350 lines
9.6 KiB
Go
350 lines
9.6 KiB
Go
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
package service
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"strconv"
|
|
|
|
"github.com/cloudwego/eino/schema"
|
|
|
|
"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
|
|
knowledgeModel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
|
|
"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
|
|
"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/convert"
|
|
"github.com/coze-dev/coze-studio/backend/infra/contract/document"
|
|
"github.com/coze-dev/coze-studio/backend/infra/contract/document/searchstore"
|
|
"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
|
|
)
|
|
|
|
// fieldNameDocumentID is the name of the search store field, and the key in a
// document's external-storage metadata, that carries the owning document's ID.
const fieldNameDocumentID = "document_id"
|
|
|
|
// fieldMappingFn lists the search store fields declared for doc.
// Among the implementations registered in fMapping, only the table mapping
// reads doc and enableCompactTable: when the flag is set it declares a single
// text-content field instead of one field per indexed column.
type fieldMappingFn func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field
|
|
|
|
// slice2DocumentFn converts a knowledge slice into a schema.Document.
// Among the implementations registered in s2dMapping, only the table
// conversion reads columns and enableCompactTable; none of them use ctx.
type slice2DocumentFn func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error)
|
|
|
|
// document2SliceFn converts a schema.Document back into a knowledge slice.
// In the implementations registered in d2sMapping, knowledgeID and documentID
// seed the slice, an int64 document ID found in the document's external
// storage replaces documentID, and a non-zero creatorID takes precedence over
// the creator ID stored in the document.
type document2SliceFn func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error)
|
|
|
|
var fMapping = map[knowledge.DocumentType]fieldMappingFn{
|
|
knowledge.DocumentTypeText: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
|
|
fields := []*searchstore.Field{
|
|
{
|
|
Name: searchstore.FieldID,
|
|
Type: searchstore.FieldTypeInt64,
|
|
IsPrimary: true,
|
|
},
|
|
{
|
|
Name: searchstore.FieldCreatorID,
|
|
Type: searchstore.FieldTypeInt64,
|
|
},
|
|
{
|
|
Name: fieldNameDocumentID,
|
|
Type: searchstore.FieldTypeInt64,
|
|
},
|
|
{
|
|
Name: searchstore.FieldTextContent,
|
|
Type: searchstore.FieldTypeText,
|
|
Indexing: true,
|
|
},
|
|
}
|
|
return fields
|
|
},
|
|
knowledge.DocumentTypeTable: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
|
|
fields := []*searchstore.Field{
|
|
{
|
|
Name: searchstore.FieldID,
|
|
Type: searchstore.FieldTypeInt64,
|
|
IsPrimary: true,
|
|
},
|
|
{
|
|
Name: searchstore.FieldCreatorID,
|
|
Type: searchstore.FieldTypeInt64,
|
|
},
|
|
{
|
|
Name: fieldNameDocumentID,
|
|
Type: searchstore.FieldTypeInt64,
|
|
},
|
|
}
|
|
|
|
if enableCompactTable {
|
|
fields = append(fields, &searchstore.Field{
|
|
Name: searchstore.FieldTextContent,
|
|
Type: searchstore.FieldTypeText,
|
|
Indexing: true,
|
|
})
|
|
} else {
|
|
for _, col := range doc.TableInfo.Columns {
|
|
if !col.Indexing {
|
|
continue
|
|
}
|
|
fields = append(fields, &searchstore.Field{
|
|
Name: getColName(col.ID),
|
|
Type: searchstore.FieldTypeText,
|
|
Indexing: true,
|
|
})
|
|
}
|
|
}
|
|
return fields
|
|
},
|
|
knowledge.DocumentTypeImage: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
|
|
fields := []*searchstore.Field{
|
|
{
|
|
Name: searchstore.FieldID,
|
|
Type: searchstore.FieldTypeInt64,
|
|
IsPrimary: true,
|
|
},
|
|
{
|
|
Name: searchstore.FieldCreatorID,
|
|
Type: searchstore.FieldTypeInt64,
|
|
},
|
|
{
|
|
Name: fieldNameDocumentID,
|
|
Type: searchstore.FieldTypeInt64,
|
|
},
|
|
{
|
|
Name: searchstore.FieldTextContent,
|
|
Type: searchstore.FieldTypeText,
|
|
Indexing: true,
|
|
},
|
|
}
|
|
return fields
|
|
},
|
|
}
|
|
|
|
var s2dMapping = map[knowledge.DocumentType]slice2DocumentFn{
|
|
knowledge.DocumentTypeText: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (doc *schema.Document, err error) {
|
|
doc = &schema.Document{
|
|
ID: strconv.FormatInt(slice.ID, 10),
|
|
Content: slice.GetSliceContent(),
|
|
MetaData: map[string]any{
|
|
document.MetaDataKeyCreatorID: slice.CreatorID,
|
|
document.MetaDataKeyExternalStorage: map[string]any{
|
|
fieldNameDocumentID: slice.DocumentID,
|
|
},
|
|
},
|
|
}
|
|
|
|
return doc, nil
|
|
},
|
|
knowledge.DocumentTypeTable: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (doc *schema.Document, err error) {
|
|
ext := map[string]any{
|
|
fieldNameDocumentID: slice.DocumentID,
|
|
}
|
|
|
|
doc = &schema.Document{
|
|
ID: strconv.FormatInt(slice.ID, 10),
|
|
Content: "",
|
|
MetaData: map[string]any{
|
|
document.MetaDataKeyCreatorID: slice.CreatorID,
|
|
document.MetaDataKeyExternalStorage: ext,
|
|
},
|
|
}
|
|
|
|
if len(slice.RawContent) == 0 || slice.RawContent[0].Type != knowledgeModel.SliceContentTypeTable || slice.RawContent[0].Table == nil {
|
|
return nil, fmt.Errorf("[s2dMapping] columns data not provided")
|
|
}
|
|
|
|
fm := make(map[string]any)
|
|
vals := slice.RawContent[0].Table.Columns
|
|
colIDMapping := convert.ColumnIDMapping(convert.FilterColumnsRDBID(columns))
|
|
|
|
for _, val := range vals {
|
|
col, found := colIDMapping[val.ColumnID]
|
|
if !found {
|
|
return nil, fmt.Errorf("[s2dMapping] column not found, id=%d, name=%s", val.ColumnID, val.ColumnName)
|
|
}
|
|
if !col.Indexing {
|
|
continue
|
|
}
|
|
if enableCompactTable {
|
|
fm[val.ColumnName] = val.GetValue()
|
|
} else {
|
|
ext[getColName(col.ID)] = val.GetValue()
|
|
}
|
|
}
|
|
|
|
if len(fm) > 0 {
|
|
b, err := json.Marshal(fm)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("[s2dMapping] json marshal failed, %w", err)
|
|
}
|
|
doc.Content = string(b)
|
|
}
|
|
|
|
return doc, nil
|
|
},
|
|
knowledge.DocumentTypeImage: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error) {
|
|
doc := &schema.Document{
|
|
ID: strconv.FormatInt(slice.ID, 10),
|
|
Content: slice.GetSliceContent(),
|
|
MetaData: map[string]any{
|
|
document.MetaDataKeyCreatorID: slice.CreatorID,
|
|
document.MetaDataKeyExternalStorage: map[string]any{
|
|
fieldNameDocumentID: slice.DocumentID,
|
|
},
|
|
},
|
|
}
|
|
|
|
return doc, nil
|
|
},
|
|
}
|
|
|
|
var d2sMapping = map[knowledge.DocumentType]document2SliceFn{
|
|
knowledge.DocumentTypeText: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
|
|
slice := &entity.Slice{
|
|
Info: knowledge.Info{},
|
|
KnowledgeID: knowledgeID,
|
|
DocumentID: documentID,
|
|
RawContent: nil,
|
|
}
|
|
|
|
if doc.ID != "" {
|
|
id, err := strconv.ParseInt(doc.ID, 10, 64)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
|
|
}
|
|
|
|
slice.ID = id
|
|
}
|
|
|
|
slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
|
|
Type: knowledgeModel.SliceContentTypeText,
|
|
Text: ptr.Of(doc.Content),
|
|
})
|
|
|
|
if creatorID != 0 {
|
|
slice.CreatorID = creatorID
|
|
} else {
|
|
cid, err := document.GetDocumentCreatorID(doc)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
slice.CreatorID = cid
|
|
}
|
|
|
|
if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
|
|
if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
|
|
slice.DocumentID = documentID
|
|
}
|
|
}
|
|
|
|
return slice, nil
|
|
},
|
|
knowledge.DocumentTypeTable: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
|
|
// NOTICE: The original data source of table type needs to be checked in rdb
|
|
slice := &entity.Slice{
|
|
Info: knowledge.Info{},
|
|
KnowledgeID: knowledgeID,
|
|
DocumentID: documentID,
|
|
RawContent: nil,
|
|
}
|
|
|
|
if doc.ID != "" {
|
|
id, err := strconv.ParseInt(doc.ID, 10, 64)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
|
|
}
|
|
slice.ID = id
|
|
}
|
|
|
|
if creatorID != 0 {
|
|
slice.CreatorID = creatorID
|
|
} else {
|
|
cid, err := document.GetDocumentCreatorID(doc)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
slice.CreatorID = cid
|
|
}
|
|
|
|
if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
|
|
if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
|
|
slice.DocumentID = documentID
|
|
}
|
|
}
|
|
|
|
if vals, err := document.GetDocumentColumnData(doc); err == nil {
|
|
slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
|
|
Type: knowledgeModel.SliceContentTypeTable,
|
|
Table: &knowledgeModel.SliceTable{Columns: vals},
|
|
})
|
|
}
|
|
|
|
return slice, nil
|
|
},
|
|
knowledge.DocumentTypeImage: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
|
|
slice := &entity.Slice{
|
|
Info: knowledge.Info{},
|
|
KnowledgeID: knowledgeID,
|
|
DocumentID: documentID,
|
|
RawContent: nil,
|
|
}
|
|
|
|
if doc.ID != "" {
|
|
id, err := strconv.ParseInt(doc.ID, 10, 64)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
|
|
}
|
|
|
|
slice.ID = id
|
|
}
|
|
|
|
slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
|
|
Type: knowledgeModel.SliceContentTypeText,
|
|
Text: ptr.Of(doc.Content),
|
|
})
|
|
|
|
if creatorID != 0 {
|
|
slice.CreatorID = creatorID
|
|
} else {
|
|
cid, err := document.GetDocumentCreatorID(doc)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
slice.CreatorID = cid
|
|
}
|
|
|
|
if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
|
|
if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
|
|
slice.DocumentID = documentID
|
|
}
|
|
}
|
|
|
|
return slice, nil
|
|
},
|
|
}
|
|
|
|
// getCollectionName returns the collection name used for the given knowledge
// ID: the prefix "opencoze_" followed by the ID in base 10.
func getCollectionName(knowledgeID int64) string {
	// strconv avoids fmt's reflection-based formatting for a plain integer.
	return "opencoze_" + strconv.FormatInt(knowledgeID, 10)
}
|
|
|
|
func getIndexingFields(fields []*searchstore.Field) []string {
|
|
var indexingFields []string
|
|
for _, field := range fields {
|
|
if field.Indexing {
|
|
indexingFields = append(indexingFields, field.Name)
|
|
}
|
|
}
|
|
return indexingFields
|
|
}
|
|
|
|
// getColName returns the search store field name for a table column: the
// prefix "col_" followed by the column ID in base 10.
func getColName(colID int64) string {
	// strconv avoids fmt's reflection-based formatting for a plain integer.
	return "col_" + strconv.FormatInt(colID, 10)
}
|