feat: manually mirror opencoze's code from bytedance
Change-Id: I09a73aadda978ad9511264a756b2ce51f5761adf
This commit is contained in:
349
backend/domain/knowledge/service/convert.go
Normal file
349
backend/domain/knowledge/service/convert.go
Normal file
@@ -0,0 +1,349 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
|
||||
"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
|
||||
knowledgeModel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/convert"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/document"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/document/searchstore"
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
|
||||
)
|
||||
|
||||
// fieldNameDocumentID is the field name used for a slice's document ID: it is
// declared as an Int64 search-store field in fMapping and used as a key inside
// the external-storage metadata map built by s2dMapping and read by d2sMapping.
const fieldNameDocumentID = "document_id"
|
||||
|
||||
// fieldMappingFn returns the search-store field definitions for doc.
// Implementations are registered per document type in fMapping; only the table
// implementation there consults enableCompactTable.
type fieldMappingFn func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field
|
||||
|
||||
// slice2DocumentFn converts an entity.Slice into a schema.Document.
// Implementations are registered per document type in s2dMapping; columns and
// enableCompactTable are only used by the table implementation there.
type slice2DocumentFn func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error)
|
||||
|
||||
// document2SliceFn converts a schema.Document into an entity.Slice.
// Implementations are registered per document type in d2sMapping. In those
// implementations a non-zero creatorID takes precedence over the creator ID
// stored in the document, and a document ID found in the document's external
// storage replaces the documentID argument.
type document2SliceFn func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error)
|
||||
|
||||
var fMapping = map[knowledge.DocumentType]fieldMappingFn{
|
||||
knowledge.DocumentTypeText: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
|
||||
fields := []*searchstore.Field{
|
||||
{
|
||||
Name: searchstore.FieldID,
|
||||
Type: searchstore.FieldTypeInt64,
|
||||
IsPrimary: true,
|
||||
},
|
||||
{
|
||||
Name: searchstore.FieldCreatorID,
|
||||
Type: searchstore.FieldTypeInt64,
|
||||
},
|
||||
{
|
||||
Name: fieldNameDocumentID,
|
||||
Type: searchstore.FieldTypeInt64,
|
||||
},
|
||||
{
|
||||
Name: searchstore.FieldTextContent,
|
||||
Type: searchstore.FieldTypeText,
|
||||
Indexing: true,
|
||||
},
|
||||
}
|
||||
return fields
|
||||
},
|
||||
knowledge.DocumentTypeTable: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
|
||||
fields := []*searchstore.Field{
|
||||
{
|
||||
Name: searchstore.FieldID,
|
||||
Type: searchstore.FieldTypeInt64,
|
||||
IsPrimary: true,
|
||||
},
|
||||
{
|
||||
Name: searchstore.FieldCreatorID,
|
||||
Type: searchstore.FieldTypeInt64,
|
||||
},
|
||||
{
|
||||
Name: fieldNameDocumentID,
|
||||
Type: searchstore.FieldTypeInt64,
|
||||
},
|
||||
}
|
||||
|
||||
if enableCompactTable {
|
||||
fields = append(fields, &searchstore.Field{
|
||||
Name: searchstore.FieldTextContent,
|
||||
Type: searchstore.FieldTypeText,
|
||||
Indexing: true,
|
||||
})
|
||||
} else {
|
||||
for _, col := range doc.TableInfo.Columns {
|
||||
if !col.Indexing {
|
||||
continue
|
||||
}
|
||||
fields = append(fields, &searchstore.Field{
|
||||
Name: getColName(col.ID),
|
||||
Type: searchstore.FieldTypeText,
|
||||
Indexing: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
return fields
|
||||
},
|
||||
knowledge.DocumentTypeImage: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
|
||||
fields := []*searchstore.Field{
|
||||
{
|
||||
Name: searchstore.FieldID,
|
||||
Type: searchstore.FieldTypeInt64,
|
||||
IsPrimary: true,
|
||||
},
|
||||
{
|
||||
Name: searchstore.FieldCreatorID,
|
||||
Type: searchstore.FieldTypeInt64,
|
||||
},
|
||||
{
|
||||
Name: fieldNameDocumentID,
|
||||
Type: searchstore.FieldTypeInt64,
|
||||
},
|
||||
{
|
||||
Name: searchstore.FieldTextContent,
|
||||
Type: searchstore.FieldTypeText,
|
||||
Indexing: true,
|
||||
},
|
||||
}
|
||||
return fields
|
||||
},
|
||||
}
|
||||
|
||||
var s2dMapping = map[knowledge.DocumentType]slice2DocumentFn{
|
||||
knowledge.DocumentTypeText: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (doc *schema.Document, err error) {
|
||||
doc = &schema.Document{
|
||||
ID: strconv.FormatInt(slice.ID, 10),
|
||||
Content: slice.GetSliceContent(),
|
||||
MetaData: map[string]any{
|
||||
document.MetaDataKeyCreatorID: slice.CreatorID,
|
||||
document.MetaDataKeyExternalStorage: map[string]any{
|
||||
fieldNameDocumentID: slice.DocumentID,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return doc, nil
|
||||
},
|
||||
knowledge.DocumentTypeTable: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (doc *schema.Document, err error) {
|
||||
ext := map[string]any{
|
||||
fieldNameDocumentID: slice.DocumentID,
|
||||
}
|
||||
|
||||
doc = &schema.Document{
|
||||
ID: strconv.FormatInt(slice.ID, 10),
|
||||
Content: "",
|
||||
MetaData: map[string]any{
|
||||
document.MetaDataKeyCreatorID: slice.CreatorID,
|
||||
document.MetaDataKeyExternalStorage: ext,
|
||||
},
|
||||
}
|
||||
|
||||
if len(slice.RawContent) == 0 || slice.RawContent[0].Type != knowledgeModel.SliceContentTypeTable || slice.RawContent[0].Table == nil {
|
||||
return nil, fmt.Errorf("[s2dMapping] columns data not provided")
|
||||
}
|
||||
|
||||
fm := make(map[string]any)
|
||||
vals := slice.RawContent[0].Table.Columns
|
||||
colIDMapping := convert.ColumnIDMapping(convert.FilterColumnsRDBID(columns))
|
||||
|
||||
for _, val := range vals {
|
||||
col, found := colIDMapping[val.ColumnID]
|
||||
if !found {
|
||||
return nil, fmt.Errorf("[s2dMapping] column not found, id=%d, name=%s", val.ColumnID, val.ColumnName)
|
||||
}
|
||||
if !col.Indexing {
|
||||
continue
|
||||
}
|
||||
if enableCompactTable {
|
||||
fm[val.ColumnName] = val.GetValue()
|
||||
} else {
|
||||
ext[getColName(col.ID)] = val.GetValue()
|
||||
}
|
||||
}
|
||||
|
||||
if len(fm) > 0 {
|
||||
b, err := json.Marshal(fm)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[s2dMapping] json marshal failed, %w", err)
|
||||
}
|
||||
doc.Content = string(b)
|
||||
}
|
||||
|
||||
return doc, nil
|
||||
},
|
||||
knowledge.DocumentTypeImage: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error) {
|
||||
doc := &schema.Document{
|
||||
ID: strconv.FormatInt(slice.ID, 10),
|
||||
Content: slice.GetSliceContent(),
|
||||
MetaData: map[string]any{
|
||||
document.MetaDataKeyCreatorID: slice.CreatorID,
|
||||
document.MetaDataKeyExternalStorage: map[string]any{
|
||||
fieldNameDocumentID: slice.DocumentID,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return doc, nil
|
||||
},
|
||||
}
|
||||
|
||||
var d2sMapping = map[knowledge.DocumentType]document2SliceFn{
|
||||
knowledge.DocumentTypeText: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
|
||||
slice := &entity.Slice{
|
||||
Info: knowledge.Info{},
|
||||
KnowledgeID: knowledgeID,
|
||||
DocumentID: documentID,
|
||||
RawContent: nil,
|
||||
}
|
||||
|
||||
if doc.ID != "" {
|
||||
id, err := strconv.ParseInt(doc.ID, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
|
||||
}
|
||||
|
||||
slice.ID = id
|
||||
}
|
||||
|
||||
slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
|
||||
Type: knowledgeModel.SliceContentTypeText,
|
||||
Text: ptr.Of(doc.Content),
|
||||
})
|
||||
|
||||
if creatorID != 0 {
|
||||
slice.CreatorID = creatorID
|
||||
} else {
|
||||
cid, err := document.GetDocumentCreatorID(doc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
slice.CreatorID = cid
|
||||
}
|
||||
|
||||
if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
|
||||
if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
|
||||
slice.DocumentID = documentID
|
||||
}
|
||||
}
|
||||
|
||||
return slice, nil
|
||||
},
|
||||
knowledge.DocumentTypeTable: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
|
||||
// NOTICE: table 类型的原始数据需要去 rdb 里查
|
||||
slice := &entity.Slice{
|
||||
Info: knowledge.Info{},
|
||||
KnowledgeID: knowledgeID,
|
||||
DocumentID: documentID,
|
||||
RawContent: nil,
|
||||
}
|
||||
|
||||
if doc.ID != "" {
|
||||
id, err := strconv.ParseInt(doc.ID, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
|
||||
}
|
||||
slice.ID = id
|
||||
}
|
||||
|
||||
if creatorID != 0 {
|
||||
slice.CreatorID = creatorID
|
||||
} else {
|
||||
cid, err := document.GetDocumentCreatorID(doc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
slice.CreatorID = cid
|
||||
}
|
||||
|
||||
if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
|
||||
if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
|
||||
slice.DocumentID = documentID
|
||||
}
|
||||
}
|
||||
|
||||
if vals, err := document.GetDocumentColumnData(doc); err == nil {
|
||||
slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
|
||||
Type: knowledgeModel.SliceContentTypeTable,
|
||||
Table: &knowledgeModel.SliceTable{Columns: vals},
|
||||
})
|
||||
}
|
||||
|
||||
return slice, nil
|
||||
},
|
||||
knowledge.DocumentTypeImage: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
|
||||
slice := &entity.Slice{
|
||||
Info: knowledge.Info{},
|
||||
KnowledgeID: knowledgeID,
|
||||
DocumentID: documentID,
|
||||
RawContent: nil,
|
||||
}
|
||||
|
||||
if doc.ID != "" {
|
||||
id, err := strconv.ParseInt(doc.ID, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
|
||||
}
|
||||
|
||||
slice.ID = id
|
||||
}
|
||||
|
||||
slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
|
||||
Type: knowledgeModel.SliceContentTypeText,
|
||||
Text: ptr.Of(doc.Content),
|
||||
})
|
||||
|
||||
if creatorID != 0 {
|
||||
slice.CreatorID = creatorID
|
||||
} else {
|
||||
cid, err := document.GetDocumentCreatorID(doc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
slice.CreatorID = cid
|
||||
}
|
||||
|
||||
if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
|
||||
if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
|
||||
slice.DocumentID = documentID
|
||||
}
|
||||
}
|
||||
|
||||
return slice, nil
|
||||
},
|
||||
}
|
||||
|
||||
// getCollectionName builds the collection name for a knowledge ID: the fixed
// prefix "opencoze_" followed by the ID in decimal.
func getCollectionName(knowledgeID int64) string {
	const prefix = "opencoze_"
	return prefix + fmt.Sprint(knowledgeID)
}
|
||||
|
||||
func getIndexingFields(fields []*searchstore.Field) []string {
|
||||
var indexingFields []string
|
||||
for _, field := range fields {
|
||||
if field.Indexing {
|
||||
indexingFields = append(indexingFields, field.Name)
|
||||
}
|
||||
}
|
||||
return indexingFields
|
||||
}
|
||||
|
||||
// getColName builds the field name for a table column: the fixed prefix "col_"
// followed by the column ID in decimal.
func getColName(colID int64) string {
	const prefix = "col_"
	return prefix + fmt.Sprint(colID)
}
|
||||
Reference in New Issue
Block a user