coze-studio/backend/domain/knowledge/service/convert.go

350 lines
9.6 KiB
Go

/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package service
import (
"context"
"encoding/json"
"fmt"
"strconv"
"github.com/cloudwego/eino/schema"
"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
knowledgeModel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/convert"
"github.com/coze-dev/coze-studio/backend/infra/contract/document"
"github.com/coze-dev/coze-studio/backend/infra/contract/document/searchstore"
"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
)
// fieldNameDocumentID is the name used for the owning document's ID, both as
// a search-store field name and as a key inside a document's external-storage
// metadata map.
const fieldNameDocumentID = "document_id"
// Signatures of the per-document-type converters stored in the mapping tables
// of this file.
type (
	// fieldMappingFn lists the search-store fields for doc.
	// enableCompactTable selects between the two table layouts; the text and
	// image mappings ignore it.
	fieldMappingFn func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field

	// slice2DocumentFn turns a slice into a schema.Document. columns and
	// enableCompactTable are only consulted by the table mapping.
	slice2DocumentFn func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error)

	// document2SliceFn turns a schema.Document back into a slice that belongs
	// to knowledgeID/documentID. A creatorID of 0 means "read the creator from
	// the document's metadata".
	document2SliceFn func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error)
)
// fMapping maps each document type to the function that lists its
// search-store fields.
//
// Every field list begins with the same three int64 fields: the primary-key
// ID, the creator ID and the document ID. Text and image documents then add a
// single indexed text-content field. Table documents add that same content
// field when enableCompactTable is true, and otherwise add one indexed text
// field per column whose Indexing flag is set, named via getColName.
var fMapping = func() map[knowledge.DocumentType]fieldMappingFn {
	// identityFields allocates the three leading fields anew on each call, so
	// no two results share Field pointers or a backing array.
	identityFields := func() []*searchstore.Field {
		return []*searchstore.Field{
			{Name: searchstore.FieldID, Type: searchstore.FieldTypeInt64, IsPrimary: true},
			{Name: searchstore.FieldCreatorID, Type: searchstore.FieldTypeInt64},
			{Name: fieldNameDocumentID, Type: searchstore.FieldTypeInt64},
		}
	}

	// indexedText describes one indexed text field with the given name.
	indexedText := func(name string) *searchstore.Field {
		return &searchstore.Field{
			Name:     name,
			Type:     searchstore.FieldTypeText,
			Indexing: true,
		}
	}

	// contentSchema is shared by text and image documents; neither argument
	// influences the result.
	contentSchema := func(_ *entity.Document, _ bool) []*searchstore.Field {
		return append(identityFields(), indexedText(searchstore.FieldTextContent))
	}

	// tableSchema only reads doc.TableInfo.Columns in the non-compact layout.
	tableSchema := func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
		schemaFields := identityFields()
		if enableCompactTable {
			return append(schemaFields, indexedText(searchstore.FieldTextContent))
		}
		for _, column := range doc.TableInfo.Columns {
			if column.Indexing {
				schemaFields = append(schemaFields, indexedText(getColName(column.ID)))
			}
		}
		return schemaFields
	}

	return map[knowledge.DocumentType]fieldMappingFn{
		knowledge.DocumentTypeText:  contentSchema,
		knowledge.DocumentTypeTable: tableSchema,
		knowledge.DocumentTypeImage: contentSchema,
	}
}()
// s2dMapping maps each document type to the function that converts a slice
// into a schema.Document.
//
// Every produced document carries the slice ID formatted as a decimal string,
// the creator ID under document.MetaDataKeyCreatorID, and an external-storage
// map under document.MetaDataKeyExternalStorage that always holds the owning
// document ID.
//
// Text and image slices use slice.GetSliceContent() as the document content
// and never return an error.
//
// Table slices require the first raw-content entry to be a non-nil table,
// otherwise an error is returned. Each cell is matched by column ID against
// the columns kept by convert.FilterColumnsRDBID; an unmatched cell is an
// error and cells of non-indexed columns are skipped. With enableCompactTable
// the remaining cells are JSON-encoded as a column-name -> value object into
// the content. Without it each value is stored in the external-storage map
// under getColName(column ID) and the content stays empty.
var s2dMapping = func() map[knowledge.DocumentType]slice2DocumentFn {
	// newDocument assembles the parts common to every document type.
	newDocument := func(slice *entity.Slice, content string, ext map[string]any) *schema.Document {
		return &schema.Document{
			ID:      strconv.FormatInt(slice.ID, 10),
			Content: content,
			MetaData: map[string]any{
				document.MetaDataKeyCreatorID:       slice.CreatorID,
				document.MetaDataKeyExternalStorage: ext,
			},
		}
	}

	// plainContent serves both text and image slices.
	plainContent := func(_ context.Context, slice *entity.Slice, _ []*entity.TableColumn, _ bool) (*schema.Document, error) {
		ext := map[string]any{fieldNameDocumentID: slice.DocumentID}
		return newDocument(slice, slice.GetSliceContent(), ext), nil
	}

	tableRow := func(_ context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error) {
		raw := slice.RawContent
		if len(raw) == 0 || raw[0].Type != knowledgeModel.SliceContentTypeTable || raw[0].Table == nil {
			return nil, fmt.Errorf("[s2dMapping] columns data not provided")
		}

		ext := map[string]any{fieldNameDocumentID: slice.DocumentID}
		compact := make(map[string]any)
		byID := convert.ColumnIDMapping(convert.FilterColumnsRDBID(columns))

		for _, cell := range raw[0].Table.Columns {
			column, ok := byID[cell.ColumnID]
			switch {
			case !ok:
				return nil, fmt.Errorf("[s2dMapping] column not found, id=%d, name=%s", cell.ColumnID, cell.ColumnName)
			case !column.Indexing:
				continue
			case enableCompactTable:
				compact[cell.ColumnName] = cell.GetValue()
			default:
				ext[getColName(column.ID)] = cell.GetValue()
			}
		}

		// compact is only populated in compact mode, so the content stays
		// empty for the per-column layout.
		content := ""
		if len(compact) > 0 {
			encoded, err := json.Marshal(compact)
			if err != nil {
				return nil, fmt.Errorf("[s2dMapping] json marshal failed, %w", err)
			}
			content = string(encoded)
		}
		return newDocument(slice, content, ext), nil
	}

	return map[knowledge.DocumentType]slice2DocumentFn{
		knowledge.DocumentTypeText:  plainContent,
		knowledge.DocumentTypeTable: tableRow,
		knowledge.DocumentTypeImage: plainContent,
	}
}()
// d2sMapping maps each document type to the function that converts a
// schema.Document back into an entity.Slice.
//
// All three converters fill the slice the same way:
//   - a non-empty doc.ID is parsed as a base-10 int64 into the slice ID, and a
//     parse failure is returned as an error;
//   - a non-zero creatorID argument is used as is, otherwise the creator is
//     read with document.GetDocumentCreatorID and its error is returned;
//   - an int64 stored under fieldNameDocumentID in the document's external
//     storage replaces the documentID argument. A missing entry, a value of
//     another type, or an error reading the external storage leaves the
//     argument in place.
//
// Text and image documents then get one text raw-content entry holding
// doc.Content. Table documents get one table raw-content entry holding the
// column data from document.GetDocumentColumnData; if that call fails the
// slice is returned without raw content and without an error.
var d2sMapping = func() map[knowledge.DocumentType]document2SliceFn {
	// newSlice performs the steps shared by every document type and leaves
	// RawContent nil for the caller to fill.
	newSlice := func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
		out := &entity.Slice{
			Info:        knowledge.Info{},
			KnowledgeID: knowledgeID,
			DocumentID:  documentID,
		}

		if doc.ID != "" {
			parsed, err := strconv.ParseInt(doc.ID, 10, 64)
			if err != nil {
				return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
			}
			out.ID = parsed
		}

		out.CreatorID = creatorID
		if creatorID == 0 {
			owner, err := document.GetDocumentCreatorID(doc)
			if err != nil {
				return nil, err
			}
			out.CreatorID = owner
		}

		// Best effort: the stored document ID only wins when it is present
		// and is an int64.
		if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
			if stored, ok := ext[fieldNameDocumentID].(int64); ok {
				out.DocumentID = stored
			}
		}
		return out, nil
	}

	// textContent serves both text and image documents.
	textContent := func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
		out, err := newSlice(doc, knowledgeID, documentID, creatorID)
		if err != nil {
			return nil, err
		}
		out.RawContent = append(out.RawContent, &knowledgeModel.SliceContent{
			Type: knowledgeModel.SliceContentTypeText,
			Text: ptr.Of(doc.Content),
		})
		return out, nil
	}

	// NOTICE: The original data source of table type needs to be checked in rdb
	tableContent := func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
		out, err := newSlice(doc, knowledgeID, documentID, creatorID)
		if err != nil {
			return nil, err
		}
		cells, err := document.GetDocumentColumnData(doc)
		if err != nil {
			// Column data is optional here; the slice is still returned.
			return out, nil
		}
		out.RawContent = append(out.RawContent, &knowledgeModel.SliceContent{
			Type:  knowledgeModel.SliceContentTypeTable,
			Table: &knowledgeModel.SliceTable{Columns: cells},
		})
		return out, nil
	}

	return map[knowledge.DocumentType]document2SliceFn{
		knowledge.DocumentTypeText:  textContent,
		knowledge.DocumentTypeTable: tableContent,
		knowledge.DocumentTypeImage: textContent,
	}
}()
// getCollectionName returns the collection name derived from knowledgeID:
// the fixed prefix "opencoze_" followed by the ID in decimal.
func getCollectionName(knowledgeID int64) string {
	const layout = "opencoze_%d"
	name := fmt.Sprintf(layout, knowledgeID)
	return name
}
// getIndexingFields returns, in input order, the names of the fields whose
// Indexing flag is set. The result is nil when no field qualifies.
func getIndexingFields(fields []*searchstore.Field) []string {
	var names []string
	for i := range fields {
		if f := fields[i]; f.Indexing {
			names = append(names, f.Name)
		}
	}
	return names
}
// getColName returns the field name used for a table column: the fixed
// prefix "col_" followed by the column ID in decimal.
func getColName(colID int64) string {
	const layout = "col_%d"
	name := fmt.Sprintf(layout, colID)
	return name
}