350 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			350 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			Go
		
	
	
	
| /*
 | |
|  * Copyright 2025 coze-dev Authors
 | |
|  *
 | |
|  * Licensed under the Apache License, Version 2.0 (the "License");
 | |
|  * you may not use this file except in compliance with the License.
 | |
|  * You may obtain a copy of the License at
 | |
|  *
 | |
|  *     http://www.apache.org/licenses/LICENSE-2.0
 | |
|  *
 | |
|  * Unless required by applicable law or agreed to in writing, software
 | |
|  * distributed under the License is distributed on an "AS IS" BASIS,
 | |
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|  * See the License for the specific language governing permissions and
 | |
|  * limitations under the License.
 | |
|  */
 | |
| 
 | |
| package service
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"encoding/json"
 | |
| 	"fmt"
 | |
| 	"strconv"
 | |
| 
 | |
| 	"github.com/cloudwego/eino/schema"
 | |
| 
 | |
| 	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
 | |
| 	knowledgeModel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
 | |
| 	"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
 | |
| 	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/convert"
 | |
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/document"
 | |
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/document/searchstore"
 | |
| 	"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
 | |
| )
 | |
| 
 | |
| const fieldNameDocumentID = "document_id"
 | |
| 
 | |
| type fieldMappingFn func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field
 | |
| 
 | |
| type slice2DocumentFn func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error)
 | |
| 
 | |
| type document2SliceFn func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error)
 | |
| 
 | |
| var fMapping = map[knowledge.DocumentType]fieldMappingFn{
 | |
| 	knowledge.DocumentTypeText: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
 | |
| 		fields := []*searchstore.Field{
 | |
| 			{
 | |
| 				Name:      searchstore.FieldID,
 | |
| 				Type:      searchstore.FieldTypeInt64,
 | |
| 				IsPrimary: true,
 | |
| 			},
 | |
| 			{
 | |
| 				Name: searchstore.FieldCreatorID,
 | |
| 				Type: searchstore.FieldTypeInt64,
 | |
| 			},
 | |
| 			{
 | |
| 				Name: fieldNameDocumentID,
 | |
| 				Type: searchstore.FieldTypeInt64,
 | |
| 			},
 | |
| 			{
 | |
| 				Name:     searchstore.FieldTextContent,
 | |
| 				Type:     searchstore.FieldTypeText,
 | |
| 				Indexing: true,
 | |
| 			},
 | |
| 		}
 | |
| 		return fields
 | |
| 	},
 | |
| 	knowledge.DocumentTypeTable: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
 | |
| 		fields := []*searchstore.Field{
 | |
| 			{
 | |
| 				Name:      searchstore.FieldID,
 | |
| 				Type:      searchstore.FieldTypeInt64,
 | |
| 				IsPrimary: true,
 | |
| 			},
 | |
| 			{
 | |
| 				Name: searchstore.FieldCreatorID,
 | |
| 				Type: searchstore.FieldTypeInt64,
 | |
| 			},
 | |
| 			{
 | |
| 				Name: fieldNameDocumentID,
 | |
| 				Type: searchstore.FieldTypeInt64,
 | |
| 			},
 | |
| 		}
 | |
| 
 | |
| 		if enableCompactTable {
 | |
| 			fields = append(fields, &searchstore.Field{
 | |
| 				Name:     searchstore.FieldTextContent,
 | |
| 				Type:     searchstore.FieldTypeText,
 | |
| 				Indexing: true,
 | |
| 			})
 | |
| 		} else {
 | |
| 			for _, col := range doc.TableInfo.Columns {
 | |
| 				if !col.Indexing {
 | |
| 					continue
 | |
| 				}
 | |
| 				fields = append(fields, &searchstore.Field{
 | |
| 					Name:     getColName(col.ID),
 | |
| 					Type:     searchstore.FieldTypeText,
 | |
| 					Indexing: true,
 | |
| 				})
 | |
| 			}
 | |
| 		}
 | |
| 		return fields
 | |
| 	},
 | |
| 	knowledge.DocumentTypeImage: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
 | |
| 		fields := []*searchstore.Field{
 | |
| 			{
 | |
| 				Name:      searchstore.FieldID,
 | |
| 				Type:      searchstore.FieldTypeInt64,
 | |
| 				IsPrimary: true,
 | |
| 			},
 | |
| 			{
 | |
| 				Name: searchstore.FieldCreatorID,
 | |
| 				Type: searchstore.FieldTypeInt64,
 | |
| 			},
 | |
| 			{
 | |
| 				Name: fieldNameDocumentID,
 | |
| 				Type: searchstore.FieldTypeInt64,
 | |
| 			},
 | |
| 			{
 | |
| 				Name:     searchstore.FieldTextContent,
 | |
| 				Type:     searchstore.FieldTypeText,
 | |
| 				Indexing: true,
 | |
| 			},
 | |
| 		}
 | |
| 		return fields
 | |
| 	},
 | |
| }
 | |
| 
 | |
| var s2dMapping = map[knowledge.DocumentType]slice2DocumentFn{
 | |
| 	knowledge.DocumentTypeText: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (doc *schema.Document, err error) {
 | |
| 		doc = &schema.Document{
 | |
| 			ID:      strconv.FormatInt(slice.ID, 10),
 | |
| 			Content: slice.GetSliceContent(),
 | |
| 			MetaData: map[string]any{
 | |
| 				document.MetaDataKeyCreatorID: slice.CreatorID,
 | |
| 				document.MetaDataKeyExternalStorage: map[string]any{
 | |
| 					fieldNameDocumentID: slice.DocumentID,
 | |
| 				},
 | |
| 			},
 | |
| 		}
 | |
| 
 | |
| 		return doc, nil
 | |
| 	},
 | |
| 	knowledge.DocumentTypeTable: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (doc *schema.Document, err error) {
 | |
| 		ext := map[string]any{
 | |
| 			fieldNameDocumentID: slice.DocumentID,
 | |
| 		}
 | |
| 
 | |
| 		doc = &schema.Document{
 | |
| 			ID:      strconv.FormatInt(slice.ID, 10),
 | |
| 			Content: "",
 | |
| 			MetaData: map[string]any{
 | |
| 				document.MetaDataKeyCreatorID:       slice.CreatorID,
 | |
| 				document.MetaDataKeyExternalStorage: ext,
 | |
| 			},
 | |
| 		}
 | |
| 
 | |
| 		if len(slice.RawContent) == 0 || slice.RawContent[0].Type != knowledgeModel.SliceContentTypeTable || slice.RawContent[0].Table == nil {
 | |
| 			return nil, fmt.Errorf("[s2dMapping] columns data not provided")
 | |
| 		}
 | |
| 
 | |
| 		fm := make(map[string]any)
 | |
| 		vals := slice.RawContent[0].Table.Columns
 | |
| 		colIDMapping := convert.ColumnIDMapping(convert.FilterColumnsRDBID(columns))
 | |
| 
 | |
| 		for _, val := range vals {
 | |
| 			col, found := colIDMapping[val.ColumnID]
 | |
| 			if !found {
 | |
| 				return nil, fmt.Errorf("[s2dMapping] column not found, id=%d, name=%s", val.ColumnID, val.ColumnName)
 | |
| 			}
 | |
| 			if !col.Indexing {
 | |
| 				continue
 | |
| 			}
 | |
| 			if enableCompactTable {
 | |
| 				fm[val.ColumnName] = val.GetValue()
 | |
| 			} else {
 | |
| 				ext[getColName(col.ID)] = val.GetValue()
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if len(fm) > 0 {
 | |
| 			b, err := json.Marshal(fm)
 | |
| 			if err != nil {
 | |
| 				return nil, fmt.Errorf("[s2dMapping] json marshal failed, %w", err)
 | |
| 			}
 | |
| 			doc.Content = string(b)
 | |
| 		}
 | |
| 
 | |
| 		return doc, nil
 | |
| 	},
 | |
| 	knowledge.DocumentTypeImage: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error) {
 | |
| 		doc := &schema.Document{
 | |
| 			ID:      strconv.FormatInt(slice.ID, 10),
 | |
| 			Content: slice.GetSliceContent(),
 | |
| 			MetaData: map[string]any{
 | |
| 				document.MetaDataKeyCreatorID: slice.CreatorID,
 | |
| 				document.MetaDataKeyExternalStorage: map[string]any{
 | |
| 					fieldNameDocumentID: slice.DocumentID,
 | |
| 				},
 | |
| 			},
 | |
| 		}
 | |
| 
 | |
| 		return doc, nil
 | |
| 	},
 | |
| }
 | |
| 
 | |
| var d2sMapping = map[knowledge.DocumentType]document2SliceFn{
 | |
| 	knowledge.DocumentTypeText: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
 | |
| 		slice := &entity.Slice{
 | |
| 			Info:        knowledge.Info{},
 | |
| 			KnowledgeID: knowledgeID,
 | |
| 			DocumentID:  documentID,
 | |
| 			RawContent:  nil,
 | |
| 		}
 | |
| 
 | |
| 		if doc.ID != "" {
 | |
| 			id, err := strconv.ParseInt(doc.ID, 10, 64)
 | |
| 			if err != nil {
 | |
| 				return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
 | |
| 			}
 | |
| 
 | |
| 			slice.ID = id
 | |
| 		}
 | |
| 
 | |
| 		slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
 | |
| 			Type: knowledgeModel.SliceContentTypeText,
 | |
| 			Text: ptr.Of(doc.Content),
 | |
| 		})
 | |
| 
 | |
| 		if creatorID != 0 {
 | |
| 			slice.CreatorID = creatorID
 | |
| 		} else {
 | |
| 			cid, err := document.GetDocumentCreatorID(doc)
 | |
| 			if err != nil {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			slice.CreatorID = cid
 | |
| 		}
 | |
| 
 | |
| 		if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
 | |
| 			if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
 | |
| 				slice.DocumentID = documentID
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		return slice, nil
 | |
| 	},
 | |
| 	knowledge.DocumentTypeTable: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
 | |
| 		// NOTICE: table 类型的原始数据需要去 rdb 里查
 | |
| 		slice := &entity.Slice{
 | |
| 			Info:        knowledge.Info{},
 | |
| 			KnowledgeID: knowledgeID,
 | |
| 			DocumentID:  documentID,
 | |
| 			RawContent:  nil,
 | |
| 		}
 | |
| 
 | |
| 		if doc.ID != "" {
 | |
| 			id, err := strconv.ParseInt(doc.ID, 10, 64)
 | |
| 			if err != nil {
 | |
| 				return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
 | |
| 			}
 | |
| 			slice.ID = id
 | |
| 		}
 | |
| 
 | |
| 		if creatorID != 0 {
 | |
| 			slice.CreatorID = creatorID
 | |
| 		} else {
 | |
| 			cid, err := document.GetDocumentCreatorID(doc)
 | |
| 			if err != nil {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			slice.CreatorID = cid
 | |
| 		}
 | |
| 
 | |
| 		if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
 | |
| 			if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
 | |
| 				slice.DocumentID = documentID
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if vals, err := document.GetDocumentColumnData(doc); err == nil {
 | |
| 			slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
 | |
| 				Type:  knowledgeModel.SliceContentTypeTable,
 | |
| 				Table: &knowledgeModel.SliceTable{Columns: vals},
 | |
| 			})
 | |
| 		}
 | |
| 
 | |
| 		return slice, nil
 | |
| 	},
 | |
| 	knowledge.DocumentTypeImage: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
 | |
| 		slice := &entity.Slice{
 | |
| 			Info:        knowledge.Info{},
 | |
| 			KnowledgeID: knowledgeID,
 | |
| 			DocumentID:  documentID,
 | |
| 			RawContent:  nil,
 | |
| 		}
 | |
| 
 | |
| 		if doc.ID != "" {
 | |
| 			id, err := strconv.ParseInt(doc.ID, 10, 64)
 | |
| 			if err != nil {
 | |
| 				return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
 | |
| 			}
 | |
| 
 | |
| 			slice.ID = id
 | |
| 		}
 | |
| 
 | |
| 		slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
 | |
| 			Type: knowledgeModel.SliceContentTypeText,
 | |
| 			Text: ptr.Of(doc.Content),
 | |
| 		})
 | |
| 
 | |
| 		if creatorID != 0 {
 | |
| 			slice.CreatorID = creatorID
 | |
| 		} else {
 | |
| 			cid, err := document.GetDocumentCreatorID(doc)
 | |
| 			if err != nil {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			slice.CreatorID = cid
 | |
| 		}
 | |
| 
 | |
| 		if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
 | |
| 			if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
 | |
| 				slice.DocumentID = documentID
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		return slice, nil
 | |
| 	},
 | |
| }
 | |
| 
 | |
// getCollectionName returns the search store collection name for a knowledge base:
// the fixed prefix "opencoze_" followed by the decimal knowledge ID.
func getCollectionName(knowledgeID int64) string {
	const prefix = "opencoze_"
	return prefix + strconv.FormatInt(knowledgeID, 10)
}
 | |
| 
 | |
| func getIndexingFields(fields []*searchstore.Field) []string {
 | |
| 	var indexingFields []string
 | |
| 	for _, field := range fields {
 | |
| 		if field.Indexing {
 | |
| 			indexingFields = append(indexingFields, field.Name)
 | |
| 		}
 | |
| 	}
 | |
| 	return indexingFields
 | |
| }
 | |
| 
 | |
// getColName returns the search store field name used for a table column:
// the fixed prefix "col_" followed by the decimal column ID.
func getColName(colID int64) string {
	const prefix = "col_"
	return prefix + strconv.FormatInt(colID, 10)
}
 |