350 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			350 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			Go
		
	
	
	
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 | 
						|
 | 
						|
package service
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"encoding/json"
 | 
						|
	"fmt"
 | 
						|
	"strconv"
 | 
						|
 | 
						|
	"github.com/cloudwego/eino/schema"
 | 
						|
 | 
						|
	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
 | 
						|
	knowledgeModel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
 | 
						|
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
 | 
						|
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/convert"
 | 
						|
	"github.com/coze-dev/coze-studio/backend/infra/contract/document"
 | 
						|
	"github.com/coze-dev/coze-studio/backend/infra/contract/document/searchstore"
 | 
						|
	"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
 | 
						|
)
 | 
						|
 | 
						|
// fieldNameDocumentID is the name under which the ID of a slice's owning
// document is stored. In this file it is used both as a search-store field
// name and as a key inside the external-storage metadata map of a
// schema.Document.
const fieldNameDocumentID = "document_id"
 | 
						|
 | 
						|
// fieldMappingFn builds the list of search-store fields for doc.
// enableCompactTable is a mode switch; of the implementations registered in
// fMapping, only the table one reads it (and only that one reads doc).
type fieldMappingFn func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field
 | 
						|
 | 
						|
// slice2DocumentFn converts a slice into a schema.Document. columns and
// enableCompactTable are table-specific inputs; the text and image
// implementations registered in s2dMapping ignore them.
type slice2DocumentFn func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error)
 | 
						|
 | 
						|
// document2SliceFn converts a schema.Document into an entity.Slice that
// belongs to knowledgeID and documentID. In the implementations registered in
// d2sMapping, a zero creatorID means the creator ID is read from the
// document's metadata instead.
type document2SliceFn func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error)
 | 
						|
 | 
						|
var fMapping = map[knowledge.DocumentType]fieldMappingFn{
 | 
						|
	knowledge.DocumentTypeText: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
 | 
						|
		fields := []*searchstore.Field{
 | 
						|
			{
 | 
						|
				Name:      searchstore.FieldID,
 | 
						|
				Type:      searchstore.FieldTypeInt64,
 | 
						|
				IsPrimary: true,
 | 
						|
			},
 | 
						|
			{
 | 
						|
				Name: searchstore.FieldCreatorID,
 | 
						|
				Type: searchstore.FieldTypeInt64,
 | 
						|
			},
 | 
						|
			{
 | 
						|
				Name: fieldNameDocumentID,
 | 
						|
				Type: searchstore.FieldTypeInt64,
 | 
						|
			},
 | 
						|
			{
 | 
						|
				Name:     searchstore.FieldTextContent,
 | 
						|
				Type:     searchstore.FieldTypeText,
 | 
						|
				Indexing: true,
 | 
						|
			},
 | 
						|
		}
 | 
						|
		return fields
 | 
						|
	},
 | 
						|
	knowledge.DocumentTypeTable: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
 | 
						|
		fields := []*searchstore.Field{
 | 
						|
			{
 | 
						|
				Name:      searchstore.FieldID,
 | 
						|
				Type:      searchstore.FieldTypeInt64,
 | 
						|
				IsPrimary: true,
 | 
						|
			},
 | 
						|
			{
 | 
						|
				Name: searchstore.FieldCreatorID,
 | 
						|
				Type: searchstore.FieldTypeInt64,
 | 
						|
			},
 | 
						|
			{
 | 
						|
				Name: fieldNameDocumentID,
 | 
						|
				Type: searchstore.FieldTypeInt64,
 | 
						|
			},
 | 
						|
		}
 | 
						|
 | 
						|
		if enableCompactTable {
 | 
						|
			fields = append(fields, &searchstore.Field{
 | 
						|
				Name:     searchstore.FieldTextContent,
 | 
						|
				Type:     searchstore.FieldTypeText,
 | 
						|
				Indexing: true,
 | 
						|
			})
 | 
						|
		} else {
 | 
						|
			for _, col := range doc.TableInfo.Columns {
 | 
						|
				if !col.Indexing {
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				fields = append(fields, &searchstore.Field{
 | 
						|
					Name:     getColName(col.ID),
 | 
						|
					Type:     searchstore.FieldTypeText,
 | 
						|
					Indexing: true,
 | 
						|
				})
 | 
						|
			}
 | 
						|
		}
 | 
						|
		return fields
 | 
						|
	},
 | 
						|
	knowledge.DocumentTypeImage: func(doc *entity.Document, enableCompactTable bool) []*searchstore.Field {
 | 
						|
		fields := []*searchstore.Field{
 | 
						|
			{
 | 
						|
				Name:      searchstore.FieldID,
 | 
						|
				Type:      searchstore.FieldTypeInt64,
 | 
						|
				IsPrimary: true,
 | 
						|
			},
 | 
						|
			{
 | 
						|
				Name: searchstore.FieldCreatorID,
 | 
						|
				Type: searchstore.FieldTypeInt64,
 | 
						|
			},
 | 
						|
			{
 | 
						|
				Name: fieldNameDocumentID,
 | 
						|
				Type: searchstore.FieldTypeInt64,
 | 
						|
			},
 | 
						|
			{
 | 
						|
				Name:     searchstore.FieldTextContent,
 | 
						|
				Type:     searchstore.FieldTypeText,
 | 
						|
				Indexing: true,
 | 
						|
			},
 | 
						|
		}
 | 
						|
		return fields
 | 
						|
	},
 | 
						|
}
 | 
						|
 | 
						|
var s2dMapping = map[knowledge.DocumentType]slice2DocumentFn{
 | 
						|
	knowledge.DocumentTypeText: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (doc *schema.Document, err error) {
 | 
						|
		doc = &schema.Document{
 | 
						|
			ID:      strconv.FormatInt(slice.ID, 10),
 | 
						|
			Content: slice.GetSliceContent(),
 | 
						|
			MetaData: map[string]any{
 | 
						|
				document.MetaDataKeyCreatorID: slice.CreatorID,
 | 
						|
				document.MetaDataKeyExternalStorage: map[string]any{
 | 
						|
					fieldNameDocumentID: slice.DocumentID,
 | 
						|
				},
 | 
						|
			},
 | 
						|
		}
 | 
						|
 | 
						|
		return doc, nil
 | 
						|
	},
 | 
						|
	knowledge.DocumentTypeTable: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (doc *schema.Document, err error) {
 | 
						|
		ext := map[string]any{
 | 
						|
			fieldNameDocumentID: slice.DocumentID,
 | 
						|
		}
 | 
						|
 | 
						|
		doc = &schema.Document{
 | 
						|
			ID:      strconv.FormatInt(slice.ID, 10),
 | 
						|
			Content: "",
 | 
						|
			MetaData: map[string]any{
 | 
						|
				document.MetaDataKeyCreatorID:       slice.CreatorID,
 | 
						|
				document.MetaDataKeyExternalStorage: ext,
 | 
						|
			},
 | 
						|
		}
 | 
						|
 | 
						|
		if len(slice.RawContent) == 0 || slice.RawContent[0].Type != knowledgeModel.SliceContentTypeTable || slice.RawContent[0].Table == nil {
 | 
						|
			return nil, fmt.Errorf("[s2dMapping] columns data not provided")
 | 
						|
		}
 | 
						|
 | 
						|
		fm := make(map[string]any)
 | 
						|
		vals := slice.RawContent[0].Table.Columns
 | 
						|
		colIDMapping := convert.ColumnIDMapping(convert.FilterColumnsRDBID(columns))
 | 
						|
 | 
						|
		for _, val := range vals {
 | 
						|
			col, found := colIDMapping[val.ColumnID]
 | 
						|
			if !found {
 | 
						|
				return nil, fmt.Errorf("[s2dMapping] column not found, id=%d, name=%s", val.ColumnID, val.ColumnName)
 | 
						|
			}
 | 
						|
			if !col.Indexing {
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			if enableCompactTable {
 | 
						|
				fm[val.ColumnName] = val.GetValue()
 | 
						|
			} else {
 | 
						|
				ext[getColName(col.ID)] = val.GetValue()
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if len(fm) > 0 {
 | 
						|
			b, err := json.Marshal(fm)
 | 
						|
			if err != nil {
 | 
						|
				return nil, fmt.Errorf("[s2dMapping] json marshal failed, %w", err)
 | 
						|
			}
 | 
						|
			doc.Content = string(b)
 | 
						|
		}
 | 
						|
 | 
						|
		return doc, nil
 | 
						|
	},
 | 
						|
	knowledge.DocumentTypeImage: func(ctx context.Context, slice *entity.Slice, columns []*entity.TableColumn, enableCompactTable bool) (*schema.Document, error) {
 | 
						|
		doc := &schema.Document{
 | 
						|
			ID:      strconv.FormatInt(slice.ID, 10),
 | 
						|
			Content: slice.GetSliceContent(),
 | 
						|
			MetaData: map[string]any{
 | 
						|
				document.MetaDataKeyCreatorID: slice.CreatorID,
 | 
						|
				document.MetaDataKeyExternalStorage: map[string]any{
 | 
						|
					fieldNameDocumentID: slice.DocumentID,
 | 
						|
				},
 | 
						|
			},
 | 
						|
		}
 | 
						|
 | 
						|
		return doc, nil
 | 
						|
	},
 | 
						|
}
 | 
						|
 | 
						|
var d2sMapping = map[knowledge.DocumentType]document2SliceFn{
 | 
						|
	knowledge.DocumentTypeText: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
 | 
						|
		slice := &entity.Slice{
 | 
						|
			Info:        knowledge.Info{},
 | 
						|
			KnowledgeID: knowledgeID,
 | 
						|
			DocumentID:  documentID,
 | 
						|
			RawContent:  nil,
 | 
						|
		}
 | 
						|
 | 
						|
		if doc.ID != "" {
 | 
						|
			id, err := strconv.ParseInt(doc.ID, 10, 64)
 | 
						|
			if err != nil {
 | 
						|
				return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
 | 
						|
			}
 | 
						|
 | 
						|
			slice.ID = id
 | 
						|
		}
 | 
						|
 | 
						|
		slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
 | 
						|
			Type: knowledgeModel.SliceContentTypeText,
 | 
						|
			Text: ptr.Of(doc.Content),
 | 
						|
		})
 | 
						|
 | 
						|
		if creatorID != 0 {
 | 
						|
			slice.CreatorID = creatorID
 | 
						|
		} else {
 | 
						|
			cid, err := document.GetDocumentCreatorID(doc)
 | 
						|
			if err != nil {
 | 
						|
				return nil, err
 | 
						|
			}
 | 
						|
			slice.CreatorID = cid
 | 
						|
		}
 | 
						|
 | 
						|
		if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
 | 
						|
			if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
 | 
						|
				slice.DocumentID = documentID
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		return slice, nil
 | 
						|
	},
 | 
						|
	knowledge.DocumentTypeTable: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
 | 
						|
		// NOTICE: table 类型的原始数据需要去 rdb 里查
 | 
						|
		slice := &entity.Slice{
 | 
						|
			Info:        knowledge.Info{},
 | 
						|
			KnowledgeID: knowledgeID,
 | 
						|
			DocumentID:  documentID,
 | 
						|
			RawContent:  nil,
 | 
						|
		}
 | 
						|
 | 
						|
		if doc.ID != "" {
 | 
						|
			id, err := strconv.ParseInt(doc.ID, 10, 64)
 | 
						|
			if err != nil {
 | 
						|
				return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
 | 
						|
			}
 | 
						|
			slice.ID = id
 | 
						|
		}
 | 
						|
 | 
						|
		if creatorID != 0 {
 | 
						|
			slice.CreatorID = creatorID
 | 
						|
		} else {
 | 
						|
			cid, err := document.GetDocumentCreatorID(doc)
 | 
						|
			if err != nil {
 | 
						|
				return nil, err
 | 
						|
			}
 | 
						|
			slice.CreatorID = cid
 | 
						|
		}
 | 
						|
 | 
						|
		if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
 | 
						|
			if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
 | 
						|
				slice.DocumentID = documentID
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if vals, err := document.GetDocumentColumnData(doc); err == nil {
 | 
						|
			slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
 | 
						|
				Type:  knowledgeModel.SliceContentTypeTable,
 | 
						|
				Table: &knowledgeModel.SliceTable{Columns: vals},
 | 
						|
			})
 | 
						|
		}
 | 
						|
 | 
						|
		return slice, nil
 | 
						|
	},
 | 
						|
	knowledge.DocumentTypeImage: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
 | 
						|
		slice := &entity.Slice{
 | 
						|
			Info:        knowledge.Info{},
 | 
						|
			KnowledgeID: knowledgeID,
 | 
						|
			DocumentID:  documentID,
 | 
						|
			RawContent:  nil,
 | 
						|
		}
 | 
						|
 | 
						|
		if doc.ID != "" {
 | 
						|
			id, err := strconv.ParseInt(doc.ID, 10, 64)
 | 
						|
			if err != nil {
 | 
						|
				return nil, fmt.Errorf("[d2sMapping] parse id failed, %w", err)
 | 
						|
			}
 | 
						|
 | 
						|
			slice.ID = id
 | 
						|
		}
 | 
						|
 | 
						|
		slice.RawContent = append(slice.RawContent, &knowledgeModel.SliceContent{
 | 
						|
			Type: knowledgeModel.SliceContentTypeText,
 | 
						|
			Text: ptr.Of(doc.Content),
 | 
						|
		})
 | 
						|
 | 
						|
		if creatorID != 0 {
 | 
						|
			slice.CreatorID = creatorID
 | 
						|
		} else {
 | 
						|
			cid, err := document.GetDocumentCreatorID(doc)
 | 
						|
			if err != nil {
 | 
						|
				return nil, err
 | 
						|
			}
 | 
						|
			slice.CreatorID = cid
 | 
						|
		}
 | 
						|
 | 
						|
		if ext, err := document.GetDocumentExternalStorage(doc); err == nil {
 | 
						|
			if documentID, ok := ext[fieldNameDocumentID].(int64); ok {
 | 
						|
				slice.DocumentID = documentID
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		return slice, nil
 | 
						|
	},
 | 
						|
}
 | 
						|
 | 
						|
// getCollectionName returns the collection name derived from knowledgeID:
// the literal prefix "opencoze_" followed by knowledgeID in base 10.
func getCollectionName(knowledgeID int64) string {
	return "opencoze_" + strconv.FormatInt(knowledgeID, 10)
}
 | 
						|
 | 
						|
func getIndexingFields(fields []*searchstore.Field) []string {
 | 
						|
	var indexingFields []string
 | 
						|
	for _, field := range fields {
 | 
						|
		if field.Indexing {
 | 
						|
			indexingFields = append(indexingFields, field.Name)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return indexingFields
 | 
						|
}
 | 
						|
 | 
						|
// getColName returns the field name used for a table column: the literal
// prefix "col_" followed by colID in base 10.
func getColName(colID int64) string {
	return "col_" + strconv.FormatInt(colID, 10)
}
 |