1513 lines
		
	
	
		
			55 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			1513 lines
		
	
	
		
			55 KiB
		
	
	
	
		
			Go
		
	
	
	
| /*
 | ||
|  * Copyright 2025 coze-dev Authors
 | ||
|  *
 | ||
|  * Licensed under the Apache License, Version 2.0 (the "License");
 | ||
|  * you may not use this file except in compliance with the License.
 | ||
|  * You may obtain a copy of the License at
 | ||
|  *
 | ||
|  *     http://www.apache.org/licenses/LICENSE-2.0
 | ||
|  *
 | ||
|  * Unless required by applicable law or agreed to in writing, software
 | ||
|  * distributed under the License is distributed on an "AS IS" BASIS,
 | ||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | ||
|  * See the License for the specific language governing permissions and
 | ||
|  * limitations under the License.
 | ||
|  */
 | ||
| 
 | ||
| package service
 | ||
| 
 | ||
| import (
 | ||
| 	"bytes"
 | ||
| 	"context"
 | ||
| 	"crypto/sha256"
 | ||
| 	"encoding/base64"
 | ||
| 	"errors"
 | ||
| 	"fmt"
 | ||
| 	"io"
 | ||
| 	"math/rand"
 | ||
| 	"net/http"
 | ||
| 	"strconv"
 | ||
| 	"strings"
 | ||
| 	"time"
 | ||
| 	"unicode/utf8"
 | ||
| 
 | ||
| 	"github.com/bytedance/sonic"
 | ||
| 	redisV9 "github.com/redis/go-redis/v9"
 | ||
| 	"gorm.io/gorm"
 | ||
| 
 | ||
| 	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
 | ||
| 	knowledgeModel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/api/model/ocean/cloud/developer_api"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/application/base/ctxutil"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/domain/knowledge/repository"
 | ||
| 
 | ||
| 	"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/consts"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/convert"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/dal/model"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/events"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/domain/knowledge/processor/impl"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/cache"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/chatmodel"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/document/nl2sql"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/document/ocr"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/document/parser"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/document/rerank"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/document/searchstore"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/eventbus"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/idgen"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/messages2query"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/rdb"
 | ||
| 	rdbEntity "github.com/coze-dev/coze-studio/backend/infra/contract/rdb/entity"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/storage"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/impl/document/parser/builtin"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/impl/document/progressbar"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/infra/impl/document/rerank/rrf"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/pkg/errorx"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/pkg/lang/slices"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/pkg/logs"
 | ||
| 	"github.com/coze-dev/coze-studio/backend/types/errno"
 | ||
| )
 | ||
| 
 | ||
| func NewKnowledgeSVC(config *KnowledgeSVCConfig) (Knowledge, eventbus.ConsumerHandler) {
 | ||
| 	svc := &knowledgeSVC{
 | ||
| 		knowledgeRepo:             repository.NewKnowledgeDAO(config.DB),
 | ||
| 		documentRepo:              repository.NewKnowledgeDocumentDAO(config.DB),
 | ||
| 		sliceRepo:                 repository.NewKnowledgeDocumentSliceDAO(config.DB),
 | ||
| 		reviewRepo:                repository.NewKnowledgeDocumentReviewDAO(config.DB),
 | ||
| 		idgen:                     config.IDGen,
 | ||
| 		rdb:                       config.RDB,
 | ||
| 		producer:                  config.Producer,
 | ||
| 		searchStoreManagers:       config.SearchStoreManagers,
 | ||
| 		parseManager:              config.ParseManager,
 | ||
| 		storage:                   config.Storage,
 | ||
| 		reranker:                  config.Reranker,
 | ||
| 		rewriter:                  config.Rewriter,
 | ||
| 		nl2Sql:                    config.NL2Sql,
 | ||
| 		enableCompactTable:        ptr.FromOrDefault(config.EnableCompactTable, true),
 | ||
| 		cacheCli:                  config.CacheCli,
 | ||
| 		isAutoAnnotationSupported: config.IsAutoAnnotationSupported,
 | ||
| 		modelFactory:              config.ModelFactory,
 | ||
| 	}
 | ||
| 	if svc.reranker == nil {
 | ||
| 		svc.reranker = rrf.NewRRFReranker(0)
 | ||
| 	}
 | ||
| 	if svc.parseManager == nil {
 | ||
| 		svc.parseManager = builtin.NewManager(config.Storage, config.OCR, nil)
 | ||
| 	}
 | ||
| 
 | ||
| 	return svc, svc
 | ||
| }
 | ||
| 
 | ||
| type KnowledgeSVCConfig struct {
 | ||
| 	DB                        *gorm.DB                       // required
 | ||
| 	IDGen                     idgen.IDGenerator              // required
 | ||
| 	RDB                       rdb.RDB                        // required: 表格存储
 | ||
| 	Producer                  eventbus.Producer              // required: 文档 indexing 过程走 mq 异步处理
 | ||
| 	SearchStoreManagers       []searchstore.Manager          // required: 向量 / 全文
 | ||
| 	ParseManager              parser.Manager                 // optional: 文档切分与处理能力, default builtin parser
 | ||
| 	Storage                   storage.Storage                // required: oss
 | ||
| 	ModelFactory              chatmodel.Factory              // required: 模型 factory
 | ||
| 	Rewriter                  messages2query.MessagesToQuery // optional: 未配置时不改写
 | ||
| 	Reranker                  rerank.Reranker                // optional: 未配置时默认 rrf
 | ||
| 	NL2Sql                    nl2sql.NL2SQL                  // optional: 未配置时默认不支持
 | ||
| 	EnableCompactTable        *bool                          // optional: 表格数据压缩,默认 true
 | ||
| 	OCR                       ocr.OCR                        // optional: ocr, 未提供时 ocr 功能不可用
 | ||
| 	CacheCli                  cache.Cmdable                  // optional: 缓存实现
 | ||
| 	IsAutoAnnotationSupported bool                           // 是否支持了图片自动标注
 | ||
| }
 | ||
| 
 | ||
| type knowledgeSVC struct {
 | ||
| 	knowledgeRepo repository.KnowledgeRepo
 | ||
| 	documentRepo  repository.KnowledgeDocumentRepo
 | ||
| 	sliceRepo     repository.KnowledgeDocumentSliceRepo
 | ||
| 	reviewRepo    repository.KnowledgeDocumentReviewRepo
 | ||
| 	modelFactory  chatmodel.Factory
 | ||
| 
 | ||
| 	idgen                     idgen.IDGenerator
 | ||
| 	rdb                       rdb.RDB
 | ||
| 	producer                  eventbus.Producer
 | ||
| 	searchStoreManagers       []searchstore.Manager
 | ||
| 	parseManager              parser.Manager
 | ||
| 	rewriter                  messages2query.MessagesToQuery
 | ||
| 	reranker                  rerank.Reranker
 | ||
| 	storage                   storage.Storage
 | ||
| 	nl2Sql                    nl2sql.NL2SQL
 | ||
| 	cacheCli                  cache.Cmdable
 | ||
| 	enableCompactTable        bool // 表格数据压缩
 | ||
| 	isAutoAnnotationSupported bool // 是否支持了图片自动标注
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) CreateKnowledge(ctx context.Context, request *CreateKnowledgeRequest) (response *CreateKnowledgeResponse, err error) {
 | ||
| 	now := time.Now().UnixMilli()
 | ||
| 	if len(request.Name) == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge name is empty"))
 | ||
| 	}
 | ||
| 	if request.CreatorID == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge creator id is empty"))
 | ||
| 	}
 | ||
| 	if request.SpaceID == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge space id is empty"))
 | ||
| 	}
 | ||
| 	id, err := k.idgen.GenID(ctx)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeIDGenCode)
 | ||
| 	}
 | ||
| 
 | ||
| 	if err = k.knowledgeRepo.Create(ctx, &model.Knowledge{
 | ||
| 		ID:          id,
 | ||
| 		Name:        request.Name,
 | ||
| 		CreatorID:   request.CreatorID,
 | ||
| 		AppID:       request.AppID,
 | ||
| 		SpaceID:     request.SpaceID,
 | ||
| 		CreatedAt:   now,
 | ||
| 		UpdatedAt:   now,
 | ||
| 		Status:      int32(knowledgeModel.KnowledgeStatusEnable), // 目前向量库的初始化由文档触发,知识库无 init 过程
 | ||
| 		Description: request.Description,
 | ||
| 		IconURI:     request.IconUri,
 | ||
| 		FormatType:  int32(request.FormatType),
 | ||
| 	}); err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 
 | ||
| 	return &CreateKnowledgeResponse{
 | ||
| 		KnowledgeID: id,
 | ||
| 		CreatedAtMs: now,
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) UpdateKnowledge(ctx context.Context, request *UpdateKnowledgeRequest) error {
 | ||
| 	if request.KnowledgeID == 0 {
 | ||
| 		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge id is empty"))
 | ||
| 	}
 | ||
| 	if request.Name != nil && len(*request.Name) == 0 {
 | ||
| 		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge name is empty"))
 | ||
| 	}
 | ||
| 	knModel, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
 | ||
| 	if err != nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if knModel == nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeNotExistCode, errorx.KV("msg", "knowledge not found"))
 | ||
| 	}
 | ||
| 	now := time.Now().UnixMilli()
 | ||
| 	if request.Status != nil {
 | ||
| 		knModel.Status = int32(*request.Status)
 | ||
| 	}
 | ||
| 	if request.Name != nil {
 | ||
| 		knModel.Name = *request.Name
 | ||
| 	}
 | ||
| 	if request.IconUri != nil {
 | ||
| 		knModel.IconURI = *request.IconUri
 | ||
| 	}
 | ||
| 	if request.Description != nil {
 | ||
| 		knModel.Description = *request.Description
 | ||
| 	}
 | ||
| 	knModel.UpdatedAt = now
 | ||
| 	if err := k.knowledgeRepo.Update(ctx, knModel); err != nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	knowledge, err := k.fromModelKnowledge(ctx, knModel)
 | ||
| 	if err != nil {
 | ||
| 		return err
 | ||
| 	}
 | ||
| 	knowledge.UpdatedAtMs = now
 | ||
| 	return err
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) DeleteKnowledge(ctx context.Context, request *DeleteKnowledgeRequest) error {
 | ||
| 	// 先获取一下knowledge的信息
 | ||
| 	knModel, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
 | ||
| 	if err != nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if knModel == nil || knModel.ID == 0 {
 | ||
| 		return errorx.New(errno.ErrKnowledgeNotExistCode, errorx.KV("msg", "knowledge not found"))
 | ||
| 	}
 | ||
| 	docs, _, err := k.documentRepo.FindDocumentByCondition(ctx, &entity.WhereDocumentOpt{
 | ||
| 		KnowledgeIDs: []int64{request.KnowledgeID},
 | ||
| 		SelectAll:    true,
 | ||
| 	})
 | ||
| 	if err != nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if knModel.FormatType == int32(knowledgeModel.DocumentTypeTable) {
 | ||
| 		for _, doc := range docs {
 | ||
| 			if doc == nil {
 | ||
| 				continue
 | ||
| 			}
 | ||
| 			if doc.TableInfo != nil {
 | ||
| 				resp, err := k.rdb.DropTable(ctx, &rdb.DropTableRequest{
 | ||
| 					TableName: doc.TableInfo.PhysicalTableName,
 | ||
| 					IfExists:  true,
 | ||
| 				})
 | ||
| 				if err != nil {
 | ||
| 					logs.CtxWarnf(ctx, "[DeleteKnowledge] drop table failed, err %v", err)
 | ||
| 				}
 | ||
| 				if !resp.Success {
 | ||
| 					logs.CtxWarnf(ctx, "[DeleteKnowledge] drop table failed")
 | ||
| 				}
 | ||
| 			}
 | ||
| 		}
 | ||
| 	}
 | ||
| 	collectionName := getCollectionName(request.KnowledgeID)
 | ||
| 	for _, mgr := range k.searchStoreManagers {
 | ||
| 		if err = mgr.Drop(ctx, &searchstore.DropRequest{CollectionName: collectionName}); err != nil {
 | ||
| 			return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 	}
 | ||
| 
 | ||
| 	err = k.knowledgeRepo.Delete(ctx, request.KnowledgeID)
 | ||
| 	if err != nil {
 | ||
| 		return err
 | ||
| 	}
 | ||
| 
 | ||
| 	if err = k.documentRepo.DeleteDocuments(ctx, slices.Transform(docs, func(a *model.KnowledgeDocument) int64 {
 | ||
| 		return a.ID
 | ||
| 	})); err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "[DeleteKnowledge] delete documents failed, err %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 
 | ||
| 	return nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) ListKnowledge(ctx context.Context, request *ListKnowledgeRequest) (response *ListKnowledgeResponse, err error) {
 | ||
| 	if len(request.IDs) == 0 && request.AppID == nil && request.SpaceID == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge ids, project id, space id and query can not be all empty"))
 | ||
| 	}
 | ||
| 	opts := &entity.WhereKnowledgeOption{
 | ||
| 		KnowledgeIDs: request.IDs,
 | ||
| 		AppID:        request.AppID,
 | ||
| 		SpaceID:      request.SpaceID,
 | ||
| 		Name:         request.Name,
 | ||
| 		Status:       request.Status,
 | ||
| 		UserID:       request.UserID,
 | ||
| 		Query:        request.Query,
 | ||
| 		Page:         request.Page,
 | ||
| 		PageSize:     request.PageSize,
 | ||
| 		Order:        convertOrder(request.Order),
 | ||
| 		OrderType:    convertOrderType(request.OrderType),
 | ||
| 	}
 | ||
| 	if request.FormatType != nil {
 | ||
| 		opts.FormatType = ptr.Of(int64(*request.FormatType))
 | ||
| 	}
 | ||
| 	pos, total, err := k.knowledgeRepo.FindKnowledgeByCondition(ctx, opts)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	knList := make([]*knowledgeModel.Knowledge, len(pos))
 | ||
| 	for i := range pos {
 | ||
| 		if pos[i] == nil {
 | ||
| 			continue
 | ||
| 		}
 | ||
| 		knList[i], err = k.fromModelKnowledge(ctx, pos[i])
 | ||
| 		if err != nil {
 | ||
| 			return nil, err
 | ||
| 		}
 | ||
| 	}
 | ||
| 
 | ||
| 	return &ListKnowledgeResponse{
 | ||
| 		KnowledgeList: knList,
 | ||
| 		Total:         total,
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) checkRequest(request *CreateDocumentRequest) error {
 | ||
| 	if len(request.Documents) == 0 {
 | ||
| 		return errors.New("document is empty")
 | ||
| 	}
 | ||
| 	for i := range request.Documents {
 | ||
| 		if request.Documents[i].Type == knowledgeModel.DocumentTypeImage && ptr.From(request.Documents[i].ParsingStrategy.CaptionType) == parser.ImageAnnotationTypeModel {
 | ||
| 			if !k.isAutoAnnotationSupported {
 | ||
| 				return errors.New("auto caption type is not supported")
 | ||
| 			}
 | ||
| 		}
 | ||
| 		if request.Documents[i].ChunkingStrategy != nil {
 | ||
| 			if request.Documents[i].ChunkingStrategy.ChunkType == parser.ChunkTypeDefault {
 | ||
| 				request.Documents[i].ChunkingStrategy = getDefaultChunkStrategy()
 | ||
| 			}
 | ||
| 		}
 | ||
| 	}
 | ||
| 	return nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) CreateDocument(ctx context.Context, request *CreateDocumentRequest) (response *CreateDocumentResponse, err error) {
 | ||
| 	if err = k.checkRequest(request); err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if err = k.documentsURL2URI(ctx, request.Documents); err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDownloadFailedCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	userID := request.Documents[0].CreatorID
 | ||
| 	spaceID := request.Documents[0].SpaceID
 | ||
| 	documentSource := request.Documents[0].Source
 | ||
| 	docProcessor := impl.NewDocProcessor(ctx, &impl.DocProcessorConfig{
 | ||
| 		UserID:         userID,
 | ||
| 		SpaceID:        spaceID,
 | ||
| 		DocumentSource: documentSource,
 | ||
| 		Documents:      request.Documents,
 | ||
| 		KnowledgeRepo:  k.knowledgeRepo,
 | ||
| 		DocumentRepo:   k.documentRepo,
 | ||
| 		SliceRepo:      k.sliceRepo,
 | ||
| 		Idgen:          k.idgen,
 | ||
| 		Producer:       k.producer,
 | ||
| 		ParseManager:   k.parseManager,
 | ||
| 		Storage:        k.storage,
 | ||
| 		Rdb:            k.rdb,
 | ||
| 	})
 | ||
| 	// 1. 前置的动作,上传 tos 等
 | ||
| 	err = docProcessor.BeforeCreate()
 | ||
| 	if err != nil {
 | ||
| 		return nil, err
 | ||
| 	}
 | ||
| 	// 2. 构建 落库
 | ||
| 	err = docProcessor.BuildDBModel()
 | ||
| 	if err != nil {
 | ||
| 		return nil, err
 | ||
| 	}
 | ||
| 	// 3. 插入数据库
 | ||
| 	err = docProcessor.InsertDBModel()
 | ||
| 	if err != nil {
 | ||
| 		return nil, err
 | ||
| 	}
 | ||
| 	// 4. 发起索引任务
 | ||
| 	err = docProcessor.Indexing()
 | ||
| 	if err != nil {
 | ||
| 		return nil, err
 | ||
| 	}
 | ||
| 	// 5. 返回处理后的文档信息
 | ||
| 	docs := docProcessor.GetResp()
 | ||
| 	return &CreateDocumentResponse{
 | ||
| 		Documents: docs,
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) UpdateDocument(ctx context.Context, request *UpdateDocumentRequest) error {
 | ||
| 	if request == nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	doc, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | ||
| 	if err != nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if request.DocumentName != nil {
 | ||
| 		doc.Name = *request.DocumentName
 | ||
| 	}
 | ||
| 
 | ||
| 	if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | ||
| 		// 如果是表格类型,可能是要改table的meta
 | ||
| 		if doc.TableInfo != nil {
 | ||
| 			finalColumns, err := k.alterTableSchema(ctx, doc.TableInfo.Columns, request.TableInfo.Columns, doc.TableInfo.PhysicalTableName)
 | ||
| 			if err != nil {
 | ||
| 				return err
 | ||
| 			}
 | ||
| 			doc.TableInfo.VirtualTableName = doc.Name
 | ||
| 			if len(request.TableInfo.Columns) != 0 {
 | ||
| 				doc.TableInfo.Columns = finalColumns
 | ||
| 			}
 | ||
| 		}
 | ||
| 	}
 | ||
| 	doc.UpdatedAt = time.Now().UnixMilli()
 | ||
| 	err = k.documentRepo.Update(ctx, doc)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "[UpdateDocument] update document failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	return nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) DeleteDocument(ctx context.Context, request *DeleteDocumentRequest) error {
 | ||
| 	if request == nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	doc, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | ||
| 	if err != nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if doc == nil || doc.ID == 0 {
 | ||
| 		logs.CtxWarnf(ctx, "[DeleteDocument] document not found, doc_id: %d", request.DocumentID)
 | ||
| 		return nil
 | ||
| 	}
 | ||
| 
 | ||
| 	if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) && doc.TableInfo != nil {
 | ||
| 		resp, err := k.rdb.DropTable(ctx, &rdb.DropTableRequest{
 | ||
| 			TableName: doc.TableInfo.PhysicalTableName,
 | ||
| 			IfExists:  true,
 | ||
| 		})
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxWarnf(ctx, "[DeleteDocument] drop table failed, err: %v", err)
 | ||
| 			return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 		if !resp.Success {
 | ||
| 			logs.CtxWarnf(ctx, "[DeleteDocument] drop table failed")
 | ||
| 			return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", "drop table failed"))
 | ||
| 		}
 | ||
| 	}
 | ||
| 
 | ||
| 	err = k.documentRepo.DeleteDocuments(ctx, []int64{request.DocumentID})
 | ||
| 	if err != nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 
 | ||
| 	sliceIDs, err := k.sliceRepo.GetDocumentSliceIDs(ctx, []int64{request.DocumentID})
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "[DeleteDocument] get document slice ids failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 
 | ||
| 	if err = k.emitDeleteKnowledgeDataEvent(ctx, doc.KnowledgeID, sliceIDs, strconv.FormatInt(request.DocumentID, 10)); err != nil {
 | ||
| 		return err
 | ||
| 	}
 | ||
| 
 | ||
| 	return nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) ListDocument(ctx context.Context, request *ListDocumentRequest) (response *ListDocumentResponse, err error) {
 | ||
| 	if request == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	opts := entity.WhereDocumentOpt{
 | ||
| 		StatusNotIn: []int32{int32(entity.DocumentStatusDeleted)},
 | ||
| 	}
 | ||
| 	if request.Limit != nil {
 | ||
| 		opts.Limit = ptr.From(request.Limit)
 | ||
| 	}
 | ||
| 	if request.Offset != nil {
 | ||
| 		opts.Offset = request.Offset
 | ||
| 	}
 | ||
| 	if request.Cursor != nil {
 | ||
| 		opts.Cursor = request.Cursor
 | ||
| 	}
 | ||
| 	if len(request.DocumentIDs) > 0 {
 | ||
| 		opts.IDs = request.DocumentIDs
 | ||
| 	}
 | ||
| 	if request.KnowledgeID != 0 {
 | ||
| 		opts.KnowledgeIDs = []int64{request.KnowledgeID}
 | ||
| 	}
 | ||
| 	if request.SelectAll {
 | ||
| 		opts.SelectAll = true
 | ||
| 	}
 | ||
| 	documents, total, err := k.documentRepo.FindDocumentByCondition(ctx, &opts)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "list document failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 
 | ||
| 	resp := &ListDocumentResponse{
 | ||
| 		Total: total,
 | ||
| 	}
 | ||
| 	if len(documents)+ptr.From(opts.Offset) < int(total) {
 | ||
| 		resp.HasMore = true
 | ||
| 		if len(documents) > 0 {
 | ||
| 			nextCursor := strconv.FormatInt(documents[len(documents)-1].ID, 10)
 | ||
| 			resp.NextCursor = &nextCursor
 | ||
| 		}
 | ||
| 	}
 | ||
| 	resp.Documents = []*entity.Document{}
 | ||
| 	for i := range documents {
 | ||
| 		docItem, err := k.fromModelDocument(ctx, documents[i])
 | ||
| 		if err != nil {
 | ||
| 			return nil, err
 | ||
| 		}
 | ||
| 		resp.Documents = append(resp.Documents, docItem)
 | ||
| 	}
 | ||
| 	return resp, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) MGetDocumentProgress(ctx context.Context, request *MGetDocumentProgressRequest) (response *MGetDocumentProgressResponse, err error) {
 | ||
| 	if request == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	documents, err := k.documentRepo.MGetByID(ctx, request.DocumentIDs)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "mget document failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	progresslist := []*DocumentProgress{}
 | ||
| 	for i := range documents {
 | ||
| 		item := DocumentProgress{
 | ||
| 			ID:            documents[i].ID,
 | ||
| 			Name:          documents[i].Name,
 | ||
| 			Size:          documents[i].Size,
 | ||
| 			FileExtension: documents[i].FileExtension,
 | ||
| 			Status:        entity.DocumentStatus(documents[i].Status),
 | ||
| 			StatusMsg:     entity.DocumentStatus(documents[i].Status).String(),
 | ||
| 		}
 | ||
| 		if documents[i].DocumentType == int32(knowledge.DocumentTypeImage) && len(documents[i].URI) != 0 {
 | ||
| 			item.URL, err = k.storage.GetObjectUrl(ctx, documents[i].URI)
 | ||
| 			if err != nil {
 | ||
| 				logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | ||
| 				return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | ||
| 			}
 | ||
| 		}
 | ||
| 		if documents[i].Status == int32(entity.DocumentStatusEnable) || documents[i].Status == int32(entity.DocumentStatusFailed) {
 | ||
| 			item.Progress = progressbar.ProcessDone
 | ||
| 		} else {
 | ||
| 			if documents[i].FailReason != "" {
 | ||
| 				item.StatusMsg = documents[i].FailReason
 | ||
| 				item.Status = entity.DocumentStatusFailed
 | ||
| 				progresslist = append(progresslist, &item)
 | ||
| 				continue
 | ||
| 			}
 | ||
| 			err = k.getProgressFromCache(ctx, &item)
 | ||
| 			if err != nil {
 | ||
| 				logs.CtxErrorf(ctx, "get progress from cache failed, err: %v", err)
 | ||
| 				return nil, errorx.New(errno.ErrKnowledgeGetDocProgressFailCode, errorx.KV("msg", err.Error()))
 | ||
| 			}
 | ||
| 		}
 | ||
| 		progresslist = append(progresslist, &item)
 | ||
| 	}
 | ||
| 	return &MGetDocumentProgressResponse{
 | ||
| 		ProgressList: progresslist,
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) getProgressFromCache(ctx context.Context, documentProgress *DocumentProgress) (err error) {
 | ||
| 	progressBar := progressbar.NewProgressBar(ctx, documentProgress.ID, 0, k.cacheCli, false)
 | ||
| 	percent, remainSec, errMsg := progressBar.GetProgress(ctx)
 | ||
| 	documentProgress.Progress = int(percent)
 | ||
| 	documentProgress.RemainingSec = int64(remainSec)
 | ||
| 	if len(errMsg) != 0 {
 | ||
| 		documentProgress.Status = entity.DocumentStatusFailed
 | ||
| 		documentProgress.StatusMsg = errMsg
 | ||
| 		return err
 | ||
| 	}
 | ||
| 	return err
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) ResegmentDocument(ctx context.Context, request *ResegmentDocumentRequest) (response *ResegmentDocumentResponse, err error) {
 | ||
| 	if request == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	if request.ChunkingStrategy != nil {
 | ||
| 		if request.ChunkingStrategy.ChunkType == parser.ChunkTypeDefault {
 | ||
| 			request.ChunkingStrategy = getDefaultChunkStrategy()
 | ||
| 		}
 | ||
| 	}
 | ||
| 	doc, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if doc == nil || doc.ID == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDocumentNotExistCode, errorx.KV("msg", "document not found"))
 | ||
| 	}
 | ||
| 	docEntity, err := k.fromModelDocument(ctx, doc)
 | ||
| 	if err != nil {
 | ||
| 		return nil, err
 | ||
| 	}
 | ||
| 	docEntity.ChunkingStrategy = request.ChunkingStrategy
 | ||
| 	docEntity.ParsingStrategy = request.ParsingStrategy
 | ||
| 	event := events.NewIndexDocumentEvent(docEntity.KnowledgeID, docEntity)
 | ||
| 	body, err := sonic.Marshal(event)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	doc.ParseRule.ChunkingStrategy = request.ChunkingStrategy
 | ||
| 	doc.ParseRule.ParsingStrategy = request.ParsingStrategy
 | ||
| 	doc.Status = int32(entity.DocumentStatusChunking)
 | ||
| 	err = k.documentRepo.Update(ctx, doc)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if err = k.producer.Send(ctx, body, eventbus.WithShardingKey(strconv.FormatInt(docEntity.KnowledgeID, 10))); err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	docEntity.Status = entity.DocumentStatusChunking
 | ||
| 	return &ResegmentDocumentResponse{
 | ||
| 		Document: docEntity,
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) CreateSlice(ctx context.Context, request *CreateSliceRequest) (response *CreateSliceResponse, err error) {
 | ||
| 	if request == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	docInfo, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "find document failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if docInfo == nil || docInfo.ID == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDocumentNotExistCode, errorx.KV("msg", "document not found"))
 | ||
| 	}
 | ||
| 	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | ||
| 		_, total, err := k.sliceRepo.FindSliceByCondition(ctx, &entity.WhereSliceOpt{
 | ||
| 			DocumentID: docInfo.ID,
 | ||
| 		})
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "FindSliceByCondition err:%v", err)
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 		request.Position = total + 1
 | ||
| 	}
 | ||
| 	slices, err := k.sliceRepo.GetSliceBySequence(ctx, request.DocumentID, request.Position)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "get slice by sequence failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	now := time.Now().UnixMilli()
 | ||
| 	id, err := k.idgen.GenID(ctx)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "gen id failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeIDGenCode)
 | ||
| 	}
 | ||
| 	sliceInfo := model.KnowledgeDocumentSlice{
 | ||
| 		ID:          id,
 | ||
| 		KnowledgeID: docInfo.KnowledgeID,
 | ||
| 		DocumentID:  docInfo.ID,
 | ||
| 		CreatedAt:   now,
 | ||
| 		UpdatedAt:   now,
 | ||
| 		CreatorID:   request.CreatorID,
 | ||
| 		SpaceID:     docInfo.SpaceID,
 | ||
| 		Status:      int32(knowledgeModel.SliceStatusInit),
 | ||
| 	}
 | ||
| 	if len(slices) == 0 {
 | ||
| 		if request.Position == 0 {
 | ||
| 			request.Position = 1
 | ||
| 			sliceInfo.Sequence = 1
 | ||
| 		} else {
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeSliceInsertPositionIllegalCode)
 | ||
| 		}
 | ||
| 	}
 | ||
| 	if len(slices) == 1 {
 | ||
| 		if request.Position == 1 || request.Position == 0 {
 | ||
| 			// 插入到最前面
 | ||
| 			sliceInfo.Sequence = slices[0].Sequence - 1
 | ||
| 		} else {
 | ||
| 			sliceInfo.Sequence = slices[0].Sequence + 1
 | ||
| 		}
 | ||
| 	}
 | ||
| 	if len(slices) == 2 {
 | ||
| 		if request.Position == 0 || request.Position == 1 {
 | ||
| 			sliceInfo.Sequence = slices[0].Sequence - 1
 | ||
| 		} else {
 | ||
| 			if slices[0].Sequence+1 < slices[1].Sequence {
 | ||
| 				sliceInfo.Sequence = float64(int(slices[0].Sequence) + 1)
 | ||
| 			} else {
 | ||
| 				sliceInfo.Sequence = (slices[0].Sequence + slices[1].Sequence) / 2
 | ||
| 			}
 | ||
| 		}
 | ||
| 	}
 | ||
| 	sliceEntity := entity.Slice{
 | ||
| 		Info: knowledgeModel.Info{
 | ||
| 			ID:        id,
 | ||
| 			CreatorID: request.CreatorID,
 | ||
| 		},
 | ||
| 		DocumentID: request.DocumentID,
 | ||
| 		RawContent: request.RawContent,
 | ||
| 	}
 | ||
| 	docEntity, err := k.fromModelDocument(ctx, docInfo)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "fromModelDocument failed, err: %v", err)
 | ||
| 		return nil, err
 | ||
| 	}
 | ||
| 	indexSliceEvent := events.NewIndexSliceEvent(&sliceEntity, docEntity)
 | ||
| 	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeText) ||
 | ||
| 		docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | ||
| 		sliceInfo.Content = sliceEntity.GetSliceContent()
 | ||
| 	}
 | ||
| 	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | ||
| 		sliceEntity.ID = sliceInfo.ID
 | ||
| 		err = k.upsertDataToTable(ctx, docInfo.TableInfo, []*entity.Slice{&sliceEntity})
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "insert data to table failed, err: %v", err)
 | ||
| 			return nil, err
 | ||
| 		}
 | ||
| 	}
 | ||
| 	err = k.sliceRepo.Create(ctx, &sliceInfo)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "create slice failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	body, err := sonic.Marshal(&indexSliceEvent)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "marshal event failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if err = k.producer.Send(ctx, body, eventbus.WithShardingKey(strconv.FormatInt(sliceInfo.DocumentID, 10))); err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "send message failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if err = k.documentRepo.UpdateDocumentSliceInfo(ctx, docInfo.ID); err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "update document slice info failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	return &CreateSliceResponse{
 | ||
| 		SliceID: id,
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) UpdateSlice(ctx context.Context, request *UpdateSliceRequest) error {
 | ||
| 	if request == nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	sliceInfo, err := k.sliceRepo.MGetSlices(ctx, []int64{request.SliceID})
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "mget slice failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if len(sliceInfo) != 1 {
 | ||
| 		return errorx.New(errno.ErrKnowledgeSliceNotExistCode)
 | ||
| 	}
 | ||
| 	docInfo, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "find document failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if docInfo == nil || docInfo.ID == 0 {
 | ||
| 		return errorx.New(errno.ErrKnowledgeDocumentNotExistCode)
 | ||
| 	}
 | ||
| 	// 更新数据库中的存储
 | ||
| 	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeText) ||
 | ||
| 		docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | ||
| 		sliceEntity := entity.Slice{RawContent: request.RawContent}
 | ||
| 		sliceInfo[0].Content = sliceEntity.GetSliceContent()
 | ||
| 	}
 | ||
| 	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeImage) {
 | ||
| 		sliceInfo[0].Content = ptr.From(request.RawContent[0].Text)
 | ||
| 	}
 | ||
| 	docEntity, err := k.fromModelDocument(ctx, docInfo)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "fromModelDocument failed, err: %v", err)
 | ||
| 		return err
 | ||
| 	}
 | ||
| 	sliceInfo[0].UpdatedAt = time.Now().UnixMilli()
 | ||
| 	sliceInfo[0].Status = int32(knowledgeModel.SliceStatusInit)
 | ||
| 	indexSliceEvent := events.NewIndexSliceEvent(&entity.Slice{
 | ||
| 		Info: knowledgeModel.Info{
 | ||
| 			ID: sliceInfo[0].ID,
 | ||
| 		},
 | ||
| 		KnowledgeID: sliceInfo[0].KnowledgeID,
 | ||
| 		DocumentID:  sliceInfo[0].DocumentID,
 | ||
| 		RawContent:  request.RawContent,
 | ||
| 	}, docEntity)
 | ||
| 	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | ||
| 		indexSliceEvent.Slice.ID = sliceInfo[0].ID
 | ||
| 		err = k.upsertDataToTable(ctx, docInfo.TableInfo, []*entity.Slice{indexSliceEvent.Slice})
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "upsert data to table failed, err: %v", err)
 | ||
| 			return err
 | ||
| 		}
 | ||
| 	}
 | ||
| 	err = k.sliceRepo.Update(ctx, sliceInfo[0])
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "update slice failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	body, err := sonic.Marshal(&indexSliceEvent)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "marshal event failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if err = k.producer.Send(ctx, body, eventbus.WithShardingKey(strconv.FormatInt(sliceInfo[0].DocumentID, 10))); err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "send message failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if err = k.documentRepo.UpdateDocumentSliceInfo(ctx, docInfo.ID); err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "update document slice info failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	return nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) DeleteSlice(ctx context.Context, request *DeleteSliceRequest) error {
 | ||
| 	if request == nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	sliceInfo, err := k.sliceRepo.MGetSlices(ctx, []int64{request.SliceID})
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "mget slice failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if len(sliceInfo) != 1 {
 | ||
| 		logs.CtxWarnf(ctx, "slice not found, slice_id: %d", request.SliceID)
 | ||
| 		return nil
 | ||
| 	}
 | ||
| 	docInfo, err := k.documentRepo.GetByID(ctx, sliceInfo[0].DocumentID)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "find document failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if docInfo == nil || docInfo.ID == 0 {
 | ||
| 		return errorx.New(errno.ErrKnowledgeDocumentNotExistCode)
 | ||
| 	}
 | ||
| 	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | ||
| 		_, err := k.rdb.DeleteData(ctx, &rdb.DeleteDataRequest{
 | ||
| 			TableName: docInfo.TableInfo.PhysicalTableName,
 | ||
| 			Where: &rdb.ComplexCondition{
 | ||
| 				Conditions: []*rdb.Condition{
 | ||
| 					{
 | ||
| 						Field:    consts.RDBFieldID,
 | ||
| 						Operator: rdbEntity.OperatorEqual,
 | ||
| 						Value:    request.SliceID,
 | ||
| 					},
 | ||
| 				},
 | ||
| 			},
 | ||
| 		})
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "delete data failed, err: %v", err)
 | ||
| 			return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 	}
 | ||
| 	// 删除数据库中的存储
 | ||
| 	err = k.sliceRepo.Delete(ctx, &model.KnowledgeDocumentSlice{ID: request.SliceID})
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "delete slice failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 
 | ||
| 	if err = k.emitDeleteKnowledgeDataEvent(ctx, sliceInfo[0].KnowledgeID, []int64{request.SliceID}, strconv.FormatInt(sliceInfo[0].DocumentID, 10)); err != nil {
 | ||
| 		return err
 | ||
| 	}
 | ||
| 	if err = k.documentRepo.UpdateDocumentSliceInfo(ctx, docInfo.ID); err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "update document slice info failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	return nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) ListSlice(ctx context.Context, request *ListSliceRequest) (response *ListSliceResponse, err error) {
 | ||
| 	if request == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	if request.DocumentID == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "document_id is empty"))
 | ||
| 	}
 | ||
| 	doc, err := k.documentRepo.GetByID(ctx, ptr.From(request.DocumentID))
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "get document failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	resp := ListSliceResponse{}
 | ||
| 	if doc.Status == int32(entity.DocumentStatusDeleted) {
 | ||
| 		return &resp, nil
 | ||
| 	}
 | ||
| 
 | ||
| 	slices, total, err := k.sliceRepo.FindSliceByCondition(ctx, &entity.WhereSliceOpt{
 | ||
| 		KnowledgeID: ptr.From(request.KnowledgeID),
 | ||
| 		DocumentID:  ptr.From(request.DocumentID),
 | ||
| 		Keyword:     request.Keyword,
 | ||
| 		Sequence:    request.Sequence,
 | ||
| 		PageSize:    request.Limit,
 | ||
| 		Offset:      request.Offset,
 | ||
| 	})
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "list slice failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 
 | ||
| 	if total > (request.Sequence + request.Limit) {
 | ||
| 		resp.HasMore = true
 | ||
| 	} else {
 | ||
| 		resp.HasMore = false
 | ||
| 	}
 | ||
| 	resp.Total = int(total)
 | ||
| 	var sliceMap map[int64]*entity.Slice
 | ||
| 	// 如果是表格类型,那么去table中取一下原始数据
 | ||
| 	if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | ||
| 		// 从数据库中查询原始数据
 | ||
| 		sliceMap, err = k.selectTableData(ctx, doc.TableInfo, slices)
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "select table data failed, err: %v", err)
 | ||
| 			return nil, err
 | ||
| 		}
 | ||
| 	}
 | ||
| 	resp.Slices = []*entity.Slice{}
 | ||
| 	for i := range slices {
 | ||
| 		resp.Slices = append(resp.Slices, k.fromModelSlice(ctx, slices[i]))
 | ||
| 		if sliceMap[slices[i].ID] != nil {
 | ||
| 			resp.Slices[i].RawContent = sliceMap[slices[i].ID].RawContent
 | ||
| 		}
 | ||
| 		resp.Slices[i].Sequence = request.Sequence + 1 + int64(i)
 | ||
| 	}
 | ||
| 	return &resp, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) GetSlice(ctx context.Context, request *GetSliceRequest) (response *GetSliceResponse, err error) {
 | ||
| 	slices, err := k.sliceRepo.MGetSlices(ctx, []int64{request.SliceID})
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 
 | ||
| 	if len(slices) == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeSliceNotExistCode)
 | ||
| 	}
 | ||
| 
 | ||
| 	return &GetSliceResponse{
 | ||
| 		Slice: k.fromModelSlice(ctx, slices[0]),
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func getDefaultChunkStrategy() *entity.ChunkingStrategy {
 | ||
| 	return &entity.ChunkingStrategy{
 | ||
| 		ChunkType:       parser.ChunkTypeDefault,
 | ||
| 		ChunkSize:       consts.DefaultChunkSize,
 | ||
| 		Separator:       consts.DefaultSeparator,
 | ||
| 		Overlap:         consts.DefaultOverlap,
 | ||
| 		TrimSpace:       consts.DefaultTrimSpace,
 | ||
| 		TrimURLAndEmail: consts.DefaultTrimURLAndEmail,
 | ||
| 	}
 | ||
| }
 | ||
| func (k *knowledgeSVC) CreateDocumentReview(ctx context.Context, request *CreateDocumentReviewRequest) (response *CreateDocumentReviewResponse, err error) {
 | ||
| 	if request == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	if request.ChunkStrategy != nil {
 | ||
| 		if request.ChunkStrategy.ChunkType == parser.ChunkTypeDefault {
 | ||
| 			request.ChunkStrategy = getDefaultChunkStrategy()
 | ||
| 		}
 | ||
| 	}
 | ||
| 	uid := ctxutil.GetUIDFromCtx(ctx)
 | ||
| 	if uid == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgePermissionCode, errorx.KV("msg", "session required"))
 | ||
| 	}
 | ||
| 	kn, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "get knowledge failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if kn == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeNotExistCode)
 | ||
| 	}
 | ||
| 	documentIDs := make([]int64, 0, len(request.Reviews))
 | ||
| 	documentMap := make(map[int64]*model.KnowledgeDocument)
 | ||
| 	for _, input := range request.Reviews {
 | ||
| 		if input.DocumentID != nil && *input.DocumentID > 0 {
 | ||
| 			documentIDs = append(documentIDs, *input.DocumentID)
 | ||
| 		}
 | ||
| 	}
 | ||
| 	if len(documentIDs) > 0 {
 | ||
| 		documents, err := k.documentRepo.MGetByID(ctx, documentIDs)
 | ||
| 		if err != nil {
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 		for _, document := range documents {
 | ||
| 			documentMap[document.ID] = document
 | ||
| 		}
 | ||
| 	}
 | ||
| 	reviews := make([]*entity.Review, 0, len(request.Reviews))
 | ||
| 	for _, input := range request.Reviews {
 | ||
| 		review := &entity.Review{
 | ||
| 			DocumentName: input.DocumentName,
 | ||
| 			DocumentType: input.DocumentType,
 | ||
| 			Uri:          input.TosUri,
 | ||
| 		}
 | ||
| 		if input.DocumentID != nil && *input.DocumentID > 0 {
 | ||
| 			if document, ok := documentMap[*input.DocumentID]; ok {
 | ||
| 				review.DocumentName = document.Name
 | ||
| 				names := strings.Split(document.URI, "/")
 | ||
| 				objectName := strings.Split(names[len(names)-1], ".")
 | ||
| 				review.DocumentType = objectName[len(objectName)-1]
 | ||
| 				review.Uri = document.URI
 | ||
| 			}
 | ||
| 		}
 | ||
| 		review.Url, err = k.storage.GetObjectUrl(ctx, review.Uri)
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 		reviews = append(reviews, review)
 | ||
| 	}
 | ||
| 	// STEP 1. 生成ID
 | ||
| 	reviewIDs, err := k.genMultiIDs(ctx, len(request.Reviews))
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeIDGenCode)
 | ||
| 	}
 | ||
| 	for i := range request.Reviews {
 | ||
| 		reviews[i].ReviewID = ptr.Of(reviewIDs[i])
 | ||
| 	}
 | ||
| 	modelReviews := make([]*model.KnowledgeDocumentReview, 0, len(reviews))
 | ||
| 	for _, review := range reviews {
 | ||
| 		modelReviews = append(modelReviews, &model.KnowledgeDocumentReview{
 | ||
| 			ID:          *review.ReviewID,
 | ||
| 			KnowledgeID: request.KnowledgeID,
 | ||
| 			SpaceID:     kn.SpaceID,
 | ||
| 			Name:        review.DocumentName,
 | ||
| 			Type:        review.DocumentType,
 | ||
| 			URI:         review.Uri,
 | ||
| 			CreatorID:   *uid,
 | ||
| 		})
 | ||
| 	}
 | ||
| 	err = k.reviewRepo.CreateInBatches(ctx, modelReviews)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "create review failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	for i := range reviews {
 | ||
| 		review := reviews[i]
 | ||
| 		doc := &entity.Document{
 | ||
| 			KnowledgeID:      request.KnowledgeID,
 | ||
| 			ParsingStrategy:  request.ParsingStrategy,
 | ||
| 			ChunkingStrategy: request.ChunkStrategy,
 | ||
| 			Type:             knowledgeModel.DocumentTypeText,
 | ||
| 			URI:              review.Uri,
 | ||
| 			FileExtension:    parser.FileExtension(review.DocumentType),
 | ||
| 			Info: knowledgeModel.Info{
 | ||
| 				Name:      review.DocumentName,
 | ||
| 				CreatorID: *uid,
 | ||
| 			},
 | ||
| 			Source: entity.DocumentSourceLocal,
 | ||
| 		}
 | ||
| 		reviewEvent := events.NewDocumentReviewEvent(doc, review)
 | ||
| 		body, err := sonic.Marshal(&reviewEvent)
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "marshal event failed, err: %v", err)
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 		err = k.producer.Send(ctx, body)
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "send message failed, err: %v", err)
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 	}
 | ||
| 	return &CreateDocumentReviewResponse{
 | ||
| 		Reviews: reviews,
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) MGetDocumentReview(ctx context.Context, request *MGetDocumentReviewRequest) (response *MGetDocumentReviewResponse, err error) {
 | ||
| 	reviews, err := k.reviewRepo.MGetByIDs(ctx, request.ReviewIDs)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "mget review failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	for _, review := range reviews {
 | ||
| 		if review.KnowledgeID != request.KnowledgeID {
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge_id and doc not match"))
 | ||
| 		}
 | ||
| 	}
 | ||
| 	reviewEntity := make([]*entity.Review, 0, len(reviews))
 | ||
| 	for _, review := range reviews {
 | ||
| 		status := entity.ReviewStatus(review.Status)
 | ||
| 		var reviewTosURL, reviewChunkRespTosURL string
 | ||
| 		if review.URI != "" {
 | ||
| 			reviewTosURL, err = k.getObjectURL(ctx, review.URI)
 | ||
| 			if err != nil {
 | ||
| 				logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | ||
| 				return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | ||
| 			}
 | ||
| 		}
 | ||
| 		if review.ChunkRespURI != "" {
 | ||
| 			reviewChunkRespTosURL, err = k.getObjectURL(ctx, review.ChunkRespURI)
 | ||
| 			if err != nil {
 | ||
| 				logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | ||
| 				return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | ||
| 			}
 | ||
| 		}
 | ||
| 		reviewEntity = append(reviewEntity, &entity.Review{
 | ||
| 			ReviewID:      &review.ID,
 | ||
| 			DocumentName:  review.Name,
 | ||
| 			DocumentType:  review.Type,
 | ||
| 			Url:           reviewTosURL,
 | ||
| 			Status:        &status,
 | ||
| 			DocTreeTosUrl: ptr.Of(reviewChunkRespTosURL),
 | ||
| 			PreviewTosUrl: ptr.Of(reviewTosURL),
 | ||
| 		})
 | ||
| 	}
 | ||
| 	return &MGetDocumentReviewResponse{
 | ||
| 		Reviews: reviewEntity,
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) SaveDocumentReview(ctx context.Context, request *SaveDocumentReviewRequest) error {
 | ||
| 	if request == nil {
 | ||
| 		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	review, err := k.reviewRepo.GetByID(ctx, request.ReviewID)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "get review failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	uri := review.ChunkRespURI
 | ||
| 	if review.Status == int32(entity.ReviewStatus_Enable) && len(uri) > 0 {
 | ||
| 		newTosUri := fmt.Sprintf("DocReview/%d_%d_%d.txt", review.CreatorID, time.Now().UnixMilli(), review.ID)
 | ||
| 		err = k.storage.PutObject(ctx, newTosUri, []byte(request.DocTreeJson))
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "put object failed, err: %v", err)
 | ||
| 			return errorx.New(errno.ErrKnowledgePutObjectFailCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 		err = k.reviewRepo.UpdateReview(ctx, review.ID, map[string]interface{}{
 | ||
| 			"chunk_resp_uri": newTosUri,
 | ||
| 		})
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "update review chunk uri failed, err: %v", err)
 | ||
| 			return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 	}
 | ||
| 	return nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) documentsURL2URI(ctx context.Context, docs []*entity.Document) error {
 | ||
| 	download := func(url string) ([]byte, error) {
 | ||
| 		resp, err := http.Get(url)
 | ||
| 		if err != nil {
 | ||
| 			return nil, fmt.Errorf("http get failed, %w", err)
 | ||
| 		}
 | ||
| 		defer resp.Body.Close()
 | ||
| 		if resp.StatusCode != http.StatusOK {
 | ||
| 			return nil, fmt.Errorf("download file failed, status code=%d", resp.StatusCode)
 | ||
| 		}
 | ||
| 		data, err := io.ReadAll(resp.Body)
 | ||
| 		if err != nil {
 | ||
| 			return nil, fmt.Errorf("read all failed, %w", err)
 | ||
| 		}
 | ||
| 		return data, nil
 | ||
| 	}
 | ||
| 
 | ||
| 	// same as UploadFile
 | ||
| 	const baseWord = "1Aa2Bb3Cc4Dd5Ee6Ff7Gg8Hh9Ii0JjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"
 | ||
| 	createURI := func(uid int64, fileType string) string {
 | ||
| 		num := 10
 | ||
| 		input := fmt.Sprintf("upload_%d_Ma*9)fhi_%d_gou_%s_rand_%d", uid, time.Now().Unix(), fileType, rand.Intn(100000))
 | ||
| 		hash := sha256.Sum256([]byte(fmt.Sprintf("%s", input)))
 | ||
| 		hashString := base64.StdEncoding.EncodeToString(hash[:])
 | ||
| 		if len(hashString) > num {
 | ||
| 			hashString = hashString[:num]
 | ||
| 		}
 | ||
| 		secret := ""
 | ||
| 		for _, char := range hashString {
 | ||
| 			index := int(char) % 62
 | ||
| 			secret += string(baseWord[index])
 | ||
| 		}
 | ||
| 		suffix := fmt.Sprintf("%d_%d_%s.%s", uid, time.Now().UnixNano(), secret, fileType)
 | ||
| 		uri := fmt.Sprintf("%s/%s", developer_api.FileBizType_BIZ_BOT_DATASET, suffix)
 | ||
| 		return uri
 | ||
| 	}
 | ||
| 
 | ||
| 	for _, doc := range docs {
 | ||
| 		if doc.URI != "" || doc.URL == "" {
 | ||
| 			continue
 | ||
| 		}
 | ||
| 		b, err := download(doc.URL)
 | ||
| 		if err != nil {
 | ||
| 			return fmt.Errorf("[documentsURL2URI] download document failed, %w", err)
 | ||
| 		}
 | ||
| 		uri := createURI(doc.CreatorID, string(doc.FileExtension))
 | ||
| 		if err = k.storage.PutObject(ctx, uri, b); err != nil {
 | ||
| 			return fmt.Errorf("[documentsURL2URI] upload document failed, %w", err)
 | ||
| 		}
 | ||
| 		doc.URI = uri
 | ||
| 	}
 | ||
| 
 | ||
| 	return nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) emitDeleteKnowledgeDataEvent(ctx context.Context, knowledgeID int64, sliceIDs []int64, shardingKey string) error {
 | ||
| 	deleteSliceEvent := events.NewDeleteKnowledgeDataEvent(knowledgeID, sliceIDs)
 | ||
| 	body, err := sonic.Marshal(&deleteSliceEvent)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "marshal event failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if err = k.producer.Send(ctx, body, eventbus.WithShardingKey(shardingKey)); err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "send message failed, err: %v", err)
 | ||
| 		return errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	return nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) fromModelKnowledge(ctx context.Context, knowledge *model.Knowledge) (*knowledgeModel.Knowledge, error) {
 | ||
| 	if knowledge == nil {
 | ||
| 		return nil, nil
 | ||
| 	}
 | ||
| 	sliceHit, err := k.sliceRepo.GetSliceHitByKnowledgeID(ctx, knowledge.ID)
 | ||
| 	if err != nil {
 | ||
| 		logs.CtxErrorf(ctx, "get slice hit count failed, err: %v", err)
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	knEntity := &knowledgeModel.Knowledge{
 | ||
| 		Info: knowledgeModel.Info{
 | ||
| 			ID:          knowledge.ID,
 | ||
| 			Name:        knowledge.Name,
 | ||
| 			Description: knowledge.Description,
 | ||
| 			IconURI:     knowledge.IconURI,
 | ||
| 			CreatorID:   knowledge.CreatorID,
 | ||
| 			SpaceID:     knowledge.SpaceID,
 | ||
| 			CreatedAtMs: knowledge.CreatedAt,
 | ||
| 			UpdatedAtMs: knowledge.UpdatedAt,
 | ||
| 			AppID:       knowledge.AppID,
 | ||
| 		},
 | ||
| 		SliceHit: sliceHit,
 | ||
| 		Type:     knowledgeModel.DocumentType(knowledge.FormatType),
 | ||
| 		Status:   knowledgeModel.KnowledgeStatus(knowledge.Status),
 | ||
| 	}
 | ||
| 
 | ||
| 	if knowledge.IconURI != "" {
 | ||
| 		objUrl, err := k.storage.GetObjectUrl(ctx, knowledge.IconURI)
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 		knEntity.IconURL = objUrl
 | ||
| 	}
 | ||
| 	return knEntity, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) fromModelDocument(ctx context.Context, document *model.KnowledgeDocument) (*entity.Document, error) {
 | ||
| 	if document == nil {
 | ||
| 		return nil, nil
 | ||
| 	}
 | ||
| 	documentEntity := &entity.Document{
 | ||
| 		Info: knowledgeModel.Info{
 | ||
| 			ID:          document.ID,
 | ||
| 			Name:        document.Name,
 | ||
| 			CreatorID:   document.CreatorID,
 | ||
| 			SpaceID:     document.SpaceID,
 | ||
| 			CreatedAtMs: document.CreatedAt,
 | ||
| 			UpdatedAtMs: document.UpdatedAt,
 | ||
| 		},
 | ||
| 		Type:             knowledgeModel.DocumentType(document.DocumentType),
 | ||
| 		KnowledgeID:      document.KnowledgeID,
 | ||
| 		URI:              document.URI,
 | ||
| 		Size:             document.Size,
 | ||
| 		SliceCount:       document.SliceCount,
 | ||
| 		CharCount:        document.CharCount,
 | ||
| 		FileExtension:    parser.FileExtension(document.FileExtension),
 | ||
| 		Source:           entity.DocumentSource(document.SourceType),
 | ||
| 		Status:           entity.DocumentStatus(document.Status),
 | ||
| 		ParsingStrategy:  document.ParseRule.ParsingStrategy,
 | ||
| 		ChunkingStrategy: document.ParseRule.ChunkingStrategy,
 | ||
| 	}
 | ||
| 	if document.TableInfo != nil {
 | ||
| 		documentEntity.TableInfo = *document.TableInfo
 | ||
| 		documentEntity.TableInfo.Columns = make([]*entity.TableColumn, 0)
 | ||
| 		for i := range document.TableInfo.Columns {
 | ||
| 			if document.TableInfo.Columns[i] == nil {
 | ||
| 				continue
 | ||
| 			}
 | ||
| 			if document.TableInfo.Columns[i].Name == consts.RDBFieldID {
 | ||
| 				continue
 | ||
| 			}
 | ||
| 			documentEntity.TableInfo.Columns = append(documentEntity.TableInfo.Columns, document.TableInfo.Columns[i])
 | ||
| 		}
 | ||
| 	}
 | ||
| 	switch document.Status {
 | ||
| 	case int32(entity.DocumentStatusChunking), int32(entity.DocumentStatusInit), int32(entity.DocumentStatusUploading):
 | ||
| 		if document.FailReason != "" {
 | ||
| 			documentEntity.Status = entity.DocumentStatusFailed
 | ||
| 			documentEntity.StatusMsg = document.FailReason
 | ||
| 		}
 | ||
| 	case int32(entity.DocumentStatusFailed):
 | ||
| 		documentEntity.StatusMsg = document.FailReason
 | ||
| 	default:
 | ||
| 	}
 | ||
| 	if len(document.URI) != 0 {
 | ||
| 		objUrl, err := k.storage.GetObjectUrl(ctx, document.URI)
 | ||
| 		if err != nil {
 | ||
| 			logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | ||
| 		}
 | ||
| 		documentEntity.URL = objUrl
 | ||
| 	}
 | ||
| 	return documentEntity, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) fromModelSlice(ctx context.Context, slice *model.KnowledgeDocumentSlice) *entity.Slice {
 | ||
| 	if slice == nil {
 | ||
| 		return nil
 | ||
| 	}
 | ||
| 	s := &entity.Slice{
 | ||
| 		Info: knowledgeModel.Info{
 | ||
| 			ID:          slice.ID,
 | ||
| 			CreatorID:   slice.CreatorID,
 | ||
| 			SpaceID:     slice.SpaceID,
 | ||
| 			CreatedAtMs: slice.CreatedAt,
 | ||
| 			UpdatedAtMs: slice.UpdatedAt,
 | ||
| 		},
 | ||
| 		DocumentID:  slice.DocumentID,
 | ||
| 		KnowledgeID: slice.KnowledgeID,
 | ||
| 		ByteCount:   int64(len(slice.Content)),
 | ||
| 		CharCount:   int64(utf8.RuneCountInString(slice.Content)),
 | ||
| 		Hit:         slice.Hit,
 | ||
| 		SliceStatus: knowledgeModel.SliceStatus(slice.Status),
 | ||
| 	}
 | ||
| 	if slice.Content != "" {
 | ||
| 		processedContent := k.formatSliceContent(ctx, slice.Content)
 | ||
| 		s.RawContent = make([]*knowledgeModel.SliceContent, 0)
 | ||
| 		s.RawContent = append(s.RawContent, &knowledgeModel.SliceContent{
 | ||
| 			Type: knowledgeModel.SliceContentTypeText,
 | ||
| 			Text: ptr.Of(processedContent),
 | ||
| 		})
 | ||
| 	}
 | ||
| 	return s
 | ||
| }
 | ||
| 
 | ||
| func convertOrderType(orderType *knowledgeModel.OrderType) *entity.OrderType {
 | ||
| 	if orderType == nil {
 | ||
| 		return nil
 | ||
| 	}
 | ||
| 	odType := *orderType
 | ||
| 	switch odType {
 | ||
| 	case knowledgeModel.OrderTypeAsc:
 | ||
| 		return ptr.Of(entity.OrderTypeAsc)
 | ||
| 	case knowledgeModel.OrderTypeDesc:
 | ||
| 		return ptr.Of(entity.OrderTypeDesc)
 | ||
| 	default:
 | ||
| 		return ptr.Of(entity.OrderTypeDesc)
 | ||
| 	}
 | ||
| }
 | ||
| 
 | ||
| func convertOrder(order *knowledgeModel.Order) *entity.Order {
 | ||
| 	if order == nil {
 | ||
| 		return nil
 | ||
| 	}
 | ||
| 	od := *order
 | ||
| 	switch od {
 | ||
| 	case knowledgeModel.OrderCreatedAt:
 | ||
| 		return ptr.Of(entity.OrderCreatedAt)
 | ||
| 	case knowledgeModel.OrderUpdatedAt:
 | ||
| 		return ptr.Of(entity.OrderUpdatedAt)
 | ||
| 	default:
 | ||
| 		return ptr.Of(entity.OrderCreatedAt)
 | ||
| 	}
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) GetKnowledgeByID(ctx context.Context, request *GetKnowledgeByIDRequest) (response *GetKnowledgeByIDResponse, err error) {
 | ||
| 	if request == nil || request.KnowledgeID == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	kn, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if kn == nil || kn.ID == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeNotExistCode, errorx.KV("msg", "knowledge not found"))
 | ||
| 	}
 | ||
| 	knEntity, err := k.fromModelKnowledge(ctx, kn)
 | ||
| 	if err != nil {
 | ||
| 		return nil, err
 | ||
| 	}
 | ||
| 	return &GetKnowledgeByIDResponse{
 | ||
| 		Knowledge: knEntity,
 | ||
| 	}, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) ListPhotoSlice(ctx context.Context, request *ListPhotoSliceRequest) (response *ListPhotoSliceResponse, err error) {
 | ||
| 	if request == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	sliceArr, total, err := k.sliceRepo.FindSliceByCondition(ctx, &entity.WhereSliceOpt{
 | ||
| 		KnowledgeID: request.KnowledgeID,
 | ||
| 		DocumentIDs: request.DocumentIDs,
 | ||
| 		Offset:      int64(ptr.From(request.Offset)),
 | ||
| 		PageSize:    int64(ptr.From(request.Limit)),
 | ||
| 		NotEmpty:    request.HasCaption,
 | ||
| 	})
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	response = &ListPhotoSliceResponse{
 | ||
| 		Total: int(total),
 | ||
| 		Slices: slices.Transform(sliceArr, func(item *model.KnowledgeDocumentSlice) *entity.Slice {
 | ||
| 			res := k.fromModelSlice(ctx, item)
 | ||
| 			return res
 | ||
| 		}),
 | ||
| 	}
 | ||
| 	return response, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) ExtractPhotoCaption(ctx context.Context, request *ExtractPhotoCaptionRequest) (response *ExtractPhotoCaptionResponse, err error) {
 | ||
| 	response = &ExtractPhotoCaptionResponse{}
 | ||
| 	if request == nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 	if !k.isAutoAnnotationSupported {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeAutoAnnotationNotSupportedCode, errorx.KV("msg", "auto annotation is not supported"))
 | ||
| 	}
 | ||
| 	docInfo, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if docInfo == nil || docInfo.ID == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDocumentNotExistCode, errorx.KV("msg", "document not found"))
 | ||
| 	}
 | ||
| 	docEntity, err := k.fromModelDocument(ctx, docInfo)
 | ||
| 	if err != nil {
 | ||
| 		return nil, err
 | ||
| 	}
 | ||
| 	docEntity.ParsingStrategy.CaptionType = ptr.Of(parser.ImageAnnotationTypeModel)
 | ||
| 	parser, err := k.parseManager.GetParser(convert.DocumentToParseConfig(docEntity))
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeGetParserFailCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	imageByte, err := k.storage.GetObject(ctx, docEntity.URI)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeGetObjectFailCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	reader := bytes.NewReader(imageByte)
 | ||
| 	schemaDoc, err := parser.Parse(ctx, reader)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeParserParseFailCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	if len(schemaDoc) == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeParserParseFailCode, errorx.KV("msg", "parse fail, schema doc is empty"))
 | ||
| 	}
 | ||
| 	response.Caption = schemaDoc[0].Content
 | ||
| 	return response, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) MGetKnowledgeByID(ctx context.Context, request *MGetKnowledgeByIDRequest) (response *MGetKnowledgeByIDResponse, err error) {
 | ||
| 	if request == nil || len(request.KnowledgeIDs) == 0 {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | ||
| 	}
 | ||
| 
 | ||
| 	models, err := k.knowledgeRepo.MGetByID(ctx, request.KnowledgeIDs)
 | ||
| 	if err != nil {
 | ||
| 		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | ||
| 	}
 | ||
| 	response = &MGetKnowledgeByIDResponse{}
 | ||
| 	response.Knowledge = make([]*knowledgeModel.Knowledge, 0, len(models))
 | ||
| 	for _, model := range models {
 | ||
| 		if model == nil {
 | ||
| 			continue
 | ||
| 		}
 | ||
| 		kn, err := k.fromModelKnowledge(ctx, model)
 | ||
| 		if err != nil {
 | ||
| 			return nil, err
 | ||
| 		}
 | ||
| 		response.Knowledge = append(response.Knowledge, kn)
 | ||
| 	}
 | ||
| 	return response, nil
 | ||
| }
 | ||
| 
 | ||
const (
	// expireTime is the expiry passed to storage.WithExpire when requesting an
	// object URL. Value: 21600 (six hours if the unit is seconds — TODO confirm
	// against the storage package).
	expireTime = 6 * 60 * 60
	// cacheTime is the number of seconds a resolved object URL stays in the
	// cache; it is multiplied by time.Second at the call site. Value: 7200.
	cacheTime = 2 * 60 * 60
)
 | ||
| 
 | ||
| func (k *knowledgeSVC) getObjectURL(ctx context.Context, uri string) (string, error) {
 | ||
| 	cmd := k.cacheCli.Get(ctx, uri)
 | ||
| 	if cmd.Err() != nil {
 | ||
| 		url, err := k.storage.GetObjectUrl(ctx, uri, storage.WithExpire(expireTime))
 | ||
| 		if err != nil {
 | ||
| 			return "", errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", fmt.Sprintf("get object url failed, %v", err)))
 | ||
| 		}
 | ||
| 		if errors.Is(cmd.Err(), redisV9.Nil) {
 | ||
| 			err = k.cacheCli.Set(ctx, uri, url, cacheTime*time.Second).Err()
 | ||
| 			if err != nil {
 | ||
| 				logs.CtxErrorf(ctx, "[getObjectURL] set cache failed, %v", err)
 | ||
| 			}
 | ||
| 		}
 | ||
| 		return url, nil
 | ||
| 	}
 | ||
| 
 | ||
| 	url := cmd.Val()
 | ||
| 	return url, nil
 | ||
| }
 | ||
| 
 | ||
| func (k *knowledgeSVC) genMultiIDs(ctx context.Context, counts int) ([]int64, error) {
 | ||
| 	allIDs := make([]int64, 0)
 | ||
| 	for l := 0; l < counts; l += 100 {
 | ||
| 		r := min(l+100, counts)
 | ||
| 		batchSize := r - l
 | ||
| 		ids, err := k.idgen.GenMultiIDs(ctx, batchSize)
 | ||
| 		if err != nil {
 | ||
| 			return nil, errorx.New(errno.ErrKnowledgeIDGenCode, errorx.KV("msg", fmt.Sprintf("GenMultiIDs failed, err: %v", err)))
 | ||
| 		}
 | ||
| 		allIDs = append(allIDs, ids...)
 | ||
| 	}
 | ||
| 	return allIDs, nil
 | ||
| }
 |