1513 lines
		
	
	
		
			55 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			1513 lines
		
	
	
		
			55 KiB
		
	
	
	
		
			Go
		
	
	
	
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 | 
						||
 | 
						||
package service
 | 
						||
 | 
						||
import (
 | 
						||
	"bytes"
 | 
						||
	"context"
 | 
						||
	"crypto/sha256"
 | 
						||
	"encoding/base64"
 | 
						||
	"errors"
 | 
						||
	"fmt"
 | 
						||
	"io"
 | 
						||
	"math/rand"
 | 
						||
	"net/http"
 | 
						||
	"strconv"
 | 
						||
	"strings"
 | 
						||
	"time"
 | 
						||
	"unicode/utf8"
 | 
						||
 | 
						||
	"github.com/bytedance/sonic"
 | 
						||
	redisV9 "github.com/redis/go-redis/v9"
 | 
						||
	"gorm.io/gorm"
 | 
						||
 | 
						||
	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
 | 
						||
	knowledgeModel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/api/model/ocean/cloud/developer_api"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/application/base/ctxutil"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/repository"
 | 
						||
 | 
						||
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/consts"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/convert"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/dal/model"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/events"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/processor/impl"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/cache"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/chatmodel"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/document/nl2sql"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/document/ocr"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/document/parser"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/document/rerank"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/document/searchstore"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/eventbus"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/idgen"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/messages2query"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/rdb"
 | 
						||
	rdbEntity "github.com/coze-dev/coze-studio/backend/infra/contract/rdb/entity"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/contract/storage"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/impl/document/parser/builtin"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/impl/document/progressbar"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/infra/impl/document/rerank/rrf"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/pkg/errorx"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/pkg/lang/slices"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/pkg/logs"
 | 
						||
	"github.com/coze-dev/coze-studio/backend/types/errno"
 | 
						||
)
 | 
						||
 | 
						||
func NewKnowledgeSVC(config *KnowledgeSVCConfig) (Knowledge, eventbus.ConsumerHandler) {
 | 
						||
	svc := &knowledgeSVC{
 | 
						||
		knowledgeRepo:             repository.NewKnowledgeDAO(config.DB),
 | 
						||
		documentRepo:              repository.NewKnowledgeDocumentDAO(config.DB),
 | 
						||
		sliceRepo:                 repository.NewKnowledgeDocumentSliceDAO(config.DB),
 | 
						||
		reviewRepo:                repository.NewKnowledgeDocumentReviewDAO(config.DB),
 | 
						||
		idgen:                     config.IDGen,
 | 
						||
		rdb:                       config.RDB,
 | 
						||
		producer:                  config.Producer,
 | 
						||
		searchStoreManagers:       config.SearchStoreManagers,
 | 
						||
		parseManager:              config.ParseManager,
 | 
						||
		storage:                   config.Storage,
 | 
						||
		reranker:                  config.Reranker,
 | 
						||
		rewriter:                  config.Rewriter,
 | 
						||
		nl2Sql:                    config.NL2Sql,
 | 
						||
		enableCompactTable:        ptr.FromOrDefault(config.EnableCompactTable, true),
 | 
						||
		cacheCli:                  config.CacheCli,
 | 
						||
		isAutoAnnotationSupported: config.IsAutoAnnotationSupported,
 | 
						||
		modelFactory:              config.ModelFactory,
 | 
						||
	}
 | 
						||
	if svc.reranker == nil {
 | 
						||
		svc.reranker = rrf.NewRRFReranker(0)
 | 
						||
	}
 | 
						||
	if svc.parseManager == nil {
 | 
						||
		svc.parseManager = builtin.NewManager(config.Storage, config.OCR, nil)
 | 
						||
	}
 | 
						||
 | 
						||
	return svc, svc
 | 
						||
}
 | 
						||
 | 
						||
type KnowledgeSVCConfig struct {
 | 
						||
	DB                        *gorm.DB                       // required
 | 
						||
	IDGen                     idgen.IDGenerator              // required
 | 
						||
	RDB                       rdb.RDB                        // required: 表格存储
 | 
						||
	Producer                  eventbus.Producer              // required: 文档 indexing 过程走 mq 异步处理
 | 
						||
	SearchStoreManagers       []searchstore.Manager          // required: 向量 / 全文
 | 
						||
	ParseManager              parser.Manager                 // optional: 文档切分与处理能力, default builtin parser
 | 
						||
	Storage                   storage.Storage                // required: oss
 | 
						||
	ModelFactory              chatmodel.Factory              // required: 模型 factory
 | 
						||
	Rewriter                  messages2query.MessagesToQuery // optional: 未配置时不改写
 | 
						||
	Reranker                  rerank.Reranker                // optional: 未配置时默认 rrf
 | 
						||
	NL2Sql                    nl2sql.NL2SQL                  // optional: 未配置时默认不支持
 | 
						||
	EnableCompactTable        *bool                          // optional: 表格数据压缩,默认 true
 | 
						||
	OCR                       ocr.OCR                        // optional: ocr, 未提供时 ocr 功能不可用
 | 
						||
	CacheCli                  cache.Cmdable                  // optional: 缓存实现
 | 
						||
	IsAutoAnnotationSupported bool                           // 是否支持了图片自动标注
 | 
						||
}
 | 
						||
 | 
						||
type knowledgeSVC struct {
 | 
						||
	knowledgeRepo repository.KnowledgeRepo
 | 
						||
	documentRepo  repository.KnowledgeDocumentRepo
 | 
						||
	sliceRepo     repository.KnowledgeDocumentSliceRepo
 | 
						||
	reviewRepo    repository.KnowledgeDocumentReviewRepo
 | 
						||
	modelFactory  chatmodel.Factory
 | 
						||
 | 
						||
	idgen                     idgen.IDGenerator
 | 
						||
	rdb                       rdb.RDB
 | 
						||
	producer                  eventbus.Producer
 | 
						||
	searchStoreManagers       []searchstore.Manager
 | 
						||
	parseManager              parser.Manager
 | 
						||
	rewriter                  messages2query.MessagesToQuery
 | 
						||
	reranker                  rerank.Reranker
 | 
						||
	storage                   storage.Storage
 | 
						||
	nl2Sql                    nl2sql.NL2SQL
 | 
						||
	cacheCli                  cache.Cmdable
 | 
						||
	enableCompactTable        bool // 表格数据压缩
 | 
						||
	isAutoAnnotationSupported bool // 是否支持了图片自动标注
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) CreateKnowledge(ctx context.Context, request *CreateKnowledgeRequest) (response *CreateKnowledgeResponse, err error) {
 | 
						||
	now := time.Now().UnixMilli()
 | 
						||
	if len(request.Name) == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge name is empty"))
 | 
						||
	}
 | 
						||
	if request.CreatorID == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge creator id is empty"))
 | 
						||
	}
 | 
						||
	if request.SpaceID == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge space id is empty"))
 | 
						||
	}
 | 
						||
	id, err := k.idgen.GenID(ctx)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeIDGenCode)
 | 
						||
	}
 | 
						||
 | 
						||
	if err = k.knowledgeRepo.Create(ctx, &model.Knowledge{
 | 
						||
		ID:          id,
 | 
						||
		Name:        request.Name,
 | 
						||
		CreatorID:   request.CreatorID,
 | 
						||
		AppID:       request.AppID,
 | 
						||
		SpaceID:     request.SpaceID,
 | 
						||
		CreatedAt:   now,
 | 
						||
		UpdatedAt:   now,
 | 
						||
		Status:      int32(knowledgeModel.KnowledgeStatusEnable), // 目前向量库的初始化由文档触发,知识库无 init 过程
 | 
						||
		Description: request.Description,
 | 
						||
		IconURI:     request.IconUri,
 | 
						||
		FormatType:  int32(request.FormatType),
 | 
						||
	}); err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
 | 
						||
	return &CreateKnowledgeResponse{
 | 
						||
		KnowledgeID: id,
 | 
						||
		CreatedAtMs: now,
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) UpdateKnowledge(ctx context.Context, request *UpdateKnowledgeRequest) error {
 | 
						||
	if request.KnowledgeID == 0 {
 | 
						||
		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge id is empty"))
 | 
						||
	}
 | 
						||
	if request.Name != nil && len(*request.Name) == 0 {
 | 
						||
		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge name is empty"))
 | 
						||
	}
 | 
						||
	knModel, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
 | 
						||
	if err != nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if knModel == nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeNotExistCode, errorx.KV("msg", "knowledge not found"))
 | 
						||
	}
 | 
						||
	now := time.Now().UnixMilli()
 | 
						||
	if request.Status != nil {
 | 
						||
		knModel.Status = int32(*request.Status)
 | 
						||
	}
 | 
						||
	if request.Name != nil {
 | 
						||
		knModel.Name = *request.Name
 | 
						||
	}
 | 
						||
	if request.IconUri != nil {
 | 
						||
		knModel.IconURI = *request.IconUri
 | 
						||
	}
 | 
						||
	if request.Description != nil {
 | 
						||
		knModel.Description = *request.Description
 | 
						||
	}
 | 
						||
	knModel.UpdatedAt = now
 | 
						||
	if err := k.knowledgeRepo.Update(ctx, knModel); err != nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	knowledge, err := k.fromModelKnowledge(ctx, knModel)
 | 
						||
	if err != nil {
 | 
						||
		return err
 | 
						||
	}
 | 
						||
	knowledge.UpdatedAtMs = now
 | 
						||
	return err
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) DeleteKnowledge(ctx context.Context, request *DeleteKnowledgeRequest) error {
 | 
						||
	// 先获取一下knowledge的信息
 | 
						||
	knModel, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
 | 
						||
	if err != nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if knModel == nil || knModel.ID == 0 {
 | 
						||
		return errorx.New(errno.ErrKnowledgeNotExistCode, errorx.KV("msg", "knowledge not found"))
 | 
						||
	}
 | 
						||
	docs, _, err := k.documentRepo.FindDocumentByCondition(ctx, &entity.WhereDocumentOpt{
 | 
						||
		KnowledgeIDs: []int64{request.KnowledgeID},
 | 
						||
		SelectAll:    true,
 | 
						||
	})
 | 
						||
	if err != nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if knModel.FormatType == int32(knowledgeModel.DocumentTypeTable) {
 | 
						||
		for _, doc := range docs {
 | 
						||
			if doc == nil {
 | 
						||
				continue
 | 
						||
			}
 | 
						||
			if doc.TableInfo != nil {
 | 
						||
				resp, err := k.rdb.DropTable(ctx, &rdb.DropTableRequest{
 | 
						||
					TableName: doc.TableInfo.PhysicalTableName,
 | 
						||
					IfExists:  true,
 | 
						||
				})
 | 
						||
				if err != nil {
 | 
						||
					logs.CtxWarnf(ctx, "[DeleteKnowledge] drop table failed, err %v", err)
 | 
						||
				}
 | 
						||
				if !resp.Success {
 | 
						||
					logs.CtxWarnf(ctx, "[DeleteKnowledge] drop table failed")
 | 
						||
				}
 | 
						||
			}
 | 
						||
		}
 | 
						||
	}
 | 
						||
	collectionName := getCollectionName(request.KnowledgeID)
 | 
						||
	for _, mgr := range k.searchStoreManagers {
 | 
						||
		if err = mgr.Drop(ctx, &searchstore.DropRequest{CollectionName: collectionName}); err != nil {
 | 
						||
			return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
	}
 | 
						||
 | 
						||
	err = k.knowledgeRepo.Delete(ctx, request.KnowledgeID)
 | 
						||
	if err != nil {
 | 
						||
		return err
 | 
						||
	}
 | 
						||
 | 
						||
	if err = k.documentRepo.DeleteDocuments(ctx, slices.Transform(docs, func(a *model.KnowledgeDocument) int64 {
 | 
						||
		return a.ID
 | 
						||
	})); err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "[DeleteKnowledge] delete documents failed, err %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
 | 
						||
	return nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) ListKnowledge(ctx context.Context, request *ListKnowledgeRequest) (response *ListKnowledgeResponse, err error) {
 | 
						||
	if len(request.IDs) == 0 && request.AppID == nil && request.SpaceID == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge ids, project id, space id and query can not be all empty"))
 | 
						||
	}
 | 
						||
	opts := &entity.WhereKnowledgeOption{
 | 
						||
		KnowledgeIDs: request.IDs,
 | 
						||
		AppID:        request.AppID,
 | 
						||
		SpaceID:      request.SpaceID,
 | 
						||
		Name:         request.Name,
 | 
						||
		Status:       request.Status,
 | 
						||
		UserID:       request.UserID,
 | 
						||
		Query:        request.Query,
 | 
						||
		Page:         request.Page,
 | 
						||
		PageSize:     request.PageSize,
 | 
						||
		Order:        convertOrder(request.Order),
 | 
						||
		OrderType:    convertOrderType(request.OrderType),
 | 
						||
	}
 | 
						||
	if request.FormatType != nil {
 | 
						||
		opts.FormatType = ptr.Of(int64(*request.FormatType))
 | 
						||
	}
 | 
						||
	pos, total, err := k.knowledgeRepo.FindKnowledgeByCondition(ctx, opts)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	knList := make([]*knowledgeModel.Knowledge, len(pos))
 | 
						||
	for i := range pos {
 | 
						||
		if pos[i] == nil {
 | 
						||
			continue
 | 
						||
		}
 | 
						||
		knList[i], err = k.fromModelKnowledge(ctx, pos[i])
 | 
						||
		if err != nil {
 | 
						||
			return nil, err
 | 
						||
		}
 | 
						||
	}
 | 
						||
 | 
						||
	return &ListKnowledgeResponse{
 | 
						||
		KnowledgeList: knList,
 | 
						||
		Total:         total,
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) checkRequest(request *CreateDocumentRequest) error {
 | 
						||
	if len(request.Documents) == 0 {
 | 
						||
		return errors.New("document is empty")
 | 
						||
	}
 | 
						||
	for i := range request.Documents {
 | 
						||
		if request.Documents[i].Type == knowledgeModel.DocumentTypeImage && ptr.From(request.Documents[i].ParsingStrategy.CaptionType) == parser.ImageAnnotationTypeModel {
 | 
						||
			if !k.isAutoAnnotationSupported {
 | 
						||
				return errors.New("auto caption type is not supported")
 | 
						||
			}
 | 
						||
		}
 | 
						||
		if request.Documents[i].ChunkingStrategy != nil {
 | 
						||
			if request.Documents[i].ChunkingStrategy.ChunkType == parser.ChunkTypeDefault {
 | 
						||
				request.Documents[i].ChunkingStrategy = getDefaultChunkStrategy()
 | 
						||
			}
 | 
						||
		}
 | 
						||
	}
 | 
						||
	return nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) CreateDocument(ctx context.Context, request *CreateDocumentRequest) (response *CreateDocumentResponse, err error) {
 | 
						||
	if err = k.checkRequest(request); err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if err = k.documentsURL2URI(ctx, request.Documents); err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDownloadFailedCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	userID := request.Documents[0].CreatorID
 | 
						||
	spaceID := request.Documents[0].SpaceID
 | 
						||
	documentSource := request.Documents[0].Source
 | 
						||
	docProcessor := impl.NewDocProcessor(ctx, &impl.DocProcessorConfig{
 | 
						||
		UserID:         userID,
 | 
						||
		SpaceID:        spaceID,
 | 
						||
		DocumentSource: documentSource,
 | 
						||
		Documents:      request.Documents,
 | 
						||
		KnowledgeRepo:  k.knowledgeRepo,
 | 
						||
		DocumentRepo:   k.documentRepo,
 | 
						||
		SliceRepo:      k.sliceRepo,
 | 
						||
		Idgen:          k.idgen,
 | 
						||
		Producer:       k.producer,
 | 
						||
		ParseManager:   k.parseManager,
 | 
						||
		Storage:        k.storage,
 | 
						||
		Rdb:            k.rdb,
 | 
						||
	})
 | 
						||
	// 1. 前置的动作,上传 tos 等
 | 
						||
	err = docProcessor.BeforeCreate()
 | 
						||
	if err != nil {
 | 
						||
		return nil, err
 | 
						||
	}
 | 
						||
	// 2. 构建 落库
 | 
						||
	err = docProcessor.BuildDBModel()
 | 
						||
	if err != nil {
 | 
						||
		return nil, err
 | 
						||
	}
 | 
						||
	// 3. 插入数据库
 | 
						||
	err = docProcessor.InsertDBModel()
 | 
						||
	if err != nil {
 | 
						||
		return nil, err
 | 
						||
	}
 | 
						||
	// 4. 发起索引任务
 | 
						||
	err = docProcessor.Indexing()
 | 
						||
	if err != nil {
 | 
						||
		return nil, err
 | 
						||
	}
 | 
						||
	// 5. 返回处理后的文档信息
 | 
						||
	docs := docProcessor.GetResp()
 | 
						||
	return &CreateDocumentResponse{
 | 
						||
		Documents: docs,
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) UpdateDocument(ctx context.Context, request *UpdateDocumentRequest) error {
 | 
						||
	if request == nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	doc, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | 
						||
	if err != nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if request.DocumentName != nil {
 | 
						||
		doc.Name = *request.DocumentName
 | 
						||
	}
 | 
						||
 | 
						||
	if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | 
						||
		// 如果是表格类型,可能是要改table的meta
 | 
						||
		if doc.TableInfo != nil {
 | 
						||
			finalColumns, err := k.alterTableSchema(ctx, doc.TableInfo.Columns, request.TableInfo.Columns, doc.TableInfo.PhysicalTableName)
 | 
						||
			if err != nil {
 | 
						||
				return err
 | 
						||
			}
 | 
						||
			doc.TableInfo.VirtualTableName = doc.Name
 | 
						||
			if len(request.TableInfo.Columns) != 0 {
 | 
						||
				doc.TableInfo.Columns = finalColumns
 | 
						||
			}
 | 
						||
		}
 | 
						||
	}
 | 
						||
	doc.UpdatedAt = time.Now().UnixMilli()
 | 
						||
	err = k.documentRepo.Update(ctx, doc)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "[UpdateDocument] update document failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	return nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) DeleteDocument(ctx context.Context, request *DeleteDocumentRequest) error {
 | 
						||
	if request == nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	doc, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | 
						||
	if err != nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if doc == nil || doc.ID == 0 {
 | 
						||
		logs.CtxWarnf(ctx, "[DeleteDocument] document not found, doc_id: %d", request.DocumentID)
 | 
						||
		return nil
 | 
						||
	}
 | 
						||
 | 
						||
	if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) && doc.TableInfo != nil {
 | 
						||
		resp, err := k.rdb.DropTable(ctx, &rdb.DropTableRequest{
 | 
						||
			TableName: doc.TableInfo.PhysicalTableName,
 | 
						||
			IfExists:  true,
 | 
						||
		})
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxWarnf(ctx, "[DeleteDocument] drop table failed, err: %v", err)
 | 
						||
			return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
		if !resp.Success {
 | 
						||
			logs.CtxWarnf(ctx, "[DeleteDocument] drop table failed")
 | 
						||
			return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", "drop table failed"))
 | 
						||
		}
 | 
						||
	}
 | 
						||
 | 
						||
	err = k.documentRepo.DeleteDocuments(ctx, []int64{request.DocumentID})
 | 
						||
	if err != nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
 | 
						||
	sliceIDs, err := k.sliceRepo.GetDocumentSliceIDs(ctx, []int64{request.DocumentID})
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "[DeleteDocument] get document slice ids failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
 | 
						||
	if err = k.emitDeleteKnowledgeDataEvent(ctx, doc.KnowledgeID, sliceIDs, strconv.FormatInt(request.DocumentID, 10)); err != nil {
 | 
						||
		return err
 | 
						||
	}
 | 
						||
 | 
						||
	return nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) ListDocument(ctx context.Context, request *ListDocumentRequest) (response *ListDocumentResponse, err error) {
 | 
						||
	if request == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	opts := entity.WhereDocumentOpt{
 | 
						||
		StatusNotIn: []int32{int32(entity.DocumentStatusDeleted)},
 | 
						||
	}
 | 
						||
	if request.Limit != nil {
 | 
						||
		opts.Limit = ptr.From(request.Limit)
 | 
						||
	}
 | 
						||
	if request.Offset != nil {
 | 
						||
		opts.Offset = request.Offset
 | 
						||
	}
 | 
						||
	if request.Cursor != nil {
 | 
						||
		opts.Cursor = request.Cursor
 | 
						||
	}
 | 
						||
	if len(request.DocumentIDs) > 0 {
 | 
						||
		opts.IDs = request.DocumentIDs
 | 
						||
	}
 | 
						||
	if request.KnowledgeID != 0 {
 | 
						||
		opts.KnowledgeIDs = []int64{request.KnowledgeID}
 | 
						||
	}
 | 
						||
	if request.SelectAll {
 | 
						||
		opts.SelectAll = true
 | 
						||
	}
 | 
						||
	documents, total, err := k.documentRepo.FindDocumentByCondition(ctx, &opts)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "list document failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
 | 
						||
	resp := &ListDocumentResponse{
 | 
						||
		Total: total,
 | 
						||
	}
 | 
						||
	if len(documents)+ptr.From(opts.Offset) < int(total) {
 | 
						||
		resp.HasMore = true
 | 
						||
		if len(documents) > 0 {
 | 
						||
			nextCursor := strconv.FormatInt(documents[len(documents)-1].ID, 10)
 | 
						||
			resp.NextCursor = &nextCursor
 | 
						||
		}
 | 
						||
	}
 | 
						||
	resp.Documents = []*entity.Document{}
 | 
						||
	for i := range documents {
 | 
						||
		docItem, err := k.fromModelDocument(ctx, documents[i])
 | 
						||
		if err != nil {
 | 
						||
			return nil, err
 | 
						||
		}
 | 
						||
		resp.Documents = append(resp.Documents, docItem)
 | 
						||
	}
 | 
						||
	return resp, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) MGetDocumentProgress(ctx context.Context, request *MGetDocumentProgressRequest) (response *MGetDocumentProgressResponse, err error) {
 | 
						||
	if request == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	documents, err := k.documentRepo.MGetByID(ctx, request.DocumentIDs)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "mget document failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	progresslist := []*DocumentProgress{}
 | 
						||
	for i := range documents {
 | 
						||
		item := DocumentProgress{
 | 
						||
			ID:            documents[i].ID,
 | 
						||
			Name:          documents[i].Name,
 | 
						||
			Size:          documents[i].Size,
 | 
						||
			FileExtension: documents[i].FileExtension,
 | 
						||
			Status:        entity.DocumentStatus(documents[i].Status),
 | 
						||
			StatusMsg:     entity.DocumentStatus(documents[i].Status).String(),
 | 
						||
		}
 | 
						||
		if documents[i].DocumentType == int32(knowledge.DocumentTypeImage) && len(documents[i].URI) != 0 {
 | 
						||
			item.URL, err = k.storage.GetObjectUrl(ctx, documents[i].URI)
 | 
						||
			if err != nil {
 | 
						||
				logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | 
						||
				return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | 
						||
			}
 | 
						||
		}
 | 
						||
		if documents[i].Status == int32(entity.DocumentStatusEnable) || documents[i].Status == int32(entity.DocumentStatusFailed) {
 | 
						||
			item.Progress = progressbar.ProcessDone
 | 
						||
		} else {
 | 
						||
			if documents[i].FailReason != "" {
 | 
						||
				item.StatusMsg = documents[i].FailReason
 | 
						||
				item.Status = entity.DocumentStatusFailed
 | 
						||
				progresslist = append(progresslist, &item)
 | 
						||
				continue
 | 
						||
			}
 | 
						||
			err = k.getProgressFromCache(ctx, &item)
 | 
						||
			if err != nil {
 | 
						||
				logs.CtxErrorf(ctx, "get progress from cache failed, err: %v", err)
 | 
						||
				return nil, errorx.New(errno.ErrKnowledgeGetDocProgressFailCode, errorx.KV("msg", err.Error()))
 | 
						||
			}
 | 
						||
		}
 | 
						||
		progresslist = append(progresslist, &item)
 | 
						||
	}
 | 
						||
	return &MGetDocumentProgressResponse{
 | 
						||
		ProgressList: progresslist,
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) getProgressFromCache(ctx context.Context, documentProgress *DocumentProgress) (err error) {
 | 
						||
	progressBar := progressbar.NewProgressBar(ctx, documentProgress.ID, 0, k.cacheCli, false)
 | 
						||
	percent, remainSec, errMsg := progressBar.GetProgress(ctx)
 | 
						||
	documentProgress.Progress = int(percent)
 | 
						||
	documentProgress.RemainingSec = int64(remainSec)
 | 
						||
	if len(errMsg) != 0 {
 | 
						||
		documentProgress.Status = entity.DocumentStatusFailed
 | 
						||
		documentProgress.StatusMsg = errMsg
 | 
						||
		return err
 | 
						||
	}
 | 
						||
	return err
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) ResegmentDocument(ctx context.Context, request *ResegmentDocumentRequest) (response *ResegmentDocumentResponse, err error) {
 | 
						||
	if request == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	if request.ChunkingStrategy != nil {
 | 
						||
		if request.ChunkingStrategy.ChunkType == parser.ChunkTypeDefault {
 | 
						||
			request.ChunkingStrategy = getDefaultChunkStrategy()
 | 
						||
		}
 | 
						||
	}
 | 
						||
	doc, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if doc == nil || doc.ID == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDocumentNotExistCode, errorx.KV("msg", "document not found"))
 | 
						||
	}
 | 
						||
	docEntity, err := k.fromModelDocument(ctx, doc)
 | 
						||
	if err != nil {
 | 
						||
		return nil, err
 | 
						||
	}
 | 
						||
	docEntity.ChunkingStrategy = request.ChunkingStrategy
 | 
						||
	docEntity.ParsingStrategy = request.ParsingStrategy
 | 
						||
	event := events.NewIndexDocumentEvent(docEntity.KnowledgeID, docEntity)
 | 
						||
	body, err := sonic.Marshal(event)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	doc.ParseRule.ChunkingStrategy = request.ChunkingStrategy
 | 
						||
	doc.ParseRule.ParsingStrategy = request.ParsingStrategy
 | 
						||
	doc.Status = int32(entity.DocumentStatusChunking)
 | 
						||
	err = k.documentRepo.Update(ctx, doc)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if err = k.producer.Send(ctx, body, eventbus.WithShardingKey(strconv.FormatInt(docEntity.KnowledgeID, 10))); err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	docEntity.Status = entity.DocumentStatusChunking
 | 
						||
	return &ResegmentDocumentResponse{
 | 
						||
		Document: docEntity,
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) CreateSlice(ctx context.Context, request *CreateSliceRequest) (response *CreateSliceResponse, err error) {
 | 
						||
	if request == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	docInfo, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "find document failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if docInfo == nil || docInfo.ID == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDocumentNotExistCode, errorx.KV("msg", "document not found"))
 | 
						||
	}
 | 
						||
	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | 
						||
		_, total, err := k.sliceRepo.FindSliceByCondition(ctx, &entity.WhereSliceOpt{
 | 
						||
			DocumentID: docInfo.ID,
 | 
						||
		})
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "FindSliceByCondition err:%v", err)
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
		request.Position = total + 1
 | 
						||
	}
 | 
						||
	slices, err := k.sliceRepo.GetSliceBySequence(ctx, request.DocumentID, request.Position)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "get slice by sequence failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	now := time.Now().UnixMilli()
 | 
						||
	id, err := k.idgen.GenID(ctx)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "gen id failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeIDGenCode)
 | 
						||
	}
 | 
						||
	sliceInfo := model.KnowledgeDocumentSlice{
 | 
						||
		ID:          id,
 | 
						||
		KnowledgeID: docInfo.KnowledgeID,
 | 
						||
		DocumentID:  docInfo.ID,
 | 
						||
		CreatedAt:   now,
 | 
						||
		UpdatedAt:   now,
 | 
						||
		CreatorID:   request.CreatorID,
 | 
						||
		SpaceID:     docInfo.SpaceID,
 | 
						||
		Status:      int32(knowledgeModel.SliceStatusInit),
 | 
						||
	}
 | 
						||
	if len(slices) == 0 {
 | 
						||
		if request.Position == 0 {
 | 
						||
			request.Position = 1
 | 
						||
			sliceInfo.Sequence = 1
 | 
						||
		} else {
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeSliceInsertPositionIllegalCode)
 | 
						||
		}
 | 
						||
	}
 | 
						||
	if len(slices) == 1 {
 | 
						||
		if request.Position == 1 || request.Position == 0 {
 | 
						||
			// 插入到最前面
 | 
						||
			sliceInfo.Sequence = slices[0].Sequence - 1
 | 
						||
		} else {
 | 
						||
			sliceInfo.Sequence = slices[0].Sequence + 1
 | 
						||
		}
 | 
						||
	}
 | 
						||
	if len(slices) == 2 {
 | 
						||
		if request.Position == 0 || request.Position == 1 {
 | 
						||
			sliceInfo.Sequence = slices[0].Sequence - 1
 | 
						||
		} else {
 | 
						||
			if slices[0].Sequence+1 < slices[1].Sequence {
 | 
						||
				sliceInfo.Sequence = float64(int(slices[0].Sequence) + 1)
 | 
						||
			} else {
 | 
						||
				sliceInfo.Sequence = (slices[0].Sequence + slices[1].Sequence) / 2
 | 
						||
			}
 | 
						||
		}
 | 
						||
	}
 | 
						||
	sliceEntity := entity.Slice{
 | 
						||
		Info: knowledgeModel.Info{
 | 
						||
			ID:        id,
 | 
						||
			CreatorID: request.CreatorID,
 | 
						||
		},
 | 
						||
		DocumentID: request.DocumentID,
 | 
						||
		RawContent: request.RawContent,
 | 
						||
	}
 | 
						||
	docEntity, err := k.fromModelDocument(ctx, docInfo)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "fromModelDocument failed, err: %v", err)
 | 
						||
		return nil, err
 | 
						||
	}
 | 
						||
	indexSliceEvent := events.NewIndexSliceEvent(&sliceEntity, docEntity)
 | 
						||
	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeText) ||
 | 
						||
		docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | 
						||
		sliceInfo.Content = sliceEntity.GetSliceContent()
 | 
						||
	}
 | 
						||
	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | 
						||
		sliceEntity.ID = sliceInfo.ID
 | 
						||
		err = k.upsertDataToTable(ctx, docInfo.TableInfo, []*entity.Slice{&sliceEntity})
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "insert data to table failed, err: %v", err)
 | 
						||
			return nil, err
 | 
						||
		}
 | 
						||
	}
 | 
						||
	err = k.sliceRepo.Create(ctx, &sliceInfo)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "create slice failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	body, err := sonic.Marshal(&indexSliceEvent)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "marshal event failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if err = k.producer.Send(ctx, body, eventbus.WithShardingKey(strconv.FormatInt(sliceInfo.DocumentID, 10))); err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "send message failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if err = k.documentRepo.UpdateDocumentSliceInfo(ctx, docInfo.ID); err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "update document slice info failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	return &CreateSliceResponse{
 | 
						||
		SliceID: id,
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) UpdateSlice(ctx context.Context, request *UpdateSliceRequest) error {
 | 
						||
	if request == nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	sliceInfo, err := k.sliceRepo.MGetSlices(ctx, []int64{request.SliceID})
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "mget slice failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if len(sliceInfo) != 1 {
 | 
						||
		return errorx.New(errno.ErrKnowledgeSliceNotExistCode)
 | 
						||
	}
 | 
						||
	docInfo, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "find document failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if docInfo == nil || docInfo.ID == 0 {
 | 
						||
		return errorx.New(errno.ErrKnowledgeDocumentNotExistCode)
 | 
						||
	}
 | 
						||
	// 更新数据库中的存储
 | 
						||
	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeText) ||
 | 
						||
		docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | 
						||
		sliceEntity := entity.Slice{RawContent: request.RawContent}
 | 
						||
		sliceInfo[0].Content = sliceEntity.GetSliceContent()
 | 
						||
	}
 | 
						||
	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeImage) {
 | 
						||
		sliceInfo[0].Content = ptr.From(request.RawContent[0].Text)
 | 
						||
	}
 | 
						||
	docEntity, err := k.fromModelDocument(ctx, docInfo)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "fromModelDocument failed, err: %v", err)
 | 
						||
		return err
 | 
						||
	}
 | 
						||
	sliceInfo[0].UpdatedAt = time.Now().UnixMilli()
 | 
						||
	sliceInfo[0].Status = int32(knowledgeModel.SliceStatusInit)
 | 
						||
	indexSliceEvent := events.NewIndexSliceEvent(&entity.Slice{
 | 
						||
		Info: knowledgeModel.Info{
 | 
						||
			ID: sliceInfo[0].ID,
 | 
						||
		},
 | 
						||
		KnowledgeID: sliceInfo[0].KnowledgeID,
 | 
						||
		DocumentID:  sliceInfo[0].DocumentID,
 | 
						||
		RawContent:  request.RawContent,
 | 
						||
	}, docEntity)
 | 
						||
	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | 
						||
		indexSliceEvent.Slice.ID = sliceInfo[0].ID
 | 
						||
		err = k.upsertDataToTable(ctx, docInfo.TableInfo, []*entity.Slice{indexSliceEvent.Slice})
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "upsert data to table failed, err: %v", err)
 | 
						||
			return err
 | 
						||
		}
 | 
						||
	}
 | 
						||
	err = k.sliceRepo.Update(ctx, sliceInfo[0])
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "update slice failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	body, err := sonic.Marshal(&indexSliceEvent)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "marshal event failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if err = k.producer.Send(ctx, body, eventbus.WithShardingKey(strconv.FormatInt(sliceInfo[0].DocumentID, 10))); err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "send message failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if err = k.documentRepo.UpdateDocumentSliceInfo(ctx, docInfo.ID); err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "update document slice info failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	return nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) DeleteSlice(ctx context.Context, request *DeleteSliceRequest) error {
 | 
						||
	if request == nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	sliceInfo, err := k.sliceRepo.MGetSlices(ctx, []int64{request.SliceID})
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "mget slice failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if len(sliceInfo) != 1 {
 | 
						||
		logs.CtxWarnf(ctx, "slice not found, slice_id: %d", request.SliceID)
 | 
						||
		return nil
 | 
						||
	}
 | 
						||
	docInfo, err := k.documentRepo.GetByID(ctx, sliceInfo[0].DocumentID)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "find document failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if docInfo == nil || docInfo.ID == 0 {
 | 
						||
		return errorx.New(errno.ErrKnowledgeDocumentNotExistCode)
 | 
						||
	}
 | 
						||
	if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | 
						||
		_, err := k.rdb.DeleteData(ctx, &rdb.DeleteDataRequest{
 | 
						||
			TableName: docInfo.TableInfo.PhysicalTableName,
 | 
						||
			Where: &rdb.ComplexCondition{
 | 
						||
				Conditions: []*rdb.Condition{
 | 
						||
					{
 | 
						||
						Field:    consts.RDBFieldID,
 | 
						||
						Operator: rdbEntity.OperatorEqual,
 | 
						||
						Value:    request.SliceID,
 | 
						||
					},
 | 
						||
				},
 | 
						||
			},
 | 
						||
		})
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "delete data failed, err: %v", err)
 | 
						||
			return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
	}
 | 
						||
	// 删除数据库中的存储
 | 
						||
	err = k.sliceRepo.Delete(ctx, &model.KnowledgeDocumentSlice{ID: request.SliceID})
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "delete slice failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
 | 
						||
	if err = k.emitDeleteKnowledgeDataEvent(ctx, sliceInfo[0].KnowledgeID, []int64{request.SliceID}, strconv.FormatInt(sliceInfo[0].DocumentID, 10)); err != nil {
 | 
						||
		return err
 | 
						||
	}
 | 
						||
	if err = k.documentRepo.UpdateDocumentSliceInfo(ctx, docInfo.ID); err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "update document slice info failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	return nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) ListSlice(ctx context.Context, request *ListSliceRequest) (response *ListSliceResponse, err error) {
 | 
						||
	if request == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	if request.DocumentID == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "document_id is empty"))
 | 
						||
	}
 | 
						||
	doc, err := k.documentRepo.GetByID(ctx, ptr.From(request.DocumentID))
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "get document failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	resp := ListSliceResponse{}
 | 
						||
	if doc.Status == int32(entity.DocumentStatusDeleted) {
 | 
						||
		return &resp, nil
 | 
						||
	}
 | 
						||
 | 
						||
	slices, total, err := k.sliceRepo.FindSliceByCondition(ctx, &entity.WhereSliceOpt{
 | 
						||
		KnowledgeID: ptr.From(request.KnowledgeID),
 | 
						||
		DocumentID:  ptr.From(request.DocumentID),
 | 
						||
		Keyword:     request.Keyword,
 | 
						||
		Sequence:    request.Sequence,
 | 
						||
		PageSize:    request.Limit,
 | 
						||
		Offset:      request.Offset,
 | 
						||
	})
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "list slice failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
 | 
						||
	if total > (request.Sequence + request.Limit) {
 | 
						||
		resp.HasMore = true
 | 
						||
	} else {
 | 
						||
		resp.HasMore = false
 | 
						||
	}
 | 
						||
	resp.Total = int(total)
 | 
						||
	var sliceMap map[int64]*entity.Slice
 | 
						||
	// 如果是表格类型,那么去table中取一下原始数据
 | 
						||
	if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
 | 
						||
		// 从数据库中查询原始数据
 | 
						||
		sliceMap, err = k.selectTableData(ctx, doc.TableInfo, slices)
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "select table data failed, err: %v", err)
 | 
						||
			return nil, err
 | 
						||
		}
 | 
						||
	}
 | 
						||
	resp.Slices = []*entity.Slice{}
 | 
						||
	for i := range slices {
 | 
						||
		resp.Slices = append(resp.Slices, k.fromModelSlice(ctx, slices[i]))
 | 
						||
		if sliceMap[slices[i].ID] != nil {
 | 
						||
			resp.Slices[i].RawContent = sliceMap[slices[i].ID].RawContent
 | 
						||
		}
 | 
						||
		resp.Slices[i].Sequence = request.Sequence + 1 + int64(i)
 | 
						||
	}
 | 
						||
	return &resp, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) GetSlice(ctx context.Context, request *GetSliceRequest) (response *GetSliceResponse, err error) {
 | 
						||
	slices, err := k.sliceRepo.MGetSlices(ctx, []int64{request.SliceID})
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
 | 
						||
	if len(slices) == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeSliceNotExistCode)
 | 
						||
	}
 | 
						||
 | 
						||
	return &GetSliceResponse{
 | 
						||
		Slice: k.fromModelSlice(ctx, slices[0]),
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func getDefaultChunkStrategy() *entity.ChunkingStrategy {
 | 
						||
	return &entity.ChunkingStrategy{
 | 
						||
		ChunkType:       parser.ChunkTypeDefault,
 | 
						||
		ChunkSize:       consts.DefaultChunkSize,
 | 
						||
		Separator:       consts.DefaultSeparator,
 | 
						||
		Overlap:         consts.DefaultOverlap,
 | 
						||
		TrimSpace:       consts.DefaultTrimSpace,
 | 
						||
		TrimURLAndEmail: consts.DefaultTrimURLAndEmail,
 | 
						||
	}
 | 
						||
}
 | 
						||
func (k *knowledgeSVC) CreateDocumentReview(ctx context.Context, request *CreateDocumentReviewRequest) (response *CreateDocumentReviewResponse, err error) {
 | 
						||
	if request == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	if request.ChunkStrategy != nil {
 | 
						||
		if request.ChunkStrategy.ChunkType == parser.ChunkTypeDefault {
 | 
						||
			request.ChunkStrategy = getDefaultChunkStrategy()
 | 
						||
		}
 | 
						||
	}
 | 
						||
	uid := ctxutil.GetUIDFromCtx(ctx)
 | 
						||
	if uid == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgePermissionCode, errorx.KV("msg", "session required"))
 | 
						||
	}
 | 
						||
	kn, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "get knowledge failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if kn == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeNotExistCode)
 | 
						||
	}
 | 
						||
	documentIDs := make([]int64, 0, len(request.Reviews))
 | 
						||
	documentMap := make(map[int64]*model.KnowledgeDocument)
 | 
						||
	for _, input := range request.Reviews {
 | 
						||
		if input.DocumentID != nil && *input.DocumentID > 0 {
 | 
						||
			documentIDs = append(documentIDs, *input.DocumentID)
 | 
						||
		}
 | 
						||
	}
 | 
						||
	if len(documentIDs) > 0 {
 | 
						||
		documents, err := k.documentRepo.MGetByID(ctx, documentIDs)
 | 
						||
		if err != nil {
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
		for _, document := range documents {
 | 
						||
			documentMap[document.ID] = document
 | 
						||
		}
 | 
						||
	}
 | 
						||
	reviews := make([]*entity.Review, 0, len(request.Reviews))
 | 
						||
	for _, input := range request.Reviews {
 | 
						||
		review := &entity.Review{
 | 
						||
			DocumentName: input.DocumentName,
 | 
						||
			DocumentType: input.DocumentType,
 | 
						||
			Uri:          input.TosUri,
 | 
						||
		}
 | 
						||
		if input.DocumentID != nil && *input.DocumentID > 0 {
 | 
						||
			if document, ok := documentMap[*input.DocumentID]; ok {
 | 
						||
				review.DocumentName = document.Name
 | 
						||
				names := strings.Split(document.URI, "/")
 | 
						||
				objectName := strings.Split(names[len(names)-1], ".")
 | 
						||
				review.DocumentType = objectName[len(objectName)-1]
 | 
						||
				review.Uri = document.URI
 | 
						||
			}
 | 
						||
		}
 | 
						||
		review.Url, err = k.storage.GetObjectUrl(ctx, review.Uri)
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
		reviews = append(reviews, review)
 | 
						||
	}
 | 
						||
	// STEP 1. 生成ID
 | 
						||
	reviewIDs, err := k.genMultiIDs(ctx, len(request.Reviews))
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeIDGenCode)
 | 
						||
	}
 | 
						||
	for i := range request.Reviews {
 | 
						||
		reviews[i].ReviewID = ptr.Of(reviewIDs[i])
 | 
						||
	}
 | 
						||
	modelReviews := make([]*model.KnowledgeDocumentReview, 0, len(reviews))
 | 
						||
	for _, review := range reviews {
 | 
						||
		modelReviews = append(modelReviews, &model.KnowledgeDocumentReview{
 | 
						||
			ID:          *review.ReviewID,
 | 
						||
			KnowledgeID: request.KnowledgeID,
 | 
						||
			SpaceID:     kn.SpaceID,
 | 
						||
			Name:        review.DocumentName,
 | 
						||
			Type:        review.DocumentType,
 | 
						||
			URI:         review.Uri,
 | 
						||
			CreatorID:   *uid,
 | 
						||
		})
 | 
						||
	}
 | 
						||
	err = k.reviewRepo.CreateInBatches(ctx, modelReviews)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "create review failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	for i := range reviews {
 | 
						||
		review := reviews[i]
 | 
						||
		doc := &entity.Document{
 | 
						||
			KnowledgeID:      request.KnowledgeID,
 | 
						||
			ParsingStrategy:  request.ParsingStrategy,
 | 
						||
			ChunkingStrategy: request.ChunkStrategy,
 | 
						||
			Type:             knowledgeModel.DocumentTypeText,
 | 
						||
			URI:              review.Uri,
 | 
						||
			FileExtension:    parser.FileExtension(review.DocumentType),
 | 
						||
			Info: knowledgeModel.Info{
 | 
						||
				Name:      review.DocumentName,
 | 
						||
				CreatorID: *uid,
 | 
						||
			},
 | 
						||
			Source: entity.DocumentSourceLocal,
 | 
						||
		}
 | 
						||
		reviewEvent := events.NewDocumentReviewEvent(doc, review)
 | 
						||
		body, err := sonic.Marshal(&reviewEvent)
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "marshal event failed, err: %v", err)
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
		err = k.producer.Send(ctx, body)
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "send message failed, err: %v", err)
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
	}
 | 
						||
	return &CreateDocumentReviewResponse{
 | 
						||
		Reviews: reviews,
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) MGetDocumentReview(ctx context.Context, request *MGetDocumentReviewRequest) (response *MGetDocumentReviewResponse, err error) {
 | 
						||
	reviews, err := k.reviewRepo.MGetByIDs(ctx, request.ReviewIDs)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "mget review failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	for _, review := range reviews {
 | 
						||
		if review.KnowledgeID != request.KnowledgeID {
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge_id and doc not match"))
 | 
						||
		}
 | 
						||
	}
 | 
						||
	reviewEntity := make([]*entity.Review, 0, len(reviews))
 | 
						||
	for _, review := range reviews {
 | 
						||
		status := entity.ReviewStatus(review.Status)
 | 
						||
		var reviewTosURL, reviewChunkRespTosURL string
 | 
						||
		if review.URI != "" {
 | 
						||
			reviewTosURL, err = k.getObjectURL(ctx, review.URI)
 | 
						||
			if err != nil {
 | 
						||
				logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | 
						||
				return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | 
						||
			}
 | 
						||
		}
 | 
						||
		if review.ChunkRespURI != "" {
 | 
						||
			reviewChunkRespTosURL, err = k.getObjectURL(ctx, review.ChunkRespURI)
 | 
						||
			if err != nil {
 | 
						||
				logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | 
						||
				return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | 
						||
			}
 | 
						||
		}
 | 
						||
		reviewEntity = append(reviewEntity, &entity.Review{
 | 
						||
			ReviewID:      &review.ID,
 | 
						||
			DocumentName:  review.Name,
 | 
						||
			DocumentType:  review.Type,
 | 
						||
			Url:           reviewTosURL,
 | 
						||
			Status:        &status,
 | 
						||
			DocTreeTosUrl: ptr.Of(reviewChunkRespTosURL),
 | 
						||
			PreviewTosUrl: ptr.Of(reviewTosURL),
 | 
						||
		})
 | 
						||
	}
 | 
						||
	return &MGetDocumentReviewResponse{
 | 
						||
		Reviews: reviewEntity,
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) SaveDocumentReview(ctx context.Context, request *SaveDocumentReviewRequest) error {
 | 
						||
	if request == nil {
 | 
						||
		return errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	review, err := k.reviewRepo.GetByID(ctx, request.ReviewID)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "get review failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	uri := review.ChunkRespURI
 | 
						||
	if review.Status == int32(entity.ReviewStatus_Enable) && len(uri) > 0 {
 | 
						||
		newTosUri := fmt.Sprintf("DocReview/%d_%d_%d.txt", review.CreatorID, time.Now().UnixMilli(), review.ID)
 | 
						||
		err = k.storage.PutObject(ctx, newTosUri, []byte(request.DocTreeJson))
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "put object failed, err: %v", err)
 | 
						||
			return errorx.New(errno.ErrKnowledgePutObjectFailCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
		err = k.reviewRepo.UpdateReview(ctx, review.ID, map[string]interface{}{
 | 
						||
			"chunk_resp_uri": newTosUri,
 | 
						||
		})
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "update review chunk uri failed, err: %v", err)
 | 
						||
			return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
	}
 | 
						||
	return nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) documentsURL2URI(ctx context.Context, docs []*entity.Document) error {
 | 
						||
	download := func(url string) ([]byte, error) {
 | 
						||
		resp, err := http.Get(url)
 | 
						||
		if err != nil {
 | 
						||
			return nil, fmt.Errorf("http get failed, %w", err)
 | 
						||
		}
 | 
						||
		defer resp.Body.Close()
 | 
						||
		if resp.StatusCode != http.StatusOK {
 | 
						||
			return nil, fmt.Errorf("download file failed, status code=%d", resp.StatusCode)
 | 
						||
		}
 | 
						||
		data, err := io.ReadAll(resp.Body)
 | 
						||
		if err != nil {
 | 
						||
			return nil, fmt.Errorf("read all failed, %w", err)
 | 
						||
		}
 | 
						||
		return data, nil
 | 
						||
	}
 | 
						||
 | 
						||
	// same as UploadFile
 | 
						||
	const baseWord = "1Aa2Bb3Cc4Dd5Ee6Ff7Gg8Hh9Ii0JjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"
 | 
						||
	createURI := func(uid int64, fileType string) string {
 | 
						||
		num := 10
 | 
						||
		input := fmt.Sprintf("upload_%d_Ma*9)fhi_%d_gou_%s_rand_%d", uid, time.Now().Unix(), fileType, rand.Intn(100000))
 | 
						||
		hash := sha256.Sum256([]byte(fmt.Sprintf("%s", input)))
 | 
						||
		hashString := base64.StdEncoding.EncodeToString(hash[:])
 | 
						||
		if len(hashString) > num {
 | 
						||
			hashString = hashString[:num]
 | 
						||
		}
 | 
						||
		secret := ""
 | 
						||
		for _, char := range hashString {
 | 
						||
			index := int(char) % 62
 | 
						||
			secret += string(baseWord[index])
 | 
						||
		}
 | 
						||
		suffix := fmt.Sprintf("%d_%d_%s.%s", uid, time.Now().UnixNano(), secret, fileType)
 | 
						||
		uri := fmt.Sprintf("%s/%s", developer_api.FileBizType_BIZ_BOT_DATASET, suffix)
 | 
						||
		return uri
 | 
						||
	}
 | 
						||
 | 
						||
	for _, doc := range docs {
 | 
						||
		if doc.URI != "" || doc.URL == "" {
 | 
						||
			continue
 | 
						||
		}
 | 
						||
		b, err := download(doc.URL)
 | 
						||
		if err != nil {
 | 
						||
			return fmt.Errorf("[documentsURL2URI] download document failed, %w", err)
 | 
						||
		}
 | 
						||
		uri := createURI(doc.CreatorID, string(doc.FileExtension))
 | 
						||
		if err = k.storage.PutObject(ctx, uri, b); err != nil {
 | 
						||
			return fmt.Errorf("[documentsURL2URI] upload document failed, %w", err)
 | 
						||
		}
 | 
						||
		doc.URI = uri
 | 
						||
	}
 | 
						||
 | 
						||
	return nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) emitDeleteKnowledgeDataEvent(ctx context.Context, knowledgeID int64, sliceIDs []int64, shardingKey string) error {
 | 
						||
	deleteSliceEvent := events.NewDeleteKnowledgeDataEvent(knowledgeID, sliceIDs)
 | 
						||
	body, err := sonic.Marshal(&deleteSliceEvent)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "marshal event failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if err = k.producer.Send(ctx, body, eventbus.WithShardingKey(shardingKey)); err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "send message failed, err: %v", err)
 | 
						||
		return errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	return nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) fromModelKnowledge(ctx context.Context, knowledge *model.Knowledge) (*knowledgeModel.Knowledge, error) {
 | 
						||
	if knowledge == nil {
 | 
						||
		return nil, nil
 | 
						||
	}
 | 
						||
	sliceHit, err := k.sliceRepo.GetSliceHitByKnowledgeID(ctx, knowledge.ID)
 | 
						||
	if err != nil {
 | 
						||
		logs.CtxErrorf(ctx, "get slice hit count failed, err: %v", err)
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	knEntity := &knowledgeModel.Knowledge{
 | 
						||
		Info: knowledgeModel.Info{
 | 
						||
			ID:          knowledge.ID,
 | 
						||
			Name:        knowledge.Name,
 | 
						||
			Description: knowledge.Description,
 | 
						||
			IconURI:     knowledge.IconURI,
 | 
						||
			CreatorID:   knowledge.CreatorID,
 | 
						||
			SpaceID:     knowledge.SpaceID,
 | 
						||
			CreatedAtMs: knowledge.CreatedAt,
 | 
						||
			UpdatedAtMs: knowledge.UpdatedAt,
 | 
						||
			AppID:       knowledge.AppID,
 | 
						||
		},
 | 
						||
		SliceHit: sliceHit,
 | 
						||
		Type:     knowledgeModel.DocumentType(knowledge.FormatType),
 | 
						||
		Status:   knowledgeModel.KnowledgeStatus(knowledge.Status),
 | 
						||
	}
 | 
						||
 | 
						||
	if knowledge.IconURI != "" {
 | 
						||
		objUrl, err := k.storage.GetObjectUrl(ctx, knowledge.IconURI)
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
		knEntity.IconURL = objUrl
 | 
						||
	}
 | 
						||
	return knEntity, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) fromModelDocument(ctx context.Context, document *model.KnowledgeDocument) (*entity.Document, error) {
 | 
						||
	if document == nil {
 | 
						||
		return nil, nil
 | 
						||
	}
 | 
						||
	documentEntity := &entity.Document{
 | 
						||
		Info: knowledgeModel.Info{
 | 
						||
			ID:          document.ID,
 | 
						||
			Name:        document.Name,
 | 
						||
			CreatorID:   document.CreatorID,
 | 
						||
			SpaceID:     document.SpaceID,
 | 
						||
			CreatedAtMs: document.CreatedAt,
 | 
						||
			UpdatedAtMs: document.UpdatedAt,
 | 
						||
		},
 | 
						||
		Type:             knowledgeModel.DocumentType(document.DocumentType),
 | 
						||
		KnowledgeID:      document.KnowledgeID,
 | 
						||
		URI:              document.URI,
 | 
						||
		Size:             document.Size,
 | 
						||
		SliceCount:       document.SliceCount,
 | 
						||
		CharCount:        document.CharCount,
 | 
						||
		FileExtension:    parser.FileExtension(document.FileExtension),
 | 
						||
		Source:           entity.DocumentSource(document.SourceType),
 | 
						||
		Status:           entity.DocumentStatus(document.Status),
 | 
						||
		ParsingStrategy:  document.ParseRule.ParsingStrategy,
 | 
						||
		ChunkingStrategy: document.ParseRule.ChunkingStrategy,
 | 
						||
	}
 | 
						||
	if document.TableInfo != nil {
 | 
						||
		documentEntity.TableInfo = *document.TableInfo
 | 
						||
		documentEntity.TableInfo.Columns = make([]*entity.TableColumn, 0)
 | 
						||
		for i := range document.TableInfo.Columns {
 | 
						||
			if document.TableInfo.Columns[i] == nil {
 | 
						||
				continue
 | 
						||
			}
 | 
						||
			if document.TableInfo.Columns[i].Name == consts.RDBFieldID {
 | 
						||
				continue
 | 
						||
			}
 | 
						||
			documentEntity.TableInfo.Columns = append(documentEntity.TableInfo.Columns, document.TableInfo.Columns[i])
 | 
						||
		}
 | 
						||
	}
 | 
						||
	switch document.Status {
 | 
						||
	case int32(entity.DocumentStatusChunking), int32(entity.DocumentStatusInit), int32(entity.DocumentStatusUploading):
 | 
						||
		if document.FailReason != "" {
 | 
						||
			documentEntity.Status = entity.DocumentStatusFailed
 | 
						||
			documentEntity.StatusMsg = document.FailReason
 | 
						||
		}
 | 
						||
	case int32(entity.DocumentStatusFailed):
 | 
						||
		documentEntity.StatusMsg = document.FailReason
 | 
						||
	default:
 | 
						||
	}
 | 
						||
	if len(document.URI) != 0 {
 | 
						||
		objUrl, err := k.storage.GetObjectUrl(ctx, document.URI)
 | 
						||
		if err != nil {
 | 
						||
			logs.CtxErrorf(ctx, "get object url failed, err: %v", err)
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", err.Error()))
 | 
						||
		}
 | 
						||
		documentEntity.URL = objUrl
 | 
						||
	}
 | 
						||
	return documentEntity, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) fromModelSlice(ctx context.Context, slice *model.KnowledgeDocumentSlice) *entity.Slice {
 | 
						||
	if slice == nil {
 | 
						||
		return nil
 | 
						||
	}
 | 
						||
	s := &entity.Slice{
 | 
						||
		Info: knowledgeModel.Info{
 | 
						||
			ID:          slice.ID,
 | 
						||
			CreatorID:   slice.CreatorID,
 | 
						||
			SpaceID:     slice.SpaceID,
 | 
						||
			CreatedAtMs: slice.CreatedAt,
 | 
						||
			UpdatedAtMs: slice.UpdatedAt,
 | 
						||
		},
 | 
						||
		DocumentID:  slice.DocumentID,
 | 
						||
		KnowledgeID: slice.KnowledgeID,
 | 
						||
		ByteCount:   int64(len(slice.Content)),
 | 
						||
		CharCount:   int64(utf8.RuneCountInString(slice.Content)),
 | 
						||
		Hit:         slice.Hit,
 | 
						||
		SliceStatus: knowledgeModel.SliceStatus(slice.Status),
 | 
						||
	}
 | 
						||
	if slice.Content != "" {
 | 
						||
		processedContent := k.formatSliceContent(ctx, slice.Content)
 | 
						||
		s.RawContent = make([]*knowledgeModel.SliceContent, 0)
 | 
						||
		s.RawContent = append(s.RawContent, &knowledgeModel.SliceContent{
 | 
						||
			Type: knowledgeModel.SliceContentTypeText,
 | 
						||
			Text: ptr.Of(processedContent),
 | 
						||
		})
 | 
						||
	}
 | 
						||
	return s
 | 
						||
}
 | 
						||
 | 
						||
func convertOrderType(orderType *knowledgeModel.OrderType) *entity.OrderType {
 | 
						||
	if orderType == nil {
 | 
						||
		return nil
 | 
						||
	}
 | 
						||
	odType := *orderType
 | 
						||
	switch odType {
 | 
						||
	case knowledgeModel.OrderTypeAsc:
 | 
						||
		return ptr.Of(entity.OrderTypeAsc)
 | 
						||
	case knowledgeModel.OrderTypeDesc:
 | 
						||
		return ptr.Of(entity.OrderTypeDesc)
 | 
						||
	default:
 | 
						||
		return ptr.Of(entity.OrderTypeDesc)
 | 
						||
	}
 | 
						||
}
 | 
						||
 | 
						||
func convertOrder(order *knowledgeModel.Order) *entity.Order {
 | 
						||
	if order == nil {
 | 
						||
		return nil
 | 
						||
	}
 | 
						||
	od := *order
 | 
						||
	switch od {
 | 
						||
	case knowledgeModel.OrderCreatedAt:
 | 
						||
		return ptr.Of(entity.OrderCreatedAt)
 | 
						||
	case knowledgeModel.OrderUpdatedAt:
 | 
						||
		return ptr.Of(entity.OrderUpdatedAt)
 | 
						||
	default:
 | 
						||
		return ptr.Of(entity.OrderCreatedAt)
 | 
						||
	}
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) GetKnowledgeByID(ctx context.Context, request *GetKnowledgeByIDRequest) (response *GetKnowledgeByIDResponse, err error) {
 | 
						||
	if request == nil || request.KnowledgeID == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	kn, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if kn == nil || kn.ID == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeNotExistCode, errorx.KV("msg", "knowledge not found"))
 | 
						||
	}
 | 
						||
	knEntity, err := k.fromModelKnowledge(ctx, kn)
 | 
						||
	if err != nil {
 | 
						||
		return nil, err
 | 
						||
	}
 | 
						||
	return &GetKnowledgeByIDResponse{
 | 
						||
		Knowledge: knEntity,
 | 
						||
	}, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) ListPhotoSlice(ctx context.Context, request *ListPhotoSliceRequest) (response *ListPhotoSliceResponse, err error) {
 | 
						||
	if request == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	sliceArr, total, err := k.sliceRepo.FindSliceByCondition(ctx, &entity.WhereSliceOpt{
 | 
						||
		KnowledgeID: request.KnowledgeID,
 | 
						||
		DocumentIDs: request.DocumentIDs,
 | 
						||
		Offset:      int64(ptr.From(request.Offset)),
 | 
						||
		PageSize:    int64(ptr.From(request.Limit)),
 | 
						||
		NotEmpty:    request.HasCaption,
 | 
						||
	})
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	response = &ListPhotoSliceResponse{
 | 
						||
		Total: int(total),
 | 
						||
		Slices: slices.Transform(sliceArr, func(item *model.KnowledgeDocumentSlice) *entity.Slice {
 | 
						||
			res := k.fromModelSlice(ctx, item)
 | 
						||
			return res
 | 
						||
		}),
 | 
						||
	}
 | 
						||
	return response, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) ExtractPhotoCaption(ctx context.Context, request *ExtractPhotoCaptionRequest) (response *ExtractPhotoCaptionResponse, err error) {
 | 
						||
	response = &ExtractPhotoCaptionResponse{}
 | 
						||
	if request == nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
	if !k.isAutoAnnotationSupported {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeAutoAnnotationNotSupportedCode, errorx.KV("msg", "auto annotation is not supported"))
 | 
						||
	}
 | 
						||
	docInfo, err := k.documentRepo.GetByID(ctx, request.DocumentID)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if docInfo == nil || docInfo.ID == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDocumentNotExistCode, errorx.KV("msg", "document not found"))
 | 
						||
	}
 | 
						||
	docEntity, err := k.fromModelDocument(ctx, docInfo)
 | 
						||
	if err != nil {
 | 
						||
		return nil, err
 | 
						||
	}
 | 
						||
	docEntity.ParsingStrategy.CaptionType = ptr.Of(parser.ImageAnnotationTypeModel)
 | 
						||
	parser, err := k.parseManager.GetParser(convert.DocumentToParseConfig(docEntity))
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeGetParserFailCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	imageByte, err := k.storage.GetObject(ctx, docEntity.URI)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeGetObjectFailCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	reader := bytes.NewReader(imageByte)
 | 
						||
	schemaDoc, err := parser.Parse(ctx, reader)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeParserParseFailCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	if len(schemaDoc) == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeParserParseFailCode, errorx.KV("msg", "parse fail, schema doc is empty"))
 | 
						||
	}
 | 
						||
	response.Caption = schemaDoc[0].Content
 | 
						||
	return response, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) MGetKnowledgeByID(ctx context.Context, request *MGetKnowledgeByIDRequest) (response *MGetKnowledgeByIDResponse, err error) {
 | 
						||
	if request == nil || len(request.KnowledgeIDs) == 0 {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
 | 
						||
	}
 | 
						||
 | 
						||
	models, err := k.knowledgeRepo.MGetByID(ctx, request.KnowledgeIDs)
 | 
						||
	if err != nil {
 | 
						||
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
 | 
						||
	}
 | 
						||
	response = &MGetKnowledgeByIDResponse{}
 | 
						||
	response.Knowledge = make([]*knowledgeModel.Knowledge, 0, len(models))
 | 
						||
	for _, model := range models {
 | 
						||
		if model == nil {
 | 
						||
			continue
 | 
						||
		}
 | 
						||
		kn, err := k.fromModelKnowledge(ctx, model)
 | 
						||
		if err != nil {
 | 
						||
			return nil, err
 | 
						||
		}
 | 
						||
		response.Knowledge = append(response.Knowledge, kn)
 | 
						||
	}
 | 
						||
	return response, nil
 | 
						||
}
 | 
						||
 | 
						||
// Lifetimes used by getObjectURL when handing out storage object URLs.
const (
	// expireTime is the value passed to storage.WithExpire when an object URL
	// is requested (presumably seconds, i.e. 6 hours — confirm against the
	// storage package).
	expireTime = 6 * 60 * 60
	// cacheTime is how long, in seconds, a generated URL stays in the cache
	// (2 hours).
	cacheTime = 2 * 60 * 60
)
 | 
						||
 | 
						||
func (k *knowledgeSVC) getObjectURL(ctx context.Context, uri string) (string, error) {
 | 
						||
	cmd := k.cacheCli.Get(ctx, uri)
 | 
						||
	if cmd.Err() != nil {
 | 
						||
		url, err := k.storage.GetObjectUrl(ctx, uri, storage.WithExpire(expireTime))
 | 
						||
		if err != nil {
 | 
						||
			return "", errorx.New(errno.ErrKnowledgeGetObjectURLFailCode, errorx.KV("msg", fmt.Sprintf("get object url failed, %v", err)))
 | 
						||
		}
 | 
						||
		if errors.Is(cmd.Err(), redisV9.Nil) {
 | 
						||
			err = k.cacheCli.Set(ctx, uri, url, cacheTime*time.Second).Err()
 | 
						||
			if err != nil {
 | 
						||
				logs.CtxErrorf(ctx, "[getObjectURL] set cache failed, %v", err)
 | 
						||
			}
 | 
						||
		}
 | 
						||
		return url, nil
 | 
						||
	}
 | 
						||
 | 
						||
	url := cmd.Val()
 | 
						||
	return url, nil
 | 
						||
}
 | 
						||
 | 
						||
func (k *knowledgeSVC) genMultiIDs(ctx context.Context, counts int) ([]int64, error) {
 | 
						||
	allIDs := make([]int64, 0)
 | 
						||
	for l := 0; l < counts; l += 100 {
 | 
						||
		r := min(l+100, counts)
 | 
						||
		batchSize := r - l
 | 
						||
		ids, err := k.idgen.GenMultiIDs(ctx, batchSize)
 | 
						||
		if err != nil {
 | 
						||
			return nil, errorx.New(errno.ErrKnowledgeIDGenCode, errorx.KV("msg", fmt.Sprintf("GenMultiIDs failed, err: %v", err)))
 | 
						||
		}
 | 
						||
		allIDs = append(allIDs, ids...)
 | 
						||
	}
 | 
						||
	return allIDs, nil
 | 
						||
}
 |