feat: manually mirror opencoze's code from bytedance
Change-Id: I09a73aadda978ad9511264a756b2ce51f5761adf
This commit is contained in:
246
backend/domain/knowledge/processor/impl/base.go
Normal file
246
backend/domain/knowledge/processor/impl/base.go
Normal file
@@ -0,0 +1,246 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package impl
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/bytedance/sonic"
|
||||
|
||||
"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/consts"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/convert"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/dal/model"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/events"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/repository"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/document"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/document/parser"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/eventbus"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/idgen"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/rdb"
|
||||
rdbEntity "github.com/coze-dev/coze-studio/backend/infra/contract/rdb/entity"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/storage"
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/errorx"
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/logs"
|
||||
"github.com/coze-dev/coze-studio/backend/types/errno"
|
||||
)
|
||||
|
||||
type baseDocProcessor struct {
|
||||
ctx context.Context
|
||||
UserID int64
|
||||
SpaceID int64
|
||||
Documents []*entity.Document
|
||||
documentSource *entity.DocumentSource
|
||||
|
||||
// 落DB 的 model
|
||||
TableName string
|
||||
docModels []*model.KnowledgeDocument
|
||||
|
||||
storage storage.Storage
|
||||
knowledgeRepo repository.KnowledgeRepo
|
||||
documentRepo repository.KnowledgeDocumentRepo
|
||||
sliceRepo repository.KnowledgeDocumentSliceRepo
|
||||
idgen idgen.IDGenerator
|
||||
rdb rdb.RDB
|
||||
producer eventbus.Producer
|
||||
parseManager parser.Manager
|
||||
}
|
||||
|
||||
func (p *baseDocProcessor) BeforeCreate() error {
|
||||
// 从数据源拉取数据
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *baseDocProcessor) BuildDBModel() error {
|
||||
p.docModels = make([]*model.KnowledgeDocument, 0, len(p.Documents))
|
||||
ids, err := p.idgen.GenMultiIDs(p.ctx, len(p.Documents))
|
||||
if err != nil {
|
||||
logs.CtxErrorf(p.ctx, "gen ids failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeIDGenCode)
|
||||
}
|
||||
for i := range p.Documents {
|
||||
docModel := &model.KnowledgeDocument{
|
||||
ID: ids[i],
|
||||
KnowledgeID: p.Documents[i].KnowledgeID,
|
||||
Name: p.Documents[i].Name,
|
||||
FileExtension: string(p.Documents[i].FileExtension),
|
||||
URI: p.Documents[i].URI,
|
||||
DocumentType: int32(p.Documents[i].Type),
|
||||
CreatorID: p.UserID,
|
||||
SpaceID: p.SpaceID,
|
||||
SourceType: int32(p.Documents[i].Source),
|
||||
Status: int32(knowledge.KnowledgeStatusInit),
|
||||
ParseRule: &model.DocumentParseRule{
|
||||
ParsingStrategy: p.Documents[i].ParsingStrategy,
|
||||
ChunkingStrategy: p.Documents[i].ChunkingStrategy,
|
||||
},
|
||||
CreatedAt: time.Now().UnixMilli(),
|
||||
UpdatedAt: time.Now().UnixMilli(),
|
||||
}
|
||||
p.Documents[i].ID = docModel.ID
|
||||
p.docModels = append(p.docModels, docModel)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *baseDocProcessor) InsertDBModel() (err error) {
|
||||
ctx := p.ctx
|
||||
|
||||
if !isTableAppend(p.Documents) {
|
||||
err = p.createTable()
|
||||
if err != nil {
|
||||
logs.CtxErrorf(ctx, "create table failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
}
|
||||
|
||||
tx, err := p.knowledgeRepo.InitTx()
|
||||
if err != nil {
|
||||
logs.CtxErrorf(ctx, "init tx failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
logs.CtxErrorf(ctx, "panic: %v", e)
|
||||
err = errorx.New(errno.ErrKnowledgeSystemCode, errorx.KVf("msg", "panic: %v", e))
|
||||
tx.Rollback()
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
logs.CtxErrorf(ctx, "InsertDBModel err: %v", err)
|
||||
tx.Rollback()
|
||||
if p.TableName != "" {
|
||||
deleteErr := p.deleteTable()
|
||||
if deleteErr != nil {
|
||||
logs.CtxErrorf(ctx, "delete table failed, err: %v", deleteErr)
|
||||
return
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tx.Commit()
|
||||
}
|
||||
}()
|
||||
err = p.documentRepo.CreateWithTx(ctx, tx, p.docModels)
|
||||
if err != nil {
|
||||
logs.CtxErrorf(ctx, "create document failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
err = p.knowledgeRepo.UpdateWithTx(ctx, tx, p.Documents[0].KnowledgeID, map[string]interface{}{
|
||||
"updated_at": time.Now().UnixMilli(),
|
||||
})
|
||||
if err != nil {
|
||||
logs.CtxErrorf(ctx, "update knowledge failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *baseDocProcessor) createTable() error {
|
||||
if len(p.Documents) == 1 && p.Documents[0].Type == knowledge.DocumentTypeTable {
|
||||
// 表格型知识库,创建表
|
||||
rdbColumns := []*rdbEntity.Column{}
|
||||
tableColumns := p.Documents[0].TableInfo.Columns
|
||||
columnIDs, err := p.idgen.GenMultiIDs(p.ctx, len(tableColumns)+1)
|
||||
if err != nil {
|
||||
logs.CtxErrorf(p.ctx, "gen ids failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeIDGenCode)
|
||||
}
|
||||
for i := range tableColumns {
|
||||
tableColumns[i].ID = columnIDs[i]
|
||||
rdbColumns = append(rdbColumns, &rdbEntity.Column{
|
||||
Name: convert.ColumnIDToRDBField(columnIDs[i]),
|
||||
DataType: convert.ConvertColumnType(tableColumns[i].Type),
|
||||
NotNull: tableColumns[i].Indexing,
|
||||
})
|
||||
}
|
||||
p.Documents[0].TableInfo.Columns = append(p.Documents[0].TableInfo.Columns, &entity.TableColumn{
|
||||
ID: columnIDs[len(columnIDs)-1],
|
||||
Name: consts.RDBFieldID,
|
||||
Type: document.TableColumnTypeInteger,
|
||||
Description: "主键ID",
|
||||
Indexing: false,
|
||||
Sequence: -1,
|
||||
})
|
||||
// 为每个表格增加个主键ID
|
||||
rdbColumns = append(rdbColumns, &rdbEntity.Column{
|
||||
Name: consts.RDBFieldID,
|
||||
DataType: rdbEntity.TypeBigInt,
|
||||
NotNull: true,
|
||||
})
|
||||
// 创建一个数据表
|
||||
resp, err := p.rdb.CreateTable(p.ctx, &rdb.CreateTableRequest{
|
||||
Table: &rdbEntity.Table{
|
||||
Columns: rdbColumns,
|
||||
Indexes: []*rdbEntity.Index{
|
||||
{
|
||||
Name: "pk",
|
||||
Type: rdbEntity.PrimaryKey,
|
||||
Columns: []string{consts.RDBFieldID},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
logs.CtxErrorf(p.ctx, "create table failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
p.TableName = resp.Table.Name
|
||||
p.Documents[0].TableInfo.PhysicalTableName = p.TableName
|
||||
p.docModels[0].TableInfo = &entity.TableInfo{
|
||||
VirtualTableName: p.Documents[0].Name,
|
||||
PhysicalTableName: p.TableName,
|
||||
TableDesc: p.Documents[0].Description,
|
||||
Columns: p.Documents[0].TableInfo.Columns,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *baseDocProcessor) deleteTable() error {
|
||||
if len(p.Documents) == 1 && p.Documents[0].Type == knowledge.DocumentTypeTable {
|
||||
_, err := p.rdb.DropTable(p.ctx, &rdb.DropTableRequest{
|
||||
TableName: p.TableName,
|
||||
IfExists: false,
|
||||
})
|
||||
if err != nil {
|
||||
logs.CtxErrorf(p.ctx, "[deleteTable] drop table failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *baseDocProcessor) Indexing() error {
|
||||
event := events.NewIndexDocumentsEvent(p.Documents[0].KnowledgeID, p.Documents)
|
||||
body, err := sonic.Marshal(event)
|
||||
if err != nil {
|
||||
return errorx.New(errno.ErrKnowledgeParseJSONCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
|
||||
if err = p.producer.Send(p.ctx, body); err != nil {
|
||||
logs.CtxErrorf(p.ctx, "send message failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeMQSendFailCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *baseDocProcessor) GetResp() []*entity.Document {
|
||||
return p.Documents
|
||||
}
|
||||
41
backend/domain/knowledge/processor/impl/custom_doc.go
Normal file
41
backend/domain/knowledge/processor/impl/custom_doc.go
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package impl
|
||||
|
||||
import "github.com/coze-dev/coze-studio/backend/pkg/logs"
|
||||
|
||||
// 用户输入自定义内容后创建文档
|
||||
type customDocProcessor struct {
|
||||
baseDocProcessor
|
||||
}
|
||||
|
||||
func (c *customDocProcessor) BeforeCreate() error {
|
||||
for i := range c.Documents {
|
||||
if c.Documents[i].RawContent != "" {
|
||||
c.Documents[i].FileExtension = getFormatType(c.Documents[i].Type)
|
||||
uri := getTosUri(c.UserID, string(c.Documents[i].FileExtension))
|
||||
err := c.storage.PutObject(c.ctx, uri, []byte(c.Documents[i].RawContent))
|
||||
if err != nil {
|
||||
logs.CtxErrorf(c.ctx, "put object failed, err: %v", err)
|
||||
return err
|
||||
}
|
||||
c.Documents[i].URI = uri
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
108
backend/domain/knowledge/processor/impl/custom_table.go
Normal file
108
backend/domain/knowledge/processor/impl/custom_table.go
Normal file
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package impl
|
||||
|
||||
import (
|
||||
"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/errorx"
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/logs"
|
||||
"github.com/coze-dev/coze-studio/backend/types/errno"
|
||||
)
|
||||
|
||||
// 用户自定义表格创建文档
|
||||
type customTableProcessor struct {
|
||||
baseDocProcessor
|
||||
}
|
||||
|
||||
func (c *customTableProcessor) BeforeCreate() error {
|
||||
if isTableAppend(c.Documents) {
|
||||
tableDoc, _, err := c.documentRepo.FindDocumentByCondition(c.ctx, &entity.WhereDocumentOpt{KnowledgeIDs: []int64{c.Documents[0].KnowledgeID}, SelectAll: true})
|
||||
if err != nil {
|
||||
logs.CtxErrorf(c.ctx, "find document failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
if len(tableDoc) == 0 {
|
||||
logs.CtxErrorf(c.ctx, "table doc not found")
|
||||
return errorx.New(errno.ErrKnowledgeDocumentNotExistCode, errorx.KV("msg", "doc not found"))
|
||||
}
|
||||
c.Documents[0].ID = tableDoc[0].ID
|
||||
if tableDoc[0].TableInfo == nil {
|
||||
logs.CtxErrorf(c.ctx, "table info not found")
|
||||
return errorx.New(errno.ErrKnowledgeTableInfoNotExistCode, errorx.KVf("msg", "table info not found, doc_id: %d", tableDoc[0].ID))
|
||||
}
|
||||
c.Documents[0].TableInfo = *tableDoc[0].TableInfo
|
||||
// 追加场景
|
||||
if c.Documents[0].RawContent != "" {
|
||||
c.Documents[0].FileExtension = getFormatType(c.Documents[0].Type)
|
||||
uri := getTosUri(c.UserID, string(c.Documents[0].FileExtension))
|
||||
err := c.storage.PutObject(c.ctx, uri, []byte(c.Documents[0].RawContent))
|
||||
if err != nil {
|
||||
logs.CtxErrorf(c.ctx, "put object failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgePutObjectFailCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
c.Documents[0].URI = uri
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *customTableProcessor) BuildDBModel() error {
|
||||
if len(c.Documents) > 0 &&
|
||||
c.Documents[0].Type == knowledge.DocumentTypeTable {
|
||||
if c.Documents[0].IsAppend {
|
||||
// 追加场景,不需要创建表了
|
||||
// 一是用户自定义一些数据、二是再上传一个表格,把表格里的数据追加到表格中
|
||||
} else {
|
||||
err := c.baseDocProcessor.BuildDBModel()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// 因为这种创建方式不带数据,所以直接设置状态为可用
|
||||
for i := range c.docModels {
|
||||
c.docModels[i].DocumentType = 1
|
||||
c.docModels[i].Status = int32(entity.DocumentStatusInit)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *customTableProcessor) InsertDBModel() error {
|
||||
if isTableAppend(c.Documents) {
|
||||
// 追加场景,设置文档为处理中状态
|
||||
err := c.documentRepo.SetStatus(c.ctx, c.Documents[0].ID, int32(entity.DocumentStatusUploading), "")
|
||||
if err != nil {
|
||||
logs.CtxErrorf(c.ctx, "document set status err:%v", err)
|
||||
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return c.baseDocProcessor.InsertDBModel()
|
||||
}
|
||||
|
||||
func (c *customTableProcessor) Indexing() error {
|
||||
// c.baseDocProcessor.Indexing()
|
||||
if isTableAppend(c.Documents) {
|
||||
err := c.baseDocProcessor.Indexing()
|
||||
if err != nil {
|
||||
logs.CtxErrorf(c.ctx, "document indexing err:%v", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
87
backend/domain/knowledge/processor/impl/init.go
Normal file
87
backend/domain/knowledge/processor/impl/init.go
Normal file
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package impl
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/processor"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/repository"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/document/parser"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/eventbus"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/idgen"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/rdb"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/storage"
|
||||
)
|
||||
|
||||
type DocProcessorConfig struct {
|
||||
UserID int64
|
||||
SpaceID int64
|
||||
DocumentSource entity.DocumentSource
|
||||
Documents []*entity.Document
|
||||
|
||||
KnowledgeRepo repository.KnowledgeRepo
|
||||
DocumentRepo repository.KnowledgeDocumentRepo
|
||||
SliceRepo repository.KnowledgeDocumentSliceRepo
|
||||
Idgen idgen.IDGenerator
|
||||
Storage storage.Storage
|
||||
Rdb rdb.RDB
|
||||
Producer eventbus.Producer
|
||||
ParseManager parser.Manager
|
||||
}
|
||||
|
||||
func NewDocProcessor(ctx context.Context, config *DocProcessorConfig) (p processor.DocProcessor) {
|
||||
base := &baseDocProcessor{
|
||||
ctx: ctx,
|
||||
UserID: config.UserID,
|
||||
SpaceID: config.SpaceID,
|
||||
Documents: config.Documents,
|
||||
documentSource: &config.DocumentSource,
|
||||
knowledgeRepo: config.KnowledgeRepo,
|
||||
documentRepo: config.DocumentRepo,
|
||||
sliceRepo: config.SliceRepo,
|
||||
storage: config.Storage,
|
||||
idgen: config.Idgen,
|
||||
rdb: config.Rdb,
|
||||
producer: config.Producer,
|
||||
parseManager: config.ParseManager,
|
||||
}
|
||||
|
||||
switch config.DocumentSource {
|
||||
case entity.DocumentSourceCustom:
|
||||
p = &customDocProcessor{
|
||||
baseDocProcessor: *base,
|
||||
}
|
||||
if config.Documents[0].Type == knowledge.DocumentTypeTable {
|
||||
p = &customTableProcessor{
|
||||
baseDocProcessor: *base,
|
||||
}
|
||||
}
|
||||
return p
|
||||
case entity.DocumentSourceLocal:
|
||||
if config.Documents[0].Type == knowledge.DocumentTypeTable {
|
||||
return &localTableProcessor{
|
||||
baseDocProcessor: *base,
|
||||
}
|
||||
}
|
||||
return base
|
||||
default:
|
||||
return base
|
||||
}
|
||||
}
|
||||
77
backend/domain/knowledge/processor/impl/local_table.go
Normal file
77
backend/domain/knowledge/processor/impl/local_table.go
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package impl
|
||||
|
||||
import (
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/errorx"
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/logs"
|
||||
"github.com/coze-dev/coze-studio/backend/types/errno"
|
||||
)
|
||||
|
||||
type localTableProcessor struct {
|
||||
baseDocProcessor
|
||||
}
|
||||
|
||||
func (l *localTableProcessor) BeforeCreate() error {
|
||||
if isTableAppend(l.Documents) {
|
||||
tableDoc, _, err := l.documentRepo.FindDocumentByCondition(l.ctx, &entity.WhereDocumentOpt{
|
||||
KnowledgeIDs: []int64{l.Documents[0].KnowledgeID},
|
||||
SelectAll: true,
|
||||
})
|
||||
if err != nil {
|
||||
logs.CtxErrorf(l.ctx, "find document failed, err: %v", err)
|
||||
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
|
||||
if len(tableDoc) == 0 {
|
||||
logs.CtxErrorf(l.ctx, "table doc not found")
|
||||
return errorx.New(errno.ErrKnowledgeDocumentNotExistCode, errorx.KV("msg", "doc not found"))
|
||||
}
|
||||
|
||||
l.Documents[0].ID = tableDoc[0].ID
|
||||
|
||||
if tableDoc[0].TableInfo == nil {
|
||||
logs.CtxErrorf(l.ctx, "table info not found")
|
||||
return errorx.New(errno.ErrKnowledgeTableInfoNotExistCode, errorx.KVf("msg", "table info not found, doc_id: %d", tableDoc[0].ID))
|
||||
}
|
||||
l.Documents[0].TableInfo = ptr.From(tableDoc[0].TableInfo)
|
||||
return nil
|
||||
}
|
||||
return l.baseDocProcessor.BeforeCreate()
|
||||
}
|
||||
|
||||
func (l *localTableProcessor) BuildDBModel() error {
|
||||
if isTableAppend(l.Documents) {
|
||||
return nil
|
||||
}
|
||||
return l.baseDocProcessor.BuildDBModel()
|
||||
}
|
||||
|
||||
func (l *localTableProcessor) InsertDBModel() error {
|
||||
if isTableAppend(l.Documents) {
|
||||
// 追加场景,设置文档为处理中状态
|
||||
err := l.documentRepo.SetStatus(l.ctx, l.Documents[0].ID, int32(entity.DocumentStatusUploading), "")
|
||||
if err != nil {
|
||||
logs.CtxErrorf(l.ctx, "document set status err:%v", err)
|
||||
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return l.baseDocProcessor.InsertDBModel()
|
||||
}
|
||||
45
backend/domain/knowledge/processor/impl/utils.go
Normal file
45
backend/domain/knowledge/processor/impl/utils.go
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package impl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
|
||||
"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/document/parser"
|
||||
)
|
||||
|
||||
func getFormatType(tp knowledge.DocumentType) parser.FileExtension {
|
||||
docType := parser.FileExtensionTXT
|
||||
if tp == knowledge.DocumentTypeTable {
|
||||
docType = parser.FileExtensionJSON
|
||||
}
|
||||
return docType
|
||||
}
|
||||
|
||||
// getTosUri builds an object-storage key of the form
// "FileBizType.Knowledge/<userID>_<unix-nanoseconds>.<fileType>", using the
// current time for the nanosecond component.
func getTosUri(userID int64, fileType string) string {
	nowNanos := time.Now().UnixNano()
	return fmt.Sprintf("FileBizType.Knowledge/%d_%d.%s", userID, nowNanos, fileType)
}
|
||||
|
||||
func isTableAppend(docs []*entity.Document) bool {
|
||||
return len(docs) > 0 &&
|
||||
docs[0].Type == knowledge.DocumentTypeTable &&
|
||||
docs[0].IsAppend
|
||||
}
|
||||
Reference in New Issue
Block a user