chore: replace all cn comments to en version by volc api (#313)
This commit is contained in:
@@ -19,14 +19,14 @@ package entity
|
||||
type DocumentStatus int64
|
||||
|
||||
const (
|
||||
DocumentStatusInit DocumentStatus = -1 // 初始化
|
||||
DocumentStatusUploading DocumentStatus = 0 // 上传中
|
||||
DocumentStatusEnable DocumentStatus = 1 // 生效
|
||||
DocumentStatusDisable DocumentStatus = 2 // 失效
|
||||
DocumentStatusDeleted DocumentStatus = 3 // 已删除
|
||||
DocumentStatusChunking DocumentStatus = 4 // 切片中
|
||||
// DocumentStatusRefreshing DocumentStatus = 5 // 刷新中
|
||||
DocumentStatusFailed DocumentStatus = 9 // 失败
|
||||
DocumentStatusInit DocumentStatus = -1 // initialization
|
||||
DocumentStatusUploading DocumentStatus = 0 // Uploading
|
||||
DocumentStatusEnable DocumentStatus = 1 // enabled (in effect)
|
||||
DocumentStatusDisable DocumentStatus = 2 // disabled (no longer in effect)
|
||||
DocumentStatusDeleted DocumentStatus = 3 // deleted
|
||||
DocumentStatusChunking DocumentStatus = 4 // Slicing
|
||||
// DocumentStatusRefreshing DocumentStatus = 5 // Refreshing
|
||||
DocumentStatusFailed DocumentStatus = 9 // fail
|
||||
)
|
||||
|
||||
func (s DocumentStatus) String() string {
|
||||
@@ -44,7 +44,7 @@ func (s DocumentStatus) String() string {
|
||||
case DocumentStatusChunking:
|
||||
return "切片中"
|
||||
// case DocumentStatusRefreshing:
|
||||
// return "刷新中"
|
||||
// return "刷新中" // i.e. "Refreshing"
|
||||
case DocumentStatusFailed:
|
||||
return "失败"
|
||||
default:
|
||||
@@ -55,6 +55,6 @@ func (s DocumentStatus) String() string {
|
||||
type DocumentSource int64
|
||||
|
||||
const (
|
||||
DocumentSourceLocal DocumentSource = 0 // 本地文件上传
|
||||
DocumentSourceCustom DocumentSource = 2 // 自定义文本
|
||||
DocumentSourceLocal DocumentSource = 0 // local file upload
|
||||
DocumentSourceCustom DocumentSource = 2 // custom text
|
||||
)
|
||||
|
||||
@@ -27,25 +27,25 @@ type Document struct {
|
||||
|
||||
KnowledgeID int64
|
||||
Type knowledge.DocumentType
|
||||
RawContent string // 用户自定义的原始内容
|
||||
URI string // 文档 uri
|
||||
URL string // 文档 url
|
||||
Size int64 // 文档 bytes
|
||||
SliceCount int64 // slice 数量
|
||||
CharCount int64 // 文档字符数
|
||||
FileExtension parser.FileExtension // 文档后缀, csv/pdf...
|
||||
Status DocumentStatus // 文档状态
|
||||
StatusMsg string // 文档状态详细信息
|
||||
Hits int64 // 命中次数
|
||||
Source DocumentSource // 文档来源
|
||||
ParsingStrategy *ParsingStrategy // 解析策略
|
||||
ChunkingStrategy *ChunkingStrategy // 分段策略
|
||||
RawContent string // User-defined original content
|
||||
URI string // Document URI
|
||||
URL string // Document URL
|
||||
Size int64 // Document bytes
|
||||
SliceCount int64 // Number of slices
|
||||
CharCount int64 // Number of document characters
|
||||
FileExtension parser.FileExtension // Document suffix, csv/pdf...
|
||||
Status DocumentStatus // Document Status
|
||||
StatusMsg string // Document Status Details
|
||||
Hits int64 // hit count
|
||||
Source DocumentSource // document source
|
||||
ParsingStrategy *ParsingStrategy // parsing strategy
|
||||
ChunkingStrategy *ChunkingStrategy // segmentation strategy
|
||||
|
||||
TableInfo TableInfo
|
||||
IsAppend bool // 是否在表格中追加
|
||||
IsAppend bool // Whether to append to the table
|
||||
|
||||
// LevelURI string // 层级分段预览 uri
|
||||
// PreviewURI string // 预览 uri
|
||||
// LevelURI string // hierarchical segmentation preview URI
|
||||
// PreviewURI string // preview URI
|
||||
}
|
||||
|
||||
type TableInfo struct {
|
||||
@@ -56,18 +56,18 @@ type TableInfo struct {
|
||||
}
|
||||
type TableSheet struct {
|
||||
SheetId int64 // sheet id
|
||||
HeaderLineIdx int64 // 表头行
|
||||
StartLineIdx int64 // 数据起始行
|
||||
SheetName string // sheet的名称
|
||||
TotalRows int64 // 总行数
|
||||
HeaderLineIdx int64 // header row
|
||||
StartLineIdx int64 // Data start row
|
||||
SheetName string // Name of sheet
|
||||
TotalRows int64 // total number of rows
|
||||
}
|
||||
type TableColumn struct {
|
||||
ID int64
|
||||
Name string
|
||||
Type document.TableColumnType
|
||||
Description string
|
||||
Indexing bool // 是否索引
|
||||
Sequence int64 // 表格中的原始序号
|
||||
Indexing bool // whether to index
|
||||
Sequence int64 // The original serial number in the table
|
||||
}
|
||||
|
||||
type WhereDocumentOpt struct {
|
||||
|
||||
@@ -29,20 +29,20 @@ type Event struct {
|
||||
|
||||
type EventType string
|
||||
|
||||
// 文档 event
|
||||
// 切分 + 写入向量库操作事务性由实现自行保证
|
||||
// Document event
|
||||
// The transactionality of the split + write-to-vector-store operation is guaranteed by the implementation itself
|
||||
const (
|
||||
EventTypeIndexDocuments EventType = "index_documents"
|
||||
|
||||
// EventTypeIndexDocument 文档信息已写入 orm,逻辑中需要解析+切分+搜索数据入库
|
||||
// EventTypeIndexDocument document information has already been written through the ORM; the handler must parse, split, and store the search data
|
||||
// Event requires: Event.Document
|
||||
EventTypeIndexDocument EventType = "index_document"
|
||||
|
||||
// EventTypeIndexSlice 切片信息已写入 orm,逻辑中仅写入搜索数据
|
||||
// EventTypeIndexSlice slice information has already been written through the ORM; the handler only writes the search data
|
||||
// Event requires: Event.Slice
|
||||
EventTypeIndexSlice EventType = "index_slice"
|
||||
|
||||
// EventTypeDeleteKnowledgeData 删除 knowledge
|
||||
// EventTypeDeleteKnowledgeData remove knowledge
|
||||
// Event requires: Event.KnowledgeID, Event.SliceIDs
|
||||
EventTypeDeleteKnowledgeData EventType = "delete_knowledge_data"
|
||||
|
||||
|
||||
@@ -26,10 +26,10 @@ type WhereKnowledgeOption struct {
|
||||
KnowledgeIDs []int64
|
||||
AppID *int64
|
||||
SpaceID *int64
|
||||
Name *string // 完全匹配
|
||||
Name *string // Exact match
|
||||
Status []int32
|
||||
UserID *int64
|
||||
Query *string // 模糊匹配
|
||||
Query *string // fuzzy match
|
||||
Page *int
|
||||
PageSize *int
|
||||
Order *Order
|
||||
|
||||
@@ -25,18 +25,18 @@ type RetrievalStrategy = knowledge.RetrievalStrategy
|
||||
|
||||
// ParsingStrategy for document parse before indexing
|
||||
type ParsingStrategy struct {
|
||||
ParsingType ParsingType `json:"parsing_type"` // 解析类型
|
||||
ParsingType ParsingType `json:"parsing_type"` // parse type
|
||||
// Doc
|
||||
ExtractImage bool `json:"extract_image"` // 提取图片元素
|
||||
ExtractTable bool `json:"extract_table"` // 提取表格元素
|
||||
ImageOCR bool `json:"image_ocr"` // 图片 ocr
|
||||
FilterPages []int `json:"filter_pages"` // 过滤页数
|
||||
ExtractImage bool `json:"extract_image"` // Extract image elements
|
||||
ExtractTable bool `json:"extract_table"` // Extract table elements
|
||||
ImageOCR bool `json:"image_ocr"` // Image ocr
|
||||
FilterPages []int `json:"filter_pages"` // filter pages
|
||||
|
||||
// Sheet
|
||||
SheetID int64 `json:"sheet_id"` // xlsx sheet id
|
||||
HeaderLine int `json:"header_line"` // 表头行
|
||||
DataStartLine int `json:"data_start_line"` // 数据起始行
|
||||
RowsCount int `json:"rows_count"` // 读取数据行数
|
||||
HeaderLine int `json:"header_line"` // header row
|
||||
DataStartLine int `json:"data_start_line"` // Data start row
|
||||
RowsCount int `json:"rows_count"` // number of rows read
|
||||
|
||||
// Image
|
||||
CaptionType *parser.ImageAnnotationType `json:"caption_type"`
|
||||
@@ -52,13 +52,13 @@ const (
|
||||
type ChunkingStrategy struct {
|
||||
ChunkType parser.ChunkType `json:"chunk_type"`
|
||||
// custom chunk config
|
||||
ChunkSize int64 `json:"chunk_size"` // 分段最大长度
|
||||
Separator string `json:"separator"` // 分段标识符
|
||||
Overlap int64 `json:"overlap"` // 分段重叠
|
||||
ChunkSize int64 `json:"chunk_size"` // maximum segmentation length
|
||||
Separator string `json:"separator"` // segmentation identifier
|
||||
Overlap int64 `json:"overlap"` // segmented overlap
|
||||
TrimSpace bool `json:"trim_space"`
|
||||
TrimURLAndEmail bool `json:"trim_url_and_email"`
|
||||
|
||||
// 按层级分段
|
||||
MaxDepth int64 `json:"max_depth"` // 按层级分段时的最大层级
|
||||
SaveTitle bool `json:"save_title"` // 保留层级标题
|
||||
// segmentation by hierarchy
|
||||
MaxDepth int64 `json:"max_depth"` // Maximum level when segmented by level
|
||||
SaveTitle bool `json:"save_title"` // Preserve Hierarchical Titles
|
||||
}
|
||||
|
||||
@@ -132,12 +132,12 @@ func (dao *KnowledgeDocumentDAO) DeleteDocuments(ctx context.Context, ids []int6
|
||||
tx.Commit()
|
||||
}
|
||||
}()
|
||||
// 删除document
|
||||
// Delete document
|
||||
err = tx.WithContext(ctx).Model(&model.KnowledgeDocument{}).Where("id in ?", ids).Delete(&model.KnowledgeDocument{}).Error
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// 删除document_slice
|
||||
// Delete document_slice
|
||||
err = tx.WithContext(ctx).Model(&model.KnowledgeDocumentSlice{}).Where("document_id in?", ids).Delete(&model.KnowledgeDocumentSlice{}).Error
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
@@ -120,7 +120,7 @@ func (dao *KnowledgeDocumentSliceDAO) listBatch(ctx context.Context, knowledgeID
|
||||
pos []*model.KnowledgeDocumentSlice, hasMore bool, err error) {
|
||||
|
||||
if batchSize <= 0 {
|
||||
batchSize = 100 // 默认批量大小
|
||||
batchSize = 100 // Default batch size
|
||||
}
|
||||
|
||||
do, err := dao.listDo(ctx, knowledgeID, documentID)
|
||||
@@ -161,7 +161,7 @@ func (dao *KnowledgeDocumentSliceDAO) GetDocumentSliceIDs(ctx context.Context, d
|
||||
if len(docIDs) == 0 {
|
||||
return nil, errors.New("empty document ids")
|
||||
}
|
||||
// doc可能会有很多slice,所以批量处理
|
||||
// A document may have many slices, so process them in batches
|
||||
sliceIDs = make([]int64, 0)
|
||||
var mu sync.Mutex
|
||||
errGroup, ctx := errgroup.WithContext(ctx)
|
||||
|
||||
@@ -10,20 +10,20 @@ import (
|
||||
|
||||
const TableNameKnowledge = "knowledge"
|
||||
|
||||
// Knowledge 知识库表
|
||||
// Knowledge knowledge table
|
||||
type Knowledge struct {
|
||||
ID int64 `gorm:"column:id;primaryKey;comment:主键ID" json:"id"` // 主键ID
|
||||
Name string `gorm:"column:name;not null;comment:名称" json:"name"` // 名称
|
||||
AppID int64 `gorm:"column:app_id;not null;comment:项目ID,标识该资源是否是项目独有" json:"app_id"` // 项目ID,标识该资源是否是项目独有
|
||||
CreatorID int64 `gorm:"column:creator_id;not null;comment:ID" json:"creator_id"` // ID
|
||||
SpaceID int64 `gorm:"column:space_id;not null;comment:空间ID" json:"space_id"` // 空间ID
|
||||
CreatedAt int64 `gorm:"column:created_at;not null;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
|
||||
UpdatedAt int64 `gorm:"column:updated_at;not null;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
|
||||
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;comment:Delete Time in Milliseconds" json:"deleted_at"` // Delete Time in Milliseconds
|
||||
Status int32 `gorm:"column:status;not null;default:1;comment:0 初始化, 1 生效 2 失效" json:"status"` // 0 初始化, 1 生效 2 失效
|
||||
Description string `gorm:"column:description;comment:描述" json:"description"` // 描述
|
||||
IconURI string `gorm:"column:icon_uri;comment:头像uri" json:"icon_uri"` // 头像uri
|
||||
FormatType int32 `gorm:"column:format_type;not null;comment:0:文本 1:表格 2:图片" json:"format_type"` // 0:文本 1:表格 2:图片
|
||||
ID int64 `gorm:"column:id;primaryKey;comment:id" json:"id"` // id
|
||||
Name string `gorm:"column:name;not null;comment:knowledge's name" json:"name"` // knowledge's name
|
||||
AppID int64 `gorm:"column:app_id;not null;comment:app id" json:"app_id"` // app id
|
||||
CreatorID int64 `gorm:"column:creator_id;not null;comment:creator id" json:"creator_id"` // creator id
|
||||
SpaceID int64 `gorm:"column:space_id;not null;comment:space id" json:"space_id"` // space id
|
||||
CreatedAt int64 `gorm:"column:created_at;not null;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
|
||||
UpdatedAt int64 `gorm:"column:updated_at;not null;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
|
||||
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;comment:Delete Time" json:"deleted_at"` // Delete Time
|
||||
Status int32 `gorm:"column:status;not null;default:1;comment:0 initialization, 1 effective, 2 invalid" json:"status"` // 0 initialization, 1 effective, 2 invalid
|
||||
Description string `gorm:"column:description;comment:description" json:"description"` // description
|
||||
IconURI string `gorm:"column:icon_uri;comment:icon uri" json:"icon_uri"` // icon uri
|
||||
FormatType int32 `gorm:"column:format_type;not null;comment:0: Text 1: Table 2: Images" json:"format_type"` // 0: Text 1: Table 2: Images
|
||||
}
|
||||
|
||||
// TableName Knowledge's table name
|
||||
|
||||
@@ -11,27 +11,27 @@ import (
|
||||
|
||||
const TableNameKnowledgeDocument = "knowledge_document"
|
||||
|
||||
// KnowledgeDocument 知识库文档表
|
||||
// KnowledgeDocument knowledge document info
|
||||
type KnowledgeDocument struct {
|
||||
ID int64 `gorm:"column:id;primaryKey;comment:主键ID" json:"id"` // 主键ID
|
||||
KnowledgeID int64 `gorm:"column:knowledge_id;not null;comment:所属knowledge的ID" json:"knowledge_id"` // 所属knowledge的ID
|
||||
Name string `gorm:"column:name;not null;comment:文档名称" json:"name"` // 文档名称
|
||||
FileExtension string `gorm:"column:file_extension;not null;default:0;comment:文档类型, txt/pdf/csv/..." json:"file_extension"` // 文档类型, txt/pdf/csv/...
|
||||
DocumentType int32 `gorm:"column:document_type;not null;comment:文档类型: 0:文本 1:表格 2:图片" json:"document_type"` // 文档类型: 0:文本 1:表格 2:图片
|
||||
URI string `gorm:"column:uri;comment:资源uri" json:"uri"` // 资源uri
|
||||
Size int64 `gorm:"column:size;not null;comment:文档大小" json:"size"` // 文档大小
|
||||
SliceCount int64 `gorm:"column:slice_count;not null;comment:分片数量" json:"slice_count"` // 分片数量
|
||||
CharCount int64 `gorm:"column:char_count;not null;comment:字符数" json:"char_count"` // 字符数
|
||||
CreatorID int64 `gorm:"column:creator_id;not null;comment:创建者ID" json:"creator_id"` // 创建者ID
|
||||
SpaceID int64 `gorm:"column:space_id;not null;comment:空间id" json:"space_id"` // 空间id
|
||||
CreatedAt int64 `gorm:"column:created_at;not null;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
|
||||
UpdatedAt int64 `gorm:"column:updated_at;not null;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
|
||||
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;comment:Delete Time in Milliseconds" json:"deleted_at"` // Delete Time in Milliseconds
|
||||
SourceType int32 `gorm:"column:source_type;not null;comment:0:本地文件上传, 2:自定义文本" json:"source_type"` // 0:本地文件上传, 2:自定义文本
|
||||
Status int32 `gorm:"column:status;not null;comment:状态" json:"status"` // 状态
|
||||
FailReason string `gorm:"column:fail_reason;comment:失败原因" json:"fail_reason"` // 失败原因
|
||||
ParseRule *DocumentParseRule `gorm:"column:parse_rule;comment:解析+切片规则;serializer:json" json:"parse_rule"` // 解析+切片规则
|
||||
TableInfo *entity.TableInfo `gorm:"column:table_info;comment:表格信息;serializer:json" json:"table_info"` // 表格信息
|
||||
ID int64 `gorm:"column:id;primaryKey;comment:id" json:"id"` // id
|
||||
KnowledgeID int64 `gorm:"column:knowledge_id;not null;comment:knowledge id" json:"knowledge_id"` // knowledge id
|
||||
Name string `gorm:"column:name;not null;comment:document name" json:"name"` // document name
|
||||
FileExtension string `gorm:"column:file_extension;not null;default:0;comment:Document type, txt/pdf/csv etc.." json:"file_extension"` // Document type, txt/pdf/csv etc..
|
||||
DocumentType int32 `gorm:"column:document_type;not null;comment:Document type: 0: Text 1: Table 2: Image" json:"document_type"` // Document type: 0: Text 1: Table 2: Image
|
||||
URI string `gorm:"column:uri;comment:uri" json:"uri"` // uri
|
||||
Size int64 `gorm:"column:size;not null;comment:document size" json:"size"` // document size
|
||||
SliceCount int64 `gorm:"column:slice_count;not null;comment:slice count" json:"slice_count"` // slice count
|
||||
CharCount int64 `gorm:"column:char_count;not null;comment:number of characters" json:"char_count"` // number of characters
|
||||
CreatorID int64 `gorm:"column:creator_id;not null;comment:creator id" json:"creator_id"` // creator id
|
||||
SpaceID int64 `gorm:"column:space_id;not null;comment:space id" json:"space_id"` // space id
|
||||
CreatedAt int64 `gorm:"column:created_at;not null;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
|
||||
UpdatedAt int64 `gorm:"column:updated_at;not null;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
|
||||
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;comment:Delete Time" json:"deleted_at"` // Delete Time
|
||||
SourceType int32 `gorm:"column:source_type;comment:0: Local file upload, 2: Custom text, 103: Feishu 104: Lark" json:"source_type"` // 0: Local file upload, 2: Custom text, 103: Feishu 104: Lark
|
||||
Status int32 `gorm:"column:status;not null;comment:status" json:"status"` // status
|
||||
FailReason string `gorm:"column:fail_reason;comment:fail reason" json:"fail_reason"` // fail reason
|
||||
ParseRule *DocumentParseRule `gorm:"column:parse_rule;comment:parse rule;serializer:json" json:"parse_rule"` // parse rule
|
||||
TableInfo *entity.TableInfo `gorm:"column:table_info;comment:table info;serializer:json" json:"table_info"` // table info
|
||||
}
|
||||
|
||||
// TableName KnowledgeDocument's table name
|
||||
|
||||
@@ -10,21 +10,21 @@ import (
|
||||
|
||||
const TableNameKnowledgeDocumentReview = "knowledge_document_review"
|
||||
|
||||
// KnowledgeDocumentReview 文档审阅表
|
||||
// KnowledgeDocumentReview Document slice preview info
|
||||
type KnowledgeDocumentReview struct {
|
||||
ID int64 `gorm:"column:id;primaryKey;comment:主键ID" json:"id"` // 主键ID
|
||||
KnowledgeID int64 `gorm:"column:knowledge_id;not null;comment:knowledge id" json:"knowledge_id"` // knowledge id
|
||||
SpaceID int64 `gorm:"column:space_id;not null;comment:空间id" json:"space_id"` // 空间id
|
||||
Name string `gorm:"column:name;not null;comment:文档名称" json:"name"` // 文档名称
|
||||
Type string `gorm:"column:type;not null;default:0;comment:文档类型" json:"type"` // 文档类型
|
||||
URI string `gorm:"column:uri;comment:资源标识" json:"uri"` // 资源标识
|
||||
FormatType int32 `gorm:"column:format_type;not null;comment:0 文本, 1 表格, 2 图片" json:"format_type"` // 0 文本, 1 表格, 2 图片
|
||||
Status int32 `gorm:"column:status;not null;comment:0 处理中,1 已完成,2 失败,3 失效" json:"status"` // 0 处理中,1 已完成,2 失败,3 失效
|
||||
ChunkRespURI string `gorm:"column:chunk_resp_uri;comment:预切片tos资源标识" json:"chunk_resp_uri"` // 预切片tos资源标识
|
||||
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;comment:Delete Time in Milliseconds" json:"deleted_at"` // Delete Time in Milliseconds
|
||||
CreatedAt int64 `gorm:"column:created_at;not null;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
|
||||
UpdatedAt int64 `gorm:"column:updated_at;not null;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
|
||||
CreatorID int64 `gorm:"column:creator_id;not null;comment:创建者ID" json:"creator_id"` // 创建者ID
|
||||
ID int64 `gorm:"column:id;primaryKey;comment:id" json:"id"` // id
|
||||
KnowledgeID int64 `gorm:"column:knowledge_id;not null;comment:knowledge id" json:"knowledge_id"` // knowledge id
|
||||
SpaceID int64 `gorm:"column:space_id;not null;comment:space id" json:"space_id"` // space id
|
||||
Name string `gorm:"column:name;not null;comment:name" json:"name"` // name
|
||||
Type string `gorm:"column:type;not null;default:0;comment:document type" json:"type"` // document type
|
||||
URI string `gorm:"column:uri;comment:uri" json:"uri"` // uri
|
||||
FormatType int32 `gorm:"column:format_type;not null;comment:0 text, 1 table, 2 images" json:"format_type"` // 0 text, 1 table, 2 images
|
||||
Status int32 `gorm:"column:status;not null;comment:0 Processing 1 Completed 2 Failed 3 Expired" json:"status"` // 0 Processing 1 Completed 2 Failed 3 Expired
|
||||
ChunkRespURI string `gorm:"column:chunk_resp_uri;comment:pre-sliced uri" json:"chunk_resp_uri"` // pre-sliced uri
|
||||
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;comment:Delete Time" json:"deleted_at"` // Delete Time
|
||||
CreatedAt int64 `gorm:"column:created_at;not null;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
|
||||
UpdatedAt int64 `gorm:"column:updated_at;not null;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
|
||||
CreatorID int64 `gorm:"column:creator_id;not null;comment:creator id" json:"creator_id"` // creator id
|
||||
}
|
||||
|
||||
// TableName KnowledgeDocumentReview's table name
|
||||
|
||||
@@ -10,21 +10,21 @@ import (
|
||||
|
||||
const TableNameKnowledgeDocumentSlice = "knowledge_document_slice"
|
||||
|
||||
// KnowledgeDocumentSlice 知识库文件切片表
|
||||
// KnowledgeDocumentSlice knowledge document slice
|
||||
type KnowledgeDocumentSlice struct {
|
||||
ID int64 `gorm:"column:id;primaryKey;comment:主键ID" json:"id"` // 主键ID
|
||||
KnowledgeID int64 `gorm:"column:knowledge_id;not null;comment:knowledge id" json:"knowledge_id"` // knowledge id
|
||||
DocumentID int64 `gorm:"column:document_id;not null;comment:document id" json:"document_id"` // document id
|
||||
Content string `gorm:"column:content;comment:切片内容" json:"content"` // 切片内容
|
||||
Sequence float64 `gorm:"column:sequence;not null;type:decimal(20,5);comment:切片顺序号, 从1开始" json:"sequence"` // 切片顺序号, 从1开始
|
||||
CreatedAt int64 `gorm:"column:created_at;not null;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
|
||||
UpdatedAt int64 `gorm:"column:updated_at;not null;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
|
||||
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;comment:Delete Time in Milliseconds" json:"deleted_at"` // Delete Time in Milliseconds
|
||||
CreatorID int64 `gorm:"column:creator_id;not null;comment:创建者ID" json:"creator_id"` // 创建者ID
|
||||
SpaceID int64 `gorm:"column:space_id;not null;comment:空间ID" json:"space_id"` // 空间ID
|
||||
Status int32 `gorm:"column:status;not null;comment:状态" json:"status"` // 状态
|
||||
FailReason string `gorm:"column:fail_reason;comment:失败原因" json:"fail_reason"` // 失败原因
|
||||
Hit int64 `gorm:"column:hit;not null;comment:命中次数" json:"hit"` // 命中次数
|
||||
ID int64 `gorm:"column:id;primaryKey;comment:id" json:"id"` // id
|
||||
KnowledgeID int64 `gorm:"column:knowledge_id;not null;comment:knowledge id" json:"knowledge_id"` // knowledge id
|
||||
DocumentID int64 `gorm:"column:document_id;not null;comment:document_id" json:"document_id"` // document_id
|
||||
Content string `gorm:"column:content;comment:content" json:"content"` // content
|
||||
Sequence float64 `gorm:"column:sequence;not null;comment:slice sequence number, starting from 1" json:"sequence"` // slice sequence number, starting from 1
|
||||
CreatedAt int64 `gorm:"column:created_at;not null;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
|
||||
UpdatedAt int64 `gorm:"column:updated_at;not null;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
|
||||
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;comment:Delete Time" json:"deleted_at"` // Delete Time
|
||||
CreatorID int64 `gorm:"column:creator_id;not null;comment:creator id" json:"creator_id"` // creator id
|
||||
SpaceID int64 `gorm:"column:space_id;not null;comment:space id" json:"space_id"` // space id
|
||||
Status int32 `gorm:"column:status;not null;comment:status" json:"status"` // status
|
||||
FailReason string `gorm:"column:fail_reason;comment:fail reason" json:"fail_reason"` // fail reason
|
||||
Hit int64 `gorm:"column:hit;not null;comment:hit counts" json:"hit"` // hit counts
|
||||
}
|
||||
|
||||
// TableName KnowledgeDocumentSlice's table name
|
||||
|
||||
@@ -45,23 +45,23 @@ func newKnowledge(db *gorm.DB, opts ...gen.DOOption) knowledge {
|
||||
return _knowledge
|
||||
}
|
||||
|
||||
// knowledge 知识库表
|
||||
// knowledge knowledge table
|
||||
type knowledge struct {
|
||||
knowledgeDo
|
||||
|
||||
ALL field.Asterisk
|
||||
ID field.Int64 // 主键ID
|
||||
Name field.String // 名称
|
||||
AppID field.Int64 // 项目ID,标识该资源是否是项目独有
|
||||
CreatorID field.Int64 // ID
|
||||
SpaceID field.Int64 // 空间ID
|
||||
ID field.Int64 // id
|
||||
Name field.String // knowledge's name
|
||||
AppID field.Int64 // app id
|
||||
CreatorID field.Int64 // creator id
|
||||
SpaceID field.Int64 // space id
|
||||
CreatedAt field.Int64 // Create Time in Milliseconds
|
||||
UpdatedAt field.Int64 // Update Time in Milliseconds
|
||||
DeletedAt field.Field // Delete Time in Milliseconds
|
||||
Status field.Int32 // 0 初始化, 1 生效 2 失效
|
||||
Description field.String // 描述
|
||||
IconURI field.String // 头像uri
|
||||
FormatType field.Int32 // 0:文本 1:表格 2:图片
|
||||
DeletedAt field.Field // Delete Time
|
||||
Status field.Int32 // 0 initialization, 1 effective, 2 invalid
|
||||
Description field.String // description
|
||||
IconURI field.String // icon uri
|
||||
FormatType field.Int32 // 0: Text 1: Table 2: Images
|
||||
|
||||
fieldMap map[string]field.Expr
|
||||
}
|
||||
|
||||
@@ -52,30 +52,30 @@ func newKnowledgeDocument(db *gorm.DB, opts ...gen.DOOption) knowledgeDocument {
|
||||
return _knowledgeDocument
|
||||
}
|
||||
|
||||
// knowledgeDocument 知识库文档表
|
||||
// knowledgeDocument knowledge document info
|
||||
type knowledgeDocument struct {
|
||||
knowledgeDocumentDo
|
||||
|
||||
ALL field.Asterisk
|
||||
ID field.Int64 // 主键ID
|
||||
KnowledgeID field.Int64 // 所属knowledge的ID
|
||||
Name field.String // 文档名称
|
||||
FileExtension field.String // 文档类型, txt/pdf/csv/...
|
||||
DocumentType field.Int32 // 文档类型: 0:文本 1:表格 2:图片
|
||||
URI field.String // 资源uri
|
||||
Size field.Int64 // 文档大小
|
||||
SliceCount field.Int64 // 分片数量
|
||||
CharCount field.Int64 // 字符数
|
||||
CreatorID field.Int64 // 创建者ID
|
||||
SpaceID field.Int64 // 空间id
|
||||
ID field.Int64 // id
|
||||
KnowledgeID field.Int64 // knowledge id
|
||||
Name field.String // document name
|
||||
FileExtension field.String // Document type, txt/pdf/csv etc..
|
||||
DocumentType field.Int32 // Document type: 0: Text 1: Table 2: Image
|
||||
URI field.String // uri
|
||||
Size field.Int64 // document size
|
||||
SliceCount field.Int64 // slice count
|
||||
CharCount field.Int64 // number of characters
|
||||
CreatorID field.Int64 // creator id
|
||||
SpaceID field.Int64 // space id
|
||||
CreatedAt field.Int64 // Create Time in Milliseconds
|
||||
UpdatedAt field.Int64 // Update Time in Milliseconds
|
||||
DeletedAt field.Field // Delete Time in Milliseconds
|
||||
SourceType field.Int32 // 0:本地文件上传, 2:自定义文本
|
||||
Status field.Int32 // 状态
|
||||
FailReason field.String // 失败原因
|
||||
ParseRule field.Field // 解析+切片规则
|
||||
TableInfo field.Field // 表格信息
|
||||
DeletedAt field.Field // Delete Time
|
||||
SourceType field.Int32 // 0: Local file upload, 2: Custom text, 103: Feishu 104: Lark
|
||||
Status field.Int32 // status
|
||||
FailReason field.String // fail reason
|
||||
ParseRule field.Field // parse rule
|
||||
TableInfo field.Field // table info
|
||||
|
||||
fieldMap map[string]field.Expr
|
||||
}
|
||||
|
||||
@@ -46,24 +46,24 @@ func newKnowledgeDocumentReview(db *gorm.DB, opts ...gen.DOOption) knowledgeDocu
|
||||
return _knowledgeDocumentReview
|
||||
}
|
||||
|
||||
// knowledgeDocumentReview 文档审阅表
|
||||
// knowledgeDocumentReview Document slice preview info
|
||||
type knowledgeDocumentReview struct {
|
||||
knowledgeDocumentReviewDo
|
||||
|
||||
ALL field.Asterisk
|
||||
ID field.Int64 // 主键ID
|
||||
ID field.Int64 // id
|
||||
KnowledgeID field.Int64 // knowledge id
|
||||
SpaceID field.Int64 // 空间id
|
||||
Name field.String // 文档名称
|
||||
Type field.String // 文档类型
|
||||
URI field.String // 资源标识
|
||||
FormatType field.Int32 // 0 文本, 1 表格, 2 图片
|
||||
Status field.Int32 // 0 处理中,1 已完成,2 失败,3 失效
|
||||
ChunkRespURI field.String // 预切片tos资源标识
|
||||
DeletedAt field.Field // Delete Time in Milliseconds
|
||||
SpaceID field.Int64 // space id
|
||||
Name field.String // name
|
||||
Type field.String // document type
|
||||
URI field.String // uri
|
||||
FormatType field.Int32 // 0 text, 1 table, 2 images
|
||||
Status field.Int32 // 0 Processing 1 Completed 2 Failed 3 Expired
|
||||
ChunkRespURI field.String // pre-sliced uri
|
||||
DeletedAt field.Field // Delete Time
|
||||
CreatedAt field.Int64 // Create Time in Milliseconds
|
||||
UpdatedAt field.Int64 // Update Time in Milliseconds
|
||||
CreatorID field.Int64 // 创建者ID
|
||||
CreatorID field.Int64 // creator id
|
||||
|
||||
fieldMap map[string]field.Expr
|
||||
}
|
||||
|
||||
@@ -46,24 +46,24 @@ func newKnowledgeDocumentSlice(db *gorm.DB, opts ...gen.DOOption) knowledgeDocum
|
||||
return _knowledgeDocumentSlice
|
||||
}
|
||||
|
||||
// knowledgeDocumentSlice 知识库文件切片表
|
||||
// knowledgeDocumentSlice knowledge document slice
|
||||
type knowledgeDocumentSlice struct {
|
||||
knowledgeDocumentSliceDo
|
||||
|
||||
ALL field.Asterisk
|
||||
ID field.Int64 // 主键ID
|
||||
ID field.Int64 // id
|
||||
KnowledgeID field.Int64 // knowledge id
|
||||
DocumentID field.Int64 // document id
|
||||
Content field.String // 切片内容
|
||||
Sequence field.Float64 // 切片顺序号, 从1开始
|
||||
DocumentID field.Int64 // document_id
|
||||
Content field.String // content
|
||||
Sequence field.Float64 // slice sequence number, starting from 1
|
||||
CreatedAt field.Int64 // Create Time in Milliseconds
|
||||
UpdatedAt field.Int64 // Update Time in Milliseconds
|
||||
DeletedAt field.Field // Delete Time in Milliseconds
|
||||
CreatorID field.Int64 // 创建者ID
|
||||
SpaceID field.Int64 // 空间ID
|
||||
Status field.Int32 // 状态
|
||||
FailReason field.String // 失败原因
|
||||
Hit field.Int64 // 命中次数
|
||||
DeletedAt field.Field // Delete Time
|
||||
CreatorID field.Int64 // creator id
|
||||
SpaceID field.Int64 // space id
|
||||
Status field.Int32 // status
|
||||
FailReason field.String // fail reason
|
||||
Hit field.Int64 // hit counts
|
||||
|
||||
fieldMap map[string]field.Expr
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ type baseDocProcessor struct {
|
||||
Documents []*entity.Document
|
||||
documentSource *entity.DocumentSource
|
||||
|
||||
// 落DB 的 model
|
||||
// Models to be persisted to the DB
|
||||
TableName string
|
||||
docModels []*model.KnowledgeDocument
|
||||
|
||||
@@ -63,7 +63,7 @@ type baseDocProcessor struct {
|
||||
}
|
||||
|
||||
func (p *baseDocProcessor) BeforeCreate() error {
|
||||
// 从数据源拉取数据
|
||||
// Pull data from a data source
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -154,7 +154,7 @@ func (p *baseDocProcessor) InsertDBModel() (err error) {
|
||||
|
||||
func (p *baseDocProcessor) createTable() error {
|
||||
if len(p.Documents) == 1 && p.Documents[0].Type == knowledge.DocumentTypeTable {
|
||||
// 表格型知识库,创建表
|
||||
// Table-type knowledge base: create the table
|
||||
rdbColumns := []*rdbEntity.Column{}
|
||||
tableColumns := p.Documents[0].TableInfo.Columns
|
||||
columnIDs, err := p.idgen.GenMultiIDs(p.ctx, len(tableColumns)+1)
|
||||
@@ -178,13 +178,13 @@ func (p *baseDocProcessor) createTable() error {
|
||||
Indexing: false,
|
||||
Sequence: -1,
|
||||
})
|
||||
// 为每个表格增加个主键ID
|
||||
// Add a primary key ID to each table
|
||||
rdbColumns = append(rdbColumns, &rdbEntity.Column{
|
||||
Name: consts.RDBFieldID,
|
||||
DataType: rdbEntity.TypeBigInt,
|
||||
NotNull: true,
|
||||
})
|
||||
// 创建一个数据表
|
||||
// Create a data table
|
||||
resp, err := p.rdb.CreateTable(p.ctx, &rdb.CreateTableRequest{
|
||||
Table: &rdbEntity.Table{
|
||||
Columns: rdbColumns,
|
||||
|
||||
@@ -18,7 +18,7 @@ package impl
|
||||
|
||||
import "github.com/coze-dev/coze-studio/backend/pkg/logs"
|
||||
|
||||
// 用户输入自定义内容后创建文档
|
||||
// Create a document after the user enters custom content
|
||||
type customDocProcessor struct {
|
||||
baseDocProcessor
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ import (
|
||||
"github.com/coze-dev/coze-studio/backend/types/errno"
|
||||
)
|
||||
|
||||
// 用户自定义表格创建文档
|
||||
// Creates a document from a user-defined table
|
||||
type customTableProcessor struct {
|
||||
baseDocProcessor
|
||||
}
|
||||
@@ -46,7 +46,7 @@ func (c *customTableProcessor) BeforeCreate() error {
|
||||
return errorx.New(errno.ErrKnowledgeTableInfoNotExistCode, errorx.KVf("msg", "table info not found, doc_id: %d", tableDoc[0].ID))
|
||||
}
|
||||
c.Documents[0].TableInfo = *tableDoc[0].TableInfo
|
||||
// 追加场景
|
||||
// Append scenario
|
||||
if c.Documents[0].RawContent != "" {
|
||||
c.Documents[0].FileExtension = getFormatType(c.Documents[0].Type)
|
||||
uri := getTosUri(c.UserID, string(c.Documents[0].FileExtension))
|
||||
@@ -65,14 +65,14 @@ func (c *customTableProcessor) BuildDBModel() error {
|
||||
if len(c.Documents) > 0 &&
|
||||
c.Documents[0].Type == knowledge.DocumentTypeTable {
|
||||
if c.Documents[0].IsAppend {
|
||||
// 追加场景,不需要创建表了
|
||||
// 一是用户自定义一些数据、二是再上传一个表格,把表格里的数据追加到表格中
|
||||
// Append scenario: no table needs to be created
|
||||
// Either the user enters some custom data, or uploads another table whose data is appended to the existing table
|
||||
} else {
|
||||
err := c.baseDocProcessor.BuildDBModel()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// 因为这种创建方式不带数据,所以直接设置状态为可用
|
||||
// Since this method of creation does not carry any data, the state is set to available directly
|
||||
for i := range c.docModels {
|
||||
c.docModels[i].DocumentType = 1
|
||||
c.docModels[i].Status = int32(entity.DocumentStatusInit)
|
||||
@@ -84,7 +84,7 @@ func (c *customTableProcessor) BuildDBModel() error {
|
||||
|
||||
func (c *customTableProcessor) InsertDBModel() error {
|
||||
if isTableAppend(c.Documents) {
|
||||
// 追加场景,设置文档为处理中状态
|
||||
// Append scenario: set the document to the processing state
|
||||
err := c.documentRepo.SetStatus(c.ctx, c.Documents[0].ID, int32(entity.DocumentStatusUploading), "")
|
||||
if err != nil {
|
||||
logs.CtxErrorf(c.ctx, "document set status err:%v", err)
|
||||
|
||||
@@ -65,7 +65,7 @@ func (l *localTableProcessor) BuildDBModel() error {
|
||||
|
||||
func (l *localTableProcessor) InsertDBModel() error {
|
||||
if isTableAppend(l.Documents) {
|
||||
// 追加场景,设置文档为处理中状态
|
||||
// Append scenario: set the document to the processing state
|
||||
err := l.documentRepo.SetStatus(l.ctx, l.Documents[0].ID, int32(entity.DocumentStatusUploading), "")
|
||||
if err != nil {
|
||||
logs.CtxErrorf(l.ctx, "document set status err:%v", err)
|
||||
|
||||
@@ -19,10 +19,10 @@ package processor
|
||||
import "github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
|
||||
|
||||
type DocProcessor interface {
|
||||
BeforeCreate() error // 获取数据源
|
||||
BuildDBModel() error // 构建Doc记录
|
||||
InsertDBModel() error // 向数据库中插入一条Doc记录
|
||||
Indexing() error // 发起索引任务
|
||||
GetResp() []*entity.Document // 返回处理后的文档信息
|
||||
BeforeCreate() error // Get data source
|
||||
BuildDBModel() error // Build Doc Record
|
||||
InsertDBModel() error // Insert a Doc record into the database
|
||||
Indexing() error // Initiate indexing task
|
||||
GetResp() []*entity.Document // Return the processed document information
|
||||
//GetColumnName()
|
||||
}
|
||||
|
||||
@@ -247,7 +247,7 @@ var d2sMapping = map[knowledge.DocumentType]document2SliceFn{
|
||||
return slice, nil
|
||||
},
|
||||
knowledge.DocumentTypeTable: func(doc *schema.Document, knowledgeID, documentID, creatorID int64) (*entity.Slice, error) {
|
||||
// NOTICE: table 类型的原始数据需要去 rdb 里查
|
||||
// NOTICE: the raw data of a table-type document has to be looked up in rdb
|
||||
slice := &entity.Slice{
|
||||
Info: knowledge.Info{},
|
||||
KnowledgeID: knowledgeID,
|
||||
|
||||
@@ -200,7 +200,7 @@ func (k *knowledgeSVC) copyKnowledge(ctx context.Context, copyCtx *knowledgeCopy
|
||||
}
|
||||
|
||||
func (k *knowledgeSVC) copyKnowledgeDocuments(ctx context.Context, copyCtx *knowledgeCopyCtx) (err error) {
|
||||
// 查询document信息(仅处理完成的文档)
|
||||
// Query document information (only processed documents)
|
||||
documents, _, err := k.documentRepo.FindDocumentByCondition(ctx, &entity.WhereDocumentOpt{
|
||||
KnowledgeIDs: []int64{copyCtx.OriginData.ID},
|
||||
StatusIn: []int32{int32(entity.DocumentStatusEnable), int32(entity.DocumentStatusInit)},
|
||||
@@ -248,7 +248,7 @@ func (k *knowledgeSVC) copyKnowledgeDocuments(ctx context.Context, copyCtx *know
|
||||
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
}
|
||||
// 表格类复制
|
||||
// table copy
|
||||
eg := errgroup.Group{}
|
||||
eg.SetLimit(10)
|
||||
mu := sync.Mutex{}
|
||||
@@ -286,7 +286,7 @@ func (k *knowledgeSVC) copyKnowledgeDocuments(ctx context.Context, copyCtx *know
|
||||
}
|
||||
|
||||
func (k *knowledgeSVC) copyDocument(ctx context.Context, copyCtx *knowledgeCopyCtx, doc *model.KnowledgeDocument, newDocID int64) (err error) {
|
||||
// 表格类文档复制
|
||||
// tabular document replication
|
||||
newDoc := model.KnowledgeDocument{
|
||||
ID: newDocID,
|
||||
KnowledgeID: copyCtx.CopyTask.TargetDataID,
|
||||
@@ -307,7 +307,7 @@ func (k *knowledgeSVC) copyDocument(ctx context.Context, copyCtx *knowledgeCopyC
|
||||
ParseRule: doc.ParseRule,
|
||||
}
|
||||
columnMap := map[int64]int64{}
|
||||
// 如果是表格型知识库->创建新的表格
|
||||
// If it is a table-type knowledge base -> create a new table
|
||||
if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
|
||||
if doc.TableInfo != nil {
|
||||
newTableInfo := entity.TableInfo{}
|
||||
@@ -472,7 +472,7 @@ func (k *knowledgeSVC) copyDocument(ctx context.Context, copyCtx *knowledgeCopyC
|
||||
return nil
|
||||
}
|
||||
func (k *knowledgeSVC) createTable(ctx context.Context, doc *model.KnowledgeDocument) error {
|
||||
// 表格型知识库,创建表
|
||||
// Table-type knowledge base: create the table
|
||||
rdbColumns := []*rdbEntity.Column{}
|
||||
tableColumns := doc.TableInfo.Columns
|
||||
columnIDs, err := k.genMultiIDs(ctx, len(tableColumns)+1)
|
||||
@@ -495,13 +495,13 @@ func (k *knowledgeSVC) createTable(ctx context.Context, doc *model.KnowledgeDocu
|
||||
Indexing: false,
|
||||
Sequence: -1,
|
||||
})
|
||||
// 为每个表格增加个主键ID
|
||||
// Add a primary key ID to each table
|
||||
rdbColumns = append(rdbColumns, &rdbEntity.Column{
|
||||
Name: consts.RDBFieldID,
|
||||
DataType: rdbEntity.TypeBigInt,
|
||||
NotNull: true,
|
||||
})
|
||||
// 创建一个数据表
|
||||
// Create a data table
|
||||
resp, err := k.rdb.CreateTable(ctx, &rdb.CreateTableRequest{
|
||||
Table: &rdbEntity.Table{
|
||||
Columns: rdbColumns,
|
||||
|
||||
@@ -97,7 +97,7 @@ func (k *knowledgeSVC) HandleMessage(ctx context.Context, msg *eventbus.Message)
|
||||
}
|
||||
|
||||
func (k *knowledgeSVC) deleteKnowledgeDataEventHandler(ctx context.Context, event *entity.Event) error {
|
||||
// 删除知识库在各个存储里的数据
|
||||
// Delete the data in each store of the knowledge base
|
||||
for _, manager := range k.searchStoreManagers {
|
||||
s, err := manager.GetSearchStore(ctx, getCollectionName(event.KnowledgeID))
|
||||
if err != nil {
|
||||
@@ -145,8 +145,8 @@ func (k *knowledgeSVC) indexDocument(ctx context.Context, event *entity.Event) (
|
||||
return errorx.New(errno.ErrKnowledgeNonRetryableCode, errorx.KV("reason", "[indexDocument] document not provided"))
|
||||
}
|
||||
|
||||
// 1. retry 队列和普通队列中对同一文档的 index 操作并发,同一个文档数据写入两份(在后端 bugfix 上线时产生)
|
||||
// 2. rebalance 重复消费同一条消息
|
||||
// 1. Index operations on the same document run concurrently in the retry queue and the normal queue, so the same document data is written twice (seen when a backend bugfix was rolled out)
|
||||
// 2. A rebalance causes the same message to be consumed more than once
|
||||
|
||||
// check knowledge and document status
|
||||
if valid, err := k.isWritableKnowledgeAndDocument(ctx, doc.KnowledgeID, doc.ID); err != nil {
|
||||
@@ -281,7 +281,7 @@ func (k *knowledgeSVC) indexDocument(ctx context.Context, event *entity.Event) (
|
||||
|
||||
// save slices
|
||||
if doc.Type == knowledge.DocumentTypeTable {
|
||||
// 表格类型,将数据插入到数据库中
|
||||
// Table type: insert the data into the database
|
||||
err = k.upsertDataToTable(ctx, &doc.TableInfo, sliceEntities)
|
||||
if err != nil {
|
||||
logs.CtxErrorf(ctx, "[indexDocument] insert data to table failed, err: %v", err)
|
||||
@@ -360,7 +360,7 @@ func (k *knowledgeSVC) indexDocument(ctx context.Context, event *entity.Event) (
|
||||
}); err != nil {
|
||||
return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", fmt.Sprintf("create search store failed, err: %v", err)))
|
||||
}
|
||||
// 图片型知识库kn:doc:slice = 1:n:n,可能content为空,不需要写入
|
||||
// Image knowledge base (kn:doc:slice = 1:n:n): the content may be empty, in which case nothing needs to be written
|
||||
if doc.Type == knowledge.DocumentTypeImage && len(ssDocs) == 1 && len(ssDocs[0].Content) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -203,16 +203,16 @@ type RetrieveRequest = knowledge.RetrieveRequest
|
||||
|
||||
type RetrieveContext struct {
|
||||
Ctx context.Context
|
||||
OriginQuery string // 原始 query
|
||||
RewrittenQuery *string // 改写后的 query, 如果没有改写,就是 nil, 会在执行过程中添加上去
|
||||
ChatHistory []*schema.Message // 如果没有对话历史或者不需要历史,则为 nil
|
||||
KnowledgeIDs sets.Set[int64] // 本次检索涉及的知识库id
|
||||
KnowledgeInfoMap map[int64]*KnowledgeInfo // 知识库id到文档id的映射
|
||||
// 召回策略
|
||||
OriginQuery string // Original query
|
||||
RewrittenQuery *string // The rewritten query, if not rewritten, is nil, which will be added during execution
|
||||
ChatHistory []*schema.Message // Nil if there is no dialogue history or no history is required
|
||||
KnowledgeIDs sets.Set[int64] // The knowledge base ID involved in this search
|
||||
KnowledgeInfoMap map[int64]*KnowledgeInfo // Mapping of Knowledge Base IDs to Document IDs
|
||||
// recall strategy
|
||||
Strategy *entity.RetrievalStrategy
|
||||
// 检索涉及的 document 信息
|
||||
// Document information involved in this retrieval
|
||||
Documents []*model.KnowledgeDocument
|
||||
// 用于 nl2sql 和 message to query 的 chat model
|
||||
// A chat model for nl2sql and message to query
|
||||
ChatModel chatmodel.BaseChatModel
|
||||
}
|
||||
|
||||
@@ -254,9 +254,9 @@ type TableSchemaResponse struct {
|
||||
type TableDataType int32
|
||||
|
||||
const (
|
||||
AllData TableDataType = 0 // schema sheets 和 preview data
|
||||
OnlySchema TableDataType = 1 // 只需要 schema 结构 & Sheets
|
||||
OnlyPreview TableDataType = 2 // 只需要 preview data
|
||||
AllData TableDataType = 0 // Schema sheets and preview data
|
||||
OnlySchema TableDataType = 1 // Only need schema structure & Sheets
|
||||
OnlyPreview TableDataType = 2 // Only preview data is needed
|
||||
)
|
||||
|
||||
type GetDocumentTableInfoRequest struct {
|
||||
|
||||
@@ -103,19 +103,19 @@ func NewKnowledgeSVC(config *KnowledgeSVCConfig) (Knowledge, eventbus.ConsumerHa
|
||||
type KnowledgeSVCConfig struct {
|
||||
DB *gorm.DB // required
|
||||
IDGen idgen.IDGenerator // required
|
||||
RDB rdb.RDB // required: 表格存储
|
||||
Producer eventbus.Producer // required: 文档 indexing 过程走 mq 异步处理
|
||||
SearchStoreManagers []searchstore.Manager // required: 向量 / 全文
|
||||
ParseManager parser.Manager // optional: 文档切分与处理能力, default builtin parser
|
||||
RDB rdb.RDB // Required: table storage
|
||||
Producer eventbus.Producer // Required: Document indexing process goes through mq asynchronous processing
|
||||
SearchStoreManagers []searchstore.Manager // Required: Vector/Full Text
|
||||
ParseManager parser.Manager // Optional: document segmentation and processing capability, default builtin parser
|
||||
Storage storage.Storage // required: oss
|
||||
ModelFactory chatmodel.Factory // required: 模型 factory
|
||||
Rewriter messages2query.MessagesToQuery // optional: 未配置时不改写
|
||||
Reranker rerank.Reranker // optional: 未配置时默认 rrf
|
||||
NL2Sql nl2sql.NL2SQL // optional: 未配置时默认不支持
|
||||
EnableCompactTable *bool // optional: 表格数据压缩,默认 true
|
||||
OCR ocr.OCR // optional: ocr, 未提供时 ocr 功能不可用
|
||||
CacheCli cache.Cmdable // optional: 缓存实现
|
||||
IsAutoAnnotationSupported bool // 是否支持了图片自动标注
|
||||
ModelFactory chatmodel.Factory // Required: Model factory
|
||||
Rewriter messages2query.MessagesToQuery // Optional: queries are not rewritten when not configured
|
||||
Reranker rerank.Reranker // Optional: default rrf when not configured
|
||||
NL2Sql nl2sql.NL2SQL // Optional: Not supported by default when not configured
|
||||
EnableCompactTable *bool // Optional: Table data compression, default true
|
||||
OCR ocr.OCR // Optional: ocr, ocr function is not available when not provided
|
||||
CacheCli cache.Cmdable // Optional: cache implementation
|
||||
IsAutoAnnotationSupported bool // Does it support automatic image labeling?
|
||||
}
|
||||
|
||||
type knowledgeSVC struct {
|
||||
@@ -135,8 +135,8 @@ type knowledgeSVC struct {
|
||||
storage storage.Storage
|
||||
nl2Sql nl2sql.NL2SQL
|
||||
cacheCli cache.Cmdable
|
||||
enableCompactTable bool // 表格数据压缩
|
||||
isAutoAnnotationSupported bool // 是否支持了图片自动标注
|
||||
enableCompactTable bool // Table data compression
|
||||
isAutoAnnotationSupported bool // Does it support automatic image labeling?
|
||||
}
|
||||
|
||||
func (k *knowledgeSVC) CreateKnowledge(ctx context.Context, request *CreateKnowledgeRequest) (response *CreateKnowledgeResponse, err error) {
|
||||
@@ -163,7 +163,7 @@ func (k *knowledgeSVC) CreateKnowledge(ctx context.Context, request *CreateKnowl
|
||||
SpaceID: request.SpaceID,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
Status: int32(knowledgeModel.KnowledgeStatusEnable), // 目前向量库的初始化由文档触发,知识库无 init 过程
|
||||
Status: int32(knowledgeModel.KnowledgeStatusEnable), // At present, the initialization of the vector library is triggered by the document, and the knowledge base has no init process
|
||||
Description: request.Description,
|
||||
IconURI: request.IconUri,
|
||||
FormatType: int32(request.FormatType),
|
||||
@@ -217,7 +217,7 @@ func (k *knowledgeSVC) UpdateKnowledge(ctx context.Context, request *UpdateKnowl
|
||||
}
|
||||
|
||||
func (k *knowledgeSVC) DeleteKnowledge(ctx context.Context, request *DeleteKnowledgeRequest) error {
|
||||
// 先获取一下knowledge的信息
|
||||
// Fetch the knowledge base record first
|
||||
knModel, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
|
||||
if err != nil {
|
||||
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
|
||||
@@ -357,27 +357,27 @@ func (k *knowledgeSVC) CreateDocument(ctx context.Context, request *CreateDocume
|
||||
Storage: k.storage,
|
||||
Rdb: k.rdb,
|
||||
})
|
||||
// 1. 前置的动作,上传 tos 等
|
||||
// 1. Preliminary steps, e.g. uploading to TOS
|
||||
err = docProcessor.BeforeCreate()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// 2. 构建 落库
|
||||
// 2. Build the DB model
|
||||
err = docProcessor.BuildDBModel()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// 3. 插入数据库
|
||||
// 3. Insert into the database
|
||||
err = docProcessor.InsertDBModel()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// 4. 发起索引任务
|
||||
// 4. Initiate the indexing task
|
||||
err = docProcessor.Indexing()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// 5. 返回处理后的文档信息
|
||||
// 5. Return the processed document information
|
||||
docs := docProcessor.GetResp()
|
||||
return &CreateDocumentResponse{
|
||||
Documents: docs,
|
||||
@@ -397,7 +397,7 @@ func (k *knowledgeSVC) UpdateDocument(ctx context.Context, request *UpdateDocume
|
||||
}
|
||||
|
||||
if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
|
||||
// 如果是表格类型,可能是要改table的meta
|
||||
// If it is a table type, it may be necessary to change the meta of the table.
|
||||
if doc.TableInfo != nil {
|
||||
finalColumns, err := k.alterTableSchema(ctx, doc.TableInfo.Columns, request.TableInfo.Columns, doc.TableInfo.PhysicalTableName)
|
||||
if err != nil {
|
||||
@@ -673,7 +673,7 @@ func (k *knowledgeSVC) CreateSlice(ctx context.Context, request *CreateSliceRequ
|
||||
}
|
||||
if len(slices) == 1 {
|
||||
if request.Position == 1 || request.Position == 0 {
|
||||
// 插入到最前面
|
||||
// Insert to the front
|
||||
sliceInfo.Sequence = slices[0].Sequence - 1
|
||||
} else {
|
||||
sliceInfo.Sequence = slices[0].Sequence + 1
|
||||
@@ -759,7 +759,7 @@ func (k *knowledgeSVC) UpdateSlice(ctx context.Context, request *UpdateSliceRequ
|
||||
if docInfo == nil || docInfo.ID == 0 {
|
||||
return errorx.New(errno.ErrKnowledgeDocumentNotExistCode)
|
||||
}
|
||||
// 更新数据库中的存储
|
||||
// Update storage in the database
|
||||
if docInfo.DocumentType == int32(knowledgeModel.DocumentTypeText) ||
|
||||
docInfo.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
|
||||
sliceEntity := entity.Slice{RawContent: request.RawContent}
|
||||
@@ -851,7 +851,7 @@ func (k *knowledgeSVC) DeleteSlice(ctx context.Context, request *DeleteSliceRequ
|
||||
return errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
|
||||
}
|
||||
}
|
||||
// 删除数据库中的存储
|
||||
// Delete storage in the database
|
||||
err = k.sliceRepo.Delete(ctx, &model.KnowledgeDocumentSlice{ID: request.SliceID})
|
||||
if err != nil {
|
||||
logs.CtxErrorf(ctx, "delete slice failed, err: %v", err)
|
||||
@@ -905,9 +905,9 @@ func (k *knowledgeSVC) ListSlice(ctx context.Context, request *ListSliceRequest)
|
||||
}
|
||||
resp.Total = int(total)
|
||||
var sliceMap map[int64]*entity.Slice
|
||||
// 如果是表格类型,那么去table中取一下原始数据
|
||||
// If it is a table type, fetch the raw data from the table
|
||||
if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
|
||||
// 从数据库中查询原始数据
|
||||
// Query the raw data from the database
|
||||
sliceMap, err = k.selectTableData(ctx, doc.TableInfo, slices)
|
||||
if err != nil {
|
||||
logs.CtxErrorf(ctx, "select table data failed, err: %v", err)
|
||||
@@ -1010,7 +1010,7 @@ func (k *knowledgeSVC) CreateDocumentReview(ctx context.Context, request *Create
|
||||
}
|
||||
reviews = append(reviews, review)
|
||||
}
|
||||
// STEP 1. 生成ID
|
||||
// STEP 1. Generate ID
|
||||
reviewIDs, err := k.genMultiIDs(ctx, len(request.Reviews))
|
||||
if err != nil {
|
||||
return nil, errorx.New(errno.ErrKnowledgeIDGenCode)
|
||||
|
||||
@@ -190,7 +190,7 @@ func TestKnowledgeSVC_CreateDocument(t *testing.T) {
|
||||
// IconURI: "icon.png",
|
||||
// },
|
||||
// KnowledgeID: 666,
|
||||
// RawContent: "测试测试测试测试",
|
||||
// RawContent: "Test Test Test",
|
||||
// Source: entity.DocumentSourceCustom,
|
||||
// FileExtension: "txt",
|
||||
// }
|
||||
@@ -274,25 +274,25 @@ func TestKnowledgeSVC_CreateDocument(t *testing.T) {
|
||||
// VirtualTableName: "test",
|
||||
// Columns: []*entity.TableColumn{
|
||||
// {
|
||||
// Name: "第一列",
|
||||
// Name: "First Column",
|
||||
// Type: entity.TableColumnTypeBoolean,
|
||||
// Indexing: true,
|
||||
// Sequence: 0,
|
||||
// },
|
||||
// {
|
||||
// Name: "第二列",
|
||||
// Name: "Second column",
|
||||
// Type: entity.TableColumnTypeTime,
|
||||
// Indexing: false,
|
||||
// Sequence: 1,
|
||||
// },
|
||||
// {
|
||||
// Name: "第三列",
|
||||
// Name: "Third Column",
|
||||
// Type: entity.TableColumnTypeString,
|
||||
// Indexing: false,
|
||||
// Sequence: 2,
|
||||
// },
|
||||
// {
|
||||
// Name: "第四列",
|
||||
// Name: "Fourth column",
|
||||
// Type: entity.TableColumnTypeNumber,
|
||||
// Indexing: true,
|
||||
// Sequence: 3,
|
||||
@@ -384,25 +384,25 @@ func TestKnowledgeSVC_DeleteDocument(t *testing.T) {
|
||||
// VirtualTableName: "test",
|
||||
// Columns: []*entity.TableColumn{
|
||||
// {
|
||||
// Name: "第一列",
|
||||
// Name: "First Column",
|
||||
// Type: entity.TableColumnTypeBoolean,
|
||||
// Indexing: true,
|
||||
// Sequence: 0,
|
||||
// },
|
||||
// {
|
||||
// Name: "第二列",
|
||||
// Name: "Second column",
|
||||
// Type: entity.TableColumnTypeTime,
|
||||
// Indexing: false,
|
||||
// Sequence: 1,
|
||||
// },
|
||||
// {
|
||||
// Name: "第三列",
|
||||
// Name: "Third Column",
|
||||
// Type: entity.TableColumnTypeString,
|
||||
// Indexing: false,
|
||||
// Sequence: 2,
|
||||
// },
|
||||
// {
|
||||
// Name: "第四列",
|
||||
// Name: "Fourth column",
|
||||
// Type: entity.TableColumnTypeNumber,
|
||||
// Indexing: true,
|
||||
// Sequence: 3,
|
||||
@@ -415,14 +415,14 @@ func TestKnowledgeSVC_DeleteDocument(t *testing.T) {
|
||||
// assert.Equal(t, 1, len(doc))
|
||||
// time.Sleep(time.Second * 5)
|
||||
// doc[0].Name = "new_name"
|
||||
// doc[0].TableInfo.Columns[0].Name = "第一列_changeName"
|
||||
// doc[0].TableInfo.Columns[1].Name = "第二列_changeSeq"
|
||||
// doc[0].TableInfo.Columns[0].Name = "First column_changeName"
|
||||
// doc[0].TableInfo.Columns[1].Name = "Second column_changeSeq"
|
||||
// doc[0].TableInfo.Columns[1].Sequence = 2
|
||||
// doc[0].TableInfo.Columns[2].Name = "第三列_changeType"
|
||||
// doc[0].TableInfo.Columns[2].Name = "Third column_changeType"
|
||||
// doc[0].TableInfo.Columns[2].Type = entity.TableColumnTypeInteger
|
||||
// doc[0].TableInfo.Columns[2].Sequence = 1
|
||||
// // 删除原来的第四列并新建第四列
|
||||
// doc[0].TableInfo.Columns[3].Name = "第五列_create"
|
||||
// Delete the original fourth column and create a new fourth column
|
||||
// doc[0].TableInfo.Columns[3].Name = "Fifth column_create"
|
||||
// doc[0].TableInfo.Columns[3].Type = entity.TableColumnTypeNumber
|
||||
// doc[0].TableInfo.Columns[3].Sequence = 3
|
||||
// doc[0].TableInfo.Columns[3].ID = 0
|
||||
@@ -479,25 +479,25 @@ func TestKnowledgeSVC_ListDocument(t *testing.T) {
|
||||
// VirtualTableName: "test",
|
||||
// Columns: []*entity.TableColumn{
|
||||
// {
|
||||
// Name: "第一列",
|
||||
// Name: "First Column",
|
||||
// Type: entity.TableColumnTypeBoolean,
|
||||
// Indexing: true,
|
||||
// Sequence: 0,
|
||||
// },
|
||||
// {
|
||||
// Name: "第二列",
|
||||
// Name: "Second column",
|
||||
// Type: entity.TableColumnTypeTime,
|
||||
// Indexing: false,
|
||||
// Sequence: 1,
|
||||
// },
|
||||
// {
|
||||
// Name: "第三列",
|
||||
// Name: "Third Column",
|
||||
// Type: entity.TableColumnTypeString,
|
||||
// Indexing: false,
|
||||
// Sequence: 2,
|
||||
// },
|
||||
// {
|
||||
// Name: "第四列",
|
||||
// Name: "Fourth column",
|
||||
// Type: entity.TableColumnTypeNumber,
|
||||
// Indexing: true,
|
||||
// Sequence: 3,
|
||||
@@ -781,25 +781,25 @@ func TestKnowledgeSVC_ListDocument(t *testing.T) {
|
||||
// VirtualTableName: "test",
|
||||
// Columns: []*entity.TableColumn{
|
||||
// {
|
||||
// Name: "第一列",
|
||||
// Name: "First Column",
|
||||
// Type: entity.TableColumnTypeBoolean,
|
||||
// Indexing: true,
|
||||
// Sequence: 0,
|
||||
// },
|
||||
// {
|
||||
// Name: "第二列",
|
||||
// Name: "Second column",
|
||||
// Type: entity.TableColumnTypeTime,
|
||||
// Indexing: false,
|
||||
// Sequence: 1,
|
||||
// },
|
||||
// {
|
||||
// Name: "第三列",
|
||||
// Name: "Third Column",
|
||||
// Type: entity.TableColumnTypeString,
|
||||
// Indexing: false,
|
||||
// Sequence: 2,
|
||||
// },
|
||||
// {
|
||||
// Name: "第四列",
|
||||
// Name: "Fourth column",
|
||||
// Type: entity.TableColumnTypeNumber,
|
||||
// Indexing: true,
|
||||
// Sequence: 3,
|
||||
@@ -979,7 +979,7 @@ func TestKnowledgeSVC_Retrieve(t *testing.T) {
|
||||
//svc := MockKnowledgeSVC(t)
|
||||
//mockey.PatchConvey("test retrieve", t, func() {
|
||||
// res, err := svc.Retrieve(ctx, &knowledge.RetrieveRequest{
|
||||
// Query: "查找第三列为gogogo的数据",
|
||||
// Query: "Find the data of the third column gogogo",
|
||||
// KnowledgeIDs: []int64{1745810102455734000, 1745810094197395000},
|
||||
// Strategy: &entity.RetrievalStrategy{
|
||||
// TopK: ptr.Of(int64(2)),
|
||||
|
||||
@@ -116,7 +116,7 @@ func (k *knowledgeSVC) alterTableSchema(ctx context.Context, beforeColumns []*en
|
||||
continue
|
||||
}
|
||||
if targetColumns[i].ID == 0 {
|
||||
// 要新增的列
|
||||
// Columns to be added
|
||||
columnID, err := k.idgen.GenID(ctx)
|
||||
if err != nil {
|
||||
logs.CtxErrorf(ctx, "gen id failed, err: %v", err)
|
||||
@@ -132,7 +132,7 @@ func (k *knowledgeSVC) alterTableSchema(ctx context.Context, beforeColumns []*en
|
||||
})
|
||||
} else {
|
||||
if checkColumnExist(targetColumns[i].ID, beforeColumns) {
|
||||
// 要修改的列
|
||||
// Column to modify
|
||||
alterRequest.Operations = append(alterRequest.Operations, &rdb.AlterTableOperation{
|
||||
Action: rdbEntity.ModifyColumn,
|
||||
Column: &rdbEntity.Column{
|
||||
@@ -153,7 +153,7 @@ func (k *knowledgeSVC) alterTableSchema(ctx context.Context, beforeColumns []*en
|
||||
continue
|
||||
}
|
||||
if !checkColumnExist(beforeColumns[i].ID, targetColumns) {
|
||||
// 要删除的列
|
||||
// Column to delete
|
||||
alterRequest.Operations = append(alterRequest.Operations, &rdb.AlterTableOperation{
|
||||
Action: rdbEntity.DropColumn,
|
||||
Column: &rdbEntity.Column{
|
||||
|
||||
@@ -69,17 +69,17 @@ func (k *knowledgeSVC) Retrieve(ctx context.Context, request *RetrieveRequest) (
|
||||
}
|
||||
chain := compose.NewChain[*RetrieveContext, []*knowledgeModel.RetrieveSlice]()
|
||||
rewriteNode := compose.InvokableLambda(k.queryRewriteNode)
|
||||
// 向量化召回
|
||||
// Vector retrieval
|
||||
vectorRetrieveNode := compose.InvokableLambda(k.vectorRetrieveNode)
|
||||
// ES召回
|
||||
// ES retrieval
|
||||
EsRetrieveNode := compose.InvokableLambda(k.esRetrieveNode)
|
||||
// Nl2Sql召回
|
||||
// NL2SQL retrieval
|
||||
Nl2SqlRetrieveNode := compose.InvokableLambda(k.nl2SqlRetrieveNode)
|
||||
// pass user query Node
|
||||
passRequestContextNode := compose.InvokableLambda(k.passRequestContext)
|
||||
// reRank Node
|
||||
reRankNode := compose.InvokableLambda(k.reRankNode)
|
||||
// pack Result接口
|
||||
// Pack Result Interface
|
||||
packResult := compose.InvokableLambda(k.packResults)
|
||||
parallelNode := compose.NewParallel().
|
||||
AddLambda("vectorRetrieveNode", vectorRetrieveNode).
|
||||
@@ -190,11 +190,11 @@ func (k *knowledgeSVC) prepareRAGDocuments(ctx context.Context, documentIDs []in
|
||||
|
||||
func (k *knowledgeSVC) queryRewriteNode(ctx context.Context, req *RetrieveContext) (newRetrieveContext *RetrieveContext, err error) {
|
||||
if len(req.ChatHistory) == 0 {
|
||||
// 没有上下文不需要改写
|
||||
// No context, no rewriting.
|
||||
return req, nil
|
||||
}
|
||||
if !req.Strategy.EnableQueryRewrite || k.rewriter == nil {
|
||||
// 未开启rewrite功能,不需要上下文改写
|
||||
// Rewrite function is not enabled, no context rewrite is required
|
||||
return req, nil
|
||||
}
|
||||
var opts []messages2query.Option
|
||||
@@ -206,7 +206,7 @@ func (k *knowledgeSVC) queryRewriteNode(ctx context.Context, req *RetrieveContex
|
||||
logs.CtxErrorf(ctx, "rewrite query failed: %v", err)
|
||||
return req, nil
|
||||
}
|
||||
// 改写完成
|
||||
// Rewrite completed
|
||||
req.RewrittenQuery = &rewrittenQuery
|
||||
return req, nil
|
||||
}
|
||||
@@ -373,7 +373,7 @@ func (k *knowledgeSVC) nl2SqlExec(ctx context.Context, doc *model.KnowledgeDocum
|
||||
return nil, err
|
||||
}
|
||||
sql = addSliceIdColumn(sql)
|
||||
// 执行sql
|
||||
// Execute sql
|
||||
replaceMap := map[string]sqlparsercontract.TableColumn{}
|
||||
replaceMap[doc.Name] = sqlparsercontract.TableColumn{
|
||||
NewTableName: ptr.Of(doc.TableInfo.PhysicalTableName),
|
||||
@@ -395,7 +395,7 @@ func (k *knowledgeSVC) nl2SqlExec(ctx context.Context, doc *model.KnowledgeDocum
|
||||
logs.CtxErrorf(ctx, "parse sql failed: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
// 执行sql
|
||||
// Execute sql
|
||||
resp, err := k.rdb.ExecuteSQL(ctx, &rdb.ExecuteSQLRequest{
|
||||
SQL: parsedSQL,
|
||||
})
|
||||
@@ -428,7 +428,7 @@ func addSliceIdColumn(originalSql string) string {
|
||||
if selectIndex == -1 {
|
||||
return originalSql
|
||||
}
|
||||
result := originalSql[:selectIndex+len("select ")] // 保留 select 部分
|
||||
result := originalSql[:selectIndex+len("select ")] // Keep the "select " prefix
|
||||
remainder := originalSql[selectIndex+len("select "):]
|
||||
|
||||
lowerRemainder := strings.ToLower(remainder)
|
||||
@@ -474,25 +474,25 @@ func (k *knowledgeSVC) passRequestContext(ctx context.Context, req *RetrieveCont
|
||||
}
|
||||
|
||||
func (k *knowledgeSVC) reRankNode(ctx context.Context, resultMap map[string]any) (retrieveResult []*schema.Document, err error) {
|
||||
// 首先获取下retrieve上下文
|
||||
// First get the retrieve context
|
||||
retrieveCtx, ok := resultMap["passRequestContext"].(*RetrieveContext)
|
||||
if !ok {
|
||||
logs.CtxErrorf(ctx, "retrieve context is not found")
|
||||
return nil, errorx.New(errno.ErrKnowledgeSystemCode, errorx.KV("msg", "retrieve context is not found"))
|
||||
}
|
||||
// 获取下向量化召回的接口
|
||||
// Get the vector retrieval result
|
||||
vectorRetrieveResult, ok := resultMap["vectorRetrieveNode"].([]*schema.Document)
|
||||
if !ok {
|
||||
logs.CtxErrorf(ctx, "vector retrieve result is not found")
|
||||
vectorRetrieveResult = []*schema.Document{}
|
||||
}
|
||||
// 获取下es召回的接口
|
||||
// Get the ES retrieval result
|
||||
esRetrieveResult, ok := resultMap["esRetrieveNode"].([]*schema.Document)
|
||||
if !ok {
|
||||
logs.CtxErrorf(ctx, "es retrieve result is not found")
|
||||
esRetrieveResult = []*schema.Document{}
|
||||
}
|
||||
// 获取下nl2sql召回的接口
|
||||
// Get the NL2SQL retrieval result
|
||||
nl2SqlRetrieveResult, ok := resultMap["nl2SqlRetrieveNode"].([]*schema.Document)
|
||||
if !ok {
|
||||
logs.CtxErrorf(ctx, "nl2sql retrieve result is not found")
|
||||
@@ -508,10 +508,10 @@ func (k *knowledgeSVC) reRankNode(ctx context.Context, resultMap map[string]any)
|
||||
return data
|
||||
}
|
||||
|
||||
// 根据召回策略从不同渠道获取召回结果
|
||||
// Obtain recall results from different channels according to the recall strategy
|
||||
var retrieveResultArr [][]*rerank.Data
|
||||
if retrieveCtx.Strategy.EnableNL2SQL {
|
||||
// nl2sql结果
|
||||
// Nl2sql results
|
||||
retrieveResultArr = append(retrieveResultArr, docs2RerankData(nl2SqlRetrieveResult))
|
||||
}
|
||||
switch retrieveCtx.Strategy.SearchType {
|
||||
@@ -739,18 +739,18 @@ func (i *ImageContent) SetKV(k string, v string) {
|
||||
func (k *knowledgeSVC) ParseFrontEndImageContent(ctx context.Context, s string) []*ImageContent {
|
||||
res := make([]*ImageContent, 0)
|
||||
imgRe := regexp.MustCompile(`<img\s+[^>]*>`)
|
||||
// 查找所有匹配项
|
||||
// Find all matches
|
||||
matches := imgRe.FindAllSubmatchIndex([]byte(s), -1)
|
||||
// 遍历匹配项并输出src和data-tos-key字段
|
||||
// 遍历每个匹配项的索引
|
||||
// Traverse matches and output the src and data-tos-key fields
|
||||
// Iterate the index of each match
|
||||
for _, match := range matches {
|
||||
// 输出每个匹配项整个正则在文本中的开始和结束位置
|
||||
// Start and end offsets in the text of the whole regex match
|
||||
matchStart := match[0]
|
||||
matchEnd := match[1]
|
||||
all := s[match[0]:match[1]]
|
||||
|
||||
re := regexp.MustCompile(`<img\s+([^>]+)>`)
|
||||
// 初始化map存储kv信息,把多余信息去掉
|
||||
// Initialize map to store kv information and remove redundant information
|
||||
m := make(map[string]string)
|
||||
l := make([]string, 0)
|
||||
match := re.FindStringSubmatch(all)
|
||||
@@ -758,13 +758,13 @@ func (k *knowledgeSVC) ParseFrontEndImageContent(ctx context.Context, s string)
|
||||
continue
|
||||
}
|
||||
attributes := match[1]
|
||||
// 定义正则表达式模式,用于提取属性键值对
|
||||
// Defines a regular expression pattern for extracting attribute key-value pairs
|
||||
attrRe := regexp.MustCompile(`(\S+)=(?:"([^"]*)"|'([^']*)')`)
|
||||
|
||||
// 查找所有属性键值对
|
||||
// Find all attribute key-value pairs
|
||||
attrMatches := attrRe.FindAllStringSubmatch(attributes, -1)
|
||||
|
||||
// 提取并存储kv信息
|
||||
// Extract and store kv information
|
||||
for _, attrMatch := range attrMatches {
|
||||
key := attrMatch[1]
|
||||
value := attrMatch[2]
|
||||
|
||||
@@ -271,11 +271,11 @@ func (k *knowledgeSVC) ValidateTableSchema(ctx context.Context, request *Validat
|
||||
dst := doc.TableInfo
|
||||
result := make(map[string]string)
|
||||
|
||||
// validate 通过条件:
|
||||
// 1. 表头名称对齐(不要求顺序一致)
|
||||
// 2. indexing 列必须有值, 其余列可以为空
|
||||
// 3. 值类型可转换
|
||||
// 4. 已有表表头字段全包含
|
||||
// Validation passes when:
|
||||
// 1. Header name alignment (consistent order is not required)
|
||||
// 2. The indexing column must have a value, and the remaining columns can be empty
|
||||
// 3. Value types are convertible
|
||||
// 4. All existing table header fields are included
|
||||
dstMapping := make(map[string]*entity.TableColumn)
|
||||
for _, col := range dst.Columns {
|
||||
dstCol := col
|
||||
@@ -395,7 +395,7 @@ func (k *knowledgeSVC) GetDocumentTableInfo(ctx context.Context, request *GetDoc
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetDocumentTableInfoByID 先不作为接口,有需要再改
|
||||
// GetDocumentTableInfoByID is not exposed on the interface for now; change that if needed.
|
||||
func (k *knowledgeSVC) GetDocumentTableInfoByID(ctx context.Context, documentID int64, needData bool) (*TableSchemaResponse, error) {
|
||||
docs, err := k.documentRepo.MGetByID(ctx, []int64{documentID})
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user