chore: replace all cn comments to en version by volc api (#313)
This commit is contained in:
@@ -1,23 +1,23 @@
|
||||
namespace go flow.dataengine.dataset
|
||||
|
||||
// 类型
|
||||
// type
|
||||
enum FormatType {
|
||||
Text = 0 // 文本
|
||||
Table = 1 // 表格
|
||||
Image = 2 // 图片
|
||||
Database = 3 // 数据库
|
||||
Text = 0 // Text
|
||||
Table = 1 // table
|
||||
Image = 2 // image
|
||||
Database = 3 // database
|
||||
}
|
||||
|
||||
struct ChunkStrategy {
|
||||
1: string separator // 分隔符,如句号
|
||||
2: i64 max_tokens // 分片的最大token数
|
||||
3: bool remove_extra_spaces // 替换掉连续的空格、换行符和制表符
|
||||
4: bool remove_urls_emails // 是否去除url和email
|
||||
5: ChunkType chunk_type // 如果为0, 则不使用以上字段的配置
|
||||
7: optional CaptionType caption_type // 图片类型,图片描述文字的标注方式
|
||||
8: optional i64 overlap; //分段重叠度
|
||||
9: optional i64 max_level; //最大层级数(按层级分段时生效)
|
||||
10: optional bool save_title; //切片保留层级标题(按层级分段时生效)
|
||||
1: string separator // A separator, such as a period
|
||||
2: i64 max_tokens // Maximum number of tokens per chunk
|
||||
3: bool remove_extra_spaces // Replace consecutive spaces, newlines, and tabs
|
||||
4: bool remove_urls_emails // Remove URL and email
|
||||
5: ChunkType chunk_type // If 0, the configuration of the above fields is not used
|
||||
7: optional CaptionType caption_type // For image type: how image captions are annotated
|
||||
8: optional i64 overlap; //overlap between segments
|
||||
9: optional i64 max_level; //Maximum number of levels (effective when segmented by level)
|
||||
10: optional bool save_title; //Keep hierarchy titles in slices (effective when segmenting by level)
|
||||
}
|
||||
|
||||
enum ChunkType{
|
||||
@@ -32,67 +32,67 @@ enum ContentSchema{
|
||||
}
|
||||
|
||||
enum CaptionType {
|
||||
Auto = 0 // 智能标注
|
||||
Manual = 1 // 人工标注
|
||||
Auto = 0 // intelligent annotation
|
||||
Manual = 1 // manual annotation
|
||||
}
|
||||
|
||||
enum DocumentStatus {
|
||||
Processing = 0 // 上传中
|
||||
Enable = 1 // 生效
|
||||
Disable = 2 // 失效
|
||||
Deleted = 3 // 删除
|
||||
Resegment = 4 // 重新分片中,调用方不感知该状态
|
||||
Refreshing = 5 // 刷新中(刷新成功后会删除)
|
||||
Failed = 9 // 失败
|
||||
Processing = 0 // Uploading
|
||||
Enable = 1 // enabled (in effect)
|
||||
Disable = 2 // disabled (no longer in effect)
|
||||
Deleted = 3 // delete
|
||||
Resegment = 4 // Re-segmenting; callers are not aware of this state
|
||||
Refreshing = 5 // Refreshing (will be deleted after successful refresh)
|
||||
Failed = 9 // fail
|
||||
}
|
||||
|
||||
enum DocumentSource {
|
||||
Document = 0 // 本地文件上传
|
||||
Custom = 2 // 自定义类型
|
||||
Document = 0 // local file upload
|
||||
Custom = 2 // custom type
|
||||
}
|
||||
|
||||
|
||||
struct ParsingStrategy{
|
||||
1: optional ParsingType parsing_type; //解析类型
|
||||
2: optional bool image_extraction; //是否开启图片元素提取(精准解析时生效)
|
||||
3: optional bool table_extraction; //是否开启表格元素提取(精准解析时生效)
|
||||
4: optional bool image_ocr; //是否开启图片OCR(精准解析时生效)
|
||||
1: optional ParsingType parsing_type; //parse type
|
||||
2: optional bool image_extraction; //Whether to enable image element extraction (effective when accurately parsing)
|
||||
3: optional bool table_extraction; //Whether to enable table element extraction (effective when accurately parsing)
|
||||
4: optional bool image_ocr; //Whether to enable image OCR (effective in accurate parsing mode)
|
||||
}
|
||||
|
||||
enum ParsingType{
|
||||
FastParsing = 0 //快速解析
|
||||
AccurateParsing = 1 //精准解析
|
||||
FastParsing = 0 //fast parse
|
||||
AccurateParsing = 1 //accurate parsing
|
||||
}
|
||||
|
||||
struct IndexStrategy{
|
||||
1: optional bool vector_indexing; //是否开启向量索引(默认为true)
|
||||
2: optional bool keyword_indexing; //是否开启关键词索引(默认为true)
|
||||
3: optional bool hierarchical_indexing; //是否开启分层索引
|
||||
4: optional string model; //向量模型
|
||||
1: optional bool vector_indexing; //Whether to enable vector indexing (default is true)
|
||||
2: optional bool keyword_indexing; //Whether to enable keyword indexing (default is true)
|
||||
3: optional bool hierarchical_indexing; //Whether to enable hierarchical indexing
|
||||
4: optional string model; //vector model
|
||||
}
|
||||
|
||||
struct FilterStrategy{
|
||||
1: optional list<i32> filter_page; //过滤页数
|
||||
1: optional list<i32> filter_page; //filter pages
|
||||
}
|
||||
|
||||
// 排序字段
|
||||
// sort field
|
||||
enum OrderField {
|
||||
CreateTime = 1
|
||||
UpdateTime = 2
|
||||
}
|
||||
|
||||
// 排序规则
|
||||
// sort order
|
||||
enum OrderType {
|
||||
Desc = 1
|
||||
Asc = 2
|
||||
}
|
||||
|
||||
struct SinkStrategy {
|
||||
1: bool check_index // 是否检查索引成功
|
||||
1: bool check_index // Whether to check that indexing succeeded
|
||||
}
|
||||
enum ReviewStatus {
|
||||
Processing = 0 // 处理中
|
||||
Enable = 1 // 已完成
|
||||
Failed = 2 // 失败
|
||||
ForceStop = 3 // 失败
|
||||
Processing = 0 // Processing
|
||||
Enable = 1 // Completed.
|
||||
Failed = 2 // fail
|
||||
ForceStop = 3 // fail (NOTE(review): same comment as Failed; presumably "force-stopped" — confirm)
|
||||
}
|
||||
@@ -5,13 +5,13 @@ include "common.thrift"
|
||||
namespace go flow.dataengine.dataset
|
||||
|
||||
struct CreateDatasetRequest {
|
||||
1: string name // 知识库名称,长度不超过100个字符
|
||||
2: string description // 知识库描述
|
||||
3: i64 space_id (agw.js_conv="str", api.js_conv="true") // 空间ID
|
||||
4: string icon_uri // 知识库头像URI
|
||||
1: string name // Knowledge base name, no more than 100 characters in length
|
||||
2: string description // Knowledge Base Description
|
||||
3: i64 space_id (agw.js_conv="str", api.js_conv="true") // Space ID
|
||||
4: string icon_uri // Knowledge Base Avatar URI
|
||||
5: common.FormatType format_type
|
||||
6: i64 biz_id (agw.js_conv="str", api.js_conv="true") // 开放给第三方的业务标识, coze 传 0 或者不传
|
||||
7: i64 project_id (agw.js_conv="str", api.js_conv="true") //新增project ID
|
||||
6: i64 biz_id (agw.js_conv="str", api.js_conv="true") // Business identifier exposed to third parties; Coze passes 0 or omits it
|
||||
7: i64 project_id (agw.js_conv="str", api.js_conv="true") //project ID
|
||||
|
||||
255: optional base.Base Base
|
||||
}
|
||||
@@ -25,7 +25,7 @@ struct CreateDatasetResponse {
|
||||
}
|
||||
struct DatasetDetailRequest {
|
||||
1: list<string> DatasetIDs (agw.js_conv="str", api.body="dataset_ids")
|
||||
3: i64 project_id (agw.js_conv="str", api.js_conv="true") //新增project ID
|
||||
3: i64 project_id (agw.js_conv="str", api.js_conv="true") // project ID
|
||||
2: i64 space_id (agw.js_conv="str", api.js_conv="true")
|
||||
|
||||
255: optional base.Base Base
|
||||
@@ -42,38 +42,38 @@ struct DatasetDetailResponse {
|
||||
enum DatasetStatus {
|
||||
DatasetProcessing = 0
|
||||
DatasetReady = 1
|
||||
DatasetDeleted = 2 // 软删
|
||||
DatasetForbid = 3 // 不启用
|
||||
DatasetDeleted = 2 // soft delete
|
||||
DatasetForbid = 3 // Do not enable
|
||||
DatasetFailed = 9
|
||||
}
|
||||
|
||||
|
||||
struct Dataset {
|
||||
1: i64 dataset_id(agw.js_conv="str", api.js_conv="true")
|
||||
2: string name // 数据集名称
|
||||
3: list<string> file_list // 文件列表
|
||||
4: i64 all_file_size (agw.js_conv="str", api.js_conv="true") // 所有文件大小
|
||||
5: i32 bot_used_count // 使用Bot数
|
||||
2: string name // Dataset name
|
||||
3: list<string> file_list // file list
|
||||
4: i64 all_file_size (agw.js_conv="str", api.js_conv="true") // Total size of all files
|
||||
5: i32 bot_used_count // Number of bots using this dataset
|
||||
6: DatasetStatus status
|
||||
7: list<string> processing_file_list // 处理中的文件名称列表,兼容老逻辑
|
||||
8: i32 update_time // 更新时间,秒级时间戳
|
||||
7: list<string> processing_file_list // Names of files currently being processed; kept for compatibility with old logic
|
||||
8: i32 update_time // Update time, timestamp in seconds
|
||||
9: string icon_url
|
||||
10: string description
|
||||
11: string icon_uri
|
||||
12: bool can_edit // 是否可以编辑
|
||||
13: i32 create_time // 创建时间,秒级时间戳
|
||||
14: i64 creator_id (agw.js_conv="str", api.js_conv="true") // 创建者ID
|
||||
15: i64 space_id (agw.js_conv="str", api.js_conv="true") // 空间ID
|
||||
18: list<string> failed_file_list (agw.js_conv="str") // 处理失败的文件
|
||||
12: bool can_edit // Can it be edited?
|
||||
13: i32 create_time // Creation time, timestamp in seconds
|
||||
14: i64 creator_id (agw.js_conv="str", api.js_conv="true") // creator ID
|
||||
15: i64 space_id (agw.js_conv="str", api.js_conv="true") // Space ID
|
||||
18: list<string> failed_file_list (agw.js_conv="str") // Processing failed files
|
||||
|
||||
19: common.FormatType format_type
|
||||
20: i32 slice_count // 分段数量
|
||||
21: i32 hit_count // 命中次数
|
||||
22: i32 doc_count // 文档数量
|
||||
23: common.ChunkStrategy chunk_strategy // 切片规则
|
||||
20: i32 slice_count // number of segments
|
||||
21: i32 hit_count // hit count
|
||||
22: i32 doc_count // number of documents
|
||||
23: common.ChunkStrategy chunk_strategy // slicing rule
|
||||
|
||||
24: list<string> processing_file_id_list // 处理中的文件ID列表
|
||||
25: string project_id //新增project ID
|
||||
24: list<string> processing_file_id_list // List of file IDs in process
|
||||
25: string project_id //project ID
|
||||
}
|
||||
|
||||
struct ListDatasetRequest {
|
||||
@@ -82,12 +82,12 @@ struct ListDatasetRequest {
|
||||
3: optional i32 page
|
||||
4: optional i32 size
|
||||
5: i64 space_id (agw.js_conv="str", api.js_conv="true")
|
||||
6: optional common.OrderField order_field // 排序字段
|
||||
7: optional common.OrderType order_type // 排序规则
|
||||
8: optional string space_auth // 如果传了指定值, 就放开校验
|
||||
9: optional i64 biz_id (agw.js_conv="str", api.js_conv="true") // 开放给第三方的业务标识
|
||||
10: optional bool need_ref_bots // 是否需要拉取引用bots的数量,会增加响应延时
|
||||
11: optional string project_id //新增project ID
|
||||
6: optional common.OrderField order_field // sort field
|
||||
7: optional common.OrderType order_type // sort order
|
||||
8: optional string space_auth // If the designated value is passed, the check is skipped
|
||||
9: optional i64 biz_id (agw.js_conv="str", api.js_conv="true") // Business identity open to third parties
|
||||
10: optional bool need_ref_bots // Whether to fetch the number of referencing bots; this increases response latency
|
||||
11: optional string project_id //project ID
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
@@ -99,12 +99,12 @@ struct ListDatasetResponse {
|
||||
255: required base.BaseResp BaseResp
|
||||
}
|
||||
struct DatasetFilter {
|
||||
// 如果都设置了,And 关系
|
||||
1: optional string name // 关键字搜索, 按照名称模糊匹配
|
||||
2: optional list<string> dataset_ids (agw.js_conv="str") // 知识库id列表
|
||||
3: optional DatasetSource source_type // 来源
|
||||
4: optional DatasetScopeType scope_type // 搜索类型
|
||||
5: optional common.FormatType format_type // 类型
|
||||
// If multiple conditions are set, they are combined with AND
|
||||
1: optional string name // Keyword search, fuzzy match by name
|
||||
2: optional list<string> dataset_ids (agw.js_conv="str") // Knowledge base ID list
|
||||
3: optional DatasetSource source_type // source
|
||||
4: optional DatasetScopeType scope_type // search type
|
||||
5: optional common.FormatType format_type // type
|
||||
}
|
||||
|
||||
enum DatasetScopeType {
|
||||
@@ -131,10 +131,10 @@ struct DeleteDatasetResponse {
|
||||
}
|
||||
|
||||
struct UpdateDatasetRequest {
|
||||
1: i64 dataset_id (agw.js_conv="str", api.js_conv="true") // 知识库id
|
||||
2: string name // 知识库名称,不能为空
|
||||
3: string icon_uri // 知识库icon
|
||||
4: string description // 知识库描述
|
||||
1: i64 dataset_id (agw.js_conv="str", api.js_conv="true") // Knowledge ID
|
||||
2: string name // Knowledge base name, cannot be empty
|
||||
3: string icon_uri // Knowledge base icon
|
||||
4: string description // Knowledge Base Description
|
||||
5: optional DatasetStatus status
|
||||
|
||||
255: optional base.Base Base;
|
||||
|
||||
@@ -8,7 +8,7 @@ struct ListDocumentRequest {
|
||||
2: optional list<string> document_ids (agw.js_conv='str')
|
||||
3: optional i32 page
|
||||
4: optional i32 size
|
||||
5: optional string keyword // 根据名称搜索
|
||||
5: optional string keyword // Search by name
|
||||
|
||||
255: optional base.Base Base
|
||||
}
|
||||
@@ -25,59 +25,59 @@ struct ListDocumentResponse {
|
||||
struct DocumentInfo {
|
||||
1: string name
|
||||
2: i64 document_id(agw.js_conv='str', api.js_conv='true')
|
||||
3: optional string tos_uri // 文件链接
|
||||
5: i32 create_time // 创建时间
|
||||
6: i32 update_time // 更新时间
|
||||
7: optional i64 creator_id (agw.js_conv="str", api.js_conv='true', api.body="creator_id") // 创建人
|
||||
8: i32 slice_count // 包含分段数量
|
||||
9: string type // 文件后缀 csv, pdf 等
|
||||
10: i32 size // 文件大小 字节数
|
||||
11: i32 char_count // 字符数
|
||||
12: common.DocumentStatus status // 状态
|
||||
13: i32 hit_count // 命中次数
|
||||
14: common.DocumentSource source_type // 来源
|
||||
18: common.FormatType format_type // 文件类型
|
||||
19: optional list<TableColumn> table_meta // 表格类型元数据
|
||||
20: optional string web_url // url 地址
|
||||
21: optional string status_descript // 状态的详细信息;如果切片失败,返回失败信息
|
||||
24: optional i64 space_id(agw.js_conv="str", api.js_conv="true") // 空间id
|
||||
3: optional string tos_uri // file link
|
||||
5: i32 create_time // create_time
|
||||
6: i32 update_time // update time
|
||||
7: optional i64 creator_id (agw.js_conv="str", api.js_conv='true', api.body="creator_id") // creator_id
|
||||
8: i32 slice_count // number of segments included
|
||||
9: string type // File suffix csv, pdf, etc
|
||||
10: i32 size // File size, number of bytes
|
||||
11: i32 char_count // character count
|
||||
12: common.DocumentStatus status // status
|
||||
13: i32 hit_count // hit count
|
||||
14: common.DocumentSource source_type // source
|
||||
18: common.FormatType format_type // file type
|
||||
19: optional list<TableColumn> table_meta // Table type metadata
|
||||
20: optional string web_url // URL address
|
||||
21: optional string status_descript // Details of the status; if the slice fails, return the failure information
|
||||
24: optional i64 space_id(agw.js_conv="str", api.js_conv="true") // Space ID
|
||||
|
||||
// 以下字段仅针对重构后的表格类型有用,用于前端判断
|
||||
26: optional bool editable_append_content // 仅针对表格类型,是否允许添加内容、修改表结构
|
||||
27: common.ChunkStrategy chunk_strategy // 切片规则
|
||||
// The following fields are only useful for the refactored table type and are used by the front end for decisions
|
||||
26: optional bool editable_append_content // Table type only: whether adding content and modifying the table structure are allowed
|
||||
27: common.ChunkStrategy chunk_strategy // slicing rule
|
||||
|
||||
28: optional string imagex_uri // imagex 存储的文件链接
|
||||
29: optional string doc_outline // 层级分段文档树Json (未使用)
|
||||
30: optional common.ParsingStrategy parsing_strategy // 解析策略
|
||||
32: optional common.FilterStrategy filter_strategy // 过滤策略
|
||||
33: optional string doc_tree_tos_url // 层级分段文档树 tos_url
|
||||
34: optional string preview_tos_url // 预览用的原文档 tos_url
|
||||
35: optional i64 review_id // 预览用的原文档 tos_url
|
||||
28: optional string imagex_uri // File links stored by ImageX
|
||||
29: optional string doc_outline // Hierarchical Segmentation Document Tree Json (unused)
|
||||
30: optional common.ParsingStrategy parsing_strategy // parsing strategy
|
||||
32: optional common.FilterStrategy filter_strategy // filtering strategy
|
||||
33: optional string doc_tree_tos_url // Hierarchical segmented document tree tos_url
|
||||
34: optional string preview_tos_url // tos_url of the original document used for preview
|
||||
35: optional i64 review_id // Review ID (NOTE(review): previous comment was copied from preview_tos_url — confirm meaning)
|
||||
}
|
||||
|
||||
struct TableColumn {
|
||||
1: i64 id(agw.js_conv="str", api.js_conv="true", api.body="id") // 列 id
|
||||
2: string column_name // 列名
|
||||
3: bool is_semantic // 是否为语义匹配列
|
||||
4: i64 sequence(agw.js_conv="str", api.js_conv="true", api.body="sequence")// 列原本在 excel 的序号
|
||||
5: optional ColumnType column_type // 列类型
|
||||
1: i64 id(agw.js_conv="str", api.js_conv="true", api.body="id") // Column ID
|
||||
2: string column_name // column_name
|
||||
3: bool is_semantic // Is it a semantically matched column?
|
||||
4: i64 sequence(agw.js_conv="str", api.js_conv="true", api.body="sequence")// Original index of the column in Excel
|
||||
5: optional ColumnType column_type // column type
|
||||
6: optional bool contains_empty_value
|
||||
7: optional string desc // 描述
|
||||
7: optional string desc // description
|
||||
}
|
||||
|
||||
|
||||
enum ColumnType {
|
||||
Unknown = 0
|
||||
Text = 1 // 文本
|
||||
Number = 2 // 数字
|
||||
Date = 3 // 时间
|
||||
Text = 1 // Text
|
||||
Number = 2 // number
|
||||
Date = 3 // time
|
||||
Float = 4 // float
|
||||
Boolean = 5 // bool
|
||||
Image = 6 // 图片
|
||||
Image = 6 // picture
|
||||
}
|
||||
|
||||
struct DeleteDocumentRequest {
|
||||
2: list<string> document_ids // 要删除的文档ID列表
|
||||
2: list<string> document_ids // List of document IDs to delete
|
||||
|
||||
255: optional base.Base Base
|
||||
}
|
||||
@@ -91,12 +91,12 @@ struct DeleteDocumentResponse {
|
||||
struct UpdateDocumentRequest{
|
||||
1: i64 document_id (agw.js_conv="str", api.js_conv="true")
|
||||
|
||||
// 需要更新就传, 更新名称
|
||||
// Pass this only when the name needs to be updated
|
||||
3: optional string document_name
|
||||
|
||||
|
||||
// 更新表结构
|
||||
5: optional list<TableColumn> table_meta // 表格元数据
|
||||
// Update table structure
|
||||
5: optional list<TableColumn> table_meta // Table metadata
|
||||
|
||||
255: optional base.Base Base
|
||||
}
|
||||
@@ -109,8 +109,8 @@ struct UpdateDocumentResponse {
|
||||
|
||||
|
||||
struct UpdatePhotoCaptionRequest {
|
||||
1: required i64 document_id(agw.js_conv='str', api.js_conv='true') // 文档ID
|
||||
2: required string caption // 要更新的图片描述信息
|
||||
1: required i64 document_id(agw.js_conv='str', api.js_conv='true') // Document ID
|
||||
2: required string caption // Picture description information to be updated
|
||||
|
||||
255: optional base.Base Base
|
||||
}
|
||||
@@ -122,18 +122,18 @@ struct UpdatePhotoCaptionResponse {
|
||||
}
|
||||
|
||||
struct ListPhotoRequest {
|
||||
1: required i64 dataset_id(agw.js_conv='str', api.js_conv='true') // 知识库ID
|
||||
2: optional i32 page // 页数,从 1 开始
|
||||
3: optional i32 size // 每页大小
|
||||
1: required i64 dataset_id(agw.js_conv='str', api.js_conv='true') // Knowledge ID
|
||||
2: optional i32 page // Number of pages, starting from 1
|
||||
3: optional i32 size // page size
|
||||
4: optional PhotoFilter filter
|
||||
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
struct PhotoFilter {
|
||||
1: optional bool has_caption // true 筛选 “已标注” 的图片,false 筛选 “未标注” 的图片
|
||||
2: optional string keyword // 搜索关键字,对图片名称和图片描述进行搜索
|
||||
3: optional common.DocumentStatus status // 状态
|
||||
1: optional bool has_caption // true filters images that have a caption ("annotated"); false filters images without one ("unannotated")
|
||||
2: optional string keyword // Search keywords, search for image names and picture descriptions
|
||||
3: optional common.DocumentStatus status // status
|
||||
}
|
||||
|
||||
struct ListPhotoResponse {
|
||||
@@ -145,44 +145,44 @@ struct ListPhotoResponse {
|
||||
255: required base.BaseResp BaseResp(api.none="true")
|
||||
}
|
||||
|
||||
struct PhotoInfo { // 图片型知识库一个图片对应一个文档
|
||||
1: string name // 图片名称
|
||||
2: i64 document_id(agw.js_conv='str', api.js_conv='true') // 文档ID
|
||||
3: string url // 图片链接
|
||||
4: string caption // 图片描述信息
|
||||
5: i32 create_time // 创建时间
|
||||
6: i32 update_time // 更新时间
|
||||
7: i64 creator_id (agw.js_conv="str", api.js_conv='true', agw.key="creator_id", api.body="creator_id") // 创建人
|
||||
8: string type // 图片后缀 jpg, png 等
|
||||
9: i32 size // 图片大小
|
||||
10: common.DocumentStatus status // 状态
|
||||
11: common.DocumentSource source_type // 来源
|
||||
struct PhotoInfo { // In an image knowledge base, one image corresponds to one document
|
||||
1: string name // image name
|
||||
2: i64 document_id(agw.js_conv='str', api.js_conv='true') // Document ID
|
||||
3: string url // image link
|
||||
4: string caption // picture description information
|
||||
5: i32 create_time // create_time
|
||||
6: i32 update_time // update time
|
||||
7: i64 creator_id (agw.js_conv="str", api.js_conv='true', agw.key="creator_id", api.body="creator_id") // creator_id
|
||||
8: string type // Image suffix jpg, png, etc
|
||||
9: i32 size // image size
|
||||
10: common.DocumentStatus status // status
|
||||
11: common.DocumentSource source_type // source
|
||||
}
|
||||
|
||||
struct PhotoDetailRequest {
|
||||
1: required list<string> document_ids (agw.js_conv='str') // 文档ID列表
|
||||
2: required i64 dataset_id(agw.js_conv='str', api.js_conv='true') // 知识库ID
|
||||
1: required list<string> document_ids (agw.js_conv='str') // Document ID List
|
||||
2: required i64 dataset_id(agw.js_conv='str', api.js_conv='true') // Knowledge ID
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
struct PhotoDetailResponse {
|
||||
1: map<string, PhotoInfo> photo_infos // 文档ID到图片信息的映射
|
||||
1: map<string, PhotoInfo> photo_infos // Mapping document ID to image information
|
||||
253: required i64 code
|
||||
254: required string msg
|
||||
255: required base.BaseResp BaseResp(api.none="true")
|
||||
}
|
||||
|
||||
struct ResegmentRequest {
|
||||
1: i64 dataset_id (agw.js_conv="str", api.js_conv="true") // 知识库ID
|
||||
2: list<string> document_ids // 要重新分段的文档
|
||||
3: common.ChunkStrategy chunk_strategy // 分段策略
|
||||
5: optional common.ParsingStrategy parsing_strategy // 解析策略
|
||||
7: optional common.FilterStrategy filter_strategy; // 过滤策略
|
||||
1: i64 dataset_id (agw.js_conv="str", api.js_conv="true") // Knowledge ID
|
||||
2: list<string> document_ids // Document to be re-segmented
|
||||
3: common.ChunkStrategy chunk_strategy // segmentation strategy
|
||||
5: optional common.ParsingStrategy parsing_strategy // parsing strategy
|
||||
7: optional common.FilterStrategy filter_strategy; // filtering strategy
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
struct ResegmentResponse {
|
||||
1: list<DocumentInfo> document_infos // 老版需要. 仅返回id 和名称即可
|
||||
1: list<DocumentInfo> document_infos // Required by the old version; only the id and name need to be returned
|
||||
|
||||
253: required i64 code
|
||||
254: required string msg
|
||||
@@ -190,16 +190,16 @@ struct ResegmentResponse {
|
||||
}
|
||||
|
||||
struct CreateDocumentRequest {
|
||||
1: i64 dataset_id(agw.js_conv='str', api.js_conv='true') // 要插入文档的知识库id
|
||||
1: i64 dataset_id(agw.js_conv='str', api.js_conv='true') // The knowledge base id of the document to insert.
|
||||
|
||||
4: common.FormatType format_type // 知识库的类型,目前支持文本、表格、图片三种知识库
|
||||
4: common.FormatType format_type // Types of knowledge bases, currently supporting text, tables, and images
|
||||
|
||||
// 表格类型一次只能创建一个
|
||||
6: list<DocumentBase> document_bases // 待创建的文档信息
|
||||
// For the table type, only one can be created at a time
|
||||
6: list<DocumentBase> document_bases // Document information to be created
|
||||
|
||||
17: optional common.ChunkStrategy chunk_strategy // 只在知识库中没有文档时需要传递,已有则从知识库获取.切片规则,为空则自动按段落切片
|
||||
31: optional bool is_append // 为 true 时向已有的 document 追加内容。text 类型不能使用
|
||||
32: optional common.ParsingStrategy parsing_strategy // 解析策略
|
||||
17: optional common.ChunkStrategy chunk_strategy // Slicing rules. Only needs to be passed when the knowledge base has no documents yet; otherwise it is taken from the knowledge base. If empty, content is sliced automatically by paragraph
|
||||
31: optional bool is_append // Appends content to an existing document when true. The text type cannot be used
|
||||
32: optional common.ParsingStrategy parsing_strategy // parsing strategy
|
||||
|
||||
255: optional base.Base Base
|
||||
}
|
||||
@@ -212,37 +212,37 @@ struct CreateDocumentResponse {
|
||||
255: required base.BaseResp BaseResp
|
||||
}
|
||||
|
||||
// 用于创建文档的基本信息
|
||||
// Basic information for creating a document
|
||||
struct DocumentBase{
|
||||
1: string name // 文档名称
|
||||
1: string name // Document name
|
||||
2: SourceInfo source_info
|
||||
// 以下参数表格类型需要传递
|
||||
4: optional list<TableColumn> table_meta // 表格元数据
|
||||
5: optional TableSheet table_sheet // 表格解析信息
|
||||
6: optional common.FilterStrategy filter_strategy // 过滤策略
|
||||
7: optional string caption // 图片类型知识库,人工标注时的图片描述
|
||||
// The following parameters need to be passed for the table type
|
||||
4: optional list<TableColumn> table_meta // Table metadata
|
||||
5: optional TableSheet table_sheet // Table parsing information
|
||||
6: optional common.FilterStrategy filter_strategy // filtering strategy
|
||||
7: optional string caption // Image type knowledge base, picture description when manually annotated
|
||||
}
|
||||
|
||||
// 支持多种数据源
|
||||
// Supports multiple data sources
|
||||
struct SourceInfo {
|
||||
1: optional string tos_uri (api.body="tos_uri"); // 本地上传返回的 uri
|
||||
1: optional string tos_uri (api.body="tos_uri"); // URI returned by local upload
|
||||
|
||||
4: optional common.DocumentSource document_source (api.body="document_source");
|
||||
|
||||
// document_source 自定义原始内容: 表格型知识库需要符合的格式:json list<map<string, string>>
|
||||
// document_source custom raw content: a tabular knowledge base requires the format: JSON list<map<string, string>>
|
||||
5: optional string custom_content (api.body="custom_content")
|
||||
|
||||
// document_source 本地: 如果不传 tos 地址, 则需要传文件 base64, 类型
|
||||
7: optional string file_base64 // 文件经过 base64 后的字符串
|
||||
8: optional string file_type // 文件类型, 比如 pdf
|
||||
// document_source local: if the tos address is not passed, the file's base64 content and type must be passed
|
||||
7: optional string file_base64 // File string after base64
|
||||
8: optional string file_type // File type, such as PDF
|
||||
|
||||
// imagex_uri, 和 tos_uri 二选一, imagex_uri 优先,需要通过 imagex 的方法获取数据和签发 url
|
||||
// Provide either imagex_uri or tos_uri; imagex_uri takes priority and requires fetching the data and signing the URL via the imagex method
|
||||
10: optional string imagex_uri
|
||||
}
|
||||
struct TableSheet {
|
||||
1: i64 sheet_id (agw.js_conv="str", agw.key="sheet_id", api.js_conv="true", api.body="sheet_id") , // 用户选择的 sheet id
|
||||
2: i64 header_line_idx (agw.js_conv="str", agw.key="header_line_idx", api.js_conv="true", api.body="header_line_idx"), // 用户选择的表头行数,从 0 开始编号
|
||||
3: i64 start_line_idx (agw.js_conv="str", agw.key="start_line_idx", api.js_conv="true", api.body="start_line_idx") , // 用户选择的起始行号,从 0 开始编号
|
||||
1: i64 sheet_id (agw.js_conv="str", agw.key="sheet_id", api.js_conv="true", api.body="sheet_id") , // User selected sheet id
|
||||
2: i64 header_line_idx (agw.js_conv="str", agw.key="header_line_idx", api.js_conv="true", api.body="header_line_idx"), // Index of the header row selected by the user, numbered from 0
|
||||
3: i64 start_line_idx (agw.js_conv="str", agw.key="start_line_idx", api.js_conv="true", api.body="start_line_idx") , // User-selected starting line number, numbered from 0
|
||||
}
|
||||
|
||||
|
||||
@@ -261,63 +261,63 @@ struct GetDocumentProgressResponse {
|
||||
|
||||
struct DocumentProgress {
|
||||
1: i64 document_id(agw.js_conv="str", api.js_conv='true')
|
||||
2: i32 progress // 知识库进度百分比
|
||||
2: i32 progress // Knowledge Base Progress Percentage
|
||||
3: common.DocumentStatus status
|
||||
4: optional string status_descript // 状态的详细描述;如果切片失败,返回失败信息
|
||||
4: optional string status_descript // A detailed description of the status; if the slice fails, a failure message is returned
|
||||
5: string document_name
|
||||
6: optional i64 remaining_time // 剩余时间单位秒
|
||||
6: optional i64 remaining_time // Remaining time in seconds
|
||||
7: optional i64 size
|
||||
8: optional string type
|
||||
9: optional string url
|
||||
}
|
||||
|
||||
// 获取 database 上传的表格文件元信息
|
||||
// Get the table file meta information uploaded by the database
|
||||
struct GetTableSchemaRequest {
|
||||
1: optional TableSheet table_sheet; // 表格解析信息, 默认初始值0,0,1,表示第1个表格,表头行为第1行,数据行从第2行开始
|
||||
2: optional TableDataType table_data_type; // 不传默认返回所有数据
|
||||
3: optional i64 document_id(agw.js_conv="str", agw.key="document_id", api.js_conv="true", api.body="document_id"); // 兼容重构前的版本:如果需要拉取的是当前 document 的 schema 时传递该值
|
||||
4: optional SourceInfo source_file; // source file 的信息,新增 segment / 之前逻辑迁移到这里
|
||||
5: optional list<TableColumn> origin_table_meta; // 表格预览前端需要传递原始的数据表结构
|
||||
6: optional list<TableColumn> preview_table_meta; // 表格预览前端需要传递用户编辑之后的数据表结构
|
||||
1: optional TableSheet table_sheet; // Table parsing information; the default initial values 0, 0, 1 mean the first table, header on row 1, and data rows starting from row 2
|
||||
2: optional TableDataType table_data_type; // If not passed, all data is returned by default
|
||||
3: optional i64 document_id(agw.js_conv="str", agw.key="document_id", api.js_conv="true", api.body="document_id"); // Compatible with pre-refactoring versions: pass this value if you need to pull the schema of the current document
|
||||
4: optional SourceInfo source_file; // Source file information, add segment/before logic migrate here
|
||||
5: optional list<TableColumn> origin_table_meta; // The table preview front end needs to pass the original data table structure
|
||||
6: optional list<TableColumn> preview_table_meta; // The table preview front end needs to pass the data table structure edited by the user
|
||||
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
enum TableDataType {
|
||||
AllData = 0 // schema sheets 和 preview data
|
||||
OnlySchema = 1 // 只需要 schema 结构 & Sheets
|
||||
OnlyPreview = 2 // 只需要 preview data
|
||||
AllData = 0 // Schema sheets and preview data
|
||||
OnlySchema = 1 // Only need schema structure & Sheets
|
||||
OnlyPreview = 2 // Only need preview data
|
||||
}
|
||||
|
||||
struct DocTableSheet {
|
||||
1: i64 id; // sheet 的编号
|
||||
2: string sheet_name; // sheet 名
|
||||
3: i64 total_row; // 总行数
|
||||
1: i64 id; // Number of sheet
|
||||
2: string sheet_name; // Sheet name
|
||||
3: i64 total_row; // total number of rows
|
||||
}
|
||||
|
||||
struct GetTableSchemaResponse {
|
||||
1: required i32 code
|
||||
2: required string msg
|
||||
3: list<DocTableSheet> sheet_list
|
||||
4: list<TableColumn> table_meta // 选中的 sheet 的 schema, 不选择默认返回第一个 sheet
|
||||
5: list<map<string,string>> preview_data(api.body="preview_data") // knowledge table 场景中会返回
|
||||
4: list<TableColumn> table_meta // Schema of the selected sheet; if none is selected, the first sheet is returned by default
|
||||
5: list<map<string,string>> preview_data(api.body="preview_data") // Returned in the knowledge table scenario
|
||||
|
||||
255: optional base.BaseResp BaseResp(api.none="true")
|
||||
}
|
||||
|
||||
// 判断用户配置的 schema 是否和对应 document id 的一致
|
||||
// Determine whether the schema configured by the user is consistent with the corresponding document id
|
||||
struct ValidateTableSchemaRequest {
|
||||
1: i64 space_id (agw.js_conv="str", agw.key="space_id", api.js_conv="true", api.body="space_id") // 空间ID
|
||||
2: i64 document_id (agw.js_conv="str", agw.key="document_id", api.js_conv="true", api.body="document_id") // 要校验的文档ID
|
||||
3: SourceInfo source_info (api.body="source_file") // source file 的信息
|
||||
4: TableSheet table_sheet (api.body="table_sheet") // 表格解析信息, 默认初始值0,0,1,表示第1个表格,表头行为第1行,数据行从第2行开始
|
||||
1: i64 space_id (agw.js_conv="str", agw.key="space_id", api.js_conv="true", api.body="space_id") // Space ID
|
||||
2: i64 document_id (agw.js_conv="str", agw.key="document_id", api.js_conv="true", api.body="document_id") // Document ID to verify
|
||||
3: SourceInfo source_info (api.body="source_file") // Information from the source file
|
||||
4: TableSheet table_sheet (api.body="table_sheet") // Table parsing information; the default initial values 0, 0, 1 mean the first table, header on row 1, and data rows starting from row 2
|
||||
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
struct ValidateTableSchemaResponse {
|
||||
1: optional map<string,string> ColumnValidResult (api.body="column_valid_result");
|
||||
// 如果失败会返回错误码
|
||||
// If it fails, an error code will be returned.
|
||||
253: required i64 code
|
||||
254: required string msg
|
||||
255: optional base.BaseResp BaseResp(api.none="true")
|
||||
@@ -330,7 +330,7 @@ struct ExtractPhotoCaptionRequest {
|
||||
}
|
||||
|
||||
struct ExtractPhotoCaptionResponse {
|
||||
1: string caption // 图片描述
|
||||
1: string caption // picture description
|
||||
253: required i64 code
|
||||
254: required string msg
|
||||
255: required base.BaseResp BaseResp(api.none="true")
|
||||
|
||||
@@ -7,14 +7,14 @@ include "review.thrift"
|
||||
namespace go flow.dataengine.dataset
|
||||
|
||||
service DatasetService {
|
||||
// Knowledge base APIs
|
||||
// Knowledge base related
|
||||
dataset.GetIconResponse GetIconForDataset(1:dataset.GetIconRequest req) (api.post='/api/knowledge/icon/get', api.category="knowledge",agw.preserve_base="true")
|
||||
dataset.CreateDatasetResponse CreateDataset(1:dataset.CreateDatasetRequest req) (api.post='/api/knowledge/create', api.category="knowledge",agw.preserve_base="true")
|
||||
dataset.DatasetDetailResponse DatasetDetail(1:dataset.DatasetDetailRequest req) (api.post='/api/knowledge/detail', api.category="knowledge",agw.preserve_base="true")
|
||||
dataset.ListDatasetResponse ListDataset(1:dataset.ListDatasetRequest req) (api.post='/api/knowledge/list', api.category="knowledge",agw.preserve_base="true")
|
||||
dataset.DeleteDatasetResponse DeleteDataset(1:dataset.DeleteDatasetRequest req) (api.post='/api/knowledge/delete', api.category="knowledge",agw.preserve_base="true")
|
||||
dataset.UpdateDatasetResponse UpdateDataset(1:dataset.UpdateDatasetRequest req) (api.post='/api/knowledge/update', api.category="knowledge",agw.preserve_base="true")
|
||||
// Document APIs
|
||||
// Document related
|
||||
document.CreateDocumentResponse CreateDocument(1:document.CreateDocumentRequest req) (api.post='/api/knowledge/document/create', api.category="knowledge",agw.preserve_base="true")
|
||||
document.ListDocumentResponse ListDocument(1:document.ListDocumentRequest req) (api.post='/api/knowledge/document/list', api.category="knowledge",agw.preserve_base="true")
|
||||
document.DeleteDocumentResponse DeleteDocument(1:document.DeleteDocumentRequest req) (api.post='/api/knowledge/document/delete', api.category="knowledge",agw.preserve_base="true")
|
||||
@@ -28,12 +28,12 @@ service DatasetService {
|
||||
document.GetTableSchemaResponse GetTableSchema(1:document.GetTableSchemaRequest req) (api.post='/api/knowledge/table_schema/get', api.category="knowledge",agw.preserve_base="true")
|
||||
document.ValidateTableSchemaResponse ValidateTableSchema(1:document.ValidateTableSchemaRequest req) (api.post='/api/knowledge/table_schema/validate', api.category="knowledge",agw.preserve_base="true")
|
||||
|
||||
// Slice APIs
|
||||
// Slice related
|
||||
slice.DeleteSliceResponse DeleteSlice(1:slice.DeleteSliceRequest req) (api.post='/api/knowledge/slice/delete', api.category="knowledge",agw.preserve_base="true")
|
||||
slice.CreateSliceResponse CreateSlice(1:slice.CreateSliceRequest req) (api.post='/api/knowledge/slice/create', api.category="knowledge",agw.preserve_base="true")
|
||||
slice.UpdateSliceResponse UpdateSlice(1:slice.UpdateSliceRequest req) (api.post='/api/knowledge/slice/update', api.category="knowledge",agw.preserve_base="true")
|
||||
slice.ListSliceResponse ListSlice(1:slice.ListSliceRequest req) (api.post='/api/knowledge/slice/list', api.category="knowledge",agw.preserve_base="true")
|
||||
/** Pre-slicing (document review) APIs **/
|
||||
/** Pre-slicing (document review) APIs **/
|
||||
review.CreateDocumentReviewResponse CreateDocumentReview(1:review.CreateDocumentReviewRequest req) (api.post='/api/knowledge/review/create', api.category="knowledge",agw.preserve_base="true")
|
||||
review.MGetDocumentReviewResponse MGetDocumentReview(1:review.MGetDocumentReviewRequest req) (api.post='/api/knowledge/review/mget', api.category="knowledge",agw.preserve_base="true")
|
||||
review.SaveDocumentReviewResponse SaveDocumentReview(1:review.SaveDocumentReviewRequest req) (api.post='/api/knowledge/review/save', api.category="knowledge",agw.preserve_base="true")
|
||||
|
||||
@@ -15,7 +15,7 @@ struct Review {
|
||||
2: string document_name
|
||||
3: string document_type
|
||||
4: string tos_url
|
||||
5: optional common.ReviewStatus status // 状态
|
||||
5: optional common.ReviewStatus status // status
|
||||
6: optional string doc_tree_tos_url
|
||||
7: optional string preview_tos_url
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ include "common.thrift"
|
||||
namespace go flow.dataengine.dataset
|
||||
|
||||
// Request for deleting slices by ID.
// NOTE(review): field 4 appears twice below (same declaration, different comment
// revision), separated by '|' / '||||' lines. This looks like diff-viewer residue;
// confirm which copy is authoritative before compiling this IDL.
struct DeleteSliceRequest {
|
||||
4: optional list<string> slice_ids (api.body="slice_ids") // IDs of the slices to delete (passed as strings)
|
||||
4: optional list<string> slice_ids (api.body="slice_ids") // IDs of the slices to delete (passed as strings)
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
@@ -15,14 +15,14 @@ struct DeleteSliceResponse {
|
||||
}
|
||||
|
||||
// Request for inserting a new slice into a document at a given position.
// NOTE(review): fields 2, 5 and 6 each appear twice below (same declaration, different
// comment revision), separated by '|' / '||||' lines. This looks like diff-viewer
// residue; confirm which copy is authoritative before compiling this IDL.
struct CreateSliceRequest {
|
||||
2: required i64 document_id(agw.js_conv="str", api.js_conv="true") // ID of the document the new slice is inserted into
|
||||
5: optional string raw_text // Content of the new slice
|
||||
6: optional i64 sequence(agw.js_conv="str", api.js_conv="true") // Insert position: 1 means the start of the document; the maximum is the last slice's position + 1
|
||||
2: required i64 document_id(agw.js_conv="str", api.js_conv="true") // ID of the document the new slice is inserted into
|
||||
5: optional string raw_text // Content of the new slice
|
||||
6: optional i64 sequence(agw.js_conv="str", api.js_conv="true") // Insert position: 1 means the start of the document; the maximum is the last slice's position + 1
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
struct CreateSliceResponse {
|
||||
1: i64 slice_id (agw.js_conv="str", api.js_conv="true") // 新增分片ID
|
||||
1: i64 slice_id (agw.js_conv="str", api.js_conv="true") // Add sharding ID
|
||||
|
||||
253: required i64 code
|
||||
254: required string msg
|
||||
@@ -30,15 +30,15 @@ struct CreateSliceResponse {
|
||||
}
|
||||
|
||||
// Request for replacing the content of an existing slice.
// NOTE(review): fields 2 and 7 each appear twice below (same declaration, different
// comment revision), separated by '|' / '||||' lines. This looks like diff-viewer
// residue; confirm which copy is authoritative before compiling this IDL.
struct UpdateSliceRequest {
|
||||
2: required i64 slice_id (agw.js_conv="str", api.js_conv="true") // ID of the slice to update
|
||||
7: optional string raw_text // New content for the slice
|
||||
2: required i64 slice_id (agw.js_conv="str", api.js_conv="true") // ID of the slice to update
|
||||
7: optional string raw_text // New content for the slice
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
// Status of a slice. The numeric values are not contiguous (0, 1, 9).
// NOTE(review): each enumerator appears twice below (same declaration, different comment
// revision), separated by '|' / '||||' lines. This looks like diff-viewer residue;
// confirm which copy is authoritative before compiling this IDL.
enum SliceStatus {
|
||||
PendingVectoring = 0 // Not yet vectorized
|
||||
FinishVectoring = 1 // Vectorized
|
||||
Deactive = 9 // Disabled
|
||||
PendingVectoring = 0 // Not yet vectorized
|
||||
FinishVectoring = 1 // Vectorized
|
||||
Deactive = 9 // Disabled
|
||||
}
|
||||
|
||||
struct UpdateSliceResponse {
|
||||
@@ -48,18 +48,18 @@ struct UpdateSliceResponse {
|
||||
}
|
||||
|
||||
// Request for listing slices, filtered by document and/or dataset, with an optional
// keyword and a starting sequence number.
// NOTE(review): fields 2-5 and 21 each appear twice below (same declaration, different
// comment revision), separated by '|' / '||||' lines. This looks like diff-viewer
// residue; confirm which copy is authoritative before compiling this IDL.
struct ListSliceRequest {
|
||||
2: optional i64 document_id(agw.js_conv="str", api.js_conv="true") // ID of the document whose slices are listed
|
||||
3: optional i64 sequence(agw.js_conv="str", api.js_conv="true") // Slice sequence number; listing starts from the slice with this sequence number
|
||||
4: optional string keyword // Search keyword
|
||||
5: optional i64 dataset_id (agw.js_conv="str", api.js_conv="true") // If only dataset_id is passed, slices under that knowledge base are returned
|
||||
21: i64 page_size(agw.js_conv="str", api.js_conv="true") // Page size
|
||||
2: optional i64 document_id(agw.js_conv="str", api.js_conv="true") // ID of the document whose slices are listed
|
||||
3: optional i64 sequence(agw.js_conv="str", api.js_conv="true") // Slice sequence number; listing starts from the slice with this sequence number
|
||||
4: optional string keyword // Search keyword
|
||||
5: optional i64 dataset_id (agw.js_conv="str", api.js_conv="true") // If only dataset_id is passed, slices under that knowledge base are returned
|
||||
21: i64 page_size(agw.js_conv="str", api.js_conv="true") // Page size
|
||||
255: optional base.Base Base
|
||||
}
|
||||
|
||||
struct ListSliceResponse {
|
||||
1: list<SliceInfo> slices // 返回的分片列表
|
||||
2: i64 total(agw.js_conv="str", api.js_conv="true") // 总分片数
|
||||
3: bool hasmore // 是否还有更多分片
|
||||
1: list<SliceInfo> slices // Returned list of shardings
|
||||
2: i64 total(agw.js_conv="str", api.js_conv="true") // Total shardings
|
||||
3: bool hasmore // Is there more sharding?
|
||||
|
||||
253: required i64 code
|
||||
254: required string msg
|
||||
@@ -67,12 +67,12 @@ struct ListSliceResponse {
|
||||
}
|
||||
|
||||
// Description of a single slice: identity, content, status, counters, position and
// owning document. Field id 6 is not used in this struct.
// NOTE(review): fields 1-9 each appear twice below (same declaration, different comment
// revision), separated by '|' / '||||' lines. This looks like diff-viewer residue;
// confirm which copy is authoritative before compiling this IDL.
struct SliceInfo {
|
||||
1: i64 slice_id (agw.js_conv="str", api.js_conv="true") // Slice ID
|
||||
2: string content // Slice content
|
||||
3: SliceStatus status // Slice status
|
||||
4: i64 hit_count(agw.js_conv="str", api.js_conv="true") // Hit count
|
||||
5: i64 char_count(agw.js_conv="str", api.js_conv="true") // Character count
|
||||
7: i64 sequence(agw.js_conv="str", api.js_conv="true") // Sequence number
|
||||
8: i64 document_id(agw.js_conv="str", api.js_conv="true") // ID of the document the slice belongs to
|
||||
9: string chunk_info // Slice metadata: passes through the extra->chunk_info field of the slice table (JSON)
|
||||
1: i64 slice_id (agw.js_conv="str", api.js_conv="true") // Slice ID
|
||||
2: string content // Slice content
|
||||
3: SliceStatus status // Slice status
|
||||
4: i64 hit_count(agw.js_conv="str", api.js_conv="true") // Hit count
|
||||
5: i64 char_count(agw.js_conv="str", api.js_conv="true") // Character count
|
||||
7: i64 sequence(agw.js_conv="str", api.js_conv="true") // Sequence number
|
||||
8: i64 document_id(agw.js_conv="str", api.js_conv="true") // ID of the document the slice belongs to
|
||||
9: string chunk_info // Slice metadata: passes through the extra->chunk_info field of the slice table (JSON)
|
||||
}
|
||||
Reference in New Issue
Block a user