refactor: IDL structure modification (#475)

This commit is contained in:
liuyunchao-1998
2025-08-07 15:59:51 +08:00
committed by GitHub
parent b25bf6728a
commit e7d25319ff
221 changed files with 8289 additions and 17471 deletions

View File

@@ -0,0 +1,119 @@
namespace go data.knowledge
// Format type.
enum FormatType {
    // Text
    Text = 0
    // Table
    Table = 1
    // Image
    Image = 2
    // Database
    Database = 3
}
// Settings that control how content is split into chunks.
struct ChunkStrategy {
    // Separator, such as a period.
    1: string separator
    // Maximum number of tokens used when splitting.
    2: i64 max_tokens
    // Replace consecutive spaces, newlines, and tabs.
    3: bool remove_extra_spaces
    // Remove URLs and email addresses.
    4: bool remove_urls_emails
    // Chunking mode; if 0, the configuration of the fields above is not used.
    5: ChunkType chunk_type
    // Image type: annotation method for the image description text.
    7: optional CaptionType caption_type
    // Overlap between segments.
    8: optional i64 overlap
    // Maximum number of levels (effective when segmenting by level).
    9: optional i64 max_level
    // Whether slices preserve level headers (effective when segmenting by level).
    10: optional bool save_title
}
// Chunking mode, used by ChunkStrategy.chunk_type.
enum ChunkType {
    DefaultChunk = 0
    CustomChunk = 1
    LevelChunk = 2
}
// Content schema options.
enum ContentSchema {
    DefaultSchema = 0
    LinkReaderSchema = 1
}
// How an image description (caption) is annotated.
enum CaptionType {
    // Intelligent (automatic) annotation.
    Auto = 0
    // Manual annotation.
    Manual = 1
}
// Status of a document.
enum DocumentStatus {
    // Uploading.
    Processing = 0
    // In effect.
    Enable = 1
    // NOTE(review): the original comment read "failure"; judging by the name
    // this presumably means "no longer in effect / disabled" — confirm.
    Disable = 2
    // Deleted.
    Deleted = 3
    // Re-segmentation in progress; the caller is not aware of this state.
    Resegment = 4
    // Refreshing (will be deleted after a successful refresh).
    Refreshing = 5
    // Failed.
    Failed = 9
}
// Where a document comes from.
enum DocumentSource {
    // Local file upload.
    Document = 0
    // Custom type.
    Custom = 2
}
// Settings that control how a document is parsed.
struct ParsingStrategy {
    // Parsing type.
    1: optional ParsingType parsing_type
    // Whether to enable image element extraction (effective with accurate parsing).
    2: optional bool image_extraction
    // Whether to enable table element extraction (effective with accurate parsing).
    3: optional bool table_extraction
    // Whether to enable image OCR (effective with accurate parsing).
    4: optional bool image_ocr
}
// Parsing mode, used by ParsingStrategy.parsing_type.
enum ParsingType {
    // Fast parsing.
    FastParsing = 0
    // Accurate parsing.
    AccurateParsing = 1
}
// Settings that control which indexes are built.
struct IndexStrategy {
    // Whether to enable vector indexing (default is true).
    1: optional bool vector_indexing
    // Whether to enable keyword indexing (default is true).
    2: optional bool keyword_indexing
    // Whether to enable hierarchical indexing.
    3: optional bool hierarchical_indexing
    // Vector model.
    4: optional string model
}
// Filtering settings.
struct FilterStrategy {
    // Pages to filter.
    1: optional list<i32> filter_page
}
// Field to sort by.
enum OrderField {
    CreateTime = 1
    UpdateTime = 2
}
// Sort direction.
enum OrderType {
    Desc = 1
    Asc = 2
}
// Sink settings.
struct SinkStrategy {
    // Check whether indexing was successful.
    1: bool check_index
}
// Status of a review.
enum ReviewStatus {
    // Processing.
    Processing = 0
    // Completed.
    Enable = 1
    // Failed.
    Failed = 2
    // NOTE(review): the original comment read "fail", identical to Failed;
    // judging by the name this is presumably a forced stop — confirm.
    ForceStop = 3
}
// Table column information.
struct DocTableColumn {
    // Column ID.
    1: i64 id (agw.js_conv="str", api.js_conv="true", api.body="id")
    // Column name.
    2: string column_name
    // Whether this is a semantically matched column.
    3: bool is_semantic
    // Serial number of the column as it originally appeared in Excel.
    4: i64 sequence (agw.js_conv="str", api.js_conv="true", api.body="sequence")
    // Column type.
    5: optional ColumnType column_type
    // NOTE(review): presumably whether the column contains empty values — confirm.
    6: optional bool contains_empty_value
    // Description.
    7: optional string desc
}
// Data type of a table column.
enum ColumnType {
    Unknown = 0
    // Text
    Text = 1
    // Number
    Number = 2
    // Time
    Date = 3
    // Float
    Float = 4
    // Bool
    Boolean = 5
    // Picture
    Image = 6
}

View File

@@ -0,0 +1,353 @@
include "../../base.thrift"
include "common.thrift"
namespace go data.knowledge
// Request of DatasetService.ListDocument.
struct ListDocumentRequest {
    1: required i64 dataset_id (agw.js_conv='str', api.js_conv='true')
    2: optional list<string> document_ids (agw.js_conv='str')
    3: optional i32 page
    4: optional i32 size
    // Search by name.
    5: optional string keyword

    255: optional base.Base Base
}
// Response of DatasetService.ListDocument.
struct ListDocumentResponse {
    1: list<DocumentInfo> document_infos
    2: i32 total

    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp
}
// Information about a single document.
struct DocumentInfo {
    1: string name
    2: i64 document_id (agw.js_conv='str', api.js_conv='true')
    // File link.
    3: optional string tos_uri
    // Creation time.
    5: i32 create_time
    // Update time.
    6: i32 update_time
    // Creator ID.
    7: optional i64 creator_id (agw.js_conv="str", api.js_conv='true', api.body="creator_id")
    // Number of segments included.
    8: i32 slice_count
    // File suffix: csv, pdf, etc.
    9: string type
    // File size in bytes.
    10: i32 size
    // Character count.
    11: i32 char_count
    // Status.
    12: common.DocumentStatus status
    // Hit count.
    13: i32 hit_count
    // Source.
    14: common.DocumentSource source_type
    // File type.
    18: common.FormatType format_type
    // Metadata for table-type documents.
    19: optional list<TableColumn> table_meta
    // URL address.
    20: optional string web_url
    // Details of the status; if slicing fails, carries the failure information.
    21: optional string status_descript
    // Space ID.
    24: optional i64 space_id (agw.js_conv="str", api.js_conv="true")

    // The following fields are only useful for the reconstructed table type
    // and are used for front-end judgment.

    // Table types only: whether adding content and modifying the table structure is allowed.
    26: optional bool editable_append_content
    // Slicing rule.
    27: common.ChunkStrategy chunk_strategy
    // File link stored by ImageX.
    28: optional string imagex_uri
    // Hierarchical segmentation document tree JSON (unused).
    29: optional string doc_outline
    // Parsing strategy.
    30: optional common.ParsingStrategy parsing_strategy
    // Filtering strategy.
    32: optional common.FilterStrategy filter_strategy
    // tos_url of the hierarchical segmented document tree.
    33: optional string doc_tree_tos_url
    // tos_url for previewing the original document.
    34: optional string preview_tos_url
    // Review ID.
    // NOTE(review): the original comment duplicated the preview_tos_url text.
    // Unlike the other i64 IDs in this struct, this field carries no js_conv
    // annotations — confirm whether that is intentional.
    35: optional i64 review_id
}
// Column description of a table-type document.
struct TableColumn {
    // Column ID.
    1: i64 id (agw.js_conv="str", api.js_conv="true", api.body="id")
    // Column name.
    2: string column_name
    // Whether this is a semantically matched column.
    3: bool is_semantic
    // Serial number of the column as it originally appeared in Excel.
    4: i64 sequence (agw.js_conv="str", api.js_conv="true", api.body="sequence")
    // Column type.
    5: optional common.ColumnType column_type
    // NOTE(review): presumably whether the column contains empty values — confirm.
    6: optional bool contains_empty_value
    // Description.
    7: optional string desc
}
// Request of DatasetService.DeleteDocument.
struct DeleteDocumentRequest {
    // List of document IDs to delete.
    2: list<string> document_ids

    255: optional base.Base Base
}
// Response of DatasetService.DeleteDocument.
struct DeleteDocumentResponse {
    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp
}
// Request of DatasetService.UpdateDocument.
struct UpdateDocumentRequest {
    1: i64 document_id (agw.js_conv="str", api.js_conv="true")
    // Pass this field when the document name needs to be updated.
    3: optional string document_name
    // Table metadata; used to update the table structure.
    5: optional list<TableColumn> table_meta

    255: optional base.Base Base
}
// Response of DatasetService.UpdateDocument.
struct UpdateDocumentResponse {
    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Request of DatasetService.UpdatePhotoCaption.
struct UpdatePhotoCaptionRequest {
    // Document ID.
    1: required i64 document_id (agw.js_conv='str', api.js_conv='true')
    // Picture description to be written.
    2: required string caption

    255: optional base.Base Base
}
// Response of DatasetService.UpdatePhotoCaption.
struct UpdatePhotoCaptionResponse {
    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp (api.none="true")
}
// Request of DatasetService.ListPhoto.
struct ListPhotoRequest {
    // Knowledge ID.
    1: required i64 dataset_id (agw.js_conv='str', api.js_conv='true')
    // Page number, starting from 1.
    2: optional i32 page
    // Page size.
    3: optional i32 size
    4: optional PhotoFilter filter

    255: optional base.Base Base
}
// Filter conditions for listing photos.
struct PhotoFilter {
    // True selects "marked" images, false selects "unmarked" images.
    1: optional bool has_caption
    // Search keyword; matches image names and picture descriptions.
    2: optional string keyword
    // Status.
    3: optional common.DocumentStatus status
}
// Response of DatasetService.ListPhoto.
struct ListPhotoResponse {
    1: list<PhotoInfo> photo_infos
    2: i32 total

    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp (api.none="true")
}
// Picture knowledge base entry: one picture corresponds to one document.
struct PhotoInfo {
    // Image name.
    1: string name
    // Document ID.
    2: i64 document_id (agw.js_conv='str', api.js_conv='true')
    // Image link.
    3: string url
    // Picture description.
    4: string caption
    // Creation time.
    5: i32 create_time
    // Update time.
    6: i32 update_time
    // Creator ID.
    7: i64 creator_id (agw.js_conv="str", api.js_conv='true', agw.key="creator_id", api.body="creator_id")
    // Image suffix: jpg, png, etc.
    8: string type
    // Image size.
    9: i32 size
    // Status.
    10: common.DocumentStatus status
    // Source.
    11: common.DocumentSource source_type
}
// Request of DatasetService.PhotoDetail.
struct PhotoDetailRequest {
    // Document ID list.
    1: required list<string> document_ids (agw.js_conv='str')
    // Knowledge ID.
    2: required i64 dataset_id (agw.js_conv='str', api.js_conv='true')

    255: optional base.Base Base
}
// Response of DatasetService.PhotoDetail.
struct PhotoDetailResponse {
    // Maps document ID to image information.
    1: map<string, PhotoInfo> photo_infos

    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp (api.none="true")
}
// Request of DatasetService.Resegment.
struct ResegmentRequest {
    // Knowledge ID.
    1: i64 dataset_id (agw.js_conv="str", api.js_conv="true")
    // Documents to be re-segmented.
    2: list<string> document_ids
    // Segmentation strategy.
    3: common.ChunkStrategy chunk_strategy
    // Parsing strategy.
    5: optional common.ParsingStrategy parsing_strategy
    // Filtering strategy.
    7: optional common.FilterStrategy filter_strategy

    255: optional base.Base Base
}
// Response of DatasetService.Resegment.
struct ResegmentResponse {
    // Required by the old version; only the id and name need to be returned.
    1: list<DocumentInfo> document_infos

    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Request of DatasetService.CreateDocument.
struct CreateDocumentRequest {
    // ID of the knowledge base the documents are inserted into.
    1: i64 dataset_id (agw.js_conv='str', api.js_conv='true')
    // Type of knowledge base; currently text, tables, and images are supported.
    4: common.FormatType format_type
    // Information of the documents to create.
    // Only one table-type document can be created at a time.
    6: list<DocumentBase> document_bases
    // Slicing rules. Only needs to be passed when the knowledge base has no
    // document yet; otherwise it is obtained from the knowledge base. If empty,
    // content is sliced automatically by paragraph.
    17: optional common.ChunkStrategy chunk_strategy
    // When true, appends content to an existing document. Cannot be used with the text type.
    31: optional bool is_append
    // Parsing strategy.
    32: optional common.ParsingStrategy parsing_strategy

    255: optional base.Base Base
}
// Response of DatasetService.CreateDocument.
struct CreateDocumentResponse {
    2: list<DocumentInfo> document_infos

    253: required i32 code
    254: required string msg
    255: required base.BaseResp BaseResp
}
// Basic information for creating a document.
struct DocumentBase {
    // Document name.
    1: string name
    2: SourceInfo source_info

    // The following parameters need to be passed for table types.

    // Table metadata.
    4: optional list<TableColumn> table_meta
    // Table parsing information.
    5: optional TableSheet table_sheet
    // Filtering strategy.
    6: optional common.FilterStrategy filter_strategy
    // Image-type knowledge base: picture description when annotated manually.
    7: optional string caption
}
// Source of a document; supports multiple data sources.
struct SourceInfo {
    // URI returned by a local upload.
    1: optional string tos_uri (api.body="tos_uri")
    4: optional common.DocumentSource document_source (api.body="document_source")
    // For document_source "custom": the raw content. Format required for a
    // tabular knowledge base: JSON list<map<string, string>>.
    5: optional string custom_content (api.body="custom_content")

    // For document_source "local": if the tos address is not sent, the
    // base64-encoded file and its type must be sent instead.

    // File content as a base64 string.
    7: optional string file_base64
    // File type, such as PDF.
    8: optional string file_type
    // Choose one of imagex_uri and tos_uri; imagex_uri takes priority. The data
    // and the signed url need to be obtained through the imagex method.
    10: optional string imagex_uri
}
// Table parsing information.
struct TableSheet {
    // Sheet ID selected by the user.
    1: i64 sheet_id (agw.js_conv="str", agw.key="sheet_id", api.js_conv="true", api.body="sheet_id")
    // Header row selected by the user, numbered from 0.
    2: i64 header_line_idx (agw.js_conv="str", agw.key="header_line_idx", api.js_conv="true", api.body="header_line_idx")
    // Starting line number selected by the user, numbered from 0.
    3: i64 start_line_idx (agw.js_conv="str", agw.key="start_line_idx", api.js_conv="true", api.body="start_line_idx")
}
// Request of DatasetService.GetDocumentProgress.
struct GetDocumentProgressRequest {
    1: list<string> document_ids

    255: optional base.Base Base
}
// Response of DatasetService.GetDocumentProgress.
struct GetDocumentProgressResponse {
    1: list<DocumentProgress> data

    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Processing progress of a single document.
struct DocumentProgress {
    1: i64 document_id (agw.js_conv="str", api.js_conv='true')
    // Knowledge base progress percentage.
    2: i32 progress
    3: common.DocumentStatus status
    // Detailed description of the status; if slicing fails, carries the failure message.
    4: optional string status_descript
    5: string document_name
    // Remaining time in seconds.
    6: optional i64 remaining_time
    7: optional i64 size
    8: optional string type
    9: optional string url
}
// Get the meta information of an uploaded table file.
struct GetTableSchemaRequest {
    // Table parsing information. The default initial value is 0, 0, 1: the
    // first table, the first row as the table header, and data rows starting
    // from the second row.
    1: optional TableSheet table_sheet
    // When not passed, all data is returned by default.
    2: optional TableDataType table_data_type
    // Compatible with pre-refactoring versions: pass this value to pull the
    // schema of the current document.
    3: optional i64 document_id (agw.js_conv="str", agw.key="document_id", api.js_conv="true", api.body="document_id")
    // Source file information.
    4: optional SourceInfo source_file
    // For the table preview, the front end passes the original table structure.
    5: optional list<TableColumn> origin_table_meta
    // For the table preview, the front end passes the table structure edited by the user.
    6: optional list<TableColumn> preview_table_meta

    255: optional base.Base Base
}
// Selects which parts of the table data are returned.
enum TableDataType {
    // Schema, sheets, and preview data.
    AllData = 0
    // Only the schema structure and sheets.
    OnlySchema = 1
    // Only the preview data.
    OnlyPreview = 2
}
// Description of one sheet in a table document.
struct DocTableSheet {
    // Number of the sheet.
    1: i64 id
    // Sheet name.
    2: string sheet_name
    // Total number of rows.
    3: i64 total_row
}
// Response of DatasetService.GetTableSchema.
struct GetTableSchemaResponse {
    1: required i32 code
    2: required string msg
    3: list<DocTableSheet> sheet_list
    // Schema of the selected sheet; when none is selected, the first sheet is returned by default.
    4: list<TableColumn> table_meta
    // Preview data; returned for knowledge tables.
    5: list<map<string,string>> preview_data (api.body="preview_data")

    255: optional base.BaseResp BaseResp (api.none="true")
}
// Determine whether the schema configured by the user is consistent with the
// corresponding document id.
struct ValidateTableSchemaRequest {
    // Space ID.
    1: i64 space_id (agw.js_conv="str", agw.key="space_id", api.js_conv="true", api.body="space_id")
    // ID of the document to verify.
    2: i64 document_id (agw.js_conv="str", agw.key="document_id", api.js_conv="true", api.body="document_id")
    // Information about the source file.
    3: SourceInfo source_info (api.body="source_file")
    // Table parsing information. The default initial value is 0, 0, 1: the
    // first table, the first row as the table header, and data rows starting
    // from the second row.
    4: TableSheet table_sheet (api.body="table_sheet")

    255: optional base.Base Base
}
// Response of DatasetService.ValidateTableSchema.
struct ValidateTableSchemaResponse {
    1: optional map<string,string> ColumnValidResult (api.body="column_valid_result")

    // If validation fails, an error code is returned.
    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp (api.none="true")
}
// Request of DatasetService.ExtractPhotoCaption.
struct ExtractPhotoCaptionRequest {
    1: required i64 document_id (agw.js_conv="str", agw.key="document_id", api.js_conv="true", api.body="document_id")

    255: optional base.Base Base
}
// Response of DatasetService.ExtractPhotoCaption.
struct ExtractPhotoCaptionResponse {
    // Picture description.
    1: string caption

    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp (api.none="true")
}
// Request of DatasetService.GetDocumentTableInfo.
struct GetDocumentTableInfoRequest {
    // Pass this value when the table comes from a first-time local file upload.
    2: optional string tos_uri
    // Pass this value when the document already has a table.
    3: optional i64 document_id (agw.js_conv="str", api.js_conv="true", api.body="document_id")
    // Creator; does not need to be passed through the http interface.
    4: i64 creator_id

    255: optional base.Base Base
}
// Response of DatasetService.GetDocumentTableInfo.
struct GetDocumentTableInfoResponse {
    1: i32 code
    2: string msg
    3: list<DocTableSheet> sheet_list
    // key: sheet_id -> list<common.DocTableColumn>
    4: map<string, list<common.DocTableColumn>> table_meta (api.body="table_meta")
    // key: sheet_id -> list of preview data
    5: map<string, list<map<string,string>>> preview_data (api.body="preview_data")

    255: required base.BaseResp BaseResp (api.none="true")
}
// Table schema response whose table_meta uses common.DocTableColumn.
struct GetTableSchemaInfoResponse {
    1: i32 code
    2: string msg
    3: list<DocTableSheet> sheet_list
    // Schema of the selected sheet; when none is selected, the first sheet is returned by default.
    4: list<common.DocTableColumn> table_meta
    // Preview data; returned for knowledge tables.
    5: list<map<i64,string>> preview_data (agw.js_conv="str", agw.key="preview_data")

    255: optional base.BaseResp BaseResp (api.none="true")
}

View File

@@ -0,0 +1,185 @@
include "../../base.thrift"
include "common.thrift"
namespace go data.knowledge
// Request of DatasetService.CreateDataset.
struct CreateDatasetRequest {
    // Knowledge base name, no more than 100 characters long.
    1: string name
    // Knowledge base description.
    2: string description
    // Space ID.
    3: i64 space_id (agw.js_conv="str", api.js_conv="true")
    // Knowledge base avatar URI.
    4: string icon_uri
    5: common.FormatType format_type
    // Business identity open to third parties; coze passes 0 or omits it.
    6: i64 biz_id (agw.js_conv="str", api.js_conv="true")
    // Project ID.
    7: i64 project_id (agw.js_conv="str", api.js_conv="true")

    255: optional base.Base Base
}
// Response of DatasetService.CreateDataset.
struct CreateDatasetResponse {
    1: i64 dataset_id (agw.js_conv="str", api.js_conv="true")

    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Request of DatasetService.DatasetDetail.
struct DatasetDetailRequest {
    1: list<string> DatasetIDs (agw.js_conv="str", api.body="dataset_ids")
    // Project ID.
    3: i64 project_id (agw.js_conv="str", api.js_conv="true")
    2: i64 space_id (agw.js_conv="str", api.js_conv="true")

    255: optional base.Base Base
}
// Response of DatasetService.DatasetDetail.
struct DatasetDetailResponse {
    1: map<string, Dataset> dataset_details (agw.js_conv="str")

    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Status of a dataset.
enum DatasetStatus {
    DatasetProcessing = 0
    DatasetReady = 1
    // Soft delete.
    DatasetDeleted = 2
    // Not enabled.
    DatasetForbid = 3
    DatasetFailed = 9
}
// A dataset (knowledge base) and its summary information.
struct Dataset {
    1: i64 dataset_id (agw.js_conv="str", api.js_conv="true")
    // Dataset name.
    2: string name
    // File list.
    3: list<string> file_list
    // Size of all files.
    4: i64 all_file_size (agw.js_conv="str", api.js_conv="true")
    // Bot count.
    5: i32 bot_used_count
    6: DatasetStatus status
    // Names of the files being processed; kept for compatibility with old logic.
    7: list<string> processing_file_list
    // Update time, timestamp in seconds.
    8: i32 update_time
    9: string icon_url
    10: string description
    11: string icon_uri
    // Whether it can be edited.
    12: bool can_edit
    // Creation time, timestamp in seconds.
    13: i32 create_time
    // Creator ID.
    14: i64 creator_id (agw.js_conv="str", api.js_conv="true")
    // Space ID.
    15: i64 space_id (agw.js_conv="str", api.js_conv="true")
    // Files whose processing failed.
    18: list<string> failed_file_list (agw.js_conv="str")
    19: common.FormatType format_type
    // Number of segments.
    20: i32 slice_count
    // Hit count.
    21: i32 hit_count
    // Number of documents.
    22: i32 doc_count
    // Slicing rule.
    23: common.ChunkStrategy chunk_strategy
    // IDs of the files being processed.
    24: list<string> processing_file_id_list
    // Project ID.
    25: string project_id
}
// Request of DatasetService.ListDataset.
struct ListDatasetRequest {
    1: optional DatasetFilter filter
    3: optional i32 page
    4: optional i32 size
    5: i64 space_id (agw.js_conv="str", api.js_conv="true")
    // Sort field.
    6: optional common.OrderField order_field
    // Sort direction.
    7: optional common.OrderType order_type
    // If the specified value is passed, the verification is released.
    8: optional string space_auth
    // Business identity open to third parties.
    9: optional i64 biz_id (agw.js_conv="str", api.js_conv="true")
    // Whether the number of referencing bots needs to be pulled; doing so increases the response delay.
    10: optional bool need_ref_bots
    // Project ID.
    11: optional string project_id

    255: optional base.Base Base
}
// Response of DatasetService.ListDataset.
struct ListDatasetResponse {
    1: list<Dataset> dataset_list
    2: i32 total

    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp
}
// Filter conditions for listing datasets; the conditions are combined with AND.
struct DatasetFilter {
    // Keyword search, fuzzy match by name.
    1: optional string name
    // Knowledge ID list.
    2: optional list<string> dataset_ids (agw.js_conv="str")
    // Source.
    3: optional DatasetSource source_type
    // Search type.
    4: optional DatasetScopeType scope_type
    // Type.
    5: optional common.FormatType format_type
}
// Search scope, used by DatasetFilter.scope_type.
enum DatasetScopeType {
    ScopeAll = 1
    ScopeSelf = 2
}
// Dataset source, used by DatasetFilter.source_type.
enum DatasetSource {
    SourceSelf = 1
    SourceExplore = 2
}
// Request of DatasetService.DeleteDataset.
struct DeleteDatasetRequest {
    1: i64 dataset_id (agw.js_conv="str", api.js_conv="true")

    255: optional base.Base Base
}
// Response of DatasetService.DeleteDataset.
struct DeleteDatasetResponse {
    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Request of DatasetService.UpdateDataset.
struct UpdateDatasetRequest {
    // Knowledge ID.
    1: i64 dataset_id (agw.js_conv="str", api.js_conv="true")
    // Knowledge base name; cannot be empty.
    2: string name
    // Knowledge base icon.
    3: string icon_uri
    // Knowledge base description.
    4: string description
    5: optional DatasetStatus status

    255: optional base.Base Base
}
// Response of DatasetService.UpdateDataset.
struct UpdateDatasetResponse {
    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Request of DatasetService.GetIconForDataset.
struct GetIconRequest {
    1: common.FormatType format_type
}
// An icon, identified by both a url and a uri.
struct Icon {
    1: string url
    2: string uri
}
// Response of DatasetService.GetIconForDataset.
struct GetIconResponse {
    1: Icon icon

    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Request of DatasetService.GetModeConfig.
struct GetModeConfigRequest {
    // Bot ID.
    1: required i64 bot_id
    // Line-of-business ID.
    2: optional i64 connector_id
    // Line-of-business user ID.
    3: optional string connector_uid

    255: optional base.Base Base
}
// Response of DatasetService.GetModeConfig.
struct GetModeConfigResponse {
    1: i32 code
    2: string msg
    3: string mode
    4: i64 bot_id
    5: i64 max_table_num
    6: i64 max_column_num
    7: i64 max_capacity_kb
    8: i64 max_row_num

    255: optional base.BaseResp BaseResp
}

View File

@@ -0,0 +1,41 @@
include "slice.thrift"
include "knowledge.thrift"
include "document.thrift"
include "common.thrift"
include "review.thrift"
namespace go data.knowledge
// HTTP-facing service for knowledge bases (datasets), their documents,
// slices, and document reviews.
service DatasetService {
    // ---- Knowledge base related ----
    knowledge.GetIconResponse GetIconForDataset(1: knowledge.GetIconRequest req)
        (api.post='/api/knowledge/icon/get', api.category="knowledge",agw.preserve_base="true")
    knowledge.CreateDatasetResponse CreateDataset(1: knowledge.CreateDatasetRequest req)
        (api.post='/api/knowledge/create', api.category="knowledge",agw.preserve_base="true")
    knowledge.DatasetDetailResponse DatasetDetail(1: knowledge.DatasetDetailRequest req)
        (api.post='/api/knowledge/detail', api.category="knowledge",agw.preserve_base="true")
    knowledge.ListDatasetResponse ListDataset(1: knowledge.ListDatasetRequest req)
        (api.post='/api/knowledge/list', api.category="knowledge",agw.preserve_base="true")
    knowledge.DeleteDatasetResponse DeleteDataset(1: knowledge.DeleteDatasetRequest req)
        (api.post='/api/knowledge/delete', api.category="knowledge",agw.preserve_base="true")
    knowledge.UpdateDatasetResponse UpdateDataset(1: knowledge.UpdateDatasetRequest req)
        (api.post='/api/knowledge/update', api.category="knowledge",agw.preserve_base="true")
    knowledge.GetModeConfigResponse GetModeConfig(1: knowledge.GetModeConfigRequest req)
        (api.get='/api/memory/table_mode_config', api.category="memory", agw.preserve_base="true")

    // ---- Document related ----
    document.CreateDocumentResponse CreateDocument(1: document.CreateDocumentRequest req)
        (api.post='/api/knowledge/document/create', api.category="knowledge",agw.preserve_base="true")
    document.ListDocumentResponse ListDocument(1: document.ListDocumentRequest req)
        (api.post='/api/knowledge/document/list', api.category="knowledge",agw.preserve_base="true")
    document.DeleteDocumentResponse DeleteDocument(1: document.DeleteDocumentRequest req)
        (api.post='/api/knowledge/document/delete', api.category="knowledge",agw.preserve_base="true")
    document.UpdateDocumentResponse UpdateDocument(1: document.UpdateDocumentRequest req)
        (api.post='/api/knowledge/document/update', api.category="knowledge",agw.preserve_base="true")
    document.GetDocumentProgressResponse GetDocumentProgress(1: document.GetDocumentProgressRequest req)
        (api.post='/api/knowledge/document/progress/get', api.category="knowledge",agw.preserve_base="true")
    document.ResegmentResponse Resegment(1: document.ResegmentRequest req)
        (api.post='/api/knowledge/document/resegment', api.category="knowledge",agw.preserve_base="true")
    document.UpdatePhotoCaptionResponse UpdatePhotoCaption(1: document.UpdatePhotoCaptionRequest req)
        (api.post='/api/knowledge/photo/caption', api.category="knowledge",agw.preserve_base="true")
    document.ListPhotoResponse ListPhoto(1: document.ListPhotoRequest req)
        (api.post='/api/knowledge/photo/list', api.category="knowledge",agw.preserve_base="true")
    document.PhotoDetailResponse PhotoDetail(1: document.PhotoDetailRequest req)
        (api.post='/api/knowledge/photo/detail', api.category="knowledge",agw.preserve_base="true")
    document.ExtractPhotoCaptionResponse ExtractPhotoCaption(1: document.ExtractPhotoCaptionRequest req)
        (api.post='/api/knowledge/photo/extract_caption', api.category="knowledge",agw.preserve_base="true")
    document.GetTableSchemaResponse GetTableSchema(1: document.GetTableSchemaRequest req)
        (api.post='/api/knowledge/table_schema/get', api.category="knowledge",agw.preserve_base="true")
    document.ValidateTableSchemaResponse ValidateTableSchema(1: document.ValidateTableSchemaRequest req)
        (api.post='/api/knowledge/table_schema/validate', api.category="knowledge",agw.preserve_base="true")
    document.GetDocumentTableInfoResponse GetDocumentTableInfo(1: document.GetDocumentTableInfoRequest req)
        (api.get='/api/memory/doc_table_info', api.category="memory", agw.preserve_base="true")

    // ---- Slice related ----
    slice.DeleteSliceResponse DeleteSlice(1: slice.DeleteSliceRequest req)
        (api.post='/api/knowledge/slice/delete', api.category="knowledge",agw.preserve_base="true")
    slice.CreateSliceResponse CreateSlice(1: slice.CreateSliceRequest req)
        (api.post='/api/knowledge/slice/create', api.category="knowledge",agw.preserve_base="true")
    slice.UpdateSliceResponse UpdateSlice(1: slice.UpdateSliceRequest req)
        (api.post='/api/knowledge/slice/update', api.category="knowledge",agw.preserve_base="true")
    slice.ListSliceResponse ListSlice(1: slice.ListSliceRequest req)
        (api.post='/api/knowledge/slice/list', api.category="knowledge",agw.preserve_base="true")

    // ---- Pre-slicing (document review) related ----
    review.CreateDocumentReviewResponse CreateDocumentReview(1: review.CreateDocumentReviewRequest req)
        (api.post='/api/knowledge/review/create', api.category="knowledge",agw.preserve_base="true")
    review.MGetDocumentReviewResponse MGetDocumentReview(1: review.MGetDocumentReviewRequest req)
        (api.post='/api/knowledge/review/mget', api.category="knowledge",agw.preserve_base="true")
    review.SaveDocumentReviewResponse SaveDocumentReview(1: review.SaveDocumentReviewRequest req)
        (api.post='/api/knowledge/review/save', api.category="knowledge",agw.preserve_base="true")
}

View File

@@ -0,0 +1,69 @@
include "../../base.thrift"
include "common.thrift"
namespace go data.knowledge
// One document submitted for review.
struct ReviewInput {
    1: string document_name
    2: string document_type
    3: string tos_uri
    4: optional i64 document_id (agw.js_conv="str",api.js_conv="true")
}
// A document review and its state.
struct Review {
    1: optional i64 review_id (agw.js_conv="str",api.js_conv="true")
    2: string document_name
    3: string document_type
    4: string tos_url
    // Status.
    5: optional common.ReviewStatus status
    6: optional string doc_tree_tos_url
    7: optional string preview_tos_url
}
// Request of DatasetService.CreateDocumentReview.
struct CreateDocumentReviewRequest {
    1: i64 dataset_id (agw.js_conv="str",api.js_conv="true")
    2: list<ReviewInput> reviews
    3: optional common.ChunkStrategy chunk_strategy
    4: optional common.ParsingStrategy parsing_strategy

    255: optional base.Base Base
}
// Response of DatasetService.CreateDocumentReview.
struct CreateDocumentReviewResponse {
    1: i64 dataset_id (agw.js_conv="str",api.js_conv="true")
    2: list<Review> reviews

    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp
}
// Request of DatasetService.MGetDocumentReview.
struct MGetDocumentReviewRequest {
    1: i64 dataset_id (agw.js_conv="str",api.js_conv="true")
    2: list<string> review_ids (agw.js_conv="str")

    255: optional base.Base Base
}
// Response of DatasetService.MGetDocumentReview.
struct MGetDocumentReviewResponse {
    1: i64 dataset_id (agw.js_conv="str",api.js_conv="true")
    2: list<Review> reviews

    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp
}
// Request of DatasetService.SaveDocumentReview.
struct SaveDocumentReviewRequest {
    1: i64 dataset_id (agw.js_conv="str",api.js_conv="true")
    2: i64 review_id (agw.js_conv="str",api.js_conv="true")
    3: string doc_tree_json

    255: optional base.Base Base
}
// Response of DatasetService.SaveDocumentReview.
struct SaveDocumentReviewResponse {
    253: required i64 code
    254: required string msg
    255: required base.BaseResp BaseResp
}

View File

@@ -0,0 +1,78 @@
include "../../base.thrift"
include "common.thrift"
namespace go data.knowledge
// Request of DatasetService.DeleteSlice.
struct DeleteSliceRequest {
    // IDs of the slices to delete.
    4: optional list<string> slice_ids (api.body="slice_ids")

    255: optional base.Base Base
}
// Response of DatasetService.DeleteSlice.
struct DeleteSliceResponse {
    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp (api.none="true")
}
// Request of DatasetService.CreateSlice.
struct CreateSliceRequest {
    // ID of the document the new slice is inserted into.
    2: required i64 document_id (agw.js_conv="str", api.js_conv="true")
    // Content of the new slice.
    5: optional string raw_text
    // Insertion position of the slice: 1 indicates the beginning of the
    // document, and the maximum value is the last slice position + 1.
    6: optional i64 sequence (agw.js_conv="str", api.js_conv="true")

    255: optional base.Base Base
}
// Response of DatasetService.CreateSlice.
struct CreateSliceResponse {
    // ID of the added slice.
    1: i64 slice_id (agw.js_conv="str", api.js_conv="true")

    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Request of DatasetService.UpdateSlice.
struct UpdateSliceRequest {
    // ID of the slice to update.
    2: required i64 slice_id (agw.js_conv="str", api.js_conv="true")
    // Content to be updated.
    7: optional string raw_text

    255: optional base.Base Base
}
// Status of a slice.
enum SliceStatus {
    // Not vectorized yet.
    PendingVectoring = 0
    // Vectorized.
    FinishVectoring = 1
    // Disabled.
    Deactive = 9
}
// Response of DatasetService.UpdateSlice.
struct UpdateSliceResponse {
    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Request of DatasetService.ListSlice.
struct ListSliceRequest {
    // ID of the document whose slices are listed.
    2: optional i64 document_id (agw.js_conv="str", api.js_conv="true")
    // Slice serial number; the list starts from the slice with this serial number.
    3: optional i64 sequence (agw.js_conv="str", api.js_conv="true")
    // Query keyword.
    4: optional string keyword
    // If only dataset_id is given, the slices under that knowledge base are returned.
    5: optional i64 dataset_id (agw.js_conv="str", api.js_conv="true")
    // Page size.
    21: i64 page_size (agw.js_conv="str", api.js_conv="true")

    255: optional base.Base Base
}
// Response of DatasetService.ListSlice.
struct ListSliceResponse {
    // Returned list of slices.
    1: list<SliceInfo> slices
    // Total number of slices.
    2: i64 total (agw.js_conv="str", api.js_conv="true")
    // Whether there are more slices.
    3: bool hasmore

    253: required i64 code
    254: required string msg
    255: optional base.BaseResp BaseResp
}
// Information about a single slice.
struct SliceInfo {
    // Slice ID.
    1: i64 slice_id (agw.js_conv="str", api.js_conv="true")
    // Slice content.
    2: string content
    // Slice state.
    3: SliceStatus status
    // Hit count.
    4: i64 hit_count (agw.js_conv="str", api.js_conv="true")
    // Character count.
    5: i64 char_count (agw.js_conv="str", api.js_conv="true")
    // Serial number.
    7: i64 sequence (agw.js_conv="str", api.js_conv="true")
    // ID of the document the slice belongs to.
    8: i64 document_id (agw.js_conv="str", api.js_conv="true")
    // Slice-related meta information (JSON); passes through the
    // extra -> chunk_info field of the slice table.
    9: string chunk_info
}