feat: manually mirror opencoze's code from bytedance
Change-Id: I09a73aadda978ad9511264a756b2ce51f5761adf
This commit is contained in:
126
backend/infra/contract/document/extra.go
Normal file
126
backend/infra/contract/document/extra.go
Normal file
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
// Keys used by the helpers in this file to read and write entries of a
// schema.Document's MetaData map. The trailing comment on each key gives the
// Go type the matching getter asserts for the stored value.
const (
	MetaDataKeyColumns     = "table_columns"      // val: []*Column
	MetaDataKeyColumnData  = "table_column_data"  // val: []*ColumnData
	MetaDataKeyColumnsOnly = "table_columns_only" // val: struct{}, which means table has no data, only header.

	MetaDataKeyCreatorID       = "creator_id"       // val: int64
	MetaDataKeyExternalStorage = "external_storage" // val: map[string]any
)
|
||||
|
||||
func GetDocumentColumns(doc *schema.Document) ([]*Column, error) {
|
||||
if doc == nil || doc.MetaData == nil {
|
||||
return nil, fmt.Errorf("invalid document")
|
||||
}
|
||||
|
||||
columns, ok := doc.MetaData[MetaDataKeyColumns].([]*Column)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid document columns")
|
||||
}
|
||||
|
||||
return columns, nil
|
||||
}
|
||||
|
||||
func WithDocumentColumns(doc *schema.Document, columns []*Column) *schema.Document {
|
||||
doc.MetaData[MetaDataKeyColumns] = columns
|
||||
return doc
|
||||
}
|
||||
|
||||
func GetDocumentColumnData(doc *schema.Document) ([]*ColumnData, error) {
|
||||
if doc == nil || doc.MetaData == nil {
|
||||
return nil, fmt.Errorf("invalid document")
|
||||
}
|
||||
|
||||
data, ok := doc.MetaData[MetaDataKeyColumnData].([]*ColumnData)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid document column data")
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func WithDocumentColumnData(doc *schema.Document, data []*ColumnData) *schema.Document {
|
||||
doc.MetaData[MetaDataKeyColumnData] = data
|
||||
return doc
|
||||
}
|
||||
|
||||
func WithDocumentColumnsOnly(doc *schema.Document) *schema.Document {
|
||||
doc.MetaData[MetaDataKeyColumnsOnly] = struct{}{}
|
||||
return doc
|
||||
}
|
||||
|
||||
func GetDocumentColumnsOnly(doc *schema.Document) (bool, error) {
|
||||
if doc == nil || doc.MetaData == nil {
|
||||
return false, fmt.Errorf("invalid document")
|
||||
}
|
||||
|
||||
_, ok := doc.MetaData[MetaDataKeyColumnsOnly].(struct{})
|
||||
return ok, nil
|
||||
}
|
||||
|
||||
func GetDocumentsColumnsOnly(docs []*schema.Document) (bool, error) {
|
||||
if len(docs) != 1 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return GetDocumentColumnsOnly(docs[0])
|
||||
}
|
||||
|
||||
func GetDocumentCreatorID(doc *schema.Document) (int64, error) {
|
||||
if doc == nil || doc.MetaData == nil {
|
||||
return 0, fmt.Errorf("invalid document")
|
||||
}
|
||||
|
||||
creatorID, ok := doc.MetaData[MetaDataKeyCreatorID].(int64)
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("invalid document creator id")
|
||||
}
|
||||
|
||||
return creatorID, nil
|
||||
}
|
||||
|
||||
func WithDocumentCreatorID(doc *schema.Document, creatorID int64) *schema.Document {
|
||||
doc.MetaData[MetaDataKeyCreatorID] = creatorID
|
||||
return doc
|
||||
}
|
||||
|
||||
func GetDocumentExternalStorage(doc *schema.Document) (map[string]any, error) {
|
||||
if doc == nil || doc.MetaData == nil {
|
||||
return nil, fmt.Errorf("invalid document")
|
||||
}
|
||||
|
||||
data, ok := doc.MetaData[MetaDataKeyExternalStorage].(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid document external storage")
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func WithDocumentExternalStorage(doc *schema.Document, externalStorage map[string]any) *schema.Document {
|
||||
doc.MetaData[MetaDataKeyExternalStorage] = externalStorage
|
||||
return doc
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package imageunderstand
|
||||
|
||||
import "context"
|
||||
|
||||
// ImageUnderstand is the contract for components that turn raw image bytes
// into a textual content string.
type ImageUnderstand interface {
	// ImageUnderstand takes the image bytes and returns the resulting content
	// or an error.
	// NOTE(review): the expected image encoding is not specified here — confirm
	// against implementations.
	ImageUnderstand(ctx context.Context, image []byte) (content string, err error)
}
|
||||
29
backend/infra/contract/document/nl2sql/nl2sql.go
Normal file
29
backend/infra/contract/document/nl2sql/nl2sql.go
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nl2sql
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/document"
|
||||
)
|
||||
|
||||
// NL2SQL is the contract for components that produce a SQL string from chat
// messages and a set of table schemas.
type NL2SQL interface {
	// NL2SQL returns the generated SQL for messages against tables. opts are
	// implementation options such as the one built by WithChatModel.
	NL2SQL(ctx context.Context, messages []*schema.Message, tables []*document.TableSchema, opts ...Option) (sql string, err error)
}
|
||||
31
backend/infra/contract/document/nl2sql/options.go
Normal file
31
backend/infra/contract/document/nl2sql/options.go
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nl2sql
|
||||
|
||||
import "github.com/coze-dev/coze-studio/backend/infra/contract/chatmodel"
|
||||
|
||||
// Option mutates an Options value; it is the functional-option type accepted
// by NL2SQL implementations.
type Option func(o *Options)
|
||||
|
||||
// Options collects the settings that Option functions can apply.
type Options struct {
	// ChatModel is the chat model set through WithChatModel.
	ChatModel chatmodel.BaseChatModel
}
|
||||
|
||||
func WithChatModel(cm chatmodel.BaseChatModel) Option {
|
||||
return func(o *Options) {
|
||||
o.ChatModel = cm
|
||||
}
|
||||
}
|
||||
24
backend/infra/contract/document/ocr/ocr.go
Normal file
24
backend/infra/contract/document/ocr/ocr.go
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package ocr
|
||||
|
||||
import "context"
|
||||
|
||||
// OCR is the contract for components that return text strings for an image
// supplied either inline or by URL.
type OCR interface {
	// FromBase64 returns the texts for an image passed as a base64 string.
	FromBase64(ctx context.Context, b64 string) (texts []string, err error)
	// FromURL returns the texts for an image referenced by url.
	FromURL(ctx context.Context, url string) (texts []string, err error)
}
|
||||
128
backend/infra/contract/document/parser/manager.go
Normal file
128
backend/infra/contract/document/parser/manager.go
Normal file
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package parser
|
||||
|
||||
import (
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/document"
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/lang/sets"
|
||||
)
|
||||
|
||||
// Manager hands out a Parser for a given Config.
type Manager interface {
	// GetParser returns the Parser matching config, or an error.
	GetParser(config *Config) (Parser, error)
}
|
||||
|
||||
// Config describes the file being parsed and how it should be parsed and
// chunked.
type Config struct {
	FileExtension    FileExtension     // extension of the source file
	ParsingStrategy  *ParsingStrategy  // parse settings
	ChunkingStrategy *ChunkingStrategy // chunk settings
}
|
||||
|
||||
// ParsingStrategy holds the settings applied when parsing a document before
// indexing. Fields are grouped by the kind of file they concern (document,
// sheet, image). Fields tagged `json:"-"` are excluded from JSON encoding.
type ParsingStrategy struct {
	// Doc
	ExtractImage bool  `json:"extract_image"` // extract image elements
	ExtractTable bool  `json:"extract_table"` // extract table elements
	ImageOCR     bool  `json:"image_ocr"`     // run OCR on images
	FilterPages  []int `json:"filter_pages"`  // page filter; the first page is 1

	// Sheet
	SheetID             *int               `json:"sheet_id"`        // xlsx sheet id
	HeaderLine          int                `json:"header_line"`     // header row
	DataStartLine       int                `json:"data_start_line"` // first data row
	RowsCount           int                `json:"rows_count"`      // number of data rows to read
	IsAppend            bool               `json:"-"`               // row insertion (append)
	Columns             []*document.Column `json:"-"`               // header columns the sheet is aligned to
	IgnoreColumnTypeErr bool               `json:"-"`               // when true, mismatches between column type and value are ignored and the value is left empty

	// Image
	ImageAnnotationType ImageAnnotationType `json:"image_annotation_type"` // image content annotation type
}
|
||||
|
||||
// ChunkingStrategy holds the settings for splitting parsed content into
// chunks. ChunkType selects the mode; the remaining fields are grouped by the
// mode they configure.
type ChunkingStrategy struct {
	ChunkType ChunkType `json:"chunk_type"`

	// custom config
	ChunkSize       int64  `json:"chunk_size"` // maximum chunk length
	Separator       string `json:"separator"`  // chunk separator
	Overlap         int64  `json:"overlap"`    // chunk overlap ratio
	TrimSpace       bool   `json:"trim_space"`
	TrimURLAndEmail bool   `json:"trim_url_and_email"`

	// leveled config
	MaxDepth  int64 `json:"max_depth"`  // maximum depth when chunking by level
	SaveTitle bool  `json:"save_title"` // keep the level titles
}
|
||||
|
||||
// ChunkType selects the chunking mode used by ChunkingStrategy.
type ChunkType int64

const (
	ChunkTypeDefault ChunkType = 0 // automatic chunking
	ChunkTypeCustom  ChunkType = 1 // chunking by custom rules
	ChunkTypeLeveled ChunkType = 2 // chunking by level
)
|
||||
|
||||
// ImageAnnotationType selects how image content is annotated.
type ImageAnnotationType int64

const (
	ImageAnnotationTypeModel  ImageAnnotationType = 0 // automatic annotation by a model
	ImageAnnotationTypeManual ImageAnnotationType = 1 // manual annotation
)
|
||||
|
||||
// FileExtension is a file extension written without a leading dot.
type FileExtension string

// Declared file extensions, grouped by the kind of content they carry.
const (
	// document
	FileExtensionPDF      FileExtension = "pdf"
	FileExtensionTXT      FileExtension = "txt"
	FileExtensionDoc      FileExtension = "doc"
	FileExtensionDocx     FileExtension = "docx"
	FileExtensionMarkdown FileExtension = "md"

	// sheet
	FileExtensionCSV      FileExtension = "csv"
	FileExtensionXLSX     FileExtension = "xlsx"
	FileExtensionJSON     FileExtension = "json"
	FileExtensionJsonMaps FileExtension = "json_maps" // json of []map[string]string

	// image
	FileExtensionJPG  FileExtension = "jpg"
	FileExtensionJPEG FileExtension = "jpeg"
	FileExtensionPNG  FileExtension = "png"
)
|
||||
|
||||
func ValidateFileExtension(fileSuffix string) (ext FileExtension, support bool) {
|
||||
fileExtension := FileExtension(fileSuffix)
|
||||
_, ok := fileExtensionSet[fileExtension]
|
||||
if !ok {
|
||||
return "", false
|
||||
}
|
||||
return fileExtension, true
|
||||
}
|
||||
|
||||
// fileExtensionSet lists the extensions ValidateFileExtension reports as
// supported.
//
// NOTE(review): FileExtensionXLSX is declared among the sheet extensions but
// is not listed here, so ValidateFileExtension reports "xlsx" as unsupported.
// Confirm whether this omission is intentional.
var fileExtensionSet = sets.Set[FileExtension]{
	FileExtensionPDF:      {},
	FileExtensionTXT:      {},
	FileExtensionDoc:      {},
	FileExtensionDocx:     {},
	FileExtensionMarkdown: {},
	FileExtensionCSV:      {},
	FileExtensionJSON:     {},
	FileExtensionJsonMaps: {},
	FileExtensionJPG:      {},
	FileExtensionJPEG:     {},
	FileExtensionPNG:      {},
}
|
||||
21
backend/infra/contract/document/parser/parser.go
Normal file
21
backend/infra/contract/document/parser/parser.go
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package parser
|
||||
|
||||
import "github.com/cloudwego/eino/components/document/parser"
|
||||
|
||||
// Parser is an alias for eino's document parser.Parser, so values of either
// type are interchangeable.
type Parser = parser.Parser
|
||||
26
backend/infra/contract/document/progressbar/interface.go
Normal file
26
backend/infra/contract/document/progressbar/interface.go
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package progressbar
|
||||
|
||||
import "context"
|
||||
|
||||
// ProgressBar is the interface for the progress bar.
type ProgressBar interface {
	// AddN records n additional units.
	// NOTE(review): presumably n counts completed work items — confirm against
	// implementations.
	AddN(n int) error
	// ReportError records err on the progress bar.
	ReportError(err error) error
	// GetProgress returns the completion percentage, the remaining seconds and
	// an error message.
	// NOTE(review): presumably errMsg is empty when no error was reported —
	// confirm against implementations.
	GetProgress(ctx context.Context) (percent int, remainSec int, errMsg string)
}
|
||||
43
backend/infra/contract/document/rerank/rerank.go
Normal file
43
backend/infra/contract/document/rerank/rerank.go
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package rerank
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
// Reranker is the contract for components that reorder retrieved data for a
// query.
type Reranker interface {
	// Rerank processes req and returns the sorted result or an error.
	Rerank(ctx context.Context, req *Request) (*Response, error)
}
|
||||
|
||||
// Request is the input to Reranker.Rerank.
type Request struct {
	Query string    // query the data is ranked against
	Data  [][]*Data // candidate data as a list of lists; NOTE(review): presumably one inner list per retrieval source — confirm
	TopN  *int64    // optional result limit; NOTE(review): handling of nil is implementation-defined — confirm
}
|
||||
|
||||
// Response is the output of Reranker.Rerank.
type Response struct {
	SortedData []*Data // highest score first
	TokenUsage *int64  // optional token usage figure
}
|
||||
|
||||
// Data pairs a document with a score.
type Data struct {
	Document *schema.Document
	Score    float64
}
|
||||
54
backend/infra/contract/document/searchstore/dsl.go
Normal file
54
backend/infra/contract/document/searchstore/dsl.go
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package searchstore
|
||||
|
||||
import "fmt"
|
||||
|
||||
// DSL is one node of a filter expression: an operator, a field name and a
// value.
type DSL struct {
	Op    Op
	Field string
	Value interface{} // builtin types / []*DSL
}
|
||||
|
||||
// Op names the operator of a DSL node.
type Op string

// Declared operators: four comparison operators followed by two logical
// connectives.
const (
	OpEq   Op = "eq"
	OpNe   Op = "ne"
	OpLike Op = "like"
	OpIn   Op = "in"

	OpAnd Op = "and"
	OpOr  Op = "or"
)
|
||||
|
||||
func (d *DSL) DSL() map[string]any {
|
||||
return map[string]any{"dsl": d}
|
||||
}
|
||||
|
||||
func LoadDSL(src map[string]any) (*DSL, error) {
|
||||
if src == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
dsl, ok := src["dsl"].(*DSL)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("load dsl failed")
|
||||
}
|
||||
|
||||
return dsl, nil
|
||||
}
|
||||
82
backend/infra/contract/document/searchstore/manager.go
Normal file
82
backend/infra/contract/document/searchstore/manager.go
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package searchstore
|
||||
|
||||
import (
|
||||
"context"
|
||||
)
|
||||
|
||||
// Manager manages search store collections and hands out SearchStore handles
// for them.
type Manager interface {
	// Create creates the collection described by req.
	Create(ctx context.Context, req *CreateRequest) error

	// Drop removes the collection named in req.
	Drop(ctx context.Context, req *DropRequest) error

	// GetType reports the store type this manager serves.
	GetType() SearchStoreType

	// GetSearchStore returns the SearchStore for collectionName.
	GetSearchStore(ctx context.Context, collectionName string) (SearchStore, error)
}
|
||||
|
||||
// CreateRequest is the input to Manager.Create.
type CreateRequest struct {
	CollectionName string            // name of the collection
	Fields         []*Field          // field definitions of the collection
	CollectionMeta map[string]string // string key/value metadata for the collection
}
|
||||
|
||||
// DropRequest is the input to Manager.Drop.
type DropRequest struct {
	CollectionName string // name of the collection
}
|
||||
|
||||
// GetSearchStoreRequest carries a collection name.
// NOTE(review): Manager.GetSearchStore takes the collection name directly and
// nothing visible here uses this type — confirm whether it is still needed.
type GetSearchStoreRequest struct {
	CollectionName string
}
|
||||
|
||||
// Field describes one field of a collection passed in CreateRequest.
type Field struct {
	Name        FieldName
	Type        FieldType
	Description string

	Nullable  bool
	IsPrimary bool

	// Indexing flags the field for indexing.
	// NOTE(review): exact effect is implementation-defined — confirm.
	Indexing bool
}
|
||||
|
||||
// SearchStoreType names the kind of search store a Manager serves.
type SearchStoreType string

const (
	TypeVectorStore SearchStoreType = "vector"
	TypeTextStore   SearchStoreType = "text"
)
|
||||
|
||||
// FieldName is an alias for string used for collection field names.
type FieldName = string

// Built-in field names; the trailing comment gives each field's value type.
const (
	FieldID          FieldName = "id"           // int64
	FieldCreatorID   FieldName = "creator_id"   // int64
	FieldTextContent FieldName = "text_content" // string
)
|
||||
|
||||
// FieldType enumerates the value types a Field can have.
type FieldType int64

const (
	FieldTypeUnknown      FieldType = 0
	FieldTypeInt64        FieldType = 1
	FieldTypeText         FieldType = 2
	FieldTypeDenseVector  FieldType = 3
	FieldTypeSparseVector FieldType = 4
)
|
||||
87
backend/infra/contract/document/searchstore/options.go
Normal file
87
backend/infra/contract/document/searchstore/options.go
Normal file
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package searchstore
|
||||
|
||||
import (
|
||||
"github.com/cloudwego/eino/components/indexer"
|
||||
"github.com/cloudwego/eino/components/retriever"
|
||||
|
||||
"github.com/coze-dev/coze-studio/backend/infra/contract/document/progressbar"
|
||||
)
|
||||
|
||||
// IndexerOptions holds the implementation-specific indexer settings populated
// by the With* indexer options in this file.
type IndexerOptions struct {
	PartitionKey   *string                 // set by WithIndexerPartitionKey
	Partition      *string                 // storage partition mapping; set by WithPartition
	IndexingFields []string                // set by WithIndexingFields
	ProgressBar    progressbar.ProgressBar // set by WithProgressBar
}
|
||||
|
||||
// RetrieverOptions holds the implementation-specific retriever settings
// populated by the With* retriever options in this file.
type RetrieverOptions struct {
	MultiMatch   *MultiMatch // multi-field query; set by WithMultiMatch
	PartitionKey *string     // set by WithRetrieverPartitionKey
	Partitions   []string    // query partition mapping; set by WithPartitions
}
|
||||
|
||||
// MultiMatch describes a query issued against several fields.
type MultiMatch struct {
	Fields []string // fields to query
	Query  string   // query text
}
|
||||
|
||||
func WithIndexerPartitionKey(key string) indexer.Option {
|
||||
return indexer.WrapImplSpecificOptFn(func(o *IndexerOptions) {
|
||||
o.PartitionKey = &key
|
||||
})
|
||||
}
|
||||
|
||||
func WithPartition(partition string) indexer.Option {
|
||||
return indexer.WrapImplSpecificOptFn(func(o *IndexerOptions) {
|
||||
o.Partition = &partition
|
||||
})
|
||||
}
|
||||
|
||||
func WithIndexingFields(fields []string) indexer.Option {
|
||||
return indexer.WrapImplSpecificOptFn(func(o *IndexerOptions) {
|
||||
o.IndexingFields = fields
|
||||
})
|
||||
}
|
||||
|
||||
func WithProgressBar(progressBar progressbar.ProgressBar) indexer.Option {
|
||||
return indexer.WrapImplSpecificOptFn(func(o *IndexerOptions) {
|
||||
o.ProgressBar = progressBar
|
||||
})
|
||||
}
|
||||
|
||||
func WithMultiMatch(fields []string, query string) retriever.Option {
|
||||
return retriever.WrapImplSpecificOptFn(func(o *RetrieverOptions) {
|
||||
o.MultiMatch = &MultiMatch{
|
||||
Fields: fields,
|
||||
Query: query,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func WithRetrieverPartitionKey(key string) retriever.Option {
|
||||
return retriever.WrapImplSpecificOptFn(func(o *RetrieverOptions) {
|
||||
o.PartitionKey = &key
|
||||
})
|
||||
}
|
||||
|
||||
func WithPartitions(partitions []string) retriever.Option {
|
||||
return retriever.WrapImplSpecificOptFn(func(o *RetrieverOptions) {
|
||||
o.Partitions = partitions
|
||||
})
|
||||
}
|
||||
32
backend/infra/contract/document/searchstore/searchstore.go
Normal file
32
backend/infra/contract/document/searchstore/searchstore.go
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package searchstore
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/cloudwego/eino/components/indexer"
|
||||
"github.com/cloudwego/eino/components/retriever"
|
||||
)
|
||||
|
||||
// SearchStore combines eino's indexer.Indexer and retriever.Retriever with a
// Delete operation.
type SearchStore interface {
	indexer.Indexer

	retriever.Retriever

	// Delete removes the entries identified by ids.
	Delete(ctx context.Context, ids []string) error
}
|
||||
155
backend/infra/contract/document/table.go
Normal file
155
backend/infra/contract/document/table.go
Normal file
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
|
||||
)
|
||||
|
||||
// TableSchema describes a table by name, comment and columns.
type TableSchema struct {
	Name    string
	Comment string
	Columns []*Column
}
|
||||
|
||||
// Column describes one column of a table.
type Column struct {
	ID          int64
	Name        string
	Type        TableColumnType
	Description string
	Nullable    bool
	IsPrimary   bool
	Sequence    int // ordering number
}
|
||||
|
||||
// TableColumnType enumerates the value types a table column can hold. Its
// String method maps each value to a type name.
type TableColumnType int

const (
	TableColumnTypeUnknown TableColumnType = 0
	TableColumnTypeString  TableColumnType = 1
	TableColumnTypeInteger TableColumnType = 2
	TableColumnTypeTime    TableColumnType = 3
	TableColumnTypeNumber  TableColumnType = 4
	TableColumnTypeBoolean TableColumnType = 5
	TableColumnTypeImage   TableColumnType = 6
)
|
||||
|
||||
func (t TableColumnType) String() string {
|
||||
switch t {
|
||||
case TableColumnTypeString:
|
||||
return "varchar"
|
||||
case TableColumnTypeInteger:
|
||||
return "bigint"
|
||||
case TableColumnTypeTime:
|
||||
return "timestamp"
|
||||
case TableColumnTypeNumber:
|
||||
return "double"
|
||||
case TableColumnTypeBoolean:
|
||||
return "boolean"
|
||||
case TableColumnTypeImage:
|
||||
return "image"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// ColumnData is one cell value of a table column. Type selects which of the
// Val* pointers the accessor methods read.
type ColumnData struct {
	ColumnID   int64
	ColumnName string
	Type       TableColumnType
	ValString  *string
	ValInteger *int64
	ValTime    *time.Time
	ValNumber  *float64
	ValBoolean *bool
	ValImage   *string // base64 / url
}
|
||||
|
||||
func (d *ColumnData) GetValue() interface{} {
|
||||
switch d.Type {
|
||||
case TableColumnTypeString:
|
||||
return d.ValString
|
||||
case TableColumnTypeInteger:
|
||||
return d.ValInteger
|
||||
case TableColumnTypeTime:
|
||||
return d.ValTime
|
||||
case TableColumnTypeNumber:
|
||||
return d.ValNumber
|
||||
case TableColumnTypeBoolean:
|
||||
return d.ValBoolean
|
||||
case TableColumnTypeImage:
|
||||
return d.ValImage
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (d *ColumnData) GetStringValue() string {
|
||||
switch d.Type {
|
||||
case TableColumnTypeString:
|
||||
return ptr.From(d.ValString)
|
||||
case TableColumnTypeInteger:
|
||||
return strconv.FormatInt(ptr.From(d.ValInteger), 10)
|
||||
case TableColumnTypeTime:
|
||||
return ptr.From(d.ValTime).Format(time.DateTime)
|
||||
case TableColumnTypeNumber:
|
||||
return strconv.FormatFloat(ptr.From(d.ValNumber), 'f', 20, 64)
|
||||
case TableColumnTypeBoolean:
|
||||
return strconv.FormatBool(ptr.From(d.ValBoolean))
|
||||
case TableColumnTypeImage:
|
||||
return ptr.From(d.ValImage)
|
||||
default:
|
||||
return ptr.From(d.ValString)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *ColumnData) GetNullableStringValue() string {
|
||||
switch d.Type {
|
||||
case TableColumnTypeString:
|
||||
return ptr.From(d.ValString)
|
||||
case TableColumnTypeInteger:
|
||||
if d.ValInteger == nil {
|
||||
return ""
|
||||
}
|
||||
return strconv.FormatInt(ptr.From(d.ValInteger), 10)
|
||||
case TableColumnTypeTime:
|
||||
if d.ValTime == nil {
|
||||
return ""
|
||||
}
|
||||
return ptr.From(d.ValTime).Format(time.DateTime)
|
||||
case TableColumnTypeNumber:
|
||||
if d.ValNumber == nil {
|
||||
return ""
|
||||
}
|
||||
return strconv.FormatFloat(ptr.From(d.ValNumber), 'f', 20, 64)
|
||||
case TableColumnTypeBoolean:
|
||||
if d.ValBoolean == nil {
|
||||
return ""
|
||||
}
|
||||
return strconv.FormatBool(ptr.From(d.ValBoolean))
|
||||
case TableColumnTypeImage:
|
||||
if d.ValImage == nil {
|
||||
return ""
|
||||
}
|
||||
return ptr.From(d.ValImage)
|
||||
default:
|
||||
return ptr.From(d.ValString)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user