diff --git a/backend/application/knowledge/init.go b/backend/application/knowledge/init.go index d2591028..cb165639 100644 --- a/backend/application/knowledge/init.go +++ b/backend/application/knowledge/init.go @@ -36,6 +36,7 @@ import ( "github.com/cloudwego/eino-ext/components/model/qwen" "github.com/cloudwego/eino/components/prompt" "github.com/cloudwego/eino/schema" + "github.com/coze-dev/coze-studio/backend/infra/impl/embedding/http" "github.com/milvus-io/milvus/client/v2/milvusclient" "github.com/volcengine/volc-sdk-golang/service/vikingdb" "github.com/volcengine/volc-sdk-golang/service/visual" @@ -371,6 +372,21 @@ func getEmbedding(ctx context.Context) (embedding.Embedder, error) { return nil, fmt.Errorf("init ollama embedding failed, err=%w", err) } + + case "http": + var ( + httpEmbeddingBaseURL = os.Getenv("HTTP_EMBEDDING_ADDR") + httpEmbeddingDims = os.Getenv("HTTP_EMBEDDING_DIMS") + ) + dims, err := strconv.ParseInt(httpEmbeddingDims, 10, 64) + if err != nil { + return nil, fmt.Errorf("init http embedding dims failed, err=%w", err) + } + emb, err = http.NewEmbedding(httpEmbeddingBaseURL, dims) + if err != nil { + return nil, fmt.Errorf("init http embedding failed, err=%w", err) + } + default: return nil, fmt.Errorf("init knowledge embedding failed, type not configured") } diff --git a/backend/domain/knowledge/service/knowledge_integration_test.go b/backend/domain/knowledge/service/knowledge_integration_test.go index 128a9824..eafd78b7 100644 --- a/backend/domain/knowledge/service/knowledge_integration_test.go +++ b/backend/domain/knowledge/service/knowledge_integration_test.go @@ -145,7 +145,7 @@ func (suite *KnowledgeTestSuite) SetupSuite() { panic(err) } - emb, err := hembed.NewEmbedding(embEndpoint) + emb, err := hembed.NewEmbedding(embEndpoint, 1024) if err != nil { panic(err) } diff --git a/backend/go.mod b/backend/go.mod index 11a7b965..e1fa95bb 100755 --- a/backend/go.mod +++ b/backend/go.mod @@ -54,6 +54,7 @@ require github.com/alicebob/miniredis/v2 v2.34.0 require ( github.com/DATA-DOG/go-sqlmock v1.5.2 + github.com/aws/aws-sdk-go-v2/service/s3 v1.84.1 github.com/cloudwego/eino-ext/components/embedding/ark v0.0.0-20250522060253-ddb617598b09 github.com/cloudwego/eino-ext/components/embedding/ollama v0.0.0-20250728060543-79ec300857b8 github.com/cloudwego/eino-ext/components/embedding/openai v0.0.0-20250522060253-ddb617598b09 @@ -84,7 +85,6 @@ require ( github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.37 // indirect github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.5 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.18 // indirect - github.com/aws/aws-sdk-go-v2/service/s3 v1.84.1 // indirect github.com/cloudwego/gopkg v0.1.4 // indirect github.com/evanphx/json-patch v4.12.0+incompatible // indirect github.com/extrame/ole2 v0.0.0-20160812065207-d69429661ad7 // indirect @@ -111,10 +111,10 @@ require ( require ( github.com/anthropics/anthropic-sdk-go v1.4.0 // indirect github.com/avast/retry-go v3.0.0+incompatible // indirect - github.com/aws/aws-sdk-go-v2 v1.36.6 // indirect + github.com/aws/aws-sdk-go-v2 v1.36.6 github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11 // indirect - github.com/aws/aws-sdk-go-v2/config v1.29.1 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.17.54 // indirect + github.com/aws/aws-sdk-go-v2/config v1.29.1 + github.com/aws/aws-sdk-go-v2/credentials v1.17.54 github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.24 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.37 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.37 // indirect diff --git a/backend/go.sum b/backend/go.sum index 37dcbbec..12e93bf7 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -822,12 +822,8 @@ github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHS github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/aws/aws-sdk-go v1.40.45/go.mod h1:585smgzpB/KqRA+K3y/NL/oYRqQvpNJYvLm+LY1U59Q= github.com/aws/aws-sdk-go-v2 v1.9.1/go.mod h1:cK/D0BBs0b/oWPIcX/Z/obahJK1TT7IPVjy53i/mX/4= -github.com/aws/aws-sdk-go-v2 v1.33.0 h1:Evgm4DI9imD81V0WwD+TN4DCwjUMdc94TrduMLbgZJs= -github.com/aws/aws-sdk-go-v2 v1.33.0/go.mod h1:P5WJBrYqqbWVaOxgH0X/FYYD47/nooaPOZPlQdmiN2U= github.com/aws/aws-sdk-go-v2 v1.36.6 h1:zJqGjVbRdTPojeCGWn5IR5pbJwSQSBh5RWFTQcEQGdU= github.com/aws/aws-sdk-go-v2 v1.36.6/go.mod h1:EYrzvCCN9CMUTa5+6lf6MM4tq3Zjp8UhSGR/cBsjai0= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.3 h1:tW1/Rkad38LA15X4UQtjXZXNKsCgkshC3EbmcUmghTg= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.3/go.mod h1:UbnqO+zjqk3uIt9yCACHJ9IVNhyhOCnYk8yA19SAWrM= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11 h1:12SpdwU8Djs+YGklkinSSlcrPyj3H4VifVsKf78KbwA= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11/go.mod h1:dd+Lkp6YmMryke+qxW/VnKyhMBDTYP41Q2Bb+6gNZgY= github.com/aws/aws-sdk-go-v2/config v1.29.1 h1:JZhGawAyZ/EuJeBtbQYnaoftczcb2drR2Iq36Wgz4sQ= @@ -836,12 +832,8 @@ github.com/aws/aws-sdk-go-v2/credentials v1.17.54 h1:4UmqeOqJPvdvASZWrKlhzpRahAu github.com/aws/aws-sdk-go-v2/credentials v1.17.54/go.mod h1:RTdfo0P0hbbTxIhmQrOsC/PquBZGabEPnCaxxKRPSnI= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.24 h1:5grmdTdMsovn9kPZPI23Hhvp0ZyNm5cRO+IZFIYiAfw= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.24/go.mod h1:zqi7TVKTswH3Ozq28PkmBmgzG1tona7mo9G2IJg4Cis= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.28 h1:igORFSiH3bfq4lxKFkTSYDhJEUCYo6C8VKiWJjYwQuQ= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.28/go.mod h1:3So8EA/aAYm36L7XIvCVwLa0s5N0P7o2b1oqnx/2R4g= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.37 h1:osMWfm/sC/L4tvEdQ65Gri5ZZDCUpuYJZbTTDrsn4I0= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.37/go.mod h1:ZV2/1fbjOPr4G4v38G3Ww5TBT4+hmsK45s/rxu1fGy0= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.28 h1:1mOW9zAUMhTSrMDssEHS/ajx8JcAj/IcftzcmNlmVLI= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.28/go.mod h1:kGlXVIWDfvt2Ox5zEaNglmq0hXPHgQFNMix33Tw22jA= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.37 h1:v+X21AvTb2wZ+ycg1gx+orkB/9U6L7AOp93R7qYxsxM= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.37/go.mod h1:G0uM1kyssELxmJ2VZEfG0q2npObR3BAkF3c1VsfVnfs= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 h1:VaRN3TlFdd6KxX1x3ILT5ynH6HvKgqdiXoTxAF4HQcQ= @@ -849,14 +841,10 @@ github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1/go.mod h1:FbtygfRFze9usAadmnGJN github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.37 h1:XTZZ0I3SZUHAtBLBU6395ad+VOblE0DwQP6MuaNeics= github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.37/go.mod h1:Pi6ksbniAWVwu2S8pEzcYPyhUkAcLaufxN7PfAUQjBk= github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.8.1/go.mod h1:CM+19rL1+4dFWnOQKwDc7H1KwXTz+h61oUSHyhV0b3o= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 h1:iXtILhvDxB6kPvEXgsDhGaZCSC6LQET5ZHSdJozeI0Y= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1/go.mod h1:9nu0fVANtYiAePIBh2/pFUSwtJ402hLnp854CNoDOeE= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4 h1:CXV68E2dNqhuynZJPB80bhPQwAKqBWVer887figW6Jc= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4/go.mod h1:/xFi9KtvBXP97ppCz1TAEvU1Uf66qvid89rbem3wCzQ= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.5 h1:M5/B8JUaCI8+9QD+u3S/f4YHpvqE9RpSkV3rf0Iks2w= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.5/go.mod h1:Bktzci1bwdbpuLiu3AOksiNPMl/LLKmX1TWmqp2xbvs= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.9 h1:TQmKDyETFGiXVhZfQ/I0cCFziqqX58pi4tKJGYGFSz0= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.9/go.mod h1:HVLPK2iHQBUx7HfZeOQSEu3v2ubZaAY2YPbAm5/WUyY= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.18 h1:vvbXsA2TVO80/KT7ZqCbx934dt6PY+vQ8hZpUZ/cpYg= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.18/go.mod h1:m2JJHledjBGNMsLOF1g9gbAxprzq3KjC8e4lxtn+eWg= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.18 h1:OS2e0SKqsU2LiJPqL8u9x41tKc6MMEHrWjLVLn3oysg= @@ -870,8 +858,6 @@ github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.10/go.mod h1:Fzsj6lZEb8AkTE5S github.com/aws/aws-sdk-go-v2/service/sts v1.33.9 h1:BRVDbewN6VZcwr+FBOszDKvYeXY1kJ+GGMCcpghlw0U= github.com/aws/aws-sdk-go-v2/service/sts v1.33.9/go.mod h1:f6vjfZER1M17Fokn0IzssOTMT2N8ZSq+7jnNF0tArvw= github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= -github.com/aws/smithy-go v1.22.1 h1:/HPHZQ0g7f4eUeK6HKglFz8uwVfZKgoI25rb/J+dnro= -github.com/aws/smithy-go v1.22.1/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/aws/smithy-go v1.22.4 h1:uqXzVZNuNexwc/xrh6Tb56u89WDlJY6HS+KC0S4QSjw= github.com/aws/smithy-go v1.22.4/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= diff --git a/backend/infra/impl/embedding/http/http.go b/backend/infra/impl/embedding/http/http.go index e5624412..2f101699 100644 --- a/backend/infra/impl/embedding/http/http.go +++ b/backend/infra/impl/embedding/http/http.go @@ -22,7 +22,6 @@ import ( "encoding/json" "io" "net/http" - "strconv" "time" opt "github.com/cloudwego/eino/components/embedding" @@ -30,10 +29,7 @@ import ( "github.com/coze-dev/coze-studio/backend/infra/contract/embedding" ) -const ( - pathDim = "/dimension" - pathEmbed = "/embedding" -) +const pathEmbed = "/embedding" type embedReq struct { Texts []string `json:"texts"` @@ -45,32 +41,12 @@ type embedResp struct { Sparse []map[int]float64 `json:"sparse"` } -func NewEmbedding(addr string) (embedding.Embedder, error) { +func NewEmbedding(addr string, dims int64) (embedding.Embedder, error) { cli := &http.Client{Timeout: time.Second * 30} - req, err := http.NewRequest(http.MethodGet, addr+pathDim, nil) - if err != nil { - return nil, err - } - - resp, err := cli.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - b, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } - - dim, err := strconv.ParseInt(string(b), 10, 64) - if err != nil { - return nil, err - } - return &embedder{ cli: cli, addr: addr, - dim: dim, + dim: dims, }, nil } diff --git a/backend/infra/impl/embedding/http/http_test.go b/backend/infra/impl/embedding/http/http_test.go index 368ec03a..5c88b63b 100644 --- a/backend/infra/impl/embedding/http/http_test.go +++ b/backend/infra/impl/embedding/http/http_test.go @@ -31,7 +31,7 @@ func TestHTTPEmbedding(t *testing.T) { } ctx := context.Background() - emb, err := NewEmbedding("http://127.0.0.1:6543") + emb, err := NewEmbedding("http://127.0.0.1:6543", 1024) assert.NoError(t, err) texts := []string{ "hello", diff --git a/docker/.env.example b/docker/.env.example index d4d14166..1f51c45e 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -115,6 +115,7 @@ export OLLAMA_EMBEDDING_DIMS="" # http embedding export HTTP_EMBEDDING_ADDR="http://127.0.0.1:6543" +export HTTP_EMBEDDING_DIMS=1024 # Settings for OCR # If you want to use the OCR-related functions in the knowledge base feature,You need to set up the OCR configuration.