feat: support EMBEDDING_MAX_BATCH_SIZE (#311)

This commit is contained in:
N3ko
2025-07-30 15:31:54 +08:00
committed by GitHub
parent f93f26fc48
commit bb74272385
9 changed files with 85 additions and 71 deletions

View File

@@ -27,13 +27,14 @@ import (
)
// denseOnlyWrap embeds an embedding.Embedder and carries two extra settings
// alongside it: a dimension value and a batch size.
//
// NOTE(review): the dims line appears twice below, presumably because this
// listing interleaves the pre-change and post-change lines of the commit —
// confirm against the real file before editing.
type denseOnlyWrap struct {
// dims is supplied by the constructors from their dimensions argument.
// Presumably the embedding vector width — its use is not visible here.
dims int64
dims int64
// batchSize is the chunk size EmbedStrings passes to slices.Chunks when
// splitting the input texts into parts.
batchSize int
embedding.Embedder
}
func (d denseOnlyWrap) EmbedStrings(ctx context.Context, texts []string, opts ...embedding.Option) ([][]float64, error) {
resp := make([][]float64, 0, len(texts))
for _, part := range slices.Chunks(texts, 100) {
for _, part := range slices.Chunks(texts, d.batchSize) {
partResult, err := d.Embedder.EmbedStrings(ctx, part, opts...)
if err != nil {
return nil, err

View File

@@ -24,10 +24,10 @@ import (
contract "github.com/coze-dev/coze-studio/backend/infra/contract/embedding"
)
// NewOllamaEmbedder builds an embedder with ollama.NewEmbedder from config and
// returns it wrapped in a denseOnlyWrap that records dimensions and batchSize.
// Any error from ollama.NewEmbedder is returned unchanged with a nil embedder.
//
// NOTE(review): batchSize is stored without validation and is later used as
// the chunk size in denseOnlyWrap.EmbedStrings — confirm callers never pass a
// value below 1.
func NewOllamaEmbedder(ctx context.Context, config *ollama.EmbeddingConfig, dimensions int64) (contract.Embedder, error) {
func NewOllamaEmbedder(ctx context.Context, config *ollama.EmbeddingConfig, dimensions int64, batchSize int) (contract.Embedder, error) {
emb, err := ollama.NewEmbedder(ctx, config)
if err != nil {
return nil, err
}
return &denseOnlyWrap{dims: dimensions, Embedder: emb}, nil
return &denseOnlyWrap{dims: dimensions, batchSize: batchSize, Embedder: emb}, nil
}

View File

@@ -24,10 +24,10 @@ import (
contract "github.com/coze-dev/coze-studio/backend/infra/contract/embedding"
)
// NewOpenAIEmbedder builds an embedder with openai.NewEmbedder from config and
// returns it wrapped in a denseOnlyWrap that records dimensions and batchSize.
// Any error from openai.NewEmbedder is returned unchanged with a nil embedder.
//
// NOTE(review): batchSize is stored without validation and is later used as
// the chunk size in denseOnlyWrap.EmbedStrings — confirm callers never pass a
// value below 1.
func NewOpenAIEmbedder(ctx context.Context, config *openai.EmbeddingConfig, dimensions int64) (contract.Embedder, error) {
func NewOpenAIEmbedder(ctx context.Context, config *openai.EmbeddingConfig, dimensions int64, batchSize int) (contract.Embedder, error) {
emb, err := openai.NewEmbedder(ctx, config)
if err != nil {
return nil, err
}
return &denseOnlyWrap{dims: dimensions, Embedder: emb}, nil
return &denseOnlyWrap{dims: dimensions, batchSize: batchSize, Embedder: emb}, nil
}