feat(infra): integrate PaddleOCR's PP-StructureV3 as a document parser backend (#714)

This commit is contained in:
Lin Manhui
2025-08-13 16:37:42 +08:00
committed by GitHub
parent 708a6ed0c0
commit 6b60c07c22
30 changed files with 657 additions and 174 deletions

View File

@@ -29,7 +29,7 @@ import (
contract "github.com/coze-dev/coze-studio/backend/infra/contract/document/parser"
)
func parseCSV(config *contract.Config) parseFn {
func ParseCSV(config *contract.Config) ParseFn {
return func(ctx context.Context, reader io.Reader, opts ...parser.Option) (docs []*schema.Document, err error) {
iter := &csvIterator{csv.NewReader(utfbom.SkipOnly(reader))}
return parseByRowIterator(iter, config, opts...)