feat(infra): integrate PaddleOCR's PP-StructureV3 as a document parser backend (#714)

This commit is contained in:
Lin Manhui
2025-08-13 16:37:42 +08:00
committed by GitHub
parent 708a6ed0c0
commit 6b60c07c22
30 changed files with 657 additions and 174 deletions

View File

@@ -55,7 +55,7 @@ func TestParseJSON(t *testing.T) {
},
ChunkingStrategy: nil,
}
-pfn := parseJSON(config)
+pfn := ParseJSON(config)
docs, err := pfn(context.Background(), reader, parser.WithExtraMeta(map[string]any{
"document_id": int64(123),
"knowledge_id": int64(456),
@@ -121,7 +121,7 @@ func TestParseJSONWithSchema(t *testing.T) {
},
},
}
-pfn := parseJSON(config)
+pfn := ParseJSON(config)
docs, err := pfn(context.Background(), reader, parser.WithExtraMeta(map[string]any{
"document_id": int64(123),
"knowledge_id": int64(456),