feat(infra): integrate PaddleOCR's PP-StructureV3 as a document parser backend (#714)

This commit is contained in:
Lin Manhui
2025-08-13 16:37:42 +08:00
committed by GitHub
parent 708a6ed0c0
commit 6b60c07c22
30 changed files with 657 additions and 174 deletions

View File

@@ -39,7 +39,7 @@ func TestChunkCustom(t *testing.T) {
TrimURLAndEmail: true,
}
-slices, err := chunkCustom(ctx, text, &parser.Config{ChunkingStrategy: cs})
+slices, err := ChunkCustom(ctx, text, &parser.Config{ChunkingStrategy: cs})
assert.NoError(t, err)
assert.Len(t, slices, 10)