From 02b41715762c8c96ffe28af0fcbc09465bc00478 Mon Sep 17 00:00:00 2001 From: N3ko Date: Tue, 29 Jul 2025 21:16:17 +0800 Subject: [PATCH] fix: error when parsing tables with only headers in docx/pdf (#271) --- .../infra/impl/document/parser/builtin/py_parser_protocol.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/backend/infra/impl/document/parser/builtin/py_parser_protocol.go b/backend/infra/impl/document/parser/builtin/py_parser_protocol.go index a622d010..6477acc6 100644 --- a/backend/infra/impl/document/parser/builtin/py_parser_protocol.go +++ b/backend/infra/impl/document/parser/builtin/py_parser_protocol.go @@ -249,6 +249,11 @@ func formatTablesInDocument(input []*schema.Document) (output []*schema.Document values = append(values, col.Name) } write(values) + if colOnly, err := document.GetDocumentColumnsOnly(doc); err != nil { + return nil, err + } else if colOnly { + break + } } data, err := document.GetDocumentColumnData(doc)