feat(infra): integrate PaddleOCR's PP-StructureV3 as a document parser backend (#714)

2025-08-13 16:37:42 +08:00
parent 708a6ed0c0
commit 6b60c07c22
30 changed files with 657 additions and 174 deletions
--- a/backend/infra/impl/document/ocr/veocr/paddleocr_ocr.go
+++ b/backend/infra/impl/document/ocr/veocr/paddleocr_ocr.go
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

-package veocr
+package ppocr

 import (
 	"bytes"
@@ -28,7 +28,7 @@ import (
 	"github.com/coze-dev/coze-studio/backend/types/errno"
 )

-type PPOCRConfig struct {
+type Config struct {
 	Client *http.Client
 	URL    string

@@ -44,12 +44,12 @@ type PPOCRConfig struct {
 	TextRecScoreThresh        *float64
 }

-func NewPPOCR(config *PPOCRConfig) ocr.OCR {
+func NewOCR(config *Config) ocr.OCR {
 	return &ppocrImpl{config}
 }

 type ppocrImpl struct {
-	config *PPOCRConfig
+	config *Config
 }

 type ppocrResponse struct {
@@ -133,6 +133,10 @@ func (o *ppocrImpl) makeRequest(reqBody map[string]interface{}) ([]string, error
 	}
 	defer resp.Body.Close()

+	if resp.StatusCode != http.StatusOK {
+		return nil, errorx.WrapByCode(err, errno.ErrKnowledgeNonRetryableCode)
+	}
+
 	respBody, err := io.ReadAll(resp.Body)
 	if err != nil {
 		return nil, errorx.WrapByCode(err, errno.ErrKnowledgeNonRetryableCode)