131 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			131 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Go
		
	
	
	
| /*
 | |
|  * Copyright 2025 coze-dev Authors
 | |
|  *
 | |
|  * Licensed under the Apache License, Version 2.0 (the "License");
 | |
|  * you may not use this file except in compliance with the License.
 | |
|  * You may obtain a copy of the License at
 | |
|  *
 | |
|  *     http://www.apache.org/licenses/LICENSE-2.0
 | |
|  *
 | |
|  * Unless required by applicable law or agreed to in writing, software
 | |
|  * distributed under the License is distributed on an "AS IS" BASIS,
 | |
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|  * See the License for the specific language governing permissions and
 | |
|  * limitations under the License.
 | |
|  */
 | |
| 
 | |
| package builtin
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"encoding/json"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 
 | |
| 	"github.com/cloudwego/eino/components/document/parser"
 | |
| 	"github.com/cloudwego/eino/schema"
 | |
| 
 | |
| 	"github.com/coze-dev/coze-studio/backend/infra/contract/document"
 | |
| 	contract "github.com/coze-dev/coze-studio/backend/infra/contract/document/parser"
 | |
| )
 | |
| 
 | |
| func ParseJSONMaps(config *contract.Config) ParseFn {
 | |
| 	return func(ctx context.Context, reader io.Reader, opts ...parser.Option) (docs []*schema.Document, err error) {
 | |
| 		b, err := io.ReadAll(reader)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 
 | |
| 		var customContent []map[string]string
 | |
| 		if err = json.Unmarshal(b, &customContent); err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 
 | |
| 		if config.ParsingStrategy == nil {
 | |
| 			config.ParsingStrategy = &contract.ParsingStrategy{
 | |
| 				HeaderLine:    0,
 | |
| 				DataStartLine: 1,
 | |
| 				RowsCount:     0,
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		iter := &customContentContainer{
 | |
| 			i:             0,
 | |
| 			colIdx:        nil,
 | |
| 			customContent: customContent,
 | |
| 			curColumns:    config.ParsingStrategy.Columns,
 | |
| 		}
 | |
| 
 | |
| 		newConfig := &contract.Config{
 | |
| 			FileExtension: config.FileExtension,
 | |
| 			ParsingStrategy: &contract.ParsingStrategy{
 | |
| 				SheetID:       config.ParsingStrategy.SheetID,
 | |
| 				HeaderLine:    0,
 | |
| 				DataStartLine: 1,
 | |
| 				RowsCount:     0,
 | |
| 				IsAppend:      config.ParsingStrategy.IsAppend,
 | |
| 				Columns:       config.ParsingStrategy.Columns,
 | |
| 			},
 | |
| 			ChunkingStrategy: config.ChunkingStrategy,
 | |
| 		}
 | |
| 
 | |
| 		return parseByRowIterator(iter, newConfig, opts...)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| type customContentContainer struct {
 | |
| 	i             int
 | |
| 	colIdx        map[string]int
 | |
| 	customContent []map[string]string
 | |
| 	curColumns    []*document.Column
 | |
| }
 | |
| 
 | |
| func (c *customContentContainer) NextRow() (row []string, end bool, err error) {
 | |
| 	if c.i == 0 && c.colIdx == nil {
 | |
| 		if len(c.customContent) == 0 {
 | |
| 			return nil, false, fmt.Errorf("[customContentContainer] data is nil")
 | |
| 		}
 | |
| 
 | |
| 		headerRow := c.customContent[0]
 | |
| 		founded := make(map[string]struct{})
 | |
| 		colIdx := make(map[string]int, len(headerRow))
 | |
| 
 | |
| 		for _, col := range c.curColumns {
 | |
| 			name := col.Name
 | |
| 			if _, found := headerRow[name]; found {
 | |
| 				founded[name] = struct{}{}
 | |
| 				colIdx[name] = len(colIdx)
 | |
| 				row = append(row, name)
 | |
| 			}
 | |
| 		}
 | |
| 		for name := range headerRow {
 | |
| 			if _, found := founded[name]; !found {
 | |
| 				colIdx[name] = len(colIdx)
 | |
| 				row = append(row, name)
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		c.colIdx = colIdx
 | |
| 		return row, false, nil
 | |
| 	}
 | |
| 
 | |
| 	if c.i >= len(c.customContent) {
 | |
| 		return nil, true, nil
 | |
| 	}
 | |
| 
 | |
| 	content := c.customContent[c.i]
 | |
| 	c.i++
 | |
| 	row = make([]string, len(content))
 | |
| 
 | |
| 	for k, v := range content {
 | |
| 		idx, found := c.colIdx[k]
 | |
| 		if !found {
 | |
| 			return nil, false, fmt.Errorf("[customContentContainer] column not found, name=%s", k)
 | |
| 		}
 | |
| 
 | |
| 		row[idx] = v
 | |
| 	}
 | |
| 
 | |
| 	return row, false, nil
 | |
| }
 |