76 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			76 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Go
		
	
	
	
/*
 | 
						|
 * Copyright 2025 coze-dev Authors
 | 
						|
 *
 | 
						|
 * Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
 * you may not use this file except in compliance with the License.
 | 
						|
 * You may obtain a copy of the License at
 | 
						|
 *
 | 
						|
 *     http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 *
 | 
						|
 * Unless required by applicable law or agreed to in writing, software
 | 
						|
 * distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
 * See the License for the specific language governing permissions and
 | 
						|
 * limitations under the License.
 | 
						|
 */
 | 
						|
 | 
						|
package builtin
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"fmt"
 | 
						|
	"os"
 | 
						|
	"testing"
 | 
						|
 | 
						|
	"github.com/cloudwego/eino/components/document/parser"
 | 
						|
	"github.com/cloudwego/eino/schema"
 | 
						|
	"github.com/stretchr/testify/assert"
 | 
						|
	"go.uber.org/mock/gomock"
 | 
						|
 | 
						|
	contract "github.com/coze-dev/coze-studio/backend/infra/contract/document/parser"
 | 
						|
	ms "github.com/coze-dev/coze-studio/backend/internal/mock/infra/contract/storage"
 | 
						|
)
 | 
						|
 | 
						|
func TestParseMarkdown(t *testing.T) {
 | 
						|
	ctx := context.Background()
 | 
						|
	ctrl := gomock.NewController(t)
 | 
						|
	mockStorage := ms.NewMockStorage(ctrl)
 | 
						|
	mockStorage.EXPECT().PutObject(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
 | 
						|
 | 
						|
	pfn := parseMarkdown(&contract.Config{
 | 
						|
		FileExtension: contract.FileExtensionMarkdown,
 | 
						|
		ParsingStrategy: &contract.ParsingStrategy{
 | 
						|
			ExtractImage: true,
 | 
						|
			ExtractTable: true,
 | 
						|
			ImageOCR:     true,
 | 
						|
		},
 | 
						|
		ChunkingStrategy: &contract.ChunkingStrategy{
 | 
						|
			ChunkType:       contract.ChunkTypeCustom,
 | 
						|
			ChunkSize:       800,
 | 
						|
			Separator:       "\n",
 | 
						|
			Overlap:         10,
 | 
						|
			TrimSpace:       true,
 | 
						|
			TrimURLAndEmail: true,
 | 
						|
		},
 | 
						|
	}, mockStorage, nil)
 | 
						|
 | 
						|
	f, err := os.Open("test_data/test_markdown.md")
 | 
						|
	assert.NoError(t, err)
 | 
						|
	docs, err := pfn(ctx, f, parser.WithExtraMeta(map[string]any{
 | 
						|
		"document_id":  int64(123),
 | 
						|
		"knowledge_id": int64(456),
 | 
						|
	}))
 | 
						|
	assert.NoError(t, err)
 | 
						|
	for _, doc := range docs {
 | 
						|
		assertDoc(t, doc)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func assertDoc(t *testing.T, doc *schema.Document) {
 | 
						|
	assert.NotZero(t, doc.Content)
 | 
						|
	fmt.Println(doc.Content)
 | 
						|
	assert.NotNil(t, doc.MetaData)
 | 
						|
	assert.Equal(t, int64(123), doc.MetaData["document_id"].(int64))
 | 
						|
	assert.Equal(t, int64(456), doc.MetaData["knowledge_id"].(int64))
 | 
						|
}
 |