/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package entity

import (
	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
	"github.com/coze-dev/coze-studio/backend/infra/contract/document/parser"
)

// RetrievalStrategy is an alias of knowledge.RetrievalStrategy, making the
// crossdomain type available under this package's name without conversion.
type RetrievalStrategy = knowledge.RetrievalStrategy

// ParsingStrategy describes how a document is parsed before indexing.
//
// Besides the general ParsingType, the fields are grouped by the kind of
// source they apply to (document, sheet, image), as marked by the section
// comments inside the struct.
//
// NOTE(review): whether HeaderLine and DataStartLine are 0- or 1-based is not
// visible in this file — confirm against the sheet parser.
type ParsingStrategy struct {
	ParsingType ParsingType `json:"parsing_type"` // parsing type (see the ParsingType_* constants)
	// Document options
	ExtractImage bool  `json:"extract_image"` // extract image elements
	ExtractTable bool  `json:"extract_table"` // extract table elements
	ImageOCR     bool  `json:"image_ocr"`     // run OCR on images
	FilterPages  []int `json:"filter_pages"`  // pages to filter (presumably pages skipped during parsing — confirm)

	// Sheet options
	SheetID       int64 `json:"sheet_id"`        // xlsx sheet id
	HeaderLine    int   `json:"header_line"`     // header row
	DataStartLine int   `json:"data_start_line"` // first data row
	RowsCount     int   `json:"rows_count"`      // number of data rows to read

	// Image options
	CaptionType *parser.ImageAnnotationType `json:"caption_type"` // optional image annotation type; nil when not set
}
// ParsingType identifies the parsing mode carried in
// ParsingStrategy.ParsingType.
type ParsingType int64

// Supported parsing modes. The numeric values (fast = 0, accurate = 1) are
// what the "parsing_type" JSON field carries, so existing entries must keep
// their position in this block; only append new modes at the end.
const (
	ParsingType_FastParsing ParsingType = iota
	ParsingType_AccurateParsing
)

// ChunkingStrategy describes how a document is split into chunks before
// indexing.
//
// ChunkType carries the parser-defined chunk type. The next group of fields
// configures custom chunking; TrimSpace and TrimURLAndEmail presumably toggle
// stripping of whitespace and of URLs/e-mail addresses from chunk text —
// confirm against the chunker implementation. The final group applies to
// hierarchical (by-level) chunking.
//
// NOTE(review): the unit of ChunkSize and Overlap (characters, runes, tokens)
// is not visible in this file.
type ChunkingStrategy struct {
	ChunkType parser.ChunkType `json:"chunk_type"` // chunk type as defined by the parser package
	// custom chunk config
	ChunkSize       int64  `json:"chunk_size"` // maximum chunk length
	Separator       string `json:"separator"`  // chunk separator
	Overlap         int64  `json:"overlap"`    // overlap between chunks
	TrimSpace       bool   `json:"trim_space"`
	TrimURLAndEmail bool   `json:"trim_url_and_email"`

	// hierarchical chunking
	MaxDepth  int64 `json:"max_depth"`  // maximum hierarchy level when chunking by level
	SaveTitle bool  `json:"save_title"` // keep the hierarchy titles
}