feat: manually mirror opencoze's code from bytedance
Change-Id: I09a73aadda978ad9511264a756b2ce51f5761adf
This commit is contained in:
@@ -0,0 +1,199 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { get } from 'lodash-es';
|
||||
import { useKnowledgeParams } from '@coze-data/knowledge-stores';
|
||||
import { DataNamespace, dataReporter } from '@coze-data/reporter';
|
||||
import {
|
||||
type UnitItem,
|
||||
OptType,
|
||||
UploadStatus,
|
||||
type CreateUnitStatus,
|
||||
UnitType,
|
||||
} from '@coze-data/knowledge-resource-processor-core';
|
||||
import { REPORT_EVENTS } from '@coze-arch/report-events';
|
||||
import { I18n } from '@coze-arch/i18n';
|
||||
import { CustomError } from '@coze-arch/bot-error';
|
||||
import {
|
||||
type DocumentInfo,
|
||||
DocumentStatus,
|
||||
type DocumentProgress,
|
||||
} from '@coze-arch/bot-api/knowledge';
|
||||
import { type UploadFileData } from '@coze-arch/bot-api/developer_api';
|
||||
import { Toast } from '@coze-arch/coze-design';
|
||||
|
||||
import { SUCCESSFUL_UPLOAD_PROGRESS } from '../constants';
|
||||
|
||||
/**
 * Marks the unit at position `index` as successfully uploaded.
 *
 * Returns `unitList` itself when `data` is falsy. Otherwise returns a new
 * array in which only the entry at `index` is replaced by a copy carrying the
 * upload URI (empty string when `data.upload_uri` is falsy),
 * `UploadStatus.SUCCESS`, a `percent` of 100 and the given `fileInstance`.
 */
export const transformUnitList = ({
  unitList,
  data,
  fileInstance,
  index,
}: {
  unitList: UnitItem[];
  data: UploadFileData | undefined;
  fileInstance: File;
  index: number;
}): UnitItem[] => {
  if (!data) {
    return unitList;
  }
  const uploadedUri = data.upload_uri || '';
  const nextList = unitList.map((unit, position) =>
    position !== index
      ? unit
      : {
          ...unit,
          uri: uploadedUri,
          status: UploadStatus.SUCCESS,
          percent: 100,
          fileInstance,
        },
  );
  // TODO: the `as` assertion below still needs to be resolved.
  return nextList as UnitItem[];
};
|
||||
|
||||
/**
 * Reports an error event when any polled progress entry has failed.
 *
 * Entries whose `status` is `DocumentStatus.Failed` are collected; when at
 * least one exists, a `KnowledgeGetTaskProgress` error event is sent under the
 * knowledge namespace, with the failed entries serialized into the error
 * message and attached as `meta.failIds`. Nothing is reported otherwise.
 */
export function reportFailGetProgress(data: DocumentProgress[]) {
  const failIds = data.filter(({ status }) => status === DocumentStatus.Failed);
  if (failIds.length === 0) {
    return;
  }
  const eventName = REPORT_EVENTS.KnowledgeGetTaskProgress;
  const message = `${eventName}: get progress fail. ${JSON.stringify(failIds)}`;
  dataReporter.errorEvent(DataNamespace.KNOWLEDGE, {
    eventName,
    error: new CustomError(eventName, message),
    meta: { failIds },
  });
}
|
||||
|
||||
/**
 * Tells whether progress polling can stop.
 *
 * Polling stops only when `data` is non-empty and every entry has either
 * reached `SUCCESSFUL_UPLOAD_PROGRESS` or has status `DocumentStatus.Failed`.
 */
export function isStopPolling(data: DocumentProgress[]) {
  if (data.length === 0) {
    return false;
  }
  const isSettled = (item: DocumentProgress) =>
    item.progress === SUCCESSFUL_UPLOAD_PROGRESS ||
    item.status === DocumentStatus.Failed;
  return data.every(isSettled);
}
|
||||
|
||||
/**
 * Cancels the pending polling timer held in `pollingId`, if any.
 *
 * When `pollingId.current` is truthy the timer is cleared and the ref is reset
 * to `undefined`; a falsy value leaves the ref untouched.
 */
export const clearPolling = (
  pollingId: React.MutableRefObject<number | undefined>,
) => {
  const timerId = pollingId.current;
  if (!timerId) {
    return;
  }
  clearTimeout(timerId);
  pollingId.current = undefined;
};
|
||||
|
||||
/**
 * Hook that reads the `opt` entry from the knowledge params, using
 * `OptType.ADD` as the default passed to lodash `get`.
 */
export function useOptFromQuery(): OptType {
  const params = useKnowledgeParams();
  return get(params, 'opt', OptType.ADD) as OptType;
}
|
||||
|
||||
/**
 * Hook that reads the `docID` entry from the knowledge params.
 *
 * Returns `undefined` rather than an empty string when no value is available:
 * an empty string is not necessarily needed by callers.
 *
 * Open question carried over from the original author: is the `docID` entry
 * point still in use at all?
 */
export function useDocIdFromQuery(): string | undefined {
  const params = useKnowledgeParams();
  const docId = get(params, 'docID', undefined);
  return docId;
}
|
||||
|
||||
/**
 * Extracts the lower-cased extension (the text after the last `.`) of a name.
 *
 * @param name - File name to inspect.
 * @returns The extension without the leading dot, lower-cased; an empty string
 * when `name` contains no `.` or ends with one.
 */
export const getFileExtension = (name: string) => {
  const dotIndex = name.lastIndexOf('.');
  // Without this guard `slice(-1 + 1)` would return the whole name as its
  // "extension" for names that have no dot at all.
  if (dotIndex === -1) {
    return '';
  }
  return name.slice(dotIndex + 1).toLowerCase();
};
|
||||
|
||||
/**
 * Reads `file` as a data URL and resolves with that string after stripping
 * everything up to and including its first comma.
 *
 * Rejects with a `CustomError` when the read result is empty or not a string,
 * when the read fails (an error toast is shown first) or when it is aborted.
 */
export const getBase64 = (file: Blob): Promise<string> =>
  new Promise((resolve, reject) => {
    const reader = new FileReader();
    const fail = (reason: string) => {
      reject(new CustomError('getBase64', reason));
    };

    reader.onload = ({ target }) => {
      const dataUrl = target?.result;
      if (dataUrl && typeof dataUrl === 'string') {
        // Drop the leading part of the data URL, up to the first comma.
        resolve(dataUrl.replace(/^.*?,/, ''));
        return;
      }
      fail('file read invalid');
    };
    reader.onerror = () => {
      Toast.error(I18n.t('read_file_failed_please_retry'));
      fail('file read fail');
    };
    reader.onabort = () => {
      fail('file read abort');
    };

    reader.readAsDataURL(file);
  });
|
||||
|
||||
/**
 * Reads `file` as an array buffer and resolves with its bytes.
 *
 * @param file - Blob to read.
 * @returns A promise resolving to a `Uint8Array` created over the read buffer.
 * It rejects with a `CustomError` when the reader yields no result, when the
 * read fails, or when it is aborted.
 */
export const getUint8Array = (file: Blob): Promise<Uint8Array> =>
  new Promise((resolve, reject) => {
    const fileReader = new FileReader();

    fileReader.onload = event => {
      const result = event.target?.result;
      if (result) {
        resolve(new Uint8Array(result as ArrayBuffer));
      } else {
        reject(new CustomError('getUint8Array', 'file read invalid'));
      }
    };
    // Without these two handlers a failed or aborted read would never settle
    // the promise, leaving callers waiting forever.
    fileReader.onerror = () => {
      reject(new CustomError('getUint8Array', 'file read fail'));
    };
    fileReader.onabort = () => {
      reject(new CustomError('getUint8Array', 'file read abort'));
    };

    fileReader.readAsArrayBuffer(file);
  });
|
||||
|
||||
/**
 * Reports an error event listing the documents whose status is failed.
 *
 * Documents with status `DocumentStatus.Failed` are collected from `docInfos`;
 * when there is at least one, an error event named `reportEventName` is sent
 * under the knowledge namespace, with those documents serialized into the
 * message and attached as `meta.failDocumentIds`.
 */
export function reportProcessDocumentFail(
  docInfos: DocumentInfo[],
  reportEventName: string,
) {
  const failDocumentIds = docInfos.filter(
    ({ status }) => status === DocumentStatus.Failed,
  );
  if (failDocumentIds.length === 0) {
    return;
  }
  const detail = JSON.stringify(failDocumentIds);
  dataReporter.errorEvent(DataNamespace.KNOWLEDGE, {
    eventName: reportEventName,
    error: new CustomError(
      reportEventName,
      `${reportEventName}: fail document_ids are ${detail}`,
    ),
    meta: { failDocumentIds },
  });
}
|
||||
|
||||
/**
 * Returns the `knowledge_add_unit_process_notice` translation.
 *
 * `taskStatus` is currently ignored: the variant that returned an empty string
 * for `TASK_FINISH` is disabled and kept below as commented-out code.
 */
export const getProcessingDescMsg = (taskStatus: CreateUnitStatus) => {
  const notice = I18n.t('knowledge_add_unit_process_notice');
  return notice;
};
// taskStatus === CreateUnitStatus.TASK_FINISH
//   ? ''
//   : I18n.t('knowledge_add_unit_process_notice');
|
||||
|
||||
/**
 * Tells whether the operation is a re-segment of a Google Drive or Feishu
 * table unit.
 *
 * @param opt - Current operation type.
 * @param type - Unit type, if known.
 * @returns `true` only when `opt` is `OptType.RESEGMENT` and `type` is
 * `UnitType.TABLE_GOOGLE_DRIVE` or `UnitType.TABLE_FEISHU`. The result is
 * always a strict boolean.
 */
export const isThirdResegment = (
  opt: OptType,
  type: UnitType | undefined,
): boolean =>
  opt === OptType.RESEGMENT &&
  // Compare against `undefined` rather than relying on truthiness, so the
  // predicate never returns `undefined` and a falsy enum value could not be
  // misclassified.
  type !== undefined &&
  [UnitType.TABLE_GOOGLE_DRIVE, UnitType.TABLE_FEISHU].includes(type);
|
||||
|
||||
/** Tells whether the operation type is `OptType.INCREMENTAL`. */
export const isIncremental = (opt: OptType) => {
  return OptType.INCREMENTAL === opt;
};
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import type Cropper from 'cropperjs';
|
||||
|
||||
import { type CropperSizePercent } from '@/features/knowledge-type/text/interface';
|
||||
|
||||
/** Rounds `value` to at most two decimal places. */
const fixPrecision = (value: number) => Number.parseFloat(value.toFixed(2));

/**
 * Converts a crop rectangle in pixels into margins expressed as fractions of
 * the page size, each rounded to two decimals.
 *
 * `topPercent` / `leftPercent` are the offsets of the rectangle's top / left
 * edge; `bottomPercent` / `rightPercent` are the space left below / to the
 * right of it. Vertical values are divided by `naturalHeight`, horizontal
 * ones by `naturalWidth`.
 */
export const convertCropDataToPercentSize = ({
  data,
  pdfSize: { naturalHeight, naturalWidth },
}: {
  data: Cropper.Data;
  pdfSize: {
    naturalHeight: number;
    naturalWidth: number;
  };
}): CropperSizePercent => {
  const { x: leftPixel, y: topPixel, width, height } = data;
  const bottomPixel = topPixel + height;
  const rightPixel = leftPixel + width;

  const verticalShare = (pixels: number) =>
    fixPrecision(pixels / naturalHeight);
  const horizontalShare = (pixels: number) =>
    fixPrecision(pixels / naturalWidth);

  return {
    topPercent: verticalShare(topPixel),
    bottomPercent: verticalShare(naturalHeight - bottomPixel),
    leftPercent: horizontalShare(leftPixel),
    rightPercent: horizontalShare(naturalWidth - rightPixel),
  };
};
|
||||
|
||||
/**
 * Converts fractional margins back into a crop rectangle in pixels.
 *
 * `x` / `y` are the left / top margins scaled by the page width / height;
 * `width` / `height` are what remains of the page after subtracting the
 * margins on both sides. `scaleX` and `scaleY` are fixed to 1 and `rotate`
 * to 0.
 */
export const convertPercentSizeToCropData = ({
  cropSizePercent: { topPercent, bottomPercent, rightPercent, leftPercent },
  pdfSize: { naturalHeight, naturalWidth },
}: {
  cropSizePercent: CropperSizePercent;
  pdfSize: {
    naturalHeight: number;
    naturalWidth: number;
  };
}): Cropper.Data => {
  const left = leftPercent * naturalWidth;
  const top = topPercent * naturalHeight;
  return {
    scaleX: 1,
    scaleY: 1,
    rotate: 0,
    x: left,
    y: top,
    width: naturalWidth - left - naturalWidth * rightPercent,
    height: naturalHeight - top - naturalHeight * bottomPercent,
  };
};
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { type ResegmentRequest } from '@coze-arch/idl/knowledge';
|
||||
|
||||
import { type PDFDocumentFilterValue } from '@/features/knowledge-type/text/interface';
|
||||
|
||||
import { mapPDFFilterConfig } from './map-pdf-filter-config';
|
||||
|
||||
/**
 * Builds the resegment request fragment for a PDF filter value.
 *
 * Returns an empty object when `filterValue` is falsy; otherwise returns
 * `filter_strategy.filter_page`, computed by `mapPDFFilterConfig` from
 * `filterValue.filterPagesConfig`.
 *
 * The crop-box part (`filter_box_position`, built from `cropperSizePercent`)
 * is disabled and kept as commented-out code.
 */
export const convertFilterStrategyToParams = (
  filterValue: PDFDocumentFilterValue | undefined,
): ResegmentRequest => {
  if (!filterValue) {
    return {};
  }
  // const { topPercent, rightPercent, bottomPercent, leftPercent } =
  //   filterValue.cropperSizePercent;
  const filterPage = mapPDFFilterConfig(filterValue.filterPagesConfig);
  return {
    filter_strategy: {
      // filter_box_position: [
      //   topPercent,
      //   rightPercent,
      //   bottomPercent,
      //   leftPercent,
      // ],
      filter_page: filterPage,
    },
  };
};
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { type Dataset, StorageLocation } from '@coze-arch/idl/knowledge';
|
||||
|
||||
/**
 * Tells whether the storage strategy (cloud search) can be configured for
 * `dataset`.
 *
 * All three conditions must hold: the build targets the CN region, the
 * dataset has no document yet, and it uses the default storage location.
 */
export function getStorageStrategyEnabled(dataset?: Dataset) {
  // Cloud search can only be configured on the knowledge base's first upload.
  const isFirstUpload = dataset?.doc_count === 0;
  const usesDefaultStorage =
    dataset?.storage_location === StorageLocation.Default;
  // Cloud search is only launched in the CN region.
  return IS_CN_REGION && isFirstUpload && usesDefaultStorage;
}
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Barrel file: re-exports helpers from the sibling modules.
export {
  getFrequencyMap,
  // `IValidateRes` is an interface; the `type` modifier keeps this re-export
  // valid under `isolatedModules` and lets it be erased at compile time.
  type IValidateRes,
  validateField,
  tableSettingsToString,
} from './table';
export {
  transformUnitList,
  reportFailGetProgress,
  isStopPolling,
  clearPolling,
  useOptFromQuery,
  useDocIdFromQuery,
  getFileExtension,
  getBase64,
  getUint8Array,
  reportProcessDocumentFail,
  getProcessingDescMsg,
  isThirdResegment,
  isIncremental,
} from './common';

export { getSegmentCleanerParams } from './text';

export { getStorageStrategyEnabled } from './get-storage-strategy-enabled';

export { validateCommonDocResegmentStep } from './validate-common-doc-next-step';
|
||||
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import {
|
||||
type DocumentInfo,
|
||||
DocumentSource,
|
||||
FormatType,
|
||||
} from '@coze-arch/idl/knowledge';
|
||||
|
||||
/**
 * Tells whether `document` has format type `FormatType.Text` and source type
 * `DocumentSource.Document`.
 */
export const isLocalTextDocument = (document: DocumentInfo) => {
  const { format_type: formatType, source_type: sourceType } = document;
  return (
    formatType === FormatType.Text && sourceType === DocumentSource.Document
  );
};
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { type FilterPageConfig } from '@/features/knowledge-type/text/interface';
|
||||
|
||||
/**
 * Collects the `pageIndex` of every entry flagged with `isFilter`, in list
 * order. Entries whose `pageIndex` is not a number are skipped.
 */
export const mapPDFFilterConfig = (list: FilterPageConfig[]) =>
  list.reduce<number[]>((pages, { isFilter, pageIndex }) => {
    if (isFilter && typeof pageIndex === 'number') {
      pages.push(pageIndex);
    }
    return pages;
  }, []);
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { I18n } from '@coze-arch/i18n';
|
||||
|
||||
import {
|
||||
type FilterPageConfig,
|
||||
type PDFDocumentFilterValue,
|
||||
} from '@/features/knowledge-type/text/interface';
|
||||
|
||||
/**
 * Returns the page indexes of the entries flagged with `isFilter`, sorted in
 * ascending numeric order. The input array itself is not reordered.
 */
export const getSortedFilterPages = (filterPagesConfig: FilterPageConfig[]) => {
  const flaggedPages = filterPagesConfig.flatMap(config =>
    config.isFilter ? [config.pageIndex] : [],
  );
  flaggedPages.sort((left, right) => left - right);
  return flaggedPages;
};
|
||||
|
||||
/** Joins page numbers into a single string, separated by ` / `. */
export const getFilterPagesString = (pages: number[]) => {
  const separator = ' / ';
  return pages.join(separator);
};
|
||||
|
||||
/**
 * Renders a multi-line summary of the pages filtered out of each PDF, shaped
 * like the example below (the wording comes from the `data_filter_values`
 * translation):
 *   Paper 1: filter pages 2 / 4 / 6
 *   Paper 2: filter page 1 ...
 *
 * A filter value yields a line `<pdf name>: <translation>` only when its `uri`
 * matches an entry of `pdfList` and at least one of its pages is flagged;
 * other values are omitted. Lines are joined with `\n`.
 */
export const renderDocumentFilterValue = ({
  filterValue,
  pdfList,
}: {
  filterValue: PDFDocumentFilterValue[];
  pdfList: { name: string; uri: string }[];
}) =>
  filterValue
    .flatMap(({ uri, filterPagesConfig }) => {
      const pdf = pdfList.find(item => item.uri === uri);
      if (!pdf) {
        return [];
      }

      const filterPages = getSortedFilterPages(filterPagesConfig);
      if (filterPages.length === 0) {
        return [];
      }

      const description = I18n.t('data_filter_values', {
        filterPages: getFilterPagesString(filterPages),
      });
      return [`${pdf.name}: ${description}`];
    })
    .join('\n');
|
||||
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** This file holds general-purpose table utilities. */
|
||||
import { get } from 'lodash-es';
|
||||
import { I18n } from '@coze-arch/i18n';
|
||||
|
||||
import { type TableSettings } from '../types';
|
||||
import { FrequencyDay, TableSettingFormFields } from '../constants';
|
||||
|
||||
/**
 * Returns the localized label for an update frequency.
 *
 * `FrequencyDay.ZERO` maps to the `datasets_frequencyModal_frequency_noUpdate`
 * translation; `ONE`, `THREE`, `SEVEN` and `THIRTY` map to the
 * `datasets_frequencyModal_frequency_day` translation rendered with `num`
 * 1, 3, 7 and 30 respectively.
 */
export const getFrequencyMap = (updateInterval: FrequencyDay): string => {
  const everyNDays = (num: number) =>
    I18n.t('datasets_frequencyModal_frequency_day', { num });

  const labels = {
    [FrequencyDay.ZERO]: I18n.t('datasets_frequencyModal_frequency_noUpdate'),
    [FrequencyDay.ONE]: everyNDays(1),
    [FrequencyDay.THREE]: everyNDays(3),
    [FrequencyDay.SEVEN]: everyNDays(7),
    [FrequencyDay.THIRTY]: everyNDays(30),
  };
  return labels[updateInterval];
};
|
||||
|
||||
/** Result of a field validation. */
export interface IValidateRes {
  /** Whether the validated value is acceptable. */
  valid: boolean;
  /** Message describing the failure; may be an empty string. */
  errorMsg: string;
}
|
||||
/**
 * Validates a table-structure column name or table name.
 *
 * @param fieldName - Name to validate.
 * @param emptyMsg - Message returned when `fieldName` is empty (default `''`).
 * @returns `valid: false` together with
 *   - `emptyMsg` when `fieldName` is empty,
 *   - the `knowledge_tableStructure_errSystemField` translation when it equals
 *     `_knowledge_slice_id`,
 *   - the `knowledge_tableStructure_field_errLegally` translation when it
 *     contains a double quote, single quote, backtick or backslash;
 *   otherwise `valid: true` with an empty message.
 */
export const validateField = (
  fieldName: string,
  emptyMsg = '',
): IValidateRes => {
  const invalid = (errorMsg: string): IValidateRes => ({
    valid: false,
    errorMsg,
  });

  if (!fieldName) {
    return invalid(emptyMsg);
  }

  // The reserved `_knowledge_slice_id` keyword is rejected (exact match).
  if (fieldName === '_knowledge_slice_id') {
    return invalid(I18n.t('knowledge_tableStructure_errSystemField'));
  }

  // Special characters: double quote, single quote, backtick, backslash.
  const notationReg = /["'`\\]+/g;
  if (notationReg.test(fieldName)) {
    return invalid(I18n.t('knowledge_tableStructure_field_errLegally'));
  }

  return { valid: true, errorMsg: '' };
};
|
||||
/**
 * Extracts a `src` value from every `<img …>` tag found in `str`.
 *
 * Tags are located first; then, within each tag, the first `src = '…'` /
 * `src = "…"` occurrence is taken.
 *
 * @returns The extracted values in order of appearance; an empty array when
 * `str` is empty or contains no matching tag.
 */
export const getSrcFromImg = (str: string): string[] => {
  if (!str) {
    return [];
  }
  // Locate the image tags with a regular expression.
  const imgTags = str.match(/<img[^>]+src\s*=\s*['"]([^'"]+)['"][^>]*>/g) ?? [];

  // Pull the src attribute value out of each matched tag.
  return imgTags.reduce<string[]>((sources, tag) => {
    const src = tag.match(/src\s*=\s*['"]([^'"]+)['"]/)?.[1];
    if (src) {
      sources.push(src);
    }
    return sources;
  }, []);
};
|
||||
|
||||
/**
 * Type guard: tells whether `key` is one of the values of the
 * `TableSettingFormFields` enum.
 */
export const isKeyInTableSettings = (
  key: string,
): key is TableSettingFormFields => {
  const candidate = key as TableSettingFormFields;
  const knownFields = Object.values(TableSettingFormFields);
  return knownFields.includes(candidate);
};
|
||||
|
||||
/**
 * Converts the recognized table settings to strings.
 *
 * Starts from an object whose `sheet_id`, `header_line_idx` and
 * `start_line_idx` are empty strings, then overwrites every own key of
 * `tableSettings` accepted by `isKeyInTableSettings` with `String(value)`.
 */
export const tableSettingsToString = (tableSettings: TableSettings) => {
  const res: { [key in keyof TableSettings]: string } = {
    sheet_id: '',
    header_line_idx: '',
    start_line_idx: '',
  };
  for (const key of Object.keys(tableSettings)) {
    if (isKeyInTableSettings(key)) {
      res[key] = String(get(tableSettings, key));
    }
  }
  return res;
};
|
||||
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import {
|
||||
ChunkType,
|
||||
type ChunkStrategy,
|
||||
type DocumentInfo,
|
||||
} from '@coze-arch/bot-api/knowledge';
|
||||
|
||||
import { defaultCustomSegmentRule } from '@/constants/text';
|
||||
|
||||
import {
|
||||
SegmentMode,
|
||||
PreProcessRule,
|
||||
SeperatorType,
|
||||
type Seperator,
|
||||
} from '../types';
|
||||
|
||||
/**
 * Maps a chunk strategy's `chunk_type` to a segment mode:
 * `ChunkType.CustomChunk` gives `SegmentMode.CUSTOM`, `ChunkType.LevelChunk`
 * gives `SegmentMode.LEVEL`, and anything else gives `SegmentMode.AUTO`.
 */
export const getSegmentMode = (rule: ChunkStrategy) => {
  switch (rule.chunk_type) {
    case ChunkType.CustomChunk:
      return SegmentMode.CUSTOM;
    case ChunkType.LevelChunk:
      return SegmentMode.LEVEL;
    default:
      return SegmentMode.AUTO;
  }
};
|
||||
|
||||
/**
 * Derives the segmentation form parameters from a document's stored chunk
 * strategy.
 *
 * Returns `undefined` when `docInfo` or its `chunk_strategy` is missing, or
 * when building the parameters throws. Otherwise returns the document itself,
 * the segment mode from `getSegmentMode`, and a segment rule in which
 * separator, max tokens and overlap fall back to `defaultCustomSegmentRule`
 * whenever the stored value is falsy.
 */
export const getSegmentCleanerParams = (docInfo: DocumentInfo) => {
  const rule = docInfo?.chunk_strategy;
  if (!rule) {
    return undefined;
  }
  try {
    const preProcessRules: PreProcessRule[] = [];
    if (rule.remove_extra_spaces) {
      preProcessRules.push(PreProcessRule.REMOVE_SPACES);
    }
    if (rule.remove_urls_emails) {
      preProcessRules.push(PreProcessRule.REMOVE_EMAILS);
    }

    const segmentMode = getSegmentMode(rule);
    const separator = rule.separator
      ? getSeparator(rule.separator as SeperatorType)
      : defaultCustomSegmentRule.separator;
    const maxTokens = rule.max_tokens
      ? Number(rule.max_tokens)
      : defaultCustomSegmentRule.maxTokens;
    // NOTE(review): a stored numeric overlap of 0 is falsy and therefore
    // replaced by the default — confirm this is intended.
    const overlap = rule.overlap
      ? Number(rule.overlap)
      : defaultCustomSegmentRule.overlap;

    return {
      docInfo,
      segmentMode,
      segmentRule: {
        separator,
        maxTokens,
        preProcessRules,
        overlap,
      },
    };
  } catch (e) {
    return undefined;
  }
};
|
||||
|
||||
/** Lists every value of the given separator enum except its `CUSTOM` member. */
function getSeperatorTypeExceptCustom(
  seperatorType: typeof SeperatorType,
): string[] {
  return Object.values(seperatorType).filter(
    value => value !== seperatorType.CUSTOM,
  );
}
|
||||
|
||||
/**
 * Wraps a stored separator string into a `Seperator` object.
 *
 * When `separator` is one of the non-custom `SeperatorType` values it is used
 * as the `type`; otherwise the result has type `SeperatorType.CUSTOM` and
 * carries `separator` as `customValue`.
 */
export const getSeparator = (separator: SeperatorType): Seperator => {
  const presetTypes = getSeperatorTypeExceptCustom(SeperatorType);
  const isPreset = presetTypes.includes(separator);
  return isPreset
    ? { type: separator }
    : { type: SeperatorType.CUSTOM, customValue: separator };
};
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { FooterBtnStatus } from '@coze-data/knowledge-resource-processor-core';
|
||||
|
||||
import { type CustomSegmentRule, SegmentMode, SeperatorType } from '../types';
|
||||
|
||||
/**
 * Decides whether the footer button is enabled for the resegment step.
 *
 * Any mode other than `SegmentMode.CUSTOM` is always enabled. In custom mode
 * the button is disabled when `maxTokens` is missing or 0, when the separator
 * is of type `CUSTOM` with an empty `customValue`, or when `overlap` is not a
 * number (including `NaN`).
 */
export const validateCommonDocResegmentStep = (
  segmentMode: SegmentMode,
  segmentRule: CustomSegmentRule,
): FooterBtnStatus => {
  if (segmentMode !== SegmentMode.CUSTOM) {
    return FooterBtnStatus.ENABLE;
  }

  if (!segmentRule?.maxTokens) {
    return FooterBtnStatus.DISABLE;
  }

  const separator = segmentRule?.separator;
  if (separator?.type === SeperatorType.CUSTOM && !separator?.customValue) {
    return FooterBtnStatus.DISABLE;
  }

  const { overlap } = segmentRule;
  if (typeof overlap !== 'number' || Number.isNaN(overlap)) {
    return FooterBtnStatus.DISABLE;
  }

  return FooterBtnStatus.ENABLE;
};
|
||||
Reference in New Issue
Block a user