feat: manually mirror opencoze's code from bytedance
Change-Id: I09a73aadda978ad9511264a756b2ce51f5761adf
This commit is contained in:
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Progress value at which an upload counts as finished
 * (presumably a percentage — confirm at call sites).
 */
export const SUCCESSFUL_UPLOAD_PROGRESS = 100;

/** Polling interval; the unit is presumably milliseconds — confirm at call sites. */
export const POLLING_TIME = 3000;

/** Upper bound for the length of a unit name. */
export const MAX_UNIT_NAME_LEN = 100;

/**
 * Class-name string for the bot data refactor UI
 * (presumably a CSS class — confirm at call sites).
 */
export const BOT_DATA_REFACTOR_CLASS_NAME = 'data-refactor';

/** File extensions (leading dot included) listed for local table files. */
export const TABLE_ACCEPT_LOCAL_FILE = ['.xls', '.xlsx', '.csv'];
|
||||
|
||||
/** Text-document upload settings that apply to one channel. */
interface TextUploadChannelConfig {
  /** Accepted file extensions, each with its leading dot. */
  acceptFileTypes: string[];
  /** Display string listing the accepted formats. */
  fileFormatString: string;
  /**
   * Numeric cap related to adding units
   * (presumably the maximum number of units per upload — confirm with callers).
   */
  addUnitMaxLimit: number;
}

/** Channels that have their own text-upload settings. */
export type Channel = 'DOUYIN' | 'DEFAULT';

/** Per-channel text-upload settings; `DEFAULT` doubles as the fallback entry. */
const textUploadChannelConfigMap: Record<Channel, TextUploadChannelConfig> = {
  DOUYIN: {
    acceptFileTypes: ['.pdf', '.txt', '.doc', '.docx'],
    fileFormatString: 'PDF、TXT、DOC、DOCX',
    addUnitMaxLimit: 100,
  },
  DEFAULT: {
    acceptFileTypes: ['.pdf', '.txt', '.doc', '.docx', '.md'],
    fileFormatString: 'PDF、TXT、DOC、DOCX、MD',
    addUnitMaxLimit: 300,
  },
};

/**
 * Returns the text-upload settings for `channel`.
 *
 * @param channel - Channel to look up. May be omitted.
 * @returns The entry registered for `channel`, or the `DEFAULT` entry when
 *   `channel` is missing, empty, or not a key of the settings map.
 */
export const getTextUploadChannelConfig = (
  channel?: Channel,
): TextUploadChannelConfig => {
  // Only own keys count: a plain `map[channel]` lookup would also resolve
  // inherited names such as 'toString' or 'constructor' when an unchecked
  // runtime string is passed in, and hand back a function instead of a config.
  if (
    channel &&
    Object.prototype.hasOwnProperty.call(textUploadChannelConfigMap, channel)
  ) {
    return textUploadChannelConfigMap[channel];
  }
  return textUploadChannelConfigMap.DEFAULT;
};
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Update frequency options. Each member's numeric value is the number its
 * name spells out (presumably a count of days — confirm with consumers).
 */
export enum FrequencyDay {
  ZERO = 0,
  ONE = 1,
  THREE = 3,
  SEVEN = 7,
  THIRTY = 30,
}
|
||||
/**
 * Form field keys for table settings; each member's value is the string key
 * used for that field.
 *
 * NOTE(review): an enum with the same name and members is also declared in
 * the table constants of this change, and the index re-exports that one from
 * './table' — confirm whether this copy is still needed.
 */
export enum TableSettingFormFields {
  /** Key of the sheet field. */
  SHEET = 'sheet_id',
  /** Key of the header (key) start row field. */
  KEY_START_ROW = 'header_line_idx',
  /** Key of the data start row field. */
  DATA_START_ROW = 'start_line_idx',
}
|
||||
|
||||
/** Maximum size, in MB, of a file uploaded to the knowledge base (100 MB). */
export const UNIT_MAX_MB = 100;

/**
 * Page limit for PDF files
 * (presumably the most pages an uploaded PDF may have — confirm at call sites).
 */
export const PDF_MAX_PAGES = 500;
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Shared upload constants.
export {
  SUCCESSFUL_UPLOAD_PROGRESS,
  POLLING_TIME,
  MAX_UNIT_NAME_LEN,
  BOT_DATA_REFACTOR_CLASS_NAME,
} from './common';

// Table status, metadata limits, form field keys and default table settings.
export {
  TableStatus,
  MAX_TABLE_META_COLUMN_LEN,
  MAX_TABLE_META_STR_LEN,
  DEFAULT_TABLE_SETTINGS_FROM_ONE,
  DEFAULT_TABLE_SETTINGS_FROM_ZERO,
  TableSettingFormFields,
} from './table';

// Default custom segment rule and separator option list.
export { defaultCustomSegmentRule, getSeperatorOptionList } from './text';

// Update-frequency options and upload limits.
export { FrequencyDay, UNIT_MAX_MB, PDF_MAX_PAGES } from './components';
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** Table common constants. */

/** Status values for a table; each member's value is its lowercase name. */
export enum TableStatus {
  ERROR = 'error',
  LOADING = 'loading',
  NORMAL = 'normal',
}
|
||||
|
||||
/**
 * Limit of 50 for table metadata columns
 * (whether this is a count or a name length is not visible here — TODO confirm).
 */
export const MAX_TABLE_META_COLUMN_LEN = 50;

/** Limit of 30 for table metadata strings (presumably a character length — confirm at call sites). */
export const MAX_TABLE_META_STR_LEN = 30;
|
||||
|
||||
/**
 * Steps of the table-local resegment unit flow.
 * Values are spelled out explicitly; they are the same 0-based numbers the
 * compiler assigns to members without initializers.
 */
export enum TableLocalResegmentStep {
  CONFIGURATION = 0,
  PREVIEW = 1,
  PROCESSING = 2,
}
|
||||
|
||||
/**
 * Form field keys for table settings; each member's value is the string key
 * used for that field.
 */
export enum TableSettingFormFields {
  /** Key of the sheet field. */
  SHEET = 'sheet_id',
  /** Key of the header (key) start row field. */
  KEY_START_ROW = 'header_line_idx',
  /** Key of the data start row field. */
  DATA_START_ROW = 'start_line_idx',
}

/**
 * Builds a table-settings object with the sheet and the header start row set
 * to 0 and the data start row set to `dataStartRow`.
 */
const buildDefaultTableSettings = (dataStartRow: number) => ({
  [TableSettingFormFields.SHEET]: 0,
  [TableSettingFormFields.KEY_START_ROW]: 0,
  [TableSettingFormFields.DATA_START_ROW]: dataStartRow,
});

/** Default table settings whose data start row is 1. */
export const DEFAULT_TABLE_SETTINGS_FROM_ONE = buildDefaultTableSettings(1);

/** Default table settings whose data start row is 0. */
export const DEFAULT_TABLE_SETTINGS_FROM_ZERO = buildDefaultTableSettings(0);
|
||||
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { I18n } from '@coze-arch/i18n';
|
||||
|
||||
import { type CustomSegmentRule, SeperatorType } from '../types';
|
||||
|
||||
/**
 * Builds a lookup from each `SeperatorType` member to its display label.
 * Every label is obtained through `I18n.t` when this function is called, not
 * when the module is loaded.
 */
const getSeperatorSelect = () => {
  const labelBySeparator = {
    [SeperatorType.LINE_BREAK]: I18n.t('datasets_Custom_segmentID_linebreak'),
    [SeperatorType.LINE_BREAK2]: I18n.t('datasets_Custom_segmentID_2linebreak'),
    [SeperatorType.CN_PERIOD]: I18n.t('datasets_Custom_segmentID_cnperiod'),
    [SeperatorType.CN_EXCLAMATION]: I18n.t(
      'datasets_Custom_segmentID_cn_exclamation',
    ),
    [SeperatorType.EN_PERIOD]: I18n.t('datasets_Custom_segmentID_enperiod'),
    [SeperatorType.EN_EXCLAMATION]: I18n.t(
      'datasets_Custom_segmentID_en_exclamation',
    ),
    [SeperatorType.CN_QUESTION]: I18n.t('datasets_Custom_segmentID_cn_question'),
    [SeperatorType.EN_QUESTION]: I18n.t('datasets_Custom_segmentID_en_question'),
    [SeperatorType.CUSTOM]: I18n.t('datasets_Custom_segmentID_custom'),
  };
  return labelBySeparator;
};
|
||||
|
||||
/**
 * Returns the separator choices as `{ value, label }` option objects, one per
 * entry of the separator label lookup.
 *
 * `value` is the key as produced by `Object.entries`, so it is always a
 * string. NOTE(review): if `SeperatorType` has numeric members, `value` is
 * their string form rather than the number — confirm consumers expect that.
 */
export const getSeperatorOptionList = () => {
  const labelBySeparator = getSeperatorSelect();
  return Object.entries(labelBySeparator).map(entry => {
    const [value, label] = entry;
    return { value, label };
  });
};
|
||||
|
||||
/** Value used for `maxTokens` in the default custom segment rule. */
const defaultMaxTokens = 800;

/**
 * Value used for `overlap` in the default custom segment rule
 * (unit is not visible here — TODO confirm).
 */
const defaultOverlap = 10;

/**
 * Default custom segment rule: line-break separator with '###' as the stored
 * custom value, no pre-process rules, and the default token and overlap
 * values defined above.
 */
export const defaultCustomSegmentRule: CustomSegmentRule = {
  separator: { type: SeperatorType.LINE_BREAK, customValue: '###' },
  maxTokens: defaultMaxTokens,
  preProcessRules: [],
  overlap: defaultOverlap,
};
|
||||
Reference in New Issue
Block a user