/**
 * Matches a single Chinese ideograph.
 *
 * Covered ranges: CJK Unified Ideographs (U+4E00–U+9FFF), Extension A
 * (U+3400–U+4DBF) and CJK Compatibility Ideographs (U+F900–U+FAFF).
 * Deliberately non-global so `test()` carries no `lastIndex` state between calls.
 */
const CHINESE_REGEX = /[\u4e00-\u9fff\u3400-\u4dbf\uf900-\ufaff]/;

/**
 * Matches contiguous runs of Chinese text.
 *
 * In addition to the ideograph ranges of `CHINESE_REGEX`, this also accepts
 * CJK Symbols and Punctuation (U+3000–U+303F) and Halfwidth/Fullwidth Forms
 * (U+FF00–U+FFEF), so that punctuation inside a sentence does not split the run.
 */
const CHINESE_EXTRACT_REGEX =
  /[\u4e00-\u9fff\u3400-\u4dbf\uf900-\ufaff\u3000-\u303f\uff00-\uffef]+/g;

/**
 * Detect whether the text contains at least one Chinese ideograph.
 *
 * @param text - Text to inspect.
 * @returns `true` if any character falls in the ideograph ranges.
 */
export const containsChinese = (text: string): boolean => {
  return CHINESE_REGEX.test(text);
};

/**
 * Extract the Chinese parts of the text.
 *
 * @param text - Text to scan.
 * @returns Every contiguous run of Chinese characters (including CJK
 *   punctuation and full-width forms), in order of appearance; an empty array
 *   when there is none.
 */
export const extractChineseParts = (text: string): string[] => {
  return text.match(CHINESE_EXTRACT_REGEX) || [];
};

/**
 * Count the number of Chinese characters in a text.
 *
 * The count is the total length (in UTF-16 code units) of all runs matched by
 * the extraction pattern, so CJK punctuation and full-width forms are counted
 * along with ideographs.
 *
 * @param text - Text to scan.
 * @returns The number of matched characters, or `0` when there is none.
 */
export const countChineseCharacters = (text: string): number => {
  const matches = text.match(CHINESE_EXTRACT_REGEX);
  if (!matches) return 0;

  return matches.reduce((count, match) => count + match.length, 0);
};

/**
 * Detect whether the text is mainly composed of Chinese.
 *
 * @param text - Text to inspect.
 * @param threshold - Minimum ratio of Chinese characters to `text.length`
 *   required to qualify (inclusive). Defaults to `0.5`.
 * @returns `true` when the ratio reaches the threshold; always `false` for an
 *   empty string.
 */
export const isPrimarilyChinese = (text: string, threshold: number = 0.5): boolean => {
  const totalLength = text.length;
  if (totalLength === 0) return false;

  const chineseLength = countChineseCharacters(text);
  return chineseLength / totalLength >= threshold;
};

/**
 * Clean up comment text: remove the comment symbols and surrounding whitespace.
 *
 * The markers are only stripped when they sit at the very start / very end of
 * `text`; the final result is trimmed.
 *
 * @param text - Raw comment text, including its comment markers.
 * @param commentType - Whether `text` is a single-line or a multi-line comment.
 * @param language - Optional language identifier selecting the marker syntax.
 *   Unrecognised or missing values fall back to C-style markers.
 * @returns The comment content without markers.
 */
export const cleanCommentText = (
  text: string,
  commentType: 'single-line' | 'multi-line',
  language?: string
): string => {
  let cleaned = text;

  if (commentType === 'single-line') {
    // Remove different single-line comment symbols based on language type
    switch (language) {
      case 'yaml':
      case 'toml':
      case 'shell':
      case 'python':
      case 'ruby':
        cleaned = cleaned.replace(/^#\s*/, '');
        break;
      case 'ini':
        cleaned = cleaned.replace(/^[;#]\s*/, '');
        break;
      case 'php':
        cleaned = cleaned.replace(/^(?:\/\/|#)\s*/, '');
        break;
      default:
        // JavaScript/TypeScript/Go/Java/C/C++/C# style
        cleaned = cleaned.replace(/^\/\/\s*/, '');
    }
  } else if (commentType === 'multi-line') {
    // Remove different multi-line comment symbols based on language type
    switch (language) {
      case 'html':
      case 'xml':
      case 'markdown':
        // Strip the HTML-style opening and closing comment delimiters.
        // The dashes are written as `-{2}` so this file never contains a
        // literal HTML comment delimiter, which markup-processing tools may
        // strip from the source.
        cleaned = cleaned.replace(/^<!-{2}\s*/, '').replace(/\s*-{2}>$/, '');
        break;
      case 'python':
        cleaned = cleaned.replace(/^"""\s*/, '').replace(/\s*"""$/, '');
        break;
      case 'ruby':
        cleaned = cleaned.replace(/^=begin\s*/, '').replace(/\s*=end$/, '');
        break;
      default:
        // JavaScript/TypeScript/Go/Java/C/C++/C#/CSS style
        cleaned = cleaned.replace(/^\/\*\s*/, '').replace(/\s*\*\/$/, '');
        // Remove the * symbol at the beginning of each line
        cleaned = cleaned.replace(/^\s*\*\s?/gm, '');
    }
  }

  // Remove extra spaces and newlines
  cleaned = cleaned.trim();

  return cleaned;
};

/**
 * Verify whether the translation result is valid.
 *
 * A translation is rejected when it is empty/whitespace-only, when it still
 * contains Chinese ideographs (the translation probably failed), or when it is
 * more than three times as long as the original.
 *
 * @param original - The source text that was translated.
 * @param translated - The translation to validate.
 * @returns `true` when all checks pass.
 */
export const isValidTranslation = (original: string, translated: string): boolean => {
  // Basic verification
  if (!translated || translated.trim().length === 0) {
    return false;
  }

  // Check if Chinese is still included (translation may have failed)
  if (containsChinese(translated)) {
    return false;
  }

  // Check if the length is reasonable (the translated text should not be
  // much longer than the original).
  if (translated.length > original.length * 3) {
    return false;
  }

  return true;
};