feat: manually mirror opencoze's code from bytedance

Change-Id: I09a73aadda978ad9511264a756b2ce51f5761adf
This commit is contained in:
fanlv
2025-07-20 17:36:12 +08:00
commit 890153324f
14811 changed files with 1923430 additions and 0 deletions

View File

@@ -0,0 +1 @@
lib

View File

@@ -0,0 +1,10 @@
# @coze-arch/pdfjs-shadow
## Description
原始的 pdfjs-dist 包兼容性过低,需要重新编译并增加 polyfill 之后才能正常运行,因此设计该 package,主要作用:
1. 收敛 pdfjs-dist 调用,避免 bot 环境中多处定义 pdfjs-dist 版本;
2. 收敛 worker src url 的计算逻辑。
注意,该 package 仅供 coze 消费。

View File

@@ -0,0 +1,89 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { generatePdfAssetsUrl } from '../src/generate-assets';
import pkg from '../package.json';
// Test suite for generatePdfAssetsUrl: checks the generated URL for each asset
// type under the 'cn' and 'va' regions, and the error thrown for an unsupported
// asset type. The global REGION value is saved up front and restored after every test.
describe('generatePdfAssetsUrl', () => {
const originalRegion = global.REGION;
beforeEach(() => {
// Reset mocks
vi.resetAllMocks();
});
afterEach(() => {
// Restore the original REGION value
global.REGION = originalRegion;
});
it('应该为 cmaps 生成正确的 URL中国区域', () => {
// Set the region to China
global.REGION = 'cn';
const url = generatePdfAssetsUrl('cmaps');
// Verify the URL format
expect(url).toContain('//lf-cdn.coze.cn/obj/unpkg');
expect(url).toContain(pkg.name.replace(/^@/, ''));
expect(url).toContain('lib/cmaps/');
});
it('应该为 pdf.worker 生成正确的 URL中国区域', () => {
// Set the region to China
global.REGION = 'cn';
const url = generatePdfAssetsUrl('pdf.worker');
// Verify the URL format
expect(url).toContain('//lf-cdn.coze.cn/obj/unpkg');
expect(url).toContain(pkg.name.replace(/^@/, ''));
expect(url).toContain('lib/worker.js');
});
it('应该为 cmaps 生成正确的 URL国际区域', () => {
// Set the region to international
global.REGION = 'va';
const url = generatePdfAssetsUrl('cmaps');
// Verify the URL format
expect(url).toContain('//sf-cdn.coze.com/obj/unpkg-va');
expect(url).toContain(pkg.name.replace(/^@/, ''));
expect(url).toContain('lib/cmaps/');
});
it('应该为 pdf.worker 生成正确的 URL国际区域', () => {
// Set the region to international
global.REGION = 'va';
const url = generatePdfAssetsUrl('pdf.worker');
// Verify the URL format
expect(url).toContain('//sf-cdn.coze.com/obj/unpkg-va');
expect(url).toContain(pkg.name.replace(/^@/, ''));
expect(url).toContain('lib/worker.js');
});
it('应该在传入无效资源类型时抛出错误', () => {
// Use a type assertion to exercise the error path with an unsupported value
expect(() => generatePdfAssetsUrl('invalid' as any)).toThrow(
'目前只支持引用 cmaps 与 pdf.worker 文件',
);
});
});

View File

@@ -0,0 +1,49 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { describe, it, expect, vi } from 'vitest';
// Mock the pdfjs-dist module so only a stubbed getDocument is exposed
vi.mock('pdfjs-dist', () => ({
getDocument: vi.fn(),
}));
// Mock the generate-assets and init-pdfjs-dist modules
vi.mock('../src/generate-assets', () => ({
generatePdfAssetsUrl: vi.fn(),
}));
vi.mock('../src/init-pdfjs-dist', () => ({
initPdfJsWorker: vi.fn(),
}));
// Import the module under test
import {
generatePdfAssetsUrl,
initPdfJsWorker,
getDocument,
} from '../src/index';
// Smoke test for the package entry point: checks that the expected runtime
// exports are present (type-only exports cannot be observed at runtime).
describe('pdfjs-shadow index', () => {
it('应该导出所有必要的函数和类型', () => {
// Verify the exported functions
expect(typeof generatePdfAssetsUrl).toBe('function');
expect(typeof initPdfJsWorker).toBe('function');
// Verify the re-export from pdfjs-dist (only getDocument is checked here)
expect(getDocument).toBeDefined();
});
});

View File

@@ -0,0 +1,78 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
// Mock the pdfjs-dist module with a writable GlobalWorkerOptions.workerSrc
vi.mock('pdfjs-dist', () => ({
GlobalWorkerOptions: {
workerSrc: '',
},
}));
// Mock the generate-assets module so the URL helper returns a fixed value
vi.mock('../src/generate-assets', () => ({
generatePdfAssetsUrl: vi.fn().mockReturnValue('mocked-worker-url'),
}));
// Import the module under test
import { GlobalWorkerOptions } from 'pdfjs-dist';
import { initPdfJsWorker } from '../src/init-pdfjs-dist';
import { generatePdfAssetsUrl } from '../src/generate-assets';
// Test suite for initPdfJsWorker: workerSrc must be filled in from
// generatePdfAssetsUrl('pdf.worker') when it is empty and left alone otherwise.
describe('initPdfJsWorker', () => {
  beforeEach(() => {
    // Start every test with an unset GlobalWorkerOptions.workerSrc.
    GlobalWorkerOptions.workerSrc = '';
    // Drop call records left over from earlier tests.
    vi.clearAllMocks();
    // Re-install the stubbed return value for every test. vi.resetAllMocks() in
    // afterEach resets mock implementations, which would otherwise leave the
    // stub returning undefined for every test after the first one and make the
    // suite depend on test order.
    vi.mocked(generatePdfAssetsUrl).mockReturnValue('mocked-worker-url');
  });

  afterEach(() => {
    // Reset mocks after every test.
    vi.resetAllMocks();
  });

  it('应该设置 GlobalWorkerOptions.workerSrc 当它为空时', () => {
    // Make sure workerSrc starts out empty.
    expect(GlobalWorkerOptions.workerSrc).toBe('');

    // Run the initializer.
    initPdfJsWorker();

    // generatePdfAssetsUrl must be called exactly once with the worker asset type.
    expect(generatePdfAssetsUrl).toHaveBeenCalledTimes(1);
    expect(generatePdfAssetsUrl).toHaveBeenCalledWith('pdf.worker');
    // workerSrc must now hold the value returned by the helper.
    expect(GlobalWorkerOptions.workerSrc).toBe('mocked-worker-url');
  });

  it('不应该重新设置 GlobalWorkerOptions.workerSrc 当它已经有值时', () => {
    // Pre-populate workerSrc.
    GlobalWorkerOptions.workerSrc = 'existing-worker-url';

    // Run the initializer.
    initPdfJsWorker();

    // generatePdfAssetsUrl must not be consulted at all.
    expect(generatePdfAssetsUrl).not.toHaveBeenCalled();
    // workerSrc must be left untouched.
    expect(GlobalWorkerOptions.workerSrc).toBe('existing-worker-url');
  });
});

View File

@@ -0,0 +1,16 @@
{
"operationSettings": [
{
"operationName": "build",
"outputFolderNames": ["dist", "lib"]
},
{
"operationName": "test:cov",
"outputFolderNames": ["coverage"]
},
{
"operationName": "ts-check",
"outputFolderNames": ["./dist"]
}
]
}

View File

@@ -0,0 +1,9 @@
// ESLint configuration for this package, built with the shared
// @coze-arch/eslint-config helper and its 'web' preset.
const { defineConfig } = require('@coze-arch/eslint-config');
module.exports = defineConfig({
packageRoot: __dirname,
preset: 'web',
rules: {
// 0 turns the rule off for this package.
// NOTE(review): presumably disabled because this package depends on pdfjs-dist directly — confirm.
'@coze-arch/package-disallow-deps': 0,
},
});

View File

@@ -0,0 +1,39 @@
{
"name": "@coze-arch/pdfjs-shadow",
"version": "0.0.1",
"description": "shadow copy of pdfjs-dist",
"license": "Apache-2.0",
"author": "fanwenjie.fe@bytedance.com",
"maintainers": [],
"main": "src/index.ts",
"unpkg": "./lib",
"types": "./src/index.ts",
"files": [
"lib",
"README.md"
],
"scripts": {
"build": "tsc -b tsconfig.build.json && node -r sucrase/register scripts/build.ts",
"lint": "eslint ./ --cache",
"test": "vitest --run --passWithNoTests",
"test:cov": "npm run test -- --coverage"
},
"devDependencies": {
"@coze-arch/eslint-config": "workspace:*",
"@coze-arch/stylelint-config": "workspace:*",
"@coze-arch/ts-config": "workspace:*",
"@coze-arch/vitest-config": "workspace:*",
"@types/node": "^18",
"@vitest/coverage-v8": "~3.0.5",
"core-js": "^3.37.1",
"esbuild": "^0.15.18",
"pdfjs-dist": "4.3.136",
"sucrase": "^3.32.0",
"vitest": "~3.0.5"
},
"// deps": "@types/react-dom@^18.2.7 为脚本自动补齐,请勿改动",
"botPublishConfig": {
"main": "lib/worker.js"
}
}

View File

@@ -0,0 +1,51 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import path from 'path';
import fs from 'fs/promises';
import { OUTPUT_DIR } from './const';
/**
 * Recursively copies the contents of directory `src` into directory `dest`.
 *
 * The source is listed before the destination is created, so a missing source
 * rejects without creating `dest`. Entries are processed one at a time, in the
 * order returned by `readdir`.
 *
 * @param src - Directory to copy from.
 * @param dest - Directory to copy into; created (with parents) if missing.
 */
const copyDir = async (src: string, dest: string): Promise<void> => {
  const children = await fs.readdir(src, { withFileTypes: true });
  await fs.mkdir(dest, { recursive: true });

  for (const child of children) {
    const from = path.join(src, child.name);
    const to = path.join(dest, child.name);

    // Anything that is not a directory is copied as a plain file.
    if (!child.isDirectory()) {
      await fs.copyFile(from, to);
      continue;
    }

    // Directories are copied by descending into them.
    await copyDir(from, to);
  }
};
/**
 * Copies the `cmaps` directory shipped inside the installed `pdfjs-dist`
 * package into `<OUTPUT_DIR>/cmaps`.
 */
export const buildAssets = async () => {
  // Locate the package root through its package.json.
  const pdfjsRoot = path.dirname(require.resolve('pdfjs-dist/package.json'));
  const cmapsSource = path.resolve(pdfjsRoot, './cmaps');
  const cmapsTarget = path.resolve(OUTPUT_DIR, './cmaps');
  await copyDir(cmapsSource, cmapsTarget);
};

View File

@@ -0,0 +1,41 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import path from 'path';
// eslint-disable-next-line @coze-arch/no-batch-import-or-export
import * as esbuild from 'esbuild';
import { OUTPUT_DIR } from './const';
/**
 * Bundles the pdf.js worker into a single minified file at
 * `<OUTPUT_DIR>/worker.js` using esbuild.
 *
 * The bundle entry is an in-memory snippet that imports the core-js
 * `promise-with-resolvers` proposal before the prebuilt pdf.js worker.
 */
export const buildWorker = async () => {
  const workerEntry =
    'import "core-js/proposals/promise-with-resolvers"; import "pdfjs-dist/build/pdf.worker.min.mjs"';
  const workerOutfile = path.resolve(OUTPUT_DIR, 'worker.js');

  const buildOptions: esbuild.BuildOptions = {
    sourcemap: false,
    // The entry is fed through stdin instead of a file on disk; imports are
    // resolved relative to the current working directory.
    stdin: {
      contents: workerEntry,
      loader: 'ts',
      resolveDir: '.',
    },
    bundle: true,
    platform: 'node',
    target: ['chrome85'],
    outfile: workerOutfile,
    logLevel: 'error',
    minify: true,
  };

  await esbuild.build(buildOptions);
};

View File

@@ -0,0 +1,24 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { buildWorker } from './build-worker';
import { buildAssets } from './build-assets';
/**
 * Build entry point: copies the cmaps assets and bundles the worker
 * concurrently, resolving once both steps have finished.
 */
const run = async (): Promise<void> => {
  await Promise.all([buildAssets(), buildWorker()]);
};

// Handle a failed build explicitly instead of leaving the promise floating:
// report the error and make the process exit with a non-zero status so callers
// of the build script see the failure.
run().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,19 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import path from 'path';
/** Absolute path of the `lib` directory that sits next to this scripts directory. */
export const OUTPUT_DIR = path.resolve(__dirname, '..', 'lib');

View File

@@ -0,0 +1,9 @@
{
"$schema": "https://json.schemastore.org/tsconfig",
"extends": "@coze-arch/ts-config/tsconfig.node.json",
"compilerOptions": {
"rootDir": "./",
"types": ["node"]
},
"include": ["*.ts"]
}

View File

@@ -0,0 +1,55 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import pkg from '../package.json';
// Asset kinds for which generatePdfAssetsUrl can build a URL.
type AssetsType = 'cmaps' | 'pdf.worker';
// Must be a version of this package that has already been published to bnpm;
// it is embedded verbatim in the generated asset URLs.
const DEFAULT_VERSION = '0.1.0-alpha.x6e892414ec';
/**
 * Builds the protocol-relative unpkg-style CDN URL of this package's published
 * `cmaps` directory or bundled pdf.js worker. This is a helper of this package,
 * not an API provided by pdfjs itself.
 *
 * @param assets - Which asset to link to: `'cmaps'` or `'pdf.worker'`.
 * @returns `//<cdn domain>/<package name without leading "@">/<DEFAULT_VERSION>/<asset path>`.
 * @throws Error when `assets` is neither of the supported values.
 */
export const generatePdfAssetsUrl = (assets: AssetsType) => {
  const packageName = pkg.name;

  // Reject unsupported values before anything else is computed.
  if (assets !== 'cmaps' && assets !== 'pdf.worker') {
    throw new Error(
      '目前只支持引用 cmaps 与 pdf.worker 文件,如需引用其他文件请联系 @fanwenjie.fe',
    );
  }
  const assetsPath = assets === 'cmaps' ? 'lib/cmaps/' : 'lib/worker.js';

  // The package is published online without the leading "@" of its scope.
  const scopelessName = packageName.replace(/^@/, '');

  // Only the 'cn' region uses the coze.cn CDN; every other region falls back
  // to the coze.com `unpkg-va` path.
  const cdnBase =
    REGION === 'cn'
      ? 'lf-cdn.coze.cn/obj/unpkg'
      : 'sf-cdn.coze.com/obj/unpkg-va';

  // cp-disable-next-line
  return `//${cdnBase}/${scopelessName}/${DEFAULT_VERSION}/${assetsPath}`;
};

View File

@@ -0,0 +1,20 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <reference types='pdfjs-dist' />
// Shorthand ambient module declarations: they let these pdf.js worker entry
// points be imported without dedicated typings.
declare module 'pdfjs-dist/build/pdf.worker.mjs';
declare module 'pdfjs-dist/build/pdf.worker.entry.js';
// Global region identifier; generatePdfAssetsUrl reads it to choose the CDN domain.
// NOTE(review): presumably injected by the build tooling — confirm where it is defined.
declare const REGION: 'cn' | 'sg' | 'va';

View File

@@ -0,0 +1,27 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Public entry point of @coze-arch/pdfjs-shadow.
// Re-exports getDocument and a selection of types from pdfjs-dist so that
// consumers import them from this package.
export {
getDocument,
type PDFDocumentProxy,
type PDFPageProxy,
type PageViewport,
} from 'pdfjs-dist';
// Types that are only reachable through pdfjs-dist's internal type paths.
export { type TextContent } from 'pdfjs-dist/types/src/display/text_layer';
export { type TextItem } from 'pdfjs-dist/types/src/display/api';
// Helpers owned by this package.
export { generatePdfAssetsUrl } from './generate-assets';
export { initPdfJsWorker } from './init-pdfjs-dist';

View File

@@ -0,0 +1,28 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { GlobalWorkerOptions } from 'pdfjs-dist';
import { generatePdfAssetsUrl } from './generate-assets';
/**
 * Points pdfjs-dist's `GlobalWorkerOptions.workerSrc` at the worker URL
 * produced by `generatePdfAssetsUrl('pdf.worker')`.
 *
 * Safe to call repeatedly: an already-set (truthy) `workerSrc` is left untouched.
 */
export const initPdfJsWorker = (): void => {
  if (GlobalWorkerOptions.workerSrc) {
    return;
  }
  GlobalWorkerOptions.workerSrc = generatePdfAssetsUrl('pdf.worker');
};

View File

@@ -0,0 +1,28 @@
{
"$schema": "https://json.schemastore.org/tsconfig",
"extends": "@coze-arch/ts-config/tsconfig.web.json",
"compilerOptions": {
"resolveJsonModule": true,
"types": [],
"strictNullChecks": true,
"noImplicitAny": true,
"rootDir": "./",
"outDir": "./dist",
"tsBuildInfoFile": "./dist/tsconfig.build.tsbuildinfo"
},
"include": ["src", "package.json"],
"references": [
{
"path": "../../../config/eslint-config/tsconfig.build.json"
},
{
"path": "../../../config/stylelint-config/tsconfig.build.json"
},
{
"path": "../../../config/ts-config/tsconfig.build.json"
},
{
"path": "../../../config/vitest-config/tsconfig.build.json"
}
]
}

View File

@@ -0,0 +1,15 @@
{
"$schema": "https://json.schemastore.org/tsconfig",
"compilerOptions": {
"composite": true
},
"references": [
{
"path": "./tsconfig.build.json"
},
{
"path": "./tsconfig.misc.json"
}
],
"exclude": ["**/*"]
}

View File

@@ -0,0 +1,24 @@
{
"extends": "@coze-arch/ts-config/tsconfig.web.json",
"$schema": "https://json.schemastore.org/tsconfig",
"include": [
"__tests__",
"vitest.config.ts",
"rspack.config.ts",
"package.json"
],
"exclude": ["./dist"],
"references": [
{
"path": "./tsconfig.build.json"
}
],
"compilerOptions": {
"rootDir": "./",
"resolveJsonModule": true,
"types": ["vitest/globals"],
"strictNullChecks": true,
"noImplicitAny": true,
"outDir": "./dist"
}
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { defineConfig } from '@coze-arch/vitest-config';
// Vitest configuration for this package, produced by the shared
// @coze-arch/vitest-config helper with its 'web' preset and this directory as root.
export default defineConfig({
dirname: __dirname,
preset: 'web',
});