feat: manually mirror opencoze's code from bytedance

Change-Id: I09a73aadda978ad9511264a756b2ce51f5761adf
This commit is contained in:
fanlv
2025-07-20 17:36:12 +08:00
commit 890153324f
14811 changed files with 1923430 additions and 0 deletions

View File

@@ -0,0 +1,272 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package code
import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"golang.org/x/exp/maps"
"github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/code"
"github.com/coze-dev/coze-studio/backend/domain/workflow/entity/vo"
"github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes"
"github.com/coze-dev/coze-studio/backend/pkg/ctxcache"
"github.com/coze-dev/coze-studio/backend/pkg/errorx"
"github.com/coze-dev/coze-studio/backend/pkg/logs"
"github.com/coze-dev/coze-studio/backend/types/errno"
)
// ctxcache keys used by CodeRunner to hand per-execution data from RunCode
// over to ToCallbackOutput.
const (
// coderRunnerRawOutputCtxKey is the key under which RunCode stores the
// runner's result map before it is converted to the configured output types.
coderRunnerRawOutputCtxKey = "ctx_raw_output"
// coderRunnerWarnErrorLevelCtxKey is the key under which RunCode stores the
// conversion warnings, when the conversion produced any.
coderRunnerWarnErrorLevelCtxKey = "ctx_warn_error_level"
)
// Patterns used by parsePythonImports to recognise import statements on a
// single line of Python source.
var (
// importRegex matches `import ...` at the start of a line (leading whitespace
// allowed) and captures the run of letters, digits, `_`, `.`, `,` and
// whitespace that follows the keyword, e.g. `os, sys.path as p`.
importRegex = regexp.MustCompile(`^\s*import\s+([a-zA-Z0-9_.,\s]+)`)
// fromImportRegex matches `from <module> import` at the start of a line and
// captures the dotted module path between `from` and `import`.
fromImportRegex = regexp.MustCompile(`^\s*from\s+([a-zA-Z0-9_.]+)\s+import`)
)
// pythonBuiltinModules is the set of top-level Python standard-library module
// names known to this package. validatePythonImports uses membership in this
// set to tell standard-library imports apart from third-party ones.
// see: https://docs.python.org/3.9/library/
var pythonBuiltinModules = map[string]struct{}{
"abc": {}, "aifc": {}, "antigravity": {}, "argparse": {}, "ast": {}, "asynchat": {}, "asyncio": {}, "asyncore": {}, "array": {},
"atexit": {}, "base64": {}, "bdb": {}, "binhex": {}, "bisect": {}, "builtins": {}, "bz2": {}, "cProfile": {}, "binascii": {},
"calendar": {}, "cgi": {}, "cgitb": {}, "chunk": {}, "cmd": {}, "code": {}, "codecs": {}, "codeop": {}, "cmath": {}, "audioop": {},
"collections": {}, "colorsys": {}, "compileall": {}, "concurrent": {}, "configparser": {}, "contextlib": {}, "contextvars": {}, "copy": {},
"copyreg": {}, "crypt": {}, "csv": {}, "ctypes": {}, "curses": {}, "dataclasses": {}, "datetime": {}, "dbm": {}, "fcntl": {},
"decimal": {}, "difflib": {}, "dis": {}, "distutils": {}, "doctest": {}, "email": {}, "encodings": {}, "ensurepip": {}, "ossaudiodev": {},
"enum": {}, "errno": {}, "faulthandler": {}, "filecmp": {}, "fileinput": {}, "fnmatch": {}, "formatter": {}, "fractions": {},
"ftplib": {}, "functools": {}, "gc": {}, "genericpath": {}, "getopt": {}, "getpass": {}, "gettext": {}, "glob": {}, "grp": {},
"graphlib": {}, "gzip": {}, "hashlib": {}, "heapq": {}, "hmac": {}, "html": {}, "http": {}, "imaplib": {}, "msvcrt": {},
"imghdr": {}, "imp": {}, "importlib": {}, "inspect": {}, "io": {}, "ipaddress": {}, "itertools": {}, "json": {}, "mmap": {},
"keyword": {}, "lib2to3": {}, "linecache": {}, "locale": {}, "logging": {}, "lzma": {}, "mailbox": {}, "mailcap": {}, "msilib": {},
"marshal": {}, "math": {}, "mimetypes": {}, "modulefinder": {}, "multiprocessing": {}, "netrc": {}, "nntplib": {}, "ntpath": {},
"nturl2path": {}, "numbers": {}, "opcode": {}, "operator": {}, "optparse": {}, "os": {}, "pathlib": {}, "pdb": {}, "readline": {},
"pickle": {}, "pickletools": {}, "pipes": {}, "pkgutil": {}, "platform": {}, "plistlib": {}, "poplib": {}, "posix": {}, "parser": {},
"posixpath": {}, "pprint": {}, "profile": {}, "pstats": {}, "pty": {}, "pwd": {}, "py_compile": {}, "pyclbr": {}, "spwd": {},
"pydoc": {}, "pydoc_data": {}, "queue": {}, "quopri": {}, "random": {}, "re": {}, "reprlib": {}, "rlcompleter": {}, "resource": {},
"runpy": {}, "sched": {}, "secrets": {}, "selectors": {}, "shelve": {}, "shlex": {}, "shutil": {}, "signal": {}, "select": {},
"site": {}, "smtpd": {}, "smtplib": {}, "sndhdr": {}, "socket": {}, "socketserver": {}, "sqlite3": {}, "sre_compile": {},
"sre_constants": {}, "sre_parse": {}, "ssl": {}, "stat": {}, "statistics": {}, "string": {}, "stringprep": {}, "struct": {},
"subprocess": {}, "sunau": {}, "symbol": {}, "symtable": {}, "sys": {}, "sysconfig": {}, "tabnanny": {}, "tarfile": {}, "nis": {},
"telnetlib": {}, "tempfile": {}, "textwrap": {}, "this": {}, "threading": {}, "time": {}, "timeit": {}, "tkinter": {}, "test": {},
"token": {}, "tokenize": {}, "trace": {}, "traceback": {}, "tracemalloc": {}, "tty": {}, "turtle": {}, "turtledemo": {},
"types": {}, "typing": {}, "unittest": {}, "urllib": {}, "uu": {}, "uuid": {}, "venv": {}, "warnings": {}, "termios": {},
"wave": {}, "weakref": {}, "webbrowser": {}, "wsgiref": {}, "xdrlib": {}, "xml": {}, "xmlrpc": {}, "xxsubtype": {}, "zlib": {},
"zipapp": {}, "zipfile": {}, "zipimport": {}, "zoneinfo": {}, "winreg": {}, "syslog": {}, "winsound": {}, "unicodedata": {},
}
// pythonBuiltinBlacklist is the set of Python standard-library modules that
// user code is not allowed to import. validatePythonImports reports imports of
// these modules as removed from the standard library for security reasons.
// see: https://www.coze.cn/open/docs/guides/code_node#7f41f073
var pythonBuiltinBlacklist = map[string]struct{}{
"curses": {},
"dbm": {},
"ensurepip": {},
"fcntl": {},
"grp": {},
"idlelib": {},
"lib2to3": {},
"msvcrt": {},
"pwd": {},
"resource": {},
"syslog": {},
"termios": {},
"tkinter": {},
"turtle": {},
"turtledemo": {},
"venv": {},
"winreg": {},
"winsound": {},
"multiprocessing": {},
"threading": {},
"socket": {},
"pty": {},
"tty": {},
}
// pythonThirdPartyWhitelist is the set of third-party Python modules that user
// code may import; validatePythonImports rejects any other non-standard-library
// import with a "No module named" error.
// see: https://www.coze.cn/open/docs/guides/code_node#7f41f073
// To allow another third-party library, add it to this whitelist and also
// install it via `pip install` in `/scripts/setup/python.sh` and
// `/backend/Dockerfile`.
var pythonThirdPartyWhitelist = map[string]struct{}{
"requests_async": {},
"numpy": {},
}
// Config carries everything NewCodeRunner needs to build a CodeRunner.
// All four fields are required; NewCodeRunner rejects a Config with any of
// them unset.
type Config struct {
// Code is the user-supplied source that is sent to Runner for execution.
Code string
// Language is the language of Code. NewCodeRunner only accepts code.Python.
Language code.Language
// OutputConfig maps each output field name to the type information that
// RunCode passes to nodes.ConvertInputs when converting the runner's result.
OutputConfig map[string]*vo.TypeInfo
// Runner is the backend whose Run method executes Code.
Runner code.Runner
}
// CodeRunner executes the code described by its Config and converts the result
// to the configured output types.
type CodeRunner struct {
// config is the validated configuration supplied to NewCodeRunner.
config *Config
// importError is the result of validating the Python imports in config.Code
// at construction time; when non-nil, RunCode returns it instead of running
// the code.
importError error
}
// NewCodeRunner checks cfg and wraps it in a CodeRunner.
//
// It returns an error when cfg is nil, when Language or Code is empty, when
// Language is anything other than code.Python, when OutputConfig is empty, or
// when Runner is nil. The checks run in that order and the first failure wins.
//
// Import validation of cfg.Code never fails construction: its outcome is kept
// on the returned CodeRunner and surfaced by RunCode. ctx is currently unused.
func NewCodeRunner(ctx context.Context, cfg *Config) (*CodeRunner, error) {
	switch {
	case cfg == nil:
		return nil, errors.New("cfg is required")
	case cfg.Language == "":
		return nil, errors.New("language is required")
	case cfg.Code == "":
		return nil, errors.New("code is required")
	case cfg.Language != code.Python:
		return nil, errors.New("only support python language")
	case len(cfg.OutputConfig) == 0:
		return nil, errors.New("output config is required")
	case cfg.Runner == nil:
		return nil, errors.New("run coder is required")
	}

	runner := &CodeRunner{config: cfg}
	// Deferred failure: a disallowed import is reported when the code is run.
	runner.importError = validatePythonImports(cfg.Code)
	return runner, nil
}
// validatePythonImports checks every top-level module imported by the given
// Python source against the package's allow/deny tables.
//
// A module is rejected when it is listed in pythonBuiltinBlacklist, or when it
// is neither in pythonBuiltinModules nor in pythonThirdPartyWhitelist. The
// blacklist is consulted first so that it is authoritative even for names that
// are missing from pythonBuiltinModules (such as "idlelib" at the time of
// writing); previously such names fell through to the generic
// "No module named" message.
//
// It returns nil when every import is allowed. Otherwise it returns a single
// error whose text mimics Python's ModuleNotFoundError: one message for the
// blacklisted modules and/or one for the unknown modules, joined by ",".
// Module names appear in the order produced by parsePythonImports.
func validatePythonImports(code string) error {
	var (
		blacklistedModules    []string
		nonWhitelistedModules []string
	)

	for _, imp := range parsePythonImports(code) {
		if _, blacklisted := pythonBuiltinBlacklist[imp]; blacklisted {
			blacklistedModules = append(blacklistedModules, imp)
			continue
		}
		if _, builtin := pythonBuiltinModules[imp]; builtin {
			continue
		}
		if _, whitelisted := pythonThirdPartyWhitelist[imp]; !whitelisted {
			nonWhitelistedModules = append(nonWhitelistedModules, imp)
		}
	}

	var importErrors []string
	if len(blacklistedModules) > 0 {
		moduleNames := fmt.Sprintf("'%s'", strings.Join(blacklistedModules, "', '"))
		importErrors = append(importErrors, fmt.Sprintf("ModuleNotFoundError: The module(s) %s are removed from the Python standard library for security reasons\n", moduleNames))
	}
	if len(nonWhitelistedModules) > 0 {
		moduleNames := fmt.Sprintf("'%s'", strings.Join(nonWhitelistedModules, "', '"))
		importErrors = append(importErrors, fmt.Sprintf("ModuleNotFoundError: No module named %s\n", moduleNames))
	}

	if len(importErrors) == 0 {
		return nil
	}
	return errors.New(strings.Join(importErrors, ","))
}
// RunCode executes the configured code with input as its parameters and
// returns the result converted according to the configured OutputConfig.
//
// If import validation failed at construction time, the code is not run and
// that failure is returned wrapped as errno.ErrCodeExecuteFail. Runner and
// conversion failures are reported with the same error code.
//
// Side effects on ctx's ctxcache: the runner's unconverted result is stored
// under coderRunnerRawOutputCtxKey, and any conversion warnings are logged and
// stored under coderRunnerWarnErrorLevelCtxKey, for ToCallbackOutput to read.
func (c *CodeRunner) RunCode(ctx context.Context, input map[string]any) (ret map[string]any, err error) {
	if importErr := c.importError; importErr != nil {
		return nil, vo.WrapError(errno.ErrCodeExecuteFail, importErr, errorx.KV("detail", importErr.Error()))
	}

	request := &code.RunRequest{
		Code:     c.config.Code,
		Language: c.config.Language,
		Params:   input,
	}
	resp, err := c.config.Runner.Run(ctx, request)
	if err != nil {
		return nil, vo.WrapError(errno.ErrCodeExecuteFail, err, errorx.KV("detail", err.Error()))
	}

	// Cache the raw result before conversion so it stays available to the
	// callback even when conversion reports warnings.
	rawResult := resp.Result
	ctxcache.Store(ctx, coderRunnerRawOutputCtxKey, rawResult)

	converted, warnings, err := nodes.ConvertInputs(ctx, rawResult, c.config.OutputConfig)
	if err != nil {
		return nil, vo.WrapIfNeeded(errno.ErrCodeExecuteFail, err, errorx.KV("detail", err.Error()))
	}

	if warnings != nil && len(*warnings) > 0 {
		logs.CtxWarnf(ctx, "convert inputs warnings: %v", *warnings)
		ctxcache.Store(ctx, coderRunnerWarnErrorLevelCtxKey, *warnings)
	}

	return converted, nil
}
// ToCallbackOutput packages output together with the data RunCode left in
// ctx's ctxcache into a StructuredCallbackOutput.
//
// The raw runner result must be present under coderRunnerRawOutputCtxKey,
// otherwise an error is returned. If conversion warnings are present under
// coderRunnerWarnErrorLevelCtxKey they are attached as a warn-level
// errno.ErrNodeOutputParseFail error; otherwise the Error field is left at its
// zero value.
func (c *CodeRunner) ToCallbackOutput(ctx context.Context, output map[string]any) (*nodes.StructuredCallbackOutput, error) {
	raw, found := ctxcache.Get[map[string]any](ctx, coderRunnerRawOutputCtxKey)
	if !found {
		return nil, errors.New("raw output config is required")
	}

	var warnErr vo.WorkflowError
	cachedWarnings, hasWarnings := ctxcache.Get[nodes.ConversionWarnings](ctx, coderRunnerWarnErrorLevelCtxKey)
	if hasWarnings {
		warnErr = vo.WrapWarn(errno.ErrNodeOutputParseFail, cachedWarnings, errorx.KV("warnings", cachedWarnings.Error()))
	}

	callbackOutput := &nodes.StructuredCallbackOutput{
		Output:    output,
		RawOutput: raw,
		Error:     warnErr,
	}
	return callbackOutput, nil
}
// parsePythonImports scans Python source line by line and returns the distinct
// top-level module names it imports.
//
// Two statement forms are recognised, each only when it starts a line:
//
//	import a, b.c as d   -> "a", "b"
//	from x.y import z    -> "x"
//
// Lines whose first non-blank character is '#' are skipped. Only the first
// dotted component of each module path is kept. Empty names (for example from
// a trailing comma, or from a relative import such as `from . import x`) are
// ignored rather than being reported as a module named "".
//
// The scan is purely textual: it does not understand multi-line statements,
// string literals or several statements on one line. The returned slice comes
// from map keys, so its order is not specified.
func parsePythonImports(code string) []string {
	modules := make(map[string]struct{})

	// addModule records the top-level package of a dotted module path.
	addModule := func(dotted string) {
		top, _, _ := strings.Cut(dotted, ".")
		if top == "" {
			return
		}
		modules[top] = struct{}{}
	}

	for _, line := range strings.Split(code, "\n") {
		line = strings.TrimSpace(line)
		if strings.HasPrefix(line, "#") {
			continue
		}

		if matches := importRegex.FindStringSubmatch(line); len(matches) > 1 {
			for _, item := range strings.Split(matches[1], ",") {
				// Fields splits on any whitespace run, so "os\tas o" and
				// "os  as o" both yield "os"; a blank item yields nothing.
				if fields := strings.Fields(item); len(fields) > 0 {
					addModule(fields[0])
				}
			}
			continue
		}

		if matches := fromImportRegex.FindStringSubmatch(line); len(matches) > 1 {
			addModule(matches[1])
		}
	}

	return maps.Keys(modules)
}

View File

@@ -0,0 +1,262 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package code
import (
"encoding/json"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"go.uber.org/mock/gomock"
"github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/code"
"github.com/coze-dev/coze-studio/backend/domain/workflow/entity/vo"
"github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes"
mockcode "github.com/coze-dev/coze-studio/backend/internal/mock/domain/workflow/crossdomain/code"
"github.com/coze-dev/coze-studio/backend/pkg/ctxcache"
)
// codeTpl holds the Python source snippet that subtests of TestCode_RunCode
// assign and then pass to the runner as Config.Code.
var codeTpl string
// TestCode_RunCode exercises CodeRunner.RunCode against a mocked code.Runner.
// The mock returns a canned result map, so the Python snippets are never
// executed; the subtests check how that result is converted to OutputConfig:
//
//   - "normal": every configured field is present and convertible.
//   - "field not in return": configured fields missing from the result come
//     back as nil.
//   - "field convert failed": values that cannot be converted produce warnings
//     that RunCode stores in ctxcache.
//
// Each subtest stops right after a failed RunCode so that later indexing and
// type assertions on the result cannot panic.
func TestCode_RunCode(t *testing.T) {
	ctrl := gomock.NewController(t)
	mockRunner := mockcode.NewMockRunner(ctrl)

	t.Run("normal", func(t *testing.T) {
		var codeTpl = `
async def main(args:Args)->Output:
params = args.params
ret: Output = {
"key0": params['input'] + params['input'],
"key1": ["hello", "world"],
"key2": [123, "345"],
"key3": {
"key31": "hi",
"key32": "hello",
"key33": ["123","456"],
"key34": {
"key341":"123",
"key342":456,
}
},
}
return ret
`
		ret := map[string]any{
			"key0": int64(11231123),
			"key1": []any{"hello", "world"},
			"key2": []any{int64(123), "345"},
			"key3": map[string]any{"key31": "hi", "key32": "hello", "key33": []any{"123", "456"}, "key34": map[string]any{"key341": "123", "key342": int64(456)}},
			"key4": []any{
				map[string]any{"key41": "41"},
				map[string]any{"key42": "42"},
			},
		}

		response := &code.RunResponse{
			Result: ret,
		}
		mockRunner.EXPECT().Run(gomock.Any(), gomock.Any()).Return(response, nil)

		ctx := t.Context()
		c := &CodeRunner{
			config: &Config{
				Language: code.Python,
				Code:     codeTpl,
				OutputConfig: map[string]*vo.TypeInfo{
					"key0": {Type: vo.DataTypeInteger},
					"key1": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeString}},
					"key2": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}},
					"key3": {Type: vo.DataTypeObject, Properties: map[string]*vo.TypeInfo{
						"key31": {Type: vo.DataTypeString},
						"key32": {Type: vo.DataTypeString},
						"key33": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}},
						"key34": {Type: vo.DataTypeObject, Properties: map[string]*vo.TypeInfo{
							"key341": {Type: vo.DataTypeString},
							"key342": {Type: vo.DataTypeString},
						}},
					}},
					"key4": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeObject}},
				},
				Runner: mockRunner,
			},
		}

		ret, err := c.RunCode(ctx, map[string]any{
			"input": "1123",
		})
		if !assert.NoError(t, err) {
			return
		}

		// Print the converted output to ease debugging of conversion changes.
		bs, err := json.Marshal(ret)
		assert.NoError(t, err)
		fmt.Println(string(bs))

		assert.Equal(t, int64(11231123), ret["key0"])
		assert.Equal(t, []any{"hello", "world"}, ret["key1"])
		assert.Equal(t, []any{float64(123), float64(345)}, ret["key2"])
		assert.Equal(t, []any{float64(123), float64(456)}, ret["key3"].(map[string]any)["key33"])
		assert.Equal(t, map[string]any{"key41": "41"}, ret["key4"].([]any)[0].(map[string]any))
	})

	t.Run("field not in return", func(t *testing.T) {
		codeTpl = `
async def main(args:Args)->Output:
params = args.params
ret: Output = {
"key0": params['input'] + params['input'],
"key1": ["hello", "world"],
"key2": [123, "345"],
"key3": {
"key31": "hi",
"key32": "hello",
"key34": {
"key341":"123"
}
},
}
return ret
`
		ret := map[string]any{
			"key0": int64(11231123),
			"key1": []any{"hello", "world"},
			"key2": []any{int64(123), "345"},
			"key3": map[string]any{"key31": "hi", "key32": "hello", "key34": map[string]any{"key341": "123"}},
		}
		response := &code.RunResponse{
			Result: ret,
		}
		mockRunner.EXPECT().Run(gomock.Any(), gomock.Any()).Return(response, nil)

		ctx := t.Context()
		c := &CodeRunner{
			config: &Config{
				Code:     codeTpl,
				Language: code.Python,
				OutputConfig: map[string]*vo.TypeInfo{
					"key0": {Type: vo.DataTypeInteger},
					"key1": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeString}},
					"key2": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}},
					"key3": {Type: vo.DataTypeObject, Properties: map[string]*vo.TypeInfo{
						"key31": {Type: vo.DataTypeString},
						"key32": {Type: vo.DataTypeString},
						"key33": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}},
						"key34": {Type: vo.DataTypeObject, Properties: map[string]*vo.TypeInfo{
							"key341": {Type: vo.DataTypeString},
							"key342": {Type: vo.DataTypeString},
						}},
					}},
					"key4": {Type: vo.DataTypeObject, Properties: map[string]*vo.TypeInfo{
						"key31": {Type: vo.DataTypeString},
						"key32": {Type: vo.DataTypeString},
						"key33": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}},
						"key34": {Type: vo.DataTypeObject, Properties: map[string]*vo.TypeInfo{
							"key341": {Type: vo.DataTypeString},
							"key342": {Type: vo.DataTypeString},
						}},
					}},
				},
				Runner: mockRunner,
			},
		}

		ret, err := c.RunCode(ctx, map[string]any{
			"input": "1123",
		})
		if !assert.NoError(t, err) {
			return
		}

		assert.Equal(t, int64(11231123), ret["key0"])
		assert.Equal(t, []any{"hello", "world"}, ret["key1"])
		assert.Equal(t, []any{float64(123), float64(345)}, ret["key2"])
		assert.Equal(t, nil, ret["key4"])
		assert.Equal(t, nil, ret["key3"].(map[string]any)["key33"])
	})

	t.Run("field convert failed", func(t *testing.T) {
		codeTpl = `
async def main(args:Args)->Output:
params = args.params
ret: Output = {
"key0": params['input'] + params['input'],
"key1": ["hello", "world"],
"key2": [123, "345"],
"key3": {
"key31": "hi",
"key32": "hello",
"key34": {
"key341":"123",
"key343": ["hello", "world"],
}
},
}
return ret
`
		// The cache must be initialised on ctx so the warnings stored by
		// RunCode can be read back below.
		ctx := t.Context()
		ctx = ctxcache.Init(ctx)

		ret := map[string]any{
			"key0": int64(11231123),
			"key1": []any{"hello", "world"},
			"key2": []any{int64(123), "345"},
			"key3": map[string]any{"key31": "hi", "key32": "hello", "key34": map[string]any{"key341": "123", "key343": []any{"hello", "world"}}},
		}
		response := &code.RunResponse{
			Result: ret,
		}
		mockRunner.EXPECT().Run(gomock.Any(), gomock.Any()).Return(response, nil)

		c := &CodeRunner{
			config: &Config{
				Code:     codeTpl,
				Language: code.Python,
				OutputConfig: map[string]*vo.TypeInfo{
					"key0": {Type: vo.DataTypeInteger},
					"key1": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}},
					"key2": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}},
					"key3": {Type: vo.DataTypeObject, Properties: map[string]*vo.TypeInfo{
						"key31": {Type: vo.DataTypeString},
						"key32": {Type: vo.DataTypeString},
						"key33": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}},
						"key34": {Type: vo.DataTypeObject, Properties: map[string]*vo.TypeInfo{
							"key341": {Type: vo.DataTypeString},
							"key342": {Type: vo.DataTypeString},
							"key343": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}},
						}},
					}},
				},
				Runner: mockRunner,
			},
		}

		ret, err := c.RunCode(ctx, map[string]any{
			"input": "1123",
		})
		if !assert.NoError(t, err) {
			return
		}

		assert.Equal(t, int64(11231123), ret["key0"])
		assert.Equal(t, []any{float64(123), float64(345)}, ret["key2"])

		warnings, ok := ctxcache.Get[nodes.ConversionWarnings](ctx, coderRunnerWarnErrorLevelCtxKey)
		assert.True(t, ok)
		s := warnings.Error()
		assert.Contains(t, s, "field key3.key34.key343.0 is not number")
		assert.Contains(t, s, "field key3.key34.key343.1 is not number")
		assert.Contains(t, s, "field key1.0 is not number")
		assert.Contains(t, s, "field key1.1 is not number")
	})
}