diff --git a/backend/Dockerfile b/backend/Dockerfile index 4f5f6452..322e166d 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -23,7 +23,6 @@ COPY backend/ ./ RUN go build -ldflags="-s -w" -o /app/opencoze main.go - # Stage 2: Final image FROM alpine:3.22.0 @@ -32,7 +31,7 @@ WORKDIR /app # Install runtime dependencies for Go app and base for Python # pax-utils for scanelf, python3 for running Python, python3-dev for headers/shared libs # bind-tools for nslookup etc., file for debugging file types -RUN apk add --no-cache pax-utils python3 python3-dev bind-tools file +RUN apk add --no-cache pax-utils python3 python3-dev bind-tools file deno # Install Python build dependencies, create venv, install packages, then remove build deps RUN apk add --no-cache --virtual .python-build-deps build-base py3-pip git && \ @@ -48,6 +47,7 @@ RUN apk add --no-cache --virtual .python-build-deps build-base py3-pip git && \ # Remove build dependencies apk del .python-build-deps + # Copy the built Go binary from the builder stage COPY --from=builder /app/opencoze /app/opencoze COPY --from=builder /app/proxy_app /app/proxy @@ -55,6 +55,7 @@ COPY --from=builder /app/proxy_app /app/proxy # Copy Python application scripts COPY backend/infra/impl/document/parser/builtin/parse_pdf.py /app/parse_pdf.py COPY backend/infra/impl/document/parser/builtin/parse_docx.py /app/parse_docx.py +COPY backend/infra/impl/coderunner/script/sandbox.py /app/sandbox.py # Copy static resources diff --git a/backend/api/handler/coze/workflow_service_test.go b/backend/api/handler/coze/workflow_service_test.go index 1836b404..fb66d106 100644 --- a/backend/api/handler/coze/workflow_service_test.go +++ b/backend/api/handler/coze/workflow_service_test.go @@ -40,13 +40,6 @@ import ( "github.com/cloudwego/hertz/pkg/common/ut" "github.com/cloudwego/hertz/pkg/protocol" "github.com/cloudwego/hertz/pkg/protocol/sse" - "github.com/redis/go-redis/v9" - "github.com/stretchr/testify/assert" - 
"github.com/stretchr/testify/require" - "go.uber.org/mock/gomock" - "gorm.io/driver/mysql" - "gorm.io/gorm" - modelknowledge "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge" plugin2 "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/plugin" pluginmodel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/plugin" @@ -84,8 +77,9 @@ import ( "github.com/coze-dev/coze-studio/backend/domain/workflow/entity/vo" "github.com/coze-dev/coze-studio/backend/domain/workflow/service" "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr" + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" "github.com/coze-dev/coze-studio/backend/infra/impl/checkpoint" - "github.com/coze-dev/coze-studio/backend/infra/impl/coderunner" + "github.com/coze-dev/coze-studio/backend/infra/impl/coderunner/direct" mockCrossUser "github.com/coze-dev/coze-studio/backend/internal/mock/crossdomain/crossuser" mockPlugin "github.com/coze-dev/coze-studio/backend/internal/mock/domain/plugin" mockcode "github.com/coze-dev/coze-studio/backend/internal/mock/domain/workflow/crossdomain/code" @@ -99,6 +93,12 @@ import ( "github.com/coze-dev/coze-studio/backend/pkg/sonic" "github.com/coze-dev/coze-studio/backend/types/consts" "github.com/coze-dev/coze-studio/backend/types/errno" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + "gorm.io/driver/mysql" + "gorm.io/gorm" ) type wfTestRunner struct { @@ -3636,8 +3636,8 @@ func TestNodeDebugLoop(t *testing.T) { r := newWfTestRunner(t) defer r.closeFn() runner := mockcode.NewMockRunner(r.ctrl) - runner.EXPECT().Run(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, request *code.RunRequest) (*code.RunResponse, error) { - return &code.RunResponse{ + runner.EXPECT().Run(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, request *coderunner.RunRequest) (*coderunner.RunResponse, error) 
{ + return &coderunner.RunResponse{ Result: request.Params, }, nil }).AnyTimes() @@ -3959,7 +3959,7 @@ func TestCodeExceptionBranch(t *testing.T) { id := r.load("exception/code_exception_branch.json") mockey.PatchConvey("exception branch", func() { - code.SetCodeRunner(coderunner.NewRunner()) + code.SetCodeRunner(direct.NewRunner()) exeID := r.testRun(id, map[string]string{"input": "hello"}) e := r.getProcess(id, exeID) @@ -3973,7 +3973,7 @@ func TestCodeExceptionBranch(t *testing.T) { mockey.PatchConvey("normal branch", func() { mockCodeRunner := mockcode.NewMockRunner(r.ctrl) mockey.Mock(code.GetCodeRunner).Return(mockCodeRunner).Build() - mockCodeRunner.EXPECT().Run(gomock.Any(), gomock.Any()).Return(&code.RunResponse{ + mockCodeRunner.EXPECT().Run(gomock.Any(), gomock.Any()).Return(&coderunner.RunResponse{ Result: map[string]any{ "key0": "value0", "key1": []string{"value1", "value2"}, diff --git a/backend/application/application.go b/backend/application/application.go index 5822bfa5..1e14e496 100644 --- a/backend/application/application.go +++ b/backend/application/application.go @@ -282,6 +282,7 @@ func (b *basicServices) toWorkflowServiceComponents(pluginSVC *plugin.PluginAppl ModelManager: b.infra.ModelMgr, DomainNotifier: b.eventbus.resourceEventBus, CPStore: checkpoint.NewRedisStore(b.infra.CacheCli), + CodeRunner: b.infra.CodeRunner, } } diff --git a/backend/application/base/appinfra/app_infra.go b/backend/application/base/appinfra/app_infra.go index 766d13dc..2aabe24a 100644 --- a/backend/application/base/appinfra/app_infra.go +++ b/backend/application/base/appinfra/app_infra.go @@ -20,12 +20,17 @@ import ( "context" "fmt" "os" + "strconv" + "strings" "gorm.io/gorm" + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" "github.com/coze-dev/coze-studio/backend/infra/contract/imagex" "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr" "github.com/coze-dev/coze-studio/backend/infra/impl/cache/redis" + 
"github.com/coze-dev/coze-studio/backend/infra/impl/coderunner/direct" + "github.com/coze-dev/coze-studio/backend/infra/impl/coderunner/sandbox" "github.com/coze-dev/coze-studio/backend/infra/impl/es" "github.com/coze-dev/coze-studio/backend/infra/impl/eventbus" "github.com/coze-dev/coze-studio/backend/infra/impl/idgen" @@ -45,6 +50,7 @@ type AppDependencies struct { ResourceEventProducer eventbus.Producer AppEventProducer eventbus.Producer ModelMgr modelmgr.Manager + CodeRunner coderunner.Runner } func Init(ctx context.Context) (*AppDependencies, error) { @@ -93,6 +99,8 @@ func Init(ctx context.Context) (*AppDependencies, error) { return nil, err } + deps.CodeRunner = initCodeRunner() + return deps, nil } @@ -137,3 +145,40 @@ func initAppEventProducer() (eventbus.Producer, error) { return appEventProducer, nil } + +func initCodeRunner() coderunner.Runner { + switch typ := os.Getenv(consts.CodeRunnerType); typ { + case "sandbox": + getAndSplit := func(key string) []string { + v := os.Getenv(key) + if v == "" { + return nil + } + return strings.Split(v, ",") + } + config := &sandbox.Config{ + AllowEnv: getAndSplit(consts.CodeRunnerAllowEnv), + AllowRead: getAndSplit(consts.CodeRunnerAllowRead), + AllowWrite: getAndSplit(consts.CodeRunnerAllowWrite), + AllowNet: getAndSplit(consts.CodeRunnerAllowNet), + AllowRun: getAndSplit(consts.CodeRunnerAllowRun), + AllowFFI: getAndSplit(consts.CodeRunnerAllowFFI), + NodeModulesDir: os.Getenv(consts.CodeRunnerNodeModulesDir), + TimeoutSeconds: 0, + MemoryLimitMB: 0, + } + if f, err := strconv.ParseFloat(os.Getenv(consts.CodeRunnerTimeoutSeconds), 64); err == nil { + config.TimeoutSeconds = f + } else { + config.TimeoutSeconds = 60.0 + } + if mem, err := strconv.ParseInt(os.Getenv(consts.CodeRunnerMemoryLimitMB), 10, 64); err == nil { + config.MemoryLimitMB = mem + } else { + config.MemoryLimitMB = 100 + } + return sandbox.NewRunner(config) + default: + return direct.NewRunner() + } +} diff --git 
a/backend/application/workflow/init.go b/backend/application/workflow/init.go index 6a6d0a76..0046ea78 100644 --- a/backend/application/workflow/init.go +++ b/backend/application/workflow/init.go @@ -41,11 +41,11 @@ import ( crosssearch "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/search" crossvariable "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/variable" "github.com/coze-dev/coze-studio/backend/domain/workflow/service" + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" "github.com/coze-dev/coze-studio/backend/infra/contract/idgen" "github.com/coze-dev/coze-studio/backend/infra/contract/imagex" "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr" "github.com/coze-dev/coze-studio/backend/infra/contract/storage" - "github.com/coze-dev/coze-studio/backend/infra/impl/coderunner" ) type ServiceComponents struct { @@ -61,6 +61,7 @@ type ServiceComponents struct { Tos storage.Storage ImageX imagex.ImageX CPStore compose.CheckPointStore + CodeRunner coderunner.Runner } func InitService(components *ServiceComponents) *ApplicationService { @@ -75,7 +76,7 @@ func InitService(components *ServiceComponents) *ApplicationService { crossplugin.SetPluginService(wfplugin.NewPluginService(components.PluginDomainSVC, components.Tos)) crossknowledge.SetKnowledgeOperator(wfknowledge.NewKnowledgeRepository(components.KnowledgeDomainSVC, components.IDGen)) crossmodel.SetManager(wfmodel.NewModelManager(components.ModelManager, nil)) - crosscode.SetCodeRunner(coderunner.NewRunner()) + crosscode.SetCodeRunner(components.CodeRunner) crosssearch.SetNotifier(wfsearch.NewNotify(components.DomainNotifier)) SVC.DomainSVC = workflowDomainSVC diff --git a/backend/domain/workflow/crossdomain/code/code.go b/backend/domain/workflow/crossdomain/code/code.go index fee21275..32a2f29c 100644 --- a/backend/domain/workflow/crossdomain/code/code.go +++ b/backend/domain/workflow/crossdomain/code/code.go @@ -16,35 +16,16 @@ package 
code -import "context" - -type Language string - -const ( - Python Language = "Python" - JavaScript Language = "JavaScript" +import ( + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" ) -type RunRequest struct { - Code string - Params map[string]any - Language Language -} -type RunResponse struct { - Result map[string]any -} - -func GetCodeRunner() Runner { +func GetCodeRunner() coderunner.Runner { return runnerImpl } -func SetCodeRunner(runner Runner) { +func SetCodeRunner(runner coderunner.Runner) { runnerImpl = runner } -var runnerImpl Runner - -//go:generate mockgen -destination ../../../../internal/mock/domain/workflow/crossdomain/code/code_mock.go --package code -source code.go -type Runner interface { - Run(ctx context.Context, request *RunRequest) (*RunResponse, error) -} +var runnerImpl coderunner.Runner diff --git a/backend/domain/workflow/internal/canvas/adaptor/canvas_test.go b/backend/domain/workflow/internal/canvas/adaptor/canvas_test.go index 2d1a3329..9722ed19 100644 --- a/backend/domain/workflow/internal/canvas/adaptor/canvas_test.go +++ b/backend/domain/workflow/internal/canvas/adaptor/canvas_test.go @@ -29,6 +29,7 @@ import ( "github.com/bytedance/mockey" "github.com/cloudwego/eino/schema" + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" "github.com/stretchr/testify/assert" "go.uber.org/mock/gomock" @@ -746,7 +747,7 @@ func TestCodeAndPluginNodes(t *testing.T) { defer ctrl.Finish() mockCodeRunner := mockcode.NewMockRunner(ctrl) mockey.Mock(code.GetCodeRunner).Return(mockCodeRunner).Build() - mockCodeRunner.EXPECT().Run(gomock.Any(), gomock.Any()).Return(&code.RunResponse{ + mockCodeRunner.EXPECT().Run(gomock.Any(), gomock.Any()).Return(&coderunner.RunResponse{ Result: map[string]any{ "key0": "value0", "key1": []string{"value1", "value2"}, diff --git a/backend/domain/workflow/internal/canvas/adaptor/type_convert.go b/backend/domain/workflow/internal/canvas/adaptor/type_convert.go index bf7ae00f..4d87a864 
100644 --- a/backend/domain/workflow/internal/canvas/adaptor/type_convert.go +++ b/backend/domain/workflow/internal/canvas/adaptor/type_convert.go @@ -23,8 +23,6 @@ import ( "strings" einoCompose "github.com/cloudwego/eino/compose" - - "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/code" "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/database" "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/knowledge" "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/model" @@ -34,6 +32,7 @@ import ( "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes/loop" "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes/qa" "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes/selector" + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" "github.com/coze-dev/coze-studio/backend/pkg/lang/crypto" "github.com/coze-dev/coze-studio/backend/pkg/sonic" "github.com/coze-dev/coze-studio/backend/types/errno" @@ -1075,12 +1074,12 @@ func ConvertRetrievalSearchType(s int64) (knowledge.SearchType, error) { } } -func ConvertCodeLanguage(l int64) (code.Language, error) { +func ConvertCodeLanguage(l int64) (coderunner.Language, error) { switch l { case 5: - return code.JavaScript, nil + return coderunner.JavaScript, nil case 3: - return code.Python, nil + return coderunner.Python, nil default: return "", fmt.Errorf("invalid language: %d", l) diff --git a/backend/domain/workflow/internal/compose/to_node.go b/backend/domain/workflow/internal/compose/to_node.go index c63596ba..8581e182 100644 --- a/backend/domain/workflow/internal/compose/to_node.go +++ b/backend/domain/workflow/internal/compose/to_node.go @@ -61,6 +61,7 @@ import ( "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes/textprocessor" "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes/variableaggregator" 
"github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes/variableassigner" + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr" "github.com/coze-dev/coze-studio/backend/pkg/lang/ptr" "github.com/coze-dev/coze-studio/backend/pkg/safego" @@ -577,7 +578,7 @@ func (s *NodeSchema) ToPluginConfig() (*plugin.Config, error) { func (s *NodeSchema) ToCodeRunnerConfig() (*code.Config, error) { return &code.Config{ Code: mustGetKey[string]("Code", s.Configs), - Language: mustGetKey[crosscode.Language]("Language", s.Configs), + Language: mustGetKey[coderunner.Language]("Language", s.Configs), OutputConfig: s.OutputTypes, Runner: crosscode.GetCodeRunner(), }, nil diff --git a/backend/domain/workflow/internal/nodes/code/code.go b/backend/domain/workflow/internal/nodes/code/code.go index db4db0e2..69844f50 100644 --- a/backend/domain/workflow/internal/nodes/code/code.go +++ b/backend/domain/workflow/internal/nodes/code/code.go @@ -23,9 +23,9 @@ import ( "regexp" "strings" + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" "golang.org/x/exp/maps" - "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/code" "github.com/coze-dev/coze-studio/backend/domain/workflow/entity/vo" "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes" "github.com/coze-dev/coze-studio/backend/pkg/ctxcache" @@ -113,9 +113,9 @@ var pythonThirdPartyWhitelist = map[string]struct{}{ type Config struct { Code string - Language code.Language + Language coderunner.Language OutputConfig map[string]*vo.TypeInfo - Runner code.Runner + Runner coderunner.Runner } type CodeRunner struct { @@ -136,7 +136,7 @@ func NewCodeRunner(ctx context.Context, cfg *Config) (*CodeRunner, error) { return nil, errors.New("code is required") } - if cfg.Language != code.Python { + if cfg.Language != coderunner.Python { return nil, errors.New("only support python language") } @@ -194,7 
+194,7 @@ func (c *CodeRunner) RunCode(ctx context.Context, input map[string]any) (ret map if c.importError != nil { return nil, vo.WrapError(errno.ErrCodeExecuteFail, c.importError, errorx.KV("detail", c.importError.Error())) } - response, err := c.config.Runner.Run(ctx, &code.RunRequest{Code: c.config.Code, Language: c.config.Language, Params: input}) + response, err := c.config.Runner.Run(ctx, &coderunner.RunRequest{Code: c.config.Code, Language: c.config.Language, Params: input}) if err != nil { return nil, vo.WrapError(errno.ErrCodeExecuteFail, err, errorx.KV("detail", err.Error())) } diff --git a/backend/domain/workflow/internal/nodes/code/code_test.go b/backend/domain/workflow/internal/nodes/code/code_test.go index 971f0835..ef3754f0 100644 --- a/backend/domain/workflow/internal/nodes/code/code_test.go +++ b/backend/domain/workflow/internal/nodes/code/code_test.go @@ -21,10 +21,10 @@ import ( "fmt" "testing" + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" "github.com/stretchr/testify/assert" "go.uber.org/mock/gomock" - "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/code" "github.com/coze-dev/coze-studio/backend/domain/workflow/entity/vo" "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes" mockcode "github.com/coze-dev/coze-studio/backend/internal/mock/domain/workflow/crossdomain/code" @@ -68,7 +68,7 @@ async def main(args:Args)->Output: }, } - response := &code.RunResponse{ + response := &coderunner.RunResponse{ Result: ret, } @@ -76,7 +76,7 @@ async def main(args:Args)->Output: ctx := t.Context() c := &CodeRunner{ config: &Config{ - Language: code.Python, + Language: coderunner.Python, Code: codeTpl, OutputConfig: map[string]*vo.TypeInfo{ "key0": {Type: vo.DataTypeInteger}, @@ -138,7 +138,7 @@ async def main(args:Args)->Output: "key3": map[string]interface{}{"key31": "hi", "key32": "hello", "key34": map[string]interface{}{"key341": "123"}}, } - response := &code.RunResponse{ + response := 
&coderunner.RunResponse{ Result: ret, } mockRunner.EXPECT().Run(gomock.Any(), gomock.Any()).Return(response, nil) @@ -147,7 +147,7 @@ async def main(args:Args)->Output: c := &CodeRunner{ config: &Config{ Code: codeTpl, - Language: code.Python, + Language: coderunner.Python, OutputConfig: map[string]*vo.TypeInfo{ "key0": {Type: vo.DataTypeInteger}, "key1": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeString}}, @@ -213,7 +213,7 @@ async def main(args:Args)->Output: "key2": []interface{}{int64(123), "345"}, "key3": map[string]interface{}{"key31": "hi", "key32": "hello", "key34": map[string]interface{}{"key341": "123", "key343": []any{"hello", "world"}}}, } - response := &code.RunResponse{ + response := &coderunner.RunResponse{ Result: ret, } mockRunner.EXPECT().Run(gomock.Any(), gomock.Any()).Return(response, nil) @@ -221,7 +221,7 @@ async def main(args:Args)->Output: c := &CodeRunner{ config: &Config{ Code: codeTpl, - Language: code.Python, + Language: coderunner.Python, OutputConfig: map[string]*vo.TypeInfo{ "key0": {Type: vo.DataTypeInteger}, "key1": {Type: vo.DataTypeArray, ElemTypeInfo: &vo.TypeInfo{Type: vo.DataTypeNumber}}, diff --git a/backend/infra/contract/coderunner/code.go b/backend/infra/contract/coderunner/code.go new file mode 100644 index 00000000..18f19a61 --- /dev/null +++ b/backend/infra/contract/coderunner/code.go @@ -0,0 +1,40 @@ +/* + * Copyright 2025 coze-dev Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
// Language names the programming language of a code snippet.
type Language string

// Languages a RunRequest can carry.
const (
	Python     Language = "Python"
	JavaScript Language = "JavaScript"
)

type (
	// RunRequest is the input of a single Runner.Run call: the source code,
	// the parameters handed to it, and the language it is written in.
	RunRequest struct {
		Code     string
		Params   map[string]any
		Language Language
	}

	// RunResponse carries the result map produced by a run.
	RunResponse struct {
		Result map[string]any
	}
)

//go:generate mockgen -destination ../../../internal/mock/domain/workflow/crossdomain/code/code_mock.go --package code -source code.go

// Runner executes the code described by a RunRequest and returns its result.
type Runner interface {
	Run(ctx context.Context, request *RunRequest) (*RunResponse, error)
}
return nil, fmt.Errorf("unsupported language: %s", request.Language) } -func (r *Runner) pythonCmdRun(_ context.Context, code string, params map[string]any) (map[string]any, error) { +func (r *runner) pythonCmdRun(_ context.Context, code string, params map[string]any) (map[string]any, error) { bs, _ := sonic.Marshal(params) - cmd := exec.Command(goutil.GetPython3Path(), "-c", fmt.Sprintf(pythonCode, code), string(bs)) //ignore_security_alert RCE + cmd := exec.Command(goutil.GetPython3Path(), "-c", fmt.Sprintf(pythonCode, code), string(bs)) // ignore_security_alert RCE stdout := new(bytes.Buffer) stderr := new(bytes.Buffer) cmd.Stdout = stdout diff --git a/backend/infra/impl/coderunner/sandbox/runner.go b/backend/infra/impl/coderunner/sandbox/runner.go new file mode 100644 index 00000000..fd8bd5f7 --- /dev/null +++ b/backend/infra/impl/coderunner/sandbox/runner.go @@ -0,0 +1,103 @@ +package sandbox + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + + "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" + "github.com/coze-dev/coze-studio/backend/pkg/goutil" + "github.com/coze-dev/coze-studio/backend/pkg/logs" +) + +func NewRunner(config *Config) coderunner.Runner { + return &runner{ + pyPath: goutil.GetPython3Path(), + scriptPath: goutil.GetPythonFilePath("sandbox.py"), + config: config, + } +} + +type Config struct { + AllowEnv []string `json:"allow_env,omitempty"` + AllowRead []string `json:"allow_read,omitempty"` + AllowWrite []string `json:"allow_write,omitempty"` + AllowNet []string `json:"allow_net,omitempty"` + AllowRun []string `json:"allow_run,omitempty"` + AllowFFI []string `json:"allow_ffi,omitempty"` + NodeModulesDir string `json:"node_modules_dir,omitempty"` + TimeoutSeconds float64 `json:"timeout_seconds,omitempty"` + MemoryLimitMB int64 `json:"memory_limit_mb,omitempty"` +} + +type runner struct { + pyPath, scriptPath string + config *Config +} + +func (runner *runner) Run(ctx context.Context, request 
*coderunner.RunRequest) (*coderunner.RunResponse, error) { + if request.Language == coderunner.JavaScript { + return nil, fmt.Errorf("js not supported yet") + } + b, err := json.Marshal(req{ + Config: runner.config, + Code: request.Code, + Params: request.Params, + }) + if err != nil { + return nil, err + } + pr, pw, err := os.Pipe() + if err != nil { + return nil, err + } + r, w, err := os.Pipe() + if err != nil { + return nil, err + } + if _, err = pw.Write(b); err != nil { + return nil, err + } + if err = pw.Close(); err != nil { + return nil, err + } + cmd := exec.Command(runner.pyPath, runner.scriptPath) + cmd.ExtraFiles = []*os.File{w, pr} + if err = cmd.Start(); err != nil { + return nil, err + } + if err = w.Close(); err != nil { + return nil, err + } + result := &resp{} + d := json.NewDecoder(r) + d.UseNumber() + if err = d.Decode(result); err != nil { + return nil, err + } + if err = cmd.Wait(); err != nil { + return nil, err + } + logs.CtxDebugf(ctx, "resp=%v\n", result) + if result.Status != "success" { + return nil, fmt.Errorf("exec failed, stdout=%s, stderr=%s, sandbox_err=%s", result.Stdout, result.Stderr, result.SandboxError) + } + return &coderunner.RunResponse{Result: result.Result}, nil +} + +type req struct { + Config *Config `json:"config"` + Code string `json:"code"` + Params map[string]any `json:"params"` +} + +type resp struct { + Result map[string]any `json:"result"` + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + Status string `json:"status"` + ExecutionTime float64 `json:"execution_time"` + SandboxError string `json:"sandbox_error"` +} diff --git a/backend/infra/impl/coderunner/script/sandbox.py b/backend/infra/impl/coderunner/script/sandbox.py new file mode 100644 index 00000000..28ee898b --- /dev/null +++ b/backend/infra/impl/coderunner/script/sandbox.py @@ -0,0 +1,214 @@ +import os +import dataclasses +import json +import subprocess +import time +from typing import Dict, Literal + +Status = Literal["success", "error"] 
+ +PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4" + +@dataclasses.dataclass(kw_only=True) +class Output: + result: Dict = None + stdout: str | None = None + stderr: str | None = None + status: Status + execution_time: float + +def build_permission_flag( + flag: str, + *, + value: bool | list[str], +) -> str | None: + if value is True: + return flag + if isinstance(value, list) and value: + return f"{flag}={','.join(value)}" + return None + + +class Sandbox: + def __init__( + self, + *, + allow_env: list[str] | bool = False, + allow_read: list[str] | bool = False, + allow_write: list[str] | bool = False, + allow_net: list[str] | bool = False, + allow_run: list[str] | bool = False, + allow_ffi: list[str] | bool = False, + node_modules_dir: str = "auto", + **kwargs + ) -> None: + self.permissions = [] + + perm_defs = [ + ("--allow-env", allow_env, None), + ("--allow-read", allow_read, ["node_modules"]), + ("--allow-write", allow_write, ["node_modules"]), + ("--allow-net", allow_net, None), + ("--allow-run", allow_run, None), + ("--allow-ffi", allow_ffi, None), + ] + + self.permissions = [] + for flag, value, defaults in perm_defs: + perm = build_permission_flag(flag, value=value) + if perm is None and defaults is not None: + default_value = ",".join(defaults) + perm = f"{flag}={default_value}" + if perm: + self.permissions.append(perm) + + self.permissions.append(f"--node-modules-dir={node_modules_dir}") + + def _build_command( + self, + code: str, + *, + session_bytes: bytes | None = None, + session_metadata: dict | None = None, + memory_limit_mb: int | None = 100, + **kwargs + ) -> list[str]: + cmd = [ + "deno", + "run", + ] + + cmd.extend(self.permissions) + + v8_flags = ["--experimental-wasm-stack-switching"] + + if memory_limit_mb is not None and memory_limit_mb > 0: + v8_flags.append(f"--max-old-space-size={memory_limit_mb}") + + cmd.append(f"--v8-flags={','.join(v8_flags)}") + + cmd.append(PKG_NAME) + + cmd.extend(["--code", code]) + + if session_bytes: + 
bytes_array = list(session_bytes) + cmd.extend(["--session-bytes", json.dumps(bytes_array)]) + + if session_metadata: + cmd.extend(["--session-metadata", json.dumps(session_metadata)]) + + return cmd + + def execute( + self, + code: str, + *, + session_bytes: bytes | None = None, + session_metadata: dict | None = None, + timeout_seconds: float | None = None, + memory_limit_mb: int | None = None, + **kwargs + ) -> Output: + start_time = time.time() + stdout = "" + result = None + stderr: str + status: Literal["success", "error"] + cmd = self._build_command( + code, + session_bytes=session_bytes, + session_metadata=session_metadata, + memory_limit_mb=memory_limit_mb, + ) + + try: + process = subprocess.run( + cmd, + capture_output=True, + text=False, + timeout=timeout_seconds, + check=False, + ) + + stdout_bytes = process.stdout + stderr_bytes = process.stderr + + stdout = stdout_bytes.decode("utf-8", errors="replace") + + if stdout: + full_result = json.loads(stdout) + stdout = full_result.get("stdout", None) + stderr = full_result.get("stderr", None) + result = full_result.get("result", None) + status = "success" if full_result.get("success", False) else "error" + else: + stderr = stderr_bytes.decode("utf-8", errors="replace") + status = "error" + + except subprocess.TimeoutExpired: + status = "error" + stderr = f"Execution timed out after {timeout_seconds} seconds" + + end_time = time.time() + + return Output( + status=status, + execution_time=end_time - start_time, + stdout=stdout or None, + stderr=stderr or None, + result=result, + ) + + +prefix = """\ +import json +import sys +import asyncio +class Args: + def __init__(self, params): + self.params = params + +class Output(dict): + pass + +args = {} + +""" + +suffix = """\ + +result = None +try: + result = asyncio.run(main(Args(args))) +except Exception as e: + print(f"{type(e).__name__}: {str(e)}", file=sys.stderr) + sys.exit(1) +result +""" + + +if __name__ == "__main__": + w = os.fdopen(3, "wb", ) + r = 
os.fdopen(4, "rb", ) + + try: + req = json.load(r) + user_code, params, config = req["code"], req["params"], req["config"] or {} + sandbox = Sandbox(**config) + + if params is not None: + code = prefix + f'args={json.dumps(params)}\n' + user_code + suffix + else: + code = prefix + user_code + suffix + + resp = sandbox.execute(code, **config) + result = json.dumps(dataclasses.asdict(resp), ensure_ascii=False) + w.write(str.encode(result)) + w.flush() + w.close() + except Exception as e: + print("sandbox exec error", e) + w.write(str.encode(json.dumps({"sandbox_error": str(e)}))) + w.flush() + w.close() \ No newline at end of file diff --git a/backend/internal/mock/domain/workflow/crossdomain/code/code_mock.go b/backend/internal/mock/domain/workflow/crossdomain/code/code_mock.go index 35085b4b..f7d6b079 100644 --- a/backend/internal/mock/domain/workflow/crossdomain/code/code_mock.go +++ b/backend/internal/mock/domain/workflow/crossdomain/code/code_mock.go @@ -1,5 +1,10 @@ // Code generated by MockGen. DO NOT EDIT. // Source: code.go +// +// Generated by this command: +// +// mockgen -destination ../../../internal/mock/domain/workflow/crossdomain/code/code_mock.go --package code -source code.go +// // Package code is a generated GoMock package. package code @@ -8,9 +13,8 @@ import ( context "context" reflect "reflect" + coderunner "github.com/coze-dev/coze-studio/backend/infra/contract/coderunner" gomock "go.uber.org/mock/gomock" - - code "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/code" ) // MockRunner is a mock of Runner interface. @@ -37,16 +41,16 @@ func (m *MockRunner) EXPECT() *MockRunnerMockRecorder { } // Run mocks base method. 
-func (m *MockRunner) Run(ctx context.Context, request *code.RunRequest) (*code.RunResponse, error) { +func (m *MockRunner) Run(ctx context.Context, request *coderunner.RunRequest) (*coderunner.RunResponse, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Run", ctx, request) - ret0, _ := ret[0].(*code.RunResponse) + ret0, _ := ret[0].(*coderunner.RunResponse) ret1, _ := ret[1].(error) return ret0, ret1 } // Run indicates an expected call of Run. -func (mr *MockRunnerMockRecorder) Run(ctx, request interface{}) *gomock.Call { +func (mr *MockRunnerMockRecorder) Run(ctx, request any) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Run", reflect.TypeOf((*MockRunner)(nil).Run), ctx, request) } diff --git a/backend/types/consts/consts.go b/backend/types/consts/consts.go index 732ec7c4..2b3cd4bb 100644 --- a/backend/types/consts/consts.go +++ b/backend/types/consts/consts.go @@ -65,6 +65,17 @@ const ( SessionDataKeyInCtx = "session_data_key_in_ctx" OpenapiAuthKeyInCtx = "openapi_auth_key_in_ctx" + + CodeRunnerType = "CODE_RUNNER_TYPE" + CodeRunnerAllowEnv = "CODE_RUNNER_ALLOW_ENV" + CodeRunnerAllowRead = "CODE_RUNNER_ALLOW_READ" + CodeRunnerAllowWrite = "CODE_RUNNER_ALLOW_WRITE" + CodeRunnerAllowNet = "CODE_RUNNER_ALLOW_NET" + CodeRunnerAllowRun = "CODE_RUNNER_ALLOW_RUN" + CodeRunnerAllowFFI = "CODE_RUNNER_ALLOW_FFI" + CodeRunnerNodeModulesDir = "CODE_RUNNER_NODE_MODULES_DIR" + CodeRunnerTimeoutSeconds = "CODE_RUNNER_TIMEOUT_SECONDS" + CodeRunnerMemoryLimitMB = "CODE_RUNNER_MEMORY_LIMIT_MB" ) const ( diff --git a/docker/.env.example b/docker/.env.example index 4c0c6711..4efd0798 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -163,3 +163,31 @@ export BUILTIN_CM_GEMINI_PROJECT="" export BUILTIN_CM_GEMINI_LOCATION="" export BUILTIN_CM_GEMINI_BASE_URL="" export BUILTIN_CM_GEMINI_MODEL="" + + +# Workflow Code Runner Configuration +# Supported code runner types: sandbox / local +# Default using local +# - sandbox: 
execute Python code in a sandboxed environment using deno + pyodide +# - local: run in the local venv, with no environment isolation +export CODE_RUNNER_TYPE="local" +# Sandbox-specific configuration +# Environment variable access is restricted to the listed variables, comma-separated, e.g. "PATH,USERNAME" +export CODE_RUNNER_ALLOW_ENV="" +# Read access is restricted to the listed paths, comma-separated, e.g. "/tmp,./data" +export CODE_RUNNER_ALLOW_READ="" +# Write access is restricted to the listed paths, comma-separated, e.g. "/tmp,./data" +export CODE_RUNNER_ALLOW_WRITE="" +# Subprocess execution is restricted to the listed commands, comma-separated, e.g. "python,git" +export CODE_RUNNER_ALLOW_RUN="" +# Network access is restricted to the listed domains/IPs, comma-separated, e.g. "api.test.com,api.test.org:8080" +# The CDN below serves the packages pyodide needs to run Python code; the sandbox may not work properly if it is removed. +export CODE_RUNNER_ALLOW_NET="cdn.jsdelivr.net" +# Foreign Function Interface access is restricted to the listed libraries, comma-separated, e.g. "/usr/lib/libm.so" +export CODE_RUNNER_ALLOW_FFI="" +# Directory for deno modules; defaults to the current working directory, e.g. "/tmp/path/node_modules" +export CODE_RUNNER_NODE_MODULES_DIR="" +# Code execution timeout in seconds; defaults to 60, e.g. "2.56" +export CODE_RUNNER_TIMEOUT_SECONDS="" +# Code execution memory limit in MB; defaults to 100, e.g.
"256" +export CODE_RUNNER_MEMORY_LIMIT_MB="" \ No newline at end of file diff --git a/scripts/setup/python.sh b/scripts/setup/python.sh index 90e09be4..7af97ab6 100755 --- a/scripts/setup/python.sh +++ b/scripts/setup/python.sh @@ -60,6 +60,7 @@ deactivate PARSER_SCRIPT_ROOT="$BACKEND_DIR/infra/impl/document/parser/builtin" PDF_PARSER="$PARSER_SCRIPT_ROOT/parse_pdf.py" DOCX_PARSER="$PARSER_SCRIPT_ROOT/parse_docx.py" +WORKFLOW_SANDBOX="$BACKEND_DIR/infra/impl/coderunner/script/sandbox.py" if [ -f "$PDF_PARSER" ]; then cp "$PDF_PARSER" "$BIN_DIR/parse_pdf.py" @@ -75,7 +76,9 @@ else exit 1 fi - - - - +if [ -f "$WORKFLOW_SANDBOX" ]; then + cp "$WORKFLOW_SANDBOX" "$BIN_DIR/sandbox.py" +else + echo "❌ $WORKFLOW_SANDBOX file not found" + exit 1 +fi \ No newline at end of file diff --git a/scripts/setup/server.sh b/scripts/setup/server.sh index 86487886..44181e42 100755 --- a/scripts/setup/server.sh +++ b/scripts/setup/server.sh @@ -7,6 +7,15 @@ BIN_DIR="$BASE_DIR/bin" CONFIG_DIR="$BIN_DIR/resources/conf" RESOURCES_DIR="$BIN_DIR/resources/" DOCKER_DIR="$BASE_DIR/docker" +source "$DOCKER_DIR/.env" + +if [[ "$CODE_RUNNER_TYPE" == "sandbox" ]] && ! command -v deno &> /dev/null; then + echo "deno is not installed, installing now..." + curl -fsSL https://deno.land/install.sh | sh + export PATH="$HOME/.deno/bin:$PATH" +fi + +echo "🧹 Checking for sandbox availability..." echo "🧹 Checking for goimports availability..."