feat: py sandbox for workflow

* chore: update Dockerfile and sandbox.py
* feat: py sandbox for workflow
* feat: py sandbox for workflow

See merge request: !885
This commit is contained in:
徐兆楠
2025-07-25 07:17:25 +00:00
parent e8686379b2
commit 3749abdea0
20 changed files with 521 additions and 79 deletions

View File

@@ -0,0 +1,40 @@
/*
* Copyright 2025 coze-dev Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package coderunner
import "context"
// Language names the programming language of the code carried by a RunRequest.
type Language string

// Language values that a RunRequest may carry.
const (
	Python     Language = "Python"
	JavaScript Language = "JavaScript"
)
// RunRequest is the input handed to Runner.Run.
type RunRequest struct {
	// Code is the source text to execute.
	Code string
	// Params holds the named arguments made available to the code.
	Params map[string]any
	// Language states which language Code is written in.
	Language Language
}
// RunResponse is the output returned by Runner.Run.
type RunResponse struct {
	// Result is the key/value map produced by the executed code.
	Result map[string]any
}
// NOTE(review): the mockgen directive below names package "code" and source
// file code.go, while this file declares package coderunner; confirm the
// directive still matches the file's current name and location.

// Runner executes the code described by a RunRequest.
//
//go:generate mockgen -destination ../../../internal/mock/domain/workflow/crossdomain/code/code_mock.go --package code -source code.go
type Runner interface {
	// Run executes request and returns the result, or an error if the
	// execution could not be completed.
	Run(ctx context.Context, request *RunRequest) (*RunResponse, error)
}

View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/
package coderunner
package direct
import (
"bytes"
@@ -22,7 +22,7 @@ import (
"fmt"
"os/exec"
"github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/code"
"github.com/coze-dev/coze-studio/backend/infra/contract/coderunner"
"github.com/coze-dev/coze-studio/backend/pkg/goutil"
"github.com/coze-dev/coze-studio/backend/pkg/sonic"
)
@@ -50,32 +50,32 @@ except Exception as e:
`
type Runner struct{}
func NewRunner() *Runner {
return &Runner{}
func NewRunner() coderunner.Runner {
return &runner{}
}
func (r *Runner) Run(ctx context.Context, request *code.RunRequest) (*code.RunResponse, error) {
type runner struct{}
func (r *runner) Run(ctx context.Context, request *coderunner.RunRequest) (*coderunner.RunResponse, error) {
var (
params = request.Params
c = request.Code
)
if request.Language == code.Python {
if request.Language == coderunner.Python {
ret, err := r.pythonCmdRun(ctx, c, params)
if err != nil {
return nil, err
}
return &code.RunResponse{
return &coderunner.RunResponse{
Result: ret,
}, nil
}
return nil, fmt.Errorf("unsupported language: %s", request.Language)
}
func (r *Runner) pythonCmdRun(_ context.Context, code string, params map[string]any) (map[string]any, error) {
func (r *runner) pythonCmdRun(_ context.Context, code string, params map[string]any) (map[string]any, error) {
bs, _ := sonic.Marshal(params)
cmd := exec.Command(goutil.GetPython3Path(), "-c", fmt.Sprintf(pythonCode, code), string(bs)) //ignore_security_alert RCE
cmd := exec.Command(goutil.GetPython3Path(), "-c", fmt.Sprintf(pythonCode, code), string(bs)) // ignore_security_alert RCE
stdout := new(bytes.Buffer)
stderr := new(bytes.Buffer)
cmd.Stdout = stdout

View File

@@ -0,0 +1,103 @@
package sandbox
import (
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"github.com/coze-dev/coze-studio/backend/infra/contract/coderunner"
"github.com/coze-dev/coze-studio/backend/pkg/goutil"
"github.com/coze-dev/coze-studio/backend/pkg/logs"
)
// NewRunner builds a coderunner.Runner that delegates execution to the
// sandbox.py helper script, launched with the Python interpreter located by
// goutil. The given config is forwarded to the script on every Run call.
func NewRunner(config *Config) coderunner.Runner {
	sandboxRunner := &runner{config: config}
	sandboxRunner.pyPath = goutil.GetPython3Path()
	sandboxRunner.scriptPath = goutil.GetPythonFilePath("sandbox.py")
	return sandboxRunner
}
// Config is the sandbox configuration that is serialized to JSON and sent to
// the sandbox script with every request. Zero-valued fields are omitted from
// the JSON (omitempty), leaving the script's own defaults in effect.
type Config struct {
	// AllowEnv, AllowRead, AllowWrite, AllowNet, AllowRun and AllowFFI are
	// sent under the allow_* keys; the sandbox script turns each non-empty
	// list into the matching --allow-* permission flag.
	AllowEnv   []string `json:"allow_env,omitempty"`
	AllowRead  []string `json:"allow_read,omitempty"`
	AllowWrite []string `json:"allow_write,omitempty"`
	AllowNet   []string `json:"allow_net,omitempty"`
	AllowRun   []string `json:"allow_run,omitempty"`
	AllowFFI   []string `json:"allow_ffi,omitempty"`
	// NodeModulesDir is forwarded by the script as --node-modules-dir.
	NodeModulesDir string `json:"node_modules_dir,omitempty"`
	// TimeoutSeconds is used by the script as the timeout of the sandboxed
	// subprocess.
	TimeoutSeconds float64 `json:"timeout_seconds,omitempty"`
	// MemoryLimitMB is used by the script for the --max-old-space-size V8 flag
	// when positive.
	MemoryLimitMB int64 `json:"memory_limit_mb,omitempty"`
}
// runner is the coderunner.Runner implementation returned by NewRunner.
type runner struct {
	// pyPath is the interpreter executable and scriptPath the script passed to
	// it as its only argument.
	pyPath, scriptPath string
	// config is marshaled into every request sent to the script; may be nil.
	config *Config
}
// Run executes request.Code through the sandbox script and returns the result
// map reported by the script.
//
// The request (config, code and params) is sent to the child process as JSON
// over one pipe and the response is read back as JSON from a second pipe. Both
// pipes are handed over through ExtraFiles, so the child sees the response
// writer as fd 3 and the request reader as fd 4.
//
// An error is returned for JavaScript requests, when the child cannot be
// started or exits unsuccessfully, when the request cannot be delivered, when
// the response cannot be decoded, or when the reported status is not
// "success". Cancelling ctx kills the child process.
func (runner *runner) Run(ctx context.Context, request *coderunner.RunRequest) (*coderunner.RunResponse, error) {
	if request.Language == coderunner.JavaScript {
		return nil, fmt.Errorf("js not supported yet")
	}

	b, err := json.Marshal(req{
		Config: runner.config,
		Code:   request.Code,
		Params: request.Params,
	})
	if err != nil {
		return nil, err
	}

	// Request pipe: the parent writes to pw, the child reads from pr.
	pr, pw, err := os.Pipe()
	if err != nil {
		return nil, err
	}
	// Response pipe: the child writes to w, the parent reads from r.
	r, w, err := os.Pipe()
	if err != nil {
		_ = pr.Close()
		_ = pw.Close()
		return nil, err
	}
	defer func() { _ = r.Close() }()

	cmd := exec.CommandContext(ctx, runner.pyPath, runner.scriptPath)
	cmd.ExtraFiles = []*os.File{w, pr}
	startErr := cmd.Start()
	// Whether or not the child started, the parent no longer needs its copies
	// of the child-side ends. Closing them is also what lets reads on r see
	// EOF and writes on pw fail once the child is gone.
	_ = w.Close()
	_ = pr.Close()
	if startErr != nil {
		_ = pw.Close()
		return nil, startErr
	}

	// Write only after the child is running: a payload larger than the pipe
	// buffer would otherwise block forever because nobody is reading yet.
	_, writeErr := pw.Write(b)
	if closeErr := pw.Close(); writeErr == nil {
		writeErr = closeErr
	}

	result := &resp{}
	d := json.NewDecoder(r)
	d.UseNumber()
	decodeErr := d.Decode(result)

	// Always reap the child, even if the exchange above failed.
	waitErr := cmd.Wait()
	switch {
	case waitErr != nil:
		// The exit status explains a broken pipe or an empty response better
		// than the I/O error it caused, so report it first.
		return nil, fmt.Errorf("sandbox process: %w", waitErr)
	case writeErr != nil:
		return nil, fmt.Errorf("send sandbox request: %w", writeErr)
	case decodeErr != nil:
		return nil, fmt.Errorf("decode sandbox response: %w", decodeErr)
	}

	logs.CtxDebugf(ctx, "resp=%v\n", result)
	if result.Status != "success" {
		return nil, fmt.Errorf("exec failed, stdout=%s, stderr=%s, sandbox_err=%s", result.Stdout, result.Stderr, result.SandboxError)
	}
	return &coderunner.RunResponse{Result: result.Result}, nil
}
// req is the JSON document written to the sandbox script for one execution.
type req struct {
	// Config is the runner's sandbox configuration; encoded as null when nil.
	Config *Config `json:"config"`
	// Code is the user code to execute.
	Code string `json:"code"`
	// Params are the arguments exposed to the user code.
	Params map[string]any `json:"params"`
}
// resp is the JSON document read back from the sandbox script.
type resp struct {
	// Result is the value produced by the user code.
	Result map[string]any `json:"result"`
	// Stdout and Stderr are the output streams reported by the script.
	Stdout string `json:"stdout"`
	Stderr string `json:"stderr"`
	// Status is "success" when the execution succeeded; any other value is
	// treated as a failure by Run.
	Status string `json:"status"`
	// ExecutionTime is the duration reported by the script.
	ExecutionTime float64 `json:"execution_time"`
	// SandboxError carries the script's own error message when it failed
	// before producing a regular response.
	SandboxError string `json:"sandbox_error"`
}

View File

@@ -0,0 +1,214 @@
import os
import dataclasses
import json
import subprocess
import time
from typing import Dict, Literal
# Allowed values for the "status" field of an execution result.
Status = Literal["success", "error"]
# Package specifier passed to `deno run` as the program to execute.
PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4"
@dataclasses.dataclass(kw_only=True)
class Output:
    """Outcome of a single sandboxed execution (keyword-only fields)."""

    # Value reported by the sandbox under "result"; None when absent.
    result: Dict | None = None
    # Captured standard output / standard error text; None when empty.
    stdout: str | None = None
    stderr: str | None = None
    # "success" or "error".
    status: Status
    # Wall-clock duration of the execution, in seconds.
    execution_time: float
def build_permission_flag(
flag: str,
*,
value: bool | list[str],
) -> str | None:
if value is True:
return flag
if isinstance(value, list) and value:
return f"{flag}={','.join(value)}"
return None
class Sandbox:
    """Runs Python code through the sandbox package launched with ``deno run``."""

    def __init__(
        self,
        *,
        allow_env: list[str] | bool = False,
        allow_read: list[str] | bool = False,
        allow_write: list[str] | bool = False,
        allow_net: list[str] | bool = False,
        allow_run: list[str] | bool = False,
        allow_ffi: list[str] | bool = False,
        node_modules_dir: str = "auto",
        **kwargs
    ) -> None:
        """Precompute the permission flags used for every execution.

        Each ``allow_*`` argument is either ``True`` (unrestricted) or a list
        of targets. Read and write access fall back to ``node_modules`` when
        nothing is granted explicitly; the other permissions are simply left
        out. Extra keyword arguments are accepted and ignored.
        """
        # (flag, requested value, fallback targets when nothing was requested)
        specs = (
            ("--allow-env", allow_env, None),
            ("--allow-read", allow_read, ["node_modules"]),
            ("--allow-write", allow_write, ["node_modules"]),
            ("--allow-net", allow_net, None),
            ("--allow-run", allow_run, None),
            ("--allow-ffi", allow_ffi, None),
        )
        granted = []
        for flag, requested, fallback in specs:
            rendered = build_permission_flag(flag, value=requested)
            if rendered is None and fallback is not None:
                rendered = f"{flag}={','.join(fallback)}"
            if rendered:
                granted.append(rendered)
        granted.append(f"--node-modules-dir={node_modules_dir}")
        self.permissions = granted

    def _build_command(
        self,
        code: str,
        *,
        session_bytes: bytes | None = None,
        session_metadata: dict | None = None,
        memory_limit_mb: int | None = 100,
        **kwargs
    ) -> list[str]:
        """Assemble the full ``deno run`` argument vector for ``code``.

        The V8 heap limit is only added for a positive ``memory_limit_mb``;
        session bytes and metadata are appended as JSON when provided.
        """
        v8_options = ["--experimental-wasm-stack-switching"]
        if memory_limit_mb is not None and memory_limit_mb > 0:
            v8_options.append(f"--max-old-space-size={memory_limit_mb}")

        argv = ["deno", "run", *self.permissions]
        argv += [f"--v8-flags={','.join(v8_options)}", PKG_NAME, "--code", code]
        if session_bytes:
            argv += ["--session-bytes", json.dumps(list(session_bytes))]
        if session_metadata:
            argv += ["--session-metadata", json.dumps(session_metadata)]
        return argv

    def execute(
        self,
        code: str,
        *,
        session_bytes: bytes | None = None,
        session_metadata: dict | None = None,
        timeout_seconds: float | None = None,
        memory_limit_mb: int | None = None,
        **kwargs
    ) -> Output:
        """Run ``code`` in the sandbox and collect the outcome.

        The subprocess is expected to print a JSON document on stdout; its
        ``stdout``, ``stderr``, ``result`` and ``success`` entries populate the
        returned ``Output``. Empty stdout or a timeout yields an ``"error"``
        status. Any other failure (for example undecodable JSON) propagates
        to the caller.
        """
        started_at = time.time()
        stdout = ""
        result = None

        command = self._build_command(
            code,
            session_bytes=session_bytes,
            session_metadata=session_metadata,
            memory_limit_mb=memory_limit_mb,
        )

        try:
            completed = subprocess.run(
                command,
                capture_output=True,
                text=False,
                timeout=timeout_seconds,
                check=False,
            )
        except subprocess.TimeoutExpired:
            status = "error"
            stderr = f"Execution timed out after {timeout_seconds} seconds"
        else:
            stdout = completed.stdout.decode("utf-8", errors="replace")
            if stdout:
                payload = json.loads(stdout)
                stdout = payload.get("stdout", None)
                stderr = payload.get("stderr", None)
                result = payload.get("result", None)
                status = "success" if payload.get("success", False) else "error"
            else:
                # Nothing on stdout: surface the raw stderr of the process.
                stderr = completed.stderr.decode("utf-8", errors="replace")
                status = "error"

        elapsed = time.time() - started_at
        return Output(
            status=status,
            execution_time=elapsed,
            stdout=stdout or None,
            stderr=stderr or None,
            result=result,
        )
# Source text placed in front of the user's code before it is executed: it
# imports the modules the wrapper needs, defines the Args and Output helper
# classes, and gives `args` an empty default.
prefix = """\
import json
import sys
import asyncio
class Args:
def __init__(self, params):
self.params = params
class Output(dict):
pass
args = {}
"""
# Source text placed after the user's code: it runs the user's async `main`
# with the wrapped args, reports any exception on stderr and exits with
# status 1, and ends with a bare `result` expression.
suffix = """\
result = None
try:
result = asyncio.run(main(Args(args)))
except Exception as e:
print(f"{type(e).__name__}: {str(e)}", file=sys.stderr)
sys.exit(1)
result
"""
if __name__ == "__main__":
    # The parent process passes two extra descriptors: fd 3 receives the JSON
    # response, fd 4 delivers the JSON request.
    w = os.fdopen(3, "wb")
    r = os.fdopen(4, "rb")
    try:
        req = json.load(r)
        user_code, params, config = req["code"], req["params"], req["config"] or {}
        sandbox = Sandbox(**config)
        if params is not None:
            # Embed the params as a quoted JSON string that the generated
            # program decodes itself. Pasting the JSON text in directly is not
            # valid Python whenever it contains true/false/null.
            args_line = f"args = json.loads({json.dumps(params)!r})\n"
        else:
            args_line = ""
        # The explicit newline keeps the first line of `suffix` from being
        # glued onto the user's last line when their code lacks a trailing
        # newline.
        code = prefix + args_line + user_code + "\n" + suffix
        resp = sandbox.execute(code, **config)
        result = json.dumps(dataclasses.asdict(resp), ensure_ascii=False)
        w.write(str.encode(result))
    except Exception as e:
        # Report wrapper-level failures to the parent in-band so it can tell
        # them apart from failures of the user code.
        print("sandbox exec error", e)
        w.write(str.encode(json.dumps({"sandbox_error": str(e)})))
    finally:
        w.flush()
        w.close()
        r.close()