feat: manually mirror opencoze's code from bytedance

Change-Id: I09a73aadda978ad9511264a756b2ce51f5761adf
2025-07-20 17:36:12 +08:00
commit 890153324f
14811 changed files with 1923430 additions and 0 deletions
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -0,0 +1,164 @@
+# Server
+export LISTEN_ADDR=":8888"
+export LOG_LEVEL="debug"
+export MAX_REQUEST_BODY_SIZE=1073741824
+export SERVER_HOST="localhost${LISTEN_ADDR}"
+export MINIO_PROXY_ENDPOINT=":8889"
+
+# MySQL
+export MYSQL_ROOT_PASSWORD=root
+export MYSQL_DATABASE=opencoze
+export MYSQL_USER=coze
+export MYSQL_PASSWORD=coze123
+export MYSQL_HOST=localhost
+export MYSQL_PORT=3306
+export MYSQL_DSN="${MYSQL_USER}:${MYSQL_PASSWORD}@tcp(${MYSQL_HOST}:${MYSQL_PORT})/${MYSQL_DATABASE}?charset=utf8mb4&parseTime=True"
+export ATLAS_URL="mysql://${MYSQL_USER}:${MYSQL_PASSWORD}@${MYSQL_HOST}:${MYSQL_PORT}/${MYSQL_DATABASE}?charset=utf8mb4&parseTime=True"
+
+# Redis
+export REDIS_AOF_ENABLED=no
+export REDIS_IO_THREADS=4
+export ALLOW_EMPTY_PASSWORD=yes # NOTE(review): presumably lets Redis run without a password - confirm before exposing the port
+export REDIS_ADDR="localhost:6379"
+
+# Upload component used for files/images passed to the LLM in agents and workflows; supported types: imagex / storage
+# default: storage, which uses the settings of the Storage component
+# if set to imagex, you must complete the <VolcEngine ImageX> configuration
+export FILE_UPLOAD_COMPONENT_TYPE="storage"
+
+# VolcEngine ImageX
+export VE_IMAGEX_AK=""
+export VE_IMAGEX_SK=""
+export VE_IMAGEX_SERVER_ID=""
+export VE_IMAGEX_DOMAIN=""
+export VE_IMAGEX_TEMPLATE=""
+export VE_IMAGEX_UPLOAD_HOST="https://imagex.volcengineapi.com"
+
+# Storage component 
+export STORAGE_TYPE="minio" # minio / tos
+export STORAGE_BUCKET="opencoze"
+# MiniIO
+export MINIO_ROOT_USER=minioadmin
+export MINIO_ROOT_PASSWORD=minioadmin123
+export MINIO_DEFAULT_BUCKETS=milvus
+export MINIO_AK=$MINIO_ROOT_USER
+export MINIO_SK=$MINIO_ROOT_PASSWORD
+export MINIO_ENDPOINT="localhost:9000"
+export MINIO_API_HOST="http://${MINIO_ENDPOINT}"
+
+# TOS
+export TOS_ACCESS_KEY=
+export TOS_SECRET_KEY=
+export TOS_ENDPOINT=https://tos-cn-beijing.volces.com
+export TOS_REGION=cn-beijing
+
+# Elasticsearch
+export ES_ADDR="http://localhost:9200"
+export ES_VERSION="v8"
+export ES_USERNAME=""
+export ES_PASSWORD=""
+
+
+export COZE_MQ_TYPE="nsq" # nsq / kafka / rmq
+export MQ_NAME_SERVER="127.0.0.1:4150"
+# RocketMQ
+export RMQ_ACCESS_KEY=""
+export RMQ_SECRET_KEY=""
+
+# Settings for VectorStore
+# VectorStore type: milvus / vikingdb
+# If you want to use VikingDB, you need to fill in the VIKING_DB_* settings.
+export VECTOR_STORE_TYPE="milvus"
+# Milvus vector store
+export MILVUS_ADDR="localhost:19530"
+# VikingDB vector store for Volcengine
+export VIKING_DB_HOST=""
+export VIKING_DB_REGION=""
+export VIKING_DB_AK=""
+export VIKING_DB_SK=""
+export VIKING_DB_SCHEME=""
+export VIKING_DB_MODEL_NAME="" # if the VikingDB model name is left empty, the Embedding settings must be configured instead
+
+# Settings for Embedding
+# The embedding model used for knowledge base vectorization does not need to be configured
+# if the vector database has built-in embedding functionality (such as VikingDB). Currently,
+# Coze Studio supports three access methods: openai, ark, and custom http. Choose one of them.
+# embedding type: openai / ark / http
+export EMBEDDING_TYPE="ark"
+# openai embedding
+export OPENAI_EMBEDDING_BASE_URL=""    # (string) OpenAI base_url
+export OPENAI_EMBEDDING_MODEL=""       # (string) OpenAI embedding model
+export OPENAI_EMBEDDING_API_KEY=""     # (string) OpenAI api_key
+export OPENAI_EMBEDDING_BY_AZURE=true  # (bool) OpenAI by_azure; NOTE(review): true by default while the Azure api version below is empty - confirm
+export OPENAI_EMBEDDING_API_VERSION="" # (string) OpenAI azure api version
+export OPENAI_EMBEDDING_DIMS=1024      # (int) vector dimensions
+export OPENAI_EMBEDDING_REQUEST_DIMS=0 # (int) TODO: document how this differs from OPENAI_EMBEDDING_DIMS
+
+# ark embedding
+export ARK_EMBEDDING_MODEL=""
+export ARK_EMBEDDING_AK=""
+export ARK_EMBEDDING_DIMS="2048"
+export ARK_EMBEDDING_BASE_URL=""
+
+
+# http embedding
+export HTTP_EMBEDDING_ADDR="http://127.0.0.1:6543"
+
+# Settings for OCR
+# If you want to use the OCR-related functions of the knowledge base feature, you need to set up the OCR configuration.
+# Currently, Coze Studio has built-in Volcano OCR.
+# ocr_type: default type `ve`
+export OCR_TYPE="ve"
+# ve ocr
+export VE_OCR_AK=""
+export VE_OCR_SK=""
+
+# Settings for Model
+# Model for agent & workflow
+# to add more models, repeat these variables with an incremented numeric suffix (e.g. MODEL_PROTOCOL_1)
+export MODEL_PROTOCOL_0="ark"       # protocol
+export MODEL_OPENCOZE_ID_0="100001" # id for record
+export MODEL_NAME_0=""              # model name for display
+export MODEL_ID_0=""                # model name for connection
+export MODEL_API_KEY_0=""           # model api key
+export MODEL_BASE_URL_0=""          # model base url
+
+# Model for knowledge nl2sql, messages2query (rewrite), image annotation
+# add a prefix to assign a specific model; falls back to the default config when the prefix is not configured:
+# 1. nl2sql:            NL2SQL_ (e.g. NL2SQL_BUILTIN_CM_TYPE)
+# 2. messages2query:    M2Q_    (e.g. M2Q_BUILTIN_CM_TYPE)
+# 3. image annotation:  IA_     (e.g. IA_BUILTIN_CM_TYPE)
+# supported chat model type: openai / ark / deepseek / ollama / qwen / gemini
+export BUILTIN_CM_TYPE="ark"
+# type openai
+export BUILTIN_CM_OPENAI_BASE_URL=""
+export BUILTIN_CM_OPENAI_API_KEY=""
+export BUILTIN_CM_OPENAI_BY_AZURE=true
+export BUILTIN_CM_OPENAI_MODEL=""
+
+# type ark
+export BUILTIN_CM_ARK_API_KEY=""
+export BUILTIN_CM_ARK_MODEL=""
+export BUILTIN_CM_ARK_BASE_URL=""
+
+# type deepseek
+export BUILTIN_CM_DEEPSEEK_BASE_URL=""
+export BUILTIN_CM_DEEPSEEK_API_KEY=""
+export BUILTIN_CM_DEEPSEEK_MODEL=""
+
+# type ollama
+export BUILTIN_CM_OLLAMA_BASE_URL=""
+export BUILTIN_CM_OLLAMA_MODEL=""
+
+# type qwen
+export BUILTIN_CM_QWEN_BASE_URL=""
+export BUILTIN_CM_QWEN_API_KEY=""
+export BUILTIN_CM_QWEN_MODEL=""
+
+# type gemini
+export BUILTIN_CM_GEMINI_BACKEND=""
+export BUILTIN_CM_GEMINI_API_KEY=""
+export BUILTIN_CM_GEMINI_PROJECT=""
+export BUILTIN_CM_GEMINI_LOCATION=""
+export BUILTIN_CM_GEMINI_BASE_URL=""
+export BUILTIN_CM_GEMINI_MODEL=""