feat: refactor model manager

Squashed commits:

* chore: mv model icon
* fix: model icon
* feat: refactor model manager

See merge request: !905

parent 12f7762797
commit 9b3814e2c5
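
At a glance, the refactor swaps the DB-backed model-manager domain service for an infra-level modelmgr.Manager built once from static YAML and environment config. A minimal sketch of the new construction path, using names taken from the diff below (the surrounding wiring is assumed, not part of this commit):

package example

import (
	"context"

	"github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
	"github.com/coze-dev/coze-studio/backend/infra/impl/modelmgr/static"
)

// wireModels is a hypothetical helper: static.NewModelMgr takes the parsed
// model entities and returns the modelmgr.Manager contract that downstream
// services now consume directly, replacing service.NewModelManager(db, idgen, oss).
func wireModels(ctx context.Context, models []*modelmgr.Model) (modelmgr.Manager, error) {
	mgr, err := static.NewModelMgr(models)
	if err != nil {
		return nil, err
	}
	// For example, list the configured models, as GetModelList does below.
	if _, err := mgr.ListModel(ctx, &modelmgr.ListModelRequest{Limit: 10, Cursor: nil}); err != nil {
		return nil, err
	}
	return mgr, nil
}
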
@@ -18,7 +18,6 @@ package coze
 import (
     "bytes"
-
     "context"
     "errors"
     "fmt"
@@ -49,7 +48,6 @@ import (
     "gorm.io/gorm"

     modelknowledge "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
-    crossmodelmgr "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
     plugin2 "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/plugin"
     pluginmodel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/plugin"
     "github.com/coze-dev/coze-studio/backend/api/model/ocean/cloud/playground"
@@ -85,6 +83,7 @@ import (
     "github.com/coze-dev/coze-studio/backend/domain/workflow/entity"
     "github.com/coze-dev/coze-studio/backend/domain/workflow/entity/vo"
     "github.com/coze-dev/coze-studio/backend/domain/workflow/service"
+    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
     "github.com/coze-dev/coze-studio/backend/infra/impl/checkpoint"
     "github.com/coze-dev/coze-studio/backend/infra/impl/coderunner"
     mockCrossUser "github.com/coze-dev/coze-studio/backend/internal/mock/crossdomain/crossuser"
@@ -1503,7 +1502,7 @@ func TestNestedSubWorkflowWithInterrupt(t *testing.T) {
         },
     }

-    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *crossmodelmgr.Model, error) {
+    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *modelmgr.Model, error) {
         if params.ModelType == 1737521813 {
             return chatModel1, nil, nil
         } else {
@@ -1972,7 +1971,7 @@ func TestReturnDirectlyStreamableTool(t *testing.T) {
         },
     }

-    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *crossmodelmgr.Model, error) {
+    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *modelmgr.Model, error) {
         if params.ModelType == 1706077826 {
             innerModel.ModelType = strconv.FormatInt(params.ModelType, 10)
             return innerModel, nil, nil
@@ -2161,7 +2160,7 @@ func TestStreamableToolWithMultipleInterrupts(t *testing.T) {
         },
     }

-    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *crossmodelmgr.Model, error) {
+    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *modelmgr.Model, error) {
         if params.ModelType == 1706077827 {
             outerModel.ModelType = strconv.FormatInt(params.ModelType, 10)
             return outerModel, nil, nil
@@ -2455,7 +2454,7 @@ func TestAggregateStreamVariables(t *testing.T) {
         },
     }

-    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *crossmodelmgr.Model, error) {
+    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *modelmgr.Model, error) {
         if params.ModelType == 1737521813 {
             cm1.ModelType = strconv.FormatInt(params.ModelType, 10)
             return cm1, nil, nil
@@ -2598,7 +2597,7 @@ func TestParallelInterrupts(t *testing.T) {
             }
         },
     }
-    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *crossmodelmgr.Model, error) {
+    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *modelmgr.Model, error) {
         if params.ModelType == 1737521813 {
             return chatModel1, nil, nil
         } else {
@@ -3871,7 +3870,7 @@ func TestLLMException(t *testing.T) {
         },
     }

-    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *crossmodelmgr.Model, error) {
+    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *modelmgr.Model, error) {
         if params.ModelType == 1737521813 {
             return mainChatModel, nil, nil
         } else {
@@ -3938,7 +3937,7 @@ func TestLLMExceptionThenThrow(t *testing.T) {
         },
     }

-    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *crossmodelmgr.Model, error) {
+    r.modelManage.EXPECT().GetModel(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *modelmgr.Model, error) {
         if params.ModelType == 1737521813 {
             return mainChatModel, nil, nil
         } else {

@@ -47,7 +47,6 @@ import (
     "github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossdatacopy"
     "github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossknowledge"
     "github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossmessage"
-    "github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossmodelmgr"
     "github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossplugin"
     "github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossuser"
     "github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossvariables"
@@ -60,7 +59,6 @@ import (
     dataCopyImpl "github.com/coze-dev/coze-studio/backend/crossdomain/impl/datacopy"
     knowledgeImpl "github.com/coze-dev/coze-studio/backend/crossdomain/impl/knowledge"
    messageImpl "github.com/coze-dev/coze-studio/backend/crossdomain/impl/message"
-    modelmgrImpl "github.com/coze-dev/coze-studio/backend/crossdomain/impl/modelmgr"
     pluginImpl "github.com/coze-dev/coze-studio/backend/crossdomain/impl/plugin"
     searchImpl "github.com/coze-dev/coze-studio/backend/crossdomain/impl/search"
     singleagentImpl "github.com/coze-dev/coze-studio/backend/crossdomain/impl/singleagent"
@@ -130,7 +128,6 @@ func Init(ctx context.Context) (err error) {
     crossconnector.SetDefaultSVC(connectorImpl.InitDomainService(basicServices.connectorSVC.DomainSVC))
     crossdatabase.SetDefaultSVC(databaseImpl.InitDomainService(primaryServices.memorySVC.DatabaseDomainSVC))
     crossknowledge.SetDefaultSVC(knowledgeImpl.InitDomainService(primaryServices.knowledgeSVC.DomainSVC))
-    crossmodelmgr.SetDefaultSVC(modelmgrImpl.InitDomainService(basicServices.modelMgrSVC.DomainSVC))
     crossplugin.SetDefaultSVC(pluginImpl.InitDomainService(primaryServices.pluginSVC.DomainSVC))
     crossvariables.SetDefaultSVC(variablesImpl.InitDomainService(primaryServices.memorySVC.VariablesDomainSVC))
     crossworkflow.SetDefaultSVC(workflowImpl.InitDomainService(primaryServices.workflowSVC.DomainSVC))
@@ -158,10 +155,7 @@ func initBasicServices(ctx context.Context, infra *appinfra.AppDependencies, e *
     upload.InitService(infra.TOSClient, infra.CacheCli)
     openAuthSVC := openauth.InitService(infra.DB, infra.IDGenSVC)
     promptSVC := prompt.InitService(infra.DB, infra.IDGenSVC, e.resourceEventBus)
-    modelMgrSVC, err := modelmgr.InitService(infra.DB, infra.IDGenSVC, infra.TOSClient)
-    if err != nil {
-        return nil, err
-    }
+    modelMgrSVC := modelmgr.InitService(infra.ModelMgr, infra.TOSClient)
     connectorSVC := connector.InitService(infra.TOSClient)
     userSVC := user.InitService(ctx, infra.DB, infra.TOSClient, infra.IDGenSVC)
     templateSVC := template.InitService(ctx, &template.ServiceComponents{
@@ -285,7 +279,7 @@ func (b *basicServices) toWorkflowServiceComponents(pluginSVC *plugin.PluginAppl
         VariablesDomainSVC: memorySVC.VariablesDomainSVC,
         PluginDomainSVC:    pluginSVC.DomainSVC,
         KnowledgeDomainSVC: knowledgeSVC.DomainSVC,
-        ModelManager:       b.modelMgrSVC.DomainSVC,
+        ModelManager:       b.infra.ModelMgr,
         DomainNotifier:     b.eventbus.resourceEventBus,
         CPStore:            checkpoint.NewRedisStore(b.infra.CacheCli),
     }
@@ -299,7 +293,7 @@ func (p *primaryServices) toSingleAgentServiceComponents() *singleagent.ServiceC
         Cache:             p.basicServices.infra.CacheCli,
         TosClient:         p.basicServices.infra.TOSClient,
         ImageX:            p.basicServices.infra.ImageXClient,
-        ModelMgrDomainSVC: p.basicServices.modelMgrSVC.DomainSVC,
+        ModelMgr:          p.infra.ModelMgr,
         UserDomainSVC:     p.basicServices.userSVC.DomainSVC,
         EventBus:          p.basicServices.eventbus.projectEventBus,
         DatabaseDomainSVC: p.memorySVC.DatabaseDomainSVC,

@@ -24,6 +24,7 @@ import (
     "gorm.io/gorm"

     "github.com/coze-dev/coze-studio/backend/infra/contract/imagex"
+    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
     "github.com/coze-dev/coze-studio/backend/infra/impl/cache/redis"
     "github.com/coze-dev/coze-studio/backend/infra/impl/es"
     "github.com/coze-dev/coze-studio/backend/infra/impl/eventbus"
@@ -43,6 +44,7 @@ type AppDependencies struct {
     TOSClient             storage.Storage
     ResourceEventProducer eventbus.Producer
     AppEventProducer      eventbus.Producer
+    ModelMgr              modelmgr.Manager
 }

 func Init(ctx context.Context) (*AppDependencies, error) {
@@ -86,6 +88,11 @@ func Init(ctx context.Context) (*AppDependencies, error) {
         return nil, err
     }

+    deps.ModelMgr, err = initModelMgr()
+    if err != nil {
+        return nil, err
+    }
+
     return deps, nil
 }
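
With the new ModelMgr field on AppDependencies, downstream layers receive the manager instead of constructing their own. A hedged sketch of one consumer (the function name and the application package path are assumptions; the call mirrors initBasicServices in the previous file):

import (
	appmodelmgr "github.com/coze-dev/coze-studio/backend/application/modelmgr"
)

// buildModelmgrApp is a hypothetical consumer: it hands the infra-built
// manager to the application layer instead of letting that layer hit the DB.
func buildModelmgrApp(deps *AppDependencies) *appmodelmgr.ModelmgrApplicationService {
	return appmodelmgr.InitService(deps.ModelMgr, deps.TOSClient)
}
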
@@ -1,21 +1,61 @@
-package modelmgr
+package appinfra

 import (
     "fmt"
     "os"
     "path/filepath"
     "strconv"
     "strings"

     "gopkg.in/yaml.v3"

-    "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
     "github.com/coze-dev/coze-studio/backend/infra/contract/chatmodel"
+    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
+    "github.com/coze-dev/coze-studio/backend/infra/impl/modelmgr/static"
     "github.com/coze-dev/coze-studio/backend/pkg/logs"
 )

-func initModelByEnv(wd, templatePath string) (metaSlice []*modelmgr.ModelMeta, entitySlice []*modelmgr.Model, err error) {
-    metaRoot := filepath.Join(wd, templatePath, "meta")
-    entityRoot := filepath.Join(wd, templatePath, "entity")
+func initModelMgr() (modelmgr.Manager, error) {
+    wd, err := os.Getwd()
+    if err != nil {
+        return nil, err
+    }
+
+    staticModel, err := initModelByTemplate(wd, "resources/conf/model")
+    if err != nil {
+        return nil, err
+    }
+
+    envModel, err := initModelByEnv(wd, "resources/conf/model/template")
+    if err != nil {
+        return nil, err
+    }
+
+    all := append(staticModel, envModel...)
+    if err := fillModelContent(all); err != nil {
+        return nil, err
+    }
+
+    mgr, err := static.NewModelMgr(all)
+    if err != nil {
+        return nil, err
+    }
+
+    return mgr, nil
+}
+
+func initModelByTemplate(wd, configPath string) ([]*modelmgr.Model, error) {
+    configRoot := filepath.Join(wd, configPath)
+    staticModel, err := readDirYaml[modelmgr.Model](configRoot)
+    if err != nil {
+        return nil, err
+    }
+
+    return staticModel, nil
+}
+
+func initModelByEnv(wd, templatePath string) (modelEntities []*modelmgr.Model, err error) {
+    entityRoot := filepath.Join(wd, templatePath)

     for i := -1; i < 1000; i++ {
         rawProtocol := os.Getenv(concatEnvKey(modelProtocolPrefix, i))
@@ -35,7 +75,7 @@ func initModelByEnv(wd, templatePath string) (metaSlice []*modelmgr.ModelMeta, e

         mapping, found := modelMapping[protocol]
         if !found {
-            return nil, nil, fmt.Errorf("[initModelByEnv] unsupport protocol: %s", rawProtocol)
+            return nil, fmt.Errorf("[initModelByEnv] unsupport protocol: %s", rawProtocol)
         }

         switch protocol {
@@ -44,41 +84,28 @@ func initModelByEnv(wd, templatePath string) (metaSlice []*modelmgr.ModelMeta, e
             if !foundTemplate {
                 logs.Warnf("[initModelByEnv] unsupport model=%s, using default config", info.modelName)
             }
-            modelMeta, err := readYaml[modelmgr.ModelMeta](filepath.Join(metaRoot, concatTemplateFileName("model_meta_template_ark", fileSuffix)))
-            if err != nil {
-                return nil, nil, err
-            }
-            modelEntity, err := readYaml[modelmgr.Model](filepath.Join(entityRoot, concatTemplateFileName("model_entity_template_ark", fileSuffix)))
+            modelEntity, err := readYaml[modelmgr.Model](filepath.Join(entityRoot, concatTemplateFileName("model_template_ark", fileSuffix)))
             if err != nil {
-                return nil, nil, err
+                return nil, err
             }
             id, err := strconv.ParseInt(info.id, 10, 64)
             if err != nil {
-                return nil, nil, err
+                return nil, err
             }

-            // meta and entity share the same id, so collisions are possible
-            modelMeta.ID = id
-            modelMeta.ConnConfig.Model = info.modelID
-            modelMeta.ConnConfig.APIKey = info.apiKey
-            if info.baseURL != "" {
-                modelMeta.ConnConfig.BaseURL = info.baseURL
-            }
             modelEntity.ID = id
+            modelEntity.Meta.ID = id
             if !foundTemplate {
                 modelEntity.Name = info.modelName
             }

-            metaSlice = append(metaSlice, modelMeta)
-            entitySlice = append(entitySlice, modelEntity)
+            modelEntities = append(modelEntities, modelEntity)

         default:
-            return nil, nil, fmt.Errorf("[initModelByEnv] unsupport protocol: %s", rawProtocol)
+            return nil, fmt.Errorf("[initModelByEnv] unsupport protocol: %s", rawProtocol)
         }
     }

-    return metaSlice, entitySlice, nil
+    return modelEntities, nil
 }

 type envModelInfo struct {
@@ -95,6 +122,32 @@ func getModelEnv(idx int) (info envModelInfo, valid bool) {
     return
 }

+func readDirYaml[T any](dir string) ([]*T, error) {
+    des, err := os.ReadDir(dir)
+    if err != nil {
+        return nil, err
+    }
+    resp := make([]*T, 0, len(des))
+    for _, file := range des {
+        if file.IsDir() {
+            continue
+        }
+        if strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") {
+            filePath := filepath.Join(dir, file.Name())
+            data, err := os.ReadFile(filePath)
+            if err != nil {
+                return nil, err
+            }
+            var content T
+            if err := yaml.Unmarshal(data, &content); err != nil {
+                return nil, err
+            }
+            resp = append(resp, &content)
+        }
+    }
+    return resp, nil
+}
+
 func readYaml[T any](fPath string) (*T, error) {
     data, err := os.ReadFile(fPath)
     if err != nil {
@@ -146,3 +199,18 @@ var modelMapping = map[chatmodel.Protocol]map[string]string{
         "deepseek-v3": "volc_deepseek-v3",
     },
 }
+
+func fillModelContent(items []*modelmgr.Model) error {
+    for i := range items {
+        item := items[i]
+        if item.Meta.Status == modelmgr.StatusDefault {
+            item.Meta.Status = modelmgr.StatusInUse
+        }
+
+        if item.IconURI == "" && item.IconURL == "" {
+            return fmt.Errorf("missing icon URI or icon URL, id=%d", item.ID)
+        }
+    }
+
+    return nil
+}
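
Taken together, the helpers added in this file compose as follows; a hedged usage sketch (loadModels is hypothetical, the two helper calls are real):

// loadModels parses every *.yaml/*.yml entity under dir via readDirYaml,
// then normalizes status (StatusDefault -> StatusInUse) and rejects entries
// with neither IconURI nor IconURL via fillModelContent.
func loadModels(dir string) ([]*modelmgr.Model, error) {
	models, err := readDirYaml[modelmgr.Model](dir)
	if err != nil {
		return nil, err
	}
	if err := fillModelContent(models); err != nil {
		return nil, err
	}
	return models, nil
}
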
@@ -1,4 +1,4 @@
-package modelmgr
+package appinfra

 import (
     "fmt"
@@ -24,8 +24,7 @@ func TestInitByEnv(t *testing.T) {
     wd, err := os.Getwd()
     assert.NoError(t, err)

-    ms, es, err := initModelByEnv(wd, "../../conf/model/template")
+    ms, err := initModelByEnv(wd, "../../../conf/model/template")
     assert.NoError(t, err)
     assert.Len(t, ms, len(modelMapping[chatmodel.ProtocolArk]))
-    assert.Len(t, es, len(modelMapping[chatmodel.ProtocolArk]))
 }

@@ -1,198 +1,11 @@
-/*
- * Copyright 2025 coze-dev Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 package modelmgr

 import (
-    "context"
-    "errors"
-    "fmt"
-    "os"
-    "path/filepath"
-    "strings"
-    "time"
-
-    "gopkg.in/yaml.v3"
-    "gorm.io/gorm"
-
-    crossmodelmgr "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
-    "github.com/coze-dev/coze-studio/backend/domain/modelmgr"
-    "github.com/coze-dev/coze-studio/backend/domain/modelmgr/entity"
-    "github.com/coze-dev/coze-studio/backend/domain/modelmgr/service"
-    "github.com/coze-dev/coze-studio/backend/infra/contract/storage"
-    "github.com/coze-dev/coze-studio/backend/infra/impl/idgen"
-    "github.com/coze-dev/coze-studio/backend/pkg/logs"
+    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
+    "github.com/coze-dev/coze-studio/backend/infra/impl/storage"
 )

-func InitService(db *gorm.DB, idgen idgen.IDGenerator, oss storage.Storage) (*ModelmgrApplicationService, error) {
-    svc := service.NewModelManager(db, idgen, oss)
-    if err := loadStaticModelConfig(svc, oss); err != nil {
-        return nil, err
-    }
-    ModelmgrApplicationSVC.DomainSVC = svc
-
-    return ModelmgrApplicationSVC, nil
-}
-
-func loadStaticModelConfig(svc modelmgr.Manager, oss storage.Storage) error {
-    ctx := context.Background()
-
-    id2Meta := make(map[int64]*entity.ModelMeta)
-    var cursor *string
-    for {
-        req := &modelmgr.ListModelMetaRequest{
-            Status: []entity.ModelMetaStatus{
-                crossmodelmgr.StatusInUse,
-                crossmodelmgr.StatusPending,
-                crossmodelmgr.StatusDeleted,
-            },
-            Limit:  100,
-            Cursor: cursor,
-        }
-        listMetaResp, err := svc.ListModelMeta(ctx, req)
-        if err != nil {
-            return err
-        }
-        for _, item := range listMetaResp.ModelMetaList {
-            cpItem := item
-            id2Meta[cpItem.ID] = cpItem
-        }
-        if !listMetaResp.HasMore {
-            break
-        }
-        cursor = listMetaResp.NextCursor
-    }
-
-    root, err := os.Getwd()
-    if err != nil {
-        return err
-    }
-
-    envModelMeta, envModelEntity, err := initModelByEnv(root, "resources/conf/model/template")
-    if err != nil {
-        return err
-    }
-
-    filePath := filepath.Join(root, "resources/conf/model/meta")
-    staticModelMeta, err := readDirYaml[crossmodelmgr.ModelMeta](filePath)
-    if err != nil {
-        return err
-    }
-    staticModelMeta = append(staticModelMeta, envModelMeta...)
-    for _, modelMeta := range staticModelMeta {
-        if _, found := id2Meta[modelMeta.ID]; !found {
-            if modelMeta.IconURI == "" && modelMeta.IconURL == "" {
-                return fmt.Errorf("missing icon URI or icon URL, id=%d", modelMeta.ID)
-            } else if modelMeta.IconURL != "" {
-                // do nothing
-            } else if modelMeta.IconURI != "" {
-                // try local path
-                base := filepath.Base(modelMeta.IconURI)
-                iconPath := filepath.Join("resources/conf/model/icon", base)
-                if _, err = os.Stat(iconPath); err == nil {
-                    // try upload icon
-                    icon, err := os.ReadFile(iconPath)
-                    if err != nil {
-                        return err
-                    }
-                    key := fmt.Sprintf("icon_%s_%d", base, time.Now().Second())
-                    if err := oss.PutObject(ctx, key, icon); err != nil {
-                        return err
-                    }
-                    modelMeta.IconURI = key
-                } else if errors.Is(err, os.ErrNotExist) {
-                    // try to get object from uri
-                    if _, err := oss.GetObject(ctx, modelMeta.IconURI); err != nil {
-                        return err
-                    }
-                } else {
-                    return err
-                }
-            }
-            newMeta, err := svc.CreateModelMeta(ctx, modelMeta)
-            if err != nil {
-                if errors.Is(err, gorm.ErrDuplicatedKey) {
-                    logs.Infof("[loadStaticModelConfig] model meta conflict for id=%d, skip", newMeta.ID)
-                }
-                return err
-            } else {
-                logs.Infof("[loadStaticModelConfig] model meta create success, id=%d", newMeta.ID)
-            }
-            id2Meta[newMeta.ID] = newMeta
-        } else {
-            logs.Infof("[loadStaticModelConfig] model meta founded, skip create, id=%d", modelMeta.ID)
-        }
-    }
-
-    filePath = filepath.Join(root, "resources/conf/model/entity")
-    staticModel, err := readDirYaml[crossmodelmgr.Model](filePath)
-    if err != nil {
-        return err
-    }
-    staticModel = append(staticModel, envModelEntity...)
-    for _, modelEntity := range staticModel {
-        curModelEntities, err := svc.MGetModelByID(ctx, &modelmgr.MGetModelRequest{IDs: []int64{modelEntity.ID}})
-        if err != nil {
-            return err
-        }
-        if len(curModelEntities) > 0 {
-            logs.Infof("[loadStaticModelConfig] model entity founded, skip create, id=%d", modelEntity.ID)
-            continue
-        }
-        meta, found := id2Meta[modelEntity.Meta.ID]
-        if !found {
-            return fmt.Errorf("model meta not found for id=%d, model_id=%d", modelEntity.Meta.ID, modelEntity.ID)
-        }
-        modelEntity.Meta = *meta
-        if _, err = svc.CreateModel(ctx, &entity.Model{Model: modelEntity}); err != nil {
-            if errors.Is(err, gorm.ErrDuplicatedKey) {
-                logs.Infof("[loadStaticModelConfig] model entity conflict for id=%d, skip", modelEntity.ID)
-            }
-            return err
-        } else {
-            logs.Infof("[loadStaticModelConfig] model entity create success, id=%d", modelEntity.ID)
-        }
-    }
-
-    return nil
-}
-
-func readDirYaml[T any](dir string) ([]*T, error) {
-    des, err := os.ReadDir(dir)
-    if err != nil {
-        return nil, err
-    }
-    resp := make([]*T, 0, len(des))
-    for _, file := range des {
-        if file.IsDir() {
-            continue
-        }
-        if strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") {
-            filePath := filepath.Join(dir, file.Name())
-            data, err := os.ReadFile(filePath)
-            if err != nil {
-                return nil, err
-            }
-            var content T
-            if err := yaml.Unmarshal(data, &content); err != nil {
-                return nil, err
-            }
-            resp = append(resp, &content)
-        }
-    }
-    return resp, nil
-}
+func InitService(mgr modelmgr.Manager, tosClient storage.Storage) *ModelmgrApplicationService {
+    ModelmgrApplicationSVC = &ModelmgrApplicationService{mgr, tosClient}
+    return ModelmgrApplicationSVC
+}
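
The constructor no longer returns an error: seeding the database from static config is gone, and config parsing now happens in appinfra before the manager exists. A sketch of a migrating caller (the wrapper name is hypothetical; the signatures are from the diff):

// newModelmgrApp is a hypothetical caller migrating to the new constructor.
// Old: svc, err := InitService(db, idgen, oss) // seeded the DB, could fail
// New: infallible; the service simply wraps the infra-level manager.
func newModelmgrApp(mgr modelmgr.Manager, tosClient storage.Storage) *ModelmgrApplicationService {
	return InitService(mgr, tosClient)
}
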
@@ -19,10 +19,9 @@ package modelmgr
 import (
     "context"

-    modelmgrEntity "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
     "github.com/coze-dev/coze-studio/backend/api/model/ocean/cloud/developer_api"
-    "github.com/coze-dev/coze-studio/backend/domain/modelmgr"
-    modelEntity "github.com/coze-dev/coze-studio/backend/domain/modelmgr/entity"
+    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
+    "github.com/coze-dev/coze-studio/backend/infra/impl/storage"
     "github.com/coze-dev/coze-studio/backend/pkg/i18n"
     "github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
     "github.com/coze-dev/coze-studio/backend/pkg/lang/sets"
@@ -31,18 +30,19 @@ import (
 )

 type ModelmgrApplicationService struct {
-    DomainSVC modelmgr.Manager
+    Mgr       modelmgr.Manager
+    TosClient storage.Storage
 }

 var ModelmgrApplicationSVC = &ModelmgrApplicationService{}

-func (m *ModelmgrApplicationService) GetModelList(ctx context.Context, req *developer_api.GetTypeListRequest) (
+func (m *ModelmgrApplicationService) GetModelList(ctx context.Context, _ *developer_api.GetTypeListRequest) (
     resp *developer_api.GetTypeListResponse, err error,
 ) {
     // it is unlikely that this many models are configured at the same time
     const modelMaxLimit = 300

-    modelResp, err := m.DomainSVC.ListModel(ctx, &modelmgr.ListModelRequest{
+    modelResp, err := m.Mgr.ListModel(ctx, &modelmgr.ListModelRequest{
         Limit:  modelMaxLimit,
         Cursor: nil,
     })
@@ -51,9 +51,15 @@ func (m *ModelmgrApplicationService) GetModelList(ctx context.Context, req *deve
     }

     locale := i18n.GetLocale(ctx)
-    modelList, err := slices.TransformWithErrorCheck(modelResp.ModelList, func(m *modelEntity.Model) (*developer_api.Model, error) {
-        logs.CtxInfof(ctx, "ChatModel DefaultParameters: %v", m.DefaultParameters)
-        return modelDo2To(m, locale)
+    modelList, err := slices.TransformWithErrorCheck(modelResp.ModelList, func(mm *modelmgr.Model) (*developer_api.Model, error) {
+        logs.CtxInfof(ctx, "ChatModel DefaultParameters: %v", mm.DefaultParameters)
+        if mm.IconURI != "" {
+            iconUrl, err := m.TosClient.GetObjectUrl(ctx, mm.IconURI)
+            if err == nil {
+                mm.IconURL = iconUrl
+            }
+        }
+        return modelDo2To(mm, locale)
     })
     if err != nil {
         return nil, err
@@ -68,11 +74,11 @@ func (m *ModelmgrApplicationService) GetModelList(ctx context.Context, req *deve
     }, nil
 }

-func modelDo2To(model *modelEntity.Model, locale i18n.Locale) (*developer_api.Model, error) {
+func modelDo2To(model *modelmgr.Model, locale i18n.Locale) (*developer_api.Model, error) {
     mm := model.Meta

     mps := slices.Transform(model.DefaultParameters,
-        func(param *modelmgrEntity.Parameter) *developer_api.ModelParameter {
+        func(param *modelmgr.Parameter) *developer_api.ModelParameter {
             return parameterDo2To(param, locale)
         },
     )
@@ -83,7 +89,7 @@ func modelDo2To(model *modelEntity.Model, locale i18n.Locale) (*developer_api.Mo
         Name:       model.Name,
         ModelType:  model.ID,
         ModelClass: mm.Protocol.TOModelClass(),
-        ModelIcon:  mm.IconURL,
+        ModelIcon:  model.IconURL,
         ModelInputPrice:  0,
         ModelOutputPrice: 0,
         ModelQuota: &developer_api.ModelQuota{
@@ -102,19 +108,19 @@ func modelDo2To(model *modelEntity.Model, locale i18n.Locale) (*developer_api.Mo
         },
         ModelName:      mm.Name,
         ModelClassName: mm.Protocol.TOModelClass().String(),
-        IsOffline:      mm.Status != modelmgrEntity.StatusInUse,
+        IsOffline:      mm.Status != modelmgr.StatusInUse,
         ModelParams:    mps,
         ModelDesc: []*developer_api.ModelDescGroup{
             {
                 GroupName: "Description",
-                Desc:      []string{model.Description},
+                Desc:      []string{model.Description.Read(locale)},
             },
         },
         FuncConfig:     nil,
         EndpointName:   nil,
         ModelTagList:   nil,
         IsUpRequired:   nil,
-        ModelBriefDesc: mm.Description.Read(locale),
+        ModelBriefDesc: model.Description.Read(locale),
         ModelSeries: &developer_api.ModelSeriesInfo{ // TODO: replace with the real configuration
             SeriesName: "热门模型",
         },
@@ -122,16 +128,16 @@ func modelDo2To(model *modelEntity.Model, locale i18n.Locale) (*developer_api.Mo
         ModelAbility: &developer_api.ModelAbility{
             CotDisplay:   ptr.Of(mm.Capability.Reasoning),
             FunctionCall: ptr.Of(mm.Capability.FunctionCall),
-            ImageUnderstanding: ptr.Of(modalSet.Contains(modelmgrEntity.ModalImage)),
-            VideoUnderstanding: ptr.Of(modalSet.Contains(modelmgrEntity.ModalVideo)),
-            AudioUnderstanding: ptr.Of(modalSet.Contains(modelmgrEntity.ModalAudio)),
+            ImageUnderstanding: ptr.Of(modalSet.Contains(modelmgr.ModalImage)),
+            VideoUnderstanding: ptr.Of(modalSet.Contains(modelmgr.ModalVideo)),
+            AudioUnderstanding: ptr.Of(modalSet.Contains(modelmgr.ModalAudio)),
             SupportMultiModal:  ptr.Of(len(modalSet) > 1),
             PrefillResp:        ptr.Of(mm.Capability.PrefillResponse),
         },
     }, nil
 }

-func parameterDo2To(param *modelmgrEntity.Parameter, locale i18n.Locale) *developer_api.ModelParameter {
+func parameterDo2To(param *modelmgr.Parameter, locale i18n.Locale) *developer_api.ModelParameter {
     if param == nil {
         return nil
     }
@@ -146,19 +152,19 @@ func parameterDo2To(param *modelmgrEntity.Parameter, locale i18n.Locale) *develo

     var custom string
     var creative, balance, precise *string
-    if val, ok := param.DefaultVal[modelmgrEntity.DefaultTypeDefault]; ok {
+    if val, ok := param.DefaultVal[modelmgr.DefaultTypeDefault]; ok {
         custom = val
     }

-    if val, ok := param.DefaultVal[modelmgrEntity.DefaultTypeCreative]; ok {
+    if val, ok := param.DefaultVal[modelmgr.DefaultTypeCreative]; ok {
         creative = ptr.Of(val)
     }

-    if val, ok := param.DefaultVal[modelmgrEntity.DefaultTypeBalance]; ok {
+    if val, ok := param.DefaultVal[modelmgr.DefaultTypeBalance]; ok {
         balance = ptr.Of(val)
     }

-    if val, ok := param.DefaultVal[modelmgrEntity.DefaultTypePrecise]; ok {
+    if val, ok := param.DefaultVal[modelmgr.DefaultTypePrecise]; ok {
         precise = ptr.Of(val)
     }

@@ -168,11 +174,11 @@ func parameterDo2To(param *modelmgrEntity.Parameter, locale i18n.Locale) *develo
         Desc: param.Desc.Read(locale),
         Type: func() developer_api.ModelParamType {
             switch param.Type {
-            case modelmgrEntity.ValueTypeBoolean:
+            case modelmgr.ValueTypeBoolean:
                 return developer_api.ModelParamType_Boolean
-            case modelmgrEntity.ValueTypeInt:
+            case modelmgr.ValueTypeInt:
                 return developer_api.ModelParamType_Int
-            case modelmgrEntity.ValueTypeFloat:
+            case modelmgr.ValueTypeFloat:
                 return developer_api.ModelParamType_Float
             default:
                 return developer_api.ModelParamType_String
@@ -191,9 +197,9 @@ func parameterDo2To(param *modelmgrEntity.Parameter, locale i18n.Locale) *develo
         ParamClass: &developer_api.ModelParamClass{
             ClassID: func() int32 {
                 switch param.Style.Widget {
-                case modelmgrEntity.WidgetSlider:
+                case modelmgr.WidgetSlider:
                     return 1
-                case modelmgrEntity.WidgetRadioButtons:
+                case modelmgr.WidgetRadioButtons:
                     return 2
                 default:
                     return 0

@@ -20,15 +20,14 @@ import (
     "context"
     "time"

-    modelmgrEntity "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
     "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/singleagent"
     intelligence "github.com/coze-dev/coze-studio/backend/api/model/intelligence/common"
     "github.com/coze-dev/coze-studio/backend/api/model/ocean/cloud/bot_common"
     "github.com/coze-dev/coze-studio/backend/api/model/ocean/cloud/developer_api"
     "github.com/coze-dev/coze-studio/backend/application/base/ctxutil"
     "github.com/coze-dev/coze-studio/backend/domain/agent/singleagent/entity"
-    "github.com/coze-dev/coze-studio/backend/domain/modelmgr"
     searchEntity "github.com/coze-dev/coze-studio/backend/domain/search/entity"
+    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
     "github.com/coze-dev/coze-studio/backend/pkg/errorx"
     "github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
     "github.com/coze-dev/coze-studio/backend/types/errno"
@@ -95,7 +94,7 @@ func (s *SingleAgentApplicationService) newDefaultSingleAgent(ctx context.Contex
         Plugin: []*bot_common.PluginInfo{},
         Knowledge: &bot_common.Knowledge{
             TopK:           ptr.Of(int64(1)),
-            MinScore:       ptr.Of(float64(0.01)),
+            MinScore:       ptr.Of(0.01),
             SearchStrategy: ptr.Of(bot_common.SearchStrategy_SemanticSearch),
             RecallStrategy: &bot_common.RecallStrategy{
                 UseNl2sql: ptr.Of(true),
@@ -115,8 +114,8 @@ func (s *SingleAgentApplicationService) newDefaultSingleAgent(ctx context.Contex
 }

 func (s *SingleAgentApplicationService) defaultModelInfo(ctx context.Context) (*bot_common.ModelInfo, error) {
-    modelResp, err := s.appContext.ModelMgrDomainSVC.ListModel(ctx, &modelmgr.ListModelRequest{
-        Status: []modelmgrEntity.ModelEntityStatus{modelmgrEntity.ModelEntityStatusDefault, modelmgrEntity.ModelEntityStatusInUse},
+    modelResp, err := s.appContext.ModelMgr.ListModel(ctx, &modelmgr.ListModelRequest{
+        Status: []modelmgr.ModelStatus{modelmgr.StatusInUse},
         Limit:  1,
         Cursor: nil,
     })
@@ -131,8 +130,8 @@ func (s *SingleAgentApplicationService) defaultModelInfo(ctx context.Context) (*
     dm := modelResp.ModelList[0]

     var temperature *float64
-    if tp, ok := dm.FindParameter(modelmgrEntity.Temperature); ok {
-        t, err := tp.GetFloat(modelmgrEntity.DefaultTypeBalance)
+    if tp, ok := dm.FindParameter(modelmgr.Temperature); ok {
+        t, err := tp.GetFloat(modelmgr.DefaultTypeBalance)
         if err != nil {
             return nil, err
         }
@@ -141,8 +140,8 @@ func (s *SingleAgentApplicationService) defaultModelInfo(ctx context.Context) (*
     }

     var maxTokens *int32
-    if tp, ok := dm.FindParameter(modelmgrEntity.MaxTokens); ok {
-        t, err := tp.GetInt(modelmgrEntity.DefaultTypeBalance)
+    if tp, ok := dm.FindParameter(modelmgr.MaxTokens); ok {
+        t, err := tp.GetInt(modelmgr.DefaultTypeBalance)
         if err != nil {
             return nil, err
         }
@@ -152,8 +151,8 @@ func (s *SingleAgentApplicationService) defaultModelInfo(ctx context.Context) (*
     }

     var topP *float64
-    if tp, ok := dm.FindParameter(modelmgrEntity.TopP); ok {
-        t, err := tp.GetFloat(modelmgrEntity.DefaultTypeBalance)
+    if tp, ok := dm.FindParameter(modelmgr.TopP); ok {
+        t, err := tp.GetFloat(modelmgr.DefaultTypeBalance)
         if err != nil {
             return nil, err
         }
@@ -161,8 +160,8 @@ func (s *SingleAgentApplicationService) defaultModelInfo(ctx context.Context) (*
     }

     var topK *int32
-    if tp, ok := dm.FindParameter(modelmgrEntity.TopK); ok {
-        t, err := tp.GetInt(modelmgrEntity.DefaultTypeBalance)
+    if tp, ok := dm.FindParameter(modelmgr.TopK); ok {
+        t, err := tp.GetInt(modelmgr.DefaultTypeBalance)
         if err != nil {
             return nil, err
         }
@@ -170,8 +169,8 @@ func (s *SingleAgentApplicationService) defaultModelInfo(ctx context.Context) (*
     }

     var frequencyPenalty *float64
-    if tp, ok := dm.FindParameter(modelmgrEntity.FrequencyPenalty); ok {
-        t, err := tp.GetFloat(modelmgrEntity.DefaultTypeBalance)
+    if tp, ok := dm.FindParameter(modelmgr.FrequencyPenalty); ok {
+        t, err := tp.GetFloat(modelmgr.DefaultTypeBalance)
         if err != nil {
             return nil, err
         }
@@ -179,8 +178,8 @@ func (s *SingleAgentApplicationService) defaultModelInfo(ctx context.Context) (*
     }

     var presencePenalty *float64
-    if tp, ok := dm.FindParameter(modelmgrEntity.PresencePenalty); ok {
-        t, err := tp.GetFloat(modelmgrEntity.DefaultTypeBalance)
+    if tp, ok := dm.FindParameter(modelmgr.PresencePenalty); ok {
+        t, err := tp.GetFloat(modelmgr.DefaultTypeBalance)
         if err != nil {
             return nil, err
         }

@@ -30,13 +30,12 @@ import (
     "github.com/coze-dev/coze-studio/backend/api/model/plugin_develop_common"
     "github.com/coze-dev/coze-studio/backend/domain/agent/singleagent/entity"
     knowledge "github.com/coze-dev/coze-studio/backend/domain/knowledge/service"
-    "github.com/coze-dev/coze-studio/backend/domain/modelmgr"
-    modelEntity "github.com/coze-dev/coze-studio/backend/domain/modelmgr/entity"
     pluginEntity "github.com/coze-dev/coze-studio/backend/domain/plugin/entity"
     "github.com/coze-dev/coze-studio/backend/domain/plugin/service"
     shortcutCMDEntity "github.com/coze-dev/coze-studio/backend/domain/shortcutcmd/entity"
     workflowEntity "github.com/coze-dev/coze-studio/backend/domain/workflow/entity"
     "github.com/coze-dev/coze-studio/backend/domain/workflow/entity/vo"
+    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
     "github.com/coze-dev/coze-studio/backend/pkg/errorx"
     "github.com/coze-dev/coze-studio/backend/pkg/lang/conv"
     "github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
@@ -159,13 +158,13 @@ func (s *SingleAgentApplicationService) shortcutCMDDo2Vo(cmdDOs []*shortcutCMDEn
     })
 }

-func (s *SingleAgentApplicationService) fetchModelDetails(ctx context.Context, agentInfo *entity.SingleAgent) ([]*modelEntity.Model, error) {
+func (s *SingleAgentApplicationService) fetchModelDetails(ctx context.Context, agentInfo *entity.SingleAgent) ([]*modelmgr.Model, error) {
     if agentInfo.ModelInfo.ModelId == nil {
         return nil, nil
     }

     modelID := agentInfo.ModelInfo.GetModelId()
-    modelInfos, err := s.appContext.ModelMgrDomainSVC.MGetModelByID(ctx, &modelmgr.MGetModelRequest{
+    modelInfos, err := s.appContext.ModelMgr.MGetModelByID(ctx, &modelmgr.MGetModelRequest{
         IDs: []int64{modelID},
     })
     if err != nil {
@@ -249,13 +248,13 @@ func (s *SingleAgentApplicationService) fetchWorkflowDetails(ctx context.Context
     return ret, nil
 }

-func modelInfoDo2Vo(modelInfos []*modelEntity.Model) map[int64]*playground.ModelDetail {
-    return slices.ToMap(modelInfos, func(e *modelEntity.Model) (int64, *playground.ModelDetail) {
+func modelInfoDo2Vo(modelInfos []*modelmgr.Model) map[int64]*playground.ModelDetail {
+    return slices.ToMap(modelInfos, func(e *modelmgr.Model) (int64, *playground.ModelDetail) {
         return e.ID, toModelDetail(e)
     })
 }

-func toModelDetail(m *modelEntity.Model) *playground.ModelDetail {
+func toModelDetail(m *modelmgr.Model) *playground.ModelDetail {
     mm := m.Meta

     return &playground.ModelDetail{
@@ -263,7 +262,7 @@ func toModelDetail(m *modelEntity.Model) *playground.ModelDetail {
         ModelName:    ptr.Of(m.Meta.Name),
         ModelID:      ptr.Of(m.ID),
         ModelFamily:  ptr.Of(int64(mm.Protocol.TOModelClass())),
-        ModelIconURL: ptr.Of(mm.IconURL),
+        ModelIconURL: ptr.Of(m.IconURL),
     }
 }

@@ -28,7 +28,6 @@ import (
     knowledge "github.com/coze-dev/coze-studio/backend/domain/knowledge/service"
     database "github.com/coze-dev/coze-studio/backend/domain/memory/database/service"
     variables "github.com/coze-dev/coze-studio/backend/domain/memory/variables/service"
-    "github.com/coze-dev/coze-studio/backend/domain/modelmgr"
     "github.com/coze-dev/coze-studio/backend/domain/plugin/service"
     search "github.com/coze-dev/coze-studio/backend/domain/search/service"
     shortcutCmd "github.com/coze-dev/coze-studio/backend/domain/shortcutcmd/service"
@@ -36,6 +35,7 @@ import (
     "github.com/coze-dev/coze-studio/backend/domain/workflow"
     "github.com/coze-dev/coze-studio/backend/infra/contract/idgen"
     "github.com/coze-dev/coze-studio/backend/infra/contract/imagex"
+    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
     "github.com/coze-dev/coze-studio/backend/infra/contract/storage"
     "github.com/coze-dev/coze-studio/backend/infra/impl/chatmodel"
     "github.com/coze-dev/coze-studio/backend/pkg/jsoncache"
@@ -55,9 +55,9 @@ type ServiceComponents struct {
     ImageX      imagex.ImageX
     EventBus    search.ProjectEventBus
     CounterRepo repository.CounterRepository
+    ModelMgr    modelmgr.Manager

     KnowledgeDomainSVC knowledge.Knowledge
-    ModelMgrDomainSVC  modelmgr.Manager
     PluginDomainSVC    service.PluginService
     WorkflowDomainSVC  workflow.Service
     UserDomainSVC      user.User
@@ -76,6 +76,7 @@ func InitService(c *ServiceComponents) (*SingleAgentApplicationService, error) {
         CounterRepo:  repository.NewCounterRepo(c.Cache),
         CPStore:      c.CPStore,
         ModelFactory: chatmodel.NewDefaultFactory(),
+        ModelMgr:     c.ModelMgr,
     }

     singleAgentDomainSVC := singleagent.NewService(domainComponents)

@@ -30,7 +30,6 @@ import (
     knowledge "github.com/coze-dev/coze-studio/backend/domain/knowledge/service"
     dbservice "github.com/coze-dev/coze-studio/backend/domain/memory/database/service"
     variables "github.com/coze-dev/coze-studio/backend/domain/memory/variables/service"
-    "github.com/coze-dev/coze-studio/backend/domain/modelmgr"
     plugin "github.com/coze-dev/coze-studio/backend/domain/plugin/service"
     search "github.com/coze-dev/coze-studio/backend/domain/search/service"
     "github.com/coze-dev/coze-studio/backend/domain/workflow"
@@ -44,6 +43,7 @@ import (
     "github.com/coze-dev/coze-studio/backend/domain/workflow/service"
     "github.com/coze-dev/coze-studio/backend/infra/contract/idgen"
     "github.com/coze-dev/coze-studio/backend/infra/contract/imagex"
+    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
     "github.com/coze-dev/coze-studio/backend/infra/contract/storage"
     "github.com/coze-dev/coze-studio/backend/infra/impl/coderunner"
 )

@@ -1,94 +0,0 @@
-id: 2002 # model entity id; data with the same id is not overwritten
-name: Doubao Model
-description: test doubao description
-meta:
-  id: 102
-default_parameters:
-  -
-    name: temperature
-    label:
-      zh: 生成随机性
-      en: Temperature
-    label_en: ''
-    desc:
-      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
-      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 1
-    default_val:
-      default_val: '1.0'
-      creative: '1'
-      balance: '0.8'
-      precise: '0.3'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity
-  -
-    name: max_tokens
-    label:
-      zh: 最大回复长度
-      en: Response max length
-    desc:
-      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
-      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
-    type: int
-    min: '1'
-    max: '4096'
-    precision: 0
-    default_val:
-      default_val: '4096'
-    style:
-      widget: slider
-      label:
-        zh: 输入及输出设置
-        en: Input and output settings
-  -
-    name: top_p
-    label:
-      zh: Top P
-      en: Top P
-    desc:
-      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
-      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 2
-    default_val:
-      default_val: '0.7'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity
-  -
-    name: response_format
-    label:
-      zh: 输出格式
-      en: Response format
-    desc:
-      zh: '- **文本**: 使用普通文本格式回复\n- **Markdown**: 将引导模型使用Markdown格式输出回复\n- **JSON**: 将引导模型使用JSON格式输出'
-      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **Markdown**: Uses Markdown format for replies\n- **JSON**: Uses JSON format for replies'
-    type: int
-    precision: 0
-    default_val:
-      default_val: '0'
-    options:
-      -
-        value: '0'
-        label: 'Text'
-      -
-        value: '1'
-        label: 'Markdown'
-      -
-        value: '2'
-        label: 'JSON'
-    style:
-      widget: radio_buttons
-      label:
-        zh: 输入及输出设置
-        en: Input and output settings

@@ -1,66 +0,0 @@
-id: 65536 # model entity id; data with the same id is not overwritten
-name: Doubao-1.5-Lite
-meta:
-  id: 65536
-default_parameters:
-  -
-    name: temperature
-    label:
-      zh: 生成随机性
-      en: Temperature
-    label_en: ''
-    desc:
-      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
-      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 1
-    default_val:
-      default_val: '1.0'
-      creative: '1'
-      balance: '0.8'
-      precise: '0.3'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity
-  -
-    name: max_tokens
-    label:
-      zh: 最大回复长度
-      en: Response max length
-    desc:
-      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
-      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
-    type: int
-    min: '1'
-    max: '4096'
-    precision: 0
-    default_val:
-      default_val: '4096'
-    style:
-      widget: slider
-      label:
-        zh: 输入及输出设置
-        en: Input and output settings
-  -
-    name: top_p
-    label:
-      zh: Top P
-      en: Top P
-    desc:
-      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
-      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 2
-    default_val:
-      default_val: '0.7'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity

@@ -1,66 +0,0 @@
-id: 65536 # model entity id; data with the same id is not overwritten
-name: Doubao-1.5-Pro-256k
-meta:
-  id: 65536
-default_parameters:
-  -
-    name: temperature
-    label:
-      zh: 生成随机性
-      en: Temperature
-    label_en: ''
-    desc:
-      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
-      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 1
-    default_val:
-      default_val: '1.0'
-      creative: '1'
-      balance: '0.8'
-      precise: '0.3'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity
-  -
-    name: max_tokens
-    label:
-      zh: 最大回复长度
-      en: Response max length
-    desc:
-      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
-      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
-    type: int
-    min: '1'
-    max: '4096'
-    precision: 0
-    default_val:
-      default_val: '4096'
-    style:
-      widget: slider
-      label:
-        zh: 输入及输出设置
-        en: Input and output settings
-  -
-    name: top_p
-    label:
-      zh: Top P
-      en: Top P
-    desc:
-      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
-      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 2
-    default_val:
-      default_val: '0.7'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity

@@ -1,66 +0,0 @@
-id: 65536 # model entity id; data with the same id is not overwritten
-name: Doubao-1.5-Pro-32k
-meta:
-  id: 65536
-default_parameters:
-  -
-    name: temperature
-    label:
-      zh: 生成随机性
-      en: Temperature
-    label_en: ''
-    desc:
-      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
-      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 1
-    default_val:
-      default_val: '1.0'
-      creative: '1'
-      balance: '0.8'
-      precise: '0.3'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity
-  -
-    name: max_tokens
-    label:
-      zh: 最大回复长度
-      en: Response max length
-    desc:
-      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
-      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
-    type: int
-    min: '1'
-    max: '4096'
-    precision: 0
-    default_val:
-      default_val: '4096'
-    style:
-      widget: slider
-      label:
-        zh: 输入及输出设置
-        en: Input and output settings
-  -
-    name: top_p
-    label:
-      zh: Top P
-      en: Top P
-    desc:
-      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
-      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 2
-    default_val:
-      default_val: '0.7'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity

@@ -1,90 +0,0 @@
-id: 65536 # model entity id; data with the same id is not overwritten
-name: Doubao-1.5-Thinking-Pro
-meta:
-  id: 65536
-default_parameters:
-  -
-    name: temperature
-    label:
-      zh: 生成随机性
-      en: Temperature
-    label_en: ''
-    desc:
-      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
-      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 1
-    default_val:
-      default_val: '1.0'
-      creative: '1'
-      balance: '0.8'
-      precise: '0.3'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity
-  -
-    name: max_tokens
-    label:
-      zh: 最大回复长度
-      en: Response max length
-    desc:
-      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
-      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
-    type: int
-    min: '1'
-    max: '4096'
-    precision: 0
-    default_val:
-      default_val: '4096'
-    style:
-      widget: slider
-      label:
-        zh: 输入及输出设置
-        en: Input and output settings
-  -
-    name: top_p
-    label:
-      zh: Top P
-      en: Top P
-    desc:
-      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
-      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 2
-    default_val:
-      default_val: '0.7'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity
-  -
-    name: response_format
-    label:
-      zh: 输出格式
-      en: Response format
-    desc:
-      zh: '- **JSON**: 将引导模型使用JSON格式输出'
-      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
-    type: int
-    precision: 0
-    default_val:
-      default_val: '0'
-    options:
-      -
-        value: '0'
-        label: 'Text'
-      -
-        value: '1'
-        label: 'JSON'
-    style:
-      widget: radio_buttons
-      label:
-        zh: 输入及输出设置
-        en: Input and output settings

@@ -1,90 +0,0 @@
-id: 65536 # model entity id; data with the same id is not overwritten
-name: Doubao-1.5-Thinking-Vision-Pro
-meta:
-  id: 65536
-default_parameters:
-  -
-    name: temperature
-    label:
-      zh: 生成随机性
-      en: Temperature
-    label_en: ''
-    desc:
-      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
-      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 1
-    default_val:
-      default_val: '1.0'
-      creative: '1'
-      balance: '0.8'
-      precise: '0.3'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity
-  -
-    name: max_tokens
-    label:
-      zh: 最大回复长度
-      en: Response max length
-    desc:
-      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
-      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
-    type: int
-    min: '1'
-    max: '4096'
-    precision: 0
-    default_val:
-      default_val: '4096'
-    style:
-      widget: slider
-      label:
-        zh: 输入及输出设置
-        en: Input and output settings
-  -
-    name: top_p
-    label:
-      zh: Top P
-      en: Top P
-    desc:
-      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
-      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
-    type: float
-    min: '0'
-    max: '1'
-    precision: 2
-    default_val:
-      default_val: '0.7'
-    style:
-      widget: slider
-      label:
-        zh: 生成多样性
-        en: Generation diversity
-  -
-    name: response_format
-    label:
-      zh: 输出格式
-      en: Response format
-    desc:
-      zh: '- **JSON**: 将引导模型使用JSON格式输出'
-      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
-    type: int
-    precision: 0
-    default_val:
-      default_val: '0'
-    options:
-      -
-        value: '0'
-        label: 'Text'
-      -
-        value: '1'
-        label: 'JSON'
-    style:
-      widget: radio_buttons
-      label:
-        zh: 输入及输出设置
-        en: Input and output settings

@@ -1,90 +0,0 @@
id: 65536 # model entity id; data with the same id is not overwritten
name: Doubao-1.5-Vision-Lite
meta:
  id: 65536
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    label_en: ''
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    precision: 0
    default_val:
      default_val: '0'
    options:
      - value: '0'
        label: 'Text'
      - value: '1'
        label: 'JSON'
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,90 +0,0 @@
id: 65536 # model entity id; data with the same id is not overwritten
name: Doubao-1.5-Vision-Pro
meta:
  id: 65536
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    label_en: ''
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    precision: 0
    default_val:
      default_val: '0'
    options:
      - value: '0'
        label: 'Text'
      - value: '1'
        label: 'JSON'
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,90 +0,0 @@
id: 65536 # model entity id; data with the same id is not overwritten
name: Doubao-Seed-1.6-Flash
meta:
  id: 65536
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    label_en: ''
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    precision: 0
    default_val:
      default_val: '0'
    options:
      - value: '0'
        label: 'Text'
      - value: '1'
        label: 'JSON'
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,90 +0,0 @@
id: 65536 # model entity id; data with the same id is not overwritten
name: Doubao-Seed-1.6-Thinking
meta:
  id: 65536
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    label_en: ''
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    precision: 0
    default_val:
      default_val: '0'
    options:
      - value: '0'
        label: 'Text'
      - value: '1'
        label: 'JSON'
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,90 +0,0 @@
id: 65536 # model entity id; data with the same id is not overwritten
name: Doubao-Seed-1.6
meta:
  id: 65536
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    label_en: ''
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    precision: 0
    default_val:
      default_val: '0'
    options:
      - value: '0'
        label: 'Text'
      - value: '1'
        label: 'JSON'
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,66 +0,0 @@
id: 65536 # model entity id; data with the same id is not overwritten
name: Deepseek-R1-VolcEngine
meta:
  id: 65536
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    label_en: ''
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
@@ -1,66 +0,0 @@
id: 65536 # model entity id; data with the same id is not overwritten
name: Deepseek-V3-VolcEngine
meta:
  id: 65536
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    label_en: ''
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
@@ -1,132 +0,0 @@
id: 100 # model entity id; data with the same id is not overwritten
name: test_model
description: test_description
meta:
  id: 0
  scenario: 1
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: frequency_penalty
    label:
      zh: 重复语句惩罚
      en: Frequency penalty
    desc:
      zh: '- **frequency penalty**: 当该值为正时,会阻止模型频繁使用相同的词汇和短语,从而增加输出内容的多样性。'
      en: '**Frequency Penalty**: When positive, it discourages the model from repeating the same words and phrases, thereby increasing the diversity of the output.'
    type: float
    min: '-2'
    max: '2'
    precision: 2
    default_val:
      default_val: '0'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: presence_penalty
    label:
      zh: 重复主题惩罚
      en: Presence penalty
    desc:
      zh: '- **presence penalty**: 当该值为正时,会阻止模型频繁讨论相同的主题,从而增加输出内容的多样性'
      en: '**Presence Penalty**: When positive, it prevents the model from discussing the same topics repeatedly, thereby increasing the diversity of the output.'
    type: float
    min: '-2'
    max: '2'
    precision: 2
    default_val:
      default_val: '0'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **文本**: 使用普通文本格式回复\n- **Markdown**: 将引导模型使用Markdown格式输出回复\n- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **Markdown**: Uses Markdown format for replies\n- **JSON**: Uses JSON format for replies'
    type: int
    precision: 0
    default_val:
      default_val: '0'
    options:
      - value: '0'
        label: 'Text'
      - value: '1'
        label: 'Markdown'
      - value: '2'
        label: 'JSON'
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
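A rough Go sketch of how one default_parameters entry might be modeled and decoded. The struct and field names are illustrative assumptions, not the repository's actual definitions (those live under the modelmgr domain), and gopkg.in/yaml.v3 stands in for whatever decoder the backend uses. Note that default_val decodes as a map: besides the plain default, it can carry per-preset values such as creative / balance / precise.

package main

import (
	"fmt"

	"gopkg.in/yaml.v3" // assumed YAML decoder; any would do
)

// I18nText holds the zh/en pairs used throughout these templates.
type I18nText struct {
	Zh string `yaml:"zh"`
	En string `yaml:"en"`
}

// Option is one entry of a radio_buttons parameter such as response_format.
type Option struct {
	Value string `yaml:"value"`
	Label string `yaml:"label"`
}

// Style describes how the parameter is rendered (slider, radio_buttons, ...).
type Style struct {
	Widget string   `yaml:"widget"`
	Label  I18nText `yaml:"label"`
}

// Param mirrors one item of default_parameters (hypothetical shape).
type Param struct {
	Name       string            `yaml:"name"`
	Label      I18nText          `yaml:"label"`
	Desc       I18nText          `yaml:"desc"`
	Type       string            `yaml:"type"` // "float" or "int"
	Min        string            `yaml:"min"`
	Max        string            `yaml:"max"`
	Precision  int               `yaml:"precision"`
	DefaultVal map[string]string `yaml:"default_val"`
	Options    []Option          `yaml:"options,omitempty"`
	Style      Style             `yaml:"style"`
}

func main() {
	doc := []byte(`
- name: temperature
  label: {zh: 生成随机性, en: Temperature}
  type: float
  min: '0'
  max: '1'
  precision: 1
  default_val:
    default_val: '1.0'
    creative: '1'
    balance: '0.8'
    precise: '0.3'
  style:
    widget: slider
    label: {zh: 生成多样性, en: Generation diversity}
`)
	var params []Param
	if err := yaml.Unmarshal(doc, &params); err != nil {
		panic(err)
	}
	fmt.Printf("%s defaults to %s\n", params[0].Name, params[0].DefaultVal["default_val"])
}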
@@ -1,47 +0,0 @@
id: 2006 # model entity id; data with the same id is not overwritten
name: Claude-3.5-Sonnet
description: test claude description
meta:
  id: 106
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,72 +0,0 @@
id: 2004 # model entity id; data with the same id is not overwritten
name: DeepSeek-V3
description: test deepseek description
meta:
  id: 104
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **文本**: 使用普通文本格式回复\n- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **JSON**: Uses JSON format for replies'
    type: int
    precision: 0
    default_val:
      default_val: '0'
    options:
      - value: '0'
        label: 'Text'
      - value: '1'
        label: 'JSON Object'
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,90 +0,0 @@
id: 2007 # model entity id; data with the same id is not overwritten
name: Gemini-2.5-Flash
description: test gemini description
meta:
  id: 107
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **文本**: 使用普通文本格式回复\n- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **JSON**: Uses JSON format for replies'
    type: int
    precision: 0
    default_val:
      default_val: '0'
    options:
      - value: '0'
        label: 'Text'
      - value: '2'
        label: 'JSON'
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,47 +0,0 @@
id: 2003 # model entity id; data with the same id is not overwritten
name: Gemma-3
description: test gemma-3 description
meta:
  id: 103
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,131 +0,0 @@
id: 2001 # model entity id; data with the same id is not overwritten
name: GPT-4o
description: test gpt-4o description
meta:
  id: 101
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.7'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: frequency_penalty
    label:
      zh: 重复语句惩罚
      en: Frequency penalty
    desc:
      zh: '- **frequency penalty**: 当该值为正时,会阻止模型频繁使用相同的词汇和短语,从而增加输出内容的多样性。'
      en: '**Frequency Penalty**: When positive, it discourages the model from repeating the same words and phrases, thereby increasing the diversity of the output.'
    type: float
    min: '-2'
    max: '2'
    precision: 2
    default_val:
      default_val: '0'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: presence_penalty
    label:
      zh: 重复主题惩罚
      en: Presence penalty
    desc:
      zh: '- **presence penalty**: 当该值为正时,会阻止模型频繁讨论相同的主题,从而增加输出内容的多样性'
      en: '**Presence Penalty**: When positive, it prevents the model from discussing the same topics repeatedly, thereby increasing the diversity of the output.'
    type: float
    min: '-2'
    max: '2'
    precision: 2
    default_val:
      default_val: '0'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **文本**: 使用普通文本格式回复\n- **Markdown**: 将引导模型使用Markdown格式输出回复\n- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **Markdown**: Uses Markdown format for replies\n- **JSON**: Uses JSON format for replies'
    type: int
    precision: 0
    default_val:
      default_val: '0'
    options:
      - value: '0'
        label: 'Text'
      - value: '1'
        label: 'Markdown'
      - value: '2'
        label: 'JSON'
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
@@ -1,66 +0,0 @@
id: 2005 # model entity id; data with the same id is not overwritten
name: Qwen3-32B
description: test qwen description
meta:
  id: 105
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: '0'
    max: '1'
    precision: 1
    default_val:
      default_val: '1.0'
      creative: '1'
      balance: '0.8'
      precise: '0.3'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: '控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。'
      en: 'You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.'
    type: int
    min: '1'
    max: '4096'
    precision: 0
    default_val:
      default_val: '4096'
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: '0'
    max: '1'
    precision: 2
    default_val:
      default_val: '0.95'
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
@@ -1,41 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 102 # model id; data with the same id is not overwritten
name: Doubao # model meta name
icon_uri: doubao_v2.png
description:
  zh: 豆包模型简介 # default model description
  en: doubao model description
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
  input_tokens: 128000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16384 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
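The template above decodes naturally into a small set of nested structs. The sketch below is an assumption about shape only (type names invented for illustration, gopkg.in/yaml.v3 assumed as the decoder, as in the earlier sketch); the one rule it encodes is that conn_config holds protocol-agnostic defaults while the nested ark section applies only when protocol is ark.

package main

import (
	"fmt"

	"gopkg.in/yaml.v3" // assumed decoder
)

// Capability mirrors the capability block: feature flags plus token limits.
type Capability struct {
	FunctionCall    bool     `yaml:"function_call"`
	InputModal      []string `yaml:"input_modal"`
	InputTokens     int      `yaml:"input_tokens"`
	OutputModal     []string `yaml:"output_modal"`
	OutputTokens    int      `yaml:"output_tokens"`
	MaxTokens       int      `yaml:"max_tokens"`
	JSONMode        bool     `yaml:"json_mode"`
	PrefixCaching   bool     `yaml:"prefix_caching"`
	Reasoning       bool     `yaml:"reasoning"`
	PrefillResponse bool     `yaml:"prefill_response"`
}

// ArkConfig is the protocol-specific section; only filled for protocol: ark.
type ArkConfig struct {
	Region    string `yaml:"region"`
	AccessKey string `yaml:"access_key"`
	SecretKey string `yaml:"secret_key"`
}

// ConnConfig holds the protocol-agnostic connection defaults.
type ConnConfig struct {
	APIKey           string     `yaml:"api_key"`
	Model            string     `yaml:"model"`
	Temperature      float64    `yaml:"temperature"`
	FrequencyPenalty float64    `yaml:"frequency_penalty"`
	PresencePenalty  float64    `yaml:"presence_penalty"`
	MaxTokens        int        `yaml:"max_tokens"`
	TopP             float64    `yaml:"top_p"`
	TopK             int        `yaml:"top_k"`
	Ark              *ArkConfig `yaml:"ark"` // nil unless the ark section is present
}

// Template is the top level of one model template file (hypothetical name).
type Template struct {
	ID         int64      `yaml:"id"`
	Name       string     `yaml:"name"`
	IconURI    string     `yaml:"icon_uri"`
	Protocol   string     `yaml:"protocol"`
	Capability Capability `yaml:"capability"`
	ConnConfig ConnConfig `yaml:"conn_config"`
	Status     int        `yaml:"status"`
}

func main() {
	doc := []byte(`
id: 102
name: Doubao
protocol: ark
capability:
  function_call: true
  input_tokens: 128000
conn_config:
  model: model-name-placeholder
  top_p: 0.7
  ark: {region: region-placeholder}
status: 1
`)
	var t Template
	if err := yaml.Unmarshal(doc, &t); err != nil {
		panic(err)
	}
	fmt.Println(t.Name, t.ConnConfig.Ark.Region) // Doubao region-placeholder
}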
@@ -1,40 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; data with the same id is not overwritten
name: doubao-1.5-lite # model meta name
icon_uri: doubao_v2.png
description:
  zh: 'Doubao-1.5-lite,全新一代轻量版模型,极致响应速度,效果与时延均达到全球一流水平。' # default model description
  en: 'Doubao-1.5-lite, the new generation lightweight model, delivers ultra-fast response speed with both performance and latency reaching world-class standards.'
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
  input_tokens: 20000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 12000 # output token limit
  max_tokens: 32000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: true # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@@ -1,40 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; data with the same id is not overwritten
name: doubao-1.5-pro-256k # model meta name
icon_uri: doubao_v2.png
description:
  zh: 'doubao-1.5-pro-256k,基于doubao-1.5-Pro全面升级版,整体效果大幅提升10%。更高性能、更大窗口、超高性价比,适用于更广泛的应用场景。' # default model description
  en: 'doubao-1.5-pro-256k is a fully upgraded version based on doubao-1.5-Pro, with an overall performance improvement of 10%. It offers higher performance, a larger context window, and exceptional cost-effectiveness, making it suitable for a wider range of application scenarios.'
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
  input_tokens: 96000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 12000 # output token limit
  max_tokens: 256000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@@ -1,40 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; data with the same id is not overwritten
name: doubao-1.5-pro-32k # model meta name
icon_uri: doubao_v2.png
description:
  zh: 'Doubao-1.5-pro,全新一代主力模型,性能全面升级,在知识、代码、推理、等方面表现卓越。' # default model description
  en: 'Doubao-1.5-pro, the new generation flagship model, features comprehensive performance upgrades and excels in areas such as knowledge, coding, and reasoning.'
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
  input_tokens: 96000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 12000 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: true # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@@ -1,41 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; data with the same id is not overwritten
name: doubao-1.5-thinking-pro # model meta name
icon_uri: doubao_v2.png
description:
  zh: 'doubao-1.5 全新深度思考模型,在数学、编程、科学推理等专业领域及创意写作等通用任务中表现突出,在 AIME 2024、Codeforces、GPQA 等多项权威基准上达到或接近业界第一梯队水平。' # default model description
  en: "doubao-1.5 is a brand-new deep thinking model that excels in specialized fields such as mathematics, programming, scientific reasoning, as well as general tasks like creative writing. It achieves or approaches the industry’s top-tier level on multiple authoritative benchmarks including AIME 2024, Codeforces, and GPQA."
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
  input_tokens: 96000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16000 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: true # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: true # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@@ -1,42 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; data with the same id is not overwritten
name: doubao-1.5-thinking-vision-pro # model meta name
icon_uri: doubao_v2.png
description:
  zh: 'doubao-1-5-thinking-vision-pro 全新视觉深度思考模型,具备更强的通用多模态理解和推理能力,在 59 个公开评测基准中的 37 个上取得 SOTA 表现。' # default model description
  en: 'doubao-1-5-thinking-vision-pro is a brand-new visual deep thinking model, featuring stronger general multimodal understanding and reasoning abilities, achieving SOTA performance on 37 out of 59 public evaluation benchmarks.'
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
    - video
  input_tokens: 96000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16000 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: true # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: true # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@@ -1,41 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; data with the same id is not overwritten
name: doubao-1.5-vision-lite # model meta name
icon_uri: doubao_v2.png
description:
  zh: 'doubao-1.5-vision-lite,极具性价比的多模态大模型,支持任意分辨率和极端长宽比图像识别,增强视觉推理、文档识别、细节信息理解和指令遵循能力。' # default model description
  en: 'doubao-1.5-vision-lite is a highly cost-effective multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detailed information comprehension, and instruction-following capabilities.'
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: false # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
  input_tokens: 96000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16000 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@@ -1,42 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; data with the same id is not overwritten
name: doubao-1.5-vision-pro # model meta name
icon_uri: doubao_v2.png
description:
  zh: 'doubao-1.5-vision-pro,全新升级的多模态大模型,支持任意分辨率和极端长宽比图像识别,增强视觉推理、文档识别、细节信息理解和指令遵循能力。' # default model description
  en: 'doubao-1.5-vision-pro is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detailed information comprehension, and instruction-following capabilities.'
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
    - video
  input_tokens: 96000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16000 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: true # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: true # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@@ -1,42 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; data with the same id is not overwritten
name: doubao-seed-1.6-flash # model meta name
icon_uri: doubao_v2.png
description:
  zh: '有极致推理速度的多模态深度思考模型;同时支持文本和视觉理解。文本理解能力超过上一代 Lite 系列模型,视觉理解比肩友商 Pro 系列模型。' # default model description
  en: 'A multimodal deep thinking model with extreme reasoning speed; it supports both text and visual understanding. Its text comprehension surpasses the previous generation Lite series models, while its visual understanding rivals competitor Pro series models.'
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
    - video
  input_tokens: 224000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 32000 # output token limit
  max_tokens: 256000 # maximum token count
  json_mode: true # whether json mode is supported
  prefix_caching: true # whether prefix caching is supported
  reasoning: true # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@@ -1,42 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; data with the same id is not overwritten
name: doubao-seed-1.6-thinking # model meta name
icon_uri: doubao_v2.png
description:
  zh: '在思考能力上进行了大幅强化, 对比 doubao 1.5 代深度理解模型,在编程、数学、逻辑推理等基础能力上进一步提升, 支持视觉理解。' # default model description
  en: 'Significantly enhanced in thinking capabilities, compared to the doubao 1.5 generation deep understanding model, with further improvements in fundamental skills such as programming, mathematics, and logical reasoning, and support for visual understanding.'
protocol: ark # model connection protocol
capability: # base model capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
    - video
  input_tokens: 224000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16000 # output token limit
  max_tokens: 256000 # maximum token count
  json_mode: true # whether json mode is supported
  prefix_caching: true # whether prefix caching is supported
  reasoning: true # whether reasoning is supported
  prefill_response: false # whether response prefill (continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific config below; only fill in the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@ -1,42 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; entries with the same id are not overwritten
name: doubao-seed-1.6 # model meta name
icon_uri: doubao_v2.png
description:
  zh: '全新多模态深度思考模型,同时支持 thinking、non-thinking、auto三种思考模式。其中 non-thinking 模型对比 doubao-1.5-pro-32k-250115 模型大幅提升。' # default model description
  en: 'A brand-new multimodal deep thinking model supports three thinking modes: thinking, non-thinking, and auto. Among them, the non-thinking model has significantly improved compared to the doubao-1.5-pro-32k-250115 model.'
protocol: ark # model connection protocol
capability: # model base capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
    - video
  input_tokens: 224000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 32000 # output token limit
  max_tokens: 256000 # maximum token count
  json_mode: true # whether json mode is supported
  prefix_caching: true # whether prefix caching is supported
  reasoning: true # whether reasoning is supported
  prefill_response: false # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific configuration; set only the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@ -1,40 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; entries with the same id are not overwritten
name: deepseek-r1-ve # model meta name
icon_uri: deepseek_v2.png
description:
  zh: 'deepseek-r1 是由深度求索推出的深度思考模型。在后训练阶段大规模使用了强化学习技术,在仅有极少标注数据的情况下,极大提升了模型推理能力。在数学、代码、自然语言推理等任务上,性能比肩 OpenAI o1 正式版。' # default model description
  en: "deepseek-r1 is a deep thinking model launched by Deep Seek. It extensively employs reinforcement learning during the post-training phase, significantly enhancing the model's reasoning ability with very limited annotated data. In tasks such as mathematics, coding, and natural language reasoning, its performance rivals that of the official OpenAI o1 version."
protocol: ark # model connection protocol
capability: # model base capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
  input_tokens: 96000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 32000 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: true # whether prefix caching is supported
  reasoning: true # whether reasoning is supported
  prefill_response: false # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific configuration; set only the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@ -1,40 +0,0 @@
# ark model template
# model list: https://www.volcengine.com/docs/82379/1330310
# get api_key: https://www.volcengine.com/docs/82379/1399008#b00dee71
# get region: https://www.volcengine.com/docs/82379/1319853#%E8%AE%BE%E7%BD%AE%E5%9C%B0%E5%9F%9F%E5%92%8C%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D
# get ak/sk: https://www.volcengine.com/docs/82379/1319853#%E4%BD%BF%E7%94%A8access-key%E9%89%B4%E6%9D%83
id: 65536 # model id; entries with the same id are not overwritten
name: deepseek-v3-ve # model meta name
icon_uri: deepseek_v2.png
description:
  zh: 'deepseek-v3 由深度求索公司自研的MoE模型,多项评测成绩超越了 qwen2.5-72b 和 llama-3.1-405b 等开源模型,并在性能上和世界顶尖的闭源模型 gpt-4o 及 claude-3.5-Sonnet 不分伯仲。' # default model description
  en: "deepseek-v3 is a MoE model independently developed by Deep Seek. Its performance in multiple evaluations surpasses open-source models such as qwen2.5-72b and llama-3.1-405b, and it competes on par with world-leading closed-source models like gpt-4o and claude-3.5-Sonnet."
protocol: ark # model connection protocol
capability: # model base capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
  input_tokens: 96000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16000 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: true # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model name
  temperature: 0.1 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.7 # default top_p
  top_k: 0 # default top_k
  # protocol-specific configuration; set only the section matching the protocol
  ark: # OPTIONAL
    region: ''
    access_key: ''
    secret_key: ''
status: 1
@ -1,75 +0,0 @@
id: 0 # model meta id; entries with the same id are not overwritten
name: test_model # model display name
icon_uri: test_icon_uri # model display icon uri
icon_url: test_icon_url # model display icon url
description:
  zh: test_description # default model description
  en: test_description
protocol: test_protocol # model connection protocol, see: backend/infra/contract/chatmodel/protocol.go
capability: # model base capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
    - audio
    - video
  input_tokens: 1024 # input token limit
  output_modal: # supported output modalities
    - text
    - image
    - audio
    - video
  output_tokens: 1024 # output token limit
  max_tokens: 2048 # maximum token count
  json_mode: true # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  base_url: https://localhost:1234/chat/completion
  api_key: qweasdzxc
  timeout: 100 # nanosec
  model: model_name # model name
  temperature: 0.7 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 2048 # default max_tokens
  top_p: 0 # default top_p
  top_k: 0 # default top_k
  enable_thinking: false
  stop:
    - bye
  # protocol-specific configuration; set only the section matching the protocol
  openai:
    by_azure: true
    api_version: 2024-10-21
    response_format:
      type: text
  claude:
    by_bedrock: true
    access_key: bedrock_ak
    secret_access_key: bedrock_secret_ak
    session_token: bedrock_session_token
    region: bedrock_region
  ark:
    region: region
    access_key: ak
    secret_key: sk
    retry_times: 123
    custom_header:
      key: val
  deepseek:
    response_format_type: text
  gemini:
    backend: 0
    project: ''
    location: ''
    api_version: ''
    headers:
      key_1:
        - val_1
        - val_2
    timeout: 0
    include_thoughts: true
    thinking_budget: null
status: 1
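The test_model template above is the only one that fills in every protocol-specific section at once (openai, claude, ark, deepseek, gemini); the real templates set only the section matching `protocol`. A hedged sketch, assuming hypothetical struct names, of how such optional sections can be modeled as pointers so the absent ones decode to nil:

// Sketch: protocol-specific sections as pointers; sections missing from the
// YAML stay nil, so callers can tell which protocol block was configured.
package main

import (
	"fmt"
	"log"

	"gopkg.in/yaml.v3"
)

type arkConfig struct {
	Region    string `yaml:"region"`
	AccessKey string `yaml:"access_key"`
	SecretKey string `yaml:"secret_key"`
}

type openAIConfig struct {
	ByAzure    bool   `yaml:"by_azure"`
	APIVersion string `yaml:"api_version"`
}

type connConfig struct {
	BaseURL string        `yaml:"base_url"`
	OpenAI  *openAIConfig `yaml:"openai"` // nil unless protocol: openai
	Ark     *arkConfig    `yaml:"ark"`    // nil unless protocol: ark
}

func main() {
	raw := []byte(`
base_url: https://example.com
ark:
  region: cn-beijing
`)
	var c connConfig
	if err := yaml.Unmarshal(raw, &c); err != nil {
		log.Fatal(err)
	}
	fmt.Println("openai set:", c.OpenAI != nil, "| ark set:", c.Ark != nil) // false, true
}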
@ -1,40 +0,0 @@
id: 106 # model id; entries with the same id are not overwritten
name: Claude-3.5-Sonnet # model meta name
icon_uri: claude_v2.png # model display icon uri
icon_url: '' # model display icon url
description:
  zh: claude 模型简介 # default model description
  en: claude model description
protocol: claude # model connection protocol, see: backend/infra/contract/chatmodel/protocol.go
capability: # model base capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
  input_tokens: 128000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16384 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  base_url: '' # REQUIRED: base_url
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model
  temperature: 0.7 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 1 # default top_p
  top_k: 0 # default top_k
  # protocol-specific configuration; set only the section matching the protocol
  claude: # OPTIONAL
    by_bedrock: false # true if using bedrock service
    access_key: '' # access key
    secret_access_key: '' # secret access key
    session_token: '' # session_token
    region: '' # region
status: 1
@ -1,35 +0,0 @@
id: 104 # model id; entries with the same id are not overwritten
name: DeepSeek-V3 # model meta name
icon_uri: deepseek_v2.png # model display icon uri
icon_url: '' # model display icon url
description:
  zh: deepseek 模型简介
  en: deepseek model description
protocol: deepseek # model connection protocol, see: backend/infra/contract/chatmodel/protocol.go
capability: # model base capabilities
  function_call: false # whether the model supports function call
  input_modal: # supported input modalities
    - text
  input_tokens: 128000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16384 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  base_url: '' # REQUIRED: base_url
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model
  temperature: 0.7 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 1 # default top_p
  top_k: 0 # default top_k
  # protocol-specific configuration; set only the section matching the protocol
  deepseek: # OPTIONAL
    response_format_type: text # response format
status: 1
@ -1,48 +0,0 @@
id: 107 # model id; entries with the same id are not overwritten
name: Gemini-2.5-Flash # model meta name
icon_uri: gemini_v2.png # model display icon uri
icon_url: '' # model display icon url
description:
  zh: gemini 模型简介 # default model description
  en: gemini model description
protocol: gemini # model connection protocol, see: backend/infra/contract/chatmodel/protocol.go
capability: # model base capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
    - audio
    - video
  input_tokens: 1048576 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 65536 # output token limit
  max_tokens: 1114112 # maximum token count
  json_mode: true # whether json mode is supported
  prefix_caching: true # whether prefix caching is supported
  reasoning: true # whether reasoning is supported
  prefill_response: true # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  base_url: '' # REQUIRED: base_url
  api_key: '' # REQUIRED: api_key
  model: gemini-2.5-flash # REQUIRED: model
  temperature: 0.7 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 1 # default top_p
  top_k: 0 # default top_k
  # protocol-specific configuration; set only the section matching the protocol
  gemini:
    backend: 0
    project: ''
    location: ''
    api_version: ''
    headers:
      key_1:
        - val_1
        - val_2
    timeout: 0
    include_thoughts: true
    thinking_budget: null
status: 1
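The gemini section above maps one header key to several values and uses null for an unset thinking_budget. A minimal sketch of how that shape can decode in Go, assuming hypothetical struct names and gopkg.in/yaml.v3:

// Sketch: gemini headers map one key to many values (map[string][]string);
// a null thinking_budget decodes to a nil pointer. Types are assumptions.
package main

import (
	"fmt"
	"log"

	"gopkg.in/yaml.v3"
)

type geminiConfig struct {
	Backend         int                 `yaml:"backend"`
	Headers         map[string][]string `yaml:"headers"`
	IncludeThoughts bool                `yaml:"include_thoughts"`
	ThinkingBudget  *int32              `yaml:"thinking_budget"` // null stays nil
}

func main() {
	raw := []byte(`
backend: 0
headers:
  key_1:
    - val_1
    - val_2
include_thoughts: true
thinking_budget: null
`)
	var g geminiConfig
	if err := yaml.Unmarshal(raw, &g); err != nil {
		log.Fatal(err)
	}
	fmt.Println(g.Headers["key_1"], g.ThinkingBudget == nil) // [val_1 val_2] true
}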
@ -1,31 +0,0 @@
id: 103 # model id; entries with the same id are not overwritten
name: Gemma-3 # model meta name
icon_uri: ollama.png # model display icon uri
icon_url: '' # model display icon url
description:
  zh: ollama 模型简介
  en: ollama model description
protocol: ollama # model connection protocol, see: backend/infra/contract/chatmodel/protocol.go
capability: # model base capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
  input_tokens: 128000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16384 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  base_url: '' # REQUIRED: base_url
  model: '' # REQUIRED: model
  temperature: 0.6 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 0.95 # default top_p
  top_k: 20 # default top_k
status: 1
@ -1,39 +0,0 @@
id: 101 # model id; entries with the same id are not overwritten
name: GPT-4o # model meta name
icon_uri: openai_v2.png # model display icon uri
icon_url: '' # model display icon url
description:
  zh: gpt 模型简介
  en: Multi-modal, 320ms, 88.7% MMLU, excels in education, customer support, health, and entertainment. # default model description
protocol: openai # model connection protocol, see: backend/infra/contract/chatmodel/protocol.go
capability: # model base capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
    - image
  input_tokens: 128000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16384 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  base_url: '' # REQUIRED: base_url
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model
  temperature: 0.7 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 1 # default top_p
  top_k: 0 # default top_k
  # protocol-specific configuration; set only the section matching the protocol
  openai: # OPTIONAL
    by_azure: true # true if using azure openai
    api_version: '' # azure api version
    response_format: # response format
      type: text
status: 1
@ -1,36 +0,0 @@
id: 105 # model id; entries with the same id are not overwritten
name: Qwen3-32B # model meta name
icon_uri: qwen_v2.png # model display icon uri
icon_url: '' # model display icon url
description:
  zh: 通义千问模型 # default model description
  en: qwen model description
protocol: qwen # model connection protocol, see: backend/infra/contract/chatmodel/protocol.go
capability: # model base capabilities
  function_call: true # whether the model supports function call
  input_modal: # supported input modalities
    - text
  input_tokens: 128000 # input token limit
  output_modal: # supported output modalities
    - text
  output_tokens: 16384 # output token limit
  max_tokens: 128000 # maximum token count
  json_mode: false # whether json mode is supported
  prefix_caching: false # whether prefix caching is supported
  reasoning: false # whether reasoning is supported
  prefill_response: false # whether prefill (response continuation) is supported
conn_config: # model connection parameters
  base_url: '' # REQUIRED: base_url
  api_key: '' # REQUIRED: api_key
  model: '' # REQUIRED: model
  temperature: 0.7 # default temperature
  frequency_penalty: 0 # default frequency_penalty
  presence_penalty: 0 # default presence_penalty
  max_tokens: 4096 # default max_tokens
  top_p: 1 # default top_p
  top_k: 0 # default top_k
  # protocol-specific configuration; set only the section matching the protocol
  qwen: # OPTIONAL
    response_format: # response format
      type: text
status: 1
@ -0,0 +1,133 @@
id: 2002
name: Doubao Model
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: 豆包模型简介
  en: doubao model description
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **文本**: 使用普通文本格式回复\n- **Markdown**: 将引导模型使用Markdown格式输出回复\n- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **Markdown**: Uses Markdown format for replies\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: Markdown
        value: "1"
      - label: JSON
        value: "2"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: Doubao
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
      - image
    input_tokens: 128000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16384
    prefix_caching: false
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
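In the new format above, default_parameters carry numeric values as strings ("0.7", "4096") next to a type tag ("float" / "int"), so a consumer has to parse by tag. A hedged sketch of that parsing, with hypothetical type names (the repo's real loader may differ):

// Sketch: parse a default_parameters value according to its type tag.
package main

import (
	"fmt"
	"log"
	"strconv"
)

type defaultParam struct {
	Name       string
	Type       string            // "float" or "int" in the templates above
	DefaultVal map[string]string // e.g. {"default_val": "1.0", "precise": "0.3"}
}

func parseDefault(p defaultParam, key string) (float64, error) {
	s, ok := p.DefaultVal[key]
	if !ok {
		return 0, fmt.Errorf("%s: no %q default", p.Name, key)
	}
	switch p.Type {
	case "float":
		return strconv.ParseFloat(s, 64)
	case "int":
		n, err := strconv.ParseInt(s, 10, 64)
		return float64(n), err
	default:
		return 0, fmt.Errorf("%s: unknown type %q", p.Name, p.Type)
	}
}

func main() {
	temp := defaultParam{
		Name: "temperature",
		Type: "float",
		DefaultVal: map[string]string{
			"default_val": "1.0", "balance": "0.8", "creative": "1", "precise": "0.3",
		},
	}
	v, err := parseDefault(temp, "precise")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("precise temperature:", v) // 0.3
}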
@ -0,0 +1,108 @@
id: 65536
name: Doubao-1.5-Lite
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: Doubao-1.5-lite,全新一代轻量版模型,极致响应速度,效果与时延均达到全球一流水平。
  en: Doubao-1.5-lite, the new generation lightweight model, delivers ultra-fast response speed with both performance and latency reaching world-class standards.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
meta:
  name: doubao-1.5-lite
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
    input_tokens: 20000
    json_mode: false
    max_tokens: 32000
    output_modal:
      - text
    output_tokens: 12000
    prefix_caching: true
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
@ -0,0 +1,108 @@
id: 65536
name: Doubao-1.5-Pro-256k
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: doubao-1.5-pro-256k,基于doubao-1.5-Pro全面升级版,整体效果大幅提升10%。更高性能、更大窗口、超高性价比,适用于更广泛的应用场景。
  en: doubao-1.5-pro-256k is a fully upgraded version based on doubao-1.5-Pro, with an overall performance improvement of 10%. It offers higher performance, a larger context window, and exceptional cost-effectiveness, making it suitable for a wider range of application scenarios.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
meta:
  name: doubao-1.5-pro-256k
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
    input_tokens: 96000
    json_mode: false
    max_tokens: 256000
    output_modal:
      - text
    output_tokens: 12000
    prefix_caching: false
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
@ -0,0 +1,108 @@
id: 65536
name: Doubao-1.5-Pro-32k
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: Doubao-1.5-pro,全新一代主力模型,性能全面升级,在知识、代码、推理、等方面表现卓越。
  en: Doubao-1.5-pro, the new generation flagship model, features comprehensive performance upgrades and excels in areas such as knowledge, coding, and reasoning.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
meta:
  name: doubao-1.5-pro-32k
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
    input_tokens: 96000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 12000
    prefix_caching: true
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
@ -0,0 +1,131 @@
id: 65536
name: Doubao-1.5-Thinking-Pro
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: doubao-1.5 全新深度思考模型,在数学、编程、科学推理等专业领域及创意写作等通用任务中表现突出,在 AIME 2024、Codeforces、GPQA 等多项权威基准上达到或接近业界第一梯队水平。
  en: doubao-1.5 is a brand-new deep thinking model that excels in specialized fields such as mathematics, programming, scientific reasoning, as well as general tasks like creative writing. It achieves or approaches the industry’s top-tier level on multiple authoritative benchmarks including AIME 2024, Codeforces, and GPQA.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: JSON
        value: "1"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: doubao-1.5-thinking-pro
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
      - image
    input_tokens: 96000
    json_mode: true
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16000
    prefix_caching: false
    reasoning: true
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
@ -0,0 +1,132 @@
id: 65536
name: Doubao-1.5-Thinking-Vision-Pro
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: doubao-1-5-thinking-vision-pro 全新视觉深度思考模型,具备更强的通用多模态理解和推理能力,在 59 个公开评测基准中的 37 个上取得 SOTA 表现。
  en: doubao-1-5-thinking-vision-pro is a brand-new visual deep thinking model, featuring stronger general multimodal understanding and reasoning abilities, achieving SOTA performance on 37 out of 59 public evaluation benchmarks.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: JSON
        value: "1"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: doubao-1.5-thinking-vision-pro
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
      - image
      - video
    input_tokens: 96000
    json_mode: true
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16000
    prefix_caching: false
    reasoning: true
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
@ -0,0 +1,131 @@
id: 65536
name: Doubao-1.5-Vision-Lite
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: doubao-1.5-vision-lite,极具性价比的多模态大模型,支持任意分辨率和极端长宽比图像识别,增强视觉推理、文档识别、细节信息理解和指令遵循能力。
  en: doubao-1.5-vision-lite is a highly cost-effective multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detailed information comprehension, and instruction-following capabilities.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: JSON
        value: "1"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: doubao-1.5-vision-lite
  protocol: ark
  capability:
    function_call: false
    input_modal:
      - text
      - image
    input_tokens: 96000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16000
    prefix_caching: false
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
@ -0,0 +1,132 @@
id: 65536
name: Doubao-Seed-1.6-Flash
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: 有极致推理速度的多模态深度思考模型;同时支持文本和视觉理解。文本理解能力超过上一代 Lite 系列模型,视觉理解比肩友商 Pro 系列模型。
  en: A multimodal deep thinking model with extreme reasoning speed; it supports both text and visual understanding. Its text comprehension surpasses the previous generation Lite series models, while its visual understanding rivals competitor Pro series models.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: JSON
        value: "1"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: doubao-seed-1.6-flash
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
      - image
      - video
    input_tokens: 224000
    json_mode: true
    max_tokens: 256000
    output_modal:
      - text
    output_tokens: 32000
    prefix_caching: true
    reasoning: true
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
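Note that the response_format option values are per-template strings: the "Doubao Model" entry maps Text/Markdown/JSON to "0"/"1"/"2", while the thinking and seed templates map Text/JSON to "0"/"1", so a consumer should resolve the selected label through the options list rather than hard-code the integers. A hedged sketch (names are assumptions):

// Sketch: resolve a response_format radio option by label; values are
// strings in the templates and their meaning varies per template.
package main

import (
	"fmt"
	"strconv"
)

type option struct {
	Label string
	Value string
}

func pick(opts []option, label string) (int, error) {
	for _, o := range opts {
		if o.Label == label {
			return strconv.Atoi(o.Value)
		}
	}
	return 0, fmt.Errorf("no option labeled %q", label)
}

func main() {
	seedOpts := []option{{"Text", "0"}, {"JSON", "1"}}
	v, err := pick(seedOpts, "JSON")
	if err != nil {
		panic(err)
	}
	fmt.Println("response_format:", v) // 1 for the seed templates
}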
@ -0,0 +1,132 @@
id: 65536
name: Doubao-Seed-1.6-Thinking
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: 在思考能力上进行了大幅强化, 对比 doubao 1.5 代深度理解模型,在编程、数学、逻辑推理等基础能力上进一步提升, 支持视觉理解。
  en: Significantly enhanced in thinking capabilities, compared to the doubao 1.5 generation deep understanding model, with further improvements in fundamental skills such as programming, mathematics, and logical reasoning, and support for visual understanding.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: JSON
        value: "1"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: doubao-seed-1.6-thinking
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
      - image
      - video
    input_tokens: 224000
    json_mode: true
    max_tokens: 256000
    output_modal:
      - text
    output_tokens: 16000
    prefix_caching: true
    reasoning: true
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
@ -0,0 +1,132 @@
id: 65536
name: Doubao-Seed-1.6
icon_uri: default_icon/doubao_v2.png
icon_url: ""
description:
  zh: 全新多模态深度思考模型,同时支持 thinking、non-thinking、auto三种思考模式。其中 non-thinking 模型对比 doubao-1.5-pro-32k-250115 模型大幅提升。
  en: 'A brand-new multimodal deep thinking model supports three thinking modes: thinking, non-thinking, and auto. Among them, the non-thinking model has significantly improved compared to the doubao-1.5-pro-32k-250115 model.'
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: JSON
        value: "1"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: doubao-seed-1.6
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
      - image
      - video
    input_tokens: 224000
    json_mode: true
    max_tokens: 256000
    output_modal:
      - text
    output_tokens: 32000
    prefix_caching: true
    reasoning: true
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
status: 0
@@ -0,0 +1,108 @@
id: 65536
name: Deepseek-R1-VolcEngine
icon_uri: default_icon/deepseek_v2.png
icon_url: ""
description:
  zh: deepseek-r1 是由深度求索推出的深度思考模型。在后训练阶段大规模使用了强化学习技术,在仅有极少标注数据的情况下,极大提升了模型推理能力。在数学、代码、自然语言推理等任务上,性能比肩 OpenAI o1 正式版。
  en: deepseek-r1 is a deep thinking model launched by DeepSeek. It extensively employs reinforcement learning during the post-training phase, significantly enhancing the model's reasoning ability with very limited annotated data. In tasks such as mathematics, coding, and natural language reasoning, its performance rivals that of the official OpenAI o1 version.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
meta:
  name: deepseek-r1-ve
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
    input_tokens: 96000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 32000
    prefix_caching: true
    reasoning: true
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
  status: 0

@@ -0,0 +1,108 @@
id: 65536
name: Deepseek-V3-VolcEngine
icon_uri: default_icon/deepseek_v2.png
icon_url: ""
description:
  zh: deepseek-v3 由深度求索公司自研的MoE模型,多项评测成绩超越了 qwen2.5-72b 和 llama-3.1-405b 等开源模型,并在性能上和世界顶尖的闭源模型 gpt-4o 及 claude-3.5-Sonnet 不分伯仲。
  en: deepseek-v3 is a MoE model independently developed by DeepSeek. Its performance in multiple evaluations surpasses open-source models such as qwen2.5-72b and llama-3.1-405b, and it competes on par with world-leading closed-source models like gpt-4o and claude-3.5-Sonnet.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
meta:
  name: deepseek-v3-ve
  protocol: ark
  capability:
    function_call: true
    input_modal:
      - text
    input_tokens: 96000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16000
    prefix_caching: true
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.1
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.7
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark:
      region: ""
      access_key: ""
      secret_key: ""
      retry_times: null
      custom_header: {}
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
  status: 0

@@ -0,0 +1,201 @@
id: 100
name: test_model
icon_uri: default_icon/test_icon_uri.png
icon_url: test_icon_url
description:
  zh: test_description
  en: test_description
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: frequency_penalty
    label:
      zh: 重复语句惩罚
      en: Frequency penalty
    desc:
      zh: '- **frequency penalty**: 当该值为正时,会阻止模型频繁使用相同的词汇和短语,从而增加输出内容的多样性。'
      en: '**Frequency Penalty**: When positive, it discourages the model from repeating the same words and phrases, thereby increasing the diversity of the output.'
    type: float
    min: "-2"
    max: "2"
    default_val:
      default_val: "0"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: presence_penalty
    label:
      zh: 重复主题惩罚
      en: Presence penalty
    desc:
      zh: '- **presence penalty**: 当该值为正时,会阻止模型频繁讨论相同的主题,从而增加输出内容的多样性'
      en: '**Presence Penalty**: When positive, it prevents the model from discussing the same topics repeatedly, thereby increasing the diversity of the output.'
    type: float
    min: "-2"
    max: "2"
    default_val:
      default_val: "0"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **文本**: 使用普通文本格式回复\n- **Markdown**: 将引导模型使用Markdown格式输出回复\n- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **Markdown**: Uses Markdown format for replies\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: Markdown
        value: "1"
      - label: JSON
        value: "2"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: test_model
  protocol: test_protocol
  capability:
    function_call: true
    input_modal:
      - text
      - image
      - audio
      - video
    input_tokens: 1024
    json_mode: true
    max_tokens: 2048
    output_modal:
      - text
      - image
      - audio
      - video
    output_tokens: 1024
    prefix_caching: false
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: https://localhost:1234/chat/completion
    api_key: qweasdzxc
    timeout: 10s
    model: model_name
    temperature: 0.7
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 2048
    top_p: 0
    top_k: 0
    stop:
      - bye
    enable_thinking: false
    openai:
      by_azure: true
      api_version: "2024-10-21"
      response_format:
        type: text
        jsonschema: null
    claude:
      by_bedrock: true
      access_key: bedrock_ak
      secret_access_key: bedrock_secret_ak
      session_token: bedrock_session_token
      region: bedrock_region
    ark:
      region: region
      access_key: ak
      secret_key: sk
      retry_times: 123
      custom_header:
        key: val
    deepseek:
      response_format_type: text
    qwen: null
    gemini:
      backend: 0
      project: ""
      location: ""
      api_version: ""
      headers:
        key_1:
          - val_1
          - val_2
      timeout_ms: 0
      include_thoughts: true
      thinking_budget: null
    custom: {}
  status: 0

@@ -0,0 +1,90 @@
id: 2006
name: Claude-3.5-Sonnet
icon_uri: default_icon/claude_v2.png
icon_url: ""
description:
  zh: claude 模型简介
  en: claude model description
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: Claude-3.5-Sonnet
  protocol: claude
  capability:
    function_call: true
    input_modal:
      - text
      - image
    input_tokens: 128000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16384
    prefix_caching: false
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.7
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 1
    top_k: 0
    stop: []
    openai: null
    claude:
      by_bedrock: false
      access_key: ""
      secret_access_key: ""
      session_token: ""
      region: ""
    ark: null
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
  status: 0

@@ -0,0 +1,107 @@
id: 2004
name: DeepSeek-V3
icon_uri: default_icon/deepseek_v2.png
icon_url: ""
description:
  zh: deepseek 模型简介
  en: deepseek model description
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **文本**: 使用普通文本格式回复\n- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: JSON Object
        value: "1"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: DeepSeek-V3
  protocol: deepseek
  capability:
    function_call: false
    input_modal:
      - text
    input_tokens: 128000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16384
    prefix_caching: false
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.7
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 1
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark: null
    deepseek:
      response_format_type: text
    qwen: null
    gemini: null
    custom: {}
  status: 0

@@ -0,0 +1,139 @@
id: 2007
name: Gemini-2.5-Flash
icon_uri: default_icon/gemini_v2.png
icon_url: ""
description:
  zh: gemini 模型简介
  en: gemini model description
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **文本**: 使用普通文本格式回复\n- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: JSON
        value: "2"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: Gemini-2.5-Flash
  protocol: gemini
  capability:
    function_call: true
    input_modal:
      - text
      - image
      - audio
      - video
    input_tokens: 1048576
    json_mode: true
    max_tokens: 1114112
    output_modal:
      - text
    output_tokens: 65536
    prefix_caching: true
    reasoning: true
    prefill_response: true
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: gemini-2.5-flash
    temperature: 0.7
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 1
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark: null
    deepseek: null
    qwen: null
    gemini:
      backend: 0
      project: ""
      location: ""
      api_version: ""
      headers:
        key_1:
          - val_1
          - val_2
      timeout_ms: 0
      include_thoughts: true
      thinking_budget: null
    custom: {}
  status: 0

@@ -0,0 +1,84 @@
id: 2003
name: Gemma-3
icon_uri: default_icon/ollama.png
icon_url: ""
description:
  zh: ollama 模型简介
  en: ollama model description
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: Gemma-3
  protocol: ollama
  capability:
    function_call: true
    input_modal:
      - text
    input_tokens: 128000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16384
    prefix_caching: false
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.6
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 0.95
    top_k: 20
    stop: []
    openai: null
    claude: null
    ark: null
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
  status: 0

@@ -0,0 +1,171 @@
id: 2001
name: GPT-4o
icon_uri: default_icon/openai_v2.png
icon_url: ""
description:
  zh: gpt 模型简介
  en: Multi-modal, 320ms, 88.7% MMLU, excels in education, customer support, health, and entertainment.
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.7"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: frequency_penalty
    label:
      zh: 重复语句惩罚
      en: Frequency penalty
    desc:
      zh: '- **frequency penalty**: 当该值为正时,会阻止模型频繁使用相同的词汇和短语,从而增加输出内容的多样性。'
      en: '**Frequency Penalty**: When positive, it discourages the model from repeating the same words and phrases, thereby increasing the diversity of the output.'
    type: float
    min: "-2"
    max: "2"
    default_val:
      default_val: "0"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: presence_penalty
    label:
      zh: 重复主题惩罚
      en: Presence penalty
    desc:
      zh: '- **presence penalty**: 当该值为正时,会阻止模型频繁讨论相同的主题,从而增加输出内容的多样性'
      en: '**Presence Penalty**: When positive, it prevents the model from discussing the same topics repeatedly, thereby increasing the diversity of the output.'
    type: float
    min: "-2"
    max: "2"
    default_val:
      default_val: "0"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: response_format
    label:
      zh: 输出格式
      en: Response format
    desc:
      zh: '- **文本**: 使用普通文本格式回复\n- **Markdown**: 将引导模型使用Markdown格式输出回复\n- **JSON**: 将引导模型使用JSON格式输出'
      en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **Markdown**: Uses Markdown format for replies\n- **JSON**: Uses JSON format for replies'
    type: int
    min: ""
    max: ""
    default_val:
      default_val: "0"
    options:
      - label: Text
        value: "0"
      - label: Markdown
        value: "1"
      - label: JSON
        value: "2"
    style:
      widget: radio_buttons
      label:
        zh: 输入及输出设置
        en: Input and output settings
meta:
  name: GPT-4o
  protocol: openai
  capability:
    function_call: true
    input_modal:
      - text
      - image
    input_tokens: 128000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16384
    prefix_caching: false
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.7
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 1
    top_k: 0
    stop: []
    openai:
      by_azure: true
      api_version: ""
      response_format:
        type: text
        jsonschema: null
    claude: null
    ark: null
    deepseek: null
    qwen: null
    gemini: null
    custom: {}
  status: 0

@@ -0,0 +1,106 @@
id: 2005
name: Qwen3-32B
icon_uri: default_icon/qwen_v2.png
icon_url: ""
description:
  zh: 通义千问模型
  en: qwen model description
default_parameters:
  - name: temperature
    label:
      zh: 生成随机性
      en: Temperature
    desc:
      zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。'
      en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
    type: float
    min: "0"
    max: "1"
    default_val:
      balance: "0.8"
      creative: "1"
      default_val: "1.0"
      precise: "0.3"
    precision: 1
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
  - name: max_tokens
    label:
      zh: 最大回复长度
      en: Response max length
    desc:
      zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
      en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
    type: int
    min: "1"
    max: "4096"
    default_val:
      default_val: "4096"
    options: []
    style:
      widget: slider
      label:
        zh: 输入及输出设置
        en: Input and output settings
  - name: top_p
    label:
      zh: Top P
      en: Top P
    desc:
      zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择,直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇,从而控制输出内容的多样性。建议不要与“生成随机性”同时调整。'
      en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
    type: float
    min: "0"
    max: "1"
    default_val:
      default_val: "0.95"
    precision: 2
    options: []
    style:
      widget: slider
      label:
        zh: 生成多样性
        en: Generation diversity
meta:
  name: Qwen3-32B
  protocol: qwen
  capability:
    function_call: true
    input_modal:
      - text
    input_tokens: 128000
    json_mode: false
    max_tokens: 128000
    output_modal:
      - text
    output_tokens: 16384
    prefix_caching: false
    reasoning: false
    prefill_response: false
  conn_config:
    base_url: ""
    api_key: ""
    timeout: 0s
    model: ""
    temperature: 0.7
    frequency_penalty: 0
    presence_penalty: 0
    max_tokens: 4096
    top_p: 1
    top_k: 0
    stop: []
    openai: null
    claude: null
    ark: null
    deepseek: null
    qwen:
      response_format:
        type: text
        jsonschema: null
    gemini: null
    custom: {}
  status: 0

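All of the model templates above share one shape: top-level display fields (id, name, icon_uri, localized description), a default_parameters list that drives the tuning sliders and radio buttons, and a meta block carrying the protocol name, capability flags, and a conn_config skeleton. As a rough, hedged sketch of how such a file could be consumed — the struct, field set, and file path below are illustrative assumptions, not the repository's actual types (those live in the modelmgr packages touched later in this diff):

package main

import (
	"fmt"
	"os"

	"gopkg.in/yaml.v3"
)

// ModelTemplate mirrors only the top-level fields visible in the templates above.
type ModelTemplate struct {
	ID          int64             `yaml:"id"`
	Name        string            `yaml:"name"`
	IconURI     string            `yaml:"icon_uri"`
	Description map[string]string `yaml:"description"` // keyed by locale: zh, en
	Meta        struct {
		Name     string `yaml:"name"`
		Protocol string `yaml:"protocol"`
	} `yaml:"meta"`
}

func main() {
	raw, err := os.ReadFile("model_template_qwen.yaml") // hypothetical path
	if err != nil {
		panic(err)
	}
	var tpl ModelTemplate
	if err := yaml.Unmarshal(raw, &tpl); err != nil {
		panic(err)
	}
	fmt.Println(tpl.Name, tpl.Meta.Protocol) // e.g. "Qwen3-32B qwen"
}
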
@@ -1,39 +0,0 @@
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package crossmodelmgr

import (
	"context"

	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
)

type ModelMgr interface {
	MGetModelByID(ctx context.Context, req *modelmgr.MGetModelRequest) ([]*modelmgr.Model, error)
}

type Model = modelmgr.Model

var defaultSVC ModelMgr

func DefaultSVC() ModelMgr {
	return defaultSVC
}

func SetDefaultSVC(c ModelMgr) {
	defaultSVC = c
}

@@ -1,52 +0,0 @@
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package modelmgr

import (
	"context"

	model "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossmodelmgr"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr"
)

var defaultSVC crossmodelmgr.ModelMgr

type impl struct {
	DomainSVC modelmgr.Manager
}

func InitDomainService(c modelmgr.Manager) crossmodelmgr.ModelMgr {
	defaultSVC = &impl{
		DomainSVC: c,
	}
	return defaultSVC
}

func (s *impl) MGetModelByID(ctx context.Context, req *modelmgr.MGetModelRequest) ([]*model.Model, error) {
	res, err := s.DomainSVC.MGetModelByID(ctx, req)
	if err != nil {
		return nil, err
	}

	ret := make([]*model.Model, 0, len(res))
	for _, v := range res {
		ret = append(ret, v.Model)
	}

	return ret, nil
}

@@ -22,10 +22,9 @@ import (

	model2 "github.com/cloudwego/eino/components/model"

	"github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossmodelmgr"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/model"
	"github.com/coze-dev/coze-studio/backend/infra/contract/chatmodel"
	"github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
	chatmodel2 "github.com/coze-dev/coze-studio/backend/infra/impl/chatmodel"
	"github.com/coze-dev/coze-studio/backend/pkg/lang/ptr"
)

@@ -45,9 +44,9 @@ func NewModelManager(m modelmgr.Manager, f chatmodel.Factory) *ModelManager {
	}
}

func (m *ModelManager) GetModel(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *crossmodelmgr.Model, error) {
func (m *ModelManager) GetModel(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *modelmgr.Model, error) {
	modelID := params.ModelType
	models, err := crossmodelmgr.DefaultSVC().MGetModelByID(ctx, &modelmgr.MGetModelRequest{
	models, err := m.modelMgr.MGetModelByID(ctx, &modelmgr.MGetModelRequest{
		IDs: []int64{modelID},
	})
	if err != nil {
@@ -55,7 +54,7 @@ func (m *ModelManager) GetModel(ctx context.Context, params *model.LLMParams) (m
	}
	var config *chatmodel.Config
	var protocol chatmodel.Protocol
	var mdl *crossmodelmgr.Model
	var mdl *modelmgr.Model
	for i := range models {
		md := models[i]
		if md.ID == modelID {

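Taken together, the two hunks above replace the package-level crossmodelmgr.DefaultSVC() lookup with the modelmgr.Manager injected through NewModelManager, so GetModel resolves models via m.modelMgr. A minimal usage sketch under that assumption — mgr and factory stand in for whatever concrete implementations the caller wires up, and the helper function name is invented for illustration:

// Sketch only: calling the refactored ModelManager from the same package.
func resolveModel(ctx context.Context, mgr modelmgr.Manager, factory chatmodel.Factory) error {
	mm := NewModelManager(mgr, factory)

	// ModelType carries the model ID, e.g. 2001 for the GPT-4o template above.
	cm, info, err := mm.GetModel(ctx, &model.LLMParams{ModelType: 2001})
	if err != nil {
		return err // unknown ID or invalid provider config
	}
	_ = cm   // model2.BaseChatModel, ready for generation calls
	_ = info // *modelmgr.Model carrying the template metadata
	return nil
}
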
@@ -26,6 +26,7 @@ import (
	"github.com/cloudwego/eino/compose"
	"github.com/cloudwego/eino/flow/agent/react"
	"github.com/cloudwego/eino/schema"
	"github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"

	"github.com/coze-dev/coze-studio/backend/domain/agent/singleagent/entity"
	"github.com/coze-dev/coze-studio/backend/domain/workflow"

@@ -38,6 +39,7 @@ type Config struct {
	Agent        *entity.SingleAgent
	UserID       string
	Identity     *entity.AgentIdentity
	ModelMgr     modelmgr.Manager
	ModelFactory chatmodel.Factory
	CPStore      compose.CheckPointStore
}

@@ -86,7 +88,7 @@ func BuildAgent(ctx context.Context, conf *Config) (r *AgentRunner, err error) {
		return nil, err
	}

	modelInfo, err := loadModelInfo(ctx, ptr.From(conf.Agent.ModelInfo.ModelId))
	modelInfo, err := loadModelInfo(ctx, conf.ModelMgr, ptr.From(conf.Agent.ModelInfo.ModelId))
	if err != nil {
		return nil, err
	}

@@ -27,11 +27,10 @@ import (
	"github.com/cloudwego/eino/schema"

	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/agentrun"
	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/singleagent"
	"github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossmodelmgr"
	"github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossworkflow"
	"github.com/coze-dev/coze-studio/backend/domain/agent/singleagent/entity"
	"github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
	"github.com/coze-dev/coze-studio/backend/pkg/lang/conv"
	"github.com/coze-dev/coze-studio/backend/pkg/logs"
)

@@ -59,7 +58,7 @@ type AgentRunner struct {
	requireCheckpoint bool

	containWfTool bool
	modelInfo     *crossmodelmgr.Model
	modelInfo     *modelmgr.Model
}

func (r *AgentRunner) StreamExecute(ctx context.Context, req *AgentRequest) (

@@ -20,16 +20,15 @@ import (
	"context"
	"fmt"

	"github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossmodelmgr"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/infra/contract/chatmodel"
	"github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
	"github.com/coze-dev/coze-studio/backend/pkg/errorx"
	"github.com/coze-dev/coze-studio/backend/types/errno"
)

type config struct {
	modelFactory chatmodel.Factory
	modelInfo    *crossmodelmgr.Model
	modelInfo    *modelmgr.Model
}

func newChatModel(ctx context.Context, conf *config) (chatmodel.ToolCallingChatModel, error) {

@@ -53,12 +52,12 @@ func newChatModel(ctx context.Context, conf *config) (chatmodel.ToolCallingChatM
	return cm, nil
}

func loadModelInfo(ctx context.Context, modelID int64) (*crossmodelmgr.Model, error) {
func loadModelInfo(ctx context.Context, manager modelmgr.Manager, modelID int64) (*modelmgr.Model, error) {
	if modelID == 0 {
		return nil, fmt.Errorf("modelID is required")
	}

	models, err := crossmodelmgr.DefaultSVC().MGetModelByID(ctx, &modelmgr.MGetModelRequest{
	models, err := manager.MGetModelByID(ctx, &modelmgr.MGetModelRequest{
		IDs: []int64{modelID},
	})

@@ -22,6 +22,7 @@ import (
	"math/rand"

	"github.com/cloudwego/eino/compose"
	"github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
	"github.com/jinzhu/copier"

	"github.com/cloudwego/eino/schema"

@@ -44,6 +45,7 @@ type singleAgentImpl struct {
}

type Components struct {
	ModelMgr     modelmgr.Manager
	ModelFactory chatmodel.Factory

	AgentDraftRepo repository.SingleAgentDraftRepo

@@ -106,6 +108,7 @@ func (s *singleAgentImpl) StreamExecute(ctx context.Context, req *entity.Execute
		Agent:        ae,
		UserID:       req.UserID,
		Identity:     req.Identity,
		ModelMgr:     s.ModelMgr,
		ModelFactory: s.ModelFactory,
		CPStore:      s.CPStore,
	}

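The same injection pattern reaches the single-agent domain: Components now carries a modelmgr.Manager, and StreamExecute threads it into the agent Config so BuildAgent can call loadModelInfo without the deleted global service. A hedged wiring sketch — the variable names and the package qualifier below are assumptions for illustration, not code from this diff:

// Sketch only: supplying the manager when assembling the single-agent service.
components := &singleagent.Components{
	ModelMgr:     modelMgrImpl,     // any implementation of modelmgr.Manager
	ModelFactory: chatModelFactory, // any implementation of chatmodel.Factory
	// ... repositories and stores unchanged
}
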
@@ -1,41 +0,0 @@
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package entity

import "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"

type Model struct {
	*modelmgr.Model
}

type ModelMeta = modelmgr.ModelMeta

type ModelMetaStatus = modelmgr.ModelMetaStatus

func (m *Model) FindParameter(name modelmgr.ParameterName) (*modelmgr.Parameter, bool) {
	if len(m.DefaultParameters) == 0 {
		return nil, false
	}

	for _, param := range m.DefaultParameters {
		if param.Name == name {
			return param, true
		}
	}

	return nil, false
}

@@ -1,89 +0,0 @@
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package entity

import (
	"encoding/json"
	"testing"

	"github.com/stretchr/testify/assert"

	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
)

func TestDefaultParameter(t *testing.T) {
	dps := []*modelmgr.Parameter{
		{
			Name: "temperature",
			Label: &modelmgr.MultilingualText{
				ZH: "生成随机性",
				EN: "Temperature",
			},
			Desc: &modelmgr.MultilingualText{
				ZH: "- **temperature**: 调高温度会使得模型的输出更多样性和创新性,反之,降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与“Top p”同时调整。",
				EN: "**Temperature**:\\n\\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\\n- It is recommended not to adjust this value with \\\"Top p\\\" at the same time.",
			},
			Type:      modelmgr.ValueTypeFloat,
			Min:       "0",
			Max:       "1",
			Precision: 1,
			DefaultVal: modelmgr.DefaultValue{
				modelmgr.DefaultTypeDefault:  "1.0",
				modelmgr.DefaultTypeCreative: "1",
				modelmgr.DefaultTypeBalance:  "0.8",
				modelmgr.DefaultTypePrecise:  "0.3",
			},
			Style: modelmgr.DisplayStyle{
				Widget: modelmgr.WidgetSlider,
				Label: &modelmgr.MultilingualText{
					ZH: "生成多样性",
					EN: "Generation diversity",
				},
			},
		},
		{
			Name: "max_tokens",
			Label: &modelmgr.MultilingualText{
				ZH: "最大回复长度",
				EN: "Response max length",
			},
			Desc: &modelmgr.MultilingualText{
				ZH: "控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。",
				EN: "You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.",
			},
			Type:      modelmgr.ValueTypeInt,
			Min:       "1",
			Max:       "12288",
			Precision: 0,
			DefaultVal: modelmgr.DefaultValue{
				modelmgr.DefaultTypeDefault: "4096",
			},
			Style: modelmgr.DisplayStyle{
				Widget: modelmgr.WidgetSlider,
				Label: &modelmgr.MultilingualText{
					ZH: "输入及输出设置",
					EN: "Input and output settings",
				},
			},
		},
	}

	data, err := json.Marshal(dps)
	assert.NoError(t, err)

	t.Logf("default parameters: %s", string(data))
}

@@ -1,23 +0,0 @@
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package entity

import (
	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
)

type Capability = modelmgr.Capability

@@ -1,69 +0,0 @@
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package modelmgr

import (
	"context"

	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr/entity"
)

type Manager interface {
	CreateModelMeta(ctx context.Context, meta *entity.ModelMeta) (*entity.ModelMeta, error)
	UpdateModelMetaStatus(ctx context.Context, id int64, status entity.ModelMetaStatus) error
	DeleteModelMeta(ctx context.Context, id int64) error
	ListModelMeta(ctx context.Context, req *ListModelMetaRequest) (*ListModelMetaResponse, error)
	MGetModelMetaByID(ctx context.Context, req *MGetModelMetaRequest) ([]*entity.ModelMeta, error)

	CreateModel(ctx context.Context, model *entity.Model) (*entity.Model, error)
	DeleteModel(ctx context.Context, id int64) error
	ListModel(ctx context.Context, req *ListModelRequest) (*ListModelResponse, error)
	MGetModelByID(ctx context.Context, req *MGetModelRequest) ([]*entity.Model, error)
}

type ListModelMetaRequest struct {
	FuzzyModelName *string
	Status         []entity.ModelMetaStatus
	Limit          int
	Cursor         *string
}

type ListModelMetaResponse struct {
	ModelMetaList []*entity.ModelMeta
	HasMore       bool
	NextCursor    *string
}

type MGetModelMetaRequest struct {
	IDs []int64
}

type ListModelRequest struct {
	FuzzyModelName *string
	Status         []modelmgr.ModelEntityStatus // default is default and in_use status
	Limit          int
	Cursor         *string
}

type ListModelResponse struct {
	ModelList  []*entity.Model
	HasMore    bool
	NextCursor *string
}

type MGetModelRequest = modelmgr.MGetModelRequest

@ -1,136 +0,0 @@
|
|||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dao
|
||||
|
import (
	"context"
	"database/sql/driver"
	"strconv"

	"gorm.io/gorm"

	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr/internal/dal/model"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr/internal/dal/query"
	"github.com/coze-dev/coze-studio/backend/pkg/lang/slices"
	"github.com/coze-dev/coze-studio/backend/pkg/lang/sqlutil"
)

type ModelEntityRepo interface {
	Create(ctx context.Context, modelEntity *model.ModelEntity) error
	Delete(ctx context.Context, id int64) error
	List(ctx context.Context, fuzzyModelName *string, scenario *int64, status []modelmgr.ModelEntityStatus,
		limit int, cursor *string) (resp []*model.ModelEntity, nextCursor *string, hasMore bool, err error)
	MGet(ctx context.Context, ids []int64) ([]*model.ModelEntity, error)
}

func NewModelEntityDAO(db *gorm.DB) ModelEntityRepo {
	return &ModelEntityDAO{
		db:    db,
		query: query.Use(db),
	}
}

type ModelEntityDAO struct {
	db    *gorm.DB
	query *query.Query
}

func (m *ModelEntityDAO) Create(ctx context.Context, modelEntity *model.ModelEntity) error {
	return m.query.ModelEntity.WithContext(ctx).Create(modelEntity)
}

func (m *ModelEntityDAO) Delete(ctx context.Context, id int64) error {
	me := m.query.ModelEntity
	_, err := me.WithContext(ctx).
		Debug().
		Where(me.ID.Eq(id)).
		Delete()

	return err
}

func (m *ModelEntityDAO) List(ctx context.Context, fuzzyModelName *string, scenario *int64, status []modelmgr.ModelEntityStatus,
	limit int, cursor *string,
) (resp []*model.ModelEntity, nextCursor *string, hasMore bool, err error) {
	me := m.query.ModelEntity
	do := me.WithContext(ctx)

	if fuzzyModelName != nil {
		do = do.Where(me.Name.Like(*fuzzyModelName))
	}
	if scenario != nil {
		do = do.Where(me.Scenario.Eq(sqlutil.DriverValue(*scenario)))
	}
	if len(status) > 0 {
		vals := slices.Transform(status, func(a modelmgr.ModelEntityStatus) driver.Valuer {
			return sqlutil.DriverValue(int64(a))
		})

		do = do.Where(me.Status.In(vals...))
	}
	if cursor != nil {
		var id int64
		id, err = m.fromCursor(*cursor)
		if err != nil {
			return nil, nil, false, err
		}
		do = do.Where(me.ID.Lt(id))
	}
	if limit == 0 {
		limit = defaultLimit
	}

	pos, err := do.Limit(limit).Order(me.ID.Desc()).Find()
	if err != nil {
		return nil, nil, false, err
	}

	if len(pos) == 0 {
		return nil, nil, false, nil
	}

	hasMore = len(pos) == limit
	if len(pos) > 0 {
		nextCursor = m.toIDCursor(pos[len(pos)-1].ID)
	}

	return pos, nextCursor, hasMore, nil
}

func (m *ModelEntityDAO) MGet(ctx context.Context, ids []int64) ([]*model.ModelEntity, error) {
	if len(ids) == 0 {
		return nil, nil
	}

	me := m.query.ModelEntity
	pos, err := me.WithContext(ctx).Where(me.ID.In(ids...)).Find()
	if err != nil {
		return nil, err
	}

	return pos, nil
}

func (m *ModelEntityDAO) fromCursor(cursor string) (id int64, err error) {
	return strconv.ParseInt(cursor, 10, 64)
}

func (m *ModelEntityDAO) toIDCursor(id int64) (cursor *string) {
	s := strconv.FormatInt(id, 10)
	return &s
}
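The List method above implements keyset pagination: pages run id-descending and the cursor is the last id of the previous page. A minimal consumption sketch, assuming an initialized repo; the helper name and page size of 100 are illustrative only:

// Sketch only: drain every page from ModelEntityRepo.List using the id-descending cursor.
func drainModelEntities(ctx context.Context, repo ModelEntityRepo) ([]*model.ModelEntity, error) {
	var all []*model.ModelEntity
	var cursor *string
	for {
		pos, next, hasMore, err := repo.List(ctx, nil, nil, nil, 100, cursor)
		if err != nil {
			return nil, err
		}
		all = append(all, pos...)
		if !hasMore {
			return all, nil
		}
		cursor = next // the smallest id seen so far; the next page continues below it
	}
}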
@ -1,189 +0,0 @@
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package dao

import (
	"context"
	"database/sql/driver"
	"errors"
	"strconv"
	"time"

	"gorm.io/gorm"

	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr/internal/dal/model"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr/internal/dal/query"
	"github.com/coze-dev/coze-studio/backend/pkg/lang/slices"
	"github.com/coze-dev/coze-studio/backend/pkg/lang/sqlutil"
)

const (
	defaultLimit = 100
)

type ModelMetaRepo interface {
	Create(ctx context.Context, meta *model.ModelMeta) error
	UpdateStatus(ctx context.Context, id int64, status modelmgr.ModelMetaStatus) error
	Delete(ctx context.Context, id int64) error
	List(ctx context.Context, fuzzyShowName *string, status []modelmgr.ModelMetaStatus, limit int, cursor *string) (
		resp []*model.ModelMeta, nextCursor *string, hasMore bool, err error)
	GetByID(ctx context.Context, id int64) (*model.ModelMeta, error)
	MGetByID(ctx context.Context, ids []int64) ([]*model.ModelMeta, error)
}

func NewModelMetaDAO(db *gorm.DB) ModelMetaRepo {
	return &ModelMetaDAO{
		db:    db,
		query: query.Use(db),
	}
}

type ModelMetaDAO struct {
	db    *gorm.DB
	query *query.Query
}

func (m *ModelMetaDAO) Create(ctx context.Context, meta *model.ModelMeta) error {
	return m.query.ModelMeta.WithContext(ctx).Create(meta)
}

func (m *ModelMetaDAO) UpdateStatus(ctx context.Context, id int64, status modelmgr.ModelMetaStatus) error {
	mm := m.query.ModelMeta
	_, err := mm.WithContext(ctx).
		Debug().
		Where(mm.ID.Eq(id)).
		Select(mm.Status, mm.UpdatedAt).
		Updates(&model.ModelMeta{
			Status:    status,
			UpdatedAt: time.Now().UnixMilli(),
		})

	return err
}

func (m *ModelMetaDAO) Delete(ctx context.Context, id int64) error {
	mm := m.query.ModelMeta
	_, err := mm.WithContext(ctx).
		Debug().
		Where(mm.ID.Eq(id)).
		Delete()

	return err
}
func (m *ModelMetaDAO) List(ctx context.Context, fuzzyShowName *string, status []modelmgr.ModelMetaStatus, limit int, cursor *string) (
	resp []*model.ModelMeta, nextCursor *string, hasMore bool, err error,
) {
	mm := m.query.ModelMeta
	do := mm.WithContext(ctx)

	if fuzzyShowName != nil {
		// the generated DO is immutable-style: Where returns a new DO, so the
		// result must be reassigned or the condition is silently dropped
		do = do.Where(mm.ModelName.Like(*fuzzyShowName))
	}

	if len(status) > 0 {
		vals := slices.Transform(status, func(a modelmgr.ModelMetaStatus) driver.Valuer {
			return sqlutil.DriverValue(a)
		})
		do = do.Where(mm.Status.In(vals...))
	}

	if cursor != nil {
		id, err := m.fromCursor(*cursor)
		if err != nil {
			return nil, nil, false, err
		}

		do = do.Where(mm.ID.Lt(id))
	}

	if limit == 0 {
		limit = defaultLimit
	}

	pos, err := do.Limit(limit).Order(mm.ID.Desc()).Find()
	if err != nil {
		return nil, nil, false, err
	}
	if len(pos) == 0 {
		return nil, nil, false, nil
	}

	hasMore = len(pos) == limit
	if len(pos) > 0 {
		nextCursor = m.toIDCursor(pos[len(pos)-1].ID)
	}

	return pos, nextCursor, hasMore, nil
}
func (m *ModelMetaDAO) GetByID(ctx context.Context, id int64) (*model.ModelMeta, error) {
	mm := m.query.ModelMeta
	po, err := mm.WithContext(ctx).Where(mm.ID.Eq(id)).Take()
	if err != nil {
		if errors.Is(err, gorm.ErrRecordNotFound) {
			return nil, nil
		}

		return nil, err
	}

	return po, nil
}

func (m *ModelMetaDAO) MGetByID(ctx context.Context, ids []int64) ([]*model.ModelMeta, error) {
	if len(ids) == 0 {
		return nil, nil
	}

	mm := m.query.ModelMeta
	do := mm.WithContext(ctx)

	pos, err := do.Where(mm.ID.In(ids...)).Find()
	if err != nil {
		return nil, err
	}

	// TODO: the intent was to return results in the order of ids, but the query
	// result is unordered, so no sorting is done here for now. @zhaonan
	// id2Idx := make(map[int64]int, len(ids))
	// for idx, id := range ids {
	// 	id2Idx[id] = idx
	// }
	//
	// resp := make([]*model.ModelMeta, 0, len(ids))
	// for _, po := range pos {
	// 	idx, found := id2Idx[po.ID]
	// 	if !found { // unexpected
	// 		return nil, fmt.Errorf("[MGetByID] unexpected data found, id=%v", po.ID)
	// 	}
	//
	// 	item := po
	// 	resp[idx] = item
	// }

	return pos, nil
}

func (m *ModelMetaDAO) fromCursor(cursor string) (id int64, err error) {
	return strconv.ParseInt(cursor, 10, 64)
}

func (m *ModelMetaDAO) toIDCursor(id int64) (cursor *string) {
	s := strconv.FormatInt(id, 10)
	return &s
}
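Note that the commented-out reordering above would panic as written: resp is allocated with length 0 and then written by index. A corrected sketch of the ordering the TODO asks for; the helper name is hypothetical and not part of this change:

// Sketch only: return MGetByID results in the order of the input ids.
// Ids that were not found simply produce no entry.
func reorderByIDs(ids []int64, pos []*model.ModelMeta) []*model.ModelMeta {
	id2Idx := make(map[int64]int, len(ids))
	for idx, id := range ids {
		id2Idx[id] = idx
	}

	ordered := make([]*model.ModelMeta, len(ids)) // full length, so index assignment is safe
	for _, po := range pos {
		if idx, ok := id2Idx[po.ID]; ok {
			ordered[idx] = po
		}
	}

	// compact away nil holes left by missing ids
	resp := make([]*model.ModelMeta, 0, len(ids))
	for _, po := range ordered {
		if po != nil {
			resp = append(resp, po)
		}
	}
	return resp
}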
@ -1,31 +0,0 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.

package model

import (
	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
	"gorm.io/gorm"
)

const TableNameModelEntity = "model_entity"

// ModelEntity model information
type ModelEntity struct {
	ID            int64                      `gorm:"column:id;primaryKey;comment:主键ID" json:"id"`                                                          // primary key ID
	MetaID        int64                      `gorm:"column:meta_id;not null;comment:模型元信息 id" json:"meta_id"`                                              // model meta id
	Name          string                     `gorm:"column:name;not null;comment:名称" json:"name"`                                                          // name
	Description   string                     `gorm:"column:description;comment:描述" json:"description"`                                                     // description
	DefaultParams []*modelmgr.Parameter      `gorm:"column:default_params;comment:默认参数;serializer:json" json:"default_params"`                             // default parameters
	Scenario      modelmgr.Scenario          `gorm:"column:scenario;not null;comment:模型应用场景;serializer:json" json:"scenario"`                              // model application scenario
	Status        modelmgr.ModelEntityStatus `gorm:"column:status;not null;default:1;comment:模型状态;serializer:json" json:"status"`                          // model status
	CreatedAt     int64                      `gorm:"column:created_at;not null;autoCreateTime:milli;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
	UpdatedAt     int64                      `gorm:"column:updated_at;not null;autoUpdateTime:milli;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
	DeletedAt     gorm.DeletedAt             `gorm:"column:deleted_at;comment:Delete Time in Milliseconds" json:"deleted_at"`                               // Delete Time in Milliseconds
}

// TableName ModelEntity's table name
func (*ModelEntity) TableName() string {
	return TableNameModelEntity
}
@ -1,34 +0,0 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.

package model

import (
	"github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/infra/contract/chatmodel"
	"gorm.io/gorm"
)

const TableNameModelMeta = "model_meta"

// ModelMeta model meta information
type ModelMeta struct {
	ID          int64                    `gorm:"column:id;primaryKey;comment:主键ID" json:"id"`                                                          // primary key ID
	ModelName   string                   `gorm:"column:model_name;not null;comment:模型名称" json:"model_name"`                                            // model name
	Protocol    string                   `gorm:"column:protocol;not null;comment:模型协议" json:"protocol"`                                                // model protocol
	IconURI     string                   `gorm:"column:icon_uri;not null;comment:Icon URI" json:"icon_uri"`                                            // Icon URI
	Capability  *modelmgr.Capability     `gorm:"column:capability;comment:模型能力;serializer:json" json:"capability"`                                     // model capability
	ConnConfig  *chatmodel.Config        `gorm:"column:conn_config;comment:模型连接配置;serializer:json" json:"conn_config"`                                 // model connection config
	Status      modelmgr.ModelMetaStatus `gorm:"column:status;not null;default:1;comment:模型状态;serializer:json" json:"status"`                          // model status
	Description string                   `gorm:"column:description;not null;comment:模型描述" json:"description"`                                          // model description
	CreatedAt   int64                    `gorm:"column:created_at;not null;autoCreateTime:milli;comment:Create Time in Milliseconds" json:"created_at"` // Create Time in Milliseconds
	UpdatedAt   int64                    `gorm:"column:updated_at;not null;autoUpdateTime:milli;comment:Update Time in Milliseconds" json:"updated_at"` // Update Time in Milliseconds
	DeletedAt   gorm.DeletedAt           `gorm:"column:deleted_at;comment:Delete Time in Milliseconds" json:"deleted_at"`                               // Delete Time in Milliseconds
	IconURL     string                   `gorm:"column:icon_url;not null;comment:Icon URL" json:"icon_url"`                                            // Icon URL
}

// TableName ModelMeta's table name
func (*ModelMeta) TableName() string {
	return TableNameModelMeta
}
@ -1,111 +0,0 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.

package query

import (
	"context"
	"database/sql"

	"gorm.io/gorm"

	"gorm.io/gen"

	"gorm.io/plugin/dbresolver"
)

var (
	Q           = new(Query)
	ModelEntity *modelEntity
	ModelMeta   *modelMeta
)

func SetDefault(db *gorm.DB, opts ...gen.DOOption) {
	*Q = *Use(db, opts...)
	ModelEntity = &Q.ModelEntity
	ModelMeta = &Q.ModelMeta
}

func Use(db *gorm.DB, opts ...gen.DOOption) *Query {
	return &Query{
		db:          db,
		ModelEntity: newModelEntity(db, opts...),
		ModelMeta:   newModelMeta(db, opts...),
	}
}

type Query struct {
	db *gorm.DB

	ModelEntity modelEntity
	ModelMeta   modelMeta
}

func (q *Query) Available() bool { return q.db != nil }

func (q *Query) clone(db *gorm.DB) *Query {
	return &Query{
		db:          db,
		ModelEntity: q.ModelEntity.clone(db),
		ModelMeta:   q.ModelMeta.clone(db),
	}
}

func (q *Query) ReadDB() *Query {
	return q.ReplaceDB(q.db.Clauses(dbresolver.Read))
}

func (q *Query) WriteDB() *Query {
	return q.ReplaceDB(q.db.Clauses(dbresolver.Write))
}

func (q *Query) ReplaceDB(db *gorm.DB) *Query {
	return &Query{
		db:          db,
		ModelEntity: q.ModelEntity.replaceDB(db),
		ModelMeta:   q.ModelMeta.replaceDB(db),
	}
}

type queryCtx struct {
	ModelEntity IModelEntityDo
	ModelMeta   IModelMetaDo
}

func (q *Query) WithContext(ctx context.Context) *queryCtx {
	return &queryCtx{
		ModelEntity: q.ModelEntity.WithContext(ctx),
		ModelMeta:   q.ModelMeta.WithContext(ctx),
	}
}

func (q *Query) Transaction(fc func(tx *Query) error, opts ...*sql.TxOptions) error {
	return q.db.Transaction(func(tx *gorm.DB) error { return fc(q.clone(tx)) }, opts...)
}

func (q *Query) Begin(opts ...*sql.TxOptions) *QueryTx {
	tx := q.db.Begin(opts...)
	return &QueryTx{Query: q.clone(tx), Error: tx.Error}
}

type QueryTx struct {
	*Query
	Error error
}

func (q *QueryTx) Commit() error {
	return q.db.Commit().Error
}

func (q *QueryTx) Rollback() error {
	return q.db.Rollback().Error
}

func (q *QueryTx) SavePoint(name string) error {
	return q.db.SavePoint(name).Error
}

func (q *QueryTx) RollbackTo(name string) error {
	return q.db.RollbackTo(name).Error
}
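For readers unfamiliar with gorm-gen, a minimal sketch of how the generated Query type above is driven; the function name and the metaPO/entityPO values are assumptions for illustration:

// Sketch only: create two related rows atomically via the generated query package.
func createModelPair(ctx context.Context, db *gorm.DB, metaPO *model.ModelMeta, entityPO *model.ModelEntity) error {
	q := query.Use(db)
	// multi-statement work shares one transaction; a returned error rolls it back
	return q.Transaction(func(tx *query.Query) error {
		if err := tx.ModelMeta.WithContext(ctx).Create(metaPO); err != nil {
			return err
		}
		return tx.ModelEntity.WithContext(ctx).Create(entityPO)
	})
}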
@ -1,417 +0,0 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.

package query

import (
	"context"

	"gorm.io/gorm"
	"gorm.io/gorm/clause"
	"gorm.io/gorm/schema"

	"gorm.io/gen"
	"gorm.io/gen/field"

	"gorm.io/plugin/dbresolver"

	"github.com/coze-dev/coze-studio/backend/domain/modelmgr/internal/dal/model"
)

func newModelEntity(db *gorm.DB, opts ...gen.DOOption) modelEntity {
	_modelEntity := modelEntity{}

	_modelEntity.modelEntityDo.UseDB(db, opts...)
	_modelEntity.modelEntityDo.UseModel(&model.ModelEntity{})

	tableName := _modelEntity.modelEntityDo.TableName()
	_modelEntity.ALL = field.NewAsterisk(tableName)
	_modelEntity.ID = field.NewInt64(tableName, "id")
	_modelEntity.MetaID = field.NewInt64(tableName, "meta_id")
	_modelEntity.Name = field.NewString(tableName, "name")
	_modelEntity.Description = field.NewString(tableName, "description")
	_modelEntity.DefaultParams = field.NewField(tableName, "default_params")
	_modelEntity.Scenario = field.NewField(tableName, "scenario")
	_modelEntity.Status = field.NewField(tableName, "status")
	_modelEntity.CreatedAt = field.NewInt64(tableName, "created_at")
	_modelEntity.UpdatedAt = field.NewInt64(tableName, "updated_at")
	_modelEntity.DeletedAt = field.NewField(tableName, "deleted_at")

	_modelEntity.fillFieldMap()

	return _modelEntity
}
// modelEntity model information
type modelEntity struct {
	modelEntityDo

	ALL           field.Asterisk
	ID            field.Int64  // primary key ID
	MetaID        field.Int64  // model meta id
	Name          field.String // name
	Description   field.String // description
	DefaultParams field.Field  // default parameters
	Scenario      field.Field  // model application scenario
	Status        field.Field  // model status
	CreatedAt     field.Int64  // Create Time in Milliseconds
	UpdatedAt     field.Int64  // Update Time in Milliseconds
	DeletedAt     field.Field  // Delete Time in Milliseconds

	fieldMap map[string]field.Expr
}
func (m modelEntity) Table(newTableName string) *modelEntity {
	m.modelEntityDo.UseTable(newTableName)
	return m.updateTableName(newTableName)
}

func (m modelEntity) As(alias string) *modelEntity {
	m.modelEntityDo.DO = *(m.modelEntityDo.As(alias).(*gen.DO))
	return m.updateTableName(alias)
}

func (m *modelEntity) updateTableName(table string) *modelEntity {
	m.ALL = field.NewAsterisk(table)
	m.ID = field.NewInt64(table, "id")
	m.MetaID = field.NewInt64(table, "meta_id")
	m.Name = field.NewString(table, "name")
	m.Description = field.NewString(table, "description")
	m.DefaultParams = field.NewField(table, "default_params")
	m.Scenario = field.NewField(table, "scenario")
	m.Status = field.NewField(table, "status")
	m.CreatedAt = field.NewInt64(table, "created_at")
	m.UpdatedAt = field.NewInt64(table, "updated_at")
	m.DeletedAt = field.NewField(table, "deleted_at")

	m.fillFieldMap()

	return m
}

func (m *modelEntity) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
	_f, ok := m.fieldMap[fieldName]
	if !ok || _f == nil {
		return nil, false
	}
	_oe, ok := _f.(field.OrderExpr)
	return _oe, ok
}

func (m *modelEntity) fillFieldMap() {
	m.fieldMap = make(map[string]field.Expr, 10)
	m.fieldMap["id"] = m.ID
	m.fieldMap["meta_id"] = m.MetaID
	m.fieldMap["name"] = m.Name
	m.fieldMap["description"] = m.Description
	m.fieldMap["default_params"] = m.DefaultParams
	m.fieldMap["scenario"] = m.Scenario
	m.fieldMap["status"] = m.Status
	m.fieldMap["created_at"] = m.CreatedAt
	m.fieldMap["updated_at"] = m.UpdatedAt
	m.fieldMap["deleted_at"] = m.DeletedAt
}

func (m modelEntity) clone(db *gorm.DB) modelEntity {
	m.modelEntityDo.ReplaceConnPool(db.Statement.ConnPool)
	return m
}

func (m modelEntity) replaceDB(db *gorm.DB) modelEntity {
	m.modelEntityDo.ReplaceDB(db)
	return m
}

type modelEntityDo struct{ gen.DO }

type IModelEntityDo interface {
	gen.SubQuery
	Debug() IModelEntityDo
	WithContext(ctx context.Context) IModelEntityDo
	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
	ReplaceDB(db *gorm.DB)
	ReadDB() IModelEntityDo
	WriteDB() IModelEntityDo
	As(alias string) gen.Dao
	Session(config *gorm.Session) IModelEntityDo
	Columns(cols ...field.Expr) gen.Columns
	Clauses(conds ...clause.Expression) IModelEntityDo
	Not(conds ...gen.Condition) IModelEntityDo
	Or(conds ...gen.Condition) IModelEntityDo
	Select(conds ...field.Expr) IModelEntityDo
	Where(conds ...gen.Condition) IModelEntityDo
	Order(conds ...field.Expr) IModelEntityDo
	Distinct(cols ...field.Expr) IModelEntityDo
	Omit(cols ...field.Expr) IModelEntityDo
	Join(table schema.Tabler, on ...field.Expr) IModelEntityDo
	LeftJoin(table schema.Tabler, on ...field.Expr) IModelEntityDo
	RightJoin(table schema.Tabler, on ...field.Expr) IModelEntityDo
	Group(cols ...field.Expr) IModelEntityDo
	Having(conds ...gen.Condition) IModelEntityDo
	Limit(limit int) IModelEntityDo
	Offset(offset int) IModelEntityDo
	Count() (count int64, err error)
	Scopes(funcs ...func(gen.Dao) gen.Dao) IModelEntityDo
	Unscoped() IModelEntityDo
	Create(values ...*model.ModelEntity) error
	CreateInBatches(values []*model.ModelEntity, batchSize int) error
	Save(values ...*model.ModelEntity) error
	First() (*model.ModelEntity, error)
	Take() (*model.ModelEntity, error)
	Last() (*model.ModelEntity, error)
	Find() ([]*model.ModelEntity, error)
	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*model.ModelEntity, err error)
	FindInBatches(result *[]*model.ModelEntity, batchSize int, fc func(tx gen.Dao, batch int) error) error
	Pluck(column field.Expr, dest interface{}) error
	Delete(...*model.ModelEntity) (info gen.ResultInfo, err error)
	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
	Updates(value interface{}) (info gen.ResultInfo, err error)
	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
	UpdateFrom(q gen.SubQuery) gen.Dao
	Attrs(attrs ...field.AssignExpr) IModelEntityDo
	Assign(attrs ...field.AssignExpr) IModelEntityDo
	Joins(fields ...field.RelationField) IModelEntityDo
	Preload(fields ...field.RelationField) IModelEntityDo
	FirstOrInit() (*model.ModelEntity, error)
	FirstOrCreate() (*model.ModelEntity, error)
	FindByPage(offset int, limit int) (result []*model.ModelEntity, count int64, err error)
	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
	Scan(result interface{}) (err error)
	Returning(value interface{}, columns ...string) IModelEntityDo
	UnderlyingDB() *gorm.DB
	schema.Tabler
}

func (m modelEntityDo) Debug() IModelEntityDo {
	return m.withDO(m.DO.Debug())
}

func (m modelEntityDo) WithContext(ctx context.Context) IModelEntityDo {
	return m.withDO(m.DO.WithContext(ctx))
}

func (m modelEntityDo) ReadDB() IModelEntityDo {
	return m.Clauses(dbresolver.Read)
}

func (m modelEntityDo) WriteDB() IModelEntityDo {
	return m.Clauses(dbresolver.Write)
}

func (m modelEntityDo) Session(config *gorm.Session) IModelEntityDo {
	return m.withDO(m.DO.Session(config))
}

func (m modelEntityDo) Clauses(conds ...clause.Expression) IModelEntityDo {
	return m.withDO(m.DO.Clauses(conds...))
}

func (m modelEntityDo) Returning(value interface{}, columns ...string) IModelEntityDo {
	return m.withDO(m.DO.Returning(value, columns...))
}

func (m modelEntityDo) Not(conds ...gen.Condition) IModelEntityDo {
	return m.withDO(m.DO.Not(conds...))
}

func (m modelEntityDo) Or(conds ...gen.Condition) IModelEntityDo {
	return m.withDO(m.DO.Or(conds...))
}

func (m modelEntityDo) Select(conds ...field.Expr) IModelEntityDo {
	return m.withDO(m.DO.Select(conds...))
}

func (m modelEntityDo) Where(conds ...gen.Condition) IModelEntityDo {
	return m.withDO(m.DO.Where(conds...))
}

func (m modelEntityDo) Order(conds ...field.Expr) IModelEntityDo {
	return m.withDO(m.DO.Order(conds...))
}

func (m modelEntityDo) Distinct(cols ...field.Expr) IModelEntityDo {
	return m.withDO(m.DO.Distinct(cols...))
}

func (m modelEntityDo) Omit(cols ...field.Expr) IModelEntityDo {
	return m.withDO(m.DO.Omit(cols...))
}

func (m modelEntityDo) Join(table schema.Tabler, on ...field.Expr) IModelEntityDo {
	return m.withDO(m.DO.Join(table, on...))
}

func (m modelEntityDo) LeftJoin(table schema.Tabler, on ...field.Expr) IModelEntityDo {
	return m.withDO(m.DO.LeftJoin(table, on...))
}

func (m modelEntityDo) RightJoin(table schema.Tabler, on ...field.Expr) IModelEntityDo {
	return m.withDO(m.DO.RightJoin(table, on...))
}

func (m modelEntityDo) Group(cols ...field.Expr) IModelEntityDo {
	return m.withDO(m.DO.Group(cols...))
}

func (m modelEntityDo) Having(conds ...gen.Condition) IModelEntityDo {
	return m.withDO(m.DO.Having(conds...))
}

func (m modelEntityDo) Limit(limit int) IModelEntityDo {
	return m.withDO(m.DO.Limit(limit))
}

func (m modelEntityDo) Offset(offset int) IModelEntityDo {
	return m.withDO(m.DO.Offset(offset))
}

func (m modelEntityDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IModelEntityDo {
	return m.withDO(m.DO.Scopes(funcs...))
}

func (m modelEntityDo) Unscoped() IModelEntityDo {
	return m.withDO(m.DO.Unscoped())
}

func (m modelEntityDo) Create(values ...*model.ModelEntity) error {
	if len(values) == 0 {
		return nil
	}
	return m.DO.Create(values)
}

func (m modelEntityDo) CreateInBatches(values []*model.ModelEntity, batchSize int) error {
	return m.DO.CreateInBatches(values, batchSize)
}

// Save : !!! underlying implementation is different with GORM
// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
func (m modelEntityDo) Save(values ...*model.ModelEntity) error {
	if len(values) == 0 {
		return nil
	}
	return m.DO.Save(values)
}

func (m modelEntityDo) First() (*model.ModelEntity, error) {
	if result, err := m.DO.First(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelEntity), nil
	}
}

func (m modelEntityDo) Take() (*model.ModelEntity, error) {
	if result, err := m.DO.Take(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelEntity), nil
	}
}

func (m modelEntityDo) Last() (*model.ModelEntity, error) {
	if result, err := m.DO.Last(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelEntity), nil
	}
}

func (m modelEntityDo) Find() ([]*model.ModelEntity, error) {
	result, err := m.DO.Find()
	return result.([]*model.ModelEntity), err
}

func (m modelEntityDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*model.ModelEntity, err error) {
	buf := make([]*model.ModelEntity, 0, batchSize)
	err = m.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
		defer func() { results = append(results, buf...) }()
		return fc(tx, batch)
	})
	return results, err
}

func (m modelEntityDo) FindInBatches(result *[]*model.ModelEntity, batchSize int, fc func(tx gen.Dao, batch int) error) error {
	return m.DO.FindInBatches(result, batchSize, fc)
}

func (m modelEntityDo) Attrs(attrs ...field.AssignExpr) IModelEntityDo {
	return m.withDO(m.DO.Attrs(attrs...))
}

func (m modelEntityDo) Assign(attrs ...field.AssignExpr) IModelEntityDo {
	return m.withDO(m.DO.Assign(attrs...))
}

func (m modelEntityDo) Joins(fields ...field.RelationField) IModelEntityDo {
	for _, _f := range fields {
		m = *m.withDO(m.DO.Joins(_f))
	}
	return &m
}

func (m modelEntityDo) Preload(fields ...field.RelationField) IModelEntityDo {
	for _, _f := range fields {
		m = *m.withDO(m.DO.Preload(_f))
	}
	return &m
}

func (m modelEntityDo) FirstOrInit() (*model.ModelEntity, error) {
	if result, err := m.DO.FirstOrInit(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelEntity), nil
	}
}

func (m modelEntityDo) FirstOrCreate() (*model.ModelEntity, error) {
	if result, err := m.DO.FirstOrCreate(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelEntity), nil
	}
}

func (m modelEntityDo) FindByPage(offset int, limit int) (result []*model.ModelEntity, count int64, err error) {
	result, err = m.Offset(offset).Limit(limit).Find()
	if err != nil {
		return
	}

	if size := len(result); 0 < limit && 0 < size && size < limit {
		count = int64(size + offset)
		return
	}

	count, err = m.Offset(-1).Limit(-1).Count()
	return
}

func (m modelEntityDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
	count, err = m.Count()
	if err != nil {
		return
	}

	err = m.Offset(offset).Limit(limit).Scan(result)
	return
}

func (m modelEntityDo) Scan(result interface{}) (err error) {
	return m.DO.Scan(result)
}

func (m modelEntityDo) Delete(models ...*model.ModelEntity) (result gen.ResultInfo, err error) {
	return m.DO.Delete(models)
}

func (m *modelEntityDo) withDO(do gen.Dao) *modelEntityDo {
	m.DO = *do.(*gen.DO)
	return m
}
@ -1,425 +0,0 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.

package query

import (
	"context"

	"gorm.io/gorm"
	"gorm.io/gorm/clause"
	"gorm.io/gorm/schema"

	"gorm.io/gen"
	"gorm.io/gen/field"

	"gorm.io/plugin/dbresolver"

	"github.com/coze-dev/coze-studio/backend/domain/modelmgr/internal/dal/model"
)

func newModelMeta(db *gorm.DB, opts ...gen.DOOption) modelMeta {
	_modelMeta := modelMeta{}

	_modelMeta.modelMetaDo.UseDB(db, opts...)
	_modelMeta.modelMetaDo.UseModel(&model.ModelMeta{})

	tableName := _modelMeta.modelMetaDo.TableName()
	_modelMeta.ALL = field.NewAsterisk(tableName)
	_modelMeta.ID = field.NewInt64(tableName, "id")
	_modelMeta.ModelName = field.NewString(tableName, "model_name")
	_modelMeta.Protocol = field.NewString(tableName, "protocol")
	_modelMeta.IconURI = field.NewString(tableName, "icon_uri")
	_modelMeta.Capability = field.NewField(tableName, "capability")
	_modelMeta.ConnConfig = field.NewField(tableName, "conn_config")
	_modelMeta.Status = field.NewField(tableName, "status")
	_modelMeta.Description = field.NewString(tableName, "description")
	_modelMeta.CreatedAt = field.NewInt64(tableName, "created_at")
	_modelMeta.UpdatedAt = field.NewInt64(tableName, "updated_at")
	_modelMeta.DeletedAt = field.NewField(tableName, "deleted_at")
	_modelMeta.IconURL = field.NewString(tableName, "icon_url")

	_modelMeta.fillFieldMap()

	return _modelMeta
}
// modelMeta model meta information
type modelMeta struct {
	modelMetaDo

	ALL         field.Asterisk
	ID          field.Int64  // primary key ID
	ModelName   field.String // model name
	Protocol    field.String // model protocol
	IconURI     field.String // Icon URI
	Capability  field.Field  // model capability
	ConnConfig  field.Field  // model connection config
	Status      field.Field  // model status
	Description field.String // model description
	CreatedAt   field.Int64  // Create Time in Milliseconds
	UpdatedAt   field.Int64  // Update Time in Milliseconds
	DeletedAt   field.Field  // Delete Time in Milliseconds
	IconURL     field.String // Icon URL

	fieldMap map[string]field.Expr
}
func (m modelMeta) Table(newTableName string) *modelMeta {
	m.modelMetaDo.UseTable(newTableName)
	return m.updateTableName(newTableName)
}

func (m modelMeta) As(alias string) *modelMeta {
	m.modelMetaDo.DO = *(m.modelMetaDo.As(alias).(*gen.DO))
	return m.updateTableName(alias)
}

func (m *modelMeta) updateTableName(table string) *modelMeta {
	m.ALL = field.NewAsterisk(table)
	m.ID = field.NewInt64(table, "id")
	m.ModelName = field.NewString(table, "model_name")
	m.Protocol = field.NewString(table, "protocol")
	m.IconURI = field.NewString(table, "icon_uri")
	m.Capability = field.NewField(table, "capability")
	m.ConnConfig = field.NewField(table, "conn_config")
	m.Status = field.NewField(table, "status")
	m.Description = field.NewString(table, "description")
	m.CreatedAt = field.NewInt64(table, "created_at")
	m.UpdatedAt = field.NewInt64(table, "updated_at")
	m.DeletedAt = field.NewField(table, "deleted_at")
	m.IconURL = field.NewString(table, "icon_url")

	m.fillFieldMap()

	return m
}

func (m *modelMeta) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
	_f, ok := m.fieldMap[fieldName]
	if !ok || _f == nil {
		return nil, false
	}
	_oe, ok := _f.(field.OrderExpr)
	return _oe, ok
}

func (m *modelMeta) fillFieldMap() {
	m.fieldMap = make(map[string]field.Expr, 12)
	m.fieldMap["id"] = m.ID
	m.fieldMap["model_name"] = m.ModelName
	m.fieldMap["protocol"] = m.Protocol
	m.fieldMap["icon_uri"] = m.IconURI
	m.fieldMap["capability"] = m.Capability
	m.fieldMap["conn_config"] = m.ConnConfig
	m.fieldMap["status"] = m.Status
	m.fieldMap["description"] = m.Description
	m.fieldMap["created_at"] = m.CreatedAt
	m.fieldMap["updated_at"] = m.UpdatedAt
	m.fieldMap["deleted_at"] = m.DeletedAt
	m.fieldMap["icon_url"] = m.IconURL
}

func (m modelMeta) clone(db *gorm.DB) modelMeta {
	m.modelMetaDo.ReplaceConnPool(db.Statement.ConnPool)
	return m
}

func (m modelMeta) replaceDB(db *gorm.DB) modelMeta {
	m.modelMetaDo.ReplaceDB(db)
	return m
}

type modelMetaDo struct{ gen.DO }

type IModelMetaDo interface {
	gen.SubQuery
	Debug() IModelMetaDo
	WithContext(ctx context.Context) IModelMetaDo
	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
	ReplaceDB(db *gorm.DB)
	ReadDB() IModelMetaDo
	WriteDB() IModelMetaDo
	As(alias string) gen.Dao
	Session(config *gorm.Session) IModelMetaDo
	Columns(cols ...field.Expr) gen.Columns
	Clauses(conds ...clause.Expression) IModelMetaDo
	Not(conds ...gen.Condition) IModelMetaDo
	Or(conds ...gen.Condition) IModelMetaDo
	Select(conds ...field.Expr) IModelMetaDo
	Where(conds ...gen.Condition) IModelMetaDo
	Order(conds ...field.Expr) IModelMetaDo
	Distinct(cols ...field.Expr) IModelMetaDo
	Omit(cols ...field.Expr) IModelMetaDo
	Join(table schema.Tabler, on ...field.Expr) IModelMetaDo
	LeftJoin(table schema.Tabler, on ...field.Expr) IModelMetaDo
	RightJoin(table schema.Tabler, on ...field.Expr) IModelMetaDo
	Group(cols ...field.Expr) IModelMetaDo
	Having(conds ...gen.Condition) IModelMetaDo
	Limit(limit int) IModelMetaDo
	Offset(offset int) IModelMetaDo
	Count() (count int64, err error)
	Scopes(funcs ...func(gen.Dao) gen.Dao) IModelMetaDo
	Unscoped() IModelMetaDo
	Create(values ...*model.ModelMeta) error
	CreateInBatches(values []*model.ModelMeta, batchSize int) error
	Save(values ...*model.ModelMeta) error
	First() (*model.ModelMeta, error)
	Take() (*model.ModelMeta, error)
	Last() (*model.ModelMeta, error)
	Find() ([]*model.ModelMeta, error)
	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*model.ModelMeta, err error)
	FindInBatches(result *[]*model.ModelMeta, batchSize int, fc func(tx gen.Dao, batch int) error) error
	Pluck(column field.Expr, dest interface{}) error
	Delete(...*model.ModelMeta) (info gen.ResultInfo, err error)
	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
	Updates(value interface{}) (info gen.ResultInfo, err error)
	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
	UpdateFrom(q gen.SubQuery) gen.Dao
	Attrs(attrs ...field.AssignExpr) IModelMetaDo
	Assign(attrs ...field.AssignExpr) IModelMetaDo
	Joins(fields ...field.RelationField) IModelMetaDo
	Preload(fields ...field.RelationField) IModelMetaDo
	FirstOrInit() (*model.ModelMeta, error)
	FirstOrCreate() (*model.ModelMeta, error)
	FindByPage(offset int, limit int) (result []*model.ModelMeta, count int64, err error)
	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
	Scan(result interface{}) (err error)
	Returning(value interface{}, columns ...string) IModelMetaDo
	UnderlyingDB() *gorm.DB
	schema.Tabler
}

func (m modelMetaDo) Debug() IModelMetaDo {
	return m.withDO(m.DO.Debug())
}

func (m modelMetaDo) WithContext(ctx context.Context) IModelMetaDo {
	return m.withDO(m.DO.WithContext(ctx))
}

func (m modelMetaDo) ReadDB() IModelMetaDo {
	return m.Clauses(dbresolver.Read)
}

func (m modelMetaDo) WriteDB() IModelMetaDo {
	return m.Clauses(dbresolver.Write)
}

func (m modelMetaDo) Session(config *gorm.Session) IModelMetaDo {
	return m.withDO(m.DO.Session(config))
}

func (m modelMetaDo) Clauses(conds ...clause.Expression) IModelMetaDo {
	return m.withDO(m.DO.Clauses(conds...))
}

func (m modelMetaDo) Returning(value interface{}, columns ...string) IModelMetaDo {
	return m.withDO(m.DO.Returning(value, columns...))
}

func (m modelMetaDo) Not(conds ...gen.Condition) IModelMetaDo {
	return m.withDO(m.DO.Not(conds...))
}

func (m modelMetaDo) Or(conds ...gen.Condition) IModelMetaDo {
	return m.withDO(m.DO.Or(conds...))
}

func (m modelMetaDo) Select(conds ...field.Expr) IModelMetaDo {
	return m.withDO(m.DO.Select(conds...))
}

func (m modelMetaDo) Where(conds ...gen.Condition) IModelMetaDo {
	return m.withDO(m.DO.Where(conds...))
}

func (m modelMetaDo) Order(conds ...field.Expr) IModelMetaDo {
	return m.withDO(m.DO.Order(conds...))
}

func (m modelMetaDo) Distinct(cols ...field.Expr) IModelMetaDo {
	return m.withDO(m.DO.Distinct(cols...))
}

func (m modelMetaDo) Omit(cols ...field.Expr) IModelMetaDo {
	return m.withDO(m.DO.Omit(cols...))
}

func (m modelMetaDo) Join(table schema.Tabler, on ...field.Expr) IModelMetaDo {
	return m.withDO(m.DO.Join(table, on...))
}

func (m modelMetaDo) LeftJoin(table schema.Tabler, on ...field.Expr) IModelMetaDo {
	return m.withDO(m.DO.LeftJoin(table, on...))
}

func (m modelMetaDo) RightJoin(table schema.Tabler, on ...field.Expr) IModelMetaDo {
	return m.withDO(m.DO.RightJoin(table, on...))
}

func (m modelMetaDo) Group(cols ...field.Expr) IModelMetaDo {
	return m.withDO(m.DO.Group(cols...))
}

func (m modelMetaDo) Having(conds ...gen.Condition) IModelMetaDo {
	return m.withDO(m.DO.Having(conds...))
}

func (m modelMetaDo) Limit(limit int) IModelMetaDo {
	return m.withDO(m.DO.Limit(limit))
}

func (m modelMetaDo) Offset(offset int) IModelMetaDo {
	return m.withDO(m.DO.Offset(offset))
}

func (m modelMetaDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IModelMetaDo {
	return m.withDO(m.DO.Scopes(funcs...))
}

func (m modelMetaDo) Unscoped() IModelMetaDo {
	return m.withDO(m.DO.Unscoped())
}

func (m modelMetaDo) Create(values ...*model.ModelMeta) error {
	if len(values) == 0 {
		return nil
	}
	return m.DO.Create(values)
}

func (m modelMetaDo) CreateInBatches(values []*model.ModelMeta, batchSize int) error {
	return m.DO.CreateInBatches(values, batchSize)
}

// Save : !!! underlying implementation is different with GORM
// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
func (m modelMetaDo) Save(values ...*model.ModelMeta) error {
	if len(values) == 0 {
		return nil
	}
	return m.DO.Save(values)
}

func (m modelMetaDo) First() (*model.ModelMeta, error) {
	if result, err := m.DO.First(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelMeta), nil
	}
}

func (m modelMetaDo) Take() (*model.ModelMeta, error) {
	if result, err := m.DO.Take(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelMeta), nil
	}
}

func (m modelMetaDo) Last() (*model.ModelMeta, error) {
	if result, err := m.DO.Last(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelMeta), nil
	}
}

func (m modelMetaDo) Find() ([]*model.ModelMeta, error) {
	result, err := m.DO.Find()
	return result.([]*model.ModelMeta), err
}

func (m modelMetaDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*model.ModelMeta, err error) {
	buf := make([]*model.ModelMeta, 0, batchSize)
	err = m.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
		defer func() { results = append(results, buf...) }()
		return fc(tx, batch)
	})
	return results, err
}

func (m modelMetaDo) FindInBatches(result *[]*model.ModelMeta, batchSize int, fc func(tx gen.Dao, batch int) error) error {
	return m.DO.FindInBatches(result, batchSize, fc)
}

func (m modelMetaDo) Attrs(attrs ...field.AssignExpr) IModelMetaDo {
	return m.withDO(m.DO.Attrs(attrs...))
}

func (m modelMetaDo) Assign(attrs ...field.AssignExpr) IModelMetaDo {
	return m.withDO(m.DO.Assign(attrs...))
}

func (m modelMetaDo) Joins(fields ...field.RelationField) IModelMetaDo {
	for _, _f := range fields {
		m = *m.withDO(m.DO.Joins(_f))
	}
	return &m
}

func (m modelMetaDo) Preload(fields ...field.RelationField) IModelMetaDo {
	for _, _f := range fields {
		m = *m.withDO(m.DO.Preload(_f))
	}
	return &m
}

func (m modelMetaDo) FirstOrInit() (*model.ModelMeta, error) {
	if result, err := m.DO.FirstOrInit(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelMeta), nil
	}
}

func (m modelMetaDo) FirstOrCreate() (*model.ModelMeta, error) {
	if result, err := m.DO.FirstOrCreate(); err != nil {
		return nil, err
	} else {
		return result.(*model.ModelMeta), nil
	}
}

func (m modelMetaDo) FindByPage(offset int, limit int) (result []*model.ModelMeta, count int64, err error) {
	result, err = m.Offset(offset).Limit(limit).Find()
	if err != nil {
		return
	}

	if size := len(result); 0 < limit && 0 < size && size < limit {
		count = int64(size + offset)
		return
	}

	count, err = m.Offset(-1).Limit(-1).Count()
	return
}

func (m modelMetaDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
	count, err = m.Count()
	if err != nil {
		return
	}

	err = m.Offset(offset).Limit(limit).Scan(result)
	return
}

func (m modelMetaDo) Scan(result interface{}) (err error) {
	return m.DO.Scan(result)
}

func (m modelMetaDo) Delete(models ...*model.ModelMeta) (result gen.ResultInfo, err error) {
	return m.DO.Delete(models)
}

func (m *modelMetaDo) withDO(do gen.Dao) *modelMetaDo {
	m.DO = *do.(*gen.DO)
	return m
}
@ -1,389 +0,0 @@
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package service

import (
	"context"
	"encoding/json"
	"fmt"
	"time"

	"gorm.io/gorm"

	modelmgrModel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr/entity"
	"github.com/coze-dev/coze-studio/backend/domain/modelmgr/internal/dal/dao"
	dmodel "github.com/coze-dev/coze-studio/backend/domain/modelmgr/internal/dal/model"
	uploadEntity "github.com/coze-dev/coze-studio/backend/domain/upload/entity"
	modelcontract "github.com/coze-dev/coze-studio/backend/infra/contract/chatmodel"
	"github.com/coze-dev/coze-studio/backend/infra/contract/idgen"
	"github.com/coze-dev/coze-studio/backend/infra/contract/storage"
	"github.com/coze-dev/coze-studio/backend/pkg/lang/slices"
)

func NewModelManager(db *gorm.DB, idgen idgen.IDGenerator, oss storage.Storage) modelmgr.Manager {
	return &modelManager{
		idgen:           idgen,
		oss:             oss,
		modelMetaRepo:   dao.NewModelMetaDAO(db),
		modelEntityRepo: dao.NewModelEntityDAO(db),
	}
}

type modelManager struct {
	idgen idgen.IDGenerator
	oss   storage.Storage

	modelMetaRepo   dao.ModelMetaRepo
	modelEntityRepo dao.ModelEntityRepo
}

func (m *modelManager) CreateModelMeta(ctx context.Context, meta *entity.ModelMeta) (resp *entity.ModelMeta, err error) {
	if err = m.alignProtocol(meta); err != nil {
		return nil, err
	}

	id := meta.ID
	if id == 0 {
		id, err = m.idgen.GenID(ctx)
		if err != nil {
			return nil, err
		}
	}

	desc, err := json.Marshal(meta.Description)
	if err != nil {
		return nil, err
	}

	now := time.Now().UnixMilli()
	if err = m.modelMetaRepo.Create(ctx, &dmodel.ModelMeta{
		ID:          id,
		ModelName:   meta.Name,
		Protocol:    string(meta.Protocol),
		IconURI:     meta.IconURI,
		IconURL:     meta.IconURL,
		Capability:  meta.Capability,
		ConnConfig:  meta.ConnConfig,
		Status:      meta.Status,
		Description: string(desc),
		CreatedAt:   now,
		UpdatedAt:   now,
		DeletedAt:   gorm.DeletedAt{},
	}); err != nil {
		return nil, err
	}

	return &entity.ModelMeta{
		ID:          id,
		Name:        meta.Name,
		Description: meta.Description,
		CreatedAtMs: now,
		UpdatedAtMs: now,

		Protocol:   meta.Protocol,
		Capability: meta.Capability,
		ConnConfig: meta.ConnConfig,
		Status:     meta.Status,
	}, nil
}

func (m *modelManager) UpdateModelMetaStatus(ctx context.Context, id int64, status entity.ModelMetaStatus) error {
	return m.modelMetaRepo.UpdateStatus(ctx, id, status)
}

func (m *modelManager) DeleteModelMeta(ctx context.Context, id int64) error {
	return m.modelMetaRepo.Delete(ctx, id)
}

func (m *modelManager) ListModelMeta(ctx context.Context, req *modelmgr.ListModelMetaRequest) (*modelmgr.ListModelMetaResponse, error) {
	status := req.Status
	if len(status) == 0 {
		status = []entity.ModelMetaStatus{modelmgrModel.StatusInUse}
	}

	pos, next, hasMore, err := m.modelMetaRepo.List(ctx, req.FuzzyModelName, status, req.Limit, req.Cursor)
	if err != nil {
		return nil, err
	}

	dos, err := m.fromModelMetaPOs(ctx, pos)
	if err != nil {
		return nil, err
	}

	return &modelmgr.ListModelMetaResponse{
		ModelMetaList: dos,
		HasMore:       hasMore,
		NextCursor:    next,
	}, nil
}

func (m *modelManager) MGetModelMetaByID(ctx context.Context, req *modelmgr.MGetModelMetaRequest) ([]*entity.ModelMeta, error) {
	if len(req.IDs) == 0 {
		return nil, nil
	}

	pos, err := m.modelMetaRepo.MGetByID(ctx, req.IDs)
	if err != nil {
		return nil, err
	}

	dos, err := m.fromModelMetaPOs(ctx, pos)
	if err != nil {
		return nil, err
	}

	return dos, nil
}
func (m *modelManager) CreateModel(ctx context.Context, e *entity.Model) (*entity.Model, error) {
	// check if meta id exists
	metaPO, err := m.modelMetaRepo.GetByID(ctx, e.Meta.ID)
	if err != nil {
		return nil, err
	}
	if metaPO == nil {
		return nil, fmt.Errorf("[CreateModel] model meta not found, model_meta id=%d", e.Meta.ID)
	}
	id := e.ID
	if id == 0 {
		id, err = m.idgen.GenID(ctx)
		if err != nil {
			return nil, err
		}
	}

	now := time.Now().UnixMilli()
	// TODO(@fanlv): move the do -> po conversion into the dal layer
	if err = m.modelEntityRepo.Create(ctx, &dmodel.ModelEntity{
		ID:            id,
		MetaID:        e.Meta.ID,
		Name:          e.Name,
		Description:   e.Description,
		DefaultParams: e.DefaultParameters,
		Status:        modelmgrModel.ModelEntityStatusInUse,
		CreatedAt:     now,
		UpdatedAt:     now,
	}); err != nil {
		return nil, err
	}

	resp := &entity.Model{
		Model: &modelmgrModel.Model{
			ID:          id,
			Name:        e.Name,
			CreatedAtMs: now,
			UpdatedAtMs: now,
			Meta:        e.Meta,
		},
	}

	return resp, nil
}
func (m *modelManager) DeleteModel(ctx context.Context, id int64) error {
	return m.modelEntityRepo.Delete(ctx, id)
}

func (m *modelManager) ListModel(ctx context.Context, req *modelmgr.ListModelRequest) (*modelmgr.ListModelResponse, error) {
	var sc *int64

	status := req.Status
	if len(status) == 0 {
		status = []modelmgrModel.ModelEntityStatus{modelmgrModel.ModelEntityStatusDefault, modelmgrModel.ModelEntityStatusInUse}
	}

	pos, next, hasMore, err := m.modelEntityRepo.List(ctx, req.FuzzyModelName, sc, status, req.Limit, req.Cursor)
	if err != nil {
		return nil, err
	}

	pos = moveDefaultModelToFirst(pos)
	resp, err := m.fromModelPOs(ctx, pos)
	if err != nil {
		return nil, err
	}

	return &modelmgr.ListModelResponse{
		ModelList:  resp,
		HasMore:    hasMore,
		NextCursor: next,
	}, nil
}

func (m *modelManager) MGetModelByID(ctx context.Context, req *modelmgr.MGetModelRequest) ([]*entity.Model, error) {
	if len(req.IDs) == 0 {
		return nil, nil
	}

	pos, err := m.modelEntityRepo.MGet(ctx, req.IDs)
	if err != nil {
		return nil, err
	}

	resp, err := m.fromModelPOs(ctx, pos)
	if err != nil {
		return nil, err
	}

	return resp, nil
}

func (m *modelManager) alignProtocol(meta *entity.ModelMeta) error {
	if meta.Protocol == "" {
		return fmt.Errorf("protocol not provided")
	}

	config := meta.ConnConfig
	if config == nil {
		return fmt.Errorf("ConnConfig not provided, protocol=%s", meta.Protocol)
	}

	return nil
}

func (m *modelManager) fromModelMetaPOs(ctx context.Context, pos []*dmodel.ModelMeta) ([]*entity.ModelMeta, error) {
	uris := make(map[string]string)

	for _, po := range pos {
		if po == nil || po.IconURL != "" {
			continue
		}
		if po.IconURI == "" {
			po.IconURI = uploadEntity.ModelIconURI
		}
		uris[po.IconURI] = ""
	}

	for uri := range uris {
		url, err := m.oss.GetObjectUrl(ctx, uri)
		if err != nil {
			return nil, err
		}
		uris[uri] = url
	}

	dos, err := slices.TransformWithErrorCheck(pos, func(po *dmodel.ModelMeta) (*entity.ModelMeta, error) {
		if po == nil {
			return nil, nil
		}
		url := po.IconURL
		if url == "" {
			url = uris[po.IconURI]
		}

		desc := &modelmgrModel.MultilingualText{}
		if unmarshalErr := json.Unmarshal([]byte(po.Description), desc); unmarshalErr != nil {
			return nil, unmarshalErr
		}

		return &entity.ModelMeta{
			ID:      po.ID,
			Name:    po.ModelName,
			IconURI: po.IconURI,
			IconURL: url,

			Description: desc,
			CreatedAtMs: po.CreatedAt,
			UpdatedAtMs: po.UpdatedAt,
			DeletedAtMs: po.DeletedAt.Time.UnixMilli(),

			Protocol:   modelcontract.Protocol(po.Protocol),
			Capability: po.Capability,
			ConnConfig: po.ConnConfig,
			Status:     po.Status,
		}, nil
	})
	if err != nil {
		return nil, err
	}

	return dos, nil
}

func (m *modelManager) fromModelPOs(ctx context.Context, pos []*dmodel.ModelEntity) ([]*entity.Model, error) {
	if len(pos) == 0 {
		return nil, nil
	}

	resp := make([]*entity.Model, 0, len(pos))
	metaIDSet := make(map[int64]struct{})
	for _, po := range pos {
		resp = append(resp, &entity.Model{
			Model: &modelmgrModel.Model{
				ID:                po.ID,
				Name:              po.Name,
				Description:       po.Description,
				DefaultParameters: po.DefaultParams,
				CreatedAtMs:       po.CreatedAt,
				UpdatedAtMs:       po.UpdatedAt,
				Meta: entity.ModelMeta{
					ID: po.MetaID,
				},
			},
		})
		metaIDSet[po.MetaID] = struct{}{}
	}

	metaIDSlice := make([]int64, 0, len(metaIDSet))
	for id := range metaIDSet {
		metaIDSlice = append(metaIDSlice, id)
	}

	modelMetaSlice, err := m.MGetModelMetaByID(ctx, &modelmgr.MGetModelMetaRequest{IDs: metaIDSlice})
	if err != nil {
		return nil, err
	}

	metaID2Meta := make(map[int64]*entity.ModelMeta)
	for i := range modelMetaSlice {
		item := modelMetaSlice[i]
		if item.IconURL == "" {
			url, err := m.oss.GetObjectUrl(ctx, item.IconURI)
			if err != nil {
				return nil, err
			}
			item.IconURL = url
		}
		metaID2Meta[item.ID] = item
	}

	for _, r := range resp {
		meta, found := metaID2Meta[r.Meta.ID]
		if !found {
			return nil, fmt.Errorf("[ListModel] model meta not found, model_entity id=%v, model_meta id=%v", r.ID, r.Meta.ID)
		}
		r.Meta = *meta
	}

	return resp, nil
}

func moveDefaultModelToFirst(ms []*dmodel.ModelEntity) []*dmodel.ModelEntity {
	orders := make([]*dmodel.ModelEntity, len(ms))
	copy(orders, ms)

	for i, m := range orders {
		if i != 0 && m.Status == modelmgrModel.ModelEntityStatusDefault {
			orders[0], orders[i] = orders[i], orders[0]
			break
		}
	}
	return orders
}
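moveDefaultModelToFirst works on a copy, so the caller's slice is untouched; the first default-status entry swaps places with whatever sat at index 0. A quick illustration with hypothetical ids:

// Sketch only: given ids [1, 2, 3] where id 3 carries the default status,
// the swap exchanges index 0 and index 2.
in := []*dmodel.ModelEntity{
	{ID: 1}, {ID: 2}, {ID: 3, Status: modelmgrModel.ModelEntityStatusDefault},
}
out := moveDefaultModelToFirst(in)
// out order is [3, 2, 1]; in still reads [1, 2, 3].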
@ -1,41 +0,0 @@
|
|||
/*
|
||||
* Copyright 2025 coze-dev Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package service
|
||||
|
||||
// TODO: 考虑到 model manager 被外部高频读+运行,修改/删除频率很低,基本没有实时更新需求,可进行 cache
|
||||
// 1. model_meta
|
||||
// 2. model_entity
|
||||
// 3. ChatModel
|
||||
|
||||
// func (m *modelManager) buildOptions(req *model.ChatRequest) []cm.Option {
|
||||
// var opts []cm.Option
|
||||
//
|
||||
// if len(req.Tools) > 0 {
|
||||
// opts = append(opts, cm.WithTools(req.Tools))
|
||||
// }
|
||||
// if req.Temperature != nil {
|
||||
// opts = append(opts, cm.WithTemperature(float32(*req.Temperature)))
|
||||
// }
|
||||
// if req.MaxTokens != nil {
|
||||
// opts = append(opts, cm.WithMaxTokens(*req.MaxTokens))
|
||||
// }
|
||||
// if req.TopP != nil {
|
||||
// opts = append(opts, cm.WithTopP(float32(*req.TopP)))
|
||||
// }
|
||||
// // TODO: support frequency_penalty, presence_penalty, top_k
|
||||
// return opts
|
||||
//}
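
The TODO in this deleted file points at a read-through cache; a minimal sketch of that idea (every identifier below — cachedMeta, metaCache, newMetaCache — is illustrative only and not part of this commit):

package service

import (
    "sync"
    "time"
)

type cachedMeta struct {
    value     any // e.g. *entity.ModelMeta
    expiresAt time.Time
}

type metaCache struct {
    mu      sync.RWMutex
    ttl     time.Duration
    entries map[int64]cachedMeta
}

func newMetaCache(ttl time.Duration) *metaCache {
    return &metaCache{ttl: ttl, entries: make(map[int64]cachedMeta)}
}

// Get serves from the cache while the entry is fresh, otherwise calls load
// and stores the result with a TTL; stale entries are simply overwritten.
func (c *metaCache) Get(id int64, load func(int64) (any, error)) (any, error) {
    c.mu.RLock()
    e, ok := c.entries[id]
    c.mu.RUnlock()
    if ok && time.Now().Before(e.expiresAt) {
        return e.value, nil
    }
    v, err := load(id)
    if err != nil {
        return nil, err
    }
    c.mu.Lock()
    c.entries[id] = cachedMeta{value: v, expiresAt: time.Now().Add(c.ttl)}
    c.mu.Unlock()
    return v, nil
}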

@ -20,8 +20,7 @@ import (
    "context"

    "github.com/cloudwego/eino/components/model"

    "github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossmodelmgr"
    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
)

type LLMParams struct {

@ -59,5 +58,5 @@ func SetManager(m Manager) {

//go:generate mockgen -destination modelmock/model_mock.go --package mockmodel -source model.go
type Manager interface {
    GetModel(ctx context.Context, params *LLMParams) (model.BaseChatModel, *crossmodelmgr.Model, error)
    GetModel(ctx context.Context, params *LLMParams) (model.BaseChatModel, *modelmgr.Model, error)
}
@ -1,19 +1,3 @@
/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Code generated by MockGen. DO NOT EDIT.
// Source: model.go
//
@ -29,9 +13,9 @@ import (
    context "context"
    reflect "reflect"

    crossmodelmgr "github.com/coze-dev/coze-studio/backend/crossdomain/contract/crossmodelmgr"
    model "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/model"
    model0 "github.com/cloudwego/eino/components/model"
    model "github.com/cloudwego/eino/components/model"
    model0 "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/model"
    modelmgr "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
    gomock "go.uber.org/mock/gomock"
)
@ -39,7 +23,6 @@ import (
type MockManager struct {
    ctrl     *gomock.Controller
    recorder *MockManagerMockRecorder
    isgomock struct{}
}

// MockManagerMockRecorder is the mock recorder for MockManager.
@ -60,11 +43,11 @@ func (m *MockManager) EXPECT() *MockManagerMockRecorder {
}

// GetModel mocks base method.
func (m *MockManager) GetModel(ctx context.Context, params *model.LLMParams) (model0.BaseChatModel, *crossmodelmgr.Model, error) {
func (m *MockManager) GetModel(ctx context.Context, params *model0.LLMParams) (model.BaseChatModel, *modelmgr.Model, error) {
    m.ctrl.T.Helper()
    ret := m.ctrl.Call(m, "GetModel", ctx, params)
    ret0, _ := ret[0].(model0.BaseChatModel)
    ret1, _ := ret[1].(*crossmodelmgr.Model)
    ret0, _ := ret[0].(model.BaseChatModel)
    ret1, _ := ret[1].(*modelmgr.Model)
    ret2, _ := ret[2].(error)
    return ret0, ret1, ret2
}
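
Call sites then stub the regenerated mock against the new signature roughly like this (a sketch, not from the diff; t, chatModel, and the import aliases model/model2/modelmgr are assumed from the surrounding test file):

ctrl := gomock.NewController(t)
mgr := mockmodel.NewMockManager(ctrl)
mgr.EXPECT().
    GetModel(gomock.Any(), gomock.Any()).
    DoAndReturn(func(ctx context.Context, params *model.LLMParams) (model2.BaseChatModel, *modelmgr.Model, error) {
        // hand back the shared test chat model; the *modelmgr.Model info may be nil in tests
        return chatModel, nil, nil
    }).
    AnyTimes()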
@ -31,10 +31,10 @@ import (
    model2 "github.com/cloudwego/eino/components/model"
    "github.com/cloudwego/eino/compose"
    "github.com/cloudwego/eino/schema"
    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
    "github.com/stretchr/testify/assert"
    "go.uber.org/mock/gomock"

    "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
    "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/model"
    mockmodel "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/model/modelmock"
    "github.com/coze-dev/coze-studio/backend/domain/workflow/entity"
@ -28,8 +28,8 @@ import (
    "github.com/cloudwego/eino/components/tool"
    "github.com/cloudwego/eino/compose"
    "github.com/cloudwego/eino/schema"
    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"

    crossmodelmgr "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
    workflow3 "github.com/coze-dev/coze-studio/backend/api/model/ocean/cloud/workflow"
    workflow2 "github.com/coze-dev/coze-studio/backend/domain/workflow"
    crosscode "github.com/coze-dev/coze-studio/backend/domain/workflow/crossdomain/code"
@ -91,7 +91,7 @@ func (s *NodeSchema) ToLLMConfig(ctx context.Context) (*llm.Config, error) {
    var (
        err                  error
        chatModel, fallbackM einomodel.BaseChatModel
        info, fallbackI      *crossmodelmgr.Model
        info, fallbackI      *modelmgr.Model
        modelWithInfo        llm.ModelWithInfo
    )

@ -24,28 +24,27 @@ import (
    "github.com/cloudwego/eino/components"
    "github.com/cloudwego/eino/components/model"
    "github.com/cloudwego/eino/schema"

    crossmodelmgr "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
    "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/execute"
    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"
)

type ModelWithInfo interface {
    model.BaseChatModel
    Info(ctx context.Context) *crossmodelmgr.Model
    Info(ctx context.Context) *modelmgr.Model
}

type ModelForLLM struct {
    Model         model.BaseChatModel
    MInfo         *crossmodelmgr.Model
    MInfo         *modelmgr.Model
    FallbackModel model.BaseChatModel
    FallbackInfo  *crossmodelmgr.Model
    FallbackInfo  *modelmgr.Model
    UseFallback   func(ctx context.Context) bool

    modelEnableCallback    bool
    fallbackEnableCallback bool
}

func NewModel(m model.BaseChatModel, info *crossmodelmgr.Model) *ModelForLLM {
func NewModel(m model.BaseChatModel, info *modelmgr.Model) *ModelForLLM {
    return &ModelForLLM{
        Model: m,
        MInfo: info,
@ -57,7 +56,7 @@ func NewModel(m model.BaseChatModel, info *crossmodelmgr.Model) *ModelForLLM {
    }
}

func NewModelWithFallback(m, f model.BaseChatModel, info, fInfo *crossmodelmgr.Model) *ModelForLLM {
func NewModelWithFallback(m, f model.BaseChatModel, info, fInfo *modelmgr.Model) *ModelForLLM {
    return &ModelForLLM{
        Model: m,
        MInfo: info,
@ -175,7 +174,7 @@ func (m *ModelForLLM) IsCallbacksEnabled() bool {
    return true
}

func (m *ModelForLLM) Info(ctx context.Context) *crossmodelmgr.Model {
func (m *ModelForLLM) Info(ctx context.Context) *modelmgr.Model {
    if m.UseFallback(ctx) {
        return m.FallbackInfo
    }

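Since UseFallback is an exported field, both chat calls and Info() route through the same predicate; a wiring sketch (primary, backup, their infos, and primaryUnhealthy are placeholders, not from this commit):

m := NewModelWithFallback(primary, backup, primaryInfo, backupInfo)
m.UseFallback = func(ctx context.Context) bool {
    // placeholder predicate: switch to the backup model when the primary
    // has been flagged as unhealthy for this request's context
    return primaryUnhealthy(ctx)
}
info := m.Info(ctx) // reports whichever model the predicate selects
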
@ -22,8 +22,8 @@ import (

    "github.com/cloudwego/eino/components/prompt"
    "github.com/cloudwego/eino/schema"
    "github.com/coze-dev/coze-studio/backend/infra/contract/modelmgr"

    "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/modelmgr"
    "github.com/coze-dev/coze-studio/backend/domain/workflow/entity/vo"
    "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/execute"
    "github.com/coze-dev/coze-studio/backend/domain/workflow/internal/nodes"
@ -25,9 +25,9 @@ import (
)

type Config struct {
    BaseURL string        `json:"base_url,omitempty" yaml:"base_url,omitempty"`
    APIKey  string        `json:"api_key,omitempty" yaml:"api_key,omitempty"`
    Timeout time.Duration `json:"timeout,omitempty" yaml:"timeout,omitempty"`
    BaseURL string        `json:"base_url,omitempty" yaml:"base_url"`
    APIKey  string        `json:"api_key,omitempty" yaml:"api_key"`
    Timeout time.Duration `json:"timeout,omitempty" yaml:"timeout"`

    Model       string   `json:"model" yaml:"model"`
    Temperature *float32 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
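
Dropping `omitempty` from the yaml tags only affects marshaling, not parsing: zero-valued connection fields are now written out instead of being skipped. A standalone sketch of the difference (assuming gopkg.in/yaml.v3 as the yaml library; json tags omitted for brevity):

package main

import (
    "fmt"
    "time"

    "gopkg.in/yaml.v3"
)

// Config mirrors the struct above.
type Config struct {
    BaseURL string        `yaml:"base_url"`
    APIKey  string        `yaml:"api_key"`
    Timeout time.Duration `yaml:"timeout"`
    Model   string        `yaml:"model"`
}

func main() {
    out, _ := yaml.Marshal(Config{Model: "some-model"})
    // base_url, api_key and timeout now appear with zero values;
    // with `yaml:"base_url,omitempty"` they would have been dropped.
    fmt.Print(string(out))
}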