feat: restructure the project and add new features

- Add documentation for the Cloudflare AutoRAG/Vectorize integration
- Implement the Vertex AI Memory Bank feature
- Restructure the project directory layout and remove unused files
- Update the README to reflect the latest architecture
- Add Google ADK integration test scripts
- Flesh out the requirements documents and design specifications
ben
2025-08-16 10:37:11 +00:00
parent 26338d48cf
commit c4e8cfefc7
106 changed files with 12243 additions and 1839 deletions


@@ -0,0 +1,247 @@
#!/usr/bin/env python3
"""
Comprehensive MCP test script.

Tests the integration between LiteLLM and MCP servers.
"""
import asyncio
import aiohttp
import json
import time
from typing import Dict, Any, Optional


class MCPTester:
    def __init__(self, litellm_base_url: str = "http://localhost:12168", master_key: str = "sk-1234567890abcdef"):
        self.litellm_base_url = litellm_base_url
        self.master_key = master_key
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def test_litellm_health(self) -> bool:
        """Check the health endpoint of the LiteLLM server."""
        try:
            async with self.session.get(f"{self.litellm_base_url}/health") as response:
                if response.status == 200:
                    print("✅ LiteLLM server health check passed")
                    return True
                else:
                    print(f"❌ LiteLLM server health check failed: {response.status}")
                    return False
        except Exception as e:
            print(f"❌ Could not connect to the LiteLLM server: {e}")
            return False

    async def test_mcp_endpoint_direct(self, mcp_alias: str) -> bool:
        """Hit an MCP endpoint exposed by LiteLLM directly."""
        try:
            headers = {
                "Authorization": f"Bearer {self.master_key}",
                "Content-Type": "application/json"
            }
            async with self.session.get(
                f"{self.litellm_base_url}/mcp/{mcp_alias}",
                headers=headers
            ) as response:
                print(f"MCP endpoint {mcp_alias} response status: {response.status}")
                if response.status == 200:
                    content_type = response.headers.get('content-type', '')
                    if 'text/event-stream' in content_type:
                        # Handle an SSE response
                        async for line in response.content:
                            line_str = line.decode('utf-8').strip()
                            if line_str.startswith('data: '):
                                data = line_str[6:]  # strip the 'data: ' prefix
                                try:
                                    parsed_data = json.loads(data)
                                    print(f"✅ MCP {mcp_alias} SSE response: {json.dumps(parsed_data, indent=2)}")
                                    return True
                                except json.JSONDecodeError:
                                    print(f"⚠️ Could not parse SSE data: {data}")
                        return False
                    else:
                        text = await response.text()
                        print(f"✅ MCP {mcp_alias} response: {text}")
                        return True
                else:
                    text = await response.text()
                    print(f"❌ MCP {mcp_alias} request failed: {text}")
                    return False
        except Exception as e:
            print(f"❌ Error while testing MCP endpoint {mcp_alias}: {e}")
            return False

    async def test_mcp_tools_list(self, mcp_alias: str) -> Optional[Dict[str, Any]]:
        """List the tools exposed by an MCP endpoint."""
        try:
            headers = {
                "Authorization": f"Bearer {self.master_key}",
                "Content-Type": "application/json"
            }
            # Build the JSON-RPC request
            jsonrpc_request = {
                "jsonrpc": "2.0",
                "method": "tools/list",
                "params": {},
                "id": 1
            }
            async with self.session.post(
                f"{self.litellm_base_url}/mcp/{mcp_alias}",
                headers=headers,
                json=jsonrpc_request
            ) as response:
                print(f"Tool list request status: {response.status}")
                if response.status == 200:
                    result = await response.json()
                    print(f"✅ MCP {mcp_alias} tool list: {json.dumps(result, indent=2)}")
                    return result
                else:
                    text = await response.text()
                    print(f"❌ Failed to fetch the tool list: {text}")
                    return None
        except Exception as e:
            print(f"❌ Error while testing the tool list: {e}")
            return None

    async def test_mcp_tool_call(self, mcp_alias: str, tool_name: str, arguments: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Invoke a tool through an MCP endpoint."""
        try:
            headers = {
                "Authorization": f"Bearer {self.master_key}",
                "Content-Type": "application/json"
            }
            # Build the JSON-RPC request
            jsonrpc_request = {
                "jsonrpc": "2.0",
                "method": "tools/call",
                "params": {
                    "name": tool_name,
                    "arguments": arguments
                },
                "id": 2
            }
            async with self.session.post(
                f"{self.litellm_base_url}/mcp/{mcp_alias}",
                headers=headers,
                json=jsonrpc_request
            ) as response:
                print(f"Tool call request status: {response.status}")
                if response.status == 200:
                    result = await response.json()
                    print(f"✅ MCP {mcp_alias} tool call result: {json.dumps(result, indent=2)}")
                    return result
                else:
                    text = await response.text()
                    print(f"❌ Tool call failed: {text}")
                    return None
        except Exception as e:
            print(f"❌ Error while testing the tool call: {e}")
            return None

    async def test_direct_mcp_server(self, url: str) -> bool:
        """Test an MCP server directly, bypassing LiteLLM."""
        try:
            print(f"\n🔍 Testing MCP server directly: {url}")
            # Test initialization
            async with self.session.get(url) as response:
                print(f"Direct MCP server response status: {response.status}")
                if response.status == 200:
                    content_type = response.headers.get('content-type', '')
                    if 'text/event-stream' in content_type:
                        async for line in response.content:
                            line_str = line.decode('utf-8').strip()
                            if line_str.startswith('data: '):
                                data = line_str[6:]
                                try:
                                    parsed_data = json.loads(data)
                                    print(f"✅ Direct MCP server SSE response: {json.dumps(parsed_data, indent=2)}")
                                    return True
                                except json.JSONDecodeError:
                                    print(f"⚠️ Could not parse SSE data: {data}")
                                    break
                        return False
                    else:
                        text = await response.text()
                        print(f"✅ Direct MCP server response: {text}")
                        return True
                else:
                    text = await response.text()
                    print(f"❌ Direct MCP server request failed: {text}")
                    return False
        except Exception as e:
            print(f"❌ Error while testing the MCP server directly: {e}")
            return False

    async def run_comprehensive_test(self):
        """Run the full test suite."""
        print("🚀 Starting the comprehensive MCP test\n")
        # 1. Check LiteLLM health
        print("1. Checking LiteLLM server health")
        health_ok = await self.test_litellm_health()
        if not health_ok:
            print("❌ LiteLLM server is unavailable, aborting the test")
            return
        # 2. Test the local MCP server directly
        print("\n2. Testing the local MCP server")
        await self.test_direct_mcp_server("http://localhost:8080/mcp")
        # 3. Test the local MCP server through LiteLLM
        print("\n3. Testing the local MCP server through LiteLLM")
        test_endpoint_ok = await self.test_mcp_endpoint_direct("test")
        if test_endpoint_ok:
            # 4. List the local MCP tools
            print("\n4. Listing the local MCP tools")
            tools_result = await self.test_mcp_tools_list("test")
            if tools_result and 'result' in tools_result and 'tools' in tools_result['result']:
                tools = tools_result['result']['tools']
                print(f"Found {len(tools)} tools")
                # 5. Call the tools
                print("\n5. Testing tool calls")
                for tool in tools[:3]:  # only exercise the first three tools
                    tool_name = tool['name']
                    print(f"\nTesting tool: {tool_name}")
                    if tool_name == "echo":
                        await self.test_mcp_tool_call("test", "echo", {"message": "Hello MCP!"})
                    elif tool_name == "get_time":
                        await self.test_mcp_tool_call("test", "get_time", {})
                    elif tool_name == "calculate":
                        await self.test_mcp_tool_call("test", "calculate", {"expression": "2+2*3"})
        # 6. Test the DeepWiki MCP endpoint
        print("\n6. Testing the DeepWiki MCP endpoint")
        await self.test_mcp_endpoint_direct("deepwiki")
        print("\n🎉 Comprehensive MCP test finished")


async def main():
    """Entry point."""
    async with MCPTester() as tester:
        await tester.run_comprehensive_test()


if __name__ == "__main__":
    asyncio.run(main())
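
For ad-hoc runs, MCPTester can also be pointed at a non-default proxy. A minimal sketch, appended to the script above (the URL and key are placeholders, not values from this commit):

async def smoke_test() -> None:
    # Placeholder proxy URL and master key; substitute your deployment's settings.
    async with MCPTester(litellm_base_url="http://localhost:4000", master_key="sk-example-key") as tester:
        if await tester.test_litellm_health():
            await tester.test_mcp_tools_list("test")

asyncio.run(smoke_test())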

litellm/config.yaml (new file, 26 lines)

@@ -0,0 +1,26 @@
model_list:
  - model_name: test-model
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: sk-test-key

general_settings:
  master_key: sk-1234567890abcdef
  disable_spend_logs: false
  disable_master_key_return: false
  enforce_user_param: false

litellm_settings:
  set_verbose: true
  drop_params: true
  add_function_to_prompt: true

mcp_aliases:
  "deepwiki": "deepwiki_mcp_server"
  "test": "test_mcp_server"

mcp_servers:
  deepwiki_mcp_server:
    url: "https://mcp.api-inference.modelscope.net/f9d3f201909c45/sse"
    transport: "http"
  test_mcp_server:
    url: "http://localhost:8080/mcp"
    transport: "http"
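
The two aliases above are what the test scripts address as /mcp/<alias> on the proxy. A minimal sketch of such a request, assuming the proxy runs on its default port 4000 with this config loaded:

import asyncio
import httpx

async def list_tools() -> None:
    # JSON-RPC tools/list against the "test" alias, authenticated with the master key from the config above.
    payload = {"jsonrpc": "2.0", "method": "tools/list", "params": {}, "id": 1}
    headers = {"Authorization": "Bearer sk-1234567890abcdef"}
    async with httpx.AsyncClient() as client:
        resp = await client.post("http://localhost:4000/mcp/test", json=payload, headers=headers, timeout=10.0)
        print(resp.status_code, resp.text[:200])

asyncio.run(list_tools())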

litellm/final_mcp_test.py (new file, 119 lines)

@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""
Final MCP functionality test.
"""
import asyncio
import httpx
import json
from openai import AsyncOpenAI


async def test_litellm_basic():
    """Test basic LiteLLM functionality."""
    print("=== Testing basic LiteLLM functionality ===")
    try:
        client = AsyncOpenAI(
            api_key="sk-1234567890abcdef",
            base_url="http://localhost:4000/v1"
        )
        # List the available models
        models = await client.models.list()
        print(f"Available models: {[model.id for model in models.data]}")
        return True
    except Exception as e:
        print(f"Basic LiteLLM functionality test failed: {e}")
        return False


async def test_simple_mcp_server():
    """Test the simple MCP server."""
    print("\n=== Testing the simple MCP server ===")
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                "http://localhost:8080/mcp",
                headers={"Accept": "text/event-stream"},
                timeout=5.0
            )
            if response.status_code == 200:
                content = response.text
                print(f"MCP server response: {content}")
                # Try to parse the JSON payload out of the SSE frame
                if "data:" in content:
                    json_part = content.split("data:")[1].strip()
                    data = json.loads(json_part)
                    print(f"Parsed tools: {data.get('result', {}).get('tools', [])}")
                return True
            else:
                print(f"MCP server returned an error: {response.status_code}")
                return False
    except Exception as e:
        print(f"Simple MCP server test failed: {e}")
        return False


async def test_litellm_mcp_integration():
    """Test the LiteLLM/MCP integration."""
    print("\n=== Testing the LiteLLM MCP integration ===")
    try:
        async with httpx.AsyncClient() as client:
            # Try several candidate MCP endpoints
            endpoints = [
                "http://localhost:4000/mcp/test",
                "http://localhost:4000/mcp/tools",
                "http://localhost:4000/v1/mcp"
            ]
            for endpoint in endpoints:
                try:
                    print(f"Testing endpoint: {endpoint}")
                    response = await client.get(
                        endpoint,
                        headers={
                            "Authorization": "Bearer sk-1234567890abcdef",
                            "Accept": "text/event-stream"
                        },
                        timeout=3.0
                    )
                    print(f"Status code: {response.status_code}")
                    if response.status_code == 200:
                        print(f"Response: {response.text[:200]}...")
                        return True
                except Exception as e:
                    print(f"Endpoint {endpoint} failed: {e}")
            return False
    except Exception as e:
        print(f"LiteLLM MCP integration test failed: {e}")
        return False


async def main():
    """Main test entry point."""
    print("Starting the combined MCP functionality test...\n")
    # Exercise each component
    litellm_ok = await test_litellm_basic()
    mcp_server_ok = await test_simple_mcp_server()
    integration_ok = await test_litellm_mcp_integration()
    print("\n=== Test summary ===")
    print(f"Basic LiteLLM functionality: {'✅' if litellm_ok else '❌'}")
    print(f"Simple MCP server: {'✅' if mcp_server_ok else '❌'}")
    print(f"LiteLLM MCP integration: {'✅' if integration_ok else '❌'}")
    if litellm_ok and mcp_server_ok:
        print("\nConclusion: LiteLLM and the MCP server both work, but the LiteLLM MCP integration may need extra configuration.")
    elif litellm_ok:
        print("\nConclusion: LiteLLM works, but the MCP functionality is broken.")
    else:
        print("\nConclusion: basic LiteLLM functionality is broken.")


if __name__ == "__main__":
    asyncio.run(main())

litellm/list_models.py (new file, 64 lines)

@@ -0,0 +1,64 @@
import asyncio
from openai import AsyncOpenAI


async def list_available_models():
    """Fetch the list of models supported by the LiteLLM server."""
    print("Fetching the list of available models...")
    # Use the remote LiteLLM server
    client = AsyncOpenAI(
        api_key="sk-0jdcGHZJpX2oUJmyEs7zVA",
        base_url="https://litellm.seekkey.tech"
    )
    try:
        # List the models
        models = await client.models.list()
        print("\n=== Available models ===")
        for model in models.data:
            print(f"- {model.id}")
        print(f"\nFound {len(models.data)} models in total")
        # Try a simple call against the first model
        if models.data:
            first_model = models.data[0].id
            print(f"\nTesting the first model: {first_model}")
            response = await client.chat.completions.create(
                model=first_model,
                messages=[
                    {"role": "user", "content": "Hello, please say hi in Chinese."}
                ],
                max_tokens=50
            )
            print(f"Test response: {response.choices[0].message.content}")
    except Exception as e:
        print(f"Failed to fetch the model list: {e}")
        print(f"Error type: {type(e).__name__}")
        # Fall back to probing a few common models directly
        common_models = ["gpt-4", "gpt-3.5-turbo", "gemini-pro", "claude-3-sonnet"]
        print("\nTrying common models...")
        for model in common_models:
            try:
                print(f"Testing model: {model}")
                response = await client.chat.completions.create(
                    model=model,
                    messages=[{"role": "user", "content": "Hi"}],
                    max_tokens=10
                )
                print(f"{model} is available")
                break
            except Exception as model_error:
                print(f"{model} is unavailable: {str(model_error)[:100]}...")
    finally:
        await client.close()


if __name__ == "__main__":
    asyncio.run(list_available_models())


@@ -0,0 +1,239 @@
#!/usr/bin/env python3
"""
Improved MCP server with support for the full MCP protocol.
"""
import asyncio
import json
import uuid
from datetime import datetime
from aiohttp import web, web_response
from aiohttp.web import Request, Response

# MCP server metadata
server_info = {
    "name": "test-mcp-server",
    "version": "1.0.0",
    "protocol_version": "2024-11-05"
}

# Available tool definitions
available_tools = [
    {
        "name": "echo",
        "description": "Echo back the input message",
        "inputSchema": {
            "type": "object",
            "properties": {
                "message": {
                    "type": "string",
                    "description": "Message to echo back"
                }
            },
            "required": ["message"]
        }
    },
    {
        "name": "get_time",
        "description": "Get current time",
        "inputSchema": {
            "type": "object",
            "properties": {},
            "additionalProperties": False
        }
    },
    {
        "name": "calculate",
        "description": "Perform basic arithmetic calculations",
        "inputSchema": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Mathematical expression to evaluate (e.g., '2+2', '10*5')"
                }
            },
            "required": ["expression"]
        }
    }
]


async def handle_mcp_request(request: Request) -> Response:
    """Dispatch an incoming MCP request."""
    print(f"Received MCP request: {request.method} {request.path}")
    print(f"Request headers: {dict(request.headers)}")
    if request.method == "GET":
        # Handle the initialization request
        return await handle_initialize(request)
    elif request.method == "POST":
        # Handle a JSON-RPC request
        return await handle_jsonrpc(request)
    return web_response.Response(status=405, text="Method not allowed")


async def handle_initialize(request: Request) -> Response:
    """Handle the initialization request."""
    init_response = {
        "jsonrpc": "2.0",
        "result": {
            "protocolVersion": server_info["protocol_version"],
            "capabilities": {
                "tools": {
                    "listChanged": True
                },
                "resources": {
                    "subscribe": False,
                    "listChanged": False
                },
                "prompts": {
                    "listChanged": False
                },
                "logging": {}
            },
            "serverInfo": {
                "name": server_info["name"],
                "version": server_info["version"]
            }
        },
        "id": 1
    }
    # Return the response as an SSE frame
    response_text = f"data: {json.dumps(init_response)}\n\n"
    return web_response.Response(
        text=response_text,
        content_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
            "Access-Control-Allow-Headers": "*"
        }
    )


async def handle_jsonrpc(request: Request) -> Response:
    """Handle a JSON-RPC request."""
    try:
        body = await request.text()
        print(f"Received JSON-RPC request body: {body}")
        if not body:
            return web_response.Response(status=400, text="Empty request body")
        data = json.loads(body)
        method = data.get("method")
        params = data.get("params", {})
        request_id = data.get("id")
        print(f"Method: {method}, params: {params}")
        if method == "tools/list":
            response = {
                "jsonrpc": "2.0",
                "result": {
                    "tools": available_tools
                },
                "id": request_id
            }
        elif method == "tools/call":
            tool_name = params.get("name")
            tool_arguments = params.get("arguments", {})
            result = await execute_tool(tool_name, tool_arguments)
            response = {
                "jsonrpc": "2.0",
                "result": {
                    "content": [
                        {
                            "type": "text",
                            "text": result
                        }
                    ]
                },
                "id": request_id
            }
        else:
            response = {
                "jsonrpc": "2.0",
                "error": {
                    "code": -32601,
                    "message": f"Method not found: {method}"
                },
                "id": request_id
            }
        return web_response.Response(
            text=json.dumps(response),
            content_type="application/json",
            headers={
                "Access-Control-Allow-Origin": "*",
                "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
                "Access-Control-Allow-Headers": "*"
            }
        )
    except json.JSONDecodeError as e:
        print(f"JSON parse error: {e}")
        return web_response.Response(status=400, text="Invalid JSON")
    except Exception as e:
        print(f"Error while handling the request: {e}")
        return web_response.Response(status=500, text="Internal server error")


async def execute_tool(tool_name: str, arguments: dict) -> str:
    """Execute a tool call."""
    print(f"Executing tool: {tool_name}, arguments: {arguments}")
    if tool_name == "echo":
        message = arguments.get("message", "")
        return f"Echo: {message}"
    elif tool_name == "get_time":
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return f"Current time: {current_time}"
    elif tool_name == "calculate":
        expression = arguments.get("expression", "")
        try:
            # Evaluate a simple arithmetic expression (basic operators only).
            # Note: eval is a security risk; a real application should use a safer evaluator.
            allowed_chars = set('0123456789+-*/.() ')
            if all(c in allowed_chars for c in expression):
                result = eval(expression)
                return f"Result: {expression} = {result}"
            else:
                return "Error: Invalid characters in expression"
        except Exception as e:
            return f"Error calculating expression: {str(e)}"
    else:
        return f"Error: Unknown tool '{tool_name}'"


async def handle_options(request: Request) -> Response:
    """Handle CORS preflight (OPTIONS) requests."""
    return web_response.Response(
        headers={
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
            "Access-Control-Allow-Headers": "*"
        }
    )


async def create_app():
    """Create the web application."""
    app = web.Application()
    # Register routes
    app.router.add_get('/mcp', handle_mcp_request)
    app.router.add_post('/mcp', handle_mcp_request)
    app.router.add_options('/mcp', handle_options)
    return app


if __name__ == '__main__':
    print("Starting the simple MCP server on port 8080...")
    app = asyncio.run(create_app())
    web.run_app(app, host='localhost', port=8080)
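
With the server above listening on port 8080, a tools/call round trip can be exercised directly, without going through LiteLLM. A minimal sketch:

import asyncio
import httpx

async def call_echo() -> None:
    # JSON-RPC tools/call against the local test server defined above.
    payload = {
        "jsonrpc": "2.0",
        "method": "tools/call",
        "params": {"name": "echo", "arguments": {"message": "Hello MCP!"}},
        "id": 2,
    }
    async with httpx.AsyncClient() as client:
        resp = await client.post("http://localhost:8080/mcp", json=payload, timeout=5.0)
        print(resp.json())

asyncio.run(call_echo())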


@@ -0,0 +1,100 @@
#!/usr/bin/env python3
import httpx
import json
import asyncio
from typing import AsyncGenerator


async def test_deepwiki_mcp():
    """Test the DeepWiki MCP server."""
    print("=== Testing the DeepWiki MCP server ===")
    # Hit the DeepWiki MCP endpoint directly
    deepwiki_url = "https://mcp.api-inference.modelscope.net/f9d3f201909c45/sse"
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            print(f"\n1. Testing direct access to the DeepWiki MCP endpoint: {deepwiki_url}")
            # Send the SSE request
            headers = {
                "Accept": "text/event-stream",
                "Cache-Control": "no-cache"
            }
            async with client.stream("GET", deepwiki_url, headers=headers) as response:
                print(f"Status code: {response.status_code}")
                print(f"Response headers: {dict(response.headers)}")
                if response.status_code == 200:
                    print("\nReceived data:")
                    count = 0
                    async for line in response.aiter_lines():
                        if line.strip():
                            print(f"Line {count}: {line}")
                            count += 1
                            if count >= 10:  # limit the amount of output
                                print("... (remaining data truncated)")
                                break
                else:
                    print(f"Request failed: {response.status_code}")
                    print(await response.aread())
    except Exception as e:
        print(f"Direct access to DeepWiki failed: {e}")
    # Access the DeepWiki MCP endpoint through LiteLLM
    print("\n\n2. Testing access to the DeepWiki MCP endpoint through LiteLLM")
    litellm_mcp_url = "http://localhost:4000/mcp/deepwiki"
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            headers = {
                "Accept": "text/event-stream",
                "Cache-Control": "no-cache"
            }
            async with client.stream("GET", litellm_mcp_url, headers=headers) as response:
                print(f"Status code: {response.status_code}")
                print(f"Response headers: {dict(response.headers)}")
                if response.status_code == 200:
                    print("\nReceived data:")
                    count = 0
                    async for line in response.aiter_lines():
                        if line.strip():
                            print(f"Line {count}: {line}")
                            count += 1
                            if count >= 10:
                                print("... (remaining data truncated)")
                                break
                else:
                    print(f"Request failed: {response.status_code}")
                    error_content = await response.aread()
                    print(f"Error content: {error_content}")
    except Exception as e:
        print(f"Access to DeepWiki through LiteLLM failed: {e}")
    # Probe LiteLLM's basic MCP endpoints
    print("\n\n3. Testing LiteLLM's basic MCP endpoints")
    basic_endpoints = [
        "http://localhost:4000/mcp/",
        "http://localhost:4000/mcp",
        "http://localhost:4000/v1/mcp"
    ]
    for endpoint in basic_endpoints:
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(endpoint)
                print(f"\n{endpoint}: {response.status_code}")
                if response.status_code != 200:
                    print(f"Error: {response.text[:200]}")
                else:
                    print(f"Success: {response.text[:200]}")
        except Exception as e:
            print(f"\n{endpoint}: failed - {e}")


if __name__ == "__main__":
    asyncio.run(test_deepwiki_mcp())

litellm/test_gpt5_nano.py (new file, 58 lines)

@@ -0,0 +1,58 @@
import asyncio
from openai import AsyncOpenAI


async def test_gpt5_nano():
    """Test the gpt-5-nano model through LiteLLM."""
    print("Testing the gpt-5-nano model...")
    # Use the remote LiteLLM server
    client = AsyncOpenAI(
        api_key="sk-0jdcGHZJpX2oUJmyEs7zVA",
        base_url="https://litellm.seekkey.tech"
    )
    try:
        # Call the gpt-5-nano model
        response = await client.chat.completions.create(
            model="gpt-5-nano",
            messages=[
                {"role": "user", "content": "Hello, please briefly introduce yourself."}
            ],
            max_completion_tokens=150,
            temperature=0.7
        )
        print("\n=== GPT-5-Nano response ===")
        print(f"Model: {response.model}")
        print(f"Response content: {response.choices[0].message.content}")
        print(f"Token usage: {response.usage.total_tokens if response.usage else 'N/A'}")
    except Exception as e:
        print(f"Call failed: {e}")
        print(f"Error type: {type(e).__name__}")
        import traceback
        print(f"Full traceback: {traceback.format_exc()}")
        # Fall back to another available model
        print("\nTrying a fallback model...")
        try:
            response = await client.chat.completions.create(
                model="fireworks_ai/accounts/fireworks/models/deepseek-v3-0324",
                messages=[
                    {"role": "user", "content": "Hello, please briefly introduce yourself."}
                ],
                max_tokens=150,
                temperature=0.7
            )
            print("\n=== DeepSeek-V3 response ===")
            print(f"Model: {response.model}")
            print(f"Response content: {response.choices[0].message.content}")
            print(f"Token usage: {response.usage.total_tokens if response.usage else 'N/A'}")
        except Exception as fallback_error:
            print(f"The fallback model also failed: {fallback_error}")
    finally:
        await client.close()


if __name__ == "__main__":
    asyncio.run(test_gpt5_nano())


@@ -0,0 +1,66 @@
import asyncio
import sys
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionUserMessageParam
from mcp import ClientSession
from mcp.client.sse import sse_client


async def main():
    print("Testing LiteLLM's MCP functionality...")
    try:
        # Initialize OpenAI client
        print("Initializing the OpenAI client...")
        client = AsyncOpenAI(api_key="sk-1234", base_url="http://localhost:4000")
        print("OpenAI client initialized")
        # Test basic LiteLLM functionality first
        print("Testing basic LiteLLM functionality...")
        response = await client.chat.completions.create(
            model="gemini-flash",
            messages=[
                {"role": "user", "content": "Hello, this is a test message."}
            ]
        )
        print(f"LiteLLM response: {response.choices[0].message.content}")
        # Now test the MCP endpoint
        print("\nTesting the MCP endpoint...")
        # Guard the connection with a timeout
        try:
            async with asyncio.timeout(10):  # 10-second timeout
                print("Establishing an SSE connection to the /mcp/ endpoint...")
                async with sse_client("http://localhost:4000/mcp/") as (read, write):
                    print("SSE connection established, initializing the session...")
                    async with ClientSession(read, write) as session:
                        print("Initializing the MCP session...")
                        await session.initialize()
                        print("MCP session initialized")
                        # List available tools
                        print("Fetching the list of available tools...")
                        tools = await session.list_tools()
                        print(f"Found {len(tools.tools)} tools:")
                        for tool in tools.tools:
                            print(f"  - {tool.name}: {tool.description}")
        except asyncio.TimeoutError:
            print("MCP connection timed out")
            print("This may mean that:")
            print("1. This LiteLLM version does not support MCP")
            print("2. The MCP feature needs extra configuration")
            print("3. The /mcp/ endpoint does not exist")
            return
    except Exception as e:
        print(f"An error occurred: {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    print("Starting the LiteLLM MCP test...")
    asyncio.run(main())
    print("Test finished")


@@ -0,0 +1,49 @@
import asyncio
import httpx
import json


async def test_mcp_detailed():
    print("Running a detailed test of LiteLLM's MCP endpoint...")
    async with httpx.AsyncClient() as client:
        try:
            print("\nTesting endpoint: http://localhost:4000/mcp/")
            # Use a streaming request to handle the SSE response
            async with client.stream(
                "GET",
                "http://localhost:4000/mcp/",
                headers={
                    "Authorization": "Bearer sk-1234567890abcdef",
                    "Accept": "text/event-stream",
                    "Cache-Control": "no-cache"
                },
                timeout=10.0
            ) as response:
                print(f"Status code: {response.status_code}")
                print(f"Response headers: {dict(response.headers)}")
                if response.status_code == 200:
                    print("Reading the SSE stream...")
                    content = ""
                    async for chunk in response.aiter_text():
                        content += chunk
                        print(f"Received chunk: {repr(chunk)}")
                        # Stop once enough data has arrived
                        if len(content) > 1000:
                            print("Received enough data, stopping")
                            break
                    print(f"\nFull content: {content}")
                else:
                    error_content = await response.aread()
                    print(f"Error response: {error_content.decode()}")
        except Exception as e:
            print(f"Request failed: {type(e).__name__}: {e}")
            import traceback
            traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(test_mcp_detailed())


@@ -0,0 +1,39 @@
import asyncio
import httpx


async def test_mcp_endpoint():
    print("Testing LiteLLM's MCP endpoints...")
    # Test different endpoints
    endpoints = [
        "http://localhost:4000/health",
        "http://localhost:4000/v1/models",
        "http://localhost:4000/mcp/",
        "http://localhost:4000/mcp"
    ]
    async with httpx.AsyncClient() as client:
        for endpoint in endpoints:
            try:
                print(f"\nTesting endpoint: {endpoint}")
                response = await client.get(
                    endpoint,
                    headers={
                        "Authorization": "Bearer sk-1234567890abcdef",
                        "Accept": "text/event-stream"
                    },
                    timeout=5.0
                )
                print(f"Status code: {response.status_code}")
                print(f"Response headers: {dict(response.headers)}")
                if response.status_code == 200:
                    content = response.text[:500]  # show only the first 500 characters
                    print(f"Response content: {content}")
                else:
                    print(f"Error response: {response.text}")
            except Exception as e:
                print(f"Request failed: {type(e).__name__}: {e}")


if __name__ == "__main__":
    asyncio.run(test_mcp_endpoint())


@@ -0,0 +1,28 @@
import asyncio
from openai import AsyncOpenAI


async def main():
    # Test the remote LiteLLM server without MCP
    client = AsyncOpenAI(
        api_key="sk-0jdcGHZJpX2oUJmyEs7zVA",
        base_url="https://litellm.seekkey.tech"
    )
    try:
        # Test a simple chat completion
        response = await client.chat.completions.create(
            model="gemini/gemini-2.5-flash",
            messages=[
                {"role": "user", "content": "Hello! Please respond with a simple greeting."}
            ],
            max_tokens=50
        )
        print("✅ Remote LiteLLM server is working!")
        print(f"Response: {response.choices[0].message.content}")
    except Exception as e:
        print(f"❌ Error connecting to remote server: {e}")


if __name__ == "__main__":
    asyncio.run(main())

litellm/testmcp.py (new file, 72 lines)

@@ -0,0 +1,72 @@
import asyncio
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionUserMessageParam
from mcp import ClientSession
from mcp.client.sse import sse_client
from litellm.experimental_mcp_client.tools import (
    transform_mcp_tool_to_openai_tool,
    transform_openai_tool_call_request_to_mcp_tool_call_request,
)


async def main():
    # Initialize clients
    # Point the OpenAI client at the LiteLLM Proxy
    client = AsyncOpenAI(api_key="sk-0jdcGHZJpX2oUJmyEs7zVA", base_url="https://litellm.seekkey.tech")
    # Point the MCP client at the LiteLLM Proxy with authentication
    headers = {"Authorization": "Bearer sk-0jdcGHZJpX2oUJmyEs7zVA"}
    async with sse_client("https://litellm.seekkey.tech/mcp/", headers=headers) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # 1. List the MCP tools on the LiteLLM Proxy
            mcp_tools = await session.list_tools()
            print("List of MCP tools for MCP server:", mcp_tools.tools)
            # Create the message
            messages = [
                ChatCompletionUserMessageParam(
                    content="Send an email about LiteLLM supporting MCP", role="user"
                )
            ]
            # 2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools.
            # OpenAI-compatible endpoints only accept tools in the OpenAI format, so the MCP tools are converted first.
            openai_tools = [
                transform_mcp_tool_to_openai_tool(tool) for tool in mcp_tools.tools
            ]
            # 3. Provide the converted tools to the model
            response = await client.chat.completions.create(
                model="gemini/gemini-2.5-flash",
                messages=messages,
                tools=openai_tools,
                tool_choice="auto",
            )
            # 4. Handle a tool call from the model
            if response.choices[0].message.tool_calls:
                tool_call = response.choices[0].message.tool_calls[0]
                if tool_call:
                    # 5. Convert the OpenAI tool call to an MCP tool call.
                    # MCP servers expect calls in the MCP format, so the OpenAI tool call is converted with
                    # litellm.experimental_mcp_client.tools.transform_openai_tool_call_request_to_mcp_tool_call_request.
                    mcp_call = (
                        transform_openai_tool_call_request_to_mcp_tool_call_request(
                            openai_tool=tool_call.model_dump()
                        )
                    )
                    # 6. Execute the tool call on the MCP server
                    result = await session.call_tool(
                        name=mcp_call.name, arguments=mcp_call.arguments
                    )
                    print("Result:", result)


# Run it
asyncio.run(main())

litellm/testmcp_debug.py (new file, 108 lines)

@@ -0,0 +1,108 @@
import asyncio
import sys
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionUserMessageParam
from mcp import ClientSession
from mcp.client.sse import sse_client
from litellm.experimental_mcp_client.tools import (
    transform_mcp_tool_to_openai_tool,
    transform_openai_tool_call_request_to_mcp_tool_call_request,
)


async def main():
    print("Starting the MCP connection test...")
    try:
        # Initialize clients
        print("Initializing the OpenAI client...")
        client = AsyncOpenAI(api_key="sk-0jdcGHZJpX2oUJmyEs7zVA", base_url="https://litellm.seekkey.tech")
        print("OpenAI client initialized")
        # Point the MCP client at the remote LiteLLM Proxy with authentication
        print("Preparing to connect to the MCP server...")
        headers = {"Authorization": "Bearer sk-0jdcGHZJpX2oUJmyEs7zVA"}
        # Guard the connection with a timeout
        try:
            async with asyncio.timeout(10):  # 10-second timeout
                print("Establishing the SSE connection...")
                async with sse_client("https://litellm.seekkey.tech/mcp/", headers=headers) as (read, write):
                    print("SSE connection established, initializing the session...")
                    async with ClientSession(read, write) as session:
                        print("Initializing the MCP session...")
                        await session.initialize()
                        print("MCP session initialized")
                        # 1. List the MCP tools on the LiteLLM Proxy
                        print("Fetching the MCP tool list...")
                        mcp_tools = await session.list_tools()
                        print(f"Found {len(mcp_tools.tools)} MCP tools:")
                        for tool in mcp_tools.tools:
                            print(f"  - {tool.name}: {tool.description}")
                        if not mcp_tools.tools:
                            print("No MCP tools available")
                            return
                        # Create the message
                        messages = [
                            ChatCompletionUserMessageParam(
                                content="List all available databases", role="user"
                            )
                        ]
                        # 2. Convert the MCP tools to OpenAI tools
                        print("Converting MCP tools to the OpenAI format...")
                        openai_tools = [
                            transform_mcp_tool_to_openai_tool(tool) for tool in mcp_tools.tools
                        ]
                        print(f"Conversion finished, {len(openai_tools)} tools in total")
                        # 3. Call the LLM with the tools
                        print("Calling the LLM...")
                        response = await client.chat.completions.create(
                            model="gemini/gemini-2.5-flash",
                            messages=messages,
                            tools=openai_tools,
                            tool_choice="auto",
                        )
                        print("LLM response received")
                        # 4. Handle the tool call
                        if response.choices[0].message.tool_calls:
                            print("The LLM requested a tool call...")
                            tool_call = response.choices[0].message.tool_calls[0]
                            print(f"Tool call: {tool_call.function.name}")
                            print(f"Arguments: {tool_call.function.arguments}")
                            # 5. Convert to the MCP format and execute
                            mcp_call = transform_openai_tool_call_request_to_mcp_tool_call_request(
                                openai_tool=tool_call.model_dump()
                            )
                            print(f"Executing the MCP tool call: {mcp_call.name}")
                            result = await session.call_tool(
                                name=mcp_call.name, arguments=mcp_call.arguments
                            )
                            print("Tool call result:")
                            print(result)
                        else:
                            print("The LLM did not request any tool call")
                            print(f"LLM reply: {response.choices[0].message.content}")
        except asyncio.TimeoutError:
            print("Connection timed out! This could be a network issue or a slow server")
            return
    except Exception as e:
        print(f"An error occurred: {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    print("Starting the MCP debug test...")
    asyncio.run(main())
    print("Test finished")

litellm/testmcp_local.py (new file, 107 lines)

@@ -0,0 +1,107 @@
import asyncio
import sys
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionUserMessageParam
from mcp import ClientSession
from mcp.client.sse import sse_client
from litellm.experimental_mcp_client.tools import (
    transform_mcp_tool_to_openai_tool,
    transform_openai_tool_call_request_to_mcp_tool_call_request,
)


async def main():
    print("Starting the local MCP connection test...")
    try:
        # Initialize clients
        print("Initializing the OpenAI client...")
        client = AsyncOpenAI(api_key="sk-1234", base_url="http://localhost:4000")
        print("OpenAI client initialized")
        # Point the MCP client at the local LiteLLM Proxy
        print("Preparing to connect to the local MCP server...")
        # Guard the connection with a timeout
        try:
            async with asyncio.timeout(10):  # 10-second timeout
                print("Establishing the SSE connection...")
                async with sse_client("http://localhost:4000/mcp/") as (read, write):
                    print("SSE connection established, initializing the session...")
                    async with ClientSession(read, write) as session:
                        print("Initializing the MCP session...")
                        await session.initialize()
                        print("MCP session initialized")
                        # 1. List the MCP tools on the LiteLLM Proxy
                        print("Fetching the MCP tool list...")
                        mcp_tools = await session.list_tools()
                        print(f"Found {len(mcp_tools.tools)} MCP tools:")
                        for tool in mcp_tools.tools:
                            print(f"  - {tool.name}: {tool.description}")
                        if not mcp_tools.tools:
                            print("No MCP tools available")
                            return
                        # Create the message
                        messages = [
                            ChatCompletionUserMessageParam(
                                content="List all available databases", role="user"
                            )
                        ]
                        # 2. Convert the MCP tools to OpenAI tools
                        print("Converting MCP tools to the OpenAI format...")
                        openai_tools = [
                            transform_mcp_tool_to_openai_tool(tool) for tool in mcp_tools.tools
                        ]
                        print(f"Conversion finished, {len(openai_tools)} tools in total")
                        # 3. Call the LLM with the tools
                        print("Calling the LLM...")
                        response = await client.chat.completions.create(
                            model="gemini/gemini-2.5-flash",
                            messages=messages,
                            tools=openai_tools,
                            tool_choice="auto",
                        )
                        print("LLM response received")
                        # 4. Handle the tool call
                        if response.choices[0].message.tool_calls:
                            print("The LLM requested a tool call...")
                            tool_call = response.choices[0].message.tool_calls[0]
                            print(f"Tool call: {tool_call.function.name}")
                            print(f"Arguments: {tool_call.function.arguments}")
                            # 5. Convert to the MCP format and execute
                            mcp_call = transform_openai_tool_call_request_to_mcp_tool_call_request(
                                openai_tool=tool_call.model_dump()
                            )
                            print(f"Executing the MCP tool call: {mcp_call.name}")
                            result = await session.call_tool(
                                name=mcp_call.name, arguments=mcp_call.arguments
                            )
                            print("Tool call result:")
                            print(result)
                        else:
                            print("The LLM did not request any tool call")
                            print(f"LLM reply: {response.choices[0].message.content}")
        except asyncio.TimeoutError:
            print("Connection timed out; the local server may not be running or MCP may not be configured")
            return
    except Exception as e:
        print(f"An error occurred: {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    print("Starting the local MCP debug test...")
    asyncio.run(main())
    print("Test finished")