#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test the Eight Immortals (八仙) debate system against free OpenRouter models.
"""
 | ||
| 
 | ||
| import asyncio
 | ||
| import aiohttp
 | ||
| import os
 | ||
| 
 | ||
# --- Models under test (including ones previously judged less suitable) ---
# Per earlier instructions and the OpenRouter website, the models below are
# marked 'free'; this script checks how they actually perform on the debate
# task.
# Note: the name 'gpt-oss-20b' may be inaccurate or may have changed, so a
# common free open-source model is used in its place.
# 'Uncensored' models are not tested because of the safety risk.
# 'Sarvam-M' is tested as well.
# NOTE(review): the list below still contains an entry described as
# uncensored, which contradicts the line above -- confirm which is intended.
# NOTE(review): the model identifiers and their free-tier status have not
# been verified against the OpenRouter catalogue -- confirm before relying
# on them.
MODELS_TO_TEST = [
    # An alternative free model if needed for comparison (disabled):
    # "openchat/openchat-7b",

    # Google's Gemma 2 9B, free on OpenRouter
    "google/gemma-2-9b-it",
    # Microsoft's Phi-3 Mini, free on OpenRouter
    "microsoft/phi-3-mini-128k-instruct",
    # Qwen3 Coder 8B, free on OpenRouter (good baseline)
    "qwen/qwen3-coder-8b-instruct",
    # DeepSeek Chat, free on OpenRouter (good baseline)
    "deepseek/deepseek-chat",
    # Mistral 7B Instruct, free on OpenRouter (good baseline)
    "mistralai/mistral-7b-instruct",

    # --- Previously considered less suitable ---
    # Often free tier on OpenRouter
    "openai/gpt-3.5-turbo",
    # An uncensored model, free, but we test it cautiously
    "sophosympatheia/midnight-rose-70b",
    # Sarvam 2B M, free on OpenRouter
    "sarvamai/sarvam-2b-m",
]
 | ||
| 
 | ||
class OpenRouterAgent:
    """Debate persona that answers through the OpenRouter chat-completions API.

    Each agent carries a display name and a personality description that are
    injected into the system prompt, plus the API key and model identifier
    used for every request.
    """

    def __init__(self, name: str, personality: str, api_key: str, model: str):
        """Store the persona and connection settings.

        Args:
            name: Display name of the persona; used in the system prompt and
                in the placeholder strings returned on failure.
            personality: Persona description appended to the system prompt.
            api_key: OpenRouter API key, sent as a bearer token.
            model: Model identifier sent in the request payload.
        """
        self.name = name
        self.personality = personality
        self.api_key = api_key
        self.model = model
        self.api_url = "https://openrouter.ai/api/v1"

    @staticmethod
    def _extract_content(result) -> str:
        """Return the stripped text of the first choice, or "" if there is none.

        Tolerates a missing or empty ``choices`` list and a missing or null
        ``message`` / ``content`` so that a malformed body is reported as
        "no content" instead of surfacing as an IndexError/AttributeError.
        """
        choices = result.get("choices") if isinstance(result, dict) else None
        if not choices or not isinstance(choices, list):
            return ""
        first = choices[0]
        message = first.get("message") if isinstance(first, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        return content.strip() if isinstance(content, str) else ""

    async def generate_response(self, prompt: str, session: "aiohttp.ClientSession") -> str:
        """Ask the configured model for a reply to *prompt*.

        Args:
            prompt: User message sent after the persona system prompt.
            session: Open aiohttp client session used for the POST request.

        Returns:
            The stripped reply text on success. On failure a bracketed
            placeholder is returned instead of raising:
            ``[<name> 暂时无法回应]`` when the response carries no text,
            ``[<name> API错误: <status>]`` for a non-200 status, and
            ``[<name> 连接错误]`` when the request or decoding raises.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            # The next two headers are optional and only feed OpenRouter analytics.
            "HTTP-Referer": "https://github.com/bennyschmidt/liurenchaxin",
            "X-Title": "BaXian Debate Test",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": f"你是{self.name},{self.personality}。请用中文回答。"},
                {"role": "user", "content": prompt},
            ],
            # Kept small so test runs are quick; enough for short replies.
            "max_tokens": 500,
            # Slightly lowered for more repeatable replies in a test.
            "temperature": 0.7,
        }

        try:
            async with session.post(
                f"{self.api_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=aiohttp.ClientTimeout(total=30),
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    error_msg = f"API error ({response.status}) for {self.name} using {self.model}: {error_text[:200]}..."
                    print(f"❌ {error_msg}")
                    return f"[{self.name} API错误: {response.status}]"
                result = await response.json()
        except Exception as e:
            # Include the exception type: some exceptions (e.g. a bare
            # TimeoutError) stringify to an empty message.
            error_msg = f"Exception for {self.name} using {self.model}: {type(e).__name__}: {e}"
            print(f"❌ {error_msg}")
            return f"[{self.name} 连接错误]"

        content = self._extract_content(result)
        if content:
            return content

        error_msg = f"API returned no content for {self.name} using {self.model}. Full response: {result}"
        print(f"❌ {error_msg}")
        return f"[{self.name} 暂时无法回应]"
 | ||
| 
 | ||
class SimpleDebateTest:
    """Two-round debate smoke test run against one model at a time.

    Two fixed personas (吕洞宾 and 何仙姑) debate a fixed topic: the first
    states a position, the second responds to it. Both agents are pointed at
    the model under test before each run.
    """

    # Fragments found in the bracketed placeholder strings that
    # OpenRouterAgent.generate_response returns instead of a real reply.
    _FAILURE_MARKERS = ("暂时无法回应", "API错误", "连接错误")

    def __init__(self, api_key: str):
        """Create the two debate agents.

        Args:
            api_key: OpenRouter API key handed to both agents.
        """
        self.api_key = api_key
        self.topic = "工作量证明vs无限制爬虫:从李时珍采药到AI数据获取的激励机制变革"

        # The model is left empty here; test_model() assigns it per run.
        self.agent1 = OpenRouterAgent(
            "吕洞宾",
            "八仙之首,男性代表,理性务实,善于分析问题的本质和长远影响。你代表男性视角,注重逻辑和实用性。",
            api_key, ""
        )
        self.agent2 = OpenRouterAgent(
            "何仙姑",
            "八仙中唯一的女性,温柔智慧,善于从情感和人文角度思考问题。你代表女性视角,注重关怀和和谐。",
            api_key, ""
        )

    @staticmethod
    def _is_failure_placeholder(agent_name: str, reply: str) -> bool:
        """Return True if *reply* is empty or a failure placeholder of *agent_name*.

        A placeholder has the form ``[<agent_name> ...]`` and contains one of
        the failure markers; ordinary replies that merely mention such words
        are not treated as failures.
        """
        if not reply:
            return True
        if not (reply.startswith(f"[{agent_name} ") and reply.endswith("]")):
            return False
        return any(marker in reply for marker in SimpleDebateTest._FAILURE_MARKERS)

    async def _run_round(self, agent: "OpenRouterAgent", prompt: str,
                         session: "aiohttp.ClientSession", label: str,
                         results: dict, key: str) -> bool:
        """Run one debate round, store the reply under *key*, and report success.

        Failures are appended to ``results["errors"]`` whether the agent
        raised or returned a failure placeholder.
        """
        try:
            reply = await agent.generate_response(prompt, session)
        except Exception as e:
            error_msg = f"{label} Error: {e}"
            print(f"❌ {error_msg}")
            results["errors"].append(error_msg)
            return False

        print(f"{reply}\n")
        results[key] = reply
        if self._is_failure_placeholder(agent.name, reply):
            # The agent already printed the details; record the failure so
            # the caller's error list reflects it.
            results["errors"].append(f"{label} Error: {reply}")
            return False
        return True

    async def test_model(self, model_name: str) -> dict:
        """Run the two-round debate with *model_name* and collect the outcome.

        Args:
            model_name: Model identifier assigned to both agents for this run.

        Returns:
            A dict with keys ``model``, ``round1``, ``round2`` (reply text or
            failure placeholder, "" if the round did not run) and ``errors``
            (list of error descriptions). Round 2 is skipped when round 1
            fails, so a failure placeholder is never quoted to the second
            agent as if it were an argument.
        """
        print(f"\n--- Testing Model: {model_name} ---")

        # Point both personas at the model under test.
        self.agent1.model = model_name
        self.agent2.model = model_name

        results = {"model": model_name, "round1": "", "round2": "", "errors": []}

        async with aiohttp.ClientSession() as session:
            # Round 1: agent 1 states its position.
            prompt1 = f"针对'{self.topic}'这个话题,请从你的角度阐述观点。要求:1)明确表达立场 2)提供具体论据 3)字数控制在150字以内"
            print(f"\n🗣️  {self.agent1.name} 发言:")
            if not await self._run_round(self.agent1, prompt1, session, "Round 1", results, "round1"):
                return results

            # Round 2: agent 2 responds to what agent 1 said.
            reply1 = results["round1"]
            prompt2 = f"针对'{self.topic}'这个话题,{self.agent1.name}刚才说:'{reply1}'。请从你的角度回应并阐述不同观点。要求:1)回应对方观点 2)提出自己的立场 3)字数控制在150字以内"
            print(f"🗣️  {self.agent2.name} 回应:")
            await self._run_round(self.agent2, prompt2, session, "Round 2", results, "round2")

        return results
 | ||
| 
 | ||
def _reply_succeeded(reply: str) -> bool:
    """Return True when *reply* is real model output, not a failure placeholder.

    Failure placeholders are bracketed strings (``[...]``) containing
    "无法回应" or "错误". Requiring the bracketed form keeps a genuine reply
    that merely mentions "错误" from being counted as a failure.
    """
    if not reply:
        return False
    is_bracketed = reply.startswith("[") and reply.endswith("]")
    has_marker = any(marker in reply for marker in ("无法回应", "错误"))
    return not (is_bracketed and has_marker)


async def main():
    """Run the debate test for every model in MODELS_TO_TEST and print a summary.

    Reads the API key from the OPENROUTER_API_KEY environment variable and
    returns early with an explanatory message when it is not set.
    """
    print("🚀 启动 OpenRouter 免费模型辩论测试...")

    # 1. Fetch the OpenRouter API key.
    api_key = os.getenv('OPENROUTER_API_KEY')
    if not api_key:
        print("❌ 错误: 未找到 OPENROUTER_API_KEY 环境变量")
        print("请设置环境变量: export OPENROUTER_API_KEY=your_api_key")
        return

    tester = SimpleDebateTest(api_key)
    all_results = []

    # 2. Test the models one after another.
    for model_name in MODELS_TO_TEST:
        try:
            result = await tester.test_model(model_name)
            all_results.append(result)
            # Brief pause between models.
            await asyncio.sleep(2)
        except Exception as e:
            print(f"❌ 测试模型 {model_name} 时发生未预期错误: {e}")
            all_results.append({"model": model_name, "round1": "", "round2": "", "errors": [f"Unexpected test error: {e}"]})

    # 3. Print the summary table.
    print("\n\n--- 📊 测试总结 ---")
    for res in all_results:
        model = res['model']
        # Judge each round by its own reply text; the recorded error list can
        # be empty even when a round returned a failure placeholder.
        r1_ok = "✅" if _reply_succeeded(res['round1']) else "❌"
        r2_ok = "✅" if _reply_succeeded(res['round2']) else "❌"
        err_count = len(res['errors'])

        print(f"🔹 {model:<35} | R1: {r1_ok} | R2: {r2_ok} | Errors: {err_count}")

    # 4. Print the recorded errors for each model that has any.
    print("\n--- 📝 详细日志 ---")
    for res in all_results:
        if res['errors']:
            print(f"\n🔸 模型: {res['model']}")
            for err in res['errors']:
                print(f"  - {err}")
 | ||
# Run the async test driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())