#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Smoke-test the BaXian (Eight Immortals) debate flow on OpenRouter models.

For every model id in ``MODELS_TO_TEST`` a two-round exchange is run: the
first agent states a position on a fixed topic, then the second agent
responds to that statement. A per-model summary is printed at the end.

The OpenRouter API key is read from the ``OPENROUTER_API_KEY`` environment
variable.
"""

import asyncio
import os

import aiohttp

# --- Models under test (including ones previously thought less suitable) ---
# The original author marked these ids as 'free' on OpenRouter; availability
# and pricing are NOT verified by this script. The goal is to observe how the
# models actually behave on the debate task.
# Author's notes:
#   * 'gpt-oss-20b' may be an inaccurate or outdated name, so a common free
#     open-source model is used in its place.
#   * 'Sarvam-M' is included in the test.
#   * NOTE(review): the author's note says 'Uncensored' is skipped for safety
#     reasons, yet an uncensored model is listed below "cautiously" -- confirm
#     which is intended.
#   * "openchat/openchat-7b" is a possible alternative for comparison.
MODELS_TO_TEST = [
    "google/gemma-2-9b-it",                # Google Gemma 2 9B
    "microsoft/phi-3-mini-128k-instruct",  # Microsoft Phi-3 Mini
    "qwen/qwen3-coder-8b-instruct",        # Qwen3 Coder 8B (baseline)
    "deepseek/deepseek-chat",              # DeepSeek Chat (baseline)
    "mistralai/mistral-7b-instruct",       # Mistral 7B Instruct (baseline)
    # --- Previously considered less suitable ---
    "openai/gpt-3.5-turbo",                # Often on the free tier
    "sophosympatheia/midnight-rose-70b",   # Uncensored model; tested cautiously
    "sarvamai/sarvam-2b-m",                # Sarvam 2B M
]

# Reason fragments embedded in the placeholder strings that
# ``OpenRouterAgent.generate_response`` returns instead of raising.
# Keep in sync with the ``_failure`` calls in that method.
_FAILURE_MARKERS = ("暂时无法回应", "API错误", "连接错误")


def _is_failure_reply(reply: str) -> bool:
    """Return True if *reply* is empty or an agent error placeholder.

    A placeholder is recognised by its shape (``[... <marker> ...]``) rather
    than by a bare substring match, so a genuine model answer that merely
    contains a word such as "错误" is not misclassified as a failure.
    """
    if not reply:
        return True
    text = reply.strip()
    return (
        text.startswith("[")
        and text.endswith("]")
        and any(marker in text for marker in _FAILURE_MARKERS)
    )


class OpenRouterAgent:
    """A debate participant backed by the OpenRouter chat-completions API."""

    def __init__(self, name: str, personality: str, api_key: str, model: str):
        """Store the persona and connection settings.

        Args:
            name: Display name of the agent; also injected into the system
                prompt and into error placeholders.
            personality: Persona description injected into the system prompt.
            api_key: OpenRouter API key sent as a Bearer token.
            model: OpenRouter model id. May be reassigned between requests.
        """
        self.name = name
        self.personality = personality
        self.api_key = api_key
        self.model = model
        self.api_url = "https://openrouter.ai/api/v1"

    def _failure(self, reason: str) -> str:
        """Build the bracketed placeholder returned when a request fails."""
        return f"[{self.name} {reason}]"

    @staticmethod
    def _extract_content(result) -> str:
        """Pull the first choice's message text out of a decoded response.

        Returns an empty string for any unexpected shape (non-dict body,
        missing or empty ``choices``, null ``message`` or ``content``) instead
        of raising, so a malformed body is reported as "no content" rather
        than being mistaken for a connection problem.
        """
        if not isinstance(result, dict):
            return ""
        choices = result.get("choices")
        if not isinstance(choices, list) or not choices:
            return ""
        first = choices[0]
        message = first.get("message") if isinstance(first, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        return content.strip() if isinstance(content, str) else ""

    async def generate_response(self, prompt: str, session: aiohttp.ClientSession) -> str:
        """Ask the model for a reply to *prompt*.

        This method never raises for request failures; it prints a diagnostic
        and returns a bracketed placeholder string instead.

        Args:
            prompt: The user message to send.
            session: An open aiohttp client session used for the request.

        Returns:
            The stripped reply text, or a placeholder of the form
            ``[<name> <reason>]`` when the request fails, the API answers
            with a non-200 status, or the response carries no text.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            # Optional, for OpenRouter analytics
            "HTTP-Referer": "https://github.com/bennyschmidt/liurenchaxin",
            # Optional, for OpenRouter analytics
            "X-Title": "BaXian Debate Test",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": f"你是{self.name},{self.personality}。请用中文回答。"},
                {"role": "user", "content": prompt},
            ],
            # Tuned for a test scenario: short replies, quick turnaround.
            "max_tokens": 500,
            # Slightly lower for more deterministic replies in the test.
            "temperature": 0.7,
        }

        try:
            async with session.post(
                f"{self.api_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=aiohttp.ClientTimeout(total=30),
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    error_msg = (
                        f"API error ({response.status}) for {self.name} "
                        f"using {self.model}: {error_text[:200]}..."
                    )
                    print(f"❌ {error_msg}")
                    return self._failure(f"API错误: {response.status}")
                result = await response.json()
        except Exception as e:
            # Deliberate best-effort boundary: one failing model must not
            # abort the whole test run. The type name is included because
            # some exceptions (e.g. timeouts) have an empty message.
            error_msg = (
                f"Exception for {self.name} using {self.model}: "
                f"{type(e).__name__}: {e}"
            )
            print(f"❌ {error_msg}")
            return self._failure("连接错误")

        content = self._extract_content(result)
        if content:
            return content

        error_msg = (
            f"API returned no content for {self.name} using {self.model}. "
            f"Full response: {result}"
        )
        print(f"❌ {error_msg}")
        return self._failure("暂时无法回应")


class SimpleDebateTest:
    """Run a minimal two-round debate between two agents on one model."""

    def __init__(self, api_key: str):
        """Create the fixed topic and the two debating agents.

        Args:
            api_key: OpenRouter API key shared by both agents.
        """
        self.api_key = api_key
        self.topic = "工作量证明vs无限制爬虫:从李时珍采药到AI数据获取的激励机制变革"

        # A simple agent pair for quick testing. The model id starts empty
        # and is assigned per run by ``test_model``.
        self.agent1 = OpenRouterAgent(
            "吕洞宾",
            "八仙之首,男性代表,理性务实,善于分析问题的本质和长远影响。你代表男性视角,注重逻辑和实用性。",
            api_key,
            "",
        )
        self.agent2 = OpenRouterAgent(
            "何仙姑",
            "八仙中唯一的女性,温柔智慧,善于从情感和人文角度思考问题。你代表女性视角,注重关怀和和谐。",
            api_key,
            "",
        )

    async def test_model(self, model_name: str) -> dict:
        """Run both debate rounds against *model_name*.

        Args:
            model_name: OpenRouter model id assigned to both agents.

        Returns:
            A dict with keys ``model``, ``round1``, ``round2`` (reply texts,
            possibly error placeholders or empty) and ``errors`` (a list of
            human-readable failure descriptions). Round 2 is skipped when
            round 1 fails, since there is nothing meaningful to respond to.
        """
        print(f"\n--- Testing Model: {model_name} ---")

        # Both agents share the model under test.
        self.agent1.model = model_name
        self.agent2.model = model_name

        results = {"model": model_name, "round1": "", "round2": "", "errors": []}

        async with aiohttp.ClientSession() as session:
            # Round 1: agent 1 states its position.
            prompt1 = f"针对'{self.topic}'这个话题,请从你的角度阐述观点。要求:1)明确表达立场 2)提供具体论据 3)字数控制在150字以内"
            print(f"\n🗣️ {self.agent1.name} 发言:")
            try:
                reply1 = await self.agent1.generate_response(prompt1, session)
            except Exception as e:
                error_msg = f"Round 1 Error: {e}"
                print(f"❌ {error_msg}")
                results["errors"].append(error_msg)
                return results
            print(f"{reply1}\n")
            results["round1"] = reply1
            if _is_failure_reply(reply1):
                # generate_response reports failures as placeholder strings,
                # so they must be detected here to reach the error log.
                results["errors"].append(f"Round 1 failed: {reply1}")
                return results

            # Round 2: agent 2 responds to agent 1's statement.
            prompt2 = f"针对'{self.topic}'这个话题,{self.agent1.name}刚才说:'{reply1}'。请从你的角度回应并阐述不同观点。要求:1)回应对方观点 2)提出自己的立场 3)字数控制在150字以内"
            print(f"🗣️ {self.agent2.name} 回应:")
            try:
                reply2 = await self.agent2.generate_response(prompt2, session)
            except Exception as e:
                error_msg = f"Round 2 Error: {e}"
                print(f"❌ {error_msg}")
                results["errors"].append(error_msg)
                return results
            print(f"{reply2}\n")
            results["round2"] = reply2
            if _is_failure_reply(reply2):
                results["errors"].append(f"Round 2 failed: {reply2}")

        return results


async def main():
    """Test every model in ``MODELS_TO_TEST`` and print a summary."""
    print("🚀 启动 OpenRouter 免费模型辩论测试...")

    # 1. Read the OpenRouter API key.
    api_key = os.getenv('OPENROUTER_API_KEY')
    if not api_key:
        print("❌ 错误: 未找到 OPENROUTER_API_KEY 环境变量")
        print("请设置环境变量: export OPENROUTER_API_KEY=your_api_key")
        return

    tester = SimpleDebateTest(api_key)
    all_results = []

    # 2. Test the models one after another.
    for model_name in MODELS_TO_TEST:
        try:
            result = await tester.test_model(model_name)
            all_results.append(result)
            # Brief pause between models.
            await asyncio.sleep(2)
        except Exception as e:
            print(f"❌ 测试模型 {model_name} 时发生未预期错误: {e}")
            all_results.append({
                "model": model_name,
                "round1": "",
                "round2": "",
                "errors": [f"Unexpected test error: {e}"],
            })

    # 3. Print the summary. A round counts as OK only when it produced a
    #    real reply, i.e. neither empty nor an error placeholder.
    print("\n\n--- 📊 测试总结 ---")
    for res in all_results:
        model = res['model']
        r1_ok = "❌" if _is_failure_reply(res['round1']) else "✅"
        r2_ok = "❌" if _is_failure_reply(res['round2']) else "✅"
        err_count = len(res['errors'])
        print(f"🔹 {model:<35} | R1: {r1_ok} | R2: {r2_ok} | Errors: {err_count}")

    print("\n--- 📝 详细日志 ---")
    for res in all_results:
        if res['errors']:
            print(f"\n🔸 模型: {res['model']}")
            for err in res['errors']:
                print(f"  - {err}")


if __name__ == "__main__":
    asyncio.run(main())