#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
测试 OpenRouter 免费模型的八仙辩论系统
(Test harness: runs the Eight-Immortals debate against OpenRouter free models.)
"""
# Standard library
import asyncio
import os

# Third-party
import aiohttp

# --- Models under test (previously considered possibly less suitable) ---
# Per earlier guidance and the OpenRouter site, every model below is tagged
# 'free', but we verify actual behavior — specifically on a debate task.
# NOTE: the name 'gpt-oss-20b' may be inaccurate or renamed; a common free
# open model is used as a stand-in. 'Uncensored' models are skipped for
# safety reasons, while 'Sarvam-M' is included in the run.
MODELS_TO_TEST = [
    # "openchat/openchat-7b",  # An alternative free model if needed for comparison
    "google/gemma-2-9b-it",  # Google's Gemma 2 9B, free on OpenRouter
    "microsoft/phi-3-mini-128k-instruct",  # Microsoft's Phi-3 Mini, free on OpenRouter
    "qwen/qwen3-coder-8b-instruct",  # Qwen3 Coder 8B, free on OpenRouter (good baseline)
    "deepseek/deepseek-chat",  # DeepSeek Chat, free on OpenRouter (good baseline)
    "mistralai/mistral-7b-instruct",  # Mistral 7B Instruct, free on OpenRouter (good baseline)
    # --- Previously considered less suitable ---
    "openai/gpt-3.5-turbo",  # Often free tier on OpenRouter
    "sophosympatheia/midnight-rose-70b",  # An uncensored model, free, but we test it cautiously
    "sarvamai/sarvam-2b-m",  # Sarvam 2B M, free on OpenRouter
]

class OpenRouterAgent:
    """Debate agent backed by the OpenRouter chat-completions API."""

    def __init__(self, name: str, personality: str, api_key: str, model: str):
        """Store the agent's persona and API configuration.

        Args:
            name: Display name of the agent (used in prompts and log lines).
            personality: Persona description injected as the system prompt.
            api_key: OpenRouter API key (sent as a Bearer token).
            model: OpenRouter model identifier, e.g. "google/gemma-2-9b-it".
        """
        self.name = name
        self.personality = personality
        self.api_key = api_key
        self.model = model
        self.api_url = "https://openrouter.ai/api/v1"

    async def generate_response(self, prompt: str, session: aiohttp.ClientSession) -> str:
        """Ask the configured model to reply to *prompt*.

        Never raises: returns the model's reply text on success, or a short
        Chinese placeholder string describing the failure mode (no content,
        HTTP error, or connection/timeout error).
        """
        try:
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                # Optional headers used by OpenRouter for analytics/attribution.
                "HTTP-Referer": "https://github.com/bennyschmidt/liurenchaxin",
                "X-Title": "BaXian Debate Test",
                "Content-Type": "application/json",
            }

            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": f"你是{self.name},{self.personality}。请用中文回答。"},
                    {"role": "user", "content": prompt},
                ],
                # Tuned for quick tests: short replies, mildly deterministic.
                "max_tokens": 500,
                "temperature": 0.7,
            }

            async with session.post(
                f"{self.api_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=aiohttp.ClientTimeout(total=30)
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    error_msg = f"API error ({response.status}) for {self.name} using {self.model}: {error_text[:200]}..."
                    print(f"❌ {error_msg}")
                    return f"[{self.name} API错误: {response.status}]"

                result = await response.json()
                # BUG FIX: the previous expression result.get('choices', [{}])[0]
                # raised IndexError when the API returned an empty 'choices' list
                # (key present, list empty), which the broad except below then
                # misreported as a connection error. Extract defensively instead.
                choices = result.get('choices') or [{}]
                content = choices[0].get('message', {}).get('content', '')
                if content:
                    return content.strip()

                error_msg = f"API returned no content for {self.name} using {self.model}. Full response: {result}"
                print(f"❌ {error_msg}")
                return f"[{self.name} 暂时无法回应]"

        except Exception as e:
            # Network failures, timeouts, malformed JSON, etc. must never crash
            # the debate loop; degrade to a placeholder reply instead.
            error_msg = f"Exception for {self.name} using {self.model}: {e}"
            print(f"❌ {error_msg}")
            return f"[{self.name} 连接错误]"

class SimpleDebateTest:
    """Minimal two-agent debate used to smoke-test a single model."""

    def __init__(self, api_key: str):
        """Set up the fixed debate topic and a contrasting agent pair."""
        self.api_key = api_key
        self.topic = "工作量证明vs无限制爬虫:从李时珍采药到AI数据获取的激励机制变革"

        # The concrete model is assigned per run inside test_model(); an
        # empty model string is used as a placeholder until then.
        self.agent1 = OpenRouterAgent(
            "吕洞宾",
            "八仙之首,男性代表,理性务实,善于分析问题的本质和长远影响。你代表男性视角,注重逻辑和实用性。",
            api_key, ""
        )
        self.agent2 = OpenRouterAgent(
            "何仙姑",
            "八仙中唯一的女性,温柔智慧,善于从情感和人文角度思考问题。你代表女性视角,注重关怀和和谐。",
            api_key, ""
        )

    async def test_model(self, model_name: str) -> dict:
        """Run a two-round debate on *model_name* and collect the replies.

        Returns:
            dict with keys 'model', 'round1', 'round2' and 'errors'
            (errors is a list of human-readable failure strings).
        """
        print(f"\n--- Testing Model: {model_name} ---")

        # Point both agents at the model under test.
        for agent in (self.agent1, self.agent2):
            agent.model = model_name

        outcome = {"model": model_name, "round1": "", "round2": "", "errors": []}

        async with aiohttp.ClientSession() as session:
            # Round 1: the first agent states its position.
            prompt1 = f"针对'{self.topic}'这个话题,请从你的角度阐述观点。要求:1)明确表达立场 2)提供具体论据 3)字数控制在150字以内"
            print(f"\n🗣️ {self.agent1.name} 发言:")
            try:
                reply1 = await self.agent1.generate_response(prompt1, session)
                print(f"{reply1}\n")
                outcome["round1"] = reply1
            except Exception as e:
                msg = f"Round 1 Error: {e}"
                print(f"❌ {msg}")
                outcome["errors"].append(msg)
                # Without an opening statement there is nothing to rebut.
                return outcome

            # Round 2: the second agent responds to the opening statement.
            prompt2 = f"针对'{self.topic}'这个话题,{self.agent1.name}刚才说:'{reply1}'。请从你的角度回应并阐述不同观点。要求:1)回应对方观点 2)提出自己的立场 3)字数控制在150字以内"
            print(f"🗣️ {self.agent2.name} 回应:")
            try:
                reply2 = await self.agent2.generate_response(prompt2, session)
                print(f"{reply2}\n")
                outcome["round2"] = reply2
            except Exception as e:
                msg = f"Round 2 Error: {e}"
                print(f"❌ {msg}")
                outcome["errors"].append(msg)

        return outcome

async def main():
|
||
"""主函数"""
|
||
print("🚀 启动 OpenRouter 免费模型辩论测试...")
|
||
|
||
# 1. 获取 OpenRouter API 密钥
|
||
api_key = os.getenv('OPENROUTER_API_KEY')
|
||
if not api_key:
|
||
print("❌ 错误: 未找到 OPENROUTER_API_KEY 环境变量")
|
||
print("请设置环境变量: export OPENROUTER_API_KEY=your_api_key")
|
||
return
|
||
|
||
tester = SimpleDebateTest(api_key)
|
||
all_results = []
|
||
|
||
# 2. 依次测试每个模型
|
||
for model_name in MODELS_TO_TEST:
|
||
try:
|
||
result = await tester.test_model(model_name)
|
||
all_results.append(result)
|
||
# Brief pause between models
|
||
await asyncio.sleep(2)
|
||
except Exception as e:
|
||
print(f"❌ 测试模型 {model_name} 时发生未预期错误: {e}")
|
||
all_results.append({"model": model_name, "round1": "", "round2": "", "errors": [f"Unexpected test error: {e}"]})
|
||
|
||
# 3. 输出测试总结
|
||
print(f"\n\n--- 📊 测试总结 ---")
|
||
for res in all_results:
|
||
model = res['model']
|
||
errors = res['errors']
|
||
r1_ok = "✅" if res['round1'] and not any("无法回应" in res['round1'] or "错误" in res['round1'] for e in errors) else "❌"
|
||
r2_ok = "✅" if res['round2'] and not any("无法回应" in res['round2'] or "错误" in res['round2'] for e in errors) else "❌"
|
||
err_count = len(errors)
|
||
|
||
print(f"🔹 {model:<35} | R1: {r1_ok} | R2: {r2_ok} | Errors: {err_count}")
|
||
|
||
print("\n--- 📝 详细日志 ---")
|
||
for res in all_results:
|
||
if res['errors']:
|
||
print(f"\n🔸 模型: {res['model']}")
|
||
for err in res['errors']:
|
||
print(f" - {err}")
|
||
|
||
if __name__ == "__main__":
|
||
asyncio.run(main()) |