liurenchaxin/tests/test_openrouter_models.py

188 lines
8.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
测试 OpenRouter 免费模型的八仙辩论系统
"""
import asyncio
import aiohttp
import os
# --- Models under test (including ones previously thought to be a poor fit) ---
# Per earlier instructions and the OpenRouter website, the models below are
# marked 'free'; this script checks how they actually perform, specifically on
# the debate task.
# Note: the name 'gpt-oss-20b' may be inaccurate or may have changed, so a
# common free open-source model is used in its place.
# 'Uncensored' is not tested because of the safety risk.
# 'Sarvam-M' is tested as well.
# NOTE(review): the "Uncensored is not tested" remark above conflicts with the
# midnight-rose entry below, which is labelled uncensored and IS in the list.
# Confirm which one is intended.

# Baseline candidates.
# "openchat/openchat-7b",  # An alternative free model if needed for comparison
_BASELINE_MODELS = [
    "google/gemma-2-9b-it",  # Google's Gemma 2 9B, free on OpenRouter
    "microsoft/phi-3-mini-128k-instruct",  # Microsoft's Phi-3 Mini, free on OpenRouter
    "qwen/qwen3-coder-8b-instruct",  # Qwen3 Coder 8B, free on OpenRouter (good baseline)
    "deepseek/deepseek-chat",  # DeepSeek Chat, free on OpenRouter (good baseline)
    "mistralai/mistral-7b-instruct",  # Mistral 7B Instruct, free on OpenRouter (good baseline)
]

# --- Previously considered less suitable ---
_LESS_SUITABLE_MODELS = [
    "openai/gpt-3.5-turbo",  # Often free tier on OpenRouter
    "sophosympatheia/midnight-rose-70b",  # An uncensored model, free, but we test it cautiously
    "sarvamai/sarvam-2b-m",  # Sarvam 2B M, free on OpenRouter
]

# Models are tested in this order: baselines first, then the less suitable ones.
MODELS_TO_TEST = [*_BASELINE_MODELS, *_LESS_SUITABLE_MODELS]
class OpenRouterAgent:
    """Debate participant that answers prompts through the OpenRouter API.

    Each agent carries a display name and a personality description that are
    combined into the system prompt, plus the API key and model id used for
    the request. ``model`` is a plain attribute and may be reassigned between
    calls.
    """

    def __init__(self, name: str, personality: str, api_key: str, model: str):
        """Store the agent's identity and API settings.

        Args:
            name: Display name, used in the system prompt and in log output.
            personality: Persona description appended to the system prompt.
            api_key: OpenRouter API key sent as a bearer token.
            model: OpenRouter model id to request.
        """
        self.name = name
        self.personality = personality
        self.api_key = api_key
        self.model = model
        self.api_url = "https://openrouter.ai/api/v1"

    @staticmethod
    def _extract_content(result) -> str:
        """Return the stripped text of the first choice, or ``""`` if absent.

        Tolerates every malformed shape (non-dict body, missing/empty/``None``
        ``choices``, missing ``message``, non-string ``content``) so that an
        unexpected response body is reported as "no content" by the caller
        instead of surfacing as an ``IndexError``/``TypeError``.
        """
        if not isinstance(result, dict):
            return ""
        choices = result.get("choices")
        if not isinstance(choices, list) or not choices:
            return ""
        first_choice = choices[0]
        if not isinstance(first_choice, dict):
            return ""
        message = first_choice.get("message")
        if not isinstance(message, dict):
            return ""
        content = message.get("content")
        return content.strip() if isinstance(content, str) else ""

    # The session annotation is quoted: it is only a type hint and does not
    # need to be evaluated when the class is defined.
    async def generate_response(self, prompt: str, session: "aiohttp.ClientSession") -> str:
        """Send *prompt* to the configured model and return its reply text.

        This method does not propagate request failures. Instead it prints a
        diagnostic and returns a bracketed placeholder string:

        * ``[<name> API错误: <status>]`` for a non-200 HTTP status,
        * ``[<name> 连接错误]`` when the request or body decoding raises,
        * ``[<name> 暂时无法回应]`` when the body holds no usable reply text
          (including an empty/missing ``choices`` list or a blank reply).

        Args:
            prompt: User message for the model.
            session: Open aiohttp client session used to issue the request.

        Returns:
            The stripped reply text, or one of the placeholders above.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "HTTP-Referer": "https://github.com/bennyschmidt/liurenchaxin",  # Optional, for OpenRouter analytics
            "X-Title": "BaXian Debate Test",  # Optional, for OpenRouter analytics
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": f"你是{self.name}{self.personality}。请用中文回答。"},
                {"role": "user", "content": prompt},
            ],
            # Adjust these for better output in a test scenario
            "max_tokens": 500,  # Reduced for quicker testing, but sufficient for short replies
            "temperature": 0.7,  # Slightly lower for more deterministic replies in test
        }

        try:
            async with session.post(
                f"{self.api_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=aiohttp.ClientTimeout(total=30),
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    # Only the first 200 characters of the error body are logged.
                    error_msg = f"API error ({response.status}) for {self.name} using {self.model}: {error_text[:200]}..."
                    print(f"{error_msg}")
                    return f"[{self.name} API错误: {response.status}]"
                result = await response.json()
        except Exception as e:
            # Deliberate best-effort boundary: one failing model must not
            # abort the whole test run, so the failure is logged and encoded
            # in the returned placeholder.
            error_msg = f"Exception for {self.name} using {self.model}: {e}"
            print(f"{error_msg}")
            return f"[{self.name} 连接错误]"

        content = self._extract_content(result)
        if content:
            return content

        error_msg = f"API returned no content for {self.name} using {self.model}. Full response: {result}"
        print(f"{error_msg}")
        return f"[{self.name} 暂时无法回应]"
class SimpleDebateTest:
    """Minimal two-round debate used to smoke-test a single model.

    Two fixed agents are created once; ``test_model`` points both of them at
    the model under test and runs one statement followed by one rebuttal.
    """

    def __init__(self, api_key: str):
        """Create the debate topic and the two agents sharing *api_key*."""
        self.api_key = api_key
        self.topic = "工作量证明vs无限制爬虫从李时珍采药到AI数据获取的激励机制变革"
        # The agents start with an empty model id; test_model() fills it in.
        self.agent1 = OpenRouterAgent(
            name="吕洞宾",
            personality="八仙之首,男性代表,理性务实,善于分析问题的本质和长远影响。你代表男性视角,注重逻辑和实用性。",
            api_key=api_key,
            model="",
        )
        self.agent2 = OpenRouterAgent(
            name="何仙姑",
            personality="八仙中唯一的女性,温柔智慧,善于从情感和人文角度思考问题。你代表女性视角,注重关怀和和谐。",
            api_key=api_key,
            model="",
        )

    async def test_model(self, model_name: str) -> dict:
        """Run the two-round debate against *model_name*.

        Returns:
            A dict with keys ``model``, ``round1``, ``round2`` (reply texts,
            empty when a round did not run) and ``errors`` (messages for any
            exception raised while a round was executing).
        """
        print(f"\n--- Testing Model: {model_name} ---")

        opener, responder = self.agent1, self.agent2
        # Both agents talk to the model under test.
        for agent in (opener, responder):
            agent.model = model_name

        outcome = {"model": model_name, "round1": "", "round2": "", "errors": []}

        async with aiohttp.ClientSession() as http:
            # Round 1: the first agent states its position.
            opening_prompt = (
                f"针对'{self.topic}'这个话题请从你的角度阐述观点。"
                "要求1)明确表达立场 2)提供具体论据 3)字数控制在150字以内"
            )
            print(f"\n🗣️ {opener.name} 发言:")
            try:
                opening_reply = await opener.generate_response(opening_prompt, http)
                print(f"{opening_reply}\n")
                outcome["round1"] = opening_reply
            except Exception as exc:
                # Without an opening statement there is nothing to rebut,
                # so the test stops here.
                failure = f"Round 1 Error: {exc}"
                print(failure)
                outcome["errors"].append(failure)
                return outcome

            # Round 2: the second agent answers the opening statement.
            rebuttal_prompt = (
                f"针对'{self.topic}'这个话题,{opener.name}刚才说:'{opening_reply}'。"
                "请从你的角度回应并阐述不同观点。"
                "要求1)回应对方观点 2)提出自己的立场 3)字数控制在150字以内"
            )
            print(f"🗣️ {responder.name} 回应:")
            try:
                rebuttal = await responder.generate_response(rebuttal_prompt, http)
                print(f"{rebuttal}\n")
                outcome["round2"] = rebuttal
            except Exception as exc:
                failure = f"Round 2 Error: {exc}"
                print(failure)
                outcome["errors"].append(failure)

        return outcome
def _reply_succeeded(reply: str) -> bool:
    """Return True when *reply* is real model output, not a failure placeholder.

    Failure placeholders are bracketed strings such as ``[<name> 暂时无法回应]``,
    ``[<name> API错误: 429]`` or ``[<name> 连接错误]``. Requiring the surrounding
    brackets keeps an ordinary reply that merely mentions "错误" from being
    counted as a failure.
    """
    if not reply:
        return False
    looks_like_placeholder = (
        reply.startswith("[")
        and reply.endswith("]")
        and any(marker in reply for marker in ("无法回应", "错误"))
    )
    return not looks_like_placeholder


async def main():
    """Test every model in MODELS_TO_TEST and print a summary.

    Reads the API key from the ``OPENROUTER_API_KEY`` environment variable and
    returns early with a hint when it is not set. Models are tested one after
    another; an unexpected exception for one model is recorded and does not
    stop the remaining models from being tested.
    """
    print("🚀 启动 OpenRouter 免费模型辩论测试...")

    # 1. Fetch the OpenRouter API key.
    api_key = os.getenv('OPENROUTER_API_KEY')
    if not api_key:
        print("❌ 错误: 未找到 OPENROUTER_API_KEY 环境变量")
        print("请设置环境变量: export OPENROUTER_API_KEY=your_api_key")
        return

    tester = SimpleDebateTest(api_key)
    all_results = []

    # 2. Test each model in turn.
    for model_name in MODELS_TO_TEST:
        try:
            result = await tester.test_model(model_name)
            all_results.append(result)
            # Brief pause between models
            await asyncio.sleep(2)
        except Exception as e:
            # Top-level boundary: record the failure and move on to the next model.
            print(f"❌ 测试模型 {model_name} 时发生未预期错误: {e}")
            all_results.append({"model": model_name, "round1": "", "round2": "", "errors": [f"Unexpected test error: {e}"]})

    # 3. Print the summary table.
    print("\n\n--- 📊 测试总结 ---")
    for res in all_results:
        model = res['model']
        # A round's status is derived from its reply text, so it stays
        # correct even when no exception was recorded in `errors`.
        r1_ok = "✅" if _reply_succeeded(res['round1']) else "❌"
        r2_ok = "✅" if _reply_succeeded(res['round2']) else "❌"
        err_count = len(res['errors'])
        print(f"🔹 {model:<35} | R1: {r1_ok} | R2: {r2_ok} | Errors: {err_count}")

    # 4. Print recorded error messages, for models that have any.
    print("\n--- 📝 详细日志 ---")
    for res in all_results:
        if res['errors']:
            print(f"\n🔸 模型: {res['model']}")
            for err in res['errors']:
                print(f" - {err}")
# Script entry point: start the async test driver only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())