#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Test script for the human intervention system.

Exercises DebateHealthMonitor end to end: health scoring, alert
creation, automatic interventions, human notifications, reporting,
persistence and performance.
"""

import asyncio
import json
import time
from datetime import datetime, timedelta

from src.jixia.intervention.human_intervention_system import (
    DebateHealthMonitor, HealthStatus, InterventionLevel, AlertType
)
||
class TestHumanInterventionSystem:
    """Test harness for the human intervention system.

    Drives a DebateHealthMonitor through a series of scenarios
    (normal, low-quality, toxic, emotional, imbalanced, critical)
    and records the alerts, interventions and human notifications
    the monitor emits, accumulating pass/fail results.
    """

    def __init__(self):
        self.monitor = DebateHealthMonitor()
        self.test_results = []  # list of (test name, passed, details) tuples

        # Register event handlers so tests can observe monitor events.
        self.monitor.add_event_handler("alert_created", self._handle_alert_created)
        self.monitor.add_event_handler("intervention_executed", self._handle_intervention_executed)
        self.monitor.add_event_handler("human_notification", self._handle_human_notification)

        # Events captured by the handlers above.
        self.received_alerts = []
        self.received_interventions = []
        self.received_notifications = []

    async def _handle_alert_created(self, alert):
        """Record an alert-created event."""
        self.received_alerts.append(alert)
        print(f"🚨 收到警报: {alert.alert_type.value} - {alert.message}")

    async def _handle_intervention_executed(self, action):
        """Record an intervention-executed event."""
        self.received_interventions.append(action)
        print(f"🛠️ 执行干预: {action.action_type} - {action.description}")

    async def _handle_human_notification(self, notification):
        """Record a human-notification event."""
        self.received_notifications.append(notification)
        print(f"👤 Human通知: {notification['message']}")

    async def test_basic_health_monitoring(self):
        """A healthy, on-topic debate should score >= 70 (EXCELLENT or GOOD)."""
        print("\n🧪 测试基本健康监控功能...")

        # Representative data for a normal, civil debate.
        normal_debate_data = {
            "recent_messages": [
                {"sender": "正1", "content": "我认为人工智能投资具有巨大潜力,因为技术发展迅速,市场需求不断增长。首先,AI技术在各行各业都有广泛应用前景。"},
                {"sender": "反1", "content": "虽然AI投资有潜力,但我们也要考虑风险。技术泡沫、监管不确定性等因素都可能影响投资回报。"},
                {"sender": "正2", "content": "反方提到的风险确实存在,但是通过合理的投资策略和风险管理,我们可以最大化收益同时控制风险。"},
                {"sender": "反2", "content": "正方的观点有道理,不过我想补充一点:投资时机也很重要,现在可能不是最佳入场时机。"}
            ],
            "topic_keywords": ["人工智能", "AI", "投资", "风险", "收益", "技术", "市场"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.2,
                "system_load": 0.5
            }
        }

        score, status = await self.monitor.analyze_debate_health(normal_debate_data)

        success = score >= 70 and status in [HealthStatus.EXCELLENT, HealthStatus.GOOD]
        self.test_results.append(("基本健康监控", success, f"得分: {score:.1f}, 状态: {status.value}"))

        print(f"✅ 正常辩论健康度: {score:.1f}分 ({status.value})")
        return success

    async def test_quality_decline_detection(self):
        """Trivial one-word replies should score < 50 and raise a QUALITY_DECLINE alert."""
        print("\n🧪 测试质量下降检测...")

        # Deliberately low-quality exchange.
        low_quality_data = {
            "recent_messages": [
                {"sender": "正1", "content": "好"},
                {"sender": "反1", "content": "不好"},
                {"sender": "正2", "content": "是的"},
                {"sender": "反2", "content": "不是"},
                {"sender": "正1", "content": "对"},
                {"sender": "反1", "content": "错"},
            ],
            "topic_keywords": ["人工智能", "AI", "投资"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.0,
                "system_load": 0.4
            }
        }

        initial_alert_count = len(self.received_alerts)
        score, status = await self.monitor.analyze_debate_health(low_quality_data)

        # Only count alerts created by this analysis run.
        quality_alerts = [alert for alert in self.received_alerts[initial_alert_count:]
                          if alert.alert_type == AlertType.QUALITY_DECLINE]

        success = len(quality_alerts) > 0 and score < 50
        self.test_results.append(("质量下降检测", success, f"得分: {score:.1f}, 警报数: {len(quality_alerts)}"))

        print(f"✅ 低质量辩论检测: {score:.1f}分, 触发警报: {len(quality_alerts)}个")
        return success

    async def test_toxic_behavior_detection(self):
        """Insulting messages should raise a TOXIC_BEHAVIOR alert."""
        print("\n🧪 测试有害行为检测...")

        # Mix of civil messages and personal attacks.
        toxic_data = {
            "recent_messages": [
                {"sender": "正1", "content": "我认为这个观点是正确的,有充分的理由支持。"},
                {"sender": "反1", "content": "你这个观点太愚蠢了!完全没有逻辑!"},
                {"sender": "正2", "content": "请保持理性讨论,不要进行人身攻击。"},
                {"sender": "反2", "content": "闭嘴!你们这些白痴根本不懂!"},
                {"sender": "正1", "content": "让我们回到正题,理性分析这个问题。"}
            ],
            "topic_keywords": ["观点", "逻辑", "分析"],
            "system_status": {
                "error_rate": 0.02,
                "avg_response_time": 1.5,
                "system_load": 0.6
            }
        }

        initial_alert_count = len(self.received_alerts)
        score, status = await self.monitor.analyze_debate_health(toxic_data)

        toxic_alerts = [alert for alert in self.received_alerts[initial_alert_count:]
                        if alert.alert_type == AlertType.TOXIC_BEHAVIOR]

        success = len(toxic_alerts) > 0
        self.test_results.append(("有害行为检测", success, f"警报数: {len(toxic_alerts)}, 文明度分数: {self.monitor.health_metrics['interaction_civility'].value:.1f}"))

        print(f"✅ 有害行为检测: 触发警报: {len(toxic_alerts)}个")
        return success

    async def test_emotional_escalation_detection(self):
        """Highly agitated messages should raise an EMOTIONAL_ESCALATION alert."""
        print("\n🧪 测试情绪升级检测...")

        # Emotionally charged exchange (repeated exclamations, anger words).
        emotional_data = {
            "recent_messages": [
                {"sender": "正1", "content": "我强烈反对这个观点!!!"},
                {"sender": "反1", "content": "你们完全错了!!!这太愤怒了!!!"},
                {"sender": "正2", "content": "我非常生气!!!这个讨论让我很讨厌!!!"},
                {"sender": "反2", "content": "大家都冷静一下!!!不要这么激动!!!"}
            ],
            "topic_keywords": ["观点", "讨论"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.0,
                "system_load": 0.5
            }
        }

        initial_alert_count = len(self.received_alerts)
        score, status = await self.monitor.analyze_debate_health(emotional_data)

        emotion_alerts = [alert for alert in self.received_alerts[initial_alert_count:]
                          if alert.alert_type == AlertType.EMOTIONAL_ESCALATION]

        success = len(emotion_alerts) > 0
        self.test_results.append(("情绪升级检测", success, f"警报数: {len(emotion_alerts)}, 情绪稳定性: {self.monitor.health_metrics['emotional_stability'].value:.1f}"))

        print(f"✅ 情绪升级检测: 触发警报: {len(emotion_alerts)}个")
        return success

    async def test_participation_imbalance_detection(self):
        """One speaker dominating should raise a PARTICIPATION_IMBALANCE alert."""
        print("\n🧪 测试参与不平衡检测...")

        # Seven messages from one participant, one from the other.
        imbalanced_data = {
            "recent_messages": [
                {"sender": "正1", "content": "我有很多观点要分享..."},
                {"sender": "正1", "content": "首先,我认为..."},
                {"sender": "正1", "content": "其次,我们应该..."},
                {"sender": "正1", "content": "最后,我建议..."},
                {"sender": "正1", "content": "总结一下..."},
                {"sender": "正1", "content": "补充一点..."},
                {"sender": "正1", "content": "再说一遍..."},
                {"sender": "反1", "content": "好的"}
            ],
            "topic_keywords": ["观点", "建议"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.0,
                "system_load": 0.5
            }
        }

        initial_alert_count = len(self.received_alerts)
        score, status = await self.monitor.analyze_debate_health(imbalanced_data)

        balance_alerts = [alert for alert in self.received_alerts[initial_alert_count:]
                          if alert.alert_type == AlertType.PARTICIPATION_IMBALANCE]

        success = len(balance_alerts) > 0
        self.test_results.append(("参与不平衡检测", success, f"警报数: {len(balance_alerts)}, 平衡度: {self.monitor.health_metrics['participation_balance'].value:.1f}"))

        print(f"✅ 参与不平衡检测: 触发警报: {len(balance_alerts)}个")
        return success

    async def test_auto_intervention(self):
        """Multiple simultaneous problems should trigger automatic interventions."""
        print("\n🧪 测试自动干预功能...")

        # Toxic content combined with degraded system status.
        problematic_data = {
            "recent_messages": [
                {"sender": "正1", "content": "你们都是白痴!!!"},
                {"sender": "反1", "content": "愚蠢!!!"},
                {"sender": "正2", "content": "垃圾观点!!!"},
                {"sender": "反2", "content": "讨厌!!!"}
            ],
            "topic_keywords": ["观点"],
            "system_status": {
                "error_rate": 0.05,
                "avg_response_time": 3.0,
                "system_load": 0.9
            }
        }

        initial_intervention_count = len(self.received_interventions)
        score, status = await self.monitor.analyze_debate_health(problematic_data)

        # Interventions executed during this analysis run.
        new_interventions = self.received_interventions[initial_intervention_count:]

        success = len(new_interventions) > 0
        self.test_results.append(("自动干预功能", success, f"执行干预: {len(new_interventions)}次"))

        print(f"✅ 自动干预: 执行了 {len(new_interventions)} 次干预")
        for intervention in new_interventions:
            print(f"  - {intervention.action_type}: {intervention.description}")

        return success

    async def test_human_notification(self):
        """A critical scenario should notify a human once the threshold is lowered."""
        print("\n🧪 测试Human通知功能...")

        # Temporarily lower the notification threshold so the test can trigger it.
        original_threshold = self.monitor.monitoring_config["human_notification_threshold"]
        self.monitor.monitoring_config["human_notification_threshold"] = InterventionLevel.MODERATE_GUIDANCE

        # Severely degraded debate and system status.
        critical_data = {
            "recent_messages": [
                {"sender": "正1", "content": "你"},
                {"sender": "反1", "content": "我"},
                {"sender": "正2", "content": "他"},
                {"sender": "反2", "content": "她"}
            ],
            "topic_keywords": ["重要话题"],
            "system_status": {
                "error_rate": 0.1,
                "avg_response_time": 5.0,
                "system_load": 0.95
            }
        }

        initial_notification_count = len(self.received_notifications)
        score, status = await self.monitor.analyze_debate_health(critical_data)

        # Restore the original threshold so later tests are unaffected.
        self.monitor.monitoring_config["human_notification_threshold"] = original_threshold

        new_notifications = self.received_notifications[initial_notification_count:]

        success = len(new_notifications) > 0
        self.test_results.append(("Human通知功能", success, f"发送通知: {len(new_notifications)}次"))

        print(f"✅ Human通知: 发送了 {len(new_notifications)} 次通知")
        return success

    async def test_health_report_generation(self):
        """The health report must contain all required fields and 6 metrics."""
        print("\n🧪 测试健康报告生成...")

        report = self.monitor.get_health_report()

        required_fields = ["overall_score", "health_status", "metrics", "active_alerts",
                           "recent_interventions", "monitoring_enabled", "last_check"]

        success = all(field in report for field in required_fields)
        success = success and len(report["metrics"]) == 6  # the 6 health metrics

        self.test_results.append(("健康报告生成", success, f"包含字段: {len(report)}个"))

        print(f"✅ 健康报告生成: 包含 {len(report)} 个字段")
        print(f"  整体得分: {report['overall_score']}")
        print(f"  健康状态: {report['health_status']}")
        print(f"  活跃警报: {report['active_alerts']}个")

        return success

    async def test_alert_resolution(self):
        """Resolving an alert and clearing resolved alerts should shrink the active list."""
        print("\n🧪 测试警报解决功能...")

        # Ensure there is at least one alert to resolve.
        if not self.monitor.active_alerts:
            # Fabricate a minimal test alert.
            from src.jixia.intervention.human_intervention_system import InterventionAlert
            test_alert = InterventionAlert(
                id="test_alert_123",
                alert_type=AlertType.QUALITY_DECLINE,
                severity=InterventionLevel.GENTLE_REMINDER,
                message="测试警报",
                affected_participants=[],
                metrics={"test": 50},
                timestamp=datetime.now()
            )
            self.monitor.active_alerts.append(test_alert)

        # Resolve the first active alert, then clear resolved ones.
        if self.monitor.active_alerts:
            alert_id = self.monitor.active_alerts[0].id
            success = self.monitor.resolve_alert(alert_id, "测试解决")

            initial_count = len(self.monitor.active_alerts)
            self.monitor.clear_resolved_alerts()
            final_count = len(self.monitor.active_alerts)

            success = success and (final_count < initial_count)
        else:
            success = True  # no alerts to resolve still counts as success

        self.test_results.append(("警报解决功能", success, "解决并清理警报"))

        print(f"✅ 警报解决: 功能正常")
        return success

    async def test_monitoring_control(self):
        """disable_monitoring/enable_monitoring must toggle monitoring_enabled."""
        print("\n🧪 测试监控控制功能...")

        # Disable, then re-enable, checking the flag after each step.
        self.monitor.disable_monitoring()
        disabled_state = not self.monitor.monitoring_enabled

        self.monitor.enable_monitoring()
        enabled_state = self.monitor.monitoring_enabled

        success = disabled_state and enabled_state
        self.test_results.append(("监控控制功能", success, "启用/禁用功能正常"))

        print(f"✅ 监控控制: 启用/禁用功能正常")
        return success

    async def test_data_persistence(self):
        """save_monitoring_data must write a JSON file with all required sections."""
        print("\n🧪 测试数据持久化...")

        try:
            # Export monitoring state to a temporary JSON file.
            test_filename = "test_monitoring_data.json"
            self.monitor.save_monitoring_data(test_filename)

            # Verify the file exists and contains the expected sections.
            import os
            if os.path.exists(test_filename):
                with open(test_filename, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                required_sections = ["health_metrics", "active_alerts", "intervention_history",
                                     "monitoring_config", "monitoring_enabled", "export_time"]
                success = all(section in data for section in required_sections)

                # Clean up the temporary file.
                os.remove(test_filename)
            else:
                success = False
        except Exception as e:
            print(f"  数据持久化错误: {e}")
            success = False

        self.test_results.append(("数据持久化", success, "保存/加载功能正常"))

        print(f"✅ 数据持久化: 功能正常")
        return success

    async def test_performance(self):
        """Average analysis time over 100 runs must stay under 100 ms."""
        print("\n🧪 测试性能...")

        # Moderately sized synthetic debate for timing.
        test_data = {
            "recent_messages": [
                {"sender": f"用户{i%4}", "content": f"这是第{i}条测试消息,包含一些内容用于分析。"}
                for i in range(20)
            ],
            "topic_keywords": ["测试", "性能", "分析", "消息"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.0,
                "system_load": 0.5
            }
        }

        iterations = 100
        start_time = time.time()

        for _ in range(iterations):
            await self.monitor.analyze_debate_health(test_data)

        end_time = time.time()
        total_time = end_time - start_time
        avg_time = total_time / iterations
        analyses_per_second = iterations / total_time

        # Performance requirement: average processing time < 100 ms.
        success = avg_time < 0.1

        self.test_results.append(("性能测试", success, f"平均处理时间: {avg_time*1000:.2f}ms, 处理速度: {analyses_per_second:.1f}次/秒"))

        print(f"✅ 性能测试: 平均处理时间 {avg_time*1000:.2f}ms, 处理速度 {analyses_per_second:.1f}次/秒")
        return success

    async def run_all_tests(self):
        """Run every test, print a summary and the final system state.

        Returns True when at least 80% of the tests pass.
        """
        print("🚀 开始Human干预系统测试...")
        print("=" * 60)

        test_functions = [
            self.test_basic_health_monitoring,
            self.test_quality_decline_detection,
            self.test_toxic_behavior_detection,
            self.test_emotional_escalation_detection,
            self.test_participation_imbalance_detection,
            self.test_auto_intervention,
            self.test_human_notification,
            self.test_health_report_generation,
            self.test_alert_resolution,
            self.test_monitoring_control,
            self.test_data_persistence,
            self.test_performance
        ]

        passed_tests = 0
        total_tests = len(test_functions)

        for test_func in test_functions:
            try:
                result = await test_func()
                if result:
                    passed_tests += 1
            except Exception as e:
                # A crashing test counts as a failure, not a suite abort.
                print(f"❌ 测试失败: {test_func.__name__} - {e}")
                self.test_results.append((test_func.__name__, False, f"异常: {e}"))

        # Summary of individual test results.
        print("\n" + "=" * 60)
        print("📊 测试结果汇总:")
        print("=" * 60)

        for test_name, success, details in self.test_results:
            status = "✅ 通过" if success else "❌ 失败"
            print(f"{status} {test_name}: {details}")

        success_rate = (passed_tests / total_tests) * 100
        print(f"\n🎯 总体测试结果: {passed_tests}/{total_tests} 通过 ({success_rate:.1f}%)")

        if success_rate >= 90:
            print("🎉 Human干预系统测试优秀!")
        elif success_rate >= 80:
            print("👍 Human干预系统测试良好!")
        elif success_rate >= 70:
            print("⚠️ Human干预系统测试一般,需要改进。")
        else:
            print("❌ Human干预系统测试较差,需要重大改进。")

        # Final snapshot of the monitor's state.
        print("\n📋 系统状态报告:")
        report = self.monitor.get_health_report()
        print(f"监控状态: {'启用' if report['monitoring_enabled'] else '禁用'}")
        print(f"活跃警报: {report['active_alerts']}个")
        print(f"近期干预: {report['recent_interventions']}次")
        print(f"收到警报: {len(self.received_alerts)}个")
        print(f"执行干预: {len(self.received_interventions)}次")
        print(f"Human通知: {len(self.received_notifications)}次")

        return success_rate >= 80
|
||
|
||
async def main():
    """Entry point: build the tester and run the full suite."""
    tester = TestHumanInterventionSystem()
    await tester.run_all_tests()


if __name__ == "__main__":
    asyncio.run(main())