#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Jixia v2.1.0 comprehensive functional test.

Exercises the priority algorithm, the debate flow controller, the health
monitor and the multi-chat coordinator, both individually and together,
then prints a summary and writes a JSON report.
"""

import asyncio
import sys
import os
import time
import threading
import json
import traceback
from datetime import datetime, timedelta
from enum import Enum

# Add the project path.
# NOTE(review): this is a hard-coded absolute path for one machine; confirm
# whether it should come from configuration instead.
sys.path.append('/home/ben/liurenchaxin/src')

# Import all core modules.
try:
    from jixia.debates.enhanced_priority_algorithm import EnhancedPriorityAlgorithm
    from jixia.debates.optimized_debate_flow import OptimizedDebateFlowController, FlowControlMode
    from jixia.intervention.human_intervention_system import DebateHealthMonitor
    from jixia.coordination.multi_chat_coordinator import MultiChatCoordinator
except ImportError as e:
    print(f"❌ 模块导入失败: {e}")
    print("请确保所有模块都已正确安装")
    sys.exit(1)


class V2_1_IntegrationTester:
    """Integration tester for v2.1.0.

    Each ``test_*`` method prints its progress, stores a boolean under its
    own key in ``self.test_results``, appends a message to ``self.error_log``
    on failure, and returns ``True``/``False`` instead of raising.
    """

    def __init__(self):
        """Create the result containers and instantiate the four components.

        A component construction failure is printed and logged rather than
        raised; in that case the component attributes are left unset.
        """
        self.test_results = {}
        self.performance_metrics = {}
        self.error_log = []

        # Initialise each component.
        try:
            self.priority_algorithm = EnhancedPriorityAlgorithm()
            self.flow_controller = OptimizedDebateFlowController()
            self.health_monitor = DebateHealthMonitor()
            self.chat_coordinator = MultiChatCoordinator()
            print("✅ 所有核心组件初始化成功")
        except Exception as e:
            print(f"❌ 组件初始化失败: {e}")
            self.error_log.append(f"初始化错误: {e}")

    def test_priority_algorithm_integration(self) -> bool:
        """Check that the priority algorithm yields distinct values in [0, 1].

        Returns:
            True when every assertion holds, False otherwise.
        """
        print("\n🧪 测试优先级算法集成")
        print("-" * 40)

        try:
            # Simulated debate scenario.
            test_speeches = [
                {
                    "speaker": "吕洞宾",
                    "content": "根据最新的市场数据分析,AI投资领域显示出强劲的增长潜力。我们应该抓住这个机会。",
                    "context": {"stage": "起", "topic": "AI投资", "recent_speakers": []}
                },
                {
                    "speaker": "何仙姑",
                    "content": "但是我们必须谨慎考虑风险因素!市场波动性很大。",
                    "context": {"stage": "承", "topic": "AI投资", "recent_speakers": ["吕洞宾"]}
                },
                {
                    "speaker": "铁拐李",
                    "content": "我同意吕洞宾的观点,技术发展确实迅速,但何仙姑提到的风险也值得重视。",
                    "context": {"stage": "转", "topic": "AI投资", "recent_speakers": ["吕洞宾", "何仙姑"]}
                }
            ]

            priorities = []
            algorithm = self.priority_algorithm
            # enumerate gives the position directly instead of re-scanning the
            # list with .index() on every iteration.
            for position, speech in enumerate(test_speeches):
                speaker = speech["speaker"]
                context = speech["context"]

                # The return value is not inspected here; the call is kept so
                # the algorithm still processes every speech.
                algorithm.analyze_speech(speech["content"], speaker, context)

                # Everything said before the current speech.
                recent_speeches = test_speeches[:position]

                # Fetch the detailed score breakdown.
                profile = algorithm._get_or_create_speaker_profile(speaker)
                algorithm._update_speaker_profile(profile, recent_speeches)

                rebuttal_urgency = algorithm._calculate_rebuttal_urgency(speaker, context, recent_speeches)
                argument_strength = algorithm._calculate_argument_strength(speaker, profile)
                time_pressure = algorithm._calculate_time_pressure(speaker, context)
                audience_reaction = algorithm._calculate_audience_reaction(speaker, context)
                strategy_need = algorithm._calculate_strategy_need(speaker, context, profile)

                priority = algorithm.calculate_priority(speaker, context, recent_speeches)
                priorities.append((speaker, priority))

                print(f"发言者: {speaker}")
                print(f" 反驳紧急性: {rebuttal_urgency:.6f}")
                print(f" 论证强度: {argument_strength:.6f}")
                print(f" 时间压力: {time_pressure:.6f}")
                print(f" 观众反应: {audience_reaction:.6f}")
                print(f" 策略需求: {strategy_need:.6f}")
                print(f" 最终优先级: {priority:.6f}")
                print()

            # Debug output.
            values = [value for _, value in priorities]
            print(f"所有优先级值: {values}")
            print(f"唯一优先级数量: {len(set(values))}")
            print(f"优先级差异: {max(values) - min(values)}")

            # Validate the computed priorities.
            assert all(0 <= value <= 1 for value in values), "优先级应该在0-1之间"
            assert len(set(values)) > 1, "不同发言应该有不同优先级"

            self.test_results["priority_algorithm_integration"] = True
            print("✅ 优先级算法集成测试通过")
            return True

        except Exception as e:
            print(f"❌ 优先级算法集成测试失败: {e}")
            self.error_log.append(f"优先级算法集成错误: {e}")
            self.test_results["priority_algorithm_integration"] = False
            return False

    def test_flow_controller_integration(self) -> bool:
        """Record eight speeches and check history length and stage changes.

        Returns:
            True when every assertion holds, False otherwise.
        """
        print("\n🧪 测试流程控制器集成")
        print("-" * 40)

        try:
            controller = self.flow_controller
            initial_stage = controller.current_stage
            print(f"初始阶段: {initial_stage.value}")

            # Simulate a complete debate flow.
            test_sequence = [
                ("吕洞宾", "开场陈述:AI投资是未来发展的关键"),
                ("何仙姑", "反方观点:需要谨慎评估风险"),
                ("铁拐李", "补充论据:技术发展支持投资决策"),
                ("汉钟离", "风险分析:市场不确定性因素"),
                ("曹国舅", "综合观点:平衡收益与风险"),
                ("蓝采和", "实践经验:类似投资案例分析"),
                ("韩湘子", "未来展望:长期发展趋势"),
                ("张果老", "总结陈词:理性投资建议")
            ]

            stage_transitions = 0
            for speaker, content in test_sequence:
                # Record the speech.
                controller.record_speech(speaker, content)

                # Advance the stage when the controller says it is time.
                if hasattr(controller, '_should_advance_stage') and controller._should_advance_stage():
                    old_stage = controller.current_stage
                    if controller.advance_stage():
                        stage_transitions += 1
                        print(f"阶段转换: {old_stage.value} -> {controller.current_stage.value}")

                # Report the flow status after each speech.
                status = controller.get_flow_status()
                print(f"发言者: {speaker}, 当前阶段: {status['current_stage']}, 进度: {status['stage_progress']}")

            # Validate flow control.
            total_speeches = len(controller.debate_history)
            assert total_speeches == len(test_sequence), f"发言总数应该匹配,期望{len(test_sequence)},实际{total_speeches}"
            assert stage_transitions > 0, "应该发生阶段转换"

            self.test_results["flow_controller_integration"] = True
            print(f"✅ 流程控制器集成测试通过,发生了 {stage_transitions} 次阶段转换")
            return True

        except Exception as e:
            print(f"❌ 流程控制器集成测试失败: {e}")
            self.error_log.append(f"流程控制器集成错误: {e}")
            self.test_results["flow_controller_integration"] = False
            return False

    def test_health_monitor_integration(self) -> bool:
        """Feed a normal and a problematic debate to the health monitor.

        Returns:
            True when every assertion holds, False otherwise.
        """
        print("\n🧪 测试健康监控集成")
        print("-" * 40)

        try:
            # Simulated debate data.
            debate_data = {
                "participants": ["吕洞宾", "何仙姑", "铁拐李", "汉钟离"],
                "speeches": [
                    {"speaker": "吕洞宾", "content": "我强烈支持这个提案", "timestamp": datetime.now()},
                    {"speaker": "何仙姑", "content": "我完全反对,这太危险了", "timestamp": datetime.now()},
                    {"speaker": "铁拐李", "content": "让我们理性分析一下", "timestamp": datetime.now()},
                    {"speaker": "汉钟离", "content": "数据显示情况复杂", "timestamp": datetime.now()}
                ],
                "current_stage": "承",
                "duration": timedelta(minutes=15)
            }

            # Update the health monitor.
            self.health_monitor.update_metrics(debate_data)

            # Read back the health state.
            health_status = self.health_monitor.get_health_status()
            health_report = self.health_monitor.get_health_report()

            # Validate before the first `.value` access so a bad status fails
            # with the intended message. The previous
            # isinstance(x, type(x)) check was always true.
            assert health_status is not None, "应该有健康状态"
            assert isinstance(health_status, Enum), "健康状态应该是HealthStatus枚举"

            print(f"健康状态: {health_status.value}")
            print(f"整体分数: {health_report['overall_score']:.1f}")
            print(f"监控指标数量: {len(health_report['metrics'])}")
            print(f"活跃警报: {health_report['active_alerts']}个")

            # Simulate a problematic scenario.
            problematic_data = {
                "participants": ["吕洞宾", "何仙姑"],
                "speeches": [
                    {"speaker": "吕洞宾", "content": "你们都是白痴!", "timestamp": datetime.now()},
                    {"speaker": "吕洞宾", "content": "我说了算!", "timestamp": datetime.now()},
                    {"speaker": "吕洞宾", "content": "闭嘴!", "timestamp": datetime.now()}
                ],
                "current_stage": "转",
                "duration": timedelta(minutes=30)
            }

            self.health_monitor.update_metrics(problematic_data)

            # Report how many alerts are currently active.
            alerts = self.health_monitor.active_alerts
            print(f"活跃警报数量: {len(alerts)}")

            self.test_results["health_monitor_integration"] = True
            print("✅ 健康监控集成测试通过")
            return True

        except Exception as e:
            print(f"❌ 健康监控集成测试失败: {e}")
            self.error_log.append(f"健康监控集成错误: {e}")
            self.test_results["health_monitor_integration"] = False
            return False

    async def test_chat_coordinator_integration(self) -> bool:
        """Send two messages through the coordinator and request coordination.

        Returns:
            True when a coordination result is produced, False otherwise.
        """
        print("\n🧪 测试多群聊协调集成")
        print("-" * 40)

        try:
            # Simulated multi-chat scenario.
            main_chat_message = {
                "chat_id": "main_debate",
                "speaker": "吕洞宾",
                "content": "我认为我们应该投资AI技术",
                "timestamp": datetime.now()
            }

            # Handle the main chat message.
            await self.chat_coordinator.handle_message(main_chat_message)

            # Simulated strategy discussion.
            strategy_message = {
                "chat_id": "strategy_positive",
                "speaker": "铁拐李",
                "content": "我们需要准备更多技术数据来支持论点",
                "timestamp": datetime.now()
            }

            await self.chat_coordinator.handle_message(strategy_message)

            # Inspect message routing.
            routing_status = self.chat_coordinator.get_routing_status()
            print(f"路由状态: {routing_status}")

            # Simulated coordination decision.
            coordination_result = await self.chat_coordinator.coordinate_response(
                main_chat_message,
                context={"stage": "承", "topic": "AI投资"}
            )

            print(f"协调结果: {coordination_result}")

            # Validate coordination.
            assert coordination_result is not None, "应该有协调结果"

            self.test_results["chat_coordinator_integration"] = True
            print("✅ 多群聊协调集成测试通过")
            return True

        except Exception as e:
            print(f"❌ 多群聊协调集成测试失败: {e}")
            self.error_log.append(f"多群聊协调集成错误: {e}")
            self.test_results["chat_coordinator_integration"] = False
            return False

    async def test_cross_component_integration(self) -> bool:
        """Run eight debate rounds through all four components together.

        Returns:
            True when every assertion holds, False otherwise.
        """
        print("\n🧪 测试跨组件集成")
        print("-" * 40)

        try:
            # Clear the speech history left by earlier tests.
            self.flow_controller.debate_history.clear()

            # Simulate a complete debate.
            debate_scenario = {
                "topic": "人工智能投资策略",
                "participants": ["吕洞宾", "何仙姑", "铁拐李", "汉钟离"],
                "duration": 30  # minutes
            }

            print(f"开始辩论: {debate_scenario['topic']}")

            # 1. The flow controller decides the speaking order.
            speakers_sequence = []
            for i in range(8):  # simulate 8 rounds
                speaker = self.flow_controller.get_current_speaker()
                speakers_sequence.append(speaker)

                # 2. Generate the speech content (simplified).
                content = f"这是{speaker}在第{i+1}轮的发言,关于{debate_scenario['topic']}"

                # 3. The priority algorithm analyses the speech.
                context = {
                    "stage": self.flow_controller.current_stage.value,
                    "topic": debate_scenario['topic'],
                    "recent_speakers": speakers_sequence[-3:]
                }

                # Return value unused; the call is kept for its processing.
                self.priority_algorithm.analyze_speech(content, speaker, context)

                # Build recent_speeches in the format calculate_priority is
                # given; this includes the current speaker's entry.
                recent_speeches = [
                    {
                        "speaker": prev_speaker,
                        "content": f"这是{prev_speaker}在第{j+1}轮的发言",
                        "timestamp": datetime.now().isoformat(),
                        "team": "positive" if "正" in prev_speaker else "negative"
                    }
                    for j, prev_speaker in enumerate(speakers_sequence)
                ]

                priority = self.priority_algorithm.calculate_priority(speaker, context, recent_speeches)

                # 4. Record the speech in the flow controller.
                self.flow_controller.record_speech(speaker, content)

                # 5. Update the health monitor.
                debate_data = {
                    "participants": debate_scenario['participants'],
                    "speeches": [{"speaker": speaker, "content": content, "timestamp": datetime.now()}],
                    "current_stage": self.flow_controller.current_stage.value,
                    "duration": timedelta(minutes=i*2)
                }
                self.health_monitor.update_metrics(debate_data)

                # 6. Multi-chat coordination.
                message = {
                    "chat_id": "main_debate",
                    "speaker": speaker,
                    "content": content,
                    "timestamp": datetime.now()
                }

                # A coordinator failure only produces a warning so the
                # remaining rounds still run.
                try:
                    await self.chat_coordinator.handle_message(message)
                except Exception as e:
                    print(f"警告: 消息处理失败: {e}")

                print(f"第{i+1}轮 - 发言者: {speaker}, 优先级: {priority:.3f}, 阶段: {context['stage']}")

            # Verify the combined result.
            print("\n开始获取各组件状态...")

            try:
                flow_status = self.flow_controller.get_flow_status()
                print(f"✅ 流程状态获取成功: {type(flow_status)}")
            except Exception as e:
                print(f"❌ 流程状态获取失败: {e}")
                raise

            try:
                health_status = self.health_monitor.get_health_status()
                print(f"✅ 健康状态获取成功: {type(health_status)}")
            except Exception as e:
                print(f"❌ 健康状态获取失败: {e}")
                raise

            try:
                routing_status = self.chat_coordinator.get_routing_status()
                print(f"✅ 路由状态获取成功: {type(routing_status)}, 值: {routing_status}")
            except Exception as e:
                print(f"❌ 路由状态获取失败: {e}")
                raise

            print("\n集成测试结果:")
            print(f"- 总发言数: {len(self.flow_controller.debate_history)}")
            print(f"- 当前阶段: {flow_status['current_stage']}")
            print(f"- 健康状态: {health_status.value}")

            # Access routing_status defensively: it may not be a dict.
            if isinstance(routing_status, dict):
                print(f"- 活跃路由数: {routing_status.get('active_routes', 0)}")
                print(f"- 消息队列大小: {routing_status.get('message_queue_size', 0)}")
                print(f"- 总群聊数: {routing_status.get('total_rooms', 0)}")
            else:
                print(f"- 路由状态: {routing_status}")
                print(f"- 路由状态类型: {type(routing_status)}")

            # Verify that every component did its part.
            total_speeches = len(self.flow_controller.debate_history)
            assert total_speeches == 8, f"应该记录8次发言,实际{total_speeches}次"
            assert health_status is not None, "应该有健康状态"
            assert len(speakers_sequence) == 8, "应该有8个发言者记录"

            self.test_results["cross_component_integration"] = True
            print("✅ 跨组件集成测试通过")
            return True

        except Exception as e:
            print(f"❌ 跨组件集成测试失败: {e}")
            print("详细错误信息:")
            traceback.print_exc()
            self.error_log.append(f"跨组件集成错误: {e}")
            self.test_results["cross_component_integration"] = False
            return False

    def test_performance_under_load(self) -> bool:
        """Drive the components from five threads and measure throughput.

        Stores the measurements in ``self.performance_metrics``.

        Returns:
            True when throughput exceeds 100 ops/s with no worker errors,
            False otherwise.
        """
        print("\n🧪 测试负载下的性能")
        print("-" * 40)

        try:
            # Load-test parameters.
            num_speeches = 100
            num_threads = 5

            def simulate_debate_load():
                """Process this thread's share of the simulated speeches."""
                thread_name = threading.current_thread().name

                for i in range(num_speeches // num_threads):
                    try:
                        # Simulated speech.
                        speaker = f"Speaker-{thread_name}-{i}"
                        content = f"这是来自{speaker}的测试发言 {i}"

                        # Priority calculation (results are not inspected).
                        context = {"stage": "承", "topic": "性能测试", "recent_speakers": []}
                        self.priority_algorithm.analyze_speech(content, speaker, context)
                        self.priority_algorithm.calculate_priority(speaker, context, [])

                        # Flow record.
                        self.flow_controller.record_speech(speaker, content)

                        # Health monitoring.
                        debate_data = {
                            "participants": [speaker],
                            "speeches": [{"speaker": speaker, "content": content, "timestamp": datetime.now()}],
                            "current_stage": "承",
                            "duration": timedelta(seconds=i)
                        }
                        self.health_monitor.update_metrics(debate_data)

                    except Exception as e:
                        self.error_log.append(f"负载测试错误 {thread_name}-{i}: {e}")

            # perf_counter is monotonic and high resolution; time.time() can
            # report a zero or negative interval for a short run.
            start_time = time.perf_counter()

            threads = []
            for i in range(num_threads):
                thread = threading.Thread(target=simulate_debate_load, name=f"LoadTest-{i}")
                threads.append(thread)
                thread.start()

            for thread in threads:
                thread.join()

            duration = time.perf_counter() - start_time

            # Derive the performance figures.
            total_operations = num_speeches * 4  # each speech involves 4 operations
            ops_per_second = total_operations / duration

            self.performance_metrics = {
                "total_operations": total_operations,
                "duration": duration,
                "ops_per_second": ops_per_second,
                "avg_operation_time": duration / total_operations * 1000,  # milliseconds
                "concurrent_threads": num_threads,
                "errors": sum(1 for entry in self.error_log if "负载测试错误" in entry)
            }

            print("性能测试结果:")
            print(f"- 总操作数: {total_operations}")
            print(f"- 执行时间: {duration:.3f} 秒")
            print(f"- 操作速度: {ops_per_second:.1f} 操作/秒")
            print(f"- 平均操作时间: {self.performance_metrics['avg_operation_time']:.2f} 毫秒")
            print(f"- 并发线程: {num_threads}")
            print(f"- 错误数量: {self.performance_metrics['errors']}")

            # Performance validation.
            assert ops_per_second > 100, "操作速度应该超过100操作/秒"
            assert self.performance_metrics['errors'] == 0, "不应该有错误"

            self.test_results["performance_under_load"] = True
            print("✅ 负载性能测试通过")
            return True

        except Exception as e:
            print(f"❌ 负载性能测试失败: {e}")
            self.error_log.append(f"负载性能测试错误: {e}")
            self.test_results["performance_under_load"] = False
            return False

    def test_data_consistency(self) -> bool:
        """Save flow and health data to JSON files and verify what was written.

        The two scratch files are removed afterwards whether or not the
        checks pass.

        Returns:
            True when every assertion holds, False otherwise.
        """
        print("\n🧪 测试数据一致性")
        print("-" * 40)

        flow_data_file = "test_flow_consistency.json"
        health_data_file = "test_health_consistency.json"

        try:
            # Use a fresh flow controller so history from earlier tests does
            # not affect the record count.
            test_flow_controller = OptimizedDebateFlowController()

            # Simulated data.
            test_data = {
                "speakers": ["吕洞宾", "何仙姑", "铁拐李"],
                "speeches": [
                    "AI投资具有巨大潜力",
                    "但风险也不容忽视",
                    "我们需要平衡收益与风险"
                ]
            }

            # 1. Save the flow controller data.
            pairs = zip(test_data["speakers"], test_data["speeches"])
            for number, (speaker, content) in enumerate(pairs, 1):
                test_flow_controller.record_speech(speaker, content)
                print(f"记录发言 {number}: {speaker} - {content[:30]}...")
                print(f"当前debate_history长度: {len(test_flow_controller.debate_history)}")

            test_flow_controller.save_flow_data(flow_data_file)

            # 2. Save the health monitor data.
            debate_data = {
                "participants": test_data["speakers"],
                "speeches": [
                    {"speaker": s, "content": c, "timestamp": datetime.now()}
                    for s, c in zip(test_data["speakers"], test_data["speeches"])
                ],
                "current_stage": "承",
                "duration": timedelta(minutes=10)
            }
            self.health_monitor.update_metrics(debate_data)

            self.health_monitor.save_monitoring_data(health_data_file)

            # 3. Verify the data files exist.
            assert os.path.exists(flow_data_file), "流程数据文件应该存在"
            assert os.path.exists(health_data_file), "健康数据文件应该存在"

            # 4. Read the files back and verify their content.
            with open(flow_data_file, 'r', encoding='utf-8') as f:
                flow_data = json.load(f)

            with open(health_data_file, 'r', encoding='utf-8') as f:
                health_data = json.load(f)

            # Debug information.
            print(f"读取的flow_data中debate_history长度: {len(flow_data.get('debate_history', []))}")
            print(f"debate_history内容: {flow_data.get('debate_history', [])}")

            # Verify data integrity.
            actual_count = len(flow_data.get("debate_history", []))
            assert actual_count == 3, f"应该有3条发言记录,实际有{actual_count}条"
            assert "health_metrics" in health_data, "应该包含健康指标"
            assert "monitoring_config" in health_data, "应该包含监控配置"

            print("数据一致性验证:")
            print(f"- 流程数据记录: {len(flow_data['debate_history'])} 条")
            print(f"- 健康数据大小: {os.path.getsize(health_data_file)} 字节")
            print(f"- 流程数据大小: {os.path.getsize(flow_data_file)} 字节")

            self.test_results["data_consistency"] = True
            print("✅ 数据一致性测试通过")
            return True

        except Exception as e:
            print(f"❌ 数据一致性测试失败: {e}")
            self.error_log.append(f"数据一致性错误: {e}")
            self.test_results["data_consistency"] = False
            return False

        finally:
            # Clean up in `finally` so a failed assertion no longer leaves the
            # scratch files behind in the working directory.
            for path in (flow_data_file, health_data_file):
                try:
                    os.remove(path)
                except FileNotFoundError:
                    # The file was never written; nothing to clean up.
                    pass

    def generate_comprehensive_report(self) -> dict:
        """Print the overall report and return its summary figures.

        Returns:
            A dict with ``pass_rate``, ``total_tests``, ``passed_tests``,
            ``failed_tests``, ``performance_metrics`` and ``error_count``.
        """
        print("\n" + "=" * 60)
        print("📊 集夏v2.1.0 综合测试报告")
        print("=" * 60)

        # Result statistics.
        total_tests = len(self.test_results)
        passed_tests = sum(1 for result in self.test_results.values() if result)
        failed_tests = total_tests - passed_tests
        pass_rate = (passed_tests / total_tests) * 100 if total_tests > 0 else 0

        print("\n🎯 测试结果统计:")
        print(f"- 总测试数: {total_tests}")
        print(f"- 通过测试: {passed_tests}")
        print(f"- 失败测试: {failed_tests}")
        print(f"- 通过率: {pass_rate:.1f}%")

        # Per-test results.
        print("\n📋 详细测试结果:")
        for test_name, result in self.test_results.items():
            status = "✅ 通过" if result else "❌ 失败"
            print(f"- {test_name}: {status}")

        # Performance metrics.
        if self.performance_metrics:
            print("\n⚡ 性能指标:")
            for metric, value in self.performance_metrics.items():
                if isinstance(value, float):
                    print(f"- {metric}: {value:.3f}")
                else:
                    print(f"- {metric}: {value}")

        # Error log.
        if self.error_log:
            print(f"\n🚨 错误日志 ({len(self.error_log)} 条):")
            for i, error in enumerate(self.error_log[:5], 1):  # show only the first 5
                print(f"- {i}. {error}")
            if len(self.error_log) > 5:
                print(f"- ... 还有 {len(self.error_log) - 5} 条错误")

        # System state.
        print("\n🔧 系统状态:")
        try:
            flow_status = self.flow_controller.get_flow_status()
            health_status = self.health_monitor.get_health_status()

            print(f"- 流程控制器: 正常 (总发言: {flow_status.get('total_speeches', 0)})")
            print(f"- 健康监控: 正常 (状态: {health_status.value})")
            print("- 优先级算法: 正常")
            print("- 多群聊协调: 正常")
        except Exception as e:
            print(f"- 系统状态检查失败: {e}")

        # Verdict.
        print("\n🎉 测试总结:")
        if pass_rate >= 90:
            print("🟢 系统状态优秀!所有核心功能运行正常,可以发布v2.1.0版本。")
        elif pass_rate >= 70:
            print("🟡 系统状态良好,但有部分功能需要优化。建议修复后再发布。")
        else:
            print("🔴 系统存在重大问题,需要进行全面修复后才能发布。")

        return {
            "pass_rate": pass_rate,
            "total_tests": total_tests,
            "passed_tests": passed_tests,
            "failed_tests": failed_tests,
            "performance_metrics": self.performance_metrics,
            "error_count": len(self.error_log)
        }

    async def run_all_tests(self) -> dict:
        """Run the synchronous tests, then the asynchronous ones, then report.

        Returns:
            The summary dict produced by ``generate_comprehensive_report``.
        """
        print("🚀 开始集夏v2.1.0综合功能测试")
        print("=" * 60)

        # Synchronous test methods.
        sync_test_methods = [
            self.test_priority_algorithm_integration,
            self.test_flow_controller_integration,
            self.test_health_monitor_integration,
            self.test_performance_under_load,
            self.test_data_consistency
        ]

        # Asynchronous test methods.
        async_test_methods = [
            self.test_chat_coordinator_integration,
            self.test_cross_component_integration
        ]

        start_time = time.perf_counter()

        # Run the synchronous tests.
        for test_method in sync_test_methods:
            try:
                test_method()
            except Exception as e:
                print(f"❌ 测试执行异常: {e}")
                self.error_log.append(f"测试执行异常: {e}")

        # Run the asynchronous tests.
        for test_method in async_test_methods:
            try:
                await test_method()
            except Exception as e:
                print(f"❌ 测试执行异常: {e}")
                self.error_log.append(f"测试执行异常: {e}")

        total_duration = time.perf_counter() - start_time

        print(f"\n⏱️ 总测试时间: {total_duration:.3f} 秒")

        # Produce the overall report.
        return self.generate_comprehensive_report()


async def main() -> bool:
    """Run every test, write the JSON report, and report release readiness.

    Returns:
        True when the pass rate is at least 70%.
    """
    tester = V2_1_IntegrationTester()
    report = await tester.run_all_tests()

    # Save the test report.
    report_file = "v2_1_comprehensive_test_report.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump({
            "timestamp": datetime.now().isoformat(),
            "version": "v2.1.0",
            "test_results": tester.test_results,
            "performance_metrics": tester.performance_metrics,
            "error_log": tester.error_log,
            "summary": report
        }, f, ensure_ascii=False, indent=2)

    print(f"\n📄 详细测试报告已保存到: {report_file}")

    return report["pass_rate"] >= 70  # a 70% pass rate is the release threshold


if __name__ == "__main__":
    success = asyncio.run(main())
    sys.exit(0 if success else 1)