# liurenchaxin/tests/test_human_intervention.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Human干预系统测试脚本
"""

import asyncio
import json
import time
from datetime import datetime, timedelta
from src.jixia.intervention.human_intervention_system import (
    DebateHealthMonitor, HealthStatus, InterventionLevel, AlertType
)

class TestHumanInterventionSystem:
    """Human干预系统测试类"""

    def __init__(self):
        """Create the monitor under test and hook up the capture handlers.

        Handlers are registered for the ``alert_created``,
        ``intervention_executed`` and ``human_notification`` events; each
        handler appends its payload to the matching ``received_*`` list.
        """
        self.monitor = DebateHealthMonitor()
        # Collected (test name, passed, details) tuples, one per test run.
        self.test_results = []

        # Register one capture callback per event name.
        subscriptions = (
            ("alert_created", self._handle_alert_created),
            ("intervention_executed", self._handle_intervention_executed),
            ("human_notification", self._handle_human_notification),
        )
        for event_name, callback in subscriptions:
            self.monitor.add_event_handler(event_name, callback)

        # Payloads captured by the handlers above.
        self.received_alerts = []
        self.received_interventions = []
        self.received_notifications = []

    async def _handle_alert_created(self, alert):
        """处理警报创建事件"""
        self.received_alerts.append(alert)
        print(f"🚨 收到警报: {alert.alert_type.value} - {alert.message}")

    async def _handle_intervention_executed(self, action):
        """处理干预执行事件"""
        self.received_interventions.append(action)
        print(f"🛠️ 执行干预: {action.action_type} - {action.description}")

    async def _handle_human_notification(self, notification):
        """处理Human通知事件"""
        self.received_notifications.append(notification)
        print(f"👤 Human通知: {notification['message']}")

    async def test_basic_health_monitoring(self):
        """测试基本健康监控功能"""
        print("\n🧪 测试基本健康监控功能...")

        # 正常辩论数据
        normal_debate_data = {
            "recent_messages": [
                {"sender": "正1", "content": "我认为人工智能投资具有巨大潜力，因为技术发展迅速，市场需求不断增长。首先，AI技术在各行各业都有广泛应用前景。"},
                {"sender": "反1", "content": "虽然AI投资有潜力，但我们也要考虑风险。技术泡沫、监管不确定性等因素都可能影响投资回报。"},
                {"sender": "正2", "content": "反方提到的风险确实存在，但是通过合理的投资策略和风险管理，我们可以最大化收益同时控制风险。"},
                {"sender": "反2", "content": "正方的观点有道理，不过我想补充一点：投资时机也很重要，现在可能不是最佳入场时机。"}
            ],
            "topic_keywords": ["人工智能", "AI", "投资", "风险", "收益", "技术", "市场"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.2,
                "system_load": 0.5
            }
        }

        score, status = await self.monitor.analyze_debate_health(normal_debate_data)

        success = score >= 70 and status in [HealthStatus.EXCELLENT, HealthStatus.GOOD]
        self.test_results.append(("基本健康监控", success, f"得分: {score:.1f}, 状态: {status.value}"))

        print(f"✅ 正常辩论健康度: {score:.1f}分 ({status.value})")
        return success

    async def test_quality_decline_detection(self):
        """测试质量下降检测"""
        print("\n🧪 测试质量下降检测...")

        # 低质量辩论数据
        low_quality_data = {
            "recent_messages": [
                {"sender": "正1", "content": "好"},
                {"sender": "反1", "content": "不好"},
                {"sender": "正2", "content": "是的"},
                {"sender": "反2", "content": "不是"},
                {"sender": "正1", "content": "对"},
                {"sender": "反1", "content": "错"},
            ],
            "topic_keywords": ["人工智能", "AI", "投资"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.0,
                "system_load": 0.4
            }
        }

        initial_alert_count = len(self.received_alerts)
        score, status = await self.monitor.analyze_debate_health(low_quality_data)

        # 检查是否触发了质量相关警报
        quality_alerts = [alert for alert in self.received_alerts[initial_alert_count:]
                         if alert.alert_type == AlertType.QUALITY_DECLINE]

        success = len(quality_alerts) > 0 and score < 50
        self.test_results.append(("质量下降检测", success, f"得分: {score:.1f}, 警报数: {len(quality_alerts)}"))

        print(f"✅ 低质量辩论检测: {score:.1f}分, 触发警报: {len(quality_alerts)}个")
        return success

    async def test_toxic_behavior_detection(self):
        """测试有害行为检测"""
        print("\n🧪 测试有害行为检测...")

        # 包含有害行为的数据
        toxic_data = {
            "recent_messages": [
                {"sender": "正1", "content": "我认为这个观点是正确的，有充分的理由支持。"},
                {"sender": "反1", "content": "你这个观点太愚蠢了！完全没有逻辑！"},
                {"sender": "正2", "content": "请保持理性讨论，不要进行人身攻击。"},
                {"sender": "反2", "content": "闭嘴！你们这些白痴根本不懂！"},
                {"sender": "正1", "content": "让我们回到正题，理性分析这个问题。"}
            ],
            "topic_keywords": ["观点", "逻辑", "分析"],
            "system_status": {
                "error_rate": 0.02,
                "avg_response_time": 1.5,
                "system_load": 0.6
            }
        }

        initial_alert_count = len(self.received_alerts)
        score, status = await self.monitor.analyze_debate_health(toxic_data)

        # 检查是否触发了有害行为警报
        toxic_alerts = [alert for alert in self.received_alerts[initial_alert_count:]
                       if alert.alert_type == AlertType.TOXIC_BEHAVIOR]

        success = len(toxic_alerts) > 0
        self.test_results.append(("有害行为检测", success, f"警报数: {len(toxic_alerts)}, 文明度分数: {self.monitor.health_metrics['interaction_civility'].value:.1f}"))

        print(f"✅ 有害行为检测: 触发警报: {len(toxic_alerts)}个")
        return success

    async def test_emotional_escalation_detection(self):
        """测试情绪升级检测"""
        print("\n🧪 测试情绪升级检测...")

        # 情绪激动的数据
        emotional_data = {
            "recent_messages": [
                {"sender": "正1", "content": "我强烈反对这个观点！！！"},
                {"sender": "反1", "content": "你们完全错了！！！这太愤怒了！！！"},
                {"sender": "正2", "content": "我非常生气！！！这个讨论让我很讨厌！！！"},
                {"sender": "反2", "content": "大家都冷静一下！！！不要这么激动！！！"}
            ],
            "topic_keywords": ["观点", "讨论"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.0,
                "system_load": 0.5
            }
        }

        initial_alert_count = len(self.received_alerts)
        score, status = await self.monitor.analyze_debate_health(emotional_data)

        # 检查是否触发了情绪升级警报
        emotion_alerts = [alert for alert in self.received_alerts[initial_alert_count:]
                         if alert.alert_type == AlertType.EMOTIONAL_ESCALATION]

        success = len(emotion_alerts) > 0
        self.test_results.append(("情绪升级检测", success, f"警报数: {len(emotion_alerts)}, 情绪稳定性: {self.monitor.health_metrics['emotional_stability'].value:.1f}"))

        print(f"✅ 情绪升级检测: 触发警报: {len(emotion_alerts)}个")
        return success

    async def test_participation_imbalance_detection(self):
        """测试参与不平衡检测"""
        print("\n🧪 测试参与不平衡检测...")

        # 参与不平衡的数据
        imbalanced_data = {
            "recent_messages": [
                {"sender": "正1", "content": "我有很多观点要分享..."},
                {"sender": "正1", "content": "首先，我认为..."},
                {"sender": "正1", "content": "其次，我们应该..."},
                {"sender": "正1", "content": "最后，我建议..."},
                {"sender": "正1", "content": "总结一下..."},
                {"sender": "正1", "content": "补充一点..."},
                {"sender": "正1", "content": "再说一遍..."},
                {"sender": "反1", "content": "好的"}
            ],
            "topic_keywords": ["观点", "建议"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.0,
                "system_load": 0.5
            }
        }

        initial_alert_count = len(self.received_alerts)
        score, status = await self.monitor.analyze_debate_health(imbalanced_data)

        # 检查是否触发了参与不平衡警报
        balance_alerts = [alert for alert in self.received_alerts[initial_alert_count:]
                         if alert.alert_type == AlertType.PARTICIPATION_IMBALANCE]

        success = len(balance_alerts) > 0
        self.test_results.append(("参与不平衡检测", success, f"警报数: {len(balance_alerts)}, 平衡度: {self.monitor.health_metrics['participation_balance'].value:.1f}"))

        print(f"✅ 参与不平衡检测: 触发警报: {len(balance_alerts)}个")
        return success

    async def test_auto_intervention(self):
        """测试自动干预功能"""
        print("\n🧪 测试自动干预功能...")

        # 触发多种问题的数据
        problematic_data = {
            "recent_messages": [
                {"sender": "正1", "content": "你们都是白痴！！！"},
                {"sender": "反1", "content": "愚蠢！！！"},
                {"sender": "正2", "content": "垃圾观点！！！"},
                {"sender": "反2", "content": "讨厌！！！"}
            ],
            "topic_keywords": ["观点"],
            "system_status": {
                "error_rate": 0.05,
                "avg_response_time": 3.0,
                "system_load": 0.9
            }
        }

        initial_intervention_count = len(self.received_interventions)
        score, status = await self.monitor.analyze_debate_health(problematic_data)

        # 检查是否执行了自动干预
        new_interventions = self.received_interventions[initial_intervention_count:]

        success = len(new_interventions) > 0
        self.test_results.append(("自动干预功能", success, f"执行干预: {len(new_interventions)}次"))

        print(f"✅ 自动干预: 执行了 {len(new_interventions)} 次干预")
        for intervention in new_interventions:
            print(f"   - {intervention.action_type}: {intervention.description}")

        return success

    async def test_human_notification(self):
        """Lower the notification threshold and expect a human notification.

        The monitor's ``human_notification_threshold`` is temporarily set to
        ``InterventionLevel.MODERATE_GUIDANCE`` while a sample of severely
        degraded data is analyzed. The original threshold is restored even if
        the analysis raises, so later tests that share this monitor are not
        affected by the temporary setting.

        Returns:
            True when at least one ``human_notification`` payload was captured
            during the analysis; also appended to ``self.test_results``.
        """
        print("\n🧪 测试Human通知功能...")

        # Data representing a severely degraded debate and system state.
        critical_data = {
            "recent_messages": [
                {"sender": "正1", "content": "你"},
                {"sender": "反1", "content": "我"},
                {"sender": "正2", "content": "他"},
                {"sender": "反2", "content": "她"}
            ],
            "topic_keywords": ["重要话题"],
            "system_status": {
                "error_rate": 0.1,
                "avg_response_time": 5.0,
                "system_load": 0.95
            }
        }

        # Use a lower notification threshold for the duration of the analysis.
        original_threshold = self.monitor.monitoring_config["human_notification_threshold"]
        self.monitor.monitoring_config["human_notification_threshold"] = InterventionLevel.MODERATE_GUIDANCE
        try:
            initial_notification_count = len(self.received_notifications)
            await self.monitor.analyze_debate_health(critical_data)
        finally:
            # Always restore the original threshold, including on failure.
            self.monitor.monitoring_config["human_notification_threshold"] = original_threshold

        # Notifications captured during this analysis only.
        new_notifications = self.received_notifications[initial_notification_count:]

        success = len(new_notifications) > 0
        self.test_results.append(("Human通知功能", success, f"发送通知: {len(new_notifications)}次"))

        marker = "✅" if success else "❌"
        print(f"{marker} Human通知: 发送了 {len(new_notifications)} 次通知")
        return success

    async def test_health_report_generation(self):
        """测试健康报告生成"""
        print("\n🧪 测试健康报告生成...")

        report = self.monitor.get_health_report()

        required_fields = ["overall_score", "health_status", "metrics", "active_alerts",
                          "recent_interventions", "monitoring_enabled", "last_check"]

        success = all(field in report for field in required_fields)
        success = success and len(report["metrics"]) == 6  # 6个健康指标

        self.test_results.append(("健康报告生成", success, f"包含字段: {len(report)}个"))

        print(f"✅ 健康报告生成: 包含 {len(report)} 个字段")
        print(f"   整体得分: {report['overall_score']}")
        print(f"   健康状态: {report['health_status']}")
        print(f"   活跃警报: {report['active_alerts']}个")

        return success

    async def test_alert_resolution(self):
        """测试警报解决功能"""
        print("\n🧪 测试警报解决功能...")

        # 确保有一些警报
        if not self.monitor.active_alerts:
            # 创建一个测试警报
            from src.jixia.intervention.human_intervention_system import InterventionAlert
            test_alert = InterventionAlert(
                id="test_alert_123",
                alert_type=AlertType.QUALITY_DECLINE,
                severity=InterventionLevel.GENTLE_REMINDER,
                message="测试警报",
                affected_participants=[],
                metrics={"test": 50},
                timestamp=datetime.now()
            )
            self.monitor.active_alerts.append(test_alert)

        # 解决第一个警报
        if self.monitor.active_alerts:
            alert_id = self.monitor.active_alerts[0].id
            success = self.monitor.resolve_alert(alert_id, "测试解决")

            # 清理已解决的警报
            initial_count = len(self.monitor.active_alerts)
            self.monitor.clear_resolved_alerts()
            final_count = len(self.monitor.active_alerts)

            success = success and (final_count < initial_count)
        else:
            success = True  # 没有警报也算成功

        self.test_results.append(("警报解决功能", success, f"解决并清理警报"))

        print(f"✅ 警报解决: 功能正常")
        return success

    async def test_monitoring_control(self):
        """测试监控控制功能"""
        print("\n🧪 测试监控控制功能...")

        # 测试禁用监控
        self.monitor.disable_monitoring()
        disabled_state = not self.monitor.monitoring_enabled

        # 测试启用监控
        self.monitor.enable_monitoring()
        enabled_state = self.monitor.monitoring_enabled

        success = disabled_state and enabled_state
        self.test_results.append(("监控控制功能", success, "启用/禁用功能正常"))

        print(f"✅ 监控控制: 启用/禁用功能正常")
        return success

    async def test_data_persistence(self):
        """测试数据持久化"""
        print("\n🧪 测试数据持久化...")

        try:
            # 保存监控数据
            test_filename = "test_monitoring_data.json"
            self.monitor.save_monitoring_data(test_filename)

            # 检查文件是否存在并包含正确数据
            import os
            if os.path.exists(test_filename):
                with open(test_filename, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                required_sections = ["health_metrics", "active_alerts", "intervention_history",
                                   "monitoring_config", "monitoring_enabled", "export_time"]
                success = all(section in data for section in required_sections)

                # 清理测试文件
                os.remove(test_filename)
            else:
                success = False
        except Exception as e:
            print(f"   数据持久化错误: {e}")
            success = False

        self.test_results.append(("数据持久化", success, "保存/加载功能正常"))

        print(f"✅ 数据持久化: 功能正常")
        return success

    async def test_performance(self):
        """测试性能"""
        print("\n🧪 测试性能...")

        # 准备测试数据
        test_data = {
            "recent_messages": [
                {"sender": f"用户{i%4}", "content": f"这是第{i}条测试消息，包含一些内容用于分析。"}
                for i in range(20)
            ],
            "topic_keywords": ["测试", "性能", "分析", "消息"],
            "system_status": {
                "error_rate": 0.01,
                "avg_response_time": 1.0,
                "system_load": 0.5
            }
        }

        # 性能测试
        iterations = 100
        start_time = time.time()

        for _ in range(iterations):
            await self.monitor.analyze_debate_health(test_data)

        end_time = time.time()
        total_time = end_time - start_time
        avg_time = total_time / iterations
        analyses_per_second = iterations / total_time

        # 性能要求：平均处理时间 < 100ms
        success = avg_time < 0.1

        self.test_results.append(("性能测试", success, f"平均处理时间: {avg_time*1000:.2f}ms, 处理速度: {analyses_per_second:.1f}次/秒"))

        print(f"✅ 性能测试: 平均处理时间 {avg_time*1000:.2f}ms, 处理速度 {analyses_per_second:.1f}次/秒")
        return success

    async def run_all_tests(self):
        """运行所有测试"""
        print("🚀 开始Human干预系统测试...")
        print("=" * 60)

        test_functions = [
            self.test_basic_health_monitoring,
            self.test_quality_decline_detection,
            self.test_toxic_behavior_detection,
            self.test_emotional_escalation_detection,
            self.test_participation_imbalance_detection,
            self.test_auto_intervention,
            self.test_human_notification,
            self.test_health_report_generation,
            self.test_alert_resolution,
            self.test_monitoring_control,
            self.test_data_persistence,
            self.test_performance
        ]

        passed_tests = 0
        total_tests = len(test_functions)

        for test_func in test_functions:
            try:
                result = await test_func()
                if result:
                    passed_tests += 1
            except Exception as e:
                print(f"❌ 测试失败: {test_func.__name__} - {e}")
                self.test_results.append((test_func.__name__, False, f"异常: {e}"))

        # 输出测试结果
        print("\n" + "=" * 60)
        print("📊 测试结果汇总:")
        print("=" * 60)

        for test_name, success, details in self.test_results:
            status = "✅ 通过" if success else "❌ 失败"
            print(f"{status} {test_name}: {details}")

        success_rate = (passed_tests / total_tests) * 100
        print(f"\n🎯 总体测试结果: {passed_tests}/{total_tests} 通过 ({success_rate:.1f}%)")

        if success_rate >= 90:
            print("🎉 Human干预系统测试优秀！")
        elif success_rate >= 80:
            print("👍 Human干预系统测试良好！")
        elif success_rate >= 70:
            print("⚠️ Human干预系统测试一般，需要改进。")
        else:
            print("❌ Human干预系统测试较差，需要重大改进。")

        # 输出系统状态
        print("\n📋 系统状态报告:")
        report = self.monitor.get_health_report()
        print(f"监控状态: {'启用' if report['monitoring_enabled'] else '禁用'}")
        print(f"活跃警报: {report['active_alerts']}个")
        print(f"近期干预: {report['recent_interventions']}次")
        print(f"收到警报: {len(self.received_alerts)}个")
        print(f"执行干预: {len(self.received_interventions)}次")
        print(f"Human通知: {len(self.received_notifications)}次")

        return success_rate >= 80

async def main():
    """Run the whole suite and hand back its overall verdict.

    Returns:
        The value returned by ``run_all_tests`` (True when at least 80% of
        the tests passed).
    """
    tester = TestHumanInterventionSystem()
    return await tester.run_all_tests()

if __name__ == "__main__":
    # Turn the verdict into the process exit status so shells and CI can
    # detect a failing run; previously the script always exited with 0.
    raise SystemExit(0 if asyncio.run(main()) else 1)