Initial commit: 胡汉三千年项目 - 北朝宇宙理论体系
This commit is contained in:
@@ -0,0 +1,404 @@
|
||||
"""
|
||||
龙崇拜理论分析系统
|
||||
分析李东阳"龙性最淫"与生育崇拜的文化逻辑关联
|
||||
建立龙崇拜与希腊宙斯神话的跨文化比较框架
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from typing import List, Dict, Any, Tuple
|
||||
from collections import Counter
|
||||
import statistics
|
||||
|
||||
from data.dragon_worship.lidongyang_documents import (
|
||||
DRAGON_SEXUALITY_DOCUMENTS,
|
||||
HIGH_RELIABILITY_DRAGON_DOCS,
|
||||
get_dragon_sexuality_themes,
|
||||
get_phallic_connections
|
||||
)
|
||||
from analysis.models import DragonWorshipDocument, ReliabilityLevel
|
||||
|
||||
class DragonTheoryAnalyzer:
    """Keyword-driven analyzer over the dragon-worship document corpus.

    The analyzer reads the module-level ``DRAGON_SEXUALITY_DOCUMENTS``
    collection and builds nested dict/list reports.  Report keys and texts
    are Chinese runtime strings and are emitted verbatim.
    """

    def __init__(self):
        """Bind the imported corpora and define the keyword lists."""
        self.documents = DRAGON_SEXUALITY_DOCUMENTS
        self.high_reliability_docs = HIGH_RELIABILITY_DRAGON_DOCS

        # Keywords marking sexual traits attributed to the dragon.
        self.sexuality_keywords = [
            "淫", "性", "交", "配", "生", "育", "繁", "殖", "杂交", "交媾"
        ]

        # Keywords marking fertility / reproduction.
        self.fertility_keywords = [
            "生育", "繁衍", "后代", "子嗣", "九子", "生殖", "配偶", "交配"
        ]

        # Keywords used for the cross-cultural (Zeus) comparison.
        self.cross_cultural_keywords = [
            "宙斯", "变形", "化身", "神话", "交合", "诱拐", "结合"
        ]

    def _find_lidongyang_document(self):
        """Return the first document whose author contains "李东阳", or None."""
        return next(
            (doc for doc in self.documents if "李东阳" in doc.author), None
        )

    def analyze_lidongyang_dragon_sexuality(self) -> Dict[str, Any]:
        """Summarize Li Dongyang's core claim about the dragon's nature.

        Returns:
            A report dict, or ``{"error": ...}`` when no document authored
            by Li Dongyang is present in ``self.documents``.
        """
        lidongyang_doc = self._find_lidongyang_document()
        if not lidongyang_doc:
            return {"error": "未找到李东阳相关文献"}

        return {
            "核心观点": "龙性最淫",
            "具体表现": lidongyang_doc.dragon_characteristics,
            "性象征": lidongyang_doc.sexual_symbolism,
            "文化内涵": self._analyze_sexuality_content(lidongyang_doc),
            "理论意义": self._extract_theoretical_significance(lidongyang_doc),
        }

    def _analyze_sexuality_content(self, doc: DragonWorshipDocument) -> Dict[str, Any]:
        """Count keyword hits and pick the matching characteristics of ``doc``.

        The two "密度" values are the number of *distinct* keywords that occur
        at least once in the combined text, not occurrence counts.
        """
        # Join with a separator between the content and every characteristic
        # so a multi-character keyword cannot match across two fragments
        # (e.g. content ending in "生" followed by a trait starting "育").
        content = " ".join([doc.content, *doc.dragon_characteristics])

        sexuality_count = sum(
            1 for keyword in self.sexuality_keywords if keyword in content
        )
        fertility_count = sum(
            1 for keyword in self.fertility_keywords if keyword in content
        )

        return {
            "性特征密度": sexuality_count,
            "生殖力密度": fertility_count,
            "主要特征": [
                char for char in doc.dragon_characteristics
                if any(keyword in char for keyword in self.sexuality_keywords)
            ],
            "生殖功能": [
                char for char in doc.dragon_characteristics
                if any(keyword in char for keyword in self.fertility_keywords)
            ],
        }

    def _extract_theoretical_significance(self, doc: DragonWorshipDocument) -> List[str]:
        """Map marker words found in ``doc.content`` to canned significance statements."""
        significance = []

        if "淫" in doc.content:
            significance.append("确立了龙的性特征为其核心属性")

        if "杂交" in doc.content:
            significance.append("说明龙具有跨物种繁殖能力")

        if "生出" in doc.content:
            significance.append("强调龙的生育和创造功能")

        if any(keyword in doc.content for keyword in ["各种", "不同", "乱七八糟"]):
            significance.append("体现龙的繁殖对象的多样性和包容性")

        return significance

    def analyze_dragon_fertility_logic(self) -> Dict[str, Any]:
        """Tally fertility/sexuality themes across all documents.

        Returns a dict with the five most common matching characteristics and
        symbols plus the fixed cultural-logic and theoretical-link texts.
        """
        all_characteristics = []
        all_symbolism = []
        for doc in self.documents:
            all_characteristics.extend(doc.dragon_characteristics)
            all_symbolism.extend(doc.sexual_symbolism)

        fertility_themes = Counter(
            char for char in all_characteristics
            if any(keyword in char for keyword in self.fertility_keywords)
        )
        sexuality_themes = Counter(
            sym for sym in all_symbolism
            if any(keyword in sym for keyword in self.sexuality_keywords)
        )

        return {
            "生殖力主题": dict(fertility_themes.most_common(5)),
            "性特征主题": dict(sexuality_themes.most_common(5)),
            "文化逻辑": self._analyze_cultural_logic(),
            "理论关联": self._establish_theoretical_connections(),
        }

    def _analyze_cultural_logic(self) -> Dict[str, str]:
        """Return the fixed chain of cultural-logic statements."""
        return {
            "龙性→生殖力": "龙的性特征直接关联生殖和繁衍能力",
            "生殖力→崇拜": "强大的生殖力成为崇拜的对象和目标",
            "崇拜→象征": "通过龙崇拜实现对生殖力的象征性获得",
            "象征→实践": "龙崇拜转化为具体的生育祈福实践",
            "实践→阳具": "生育祈福实践中阳具成为核心象征"
        }

    def _establish_theoretical_connections(self) -> List[str]:
        """Return the fixed list of theoretical-link statements."""
        return [
            "龙性最淫 → 龙是生殖力的终极象征",
            "龙的杂交能力 → 繁衍能力的神话化表达",
            "龙生九子 → 多样化生育能力的体现",
            "龙王求雨 → 生殖力与自然丰产的关联",
            "龙崇拜 → 阳具崇拜的神话化和抽象化"
        ]

    def compare_with_zeus_mythology(self) -> Dict[str, Any]:
        """Compare the Li Dongyang document with the Zeus-related document.

        Returns:
            A comparison dict, or ``{"error": ...}`` when either of the two
            documents cannot be found in ``self.documents``.
        """
        zeus_doc = next(
            (doc for doc in self.documents
             if "宙斯" in doc.content or "希腊" in doc.title),
            None,
        )
        if not zeus_doc:
            return {"error": "未找到宙斯神话对比文献"}

        dragon_doc = self._find_lidongyang_document()
        if not dragon_doc:
            # Without this guard the helpers below would fail with an
            # AttributeError on None.
            return {"error": "未找到李东阳相关文献"}

        return {
            "相似性分析": self._analyze_similarities(dragon_doc, zeus_doc),
            "差异性分析": self._analyze_differences(dragon_doc, zeus_doc),
            "跨文化意义": self._extract_cross_cultural_significance(dragon_doc, zeus_doc),
        }

    def _analyze_similarities(self, dragon_doc: DragonWorshipDocument,
                              zeus_doc: DragonWorshipDocument) -> List[str]:
        """List similarity statements supported by the two documents."""
        similarities = []

        # Shared sexual symbolism; sorted so the joined text is reproducible.
        common_sexuality = sorted(
            set(dragon_doc.sexual_symbolism) & set(zeus_doc.sexual_symbolism)
        )
        if common_sexuality:
            similarities.append(f"共同的性特征: {', '.join(common_sexuality)}")

        # Shape-shifting / cross-species mating.
        if "杂交" in dragon_doc.content and "化身" in zeus_doc.content:
            similarities.append("都具有变形和跨物种交配能力")

        # Multiple partners.
        if "各种" in dragon_doc.content and "多重" in zeus_doc.content:
            similarities.append("都有多个交配对象")

        # Procreative power.
        dragon_procreates = any("生" in char for char in dragon_doc.dragon_characteristics)
        if dragon_procreates and any(
            "生育" in char for char in zeus_doc.dragon_characteristics
        ):
            similarities.append("都具有强大的生育和创造能力")

        return similarities

    def _analyze_differences(self, dragon_doc: DragonWorshipDocument,
                             zeus_doc: DragonWorshipDocument) -> List[str]:
        """List difference statements; only the carrier line depends on the documents."""
        differences = ["文化背景: 中国龙崇拜 vs 希腊神话"]

        if "龙" in dragon_doc.content and "神" in zeus_doc.content:
            differences.append("象征载体: 龙(动物神) vs 宙斯(人格神)")

        differences.append("道德评价: 中国龙性被视为自然属性,希腊宙斯被视为道德问题")
        differences.append("文化功能: 龙崇拜侧重生殖祈福,宙斯神话侧重权力展示")

        return differences

    def _extract_cross_cultural_significance(self, dragon_doc: DragonWorshipDocument,
                                             zeus_doc: DragonWorshipDocument) -> List[str]:
        """Return the fixed cross-cultural statements (the arguments are not read)."""
        return [
            "生殖力崇拜的普遍性: 不同文明都将强大的生殖力神化",
            "性象征的共通性: 跨文化的性象征具有相似的表达方式",
            "神话功能的一致性: 都通过神话解释和合理化生殖崇拜",
            "文化适应的差异性: 相同的原型在不同文化中有不同的表达",
            "阳具崇拜的普遍基础: 为阳具崇拜的跨文化传播提供理论基础"
        ]

    def analyze_dragon_mythology_system(self) -> Dict[str, Any]:
        """Collect hybrid-breeding records and assemble the mythology report."""
        breeding_records = [
            {
                "文献": doc.title,
                "内容": doc.content,
                "特征": doc.dragon_characteristics,
                "时期": doc.period,
            }
            for doc in self.documents
            if any(keyword in doc.content for keyword in ["杂交", "生出", "九子", "后代"])
        ]

        mythology_analysis = {
            "杂交对象": self._extract_breeding_partners(),
            "后代类型": self._extract_offspring_types(),
            "生育模式": self._analyze_breeding_patterns(),
            "象征意义": self._interpret_breeding_symbolism(),
        }

        return {
            "杂交生育记录": breeding_records,
            "神话体系分析": mythology_analysis,
            "文化功能": self._analyze_mythology_function(),
        }

    def _extract_breeding_partners(self) -> List[str]:
        """Return the distinct breeding partners mentioned, in first-seen order."""
        partners = []
        for doc in self.documents:
            if "杂交" not in doc.content:
                continue
            # Simplified marker matching; a finer extraction is not implemented.
            if "各种" in doc.content:
                partners.append("各种动物")
            if "女性" in doc.content:
                partners.append("人类女性")
            if "河伯" in doc.content:
                partners.append("水神")
        # dict.fromkeys de-duplicates while keeping a stable order, unlike
        # list(set(...)) whose order changes with string hash randomization.
        return list(dict.fromkeys(partners))

    def _extract_offspring_types(self) -> List[str]:
        """Return the distinct offspring types mentioned, in first-seen order."""
        offspring = []
        for doc in self.documents:
            if "九子" in doc.content:
                offspring.append("龙生九子(形态各异)")
            if "麒麟" in doc.content:
                offspring.append("麒麟")
            if "建马" in doc.content:
                offspring.append("建马")
            if "神力" in doc.content:
                offspring.append("具有神力的后代")
        return list(dict.fromkeys(offspring))

    def _analyze_breeding_patterns(self) -> Dict[str, str]:
        """Return the fixed breeding-pattern descriptions."""
        return {
            "跨物种繁殖": "龙能与不同物种交配繁衍",
            "形态多样化": "后代形态各异,适应不同环境",
            "能力传承": "后代继承龙的部分神力",
            "等级分化": "不同后代具有不同的地位和功能"
        }

    def _interpret_breeding_symbolism(self) -> List[str]:
        """Return the fixed symbolic interpretations of the breeding myths."""
        return [
            "包容性繁衍: 体现龙的包容性和适应性",
            "创造力象征: 龙作为创造新生命的力量",
            "多样性价值: 认可和赞美生物多样性",
            "生命力崇拜: 对强大生命力的崇拜和向往",
            "繁衍焦虑缓解: 通过神话缓解对繁衍能力的焦虑"
        ]

    def _analyze_mythology_function(self) -> List[str]:
        """Return the fixed list of functions attributed to the myth system."""
        return [
            "解释功能: 解释自然界生物多样性的起源",
            "心理功能: 满足对强大生殖力的心理需求",
            "社会功能: 为多元化繁衍模式提供文化合理性",
            "宗教功能: 为生育祈福提供神话基础",
            "教育功能: 传承生殖崇拜的文化观念"
        ]

    def generate_comprehensive_dragon_analysis(self) -> Dict[str, Any]:
        """Run every analysis and bundle the results into one report dict."""
        return {
            # NOTE(review): the analysis date is a fixed literal, not the
            # actual run date; kept as-is for output compatibility.
            "分析时间": "2024-10-15",
            "李东阳龙性分析": self.analyze_lidongyang_dragon_sexuality(),
            "生殖力逻辑分析": self.analyze_dragon_fertility_logic(),
            "跨文化比较": self.compare_with_zeus_mythology(),
            "神话体系分析": self.analyze_dragon_mythology_system(),
            "核心发现": self._extract_core_findings(),
        }

    def _extract_core_findings(self) -> List[str]:
        """Return the fixed list of headline findings."""
        return [
            "李东阳'龙性最淫'确立了龙作为生殖力象征的理论基础",
            "龙的杂交繁衍能力体现了生殖力崇拜的核心内容",
            "中国龙崇拜与希腊宙斯神话具有跨文化的相似性",
            "龙神话体系为阳具崇拜提供了神话化的表达形式",
            "龙崇拜本质上是对生殖力和繁衍能力的崇拜",
            "阳具崇拜是龙崇拜在具体实践中的物化表现"
        ]
|
||||
|
||||
# Module-level analyzer instance; created at import time and used by
# run_dragon_theory_analysis() below.
dragon_analyzer = DragonTheoryAnalyzer()
|
||||
|
||||
def run_dragon_theory_analysis():
    """Generate the comprehensive report, print a console summary, return it.

    Uses the module-level ``dragon_analyzer``.  Sections whose sub-report
    contains an ``"error"`` key are printed with their heading only.
    """
    thick_rule = "=" * 60
    thin_rule = "-" * 40

    print("🐉 开始龙崇拜理论分析...")
    print(thick_rule)

    report = dragon_analyzer.generate_comprehensive_dragon_analysis()

    # Section 1: Li Dongyang's claim.
    print("\n📜 李东阳'龙性最淫'分析:")
    print(thin_rule)
    li_section = report["李东阳龙性分析"]
    if "error" not in li_section:
        print(f"核心观点: {li_section['核心观点']}")
        print(f"具体表现: {', '.join(li_section['具体表现'][:3])}")
        print("理论意义:")
        for item in li_section['理论意义']:
            print(f" • {item}")

    # Section 2: fertility logic chain.
    print("\n🌱 龙性与生育崇拜逻辑关联:")
    print(thin_rule)
    fertility_section = report["生殖力逻辑分析"]
    print("文化逻辑链条:")
    for step, explanation in fertility_section['文化逻辑'].items():
        print(f" {step}: {explanation}")

    # Section 3: cross-cultural comparison.
    print("\n🌍 中国龙vs希腊宙斯跨文化比较:")
    print(thin_rule)
    zeus_section = report["跨文化比较"]
    if "error" not in zeus_section:
        print("相似性:")
        for point in zeus_section['相似性分析']:
            print(f" ✓ {point}")

        print("\n跨文化意义:")
        for point in zeus_section['跨文化意义'][:3]:
            print(f" • {point}")

    # Section 4: hybrid-breeding myth system.
    print("\n🐲 龙的杂交生育神话体系:")
    print(thin_rule)
    myth_details = report["神话体系分析"]['神话体系分析']
    print(f"杂交对象: {', '.join(myth_details['杂交对象'])}")
    print(f"后代类型: {', '.join(myth_details['后代类型'])}")

    # Section 5: numbered core findings.
    print("\n🎯 核心发现:")
    print(thin_rule)
    number = 1
    for finding in report['核心发现']:
        print(f"{number}. {finding}")
        number += 1

    print("\n" + thick_rule)
    print("🎉 龙崇拜理论分析完成!")
    print("✅ 证实:阳具崇拜的本质确实是龙崇拜!")

    return report
|
||||
|
||||
# Script entry point: run the analysis and keep the returned report.
if __name__ == "__main__":
    report = run_dragon_theory_analysis()
|
||||
230
phallic-worship-analysis/analysis/models.py
Normal file
230
phallic-worship-analysis/analysis/models.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""
|
||||
阳具崇拜文化分析 - 核心数据模型
|
||||
基于设计文档中定义的数据结构
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, Dict
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
class ReliabilityLevel(Enum):
    """Reliability grade assigned to a historical source."""

    # Corroborated by multiple sources.
    HIGH = "high"
    # Backed by a single reliable source.
    MEDIUM = "medium"
    # Legend or conjecture.
    LOW = "low"
    # Doubtful.
    UNCERTAIN = "uncertain"
|
||||
|
||||
class CulturalType(Enum):
    """Category of cultural phenomenon being tracked."""

    # Phallic worship.
    PHALLIC_WORSHIP = "phallic_worship"
    # Dragon worship.
    DRAGON_WORSHIP = "dragon_worship"
    # Fire worship.
    FIRE_WORSHIP = "fire_worship"
    # Ancestor worship.
    ANCESTOR_WORSHIP = "ancestor_worship"
    # Fertility ritual.
    FERTILITY_RITUAL = "fertility_ritual"
|
||||
|
||||
@dataclass
class Emperor:
    """Record for a Northern Wei emperor."""

    name: str                                  # emperor's name
    reign_period: str                          # reign period
    birth_year: Optional[int]                  # year of birth
    death_year: Optional[int]                  # year of death
    lifespan: Optional[int]                    # recorded lifespan
    reign_duration: Optional[int]              # length of reign
    death_cause: Optional[str]                 # cause of death
    offspring_count: Optional[int]             # number of offspring
    fertility_anxiety_score: Optional[float]   # fertility-anxiety score
    religious_activities: List[str]            # recorded religious activities
    sources: List[str]                         # historical sources
    reliability: ReliabilityLevel              # reliability grade

    def calculate_lifespan(self) -> Optional[int]:
        """Return ``death_year - birth_year``, or None if either year is missing.

        Only ``None`` counts as missing; a year value of 0 is a valid number
        and is used in the subtraction.
        """
        if self.birth_year is None or self.death_year is None:
            return None
        return self.death_year - self.birth_year

    def is_short_lived(self, threshold: int = 30) -> bool:
        """Return True when the computed lifespan is known and below ``threshold``."""
        lifespan = self.calculate_lifespan()
        return lifespan is not None and lifespan < threshold
|
||||
|
||||
@dataclass
class ReligiousBuilding:
    """Record for a religious building."""

    name: str                          # building name
    location: Dict[str, float]         # position as {"lat": ..., "lng": ...}
    construction_period: str           # construction period
    architect: Optional[str]           # builder
    purpose: List[str]                 # purposes of construction
    architectural_features: List[str]  # architectural features
    religious_function: List[str]      # religious functions
    political_significance: str        # political significance
    modern_status: str                 # present-day status
    fertility_elements: List[str]      # fertility-blessing elements
    dragon_symbolism: List[str]        # dragon-worship symbolism
    sources: List[str]                 # historical sources
    reliability: ReliabilityLevel      # reliability grade

    def has_fertility_function(self) -> bool:
        """Return True if any fertility keyword occurs in the function/element texts."""
        for keyword in ("生育", "祈福", "多子", "繁衍", "阳具", "龙"):
            # Entries are joined with a space, so matches are per-entry.
            searchable = " ".join(self.religious_function + self.fertility_elements)
            if keyword in searchable:
                return True
        return False
|
||||
|
||||
@dataclass
class FolkCustom:
    """Record for a folk custom."""

    name: str                     # custom name
    region: str                   # region
    historical_period: str        # historical period
    practice_description: str     # description of the practice
    cultural_meaning: List[str]   # cultural meanings
    religious_aspects: List[str]  # religious aspects
    social_function: List[str]    # social functions
    modern_practice: bool         # still practised today
    variations: List[str]         # regional variants
    fertility_connection: bool    # related to fertility
    dragon_elements: List[str]    # dragon-culture elements
    phallic_symbolism: List[str]  # phallic symbolism
    sources: List[str]            # historical sources
    reliability: ReliabilityLevel # reliability grade

    def get_cultural_continuity_score(self) -> float:
        """Return a continuity score in [0.0, 1.0] built from four weighted checks."""
        weighted_checks = (
            (self.modern_practice, 0.3),          # still practised
            (len(self.variations) > 2, 0.2),      # more than two variants
            (self.fertility_connection, 0.3),     # tied to fertility
            (len(self.dragon_elements) > 0, 0.2), # has dragon elements
        )
        total = 0.0
        for applies, weight in weighted_checks:
            if applies:
                total += weight
        return min(total, 1.0)
|
||||
|
||||
@dataclass
class CulturalTransmission:
    """Record describing how a cultural form spread between regions."""

    source_region: str              # source region
    target_region: str              # target region
    transmission_period: str        # period of transmission
    transmission_mechanism: str     # transmission mechanism
    cultural_carriers: List[str]    # cultural carriers
    adaptations: List[str]          # adaptive changes
    evidence: List[str]             # evidence sources
    reliability: ReliabilityLevel   # reliability grade
    cultural_type: CulturalType     # cultural type
    transmission_route: List[str]   # transmission route
    time_span: Optional[int]        # time span of the transmission
    success_indicators: List[str]   # indicators of successful transmission

    def calculate_transmission_success(self) -> float:
        """Return a success score in [0.0, 1.0] built from three weighted checks."""
        trusted_levels = (ReliabilityLevel.HIGH, ReliabilityLevel.MEDIUM)
        weighted_checks = (
            (len(self.evidence) >= 3, 0.4),            # at least three evidence items
            (len(self.success_indicators) >= 2, 0.3),  # at least two indicators
            (self.reliability in trusted_levels, 0.3), # HIGH or MEDIUM reliability
        )
        success_score = 0.0
        for satisfied, weight in weighted_checks:
            if satisfied:
                success_score += weight
        return min(success_score, 1.0)
|
||||
|
||||
@dataclass
class DragonWorshipDocument:
    """Record for a document about dragon worship."""

    title: str                         # document title
    author: str                        # author
    period: str                        # period
    content: str                       # document text
    dragon_characteristics: List[str]  # descriptions of the dragon's traits
    sexual_symbolism: List[str]        # sexual symbolism
    cultural_context: str              # cultural background
    cross_references: List[str]        # cross references
    reliability: ReliabilityLevel      # source reliability
    phallic_connections: List[str]     # links to phallic worship

    def extract_dragon_sexuality_themes(self) -> List[str]:
        """Return the characteristics that contain any sexuality keyword, in order."""
        markers = ("淫", "交", "生", "育", "繁", "殖")
        return [
            trait
            for trait in self.dragon_characteristics
            if any(marker in trait for marker in markers)
        ]
|
||||
|
||||
@dataclass
class LinguisticEvidence:
    """Record for a piece of linguistic (etymological) evidence."""

    word: str                           # the word
    pronunciation: str                  # pronunciation
    meaning: str                        # meaning
    etymology: str                      # etymology
    region: str                         # region
    period: str                         # period
    related_words: List[str]            # related words
    symbolism: List[str]                # symbolic meanings
    evidence: List[str]                 # linguistic evidence
    phonetic_evolution: Dict[str, str]  # phonological evolution
    dragon_connection: bool             # related to the dragon
    phallic_connection: bool            # related to the phallus

    def is_dragon_phallic_word(self) -> bool:
        """Return the conjunction of ``dragon_connection`` and ``phallic_connection``."""
        # Spelled-out form of `dragon_connection and phallic_connection`.
        if not self.dragon_connection:
            return self.dragon_connection
        return self.phallic_connection
|
||||
|
||||
@dataclass
class NihonShokiAnalysis:
    """Record for the analysis of one Nihon Shoki section."""

    section: str                                # section
    content: str                                # content
    northern_wei_elements: List[str]            # Northern Wei cultural elements
    packaging_strategies: List[str]             # packaging strategies
    myth_construction: List[str]                # myth construction
    political_purpose: str                      # political purpose
    cultural_inferiority_indicators: List[str]  # cultural-inferiority indicators
    imagination_community_elements: List[str]   # imagined-community elements
    sources: List[str]                          # historical sources
    analysis_confidence: float                  # confidence of the analysis

    def calculate_packaging_intensity(self) -> float:
        """Return the weighted item count (0.2 / 0.3 / 0.1 per item), capped at 1.0."""
        strategy_part = len(self.packaging_strategies) * 0.2
        myth_part = len(self.myth_construction) * 0.3
        inferiority_part = len(self.cultural_inferiority_indicators) * 0.1
        raw_intensity = 0.0 + strategy_part + myth_part + inferiority_part
        return min(raw_intensity, 1.0)
|
||||
|
||||
# 数据库连接配置
|
||||
# Connection settings for the two databases.
# NOTE(review): credentials are hard-coded placeholder literals; confirm they
# are overridden before any non-local deployment.
DATABASE_CONFIG = dict(
    neo4j=dict(
        uri="bolt://localhost:7687",
        user="neo4j",
        password="password",
        database="phallic_worship_analysis",
    ),
    postgresql=dict(
        host="localhost",
        port=5432,
        database="phallic_worship_db",
        user="postgres",
        password="password",
    ),
)
|
||||
|
||||
# 数据质量控制标准
|
||||
# Data-quality control thresholds.
QUALITY_STANDARDS = dict(
    minimum_sources=2,                              # minimum number of sources
    reliability_threshold=ReliabilityLevel.MEDIUM,  # lowest accepted reliability
    evidence_completeness=0.7,                      # evidence-completeness threshold
    cross_validation_required=True,                 # whether cross-validation is required
)
|
||||
|
||||
# 统计分析参数
|
||||
# Parameters for the statistical analyses.
ANALYSIS_PARAMETERS = dict(
    emperor_lifespan_threshold=30,       # short-lifespan threshold
    cultural_continuity_threshold=0.6,   # cultural-continuity threshold
    transmission_success_threshold=0.5,  # transmission-success threshold
    confidence_interval=0.95,            # confidence interval
)
|
||||
497
phallic-worship-analysis/analysis/quality_control.py
Normal file
497
phallic-worship-analysis/analysis/quality_control.py
Normal file
@@ -0,0 +1,497 @@
|
||||
"""
|
||||
数据质量控制系统
|
||||
实现史料来源验证、可靠性评分和多重史料交叉验证
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Tuple, Optional
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
import re
|
||||
from collections import Counter
|
||||
import statistics
|
||||
|
||||
from analysis.models import ReliabilityLevel, Emperor, ReligiousBuilding, FolkCustom, CulturalTransmission
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ValidationResult(Enum):
    """Outcome of a single validation step."""

    # The check succeeded.
    PASS = "pass"
    # The check succeeded with reservations.
    WARNING = "warning"
    # The check did not succeed.
    FAIL = "fail"
|
||||
|
||||
@dataclass
class QualityReport:
    """Result bundle produced by a quality check of one data record."""

    data_id: str                               # identifier of the checked record
    data_type: str                             # type name of the checked record
    overall_score: float                       # combined score
    reliability_level: ReliabilityLevel        # reliability grade
    validation_results: List[Dict[str, Any]]   # per-check result entries
    recommendations: List[str]                 # follow-up recommendations
    cross_validation_status: bool              # cross-validation flag
|
||||
|
||||
class SourceValidator:
    """Scores historical source citations by matching known source names."""

    # Named historical works grouped by trust tier.
    TRUSTED_SOURCES = {
        "high": [
            "魏书", "北史", "资治通鉴", "竹书纪年", "山海经",
            "日本书纪", "古事记", "续日本纪", "元史", "明史"
        ],
        "medium": [
            "太平御览", "册府元龟", "文献通考", "通典",
            "三国志", "晋书", "宋书", "南齐书"
        ],
        "low": [
            "野史", "传说", "民间故事", "口传资料"
        ]
    }

    # Modern academic institutions grouped by trust tier.
    ACADEMIC_SOURCES = {
        "high": [
            "中国社会科学院", "北京大学", "清华大学", "复旦大学",
            "东京大学", "京都大学", "哈佛大学", "剑桥大学"
        ],
        "medium": [
            "省级社科院", "重点大学", "专业研究机构"
        ]
    }

    # Score awarded per tier; defined once instead of inside the lookup loops.
    _ANCIENT_SCORES = {"high": 1.0, "medium": 0.7, "low": 0.4}
    _ACADEMIC_SCORES = {"high": 0.9, "medium": 0.6}

    def __init__(self):
        """Pre-compile the source-recognition patterns."""
        self.source_patterns = self._compile_source_patterns()

    def _compile_source_patterns(self) -> Dict[str, re.Pattern]:
        """Compile regexes for ancient, academic and archaeological citations."""

        def alternation(tiers: Dict[str, List[str]]) -> "re.Pattern":
            # Escape each name so regex metacharacters in a name are matched
            # literally rather than interpreted as pattern syntax.
            names = [name for tier_names in tiers.values() for name in tier_names]
            return re.compile(f"({'|'.join(re.escape(name) for name in names)})")

        return {
            'ancient': alternation(self.TRUSTED_SOURCES),
            'academic': alternation(self.ACADEMIC_SOURCES),
            'archaeological': re.compile(r"考古|出土|发掘|遗址|文物"),
        }

    def validate_sources(self, sources: List[str]) -> Dict[str, Any]:
        """Score a list of citations and classify the average.

        Returns:
            Dict with ``status`` (PASS at >= 0.8, WARNING at >= 0.5, else
            FAIL), the average ``score``, a ``message`` and a per-type
            ``source_analysis`` breakdown.  An empty list fails with score 0.0.
        """
        if not sources:
            return {
                "status": ValidationResult.FAIL,
                "score": 0.0,
                "message": "缺少史料来源",
                "source_analysis": {}
            }

        source_analysis = {
            "total_count": len(sources),
            "ancient_sources": 0,
            "academic_sources": 0,
            "archaeological_sources": 0,
            "reliability_distribution": Counter()
        }

        total_score = 0.0
        for source in sources:
            verdict = self._evaluate_single_source(source)
            total_score += verdict['score']

            # Tally by source type; "unknown" has no counter and is skipped.
            type_key = f"{verdict['type']}_sources"
            if type_key in source_analysis:
                source_analysis[type_key] += 1

            source_analysis['reliability_distribution'][verdict['reliability']] += 1

        average_score = total_score / len(sources)

        if average_score >= 0.8:
            status = ValidationResult.PASS
        elif average_score >= 0.5:
            status = ValidationResult.WARNING
        else:
            status = ValidationResult.FAIL

        return {
            "status": status,
            "score": average_score,
            "message": f"平均史料可靠性评分: {average_score:.2f}",
            "source_analysis": source_analysis
        }

    def _evaluate_single_source(self, source: str) -> Dict[str, Any]:
        """Classify one citation string.

        Checks run in order: ancient works (high, medium, low), academic
        institutions (high, medium), archaeological wording, else unknown.
        The first match wins.
        """
        for reliability, names in self.TRUSTED_SOURCES.items():
            if any(name in source for name in names):
                return {
                    "score": self._ANCIENT_SCORES[reliability],
                    "type": "ancient",
                    "reliability": reliability
                }

        for reliability, names in self.ACADEMIC_SOURCES.items():
            if any(name in source for name in names):
                return {
                    "score": self._ACADEMIC_SCORES[reliability],
                    "type": "academic",
                    "reliability": reliability
                }

        if self.source_patterns['archaeological'].search(source):
            return {
                "score": 0.8,
                "type": "archaeological",
                "reliability": "high"
            }

        return {
            "score": 0.2,
            "type": "unknown",
            "reliability": "low"
        }
|
||||
|
||||
class DataIntegrityChecker:
    """Checks records for missing required fields and out-of-range values."""

    def __init__(self):
        """Define the required field names per record type name."""
        self.required_fields = {
            "Emperor": ["name", "reign_period"],
            "ReligiousBuilding": ["name", "location", "construction_period"],
            "FolkCustom": ["name", "region", "historical_period"],
            "CulturalTransmission": ["source_region", "target_region", "transmission_period"]
        }

    def check_completeness(self, data: Any) -> Dict[str, Any]:
        """Report which required fields of ``data`` are missing or empty.

        The record type is looked up by class name.  A type with no
        registered required fields is treated as fully complete (score 1.0)
        instead of dividing by zero.

        Returns:
            Dict with ``status`` (PASS at >= 0.9, WARNING at >= 0.7, else
            FAIL), ``score``, ``missing_fields``, ``empty_fields`` and a
            ``message``.
        """
        data_type = type(data).__name__
        required = self.required_fields.get(data_type, [])

        missing_fields = []
        empty_fields = []
        for field in required:
            if not hasattr(data, field):
                missing_fields.append(field)
                continue
            value = getattr(data, field)
            # None, "" and [] count as empty; other falsy values do not.
            if value is None or (isinstance(value, (str, list)) and len(value) == 0):
                empty_fields.append(field)

        if required:
            problem_count = len(missing_fields) + len(empty_fields)
            completeness_score = 1.0 - problem_count / len(required)
        else:
            # Nothing is required for this type, so nothing can be missing.
            completeness_score = 1.0

        if completeness_score >= 0.9:
            status = ValidationResult.PASS
        elif completeness_score >= 0.7:
            status = ValidationResult.WARNING
        else:
            status = ValidationResult.FAIL

        return {
            "status": status,
            "score": completeness_score,
            "missing_fields": missing_fields,
            "empty_fields": empty_fields,
            "message": f"数据完整性: {completeness_score:.2%}"
        }

    def detect_anomalies(self, data: Any) -> Dict[str, Any]:
        """Flag numeric fields that fall outside fixed plausibility ranges.

        Only ``Emperor`` and ``CulturalTransmission`` records are inspected;
        any other type yields an empty anomaly list and PASS.
        """
        anomalies = []

        if isinstance(data, Emperor):
            # Lifespan outside 0..120.
            if data.lifespan and (data.lifespan < 0 or data.lifespan > 120):
                anomalies.append(f"异常寿命: {data.lifespan}")

            # Reign duration outside 0..80.
            if data.reign_duration and (data.reign_duration < 0 or data.reign_duration > 80):
                anomalies.append(f"异常在位时长: {data.reign_duration}")

            # Offspring count outside 0..50.
            if data.offspring_count and (data.offspring_count < 0 or data.offspring_count > 50):
                anomalies.append(f"异常子嗣数量: {data.offspring_count}")

        elif isinstance(data, CulturalTransmission):
            # Transmission time span outside 0..1000.
            if data.time_span and (data.time_span < 0 or data.time_span > 1000):
                anomalies.append(f"异常传播时间跨度: {data.time_span}")

        status = ValidationResult.FAIL if anomalies else ValidationResult.PASS

        return {
            "status": status,
            "anomalies": anomalies,
            "message": f"发现 {len(anomalies)} 个异常值" if anomalies else "未发现异常值"
        }
|
||||
|
||||
class CrossValidator:
    """Cross-checks a set of emperor records against expected statistics."""

    def __init__(self):
        """Load the descriptive validation rules."""
        self.validation_rules = self._load_validation_rules()

    def _load_validation_rules(self) -> Dict[str, List[str]]:
        """Return the rule descriptions per record category.

        The texts are descriptive only; nothing in this class evaluates them.
        """
        return {
            "emperor_lifespan": [
                "北魏前期皇帝平均寿命应在25-30岁之间",
                "短寿现象应与史料记录一致",
                "生育焦虑评分应与子嗣数量负相关"
            ],
            "cultural_transmission": [
                "传播时期应与历史事件时间线一致",
                "传播路径应符合地理逻辑",
                "文化载体应有史料支撑"
            ],
            "religious_building": [
                "建造时期应与政治背景一致",
                "建筑功能应与文化需求匹配",
                "地理位置应符合选址逻辑"
            ]
        }

    def cross_validate_emperors(self, emperors: List[Emperor]) -> Dict[str, Any]:
        """Validate average lifespan and the anxiety/offspring correlation.

        Returns:
            A WARNING dict for fewer than three records, a FAIL dict when no
            record has a truthy ``lifespan``, otherwise a dict with
            ``status``, ``statistics`` and ``validation_results``.
        """
        if len(emperors) < 3:
            return {
                "status": ValidationResult.WARNING,
                "message": "样本数量不足,无法进行有效交叉验证"
            }

        lifespans = [emp.lifespan for emp in emperors if emp.lifespan]
        if not lifespans:
            return {
                "status": ValidationResult.FAIL,
                "message": "缺少寿命数据,无法进行交叉验证"
            }

        avg_lifespan = statistics.mean(lifespans)
        median_lifespan = statistics.median(lifespans)

        # Expected average lifespan range (inclusive).
        expected_range = (25, 30)
        lifespan_valid = expected_range[0] <= avg_lifespan <= expected_range[1]

        fertility_correlation = self._calculate_fertility_correlation(emperors)

        validation_results = []

        if lifespan_valid:
            validation_results.append({
                "rule": "平均寿命范围验证",
                "status": ValidationResult.PASS,
                "message": f"平均寿命 {avg_lifespan:.1f} 岁符合预期范围"
            })
        else:
            validation_results.append({
                "rule": "平均寿命范围验证",
                "status": ValidationResult.WARNING,
                "message": f"平均寿命 {avg_lifespan:.1f} 岁超出预期范围 {expected_range}"
            })

        # A correlation below -0.3 counts as a clear negative relationship.
        if fertility_correlation < -0.3:
            validation_results.append({
                "rule": "生育焦虑相关性验证",
                "status": ValidationResult.PASS,
                "message": f"生育焦虑与子嗣数量呈负相关 (r={fertility_correlation:.3f})"
            })
        else:
            validation_results.append({
                "rule": "生育焦虑相关性验证",
                "status": ValidationResult.WARNING,
                "message": f"生育焦虑与子嗣数量相关性不明显 (r={fertility_correlation:.3f})"
            })

        # The worst individual status becomes the overall status.
        overall_status = ValidationResult.PASS
        if any(result["status"] == ValidationResult.FAIL for result in validation_results):
            overall_status = ValidationResult.FAIL
        elif any(result["status"] == ValidationResult.WARNING for result in validation_results):
            overall_status = ValidationResult.WARNING

        return {
            "status": overall_status,
            "statistics": {
                "sample_size": len(emperors),
                "avg_lifespan": avg_lifespan,
                "median_lifespan": median_lifespan,
                "fertility_correlation": fertility_correlation
            },
            "validation_results": validation_results
        }

    def _calculate_fertility_correlation(self, emperors: List[Emperor]) -> float:
        """Return the Pearson correlation of anxiety score vs. offspring count.

        Only records with both values present are used.  Returns 0.0 when
        fewer than three such records exist or when either series has no
        variance.  The result is always a real number in [-1.0, 1.0].
        """
        pairs = [
            (emp.fertility_anxiety_score, emp.offspring_count)
            for emp in emperors
            if emp.fertility_anxiety_score is not None and emp.offspring_count is not None
        ]
        if len(pairs) < 3:
            return 0.0

        anxiety_scores = [score for score, _ in pairs]
        offspring_counts = [count for _, count in pairs]

        # Work with deviations from the mean.  The sums of squares are then
        # non-negative by construction, so the square root can never receive
        # a negative radicand from float rounding (which would turn
        # ``** 0.5`` into a complex number).
        mean_x = statistics.mean(anxiety_scores)
        mean_y = statistics.mean(offspring_counts)
        dev_x = [x - mean_x for x in anxiety_scores]
        dev_y = [y - mean_y for y in offspring_counts]

        numerator = sum(dx * dy for dx, dy in zip(dev_x, dev_y))
        sum_sq_x = sum(dx * dx for dx in dev_x)
        sum_sq_y = sum(dy * dy for dy in dev_y)
        denominator = (sum_sq_x * sum_sq_y) ** 0.5

        if denominator == 0:
            return 0.0

        # Clamp away rounding overshoot beyond the mathematical bounds.
        return max(-1.0, min(1.0, numerator / denominator))
|
||||
|
||||
class QualityControlManager:
    """Run the individual validators on a record and condense the outcome into reports."""

    def __init__(self):
        """Create the source validator, integrity checker and cross validator."""
        self.source_validator = SourceValidator()
        self.integrity_checker = DataIntegrityChecker()
        self.cross_validator = CrossValidator()

    def comprehensive_quality_check(self, data: Any, data_id: str = None) -> QualityReport:
        """Run every applicable check on ``data`` and build a ``QualityReport``.

        Args:
            data: The record to check. Source validation only runs when the
                object has a ``sources`` attribute.
            data_id: Identifier stored in the report; when falsy, one is
                derived from the type name and ``id(data)``.

        Returns:
            A ``QualityReport`` holding the per-check results, the mean of the
            available ``score`` values (``0.0`` when no check reported one),
            the derived reliability level and the collected recommendations.
        """
        data_type = type(data).__name__
        data_id = data_id or f"{data_type}_{id(data)}"

        # Each entry: (category label, deferred check, advice if it does not pass).
        # The checks are deferred so they still execute one by one, in order.
        planned_checks = []
        if hasattr(data, 'sources'):
            planned_checks.append((
                "史料来源验证",
                lambda: self.source_validator.validate_sources(data.sources),
                "增加更多可靠的史料来源",
            ))
        planned_checks.append((
            "数据完整性检查",
            lambda: self.integrity_checker.check_completeness(data),
            "补充缺失的必要字段",
        ))
        planned_checks.append((
            "异常值检测",
            lambda: self.integrity_checker.detect_anomalies(data),
            "检查并修正异常数据",
        ))

        validation_results = []
        recommendations = []
        for category, run_check, advice in planned_checks:
            outcome = run_check()
            validation_results.append({
                "category": category,
                "result": outcome
            })
            if outcome["status"] != ValidationResult.PASS:
                recommendations.append(advice)

        # Overall score: mean over the checks that actually reported a score.
        scores = [
            entry['result']['score']
            for entry in validation_results
            if 'score' in entry['result']
        ]
        overall_score = statistics.mean(scores) if scores else 0.0

        # Map the score onto a reliability level.
        reliability_level = (
            ReliabilityLevel.HIGH if overall_score >= 0.8
            else ReliabilityLevel.MEDIUM if overall_score >= 0.6
            else ReliabilityLevel.LOW if overall_score >= 0.4
            else ReliabilityLevel.UNCERTAIN
        )

        return QualityReport(
            data_id=data_id,
            data_type=data_type,
            overall_score=overall_score,
            reliability_level=reliability_level,
            validation_results=validation_results,
            recommendations=recommendations,
            cross_validation_status=overall_score >= 0.6
        )

    def batch_quality_check(self, data_list: List[Any]) -> List[QualityReport]:
        """Check every item of ``data_list``, using ``batch_<index>`` as its id."""
        return [
            self.comprehensive_quality_check(item, f"batch_{position}")
            for position, item in enumerate(data_list)
        ]

    def generate_quality_summary(self, reports: List[QualityReport]) -> Dict[str, Any]:
        """Aggregate a list of reports into summary figures.

        Returns:
            ``{"message": ...}`` for an empty list; otherwise a dict with the
            record count, mean score, reliability-level counts, the number of
            records scoring >= 0.8 and < 0.4, the share scoring >= 0.8, and
            the most frequent recommendations.
        """
        if not reports:
            return {"message": "无数据报告"}

        total = len(reports)
        all_scores = [report.overall_score for report in reports]
        level_counts = Counter(report.reliability_level for report in reports)
        high_quality_count = len([score for score in all_scores if score >= 0.8])
        low_quality_count = len([score for score in all_scores if score < 0.4])

        return {
            "total_records": total,
            "average_quality_score": statistics.mean(all_scores),
            "reliability_distribution": dict(level_counts),
            "high_quality_records": high_quality_count,
            "low_quality_records": low_quality_count,
            "quality_pass_rate": high_quality_count / total,
            "recommendations": self._generate_batch_recommendations(reports)
        }

    def _generate_batch_recommendations(self, reports: List[QualityReport]) -> List[str]:
        """Return up to five recommendations, most frequent across ``reports`` first."""
        tally = Counter(
            advice
            for report in reports
            for advice in report.recommendations
        )
        return [advice for advice, _ in tally.most_common(5)]
|
||||
|
||||
# Module-level QualityControlManager instance, created when this module is imported.
|
||||
quality_manager = QualityControlManager()
|
||||
@@ -0,0 +1,384 @@
|
||||
"""
|
||||
北魏皇帝寿命统计分析器
|
||||
分析北魏前期皇帝的寿命分布、生育焦虑与政治政策的关联性
|
||||
"""
|
||||
|
||||
import statistics
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from typing import List, Dict, Any, Tuple
|
||||
import pandas as pd
|
||||
from dataclasses import asdict
|
||||
|
||||
from analysis.models import Emperor, ReliabilityLevel
|
||||
from data.emperors.northern_wei_emperors import (
|
||||
NORTHERN_WEI_EMPERORS,
|
||||
EMPERORS_WITH_LIFESPAN,
|
||||
HIGH_RELIABILITY_EMPERORS,
|
||||
PRE_REFORM_EMPERORS,
|
||||
get_short_lived_emperors,
|
||||
get_high_fertility_anxiety_emperors
|
||||
)
|
||||
|
||||
class EmperorLifespanAnalyzer:
    """Statistical analysis of emperor lifespans.

    The records only need to expose the attributes read below: ``name``,
    ``lifespan``, ``fertility_anxiety_score``, ``offspring_count`` and
    ``reliability``.
    """

    def __init__(self, emperors: List[Emperor] = None):
        """Store the data set and pre-filter the records with a known lifespan.

        Args:
            emperors: Records to analyse. ``None`` selects
                ``NORTHERN_WEI_EMPERORS``; an explicitly passed empty list is
                kept as-is instead of being replaced by the default.
        """
        # `emperors or DEFAULT` would silently discard an explicit empty list.
        self.emperors = NORTHERN_WEI_EMPERORS if emperors is None else emperors
        self.emperors_with_lifespan = [emp for emp in self.emperors if emp.lifespan is not None]

    def calculate_basic_statistics(self) -> Dict[str, Any]:
        """Compute descriptive statistics over the known lifespans.

        Returns:
            ``{"error": ...}`` when no lifespan is known; otherwise a dict
            with sample size, mean, median, mode (``None`` when no value
            repeats), sample standard deviation and variance (``0`` for a
            single value), min, max and range. With four or more values it
            also holds ``q1``/``q3``/``iqr`` taken by simple index from the
            sorted values (an approximation, not an interpolated quantile).
        """
        if not self.emperors_with_lifespan:
            return {"error": "没有有效的寿命数据"}

        lifespans = [emp.lifespan for emp in self.emperors_with_lifespan]
        has_spread = len(lifespans) > 1

        # Only ask for a mode when some value repeats. Older Python versions
        # raise StatisticsError on multimodal data, so fall back to None.
        mode_lifespan = None
        if len(set(lifespans)) < len(lifespans):
            try:
                mode_lifespan = statistics.mode(lifespans)
            except statistics.StatisticsError:
                mode_lifespan = None

        stats = {
            "sample_size": len(lifespans),
            "mean_lifespan": statistics.mean(lifespans),
            "median_lifespan": statistics.median(lifespans),
            "mode_lifespan": mode_lifespan,
            "std_deviation": statistics.stdev(lifespans) if has_spread else 0,
            "variance": statistics.variance(lifespans) if has_spread else 0,
            "min_lifespan": min(lifespans),
            "max_lifespan": max(lifespans),
            "range": max(lifespans) - min(lifespans)
        }

        # Quartiles by index into the sorted values.
        if len(lifespans) >= 4:
            sorted_lifespans = sorted(lifespans)
            n = len(sorted_lifespans)
            stats["q1"] = sorted_lifespans[n // 4]
            stats["q3"] = sorted_lifespans[3 * n // 4]
            stats["iqr"] = stats["q3"] - stats["q1"]

        return stats

    def analyze_short_lifespan_phenomenon(self, threshold: int = 30) -> Dict[str, Any]:
        """Describe the emperors whose lifespan is below ``threshold``.

        The short-lived group is taken from this analyzer's own records, so
        the numerator and denominator of the rate always refer to the same
        data set.

        Returns:
            ``{"error": ...}`` when no lifespan is known; otherwise the
            threshold, counts, rate and names of the short-lived emperors,
            plus their mean anxiety score and mean offspring count when any
            of them carries that value.
        """
        total_with_data = len(self.emperors_with_lifespan)
        if total_with_data == 0:
            return {"error": "没有有效的寿命数据"}

        short_lived = [emp for emp in self.emperors_with_lifespan if emp.lifespan < threshold]

        short_lived_analysis = {
            "threshold": threshold,
            "short_lived_count": len(short_lived),
            "total_count": total_with_data,
            "short_lived_rate": len(short_lived) / total_with_data,
            "short_lived_emperors": [emp.name for emp in short_lived]
        }

        # Mean fertility anxiety among the short-lived emperors.
        anxiety_scores = [emp.fertility_anxiety_score for emp in short_lived
                          if emp.fertility_anxiety_score is not None]
        if anxiety_scores:
            short_lived_analysis["avg_fertility_anxiety"] = statistics.mean(anxiety_scores)

        # Mean offspring count among the short-lived emperors.
        offspring_counts = [emp.offspring_count for emp in short_lived
                            if emp.offspring_count is not None]
        if offspring_counts:
            short_lived_analysis["avg_offspring_count"] = statistics.mean(offspring_counts)

        return short_lived_analysis

    def analyze_fertility_anxiety_correlation(self) -> Dict[str, Any]:
        """Correlate fertility anxiety with lifespan and with offspring count.

        Only emperors with both an anxiety score and a lifespan are used;
        the offspring correlation additionally needs an offspring count and
        is skipped when fewer than three such records exist.

        Returns:
            ``{"error": ...}`` with fewer than three usable records;
            otherwise the sample size, the correlation results and their
            textual interpretation.
        """
        valid_emperors = [emp for emp in self.emperors
                          if emp.fertility_anxiety_score is not None and emp.lifespan is not None]

        if len(valid_emperors) < 3:
            return {"error": "数据不足,无法进行相关性分析"}

        correlations = {}

        # Anxiety versus lifespan.
        correlations["anxiety_lifespan"] = self._calculate_correlation(
            [emp.fertility_anxiety_score for emp in valid_emperors],
            [emp.lifespan for emp in valid_emperors],
        )

        # Anxiety versus offspring count, restricted to records that have one
        # so both series stay aligned.
        with_offspring = [emp for emp in valid_emperors if emp.offspring_count is not None]
        if len(with_offspring) >= 3:
            correlations["anxiety_offspring"] = self._calculate_correlation(
                [emp.fertility_anxiety_score for emp in with_offspring],
                [emp.offspring_count for emp in with_offspring],
            )

        return {
            "sample_size": len(valid_emperors),
            "correlations": correlations,
            "interpretation": self._interpret_correlations(correlations)
        }

    @staticmethod
    def _two_sided_p_value(r: float, n: int) -> float:
        """Return the two-sided p-value of a Pearson coefficient ``r`` from ``n`` pairs.

        This is the Student's t test of "no correlation" with ``n - 2``
        degrees of freedom, evaluated with the finite trigonometric series
        for the t distribution using ``sin(theta) = |r|``. With fewer than
        three pairs there are no degrees of freedom left, so ``1.0`` is
        returned. Very small p-values lose relative precision because the
        result is formed as ``1 - central_probability``.
        """
        import math

        dof = n - 2
        if dof < 1:
            return 1.0

        sin_theta = min(abs(r), 1.0)
        if sin_theta == 1.0:
            return 0.0
        cos_sq = 1.0 - sin_theta * sin_theta

        if dof % 2 == 0:
            # sin(t) * [1 + 1/2 cos^2 + (1*3)/(2*4) cos^4 + ... up to cos^(dof-2)]
            term = total = 1.0
            for k in range(1, dof // 2):
                term *= (2 * k - 1) / (2 * k) * cos_sq
                total += term
            central = sin_theta * total
        else:
            # (2/pi) * [t + sin(t) * (cos + 2/3 cos^3 + ... up to cos^(dof-2))]
            series = 0.0
            if dof > 1:
                term = series = math.sqrt(cos_sq)
                for k in range(1, (dof - 1) // 2):
                    term *= (2 * k) / (2 * k + 1) * cos_sq
                    series += term
            central = (2.0 / math.pi) * (math.asin(sin_theta) + sin_theta * series)

        return min(1.0, max(0.0, 1.0 - central))

    def _calculate_correlation(self, x: List[float], y: List[float]) -> Dict[str, float]:
        """Compute the Pearson correlation of ``x`` and ``y`` with its p-value.

        Returns:
            ``{"correlation": 0.0, "p_value": 1.0}`` when the lengths differ
            or fewer than two pairs are given; otherwise a dict with the
            coefficient (``0.0`` when either series is constant), the
            two-sided p-value and the sample size.
        """
        if len(x) != len(y) or len(x) < 2:
            return {"correlation": 0.0, "p_value": 1.0}

        n = len(x)
        if min(x) == max(x) or min(y) == max(y):
            # Zero variance: the coefficient is undefined, report "none".
            correlation = 0.0
        else:
            mean_x = sum(x) / n
            mean_y = sum(y) / n
            # Centred sums cannot go negative through rounding, so the square
            # root below never turns into a complex number.
            covariance = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))
            spread_x = sum((xi - mean_x) ** 2 for xi in x)
            spread_y = sum((yi - mean_y) ** 2 for yi in y)
            denominator = (spread_x * spread_y) ** 0.5
            correlation = max(-1.0, min(1.0, covariance / denominator)) if denominator else 0.0

        return {
            "correlation": correlation,
            "p_value": self._two_sided_p_value(correlation, n),
            "sample_size": n
        }

    def _interpret_correlations(self, correlations: Dict[str, Dict[str, float]]) -> Dict[str, str]:
        """Turn each correlation result into a sentence on direction, strength and significance."""
        interpretations = {}

        for key, corr_data in correlations.items():
            corr = corr_data["correlation"]
            p_val = corr_data["p_value"]

            # Strength by absolute value.
            if abs(corr) >= 0.7:
                strength = "强"
            elif abs(corr) >= 0.5:
                strength = "中等"
            elif abs(corr) >= 0.3:
                strength = "弱"
            else:
                strength = "很弱或无"

            # Direction; an exactly zero coefficient is neither positive nor negative.
            if corr > 0:
                direction = "正"
            elif corr < 0:
                direction = "负"
            else:
                direction = "无"

            significance = "显著" if p_val < 0.05 else "不显著"

            interpretations[key] = f"{direction}相关,强度:{strength},统计显著性:{significance}"

        return interpretations

    def analyze_by_reliability(self) -> Dict[str, Any]:
        """Group emperors by ``reliability`` and report the mean lifespan per group.

        Groups without any known lifespan are left out of the result. Keys
        are the ``value`` of each ``ReliabilityLevel`` member.
        """
        reliability_groups = {}

        for reliability in ReliabilityLevel:
            group_emperors = [emp for emp in self.emperors if emp.reliability == reliability]
            lifespans = [emp.lifespan for emp in group_emperors if emp.lifespan is not None]
            if not lifespans:
                continue
            reliability_groups[reliability.value] = {
                "count": len(group_emperors),
                "with_lifespan_count": len(lifespans),
                "mean_lifespan": statistics.mean(lifespans),
                "emperors": [emp.name for emp in group_emperors]
            }

        return reliability_groups

    def generate_lifespan_distribution_chart(self, save_path: str = None) -> str:
        """Draw a 2x2 figure: histogram, per-emperor bars, anxiety scatter and box plot.

        Args:
            save_path: When given, the figure is written there and closed;
                otherwise it is shown with ``plt.show()``.

        Returns:
            A status message describing what happened.
        """
        if not self.emperors_with_lifespan:
            return "没有有效数据生成图表"

        # Fonts able to render the Chinese labels.
        plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS']
        plt.rcParams['axes.unicode_minus'] = False

        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

        lifespans = [emp.lifespan for emp in self.emperors_with_lifespan]
        # Label with the part before '拓跋'; fall back to the full name when
        # that part is empty so no bar ends up unlabeled.
        names = [emp.name.split('拓跋')[0] or emp.name for emp in self.emperors_with_lifespan]
        mean_lifespan = statistics.mean(lifespans)

        # 1. Lifespan histogram
        ax1.hist(lifespans, bins=10, alpha=0.7, color='skyblue', edgecolor='black')
        ax1.axvline(mean_lifespan, color='red', linestyle='--',
                    label=f'平均寿命: {mean_lifespan:.1f}岁')
        ax1.axvline(30, color='orange', linestyle='--', label='短寿阈值: 30岁')
        ax1.set_xlabel('寿命(岁)')
        ax1.set_ylabel('频数')
        ax1.set_title('北魏皇帝寿命分布')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # 2. Bar per emperor, red when below 30
        colors = ['red' if lifespan < 30 else 'blue' for lifespan in lifespans]
        bars = ax2.bar(range(len(names)), lifespans, color=colors, alpha=0.7)
        ax2.set_xlabel('皇帝')
        ax2.set_ylabel('寿命(岁)')
        ax2.set_title('各皇帝寿命对比')
        ax2.set_xticks(range(len(names)))
        ax2.set_xticklabels(names, rotation=45, ha='right')
        ax2.axhline(30, color='orange', linestyle='--', alpha=0.7)

        # Value label above each bar
        for bar, lifespan in zip(bars, lifespans):
            ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5,
                     str(lifespan), ha='center', va='bottom', fontsize=8)

        # 3. Lifespan versus fertility anxiety
        anxiety_data = [(emp.lifespan, emp.fertility_anxiety_score)
                        for emp in self.emperors_with_lifespan
                        if emp.fertility_anxiety_score is not None]

        if anxiety_data:
            lifespans_with_anxiety, anxiety_scores = zip(*anxiety_data)
            ax3.scatter(lifespans_with_anxiety, anxiety_scores, alpha=0.7, s=60)

            # A straight-line fit needs at least two distinct x values.
            if len(set(lifespans_with_anxiety)) >= 2:
                trend = np.poly1d(np.polyfit(lifespans_with_anxiety, anxiety_scores, 1))
                ax3.plot(lifespans_with_anxiety, trend(lifespans_with_anxiety), "r--", alpha=0.8)

            ax3.set_xlabel('寿命(岁)')
            ax3.set_ylabel('生育焦虑评分')
            ax3.set_title('寿命与生育焦虑关系')
            ax3.grid(True, alpha=0.3)

        # 4. Box plot of lifespans per reliability level
        reliability_data = {}
        for emp in self.emperors_with_lifespan:
            reliability_data.setdefault(emp.reliability.value, []).append(emp.lifespan)

        if reliability_data:
            # Pass real lists (not dict views) and set the tick labels
            # separately, which does not depend on the boxplot label keyword.
            ax4.boxplot(list(reliability_data.values()))
            ax4.set_xticklabels(list(reliability_data.keys()))
            ax4.set_xlabel('史料可靠性')
            ax4.set_ylabel('寿命(岁)')
            ax4.set_title('不同可靠性史料的寿命分布')
            ax4.grid(True, alpha=0.3)

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            # Release the figure so repeated calls do not accumulate open figures.
            plt.close(fig)
            return f"图表已保存到: {save_path}"

        plt.show()
        return "图表已显示"

    def generate_comprehensive_report(self) -> Dict[str, Any]:
        """Run every analysis and collect the results in one dict.

        Returns:
            A dict with the timestamp, a data summary, the basic statistics,
            the short-lifespan analysis, the anxiety correlations, the
            per-reliability analysis and the extracted key findings.
        """
        total = len(self.emperors)
        with_lifespan = len(self.emperors_with_lifespan)

        report = {
            "analysis_date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
            "data_summary": {
                "total_emperors": total,
                "emperors_with_lifespan": with_lifespan,
                # An empty data set has no completeness ratio; report 0.0
                # rather than dividing by zero.
                "data_completeness": with_lifespan / total if total else 0.0
            }
        }

        report["basic_statistics"] = self.calculate_basic_statistics()
        report["short_lifespan_analysis"] = self.analyze_short_lifespan_phenomenon()
        report["fertility_anxiety_analysis"] = self.analyze_fertility_anxiety_correlation()
        report["reliability_analysis"] = self.analyze_by_reliability()
        report["key_findings"] = self._extract_key_findings(report)

        return report

    def _extract_key_findings(self, report: Dict[str, Any]) -> List[str]:
        """Build the list of headline sentences from an assembled report."""
        findings = []

        # Mean lifespan
        basic = report["basic_statistics"]
        if "mean_lifespan" in basic:
            mean_age = basic["mean_lifespan"]
            findings.append(f"北魏前期皇帝平均寿命为 {mean_age:.1f} 岁,证实了短寿现象")

        # Share of short-lived emperors, quoted with the threshold actually used
        short_analysis = report["short_lifespan_analysis"]
        if "short_lived_rate" in short_analysis:
            short_rate = short_analysis["short_lived_rate"]
            threshold = short_analysis.get("threshold", 30)
            findings.append(f"{short_rate:.1%} 的皇帝寿命不足{threshold}岁,显示严重的短寿问题")

        # Anxiety versus offspring count
        correlations = report["fertility_anxiety_analysis"].get("correlations", {})
        if "anxiety_offspring" in correlations:
            corr = correlations["anxiety_offspring"]["correlation"]
            if corr < -0.3:
                findings.append(f"生育焦虑与子嗣数量呈负相关 (r={corr:.3f}),支持生育焦虑假说")

        # High-reliability sources (looked up under the key "high")
        high_rel_data = report["reliability_analysis"].get("high", {})
        if high_rel_data and "mean_lifespan" in high_rel_data:
            findings.append(f"高可靠性史料显示平均寿命 {high_rel_data['mean_lifespan']:.1f} 岁,验证了分析结果")

        return findings
|
||||
|
||||
# Module-level analyzer instance built with the default arguments.
|
||||
emperor_analyzer = EmperorLifespanAnalyzer()
|
||||
|
||||
def run_emperor_analysis():
    """Print a console summary of the emperor lifespan analysis.

    Builds the comprehensive report with the module-level
    ``emperor_analyzer``, prints the data summary, the basic statistics and
    the short-lifespan figures (each only when that section carries no
    ``error`` key), then the numbered key findings.

    Returns:
        The report dict produced by ``generate_comprehensive_report``.
    """
    print("开始北魏皇帝寿命统计分析...")

    # Build the full report once; everything below only reads from it.
    report = emperor_analyzer.generate_comprehensive_report()

    print(
        "\n=== 北魏皇帝寿命分析报告 ===",
        f"分析时间: {report['analysis_date']}",
        f"数据样本: {report['data_summary']['total_emperors']} 位皇帝",
        f"有效寿命数据: {report['data_summary']['emperors_with_lifespan']} 位",
        f"数据完整性: {report['data_summary']['data_completeness']:.1%}",
        sep="\n",
    )

    # Basic statistics
    stats = report['basic_statistics']
    if 'error' not in stats:
        print(
            f"\n平均寿命: {stats['mean_lifespan']:.1f} 岁",
            f"中位寿命: {stats['median_lifespan']:.1f} 岁",
            f"标准差: {stats['std_deviation']:.1f} 岁",
            f"寿命范围: {stats['min_lifespan']}-{stats['max_lifespan']} 岁",
            sep="\n",
        )

    # Short-lifespan figures
    short_analysis = report['short_lifespan_analysis']
    if 'error' not in short_analysis:
        print(
            f"\n短寿皇帝 (<30岁): {short_analysis['short_lived_count']}/{short_analysis['total_count']}",
            f"短寿比例: {short_analysis['short_lived_rate']:.1%}",
            sep="\n",
        )

    # Key findings, numbered from 1
    print("\n=== 关键发现 ===")
    for i, finding in enumerate(report['key_findings'], start=1):
        print(f"{i}. {finding}")

    return report
|
||||
|
||||
# Script entry point: print the textual report, then write the chart to a PNG file.
if __name__ == "__main__":
|
||||
report = run_emperor_analysis()
|
||||
|
||||
# Generate the visualisation chart
|
||||
chart_result = emperor_analyzer.generate_lifespan_distribution_chart("emperor_lifespan_analysis.png")
|
||||
print(f"\n{chart_result}")
|
||||
@@ -0,0 +1,463 @@
|
||||
"""
|
||||
生育焦虑量化分析系统
|
||||
分析拓跋鲜卑皇室的生育焦虑程度及其与政治政策、宗教活动的关联
|
||||
"""
|
||||
|
||||
import statistics
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from typing import List, Dict, Any, Tuple
|
||||
import pandas as pd
|
||||
from collections import Counter
|
||||
from dataclasses import asdict
|
||||
|
||||
from analysis.models import Emperor, ReliabilityLevel
|
||||
from data.emperors.northern_wei_emperors import NORTHERN_WEI_EMPERORS
|
||||
|
||||
class FertilityAnxietyAnalyzer:
|
||||
"""生育焦虑量化分析器"""
|
||||
|
||||
def __init__(self, emperors: List[Emperor] = None):
|
||||
self.emperors = emperors or NORTHERN_WEI_EMPERORS
|
||||
|
||||
# 生育焦虑评估标准
|
||||
self.anxiety_indicators = {
|
||||
"low_offspring": 0.3, # 子嗣少于平均值
|
||||
"short_lifespan": 0.2, # 短寿
|
||||
"religious_activity": 0.2, # 频繁宗教活动
|
||||
"violent_death": 0.2, # 非自然死亡
|
||||
"early_succession": 0.1 # 早期传位
|
||||
}
|
||||
|
||||
# 宗教活动关键词
|
||||
self.religious_keywords = [
|
||||
"祭祀", "祈福", "天师", "道教", "佛教", "寺庙",
|
||||
"昊天", "上帝", "神", "祖先", "宗庙", "太庙"
|
||||
]
|
||||
|
||||
# 生育相关宗教活动关键词
|
||||
self.fertility_religious_keywords = [
|
||||
"祈子", "求嗣", "生育", "多子", "繁衍", "子孙", "后代"
|
||||
]
|
||||
|
||||
def calculate_fertility_anxiety_score(self, emperor: Emperor) -> float:
|
||||
"""计算单个皇帝的生育焦虑评分"""
|
||||
if emperor.fertility_anxiety_score is not None:
|
||||
return emperor.fertility_anxiety_score
|
||||
|
||||
score = 0.0
|
||||
|
||||
# 1. 子嗣数量因子
|
||||
if emperor.offspring_count is not None:
|
||||
avg_offspring = self._get_average_offspring_count()
|
||||
if emperor.offspring_count < avg_offspring:
|
||||
score += self.anxiety_indicators["low_offspring"]
|
||||
|
||||
# 2. 寿命因子
|
||||
if emperor.lifespan is not None and emperor.lifespan < 30:
|
||||
score += self.anxiety_indicators["short_lifespan"]
|
||||
|
||||
# 3. 宗教活动因子
|
||||
religious_score = self._analyze_religious_activities(emperor.religious_activities)
|
||||
score += religious_score * self.anxiety_indicators["religious_activity"]
|
||||
|
||||
# 4. 死因因子
|
||||
if emperor.death_cause and any(keyword in emperor.death_cause
|
||||
for keyword in ["杀", "害", "毒", "暗杀"]):
|
||||
score += self.anxiety_indicators["violent_death"]
|
||||
|
||||
# 5. 在位时长因子(早期传位可能表示焦虑)
|
||||
if emperor.reign_duration is not None and emperor.reign_duration < 10:
|
||||
score += self.anxiety_indicators["early_succession"]
|
||||
|
||||
return min(score, 1.0) # 限制在0-1之间
|
||||
|
||||
def _get_average_offspring_count(self) -> float:
|
||||
"""获取平均子嗣数量"""
|
||||
offspring_counts = [emp.offspring_count for emp in self.emperors
|
||||
if emp.offspring_count is not None]
|
||||
return statistics.mean(offspring_counts) if offspring_counts else 5.0
|
||||
|
||||
def _analyze_religious_activities(self, activities: List[str]) -> float:
|
||||
"""分析宗教活动的生育焦虑相关性"""
|
||||
if not activities:
|
||||
return 0.0
|
||||
|
||||
total_score = 0.0
|
||||
activity_text = " ".join(activities)
|
||||
|
||||
# 检查一般宗教活动
|
||||
religious_count = sum(1 for keyword in self.religious_keywords
|
||||
if keyword in activity_text)
|
||||
|
||||
# 检查生育相关宗教活动(权重更高)
|
||||
fertility_religious_count = sum(1 for keyword in self.fertility_religious_keywords
|
||||
if keyword in activity_text)
|
||||
|
||||
# 计算评分
|
||||
total_score = (religious_count * 0.1 + fertility_religious_count * 0.3) / len(activities)
|
||||
|
||||
return min(total_score, 1.0)
|
||||
|
||||
def analyze_anxiety_distribution(self) -> Dict[str, Any]:
|
||||
"""分析生育焦虑分布"""
|
||||
anxiety_scores = []
|
||||
emperors_with_scores = []
|
||||
|
||||
for emperor in self.emperors:
|
||||
score = self.calculate_fertility_anxiety_score(emperor)
|
||||
anxiety_scores.append(score)
|
||||
emperors_with_scores.append((emperor, score))
|
||||
|
||||
if not anxiety_scores:
|
||||
return {"error": "无法计算生育焦虑评分"}
|
||||
|
||||
# 按焦虑程度分类
|
||||
high_anxiety = [emp for emp, score in emperors_with_scores if score >= 0.7]
|
||||
medium_anxiety = [emp for emp, score in emperors_with_scores if 0.4 <= score < 0.7]
|
||||
low_anxiety = [emp for emp, score in emperors_with_scores if score < 0.4]
|
||||
|
||||
return {
|
||||
"total_emperors": len(self.emperors),
|
||||
"mean_anxiety": statistics.mean(anxiety_scores),
|
||||
"median_anxiety": statistics.median(anxiety_scores),
|
||||
"std_anxiety": statistics.stdev(anxiety_scores) if len(anxiety_scores) > 1 else 0,
|
||||
"high_anxiety_count": len(high_anxiety),
|
||||
"medium_anxiety_count": len(medium_anxiety),
|
||||
"low_anxiety_count": len(low_anxiety),
|
||||
"high_anxiety_emperors": [emp.name for emp in high_anxiety],
|
||||
"anxiety_scores": dict(zip([emp.name for emp in self.emperors], anxiety_scores))
|
||||
}
|
||||
|
||||
def analyze_anxiety_policy_correlation(self) -> Dict[str, Any]:
|
||||
"""分析生育焦虑与政策变化的相关性"""
|
||||
policy_changes = []
|
||||
anxiety_levels = []
|
||||
|
||||
for emperor in self.emperors:
|
||||
anxiety_score = self.calculate_fertility_anxiety_score(emperor)
|
||||
|
||||
# 分析政策变化指标
|
||||
policy_score = self._calculate_policy_change_score(emperor)
|
||||
|
||||
if policy_score is not None:
|
||||
anxiety_levels.append(anxiety_score)
|
||||
policy_changes.append(policy_score)
|
||||
|
||||
if len(anxiety_levels) < 3:
|
||||
return {"error": "数据不足,无法进行相关性分析"}
|
||||
|
||||
correlation = self._calculate_correlation(anxiety_levels, policy_changes)
|
||||
|
||||
return {
|
||||
"sample_size": len(anxiety_levels),
|
||||
"correlation": correlation,
|
||||
"interpretation": self._interpret_policy_correlation(correlation)
|
||||
}
|
||||
|
||||
def _calculate_policy_change_score(self, emperor: Emperor) -> float:
|
||||
"""计算政策变化评分"""
|
||||
score = 0.0
|
||||
|
||||
# 宗教政策变化
|
||||
religious_activities = emperor.religious_activities or []
|
||||
activity_text = " ".join(religious_activities)
|
||||
|
||||
# 重大宗教政策变化关键词
|
||||
major_changes = ["改革", "迁都", "灭佛", "复兴", "建立", "废除"]
|
||||
change_count = sum(1 for keyword in major_changes if keyword in activity_text)
|
||||
|
||||
score += change_count * 0.2
|
||||
|
||||
# 在位时长(可能反映政策稳定性)
|
||||
if emperor.reign_duration is not None:
|
||||
if emperor.reign_duration > 20:
|
||||
score += 0.3 # 长期在位,政策相对稳定
|
||||
elif emperor.reign_duration < 5:
|
||||
score += 0.1 # 短期在位,政策变化有限
|
||||
|
||||
return min(score, 1.0)
|
||||
|
||||
def analyze_anxiety_religious_correlation(self) -> Dict[str, Any]:
|
||||
"""分析生育焦虑与宗教活动频率的关联"""
|
||||
anxiety_scores = []
|
||||
religious_frequencies = []
|
||||
|
||||
for emperor in self.emperors:
|
||||
anxiety_score = self.calculate_fertility_anxiety_score(emperor)
|
||||
religious_freq = len(emperor.religious_activities or [])
|
||||
|
||||
anxiety_scores.append(anxiety_score)
|
||||
religious_frequencies.append(religious_freq)
|
||||
|
||||
if len(anxiety_scores) < 3:
|
||||
return {"error": "数据不足"}
|
||||
|
||||
correlation = self._calculate_correlation(anxiety_scores, religious_frequencies)
|
||||
|
||||
# 分析特定类型的宗教活动
|
||||
fertility_religious_analysis = self._analyze_fertility_religious_activities()
|
||||
|
||||
return {
|
||||
"sample_size": len(anxiety_scores),
|
||||
"anxiety_religious_correlation": correlation,
|
||||
"fertility_religious_analysis": fertility_religious_analysis,
|
||||
"interpretation": self._interpret_religious_correlation(correlation)
|
||||
}
|
||||
|
||||
def _analyze_fertility_religious_activities(self) -> Dict[str, Any]:
|
||||
"""分析生育相关宗教活动"""
|
||||
fertility_activities = []
|
||||
|
||||
for emperor in self.emperors:
|
||||
activities = emperor.religious_activities or []
|
||||
activity_text = " ".join(activities)
|
||||
|
||||
fertility_count = sum(1 for keyword in self.fertility_religious_keywords
|
||||
if keyword in activity_text)
|
||||
|
||||
if fertility_count > 0:
|
||||
fertility_activities.append({
|
||||
"emperor": emperor.name,
|
||||
"anxiety_score": self.calculate_fertility_anxiety_score(emperor),
|
||||
"fertility_activities": fertility_count,
|
||||
"total_activities": len(activities)
|
||||
})
|
||||
|
||||
return {
|
||||
"emperors_with_fertility_activities": len(fertility_activities),
|
||||
"fertility_activities_details": fertility_activities
|
||||
}
|
||||
|
||||
def _calculate_correlation(self, x: List[float], y: List[float]) -> Dict[str, float]:
|
||||
"""计算皮尔逊相关系数"""
|
||||
if len(x) != len(y) or len(x) < 2:
|
||||
return {"correlation": 0.0, "p_value": 1.0}
|
||||
|
||||
n = len(x)
|
||||
sum_x = sum(x)
|
||||
sum_y = sum(y)
|
||||
sum_xy = sum(xi * yi for xi, yi in zip(x, y))
|
||||
sum_x2 = sum(xi * xi for xi in x)
|
||||
sum_y2 = sum(yi * yi for yi in y)
|
||||
|
||||
numerator = n * sum_xy - sum_x * sum_y
|
||||
denominator = ((n * sum_x2 - sum_x * sum_x) * (n * sum_y2 - sum_y * sum_y)) ** 0.5
|
||||
|
||||
if denominator == 0:
|
||||
correlation = 0.0
|
||||
else:
|
||||
correlation = numerator / denominator
|
||||
|
||||
# 简化的p值估算
|
||||
t_stat = correlation * ((n - 2) / (1 - correlation**2)) ** 0.5 if abs(correlation) != 1 else float('inf')
|
||||
p_value = 2 * (1 - abs(t_stat) / (abs(t_stat) + n - 2)) if t_stat != float('inf') else 0.0
|
||||
|
||||
return {
|
||||
"correlation": correlation,
|
||||
"p_value": p_value,
|
||||
"sample_size": n
|
||||
}
|
||||
|
||||
def _interpret_policy_correlation(self, correlation: Dict[str, float]) -> str:
|
||||
"""解释政策相关性"""
|
||||
corr = correlation["correlation"]
|
||||
p_val = correlation["p_value"]
|
||||
|
||||
if abs(corr) >= 0.5 and p_val < 0.05:
|
||||
direction = "正" if corr > 0 else "负"
|
||||
return f"生育焦虑与政策变化呈{direction}相关,相关性较强且统计显著"
|
||||
elif abs(corr) >= 0.3:
|
||||
direction = "正" if corr > 0 else "负"
|
||||
return f"生育焦虑与政策变化呈{direction}相关,相关性中等"
|
||||
else:
|
||||
return "生育焦虑与政策变化相关性较弱"
|
||||
|
||||
def _interpret_religious_correlation(self, correlation: Dict[str, float]) -> str:
|
||||
"""解释宗教相关性"""
|
||||
corr = correlation["correlation"]
|
||||
p_val = correlation["p_value"]
|
||||
|
||||
if corr >= 0.3 and p_val < 0.05:
|
||||
return "生育焦虑与宗教活动频率呈正相关,支持'焦虑驱动宗教活动'假说"
|
||||
elif corr >= 0.1:
|
||||
return "生育焦虑与宗教活动频率呈弱正相关"
|
||||
else:
|
||||
return "生育焦虑与宗教活动频率相关性不明显"
|
||||
|
||||
def generate_anxiety_visualization(self, save_path: str = None) -> str:
|
||||
"""生成生育焦虑可视化图表"""
|
||||
# 设置中文字体
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS']
|
||||
plt.rcParams['axes.unicode_minus'] = False
|
||||
|
||||
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
|
||||
|
||||
# 计算所有皇帝的焦虑评分
|
||||
emperors_scores = [(emp, self.calculate_fertility_anxiety_score(emp))
|
||||
for emp in self.emperors]
|
||||
|
||||
names = [emp.name.split('拓跋')[0] for emp, _ in emperors_scores]
|
||||
anxiety_scores = [score for _, score in emperors_scores]
|
||||
|
||||
# 1. 生育焦虑评分条形图
|
||||
colors = ['red' if score >= 0.7 else 'orange' if score >= 0.4 else 'green'
|
||||
for score in anxiety_scores]
|
||||
bars = ax1.bar(range(len(names)), anxiety_scores, color=colors, alpha=0.7)
|
||||
ax1.set_xlabel('皇帝')
|
||||
ax1.set_ylabel('生育焦虑评分')
|
||||
ax1.set_title('北魏皇帝生育焦虑评分')
|
||||
ax1.set_xticks(range(len(names)))
|
||||
ax1.set_xticklabels(names, rotation=45, ha='right')
|
||||
ax1.axhline(0.7, color='red', linestyle='--', alpha=0.5, label='高焦虑阈值')
|
||||
ax1.axhline(0.4, color='orange', linestyle='--', alpha=0.5, label='中焦虑阈值')
|
||||
ax1.legend()
|
||||
|
||||
# 添加数值标签
|
||||
for bar, score in zip(bars, anxiety_scores):
|
||||
ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
|
||||
f'{score:.2f}', ha='center', va='bottom', fontsize=8)
|
||||
|
||||
# 2. 焦虑评分分布直方图
|
||||
ax2.hist(anxiety_scores, bins=10, alpha=0.7, color='lightblue', edgecolor='black')
|
||||
ax2.axvline(statistics.mean(anxiety_scores), color='red', linestyle='--',
|
||||
label=f'平均焦虑: {statistics.mean(anxiety_scores):.2f}')
|
||||
ax2.set_xlabel('生育焦虑评分')
|
||||
ax2.set_ylabel('频数')
|
||||
ax2.set_title('生育焦虑评分分布')
|
||||
ax2.legend()
|
||||
ax2.grid(True, alpha=0.3)
|
||||
|
||||
# 3. 焦虑与子嗣数量关系
|
||||
offspring_data = [(self.calculate_fertility_anxiety_score(emp), emp.offspring_count)
|
||||
for emp in self.emperors if emp.offspring_count is not None]
|
||||
|
||||
if offspring_data:
|
||||
anxiety_vals, offspring_vals = zip(*offspring_data)
|
||||
ax3.scatter(anxiety_vals, offspring_vals, alpha=0.7, s=60)
|
||||
|
||||
# 添加趋势线
|
||||
z = np.polyfit(anxiety_vals, offspring_vals, 1)
|
||||
p = np.poly1d(z)
|
||||
ax3.plot(anxiety_vals, p(anxiety_vals), "r--", alpha=0.8)
|
||||
|
||||
ax3.set_xlabel('生育焦虑评分')
|
||||
ax3.set_ylabel('子嗣数量')
|
||||
ax3.set_title('生育焦虑与子嗣数量关系')
|
||||
ax3.grid(True, alpha=0.3)
|
||||
|
||||
# 4. 焦虑与宗教活动关系
|
||||
religious_data = [(self.calculate_fertility_anxiety_score(emp),
|
||||
len(emp.religious_activities or []))
|
||||
for emp in self.emperors]
|
||||
|
||||
if religious_data:
|
||||
anxiety_vals, religious_vals = zip(*religious_data)
|
||||
ax4.scatter(anxiety_vals, religious_vals, alpha=0.7, s=60, color='purple')
|
||||
|
||||
# 添加趋势线
|
||||
z = np.polyfit(anxiety_vals, religious_vals, 1)
|
||||
p = np.poly1d(z)
|
||||
ax4.plot(anxiety_vals, p(anxiety_vals), "r--", alpha=0.8)
|
||||
|
||||
ax4.set_xlabel('生育焦虑评分')
|
||||
ax4.set_ylabel('宗教活动数量')
|
||||
ax4.set_title('生育焦虑与宗教活动关系')
|
||||
ax4.grid(True, alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
|
||||
if save_path:
|
||||
plt.savefig(save_path, dpi=300, bbox_inches='tight')
|
||||
return f"图表已保存到: {save_path}"
|
||||
else:
|
||||
plt.show()
|
||||
return "图表已显示"
|
||||
|
||||
def generate_comprehensive_anxiety_report(self) -> Dict[str, Any]:
    """Assemble the comprehensive fertility-anxiety report as one dictionary.

    The report is built in a fixed order: a timestamp, a methodology
    summary, the results of the three analysis methods, and finally the
    key findings derived from everything collected so far.

    Returns:
        A dict with the keys "analysis_date", "methodology",
        "anxiety_distribution", "policy_correlation",
        "religious_correlation" and "key_findings" (inserted in that order).
    """
    # Take the timestamp first so it reflects when report generation began.
    generated_at = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")

    methodology = {
        "anxiety_indicators": self.anxiety_indicators,
        # Only the sizes of the keyword collections are recorded.
        "religious_keywords": len(self.religious_keywords),
        "fertility_keywords": len(self.fertility_religious_keywords),
    }
    result: Dict[str, Any] = {
        "analysis_date": generated_at,
        "methodology": methodology,
    }

    # (report key, analysis method name) pairs; each method is looked up and
    # called only when its turn comes, in exactly this order.
    analysis_steps = (
        ("anxiety_distribution", "analyze_anxiety_distribution"),
        ("policy_correlation", "analyze_anxiety_policy_correlation"),
        ("religious_correlation", "analyze_anxiety_religious_correlation"),
    )
    for section_key, method_name in analysis_steps:
        result[section_key] = getattr(self, method_name)()

    # The findings extractor receives the report before "key_findings" exists.
    result["key_findings"] = self._extract_anxiety_findings(result)
    return result
|
||||
|
||||
def _extract_anxiety_findings(self, report: Dict[str, Any]) -> List[str]:
    """Convert the numeric sections of *report* into finding sentences.

    Args:
        report: Report dict; it must contain the "anxiety_distribution"
            and "religious_correlation" entries (a missing entry raises
            KeyError). Each finding is emitted only when its source key is
            present in the corresponding section.

    Returns:
        Findings in a fixed order: mean anxiety level, share of
        high-anxiety emperors, and the religious-activity correlation
        (the last one only when the coefficient is strictly above 0.3).
    """
    messages: List[str] = []

    distribution = report["anxiety_distribution"]

    # Mean anxiety level.
    # NOTE(review): the sentence always states a "medium-to-high" level,
    # whatever the actual score is -- confirm this wording is intended.
    if "mean_anxiety" in distribution:
        mean_score = distribution["mean_anxiety"]
        messages.append(
            f"北魏皇室平均生育焦虑评分为 {mean_score:.2f},显示中等偏高的焦虑水平"
        )

    # Share of emperors classified as highly anxious.
    if "high_anxiety_count" in distribution:
        n_high = distribution["high_anxiety_count"]
        n_total = distribution["total_emperors"]
        messages.append(f"{n_high}/{n_total} 位皇帝表现出高度生育焦虑")

    # Religious-activity correlation; looked up only after the sections
    # above so a failure there surfaces first.
    religious = report["religious_correlation"]
    if "anxiety_religious_correlation" in religious:
        r_value = religious["anxiety_religious_correlation"]["correlation"]
        if r_value > 0.3:
            messages.append(
                f"生育焦虑与宗教活动呈正相关 (r={r_value:.3f}),支持宗教缓解焦虑假说"
            )

    return messages
|
||||
|
||||
# 创建分析器实例
|
||||
# Shared analyzer instance; run_fertility_anxiety_analysis() and the
# __main__ block both use this object.
fertility_analyzer = FertilityAnxietyAnalyzer()
|
||||
|
||||
def run_fertility_anxiety_analysis():
    """Run the fertility-anxiety analysis and print a summary to stdout.

    Uses the module-level ``fertility_analyzer`` to build the comprehensive
    report, then prints the analysis time, the anxiety distribution and the
    religious-activity correlation (each only when its section carries no
    'error' key), followed by the numbered key findings.

    Returns:
        The report dict produced by
        ``fertility_analyzer.generate_comprehensive_anxiety_report()``.
    """
    print("开始北魏皇室生育焦虑量化分析...")

    # Build the comprehensive report.
    report = fertility_analyzer.generate_comprehensive_anxiety_report()

    print("\n=== 北魏皇室生育焦虑分析报告 ===")
    print(f"分析时间: {report['analysis_date']}")

    # Anxiety distribution section.
    distribution = report['anxiety_distribution']
    if not ('error' in distribution):
        print(f"\n平均生育焦虑评分: {distribution['mean_anxiety']:.3f}")
        print(
            f"高焦虑皇帝: {distribution['high_anxiety_count']}"
            f"/{distribution['total_emperors']}"
        )
        high_names = ', '.join(distribution['high_anxiety_emperors'])
        print(f"高焦虑皇帝名单: {high_names}")

    # Religious-activity correlation section.
    religious = report['religious_correlation']
    if not ('error' in religious):
        correlation_entry = religious['anxiety_religious_correlation']
        print(f"\n生育焦虑与宗教活动相关性: {correlation_entry['correlation']:.3f}")
        print(f"解释: {religious['interpretation']}")

    # Key findings, numbered from 1.
    print("\n=== 关键发现 ===")
    for position, finding in enumerate(report['key_findings'], start=1):
        print(f"{position}. {finding}")

    return report
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: print the textual report first.
    report = run_fertility_anxiety_analysis()

    # Then render the charts to a PNG file and echo the status message
    # returned by the visualization method.
    chart_result = fertility_analyzer.generate_anxiety_visualization(
        "fertility_anxiety_analysis.png"
    )
    print(f"\n{chart_result}")
|
||||
Reference in New Issue
Block a user