384 lines
16 KiB
Python
384 lines
16 KiB
Python
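"""
Lifespan statistics analyzer for Northern Wei emperors.

Analyzes the lifespan distribution of the early Northern Wei emperors and how
it relates to fertility anxiety and political policy.
"""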
|
||
|
||
import math
import statistics
from dataclasses import asdict
from typing import Any, Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from analysis.models import Emperor, ReliabilityLevel
from data.emperors.northern_wei_emperors import (
    EMPERORS_WITH_LIFESPAN,
    HIGH_RELIABILITY_EMPERORS,
    NORTHERN_WEI_EMPERORS,
    PRE_REFORM_EMPERORS,
    get_high_fertility_anxiety_emperors,
    get_short_lived_emperors,
)
|
||
|
||
class EmperorLifespanAnalyzer:
    """Statistical analyzer for emperor lifespans.

    Attributes:
        emperors: The emperor records being analysed.
        emperors_with_lifespan: The subset of ``emperors`` whose ``lifespan``
            is not ``None``.

    Each record is read through the attributes ``name``, ``lifespan``,
    ``fertility_anxiety_score``, ``offspring_count`` and ``reliability``.
    """

    # Age (in years) below which an emperor is counted as short-lived.
    DEFAULT_SHORT_LIFESPAN_THRESHOLD = 30

    def __init__(self, emperors: Optional[List["Emperor"]] = None):
        """Create an analyzer.

        Args:
            emperors: Records to analyse. ``None`` selects the bundled
                ``NORTHERN_WEI_EMPERORS`` list.
        """
        # Test for None explicitly: with ``emperors or DEFAULT`` an explicitly
        # passed empty list was silently replaced by the full default dataset.
        self.emperors = NORTHERN_WEI_EMPERORS if emperors is None else emperors
        self.emperors_with_lifespan = [
            emp for emp in self.emperors if emp.lifespan is not None
        ]

    def calculate_basic_statistics(self) -> Dict[str, Any]:
        """Compute descriptive statistics of the known lifespans.

        Returns:
            ``{"error": ...}`` when no lifespan is known. Otherwise a dict with
            ``sample_size``, ``mean_lifespan``, ``median_lifespan``,
            ``mode_lifespan`` (``None`` when every value is unique),
            ``std_deviation`` and ``variance`` (both ``0`` for a single
            sample), ``min_lifespan``, ``max_lifespan`` and ``range``. With at
            least four samples ``q1``, ``q3`` and ``iqr`` are added.
        """
        if not self.emperors_with_lifespan:
            return {"error": "没有有效的寿命数据"}

        lifespans = [emp.lifespan for emp in self.emperors_with_lifespan]
        count = len(lifespans)
        shortest, longest = min(lifespans), max(lifespans)
        has_repeats = len(set(lifespans)) < count
        has_spread = count > 1  # stdev/variance need at least two samples

        stats = {
            "sample_size": count,
            "mean_lifespan": statistics.mean(lifespans),
            "median_lifespan": statistics.median(lifespans),
            "mode_lifespan": statistics.mode(lifespans) if has_repeats else None,
            "std_deviation": statistics.stdev(lifespans) if has_spread else 0,
            "variance": statistics.variance(lifespans) if has_spread else 0,
            "min_lifespan": shortest,
            "max_lifespan": longest,
            "range": longest - shortest,
        }

        # Quartiles are taken by position in the sorted sample (no
        # interpolation), so they are always actual observed lifespans.
        if count >= 4:
            ordered = sorted(lifespans)
            stats["q1"] = ordered[count // 4]
            stats["q3"] = ordered[3 * count // 4]
            stats["iqr"] = stats["q3"] - stats["q1"]

        return stats

    def analyze_short_lifespan_phenomenon(
        self, threshold: int = DEFAULT_SHORT_LIFESPAN_THRESHOLD
    ) -> Dict[str, Any]:
        """Summarise the emperors who died younger than ``threshold``.

        Args:
            threshold: Age in years; lifespans strictly below it count as
                short-lived.

        Returns:
            ``{"error": ...}`` when no lifespan is known. Otherwise a dict with
            ``threshold``, ``short_lived_count``, ``total_count``,
            ``short_lived_rate`` and ``short_lived_emperors`` (names), plus
            ``avg_fertility_anxiety`` / ``avg_offspring_count`` when at least
            one short-lived emperor has the respective value.
        """
        total_with_data = len(self.emperors_with_lifespan)
        if total_with_data == 0:
            return {"error": "没有有效的寿命数据"}

        # Filter this analyzer's own records so numerator and denominator come
        # from the same population. The previous code took the numerator from
        # the module-level dataset helper, which ignores ``self.emperors``.
        # NOTE(review): assumes that helper also used a strict ``<`` comparison
        # (as the chart and the report wording do) - confirm.
        short_lived = [
            emp for emp in self.emperors_with_lifespan if emp.lifespan < threshold
        ]

        short_lived_analysis = {
            "threshold": threshold,
            "short_lived_count": len(short_lived),
            "total_count": total_with_data,
            "short_lived_rate": len(short_lived) / total_with_data,
            "short_lived_emperors": [emp.name for emp in short_lived],
        }

        # Relation between short lifespan and fertility anxiety.
        anxiety_scores = [
            emp.fertility_anxiety_score
            for emp in short_lived
            if emp.fertility_anxiety_score is not None
        ]
        if anxiety_scores:
            short_lived_analysis["avg_fertility_anxiety"] = statistics.mean(anxiety_scores)

        # Relation between short lifespan and number of offspring.
        offspring_counts = [
            emp.offspring_count
            for emp in short_lived
            if emp.offspring_count is not None
        ]
        if offspring_counts:
            short_lived_analysis["avg_offspring_count"] = statistics.mean(offspring_counts)

        return short_lived_analysis

    def analyze_fertility_anxiety_correlation(self) -> Dict[str, Any]:
        """Correlate fertility anxiety with lifespan and offspring count.

        Only emperors with both a fertility anxiety score and a lifespan are
        used; the offspring correlation further requires an offspring count
        and at least three such emperors.

        Returns:
            ``{"error": ...}`` with fewer than three usable emperors, otherwise
            a dict with ``sample_size``, ``correlations`` (keys
            ``anxiety_lifespan`` and, when available, ``anxiety_offspring``)
            and ``interpretation``.
        """
        valid_emperors = [
            emp
            for emp in self.emperors
            if emp.fertility_anxiety_score is not None and emp.lifespan is not None
        ]
        if len(valid_emperors) < 3:
            return {"error": "数据不足,无法进行相关性分析"}

        correlations = {
            "anxiety_lifespan": self._calculate_correlation(
                [emp.fertility_anxiety_score for emp in valid_emperors],
                [emp.lifespan for emp in valid_emperors],
            )
        }

        # Select the emperors once so both series stay aligned by construction.
        with_offspring = [
            emp for emp in valid_emperors if emp.offspring_count is not None
        ]
        if len(with_offspring) >= 3:
            correlations["anxiety_offspring"] = self._calculate_correlation(
                [emp.fertility_anxiety_score for emp in with_offspring],
                [emp.offspring_count for emp in with_offspring],
            )

        return {
            "sample_size": len(valid_emperors),
            "correlations": correlations,
            "interpretation": self._interpret_correlations(correlations),
        }

    def _calculate_correlation(self, x: List[float], y: List[float]) -> Dict[str, float]:
        """Compute Pearson's correlation coefficient and its two-sided p-value.

        Args:
            x: First series.
            y: Second series, same length as ``x``.

        Returns:
            ``{"correlation": 0.0, "p_value": 1.0}`` when the lengths differ or
            fewer than two pairs are given. Otherwise a dict with
            ``correlation`` (clamped to [-1, 1]; ``0.0`` when either series is
            constant), ``p_value`` (in [0, 1]) and ``sample_size``.
        """
        if len(x) != len(y) or len(x) < 2:
            return {"correlation": 0.0, "p_value": 1.0}

        n = len(x)
        sum_x = sum(x)
        sum_y = sum(y)
        sum_xy = sum(xi * yi for xi, yi in zip(x, y))
        sum_x2 = sum(xi * xi for xi in x)
        sum_y2 = sum(yi * yi for yi in y)

        numerator = n * sum_xy - sum_x * sum_y
        # Both spreads are non-negative mathematically; clamp away tiny
        # negative values produced by floating-point rounding.
        spread_x = max(n * sum_x2 - sum_x * sum_x, 0)
        spread_y = max(n * sum_y2 - sum_y * sum_y, 0)
        denominator = math.sqrt(spread_x * spread_y)

        if denominator == 0:
            correlation = 0.0
        else:
            # Rounding can push |r| marginally above 1, which would later
            # produce a negative value under a square root.
            correlation = max(-1.0, min(1.0, numerator / denominator))

        return {
            "correlation": correlation,
            "p_value": self._pearson_p_value(correlation, n),
            "sample_size": n,
        }

    @staticmethod
    def _pearson_p_value(correlation: float, n: int) -> float:
        """Return the two-sided p-value of Pearson's r for ``n`` pairs.

        Under the null hypothesis of uncorrelated, normally distributed data
        the density of r is proportional to ``(1 - r^2) ** ((n - 4) / 2)``.
        Substituting ``r = sin(theta)`` gives
        ``p = T(n - 3) / F(n - 3)`` where ``T(k)`` is the integral of
        ``cos(theta) ** k`` from ``asin(|r|)`` to ``pi / 2`` and ``F(k)`` the
        same integral from ``0``. Both follow the standard reduction formula
        for powers of cosine, evaluated here in steps of two.

        The recurrence subtracts nearly equal numbers when the p-value is
        extremely small, so such values are only accurate in absolute terms;
        the result is clamped to [0, 1].
        """
        if n < 3:
            return 1.0  # no degrees of freedom: nothing can be concluded
        magnitude = abs(correlation)
        if magnitude >= 1.0:
            return 0.0
        if magnitude == 0.0:
            return 1.0

        angle = math.asin(magnitude)
        sin_a = magnitude
        cos_a = math.cos(angle)
        power = n - 3

        if power % 2 == 0:
            k = 0
            tail, full = math.pi / 2 - angle, math.pi / 2
        else:
            k = 1
            tail, full = 1.0 - sin_a, 1.0

        cos_pow = cos_a ** (k + 1)  # cos(angle) ** (next_k - 1)
        while k < power:
            k += 2
            tail = ((k - 1) * tail - cos_pow * sin_a) / k
            full = (k - 1) * full / k
            cos_pow *= cos_a * cos_a

        return min(1.0, max(0.0, tail / full))

    def _interpret_correlations(self, correlations: Dict[str, Dict[str, float]]) -> Dict[str, str]:
        """Turn correlation results into short textual interpretations.

        Args:
            correlations: Mapping of a label to a dict holding ``correlation``
                and ``p_value``.

        Returns:
            Mapping of the same labels to a sentence stating direction,
            strength (|r| >= 0.7 strong, >= 0.5 moderate, >= 0.3 weak) and
            significance at the 0.05 level.
        """
        interpretations = {}

        for key, corr_data in correlations.items():
            corr = corr_data["correlation"]
            p_val = corr_data["p_value"]
            magnitude = abs(corr)

            if magnitude >= 0.7:
                strength = "强"
            elif magnitude >= 0.5:
                strength = "中等"
            elif magnitude >= 0.3:
                strength = "弱"
            else:
                strength = "很弱或无"

            # A coefficient of exactly zero has no direction; it used to be
            # reported as negative.
            if corr > 0:
                direction = "正"
            elif corr < 0:
                direction = "负"
            else:
                direction = "无"

            significance = "显著" if p_val < 0.05 else "不显著"

            interpretations[key] = f"{direction}相关,强度:{strength},统计显著性:{significance}"

        return interpretations

    def analyze_by_reliability(self) -> Dict[str, Any]:
        """Group emperors by source reliability and average their lifespans.

        Returns:
            Mapping of each ``ReliabilityLevel`` value that has at least one
            emperor with a known lifespan to a dict with ``count`` (all
            emperors at that level), ``with_lifespan_count``,
            ``mean_lifespan`` and ``emperors`` (names of all emperors at that
            level).
        """
        reliability_groups = {}

        for reliability in ReliabilityLevel:
            group_emperors = [
                emp for emp in self.emperors if emp.reliability == reliability
            ]
            lifespans = [
                emp.lifespan for emp in group_emperors if emp.lifespan is not None
            ]
            if not lifespans:
                continue

            reliability_groups[reliability.value] = {
                "count": len(group_emperors),
                "with_lifespan_count": len(lifespans),
                "mean_lifespan": statistics.mean(lifespans),
                "emperors": [emp.name for emp in group_emperors],
            }

        return reliability_groups

    def generate_lifespan_distribution_chart(self, save_path: Optional[str] = None) -> str:
        """Draw a 2x2 figure describing the lifespan data.

        Panels: lifespan histogram, per-emperor bar chart, lifespan versus
        fertility anxiety scatter plot with a linear trend line, and box plots
        per source-reliability level.

        Args:
            save_path: File to write the figure to. When falsy the figure is
                shown with ``plt.show()`` instead.

        Returns:
            A status message: no data, saved to ``save_path``, or displayed.

        Side effects:
            Sets the global matplotlib ``font.sans-serif`` and
            ``axes.unicode_minus`` rcParams.
        """
        if not self.emperors_with_lifespan:
            return "没有有效数据生成图表"

        threshold = self.DEFAULT_SHORT_LIFESPAN_THRESHOLD

        # Fonts able to render the Chinese labels.
        plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS']
        plt.rcParams['axes.unicode_minus'] = False

        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

        lifespans = [emp.lifespan for emp in self.emperors_with_lifespan]
        # Tick labels keep only the part of each name before '拓跋'.
        names = [emp.name.split('拓跋')[0] for emp in self.emperors_with_lifespan]
        mean_lifespan = statistics.mean(lifespans)

        # 1. Histogram of lifespans.
        ax1.hist(lifespans, bins=10, alpha=0.7, color='skyblue', edgecolor='black')
        ax1.axvline(mean_lifespan, color='red', linestyle='--',
                    label=f'平均寿命: {mean_lifespan:.1f}岁')
        ax1.axvline(threshold, color='orange', linestyle='--',
                    label=f'短寿阈值: {threshold}岁')
        ax1.set_xlabel('寿命(岁)')
        ax1.set_ylabel('频数')
        ax1.set_title('北魏皇帝寿命分布')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # 2. Bar chart per emperor; short-lived emperors are drawn in red.
        colors = ['red' if lifespan < threshold else 'blue' for lifespan in lifespans]
        positions = range(len(names))
        bars = ax2.bar(positions, lifespans, color=colors, alpha=0.7)
        ax2.set_xlabel('皇帝')
        ax2.set_ylabel('寿命(岁)')
        ax2.set_title('各皇帝寿命对比')
        ax2.set_xticks(positions)
        ax2.set_xticklabels(names, rotation=45, ha='right')
        ax2.axhline(threshold, color='orange', linestyle='--', alpha=0.7)

        # Value label above each bar.
        for bar, lifespan in zip(bars, lifespans):
            ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5,
                     str(lifespan), ha='center', va='bottom', fontsize=8)

        # 3. Scatter plot of lifespan against fertility anxiety.
        anxiety_data = [
            (emp.lifespan, emp.fertility_anxiety_score)
            for emp in self.emperors_with_lifespan
            if emp.fertility_anxiety_score is not None
        ]
        if anxiety_data:
            lifespans_with_anxiety, anxiety_scores = zip(*anxiety_data)
            ax3.scatter(lifespans_with_anxiety, anxiety_scores, alpha=0.7, s=60)

            # A line fit needs at least two distinct x values; otherwise the
            # least-squares problem is degenerate.
            if len(set(lifespans_with_anxiety)) >= 2:
                trend = np.poly1d(np.polyfit(lifespans_with_anxiety, anxiety_scores, 1))
                ax3.plot(lifespans_with_anxiety, trend(lifespans_with_anxiety),
                         "r--", alpha=0.8)

            ax3.set_xlabel('寿命(岁)')
            ax3.set_ylabel('生育焦虑评分')
            ax3.set_title('寿命与生育焦虑关系')
            ax3.grid(True, alpha=0.3)

        # 4. Box plot of lifespans per reliability level.
        reliability_data = {}
        for emp in self.emperors_with_lifespan:
            reliability_data.setdefault(emp.reliability.value, []).append(emp.lifespan)

        if reliability_data:
            # Pass real lists rather than dict views.
            ax4.boxplot(list(reliability_data.values()),
                        labels=list(reliability_data.keys()))
            ax4.set_xlabel('史料可靠性')
            ax4.set_ylabel('寿命(岁)')
            ax4.set_title('不同可靠性史料的寿命分布')
            ax4.grid(True, alpha=0.3)

        plt.tight_layout()

        if save_path:
            try:
                fig.savefig(save_path, dpi=300, bbox_inches='tight')
            finally:
                # pyplot keeps every figure alive until it is closed; release
                # it so repeated calls do not accumulate figures.
                plt.close(fig)
            return f"图表已保存到: {save_path}"

        plt.show()
        return "图表已显示"

    def generate_comprehensive_report(self) -> Dict[str, Any]:
        """Run every analysis and collect the results in one report.

        Returns:
            A dict with ``analysis_date``, ``data_summary``,
            ``basic_statistics``, ``short_lifespan_analysis``,
            ``fertility_anxiety_analysis``, ``reliability_analysis`` and
            ``key_findings``.
        """
        total = len(self.emperors)
        with_lifespan = len(self.emperors_with_lifespan)

        report = {
            "analysis_date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
            "data_summary": {
                "total_emperors": total,
                "emperors_with_lifespan": with_lifespan,
                # An empty dataset has no completeness to speak of; report 0
                # instead of dividing by zero.
                "data_completeness": with_lifespan / total if total else 0.0,
            },
        }

        report["basic_statistics"] = self.calculate_basic_statistics()
        report["short_lifespan_analysis"] = self.analyze_short_lifespan_phenomenon()
        report["fertility_anxiety_analysis"] = self.analyze_fertility_anxiety_correlation()
        report["reliability_analysis"] = self.analyze_by_reliability()
        report["key_findings"] = self._extract_key_findings(report)

        return report

    def _extract_key_findings(self, report: Dict[str, Any]) -> List[str]:
        """Derive headline sentences from an assembled report.

        Args:
            report: Dict holding the ``basic_statistics``,
                ``short_lifespan_analysis``, ``fertility_anxiety_analysis``
                and ``reliability_analysis`` sections.

        Returns:
            A list of finding sentences; sections that carry an error or lack
            the relevant value contribute nothing.
        """
        findings = []

        # Mean lifespan.
        basic = report.get("basic_statistics", {})
        if "mean_lifespan" in basic:
            mean_age = basic["mean_lifespan"]
            findings.append(f"北魏前期皇帝平均寿命为 {mean_age:.1f} 岁,证实了短寿现象")

        # Share of short-lived emperors; quote the threshold that was actually
        # used instead of a hard-coded 30.
        short = report.get("short_lifespan_analysis", {})
        if "short_lived_rate" in short:
            short_rate = short["short_lived_rate"]
            threshold = short.get("threshold", self.DEFAULT_SHORT_LIFESPAN_THRESHOLD)
            findings.append(f"{short_rate:.1%} 的皇帝寿命不足{threshold}岁,显示严重的短寿问题")

        # Fertility anxiety versus offspring count.
        correlations = report.get("fertility_anxiety_analysis", {}).get("correlations", {})
        if "anxiety_offspring" in correlations:
            corr = correlations["anxiety_offspring"]["correlation"]
            if corr < -0.3:
                findings.append(f"生育焦虑与子嗣数量呈负相关 (r={corr:.3f}),支持生育焦虑假说")

        # High-reliability sources.
        # NOTE(review): assumes the high tier of ReliabilityLevel has the
        # value "high" - confirm against the enum definition.
        high_rel_data = report.get("reliability_analysis", {}).get("high", {})
        if high_rel_data and "mean_lifespan" in high_rel_data:
            findings.append(f"高可靠性史料显示平均寿命 {high_rel_data['mean_lifespan']:.1f} 岁,验证了分析结果")

        return findings
|
||
|
||
# 创建分析器实例
|
||
# Shared analyzer instance, constructed without arguments so it uses the
# default emperor list; run_emperor_analysis() and the __main__ block use it.
emperor_analyzer = EmperorLifespanAnalyzer()
|
||
|
||
def run_emperor_analysis():
    """Generate the comprehensive report and print a readable summary.

    Uses the module-level ``emperor_analyzer``. The basic-statistics and
    short-lifespan sections are printed only when they carry no ``error``
    key; the key findings are printed as a numbered list.

    Returns:
        The report dict produced by ``generate_comprehensive_report()``.
    """
    print("开始北魏皇帝寿命统计分析...")

    report = emperor_analyzer.generate_comprehensive_report()
    summary = report['data_summary']

    # Header and data overview.
    for line in (
        "\n=== 北魏皇帝寿命分析报告 ===",
        f"分析时间: {report['analysis_date']}",
        f"数据样本: {summary['total_emperors']} 位皇帝",
        f"有效寿命数据: {summary['emperors_with_lifespan']} 位",
        f"数据完整性: {summary['data_completeness']:.1%}",
    ):
        print(line)

    # Descriptive statistics.
    stats = report['basic_statistics']
    if 'error' not in stats:
        for line in (
            f"\n平均寿命: {stats['mean_lifespan']:.1f} 岁",
            f"中位寿命: {stats['median_lifespan']:.1f} 岁",
            f"标准差: {stats['std_deviation']:.1f} 岁",
            f"寿命范围: {stats['min_lifespan']}-{stats['max_lifespan']} 岁",
        ):
            print(line)

    # Short-lifespan section.
    short_analysis = report['short_lifespan_analysis']
    if 'error' not in short_analysis:
        short_count = short_analysis['short_lived_count']
        total_count = short_analysis['total_count']
        print(f"\n短寿皇帝 (<30岁): {short_count}/{total_count}")
        print(f"短寿比例: {short_analysis['short_lived_rate']:.1%}")

    # Key findings, numbered from 1.
    print("\n=== 关键发现 ===")
    number = 0
    for finding in report['key_findings']:
        number += 1
        print(f"{number}. {finding}")

    return report
|
||
|
||
if __name__ == "__main__":
    report = run_emperor_analysis()

    # Render the charts to a PNG file (path is relative to the current
    # working directory) and print the status message that comes back.
    chart_result = emperor_analyzer.generate_lifespan_distribution_chart(
        "emperor_lifespan_analysis.png"
    )
    print("\n" + chart_result)