#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Symbol data analyzer.

Data analysis tool for the 胡汉三千年 ("Hu-Han Three Thousand Years") project.

Features: statistical analysis, pattern recognition, and association mining
over the symbol database.
"""

import json
import sqlite3
from collections import Counter
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


class SymbolAnalyzer:
    """Statistical analyzer over the symbol SQLite database.

    All queries read two tables: ``symbols`` and ``cross_civilization_links``.
    The analyzer owns one SQLite connection (``self.conn``); release it with
    :meth:`close` or by using the instance as a context manager.
    """

    # (result key, query) pairs for the single-column distributions reported
    # by get_basic_statistics, in the order they are added to the result.
    _DISTRIBUTION_QUERIES = (
        ('yin_yang_distribution',
         "SELECT yin_yang_attribute, COUNT(*) FROM symbols GROUP BY yin_yang_attribute"),
        ('engraving_distribution',
         "SELECT engraving_type, COUNT(*) FROM symbols GROUP BY engraving_type"),
        ('civilization_distribution',
         "SELECT origin_civilization, COUNT(*) FROM symbols GROUP BY origin_civilization"),
        ('period_distribution',
         "SELECT origin_period, COUNT(*) FROM symbols GROUP BY origin_period"),
    )

    def __init__(self, db_path: str = "symbols.db"):
        """Open the database and configure matplotlib fonts.

        Args:
            db_path: Path of the SQLite database file to open.
        """
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)

        # Chinese-capable font first, DejaVu Sans as fallback, so the Chinese
        # chart titles and labels can be rendered.
        plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
        plt.rcParams['axes.unicode_minus'] = False

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __enter__(self) -> "SymbolAnalyzer":
        """Return the analyzer itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the connection when the ``with`` block exits."""
        self.close()

    @staticmethod
    def _nest_counts(rows, outer_index: int = 0, inner_index: int = 1) -> Dict[Any, Dict[Any, int]]:
        """Fold ``(a, b, count)`` rows into ``{outer: {inner: count}}``."""
        nested: Dict[Any, Dict[Any, int]] = {}
        for row in rows:
            nested.setdefault(row[outer_index], {})[row[inner_index]] = row[2]
        return nested

    @staticmethod
    def _distribution_lines(distribution: Dict[Any, int], total: int) -> List[str]:
        """Format one ``label:count (share%)`` bullet per distribution entry."""
        lines = []
        for label, count in distribution.items():
            # Guard the division so an inconsistent zero total cannot crash
            # report generation.
            percentage = (count / total) * 100 if total else 0.0
            lines.append(f"- {label}:{count} ({percentage:.1f}%)")
        return lines

    def get_basic_statistics(self) -> Dict[str, Any]:
        """Collect basic counts from the database.

        Returns:
            A dict with ``total_symbols`` and ``total_links`` (row counts of
            ``symbols`` and ``cross_civilization_links``) plus four
            ``{value: count}`` dicts: ``yin_yang_distribution``,
            ``engraving_distribution``, ``civilization_distribution`` and
            ``period_distribution``.
        """
        cursor = self.conn.cursor()
        stats: Dict[str, Any] = {}

        cursor.execute("SELECT COUNT(*) FROM symbols")
        stats['total_symbols'] = cursor.fetchone()[0]

        for key, query in self._DISTRIBUTION_QUERIES:
            cursor.execute(query)
            stats[key] = dict(cursor.fetchall())

        cursor.execute("SELECT COUNT(*) FROM cross_civilization_links")
        stats['total_links'] = cursor.fetchone()[0]

        return stats

    def analyze_yin_yang_patterns(self) -> Dict[str, Any]:
        """Cross-tabulate the yin-yang attribute against other columns.

        Returns:
            A dict with three nested count tables:
            ``yin_yang_engraving_association`` (``{yin_yang: {engraving: n}}``),
            ``yin_yang_civilization_association``
            (``{yin_yang: {civilization: n}}``) and
            ``yin_yang_period_distribution`` (``{period: {yin_yang: n}}``).
        """
        cursor = self.conn.cursor()
        patterns: Dict[str, Any] = {}

        # Yin-yang attribute versus engraving type.
        cursor.execute("""
            SELECT yin_yang_attribute, engraving_type, COUNT(*)
            FROM symbols
            GROUP BY yin_yang_attribute, engraving_type
            ORDER BY yin_yang_attribute, engraving_type
        """)
        patterns['yin_yang_engraving_association'] = self._nest_counts(cursor.fetchall())

        # Yin-yang attribute versus civilization of origin.
        cursor.execute("""
            SELECT yin_yang_attribute, origin_civilization, COUNT(*)
            FROM symbols
            GROUP BY yin_yang_attribute, origin_civilization
            ORDER BY yin_yang_attribute, origin_civilization
        """)
        patterns['yin_yang_civilization_association'] = self._nest_counts(cursor.fetchall())

        # Yin-yang attribute over periods; this table is keyed by period
        # first (column 1), then by yin-yang attribute (column 0).
        cursor.execute("""
            SELECT yin_yang_attribute, origin_period, COUNT(*)
            FROM symbols
            GROUP BY yin_yang_attribute, origin_period
            ORDER BY origin_period, yin_yang_attribute
        """)
        patterns['yin_yang_period_distribution'] = self._nest_counts(
            cursor.fetchall(), outer_index=1, inner_index=0
        )

        return patterns

    def analyze_transmission_patterns(self) -> Dict[str, Any]:
        """Analyze how symbols are linked across civilizations.

        Returns:
            A dict with:
            ``transmission_directions`` - ``(source_civ, target_civ, count)``
            rows for links whose endpoints differ in civilization, most
            frequent first;
            ``path_length_distribution`` - ``(path_length, path_count)`` rows
            for chains of links up to length 10;
            ``top_central_symbols`` - up to 10 ``(symbol_id, symbol_name,
            degree)`` rows, highest degree first.
        """
        cursor = self.conn.cursor()
        patterns: Dict[str, Any] = {}

        # Direction of transmission between distinct civilizations.
        cursor.execute("""
            SELECT s1.origin_civilization, s2.origin_civilization, COUNT(*) as link_count
            FROM cross_civilization_links l
            JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id
            WHERE s1.origin_civilization != s2.origin_civilization
            GROUP BY s1.origin_civilization, s2.origin_civilization
            ORDER BY link_count DESC
        """)
        patterns['transmission_directions'] = cursor.fetchall()

        # Length of link chains. The path_length < 10 cap bounds the
        # recursion even when the links form a cycle.
        cursor.execute("""
            WITH RECURSIVE transmission_paths AS (
                SELECT
                    source_symbol_id,
                    target_symbol_id,
                    1 as path_length,
                    source_symbol_id || '->' || target_symbol_id as path
                FROM cross_civilization_links

                UNION ALL

                SELECT
                    tp.source_symbol_id,
                    l.target_symbol_id,
                    tp.path_length + 1,
                    tp.path || '->' || l.target_symbol_id
                FROM cross_civilization_links l
                JOIN transmission_paths tp ON l.source_symbol_id = tp.target_symbol_id
                WHERE tp.path_length < 10
            )
            SELECT path_length, COUNT(*) as path_count
            FROM transmission_paths
            GROUP BY path_length
            ORDER BY path_length
        """)
        patterns['path_length_distribution'] = cursor.fetchall()

        # Degree centrality: how many link endpoints reference each symbol.
        cursor.execute("""
            WITH symbol_degrees AS (
                SELECT symbol_id, COUNT(*) as degree
                FROM (
                    SELECT source_symbol_id as symbol_id FROM cross_civilization_links
                    UNION ALL
                    SELECT target_symbol_id as symbol_id FROM cross_civilization_links
                )
                GROUP BY symbol_id
            )
            SELECT s.symbol_id, s.symbol_name, sd.degree
            FROM symbol_degrees sd
            JOIN symbols s ON sd.symbol_id = s.symbol_id
            ORDER BY sd.degree DESC
            LIMIT 10
        """)
        patterns['top_central_symbols'] = cursor.fetchall()

        return patterns

    def analyze_symbol_clusters(self) -> Dict[str, Any]:
        """Group symbols into simple attribute-based clusters.

        Returns:
            A dict with ``yin_yang_clusters``
            (``(yin_yang, engraving, size)`` rows), ``civilization_clusters``
            (``(civilization, period, size)`` rows) and ``form_frequency``
            (the 20 most frequent ``(symbol_form, frequency)`` rows), each
            ordered largest first.
        """
        cursor = self.conn.cursor()
        clusters: Dict[str, Any] = {}

        # Clusters by yin-yang attribute and engraving type.
        cursor.execute("""
            SELECT yin_yang_attribute, engraving_type, COUNT(*) as cluster_size
            FROM symbols
            GROUP BY yin_yang_attribute, engraving_type
            ORDER BY cluster_size DESC
        """)
        clusters['yin_yang_clusters'] = cursor.fetchall()

        # Clusters by civilization and period.
        cursor.execute("""
            SELECT origin_civilization, origin_period, COUNT(*) as cluster_size
            FROM symbols
            GROUP BY origin_civilization, origin_period
            ORDER BY cluster_size DESC
        """)
        clusters['civilization_clusters'] = cursor.fetchall()

        # Most common symbol forms.
        cursor.execute("""
            SELECT symbol_form, COUNT(*) as frequency
            FROM symbols
            GROUP BY symbol_form
            ORDER BY frequency DESC
            LIMIT 20
        """)
        clusters['form_frequency'] = cursor.fetchall()

        return clusters

    def find_interesting_patterns(self) -> Dict[str, Any]:
        """Run a set of exploratory queries over symbols and links.

        Returns:
            A dict with:
            ``yin_yang_reversals`` - ``(source_id, target_id, source_yin_yang,
            target_yin_yang)`` for links whose endpoints differ in yin-yang
            attribute;
            ``cross_civilization_preferences`` - ``(source_civ, target_civ,
            source_yin_yang, count)`` for links between distinct civilizations;
            ``cross_civilization_forms`` - ``(symbol_form, civilization_count)``
            for forms found in more than one civilization;
            ``temporal_yin_yang`` - ``(period, yang_count, yin_count, total)``
            per period.
        """
        cursor = self.conn.cursor()
        patterns: Dict[str, Any] = {}

        # 1. Links where the yin-yang attribute flips between endpoints.
        cursor.execute("""
            SELECT l.source_symbol_id, l.target_symbol_id,
                   s1.yin_yang_attribute as source_yin_yang,
                   s2.yin_yang_attribute as target_yin_yang
            FROM cross_civilization_links l
            JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id
            WHERE s1.yin_yang_attribute != s2.yin_yang_attribute
        """)
        patterns['yin_yang_reversals'] = cursor.fetchall()

        # 2. Yin-yang attribute of the source symbol per civilization pair.
        cursor.execute("""
            SELECT s1.origin_civilization, s2.origin_civilization,
                   s1.yin_yang_attribute, COUNT(*) as count
            FROM cross_civilization_links l
            JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id
            WHERE s1.origin_civilization != s2.origin_civilization
            GROUP BY s1.origin_civilization, s2.origin_civilization, s1.yin_yang_attribute
            ORDER BY count DESC
        """)
        patterns['cross_civilization_preferences'] = cursor.fetchall()

        # 3. Symbol forms shared by more than one civilization.
        cursor.execute("""
            SELECT symbol_form, COUNT(DISTINCT origin_civilization) as civilization_count
            FROM symbols
            GROUP BY symbol_form
            HAVING civilization_count > 1
            ORDER BY civilization_count DESC
        """)
        patterns['cross_civilization_forms'] = cursor.fetchall()

        # 4. Yang / yin counts per period.
        cursor.execute("""
            SELECT origin_period,
                   SUM(CASE WHEN yin_yang_attribute = 'yang' THEN 1 ELSE 0 END) as yang_count,
                   SUM(CASE WHEN yin_yang_attribute = 'yin' THEN 1 ELSE 0 END) as yin_count,
                   COUNT(*) as total
            FROM symbols
            GROUP BY origin_period
            ORDER BY origin_period
        """)
        patterns['temporal_yin_yang'] = cursor.fetchall()

        return patterns

    def create_statistical_report(self, output_file: Optional[str] = None) -> str:
        """Build a Markdown report from all analyses.

        Args:
            output_file: Optional path; when given, the report is also
                written there as UTF-8 and a confirmation is printed.

        Returns:
            The report text.
        """
        report = []

        # Basic statistics.
        basic_stats = self.get_basic_statistics()
        total_symbols = basic_stats['total_symbols']
        report.append("# 符号数据库统计分析报告")
        report.append(f"生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        report.append("\n## 基础统计信息")
        report.append(f"- 符号总数:{total_symbols}")
        report.append(f"- 关联关系总数:{basic_stats['total_links']}")

        # Distributions, each entry with its share of all symbols.
        report.append("\n## 阴阳属性分布")
        report.extend(self._distribution_lines(basic_stats['yin_yang_distribution'], total_symbols))

        report.append("\n## 刻法类型分布")
        report.extend(self._distribution_lines(basic_stats['engraving_distribution'], total_symbols))

        report.append("\n## 文明分布")
        report.extend(self._distribution_lines(basic_stats['civilization_distribution'], total_symbols))

        # Yin-yang patterns.
        yin_yang_patterns = self.analyze_yin_yang_patterns()
        report.append("\n## 阴阳模式分析")

        report.append("\n### 阴阳属性与刻法类型关联")
        for yin_yang, engraving_counts in yin_yang_patterns['yin_yang_engraving_association'].items():
            report.append(f"\n**{yin_yang}属性:**")
            for engraving, count in engraving_counts.items():
                report.append(f" - {engraving}:{count}")

        # Transmission patterns (ten most frequent directions).
        transmission_patterns = self.analyze_transmission_patterns()
        report.append("\n## 传播模式分析")

        report.append("\n### 主要传播方向")
        for source, target, count in transmission_patterns['transmission_directions'][:10]:
            report.append(f"- {source} → {target}:{count} 次")

        # Clusters (ten largest).
        clusters = self.analyze_symbol_clusters()
        report.append("\n## 符号聚类分析")

        report.append("\n### 阴阳-刻法聚类")
        for yin_yang, engraving, size in clusters['yin_yang_clusters'][:10]:
            report.append(f"- {yin_yang} + {engraving}:{size} 个符号")

        # Exploratory findings.
        interesting_patterns = self.find_interesting_patterns()
        report.append("\n## 有趣模式发现")

        report.append("\n### 阴阳属性反转")
        report.append(f"- 发现 {len(interesting_patterns['yin_yang_reversals'])} 个阴阳属性反转的传播案例")

        report.append("\n### 跨文明符号形态")
        for form, civ_count in interesting_patterns['cross_civilization_forms'][:5]:
            report.append(f"- 形态 '{form}' 出现在 {civ_count} 个不同文明中")

        report_content = '\n'.join(report)

        if output_file:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(report_content)
            print(f"统计分析报告已保存至:{output_file}")

        return report_content

    def create_visual_analysis(self, output_dir: str = "./analysis_results", *, show: bool = True):
        """Render the overview charts and save them as ``basic_analysis.png``.

        The figure is a 2x2 grid: a pie chart of the yin-yang distribution, a
        bar chart of engraving types, a horizontal bar chart of civilizations
        and a heatmap of yin-yang attribute versus engraving type.

        Args:
            output_dir: Directory for the image; created if missing.
            show: When true (default), display the figure with ``plt.show()``
                after saving. When false, skip the display and close the
                figure instead, which suits batch or headless runs.
        """
        import os
        os.makedirs(output_dir, exist_ok=True)

        basic_stats = self.get_basic_statistics()
        yin_yang_patterns = self.analyze_yin_yang_patterns()

        fig = plt.figure(figsize=(10, 8))

        # 1. Yin-yang distribution pie chart.
        plt.subplot(2, 2, 1)
        yin_yang_data = basic_stats['yin_yang_distribution']
        plt.pie(list(yin_yang_data.values()), labels=list(yin_yang_data.keys()), autopct='%1.1f%%')
        plt.title('阴阳属性分布')

        # 2. Engraving type bar chart.
        plt.subplot(2, 2, 2)
        engraving_data = basic_stats['engraving_distribution']
        plt.bar(list(engraving_data.keys()), list(engraving_data.values()))
        plt.title('刻法类型分布')
        plt.xticks(rotation=45)

        # 3. Civilization horizontal bar chart.
        plt.subplot(2, 2, 3)
        civilization_data = basic_stats['civilization_distribution']
        plt.barh(list(civilization_data.keys()), list(civilization_data.values()))
        plt.title('文明分布')

        # 4. Yin-yang versus engraving heatmap.
        plt.subplot(2, 2, 4)
        association_data = yin_yang_patterns['yin_yang_engraving_association']

        # Build a dense matrix: rows are yin-yang values, columns are the
        # sorted union of engraving types; missing pairs count as 0.
        yin_yang_types = list(association_data.keys())
        engraving_types = sorted({
            engraving
            for engraving_counts in association_data.values()
            for engraving in engraving_counts
        })
        matrix = np.zeros((len(yin_yang_types), len(engraving_types)))
        for i, yin_yang in enumerate(yin_yang_types):
            for j, engraving in enumerate(engraving_types):
                matrix[i, j] = association_data[yin_yang].get(engraving, 0)

        # With no symbols there is nothing to plot; leave the panel empty
        # rather than handing seaborn a zero-size matrix.
        if matrix.size:
            sns.heatmap(matrix, annot=True, fmt='g',
                        xticklabels=engraving_types, yticklabels=yin_yang_types)
        plt.title('阴阳-刻法关联热力图')
        plt.xlabel('刻法类型')
        plt.ylabel('阴阳属性')

        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'basic_analysis.png'), dpi=300, bbox_inches='tight')
        if show:
            plt.show()
        else:
            plt.close(fig)

        print(f"可视化分析图表已保存至:{output_dir}")


# Usage example
def main():
    """Run each analysis against the default database and print a summary.

    Opens ``symbols.db`` through ``SymbolAnalyzer()``, prints basic and
    pattern statistics, writes ``symbol_analysis_report.md``, renders the
    charts, and always closes the database connection afterwards.
    """
    analyzer = SymbolAnalyzer()
    try:
        # Basic statistics.
        print("=== 基础统计信息 ===")
        basic_stats = analyzer.get_basic_statistics()
        print(f"符号总数:{basic_stats['total_symbols']}")
        print(f"阴阳分布:{basic_stats['yin_yang_distribution']}")

        # Yin-yang patterns.
        print("\n=== 阴阳模式分析 ===")
        yin_yang_patterns = analyzer.analyze_yin_yang_patterns()
        print("阴阳-刻法关联:", yin_yang_patterns['yin_yang_engraving_association'])

        # Transmission patterns.
        print("\n=== 传播模式分析 ===")
        transmission_patterns = analyzer.analyze_transmission_patterns()
        print("主要传播方向:", transmission_patterns['transmission_directions'][:5])

        # Statistical report; the returned text is not needed here because
        # the report is written to the given file.
        print("\n=== 创建统计分析报告 ===")
        analyzer.create_statistical_report("symbol_analysis_report.md")

        # Charts.
        print("\n=== 创建可视化分析 ===")
        analyzer.create_visual_analysis()

        # Exploratory findings.
        print("\n=== 发现有趣模式 ===")
        interesting_patterns = analyzer.find_interesting_patterns()
        print(f"阴阳属性反转案例:{len(interesting_patterns['yin_yang_reversals'])} 个")
        print(f"跨文明符号形态:{interesting_patterns['cross_civilization_forms'][:3]}")
    finally:
        # Release the SQLite connection even if an analysis step fails.
        analyzer.conn.close()


# Run the example only when the file is executed as a script.
if __name__ == "__main__":
    main()