# Origin: huhan3000/胡汉三千年项目/数据分析工具/符号数据分析器.py
# (509 lines, 18 KiB, Python)
#
# Notice from the hosting page: this file contains Unicode characters that
# might be confused with other characters. If that is intentional, the
# warning can be safely ignored.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
符号数据分析器
胡汉三千年项目数据分析工具
功能:对符号数据库进行统计分析、模式识别、关联挖掘
"""
import sqlite3
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Any
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
class SymbolAnalyzer:
    """Statistical analysis over a SQLite symbol database.

    Every query reads the ``symbols`` table and/or the
    ``cross_civilization_links`` table.  Results are returned as plain
    dicts / lists of row tuples and can additionally be rendered as a
    Markdown report or a matplotlib figure.

    The instance owns one SQLite connection (``self.conn``).  Release it with
    :meth:`close`, or use the analyzer as a context manager::

        with SymbolAnalyzer("symbols.db") as analyzer:
            stats = analyzer.get_basic_statistics()
    """

    def __init__(self, db_path: str = "symbols.db"):
        """Open the database and configure matplotlib for Chinese text.

        Args:
            db_path: Path of the SQLite database file.
        """
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        # Chart titles are Chinese: prefer a font that has the glyphs, and
        # keep the minus sign ASCII so it renders with that font as well.
        # NOTE: this mutates matplotlib's global rcParams.
        plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
        plt.rcParams['axes.unicode_minus'] = False

    # ------------------------------------------------------------------
    # Resource management
    # ------------------------------------------------------------------
    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __enter__(self) -> "SymbolAnalyzer":
        """Return the analyzer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the connection on leaving the ``with`` block.

        Exceptions raised inside the block are not suppressed.
        """
        self.close()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _fetch_all(self, sql: str) -> List[Tuple[Any, ...]]:
        """Run a parameterless query and return every row, closing the cursor."""
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql)
            return cursor.fetchall()
        finally:
            cursor.close()

    @staticmethod
    def _nest_counts(rows, swap: bool = False) -> Dict[Any, Dict[Any, int]]:
        """Fold ``(first, second, count)`` rows into a two-level dict.

        Args:
            rows: Iterable of 3-tuples as produced by a two-column GROUP BY.
            swap: When false the result is ``{first: {second: count}}``;
                when true it is ``{second: {first: count}}``.
        """
        nested: Dict[Any, Dict[Any, int]] = {}
        for first, second, count in rows:
            outer, inner = (second, first) if swap else (first, second)
            nested.setdefault(outer, {})[inner] = count
        return nested

    @staticmethod
    def _distribution_lines(distribution: Dict[Any, int], total: int) -> List[str]:
        """Format ``{label: count}`` as Markdown bullets with percentages.

        ``total`` is only divided by when ``distribution`` has entries.
        """
        return [
            f"- {label}:{count} ({count / total * 100:.1f}%)"
            for label, count in distribution.items()
        ]

    @staticmethod
    def _labels(distribution: Dict[Any, int]) -> List[str]:
        """Return the keys of ``distribution`` as strings for use as labels.

        Keys are stringified so that a ``None`` key (a NULL category in the
        database) can still be drawn as a chart label.
        """
        return [str(key) for key in distribution]

    # ------------------------------------------------------------------
    # Analyses
    # ------------------------------------------------------------------
    def get_basic_statistics(self) -> Dict[str, Any]:
        """Collect headline counts for the database.

        Returns:
            A dict with the keys ``total_symbols``, ``yin_yang_distribution``,
            ``engraving_distribution``, ``civilization_distribution``,
            ``period_distribution`` and ``total_links``.  The two totals are
            row counts; each distribution maps a column value to the number
            of symbols carrying it.
        """
        stats: Dict[str, Any] = {}

        # Total number of symbols.
        stats['total_symbols'] = self._fetch_all("SELECT COUNT(*) FROM symbols")[0][0]

        # One GROUP BY per categorical column of ``symbols``.
        distribution_queries = (
            ('yin_yang_distribution',
             "SELECT yin_yang_attribute, COUNT(*) FROM symbols GROUP BY yin_yang_attribute"),
            ('engraving_distribution',
             "SELECT engraving_type, COUNT(*) FROM symbols GROUP BY engraving_type"),
            ('civilization_distribution',
             "SELECT origin_civilization, COUNT(*) FROM symbols GROUP BY origin_civilization"),
            ('period_distribution',
             "SELECT origin_period, COUNT(*) FROM symbols GROUP BY origin_period"),
        )
        for key, sql in distribution_queries:
            stats[key] = dict(self._fetch_all(sql))

        # Total number of cross-civilization links.
        stats['total_links'] = self._fetch_all(
            "SELECT COUNT(*) FROM cross_civilization_links"
        )[0][0]
        return stats

    def analyze_yin_yang_patterns(self) -> Dict[str, Any]:
        """Cross-tabulate the yin/yang attribute against other columns.

        Returns:
            A dict with three nested count tables:

            * ``yin_yang_engraving_association``:
              ``{yin_yang: {engraving_type: count}}``
            * ``yin_yang_civilization_association``:
              ``{yin_yang: {civilization: count}}``
            * ``yin_yang_period_distribution``:
              ``{period: {yin_yang: count}}`` (period is the outer key)
        """
        patterns: Dict[str, Any] = {}

        # Yin/yang attribute vs. engraving type.
        patterns['yin_yang_engraving_association'] = self._nest_counts(self._fetch_all("""
            SELECT yin_yang_attribute, engraving_type, COUNT(*)
            FROM symbols
            GROUP BY yin_yang_attribute, engraving_type
            ORDER BY yin_yang_attribute, engraving_type
        """))

        # Yin/yang attribute vs. civilization of origin.
        patterns['yin_yang_civilization_association'] = self._nest_counts(self._fetch_all("""
            SELECT yin_yang_attribute, origin_civilization, COUNT(*)
            FROM symbols
            GROUP BY yin_yang_attribute, origin_civilization
            ORDER BY yin_yang_attribute, origin_civilization
        """))

        # Yin/yang attribute per period; rows are (yin_yang, period, count)
        # but the result is keyed by period first, hence swap=True.
        patterns['yin_yang_period_distribution'] = self._nest_counts(self._fetch_all("""
            SELECT yin_yang_attribute, origin_period, COUNT(*)
            FROM symbols
            GROUP BY yin_yang_attribute, origin_period
            ORDER BY origin_period, yin_yang_attribute
        """), swap=True)

        return patterns

    def analyze_transmission_patterns(self) -> Dict[str, Any]:
        """Analyze how symbols are linked across civilizations.

        Returns:
            A dict with:

            * ``transmission_directions``: rows of
              ``(source_civilization, target_civilization, link_count)`` for
              links whose endpoints belong to different civilizations,
              most frequent first.
            * ``path_length_distribution``: rows of
              ``(path_length, path_count)`` over all link chains.
            * ``top_central_symbols``: up to ten rows of
              ``(symbol_id, symbol_name, degree)`` where degree counts the
              links a symbol appears in as source or target.
        """
        patterns: Dict[str, Any] = {}

        # Direction of transmission between civilizations.
        patterns['transmission_directions'] = self._fetch_all("""
            SELECT s1.origin_civilization, s2.origin_civilization, COUNT(*) as link_count
            FROM cross_civilization_links l
            JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id
            WHERE s1.origin_civilization != s2.origin_civilization
            GROUP BY s1.origin_civilization, s2.origin_civilization
            ORDER BY link_count DESC
        """)

        # Length of transmission chains.  The recursion only extends paths
        # whose length is below 10, which also bounds it on cyclic link data.
        patterns['path_length_distribution'] = self._fetch_all("""
            WITH RECURSIVE transmission_paths AS (
                SELECT
                    source_symbol_id,
                    target_symbol_id,
                    1 as path_length,
                    source_symbol_id || '->' || target_symbol_id as path
                FROM cross_civilization_links
                UNION ALL
                SELECT
                    tp.source_symbol_id,
                    l.target_symbol_id,
                    tp.path_length + 1,
                    tp.path || '->' || l.target_symbol_id
                FROM cross_civilization_links l
                JOIN transmission_paths tp ON l.source_symbol_id = tp.target_symbol_id
                WHERE tp.path_length < 10
            )
            SELECT path_length, COUNT(*) as path_count
            FROM transmission_paths
            GROUP BY path_length
            ORDER BY path_length
        """)

        # Degree centrality: how often each symbol appears on either end of
        # a link; only the ten highest-degree symbols are returned.
        patterns['top_central_symbols'] = self._fetch_all("""
            WITH symbol_degrees AS (
                SELECT symbol_id, COUNT(*) as degree
                FROM (
                    SELECT source_symbol_id as symbol_id FROM cross_civilization_links
                    UNION ALL
                    SELECT target_symbol_id as symbol_id FROM cross_civilization_links
                )
                GROUP BY symbol_id
            )
            SELECT s.symbol_id, s.symbol_name, sd.degree
            FROM symbol_degrees sd
            JOIN symbols s ON sd.symbol_id = s.symbol_id
            ORDER BY sd.degree DESC
            LIMIT 10
        """)

        return patterns

    def analyze_symbol_clusters(self) -> Dict[str, Any]:
        """Group symbols into simple attribute-based clusters.

        Returns:
            A dict with three lists of row tuples, each ordered by size
            descending:

            * ``yin_yang_clusters``: ``(yin_yang, engraving_type, size)``
            * ``civilization_clusters``: ``(civilization, period, size)``
            * ``form_frequency``: ``(symbol_form, frequency)``, top 20 only
        """
        clusters: Dict[str, Any] = {}

        # Clusters by yin/yang attribute and engraving type.
        clusters['yin_yang_clusters'] = self._fetch_all("""
            SELECT yin_yang_attribute, engraving_type, COUNT(*) as cluster_size
            FROM symbols
            GROUP BY yin_yang_attribute, engraving_type
            ORDER BY cluster_size DESC
        """)

        # Clusters by civilization and period.
        clusters['civilization_clusters'] = self._fetch_all("""
            SELECT origin_civilization, origin_period, COUNT(*) as cluster_size
            FROM symbols
            GROUP BY origin_civilization, origin_period
            ORDER BY cluster_size DESC
        """)

        # Most frequent symbol forms.
        clusters['form_frequency'] = self._fetch_all("""
            SELECT symbol_form, COUNT(*) as frequency
            FROM symbols
            GROUP BY symbol_form
            ORDER BY frequency DESC
            LIMIT 20
        """)

        return clusters

    def find_interesting_patterns(self) -> Dict[str, Any]:
        """Run a set of exploratory queries over symbols and links.

        Returns:
            A dict with four lists of row tuples:

            * ``yin_yang_reversals``:
              ``(source_id, target_id, source_yin_yang, target_yin_yang)``
              for links whose endpoints differ in yin/yang attribute.
            * ``cross_civilization_preferences``:
              ``(source_civ, target_civ, source_yin_yang, count)`` for links
              between different civilizations, most frequent first.
            * ``cross_civilization_forms``:
              ``(symbol_form, civilization_count)`` for forms found in more
              than one civilization.
            * ``temporal_yin_yang``:
              ``(period, yang_count, yin_count, total)`` per period; only the
              literal values ``'yang'`` and ``'yin'`` are counted.
        """
        patterns: Dict[str, Any] = {}

        # 1. Links along which the yin/yang attribute flips.
        patterns['yin_yang_reversals'] = self._fetch_all("""
            SELECT l.source_symbol_id, l.target_symbol_id,
                   s1.yin_yang_attribute as source_yin_yang,
                   s2.yin_yang_attribute as target_yin_yang
            FROM cross_civilization_links l
            JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id
            WHERE s1.yin_yang_attribute != s2.yin_yang_attribute
        """)

        # 2. Yin/yang attribute of the source in cross-civilization links.
        patterns['cross_civilization_preferences'] = self._fetch_all("""
            SELECT s1.origin_civilization, s2.origin_civilization,
                   s1.yin_yang_attribute, COUNT(*) as count
            FROM cross_civilization_links l
            JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id
            WHERE s1.origin_civilization != s2.origin_civilization
            GROUP BY s1.origin_civilization, s2.origin_civilization, s1.yin_yang_attribute
            ORDER BY count DESC
        """)

        # 3. Symbol forms shared by more than one civilization.
        patterns['cross_civilization_forms'] = self._fetch_all("""
            SELECT symbol_form, COUNT(DISTINCT origin_civilization) as civilization_count
            FROM symbols
            GROUP BY symbol_form
            HAVING civilization_count > 1
            ORDER BY civilization_count DESC
        """)

        # 4. Yin/yang counts per period.
        patterns['temporal_yin_yang'] = self._fetch_all("""
            SELECT origin_period,
                   SUM(CASE WHEN yin_yang_attribute = 'yang' THEN 1 ELSE 0 END) as yang_count,
                   SUM(CASE WHEN yin_yang_attribute = 'yin' THEN 1 ELSE 0 END) as yin_count,
                   COUNT(*) as total
            FROM symbols
            GROUP BY origin_period
            ORDER BY origin_period
        """)

        return patterns

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------
    def create_statistical_report(self, output_file: str = None) -> str:
        """Build a Markdown report summarising every analysis.

        Args:
            output_file: Optional path; when given, the report is also
                written there as UTF-8 and a confirmation line is printed.

        Returns:
            The report text.
        """
        report: List[str] = []

        # Headline numbers.
        basic_stats = self.get_basic_statistics()
        total_symbols = basic_stats['total_symbols']
        report.append("# 符号数据库统计分析报告")
        report.append(f"生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        report.append("\n## 基础统计信息")
        report.append(f"- 符号总数:{total_symbols}")
        report.append(f"- 关联关系总数:{basic_stats['total_links']}")

        # Distributions, each as "label:count (percent)".
        report.append("\n## 阴阳属性分布")
        report.extend(self._distribution_lines(basic_stats['yin_yang_distribution'], total_symbols))
        report.append("\n## 刻法类型分布")
        report.extend(self._distribution_lines(basic_stats['engraving_distribution'], total_symbols))
        report.append("\n## 文明分布")
        report.extend(self._distribution_lines(basic_stats['civilization_distribution'], total_symbols))

        # Yin/yang vs. engraving type.
        yin_yang_patterns = self.analyze_yin_yang_patterns()
        report.append("\n## 阴阳模式分析")
        report.append("\n### 阴阳属性与刻法类型关联")
        for yin_yang, engraving_counts in yin_yang_patterns['yin_yang_engraving_association'].items():
            report.append(f"\n**{yin_yang}属性:**")
            for engraving, count in engraving_counts.items():
                report.append(f" - {engraving}:{count}")

        # Ten most frequent transmission directions.
        transmission_patterns = self.analyze_transmission_patterns()
        report.append("\n## 传播模式分析")
        report.append("\n### 主要传播方向")
        for source, target, count in transmission_patterns['transmission_directions'][:10]:
            report.append(f"- {source}->{target}:{count}")

        # Ten largest yin/yang + engraving clusters.
        clusters = self.analyze_symbol_clusters()
        report.append("\n## 符号聚类分析")
        report.append("\n### 阴阳-刻法聚类")
        for yin_yang, engraving, size in clusters['yin_yang_clusters'][:10]:
            report.append(f"- {yin_yang} + {engraving}:{size} 个符号")

        # Exploratory findings; only the first five shared forms are listed.
        interesting_patterns = self.find_interesting_patterns()
        report.append("\n## 有趣模式发现")
        report.append("\n### 阴阳属性反转")
        report.append(f"- 发现 {len(interesting_patterns['yin_yang_reversals'])} 个阴阳属性反转的传播案例")
        report.append("\n### 跨文明符号形态")
        for form, civ_count in interesting_patterns['cross_civilization_forms'][:5]:
            report.append(f"- 形态 '{form}' 出现在 {civ_count} 个不同文明中")

        report_content = '\n'.join(report)

        # Optionally persist the report.
        if output_file:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(report_content)
            print(f"统计分析报告已保存至:{output_file}")

        return report_content

    def create_visual_analysis(self, output_dir: str = "./analysis_results") -> None:
        """Render a 2x2 overview figure and save it as ``basic_analysis.png``.

        The panels are: yin/yang pie chart, engraving-type bar chart,
        civilization horizontal bar chart, and a yin/yang x engraving-type
        heatmap.  The figure is saved into ``output_dir`` (created if
        missing), shown with ``plt.show()``, and then closed.

        Args:
            output_dir: Directory that receives the PNG file.
        """
        import os

        os.makedirs(output_dir, exist_ok=True)

        basic_stats = self.get_basic_statistics()
        yin_yang_patterns = self.analyze_yin_yang_patterns()

        figure = plt.figure(figsize=(10, 8))
        try:
            # 1. Yin/yang attribute distribution (pie chart).
            plt.subplot(2, 2, 1)
            yin_yang_data = basic_stats['yin_yang_distribution']
            plt.pie(
                list(yin_yang_data.values()),
                labels=self._labels(yin_yang_data),
                autopct='%1.1f%%',
            )
            plt.title('阴阳属性分布')

            # 2. Engraving type distribution (bar chart).
            plt.subplot(2, 2, 2)
            engraving_data = basic_stats['engraving_distribution']
            plt.bar(self._labels(engraving_data), list(engraving_data.values()))
            plt.title('刻法类型分布')
            plt.xticks(rotation=45)

            # 3. Civilization distribution (horizontal bar chart).
            plt.subplot(2, 2, 3)
            civilization_data = basic_stats['civilization_distribution']
            plt.barh(self._labels(civilization_data), list(civilization_data.values()))
            plt.title('文明分布')

            # 4. Yin/yang vs. engraving type (heatmap).
            plt.subplot(2, 2, 4)
            association_data = yin_yang_patterns['yin_yang_engraving_association']
            yin_yang_types = list(association_data)
            # key=str keeps the sort well-defined when a NULL (None)
            # engraving type sits next to string values.
            engraving_types = sorted(
                {engraving for counts in association_data.values() for engraving in counts},
                key=str,
            )
            # Rows follow yin_yang_types, columns follow engraving_types;
            # combinations absent from the data stay at 0.
            matrix = np.zeros((len(yin_yang_types), len(engraving_types)))
            for i, yin_yang in enumerate(yin_yang_types):
                for j, engraving in enumerate(engraving_types):
                    matrix[i, j] = association_data[yin_yang].get(engraving, 0)
            # An empty database yields a 0x0 matrix; skip the heatmap then
            # and leave the panel with just its title and axis labels.
            if matrix.size:
                sns.heatmap(
                    matrix,
                    annot=True,
                    fmt='g',
                    xticklabels=[str(engraving) for engraving in engraving_types],
                    yticklabels=[str(yin_yang) for yin_yang in yin_yang_types],
                )
            plt.title('阴阳-刻法关联热力图')
            plt.xlabel('刻法类型')
            plt.ylabel('阴阳属性')

            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, 'basic_analysis.png'), dpi=300, bbox_inches='tight')
            plt.show()
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # without this, repeated calls accumulate figures.
            plt.close(figure)

        print(f"可视化分析图表已保存至:{output_dir}")
# Usage example
def main():
    """Run each analysis against the default database and print the results.

    Besides printing, this writes ``symbol_analysis_report.md`` and the chart
    produced by ``create_visual_analysis()`` as side effects.
    """
    analyzer = SymbolAnalyzer()
    try:
        # Headline statistics.
        print("=== 基础统计信息 ===")
        basic_stats = analyzer.get_basic_statistics()
        print(f"符号总数:{basic_stats['total_symbols']}")
        print(f"阴阳分布:{basic_stats['yin_yang_distribution']}")

        # Yin/yang patterns.
        print("\n=== 阴阳模式分析 ===")
        yin_yang_patterns = analyzer.analyze_yin_yang_patterns()
        print("阴阳-刻法关联:", yin_yang_patterns['yin_yang_engraving_association'])

        # Transmission patterns (five most frequent directions).
        print("\n=== 传播模式分析 ===")
        transmission_patterns = analyzer.analyze_transmission_patterns()
        print("主要传播方向:", transmission_patterns['transmission_directions'][:5])

        # Markdown report; the returned text is not needed here because the
        # method also writes it to the given file.
        print("\n=== 创建统计分析报告 ===")
        analyzer.create_statistical_report("symbol_analysis_report.md")

        # Charts.
        print("\n=== 创建可视化分析 ===")
        analyzer.create_visual_analysis()

        # Exploratory findings.
        print("\n=== 发现有趣模式 ===")
        interesting_patterns = analyzer.find_interesting_patterns()
        print(f"阴阳属性反转案例:{len(interesting_patterns['yin_yang_reversals'])}")
        print(f"跨文明符号形态:{interesting_patterns['cross_civilization_forms'][:3]}")
    finally:
        # The analyzer opens a SQLite connection in its constructor;
        # release it even if one of the steps above fails.
        analyzer.conn.close()
# Run the usage example only when executed as a script, not on import.
if __name__ == "__main__":
    main()