更新文档系统归并优化方案
This commit is contained in:
509
胡汉三千年项目/数据分析工具/符号数据分析器.py
Normal file
509
胡汉三千年项目/数据分析工具/符号数据分析器.py
Normal file
@@ -0,0 +1,509 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
符号数据分析器
|
||||
胡汉三千年项目数据分析工具
|
||||
|
||||
功能:对符号数据库进行统计分析、模式识别、关联挖掘
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import Dict, List, Tuple, Any
|
||||
from collections import Counter
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
class SymbolAnalyzer:
    """Statistical analyzer for the symbol database.

    Reads the ``symbols`` and ``cross_civilization_links`` tables of a SQLite
    database and produces distribution statistics, yin-yang association
    tables, link (transmission) summaries, a Markdown report and a summary
    figure.

    The analyzer owns its database connection. Call :meth:`close`, or use the
    instance as a context manager, to release it::

        with SymbolAnalyzer("symbols.db") as analyzer:
            stats = analyzer.get_basic_statistics()
    """

    def __init__(self, db_path: str = "symbols.db"):
        """Open the database and configure matplotlib fonts.

        Args:
            db_path: Path of the SQLite database file.
        """
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)

        # Configure fonts for Chinese chart text. Disabling unicode_minus
        # makes matplotlib draw minus signs as ASCII hyphens (presumably
        # because the CJK font lacks the Unicode minus glyph).
        plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
        plt.rcParams['axes.unicode_minus'] = False

    # ------------------------------------------------------------------
    # Resource management
    # ------------------------------------------------------------------

    def close(self) -> None:
        """Close the database connection. Safe to call more than once."""
        self.conn.close()

    def __enter__(self):
        """Return the analyzer itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the connection when the ``with`` block exits."""
        self.close()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _fetch_all(self, sql: str) -> List[Tuple[Any, ...]]:
        """Execute *sql* and return every row, always closing the cursor."""
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql)
            return cursor.fetchall()
        finally:
            cursor.close()

    def _count_rows(self, table: str) -> int:
        """Return ``COUNT(*)`` for *table* (a trusted, hard-coded name)."""
        return self._fetch_all(f"SELECT COUNT(*) FROM {table}")[0][0]

    def _symbol_distribution(self, column: str) -> Dict[Any, int]:
        """Return ``{value: count}`` for one ``symbols`` column.

        *column* is interpolated into the SQL text, so it must be a trusted,
        hard-coded identifier, never user input.
        """
        return dict(self._fetch_all(
            f"SELECT {column}, COUNT(*) FROM symbols GROUP BY {column}"
        ))

    @staticmethod
    def _nest_counts(rows, outer: int = 0, inner: int = 1) -> Dict[Any, Dict[Any, int]]:
        """Fold ``(a, b, count)`` rows into ``{row[outer]: {row[inner]: count}}``.

        Row order is preserved through dict insertion order.
        """
        nested: Dict[Any, Dict[Any, int]] = {}
        for row in rows:
            nested.setdefault(row[outer], {})[row[inner]] = row[2]
        return nested

    @staticmethod
    def _distribution_lines(distribution: Dict[Any, int], total: int) -> List[str]:
        """Format ``{label: count}`` as report bullets with percentages."""
        lines = []
        for label, count in distribution.items():
            # A non-empty distribution implies total > 0; the guard only
            # protects against inconsistent inputs.
            percentage = (count / total) * 100 if total else 0.0
            lines.append(f"- {label}:{count} ({percentage:.1f}%)")
        return lines

    # ------------------------------------------------------------------
    # Analyses
    # ------------------------------------------------------------------

    def get_basic_statistics(self) -> Dict[str, Any]:
        """Collect basic counts and single-column distributions.

        Returns:
            Dict with the keys ``total_symbols``, ``yin_yang_distribution``,
            ``engraving_distribution``, ``civilization_distribution``,
            ``period_distribution`` and ``total_links``. Each distribution
            maps a column value to its row count.
        """
        stats: Dict[str, Any] = {}

        # Total number of symbols.
        stats['total_symbols'] = self._count_rows("symbols")

        # Per-column distributions of the symbols table.
        stats['yin_yang_distribution'] = self._symbol_distribution("yin_yang_attribute")
        stats['engraving_distribution'] = self._symbol_distribution("engraving_type")
        stats['civilization_distribution'] = self._symbol_distribution("origin_civilization")
        stats['period_distribution'] = self._symbol_distribution("origin_period")

        # Total number of cross-civilization links.
        stats['total_links'] = self._count_rows("cross_civilization_links")

        return stats

    def analyze_yin_yang_patterns(self) -> Dict[str, Any]:
        """Cross-tabulate the yin-yang attribute against other columns.

        Returns:
            Dict with three nested count tables:
            ``yin_yang_engraving_association`` (yin-yang -> engraving type),
            ``yin_yang_civilization_association`` (yin-yang -> civilization)
            and ``yin_yang_period_distribution`` (period -> yin-yang).
        """
        patterns: Dict[str, Any] = {}

        # Yin-yang attribute versus engraving type.
        patterns['yin_yang_engraving_association'] = self._nest_counts(self._fetch_all("""
            SELECT yin_yang_attribute, engraving_type, COUNT(*)
            FROM symbols
            GROUP BY yin_yang_attribute, engraving_type
            ORDER BY yin_yang_attribute, engraving_type
        """))

        # Yin-yang attribute versus origin civilization.
        patterns['yin_yang_civilization_association'] = self._nest_counts(self._fetch_all("""
            SELECT yin_yang_attribute, origin_civilization, COUNT(*)
            FROM symbols
            GROUP BY yin_yang_attribute, origin_civilization
            ORDER BY yin_yang_attribute, origin_civilization
        """))

        # Yin-yang attribute over time; this table is keyed by period first,
        # so the outer/inner columns are swapped.
        patterns['yin_yang_period_distribution'] = self._nest_counts(self._fetch_all("""
            SELECT yin_yang_attribute, origin_period, COUNT(*)
            FROM symbols
            GROUP BY yin_yang_attribute, origin_period
            ORDER BY origin_period, yin_yang_attribute
        """), outer=1, inner=0)

        return patterns

    def analyze_transmission_patterns(self) -> Dict[str, Any]:
        """Summarize the cross-civilization link graph.

        Returns:
            Dict with ``transmission_directions`` (rows of source
            civilization, target civilization, link count, most frequent
            first), ``path_length_distribution`` (rows of path length, number
            of paths) and ``top_central_symbols`` (up to ten rows of symbol
            id, symbol name, degree, highest degree first).
        """
        patterns: Dict[str, Any] = {}

        # Direction of transmission: links whose endpoints belong to
        # different civilizations, grouped by (source, target).
        patterns['transmission_directions'] = self._fetch_all("""
            SELECT s1.origin_civilization, s2.origin_civilization, COUNT(*) as link_count
            FROM cross_civilization_links l
            JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id
            WHERE s1.origin_civilization != s2.origin_civilization
            GROUP BY s1.origin_civilization, s2.origin_civilization
            ORDER BY link_count DESC
        """)

        # Path-length distribution: chains of links are followed recursively,
        # capped at length 10 so the recursion terminates on cyclic graphs.
        # NOTE(review): the query does not exclude revisited symbols, so on a
        # cyclic graph it counts walks rather than simple paths. The `path`
        # column is built but never inspected. Confirm whether cycle
        # filtering was intended.
        patterns['path_length_distribution'] = self._fetch_all("""
            WITH RECURSIVE transmission_paths AS (
                SELECT
                    source_symbol_id,
                    target_symbol_id,
                    1 as path_length,
                    source_symbol_id || '->' || target_symbol_id as path
                FROM cross_civilization_links

                UNION ALL

                SELECT
                    tp.source_symbol_id,
                    l.target_symbol_id,
                    tp.path_length + 1,
                    tp.path || '->' || l.target_symbol_id
                FROM cross_civilization_links l
                JOIN transmission_paths tp ON l.source_symbol_id = tp.target_symbol_id
                WHERE tp.path_length < 10
            )
            SELECT path_length, COUNT(*) as path_count
            FROM transmission_paths
            GROUP BY path_length
            ORDER BY path_length
        """)

        # Degree centrality: number of link endpoints per symbol (source and
        # target occurrences both count), top ten.
        patterns['top_central_symbols'] = self._fetch_all("""
            WITH symbol_degrees AS (
                SELECT symbol_id, COUNT(*) as degree
                FROM (
                    SELECT source_symbol_id as symbol_id FROM cross_civilization_links
                    UNION ALL
                    SELECT target_symbol_id as symbol_id FROM cross_civilization_links
                )
                GROUP BY symbol_id
            )
            SELECT s.symbol_id, s.symbol_name, sd.degree
            FROM symbol_degrees sd
            JOIN symbols s ON sd.symbol_id = s.symbol_id
            ORDER BY sd.degree DESC
            LIMIT 10
        """)

        return patterns

    def analyze_symbol_clusters(self) -> Dict[str, Any]:
        """Group symbols by shared attribute combinations.

        Returns:
            Dict with ``yin_yang_clusters`` (rows of yin-yang, engraving type,
            size), ``civilization_clusters`` (rows of civilization, period,
            size) and ``form_frequency`` (the 20 most frequent symbol forms
            as rows of form, frequency). All lists are sorted largest first.
        """
        clusters: Dict[str, Any] = {}

        # Clusters by yin-yang attribute and engraving type.
        clusters['yin_yang_clusters'] = self._fetch_all("""
            SELECT yin_yang_attribute, engraving_type, COUNT(*) as cluster_size
            FROM symbols
            GROUP BY yin_yang_attribute, engraving_type
            ORDER BY cluster_size DESC
        """)

        # Clusters by civilization and period.
        clusters['civilization_clusters'] = self._fetch_all("""
            SELECT origin_civilization, origin_period, COUNT(*) as cluster_size
            FROM symbols
            GROUP BY origin_civilization, origin_period
            ORDER BY cluster_size DESC
        """)

        # Frequency of identical symbol forms.
        clusters['form_frequency'] = self._fetch_all("""
            SELECT symbol_form, COUNT(*) as frequency
            FROM symbols
            GROUP BY symbol_form
            ORDER BY frequency DESC
            LIMIT 20
        """)

        return clusters

    def find_interesting_patterns(self) -> Dict[str, Any]:
        """Run a set of exploratory queries over symbols and links.

        Returns:
            Dict with ``yin_yang_reversals`` (links whose endpoints have
            different yin-yang attributes), ``cross_civilization_preferences``
            (link counts per source civilization, target civilization and
            source yin-yang attribute), ``cross_civilization_forms`` (symbol
            forms found in more than one civilization) and
            ``temporal_yin_yang`` (per-period yang / yin / total counts).
        """
        patterns: Dict[str, Any] = {}

        # 1. Links along which the yin-yang attribute changes.
        patterns['yin_yang_reversals'] = self._fetch_all("""
            SELECT l.source_symbol_id, l.target_symbol_id,
                   s1.yin_yang_attribute as source_yin_yang,
                   s2.yin_yang_attribute as target_yin_yang
            FROM cross_civilization_links l
            JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id
            WHERE s1.yin_yang_attribute != s2.yin_yang_attribute
        """)

        # 2. Yin-yang preference of links that cross civilizations.
        patterns['cross_civilization_preferences'] = self._fetch_all("""
            SELECT s1.origin_civilization, s2.origin_civilization,
                   s1.yin_yang_attribute, COUNT(*) as count
            FROM cross_civilization_links l
            JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id
            WHERE s1.origin_civilization != s2.origin_civilization
            GROUP BY s1.origin_civilization, s2.origin_civilization, s1.yin_yang_attribute
            ORDER BY count DESC
        """)

        # 3. Symbol forms shared by several civilizations.
        patterns['cross_civilization_forms'] = self._fetch_all("""
            SELECT symbol_form, COUNT(DISTINCT origin_civilization) as civilization_count
            FROM symbols
            GROUP BY symbol_form
            HAVING civilization_count > 1
            ORDER BY civilization_count DESC
        """)

        # 4. Yin / yang counts per period.
        patterns['temporal_yin_yang'] = self._fetch_all("""
            SELECT origin_period,
                   SUM(CASE WHEN yin_yang_attribute = 'yang' THEN 1 ELSE 0 END) as yang_count,
                   SUM(CASE WHEN yin_yang_attribute = 'yin' THEN 1 ELSE 0 END) as yin_count,
                   COUNT(*) as total
            FROM symbols
            GROUP BY origin_period
            ORDER BY origin_period
        """)

        return patterns

    # ------------------------------------------------------------------
    # Outputs
    # ------------------------------------------------------------------

    def create_statistical_report(self, output_file: str = None) -> str:
        """Build the Markdown statistics report.

        Args:
            output_file: Optional path. When given, the report is also
                written there as UTF-8 and a confirmation is printed.

        Returns:
            The full report text.
        """
        report: List[str] = []

        # Basic statistics.
        basic_stats = self.get_basic_statistics()
        total_symbols = basic_stats['total_symbols']
        report.append("# 符号数据库统计分析报告")
        report.append(f"生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        report.append("\n## 基础统计信息")
        report.append(f"- 符号总数:{total_symbols}")
        report.append(f"- 关联关系总数:{basic_stats['total_links']}")

        # Yin-yang, engraving-type and civilization distributions share one
        # bullet format, so they are rendered by the same helper.
        distribution_sections = (
            ("\n## 阴阳属性分布", 'yin_yang_distribution'),
            ("\n## 刻法类型分布", 'engraving_distribution'),
            ("\n## 文明分布", 'civilization_distribution'),
        )
        for heading, key in distribution_sections:
            report.append(heading)
            report.extend(self._distribution_lines(basic_stats[key], total_symbols))

        # Yin-yang pattern analysis.
        yin_yang_patterns = self.analyze_yin_yang_patterns()
        report.append("\n## 阴阳模式分析")

        report.append("\n### 阴阳属性与刻法类型关联")
        for yin_yang, engraving_counts in yin_yang_patterns['yin_yang_engraving_association'].items():
            report.append(f"\n**{yin_yang}属性:**")
            for engraving, count in engraving_counts.items():
                report.append(f" - {engraving}:{count}")

        # Transmission pattern analysis (ten most frequent directions).
        transmission_patterns = self.analyze_transmission_patterns()
        report.append("\n## 传播模式分析")

        report.append("\n### 主要传播方向")
        for source, target, count in transmission_patterns['transmission_directions'][:10]:
            report.append(f"- {source} → {target}:{count} 次")

        # Cluster analysis (ten largest clusters).
        clusters = self.analyze_symbol_clusters()
        report.append("\n## 符号聚类分析")

        report.append("\n### 阴阳-刻法聚类")
        for yin_yang, engraving, size in clusters['yin_yang_clusters'][:10]:
            report.append(f"- {yin_yang} + {engraving}:{size} 个符号")

        # Interesting patterns.
        interesting_patterns = self.find_interesting_patterns()
        report.append("\n## 有趣模式发现")

        report.append("\n### 阴阳属性反转")
        report.append(f"- 发现 {len(interesting_patterns['yin_yang_reversals'])} 个阴阳属性反转的传播案例")

        report.append("\n### 跨文明符号形态")
        for form, civ_count in interesting_patterns['cross_civilization_forms'][:5]:
            report.append(f"- 形态 '{form}' 出现在 {civ_count} 个不同文明中")

        report_content = '\n'.join(report)

        # Optionally persist the report.
        if output_file:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(report_content)
            print(f"统计分析报告已保存至:{output_file}")

        return report_content

    def create_visual_analysis(self, output_dir: str = "./analysis_results"):
        """Render the 2x2 summary figure and save it as ``basic_analysis.png``.

        The panels are: yin-yang pie chart, engraving-type bar chart,
        civilization horizontal bar chart, and a yin-yang x engraving-type
        heatmap. The figure is shown with ``plt.show()`` and then closed.

        Args:
            output_dir: Directory for the image; created if missing.
        """
        import os
        os.makedirs(output_dir, exist_ok=True)

        # Gather the data to plot.
        basic_stats = self.get_basic_statistics()
        yin_yang_patterns = self.analyze_yin_yang_patterns()

        fig = plt.figure(figsize=(10, 8))
        try:
            # 1. Yin-yang distribution pie chart. Dict views are materialized
            # as lists, as the barh call below already does, so matplotlib
            # always receives real sequences.
            plt.subplot(2, 2, 1)
            yin_yang_data = basic_stats['yin_yang_distribution']
            plt.pie(list(yin_yang_data.values()), labels=list(yin_yang_data.keys()),
                    autopct='%1.1f%%')
            plt.title('阴阳属性分布')

            # 2. Engraving-type distribution bar chart.
            plt.subplot(2, 2, 2)
            engraving_data = basic_stats['engraving_distribution']
            plt.bar(list(engraving_data.keys()), list(engraving_data.values()))
            plt.title('刻法类型分布')
            plt.xticks(rotation=45)

            # 3. Civilization distribution horizontal bar chart.
            plt.subplot(2, 2, 3)
            civilization_data = basic_stats['civilization_distribution']
            plt.barh(list(civilization_data.keys()), list(civilization_data.values()))
            plt.title('文明分布')

            # 4. Yin-yang x engraving-type heatmap.
            plt.subplot(2, 2, 4)
            association_data = yin_yang_patterns['yin_yang_engraving_association']

            # Convert the nested dict to a dense matrix; combinations that
            # never occur stay at zero.
            yin_yang_types = list(association_data.keys())
            engraving_types = sorted(
                {engraving for counts in association_data.values() for engraving in counts}
            )
            matrix = np.zeros((len(yin_yang_types), len(engraving_types)))
            for i, yin_yang in enumerate(yin_yang_types):
                for j, engraving in enumerate(engraving_types):
                    matrix[i, j] = association_data[yin_yang].get(engraving, 0)

            # An empty database yields a 0x0 matrix, which the heatmap cannot
            # derive a colour range from; leave the panel empty in that case.
            if matrix.size:
                sns.heatmap(matrix, annot=True, fmt='g',
                            xticklabels=engraving_types, yticklabels=yin_yang_types)
            plt.title('阴阳-刻法关联热力图')
            plt.xlabel('刻法类型')
            plt.ylabel('阴阳属性')

            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, 'basic_analysis.png'), dpi=300, bbox_inches='tight')
            plt.show()
        finally:
            # Release the figure so repeated calls do not accumulate figures.
            plt.close(fig)

        print(f"可视化分析图表已保存至:{output_dir}")
|
||||
|
||||
# Usage example
def main():
    """Run every analysis once against the default database and print a summary.

    Side effects: writes ``symbol_analysis_report.md`` to the current
    directory and the summary figure to the analyzer's default output
    directory. The database connection is closed on exit, including when an
    analysis step raises.
    """
    # Create the analyzer (opens "symbols.db" by default).
    analyzer = SymbolAnalyzer()
    try:
        # Basic statistics.
        print("=== 基础统计信息 ===")
        basic_stats = analyzer.get_basic_statistics()
        print(f"符号总数:{basic_stats['total_symbols']}")
        print(f"阴阳分布:{basic_stats['yin_yang_distribution']}")

        # Yin-yang pattern analysis.
        print("\n=== 阴阳模式分析 ===")
        yin_yang_patterns = analyzer.analyze_yin_yang_patterns()
        print("阴阳-刻法关联:", yin_yang_patterns['yin_yang_engraving_association'])

        # Transmission pattern analysis.
        print("\n=== 传播模式分析 ===")
        transmission_patterns = analyzer.analyze_transmission_patterns()
        print("主要传播方向:", transmission_patterns['transmission_directions'][:5])

        # Statistical report; only the written file is needed here, so the
        # returned text is not kept.
        print("\n=== 创建统计分析报告 ===")
        analyzer.create_statistical_report("symbol_analysis_report.md")

        # Visual analysis.
        print("\n=== 创建可视化分析 ===")
        analyzer.create_visual_analysis()

        # Interesting patterns.
        print("\n=== 发现有趣模式 ===")
        interesting_patterns = analyzer.find_interesting_patterns()
        print(f"阴阳属性反转案例:{len(interesting_patterns['yin_yang_reversals'])} 个")
        print(f"跨文明符号形态:{interesting_patterns['cross_civilization_forms'][:3]}")
    finally:
        # Release the SQLite connection opened by the analyzer.
        analyzer.conn.close()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user