#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ 符号数据分析器 胡汉三千年项目数据分析工具 功能:对符号数据库进行统计分析、模式识别、关联挖掘 """ import sqlite3 import pandas as pd import numpy as np from typing import Dict, List, Tuple, Any from collections import Counter import matplotlib.pyplot as plt import seaborn as sns from datetime import datetime import json class SymbolAnalyzer: """符号数据分析器""" def __init__(self, db_path: str = "symbols.db"): """初始化分析器""" self.db_path = db_path self.conn = sqlite3.connect(db_path) # 设置中文字体 plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans'] plt.rcParams['axes.unicode_minus'] = False def get_basic_statistics(self) -> Dict[str, Any]: """ 获取基础统计信息 Returns: 统计信息字典 """ cursor = self.conn.cursor() stats = {} # 符号总数 cursor.execute("SELECT COUNT(*) FROM symbols") stats['total_symbols'] = cursor.fetchone()[0] # 阴阳属性分布 cursor.execute("SELECT yin_yang_attribute, COUNT(*) FROM symbols GROUP BY yin_yang_attribute") stats['yin_yang_distribution'] = dict(cursor.fetchall()) # 刻法类型分布 cursor.execute("SELECT engraving_type, COUNT(*) FROM symbols GROUP BY engraving_type") stats['engraving_distribution'] = dict(cursor.fetchall()) # 文明分布 cursor.execute("SELECT origin_civilization, COUNT(*) FROM symbols GROUP BY origin_civilization") stats['civilization_distribution'] = dict(cursor.fetchall()) # 时期分布 cursor.execute("SELECT origin_period, COUNT(*) FROM symbols GROUP BY origin_period") stats['period_distribution'] = dict(cursor.fetchall()) # 关联关系统计 cursor.execute("SELECT COUNT(*) FROM cross_civilization_links") stats['total_links'] = cursor.fetchone()[0] return stats def analyze_yin_yang_patterns(self) -> Dict[str, Any]: """ 分析阴阳模式 Returns: 阴阳模式分析结果 """ cursor = self.conn.cursor() patterns = {} # 阴阳属性与刻法类型的关联 cursor.execute(""" SELECT yin_yang_attribute, engraving_type, COUNT(*) FROM symbols GROUP BY yin_yang_attribute, engraving_type ORDER BY yin_yang_attribute, engraving_type """) yin_yang_engraving = cursor.fetchall() patterns['yin_yang_engraving_association'] = {} for yin_yang, engraving, count in yin_yang_engraving: if yin_yang not in patterns['yin_yang_engraving_association']: patterns['yin_yang_engraving_association'][yin_yang] = {} patterns['yin_yang_engraving_association'][yin_yang][engraving] = count # 阴阳属性与文明的关联 cursor.execute(""" SELECT yin_yang_attribute, origin_civilization, COUNT(*) FROM symbols GROUP BY yin_yang_attribute, origin_civilization ORDER BY yin_yang_attribute, origin_civilization """) yin_yang_civilization = cursor.fetchall() patterns['yin_yang_civilization_association'] = {} for yin_yang, civilization, count in yin_yang_civilization: if yin_yang not in patterns['yin_yang_civilization_association']: patterns['yin_yang_civilization_association'][yin_yang] = {} patterns['yin_yang_civilization_association'][yin_yang][civilization] = count # 阴阳属性的时间分布 cursor.execute(""" SELECT yin_yang_attribute, origin_period, COUNT(*) FROM symbols GROUP BY yin_yang_attribute, origin_period ORDER BY origin_period, yin_yang_attribute """) yin_yang_period = cursor.fetchall() patterns['yin_yang_period_distribution'] = {} for yin_yang, period, count in yin_yang_period: if period not in patterns['yin_yang_period_distribution']: patterns['yin_yang_period_distribution'][period] = {} patterns['yin_yang_period_distribution'][period][yin_yang] = count return patterns def analyze_transmission_patterns(self) -> Dict[str, Any]: """ 分析传播模式 Returns: 传播模式分析结果 """ cursor = self.conn.cursor() patterns = {} # 传播方向分析 cursor.execute(""" SELECT s1.origin_civilization, s2.origin_civilization, COUNT(*) as link_count FROM cross_civilization_links l JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id WHERE s1.origin_civilization != s2.origin_civilization GROUP BY s1.origin_civilization, s2.origin_civilization ORDER BY link_count DESC """) transmission_directions = cursor.fetchall() patterns['transmission_directions'] = transmission_directions # 传播路径长度分析 cursor.execute(""" WITH RECURSIVE transmission_paths AS ( SELECT source_symbol_id, target_symbol_id, 1 as path_length, source_symbol_id || '->' || target_symbol_id as path FROM cross_civilization_links UNION ALL SELECT tp.source_symbol_id, l.target_symbol_id, tp.path_length + 1, tp.path || '->' || l.target_symbol_id FROM cross_civilization_links l JOIN transmission_paths tp ON l.source_symbol_id = tp.target_symbol_id WHERE tp.path_length < 10 ) SELECT path_length, COUNT(*) as path_count FROM transmission_paths GROUP BY path_length ORDER BY path_length """) path_lengths = cursor.fetchall() patterns['path_length_distribution'] = path_lengths # 传播网络中心性分析 cursor.execute(""" WITH symbol_degrees AS ( SELECT symbol_id, COUNT(*) as degree FROM ( SELECT source_symbol_id as symbol_id FROM cross_civilization_links UNION ALL SELECT target_symbol_id as symbol_id FROM cross_civilization_links ) GROUP BY symbol_id ) SELECT s.symbol_id, s.symbol_name, sd.degree FROM symbol_degrees sd JOIN symbols s ON sd.symbol_id = s.symbol_id ORDER BY sd.degree DESC LIMIT 10 """) top_central_symbols = cursor.fetchall() patterns['top_central_symbols'] = top_central_symbols return patterns def analyze_symbol_clusters(self) -> Dict[str, Any]: """ 分析符号聚类 Returns: 聚类分析结果 """ cursor = self.conn.cursor() clusters = {} # 基于阴阳属性的聚类 cursor.execute(""" SELECT yin_yang_attribute, engraving_type, COUNT(*) as cluster_size FROM symbols GROUP BY yin_yang_attribute, engraving_type ORDER BY cluster_size DESC """) yin_yang_clusters = cursor.fetchall() clusters['yin_yang_clusters'] = yin_yang_clusters # 基于文明-时期的聚类 cursor.execute(""" SELECT origin_civilization, origin_period, COUNT(*) as cluster_size FROM symbols GROUP BY origin_civilization, origin_period ORDER BY cluster_size DESC """) civilization_clusters = cursor.fetchall() clusters['civilization_clusters'] = civilization_clusters # 符号形态相似性分析 cursor.execute(""" SELECT symbol_form, COUNT(*) as frequency FROM symbols GROUP BY symbol_form ORDER BY frequency DESC LIMIT 20 """) form_frequency = cursor.fetchall() clusters['form_frequency'] = form_frequency return clusters def find_interesting_patterns(self) -> Dict[str, Any]: """ 发现有趣模式 Returns: 有趣模式列表 """ cursor = self.conn.cursor() patterns = {} # 1. 阴阳属性反转模式 cursor.execute(""" SELECT l.source_symbol_id, l.target_symbol_id, s1.yin_yang_attribute as source_yin_yang, s2.yin_yang_attribute as target_yin_yang FROM cross_civilization_links l JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id WHERE s1.yin_yang_attribute != s2.yin_yang_attribute """) yin_yang_reversals = cursor.fetchall() patterns['yin_yang_reversals'] = yin_yang_reversals # 2. 跨文明传播的阴阳偏好 cursor.execute(""" SELECT s1.origin_civilization, s2.origin_civilization, s1.yin_yang_attribute, COUNT(*) as count FROM cross_civilization_links l JOIN symbols s1 ON l.source_symbol_id = s1.symbol_id JOIN symbols s2 ON l.target_symbol_id = s2.symbol_id WHERE s1.origin_civilization != s2.origin_civilization GROUP BY s1.origin_civilization, s2.origin_civilization, s1.yin_yang_attribute ORDER BY count DESC """) cross_civilization_preferences = cursor.fetchall() patterns['cross_civilization_preferences'] = cross_civilization_preferences # 3. 符号形态的跨文明一致性 cursor.execute(""" SELECT symbol_form, COUNT(DISTINCT origin_civilization) as civilization_count FROM symbols GROUP BY symbol_form HAVING civilization_count > 1 ORDER BY civilization_count DESC """) cross_civilization_forms = cursor.fetchall() patterns['cross_civilization_forms'] = cross_civilization_forms # 4. 时间序列中的阴阳变化 cursor.execute(""" SELECT origin_period, SUM(CASE WHEN yin_yang_attribute = 'yang' THEN 1 ELSE 0 END) as yang_count, SUM(CASE WHEN yin_yang_attribute = 'yin' THEN 1 ELSE 0 END) as yin_count, COUNT(*) as total FROM symbols GROUP BY origin_period ORDER BY origin_period """) temporal_yin_yang = cursor.fetchall() patterns['temporal_yin_yang'] = temporal_yin_yang return patterns def create_statistical_report(self, output_file: str = None) -> str: """ 创建统计分析报告 Args: output_file: 输出文件路径(可选) Returns: 报告内容 """ report = [] # 基础统计 basic_stats = self.get_basic_statistics() report.append("# 符号数据库统计分析报告") report.append(f"生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") report.append("\n## 基础统计信息") report.append(f"- 符号总数:{basic_stats['total_symbols']}") report.append(f"- 关联关系总数:{basic_stats['total_links']}") # 阴阳属性分布 report.append("\n## 阴阳属性分布") for yin_yang, count in basic_stats['yin_yang_distribution'].items(): percentage = (count / basic_stats['total_symbols']) * 100 report.append(f"- {yin_yang}:{count} ({percentage:.1f}%)") # 刻法类型分布 report.append("\n## 刻法类型分布") for engraving, count in basic_stats['engraving_distribution'].items(): percentage = (count / basic_stats['total_symbols']) * 100 report.append(f"- {engraving}:{count} ({percentage:.1f}%)") # 文明分布 report.append("\n## 文明分布") for civilization, count in basic_stats['civilization_distribution'].items(): percentage = (count / basic_stats['total_symbols']) * 100 report.append(f"- {civilization}:{count} ({percentage:.1f}%)") # 阴阳模式分析 yin_yang_patterns = self.analyze_yin_yang_patterns() report.append("\n## 阴阳模式分析") report.append("\n### 阴阳属性与刻法类型关联") for yin_yang, engraving_counts in yin_yang_patterns['yin_yang_engraving_association'].items(): report.append(f"\n**{yin_yang}属性:**") for engraving, count in engraving_counts.items(): report.append(f" - {engraving}:{count}") # 传播模式分析 transmission_patterns = self.analyze_transmission_patterns() report.append("\n## 传播模式分析") report.append("\n### 主要传播方向") for source, target, count in transmission_patterns['transmission_directions'][:10]: report.append(f"- {source} → {target}:{count} 次") # 聚类分析 clusters = self.analyze_symbol_clusters() report.append("\n## 符号聚类分析") report.append("\n### 阴阳-刻法聚类") for yin_yang, engraving, size in clusters['yin_yang_clusters'][:10]: report.append(f"- {yin_yang} + {engraving}:{size} 个符号") # 有趣模式 interesting_patterns = self.find_interesting_patterns() report.append("\n## 有趣模式发现") report.append(f"\n### 阴阳属性反转") report.append(f"- 发现 {len(interesting_patterns['yin_yang_reversals'])} 个阴阳属性反转的传播案例") report.append(f"\n### 跨文明符号形态") for form, civ_count in interesting_patterns['cross_civilization_forms'][:5]: report.append(f"- 形态 '{form}' 出现在 {civ_count} 个不同文明中") # 将报告保存到文件 report_content = '\n'.join(report) if output_file: with open(output_file, 'w', encoding='utf-8') as f: f.write(report_content) print(f"统计分析报告已保存至:{output_file}") return report_content def create_visual_analysis(self, output_dir: str = "./analysis_results"): """ 创建可视化分析图表 Args: output_dir: 输出目录 """ import os os.makedirs(output_dir, exist_ok=True) # 获取统计数据 basic_stats = self.get_basic_statistics() yin_yang_patterns = self.analyze_yin_yang_patterns() # 1. 阴阳属性分布饼图 plt.figure(figsize=(10, 8)) plt.subplot(2, 2, 1) yin_yang_data = basic_stats['yin_yang_distribution'] plt.pie(yin_yang_data.values(), labels=yin_yang_data.keys(), autopct='%1.1f%%') plt.title('阴阳属性分布') # 2. 刻法类型分布柱状图 plt.subplot(2, 2, 2) engraving_data = basic_stats['engraving_distribution'] plt.bar(engraving_data.keys(), engraving_data.values()) plt.title('刻法类型分布') plt.xticks(rotation=45) # 3. 文明分布柱状图 plt.subplot(2, 2, 3) civilization_data = basic_stats['civilization_distribution'] plt.barh(list(civilization_data.keys()), list(civilization_data.values())) plt.title('文明分布') # 4. 阴阳-刻法关联热力图 plt.subplot(2, 2, 4) association_data = yin_yang_patterns['yin_yang_engraving_association'] # 转换为矩阵格式 yin_yang_types = list(association_data.keys()) engraving_types = set() for yin_yang in yin_yang_types: engraving_types.update(association_data[yin_yang].keys()) engraving_types = sorted(list(engraving_types)) matrix = np.zeros((len(yin_yang_types), len(engraving_types))) for i, yin_yang in enumerate(yin_yang_types): for j, engraving in enumerate(engraving_types): matrix[i, j] = association_data[yin_yang].get(engraving, 0) sns.heatmap(matrix, annot=True, fmt='g', xticklabels=engraving_types, yticklabels=yin_yang_types) plt.title('阴阳-刻法关联热力图') plt.xlabel('刻法类型') plt.ylabel('阴阳属性') plt.tight_layout() plt.savefig(os.path.join(output_dir, 'basic_analysis.png'), dpi=300, bbox_inches='tight') plt.show() print(f"可视化分析图表已保存至:{output_dir}") # 使用示例 def main(): """主函数示例""" # 创建分析器 analyzer = SymbolAnalyzer() # 获取基础统计 print("=== 基础统计信息 ===") basic_stats = analyzer.get_basic_statistics() print(f"符号总数:{basic_stats['total_symbols']}") print(f"阴阳分布:{basic_stats['yin_yang_distribution']}") # 分析阴阳模式 print("\n=== 阴阳模式分析 ===") yin_yang_patterns = analyzer.analyze_yin_yang_patterns() print("阴阳-刻法关联:", yin_yang_patterns['yin_yang_engraving_association']) # 分析传播模式 print("\n=== 传播模式分析 ===") transmission_patterns = analyzer.analyze_transmission_patterns() print("主要传播方向:", transmission_patterns['transmission_directions'][:5]) # 创建统计分析报告 print("\n=== 创建统计分析报告 ===") report = analyzer.create_statistical_report("symbol_analysis_report.md") # 创建可视化分析 print("\n=== 创建可视化分析 ===") analyzer.create_visual_analysis() # 发现有趣模式 print("\n=== 发现有趣模式 ===") interesting_patterns = analyzer.find_interesting_patterns() print(f"阴阳属性反转案例:{len(interesting_patterns['yin_yang_reversals'])} 个") print(f"跨文明符号形态:{interesting_patterns['cross_civilization_forms'][:3]}") if __name__ == "__main__": main()