415 lines
16 KiB
Python
415 lines
16 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Digital Humanities Platform for s-Phoneme Civilization Gene Analysis
(s音文明基因数字人文分析平台)

A comprehensive digital analysis tool built on a new research paradigm.
|
||
"""
|
||
|
||
import json
|
||
import numpy as np
|
||
import matplotlib.pyplot as plt
|
||
import networkx as nx
|
||
from collections import defaultdict, Counter
|
||
import seaborn as sns
|
||
from datetime import datetime
|
||
import pandas as pd
|
||
from sklearn.cluster import KMeans
|
||
from sklearn.decomposition import PCA
|
||
import plotly.graph_objects as go
|
||
import plotly.express as px
|
||
from plotly.subplots import make_subplots
|
||
import warnings
|
||
warnings.filterwarnings('ignore')
|
||
|
||
class SPhonemeDigitalHumanitiesPlatform:
    """Analysis platform for the s-phoneme "civilization gene" database.

    Loads a JSON database holding ``civilizations``, ``transmission_pathways``
    and ``chronological_layers`` sections, and derives statistics,
    matplotlib/plotly visualizations and a JSON report from it.
    """

    # Graphemes treated as s-sounds: plain "s", "ś" (U+015B) and "š" (U+0161).
    # Digraphs such as "sh" need no entry of their own because they already
    # contain a plain "s".
    _S_SOUND_MARKERS = ('s', '\u015b', '\u0161')

    def __init__(self, database_path="s音文明基因数据库.json"):
        """Load the database and configure matplotlib for CJK labels.

        Args:
            database_path: Path of the JSON database file. If the file does
                not exist an empty database skeleton is used instead.

        Raises:
            KeyError: If the loaded JSON lacks the expected top-level sections.
        """
        self.database_path = database_path
        self.data = self.load_database()

        database = self.data['s_phoneme_civilization_database']
        self.civilizations = database['civilizations']
        self.transmission_pathways = database['transmission_pathways']
        self.chronological_layers = database['chronological_layers']

        # Fonts that can render the Chinese titles and axis labels.
        plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
        plt.rcParams['axes.unicode_minus'] = False

    @classmethod
    def _has_s_sound(cls, word):
        """Return True if *word* contains any of the tracked s-sound graphemes."""
        lowered = word.lower()
        return any(marker in lowered for marker in cls._S_SOUND_MARKERS)

    @staticmethod
    def _parse_start_year(period):
        """Extract the starting year from a period string such as ``"前3500-前2000"``.

        The text before the first ``-`` is parsed as an integer; a period that
        contains ``前`` (BCE marker) yields a negative year. Anything that
        cannot be parsed yields 0, matching the simplified timeline.
        """
        try:
            start = period.split('-')[0]
            if '前' in period:
                return -int(start.replace('前', ''))
            return int(start)
        except (ValueError, AttributeError):
            return 0

    def load_database(self):
        """Read the JSON database, falling back to an empty skeleton.

        Returns:
            The parsed JSON document, or the result of
            ``create_basic_database()`` when the file does not exist.
        """
        try:
            with open(self.database_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            print(f"数据库文件 {self.database_path} 未找到,创建基础数据库...")
            return self.create_basic_database()

    def create_basic_database(self):
        """Return an empty database with the three required sections."""
        return {
            "s_phoneme_civilization_database": {
                "civilizations": {},
                "transmission_pathways": {},
                "chronological_layers": {}
            }
        }

    def analyze_s_phoneme_stability(self):
        """Compute per-civilization s-sound vocabulary statistics.

        Returns:
            A dict keyed by civilization id. Each value holds the primary
            s-phoneme list, the share and count of vocabulary entries that
            contain an s-sound, the vocabulary size, and the number of
            ``cultural_encoding`` entries.
        """
        print("🧬 正在分析s音稳定性...")

        stability_data = {}
        for civ_name, civ_data in self.civilizations.items():
            vocabulary = civ_data.get('vocabulary', {})
            total_words = len(vocabulary)
            s_word_count = sum(1 for word in vocabulary if self._has_s_sound(word))

            stability_data[civ_name] = {
                's_phonemes': civ_data.get('s_phoneme_system', {}).get('primary', []),
                # An empty vocabulary yields a ratio of 0 rather than dividing by zero.
                's_vocabulary_ratio': s_word_count / total_words if total_words else 0,
                's_word_count': s_word_count,
                'total_vocabulary': total_words,
                'cultural_encoding': len(civ_data.get('cultural_encoding', {}))
            }

        return stability_data

    def visualize_s_phoneme_evolution(self):
        """Draw two bar charts comparing civilizations and save them as a PNG.

        The upper chart shows the number of primary s-phonemes, the lower one
        the share of s-sound vocabulary. The figure is written to
        ``s音演化分析图.png`` and shown.

        Returns:
            The matplotlib figure.
        """
        print("📈 正在生成s音演化可视化...")

        civilization_names = []
        s_phoneme_counts = []
        for civ_name, civ_data in self.civilizations.items():
            primary = civ_data.get('s_phoneme_system', {}).get('primary', [])
            s_phoneme_counts.append(len(primary))
            civilization_names.append(civ_data.get('name', civ_name))

        positions = range(len(civilization_names))
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

        # Upper chart: number of s-phonemes per civilization.
        ax1.bar(positions, s_phoneme_counts, color='steelblue', alpha=0.7)
        ax1.set_title('s音音素数量跨文明比较', fontsize=14, fontweight='bold')
        ax1.set_xlabel('文明')
        ax1.set_ylabel('s音音素数量')
        ax1.set_xticks(positions)
        ax1.set_xticklabels(civilization_names, rotation=45, ha='right')
        for i, v in enumerate(s_phoneme_counts):
            ax1.text(i, v + 0.1, str(v), ha='center', va='bottom')

        # Lower chart: share of s-sound vocabulary. The stability dict iterates
        # in the same order as self.civilizations, so bars line up with names.
        stability_data = self.analyze_s_phoneme_stability()
        vocab_ratios = [data['s_vocabulary_ratio'] for data in stability_data.values()]

        ax2.bar(positions, vocab_ratios, color='darkgreen', alpha=0.7)
        ax2.set_title('s音词汇占比跨文明比较', fontsize=14, fontweight='bold')
        ax2.set_xlabel('文明')
        ax2.set_ylabel('s音词汇占比')
        ax2.set_xticks(positions)
        ax2.set_xticklabels(civilization_names, rotation=45, ha='right')
        for i, v in enumerate(vocab_ratios):
            ax2.text(i, v + 0.01, f'{v:.1%}', ha='center', va='bottom')

        plt.tight_layout()
        plt.savefig('s音演化分析图.png', dpi=300, bbox_inches='tight')
        plt.show()

        return fig

    def build_transmission_network(self):
        """Build a directed graph of civilizations linked by transmission pathways.

        Every civilization becomes a node. For each pathway, consecutive
        entries of ``primary_carriers`` are joined by an edge when both are
        known civilizations.

        Returns:
            The ``networkx.DiGraph``.
        """
        print("🌐 正在构建s音传播网络...")

        G = nx.DiGraph()

        for civ_name, civ_data in self.civilizations.items():
            G.add_node(civ_name,
                       name=civ_data.get('name', civ_name),
                       period=civ_data.get('period', '未知'),
                       geography=civ_data.get('geography', '未知'))

        for pathway_name, pathway_data in self.transmission_pathways.items():
            carriers = pathway_data.get('primary_carriers', [])
            characteristics = pathway_data.get('s_phoneme_characteristics', '')
            # Walk adjacent carrier pairs; carriers that are not civilizations
            # in the database are skipped so no attribute-less nodes appear.
            for source, target in zip(carriers, carriers[1:]):
                if source in G and target in G:
                    G.add_edge(source, target,
                               pathway=pathway_name,
                               characteristics=characteristics)

        return G

    def visualize_transmission_network(self):
        """Draw the transmission network and save it as ``s音传播网络图.png``.

        Returns:
            The graph that was drawn.
        """
        G = self.build_transmission_network()

        plt.figure(figsize=(15, 12))

        pos = nx.spring_layout(G, k=3, iterations=50)

        # Color nodes by a fixed grouping of civilization ids.
        node_colors = []
        for node in G.nodes():
            if node in ['sumerian', 'sanskrit_buddhist']:
                node_colors.append('lightblue')
            elif 'scythian' in node or 'turkic' in node:
                node_colors.append('lightgreen')
            else:
                node_colors.append('lightcoral')

        nx.draw_networkx_nodes(G, pos, node_color=node_colors,
                               node_size=3000, alpha=0.8)
        nx.draw_networkx_edges(G, pos, edge_color='gray',
                               arrows=True, arrowsize=20, alpha=0.6)

        labels = {node: f"{data['name']}\n{data['period']}"
                  for node, data in G.nodes(data=True)}
        nx.draw_networkx_labels(G, pos, labels, font_size=8)

        plt.title('s音文明传播网络图', fontsize=16, fontweight='bold')
        plt.axis('off')
        plt.tight_layout()
        plt.savefig('s音传播网络图.png', dpi=300, bbox_inches='tight')
        plt.show()

        return G

    def analyze_cultural_categories(self):
        """Compute the share of s-sound words per cultural sub-category.

        Uses the same s-sound test as ``analyze_s_phoneme_stability`` so that
        words written with "ś" or "š" are counted consistently.

        Returns:
            ``{category: {subcategory: {'total', 's_words', 'ratio'}}}``;
            empty when the database has no ``cultural_categories`` section.
        """
        print("📊 正在分析文化类别s音分布...")

        categories = self.data['s_phoneme_civilization_database'].get('cultural_categories', {})

        category_stats = {}
        for category, subcategories in categories.items():
            category_stats[category] = {}
            for subcategory, words in subcategories.items():
                s_count = sum(1 for word in words if self._has_s_sound(word))
                category_stats[category][subcategory] = {
                    'total': len(words),
                    's_words': s_count,
                    'ratio': s_count / len(words) if words else 0
                }

        return category_stats

    def create_interactive_dashboard(self):
        """Build a 2x2 plotly dashboard and save it as an HTML file.

        Panels: s-sound vocabulary share per civilization, s-sound share per
        cultural sub-category, a simplified timeline, and the adjacency matrix
        of the transmission network.

        Returns:
            The plotly figure.
        """
        print("🎯 正在创建交互式仪表板...")

        stability_data = self.analyze_s_phoneme_stability()
        category_stats = self.analyze_cultural_categories()

        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('s音稳定性比较', '文化类别s音分布',
                            '文明时间线', '传播路径强度'),
            specs=[[{"type": "bar"}, {"type": "bar"}],
                   [{"type": "scatter"}, {"type": "heatmap"}]]
        )

        # 1. s-sound vocabulary share per civilization.
        civ_names = list(stability_data.keys())
        stability_scores = [data['s_vocabulary_ratio'] for data in stability_data.values()]
        fig.add_trace(
            go.Bar(x=civ_names, y=stability_scores, name='s音词汇占比'),
            row=1, col=1
        )

        # 2. s-sound share per cultural sub-category.
        categories = []
        ratios = []
        for category, subcats in category_stats.items():
            for subcat, stats in subcats.items():
                categories.append(f"{category}-{subcat}")
                ratios.append(stats['ratio'])
        fig.add_trace(
            go.Bar(x=categories, y=ratios, name='s音占比'),
            row=1, col=2
        )

        # 3. Simplified timeline: one marker per civilization at its start year.
        time_periods = []
        civ_labels = []
        for civ_name, civ_data in self.civilizations.items():
            time_periods.append(self._parse_start_year(civ_data.get('period', '未知')))
            civ_labels.append(civ_data.get('name', civ_name))
        fig.add_trace(
            go.Scatter(x=time_periods, y=civ_labels, mode='markers',
                       marker=dict(size=10), name='文明时间分布'),
            row=2, col=1
        )

        # 4. Adjacency matrix of the transmission network. networkx refuses to
        # build a matrix for a graph without nodes, so fall back to an empty one.
        G = self.build_transmission_network()
        nodes = list(G.nodes())
        adj_matrix = nx.adjacency_matrix(G).toarray() if nodes else []
        fig.add_trace(
            go.Heatmap(z=adj_matrix, x=nodes, y=nodes, colorscale='Blues'),
            row=2, col=2
        )

        fig.update_layout(
            title_text="s音文明基因数字人文分析仪表板",
            height=800,
            showlegend=False
        )

        fig.write_html('s音文明基因分析仪表板.html')
        print("💾 交互式仪表板已保存为 's音文明基因分析仪表板.html'")

        return fig

    def generate_comprehensive_report(self):
        """Assemble the analysis report and write it to a JSON file.

        Empty inputs (no civilizations, categories or graph nodes) produce
        neutral values - 0.0 averages and ``None`` findings - instead of
        raising, so the fallback empty database still yields a valid report.

        Returns:
            The report dict. Each entry of ``key_findings`` is a
            ``(name, value)`` tuple, or ``None`` when there is no data.
        """
        print("📋 正在生成综合分析报告...")

        stability_data = self.analyze_s_phoneme_stability()
        category_stats = self.analyze_cultural_categories()
        G = self.build_transmission_network()

        ratios = [data['s_vocabulary_ratio'] for data in stability_data.values()]
        average_stability = float(np.mean(ratios)) if ratios else 0.0

        # Mean s-sound ratio per category; a category without sub-categories
        # counts as 0.0 so NaN never reaches the comparison or the JSON file.
        category_means = [
            (cat, float(np.mean([stats['ratio'] for stats in subcats.values()])) if subcats else 0.0)
            for cat, subcats in category_stats.items()
        ]

        report = {
            "metadata": {
                "generated_date": datetime.now().isoformat(),
                "database_version": self.data.get('s_phoneme_civilization_database', {}).get('metadata', {}).get('version', '1.0'),
                "analysis_scope": "丝绸之路s音文明基因综合分析"
            },
            "summary_statistics": {
                "total_civilizations": len(self.civilizations),
                "total_transmission_pathways": len(self.transmission_pathways),
                "network_density": nx.density(G),
                "average_s_phoneme_stability": average_stability
            },
            "key_findings": {
                "most_stable_civilization": max(stability_data.items(),
                                                key=lambda x: x[1]['s_vocabulary_ratio'],
                                                default=None),
                "most_connected_node": max(G.degree(), key=lambda x: x[1], default=None),
                "dominant_cultural_category": max(category_means, key=lambda x: x[1], default=None)
            },
            "detailed_analysis": {
                "s_phoneme_stability": stability_data,
                "cultural_category_distribution": category_stats,
                "network_metrics": {
                    "nodes": G.number_of_nodes(),
                    "edges": G.number_of_edges(),
                    # average_clustering divides by the node count.
                    "average_clustering": nx.average_clustering(G) if G.number_of_nodes() else 0.0,
                    "centrality_measures": nx.degree_centrality(G)
                }
            },
            "research_implications": [
                "s音作为文明基因具有高度稳定性和跨文化传播能力",
                "草原通道是s音传播的主要路径,体现了游牧民族的媒介作用",
                "商业活动是s音传播的重要驱动力,形成了s音商业词汇集群",
                "宗教传播强化了s音的神圣性,使其成为文化认同的标识",
                "政治权力的s音编码体现了统治合法性的文化建构"
            ]
        }

        with open('s音文明基因综合分析报告.json', 'w', encoding='utf-8') as f:
            json.dump(report, f, ensure_ascii=False, indent=2)

        return report

    def run_full_analysis(self):
        """Run every analysis step in order and print a summary.

        Returns:
            The report dict produced by ``generate_comprehensive_report()``.
        """
        print("🚀 启动s音文明基因数字人文分析平台...")
        print("="*60)

        # 1. Base statistics.
        stability_data = self.analyze_s_phoneme_stability()
        print(f"📊 已分析 {len(stability_data)} 个文明的s音稳定性")

        # 2. Static charts.
        self.visualize_s_phoneme_evolution()
        print("📈 已生成s音演化分析图")

        # 3. Network analysis.
        G = self.visualize_transmission_network()
        print(f"🌐 已构建包含 {G.number_of_nodes()} 个节点、{G.number_of_edges()} 条边的传播网络")

        # 4. Cultural categories.
        category_stats = self.analyze_cultural_categories()
        print(f"📊 已分析 {len(category_stats)} 个文化类别的s音分布")

        # 5. Interactive dashboard.
        self.create_interactive_dashboard()
        print("🎯 已创建交互式分析仪表板")

        # 6. Report.
        report = self.generate_comprehensive_report()
        print("📋 已生成综合分析报告")

        # Findings are None when the database holds no data for them.
        most_stable = report['key_findings']['most_stable_civilization']
        dominant = report['key_findings']['dominant_cultural_category']
        most_stable_name = most_stable[0] if most_stable else '无数据'
        dominant_name = dominant[0] if dominant else '无数据'

        print("="*60)
        print("✅ 分析完成!主要发现:")
        print(f" • 平均s音稳定性: {report['summary_statistics']['average_s_phoneme_stability']:.1%}")
        print(f" • 网络密度: {report['summary_statistics']['network_density']:.3f}")
        print(f" • 最稳定文明: {most_stable_name}")
        print(f" • 主要文化类别: {dominant_name}")
        print("="*60)

        return report
|
||
|
||
def main():
    """Entry point: build the platform, run every analysis, list the outputs.

    Returns:
        A ``(platform, report)`` tuple with the platform instance and the
        report returned by its ``run_full_analysis()``.
    """
    analysis_platform = SPhonemeDigitalHumanitiesPlatform()
    analysis_report = analysis_platform.run_full_analysis()

    # Closing banner followed by the artifacts written during the run.
    closing_lines = (
        "\n🎉 s音文明基因数字人文分析完成!",
        "📁 生成的文件:",
        " • s音演化分析图.png",
        " • s音传播网络图.png",
        " • s音文明基因分析仪表板.html",
        " • s音文明基因综合分析报告.json",
    )
    for line in closing_lines:
        print(line)

    return analysis_platform, analysis_report
|
||
|
||
if __name__ == "__main__":
    # Run the full pipeline only when executed as a script; the results stay
    # bound at module level so they can be inspected in an interactive session.
    platform, report = main()