#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Yin-yang orientation validator.

A data-validation tool for the user-proposed '盆-P-否-吐鲁番'
(basin / P-sound / "否" / Turpan) yin-yang orientation theory.
"""

import json
import math
import sqlite3
from collections import Counter
from typing import Dict, List, Tuple, Any

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


class YinYangOrientationValidator:
    """Validate the yin-yang orientation theory against a SQLite symbol database.

    The validator reads a ``symbols`` table and a ``cross_civilization_links``
    table, computes several descriptive statistics, and can render them as a
    JSON report or as a matplotlib figure.

    The instance owns one SQLite connection (``self.conn``).  Call
    :meth:`close` when done, or use the validator as a context manager::

        with YinYangOrientationValidator("symbols.db") as validator:
            report = validator.generate_yin_yang_orientation_report()
    """

    # Positional layout of a ``SELECT * FROM symbols`` row.  The names of
    # some of these columns are not visible in this file, so access stays
    # positional.
    # NOTE(review): confirm these indexes against the real schema.
    _COL_ID = 0
    _COL_NAME = 2
    _COL_YIN_YANG = 3
    _COL_ENGRAVING = 4
    _COL_GEO = 6
    _COL_FUNCTIONAL = 8
    _COL_PHONETIC = 9
    _COL_SEMANTIC = 10

    # Position of the confidence value inside a ``cross_civilization_links`` row.
    # NOTE(review): confirm against the real schema.
    _LINK_COL_CONFIDENCE = 4

    # Substrings that mark a phonetic context as belonging to the "P" cluster.
    _P_SOUNDS = ('p', 'pǐ', 'pén', 'fān')

    # Keywords in the semantic context that indicate a yin reading.
    _YIN_KEYWORDS = ('凹陷', '阴刻', '地在天之上')

    def __init__(self, db_path: str = "symbols.db"):
        """Open a connection to the SQLite database at *db_path*."""
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __enter__(self) -> "YinYangOrientationValidator":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    @staticmethod
    def _percentage(count: int, total: int) -> float:
        """Return ``count / total`` as a percentage, or 0 when *total* is 0."""
        return count / total * 100 if total > 0 else 0

    def analyze_p_basin_pattern(self) -> Dict[str, Any]:
        """Summarise every symbol related to the P sound or the basin names.

        Returns:
            A dict with the number of matching symbols, their yin/yang/neutral
            counts, the engraving-type histogram, the non-empty functional
            contexts, and a per-symbol detail list.
        """
        cursor = self.conn.cursor()

        # All symbols whose phonetic context contains "p"/"P" or whose name
        # contains one of the theory's key terms.
        cursor.execute("""
            SELECT * FROM symbols
            WHERE phonetic_context LIKE '%p%'
               OR phonetic_context LIKE '%P%'
               OR symbol_name LIKE '%盆%'
               OR symbol_name LIKE '%否%'
               OR symbol_name LIKE '%吐鲁番%'
        """)
        p_symbols = cursor.fetchall()

        # Counter returns 0 for labels that never occur.
        yin_yang_counts = Counter(sym[self._COL_YIN_YANG] for sym in p_symbols)
        engraving_types = Counter(sym[self._COL_ENGRAVING] for sym in p_symbols)
        functional_contexts = [
            sym[self._COL_FUNCTIONAL]
            for sym in p_symbols
            if sym[self._COL_FUNCTIONAL]
        ]

        return {
            'total_p_symbols': len(p_symbols),
            'yin_yang_distribution': {
                'yin': yin_yang_counts['yin'],
                'yang': yin_yang_counts['yang'],
                'neutral': yin_yang_counts['neutral'],
            },
            'engraving_type_distribution': dict(engraving_types),
            'functional_contexts': functional_contexts,
            'symbols_details': [
                {
                    'symbol_id': sym[self._COL_ID],
                    'symbol_name': sym[self._COL_NAME],
                    'yin_yang': sym[self._COL_YIN_YANG],
                    'engraving_type': sym[self._COL_ENGRAVING],
                    'phonetic_context': sym[self._COL_PHONETIC],
                    'semantic_context': sym[self._COL_SEMANTIC],
                }
                for sym in p_symbols
            ],
        }

    def validate_basin_yin_yang_logic(self) -> Dict[str, Any]:
        """Check that basin-related symbols are yin and use yin engraving.

        Returns:
            A dict with the number of basin symbols, the percentage whose
            yin/yang attribute is ``'yin'``, the percentage whose engraving
            type is ``'yin_engraving'``, and the per-symbol checks.
        """
        cursor = self.conn.cursor()

        # Symbols tied to basins by name, geography, or semantic keywords.
        cursor.execute("""
            SELECT * FROM symbols
            WHERE symbol_name LIKE '%盆%'
               OR geographical_context LIKE '%盆地%'
               OR semantic_context LIKE '%凹陷%'
               OR semantic_context LIKE '%阴刻%'
        """)
        basin_symbols = cursor.fetchall()

        analysis_results = []
        for symbol in basin_symbols:
            yin_yang = symbol[self._COL_YIN_YANG]
            engraving = symbol[self._COL_ENGRAVING]
            # str() lets NULL columns take part in the substring checks.
            geo_text = str(symbol[self._COL_GEO])
            semantic_text = str(symbol[self._COL_SEMANTIC])

            analysis_results.append({
                'symbol_id': symbol[self._COL_ID],
                'symbol_name': symbol[self._COL_NAME],
                'expected_yin_yang': 'yin',  # a basin is expected to be yin
                'actual_yin_yang': yin_yang,
                'yin_yang_match': yin_yang == 'yin',
                'expected_engraving': 'yin_engraving',
                'actual_engraving': engraving,
                'engraving_match': engraving == 'yin_engraving',
                'geo_context_contains_basin': '盆地' in geo_text,
                'semantic_contains_yin_keywords': any(
                    keyword in semantic_text for keyword in self._YIN_KEYWORDS
                ),
            })

        total_basins = len(analysis_results)
        yin_yang_correct = sum(1 for r in analysis_results if r['yin_yang_match'])
        engraving_correct = sum(1 for r in analysis_results if r['engraving_match'])

        return {
            'total_basin_symbols': total_basins,
            'yin_yang_accuracy': self._percentage(yin_yang_correct, total_basins),
            'engraving_accuracy': self._percentage(engraving_correct, total_basins),
            'detailed_analysis': analysis_results,
        }

    def analyze_geographical_opposites(self) -> Dict[str, Any]:
        """Check whether geographically linked symbols carry opposite polarity.

        A link counts as an opposite when one end is ``'yin'`` and the other
        is ``'yang'``.

        Returns:
            A dict with the number of geographical links, how many are
            yin/yang opposites, that share as a percentage, and the per-link
            details.
        """
        cursor = self.conn.cursor()

        # The yin/yang attribute of both ends is fetched in the same JOIN
        # instead of issuing two extra queries per link.
        cursor.execute("""
            SELECT ccl.*,
                   s1.symbol_name AS source_name,
                   s2.symbol_name AS target_name,
                   s1.geographical_context AS source_geo,
                   s2.geographical_context AS target_geo,
                   s1.yin_yang_attribute AS source_yin_yang,
                   s2.yin_yang_attribute AS target_yin_yang
            FROM cross_civilization_links ccl
            JOIN symbols s1 ON ccl.source_symbol_id = s1.symbol_id
            JOIN symbols s2 ON ccl.target_symbol_id = s2.symbol_id
            WHERE ccl.link_type = 'geographical'
        """)
        geo_links = cursor.fetchall()

        opposites_analysis = []
        for link in geo_links:
            # The six aliased columns are read from the tail of the row so
            # that the width of ``ccl.*`` does not matter.
            (source_name, target_name, source_geo, target_geo,
             source_yin_yang, target_yin_yang) = link[-6:]

            opposites_analysis.append({
                'source_symbol': source_name,
                'target_symbol': target_name,
                'source_yin_yang': source_yin_yang,
                'target_yin_yang': target_yin_yang,
                'is_opposite': {source_yin_yang, target_yin_yang} == {'yin', 'yang'},
                'confidence': link[self._LINK_COL_CONFIDENCE],
                'source_geo_context': source_geo,
                'target_geo_context': target_geo,
            })

        total_geo_links = len(opposites_analysis)
        valid_opposites = sum(1 for item in opposites_analysis if item['is_opposite'])

        return {
            'total_geographical_links': total_geo_links,
            'valid_yin_yang_opposites': valid_opposites,
            'opposite_accuracy': self._percentage(valid_opposites, total_geo_links),
            'detailed_analysis': opposites_analysis,
        }

    def validate_p_phonetic_cluster(self) -> Dict[str, Any]:
        """Compare the yin share of P-sound symbols with all other symbols.

        A symbol belongs to the P cluster when its lower-cased phonetic
        context contains any entry of ``_P_SOUNDS``; symbols with an empty or
        NULL phonetic context fall into the "other" group.

        Returns:
            A dict with both group sizes, both yin percentages, the P-group
            details, and the result of the two-proportion significance test.
        """
        cursor = self.conn.cursor()
        cursor.execute("SELECT symbol_id, symbol_name, phonetic_context, yin_yang_attribute FROM symbols")

        p_symbols = []
        other_symbols = []
        for symbol_id, name, phonetic, yin_yang in cursor.fetchall():
            entry = {
                'symbol_id': symbol_id,
                'name': name,
                'phonetic': phonetic,
                'yin_yang': yin_yang,
            }
            lowered = phonetic.lower() if phonetic else ''
            is_p_sound = any(p_sound in lowered for p_sound in self._P_SOUNDS)
            (p_symbols if is_p_sound else other_symbols).append(entry)

        p_yin_count = sum(1 for s in p_symbols if s['yin_yang'] == 'yin')
        other_yin_count = sum(1 for s in other_symbols if s['yin_yang'] == 'yin')

        return {
            'p_symbols_count': len(p_symbols),
            'other_symbols_count': len(other_symbols),
            'p_symbols_yin_ratio': self._percentage(p_yin_count, len(p_symbols)),
            'other_symbols_yin_ratio': self._percentage(other_yin_count, len(other_symbols)),
            'p_symbols_details': p_symbols,
            'statistical_significance': self._calculate_significance_test(
                p_yin_count, len(p_symbols), other_yin_count, len(other_symbols)
            ),
        }

    def _calculate_significance_test(self, p_yin, p_total, other_yin, other_total) -> Dict[str, float]:
        """Run a pooled two-proportion z-test on the yin shares of two groups.

        Args:
            p_yin: Number of yin symbols in the P group.
            p_total: Size of the P group.
            other_yin: Number of yin symbols in the other group.
            other_total: Size of the other group.

        Returns:
            A dict with ``p_value`` (two-sided, standard normal),
            ``effect_size`` (difference of the two proportions) and
            ``z_score``.  When the test is undefined (an empty group, or
            every/no symbol is yin) it returns ``p_value`` 1.0 with the other
            two values 0.0, so the result always has the same keys.
        """
        no_effect = {'p_value': 1.0, 'effect_size': 0.0, 'z_score': 0.0}

        if p_total == 0 or other_total == 0:
            return no_effect

        pooled_proportion = (p_yin + other_yin) / (p_total + other_total)
        if pooled_proportion == 0 or pooled_proportion == 1:
            return no_effect

        # Standard error of the difference under the pooled null hypothesis.
        se = math.sqrt(
            pooled_proportion * (1 - pooled_proportion) * (1 / p_total + 1 / other_total)
        )
        if se == 0:
            return no_effect

        effect_size = p_yin / p_total - other_yin / other_total
        z_score = effect_size / se

        # Two-sided tail of the standard normal.  erfc(x) equals 1 - erf(x)
        # but keeps precision for large |z|, where 1 - erf(x) rounds to 0.0.
        p_value = math.erfc(abs(z_score) / math.sqrt(2))

        return {
            'p_value': p_value,
            'effect_size': effect_size,
            'z_score': z_score,
        }

    def generate_yin_yang_orientation_report(self) -> str:
        """Run every analysis, print a summary, and return the full JSON report.

        Returns:
            The report serialised with ``json.dumps`` (non-ASCII preserved,
            two-space indent).  It contains the four analysis results and the
            list of conclusion strings.
        """
        print("=== 阴阳取向理论验证报告 ===\n")

        # 1. P-sound / basin pattern
        print("1. P音盆地模式分析:")
        p_analysis = self.analyze_p_basin_pattern()
        distribution = p_analysis['yin_yang_distribution']
        print(f" 发现 {p_analysis['total_p_symbols']} 个P音相关符号")
        print(f" 阴阳分布: 阴{distribution['yin']} | "
              f"阳{distribution['yang']} | "
              f"中性{distribution['neutral']}")

        # 2. Basin yin/yang logic
        print("\n2. 盆地阴阳逻辑验证:")
        basin_validation = self.validate_basin_yin_yang_logic()
        print(f" 分析 {basin_validation['total_basin_symbols']} 个盆地相关符号")
        print(f" 阴阳属性准确率: {basin_validation['yin_yang_accuracy']:.1f}%")
        print(f" 刻法类型准确率: {basin_validation['engraving_accuracy']:.1f}%")

        # 3. Geographical opposites
        print("\n3. 地理对立关系分析:")
        geo_analysis = self.analyze_geographical_opposites()
        print(f" 分析 {geo_analysis['total_geographical_links']} 个地理关联")
        print(f" 有效阴阳对立关系: {geo_analysis['valid_yin_yang_opposites']} 个")
        print(f" 对立关系准确率: {geo_analysis['opposite_accuracy']:.1f}%")

        # 4. P-sound clustering
        print("\n4. P音聚类统计分析:")
        phonetic_analysis = self.validate_p_phonetic_cluster()
        print(f" P音符号数量: {phonetic_analysis['p_symbols_count']}")
        print(f" 其他符号数量: {phonetic_analysis['other_symbols_count']}")
        print(f" P音符号阴属性比例: {phonetic_analysis['p_symbols_yin_ratio']:.1f}%")
        print(f" 其他符号阴属性比例: {phonetic_analysis['other_symbols_yin_ratio']:.1f}%")

        significance = phonetic_analysis['statistical_significance']
        print(f" 统计显著性(p值): {significance['p_value']:.4f}")
        print(f" 效应大小: {significance['effect_size']:.4f}")

        # 5. Conclusions, each driven by a fixed threshold.
        print("\n5. 理论验证结论:")
        conclusions = []

        # P sound vs. yin: the P group must lead by more than 10 points.
        if phonetic_analysis['p_symbols_yin_ratio'] > phonetic_analysis['other_symbols_yin_ratio'] + 10:
            conclusions.append("✅ P音符号显著倾向于阴属性(支持理论)")
        else:
            conclusions.append("❌ P音符号与阴属性关联不显著")

        # Basin logic: more than 80% of basin symbols must be yin.
        if basin_validation['yin_yang_accuracy'] > 80:
            conclusions.append("✅ 盆地符号普遍符合阴属性逻辑(支持理论)")
        else:
            conclusions.append("⚠️ 盆地符号阴阳属性一致性有待提高")

        # Geographical opposites: more than 70% of links must be opposites.
        if geo_analysis['opposite_accuracy'] > 70:
            conclusions.append("✅ 地理对立关系普遍符合阴阳对立模式(支持理论)")
        else:
            conclusions.append("⚠️ 地理对立关系的阴阳模式需要更多证据")

        for conclusion in conclusions:
            print(f" {conclusion}")

        report = {
            'p_basin_analysis': p_analysis,
            'basin_validation': basin_validation,
            'geo_opposites_analysis': geo_analysis,
            'phonetic_cluster_analysis': phonetic_analysis,
            'validation_conclusions': conclusions,
        }
        return json.dumps(report, ensure_ascii=False, indent=2)

    def create_visualization(self) -> None:
        """Draw a 2x2 summary figure, save it as a PNG, and show it.

        The figure is written to ``yin_yang_orientation_analysis.png`` in the
        current working directory and closed afterwards.
        """
        # Fonts able to render the Chinese labels.
        plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
        plt.rcParams['axes.unicode_minus'] = False

        # Gather all data first so a failing query leaves no open figure.
        p_analysis = self.analyze_p_basin_pattern()
        basin_validation = self.validate_basin_yin_yang_logic()
        phonetic_analysis = self.validate_p_phonetic_cluster()

        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        try:
            # Top-left: yin / yang / neutral share of the P-related symbols.
            yin_yang_data = p_analysis['yin_yang_distribution']
            labels = ['阴', '阳', '中性']
            sizes = [yin_yang_data['yin'], yin_yang_data['yang'], yin_yang_data['neutral']]
            if any(sizes):
                axes[0, 0].pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
            else:
                # An all-zero pie cannot be normalised (0/0), so show a
                # placeholder instead.
                axes[0, 0].text(0.5, 0.5, '无数据', ha='center', va='center',
                                transform=axes[0, 0].transAxes)
            axes[0, 0].set_title('P音符号阴阳属性分布')

            # Top-right: engraving-type histogram.  Keys may be NULL in the
            # database, so they are stringified for use as category labels.
            engraving_data = p_analysis['engraving_type_distribution']
            axes[0, 1].bar([str(key) for key in engraving_data],
                           list(engraving_data.values()))
            axes[0, 1].set_title('P音符号刻法类型分布')
            axes[0, 1].set_ylabel('数量')

            # Bottom-left: basin validation accuracy.
            accuracy_labels = ['阴阳属性准确率', '刻法类型准确率']
            accuracy_data = [basin_validation['yin_yang_accuracy'],
                             basin_validation['engraving_accuracy']]
            axes[1, 0].bar(accuracy_labels, accuracy_data)
            axes[1, 0].set_ylabel('准确率 (%)')
            axes[1, 0].set_ylim(0, 100)
            axes[1, 0].set_title('盆地符号验证准确率')

            # Bottom-right: yin share of P-sound symbols vs. the rest.
            comparison_labels = ['P音符号', '其他符号']
            comparison_data = [phonetic_analysis['p_symbols_yin_ratio'],
                               phonetic_analysis['other_symbols_yin_ratio']]
            axes[1, 1].bar(comparison_labels, comparison_data)
            axes[1, 1].set_ylabel('阴属性比例 (%)')
            axes[1, 1].set_ylim(0, 100)
            axes[1, 1].set_title('P音符号阴属性比例对比')

            plt.tight_layout()
            plt.savefig('yin_yang_orientation_analysis.png', dpi=300, bbox_inches='tight')
            plt.show()
        finally:
            plt.close(fig)
def main():
    """Run the full validation: print and save the report, then draw the charts.

    The JSON report is written to ``yin_yang_orientation_report.json`` in the
    current working directory.  The validator's database connection is closed
    on exit, including when a step raises.
    """
    validator = YinYangOrientationValidator()
    try:
        print("开始验证阴阳取向理论...")
        report = validator.generate_yin_yang_orientation_report()

        with open('yin_yang_orientation_report.json', 'w', encoding='utf-8') as f:
            f.write(report)
        print("\n验证报告已保存至: yin_yang_orientation_report.json")

        # Charts are optional: an ImportError raised while drawing is
        # reported and skipped rather than aborting the run.
        try:
            validator.create_visualization()
            print("可视化图表已生成: yin_yang_orientation_analysis.png")
        except ImportError:
            print("警告: 未安装matplotlib,跳过可视化生成")

        print("\n=== 验证完成 ===")
    finally:
        # Release the SQLite connection opened by the validator.
        validator.conn.close()


# Run the validation only when executed as a script, not on import.
if __name__ == "__main__":
    main()