重大发现:全球文明天崇拜和玉崇拜普遍性验证完成

- 验证了地球上所有文明都具备天崇拜和玉崇拜模式
- 覆盖亚洲、欧洲、非洲、美洲、大洋洲、中东等全球范围
- 确认K音文明传播网络的全球分布
- 完善昆仑38词汇系统的理论框架
- 更新坦桑尼亚玉石开采和埃及法老坟墓水银的考古证据
- 全球文明同源论取得重大突破
This commit is contained in:
ben
2025-10-30 13:48:03 +00:00
parent 6b9c762367
commit 2a19a79695
119 changed files with 6319 additions and 875 deletions

View File

@@ -0,0 +1,373 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
胡汉三千年项目文档迁移工具
功能:
1. 从core-docs和thematic-research迁移文档到统一文档系统
2. 自动分类和组织文档
3. 保持文档结构和元数据
4. 生成迁移报告
作者:胡汉三千年项目团队
版本:1.0.0
"""
import os
import json
import shutil
import hashlib
from pathlib import Path
import datetime
class DocumentMigrator:
    """Copy documents from ``core-docs`` and ``thematic-research`` into ``unified-docs``.

    Every migratable file is classified into a category sub-directory of the
    unified tree (first by keywords in its path, then by keywords in its
    text), copied there together with a ``.metadata.json`` sidecar, and
    recorded in ``self.migration_report``.
    """

    # Suffixes of the files picked up when a source tree is scanned.
    MIGRATABLE_SUFFIXES = (".md", ".txt", ".py", ".json")
    # Suffixes whose text is read and fed to the content-based classifier.
    TEXT_SUFFIXES = (".md", ".txt")

    def __init__(self):
        self.base_path = Path("/home/ben/code/huhan3000")
        self.unified_docs_path = self.base_path / "unified-docs"
        self.core_docs_path = self.base_path / "core-docs"
        self.thematic_research_path = self.base_path / "thematic-research"
        # Path keyword -> category directory (relative to unified-docs),
        # one table per source tree.
        self.category_mapping = {
            # Rules for files coming from core-docs
            "core-docs": {
                "音韵考古学": "01-core-theory/01-phonological-archaeology",
                "文明传播模型": "01-core-theory/02-civilization-diffusion",
                "方法论体系": "01-core-theory/03-methodology",
                "学术成果": "01-core-theory/04-academic-achievements",
                "理论框架": "01-core-theory/05-theoretical-framework",
                "实证研究": "02-thematic-research/01-empirical-studies",
                "历史分析": "03-historical-analysis/01-historical-events",
                "文化比较": "04-cultural-comparison/01-cross-cultural",
                "技术实现": "05-technical-implementation/01-tools",
                "项目文档": "06-project-docs/01-management"
            },
            # Rules for files coming from thematic-research
            "thematic-research": {
                "civilization-studies": "02-thematic-research/02-civilization-studies",
                "phonological-studies": "02-thematic-research/03-phonological-studies",
                "commercial-studies": "02-thematic-research/04-commercial-studies",
                "historical-studies": "03-historical-analysis/02-historical-studies",
                "cultural-studies": "04-cultural-comparison/02-cultural-studies",
                "theory-studies": "01-core-theory/06-theory-studies",
                "methodology-studies": "01-core-theory/03-methodology",
                "empirical-studies": "02-thematic-research/01-empirical-studies",
                "comparative-studies": "04-cultural-comparison/03-comparative-studies"
            }
        }
        # File suffix -> file type label stored in the report
        self.file_extensions = {
            ".md": "markdown",
            ".txt": "text",
            ".py": "python",
            ".json": "json",
            ".yaml": "yaml",
            ".yml": "yaml"
        }
        self.migration_report = {
            "metadata": {
                "migration_date": datetime.datetime.now().isoformat(),
                "tool_version": "1.0.0"
            },
            "statistics": {
                "total_files_scanned": 0,
                "total_files_migrated": 0,
                "total_files_skipped": 0,
                "total_errors": 0
            },
            "migration_details": {
                "core-docs": {"scanned": 0, "migrated": 0, "skipped": 0},
                "thematic-research": {"scanned": 0, "migrated": 0, "skipped": 0}
            },
            "errors": [],
            "migrated_files": []
        }
        # Target path -> source path that was assigned to it by this instance.
        # Files are flattened into their category directory, so this is what
        # lets same-named files be detected instead of overwriting each other.
        self._claimed_targets = {}

    def _calculate_file_hash(self, file_path):
        """Return the hexadecimal MD5 digest of the file's bytes."""
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            # Read in small chunks so large files are never loaded whole.
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def _get_file_category(self, source_type, file_path, content=None):
        """Return the category directory (relative to unified-docs) for a file.

        Args:
            source_type: Key of ``self.category_mapping``
                ("core-docs" or "thematic-research").
            file_path: Path of the source file; its string form is searched
                case-insensitively for the mapping keywords.
            content: Optional text of the file, used only when no path
                keyword matches.

        Returns:
            The first matching category, or the ``99-uncategorized``
            directory of the source type's default section.

        Raises:
            KeyError: If ``source_type`` has no entry in the mapping.
        """
        path_lower = str(file_path).lower()
        # Path-based rules take precedence; the first keyword in mapping
        # order that occurs in the path wins.
        for keyword, target_category in self.category_mapping[source_type].items():
            if keyword.lower() in path_lower:
                return target_category
        # Fall back to keywords in the text, again first match wins.
        if content:
            content_lower = content.lower()
            keyword_categories = {
                "音韵": "01-core-theory/01-phonological-archaeology",
                "文明": "01-core-theory/02-civilization-diffusion",
                "方法": "01-core-theory/03-methodology",
                "理论": "01-core-theory/05-theoretical-framework",
                "实证": "02-thematic-research/01-empirical-studies",
                "历史": "03-historical-analysis/01-historical-events",
                "文化": "04-cultural-comparison/01-cross-cultural",
                "技术": "05-technical-implementation/01-tools",
                "项目": "06-project-docs/01-management"
            }
            for keyword, category in keyword_categories.items():
                if keyword in content_lower:
                    return category
        # Nothing matched: use the per-source default bucket.
        if source_type == "core-docs":
            return "01-core-theory/99-uncategorized"
        return "02-thematic-research/99-uncategorized"

    def _ensure_directory(self, dir_path):
        """Create ``dir_path`` (and missing parents) if it does not exist."""
        dir_path.mkdir(parents=True, exist_ok=True)

    def _copy_file_with_metadata(self, source_path, target_path):
        """Copy a file with ``shutil.copy2`` and describe the copy.

        Returns:
            A dict with the source and target paths, the size, the
            modification time (ISO format), the MD5 hash of the source and a
            file type label.

        Raises:
            Exception: If copying or inspecting the file fails; the original
                error is attached as ``__cause__``.
        """
        try:
            shutil.copy2(source_path, target_path)
            stat = source_path.stat()
            return {
                "source_path": str(source_path),
                "target_path": str(target_path),
                "size": stat.st_size,
                "modified_time": datetime.datetime.fromtimestamp(stat.st_mtime).isoformat(),
                "hash": self._calculate_file_hash(source_path),
                "file_type": self.file_extensions.get(source_path.suffix, "unknown")
            }
        except Exception as e:
            # Chain the cause so the real traceback is not lost.
            raise Exception(f"文件复制失败: {e}") from e

    def _create_migration_metadata(self, source_path, target_path, category):
        """Write ``<target name>.metadata.json`` next to the migrated file."""
        metadata_path = target_path.with_suffix(target_path.suffix + ".metadata.json")
        metadata = {
            "original_source": str(source_path),
            "migration_date": datetime.datetime.now().isoformat(),
            "category": category,
            "tool_version": "1.0.0"
        }
        with open(metadata_path, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, ensure_ascii=False, indent=2)

    def _read_text_for_classification(self, file_path):
        """Return the text of a ``.md``/``.txt`` file, or ``None`` for other types.

        The text is only used to pick a category, so undecodable bytes are
        replaced rather than making the whole file fail to migrate.
        """
        if file_path.suffix not in self.TEXT_SUFFIXES:
            return None
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()

    def _resolve_target_path(self, source_path, source_root, target_dir):
        """Return a target path for ``source_path`` that no other source uses.

        The plain file name is used when it is free. When another source
        file already claimed it, the path relative to ``source_root`` is
        flattened with ``__`` (``a/b.md`` -> ``a__b.md``), and a numeric
        suffix is appended if even that name is taken.
        """
        candidate = target_dir / source_path.name
        if self._claimed_targets.get(candidate, source_path) != source_path:
            flat_name = "__".join(source_path.relative_to(source_root).parts)
            candidate = target_dir / flat_name
            counter = 1
            while self._claimed_targets.get(candidate, source_path) != source_path:
                candidate = target_dir / f"{Path(flat_name).stem}-{counter}{source_path.suffix}"
                counter += 1
        self._claimed_targets[candidate] = source_path
        return candidate

    def _migrate_source(self, source_type, source_root, dry_run):
        """Scan one source tree and migrate every matching file.

        Args:
            source_type: "core-docs" or "thematic-research"; selects the
                mapping rules and the report section to update.
            source_root: Directory that is scanned recursively.
            dry_run: When true, only print what would be migrated; nothing
                is created or copied.

        Returns:
            The list of file-info dicts for the files copied by this call.
        """
        stats = self.migration_report["statistics"]
        detail = self.migration_report["migration_details"][source_type]
        migrated_files = []
        # Sorted so that the result (in particular which of two same-named
        # files keeps the plain name) does not depend on listing order.
        for file_path in sorted(source_root.rglob("*")):
            if not (file_path.is_file() and file_path.suffix in self.MIGRATABLE_SUFFIXES):
                continue
            stats["total_files_scanned"] += 1
            detail["scanned"] += 1
            try:
                content = self._read_text_for_classification(file_path)
                category = self._get_file_category(source_type, file_path, content)
                target_dir = self.unified_docs_path / category
                target_path = self._resolve_target_path(file_path, source_root, target_dir)
                if dry_run:
                    print(f"[模拟] 将迁移: {file_path.name} -> {category}")
                    continue
                # Only touch the file system on a real run.
                self._ensure_directory(target_dir)
                file_info = self._copy_file_with_metadata(file_path, target_path)
                self._create_migration_metadata(file_path, target_path, category)
                file_info["category"] = category
                migrated_files.append(file_info)
                self.migration_report["migrated_files"].append(file_info)
                stats["total_files_migrated"] += 1
                detail["migrated"] += 1
                print(f"✓ 已迁移: {file_path.name} -> {category}")
            except Exception as e:
                # Per-file boundary: record the failure and carry on with
                # the remaining files.
                error_msg = f"迁移失败 {file_path}: {e}"
                self.migration_report["errors"].append(error_msg)
                stats["total_errors"] += 1
                stats["total_files_skipped"] += 1
                detail["skipped"] += 1
                print(error_msg)
        return migrated_files

    def migrate_core_docs(self, dry_run=False):
        """Migrate the ``core-docs`` tree; return the info dicts of copied files."""
        print("开始迁移 core-docs 文档...")
        return self._migrate_source("core-docs", self.core_docs_path, dry_run)

    def migrate_thematic_research(self, dry_run=False):
        """Migrate the ``thematic-research`` tree; return the info dicts of copied files."""
        print("开始迁移 thematic-research 文档...")
        return self._migrate_source("thematic-research", self.thematic_research_path, dry_run)

    def save_migration_report(self):
        """Write the report to ``unified-docs/migration-report.json`` and return its path."""
        # The directory may not exist yet when no file was migrated.
        self._ensure_directory(self.unified_docs_path)
        report_path = self.unified_docs_path / "migration-report.json"
        with open(report_path, 'w', encoding='utf-8') as f:
            json.dump(self.migration_report, f, ensure_ascii=False, indent=2)
        print(f"迁移报告已保存到: {report_path}")
        return report_path

    def print_summary(self):
        """Print the totals, the per-source counters and any recorded errors."""
        stats = self.migration_report["statistics"]
        details = self.migration_report["migration_details"]
        print("\n=== 迁移摘要 ===")
        print(f"总扫描文件数: {stats['total_files_scanned']}")
        print(f"总迁移文件数: {stats['total_files_migrated']}")
        print(f"总跳过文件数: {stats['total_files_skipped']}")
        print(f"总错误数: {stats['total_errors']}")
        print("\n=== 详细统计 ===")
        for source_type, detail in details.items():
            print(f"{source_type}:")
            print(f" 扫描: {detail['scanned']}")
            print(f" 迁移: {detail['migrated']}")
            print(f" 跳过: {detail['skipped']}")
        if self.migration_report["errors"]:
            print("\n=== 错误列表 ===")
            for error in self.migration_report["errors"]:
                print(f" - {error}")
def main():
    """Command-line entry point.

    Reads the command from ``sys.argv[1]``:

    * ``migrate`` / ``migrate-core`` / ``migrate-thematic`` run the
      corresponding migration(s), save the report unless ``--dry-run`` is
      given, and print the summary.
    * ``summary`` prints the summary of the previously saved report.

    With no command the usage text is printed; an unrecognised command is
    reported. The function always returns ``None``.
    """
    import sys

    if len(sys.argv) < 2:
        print("用法:")
        print(" python doc-migrator.py migrate [--dry-run]")
        print(" python doc-migrator.py migrate-core [--dry-run]")
        print(" python doc-migrator.py migrate-thematic [--dry-run]")
        print(" python doc-migrator.py summary")
        return

    command = sys.argv[1]
    dry_run = "--dry-run" in sys.argv

    # command -> (banner, migrator methods to call in order)
    migrate_commands = {
        "migrate": (
            "开始完整迁移过程...",
            ("migrate_core_docs", "migrate_thematic_research"),
        ),
        "migrate-core": ("开始迁移 core-docs...", ("migrate_core_docs",)),
        "migrate-thematic": ("开始迁移 thematic-research...", ("migrate_thematic_research",)),
    }

    if command in migrate_commands:
        banner, steps = migrate_commands[command]
        migrator = DocumentMigrator()
        print(banner)
        for step in steps:
            getattr(migrator, step)(dry_run)
        # A dry run copies nothing, so there is no report worth persisting.
        if not dry_run:
            migrator.save_migration_report()
        migrator.print_summary()
    elif command == "summary":
        migrator = DocumentMigrator()
        # A fresh migrator only holds zeroed counters; show the report of
        # the last real run when one was saved.
        report_path = migrator.unified_docs_path / "migration-report.json"
        try:
            with open(report_path, 'r', encoding='utf-8') as f:
                saved_report = json.load(f)
        except FileNotFoundError:
            print(f"未找到迁移报告: {report_path}")
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError (malformed report).
            print(f"无法读取迁移报告 {report_path}: {e}")
        else:
            required = ("statistics", "migration_details", "errors")
            if isinstance(saved_report, dict) and all(key in saved_report for key in required):
                migrator.migration_report = saved_report
            else:
                print(f"无法读取迁移报告 {report_path}: 格式不正确")
        migrator.print_summary()
    else:
        print(f"未知命令: {command}")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()