重大发现:全球文明天崇拜和玉崇拜普遍性验证完成
- 验证了地球上所有文明都具备天崇拜和玉崇拜模式
- 覆盖亚洲、欧洲、非洲、美洲、大洋洲、中东等全球范围
- 确认K音文明传播网络的全球分布
- 完善昆仑38词汇系统的理论框架
- 更新坦桑尼亚玉石开采和埃及法老坟墓水银的考古证据
- 全球文明同源论取得重大突破
This commit is contained in:
373
unified-docs/tools/doc-migrator.py
Normal file
373
unified-docs/tools/doc-migrator.py
Normal file
@@ -0,0 +1,373 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
胡汉三千年项目文档迁移工具
|
||||
|
||||
功能:
|
||||
1. 从core-docs和thematic-research迁移文档到统一文档系统
|
||||
2. 自动分类和组织文档
|
||||
3. 保持文档结构和元数据
|
||||
4. 生成迁移报告
|
||||
|
||||
作者:胡汉三千年项目团队
|
||||
版本:1.0.0
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import shutil
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
import datetime
|
||||
|
||||
class DocumentMigrator:
    """Copy documents from the legacy trees into the unified docs tree.

    Files are read from ``<base>/core-docs`` and ``<base>/thematic-research``,
    classified into a category directory below ``<base>/unified-docs`` and
    copied there under their bare file name. Each copied file gets a sibling
    ``<name>.metadata.json`` file, and every run accumulates statistics in
    ``self.migration_report``.
    """

    # Used when no ``base_path`` is passed to the constructor.
    DEFAULT_BASE_PATH = "/home/ben/code/huhan3000"
    TOOL_VERSION = "1.0.0"

    # Suffixes picked up by a migration scan.
    MIGRATABLE_SUFFIXES = (".md", ".txt", ".py", ".json")
    # Suffixes whose text is read to help classification.
    TEXT_SUFFIXES = (".md", ".txt")

    # Content keyword -> category, checked in insertion order when the path
    # alone does not decide the category. The first keyword found wins.
    _CONTENT_KEYWORD_CATEGORIES = {
        "音韵": "01-core-theory/01-phonological-archaeology",
        "文明": "01-core-theory/02-civilization-diffusion",
        "方法": "01-core-theory/03-methodology",
        "理论": "01-core-theory/05-theoretical-framework",
        "实证": "02-thematic-research/01-empirical-studies",
        "历史": "03-historical-analysis/01-historical-events",
        "文化": "04-cultural-comparison/01-cross-cultural",
        "技术": "05-technical-implementation/01-tools",
        "项目": "06-project-docs/01-management",
    }

    def __init__(self, base_path=None):
        """Set up paths, classification rules and an empty report.

        Args:
            base_path: Project root containing ``core-docs``,
                ``thematic-research`` and ``unified-docs``. Defaults to
                ``DEFAULT_BASE_PATH`` when omitted.
        """
        self.base_path = Path(
            self.DEFAULT_BASE_PATH if base_path is None else base_path
        )
        self.unified_docs_path = self.base_path / "unified-docs"
        self.core_docs_path = self.base_path / "core-docs"
        self.thematic_research_path = self.base_path / "thematic-research"

        # Path keyword -> target category, one rule table per source tree.
        self.category_mapping = {
            # Rules for core-docs
            "core-docs": {
                "音韵考古学": "01-core-theory/01-phonological-archaeology",
                "文明传播模型": "01-core-theory/02-civilization-diffusion",
                "方法论体系": "01-core-theory/03-methodology",
                "学术成果": "01-core-theory/04-academic-achievements",
                "理论框架": "01-core-theory/05-theoretical-framework",
                "实证研究": "02-thematic-research/01-empirical-studies",
                "历史分析": "03-historical-analysis/01-historical-events",
                "文化比较": "04-cultural-comparison/01-cross-cultural",
                "技术实现": "05-technical-implementation/01-tools",
                "项目文档": "06-project-docs/01-management",
            },
            # Rules for thematic-research
            "thematic-research": {
                "civilization-studies": "02-thematic-research/02-civilization-studies",
                "phonological-studies": "02-thematic-research/03-phonological-studies",
                "commercial-studies": "02-thematic-research/04-commercial-studies",
                "historical-studies": "03-historical-analysis/02-historical-studies",
                "cultural-studies": "04-cultural-comparison/02-cultural-studies",
                "theory-studies": "01-core-theory/06-theory-studies",
                "methodology-studies": "01-core-theory/03-methodology",
                "empirical-studies": "02-thematic-research/01-empirical-studies",
                "comparative-studies": "04-cultural-comparison/03-comparative-studies",
            },
        }

        # File suffix -> file type label stored in the report.
        self.file_extensions = {
            ".md": "markdown",
            ".txt": "text",
            ".py": "python",
            ".json": "json",
            ".yaml": "yaml",
            ".yml": "yaml",
        }

        self.migration_report = {
            "metadata": {
                "migration_date": datetime.datetime.now().isoformat(),
                "tool_version": self.TOOL_VERSION,
            },
            "statistics": {
                "total_files_scanned": 0,
                "total_files_migrated": 0,
                "total_files_skipped": 0,
                "total_errors": 0,
            },
            "migration_details": {
                "core-docs": {"scanned": 0, "migrated": 0, "skipped": 0},
                "thematic-research": {"scanned": 0, "migrated": 0, "skipped": 0},
            },
            "errors": [],
            "migrated_files": [],
        }

        # target path -> source path claimed during this instance's runs.
        # Targets are flattened to the bare file name, so two sources can map
        # to the same target; this lets the second one be reported instead of
        # silently overwriting the first.
        self._claimed_targets = {}

    def _calculate_file_hash(self, file_path):
        """Return the hex MD5 digest of the file's bytes.

        The file is read in fixed-size chunks so it is never held in memory
        as a whole.
        """
        digest = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(65536), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def _get_file_category(self, source_type, file_path, content=None):
        """Pick the target category for a file.

        Args:
            source_type: Key into ``self.category_mapping``
                (``"core-docs"`` or ``"thematic-research"``).
            file_path: Path whose string form is searched for the path
                keywords of ``source_type`` (case-insensitively).
            content: Optional file text, searched for content keywords when
                no path keyword matches.

        Returns:
            The category as a relative directory string. Falls back to the
            ``99-uncategorized`` directory of the source type.

        Raises:
            KeyError: If ``source_type`` has no rule table.
        """
        path_lower = str(file_path).lower()

        # Path-based rules take precedence.
        for keyword, target_category in self.category_mapping[source_type].items():
            if keyword.lower() in path_lower:
                return target_category

        # Fall back to content keywords when content was supplied.
        if content:
            content_lower = content.lower()
            for keyword, category in self._CONTENT_KEYWORD_CATEGORIES.items():
                if keyword in content_lower:
                    return category

        # Default bucket.
        if source_type == "core-docs":
            return "01-core-theory/99-uncategorized"
        return "02-thematic-research/99-uncategorized"

    def _ensure_directory(self, dir_path):
        """Create ``dir_path`` (and missing parents) if it does not exist."""
        dir_path.mkdir(parents=True, exist_ok=True)

    def _copy_file_with_metadata(self, source_path, target_path):
        """Copy a file with ``shutil.copy2`` and describe it.

        Returns:
            A dict with ``source_path``, ``target_path``, ``size``,
            ``modified_time``, ``hash`` and ``file_type`` of the source file.

        Raises:
            Exception: If copying or inspecting the file fails; the original
                error is chained as ``__cause__``.
        """
        try:
            shutil.copy2(source_path, target_path)

            stat = source_path.stat()
            return {
                "source_path": str(source_path),
                "target_path": str(target_path),
                "size": stat.st_size,
                "modified_time": datetime.datetime.fromtimestamp(
                    stat.st_mtime
                ).isoformat(),
                "hash": self._calculate_file_hash(source_path),
                "file_type": self.file_extensions.get(source_path.suffix, "unknown"),
            }
        except Exception as e:
            # Same exception type and message as before; chaining keeps the
            # original traceback available for debugging.
            raise Exception(f"文件复制失败: {e}") from e

    def _create_migration_metadata(self, source_path, target_path, category):
        """Write ``<target file name>.metadata.json`` next to the target."""
        metadata_path = target_path.with_suffix(target_path.suffix + ".metadata.json")

        metadata = {
            "original_source": str(source_path),
            "migration_date": datetime.datetime.now().isoformat(),
            "category": category,
            "tool_version": self.TOOL_VERSION,
        }

        with open(metadata_path, "w", encoding="utf-8") as f:
            json.dump(metadata, f, ensure_ascii=False, indent=2)

    def _migrate_source(self, source_type, source_root, dry_run):
        """Scan one source tree and migrate every matching file.

        Shared implementation behind ``migrate_core_docs`` and
        ``migrate_thematic_research``.

        Args:
            source_type: Report / rule-table key for this tree.
            source_root: Directory that is scanned recursively.
            dry_run: When true, only print what would happen; nothing is
                created or copied.

        Returns:
            List of file-info dicts for the files copied in this call.
        """
        stats = self.migration_report["statistics"]
        detail = self.migration_report["migration_details"][source_type]
        migrated_files = []

        # Sorted so that reports and collision handling are reproducible.
        for file_path in sorted(source_root.rglob("*")):
            if not file_path.is_file():
                continue
            if file_path.suffix not in self.MIGRATABLE_SUFFIXES:
                continue

            stats["total_files_scanned"] += 1
            detail["scanned"] += 1

            try:
                # The text is only a classification hint, so undecodable
                # bytes must not prevent the file from being migrated.
                content = None
                if file_path.suffix in self.TEXT_SUFFIXES:
                    content = file_path.read_text(encoding="utf-8", errors="replace")

                # Classify on the path relative to the source root so that
                # the location of the checkout cannot match a keyword.
                relative_path = file_path.relative_to(source_root)
                category = self._get_file_category(source_type, relative_path, content)

                target_dir = self.unified_docs_path / category
                target_path = target_dir / relative_path.name

                # Refuse to overwrite a file that another source already
                # produced in this run.
                previous = self._claimed_targets.get(target_path)
                if previous is not None and previous != file_path:
                    raise Exception(f"目标文件名冲突: {target_path} 已由 {previous} 占用")
                self._claimed_targets[target_path] = file_path

                if dry_run:
                    print(f"[模拟] 将迁移: {file_path.name} -> {category}")
                    continue

                # Directories are only created for a real run.
                self._ensure_directory(target_dir)
                file_info = self._copy_file_with_metadata(file_path, target_path)
                self._create_migration_metadata(file_path, target_path, category)

                file_info["category"] = category
                migrated_files.append(file_info)
                self.migration_report["migrated_files"].append(file_info)

                stats["total_files_migrated"] += 1
                detail["migrated"] += 1

                print(f"✓ 已迁移: {file_path.name} -> {category}")

            except Exception as e:
                # Per-file boundary: record the failure and keep going.
                error_msg = f"迁移失败 {file_path}: {e}"
                self.migration_report["errors"].append(error_msg)
                stats["total_errors"] += 1
                stats["total_files_skipped"] += 1
                detail["skipped"] += 1
                print(f"✗ {error_msg}")

        return migrated_files

    def migrate_core_docs(self, dry_run=False):
        """Migrate the ``core-docs`` tree; see ``_migrate_source``."""
        print("开始迁移 core-docs 文档...")
        return self._migrate_source("core-docs", self.core_docs_path, dry_run)

    def migrate_thematic_research(self, dry_run=False):
        """Migrate the ``thematic-research`` tree; see ``_migrate_source``."""
        print("开始迁移 thematic-research 文档...")
        return self._migrate_source(
            "thematic-research", self.thematic_research_path, dry_run
        )

    def save_migration_report(self):
        """Write the report to ``unified-docs/migration-report.json``.

        Returns:
            Path of the written report file.
        """
        report_path = self.unified_docs_path / "migration-report.json"

        # The directory may not exist yet when nothing was migrated.
        self._ensure_directory(self.unified_docs_path)
        with open(report_path, "w", encoding="utf-8") as f:
            json.dump(self.migration_report, f, ensure_ascii=False, indent=2)

        print(f"迁移报告已保存到: {report_path}")
        return report_path

    def print_summary(self):
        """Print totals, per-source counters and recorded errors."""
        stats = self.migration_report["statistics"]
        details = self.migration_report["migration_details"]

        print("\n=== 迁移摘要 ===")
        print(f"总扫描文件数: {stats['total_files_scanned']}")
        print(f"总迁移文件数: {stats['total_files_migrated']}")
        print(f"总跳过文件数: {stats['total_files_skipped']}")
        print(f"总错误数: {stats['total_errors']}")

        print("\n=== 详细统计 ===")
        for source_type, detail in details.items():
            print(f"{source_type}:")
            print(f"  扫描: {detail['scanned']}")
            print(f"  迁移: {detail['migrated']}")
            print(f"  跳过: {detail['skipped']}")

        if self.migration_report["errors"]:
            print("\n=== 错误列表 ===")
            for error in self.migration_report["errors"]:
                print(f"  - {error}")
|
||||
|
||||
def main():
    """Command-line entry point.

    Commands (first argument):
        migrate           migrate core-docs, then thematic-research
        migrate-core      migrate core-docs only
        migrate-thematic  migrate thematic-research only
        summary           print the summary of the last saved report

    ``--dry-run`` anywhere on the command line makes the migrate commands
    print what they would do; no report is saved in that case.
    """
    import sys

    migrator = DocumentMigrator()

    if len(sys.argv) < 2:
        print("用法:")
        print("  python doc-migrator.py migrate [--dry-run]")
        print("  python doc-migrator.py migrate-core [--dry-run]")
        print("  python doc-migrator.py migrate-thematic [--dry-run]")
        print("  python doc-migrator.py summary")
        return

    command = sys.argv[1]
    dry_run = "--dry-run" in sys.argv

    if command == "summary":
        # A fresh migrator only holds zeroed counters, so show the report
        # written by the last real run when one is available.
        report_path = migrator.unified_docs_path / "migration-report.json"
        if report_path.is_file():
            try:
                with open(report_path, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
                required = {"statistics", "migration_details", "errors"}
                if not isinstance(loaded, dict) or not required <= loaded.keys():
                    raise ValueError("unexpected report structure")
                migrator.migration_report = loaded
            except (OSError, ValueError) as e:
                # json.JSONDecodeError is a ValueError; fall back to the
                # in-memory (empty) report rather than crashing.
                print(f"无法读取迁移报告 {report_path}: {e}")
        migrator.print_summary()
        return

    # command -> (banner, migration steps run in order)
    actions = {
        "migrate": (
            "开始完整迁移过程...",
            (migrator.migrate_core_docs, migrator.migrate_thematic_research),
        ),
        "migrate-core": ("开始迁移 core-docs...", (migrator.migrate_core_docs,)),
        "migrate-thematic": (
            "开始迁移 thematic-research...",
            (migrator.migrate_thematic_research,),
        ),
    }

    if command not in actions:
        print(f"未知命令: {command}")
        return

    banner, steps = actions[command]
    print(banner)
    for step in steps:
        step(dry_run)

    # A dry run must not leave a report behind.
    if not dry_run:
        migrator.save_migration_report()

    migrator.print_summary()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user