重大发现：全球文明天崇拜和玉崇拜普遍性验证完成

- 验证了地球上所有文明都具备天崇拜和玉崇拜模式
- 覆盖亚洲、欧洲、非洲、美洲、大洋洲、中东等全球范围
- 确认K音文明传播网络的全球分布
- 完善昆仑38词汇系统的理论框架
- 更新坦桑尼亚玉石开采和埃及法老坟墓水银的考古证据
- 全球文明同源论取得重大突破
2025-10-30 13:48:03 +00:00
parent 6b9c762367
commit 2a19a79695
119 changed files with 6319 additions and 875 deletions
--- a/unified-docs/tools/doc-migrator.py
+++ b/unified-docs/tools/doc-migrator.py
@@ -0,0 +1,373 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+胡汉三千年项目文档迁移工具
+
+功能：
+1. 从core-docs和thematic-research迁移文档到统一文档系统
+2. 自动分类和组织文档
+3. 保持文档结构和元数据
+4. 生成迁移报告
+
+作者：胡汉三千年项目团队
+版本：1.0.0
+"""
+
+import os
+import json
+import shutil
+import hashlib
+from pathlib import Path
+import datetime
+
+class DocumentMigrator:
+    """Copy documents from the legacy trees into the unified-docs layout.
+
+    Files under ``core-docs`` and ``thematic-research`` are classified by a
+    keyword in their path (then, for text files, by a keyword in their
+    content), copied into the matching category directory below
+    ``unified-docs`` and given a ``<name>.metadata.json`` sidecar.
+    Counters, error messages and per-file records accumulate in
+    ``self.migration_report``.
+    """
+
+    # Suffixes picked up when scanning a source tree (compared lower-cased).
+    MIGRATABLE_SUFFIXES = (".md", ".txt", ".py", ".json")
+    # Suffixes whose text is read and used for content-based classification.
+    TEXT_SUFFIXES = (".md", ".txt")
+    # Content keyword -> category, checked in insertion order; first hit wins.
+    CONTENT_KEYWORD_CATEGORIES = {
+        "音韵": "01-core-theory/01-phonological-archaeology",
+        "文明": "01-core-theory/02-civilization-diffusion",
+        "方法": "01-core-theory/03-methodology",
+        "理论": "01-core-theory/05-theoretical-framework",
+        "实证": "02-thematic-research/01-empirical-studies",
+        "历史": "03-historical-analysis/01-historical-events",
+        "文化": "04-cultural-comparison/01-cross-cultural",
+        "技术": "05-technical-implementation/01-tools",
+        "项目": "06-project-docs/01-management",
+    }
+
+    def __init__(self, base_path=None):
+        """Set up paths, classification rules and an empty report.
+
+        Args:
+            base_path: Project root that contains ``core-docs``,
+                ``thematic-research`` and ``unified-docs``.  When omitted,
+                the original hard-coded project location is used.
+        """
+        if base_path is None:
+            base_path = "/home/ben/code/huhan3000"
+        self.base_path = Path(base_path)
+        self.unified_docs_path = self.base_path / "unified-docs"
+        self.core_docs_path = self.base_path / "core-docs"
+        self.thematic_research_path = self.base_path / "thematic-research"
+
+        # Path keyword -> target category, one table per source tree.
+        self.category_mapping = {
+            "core-docs": {
+                "音韵考古学": "01-core-theory/01-phonological-archaeology",
+                "文明传播模型": "01-core-theory/02-civilization-diffusion",
+                "方法论体系": "01-core-theory/03-methodology",
+                "学术成果": "01-core-theory/04-academic-achievements",
+                "理论框架": "01-core-theory/05-theoretical-framework",
+                "实证研究": "02-thematic-research/01-empirical-studies",
+                "历史分析": "03-historical-analysis/01-historical-events",
+                "文化比较": "04-cultural-comparison/01-cross-cultural",
+                "技术实现": "05-technical-implementation/01-tools",
+                "项目文档": "06-project-docs/01-management",
+            },
+            "thematic-research": {
+                "civilization-studies": "02-thematic-research/02-civilization-studies",
+                "phonological-studies": "02-thematic-research/03-phonological-studies",
+                "commercial-studies": "02-thematic-research/04-commercial-studies",
+                "historical-studies": "03-historical-analysis/02-historical-studies",
+                "cultural-studies": "04-cultural-comparison/02-cultural-studies",
+                "theory-studies": "01-core-theory/06-theory-studies",
+                "methodology-studies": "01-core-theory/03-methodology",
+                "empirical-studies": "02-thematic-research/01-empirical-studies",
+                "comparative-studies": "04-cultural-comparison/03-comparative-studies",
+            },
+        }
+
+        # File suffix -> type label stored in each per-file record.
+        self.file_extensions = {
+            ".md": "markdown",
+            ".txt": "text",
+            ".py": "python",
+            ".json": "json",
+            ".yaml": "yaml",
+            ".yml": "yaml",
+        }
+
+        self.migration_report = {
+            "metadata": {
+                "migration_date": datetime.datetime.now().isoformat(),
+                "tool_version": "1.0.0",
+            },
+            "statistics": {
+                "total_files_scanned": 0,
+                "total_files_migrated": 0,
+                "total_files_skipped": 0,
+                "total_errors": 0,
+            },
+            "migration_details": {
+                "core-docs": {"scanned": 0, "migrated": 0, "skipped": 0},
+                "thematic-research": {"scanned": 0, "migrated": 0, "skipped": 0},
+            },
+            "errors": [],
+            "migrated_files": [],
+        }
+
+        # Target path -> source path for every target handed out by this
+        # instance; used to stop two sources from sharing one target.
+        self._claimed_targets = {}
+
+    def _calculate_file_hash(self, file_path):
+        """Return the hex MD5 digest of the file's bytes, read in 4 KiB chunks."""
+        hash_md5 = hashlib.md5()
+        with open(file_path, "rb") as f:
+            for chunk in iter(lambda: f.read(4096), b""):
+                hash_md5.update(chunk)
+        return hash_md5.hexdigest()
+
+    def _get_file_category(self, source_type, file_path, content=None):
+        """Pick the unified-docs category for one file.
+
+        Args:
+            source_type: ``"core-docs"`` or ``"thematic-research"``; selects
+                the path-keyword table in ``self.category_mapping``.
+            file_path: Path whose string form is searched
+                (case-insensitively) for the path keywords.
+            content: Optional file text, searched for the content keywords
+                when no path keyword matched.
+
+        Returns:
+            The category as a relative directory string; an
+            ``.../99-uncategorized`` fallback when nothing matches.
+
+        Raises:
+            KeyError: If ``source_type`` has no entry in the mapping.
+        """
+        path_text = str(file_path).lower()
+        for keyword, target_category in self.category_mapping[source_type].items():
+            if keyword.lower() in path_text:
+                return target_category
+
+        if content:
+            content_lower = content.lower()
+            for keyword, category in self.CONTENT_KEYWORD_CATEGORIES.items():
+                if keyword in content_lower:
+                    return category
+
+        if source_type == "core-docs":
+            return "01-core-theory/99-uncategorized"
+        return "02-thematic-research/99-uncategorized"
+
+    def _ensure_directory(self, dir_path):
+        """Create ``dir_path`` (and missing parents) if it does not exist."""
+        dir_path.mkdir(parents=True, exist_ok=True)
+
+    def _copy_file_with_metadata(self, source_path, target_path):
+        """Copy one file with ``shutil.copy2`` and describe the result.
+
+        Returns:
+            A dict with the source and target paths, size, modification
+            time, MD5 hash and a file-type label.
+
+        Raises:
+            RuntimeError: If copying or inspecting the file fails; the
+                underlying ``OSError`` is chained as the cause.
+        """
+        try:
+            shutil.copy2(source_path, target_path)
+            stat = source_path.stat()
+            return {
+                "source_path": str(source_path),
+                "target_path": str(target_path),
+                "size": stat.st_size,
+                "modified_time": datetime.datetime.fromtimestamp(stat.st_mtime).isoformat(),
+                "hash": self._calculate_file_hash(source_path),
+                "file_type": self.file_extensions.get(source_path.suffix.lower(), "unknown"),
+            }
+        except OSError as e:
+            raise RuntimeError(f"文件复制失败: {e}") from e
+
+    def _create_migration_metadata(self, source_path, target_path, category):
+        """Write the ``<target name>.metadata.json`` sidecar next to the copy."""
+        metadata_path = target_path.with_suffix(target_path.suffix + ".metadata.json")
+
+        metadata = {
+            "original_source": str(source_path),
+            "migration_date": datetime.datetime.now().isoformat(),
+            "category": category,
+            "tool_version": "1.0.0",
+        }
+
+        with open(metadata_path, "w", encoding="utf-8") as f:
+            json.dump(metadata, f, ensure_ascii=False, indent=2)
+
+    def _read_text_for_classification(self, file_path):
+        """Return the file's text for keyword matching, or ``None``.
+
+        Only ``TEXT_SUFFIXES`` files are read.  A file that is not valid
+        UTF-8 yields ``None`` so it is still migrated and classified by
+        path alone instead of being reported as a failure.
+        """
+        if file_path.suffix.lower() not in self.TEXT_SUFFIXES:
+            return None
+        try:
+            return file_path.read_text(encoding="utf-8")
+        except UnicodeDecodeError:
+            return None
+
+    def _claim_target_path(self, source_path, target_path):
+        """Reserve a target path that no other source has been given.
+
+        Targets are built from the bare file name, so two sources in
+        different subdirectories can map to the same target.  When the
+        wanted path is already reserved for a different source, ``-2``,
+        ``-3``, ... is appended to the stem until a free name is found.
+        Asking again for the same source returns the same target.
+        """
+        candidate = target_path
+        counter = 2
+        while self._claimed_targets.get(candidate, source_path) != source_path:
+            candidate = target_path.with_name(
+                f"{target_path.stem}-{counter}{target_path.suffix}"
+            )
+            counter += 1
+        self._claimed_targets[candidate] = source_path
+        return candidate
+
+    def _migrate_source(self, source_type, source_path, dry_run):
+        """Scan one source tree and migrate every matching file.
+
+        Shared implementation behind ``migrate_core_docs`` and
+        ``migrate_thematic_research``.
+
+        Returns:
+            The per-file records of the files copied by this call (always
+            empty for a dry run).
+        """
+        migrated_files = []
+        stats = self.migration_report["statistics"]
+        details = self.migration_report["migration_details"][source_type]
+
+        if not source_path.is_dir():
+            print(f"⚠ 源目录不存在，已跳过: {source_path}")
+            return migrated_files
+
+        # Sorted so that collision suffixes do not depend on filesystem order.
+        for file_path in sorted(source_path.rglob("*")):
+            if not file_path.is_file():
+                continue
+            if file_path.suffix.lower() not in self.MIGRATABLE_SUFFIXES:
+                continue
+
+            stats["total_files_scanned"] += 1
+            details["scanned"] += 1
+
+            try:
+                # Classify on the path relative to the source tree so the
+                # location of the checkout cannot influence the category.
+                relative_path = file_path.relative_to(source_path)
+                content = self._read_text_for_classification(file_path)
+                category = self._get_file_category(source_type, relative_path, content)
+
+                target_dir = self.unified_docs_path / category
+                target_path = self._claim_target_path(
+                    file_path, target_dir / relative_path.name
+                )
+
+                if dry_run:
+                    # A dry run must not touch the filesystem at all.
+                    print(f"[模拟] 将迁移: {file_path.name} -> {category}")
+                    continue
+
+                self._ensure_directory(target_dir)
+                file_info = self._copy_file_with_metadata(file_path, target_path)
+                self._create_migration_metadata(file_path, target_path, category)
+
+                file_info["category"] = category
+                migrated_files.append(file_info)
+                self.migration_report["migrated_files"].append(file_info)
+
+                stats["total_files_migrated"] += 1
+                details["migrated"] += 1
+
+                print(f"✓ 已迁移: {file_path.name} -> {category}")
+            except Exception as e:
+                # Per-file boundary: record the failure and keep going.
+                error_msg = f"迁移失败 {file_path}: {e}"
+                self.migration_report["errors"].append(error_msg)
+                stats["total_errors"] += 1
+                stats["total_files_skipped"] += 1
+                details["skipped"] += 1
+                print(f"✗ {error_msg}")
+
+        return migrated_files
+
+    def migrate_core_docs(self, dry_run=False):
+        """Migrate the ``core-docs`` tree; return the copied files' records.
+
+        Args:
+            dry_run: When true, only print what would be migrated.
+        """
+        print("开始迁移 core-docs 文档...")
+        return self._migrate_source("core-docs", self.core_docs_path, dry_run)
+
+    def migrate_thematic_research(self, dry_run=False):
+        """Migrate the ``thematic-research`` tree; return the copied files' records.
+
+        Args:
+            dry_run: When true, only print what would be migrated.
+        """
+        print("开始迁移 thematic-research 文档...")
+        return self._migrate_source(
+            "thematic-research", self.thematic_research_path, dry_run
+        )
+
+    def save_migration_report(self):
+        """Write the report to ``unified-docs/migration-report.json``.
+
+        Returns:
+            The path of the written report.
+        """
+        # The directory is missing when nothing has been migrated yet.
+        self._ensure_directory(self.unified_docs_path)
+        report_path = self.unified_docs_path / "migration-report.json"
+
+        with open(report_path, "w", encoding="utf-8") as f:
+            json.dump(self.migration_report, f, ensure_ascii=False, indent=2)
+
+        print(f"迁移报告已保存到: {report_path}")
+        return report_path
+
+    def print_summary(self):
+        """Print the overall counters, per-source counters and any errors."""
+        stats = self.migration_report["statistics"]
+        details = self.migration_report["migration_details"]
+
+        print("\n=== 迁移摘要 ===")
+        print(f"总扫描文件数: {stats['total_files_scanned']}")
+        print(f"总迁移文件数: {stats['total_files_migrated']}")
+        print(f"总跳过文件数: {stats['total_files_skipped']}")
+        print(f"总错误数: {stats['total_errors']}")
+
+        print("\n=== 详细统计 ===")
+        for source_type, detail in details.items():
+            print(f"{source_type}:")
+            print(f"  扫描: {detail['scanned']}")
+            print(f"  迁移: {detail['migrated']}")
+            print(f"  跳过: {detail['skipped']}")
+
+        if self.migration_report["errors"]:
+            print("\n=== 错误列表 ===")
+            for error in self.migration_report["errors"]:
+                print(f"  - {error}")
+
+def main():
+    """Command-line entry point.
+
+    Commands (first argument):
+        migrate           migrate core-docs, then thematic-research
+        migrate-core      migrate core-docs only
+        migrate-thematic  migrate thematic-research only
+        summary           print the summary of the saved migration report
+
+    ``--dry-run`` anywhere on the command line makes the migrate commands
+    print what they would do and skips saving the report.  An unknown
+    command prints the usage and exits with status 1.
+    """
+    import sys
+
+    usage_lines = (
+        "用法:",
+        "  python doc-migrator.py migrate [--dry-run]",
+        "  python doc-migrator.py migrate-core [--dry-run]",
+        "  python doc-migrator.py migrate-thematic [--dry-run]",
+        "  python doc-migrator.py summary",
+    )
+
+    migrator = DocumentMigrator()
+
+    if len(sys.argv) < 2:
+        for line in usage_lines:
+            print(line)
+        return
+
+    command = sys.argv[1]
+    dry_run = "--dry-run" in sys.argv
+
+    if command == "summary":
+        # A freshly built migrator only holds zeroed counters, so the
+        # summary has to come from the report written by an earlier run.
+        report_path = migrator.unified_docs_path / "migration-report.json"
+        try:
+            with open(report_path, "r", encoding="utf-8") as f:
+                migrator.migration_report = json.load(f)
+        except FileNotFoundError:
+            print(f"未找到迁移报告: {report_path}")
+            return
+        except (OSError, json.JSONDecodeError) as e:
+            print(f"无法读取迁移报告 {report_path}: {e}")
+            sys.exit(1)
+        migrator.print_summary()
+        return
+
+    # command -> (banner, migration steps run in order)
+    plans = {
+        "migrate": (
+            "开始完整迁移过程...",
+            (migrator.migrate_core_docs, migrator.migrate_thematic_research),
+        ),
+        "migrate-core": (
+            "开始迁移 core-docs...",
+            (migrator.migrate_core_docs,),
+        ),
+        "migrate-thematic": (
+            "开始迁移 thematic-research...",
+            (migrator.migrate_thematic_research,),
+        ),
+    }
+
+    if command not in plans:
+        print(f"未知命令: {command}")
+        for line in usage_lines:
+            print(line)
+        sys.exit(1)
+
+    banner, steps = plans[command]
+    print(banner)
+    for step in steps:
+        step(dry_run)
+
+    # A dry run changes nothing, so there is no report worth saving.
+    if not dry_run:
+        migrator.save_migration_report()
+
+    migrator.print_summary()
+
+# Run the command-line interface only when executed as a script.
+if __name__ == "__main__":
+    main()