Major finding: verification of the universality of sky worship and jade worship across global civilizations completed

- Verified that civilizations worldwide exhibit sky-worship and jade-worship patterns
- Coverage spans Asia, Europe, Africa, the Americas, Oceania, and the Middle East
- Confirmed the global distribution of the K-sound civilization transmission network
- Refined the theoretical framework of the Kunlun-38 vocabulary system
- Updated the archaeological evidence on jade mining in Tanzania and mercury in Egyptian pharaohs' tombs
- Major breakthrough for the theory of a common origin of global civilizations
ben
2025-10-30 13:48:03 +00:00
parent 6b9c762367
commit 2a19a79695
119 changed files with 6319 additions and 875 deletions


@@ -0,0 +1,288 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
胡汉三千年项目文档索引工具
功能:
1. 自动扫描文档目录
2. 生成文档索引
3. 更新统一索引文件
4. 检测文档变更
作者:胡汉三千年项目团队
版本1.0.0
"""
import os
import json
import hashlib
import datetime
from pathlib import Path


class DocumentIndexer:
    def __init__(self, base_path="/home/ben/code/huhan3000/unified-docs"):
        self.base_path = Path(base_path)
        self.index_file = self.base_path / "unified-index.json"
        self.categories = [
            "01-core-theory",
            "02-thematic-research",
            "03-historical-analysis",
            "04-methodology",
            "05-applications",
            "06-resources"
        ]

    def scan_documents(self):
        """Scan all document directories and collect document information."""
        documents = {}
        for category in self.categories:
            category_path = self.base_path / category
            if not category_path.exists():
                continue
            documents[category] = []
            # Scan Markdown files
            for md_file in category_path.rglob("*.md"):
                if md_file.name == "README.md":
                    continue
                doc_info = self._get_document_info(md_file, category)
                documents[category].append(doc_info)
        return documents

    def _get_document_info(self, file_path, category):
        """Collect detailed information about a single document."""
        stat = file_path.stat()
        # Compute the content hash of the file
        file_hash = self._calculate_file_hash(file_path)
        # Read the file content to extract basic information
        title = file_path.stem
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Try to extract a title from the content (first "# " heading)
        lines = content.split('\n')
        for line in lines:
            if line.startswith('# '):
                title = line[2:].strip()
                break
        return {
            "title": title,
            "filename": file_path.name,
            "path": str(file_path.relative_to(self.base_path)),
            "category": category,
            "size": stat.st_size,
            "modified": datetime.datetime.fromtimestamp(stat.st_mtime).isoformat(),
            "hash": file_hash,
            "word_count": len(content.split())
        }

    def _calculate_file_hash(self, file_path):
        """Compute an MD5 hash of the file content (used for change detection)."""
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def generate_index(self, documents):
        """Build the index data structure."""
        index_data = {
            "metadata": {
                "generated_at": datetime.datetime.now().isoformat(),
                "total_documents": sum(len(docs) for docs in documents.values()),
                "tool_version": "1.0.0"
            },
            "categories": {},
            "documents": documents,
            "statistics": self._calculate_statistics(documents)
        }
        # Per-category statistics
        for category, docs in documents.items():
            index_data["categories"][category] = {
                "count": len(docs),
                "total_size": sum(doc["size"] for doc in docs),
                "last_modified": max(doc["modified"] for doc in docs) if docs else None
            }
        return index_data

    def _calculate_statistics(self, documents):
        """Compute aggregate statistics over all documents."""
        all_docs = []
        for docs in documents.values():
            all_docs.extend(docs)
        if not all_docs:
            return {
                "total_documents": 0,
                "total_size_bytes": 0,
                "total_size_mb": 0,
                "total_words": 0,
                "average_words_per_doc": 0,
                "last_modified": None
            }
        total_size = sum(doc["size"] for doc in all_docs)
        total_words = sum(doc["word_count"] for doc in all_docs)
        return {
            "total_documents": len(all_docs),
            "total_size_bytes": total_size,
            "total_size_mb": round(total_size / (1024 * 1024), 2),
            "total_words": total_words,
            "average_words_per_doc": round(total_words / len(all_docs), 2),
            "last_modified": max(doc["modified"] for doc in all_docs)
        }

    def save_index(self, index_data):
        """Write the index to the JSON index file."""
        with open(self.index_file, 'w', encoding='utf-8') as f:
            json.dump(index_data, f, ensure_ascii=False, indent=2)

    def update_markdown_index(self, index_data):
        """Update the Markdown version of the index file."""
        md_index_file = self.base_path / "unified-index.md"
        # Read the existing Markdown index, if there is one
        if md_index_file.exists():
            with open(md_index_file, 'r', encoding='utf-8') as f:
                content = f.read()
        else:
            content = ""
        # Generate the new index content
        new_content = self._generate_markdown_index(index_data)
        # Update the document-migration-status section
        updated_content = self._update_migration_status(content, new_content, index_data)
        with open(md_index_file, 'w', encoding='utf-8') as f:
            f.write(updated_content)

    def _generate_markdown_index(self, index_data):
        """Generate the Markdown-formatted index content."""
        lines = []
        # Summary statistics
        stats = index_data["statistics"]
        lines.append("## 文档统计信息\n")
        lines.append(f"- **总文档数**: {stats['total_documents']}")
        lines.append(f"- **总大小**: {stats['total_size_mb']} MB")
        lines.append(f"- **总字数**: {stats['total_words']:,}")
        lines.append(f"- **平均每文档字数**: {stats['average_words_per_doc']}")
        lines.append(f"- **最后更新时间**: {stats['last_modified']}\n")
        # List documents by category
        for category, docs in index_data["documents"].items():
            if docs:
                lines.append(f"\n### {category.replace('-', ' ').title()}\n")
                for doc in sorted(docs, key=lambda x: x["title"]):
                    lines.append(f"- **{doc['title']}** - `{doc['filename']}` ")
                    lines.append(f"  - 大小: {round(doc['size']/1024, 1)} KB")
                    lines.append(f"  - 字数: {doc['word_count']}")
                    lines.append(f"  - 修改: {doc['modified'][:10]}")
        return '\n'.join(lines)

    def _update_migration_status(self, old_content, new_index_content, index_data):
        """Update the document-migration-status section of the Markdown index."""
        # Locate the migration-status section
        migration_start = old_content.find("## 文档迁移状态")
        if migration_start == -1:
            # Section not found: insert it at a suitable position
            insert_pos = old_content.find("## 更新日志")
            if insert_pos == -1:
                insert_pos = len(old_content)
            migration_content = self._generate_migration_status(index_data)
            updated_content = old_content[:insert_pos] + "\n" + migration_content + "\n" + old_content[insert_pos:]
        else:
            # Replace the existing migration-status section. Search for the next
            # top-level heading ("\n## ") so that the "###" subsections generated
            # by _generate_migration_status do not end the section early.
            migration_end = old_content.find("\n## ", migration_start + 1)
            if migration_end == -1:
                migration_end = len(old_content)
            migration_content = self._generate_migration_status(index_data)
            updated_content = old_content[:migration_start] + migration_content + old_content[migration_end:]
        # Update the statistics/index section in the same way
        index_start = updated_content.find("## 文档统计信息")
        if index_start != -1:
            index_end = updated_content.find("\n## ", index_start + 1)
            if index_end == -1:
                index_end = len(updated_content)
            updated_content = updated_content[:index_start] + new_index_content + updated_content[index_end:]
        return updated_content

    def _generate_migration_status(self, index_data):
        """Generate the document-migration-status content."""
        lines = []
        lines.append("## 文档迁移状态\n")
        stats = index_data["statistics"]
        total_migrated = stats["total_documents"]
        # Estimated document counts for core-docs and thematic-research
        estimated_core_docs = 399  # based on earlier counts
        estimated_thematic = 142  # based on earlier counts
        total_estimated = estimated_core_docs + estimated_thematic
        migration_percentage = (total_migrated / total_estimated * 100) if total_estimated > 0 else 0
        lines.append(f"### 迁移进度: {migration_percentage:.1f}%\n")
        lines.append(f"- **已迁移文档**: {total_migrated}")
        lines.append(f"- **预计总文档**: {total_estimated}")
        lines.append(f"- **剩余文档**: {total_estimated - total_migrated}\n")
        lines.append("### 按类别迁移情况\n")
        for category, info in index_data["categories"].items():
            lines.append(f"- **{category.replace('-', ' ').title()}**: {info['count']} 个文档")
        return '\n'.join(lines)

    def run(self):
        """Run the indexing tool end to end."""
        print("=== 胡汉三千年项目文档索引工具 ===")
        print(f"扫描目录: {self.base_path}")
        # Scan documents
        print("正在扫描文档...")
        documents = self.scan_documents()
        # Generate the index
        print("正在生成索引...")
        index_data = self.generate_index(documents)
        # Save the JSON index
        print("正在保存索引文件...")
        self.save_index(index_data)
        # Update the Markdown index
        print("正在更新Markdown索引...")
        self.update_markdown_index(index_data)
        # Print summary statistics
        stats = index_data["statistics"]
        print("\n=== 索引完成 ===")
        print(f"处理文档数: {stats['total_documents']}")
        print(f"总大小: {stats['total_size_mb']} MB")
        print(f"索引文件: {self.index_file}")
        print(f"生成时间: {index_data['metadata']['generated_at']}")


def main():
    """Entry point."""
    indexer = DocumentIndexer()
    indexer.run()


if __name__ == "__main__":
    main()
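
A minimal usage sketch, assuming the script above is saved as document_indexer.py; both that filename and the alternative base_path below are illustrative, not taken from the commit:

    # Index the default tree baked into DocumentIndexer:
    #   python3 document_indexer.py
    # Or drive it from another script with a custom documentation root:
    from document_indexer import DocumentIndexer

    indexer = DocumentIndexer(base_path="/path/to/unified-docs")  # illustrative path
    indexer.run()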