重大发现:全球文明天崇拜和玉崇拜普遍性验证完成

- 验证了地球上所有文明都具备天崇拜和玉崇拜模式
- 覆盖亚洲、欧洲、非洲、美洲、大洋洲、中东等全球范围
- 确认K音文明传播网络的全球分布
- 完善昆仑38词汇系统的理论框架
- 更新坦桑尼亚玉石开采和埃及法老坟墓水银的考古证据
- 全球文明同源论取得重大突破
This commit is contained in:
ben
2025-10-30 13:48:03 +00:00
parent 6b9c762367
commit 2a19a79695
119 changed files with 6319 additions and 875 deletions

View File

@@ -0,0 +1,303 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
胡汉三千年项目文档搜索工具
功能:
1. 全文搜索文档内容
2. 按关键词检索
3. 按类别过滤
4. 支持模糊搜索
作者:胡汉三千年项目团队
版本:1.0.0
"""
import os
import json
import re
from pathlib import Path
class DocumentSearcher:
    """Search documents listed in a JSON index stored under ``base_path``.

    The index file is ``<base_path>/unified-index.json``.  This class reads
    three top-level keys from it:

    * ``"documents"`` - mapping of category name to a list of document
      records (dicts).  Records are accessed by the keys ``path``, ``title``,
      ``filename``, ``category``, ``size`` and ``modified``.
    * ``"categories"`` - per-category statistics (``count`` is displayed).
    * ``"statistics"`` - overall statistics, returned as-is.
    """

    # Number of lines of context captured before and after each match.
    CONTEXT_LINES = 3

    def __init__(self, base_path="/home/ben/code/huhan3000/unified-docs"):
        """Remember the documentation root and load its index.

        Args:
            base_path: Directory that contains ``unified-index.json`` and to
                which every record's ``path`` is relative.
        """
        self.base_path = Path(base_path)
        self.index_file = self.base_path / "unified-index.json"
        self.index_data = self._load_index()

    def _load_index(self):
        """Load the index file.

        Returns:
            The parsed index dict, or ``{"documents": {}}`` (after printing a
            warning) when the file is missing, unreadable, not valid JSON, or
            not a JSON object.
        """
        empty_index = {"documents": {}}
        if not self.index_file.exists():
            print("警告:索引文件不存在,请先运行文档索引工具")
            return empty_index
        try:
            with open(self.index_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError
            # subclasses; treat a corrupt index like a missing one.
            print(f"警告:索引文件无法读取或格式错误 {self.index_file}: {e}")
            return empty_index
        if not isinstance(data, dict):
            print(f"警告:索引文件无法读取或格式错误 {self.index_file}: 顶层不是对象")
            return empty_index
        return data

    def _iter_documents(self, category=None):
        """Yield every document record, optionally limited to one category."""
        for cat, docs in self.index_data.get("documents", {}).items():
            if category and cat != category:
                continue
            yield from docs

    def search_by_keyword(self, keyword, category=None, case_sensitive=False):
        """Search document contents for a literal keyword.

        Args:
            keyword: Text to look for (matched literally, not as a regex).
            category: If truthy, only documents in this category are searched.
            case_sensitive: Match case exactly when true.

        Returns:
            A list of ``{"document", "matches", "match_count"}`` dicts for
            documents with at least one match, most matches first.
        """
        results = []
        for doc in self._iter_documents(category):
            rel_path = doc.get("path")
            if not rel_path:
                continue
            file_path = self.base_path / rel_path
            # Index entries may be stale; skip anything that is not a file.
            if not file_path.is_file():
                continue
            matches = self._search_in_file(file_path, keyword, case_sensitive)
            if matches:
                results.append({
                    "document": doc,
                    "matches": matches,
                    "match_count": len(matches),
                })
        results.sort(key=lambda r: r["match_count"], reverse=True)
        return results

    def _search_in_file(self, file_path, keyword, case_sensitive):
        """Find every occurrence of ``keyword`` in one UTF-8 text file.

        Args:
            file_path: File to read.
            keyword: Literal text to find; an empty keyword yields no matches.
            case_sensitive: Match case exactly when true.

        Returns:
            A list of dicts with 1-based ``start_line`` / ``end_line``, the
            matched ``match_text`` and a ``context`` string holding up to
            ``CONTEXT_LINES`` lines on each side of the match.  Read or
            decode errors are reported on stdout and produce an empty list.
        """
        # An empty pattern would "match" at every character position.
        if not keyword:
            return []
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"搜索文件时出错 {file_path}: {e}")
            return []

        flags = 0 if case_sensitive else re.IGNORECASE
        pattern = re.compile(re.escape(keyword), flags)
        lines = content.split('\n')

        matches = []
        # finditer yields matches left to right, so the line number can be
        # advanced incrementally instead of recounting from the file start.
        line_no = 1
        scanned_to = 0
        for match in pattern.finditer(content):
            line_no += content.count('\n', scanned_to, match.start())
            scanned_to = match.start()
            start_line = line_no
            end_line = start_line + content.count('\n', match.start(), match.end())

            # Line numbers are 1-based while ``lines`` is 0-based.
            context_start = max(0, start_line - 1 - self.CONTEXT_LINES)
            context_end = min(len(lines), end_line + self.CONTEXT_LINES)
            matches.append({
                "start_line": start_line,
                "end_line": end_line,
                "match_text": match.group(),
                "context": '\n'.join(lines[context_start:context_end]),
            })
        return matches

    def search_by_title(self, title_pattern, category=None):
        """Search document titles with a case-insensitive regex.

        Args:
            title_pattern: Regular expression; if it is not a valid regex
                (for example ``C++``) it is matched as literal text instead.
            category: If truthy, only documents in this category are searched.

        Returns:
            A list of ``{"document", "match_type", "match_score"}`` dicts,
            highest score first.
        """
        try:
            matcher = re.compile(title_pattern, re.IGNORECASE)
        except re.error:
            matcher = re.compile(re.escape(title_pattern), re.IGNORECASE)

        results = []
        for doc in self._iter_documents(category):
            title = str(doc.get("title", ""))
            if matcher.search(title):
                results.append({
                    "document": doc,
                    "match_type": "title",
                    "match_score": self._calculate_match_score(title_pattern, title),
                })
        results.sort(key=lambda r: r["match_score"], reverse=True)
        return results

    def _calculate_match_score(self, pattern, text):
        """Score how well ``pattern`` matches ``text``.

        Returns:
            ``1.0`` when ``pattern`` is a case-insensitive substring of
            ``text``; otherwise the fraction of whitespace-separated pattern
            words that also occur as words of ``text`` (``0.0`` if none).
        """
        pattern_lower = pattern.lower()
        text_lower = text.lower()
        if pattern_lower in text_lower:
            return 1.0
        pattern_words = set(pattern_lower.split())
        shared = pattern_words & set(text_lower.split())
        if shared:
            return len(shared) / len(pattern_words)
        return 0.0

    def list_documents(self, category=None, sort_by="title"):
        """List document records.

        Args:
            category: If truthy, only documents in this category are listed.
            sort_by: ``"title"`` (ascending), ``"modified"`` or ``"size"``
                (both descending).  Any other value leaves index order.

        Returns:
            A new list of document records.
        """
        documents = list(self._iter_documents(category))
        # Missing fields sort as empty/zero instead of raising KeyError.
        if sort_by == "title":
            documents.sort(key=lambda d: d.get("title", ""))
        elif sort_by == "modified":
            documents.sort(key=lambda d: d.get("modified", ""), reverse=True)
        elif sort_by == "size":
            documents.sort(key=lambda d: d.get("size", 0), reverse=True)
        return documents

    def get_category_stats(self):
        """Return the index's ``"categories"`` mapping (empty if absent)."""
        return self.index_data.get("categories", {})

    def get_overall_stats(self):
        """Return the index's ``"statistics"`` mapping (empty if absent)."""
        return self.index_data.get("statistics", {})

    def print_search_results(self, results, max_results=10):
        """Print a summary of search results to stdout.

        Args:
            results: Output of ``search_by_keyword`` or ``search_by_title``.
            max_results: Maximum number of entries to print.
        """
        if not results:
            print("未找到匹配的文档")
            return
        print(f"找到 {len(results)} 个匹配结果:")
        print("-" * 80)
        for number, result in enumerate(results[:max_results], start=1):
            doc = result["document"]
            size = doc.get('size', 0) or 0
            modified = str(doc.get('modified', ''))
            print(f"{number}. {doc.get('title', '')}")
            print(f" 文件: {doc.get('filename', '')}")
            print(f" 类别: {doc.get('category', '')}")
            print(f" 大小: {round(size / 1024, 1)} KB")
            print(f" 修改: {modified[:10]}")
            if "matches" in result:
                print(f" 匹配数: {result['match_count']}")
                if result["match_count"] > 0:
                    match = result["matches"][0]
                    snippet = match['match_text']
                    # Only mark the text as truncated when it really is.
                    if len(snippet) > 50:
                        snippet = snippet[:50] + "..."
                    print(f" 示例匹配: 第{match['start_line']}行 - {snippet}")
            print()

    def _run_search(self, options):
        """Run the search described by a ``_parse_search_options`` result."""
        if options["search_type"] == "content":
            return self.search_by_keyword(
                options["keyword"],
                options["category"],
                options["case_sensitive"],
            )
        return self.search_by_title(options["keyword"], options["category"])

    def interactive_search(self):
        """Run a read-search-print loop on stdin/stdout.

        The loop ends on ``quit``, Ctrl-C, or end of input.  Any other error
        raised by a single query is printed and the loop continues.
        """
        print("=== 胡汉三千年项目文档搜索工具 ===")
        print("输入 'quit' 退出搜索")
        print("输入 'help' 查看帮助")
        print("-" * 50)
        while True:
            try:
                query = input("\n搜索关键词: ").strip()
                command = query.lower()
                if command == 'quit':
                    break
                if command == 'help':
                    self._print_help()
                    continue
                if not query:
                    continue
                options = self._parse_search_options(query)
                results = self._run_search(options)
                self.print_search_results(results, options["max_results"])
            except (KeyboardInterrupt, EOFError):
                # EOFError must end the loop: once stdin is exhausted every
                # further input() call raises it again.
                print("\n搜索已取消")
                break
            except Exception as e:
                print(f"搜索出错: {e}")

    def _parse_search_options(self, query):
        """Parse a query string into search options.

        Supported forms are ``keyword``, ``title:keyword`` and
        ``cat:category keyword``.  A ``cat:`` prefix without a following
        keyword is treated as a plain keyword.

        Returns:
            A dict with ``search_type`` (``"content"`` or ``"title"``),
            ``keyword``, ``category``, ``case_sensitive`` and ``max_results``.
        """
        options = {
            "search_type": "content",  # "content" or "title"
            "keyword": query,
            "category": None,
            "case_sensitive": False,
            "max_results": 10,
        }
        if query.startswith("title:"):
            options["search_type"] = "title"
            options["keyword"] = query[6:].strip()
        elif query.startswith("cat:"):
            # Split on any run of whitespace so extra spaces between the
            # category and the keyword do not leak into the keyword.
            parts = query.split(None, 1)
            if len(parts) == 2:
                options["category"] = parts[0][4:]
                options["keyword"] = parts[1].strip()
        return options

    def _print_help(self):
        """Print the query syntax, the known categories and some examples."""
        print("\n搜索语法:")
        print(" 普通搜索: 关键词")
        print(" 标题搜索: title:关键词")
        print(" 类别搜索: cat:类别名 关键词")
        print("\n可用类别:")
        for category, info in self.get_category_stats().items():
            print(f" {category}: {info.get('count', 0)} 个文档")
        print("\n示例:")
        print(" 搜索音韵相关内容: 音韵")
        print(" 搜索标题包含'蒙古'的文档: title:蒙古")
        print(" 在核心理论中搜索'方法论': cat:01-core-theory 方法论")
def main():
    """Run one search from the command-line arguments, or start the prompt.

    With arguments, they are joined with spaces into a single query, parsed,
    searched, and the results printed.  Without arguments the interactive
    search loop is started.
    """
    import sys

    searcher = DocumentSearcher()
    cli_args = sys.argv[1:]

    # No arguments: hand over to the interactive prompt.
    if not cli_args:
        searcher.interactive_search()
        return

    # Command-line mode: the whole argument list forms one query.
    options = searcher._parse_search_options(" ".join(cli_args))
    if options["search_type"] == "content":
        results = searcher.search_by_keyword(
            options["keyword"],
            options["category"],
            options["case_sensitive"],
        )
    else:
        results = searcher.search_by_title(
            options["keyword"],
            options["category"],
        )
    searcher.print_search_results(results, options["max_results"])


if __name__ == "__main__":
    main()