methodology-and-tools/tools/doc-indexer.py (new file, 288 lines)
@@ -0,0 +1,288 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Document indexing tool for the Hu-Han Three Millennia (huhan3000) project.

Features:
1. Automatically scan the document directories
2. Generate a document index
3. Update the unified index file
4. Detect document changes

Author: huhan3000 project team
Version: 1.0.0
"""

import json
import hashlib
import datetime
from pathlib import Path

class DocumentIndexer:
    def __init__(self, base_path="/home/ben/code/huhan3000/unified-docs"):
        self.base_path = Path(base_path)
        self.index_file = self.base_path / "unified-index.json"
        self.categories = [
            "01-core-theory",
            "02-thematic-research",
            "03-historical-analysis",
            "04-methodology",
            "05-applications",
            "06-resources"
        ]

    def scan_documents(self):
        """Scan all document directories and collect document information."""
        documents = {}

        for category in self.categories:
            category_path = self.base_path / category
            if not category_path.exists():
                continue

            documents[category] = []

            # Scan Markdown files
            for md_file in category_path.rglob("*.md"):
                if md_file.name == "README.md":
                    continue

                doc_info = self._get_document_info(md_file, category)
                documents[category].append(doc_info)

        return documents

    def _get_document_info(self, file_path, category):
        """Collect detailed information about a single document."""
        stat = file_path.stat()

        # Hash the file contents
        file_hash = self._calculate_file_hash(file_path)

        # Read the file to extract basic information
        title = file_path.stem
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
            # Try to extract the title from the first level-1 heading
            lines = content.split('\n')
            for line in lines:
                if line.startswith('# '):
                    title = line[2:].strip()
                    break

        return {
            "title": title,
            "filename": file_path.name,
            "path": str(file_path.relative_to(self.base_path)),
            "category": category,
            "size": stat.st_size,
            "modified": datetime.datetime.fromtimestamp(stat.st_mtime).isoformat(),
            "hash": file_hash,
            # Whitespace-delimited tokens; a run of Chinese text counts as one token
            "word_count": len(content.split())
        }

    def _calculate_file_hash(self, file_path):
        """Compute an MD5 hash of the file contents."""
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def generate_index(self, documents):
        """Build the index data structure."""
        index_data = {
            "metadata": {
                "generated_at": datetime.datetime.now().isoformat(),
                "total_documents": sum(len(docs) for docs in documents.values()),
                "tool_version": "1.0.0"
            },
            "categories": {},
            "documents": documents,
            "statistics": self._calculate_statistics(documents)
        }

        # Per-category statistics
        for category, docs in documents.items():
            index_data["categories"][category] = {
                "count": len(docs),
                "total_size": sum(doc["size"] for doc in docs),
                "last_modified": max(doc["modified"] for doc in docs) if docs else None
            }

        return index_data

    def _calculate_statistics(self, documents):
        """Compute aggregate document statistics."""
        all_docs = []
        for docs in documents.values():
            all_docs.extend(docs)

        if not all_docs:
            return {
                "total_documents": 0,
                "total_size_bytes": 0,
                "total_size_mb": 0,
                "total_words": 0,
                "average_words_per_doc": 0,
                "last_modified": None
            }

        total_size = sum(doc["size"] for doc in all_docs)
        total_words = sum(doc["word_count"] for doc in all_docs)

        return {
            "total_documents": len(all_docs),
            "total_size_bytes": total_size,
            "total_size_mb": round(total_size / (1024 * 1024), 2),
            "total_words": total_words,
            "average_words_per_doc": round(total_words / len(all_docs), 2),
            "last_modified": max(doc["modified"] for doc in all_docs)
        }

    def save_index(self, index_data):
        """Save the index to the JSON index file."""
        with open(self.index_file, 'w', encoding='utf-8') as f:
            json.dump(index_data, f, ensure_ascii=False, indent=2)

    def update_markdown_index(self, index_data):
        """Update the Markdown-format index file."""
        md_index_file = self.base_path / "unified-index.md"

        # Read the existing Markdown index, if any
        if md_index_file.exists():
            with open(md_index_file, 'r', encoding='utf-8') as f:
                content = f.read()
        else:
            content = ""

        # Generate the new index content
        new_content = self._generate_markdown_index(index_data)

        # Update the document-migration-status section
        updated_content = self._update_migration_status(content, new_content, index_data)

        with open(md_index_file, 'w', encoding='utf-8') as f:
            f.write(updated_content)

    def _generate_markdown_index(self, index_data):
        """Generate the Markdown index content.

        Section headings and labels stay in Chinese so they keep matching
        the existing unified-index.md.
        """
        lines = []

        # Statistics section ("## 文档统计信息" = document statistics)
        stats = index_data["statistics"]
        lines.append("## 文档统计信息\n")
        lines.append(f"- **总文档数**: {stats['total_documents']}")
        lines.append(f"- **总大小**: {stats['total_size_mb']} MB")
        lines.append(f"- **总字数**: {stats['total_words']:,}")
        lines.append(f"- **平均每文档字数**: {stats['average_words_per_doc']}")
        lines.append(f"- **最后更新时间**: {stats['last_modified']}\n")

        # List documents by category
        for category, docs in index_data["documents"].items():
            if docs:
                lines.append(f"\n### {category.replace('-', ' ').title()}\n")

                for doc in sorted(docs, key=lambda x: x["title"]):
                    lines.append(f"- **{doc['title']}** - `{doc['filename']}` ")
                    lines.append(f"  - 大小: {round(doc['size']/1024, 1)} KB")
                    lines.append(f"  - 字数: {doc['word_count']}")
                    lines.append(f"  - 修改: {doc['modified'][:10]}")

        return '\n'.join(lines)

    def _update_migration_status(self, old_content, new_index_content, index_data):
        """Update the document-migration-status section."""
        # Locate the migration-status section ("## 文档迁移状态")
        migration_start = old_content.find("## 文档迁移状态")
        if migration_start == -1:
            # Not found: insert before the changelog section ("## 更新日志"),
            # or append at the end
            insert_pos = old_content.find("## 更新日志")
            if insert_pos == -1:
                insert_pos = len(old_content)

            migration_content = self._generate_migration_status(index_data)
            updated_content = old_content[:insert_pos] + "\n" + migration_content + "\n" + old_content[insert_pos:]
        else:
            # Replace the existing migration-status section; look for the next
            # top-level "\n## " heading so that "###" subsections inside this
            # section are not mistaken for its end
            migration_end = old_content.find("\n## ", migration_start + 1)
            if migration_end == -1:
                migration_end = len(old_content)

            migration_content = self._generate_migration_status(index_data)
            updated_content = old_content[:migration_start] + migration_content + old_content[migration_end:]

        # Update the statistics section ("## 文档统计信息")
        index_start = updated_content.find("## 文档统计信息")
        if index_start != -1:
            index_end = updated_content.find("\n## ", index_start + 1)
            if index_end == -1:
                index_end = len(updated_content)

            updated_content = updated_content[:index_start] + new_index_content + updated_content[index_end:]

        return updated_content

    def _generate_migration_status(self, index_data):
        """Generate the migration-status section content."""
        lines = []
        lines.append("## 文档迁移状态\n")

        stats = index_data["statistics"]
        total_migrated = stats["total_documents"]

        # Estimated document counts in core-docs and thematic-research
        estimated_core_docs = 399  # based on an earlier count
        estimated_thematic = 142  # based on an earlier count
        total_estimated = estimated_core_docs + estimated_thematic

        migration_percentage = (total_migrated / total_estimated * 100) if total_estimated > 0 else 0

        lines.append(f"### 迁移进度: {migration_percentage:.1f}%\n")
        lines.append(f"- **已迁移文档**: {total_migrated}")
        lines.append(f"- **预计总文档**: {total_estimated}")
        lines.append(f"- **剩余文档**: {total_estimated - total_migrated}\n")

        lines.append("### 按类别迁移情况\n")
        for category, info in index_data["categories"].items():
            lines.append(f"- **{category.replace('-', ' ').title()}**: {info['count']} 个文档")

        return '\n'.join(lines)

    def run(self):
        """Run the indexing tool."""
        print("=== huhan3000 Document Indexing Tool ===")
        print(f"Scanning directory: {self.base_path}")

        # Scan documents
        print("Scanning documents...")
        documents = self.scan_documents()

        # Generate the index
        print("Generating index...")
        index_data = self.generate_index(documents)

        # Save the JSON index
        print("Saving index file...")
        self.save_index(index_data)

        # Update the Markdown index
        print("Updating Markdown index...")
        self.update_markdown_index(index_data)

        # Print statistics
        stats = index_data["statistics"]
        print("\n=== Indexing complete ===")
        print(f"Documents processed: {stats['total_documents']}")
        print(f"Total size: {stats['total_size_mb']} MB")
        print(f"Index file: {self.index_file}")
        print(f"Generated at: {index_data['metadata']['generated_at']}")

def main():
    """Entry point."""
    indexer = DocumentIndexer()
    indexer.run()

if __name__ == "__main__":
    main()
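A minimal usage sketch for DocumentIndexer, under two assumptions not in the commit: the script is copied to an importable name such as doc_indexer.py (the hyphenated filename cannot be imported), and the unified-docs tree exists at the default base_path.

    from doc_indexer import DocumentIndexer  # hypothetical module name

    indexer = DocumentIndexer(base_path="/home/ben/code/huhan3000/unified-docs")
    documents = indexer.scan_documents()            # {category: [doc_info, ...]}
    index_data = indexer.generate_index(documents)
    indexer.save_index(index_data)                  # writes unified-index.json
    indexer.update_markdown_index(index_data)       # rewrites unified-index.md sections
    # or run the whole pipeline, as `python3 doc-indexer.py` does:
    indexer.run()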
methodology-and-tools/tools/doc-migrator.py (new file, 373 lines)
@@ -0,0 +1,373 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Document migration tool for the Hu-Han Three Millennia (huhan3000) project.

Features:
1. Migrate documents from core-docs and thematic-research into the unified
   documentation system
2. Classify and organize documents automatically
3. Preserve document structure and metadata
4. Generate a migration report

Author: huhan3000 project team
Version: 1.0.0
"""

import json
import shutil
import hashlib
from pathlib import Path
import datetime

class DocumentMigrator:
    def __init__(self):
        self.base_path = Path("/home/ben/code/huhan3000")
        self.unified_docs_path = self.base_path / "unified-docs"
        self.core_docs_path = self.base_path / "core-docs"
        self.thematic_research_path = self.base_path / "thematic-research"

        # Classification mapping rules; the Chinese keys are matched against
        # file paths and content, so they are kept as-is (glosses in comments)
        self.category_mapping = {
            # core-docs rules
            "core-docs": {
                "音韵考古学": "01-core-theory/01-phonological-archaeology",   # phonological archaeology
                "文明传播模型": "01-core-theory/02-civilization-diffusion",   # civilization diffusion model
                "方法论体系": "01-core-theory/03-methodology",                # methodology system
                "学术成果": "01-core-theory/04-academic-achievements",        # academic achievements
                "理论框架": "01-core-theory/05-theoretical-framework",        # theoretical framework
                "实证研究": "02-thematic-research/01-empirical-studies",      # empirical studies
                "历史分析": "03-historical-analysis/01-historical-events",    # historical analysis
                "文化比较": "04-cultural-comparison/01-cross-cultural",       # cultural comparison
                "技术实现": "05-technical-implementation/01-tools",           # technical implementation
                "项目文档": "06-project-docs/01-management"                   # project documents
            },
            # thematic-research rules
            "thematic-research": {
                "civilization-studies": "02-thematic-research/02-civilization-studies",
                "phonological-studies": "02-thematic-research/03-phonological-studies",
                "commercial-studies": "02-thematic-research/04-commercial-studies",
                "historical-studies": "03-historical-analysis/02-historical-studies",
                "cultural-studies": "04-cultural-comparison/02-cultural-studies",
                "theory-studies": "01-core-theory/06-theory-studies",
                "methodology-studies": "01-core-theory/03-methodology",
                "empirical-studies": "02-thematic-research/01-empirical-studies",
                "comparative-studies": "04-cultural-comparison/03-comparative-studies"
            }
        }

        # File extension to type mapping
        self.file_extensions = {
            ".md": "markdown",
            ".txt": "text",
            ".py": "python",
            ".json": "json",
            ".yaml": "yaml",
            ".yml": "yaml"
        }

        self.migration_report = {
            "metadata": {
                "migration_date": datetime.datetime.now().isoformat(),
                "tool_version": "1.0.0"
            },
            "statistics": {
                "total_files_scanned": 0,
                "total_files_migrated": 0,
                "total_files_skipped": 0,
                "total_errors": 0
            },
            "migration_details": {
                "core-docs": {"scanned": 0, "migrated": 0, "skipped": 0},
                "thematic-research": {"scanned": 0, "migrated": 0, "skipped": 0}
            },
            "errors": [],
            "migrated_files": []
        }

    def _calculate_file_hash(self, file_path):
        """Compute an MD5 hash of the file contents."""
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def _get_file_category(self, source_type, file_path, content=None):
        """Determine the target category from the file path and content."""
        file_path_str = str(file_path)

        # First try path-based classification
        for keyword, target_category in self.category_mapping[source_type].items():
            if keyword.lower() in file_path_str.lower():
                return target_category

        # Fall back to content-based classification, if content was provided
        if content:
            content_lower = content.lower()

            # Keyword matching; Chinese keys match document text (glosses in comments)
            keyword_categories = {
                "音韵": "01-core-theory/01-phonological-archaeology",   # phonology
                "文明": "01-core-theory/02-civilization-diffusion",     # civilization
                "方法": "01-core-theory/03-methodology",                # method(ology)
                "理论": "01-core-theory/05-theoretical-framework",      # theory
                "实证": "02-thematic-research/01-empirical-studies",    # empirical
                "历史": "03-historical-analysis/01-historical-events",  # history
                "文化": "04-cultural-comparison/01-cross-cultural",     # culture
                "技术": "05-technical-implementation/01-tools",         # technology
                "项目": "06-project-docs/01-management"                 # project
            }

            for keyword, category in keyword_categories.items():
                if keyword in content_lower:
                    return category

        # Default category
        if source_type == "core-docs":
            return "01-core-theory/99-uncategorized"
        else:
            return "02-thematic-research/99-uncategorized"

    def _ensure_directory(self, dir_path):
        """Make sure a directory exists."""
        dir_path.mkdir(parents=True, exist_ok=True)

    def _copy_file_with_metadata(self, source_path, target_path):
        """Copy a file, preserving its metadata."""
        try:
            # Copy the file (copy2 preserves timestamps)
            shutil.copy2(source_path, target_path)

            # Collect file information
            stat = source_path.stat()
            file_info = {
                "source_path": str(source_path),
                "target_path": str(target_path),
                "size": stat.st_size,
                "modified_time": datetime.datetime.fromtimestamp(stat.st_mtime).isoformat(),
                "hash": self._calculate_file_hash(source_path),
                "file_type": self.file_extensions.get(source_path.suffix, "unknown")
            }

            return file_info
        except Exception as e:
            raise RuntimeError(f"File copy failed: {e}") from e

    def _create_migration_metadata(self, source_path, target_path, category):
        """Write a migration metadata file next to the migrated document."""
        metadata_path = target_path.with_suffix(target_path.suffix + ".metadata.json")

        metadata = {
            "original_source": str(source_path),
            "migration_date": datetime.datetime.now().isoformat(),
            "category": category,
            "tool_version": "1.0.0"
        }

        with open(metadata_path, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, ensure_ascii=False, indent=2)

    def migrate_core_docs(self, dry_run=False):
        """Migrate core-docs documents."""
        print("Starting migration of core-docs documents...")

        migrated_files = []

        # Scan the core-docs directory
        for file_path in self.core_docs_path.rglob("*"):
            if file_path.is_file() and file_path.suffix in [".md", ".txt", ".py", ".json"]:
                self.migration_report["statistics"]["total_files_scanned"] += 1
                self.migration_report["migration_details"]["core-docs"]["scanned"] += 1

                try:
                    # Read text content for classification
                    content = None
                    if file_path.suffix in [".md", ".txt"]:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            content = f.read()

                    # Determine the target category
                    category = self._get_file_category("core-docs", file_path, content)

                    # Build the target path; only the file name is kept, so files
                    # with the same name in different subdirectories will collide
                    relative_path = file_path.relative_to(self.core_docs_path)
                    target_dir = self.unified_docs_path / category
                    target_path = target_dir / relative_path.name

                    # Make sure the target directory exists
                    self._ensure_directory(target_dir)

                    if not dry_run:
                        # Copy the file
                        file_info = self._copy_file_with_metadata(file_path, target_path)

                        # Create the metadata file
                        self._create_migration_metadata(file_path, target_path, category)

                        file_info["category"] = category
                        migrated_files.append(file_info)
                        # Also record the file in the report
                        self.migration_report["migrated_files"].append(file_info)

                        self.migration_report["statistics"]["total_files_migrated"] += 1
                        self.migration_report["migration_details"]["core-docs"]["migrated"] += 1

                        print(f"✓ Migrated: {file_path.name} -> {category}")
                    else:
                        print(f"[dry run] Would migrate: {file_path.name} -> {category}")

                except Exception as e:
                    error_msg = f"Migration failed for {file_path}: {e}"
                    self.migration_report["errors"].append(error_msg)
                    self.migration_report["statistics"]["total_errors"] += 1
                    self.migration_report["migration_details"]["core-docs"]["skipped"] += 1
                    print(f"✗ {error_msg}")

        return migrated_files

    def migrate_thematic_research(self, dry_run=False):
        """Migrate thematic-research documents."""
        print("Starting migration of thematic-research documents...")

        migrated_files = []

        # Scan the thematic-research directory
        for file_path in self.thematic_research_path.rglob("*"):
            if file_path.is_file() and file_path.suffix in [".md", ".txt", ".py", ".json"]:
                self.migration_report["statistics"]["total_files_scanned"] += 1
                self.migration_report["migration_details"]["thematic-research"]["scanned"] += 1

                try:
                    # Read text content for classification
                    content = None
                    if file_path.suffix in [".md", ".txt"]:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            content = f.read()

                    # Determine the target category
                    category = self._get_file_category("thematic-research", file_path, content)

                    # Build the target path; only the file name is kept, so files
                    # with the same name in different subdirectories will collide
                    relative_path = file_path.relative_to(self.thematic_research_path)
                    target_dir = self.unified_docs_path / category
                    target_path = target_dir / relative_path.name

                    # Make sure the target directory exists
                    self._ensure_directory(target_dir)

                    if not dry_run:
                        # Copy the file
                        file_info = self._copy_file_with_metadata(file_path, target_path)

                        # Create the metadata file
                        self._create_migration_metadata(file_path, target_path, category)

                        file_info["category"] = category
                        migrated_files.append(file_info)
                        # Also record the file in the report
                        self.migration_report["migrated_files"].append(file_info)

                        self.migration_report["statistics"]["total_files_migrated"] += 1
                        self.migration_report["migration_details"]["thematic-research"]["migrated"] += 1

                        print(f"✓ Migrated: {file_path.name} -> {category}")
                    else:
                        print(f"[dry run] Would migrate: {file_path.name} -> {category}")

                except Exception as e:
                    error_msg = f"Migration failed for {file_path}: {e}"
                    self.migration_report["errors"].append(error_msg)
                    self.migration_report["statistics"]["total_errors"] += 1
                    self.migration_report["migration_details"]["thematic-research"]["skipped"] += 1
                    print(f"✗ {error_msg}")

        return migrated_files

    def save_migration_report(self):
        """Save the migration report."""
        report_path = self.unified_docs_path / "migration-report.json"

        with open(report_path, 'w', encoding='utf-8') as f:
            json.dump(self.migration_report, f, ensure_ascii=False, indent=2)

        print(f"Migration report saved to: {report_path}")
        return report_path

    def print_summary(self):
        """Print a migration summary."""
        stats = self.migration_report["statistics"]
        details = self.migration_report["migration_details"]

        print("\n=== Migration Summary ===")
        print(f"Files scanned: {stats['total_files_scanned']}")
        print(f"Files migrated: {stats['total_files_migrated']}")
        print(f"Files skipped: {stats['total_files_skipped']}")
        print(f"Errors: {stats['total_errors']}")

        print("\n=== Details ===")
        for source_type, detail in details.items():
            print(f"{source_type}:")
            print(f"  scanned: {detail['scanned']}")
            print(f"  migrated: {detail['migrated']}")
            print(f"  skipped: {detail['skipped']}")

        if self.migration_report["errors"]:
            print("\n=== Errors ===")
            for error in self.migration_report["errors"]:
                print(f"  - {error}")

def main():
    """Entry point."""
    import sys

    migrator = DocumentMigrator()

    if len(sys.argv) < 2:
        print("Usage:")
        print("  python doc-migrator.py migrate [--dry-run]")
        print("  python doc-migrator.py migrate-core [--dry-run]")
        print("  python doc-migrator.py migrate-thematic [--dry-run]")
        print("  python doc-migrator.py summary")
        return

    command = sys.argv[1]
    dry_run = "--dry-run" in sys.argv

    if command == "migrate":
        print("Starting full migration...")

        # Migrate core-docs
        migrator.migrate_core_docs(dry_run)

        # Migrate thematic-research
        migrator.migrate_thematic_research(dry_run)

        # Save the report
        if not dry_run:
            migrator.save_migration_report()

        migrator.print_summary()

    elif command == "migrate-core":
        print("Starting migration of core-docs...")
        migrator.migrate_core_docs(dry_run)

        if not dry_run:
            migrator.save_migration_report()

        migrator.print_summary()

    elif command == "migrate-thematic":
        print("Starting migration of thematic-research...")
        migrator.migrate_thematic_research(dry_run)

        if not dry_run:
            migrator.save_migration_report()

        migrator.print_summary()

    elif command == "summary":
        migrator.print_summary()

    else:
        print(f"Unknown command: {command}")

if __name__ == "__main__":
    main()
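A minimal driver sketch for DocumentMigrator, equivalent to `python doc-migrator.py migrate --dry-run` followed by a real run. It assumes an importable copy named doc_migrator.py; re-instantiating before the real run is a suggestion, not part of the tool, so the dry-run pass does not inflate the scanned counters in the saved report.

    from doc_migrator import DocumentMigrator  # hypothetical module name

    migrator = DocumentMigrator()
    migrator.migrate_core_docs(dry_run=True)         # print planned moves only
    migrator.migrate_thematic_research(dry_run=True)
    migrator.print_summary()                         # nothing copied yet

    migrator = DocumentMigrator()                    # fresh report for the real run
    migrator.migrate_core_docs(dry_run=False)
    migrator.migrate_thematic_research(dry_run=False)
    migrator.save_migration_report()                 # unified-docs/migration-report.json
    migrator.print_summary()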
methodology-and-tools/tools/search-tool.py (new file, 303 lines)
@@ -0,0 +1,303 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Document search tool for the Hu-Han Three Millennia (huhan3000) project.

Features:
1. Full-text search over document contents
2. Keyword lookup
3. Category filtering
4. Fuzzy matching

Author: huhan3000 project team
Version: 1.0.0
"""

import json
import re
from pathlib import Path

class DocumentSearcher:
    def __init__(self, base_path="/home/ben/code/huhan3000/unified-docs"):
        self.base_path = Path(base_path)
        self.index_file = self.base_path / "unified-index.json"
        self.index_data = self._load_index()

    def _load_index(self):
        """Load the index file."""
        if not self.index_file.exists():
            print("Warning: index file not found; run the document indexing tool first")
            return {"documents": {}}

        with open(self.index_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def search_by_keyword(self, keyword, category=None, case_sensitive=False):
        """Search documents by keyword."""
        results = []

        for cat, docs in self.index_data.get("documents", {}).items():
            # If a category was given, search only that category
            if category and cat != category:
                continue

            for doc in docs:
                file_path = self.base_path / doc["path"]
                if not file_path.exists():
                    continue

                # Search the file contents
                matches = self._search_in_file(file_path, keyword, case_sensitive)
                if matches:
                    result = {
                        "document": doc,
                        "matches": matches,
                        "match_count": len(matches)
                    }
                    results.append(result)

        # Sort by number of matches
        results.sort(key=lambda x: x["match_count"], reverse=True)
        return results

    def _search_in_file(self, file_path, keyword, case_sensitive):
        """Search for a keyword in a single file."""
        matches = []

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Build the search pattern; the keyword is escaped so it matches literally
            pattern = re.escape(keyword)
            flags = 0 if case_sensitive else re.IGNORECASE

            # Find all occurrences
            for match in re.finditer(pattern, content, flags):
                start_line = content[:match.start()].count('\n') + 1
                end_line = content[:match.end()].count('\n') + 1

                # Grab a few lines of context around the match
                lines = content.split('\n')
                context_start = max(0, start_line - 3)
                context_end = min(len(lines), end_line + 3)
                context = '\n'.join(lines[context_start:context_end])

                matches.append({
                    "start_line": start_line,
                    "end_line": end_line,
                    "match_text": match.group(),
                    "context": context
                })

        except Exception as e:
            print(f"Error while searching {file_path}: {e}")

        return matches

    def search_by_title(self, title_pattern, category=None):
        """Search documents by title pattern."""
        results = []

        for cat, docs in self.index_data.get("documents", {}).items():
            # If a category was given, search only that category
            if category and cat != category:
                continue

            for doc in docs:
                if re.search(title_pattern, doc["title"], re.IGNORECASE):
                    results.append({
                        "document": doc,
                        "match_type": "title",
                        "match_score": self._calculate_match_score(title_pattern, doc["title"])
                    })

        # Sort by match score
        results.sort(key=lambda x: x["match_score"], reverse=True)
        return results

    def _calculate_match_score(self, pattern, text):
        """Compute a simple match score."""
        # An exact substring match scores highest
        if pattern.lower() in text.lower():
            return 1.0

        # Fuzzy score: fraction of pattern words that occur in the text
        pattern_words = set(pattern.lower().split())
        text_words = set(text.lower().split())

        if pattern_words.intersection(text_words):
            return len(pattern_words.intersection(text_words)) / len(pattern_words)

        return 0.0

    def list_documents(self, category=None, sort_by="title"):
        """List documents."""
        documents = []

        for cat, docs in self.index_data.get("documents", {}).items():
            # If a category was given, list only that category
            if category and cat != category:
                continue

            documents.extend(docs)

        # Sort
        if sort_by == "title":
            documents.sort(key=lambda x: x["title"])
        elif sort_by == "modified":
            documents.sort(key=lambda x: x["modified"], reverse=True)
        elif sort_by == "size":
            documents.sort(key=lambda x: x["size"], reverse=True)

        return documents

    def get_category_stats(self):
        """Return per-category statistics."""
        return self.index_data.get("categories", {})

    def get_overall_stats(self):
        """Return overall statistics."""
        return self.index_data.get("statistics", {})

    def print_search_results(self, results, max_results=10):
        """Print search results."""
        if not results:
            print("No matching documents found")
            return

        print(f"Found {len(results)} matching result(s):")
        print("-" * 80)

        for i, result in enumerate(results[:max_results]):
            doc = result["document"]
            print(f"{i+1}. {doc['title']}")
            print(f"   File: {doc['filename']}")
            print(f"   Category: {doc['category']}")
            print(f"   Size: {round(doc['size']/1024, 1)} KB")
            print(f"   Modified: {doc['modified'][:10]}")

            if "matches" in result:
                print(f"   Matches: {result['match_count']}")
                if result["match_count"] > 0:
                    match = result["matches"][0]
                    print(f"   Example match: line {match['start_line']} - {match['match_text'][:50]}...")

            print()

    def interactive_search(self):
        """Interactive search prompt."""
        print("=== huhan3000 Document Search Tool ===")
        print("Type 'quit' to exit")
        print("Type 'help' for help")
        print("-" * 50)

        while True:
            try:
                query = input("\nSearch keyword: ").strip()

                if query.lower() == 'quit':
                    break
                elif query.lower() == 'help':
                    self._print_help()
                    continue
                elif not query:
                    continue

                # Parse search options
                options = self._parse_search_options(query)

                # Run the search
                if options["search_type"] == "content":
                    results = self.search_by_keyword(
                        options["keyword"],
                        options["category"],
                        options["case_sensitive"]
                    )
                else:
                    results = self.search_by_title(
                        options["keyword"],
                        options["category"]
                    )

                self.print_search_results(results, options["max_results"])

            except KeyboardInterrupt:
                print("\nSearch cancelled")
                break
            except Exception as e:
                print(f"Search error: {e}")

    def _parse_search_options(self, query):
        """Parse search options from a query string."""
        options = {
            "search_type": "content",  # content or title
            "keyword": query,
            "category": None,
            "case_sensitive": False,
            "max_results": 10
        }

        # Simple option parsing
        if query.startswith("title:"):
            options["search_type"] = "title"
            options["keyword"] = query[6:].strip()
        elif query.startswith("cat:"):
            parts = query.split(" ")
            if len(parts) >= 2:
                options["category"] = parts[0][4:]
                options["keyword"] = " ".join(parts[1:])

        return options

    def _print_help(self):
        """Print help text."""
        print("\nSearch syntax:")
        print("  Plain search:     keyword")
        print("  Title search:     title:keyword")
        print("  Category search:  cat:category-name keyword")
        print("\nAvailable categories:")

        stats = self.get_category_stats()
        for category, info in stats.items():
            print(f"  {category}: {info.get('count', 0)} document(s)")

        print("\nExamples:")
        print("  Search for phonology-related content:     音韵")
        print("  Search titles containing '蒙古' (Mongol):  title:蒙古")
        print("  Search core theory for '方法论':           cat:01-core-theory 方法论")

def main():
    """Entry point."""
    import sys

    searcher = DocumentSearcher()

    if len(sys.argv) > 1:
        # Command-line mode
        query = " ".join(sys.argv[1:])
        options = searcher._parse_search_options(query)

        if options["search_type"] == "content":
            results = searcher.search_by_keyword(
                options["keyword"],
                options["category"],
                options["case_sensitive"]
            )
        else:
            results = searcher.search_by_title(
                options["keyword"],
                options["category"]
            )

        searcher.print_search_results(results, options["max_results"])
    else:
        # Interactive mode
        searcher.interactive_search()

if __name__ == "__main__":
    main()
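A minimal usage sketch for DocumentSearcher, mirroring the query forms that _parse_search_options accepts. It assumes an importable copy named search_tool.py and an existing unified-index.json.

    from search_tool import DocumentSearcher  # hypothetical module name

    searcher = DocumentSearcher()

    # Plain content search (the bare-keyword query form)
    results = searcher.search_by_keyword("音韵")

    # Title search, i.e. the "title:蒙古" query form
    results = searcher.search_by_title("蒙古")

    # Category-restricted search, i.e. "cat:01-core-theory 方法论"
    results = searcher.search_by_keyword("方法论", category="01-core-theory")

    searcher.print_search_results(results, max_results=5)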
methodology-and-tools/tools/version-manager.py (new file, 345 lines)
@@ -0,0 +1,345 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Document version management tool for the Hu-Han Three Millennia (huhan3000) project.

Features:
1. Document version control
2. Change-record management
3. Version comparison and restore
4. Change statistics

Author: huhan3000 project team
Version: 1.0.0
"""

import json
import hashlib
import datetime
import shutil
from pathlib import Path

class VersionManager:
    def __init__(self, base_path="/home/ben/code/huhan3000/unified-docs"):
        self.base_path = Path(base_path)
        self.versions_dir = self.base_path / ".versions"
        self.version_db = self.versions_dir / "version-db.json"

        # Initialize the versions directory
        self.versions_dir.mkdir(exist_ok=True)

        # Load the version database
        self.db = self._load_version_db()

    def _load_version_db(self):
        """Load the version database."""
        if self.version_db.exists():
            with open(self.version_db, 'r', encoding='utf-8') as f:
                return json.load(f)
        else:
            return {
                "metadata": {
                    "created_at": datetime.datetime.now().isoformat(),
                    "last_updated": datetime.datetime.now().isoformat(),
                    "tool_version": "1.0.0"
                },
                "documents": {},
                "statistics": {
                    "total_versions": 0,
                    "total_documents": 0,
                    "total_changes": 0
                }
            }

    def _save_version_db(self):
        """Save the version database."""
        self.db["metadata"]["last_updated"] = datetime.datetime.now().isoformat()

        with open(self.version_db, 'w', encoding='utf-8') as f:
            json.dump(self.db, f, ensure_ascii=False, indent=2)

    def _calculate_file_hash(self, file_path):
        """Compute an MD5 hash of the file contents."""
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def create_version(self, document_path, comment=""):
        """Create a new version of a document."""
        doc_path = Path(document_path)
        if not doc_path.exists():
            print(f"Error: document does not exist: {doc_path}")
            return False

        # Hash the file contents
        file_hash = self._calculate_file_hash(doc_path)

        # Collect document information
        stat = doc_path.stat()
        doc_info = {
            "path": str(doc_path.relative_to(self.base_path)),
            "size": stat.st_size,
            "modified": datetime.datetime.fromtimestamp(stat.st_mtime).isoformat(),
            "hash": file_hash
        }

        # Check whether the document already has a version record
        doc_key = str(doc_path.relative_to(self.base_path))
        if doc_key not in self.db["documents"]:
            self.db["documents"][doc_key] = {
                "versions": [],
                "created_at": datetime.datetime.now().isoformat(),
                "total_versions": 0
            }

        # Only create a new version if the content has changed
        current_versions = self.db["documents"][doc_key]["versions"]
        if current_versions:
            last_version = current_versions[-1]
            if last_version["hash"] == file_hash:
                print(f"Document {doc_key} is unchanged; skipping version creation")
                return False

        # Create the version directory
        version_id = f"v{len(current_versions) + 1:04d}"
        version_dir = self.versions_dir / doc_key.replace('/', '_') / version_id
        version_dir.mkdir(parents=True, exist_ok=True)

        # Save the version file
        version_file = version_dir / doc_path.name
        shutil.copy2(doc_path, version_file)

        # Record the version information
        version_info = {
            "version_id": version_id,
            "timestamp": datetime.datetime.now().isoformat(),
            "comment": comment,
            "hash": file_hash,
            "size": stat.st_size,
            "file_path": str(version_file.relative_to(self.versions_dir))
        }

        self.db["documents"][doc_key]["versions"].append(version_info)
        # current_versions is the same list object, so its length already
        # includes the version just appended
        self.db["documents"][doc_key]["total_versions"] = len(current_versions)
        self.db["documents"][doc_key]["last_updated"] = datetime.datetime.now().isoformat()

        # Update statistics
        self.db["statistics"]["total_versions"] += 1
        self.db["statistics"]["total_documents"] = len(self.db["documents"])

        # Count a change only when this is not the document's first version
        # (the list already includes the version just appended)
        if len(current_versions) > 1:
            self.db["statistics"]["total_changes"] += 1

        self._save_version_db()
        print(f"Created version {version_id} of document {doc_key}")
        return True

    def list_versions(self, document_path=None):
        """List document versions."""
        if document_path:
            doc_key = str(Path(document_path).relative_to(self.base_path))
            if doc_key not in self.db["documents"]:
                print(f"Document {doc_key} has no version records")
                return []

            return self.db["documents"][doc_key]["versions"]
        else:
            # List version info for all documents
            all_versions = []
            for doc_key, doc_info in self.db["documents"].items():
                for version in doc_info["versions"]:
                    version["document"] = doc_key
                    all_versions.append(version)

            # Sort by timestamp
            all_versions.sort(key=lambda x: x["timestamp"], reverse=True)
            return all_versions

    def compare_versions(self, document_path, version1, version2):
        """Compare two versions of a document."""
        doc_key = str(Path(document_path).relative_to(self.base_path))

        if doc_key not in self.db["documents"]:
            print(f"Document {doc_key} has no version records")
            return None

        versions = self.db["documents"][doc_key]["versions"]
        v1_info = next((v for v in versions if v["version_id"] == version1), None)
        v2_info = next((v for v in versions if v["version_id"] == version2), None)

        if not v1_info or not v2_info:
            print(f"Version {version1} or {version2} does not exist")
            return None

        # Read the contents of both versions
        v1_path = self.versions_dir / v1_info["file_path"]
        v2_path = self.versions_dir / v2_info["file_path"]

        with open(v1_path, 'r', encoding='utf-8') as f:
            v1_content = f.read()

        with open(v2_path, 'r', encoding='utf-8') as f:
            v2_content = f.read()

        # Simple difference summary
        diff_result = {
            "document": doc_key,
            "versions": [version1, version2],
            "size_change": v2_info["size"] - v1_info["size"],
            "hash_changed": v1_info["hash"] != v2_info["hash"],
            "line_count_change": len(v2_content.split('\n')) - len(v1_content.split('\n'))
        }

        return diff_result

    def restore_version(self, document_path, version_id):
        """Restore a document to a given version."""
        doc_key = str(Path(document_path).relative_to(self.base_path))

        if doc_key not in self.db["documents"]:
            print(f"Document {doc_key} has no version records")
            return False

        versions = self.db["documents"][doc_key]["versions"]
        version_info = next((v for v in versions if v["version_id"] == version_id), None)

        if not version_info:
            print(f"Version {version_id} does not exist")
            return False

        # Back up the current version
        current_path = self.base_path / doc_key
        if current_path.exists():
            backup_path = current_path.with_suffix(current_path.suffix + ".backup")
            shutil.copy2(current_path, backup_path)
            print(f"Current version backed up to {backup_path}")

        # Restore the requested version
        version_path = self.versions_dir / version_info["file_path"]
        shutil.copy2(version_path, current_path)

        print(f"Document {doc_key} restored to version {version_id}")
        return True

    def get_statistics(self):
        """Return version-management statistics."""
        return self.db["statistics"]

    def print_statistics(self):
        """Print statistics."""
        stats = self.get_statistics()

        print("=== Version Management Statistics ===")
        print(f"Documents: {stats['total_documents']}")
        print(f"Versions: {stats['total_versions']}")
        print(f"Changes: {stats['total_changes']}")

        # Per-document statistics
        print("\n=== Versions per Document ===")
        for doc_key, doc_info in self.db["documents"].items():
            print(f"{doc_key}: {doc_info['total_versions']} version(s)")

    def batch_create_versions(self, directory_path, comment=""):
        """Create versions for all documents in a directory."""
        dir_path = Path(directory_path)
        if not dir_path.exists():
            print(f"Directory does not exist: {dir_path}")
            return False

        created_count = 0

        # Scan the directory for Markdown files
        for md_file in dir_path.rglob("*.md"):
            if md_file.name == "README.md":
                continue

            if self.create_version(md_file, comment):
                created_count += 1

        print(f"Batch run complete; created {created_count} new version(s)")
        return True

def main():
    """Entry point."""
    import sys

    manager = VersionManager()

    if len(sys.argv) < 2:
        print("Usage:")
        print("  python version-manager.py create <document-path> [comment]")
        print("  python version-manager.py list [document-path]")
        print("  python version-manager.py compare <document-path> <version1> <version2>")
        print("  python version-manager.py restore <document-path> <version>")
        print("  python version-manager.py stats")
        print("  python version-manager.py batch <directory-path> [comment]")
        return

    command = sys.argv[1]

    if command == "create":
        if len(sys.argv) < 3:
            print("Error: a document path is required")
            return

        doc_path = sys.argv[2]
        comment = sys.argv[3] if len(sys.argv) > 3 else ""
        manager.create_version(doc_path, comment)

    elif command == "list":
        doc_path = sys.argv[2] if len(sys.argv) > 2 else None
        versions = manager.list_versions(doc_path)

        if versions:
            print(f"Found {len(versions)} version(s):")
            for version in versions:
                doc = version.get("document", "current document")
                print(f"  {version['version_id']} - {version['timestamp'][:19]} - {version['comment']} ({doc})")
        else:
            print("No version records found")

    elif command == "compare":
        if len(sys.argv) < 5:
            print("Error: a document path and two version ids are required")
            return

        doc_path = sys.argv[2]
        version1 = sys.argv[3]
        version2 = sys.argv[4]

        diff = manager.compare_versions(doc_path, version1, version2)
        if diff:
            print(f"Comparison result ({version1} -> {version2}):")
            print(f"  Size change: {diff['size_change']} bytes")
            print(f"  Hash changed: {'yes' if diff['hash_changed'] else 'no'}")
            print(f"  Line count change: {diff['line_count_change']} line(s)")

    elif command == "restore":
        if len(sys.argv) < 4:
            print("Error: a document path and a version id are required")
            return

        doc_path = sys.argv[2]
        version_id = sys.argv[3]
        manager.restore_version(doc_path, version_id)

    elif command == "stats":
        manager.print_statistics()

    elif command == "batch":
        if len(sys.argv) < 3:
            print("Error: a directory path is required")
            return

        dir_path = sys.argv[2]
        comment = sys.argv[3] if len(sys.argv) > 3 else "batch version creation"
        manager.batch_create_versions(dir_path, comment)

    else:
        print(f"Unknown command: {command}")

if __name__ == "__main__":
    main()
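A minimal usage sketch for VersionManager. It assumes an importable copy named version_manager.py; the document path is hypothetical and must live under the base_path, since create_version derives its key via relative_to.

    from version_manager import VersionManager  # hypothetical module name

    manager = VersionManager()
    doc = "/home/ben/code/huhan3000/unified-docs/01-core-theory/example.md"  # hypothetical document

    manager.create_version(doc, "initial snapshot")    # creates v0001
    # ... edit the document, then:
    manager.create_version(doc, "after revision")      # v0002, only if the hash changed
    print(manager.list_versions(doc))
    print(manager.compare_versions(doc, "v0001", "v0002"))
    manager.restore_version(doc, "v0001")              # current file is backed up as example.md.backup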