huhan3000/unified-docs/tools/version-manager.py

345 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
胡汉三千年项目文档版本管理工具
功能:
1. 文档版本控制
2. 变更记录管理
3. 版本比较和恢复
4. 变更统计
作者:胡汉三千年项目团队
版本:1.0.0
"""
import os
import json
import hashlib
import datetime
import shutil
from pathlib import Path
class VersionManager:
    """Snapshot-based version store for documents under one base directory.

    Every version is a full copy of the document stored below
    ``<base_path>/.versions`` and indexed in ``version-db.json`` inside that
    directory. Documents are identified by their POSIX-style path relative to
    ``base_path`` (the "document key").

    Attributes:
        base_path: Root directory of the managed documents.
        versions_dir: Directory holding snapshots and the database file.
        version_db: Path of the JSON database file.
        db: In-memory database with ``metadata``, ``documents`` and
            ``statistics`` sections.
    """

    def __init__(self, base_path="/home/ben/code/huhan3000/unified-docs"):
        """Create the snapshot directory if needed and load the database.

        Args:
            base_path: Root directory of the managed documents. It must
                already exist; only the ``.versions`` child is created.

        Raises:
            FileNotFoundError: If ``base_path`` does not exist.
        """
        self.base_path = Path(base_path)
        self.versions_dir = self.base_path / ".versions"
        self.version_db = self.versions_dir / "version-db.json"
        self.versions_dir.mkdir(exist_ok=True)
        self.db = self._load_version_db()

    def _load_version_db(self):
        """Return the database read from disk, or a fresh empty one."""
        if self.version_db.exists():
            with open(self.version_db, "r", encoding="utf-8") as f:
                return json.load(f)
        now = datetime.datetime.now().isoformat()
        return {
            "metadata": {
                "created_at": now,
                "last_updated": now,
                "tool_version": "1.0.0",
            },
            "documents": {},
            "statistics": {
                "total_versions": 0,
                "total_documents": 0,
                "total_changes": 0,
            },
        }

    def _save_version_db(self):
        """Stamp ``last_updated`` and write the database to disk."""
        self.db["metadata"]["last_updated"] = datetime.datetime.now().isoformat()
        # Write to a sibling file and rename it into place so an interrupted
        # write cannot leave a truncated database behind.
        tmp_path = self.version_db.with_name(self.version_db.name + ".tmp")
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(self.db, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, self.version_db)

    def _calculate_file_hash(self, file_path):
        """Return the hex MD5 digest of the file's bytes.

        The digest is only compared against earlier digests to detect
        content changes.
        """
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def _doc_key(self, document_path):
        """Map a user-supplied path to its document key.

        Absolute paths must lie below ``base_path``. Relative paths are tried
        as given, then relative to the current working directory, then
        relative to ``base_path``. Both the literal and the resolved form of
        each candidate are compared with the literal and resolved base.

        Returns:
            The POSIX-style path relative to ``base_path``, or ``None`` when
            the path is not inside the base directory.
        """
        raw = Path(document_path)
        if raw.is_absolute():
            candidates = [raw]
        else:
            candidates = [raw, Path.cwd() / raw, self.base_path / raw]
        bases = (self.base_path, self.base_path.resolve())
        for candidate in candidates:
            for form in (candidate, candidate.resolve()):
                for base in bases:
                    try:
                        rel = form.relative_to(base)
                    except ValueError:
                        continue
                    # An empty result is the base itself; ".." would escape it.
                    if rel.parts and ".." not in rel.parts:
                        return rel.as_posix()
        return None

    def _versions_for(self, document_path):
        """Return ``(doc_key, versions)`` for a tracked document.

        When the document has no record, the usual message is printed and
        ``versions`` is ``None``.
        """
        doc_key = self._doc_key(document_path)
        record = None
        if doc_key is not None:
            record = self.db["documents"].get(doc_key)
        if record is None:
            label = doc_key if doc_key is not None else document_path
            print(f"文档 {label} 没有版本记录")
            return doc_key, None
        return doc_key, record["versions"]

    @staticmethod
    def _find_version(versions, version_id):
        """Return the version record with the given id, or ``None``."""
        return next((v for v in versions if v["version_id"] == version_id), None)

    def create_version(self, document_path, comment=""):
        """Snapshot a document if its content differs from the last version.

        Args:
            document_path: Path of the document (see ``_doc_key`` for how
                relative paths are interpreted).
            comment: Free-text note stored with the version.

        Returns:
            ``True`` when a new version was stored; ``False`` when the
            document is missing, lies outside ``base_path``, or is unchanged.
        """
        doc_key = self._doc_key(document_path)
        if doc_key is None:
            if not Path(document_path).exists():
                print(f"错误:文档不存在 {document_path}")
            else:
                print(f"错误:文档不在文档根目录 {self.base_path} 内 {document_path}")
            return False
        doc_path = self.base_path / doc_key
        if not doc_path.is_file():
            print(f"错误:文档不存在 {doc_path}")
            return False

        file_hash = self._calculate_file_hash(doc_path)
        stat = doc_path.stat()

        doc_record = self.db["documents"].setdefault(
            doc_key,
            {
                "versions": [],
                "created_at": datetime.datetime.now().isoformat(),
                "total_versions": 0,
            },
        )
        versions = doc_record["versions"]
        if versions and versions[-1]["hash"] == file_hash:
            print(f"文档 {doc_key} 内容未改变,跳过版本创建")
            return False

        # Decide this before appending: ``versions`` is the stored list
        # itself, so its length changes as soon as the new record is added.
        had_previous = bool(versions)
        version_id = f"v{len(versions) + 1:04d}"

        # One directory per document (slashes flattened to underscores),
        # one sub-directory per version.
        version_dir = self.versions_dir / doc_key.replace("/", "_") / version_id
        version_dir.mkdir(parents=True, exist_ok=True)
        version_file = version_dir / doc_path.name
        shutil.copy2(doc_path, version_file)

        now = datetime.datetime.now().isoformat()
        versions.append(
            {
                "version_id": version_id,
                "timestamp": now,
                "comment": comment,
                "hash": file_hash,
                "size": stat.st_size,
                "file_path": version_file.relative_to(self.versions_dir).as_posix(),
            }
        )
        doc_record["total_versions"] = len(versions)
        doc_record["last_updated"] = now

        stats = self.db["statistics"]
        stats["total_versions"] += 1
        stats["total_documents"] = len(self.db["documents"])
        # A "change" is a version that supersedes an earlier one.
        if had_previous:
            stats["total_changes"] += 1

        self._save_version_db()
        print(f"已为文档 {doc_key} 创建版本 {version_id}")
        return True

    def list_versions(self, document_path=None):
        """Return version records for one document or for all documents.

        Args:
            document_path: Document to list. When omitted, versions of every
                document are returned newest first, each with an added
                ``"document"`` entry naming its document key.

        Returns:
            A list of version dicts; empty when the document is untracked.
        """
        if document_path:
            _, versions = self._versions_for(document_path)
            return list(versions) if versions is not None else []

        all_versions = []
        for doc_key, doc_info in self.db["documents"].items():
            for version in doc_info["versions"]:
                # Annotate a copy so the stored record is not modified.
                all_versions.append({**version, "document": doc_key})
        all_versions.sort(key=lambda v: v["timestamp"], reverse=True)
        return all_versions

    def compare_versions(self, document_path, version1, version2):
        """Summarise the difference between two stored versions.

        Snapshots are read as UTF-8 text to count lines.

        Returns:
            A dict with ``document``, ``versions``, ``size_change``,
            ``hash_changed`` and ``line_count_change``; ``None`` when the
            document or either version is unknown.
        """
        doc_key, versions = self._versions_for(document_path)
        if versions is None:
            return None

        v1_info = self._find_version(versions, version1)
        v2_info = self._find_version(versions, version2)
        if v1_info is None or v2_info is None:
            missing = [
                vid
                for vid, info in ((version1, v1_info), (version2, v2_info))
                if info is None
            ]
            print(f"版本 {'、'.join(missing)} 不存在")
            return None

        v1_path = self.versions_dir / v1_info["file_path"]
        v2_path = self.versions_dir / v2_info["file_path"]
        with open(v1_path, "r", encoding="utf-8") as f:
            v1_content = f.read()
        with open(v2_path, "r", encoding="utf-8") as f:
            v2_content = f.read()

        return {
            "document": doc_key,
            "versions": [version1, version2],
            "size_change": v2_info["size"] - v1_info["size"],
            "hash_changed": v1_info["hash"] != v2_info["hash"],
            "line_count_change": len(v2_content.split("\n")) - len(v1_content.split("\n")),
        }

    def restore_version(self, document_path, version_id):
        """Overwrite the document with a stored version.

        The current file, if present, is first copied next to itself with a
        ``.backup`` suffix appended to its name.

        Returns:
            ``True`` on success; ``False`` when the document, the version or
            the snapshot file is missing.
        """
        doc_key, versions = self._versions_for(document_path)
        if versions is None:
            return False

        version_info = self._find_version(versions, version_id)
        if version_info is None:
            print(f"版本 {version_id} 不存在")
            return False

        # Check the snapshot before touching the working copy so a missing
        # snapshot does not leave a stray backup behind.
        version_path = self.versions_dir / version_info["file_path"]
        if not version_path.is_file():
            print(f"错误:版本文件缺失 {version_path}")
            return False

        current_path = self.base_path / doc_key
        if current_path.exists():
            backup_path = current_path.with_name(current_path.name + ".backup")
            shutil.copy2(current_path, backup_path)
            print(f"当前版本已备份到 {backup_path}")
        else:
            # The document's directory may have been removed as well.
            current_path.parent.mkdir(parents=True, exist_ok=True)

        shutil.copy2(version_path, current_path)
        print(f"文档 {doc_key} 已恢复到版本 {version_id}")
        return True

    def get_statistics(self):
        """Return the database's ``statistics`` section (the live dict)."""
        return self.db["statistics"]

    def print_statistics(self):
        """Print global counters followed by a per-document version count."""
        stats = self.get_statistics()
        print("=== 版本管理统计 ===")
        print(f"总文档数: {stats['total_documents']}")
        print(f"总版本数: {stats['total_versions']}")
        print(f"总变更次数: {stats['total_changes']}")

        print("\n=== 文档版本统计 ===")
        for doc_key, doc_info in self.db["documents"].items():
            # Count the records themselves; a stored ``total_versions``
            # written by an earlier run may be inaccurate.
            print(f"{doc_key}: {len(doc_info['versions'])} 个版本")

    def batch_create_versions(self, directory_path, comment=""):
        """Create versions for every Markdown file below a directory.

        Files named ``README.md`` and files inside the snapshot directory are
        skipped.

        Returns:
            ``False`` when the directory does not exist, otherwise ``True``.
        """
        dir_path = Path(directory_path)
        if not dir_path.exists():
            print(f"目录不存在: {dir_path}")
            return False

        created_count = 0
        for md_file in sorted(dir_path.rglob("*.md")):
            if md_file.name == "README.md":
                continue
            doc_key = self._doc_key(md_file)
            # Snapshots are copies of tracked documents; versioning them
            # would register every snapshot as a new document.
            if doc_key is not None and doc_key.split("/", 1)[0] == self.versions_dir.name:
                continue
            if self.create_version(md_file, comment):
                created_count += 1

        print(f"批量创建完成,共创建 {created_count} 个新版本")
        return True
def main():
    """Command-line entry point.

    Reads the sub-command and its arguments from ``sys.argv`` and dispatches
    to a ``VersionManager``. Usage, unknown-command and missing-argument
    messages are printed before any manager is constructed, so they work even
    when the document root is unavailable.
    """
    import sys

    args = sys.argv[1:]
    if not args:
        print("用法:")
        print(" python version-manager.py create <文档路径> [注释]")
        print(" python version-manager.py list [文档路径]")
        print(" python version-manager.py compare <文档路径> <版本1> <版本2>")
        print(" python version-manager.py restore <文档路径> <版本>")
        print(" python version-manager.py stats")
        print(" python version-manager.py batch <目录路径> [注释]")
        return

    command, params = args[0], args[1:]

    # Minimum number of arguments per command and the error shown otherwise.
    requirements = {
        "create": (1, "错误:需要指定文档路径"),
        "list": (0, ""),
        "compare": (3, "错误:需要指定文档路径和两个版本号"),
        "restore": (2, "错误:需要指定文档路径和版本号"),
        "stats": (0, ""),
        "batch": (1, "错误:需要指定目录路径"),
    }
    if command not in requirements:
        print(f"未知命令: {command}")
        return
    minimum, error_message = requirements[command]
    if len(params) < minimum:
        print(error_message)
        return

    manager = VersionManager()

    if command == "create":
        comment = params[1] if len(params) > 1 else ""
        manager.create_version(params[0], comment)
    elif command == "list":
        doc_path = params[0] if params else None
        versions = manager.list_versions(doc_path)
        if versions:
            print(f"找到 {len(versions)} 个版本:")
            for version in versions:
                # Single-document listings carry no "document" entry.
                doc = version.get("document", "当前文档")
                print(f" {version['version_id']} - {version['timestamp'][:19]} - {version['comment']} ({doc})")
        else:
            print("没有找到版本记录")
    elif command == "compare":
        doc_path, version1, version2 = params[0], params[1], params[2]
        diff = manager.compare_versions(doc_path, version1, version2)
        if diff:
            print(f"版本比较结果 ({version1} -> {version2}):")
            print(f" 大小变化: {diff['size_change']} 字节")
            print(f" 哈希变化: {'是' if diff['hash_changed'] else '否'}")
            print(f" 行数变化: {diff['line_count_change']}")
    elif command == "restore":
        manager.restore_version(params[0], params[1])
    elif command == "stats":
        manager.print_statistics()
    elif command == "batch":
        comment = params[1] if len(params) > 1 else "批量创建版本"
        manager.batch_create_versions(params[0], comment)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()