#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Document version management tool for the 胡汉三千年 (huhan3000) project.

Features:
1. Document version control
2. Change record management
3. Version comparison and restore
4. Change statistics

Author: 胡汉三千年 project team
Version: 1.0.0
"""

import datetime
import hashlib
import json
import os
import shutil
from pathlib import Path


class VersionManager:
    """Snapshot-based version store for documents under a base directory.

    Each version is a full copy of the document stored below
    ``<base_path>/.versions``. Metadata (hash, size, timestamp, comment) is
    kept in ``version-db.json`` inside that directory.
    """

    def __init__(self, base_path="/home/ben/code/huhan3000/unified-docs"):
        """Open (or initialise) the version store rooted at *base_path*.

        Args:
            base_path: Directory that contains the managed documents.
        """
        self.base_path = Path(base_path)
        self.versions_dir = self.base_path / ".versions"
        self.version_db = self.versions_dir / "version-db.json"

        # parents=True: a base directory that does not exist yet must not
        # abort construction with FileNotFoundError.
        self.versions_dir.mkdir(parents=True, exist_ok=True)

        self.db = self._load_version_db()

    def _load_version_db(self) -> dict:
        """Return the database read from disk, or a fresh empty database."""
        if self.version_db.exists():
            with open(self.version_db, 'r', encoding='utf-8') as f:
                return json.load(f)

        now = datetime.datetime.now().isoformat()
        return {
            "metadata": {
                "created_at": now,
                "last_updated": now,
                "tool_version": "1.0.0",
            },
            "documents": {},
            "statistics": {
                "total_versions": 0,
                "total_documents": 0,
                "total_changes": 0,
            },
        }

    def _save_version_db(self) -> None:
        """Persist the in-memory database, refreshing ``last_updated``."""
        self.db["metadata"]["last_updated"] = datetime.datetime.now().isoformat()

        # Write to a sibling file and rename it into place so that a crash
        # in the middle of the dump cannot leave a truncated database.
        tmp_path = self.version_db.with_name(self.version_db.name + ".tmp")
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(self.db, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, self.version_db)

    def _calculate_file_hash(self, file_path) -> str:
        """Return the hexadecimal MD5 digest of the file's content.

        MD5 is used only to detect content changes; existing databases store
        MD5 digests, so the algorithm is kept for compatibility.
        """
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def _doc_key(self, document_path):
        """Return the database key for *document_path*, or ``None``.

        The key is the path relative to ``base_path``. Relative input is
        interpreted against the current working directory. When the path is
        not located under ``base_path`` an error is printed and ``None`` is
        returned instead of raising ``ValueError``.
        """
        absolute = Path(os.path.abspath(document_path))
        try:
            return str(absolute.relative_to(os.path.abspath(self.base_path)))
        except ValueError:
            print(f"错误:文档不在目录 {self.base_path} 内: {document_path}")
            return None

    def _is_inside_versions_dir(self, path) -> bool:
        """Return True when *path* lies inside the snapshot directory."""
        try:
            Path(os.path.abspath(path)).relative_to(
                os.path.abspath(self.versions_dir)
            )
        except ValueError:
            return False
        return True

    def create_version(self, document_path, comment=""):
        """Store a new version of a document if its content has changed.

        Args:
            document_path: Path of the document; must be under ``base_path``.
            comment: Free-form note recorded with the version.

        Returns:
            True when a new version was created. False when the document is
            missing, lies outside ``base_path``, or is identical to its
            latest stored version.
        """
        doc_path = Path(document_path)
        if not doc_path.is_file():
            print(f"错误:文档不存在 {doc_path}")
            return False

        doc_key = self._doc_key(doc_path)
        if doc_key is None:
            return False

        file_hash = self._calculate_file_hash(doc_path)
        size = doc_path.stat().st_size

        known = self.db["documents"].get(doc_key)
        previous_count = len(known["versions"]) if known else 0
        if previous_count and known["versions"][-1]["hash"] == file_hash:
            print(f"文档 {doc_key} 内容未改变,跳过版本创建")
            return False

        # Copy the snapshot first; the database is only touched once the
        # copy has succeeded.
        # NOTE(review): flattening separators to '_' can map two different
        # documents (e.g. "x/a/b.md" and "x_a/b.md") to the same directory.
        # The layout is kept because existing databases reference it.
        version_id = f"v{previous_count + 1:04d}"
        flat_name = doc_key.replace(os.sep, '_').replace('/', '_')
        version_dir = self.versions_dir / flat_name / version_id
        version_dir.mkdir(parents=True, exist_ok=True)
        version_file = version_dir / doc_path.name
        shutil.copy2(doc_path, version_file)

        now = datetime.datetime.now().isoformat()
        record = self.db["documents"].setdefault(doc_key, {
            "versions": [],
            "created_at": now,
            "total_versions": 0,
        })
        record["versions"].append({
            "version_id": version_id,
            "timestamp": now,
            "comment": comment,
            "hash": file_hash,
            "size": size,
            "file_path": str(version_file.relative_to(self.versions_dir)),
        })
        # Derive the count from the list itself so it cannot drift.
        record["total_versions"] = len(record["versions"])
        record["last_updated"] = now

        stats = self.db["statistics"]
        stats["total_versions"] += 1
        stats["total_documents"] = len(self.db["documents"])
        # The first version of a document is not a change; this uses the
        # count taken before the append.
        if previous_count > 0:
            stats["total_changes"] += 1

        self._save_version_db()
        print(f"已为文档 {doc_key} 创建版本 {version_id}")
        return True

    def list_versions(self, document_path=None):
        """List stored versions.

        Args:
            document_path: When given, only that document's versions are
                returned (oldest first, as stored). Otherwise the versions of
                all documents are returned, newest first, each carrying an
                extra ``"document"`` key.

        Returns:
            A list of version dicts; empty when nothing is recorded.
        """
        if document_path:
            doc_key = self._doc_key(document_path)
            if doc_key is None:
                return []
            if doc_key not in self.db["documents"]:
                print(f"文档 {doc_key} 没有版本记录")
                return []
            return self.db["documents"][doc_key]["versions"]

        # Build copies: tagging the stored dicts in place would leak the
        # "document" key into the database on the next save.
        all_versions = [
            {**version, "document": doc_key}
            for doc_key, doc_info in self.db["documents"].items()
            for version in doc_info["versions"]
        ]
        all_versions.sort(key=lambda v: v["timestamp"], reverse=True)
        return all_versions

    def compare_versions(self, document_path, version1, version2):
        """Summarise the difference between two versions of a document.

        Args:
            document_path: Path of the document; must be under ``base_path``.
            version1: Version id of the baseline (e.g. ``"v0001"``).
            version2: Version id compared against the baseline.

        Returns:
            A dict with ``document``, ``versions``, ``size_change``,
            ``hash_changed`` and ``line_count_change``, or ``None`` when the
            document or a version is unknown or a snapshot cannot be read.
        """
        doc_key = self._doc_key(document_path)
        if doc_key is None:
            return None

        if doc_key not in self.db["documents"]:
            print(f"文档 {doc_key} 没有版本记录")
            return None

        versions = self.db["documents"][doc_key]["versions"]
        v1_info = next((v for v in versions if v["version_id"] == version1), None)
        v2_info = next((v for v in versions if v["version_id"] == version2), None)

        if not v1_info or not v2_info:
            print(f"版本 {version1} 或 {version2} 不存在")
            return None

        try:
            v1_content = (self.versions_dir / v1_info["file_path"]).read_text(
                encoding='utf-8'
            )
            v2_content = (self.versions_dir / v2_info["file_path"]).read_text(
                encoding='utf-8'
            )
        except (OSError, UnicodeDecodeError) as exc:
            print(f"错误:无法读取版本文件: {exc}")
            return None

        return {
            "document": doc_key,
            "versions": [version1, version2],
            "size_change": v2_info["size"] - v1_info["size"],
            "hash_changed": v1_info["hash"] != v2_info["hash"],
            "line_count_change": (
                len(v2_content.split('\n')) - len(v1_content.split('\n'))
            ),
        }

    def restore_version(self, document_path, version_id):
        """Overwrite a document with one of its stored versions.

        If the document currently exists it is first copied to
        ``<name>.backup`` next to it.

        Args:
            document_path: Path of the document; must be under ``base_path``.
            version_id: Id of the version to restore (e.g. ``"v0001"``).

        Returns:
            True on success, False when the document or version is unknown
            or the snapshot file is missing.
        """
        doc_key = self._doc_key(document_path)
        if doc_key is None:
            return False

        if doc_key not in self.db["documents"]:
            print(f"文档 {doc_key} 没有版本记录")
            return False

        versions = self.db["documents"][doc_key]["versions"]
        version_info = next(
            (v for v in versions if v["version_id"] == version_id), None
        )
        if not version_info:
            print(f"版本 {version_id} 不存在")
            return False

        # Check the snapshot before touching the working copy.
        version_path = self.versions_dir / version_info["file_path"]
        if not version_path.is_file():
            print(f"错误:版本文件丢失 {version_path}")
            return False

        current_path = self.base_path / doc_key
        if current_path.exists():
            backup_path = current_path.with_suffix(current_path.suffix + ".backup")
            shutil.copy2(current_path, backup_path)
            print(f"当前版本已备份到 {backup_path}")
        else:
            # The document (and possibly its directory) was deleted.
            current_path.parent.mkdir(parents=True, exist_ok=True)

        shutil.copy2(version_path, current_path)

        print(f"文档 {doc_key} 已恢复到版本 {version_id}")
        return True

    def get_statistics(self):
        """Return the global statistics dict of the database."""
        return self.db["statistics"]

    def print_statistics(self):
        """Print global counters followed by the per-document version count."""
        stats = self.get_statistics()

        print("=== 版本管理统计 ===")
        print(f"总文档数: {stats['total_documents']}")
        print(f"总版本数: {stats['total_versions']}")
        print(f"总变更次数: {stats['total_changes']}")

        print("\n=== 文档版本统计 ===")
        for doc_key, doc_info in self.db["documents"].items():
            print(f"{doc_key}: {doc_info['total_versions']} 个版本")

    def batch_create_versions(self, directory_path, comment=""):
        """Create versions for every Markdown file below a directory.

        Files named ``README.md`` and anything inside the snapshot directory
        are skipped.

        Args:
            directory_path: Directory that is scanned recursively.
            comment: Note recorded with every version created.

        Returns:
            False when the directory does not exist, True otherwise.
        """
        dir_path = Path(directory_path)
        if not dir_path.is_dir():
            print(f"目录不存在: {dir_path}")
            return False

        created_count = 0

        # sorted() only makes the processing order deterministic.
        for md_file in sorted(dir_path.rglob("*.md")):
            if md_file.name == "README.md":
                continue
            # Stored snapshots are *.md files too; versioning them would
            # register the store's own content as new documents.
            if self._is_inside_versions_dir(md_file):
                continue

            if self.create_version(md_file, comment):
                created_count += 1

        print(f"批量创建完成,共创建 {created_count} 个新版本")
        return True


def main():
    """Command-line entry point.

    Reads the command and its arguments from ``sys.argv`` and dispatches to
    ``VersionManager``.

    Returns:
        0 on success; 1 on a usage error or when ``compare``, ``restore`` or
        ``batch`` reports failure.
    """
    import sys

    args = sys.argv[1:]
    if not args:
        print("用法:")
        print("  python version-manager.py create <文档路径> [注释]")
        print("  python version-manager.py list [文档路径]")
        print("  python version-manager.py compare <文档路径> <版本1> <版本2>")
        print("  python version-manager.py restore <文档路径> <版本>")
        print("  python version-manager.py stats")
        print("  python version-manager.py batch <目录路径> [注释]")
        return 1

    command, params = args[0], args[1:]

    # command -> (minimum number of arguments, message printed when missing)
    requirements = {
        "create": (1, "错误:需要指定文档路径"),
        "list": (0, ""),
        "compare": (3, "错误:需要指定文档路径和两个版本号"),
        "restore": (2, "错误:需要指定文档路径和版本号"),
        "stats": (0, ""),
        "batch": (1, "错误:需要指定目录路径"),
    }

    if command not in requirements:
        print(f"未知命令: {command}")
        return 1

    min_args, missing_message = requirements[command]
    if len(params) < min_args:
        print(missing_message)
        return 1

    # Construct the manager only after the command line is known to be
    # valid: the constructor creates directories and reads the database,
    # which must not happen just to print a usage or argument error.
    manager = VersionManager()

    if command == "create":
        comment = params[1] if len(params) > 1 else ""
        # create_version also returns False for "content unchanged", which
        # is not an error, so its result is not mapped to the exit status.
        manager.create_version(params[0], comment)
        return 0

    if command == "list":
        doc_path = params[0] if params else None
        versions = manager.list_versions(doc_path)

        if versions:
            print(f"找到 {len(versions)} 个版本:")
            for version in versions:
                doc = version.get("document", "当前文档")
                print(f"  {version['version_id']} - {version['timestamp'][:19]} - {version['comment']} ({doc})")
        else:
            print("没有找到版本记录")
        return 0

    if command == "compare":
        doc_path, version1, version2 = params[0], params[1], params[2]

        diff = manager.compare_versions(doc_path, version1, version2)
        if not diff:
            return 1
        print(f"版本比较结果 ({version1} -> {version2}):")
        print(f"  大小变化: {diff['size_change']} 字节")
        print(f"  哈希变化: {'是' if diff['hash_changed'] else '否'}")
        print(f"  行数变化: {diff['line_count_change']} 行")
        return 0

    if command == "restore":
        return 0 if manager.restore_version(params[0], params[1]) else 1

    if command == "stats":
        manager.print_statistics()
        return 0

    # Only "batch" remains.
    comment = params[1] if len(params) > 1 else "批量创建版本"
    return 0 if manager.batch_create_versions(params[0], comment) else 1


if __name__ == "__main__":
    # Propagate main()'s result as the process exit status. SystemExit(None)
    # exits with status 0, so this also works if main() returns nothing.
    raise SystemExit(main())