#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Document version management tool for the 胡汉三千年 (huhan3000) project.

Features:
1. Document version control
2. Change record management
3. Version comparison and restore
4. Change statistics

Author: huhan3000 project team
Version: 1.0.0
"""

import datetime
import hashlib
import json
import os
import shutil
import sys
from pathlib import Path


class VersionManager:
    """Keep numbered snapshots of documents that live under ``base_path``.

    Snapshots are copied into ``<base_path>/.versions`` and indexed in the
    JSON database ``<base_path>/.versions/version-db.json``.
    """

    def __init__(self, base_path="/home/ben/code/huhan3000/unified-docs"):
        """Create the snapshot directory if needed and load the database.

        Args:
            base_path: Root directory of the managed documents.
        """
        self.base_path = Path(base_path)
        self.versions_dir = self.base_path / ".versions"
        self.version_db = self.versions_dir / "version-db.json"

        # parents=True so a not-yet-existing base directory does not abort
        # start-up with FileNotFoundError.
        self.versions_dir.mkdir(parents=True, exist_ok=True)

        self.db = self._load_version_db()

    def _load_version_db(self):
        """Return the database read from disk, or a fresh empty one."""
        if self.version_db.exists():
            with open(self.version_db, 'r', encoding='utf-8') as f:
                return json.load(f)

        now = datetime.datetime.now().isoformat()
        return {
            "metadata": {
                "created_at": now,
                "last_updated": now,
                "tool_version": "1.0.0",
            },
            "documents": {},
            "statistics": {
                "total_versions": 0,
                "total_documents": 0,
                "total_changes": 0,
            },
        }

    def _save_version_db(self):
        """Stamp ``last_updated`` and write the database to disk."""
        self.db["metadata"]["last_updated"] = datetime.datetime.now().isoformat()
        # Write to a sibling temp file and swap it in, so an interrupted
        # write cannot leave a truncated database behind.
        tmp_path = self.version_db.with_name(self.version_db.name + ".tmp")
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(self.db, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, self.version_db)

    def _calculate_file_hash(self, file_path):
        """Return the hex MD5 digest of the file, read in 4 KiB chunks.

        MD5 is used only to detect content changes, not for security.
        """
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def _doc_key(self, document_path):
        """Return the database key (path relative to ``base_path``) or None.

        The path is first taken literally; if that fails it is resolved
        (relative paths against the current working directory) and compared
        with the resolved base directory. When the document is not inside
        the base directory an error is printed and None is returned.
        """
        candidate = Path(document_path)
        attempts = (
            lambda: candidate.relative_to(self.base_path),
            lambda: candidate.resolve().relative_to(self.base_path.resolve()),
        )
        for attempt in attempts:
            try:
                relative = attempt()
            except ValueError:
                continue
            # A literal ".." component would point outside the base directory.
            if ".." not in relative.parts:
                return str(relative)

        print(f"错误:文档不在文档库目录内 {candidate}")
        return None

    def create_version(self, document_path, comment=""):
        """Snapshot a document if its content changed since the last version.

        Args:
            document_path: Path of a file inside ``base_path``.
            comment: Free-text note stored with the version.

        Returns:
            True when a new version was created; False when the file is
            missing, lies outside ``base_path``, or is unchanged.
        """
        doc_path = Path(document_path)
        if not doc_path.is_file():
            print(f"错误:文档不存在 {doc_path}")
            return False

        doc_key = self._doc_key(doc_path)
        if doc_key is None:
            return False

        file_hash = self._calculate_file_hash(doc_path)
        size = doc_path.stat().st_size

        documents = self.db["documents"]
        if doc_key not in documents:
            documents[doc_key] = {
                "versions": [],
                "created_at": datetime.datetime.now().isoformat(),
                "total_versions": 0,
            }
        record = documents[doc_key]
        current_versions = record["versions"]

        # Skip when the content is identical to the latest snapshot.
        if current_versions and current_versions[-1]["hash"] == file_hash:
            print(f"文档 {doc_key} 内容未改变,跳过版本创建")
            return False

        # Capture the count BEFORE appending: current_versions is the very
        # list stored in the database, so its length changes on append.
        previous_count = len(current_versions)

        version_id = f"v{previous_count + 1:04d}"
        # NOTE: flattening '/' to '_' can collide ("a/b.md" vs "a_b.md");
        # kept unchanged for compatibility with existing snapshot layouts.
        version_dir = self.versions_dir / doc_key.replace('/', '_') / version_id
        version_dir.mkdir(parents=True, exist_ok=True)

        version_file = version_dir / doc_path.name
        shutil.copy2(doc_path, version_file)

        now = datetime.datetime.now().isoformat()
        current_versions.append({
            "version_id": version_id,
            "timestamp": now,
            "comment": comment,
            "hash": file_hash,
            "size": size,
            "file_path": str(version_file.relative_to(self.versions_dir)),
        })
        record["total_versions"] = previous_count + 1
        record["last_updated"] = now

        stats = self.db["statistics"]
        stats["total_versions"] += 1
        stats["total_documents"] = len(documents)
        # Only a version that follows an earlier one counts as a change.
        if previous_count > 0:
            stats["total_changes"] += 1

        self._save_version_db()

        print(f"已为文档 {doc_key} 创建版本 {version_id}")
        return True

    def list_versions(self, document_path=None):
        """Return version records for one document, or for all documents.

        Args:
            document_path: Document to list; None lists every document.

        Returns:
            For one document, its stored version list (empty list if it has
            none). For all documents, copies of every version record with an
            extra ``"document"`` key, newest first.
        """
        if document_path:
            doc_key = self._doc_key(document_path)
            if doc_key is None:
                return []
            if doc_key not in self.db["documents"]:
                print(f"文档 {doc_key} 没有版本记录")
                return []
            return self.db["documents"][doc_key]["versions"]

        # Build copies so the "document" annotation is never written into
        # (and later persisted with) the database records themselves.
        all_versions = [
            {**version, "document": doc_key}
            for doc_key, doc_info in self.db["documents"].items()
            for version in doc_info["versions"]
        ]
        all_versions.sort(key=lambda v: v["timestamp"], reverse=True)
        return all_versions

    def compare_versions(self, document_path, version1, version2):
        """Summarise the difference between two versions of a document.

        Returns:
            A dict with ``document``, ``versions``, ``size_change``,
            ``hash_changed`` and ``line_count_change`` (all computed as
            version2 relative to version1), or None when the document or
            either version is unknown.
        """
        doc_key = self._doc_key(document_path)
        if doc_key is None:
            return None
        if doc_key not in self.db["documents"]:
            print(f"文档 {doc_key} 没有版本记录")
            return None

        by_id = {
            v["version_id"]: v
            for v in self.db["documents"][doc_key]["versions"]
        }
        v1_info = by_id.get(version1)
        v2_info = by_id.get(version2)
        if not v1_info or not v2_info:
            print(f"版本 {version1} 或 {version2} 不存在")
            return None

        # Snapshots are read as UTF-8 text to compare line counts.
        with open(self.versions_dir / v1_info["file_path"], 'r', encoding='utf-8') as f:
            v1_content = f.read()
        with open(self.versions_dir / v2_info["file_path"], 'r', encoding='utf-8') as f:
            v2_content = f.read()

        return {
            "document": doc_key,
            "versions": [version1, version2],
            "size_change": v2_info["size"] - v1_info["size"],
            "hash_changed": v1_info["hash"] != v2_info["hash"],
            "line_count_change": len(v2_content.split('\n')) - len(v1_content.split('\n')),
        }

    def restore_version(self, document_path, version_id):
        """Overwrite the working document with a stored version.

        If the working document exists it is first copied next to itself
        with a ``.backup`` suffix appended.

        Returns:
            True on success; False when the document, the version, or the
            snapshot file cannot be found.
        """
        doc_key = self._doc_key(document_path)
        if doc_key is None:
            return False
        if doc_key not in self.db["documents"]:
            print(f"文档 {doc_key} 没有版本记录")
            return False

        versions = self.db["documents"][doc_key]["versions"]
        version_info = next(
            (v for v in versions if v["version_id"] == version_id), None
        )
        if not version_info:
            print(f"版本 {version_id} 不存在")
            return False

        version_path = self.versions_dir / version_info["file_path"]
        if not version_path.is_file():
            print(f"错误:版本文件缺失 {version_path}")
            return False

        current_path = self.base_path / doc_key
        if current_path.exists():
            backup_path = current_path.with_suffix(current_path.suffix + ".backup")
            shutil.copy2(current_path, backup_path)
            print(f"当前版本已备份到 {backup_path}")
        else:
            # The document (and possibly its folder) was deleted; recreate
            # the folder so the copy below can succeed.
            current_path.parent.mkdir(parents=True, exist_ok=True)

        shutil.copy2(version_path, current_path)

        print(f"文档 {doc_key} 已恢复到版本 {version_id}")
        return True

    def get_statistics(self):
        """Return the ``statistics`` section of the database."""
        return self.db["statistics"]

    def print_statistics(self):
        """Print global totals followed by the version count per document."""
        stats = self.get_statistics()

        print("=== 版本管理统计 ===")
        print(f"总文档数: {stats['total_documents']}")
        print(f"总版本数: {stats['total_versions']}")
        print(f"总变更次数: {stats['total_changes']}")

        print("\n=== 文档版本统计 ===")
        for doc_key, doc_info in self.db["documents"].items():
            # Count the stored records instead of trusting the cached
            # "total_versions" field, which may be stale in older databases.
            print(f"{doc_key}: {len(doc_info['versions'])} 个版本")

    def batch_create_versions(self, directory_path, comment=""):
        """Create versions for every Markdown file under a directory.

        Files named ``README.md`` and anything inside the snapshot
        directory are skipped.

        Returns:
            False when the directory does not exist, otherwise True.
        """
        dir_path = Path(directory_path)
        if not dir_path.exists():
            print(f"目录不存在: {dir_path}")
            return False

        versions_root = self.versions_dir.resolve()
        created_count = 0

        for md_file in dir_path.rglob("*.md"):
            if md_file.name == "README.md" or not md_file.is_file():
                continue
            # Never version our own snapshots, otherwise each run over the
            # base directory would archive the archive.
            if versions_root in md_file.resolve().parents:
                continue
            if self.create_version(md_file, comment):
                created_count += 1

        print(f"批量创建完成,共创建 {created_count} 个新版本")
        return True


def main():
    """Command-line entry point; dispatches on ``sys.argv[1]``."""
    manager = VersionManager()
    args = sys.argv

    if len(args) < 2:
        print("用法:")
        print("  python version-manager.py create <文档路径> [注释]")
        print("  python version-manager.py list [文档路径]")
        print("  python version-manager.py compare <文档路径> <版本1> <版本2>")
        print("  python version-manager.py restore <文档路径> <版本>")
        print("  python version-manager.py stats")
        print("  python version-manager.py batch <目录路径> [注释]")
        return

    command = args[1]

    if command == "create":
        if len(args) < 3:
            print("错误:需要指定文档路径")
            return
        comment = args[3] if len(args) > 3 else ""
        manager.create_version(args[2], comment)

    elif command == "list":
        doc_path = args[2] if len(args) > 2 else None
        versions = manager.list_versions(doc_path)
        if versions:
            print(f"找到 {len(versions)} 个版本:")
            for version in versions:
                doc = version.get("document", "当前文档")
                print(f"  {version['version_id']} - {version['timestamp'][:19]} - {version['comment']} ({doc})")
        else:
            print("没有找到版本记录")

    elif command == "compare":
        if len(args) < 5:
            print("错误:需要指定文档路径和两个版本号")
            return
        version1, version2 = args[3], args[4]
        diff = manager.compare_versions(args[2], version1, version2)
        if diff is not None:
            print(f"版本比较结果 ({version1} -> {version2}):")
            print(f"  大小变化: {diff['size_change']} 字节")
            print(f"  哈希变化: {'是' if diff['hash_changed'] else '否'}")
            print(f"  行数变化: {diff['line_count_change']} 行")

    elif command == "restore":
        if len(args) < 4:
            print("错误:需要指定文档路径和版本号")
            return
        manager.restore_version(args[2], args[3])

    elif command == "stats":
        manager.print_statistics()

    elif command == "batch":
        if len(args) < 3:
            print("错误:需要指定目录路径")
            return
        comment = args[3] if len(args) > 3 else "批量创建版本"
        manager.batch_create_versions(args[2], comment)

    else:
        print(f"未知命令: {command}")


if __name__ == "__main__":
    main()