# liurenchaxin/scripts/validate_doc_lifecycle.py
#
# Listing metadata from the original page: 137 lines, 4.9 KiB, Python.

import glob
import frontmatter
import datetime
import argparse
from pathlib import Path
# --- Configuration ---
# Directory names that are never scanned for Markdown documents.
EXCLUDE_DIRS = ["venv", "node_modules", ".git"]

# Front matter written by the --fix option into documents that have none.
# ``created`` is today's date and ``review_by`` is 180 days later; both are
# formatted as YYYY-MM-DD and computed once, when this module is imported.
DEFAULT_METADATA_TEMPLATE = dict(
    title="Default Title",
    status="spring",
    owner="TBD",
    created=datetime.date.today().isoformat(),
    review_by=(datetime.date.today() + datetime.timedelta(days=180)).isoformat(),
    tags=["untagged"],
)
def get_project_files(project_root, exclude_dirs=None):
    """Return the Markdown files under *project_root*, skipping excluded directories.

    Args:
        project_root: ``pathlib.Path`` of the directory to scan recursively.
        exclude_dirs: Iterable of directory names to skip. When omitted, the
            module-level ``EXCLUDE_DIRS`` list is used.

    Returns:
        A sorted list of path strings, one for each ``*.md`` file that does
        not live inside an excluded directory.
    """
    if exclude_dirs is None:
        exclude_dirs = EXCLUDE_DIRS
    excluded = set(exclude_dirs)

    filtered_files = []
    for file_path in project_root.rglob('*.md'):
        # rglob also yields directories whose name ends in ".md"; those are
        # not documents and cannot be loaded later on.
        if not file_path.is_file():
            continue
        # Only inspect the directories *below* project_root. Checking the full
        # path would drop every file whenever the project itself is checked
        # out under a directory that happens to be named like an excluded one.
        parent_dirs = file_path.relative_to(project_root).parts[:-1]
        if excluded.isdisjoint(parent_dirs):
            filtered_files.append(str(file_path))

    # Filesystem iteration order is not stable; sort for reproducible reports.
    return sorted(filtered_files)
def add_default_frontmatter(file_path):
    """Prepend the default YAML front matter block to a file that lacks one.

    The file is rewritten in place: its current text becomes the content of a
    new ``frontmatter.Post`` whose metadata is ``DEFAULT_METADATA_TEMPLATE``.

    Args:
        file_path: Path of the Markdown file to rewrite.

    Returns:
        True when the file was rewritten, False when any error occurred (the
        error is printed with a ``[CRITICAL]`` prefix, not raised).
    """
    try:
        with open(file_path, 'r+', encoding='utf-8') as f:
            content = f.read()
            # Build and serialize the post before touching the file, so a
            # serialization failure leaves the document unmodified.
            new_post = frontmatter.Post(content, **DEFAULT_METADATA_TEMPLATE)
            serialized = frontmatter.dumps(new_post)
            f.seek(0)
            f.write(serialized)
            # Drop any leftover tail of the old content in case the
            # serialized text is shorter than what was there before.
            f.truncate()
        print(f"[FIXED] {file_path}: Added default front matter.")
        return True
    except Exception as e:
        # Best-effort fix: report the problem and let the caller carry on
        # with the remaining files.
        print(f"[CRITICAL] {file_path}: Could not apply fix. Error: {e}")
        return False
def _normalize_review_date(value):
    """Convert a ``review_by`` metadata value to a ``datetime.date``.

    Accepts a ``datetime.date``, a ``datetime.datetime`` (its date part is
    used) or a ``'YYYY-MM-DD'`` string.

    Raises:
        ValueError: If a string does not match ``%Y-%m-%d``.
        TypeError: If the value has any other type.
    """
    # datetime.datetime is a subclass of datetime.date, so test it first;
    # comparing a datetime with a date directly would raise TypeError.
    if isinstance(value, datetime.datetime):
        return value.date()
    if isinstance(value, datetime.date):
        return value
    if isinstance(value, str):
        return datetime.datetime.strptime(value, '%Y-%m-%d').date()
    raise TypeError(f"unsupported type {type(value).__name__}")


def validate_doc_lifecycle(fix_missing=False):
    """Scan the project's Markdown files and validate their front matter.

    The project root is the parent of the directory containing this script.
    Every file must define the required fields, use an allowed ``status`` and
    carry a parseable ``review_by`` date; a date in the past yields a warning.
    A report and a summary are printed to stdout.

    Args:
        fix_missing: When True, files without front matter get the default
            template added; when False they are only reported as skipped.

    Returns:
        True when no errors were recorded (warnings do not count), else False.
    """
    project_root = Path(__file__).parent.parent
    markdown_files = get_project_files(project_root)
    print(f"Scanning {len(markdown_files)} Markdown files (vendor directories excluded)...")

    required_fields = ['title', 'status', 'owner', 'created', 'review_by']
    allowed_statuses = ['spring', 'summer', 'autumn', 'winter']
    today = datetime.date.today()

    all_docs = []
    errors = []
    warnings = []
    fixed_count = 0

    for md_file in markdown_files:
        # One broken file must not abort the whole scan, hence the broad
        # per-file boundary below.
        try:
            post = frontmatter.load(md_file)
            metadata = post.metadata

            if not metadata:
                if not fix_missing:
                    warnings.append(f"[SKIPPED] {md_file}: No YAML front matter found. Use --fix to add a template.")
                elif add_default_frontmatter(md_file):
                    fixed_count += 1
                else:
                    # Keep the failed fix visible in the report; the file is
                    # still just a document without front matter.
                    warnings.append(f"[SKIPPED] {md_file}: No YAML front matter found and the automatic fix failed.")
                continue

            missing_fields = [field for field in required_fields if field not in metadata]
            if missing_fields:
                errors.append(f"[ERROR] {md_file}: Missing required fields: {', '.join(missing_fields)}")
                continue

            doc_info = {'path': md_file}
            doc_info.update(metadata)
            doc_errors = []

            status = metadata.get('status')
            if status not in allowed_statuses:
                doc_errors.append(f"[ERROR] {md_file}: Invalid status '{status}'. Must be one of {allowed_statuses}")

            review_by = metadata.get('review_by')
            if review_by:
                try:
                    review_date = _normalize_review_date(review_by)
                except (TypeError, ValueError):
                    # Report a bad date as what it is instead of a generic
                    # "could not parse file" failure.
                    doc_errors.append(f"[ERROR] {md_file}: Invalid review_by date '{review_by}'. Expected YYYY-MM-DD.")
                else:
                    if review_date < today:
                        warnings.append(f"[WARNING] {md_file}: Review date ({review_date}) has passed.")

            if doc_errors:
                errors.extend(doc_errors)
            else:
                # Only documents that passed every check count as valid.
                all_docs.append(doc_info)
        except Exception as e:
            errors.append(f"[CRITICAL] {md_file}: Could not parse file. Error: {e}")

    print("\n--- Validation Report ---")
    if not errors and not warnings:
        print("✅ All documents with front matter are valid and up-to-date.")
    if warnings:
        print("\n⚠️ Warnings:")
        for warning in warnings:
            print(warning)
    if errors:
        print("\n❌ Errors:")
        for error in errors:
            print(error)

    print("\n--- Summary ---")
    print(f"Total files scanned: {len(markdown_files)}")
    print(f"Files with valid front matter: {len(all_docs)}")
    if fix_missing:
        print(f"Files automatically fixed: {fixed_count}")
    print(f"Warnings: {len(warnings)}")
    print(f"Errors: {len(errors)}")

    return not errors
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the validation and
    # signal failure to the calling shell or CI job through the exit status.
    parser = argparse.ArgumentParser(description="Validate and manage the lifecycle of Markdown documents.")
    parser.add_argument(
        '--fix',
        action='store_true',
        help="Automatically add a default front matter template to any document that is missing one."
    )
    args = parser.parse_args()

    is_valid = validate_doc_lifecycle(fix_missing=args.fix)
    if not is_valid:
        # The bare exit() helper is injected by the site module for
        # interactive sessions and may be absent (e.g. python -S); raising
        # SystemExit always works and needs no import.
        raise SystemExit(1)