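"""Validate the YAML front matter of the project's Markdown documents.

Scans for ``*.md`` files, checks the required lifecycle fields (title,
status, owner, created, review_by) and prints a report. Run with ``--fix``
to add a default front matter template to documents that have none. The
script exits with status 1 when validation errors are found.
"""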
import argparse
import datetime
import glob
import sys
from pathlib import Path

import frontmatter
# --- Configuration ---
# Directory names that are skipped when scanning for Markdown files.
EXCLUDE_DIRS = ['venv', 'node_modules', '.git']

# Number of days after creation at which a templated document is due for review.
_REVIEW_INTERVAL_DAYS = 180

# Captured once so 'created' and 'review_by' are always derived from the same
# day, even if the module happens to be imported right around midnight.
_TODAY = datetime.date.today()

# Default metadata template for the --fix option. Dates are stored as
# 'YYYY-MM-DD' strings, computed when the module is imported.
DEFAULT_METADATA_TEMPLATE = {
    'title': "Default Title",
    'status': "spring",
    'owner': "TBD",
    'created': _TODAY.strftime('%Y-%m-%d'),
    'review_by': (_TODAY + datetime.timedelta(days=_REVIEW_INTERVAL_DAYS)).strftime('%Y-%m-%d'),
    'tags': ["untagged"],
}
def get_project_files(project_root, exclude_dirs=None):
    """Return the Markdown files under *project_root*, respecting exclusions.

    Args:
        project_root: Directory to scan recursively (``str`` or ``Path``).
        exclude_dirs: Directory names to skip. Defaults to the module-level
            ``EXCLUDE_DIRS`` when omitted.

    Returns:
        A sorted list of path strings, one per ``*.md`` file found.
    """
    root = Path(project_root)
    excluded = set(EXCLUDE_DIRS if exclude_dirs is None else exclude_dirs)

    matches = []
    for candidate in root.rglob('*.md'):
        # rglob matches on name only, so a directory called "notes.md" would
        # otherwise be handed to the parser as if it were a document.
        if not candidate.is_file():
            continue
        # Only the directories *below* the root are compared: testing the
        # absolute path would drop every file whenever the project itself is
        # checked out beneath a directory named e.g. "venv".
        if excluded.isdisjoint(candidate.relative_to(root).parent.parts):
            matches.append(str(candidate))

    # Sorted so the scan order (and therefore the report) is reproducible.
    return sorted(matches)
def add_default_frontmatter(file_path):
    """Add a default YAML front matter block to a file that lacks one.

    The file's existing text is kept as the document body and the values of
    ``DEFAULT_METADATA_TEMPLATE`` become its metadata.

    Args:
        file_path: Path of the Markdown file to rewrite in place.

    Returns:
        True if the file was rewritten, False if any step failed (the error
        is printed rather than raised).
    """
    try:
        with open(file_path, 'r+', encoding='utf-8') as f:
            content = f.read()

            # Create a new post object with default metadata and existing content.
            new_post = frontmatter.Post(content, **DEFAULT_METADATA_TEMPLATE)

            # Serialize before touching the file so a serialization failure
            # leaves the original content intact.
            serialized = frontmatter.dumps(new_post)

            f.seek(0)
            f.write(serialized)
            # The file is overwritten in place; drop anything left over from
            # the old content in case the new text turned out shorter (e.g.
            # if the serializer trims surrounding whitespace).
            f.truncate()
    except Exception as e:
        # Deliberately broad: this is a best-effort fix and the caller only
        # needs to know whether it succeeded.
        print(f"[CRITICAL] {file_path}: Could not apply fix. Error: {e}")
        return False

    # Reported only after the file has been closed (and thus flushed) cleanly.
    print(f"[FIXED] {file_path}: Added default front matter.")
    return True
_REQUIRED_FIELDS = ('title', 'status', 'owner', 'created', 'review_by')
_ALLOWED_STATUSES = ['spring', 'summer', 'autumn', 'winter']


def _coerce_to_date(value):
    """Return *value* as a ``datetime.date``; raise ``ValueError`` if it is not one."""
    # datetime.datetime subclasses datetime.date, so it has to be tested first;
    # comparing a datetime directly with a date raises TypeError.
    if isinstance(value, datetime.datetime):
        return value.date()
    if isinstance(value, datetime.date):
        return value
    if isinstance(value, str):
        return datetime.datetime.strptime(value, '%Y-%m-%d').date()
    raise ValueError(f"unsupported value of type {type(value).__name__}")


def _check_metadata(md_file, metadata, today):
    """Validate one document's metadata and return ``(errors, warnings)`` message lists."""
    missing_fields = [field for field in _REQUIRED_FIELDS if field not in metadata]
    if missing_fields:
        # Nothing else is checked once required fields are absent.
        return [f"[ERROR] {md_file}: Missing required fields: {', '.join(missing_fields)}"], []

    errors = []
    warnings = []

    status = metadata.get('status')
    if status not in _ALLOWED_STATUSES:
        errors.append(f"[ERROR] {md_file}: Invalid status '{status}'. Must be one of {_ALLOWED_STATUSES}")

    review_by = metadata.get('review_by')
    if review_by:
        # NOTE(review): presumably the front matter loader hands unquoted YAML
        # dates back as date/datetime objects and quoted ones as strings —
        # confirm against the loader in use.
        try:
            review_by_date = _coerce_to_date(review_by)
        except ValueError as exc:
            errors.append(f"[ERROR] {md_file}: Invalid review_by date {review_by!r} (expected YYYY-MM-DD): {exc}")
        else:
            if review_by_date < today:
                warnings.append(f"[WARNING] {md_file}: Review date ({review_by_date}) has passed.")

    return errors, warnings


def _print_report(total_files, valid_count, fixed_count, warnings, errors, show_fixed):
    """Print the validation report followed by the summary counters."""
    print("\n--- Validation Report ---")

    if not errors and not warnings:
        print("✅ All documents with front matter are valid and up-to-date.")

    if warnings:
        print("\n⚠️ Warnings:")
        for warning in warnings:
            print(warning)

    if errors:
        print("\n❌ Errors:")
        for error in errors:
            print(error)

    print("\n--- Summary ---")
    print(f"Total files scanned: {total_files}")
    print(f"Files with valid front matter: {valid_count}")
    if show_fixed:
        print(f"Files automatically fixed: {fixed_count}")
    print(f"Warnings: {len(warnings)}")
    print(f"Errors: {len(errors)}")


def validate_doc_lifecycle(fix_missing=False, project_root=None):
    """Scan and validate Markdown files, optionally fixing missing front matter.

    Args:
        fix_missing: When True, files without front matter get the default
            template added; otherwise they are reported as skipped.
        project_root: Directory to scan. Defaults to the parent of the
            directory that contains this script.

    Returns:
        True if no errors were recorded, False otherwise. Warnings do not
        affect the result.
    """
    if project_root is None:
        project_root = Path(__file__).parent.parent
    else:
        project_root = Path(project_root)
    markdown_files = get_project_files(project_root)

    print(f"Scanning {len(markdown_files)} Markdown files (vendor directories excluded)...")

    today = datetime.date.today()
    valid_docs = []
    errors = []
    warnings = []
    fixed_count = 0

    for md_file in markdown_files:
        # Only the load is guarded: a broad catch here is a per-file boundary
        # so that one unreadable document does not abort the whole scan.
        try:
            metadata = frontmatter.load(md_file).metadata
        except Exception as e:
            errors.append(f"[CRITICAL] {md_file}: Could not parse file. Error: {e}")
            continue

        if not metadata:
            if not fix_missing:
                warnings.append(f"[SKIPPED] {md_file}: No YAML front matter found. Use --fix to add a template.")
            elif add_default_frontmatter(md_file):
                fixed_count += 1
            else:
                # A failed fix must be reflected in the result, not just printed.
                errors.append(f"[CRITICAL] {md_file}: Could not add default front matter.")
            continue

        doc_errors, doc_warnings = _check_metadata(md_file, metadata, today)
        errors.extend(doc_errors)
        warnings.extend(doc_warnings)

        # Only documents without errors count as valid in the summary.
        if not doc_errors:
            valid_docs.append({'path': md_file, **metadata})

    _print_report(
        total_files=len(markdown_files),
        valid_count=len(valid_docs),
        fixed_count=fixed_count,
        warnings=warnings,
        errors=errors,
        show_fixed=fix_missing,
    )

    return not errors
def main(argv=None):
    """Parse the command line and run the validation.

    Args:
        argv: Argument list to parse. ``None`` lets argparse read the
            process arguments.

    Returns:
        0 if validation reported no errors, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="Validate and manage the lifecycle of Markdown documents.")
    parser.add_argument(
        '--fix',
        action='store_true',
        help="Automatically add a default front matter template to any document that is missing one.",
    )
    args = parser.parse_args(argv)

    is_valid = validate_doc_lifecycle(fix_missing=args.fix)
    return 0 if is_valid else 1


if __name__ == "__main__":
    # sys.exit instead of the bare exit(): the latter is installed by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(main())