"""Validate the lifecycle front matter of the project's Markdown documents.

Every ``*.md`` file under the project root (the parent of this script's
directory) is expected to carry YAML front matter with a title, status,
owner, creation date and review date. Run with ``--fix`` to add a default
front matter block to files that have none.
"""

import argparse
import datetime
import glob
from pathlib import Path

import frontmatter

# --- Configuration ---
# Directories to exclude from scanning
EXCLUDE_DIRS = ['venv', 'node_modules', '.git']

# Default metadata template for the --fix option.
# NOTE: the dates are computed once, when the module is imported.
DEFAULT_METADATA_TEMPLATE = {
    'title': "Default Title",
    'status': "spring",
    'owner': "TBD",
    'created': datetime.date.today().strftime('%Y-%m-%d'),
    'review_by': (datetime.date.today() + datetime.timedelta(days=180)).strftime('%Y-%m-%d'),
    'tags': ["untagged"],
}


def get_project_files(project_root):
    """Return the Markdown files under *project_root*, respecting exclusions.

    Args:
        project_root: Directory to scan recursively (``Path`` or ``str``).

    Returns:
        A sorted list of file paths (as strings) matching ``*.md`` whose path
        below ``project_root`` contains no component listed in EXCLUDE_DIRS.
    """
    project_root = Path(project_root)
    excluded = set(EXCLUDE_DIRS)
    return sorted(
        str(path)
        for path in project_root.rglob('*.md')
        # Compare only the components *below* the root: a checkout that
        # happens to live inside a directory called e.g. "venv" must not
        # cause every file to be excluded.
        if excluded.isdisjoint(path.relative_to(project_root).parts)
        # A directory may itself be named "something.md"; it is not a document.
        and path.is_file()
    )


def add_default_frontmatter(file_path):
    """Add a default YAML front matter block to a file that lacks one.

    The file's existing text becomes the body of a new post whose metadata
    is DEFAULT_METADATA_TEMPLATE, and the file is rewritten in place.

    Args:
        file_path: Path of the Markdown file to rewrite.

    Returns:
        True if the file was rewritten, False if anything went wrong (the
        failure is printed, not raised).
    """
    try:
        with open(file_path, 'r+', encoding='utf-8') as f:
            content = f.read()
            # Create a new post object with default metadata and existing content
            new_post = frontmatter.Post(content, **DEFAULT_METADATA_TEMPLATE)
            # Write the serialized post (metadata + content) back to the file
            f.seek(0)
            f.write(frontmatter.dumps(new_post))
            # The serialized text can be shorter than the original (e.g. a
            # file that was mostly trailing whitespace); drop leftover bytes.
            f.truncate()
        print(f"[FIXED] {file_path}: Added default front matter.")
        return True
    except Exception as e:
        print(f"[CRITICAL] {file_path}: Could not apply fix. Error: {e}")
        return False


def _to_date(value):
    """Normalize a ``review_by`` value to a ``datetime.date``.

    Accepts a ``datetime.datetime``, a ``datetime.date`` or a ``YYYY-MM-DD``
    string. Raises ValueError for a malformed string and TypeError for any
    other type.
    """
    # datetime is a subclass of date, so it has to be checked first.
    if isinstance(value, datetime.datetime):
        return value.date()
    if isinstance(value, datetime.date):
        return value
    if isinstance(value, str):
        return datetime.datetime.strptime(value, '%Y-%m-%d').date()
    raise TypeError(f"expected a date or 'YYYY-MM-DD' string, got {type(value).__name__}")


def validate_doc_lifecycle(fix_missing=False):
    """Scan and validate the project's Markdown files and print a report.

    For every file found by get_project_files() the front matter is loaded
    and checked for the required fields, an allowed ``status`` value and a
    ``review_by`` date that has not passed.

    Args:
        fix_missing: When True, files without any front matter get the
            default template added; otherwise they are reported as skipped.

    Returns:
        True when no errors were recorded (warnings do not count).
    """
    project_root = Path(__file__).parent.parent
    markdown_files = get_project_files(project_root)

    print(f"Scanning {len(markdown_files)} Markdown files (vendor directories excluded)...")

    required_fields = ['title', 'status', 'owner', 'created', 'review_by']
    allowed_statuses = ['spring', 'summer', 'autumn', 'winter']
    today = datetime.date.today()

    all_docs = []
    errors = []
    warnings = []
    fixed_count = 0

    for md_file in markdown_files:
        try:
            post = frontmatter.load(md_file)
            metadata = post.metadata

            if not metadata:
                if fix_missing:
                    if add_default_frontmatter(md_file):
                        fixed_count += 1
                else:
                    warnings.append(
                        f"[SKIPPED] {md_file}: No YAML front matter found. "
                        f"Use --fix to add a template."
                    )
                continue

            missing_fields = [field for field in required_fields if field not in metadata]
            if missing_fields:
                errors.append(
                    f"[ERROR] {md_file}: Missing required fields: {', '.join(missing_fields)}"
                )
                continue

            doc_info = {'path': md_file}
            doc_info.update(metadata)
            # Only documents that pass every check are counted as valid.
            is_valid = True

            status = metadata.get('status')
            if status not in allowed_statuses:
                errors.append(
                    f"[ERROR] {md_file}: Invalid status '{status}'. "
                    f"Must be one of {allowed_statuses}"
                )
                is_valid = False

            raw_review_by = metadata.get('review_by')
            if raw_review_by:
                try:
                    review_by_date = _to_date(raw_review_by)
                except (TypeError, ValueError) as e:
                    # Report a bad date as what it is instead of letting it
                    # surface as a generic "could not parse file" failure.
                    errors.append(
                        f"[ERROR] {md_file}: Invalid review_by date '{raw_review_by}'. Error: {e}"
                    )
                    is_valid = False
                else:
                    if review_by_date < today:
                        warnings.append(
                            f"[WARNING] {md_file}: Review date ({review_by_date}) has passed."
                        )

            if is_valid:
                all_docs.append(doc_info)

        except Exception as e:
            # Per-file boundary: one unreadable file must not abort the scan.
            errors.append(f"[CRITICAL] {md_file}: Could not parse file. Error: {e}")

    print("\n--- Validation Report ---")
    if not errors and not warnings:
        print("✅ All documents with front matter are valid and up-to-date.")

    if warnings:
        print("\n⚠️ Warnings:")
        for warning in warnings:
            print(warning)

    if errors:
        print("\n❌ Errors:")
        for error in errors:
            print(error)

    print("\n--- Summary ---")
    print(f"Total files scanned: {len(markdown_files)}")
    print(f"Files with valid front matter: {len(all_docs)}")
    if fix_missing:
        print(f"Files automatically fixed: {fixed_count}")
    print(f"Warnings: {len(warnings)}")
    print(f"Errors: {len(errors)}")

    return len(errors) == 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Validate and manage the lifecycle of Markdown documents."
    )
    parser.add_argument(
        '--fix',
        action='store_true',
        help="Automatically add a default front matter template to any document that is missing one."
    )
    args = parser.parse_args()

    is_valid = validate_doc_lifecycle(fix_missing=args.fix)
    if not is_valid:
        # SystemExit works even when the site-provided exit() helper is absent.
        raise SystemExit(1)