137 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			137 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			Python
		
	
	
	
| import glob
 | |
| import frontmatter
 | |
| import datetime
 | |
| import argparse
 | |
| from pathlib import Path
 | |
| 
 | |
# --- Configuration ---
# Directory names that are skipped when scanning for Markdown files.
EXCLUDE_DIRS = ['venv', 'node_modules', '.git']

# One snapshot of the current date, so that 'created' and 'review_by' below
# are always exactly 180 days apart (two separate today() calls could straddle
# midnight).
_TODAY = datetime.date.today()

# Default metadata template for the --fix option.
# NOTE: the dates are computed once, when this module is imported.
DEFAULT_METADATA_TEMPLATE = {
    'title': "Default Title",
    'status': "spring",
    'owner': "TBD",
    'created': _TODAY.strftime('%Y-%m-%d'),
    'review_by': (_TODAY + datetime.timedelta(days=180)).strftime('%Y-%m-%d'),
    'tags': ["untagged"],
}
 | |
| 
 | |
def get_project_files(project_root, exclude_dirs=None):
    """Return the Markdown files below *project_root*, respecting exclusions.

    Args:
        project_root: ``pathlib.Path`` of the directory to scan recursively.
        exclude_dirs: Optional iterable of directory names to skip. Defaults
            to the module-level ``EXCLUDE_DIRS``.

    Returns:
        A sorted list of file paths (as strings) matching ``*.md``.
    """
    if exclude_dirs is None:
        exclude_dirs = EXCLUDE_DIRS
    excluded = frozenset(exclude_dirs)

    return sorted(
        str(path)
        for path in project_root.rglob('*.md')
        # Compare only the components *below* the root: matching the full path
        # would exclude every file when the project itself lives under a
        # directory called e.g. "venv". is_file() skips directories that
        # happen to be named "*.md".
        if path.is_file()
        and excluded.isdisjoint(path.relative_to(project_root).parts)
    )
 | |
| 
 | |
def add_default_frontmatter(file_path):
    """Add a default YAML front matter block to a file that lacks one.

    The file's current text becomes the body of a new post whose metadata is
    taken from ``DEFAULT_METADATA_TEMPLATE``; the result is written back to
    the same file.

    Args:
        file_path: Path of the file to rewrite in place.

    Returns:
        True if the file was rewritten, False if any step failed. Failures
        are printed as a ``[CRITICAL]`` line instead of being raised.
    """
    try:
        with open(file_path, 'r+', encoding='utf-8') as f:
            content = f.read()

            # Build the full replacement text before touching the file, so a
            # serialization failure cannot leave it half-written.
            new_post = frontmatter.Post(content, **DEFAULT_METADATA_TEMPLATE)
            serialized = frontmatter.dumps(new_post)

            f.seek(0)
            f.write(serialized)
            # Drop any leftover tail in case the serialized text is shorter
            # than what was previously on disk.
            f.truncate()
    except Exception as e:
        # Deliberately broad: one unfixable file must not abort the whole scan.
        print(f"[CRITICAL] {file_path}: Could not apply fix. Error: {e}")
        return False
    else:
        print(f"[FIXED] {file_path}: Added default front matter.")
        return True
 | |
| 
 | |
def _coerce_review_date(value):
    """Convert a ``review_by`` value to ``datetime.date``; raise ValueError if it cannot be."""
    # datetime.datetime subclasses datetime.date, so it has to be tested first;
    # comparing a datetime with a date would raise TypeError.
    if isinstance(value, datetime.datetime):
        return value.date()
    if isinstance(value, datetime.date):
        return value
    if isinstance(value, str):
        return datetime.datetime.strptime(value, '%Y-%m-%d').date()
    raise ValueError(f"unsupported type {type(value).__name__}")


def _check_metadata(md_file, metadata):
    """Validate one document's front matter and return ``(errors, warnings)`` message lists."""
    required_fields = ['title', 'status', 'owner', 'created', 'review_by']
    missing_fields = [field for field in required_fields if field not in metadata]
    if missing_fields:
        # Without the required fields the remaining checks are meaningless.
        return [f"[ERROR] {md_file}: Missing required fields: {', '.join(missing_fields)}"], []

    errors = []
    warnings = []

    allowed_statuses = ['spring', 'summer', 'autumn', 'winter']
    status = metadata.get('status')
    if status not in allowed_statuses:
        errors.append(f"[ERROR] {md_file}: Invalid status '{status}'. Must be one of {allowed_statuses}")

    review_by = metadata.get('review_by')
    if review_by:
        try:
            review_by_date = _coerce_review_date(review_by)
        except ValueError as e:
            errors.append(f"[ERROR] {md_file}: Invalid review_by value '{review_by}' (expected YYYY-MM-DD). Error: {e}")
        else:
            if review_by_date < datetime.date.today():
                warnings.append(f"[WARNING] {md_file}: Review date ({review_by_date}) has passed.")

    return errors, warnings


def validate_doc_lifecycle(fix_missing=False):
    """
    Scans and validates markdown files, with an option to fix files missing front matter.

    The files are collected with ``get_project_files`` starting two directory
    levels above this script. A report and a summary are printed to stdout.

    Args:
        fix_missing: When True, files without front matter are rewritten with
            the default template via ``add_default_frontmatter``; when False
            they are only reported as skipped.

    Returns:
        True if no errors were recorded (warnings do not count), else False.
    """
    project_root = Path(__file__).parent.parent
    markdown_files = get_project_files(project_root)

    print(f"Scanning {len(markdown_files)} Markdown files (vendor directories excluded)...")

    all_docs = []
    errors = []
    warnings = []
    fixed_count = 0

    for md_file in markdown_files:
        try:
            metadata = frontmatter.load(md_file).metadata
        except Exception as e:
            # Deliberately broad: an unreadable file is reported, not fatal.
            errors.append(f"[CRITICAL] {md_file}: Could not parse file. Error: {e}")
            continue

        if not metadata:
            if not fix_missing:
                warnings.append(f"[SKIPPED] {md_file}: No YAML front matter found. Use --fix to add a template.")
            elif add_default_frontmatter(md_file):
                fixed_count += 1
            else:
                # A failed fix must be reflected in the return value / exit status.
                errors.append(f"[ERROR] {md_file}: Could not add default front matter.")
            continue

        doc_errors, doc_warnings = _check_metadata(md_file, metadata)
        errors.extend(doc_errors)
        warnings.extend(doc_warnings)

        # Only error-free documents count as valid. 'path' is written last so
        # a metadata key of the same name cannot overwrite it.
        if not doc_errors:
            all_docs.append({**metadata, 'path': md_file})

    print("\n--- Validation Report ---")

    if not errors and not warnings:
        print("✅ All documents with front matter are valid and up-to-date.")

    if warnings:
        print("\n⚠️ Warnings:")
        for warning in warnings:
            print(warning)

    if errors:
        print("\n❌ Errors:")
        for error in errors:
            print(error)

    print("\n--- Summary ---")
    print(f"Total files scanned: {len(markdown_files)}")
    print(f"Files with valid front matter: {len(all_docs)}")
    if fix_missing:
        print(f"Files automatically fixed: {fixed_count}")
    print(f"Warnings: {len(warnings)}")
    print(f"Errors: {len(errors)}")

    return len(errors) == 0
 | |
| 
 | |
if __name__ == "__main__":
    # Command-line entry point: parse the --fix flag, run the validation and
    # report failure through the process exit status.
    parser = argparse.ArgumentParser(description="Validate and manage the lifecycle of Markdown documents.")
    parser.add_argument(
        '--fix',
        action='store_true',
        help="Automatically add a default front matter template to any document that is missing one."
    )
    args = parser.parse_args()

    is_valid = validate_doc_lifecycle(fix_missing=args.fix)
    if not is_valid:
        # The bare exit() helper is injected by the `site` module and is not
        # guaranteed to exist (e.g. under `python -S`); SystemExit always is.
        raise SystemExit(1)