#!/usr/bin/env python3
"""
generate_changelog.py - AI-assisted changelog generation for suite releases

Sprint: CI/CD Enhancement - Suite Release Pipeline

Generates changelogs from git commit history with optional AI enhancement.

Usage:
    python generate_changelog.py VERSION [options]
    python generate_changelog.py 2026.04 --codename Nova
    python generate_changelog.py 2026.04 --from-tag suite-2025.10 --ai

Arguments:
    version           Suite version (YYYY.MM format)

Options:
    --codename NAME   Release codename
    --from-tag TAG    Previous release tag (defaults to latest suite-* tag)
    --to-ref REF      End reference (defaults to HEAD)
    --ai              Use AI to enhance changelog descriptions
    --output FILE     Output file (defaults to stdout)
    --format FMT      Output format: markdown, json (default: markdown)
"""

import argparse
import json
import os
import re
import subprocess
import sys
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from collections import defaultdict

# Repository paths
SCRIPT_DIR = Path(__file__).parent
REPO_ROOT = SCRIPT_DIR.parent.parent.parent

# Module patterns for categorization.
# Each value is a regex searched against a changed file path; the first
# pattern that matches a given file decides that file's module.
MODULE_PATTERNS = {
    "Authority": r"src/Authority/",
    "Attestor": r"src/Attestor/",
    "Concelier": r"src/Concelier/",
    "Scanner": r"src/Scanner/",
    "Policy": r"src/Policy/",
    "Signer": r"src/Signer/",
    "Excititor": r"src/Excititor/",
    "Gateway": r"src/Gateway/",
    "Scheduler": r"src/Scheduler/",
    "CLI": r"src/Cli/",
    "Orchestrator": r"src/Orchestrator/",
    "Notify": r"src/Notify/",
    "Infrastructure": r"(devops/|\.gitea/|docs/)",
    "Core": r"src/__Libraries/",
}

# Commit type patterns (conventional commits).
# Order matters: patterns are tried top to bottom and the first match wins.
# "security" must NOT match a plain "fix:" prefix, otherwise every bug fix
# would be filed under Security and the "fix" entry would be unreachable.
COMMIT_TYPE_PATTERNS = {
    # Any type may carry the "!" breaking marker, not just feat/fix/refactor.
    "breaking": r"^\w+(\(.+\))?!:|BREAKING CHANGE:",
    "security": r"^security(\(.+\))?:|^fix\(security\):|CVE-|vulnerability|exploit",
    "feature": r"^feat(\(.+\))?:",
    "fix": r"^fix(\(.+\))?:",
    "performance": r"^perf(\(.+\))?:|performance|optimize",
    "refactor": r"^refactor(\(.+\))?:",
    "docs": r"^docs(\(.+\))?:",
    "test": r"^test(\(.+\))?:",
    "chore": r"^chore(\(.+\))?:|^ci(\(.+\))?:|^build(\(.+\))?:",
}

# Delimiters used in the `git log` format. They are emitted by git through
# its %xNN hex escapes and are control characters that cannot reasonably occur
# in subjects, bodies or author names (unlike "|" or a newline). None of them
# is treated as whitespace by str.strip(), so run_git() leaves them intact.
_RECORD_START = "\x01"  # precedes every commit header
_FIELD_SEP = "\x02"  # between header fields
_HEADER_END = "\x03"  # after the header; --name-only file list follows

# Field order: sha, short sha, subject, body, author name, author date (ISO).
GIT_LOG_FORMAT = "%x01" + "%x02".join(["%H", "%h", "%s", "%b", "%an", "%aI"]) + "%x03"


@dataclass
class Commit:
    """A single parsed git commit plus its derived categorization."""

    sha: str
    short_sha: str
    message: str  # subject line
    body: str
    author: str
    date: str  # author date as printed by git's %aI
    files: List[str] = field(default_factory=list)
    type: str = "other"  # key of COMMIT_TYPE_PATTERNS, or "other"
    module: str = "Other"  # key of MODULE_PATTERNS, or "Other"
    scope: str = ""  # conventional-commit scope, "" when absent


@dataclass
class ChangelogEntry:
    """A changelog line item backed by one or more commits."""

    description: str
    commits: List[Commit]
    module: str
    type: str


def run_git(args: List[str], cwd: Path = REPO_ROOT) -> str:
    """Run a git command and return its stripped standard output.

    Args:
        args: Arguments passed to ``git`` (without the leading "git").
        cwd: Working directory for the command.

    Returns:
        The command's stdout with surrounding whitespace removed.

    Raises:
        RuntimeError: If git cannot be executed or exits with a non-zero
            status.
    """
    try:
        result = subprocess.run(
            ["git"] + args,
            capture_output=True,
            # Decode explicitly as UTF-8 instead of the locale encoding so
            # non-ASCII commit data is not garbled on other locales.
            text=True,
            encoding="utf-8",
            errors="replace",
            cwd=cwd,
        )
    except OSError as exc:
        # Keep a single failure type so callers' `except RuntimeError` also
        # covers a missing git binary or an invalid working directory.
        raise RuntimeError(f"Git command failed: {exc}") from exc

    if result.returncode != 0:
        raise RuntimeError(f"Git command failed: {result.stderr}")
    return result.stdout.strip()


def get_latest_suite_tag() -> Optional[str]:
    """Return the most recently created ``suite-*`` tag, or None.

    None is returned both when no such tag exists and when the git command
    fails.
    """
    try:
        output = run_git(["tag", "-l", "suite-*", "--sort=-creatordate"])
    except RuntimeError:
        return None

    tags = output.split("\n")
    return tags[0] if tags and tags[0] else None


def parse_git_log(output: str) -> List[Commit]:
    """Parse ``git log --format=GIT_LOG_FORMAT --name-only`` output.

    Each record starts with the record marker, carries six separator-delimited
    header fields, and ends its header with the header-end marker; whatever
    follows up to the next record is that commit's list of changed files.

    Args:
        output: Raw text produced by git with the format above.

    Returns:
        Parsed and categorized commits in the order they appear. Records that
        do not contain exactly six header fields are skipped.
    """
    commits: List[Commit] = []

    for record in output.split(_RECORD_START):
        if not record.strip():
            continue

        header, _, file_section = record.partition(_HEADER_END)
        fields = header.split(_FIELD_SEP)
        if len(fields) != 6:
            continue
        sha, short_sha, subject, body, author, date = fields

        # One path per line; blank lines separate the header from the list.
        files = [path.strip() for path in file_section.split("\n") if path.strip()]

        commit = Commit(
            sha=sha.strip(),
            short_sha=short_sha.strip(),
            message=subject.strip(),
            body=body.strip(),
            author=author.strip(),
            date=date.strip(),
            files=files,
        )

        # Categorize commit
        commit.type = categorize_commit_type(commit.message, commit.body)
        commit.module = categorize_commit_module(commit.files, commit.message)
        commit.scope = extract_scope(commit.message)

        commits.append(commit)

    return commits


def get_commits_between(from_ref: str, to_ref: str = "HEAD") -> List[Commit]:
    """Get commits between two refs.

    Args:
        from_ref: Exclusive start ref of the range.
        to_ref: Inclusive end ref of the range.

    Returns:
        Commits in ``from_ref..to_ref``. If that range cannot be listed (for
        example because ``from_ref`` does not exist), the last 100 commits
        reachable from ``to_ref`` are returned instead.

    Raises:
        RuntimeError: If the fallback git command fails as well.
    """
    log_format = f"--format={GIT_LOG_FORMAT}"

    try:
        output = run_git(["log", f"{from_ref}..{to_ref}", log_format, "--name-only"])
    except RuntimeError:
        # If from_ref doesn't exist, get all commits up to to_ref
        output = run_git([
            "log",
            to_ref,
            "-100",  # Limit to last 100 commits
            log_format,
            "--name-only",
        ])

    return parse_git_log(output)


def categorize_commit_type(message: str, body: str = "") -> str:
    """Categorize a commit by type.

    Args:
        message: Commit subject line.
        body: Optional commit body; a ``BREAKING CHANGE:`` / ``BREAKING-CHANGE:``
            footer in it marks the commit as breaking.

    Returns:
        The first key of COMMIT_TYPE_PATTERNS whose pattern matches the
        subject (case-insensitively), "breaking" for a breaking footer in the
        body, or "other" when nothing matches.
    """
    # The footer token is upper-case and starts a line of the body.
    if body and re.search(r"^BREAKING[ -]CHANGE:", body, re.MULTILINE):
        return "breaking"

    for commit_type, pattern in COMMIT_TYPE_PATTERNS.items():
        if re.search(pattern, message, re.IGNORECASE):
            return commit_type
    return "other"


def categorize_commit_module(files: List[str], message: str) -> str:
    """Categorize a commit by module.

    The module owning the most changed files wins. When no file matches any
    module pattern, the conventional-commit scope of the message is compared
    case-insensitively against the module names.

    Args:
        files: Paths changed by the commit.
        message: Commit subject line.

    Returns:
        A key of MODULE_PATTERNS, or "Other" when nothing matches.
    """
    module_counts: Dict[str, int] = defaultdict(int)

    for path in files:
        for module, pattern in MODULE_PATTERNS.items():
            if re.search(pattern, path):
                module_counts[module] += 1
                break  # a file is attributed to one module only

    if module_counts:
        return max(module_counts, key=module_counts.get)

    # Try to extract from message scope
    scope = extract_scope(message).lower()
    if scope:
        for module in MODULE_PATTERNS:
            if module.lower() == scope:
                return module

    return "Other"


def extract_scope(message: str) -> str:
    """Extract the scope from a conventional commit subject.

    Handles the optional breaking marker, e.g. ``feat(scanner)!: ...``.

    Returns:
        The text inside the parentheses, or "" when there is no scope.
    """
    match = re.match(r"^\w+\(([^)]+)\)!?:", message)
    return match.group(1) if match else ""


def group_commits_by_type_and_module(
    commits: List[Commit],
) -> Dict[str, Dict[str, List[Commit]]]:
    """Group commits by type, then by module, preserving commit order."""
    grouped: Dict[str, Dict[str, List[Commit]]] = defaultdict(lambda: defaultdict(list))

    for commit in commits:
        grouped[commit.type][commit.module].append(commit)

    return grouped


def generate_markdown_changelog(
    version: str,
    codename: str,
    commits: List[Commit],
    ai_enhanced: bool = False,
) -> str:
    """Generate a markdown changelog.

    Args:
        version: Suite version shown in the title.
        codename: Release codename shown in the title.
        commits: Commits to include.
        ai_enhanced: When True, commit references are rendered as
            ``([sha])`` instead of a backtick-quoted sha.

    Returns:
        The markdown document. Commit types that are not part of the section
        order below ("test" and "chore") get no section but are still counted
        in the statistics.
    """
    grouped = group_commits_by_type_and_module(commits)

    lines = [
        f"# Changelog - StellaOps {version} \"{codename}\"",
        "",
        f"Release Date: {datetime.now(timezone.utc).strftime('%Y-%m-%d')}",
        "",
    ]

    # Order of sections
    section_order = [
        ("breaking", "Breaking Changes"),
        ("security", "Security"),
        ("feature", "Features"),
        ("fix", "Bug Fixes"),
        ("performance", "Performance"),
        ("refactor", "Refactoring"),
        ("docs", "Documentation"),
        ("other", "Other Changes"),
    ]

    for type_key, section_title in section_order:
        # Membership test first: indexing the defaultdict would create the key.
        if type_key not in grouped:
            continue

        modules = grouped[type_key]
        if not modules:
            continue

        lines.append(f"## {section_title}")
        lines.append("")

        # Sort modules alphabetically
        for module in sorted(modules):
            commits_in_module = modules[module]
            if not commits_in_module:
                continue

            lines.append(f"### {module}")
            lines.append("")

            for commit in commits_in_module:
                # Remove conventional commit prefix for display
                msg = re.sub(r"^\w+(\([^)]+\))?[!]?:\s*", "", commit.message)

                if ai_enhanced:
                    # Placeholder for AI-enhanced description
                    lines.append(f"- {msg} ([{commit.short_sha}])")
                else:
                    lines.append(f"- {msg} (`{commit.short_sha}`)")

            lines.append("")

    # Add statistics
    lines.extend([
        "---",
        "",
        "## Statistics",
        "",
        f"- **Total Commits:** {len(commits)}",
        f"- **Contributors:** {len({c.author for c in commits})}",
        f"- **Files Changed:** {len({f for c in commits for f in c.files})}",
        "",
    ])

    return "\n".join(lines)


def generate_json_changelog(
    version: str,
    codename: str,
    commits: List[Commit],
) -> str:
    """Generate a JSON changelog.

    Args:
        version: Suite version.
        codename: Release codename.
        commits: Commits to include.

    Returns:
        A JSON document with ``version``, ``codename``, ``date``,
        ``statistics`` and ``sections`` (type -> module -> commit list).
        Unlike the markdown output, every commit type gets a section.
    """
    grouped = group_commits_by_type_and_module(commits)

    changelog = {
        "version": version,
        "codename": codename,
        "date": datetime.now(timezone.utc).isoformat(),
        "statistics": {
            "totalCommits": len(commits),
            "contributors": len({c.author for c in commits}),
            "filesChanged": len({f for c in commits for f in c.files}),
        },
        "sections": {},
    }

    for type_key, modules in grouped.items():
        if not modules:
            continue

        changelog["sections"][type_key] = {
            module: [
                {
                    "sha": c.short_sha,
                    "message": c.message,
                    "author": c.author,
                    "date": c.date,
                }
                for c in module_commits
            ]
            for module, module_commits in modules.items()
        }

    return json.dumps(changelog, indent=2, ensure_ascii=False)


def enhance_with_ai(changelog: str, api_key: Optional[str] = None) -> str:
    """Enhance a changelog using AI (if available).

    This is currently a placeholder: no API call is made and the input is
    returned unchanged.

    Args:
        changelog: Markdown changelog to enhance.
        api_key: API key; falls back to the ``AI_API_KEY`` environment
            variable when not given.

    Returns:
        The changelog text (unchanged for now).
    """
    if not api_key:
        api_key = os.environ.get("AI_API_KEY")

    if not api_key:
        print("Warning: No AI API key provided, skipping AI enhancement", file=sys.stderr)
        return changelog

    # This is a placeholder for AI integration
    # In production, this would call Claude API or similar
    # The prompt is built but not sent yet; kept so the TODO below only has
    # to add the transport.
    prompt = f"""
You are a technical writer creating release notes for a security platform.
Improve the following changelog by:
1. Making descriptions more user-friendly
2. Highlighting important changes
3. Adding context where helpful
4. Keeping it concise

Original changelog:
{changelog}

Generate improved changelog in the same markdown format.
"""  # noqa: F841

    # For now, return the original changelog
    # TODO: Implement actual AI API call
    print("Note: AI enhancement is a placeholder, returning original changelog", file=sys.stderr)
    return changelog


def main() -> None:
    """Parse command-line arguments and emit the changelog.

    Progress and warnings go to stderr; the changelog goes to ``--output`` or
    stdout. Exits with status 0 when the range contains no commits and with
    status 1 when git fails.
    """
    parser = argparse.ArgumentParser(
        description="Generate changelog from git history",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument("version", help="Suite version (YYYY.MM format)")
    parser.add_argument("--codename", default="", help="Release codename")
    parser.add_argument("--from-tag", help="Previous release tag")
    parser.add_argument("--to-ref", default="HEAD", help="End reference")
    parser.add_argument("--ai", action="store_true", help="Use AI enhancement")
    parser.add_argument("--output", "-o", help="Output file")
    parser.add_argument(
        "--format",
        choices=["markdown", "json"],
        default="markdown",
        help="Output format",
    )

    args = parser.parse_args()

    # Validate version format (only a warning: other versions still work)
    if not re.match(r"^\d{4}\.(04|10)$", args.version):
        print(f"Warning: Non-standard version format: {args.version}", file=sys.stderr)

    # Determine from tag
    from_tag = args.from_tag
    if not from_tag:
        from_tag = get_latest_suite_tag()
        if from_tag:
            print(f"Using previous tag: {from_tag}", file=sys.stderr)
        else:
            print("No previous suite tag found, using last 100 commits", file=sys.stderr)
            from_tag = "HEAD~100"

    # Get commits
    print(f"Collecting commits from {from_tag} to {args.to_ref}...", file=sys.stderr)
    try:
        commits = get_commits_between(from_tag, args.to_ref)
    except RuntimeError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)
    print(f"Found {len(commits)} commits", file=sys.stderr)

    if not commits:
        print("No commits found in range", file=sys.stderr)
        sys.exit(0)

    # Generate changelog
    codename = args.codename or "TBD"

    if args.format == "json":
        output = generate_json_changelog(args.version, codename, commits)
    else:
        output = generate_markdown_changelog(
            args.version, codename, commits, ai_enhanced=args.ai
        )

    if args.ai:
        if args.format == "json":
            # The enhancer rewrites markdown prose; running it over JSON
            # would not yield a valid JSON document.
            print(
                "Warning: --ai only applies to markdown output, skipping AI enhancement",
                file=sys.stderr,
            )
        else:
            output = enhance_with_ai(output)

    # Output
    if args.output:
        Path(args.output).write_text(output, encoding="utf-8")
        print(f"Changelog written to: {args.output}", file=sys.stderr)
    else:
        print(output)


if __name__ == "__main__":
    main()