git.stella-ops.org/.gitea/scripts/release/generate_changelog.py

#!/usr/bin/env python3
"""
generate_changelog.py - AI-assisted changelog generation for suite releases
Sprint: CI/CD Enhancement - Suite Release Pipeline
Generates changelogs from git commit history with optional AI enhancement.
Usage:
python generate_changelog.py <version> [options]
python generate_changelog.py 2026.04 --codename Nova
python generate_changelog.py 2026.04 --from-tag suite-2025.10 --ai
Arguments:
version Suite version (YYYY.MM format)
Options:
--codename NAME Release codename
--from-tag TAG Previous release tag (defaults to latest suite-* tag)
--to-ref REF End reference (defaults to HEAD)
--ai Use AI to enhance changelog descriptions
--output FILE Output file (defaults to stdout)
--format FMT Output format: markdown, json (default: markdown)
"""
import argparse
import json
import os
import re
import subprocess
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional

# Repository paths
SCRIPT_DIR = Path(__file__).parent
REPO_ROOT = SCRIPT_DIR.parent.parent.parent

# Module patterns for categorization
MODULE_PATTERNS = {
    "Authority": r"src/Authority/",
    "Attestor": r"src/Attestor/",
    "Concelier": r"src/Concelier/",
    "Scanner": r"src/Scanner/",
    "Policy": r"src/Policy/",
    "Signer": r"src/Signer/",
    "Excititor": r"src/Excititor/",
    "Gateway": r"src/Gateway/",
    "Scheduler": r"src/Scheduler/",
    "CLI": r"src/Cli/",
    "Orchestrator": r"src/Orchestrator/",
    "Notify": r"src/Notify/",
    "Infrastructure": r"(devops/|\.gitea/|docs/)",
    "Core": r"src/__Libraries/",
}
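
# Illustrative examples (hypothetical file paths) of how MODULE_PATTERNS maps
# changed files to modules:
#   "src/Scanner/StellaOps.Scanner.WebService/Program.cs" -> "Scanner"
#   "devops/release/build.sh"                             -> "Infrastructure"
# Files matching no pattern contribute to no module; a commit whose files all
# fall outside these patterns is categorized as "Other".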
# Commit type patterns (conventional commits)
COMMIT_TYPE_PATTERNS = {
    "breaking": r"^(feat|fix|refactor)(\(.+\))?!:|BREAKING CHANGE:",
    "security": r"^(security|fix)(\(.+\))?:|CVE-|vulnerability|exploit",
    "feature": r"^feat(\(.+\))?:",
    "fix": r"^fix(\(.+\))?:",
    "performance": r"^perf(\(.+\))?:|performance|optimize",
    "refactor": r"^refactor(\(.+\))?:",
    "docs": r"^docs(\(.+\))?:",
    "test": r"^test(\(.+\))?:",
    "chore": r"^chore(\(.+\))?:|^ci(\(.+\))?:|^build(\(.+\))?:",
}
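
# categorize_commit_type() below evaluates these patterns in dictionary order and
# returns the first match, e.g. (hypothetical commit subjects):
#   "feat(scanner)!: drop legacy report API"  -> "breaking" (matched before "feature")
#   "perf(concelier): cache advisory lookups" -> "performance"
#   "update README"                           -> "other" (no pattern matches)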


@dataclass
class Commit:
    sha: str
    short_sha: str
    message: str
    body: str
    author: str
    date: str
    files: List[str] = field(default_factory=list)
    type: str = "other"
    module: str = "Other"
    scope: str = ""


@dataclass
class ChangelogEntry:
    description: str
    commits: List[Commit]
    module: str
    type: str


def run_git(args: List[str], cwd: Path = REPO_ROOT) -> str:
    """Run git command and return output."""
    result = subprocess.run(
        ["git"] + args,
        capture_output=True,
        text=True,
        cwd=cwd,
    )
    if result.returncode != 0:
        raise RuntimeError(f"Git command failed: {result.stderr}")
    return result.stdout.strip()


def get_latest_suite_tag() -> Optional[str]:
    """Get the most recent suite-* tag."""
    try:
        output = run_git(["tag", "-l", "suite-*", "--sort=-creatordate"])
        tags = output.split("\n")
        return tags[0] if tags and tags[0] else None
    except RuntimeError:
        return None
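
# Illustrative example (hypothetical tags): if suite-2025.04 and suite-2025.10 both
# exist and suite-2025.10 was created more recently, the function returns
# "suite-2025.10" (tags are sorted newest-first by creation date).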


def get_commits_between(from_ref: str, to_ref: str = "HEAD") -> List[Commit]:
    """Get commits between two refs."""
    # Each record starts on a separator line so that the file names emitted by
    # --name-only stay attached to the commit they belong to. The header line
    # holds: sha|short_sha|subject|author|date. The commit body is not captured
    # here: a multi-line body would break the line-oriented parsing, and none of
    # the output formats use it.
    separator = "---COMMIT_SEPARATOR---"
    format_str = f"{separator}%n%H|%h|%s|%an|%aI"
    try:
        output = run_git([
            "log",
            f"{from_ref}..{to_ref}",
            f"--format={format_str}",
            "--name-only",
        ])
    except RuntimeError:
        # If from_ref doesn't exist, get all commits up to to_ref
        output = run_git([
            "log",
            to_ref,
            "-100",  # Limit to last 100 commits
            f"--format={format_str}",
            "--name-only",
        ])
    commits = []
    for entry in output.split(separator):
        entry = entry.strip()
        if not entry:
            continue
        lines = entry.split("\n")
        # Parse commit info from the header line
        parts = lines[0].split("|")
        if len(parts) < 5:
            continue
        # Get changed files (remaining non-empty lines after the header)
        files = [f.strip() for f in lines[1:] if f.strip()]
        commit = Commit(
            sha=parts[0],
            short_sha=parts[1],
            message=parts[2],
            body="",
            author=parts[3],
            date=parts[4],
            files=files,
        )
        # Categorize commit
        commit.type = categorize_commit_type(commit.message)
        commit.module = categorize_commit_module(commit.files, commit.message)
        commit.scope = extract_scope(commit.message)
        commits.append(commit)
    return commits
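
# Illustrative record produced by the git invocation above (hypothetical commit):
#   ---COMMIT_SEPARATOR---
#   3f2a9c1d...|3f2a9c1|feat(scanner): add SBOM diff|Jane Doe|2026-03-01T10:15:00+02:00
#
#   src/Scanner/Diff/SbomDiffer.cs
# which parses into Commit(short_sha="3f2a9c1", message="feat(scanner): add SBOM diff",
# files=["src/Scanner/Diff/SbomDiffer.cs"], type="feature", module="Scanner", scope="scanner").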


def categorize_commit_type(message: str) -> str:
    """Categorize commit by type based on message."""
    for commit_type, pattern in COMMIT_TYPE_PATTERNS.items():
        if re.search(pattern, message, re.IGNORECASE):
            return commit_type
    return "other"


def categorize_commit_module(files: List[str], message: str) -> str:
    """Categorize commit by module based on changed files."""
    module_counts: Dict[str, int] = defaultdict(int)
    for file in files:
        for module, pattern in MODULE_PATTERNS.items():
            if re.search(pattern, file):
                module_counts[module] += 1
                break
    if module_counts:
        return max(module_counts, key=module_counts.get)
    # Try to extract from message scope
    scope_match = re.match(r"^\w+\((\w+)\):", message)
    if scope_match:
        scope = scope_match.group(1).lower()
        for module in MODULE_PATTERNS:
            if module.lower() == scope:
                return module
    return "Other"


def extract_scope(message: str) -> str:
    """Extract scope from conventional commit message."""
    match = re.match(r"^\w+\(([^)]+)\):", message)
    return match.group(1) if match else ""
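
# Illustrative examples (hypothetical subjects):
#   extract_scope("feat(scanner): add SBOM diff") -> "scanner"
#   extract_scope("chore: bump dependencies")     -> ""  (no scope present)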


def group_commits_by_type_and_module(
    commits: List[Commit],
) -> Dict[str, Dict[str, List[Commit]]]:
    """Group commits by type and module."""
    grouped: Dict[str, Dict[str, List[Commit]]] = defaultdict(lambda: defaultdict(list))
    for commit in commits:
        grouped[commit.type][commit.module].append(commit)
    return grouped
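
# The returned structure has the shape {type: {module: [Commit, ...]}}, e.g.
# (hypothetical): {"feature": {"Scanner": [...]}, "fix": {"Authority": [...]}}.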


def generate_markdown_changelog(
    version: str,
    codename: str,
    commits: List[Commit],
    ai_enhanced: bool = False,
) -> str:
    """Generate markdown changelog."""
    grouped = group_commits_by_type_and_module(commits)
    lines = [
        f"# Changelog - StellaOps {version} \"{codename}\"",
        "",
        f"Release Date: {datetime.now(timezone.utc).strftime('%Y-%m-%d')}",
        "",
    ]
    # Order of sections
    section_order = [
        ("breaking", "Breaking Changes"),
        ("security", "Security"),
        ("feature", "Features"),
        ("fix", "Bug Fixes"),
        ("performance", "Performance"),
        ("refactor", "Refactoring"),
        ("docs", "Documentation"),
        ("other", "Other Changes"),
    ]
    for type_key, section_title in section_order:
        if type_key not in grouped:
            continue
        modules = grouped[type_key]
        if not modules:
            continue
        lines.append(f"## {section_title}")
        lines.append("")
        # Sort modules alphabetically
        for module in sorted(modules.keys()):
            commits_in_module = modules[module]
            if not commits_in_module:
                continue
            lines.append(f"### {module}")
            lines.append("")
            for commit in commits_in_module:
                # Clean up message
                msg = commit.message
                # Remove conventional commit prefix for display
                msg = re.sub(r"^\w+(\([^)]+\))?[!]?:\s*", "", msg)
                if ai_enhanced:
                    # Placeholder for AI-enhanced description
                    lines.append(f"- {msg} ([{commit.short_sha}])")
                else:
                    lines.append(f"- {msg} (`{commit.short_sha}`)")
            lines.append("")
    # Add statistics
    lines.extend([
        "---",
        "",
        "## Statistics",
        "",
        f"- **Total Commits:** {len(commits)}",
        f"- **Contributors:** {len(set(c.author for c in commits))}",
        f"- **Files Changed:** {len(set(f for c in commits for f in c.files))}",
        "",
    ])
    return "\n".join(lines)


def generate_json_changelog(
    version: str,
    codename: str,
    commits: List[Commit],
) -> str:
    """Generate JSON changelog."""
    grouped = group_commits_by_type_and_module(commits)
    changelog = {
        "version": version,
        "codename": codename,
        "date": datetime.now(timezone.utc).isoformat(),
        "statistics": {
            "totalCommits": len(commits),
            "contributors": len(set(c.author for c in commits)),
            "filesChanged": len(set(f for c in commits for f in c.files)),
        },
        "sections": {},
    }
    for type_key, modules in grouped.items():
        if not modules:
            continue
        changelog["sections"][type_key] = {}
        for module, module_commits in modules.items():
            changelog["sections"][type_key][module] = [
                {
                    "sha": c.short_sha,
                    "message": c.message,
                    "author": c.author,
                    "date": c.date,
                }
                for c in module_commits
            ]
    return json.dumps(changelog, indent=2, ensure_ascii=False)
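
# Abbreviated shape of the JSON output (illustrative values):
#   {
#     "version": "2026.04",
#     "codename": "Nova",
#     "date": "2026-04-15T08:00:00+00:00",
#     "statistics": {"totalCommits": 42, "contributors": 7, "filesChanged": 310},
#     "sections": {"feature": {"Scanner": [{"sha": "3f2a9c1", "message": "...", ...}]}}
#   }
# Unlike the markdown generator, this writer emits every commit type (including
# "test" and "chore") under "sections".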


def enhance_with_ai(changelog: str, api_key: Optional[str] = None) -> str:
    """Enhance changelog using AI (if available)."""
    if not api_key:
        api_key = os.environ.get("AI_API_KEY")
    if not api_key:
        print("Warning: No AI API key provided, skipping AI enhancement", file=sys.stderr)
        return changelog
    # This is a placeholder for AI integration
    # In production, this would call Claude API or similar
    prompt = f"""
You are a technical writer creating release notes for a security platform.
Improve the following changelog by:
1. Making descriptions more user-friendly
2. Highlighting important changes
3. Adding context where helpful
4. Keeping it concise
Original changelog:
{changelog}
Generate improved changelog in the same markdown format.
"""
    # For now, return the original changelog
    # TODO: Implement actual AI API call
    print("Note: AI enhancement is a placeholder, returning original changelog", file=sys.stderr)
    return changelog
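
# A real implementation could send `prompt` to an LLM completion endpoint and return
# the generated text. Minimal sketch only; the endpoint URL, model name, and response
# shape below are assumptions (an OpenAI-style chat API), not a confirmed integration:
#
#   import requests  # assumed dependency
#   resp = requests.post(
#       "https://ai.example.internal/v1/chat/completions",  # hypothetical endpoint
#       headers={"Authorization": f"Bearer {api_key}"},
#       json={"model": "release-notes", "messages": [{"role": "user", "content": prompt}]},
#       timeout=120,
#   )
#   resp.raise_for_status()
#   return resp.json()["choices"][0]["message"]["content"]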


def main():
    parser = argparse.ArgumentParser(
        description="Generate changelog from git history",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("version", help="Suite version (YYYY.MM format)")
    parser.add_argument("--codename", default="", help="Release codename")
    parser.add_argument("--from-tag", help="Previous release tag")
    parser.add_argument("--to-ref", default="HEAD", help="End reference")
    parser.add_argument("--ai", action="store_true", help="Use AI enhancement")
    parser.add_argument("--output", "-o", help="Output file")
    parser.add_argument(
        "--format",
        choices=["markdown", "json"],
        default="markdown",
        help="Output format",
    )
    args = parser.parse_args()

    # Validate version format (standard suite versions are YYYY.04 or YYYY.10)
    if not re.match(r"^\d{4}\.(04|10)$", args.version):
        print(f"Warning: Non-standard version format: {args.version}", file=sys.stderr)

    # Determine from tag
    from_tag = args.from_tag
    if not from_tag:
        from_tag = get_latest_suite_tag()
        if from_tag:
            print(f"Using previous tag: {from_tag}", file=sys.stderr)
        else:
            print("No previous suite tag found, using last 100 commits", file=sys.stderr)
            from_tag = "HEAD~100"

    # Get commits
    print(f"Collecting commits from {from_tag} to {args.to_ref}...", file=sys.stderr)
    commits = get_commits_between(from_tag, args.to_ref)
    print(f"Found {len(commits)} commits", file=sys.stderr)
    if not commits:
        print("No commits found in range", file=sys.stderr)
        sys.exit(0)

    # Generate changelog
    codename = args.codename or "TBD"
    if args.format == "json":
        output = generate_json_changelog(args.version, codename, commits)
    else:
        output = generate_markdown_changelog(
            args.version, codename, commits, ai_enhanced=args.ai
        )
        if args.ai:
            output = enhance_with_ai(output)

    # Output
    if args.output:
        Path(args.output).write_text(output, encoding="utf-8")
        print(f"Changelog written to: {args.output}", file=sys.stderr)
    else:
        print(output)


if __name__ == "__main__":
    main()
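
# Example invocation from the repository root (output file name is illustrative):
#   python .gitea/scripts/release/generate_changelog.py 2026.04 --codename Nova \
#       --from-tag suite-2025.10 --format markdown --output changelog-2026.04.md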