git.stella-ops.org/.gitea/scripts/release/generate_changelog.py

#!/usr/bin/env python3
"""
generate_changelog.py - AI-assisted changelog generation for suite releases
Sprint: CI/CD Enhancement - Suite Release Pipeline
Generates changelogs from git commit history with optional AI enhancement.
Usage:
python generate_changelog.py <version> [options]
python generate_changelog.py 2026.04 --codename Nova
python generate_changelog.py 2026.04 --from-tag suite-2025.10 --ai
Arguments:
version Suite version (YYYY.MM format)
Options:
--codename NAME Release codename
--from-tag TAG Previous release tag (defaults to latest suite-* tag)
--to-ref REF End reference (defaults to HEAD)
--ai Use AI to enhance changelog descriptions
--output FILE Output file (defaults to stdout)
--format FMT Output format: markdown, json (default: markdown)
"""
import argparse
import json
import os
import re
import subprocess
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional

# Repository paths
SCRIPT_DIR = Path(__file__).parent
REPO_ROOT = SCRIPT_DIR.parent.parent.parent

# Module patterns for categorization
MODULE_PATTERNS = {
    "Authority": r"src/Authority/",
    "Attestor": r"src/Attestor/",
    "Concelier": r"src/Concelier/",
    "Scanner": r"src/Scanner/",
    "Policy": r"src/Policy/",
    "Signer": r"src/Signer/",
    "Excititor": r"src/Excititor/",
    "Gateway": r"src/Gateway/",
    "Scheduler": r"src/Scheduler/",
    "CLI": r"src/Cli/",
    "Orchestrator": r"src/Orchestrator/",
    "Notify": r"src/Notify/",
    "Infrastructure": r"(devops/|\.gitea/|docs/)",
    "Core": r"src/__Libraries/",
}
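
# Illustrative examples (hypothetical file paths) of how MODULE_PATTERNS maps
# changed files to modules:
#   "src/Scanner/StellaOps.Scanner.WebService/Program.cs" -> "Scanner"
#   "devops/release/build.sh"                             -> "Infrastructure"
# Files matching no pattern contribute to no module; a commit whose files all
# fall outside these patterns is categorized as "Other".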
# Commit type patterns (conventional commits)
COMMIT_TYPE_PATTERNS = {
    "breaking": r"^(feat|fix|refactor)(\(.+\))?!:|BREAKING CHANGE:",
    "security": r"^(security|fix)(\(.+\))?:|CVE-|vulnerability|exploit",
    "feature": r"^feat(\(.+\))?:",
    "fix": r"^fix(\(.+\))?:",
    "performance": r"^perf(\(.+\))?:|performance|optimize",
    "refactor": r"^refactor(\(.+\))?:",
    "docs": r"^docs(\(.+\))?:",
    "test": r"^test(\(.+\))?:",
    "chore": r"^chore(\(.+\))?:|^ci(\(.+\))?:|^build(\(.+\))?:",
}
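
# categorize_commit_type() below evaluates these patterns in dictionary order and
# returns the first match, e.g. (hypothetical commit subjects):
#   "feat(scanner)!: drop legacy report API"  -> "breaking" (matched before "feature")
#   "perf(concelier): cache advisory lookups" -> "performance"
#   "update README"                           -> "other" (no pattern matches)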


@dataclass
class Commit:
    sha: str
    short_sha: str
    message: str
    body: str
    author: str
    date: str
    files: List[str] = field(default_factory=list)
    type: str = "other"
    module: str = "Other"
    scope: str = ""


@dataclass
class ChangelogEntry:
    description: str
    commits: List[Commit]
    module: str
    type: str


def run_git(args: List[str], cwd: Path = REPO_ROOT) -> str:
    """Run git command and return output."""
    result = subprocess.run(
        ["git"] + args,
        capture_output=True,
        text=True,
        cwd=cwd,
    )
    if result.returncode != 0:
        raise RuntimeError(f"Git command failed: {result.stderr}")
    return result.stdout.strip()


def get_latest_suite_tag() -> Optional[str]:
    """Get the most recent suite-* tag."""
    try:
        output = run_git(["tag", "-l", "suite-*", "--sort=-creatordate"])
        tags = output.split("\n")
        return tags[0] if tags and tags[0] else None
    except RuntimeError:
        return None
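
# Illustrative example (hypothetical tags): if suite-2025.04 and suite-2025.10 both
# exist and suite-2025.10 was created more recently, the function returns
# "suite-2025.10" (tags are sorted newest-first by creation date).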


def get_commits_between(from_ref: str, to_ref: str = "HEAD") -> List[Commit]:
    """Get commits between two refs."""
    # Each record starts on a separator line so that the file names emitted by
    # --name-only stay attached to the commit they belong to. The header line
    # holds: sha|short_sha|subject|author|date. The commit body is not captured
    # here: a multi-line body would break the line-oriented parsing, and none of
    # the output formats use it.
    separator = "---COMMIT_SEPARATOR---"
    format_str = f"{separator}%n%H|%h|%s|%an|%aI"
    try:
        output = run_git([
            "log",
            f"{from_ref}..{to_ref}",
            f"--format={format_str}",
            "--name-only",
        ])
    except RuntimeError:
        # If from_ref doesn't exist, get all commits up to to_ref
        output = run_git([
            "log",
            to_ref,
            "-100",  # Limit to last 100 commits
            f"--format={format_str}",
            "--name-only",
        ])
    commits = []
    for entry in output.split(separator):
        entry = entry.strip()
        if not entry:
            continue
        lines = entry.split("\n")
        # Parse commit info from the header line
        parts = lines[0].split("|")
        if len(parts) < 5:
            continue
        # Get changed files (remaining non-empty lines after the header)
        files = [f.strip() for f in lines[1:] if f.strip()]
        commit = Commit(
            sha=parts[0],
            short_sha=parts[1],
            message=parts[2],
            body="",
            author=parts[3],
            date=parts[4],
            files=files,
        )
        # Categorize commit
        commit.type = categorize_commit_type(commit.message)
        commit.module = categorize_commit_module(commit.files, commit.message)
        commit.scope = extract_scope(commit.message)
        commits.append(commit)
    return commits
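
# Illustrative record produced by the git invocation above (hypothetical commit):
#   ---COMMIT_SEPARATOR---
#   3f2a9c1d...|3f2a9c1|feat(scanner): add SBOM diff|Jane Doe|2026-03-01T10:15:00+02:00
#
#   src/Scanner/Diff/SbomDiffer.cs
# which parses into Commit(short_sha="3f2a9c1", message="feat(scanner): add SBOM diff",
# files=["src/Scanner/Diff/SbomDiffer.cs"], type="feature", module="Scanner", scope="scanner").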


def categorize_commit_type(message: str) -> str:
    """Categorize commit by type based on message."""
    for commit_type, pattern in COMMIT_TYPE_PATTERNS.items():
        if re.search(pattern, message, re.IGNORECASE):
            return commit_type
    return "other"


def categorize_commit_module(files: List[str], message: str) -> str:
    """Categorize commit by module based on changed files."""
    module_counts: Dict[str, int] = defaultdict(int)
    for file in files:
        for module, pattern in MODULE_PATTERNS.items():
            if re.search(pattern, file):
                module_counts[module] += 1
                break
    if module_counts:
        return max(module_counts, key=module_counts.get)
    # Try to extract from message scope
    scope_match = re.match(r"^\w+\((\w+)\):", message)
    if scope_match:
        scope = scope_match.group(1).lower()
        for module in MODULE_PATTERNS:
            if module.lower() == scope:
                return module
    return "Other"


def extract_scope(message: str) -> str:
    """Extract scope from conventional commit message."""
    match = re.match(r"^\w+\(([^)]+)\):", message)
    return match.group(1) if match else ""
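
# Illustrative examples (hypothetical subjects):
#   extract_scope("feat(scanner): add SBOM diff") -> "scanner"
#   extract_scope("chore: bump dependencies")     -> ""  (no scope present)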


def group_commits_by_type_and_module(
    commits: List[Commit],
) -> Dict[str, Dict[str, List[Commit]]]:
    """Group commits by type and module."""
    grouped: Dict[str, Dict[str, List[Commit]]] = defaultdict(lambda: defaultdict(list))
    for commit in commits:
        grouped[commit.type][commit.module].append(commit)
    return grouped
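
# The returned structure has the shape {type: {module: [Commit, ...]}}, e.g.
# (hypothetical): {"feature": {"Scanner": [...]}, "fix": {"Authority": [...]}}.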


def generate_markdown_changelog(
    version: str,
    codename: str,
    commits: List[Commit],
    ai_enhanced: bool = False,
) -> str:
    """Generate markdown changelog."""
    grouped = group_commits_by_type_and_module(commits)
    lines = [
        f"# Changelog - StellaOps {version} \"{codename}\"",
        "",
        f"Release Date: {datetime.now(timezone.utc).strftime('%Y-%m-%d')}",
        "",
    ]
    # Order of sections
    section_order = [
        ("breaking", "Breaking Changes"),
        ("security", "Security"),
        ("feature", "Features"),
        ("fix", "Bug Fixes"),
        ("performance", "Performance"),
        ("refactor", "Refactoring"),
        ("docs", "Documentation"),
        ("other", "Other Changes"),
    ]
    for type_key, section_title in section_order:
        if type_key not in grouped:
            continue
        modules = grouped[type_key]
        if not modules:
            continue
        lines.append(f"## {section_title}")
        lines.append("")
        # Sort modules alphabetically
        for module in sorted(modules.keys()):
            commits_in_module = modules[module]
            if not commits_in_module:
                continue
            lines.append(f"### {module}")
            lines.append("")
            for commit in commits_in_module:
                # Clean up message
                msg = commit.message
                # Remove conventional commit prefix for display
                msg = re.sub(r"^\w+(\([^)]+\))?[!]?:\s*", "", msg)
                if ai_enhanced:
                    # Placeholder for AI-enhanced description
                    lines.append(f"- {msg} ([{commit.short_sha}])")
                else:
                    lines.append(f"- {msg} (`{commit.short_sha}`)")
            lines.append("")
    # Add statistics
    lines.extend([
        "---",
        "",
        "## Statistics",
        "",
        f"- **Total Commits:** {len(commits)}",
        f"- **Contributors:** {len(set(c.author for c in commits))}",
        f"- **Files Changed:** {len(set(f for c in commits for f in c.files))}",
        "",
    ])
    return "\n".join(lines)


def generate_json_changelog(
    version: str,
    codename: str,
    commits: List[Commit],
) -> str:
    """Generate JSON changelog."""
    grouped = group_commits_by_type_and_module(commits)
    changelog = {
        "version": version,
        "codename": codename,
        "date": datetime.now(timezone.utc).isoformat(),
        "statistics": {
            "totalCommits": len(commits),
            "contributors": len(set(c.author for c in commits)),
            "filesChanged": len(set(f for c in commits for f in c.files)),
        },
        "sections": {},
    }
    for type_key, modules in grouped.items():
        if not modules:
            continue
        changelog["sections"][type_key] = {}
        for module, module_commits in modules.items():
            changelog["sections"][type_key][module] = [
                {
                    "sha": c.short_sha,
                    "message": c.message,
                    "author": c.author,
                    "date": c.date,
                }
                for c in module_commits
            ]
    return json.dumps(changelog, indent=2, ensure_ascii=False)
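
# Abbreviated shape of the JSON output (illustrative values):
#   {
#     "version": "2026.04",
#     "codename": "Nova",
#     "date": "2026-04-15T08:00:00+00:00",
#     "statistics": {"totalCommits": 42, "contributors": 7, "filesChanged": 310},
#     "sections": {"feature": {"Scanner": [{"sha": "3f2a9c1", "message": "...", ...}]}}
#   }
# Unlike the markdown generator, this writer emits every commit type (including
# "test" and "chore") under "sections".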


def enhance_with_ai(changelog: str, api_key: Optional[str] = None) -> str:
    """Enhance changelog using AI (if available)."""
    if not api_key:
        api_key = os.environ.get("AI_API_KEY")
    if not api_key:
        print("Warning: No AI API key provided, skipping AI enhancement", file=sys.stderr)
        return changelog
    # This is a placeholder for AI integration
    # In production, this would call Claude API or similar
    prompt = f"""
You are a technical writer creating release notes for a security platform.
Improve the following changelog by:
1. Making descriptions more user-friendly
2. Highlighting important changes
3. Adding context where helpful
4. Keeping it concise
Original changelog:
{changelog}
Generate improved changelog in the same markdown format.
"""
    # For now, return the original changelog
    # TODO: Implement actual AI API call
    print("Note: AI enhancement is a placeholder, returning original changelog", file=sys.stderr)
    return changelog
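
# A real implementation could send `prompt` to an LLM completion endpoint and return
# the generated text. Minimal sketch only; the endpoint URL, model name, and response
# shape below are assumptions (an OpenAI-style chat API), not a confirmed integration:
#
#   import requests  # assumed dependency
#   resp = requests.post(
#       "https://ai.example.internal/v1/chat/completions",  # hypothetical endpoint
#       headers={"Authorization": f"Bearer {api_key}"},
#       json={"model": "release-notes", "messages": [{"role": "user", "content": prompt}]},
#       timeout=120,
#   )
#   resp.raise_for_status()
#   return resp.json()["choices"][0]["message"]["content"]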


def main():
    parser = argparse.ArgumentParser(
        description="Generate changelog from git history",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("version", help="Suite version (YYYY.MM format)")
    parser.add_argument("--codename", default="", help="Release codename")
    parser.add_argument("--from-tag", help="Previous release tag")
    parser.add_argument("--to-ref", default="HEAD", help="End reference")
    parser.add_argument("--ai", action="store_true", help="Use AI enhancement")
    parser.add_argument("--output", "-o", help="Output file")
    parser.add_argument(
        "--format",
        choices=["markdown", "json"],
        default="markdown",
        help="Output format",
    )
    args = parser.parse_args()

    # Validate version format (standard suite versions are YYYY.04 or YYYY.10)
    if not re.match(r"^\d{4}\.(04|10)$", args.version):
        print(f"Warning: Non-standard version format: {args.version}", file=sys.stderr)

    # Determine from tag
    from_tag = args.from_tag
    if not from_tag:
        from_tag = get_latest_suite_tag()
        if from_tag:
            print(f"Using previous tag: {from_tag}", file=sys.stderr)
        else:
            print("No previous suite tag found, using last 100 commits", file=sys.stderr)
            from_tag = "HEAD~100"

    # Get commits
    print(f"Collecting commits from {from_tag} to {args.to_ref}...", file=sys.stderr)
    commits = get_commits_between(from_tag, args.to_ref)
    print(f"Found {len(commits)} commits", file=sys.stderr)
    if not commits:
        print("No commits found in range", file=sys.stderr)
        sys.exit(0)

    # Generate changelog
    codename = args.codename or "TBD"
    if args.format == "json":
        output = generate_json_changelog(args.version, codename, commits)
    else:
        output = generate_markdown_changelog(
            args.version, codename, commits, ai_enhanced=args.ai
        )
        if args.ai:
            output = enhance_with_ai(output)

    # Output
    if args.output:
        Path(args.output).write_text(output, encoding="utf-8")
        print(f"Changelog written to: {args.output}", file=sys.stderr)
    else:
        print(output)


if __name__ == "__main__":
    main()
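
# Example invocation from the repository root (output file name is illustrative):
#   python .gitea/scripts/release/generate_changelog.py 2026.04 --codename Nova \
#       --from-tag suite-2025.10 --format markdown --output changelog-2026.04.md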