git.stella-ops.org/tools/slntools/nuget_normalizer.py

#!/usr/bin/env python3
"""
StellaOps NuGet Version Normalizer.

Scans all .csproj files and normalizes NuGet package versions to the latest stable.

IMPORTANT: Packages centrally managed in Directory.Build.props (via PackageReference Update)
are automatically excluded from normalization. These packages are reported separately.

Usage:
    python nuget_normalizer.py [OPTIONS]

Options:
    --src-root PATH       Root of src/ directory (default: ./src)
    --repo-root PATH      Root of repository (default: parent of src-root)
    --dry-run             Report without making changes
    --report PATH         Write JSON report to file
    --exclude PACKAGE     Exclude package from normalization (repeatable)
    --check               CI mode: exit 1 if normalization needed
    -v, --verbose         Verbose output
"""

import argparse
import json
import logging
import re
import sys
from datetime import datetime, timezone
from pathlib import Path

from lib.csproj_parser import find_all_csproj
from lib.models import NormalizationChange, NormalizationResult, PackageUsage
from lib.version_utils import is_stable, parse_version, select_latest_stable

# Module-level logger shared by the functions below.
logger = logging.getLogger(__name__)


def find_directory_build_props(repo_root: Path) -> list[Path]:
    """
    Find all Directory.Build.props files in the repository.

    Files located below a ``bin``, ``obj``, ``node_modules`` or ``.git``
    directory are ignored. Only the path components *below* ``repo_root`` are
    inspected, so a repository that is itself checked out under a directory
    with one of those names (e.g. ``/work/obj/repo``) is still scanned.

    Args:
        repo_root: Root of the repository

    Returns:
        Sorted list of paths to Directory.Build.props files (sorted so the
        result does not depend on filesystem enumeration order)
    """
    excluded_dirs = {"bin", "obj", "node_modules", ".git"}
    props_files = [
        props_file
        for props_file in repo_root.rglob("Directory.Build.props")
        # Look only at the part of the path that lies inside the repository.
        if excluded_dirs.isdisjoint(props_file.relative_to(repo_root).parts)
    ]
    props_files.sort()
    return props_files


def scan_centrally_managed_packages(repo_root: Path) -> dict[str, tuple[str, Path]]:
    """
    Scan Directory.Build.props files for centrally managed package versions.

    These are packages defined with <PackageReference Update="..." Version="..."/>
    which override versions in individual csproj files.

    The Update and Version attributes may appear in either order and may be
    preceded by other attributes (e.g. Condition). XML comments are removed
    before matching, so commented-out references are not reported. When a
    package is defined more than once, the first definition found is kept.
    Files that cannot be read or decoded as UTF-8 are logged and skipped.

    Args:
        repo_root: Root of the repository

    Returns:
        Dictionary mapping package name to (version, props_file_path)
    """
    centrally_managed: dict[str, tuple[str, Path]] = {}

    props_files = find_directory_build_props(repo_root)
    logger.info(f"Scanning {len(props_files)} Directory.Build.props files for centrally managed packages")

    # <!-- ... --> blocks, possibly spanning several lines.
    comment_pattern = re.compile(r"<!--.*?-->", re.DOTALL)

    # Pattern for PackageReference Update (central version management)
    # <PackageReference Update="PackageName" Version="1.2.3" />
    # The leading [^>]* allows other attributes before Update.
    update_pattern = re.compile(
        r'<PackageReference\s+[^>]*Update\s*=\s*"([^"]+)"[^>]*Version\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )

    # Alternative pattern when Version comes first
    update_pattern_alt = re.compile(
        r'<PackageReference\s+[^>]*Version\s*=\s*"([^"]+)"[^>]*Update\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )

    # (pattern, group holding the package name, group holding the version)
    matchers = (
        (update_pattern, 1, 2),
        (update_pattern_alt, 2, 1),
    )

    for props_file in props_files:
        try:
            content = props_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            logger.warning(f"Failed to read {props_file}: {e}")
            continue

        # Ignore anything that is commented out in the props file.
        content = comment_pattern.sub("", content)

        for pattern, name_group, version_group in matchers:
            for match in pattern.finditer(content):
                package_name = match.group(name_group)
                version = match.group(version_group)
                # Store with the props file path for reporting; first hit wins.
                if package_name not in centrally_managed:
                    centrally_managed[package_name] = (version, props_file)
                    logger.debug(f"Found centrally managed: {package_name} v{version} in {props_file}")

    logger.info(f"Found {len(centrally_managed)} centrally managed packages")
    return centrally_managed


def setup_logging(verbose: bool) -> None:
    """Set up logging: DEBUG level when verbose is true, INFO otherwise."""
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO

    logging.basicConfig(format="%(levelname)s: %(message)s", level=threshold)


def scan_all_packages(src_root: Path) -> dict[str, PackageUsage]:
    """
    Collect the PackageReference entries of every .csproj file under src_root.

    Each reference is recorded as ``usages[csproj_path] = version`` on the
    PackageUsage of its package, so a later match for the same package in the
    same file replaces an earlier one. Files that cannot be read are logged
    and skipped.

    Args:
        src_root: Root of src/ directory

    Returns:
        Dictionary mapping package name to PackageUsage
    """
    # <PackageReference Include="PackageName" Version="1.2.3" />
    # ([^>]* also spans line breaks, so multi-line tags are matched.)
    include_first = re.compile(
        r'<PackageReference\s+[^>]*Include\s*=\s*"([^"]+)"[^>]*Version\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )
    # <PackageReference Version="1.2.3" Include="PackageName" />
    version_first = re.compile(
        r'<PackageReference\s+[^>]*Version\s*=\s*"([^"]+)"[^>]*Include\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )

    project_files = find_all_csproj(src_root)
    logger.info(f"Scanning {len(project_files)} .csproj files for package references")

    found: dict[str, PackageUsage] = {}
    for project_file in project_files:
        try:
            text = project_file.read_text(encoding="utf-8")
        except Exception as e:
            logger.warning(f"Failed to read {project_file}: {e}")
            continue

        # (name, version) pairs: Include-first matches, then Version-first ones.
        references = [(m.group(1), m.group(2)) for m in include_first.finditer(text)]
        references.extend((m.group(2), m.group(1)) for m in version_first.finditer(text))

        for name, version in references:
            usage = found.get(name)
            if usage is None:
                usage = found[name] = PackageUsage(package_name=name)
            usage.usages[project_file] = version

    logger.info(f"Found {len(found)} unique packages")
    return found


def calculate_normalizations(
    packages: dict[str, PackageUsage],
    exclude_packages: set[str],
    centrally_managed: dict[str, tuple[str, Path]] | None = None,
) -> tuple[list[NormalizationResult], list[tuple[str, str, Path]]]:
    """
    Calculate which packages need version normalization.

    A package is considered only when it is used with more than one version.
    The target is the latest stable version among the versions already in use.
    Packages whose versions cannot be parsed, or that only have prerelease
    versions, are returned with a ``skipped_reason`` and no changes.

    Package names are compared case-insensitively against ``exclude_packages``
    and ``centrally_managed`` because NuGet package IDs are case-insensitive.

    Args:
        packages: Package usage data
        exclude_packages: Package names to exclude
        centrally_managed: Packages managed in Directory.Build.props (auto-excluded)

    Returns:
        Tuple of (normalization results, list of centrally managed packages that were skipped)
    """
    results: list[NormalizationResult] = []
    centrally_skipped: list[tuple[str, str, Path]] = []

    # Case-folded lookup tables; the first spelling of a managed package wins.
    managed_by_id: dict[str, tuple[str, Path]] = {}
    for managed_name, managed_entry in (centrally_managed or {}).items():
        managed_by_id.setdefault(managed_name.casefold(), managed_entry)
    excluded_ids = {name.casefold() for name in exclude_packages}

    for package_name, usage in sorted(packages.items()):
        package_id = package_name.casefold()

        # Skip centrally managed packages
        managed = managed_by_id.get(package_id)
        if managed is not None:
            version, props_file = managed
            centrally_skipped.append((package_name, version, props_file))
            logger.debug(f"Skipping centrally managed package: {package_name} (v{version} in {props_file})")
            continue

        if package_id in excluded_ids:
            logger.debug(f"Excluding package: {package_name}")
            continue

        versions = usage.get_all_versions()

        # Skip if only one version
        if len(versions) <= 1:
            continue

        # Drop versions parse_version cannot handle
        parseable_versions = [v for v in versions if parse_version(v) is not None]

        if not parseable_versions:
            results.append(
                NormalizationResult(
                    package_name=package_name,
                    target_version="",
                    skipped_reason="No parseable versions found",
                )
            )
            continue

        # Select latest stable version
        target_version = select_latest_stable(parseable_versions)

        if target_version is None:
            # No stable candidate: report the highest parseable version but do
            # not schedule any changes. parseable_versions is non-empty here,
            # so max() always has a value to return.
            results.append(
                NormalizationResult(
                    package_name=package_name,
                    target_version=max(parseable_versions, key=parse_version),
                    skipped_reason="Only prerelease versions available",
                )
            )
            continue

        # Create normalization result with changes
        result = NormalizationResult(
            package_name=package_name,
            target_version=target_version,
        )

        # NOTE(review): usages whose current version was not parseable are
        # also rewritten to the target here - confirm this is intended.
        for csproj_path, current_version in usage.usages.items():
            if current_version != target_version:
                result.changes.append(
                    NormalizationChange(
                        csproj_path=csproj_path,
                        old_version=current_version,
                        new_version=target_version,
                    )
                )

        if result.changes:
            results.append(result)

    return results, centrally_skipped


def apply_normalizations(
    normalizations: list[NormalizationResult],
    dry_run: bool = False,
) -> int:
    """
    Apply version normalizations to csproj files.

    Args:
        normalizations: List of normalization results
        dry_run: If True, don't actually modify files

    Returns:
        Number of files modified
    """
    files_modified: set[Path] = set()

    for result in normalizations:
        if result.skipped_reason:
            continue

        for change in result.changes:
            csproj_path = change.csproj_path

            if dry_run:
                logger.info(
                    f"Would update {result.package_name} in {csproj_path.name}: "
                    f"{change.old_version} -> {change.new_version}"
                )
                files_modified.add(csproj_path)
                continue

            try:
                content = csproj_path.read_text(encoding="utf-8")

                # Replace the specific package version
                # Pattern matches the PackageReference for this specific package
                pattern = re.compile(
                    rf'(<PackageReference\s+[^>]*Include\s*=\s*"{re.escape(result.package_name)}"'
                    rf'[^>]*Version\s*=\s*"){re.escape(change.old_version)}(")',
                    re.IGNORECASE,
                )

                new_content, count = pattern.subn(
                    rf"\g<1>{change.new_version}\g<2>",
                    content,
                )

                if count > 0:
                    csproj_path.write_text(new_content, encoding="utf-8")
                    files_modified.add(csproj_path)
                    logger.info(
                        f"Updated {result.package_name} in {csproj_path.name}: "
                        f"{change.old_version} -> {change.new_version}"
                    )
                else:
                    # Try alternative pattern
                    pattern_alt = re.compile(
                        rf'(<PackageReference\s+[^>]*Version\s*=\s*"){re.escape(change.old_version)}"'
                        rf'([^>]*Include\s*=\s*"{re.escape(result.package_name)}")',
                        re.IGNORECASE,
                    )

                    new_content, count = pattern_alt.subn(
                        rf'\g<1>{change.new_version}"\g<2>',
                        content,
                    )

                    if count > 0:
                        csproj_path.write_text(new_content, encoding="utf-8")
                        files_modified.add(csproj_path)
                        logger.info(
                            f"Updated {result.package_name} in {csproj_path.name}: "
                            f"{change.old_version} -> {change.new_version}"
                        )
                    else:
                        logger.warning(
                            f"Could not find pattern to update {result.package_name} "
                            f"in {csproj_path}"
                        )

            except Exception as e:
                logger.error(f"Failed to update {csproj_path}: {e}")

    return len(files_modified)


def generate_report(
    packages: dict[str, PackageUsage],
    normalizations: list[NormalizationResult],
    centrally_skipped: list[tuple[str, str, Path]] | None = None,
) -> dict:
    """
    Generate a JSON report of the normalization.

    Args:
        packages: Package usage data
        normalizations: Normalization results
        centrally_skipped: Packages skipped due to central management

    Returns:
        Report dictionary
    """
    if centrally_skipped is None:
        centrally_skipped = []

    # Count changes
    packages_normalized = sum(
        1 for n in normalizations if n.changes and not n.skipped_reason
    )
    files_modified = len(
        set(
            change.csproj_path
            for n in normalizations
            for change in n.changes
            if not n.skipped_reason
        )
    )

    report = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "summary": {
            "packages_scanned": len(packages),
            "packages_with_inconsistencies": len(normalizations),
            "packages_normalized": packages_normalized,
            "files_modified": files_modified,
            "packages_centrally_managed": len(centrally_skipped),
        },
        "normalizations": [],
        "skipped": [],
        "centrally_managed": [],
    }

    for result in normalizations:
        if result.skipped_reason:
            report["skipped"].append(
                {
                    "package": result.package_name,
                    "reason": result.skipped_reason,
                    "versions": packages[result.package_name].get_all_versions()
                    if result.package_name in packages
                    else [],
                }
            )
        elif result.changes:
            report["normalizations"].append(
                {
                    "package": result.package_name,
                    "target_version": result.target_version,
                    "changes": [
                        {
                            "file": str(change.csproj_path),
                            "old": change.old_version,
                            "new": change.new_version,
                        }
                        for change in result.changes
                    ],
                }
            )

    # Add centrally managed packages
    for package_name, version, props_file in centrally_skipped:
        report["centrally_managed"].append(
            {
                "package": package_name,
                "version": version,
                "managed_in": str(props_file),
            }
        )

    return report


def print_summary(
    packages: dict[str, PackageUsage],
    normalizations: list[NormalizationResult],
    centrally_skipped: list[tuple[str, str, Path]],
    dry_run: bool,
) -> None:
    """Print a summary of the normalization."""
    print("\n" + "=" * 60)
    print("NuGet Version Normalization Summary")
    print("=" * 60)

    changes_needed = [n for n in normalizations if n.changes and not n.skipped_reason]
    skipped = [n for n in normalizations if n.skipped_reason]

    print(f"\nPackages scanned: {len(packages)}")
    print(f"Packages with version inconsistencies: {len(normalizations)}")
    print(f"Packages to normalize: {len(changes_needed)}")
    print(f"Packages skipped (other reasons): {len(skipped)}")
    print(f"Packages centrally managed (auto-skipped): {len(centrally_skipped)}")

    if centrally_skipped:
        print("\nCentrally managed packages (in Directory.Build.props):")
        for package_name, version, props_file in sorted(centrally_skipped, key=lambda x: x[0]):
            rel_path = props_file.name if len(str(props_file)) > 50 else props_file
            print(f"  {package_name}: v{version} ({rel_path})")

    if changes_needed:
        print("\nPackages to normalize:")
        for result in sorted(changes_needed, key=lambda x: x.package_name):
            old_versions = set(c.old_version for c in result.changes)
            print(
                f"  {result.package_name}: {', '.join(sorted(old_versions))} -> {result.target_version}"
            )

    if skipped and logger.isEnabledFor(logging.DEBUG):
        print("\nSkipped packages:")
        for result in sorted(skipped, key=lambda x: x.package_name):
            print(f"  {result.package_name}: {result.skipped_reason}")

    if dry_run:
        print("\n[DRY RUN - No files were modified]")


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser; the module docstring is shown as epilog."""
    cli = argparse.ArgumentParser(
        description="Normalize NuGet package versions across all csproj files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    cli.add_argument(
        "--src-root",
        type=Path,
        default=Path("src"),
        help="Root of src/ directory (default: ./src)",
    )
    cli.add_argument(
        "--repo-root",
        type=Path,
        default=None,
        help="Root of repository for Directory.Build.props scanning (default: parent of src-root)",
    )
    cli.add_argument("--dry-run", action="store_true", help="Report without making changes")
    cli.add_argument("--report", type=Path, help="Write JSON report to file")
    cli.add_argument(
        "--exclude",
        action="append",
        dest="exclude_packages",
        default=[],
        help="Exclude package from normalization (repeatable)",
    )
    cli.add_argument("--check", action="store_true", help="CI mode: exit 1 if normalization needed")
    cli.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    return cli


def main() -> int:
    """
    Run the normalizer from the command line.

    Returns:
        Process exit code: 1 when a root directory is missing or when --check
        finds packages that need normalization, 0 otherwise
    """
    args = _build_arg_parser().parse_args()
    setup_logging(args.verbose)

    # Source root must exist.
    src_root = args.src_root.resolve()
    if not src_root.exists():
        logger.error(f"Source root does not exist: {src_root}")
        return 1

    # Repository root (for Directory.Build.props scanning) defaults to the
    # parent of the source root.
    if args.repo_root:
        repo_root = args.repo_root.resolve()
    else:
        repo_root = src_root.parent
    if not repo_root.exists():
        logger.error(f"Repository root does not exist: {repo_root}")
        return 1

    logger.info(f"Source root: {src_root}")
    logger.info(f"Repository root: {repo_root}")

    # Centrally managed packages first, then every package used by a csproj.
    centrally_managed = scan_centrally_managed_packages(repo_root)
    packages = scan_all_packages(src_root)

    if not packages:
        logger.info("No packages found")
        return 0

    # Work out what has to change (centrally managed packages are excluded).
    normalizations, centrally_skipped = calculate_normalizations(
        packages,
        set(args.exclude_packages),
        centrally_managed,
    )

    report = generate_report(packages, normalizations, centrally_skipped)

    # Optional JSON report; a failed write is logged but does not abort the run.
    if args.report:
        try:
            serialized = json.dumps(report, indent=2, default=str)
            args.report.write_text(serialized, encoding="utf-8")
            logger.info(f"Report written to: {args.report}")
        except Exception as e:
            logger.error(f"Failed to write report: {e}")

    read_only = args.dry_run or args.check
    print_summary(packages, normalizations, centrally_skipped, read_only)

    # Check mode only reports whether normalization is needed.
    if args.check:
        if any(n.changes and not n.skipped_reason for n in normalizations):
            logger.error("Version normalization needed")
            return 1
        logger.info("All package versions are consistent")
        return 0

    if args.dry_run:
        apply_normalizations(normalizations, dry_run=True)
    else:
        files_modified = apply_normalizations(normalizations, dry_run=False)
        print(f"\nModified {files_modified} files")

    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)