Files
git.stella-ops.org/tools/slntools/nuget_normalizer.py
StellaOps Bot cec4265a40 save progress
2025-12-28 01:40:52 +02:00

627 lines
21 KiB
Python

#!/usr/bin/env python3
"""
StellaOps NuGet Version Normalizer.
Scans all .csproj files and normalizes NuGet package versions to the latest stable.
IMPORTANT: Packages centrally managed in Directory.Build.props (via PackageReference Update)
are automatically excluded from normalization. These packages are reported separately.
Usage:
python nuget_normalizer.py [OPTIONS]
Options:
--src-root PATH Root of src/ directory (default: ./src)
--repo-root PATH Root of repository (default: parent of src-root)
--dry-run Report without making changes
--report PATH Write JSON report to file
--exclude PACKAGE Exclude package from normalization (repeatable)
--check CI mode: exit 1 if normalization needed
-v, --verbose Verbose output
"""
import argparse
import json
import logging
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from lib.csproj_parser import find_all_csproj
from lib.models import NormalizationChange, NormalizationResult, PackageUsage
from lib.version_utils import is_stable, parse_version, select_latest_stable
# Module-level logger shared by every function in this script; its level and
# output format come from the logging.basicConfig() call in setup_logging().
logger = logging.getLogger(__name__)
def find_directory_build_props(repo_root: Path) -> list[Path]:
    """
    Find all Directory.Build.props files in the repository.

    Files located beneath a ``bin``, ``obj``, ``node_modules`` or ``.git``
    directory *inside* the repository are skipped. The check is made on the
    path relative to ``repo_root`` so that a repository which itself lives
    under a directory with one of those names (for example
    ``/builds/obj/repo``) is still scanned.

    Args:
        repo_root: Root of the repository

    Returns:
        Sorted list of paths to Directory.Build.props files
    """
    excluded_dirs = {"bin", "obj", "node_modules", ".git"}
    props_files: list[Path] = []
    for props_file in repo_root.rglob("Directory.Build.props"):
        # Only the directories between repo_root and the file decide exclusion;
        # components above repo_root say nothing about build output.
        inner_dirs = props_file.relative_to(repo_root).parts[:-1]
        if excluded_dirs.intersection(inner_dirs):
            continue
        props_files.append(props_file)
    # rglob order depends on the filesystem; sort so that callers keeping the
    # first definition they encounter behave identically on every machine.
    props_files.sort()
    return props_files
def scan_centrally_managed_packages(repo_root: Path) -> dict[str, tuple[str, Path]]:
    """
    Scan Directory.Build.props files for centrally managed package versions.

    These are packages defined with <PackageReference Update="..." Version="..."/>
    which override versions in individual csproj files. The element is
    recognised with the attributes in either order and with other attributes
    (for example ``Condition``) preceding them.

    When the same package is declared more than once, the first declaration
    encountered wins; later ones are ignored.

    Args:
        repo_root: Root of the repository

    Returns:
        Dictionary mapping package name to (version, props_file_path)
    """
    centrally_managed: dict[str, tuple[str, Path]] = {}
    props_files = find_directory_build_props(repo_root)
    logger.info(f"Scanning {len(props_files)} Directory.Build.props files for centrally managed packages")

    # <PackageReference Update="PackageName" Version="1.2.3" />
    # The leading [^>]* lets other attributes come before Update, mirroring the
    # Include pattern used when scanning .csproj files.
    update_first = re.compile(
        r'<PackageReference\s+[^>]*Update\s*=\s*"([^"]+)"[^>]*Version\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )
    # Same element with Version written before Update.
    version_first = re.compile(
        r'<PackageReference\s+[^>]*Version\s*=\s*"([^"]+)"[^>]*Update\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )
    # (pattern, group holding the package name, group holding the version)
    matchers = ((update_first, 1, 2), (version_first, 2, 1))

    for props_file in props_files:
        try:
            content = props_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            # An unreadable props file should not abort the whole scan.
            logger.warning(f"Failed to read {props_file}: {e}")
            continue

        for pattern, name_group, version_group in matchers:
            for match in pattern.finditer(content):
                package_name = match.group(name_group)
                version = match.group(version_group)
                if package_name in centrally_managed:
                    continue  # first declaration wins
                # Keep the props file path alongside the version for reporting.
                centrally_managed[package_name] = (version, props_file)
                logger.debug(f"Found centrally managed: {package_name} v{version} in {props_file}")

    logger.info(f"Found {len(centrally_managed)} centrally managed packages")
    return centrally_managed
def setup_logging(verbose: bool) -> None:
    """Initialise root logging: DEBUG when ``verbose`` is set, INFO otherwise."""
    if verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    logging.basicConfig(format="%(levelname)s: %(message)s", level=chosen_level)
def scan_all_packages(src_root: Path) -> dict[str, PackageUsage]:
    """
    Collect every PackageReference found in the .csproj files under src_root.

    Each project file is read as text and searched with two regular
    expressions, one per attribute order (Include before Version, and Version
    before Include). Files that cannot be read are logged and skipped.

    Args:
        src_root: Root of src/ directory

    Returns:
        Dictionary mapping package name to PackageUsage; each PackageUsage has
        its ``usages`` mapping filled with ``{csproj_path: version}``.
    """
    # Matches: <PackageReference Include="PackageName" Version="1.2.3" />
    # [^>]* also spans newlines, so multi-line elements are covered.
    include_first = re.compile(
        r'<PackageReference\s+[^>]*Include\s*=\s*"([^"]+)"[^>]*Version\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )
    # Same element with Version written before Include.
    version_first = re.compile(
        r'<PackageReference\s+[^>]*Version\s*=\s*"([^"]+)"[^>]*Include\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )
    # (pattern, group holding the package name, group holding the version)
    matchers = ((include_first, 1, 2), (version_first, 2, 1))

    project_files = find_all_csproj(src_root)
    logger.info(f"Scanning {len(project_files)} .csproj files for package references")

    found: dict[str, PackageUsage] = {}
    for project in project_files:
        try:
            xml_text = project.read_text(encoding="utf-8")
        except Exception as e:
            logger.warning(f"Failed to read {project}: {e}")
            continue

        for pattern, name_group, version_group in matchers:
            for hit in pattern.finditer(xml_text):
                name = hit.group(name_group)
                if name not in found:
                    found[name] = PackageUsage(package_name=name)
                # A later match for the same package in the same file
                # overwrites the earlier one.
                found[name].usages[project] = hit.group(version_group)

    logger.info(f"Found {len(found)} unique packages")
    return found
def calculate_normalizations(
    packages: dict[str, PackageUsage],
    exclude_packages: set[str],
    centrally_managed: dict[str, tuple[str, Path]] | None = None,
) -> tuple[list[NormalizationResult], list[tuple[str, str, Path]]]:
    """
    Calculate which packages need version normalization.

    Packages are processed in sorted name order. A package is left alone when
    it is centrally managed, explicitly excluded, or used with at most one
    distinct version. Package names are compared case-insensitively against
    ``exclude_packages`` and ``centrally_managed`` because NuGet package IDs
    are case-insensitive.

    Args:
        packages: Package usage data
        exclude_packages: Package names to exclude
        centrally_managed: Packages managed in Directory.Build.props (auto-excluded)

    Returns:
        Tuple of (normalization results, list of centrally managed packages that were skipped).
        Results carrying a ``skipped_reason`` describe packages that were
        inspected but must not be rewritten.
    """
    results: list[NormalizationResult] = []
    centrally_skipped: list[tuple[str, str, Path]] = []

    # Fold names once so lookups ignore case. setdefault keeps the first entry
    # if two keys differ only by case.
    managed_by_key: dict[str, tuple[str, Path]] = {}
    for managed_name, managed_entry in (centrally_managed or {}).items():
        managed_by_key.setdefault(managed_name.casefold(), managed_entry)
    excluded_keys = {name.casefold() for name in exclude_packages}

    for package_name, usage in sorted(packages.items()):
        package_key = package_name.casefold()

        # Skip centrally managed packages, but remember them for reporting.
        managed = managed_by_key.get(package_key)
        if managed is not None:
            version, props_file = managed
            centrally_skipped.append((package_name, version, props_file))
            logger.debug(f"Skipping centrally managed package: {package_name} (v{version} in {props_file})")
            continue

        if package_key in excluded_keys:
            logger.debug(f"Excluding package: {package_name}")
            continue

        versions = usage.get_all_versions()
        # A single version (or none) means there is nothing to reconcile.
        if len(versions) <= 1:
            continue

        # Parse every version exactly once; entries parse_version() rejects
        # (returns None for) are left out of target selection.
        parsed = []
        for raw_version in versions:
            parsed_version = parse_version(raw_version)
            if parsed_version is not None:
                parsed.append((parsed_version, raw_version))

        if not parsed:
            results.append(
                NormalizationResult(
                    package_name=package_name,
                    target_version="",
                    skipped_reason="No parseable versions found",
                )
            )
            continue

        target_version = select_latest_stable([raw for _, raw in parsed])
        if target_version is None:
            # No stable candidate was selected. Report the highest parseable
            # version for information only; the skipped_reason prevents any
            # rewrite. `parsed` is non-empty here, so max() is safe.
            highest = max(parsed, key=lambda item: item[0])[1]
            results.append(
                NormalizationResult(
                    package_name=package_name,
                    target_version=highest,
                    skipped_reason="Only prerelease versions available",
                )
            )
            continue

        result = NormalizationResult(
            package_name=package_name,
            target_version=target_version,
        )
        # NOTE(review): this is a plain string comparison over every recorded
        # usage, so a usage whose version string did not parse is also
        # scheduled for rewriting - confirm that this is intended.
        for csproj_path, current_version in usage.usages.items():
            if current_version != target_version:
                result.changes.append(
                    NormalizationChange(
                        csproj_path=csproj_path,
                        old_version=current_version,
                        new_version=target_version,
                    )
                )
        if result.changes:
            results.append(result)

    return results, centrally_skipped
def _replace_package_version(
    content: str,
    package_name: str,
    old_version: str,
    new_version: str,
) -> tuple[str, int]:
    """Swap old_version for new_version on package_name's PackageReference; return (text, hits)."""
    name = re.escape(package_name)
    old = re.escape(old_version)
    # Include="..." written before Version="..."
    include_first = re.compile(
        rf'(<PackageReference\s+[^>]*Include\s*=\s*"{name}"[^>]*Version\s*=\s*"){old}(")',
        re.IGNORECASE,
    )
    # Version="..." written before Include="..."
    version_first = re.compile(
        rf'(<PackageReference\s+[^>]*Version\s*=\s*"){old}("[^>]*Include\s*=\s*"{name}")',
        re.IGNORECASE,
    )

    def substitute(match: re.Match) -> str:
        # A callable replacement inserts new_version literally; a template
        # string would interpret backslashes and group references inside it.
        return f"{match.group(1)}{new_version}{match.group(2)}"

    for pattern in (include_first, version_first):
        updated, count = pattern.subn(substitute, content)
        if count:
            return updated, count
    return content, 0


def apply_normalizations(
    normalizations: list[NormalizationResult],
    dry_run: bool = False,
) -> int:
    """
    Apply version normalizations to csproj files.

    Results that carry a ``skipped_reason`` are ignored. Each remaining change
    rewrites the Version attribute of the matching PackageReference in place.
    Files are read and written without newline translation, so existing line
    endings (CRLF or LF) are preserved. A file that cannot be read or written
    is logged and skipped; processing continues with the next change.

    Args:
        normalizations: List of normalization results
        dry_run: If True, don't actually modify files

    Returns:
        Number of files modified (with dry_run, the number of files that have
        a pending change)
    """
    files_modified: set[Path] = set()

    for result in normalizations:
        if result.skipped_reason:
            continue

        for change in result.changes:
            csproj_path = change.csproj_path

            if dry_run:
                logger.info(
                    f"Would update {result.package_name} in {csproj_path.name}: "
                    f"{change.old_version} -> {change.new_version}"
                )
                files_modified.add(csproj_path)
                continue

            try:
                # newline="" switches off universal-newline translation so the
                # rewrite touches nothing but the version string.
                with csproj_path.open("r", encoding="utf-8", newline="") as handle:
                    content = handle.read()

                new_content, count = _replace_package_version(
                    content,
                    result.package_name,
                    change.old_version,
                    change.new_version,
                )
                if count == 0:
                    logger.warning(
                        f"Could not find pattern to update {result.package_name} "
                        f"in {csproj_path}"
                    )
                    continue

                with csproj_path.open("w", encoding="utf-8", newline="") as handle:
                    handle.write(new_content)
            except (OSError, UnicodeError) as e:
                logger.error(f"Failed to update {csproj_path}: {e}")
                continue

            files_modified.add(csproj_path)
            logger.info(
                f"Updated {result.package_name} in {csproj_path.name}: "
                f"{change.old_version} -> {change.new_version}"
            )

    return len(files_modified)
def generate_report(
    packages: dict[str, PackageUsage],
    normalizations: list[NormalizationResult],
    centrally_skipped: list[tuple[str, str, Path]] | None = None,
) -> dict:
    """
    Build the JSON-serialisable report describing a normalization run.

    Args:
        packages: Package usage data
        normalizations: Normalization results
        centrally_skipped: Packages skipped due to central management

    Returns:
        Report dictionary with the keys ``timestamp`` (UTC, ISO 8601),
        ``summary``, ``normalizations``, ``skipped`` and ``centrally_managed``.
    """
    managed = centrally_skipped if centrally_skipped is not None else []
    created_at = datetime.now(timezone.utc).isoformat()

    normalized_entries: list[dict] = []
    skipped_entries: list[dict] = []
    touched_files = set()

    for item in normalizations:
        if item.skipped_reason:
            # Skipped results list the versions seen, when the package is known.
            if item.package_name in packages:
                seen_versions = packages[item.package_name].get_all_versions()
            else:
                seen_versions = []
            skipped_entries.append(
                {
                    "package": item.package_name,
                    "reason": item.skipped_reason,
                    "versions": seen_versions,
                }
            )
            continue

        if not item.changes:
            continue

        touched_files.update(change.csproj_path for change in item.changes)
        normalized_entries.append(
            {
                "package": item.package_name,
                "target_version": item.target_version,
                "changes": [
                    {
                        "file": str(change.csproj_path),
                        "old": change.old_version,
                        "new": change.new_version,
                    }
                    for change in item.changes
                ],
            }
        )

    managed_entries = [
        {
            "package": package_name,
            "version": version,
            "managed_in": str(props_file),
        }
        for package_name, version, props_file in managed
    ]

    return {
        "timestamp": created_at,
        "summary": {
            "packages_scanned": len(packages),
            "packages_with_inconsistencies": len(normalizations),
            "packages_normalized": len(normalized_entries),
            "files_modified": len(touched_files),
            "packages_centrally_managed": len(managed),
        },
        "normalizations": normalized_entries,
        "skipped": skipped_entries,
        "centrally_managed": managed_entries,
    }
def print_summary(
    packages: dict[str, PackageUsage],
    normalizations: list[NormalizationResult],
    centrally_skipped: list[tuple[str, str, Path]],
    dry_run: bool,
) -> None:
    """Write a human-readable recap of the run to stdout.

    The recap lists the headline counts, the centrally managed packages, and
    the packages to normalize. Skipped packages are listed only when DEBUG
    logging is enabled. A closing marker is printed when ``dry_run`` is true.
    """
    rule = "=" * 60

    # Split the results into the ones to rewrite and the ones skipped.
    to_normalize = []
    skipped_results = []
    for item in normalizations:
        if item.skipped_reason:
            skipped_results.append(item)
        elif item.changes:
            to_normalize.append(item)

    print("\n" + rule)
    print("NuGet Version Normalization Summary")
    print(rule)
    print(f"\nPackages scanned: {len(packages)}")
    print(f"Packages with version inconsistencies: {len(normalizations)}")
    print(f"Packages to normalize: {len(to_normalize)}")
    print(f"Packages skipped (other reasons): {len(skipped_results)}")
    print(f"Packages centrally managed (auto-skipped): {len(centrally_skipped)}")

    if centrally_skipped:
        print("\nCentrally managed packages (in Directory.Build.props):")
        for package_name, version, props_file in sorted(centrally_skipped, key=lambda entry: entry[0]):
            # Long paths are shortened to the bare file name to keep lines readable.
            if len(str(props_file)) > 50:
                rel_path = props_file.name
            else:
                rel_path = props_file
            print(f" {package_name}: v{version} ({rel_path})")

    if to_normalize:
        print("\nPackages to normalize:")
        for result in sorted(to_normalize, key=lambda entry: entry.package_name):
            old_versions = {change.old_version for change in result.changes}
            print(
                f" {result.package_name}: {', '.join(sorted(old_versions))} -> {result.target_version}"
            )

    if skipped_results and logger.isEnabledFor(logging.DEBUG):
        print("\nSkipped packages:")
        for result in sorted(skipped_results, key=lambda entry: entry.package_name):
            print(f" {result.package_name}: {result.skipped_reason}")

    if dry_run:
        print("\n[DRY RUN - No files were modified]")
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser; the module docstring is reused as the epilog."""
    cli = argparse.ArgumentParser(
        description="Normalize NuGet package versions across all csproj files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    cli.add_argument(
        "--src-root",
        type=Path,
        default=Path("src"),
        help="Root of src/ directory (default: ./src)",
    )
    cli.add_argument(
        "--repo-root",
        type=Path,
        default=None,
        help="Root of repository for Directory.Build.props scanning (default: parent of src-root)",
    )
    cli.add_argument("--dry-run", action="store_true", help="Report without making changes")
    cli.add_argument("--report", type=Path, help="Write JSON report to file")
    cli.add_argument(
        "--exclude",
        action="append",
        dest="exclude_packages",
        default=[],
        help="Exclude package from normalization (repeatable)",
    )
    cli.add_argument("--check", action="store_true", help="CI mode: exit 1 if normalization needed")
    cli.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    return cli


def main() -> int:
    """Run the normalizer from the command line.

    Returns:
        Process exit code: 1 when a root directory is missing or when --check
        finds packages that need normalization, otherwise 0.
    """
    options = _build_arg_parser().parse_args()
    setup_logging(options.verbose)

    # Both roots must exist before any scanning starts.
    src_root = options.src_root.resolve()
    if not src_root.exists():
        logger.error(f"Source root does not exist: {src_root}")
        return 1

    if options.repo_root:
        repo_root = options.repo_root.resolve()
    else:
        # Default: the directory that contains src/.
        repo_root = src_root.parent
    if not repo_root.exists():
        logger.error(f"Repository root does not exist: {repo_root}")
        return 1

    logger.info(f"Source root: {src_root}")
    logger.info(f"Repository root: {repo_root}")

    # Central definitions are gathered first so they can be excluded below.
    centrally_managed = scan_centrally_managed_packages(repo_root)
    packages = scan_all_packages(src_root)
    if not packages:
        logger.info("No packages found")
        return 0

    normalizations, centrally_skipped = calculate_normalizations(
        packages, set(options.exclude_packages), centrally_managed
    )

    report = generate_report(packages, normalizations, centrally_skipped)
    if options.report:
        # A failed report write is logged but does not stop the run.
        try:
            serialized = json.dumps(report, indent=2, default=str)
            options.report.write_text(serialized, encoding="utf-8")
            logger.info(f"Report written to: {options.report}")
        except Exception as e:
            logger.error(f"Failed to write report: {e}")

    # --check never modifies files, so the summary treats it like a dry run.
    print_summary(packages, normalizations, centrally_skipped, options.dry_run or options.check)

    if options.check:
        pending = any(n.changes and not n.skipped_reason for n in normalizations)
        if pending:
            logger.error("Version normalization needed")
            return 1
        logger.info("All package versions are consistent")
        return 0

    if options.dry_run:
        apply_normalizations(normalizations, dry_run=True)
        return 0

    files_modified = apply_normalizations(normalizations, dry_run=False)
    print(f"\nModified {files_modified} files")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())