#!/usr/bin/env python3
"""
StellaOps NuGet Centralization Tool.

Centralizes NuGet package versions to src/Directory.Build.props for packages
used in multiple projects, and removes version attributes from individual
.csproj files.

This is the REVERSE of nuget_normalizer.py:
- nuget_normalizer: keeps versions in csproj files, normalizes to latest stable
- nuget_centralizer: moves shared packages to Directory.Build.props, removes versions from csproj

Usage:
    python nuget_centralizer.py [OPTIONS]

Options:
    --src-root PATH     Root of src/ directory (default: ./src)
    --dry-run           Report without making changes
    --report PATH       Write JSON report to file
    --exclude PACKAGE   Exclude package from centralization (repeatable)
    --min-usage N       Minimum number of projects using a package to centralize it (default: 2)
    --check             CI mode: exit 1 if centralization needed
    -v, --verbose       Verbose output
"""

import argparse
import json
import logging
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Set, Tuple

from lib.csproj_parser import find_all_csproj
from lib.models import PackageUsage
from lib.version_utils import select_latest_stable, parse_version

logger = logging.getLogger(__name__)

# Maps package name -> (target version, csproj files that reference it).
CentralizationPlan = Dict[str, Tuple[str, List[Path]]]

# NOTE(review): the XML literals below were reconstructed after the markup in
# this file was stripped by a text extraction step. Confirm the marker comment
# and the minimal props template against the repository's canonical version.
_CENTRALIZED_MARKER = "<!-- Centralized NuGet package versions -->"

# Opening of the centralized section: a comment starting with "Centralized"
# immediately followed by <ItemGroup>. Matching is deliberately tolerant of
# the rest of the comment text so an existing section is still recognized.
_CENTRALIZED_SECTION_OPEN = re.compile(
    r'(<!--\s*Centralized[^\n>]*-->\s*<ItemGroup>)',
    re.IGNORECASE,
)

# The whole centralized section: (opening)(body)(closing tag).
_CENTRALIZED_SECTION = re.compile(
    r'(<!--\s*Centralized[^\n>]*-->\s*<ItemGroup>)(.*?)(</ItemGroup>)',
    re.IGNORECASE | re.DOTALL,
)

# <PackageReference Update="Name" ... Version="x.y.z" ... /> inside the props file.
_PACKAGE_UPDATE_ENTRY = re.compile(
    r'<PackageReference\s+Update\s*=\s*"([^"]+)"[^>]*Version\s*=\s*"([^"]+)"[^>]*/?>',
    re.IGNORECASE,
)

# <PackageReference ... Include="Name" ... Version="x.y.z" in a .csproj file.
_PACKAGE_REF_INCLUDE_FIRST = re.compile(
    r'<PackageReference[^>]*Include\s*=\s*"([^"]+)"[^>]*Version\s*=\s*"([^"]+)"',
    re.IGNORECASE,
)

# Same element, but with the Version attribute written before Include.
_PACKAGE_REF_VERSION_FIRST = re.compile(
    r'<PackageReference[^>]*Version\s*=\s*"([^"]+)"[^>]*Include\s*=\s*"([^"]+)"',
    re.IGNORECASE,
)

# A Version="..." attribute together with its leading whitespace. The
# mandatory `=` keeps this from matching VersionOverride="...".
_VERSION_ATTRIBUTE = re.compile(r'\s+Version\s*=\s*"[^"]*"', re.IGNORECASE)

_PROJECT_END = re.compile(r'(\s*</Project>)', re.IGNORECASE)

# NOTE(review): element names are inferred from the surviving values
# (packages path, true/false flags, NU190x warning list) - verify.
_MINIMAL_DIRECTORY_BUILD_PROPS = """<Project>
  <PropertyGroup>
    <RestorePackagesPath>$(MSBuildThisFileDirectory)../.nuget/packages</RestorePackagesPath>
    <DisableImplicitNuGetFallbackFolder>true</DisableImplicitNuGetFallbackFolder>
  </PropertyGroup>

  <PropertyGroup>
    <NuGetAudit>false</NuGetAudit>
    <WarningsNotAsErrors>$(WarningsNotAsErrors);NU1900;NU1901;NU1902;NU1903;NU1904</WarningsNotAsErrors>
  </PropertyGroup>
</Project>
"""


def _format_update_entry(package_name: str, version: str) -> str:
    """Return the props-file element that pins *package_name* to *version*."""
    return f'<PackageReference Update="{package_name}" Version="{version}" />'


def _affected_csproj_files(packages_to_centralize: CentralizationPlan) -> Set[Path]:
    """Return the distinct .csproj files referenced by the centralization plan."""
    affected: Set[Path] = set()
    for _, csproj_paths in packages_to_centralize.values():
        affected.update(csproj_paths)
    return affected


def setup_logging(verbose: bool) -> None:
    """Configure logging based on verbosity."""
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format="%(levelname)s: %(message)s",
    )


def scan_all_packages(src_root: Path) -> Dict[str, PackageUsage]:
    """
    Scan all .csproj files and collect package references.

    Only PackageReference elements that carry both Include and Version
    attributes in the opening tag are recorded. Files that cannot be read
    are skipped with a warning.

    Args:
        src_root: Root of src/ directory

    Returns:
        Dictionary mapping package name to PackageUsage
    """
    packages: Dict[str, PackageUsage] = {}

    csproj_files = find_all_csproj(src_root)
    logger.info("Scanning %d .csproj files for package references", len(csproj_files))

    def record(package_name: str, version: str, csproj_path: Path) -> None:
        if package_name not in packages:
            packages[package_name] = PackageUsage(package_name=package_name)
        packages[package_name].usages[csproj_path] = version

    for csproj_path in csproj_files:
        try:
            content = csproj_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            logger.warning(f"Failed to read {csproj_path}: {e}")
            continue

        # Include="..." before Version="..."
        for match in _PACKAGE_REF_INCLUDE_FIRST.finditer(content):
            record(match.group(1), match.group(2), csproj_path)

        # Version="..." before Include="..."
        for match in _PACKAGE_REF_VERSION_FIRST.finditer(content):
            record(match.group(2), match.group(1), csproj_path)

    logger.info("Found %d unique packages", len(packages))
    return packages


def find_packages_to_centralize(
    packages: Dict[str, PackageUsage],
    exclude_packages: Set[str],
    min_usage: int = 2,
) -> Dict[str, Tuple[str, List[Path]]]:
    """
    Find packages that should be centralized.

    A package is centralized if:
    - It is used in at least min_usage projects
    - It is not in the exclude list
    - It has at least one parseable version (the latest stable one is
      preferred; the highest prerelease is used when no stable one exists)

    Args:
        packages: Package usage data
        exclude_packages: Package names to exclude
        min_usage: Minimum number of projects using a package

    Returns:
        Dictionary mapping package name to (target_version, list of csproj paths)
    """
    to_centralize: Dict[str, Tuple[str, List[Path]]] = {}

    for package_name, usage in sorted(packages.items()):
        if package_name in exclude_packages:
            logger.debug(f"Excluding package: {package_name}")
            continue

        if len(usage.usages) < min_usage:
            logger.debug(f"Skipping {package_name}: only used in {len(usage.usages)} project(s)")
            continue

        # Parse every version exactly once; keep (parsed, raw) pairs.
        parsed_versions = [
            (parsed, raw)
            for raw in usage.get_all_versions()
            for parsed in (parse_version(raw),)
            if parsed is not None
        ]
        if not parsed_versions:
            logger.warning(f"Skipping {package_name}: no parseable versions")
            continue

        target_version = select_latest_stable([raw for _, raw in parsed_versions])
        if target_version is None:
            # No stable release among the usages: fall back to the highest
            # parseable version, prerelease or not.
            target_version = max(parsed_versions, key=lambda pair: pair[0])[1]
            logger.warning(
                f"Package {package_name}: using prerelease version {target_version} "
                "(no stable version found)"
            )

        csproj_paths = list(usage.usages.keys())
        to_centralize[package_name] = (target_version, csproj_paths)
        logger.info(
            f"Will centralize {package_name} v{target_version} "
            f"(used in {len(csproj_paths)} projects)"
        )

    return to_centralize


def read_directory_build_props(props_path: Path) -> str:
    """
    Read Directory.Build.props file.

    Args:
        props_path: Path to Directory.Build.props

    Returns:
        File content as string, or a minimal template when the file does
        not exist yet
    """
    try:
        return props_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return _MINIMAL_DIRECTORY_BUILD_PROPS


def add_package_to_directory_props(
    content: str,
    package_name: str,
    version: str,
) -> str:
    """
    Add or update a PackageReference in Directory.Build.props.

    An existing ``Update`` entry for the package is rewritten in place.
    Otherwise the entry is appended to the centralized ItemGroup, which is
    created just before ``</Project>`` when it does not exist yet.

    Args:
        content: Current content of Directory.Build.props
        package_name: Package name
        version: Package version

    Returns:
        Updated content (unchanged when no insertion point can be found)
    """
    entry = _format_update_entry(package_name, version)

    existing_pattern = re.compile(
        rf'<PackageReference\s+Update\s*=\s*"{re.escape(package_name)}"'
        r'[^>]*Version\s*=\s*"[^"]+"[^>]*/?>',
        re.IGNORECASE,
    )

    # Callables are used as replacements throughout so that characters in
    # package names or versions are never interpreted as regex escapes.
    if existing_pattern.search(content):
        content = existing_pattern.sub(lambda _m: entry, content)
        logger.debug(f"Updated existing entry for {package_name}")
        return content

    if _CENTRALIZED_SECTION_OPEN.search(content):
        # Insert right after the opening <ItemGroup>; ordering is fixed up
        # later by sort_package_references().
        content = _CENTRALIZED_SECTION_OPEN.sub(
            lambda m: f"{m.group(1)}\n    {entry}", content, count=1
        )
        logger.debug(f"Added {package_name} to centralized ItemGroup")
        return content

    if not _PROJECT_END.search(content):
        logger.warning(
            "Cannot add %s: no </Project> element found in Directory.Build.props",
            package_name,
        )
        return content

    new_section = (
        f"\n\n  {_CENTRALIZED_MARKER}\n"
        "  <ItemGroup>\n"
        f"    {entry}\n"
        "  </ItemGroup>"
    )
    content = _PROJECT_END.sub(lambda m: new_section + m.group(1), content, count=1)
    logger.debug(f"Created centralized ItemGroup with {package_name}")
    return content


def update_directory_build_props(
    props_path: Path,
    packages_to_centralize: Dict[str, Tuple[str, List[Path]]],
    dry_run: bool = False,
) -> bool:
    """
    Update Directory.Build.props with centralized package versions.

    Args:
        props_path: Path to Directory.Build.props
        packages_to_centralize: Packages to add
        dry_run: If True, don't write files

    Returns:
        True if successful (including the no-op and dry-run cases)
    """
    if not packages_to_centralize:
        logger.info("No packages to centralize")
        return True

    content = read_directory_build_props(props_path)

    for package_name, (version, _) in sorted(packages_to_centralize.items()):
        content = add_package_to_directory_props(content, package_name, version)

    content = sort_package_references(content)

    if dry_run:
        logger.info(f"Would update {props_path}")
        return True

    try:
        props_path.write_text(content, encoding="utf-8")
    except OSError as e:
        logger.error(f"Failed to write {props_path}: {e}")
        return False

    logger.info(f"Updated {props_path} with {len(packages_to_centralize)} centralized packages")
    return True


def sort_package_references(content: str) -> str:
    """
    Sort PackageReference Update entries alphabetically.

    Only the first centralized ItemGroup is touched. Each entry is kept
    verbatim (including any extra attributes). If the section contains
    anything besides PackageReference Update entries, it is left untouched
    so that no content is dropped.

    Args:
        content: XML content

    Returns:
        Content with sorted PackageReference entries
    """
    section = _CENTRALIZED_SECTION.search(content)
    if not section:
        return content

    opening, body, closing = section.group(1), section.group(2), section.group(3)

    entries = [
        (entry.group(1), entry.group(0).strip())
        for entry in _PACKAGE_UPDATE_ENTRY.finditer(body)
    ]
    if not entries:
        return content

    # Anything left after removing the entries (comments, child elements,
    # other item types) would be lost by a rebuild, so bail out instead.
    if _PACKAGE_UPDATE_ENTRY.sub("", body).strip():
        logger.debug("Centralized ItemGroup has additional content; leaving order unchanged")
        return content

    entries.sort(key=lambda item: item[0].lower())
    sorted_body = "\n".join(f"    {tag}" for _, tag in entries)
    rebuilt = f"{opening}\n{sorted_body}\n  {closing}"

    # Splice by position rather than re.sub so the rebuilt text is inserted
    # literally and only this one section is replaced.
    return content[:section.start()] + rebuilt + content[section.end():]


def remove_version_from_csproj(
    csproj_path: Path,
    package_name: str,
    dry_run: bool = False,
) -> bool:
    """
    Remove Version attribute from a PackageReference in a .csproj file.

    Handles any attribute order and both self-closing and open
    ``<PackageReference ...>`` tags; all other attributes are preserved.

    Args:
        csproj_path: Path to .csproj file
        package_name: Package name
        dry_run: If True, don't write files

    Returns:
        True if successful (also when the file needed no change)
    """
    try:
        content = csproj_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        logger.error(f"Failed to read {csproj_path}: {e}")
        return False

    # Match the opening tag of every PackageReference for this package,
    # then strip the Version attribute from within that tag only.
    tag_pattern = re.compile(
        rf'<PackageReference\b[^>]*?\bInclude\s*=\s*"{re.escape(package_name)}"[^>]*>',
        re.IGNORECASE,
    )
    new_content = tag_pattern.sub(
        lambda m: _VERSION_ATTRIBUTE.sub("", m.group(0), count=1),
        content,
    )

    if new_content == content:
        logger.debug(f"No changes needed for {package_name} in {csproj_path.name}")
        return True

    if dry_run:
        logger.info(f"Would remove version from {package_name} in {csproj_path.name}")
        return True

    try:
        csproj_path.write_text(new_content, encoding="utf-8")
    except OSError as e:
        logger.error(f"Failed to write {csproj_path}: {e}")
        return False

    logger.info(f"Removed version from {package_name} in {csproj_path.name}")
    return True


def apply_centralization(
    props_path: Path,
    packages_to_centralize: Dict[str, Tuple[str, List[Path]]],
    dry_run: bool = False,
) -> Tuple[int, int]:
    """
    Apply centralization by updating Directory.Build.props and csproj files.

    The csproj files are only touched when the props file update succeeded,
    so a failed props write never leaves projects without versions.

    Args:
        props_path: Path to Directory.Build.props
        packages_to_centralize: Packages to centralize
        dry_run: If True, don't write files

    Returns:
        Tuple of (packages centralized, csproj files processed successfully)
    """
    if not update_directory_build_props(props_path, packages_to_centralize, dry_run):
        return 0, 0

    files_modified: Set[Path] = set()
    for package_name, (_, csproj_paths) in packages_to_centralize.items():
        for csproj_path in csproj_paths:
            if remove_version_from_csproj(csproj_path, package_name, dry_run):
                files_modified.add(csproj_path)

    return len(packages_to_centralize), len(files_modified)


def generate_report(
    packages: Dict[str, PackageUsage],
    packages_to_centralize: Dict[str, Tuple[str, List[Path]]],
) -> dict:
    """
    Generate a JSON report of the centralization.

    Args:
        packages: All package usage data
        packages_to_centralize: Packages to centralize

    Returns:
        Report dictionary with a UTC timestamp, summary counts and one
        entry per centralized package
    """
    centralized = [
        {
            "package": package_name,
            "version": version,
            "usage_count": len(csproj_paths),
            "files": [str(p) for p in csproj_paths],
        }
        for package_name, (version, csproj_paths) in sorted(packages_to_centralize.items())
    ]

    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "summary": {
            "packages_scanned": len(packages),
            "packages_to_centralize": len(packages_to_centralize),
            "csproj_files_affected": len(_affected_csproj_files(packages_to_centralize)),
        },
        "centralized_packages": centralized,
    }


def print_summary(
    packages: Dict[str, PackageUsage],
    packages_to_centralize: Dict[str, Tuple[str, List[Path]]],
    dry_run: bool,
) -> None:
    """Print a summary of the centralization."""
    print("\n" + "=" * 60)
    print("NuGet Package Centralization Summary")
    print("=" * 60)

    csproj_files_affected = _affected_csproj_files(packages_to_centralize)

    print(f"\nPackages scanned: {len(packages)}")
    print(f"Packages to centralize: {len(packages_to_centralize)}")
    print(f"Project files affected: {len(csproj_files_affected)}")

    if packages_to_centralize:
        print("\nPackages to centralize to Directory.Build.props:")
        for package_name, (version, csproj_paths) in sorted(packages_to_centralize.items()):
            print(f"  {package_name}: v{version} (used in {len(csproj_paths)} projects)")

    if dry_run:
        print("\n[DRY RUN - No files were modified]")


def main() -> int:
    """
    Main entry point.

    Returns:
        Process exit code: 1 when the source root is missing or when
        --check finds packages that still need centralizing, else 0
    """
    parser = argparse.ArgumentParser(
        description="Centralize NuGet package versions to Directory.Build.props",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--src-root",
        type=Path,
        default=Path("src"),
        help="Root of src/ directory (default: ./src)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Report without making changes",
    )
    parser.add_argument(
        "--report",
        type=Path,
        help="Write JSON report to file",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        dest="exclude_packages",
        default=[],
        help="Exclude package from centralization (repeatable)",
    )
    parser.add_argument(
        "--min-usage",
        type=int,
        default=2,
        help="Minimum number of projects using a package to centralize it (default: 2)",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="CI mode: exit 1 if centralization needed",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Verbose output",
    )

    args = parser.parse_args()
    setup_logging(args.verbose)

    src_root = args.src_root.resolve()
    if not src_root.exists():
        logger.error(f"Source root does not exist: {src_root}")
        return 1

    logger.info(f"Source root: {src_root}")

    props_path = src_root / "Directory.Build.props"

    packages = scan_all_packages(src_root)
    if not packages:
        logger.info("No packages found")
        return 0

    packages_to_centralize = find_packages_to_centralize(
        packages, set(args.exclude_packages), args.min_usage
    )

    report = generate_report(packages, packages_to_centralize)

    if args.report:
        # A failed report write is logged but does not abort the run.
        try:
            args.report.write_text(
                json.dumps(report, indent=2, default=str),
                encoding="utf-8",
            )
            logger.info(f"Report written to: {args.report}")
        except OSError as e:
            logger.error(f"Failed to write report: {e}")

    print_summary(packages, packages_to_centralize, args.dry_run or args.check)

    # Check mode never modifies anything; it only reports via the exit code.
    if args.check:
        if packages_to_centralize:
            logger.error("Package centralization needed")
            return 1
        logger.info("All shared packages are already centralized")
        return 0

    packages_count, files_count = apply_centralization(
        props_path, packages_to_centralize, dry_run=args.dry_run
    )
    if not args.dry_run:
        print(f"\nCentralized {packages_count} packages, modified {files_count} files")

    return 0


if __name__ == "__main__":
    sys.exit(main())