Files
git.stella-ops.org/tools/slntools/nuget_centralizer.py
StellaOps Bot e6c47c8f50 save progress
2025-12-28 23:49:56 +02:00

649 lines
20 KiB
Python

#!/usr/bin/env python3
"""
StellaOps NuGet Centralization Tool.

Centralizes NuGet package versions to src/Directory.Build.props for packages
used in multiple projects, and removes version attributes from individual .csproj files.

This is the REVERSE of nuget_normalizer.py:
  - nuget_normalizer: keeps versions in csproj files, normalizes to latest stable
  - nuget_centralizer: moves shared packages to Directory.Build.props, removes versions from csproj

Usage:
    python nuget_centralizer.py [OPTIONS]

Options:
    --src-root PATH     Root of src/ directory (default: ./src)
    --dry-run           Report without making changes
    --report PATH       Write JSON report to file
    --exclude PACKAGE   Exclude package from centralization (repeatable)
    --min-usage N       Minimum number of projects using a package to centralize it (default: 2)
    --check             CI mode: exit 1 if centralization needed
    -v, --verbose       Verbose output
"""
import argparse
import json
import logging
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Set, Tuple
from lib.csproj_parser import find_all_csproj
from lib.models import PackageUsage
from lib.version_utils import select_latest_stable, parse_version
# Module-level logger shared by every function in this script; the output
# format and level are set by setup_logging() via logging.basicConfig.
logger = logging.getLogger(__name__)
def setup_logging(verbose: bool) -> None:
    """
    Initialise logging for the command-line run.

    Args:
        verbose: When truthy, emit DEBUG records; otherwise INFO and above.
    """
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO

    logging.basicConfig(format="%(levelname)s: %(message)s", level=threshold)
def scan_all_packages(src_root: Path) -> Dict[str, PackageUsage]:
    """
    Walk every .csproj under ``src_root`` and record versioned package references.

    Only ``<PackageReference>`` tags that carry both an ``Include`` and a
    ``Version`` attribute (in either order) are recorded. Files that cannot be
    read are logged and skipped.

    Args:
        src_root: Root of src/ directory

    Returns:
        Dictionary mapping package name to PackageUsage
    """
    # Include="..." appears before Version="..."
    include_first = re.compile(
        r'<PackageReference\s+[^>]*Include\s*=\s*"([^"]+)"[^>]*Version\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )
    # Version="..." appears before Include="..."
    version_first = re.compile(
        r'<PackageReference\s+[^>]*Version\s*=\s*"([^"]+)"[^>]*Include\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )

    found: Dict[str, PackageUsage] = {}
    project_files = find_all_csproj(src_root)
    logger.info(f"Scanning {len(project_files)} .csproj files for package references")

    for project in project_files:
        try:
            text = project.read_text(encoding="utf-8")
        except Exception as e:
            logger.warning(f"Failed to read {project}: {e}")
            continue

        # Include-first hits are recorded before version-first hits, so for a
        # package matched by both forms in one file the version-first value wins.
        hits = [(m.group(1), m.group(2)) for m in include_first.finditer(text)]
        hits += [(m.group(2), m.group(1)) for m in version_first.finditer(text)]

        for name, version in hits:
            if name not in found:
                found[name] = PackageUsage(package_name=name)
            found[name].usages[project] = version

    logger.info(f"Found {len(found)} unique packages")
    return found
def find_packages_to_centralize(
    packages: Dict[str, PackageUsage],
    exclude_packages: Set[str],
    min_usage: int = 2,
) -> Dict[str, Tuple[str, List[Path]]]:
    """
    Find packages that should be centralized.

    A package is centralized if:
    - It is not in the exclude list
    - It is used in at least min_usage projects
    - It has at least one version that parse_version() accepts

    The target version is whatever select_latest_stable() picks; when that
    returns None, the highest parseable version is used instead and a warning
    is logged.

    Args:
        packages: Package usage data
        exclude_packages: Package names to exclude
        min_usage: Minimum number of projects using a package

    Returns:
        Dictionary mapping package name to (target_version, list of csproj paths)
    """
    to_centralize: Dict[str, Tuple[str, List[Path]]] = {}

    for package_name, usage in sorted(packages.items()):
        if package_name in exclude_packages:
            logger.debug(f"Excluding package: {package_name}")
            continue

        if len(usage.usages) < min_usage:
            logger.debug(f"Skipping {package_name}: only used in {len(usage.usages)} project(s)")
            continue

        parseable_versions = [
            v for v in usage.get_all_versions() if parse_version(v) is not None
        ]
        if not parseable_versions:
            logger.warning(f"Skipping {package_name}: no parseable versions")
            continue

        target_version = select_latest_stable(parseable_versions)
        if target_version is None:
            # No stable release among the candidates. The list is non-empty and
            # already filtered to parseable entries, so a maximum always exists.
            target_version = max(parseable_versions, key=parse_version)
            logger.warning(
                f"Package {package_name}: using prerelease version {target_version} "
                "(no stable version found)"
            )

        csproj_paths = list(usage.usages)
        to_centralize[package_name] = (target_version, csproj_paths)
        logger.info(
            f"Will centralize {package_name} v{target_version} "
            f"(used in {len(csproj_paths)} projects)"
        )

    return to_centralize
def read_directory_build_props(props_path: Path) -> str:
    """
    Return the text of Directory.Build.props, or a starter template.

    Nothing is written to disk: when ``props_path`` does not exist the
    function only returns a minimal skeleton that already contains the
    "Centralized NuGet package versions" ItemGroup.

    Args:
        props_path: Path to Directory.Build.props

    Returns:
        File content as string
    """
    if not props_path.exists():
        # Minimal Directory.Build.props used when the file is missing
        return """<Project>
<PropertyGroup>
<!-- Centralize NuGet package cache to prevent directory sprawl -->
<RestorePackagesPath>$(MSBuildThisFileDirectory)../.nuget/packages</RestorePackagesPath>
<DisableImplicitNuGetFallbackFolder>true</DisableImplicitNuGetFallbackFolder>
<!-- Disable NuGet audit to prevent build failures when mirrors are unreachable -->
<NuGetAudit>false</NuGetAudit>
<WarningsNotAsErrors>$(WarningsNotAsErrors);NU1900;NU1901;NU1902;NU1903;NU1904</WarningsNotAsErrors>
</PropertyGroup>
<!-- Centralized NuGet package versions -->
<ItemGroup>
</ItemGroup>
</Project>
"""

    return props_path.read_text(encoding="utf-8")
def add_package_to_directory_props(
    content: str,
    package_name: str,
    version: str,
) -> str:
    """
    Add or update a PackageReference in Directory.Build.props.

    Three cases are handled, in this order:
    1. An ``<PackageReference Update="package_name" ... Version="...">`` tag
       already exists: only the value of its Version attribute is rewritten,
       so other attributes and any child elements are kept.
    2. The "Centralized NuGet package versions" ItemGroup exists: a new entry
       is inserted right after its opening tag.
    3. Otherwise a new marked ItemGroup is inserted before ``</Project>``.

    NOTE(review): MSBuild applies ``Update`` only to items that already exist
    at that point of evaluation, and Directory.Build.props is imported before
    the project body. Confirm these entries actually take effect for
    PackageReference items declared in the .csproj files.

    Args:
        content: Current content of Directory.Build.props
        package_name: Package name
        version: Package version

    Returns:
        Updated content (unchanged if there was nowhere to insert the entry)
    """
    entry = f'<PackageReference Update="{package_name}" Version="{version}" />'

    # All substitutions below use callables instead of template strings so
    # that backslashes in the inserted text are never read as regex escapes.

    # Case 1: rewrite the Version value of an existing entry in place.
    existing_pattern = re.compile(
        rf'(<PackageReference\s+Update\s*=\s*"{re.escape(package_name)}"'
        rf'[^>]*?\bVersion\s*=\s*")[^"]+(")',
        re.IGNORECASE,
    )
    updated, hits = existing_pattern.subn(
        lambda m: f"{m.group(1)}{version}{m.group(2)}", content
    )
    if hits:
        logger.debug(f"Updated existing entry for {package_name}")
        return updated

    # Case 2: append to the marked ItemGroup (first one only).
    itemgroup_pattern = re.compile(
        r'(<!-- Centralized NuGet package versions -->\s*<ItemGroup>)',
        re.IGNORECASE,
    )
    updated, hits = itemgroup_pattern.subn(
        lambda m: f"{m.group(1)}\n {entry}", content, count=1
    )
    if hits:
        logger.debug(f"Added {package_name} to centralized ItemGroup")
        return updated

    # Case 3: create the marked ItemGroup just before </Project>.
    new_section = (
        "\n<!-- Centralized NuGet package versions -->\n"
        "<ItemGroup>\n"
        f"{entry}\n"
        "</ItemGroup>\n"
    )
    project_end = re.compile(r'(\s*</Project>)', re.IGNORECASE)
    updated, hits = project_end.subn(
        lambda m: new_section + m.group(1), content, count=1
    )
    if hits:
        logger.debug(f"Created centralized ItemGroup with {package_name}")
    else:
        # Without a closing </Project> there is no safe insertion point.
        logger.warning(f"Could not add {package_name}: no </Project> element found")
    return updated
def update_directory_build_props(
    props_path: Path,
    packages_to_centralize: Dict[str, Tuple[str, List[Path]]],
    dry_run: bool = False,
) -> bool:
    """
    Write the centralized package versions into Directory.Build.props.

    The current content (or the starter template) is loaded, every package is
    added or updated in name order, the centralized entries are sorted, and
    the result is written back unless ``dry_run`` is set.

    Args:
        props_path: Path to Directory.Build.props
        packages_to_centralize: Packages to add
        dry_run: If True, don't write files

    Returns:
        True if successful (including "nothing to do" and dry runs),
        False if the file could not be written
    """
    if not packages_to_centralize:
        logger.info("No packages to centralize")
        return True

    xml = read_directory_build_props(props_path)
    for name in sorted(packages_to_centralize):
        target_version = packages_to_centralize[name][0]
        xml = add_package_to_directory_props(xml, name, target_version)

    # Keep the centralized entries in alphabetical order
    xml = sort_package_references(xml)

    if dry_run:
        logger.info(f"Would update {props_path}")
        return True

    try:
        props_path.write_text(xml, encoding="utf-8")
        logger.info(f"Updated {props_path} with {len(packages_to_centralize)} centralized packages")
        return True
    except Exception as e:
        logger.error(f"Failed to write {props_path}: {e}")
        return False
def sort_package_references(content: str) -> str:
    """
    Sort PackageReference Update entries alphabetically.

    Only the first ItemGroup that directly follows the
    "Centralized NuGet package versions" comment is touched. Entries are
    ordered case-insensitively by package name and each entry's original text
    (including any extra attributes) is kept verbatim.

    If that ItemGroup contains anything besides recognised entries and
    whitespace (comments, child elements, other items), the content is
    returned unchanged, because rebuilding the group would discard it.

    Args:
        content: XML content

    Returns:
        Content with sorted PackageReference entries, or the input unchanged
        when there is nothing to sort or sorting would lose data
    """
    section_pattern = re.compile(
        r'(<!-- Centralized NuGet package versions -->\s*<ItemGroup>)(.*?)(</ItemGroup>)',
        re.IGNORECASE | re.DOTALL,
    )
    section = section_pattern.search(content)
    if section is None:
        return content

    opening, body, closing = section.groups()

    entry_pattern = re.compile(
        r'<PackageReference\s+Update\s*=\s*"([^"]+)"[^>]*Version\s*=\s*"([^"]+)"[^>]*/?>',
        re.IGNORECASE,
    )
    entries = list(entry_pattern.finditer(body))
    if not entries:
        return content

    # Anything left after removing the recognised entries would be dropped by
    # the rebuild below, so leave such groups exactly as they are.
    if entry_pattern.sub("", body).strip():
        return content

    entries.sort(key=lambda m: m.group(1).lower())
    sorted_entries = "\n".join(f" {m.group(0)}" for m in entries)
    rebuilt = f"{opening}\n{sorted_entries}\n {closing}"

    # Splice by position: no regex-template escaping issues, and only the
    # section that was actually analysed is replaced.
    return content[:section.start()] + rebuilt + content[section.end():]
def remove_version_from_csproj(
    csproj_path: Path,
    package_name: str,
    dry_run: bool = False,
) -> bool:
    """
    Remove Version attribute from a PackageReference in a .csproj file.

    Every ``<PackageReference ... Include="package_name" ...>`` opening tag is
    located (self-closing or not, attributes in any order) and its first
    ``Version="..."`` attribute is dropped; all other attributes are kept.

    The file is read and written with newline translation disabled so that
    the project's existing line endings (e.g. CRLF) are preserved.

    Args:
        csproj_path: Path to .csproj file
        package_name: Package name
        dry_run: If True, don't write files

    Returns:
        True if successful (also when nothing needed changing or on a dry
        run), False if the file could not be read or written
    """
    try:
        # newline="" keeps "\r\n" intact instead of collapsing it to "\n"
        with csproj_path.open("r", encoding="utf-8", newline="") as handle:
            content = handle.read()
    except Exception as e:
        logger.error(f"Failed to read {csproj_path}: {e}")
        return False

    # Opening tag of a reference to exactly this package. The closing quote
    # after the escaped name prevents prefix matches such as "Foo" vs "Foo.Bar".
    reference_tag = re.compile(
        rf'<PackageReference\b[^>]*?\bInclude\s*=\s*"{re.escape(package_name)}"[^>]*>',
        re.IGNORECASE,
    )
    # The attribute together with its leading whitespace, so no gap is left.
    version_attribute = re.compile(r'\s+Version\s*=\s*"[^"]+"', re.IGNORECASE)

    new_content = reference_tag.sub(
        lambda tag: version_attribute.sub("", tag.group(0), count=1),
        content,
    )

    if new_content == content:
        logger.debug(f"No changes needed for {package_name} in {csproj_path.name}")
        return True

    if dry_run:
        logger.info(f"Would remove version from {package_name} in {csproj_path.name}")
        return True

    try:
        with csproj_path.open("w", encoding="utf-8", newline="") as handle:
            handle.write(new_content)
        logger.info(f"Removed version from {package_name} in {csproj_path.name}")
        return True
    except Exception as e:
        logger.error(f"Failed to write {csproj_path}: {e}")
        return False
def apply_centralization(
    props_path: Path,
    packages_to_centralize: Dict[str, Tuple[str, List[Path]]],
    dry_run: bool = False,
) -> Tuple[int, int]:
    """
    Update Directory.Build.props, then strip versions from the project files.

    Args:
        props_path: Path to Directory.Build.props
        packages_to_centralize: Packages to centralize
        dry_run: If True, don't write files

    Returns:
        Tuple of (number of packages centralized, number of distinct csproj
        files for which version removal reported success). Returns (0, 0)
        when Directory.Build.props could not be updated.
    """
    props_ok = update_directory_build_props(props_path, packages_to_centralize, dry_run)
    if not props_ok:
        return 0, 0

    handled_projects: Set[Path] = set()
    for name, (_, projects) in packages_to_centralize.items():
        for project in projects:
            if not remove_version_from_csproj(project, name, dry_run):
                continue
            handled_projects.add(project)

    return len(packages_to_centralize), len(handled_projects)
def generate_report(
    packages: Dict[str, PackageUsage],
    packages_to_centralize: Dict[str, Tuple[str, List[Path]]],
) -> dict:
    """
    Build the JSON-serialisable report describing the centralization.

    Args:
        packages: All package usage data
        packages_to_centralize: Packages to centralize

    Returns:
        Report dictionary with a UTC ISO-8601 "timestamp", a "summary" of
        counts, and one "centralized_packages" entry per package in name order
    """
    # Distinct project files touched by at least one centralized package
    affected_projects = set()
    for _, projects in packages_to_centralize.values():
        affected_projects |= set(projects)

    summary = {
        "packages_scanned": len(packages),
        "packages_to_centralize": len(packages_to_centralize),
        "csproj_files_affected": len(affected_projects),
    }

    entries = []
    for name, (version, projects) in sorted(packages_to_centralize.items()):
        entry = {
            "package": name,
            "version": version,
            "usage_count": len(projects),
            "files": [str(project) for project in projects],
        }
        entries.append(entry)

    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "summary": summary,
        "centralized_packages": entries,
    }
def print_summary(
    packages: Dict[str, PackageUsage],
    packages_to_centralize: Dict[str, Tuple[str, List[Path]]],
    dry_run: bool,
) -> None:
    """
    Write a human-readable centralization summary to stdout.

    Args:
        packages: All package usage data (only its size is reported)
        packages_to_centralize: Packages selected for centralization
        dry_run: If True, append a notice that no files were modified
    """
    rule = "=" * 60
    affected_projects = {
        project
        for _, projects in packages_to_centralize.values()
        for project in projects
    }

    print("\n" + rule)
    print("NuGet Package Centralization Summary")
    print(rule)

    print(f"\nPackages scanned: {len(packages)}")
    print(f"Packages to centralize: {len(packages_to_centralize)}")
    print(f"Project files affected: {len(affected_projects)}")

    if packages_to_centralize:
        print("\nPackages to centralize to Directory.Build.props:")
        for name, (version, projects) in sorted(packages_to_centralize.items()):
            print(f" {name}: v{version} (used in {len(projects)} projects)")

    if dry_run:
        print("\n[DRY RUN - No files were modified]")
def main() -> int:
    """
    Main entry point.

    Parses the command line, scans the source tree, reports what would be
    centralized and, unless --dry-run or --check is given, applies the
    changes.

    Returns:
        Process exit code: 0 on success; 1 if the source root is missing,
        if --check finds packages that need centralizing, or if
        Directory.Build.props could not be updated.
    """
    parser = argparse.ArgumentParser(
        description="Centralize NuGet package versions to Directory.Build.props",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--src-root",
        type=Path,
        default=Path("src"),
        help="Root of src/ directory (default: ./src)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Report without making changes",
    )
    parser.add_argument(
        "--report",
        type=Path,
        help="Write JSON report to file",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        dest="exclude_packages",
        default=[],
        help="Exclude package from centralization (repeatable)",
    )
    parser.add_argument(
        "--min-usage",
        type=int,
        default=2,
        help="Minimum number of projects using a package to centralize it (default: 2)",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="CI mode: exit 1 if centralization needed",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Verbose output",
    )
    args = parser.parse_args()
    setup_logging(args.verbose)

    # Resolve src root
    src_root = args.src_root.resolve()
    if not src_root.exists():
        logger.error(f"Source root does not exist: {src_root}")
        return 1
    logger.info(f"Source root: {src_root}")

    # Path to Directory.Build.props
    props_path = src_root / "Directory.Build.props"

    # Scan all packages
    packages = scan_all_packages(src_root)
    if not packages:
        logger.info("No packages found")
        return 0

    # Find packages to centralize
    exclude_set = set(args.exclude_packages)
    packages_to_centralize = find_packages_to_centralize(
        packages, exclude_set, args.min_usage
    )

    # Generate report
    report = generate_report(packages, packages_to_centralize)

    # Write report if requested; a failed report write is logged but does not
    # abort the run.
    if args.report:
        try:
            args.report.write_text(
                json.dumps(report, indent=2, default=str),
                encoding="utf-8",
            )
            logger.info(f"Report written to: {args.report}")
        except Exception as e:
            logger.error(f"Failed to write report: {e}")

    # Print summary (check mode never modifies files, so it is shown as a dry run)
    print_summary(packages, packages_to_centralize, args.dry_run or args.check)

    # Check mode - just report if centralization is needed
    if args.check:
        if packages_to_centralize:
            logger.error("Package centralization needed")
            return 1
        logger.info("All shared packages are already centralized")
        return 0

    # Apply centralization (or simulate it when --dry-run is set)
    packages_count, files_count = apply_centralization(
        props_path, packages_to_centralize, dry_run=args.dry_run
    )
    if args.dry_run:
        return 0

    # apply_centralization reports (0, 0) for a non-empty request only when
    # Directory.Build.props could not be updated; surface that as a failure
    # instead of exiting successfully.
    if packages_to_centralize and packages_count == 0:
        logger.error(f"Centralization failed: could not update {props_path}")
        return 1

    print(f"\nCentralized {packages_count} packages, modified {files_count} files")
    return 0
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())