Files
git.stella-ops.org/tools/slntools/lib/csproj_parser.py
StellaOps Bot cec4265a40 save progress
2025-12-28 01:40:52 +02:00

275 lines
7.4 KiB
Python

"""
Csproj file parsing utilities.
Provides functions to:
- Find all .csproj files in a directory tree
- Parse csproj files to extract project references and package references
- Generate deterministic GUIDs for projects
"""
import hashlib
import logging
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Optional
from .models import CsprojProject
logger = logging.getLogger(__name__)
# Directory names whose contents are skipped by default when scanning for
# csproj files (used when find_all_csproj is called without exclude_dirs).
DEFAULT_EXCLUDE_DIRS = {
    # Build output
    "bin",
    "obj",
    # Vendored code and package caches
    "node_modules",
    "third_party",
    "packages",
    ".nuget",
    ".cache",
    # Version-control and IDE metadata
    ".git",
    ".vs",
    ".idea",
}
# Glob-style name patterns used as the default exclude_patterns of
# find_all_csproj; they target test-fixture and sample projects.
DEFAULT_EXCLUDE_PATTERNS = {
    "*.Samples",
    "*.Tests.Fixtures",
}
def get_deterministic_guid(path: Path, base_path: Optional[Path] = None) -> str:
    """
    Derive a stable, GUID-shaped identifier from a path.

    The path is made relative to ``base_path`` when one is given and the path
    lies under it; otherwise the path is used as passed. Separators are
    normalized to "/" and the text is lowercased before hashing, so the result
    does not depend on separator style or letter case.

    Args:
        path: Path to generate the GUID for
        base_path: Base path to calculate the relative path from (optional)

    Returns:
        The first 16 bytes of the SHA-256 digest, rendered as uppercase hex in
        the 8-4-4-4-12 layout (e.g., "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX")
    """
    hashed_path = path
    if base_path:
        try:
            hashed_path = path.relative_to(base_path)
        except ValueError:
            # path is not located under base_path: hash it unchanged
            pass

    canonical = str(hashed_path).replace("\\", "/").lower()
    digest = hashlib.sha256(canonical.encode("utf-8")).digest()

    # 16 bytes -> 32 hex characters, cut into the usual GUID groups
    hex32 = digest[:16].hex().upper()
    groups = (hex32[:8], hex32[8:12], hex32[12:16], hex32[16:20], hex32[20:32])
    return "-".join(groups)
def find_all_csproj(
    root_dir: Path,
    exclude_dirs: Optional[set[str]] = None,
    exclude_patterns: Optional[set[str]] = None,
) -> list[Path]:
    """
    Find all .csproj files under a directory.

    Args:
        root_dir: Root directory to search
        exclude_dirs: Names of directories *below* root_dir whose contents are
            skipped (defaults to DEFAULT_EXCLUDE_DIRS: bin, obj, etc.).
            Directories above root_dir, and root_dir itself, are never
            considered, so the location of the checkout does not matter.
        exclude_patterns: Glob patterns for projects to skip (defaults to
            DEFAULT_EXCLUDE_PATTERNS). A file is skipped when a pattern
            matches its path (``Path.match`` semantics) or its project name,
            i.e. the file name without the ".csproj" extension.

    Returns:
        List of resolved absolute paths to .csproj files, sorted by path.
        Empty (with a warning logged) when root_dir does not exist.
    """
    if exclude_dirs is None:
        exclude_dirs = DEFAULT_EXCLUDE_DIRS
    if exclude_patterns is None:
        exclude_patterns = DEFAULT_EXCLUDE_PATTERNS

    if not root_dir.exists():
        logger.warning(f"Directory does not exist: {root_dir}")
        return []

    csproj_files: list[Path] = []
    for item in root_dir.rglob("*.csproj"):
        # Only look at directories between root_dir and the file. Walking
        # item.parents would also test ancestors of root_dir, so a repository
        # checked out under e.g. ".../packages/" would yield nothing.
        sub_dirs = item.relative_to(root_dir).parts[:-1]
        if any(part in exclude_dirs for part in sub_dirs):
            continue

        # Every candidate ends in ".csproj", so a pattern such as "*.Samples"
        # can never match the path itself; also test the bare project name.
        project_name = Path(item.stem)
        if any(
            item.match(pattern) or project_name.match(pattern)
            for pattern in exclude_patterns
        ):
            continue

        csproj_files.append(item.resolve())

    return sorted(csproj_files)
def parse_csproj(
    csproj_path: Path,
    base_path: Optional[Path] = None,
) -> Optional[CsprojProject]:
    """
    Parse a .csproj file and extract project information.

    The project name is the file name without its extension, the GUID is
    derived deterministically from the path (relative to ``base_path`` when
    given), and project/package references are read from the XML.

    Args:
        csproj_path: Path to the .csproj file
        base_path: Base path for generating deterministic GUID

    Returns:
        CsprojProject with parsed information, or None if the file is
        missing, cannot be read, or does not contain well-formed XML
        (an error is logged in each of these cases)
    """
    if not csproj_path.exists():
        logger.error(f"Csproj file does not exist: {csproj_path}")
        return None

    try:
        root = ET.parse(csproj_path).getroot()
    except ET.ParseError as e:
        logger.error(f"Failed to parse XML in {csproj_path}: {e}")
        return None
    except OSError as e:
        # exists() is also true for directories and unreadable files; honour
        # the "None on failure" contract instead of letting the error escape.
        logger.error(f"Failed to read {csproj_path}: {e}")
        return None

    return CsprojProject(
        path=csproj_path.resolve(),
        name=csproj_path.stem,
        guid=get_deterministic_guid(csproj_path, base_path),
        project_references=_parse_project_references(root, csproj_path),
        package_references=_parse_package_references(root),
    )
def _parse_project_references(root: ET.Element, csproj_path: Path) -> list[Path]:
    """
    Parse ProjectReference elements from csproj XML.

    Each ``Include`` attribute may hold one path or several separated by
    semicolons (MSBuild item-list syntax). Backslashes are converted to
    forward slashes and every entry is resolved relative to the directory
    containing the csproj file. Entries that do not exist on disk, or that
    cannot be resolved, are skipped with a warning.

    Args:
        root: XML root element
        csproj_path: Path to the csproj file (for resolving relative paths)

    Returns:
        List of resolved absolute paths to referenced projects that exist,
        in document order
    """
    references: list[Path] = []
    csproj_dir = csproj_path.parent

    for element in root.iter():
        # Suffix match covers both the plain tag and "{namespace}ProjectReference".
        if not element.tag.endswith("ProjectReference"):
            continue

        raw_include = element.get("Include")
        if not raw_include:
            continue

        # One Include attribute can list several projects: "a.csproj;b.csproj".
        for entry in raw_include.split(";"):
            include = entry.strip().replace("\\", "/")
            if not include:
                continue

            try:
                ref_path = (csproj_dir / include).resolve()
                found = ref_path.exists()
            except (OSError, RuntimeError, ValueError) as e:
                # Raised by pathlib for unreadable paths, symlink loops, or
                # invalid characters; treat as an unresolvable reference.
                logger.warning(f"Failed to resolve path {include}: {e}")
                continue

            if found:
                references.append(ref_path)
            else:
                logger.warning(
                    f"Referenced project does not exist: {include} (from {csproj_path})"
                )

    return references
def _parse_package_references(root: ET.Element) -> dict[str, str]:
    """
    Collect package versions from PackageReference elements in csproj XML.

    The version is read from the ``Version`` attribute when present and
    non-empty; otherwise from the first child element whose tag ends with
    "Version" and that carries text. References without an ``Include``
    attribute or without any version are left out. A later reference to the
    same package overwrites an earlier one.

    Args:
        root: XML root element

    Returns:
        Dictionary mapping package name to version string
    """
    versions_by_package: dict[str, str] = {}

    for element in root.iter():
        # Suffix match covers both the plain tag and "{namespace}PackageReference".
        if not element.tag.endswith("PackageReference"):
            continue

        package_id = element.get("Include")
        if not package_id:
            continue

        attr_version = element.get("Version")
        if attr_version:
            versions_by_package[package_id] = attr_version
            continue

        # Fall back to a nested <Version>...</Version> element.
        nested_version = next(
            (
                child.text.strip()
                for child in element
                if child.tag.endswith("Version") and child.text
            ),
            None,
        )
        if nested_version is not None:
            versions_by_package[package_id] = nested_version

    return versions_by_package
def get_project_name_from_path(csproj_path: Path) -> str:
    """
    Return the project name implied by a csproj file path.

    Args:
        csproj_path: Path to csproj file

    Returns:
        The final path component with its last extension removed
    """
    project_name = csproj_path.stem
    return project_name
def resolve_project_path(
    include_path: str,
    from_csproj: Path,
) -> Optional[Path]:
    """
    Turn a ProjectReference Include value into an absolute path.

    Backslashes in the Include value are converted to forward slashes, and
    the result is resolved relative to the directory that contains
    ``from_csproj``.

    Args:
        include_path: The Include attribute value
        from_csproj: The csproj file containing the reference

    Returns:
        Resolved absolute path if it exists on disk; None if it does not
        exist or resolution raises
    """
    posix_include = include_path.replace("\\", "/")

    try:
        candidate = (from_csproj.parent / posix_include).resolve()
        if candidate.exists():
            return candidate
        return None
    except Exception:
        return None