""" Csproj file parsing utilities. Provides functions to: - Find all .csproj files in a directory tree - Parse csproj files to extract project references and package references - Generate deterministic GUIDs for projects """ import hashlib import logging import xml.etree.ElementTree as ET from pathlib import Path from typing import Optional from .models import CsprojProject logger = logging.getLogger(__name__) # Default patterns to exclude when scanning for csproj files DEFAULT_EXCLUDE_DIRS = { "bin", "obj", "node_modules", ".git", ".vs", ".idea", "third_party", "packages", ".nuget", ".cache", "Fixtures", # Test fixture files should not be in solutions "TestData", # Test data files should not be in solutions } # Default file patterns to exclude (test fixtures, samples, etc.) DEFAULT_EXCLUDE_PATTERNS = { "*.Tests.Fixtures", "*.Samples", } def get_deterministic_guid(path: Path, base_path: Optional[Path] = None) -> str: """ Generate a deterministic GUID from a path. Uses SHA256 hash of the relative path to ensure consistency across runs. Args: path: Path to generate GUID for base_path: Base path to calculate relative path from (optional) Returns: GUID string in uppercase format (e.g., "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX") """ if base_path: try: rel_path = path.relative_to(base_path) except ValueError: rel_path = path else: rel_path = path # Normalize path separators and convert to lowercase for consistency normalized = str(rel_path).replace("\\", "/").lower() # Generate SHA256 hash hash_bytes = hashlib.sha256(normalized.encode("utf-8")).digest() # Format as GUID (use first 16 bytes) guid_hex = hash_bytes[:16].hex().upper() guid = f"{guid_hex[:8]}-{guid_hex[8:12]}-{guid_hex[12:16]}-{guid_hex[16:20]}-{guid_hex[20:32]}" return guid def find_all_csproj( root_dir: Path, exclude_dirs: Optional[set[str]] = None, exclude_patterns: Optional[set[str]] = None, ) -> list[Path]: """ Find all .csproj files under a directory. Args: root_dir: Root directory to search exclude_dirs: Directory names to exclude (defaults to bin, obj, etc.) exclude_patterns: File name patterns to exclude Returns: List of absolute paths to .csproj files, sorted by path """ if exclude_dirs is None: exclude_dirs = DEFAULT_EXCLUDE_DIRS if exclude_patterns is None: exclude_patterns = DEFAULT_EXCLUDE_PATTERNS csproj_files: list[Path] = [] if not root_dir.exists(): logger.warning(f"Directory does not exist: {root_dir}") return csproj_files for item in root_dir.rglob("*.csproj"): # Check if any parent directory should be excluded skip = False for parent in item.parents: if parent.name in exclude_dirs: skip = True break if skip: continue # Check file name patterns for pattern in exclude_patterns: if item.match(pattern): skip = True break if skip: continue csproj_files.append(item.resolve()) return sorted(csproj_files) def parse_csproj( csproj_path: Path, base_path: Optional[Path] = None, ) -> Optional[CsprojProject]: """ Parse a .csproj file and extract project information. Args: csproj_path: Path to the .csproj file base_path: Base path for generating deterministic GUID Returns: CsprojProject with parsed information, or None if parsing fails """ if not csproj_path.exists(): logger.error(f"Csproj file does not exist: {csproj_path}") return None try: tree = ET.parse(csproj_path) root = tree.getroot() except ET.ParseError as e: logger.error(f"Failed to parse XML in {csproj_path}: {e}") return None # Extract project name from file name name = csproj_path.stem # Generate deterministic GUID guid = get_deterministic_guid(csproj_path, base_path) # Parse project references project_references = _parse_project_references(root, csproj_path) # Parse package references package_references = _parse_package_references(root) return CsprojProject( path=csproj_path.resolve(), name=name, guid=guid, project_references=project_references, package_references=package_references, ) def _parse_project_references(root: ET.Element, csproj_path: Path) -> list[Path]: """ Parse ProjectReference elements from csproj XML. Args: root: XML root element csproj_path: Path to the csproj file (for resolving relative paths) Returns: List of resolved absolute paths to referenced projects """ references: list[Path] = [] csproj_dir = csproj_path.parent # Handle both with and without namespace for ref in root.iter(): if ref.tag.endswith("ProjectReference") or ref.tag == "ProjectReference": include = ref.get("Include") if include: # Normalize path separators include = include.replace("\\", "/") # Resolve relative path try: ref_path = (csproj_dir / include).resolve() if ref_path.exists(): references.append(ref_path) else: logger.warning( f"Referenced project does not exist: {include} (from {csproj_path})" ) except Exception as e: logger.warning(f"Failed to resolve path {include}: {e}") return references def _parse_package_references(root: ET.Element) -> dict[str, str]: """ Parse PackageReference elements from csproj XML. Args: root: XML root element Returns: Dictionary mapping package name to version string """ packages: dict[str, str] = {} for ref in root.iter(): if ref.tag.endswith("PackageReference") or ref.tag == "PackageReference": include = ref.get("Include") version = ref.get("Version") if include and version: packages[include] = version elif include: # Version might be in a child element for child in ref: if child.tag.endswith("Version") or child.tag == "Version": if child.text: packages[include] = child.text.strip() break return packages def get_project_name_from_path(csproj_path: Path) -> str: """ Extract project name from csproj file path. Args: csproj_path: Path to csproj file Returns: Project name (file name without extension) """ return csproj_path.stem def resolve_project_path( include_path: str, from_csproj: Path, ) -> Optional[Path]: """ Resolve a ProjectReference Include path to an absolute path. Args: include_path: The Include attribute value from_csproj: The csproj file containing the reference Returns: Resolved absolute path, or None if resolution fails """ # Normalize path separators include_path = include_path.replace("\\", "/") try: resolved = (from_csproj.parent / include_path).resolve() return resolved if resolved.exists() else None except Exception: return None