277 lines
7.5 KiB
Python
277 lines
7.5 KiB
Python
"""
|
|
Csproj file parsing utilities.
|
|
|
|
Provides functions to:
|
|
- Find all .csproj files in a directory tree
|
|
- Parse csproj files to extract project references and package references
|
|
- Generate deterministic GUIDs for projects
|
|
"""
|
|
|
|
import hashlib
|
|
import logging
|
|
import xml.etree.ElementTree as ET
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from .models import CsprojProject
|
|
|
|
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Directory names skipped by default when scanning for csproj files
DEFAULT_EXCLUDE_DIRS = {
    ".cache",
    ".git",
    ".idea",
    ".nuget",
    ".vs",
    "Fixtures",  # Test fixture files should not be in solutions
    "TestData",  # Test data files should not be in solutions
    "bin",
    "node_modules",
    "obj",
    "packages",
    "third_party",
}
|
|
|
|
# Name patterns excluded by default (test fixtures, samples, etc.)
DEFAULT_EXCLUDE_PATTERNS = {"*.Samples", "*.Tests.Fixtures"}
|
|
|
|
|
|
def get_deterministic_guid(path: Path, base_path: Optional[Path] = None) -> str:
    """
    Derive a stable, GUID-shaped identifier from a path.

    The path is made relative to ``base_path`` when possible, normalized
    (forward slashes, lowercase) and hashed with SHA256, so the same input
    always produces the same identifier.

    Args:
        path: Path to generate the GUID for
        base_path: Optional base; when given and ``path`` lies under it, the
            relative path is hashed instead of ``path`` itself

    Returns:
        Uppercase GUID string ("XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX") built
        from the first 16 bytes of the digest
    """
    target = path
    if base_path:
        try:
            target = path.relative_to(base_path)
        except ValueError:
            # path is not located under base_path: hash it unchanged
            target = path

    # Same key regardless of separator style or letter case
    key = str(target).replace("\\", "/").lower()

    # 32 hex characters == the first 16 bytes of the SHA256 digest
    hex32 = hashlib.sha256(key.encode("utf-8")).hexdigest()[:32].upper()

    # Lay the digits out in the 8-4-4-4-12 GUID grouping
    groups = (hex32[0:8], hex32[8:12], hex32[12:16], hex32[16:20], hex32[20:32])
    return "-".join(groups)
|
|
|
|
|
|
def find_all_csproj(
    root_dir: Path,
    exclude_dirs: Optional[set[str]] = None,
    exclude_patterns: Optional[set[str]] = None,
) -> list[Path]:
    """
    Find all .csproj files under a directory.

    A file is skipped when any directory between ``root_dir`` and the file
    has a name listed in ``exclude_dirs``, or when the file matches one of
    ``exclude_patterns``. Directories above ``root_dir`` are never
    considered, so the location of the checkout itself cannot exclude
    every project.

    Args:
        root_dir: Root directory to search
        exclude_dirs: Directory names to exclude (defaults to
            DEFAULT_EXCLUDE_DIRS)
        exclude_patterns: Glob patterns to exclude (defaults to
            DEFAULT_EXCLUDE_PATTERNS). Each pattern is tested against the
            project name (file name without ".csproj") and, as before,
            against the path itself via ``Path.match``

    Returns:
        List of resolved paths to .csproj files, sorted by path; empty when
        ``root_dir`` does not exist
    """
    # Function-scope import keeps this block self-contained
    from fnmatch import fnmatch

    if exclude_dirs is None:
        exclude_dirs = DEFAULT_EXCLUDE_DIRS
    if exclude_patterns is None:
        exclude_patterns = DEFAULT_EXCLUDE_PATTERNS

    if not root_dir.exists():
        logger.warning("Directory does not exist: %s", root_dir)
        return []

    csproj_files: list[Path] = []
    for item in root_dir.rglob("*.csproj"):
        # Only directories below root_dir take part in the exclusion check;
        # the last part is the file name itself, so it is dropped.
        sub_dirs = item.relative_to(root_dir).parts[:-1]
        if any(part in exclude_dirs for part in sub_dirs):
            continue

        # A pattern such as "*.Samples" can never match "X.Samples.csproj",
        # so compare against the stem as well as the full path.
        stem = item.stem
        if any(
            fnmatch(stem, pattern) or item.match(pattern)
            for pattern in exclude_patterns
        ):
            continue

        csproj_files.append(item.resolve())

    return sorted(csproj_files)
|
|
|
|
|
|
def parse_csproj(
    csproj_path: Path,
    base_path: Optional[Path] = None,
) -> Optional[CsprojProject]:
    """
    Parse a .csproj file and extract project information.

    Args:
        csproj_path: Path to the .csproj file
        base_path: Base path for generating deterministic GUID

    Returns:
        CsprojProject with parsed information, or None if the file does not
        exist, cannot be read, or is not well-formed XML (each failure is
        logged as an error)
    """
    if not csproj_path.exists():
        logger.error("Csproj file does not exist: %s", csproj_path)
        return None

    try:
        root = ET.parse(csproj_path).getroot()
    except ET.ParseError as e:
        logger.error("Failed to parse XML in %s: %s", csproj_path, e)
        return None
    except OSError as e:
        # The path exists but cannot be opened as a file (permissions,
        # a directory named "*.csproj", ...): report it like any other
        # parse failure instead of propagating.
        logger.error("Failed to read %s: %s", csproj_path, e)
        return None

    return CsprojProject(
        path=csproj_path.resolve(),
        # The project name is the file name without its extension
        name=csproj_path.stem,
        guid=get_deterministic_guid(csproj_path, base_path),
        project_references=_parse_project_references(root, csproj_path),
        package_references=_parse_package_references(root),
    )
|
|
|
|
|
|
def _parse_project_references(root: ET.Element, csproj_path: Path) -> list[Path]:
|
|
"""
|
|
Parse ProjectReference elements from csproj XML.
|
|
|
|
Args:
|
|
root: XML root element
|
|
csproj_path: Path to the csproj file (for resolving relative paths)
|
|
|
|
Returns:
|
|
List of resolved absolute paths to referenced projects
|
|
"""
|
|
references: list[Path] = []
|
|
csproj_dir = csproj_path.parent
|
|
|
|
# Handle both with and without namespace
|
|
for ref in root.iter():
|
|
if ref.tag.endswith("ProjectReference") or ref.tag == "ProjectReference":
|
|
include = ref.get("Include")
|
|
if include:
|
|
# Normalize path separators
|
|
include = include.replace("\\", "/")
|
|
|
|
# Resolve relative path
|
|
try:
|
|
ref_path = (csproj_dir / include).resolve()
|
|
if ref_path.exists():
|
|
references.append(ref_path)
|
|
else:
|
|
logger.warning(
|
|
f"Referenced project does not exist: {include} (from {csproj_path})"
|
|
)
|
|
except Exception as e:
|
|
logger.warning(f"Failed to resolve path {include}: {e}")
|
|
|
|
return references
|
|
|
|
|
|
def _parse_package_references(root: ET.Element) -> dict[str, str]:
|
|
"""
|
|
Parse PackageReference elements from csproj XML.
|
|
|
|
Args:
|
|
root: XML root element
|
|
|
|
Returns:
|
|
Dictionary mapping package name to version string
|
|
"""
|
|
packages: dict[str, str] = {}
|
|
|
|
for ref in root.iter():
|
|
if ref.tag.endswith("PackageReference") or ref.tag == "PackageReference":
|
|
include = ref.get("Include")
|
|
version = ref.get("Version")
|
|
|
|
if include and version:
|
|
packages[include] = version
|
|
elif include:
|
|
# Version might be in a child element
|
|
for child in ref:
|
|
if child.tag.endswith("Version") or child.tag == "Version":
|
|
if child.text:
|
|
packages[include] = child.text.strip()
|
|
break
|
|
|
|
return packages
|
|
|
|
|
|
def get_project_name_from_path(csproj_path: Path) -> str:
    """
    Return the project name implied by a csproj file path.

    Args:
        csproj_path: Location of the csproj file

    Returns:
        The final path component with its last suffix removed
    """
    project_name = csproj_path.stem
    return project_name
|
|
|
|
|
|
def resolve_project_path(
    include_path: str,
    from_csproj: Path,
) -> Optional[Path]:
    """
    Turn a ProjectReference Include value into an absolute path.

    The value is interpreted relative to the directory containing
    ``from_csproj``; backslash separators are accepted.

    Args:
        include_path: The Include attribute value
        from_csproj: The csproj file containing the reference

    Returns:
        The resolved path when it exists on disk; None when it does not
        exist or resolution raises
    """
    # Backslash separators are converted to forward slashes first
    posix_include = include_path.replace("\\", "/")

    try:
        candidate = from_csproj.parent.joinpath(posix_include).resolve()
        if not candidate.exists():
            return None
        return candidate
    except Exception:
        return None
|