up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-28 00:45:16 +02:00
parent 3b96b2e3ea
commit 1c6730a1d2
95 changed files with 14504 additions and 463 deletions

View File

@@ -0,0 +1,200 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
/// <summary>
/// One runtime capability observed in Python source code, together with where it was seen
/// and how strongly the evidence supports it.
/// </summary>
/// <param name="Kind">Category of the observed capability.</param>
/// <param name="SourceFile">File in which the capability was observed.</param>
/// <param name="LineNumber">Line of the observation, or <c>null</c> when it is not known.</param>
/// <param name="Evidence">The code pattern that triggered the detection.</param>
/// <param name="Confidence">How strongly the evidence indicates the capability.</param>
/// <param name="Context">Optional extra key/value context; not emitted by <see cref="ToMetadata"/>.</param>
internal sealed record PythonCapability(
    PythonCapabilityKind Kind,
    string SourceFile,
    int? LineNumber,
    string Evidence,
    PythonCapabilityConfidence Confidence,
    ImmutableDictionary<string, string>? Context = null)
{
    /// <summary>
    /// <c>true</c> when <see cref="Kind"/> is one of the kinds this type treats as security relevant:
    /// process, code or native-code execution, file system, network or environment access,
    /// and deserialization.
    /// </summary>
    public bool IsSecuritySensitive
    {
        get
        {
            switch (Kind)
            {
                case PythonCapabilityKind.ProcessExecution:
                case PythonCapabilityKind.CodeExecution:
                case PythonCapabilityKind.FileSystemAccess:
                case PythonCapabilityKind.NetworkAccess:
                case PythonCapabilityKind.NativeCodeExecution:
                case PythonCapabilityKind.EnvironmentAccess:
                case PythonCapabilityKind.Deserialization:
                    return true;
                default:
                    return false;
            }
        }
    }

    /// <summary>
    /// Lazily produces the metadata entries describing this capability.
    /// </summary>
    /// <param name="prefix">Text placed in front of every key, followed by a dot.</param>
    /// <returns>
    /// Entries for <c>kind</c>, <c>file</c>, <c>line</c> (only when a line number is present),
    /// <c>evidence</c>, <c>confidence</c> and <c>securitySensitive</c>, in that order.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return new KeyValuePair<string, string?>($"{prefix}.kind", Kind.ToString());
        yield return new KeyValuePair<string, string?>($"{prefix}.file", SourceFile);

        if (LineNumber is { } knownLine)
        {
            yield return new KeyValuePair<string, string?>($"{prefix}.line", knownLine.ToString());
        }

        yield return new KeyValuePair<string, string?>($"{prefix}.evidence", Evidence);
        yield return new KeyValuePair<string, string?>($"{prefix}.confidence", Confidence.ToString());
        yield return new KeyValuePair<string, string?>($"{prefix}.securitySensitive", IsSecuritySensitive.ToString());
    }
}
/// <summary>
/// Categories of runtime capability that can be attached to a <see cref="PythonCapability"/>.
/// Members carry no explicit values, so their numeric values follow declaration order;
/// reordering or inserting members changes those values.
/// </summary>
internal enum PythonCapabilityKind
{
/// <summary>
/// Capability that could not be classified. Being the first member, this is the default value (0).
/// </summary>
Unknown,
// Process and execution
/// <summary>
/// Process execution (subprocess, os.system, etc.). Treated as security-sensitive.
/// </summary>
ProcessExecution,
/// <summary>
/// Dynamic code execution (eval, exec, compile). Treated as security-sensitive.
/// </summary>
CodeExecution,
/// <summary>
/// Native code execution via ctypes/cffi. Treated as security-sensitive.
/// </summary>
NativeCodeExecution,
// Concurrency
/// <summary>
/// Use of multiple processes (for example the <c>multiprocessing</c> module).
/// </summary>
Multiprocessing,
/// <summary>
/// Use of threads (for example the <c>threading</c> module).
/// </summary>
Threading,
/// <summary>
/// Use of an asynchronous I/O framework (for example <c>asyncio</c>).
/// </summary>
AsyncAwait,
// I/O and resources
/// <summary>
/// File system access. Treated as security-sensitive.
/// </summary>
FileSystemAccess,
/// <summary>
/// Network access (sockets, http). Treated as security-sensitive.
/// </summary>
NetworkAccess,
/// <summary>
/// Database access (for example <c>sqlite3</c> or a database client library).
/// </summary>
DatabaseAccess,
// System interaction
/// <summary>
/// Environment variable access. Treated as security-sensitive.
/// </summary>
EnvironmentAccess,
/// <summary>
/// Signal handling (the <c>signal</c> module).
/// </summary>
SignalHandling,
/// <summary>
/// System information access (for example <c>platform</c> or <c>sys</c>).
/// </summary>
SystemInfo,
// Security sensitive
/// <summary>
/// Cryptographic operations. Note: not part of <c>PythonCapability.IsSecuritySensitive</c>.
/// </summary>
Cryptography,
/// <summary>
/// Deserialization (pickle, marshal). Treated as security-sensitive.
/// </summary>
Deserialization,
/// <summary>
/// Import hook modification. Note: not part of <c>PythonCapability.IsSecuritySensitive</c>.
/// </summary>
ImportHook,
// FFI/Native
/// <summary>
/// ctypes usage. Reported separately from <see cref="NativeCodeExecution"/>.
/// </summary>
Ctypes,
/// <summary>
/// CFFI usage. Reported separately from <see cref="NativeCodeExecution"/>.
/// </summary>
Cffi,
/// <summary>
/// WebAssembly runtime usage.
/// </summary>
Wasm,
// Frameworks
/// <summary>
/// Web framework (Django, Flask, FastAPI).
/// </summary>
WebFramework,
/// <summary>
/// Task queue (Celery, RQ).
/// </summary>
TaskQueue,
/// <summary>
/// Machine learning framework.
/// </summary>
MachineLearning
}
/// <summary>
/// Confidence level for capability detection.
/// The numeric values increase with confidence, so ordering by value ranks detections
/// from weakest (<see cref="Low"/>) to strongest (<see cref="Definitive"/>).
/// </summary>
internal enum PythonCapabilityConfidence
{
/// <summary>
/// Low confidence - heuristic match. This is the default value (0).
/// </summary>
Low = 0,
/// <summary>
/// Medium confidence - likely usage.
/// </summary>
Medium = 1,
/// <summary>
/// High confidence - clear usage pattern.
/// </summary>
High = 2,
/// <summary>
/// Definitive - direct API call detected. Highest value in the enum.
/// </summary>
Definitive = 3
}

View File

@@ -0,0 +1,335 @@
using System.Collections.Frozen;
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
/// <summary>
/// Detects runtime capabilities from Python source code.
/// </summary>
internal sealed partial class PythonCapabilityDetector
{
/// <summary>
/// Maps a Python module (or dotted member) name to the capability it implies and the
/// confidence assigned to that implication. Keys are compared ordinally (case-sensitive).
/// </summary>
/// <remarks>
/// Lookups are made with names taken from <c>import</c> / <c>from</c> statements, so a key is
/// only useful when it is spelled like the importable module. The <c>pycryptodome</c>,
/// <c>pytorch</c> and <c>scikit-learn</c> keys are distribution names and are kept only for
/// backward compatibility; <c>Crypto</c> and <c>Cryptodome</c> are the import names of
/// PyCryptodome / PyCryptodomeX.
/// </remarks>
private static readonly FrozenDictionary<string, (PythonCapabilityKind Kind, PythonCapabilityConfidence Confidence)> ImportCapabilities =
    new Dictionary<string, (PythonCapabilityKind, PythonCapabilityConfidence)>(StringComparer.Ordinal)
    {
        // Process execution
        ["subprocess"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.Definitive),
        ["os.system"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["os.popen"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["os.spawn"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["os.exec"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["commands"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["pexpect"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.Definitive),
        // Multiprocessing/Threading
        ["multiprocessing"] = (PythonCapabilityKind.Multiprocessing, PythonCapabilityConfidence.Definitive),
        ["concurrent.futures"] = (PythonCapabilityKind.Multiprocessing, PythonCapabilityConfidence.High),
        ["threading"] = (PythonCapabilityKind.Threading, PythonCapabilityConfidence.Definitive),
        ["_thread"] = (PythonCapabilityKind.Threading, PythonCapabilityConfidence.Definitive),
        ["asyncio"] = (PythonCapabilityKind.AsyncAwait, PythonCapabilityConfidence.Definitive),
        ["trio"] = (PythonCapabilityKind.AsyncAwait, PythonCapabilityConfidence.Definitive),
        ["anyio"] = (PythonCapabilityKind.AsyncAwait, PythonCapabilityConfidence.Definitive),
        // FFI/Native
        ["ctypes"] = (PythonCapabilityKind.Ctypes, PythonCapabilityConfidence.Definitive),
        ["cffi"] = (PythonCapabilityKind.Cffi, PythonCapabilityConfidence.Definitive),
        ["_ctypes"] = (PythonCapabilityKind.Ctypes, PythonCapabilityConfidence.Definitive),
        ["wasmtime"] = (PythonCapabilityKind.Wasm, PythonCapabilityConfidence.Definitive),
        ["wasmer"] = (PythonCapabilityKind.Wasm, PythonCapabilityConfidence.Definitive),
        // Network
        ["socket"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["http.client"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),
        ["urllib"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),
        ["urllib3"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),
        ["requests"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["httpx"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["aiohttp"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["websocket"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["paramiko"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["ftplib"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),
        ["smtplib"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),
        // File system
        ["pathlib"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.Medium),
        ["shutil"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.High),
        ["glob"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.Medium),
        ["tempfile"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.High),
        ["watchdog"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.Definitive),
        // Database
        ["sqlite3"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["psycopg2"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["pymysql"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["pymongo"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["sqlalchemy"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["redis"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        // Security sensitive
        ["pickle"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.Definitive),
        ["marshal"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.High),
        ["shelve"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.High),
        ["dill"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.Definitive),
        ["cloudpickle"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.Definitive),
        ["cryptography"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Definitive),
        ["pycryptodome"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Definitive),
        // Import names under which PyCryptodome / PyCryptodomeX are actually installed.
        ["Crypto"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Definitive),
        ["Cryptodome"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Definitive),
        ["hashlib"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Medium),
        ["ssl"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Medium),
        // System
        ["signal"] = (PythonCapabilityKind.SignalHandling, PythonCapabilityConfidence.Definitive),
        ["platform"] = (PythonCapabilityKind.SystemInfo, PythonCapabilityConfidence.Low),
        ["sys"] = (PythonCapabilityKind.SystemInfo, PythonCapabilityConfidence.Low),
        // Web frameworks
        ["django"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["flask"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["fastapi"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["starlette"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["tornado"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["bottle"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["pyramid"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        // Task queues
        ["celery"] = (PythonCapabilityKind.TaskQueue, PythonCapabilityConfidence.Definitive),
        ["rq"] = (PythonCapabilityKind.TaskQueue, PythonCapabilityConfidence.Definitive),
        ["huey"] = (PythonCapabilityKind.TaskQueue, PythonCapabilityConfidence.Definitive),
        ["dramatiq"] = (PythonCapabilityKind.TaskQueue, PythonCapabilityConfidence.Definitive),
        // ML
        ["tensorflow"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["torch"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["pytorch"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["sklearn"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["scikit-learn"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["keras"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["transformers"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
    }.ToFrozenDictionary(StringComparer.Ordinal); // the source dictionary's comparer is not carried over implicitly
// Patterns for code execution detection.
// All patterns below are applied to one source line at a time and are purely textual:
// they also match inside string literals and trailing comments.

/// <summary>Matches <c>eval</c> at a word boundary followed by optional whitespace and <c>(</c>.
/// A method call such as <c>obj.eval(</c> matches too, because <c>.</c> is a word boundary.</summary>
[GeneratedRegex(@"\beval\s*\(", RegexOptions.Compiled)]
private static partial Regex EvalPattern();
/// <summary>Matches <c>exec</c> at a word boundary followed by optional whitespace and <c>(</c>.</summary>
[GeneratedRegex(@"\bexec\s*\(", RegexOptions.Compiled)]
private static partial Regex ExecPattern();
/// <summary>Matches <c>compile</c> at a word boundary followed by optional whitespace and <c>(</c>.
/// Note that this also matches attribute calls such as <c>re.compile(</c>.</summary>
[GeneratedRegex(@"\bcompile\s*\(", RegexOptions.Compiled)]
private static partial Regex CompilePattern();
/// <summary>Matches <c>__import__</c> followed by optional whitespace and <c>(</c> (no word-boundary check).</summary>
[GeneratedRegex(@"__import__\s*\(", RegexOptions.Compiled)]
private static partial Regex DynamicImportPattern();
// Pattern for environment access
/// <summary>Matches the literal texts <c>os.environ</c>, <c>os.getenv</c> or <c>environ.get</c> anywhere in the line.</summary>
[GeneratedRegex(@"os\.environ|os\.getenv|environ\.get", RegexOptions.Compiled)]
private static partial Regex EnvironmentPattern();
// Pattern for file operations
/// <summary>Matches <c>open(</c> followed, before any <c>)</c>, by a quote character whose next character is
/// <c>r</c>, <c>w</c> or <c>a</c>. That quoted text may be a mode argument, but a file name starting with
/// one of those letters matches as well.</summary>
[GeneratedRegex(@"\bopen\s*\([^)]*[""'][rwa]", RegexOptions.Compiled)]
private static partial Regex FileOpenPattern();
/// <summary>
/// Scans every <c>.py</c> file in the virtual file system and reports the capabilities found.
/// </summary>
/// <param name="vfs">File system whose files are scanned.</param>
/// <param name="cancellationToken">Checked before each file is processed.</param>
/// <returns>
/// At most one capability per (kind, source file) pair; when several detections share a pair,
/// the one with the highest confidence is kept (the earliest one on ties).
/// </returns>
public async Task<ImmutableArray<PythonCapability>> DetectAsync(
    PythonVirtualFileSystem vfs,
    CancellationToken cancellationToken = default)
{
    // Snapshot the candidate files before any of them is opened.
    var sources = (from entry in vfs.Files
                   where entry.VirtualPath.EndsWith(".py", StringComparison.OrdinalIgnoreCase)
                   select entry).ToList();

    var detected = new List<PythonCapability>();
    foreach (var source in sources)
    {
        cancellationToken.ThrowIfCancellationRequested();
        detected.AddRange(await DetectInFileAsync(vfs, source, cancellationToken).ConfigureAwait(false));
    }

    // Collapse repeated detections of the same kind within the same file.
    var strongestPerKindAndFile =
        from capability in detected
        group capability by (capability.Kind, capability.SourceFile) into sameKindAndFile
        select sameKindAndFile.OrderByDescending(c => c.Confidence).First();

    return strongestPerKindAndFile.ToImmutableArray();
}
/// <summary>
/// Reads one file and collects every capability detection made on its lines.
/// </summary>
/// <param name="vfs">File system used to open the file.</param>
/// <param name="file">File to scan.</param>
/// <param name="cancellationToken">Checked once per line and passed to the read calls.</param>
/// <returns>
/// All detections in line order (import-based ones first within a line). Empty when the file
/// cannot be opened; an <see cref="IOException"/> while reading is swallowed and whatever was
/// collected up to that point is returned.
/// </returns>
private async Task<IEnumerable<PythonCapability>> DetectInFileAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    // Per-line textual rules, evaluated in this order against the untrimmed line.
    var lineRules = new (Regex Pattern, PythonCapabilityKind Kind, string Evidence, PythonCapabilityConfidence Confidence)[]
    {
        (EvalPattern(), PythonCapabilityKind.CodeExecution, "eval()", PythonCapabilityConfidence.Definitive),
        (ExecPattern(), PythonCapabilityKind.CodeExecution, "exec()", PythonCapabilityConfidence.Definitive),
        (CompilePattern(), PythonCapabilityKind.CodeExecution, "compile()", PythonCapabilityConfidence.High),
        (DynamicImportPattern(), PythonCapabilityKind.ImportHook, "__import__()", PythonCapabilityConfidence.High),
        (EnvironmentPattern(), PythonCapabilityKind.EnvironmentAccess, "os.environ", PythonCapabilityConfidence.High),
        (FileOpenPattern(), PythonCapabilityKind.FileSystemAccess, "open()", PythonCapabilityConfidence.High),
    };

    var found = new List<PythonCapability>();

    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return found;
        }

        using var reader = new StreamReader(stream);
        var text = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        var lineNumber = 0; // 1-based once incremented
        foreach (var rawLine in text.Split('\n'))
        {
            lineNumber++;
            cancellationToken.ThrowIfCancellationRequested();

            var unindented = rawLine.TrimStart();

            // Whole-line comments are ignored; trailing comments are not stripped.
            if (unindented.StartsWith('#'))
            {
                continue;
            }

            var isImportStatement =
                unindented.StartsWith("import ", StringComparison.Ordinal) ||
                unindented.StartsWith("from ", StringComparison.Ordinal);
            if (isImportStatement)
            {
                found.AddRange(DetectImportCapabilities(unindented, file.VirtualPath, lineNumber));
            }

            foreach (var rule in lineRules)
            {
                if (!rule.Pattern.IsMatch(rawLine))
                {
                    continue;
                }

                found.Add(new PythonCapability(
                    Kind: rule.Kind,
                    SourceFile: file.VirtualPath,
                    LineNumber: lineNumber,
                    Evidence: rule.Evidence,
                    Confidence: rule.Confidence));
            }
        }
    }
    catch (IOException)
    {
        // Unreadable file: keep whatever was collected so far.
    }

    return found;
}
/// <summary>
/// Translates one import statement into capability detections.
/// </summary>
/// <param name="line">The import statement text.</param>
/// <param name="sourceFile">File the statement belongs to.</param>
/// <param name="lineNumber">Line number recorded on each detection.</param>
/// <returns>
/// For every module named by the statement: the table entry with exactly that name (if any),
/// followed by every table entry that is a dotted child of the module (importing <c>os</c>
/// reports <c>os.system</c>) or a dotted parent of it (importing <c>urllib.request</c>
/// reports <c>urllib</c>). Evaluated lazily.
/// </returns>
private static IEnumerable<PythonCapability> DetectImportCapabilities(string line, string sourceFile, int lineNumber)
{
    foreach (var imported in ParseImportLine(line))
    {
        var evidence = $"import {imported}";

        if (ImportCapabilities.TryGetValue(imported, out var exact))
        {
            yield return new PythonCapability(
                Kind: exact.Kind,
                SourceFile: sourceFile,
                LineNumber: lineNumber,
                Evidence: evidence,
                Confidence: exact.Confidence);
        }

        var importedAsPrefix = imported + ".";
        foreach (var entry in ImportCapabilities)
        {
            var known = entry.Key;
            var isChildOfImported = known.StartsWith(importedAsPrefix, StringComparison.Ordinal);
            var isParentOfImported = imported.StartsWith(known + ".", StringComparison.Ordinal);
            if (!isChildOfImported && !isParentOfImported)
            {
                continue;
            }

            yield return new PythonCapability(
                Kind: entry.Value.Kind,
                SourceFile: sourceFile,
                LineNumber: lineNumber,
                Evidence: evidence,
                Confidence: entry.Value.Confidence);
        }
    }
}
/// <summary>
/// Extracts the module names referenced by a single Python import statement.
/// </summary>
/// <param name="line">
/// Text of the statement. Surrounding whitespace, a trailing <c># comment</c> and anything after
/// a <c>;</c> statement separator are ignored.
/// </param>
/// <returns>
/// For <c>import a, b as x</c>: <c>a</c> and <c>b</c>. For <c>from a.b import c</c>: <c>a.b</c>
/// (the imported member names are not returned). Nothing for any other text. Malformed
/// statements such as <c>import a,</c> yield the names that are present instead of throwing.
/// </returns>
private static IEnumerable<string> ParseImportLine(string line)
{
    var statement = line.Trim();

    // Neither '#' nor ';' can be part of a module name, so everything from there on is noise.
    var noiseStart = statement.IndexOfAny(new[] { '#', ';' });
    if (noiseStart >= 0)
    {
        statement = statement[..noiseStart].TrimEnd();
    }

    if (statement.StartsWith("import ", StringComparison.Ordinal))
    {
        // import a, b, c
        // import a as x
        foreach (var clause in statement[7..].Split(','))
        {
            // The first whitespace-delimited token is the module; "as alias" (if any) follows it.
            var tokens = clause.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
            if (tokens.Length > 0)
            {
                yield return tokens[0];
            }
        }
    }
    else if (statement.StartsWith("from ", StringComparison.Ordinal))
    {
        // from a import b, c
        var remainder = statement[5..];
        var importKeyword = remainder.IndexOf(" import ", StringComparison.Ordinal);
        var module = (importKeyword >= 0 ? remainder[..importKeyword] : remainder).Trim();
        if (module.Length > 0)
        {
            yield return module;
        }
    }
}
}

View File

@@ -0,0 +1,130 @@
using System.Collections.Immutable;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
/// <summary>
/// Represents a detected native extension in a Python project.
/// </summary>
/// <param name="ModuleName">The Python module name for this extension.</param>
/// <param name="Path">The virtual path to the extension file.</param>
/// <param name="Kind">The type of native extension.</param>
/// <param name="Platform">The target platform (if detectable).</param>
/// <param name="Architecture">The target architecture (if detectable).</param>
/// <param name="Source">Where this extension came from.</param>
/// <param name="PackageName">The package this extension belongs to (if known).</param>
/// <param name="Dependencies">Native library dependencies (if detectable).</param>
internal sealed record PythonNativeExtension(
    string ModuleName,
    string Path,
    PythonNativeExtensionKind Kind,
    string? Platform,
    string? Architecture,
    PythonFileSource Source,
    string? PackageName,
    ImmutableArray<string> Dependencies)
{
    /// <summary>
    /// Gets the lower-cased file extension of <see cref="Path"/>, including the leading dot.
    /// </summary>
    public string FileExtension => System.IO.Path.GetExtension(Path).ToLowerInvariant();

    /// <summary>
    /// Gets whether this is a Linux extension: the platform mentions "linux", or the file is a
    /// <c>.so</c> that is not identified as macOS (macOS extension modules use <c>.so</c> too).
    /// </summary>
    public bool IsLinux => PlatformMentions("linux") || (FileExtension == ".so" && !IsMacOS);

    /// <summary>
    /// Gets whether this is a Windows extension: a <c>.pyd</c> file, or a platform mentioning "win".
    /// "darwin" is excluded explicitly because it contains the substring "win".
    /// </summary>
    public bool IsWindows => FileExtension == ".pyd" ||
        (PlatformMentions("win") && !PlatformMentions("darwin"));

    /// <summary>
    /// Gets whether this is a macOS extension (platform mentions "darwin" or "macos").
    /// </summary>
    public bool IsMacOS => PlatformMentions("darwin") || PlatformMentions("macos");

    /// <summary>
    /// Generates metadata entries for this extension.
    /// </summary>
    /// <param name="prefix">Text placed in front of every key, followed by a dot.</param>
    /// <returns>
    /// <c>module</c>, <c>path</c> and <c>kind</c> always; <c>platform</c>, <c>arch</c>,
    /// <c>package</c> and a comma-joined <c>dependencies</c> only when present.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return new($"{prefix}.module", ModuleName);
        yield return new($"{prefix}.path", Path);
        yield return new($"{prefix}.kind", Kind.ToString());

        if (Platform is not null)
        {
            yield return new($"{prefix}.platform", Platform);
        }

        if (Architecture is not null)
        {
            yield return new($"{prefix}.arch", Architecture);
        }

        if (PackageName is not null)
        {
            yield return new($"{prefix}.package", PackageName);
        }

        // IsDefaultOrEmpty also covers default(ImmutableArray<string>), on which Length would throw.
        if (!Dependencies.IsDefaultOrEmpty)
        {
            yield return new($"{prefix}.dependencies", string.Join(",", Dependencies));
        }
    }

    /// <summary>Case-insensitive substring test against <see cref="Platform"/>; false when it is null.</summary>
    private bool PlatformMentions(string token) =>
        Platform is not null && Platform.Contains(token, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// The type of native extension.
/// Members carry no explicit values, so their numeric values follow declaration order.
/// </summary>
internal enum PythonNativeExtensionKind
{
/// <summary>
/// Unknown extension type. Being the first member, this is the default value (0).
/// </summary>
Unknown,
/// <summary>
/// Standard C extension module (.so, .pyd).
/// </summary>
CExtension,
/// <summary>
/// Cython compiled module.
/// </summary>
Cython,
/// <summary>
/// CFFI extension.
/// </summary>
Cffi,
/// <summary>
/// pybind11 extension.
/// </summary>
Pybind11,
/// <summary>
/// SWIG generated extension.
/// </summary>
Swig,
/// <summary>
/// Rust extension (PyO3/maturin).
/// </summary>
Rust,
/// <summary>
/// NumPy extension.
/// </summary>
Numpy,
/// <summary>
/// WebAssembly module (a <c>.wasm</c> file).
/// </summary>
Wasm
}

View File

@@ -0,0 +1,269 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
/// <summary>
/// Scans for native extensions in a Python project.
/// </summary>
internal sealed partial class PythonNativeExtensionScanner
{
// Pattern to extract module name and platform info from an extension FILE NAME (no directories).
// Shape: <module>[.cpython-<digits>[-<platform>]].(so|pyd), case-insensitive.
// Examples that match: _multiarray_umath.cpython-311-x86_64-linux-gnu.so
//                      _ssl.cpython-311-darwin.so
//                      _ctypes.pyd
// The module group allows no dots, and only the "cpython-" tag prefix is recognised, so names such as
// "a.b.cpython-311.so" or "x.cp311-win_amd64.pyd" do not match this pattern.
[GeneratedRegex(
@"^(?<module>[a-zA-Z_][a-zA-Z0-9_]*)(?:\.cpython-(?<pyver>\d+)(?:-(?<platform>[^.]+))?)?\.(?<ext>so|pyd)$",
RegexOptions.Compiled | RegexOptions.IgnoreCase)]
private static partial Regex ExtensionFilePattern();
// Pattern to detect Cython modules from source: a line starting with "# cython:" (multiline mode).
// NOTE(review): not referenced by the members visible in this part of the partial class.
[GeneratedRegex(@"^\s*#\s*cython:", RegexOptions.Compiled | RegexOptions.Multiline)]
private static partial Regex CythonMarkerPattern();
// Pattern for CFFI: either "ffi = FFI()" or a call to "ffi.cdef(".
[GeneratedRegex(@"ffi\s*=\s*FFI\(\)|ffi\.cdef\s*\(", RegexOptions.Compiled)]
private static partial Regex CffiPattern();
// Pattern for pybind11: the PYBIND11_MODULE macro or the pybind11::module type name.
// NOTE(review): not referenced by the members visible in this part of the partial class.
[GeneratedRegex(@"PYBIND11_MODULE|pybind11::module", RegexOptions.Compiled)]
private static partial Regex Pybind11Pattern();
// Pattern for SWIG: a "%module <name>" directive or the SWIG_init symbol.
// NOTE(review): not referenced by the members visible in this part of the partial class.
[GeneratedRegex(@"%module\s+\w+|SWIG_init", RegexOptions.Compiled)]
private static partial Regex SwigPattern();
// Pattern for PyO3/Rust: the #[pymodule] / #[pyfunction] attributes or a "use pyo3::" import.
// NOTE(review): not referenced by the members visible in this part of the partial class.
[GeneratedRegex(@"#\[pymodule\]|#\[pyfunction\]|use pyo3::", RegexOptions.Compiled)]
private static partial Regex PyO3Pattern();
/// <summary>
/// Enumerates the native extensions found in the virtual file system.
/// </summary>
/// <param name="vfs">File system to inspect.</param>
/// <returns>
/// First one entry per <c>.so</c> / <c>.pyd</c> file that can be parsed, then one entry per
/// <c>.wasm</c> file (kind <c>Wasm</c>, architecture <c>wasm32</c>, no platform). Evaluated lazily.
/// </returns>
public IEnumerable<PythonNativeExtension> Scan(PythonVirtualFileSystem vfs)
{
    static bool HasSuffix(string virtualPath, string suffix) =>
        virtualPath.EndsWith(suffix, StringComparison.OrdinalIgnoreCase);

    // Shared objects and Windows .pyd modules.
    var binaryModules = vfs.Files
        .Where(f => HasSuffix(f.VirtualPath, ".so") || HasSuffix(f.VirtualPath, ".pyd"))
        .ToList();

    foreach (var binaryModule in binaryModules)
    {
        if (ParseExtensionFile(binaryModule) is { } parsed)
        {
            yield return parsed;
        }
    }

    // WebAssembly modules carry no platform tag; they are reported with fixed attributes.
    var wasmModules = vfs.Files
        .Where(f => HasSuffix(f.VirtualPath, ".wasm"))
        .ToList();

    foreach (var wasmModule in wasmModules)
    {
        var wasmPath = wasmModule.VirtualPath;
        yield return new PythonNativeExtension(
            ModuleName: Path.GetFileNameWithoutExtension(wasmPath),
            Path: wasmPath,
            Kind: PythonNativeExtensionKind.Wasm,
            Platform: null,
            Architecture: "wasm32",
            Source: wasmModule.Source,
            PackageName: ExtractPackageName(wasmPath),
            Dependencies: ImmutableArray<string>.Empty);
    }
}
/// <summary>
/// Guesses how a package's native extension was built by looking at files next to it.
/// </summary>
/// <param name="vfs">File system to inspect.</param>
/// <param name="packagePath">Directory of the package; <c>/setup.py</c> and <c>/Cargo.toml</c> are appended to it.</param>
/// <param name="cancellationToken">Passed to the file reads.</param>
/// <returns>
/// The first rule that applies, in this order: any <c>*.pyx</c> file gives Cython; a
/// <c>setup.py</c> matching the CFFI pattern gives Cffi, mentioning "pybind11" gives Pybind11,
/// mentioning "numpy.distutils" gives Numpy; a <c>Cargo.toml</c> mentioning "pyo3" gives Rust;
/// otherwise CExtension.
/// </returns>
public async Task<PythonNativeExtensionKind> DetectExtensionKindAsync(
    PythonVirtualFileSystem vfs,
    string packagePath,
    CancellationToken cancellationToken = default)
{
    // Returns the file's text, or null when it does not exist or cannot be opened.
    async Task<string?> ReadTextOrNullAsync(string virtualPath)
    {
        if (!vfs.FileExists(virtualPath))
        {
            return null;
        }

        using var stream = await vfs.OpenReadAsync(virtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return null;
        }

        using var reader = new StreamReader(stream);
        return await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
    }

    // Cython sources shipped with the package.
    if (vfs.EnumerateFiles(packagePath, "*.pyx").Any())
    {
        return PythonNativeExtensionKind.Cython;
    }

    // Build-script hints (only setup.py is inspected).
    var setupScript = await ReadTextOrNullAsync($"{packagePath}/setup.py").ConfigureAwait(false);
    if (setupScript is not null)
    {
        if (CffiPattern().IsMatch(setupScript))
        {
            return PythonNativeExtensionKind.Cffi;
        }

        if (setupScript.Contains("pybind11", StringComparison.OrdinalIgnoreCase))
        {
            return PythonNativeExtensionKind.Pybind11;
        }

        if (setupScript.Contains("numpy.distutils", StringComparison.OrdinalIgnoreCase))
        {
            return PythonNativeExtensionKind.Numpy;
        }
    }

    // Rust/PyO3: a Cargo manifest that mentions pyo3.
    var cargoManifest = await ReadTextOrNullAsync($"{packagePath}/Cargo.toml").ConfigureAwait(false);
    var usesPyO3 = cargoManifest is not null &&
        cargoManifest.Contains("pyo3", StringComparison.OrdinalIgnoreCase);

    return usesPyO3 ? PythonNativeExtensionKind.Rust : PythonNativeExtensionKind.CExtension;
}
/// <summary>
/// Builds a <see cref="PythonNativeExtension"/> from the name of a <c>.so</c> / <c>.pyd</c> file.
/// </summary>
/// <param name="file">The extension file.</param>
/// <returns>
/// The described extension, or <c>null</c> when no module name can be derived from the file
/// name (for example a file literally named <c>.so</c>).
/// </returns>
/// <remarks>
/// Names matching <c>ExtensionFilePattern</c> take module and platform from the match. Any other
/// name is split at its first dot: the part before it is the module, and the remainder
/// (e.g. <c>cp311-win_amd64</c>, <c>abi3</c>) is still searched for platform and architecture
/// hints so that tagged Windows builds keep their architecture. When no platform can be
/// derived, <c>.pyd</c> implies <c>win32</c> and everything else <c>posix</c>.
/// </remarks>
private static PythonNativeExtension? ParseExtensionFile(PythonVirtualFile file)
{
    var fileName = Path.GetFileName(file.VirtualPath);
    var match = ExtensionFilePattern().Match(fileName);

    string moduleName;
    string? platform = null;
    string? architecture = null;

    if (match.Success)
    {
        moduleName = match.Groups["module"].Value;

        var platformGroup = match.Groups["platform"];
        if (platformGroup.Success)
        {
            (platform, architecture) = ParsePlatformString(platformGroup.Value);
        }
    }
    else
    {
        // Fall back to simple filename parsing: "<module>.<tag...>.<ext>".
        var stem = Path.GetFileNameWithoutExtension(fileName);
        var firstDot = stem.IndexOf('.');
        if (firstDot < 0)
        {
            moduleName = stem;
        }
        else
        {
            moduleName = stem[..firstDot];
            // The tag may still name the platform/architecture; unknown tags simply yield nulls.
            (platform, architecture) = ParsePlatformString(stem[(firstDot + 1)..]);
        }
    }

    if (moduleName.Length == 0)
    {
        // Nothing importable can have an empty name.
        return null;
    }

    // Infer platform from extension when the name did not reveal it.
    platform ??= Path.GetExtension(fileName).ToLowerInvariant() == ".pyd" ? "win32" : "posix";

    return new PythonNativeExtension(
        ModuleName: moduleName,
        Path: file.VirtualPath,
        Kind: InferExtensionKind(file.VirtualPath, moduleName),
        Platform: platform,
        Architecture: architecture,
        Source: file.Source,
        PackageName: ExtractPackageName(file.VirtualPath),
        Dependencies: ImmutableArray<string>.Empty);
}
/// <summary>
/// Derives a normalized platform and architecture from a platform tag such as
/// <c>x86_64-linux-gnu</c>, <c>darwin</c> or <c>win_amd64</c>.
/// </summary>
/// <param name="platformStr">The tag to inspect; matching is by case-insensitive substring.</param>
/// <returns>
/// Platform <c>linux</c>, <c>darwin</c> or <c>win32</c> (checked in that order, so "darwin" is
/// never mistaken for "win"), and architecture <c>x86_64</c>, <c>aarch64</c> or <c>x86</c>;
/// each is <c>null</c> when nothing in the tag identifies it.
/// </returns>
private static (string? Platform, string? Architecture) ParsePlatformString(string platformStr)
{
    bool Mentions(string token) => platformStr.Contains(token, StringComparison.OrdinalIgnoreCase);

    string? operatingSystem =
        Mentions("linux") ? "linux" :
        Mentions("darwin") ? "darwin" :
        Mentions("win") ? "win32" :
        null;

    // 64-bit names are tested before the bare "x86" substring they contain.
    string? cpu =
        Mentions("x86_64") || Mentions("amd64") ? "x86_64" :
        Mentions("aarch64") || Mentions("arm64") ? "aarch64" :
        Mentions("i686") || Mentions("x86") ? "x86" :
        null;

    return (operatingSystem, cpu);
}
/// <summary>
/// Guesses the extension kind from naming conventions alone.
/// </summary>
/// <param name="path">Path of the extension file; compared after invariant lower-casing.</param>
/// <param name="moduleName">Module name; prefixes are compared case-insensitively.</param>
/// <returns>
/// Numpy when the path contains "numpy" or the module starts with "_multiarray"; otherwise Cffi
/// when the path contains "cffi" or the module starts with "_cffi"; otherwise Cython when the
/// module starts with "_cython"; otherwise CExtension.
/// </returns>
private static PythonNativeExtensionKind InferExtensionKind(string path, string moduleName)
{
    var loweredPath = path.ToLowerInvariant();

    bool ModuleStartsWith(string prefix) =>
        moduleName.StartsWith(prefix, StringComparison.OrdinalIgnoreCase);

    var looksLikeNumpy = loweredPath.Contains("numpy") || ModuleStartsWith("_multiarray");
    var looksLikeCffi = loweredPath.Contains("cffi") || ModuleStartsWith("_cffi");

    return looksLikeNumpy ? PythonNativeExtensionKind.Numpy
        : looksLikeCffi ? PythonNativeExtensionKind.Cffi
        : ModuleStartsWith("_cython") ? PythonNativeExtensionKind.Cython
        : PythonNativeExtensionKind.CExtension;
}
/// <summary>
/// Returns the first <c>/</c>-separated segment of a virtual path, e.g. <c>numpy</c> for
/// <c>numpy/core/_multiarray.so</c>.
/// </summary>
/// <param name="virtualPath">Path using <c>/</c> as separator.</param>
/// <returns>
/// The text before the first <c>/</c> (empty when the path starts with <c>/</c>), or
/// <c>null</c> when the path contains no separator at all.
/// </returns>
private static string? ExtractPackageName(string virtualPath)
{
    var firstSeparator = virtualPath.IndexOf('/');
    return firstSeparator < 0 ? null : virtualPath[..firstSeparator];
}
}

View File

@@ -0,0 +1,182 @@
using System.Collections.Immutable;
using System.Text.Json;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
/// <summary>
/// Adapter for Conda package installations.
/// </summary>
internal sealed class CondaAdapter : IPythonPackagingAdapter
{
/// <summary>Identifier of this adapter.</summary>
public string Name => "conda";

/// <summary>Ordering value of this adapter relative to the other packaging adapters.</summary>
public int Priority => 20; // Lower priority than pip/dist-info

/// <summary>
/// Reports whether a conda-meta directory containing at least one <c>*.json</c> file can be
/// located from <paramref name="path"/>.
/// </summary>
/// <param name="vfs">File system to inspect.</param>
/// <param name="path">Directory the search starts from.</param>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
{
    // FindCondaMetaPath only returns a directory in which it already found a *.json file,
    // so a non-null result is sufficient; enumerating the directory again adds nothing.
    return FindCondaMetaPath(vfs, path) is not null;
}
/// <summary>
/// Streams one <see cref="PythonPackageInfo"/> per readable <c>*.json</c> record in the
/// conda-meta directory located from <paramref name="path"/>.
/// </summary>
/// <param name="vfs">File system to inspect.</param>
/// <param name="path">Directory the conda-meta search starts from; also recorded as each package's location.</param>
/// <param name="cancellationToken">Checked before each record is processed.</param>
/// <returns>Nothing when no conda-meta directory is found; records that cannot be parsed are skipped.</returns>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    if (FindCondaMetaPath(vfs, path) is not { } metaDirectory)
    {
        yield break;
    }

    foreach (var record in vfs.EnumerateFiles(metaDirectory, "*.json").ToList())
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Anything whose path ends in "history" is not treated as a package record.
        var isHistory = record.VirtualPath.EndsWith("history", StringComparison.OrdinalIgnoreCase);
        if (!isHistory)
        {
            var package = await ParseCondaMetaJsonAsync(vfs, record, path, cancellationToken).ConfigureAwait(false);
            if (package is not null)
            {
                yield return package;
            }
        }
    }
}
/// <summary>
/// Looks for a <c>conda-meta</c> directory at <paramref name="path"/> or up to three levels above it.
/// </summary>
/// <param name="vfs">File system to inspect.</param>
/// <param name="path">Starting directory (callers pass the site-packages directory).</param>
/// <returns>
/// The first candidate, nearest first, that contains at least one <c>*.json</c> file;
/// <c>null</c> when none does.
/// </returns>
/// <remarks>
/// Conda keeps <c>conda-meta</c> in the environment prefix. Site-packages sits two levels below
/// the prefix on Windows (<c>Lib/site-packages</c>) and three levels below it on POSIX
/// (<c>lib/pythonX.Y/site-packages</c>), hence the three parent candidates.
/// NOTE(review): the candidates contain literal <c>..</c> segments; this relies on
/// <c>EnumerateFiles</c> resolving them - confirm against the virtual file system.
/// </remarks>
private static string? FindCondaMetaPath(PythonVirtualFileSystem vfs, string path)
{
    // Common conda-meta locations, nearest first.
    var candidates = new[]
    {
        $"{path}/conda-meta",
        $"{path}/../conda-meta",
        $"{path}/../../conda-meta",
        $"{path}/../../../conda-meta",
    };

    foreach (var candidate in candidates)
    {
        // Collapses the doubled slash produced when path already ends with '/'.
        var normalizedPath = candidate.Replace("//", "/");
        if (vfs.EnumerateFiles(normalizedPath, "*.json").Any())
        {
            return normalizedPath;
        }
    }

    return null;
}
/// <summary>
/// Reads one <c>conda-meta/*.json</c> record and converts it into a <see cref="PythonPackageInfo"/>.
/// </summary>
/// <param name="vfs">File system used to open the record.</param>
/// <param name="file">The JSON record.</param>
/// <param name="sitePackagesPath">Recorded as the package location.</param>
/// <param name="cancellationToken">Passed to the read and parse calls.</param>
/// <returns>
/// The package description, or <c>null</c> when the file cannot be opened or read, is not valid
/// JSON, is not a JSON object, or has no non-empty string <c>name</c>. Properties of an
/// unexpected JSON type are treated as absent rather than raising.
/// </returns>
private static async Task<PythonPackageInfo?> ParseCondaMetaJsonAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    string sitePackagesPath,
    CancellationToken cancellationToken)
{
    // String value of a property, or null when it is missing or not a JSON string.
    static string? ReadString(JsonElement owner, string property) =>
        owner.TryGetProperty(property, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;

    // Non-empty string items of an array property; empty when it is missing or not an array.
    static List<string> ReadStrings(JsonElement owner, string property)
    {
        var values = new List<string>();
        if (owner.TryGetProperty(property, out var array) && array.ValueKind == JsonValueKind.Array)
        {
            foreach (var item in array.EnumerateArray())
            {
                if (item.ValueKind == JsonValueKind.String && item.GetString() is { Length: > 0 } text)
                {
                    values.Add(text);
                }
            }
        }

        return values;
    }

    // Top-level module a listed .py file belongs to, or null when none can be derived.
    static string? TopLevelModuleOf(string relativePath)
    {
        var segments = relativePath.Split(new[] { '/', '\\' }, StringSplitOptions.RemoveEmptyEntries);

        // Entries are relative to the environment prefix ("lib/python3.x/site-packages/pkg/..."),
        // so the module is the segment that follows "site-packages" when that segment is present.
        var start = 0;
        for (var i = 0; i < segments.Length; i++)
        {
            if (segments[i].Equals("site-packages", StringComparison.OrdinalIgnoreCase))
            {
                start = i + 1;
                break;
            }
        }

        if (start >= segments.Length)
        {
            return null;
        }

        var topLevel = segments[start];

        // Outside site-packages, skip standard library / tool locations.
        if (start == 0 &&
            (topLevel.Equals("lib", StringComparison.OrdinalIgnoreCase) ||
             topLevel.Equals("bin", StringComparison.OrdinalIgnoreCase) ||
             topLevel.StartsWith("python", StringComparison.OrdinalIgnoreCase)))
        {
            return null;
        }

        // A single-file module: drop only the trailing ".py", never a ".py" inside the name.
        if (start == segments.Length - 1 && topLevel.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
        {
            topLevel = topLevel[..^3];
        }

        return topLevel.Length > 0 ? topLevel : null;
    }

    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return null;
        }

        using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
        var root = doc.RootElement;

        // TryGetProperty throws on anything but an object, so reject other roots up front.
        if (root.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        var name = ReadString(root, "name");
        if (string.IsNullOrEmpty(name))
        {
            return null;
        }

        var version = ReadString(root, "version");

        // Get dependencies
        var dependencies = ReadStrings(root, "depends").ToImmutableArray();

        // Get files to extract top-level modules
        var modules = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var listedFile in ReadStrings(root, "files"))
        {
            if (!listedFile.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            if (TopLevelModuleOf(listedFile) is { } module)
            {
                modules.Add(module);
            }
        }

        var topLevelModules = modules.ToImmutableArray();

        // Check if explicitly requested (direct dependency marker)
        var isDirect = root.TryGetProperty("requested", out var requestedElement) &&
            requestedElement.ValueKind == JsonValueKind.True;

        return new PythonPackageInfo(
            Name: name,
            Version: version,
            Kind: PythonPackageKind.Conda,
            Location: sitePackagesPath,
            MetadataPath: file.VirtualPath,
            TopLevelModules: topLevelModules,
            Dependencies: dependencies,
            Extras: ImmutableArray<string>.Empty,
            RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
            InstallerTool: "conda",
            EditableTarget: null,
            IsDirectDependency: isDirect,
            Confidence: PythonPackageConfidence.High);
    }
    catch (JsonException)
    {
        return null;
    }
    catch (IOException)
    {
        return null;
    }
}
}

View File

@@ -0,0 +1,196 @@
using System.Collections.Immutable;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
/// <summary>
/// Adapter for container layer overlays that may contain Python packages.
/// Handles whiteout files and layer ordering.
/// </summary>
internal sealed class ContainerLayerAdapter : IPythonPackagingAdapter
{
public string Name => "container-layer";
public int Priority => 100; // Lowest priority - use other adapters first
/// <summary>
/// Reports whether <paramref name="path"/> looks like a container layer: either it contains
/// whiteout (<c>.wh.*</c>) entries or it has a typical container root layout.
/// </summary>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
{
    // Whiteout entries are checked first; layout markers are only probed when none exist.
    if (vfs.EnumerateFiles(path, ".wh.*").Any())
    {
        return true;
    }

    return HasContainerLayoutMarkers(vfs, path);
}
/// <summary>
/// Streams packages found under <paramref name="path"/>: first wheel installs located in
/// discovered site-packages directories (via <see cref="DistInfoAdapter"/>), then vendored
/// packages found in application directories.
/// </summary>
/// <param name="vfs">Virtual filesystem to search.</param>
/// <param name="path">Root of the container layer.</param>
/// <param name="cancellationToken">Token checked before each search path is processed.</param>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    var sitePackagesRoots = FindPythonPathsInContainer(vfs, path);
    var wheelAdapter = new DistInfoAdapter();

    foreach (var sitePackages in sitePackagesRoots)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (wheelAdapter.CanHandle(vfs, sitePackages))
        {
            await foreach (var discovered in wheelAdapter.DiscoverPackagesAsync(vfs, sitePackages, cancellationToken).ConfigureAwait(false))
            {
                // Re-home the package to the container path and cap its confidence.
                yield return discovered with
                {
                    Location = sitePackages,
                    Confidence = AdjustConfidenceForContainer(discovered.Confidence)
                };
            }
        }
    }

    // Vendored code living next to the application (/app, /opt, ...).
    foreach (var vendorRoot in FindVendoredPathsInContainer(vfs, path))
    {
        cancellationToken.ThrowIfCancellationRequested();

        await foreach (var vendored in DiscoverVendoredPackagesAsync(vfs, vendorRoot, cancellationToken).ConfigureAwait(false))
        {
            yield return vendored;
        }
    }
}
/// <summary>
/// Checks whether <paramref name="path"/> has the shape of a container root filesystem.
/// </summary>
/// <param name="vfs">Virtual filesystem to probe.</param>
/// <param name="path">Candidate layer root.</param>
/// <returns><c>true</c> when any of the well-known root markers is present.</returns>
private static bool HasContainerLayoutMarkers(PythonVirtualFileSystem vfs, string path)
{
    // etc/os-release is a regular file, so test for it directly; listing files
    // "beneath" it as if it were a directory cannot be relied upon to match.
    if (vfs.FileExists($"{path}/etc/os-release"))
    {
        return true;
    }

    // Remaining markers are probed by looking for any file beneath them.
    // The os-release entry is retained so existing matches keep working.
    var markers = new[]
    {
        $"{path}/etc/os-release",
        $"{path}/usr/lib",
        $"{path}/usr/local/lib",
        $"{path}/app",
        $"{path}/opt"
    };

    return markers.Any(m => vfs.EnumerateFiles(m, "*").Any());
}
/// <summary>
/// Collects every <c>site-packages</c> directory below <paramref name="path"/> that
/// directly contains at least one file.
/// </summary>
/// <param name="vfs">Virtual filesystem to search.</param>
/// <param name="path">Root of the container layer.</param>
/// <returns>The distinct site-packages directories (compared case-insensitively).</returns>
private static IEnumerable<string> FindPythonPathsInContainer(PythonVirtualFileSystem vfs, string path)
{
    // The set de-duplicates, so no separate Distinct() pass is needed. The former
    // hard-coded pattern list was never consulted and has been removed.
    var foundPaths = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var file in vfs.EnumerateFiles(path, "site-packages/*"))
    {
        var parent = GetParentDirectory(file.VirtualPath);
        if (parent is not null && parent.EndsWith("site-packages", StringComparison.OrdinalIgnoreCase))
        {
            foundPaths.Add(parent);
        }
    }

    return foundPaths;
}
/// <summary>
/// Returns the well-known vendoring directories under <paramref name="path"/> that
/// contain at least one <c>.py</c> file.
/// </summary>
private static IEnumerable<string> FindVendoredPathsInContainer(PythonVirtualFileSystem vfs, string path)
{
    // Candidate locations, probed in this order.
    var candidates = new[]
    {
        $"{path}/app/vendor",
        $"{path}/app/lib",
        $"{path}/app/third_party",
        $"{path}/opt/app/vendor"
    };

    return candidates
        .Where(candidate => vfs.EnumerateFiles(candidate, "*.py").Any())
        .ToList();
}
/// <summary>
/// Yields one low-confidence <see cref="PythonPackageKind.Vendored"/> package per distinct
/// directory name that contains an <c>__init__.py</c> under <paramref name="path"/>.
/// </summary>
/// <param name="vfs">Virtual filesystem to search.</param>
/// <param name="path">Vendoring directory; recorded as the package location.</param>
/// <param name="cancellationToken">Token checked before each candidate is processed.</param>
private static async IAsyncEnumerable<PythonPackageInfo> DiscoverVendoredPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
{
    var markers = vfs.EnumerateFiles(path, "__init__.py").ToList();
    var seenNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var marker in markers)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var directory = GetParentDirectory(marker.VirtualPath);
        var candidateName = directory is null ? null : Path.GetFileName(directory);

        // Skip unnamed directories and names already reported (case-insensitive).
        if (string.IsNullOrEmpty(candidateName) || !seenNames.Add(candidateName))
        {
            continue;
        }

        yield return new PythonPackageInfo(
            Name: candidateName,
            Version: null,
            Kind: PythonPackageKind.Vendored,
            Location: path,
            MetadataPath: null,
            TopLevelModules: ImmutableArray.Create(candidateName),
            Dependencies: ImmutableArray<string>.Empty,
            Extras: ImmutableArray<string>.Empty,
            RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
            InstallerTool: null,
            EditableTarget: null,
            IsDirectDependency: true,
            Confidence: PythonPackageConfidence.Low);
    }
}
/// <summary>
/// Caps confidence for packages found in a container layer:
/// <see cref="PythonPackageConfidence.Definitive"/> becomes
/// <see cref="PythonPackageConfidence.High"/>; every other level is returned unchanged.
/// </summary>
private static PythonPackageConfidence AdjustConfidenceForContainer(PythonPackageConfidence confidence)
{
    // Layers can be partial or overlaid, so "definitive" is never claimed here.
    if (confidence == PythonPackageConfidence.Definitive)
    {
        return PythonPackageConfidence.High;
    }

    return confidence;
}
/// <summary>
/// Returns the part of a '/'-separated path that precedes the last separator.
/// </summary>
/// <returns>
/// The parent path, or <c>null</c> when there is no separator or the only separator is the
/// leading character.
/// </returns>
private static string? GetParentDirectory(string path)
{
    var separatorIndex = path.LastIndexOf('/');
    if (separatorIndex <= 0)
    {
        return null;
    }

    return path.Substring(0, separatorIndex);
}
}

View File

@@ -0,0 +1,316 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
/// <summary>
/// Adapter for standard .dist-info metadata (wheel installations).
/// </summary>
internal sealed partial class DistInfoAdapter : IPythonPackagingAdapter
{
public string Name => "dist-info";
public int Priority => 10;
// Matches a "{name}-{version}.dist-info" directory name.
// The version group accepts anything that starts with a digit and contains no hyphen,
// which covers local versions ("2.1.0+cu118"), epochs ("1!2.0") and chained suffixes
// ("1.0rc1.post2") that the former digit-only pattern rejected (and thereby skipped).
// RegexOptions.Compiled is omitted because the regex source generator ignores it.
[GeneratedRegex(@"^(?<name>[A-Za-z0-9][\w.-]*)-(?<version>\d[^-]*)\.dist-info$",
    RegexOptions.IgnoreCase)]
private static partial Regex DistInfoDirPattern();
/// <summary>
/// Reports whether <paramref name="path"/> contains at least one
/// <c>*.dist-info/METADATA</c> file.
/// </summary>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
    => vfs.EnumerateFiles(path, "*.dist-info/METADATA").Any();
/// <summary>
/// Streams one <see cref="PythonPackageKind.Wheel"/> package per
/// <c>*.dist-info/METADATA</c> file found under <paramref name="path"/>.
/// </summary>
/// <param name="vfs">Virtual filesystem to search.</param>
/// <param name="path">Directory to search; recorded as the package location.</param>
/// <param name="cancellationToken">Token checked before each dist-info directory is processed.</param>
/// <remarks>
/// Name and version come from the directory name and are overridden by the METADATA
/// <c>Name</c>/<c>Version</c> headers when present. Directories whose name does not match
/// the dist-info pattern are skipped.
/// </remarks>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    var metadataFiles = vfs.EnumerateFiles(path, "*.dist-info/METADATA").ToList();

    foreach (var metadataFile in metadataFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Split on '/' by hand: the sibling paths below are composed with '/', whereas
        // Path.GetDirectoryName rewrites separators to '\' on Windows hosts.
        var virtualPath = metadataFile.VirtualPath;
        var lastSlash = virtualPath.LastIndexOf('/');
        var distInfoPath = lastSlash > 0 ? virtualPath[..lastSlash] : string.Empty;
        var distInfoName = distInfoPath[(distInfoPath.LastIndexOf('/') + 1)..];

        var match = DistInfoDirPattern().Match(distInfoName);
        if (!match.Success)
        {
            continue;
        }

        var packageName = match.Groups["name"].Value;
        var version = match.Groups["version"].Value;

        var metadata = await ReadMetadataAsync(vfs, metadataFile, cancellationToken).ConfigureAwait(false);

        // METADATA carries the canonical spelling; prefer it over the directory name.
        if (metadata.TryGetValue("Name", out var metadataName) && !string.IsNullOrEmpty(metadataName))
        {
            packageName = metadataName;
        }

        if (metadata.TryGetValue("Version", out var metadataVersion) && !string.IsNullOrEmpty(metadataVersion))
        {
            version = metadataVersion;
        }

        var topLevelModules = await ReadTopLevelAsync(vfs, distInfoPath, cancellationToken).ConfigureAwait(false);
        var recordEntries = await ReadRecordAsync(vfs, distInfoPath, cancellationToken).ConfigureAwait(false);
        var installer = await ReadInstallerAsync(vfs, distInfoPath, cancellationToken).ConfigureAwait(false);
        var dependencies = ExtractDependencies(metadata);
        var extras = ExtractExtras(metadata);

        // Direct when REQUESTED or direct_url.json is present in the dist-info directory.
        var isDirect = await IsDependencyDirectAsync(vfs, distInfoPath, cancellationToken).ConfigureAwait(false);

        // A readable RECORD raises confidence to Definitive.
        var confidence = recordEntries.Length > 0
            ? PythonPackageConfidence.Definitive
            : PythonPackageConfidence.High;

        yield return new PythonPackageInfo(
            Name: packageName,
            Version: version,
            Kind: PythonPackageKind.Wheel,
            Location: path,
            MetadataPath: distInfoPath,
            TopLevelModules: topLevelModules,
            Dependencies: dependencies,
            Extras: extras,
            RecordFiles: recordEntries,
            InstallerTool: installer,
            EditableTarget: null,
            IsDirectDependency: isDirect,
            Confidence: confidence);
    }
}
/// <summary>
/// Reads the header block of a METADATA file into a case-insensitive dictionary.
/// </summary>
/// <param name="vfs">Virtual filesystem used to open the file.</param>
/// <param name="file">The METADATA file.</param>
/// <param name="cancellationToken">Token checked for every line read.</param>
/// <returns>
/// Header name to value. Headers that occur more than once (for example
/// <c>Requires-Dist</c>) are joined with <c>'\n'</c> in file order. Folded continuation
/// lines are appended to their header separated by a single space. Parsing stops at the
/// first empty line. An unreadable file yields whatever was parsed before the error.
/// </returns>
private static async Task<Dictionary<string, string>> ReadMetadataAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    var result = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
    string? currentKey = null;
    var currentValue = new System.Text.StringBuilder();

    // Single place where a finished header is stored, so repeated headers accumulate
    // identically whether they are followed by another header, the blank line, or EOF.
    void FlushCurrent()
    {
        if (currentKey is null)
        {
            return;
        }

        var value = currentValue.ToString().Trim();
        result[currentKey] = result.TryGetValue(currentKey, out var existing)
            ? existing + "\n" + value
            : value;

        currentKey = null;
        currentValue.Clear();
    }

    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return result;
        }

        using var reader = new StreamReader(stream);
        string? line;
        while ((line = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // Empty line marks the end of the headers (the body/description follows).
            if (string.IsNullOrEmpty(line))
            {
                break;
            }

            // Folded continuation of the previous header.
            if (line.StartsWith(' ') || line.StartsWith('\t'))
            {
                if (currentKey is not null)
                {
                    currentValue.Append(' ').Append(line.Trim());
                }

                continue;
            }

            var colonIdx = line.IndexOf(':');
            if (colonIdx > 0)
            {
                FlushCurrent();
                currentKey = line[..colonIdx].Trim();
                currentValue.Append(line[(colonIdx + 1)..].Trim());
            }
        }

        FlushCurrent();
    }
    catch (IOException)
    {
        // Best effort: keep the headers parsed so far.
    }

    return result;
}
/// <summary>
/// Reads <c>top_level.txt</c> from a dist-info directory.
/// </summary>
/// <returns>
/// The trimmed, non-empty lines of the file; empty when the file is missing or unreadable.
/// </returns>
private static async Task<ImmutableArray<string>> ReadTopLevelAsync(
    PythonVirtualFileSystem vfs,
    string distInfoPath,
    CancellationToken cancellationToken)
{
    try
    {
        using var stream = await vfs.OpenReadAsync($"{distInfoPath}/top_level.txt", cancellationToken).ConfigureAwait(false);
        if (stream is not null)
        {
            using var reader = new StreamReader(stream);
            var text = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

            const StringSplitOptions options = StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;
            return text.Split('\n', options).ToImmutableArray();
        }
    }
    catch (IOException)
    {
        // Treated the same as a missing file.
    }

    return ImmutableArray<string>.Empty;
}
/// <summary>
/// Reads and parses the <c>RECORD</c> file of a dist-info directory.
/// </summary>
/// <param name="vfs">Virtual filesystem used to open the file.</param>
/// <param name="distInfoPath">The dist-info directory.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <returns>
/// The entries that <c>PythonRecordEntry.Parse</c> accepted; empty when the file is
/// missing or unreadable.
/// </returns>
private static async Task<ImmutableArray<PythonRecordEntry>> ReadRecordAsync(
    PythonVirtualFileSystem vfs,
    string distInfoPath,
    CancellationToken cancellationToken)
{
    var recordPath = $"{distInfoPath}/RECORD";

    try
    {
        using var stream = await vfs.OpenReadAsync(recordPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return ImmutableArray<PythonRecordEntry>.Empty;
        }

        using var reader = new StreamReader(stream);
        var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        // TrimEntries strips the '\r' left behind by CRLF line endings and drops
        // whitespace-only lines, matching how top_level.txt is read.
        return content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .Select(PythonRecordEntry.Parse)
            .Where(e => e is not null)
            .Cast<PythonRecordEntry>()
            .ToImmutableArray();
    }
    catch (IOException)
    {
        return ImmutableArray<PythonRecordEntry>.Empty;
    }
}
/// <summary>
/// Reads the <c>INSTALLER</c> file of a dist-info directory.
/// </summary>
/// <returns>
/// The trimmed file content, or <c>null</c> when the file is missing or unreadable.
/// </returns>
private static async Task<string?> ReadInstallerAsync(
    PythonVirtualFileSystem vfs,
    string distInfoPath,
    CancellationToken cancellationToken)
{
    try
    {
        using var stream = await vfs.OpenReadAsync($"{distInfoPath}/INSTALLER", cancellationToken).ConfigureAwait(false);
        if (stream is not null)
        {
            using var reader = new StreamReader(stream);
            var text = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
            return text.Trim();
        }
    }
    catch (IOException)
    {
        // Treated the same as a missing file.
    }

    return null;
}
/// <summary>
/// Decides whether a wheel was installed as a direct dependency.
/// </summary>
/// <returns>
/// <c>true</c> when the dist-info directory contains a <c>REQUESTED</c> file, or a
/// <c>direct_url.json</c> (PEP 610) file that can be opened; otherwise <c>false</c>.
/// </returns>
private static async Task<bool> IsDependencyDirectAsync(
    PythonVirtualFileSystem vfs,
    string distInfoPath,
    CancellationToken cancellationToken)
{
    // REQUESTED marker: no need to look any further.
    if (vfs.FileExists($"{distInfoPath}/REQUESTED"))
    {
        return true;
    }

    var directUrlPath = $"{distInfoPath}/direct_url.json";
    if (!vfs.FileExists(directUrlPath))
    {
        return false;
    }

    try
    {
        // Only the ability to open the file matters; its content is not inspected.
        using var stream = await vfs.OpenReadAsync(directUrlPath, cancellationToken).ConfigureAwait(false);
        return stream is not null;
    }
    catch (IOException)
    {
        return false;
    }
}
/// <summary>
/// Splits the newline-joined <c>Requires-Dist</c> metadata value into individual,
/// trimmed requirement strings.
/// </summary>
/// <returns>The requirements, or an empty array when the header is absent.</returns>
private static ImmutableArray<string> ExtractDependencies(Dictionary<string, string> metadata)
{
    const StringSplitOptions options = StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    return metadata.TryGetValue("Requires-Dist", out var joined)
        ? joined.Split('\n', options).ToImmutableArray()
        : ImmutableArray<string>.Empty;
}
/// <summary>
/// Splits the newline-joined <c>Provides-Extra</c> metadata value into individual,
/// trimmed extra names.
/// </summary>
/// <returns>The extras, or an empty array when the header is absent.</returns>
private static ImmutableArray<string> ExtractExtras(Dictionary<string, string> metadata)
{
    const StringSplitOptions options = StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    return metadata.TryGetValue("Provides-Extra", out var joined)
        ? joined.Split('\n', options).ToImmutableArray()
        : ImmutableArray<string>.Empty;
}
}

View File

@@ -0,0 +1,276 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
/// <summary>
/// Adapter for pip editable installations (.egg-link files).
/// </summary>
internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
{
public string Name => "pip-editable";
public int Priority => 5; // Higher priority than dist-info
[GeneratedRegex(@"^(?<name>[A-Za-z0-9][\w.-]*)\.egg-link$",
RegexOptions.Compiled | RegexOptions.IgnoreCase)]
private static partial Regex EggLinkPattern();
/// <summary>
/// Reports whether <paramref name="path"/> contains at least one <c>*.egg-link</c> file.
/// </summary>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
    => vfs.EnumerateFiles(path, "*.egg-link").Any();
/// <summary>
/// Streams one <see cref="PythonPackageKind.PipEditable"/> package per readable
/// <c>*.egg-link</c> file under <paramref name="path"/>.
/// </summary>
/// <param name="vfs">Virtual filesystem to search.</param>
/// <param name="path">Directory holding the egg-link files; recorded as the location.</param>
/// <param name="cancellationToken">Token checked before each egg-link is processed.</param>
/// <remarks>
/// The name comes from the egg-link file name and is replaced by the pyproject name when
/// one is found. The version comes from the egg-info metadata, falling back to pyproject.
/// </remarks>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    foreach (var linkFile in vfs.EnumerateFiles(path, "*.egg-link").ToList())
    {
        cancellationToken.ThrowIfCancellationRequested();

        var nameMatch = EggLinkPattern().Match(Path.GetFileName(linkFile.VirtualPath));
        if (!nameMatch.Success)
        {
            continue;
        }

        var linkName = nameMatch.Groups["name"].Value;

        // The first line of the egg-link is the project directory.
        var target = await ReadEggLinkAsync(vfs, linkFile, cancellationToken).ConfigureAwait(false);
        if (string.IsNullOrEmpty(target))
        {
            continue;
        }

        var (version, metadata, topLevel) = await ReadEggInfoAsync(vfs, target, linkName, cancellationToken).ConfigureAwait(false);
        var pyproject = await ReadPyprojectAsync(vfs, target, cancellationToken).ConfigureAwait(false);

        // pyproject wins for the name; for the version it is only a fallback.
        var resolvedName = pyproject.Name ?? linkName;
        if (string.IsNullOrEmpty(version) && pyproject.Version is not null)
        {
            version = pyproject.Version;
        }

        ImmutableArray<string> dependencies;
        if (metadata.TryGetValue("Requires-Dist", out var requires))
        {
            dependencies = requires
                .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
                .ToImmutableArray();
        }
        else
        {
            dependencies = ImmutableArray<string>.Empty;
        }

        yield return new PythonPackageInfo(
            Name: resolvedName,
            Version: version,
            Kind: PythonPackageKind.PipEditable,
            Location: path,
            MetadataPath: null,
            TopLevelModules: topLevel,
            Dependencies: dependencies,
            Extras: ImmutableArray<string>.Empty,
            RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
            InstallerTool: "pip",
            EditableTarget: target,
            IsDirectDependency: true, // Editable installs are always direct
            Confidence: PythonPackageConfidence.High);
    }
}
/// <summary>
/// Reads the first line of an <c>.egg-link</c> file.
/// </summary>
/// <returns>
/// The trimmed first line, or <c>null</c> when the file is missing, empty or unreadable.
/// </returns>
private static async Task<string?> ReadEggLinkAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is not null)
        {
            using var reader = new StreamReader(stream);
            var target = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false);
            return target?.Trim();
        }
    }
    catch (IOException)
    {
        // Treated the same as a missing file.
    }

    return null;
}
/// <summary>
/// Locates the <c>.egg-info</c> directory of an editable project and reads its
/// <c>PKG-INFO</c> headers and <c>top_level.txt</c>.
/// </summary>
/// <param name="vfs">Virtual filesystem to search.</param>
/// <param name="targetPath">Project directory the egg-link points to.</param>
/// <param name="packageName">Name used to pick the matching egg-info directory.</param>
/// <param name="cancellationToken">Token used to cancel reads.</param>
/// <returns>
/// The <c>Version</c> header (or <c>null</c>), all parsed headers (repeated headers such as
/// <c>Requires-Dist</c> are joined with <c>'\n'</c>), and the top-level module names.
/// Missing or unreadable files leave the corresponding part empty.
/// </returns>
private static async Task<(string? Version, Dictionary<string, string> Metadata, ImmutableArray<string> TopLevel)> ReadEggInfoAsync(
    PythonVirtualFileSystem vfs,
    string targetPath,
    string packageName,
    CancellationToken cancellationToken)
{
    // The paths handled here are composed with '/', so split on it explicitly;
    // Path.GetDirectoryName rewrites separators to '\' on Windows hosts.
    static string ParentOf(string virtualPath)
    {
        var slash = virtualPath.LastIndexOf('/');
        return slash > 0 ? virtualPath[..slash] : string.Empty;
    }

    static string LeafOf(string virtualPath) => virtualPath[(virtualPath.LastIndexOf('/') + 1)..];

    var metadata = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
    string? version = null;
    var topLevel = ImmutableArray<string>.Empty;

    var eggInfoFiles = vfs.EnumerateFiles(targetPath, "*.egg-info/PKG-INFO").ToList();

    // Prefer the egg-info whose directory name starts with the package name (raw or
    // normalized); otherwise fall back to the first one found.
    PythonVirtualFile? pkgInfoFile = null;
    foreach (var file in eggInfoFiles)
    {
        var dirName = LeafOf(ParentOf(file.VirtualPath));
        if (dirName.StartsWith(packageName, StringComparison.OrdinalIgnoreCase) ||
            dirName.StartsWith(PythonPackageInfo.NormalizeName(packageName), StringComparison.OrdinalIgnoreCase))
        {
            pkgInfoFile = file;
            break;
        }
    }

    if (pkgInfoFile is null && eggInfoFiles.Count > 0)
    {
        pkgInfoFile = eggInfoFiles[0];
    }

    if (pkgInfoFile is null)
    {
        return (version, metadata, topLevel);
    }

    var eggInfoPath = ParentOf(pkgInfoFile.VirtualPath);

    try
    {
        using var stream = await vfs.OpenReadAsync(pkgInfoFile.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is not null)
        {
            using var reader = new StreamReader(stream);
            string? metadataLine;
            while ((metadataLine = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
            {
                // Blank line ends the header block.
                if (string.IsNullOrEmpty(metadataLine))
                {
                    break;
                }

                // Folded continuation lines belong to the previous header; do not
                // misread them as new headers.
                if (metadataLine[0] is ' ' or '\t')
                {
                    continue;
                }

                var colonIdx = metadataLine.IndexOf(':');
                if (colonIdx <= 0)
                {
                    continue;
                }

                var key = metadataLine[..colonIdx].Trim();
                var value = metadataLine[(colonIdx + 1)..].Trim();

                // Repeated headers accumulate instead of overwriting each other.
                metadata[key] = metadata.TryGetValue(key, out var existing)
                    ? existing + "\n" + value
                    : value;
            }
        }
    }
    catch (IOException)
    {
        // Best effort: keep whatever was parsed.
    }

    if (metadata.TryGetValue("Version", out var v))
    {
        version = v;
    }

    if (eggInfoPath.Length > 0)
    {
        try
        {
            using var stream = await vfs.OpenReadAsync($"{eggInfoPath}/top_level.txt", cancellationToken).ConfigureAwait(false);
            if (stream is not null)
            {
                using var reader = new StreamReader(stream);
                var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
                topLevel = content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
                    .ToImmutableArray();
            }
        }
        catch (IOException)
        {
            // Treated the same as a missing file.
        }
    }

    return (version, metadata, topLevel);
}
/// <summary>
/// Extracts the project name and version from <c>pyproject.toml</c> in
/// <paramref name="targetPath"/> using a minimal line-based scan.
/// </summary>
/// <param name="vfs">Virtual filesystem used to open the file.</param>
/// <param name="targetPath">Project directory.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <returns>
/// The first <c>name</c> and <c>version</c> keys found in the <c>[project]</c> or
/// <c>[tool.poetry]</c> tables; <c>null</c> for whichever is absent or when the file is
/// missing or unreadable.
/// </returns>
private static async Task<(string? Name, string? Version)> ReadPyprojectAsync(
    PythonVirtualFileSystem vfs,
    string targetPath,
    CancellationToken cancellationToken)
{
    var pyprojectPath = $"{targetPath}/pyproject.toml";

    try
    {
        using var stream = await vfs.OpenReadAsync(pyprojectPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return (null, null);
        }

        using var reader = new StreamReader(stream);
        var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        string? name = null;
        string? version = null;
        var table = string.Empty;

        foreach (var line in content.Split('\n'))
        {
            var trimmed = line.Trim();
            if (trimmed.Length == 0 || trimmed[0] == '#')
            {
                continue;
            }

            // Table header ("[project]", "[[tool.poetry.source]]", ...): remember its name.
            if (trimmed[0] == '[')
            {
                var inner = trimmed.TrimStart('[');
                var close = inner.IndexOf(']');
                table = (close >= 0 ? inner[..close] : inner).Trim();
                continue;
            }

            // Only the project-level tables describe the package itself; a "name" key in
            // e.g. [[tool.poetry.source]] names a repository, not the package.
            if (!string.Equals(table, "project", StringComparison.Ordinal) &&
                !string.Equals(table, "tool.poetry", StringComparison.Ordinal))
            {
                continue;
            }

            var eqIdx = trimmed.IndexOf('=');
            if (eqIdx <= 0)
            {
                continue;
            }

            // Compare the whole key so that "namespace = ..." is not taken for "name".
            var key = trimmed[..eqIdx].Trim();
            if (name is null && string.Equals(key, "name", StringComparison.Ordinal))
            {
                name = ExtractTomlValue(trimmed);
            }
            else if (version is null && string.Equals(key, "version", StringComparison.Ordinal))
            {
                version = ExtractTomlValue(trimmed);
            }
        }

        return (name, version);
    }
    catch (IOException)
    {
        return (null, null);
    }
}
/// <summary>
/// Returns the value part of a simple <c>key = value</c> TOML line with surrounding
/// quotes removed.
/// </summary>
/// <param name="line">A single line of TOML.</param>
/// <returns>
/// <c>null</c> when the line has no <c>=</c>; otherwise the trimmed value. For a quoted
/// value followed only by whitespace or a <c>#</c> comment, the text between the quotes is
/// returned. Unquoted or unbalanced values are returned as written.
/// </returns>
private static string? ExtractTomlValue(string line)
{
    var eqIdx = line.IndexOf('=');
    if (eqIdx < 0)
    {
        return null;
    }

    var value = line[(eqIdx + 1)..].Trim();
    if (value.Length == 0 || (value[0] != '"' && value[0] != '\''))
    {
        return value;
    }

    var quote = value[0];

    // Closing quote followed by nothing or by an inline comment: "1.0" # pinned
    var closing = value.IndexOf(quote, 1);
    if (closing > 0)
    {
        var trailer = value[(closing + 1)..].TrimStart();
        if (trailer.Length == 0 || trailer[0] == '#')
        {
            return value[1..closing];
        }
    }

    // Otherwise strip the outer pair when both ends carry the same quote. The length
    // guard prevents slicing a lone quote character, which would throw.
    if (value.Length >= 2 && value[^1] == quote)
    {
        return value[1..^1];
    }

    return value;
}
}

View File

@@ -0,0 +1,305 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
/// <summary>
/// Adapter for Poetry project layouts and editable installs.
/// </summary>
internal sealed partial class PoetryAdapter : IPythonPackagingAdapter
{
public string Name => "poetry";
public int Priority => 3; // High priority for local projects
[GeneratedRegex(@"^\[tool\.poetry\]", RegexOptions.Multiline)]
private static partial Regex PoetryToolSectionPattern();
[GeneratedRegex(@"^name\s*=\s*""([^""]+)""", RegexOptions.Multiline)]
private static partial Regex NamePattern();
[GeneratedRegex(@"^version\s*=\s*""([^""]+)""", RegexOptions.Multiline)]
private static partial Regex VersionPattern();
/// <summary>
/// Reports whether <paramref name="path"/> holds both <c>pyproject.toml</c> and
/// <c>poetry.lock</c>.
/// </summary>
/// <remarks>
/// The lock file stands in for inspecting the <c>[tool.poetry]</c> table, which would
/// require reading the file asynchronously.
/// </remarks>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
    => vfs.FileExists($"{path}/pyproject.toml") && vfs.FileExists($"{path}/poetry.lock");
/// <summary>
/// Streams the Poetry project defined by <c>pyproject.toml</c> in <paramref name="path"/>,
/// followed by every package listed in <c>poetry.lock</c>.
/// </summary>
/// <param name="vfs">Virtual filesystem to read from.</param>
/// <param name="path">Project root; recorded as location and editable target.</param>
/// <param name="cancellationToken">Token used to cancel reads.</param>
/// <remarks>
/// Nothing is produced when the manifest is missing, unreadable, has no
/// <c>[tool.poetry]</c> table, or declares no name.
/// </remarks>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    var manifestPath = $"{path}/pyproject.toml";
    if (!vfs.FileExists(manifestPath))
    {
        yield break;
    }

    string manifest;
    try
    {
        using var stream = await vfs.OpenReadAsync(manifestPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            yield break;
        }

        using var reader = new StreamReader(stream);
        manifest = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
    }
    catch (IOException)
    {
        yield break;
    }

    // Not a Poetry project unless the [tool.poetry] table is present.
    if (!PoetryToolSectionPattern().IsMatch(manifest))
    {
        yield break;
    }

    var projectName = ExtractValue(manifest, NamePattern());
    var projectVersion = ExtractValue(manifest, VersionPattern());
    if (string.IsNullOrEmpty(projectName))
    {
        yield break;
    }

    var declaredDependencies = ExtractDependencies(manifest);
    var modules = await FindTopLevelModulesAsync(vfs, path, projectName, cancellationToken).ConfigureAwait(false);

    // The project itself.
    yield return new PythonPackageInfo(
        Name: projectName,
        Version: projectVersion,
        Kind: PythonPackageKind.PoetryEditable,
        Location: path,
        MetadataPath: manifestPath,
        TopLevelModules: modules,
        Dependencies: declaredDependencies,
        Extras: ImmutableArray<string>.Empty,
        RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
        InstallerTool: "poetry",
        EditableTarget: path,
        IsDirectDependency: true,
        Confidence: PythonPackageConfidence.High);

    // Everything pinned in poetry.lock.
    await foreach (var locked in ParsePoetryLockAsync(vfs, path, cancellationToken).ConfigureAwait(false))
    {
        yield return locked;
    }
}
/// <summary>
/// Returns the first capture group of the first match of <paramref name="pattern"/> in
/// <paramref name="content"/>, or <c>null</c> when there is no match.
/// </summary>
private static string? ExtractValue(string content, Regex pattern)
{
    var match = pattern.Match(content);
    if (!match.Success)
    {
        return null;
    }

    return match.Groups[1].Value;
}
/// <summary>
/// Lists the dependency names declared in the <c>[tool.poetry.dependencies]</c> table of a
/// <c>pyproject.toml</c>.
/// </summary>
/// <param name="content">Full text of the manifest.</param>
/// <returns>
/// The keys of the table in file order, without surrounding quotes and excluding
/// <c>python</c>; empty when the table is absent.
/// </returns>
private static ImmutableArray<string> ExtractDependencies(string content)
{
    const string header = "[tool.poetry.dependencies]";

    var start = content.IndexOf(header, StringComparison.OrdinalIgnoreCase);
    if (start < 0)
    {
        return ImmutableArray<string>.Empty;
    }

    var dependencies = new List<string>();

    foreach (var line in content[(start + header.Length)..].Split('\n'))
    {
        var trimmed = line.Trim();
        if (trimmed.Length == 0 || trimmed[0] == '#')
        {
            continue;
        }

        // The table ends at the next header, i.e. a line that *starts* with '['.
        // A '[' inside a value (extras = ["socks"]) must not end the table.
        if (trimmed[0] == '[')
        {
            break;
        }

        // Continuation lines of multi-line arrays / inline tables are not keys.
        if (trimmed[0] == '{' || trimmed[0] == ']')
        {
            continue;
        }

        var eqIdx = trimmed.IndexOf('=');
        if (eqIdx <= 0)
        {
            continue;
        }

        // Keys may be quoted, e.g. "ruamel.yaml" = "^0.18".
        var depName = trimmed[..eqIdx].Trim().Trim('"', '\'');
        if (depName.Length > 0 && !depName.Equals("python", StringComparison.OrdinalIgnoreCase))
        {
            dependencies.Add(depName);
        }
    }

    return dependencies.ToImmutableArray();
}
/// <summary>
/// Determines the importable top-level modules of a Poetry project.
/// </summary>
/// <param name="vfs">Virtual filesystem to probe.</param>
/// <param name="path">Project root.</param>
/// <param name="packageName">Project name from the manifest.</param>
/// <param name="cancellationToken">Token checked before probing starts.</param>
/// <returns>
/// Directory names holding an <c>__init__.py</c> under <c>src/</c>, plus the project name
/// (as normalized, and with hyphens replaced by underscores) when a matching package
/// directory or single-file module exists at the root. Duplicates are removed.
/// </returns>
/// <remarks>
/// All probes are synchronous, so the result is returned as an already-completed task.
/// </remarks>
private static Task<ImmutableArray<string>> FindTopLevelModulesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    string packageName,
    CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    var modules = new List<string>();

    // src layout: every directory with an __init__.py below src/.
    foreach (var initFile in vfs.EnumerateFiles($"{path}/src", "__init__.py").ToList())
    {
        var dir = Path.GetDirectoryName(initFile.VirtualPath);
        if (dir is null)
        {
            continue;
        }

        var moduleName = Path.GetFileName(dir);
        if (!string.IsNullOrEmpty(moduleName))
        {
            modules.Add(moduleName);
        }
    }

    // Flat layout / single-file module. A module name cannot contain '-', so the
    // underscore spelling of the project name is probed as well.
    var normalizedName = PythonPackageInfo.NormalizeName(packageName);
    var candidates = new[] { normalizedName, normalizedName.Replace('-', '_') }.Distinct(StringComparer.Ordinal);

    foreach (var candidate in candidates)
    {
        if (vfs.FileExists($"{path}/{candidate}/__init__.py") || vfs.FileExists($"{path}/{candidate}.py"))
        {
            modules.Add(candidate);
        }
    }

    return Task.FromResult(modules.Distinct().ToImmutableArray());
}
/// <summary>
/// Reads <c>poetry.lock</c> from <paramref name="path"/> and streams the packages it lists.
/// </summary>
/// <remarks>Nothing is produced when the lock file is missing or unreadable.</remarks>
private static async IAsyncEnumerable<PythonPackageInfo> ParsePoetryLockAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
{
    var lockFile = $"{path}/poetry.lock";
    if (!vfs.FileExists(lockFile))
    {
        yield break;
    }

    string lockText;
    try
    {
        using var stream = await vfs.OpenReadAsync(lockFile, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            yield break;
        }

        using var reader = new StreamReader(stream);
        lockText = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
    }
    catch (IOException)
    {
        yield break;
    }

    // One entry per [[package]] table.
    foreach (var locked in ParseLockPackages(lockText))
    {
        yield return locked;
    }
}
/// <summary>
/// Parses the <c>[[package]]</c> tables of a <c>poetry.lock</c> file.
/// </summary>
/// <param name="content">Full text of the lock file.</param>
/// <returns>
/// One medium-confidence package per table that declares a name. The dependency list holds
/// the keys of the table's <c>[package.dependencies]</c> sub-table, when present.
/// </returns>
private static IEnumerable<PythonPackageInfo> ParseLockPackages(string content)
{
    var packages = new List<PythonPackageInfo>();

    // Empty entries are kept on purpose: element 0 is then always the text before the
    // first [[package]] (possibly empty), so Skip(1) never discards a real package when
    // the file starts directly with "[[package]]".
    var sections = content.Split("[[package]]");

    foreach (var section in sections.Skip(1))
    {
        string? name = null;
        string? version = null;
        var dependencies = new List<string>();

        // "" while reading the package's own keys; otherwise the current sub-table.
        var table = string.Empty;

        foreach (var line in section.Split('\n'))
        {
            var trimmed = line.Trim();
            if (trimmed.Length == 0 || trimmed[0] == '#')
            {
                continue;
            }

            if (trimmed[0] == '[')
            {
                // A table that is not part of this package (e.g. [metadata]) ends it.
                if (!trimmed.StartsWith("[package.", StringComparison.Ordinal) &&
                    !trimmed.StartsWith("[[", StringComparison.Ordinal))
                {
                    break;
                }

                table = trimmed.StartsWith("[package.dependencies]", StringComparison.Ordinal)
                    ? "dependencies"
                    : "other";
                continue;
            }

            var eqIdx = trimmed.IndexOf('=');
            if (eqIdx <= 0)
            {
                continue;
            }

            var key = trimmed[..eqIdx].Trim().Trim('"', '\'');

            if (table.Length == 0)
            {
                if (string.Equals(key, "name", StringComparison.Ordinal))
                {
                    name = ExtractQuotedValue(trimmed);
                }
                else if (string.Equals(key, "version", StringComparison.Ordinal))
                {
                    version = ExtractQuotedValue(trimmed);
                }
            }
            else if (table == "dependencies" && key.Length > 0 && key[0] != '{' && key[0] != ']')
            {
                // Lines such as `{version = "*", markers = "..."},` are array rows, not keys.
                dependencies.Add(key);
            }
        }

        if (!string.IsNullOrEmpty(name))
        {
            packages.Add(new PythonPackageInfo(
                Name: name,
                Version: version,
                Kind: PythonPackageKind.Wheel, // Locked packages are typically wheels
                Location: string.Empty, // Location unknown from lock file
                MetadataPath: null,
                TopLevelModules: ImmutableArray<string>.Empty,
                Dependencies: dependencies.ToImmutableArray(),
                Extras: ImmutableArray<string>.Empty,
                RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
                InstallerTool: "poetry",
                EditableTarget: null,
                IsDirectDependency: false, // Can't determine from lock file alone
                Confidence: PythonPackageConfidence.Medium));
        }
    }

    return packages;
}
/// <summary>
/// Returns the value part of a simple <c>key = value</c> TOML line with surrounding
/// quotes removed.
/// </summary>
/// <param name="line">A single line of TOML.</param>
/// <returns>
/// <c>null</c> when the line has no <c>=</c>; otherwise the trimmed value. For a quoted
/// value followed only by whitespace or a <c>#</c> comment, the text between the quotes is
/// returned. Unquoted or unbalanced values are returned as written.
/// </returns>
private static string? ExtractQuotedValue(string line)
{
    var eqIdx = line.IndexOf('=');
    if (eqIdx < 0)
    {
        return null;
    }

    var value = line[(eqIdx + 1)..].Trim();
    if (value.Length == 0 || (value[0] != '"' && value[0] != '\''))
    {
        return value;
    }

    var quote = value[0];

    // Closing quote followed by nothing or by an inline comment.
    var closing = value.IndexOf(quote, 1);
    if (closing > 0)
    {
        var trailer = value[(closing + 1)..].TrimStart();
        if (trailer.Length == 0 || trailer[0] == '#')
        {
            return value[1..closing];
        }
    }

    // Otherwise strip the outer pair when both ends carry the same quote. The length
    // guard prevents slicing a lone quote character, which would throw.
    if (value.Length >= 2 && value[^1] == quote)
    {
        return value[1..^1];
    }

    return value;
}
}

View File

@@ -0,0 +1,32 @@
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
/// <summary>
/// Interface for Python packaging adapters that discover installed packages.
/// Each adapter recognises one packaging layout (for example dist-info, egg-link, Poetry).
/// </summary>
internal interface IPythonPackagingAdapter
{
/// <summary>
/// Gets the packaging system this adapter handles.
/// The value is also reported as the adapter name when discovery errors are recorded.
/// </summary>
string Name { get; }
/// <summary>
/// Gets the priority for this adapter (lower = higher priority).
/// Adapters are consulted in ascending order of this value.
/// </summary>
int Priority { get; }
/// <summary>
/// Checks if this adapter can handle the given path.
/// </summary>
/// <param name="vfs">Virtual filesystem to probe.</param>
/// <param name="path">Candidate search path.</param>
/// <returns><c>true</c> when the adapter should be asked to discover packages at the path.</returns>
bool CanHandle(PythonVirtualFileSystem vfs, string path);
/// <summary>
/// Discovers packages at the given path.
/// </summary>
/// <param name="vfs">Virtual filesystem to search.</param>
/// <param name="path">Search path.</param>
/// <param name="cancellationToken">Token used to cancel the enumeration.</param>
/// <returns>An asynchronous stream of the packages found.</returns>
IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
PythonVirtualFileSystem vfs,
string path,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,294 @@
using System.Collections.Immutable;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
/// <summary>
/// Orchestrates Python package discovery across multiple packaging systems.
/// </summary>
internal sealed class PythonPackageDiscovery
{
private readonly IReadOnlyList<IPythonPackagingAdapter> _adapters;
/// <summary>
/// Creates a discovery instance that uses the adapters returned by
/// <c>CreateDefaultAdapters()</c>.
/// </summary>
public PythonPackageDiscovery()
: this(CreateDefaultAdapters())
{
}
/// <summary>
/// Creates a discovery instance over the supplied adapters.
/// </summary>
/// <param name="adapters">Adapters to use; they are stored sorted by ascending priority value.</param>
public PythonPackageDiscovery(IEnumerable<IPythonPackagingAdapter> adapters)
{
// Lower Priority values come first, so they are consulted first during discovery.
_adapters = adapters.OrderBy(a => a.Priority).ToList();
}
/// <summary>
/// Gets the registered adapters.
/// </summary>
public IReadOnlyList<IPythonPackagingAdapter> Adapters => _adapters;
/// <summary>
/// Runs package discovery over every search path known to the virtual filesystem
/// (site-packages, source tree roots and editable paths), then derives the dependency
/// graph and marks transitive dependencies.
/// </summary>
/// <param name="vfs">Virtual filesystem to search.</param>
/// <param name="cancellationToken">Token checked before each search path is processed.</param>
/// <returns>The discovered packages, their dependency graph and any adapter errors.</returns>
public async Task<PythonPackageDiscoveryResult> DiscoverAsync(
    PythonVirtualFileSystem vfs,
    CancellationToken cancellationToken = default)
{
    var packages = new Dictionary<string, PythonPackageInfo>(StringComparer.OrdinalIgnoreCase);
    var errors = new List<PythonPackageDiscoveryError>();

    // Snapshot the search paths up front, in this order.
    var searchPaths = new List<string>(vfs.SitePackagesPaths);
    searchPaths.AddRange(vfs.SourceTreeRoots);
    searchPaths.AddRange(vfs.EditablePaths);

    foreach (var searchPath in searchPaths.Distinct())
    {
        cancellationToken.ThrowIfCancellationRequested();
        await DiscoverInPathAsync(vfs, searchPath, packages, errors, cancellationToken).ConfigureAwait(false);
    }

    var graph = BuildDependencyGraph(packages);

    // Must run after the graph is built and before the packages are snapshotted below.
    MarkTransitiveDependencies(packages, graph);

    var discovered = packages.Values.ToImmutableArray();
    var reportedErrors = errors.ToImmutableArray();

    return new PythonPackageDiscoveryResult(
        Packages: discovered,
        DependencyGraph: graph,
        Errors: reportedErrors);
}
/// <summary>
/// Discovers packages under a single path.
/// </summary>
/// <remarks>
/// Unlike <see cref="DiscoverAsync"/>, no dependency graph is built and the
/// direct/transitive flags are left as the adapters reported them. Adapter errors
/// are collected internally but are not returned to the caller.
/// </remarks>
/// <param name="vfs">Virtual filesystem to search.</param>
/// <param name="path">The path to run the adapters against.</param>
/// <param name="cancellationToken">Token forwarded to the adapters.</param>
/// <returns>The packages found at <paramref name="path"/>.</returns>
public async Task<ImmutableArray<PythonPackageInfo>> DiscoverAtPathAsync(
    PythonVirtualFileSystem vfs,
    string path,
    CancellationToken cancellationToken = default)
{
    var found = new Dictionary<string, PythonPackageInfo>(StringComparer.OrdinalIgnoreCase);

    // Required by the shared helper; the contents are intentionally not surfaced here.
    var discardedErrors = new List<PythonPackageDiscoveryError>();

    await DiscoverInPathAsync(vfs, path, found, discardedErrors, cancellationToken).ConfigureAwait(false);

    return ImmutableArray.CreateRange(found.Values);
}
/// <summary>
/// Runs every adapter that accepts <paramref name="path"/> and merges what it reports
/// into <paramref name="packages"/>.
/// </summary>
/// <remarks>
/// When the same normalized name is reported more than once, the first report is kept
/// unless a later one has a strictly higher confidence. Any adapter exception other than
/// <see cref="OperationCanceledException"/> is recorded in <paramref name="errors"/> and
/// the remaining adapters still run.
/// </remarks>
/// <param name="vfs">Virtual filesystem passed through to the adapters.</param>
/// <param name="path">The path being searched.</param>
/// <param name="packages">Accumulator keyed by normalized package name; updated in place.</param>
/// <param name="errors">Accumulator for adapter failures; appended to in place.</param>
/// <param name="cancellationToken">Checked before each adapter and forwarded to it.</param>
private async Task DiscoverInPathAsync(
    PythonVirtualFileSystem vfs,
    string path,
    Dictionary<string, PythonPackageInfo> packages,
    List<PythonPackageDiscoveryError> errors,
    CancellationToken cancellationToken)
{
    foreach (var adapter in _adapters)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (adapter.CanHandle(vfs, path))
        {
            try
            {
                var reported = adapter.DiscoverPackagesAsync(vfs, path, cancellationToken).ConfigureAwait(false);
                await foreach (var candidate in reported)
                {
                    var name = candidate.NormalizedName;

                    // An already-registered package stays unless the newcomer is strictly more trusted.
                    if (packages.TryGetValue(name, out var incumbent) &&
                        incumbent.Confidence >= candidate.Confidence)
                    {
                        continue;
                    }

                    packages[name] = candidate;
                }
            }
            catch (Exception failure) when (failure is not OperationCanceledException)
            {
                errors.Add(new PythonPackageDiscoveryError(
                    AdapterName: adapter.Name,
                    Path: path,
                    Message: failure.Message,
                    Exception: failure));
            }
        }
    }
}
/// <summary>
/// Builds the dependency graph between the discovered packages.
/// </summary>
/// <remarks>
/// Each package maps to the normalized names of the packages it requires, in first-seen
/// order. Requirements that do not resolve to a discovered package are omitted. A package
/// never lists itself (self-referencing extras such as <c>pkg[extra]</c> are dropped) and
/// a dependency named by several requirement lines (for example once per environment
/// marker) appears only once.
/// </remarks>
/// <param name="packages">Discovered packages keyed by normalized name.</param>
/// <returns>A case-insensitive map from normalized package name to its resolved dependencies.</returns>
private static ImmutableDictionary<string, ImmutableArray<string>> BuildDependencyGraph(
    Dictionary<string, PythonPackageInfo> packages)
{
    var graph = ImmutableDictionary.CreateBuilder<string, ImmutableArray<string>>(StringComparer.OrdinalIgnoreCase);

    foreach (var pkg in packages.Values)
    {
        var self = pkg.NormalizedName;
        var edges = ImmutableArray.CreateBuilder<string>();

        // Seeding with the package's own name filters self-edges through the same check as duplicates.
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { self };

        // A default (uninitialized) ImmutableArray cannot be enumerated.
        if (!pkg.Dependencies.IsDefaultOrEmpty)
        {
            foreach (var requirement in pkg.Dependencies)
            {
                // Reduce the requirement string to the bare package name.
                var depName = ParseDependencyName(requirement);
                if (string.IsNullOrEmpty(depName))
                {
                    continue;
                }

                var normalizedDep = PythonPackageInfo.NormalizeName(depName);
                if (packages.ContainsKey(normalizedDep) && seen.Add(normalizedDep))
                {
                    edges.Add(normalizedDep);
                }
            }
        }

        graph[self] = edges.ToImmutable();
    }

    return graph.ToImmutable();
}
/// <summary>
/// Extracts the bare package name from a requirement string.
/// </summary>
/// <remarks>
/// The name is the leading run of letters, digits, <c>.</c>, <c>-</c> and <c>_</c>.
/// Everything after it is ignored, which covers extras (<c>pkg[extra]</c>), version
/// specifiers with or without whitespace (<c>pkg&gt;=1.0</c>, <c>pkg ~= 1.0</c>),
/// parenthesized specifiers (<c>pkg (&gt;=1.0)</c>), environment markers
/// (<c>pkg ; python_version &gt;= "3.8"</c>), direct references (<c>pkg @ https://...</c>)
/// and conda-style pins (<c>pkg=1.0</c>, <c>pkg 1.0</c>).
/// </remarks>
/// <param name="dependency">The requirement string as declared by the package.</param>
/// <returns>The package name, or <c>null</c> when the string is blank or does not start with a name.</returns>
private static string? ParseDependencyName(string dependency)
{
    if (string.IsNullOrWhiteSpace(dependency))
    {
        return null;
    }

    var dep = dependency.Trim();

    // Scan to the first character that cannot be part of a distribution name.
    var end = 0;
    while (end < dep.Length && IsNameCharacter(dep[end]))
    {
        end++;
    }

    return end == 0 ? null : dep[..end];

    static bool IsNameCharacter(char c) =>
        char.IsLetterOrDigit(c) || c is '.' or '-' or '_';
}
/// <summary>
/// Demotes packages that were flagged as direct but are required by another direct package.
/// </summary>
/// <remarks>
/// Only packages that appear in the dependency list of a package flagged as direct on
/// entry are demoted; dependencies reachable only through deeper levels keep their
/// original flag. A package that lists itself (for example through a self-referencing
/// extra) is not treated as its own dependent, so it is not demoted for that reason alone.
/// </remarks>
/// <param name="packages">Discovered packages keyed by normalized name; entries are replaced in place.</param>
/// <param name="graph">Dependency graph keyed by normalized package name.</param>
private static void MarkTransitiveDependencies(
    Dictionary<string, PythonPackageInfo> packages,
    ImmutableDictionary<string, ImmutableArray<string>> graph)
{
    // Snapshot the names first: the dictionary is modified further down.
    var directNames = packages.Values
        .Where(p => p.IsDirectDependency)
        .Select(p => p.NormalizedName)
        .ToList();

    // Everything required by some *other* direct package.
    var requiredByDirect = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var directName in directNames)
    {
        if (!graph.TryGetValue(directName, out var deps) || deps.IsDefaultOrEmpty)
        {
            continue;
        }

        foreach (var dep in deps)
        {
            // A self-edge says nothing about whether the package is transitive.
            if (!string.Equals(dep, directName, StringComparison.OrdinalIgnoreCase))
            {
                requiredByDirect.Add(dep);
            }
        }
    }

    foreach (var name in requiredByDirect)
    {
        if (packages.TryGetValue(name, out var pkg) && pkg.IsDirectDependency)
        {
            packages[name] = pkg with { IsDirectDependency = false };
        }
    }
}
/// <summary>
/// Builds the adapter set used by the parameterless constructor.
/// </summary>
/// <remarks>
/// The constructor re-sorts adapters by <c>Priority</c>, so the order listed here
/// does not by itself decide the order in which they are consulted.
/// </remarks>
private static IReadOnlyList<IPythonPackagingAdapter> CreateDefaultAdapters()
{
    IPythonPackagingAdapter[] defaults =
    {
        new PoetryAdapter(),
        new PipEditableAdapter(),
        new DistInfoAdapter(),
        new CondaAdapter(),
        new ContainerLayerAdapter()
    };

    return defaults;
}
}
/// <summary>
/// Outcome of a package discovery run.
/// </summary>
/// <param name="Packages">Every package that was discovered.</param>
/// <param name="DependencyGraph">Package dependency graph (normalized name → dependencies).</param>
/// <param name="Errors">Errors the adapters raised while discovering.</param>
internal sealed record PythonPackageDiscoveryResult(
    ImmutableArray<PythonPackageInfo> Packages,
    ImmutableDictionary<string, ImmutableArray<string>> DependencyGraph,
    ImmutableArray<PythonPackageDiscoveryError> Errors)
{
    /// <summary>
    /// Gets the packages indexed by normalized name (case-insensitive).
    /// The dictionary is rebuilt on every access.
    /// </summary>
    public ImmutableDictionary<string, PythonPackageInfo> PackagesByName
    {
        get
        {
            return Packages.ToImmutableDictionary(
                package => package.NormalizedName,
                StringComparer.OrdinalIgnoreCase);
        }
    }

    /// <summary>
    /// Gets the packages flagged as direct dependencies.
    /// </summary>
    public ImmutableArray<PythonPackageInfo> DirectDependencies
    {
        get { return ImmutableArray.CreateRange(Packages.Where(package => package.IsDirectDependency)); }
    }

    /// <summary>
    /// Gets the packages not flagged as direct dependencies.
    /// </summary>
    public ImmutableArray<PythonPackageInfo> TransitiveDependencies
    {
        get { return ImmutableArray.CreateRange(Packages.Where(package => !package.IsDirectDependency)); }
    }

    /// <summary>
    /// Gets whether discovery finished with an empty error list.
    /// </summary>
    public bool IsSuccessful
    {
        get { return Errors.IsEmpty; }
    }
}
/// <summary>
/// Error encountered during package discovery.
/// </summary>
/// <remarks>
/// <c>PythonPackageDiscovery</c> creates one of these when an adapter throws anything
/// other than <see cref="OperationCanceledException"/> while enumerating packages,
/// then continues with the next adapter instead of aborting the run.
/// </remarks>
/// <param name="AdapterName">The adapter that encountered the error.</param>
/// <param name="Path">The path being searched.</param>
/// <param name="Message">Error message.</param>
/// <param name="Exception">The exception if available.</param>
internal sealed record PythonPackageDiscoveryError(
string AdapterName,
string Path,
string Message,
Exception? Exception);

View File

@@ -0,0 +1,175 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
/// <summary>
/// Represents discovered information about an installed Python package.
/// </summary>
/// <param name="Name">The package name (normalized per PEP 503).</param>
/// <param name="Version">The package version string.</param>
/// <param name="Kind">The packaging format.</param>
/// <param name="Location">The installation location (directory or archive path).</param>
/// <param name="MetadataPath">Path to the metadata directory (dist-info/egg-info).</param>
/// <param name="TopLevelModules">Top-level importable module names.</param>
/// <param name="Dependencies">Declared dependencies (requirement strings).</param>
/// <param name="Extras">Available extras.</param>
/// <param name="RecordFiles">Files listed in RECORD (if available).</param>
/// <param name="InstallerTool">The tool that installed this package (pip, poetry, conda, etc.).</param>
/// <param name="EditableTarget">For editable installs, the target directory.</param>
/// <param name="IsDirectDependency">Whether this is a direct (vs transitive) dependency.</param>
/// <param name="Confidence">Confidence level in the package discovery.</param>
internal sealed record PythonPackageInfo(
string Name,
string? Version,
PythonPackageKind Kind,
string Location,
string? MetadataPath,
ImmutableArray<string> TopLevelModules,
ImmutableArray<string> Dependencies,
ImmutableArray<string> Extras,
ImmutableArray<PythonRecordEntry> RecordFiles,
string? InstallerTool,
string? EditableTarget,
bool IsDirectDependency,
PythonPackageConfidence Confidence)
{
/// <summary>
/// Gets <see cref="Name"/> after it has been passed through <see cref="NormalizeName"/>.
/// The value is recomputed on every access.
/// </summary>
public string NormalizedName
{
    get { return NormalizeName(Name); }
}
/// <summary>
/// Gets whether <see cref="Kind"/> is one of the editable install kinds
/// (pip, Poetry or Flit editable).
/// </summary>
public bool IsEditable => Kind switch
{
    PythonPackageKind.PipEditable => true,
    PythonPackageKind.PoetryEditable => true,
    PythonPackageKind.FlitEditable => true,
    _ => false,
};
/// <summary>
/// Normalizes a package name so that names PEP 503 treats as equivalent compare equal.
/// </summary>
/// <remarks>
/// The name is lower-cased and every run of <c>-</c>, <c>_</c> and <c>.</c> is collapsed
/// to a single separator, as PEP 503 prescribes. The separator emitted here is <c>_</c>
/// rather than PEP 503's <c>-</c>, so the result is not a PEP 503 canonical name even
/// though equivalent names map to the same value.
/// </remarks>
/// <param name="name">The package name to normalize.</param>
/// <returns>The normalized name.</returns>
public static string NormalizeName(string name)
{
    var lowered = name.ToLowerInvariant();

    // The result can only be the same length or shorter than the input.
    var buffer = new char[lowered.Length];
    var length = 0;
    var previousWasSeparator = false;

    foreach (var c in lowered)
    {
        if (c is '-' or '_' or '.')
        {
            // Emit one separator per run so "a--b", "a-_b" and "a.b" all agree.
            if (!previousWasSeparator)
            {
                buffer[length++] = '_';
                previousWasSeparator = true;
            }
        }
        else
        {
            buffer[length++] = c;
            previousWasSeparator = false;
        }
    }

    return new string(buffer, 0, length);
}
/// <summary>
/// Produces key/value metadata describing this package, one pair at a time.
/// </summary>
/// <remarks>
/// Name, normalized name, kind, location, direct flag and confidence are always emitted.
/// Version, metadata path, installer and editable target are emitted only when non-null;
/// top-level modules (comma-joined) and dependencies (semicolon-joined) only when non-empty.
/// NOTE(review): requirement strings may themselves contain ';' (environment markers), so
/// the joined dependencies value may not split back unambiguously — confirm with consumers.
/// </remarks>
/// <param name="prefix">Prefix placed before each key, followed by a dot.</param>
/// <returns>A lazily evaluated sequence of metadata entries.</returns>
public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
{
    static KeyValuePair<string, string?> Entry(string key, string? value) =>
        KeyValuePair.Create(key, value);

    yield return Entry($"{prefix}.name", Name);
    yield return Entry($"{prefix}.normalizedName", NormalizedName);

    if (Version is { } version)
    {
        yield return Entry($"{prefix}.version", version);
    }

    yield return Entry($"{prefix}.kind", Kind.ToString());
    yield return Entry($"{prefix}.location", Location);

    if (MetadataPath is { } metadataPath)
    {
        yield return Entry($"{prefix}.metadataPath", metadataPath);
    }

    if (!TopLevelModules.IsEmpty)
    {
        yield return Entry($"{prefix}.topLevel", string.Join(",", TopLevelModules));
    }

    if (!Dependencies.IsEmpty)
    {
        yield return Entry($"{prefix}.dependencies", string.Join(";", Dependencies));
    }

    if (InstallerTool is { } installer)
    {
        yield return Entry($"{prefix}.installer", installer);
    }

    if (EditableTarget is { } editableTarget)
    {
        yield return Entry($"{prefix}.editableTarget", editableTarget);
    }

    yield return Entry($"{prefix}.isDirect", IsDirectDependency.ToString());
    yield return Entry($"{prefix}.confidence", Confidence.ToString());
}
}
/// <summary>
/// Represents an entry from a RECORD file.
/// </summary>
/// <param name="Path">The file path relative to site-packages.</param>
/// <param name="Hash">The hash algorithm and digest (e.g., "sha256=...").</param>
/// <param name="Size">The file size in bytes.</param>
internal sealed record PythonRecordEntry(
    string Path,
    string? Hash,
    long? Size)
{
    /// <summary>
    /// Parses a RECORD line into an entry.
    /// </summary>
    /// <remarks>
    /// RECORD is a CSV file, so a path containing a comma is written as a double-quoted
    /// field with embedded quotes doubled. Quoted fields are honoured here; a line
    /// without quotes is split on commas exactly as before. The size is parsed with the
    /// invariant culture so the result does not depend on the machine's locale.
    /// </remarks>
    /// <param name="line">A single line from a RECORD file.</param>
    /// <returns>
    /// The parsed entry, or <c>null</c> when the line is blank or its path field is empty.
    /// <see cref="Hash"/> and <see cref="Size"/> are <c>null</c> when the corresponding
    /// field is missing, blank or (for size) not an integer.
    /// </returns>
    public static PythonRecordEntry? Parse(string line)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            return null;
        }

        var fields = SplitCsvFields(line);

        var path = fields[0].Trim();
        if (path.Length == 0)
        {
            return null;
        }

        string? hash = null;
        if (fields.Count > 1 && !string.IsNullOrWhiteSpace(fields[1]))
        {
            hash = fields[1].Trim();
        }

        long? size = null;
        if (fields.Count > 2 &&
            long.TryParse(
                fields[2].Trim(),
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out var parsedSize))
        {
            size = parsedSize;
        }

        return new PythonRecordEntry(path, hash, size);
    }

    /// <summary>
    /// Splits one CSV line into fields. A double quote at the start of a field opens a
    /// quoted section in which commas are literal and <c>""</c> stands for one quote.
    /// Always returns at least one field.
    /// </summary>
    private static List<string> SplitCsvFields(string line)
    {
        var fields = new List<string>(3);
        var current = new System.Text.StringBuilder(line.Length);
        var inQuotes = false;

        for (var i = 0; i < line.Length; i++)
        {
            var c = line[i];

            if (inQuotes)
            {
                if (c != '"')
                {
                    current.Append(c);
                }
                else if (i + 1 < line.Length && line[i + 1] == '"')
                {
                    // Doubled quote inside a quoted field is an escaped quote.
                    current.Append('"');
                    i++;
                }
                else
                {
                    inQuotes = false;
                }
            }
            else if (c == ',')
            {
                fields.Add(current.ToString());
                current.Clear();
            }
            else if (c == '"' && current.Length == 0)
            {
                // Only a quote at the very start of a field opens a quoted section.
                inQuotes = true;
            }
            else
            {
                current.Append(c);
            }
        }

        fields.Add(current.ToString());
        return fields;
    }
}
/// <summary>
/// Confidence level for package discovery.
/// </summary>
/// <remarks>
/// Members carry explicit ascending numeric values. <c>PythonPackageDiscovery</c> compares
/// confidences with <c>&gt;</c> when the same package is reported more than once, so the
/// relative order of these values decides which report is kept.
/// </remarks>
internal enum PythonPackageConfidence
{
/// <summary>
/// Low confidence - inferred from file structure.
/// </summary>
Low = 0,
/// <summary>
/// Medium confidence - partial metadata available.
/// </summary>
Medium = 1,
/// <summary>
/// High confidence - metadata present and validated.
/// </summary>
High = 2,
/// <summary>
/// Definitive - full metadata with RECORD validation.
/// </summary>
Definitive = 3
}

View File

@@ -0,0 +1,67 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
/// <summary>
/// Identifies the packaging format of a Python package.
/// </summary>
/// <remarks>
/// No explicit values are assigned, so members take sequential values in declaration
/// order starting with <see cref="Unknown"/> at zero.
/// </remarks>
internal enum PythonPackageKind
{
/// <summary>
/// Unknown or unrecognized packaging format.
/// </summary>
Unknown,
/// <summary>
/// Standard wheel installation (.whl → dist-info).
/// </summary>
Wheel,
/// <summary>
/// Legacy egg installation (.egg-info).
/// </summary>
Egg,
/// <summary>
/// Source distribution (sdist) installation.
/// </summary>
Sdist,
/// <summary>
/// Pip editable install (development mode via .egg-link).
/// Counted as editable by <c>PythonPackageInfo.IsEditable</c>.
/// </summary>
PipEditable,
/// <summary>
/// Poetry editable install.
/// Counted as editable by <c>PythonPackageInfo.IsEditable</c>.
/// </summary>
PoetryEditable,
/// <summary>
/// Flit editable install.
/// Counted as editable by <c>PythonPackageInfo.IsEditable</c>.
/// </summary>
FlitEditable,
/// <summary>
/// Conda package installation.
/// </summary>
Conda,
/// <summary>
/// System package (installed via OS package manager).
/// </summary>
System,
/// <summary>
/// Vendored/bundled package (copied into project).
/// </summary>
Vendored,
/// <summary>
/// Zipapp package (.pyz, .pyzw).
/// </summary>
Zipapp,
/// <summary>
/// Local source package (not installed, just in path).
/// </summary>
LocalSource
}

View File

@@ -334,7 +334,7 @@ internal sealed partial class PythonModuleResolver
{
if (resolution.Kind == PythonResolutionKind.NamespacePackage)
{
// Collect namespace package paths
// Collect namespace package paths (already normalized)
if (resolution.VirtualPath is not null)
{
namespacePaths.Add(resolution.VirtualPath);
@@ -353,7 +353,7 @@ internal sealed partial class PythonModuleResolver
return new PythonModuleResolution(
ModuleName: moduleName,
Kind: PythonResolutionKind.NamespacePackage,
VirtualPath: namespacePaths[0],
VirtualPath: NormalizeVirtualPath(namespacePaths[0]),
AbsolutePath: null,
SearchPath: _searchPaths[0].Path,
Source: PythonFileSource.SitePackages,
@@ -430,7 +430,7 @@ internal sealed partial class PythonModuleResolver
return new PythonModuleResolution(
ModuleName: moduleName,
Kind: PythonResolutionKind.SourceModule,
VirtualPath: modulePath,
VirtualPath: NormalizeVirtualPath(modulePath),
AbsolutePath: file?.AbsolutePath,
SearchPath: searchPath.Path,
Source: file?.Source ?? PythonFileSource.Unknown,
@@ -447,7 +447,7 @@ internal sealed partial class PythonModuleResolver
return new PythonModuleResolution(
ModuleName: moduleName,
Kind: PythonResolutionKind.BytecodeModule,
VirtualPath: bytecodePath,
VirtualPath: NormalizeVirtualPath(bytecodePath),
AbsolutePath: file?.AbsolutePath,
SearchPath: searchPath.Path,
Source: file?.Source ?? PythonFileSource.Unknown,
@@ -466,7 +466,7 @@ internal sealed partial class PythonModuleResolver
return new PythonModuleResolution(
ModuleName: moduleName,
Kind: PythonResolutionKind.ExtensionModule,
VirtualPath: extPath,
VirtualPath: NormalizeVirtualPath(extPath),
AbsolutePath: file?.AbsolutePath,
SearchPath: searchPath.Path,
Source: file?.Source ?? PythonFileSource.Unknown,
@@ -484,7 +484,7 @@ internal sealed partial class PythonModuleResolver
return new PythonModuleResolution(
ModuleName: moduleName,
Kind: PythonResolutionKind.Package,
VirtualPath: packageInitPath,
VirtualPath: NormalizeVirtualPath(packageInitPath),
AbsolutePath: file?.AbsolutePath,
SearchPath: searchPath.Path,
Source: file?.Source ?? PythonFileSource.Unknown,
@@ -499,7 +499,7 @@ internal sealed partial class PythonModuleResolver
return new PythonModuleResolution(
ModuleName: moduleName,
Kind: PythonResolutionKind.NamespacePackage,
VirtualPath: packagePath,
VirtualPath: NormalizeVirtualPath(packagePath),
AbsolutePath: null,
SearchPath: searchPath.Path,
Source: PythonFileSource.Unknown,
@@ -517,6 +517,14 @@ internal sealed partial class PythonModuleResolver
return _vfs.EnumerateFiles(virtualPath, "*").Any();
}
/// <summary>
/// Returns <paramref name="path"/> with every leading '/' character removed.
/// </summary>
/// <param name="path">The virtual path to normalize.</param>
private static string NormalizeVirtualPath(string path) => path.TrimStart('/');
/// <summary>
/// Clears the resolution cache.
/// </summary>

View File

@@ -101,6 +101,82 @@ internal sealed partial class PythonVirtualFileSystem
return _files.TryGetValue(normalized, out file);
}
/// <summary>
/// Opens the file registered under a virtual path for reading.
/// </summary>
/// <remarks>
/// Archive-backed files are served through <c>OpenFromArchiveAsync</c>; all other files
/// are opened directly from <c>AbsolutePath</c> with shared read access. An
/// <see cref="IOException"/> or <see cref="UnauthorizedAccessException"/> raised while
/// opening is reported as <c>null</c> rather than propagated.
/// </remarks>
/// <param name="virtualPath">The virtual path to look up.</param>
/// <param name="cancellationToken">Forwarded to the archive read.</param>
/// <returns>A stream to read the file, or null if the file doesn't exist or can't be opened.</returns>
public async Task<Stream?> OpenReadAsync(string virtualPath, CancellationToken cancellationToken = default)
{
    if (GetFile(virtualPath) is not { } entry)
    {
        return null;
    }

    try
    {
        var servedFromArchive = entry.IsFromArchive && entry.ArchivePath is not null;
        if (servedFromArchive)
        {
            // Archive entries are copied to memory by the helper.
            return await OpenFromArchiveAsync(entry, cancellationToken).ConfigureAwait(false);
        }

        // Plain file on disk.
        return File.Exists(entry.AbsolutePath)
            ? new FileStream(
                entry.AbsolutePath,
                FileMode.Open,
                FileAccess.Read,
                FileShare.Read,
                bufferSize: 4096,
                useAsync: true)
            : null;
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        return null;
    }
}
/// <summary>
/// Reads one entry of a zip archive fully into memory and returns it as a stream
/// positioned at the start.
/// </summary>
/// <remarks>
/// The entry is looked up by the file's <c>AbsolutePath</c>. The archive is closed before
/// returning, which is why the content is buffered in a <see cref="MemoryStream"/>.
/// NOTE(review): the whole decompressed entry is buffered with no size limit — confirm
/// that is acceptable for the archives this is run against.
/// </remarks>
/// <param name="file">The virtual file describing the archive and the entry.</param>
/// <param name="cancellationToken">Observed while copying the entry.</param>
/// <returns>
/// The buffered entry, or <c>null</c> when the archive path is unset or missing on disk,
/// the entry is not found, or the archive raises <see cref="InvalidDataException"/>.
/// </returns>
private static async Task<Stream?> OpenFromArchiveAsync(PythonVirtualFile file, CancellationToken cancellationToken)
{
    var archivePath = file.ArchivePath;
    if (archivePath is null || !File.Exists(archivePath))
    {
        return null;
    }

    try
    {
        using (var zip = ZipFile.OpenRead(archivePath))
        {
            if (zip.GetEntry(file.AbsolutePath) is not { } zipEntry)
            {
                return null;
            }

            // Buffer the entry so the result outlives the archive handle.
            var buffer = new MemoryStream();
            using (var source = zipEntry.Open())
            {
                await source.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);
            }

            buffer.Seek(0, SeekOrigin.Begin);
            return buffer;
        }
    }
    catch (InvalidDataException)
    {
        return null;
    }
}
/// <summary>
/// Checks if a virtual path exists as a file.
/// </summary>