up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-27 23:44:42 +02:00
parent ef6e4b2067
commit 3b96b2e3ea
298 changed files with 47516 additions and 1168 deletions

View File

@@ -0,0 +1,183 @@
using System.Collections.Frozen;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
/// <summary>
/// A single Python entrypoint found during analysis.
/// </summary>
/// <param name="Name">Display name of the entrypoint (e.g., "myapp", "manage.py").</param>
/// <param name="Kind">Category of the entrypoint.</param>
/// <param name="Target">Module or callable reference (e.g., "myapp.cli:main", "myapp.__main__").</param>
/// <param name="VirtualPath">Location in the virtual filesystem, when one applies.</param>
/// <param name="InvocationContext">Description of how the entrypoint is launched.</param>
/// <param name="Confidence">How certain the detection is.</param>
/// <param name="Source">Where the entrypoint definition was found.</param>
/// <param name="Metadata">Optional extra key/value data.</param>
internal sealed record PythonEntrypoint(
    string Name,
    PythonEntrypointKind Kind,
    string Target,
    string? VirtualPath,
    PythonInvocationContext InvocationContext,
    PythonEntrypointConfidence Confidence,
    string Source,
    FrozenDictionary<string, string>? Metadata = null)
{
    /// <summary>
    /// Gets the portion of <see cref="Target"/> that precedes the first colon.
    /// Returns the whole target when there is no colon (or the colon is the first
    /// character), and <c>null</c> when the target is null or empty.
    /// </summary>
    public string? ModulePath =>
        string.IsNullOrEmpty(Target)
            ? null
            : Target.IndexOf(':') switch
            {
                var separator when separator > 0 => Target[..separator],
                _ => Target,
            };

    /// <summary>
    /// Gets the portion of <see cref="Target"/> that follows the first colon.
    /// Returns <c>null</c> when the target is null or empty, has no colon, starts
    /// with the colon, or ends with it.
    /// </summary>
    public string? Callable =>
        string.IsNullOrEmpty(Target)
            ? null
            : Target.IndexOf(':') switch
            {
                var separator when separator > 0 && separator < Target.Length - 1 => Target[(separator + 1)..],
                _ => null,
            };

    /// <summary>
    /// Gets a value indicating whether <see cref="Kind"/> is one of the framework
    /// or serverless handler kinds.
    /// </summary>
    public bool IsFrameworkEntrypoint => Kind switch
    {
        PythonEntrypointKind.DjangoManage
            or PythonEntrypointKind.WsgiApp
            or PythonEntrypointKind.AsgiApp
            or PythonEntrypointKind.CeleryWorker
            or PythonEntrypointKind.LambdaHandler
            or PythonEntrypointKind.AzureFunctionHandler
            or PythonEntrypointKind.CloudFunctionHandler => true,
        _ => false,
    };

    /// <summary>
    /// Gets a value indicating whether <see cref="Kind"/> is one of the
    /// command-line kinds (console script, script, CLI app, package main).
    /// </summary>
    public bool IsCliEntrypoint => Kind switch
    {
        PythonEntrypointKind.ConsoleScript
            or PythonEntrypointKind.Script
            or PythonEntrypointKind.CliApp
            or PythonEntrypointKind.PackageMain => true,
        _ => false,
    };
}
/// <summary>
/// Captures the way a Python entrypoint gets launched.
/// </summary>
/// <param name="InvocationType">Mechanism used to invoke the entrypoint.</param>
/// <param name="Command">Command or module to invoke (e.g., "python -m myapp", "./bin/myapp").</param>
/// <param name="Arguments">Additional arguments or environment requirements.</param>
/// <param name="WorkingDirectory">Expected working directory relative to project root.</param>
internal sealed record PythonInvocationContext(
    PythonInvocationType InvocationType,
    string Command,
    string? Arguments = null,
    string? WorkingDirectory = null)
{
    /// <summary>
    /// Builds a context whose command is <c>python -m</c> followed by <paramref name="modulePath"/>.
    /// </summary>
    public static PythonInvocationContext AsModule(string modulePath)
    {
        return new PythonInvocationContext(
            InvocationType: PythonInvocationType.Module,
            Command: $"python -m {modulePath}");
    }

    /// <summary>
    /// Builds a context that runs <paramref name="scriptPath"/> as a script.
    /// </summary>
    public static PythonInvocationContext AsScript(string scriptPath)
    {
        return new PythonInvocationContext(
            InvocationType: PythonInvocationType.Script,
            Command: scriptPath);
    }

    /// <summary>
    /// Builds a context for a console script called <paramref name="name"/>.
    /// </summary>
    public static PythonInvocationContext AsConsoleScript(string name)
    {
        return new PythonInvocationContext(
            InvocationType: PythonInvocationType.ConsoleScript,
            Command: name);
    }

    /// <summary>
    /// Builds a context for a WSGI/ASGI app: <paramref name="runner"/> becomes the
    /// command and <paramref name="target"/> is stored as its arguments.
    /// </summary>
    public static PythonInvocationContext AsWsgiApp(string runner, string target)
    {
        return new PythonInvocationContext(
            InvocationType: PythonInvocationType.WsgiAsgi,
            Command: runner,
            Arguments: target);
    }

    /// <summary>
    /// Builds a context for a serverless handler reference.
    /// </summary>
    public static PythonInvocationContext AsHandler(string handler)
    {
        return new PythonInvocationContext(
            InvocationType: PythonInvocationType.Handler,
            Command: handler);
    }
}
/// <summary>
/// Mechanism by which a Python entrypoint is started.
/// </summary>
internal enum PythonInvocationType
{
    /// <summary>
    /// Started with <c>python -m module</c>.
    /// </summary>
    Module = 0,

    /// <summary>
    /// Started by running a script file.
    /// </summary>
    Script = 1,

    /// <summary>
    /// Started through an installed console script.
    /// </summary>
    ConsoleScript = 2,

    /// <summary>
    /// Started by a WSGI/ASGI server.
    /// </summary>
    WsgiAsgi = 3,

    /// <summary>
    /// Started as a serverless handler.
    /// </summary>
    Handler = 4,
}
/// <summary>
/// Confidence level for entrypoint detection. Members are declared in ascending
/// order of certainty, so numeric comparison ranks them.
/// </summary>
internal enum PythonEntrypointConfidence
{
    /// <summary>
    /// Low confidence - inferred from heuristics or naming patterns.
    /// </summary>
    Low = 0,

    /// <summary>
    /// Medium confidence - detected from configuration or common patterns.
    /// </summary>
    Medium = 1,

    /// <summary>
    /// High confidence - explicitly declared in metadata.
    /// </summary>
    High = 2,

    /// <summary>
    /// Definitive - from an authoritative source like entry_points.txt.
    /// </summary>
    Definitive = 3,
}

View File

@@ -0,0 +1,138 @@
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
/// <summary>
/// Result of Python entrypoint analysis: the full sorted entrypoint list plus
/// pre-filtered views (console/GUI scripts, framework entrypoints, CLI entrypoints).
/// </summary>
internal sealed class PythonEntrypointAnalysis
{
    private PythonEntrypointAnalysis(
        IReadOnlyList<PythonEntrypoint> entrypoints,
        IReadOnlyList<PythonEntrypoint> consoleScripts,
        IReadOnlyList<PythonEntrypoint> frameworkEntrypoints,
        IReadOnlyList<PythonEntrypoint> cliEntrypoints)
    {
        Entrypoints = entrypoints;
        ConsoleScripts = consoleScripts;
        FrameworkEntrypoints = frameworkEntrypoints;
        CliEntrypoints = cliEntrypoints;
    }

    /// <summary>
    /// Gets all discovered entrypoints, ordered by name (ordinal) and then by kind.
    /// </summary>
    public IReadOnlyList<PythonEntrypoint> Entrypoints { get; }

    /// <summary>
    /// Gets console_scripts and gui_scripts entrypoints.
    /// </summary>
    public IReadOnlyList<PythonEntrypoint> ConsoleScripts { get; }

    /// <summary>
    /// Gets framework-specific entrypoints (Django, Lambda, etc.).
    /// </summary>
    public IReadOnlyList<PythonEntrypoint> FrameworkEntrypoints { get; }

    /// <summary>
    /// Gets CLI application entrypoints.
    /// </summary>
    public IReadOnlyList<PythonEntrypoint> CliEntrypoints { get; }

    /// <summary>
    /// Gets the primary entrypoint: highest confidence first, then a fixed kind
    /// preference (console script, package main, CLI app, Lambda, WSGI, ASGI,
    /// everything else), then name (ordinal). Returns <c>null</c> when no
    /// entrypoints were discovered.
    /// </summary>
    /// <remarks>
    /// The ranking is recomputed on every access; callers that need the value
    /// more than once should keep it in a local.
    /// </remarks>
    public PythonEntrypoint? PrimaryEntrypoint =>
        Entrypoints
            .OrderByDescending(static e => e.Confidence)
            .ThenBy(static e => e.Kind switch
            {
                PythonEntrypointKind.ConsoleScript => 0,
                PythonEntrypointKind.PackageMain => 1,
                PythonEntrypointKind.CliApp => 2,
                PythonEntrypointKind.LambdaHandler => 3,
                PythonEntrypointKind.WsgiApp => 4,
                PythonEntrypointKind.AsgiApp => 5,
                _ => 10
            })
            .ThenBy(static e => e.Name, StringComparer.Ordinal)
            .FirstOrDefault();

    /// <summary>
    /// Runs entrypoint discovery over a virtual filesystem and packages the result.
    /// </summary>
    /// <param name="vfs">Virtual filesystem to inspect.</param>
    /// <param name="rootPath">Root path passed through to the discovery component.</param>
    /// <param name="cancellationToken">Token used to cancel discovery.</param>
    /// <returns>The analysis containing all discovered entrypoints and filtered views.</returns>
    public static async Task<PythonEntrypointAnalysis> AnalyzeAsync(
        PythonVirtualFileSystem vfs,
        string rootPath,
        CancellationToken cancellationToken = default)
    {
        var discovery = new PythonEntrypointDiscovery(vfs, rootPath);
        await discovery.DiscoverAsync(cancellationToken).ConfigureAwait(false);

        // Sort once so that every derived view shares the same deterministic order.
        var entrypoints = discovery.Entrypoints
            .OrderBy(static e => e.Name, StringComparer.Ordinal)
            .ThenBy(static e => e.Kind)
            .ToList();

        var consoleScripts = entrypoints
            .Where(static e => e.Kind is PythonEntrypointKind.ConsoleScript or PythonEntrypointKind.GuiScript)
            .ToList();

        var frameworkEntrypoints = entrypoints
            .Where(static e => e.IsFrameworkEntrypoint)
            .ToList();

        var cliEntrypoints = entrypoints
            .Where(static e => e.IsCliEntrypoint)
            .ToList();

        return new PythonEntrypointAnalysis(
            entrypoints,
            consoleScripts,
            frameworkEntrypoints,
            cliEntrypoints);
    }

    /// <summary>
    /// Generates metadata entries for the analysis result: the four counts, the
    /// primary entrypoint (when present), the console script names joined by ';',
    /// and one entry per framework entrypoint keyed by its lower-cased kind.
    /// </summary>
    /// <remarks>
    /// NOTE(review): several framework entrypoints of the same kind produce entries
    /// with the same key - confirm that consumers tolerate duplicate keys.
    /// </remarks>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata()
    {
        // Counts are machine-readable metadata, so format them culture-invariantly.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        yield return Entry("entrypoints.total", Entrypoints.Count.ToString(invariant));
        yield return Entry("entrypoints.consoleScripts", ConsoleScripts.Count.ToString(invariant));
        yield return Entry("entrypoints.framework", FrameworkEntrypoints.Count.ToString(invariant));
        yield return Entry("entrypoints.cli", CliEntrypoints.Count.ToString(invariant));

        // PrimaryEntrypoint re-sorts the whole list on each access; evaluate it once.
        var primary = PrimaryEntrypoint;
        if (primary is not null)
        {
            yield return Entry("entrypoints.primary.name", primary.Name);
            yield return Entry("entrypoints.primary.kind", primary.Kind.ToString());
            yield return Entry("entrypoints.primary.target", primary.Target);
        }

        // List all console scripts
        if (ConsoleScripts.Count > 0)
        {
            var scripts = string.Join(';', ConsoleScripts.Select(static e => e.Name));
            yield return Entry("entrypoints.consoleScripts.names", scripts);
        }

        // List framework entrypoints
        foreach (var ep in FrameworkEntrypoints)
        {
            yield return Entry($"entrypoints.{ep.Kind.ToString().ToLowerInvariant()}", ep.Target);
        }

        static KeyValuePair<string, string?> Entry(string key, string? value) => new(key, value);
    }

    /// <summary>
    /// Gets entrypoints whose kind equals <paramref name="kind"/>.
    /// </summary>
    public IEnumerable<PythonEntrypoint> GetByKind(PythonEntrypointKind kind) =>
        Entrypoints.Where(e => e.Kind == kind);

    /// <summary>
    /// Gets entrypoints whose confidence is at least <paramref name="minConfidence"/>.
    /// </summary>
    public IEnumerable<PythonEntrypoint> GetByConfidence(PythonEntrypointConfidence minConfidence) =>
        Entrypoints.Where(e => e.Confidence >= minConfidence);
}

View File

@@ -0,0 +1,677 @@
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
/// <summary>
/// Discovers Python entrypoints from a virtual filesystem.
/// </summary>
internal sealed partial class PythonEntrypointDiscovery
{
private readonly PythonVirtualFileSystem _vfs;
private readonly string _rootPath;
private readonly List<PythonEntrypoint> _entrypoints = new List<PythonEntrypoint>();

/// <summary>
/// Creates a discovery instance over the given virtual filesystem.
/// </summary>
/// <param name="vfs">Virtual filesystem to inspect.</param>
/// <param name="rootPath">Root path used to resolve files on disk.</param>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public PythonEntrypointDiscovery(PythonVirtualFileSystem vfs, string rootPath)
{
    ArgumentNullException.ThrowIfNull(vfs);
    ArgumentNullException.ThrowIfNull(rootPath);

    _vfs = vfs;
    _rootPath = rootPath;
}

/// <summary>
/// Gets the entrypoints collected so far.
/// </summary>
public IReadOnlyList<PythonEntrypoint> Entrypoints
{
    get { return _entrypoints; }
}
/// <summary>
/// Runs every discovery pass, one after another, against the virtual filesystem.
/// </summary>
/// <param name="cancellationToken">Token forwarded to each pass.</param>
/// <returns>This instance, so calls can be chained.</returns>
public async Task<PythonEntrypointDiscovery> DiscoverAsync(CancellationToken cancellationToken = default)
{
    // Passes run strictly in this order; results are appended to the shared list.
    Func<CancellationToken, Task>[] passes =
    {
        DiscoverPackageMainsAsync,
        DiscoverConsoleScriptsAsync,
        DiscoverDataScriptsAsync,
        DiscoverZipappMainsAsync,
        DiscoverDjangoEntrypointsAsync,
        DiscoverWsgiAsgiAppsAsync,
        DiscoverCeleryEntrypointsAsync,
        DiscoverLambdaHandlersAsync,
        DiscoverCliAppsAsync,
        DiscoverStandaloneScriptsAsync,
    };

    foreach (var pass in passes)
    {
        await pass(cancellationToken).ConfigureAwait(false);
    }

    return this;
}
/// <summary>
/// Registers a <see cref="PythonEntrypointKind.PackageMain"/> entrypoint for every
/// <c>__main__.py</c> that sits inside at least one directory.
/// </summary>
private Task DiscoverPackageMainsAsync(CancellationToken cancellationToken)
{
    foreach (var mainFile in _vfs.EnumerateFiles(string.Empty, "**/__main__.py"))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var segments = mainFile.VirtualPath.Split('/');
        if (segments.Length >= 2)
        {
            // Dotted package path = every path segment except the trailing __main__.py.
            var dottedPackage = string.Join('.', segments[..^1]);
            var entrypoint = new PythonEntrypoint(
                Name: segments[0],
                Kind: PythonEntrypointKind.PackageMain,
                Target: dottedPackage,
                VirtualPath: mainFile.VirtualPath,
                InvocationContext: PythonInvocationContext.AsModule(dottedPackage),
                Confidence: PythonEntrypointConfidence.Definitive,
                Source: mainFile.VirtualPath);

            _entrypoints.Add(entrypoint);
        }
    }

    return Task.CompletedTask;
}
/// <summary>
/// Discovers console_scripts and gui_scripts from <c>*.dist-info/entry_points.txt</c> files.
/// </summary>
/// <remarks>
/// Files that come from an archive, do not exist on disk, or cannot be read are skipped.
/// </remarks>
private async Task DiscoverConsoleScriptsAsync(CancellationToken cancellationToken)
{
    var entryPointFiles = _vfs.EnumerateFiles(string.Empty, "*.dist-info/entry_points.txt");
    foreach (var file in entryPointFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (file.IsFromArchive)
        {
            continue; // Can't read from archive directly yet
        }

        // Try the path relative to the root first, then the path as recorded.
        var fullPath = Path.Combine(_rootPath, file.AbsolutePath);
        if (!File.Exists(fullPath))
        {
            fullPath = file.AbsolutePath;
            if (!File.Exists(fullPath))
            {
                continue;
            }
        }

        try
        {
            var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);
            ParseEntryPointsTxt(content, file.VirtualPath);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Skip unreadable files. Permission failures surface as
            // UnauthorizedAccessException, which is not an IOException.
        }
    }
}
/// <summary>
/// Parses the INI-style content of an entry_points.txt file and registers every
/// entry in the <c>[console_scripts]</c> and <c>[gui_scripts]</c> sections.
/// </summary>
/// <param name="content">Raw text of the entry_points.txt file.</param>
/// <param name="source">Value recorded as the source of each registered entrypoint.</param>
private void ParseEntryPointsTxt(string content, string source)
{
    string? currentSection = null;

    foreach (var line in content.Split('\n'))
    {
        // Trim also strips a trailing '\r' from CRLF files.
        var trimmed = line.Trim();

        // Blank lines and full-line comments ('#' or ';', the INI comment prefixes).
        if (trimmed.Length == 0 || trimmed[0] is '#' or ';')
        {
            continue;
        }

        // Section header
        if (trimmed.StartsWith('[') && trimmed.EndsWith(']'))
        {
            currentSection = trimmed[1..^1].Trim().ToLowerInvariant();
            continue;
        }

        if (currentSection is not ("console_scripts" or "gui_scripts"))
        {
            continue;
        }

        // Entry: name = module:callable or name = module:callable [extras]
        var equalsIndex = trimmed.IndexOf('=');
        if (equalsIndex <= 0)
        {
            continue;
        }

        var name = trimmed[..equalsIndex].Trim();
        var target = trimmed[(equalsIndex + 1)..].Trim();

        // Remove extras if present
        var bracketIndex = target.IndexOf('[');
        if (bracketIndex > 0)
        {
            target = target[..bracketIndex].Trim();
        }

        // "name =" with nothing after it does not reference anything runnable.
        if (target.Length == 0)
        {
            continue;
        }

        var kind = currentSection == "gui_scripts"
            ? PythonEntrypointKind.GuiScript
            : PythonEntrypointKind.ConsoleScript;

        _entrypoints.Add(new PythonEntrypoint(
            Name: name,
            Kind: kind,
            Target: target,
            VirtualPath: null,
            InvocationContext: PythonInvocationContext.AsConsoleScript(name),
            Confidence: PythonEntrypointConfidence.Definitive,
            Source: source));
    }
}
/// <summary>
/// Registers every file under <c>*.data/scripts/</c> and then every file under
/// <c>bin/</c> as a <see cref="PythonEntrypointKind.Script"/> entrypoint.
/// </summary>
private Task DiscoverDataScriptsAsync(CancellationToken cancellationToken)
{
    // (directory, pattern) pairs, scanned in this order.
    var scriptLocations = new[]
    {
        (Directory: string.Empty, Pattern: "*.data/scripts/*"),
        (Directory: "bin", Pattern: "*"),
    };

    foreach (var (directory, pattern) in scriptLocations)
    {
        foreach (var script in _vfs.EnumerateFiles(directory, pattern))
        {
            cancellationToken.ThrowIfCancellationRequested();

            var virtualPath = script.VirtualPath;
            _entrypoints.Add(new PythonEntrypoint(
                Name: Path.GetFileName(virtualPath),
                Kind: PythonEntrypointKind.Script,
                Target: virtualPath,
                VirtualPath: virtualPath,
                InvocationContext: PythonInvocationContext.AsScript(virtualPath),
                Confidence: PythonEntrypointConfidence.High,
                Source: virtualPath));
        }
    }

    return Task.CompletedTask;
}
/// <summary>
/// Registers a <see cref="PythonEntrypointKind.ZipappMain"/> entrypoint for each
/// zipapp-sourced file whose virtual path is exactly <c>__main__.py</c>.
/// </summary>
private Task DiscoverZipappMainsAsync(CancellationToken cancellationToken)
{
    foreach (var entry in _vfs.GetFilesBySource(PythonFileSource.Zipapp))
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Only a root-level __main__.py is considered.
        if (entry.VirtualPath != "__main__.py")
        {
            continue;
        }

        var invocation = new PythonInvocationContext(
            PythonInvocationType.Script,
            entry.ArchivePath ?? "app.pyz");

        _entrypoints.Add(new PythonEntrypoint(
            Name: "__main__",
            Kind: PythonEntrypointKind.ZipappMain,
            Target: "__main__",
            VirtualPath: entry.VirtualPath,
            InvocationContext: invocation,
            Confidence: PythonEntrypointConfidence.Definitive,
            Source: entry.ArchivePath ?? entry.VirtualPath));
    }

    return Task.CompletedTask;
}
/// <summary>
/// Discovers Django entrypoints: a root-level <c>manage.py</c>, and for every
/// <c>settings.py</c> inside a directory, the sibling <c>wsgi.py</c> / <c>asgi.py</c>.
/// </summary>
/// <remarks>
/// The method performs no asynchronous work, so it completes synchronously and
/// returns an already-completed task, like the other synchronous discovery passes.
/// </remarks>
private Task DiscoverDjangoEntrypointsAsync(CancellationToken cancellationToken)
{
    // Look for manage.py
    if (_vfs.FileExists("manage.py"))
    {
        _entrypoints.Add(new PythonEntrypoint(
            Name: "manage.py",
            Kind: PythonEntrypointKind.DjangoManage,
            Target: "manage",
            VirtualPath: "manage.py",
            InvocationContext: PythonInvocationContext.AsScript("manage.py"),
            Confidence: PythonEntrypointConfidence.High,
            Source: "manage.py"));
    }

    // Look for Django settings to infer WSGI/ASGI apps
    var settingsFiles = _vfs.EnumerateFiles(string.Empty, "**/settings.py");
    foreach (var file in settingsFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var parts = file.VirtualPath.Split('/');
        if (parts.Length < 2)
        {
            continue;
        }

        var projectName = parts[^2]; // Folder containing settings.py
        var projectDirectory = string.Join('/', parts[..^1]);

        // Check for wsgi.py
        var wsgiPath = projectDirectory + "/wsgi.py";
        if (_vfs.FileExists(wsgiPath))
        {
            var wsgiModule = $"{projectName}.wsgi:application";
            _entrypoints.Add(new PythonEntrypoint(
                Name: $"{projectName}-wsgi",
                Kind: PythonEntrypointKind.WsgiApp,
                Target: wsgiModule,
                VirtualPath: wsgiPath,
                InvocationContext: PythonInvocationContext.AsWsgiApp("gunicorn", wsgiModule),
                Confidence: PythonEntrypointConfidence.High,
                Source: wsgiPath));
        }

        // Check for asgi.py
        var asgiPath = projectDirectory + "/asgi.py";
        if (_vfs.FileExists(asgiPath))
        {
            var asgiModule = $"{projectName}.asgi:application";
            _entrypoints.Add(new PythonEntrypoint(
                Name: $"{projectName}-asgi",
                Kind: PythonEntrypointKind.AsgiApp,
                Target: asgiModule,
                VirtualPath: asgiPath,
                InvocationContext: PythonInvocationContext.AsWsgiApp("uvicorn", asgiModule),
                Confidence: PythonEntrypointConfidence.High,
                Source: asgiPath));
        }
    }

    return Task.CompletedTask;
}
/// <summary>
/// Discovers WSGI/ASGI apps from a root-level <c>gunicorn.conf.py</c> and from
/// gunicorn/uvicorn commands in a root-level <c>Procfile</c>.
/// </summary>
private async Task DiscoverWsgiAsgiAppsAsync(CancellationToken cancellationToken)
{
    // Check for gunicorn.conf.py or gunicorn configuration
    var gunicornConfig = await TryReadRootFileAsync("gunicorn.conf.py", cancellationToken).ConfigureAwait(false);
    if (gunicornConfig is not null)
    {
        AddGunicornConfigApp(gunicornConfig);
    }

    // Check for Procfile (common in Heroku deployments)
    var procfile = await TryReadRootFileAsync("Procfile", cancellationToken).ConfigureAwait(false);
    if (procfile is not null)
    {
        AddProcfileApps(procfile);
    }
}

/// <summary>
/// Reads a root-level file that is present in the virtual filesystem and on disk.
/// Returns <c>null</c> when the file is absent or cannot be read.
/// </summary>
private async Task<string?> TryReadRootFileAsync(string relativePath, CancellationToken cancellationToken)
{
    if (!_vfs.FileExists(relativePath))
    {
        return null;
    }

    var fullPath = Path.Combine(_rootPath, relativePath);
    if (!File.Exists(fullPath))
    {
        return null;
    }

    try
    {
        return await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Skip unreadable files. Permission failures surface as
        // UnauthorizedAccessException, which is not an IOException.
        return null;
    }
}

/// <summary>
/// Registers the app captured by <c>WsgiAppPattern</c> in gunicorn.conf.py content, if any.
/// </summary>
private void AddGunicornConfigApp(string content)
{
    var match = WsgiAppPattern().Match(content);
    if (!match.Success)
    {
        return;
    }

    var target = match.Groups["app"].Value;
    _entrypoints.Add(new PythonEntrypoint(
        Name: "gunicorn-app",
        Kind: PythonEntrypointKind.WsgiApp,
        Target: target,
        VirtualPath: "gunicorn.conf.py",
        InvocationContext: PythonInvocationContext.AsWsgiApp("gunicorn", target),
        Confidence: PythonEntrypointConfidence.High,
        Source: "gunicorn.conf.py"));
}

/// <summary>
/// Registers one entrypoint per gunicorn and per uvicorn command found in
/// <c>process-type: command</c> lines of Procfile content.
/// </summary>
private void AddProcfileApps(string content)
{
    foreach (var line in content.Split('\n'))
    {
        var trimmed = line.Trim();
        if (string.IsNullOrEmpty(trimmed) || trimmed.StartsWith('#'))
        {
            continue;
        }

        var colonIndex = trimmed.IndexOf(':');
        if (colonIndex <= 0)
        {
            continue;
        }

        var procType = trimmed[..colonIndex].Trim();
        var command = trimmed[(colonIndex + 1)..].Trim();

        // Look for gunicorn/uvicorn commands
        var gunicornMatch = GunicornCommandPattern().Match(command);
        if (gunicornMatch.Success)
        {
            var target = gunicornMatch.Groups["app"].Value;
            _entrypoints.Add(new PythonEntrypoint(
                Name: $"procfile-{procType}",
                Kind: PythonEntrypointKind.WsgiApp,
                Target: target,
                VirtualPath: "Procfile",
                InvocationContext: PythonInvocationContext.AsWsgiApp("gunicorn", target),
                Confidence: PythonEntrypointConfidence.Medium,
                Source: "Procfile"));
        }

        var uvicornMatch = UvicornCommandPattern().Match(command);
        if (uvicornMatch.Success)
        {
            var target = uvicornMatch.Groups["app"].Value;
            _entrypoints.Add(new PythonEntrypoint(
                Name: $"procfile-{procType}",
                Kind: PythonEntrypointKind.AsgiApp,
                Target: target,
                VirtualPath: "Procfile",
                InvocationContext: PythonInvocationContext.AsWsgiApp("uvicorn", target),
                Confidence: PythonEntrypointConfidence.Medium,
                Source: "Procfile"));
        }
    }
}
/// <summary>
/// Registers a <see cref="PythonEntrypointKind.CeleryWorker"/> entrypoint for every
/// <c>celery.py</c> found in the virtual filesystem.
/// </summary>
/// <remarks>
/// The method performs no asynchronous work, so it completes synchronously and
/// returns an already-completed task, like the other synchronous discovery passes.
/// </remarks>
private Task DiscoverCeleryEntrypointsAsync(CancellationToken cancellationToken)
{
    var celeryFiles = _vfs.EnumerateFiles(string.Empty, "**/celery.py");
    foreach (var file in celeryFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // "pkg/celery.py" -> "pkg.celery": dot-join the segments, drop the ".py" suffix.
        var parts = file.VirtualPath.Split('/');
        var modulePath = string.Join('.', parts)[..^3];

        _entrypoints.Add(new PythonEntrypoint(
            Name: "celery-worker",
            Kind: PythonEntrypointKind.CeleryWorker,
            Target: modulePath,
            VirtualPath: file.VirtualPath,
            InvocationContext: new PythonInvocationContext(
                PythonInvocationType.Module,
                $"celery -A {modulePath} worker"),
            Confidence: PythonEntrypointConfidence.Medium,
            Source: file.VirtualPath));
    }

    return Task.CompletedTask;
}
/// <summary>
/// Discovers serverless handlers: AWS Lambda (<c>lambda_function.py</c>, <c>handler.py</c>),
/// Azure Functions (<c>function.json</c> next to <c>__init__.py</c>), and Google Cloud
/// Functions (<c>main.py</c> that mentions <c>functions_framework</c>).
/// </summary>
private async Task DiscoverLambdaHandlersAsync(CancellationToken cancellationToken)
{
    // AWS Lambda - lambda_function.py with handler function
    if (_vfs.FileExists("lambda_function.py"))
    {
        _entrypoints.Add(new PythonEntrypoint(
            Name: "lambda-handler",
            Kind: PythonEntrypointKind.LambdaHandler,
            Target: "lambda_function.handler",
            VirtualPath: "lambda_function.py",
            InvocationContext: PythonInvocationContext.AsHandler("lambda_function.handler"),
            Confidence: PythonEntrypointConfidence.High,
            Source: "lambda_function.py"));
    }

    // Also check for handler.py (common Lambda pattern)
    if (_vfs.FileExists("handler.py"))
    {
        _entrypoints.Add(new PythonEntrypoint(
            Name: "lambda-handler",
            Kind: PythonEntrypointKind.LambdaHandler,
            Target: "handler.handler",
            VirtualPath: "handler.py",
            InvocationContext: PythonInvocationContext.AsHandler("handler.handler"),
            Confidence: PythonEntrypointConfidence.Medium,
            Source: "handler.py"));
    }

    // Azure Functions - look for function.json
    var functionJsonFiles = _vfs.EnumerateFiles(string.Empty, "**/function.json");
    foreach (var file in functionJsonFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var parts = file.VirtualPath.Split('/');
        if (parts.Length < 2)
        {
            continue;
        }

        var functionName = parts[^2];

        // Check for __init__.py in the same directory
        var initPath = string.Join('/', parts[..^1]) + "/__init__.py";
        if (_vfs.FileExists(initPath))
        {
            _entrypoints.Add(new PythonEntrypoint(
                Name: functionName,
                Kind: PythonEntrypointKind.AzureFunctionHandler,
                Target: $"{functionName}:main",
                VirtualPath: initPath,
                InvocationContext: PythonInvocationContext.AsHandler($"{functionName}:main"),
                Confidence: PythonEntrypointConfidence.High,
                Source: file.VirtualPath));
        }
    }

    // Google Cloud Functions - main.py with specific patterns
    if (!_vfs.FileExists("main.py"))
    {
        return;
    }

    var fullPath = Path.Combine(_rootPath, "main.py");
    if (!File.Exists(fullPath))
    {
        return;
    }

    try
    {
        var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);

        // Any mention of functions_framework (import or decorator) marks the file;
        // a separate "@functions_framework" check would be implied by this one.
        if (content.Contains("functions_framework", StringComparison.Ordinal))
        {
            var match = CloudFunctionPattern().Match(content);
            var functionName = match.Success ? match.Groups["name"].Value : "main";

            _entrypoints.Add(new PythonEntrypoint(
                Name: functionName,
                Kind: PythonEntrypointKind.CloudFunctionHandler,
                Target: $"main:{functionName}",
                VirtualPath: "main.py",
                InvocationContext: PythonInvocationContext.AsHandler($"main:{functionName}"),
                Confidence: PythonEntrypointConfidence.High,
                Source: "main.py"));
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Skip unreadable files. Permission failures surface as
        // UnauthorizedAccessException, which is not an IOException.
    }
}
/// <summary>
/// Discovers Click/Typer CLI applications by scanning the text of every
/// non-zipapp, non-archive Python source file.
/// </summary>
/// <remarks>
/// A Click file is only registered when it also contains a <c>__main__</c> guard;
/// a Typer file is registered whenever a Typer marker is present.
/// </remarks>
private async Task DiscoverCliAppsAsync(CancellationToken cancellationToken)
{
    // Look for files with Click or Typer patterns
    var pyFiles = _vfs.Files.Where(f => f.IsPythonSource && f.Source != PythonFileSource.Zipapp);
    foreach (var file in pyFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (file.IsFromArchive)
        {
            continue;
        }

        // Try the path relative to the root first, then the path as recorded.
        var fullPath = Path.Combine(_rootPath, file.AbsolutePath);
        if (!File.Exists(fullPath))
        {
            fullPath = file.AbsolutePath;
            if (!File.Exists(fullPath))
            {
                continue;
            }
        }

        try
        {
            var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);

            // Check for Click CLI patterns
            if (content.Contains("@click.command") || content.Contains("@click.group"))
            {
                var match = ClickGroupPattern().Match(content);
                var cliName = match.Success ? match.Groups["name"].Value : Path.GetFileNameWithoutExtension(file.VirtualPath);

                // When the group function was identified, reference it in the target;
                // otherwise fall back to the conventional "cli" callable name.
                var callable = match.Success ? match.Groups["name"].Value : "cli";

                // Check if there's a main guard
                if (content.Contains("if __name__") && content.Contains("__main__"))
                {
                    // NOTE(review): [..^3] assumes the virtual path ends in ".py" - confirm
                    // that IsPythonSource never admits other extensions.
                    var modulePath = file.VirtualPath.Replace('/', '.')[..^3];
                    _entrypoints.Add(new PythonEntrypoint(
                        Name: cliName,
                        Kind: PythonEntrypointKind.CliApp,
                        Target: $"{modulePath}:{callable}",
                        VirtualPath: file.VirtualPath,
                        InvocationContext: PythonInvocationContext.AsModule(modulePath),
                        Confidence: PythonEntrypointConfidence.Medium,
                        Source: file.VirtualPath));
                }
            }

            // Check for Typer CLI patterns
            if (content.Contains("typer.Typer") || content.Contains("@app.command"))
            {
                var modulePath = file.VirtualPath.Replace('/', '.')[..^3];
                _entrypoints.Add(new PythonEntrypoint(
                    Name: Path.GetFileNameWithoutExtension(file.VirtualPath),
                    Kind: PythonEntrypointKind.CliApp,
                    Target: $"{modulePath}:app",
                    VirtualPath: file.VirtualPath,
                    InvocationContext: PythonInvocationContext.AsModule(modulePath),
                    Confidence: PythonEntrypointConfidence.Medium,
                    Source: file.VirtualPath));
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Skip unreadable files. Permission failures surface as
            // UnauthorizedAccessException, which is not an IOException.
        }
    }
}
/// <summary>
/// Discovers standalone root-level Python scripts: files (other than
/// <c>__main__.py</c>) that contain a <c>__main__</c> guard or start with a shebang.
/// </summary>
/// <remarks>
/// A main guard yields high confidence; a shebang alone yields medium confidence.
/// </remarks>
private async Task DiscoverStandaloneScriptsAsync(CancellationToken cancellationToken)
{
    // Look for Python files at the root level with main guards
    var rootPyFiles = _vfs.EnumerateFiles(string.Empty, "*.py")
        .Where(f => !f.VirtualPath.Contains('/') && f.VirtualPath != "__main__.py");

    foreach (var file in rootPyFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (file.IsFromArchive)
        {
            continue;
        }

        // Try the path relative to the root first, then the path as recorded.
        var fullPath = Path.Combine(_rootPath, file.AbsolutePath);
        if (!File.Exists(fullPath))
        {
            fullPath = file.AbsolutePath;
            if (!File.Exists(fullPath))
            {
                continue;
            }
        }

        try
        {
            var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);

            // Check for main guard or shebang
            var hasMainGuard = content.Contains("if __name__") && content.Contains("__main__");

            // Ordinal: a shebang is two exact characters, not linguistic text, and
            // StartsWith(string) without a comparison is culture-sensitive.
            var hasShebang = content.StartsWith("#!", StringComparison.Ordinal);

            if (hasMainGuard || hasShebang)
            {
                var name = Path.GetFileNameWithoutExtension(file.VirtualPath);
                _entrypoints.Add(new PythonEntrypoint(
                    Name: name,
                    Kind: PythonEntrypointKind.StandaloneScript,
                    Target: file.VirtualPath,
                    VirtualPath: file.VirtualPath,
                    InvocationContext: PythonInvocationContext.AsScript(file.VirtualPath),
                    Confidence: hasMainGuard ? PythonEntrypointConfidence.High : PythonEntrypointConfidence.Medium,
                    Source: file.VirtualPath));
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Skip unreadable files. Permission failures surface as
            // UnauthorizedAccessException, which is not an IOException.
        }
    }
}
// gunicorn.conf.py: wsgi_app = "module:callable".
// The application is configured through the `wsgi_app` setting; `bind` holds the
// listen address (e.g. "0.0.0.0:8000") and must not be reported as the app.
// Anchored to the start of a line so that commented-out settings are ignored.
[GeneratedRegex(@"^[ \t]*wsgi_app\s*=\s*['""](?<app>[^'""]+)['""]", RegexOptions.IgnoreCase | RegexOptions.Multiline)]
private static partial Regex WsgiAppPattern();

// Procfile command: gunicorn [options] module:callable [options].
// Both sides of the colon must start with a letter or underscore, so address
// arguments such as "--bind 0.0.0.0:8000" are not mistaken for the app reference.
[GeneratedRegex(@"gunicorn\s+(?:.*\s+)?(?<app>[a-z_][\w.]*:[a-z_]\w*)", RegexOptions.IgnoreCase)]
private static partial Regex GunicornCommandPattern();

// Procfile command: uvicorn [options] module:callable [options].
// Options are allowed before the app reference, mirroring the gunicorn pattern.
[GeneratedRegex(@"uvicorn\s+(?:.*\s+)?(?<app>[a-z_][\w.]*:[a-z_]\w*)", RegexOptions.IgnoreCase)]
private static partial Regex UvicornCommandPattern();

// main.py: a function decorated with @functions_framework.http or
// @functions_framework.cloud_event; captures the function name.
[GeneratedRegex(@"@functions_framework\.(?:http|cloud_event)\s*\n\s*def\s+(?<name>\w+)", RegexOptions.IgnoreCase)]
private static partial Regex CloudFunctionPattern();

// A function decorated with a bare @click.group(); captures the function name.
[GeneratedRegex(@"@click\.group\(\)\s*\n\s*def\s+(?<name>\w+)", RegexOptions.IgnoreCase)]
private static partial Regex ClickGroupPattern();
}

View File

@@ -0,0 +1,82 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
/// <summary>
/// Category of a Python entrypoint. Numeric values follow declaration order.
/// </summary>
internal enum PythonEntrypointKind
{
    /// <summary>
    /// A package's __main__.py module (run with python -m package).
    /// </summary>
    PackageMain = 0,

    /// <summary>
    /// Entry from the [console_scripts] section of entry_points.txt.
    /// </summary>
    ConsoleScript = 1,

    /// <summary>
    /// Entry from the [gui_scripts] section of entry_points.txt.
    /// </summary>
    GuiScript = 2,

    /// <summary>
    /// Script file located in *.data/scripts/ or a bin/ directory.
    /// </summary>
    Script = 3,

    /// <summary>
    /// The __main__.py module of a zipapp.
    /// </summary>
    ZipappMain = 4,

    /// <summary>
    /// Django's manage.py management script.
    /// </summary>
    DjangoManage = 5,

    /// <summary>
    /// WSGI application reference (Gunicorn/uWSGI).
    /// </summary>
    WsgiApp = 6,

    /// <summary>
    /// ASGI application reference (Uvicorn, Daphne).
    /// </summary>
    AsgiApp = 7,

    /// <summary>
    /// Celery worker/beat entrypoint.
    /// </summary>
    CeleryWorker = 8,

    /// <summary>
    /// AWS Lambda handler function.
    /// </summary>
    LambdaHandler = 9,

    /// <summary>
    /// Azure Functions handler.
    /// </summary>
    AzureFunctionHandler = 10,

    /// <summary>
    /// Google Cloud Function handler.
    /// </summary>
    CloudFunctionHandler = 11,

    /// <summary>
    /// Click/Typer CLI application.
    /// </summary>
    CliApp = 12,

    /// <summary>
    /// pytest/unittest test runner.
    /// </summary>
    TestRunner = 13,

    /// <summary>
    /// Standalone .py file with a shebang or main guard.
    /// </summary>
    StandaloneScript = 14,
}

View File

@@ -0,0 +1,416 @@
using System.Buffers.Binary;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
/// <summary>
/// Extracts imports from Python bytecode (.pyc) files.
/// Supports Python 3.8 - 3.12+ bytecode formats.
/// </summary>
internal sealed class PythonBytecodeImportExtractor
{
// Opcode numbers of the import-related bytecode instructions.
// NOTE(review): the class summary advertises 3.8 - 3.12+ and the magic table
// below lists 3.13; verify these opcode numbers against each supported version's
// opcode table before relying on them, since opcode numbering is version-specific.
private const byte IMPORT_NAME = 108;
private const byte IMPORT_FROM = 109;
private const byte IMPORT_STAR = 84;
// Separate constant intended for Python 3.11+.
// NOTE(review): its value is identical to IMPORT_NAME, so it currently adds no
// distinction - confirm whether a different opcode was intended.
private const byte IMPORT_NAME_311 = 108;
// Maps the 16-bit magic number stored in the first 2 bytes of a .pyc header to
// the Python version label reported through PythonVersion.
private static readonly IReadOnlyDictionary<int, string> PythonMagicNumbers = new Dictionary<int, string>
{
// Python 3.8
[3413] = "3.8",
[3415] = "3.8",
// Python 3.9
[3425] = "3.9",
// Python 3.10
[3435] = "3.10",
[3437] = "3.10",
[3439] = "3.10",
// Python 3.11
[3495] = "3.11",
// Python 3.12
[3531] = "3.12",
// Python 3.13
[3571] = "3.13",
};
private readonly string _sourceFile;
private readonly List<PythonImport> _imports = new List<PythonImport>();

/// <summary>
/// Creates an extractor for the given source file.
/// </summary>
/// <param name="sourceFile">Name of the file the bytecode belongs to.</param>
/// <exception cref="ArgumentNullException"><paramref name="sourceFile"/> is <c>null</c>.</exception>
public PythonBytecodeImportExtractor(string sourceFile)
{
    ArgumentNullException.ThrowIfNull(sourceFile);
    _sourceFile = sourceFile;
}

/// <summary>
/// Gets the imports collected so far.
/// </summary>
public IReadOnlyList<PythonImport> Imports
{
    get { return _imports; }
}

/// <summary>
/// Gets the Python version inferred from the bytecode magic number, or
/// <c>null</c> when it has not been recognized.
/// </summary>
public string? PythonVersion { get; private set; }
/// <summary>
/// Extracts imports from the raw bytes of a .pyc file.
/// </summary>
/// <param name="bytecode">Complete .pyc content, header included.</param>
/// <returns>This instance, so calls can be chained.</returns>
/// <remarks>
/// Input shorter than the 16-byte header is ignored. The version is taken from
/// the 16-bit little-endian magic number in the first two bytes; everything
/// after the header is handed to the marshalled-code scanner.
/// </remarks>
public PythonBytecodeImportExtractor Extract(ReadOnlySpan<byte> bytecode)
{
    // Header length for Python 3.8+ .pyc files.
    const int HeaderLength = 16;

    if (bytecode.Length >= HeaderLength)
    {
        int magicNumber = BinaryPrimitives.ReadUInt16LittleEndian(bytecode);
        if (PythonMagicNumbers.TryGetValue(magicNumber, out var detectedVersion))
        {
            PythonVersion = detectedVersion;
        }

        // A header with no payload carries nothing to scan.
        if (bytecode.Length > HeaderLength)
        {
            ExtractFromMarshalledCode(bytecode.Slice(HeaderLength));
        }
    }

    return this;
}
/// <summary>
/// Reads the remainder of <paramref name="stream"/> and extracts imports from it.
/// </summary>
/// <param name="stream">Stream positioned at the start of the .pyc content.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <returns>
/// This instance. Nothing is extracted when the stream is <c>null</c>, unreadable, or empty.
/// </returns>
/// <remarks>
/// The content is copied into memory until end-of-stream. This works for
/// non-seekable streams (where <see cref="Stream.Length"/> throws) and does not
/// depend on a single read call returning the whole file.
/// </remarks>
public async Task<PythonBytecodeImportExtractor> ExtractAsync(Stream stream, CancellationToken cancellationToken = default)
{
    if (stream is null || !stream.CanRead)
    {
        return this;
    }

    // Bytecode files are typically small, so buffering the whole file is fine.
    using var buffer = new MemoryStream();
    await stream.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);

    if (buffer.Length > 0)
    {
        // GetBuffer avoids a second copy; only the first Length bytes are valid.
        Extract(buffer.GetBuffer().AsSpan(0, (int)buffer.Length));
    }

    return this;
}
/// <summary>
/// Heuristically extracts imports from the marshalled payload that follows the .pyc header.
/// </summary>
/// <param name="data">Marshalled data (file contents without the header).</param>
private void ExtractFromMarshalledCode(ReadOnlySpan<byte> data)
{
    if (data.Length < 2)
    {
        return;
    }

    // No TYPE_CODE (0x63) byte anywhere: fall back to scanning for module-like strings.
    if (data.IndexOf((byte)0x63) == -1)
    {
        ScanForModuleStrings(data);
        return;
    }

    // Import instructions reference entries of the name table by index, so both the
    // instruction bytes and at least one candidate name are required.
    var codeBytes = ExtractCodeBytes(data);
    if (codeBytes.Length == 0)
    {
        return;
    }

    var names = ExtractNameTable(data);
    if (names.Count == 0)
    {
        return;
    }

    ScanBytecodeForImports(codeBytes, names);
}
/// <summary>
/// Collects every marshalled string in <paramref name="data"/> that looks like a module name.
/// </summary>
/// <remarks>
/// This is a byte-by-byte heuristic scan, not a marshal parser: every offset is tried as a
/// potential string header, and the resulting list is in order of appearance.
/// </remarks>
/// <param name="data">Marshalled data to scan.</param>
/// <returns>Candidate names (1-255 bytes long) that pass <c>IsValidModuleName</c>.</returns>
private List<string> ExtractNameTable(ReadOnlySpan<byte> data)
{
    var names = new List<string>();

    for (var pos = 0; pos < data.Length - 4; pos++)
    {
        // marshal ORs FLAG_REF (0x80) into the type byte of objects it registers for
        // back-references, which is the common case for interned names. Strip it so those
        // strings are recognised as well.
        var marker = data[pos] & 0x7F;

        int length;
        int stringStart;
        switch (marker)
        {
            // TYPE_SHORT_ASCII ('z') / TYPE_SHORT_ASCII_INTERNED ('Z'): 1-byte length.
            case 0x7A or 0x5A:
                length = data[pos + 1];
                stringStart = pos + 2;
                break;

            // TYPE_ASCII ('a'), TYPE_ASCII_INTERNED ('A'), TYPE_UNICODE ('u'),
            // TYPE_INTERNED ('t'), TYPE_STRING ('s'): 4-byte little-endian length.
            // The loop bound guarantees pos + 5 <= data.Length.
            case 0x61 or 0x41 or 0x75 or 0x74 or 0x73:
                length = BinaryPrimitives.ReadInt32LittleEndian(data.Slice(pos + 1, 4));
                stringStart = pos + 5;
                break;

            default:
                continue;
        }

        // Names are short; anything else is almost certainly a false header match.
        if (length <= 0 || length >= 256 || stringStart + length > data.Length)
        {
            continue;
        }

        // Encoding.UTF8 substitutes U+FFFD for invalid bytes instead of throwing; such
        // strings are then rejected by IsValidModuleName, so no exception handling is needed.
        var candidate = Encoding.UTF8.GetString(data.Slice(stringStart, length));
        if (IsValidModuleName(candidate))
        {
            names.Add(candidate);
        }
    }

    return names;
}
/// <summary>
/// Returns the payload of the first bytes-like marshal entry in <paramref name="data"/>,
/// which is treated as the instruction stream of the code object.
/// </summary>
/// <param name="data">Marshalled data to scan.</param>
/// <returns>
/// The payload of the first entry tagged 0x73 or 0x43 whose 4-byte little-endian length is
/// between 1 and 99999 and fits inside <paramref name="data"/>; otherwise an empty span.
/// </returns>
private static ReadOnlySpan<byte> ExtractCodeBytes(ReadOnlySpan<byte> data)
{
    // Tag byte + 4-byte length.
    const int PrefixSize = 5;
    const int MaxPayloadLength = 100000;

    var scanLimit = data.Length - PrefixSize;
    for (var offset = 0; offset < scanLimit; offset++)
    {
        var tag = data[offset];
        if (tag != 0x73 && tag != 0x43)
        {
            continue;
        }

        var payloadLength = BinaryPrimitives.ReadInt32LittleEndian(data.Slice(offset + 1, 4));
        if (payloadLength <= 0 || payloadLength >= MaxPayloadLength)
        {
            continue;
        }

        var payloadStart = offset + PrefixSize;
        if (payloadStart + payloadLength > data.Length)
        {
            continue;
        }

        return data.Slice(payloadStart, payloadLength);
    }

    return default;
}
/// <summary>
/// Walks the instruction stream in 2-byte code units and records the module named by every
/// IMPORT_NAME instruction.
/// </summary>
/// <remarks>
/// IMPORT_FROM and IMPORT_STAR operate on the module already loaded by the preceding
/// IMPORT_NAME, so they contribute no additional module and are not inspected.
/// </remarks>
/// <param name="code">Raw instruction bytes (opcode, argument pairs).</param>
/// <param name="names">Name table that instruction arguments index into.</param>
private void ScanBytecodeForImports(ReadOnlySpan<byte> code, List<string> names)
{
    // EXTENDED_ARG prefixes an instruction whose argument does not fit in one byte; its own
    // argument supplies the higher-order bits of the next instruction's argument.
    const byte ExtendedArg = 144;

    var pendingHighBits = 0;
    for (var i = 0; i + 1 < code.Length; i += 2)
    {
        var opcode = code[i];
        var arg = (pendingHighBits << 8) | code[i + 1];

        if (opcode == ExtendedArg)
        {
            pendingHighBits = arg;
            continue;
        }

        pendingHighBits = 0;

        if (opcode != IMPORT_NAME && opcode != IMPORT_NAME_311)
        {
            continue;
        }

        // arg is an index into the names tuple. The lower bound guards against a garbage
        // chain of EXTENDED_ARG bytes overflowing into a negative value.
        if (arg >= 0 && arg < names.Count)
        {
            AddImport(names[arg], PythonImportKind.Import);
        }
    }
}
/// <summary>
/// Fallback used when no code-object marker is present: treats every maximal run of
/// ASCII letters, digits, underscores and dots as a candidate module name.
/// </summary>
/// <param name="data">Raw bytes to scan.</param>
private void ScanForModuleStrings(ReadOnlySpan<byte> data)
{
    var run = new StringBuilder();

    foreach (var b in data)
    {
        var ch = (char)b;

        // Only printable ASCII may contribute, so bytes >= 0x7F never count as letters.
        var isNameChar = b >= 0x20 && b <= 0x7E
            && (char.IsLetterOrDigit(ch) || ch == '_' || ch == '.');

        if (isNameChar)
        {
            run.Append(ch);
            continue;
        }

        if (run.Length == 0)
        {
            continue;
        }

        // Any other byte terminates the current run.
        CheckAndAddModuleName(run.ToString());
        run.Clear();
    }

    // Flush a run that extends to the end of the data.
    if (run.Length > 0)
    {
        CheckAndAddModuleName(run.ToString());
    }
}
/// <summary>
/// Records <paramref name="str"/> as an import if it plausibly names a module.
/// </summary>
/// <remarks>
/// Names recognised as standard library get medium confidence; everything else gets low
/// confidence because it was found by string scanning rather than by decoding instructions.
/// </remarks>
/// <param name="str">Candidate string found by the fallback scanner.</param>
private void CheckAndAddModuleName(string str)
{
    if (string.IsNullOrEmpty(str) || str.Length < 2)
    {
        return;
    }

    if (!IsValidModuleName(str))
    {
        return;
    }

    // Skip dunder names (__name__, __doc__, ...). Ordinal comparison: these are identifiers,
    // not linguistic text, so culture rules must not apply.
    if (str.StartsWith("__", StringComparison.Ordinal) && str.EndsWith("__", StringComparison.Ordinal))
    {
        return;
    }

    var confidence = IsStandardLibraryModule(str)
        ? PythonImportConfidence.Medium
        : PythonImportConfidence.Low;

    AddImport(str, PythonImportKind.Import, confidence);
}
/// <summary>
/// Adds an import to the result list unless an equivalent one is already present.
/// </summary>
/// <param name="module">Module name; leading dots denote a relative import.</param>
/// <param name="kind">Import kind; overridden with <c>RelativeImport</c> when leading dots are present.</param>
/// <param name="confidence">Confidence assigned to the new entry.</param>
private void AddImport(string module, PythonImportKind kind, PythonImportConfidence confidence = PythonImportConfidence.Medium)
{
    // Normalise first: count and strip leading dots so that the duplicate check below
    // compares against exactly what gets stored.
    var relativeLevel = 0;
    while (relativeLevel < module.Length && module[relativeLevel] == '.')
    {
        relativeLevel++;
    }

    var moduleName = module[relativeLevel..];
    if (relativeLevel > 0)
    {
        kind = PythonImportKind.RelativeImport;
    }

    foreach (var existing in _imports)
    {
        if (existing.Module == moduleName && existing.Kind == kind && existing.RelativeLevel == relativeLevel)
        {
            return;
        }
    }

    _imports.Add(new PythonImport(
        Module: moduleName,
        Names: null,
        Alias: null,
        Kind: kind,
        RelativeLevel: relativeLevel,
        SourceFile: _sourceFile,
        LineNumber: null, // Line numbers are not available in bytecode.
        Confidence: confidence));
}
/// <summary>
/// Determines whether <paramref name="str"/> is shaped like a dotted Python module path:
/// one or more identifiers separated by single dots, at most 100 characters in total.
/// </summary>
/// <param name="str">Candidate name.</param>
/// <returns>
/// <c>true</c> when every dot-separated segment is non-empty, starts with a letter or
/// underscore, and contains only letters, digits and underscores.
/// </returns>
private static bool IsValidModuleName(string str)
{
    if (string.IsNullOrEmpty(str) || str.Length > 100)
    {
        return false;
    }

    var atSegmentStart = true;
    foreach (var c in str)
    {
        if (c == '.')
        {
            // A dot at a segment start means a leading dot or an empty segment ("a..b").
            if (atSegmentStart)
            {
                return false;
            }

            atSegmentStart = true;
            continue;
        }

        if (atSegmentStart)
        {
            // Identifiers cannot start with a digit (rejects "1abc" and "v1.2").
            if (!char.IsLetter(c) && c != '_')
            {
                return false;
            }

            atSegmentStart = false;
        }
        else if (!char.IsLetterOrDigit(c) && c != '_')
        {
            return false;
        }
    }

    // Still at a segment start here means the name ended with a dot ("os.").
    return !atSegmentStart;
}
/// <summary>
/// Checks whether the top-level package of <paramref name="module"/> is one of a
/// (non-exhaustive) set of common Python standard library modules.
/// </summary>
/// <param name="module">Module path, optionally dotted (e.g. "os.path").</param>
/// <returns><c>true</c> if the first dotted segment is a known standard library module.</returns>
private static bool IsStandardLibraryModule(string module)
{
    // Only the first dotted segment matters; slicing avoids allocating the array Split would.
    var dot = module.IndexOf('.');
    var topLevel = dot < 0 ? module : module[..dot];

    // Third-party names (pytest, ffi) are deliberately absent: listing them here would
    // raise the confidence of matches that are not standard library modules.
    return topLevel switch
    {
        "os" or "sys" or "re" or "io" or "json" or "time" or "datetime" => true,
        "collections" or "itertools" or "functools" or "operator" => true,
        "pathlib" or "shutil" or "glob" or "fnmatch" or "linecache" => true,
        "pickle" or "shelve" or "dbm" or "sqlite3" => true,
        "zlib" or "gzip" or "bz2" or "lzma" or "zipfile" or "tarfile" => true,
        "csv" or "configparser" or "tomllib" => true,
        "hashlib" or "hmac" or "secrets" => true,
        "logging" or "warnings" or "traceback" => true,
        "threading" or "multiprocessing" or "concurrent" or "subprocess" or "sched" => true,
        "asyncio" or "socket" or "ssl" or "select" or "selectors" => true,
        "email" or "html" or "xml" or "urllib" or "http" => true,
        "unittest" or "doctest" => true,
        "typing" or "types" or "abc" or "contextlib" or "dataclasses" => true,
        "importlib" or "pkgutil" or "zipimport" => true,
        "copy" or "pprint" or "enum" or "graphlib" => true,
        "math" or "cmath" or "decimal" or "fractions" or "random" or "statistics" => true,
        "struct" or "codecs" or "unicodedata" or "stringprep" => true,
        "ctypes" => true,
        _ => false
    };
}
}

View File

@@ -0,0 +1,149 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
/// <summary>
/// Confidence level for import resolution.
/// Members are declared in ascending order of certainty (Low = 0 ... Definitive = 3), so a
/// greater-than-or-equal comparison on the enum value selects "at least this confident".
/// </summary>
internal enum PythonImportConfidence
{
/// <summary>
/// Low confidence - dynamic or uncertain import target.
/// </summary>
Low = 0,
/// <summary>
/// Medium confidence - inferred from context.
/// </summary>
Medium = 1,
/// <summary>
/// High confidence - clear static import.
/// </summary>
High = 2,
/// <summary>
/// Definitive - direct static import statement.
/// </summary>
Definitive = 3
}
/// <summary>
/// A single Python import, as found in source code or recovered from bytecode.
/// </summary>
/// <param name="Module">Imported module without leading dots (e.g. 'os.path').</param>
/// <param name="Names">Names pulled from the module ('from x import a, b'); null for a plain 'import x'.</param>
/// <param name="Alias">Alias given to the module, if any (e.g. 'np' in 'import numpy as np').</param>
/// <param name="Kind">Kind of import statement.</param>
/// <param name="RelativeLevel">Number of leading dots of a relative import; 0 for an absolute import.</param>
/// <param name="SourceFile">File that contains the import.</param>
/// <param name="LineNumber">1-based source line, or null when recovered from bytecode.</param>
/// <param name="Confidence">How certain the detection is.</param>
/// <param name="IsConditional">True when the import sits inside a try/except or if block.</param>
/// <param name="IsLazy">True when the import sits inside a function rather than at module level.</param>
/// <param name="IsTypeCheckingOnly">True when the import sits inside a TYPE_CHECKING block.</param>
internal sealed record PythonImport(
    string Module,
    IReadOnlyList<PythonImportedName>? Names,
    string? Alias,
    PythonImportKind Kind,
    int RelativeLevel,
    string SourceFile,
    int? LineNumber,
    PythonImportConfidence Confidence,
    bool IsConditional = false,
    bool IsLazy = false,
    bool IsTypeCheckingOnly = false)
{
    /// <summary>
    /// True when the import has at least one leading dot.
    /// </summary>
    public bool IsRelative => RelativeLevel > 0;

    /// <summary>
    /// True when the only imported name is '*'.
    /// </summary>
    public bool IsStar => Names is { Count: 1 } only && only[0].Name == "*";

    /// <summary>
    /// True for imports from the '__future__' module.
    /// </summary>
    public bool IsFuture => string.Equals(Module, "__future__", StringComparison.Ordinal);

    /// <summary>
    /// Module name as written in the import statement: <see cref="Module"/> for absolute
    /// imports, or the dot prefix followed by the module (e.g. '..foo') for relative ones.
    /// </summary>
    public string QualifiedModule
    {
        get
        {
            if (RelativeLevel <= 0)
            {
                return Module;
            }

            var dots = new string('.', RelativeLevel);
            if (string.IsNullOrEmpty(Module))
            {
                // 'from . import x' has no module part at all.
                return dots;
            }

            return dots + Module;
        }
    }

    /// <summary>
    /// Plain names of everything imported from the module; empty when <see cref="Names"/> is null.
    /// </summary>
    public IEnumerable<string> ImportedNames =>
        Names is null
            ? Enumerable.Empty<string>()
            : Names.Select(static imported => imported.Name);

    /// <summary>
    /// Produces flat metadata entries describing this import, keyed as 'import[index].field'.
    /// </summary>
    /// <param name="index">Position of this import, embedded in every key.</param>
    /// <returns>Module, kind and confidence always; the remaining fields only when set.</returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(int index)
    {
        var prefix = $"import[{index}]";

        KeyValuePair<string, string?> Entry(string field, string? value) =>
            new(prefix + "." + field, value);

        yield return Entry("module", QualifiedModule);
        yield return Entry("kind", Kind.ToString());
        yield return Entry("confidence", Confidence.ToString());

        if (Alias is not null)
        {
            yield return Entry("alias", Alias);
        }

        // A star import carries no useful name list.
        if (Names is { Count: > 0 } && !IsStar)
        {
            yield return Entry("names", string.Join(',', ImportedNames));
        }

        if (LineNumber is { } line)
        {
            yield return Entry("line", line.ToString());
        }

        if (IsConditional)
        {
            yield return Entry("conditional", "true");
        }

        if (IsLazy)
        {
            yield return Entry("lazy", "true");
        }

        if (IsTypeCheckingOnly)
        {
            yield return Entry("typeCheckingOnly", "true");
        }
    }
}
/// <summary>
/// One name brought in by a 'from module import name [as alias]' statement.
/// </summary>
/// <param name="Name">The name as defined in the module.</param>
/// <param name="Alias">Local alias, or null when none was given.</param>
internal sealed record PythonImportedName(string Name, string? Alias = null)
{
    /// <summary>
    /// The name this import is bound to locally: the alias when present, else the original name.
    /// </summary>
    public string EffectiveName => Alias is null ? Name : Alias;
}

View File

@@ -0,0 +1,381 @@
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
/// <summary>
/// Result of Python import analysis.
/// </summary>
internal sealed class PythonImportAnalysis
{
private PythonImportAnalysis(
PythonImportGraph graph,
IReadOnlyList<PythonImport> allImports,
IReadOnlyList<PythonImport> standardLibraryImports,
IReadOnlyList<PythonImport> thirdPartyImports,
IReadOnlyList<PythonImport> localImports,
IReadOnlyList<PythonImport> relativeImports,
IReadOnlyList<PythonImport> dynamicImports,
IReadOnlyList<IReadOnlyList<string>> cycles)
{
Graph = graph;
AllImports = allImports;
StandardLibraryImports = standardLibraryImports;
ThirdPartyImports = thirdPartyImports;
LocalImports = localImports;
RelativeImports = relativeImports;
DynamicImports = dynamicImports;
Cycles = cycles;
}
/// <summary>
/// Gets the import graph.
/// </summary>
public PythonImportGraph Graph { get; }
/// <summary>
/// Gets all discovered imports.
/// </summary>
public IReadOnlyList<PythonImport> AllImports { get; }
/// <summary>
/// Gets imports from the Python standard library.
/// </summary>
public IReadOnlyList<PythonImport> StandardLibraryImports { get; }
/// <summary>
/// Gets imports from third-party packages.
/// </summary>
public IReadOnlyList<PythonImport> ThirdPartyImports { get; }
/// <summary>
/// Gets imports from local modules (resolved in the project).
/// </summary>
public IReadOnlyList<PythonImport> LocalImports { get; }
/// <summary>
/// Gets relative imports.
/// </summary>
public IReadOnlyList<PythonImport> RelativeImports { get; }
/// <summary>
/// Gets dynamic imports (importlib.import_module, __import__, etc.).
/// </summary>
public IReadOnlyList<PythonImport> DynamicImports { get; }
/// <summary>
/// Gets detected import cycles.
/// </summary>
public IReadOnlyList<IReadOnlyList<string>> Cycles { get; }
/// <summary>
/// Gets whether there are any import cycles.
/// </summary>
public bool HasCycles => Cycles.Count > 0;
/// <summary>
/// Gets the total number of modules in the graph.
/// </summary>
public int TotalModules => Graph.Modules.Count;
/// <summary>
/// Gets the number of unresolved modules.
/// </summary>
public int UnresolvedModuleCount => Graph.UnresolvedModules.Count;
/// <summary>
/// Builds the import graph for a virtual filesystem and classifies every discovered import.
/// </summary>
/// <remarks>
/// Each import lands in exactly one of: relative, standard library, local (its module or
/// top-level package exists in the graph) or third-party. Dynamic imports are collected
/// additionally, independent of that classification.
/// </remarks>
/// <param name="vfs">Virtual filesystem containing the Python files.</param>
/// <param name="rootPath">Root path passed to the import graph.</param>
/// <param name="cancellationToken">Token used to cancel graph construction.</param>
/// <returns>The completed analysis.</returns>
public static async Task<PythonImportAnalysis> AnalyzeAsync(
    PythonVirtualFileSystem vfs,
    string rootPath,
    CancellationToken cancellationToken = default)
{
    var graph = new PythonImportGraph(vfs, rootPath);
    await graph.BuildAsync(cancellationToken).ConfigureAwait(false);

    // Stable ordering: by file, then by line (imports without a line number last).
    var allImports = graph.ImportsByFile.Values
        .SelectMany(static list => list)
        .OrderBy(static i => i.SourceFile, StringComparer.Ordinal)
        .ThenBy(static i => i.LineNumber ?? int.MaxValue)
        .ToList();

    var standardLibrary = new List<PythonImport>();
    var thirdParty = new List<PythonImport>();
    var local = new List<PythonImport>();
    var relative = new List<PythonImport>();
    var dynamic = new List<PythonImport>();

    // Single pass: avoids the quadratic List.Contains lookup per import.
    foreach (var import in allImports)
    {
        if (import.Kind is PythonImportKind.ImportlibImportModule or
            PythonImportKind.BuiltinImport or
            PythonImportKind.PkgutilExtendPath)
        {
            dynamic.Add(import);
        }

        // A relative import always targets the current package tree, even when its module
        // name happens to match a standard library module (e.g. 'from .json import x').
        if (import.IsRelative)
        {
            relative.Add(import);
            continue;
        }

        if (IsStandardLibrary(import.Module))
        {
            standardLibrary.Add(import);
            continue;
        }

        // Present in the graph (directly or via its top-level package) means local;
        // anything else must come from an installed third-party distribution.
        if (graph.Modules.ContainsKey(import.Module) ||
            graph.Modules.ContainsKey(import.Module.Split('.')[0]))
        {
            local.Add(import);
        }
        else
        {
            thirdParty.Add(import);
        }
    }

    var cycles = graph.FindCycles();

    return new PythonImportAnalysis(
        graph,
        allImports,
        standardLibrary,
        thirdParty,
        local,
        relative,
        dynamic,
        cycles);
}
/// <summary>
/// Produces flat metadata entries summarising the analysis: counters for every import
/// category, cycle information, and semicolon-separated module lists.
/// </summary>
/// <returns>Key/value pairs with keys under the 'imports.' prefix.</returns>
public IEnumerable<KeyValuePair<string, string?>> ToMetadata()
{
    static KeyValuePair<string, string?> Entry(string key, string? value) => new(key, value);

    yield return Entry("imports.total", AllImports.Count.ToString());
    yield return Entry("imports.stdlib", StandardLibraryImports.Count.ToString());
    yield return Entry("imports.thirdParty", ThirdPartyImports.Count.ToString());
    yield return Entry("imports.local", LocalImports.Count.ToString());
    yield return Entry("imports.relative", RelativeImports.Count.ToString());
    yield return Entry("imports.dynamic", DynamicImports.Count.ToString());
    yield return Entry("imports.modules", TotalModules.ToString());
    yield return Entry("imports.unresolved", UnresolvedModuleCount.ToString());
    yield return Entry("imports.hasCycles", HasCycles ? "true" : "false");

    if (HasCycles)
    {
        yield return Entry("imports.cycleCount", Cycles.Count.ToString());
    }

    // Distinct top-level packages of third-party imports, sorted for stable output.
    var thirdPartyPackages = ThirdPartyImports
        .Select(static import => import.Module.Split('.')[0])
        .Distinct(StringComparer.Ordinal)
        .OrderBy(static name => name, StringComparer.Ordinal)
        .ToList();

    if (thirdPartyPackages.Count > 0)
    {
        yield return Entry("imports.thirdParty.modules", string.Join(';', thirdPartyPackages));
    }

    if (DynamicImports.Count == 0)
    {
        yield break;
    }

    // Distinct targets of dynamic imports, sorted for stable output.
    var dynamicTargets = DynamicImports
        .Select(static import => import.Module)
        .Distinct(StringComparer.Ordinal)
        .OrderBy(static name => name, StringComparer.Ordinal)
        .ToList();

    yield return Entry("imports.dynamic.modules", string.Join(';', dynamicTargets));
}
/// <summary>
/// Returns the imports whose kind equals <paramref name="kind"/>.
/// </summary>
public IEnumerable<PythonImport> GetByKind(PythonImportKind kind)
{
    return AllImports.Where(import => import.Kind == kind);
}
/// <summary>
/// Returns the imports detected with at least <paramref name="minConfidence"/>.
/// </summary>
public IEnumerable<PythonImport> GetByConfidence(PythonImportConfidence minConfidence)
{
    return AllImports.Where(import => import.Confidence >= minConfidence);
}
/// <summary>
/// Returns the imports that target <paramref name="modulePath"/> itself or any of its submodules.
/// </summary>
public IEnumerable<PythonImport> GetImportsFor(string modulePath)
{
    // Trailing dot so that 'foo' matches 'foo.bar' but not 'foobar'.
    var submodulePrefix = modulePath + ".";

    return AllImports.Where(import =>
        import.Module == modulePath ||
        import.Module.StartsWith(submodulePrefix, StringComparison.Ordinal));
}
/// <summary>
/// Collects every module reachable from <paramref name="modulePath"/> by following
/// dependency edges of the graph.
/// </summary>
/// <param name="modulePath">Module to start from.</param>
/// <returns>
/// Reachable module paths. The start module itself is included only when it is reachable
/// from itself through the graph.
/// </returns>
public IReadOnlySet<string> GetTransitiveDependencies(string modulePath)
{
    var reachable = new HashSet<string>(StringComparer.Ordinal);
    var pending = new Queue<string>();
    pending.Enqueue(modulePath);

    while (pending.TryDequeue(out var module))
    {
        foreach (var dependency in Graph.GetDependencies(module))
        {
            // Add returns false for already-seen modules, which also terminates cycles.
            if (!reachable.Add(dependency.ModulePath))
            {
                continue;
            }

            pending.Enqueue(dependency.ModulePath);
        }
    }

    return reachable;
}
/// <summary>
/// Collects every module that directly or indirectly imports <paramref name="modulePath"/>
/// by following dependent (reverse) edges of the graph.
/// </summary>
/// <param name="modulePath">Module to start from.</param>
/// <returns>
/// Module paths of all dependents. The start module itself is included only when it is
/// reachable from itself through the graph.
/// </returns>
public IReadOnlySet<string> GetTransitiveDependents(string modulePath)
{
    var reachable = new HashSet<string>(StringComparer.Ordinal);
    var pending = new Queue<string>();
    pending.Enqueue(modulePath);

    while (pending.TryDequeue(out var module))
    {
        foreach (var dependent in Graph.GetDependents(module))
        {
            // Add returns false for already-seen modules, which also terminates cycles.
            if (!reachable.Add(dependent.ModulePath))
            {
                continue;
            }

            pending.Enqueue(dependent.ModulePath);
        }
    }

    return reachable;
}
/// <summary>
/// Determines whether the top-level package of <paramref name="module"/> belongs to the
/// Python standard library.
/// </summary>
/// <param name="module">Module path, optionally dotted (e.g. "os.path").</param>
/// <returns><c>true</c> if the first dotted segment is a listed standard library module.</returns>
private static bool IsStandardLibrary(string module)
{
    // Only the first dotted segment matters; slicing avoids allocating the array Split would.
    var dot = module.IndexOf('.');
    var topLevel = dot < 0 ? module : module[..dot];

    // Each name appears exactly once. Because topLevel never contains a dot, dotted
    // patterns such as "test.support" could never match and are not listed.
    return topLevel switch
    {
        // Built-in types and functions
        "builtins" or "__future__" or "abc" or "types" or "typing" => true,
        "typing_extensions" => false, // This is third-party
        // String and text processing
        "string" or "re" or "difflib" or "textwrap" or "unicodedata" or "stringprep" => true,
        "readline" or "rlcompleter" => true,
        // Binary data
        "struct" or "codecs" => true,
        // Data types
        "datetime" or "zoneinfo" or "calendar" or "collections" or "heapq" => true,
        "bisect" or "array" or "weakref" or "copy" or "pprint" => true,
        "reprlib" or "enum" or "graphlib" => true,
        // Numeric and math
        "numbers" or "math" or "cmath" or "decimal" or "fractions" => true,
        "random" or "statistics" => true,
        // Functional programming
        "itertools" or "functools" or "operator" => true,
        // File and directory
        "pathlib" or "fileinput" or "stat" or "filecmp" or "tempfile" => true,
        "glob" or "fnmatch" or "linecache" or "shutil" => true,
        // Data persistence
        "pickle" or "copyreg" or "shelve" or "marshal" or "dbm" or "sqlite3" => true,
        // Compression
        "zlib" or "gzip" or "bz2" or "lzma" or "zipfile" or "tarfile" => true,
        // File formats
        "csv" or "configparser" or "tomllib" or "netrc" or "plistlib" => true,
        // Cryptographic
        "hashlib" or "hmac" or "secrets" => true,
        // OS services
        "os" or "io" or "time" or "argparse" or "getopt" or "logging" => true,
        "getpass" or "curses" or "platform" or "errno" or "ctypes" => true,
        // Concurrent execution
        "threading" or "multiprocessing" or "concurrent" or "subprocess" => true,
        "sched" or "queue" or "_thread" or "contextvars" => true,
        // Networking
        "asyncio" or "socket" or "ssl" or "select" or "selectors" => true,
        "signal" or "mmap" => true,
        // Internet data handling
        "email" or "json" or "mailbox" or "mimetypes" or "base64" => true,
        "binascii" or "quopri" or "html" or "xml" => true,
        // Internet protocols
        "urllib" or "http" or "ftplib" or "poplib" or "imaplib" => true,
        "smtplib" or "uuid" or "socketserver" or "xmlrpc" or "ipaddress" => true,
        "webbrowser" or "wsgiref" => true,
        // Multimedia
        "wave" or "colorsys" => true,
        // Internationalization
        "gettext" or "locale" => true,
        // Program frameworks and GUI
        "turtle" or "cmd" or "shlex" or "tkinter" => true,
        // Development tools
        "pydoc" or "doctest" or "unittest" or "test" => true,
        // Debugging and profiling
        "bdb" or "faulthandler" or "pdb" or "timeit" or "trace" or "tracemalloc" => true,
        "cProfile" or "pstats" => true,
        // Software packaging
        "distutils" or "ensurepip" or "venv" or "zipapp" => true,
        // Python runtime
        "sys" or "sysconfig" or "warnings" or "dataclasses" => true,
        "contextlib" or "atexit" or "traceback" or "gc" or "inspect" or "site" => true,
        // Import system
        "importlib" or "pkgutil" or "modulefinder" or "runpy" or "zipimport" => true,
        // Language services
        "ast" or "symtable" or "token" or "keyword" or "tokenize" or "tabnanny" => true,
        "pyclbr" or "py_compile" or "compileall" or "dis" or "pickletools" or "codeop" => true,
        // MS Windows specific
        "msvcrt" or "winreg" or "winsound" => true,
        // Unix specific
        "posix" or "pwd" or "grp" or "termios" or "tty" or "pty" => true,
        "fcntl" or "pipes" or "resource" or "syslog" => true,
        // Superseded modules
        "aifc" or "audioop" or "cgi" or "cgitb" or "chunk" or "crypt" => true,
        "imghdr" or "mailcap" or "msilib" or "nis" or "nntplib" or "optparse" => true,
        "ossaudiodev" or "sndhdr" or "spwd" or "sunau" or "telnetlib" => true,
        "uu" or "xdrlib" => true,
        _ => false
    };
}
}

View File

@@ -0,0 +1,570 @@
using System.Collections.Frozen;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
/// <summary>
/// A module (or package) node of the import graph.
/// </summary>
/// <param name="ModulePath">Fully qualified dotted module path.</param>
/// <param name="VirtualPath">Path of the backing file in the virtual filesystem, or null if none was found.</param>
/// <param name="IsPackage">True when the node is a package (__init__.py).</param>
/// <param name="IsNamespacePackage">True when the node is a namespace package (no __init__.py).</param>
/// <param name="Source">Source type of the backing file.</param>
internal sealed record PythonModuleNode(
    string ModulePath,
    string? VirtualPath,
    bool IsPackage,
    bool IsNamespacePackage,
    PythonFileSource Source)
{
    /// <summary>
    /// True when the module is backed by a file.
    /// </summary>
    public bool IsResolved => VirtualPath != null;

    /// <summary>
    /// First dotted segment of <see cref="ModulePath"/> (the whole path when it has no dot).
    /// </summary>
    public string TopLevelPackage => ModulePath.Split('.', 2)[0];
}
/// <summary>
/// A directed edge of the import graph: <paramref name="From"/> imports <paramref name="To"/>.
/// </summary>
/// <param name="From">Module path of the importing module.</param>
/// <param name="To">Module path of the imported module.</param>
/// <param name="Import">Import statement this edge was derived from.</param>
internal sealed record PythonImportEdge(
    string From,
    string To,
    PythonImport Import)
{
    /// <summary>
    /// Key of the form 'from->to', identical for all edges between the same two modules,
    /// which makes it usable for deduplication.
    /// </summary>
    public string Key => string.Concat(From, "->", To);
}
/// <summary>
/// Builds and represents a Python import dependency graph.
/// </summary>
internal sealed class PythonImportGraph
{
private readonly PythonVirtualFileSystem _vfs;
private readonly string _rootPath;
private readonly Dictionary<string, PythonModuleNode> _modules = new(StringComparer.Ordinal);
private readonly Dictionary<string, List<PythonImportEdge>> _edges = new(StringComparer.Ordinal);
private readonly Dictionary<string, List<PythonImportEdge>> _reverseEdges = new(StringComparer.Ordinal);
private readonly Dictionary<string, List<PythonImport>> _importsByFile = new(StringComparer.Ordinal);
private readonly HashSet<string> _unresolvedModules = new(StringComparer.Ordinal);
public PythonImportGraph(PythonVirtualFileSystem vfs, string rootPath)
{
_vfs = vfs ?? throw new ArgumentNullException(nameof(vfs));
_rootPath = rootPath ?? throw new ArgumentNullException(nameof(rootPath));
}
/// <summary>
/// Gets all modules in the graph.
/// </summary>
public IReadOnlyDictionary<string, PythonModuleNode> Modules => _modules;
/// <summary>
/// Gets edges (dependencies) by source module.
/// </summary>
public IReadOnlyDictionary<string, List<PythonImportEdge>> Edges => _edges;
/// <summary>
/// Gets reverse edges (dependents) by target module.
/// </summary>
public IReadOnlyDictionary<string, List<PythonImportEdge>> ReverseEdges => _reverseEdges;
/// <summary>
/// Gets all imports grouped by source file.
/// </summary>
public IReadOnlyDictionary<string, List<PythonImport>> ImportsByFile => _importsByFile;
/// <summary>
/// Gets modules that could not be resolved to files.
/// </summary>
public IReadOnlySet<string> UnresolvedModules => _unresolvedModules;
/// <summary>
/// Gets the total number of import statements.
/// </summary>
public int TotalImports => _importsByFile.Values.Sum(static list => list.Count);
/// <summary>
/// Builds the import graph from the virtual filesystem.
/// </summary>
public async Task<PythonImportGraph> BuildAsync(CancellationToken cancellationToken = default)
{
// First, discover all modules in the VFS
DiscoverModules();
// Then extract imports from each Python file
await ExtractImportsAsync(cancellationToken).ConfigureAwait(false);
// Finally, build edges from imports
BuildEdges();
return this;
}
/// <summary>
/// Enumerates the known modules that <paramref name="modulePath"/> imports.
/// Targets that are not part of the graph's module table are skipped.
/// </summary>
public IEnumerable<PythonModuleNode> GetDependencies(string modulePath)
{
    if (_edges.TryGetValue(modulePath, out var outgoing))
    {
        foreach (var edge in outgoing)
        {
            if (!_modules.TryGetValue(edge.To, out var target))
            {
                continue;
            }

            yield return target;
        }
    }
}
/// <summary>
/// Enumerates the known modules that import <paramref name="modulePath"/>.
/// Importers that are not part of the graph's module table are skipped.
/// </summary>
public IEnumerable<PythonModuleNode> GetDependents(string modulePath)
{
    if (_reverseEdges.TryGetValue(modulePath, out var incoming))
    {
        foreach (var edge in incoming)
        {
            if (!_modules.TryGetValue(edge.From, out var importer))
            {
                continue;
            }

            yield return importer;
        }
    }
}
/// <summary>
/// Gets imports for a specific file.
/// </summary>
public IReadOnlyList<PythonImport> GetImportsForFile(string virtualPath)
{
return _importsByFile.TryGetValue(virtualPath, out var imports)
? imports
: Array.Empty<PythonImport>();
}
/// <summary>
/// Checks if there's a cyclic dependency involving the given module.
/// </summary>
public bool HasCycle(string modulePath)
{
var visited = new HashSet<string>(StringComparer.Ordinal);
var stack = new HashSet<string>(StringComparer.Ordinal);
return HasCycleInternal(modulePath, visited, stack);
}
/// <summary>
/// Gets all cyclic dependencies in the graph.
/// </summary>
public IReadOnlyList<IReadOnlyList<string>> FindCycles()
{
var cycles = new List<IReadOnlyList<string>>();
var visited = new HashSet<string>(StringComparer.Ordinal);
var stack = new List<string>();
var onStack = new HashSet<string>(StringComparer.Ordinal);
foreach (var module in _modules.Keys)
{
FindCyclesInternal(module, visited, stack, onStack, cycles);
}
return cycles;
}
/// <summary>
/// Computes a topological ordering of all modules by repeatedly emitting modules that
/// have no remaining incoming import edges.
/// </summary>
/// <returns>
/// Module paths ordered so that an importing module precedes the modules it imports,
/// or null when the graph contains a cycle and no such ordering exists.
/// </returns>
public IReadOnlyList<string>? GetTopologicalOrder()
{
    // Number of not-yet-processed import edges pointing at each known module.
    var pendingIncoming = new Dictionary<string, int>(StringComparer.Ordinal);
    foreach (var module in _modules.Keys)
    {
        pendingIncoming[module] = 0;
    }

    foreach (var edge in _edges.Values.SelectMany(static list => list))
    {
        // Edges to modules outside the graph (unresolved) do not take part in the ordering.
        if (pendingIncoming.TryGetValue(edge.To, out var count))
        {
            pendingIncoming[edge.To] = count + 1;
        }
    }

    var ready = new Queue<string>();
    foreach (var entry in pendingIncoming)
    {
        if (entry.Value == 0)
        {
            ready.Enqueue(entry.Key);
        }
    }

    var order = new List<string>();
    while (ready.TryDequeue(out var current))
    {
        order.Add(current);

        if (!_edges.TryGetValue(current, out var outgoing))
        {
            continue;
        }

        foreach (var edge in outgoing)
        {
            if (!pendingIncoming.TryGetValue(edge.To, out var remaining))
            {
                continue;
            }

            remaining--;
            pendingIncoming[edge.To] = remaining;
            if (remaining == 0)
            {
                ready.Enqueue(edge.To);
            }
        }
    }

    // Modules on a cycle never reach zero, so a short result means a cycle exists.
    if (order.Count != _modules.Count)
    {
        return null;
    }

    return order;
}
/// <summary>
/// Registers a module node for every Python source or bytecode file in the virtual
/// filesystem, together with nodes for all of its parent packages.
/// </summary>
private void DiscoverModules()
{
    foreach (var file in _vfs.Files)
    {
        var isPythonFile = file.IsPythonSource || file.IsBytecode;
        if (!isPythonFile)
        {
            continue;
        }

        var virtualPath = file.VirtualPath;
        var modulePath = VirtualPathToModulePath(virtualPath);
        if (string.IsNullOrEmpty(modulePath))
        {
            continue;
        }

        // A package is identified by its __init__.py, at the root or in any directory.
        var isPackage = virtualPath == "__init__.py" ||
            virtualPath.EndsWith("/__init__.py", StringComparison.Ordinal);

        // A later file mapping to the same module path replaces the earlier node.
        _modules[modulePath] = new PythonModuleNode(
            ModulePath: modulePath,
            VirtualPath: virtualPath,
            IsPackage: isPackage,
            IsNamespacePackage: false,
            Source: file.Source);

        AddParentPackages(modulePath, file.Source);
    }
}
/// <summary>
/// Ensures that every ancestor package of <paramref name="modulePath"/> has a node.
/// Ancestors without an __init__.py in the virtual filesystem are registered as
/// unresolved namespace packages.
/// </summary>
/// <param name="modulePath">Dotted path of the module whose ancestors are added.</param>
/// <param name="source">File source recorded on newly created package nodes.</param>
private void AddParentPackages(string modulePath, PythonFileSource source)
{
    var segments = modulePath.Split('.');
    var package = string.Empty;

    // Every segment except the last names an ancestor package.
    foreach (var segment in segments.SkipLast(1))
    {
        package = package.Length == 0 ? segment : package + "." + segment;

        if (_modules.ContainsKey(package))
        {
            continue;
        }

        var initPath = package.Replace('.', '/') + "/__init__.py";
        var hasInit = _vfs.FileExists(initPath);

        _modules[package] = new PythonModuleNode(
            ModulePath: package,
            VirtualPath: hasInit ? initPath : null,
            IsPackage: true,
            IsNamespacePackage: !hasInit,
            Source: source);
    }
}
/// <summary>
/// Extracts the imports of every Python source and bytecode file and stores the non-empty
/// results by virtual path.
/// </summary>
/// <param name="cancellationToken">Checked before each file and passed to the file reads.</param>
private async Task ExtractImportsAsync(CancellationToken cancellationToken)
{
    foreach (var file in _vfs.Files)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (file.IsPythonSource || file.IsBytecode)
        {
            var found = await ExtractFileImportsAsync(file, cancellationToken).ConfigureAwait(false);
            if (found.Count != 0)
            {
                _importsByFile[file.VirtualPath] = found;
            }
        }
    }
}
/// <summary>
/// Reads a single file from disk and extracts its imports.
/// </summary>
/// <param name="file">Virtual file to process.</param>
/// <param name="cancellationToken">Token passed to the file read.</param>
/// <returns>
/// The extracted imports. The list is empty for archive members (not supported yet), for
/// files that cannot be located on disk, and for files that cannot be read.
/// </returns>
private async Task<List<PythonImport>> ExtractFileImportsAsync(
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    // Archive members would have to be read from the zip; skipped for now.
    if (file.IsFromArchive)
    {
        return new List<PythonImport>();
    }

    // Prefer the path relative to the root, then fall back to the path as recorded.
    var diskPath = Path.Combine(_rootPath, file.AbsolutePath);
    if (!File.Exists(diskPath))
    {
        diskPath = file.AbsolutePath;
    }

    if (!File.Exists(diskPath))
    {
        return new List<PythonImport>();
    }

    try
    {
        if (file.IsPythonSource)
        {
            var sourceText = await File.ReadAllTextAsync(diskPath, cancellationToken).ConfigureAwait(false);
            var sourceExtractor = new PythonSourceImportExtractor(file.VirtualPath);
            sourceExtractor.Extract(sourceText);
            return sourceExtractor.Imports.ToList();
        }

        if (file.IsBytecode)
        {
            var rawBytes = await File.ReadAllBytesAsync(diskPath, cancellationToken).ConfigureAwait(false);
            var bytecodeExtractor = new PythonBytecodeImportExtractor(file.VirtualPath);
            bytecodeExtractor.Extract(rawBytes);
            return bytecodeExtractor.Imports.ToList();
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: unreadable or inaccessible files contribute no imports.
    }

    return new List<PythonImport>();
}
/// <summary>
/// Converts the extracted imports into forward and reverse graph edges, and records every
/// target that is not a known module as unresolved.
/// </summary>
private void BuildEdges()
{
    // Appends an edge to the bucket for the given key, creating the bucket on first use.
    static void Append(Dictionary<string, List<PythonImportEdge>> map, string key, PythonImportEdge edge)
    {
        if (!map.TryGetValue(key, out var bucket))
        {
            bucket = new List<PythonImportEdge>();
            map[key] = bucket;
        }

        bucket.Add(edge);
    }

    foreach (var entry in _importsByFile)
    {
        var importer = VirtualPathToModulePath(entry.Key);
        if (string.IsNullOrEmpty(importer))
        {
            continue;
        }

        foreach (var import in entry.Value)
        {
            var target = ResolveImportTarget(import, importer);
            if (string.IsNullOrEmpty(target))
            {
                continue;
            }

            var edge = new PythonImportEdge(importer, target, import);
            Append(_edges, importer, edge);
            Append(_reverseEdges, target, edge);

            if (!_modules.ContainsKey(target))
            {
                _unresolvedModules.Add(target);
            }
        }
    }
}
/// <summary>
/// Returns the dotted module path an import refers to: relative imports are resolved
/// against <paramref name="sourceModulePath"/>, absolute imports use their module name as-is.
/// </summary>
private string? ResolveImportTarget(PythonImport import, string sourceModulePath)
    => import.IsRelative
        ? ResolveRelativeImport(import, sourceModulePath)
        : import.Module;
/// <summary>
/// Resolves a relative import (<c>from . import x</c>, <c>from ..pkg import y</c>) to a dotted module path.
/// </summary>
/// <remarks>
/// Python resolves relative imports against the importing module's package:
/// for a package (<c>__init__.py</c>) that is the module itself, for a plain module it is the
/// module path without its last component. Level 1 refers to that package, and every
/// additional level moves one package further up.
/// The previous implementation stripped one component too many in both cases
/// (e.g. <c>from .utils import x</c> in <c>mypkg/core.py</c> resolved to <c>utils</c>
/// instead of <c>mypkg.utils</c>).
/// </remarks>
/// <param name="import">The relative import to resolve.</param>
/// <param name="sourceModulePath">Dotted path of the importing module.</param>
/// <returns>
/// The resolved dotted path, or <c>null</c> when the import climbs above the available
/// path components or resolves to nothing.
/// </returns>
private string? ResolveRelativeImport(PythonImport import, string sourceModulePath)
{
    var parts = sourceModulePath.Split('.');

    // A source whose virtual path is an __init__.py is itself the package.
    var sourceVirtualPath = _modules.TryGetValue(sourceModulePath, out var node) ? node.VirtualPath : null;
    var isSourcePackage = sourceVirtualPath?.EndsWith("__init__.py", StringComparison.Ordinal) == true;

    // Number of trailing components to drop from the source module path:
    //   level 1 (.)  = the current package  -> drop nothing extra
    //   level 2 (..) = the parent package   -> drop one component, and so on.
    // Math.Max guards against a level below 1 producing a negative count.
    var levelsUp = Math.Max(import.RelativeLevel, 1) - 1;

    // A plain module additionally drops its own name to reach its package.
    if (!isSourcePackage)
    {
        levelsUp++;
    }

    if (levelsUp > parts.Length)
    {
        return null; // Invalid relative import (goes beyond top-level package)
    }

    var basePackage = string.Join('.', parts[..^levelsUp]);

    if (string.IsNullOrEmpty(import.Module))
    {
        // "from . import x": the target is the base package itself.
        return string.IsNullOrEmpty(basePackage) ? null : basePackage;
    }

    return string.IsNullOrEmpty(basePackage)
        ? import.Module
        : $"{basePackage}.{import.Module}";
}
/// <summary>
/// Converts a slash-separated virtual file path into a dotted module path.
/// A trailing <c>.py</c>/<c>.pyc</c> extension is dropped and a package initializer
/// (<c>pkg/__init__.py</c>) maps to the package name. A bare top-level
/// <c>__init__</c> yields an empty string.
/// </summary>
private static string VirtualPathToModulePath(string virtualPath)
{
    const string PackageInitSuffix = "/__init__";

    // Strip the file extension, if any.
    var stem = virtualPath.EndsWith(".py", StringComparison.Ordinal)
        ? virtualPath[..^3]
        : virtualPath.EndsWith(".pyc", StringComparison.Ordinal)
            ? virtualPath[..^4]
            : virtualPath;

    // A top-level __init__ has no package name to map to.
    if (stem == "__init__")
    {
        return string.Empty;
    }

    // pkg/__init__ stands for the package pkg.
    if (stem.EndsWith(PackageInitSuffix, StringComparison.Ordinal))
    {
        stem = stem[..^PackageInitSuffix.Length];
    }

    return stem.Replace('/', '.');
}
/// <summary>
/// Depth-first search step that reports whether a cycle is reachable from <paramref name="module"/>.
/// </summary>
/// <param name="module">Module currently being explored.</param>
/// <param name="visited">Modules that have already been explored; updated in place.</param>
/// <param name="stack">Modules on the current search path; updated in place.</param>
/// <returns><c>true</c> as soon as an edge leads back to a module on the current path.</returns>
private bool HasCycleInternal(string module, HashSet<string> visited, HashSet<string> stack)
{
    // Reaching a module that is still on the current path closes a cycle.
    if (stack.Contains(module))
    {
        return true;
    }

    // Add returns false when the module was explored before.
    if (!visited.Add(module))
    {
        return false;
    }

    stack.Add(module);

    var cycleFound = _edges.TryGetValue(module, out var outgoing)
        && outgoing.Any(edge => HasCycleInternal(edge.To, visited, stack));
    if (cycleFound)
    {
        return true;
    }

    stack.Remove(module);
    return false;
}
/// <summary>
/// Depth-first search step that collects import cycles reachable from <paramref name="module"/>.
/// </summary>
/// <param name="module">Module currently being explored.</param>
/// <param name="visited">Modules that have already been explored; updated in place.</param>
/// <param name="stack">Ordered current search path; updated in place.</param>
/// <param name="onStack">Set view of <paramref name="stack"/> used for membership checks.</param>
/// <param name="cycles">Receives each cycle as a path that starts and ends with the same module.</param>
private void FindCyclesInternal(
    string module,
    HashSet<string> visited,
    List<string> stack,
    HashSet<string> onStack,
    List<IReadOnlyList<string>> cycles)
{
    if (onStack.Contains(module))
    {
        // The cycle is the tail of the current path starting at the revisited module.
        var firstIndex = stack.IndexOf(module);
        if (firstIndex >= 0)
        {
            var path = stack.GetRange(firstIndex, stack.Count - firstIndex);
            path.Add(module); // Complete the cycle
            cycles.Add(path);
        }

        return;
    }

    // Add returns false when the module was explored before.
    if (!visited.Add(module))
    {
        return;
    }

    stack.Add(module);
    onStack.Add(module);

    if (_edges.TryGetValue(module, out var outgoing))
    {
        foreach (var edge in outgoing)
        {
            FindCyclesInternal(edge.To, visited, stack, onStack, cycles);
        }
    }

    // Leave the module again before returning to the caller's path.
    onStack.Remove(module);
    stack.RemoveAt(stack.Count - 1);
}
}

View File

@@ -0,0 +1,62 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
/// <summary>
/// Categorizes Python import statement types.
/// </summary>
/// <remarks>
/// Members carry no explicit values, so their numeric values follow declaration order;
/// new members should be appended rather than inserted.
/// </remarks>
internal enum PythonImportKind
{
/// <summary>
/// Standard import: <c>import foo</c>
/// </summary>
Import,
/// <summary>
/// From import: <c>from foo import bar</c>
/// </summary>
FromImport,
/// <summary>
/// Relative import: <c>from . import bar</c> or <c>from ..foo import bar</c>
/// </summary>
RelativeImport,
/// <summary>
/// Star import: <c>from foo import *</c>
/// </summary>
StarImport,
/// <summary>
/// Dynamic import via <c>importlib.import_module()</c>
/// </summary>
ImportlibImportModule,
/// <summary>
/// Dynamic import via <c>__import__()</c>
/// </summary>
BuiltinImport,
/// <summary>
/// Namespace package extension via <c>pkgutil.extend_path()</c>
/// </summary>
PkgutilExtendPath,
/// <summary>
/// Conditional import (inside try/except or if block)
/// </summary>
ConditionalImport,
/// <summary>
/// Lazy import (inside function body, not at module level)
/// </summary>
LazyImport,
/// <summary>
/// Type checking only import (inside <c>TYPE_CHECKING</c> block)
/// </summary>
TypeCheckingImport,
/// <summary>
/// Future import: <c>from __future__ import ...</c>
/// </summary>
FutureImport
}

View File

@@ -0,0 +1,449 @@
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
/// <summary>
/// Extracts imports from Python source code using regex-based static analysis.
/// </summary>
internal sealed partial class PythonSourceImportExtractor
{
private readonly string _sourceFile;
private readonly List<PythonImport> _imports = new();
private bool _inTryBlock;
private bool _inTypeCheckingBlock;
private int _functionDepth;
private int _classDepth;
/// <summary>
/// Creates an extractor whose imports are attributed to <paramref name="sourceFile"/>.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="sourceFile"/> is <c>null</c>.</exception>
public PythonSourceImportExtractor(string sourceFile)
{
    ArgumentNullException.ThrowIfNull(sourceFile);
    _sourceFile = sourceFile;
}
/// <summary>
/// Gets all extracted imports.
/// </summary>
public IReadOnlyList<PythonImport> Imports => _imports;
/// <summary>
/// Extracts imports from Python source code.
/// </summary>
/// <remarks>
/// The source is processed line by line. Physical lines ending in a backslash are joined
/// into one logical line, comments are removed, and each remaining line is matched against
/// the import patterns. The line number reported for a joined statement is that of its
/// last physical line.
/// </remarks>
/// <param name="content">Python source text; <c>null</c> or empty input is ignored.</param>
/// <returns>This extractor, to allow chaining.</returns>
public PythonSourceImportExtractor Extract(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return this;
    }

    var lineNumber = 0;
    var continuedLine = string.Empty;

    foreach (var rawLine in content.Split('\n'))
    {
        lineNumber++;
        var line = rawLine.TrimEnd('\r');

        // Handle line continuations. The first segment keeps its leading whitespace so that
        // the joined logical line still carries the statement's indentation.
        if (line.EndsWith('\\'))
        {
            var segment = line[..^1];
            continuedLine += (continuedLine.Length == 0 ? segment.TrimEnd() : segment.Trim()) + " ";
            continue;
        }

        var fullLine = continuedLine.Length == 0 ? line : continuedLine + line.TrimStart();
        continuedLine = string.Empty;

        // Track context. The untrimmed line is passed on purpose: the context tracker
        // needs the indentation to recognise module-level statements.
        UpdateContext(fullLine);

        // Skip comments and empty lines
        var trimmed = fullLine.TrimStart();
        if (string.IsNullOrEmpty(trimmed) || trimmed.StartsWith('#'))
        {
            continue;
        }

        // Remove inline comments for parsing
        var commentIndex = FindCommentStart(trimmed);
        if (commentIndex >= 0)
        {
            trimmed = trimmed[..commentIndex].TrimEnd();
        }

        if (string.IsNullOrEmpty(trimmed))
        {
            continue;
        }

        // Try to extract imports
        ExtractImports(trimmed, lineNumber);
    }

    return this;
}

/// <summary>
/// Updates the try / TYPE_CHECKING / function / class tracking state for one logical line.
/// </summary>
/// <remarks>
/// This is an indentation heuristic, not a parser. A statement that starts in column 0 is
/// treated as module level and ends any function, class or TYPE_CHECKING context before the
/// line itself is inspected. Blank lines, comment lines and lines that only close a bracket
/// never change the context.
/// </remarks>
/// <param name="line">The logical line including its leading whitespace.</param>
private void UpdateContext(string line)
{
    var trimmed = line.TrimStart();

    // Blank and comment lines carry no structure.
    if (trimmed.Length == 0 || trimmed[0] == '#')
    {
        return;
    }

    // Reset context at module level statements. This has to happen before the line is
    // inspected; otherwise a top-level "def" would be counted and immediately forgotten.
    // A closing bracket in column 0 belongs to a statement that started earlier.
    var isModuleLevel = trimmed.Length == line.Length && trimmed[0] is not (')' or ']' or '}');
    if (isModuleLevel)
    {
        _inTypeCheckingBlock = false;
        _functionDepth = 0;
        _classDepth = 0;
    }

    // Track try blocks
    if (trimmed.StartsWith("try:", StringComparison.Ordinal) || trimmed == "try")
    {
        _inTryBlock = true;
    }
    else if (trimmed.StartsWith("except", StringComparison.Ordinal)
        || trimmed.StartsWith("finally:", StringComparison.Ordinal)
        || trimmed == "finally")
    {
        _inTryBlock = false;
    }

    // Track TYPE_CHECKING blocks
    if (trimmed.Contains("TYPE_CHECKING", StringComparison.Ordinal) && trimmed.Contains("if", StringComparison.Ordinal))
    {
        _inTypeCheckingBlock = true;
    }

    // Track function depth
    if (trimmed.StartsWith("def ", StringComparison.Ordinal) || trimmed.StartsWith("async def ", StringComparison.Ordinal))
    {
        _functionDepth++;
    }

    // Track class depth (for nested classes)
    if (trimmed.StartsWith("class ", StringComparison.Ordinal))
    {
        _classDepth++;
    }
}
/// <summary>
/// Matches one comment-free logical line against the known import forms and hands the
/// first form that matches to its parser. Lines that match nothing are ignored.
/// </summary>
/// <param name="line">The line with leading whitespace and comments removed.</param>
/// <param name="lineNumber">1-based line number recorded on the extracted imports.</param>
private void ExtractImports(string line, int lineNumber)
{
    if (StandardImportPattern().Match(line) is { Success: true } standardImport)
    {
        // import foo, bar
        ParseStandardImport(standardImport.Groups["modules"].Value, lineNumber);
    }
    else if (FromImportPattern().Match(line) is { Success: true } fromImport)
    {
        // from foo import bar, baz
        ParseFromImport(
            fromImport.Groups["dots"].Value,
            fromImport.Groups["module"].Value,
            fromImport.Groups["names"].Value,
            lineNumber);
    }
    else if (ImportlibPattern().Match(line) is { Success: true } importlibCall)
    {
        // importlib.import_module()
        ParseImportlibImport(
            importlibCall.Groups["module"].Value,
            importlibCall.Groups["package"].Value,
            lineNumber);
    }
    else if (BuiltinImportPattern().Match(line) is { Success: true } builtinCall)
    {
        // __import__()
        ParseBuiltinImport(builtinCall.Groups["module"].Value, lineNumber);
    }
    else if (PkgutilExtendPathPattern().Match(line) is { Success: true } extendPathCall)
    {
        // pkgutil.extend_path()
        ParsePkgutilExtendPath(extendPathCall.Groups["name"].Value, lineNumber);
    }
}
/// <summary>
/// Records one import per entry of a plain import statement such as
/// <c>import foo, bar as baz, qux</c>.
/// </summary>
/// <param name="modulesStr">The comma-separated module list that follows <c>import</c>.</param>
/// <param name="lineNumber">Line number recorded on each import.</param>
private void ParseStandardImport(string modulesStr, int lineNumber)
{
    var entries = modulesStr
        .Split(',')
        .Select(static entry => entry.Trim())
        .Where(static entry => entry.Length > 0);

    foreach (var entry in entries)
    {
        // "name as alias" entries carry an alias; everything else is taken verbatim.
        var aliasMatch = AsAliasPattern().Match(entry);
        var (moduleName, aliasName) = aliasMatch.Success
            ? (aliasMatch.Groups["name"].Value.Trim(), (string?)aliasMatch.Groups["alias"].Value.Trim())
            : (entry, (string?)null);

        PythonImportKind importKind;
        if (_inTypeCheckingBlock)
        {
            importKind = PythonImportKind.TypeCheckingImport;
        }
        else
        {
            importKind = PythonImportKind.Import;
        }

        _imports.Add(new PythonImport(
            Module: moduleName,
            Names: null,
            Alias: aliasName,
            Kind: importKind,
            RelativeLevel: 0,
            SourceFile: _sourceFile,
            LineNumber: lineNumber,
            Confidence: PythonImportConfidence.Definitive,
            IsConditional: _inTryBlock,
            IsLazy: _functionDepth > 0,
            IsTypeCheckingOnly: _inTypeCheckingBlock));
    }
}
/// <summary>
/// Records a <c>from ... import ...</c> statement as a single import.
/// </summary>
/// <param name="dots">Leading dots of the module reference; their count is the relative level.</param>
/// <param name="module">Module name after the dots (may be empty).</param>
/// <param name="namesStr">Everything after <c>import</c>: <c>*</c>, a name list, or a parenthesized name list.</param>
/// <param name="lineNumber">Line number recorded on the import.</param>
private void ParseFromImport(string dots, string module, string namesStr, int lineNumber)
{
    var level = dots.Length;
    var isRelative = level > 0;
    var fromFuture = module == "__future__";

    // Handle star import
    if (namesStr.Trim() == "*")
    {
        PythonImportKind starKind;
        if (fromFuture)
        {
            starKind = PythonImportKind.FutureImport;
        }
        else if (isRelative)
        {
            starKind = PythonImportKind.RelativeImport;
        }
        else
        {
            starKind = PythonImportKind.StarImport;
        }

        _imports.Add(new PythonImport(
            Module: module,
            Names: [new PythonImportedName("*")],
            Alias: null,
            Kind: starKind,
            RelativeLevel: level,
            SourceFile: _sourceFile,
            LineNumber: lineNumber,
            Confidence: PythonImportConfidence.Definitive,
            IsConditional: _inTryBlock,
            IsLazy: _functionDepth > 0,
            IsTypeCheckingOnly: _inTypeCheckingBlock));
        return;
    }

    // Handle parenthesized imports: from foo import (bar, baz)
    var nameList = namesStr.Trim();
    if (nameList.StartsWith('('))
    {
        nameList = nameList.TrimStart('(').TrimEnd(')');
    }

    var importedNames = new List<PythonImportedName>();
    foreach (var rawName in nameList.Split(','))
    {
        var candidate = rawName.Trim();
        if (candidate.Length == 0)
        {
            continue;
        }

        var aliasMatch = AsAliasPattern().Match(candidate);
        importedNames.Add(aliasMatch.Success
            ? new PythonImportedName(
                aliasMatch.Groups["name"].Value.Trim(),
                aliasMatch.Groups["alias"].Value.Trim())
            : new PythonImportedName(candidate));
    }

    // Precedence: __future__ first, then relative, then TYPE_CHECKING, else a plain from-import.
    var resolvedKind = (fromFuture, isRelative, _inTypeCheckingBlock) switch
    {
        (true, _, _) => PythonImportKind.FutureImport,
        (_, true, _) => PythonImportKind.RelativeImport,
        (_, _, true) => PythonImportKind.TypeCheckingImport,
        _ => PythonImportKind.FromImport,
    };

    _imports.Add(new PythonImport(
        Module: module,
        Names: importedNames,
        Alias: null,
        Kind: resolvedKind,
        RelativeLevel: level,
        SourceFile: _sourceFile,
        LineNumber: lineNumber,
        Confidence: PythonImportConfidence.Definitive,
        IsConditional: _inTryBlock,
        IsLazy: _functionDepth > 0,
        IsTypeCheckingOnly: _inTypeCheckingBlock));
}
/// <summary>
/// Records a dynamic import made through <c>importlib.import_module('foo')</c> or
/// <c>importlib.import_module('.foo', 'bar')</c>.
/// </summary>
/// <remarks>
/// Leading dots of the module literal are counted as the relative level and removed from
/// the recorded module name. The <paramref name="package"/> anchor is accepted for
/// signature compatibility but is not recorded on the import; the previous code only used
/// it to compute a local flag that was never read, which has been removed.
/// </remarks>
/// <param name="module">The quoted module literal as matched by the pattern.</param>
/// <param name="package">The quoted package literal, or an empty string when absent (currently unused).</param>
/// <param name="lineNumber">Line number recorded on the import.</param>
private void ParseImportlibImport(string module, string package, int lineNumber)
{
    var moduleName = module.Trim('\'', '"');
    var relativeLevel = 0;

    // Count leading dots for relative imports
    while (moduleName.StartsWith('.'))
    {
        relativeLevel++;
        moduleName = moduleName[1..];
    }

    _imports.Add(new PythonImport(
        Module: moduleName,
        Names: null,
        Alias: null,
        Kind: PythonImportKind.ImportlibImportModule,
        RelativeLevel: relativeLevel,
        SourceFile: _sourceFile,
        LineNumber: lineNumber,
        Confidence: PythonImportConfidence.High,
        IsConditional: _inTryBlock,
        IsLazy: _functionDepth > 0,
        IsTypeCheckingOnly: _inTypeCheckingBlock));
}
/// <summary>
/// Records a dynamic import made through <c>__import__('module')</c>.
/// </summary>
/// <param name="module">
/// The quoted module literal as matched by the pattern. The capture may carry trailing
/// whitespace (e.g. <c>__import__('foo' )</c>), so whitespace is removed before the quotes;
/// trimming only the quotes would leave the closing quote in the module name.
/// </param>
/// <param name="lineNumber">Line number recorded on the import.</param>
private void ParseBuiltinImport(string module, int lineNumber)
{
    var moduleName = module.Trim().Trim('\'', '"');

    _imports.Add(new PythonImport(
        Module: moduleName,
        Names: null,
        Alias: null,
        Kind: PythonImportKind.BuiltinImport,
        RelativeLevel: 0,
        SourceFile: _sourceFile,
        LineNumber: lineNumber,
        Confidence: PythonImportConfidence.High,
        IsConditional: _inTryBlock,
        IsLazy: _functionDepth > 0,
        IsTypeCheckingOnly: _inTypeCheckingBlock));
}
/// <summary>
/// Records a namespace package extension made through
/// <c>pkgutil.extend_path(__path__, __name__)</c> or a quoted package name.
/// </summary>
/// <param name="name">The matched second argument; quotes and spaces around it are removed.</param>
/// <param name="lineNumber">Line number recorded on the import.</param>
private void ParsePkgutilExtendPath(string name, int lineNumber)
{
    var packageName = name.Trim('\'', '"', ' ');

    var extension = new PythonImport(
        Module: packageName,
        Names: null,
        Alias: null,
        Kind: PythonImportKind.PkgutilExtendPath,
        RelativeLevel: 0,
        SourceFile: _sourceFile,
        LineNumber: lineNumber,
        Confidence: PythonImportConfidence.Medium,
        IsConditional: _inTryBlock,
        IsLazy: _functionDepth > 0,
        IsTypeCheckingOnly: _inTypeCheckingBlock);

    _imports.Add(extension);
}
/// <summary>
/// Finds the index of the first <c>#</c> that is not inside a string literal.
/// </summary>
/// <remarks>
/// Quote state is tracked for single, double and triple quoted strings within this one
/// line only. A backslash skips the character that follows it.
/// </remarks>
/// <param name="line">The line to scan.</param>
/// <returns>The zero-based index of the comment start, or -1 when there is none.</returns>
private static int FindCommentStart(string line)
{
    const string TripleDouble = "\"\"\"";
    const string TripleSingle = "'''";

    bool insideSingle = false, insideDouble = false;
    bool insideTripleSingle = false, insideTripleDouble = false;

    var index = 0;
    while (index < line.Length)
    {
        // Triple quotes are checked first so they are not read as three single delimiters.
        if (line.Length - index >= 3)
        {
            if (string.CompareOrdinal(line, index, TripleDouble, 0, 3) == 0 && !insideSingle && !insideTripleSingle)
            {
                insideTripleDouble = !insideTripleDouble;
                index += 3;
                continue;
            }

            if (string.CompareOrdinal(line, index, TripleSingle, 0, 3) == 0 && !insideDouble && !insideTripleDouble)
            {
                insideTripleSingle = !insideTripleSingle;
                index += 3;
                continue;
            }
        }

        var current = line[index];
        var insideTriple = insideTripleSingle || insideTripleDouble;

        if (current == '"' && !insideSingle && !insideTriple)
        {
            insideDouble = !insideDouble;
        }
        else if (current == '\'' && !insideDouble && !insideTriple)
        {
            insideSingle = !insideSingle;
        }
        else if (current == '#' && !insideSingle && !insideDouble && !insideTriple)
        {
            return index;
        }
        else if (current == '\\' && index + 1 < line.Length)
        {
            // Skip the escaped character as well.
            index++;
        }

        index++;
    }

    return -1;
}
// Standard import: import foo, bar
// Anchored at the start of the line; "modules" captures everything after "import".
[GeneratedRegex(@"^import\s+(?<modules>.+)$", RegexOptions.Compiled)]
private static partial Regex StandardImportPattern();
// From import: from foo import bar (handles relative with dots)
// "dots" captures the leading dots (possibly none), "module" the dotted name after them
// (possibly empty), and "names" everything after "import".
[GeneratedRegex(@"^from\s+(?<dots>\.*)(?<module>[\w.]*)\s+import\s+(?<names>.+)$", RegexOptions.Compiled)]
private static partial Regex FromImportPattern();
// importlib.import_module('module') or importlib.import_module('module', 'package')
// Not anchored, so the call may appear anywhere in the line. Both captures include
// their surrounding quotes; "package" is optional.
[GeneratedRegex(@"importlib\.import_module\s*\(\s*(?<module>['""][^'""]+['""])(?:\s*,\s*(?<package>['""][^'""]*['""]))?", RegexOptions.Compiled)]
private static partial Regex ImportlibPattern();
// __import__('module')
// Not anchored. "module" includes the quotes and any whitespace that follows the closing quote.
[GeneratedRegex(@"__import__\s*\(\s*(?<module>['""][^'""]+['""]\s*)", RegexOptions.Compiled)]
private static partial Regex BuiltinImportPattern();
// pkgutil.extend_path(__path__, __name__) or pkgutil.extend_path(__path__, 'name')
// "name" is either the literal __name__ or a quoted string (quotes and trailing whitespace included).
[GeneratedRegex(@"pkgutil\.extend_path\s*\(\s*__path__\s*,\s*(?<name>__name__|['""][^'""]+['""]\s*)\)", RegexOptions.Compiled)]
private static partial Regex PkgutilExtendPathPattern();
// name as alias
// Must match the whole entry: a dotted "name", the keyword "as", and a single-word "alias".
[GeneratedRegex(@"^(?<name>[\w.]+)\s+as\s+(?<alias>\w+)$", RegexOptions.Compiled)]
private static partial Regex AsAliasPattern();
}

View File

@@ -0,0 +1,349 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
/// <summary>
/// Detects Python runtime in OCI container layers and zipapp archives.
/// Parses layers/, .layers/, layer/ directories for Python site-packages.
/// </summary>
internal static class PythonContainerAdapter
{
private static readonly string[] LayerRootCandidates = { "layers", ".layers", "layer" };
private static readonly string[] SitePackagesPatterns = { "site-packages", "dist-packages" };
private static readonly string[] PythonBinaryNames = { "python", "python3", "python3.10", "python3.11", "python3.12", "python3.13" };
/// <summary>
/// Discovers Python site-packages directories from OCI container layers.
/// </summary>
/// <param name="rootPath">Workspace root to search.</param>
/// <returns>
/// Full paths of the site-packages/dist-packages directories found below direct
/// <c>layer*</c> directories and below the standard layer roots, restricted to paths
/// under <paramref name="rootPath"/>, de-duplicated and sorted case-insensitively.
/// </returns>
public static IReadOnlyCollection<string> DiscoverLayerSitePackages(string rootPath)
{
    var results = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var rootPrefix = EnsureTrailingSeparator(Path.GetFullPath(rootPath));

    // Direct layer directories (layer1/, layer2/, etc.) come first,
    // followed by the entries of the standard OCI layer roots.
    var directLayerDirectories = EnumerateDirectoriesSafe(rootPath)
        .Where(static dir => Path.GetFileName(dir).StartsWith("layer", StringComparison.OrdinalIgnoreCase));

    foreach (var searchRoot in directLayerDirectories.Concat(EnumerateLayerRoots(rootPath)))
    {
        foreach (var candidate in DiscoverSitePackagesInDirectory(searchRoot))
        {
            if (TryNormalizeUnderRoot(rootPrefix, candidate, out var fullPath))
            {
                results.Add(fullPath);
            }
        }
    }

    return results
        .OrderBy(static path => path, StringComparer.OrdinalIgnoreCase)
        .ToArray();
}
/// <summary>
/// Detects Python runtime information from container layers.
/// </summary>
/// <param name="rootPath">Workspace root; it is searched itself together with every layer root.</param>
/// <returns>
/// The versions, interpreter binaries and <c>python*</c> lib directories that were found,
/// or <c>null</c> when neither a version nor a binary was detected.
/// </returns>
public static PythonRuntimeInfo? DetectRuntime(string rootPath)
{
    var versions = new SortedSet<string>(StringComparer.Ordinal);
    var binaries = new List<string>();
    var libPaths = new List<string>();

    // Search in standard locations
    List<string> roots = [rootPath, .. EnumerateLayerRoots(rootPath)];

    foreach (var root in roots)
    {
        // Look for Python binaries in /usr/bin, /usr/local/bin, /bin
        string[] binDirectories =
        [
            Path.Combine(root, "usr", "bin"),
            Path.Combine(root, "usr", "local", "bin"),
            Path.Combine(root, "bin"),
        ];

        foreach (var binDirectory in binDirectories.Where(Directory.Exists))
        {
            foreach (var name in PythonBinaryNames)
            {
                var candidate = Path.Combine(binDirectory, name);
                if (!File.Exists(candidate))
                {
                    continue;
                }

                binaries.Add(candidate);

                if (ExtractVersionFromBinaryName(name) is { Length: > 0 } binaryVersion)
                {
                    versions.Add(binaryVersion);
                }
            }
        }

        // Look for Python lib directories to detect version
        string[] libDirectories =
        [
            Path.Combine(root, "usr", "lib"),
            Path.Combine(root, "usr", "local", "lib"),
            Path.Combine(root, "lib"),
        ];

        foreach (var libDirectory in libDirectories.Where(Directory.Exists))
        {
            foreach (var pythonDirectory in EnumerateDirectoriesSafe(libDirectory))
            {
                var leaf = Path.GetFileName(pythonDirectory);
                if (!leaf.StartsWith("python", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // "python3.11" -> "3.11"; anything not starting with a digit is not a version.
                var suffix = leaf.Replace("python", "", StringComparison.OrdinalIgnoreCase);
                if (suffix.Length > 0 && char.IsDigit(suffix[0]))
                {
                    versions.Add(suffix);
                    libPaths.Add(pythonDirectory);
                }
            }
        }
    }

    var nothingDetected = versions.Count == 0 && binaries.Count == 0;
    return nothingDetected
        ? null
        : new PythonRuntimeInfo(versions.ToArray(), binaries.ToArray(), libPaths.ToArray());
}
/// <summary>
/// Discovers dist-info directories within container layers.
/// </summary>
/// <param name="rootPath">Workspace root to search.</param>
/// <returns>
/// The <c>*.dist-info</c> directories found in layer site-packages and in site-packages
/// directories below the root itself, de-duplicated case-insensitively and sorted ordinally.
/// </returns>
public static IReadOnlyCollection<string> DiscoverDistInfoDirectories(string rootPath)
{
    // Layer site-packages first, then site-packages found directly below the root.
    var sitePackagesRoots = DiscoverLayerSitePackages(rootPath)
        .Concat(DiscoverSitePackagesInDirectory(rootPath));

    var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var distInfo in sitePackagesRoots.SelectMany(EnumerateDistInfoDirectories))
    {
        found.Add(distInfo);
    }

    return found
        .OrderBy(static path => path, StringComparer.Ordinal)
        .ToArray();
}
/// <summary>
/// Lazily yields the <c>*.dist-info</c> directories directly inside a site-packages directory.
/// Yields nothing when the directory is missing or the enumeration cannot be started.
/// </summary>
private static IEnumerable<string> EnumerateDistInfoDirectories(string sitePackages)
{
    if (!Directory.Exists(sitePackages))
    {
        yield break;
    }

    IEnumerable<string>? matches = null;
    try
    {
        matches = Directory.EnumerateDirectories(sitePackages, "*.dist-info");
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        yield break;
    }

    foreach (var match in matches)
    {
        yield return match;
    }
}
/// <summary>
/// Lazily searches a directory tree for directories named like one of the
/// site-packages patterns (compared case-insensitively).
/// </summary>
/// <remarks>
/// The search is an iterative depth-first walk. A matching directory is yielded and not
/// descended into; directories at depth 6 or more are not expanded, and directories that
/// cannot be listed are skipped.
/// </remarks>
private static IEnumerable<string> DiscoverSitePackagesInDirectory(string directory)
{
    const int MaxDepth = 6;

    if (!Directory.Exists(directory))
    {
        yield break;
    }

    var worklist = new Stack<(string Location, int Level)>();
    worklist.Push((directory, 0));

    while (worklist.TryPop(out var item))
    {
        var (location, level) = item;

        var leaf = Path.GetFileName(location);
        var isSitePackages = Array.Exists(
            SitePackagesPatterns,
            pattern => string.Equals(pattern, leaf, StringComparison.OrdinalIgnoreCase));
        if (isSitePackages)
        {
            yield return location;
            continue;
        }

        if (level >= MaxDepth)
        {
            continue;
        }

        IEnumerable<string>? children = null;
        try
        {
            children = Directory.EnumerateDirectories(location);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            continue;
        }

        foreach (var child in children)
        {
            worklist.Push((child, level + 1));
        }
    }
}
/// <summary>
/// Lazily yields the filesystem root of every layer found below the known layer
/// directories of the workspace. For a layer directory that contains an <c>fs</c>
/// subdirectory, that subdirectory is yielded instead of the layer directory itself.
/// </summary>
private static IEnumerable<string> EnumerateLayerRoots(string workspaceRoot)
{
    foreach (var candidateName in LayerRootCandidates)
    {
        var layersDirectory = Path.Combine(workspaceRoot, candidateName);
        if (!Directory.Exists(layersDirectory))
        {
            continue;
        }

        IEnumerable<string>? layers = null;
        try
        {
            layers = Directory.EnumerateDirectories(layersDirectory);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            continue;
        }

        foreach (var layer in layers)
        {
            // Check for fs/ subdirectory (extracted layer filesystem)
            var extractedFilesystem = Path.Combine(layer, "fs");
            yield return Directory.Exists(extractedFilesystem) ? extractedFilesystem : layer;
        }
    }
}
/// <summary>
/// Lazily yields the immediate subdirectories of <paramref name="path"/>.
/// Yields nothing when the directory is missing or the enumeration cannot be started.
/// </summary>
private static IEnumerable<string> EnumerateDirectoriesSafe(string path)
{
    if (!Directory.Exists(path))
    {
        yield break;
    }

    IEnumerable<string>? children = null;
    try
    {
        children = Directory.EnumerateDirectories(path);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        yield break;
    }

    foreach (var child in children)
    {
        yield return child;
    }
}
/// <summary>
/// Converts <paramref name="path"/> to a full path and reports whether it starts with
/// <paramref name="normalizedRoot"/> (case-insensitive prefix comparison).
/// </summary>
/// <param name="normalizedRoot">Full root path used as the required prefix.</param>
/// <param name="path">Path to normalize.</param>
/// <param name="normalizedPath">Receives the full path, whether or not it is under the root.</param>
private static bool TryNormalizeUnderRoot(string normalizedRoot, string path, out string normalizedPath)
{
    var absolute = Path.GetFullPath(path);
    normalizedPath = absolute;
    return absolute.StartsWith(normalizedRoot, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Returns <paramref name="path"/> with the platform directory separator appended,
/// unless it already ends with that separator.
/// </summary>
private static string EnsureTrailingSeparator(string path)
    => path.EndsWith(Path.DirectorySeparatorChar)
        ? path
        : path + Path.DirectorySeparatorChar;
/// <summary>
/// Derives a version string from an interpreter file name such as <c>python3.11</c>.
/// </summary>
/// <returns>
/// The name with every "python" occurrence removed when the remainder starts with a digit;
/// otherwise <c>null</c> (including names that do not start with "python").
/// </returns>
private static string? ExtractVersionFromBinaryName(string binaryName)
{
    if (!binaryName.StartsWith("python", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    var suffix = binaryName.Replace("python", "", StringComparison.OrdinalIgnoreCase);
    return suffix.Length > 0 && char.IsDigit(suffix[0]) ? suffix : null;
}
}
/// <summary>
/// Information about the Python runtime detected in a container.
/// </summary>
/// <param name="Versions">Version strings derived from interpreter binary names and <c>python*</c> lib directory names.</param>
/// <param name="Binaries">Paths of the Python interpreter binaries that were found.</param>
/// <param name="LibPaths">Paths of the <c>python*</c> lib directories that were found.</param>
internal sealed record PythonRuntimeInfo(
IReadOnlyCollection<string> Versions,
IReadOnlyCollection<string> Binaries,
IReadOnlyCollection<string> LibPaths);

View File

@@ -0,0 +1,326 @@
using System.Text.Json;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
/// <summary>
/// Detects Python environment variables (PYTHONPATH, PYTHONHOME) from container
/// configuration files, environment files, and virtual environment configs.
/// </summary>
internal static class PythonEnvironmentDetector
{
private static readonly string[] EnvironmentFileNames =
{
"environment",
".env",
"env",
".docker-env",
"docker-env"
};
private static readonly string[] OciConfigFiles =
{
"config.json",
"container-config.json",
"image-config.json"
};
private static readonly Regex EnvLinePattern = new(
@"^\s*(?:export\s+)?(?<key>PYTHON(?:PATH|HOME|STARTUP|OPTIMIZATION|HASHSEED|UNBUFFERED|DONTWRITEBYTECODE|VERBOSE|DEBUG))\s*=\s*(?<value>.+)$",
RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline);
/// <summary>
/// Detects Python-related environment variables from various sources.
/// </summary>
/// <remarks>
/// Sources are consulted in this order: <c>pyvenv.cfg</c>, environment files in the root
/// and in <c>etc</c>, OCI config files in the root, and <c>etc/environment</c> of every
/// layer root. A later source overwrites a variable set by an earlier one.
/// </remarks>
/// <param name="rootPath">Workspace root to inspect.</param>
/// <param name="cancellationToken">Token forwarded to the file reads.</param>
/// <returns>The detected variables together with the files they came from.</returns>
public static async ValueTask<PythonEnvironment> DetectAsync(string rootPath, CancellationToken cancellationToken)
{
    var detected = new Dictionary<string, PythonEnvVariable>(StringComparer.OrdinalIgnoreCase);
    var origins = new List<string>();

    // Check for pyvenv.cfg (virtual environment config)
    await DetectPyvenvConfigAsync(rootPath, detected, origins, cancellationToken).ConfigureAwait(false);

    // Check for environment files, first in the root and then in /etc
    foreach (var fileName in EnvironmentFileNames)
    {
        string[] candidates =
        [
            Path.Combine(rootPath, fileName),
            Path.Combine(rootPath, "etc", fileName),
        ];

        foreach (var candidate in candidates)
        {
            if (File.Exists(candidate))
            {
                await ParseEnvironmentFileAsync(candidate, detected, origins, cancellationToken).ConfigureAwait(false);
            }
        }
    }

    // Check for OCI config files
    foreach (var configName in OciConfigFiles)
    {
        var configCandidate = Path.Combine(rootPath, configName);
        if (File.Exists(configCandidate))
        {
            await ParseOciConfigAsync(configCandidate, detected, origins, cancellationToken).ConfigureAwait(false);
        }
    }

    // Check container layer roots for environment configs
    foreach (var layerRoot in EnumerateLayerRoots(rootPath))
    {
        var layerEnvironment = Path.Combine(layerRoot, "etc", "environment");
        if (File.Exists(layerEnvironment))
        {
            await ParseEnvironmentFileAsync(layerEnvironment, detected, origins, cancellationToken).ConfigureAwait(false);
        }
    }

    return new PythonEnvironment(detected, origins);
}
/// <summary>
/// Reads <c>pyvenv.cfg</c> from the root, if present, and records its <c>home</c> and
/// <c>include-system-site-packages</c> settings as <c>PYTHONHOME_VENV</c> and
/// <c>PYVENV_INCLUDE_SYSTEM</c>.
/// </summary>
/// <remarks>
/// The file is added to <paramref name="sources"/> as soon as it is found, before it is read.
/// Read failures are ignored; this now includes <see cref="UnauthorizedAccessException"/>,
/// which previously escaped and aborted the whole detection for a single unreadable file.
/// </remarks>
/// <param name="rootPath">Directory expected to contain <c>pyvenv.cfg</c>.</param>
/// <param name="variables">Receives the detected settings.</param>
/// <param name="sources">Receives the path of the config file.</param>
/// <param name="cancellationToken">Token forwarded to the file read.</param>
private static async ValueTask DetectPyvenvConfigAsync(
    string rootPath,
    Dictionary<string, PythonEnvVariable> variables,
    List<string> sources,
    CancellationToken cancellationToken)
{
    var pyvenvPath = Path.Combine(rootPath, "pyvenv.cfg");
    if (!File.Exists(pyvenvPath))
    {
        return;
    }

    sources.Add(pyvenvPath);

    try
    {
        var content = await File.ReadAllTextAsync(pyvenvPath, cancellationToken).ConfigureAwait(false);
        var lines = content.Split('\n', StringSplitOptions.RemoveEmptyEntries);

        foreach (var line in lines)
        {
            var trimmed = line.Trim();
            if (trimmed.StartsWith('#'))
            {
                continue;
            }

            // Entries have the form "key = value"; lines without a key are skipped.
            var separator = trimmed.IndexOf('=');
            if (separator <= 0)
            {
                continue;
            }

            var key = trimmed[..separator].Trim();
            var value = trimmed[(separator + 1)..].Trim();

            if (string.Equals(key, "home", StringComparison.OrdinalIgnoreCase))
            {
                variables["PYTHONHOME_VENV"] = new PythonEnvVariable("PYTHONHOME_VENV", value, pyvenvPath, "pyvenv.cfg");
            }
            else if (string.Equals(key, "include-system-site-packages", StringComparison.OrdinalIgnoreCase))
            {
                variables["PYVENV_INCLUDE_SYSTEM"] = new PythonEnvVariable("PYVENV_INCLUDE_SYSTEM", value, pyvenvPath, "pyvenv.cfg");
            }
        }
    }
    catch (IOException)
    {
        // Ignore read errors
    }
    catch (UnauthorizedAccessException)
    {
        // Ignore files that are not accessible
    }
}
/// <summary>
/// Reads an environment file and records every line that assigns one of the Python
/// variables recognised by the environment line pattern.
/// </summary>
/// <remarks>
/// Values are trimmed and surrounding quotes are removed. The file is added to
/// <paramref name="sources"/> only when at least one line matched.
/// Read failures are ignored; this now includes <see cref="UnauthorizedAccessException"/>,
/// which previously escaped and aborted the whole detection for a single unreadable file.
/// </remarks>
/// <param name="path">Path of the environment file.</param>
/// <param name="variables">Receives the detected variables; existing keys are overwritten.</param>
/// <param name="sources">Receives the path when something was detected.</param>
/// <param name="cancellationToken">Token forwarded to the file read.</param>
private static async ValueTask ParseEnvironmentFileAsync(
    string path,
    Dictionary<string, PythonEnvVariable> variables,
    List<string> sources,
    CancellationToken cancellationToken)
{
    try
    {
        var content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
        var matches = EnvLinePattern.Matches(content);

        if (matches.Count > 0)
        {
            sources.Add(path);
        }

        foreach (Match match in matches)
        {
            var key = match.Groups["key"].Value.Trim();
            var value = match.Groups["value"].Value.Trim().Trim('"', '\'');

            variables[key] = new PythonEnvVariable(key, value, path, "environment");
        }
    }
    catch (IOException)
    {
        // Ignore read errors
    }
    catch (UnauthorizedAccessException)
    {
        // Ignore files that are not accessible
    }
}
/// <summary>
/// Reads an OCI image config JSON file and records every <c>KEY=value</c> entry of its
/// <c>config.Env</c> (or top-level <c>Env</c>) array whose key starts with <c>PYTHON</c>.
/// </summary>
/// <remarks>
/// The file content is not trusted: a root or <c>config</c> value that is not a JSON object
/// and <c>Env</c> entries that are not strings are skipped. Previously these shapes raised
/// an unhandled <see cref="InvalidOperationException"/> from <see cref="JsonElement"/>.
/// Read and parse failures are ignored, now including <see cref="UnauthorizedAccessException"/>.
/// The file is added to <paramref name="sources"/> only when a Python variable was found.
/// </remarks>
/// <param name="path">Path of the JSON config file.</param>
/// <param name="variables">Receives the detected variables; existing keys are overwritten.</param>
/// <param name="sources">Receives the path when something was detected.</param>
/// <param name="cancellationToken">Token forwarded to the JSON parser.</param>
private static async ValueTask ParseOciConfigAsync(
    string path,
    Dictionary<string, PythonEnvVariable> variables,
    List<string> sources,
    CancellationToken cancellationToken)
{
    try
    {
        await using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);

        var root = document.RootElement;

        // TryGetProperty throws on anything but an object (e.g. a config.json holding an array).
        if (root.ValueKind != JsonValueKind.Object)
        {
            return;
        }

        // OCI image config structure: config.Env or Env
        JsonElement envArray = default;

        if (root.TryGetProperty("config", out var config)
            && config.ValueKind == JsonValueKind.Object
            && config.TryGetProperty("Env", out var configEnv))
        {
            envArray = configEnv;
        }
        else if (root.TryGetProperty("Env", out var directEnv))
        {
            envArray = directEnv;
        }

        if (envArray.ValueKind != JsonValueKind.Array)
        {
            return;
        }

        var foundPythonVar = false;

        foreach (var envElement in envArray.EnumerateArray())
        {
            // GetString throws for numbers, objects, etc.; only string entries are meaningful.
            if (envElement.ValueKind != JsonValueKind.String)
            {
                continue;
            }

            var envString = envElement.GetString();
            if (string.IsNullOrWhiteSpace(envString))
            {
                continue;
            }

            var separator = envString.IndexOf('=');
            if (separator <= 0)
            {
                continue;
            }

            var key = envString[..separator];
            var value = envString[(separator + 1)..];

            if (key.StartsWith("PYTHON", StringComparison.OrdinalIgnoreCase))
            {
                variables[key] = new PythonEnvVariable(key, value, path, "oci-config");
                foundPythonVar = true;
            }
        }

        if (foundPythonVar)
        {
            sources.Add(path);
        }
    }
    catch (IOException)
    {
        // Ignore read errors
    }
    catch (UnauthorizedAccessException)
    {
        // Ignore files that are not accessible
    }
    catch (JsonException)
    {
        // Ignore parse errors
    }
}
private static readonly string[] LayerRootCandidates = { "layers", ".layers", "layer" };
/// <summary>
/// Lazily yields the filesystem root of every layer found below the known layer
/// directories of the workspace. For a layer directory that contains an <c>fs</c>
/// subdirectory, that subdirectory is yielded instead of the layer directory itself.
/// </summary>
private static IEnumerable<string> EnumerateLayerRoots(string workspaceRoot)
{
    foreach (var candidateName in LayerRootCandidates)
    {
        var layersDirectory = Path.Combine(workspaceRoot, candidateName);
        if (!Directory.Exists(layersDirectory))
        {
            continue;
        }

        IEnumerable<string>? layers = null;
        try
        {
            layers = Directory.EnumerateDirectories(layersDirectory);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            continue;
        }

        foreach (var layer in layers)
        {
            var extractedFilesystem = Path.Combine(layer, "fs");
            yield return Directory.Exists(extractedFilesystem) ? extractedFilesystem : layer;
        }
    }
}
}
/// <summary>
/// Represents detected Python environment configuration.
/// </summary>
internal sealed class PythonEnvironment
{
    private const string PythonPathKey = "PYTHONPATH";
    private const string PythonHomeKey = "PYTHONHOME";
    private const string VenvHomeKey = "PYTHONHOME_VENV";

    /// <summary>
    /// Creates the environment from the detected variables and the files they came from.
    /// </summary>
    public PythonEnvironment(
        IReadOnlyDictionary<string, PythonEnvVariable> variables,
        IReadOnlyCollection<string> sources)
    {
        Variables = variables;
        Sources = sources;
    }

    /// <summary>Detected variables keyed by name.</summary>
    public IReadOnlyDictionary<string, PythonEnvVariable> Variables { get; }

    /// <summary>Paths of the files that contributed variables.</summary>
    public IReadOnlyCollection<string> Sources { get; }

    /// <summary>Whether a <c>PYTHONPATH</c> variable was detected.</summary>
    public bool HasPythonPath => Variables.ContainsKey(PythonPathKey);

    /// <summary>Whether <c>PYTHONHOME</c> or the virtual environment home was detected.</summary>
    public bool HasPythonHome => Variables.ContainsKey(PythonHomeKey) || Variables.ContainsKey(VenvHomeKey);

    /// <summary>Value of <c>PYTHONPATH</c>, or <c>null</c> when it was not detected.</summary>
    public string? PythonPath => Variables.TryGetValue(PythonPathKey, out var entry) ? entry.Value : null;

    /// <summary>
    /// Value of <c>PYTHONHOME</c>; falls back to the virtual environment home and is
    /// <c>null</c> when neither was detected.
    /// </summary>
    public string? PythonHome
    {
        get
        {
            if (Variables.TryGetValue(PythonHomeKey, out var direct))
            {
                return direct.Value;
            }

            return Variables.TryGetValue(VenvHomeKey, out var venv) ? venv.Value : null;
        }
    }

    /// <summary>
    /// Flattens the environment into metadata entries: for every variable (ordered by key)
    /// an <c>env.&lt;key&gt;</c> value entry followed by an <c>env.&lt;key&gt;.source</c> entry,
    /// and finally an <c>env.sources</c> entry listing the sorted source paths joined by ';'.
    /// </summary>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var metadata = new List<KeyValuePair<string, string?>>();

        foreach (var variable in Variables.Values.OrderBy(v => v.Key, StringComparer.Ordinal))
        {
            var prefix = $"env.{variable.Key.ToLowerInvariant()}";
            metadata.Add(new KeyValuePair<string, string?>(prefix, variable.Value));
            metadata.Add(new KeyValuePair<string, string?>($"{prefix}.source", variable.SourceType));
        }

        if (Sources.Count > 0)
        {
            var orderedSources = Sources.OrderBy(s => s, StringComparer.Ordinal);
            metadata.Add(new KeyValuePair<string, string?>("env.sources", string.Join(';', orderedSources)));
        }

        return metadata;
    }
}
/// <summary>
/// Represents a single Python environment variable.
/// </summary>
/// <param name="Key">Name of the variable.</param>
/// <param name="Value">Value of the variable.</param>
/// <param name="SourcePath">Path of the file the variable was read from.</param>
/// <param name="SourceType">Kind of source that provided the variable, e.g. "pyvenv.cfg", "environment" or "oci-config".</param>
internal sealed record PythonEnvVariable(
string Key,
string Value,
string SourcePath,
string SourceType);

View File

@@ -0,0 +1,447 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
/// <summary>
/// Detects Python startup hooks that run before or during interpreter initialization.
/// These include sitecustomize.py, usercustomize.py, and .pth files.
/// </summary>
internal static class PythonStartupHookDetector
{
private static readonly string[] StartupHookFiles =
{
"sitecustomize.py",
"usercustomize.py"
};
private static readonly string[] SitePackagesPatterns = { "site-packages", "dist-packages" };
private static readonly string[] LayerRootCandidates = { "layers", ".layers", "layer" };
/// <summary>
/// Detects startup hooks in the given root path.
/// </summary>
/// <remarks>
/// The root itself is searched first, then every container layer root. In each of them the
/// site-packages directories are inspected before the Python lib directories. Summary
/// warnings are appended after any warnings produced during the search.
/// </remarks>
/// <param name="rootPath">Workspace root to search.</param>
/// <returns>The hooks and .pth files sorted by path, plus the collected warnings.</returns>
public static PythonStartupHooks Detect(string rootPath)
{
    var hooks = new List<PythonStartupHook>();
    var pthFiles = new List<PythonPthFile>();
    var warnings = new List<string>();

    // The root comes first; layer roots are enumerated lazily afterwards.
    var searchRoots = new[] { rootPath }.Concat(EnumerateLayerRoots(rootPath));

    foreach (var searchRoot in searchRoots)
    {
        foreach (var sitePackages in DiscoverSitePackagesDirectories(searchRoot))
        {
            DetectInSitePackages(sitePackages, hooks, pthFiles, warnings);
        }

        foreach (var libPath in DiscoverPythonLibDirectories(searchRoot))
        {
            DetectInPythonLib(libPath, hooks, warnings);
        }
    }

    // Generate warnings for detected hooks
    var siteCustomizeCount = hooks.Count(h => h.HookType == StartupHookType.SiteCustomize);
    if (siteCustomizeCount > 0)
    {
        warnings.Add($"sitecustomize.py detected in {siteCustomizeCount} location(s); runs on every Python interpreter start");
    }

    var userCustomizeCount = hooks.Count(h => h.HookType == StartupHookType.UserCustomize);
    if (userCustomizeCount > 0)
    {
        warnings.Add($"usercustomize.py detected in {userCustomizeCount} location(s); runs on every Python interpreter start");
    }

    var importPthCount = pthFiles.Count(p => p.HasImportDirective);
    if (importPthCount > 0)
    {
        warnings.Add($"{importPthCount} .pth file(s) with import directives detected; may execute code on startup");
    }

    var orderedHooks = hooks.OrderBy(h => h.Path, StringComparer.Ordinal).ToArray();
    var orderedPthFiles = pthFiles.OrderBy(p => p.Path, StringComparer.Ordinal).ToArray();

    return new PythonStartupHooks(orderedHooks, orderedPthFiles, warnings);
}
/// <summary>
/// Records startup hook files and .pth files located directly inside one site-packages directory.
/// </summary>
/// <param name="sitePackages">Directory to inspect; nothing happens when it does not exist.</param>
/// <param name="hooks">Receives one entry per existing hook file, labelled <c>site-packages</c>.</param>
/// <param name="pthFiles">Receives the analysis of every <c>*.pth</c> file in the directory.</param>
/// <param name="warnings">Not written to by this method.</param>
private static void DetectInSitePackages(
    string sitePackages,
    List<PythonStartupHook> hooks,
    List<PythonPthFile> pthFiles,
    List<string> warnings)
{
    if (!Directory.Exists(sitePackages))
    {
        return;
    }

    // sitecustomize.py / usercustomize.py
    foreach (var fileName in StartupHookFiles)
    {
        var candidate = Path.Combine(sitePackages, fileName);
        if (!File.Exists(candidate))
        {
            continue;
        }

        var isSiteHook = fileName.StartsWith("site", StringComparison.OrdinalIgnoreCase);
        hooks.Add(new PythonStartupHook(
            candidate,
            isSiteHook ? StartupHookType.SiteCustomize : StartupHookType.UserCustomize,
            "site-packages"));
    }

    // .pth files; enumeration is best-effort, so I/O and access failures end the scan quietly.
    try
    {
        foreach (var pthPath in Directory.EnumerateFiles(sitePackages, "*.pth"))
        {
            pthFiles.Add(AnalyzePthFile(pthPath));
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Ignore enumeration and access errors
    }
}
/// <summary>
/// Records startup hook files that sit directly inside a Python lib directory.
/// </summary>
/// <param name="libPath">Directory to inspect; nothing happens when it does not exist.</param>
/// <param name="hooks">Receives one entry per existing hook file, labelled <c>python-lib</c>.</param>
/// <param name="warnings">Not written to by this method.</param>
private static void DetectInPythonLib(string libPath, List<PythonStartupHook> hooks, List<string> warnings)
{
    if (!Directory.Exists(libPath))
    {
        return;
    }

    var existingHooks = StartupHookFiles
        .Select(fileName => (FileName: fileName, FullPath: Path.Combine(libPath, fileName)))
        .Where(candidate => File.Exists(candidate.FullPath));

    foreach (var (fileName, fullPath) in existingHooks)
    {
        // The hook kind is derived from the file name prefix.
        StartupHookType kind;
        if (fileName.StartsWith("site", StringComparison.OrdinalIgnoreCase))
        {
            kind = StartupHookType.SiteCustomize;
        }
        else
        {
            kind = StartupHookType.UserCustomize;
        }

        hooks.Add(new PythonStartupHook(fullPath, kind, "python-lib"));
    }
}
/// <summary>
/// Reads a .pth file and classifies each of its lines.
/// </summary>
/// <remarks>
/// Blank lines and lines starting with <c>#</c> are ignored. A line beginning with <c>import</c>
/// followed by a space or a tab marks the file as carrying an import directive, because the Python
/// <c>site</c> module executes such lines. Every other line is recorded as a path entry. Lines are
/// trimmed and matched case-insensitively, which is deliberately broader than the interpreter's check.
/// </remarks>
/// <param name="path">Path of the .pth file.</param>
/// <returns>
/// The analysis result. When the file cannot be read (I/O error or access denied), the result has
/// no path entries and no import directive.
/// </returns>
private static PythonPthFile AnalyzePthFile(string path)
{
    var hasImportDirective = false;
    var pathEntries = new List<string>();
    try
    {
        foreach (var line in File.ReadAllLines(path))
        {
            var trimmed = line.Trim();
            if (trimmed.Length == 0 || trimmed[0] == '#')
            {
                continue;
            }

            // site.py executes lines starting with "import " or "import\t".
            if (trimmed.StartsWith("import ", StringComparison.OrdinalIgnoreCase) ||
                trimmed.StartsWith("import\t", StringComparison.OrdinalIgnoreCase))
            {
                hasImportDirective = true;
            }
            else
            {
                pathEntries.Add(trimmed);
            }
        }
    }
    catch (IOException)
    {
        // Ignore read errors
    }
    catch (UnauthorizedAccessException)
    {
        // Ignore access errors; otherwise one unreadable file would end the caller's scan
        // of the remaining .pth files in the directory.
    }

    return new PythonPthFile(path, Path.GetFileName(path), hasImportDirective, pathEntries);
}
/// <summary>
/// Walks the directory tree below <paramref name="rootPath"/> depth-first and yields every directory
/// whose name matches one of <c>SitePackagesPatterns</c> (case-insensitive).
/// </summary>
/// <remarks>
/// Matching directories are not descended into. Directories at depth 6 are still name-checked but
/// not expanded. Children whose name starts with <c>.</c>, or equals <c>node_modules</c> or
/// <c>__pycache__</c>, are skipped. Directories that cannot be listed are skipped.
/// NOTE(review): the dot-prefix rule also skips <c>.venv</c> — confirm that is intended.
/// </remarks>
/// <param name="rootPath">Directory to start from; yields nothing when it does not exist.</param>
private static IEnumerable<string> DiscoverSitePackagesDirectories(string rootPath)
{
    const int maxDepth = 6;

    if (!Directory.Exists(rootPath))
    {
        yield break;
    }

    var worklist = new Stack<(string Dir, int Level)>();
    worklist.Push((rootPath, 0));

    while (worklist.TryPop(out var entry))
    {
        var (directory, level) = entry;

        if (SitePackagesPatterns.Contains(Path.GetFileName(directory), StringComparer.OrdinalIgnoreCase))
        {
            yield return directory;
            continue;
        }

        if (level >= maxDepth)
        {
            continue;
        }

        IEnumerable<string> children;
        try
        {
            children = Directory.EnumerateDirectories(directory);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            continue;
        }

        foreach (var child in children)
        {
            if (!IsExcluded(Path.GetFileName(child)))
            {
                worklist.Push((child, level + 1));
            }
        }
    }

    // Skip common non-Python directories
    static bool IsExcluded(string name) =>
        name.StartsWith('.') ||
        string.Equals(name, "node_modules", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(name, "__pycache__", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Yields the immediate subdirectories of <c>usr/lib</c>, <c>usr/local/lib</c> and <c>lib</c>
/// (relative to <paramref name="rootPath"/>) whose name starts with <c>python</c>, ignoring case.
/// </summary>
/// <param name="rootPath">Root the three lib locations are resolved against.</param>
/// <remarks>Lib locations that do not exist or cannot be listed are skipped.</remarks>
private static IEnumerable<string> DiscoverPythonLibDirectories(string rootPath)
{
    string[] libLocations =
    {
        Path.Combine(rootPath, "usr", "lib"),
        Path.Combine(rootPath, "usr", "local", "lib"),
        Path.Combine(rootPath, "lib"),
    };

    foreach (var location in libLocations.Where(candidate => Directory.Exists(candidate)))
    {
        IEnumerable<string> children;
        try
        {
            children = Directory.EnumerateDirectories(location);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            continue;
        }

        foreach (var child in children)
        {
            var isPythonDirectory = Path.GetFileName(child)
                .StartsWith("python", StringComparison.OrdinalIgnoreCase);
            if (isPythonDirectory)
            {
                yield return child;
            }
        }
    }
}
/// <summary>
/// Yields one filesystem root per container layer directory found under the workspace.
/// </summary>
/// <remarks>
/// Each name in <c>LayerRootCandidates</c> is probed under <paramref name="workspaceRoot"/>. For
/// every immediate child of an existing candidate, the child's <c>fs</c> subdirectory is yielded
/// when it exists; otherwise the child itself is yielded. Candidates that cannot be listed are skipped.
/// </remarks>
/// <param name="workspaceRoot">Directory that may contain the layer folders.</param>
private static IEnumerable<string> EnumerateLayerRoots(string workspaceRoot)
{
    foreach (var candidateName in LayerRootCandidates)
    {
        var layersDirectory = Path.Combine(workspaceRoot, candidateName);
        if (!Directory.Exists(layersDirectory))
        {
            continue;
        }

        IEnumerable<string> layers;
        try
        {
            layers = Directory.EnumerateDirectories(layersDirectory);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            continue;
        }

        foreach (var layer in layers)
        {
            var extractedFs = Path.Combine(layer, "fs");
            yield return Directory.Exists(extractedFs) ? extractedFs : layer;
        }
    }
}
}
/// <summary>
/// Type of Python startup hook.
/// </summary>
internal enum StartupHookType
{
/// <summary>Assigned by the detector to hook files whose name starts with <c>site</c> (<c>sitecustomize.py</c>).</summary>
SiteCustomize,
/// <summary>Assigned by the detector to the remaining hook file, <c>usercustomize.py</c>.</summary>
UserCustomize
}
/// <summary>
/// Represents a detected Python startup hook file.
/// </summary>
/// <param name="Path">Path of the hook file: the scanned directory combined with the hook file name.</param>
/// <param name="HookType">Which hook the file is, derived by the detector from the file name.</param>
/// <param name="Location">Label for where the file was found; the detector uses <c>site-packages</c> or <c>python-lib</c>.</param>
internal sealed record PythonStartupHook(
string Path,
StartupHookType HookType,
string Location);
/// <summary>
/// Represents a detected .pth file.
/// </summary>
/// <param name="Path">Path of the .pth file as returned by the directory enumeration.</param>
/// <param name="FileName">File name portion of <paramref name="Path"/>.</param>
/// <param name="HasImportDirective">True when at least one line was classified as an <c>import</c> directive.</param>
/// <param name="PathEntries">Trimmed lines that are neither blank, comments, nor import directives.</param>
internal sealed record PythonPthFile(
string Path,
string FileName,
bool HasImportDirective,
IReadOnlyCollection<string> PathEntries);
/// <summary>
/// Collection of detected Python startup hooks.
/// </summary>
internal sealed class PythonStartupHooks
{
    /// <summary>
    /// Creates the collection from already-detected items; the arguments are stored as given.
    /// </summary>
    /// <param name="hooks">Detected sitecustomize/usercustomize files.</param>
    /// <param name="pthFiles">Detected .pth files.</param>
    /// <param name="warnings">Human-readable warnings about the detected items.</param>
    public PythonStartupHooks(
        IReadOnlyCollection<PythonStartupHook> hooks,
        IReadOnlyCollection<PythonPthFile> pthFiles,
        IReadOnlyCollection<string> warnings)
    {
        Hooks = hooks;
        PthFiles = pthFiles;
        Warnings = warnings;
    }

    /// <summary>Detected startup hook files.</summary>
    public IReadOnlyCollection<PythonStartupHook> Hooks { get; }

    /// <summary>Detected .pth files.</summary>
    public IReadOnlyCollection<PythonPthFile> PthFiles { get; }

    /// <summary>Warnings produced during detection.</summary>
    public IReadOnlyCollection<string> Warnings { get; }

    /// <summary>True when at least one startup hook file was detected.</summary>
    public bool HasStartupHooks => Hooks.Count > 0;

    /// <summary>True when any detected .pth file carries an import directive.</summary>
    public bool HasPthFilesWithImports => PthFiles.Any(p => p.HasImportDirective);

    /// <summary>True when at least one warning is present.</summary>
    public bool HasWarnings => Warnings.Count > 0;

    /// <summary>
    /// Flattens the detection result into metadata pairs.
    /// </summary>
    /// <returns>
    /// Count and <c>;</c>-joined paths per hook type (only for types that were found), the number
    /// of .pth files, count and file names of .pth files with import directives, and one
    /// <c>startupHooks.warning[i]</c> pair per warning in collection order.
    /// </returns>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var entries = new List<KeyValuePair<string, string?>>();

        if (Hooks.Count > 0)
        {
            var siteCustomize = Hooks.Where(h => h.HookType == StartupHookType.SiteCustomize).ToArray();
            var userCustomize = Hooks.Where(h => h.HookType == StartupHookType.UserCustomize).ToArray();

            if (siteCustomize.Length > 0)
            {
                entries.Add(new KeyValuePair<string, string?>("startupHooks.sitecustomize.count", siteCustomize.Length.ToString()));
                entries.Add(new KeyValuePair<string, string?>("startupHooks.sitecustomize.paths", string.Join(';', siteCustomize.Select(h => h.Path))));
            }

            if (userCustomize.Length > 0)
            {
                entries.Add(new KeyValuePair<string, string?>("startupHooks.usercustomize.count", userCustomize.Length.ToString()));
                entries.Add(new KeyValuePair<string, string?>("startupHooks.usercustomize.paths", string.Join(';', userCustomize.Select(h => h.Path))));
            }
        }

        if (PthFiles.Count > 0)
        {
            entries.Add(new KeyValuePair<string, string?>("pthFiles.count", PthFiles.Count.ToString()));

            var withImports = PthFiles.Where(p => p.HasImportDirective).ToArray();
            if (withImports.Length > 0)
            {
                entries.Add(new KeyValuePair<string, string?>("pthFiles.withImports.count", withImports.Length.ToString()));
                entries.Add(new KeyValuePair<string, string?>("pthFiles.withImports.files", string.Join(';', withImports.Select(p => p.FileName))));
            }
        }

        // Enumerate once: IReadOnlyCollection has no indexer, and ElementAt(i) inside an index
        // loop re-walks the sequence for collections that are not lists.
        var warningIndex = 0;
        foreach (var warning in Warnings)
        {
            entries.Add(new KeyValuePair<string, string?>($"startupHooks.warning[{warningIndex}]", warning));
            warningIndex++;
        }

        return entries;
    }

    /// <summary>
    /// Produces file evidence for every startup hook and for every .pth file with an import directive.
    /// </summary>
    /// <param name="context">Analyzer context used to turn absolute paths into relative ones.</param>
    /// <returns>Hook evidence first (in <see cref="Hooks"/> order), followed by .pth evidence.</returns>
    public IReadOnlyCollection<LanguageComponentEvidence> ToEvidence(LanguageAnalyzerContext context)
    {
        var evidence = new List<LanguageComponentEvidence>();

        foreach (var hook in Hooks)
        {
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.File,
                hook.HookType == StartupHookType.SiteCustomize ? "sitecustomize" : "usercustomize",
                PythonPathHelper.NormalizeRelative(context, hook.Path),
                Value: null,
                Sha256: null));
        }

        foreach (var pthFile in PthFiles.Where(p => p.HasImportDirective))
        {
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.File,
                "pth-import",
                PythonPathHelper.NormalizeRelative(context, pthFile.Path),
                Value: null,
                Sha256: null));
        }

        return evidence;
    }
}

View File

@@ -0,0 +1,279 @@
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Resolver;
/// <summary>
/// Result of resolving a module name to a file location.
/// </summary>
/// <remarks>
/// <see cref="NotFound"/> is the only member that <c>PythonModuleResolution.IsResolved</c> treats
/// as unresolved; every other member counts as a successful resolution.
/// </remarks>
internal enum PythonResolutionKind
{
/// <summary>
/// Regular module (.py file).
/// </summary>
SourceModule,
/// <summary>
/// Compiled module (.pyc file).
/// </summary>
BytecodeModule,
/// <summary>
/// Regular package (__init__.py exists).
/// </summary>
Package,
/// <summary>
/// Namespace package (PEP 420, no __init__.py).
/// </summary>
NamespacePackage,
/// <summary>
/// Built-in module (part of Python interpreter).
/// </summary>
BuiltinModule,
/// <summary>
/// Frozen module (compiled into Python binary).
/// </summary>
// NOTE(review): no resolver code reviewed alongside this enum assigns this value — confirm it is produced elsewhere.
FrozenModule,
/// <summary>
/// Extension module (.so, .pyd).
/// </summary>
ExtensionModule,
/// <summary>
/// Module from a zip archive.
/// </summary>
// NOTE(review): no resolver code reviewed alongside this enum assigns this value — confirm it is produced elsewhere.
ZipModule,
/// <summary>
/// Module could not be resolved.
/// </summary>
NotFound
}
/// <summary>
/// Confidence level for module resolution.
/// </summary>
/// <remarks>
/// The explicit numeric values increase with certainty, so members can be compared numerically.
/// </remarks>
internal enum PythonResolutionConfidence
{
/// <summary>
/// Low confidence - heuristic or partial match.
/// </summary>
Low = 0,
/// <summary>
/// Medium confidence - likely correct but unverified.
/// </summary>
Medium = 1,
/// <summary>
/// High confidence - clear match.
/// </summary>
High = 2,
/// <summary>
/// Definitive - exact file found.
/// </summary>
Definitive = 3
}
/// <summary>
/// Outcome of mapping a Python module name onto a location in the scanned filesystem.
/// </summary>
/// <param name="ModuleName">Fully qualified (dotted) module name.</param>
/// <param name="Kind">What the name resolved to.</param>
/// <param name="VirtualPath">Resolved virtual path, or <c>null</c> when there is none.</param>
/// <param name="AbsolutePath">Resolved absolute path, or <c>null</c> when there is none.</param>
/// <param name="SearchPath">Search path entry under which the module was found.</param>
/// <param name="Source">Origin classification of the resolved file.</param>
/// <param name="Confidence">How certain the resolution is.</param>
/// <param name="NamespacePaths">For namespace packages, the directories contributing to the package.</param>
/// <param name="ResolverTrace">Optional log of resolution steps, for debugging.</param>
internal sealed record PythonModuleResolution(
    string ModuleName,
    PythonResolutionKind Kind,
    string? VirtualPath,
    string? AbsolutePath,
    string? SearchPath,
    PythonFileSource Source,
    PythonResolutionConfidence Confidence,
    IReadOnlyList<string>? NamespacePaths = null,
    IReadOnlyList<string>? ResolverTrace = null)
{
    /// <summary>
    /// True for every kind except <see cref="PythonResolutionKind.NotFound"/>.
    /// </summary>
    public bool IsResolved => Kind is not PythonResolutionKind.NotFound;

    /// <summary>
    /// True when the module is a regular package or a namespace package.
    /// </summary>
    public bool IsPackage =>
        Kind == PythonResolutionKind.Package || Kind == PythonResolutionKind.NamespacePackage;

    /// <summary>
    /// True when the module is a namespace package.
    /// </summary>
    public bool IsNamespacePackage => Kind is PythonResolutionKind.NamespacePackage;

    /// <summary>
    /// True when the module is built into, or frozen into, the interpreter.
    /// </summary>
    public bool IsBuiltin =>
        Kind == PythonResolutionKind.BuiltinModule || Kind == PythonResolutionKind.FrozenModule;

    /// <summary>
    /// Everything before the last dot of <see cref="ModuleName"/>, or <c>null</c> when the name has no dot.
    /// </summary>
    public string? ParentModule
    {
        get
        {
            var separator = ModuleName.LastIndexOf('.');
            if (separator < 0)
            {
                return null;
            }

            return ModuleName.Substring(0, separator);
        }
    }

    /// <summary>
    /// Everything after the last dot of <see cref="ModuleName"/>, or the whole name when it has no dot.
    /// </summary>
    // LastIndexOf returns -1 when there is no dot, so the substring then starts at index 0.
    public string SimpleName => ModuleName.Substring(ModuleName.LastIndexOf('.') + 1);

    /// <summary>
    /// Builds a resolution for a module that could not be located.
    /// </summary>
    /// <param name="moduleName">Name that failed to resolve.</param>
    /// <param name="trace">Optional resolution trace to attach.</param>
    public static PythonModuleResolution NotFound(string moduleName, IReadOnlyList<string>? trace = null)
    {
        return new PythonModuleResolution(
            moduleName,
            PythonResolutionKind.NotFound,
            VirtualPath: null,
            AbsolutePath: null,
            SearchPath: null,
            PythonFileSource.Unknown,
            PythonResolutionConfidence.Low,
            NamespacePaths: null,
            ResolverTrace: trace);
    }

    /// <summary>
    /// Builds a resolution for a module that is built into the interpreter.
    /// </summary>
    /// <param name="moduleName">Name of the built-in module.</param>
    public static PythonModuleResolution Builtin(string moduleName)
    {
        return new PythonModuleResolution(
            moduleName,
            PythonResolutionKind.BuiltinModule,
            VirtualPath: null,
            AbsolutePath: null,
            SearchPath: null,
            PythonFileSource.Unknown,
            PythonResolutionConfidence.Definitive,
            NamespacePaths: null,
            ResolverTrace: null);
    }

    /// <summary>
    /// Produces metadata pairs describing this resolution.
    /// </summary>
    /// <param name="prefix">Prefix placed before each key, separated from it by a dot.</param>
    /// <returns>
    /// Always <c>module</c>, <c>kind</c> and <c>confidence</c>; additionally <c>path</c>,
    /// <c>searchPath</c> and <c>namespacePaths</c> (joined with <c>;</c>) when those values are present.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        KeyValuePair<string, string?> Entry(string suffix, string? value) => new(prefix + suffix, value);

        yield return Entry(".module", ModuleName);
        yield return Entry(".kind", Kind.ToString());
        yield return Entry(".confidence", Confidence.ToString());

        if (VirtualPath is not null)
        {
            yield return Entry(".path", VirtualPath);
        }

        if (SearchPath is not null)
        {
            yield return Entry(".searchPath", SearchPath);
        }

        if (NamespacePaths is { Count: > 0 } contributors)
        {
            yield return Entry(".namespacePaths", string.Join(';', contributors));
        }
    }
}
/// <summary>
/// One entry in the ordered list of locations searched during module resolution.
/// </summary>
/// <param name="Path">Location to search.</param>
/// <param name="Priority">Ordering key; entries with a lower value are searched first.</param>
/// <param name="Kind">What sort of location this entry is.</param>
/// <param name="FromPthFile">The .pth file that contributed this entry, or <c>null</c> when none did.</param>
internal sealed record PythonSearchPath(
    string Path,
    int Priority,
    PythonSearchPathKind Kind,
    string? FromPthFile = null)
{
    /// <summary>
    /// True when this entry was contributed by a .pth file.
    /// </summary>
    public bool IsFromPthFile
    {
        get { return FromPthFile != null; }
    }
}
/// <summary>
/// Kind of search path entry.
/// </summary>
/// <remarks>
/// NOTE(review): the resolver code reviewed alongside this enum assigns only
/// <see cref="ScriptDirectory"/>, <see cref="SitePackages"/>, <see cref="EditableInstall"/>,
/// <see cref="ZipArchive"/> and <see cref="PthFile"/> — confirm where the other members are produced.
/// </remarks>
internal enum PythonSearchPathKind
{
/// <summary>
/// The script's directory or current working directory.
/// </summary>
ScriptDirectory,
/// <summary>
/// PYTHONPATH environment variable.
/// </summary>
PythonPath,
/// <summary>
/// Site-packages directory.
/// </summary>
SitePackages,
/// <summary>
/// Standard library path.
/// </summary>
StandardLibrary,
/// <summary>
/// User site-packages (--user installations).
/// </summary>
UserSitePackages,
/// <summary>
/// Path added via .pth file.
/// </summary>
PthFile,
/// <summary>
/// Zip archive path.
/// </summary>
ZipArchive,
/// <summary>
/// Editable install path.
/// </summary>
EditableInstall,
/// <summary>
/// Virtual environment path.
/// </summary>
VirtualEnv,
/// <summary>
/// Custom or unknown path.
/// </summary>
Custom
}

View File

@@ -0,0 +1,538 @@
using System.Collections.Frozen;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Resolver;
/// <summary>
/// Resolves Python module names to file locations following importlib semantics.
/// Supports namespace packages (PEP 420), .pth files, zipimport, and site-packages precedence.
/// </summary>
internal sealed partial class PythonModuleResolver
{
// Virtual filesystem queried for file existence, file metadata and file enumeration.
private readonly PythonVirtualFileSystem _vfs;
// Root directory combined with a file's AbsolutePath when .pth files are read from disk.
private readonly string _rootPath;
// Search path entries; cleared, rebuilt and ordered by priority in InitializeAsync.
private readonly List<PythonSearchPath> _searchPaths = new();
// Memoized results of Resolve, keyed by module name (ordinal comparison).
private readonly Dictionary<string, PythonModuleResolution> _cache = new(StringComparer.Ordinal);
// When true, Resolve allocates a trace list that the resolution steps append to.
private readonly bool _enableTracing;
// Built-in modules that exist in the Python interpreter
// Hard-coded list matched against the full module name (ordinal, case-sensitive) by IsBuiltinModule.
private static readonly FrozenSet<string> BuiltinModules = new HashSet<string>(StringComparer.Ordinal)
{
"sys", "builtins", "_abc", "_ast", "_bisect", "_blake2", "_codecs",
"_collections", "_datetime", "_elementtree", "_functools", "_heapq",
"_imp", "_io", "_json", "_locale", "_lsprof", "_md5", "_operator",
"_pickle", "_posixsubprocess", "_random", "_sha1", "_sha256", "_sha512",
"_sha3", "_signal", "_socket", "_sre", "_stat", "_statistics", "_string",
"_struct", "_symtable", "_thread", "_tracemalloc", "_typing", "_warnings",
"_weakref", "array", "atexit", "binascii", "cmath", "errno", "faulthandler",
"gc", "itertools", "marshal", "math", "posix", "pwd", "select", "time",
"unicodedata", "xxsubtype", "zlib"
}.ToFrozenSet();
// Standard library packages (top-level)
// Hard-coded list of top-level standard library module and package names, compared
// ordinally (case-sensitive). IsStandardLibraryModule checks the first dotted component of
// an import against this set, so a missing name makes a stdlib import look third-party.
private static readonly FrozenSet<string> StandardLibraryModules = new HashSet<string>(StringComparer.Ordinal)
{
    "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
    "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex",
    "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk",
    "cmath", "cmd", "code", "codecs", "codeop", "collections", "colorsys",
    "compileall", "concurrent", "configparser", "contextlib", "contextvars",
    "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes", "curses",
    "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis",
    "distutils", "doctest", "email", "encodings", "enum", "errno",
    "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions",
    "ftplib", "functools", "gc", "getopt", "getpass", "gettext", "glob",
    "graphlib", "grp", "gzip", "hashlib", "heapq", "hmac", "html", "http",
    "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io",
    "ipaddress", "itertools", "json", "keyword", "lib2to3", "linecache",
    "locale", "logging", "lzma", "mailbox", "mailcap", "marshal", "math",
    "mimetypes", "mmap", "modulefinder", "multiprocessing", "netrc", "nis",
    "nntplib", "numbers", "operator", "optparse", "os", "ossaudiodev",
    "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil", "platform",
    "plistlib", "poplib", "posix", "posixpath", "pprint", "profile", "pstats",
    "pty", "pwd", "py_compile", "pyclbr", "pydoc", "queue", "quopri",
    "random", "re", "readline", "reprlib", "resource", "rlcompleter",
    "runpy", "sched", "secrets", "select", "selectors", "shelve", "shlex",
    "shutil", "signal", "site", "smtpd", "smtplib", "sndhdr", "socket",
    "socketserver", "spwd", "sqlite3", "ssl", "stat", "statistics", "string",
    "stringprep", "struct", "subprocess", "sunau", "symtable", "sys",
    "sysconfig", "syslog", "tabnanny", "tarfile", "telnetlib", "tempfile",
    "termios", "test", "textwrap", "threading", "time", "timeit", "tkinter",
    "token", "tokenize", "tomllib", "trace", "traceback", "tracemalloc",
    "tty", "turtle", "turtledemo", "types", "typing", "unicodedata",
    "unittest", "urllib", "uu", "uuid", "venv", "warnings", "wave",
    "weakref", "webbrowser", "winreg", "winsound", "wsgiref", "xdrlib",
    "xml", "xmlrpc", "zipapp", "zipfile", "zipimport", "zlib", "zoneinfo",
    "_thread",
    // Standard library names that were missing from the list above.
    "__future__", "antigravity", "ensurepip", "genericpath", "msilib", "msvcrt",
    "ntpath", "nturl2path", "opcode", "pydoc_data", "sre_compile", "sre_constants",
    "sre_parse", "this"
}.ToFrozenSet();
/// <summary>
/// Creates a resolver over the given virtual filesystem.
/// </summary>
/// <param name="vfs">Virtual filesystem to resolve modules against.</param>
/// <param name="rootPath">Root directory used when reading .pth files from disk.</param>
/// <param name="enableTracing">When true, resolutions carry a trace of the steps taken.</param>
/// <exception cref="ArgumentNullException"><paramref name="vfs"/> or <paramref name="rootPath"/> is null.</exception>
public PythonModuleResolver(PythonVirtualFileSystem vfs, string rootPath, bool enableTracing = false)
{
    ArgumentNullException.ThrowIfNull(vfs);
    ArgumentNullException.ThrowIfNull(rootPath);

    _vfs = vfs;
    _rootPath = rootPath;
    _enableTracing = enableTracing;
}
/// <summary>
/// Gets the configured search paths.
/// </summary>
/// <remarks>
/// Exposes the resolver's own list as a read-only view; it is repopulated by <c>InitializeAsync</c>.
/// </remarks>
public IReadOnlyList<PythonSearchPath> SearchPaths => _searchPaths;
/// <summary>
/// Initializes the resolver by building search paths from the VFS.
/// </summary>
/// <remarks>
/// Clears previously built search paths and cached resolutions, adds the entries derived from the
/// VFS, adds entries contributed by .pth files, and finally orders all entries by priority.
/// Many entries share a priority, so the ordering is stable: entries with equal priority keep
/// the order in which they were added.
/// </remarks>
/// <param name="cancellationToken">Token observed while .pth files are processed.</param>
/// <returns>This resolver, to allow chaining.</returns>
public async Task<PythonModuleResolver> InitializeAsync(CancellationToken cancellationToken = default)
{
    _searchPaths.Clear();
    _cache.Clear();

    // Build search paths from VFS
    BuildSearchPaths();

    // Process .pth files
    await ProcessPthFilesAsync(cancellationToken).ConfigureAwait(false);

    // Sort by priority. List<T>.Sort is not stable, so use OrderBy (which is) to keep the
    // insertion order of entries that share a priority.
    var ordered = _searchPaths.OrderBy(entry => entry.Priority).ToArray();
    _searchPaths.Clear();
    _searchPaths.AddRange(ordered);

    return this;
}
/// <summary>
/// Maps an absolute module name to its location, consulting the cache first.
/// </summary>
/// <param name="moduleName">Dotted module name.</param>
/// <returns>
/// A not-found resolution for a null or empty name (not cached); otherwise the cached result, a
/// built-in resolution, or the result of searching the configured paths. New results are cached.
/// </returns>
public PythonModuleResolution Resolve(string moduleName)
{
    if (string.IsNullOrEmpty(moduleName))
    {
        return PythonModuleResolution.NotFound(moduleName ?? string.Empty);
    }

    if (_cache.TryGetValue(moduleName, out var known))
    {
        return known;
    }

    PythonModuleResolution outcome;
    if (IsBuiltinModule(moduleName))
    {
        // Built-ins need no filesystem lookup.
        outcome = PythonModuleResolution.Builtin(moduleName);
    }
    else
    {
        var trace = _enableTracing ? new List<string>() : null;
        outcome = ResolveFromSearchPaths(moduleName, trace);
    }

    _cache[moduleName] = outcome;
    return outcome;
}
/// <summary>
/// Resolves a relative import from a given package context.
/// </summary>
/// <remarks>
/// Level 1 refers to the package that contains the importing module; each additional level moves
/// one package further up. When <paramref name="fromModule"/> is itself a package, that package is
/// the starting point; when it is a plain module, its parent package is.
/// </remarks>
/// <param name="moduleName">Name after the leading dots; may be empty (<c>from . import x</c>).</param>
/// <param name="relativeLevel">Number of leading dots; values of zero or less mean an absolute import.</param>
/// <param name="fromModule">Fully qualified name of the importing module.</param>
/// <returns>
/// The resolution of the computed absolute name, or a not-found resolution when the level climbs
/// above the top-level package.
/// </returns>
public PythonModuleResolution ResolveRelative(
    string moduleName,
    int relativeLevel,
    string fromModule)
{
    if (relativeLevel <= 0)
    {
        return Resolve(moduleName);
    }

    var fromParts = fromModule.Split('.');

    // Check if fromModule is a package or module
    var isFromPackage = Resolve(fromModule).IsPackage;

    // Number of components in the importing module's own package: the module itself when it is
    // a package, otherwise its parent.
    var packageDepth = isFromPackage ? fromParts.Length : fromParts.Length - 1;

    if (relativeLevel > packageDepth)
    {
        return PythonModuleResolution.NotFound("relative import beyond top-level package");
    }

    // Level 1 keeps the whole package; every further level drops one trailing component.
    var baseLength = packageDepth - (relativeLevel - 1);
    var basePackage = string.Join('.', fromParts[..baseLength]);

    // Build the absolute module name
    var absoluteName = string.IsNullOrEmpty(moduleName)
        ? basePackage
        : string.IsNullOrEmpty(basePackage)
            ? moduleName
            : $"{basePackage}.{moduleName}";

    return Resolve(absoluteName);
}
/// <summary>
/// Tells whether the top-level component of a module name is a known standard library or built-in module.
/// </summary>
/// <param name="moduleName">Dotted module name; only the part before the first dot is examined.</param>
public static bool IsStandardLibraryModule(string moduleName)
{
    var firstDot = moduleName.IndexOf('.');
    var root = firstDot < 0 ? moduleName : moduleName[..firstDot];

    if (StandardLibraryModules.Contains(root))
    {
        return true;
    }

    return BuiltinModules.Contains(root);
}
/// <summary>
/// Tells whether the exact module name is in the built-in module list.
/// </summary>
/// <param name="moduleName">Module name, compared as a whole (no dotted-prefix handling).</param>
public static bool IsBuiltinModule(string moduleName) => BuiltinModules.Contains(moduleName);
/// <summary>
/// Populates the search path list from the locations the virtual filesystem exposes.
/// </summary>
/// <remarks>
/// Source tree roots get consecutive priorities starting at 0. With <c>n</c> roots, editable
/// installs get <c>n + 50</c>, site-packages directories <c>n + 100</c> and zip archives
/// <c>n + 200</c>. Entries are appended in the order roots, site-packages, editable, zip.
/// </remarks>
private void BuildSearchPaths()
{
    void Register(string path, int priority, PythonSearchPathKind kind) =>
        _searchPaths.Add(new PythonSearchPath(path, priority, kind));

    // 1. Script directory / source tree roots
    var rootCount = 0;
    foreach (var sourceRoot in _vfs.SourceTreeRoots)
    {
        Register(sourceRoot, rootCount, PythonSearchPathKind.ScriptDirectory);
        rootCount++;
    }

    // 2. Site-packages directories (after the script directories)
    foreach (var sitePackagesDirectory in _vfs.SitePackagesPaths)
    {
        Register(sitePackagesDirectory, rootCount + 100, PythonSearchPathKind.SitePackages);
    }

    // 3. Editable install locations (between script directories and site-packages)
    foreach (var editableLocation in _vfs.EditablePaths)
    {
        Register(editableLocation, rootCount + 50, PythonSearchPathKind.EditableInstall);
    }

    // 4. Zip archives
    foreach (var zipArchive in _vfs.ZipArchivePaths)
    {
        Register(zipArchive, rootCount + 200, PythonSearchPathKind.ZipArchive);
    }
}
/// <summary>
/// Reads every .pth file that the VFS attributes to site-packages and adds the paths it lists
/// to the search paths.
/// </summary>
/// <param name="cancellationToken">Checked before each file and passed to the file read.</param>
/// <remarks>
/// Processing is best-effort: a file that cannot be read, whether because of an I/O error or
/// because access is denied, is skipped and the remaining files are still processed.
/// </remarks>
private async Task ProcessPthFilesAsync(CancellationToken cancellationToken)
{
    // Find all .pth files in site-packages
    var pthFiles = _vfs.EnumerateFiles(string.Empty, "*.pth")
        .Where(f => f.Source == PythonFileSource.SitePackages);

    foreach (var pthFile in pthFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            var lines = await ReadPthFileAsync(pthFile, cancellationToken).ConfigureAwait(false);
            ProcessPthFileLines(lines, pthFile.VirtualPath);
        }
        catch (IOException)
        {
            // Skip unreadable files
        }
        catch (UnauthorizedAccessException)
        {
            // Skip files we are not permitted to read
        }
    }
}
/// <summary>
/// Loads the non-empty lines of a .pth file from disk.
/// </summary>
/// <param name="file">VFS entry for the .pth file.</param>
/// <param name="cancellationToken">Passed to the file read.</param>
/// <returns>
/// The file content split on <c>\n</c> with empty entries removed. An empty array is returned when
/// the file comes from an archive, or when it exists neither under the resolver root nor at its
/// own absolute path.
/// </returns>
private async Task<string[]> ReadPthFileAsync(PythonVirtualFile file, CancellationToken cancellationToken)
{
    if (file.IsFromArchive)
    {
        return Array.Empty<string>();
    }

    // Prefer the location under the resolver root; fall back to the path exactly as recorded.
    var candidates = new[] { Path.Combine(_rootPath, file.AbsolutePath), file.AbsolutePath };
    var location = candidates.FirstOrDefault(candidate => File.Exists(candidate));
    if (location is null)
    {
        return Array.Empty<string>();
    }

    var text = await File.ReadAllTextAsync(location, cancellationToken).ConfigureAwait(false);
    return text.Split('\n', StringSplitOptions.RemoveEmptyEntries);
}
/// <summary>
/// Turns the path lines of one .pth file into search path entries.
/// </summary>
/// <param name="lines">Raw lines of the .pth file.</param>
/// <param name="pthFilePath">Virtual path of the .pth file; relative entries are resolved against its directory.</param>
/// <remarks>
/// Blank lines, <c>#</c> comments and import directives (<c>import</c> followed by a space or a
/// tab, which the Python <c>site</c> module executes rather than treating as a path) are skipped.
/// </remarks>
private void ProcessPthFileLines(string[] lines, string pthFilePath)
{
    var pthDirectory = Path.GetDirectoryName(pthFilePath) ?? string.Empty;

    // Fixed starting priority for entries from this file. BuildSearchPaths places editable
    // installs at (root count + 50), site-packages at (root count + 100) and zip archives at
    // (root count + 200), so with fewer than 50 source roots these entries sort after
    // site-packages and before zip archives.
    var priority = 150;

    foreach (var rawLine in lines)
    {
        var line = rawLine.Trim();

        // Skip empty lines and comments
        if (line.Length == 0 || line[0] == '#')
        {
            continue;
        }

        // Import directives are executed by the interpreter, not added to the path; skip them
        // for static analysis.
        if (line.StartsWith("import ", StringComparison.Ordinal) ||
            line.StartsWith("import\t", StringComparison.Ordinal))
        {
            continue;
        }

        // The line is a path to add
        var path = line;

        // If relative, resolve relative to the .pth file's directory
        if (!Path.IsPathRooted(path))
        {
            path = Path.Combine(pthDirectory, path);
        }

        _searchPaths.Add(new PythonSearchPath(
            Path: path.Replace('\\', '/'),
            Priority: priority++,
            Kind: PythonSearchPathKind.PthFile,
            FromPthFile: pthFilePath));
    }
}
/// <summary>
/// Looks a module up in every search path, in order.
/// </summary>
/// <param name="moduleName">Dotted module name.</param>
/// <param name="trace">Optional list that receives a log of the steps taken.</param>
/// <returns>
/// The first match that is not a namespace package. If only namespace package directories were
/// found, a namespace package resolution listing all of them and reporting the search path of the
/// first contributing entry. Otherwise a not-found resolution.
/// </returns>
private PythonModuleResolution ResolveFromSearchPaths(string moduleName, List<string>? trace)
{
    var parts = moduleName.Split('.');
    var namespacePaths = new List<string>();
    string? namespaceSearchPath = null;

    foreach (var searchPath in _searchPaths)
    {
        trace?.Add($"Searching in {searchPath.Kind}: {searchPath.Path}");

        // Try to resolve the full module path
        var resolution = TryResolveInPath(moduleName, parts, searchPath, trace);
        if (resolution is null)
        {
            continue;
        }

        if (resolution.Kind != PythonResolutionKind.NamespacePackage)
        {
            return resolution;
        }

        // A namespace package only wins when no later entry provides a concrete module or
        // package, so keep collecting its portions and continue searching.
        if (resolution.VirtualPath is not null)
        {
            namespacePaths.Add(resolution.VirtualPath);
            namespaceSearchPath ??= searchPath.Path;
        }
    }

    // If we found namespace package contributions, return a namespace package resolution
    if (namespacePaths.Count > 0)
    {
        return new PythonModuleResolution(
            ModuleName: moduleName,
            Kind: PythonResolutionKind.NamespacePackage,
            VirtualPath: namespacePaths[0],
            AbsolutePath: null,
            // Report the entry that actually contributed the first portion.
            SearchPath: namespaceSearchPath,
            Source: PythonFileSource.SitePackages,
            Confidence: PythonResolutionConfidence.High,
            NamespacePaths: namespacePaths,
            ResolverTrace: trace);
    }

    return PythonModuleResolution.NotFound(moduleName, trace);
}
/// <summary>
/// Attempts to resolve a module beneath a single search path entry.
/// </summary>
/// <param name="moduleName">Full dotted module name, used for the result.</param>
/// <param name="parts">The module name split on dots.</param>
/// <param name="searchPath">Search path entry to look under.</param>
/// <param name="trace">Optional list that receives a log of the steps taken.</param>
/// <returns>
/// The resolution of the final component, or <c>null</c> when an intermediate package directory
/// is missing, the final component is not found, or <paramref name="parts"/> is empty.
/// </returns>
private PythonModuleResolution? TryResolveInPath(
    string moduleName,
    string[] parts,
    PythonSearchPath searchPath,
    List<string>? trace)
{
    if (parts.Length == 0)
    {
        return null;
    }

    var directory = searchPath.Path;

    // Every component except the last must be a package: either a regular one (has
    // __init__.py) or a PEP 420 namespace package (directory with content).
    for (var index = 0; index < parts.Length - 1; index++)
    {
        var candidate = $"{directory}/{parts[index]}";
        var isRegularPackage = _vfs.FileExists($"{candidate}/__init__.py");

        if (!isRegularPackage && !DirectoryExists(candidate))
        {
            trace?.Add($" Package not found: {candidate}");
            return null;
        }

        directory = candidate;
    }

    // The last component could be a module or a package.
    return TryResolveFinalComponent(moduleName, directory, parts[^1], searchPath, trace);
}
/// <summary>
/// Resolves the last component of a module name inside an already-located directory.
/// </summary>
/// <remarks>
/// Candidates are tried in this order: <c>name.py</c>, <c>name.pyc</c>, <c>name.so</c> /
/// <c>name.pyd</c>, <c>name/__init__.py</c>, and finally a <c>name</c> directory with any content
/// (namespace package).
/// NOTE(review): only the bare <c>.so</c>/<c>.pyd</c> suffixes are probed; extension modules whose
/// file name carries an ABI tag would not match — confirm whether that is handled elsewhere.
/// </remarks>
/// <param name="moduleName">Full dotted module name, used for the result.</param>
/// <param name="currentPath">Virtual directory the component is looked up in.</param>
/// <param name="name">The final component.</param>
/// <param name="searchPath">Search path entry being examined.</param>
/// <param name="trace">Optional list that receives a log of the steps taken.</param>
/// <returns>The resolution, or <c>null</c> when no candidate exists.</returns>
private PythonModuleResolution? TryResolveFinalComponent(
    string moduleName,
    string currentPath,
    string name,
    PythonSearchPath searchPath,
    List<string>? trace)
{
    var stem = $"{currentPath}/{name}";

    // Builds the result for a candidate that exists as a file in the VFS.
    PythonModuleResolution FileHit(
        string virtualPath,
        PythonResolutionKind kind,
        PythonResolutionConfidence confidence)
    {
        var entry = _vfs.GetFile(virtualPath);
        return new PythonModuleResolution(
            ModuleName: moduleName,
            Kind: kind,
            VirtualPath: virtualPath,
            AbsolutePath: entry?.AbsolutePath,
            SearchPath: searchPath.Path,
            Source: entry?.Source ?? PythonFileSource.Unknown,
            Confidence: confidence,
            ResolverTrace: trace);
    }

    // 1. Source module
    var sourcePath = stem + ".py";
    if (_vfs.FileExists(sourcePath))
    {
        trace?.Add($" Found module: {sourcePath}");
        return FileHit(sourcePath, PythonResolutionKind.SourceModule, PythonResolutionConfidence.Definitive);
    }

    // 2. Bytecode module
    var compiledPath = stem + ".pyc";
    if (_vfs.FileExists(compiledPath))
    {
        trace?.Add($" Found bytecode: {compiledPath}");
        return FileHit(compiledPath, PythonResolutionKind.BytecodeModule, PythonResolutionConfidence.High);
    }

    // 3. Extension module; .so takes precedence over .pyd
    var sharedObjectPath = stem + ".so";
    var pydPath = stem + ".pyd";
    string? extensionPath = null;
    if (_vfs.FileExists(sharedObjectPath))
    {
        extensionPath = sharedObjectPath;
    }
    else if (_vfs.FileExists(pydPath))
    {
        extensionPath = pydPath;
    }

    if (extensionPath is not null)
    {
        trace?.Add($" Found extension: {extensionPath}");
        return FileHit(extensionPath, PythonResolutionKind.ExtensionModule, PythonResolutionConfidence.Definitive);
    }

    // 4. Regular package
    var initPath = stem + "/__init__.py";
    if (_vfs.FileExists(initPath))
    {
        trace?.Add($" Found package: {initPath}");
        return FileHit(initPath, PythonResolutionKind.Package, PythonResolutionConfidence.Definitive);
    }

    // 5. Namespace package (PEP 420): directory has content but no __init__.py
    if (DirectoryExists(stem))
    {
        trace?.Add($" Found namespace package: {stem}");
        return new PythonModuleResolution(
            ModuleName: moduleName,
            Kind: PythonResolutionKind.NamespacePackage,
            VirtualPath: stem,
            AbsolutePath: null,
            SearchPath: searchPath.Path,
            Source: PythonFileSource.Unknown,
            Confidence: PythonResolutionConfidence.High,
            ResolverTrace: trace);
    }

    trace?.Add($" Not found in {stem}");
    return null;
}
/// <summary>
/// Treats a virtual directory as existing when the VFS enumerates at least one file for it.
/// </summary>
/// <param name="virtualPath">Virtual directory path to probe.</param>
private bool DirectoryExists(string virtualPath) => _vfs.EnumerateFiles(virtualPath, "*").Any();
/// <summary>
/// Discards every memoized resolution.
/// </summary>
public void ClearCache() => _cache.Clear();
/// <summary>
/// Summarizes the content of the resolution cache.
/// </summary>
/// <returns>
/// <c>Total</c> and <c>Cached</c> are both the number of cached entries; <c>Resolved</c> counts the
/// entries that resolved, and <c>NotFound</c> is the remainder.
/// </returns>
public (int Total, int Resolved, int NotFound, int Cached) GetStatistics()
{
    var cachedEntries = _cache.Count;

    var resolvedEntries = 0;
    foreach (var entry in _cache.Values)
    {
        if (entry.IsResolved)
        {
            resolvedEntries++;
        }
    }

    return (cachedEntries, resolvedEntries, cachedEntries - resolvedEntries, cachedEntries);
}
}

View File

@@ -0,0 +1,67 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
/// <summary>
/// Origin of a file exposed through the Python virtual filesystem.
/// </summary>
internal enum PythonFileSource
{
    /// <summary>Origin is unknown or could not be resolved.</summary>
    Unknown = 0,

    /// <summary>Installed package file inside a site-packages directory.</summary>
    SitePackages = 1,

    /// <summary>Entry of a wheel archive (.whl).</summary>
    Wheel = 2,

    /// <summary>Entry of a source distribution (.tar.gz, .zip).</summary>
    Sdist = 3,

    /// <summary>Entry of a zipapp (.pyz, .pyzw).</summary>
    Zipapp = 4,

    /// <summary>File reached through an editable (development mode) install.</summary>
    Editable = 5,

    /// <summary>File taken from a container layer.</summary>
    ContainerLayer = 6,

    /// <summary>File that belongs to the project source tree.</summary>
    SourceTree = 7,

    /// <summary>File from a virtual environment's bin/Scripts directory.</summary>
    VenvBin = 8,

    /// <summary>Python configuration file (pyproject.toml, setup.py, setup.cfg).</summary>
    ProjectConfig = 9,

    /// <summary>Lock file (requirements.txt, Pipfile.lock, poetry.lock).</summary>
    LockFile = 10,

    /// <summary>Module from the standard library.</summary>
    StdLib = 11
}

View File

@@ -0,0 +1,808 @@
using System.Text.Json;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
/// <summary>
/// Normalizes Python project inputs by detecting layouts, version targets,
/// and building a virtual filesystem from various sources.
/// </summary>
internal sealed partial class PythonInputNormalizer
{
/// <summary>
/// Options used for every directory scan in this class: no recursion, inaccessible
/// entries are ignored, and device files and reparse points are skipped.
/// </summary>
private static readonly EnumerationOptions SafeEnumeration = new()
{
RecurseSubdirectories = false,
IgnoreInaccessible = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
};
// Absolute form (Path.GetFullPath) of the root path passed to the constructor.
private readonly string _rootPath;
// Version targets gathered by the Parse*/Detect* methods, in discovery order.
private readonly List<PythonVersionTarget> _versionTargets = new();
// Existing site-packages / dist-packages directories found by DetectSitePackages.
private readonly List<string> _sitePackagesPaths = new();
// *.whl files found by DetectWheels.
private readonly List<string> _wheelPaths = new();
// *.pyz and *.pyzw files found by DetectZipapps.
private readonly List<string> _zipappPaths = new();
// Editable install directories, with their package name, found by DetectEditablesAsync.
private readonly List<(string Path, string? PackageName)> _editablePaths = new();
// Assigned by DetectLayoutAsync; remains Unknown when no marker matches.
private PythonLayoutKind _layout = PythonLayoutKind.Unknown;
// Directory that contains pyvenv.cfg, when DetectLayoutAsync found one.
private string? _venvPath;
// Scripts/ or bin/ directory of the virtual environment, assigned by DetectBinDirectory.
private string? _binPath;
// Interpreter path composed by DetectBinDirectory; that method does not check that the file exists.
private string? _pythonExecutable;
/// <summary>
/// Creates a normalizer for the given root directory; the path is stored in absolute form.
/// </summary>
/// <param name="rootPath">Root directory to analyze.</param>
/// <exception cref="ArgumentException">
/// <paramref name="rootPath"/> is null, empty, or whitespace.
/// </exception>
public PythonInputNormalizer(string rootPath)
{
    _rootPath = string.IsNullOrWhiteSpace(rootPath)
        ? throw new ArgumentException("Root path is required", nameof(rootPath))
        : Path.GetFullPath(rootPath);
}
/// <summary>
/// Layout kind assigned by the analysis.
/// </summary>
public PythonLayoutKind Layout => _layout;

/// <summary>
/// Every version target collected so far, in discovery order.
/// </summary>
public IReadOnlyList<PythonVersionTarget> VersionTargets => _versionTargets;

/// <summary>
/// The version target with the highest confidence; ties are broken by the
/// ordinal order of the source name. Null when nothing was detected.
/// </summary>
public PythonVersionTarget? PrimaryVersionTarget
{
    get
    {
        var ranked = _versionTargets
            .OrderByDescending(static target => target.Confidence)
            .ThenBy(static target => target.Source, StringComparer.Ordinal);

        return ranked.FirstOrDefault();
    }
}

/// <summary>
/// Site-packages directories that were found on disk.
/// </summary>
public IReadOnlyList<string> SitePackagesPaths => _sitePackagesPaths;

/// <summary>
/// Virtual environment directory, or null when none was detected.
/// </summary>
public string? VenvPath => _venvPath;

/// <summary>
/// bin/Scripts directory of the virtual environment, or null when none was detected.
/// </summary>
public string? BinPath => _binPath;
/// <summary>
/// Analyzes the root path: detects the layout, collects version targets, and locates
/// site-packages directories, wheels, zipapps, and editable installs.
/// </summary>
/// <param name="cancellationToken">Token passed to the asynchronous detection steps.</param>
/// <returns>This instance, so the call can be chained.</returns>
public async Task<PythonInputNormalizer> AnalyzeAsync(CancellationToken cancellationToken = default)
{
// Layout detection runs first: it assigns _layout and _venvPath, which later steps read.
await DetectLayoutAsync(cancellationToken).ConfigureAwait(false);
await DetectVersionTargetsAsync(cancellationToken).ConfigureAwait(false);
DetectSitePackages();
DetectWheels();
DetectZipapps();
// Editable detection scans _sitePackagesPaths, so it must follow DetectSitePackages.
await DetectEditablesAsync(cancellationToken).ConfigureAwait(false);
return this;
}
/// <summary>
/// Feeds everything the analysis found into a virtual filesystem builder and builds the result.
/// </summary>
/// <remarks>
/// Sources are registered in a fixed order: site-packages directories, wheels, zipapps,
/// editable installs, and finally the virtual environment's bin directory when it exists on disk.
/// </remarks>
public PythonVirtualFileSystem BuildVirtualFileSystem()
{
    var vfsBuilder = PythonVirtualFileSystem.CreateBuilder();

    // Registration order is kept as detected (original note: later takes precedence).
    foreach (var directory in _sitePackagesPaths)
    {
        vfsBuilder.AddSitePackages(directory);
    }

    foreach (var wheel in _wheelPaths)
    {
        vfsBuilder.AddWheel(wheel);
    }

    foreach (var zipapp in _zipappPaths)
    {
        vfsBuilder.AddZipapp(zipapp);
    }

    foreach (var editable in _editablePaths)
    {
        vfsBuilder.AddEditable(editable.Path, editable.PackageName);
    }

    if (_binPath is { Length: > 0 } binDirectory && Directory.Exists(binDirectory))
    {
        vfsBuilder.AddVenvBin(binDirectory);
    }

    return vfsBuilder.Build();
}
/// <summary>
/// Classifies the root directory by probing for marker files and directories.
/// The first probe that matches decides the layout; when none matches the layout
/// is <see cref="PythonLayoutKind.Unknown"/>.
/// </summary>
private async Task DetectLayoutAsync(CancellationToken cancellationToken)
{
    // A pyvenv.cfg marks a virtual environment: the root itself is checked first,
    // then the conventional sub-directory names.
    string[] venvCandidates =
    {
        _rootPath,
        Path.Combine(_rootPath, "venv"),
        Path.Combine(_rootPath, ".venv"),
        Path.Combine(_rootPath, "env"),
        Path.Combine(_rootPath, ".env"),
    };

    foreach (var candidate in venvCandidates)
    {
        var cfgPath = Path.Combine(candidate, "pyvenv.cfg");
        if (!File.Exists(cfgPath))
        {
            continue;
        }

        _layout = PythonLayoutKind.Virtualenv;
        _venvPath = candidate;
        DetectBinDirectory();
        await ParsePyvenvCfgAsync(cfgPath, cancellationToken).ConfigureAwait(false);
        return;
    }

    _layout = ClassifyNonVenvLayout();
}

/// <summary>
/// Probes the root for the non-virtualenv layouts, in priority order.
/// </summary>
private PythonLayoutKind ClassifyNonVenvLayout()
{
    bool RootFile(string name) => File.Exists(Path.Combine(_rootPath, name));
    bool RootDirectory(string name) => Directory.Exists(Path.Combine(_rootPath, name));

    if (RootFile("poetry.lock"))
    {
        return PythonLayoutKind.Poetry;
    }

    if (RootFile("Pipfile.lock"))
    {
        return PythonLayoutKind.Pipenv;
    }

    if (RootDirectory("conda-meta"))
    {
        return PythonLayoutKind.Conda;
    }

    // Lambda: a handler file, or requirements.txt next to a python/ directory.
    if (RootFile("lambda_function.py") || (RootFile("requirements.txt") && RootDirectory("python")))
    {
        return PythonLayoutKind.Lambda;
    }

    if (RootFile("function.json") || RootFile("host.json"))
    {
        return PythonLayoutKind.AzureFunction;
    }

    if (RootFile("Dockerfile") || Directory.Exists(Path.Combine(_rootPath, "usr", "local", "lib")))
    {
        return PythonLayoutKind.Container;
    }

    // System installation: lib/python*/site-packages below the root.
    var libRoot = Path.Combine(_rootPath, "lib");
    if (Directory.Exists(libRoot))
    {
        try
        {
            var hasSitePackages = Directory
                .EnumerateDirectories(libRoot, "python*", SafeEnumeration)
                .Any(static pythonDir => Directory.Exists(Path.Combine(pythonDir, "site-packages")));

            if (hasSitePackages)
            {
                return PythonLayoutKind.System;
            }
        }
        catch (UnauthorizedAccessException)
        {
            // Inaccessible directories are treated as "not a system layout".
        }
    }

    return PythonLayoutKind.Unknown;
}
/// <summary>
/// Locates the executable directory of the detected virtual environment and records
/// it together with the interpreter path composed for that directory.
/// Does nothing when no virtual environment path is set.
/// </summary>
private void DetectBinDirectory()
{
    if (string.IsNullOrEmpty(_venvPath))
    {
        return;
    }

    // Probe order: Scripts (Windows naming) first, then bin (Unix naming).
    (string Folder, string Executable)[] conventions =
    {
        ("Scripts", "python.exe"),
        ("bin", "python"),
    };

    foreach (var (folder, executable) in conventions)
    {
        var candidate = Path.Combine(_venvPath, folder);
        if (!Directory.Exists(candidate))
        {
            continue;
        }

        _binPath = candidate;
        _pythonExecutable = Path.Combine(candidate, executable);
        return;
    }
}
/// <summary>
/// Collects Python version targets from each supported source in a fixed order:
/// pyproject.toml, setup.py, setup.cfg, runtime.txt, Dockerfile, tox.ini, and finally
/// the names of python* directories under the lib folders.
/// </summary>
/// <param name="cancellationToken">Token passed to each file read.</param>
private async Task DetectVersionTargetsAsync(CancellationToken cancellationToken)
{
await ParsePyprojectTomlAsync(cancellationToken).ConfigureAwait(false);
await ParseSetupPyAsync(cancellationToken).ConfigureAwait(false);
await ParseSetupCfgAsync(cancellationToken).ConfigureAwait(false);
await ParseRuntimeTxtAsync(cancellationToken).ConfigureAwait(false);
await ParseDockerfileAsync(cancellationToken).ConfigureAwait(false);
await ParseToxIniAsync(cancellationToken).ConfigureAwait(false);
DetectVersionFromSitePackages();
}
/// <summary>
/// Reads a <c>pyvenv.cfg</c> file and records the interpreter version it declares as a
/// <see cref="PythonVersionConfidence.Definitive"/> version target.
/// </summary>
/// <param name="path">Path of the pyvenv.cfg file.</param>
/// <param name="cancellationToken">Token passed to the file read.</param>
/// <remarks>
/// Only the keys <c>version</c> and <c>version_info</c> are honoured, matched exactly
/// (case-insensitive); a prefix match would also accept unrelated keys that merely start
/// with "version". A <c>version_info</c> value is reduced to its leading numeric
/// components (for example <c>3.11.4.final.0</c> becomes <c>3.11.4</c>), and a version
/// already recorded from this file is not added a second time, so a file carrying both
/// keys yields one target. An unreadable file is ignored.
/// </remarks>
private async Task ParsePyvenvCfgAsync(string path, CancellationToken cancellationToken)
{
    const string source = "pyvenv.cfg";

    string content;
    try
    {
        content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: an unreadable pyvenv.cfg must not abort the whole analysis.
        return;
    }

    foreach (var line in content.Split('\n'))
    {
        var separatorIndex = line.IndexOf('=');
        if (separatorIndex <= 0)
        {
            continue;
        }

        var key = line[..separatorIndex].Trim();
        var value = line[(separatorIndex + 1)..].Trim();

        string version;
        if (key.Equals("version", StringComparison.OrdinalIgnoreCase))
        {
            version = value;
        }
        else if (key.Equals("version_info", StringComparison.OrdinalIgnoreCase))
        {
            // Keep major[.minor[.patch]] and drop trailing release-level fields.
            version = Regex.Match(value, @"^[0-9]+(?:\.[0-9]+){0,2}").Value;
        }
        else
        {
            continue;
        }

        if (version.Length == 0 ||
            _versionTargets.Any(target => target.Source == source && target.Version == version))
        {
            continue;
        }

        _versionTargets.Add(new PythonVersionTarget(
            version,
            source,
            PythonVersionConfidence.Definitive));
    }
}
/// <summary>
/// Extracts version targets from <c>pyproject.toml</c>: the <c>requires-python</c> value and
/// a <c>python = "..."</c> constraint (the form Poetry uses for its python dependency).
/// </summary>
/// <param name="cancellationToken">Token passed to the file read.</param>
private async Task ParsePyprojectTomlAsync(CancellationToken cancellationToken)
{
    const string source = "pyproject.toml";

    var content = await TryReadTextAsync(Path.Combine(_rootPath, source), cancellationToken).ConfigureAwait(false);
    if (content is null)
    {
        return;
    }

    var requiresPython = RequiresPythonPattern().Match(content);
    if (requiresPython.Success)
    {
        var specifier = requiresPython.Groups["version"].Value.Trim().Trim('"', '\'');

        // ">" and ">=" both express a lower bound.
        AddSpecifierTarget(specifier, source, specifier.StartsWith('>'));
    }

    var pythonConstraint = PythonVersionTomlPattern().Match(content);
    if (pythonConstraint.Success)
    {
        var specifier = pythonConstraint.Groups["version"].Value.Trim().Trim('"', '\'');
        var isMinimum = specifier.StartsWith('^') ||
                        specifier.StartsWith(">=", StringComparison.Ordinal);
        AddSpecifierTarget(specifier, source, isMinimum);
    }
}

/// <summary>
/// Extracts a version target from the <c>python_requires</c> argument in <c>setup.py</c>.
/// </summary>
/// <param name="cancellationToken">Token passed to the file read.</param>
private async Task ParseSetupPyAsync(CancellationToken cancellationToken)
{
    const string source = "setup.py";

    var content = await TryReadTextAsync(Path.Combine(_rootPath, source), cancellationToken).ConfigureAwait(false);
    if (content is null)
    {
        return;
    }

    var match = PythonRequiresPattern().Match(content);
    if (match.Success)
    {
        var specifier = match.Groups["version"].Value.Trim().Trim('"', '\'');
        AddSpecifierTarget(specifier, source, specifier.StartsWith(">=", StringComparison.Ordinal));
    }
}

/// <summary>
/// Extracts a version target from the <c>python_requires</c> option in <c>setup.cfg</c>.
/// </summary>
/// <param name="cancellationToken">Token passed to the file read.</param>
private async Task ParseSetupCfgAsync(CancellationToken cancellationToken)
{
    const string source = "setup.cfg";

    var content = await TryReadTextAsync(Path.Combine(_rootPath, source), cancellationToken).ConfigureAwait(false);
    if (content is null)
    {
        return;
    }

    var match = PythonRequiresCfgPattern().Match(content);
    if (match.Success)
    {
        var specifier = match.Groups["version"].Value.Trim();
        AddSpecifierTarget(specifier, source, specifier.StartsWith(">=", StringComparison.Ordinal));
    }
}

/// <summary>
/// Reads a text file on a best-effort basis: returns null when the file is missing or
/// cannot be read (I/O error or access denied). Cancellation still propagates.
/// </summary>
private static async Task<string?> TryReadTextAsync(string path, CancellationToken cancellationToken)
{
    if (!File.Exists(path))
    {
        return null;
    }

    try
    {
        return await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        return null;
    }
}

/// <summary>
/// Records a high-confidence version target from a version specifier.
/// Only the first comma-separated clause is used and its leading comparison operator is
/// removed, so <c>&gt;=3.8,&lt;4.0</c> is stored as <c>3.8</c> rather than <c>3.8,&lt;4.0</c>.
/// Nothing is recorded when no version text remains.
/// </summary>
private void AddSpecifierTarget(string specifier, string source, bool isMinimum)
{
    var firstClause = specifier.Split(',', 2)[0];
    var version = Regex.Replace(firstClause, @"^[\^><=!~]+", string.Empty).Trim();
    if (version.Length == 0)
    {
        return;
    }

    _versionTargets.Add(new PythonVersionTarget(
        version,
        source,
        PythonVersionConfidence.High,
        isMinimum));
}
/// <summary>
/// Extracts a definitive version target from <c>runtime.txt</c> when its content starts
/// with <c>python-X.Y</c> or <c>python-X.Y.Z</c> (Heroku format).
/// </summary>
/// <param name="cancellationToken">Token passed to the file read.</param>
/// <remarks>
/// A file that cannot be read, whether through an I/O error or denied access, is ignored,
/// matching how the editable-install scan treats unreadable files.
/// </remarks>
private async Task ParseRuntimeTxtAsync(CancellationToken cancellationToken)
{
    var path = Path.Combine(_rootPath, "runtime.txt");
    if (!File.Exists(path))
    {
        return;
    }

    string content;
    try
    {
        content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: an unreadable file must not abort the analysis.
        return;
    }

    var match = RuntimeTxtPattern().Match(content.Trim());
    if (match.Success)
    {
        _versionTargets.Add(new PythonVersionTarget(
            match.Groups["version"].Value,
            "runtime.txt",
            PythonVersionConfidence.Definitive));
    }
}
/// <summary>
/// Extracts a version target from the <c>Dockerfile</c> in the root directory.
/// A <c>FROM ...python:X.Y[.Z]</c> image tag is preferred (high confidence); otherwise an
/// <c>ENV PYTHON_VERSION</c> assignment is used (medium confidence).
/// </summary>
/// <param name="cancellationToken">Token passed to the file read.</param>
/// <remarks>
/// A file that cannot be read, whether through an I/O error or denied access, is ignored,
/// matching how the editable-install scan treats unreadable files.
/// </remarks>
private async Task ParseDockerfileAsync(CancellationToken cancellationToken)
{
    const string source = "Dockerfile";

    var path = Path.Combine(_rootPath, source);
    if (!File.Exists(path))
    {
        return;
    }

    string content;
    try
    {
        content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: an unreadable file must not abort the analysis.
        return;
    }

    var fromMatch = DockerFromPythonPattern().Match(content);
    if (fromMatch.Success)
    {
        _versionTargets.Add(new PythonVersionTarget(
            fromMatch.Groups["version"].Value,
            source,
            PythonVersionConfidence.High));

        // The base image tag wins; the ENV fallback is not consulted.
        return;
    }

    var envMatch = DockerEnvPythonPattern().Match(content);
    if (envMatch.Success)
    {
        _versionTargets.Add(new PythonVersionTarget(
            envMatch.Groups["version"].Value,
            source,
            PythonVersionConfidence.Medium));
    }
}
/// <summary>
/// Extracts version targets from the <c>envlist</c> entry of <c>tox.ini</c>: every
/// <c>pyNN</c>/<c>pyNNN</c> token becomes a medium-confidence target (<c>py311</c> gives <c>3.11</c>).
/// </summary>
/// <param name="cancellationToken">Token passed to the file read.</param>
/// <remarks>
/// Each interpreter version is recorded once even when several environments share it
/// (for example <c>py311-a, py311-b</c>). A file that cannot be read, whether through an
/// I/O error or denied access, is ignored.
/// </remarks>
private async Task ParseToxIniAsync(CancellationToken cancellationToken)
{
    var path = Path.Combine(_rootPath, "tox.ini");
    if (!File.Exists(path))
    {
        return;
    }

    string content;
    try
    {
        content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: an unreadable file must not abort the analysis.
        return;
    }

    var envListMatch = ToxEnvListPattern().Match(content);
    if (!envListMatch.Success)
    {
        return;
    }

    var versions = ToxPythonEnvPattern()
        .Matches(envListMatch.Groups["envs"].Value)
        .Select(static match => match.Groups["version"].Value)
        .Where(static digits => digits.Length >= 2)
        // First digit is the major version, the remaining digits are the minor version.
        .Select(static digits => $"{digits[0]}.{digits[1..]}")
        .Distinct(StringComparer.Ordinal);

    foreach (var version in versions)
    {
        _versionTargets.Add(new PythonVersionTarget(
            version,
            "tox.ini",
            PythonVersionConfidence.Medium));
    }
}
/// <summary>
/// Infers version targets from <c>pythonX.Y</c> directory names found under the lib
/// folders of the virtual environment and of the root (lib, usr/local/lib, usr/lib).
/// </summary>
/// <remarks>
/// Each lib folder is scanned once: when the root itself is the virtual environment the
/// venv lib folder and the root lib folder are the same path, which would otherwise
/// record every version twice. A target equal to one already recorded is skipped.
/// </remarks>
private void DetectVersionFromSitePackages()
{
    var libDirectories = new List<string>(4);
    if (!string.IsNullOrEmpty(_venvPath))
    {
        libDirectories.Add(Path.Combine(_venvPath, "lib"));
    }

    libDirectories.Add(Path.Combine(_rootPath, "lib"));
    libDirectories.Add(Path.Combine(_rootPath, "usr", "local", "lib"));
    libDirectories.Add(Path.Combine(_rootPath, "usr", "lib"));

    foreach (var libPath in libDirectories.Distinct(StringComparer.Ordinal))
    {
        if (!Directory.Exists(libPath))
        {
            continue;
        }

        try
        {
            foreach (var dir in Directory.EnumerateDirectories(libPath, "python*", SafeEnumeration))
            {
                var dirName = Path.GetFileName(dir);
                var match = LibPythonDirPattern().Match(dirName);
                if (!match.Success)
                {
                    continue;
                }

                var target = new PythonVersionTarget(
                    match.Groups["version"].Value,
                    $"lib/{dirName}",
                    PythonVersionConfidence.Medium);

                // Records compare by value, so an identical earlier entry is detected here.
                if (!_versionTargets.Contains(target))
                {
                    _versionTargets.Add(target);
                }
            }
        }
        catch (UnauthorizedAccessException)
        {
            // Ignore inaccessible directories
        }
    }
}
/// <summary>
/// Collects the site-packages / dist-packages directories that exist on disk for the
/// detected virtual environment, Lambda layer layout, and container-style roots.
/// </summary>
/// <remarks>
/// The python3.9 through python3.12 locations that were always probed are still probed first, in
/// the same order. In addition every <c>python*</c> directory actually present under the
/// relevant lib folder is probed, so other interpreter versions are found as well.
/// Discovered directories are visited in ordinal path order, and a path is recorded once.
/// </remarks>
private void DetectSitePackages()
{
    string[] knownVersionDirectories = { "python3.9", "python3.10", "python3.11", "python3.12" };
    var candidates = new List<string>();

    // Adds <lib>/<pythonX.Y>/site-packages for the fixed versions, then for discovered ones.
    void AddVersionedSitePackages(string libDirectory)
    {
        foreach (var versionDirectory in knownVersionDirectories)
        {
            candidates.Add(Path.Combine(libDirectory, versionDirectory, "site-packages"));
        }

        foreach (var pythonDirectory in EnumeratePythonDirectories(libDirectory))
        {
            candidates.Add(Path.Combine(pythonDirectory, "site-packages"));
        }
    }

    // Virtualenv site-packages
    if (!string.IsNullOrEmpty(_venvPath))
    {
        // Windows
        candidates.Add(Path.Combine(_venvPath, "Lib", "site-packages"));

        // Unix - the pythonX.Y directory has to be discovered
        foreach (var pythonDirectory in EnumeratePythonDirectories(Path.Combine(_venvPath, "lib")))
        {
            candidates.Add(Path.Combine(pythonDirectory, "site-packages"));
        }
    }

    // Lambda layer paths
    if (_layout == PythonLayoutKind.Lambda)
    {
        AddVersionedSitePackages(Path.Combine(_rootPath, "python", "lib"));
        candidates.Add(Path.Combine(_rootPath, "python"));
    }

    // Container paths
    AddVersionedSitePackages(Path.Combine(_rootPath, "usr", "local", "lib"));
    candidates.Add(Path.Combine(_rootPath, "usr", "lib", "python3", "dist-packages"));

    // Root site-packages (common for some Docker images)
    candidates.Add(Path.Combine(_rootPath, "site-packages"));

    foreach (var candidate in candidates)
    {
        if (Directory.Exists(candidate) &&
            !_sitePackagesPaths.Contains(candidate, StringComparer.OrdinalIgnoreCase))
        {
            _sitePackagesPaths.Add(candidate);
        }
    }
}

/// <summary>
/// Lists the <c>python*</c> directories directly below <paramref name="libDirectory"/> in
/// ordinal order; returns an empty list when the folder is missing or inaccessible.
/// </summary>
private static IReadOnlyList<string> EnumeratePythonDirectories(string libDirectory)
{
    if (!Directory.Exists(libDirectory))
    {
        return Array.Empty<string>();
    }

    try
    {
        return Directory
            .EnumerateDirectories(libDirectory, "python*", SafeEnumeration)
            .OrderBy(static directory => directory, StringComparer.Ordinal)
            .ToList();
    }
    catch (UnauthorizedAccessException)
    {
        return Array.Empty<string>();
    }
}
/// <summary>
/// Collects <c>*.whl</c> files from the root directory and its <c>dist</c>, <c>wheels</c>
/// and <c>.wheels</c> sub-directories (non-recursive). A path is recorded once, compared
/// case-insensitively.
/// </summary>
private void DetectWheels()
{
    string[] wheelDirectories =
    {
        Path.Combine(_rootPath, "dist"),
        Path.Combine(_rootPath, "wheels"),
        Path.Combine(_rootPath, ".wheels"),
        _rootPath,
    };

    foreach (var directory in wheelDirectories)
    {
        if (Directory.Exists(directory))
        {
            try
            {
                foreach (var wheelFile in Directory.EnumerateFiles(directory, "*.whl", SafeEnumeration))
                {
                    var alreadyKnown = _wheelPaths.Contains(wheelFile, StringComparer.OrdinalIgnoreCase);
                    if (alreadyKnown)
                    {
                        continue;
                    }

                    _wheelPaths.Add(wheelFile);
                }
            }
            catch (UnauthorizedAccessException)
            {
                // Inaccessible directory: skip it and continue with the next one.
            }
        }
    }
}
/// <summary>
/// Collects zipapp files (<c>*.pyz</c> first, then <c>*.pyzw</c>) that sit directly in the
/// root directory. Does nothing when the root directory does not exist.
/// </summary>
private void DetectZipapps()
{
    if (!Directory.Exists(_rootPath))
    {
        return;
    }

    string[] zipappPatterns = { "*.pyz", "*.pyzw" };

    try
    {
        foreach (var pattern in zipappPatterns)
        {
            foreach (var zipapp in Directory.EnumerateFiles(_rootPath, pattern, SafeEnumeration))
            {
                _zipappPaths.Add(zipapp);
            }
        }
    }
    catch (UnauthorizedAccessException)
    {
        // Access denied: keep whatever was collected before the failure.
    }
}
/// <summary>
/// Finds editable installs in every detected site-packages directory: first through
/// <c>*.egg-link</c> files, then through <c>direct_url.json</c> files inside
/// <c>*.dist-info</c> directories.
/// </summary>
/// <param name="cancellationToken">Token passed to each file read.</param>
/// <remarks>
/// The scan is best effort. A file that cannot be read or parsed is skipped and the
/// remaining files are still processed.
/// </remarks>
private async Task DetectEditablesAsync(CancellationToken cancellationToken)
{
    // All egg-links are processed before any direct_url.json, keeping the recorded order stable.
    foreach (var sitePackagesPath in _sitePackagesPaths)
    {
        await CollectEggLinkEditablesAsync(sitePackagesPath, cancellationToken).ConfigureAwait(false);
    }

    foreach (var sitePackagesPath in _sitePackagesPaths)
    {
        await CollectDirectUrlEditablesAsync(sitePackagesPath, cancellationToken).ConfigureAwait(false);
    }
}

/// <summary>
/// Records an editable install for each <c>*.egg-link</c> file whose first non-empty line
/// names an existing directory. The package name is the file name without its extension.
/// </summary>
private async Task CollectEggLinkEditablesAsync(string sitePackagesPath, CancellationToken cancellationToken)
{
    List<string> eggLinks;
    try
    {
        eggLinks = Directory.EnumerateFiles(sitePackagesPath, "*.egg-link", SafeEnumeration).ToList();
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        return;
    }

    foreach (var eggLink in eggLinks)
    {
        string content;
        try
        {
            content = await File.ReadAllTextAsync(eggLink, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Skip only this file; the other egg-links are still examined.
            continue;
        }

        var editablePath = content
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .FirstOrDefault()
            ?.Trim();

        if (!string.IsNullOrEmpty(editablePath) && Directory.Exists(editablePath))
        {
            _editablePaths.Add((editablePath, Path.GetFileNameWithoutExtension(eggLink)));
        }
    }
}

/// <summary>
/// Records an editable install for each <c>*.dist-info/direct_url.json</c> that marks the
/// install as editable and points at an existing local directory. The package name is
/// the part of the dist-info directory name before the first '-'.
/// </summary>
private async Task CollectDirectUrlEditablesAsync(string sitePackagesPath, CancellationToken cancellationToken)
{
    List<string> distInfoDirectories;
    try
    {
        distInfoDirectories = Directory.EnumerateDirectories(sitePackagesPath, "*.dist-info", SafeEnumeration).ToList();
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        return;
    }

    foreach (var distInfo in distInfoDirectories)
    {
        var directUrlPath = Path.Combine(distInfo, "direct_url.json");
        if (!File.Exists(directUrlPath))
        {
            continue;
        }

        string json;
        try
        {
            json = await File.ReadAllTextAsync(directUrlPath, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            continue;
        }

        var editablePath = TryGetEditablePath(json);
        if (editablePath is null || !Directory.Exists(editablePath))
        {
            continue;
        }

        var packageName = Path.GetFileName(distInfo).Split('-')[0];
        _editablePaths.Add((editablePath, packageName));
    }
}

/// <summary>
/// Extracts the local directory from direct_url.json content when <c>dir_info.editable</c>
/// is <c>true</c> and <c>url</c> is a <c>file://</c> URL; returns null otherwise.
/// </summary>
/// <remarks>
/// Value kinds are checked before reading so that well-formed JSON with unexpected types
/// is rejected instead of throwing. Percent-escapes in the URL (for example <c>%20</c>)
/// are decoded so the result is a usable filesystem path.
/// </remarks>
private static string? TryGetEditablePath(string directUrlJson)
{
    try
    {
        using var document = JsonDocument.Parse(directUrlJson);
        var root = document.RootElement;

        if (root.ValueKind != JsonValueKind.Object ||
            !root.TryGetProperty("dir_info", out var dirInfo) ||
            dirInfo.ValueKind != JsonValueKind.Object ||
            !dirInfo.TryGetProperty("editable", out var editable) ||
            editable.ValueKind != JsonValueKind.True ||
            !root.TryGetProperty("url", out var urlElement) ||
            urlElement.ValueKind != JsonValueKind.String)
        {
            return null;
        }

        var url = urlElement.GetString();
        if (string.IsNullOrEmpty(url) || !url.StartsWith("file://", StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        var localPath = Uri.UnescapeDataString(url[7..]);

        // On Windows "file:///C:/dir" leaves "/C:/dir"; drop the leading slash.
        if (OperatingSystem.IsWindows() && localPath.StartsWith('/'))
        {
            localPath = localPath[1..];
        }

        return localPath;
    }
    catch (JsonException)
    {
        // Invalid JSON - skip
        return null;
    }
}
/// <summary>Matches <c>requires-python = "..."</c>; the quotes around the value are optional.</summary>
[GeneratedRegex(@"requires-python\s*=\s*[""']?(?<version>[^""'\n]+)", RegexOptions.IgnoreCase)]
private static partial Regex RequiresPythonPattern();

/// <summary>
/// Matches a quoted <c>python = "..."</c> assignment. The lookbehind rejects keys that merely
/// end in "python" (such as <c>requires-python</c>), which would otherwise be matched a second time.
/// </summary>
[GeneratedRegex(@"(?<![\w-])python\s*=\s*[""'](?<version>[^""']+)[""']", RegexOptions.IgnoreCase)]
private static partial Regex PythonVersionTomlPattern();

/// <summary>Matches a quoted <c>python_requires="..."</c> argument (setup.py).</summary>
[GeneratedRegex(@"python_requires\s*=\s*[""'](?<version>[^""']+)[""']", RegexOptions.IgnoreCase)]
private static partial Regex PythonRequiresPattern();

/// <summary>Matches an unquoted <c>python_requires = value</c> option up to the first whitespace (setup.cfg).</summary>
[GeneratedRegex(@"python_requires\s*=\s*(?<version>[^\s\n]+)", RegexOptions.IgnoreCase)]
private static partial Regex PythonRequiresCfgPattern();

/// <summary>Matches <c>python-X.Y</c> or <c>python-X.Y.Z</c> at the start of the input (runtime.txt).</summary>
[GeneratedRegex(@"^python-(?<version>\d+\.\d+(?:\.\d+)?)", RegexOptions.IgnoreCase)]
private static partial Regex RuntimeTxtPattern();

/// <summary>
/// Matches <c>FROM [--flag ...] [registry/path/]python:X.Y[.Z]</c>. Leading <c>--</c> flags such as
/// <c>--platform=...</c> are skipped so the image reference after them is still recognised.
/// </summary>
[GeneratedRegex(@"FROM\s+(?:--\S+\s+)*(?:\S*\/)?python:(?<version>\d+\.\d+(?:\.\d+)?)", RegexOptions.IgnoreCase)]
private static partial Regex DockerFromPythonPattern();

/// <summary>Matches <c>ENV PYTHON_VERSION[=]X.Y[.Z]</c>.</summary>
[GeneratedRegex(@"ENV\s+PYTHON_VERSION\s*=?\s*(?<version>\d+\.\d+(?:\.\d+)?)", RegexOptions.IgnoreCase)]
private static partial Regex DockerEnvPythonPattern();

/// <summary>Matches <c>envlist = ...</c> up to the end of the line or the next '['.</summary>
[GeneratedRegex(@"envlist\s*=\s*(?<envs>[^\n\[]+)", RegexOptions.IgnoreCase)]
private static partial Regex ToxEnvListPattern();

/// <summary>Matches <c>py</c> followed by two or three digits (for example py39, py311).</summary>
[GeneratedRegex(@"py(?<version>\d{2,3})", RegexOptions.IgnoreCase)]
private static partial Regex ToxPythonEnvPattern();

/// <summary>Matches a directory name of the exact form <c>pythonX.Y</c>.</summary>
[GeneratedRegex(@"^python(?<version>\d+\.\d+)$", RegexOptions.IgnoreCase)]
private static partial Regex LibPythonDirPattern();
}

View File

@@ -0,0 +1,67 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
/// <summary>
/// Kind of Python installation or project layout found at the analysis root.
/// </summary>
internal enum PythonLayoutKind
{
    /// <summary>Layout is unknown or was not detected.</summary>
    Unknown = 0,

    /// <summary>Standard virtual environment (venv or virtualenv).</summary>
    Virtualenv = 1,

    /// <summary>Project managed by Poetry.</summary>
    Poetry = 2,

    /// <summary>Project managed by Pipenv.</summary>
    Pipenv = 3,

    /// <summary>Conda environment.</summary>
    Conda = 4,

    /// <summary>System-wide Python installation.</summary>
    System = 5,

    /// <summary>Container image that contains Python.</summary>
    Container = 6,

    /// <summary>AWS Lambda function.</summary>
    Lambda = 7,

    /// <summary>Azure Functions application.</summary>
    AzureFunction = 8,

    /// <summary>Google Cloud Function.</summary>
    CloudFunction = 9,

    /// <summary>Application managed by pipx.</summary>
    Pipx = 10,

    /// <summary>Frozen application (PyInstaller or similar).</summary>
    Frozen = 11
}

View File

@@ -0,0 +1,122 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
/// <summary>
/// Outcome of analyzing a Python project: the detected layout, the collected version
/// targets, the site-packages locations, and the virtual filesystem built from them.
/// </summary>
internal sealed class PythonProjectAnalysis
{
    private PythonProjectAnalysis(
        PythonLayoutKind layout,
        IReadOnlyList<PythonVersionTarget> versionTargets,
        IReadOnlyList<string> sitePackagesPaths,
        string? venvPath,
        string? binPath,
        PythonVirtualFileSystem virtualFileSystem)
    {
        (Layout, VersionTargets, SitePackagesPaths) = (layout, versionTargets, sitePackagesPaths);
        (VenvPath, BinPath, VirtualFileSystem) = (venvPath, binPath, virtualFileSystem);
    }

    /// <summary>
    /// Layout kind that was detected.
    /// </summary>
    public PythonLayoutKind Layout { get; }

    /// <summary>
    /// Every version target that was detected.
    /// </summary>
    public IReadOnlyList<PythonVersionTarget> VersionTargets { get; }

    /// <summary>
    /// The version target with the highest confidence; ties are broken by the ordinal
    /// order of the source name. Null when no target was detected.
    /// </summary>
    public PythonVersionTarget? PrimaryVersionTarget
    {
        get
        {
            var ranked = VersionTargets
                .OrderByDescending(static target => target.Confidence)
                .ThenBy(static target => target.Source, StringComparer.Ordinal);

            return ranked.FirstOrDefault();
        }
    }

    /// <summary>
    /// Site-packages directories that were detected.
    /// </summary>
    public IReadOnlyList<string> SitePackagesPaths { get; }

    /// <summary>
    /// Virtual environment directory, or null when none was detected.
    /// </summary>
    public string? VenvPath { get; }

    /// <summary>
    /// bin/Scripts directory, or null when none was detected.
    /// </summary>
    public string? BinPath { get; }

    /// <summary>
    /// Virtual filesystem assembled from all detected sources.
    /// </summary>
    public PythonVirtualFileSystem VirtualFileSystem { get; }

    /// <summary>
    /// Runs the input normalizer on <paramref name="rootPath"/>, builds the virtual
    /// filesystem, and captures the results. The target and path lists are copied.
    /// </summary>
    /// <param name="rootPath">Root directory of the project to analyze.</param>
    /// <param name="cancellationToken">Token passed to the normalizer.</param>
    public static async Task<PythonProjectAnalysis> AnalyzeAsync(
        string rootPath,
        CancellationToken cancellationToken = default)
    {
        var normalizer = new PythonInputNormalizer(rootPath);
        await normalizer.AnalyzeAsync(cancellationToken).ConfigureAwait(false);

        var virtualFileSystem = normalizer.BuildVirtualFileSystem();
        var versionTargets = normalizer.VersionTargets.ToList();
        var sitePackagesPaths = normalizer.SitePackagesPaths.ToList();

        return new PythonProjectAnalysis(
            normalizer.Layout,
            versionTargets,
            sitePackagesPaths,
            normalizer.VenvPath,
            normalizer.BinPath,
            virtualFileSystem);
    }

    /// <summary>
    /// Produces the metadata entries that describe this analysis. Optional entries are
    /// emitted only when the corresponding data is present.
    /// </summary>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata()
    {
        yield return Entry("layout", Layout.ToString());

        if (PrimaryVersionTarget is { } primary)
        {
            yield return Entry("pythonVersion", primary.Version);
            yield return Entry("pythonVersionSource", primary.Source);
            yield return Entry("pythonVersionConfidence", primary.Confidence.ToString());

            if (primary.IsMinimum)
            {
                yield return Entry("pythonVersionIsMinimum", "true");
            }
        }

        if (VersionTargets.Count > 1)
        {
            var allVersions = VersionTargets.Select(static target => $"{target.Version}@{target.Source}");
            yield return Entry("pythonVersionsDetected", string.Join(';', allVersions));
        }

        if (!string.IsNullOrEmpty(VenvPath))
        {
            yield return Entry("venvPath", VenvPath);
        }

        if (SitePackagesPaths.Count > 0)
        {
            yield return Entry("sitePackagesCount", SitePackagesPaths.Count.ToString());
        }

        yield return Entry("vfsFileCount", VirtualFileSystem.FileCount.ToString());

        static KeyValuePair<string, string?> Entry(string key, string? value) => new(key, value);
    }
}

View File

@@ -0,0 +1,71 @@
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
/// <summary>
/// Represents a detected Python version target.
/// </summary>
/// <param name="Version">The detected version string (e.g., "3.11", "3.12.1").</param>
/// <param name="Source">The source file where the version was detected.</param>
/// <param name="Confidence">Confidence level of the detection.</param>
/// <param name="IsMinimum">True if this is a minimum version requirement.</param>
internal sealed record PythonVersionTarget(
    string Version,
    string Source,
    PythonVersionConfidence Confidence,
    bool IsMinimum = false)
{
    /// <summary>
    /// Gets the major version number, or null when it cannot be read from <see cref="Version"/>.
    /// </summary>
    public int? Major => TryParsePart(0);

    /// <summary>
    /// Gets the minor version number, or null when it is absent or cannot be read.
    /// </summary>
    public int? Minor => TryParsePart(1);

    /// <summary>
    /// Gets the patch version number, or null when it is absent or cannot be read.
    /// </summary>
    public int? Patch => TryParsePart(2);

    /// <summary>
    /// Reads the number in the dot-separated component at <paramref name="index"/>.
    /// </summary>
    /// <returns>
    /// The first run of ASCII digits in that component as an integer, or null when the
    /// component is missing, contains no digits, or does not fit in an <see cref="int"/>.
    /// </returns>
    private int? TryParsePart(int index)
    {
        var parts = Version.Split('.');
        if (index >= parts.Length)
        {
            return null;
        }

        // Take only the first digit run: ">=3" gives 3 and "11+" gives 11, while a suffix such as
        // "0rc1" gives 0. Joining every digit in the component would turn "0rc1" into 1.
        var digits = Regex.Match(parts[index], "[0-9]+");
        return digits.Success && int.TryParse(digits.Value, out var value) ? value : null;
    }
}

/// <summary>
/// Confidence level for Python version detection.
/// </summary>
internal enum PythonVersionConfidence
{
    /// <summary>
    /// Low confidence - inferred from heuristics.
    /// </summary>
    Low,

    /// <summary>
    /// Medium confidence - from configuration files.
    /// </summary>
    Medium,

    /// <summary>
    /// High confidence - explicit declaration.
    /// </summary>
    High,

    /// <summary>
    /// Definitive - from runtime detection.
    /// </summary>
    Definitive
}

View File

@@ -0,0 +1,62 @@
using System.Collections.Frozen;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
/// <summary>
/// A single file entry of the Python virtual filesystem.
/// </summary>
/// <param name="VirtualPath">Normalized virtual path with forward slashes, relative to the VFS root.</param>
/// <param name="AbsolutePath">Absolute filesystem path or archive entry path.</param>
/// <param name="Source">Origin of the file.</param>
/// <param name="LayerDigest">Container layer digest if the file comes from a container layer.</param>
/// <param name="ArchivePath">Path of the containing archive if the file comes from a wheel/sdist/zipapp.</param>
/// <param name="Size">File size in bytes, if known.</param>
/// <param name="Sha256">SHA-256 hash of the file content, if computed.</param>
/// <param name="Metadata">Additional metadata key-value pairs.</param>
internal sealed record PythonVirtualFile(
    string VirtualPath,
    string AbsolutePath,
    PythonFileSource Source,
    string? LayerDigest = null,
    string? ArchivePath = null,
    long? Size = null,
    string? Sha256 = null,
    FrozenDictionary<string, string>? Metadata = null)
{
    /// <summary>
    /// Lower-case extension of <see cref="VirtualPath"/> without the leading dot;
    /// empty when the path has no extension.
    /// </summary>
    public string Extension =>
        Path.GetExtension(VirtualPath) is { Length: > 0 } dotted
            ? dotted[1..].ToLowerInvariant()
            : string.Empty;

    /// <summary>
    /// File name portion of <see cref="VirtualPath"/>.
    /// </summary>
    public string FileName => Path.GetFileName(VirtualPath);

    /// <summary>
    /// True for Python source files (.py, .pyw).
    /// </summary>
    public bool IsPythonSource => HasExtension("py", "pyw");

    /// <summary>
    /// True for compiled bytecode files (.pyc, .pyo).
    /// </summary>
    public bool IsBytecode => HasExtension("pyc", "pyo");

    /// <summary>
    /// True for native extension files (.so, .pyd, .dll).
    /// </summary>
    public bool IsNativeExtension => HasExtension("so", "pyd", "dll");

    /// <summary>
    /// True when the file was read from an archive, i.e. <see cref="ArchivePath"/> is set.
    /// </summary>
    public bool IsFromArchive => ArchivePath is { Length: > 0 };

    // Exact (ordinal) comparison against the already lower-cased extension.
    private bool HasExtension(params string[] candidates)
    {
        var extension = Extension;
        foreach (var candidate in candidates)
        {
            if (string.Equals(extension, candidate, StringComparison.Ordinal))
            {
                return true;
            }
        }

        return false;
    }
}

View File

@@ -0,0 +1,579 @@
using System.Collections.Frozen;
using System.Diagnostics.CodeAnalysis;
using System.IO.Compression;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
/// <summary>
/// A virtual filesystem that normalizes access to Python project files across
/// wheels, sdists, editable installs, zipapps, site-packages trees, and container roots.
/// </summary>
internal sealed partial class PythonVirtualFileSystem
{
/// <summary>
/// Options for non-recursive scans: inaccessible entries are ignored, device files and
/// reparse points are skipped.
/// </summary>
private static readonly EnumerationOptions SafeEnumeration = new()
{
RecurseSubdirectories = false,
IgnoreInaccessible = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
};
/// <summary>
/// Same as <see cref="SafeEnumeration"/> but descending into sub-directories.
/// </summary>
private static readonly EnumerationOptions RecursiveEnumeration = new()
{
RecurseSubdirectories = true,
IgnoreInaccessible = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
};
// Files keyed by their normalized virtual path.
private readonly FrozenDictionary<string, PythonVirtualFile> _files;
// Directory path -> names of its entries; the empty key is used for the root (see EnumerateFiles).
private readonly FrozenDictionary<string, FrozenSet<string>> _directories;
// Path sets exposed through the SourceTreeRoots, SitePackagesPaths, EditablePaths and ZipArchivePaths properties.
private readonly FrozenSet<string> _sourceTreeRoots;
private readonly FrozenSet<string> _sitePackagesPaths;
private readonly FrozenSet<string> _editablePaths;
private readonly FrozenSet<string> _zipArchivePaths;
/// <summary>
/// Stores the frozen lookup tables and path sets that make up the virtual filesystem.
/// </summary>
private PythonVirtualFileSystem(
    FrozenDictionary<string, PythonVirtualFile> files,
    FrozenDictionary<string, FrozenSet<string>> directories,
    FrozenSet<string> sourceTreeRoots,
    FrozenSet<string> sitePackagesPaths,
    FrozenSet<string> editablePaths,
    FrozenSet<string> zipArchivePaths)
{
    (_files, _directories) = (files, directories);
    (_sourceTreeRoots, _sitePackagesPaths) = (sourceTreeRoots, sitePackagesPaths);
    (_editablePaths, _zipArchivePaths) = (editablePaths, zipArchivePaths);
}
/// <summary>
/// Number of files held by the virtual filesystem.
/// </summary>
public int FileCount => _files.Count;

/// <summary>
/// Every file held by the virtual filesystem.
/// </summary>
public IEnumerable<PythonVirtualFile> Files => _files.Values;

/// <summary>
/// Every virtual path, sorted with ordinal comparison.
/// </summary>
public IEnumerable<string> Paths
{
    get
    {
        return _files.Keys.OrderBy(static virtualPath => virtualPath, StringComparer.Ordinal);
    }
}

/// <summary>
/// Root paths of the source trees.
/// </summary>
public IReadOnlySet<string> SourceTreeRoots => _sourceTreeRoots;

/// <summary>
/// Paths of the site-packages directories.
/// </summary>
public IReadOnlySet<string> SitePackagesPaths => _sitePackagesPaths;

/// <summary>
/// Paths of the editable installs.
/// </summary>
public IReadOnlySet<string> EditablePaths => _editablePaths;

/// <summary>
/// Paths of the zip archives (wheels, zipapps, sdists).
/// </summary>
public IReadOnlySet<string> ZipArchivePaths => _zipArchivePaths;
/// <summary>
/// Looks up a file by virtual path.
/// </summary>
/// <param name="virtualPath">Virtual path; it is normalized before the lookup.</param>
/// <returns>The matching file, or <c>null</c> when no file is stored under that path.</returns>
public PythonVirtualFile? GetFile(string virtualPath)
{
    return _files.TryGetValue(NormalizePath(virtualPath), out var match) ? match : null;
}
/// <summary>
/// Attempts to look up a file by virtual path.
/// </summary>
/// <param name="virtualPath">Virtual path; it is normalized before the lookup.</param>
/// <param name="file">Receives the matching file when the method returns <c>true</c>.</param>
/// <returns><c>true</c> when a file is stored under the normalized path.</returns>
public bool TryGetFile(string virtualPath, [NotNullWhen(true)] out PythonVirtualFile? file)
    => _files.TryGetValue(NormalizePath(virtualPath), out file);
/// <summary>
/// Reports whether a file is stored under the given virtual path.
/// </summary>
/// <param name="virtualPath">Virtual path; it is normalized before the lookup.</param>
/// <returns><c>true</c> when the normalized path maps to a file.</returns>
public bool FileExists(string virtualPath)
    => _files.ContainsKey(NormalizePath(virtualPath));
/// <summary>
/// Reports whether the given virtual path is a directory.
/// </summary>
/// <param name="virtualPath">Virtual path; it is normalized before the lookup.</param>
/// <returns>
/// <c>true</c> for the root (a path that normalizes to the empty string) and for any
/// path present in the directory index; otherwise <c>false</c>.
/// </returns>
public bool DirectoryExists(string virtualPath)
{
    var directory = NormalizePath(virtualPath);

    // The root always exists, even in an empty filesystem.
    return directory.Length == 0 || _directories.ContainsKey(directory);
}
/// <summary>
/// Enumerates the files directly inside a directory (non-recursive).
/// </summary>
/// <param name="virtualPath">Virtual directory path; it is normalized before the lookup.</param>
/// <returns>
/// The directory's files ordered by entry name (ordinal). Child entries that do not
/// resolve to a file are skipped. Yields nothing when the directory is unknown.
/// </returns>
public IEnumerable<PythonVirtualFile> EnumerateFiles(string virtualPath)
{
    var directory = NormalizePath(virtualPath);
    if (_directories.TryGetValue(directory, out var childNames))
    {
        var prefix = directory.Length == 0 ? string.Empty : directory + "/";
        foreach (var childName in childNames.OrderBy(static name => name, StringComparer.Ordinal))
        {
            if (_files.TryGetValue(prefix + childName, out var match))
            {
                yield return match;
            }
        }
    }
}
/// <summary>
/// Enumerates all files under a directory whose path relative to that directory
/// matches a glob pattern.
/// </summary>
/// <param name="virtualPath">Virtual directory to search beneath; it is normalized first.</param>
/// <param name="pattern">Glob pattern applied to each file's path relative to <paramref name="virtualPath"/>.</param>
/// <returns>
/// The matching files ordered by virtual path (ordinal), so results are deterministic
/// regardless of the underlying dictionary's iteration order.
/// </returns>
public IEnumerable<PythonVirtualFile> EnumerateFiles(string virtualPath, string pattern)
{
    var regex = GlobToRegex(pattern);
    var normalized = NormalizePath(virtualPath);
    var prefix = normalized.Length == 0 ? string.Empty : normalized + "/";

    // Filter before sorting so only the matches pay for the ordering.
    var matches = _files
        .Where(kvp => kvp.Key.StartsWith(prefix, StringComparison.Ordinal)
            && regex.IsMatch(kvp.Key[prefix.Length..]))
        .OrderBy(static kvp => kvp.Key, StringComparer.Ordinal);

    foreach (var kvp in matches)
    {
        yield return kvp.Value;
    }
}
/// <summary>
/// Returns the files that were added from the given source.
/// </summary>
/// <param name="source">Source to filter on.</param>
/// <returns>The matching files ordered by virtual path (ordinal).</returns>
public IEnumerable<PythonVirtualFile> GetFilesBySource(PythonFileSource source)
{
    var fromSource = _files.Values.Where(file => file.Source == source);
    return fromSource.OrderBy(static file => file.VirtualPath, StringComparer.Ordinal);
}
/// <summary>
/// Creates an empty <see cref="Builder"/> for assembling a virtual filesystem.
/// </summary>
/// <returns>A new builder instance.</returns>
public static Builder CreateBuilder()
{
    return new Builder();
}
/// <summary>
/// Converts a path to the canonical virtual form: forward slashes only, with no
/// leading or trailing slash.
/// </summary>
/// <param name="path">Path to normalize; may be null, empty, or whitespace.</param>
/// <returns>
/// The normalized path. The empty string (the root) is returned for null, empty,
/// or whitespace input, and for input that reduces to <c>"."</c>.
/// </returns>
private static string NormalizePath(string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        return string.Empty;
    }

    return path.Replace('\\', '/').Trim('/') switch
    {
        "." => string.Empty,
        var cleaned => cleaned,
    };
}
// Source-generated regex matching a non-empty string made up solely of ASCII letters,
// digits, '_', '.', '-' and '/'.
[GeneratedRegex(@"^[a-zA-Z0-9_.\-/]+$")]
private static partial Regex SafePathPattern();
/// <summary>
/// Translates a glob pattern into an anchored, case-insensitive regular expression.
/// </summary>
/// <param name="pattern">
/// Glob pattern. <c>**</c> matches any characters including <c>/</c>, <c>*</c> matches any run of
/// characters except <c>/</c>, and <c>?</c> matches one character other than <c>/</c>.
/// A <c>**/</c> segment at the start of the pattern or right after a <c>/</c> also matches
/// zero directories, so <c>**/*.py</c> matches both <c>setup.py</c> and <c>pkg/mod.py</c>.
/// </param>
/// <returns>A regex that must match the whole relative path.</returns>
private static Regex GlobToRegex(string pattern)
{
    var escaped = Regex.Escape(pattern);

    // Handle "**/" segments first so they can collapse to "no directory at all";
    // the lookbehind restricts this to whole path segments.
    escaped = Regex.Replace(escaped, @"(?<=^|/)\\\*\\\*/", "(?:.*/)?");

    escaped = escaped
        .Replace(@"\*\*", ".*")
        .Replace(@"\*", "[^/]*")
        .Replace(@"\?", "[^/]");

    // CultureInvariant keeps case-insensitive matching independent of the current culture.
    return new Regex(
        $"^{escaped}$",
        RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
}
/// <summary>
/// Builder for constructing a <see cref="PythonVirtualFileSystem"/>.
/// </summary>
internal sealed class Builder
{
// Normalized virtual path -> file; a later AddFile for the same path replaces the earlier entry.
private readonly Dictionary<string, PythonVirtualFile> _files = new(StringComparer.Ordinal);
// Archive paths already handled, compared case-insensitively so an archive is read at most once.
private readonly HashSet<string> _processedArchives = new(StringComparer.OrdinalIgnoreCase);
// Virtual roots registered by AddSourceTree.
private readonly HashSet<string> _sourceTreeRoots = new(StringComparer.Ordinal);
// Virtual roots registered by AddSitePackages.
private readonly HashSet<string> _sitePackagesPaths = new(StringComparer.Ordinal);
// Virtual roots registered by AddEditable.
private readonly HashSet<string> _editablePaths = new(StringComparer.Ordinal);
// Archive paths registered by AddWheel, AddZipapp and AddSdist.
private readonly HashSet<string> _zipArchivePaths = new(StringComparer.Ordinal);
/// <summary>
/// Adds every file under a site-packages directory, mounted at the root of the virtual filesystem.
/// </summary>
/// <param name="sitePackagesPath">Directory on disk; ignored when it does not exist.</param>
/// <param name="layerDigest">Optional layer digest recorded on each added file.</param>
/// <returns>This builder, for chaining.</returns>
public Builder AddSitePackages(string sitePackagesPath, string? layerDigest = null)
{
    if (Directory.Exists(sitePackagesPath))
    {
        var physicalRoot = Path.GetFullPath(sitePackagesPath);

        // The empty string denotes the root of the virtual filesystem.
        _sitePackagesPaths.Add(string.Empty);
        AddDirectoryRecursive(physicalRoot, string.Empty, PythonFileSource.SitePackages, layerDigest);
    }

    return this;
}
/// <summary>
/// Adds files from a wheel archive (.whl).
/// </summary>
/// <param name="wheelPath">Path of the wheel on disk.</param>
/// <returns>This builder, for chaining.</returns>
/// <remarks>
/// Best-effort: the call is a no-op when the file does not exist or was already processed.
/// Corrupted, unreadable, or access-denied archives are skipped rather than failing the build.
/// The path is recorded as a zip archive path before the archive is opened.
/// </remarks>
public Builder AddWheel(string wheelPath)
{
    if (!File.Exists(wheelPath) || !_processedArchives.Add(wheelPath))
    {
        return this;
    }

    _zipArchivePaths.Add(wheelPath);
    try
    {
        using var archive = ZipFile.OpenRead(wheelPath);
        AddArchiveEntries(archive, wheelPath, PythonFileSource.Wheel);
    }
    catch (InvalidDataException)
    {
        // Corrupted archive - skip
    }
    catch (IOException)
    {
        // IO error - skip
    }
    catch (UnauthorizedAccessException)
    {
        // No permission to read the archive - skip, as AddDirectoryRecursive does for directories
    }

    return this;
}
/// <summary>
/// Adds files from a zipapp (.pyz, .pyzw).
/// </summary>
/// <param name="zipappPath">Path of the zipapp on disk.</param>
/// <returns>This builder, for chaining.</returns>
/// <remarks>
/// Best-effort: the call is a no-op when the file does not exist or was already processed.
/// Files with no ZIP signature, and corrupted, unreadable, or access-denied files, are skipped.
/// The path is recorded as a zip archive path before the file is opened.
/// </remarks>
public Builder AddZipapp(string zipappPath)
{
    if (!File.Exists(zipappPath) || !_processedArchives.Add(zipappPath))
    {
        return this;
    }

    _zipArchivePaths.Add(zipappPath);
    try
    {
        using var stream = File.OpenRead(zipappPath);

        // Zipapps start with a shebang line, need to find ZIP signature
        var offset = FindZipOffset(stream);
        if (offset < 0)
        {
            return this;
        }

        // NOTE(review): ZipArchive finds the central directory by seeking from the end of the
        // stream, so this reposition may have no effect - confirm that archives whose offsets
        // are relative to the ZIP data (not the file start) open correctly.
        stream.Position = offset;
        using var archive = new ZipArchive(stream, ZipArchiveMode.Read);
        AddArchiveEntries(archive, zipappPath, PythonFileSource.Zipapp);
    }
    catch (InvalidDataException)
    {
        // Corrupted archive - skip
    }
    catch (IOException)
    {
        // IO error - skip
    }
    catch (UnauthorizedAccessException)
    {
        // No permission to read the file - skip, as AddDirectoryRecursive does for directories
    }

    return this;
}
/// <summary>
/// Adds files from a source distribution archive (.tar.gz, .zip).
/// </summary>
/// <param name="sdistPath">Path of the sdist on disk.</param>
/// <returns>This builder, for chaining.</returns>
/// <remarks>
/// Only <c>.zip</c> sdists are read. Any other extension (including <c>.tar.gz</c>) is still
/// recorded as processed and as a zip archive path, but contributes no files.
/// Best-effort: missing, already-processed, corrupted, unreadable, or access-denied archives are skipped.
/// </remarks>
public Builder AddSdist(string sdistPath)
{
    if (!File.Exists(sdistPath) || !_processedArchives.Add(sdistPath))
    {
        return this;
    }

    _zipArchivePaths.Add(sdistPath);
    try
    {
        if (sdistPath.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
        {
            using var archive = ZipFile.OpenRead(sdistPath);
            AddArchiveEntries(archive, sdistPath, PythonFileSource.Sdist);
        }

        // Note: .tar.gz support would require TarReader from System.Formats.Tar
        // For now, we handle the common .zip case
    }
    catch (InvalidDataException)
    {
        // Corrupted archive - skip
    }
    catch (IOException)
    {
        // IO error - skip
    }
    catch (UnauthorizedAccessException)
    {
        // No permission to read the archive - skip, as AddDirectoryRecursive does for directories
    }

    return this;
}
/// <summary>
/// Adds every file under an editable install directory.
/// </summary>
/// <param name="editablePath">Directory on disk; ignored when it does not exist.</param>
/// <param name="packageName">
/// Optional virtual directory to mount the files under (trailing slashes are removed).
/// When null or empty the files are mounted at the root.
/// </param>
/// <returns>This builder, for chaining.</returns>
public Builder AddEditable(string editablePath, string? packageName = null)
{
    if (Directory.Exists(editablePath))
    {
        var physicalRoot = Path.GetFullPath(editablePath);
        var virtualRoot = string.IsNullOrEmpty(packageName)
            ? string.Empty
            : packageName.TrimEnd('/');

        _editablePaths.Add(virtualRoot);
        AddDirectoryRecursive(physicalRoot, virtualRoot, PythonFileSource.Editable, layerDigest: null);
    }

    return this;
}
/// <summary>
/// Adds every file under a source tree, mounted at the root of the virtual filesystem.
/// </summary>
/// <param name="sourcePath">Directory on disk; ignored when it does not exist.</param>
/// <returns>This builder, for chaining.</returns>
public Builder AddSourceTree(string sourcePath)
{
    if (Directory.Exists(sourcePath))
    {
        var physicalRoot = Path.GetFullPath(sourcePath);

        // The empty string denotes the root of the virtual filesystem.
        _sourceTreeRoots.Add(string.Empty);
        AddDirectoryRecursive(physicalRoot, string.Empty, PythonFileSource.SourceTree, layerDigest: null);
    }

    return this;
}
/// <summary>
/// Adds the files of a virtual environment's bin/Scripts directory under the virtual <c>bin/</c> directory.
/// </summary>
/// <param name="binPath">Directory on disk; ignored when it does not exist.</param>
/// <returns>This builder, for chaining.</returns>
public Builder AddVenvBin(string binPath)
{
    if (Directory.Exists(binPath))
    {
        var physicalRoot = Path.GetFullPath(binPath);
        foreach (var scriptPath in Directory.EnumerateFiles(physicalRoot, "*", SafeEnumeration))
        {
            // Every script lands directly in "bin/", keyed by its file name only.
            AddFile(
                $"bin/{Path.GetFileName(scriptPath)}",
                scriptPath,
                PythonFileSource.VenvBin,
                layerDigest: null,
                archivePath: null);
        }
    }

    return this;
}
/// <summary>
/// Registers a single file under an explicit virtual path.
/// </summary>
/// <param name="virtualPath">Virtual path; normalized before use. Paths that normalize to empty are ignored.</param>
/// <param name="absolutePath">Location recorded on the file (a disk path, or an entry name for archive members).</param>
/// <param name="source">Where the file came from.</param>
/// <param name="layerDigest">Optional layer digest.</param>
/// <param name="archivePath">Optional path of the containing archive.</param>
/// <param name="size">Optional size in bytes.</param>
/// <param name="sha256">Optional SHA-256 digest.</param>
/// <param name="metadata">Optional extra metadata.</param>
/// <returns>This builder, for chaining.</returns>
public Builder AddFile(
    string virtualPath,
    string absolutePath,
    PythonFileSource source,
    string? layerDigest = null,
    string? archivePath = null,
    long? size = null,
    string? sha256 = null,
    FrozenDictionary<string, string>? metadata = null)
{
    var key = NormalizePath(virtualPath);
    if (key.Length > 0)
    {
        // Indexer assignment on purpose: later additions override earlier ones (layer precedence).
        _files[key] = new PythonVirtualFile(
            key,
            absolutePath,
            source,
            layerDigest,
            archivePath,
            size,
            sha256,
            metadata);
    }

    return this;
}
/// <summary>
/// Freezes the collected files and path sets into an immutable <see cref="PythonVirtualFileSystem"/>.
/// </summary>
/// <returns>A filesystem snapshot of everything added so far.</returns>
public PythonVirtualFileSystem Build()
{
    var frozenFiles = _files.ToFrozenDictionary(StringComparer.Ordinal);

    // The directory index is derived from the frozen file set.
    var directoryIndex = BuildDirectoryIndex(frozenFiles);

    var sourceTreeRoots = _sourceTreeRoots.ToFrozenSet(StringComparer.Ordinal);
    var sitePackagesPaths = _sitePackagesPaths.ToFrozenSet(StringComparer.Ordinal);
    var editablePaths = _editablePaths.ToFrozenSet(StringComparer.Ordinal);
    var zipArchivePaths = _zipArchivePaths.ToFrozenSet(StringComparer.Ordinal);

    return new PythonVirtualFileSystem(
        frozenFiles,
        directoryIndex,
        sourceTreeRoots,
        sitePackagesPaths,
        editablePaths,
        zipArchivePaths);
}
/// <summary>
/// Walks a directory tree on disk and registers each file under a virtual prefix.
/// </summary>
/// <param name="basePath">Directory on disk to enumerate recursively.</param>
/// <param name="virtualPrefix">Virtual directory to mount under; empty mounts at the root.</param>
/// <param name="source">Source recorded on every added file.</param>
/// <param name="layerDigest">Optional layer digest recorded on every added file.</param>
/// <remarks>
/// Files inside a <c>__pycache__</c> directory and files whose name starts with '.' are skipped.
/// Enumeration stops silently on access-denied or directory-not-found errors.
/// </remarks>
private void AddDirectoryRecursive(
    string basePath,
    string virtualPrefix,
    PythonFileSource source,
    string? layerDigest)
{
    // The size is optional metadata; any failure to read it yields null.
    static long? TryReadLength(string path)
    {
        try
        {
            return new FileInfo(path).Length;
        }
        catch
        {
            // Ignore size if we can't read it
            return null;
        }
    }

    try
    {
        foreach (var physicalPath in Directory.EnumerateFiles(basePath, "*", RecursiveEnumeration))
        {
            var relative = Path.GetRelativePath(basePath, physicalPath).Replace('\\', '/');

            // Skip __pycache__ and hidden files
            var insidePycache =
                relative.StartsWith("__pycache__/", StringComparison.Ordinal) ||
                relative.Contains("/__pycache__/", StringComparison.Ordinal);
            if (insidePycache || Path.GetFileName(physicalPath).StartsWith('.'))
            {
                continue;
            }

            var virtualPath = string.IsNullOrEmpty(virtualPrefix)
                ? relative
                : $"{virtualPrefix}/{relative}";

            AddFile(
                virtualPath,
                physicalPath,
                source,
                layerDigest,
                archivePath: null,
                size: TryReadLength(physicalPath));
        }
    }
    catch (UnauthorizedAccessException)
    {
        // Skip inaccessible directories
    }
    catch (DirectoryNotFoundException)
    {
        // Directory was removed - skip
    }
}
/// <summary>
/// Registers every file entry of an open ZIP archive.
/// </summary>
/// <param name="archive">Open archive to read entries from.</param>
/// <param name="archivePath">Path of the archive, recorded on each added file.</param>
/// <param name="source">Source recorded on each added file.</param>
/// <remarks>
/// Directory entries and entries inside a <c>__pycache__</c> directory are skipped.
/// The entry's full name is stored as the file's absolute path and, with backslashes
/// converted to '/', as its virtual path.
/// </remarks>
private void AddArchiveEntries(ZipArchive archive, string archivePath, PythonFileSource source)
{
    foreach (var zipEntry in archive.Entries)
    {
        // Directory entries have no file name or end with a slash.
        var isDirectoryEntry = string.IsNullOrEmpty(zipEntry.Name) || zipEntry.FullName.EndsWith('/');
        if (isDirectoryEntry)
        {
            continue;
        }

        var entryPath = zipEntry.FullName.Replace('\\', '/');
        var isBytecodeCache =
            entryPath.StartsWith("__pycache__/", StringComparison.Ordinal) ||
            entryPath.Contains("/__pycache__/", StringComparison.Ordinal);

        if (!isBytecodeCache)
        {
            AddFile(
                entryPath,
                zipEntry.FullName,
                source,
                layerDigest: null,
                archivePath: archivePath,
                size: zipEntry.Length);
        }
    }
}
/// <summary>
/// Finds the first ZIP local-file-header signature (<c>PK\x03\x04</c>) within the next
/// 4096 bytes of a stream.
/// </summary>
/// <param name="stream">Stream to scan from its current position; the position advances by the bytes read.</param>
/// <returns>
/// The zero-based offset of the signature relative to where reading started,
/// or -1 when no signature occurs in the scanned window.
/// </returns>
private static long FindZipOffset(Stream stream)
{
    // ZIP files start with PK\x03\x04 signature
    ReadOnlySpan<byte> localFileHeaderSignature = new byte[] { 0x50, 0x4B, 0x03, 0x04 };

    var buffer = new byte[4096];

    // A single Read may return fewer bytes than requested even before end of stream;
    // ReadAtLeast keeps reading until the buffer is full or the stream ends.
    var bytesRead = stream.ReadAtLeast(buffer, buffer.Length, throwOnEndOfStream: false);

    return buffer.AsSpan(0, bytesRead).IndexOf(localFileHeaderSignature);
}
/// <summary>
/// Derives the directory index from the set of file paths.
/// </summary>
/// <param name="files">Files keyed by '/'-separated virtual path.</param>
/// <returns>
/// A map from each directory path (the empty string for the root) to the names of its
/// immediate children, covering both files and subdirectories. The root is always present.
/// </returns>
private static FrozenDictionary<string, FrozenSet<string>> BuildDirectoryIndex(
    FrozenDictionary<string, PythonVirtualFile> files)
{
    var index = new Dictionary<string, HashSet<string>>(StringComparer.Ordinal);

    // Get-or-create the child set of a directory.
    HashSet<string> ChildrenOf(string directory)
    {
        if (!index.TryGetValue(directory, out var children))
        {
            children = new HashSet<string>(StringComparer.Ordinal);
            index[directory] = children;
        }

        return children;
    }

    // The root exists even when there are no files.
    ChildrenOf(string.Empty);

    foreach (var filePath in files.Keys)
    {
        var segments = filePath.Split('/');
        var parent = string.Empty;
        for (var depth = 0; depth < segments.Length; depth++)
        {
            var segment = segments[depth];
            ChildrenOf(parent).Add(segment);

            // Every segment but the last is a directory to descend into.
            if (depth < segments.Length - 1)
            {
                parent = parent.Length == 0 ? segment : $"{parent}/{segment}";
            }
        }
    }

    return index.ToFrozenDictionary(
        static pair => pair.Key,
        static pair => pair.Value.ToFrozenSet(StringComparer.Ordinal),
        StringComparer.Ordinal);
}
}
}

View File

@@ -1,64 +1,71 @@
using System.Linq;
using System.Text.Json;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Python;
public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
{
// Options for recursive directory walks: descends into subdirectories, ignores
// inaccessible entries, and skips device files and reparse points.
private static readonly EnumerationOptions Enumeration = new()
{
RecurseSubdirectories = true,
IgnoreInaccessible = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
};
/// <summary>Gets the analyzer identifier, <c>"python"</c>.</summary>
public string Id => "python";
/// <summary>Gets the human-readable analyzer name.</summary>
public string DisplayName => "Python Analyzer";
/// <summary>
/// Validates the arguments and then runs the Python analysis.
/// </summary>
/// <remarks>
/// The null checks run synchronously, before the async implementation is invoked, so an
/// <see cref="ArgumentNullException"/> is thrown directly to the caller instead of being
/// captured in the returned task.
/// </remarks>
public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(context);
ArgumentNullException.ThrowIfNull(writer);
return AnalyzeInternalAsync(context, writer, cancellationToken);
}
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Python;
public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
{
// Options for recursive directory walks: descends into subdirectories, ignores
// inaccessible entries, and skips device files and reparse points.
private static readonly EnumerationOptions Enumeration = new()
{
RecurseSubdirectories = true,
IgnoreInaccessible = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
};
/// <summary>Gets the analyzer identifier, <c>"python"</c>.</summary>
public string Id => "python";
/// <summary>Gets the human-readable analyzer name.</summary>
public string DisplayName => "Python Analyzer";
/// <summary>
/// Validates the arguments and then runs the Python analysis.
/// </summary>
/// <remarks>
/// The null checks run synchronously, before the async implementation is invoked, so an
/// <see cref="ArgumentNullException"/> is thrown directly to the caller instead of being
/// captured in the returned task.
/// </remarks>
public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(context);
ArgumentNullException.ThrowIfNull(writer);
return AnalyzeInternalAsync(context, writer, cancellationToken);
}
private static async ValueTask AnalyzeInternalAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
{
var lockData = await PythonLockFileCollector.LoadAsync(context, cancellationToken).ConfigureAwait(false);
var matchedLocks = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var hasLockEntries = lockData.Entries.Count > 0;
var distInfoDirectories = Directory
.EnumerateDirectories(context.RootPath, "*.dist-info", Enumeration)
.OrderBy(static path => path, StringComparer.Ordinal)
.ToArray();
// Detect Python runtime in container layers
var runtimeInfo = PythonContainerAdapter.DetectRuntime(context.RootPath);
// Detect environment variables (PYTHONPATH/PYTHONHOME)
var environment = await PythonEnvironmentDetector.DetectAsync(context.RootPath, cancellationToken).ConfigureAwait(false);
// Detect startup hooks (sitecustomize.py, usercustomize.py, .pth files)
var startupHooks = PythonStartupHookDetector.Detect(context.RootPath);
// Collect dist-info directories from both root and container layers
var distInfoDirectories = CollectDistInfoDirectories(context.RootPath);
foreach (var distInfoPath in distInfoDirectories)
{
cancellationToken.ThrowIfCancellationRequested();
PythonDistribution? distribution;
try
{
distribution = await PythonDistributionLoader.LoadAsync(context, distInfoPath, cancellationToken).ConfigureAwait(false);
}
catch (IOException)
{
continue;
}
catch (JsonException)
{
continue;
}
catch (UnauthorizedAccessException)
{
continue;
}
if (distribution is null)
try
{
distribution = await PythonDistributionLoader.LoadAsync(context, distInfoPath, cancellationToken).ConfigureAwait(false);
}
catch (IOException)
{
continue;
}
catch (JsonException)
{
continue;
}
catch (UnauthorizedAccessException)
{
continue;
}
if (distribution is null)
{
continue;
}
@@ -75,6 +82,19 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
metadata.Add(new KeyValuePair<string, string?>("lockMissing", "true"));
}
// Append runtime information
AppendRuntimeMetadata(metadata, runtimeInfo);
// Append environment variables (PYTHONPATH/PYTHONHOME)
AppendEnvironmentMetadata(metadata, environment);
// Append startup hooks warnings
AppendStartupHooksMetadata(metadata, startupHooks);
// Collect evidence including startup hooks
var evidence = distribution.SortedEvidence.ToList();
evidence.AddRange(startupHooks.ToEvidence(context));
writer.AddFromPurl(
analyzerId: "python",
purl: distribution.Purl,
@@ -82,7 +102,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
version: distribution.Version,
type: "pypi",
metadata: metadata,
evidence: distribution.SortedEvidence,
evidence: evidence,
usedByEntrypoint: distribution.UsedByEntrypoint);
}
@@ -159,4 +179,98 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
metadata.Add(new KeyValuePair<string, string?>("lockEditablePath", entry.EditablePath));
}
}
/// <summary>
/// Appends a summary of the detected Python runtime to a component's metadata.
/// </summary>
/// <param name="metadata">Metadata list to append to.</param>
/// <param name="runtimeInfo">Detected runtime information; nothing is appended when null.</param>
/// <remarks>
/// Emits <c>runtime.versions</c> (joined with ';'), <c>runtime.binaries.count</c> and
/// <c>runtime.libPaths.count</c>, each only when the corresponding collection is non-empty.
/// </remarks>
private static void AppendRuntimeMetadata(List<KeyValuePair<string, string?>> metadata, PythonRuntimeInfo? runtimeInfo)
{
    if (runtimeInfo is not null)
    {
        void Put(string key, string? value) => metadata.Add(new KeyValuePair<string, string?>(key, value));

        if (runtimeInfo.Versions.Count > 0)
        {
            Put("runtime.versions", string.Join(';', runtimeInfo.Versions));
        }

        if (runtimeInfo.Binaries.Count > 0)
        {
            Put("runtime.binaries.count", runtimeInfo.Binaries.Count.ToString());
        }

        if (runtimeInfo.LibPaths.Count > 0)
        {
            Put("runtime.libPaths.count", runtimeInfo.LibPaths.Count.ToString());
        }
    }
}
/// <summary>
/// Appends detected PYTHONPATH / PYTHONHOME settings to a component's metadata.
/// </summary>
/// <param name="metadata">Metadata list to append to.</param>
/// <param name="environment">Detected environment information.</param>
/// <remarks>
/// Each variable that is set contributes its value plus a fixed warning entry.
/// <c>env.sources.count</c> is added when at least one source was recorded.
/// </remarks>
private static void AppendEnvironmentMetadata(List<KeyValuePair<string, string?>> metadata, PythonEnvironment environment)
{
    void Put(string key, string? value) => metadata.Add(new KeyValuePair<string, string?>(key, value));

    if (environment.HasPythonPath)
    {
        Put("env.pythonpath", environment.PythonPath);
        Put("env.pythonpath.warning", "PYTHONPATH is set; may affect module resolution");
    }

    if (environment.HasPythonHome)
    {
        Put("env.pythonhome", environment.PythonHome);
        Put("env.pythonhome.warning", "PYTHONHOME is set; may affect interpreter behavior");
    }

    var sourceCount = environment.Sources.Count;
    if (sourceCount > 0)
    {
        Put("env.sources.count", sourceCount.ToString());
    }
}
/// <summary>
/// Appends startup-hook findings to a component's metadata.
/// </summary>
/// <param name="metadata">Metadata list to append to.</param>
/// <param name="startupHooks">Detected startup hooks.</param>
/// <remarks>
/// Emits a detection flag and hook count when hooks exist, a flag when .pth files with
/// imports were found, and one <c>startupHooks.warning</c> entry per warning.
/// NOTE(review): every warning uses the same key, so the list can hold duplicate keys -
/// confirm downstream consumers accept that.
/// </remarks>
private static void AppendStartupHooksMetadata(List<KeyValuePair<string, string?>> metadata, PythonStartupHooks startupHooks)
{
    void Put(string key, string? value) => metadata.Add(new KeyValuePair<string, string?>(key, value));

    if (startupHooks.HasStartupHooks)
    {
        Put("startupHooks.detected", "true");
        Put("startupHooks.count", startupHooks.Hooks.Count.ToString());
    }

    if (startupHooks.HasPthFilesWithImports)
    {
        Put("pthFiles.withImports.detected", "true");
    }

    foreach (var warning in startupHooks.Warnings)
    {
        Put("startupHooks.warning", warning);
    }
}
/// <summary>
/// Gathers every <c>*.dist-info</c> directory found under the root and those reported
/// by the container adapter.
/// </summary>
/// <param name="rootPath">Root directory of the analysis.</param>
/// <returns>
/// The distinct directory paths (de-duplicated case-insensitively), sorted with ordinal comparison.
/// </returns>
/// <remarks>
/// I/O and access errors during the recursive walk are ignored; directories found before
/// the error are kept.
/// </remarks>
private static IReadOnlyCollection<string> CollectDistInfoDirectories(string rootPath)
{
    var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Collect from root path recursively
    try
    {
        found.UnionWith(Directory.EnumerateDirectories(rootPath, "*.dist-info", Enumeration));
    }
    catch (IOException)
    {
        // Ignore enumeration errors
    }
    catch (UnauthorizedAccessException)
    {
        // Ignore access errors
    }

    // Also collect from OCI container layers
    foreach (var layerDirectory in PythonContainerAdapter.DiscoverDistInfoDirectories(rootPath))
    {
        found.Add(layerDirectory);
    }

    var ordered = found.ToArray();
    Array.Sort(ordered, StringComparer.Ordinal);
    return ordered;
}
}

View File

@@ -8,6 +8,10 @@
<EnableDefaultItems>false</EnableDefaultItems>
</PropertyGroup>
<ItemGroup>
<InternalsVisibleTo Include="StellaOps.Scanner.Analyzers.Lang.Python.Tests" />
</ItemGroup>
<ItemGroup>
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />

View File

@@ -12,3 +12,9 @@
| 6 | SCANNER-ANALYZERS-LANG-10-309P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-308P | Package plug-in (manifest, DI registration) and document Offline Kit bundling of Python stdlib metadata if needed. | Manifest copied to `plugins/scanner/analyzers/lang/`; Worker loads analyzer; Offline Kit doc updated. |
| 7 | SCANNER-ANALYZERS-PYTHON-23-001 | DONE (2025-11-27) | — | Build input normalizer & virtual filesystem for wheels, sdists, editable installs, zipapps, site-packages trees, and container roots. Detect Python version targets (`pyproject.toml`, `runtime.txt`, Dockerfile) + virtualenv layout deterministically. | Created `PythonVirtualFileSystem` with builder pattern, `PythonInputNormalizer` for layout/version detection. Supports VFS for wheels, sdists, zipapps. Detects layouts (Virtualenv, Poetry, Pipenv, Conda, Lambda, Container) and version from multiple sources. Files: `Internal/VirtualFileSystem/`. Tests: `VirtualFileSystem/PythonVirtualFileSystemTests.cs`, `VirtualFileSystem/PythonInputNormalizerTests.cs`. |
| 8 | SCANNER-ANALYZERS-PYTHON-23-002 | DONE (2025-11-27) | SCANNER-ANALYZERS-PYTHON-23-001 | Entrypoint discovery: module `__main__`, console_scripts entry points, `scripts`, zipapp main, `manage.py`/gunicorn/celery patterns. Capture invocation context (module vs package, argv wrappers). | Created `PythonEntrypointDiscovery` with support for: PackageMain (__main__.py), ConsoleScript/GuiScript (entry_points.txt), Script (bin/), ZipappMain, DjangoManage, WsgiApp/AsgiApp, CeleryWorker, LambdaHandler, AzureFunctionHandler, CloudFunctionHandler, CliApp (Click/Typer), StandaloneScript. Includes invocation context with command, arguments. Files: `Internal/Entrypoints/`. Tests: `Entrypoints/PythonEntrypointDiscoveryTests.cs`. |
| 9 | SCANNER-ANALYZERS-PYTHON-23-003 | DONE (2025-11-27) | SCANNER-ANALYZERS-PYTHON-23-002 | Static import graph builder using AST and bytecode fallback. Support `import`, `from ... import`, relative imports, `importlib.import_module`, `__import__` with literal args, `pkgutil.extend_path`. | Created `PythonSourceImportExtractor` with regex-based AST-like import extraction supporting: standard imports, from imports, star imports, relative imports (all levels), future imports, conditional imports (try/except), lazy imports (inside functions), TYPE_CHECKING imports, `importlib.import_module()`, `__import__()`, `pkgutil.extend_path()`. Created `PythonBytecodeImportExtractor` for .pyc fallback supporting Python 3.8-3.13 magic numbers. Created `PythonImportGraph` for dependency graph with: forward/reverse edges, cycle detection, topological ordering, relative import resolution. Created `PythonImportAnalysis` wrapper categorizing imports as stdlib/third-party/local with transitive dependency analysis. Files: `Internal/Imports/`. Tests: `Imports/PythonImportExtractorTests.cs`, `Imports/PythonImportGraphTests.cs`. |