up
This commit is contained in:
@@ -0,0 +1,183 @@
|
||||
using System.Collections.Frozen;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
|
||||
|
||||
/// <summary>
/// A single Python entrypoint found during analysis.
/// </summary>
/// <param name="Name">Display name of the entrypoint (e.g., "myapp", "manage.py").</param>
/// <param name="Kind">Type of entrypoint.</param>
/// <param name="Target">Target module or callable (e.g., "myapp.cli:main", "myapp.__main__").</param>
/// <param name="VirtualPath">Path in the virtual filesystem; may be <c>null</c>.</param>
/// <param name="InvocationContext">How to invoke this entrypoint.</param>
/// <param name="Confidence">Confidence level of the detection.</param>
/// <param name="Source">Source of the entrypoint definition.</param>
/// <param name="Metadata">Additional metadata; defaults to <c>null</c>.</param>
internal sealed record PythonEntrypoint(
    string Name,
    PythonEntrypointKind Kind,
    string Target,
    string? VirtualPath,
    PythonInvocationContext InvocationContext,
    PythonEntrypointConfidence Confidence,
    string Source,
    FrozenDictionary<string, string>? Metadata = null)
{
    /// <summary>
    /// Gets the portion of <see cref="Target"/> that precedes the first colon.
    /// Returns <c>null</c> for a null/empty target, and the whole target when it
    /// has no colon or the colon is its first character.
    /// </summary>
    public string? ModulePath
    {
        get
        {
            if (string.IsNullOrEmpty(Target))
            {
                return null;
            }

            var separator = Target.IndexOf(':');
            if (separator <= 0)
            {
                return Target;
            }

            return Target[..separator];
        }
    }

    /// <summary>
    /// Gets the portion of <see cref="Target"/> that follows the first colon.
    /// Returns <c>null</c> when the target is null/empty, has no colon, starts
    /// with the colon, or ends with it.
    /// </summary>
    public string? Callable
    {
        get
        {
            if (string.IsNullOrEmpty(Target))
            {
                return null;
            }

            var separator = Target.IndexOf(':');
            var hasCallable = separator > 0 && separator < Target.Length - 1;
            if (!hasCallable)
            {
                return null;
            }

            return Target[(separator + 1)..];
        }
    }

    /// <summary>
    /// Gets a value indicating whether <see cref="Kind"/> is one of the framework kinds
    /// (Django manage, WSGI/ASGI app, Celery worker, or a serverless handler).
    /// </summary>
    public bool IsFrameworkEntrypoint => Kind switch
    {
        PythonEntrypointKind.DjangoManage
            or PythonEntrypointKind.WsgiApp
            or PythonEntrypointKind.AsgiApp
            or PythonEntrypointKind.CeleryWorker
            or PythonEntrypointKind.LambdaHandler
            or PythonEntrypointKind.AzureFunctionHandler
            or PythonEntrypointKind.CloudFunctionHandler => true,
        _ => false,
    };

    /// <summary>
    /// Gets a value indicating whether <see cref="Kind"/> is one of the CLI kinds
    /// (console script, script, CLI app, or package main).
    /// </summary>
    public bool IsCliEntrypoint => Kind switch
    {
        PythonEntrypointKind.ConsoleScript
            or PythonEntrypointKind.Script
            or PythonEntrypointKind.CliApp
            or PythonEntrypointKind.PackageMain => true,
        _ => false,
    };
}
|
||||
|
||||
/// <summary>
/// Captures how a discovered Python entrypoint is launched.
/// </summary>
/// <param name="InvocationType">How the entrypoint is invoked.</param>
/// <param name="Command">Command or module to invoke (e.g., "python -m myapp", "./bin/myapp").</param>
/// <param name="Arguments">Additional arguments or environment requirements.</param>
/// <param name="WorkingDirectory">Expected working directory relative to project root.</param>
internal sealed record PythonInvocationContext(
    PythonInvocationType InvocationType,
    string Command,
    string? Arguments = null,
    string? WorkingDirectory = null)
{
    /// <summary>
    /// Builds a <see cref="PythonInvocationType.Module"/> context whose command is
    /// <c>python -m</c> followed by <paramref name="modulePath"/>.
    /// </summary>
    public static PythonInvocationContext AsModule(string modulePath)
    {
        return new PythonInvocationContext(PythonInvocationType.Module, $"python -m {modulePath}");
    }

    /// <summary>
    /// Builds a <see cref="PythonInvocationType.Script"/> context whose command is the script path itself.
    /// </summary>
    public static PythonInvocationContext AsScript(string scriptPath)
    {
        return new PythonInvocationContext(PythonInvocationType.Script, scriptPath);
    }

    /// <summary>
    /// Builds a <see cref="PythonInvocationType.ConsoleScript"/> context whose command is the script name.
    /// </summary>
    public static PythonInvocationContext AsConsoleScript(string name)
    {
        return new PythonInvocationContext(PythonInvocationType.ConsoleScript, name);
    }

    /// <summary>
    /// Builds a <see cref="PythonInvocationType.WsgiAsgi"/> context: the runner becomes the
    /// command and the application target is stored in <see cref="Arguments"/>.
    /// </summary>
    public static PythonInvocationContext AsWsgiApp(string runner, string target)
    {
        return new PythonInvocationContext(PythonInvocationType.WsgiAsgi, Command: runner, Arguments: target);
    }

    /// <summary>
    /// Builds a <see cref="PythonInvocationType.Handler"/> context whose command is the handler reference.
    /// </summary>
    public static PythonInvocationContext AsHandler(string handler)
    {
        return new PythonInvocationContext(PythonInvocationType.Handler, handler);
    }
}
|
||||
|
||||
/// <summary>
/// The mechanism used to launch a Python entrypoint.
/// </summary>
internal enum PythonInvocationType
{
    /// <summary>Invoked via <c>python -m module</c>.</summary>
    Module = 0,

    /// <summary>Invoked as a script file.</summary>
    Script = 1,

    /// <summary>Invoked via installed console script.</summary>
    ConsoleScript = 2,

    /// <summary>Invoked via WSGI/ASGI server.</summary>
    WsgiAsgi = 3,

    /// <summary>Invoked as a serverless handler.</summary>
    Handler = 4,
}
|
||||
|
||||
/// <summary>
/// How certain the detection of an entrypoint is. Members are declared in ascending
/// order of certainty, so numeric comparison between values is meaningful.
/// </summary>
internal enum PythonEntrypointConfidence
{
    /// <summary>Low confidence - inferred from heuristics or naming patterns.</summary>
    Low = 0,

    /// <summary>Medium confidence - detected from configuration or common patterns.</summary>
    Medium = 1,

    /// <summary>High confidence - explicitly declared in metadata.</summary>
    High = 2,

    /// <summary>Definitive - from authoritative source like entry_points.txt.</summary>
    Definitive = 3,
}
|
||||
@@ -0,0 +1,138 @@
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
|
||||
|
||||
/// <summary>
/// Result of Python entrypoint analysis: the full entrypoint list (sorted by name, then kind)
/// together with pre-filtered views and the single highest-ranked "primary" entrypoint.
/// Instances are created through <see cref="AnalyzeAsync"/>.
/// </summary>
internal sealed class PythonEntrypointAnalysis
{
    private PythonEntrypointAnalysis(
        IReadOnlyList<PythonEntrypoint> entrypoints,
        IReadOnlyList<PythonEntrypoint> consoleScripts,
        IReadOnlyList<PythonEntrypoint> frameworkEntrypoints,
        IReadOnlyList<PythonEntrypoint> cliEntrypoints)
    {
        Entrypoints = entrypoints;
        ConsoleScripts = consoleScripts;
        FrameworkEntrypoints = frameworkEntrypoints;
        CliEntrypoints = cliEntrypoints;

        // Ranked once: the lists are fixed after construction, so sorting again on every
        // property read (ToMetadata alone reads it several times) would be wasted work.
        PrimaryEntrypoint = SelectPrimary(entrypoints);
    }

    /// <summary>
    /// Gets all discovered entrypoints.
    /// </summary>
    public IReadOnlyList<PythonEntrypoint> Entrypoints { get; }

    /// <summary>
    /// Gets console_scripts and gui_scripts entrypoints.
    /// </summary>
    public IReadOnlyList<PythonEntrypoint> ConsoleScripts { get; }

    /// <summary>
    /// Gets framework-specific entrypoints (Django, Lambda, etc.).
    /// </summary>
    public IReadOnlyList<PythonEntrypoint> FrameworkEntrypoints { get; }

    /// <summary>
    /// Gets CLI application entrypoints.
    /// </summary>
    public IReadOnlyList<PythonEntrypoint> CliEntrypoints { get; }

    /// <summary>
    /// Gets the primary entrypoint, or <c>null</c> when nothing was discovered.
    /// Ranking: highest confidence first, then a fixed kind preference
    /// (console script, package main, CLI app, Lambda handler, WSGI app, ASGI app, everything else),
    /// then ordinal name order.
    /// </summary>
    public PythonEntrypoint? PrimaryEntrypoint { get; }

    /// <summary>
    /// Analyzes entrypoints from a virtual filesystem.
    /// </summary>
    /// <param name="vfs">Virtual filesystem to scan.</param>
    /// <param name="rootPath">Root path handed to the discovery for resolving files on disk.</param>
    /// <param name="cancellationToken">Token used to cancel the discovery.</param>
    /// <returns>The analysis result with entrypoints sorted by ordinal name, then by kind.</returns>
    public static async Task<PythonEntrypointAnalysis> AnalyzeAsync(
        PythonVirtualFileSystem vfs,
        string rootPath,
        CancellationToken cancellationToken = default)
    {
        var discovery = new PythonEntrypointDiscovery(vfs, rootPath);
        await discovery.DiscoverAsync(cancellationToken).ConfigureAwait(false);

        var entrypoints = discovery.Entrypoints
            .OrderBy(static e => e.Name, StringComparer.Ordinal)
            .ThenBy(static e => e.Kind)
            .ToList();

        var consoleScripts = entrypoints
            .Where(static e => e.Kind is PythonEntrypointKind.ConsoleScript or PythonEntrypointKind.GuiScript)
            .ToList();

        var frameworkEntrypoints = entrypoints
            .Where(static e => e.IsFrameworkEntrypoint)
            .ToList();

        var cliEntrypoints = entrypoints
            .Where(static e => e.IsCliEntrypoint)
            .ToList();

        return new PythonEntrypointAnalysis(
            entrypoints,
            consoleScripts,
            frameworkEntrypoints,
            cliEntrypoints);
    }

    /// <summary>
    /// Generates metadata entries for the analysis result: the four counts, the primary
    /// entrypoint (when present), the console script names joined with ';', and one entry
    /// per framework entrypoint.
    /// </summary>
    /// <returns>A lazily produced sequence of key/value pairs.</returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata()
    {
        // Metadata is machine-readable, so numbers are formatted culture-independently.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        yield return new KeyValuePair<string, string?>("entrypoints.total", Entrypoints.Count.ToString(invariant));
        yield return new KeyValuePair<string, string?>("entrypoints.consoleScripts", ConsoleScripts.Count.ToString(invariant));
        yield return new KeyValuePair<string, string?>("entrypoints.framework", FrameworkEntrypoints.Count.ToString(invariant));
        yield return new KeyValuePair<string, string?>("entrypoints.cli", CliEntrypoints.Count.ToString(invariant));

        var primary = PrimaryEntrypoint;
        if (primary is not null)
        {
            yield return new KeyValuePair<string, string?>("entrypoints.primary.name", primary.Name);
            yield return new KeyValuePair<string, string?>("entrypoints.primary.kind", primary.Kind.ToString());
            yield return new KeyValuePair<string, string?>("entrypoints.primary.target", primary.Target);
        }

        // List all console scripts
        if (ConsoleScripts.Count > 0)
        {
            var scripts = string.Join(';', ConsoleScripts.Select(static e => e.Name));
            yield return new KeyValuePair<string, string?>("entrypoints.consoleScripts.names", scripts);
        }

        // List framework entrypoints. The key is derived from the kind alone, so several
        // entrypoints of the same kind produce several pairs with the same key.
        foreach (var ep in FrameworkEntrypoints)
        {
            yield return new KeyValuePair<string, string?>($"entrypoints.{ep.Kind.ToString().ToLowerInvariant()}", ep.Target);
        }
    }

    /// <summary>
    /// Gets entrypoints whose kind equals <paramref name="kind"/>.
    /// </summary>
    public IEnumerable<PythonEntrypoint> GetByKind(PythonEntrypointKind kind) =>
        Entrypoints.Where(e => e.Kind == kind);

    /// <summary>
    /// Gets entrypoints whose confidence is at least <paramref name="minConfidence"/>.
    /// </summary>
    public IEnumerable<PythonEntrypoint> GetByConfidence(PythonEntrypointConfidence minConfidence) =>
        Entrypoints.Where(e => e.Confidence >= minConfidence);

    // Picks the best-ranked entrypoint; see PrimaryEntrypoint for the ranking rules.
    private static PythonEntrypoint? SelectPrimary(IReadOnlyList<PythonEntrypoint> entrypoints) =>
        entrypoints
            .OrderByDescending(static e => e.Confidence)
            .ThenBy(static e => e.Kind switch
            {
                PythonEntrypointKind.ConsoleScript => 0,
                PythonEntrypointKind.PackageMain => 1,
                PythonEntrypointKind.CliApp => 2,
                PythonEntrypointKind.LambdaHandler => 3,
                PythonEntrypointKind.WsgiApp => 4,
                PythonEntrypointKind.AsgiApp => 5,
                _ => 10
            })
            .ThenBy(static e => e.Name, StringComparer.Ordinal)
            .FirstOrDefault();
}
|
||||
@@ -0,0 +1,677 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
|
||||
|
||||
/// <summary>
|
||||
/// Discovers Python entrypoints from a virtual filesystem.
|
||||
/// </summary>
|
||||
internal sealed partial class PythonEntrypointDiscovery
|
||||
{
|
||||
// Virtual filesystem that is scanned for entrypoint candidates.
private readonly PythonVirtualFileSystem _vfs;

// Root path combined with file paths when content has to be read from disk.
private readonly string _rootPath;

// Accumulates every entrypoint found by the discovery passes.
private readonly List<PythonEntrypoint> _entrypoints = new();

/// <summary>
/// Creates a discovery over the given virtual filesystem.
/// </summary>
/// <param name="vfs">Virtual filesystem to scan.</param>
/// <param name="rootPath">Root path used to resolve files on disk.</param>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public PythonEntrypointDiscovery(PythonVirtualFileSystem vfs, string rootPath)
{
    ArgumentNullException.ThrowIfNull(vfs);
    ArgumentNullException.ThrowIfNull(rootPath);

    _vfs = vfs;
    _rootPath = rootPath;
}
|
||||
|
||||
/// <summary>
/// Gets the entrypoints collected so far, as a read-only view over the internal list.
/// </summary>
public IReadOnlyList<PythonEntrypoint> Entrypoints
{
    get { return _entrypoints; }
}
|
||||
|
||||
/// <summary>
/// Runs every discovery pass, one after another, over the virtual filesystem.
/// </summary>
/// <param name="cancellationToken">Token forwarded to each pass.</param>
/// <returns>This instance, once all passes have completed.</returns>
public async Task<PythonEntrypointDiscovery> DiscoverAsync(CancellationToken cancellationToken = default)
{
    // Passes run strictly in this order; each is awaited before the next one starts.
    Func<CancellationToken, Task>[] passes =
    {
        DiscoverPackageMainsAsync,
        DiscoverConsoleScriptsAsync,
        DiscoverDataScriptsAsync,
        DiscoverZipappMainsAsync,
        DiscoverDjangoEntrypointsAsync,
        DiscoverWsgiAsgiAppsAsync,
        DiscoverCeleryEntrypointsAsync,
        DiscoverLambdaHandlersAsync,
        DiscoverCliAppsAsync,
        DiscoverStandaloneScriptsAsync,
    };

    foreach (var pass in passes)
    {
        await pass(cancellationToken).ConfigureAwait(false);
    }

    return this;
}
|
||||
|
||||
/// <summary>
/// Registers a <see cref="PythonEntrypointKind.PackageMain"/> entrypoint for every
/// <c>__main__.py</c> that sits inside at least one directory.
/// </summary>
/// <param name="cancellationToken">Checked before each file is processed.</param>
/// <returns>A completed task; the work is done synchronously.</returns>
private Task DiscoverPackageMainsAsync(CancellationToken cancellationToken)
{
    foreach (var mainFile in _vfs.EnumerateFiles(string.Empty, "**/__main__.py"))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var segments = mainFile.VirtualPath.Split('/');

        // A single segment means __main__.py is not inside any directory.
        if (segments.Length >= 2)
        {
            // Dotted path of the containing directories (everything except the file name).
            var dottedPackage = string.Join('.', segments[..^1]);

            var entrypoint = new PythonEntrypoint(
                Name: segments[0],
                Kind: PythonEntrypointKind.PackageMain,
                Target: dottedPackage,
                VirtualPath: mainFile.VirtualPath,
                InvocationContext: PythonInvocationContext.AsModule(dottedPackage),
                Confidence: PythonEntrypointConfidence.Definitive,
                Source: mainFile.VirtualPath);

            _entrypoints.Add(entrypoint);
        }
    }

    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Discovers console_scripts and gui_scripts from <c>*.dist-info/entry_points.txt</c> files.
/// Files that come from an archive, cannot be located on disk, or cannot be read are skipped.
/// </summary>
/// <param name="cancellationToken">Checked before each file and passed to the file read.</param>
private async Task DiscoverConsoleScriptsAsync(CancellationToken cancellationToken)
{
    foreach (var file in _vfs.EnumerateFiles(string.Empty, "*.dist-info/entry_points.txt"))
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (file.IsFromArchive)
        {
            continue; // Can't read from archive directly yet
        }

        // Try the path relative to the root first, then the path exactly as recorded.
        var fullPath = Path.Combine(_rootPath, file.AbsolutePath);
        if (!File.Exists(fullPath))
        {
            fullPath = file.AbsolutePath;
            if (!File.Exists(fullPath))
            {
                continue;
            }
        }

        try
        {
            var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);
            ParseEntryPointsTxt(content, file.VirtualPath);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Best effort: skip unreadable files. A permission failure surfaces as
            // UnauthorizedAccessException, which does not derive from IOException.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Parses the text of an <c>entry_points.txt</c> file and registers one entrypoint per
/// <c>name = target</c> line found under a <c>[console_scripts]</c> or <c>[gui_scripts]</c>
/// section. Other sections, blank lines, <c>#</c> comments, lines without a name, and
/// lines without a target are ignored.
/// </summary>
/// <param name="content">Full text of the file.</param>
/// <param name="source">Value recorded as the <see cref="PythonEntrypoint.Source"/> of each entry.</param>
private void ParseEntryPointsTxt(string content, string source)
{
    string? currentSection = null;

    foreach (var line in content.Split('\n'))
    {
        // Trim also removes the '\r' left behind by CRLF line endings.
        var trimmed = line.Trim();
        if (string.IsNullOrEmpty(trimmed) || trimmed.StartsWith('#'))
        {
            continue;
        }

        // Section header
        if (trimmed.StartsWith('[') && trimmed.EndsWith(']'))
        {
            currentSection = trimmed[1..^1].Trim().ToLowerInvariant();
            continue;
        }

        if (currentSection is not ("console_scripts" or "gui_scripts"))
        {
            continue;
        }

        // Entry: name = module:callable or name = module:callable [extras]
        var equalsIndex = trimmed.IndexOf('=');
        if (equalsIndex <= 0)
        {
            continue;
        }

        var name = trimmed[..equalsIndex].Trim();
        var target = trimmed[(equalsIndex + 1)..].Trim();

        // Remove extras if present
        var bracketIndex = target.IndexOf('[');
        if (bracketIndex > 0)
        {
            target = target[..bracketIndex].Trim();
        }

        // "name =" with nothing after it names no module or callable, so there is
        // nothing to register.
        if (target.Length == 0)
        {
            continue;
        }

        var kind = currentSection == "gui_scripts"
            ? PythonEntrypointKind.GuiScript
            : PythonEntrypointKind.ConsoleScript;

        _entrypoints.Add(new PythonEntrypoint(
            Name: name,
            Kind: kind,
            Target: target,
            VirtualPath: null,
            InvocationContext: PythonInvocationContext.AsConsoleScript(name),
            Confidence: PythonEntrypointConfidence.Definitive,
            Source: source));
    }
}
|
||||
|
||||
/// <summary>
/// Registers a <see cref="PythonEntrypointKind.Script"/> entrypoint for every file matched by
/// <c>*.data/scripts/*</c> and then for every file in the <c>bin</c> directory.
/// </summary>
/// <param name="cancellationToken">Checked before each file is processed.</param>
/// <returns>A completed task; the work is done synchronously.</returns>
private Task DiscoverDataScriptsAsync(CancellationToken cancellationToken)
{
    // Both locations are handled identically; they are visited in this order.
    var locations = new (string Directory, string Pattern)[]
    {
        (string.Empty, "*.data/scripts/*"),
        ("bin", "*"),
    };

    foreach (var (directory, pattern) in locations)
    {
        foreach (var script in _vfs.EnumerateFiles(directory, pattern))
        {
            cancellationToken.ThrowIfCancellationRequested();

            var scriptPath = script.VirtualPath;

            _entrypoints.Add(new PythonEntrypoint(
                Name: Path.GetFileName(scriptPath),
                Kind: PythonEntrypointKind.Script,
                Target: scriptPath,
                VirtualPath: scriptPath,
                InvocationContext: PythonInvocationContext.AsScript(scriptPath),
                Confidence: PythonEntrypointConfidence.High,
                Source: scriptPath));
        }
    }

    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Registers a <see cref="PythonEntrypointKind.ZipappMain"/> entrypoint for each zipapp-sourced
/// file whose virtual path is exactly <c>__main__.py</c>.
/// </summary>
/// <param name="cancellationToken">Checked before each file is examined.</param>
/// <returns>A completed task; the work is done synchronously.</returns>
private Task DiscoverZipappMainsAsync(CancellationToken cancellationToken)
{
    foreach (var candidate in _vfs.GetFilesBySource(PythonFileSource.Zipapp))
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Only a __main__.py at the root level counts.
        if (candidate.VirtualPath != "__main__.py")
        {
            continue;
        }

        // The archive itself is what gets executed; "app.pyz" is the fallback when no archive path is known.
        var invocation = new PythonInvocationContext(
            PythonInvocationType.Script,
            candidate.ArchivePath ?? "app.pyz");

        _entrypoints.Add(new PythonEntrypoint(
            Name: "__main__",
            Kind: PythonEntrypointKind.ZipappMain,
            Target: "__main__",
            VirtualPath: candidate.VirtualPath,
            InvocationContext: invocation,
            Confidence: PythonEntrypointConfidence.Definitive,
            Source: candidate.ArchivePath ?? candidate.VirtualPath));
    }

    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Discovers Django entrypoints: a root-level <c>manage.py</c>, and for every
/// <c>settings.py</c> inside a directory, the sibling <c>wsgi.py</c> / <c>asgi.py</c> files.
/// </summary>
/// <param name="cancellationToken">Checked before each settings file is processed.</param>
/// <returns>A completed task; nothing here is asynchronous, so the method is not marked <c>async</c>.</returns>
private Task DiscoverDjangoEntrypointsAsync(CancellationToken cancellationToken)
{
    // Look for manage.py
    if (_vfs.FileExists("manage.py"))
    {
        _entrypoints.Add(new PythonEntrypoint(
            Name: "manage.py",
            Kind: PythonEntrypointKind.DjangoManage,
            Target: "manage",
            VirtualPath: "manage.py",
            InvocationContext: PythonInvocationContext.AsScript("manage.py"),
            Confidence: PythonEntrypointConfidence.High,
            Source: "manage.py"));
    }

    // Look for Django settings to infer WSGI/ASGI apps
    foreach (var file in _vfs.EnumerateFiles(string.Empty, "**/settings.py"))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var parts = file.VirtualPath.Split('/');
        if (parts.Length < 2)
        {
            continue;
        }

        // Only the folder directly containing settings.py names the project module;
        // any parent folders are not part of the generated target.
        var projectName = parts[^2];
        var projectDirectory = string.Join('/', parts[..^1]);

        // Check for wsgi.py
        var wsgiPath = projectDirectory + "/wsgi.py";
        if (_vfs.FileExists(wsgiPath))
        {
            var wsgiModule = $"{projectName}.wsgi:application";
            _entrypoints.Add(new PythonEntrypoint(
                Name: $"{projectName}-wsgi",
                Kind: PythonEntrypointKind.WsgiApp,
                Target: wsgiModule,
                VirtualPath: wsgiPath,
                InvocationContext: PythonInvocationContext.AsWsgiApp("gunicorn", wsgiModule),
                Confidence: PythonEntrypointConfidence.High,
                Source: wsgiPath));
        }

        // Check for asgi.py
        var asgiPath = projectDirectory + "/asgi.py";
        if (_vfs.FileExists(asgiPath))
        {
            var asgiModule = $"{projectName}.asgi:application";
            _entrypoints.Add(new PythonEntrypoint(
                Name: $"{projectName}-asgi",
                Kind: PythonEntrypointKind.AsgiApp,
                Target: asgiModule,
                VirtualPath: asgiPath,
                InvocationContext: PythonInvocationContext.AsWsgiApp("uvicorn", asgiModule),
                Confidence: PythonEntrypointConfidence.High,
                Source: asgiPath));
        }
    }

    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Discovers WSGI/ASGI apps from two root-level configuration files:
/// <c>gunicorn.conf.py</c> (via <c>WsgiAppPattern</c>) and <c>Procfile</c>
/// (via <c>GunicornCommandPattern</c> / <c>UvicornCommandPattern</c> on each process line).
/// Files that are missing or unreadable are skipped.
/// </summary>
/// <param name="cancellationToken">Passed to the file reads.</param>
private async Task DiscoverWsgiAsgiAppsAsync(CancellationToken cancellationToken)
{
    // Returns the text of a root-level file, or null when the file is not in the virtual
    // filesystem, not on disk, or cannot be read.
    async Task<string?> TryReadRootFileAsync(string relativePath)
    {
        if (!_vfs.FileExists(relativePath))
        {
            return null;
        }

        var fullPath = Path.Combine(_rootPath, relativePath);
        if (!File.Exists(fullPath))
        {
            return null;
        }

        try
        {
            return await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Best effort: skip unreadable files. A permission failure surfaces as
            // UnauthorizedAccessException, which does not derive from IOException.
            return null;
        }
    }

    // Check for gunicorn.conf.py or gunicorn configuration
    var gunicornConfig = await TryReadRootFileAsync("gunicorn.conf.py").ConfigureAwait(false);
    if (gunicornConfig is not null)
    {
        var match = WsgiAppPattern().Match(gunicornConfig);
        if (match.Success)
        {
            var target = match.Groups["app"].Value;
            _entrypoints.Add(new PythonEntrypoint(
                Name: "gunicorn-app",
                Kind: PythonEntrypointKind.WsgiApp,
                Target: target,
                VirtualPath: "gunicorn.conf.py",
                InvocationContext: PythonInvocationContext.AsWsgiApp("gunicorn", target),
                Confidence: PythonEntrypointConfidence.High,
                Source: "gunicorn.conf.py"));
        }
    }

    // Check for Procfile (common in Heroku deployments)
    var procfile = await TryReadRootFileAsync("Procfile").ConfigureAwait(false);
    if (procfile is null)
    {
        return;
    }

    foreach (var line in procfile.Split('\n'))
    {
        var trimmed = line.Trim();
        if (string.IsNullOrEmpty(trimmed) || trimmed.StartsWith('#'))
        {
            continue;
        }

        // Lines have the form "<process type>: <command>".
        var colonIndex = trimmed.IndexOf(':');
        if (colonIndex <= 0)
        {
            continue;
        }

        var procType = trimmed[..colonIndex].Trim();
        var command = trimmed[(colonIndex + 1)..].Trim();

        // Look for gunicorn/uvicorn commands. Both patterns are tried on every line,
        // so a single line can yield one entry of each kind.
        var gunicornMatch = GunicornCommandPattern().Match(command);
        if (gunicornMatch.Success)
        {
            var target = gunicornMatch.Groups["app"].Value;
            _entrypoints.Add(new PythonEntrypoint(
                Name: $"procfile-{procType}",
                Kind: PythonEntrypointKind.WsgiApp,
                Target: target,
                VirtualPath: "Procfile",
                InvocationContext: PythonInvocationContext.AsWsgiApp("gunicorn", target),
                Confidence: PythonEntrypointConfidence.Medium,
                Source: "Procfile"));
        }

        var uvicornMatch = UvicornCommandPattern().Match(command);
        if (uvicornMatch.Success)
        {
            var target = uvicornMatch.Groups["app"].Value;
            _entrypoints.Add(new PythonEntrypoint(
                Name: $"procfile-{procType}",
                Kind: PythonEntrypointKind.AsgiApp,
                Target: target,
                VirtualPath: "Procfile",
                InvocationContext: PythonInvocationContext.AsWsgiApp("uvicorn", target),
                Confidence: PythonEntrypointConfidence.Medium,
                Source: "Procfile"));
        }
    }
}
|
||||
|
||||
/// <summary>
/// Discovers Celery worker entrypoints: one per <c>celery.py</c> file found anywhere
/// in the virtual filesystem. Every entry is named <c>celery-worker</c>.
/// </summary>
/// <param name="cancellationToken">Checked before each file is processed.</param>
/// <returns>A completed task; nothing here is asynchronous, so the method is not marked <c>async</c>.</returns>
private Task DiscoverCeleryEntrypointsAsync(CancellationToken cancellationToken)
{
    // Only celery.py files are searched for.
    foreach (var file in _vfs.EnumerateFiles(string.Empty, "**/celery.py"))
    {
        cancellationToken.ThrowIfCancellationRequested();

        // "pkg/sub/celery.py" -> "pkg.sub.celery": join with dots, then drop the ".py" suffix.
        var parts = file.VirtualPath.Split('/');
        var modulePath = string.Join('.', parts)[..^3];

        _entrypoints.Add(new PythonEntrypoint(
            Name: "celery-worker",
            Kind: PythonEntrypointKind.CeleryWorker,
            Target: modulePath,
            VirtualPath: file.VirtualPath,
            InvocationContext: new PythonInvocationContext(
                PythonInvocationType.Module,
                $"celery -A {modulePath} worker"),
            Confidence: PythonEntrypointConfidence.Medium,
            Source: file.VirtualPath));
    }

    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Discovers serverless handlers: AWS Lambda (<c>lambda_function.py</c>, <c>handler.py</c>),
/// Azure Functions (a <c>function.json</c> with a sibling <c>__init__.py</c>), and
/// Google Cloud Functions (a root <c>main.py</c> that mentions <c>functions_framework</c>).
/// </summary>
/// <param name="cancellationToken">Checked before each function.json and passed to the file read.</param>
private async Task DiscoverLambdaHandlersAsync(CancellationToken cancellationToken)
{
    // AWS Lambda - lambda_function.py.
    // NOTE(review): the file is not inspected; the function name "handler" is assumed.
    // Confirm it matches the handler the deployment actually configures.
    if (_vfs.FileExists("lambda_function.py"))
    {
        _entrypoints.Add(new PythonEntrypoint(
            Name: "lambda-handler",
            Kind: PythonEntrypointKind.LambdaHandler,
            Target: "lambda_function.handler",
            VirtualPath: "lambda_function.py",
            InvocationContext: PythonInvocationContext.AsHandler("lambda_function.handler"),
            Confidence: PythonEntrypointConfidence.High,
            Source: "lambda_function.py"));
    }

    // Also check for handler.py (common Lambda pattern); lower confidence than lambda_function.py.
    if (_vfs.FileExists("handler.py"))
    {
        _entrypoints.Add(new PythonEntrypoint(
            Name: "lambda-handler",
            Kind: PythonEntrypointKind.LambdaHandler,
            Target: "handler.handler",
            VirtualPath: "handler.py",
            InvocationContext: PythonInvocationContext.AsHandler("handler.handler"),
            Confidence: PythonEntrypointConfidence.Medium,
            Source: "handler.py"));
    }

    // Azure Functions - look for function.json
    foreach (var file in _vfs.EnumerateFiles(string.Empty, "**/function.json"))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var parts = file.VirtualPath.Split('/');
        if (parts.Length < 2)
        {
            continue;
        }

        // The folder holding function.json gives the function its name.
        var functionName = parts[^2];

        // Check for __init__.py in the same directory
        var initPath = string.Join('/', parts[..^1]) + "/__init__.py";
        if (_vfs.FileExists(initPath))
        {
            _entrypoints.Add(new PythonEntrypoint(
                Name: functionName,
                Kind: PythonEntrypointKind.AzureFunctionHandler,
                Target: $"{functionName}:main",
                VirtualPath: initPath,
                InvocationContext: PythonInvocationContext.AsHandler($"{functionName}:main"),
                Confidence: PythonEntrypointConfidence.High,
                Source: file.VirtualPath));
        }
    }

    // Google Cloud Functions - main.py with specific patterns
    if (!_vfs.FileExists("main.py"))
    {
        return;
    }

    var fullPath = Path.Combine(_rootPath, "main.py");
    if (!File.Exists(fullPath))
    {
        return;
    }

    try
    {
        var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);

        // One check suffices: any text containing "@functions_framework" also
        // contains "functions_framework".
        if (content.Contains("functions_framework"))
        {
            // Fall back to "main" when the pattern does not capture a function name.
            var match = CloudFunctionPattern().Match(content);
            var functionName = match.Success ? match.Groups["name"].Value : "main";

            _entrypoints.Add(new PythonEntrypoint(
                Name: functionName,
                Kind: PythonEntrypointKind.CloudFunctionHandler,
                Target: $"main:{functionName}",
                VirtualPath: "main.py",
                InvocationContext: PythonInvocationContext.AsHandler($"main:{functionName}"),
                Confidence: PythonEntrypointConfidence.High,
                Source: "main.py"));
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: skip unreadable files. A permission failure surfaces as
        // UnauthorizedAccessException, which does not derive from IOException.
    }
}
|
||||
|
||||
/// <summary>
/// Discovers Click/Typer CLI applications by scanning the text of every non-zipapp,
/// non-archive Python source file. A Click file is registered only when it also has a
/// main guard; a Typer file is registered whenever the Typer markers are present.
/// A file matching both produces two entries.
/// </summary>
/// <param name="cancellationToken">Checked before each file and passed to the file read.</param>
private async Task DiscoverCliAppsAsync(CancellationToken cancellationToken)
{
    // "pkg/cli.py" -> "pkg.cli": swap separators, then drop the last three characters.
    // Assumes a ".py" suffix — TODO confirm IsPythonSource never admits other extensions.
    static string ToModulePath(string virtualPath) => virtualPath.Replace('/', '.')[..^3];

    // Look for files with Click or Typer patterns
    var pyFiles = _vfs.Files.Where(f => f.IsPythonSource && f.Source != PythonFileSource.Zipapp);

    foreach (var file in pyFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (file.IsFromArchive)
        {
            continue;
        }

        // Try the path relative to the root first, then the path exactly as recorded.
        var fullPath = Path.Combine(_rootPath, file.AbsolutePath);
        if (!File.Exists(fullPath))
        {
            fullPath = file.AbsolutePath;
            if (!File.Exists(fullPath))
            {
                continue;
            }
        }

        try
        {
            var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);

            // Check for Click CLI patterns
            if (content.Contains("@click.command") || content.Contains("@click.group"))
            {
                var match = ClickGroupPattern().Match(content);
                var cliName = match.Success ? match.Groups["name"].Value : Path.GetFileNameWithoutExtension(file.VirtualPath);

                // Check if there's a main guard
                if (content.Contains("if __name__") && content.Contains("__main__"))
                {
                    var modulePath = ToModulePath(file.VirtualPath);

                    _entrypoints.Add(new PythonEntrypoint(
                        Name: cliName,
                        Kind: PythonEntrypointKind.CliApp,
                        Target: $"{modulePath}:cli",
                        VirtualPath: file.VirtualPath,
                        InvocationContext: PythonInvocationContext.AsModule(modulePath),
                        Confidence: PythonEntrypointConfidence.Medium,
                        Source: file.VirtualPath));
                }
            }

            // Check for Typer CLI patterns
            if (content.Contains("typer.Typer") || content.Contains("@app.command"))
            {
                var modulePath = ToModulePath(file.VirtualPath);

                _entrypoints.Add(new PythonEntrypoint(
                    Name: Path.GetFileNameWithoutExtension(file.VirtualPath),
                    Kind: PythonEntrypointKind.CliApp,
                    Target: $"{modulePath}:app",
                    VirtualPath: file.VirtualPath,
                    InvocationContext: PythonInvocationContext.AsModule(modulePath),
                    Confidence: PythonEntrypointConfidence.Medium,
                    Source: file.VirtualPath));
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Best effort: skip unreadable files. A permission failure surfaces as
            // UnauthorizedAccessException, which does not derive from IOException.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Discovers standalone Python scripts: root-level <c>*.py</c> files that contain a main guard
/// or start with a shebang.
/// </summary>
/// <remarks>
/// Only files whose virtual path has no <c>/</c> are considered, <c>__main__.py</c> is excluded and
/// archive-backed files are skipped. A file with a main guard is recorded with
/// <see cref="PythonEntrypointConfidence.High"/>; a file that only starts with <c>#!</c> is recorded
/// with <see cref="PythonEntrypointConfidence.Medium"/>. Files that cannot be read are skipped.
/// </remarks>
/// <param name="cancellationToken">Checked before each file and forwarded to the file read.</param>
private async Task DiscoverStandaloneScriptsAsync(CancellationToken cancellationToken)
{
    // Look for Python files at the root level with main guards
    var rootPyFiles = _vfs.EnumerateFiles(string.Empty, "*.py")
        .Where(f => !f.VirtualPath.Contains('/') && f.VirtualPath != "__main__.py");

    foreach (var file in rootPyFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (file.IsFromArchive)
        {
            continue;
        }

        // Prefer the path resolved against the scan root; fall back to the path as given.
        var fullPath = Path.Combine(_rootPath, file.AbsolutePath);
        if (!File.Exists(fullPath))
        {
            fullPath = file.AbsolutePath;
            if (!File.Exists(fullPath))
            {
                continue;
            }
        }

        try
        {
            var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);

            // Check for main guard or shebang
            var hasMainGuard = content.Contains("if __name__") && content.Contains("__main__");

            // A shebang is a literal two-character prefix, so compare ordinally rather than by culture.
            var hasShebang = content.StartsWith("#!", StringComparison.Ordinal);

            if (hasMainGuard || hasShebang)
            {
                var name = Path.GetFileNameWithoutExtension(file.VirtualPath);

                _entrypoints.Add(new PythonEntrypoint(
                    Name: name,
                    Kind: PythonEntrypointKind.StandaloneScript,
                    Target: file.VirtualPath,
                    VirtualPath: file.VirtualPath,
                    InvocationContext: PythonInvocationContext.AsScript(file.VirtualPath),
                    Confidence: hasMainGuard ? PythonEntrypointConfidence.High : PythonEntrypointConfidence.Medium,
                    Source: file.VirtualPath));
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Skip unreadable files. Permission failures surface as UnauthorizedAccessException,
            // which is not an IOException, so both are handled to keep the scan going.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Matches a quoted <c>bind = '...'</c> (or double-quoted) assignment, case-insensitively.
/// The quoted value is captured in the group named <c>app</c>.
/// </summary>
[GeneratedRegex(@"bind\s*=\s*['""](?<app>[^'""]+)['""]", RegexOptions.IgnoreCase)]
private static partial Regex WsgiAppPattern();

/// <summary>
/// Matches a <c>gunicorn</c> command line, optionally with other arguments in between, followed by a
/// <c>module:callable</c> reference. The reference is captured in the group named <c>app</c>.
/// </summary>
[GeneratedRegex(@"gunicorn\s+(?:.*\s+)?(?<app>[\w.]+:[\w]+)", RegexOptions.IgnoreCase)]
private static partial Regex GunicornCommandPattern();

/// <summary>
/// Matches a <c>uvicorn</c> command immediately followed by a <c>module:callable</c> reference.
/// The reference is captured in the group named <c>app</c>.
/// </summary>
[GeneratedRegex(@"uvicorn\s+(?<app>[\w.]+:[\w]+)", RegexOptions.IgnoreCase)]
private static partial Regex UvicornCommandPattern();

/// <summary>
/// Matches a <c>@functions_framework.http</c> decorator followed on the next line by a <c>def</c>.
/// The function name is captured in the group named <c>name</c>.
/// </summary>
[GeneratedRegex(@"@functions_framework\.http\s*\n\s*def\s+(?<name>\w+)", RegexOptions.IgnoreCase)]
private static partial Regex CloudFunctionPattern();

/// <summary>
/// Matches an argument-less <c>@click.group()</c> decorator followed on the next line by a <c>def</c>.
/// The function name is captured in the group named <c>name</c>.
/// </summary>
[GeneratedRegex(@"@click\.group\(\)\s*\n\s*def\s+(?<name>\w+)", RegexOptions.IgnoreCase)]
private static partial Regex ClickGroupPattern();
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
|
||||
|
||||
/// <summary>
/// Classifies how a discovered Python entrypoint is defined or launched.
/// </summary>
internal enum PythonEntrypointKind
{
    /// <summary>A package's <c>__main__.py</c>, run with <c>python -m package</c>.</summary>
    PackageMain,

    /// <summary>An entry declared under <c>[console_scripts]</c> in <c>entry_points.txt</c>.</summary>
    ConsoleScript,

    /// <summary>An entry declared under <c>[gui_scripts]</c> in <c>entry_points.txt</c>.</summary>
    GuiScript,

    /// <summary>A script file located in a <c>*.data/scripts/</c> or <c>bin/</c> directory.</summary>
    Script,

    /// <summary>The <c>__main__.py</c> module of a zipapp.</summary>
    ZipappMain,

    /// <summary>A Django <c>manage.py</c> management script.</summary>
    DjangoManage,

    /// <summary>A WSGI application reference (Gunicorn/uWSGI).</summary>
    WsgiApp,

    /// <summary>An ASGI application reference (Uvicorn, Daphne).</summary>
    AsgiApp,

    /// <summary>A Celery worker or beat entrypoint.</summary>
    CeleryWorker,

    /// <summary>An AWS Lambda handler function.</summary>
    LambdaHandler,

    /// <summary>An Azure Functions handler.</summary>
    AzureFunctionHandler,

    /// <summary>A Google Cloud Function handler.</summary>
    CloudFunctionHandler,

    /// <summary>A Click or Typer command-line application.</summary>
    CliApp,

    /// <summary>A pytest or unittest test runner.</summary>
    TestRunner,

    /// <summary>A standalone <c>.py</c> file that has a shebang or a main guard.</summary>
    StandaloneScript
}
|
||||
@@ -0,0 +1,416 @@
|
||||
using System.Buffers.Binary;
|
||||
using System.Text;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
|
||||
|
||||
/// <summary>
/// Extracts imports from Python bytecode (.pyc) files.
/// Targets Python 3.8 - 3.12+ bytecode formats.
/// </summary>
/// <remarks>
/// Extraction is heuristic: the marshalled payload is scanned for string and bytes markers rather than
/// being fully unmarshalled, so results are best-effort. Imports accumulate in <see cref="Imports"/>
/// across calls on the same instance.
/// </remarks>
internal sealed class PythonBytecodeImportExtractor
{
    /// <summary>Opcode value treated as <c>IMPORT_NAME</c>.</summary>
    private const byte ImportNameOpcode = 108;

    /// <summary>Number of header bytes skipped before the marshalled payload.</summary>
    private const int HeaderSize = 16;

    /// <summary>Exclusive upper bound on the length of a string accepted into the name table.</summary>
    private const int NameLengthLimit = 256;

    /// <summary>Exclusive upper bound on the length of a bytes object accepted as the code section.</summary>
    private const int CodeLengthLimit = 100000;

    // Magic numbers for Python versions (first 2 bytes of .pyc)
    private static readonly IReadOnlyDictionary<int, string> PythonMagicNumbers = new Dictionary<int, string>
    {
        // Python 3.8
        [3413] = "3.8",
        [3415] = "3.8",
        // Python 3.9
        [3425] = "3.9",
        // Python 3.10
        [3435] = "3.10",
        [3437] = "3.10",
        [3439] = "3.10",
        // Python 3.11
        [3495] = "3.11",
        // Python 3.12
        [3531] = "3.12",
        // Python 3.13
        [3571] = "3.13",
    };

    // Common standard library top-level modules (non-exhaustive). Only real standard-library
    // names belong here: a match raises the confidence of a fallback-scan hit.
    private static readonly HashSet<string> StandardLibraryModules = new(StringComparer.Ordinal)
    {
        "os", "sys", "re", "io", "json", "time", "datetime",
        "collections", "itertools", "functools", "operator",
        "pathlib", "shutil", "glob", "fnmatch", "linecache",
        "pickle", "shelve", "dbm", "sqlite3",
        "zlib", "gzip", "bz2", "lzma", "zipfile", "tarfile",
        "csv", "configparser", "tomllib",
        "hashlib", "hmac", "secrets",
        "logging", "warnings", "traceback",
        "threading", "multiprocessing", "concurrent", "subprocess", "sched",
        "asyncio", "socket", "ssl", "select", "selectors",
        "email", "html", "xml", "urllib", "http",
        "unittest", "doctest",
        "typing", "types", "abc", "contextlib", "dataclasses",
        "importlib", "pkgutil", "zipimport",
        "copy", "pprint", "enum", "graphlib",
        "math", "cmath", "decimal", "fractions", "random", "statistics",
        "struct", "codecs", "unicodedata", "stringprep",
        "ctypes",
    };

    private readonly string _sourceFile;
    private readonly List<PythonImport> _imports = new();

    /// <summary>
    /// Creates an extractor whose imports are attributed to <paramref name="sourceFile"/>.
    /// </summary>
    /// <param name="sourceFile">Value recorded as the source file of every extracted import.</param>
    /// <exception cref="ArgumentNullException"><paramref name="sourceFile"/> is null.</exception>
    public PythonBytecodeImportExtractor(string sourceFile)
    {
        _sourceFile = sourceFile ?? throw new ArgumentNullException(nameof(sourceFile));
    }

    /// <summary>
    /// Gets all extracted imports.
    /// </summary>
    public IReadOnlyList<PythonImport> Imports => _imports;

    /// <summary>
    /// Gets the detected Python version, if the magic number was recognised.
    /// </summary>
    public string? PythonVersion { get; private set; }

    /// <summary>
    /// Extracts imports from Python bytecode.
    /// </summary>
    /// <param name="bytecode">Complete contents of a .pyc file, header included.</param>
    /// <returns>This instance, to allow chaining.</returns>
    public PythonBytecodeImportExtractor Extract(ReadOnlySpan<byte> bytecode)
    {
        if (bytecode.Length < HeaderSize)
        {
            return this; // Too short to be valid bytecode
        }

        // The version-specific magic number is the first 2 bytes, little-endian.
        var magic = BinaryPrimitives.ReadUInt16LittleEndian(bytecode);
        if (PythonMagicNumbers.TryGetValue(magic, out var version))
        {
            PythonVersion = version;
        }

        // Python 3.8+: 16 byte header, then the marshalled code object.
        if (bytecode.Length == HeaderSize)
        {
            return this;
        }

        ExtractFromMarshalledCode(bytecode[HeaderSize..]);
        return this;
    }

    /// <summary>
    /// Extracts imports from a stream containing a .pyc file.
    /// </summary>
    /// <param name="stream">Stream to read from its current position to the end.</param>
    /// <param name="cancellationToken">Token forwarded to the stream copy.</param>
    /// <returns>This instance; nothing is extracted when the stream is null, unreadable or empty.</returns>
    public async Task<PythonBytecodeImportExtractor> ExtractAsync(Stream stream, CancellationToken cancellationToken = default)
    {
        if (stream is null || !stream.CanRead)
        {
            return this;
        }

        // Drain the stream instead of issuing one ReadAsync sized by stream.Length: a single read may
        // return fewer bytes than requested, and Length is not supported by non-seekable streams.
        using var buffer = new MemoryStream();
        await stream.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);

        if (buffer.Length > 0)
        {
            Extract(buffer.GetBuffer().AsSpan(0, (int)buffer.Length));
        }

        return this;
    }

    /// <summary>
    /// Chooses between the bytecode-driven scan and the plain string fallback for a marshalled payload.
    /// </summary>
    private void ExtractFromMarshalledCode(ReadOnlySpan<byte> data)
    {
        if (data.Length < 2)
        {
            return;
        }

        // Without a TYPE_CODE (0x63) byte anywhere in the payload, fall back to scanning for
        // printable runs that look like module names.
        // NOTE(review): marshal can set FLAG_REF (0x80) on type bytes; this scan and the marker
        // checks below only look at the unflagged values - confirm against real .pyc samples.
        if (data.IndexOf((byte)0x63) < 0)
        {
            ScanForModuleStrings(data);
            return;
        }

        var names = ExtractNameTable(data);
        var codeBytes = ExtractCodeBytes(data);

        if (codeBytes.Length == 0 || names.Count == 0)
        {
            return;
        }

        // Scan bytecode for import instructions
        ScanBytecodeForImports(codeBytes, names);
    }

    /// <summary>
    /// Collects, in order of appearance, every marshalled string that looks like a module name.
    /// </summary>
    private static List<string> ExtractNameTable(ReadOnlySpan<byte> data)
    {
        var names = new List<string>();

        for (var pos = 0; pos < data.Length - 4; pos++)
        {
            var marker = data[pos];

            // TYPE_SHORT_ASCII (0x7A), TYPE_SHORT_ASCII_INTERNED (0x5A), TYPE_ASCII (0x61),
            // TYPE_UNICODE (0x75) or TYPE_STRING (0x73)
            if (marker is not (0x7A or 0x61 or 0x75 or 0x73 or 0x5A))
            {
                continue;
            }

            // Short forms carry a 1-byte length; the others a 4-byte little-endian length.
            // The loop bound guarantees four readable bytes after the marker.
            var isShortForm = marker is 0x7A or 0x5A;
            int length = isShortForm
                ? data[pos + 1]
                : BinaryPrimitives.ReadInt32LittleEndian(data[(pos + 1)..]);
            var stringStart = isShortForm ? pos + 2 : pos + 5;

            if (length <= 0 || length >= NameLengthLimit || stringStart + length > data.Length)
            {
                continue;
            }

            // Encoding.UTF8 substitutes U+FFFD for invalid bytes instead of throwing;
            // such strings are then rejected by IsValidModuleName.
            var candidate = Encoding.UTF8.GetString(data.Slice(stringStart, length));
            if (IsValidModuleName(candidate))
            {
                names.Add(candidate);
            }
        }

        return names;
    }

    /// <summary>
    /// Returns the first plausibly sized bytes object in the payload, or an empty span when none is found.
    /// </summary>
    private static ReadOnlySpan<byte> ExtractCodeBytes(ReadOnlySpan<byte> data)
    {
        for (var i = 0; i < data.Length - 5; i++)
        {
            // TYPE_STRING (0x73), or 0x43 which the original author labelled TYPE_CODE_STRING
            // NOTE(review): 0x43 does not look like a standard marshal type code - confirm.
            if (data[i] != 0x73 && data[i] != 0x43)
            {
                continue;
            }

            var length = BinaryPrimitives.ReadInt32LittleEndian(data[(i + 1)..]);
            if (length > 0 && length < CodeLengthLimit && i + 5 + length <= data.Length)
            {
                return data.Slice(i + 5, length);
            }
        }

        return ReadOnlySpan<byte>.Empty;
    }

    /// <summary>
    /// Walks the code as 2-byte (opcode, argument) units and records an import for every
    /// <c>IMPORT_NAME</c> whose argument indexes into <paramref name="names"/>.
    /// </summary>
    private void ScanBytecodeForImports(ReadOnlySpan<byte> code, List<string> names)
    {
        // IMPORT_FROM and IMPORT_STAR refer to the module of the preceding IMPORT_NAME,
        // so only IMPORT_NAME contributes a module here.
        for (var i = 0; i + 1 < code.Length; i += 2)
        {
            if (code[i] != ImportNameOpcode)
            {
                continue;
            }

            // arg is index into names tuple
            var arg = code[i + 1];
            if (arg < names.Count)
            {
                AddImport(names[arg], PythonImportKind.Import);
            }
        }
    }

    /// <summary>
    /// Fallback: treats every run of printable ASCII letters, digits, '_' and '.' as a candidate module name.
    /// Less accurate than the bytecode scan, but does not depend on the marshal layout.
    /// </summary>
    private void ScanForModuleStrings(ReadOnlySpan<byte> data)
    {
        var candidate = new StringBuilder();

        foreach (var b in data)
        {
            var isNameChar = b is >= 0x20 and <= 0x7E
                && (char.IsLetterOrDigit((char)b) || b == (byte)'_' || b == (byte)'.');

            if (isNameChar)
            {
                candidate.Append((char)b);
            }
            else if (candidate.Length > 0)
            {
                CheckAndAddModuleName(candidate.ToString());
                candidate.Clear();
            }
        }

        if (candidate.Length > 0)
        {
            CheckAndAddModuleName(candidate.ToString());
        }
    }

    /// <summary>
    /// Records <paramref name="str"/> as an import when it passes the module-name filters.
    /// Known standard-library names get medium confidence, everything else low.
    /// </summary>
    private void CheckAndAddModuleName(string str)
    {
        if (string.IsNullOrEmpty(str) || str.Length < 2)
        {
            return;
        }

        // Filter for likely module names
        if (!IsValidModuleName(str))
        {
            return;
        }

        // Skip Python internals (dunder names); these are identifiers, so compare ordinally.
        if (str.StartsWith("__", StringComparison.Ordinal) && str.EndsWith("__", StringComparison.Ordinal))
        {
            return;
        }

        var confidence = IsStandardLibraryModule(str)
            ? PythonImportConfidence.Medium
            : PythonImportConfidence.Low;

        AddImport(str, PythonImportKind.Import, confidence);
    }

    /// <summary>
    /// Adds an import unless an equivalent one is already recorded. Leading dots are turned into a
    /// relative level and force the kind to <see cref="PythonImportKind.RelativeImport"/>.
    /// </summary>
    private void AddImport(string module, PythonImportKind kind, PythonImportConfidence confidence = PythonImportConfidence.Medium)
    {
        // Normalise first so the duplicate check compares against the values that are actually stored.
        var moduleName = module.TrimStart('.');
        var relativeLevel = module.Length - moduleName.Length;

        if (relativeLevel > 0)
        {
            kind = PythonImportKind.RelativeImport;
        }

        // Avoid duplicates
        if (_imports.Any(i => i.Module == moduleName && i.Kind == kind && i.RelativeLevel == relativeLevel))
        {
            return;
        }

        _imports.Add(new PythonImport(
            Module: moduleName,
            Names: null,
            Alias: null,
            Kind: kind,
            RelativeLevel: relativeLevel,
            SourceFile: _sourceFile,
            LineNumber: null, // Line numbers not available in bytecode
            Confidence: confidence));
    }

    /// <summary>
    /// Returns true when <paramref name="str"/> is at most 100 characters, starts with a letter or
    /// underscore, and contains only letters, digits, underscores and dots.
    /// </summary>
    private static bool IsValidModuleName(string str)
    {
        if (string.IsNullOrEmpty(str) || str.Length > 100)
        {
            return false;
        }

        // Must start with letter or underscore (this also rules out all-digit strings)
        if (!char.IsLetter(str[0]) && str[0] != '_')
        {
            return false;
        }

        // Must contain only valid identifier characters and dots
        foreach (var c in str)
        {
            if (!char.IsLetterOrDigit(c) && c != '_' && c != '.')
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Returns true when the top-level package of <paramref name="module"/> is in the built-in
    /// (non-exhaustive) list of standard-library modules.
    /// </summary>
    private static bool IsStandardLibraryModule(string module)
    {
        var topLevel = module.Split('.')[0];
        return StandardLibraryModules.Contains(topLevel);
    }
}
|
||||
@@ -0,0 +1,149 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
|
||||
|
||||
/// <summary>
/// How certain the analyzer is about a discovered import, ordered from least to most certain.
/// </summary>
internal enum PythonImportConfidence
{
    /// <summary>Dynamic or otherwise uncertain import target.</summary>
    Low = 0,

    /// <summary>Import inferred from surrounding context.</summary>
    Medium = 1,

    /// <summary>Clear static import.</summary>
    High = 2,

    /// <summary>Direct static import statement.</summary>
    Definitive = 3
}
|
||||
|
||||
/// <summary>
/// A single Python import, as found in source or bytecode.
/// </summary>
/// <param name="Module">Imported module (e.g., 'os.path'); for relative imports, the part after the leading dots.</param>
/// <param name="Names">Names pulled from the module ('from x import a, b'); null for a plain 'import x'.</param>
/// <param name="Alias">Alias of the import, if any (e.g., 'import numpy as np').</param>
/// <param name="Kind">Kind of import statement.</param>
/// <param name="RelativeLevel">Count of leading dots of a relative import; 0 for an absolute import.</param>
/// <param name="SourceFile">File in which the import was found.</param>
/// <param name="LineNumber">1-based line in the source file; null when the import comes from bytecode.</param>
/// <param name="Confidence">Confidence attached to this import.</param>
/// <param name="IsConditional">True when the import sits inside a try/except or if block.</param>
/// <param name="IsLazy">True when the import sits inside a function rather than at module level.</param>
/// <param name="IsTypeCheckingOnly">True when the import sits inside a TYPE_CHECKING block.</param>
internal sealed record PythonImport(
    string Module,
    IReadOnlyList<PythonImportedName>? Names,
    string? Alias,
    PythonImportKind Kind,
    int RelativeLevel,
    string SourceFile,
    int? LineNumber,
    PythonImportConfidence Confidence,
    bool IsConditional = false,
    bool IsLazy = false,
    bool IsTypeCheckingOnly = false)
{
    /// <summary>
    /// True when <see cref="RelativeLevel"/> is positive.
    /// </summary>
    public bool IsRelative => RelativeLevel > 0;

    /// <summary>
    /// True when exactly one name is imported and that name is '*'.
    /// </summary>
    public bool IsStar => Names is { Count: 1 } names && names[0].Name == "*";

    /// <summary>
    /// True when the module is '__future__'.
    /// </summary>
    public bool IsFuture => Module == "__future__";

    /// <summary>
    /// The module name as written: <see cref="Module"/> for absolute imports, or the module
    /// prefixed with <see cref="RelativeLevel"/> dots for relative imports (e.g., '.foo').
    /// </summary>
    public string QualifiedModule =>
        IsRelative
            ? new string('.', RelativeLevel) + Module // an empty or null module leaves just the dots
            : Module;

    /// <summary>
    /// The imported names as plain strings ('from x import a, b' gives ['a', 'b']); empty when there are none.
    /// </summary>
    public IEnumerable<string> ImportedNames =>
        Names is null
            ? Enumerable.Empty<string>()
            : Names.Select(static n => n.Name);

    /// <summary>
    /// Produces metadata entries for this import, keyed under <c>import[index]</c>.
    /// </summary>
    /// <param name="index">Index embedded in every key.</param>
    /// <returns>
    /// Module, kind and confidence entries always; alias, names, line and the three flags only when set.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(int index)
    {
        KeyValuePair<string, string?> Entry(string suffix, string? value) =>
            new($"import[{index}].{suffix}", value);

        yield return Entry("module", QualifiedModule);
        yield return Entry("kind", Kind.ToString());
        yield return Entry("confidence", Confidence.ToString());

        if (Alias is not null)
        {
            yield return Entry("alias", Alias);
        }

        // A star import carries no useful name list.
        if (Names is { Count: > 0 } && !IsStar)
        {
            yield return Entry("names", string.Join(',', ImportedNames));
        }

        if (LineNumber.HasValue)
        {
            yield return Entry("line", LineNumber.Value.ToString());
        }

        if (IsConditional)
        {
            yield return Entry("conditional", "true");
        }

        if (IsLazy)
        {
            yield return Entry("lazy", "true");
        }

        if (IsTypeCheckingOnly)
        {
            yield return Entry("typeCheckingOnly", "true");
        }
    }
}
|
||||
|
||||
/// <summary>
/// One name imported from a module, with its optional alias.
/// </summary>
/// <param name="Name">Name as it appears in the module.</param>
/// <param name="Alias">Alias given to the name, or null when there is none.</param>
internal sealed record PythonImportedName(string Name, string? Alias = null)
{
    /// <summary>
    /// The alias when one is present; otherwise the name itself.
    /// </summary>
    public string EffectiveName => Alias is null ? Name : Alias;
}
|
||||
@@ -0,0 +1,381 @@
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
|
||||
|
||||
/// <summary>
|
||||
/// Result of Python import analysis.
|
||||
/// </summary>
|
||||
internal sealed class PythonImportAnalysis
|
||||
{
|
||||
/// <summary>
/// Stores the already-computed analysis parts; instances are created through <see cref="AnalyzeAsync"/>.
/// </summary>
private PythonImportAnalysis(
    PythonImportGraph graph,
    IReadOnlyList<PythonImport> allImports,
    IReadOnlyList<PythonImport> standardLibraryImports,
    IReadOnlyList<PythonImport> thirdPartyImports,
    IReadOnlyList<PythonImport> localImports,
    IReadOnlyList<PythonImport> relativeImports,
    IReadOnlyList<PythonImport> dynamicImports,
    IReadOnlyList<IReadOnlyList<string>> cycles)
{
    // Graph-level results.
    (Graph, Cycles) = (graph, cycles);

    // Full import list.
    AllImports = allImports;

    // Per-category views.
    (StandardLibraryImports, ThirdPartyImports, LocalImports) =
        (standardLibraryImports, thirdPartyImports, localImports);
    (RelativeImports, DynamicImports) = (relativeImports, dynamicImports);
}
|
||||
|
||||
/// <summary>The import graph this analysis was built from.</summary>
public PythonImportGraph Graph { get; }

/// <summary>Every discovered import.</summary>
public IReadOnlyList<PythonImport> AllImports { get; }

/// <summary>Imports classified as Python standard library.</summary>
public IReadOnlyList<PythonImport> StandardLibraryImports { get; }

/// <summary>Imports classified as third-party packages.</summary>
public IReadOnlyList<PythonImport> ThirdPartyImports { get; }

/// <summary>Imports classified as local modules (resolved in the project).</summary>
public IReadOnlyList<PythonImport> LocalImports { get; }

/// <summary>Relative imports.</summary>
public IReadOnlyList<PythonImport> RelativeImports { get; }

/// <summary>Dynamic imports (importlib.import_module, __import__, etc.).</summary>
public IReadOnlyList<PythonImport> DynamicImports { get; }

/// <summary>Detected import cycles.</summary>
public IReadOnlyList<IReadOnlyList<string>> Cycles { get; }

/// <summary>True when at least one import cycle was detected.</summary>
public bool HasCycles => Cycles.Count != 0;

/// <summary>Number of modules in <see cref="Graph"/>.</summary>
public int TotalModules => Graph.Modules.Count;

/// <summary>Number of unresolved modules in <see cref="Graph"/>.</summary>
public int UnresolvedModuleCount => Graph.UnresolvedModules.Count;
|
||||
|
||||
/// <summary>
/// Builds the import graph for a virtual filesystem and classifies every discovered import.
/// </summary>
/// <param name="vfs">Virtual filesystem handed to the import graph.</param>
/// <param name="rootPath">Root path handed to the import graph.</param>
/// <param name="cancellationToken">Token forwarded to the graph build.</param>
/// <returns>
/// An analysis whose imports are ordered by source file (ordinal) and then by line number,
/// with imports lacking a line number last within their file.
/// </returns>
public static async Task<PythonImportAnalysis> AnalyzeAsync(
    PythonVirtualFileSystem vfs,
    string rootPath,
    CancellationToken cancellationToken = default)
{
    var graph = new PythonImportGraph(vfs, rootPath);
    await graph.BuildAsync(cancellationToken).ConfigureAwait(false);

    var allImports = graph.ImportsByFile.Values
        .SelectMany(static list => list)
        .OrderBy(static i => i.SourceFile, StringComparer.Ordinal)
        .ThenBy(static i => i.LineNumber ?? int.MaxValue)
        .ToList();

    var standardLibrary = new List<PythonImport>();
    var thirdParty = new List<PythonImport>();
    var local = new List<PythonImport>();

    foreach (var import in allImports)
    {
        // Classify with the predicate directly. Searching the standard-library list for every
        // import was a quadratic record-equality scan that yields the same answer.
        // NOTE(review): the standard-library test looks only at Module, so a relative import whose
        // module shares a standard-library name is counted here too - confirm that is intended.
        if (IsStandardLibrary(import.Module))
        {
            standardLibrary.Add(import);
            continue;
        }

        if (import.IsRelative)
        {
            continue; // Handled separately
        }

        // Check if the module is in the graph (local) or not (third-party)
        var isLocal = graph.Modules.ContainsKey(import.Module)
            || graph.Modules.ContainsKey(import.Module.Split('.')[0]);

        (isLocal ? local : thirdParty).Add(import);
    }

    var relative = allImports
        .Where(static i => i.IsRelative)
        .ToList();

    var dynamic = allImports
        .Where(static i => i.Kind is PythonImportKind.ImportlibImportModule or
            PythonImportKind.BuiltinImport or
            PythonImportKind.PkgutilExtendPath)
        .ToList();

    var cycles = graph.FindCycles();

    return new PythonImportAnalysis(
        graph,
        allImports,
        standardLibrary,
        thirdParty,
        local,
        relative,
        dynamic,
        cycles);
}
|
||||
|
||||
/// <summary>
/// Produces the summary metadata entries for this analysis.
/// </summary>
/// <returns>
/// Count entries for each import category, module totals and a cycle flag; plus the cycle count and the
/// semicolon-separated, ordinally sorted third-party and dynamic module lists when those are non-empty.
/// </returns>
public IEnumerable<KeyValuePair<string, string?>> ToMetadata()
{
    static KeyValuePair<string, string?> Entry(string key, string? value) => new(key, value);

    yield return Entry("imports.total", AllImports.Count.ToString());
    yield return Entry("imports.stdlib", StandardLibraryImports.Count.ToString());
    yield return Entry("imports.thirdParty", ThirdPartyImports.Count.ToString());
    yield return Entry("imports.local", LocalImports.Count.ToString());
    yield return Entry("imports.relative", RelativeImports.Count.ToString());
    yield return Entry("imports.dynamic", DynamicImports.Count.ToString());
    yield return Entry("imports.modules", TotalModules.ToString());
    yield return Entry("imports.unresolved", UnresolvedModuleCount.ToString());
    yield return Entry("imports.hasCycles", HasCycles ? "true" : "false");

    if (HasCycles)
    {
        yield return Entry("imports.cycleCount", Cycles.Count.ToString());
    }

    // Top-level package names of the third-party imports, de-duplicated and sorted.
    var thirdPartyModules = ThirdPartyImports
        .Select(static i => i.Module.Split('.')[0])
        .Distinct(StringComparer.Ordinal)
        .OrderBy(static m => m, StringComparer.Ordinal)
        .ToList();

    if (thirdPartyModules.Count > 0)
    {
        yield return Entry("imports.thirdParty.modules", string.Join(';', thirdPartyModules));
    }

    // Full module names of the dynamic imports, de-duplicated and sorted.
    if (DynamicImports.Count > 0)
    {
        var dynamicModules = DynamicImports
            .Select(static i => i.Module)
            .Distinct(StringComparer.Ordinal)
            .OrderBy(static m => m, StringComparer.Ordinal)
            .ToList();

        yield return Entry("imports.dynamic.modules", string.Join(';', dynamicModules));
    }
}
|
||||
|
||||
/// <summary>
/// Lazily filters <see cref="AllImports"/> down to the imports of the given kind.
/// </summary>
/// <param name="kind">Kind an import must have to be returned.</param>
public IEnumerable<PythonImport> GetByKind(PythonImportKind kind)
{
    return from candidate in AllImports
           where candidate.Kind == kind
           select candidate;
}
|
||||
|
||||
/// <summary>
/// Lazily filters <see cref="AllImports"/> down to the imports at or above a confidence level.
/// </summary>
/// <param name="minConfidence">Lowest confidence an import may have to be returned.</param>
public IEnumerable<PythonImport> GetByConfidence(PythonImportConfidence minConfidence)
{
    return from candidate in AllImports
           where candidate.Confidence >= minConfidence
           select candidate;
}
|
||||
|
||||
/// <summary>
/// Lazily filters <see cref="AllImports"/> down to imports of a module or of anything beneath it.
/// </summary>
/// <param name="modulePath">Dotted module path; matches that module and any 'modulePath.*' submodule.</param>
public IEnumerable<PythonImport> GetImportsFor(string modulePath)
{
    // Submodules are recognised by the dotted prefix, compared ordinally.
    var submodulePrefix = $"{modulePath}.";

    return AllImports.Where(candidate =>
        candidate.Module == modulePath ||
        candidate.Module.StartsWith(submodulePrefix, StringComparison.Ordinal));
}
|
||||
|
||||
/// <summary>
/// Collects every module reachable from <paramref name="modulePath"/> by following dependencies in the graph.
/// </summary>
/// <param name="modulePath">Module to start from; it is only part of the result if it is reachable from itself.</param>
/// <returns>Ordinally compared set of reachable module paths.</returns>
public IReadOnlySet<string> GetTransitiveDependencies(string modulePath)
{
    var reachable = new HashSet<string>(StringComparer.Ordinal);
    var pending = new Queue<string>();
    pending.Enqueue(modulePath);

    // Breadth-first walk; a module is expanded only the first time it is seen.
    while (pending.TryDequeue(out var current))
    {
        foreach (var dependency in Graph.GetDependencies(current))
        {
            if (!reachable.Add(dependency.ModulePath))
            {
                continue;
            }

            pending.Enqueue(dependency.ModulePath);
        }
    }

    return reachable;
}
|
||||
|
||||
/// <summary>
/// Collects every module that reaches <paramref name="modulePath"/> by following dependents in the graph.
/// </summary>
/// <param name="modulePath">Module to start from; it is only part of the result if it depends on itself transitively.</param>
/// <returns>Ordinally compared set of dependent module paths.</returns>
public IReadOnlySet<string> GetTransitiveDependents(string modulePath)
{
    var dependents = new HashSet<string>(StringComparer.Ordinal);
    var pending = new Queue<string>();
    pending.Enqueue(modulePath);

    // Breadth-first walk; a module is expanded only the first time it is seen.
    while (pending.TryDequeue(out var current))
    {
        foreach (var dependent in Graph.GetDependents(current))
        {
            if (!dependents.Add(dependent.ModulePath))
            {
                continue;
            }

            pending.Enqueue(dependent.ModulePath);
        }
    }

    return dependents;
}
|
||||
|
||||
/// <summary>
/// Determines whether a dotted module path belongs to the Python standard library.
/// </summary>
/// <param name="module">Dotted module path, e.g. <c>os.path</c>.</param>
/// <returns>
/// <c>true</c> when the first segment of <paramref name="module"/> is a known standard-library
/// top-level name; otherwise <c>false</c>.
/// </returns>
private static bool IsStandardLibrary(string module)
{
    // Only the top-level segment decides; slice instead of Split to avoid allocating an array.
    var dot = module.IndexOf('.');
    var topLevel = dot < 0 ? module : module[..dot];

    // Every name appears exactly once. Names containing a dot can never match a
    // top-level segment, so none are listed.
    return topLevel switch
    {
        // Built-in types and functions
        "builtins" or "__future__" or "abc" or "types" or "typing" => true,
        "typing_extensions" => false, // This is third-party

        // String and text processing
        "string" or "re" or "difflib" or "textwrap" or "unicodedata" or "stringprep" => true,
        "readline" or "rlcompleter" => true,

        // Binary data
        "struct" or "codecs" or "encodings" => true,

        // Data types
        "datetime" or "zoneinfo" or "calendar" or "collections" or "heapq" => true,
        "bisect" or "array" or "weakref" or "copy" or "pprint" => true,
        "reprlib" or "enum" or "graphlib" => true,

        // Numeric and math
        "numbers" or "math" or "cmath" or "decimal" or "fractions" => true,
        "random" or "statistics" => true,

        // Functional programming
        "itertools" or "functools" or "operator" => true,

        // File and directory
        "pathlib" or "fileinput" or "stat" or "filecmp" or "tempfile" => true,
        "glob" or "fnmatch" or "linecache" or "shutil" => true,
        "posixpath" or "ntpath" or "genericpath" => true,

        // Data persistence
        "pickle" or "copyreg" or "shelve" or "marshal" or "dbm" or "sqlite3" => true,

        // Compression
        "zlib" or "gzip" or "bz2" or "lzma" or "zipfile" or "tarfile" => true,

        // File formats
        "csv" or "configparser" or "tomllib" or "netrc" or "plistlib" => true,

        // Cryptographic
        "hashlib" or "hmac" or "secrets" => true,

        // OS services
        "os" or "io" or "time" or "argparse" or "getopt" or "logging" => true,
        "getpass" or "curses" or "platform" or "errno" or "ctypes" => true,

        // Concurrent execution
        "threading" or "multiprocessing" or "concurrent" or "subprocess" => true,
        "sched" or "queue" or "_thread" or "contextvars" => true,

        // Networking
        "asyncio" or "socket" or "ssl" or "select" or "selectors" => true,
        "signal" or "mmap" => true,

        // Internet data handling
        "email" or "json" or "mailbox" or "mimetypes" or "base64" => true,
        "binascii" or "quopri" or "html" or "xml" => true,

        // Internet protocols - urllib, http
        "urllib" or "http" or "ftplib" or "poplib" or "imaplib" => true,
        "smtplib" or "uuid" or "socketserver" or "xmlrpc" or "ipaddress" => true,
        "webbrowser" or "wsgiref" => true,

        // Multimedia
        "wave" or "colorsys" => true,

        // Internationalization
        "gettext" or "locale" => true,

        // Program frameworks and GUI
        "turtle" or "cmd" or "shlex" or "tkinter" or "idlelib" => true,

        // Development tools
        "pydoc" or "doctest" or "unittest" or "test" => true,

        // Debugging and profiling
        "bdb" or "faulthandler" or "pdb" or "timeit" or "trace" or "tracemalloc" => true,
        "cProfile" or "profile" or "pstats" => true,

        // Software packaging
        "distutils" or "ensurepip" or "venv" or "zipapp" => true,

        // Python runtime
        "sys" or "sysconfig" or "warnings" or "dataclasses" => true,
        "contextlib" or "atexit" or "traceback" or "gc" or "inspect" or "site" => true,
        "code" or "codeop" => true,

        // Import system
        "importlib" or "pkgutil" or "modulefinder" or "runpy" or "zipimport" => true,

        // Language services
        "ast" or "symtable" or "token" or "keyword" or "tokenize" or "tabnanny" => true,
        "pyclbr" or "py_compile" or "compileall" or "dis" or "pickletools" or "opcode" => true,

        // MS Windows specific
        "msvcrt" or "winreg" or "winsound" or "nt" => true,

        // Unix specific
        "posix" or "pwd" or "grp" or "termios" or "tty" or "pty" => true,
        "fcntl" or "pipes" or "resource" or "syslog" => true,

        // Superseded / removed-in-recent-versions modules (still stdlib on older interpreters)
        "aifc" or "audioop" or "cgi" or "cgitb" or "chunk" or "crypt" => true,
        "imghdr" or "mailcap" or "msilib" or "nis" or "nntplib" or "optparse" => true,
        "ossaudiodev" or "sndhdr" or "spwd" or "sunau" or "telnetlib" => true,
        "uu" or "xdrlib" or "asynchat" or "asyncore" or "smtpd" => true,
        "imp" or "lib2to3" or "binhex" => true,

        _ => false
    };
}
|
||||
}
|
||||
@@ -0,0 +1,570 @@
|
||||
using System.Collections.Frozen;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
|
||||
|
||||
/// <summary>
/// A single module (or package) vertex of the import graph.
/// </summary>
/// <param name="ModulePath">The fully qualified, dot-separated module path.</param>
/// <param name="VirtualPath">The backing file in the VFS, or <c>null</c> when there is none.</param>
/// <param name="IsPackage">Whether this is a package (__init__.py).</param>
/// <param name="IsNamespacePackage">Whether this is a namespace package (no __init__.py).</param>
/// <param name="Source">The file source type.</param>
internal sealed record PythonModuleNode(
    string ModulePath,
    string? VirtualPath,
    bool IsPackage,
    bool IsNamespacePackage,
    PythonFileSource Source)
{
    /// <summary>
    /// Gets whether a backing file is known for this module.
    /// </summary>
    public bool IsResolved
    {
        get { return VirtualPath != null; }
    }

    /// <summary>
    /// Gets the part of <see cref="ModulePath"/> before the first dot (the whole path when there is no dot).
    /// </summary>
    public string TopLevelPackage => ModulePath.IndexOf('.') switch
    {
        < 0 => ModulePath,
        var firstDot => ModulePath[..firstDot],
    };
}
|
||||
|
||||
/// <summary>
/// A directed edge of the import graph: <paramref name="From"/> imports <paramref name="To"/>.
/// </summary>
/// <param name="From">The importing module.</param>
/// <param name="To">The imported module.</param>
/// <param name="Import">The import statement that created this edge.</param>
internal sealed record PythonImportEdge(
    string From,
    string To,
    PythonImport Import)
{
    /// <summary>
    /// Gets a <c>"From->To"</c> string identifying the edge's endpoints, usable for deduplication.
    /// </summary>
    public string Key
    {
        get { return string.Concat(From, "->", To); }
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Builds and represents a Python import dependency graph.
|
||||
/// </summary>
|
||||
internal sealed class PythonImportGraph
|
||||
{
|
||||
private readonly PythonVirtualFileSystem _vfs;
|
||||
private readonly string _rootPath;
|
||||
private readonly Dictionary<string, PythonModuleNode> _modules = new(StringComparer.Ordinal);
|
||||
private readonly Dictionary<string, List<PythonImportEdge>> _edges = new(StringComparer.Ordinal);
|
||||
private readonly Dictionary<string, List<PythonImportEdge>> _reverseEdges = new(StringComparer.Ordinal);
|
||||
private readonly Dictionary<string, List<PythonImport>> _importsByFile = new(StringComparer.Ordinal);
|
||||
private readonly HashSet<string> _unresolvedModules = new(StringComparer.Ordinal);
|
||||
|
||||
/// <summary>
/// Creates an empty graph bound to a virtual filesystem and a root directory.
/// </summary>
/// <param name="vfs">Virtual filesystem whose files are scanned by <c>BuildAsync</c>.</param>
/// <param name="rootPath">Root directory that file paths are combined with when reading from disk.</param>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public PythonImportGraph(PythonVirtualFileSystem vfs, string rootPath)
{
    ArgumentNullException.ThrowIfNull(vfs);
    ArgumentNullException.ThrowIfNull(rootPath);

    _vfs = vfs;
    _rootPath = rootPath;
}
|
||||
|
||||
/// <summary>
/// Gets the discovered modules keyed by module path.
/// </summary>
public IReadOnlyDictionary<string, PythonModuleNode> Modules
{
    get { return _modules; }
}

/// <summary>
/// Gets the outgoing edges (dependencies) keyed by importing module.
/// </summary>
public IReadOnlyDictionary<string, List<PythonImportEdge>> Edges
{
    get { return _edges; }
}

/// <summary>
/// Gets the incoming edges (dependents) keyed by imported module.
/// </summary>
public IReadOnlyDictionary<string, List<PythonImportEdge>> ReverseEdges
{
    get { return _reverseEdges; }
}

/// <summary>
/// Gets the extracted imports keyed by the virtual path of the file they came from.
/// </summary>
public IReadOnlyDictionary<string, List<PythonImport>> ImportsByFile
{
    get { return _importsByFile; }
}

/// <summary>
/// Gets the import targets that do not correspond to any discovered module.
/// </summary>
public IReadOnlySet<string> UnresolvedModules
{
    get { return _unresolvedModules; }
}

/// <summary>
/// Gets the number of import statements summed over all files.
/// </summary>
public int TotalImports
{
    get { return _importsByFile.Sum(static entry => entry.Value.Count); }
}
|
||||
|
||||
/// <summary>
/// Populates the graph: discovers modules, extracts imports from each file, then links edges.
/// </summary>
/// <param name="cancellationToken">Token observed while files are being read.</param>
/// <returns>This instance, once the graph has been built.</returns>
public async Task<PythonImportGraph> BuildAsync(CancellationToken cancellationToken = default)
{
    // Phase 1: register a node for every Python file (and its parent packages).
    DiscoverModules();

    // Phase 2: read each file and collect its import statements.
    await ExtractImportsAsync(cancellationToken).ConfigureAwait(false);

    // Phase 3: turn the collected imports into forward and reverse edges.
    BuildEdges();

    return this;
}
|
||||
|
||||
/// <summary>
/// Enumerates the known modules that <paramref name="modulePath"/> imports.
/// </summary>
/// <param name="modulePath">The importing module.</param>
/// <returns>
/// One node per outgoing edge whose target is a discovered module; targets that are not
/// in <see cref="Modules"/> are skipped, and repeated edges yield repeated nodes.
/// </returns>
public IEnumerable<PythonModuleNode> GetDependencies(string modulePath)
{
    if (_edges.TryGetValue(modulePath, out var outgoing))
    {
        foreach (var link in outgoing)
        {
            if (!_modules.TryGetValue(link.To, out var target))
            {
                continue;
            }

            yield return target;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Enumerates the known modules that import <paramref name="modulePath"/>.
/// </summary>
/// <param name="modulePath">The imported module.</param>
/// <returns>
/// One node per incoming edge whose source is a discovered module; sources that are not
/// in <see cref="Modules"/> are skipped, and repeated edges yield repeated nodes.
/// </returns>
public IEnumerable<PythonModuleNode> GetDependents(string modulePath)
{
    if (_reverseEdges.TryGetValue(modulePath, out var incoming))
    {
        foreach (var link in incoming)
        {
            if (!_modules.TryGetValue(link.From, out var importer))
            {
                continue;
            }

            yield return importer;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Gets the imports extracted from a single file.
/// </summary>
/// <param name="virtualPath">Virtual path of the file, as used as key in <see cref="ImportsByFile"/>.</param>
/// <returns>The file's imports, or an empty list when none were recorded for that path.</returns>
public IReadOnlyList<PythonImport> GetImportsForFile(string virtualPath)
{
    if (_importsByFile.TryGetValue(virtualPath, out var recorded))
    {
        return recorded;
    }

    return Array.Empty<PythonImport>();
}
|
||||
|
||||
/// <summary>
/// Checks whether an import cycle can be reached by following edges from the given module.
/// </summary>
/// <param name="modulePath">Module the depth-first search starts from.</param>
/// <returns>
/// <c>true</c> when the search from <paramref name="modulePath"/> runs into a module that is
/// still on the current search path. The cycle found does not have to pass through
/// <paramref name="modulePath"/> itself.
/// </returns>
public bool HasCycle(string modulePath)
{
    return HasCycleInternal(
        modulePath,
        visited: new HashSet<string>(StringComparer.Ordinal),
        stack: new HashSet<string>(StringComparer.Ordinal));
}
|
||||
|
||||
/// <summary>
/// Collects the import cycles found by a depth-first search started from every module.
/// </summary>
/// <returns>
/// The cycles found, each listed from its first module back to that same module.
/// The visited set is shared across start modules, so each module is expanded only once.
/// </returns>
public IReadOnlyList<IReadOnlyList<string>> FindCycles()
{
    var found = new List<IReadOnlyList<string>>();
    var seen = new HashSet<string>(StringComparer.Ordinal);
    var path = new List<string>();
    var onPath = new HashSet<string>(StringComparer.Ordinal);

    foreach (var start in _modules.Keys)
    {
        FindCyclesInternal(start, seen, path, onPath, found);
    }

    return found;
}
|
||||
|
||||
/// <summary>
/// Computes a topological ordering of the discovered modules using in-degree counting.
/// </summary>
/// <returns>
/// The module paths ordered so that an importing module precedes the modules it imports
/// (edges run from importer to imported), or <c>null</c> when not every module could be
/// ordered, i.e. the graph contains a cycle.
/// </returns>
public IReadOnlyList<string>? GetTopologicalOrder()
{
    // Number of not-yet-processed incoming edges per discovered module.
    var remaining = new Dictionary<string, int>(StringComparer.Ordinal);
    foreach (var name in _modules.Keys)
    {
        remaining[name] = 0;
    }

    foreach (var edge in _edges.Values.SelectMany(static list => list))
    {
        // Edges that point at undiscovered modules are ignored.
        if (remaining.TryGetValue(edge.To, out var count))
        {
            remaining[edge.To] = count + 1;
        }
    }

    var ready = new Queue<string>();
    foreach (var entry in remaining)
    {
        if (entry.Value == 0)
        {
            ready.Enqueue(entry.Key);
        }
    }

    var order = new List<string>();
    while (ready.TryDequeue(out var current))
    {
        order.Add(current);

        if (!_edges.TryGetValue(current, out var outgoing))
        {
            continue;
        }

        foreach (var edge in outgoing)
        {
            if (!remaining.TryGetValue(edge.To, out var count))
            {
                continue;
            }

            count--;
            remaining[edge.To] = count;
            if (count == 0)
            {
                ready.Enqueue(edge.To);
            }
        }
    }

    // Modules left with a non-zero count are part of (or downstream of) a cycle.
    return order.Count == _modules.Count ? order : null;
}
|
||||
|
||||
/// <summary>
/// Registers a module node for every Python source or bytecode file in the VFS,
/// together with nodes for its parent packages.
/// </summary>
private void DiscoverModules()
{
    foreach (var entry in _vfs.Files)
    {
        var isPythonFile = entry.IsPythonSource || entry.IsBytecode;
        if (!isPythonFile)
        {
            continue;
        }

        var moduleName = VirtualPathToModulePath(entry.VirtualPath);
        if (string.IsNullOrEmpty(moduleName))
        {
            // A root-level __init__ maps to an empty module path and is not registered.
            continue;
        }

        var isPackageInit =
            entry.VirtualPath.EndsWith("/__init__.py", StringComparison.Ordinal) ||
            entry.VirtualPath == "__init__.py";

        // A later file mapping to the same module path replaces the earlier node.
        _modules[moduleName] = new PythonModuleNode(
            moduleName,
            entry.VirtualPath,
            isPackageInit,
            IsNamespacePackage: false,
            entry.Source);

        AddParentPackages(moduleName, entry.Source);
    }
}
|
||||
|
||||
/// <summary>
/// Ensures every ancestor package of <paramref name="modulePath"/> has a node.
/// </summary>
/// <param name="modulePath">Dotted module path whose parents are registered.</param>
/// <param name="source">File source recorded on any node this method creates.</param>
private void AddParentPackages(string modulePath, PythonFileSource source)
{
    var segments = modulePath.Split('.');
    var ancestor = string.Empty;

    // Walk every segment except the last one (the module itself).
    foreach (var segment in segments[..^1])
    {
        ancestor = string.IsNullOrEmpty(ancestor) ? segment : $"{ancestor}.{segment}";

        if (_modules.ContainsKey(ancestor))
        {
            continue;
        }

        // Without an __init__.py the ancestor is recorded as a namespace package with no file.
        var initFile = ancestor.Replace('.', '/') + "/__init__.py";
        var hasInit = _vfs.FileExists(initFile);

        _modules[ancestor] = new PythonModuleNode(
            ancestor,
            hasInit ? initFile : null,
            IsPackage: true,
            IsNamespacePackage: !hasInit,
            source);
    }
}
|
||||
|
||||
/// <summary>
/// Extracts imports from every Python source or bytecode file and records the non-empty results.
/// </summary>
/// <param name="cancellationToken">Checked before each file is processed.</param>
private async Task ExtractImportsAsync(CancellationToken cancellationToken)
{
    foreach (var candidate in _vfs.Files)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (candidate.IsPythonSource || candidate.IsBytecode)
        {
            var found = await ExtractFileImportsAsync(candidate, cancellationToken).ConfigureAwait(false);

            // Files without imports get no entry at all.
            if (found.Count != 0)
            {
                _importsByFile[candidate.VirtualPath] = found;
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reads one file from disk and extracts its imports.
/// </summary>
/// <param name="file">The VFS entry to read.</param>
/// <param name="cancellationToken">Passed to the file read.</param>
/// <returns>
/// The extracted imports; an empty list for archive members, files missing on disk,
/// files that are neither source nor bytecode, and files that cannot be read.
/// </returns>
private async Task<List<PythonImport>> ExtractFileImportsAsync(
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    // Archive members are skipped for now (they would have to be read from the zip).
    if (file.IsFromArchive)
    {
        return new List<PythonImport>();
    }

    // Try the path under the scan root first, then the path exactly as recorded.
    var rooted = Path.Combine(_rootPath, file.AbsolutePath);
    string location;
    if (File.Exists(rooted))
    {
        location = rooted;
    }
    else if (File.Exists(file.AbsolutePath))
    {
        location = file.AbsolutePath;
    }
    else
    {
        return new List<PythonImport>();
    }

    try
    {
        if (file.IsPythonSource)
        {
            var text = await File.ReadAllTextAsync(location, cancellationToken).ConfigureAwait(false);
            var sourceExtractor = new PythonSourceImportExtractor(file.VirtualPath);
            sourceExtractor.Extract(text);
            return sourceExtractor.Imports.ToList();
        }

        if (file.IsBytecode)
        {
            var payload = await File.ReadAllBytesAsync(location, cancellationToken).ConfigureAwait(false);
            var bytecodeExtractor = new PythonBytecodeImportExtractor(file.VirtualPath);
            bytecodeExtractor.Extract(payload);
            return bytecodeExtractor.Imports.ToList();
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: unreadable or inaccessible files contribute no imports.
    }

    return new List<PythonImport>();
}
|
||||
|
||||
/// <summary>
/// Converts the imports collected per file into forward and reverse edges and
/// records import targets that are not discovered modules.
/// </summary>
private void BuildEdges()
{
    foreach (var entry in _importsByFile)
    {
        var importer = VirtualPathToModulePath(entry.Key);
        if (string.IsNullOrEmpty(importer))
        {
            continue;
        }

        foreach (var statement in entry.Value)
        {
            var imported = ResolveImportTarget(statement, importer);
            if (string.IsNullOrEmpty(imported))
            {
                continue;
            }

            // The same edge instance is indexed in both directions.
            var link = new PythonImportEdge(importer, imported, statement);
            BucketFor(_edges, importer).Add(link);
            BucketFor(_reverseEdges, imported).Add(link);

            if (!_modules.ContainsKey(imported))
            {
                _unresolvedModules.Add(imported);
            }
        }
    }

    // Returns the edge list stored under key, creating and registering it on first use.
    static List<PythonImportEdge> BucketFor(Dictionary<string, List<PythonImportEdge>> index, string key)
    {
        if (!index.TryGetValue(key, out var bucket))
        {
            bucket = new List<PythonImportEdge>();
            index[key] = bucket;
        }

        return bucket;
    }
}
|
||||
|
||||
/// <summary>
/// Determines the module path an import points at.
/// </summary>
/// <param name="import">The import statement.</param>
/// <param name="sourceModulePath">Module path of the file containing the import.</param>
/// <returns>
/// The import's module as written for absolute imports; for relative imports, the result of
/// <c>ResolveRelativeImport</c>, which may be <c>null</c>.
/// </returns>
private string? ResolveImportTarget(PythonImport import, string sourceModulePath)
{
    return import.IsRelative
        ? ResolveRelativeImport(import, sourceModulePath)
        : import.Module;
}
|
||||
|
||||
/// <summary>
/// Resolves a relative import (<c>from . import x</c>, <c>from ..pkg import y</c>) to an
/// absolute module path.
/// </summary>
/// <param name="import">The relative import; <c>RelativeLevel</c> is the number of leading dots.</param>
/// <param name="sourceModulePath">Dotted module path of the file containing the import.</param>
/// <returns>
/// The absolute module path, or <c>null</c> when the import climbs above the top-level
/// package or resolves to nothing (no base package and no module name).
/// </returns>
private string? ResolveRelativeImport(PythonImport import, string sourceModulePath)
{
    var parts = sourceModulePath.Split('.');

    // A package's own path already names the current package; a plain module's
    // current package is its parent.
    var isSourcePackage =
        _modules.TryGetValue(sourceModulePath, out var node) &&
        node.VirtualPath?.EndsWith("__init__.py", StringComparison.Ordinal) == true;

    // Level 1 (.)  = current package  -> climb 0 packages
    // Level 2 (..) = parent package   -> climb 1 package
    // The previous code climbed `level` packages, i.e. one too many: for a/b/c.py,
    // "from .x import y" resolved to "a.x" instead of "a.b.x".
    var packagesToClimb = Math.Max(import.RelativeLevel, 1) - 1;
    var segmentsToDrop = packagesToClimb + (isSourcePackage ? 0 : 1);

    if (segmentsToDrop > parts.Length)
    {
        return null; // Invalid relative import (goes beyond top-level package)
    }

    var basePackage = string.Join('.', parts[..^segmentsToDrop]);

    if (string.IsNullOrEmpty(import.Module))
    {
        // "from . import x": the target is the base package itself.
        return string.IsNullOrEmpty(basePackage) ? null : basePackage;
    }

    // A top-level module has no parent package; fall back to the bare module name.
    return string.IsNullOrEmpty(basePackage)
        ? import.Module
        : $"{basePackage}.{import.Module}";
}
|
||||
|
||||
/// <summary>
/// Converts a '/'-separated virtual file path into a dotted Python module path.
/// </summary>
/// <param name="virtualPath">Virtual path of a <c>.py</c> or <c>.pyc</c> file.</param>
/// <returns>
/// The dotted module path. A package's <c>__init__</c> file maps to the package path, a
/// root-level <c>__init__</c> maps to the empty string, and PEP 3147 cache files
/// (<c>pkg/__pycache__/mod.cpython-311.pyc</c>) map to the module they were compiled from
/// (<c>pkg.mod</c>).
/// </returns>
private static string VirtualPathToModulePath(string virtualPath)
{
    const string CacheDirectory = "__pycache__";

    var path = virtualPath;
    var isBytecode = false;

    // Remove .py or .pyc extension
    if (path.EndsWith(".py", StringComparison.Ordinal))
    {
        path = path[..^3];
    }
    else if (path.EndsWith(".pyc", StringComparison.Ordinal))
    {
        path = path[..^4];
        isBytecode = true;
    }

    if (isBytecode)
    {
        var slash = path.LastIndexOf('/');
        var directory = slash < 0 ? string.Empty : path[..slash];
        var fileName = path[(slash + 1)..];

        if (directory == CacheDirectory ||
            directory.EndsWith("/" + CacheDirectory, StringComparison.Ordinal))
        {
            // Cached bytecode is named "<module>.<tag>[.opt-N]"; module names contain no
            // dots, so everything from the first dot on is the interpreter tag.
            var tagStart = fileName.IndexOf('.');
            if (tagStart > 0)
            {
                fileName = fileName[..tagStart];
            }

            // Drop the __pycache__ segment so the module sits directly in its package.
            var parent = directory.Length > CacheDirectory.Length
                ? directory[..^(CacheDirectory.Length + 1)]
                : string.Empty;
            path = parent.Length == 0 ? fileName : $"{parent}/{fileName}";
        }
    }

    // Handle __init__ -> package name
    if (path.EndsWith("/__init__", StringComparison.Ordinal))
    {
        path = path[..^9];
    }
    else if (path == "__init__")
    {
        return string.Empty;
    }

    // Convert path separators to dots
    return path.Replace('/', '.');
}
|
||||
|
||||
/// <summary>
/// Depth-first search step for <c>HasCycle</c>.
/// </summary>
/// <param name="module">Module being entered.</param>
/// <param name="visited">Modules that have already been entered at least once.</param>
/// <param name="stack">Modules on the current search path.</param>
/// <returns><c>true</c> as soon as an edge leads back to a module on the current path.</returns>
private bool HasCycleInternal(string module, HashSet<string> visited, HashSet<string> stack)
{
    // Reaching a module that is still on the current path closes a cycle.
    if (stack.Contains(module))
    {
        return true;
    }

    // Already explored via another path: nothing new to find below it.
    if (!visited.Add(module))
    {
        return false;
    }

    stack.Add(module);

    var cycleBelow =
        _edges.TryGetValue(module, out var outgoing) &&
        outgoing.Any(edge => HasCycleInternal(edge.To, visited, stack));

    if (cycleBelow)
    {
        return true;
    }

    stack.Remove(module);
    return false;
}
|
||||
|
||||
/// <summary>
/// Depth-first search step for <c>FindCycles</c>.
/// </summary>
/// <param name="module">Module being entered.</param>
/// <param name="visited">Modules that have already been entered at least once.</param>
/// <param name="stack">Current search path, in visiting order.</param>
/// <param name="onStack">Set view of <paramref name="stack"/> for fast membership checks.</param>
/// <param name="cycles">Receives each cycle found, listed from its first module back to that module.</param>
private void FindCyclesInternal(
    string module,
    HashSet<string> visited,
    List<string> stack,
    HashSet<string> onStack,
    List<IReadOnlyList<string>> cycles)
{
    if (onStack.Contains(module))
    {
        // The cycle is the tail of the current path starting at the revisited module.
        var start = stack.IndexOf(module);
        if (start >= 0)
        {
            var loop = stack.GetRange(start, stack.Count - start);
            loop.Add(module); // Repeat the first module to close the cycle
            cycles.Add(loop);
        }

        return;
    }

    if (!visited.Add(module))
    {
        return;
    }

    stack.Add(module);
    onStack.Add(module);

    if (_edges.TryGetValue(module, out var outgoing))
    {
        foreach (var link in outgoing)
        {
            FindCyclesInternal(link.To, visited, stack, onStack, cycles);
        }
    }

    // Leave the module: it is the last entry on the path.
    stack.RemoveAt(stack.Count - 1);
    onStack.Remove(module);
}
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
|
||||
|
||||
/// <summary>
/// The kinds of Python import statement recognised by the analyzers.
/// </summary>
internal enum PythonImportKind
{
    /// <summary>Plain import: <c>import foo</c>.</summary>
    Import = 0,

    /// <summary>From import: <c>from foo import bar</c>.</summary>
    FromImport = 1,

    /// <summary>Relative import: <c>from . import bar</c> or <c>from ..foo import bar</c>.</summary>
    RelativeImport = 2,

    /// <summary>Star import: <c>from foo import *</c>.</summary>
    StarImport = 3,

    /// <summary>Dynamic import via <c>importlib.import_module()</c>.</summary>
    ImportlibImportModule = 4,

    /// <summary>Dynamic import via <c>__import__()</c>.</summary>
    BuiltinImport = 5,

    /// <summary>Namespace package extension via <c>pkgutil.extend_path()</c>.</summary>
    PkgutilExtendPath = 6,

    /// <summary>Conditional import (inside a try/except or if block).</summary>
    ConditionalImport = 7,

    /// <summary>Lazy import (inside a function body, not at module level).</summary>
    LazyImport = 8,

    /// <summary>Type-checking-only import (inside a <c>TYPE_CHECKING</c> block).</summary>
    TypeCheckingImport = 9,

    /// <summary>Future import: <c>from __future__ import ...</c>.</summary>
    FutureImport = 10
}
|
||||
@@ -0,0 +1,449 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
|
||||
|
||||
/// <summary>
|
||||
/// Extracts imports from Python source code using regex-based static analysis.
|
||||
/// </summary>
|
||||
internal sealed partial class PythonSourceImportExtractor
|
||||
{
|
||||
private readonly string _sourceFile;
|
||||
private readonly List<PythonImport> _imports = new();
|
||||
private bool _inTryBlock;
|
||||
private bool _inTypeCheckingBlock;
|
||||
private int _functionDepth;
|
||||
private int _classDepth;
|
||||
|
||||
/// <summary>
/// Creates an extractor for one source file.
/// </summary>
/// <param name="sourceFile">Path recorded as <c>SourceFile</c> on every extracted import.</param>
/// <exception cref="ArgumentNullException"><paramref name="sourceFile"/> is <c>null</c>.</exception>
public PythonSourceImportExtractor(string sourceFile)
{
    ArgumentNullException.ThrowIfNull(sourceFile);
    _sourceFile = sourceFile;
}
|
||||
|
||||
/// <summary>
/// Gets the imports collected so far, in the order they were found.
/// </summary>
public IReadOnlyList<PythonImport> Imports
{
    get { return _imports; }
}
|
||||
|
||||
/// <summary>
/// Extracts imports from Python source code, one logical line at a time.
/// </summary>
/// <param name="content">Full text of the Python file; <c>null</c> or empty yields no imports.</param>
/// <returns>This extractor, so the call can be chained with <see cref="Imports"/>.</returns>
/// <remarks>
/// Backslash-continued physical lines are joined into one logical line, which is reported
/// under the number of its last physical line. Blank and comment-only lines are ignored
/// entirely, so they never open or close a scope.
/// </remarks>
public PythonSourceImportExtractor Extract(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return this;
    }

    var lineNumber = 0;
    var continuedLine = string.Empty;

    // Indentation of the first physical line of a backslash-continued statement (-1 = none).
    var continuedIndent = -1;

    foreach (var rawLine in content.Split('\n'))
    {
        lineNumber++;
        var line = rawLine.TrimEnd('\r');

        // Handle line continuations
        if (line.EndsWith('\\'))
        {
            if (continuedIndent < 0)
            {
                continuedIndent = MeasureIndent(line);
            }

            continuedLine += line[..^1].Trim() + " ";
            continue;
        }

        // The indentation must be measured before trimming: scope tracking depends on it.
        var indent = continuedIndent >= 0 ? continuedIndent : MeasureIndent(line);
        var trimmed = (continuedLine + line).TrimStart();
        continuedLine = string.Empty;
        continuedIndent = -1;

        // Skip comments and empty lines
        if (string.IsNullOrEmpty(trimmed) || trimmed.StartsWith('#'))
        {
            continue;
        }

        // Track context
        UpdateContext(trimmed, indent);

        // Remove inline comments for parsing
        var commentIndex = FindCommentStart(trimmed);
        if (commentIndex >= 0)
        {
            trimmed = trimmed[..commentIndex].TrimEnd();
        }

        if (string.IsNullOrEmpty(trimmed))
        {
            continue;
        }

        // Try to extract imports
        ExtractImports(trimmed, lineNumber);
    }

    return this;
}

// Open def/class scopes, innermost on top, each with the indentation of its header line.
private readonly Stack<(int Indent, bool IsFunction)> _scopes = new();

// Indentation of the "if TYPE_CHECKING:" line that opened the current block (-1 = none).
private int _typeCheckingIndent = -1;

/// <summary>
/// Measures the leading indentation of a physical line.
/// </summary>
/// <param name="line">The untrimmed line.</param>
/// <returns>Indentation width in columns; a tab advances to the next multiple of eight.</returns>
private static int MeasureIndent(string line)
{
    var width = 0;
    foreach (var c in line)
    {
        if (c == ' ')
        {
            width++;
        }
        else if (c == '\t')
        {
            width += 8 - (width % 8);
        }
        else
        {
            break;
        }
    }

    return width;
}

/// <summary>
/// Updates the try / TYPE_CHECKING / function / class context for one logical line.
/// </summary>
/// <param name="line">The logical line with leading whitespace removed.</param>
/// <param name="indent">Indentation the line had before trimming.</param>
/// <remarks>
/// Scopes are closed by indentation: a line indented no deeper than a <c>def</c>/<c>class</c>
/// header (or an <c>if TYPE_CHECKING:</c> line) ends that block. Previously the function depth
/// was reset to zero on the very line that incremented it, so lazy imports were never
/// detected, and a TYPE_CHECKING block stayed open until the next def/class.
/// </remarks>
private void UpdateContext(string line, int indent)
{
    // A line that only closes a bracket (e.g. "):" ending a multi-line signature) belongs
    // to the statement above it and must not be read as a dedent.
    var closesBracket = line.Length > 0 && line[0] is ')' or ']' or '}';

    if (!closesBracket)
    {
        while (_scopes.Count > 0 && indent <= _scopes.Peek().Indent)
        {
            if (_scopes.Pop().IsFunction)
            {
                _functionDepth--;
            }
            else
            {
                _classDepth--;
            }
        }

        if (_inTypeCheckingBlock && indent <= _typeCheckingIndent)
        {
            _inTypeCheckingBlock = false;
            _typeCheckingIndent = -1;
        }
    }

    // Track try blocks: set by "try:", cleared by the first except/finally clause.
    if (line.StartsWith("try:", StringComparison.Ordinal) || line == "try")
    {
        _inTryBlock = true;
    }
    else if (line.StartsWith("except", StringComparison.Ordinal) ||
             line.StartsWith("finally:", StringComparison.Ordinal) ||
             line == "finally")
    {
        _inTryBlock = false;
    }

    // Track TYPE_CHECKING blocks: only a positive "if ... TYPE_CHECKING" guard opens one.
    var isIfStatement =
        line.StartsWith("if ", StringComparison.Ordinal) ||
        line.StartsWith("if(", StringComparison.Ordinal);
    if (!_inTypeCheckingBlock &&
        isIfStatement &&
        !line.StartsWith("if not ", StringComparison.Ordinal) &&
        line.Contains("TYPE_CHECKING", StringComparison.Ordinal))
    {
        _inTypeCheckingBlock = true;
        _typeCheckingIndent = indent;
    }

    // Track function and class scopes by the indentation of their header line.
    if (line.StartsWith("def ", StringComparison.Ordinal) ||
        line.StartsWith("async def ", StringComparison.Ordinal))
    {
        _scopes.Push((indent, true));
        _functionDepth++;
    }
    else if (line.StartsWith("class ", StringComparison.Ordinal))
    {
        _scopes.Push((indent, false));
        _classDepth++;
    }
}
|
||||
|
||||
/// <summary>
/// Matches one comment-free logical line against the import patterns and records the
/// first form that matches.
/// </summary>
/// <param name="line">The logical line, trimmed and without inline comment.</param>
/// <param name="lineNumber">Line number recorded on the resulting import(s).</param>
private void ExtractImports(string line, int lineNumber)
{
    // The patterns are tried in a fixed order; later ones run only when earlier ones fail.
    if (StandardImportPattern().Match(line) is { Success: true } plainHit)
    {
        // import foo, bar
        ParseStandardImport(plainHit.Groups["modules"].Value, lineNumber);
    }
    else if (FromImportPattern().Match(line) is { Success: true } fromHit)
    {
        // from foo import bar, baz
        var groups = fromHit.Groups;
        ParseFromImport(groups["dots"].Value, groups["module"].Value, groups["names"].Value, lineNumber);
    }
    else if (ImportlibPattern().Match(line) is { Success: true } importlibHit)
    {
        // importlib.import_module()
        var groups = importlibHit.Groups;
        ParseImportlibImport(groups["module"].Value, groups["package"].Value, lineNumber);
    }
    else if (BuiltinImportPattern().Match(line) is { Success: true } builtinHit)
    {
        // __import__()
        ParseBuiltinImport(builtinHit.Groups["module"].Value, lineNumber);
    }
    else if (PkgutilExtendPathPattern().Match(line) is { Success: true } pkgutilHit)
    {
        // pkgutil.extend_path()
        ParsePkgutilExtendPath(pkgutilHit.Groups["name"].Value, lineNumber);
    }
}
|
||||
|
||||
/// <summary>
/// Records one import per module named in an <c>import a, b as c</c> statement.
/// </summary>
/// <param name="modulesStr">The comma-separated module list following <c>import</c>.</param>
/// <param name="lineNumber">Line number recorded on each import.</param>
private void ParseStandardImport(string modulesStr, int lineNumber)
{
    var kind = _inTypeCheckingBlock ? PythonImportKind.TypeCheckingImport : PythonImportKind.Import;

    foreach (var rawSpec in modulesStr.Split(','))
    {
        var spec = rawSpec.Trim();
        if (spec.Length == 0)
        {
            continue;
        }

        // "name as alias" splits into both parts; anything else is the module name as written.
        var name = spec;
        string? alias = null;

        var aliasMatch = AsAliasPattern().Match(spec);
        if (aliasMatch.Success)
        {
            name = aliasMatch.Groups["name"].Value.Trim();
            alias = aliasMatch.Groups["alias"].Value.Trim();
        }

        var entry = new PythonImport(
            Module: name,
            Names: null,
            Alias: alias,
            Kind: kind,
            RelativeLevel: 0,
            SourceFile: _sourceFile,
            LineNumber: lineNumber,
            Confidence: PythonImportConfidence.Definitive,
            IsConditional: _inTryBlock,
            IsLazy: _functionDepth > 0,
            IsTypeCheckingOnly: _inTypeCheckingBlock);

        _imports.Add(entry);
    }
}
|
||||
|
||||
/// <summary>
/// Records a single import for a <c>from [dots][module] import names</c> statement.
/// </summary>
/// <param name="dots">The leading dots; their count becomes the relative level.</param>
/// <param name="module">The module after the dots (may be empty).</param>
/// <param name="namesStr">The imported names: <c>*</c>, a comma list, or a parenthesised comma list.</param>
/// <param name="lineNumber">Line number recorded on the import.</param>
private void ParseFromImport(string dots, string module, string namesStr, int lineNumber)
{
    var level = dots.Length;
    var isRelative = level > 0;
    var isFuture = module == "__future__";

    // Handle star import
    if (namesStr.Trim() == "*")
    {
        PythonImportKind starKind;
        if (isFuture)
        {
            starKind = PythonImportKind.FutureImport;
        }
        else if (isRelative)
        {
            starKind = PythonImportKind.RelativeImport;
        }
        else
        {
            starKind = PythonImportKind.StarImport;
        }

        _imports.Add(new PythonImport(
            Module: module,
            Names: [new PythonImportedName("*")],
            Alias: null,
            Kind: starKind,
            RelativeLevel: level,
            SourceFile: _sourceFile,
            LineNumber: lineNumber,
            Confidence: PythonImportConfidence.Definitive,
            IsConditional: _inTryBlock,
            IsLazy: _functionDepth > 0,
            IsTypeCheckingOnly: _inTypeCheckingBlock));
        return;
    }

    // Handle parenthesized imports: from foo import (bar, baz)
    var nameList = namesStr.Trim();
    if (nameList.StartsWith('('))
    {
        nameList = nameList.TrimStart('(').TrimEnd(')');
    }

    var imported = new List<PythonImportedName>();
    foreach (var rawSpec in nameList.Split(','))
    {
        var spec = rawSpec.Trim();
        if (spec.Length == 0)
        {
            continue;
        }

        var aliasMatch = AsAliasPattern().Match(spec);
        var entry = aliasMatch.Success
            ? new PythonImportedName(
                aliasMatch.Groups["name"].Value.Trim(),
                aliasMatch.Groups["alias"].Value.Trim())
            : new PythonImportedName(spec);
        imported.Add(entry);
    }

    // Precedence: __future__, then relative, then TYPE_CHECKING, then a plain from-import.
    PythonImportKind kind;
    if (isFuture)
    {
        kind = PythonImportKind.FutureImport;
    }
    else if (isRelative)
    {
        kind = PythonImportKind.RelativeImport;
    }
    else if (_inTypeCheckingBlock)
    {
        kind = PythonImportKind.TypeCheckingImport;
    }
    else
    {
        kind = PythonImportKind.FromImport;
    }

    _imports.Add(new PythonImport(
        Module: module,
        Names: imported,
        Alias: null,
        Kind: kind,
        RelativeLevel: level,
        SourceFile: _sourceFile,
        LineNumber: lineNumber,
        Confidence: PythonImportConfidence.Definitive,
        IsConditional: _inTryBlock,
        IsLazy: _functionDepth > 0,
        IsTypeCheckingOnly: _inTypeCheckingBlock));
}
|
||||
|
||||
/// <summary>
/// Records a dynamic import made through <c>importlib.import_module()</c>.
/// </summary>
/// <param name="module">The first argument as captured; surrounding quotes are stripped.</param>
/// <param name="package">The optional <c>package</c> anchor as captured (may be empty).</param>
/// <param name="lineNumber">Line number recorded on the import.</param>
/// <remarks>
/// For <c>import_module('.foo', 'bar')</c> the leading dots are relative to the anchor package,
/// not to the importing file. When the anchor looks like a literal dotted name the import is
/// recorded as the absolute module (<c>bar.foo</c>, level 0); otherwise it stays relative.
/// NOTE(review): the capturing regex is not visible here; an unquoted variable passed as
/// <paramref name="package"/> cannot be told apart from a literal if the pattern strips quotes.
/// </remarks>
private void ParseImportlibImport(string module, string package, int lineNumber)
{
    // importlib.import_module('foo') or importlib.import_module('.foo', 'bar')
    var moduleName = module.Trim('\'', '"');
    var relativeLevel = 0;

    // Count leading dots for relative imports
    while (moduleName.StartsWith('.'))
    {
        relativeLevel++;
        moduleName = moduleName[1..];
    }

    // A relative name with a usable anchor is resolved here, because the anchor is
    // not available later when edges are built.
    if (relativeLevel > 0 && TryAnchor(package, relativeLevel, moduleName, out var absolute))
    {
        moduleName = absolute;
        relativeLevel = 0;
    }

    _imports.Add(new PythonImport(
        Module: moduleName,
        Names: null,
        Alias: null,
        Kind: PythonImportKind.ImportlibImportModule,
        RelativeLevel: relativeLevel,
        SourceFile: _sourceFile,
        LineNumber: lineNumber,
        Confidence: PythonImportConfidence.High,
        IsConditional: _inTryBlock,
        IsLazy: _functionDepth > 0,
        IsTypeCheckingOnly: _inTypeCheckingBlock));

    // Resolves "level dots + name" against a literal anchor package: level 1 is the anchor
    // itself and each further dot drops one trailing component of the anchor.
    static bool TryAnchor(string? anchorText, int level, string name, out string resolved)
    {
        resolved = string.Empty;

        if (string.IsNullOrWhiteSpace(anchorText))
        {
            return false;
        }

        var anchor = anchorText.Trim('\'', '"', ' ');

        // __name__ / __package__ are runtime values referring to the importing module,
        // so the import is left relative to the source file.
        if (anchor.Length == 0 || anchor is "__name__" or "__package__")
        {
            return false;
        }

        var parts = anchor.Split('.');
        foreach (var part in parts)
        {
            if (part.Length == 0 || !part.All(static c => char.IsLetterOrDigit(c) || c == '_'))
            {
                return false; // Not a plain dotted name (expression, keyword argument, ...)
            }
        }

        var componentsToDrop = level - 1;
        if (componentsToDrop >= parts.Length)
        {
            return false; // Would climb beyond the top-level package
        }

        var basePackage = string.Join('.', parts[..^componentsToDrop]);
        resolved = name.Length == 0 ? basePackage : $"{basePackage}.{name}";
        return true;
    }
}
|
||||
|
||||
/// <summary>
/// Records a <c>__import__('module')</c> call as a dynamic import.
/// </summary>
/// <param name="module">
/// Module argument as captured from the call: the quoted literal, possibly followed by
/// whitespace (the capture group of the matching pattern includes trailing <c>\s*</c>).
/// </param>
/// <param name="lineNumber">Line number stored on the recorded import.</param>
private void ParseBuiltinImport(string module, int lineNumber)
{
    // Strip surrounding whitespace before the quotes; otherwise "'foo' " would
    // keep its closing quote because the trailing space stops the quote trim.
    var moduleName = module.Trim().Trim('\'', '"');

    _imports.Add(new PythonImport(
        Module: moduleName,
        Names: null,
        Alias: null,
        Kind: PythonImportKind.BuiltinImport,
        RelativeLevel: 0,
        SourceFile: _sourceFile,
        LineNumber: lineNumber,
        Confidence: PythonImportConfidence.High,
        IsConditional: _inTryBlock,
        IsLazy: _functionDepth > 0,
        IsTypeCheckingOnly: _inTypeCheckingBlock));
}
|
||||
|
||||
/// <summary>
/// Records a <c>pkgutil.extend_path(__path__, ...)</c> call as an import entry.
/// </summary>
/// <param name="name">Second argument of the call as captured; quotes and spaces are stripped.</param>
/// <param name="lineNumber">Line number stored on the recorded import.</param>
private void ParsePkgutilExtendPath(string name, int lineNumber)
{
    // pkgutil.extend_path(__path__, __name__)
    var packageName = name.Trim('\'', '"', ' ');

    var entry = new PythonImport(
        Module: packageName,
        Names: null,
        Alias: null,
        Kind: PythonImportKind.PkgutilExtendPath,
        RelativeLevel: 0,
        SourceFile: _sourceFile,
        LineNumber: lineNumber,
        Confidence: PythonImportConfidence.Medium,
        IsConditional: _inTryBlock,
        IsLazy: _functionDepth > 0,
        IsTypeCheckingOnly: _inTypeCheckingBlock);

    _imports.Add(entry);
}
|
||||
|
||||
/// <summary>
/// Finds the first <c>#</c> on a line that is not inside a string literal.
/// </summary>
/// <param name="line">A single line of Python source text.</param>
/// <returns>
/// Zero-based index of the comment marker, or <c>-1</c> when the line has no
/// <c>#</c> outside of quotes. String state is tracked within this line only.
/// </returns>
private static int FindCommentStart(string line)
{
    var inSingleQuote = false;
    var inDoubleQuote = false;
    var inTripleSingle = false;
    var inTripleDouble = false;

    for (var i = 0; i < line.Length; i++)
    {
        var c = line[i];

        // A triple quote only opens/closes a triple-quoted string when no short
        // string is open: inside "..." the first quote of """ closes that string.
        if (!inSingleQuote && !inDoubleQuote)
        {
            if (!inTripleSingle && IsTripleQuoteAt(line, i, '"'))
            {
                inTripleDouble = !inTripleDouble;
                i += 2;
                continue;
            }

            if (!inTripleDouble && IsTripleQuoteAt(line, i, '\''))
            {
                inTripleSingle = !inTripleSingle;
                i += 2;
                continue;
            }
        }

        // Short double-quoted string delimiter
        if (c == '"' && !inSingleQuote && !inTripleSingle && !inTripleDouble)
        {
            inDoubleQuote = !inDoubleQuote;
            continue;
        }

        // Short single-quoted string delimiter
        if (c == '\'' && !inDoubleQuote && !inTripleSingle && !inTripleDouble)
        {
            inSingleQuote = !inSingleQuote;
            continue;
        }

        // Comment marker outside every kind of string
        if (c == '#' && !inSingleQuote && !inDoubleQuote && !inTripleSingle && !inTripleDouble)
        {
            return i;
        }

        // A backslash consumes the following character so an escaped quote
        // does not toggle string state.
        if (c == '\\' && i + 1 < line.Length)
        {
            i++;
        }
    }

    return -1;

    // True when three consecutive 'quote' characters start at 'index'.
    static bool IsTripleQuoteAt(string text, int index, char quote)
        => index + 2 < text.Length
           && text[index] == quote
           && text[index + 1] == quote
           && text[index + 2] == quote;
}
|
||||
|
||||
// NOTE: RegexOptions.Compiled is intentionally not passed below; the regex source
// generator ignores it because the matcher is already emitted as C# at build time.

/// <summary>Matches a standard import statement: <c>import foo, bar</c>.</summary>
[GeneratedRegex(@"^import\s+(?<modules>.+)$")]
private static partial Regex StandardImportPattern();

/// <summary>Matches a from-import: <c>from foo import bar</c>; leading dots are captured in <c>dots</c>.</summary>
[GeneratedRegex(@"^from\s+(?<dots>\.*)(?<module>[\w.]*)\s+import\s+(?<names>.+)$")]
private static partial Regex FromImportPattern();

/// <summary>
/// Matches <c>importlib.import_module('module')</c> or <c>importlib.import_module('module', 'package')</c>.
/// Both captures include their surrounding quotes.
/// </summary>
[GeneratedRegex(@"importlib\.import_module\s*\(\s*(?<module>['""][^'""]+['""])(?:\s*,\s*(?<package>['""][^'""]*['""]))?")]
private static partial Regex ImportlibPattern();

/// <summary>
/// Matches <c>__import__('module')</c>. The <c>module</c> capture includes the quotes
/// and any whitespace that follows the closing quote.
/// </summary>
[GeneratedRegex(@"__import__\s*\(\s*(?<module>['""][^'""]+['""]\s*)")]
private static partial Regex BuiltinImportPattern();

/// <summary>
/// Matches <c>pkgutil.extend_path(__path__, __name__)</c> or <c>pkgutil.extend_path(__path__, 'name')</c>.
/// </summary>
[GeneratedRegex(@"pkgutil\.extend_path\s*\(\s*__path__\s*,\s*(?<name>__name__|['""][^'""]+['""]\s*)\)")]
private static partial Regex PkgutilExtendPathPattern();

/// <summary>Matches an aliased name: <c>name as alias</c>.</summary>
[GeneratedRegex(@"^(?<name>[\w.]+)\s+as\s+(?<alias>\w+)$")]
private static partial Regex AsAliasPattern();
|
||||
}
|
||||
@@ -0,0 +1,349 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
|
||||
/// <summary>
/// Locates Python artifacts (site-packages, dist-info directories, interpreter binaries
/// and lib directories) in an extracted container file system, including the
/// <c>layers/</c>, <c>.layers/</c> and <c>layer/</c> layer roots.
/// </summary>
internal static class PythonContainerAdapter
{
    private const string PythonPrefix = "python";

    /// <summary>Maximum directory depth walked below a search root when looking for site-packages.</summary>
    private const int MaxSitePackagesDepth = 6;

    private static readonly string[] LayerRootCandidates = { "layers", ".layers", "layer" };
    private static readonly string[] SitePackagesPatterns = { "site-packages", "dist-packages" };
    private static readonly string[] PythonBinaryNames = { "python", "python3", "python3.10", "python3.11", "python3.12", "python3.13" };

    /// <summary>
    /// Discovers Python site-packages directories from OCI container layers.
    /// </summary>
    /// <param name="rootPath">Workspace root that may contain layer directories.</param>
    /// <returns>
    /// Full paths located under <paramref name="rootPath"/>, de-duplicated and sorted
    /// case-insensitively.
    /// </returns>
    public static IReadOnlyCollection<string> DiscoverLayerSitePackages(string rootPath)
    {
        var discovered = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var normalizedRoot = EnsureTrailingSeparator(Path.GetFullPath(rootPath));

        // Direct layer directories (layer1/, layer2/, ...) first, then the
        // children of the standard layer roots.
        var directLayers = EnumerateDirectoriesSafe(rootPath)
            .Where(static directory => Path.GetFileName(directory).StartsWith("layer", StringComparison.OrdinalIgnoreCase));

        foreach (var searchRoot in directLayers.Concat(EnumerateLayerRoots(rootPath)))
        {
            foreach (var sitePackages in DiscoverSitePackagesInDirectory(searchRoot))
            {
                // Only keep results that resolve to a location under the workspace root.
                if (TryNormalizeUnderRoot(normalizedRoot, sitePackages, out var normalized))
                {
                    discovered.Add(normalized);
                }
            }
        }

        return discovered
            .OrderBy(static path => path, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }

    /// <summary>
    /// Detects Python runtime information from container layers.
    /// </summary>
    /// <param name="rootPath">Workspace root; it and every layer root below it are searched.</param>
    /// <returns>
    /// The versions, binaries and lib directories found, or <c>null</c> when neither a
    /// version nor a binary was detected.
    /// </returns>
    public static PythonRuntimeInfo? DetectRuntime(string rootPath)
    {
        var pythonPaths = new List<string>();
        var pythonVersions = new SortedSet<string>(StringComparer.Ordinal);
        var pythonBinaries = new List<string>();

        var searchRoots = new List<string> { rootPath };
        searchRoots.AddRange(EnumerateLayerRoots(rootPath));

        foreach (var searchRoot in searchRoots)
        {
            // Interpreter binaries in usr/bin, usr/local/bin and bin.
            foreach (var binDir in StandardLocations(searchRoot, "bin"))
            {
                foreach (var binaryName in PythonBinaryNames)
                {
                    var pythonPath = Path.Combine(binDir, binaryName);
                    if (!File.Exists(pythonPath))
                    {
                        continue;
                    }

                    pythonBinaries.Add(pythonPath);
                    if (ExtractVersionFromBinaryName(binaryName) is { } binaryVersion)
                    {
                        pythonVersions.Add(binaryVersion);
                    }
                }
            }

            // pythonX.Y directories in usr/lib, usr/local/lib and lib.
            foreach (var libDir in StandardLocations(searchRoot, "lib"))
            {
                foreach (var pythonDir in EnumerateDirectoriesSafe(libDir))
                {
                    if (ExtractVersionFromBinaryName(Path.GetFileName(pythonDir)) is { } libVersion)
                    {
                        pythonVersions.Add(libVersion);
                        pythonPaths.Add(pythonDir);
                    }
                }
            }
        }

        if (pythonVersions.Count == 0 && pythonBinaries.Count == 0)
        {
            return null;
        }

        return new PythonRuntimeInfo(
            pythonVersions.ToArray(),
            pythonBinaries.ToArray(),
            pythonPaths.ToArray());
    }

    /// <summary>
    /// Discovers dist-info directories within container layers.
    /// </summary>
    /// <param name="rootPath">Workspace root to search.</param>
    /// <returns>De-duplicated <c>*.dist-info</c> directory paths in ordinal order.</returns>
    public static IReadOnlyCollection<string> DiscoverDistInfoDirectories(string rootPath)
    {
        var discovered = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        // Layer site-packages first, then site-packages reachable from the root itself.
        var sitePackagesDirectories = DiscoverLayerSitePackages(rootPath)
            .Concat(DiscoverSitePackagesInDirectory(rootPath));

        foreach (var sitePackages in sitePackagesDirectories)
        {
            foreach (var distInfo in EnumerateDirectoriesSafe(sitePackages, "*.dist-info"))
            {
                discovered.Add(distInfo);
            }
        }

        return discovered
            .OrderBy(static path => path, StringComparer.Ordinal)
            .ToArray();
    }

    /// <summary>
    /// Walks <paramref name="directory"/> (at most <see cref="MaxSitePackagesDepth"/> levels deep,
    /// e.g. usr/lib/python3.11/site-packages) and yields every site-packages or
    /// dist-packages directory. Matching directories are not descended into.
    /// </summary>
    private static IEnumerable<string> DiscoverSitePackagesInDirectory(string directory)
    {
        if (!Directory.Exists(directory))
        {
            yield break;
        }

        var pending = new Stack<(string Path, int Depth)>();
        pending.Push((directory, 0));

        while (pending.Count > 0)
        {
            var (current, depth) = pending.Pop();

            if (SitePackagesPatterns.Contains(Path.GetFileName(current), StringComparer.OrdinalIgnoreCase))
            {
                yield return current;
                continue;
            }

            if (depth >= MaxSitePackagesDepth)
            {
                continue;
            }

            foreach (var subdir in EnumerateDirectoriesSafe(current))
            {
                pending.Push((subdir, depth + 1));
            }
        }
    }

    /// <summary>
    /// Yields the file-system root of each layer found below the known layer root
    /// directories: the layer's <c>fs/</c> subdirectory when present, otherwise the
    /// layer directory itself.
    /// </summary>
    private static IEnumerable<string> EnumerateLayerRoots(string workspaceRoot)
    {
        foreach (var candidate in LayerRootCandidates)
        {
            foreach (var layerDirectory in EnumerateDirectoriesSafe(Path.Combine(workspaceRoot, candidate)))
            {
                var fsDirectory = Path.Combine(layerDirectory, "fs");
                yield return Directory.Exists(fsDirectory) ? fsDirectory : layerDirectory;
            }
        }
    }

    /// <summary>Returns <c>usr/{leaf}</c>, <c>usr/local/{leaf}</c> and <c>{leaf}</c> under <paramref name="root"/>.</summary>
    private static string[] StandardLocations(string root, string leaf) => new[]
    {
        Path.Combine(root, "usr", leaf),
        Path.Combine(root, "usr", "local", leaf),
        Path.Combine(root, leaf)
    };

    /// <summary>
    /// Lists the subdirectories of <paramref name="path"/> matching <paramref name="searchPattern"/>,
    /// returning an empty list when the directory is missing or cannot be read.
    /// </summary>
    private static IReadOnlyList<string> EnumerateDirectoriesSafe(string path, string searchPattern = "*")
    {
        if (!Directory.Exists(path))
        {
            return Array.Empty<string>();
        }

        try
        {
            // Materialize inside the try so failures raised while the listing is
            // being read are handled here rather than in the caller's foreach.
            return Directory.EnumerateDirectories(path, searchPattern).ToArray();
        }
        catch (IOException)
        {
            return Array.Empty<string>();
        }
        catch (UnauthorizedAccessException)
        {
            return Array.Empty<string>();
        }
    }

    /// <summary>
    /// Resolves <paramref name="path"/> to a full path and reports whether it lies under
    /// <paramref name="normalizedRoot"/> (which must end with a directory separator).
    /// </summary>
    private static bool TryNormalizeUnderRoot(string normalizedRoot, string path, out string normalizedPath)
    {
        normalizedPath = Path.GetFullPath(path);
        return normalizedPath.StartsWith(normalizedRoot, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Appends the platform directory separator unless the path already ends with it.</summary>
    private static string EnsureTrailingSeparator(string path)
        => path.EndsWith(Path.DirectorySeparatorChar) ? path : path + Path.DirectorySeparatorChar;

    /// <summary>
    /// Extracts the version suffix from a name such as <c>python3.11</c>.
    /// </summary>
    /// <returns>
    /// The text after the leading "python" when it starts with a digit; otherwise <c>null</c>.
    /// </returns>
    private static string? ExtractVersionFromBinaryName(string binaryName)
    {
        if (!binaryName.StartsWith(PythonPrefix, StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        // Slice off the prefix only; replacing every "python" occurrence would
        // also mangle names that contain the word again later on.
        var version = binaryName[PythonPrefix.Length..];
        return version.Length > 0 && char.IsDigit(version[0]) ? version : null;
    }
}
|
||||
|
||||
/// <summary>
/// Python runtime details detected in a container file system.
/// </summary>
/// <param name="Versions">Detected version strings (for example "3.11").</param>
/// <param name="Binaries">Paths of the Python interpreter binaries that were found.</param>
/// <param name="LibPaths">Paths of the versioned Python lib directories that were found.</param>
internal sealed record PythonRuntimeInfo(
    IReadOnlyCollection<string> Versions,
    IReadOnlyCollection<string> Binaries,
    IReadOnlyCollection<string> LibPaths);
|
||||
@@ -0,0 +1,326 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
|
||||
/// <summary>
/// Detects Python environment variables (PYTHONPATH, PYTHONHOME, ...) from container
/// configuration files, environment files, and virtual environment configs.
/// </summary>
internal static class PythonEnvironmentDetector
{
    private static readonly string[] EnvironmentFileNames =
    {
        "environment",
        ".env",
        "env",
        ".docker-env",
        "docker-env"
    };

    private static readonly string[] OciConfigFiles =
    {
        "config.json",
        "container-config.json",
        "image-config.json"
    };

    private static readonly string[] LayerRootCandidates = { "layers", ".layers", "layer" };

    // KEY=value lines, optionally prefixed with "export". OPTIMIZE is the name CPython
    // reads (PYTHONOPTIMIZE); OPTIMIZATION is kept so previously matched input still matches.
    private static readonly Regex EnvLinePattern = new(
        @"^\s*(?:export\s+)?(?<key>PYTHON(?:PATH|HOME|STARTUP|OPTIMIZE|OPTIMIZATION|HASHSEED|UNBUFFERED|DONTWRITEBYTECODE|VERBOSE|DEBUG))\s*=\s*(?<value>.+)$",
        RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline);

    /// <summary>
    /// Detects Python-related environment variables from various sources.
    /// </summary>
    /// <param name="rootPath">Root directory of the file system being analyzed.</param>
    /// <param name="cancellationToken">Token passed to the file reads.</param>
    /// <returns>
    /// The detected variables and the files they came from. Sources are processed in a
    /// fixed order and a later source overwrites a variable set by an earlier one.
    /// </returns>
    public static async ValueTask<PythonEnvironment> DetectAsync(string rootPath, CancellationToken cancellationToken)
    {
        var variables = new Dictionary<string, PythonEnvVariable>(StringComparer.OrdinalIgnoreCase);
        var sources = new List<string>();

        // 1. pyvenv.cfg (virtual environment config)
        await DetectPyvenvConfigAsync(rootPath, variables, sources, cancellationToken).ConfigureAwait(false);

        // 2. Environment files at the root and under etc/
        foreach (var fileName in EnvironmentFileNames)
        {
            await ParseEnvironmentFileAsync(Path.Combine(rootPath, fileName), variables, sources, cancellationToken).ConfigureAwait(false);
            await ParseEnvironmentFileAsync(Path.Combine(rootPath, "etc", fileName), variables, sources, cancellationToken).ConfigureAwait(false);
        }

        // 3. OCI config files
        foreach (var configFile in OciConfigFiles)
        {
            await ParseOciConfigAsync(Path.Combine(rootPath, configFile), variables, sources, cancellationToken).ConfigureAwait(false);
        }

        // 4. etc/environment inside each container layer
        foreach (var layerRoot in EnumerateLayerRoots(rootPath))
        {
            await ParseEnvironmentFileAsync(Path.Combine(layerRoot, "etc", "environment"), variables, sources, cancellationToken).ConfigureAwait(false);
        }

        return new PythonEnvironment(variables, sources);
    }

    /// <summary>
    /// Reads <c>pyvenv.cfg</c> at the root and records its <c>home</c> and
    /// <c>include-system-site-packages</c> entries as synthetic variables.
    /// </summary>
    private static async ValueTask DetectPyvenvConfigAsync(
        string rootPath,
        Dictionary<string, PythonEnvVariable> variables,
        List<string> sources,
        CancellationToken cancellationToken)
    {
        var pyvenvPath = Path.Combine(rootPath, "pyvenv.cfg");
        if (!File.Exists(pyvenvPath))
        {
            return;
        }

        sources.Add(pyvenvPath);

        try
        {
            var content = await File.ReadAllTextAsync(pyvenvPath, cancellationToken).ConfigureAwait(false);

            foreach (var line in content.Split('\n', StringSplitOptions.RemoveEmptyEntries))
            {
                var trimmed = line.Trim();
                if (trimmed.StartsWith('#'))
                {
                    continue;
                }

                var separator = trimmed.IndexOf('=');
                if (separator <= 0)
                {
                    continue;
                }

                var key = trimmed[..separator].Trim();
                var value = trimmed[(separator + 1)..].Trim();

                if (string.Equals(key, "home", StringComparison.OrdinalIgnoreCase))
                {
                    variables["PYTHONHOME_VENV"] = new PythonEnvVariable("PYTHONHOME_VENV", value, pyvenvPath, "pyvenv.cfg");
                }
                else if (string.Equals(key, "include-system-site-packages", StringComparison.OrdinalIgnoreCase))
                {
                    variables["PYVENV_INCLUDE_SYSTEM"] = new PythonEnvVariable("PYVENV_INCLUDE_SYSTEM", value, pyvenvPath, "pyvenv.cfg");
                }
            }
        }
        catch (IOException)
        {
            // Best effort: an unreadable file contributes nothing.
        }
        catch (UnauthorizedAccessException)
        {
            // Best effort: an inaccessible file contributes nothing.
        }
    }

    /// <summary>
    /// Extracts PYTHON* assignments from a shell-style environment file. Does nothing
    /// when the file does not exist; the file is listed as a source only if it matched.
    /// </summary>
    private static async ValueTask ParseEnvironmentFileAsync(
        string path,
        Dictionary<string, PythonEnvVariable> variables,
        List<string> sources,
        CancellationToken cancellationToken)
    {
        if (!File.Exists(path))
        {
            return;
        }

        try
        {
            var content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
            var matches = EnvLinePattern.Matches(content);

            if (matches.Count > 0)
            {
                sources.Add(path);
            }

            foreach (Match match in matches)
            {
                var key = match.Groups["key"].Value.Trim();
                var value = match.Groups["value"].Value.Trim().Trim('"', '\'');
                variables[key] = new PythonEnvVariable(key, value, path, "environment");
            }
        }
        catch (IOException)
        {
            // Best effort: an unreadable file contributes nothing.
        }
        catch (UnauthorizedAccessException)
        {
            // Best effort: an inaccessible file contributes nothing.
        }
    }

    /// <summary>
    /// Extracts PYTHON* entries from the <c>Env</c> array of an OCI-style JSON config
    /// (<c>config.Env</c> or top-level <c>Env</c>). Does nothing when the file does not
    /// exist, is not valid JSON, or does not have the expected shape.
    /// </summary>
    private static async ValueTask ParseOciConfigAsync(
        string path,
        Dictionary<string, PythonEnvVariable> variables,
        List<string> sources,
        CancellationToken cancellationToken)
    {
        if (!File.Exists(path))
        {
            return;
        }

        try
        {
            await using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
            using var document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);

            if (!TryGetEnvArray(document.RootElement, out var envArray))
            {
                return;
            }

            var foundPythonVar = false;

            foreach (var envElement in envArray.EnumerateArray())
            {
                // GetString throws for numbers, objects, etc.; skip anything that is not a string.
                if (envElement.ValueKind != JsonValueKind.String)
                {
                    continue;
                }

                var envString = envElement.GetString();
                if (string.IsNullOrWhiteSpace(envString))
                {
                    continue;
                }

                var separator = envString.IndexOf('=');
                if (separator <= 0)
                {
                    continue;
                }

                var key = envString[..separator];
                var value = envString[(separator + 1)..];

                if (key.StartsWith("PYTHON", StringComparison.OrdinalIgnoreCase))
                {
                    variables[key] = new PythonEnvVariable(key, value, path, "oci-config");
                    foundPythonVar = true;
                }
            }

            if (foundPythonVar)
            {
                sources.Add(path);
            }
        }
        catch (IOException)
        {
            // Best effort: an unreadable file contributes nothing.
        }
        catch (UnauthorizedAccessException)
        {
            // Best effort: an inaccessible file contributes nothing.
        }
        catch (JsonException)
        {
            // Best effort: malformed JSON contributes nothing.
        }
    }

    /// <summary>
    /// Locates the environment array in an OCI image config: <c>config.Env</c> when
    /// present, otherwise top-level <c>Env</c>. Returns false unless the value found is
    /// a JSON array.
    /// </summary>
    private static bool TryGetEnvArray(JsonElement root, out JsonElement envArray)
    {
        envArray = default;

        // TryGetProperty throws on anything but an object, so check the kind first.
        if (root.ValueKind != JsonValueKind.Object)
        {
            return false;
        }

        if (root.TryGetProperty("config", out var config)
            && config.ValueKind == JsonValueKind.Object
            && config.TryGetProperty("Env", out var configEnv))
        {
            envArray = configEnv;
        }
        else if (root.TryGetProperty("Env", out var directEnv))
        {
            envArray = directEnv;
        }

        return envArray.ValueKind == JsonValueKind.Array;
    }

    /// <summary>
    /// Yields the file-system root of each layer below the known layer root directories:
    /// the layer's <c>fs/</c> subdirectory when present, otherwise the layer directory.
    /// </summary>
    private static IEnumerable<string> EnumerateLayerRoots(string workspaceRoot)
    {
        foreach (var candidate in LayerRootCandidates)
        {
            var root = Path.Combine(workspaceRoot, candidate);
            if (!Directory.Exists(root))
            {
                continue;
            }

            string[] layerDirectories;
            try
            {
                // Materialize inside the try so listing failures are handled here.
                layerDirectories = Directory.EnumerateDirectories(root).ToArray();
            }
            catch (IOException)
            {
                continue;
            }
            catch (UnauthorizedAccessException)
            {
                continue;
            }

            foreach (var layerDirectory in layerDirectories)
            {
                var fsDirectory = Path.Combine(layerDirectory, "fs");
                yield return Directory.Exists(fsDirectory) ? fsDirectory : layerDirectory;
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Detected Python environment configuration: the variables found and the files
/// they were read from.
/// </summary>
internal sealed class PythonEnvironment
{
    public PythonEnvironment(
        IReadOnlyDictionary<string, PythonEnvVariable> variables,
        IReadOnlyCollection<string> sources)
    {
        Sources = sources;
        Variables = variables;
    }

    /// <summary>Detected variables keyed by variable name.</summary>
    public IReadOnlyDictionary<string, PythonEnvVariable> Variables { get; }

    /// <summary>Paths of the files that contributed variables.</summary>
    public IReadOnlyCollection<string> Sources { get; }

    /// <summary>True when a PYTHONPATH entry was detected.</summary>
    public bool HasPythonPath
    {
        get { return Variables.ContainsKey("PYTHONPATH"); }
    }

    /// <summary>True when PYTHONHOME or the pyvenv.cfg-derived PYTHONHOME_VENV was detected.</summary>
    public bool HasPythonHome
    {
        get
        {
            if (Variables.ContainsKey("PYTHONHOME"))
            {
                return true;
            }

            return Variables.ContainsKey("PYTHONHOME_VENV");
        }
    }

    /// <summary>Value of PYTHONPATH, or null when absent.</summary>
    public string? PythonPath
    {
        get
        {
            if (Variables.TryGetValue("PYTHONPATH", out var entry))
            {
                return entry.Value;
            }

            return null;
        }
    }

    /// <summary>Value of PYTHONHOME, falling back to PYTHONHOME_VENV; null when neither is present.</summary>
    public string? PythonHome
    {
        get
        {
            if (Variables.TryGetValue("PYTHONHOME", out var home))
            {
                return home.Value;
            }

            if (Variables.TryGetValue("PYTHONHOME_VENV", out var venvHome))
            {
                return venvHome.Value;
            }

            return null;
        }
    }

    /// <summary>
    /// Flattens the environment into key/value metadata: for each variable (ordinal key
    /// order) its value and source type, followed by the joined source list when any.
    /// </summary>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var metadata = new List<KeyValuePair<string, string?>>();
        var ordered = Variables.Values.OrderBy(v => v.Key, StringComparer.Ordinal);

        foreach (var item in ordered)
        {
            var loweredKey = item.Key.ToLowerInvariant();
            metadata.Add(new KeyValuePair<string, string?>($"env.{loweredKey}", item.Value));
            metadata.Add(new KeyValuePair<string, string?>($"env.{loweredKey}.source", item.SourceType));
        }

        if (Sources.Count > 0)
        {
            var joinedSources = string.Join(';', Sources.OrderBy(s => s, StringComparer.Ordinal));
            metadata.Add(new KeyValuePair<string, string?>("env.sources", joinedSources));
        }

        return metadata;
    }
}
|
||||
|
||||
/// <summary>
/// A single detected Python environment variable.
/// </summary>
/// <param name="Key">Variable name.</param>
/// <param name="Value">Variable value.</param>
/// <param name="SourcePath">Path of the file the variable was read from.</param>
/// <param name="SourceType">Kind of source, e.g. "environment", "oci-config" or "pyvenv.cfg".</param>
internal sealed record PythonEnvVariable(
    string Key,
    string Value,
    string SourcePath,
    string SourceType);
|
||||
@@ -0,0 +1,447 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
|
||||
/// <summary>
|
||||
/// Detects Python startup hooks that run before or during interpreter initialization.
|
||||
/// These include sitecustomize.py, usercustomize.py, and .pth files.
|
||||
/// </summary>
|
||||
internal static class PythonStartupHookDetector
|
||||
{
|
||||
// File names checked as interpreter startup hooks.
private static readonly string[] StartupHookFiles = new[]
{
    "sitecustomize.py",
    "usercustomize.py"
};

// Directory names treated as package installation directories.
private static readonly string[] SitePackagesPatterns = new[] { "site-packages", "dist-packages" };

// Directory names under the workspace root that hold container layers.
private static readonly string[] LayerRootCandidates = new[] { "layers", ".layers", "layer" };
|
||||
|
||||
/// <summary>
/// Detects startup hooks in the given root path.
/// </summary>
/// <param name="rootPath">Workspace root; it and every container layer root below it are searched.</param>
/// <returns>
/// The hooks and .pth files found (each ordered by path) plus summary warnings.
/// Every directory is inspected at most once, even when it is reachable both from
/// the workspace root and from a layer root.
/// </returns>
public static PythonStartupHooks Detect(string rootPath)
{
    var hooks = new List<PythonStartupHook>();
    var pthFiles = new List<PythonPthFile>();
    var warnings = new List<string>();

    // A layer directory without an fs/ subfolder can be shallow enough to be found
    // by the walk from the workspace root and again as a layer root; remember what
    // has been inspected so entries and warning counts are not duplicated.
    var inspected = new HashSet<string>(StringComparer.Ordinal);

    var searchRoots = new List<string> { rootPath };
    searchRoots.AddRange(EnumerateLayerRoots(rootPath));

    foreach (var searchRoot in searchRoots)
    {
        foreach (var sitePackages in DiscoverSitePackagesDirectories(searchRoot))
        {
            if (inspected.Add(Path.GetFullPath(sitePackages)))
            {
                DetectInSitePackages(sitePackages, hooks, pthFiles, warnings);
            }
        }

        foreach (var libPath in DiscoverPythonLibDirectories(searchRoot))
        {
            if (inspected.Add(Path.GetFullPath(libPath)))
            {
                DetectInPythonLib(libPath, hooks, warnings);
            }
        }
    }

    // Summary warnings for detected hooks
    var siteCustomizeCount = hooks.Count(h => h.HookType == StartupHookType.SiteCustomize);
    if (siteCustomizeCount > 0)
    {
        warnings.Add($"sitecustomize.py detected in {siteCustomizeCount} location(s); runs on every Python interpreter start");
    }

    var userCustomizeCount = hooks.Count(h => h.HookType == StartupHookType.UserCustomize);
    if (userCustomizeCount > 0)
    {
        warnings.Add($"usercustomize.py detected in {userCustomizeCount} location(s); runs on every Python interpreter start");
    }

    var importPthCount = pthFiles.Count(p => p.HasImportDirective);
    if (importPthCount > 0)
    {
        warnings.Add($"{importPthCount} .pth file(s) with import directives detected; may execute code on startup");
    }

    return new PythonStartupHooks(
        hooks
            .OrderBy(h => h.Path, StringComparer.Ordinal)
            .ToArray(),
        pthFiles
            .OrderBy(p => p.Path, StringComparer.Ordinal)
            .ToArray(),
        warnings);
}
|
||||
|
||||
/// <summary>
/// Inspects one site-packages directory: records sitecustomize.py / usercustomize.py
/// when present and analyzes every <c>*.pth</c> file found directly inside it.
/// </summary>
/// <param name="sitePackages">Directory to inspect; ignored when it does not exist.</param>
/// <param name="hooks">Receives the startup hooks that were found.</param>
/// <param name="pthFiles">Receives the analysis of each .pth file.</param>
/// <param name="warnings">Not written to by this method.</param>
private static void DetectInSitePackages(
    string sitePackages,
    List<PythonStartupHook> hooks,
    List<PythonPthFile> pthFiles,
    List<string> warnings)
{
    if (!Directory.Exists(sitePackages))
    {
        return;
    }

    // sitecustomize.py and usercustomize.py
    foreach (var fileName in StartupHookFiles)
    {
        var candidate = Path.Combine(sitePackages, fileName);
        if (!File.Exists(candidate))
        {
            continue;
        }

        StartupHookType kind;
        if (fileName.StartsWith("site", StringComparison.OrdinalIgnoreCase))
        {
            kind = StartupHookType.SiteCustomize;
        }
        else
        {
            kind = StartupHookType.UserCustomize;
        }

        hooks.Add(new PythonStartupHook(candidate, kind, "site-packages"));
    }

    // .pth files; enumeration problems are tolerated.
    try
    {
        foreach (var pthPath in Directory.EnumerateFiles(sitePackages, "*.pth"))
        {
            pthFiles.Add(AnalyzePthFile(pthPath));
        }
    }
    catch (IOException)
    {
        // Ignore enumeration errors
    }
    catch (UnauthorizedAccessException)
    {
        // Ignore access errors
    }
}
|
||||
|
||||
/// <summary>
/// Records sitecustomize.py / usercustomize.py files located directly in a Python
/// lib directory.
/// </summary>
/// <param name="libPath">Directory to inspect; ignored when it does not exist.</param>
/// <param name="hooks">Receives the startup hooks that were found.</param>
/// <param name="warnings">Not written to by this method.</param>
private static void DetectInPythonLib(string libPath, List<PythonStartupHook> hooks, List<string> warnings)
{
    if (!Directory.Exists(libPath))
    {
        return;
    }

    foreach (var fileName in StartupHookFiles)
    {
        var candidate = Path.Combine(libPath, fileName);
        if (!File.Exists(candidate))
        {
            continue;
        }

        var isSiteHook = fileName.StartsWith("site", StringComparison.OrdinalIgnoreCase);
        var kind = isSiteHook ? StartupHookType.SiteCustomize : StartupHookType.UserCustomize;

        hooks.Add(new PythonStartupHook(candidate, kind, "python-lib"));
    }
}
|
||||
|
||||
/// <summary>
/// Reads a <c>.pth</c> file and classifies its lines.
/// </summary>
/// <param name="path">Path of the .pth file.</param>
/// <returns>
/// The file's path and name, whether any line is an import directive, and the
/// remaining non-blank, non-comment lines as path entries. When the file cannot be
/// read, whatever was collected before the failure is returned.
/// </returns>
private static PythonPthFile AnalyzePthFile(string path)
{
    var hasImportDirective = false;
    var pathEntries = new List<string>();

    try
    {
        foreach (var line in File.ReadAllLines(path))
        {
            var trimmed = line.Trim();
            if (string.IsNullOrEmpty(trimmed) || trimmed.StartsWith('#'))
            {
                continue;
            }

            // Python's site module executes lines that start with "import" followed
            // by a space OR a tab, so both forms must be treated as code.
            if (trimmed.StartsWith("import ", StringComparison.OrdinalIgnoreCase) ||
                trimmed.StartsWith("import\t", StringComparison.OrdinalIgnoreCase))
            {
                hasImportDirective = true;
            }
            else
            {
                pathEntries.Add(trimmed);
            }
        }
    }
    catch (IOException)
    {
        // Best effort: report what was collected so far.
    }
    catch (UnauthorizedAccessException)
    {
        // Best effort: an inaccessible file is reported without entries.
    }

    return new PythonPthFile(path, Path.GetFileName(path), hasImportDirective, pathEntries);
}
|
||||
|
||||
/// <summary>
/// Walks the tree below <paramref name="rootPath"/> and yields every site-packages or
/// dist-packages directory. Matching directories are not descended into, directories
/// at depth 6 are not expanded, and dot-directories, node_modules and __pycache__
/// are skipped.
/// </summary>
private static IEnumerable<string> DiscoverSitePackagesDirectories(string rootPath)
{
    if (!Directory.Exists(rootPath))
    {
        yield break;
    }

    var worklist = new Stack<(string Path, int Depth)>();
    worklist.Push((rootPath, 0));

    while (worklist.Count != 0)
    {
        var entry = worklist.Pop();
        var candidate = entry.Path;
        var level = entry.Depth;

        var isSitePackages = SitePackagesPatterns.Contains(Path.GetFileName(candidate), StringComparer.OrdinalIgnoreCase);
        if (isSitePackages)
        {
            yield return candidate;
            continue;
        }

        if (level >= 6)
        {
            continue;
        }

        IEnumerable<string>? children = null;
        try
        {
            children = Directory.EnumerateDirectories(candidate);
        }
        catch (IOException)
        {
            continue;
        }
        catch (UnauthorizedAccessException)
        {
            continue;
        }

        foreach (var child in children)
        {
            if (!IsIgnored(Path.GetFileName(child)))
            {
                worklist.Push((child, level + 1));
            }
        }
    }

    // Common non-Python directories that are never worth descending into.
    static bool IsIgnored(string name)
        => name.StartsWith('.')
           || string.Equals(name, "node_modules", StringComparison.OrdinalIgnoreCase)
           || string.Equals(name, "__pycache__", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Yields the immediate sub-directories of <c>usr/lib</c>, <c>usr/local/lib</c>
/// and <c>lib</c> (relative to <paramref name="rootPath"/>) whose name starts
/// with "python", ignoring case.
/// </summary>
/// <param name="rootPath">Root of the file system tree to inspect.</param>
/// <returns>Lazily produced paths of the matching directories.</returns>
private static IEnumerable<string> DiscoverPythonLibDirectories(string rootPath)
{
    string[] candidateRoots =
    {
        Path.Combine(rootPath, "usr", "lib"),
        Path.Combine(rootPath, "usr", "local", "lib"),
        Path.Combine(rootPath, "lib"),
    };

    foreach (var candidateRoot in candidateRoots)
    {
        if (!Directory.Exists(candidateRoot))
        {
            continue;
        }

        var children = TryListChildren(candidateRoot);
        if (children is null)
        {
            continue;
        }

        foreach (var child in children)
        {
            if (Path.GetFileName(child).StartsWith("python", StringComparison.OrdinalIgnoreCase))
            {
                yield return child;
            }
        }
    }

    // Returns null when the directory cannot be opened for enumeration.
    static IEnumerable<string>? TryListChildren(string directory)
    {
        try
        {
            return Directory.EnumerateDirectories(directory);
        }
        catch (IOException)
        {
            return null;
        }
        catch (UnauthorizedAccessException)
        {
            return null;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Yields one root per layer directory found under each existing
/// <c>LayerRootCandidates</c> entry of <paramref name="workspaceRoot"/>.
/// A layer's <c>fs</c> sub-directory is preferred when it exists; otherwise
/// the layer directory itself is yielded.
/// </summary>
/// <param name="workspaceRoot">Directory that may contain the layer containers.</param>
/// <returns>Lazily produced layer root paths.</returns>
private static IEnumerable<string> EnumerateLayerRoots(string workspaceRoot)
{
    foreach (var candidate in LayerRootCandidates)
    {
        var container = Path.Combine(workspaceRoot, candidate);
        if (!Directory.Exists(container))
        {
            continue;
        }

        var layers = TryListChildren(container);
        if (layers is null)
        {
            continue;
        }

        foreach (var layer in layers)
        {
            var extractedFileSystem = Path.Combine(layer, "fs");
            yield return Directory.Exists(extractedFileSystem) ? extractedFileSystem : layer;
        }
    }

    // Returns null when the directory cannot be opened for enumeration.
    static IEnumerable<string>? TryListChildren(string directory)
    {
        try
        {
            return Directory.EnumerateDirectories(directory);
        }
        catch (IOException)
        {
            return null;
        }
        catch (UnauthorizedAccessException)
        {
            return null;
        }
    }
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Kind of Python startup hook file that was detected.
/// </summary>
internal enum StartupHookType
{
    /// <summary>
    /// Hook reported under the <c>sitecustomize</c> label.
    /// </summary>
    SiteCustomize = 0,

    /// <summary>
    /// Hook reported under the <c>usercustomize</c> label.
    /// </summary>
    UserCustomize = 1
}
|
||||
|
||||
/// <summary>
/// A startup hook file found during analysis.
/// </summary>
/// <param name="Path">Path of the hook file.</param>
/// <param name="HookType">Which kind of startup hook the file is.</param>
/// <param name="Location">Label describing where the hook was found (for example <c>"python-lib"</c>).</param>
internal sealed record PythonStartupHook(string Path, StartupHookType HookType, string Location);
|
||||
|
||||
/// <summary>
/// A .pth file found during analysis, together with its classified content.
/// </summary>
/// <param name="Path">Path of the .pth file.</param>
/// <param name="FileName">File name portion of <paramref name="Path"/>.</param>
/// <param name="HasImportDirective">True when at least one line of the file is an import directive.</param>
/// <param name="PathEntries">The non-comment, non-import lines of the file.</param>
internal sealed record PythonPthFile(string Path, string FileName, bool HasImportDirective, IReadOnlyCollection<string> PathEntries);
|
||||
|
||||
/// <summary>
/// Collection of detected Python startup hooks, .pth files and the warnings
/// produced while detecting them.
/// </summary>
internal sealed class PythonStartupHooks
{
    /// <summary>
    /// Creates the collection from already-materialized results.
    /// </summary>
    /// <param name="hooks">Detected startup hook files.</param>
    /// <param name="pthFiles">Detected .pth files.</param>
    /// <param name="warnings">Warnings raised during detection.</param>
    public PythonStartupHooks(
        IReadOnlyCollection<PythonStartupHook> hooks,
        IReadOnlyCollection<PythonPthFile> pthFiles,
        IReadOnlyCollection<string> warnings)
    {
        Hooks = hooks;
        PthFiles = pthFiles;
        Warnings = warnings;
    }

    /// <summary>Detected startup hook files.</summary>
    public IReadOnlyCollection<PythonStartupHook> Hooks { get; }

    /// <summary>Detected .pth files.</summary>
    public IReadOnlyCollection<PythonPthFile> PthFiles { get; }

    /// <summary>Warnings raised during detection.</summary>
    public IReadOnlyCollection<string> Warnings { get; }

    /// <summary>True when at least one startup hook was detected.</summary>
    public bool HasStartupHooks => Hooks.Count > 0;

    /// <summary>True when at least one .pth file contains an import directive.</summary>
    public bool HasPthFilesWithImports => PthFiles.Any(p => p.HasImportDirective);

    /// <summary>True when at least one warning was recorded.</summary>
    public bool HasWarnings => Warnings.Count > 0;

    /// <summary>
    /// Flattens the collection into metadata key/value pairs: per-hook-type
    /// counts and ';'-joined paths, .pth file counts (plus the names of those
    /// with import directives), and one indexed entry per warning.
    /// </summary>
    /// <returns>The metadata entries; empty when nothing was detected.</returns>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var entries = new List<KeyValuePair<string, string?>>();

        AddHookEntries(entries, "sitecustomize", StartupHookType.SiteCustomize);
        AddHookEntries(entries, "usercustomize", StartupHookType.UserCustomize);

        if (PthFiles.Count > 0)
        {
            entries.Add(new KeyValuePair<string, string?>("pthFiles.count", FormatCount(PthFiles.Count)));

            var withImports = PthFiles.Where(p => p.HasImportDirective).ToArray();
            if (withImports.Length > 0)
            {
                entries.Add(new KeyValuePair<string, string?>("pthFiles.withImports.count", FormatCount(withImports.Length)));
                entries.Add(new KeyValuePair<string, string?>("pthFiles.withImports.files", string.Join(';', withImports.Select(p => p.FileName))));
            }
        }

        // Enumerate once with a running index: ElementAt(i) on a collection that
        // is not indexable would re-walk it from the start for every warning.
        var warningIndex = 0;
        foreach (var warning in Warnings)
        {
            entries.Add(new KeyValuePair<string, string?>($"startupHooks.warning[{FormatCount(warningIndex)}]", warning));
            warningIndex++;
        }

        return entries;
    }

    /// <summary>
    /// Produces one file evidence record per startup hook and per .pth file
    /// that contains an import directive.
    /// </summary>
    /// <param name="context">Analyzer context used to normalize paths.</param>
    /// <returns>The evidence records, hooks first.</returns>
    public IReadOnlyCollection<LanguageComponentEvidence> ToEvidence(LanguageAnalyzerContext context)
    {
        var evidence = new List<LanguageComponentEvidence>();

        foreach (var hook in Hooks)
        {
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.File,
                hook.HookType == StartupHookType.SiteCustomize ? "sitecustomize" : "usercustomize",
                PythonPathHelper.NormalizeRelative(context, hook.Path),
                Value: null,
                Sha256: null));
        }

        foreach (var pthFile in PthFiles.Where(p => p.HasImportDirective))
        {
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.File,
                "pth-import",
                PythonPathHelper.NormalizeRelative(context, pthFile.Path),
                Value: null,
                Sha256: null));
        }

        return evidence;
    }

    // Appends the count/paths pair for one hook type; nothing is added when
    // no hook of that type exists.
    private void AddHookEntries(List<KeyValuePair<string, string?>> entries, string label, StartupHookType hookType)
    {
        var matching = Hooks.Where(h => h.HookType == hookType).ToArray();
        if (matching.Length == 0)
        {
            return;
        }

        entries.Add(new KeyValuePair<string, string?>($"startupHooks.{label}.count", FormatCount(matching.Length)));
        entries.Add(new KeyValuePair<string, string?>($"startupHooks.{label}.paths", string.Join(';', matching.Select(h => h.Path))));
    }

    // Metadata is machine-readable, so numbers are formatted culture-invariantly.
    private static string FormatCount(int value) =>
        value.ToString(System.Globalization.CultureInfo.InvariantCulture);
}
|
||||
@@ -0,0 +1,279 @@
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Resolver;
|
||||
|
||||
/// <summary>
/// What a module name resolved to.
/// </summary>
internal enum PythonResolutionKind
{
    /// <summary>
    /// A source module (.py file).
    /// </summary>
    SourceModule = 0,

    /// <summary>
    /// A compiled bytecode module (.pyc file).
    /// </summary>
    BytecodeModule = 1,

    /// <summary>
    /// A regular package, i.e. one with an __init__.py.
    /// </summary>
    Package = 2,

    /// <summary>
    /// A PEP 420 namespace package (no __init__.py).
    /// </summary>
    NamespacePackage = 3,

    /// <summary>
    /// A module built into the Python interpreter.
    /// </summary>
    BuiltinModule = 4,

    /// <summary>
    /// A frozen module compiled into the Python binary.
    /// </summary>
    FrozenModule = 5,

    /// <summary>
    /// A native extension module (.so, .pyd).
    /// </summary>
    ExtensionModule = 6,

    /// <summary>
    /// A module located inside a zip archive.
    /// </summary>
    ZipModule = 7,

    /// <summary>
    /// Resolution failed.
    /// </summary>
    NotFound = 8
}
|
||||
|
||||
/// <summary>
/// How much a module resolution can be trusted. Numeric values increase
/// with confidence.
/// </summary>
internal enum PythonResolutionConfidence
{
    /// <summary>
    /// Heuristic or partial match.
    /// </summary>
    Low = 0,

    /// <summary>
    /// Probably correct, but not verified.
    /// </summary>
    Medium = 1,

    /// <summary>
    /// Clear match.
    /// </summary>
    High = 2,

    /// <summary>
    /// The exact file was found.
    /// </summary>
    Definitive = 3
}
|
||||
|
||||
/// <summary>
/// Outcome of resolving one Python module name to a location.
/// </summary>
/// <param name="ModuleName">Fully qualified module name that was resolved.</param>
/// <param name="Kind">What the name resolved to.</param>
/// <param name="VirtualPath">Resolved virtual path, or null when there is none.</param>
/// <param name="AbsolutePath">Resolved absolute path, or null when there is none.</param>
/// <param name="SearchPath">Search path entry that matched, if any.</param>
/// <param name="Source">Source type of the resolved file.</param>
/// <param name="Confidence">How much the resolution can be trusted.</param>
/// <param name="NamespacePaths">Contributing paths when the result is a namespace package.</param>
/// <param name="ResolverTrace">Resolution steps recorded for debugging, if tracing was on.</param>
internal sealed record PythonModuleResolution(
    string ModuleName,
    PythonResolutionKind Kind,
    string? VirtualPath,
    string? AbsolutePath,
    string? SearchPath,
    PythonFileSource Source,
    PythonResolutionConfidence Confidence,
    IReadOnlyList<string>? NamespacePaths = null,
    IReadOnlyList<string>? ResolverTrace = null)
{
    /// <summary>
    /// True unless the kind is <see cref="PythonResolutionKind.NotFound"/>.
    /// </summary>
    public bool IsResolved => Kind is not PythonResolutionKind.NotFound;

    /// <summary>
    /// True for regular packages and namespace packages.
    /// </summary>
    public bool IsPackage =>
        Kind == PythonResolutionKind.Package || Kind == PythonResolutionKind.NamespacePackage;

    /// <summary>
    /// True only for namespace packages.
    /// </summary>
    public bool IsNamespacePackage => Kind is PythonResolutionKind.NamespacePackage;

    /// <summary>
    /// True for built-in and frozen modules.
    /// </summary>
    public bool IsBuiltin =>
        Kind == PythonResolutionKind.BuiltinModule || Kind == PythonResolutionKind.FrozenModule;

    /// <summary>
    /// Everything before the last dot of <see cref="ModuleName"/>, or null
    /// when the name contains no dot.
    /// </summary>
    public string? ParentModule
    {
        get
        {
            var separator = ModuleName.LastIndexOf('.');
            if (separator < 0)
            {
                return null;
            }

            return ModuleName[..separator];
        }
    }

    /// <summary>
    /// Everything after the last dot of <see cref="ModuleName"/>, or the whole
    /// name when it contains no dot.
    /// </summary>
    public string SimpleName
    {
        get
        {
            var separator = ModuleName.LastIndexOf('.');
            if (separator < 0)
            {
                return ModuleName;
            }

            return ModuleName[(separator + 1)..];
        }
    }

    /// <summary>
    /// Builds a low-confidence "not found" result for <paramref name="moduleName"/>.
    /// </summary>
    /// <param name="moduleName">Name that could not be resolved.</param>
    /// <param name="trace">Optional resolution trace to attach.</param>
    public static PythonModuleResolution NotFound(string moduleName, IReadOnlyList<string>? trace = null)
    {
        return new PythonModuleResolution(
            moduleName,
            PythonResolutionKind.NotFound,
            null,
            null,
            null,
            PythonFileSource.Unknown,
            PythonResolutionConfidence.Low,
            null,
            trace);
    }

    /// <summary>
    /// Builds a definitive built-in result for <paramref name="moduleName"/>;
    /// it carries no paths and no trace.
    /// </summary>
    /// <param name="moduleName">Name of the built-in module.</param>
    public static PythonModuleResolution Builtin(string moduleName)
    {
        return new PythonModuleResolution(
            moduleName,
            PythonResolutionKind.BuiltinModule,
            null,
            null,
            null,
            PythonFileSource.Unknown,
            PythonResolutionConfidence.Definitive,
            null,
            null);
    }

    /// <summary>
    /// Lazily yields metadata pairs keyed <c>{prefix}.module</c>, <c>.kind</c>
    /// and <c>.confidence</c>, followed by <c>.path</c>, <c>.searchPath</c> and
    /// <c>.namespacePaths</c> (';'-joined) for the values that are present.
    /// </summary>
    /// <param name="prefix">Key prefix placed before each entry name.</param>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return Entry("module", ModuleName);
        yield return Entry("kind", Kind.ToString());
        yield return Entry("confidence", Confidence.ToString());

        if (VirtualPath is { } virtualPath)
        {
            yield return Entry("path", virtualPath);
        }

        if (SearchPath is { } searchPath)
        {
            yield return Entry("searchPath", searchPath);
        }

        if (NamespacePaths is { Count: > 0 })
        {
            yield return Entry("namespacePaths", string.Join(';', NamespacePaths));
        }

        KeyValuePair<string, string?> Entry(string suffix, string? value) =>
            new KeyValuePair<string, string?>($"{prefix}.{suffix}", value);
    }
}
|
||||
|
||||
/// <summary>
/// One entry of the module search path.
/// </summary>
/// <param name="Path">The search path itself.</param>
/// <param name="Priority">Ordering key; lower values are searched first.</param>
/// <param name="Kind">What kind of entry this is.</param>
/// <param name="FromPthFile">The .pth file that contributed this entry, or null.</param>
internal sealed record PythonSearchPath(string Path, int Priority, PythonSearchPathKind Kind, string? FromPthFile = null)
{
    /// <summary>
    /// True when <see cref="FromPthFile"/> is set.
    /// </summary>
    public bool IsFromPthFile
    {
        get { return FromPthFile is { }; }
    }
}
|
||||
|
||||
/// <summary>
/// Origin of a search path entry.
/// </summary>
internal enum PythonSearchPathKind
{
    /// <summary>
    /// Directory of the script, or the current working directory.
    /// </summary>
    ScriptDirectory = 0,

    /// <summary>
    /// Entry taken from the PYTHONPATH environment variable.
    /// </summary>
    PythonPath = 1,

    /// <summary>
    /// A site-packages directory.
    /// </summary>
    SitePackages = 2,

    /// <summary>
    /// The standard library location.
    /// </summary>
    StandardLibrary = 3,

    /// <summary>
    /// User site-packages (--user installations).
    /// </summary>
    UserSitePackages = 4,

    /// <summary>
    /// Entry contributed by a .pth file.
    /// </summary>
    PthFile = 5,

    /// <summary>
    /// A zip archive on the path.
    /// </summary>
    ZipArchive = 6,

    /// <summary>
    /// Location of an editable install.
    /// </summary>
    EditableInstall = 7,

    /// <summary>
    /// A virtual environment location.
    /// </summary>
    VirtualEnv = 8,

    /// <summary>
    /// Anything else: custom or unknown origin.
    /// </summary>
    Custom = 9
}
|
||||
@@ -0,0 +1,538 @@
|
||||
using System.Collections.Frozen;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Resolver;
|
||||
|
||||
/// <summary>
|
||||
/// Resolves Python module names to file locations following importlib semantics.
|
||||
/// Supports namespace packages (PEP 420), .pth files, zipimport, and site-packages precedence.
|
||||
/// </summary>
|
||||
internal sealed partial class PythonModuleResolver
|
||||
{
|
||||
// Virtual file system queried for file existence and file metadata.
private readonly PythonVirtualFileSystem _vfs;

// Root directory combined with a file's path when reading .pth files from disk.
private readonly string _rootPath;

// When true, each resolution records the steps it took.
private readonly bool _enableTracing;

// Search path entries; rebuilt and ordered by priority on initialization.
private readonly List<PythonSearchPath> _searchPaths = new List<PythonSearchPath>();

// Memoized resolutions keyed by module name (ordinal comparison).
private readonly Dictionary<string, PythonModuleResolution> _cache =
    new Dictionary<string, PythonModuleResolution>(StringComparer.Ordinal);
|
||||
|
||||
// Names treated as modules built into the Python interpreter (ordinal match).
private static readonly FrozenSet<string> BuiltinModules = new[]
{
    "sys", "builtins", "_abc", "_ast", "_bisect", "_blake2", "_codecs",
    "_collections", "_datetime", "_elementtree", "_functools", "_heapq",
    "_imp", "_io", "_json", "_locale", "_lsprof", "_md5", "_operator",
    "_pickle", "_posixsubprocess", "_random", "_sha1", "_sha256", "_sha512",
    "_sha3", "_signal", "_socket", "_sre", "_stat", "_statistics", "_string",
    "_struct", "_symtable", "_thread", "_tracemalloc", "_typing", "_warnings",
    "_weakref", "array", "atexit", "binascii", "cmath", "errno", "faulthandler",
    "gc", "itertools", "marshal", "math", "posix", "pwd", "select", "time",
    "unicodedata", "xxsubtype", "zlib",
}.ToFrozenSet(StringComparer.Ordinal);
|
||||
|
||||
// Top-level names treated as part of the Python standard library (ordinal match).
private static readonly FrozenSet<string> StandardLibraryModules = new[]
{
    "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
    "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex",
    "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk",
    "cmath", "cmd", "code", "codecs", "codeop", "collections", "colorsys",
    "compileall", "concurrent", "configparser", "contextlib", "contextvars",
    "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes", "curses",
    "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis",
    "distutils", "doctest", "email", "encodings", "enum", "errno",
    "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions",
    "ftplib", "functools", "gc", "getopt", "getpass", "gettext", "glob",
    "graphlib", "grp", "gzip", "hashlib", "heapq", "hmac", "html", "http",
    "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io",
    "ipaddress", "itertools", "json", "keyword", "lib2to3", "linecache",
    "locale", "logging", "lzma", "mailbox", "mailcap", "marshal", "math",
    "mimetypes", "mmap", "modulefinder", "multiprocessing", "netrc", "nis",
    "nntplib", "numbers", "operator", "optparse", "os", "ossaudiodev",
    "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil", "platform",
    "plistlib", "poplib", "posix", "posixpath", "pprint", "profile", "pstats",
    "pty", "pwd", "py_compile", "pyclbr", "pydoc", "queue", "quopri",
    "random", "re", "readline", "reprlib", "resource", "rlcompleter",
    "runpy", "sched", "secrets", "select", "selectors", "shelve", "shlex",
    "shutil", "signal", "site", "smtpd", "smtplib", "sndhdr", "socket",
    "socketserver", "spwd", "sqlite3", "ssl", "stat", "statistics", "string",
    "stringprep", "struct", "subprocess", "sunau", "symtable", "sys",
    "sysconfig", "syslog", "tabnanny", "tarfile", "telnetlib", "tempfile",
    "termios", "test", "textwrap", "threading", "time", "timeit", "tkinter",
    "token", "tokenize", "tomllib", "trace", "traceback", "tracemalloc",
    "tty", "turtle", "turtledemo", "types", "typing", "unicodedata",
    "unittest", "urllib", "uu", "uuid", "venv", "warnings", "wave",
    "weakref", "webbrowser", "winreg", "winsound", "wsgiref", "xdrlib",
    "xml", "xmlrpc", "zipapp", "zipfile", "zipimport", "zlib", "zoneinfo",
    "_thread",
}.ToFrozenSet(StringComparer.Ordinal);
|
||||
|
||||
/// <summary>
/// Creates a resolver over the given virtual file system. No search paths
/// exist until <c>InitializeAsync</c> has run.
/// </summary>
/// <param name="vfs">Virtual file system to resolve against.</param>
/// <param name="rootPath">Root directory used when reading .pth files from disk.</param>
/// <param name="enableTracing">When true, resolutions record the steps they took.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="vfs"/> or <paramref name="rootPath"/> is null.
/// </exception>
public PythonModuleResolver(PythonVirtualFileSystem vfs, string rootPath, bool enableTracing = false)
{
    if (vfs is null)
    {
        throw new ArgumentNullException(nameof(vfs));
    }

    if (rootPath is null)
    {
        throw new ArgumentNullException(nameof(rootPath));
    }

    _vfs = vfs;
    _rootPath = rootPath;
    _enableTracing = enableTracing;
}
|
||||
|
||||
/// <summary>
/// The search path entries currently configured on this resolver.
/// </summary>
public IReadOnlyList<PythonSearchPath> SearchPaths
{
    get { return _searchPaths; }
}
|
||||
|
||||
/// <summary>
/// (Re)initializes the resolver: clears the search paths and the resolution
/// cache, rebuilds the search paths from the virtual file system, adds the
/// entries contributed by .pth files and orders everything by priority.
/// </summary>
/// <param name="cancellationToken">Token observed while .pth files are processed.</param>
/// <returns>This resolver, to allow chaining.</returns>
public async Task<PythonModuleResolver> InitializeAsync(CancellationToken cancellationToken = default)
{
    _searchPaths.Clear();
    _cache.Clear();

    // Build search paths from the VFS, then append .pth contributions.
    BuildSearchPaths();
    await ProcessPthFilesAsync(cancellationToken).ConfigureAwait(false);

    // Many entries share a priority (all site-packages directories, all editable
    // installs, and the entries of different .pth files). List<T>.Sort is an
    // unstable sort and may reorder such ties; OrderBy is stable, so entries of
    // equal priority keep their discovery order.
    var ordered = _searchPaths.OrderBy(static entry => entry.Priority).ToList();
    _searchPaths.Clear();
    _searchPaths.AddRange(ordered);

    return this;
}
|
||||
|
||||
/// <summary>
/// Resolves an absolute module name. Results, including failures, are
/// cached per name.
/// </summary>
/// <param name="moduleName">Fully qualified module name.</param>
/// <returns>
/// A not-found result for a null or empty name; a built-in result for names
/// in the built-in set; otherwise whatever the search paths produce.
/// </returns>
public PythonModuleResolution Resolve(string moduleName)
{
    // Null/empty names are rejected without touching the cache.
    if (string.IsNullOrEmpty(moduleName))
    {
        return PythonModuleResolution.NotFound(moduleName ?? string.Empty);
    }

    if (_cache.TryGetValue(moduleName, out var known))
    {
        return known;
    }

    PythonModuleResolution result;
    if (IsBuiltinModule(moduleName))
    {
        result = PythonModuleResolution.Builtin(moduleName);
    }
    else
    {
        List<string>? steps = _enableTracing ? new List<string>() : null;
        result = ResolveFromSearchPaths(moduleName, steps);
    }

    _cache[moduleName] = result;
    return result;
}
|
||||
|
||||
/// <summary>
/// Resolves a relative import (<c>from .x import y</c>, <c>from .. import z</c>)
/// issued by <paramref name="fromModule"/>.
/// </summary>
/// <param name="moduleName">Name after the leading dots; may be empty for <c>from . import x</c>.</param>
/// <param name="relativeLevel">Number of leading dots. Zero or less means an absolute import.</param>
/// <param name="fromModule">Fully qualified name of the importing module or package.</param>
/// <returns>
/// The resolution of the computed absolute name, or a not-found result when
/// the import climbs above the top-level package.
/// </returns>
public PythonModuleResolution ResolveRelative(
    string moduleName,
    int relativeLevel,
    string fromModule)
{
    if (relativeLevel <= 0)
    {
        return Resolve(moduleName);
    }

    var fromParts = fromModule.Split('.');

    // Relative imports are anchored at the importer's package: the importer
    // itself when it is a package, otherwise its parent. The first dot refers
    // to that anchor; every further dot climbs one level.
    var anchorLength = Resolve(fromModule).IsPackage
        ? fromParts.Length
        : fromParts.Length - 1;

    // One dot needs an anchor of at least one component, two dots need two, ...
    if (relativeLevel > anchorLength)
    {
        // NOTE(review): the message is passed as the module name, as before;
        // kept unchanged in case callers rely on it.
        return PythonModuleResolution.NotFound("relative import beyond top-level package");
    }

    var baseLength = anchorLength - (relativeLevel - 1);
    var basePackage = string.Join('.', fromParts[..baseLength]);

    // Build the absolute module name.
    string absoluteName;
    if (string.IsNullOrEmpty(moduleName))
    {
        absoluteName = basePackage;
    }
    else if (string.IsNullOrEmpty(basePackage))
    {
        absoluteName = moduleName;
    }
    else
    {
        absoluteName = $"{basePackage}.{moduleName}";
    }

    return Resolve(absoluteName);
}
|
||||
|
||||
/// <summary>
/// Tells whether the top-level component of <paramref name="moduleName"/>
/// is listed as a standard library or built-in module.
/// </summary>
/// <param name="moduleName">Module name, optionally dotted.</param>
/// <returns>True when the first dotted component is in either set.</returns>
public static bool IsStandardLibraryModule(string moduleName)
{
    var firstDot = moduleName.IndexOf('.');
    var root = firstDot < 0 ? moduleName : moduleName[..firstDot];

    if (StandardLibraryModules.Contains(root))
    {
        return true;
    }

    return BuiltinModules.Contains(root);
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="moduleName"/> is listed as a built-in
/// module. The whole name is matched, not only its first component.
/// </summary>
/// <param name="moduleName">Module name to look up.</param>
public static bool IsBuiltinModule(string moduleName) => BuiltinModules.Contains(moduleName);
|
||||
|
||||
/// <summary>
/// Appends the search path entries known to the virtual file system.
/// Source tree roots get consecutive priorities starting at 0. With N roots,
/// site-packages get N + 100, editable installs N + 50 and zip archives
/// N + 200.
/// </summary>
private void BuildSearchPaths()
{
    var nextRootPriority = 0;

    // 1. Script directory / source tree roots
    foreach (var root in _vfs.SourceTreeRoots)
    {
        Register(root, nextRootPriority, PythonSearchPathKind.ScriptDirectory);
        nextRootPriority++;
    }

    // Offsets below are relative to the number of source roots registered.
    var basePriority = nextRootPriority;

    // 2. Site-packages directories (after the script directories)
    foreach (var sitePackages in _vfs.SitePackagesPaths)
    {
        Register(sitePackages, basePriority + 100, PythonSearchPathKind.SitePackages);
    }

    // 3. Editable install locations (between script directories and site-packages)
    foreach (var editable in _vfs.EditablePaths)
    {
        Register(editable, basePriority + 50, PythonSearchPathKind.EditableInstall);
    }

    // 4. Zip archives
    foreach (var archive in _vfs.ZipArchivePaths)
    {
        Register(archive, basePriority + 200, PythonSearchPathKind.ZipArchive);
    }

    void Register(string path, int priority, PythonSearchPathKind kind) =>
        _searchPaths.Add(new PythonSearchPath(Path: path, Priority: priority, Kind: kind));
}
|
||||
|
||||
/// <summary>
/// Reads every .pth file that the virtual file system attributes to
/// site-packages and adds the paths it lists to the search paths.
/// Unreadable files are skipped.
/// </summary>
/// <param name="cancellationToken">Checked before each file and passed to the read.</param>
private async Task ProcessPthFilesAsync(CancellationToken cancellationToken)
{
    // Find all .pth files in site-packages
    var pthFiles = _vfs.EnumerateFiles(string.Empty, "*.pth")
        .Where(f => f.Source == PythonFileSource.SitePackages);

    foreach (var pthFile in pthFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            var lines = await ReadPthFileAsync(pthFile, cancellationToken).ConfigureAwait(false);
            ProcessPthFileLines(lines, pthFile.VirtualPath);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Skip unreadable files. A permission failure surfaces as
            // UnauthorizedAccessException, which is not an IOException; without
            // this filter one protected .pth file would abort initialization.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Loads the non-empty lines of a .pth file from disk.
/// </summary>
/// <param name="file">Virtual file describing the .pth file.</param>
/// <param name="cancellationToken">Token passed to the file read.</param>
/// <returns>
/// The file content split on '\n' with empty pieces removed, or an empty
/// array when the file lives in an archive or cannot be found on disk.
/// </returns>
private async Task<string[]> ReadPthFileAsync(PythonVirtualFile file, CancellationToken cancellationToken)
{
    // Archive members are not read.
    if (file.IsFromArchive)
    {
        return Array.Empty<string>();
    }

    // Prefer the path under the root; fall back to the recorded path as-is.
    var location = Path.Combine(_rootPath, file.AbsolutePath);
    if (!File.Exists(location))
    {
        location = file.AbsolutePath;
    }

    if (!File.Exists(location))
    {
        return Array.Empty<string>();
    }

    var text = await File.ReadAllTextAsync(location, cancellationToken).ConfigureAwait(false);
    return text.Split('\n', StringSplitOptions.RemoveEmptyEntries);
}
|
||||
|
||||
/// <summary>
/// Turns the lines of one .pth file into search path entries. Blank lines,
/// '#' comments and import directives are ignored; every other line is a
/// path, resolved against the .pth file's directory when it is relative.
/// </summary>
/// <param name="lines">Raw lines of the .pth file.</param>
/// <param name="pthFilePath">Virtual path of the .pth file; recorded on each entry.</param>
private void ProcessPthFileLines(string[] lines, string pthFilePath)
{
    var pthDirectory = Path.GetDirectoryName(pthFilePath) ?? string.Empty;

    // .pth entries start at a fixed 150 and count up per file. BuildSearchPaths
    // places editable installs at +50, site-packages at +100 and zip archives at
    // +200 relative to the number of source roots, so with few source roots
    // these entries sort after site-packages and before zip archives.
    var priority = 150;

    foreach (var rawLine in lines)
    {
        var line = rawLine.Trim();

        // Skip empty lines and comments
        if (string.IsNullOrEmpty(line) || line.StartsWith('#'))
        {
            continue;
        }

        // Python executes lines that start with "import" followed by a space or
        // a tab. They are code, not paths, so static analysis skips both forms.
        if (line.StartsWith("import ", StringComparison.Ordinal) ||
            line.StartsWith("import\t", StringComparison.Ordinal))
        {
            continue;
        }

        // The line is a path to add; relative paths are resolved against the
        // .pth file's directory.
        var path = Path.IsPathRooted(line) ? line : Path.Combine(pthDirectory, line);

        _searchPaths.Add(new PythonSearchPath(
            Path: path.Replace('\\', '/'),
            Priority: priority++,
            Kind: PythonSearchPathKind.PthFile,
            FromPthFile: pthFilePath));
    }
}
|
||||
|
||||
/// <summary>
/// Tries each search path in order. The first module or regular package found
/// is returned immediately. Namespace-package directories are only collected;
/// if the scan finds nothing else, they are returned as a single namespace
/// package result.
/// </summary>
/// <param name="moduleName">Fully qualified module name.</param>
/// <param name="trace">Optional list that receives the resolution steps.</param>
/// <returns>The resolution, or a not-found result.</returns>
private PythonModuleResolution ResolveFromSearchPaths(string moduleName, List<string>? trace)
{
    var parts = moduleName.Split('.');
    var namespacePaths = new List<string>();

    // Search path entry of the first namespace portion that was collected.
    string? namespaceSearchPath = null;

    foreach (var searchPath in _searchPaths)
    {
        trace?.Add($"Searching in {searchPath.Kind}: {searchPath.Path}");

        var resolution = TryResolveInPath(moduleName, parts, searchPath, trace);
        if (resolution is null)
        {
            continue;
        }

        if (resolution.Kind != PythonResolutionKind.NamespacePackage)
        {
            return resolution;
        }

        // Namespace portion: remember it and keep scanning, because a later
        // search path may still hold a real module or package.
        if (resolution.VirtualPath is not null)
        {
            namespacePaths.Add(resolution.VirtualPath);
            namespaceSearchPath ??= resolution.SearchPath ?? searchPath.Path;
        }
    }

    // If we found namespace package contributions, return a namespace package resolution
    if (namespacePaths.Count > 0)
    {
        return new PythonModuleResolution(
            ModuleName: moduleName,
            Kind: PythonResolutionKind.NamespacePackage,
            VirtualPath: namespacePaths[0],
            AbsolutePath: null,
            // Report the entry that actually contributed namespacePaths[0],
            // not whichever entry happens to be first in the list.
            SearchPath: namespaceSearchPath,
            // NOTE(review): the source is reported as SitePackages regardless
            // of where the portions were found - kept as before; confirm intent.
            Source: PythonFileSource.SitePackages,
            Confidence: PythonResolutionConfidence.High,
            NamespacePaths: namespacePaths,
            ResolverTrace: trace);
    }

    return PythonModuleResolution.NotFound(moduleName, trace);
}
|
||||
|
||||
/// <summary>
/// Attempts to resolve a module below a single search path entry. Every
/// component except the last must exist as a package directory, with or
/// without an __init__.py; the last one is delegated to
/// <c>TryResolveFinalComponent</c>.
/// </summary>
/// <param name="moduleName">Fully qualified module name.</param>
/// <param name="parts">The dotted components of <paramref name="moduleName"/>.</param>
/// <param name="searchPath">Search path entry to look under.</param>
/// <param name="trace">Optional list that receives the resolution steps.</param>
/// <returns>The resolution, or null when an intermediate package is missing.</returns>
private PythonModuleResolution? TryResolveInPath(
    string moduleName,
    string[] parts,
    PythonSearchPath searchPath,
    List<string>? trace)
{
    if (parts.Length == 0)
    {
        return null;
    }

    var containingPath = searchPath.Path;
    var lastIndex = parts.Length - 1;

    // Descend through the intermediate packages.
    for (var index = 0; index < lastIndex; index++)
    {
        var candidate = $"{containingPath}/{parts[index]}";

        // A regular package has an __init__.py; a namespace package (PEP 420)
        // is just an existing directory.
        var exists = _vfs.FileExists($"{candidate}/__init__.py") || DirectoryExists(candidate);
        if (!exists)
        {
            trace?.Add($"  Package not found: {candidate}");
            return null;
        }

        containingPath = candidate;
    }

    // The final component may be a module or a package.
    return TryResolveFinalComponent(moduleName, containingPath, parts[lastIndex], searchPath, trace);
}
|
||||
|
||||
/// <summary>
/// Resolves the final dotted component <paramref name="name"/> of <paramref name="moduleName"/>
/// against the directory <paramref name="currentPath"/> of the virtual filesystem.
/// </summary>
/// <remarks>
/// Candidates are probed in a fixed order and the first hit wins:
/// <c>name.py</c>, <c>name.pyc</c>, <c>name.so</c>, <c>name.pyd</c>, <c>name/__init__.py</c>,
/// and finally a PEP 420 namespace package (a directory that holds files but no <c>__init__.py</c>).
/// NOTE(review): CPython's path-based finder prefers a package directory over a same-named
/// module file; the probe order here is kept unchanged - confirm it is intentional.
/// </remarks>
/// <param name="moduleName">Fully qualified module name recorded on the result.</param>
/// <param name="currentPath">Virtual directory that should contain the component.</param>
/// <param name="name">Last component of the dotted module name.</param>
/// <param name="searchPath">Search path entry being scanned; its <c>Path</c> is recorded on the result.</param>
/// <param name="trace">Optional resolver trace; one line is appended per outcome.</param>
/// <returns>The resolution of the first matching candidate, or <c>null</c> when nothing matches.</returns>
private PythonModuleResolution? TryResolveFinalComponent(
    string moduleName,
    string currentPath,
    string name,
    PythonSearchPath searchPath,
    List<string>? trace)
{
    // Probes one file-backed candidate. Each path is tested exactly once and
    // later candidates are not touched after a hit (the ?? chain is lazy).
    PythonModuleResolution? Probe(
        string virtualPath,
        PythonResolutionKind kind,
        PythonResolutionConfidence confidence,
        string label)
    {
        if (!_vfs.FileExists(virtualPath))
        {
            return null;
        }

        trace?.Add($" Found {label}: {virtualPath}");
        var file = _vfs.GetFile(virtualPath);
        return new PythonModuleResolution(
            ModuleName: moduleName,
            Kind: kind,
            VirtualPath: virtualPath,
            AbsolutePath: file?.AbsolutePath,
            SearchPath: searchPath.Path,
            Source: file?.Source ?? PythonFileSource.Unknown,
            Confidence: confidence,
            ResolverTrace: trace);
    }

    var packagePath = $"{currentPath}/{name}";

    var fileBacked =
        // 1. Source module: name.py
        Probe($"{currentPath}/{name}.py", PythonResolutionKind.SourceModule, PythonResolutionConfidence.Definitive, "module")
        // 2. Bytecode-only module: name.pyc
        ?? Probe($"{currentPath}/{name}.pyc", PythonResolutionKind.BytecodeModule, PythonResolutionConfidence.High, "bytecode")
        // 3. Native extension module: name.so, then name.pyd
        ?? Probe($"{currentPath}/{name}.so", PythonResolutionKind.ExtensionModule, PythonResolutionConfidence.Definitive, "extension")
        ?? Probe($"{currentPath}/{name}.pyd", PythonResolutionKind.ExtensionModule, PythonResolutionConfidence.Definitive, "extension")
        // 4. Regular package: name/__init__.py
        ?? Probe($"{packagePath}/__init__.py", PythonResolutionKind.Package, PythonResolutionConfidence.Definitive, "package");

    if (fileBacked is not null)
    {
        return fileBacked;
    }

    // 5. Namespace package (PEP 420): the directory has content but no __init__.py.
    //    There is no backing file, so no absolute path or file source is recorded.
    if (DirectoryExists(packagePath))
    {
        trace?.Add($" Found namespace package: {packagePath}");
        return new PythonModuleResolution(
            ModuleName: moduleName,
            Kind: PythonResolutionKind.NamespacePackage,
            VirtualPath: packagePath,
            AbsolutePath: null,
            SearchPath: searchPath.Path,
            Source: PythonFileSource.Unknown,
            Confidence: PythonResolutionConfidence.High,
            ResolverTrace: trace);
    }

    trace?.Add($" Not found in {currentPath}/{name}");
    return null;
}
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="virtualPath"/> acts as a directory, i.e. whether the
/// virtual filesystem enumerates at least one file for it with the <c>*</c> pattern.
/// </summary>
private bool DirectoryExists(string virtualPath)
{
    // The first enumerated entry is enough; stop immediately.
    foreach (var _ in _vfs.EnumerateFiles(virtualPath, "*"))
    {
        return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Removes every entry from the resolution cache.
/// </summary>
public void ClearCache() => _cache.Clear();
|
||||
|
||||
/// <summary>
/// Summarizes the resolution cache.
/// </summary>
/// <returns>
/// <c>Total</c>: number of cache entries; <c>Resolved</c>: entries whose <c>IsResolved</c> is true;
/// <c>NotFound</c>: the remainder; <c>Cached</c>: the cache entry count (read from the same
/// cache as <c>Total</c>).
/// </returns>
public (int Total, int Resolved, int NotFound, int Cached) GetStatistics()
{
    var entryCount = _cache.Count;

    var resolvedCount = 0;
    foreach (var resolution in _cache.Values)
    {
        if (resolution.IsResolved)
        {
            resolvedCount++;
        }
    }

    return (entryCount, resolvedCount, entryCount - resolvedCount, _cache.Count);
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
/// <summary>
/// Describes where a file in the Python virtual filesystem came from.
/// </summary>
/// <remarks>
/// Numeric values are spelled out and match the implicit declaration order.
/// </remarks>
internal enum PythonFileSource
{
    /// <summary>Origin is unknown or could not be resolved.</summary>
    Unknown = 0,

    /// <summary>Installed package file under a site-packages directory.</summary>
    SitePackages = 1,

    /// <summary>Entry of a wheel archive (.whl).</summary>
    Wheel = 2,

    /// <summary>Entry of a source distribution (.tar.gz, .zip).</summary>
    Sdist = 3,

    /// <summary>Entry of a zipapp (.pyz, .pyzw).</summary>
    Zipapp = 4,

    /// <summary>File reached through an editable (development mode) install.</summary>
    Editable = 5,

    /// <summary>File taken from a container layer.</summary>
    ContainerLayer = 6,

    /// <summary>File from the project's own source tree.</summary>
    SourceTree = 7,

    /// <summary>File from a virtual environment's bin/Scripts directory.</summary>
    VenvBin = 8,

    /// <summary>Python configuration file (pyproject.toml, setup.py, setup.cfg).</summary>
    ProjectConfig = 9,

    /// <summary>Lock file (requirements.txt, Pipfile.lock, poetry.lock).</summary>
    LockFile = 10,

    /// <summary>Module that belongs to the standard library.</summary>
    StdLib = 11
}
|
||||
@@ -0,0 +1,808 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
/// <summary>
|
||||
/// Normalizes Python project inputs by detecting layouts, version targets,
|
||||
/// and building a virtual filesystem from various sources.
|
||||
/// </summary>
|
||||
internal sealed partial class PythonInputNormalizer
|
||||
{
|
||||
// Shared options for every directory scan in this class: top level only, entries that
// cannot be accessed are skipped, and device files / reparse points are ignored.
private static readonly EnumerationOptions SafeEnumeration = new EnumerationOptions
{
    AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint,
    IgnoreInaccessible = true,
    RecurseSubdirectories = false
};

// Absolute form of the path handed to the constructor.
private readonly string _rootPath;

// Results accumulated by the Detect*/Parse* methods.
private readonly List<PythonVersionTarget> _versionTargets = new List<PythonVersionTarget>();
private readonly List<string> _sitePackagesPaths = new List<string>();
private readonly List<string> _wheelPaths = new List<string>();
private readonly List<string> _zipappPaths = new List<string>();
private readonly List<(string Path, string? PackageName)> _editablePaths = new List<(string Path, string? PackageName)>();

private PythonLayoutKind _layout = PythonLayoutKind.Unknown;

// Populated only when a virtual environment is detected.
private string? _venvPath;
private string? _binPath;
private string? _pythonExecutable;

/// <summary>
/// Creates a normalizer rooted at <paramref name="rootPath"/>.
/// </summary>
/// <param name="rootPath">Directory to analyze; stored as a full path.</param>
/// <exception cref="ArgumentException"><paramref name="rootPath"/> is null, empty or whitespace.</exception>
public PythonInputNormalizer(string rootPath)
{
    _rootPath = string.IsNullOrWhiteSpace(rootPath)
        ? throw new ArgumentException("Root path is required", nameof(rootPath))
        : Path.GetFullPath(rootPath);
}
|
||||
|
||||
/// <summary>
/// Layout kind determined by the last analysis.
/// </summary>
public PythonLayoutKind Layout
{
    get => _layout;
}

/// <summary>
/// Every version target collected so far, in discovery order.
/// </summary>
public IReadOnlyList<PythonVersionTarget> VersionTargets
{
    get => _versionTargets;
}

/// <summary>
/// The preferred version target: highest confidence first, ties broken by ordinal
/// comparison of the source name. <c>null</c> when nothing was detected.
/// </summary>
public PythonVersionTarget? PrimaryVersionTarget
{
    get
    {
        var ranked = _versionTargets
            .OrderByDescending(static target => target.Confidence)
            .ThenBy(static target => target.Source, StringComparer.Ordinal);
        return ranked.FirstOrDefault();
    }
}

/// <summary>
/// Site-packages directories that were found on disk.
/// </summary>
public IReadOnlyList<string> SitePackagesPaths
{
    get => _sitePackagesPaths;
}

/// <summary>
/// Root of the detected virtual environment, or <c>null</c> if none was found.
/// </summary>
public string? VenvPath
{
    get => _venvPath;
}

/// <summary>
/// The virtual environment's bin/Scripts directory, or <c>null</c> if none was found.
/// </summary>
public string? BinPath
{
    get => _binPath;
}
|
||||
|
||||
/// <summary>
/// Analyzes the root path: detects the layout, collects version targets and locates
/// site-packages directories, wheels, zipapps and editable installs.
/// </summary>
/// <remarks>
/// Results of a previous run are discarded first, so calling this method again does not
/// append duplicate version targets, zipapps or editable installs.
/// </remarks>
/// <param name="cancellationToken">Token forwarded to the file reads.</param>
/// <returns>This instance, to allow chaining.</returns>
public async Task<PythonInputNormalizer> AnalyzeAsync(CancellationToken cancellationToken = default)
{
    // Start from a clean slate; the detectors below only ever add to these.
    _versionTargets.Clear();
    _sitePackagesPaths.Clear();
    _wheelPaths.Clear();
    _zipappPaths.Clear();
    _editablePaths.Clear();
    _layout = PythonLayoutKind.Unknown;
    _venvPath = null;
    _binPath = null;
    _pythonExecutable = null;

    // Order matters: layout detection sets the venv path that later steps read,
    // and editable detection walks the site-packages list built before it.
    await DetectLayoutAsync(cancellationToken).ConfigureAwait(false);
    await DetectVersionTargetsAsync(cancellationToken).ConfigureAwait(false);
    DetectSitePackages();
    DetectWheels();
    DetectZipapps();
    await DetectEditablesAsync(cancellationToken).ConfigureAwait(false);
    return this;
}
|
||||
|
||||
/// <summary>
/// Feeds everything found by the analysis into a virtual filesystem builder and builds it.
/// </summary>
/// <returns>The virtual filesystem produced by the builder.</returns>
public PythonVirtualFileSystem BuildVirtualFileSystem()
{
    var vfsBuilder = PythonVirtualFileSystem.CreateBuilder();

    // Registration order: site-packages (later entries take precedence), wheels,
    // zipapps, editable installs, then the venv bin directory.
    foreach (var sitePackages in _sitePackagesPaths)
    {
        vfsBuilder.AddSitePackages(sitePackages);
    }

    foreach (var wheel in _wheelPaths)
    {
        vfsBuilder.AddWheel(wheel);
    }

    foreach (var zipapp in _zipappPaths)
    {
        vfsBuilder.AddZipapp(zipapp);
    }

    foreach (var editable in _editablePaths)
    {
        vfsBuilder.AddEditable(editable.Path, editable.PackageName);
    }

    // The bin directory is only added when it is known and still on disk.
    var hasBinDirectory = !string.IsNullOrEmpty(_binPath) && Directory.Exists(_binPath);
    if (hasBinDirectory)
    {
        vfsBuilder.AddVenvBin(_binPath);
    }

    return vfsBuilder.Build();
}
|
||||
|
||||
/// <summary>
/// Sets <c>_layout</c> by probing the root directory for well-known marker files and
/// directories. Rules are evaluated top to bottom and the first match wins.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the pyvenv.cfg read.</param>
private async Task DetectLayoutAsync(CancellationToken cancellationToken)
{
    bool RootHasFile(string fileName) => File.Exists(Path.Combine(_rootPath, fileName));

    bool HasSystemSitePackages()
    {
        var libRoot = Path.Combine(_rootPath, "lib");
        if (!Directory.Exists(libRoot))
        {
            return false;
        }

        try
        {
            return Directory
                .EnumerateDirectories(libRoot, "python*", SafeEnumeration)
                .Any(pythonDir => Directory.Exists(Path.Combine(pythonDir, "site-packages")));
        }
        catch (UnauthorizedAccessException)
        {
            // Inaccessible directories count as "not a system install".
            return false;
        }
    }

    // A pyvenv.cfg marks a virtual environment: the root itself is checked first,
    // followed by the conventional sub-directory names.
    var venvCandidates = new[]
    {
        _rootPath,
        Path.Combine(_rootPath, "venv"),
        Path.Combine(_rootPath, ".venv"),
        Path.Combine(_rootPath, "env"),
        Path.Combine(_rootPath, ".env"),
    };

    foreach (var venvCandidate in venvCandidates)
    {
        var cfgPath = Path.Combine(venvCandidate, "pyvenv.cfg");
        if (!File.Exists(cfgPath))
        {
            continue;
        }

        _layout = PythonLayoutKind.Virtualenv;
        _venvPath = venvCandidate;
        DetectBinDirectory();
        await ParsePyvenvCfgAsync(cfgPath, cancellationToken).ConfigureAwait(false);
        return;
    }

    if (RootHasFile("poetry.lock"))
    {
        _layout = PythonLayoutKind.Poetry;
    }
    else if (RootHasFile("Pipfile.lock"))
    {
        _layout = PythonLayoutKind.Pipenv;
    }
    else if (Directory.Exists(Path.Combine(_rootPath, "conda-meta")))
    {
        _layout = PythonLayoutKind.Conda;
    }
    else if (RootHasFile("lambda_function.py") ||
             (RootHasFile("requirements.txt") && Directory.Exists(Path.Combine(_rootPath, "python"))))
    {
        _layout = PythonLayoutKind.Lambda;
    }
    else if (RootHasFile("function.json") || RootHasFile("host.json"))
    {
        _layout = PythonLayoutKind.AzureFunction;
    }
    else if (RootHasFile("Dockerfile") || Directory.Exists(Path.Combine(_rootPath, "usr", "local", "lib")))
    {
        _layout = PythonLayoutKind.Container;
    }
    else
    {
        // Last resort: lib/python*/site-packages indicates a system installation.
        _layout = HasSystemSitePackages() ? PythonLayoutKind.System : PythonLayoutKind.Unknown;
    }
}
|
||||
|
||||
/// <summary>
/// Locates the script directory of the detected virtual environment and records it
/// together with the expected interpreter path. Does nothing when no venv is known.
/// </summary>
private void DetectBinDirectory()
{
    if (string.IsNullOrEmpty(_venvPath))
    {
        return;
    }

    // Windows layout ("Scripts") is tried before the Unix layout ("bin").
    var layouts = new[]
    {
        (Directory: "Scripts", Interpreter: "python.exe"),
        (Directory: "bin", Interpreter: "python"),
    };

    foreach (var (directoryName, interpreterName) in layouts)
    {
        var candidate = Path.Combine(_venvPath, directoryName);
        if (!Directory.Exists(candidate))
        {
            continue;
        }

        _binPath = candidate;
        _pythonExecutable = Path.Combine(candidate, interpreterName);
        return;
    }
}
|
||||
|
||||
/// <summary>
/// Runs every version-target source in a fixed order: pyproject.toml, setup.py, setup.cfg,
/// runtime.txt, Dockerfile, tox.ini, and finally the lib/pythonX.Y directory names.
/// </summary>
/// <param name="cancellationToken">Token forwarded to each file parser.</param>
private async Task DetectVersionTargetsAsync(CancellationToken cancellationToken)
{
    Func<CancellationToken, Task>[] fileParsers =
    {
        ParsePyprojectTomlAsync,
        ParseSetupPyAsync,
        ParseSetupCfgAsync,
        ParseRuntimeTxtAsync,
        ParseDockerfileAsync,
        ParseToxIniAsync,
    };

    // Sequential on purpose: the parsers append to the same list.
    foreach (var parseAsync in fileParsers)
    {
        await parseAsync(cancellationToken).ConfigureAwait(false);
    }

    DetectVersionFromSitePackages();
}
|
||||
|
||||
/// <summary>
/// Reads a <c>pyvenv.cfg</c> file and records the interpreter version it declares as a
/// definitive version target.
/// </summary>
/// <remarks>
/// Only the exact key <c>version</c> is accepted. A prefix test would also match
/// <c>version_info</c> (whose value looks like <c>3.10.12.final.0</c>) and record a second,
/// malformed target. <c>version_info</c> is used solely as a fallback when no
/// <c>version</c> key is present, reduced to its leading numeric components.
/// Unreadable files are ignored.
/// </remarks>
/// <param name="path">Path of the pyvenv.cfg file.</param>
/// <param name="cancellationToken">Token forwarded to the file read.</param>
private async Task ParsePyvenvCfgAsync(string path, CancellationToken cancellationToken)
{
    // "3.10.12.final.0" -> "3.10.12": keep at most three leading all-digit components.
    static string NumericPrefix(string versionInfo) =>
        string.Join('.', versionInfo
            .Split('.')
            .TakeWhile(static part => part.Length > 0 && part.All(char.IsDigit))
            .Take(3));

    string content;
    try
    {
        content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
    }
    catch (IOException)
    {
        // Best effort: ignore read errors.
        return;
    }
    catch (UnauthorizedAccessException)
    {
        // Best effort: ignore files we are not allowed to read.
        return;
    }

    var foundVersion = false;
    string? versionInfo = null;

    foreach (var line in content.Split('\n'))
    {
        var separator = line.IndexOf('=');
        if (separator <= 0)
        {
            continue;
        }

        var key = line[..separator].Trim();
        var value = line[(separator + 1)..].Trim();
        if (value.Length == 0)
        {
            continue;
        }

        if (key.Equals("version", StringComparison.OrdinalIgnoreCase))
        {
            foundVersion = true;
            _versionTargets.Add(new PythonVersionTarget(
                value,
                "pyvenv.cfg",
                PythonVersionConfidence.Definitive));
        }
        else if (key.Equals("version_info", StringComparison.OrdinalIgnoreCase))
        {
            versionInfo ??= value;
        }
    }

    if (!foundVersion && versionInfo is not null)
    {
        var normalized = NumericPrefix(versionInfo);
        if (normalized.Length > 0)
        {
            _versionTargets.Add(new PythonVersionTarget(
                normalized,
                "pyvenv.cfg",
                PythonVersionConfidence.Definitive));
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reduces a raw version constraint such as <c>"&gt;=3.8,&lt;4"</c>, <c>^3.9</c> or <c>&gt;= 3.8</c>
/// to a bare version string.
/// </summary>
/// <remarks>
/// Only the first clause of a compound constraint is used. The constraint counts as a
/// minimum when that clause starts with <c>&gt;</c> (covers <c>&gt;=</c>) or <c>^</c>.
/// </remarks>
/// <param name="raw">Constraint text as captured from the configuration file.</param>
/// <param name="version">Bare version on success; empty otherwise.</param>
/// <param name="isMinimum">Whether the constraint expresses a lower bound.</param>
/// <returns><c>true</c> when a version starting with a digit was extracted.</returns>
private static bool TryParseVersionConstraint(string? raw, out string version, out bool isMinimum)
{
    version = string.Empty;
    isMinimum = false;

    if (string.IsNullOrWhiteSpace(raw))
    {
        return false;
    }

    // ">=3.8,<4" -> ">=3.8"; surrounding quotes and whitespace are dropped.
    var clause = raw.Split(',', 2)[0].Trim().Trim('"', '\'').Trim();

    // Strip the operator, then cut at whatever follows the version itself
    // (space-separated clauses, "||" alternatives, markers, trailing comments).
    var bare = clause.TrimStart('^', '>', '<', '=', '!', '~').TrimStart();
    var end = bare.IndexOfAny(new[] { ' ', '\t', '|', ';', '#' });
    if (end >= 0)
    {
        bare = bare[..end];
    }

    // Reject leftovers such as "" or "*": they are not usable version targets.
    if (bare.Length == 0 || !char.IsDigit(bare[0]))
    {
        return false;
    }

    version = bare;
    isMinimum = clause.StartsWith('>') || clause.StartsWith('^');
    return true;
}

/// <summary>
/// Extracts version targets from <c>pyproject.toml</c>: the <c>requires-python</c> value and a
/// <c>python = "..."</c> dependency constraint (Poetry style).
/// </summary>
/// <param name="cancellationToken">Token forwarded to the file read.</param>
private async Task ParsePyprojectTomlAsync(CancellationToken cancellationToken)
{
    var path = Path.Combine(_rootPath, "pyproject.toml");
    if (!File.Exists(path))
    {
        return;
    }

    try
    {
        var content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);

        // requires-python = ">=3.8"
        var requiresPythonMatch = RequiresPythonPattern().Match(content);
        if (requiresPythonMatch.Success &&
            TryParseVersionConstraint(requiresPythonMatch.Groups["version"].Value, out var requiredVersion, out var requiredIsMinimum))
        {
            _versionTargets.Add(new PythonVersionTarget(
                requiredVersion,
                "pyproject.toml",
                PythonVersionConfidence.High,
                requiredIsMinimum));
        }

        // python = "^3.8" as a dependency entry. The key must not be the tail of a longer
        // key: "requires-python = ..." contains "python = ..." and would be counted twice.
        for (var candidate = PythonVersionTomlPattern().Match(content); candidate.Success; candidate = candidate.NextMatch())
        {
            if (candidate.Index > 0)
            {
                var previous = content[candidate.Index - 1];
                if (char.IsLetterOrDigit(previous) || previous is '-' or '_' or '.')
                {
                    continue;
                }
            }

            if (TryParseVersionConstraint(candidate.Groups["version"].Value, out var poetryVersion, out var poetryIsMinimum))
            {
                _versionTargets.Add(new PythonVersionTarget(
                    poetryVersion,
                    "pyproject.toml",
                    PythonVersionConfidence.High,
                    poetryIsMinimum));
            }

            // Only the first standalone "python = ..." entry is considered.
            break;
        }
    }
    catch (IOException)
    {
        // Best effort: ignore read errors.
    }
    catch (UnauthorizedAccessException)
    {
        // Best effort: ignore files we are not allowed to read.
    }
}

/// <summary>
/// Extracts a version target from the <c>python_requires</c> argument in <c>setup.py</c>.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the file read.</param>
private async Task ParseSetupPyAsync(CancellationToken cancellationToken)
{
    var path = Path.Combine(_rootPath, "setup.py");
    if (!File.Exists(path))
    {
        return;
    }

    try
    {
        var content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
        var match = PythonRequiresPattern().Match(content);
        if (match.Success &&
            TryParseVersionConstraint(match.Groups["version"].Value, out var version, out var isMinimum))
        {
            _versionTargets.Add(new PythonVersionTarget(
                version,
                "setup.py",
                PythonVersionConfidence.High,
                isMinimum));
        }
    }
    catch (IOException)
    {
        // Best effort: ignore read errors.
    }
    catch (UnauthorizedAccessException)
    {
        // Best effort: ignore files we are not allowed to read.
    }
}

/// <summary>
/// Extracts a version target from the <c>python_requires</c> option in <c>setup.cfg</c>.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the file read.</param>
private async Task ParseSetupCfgAsync(CancellationToken cancellationToken)
{
    var path = Path.Combine(_rootPath, "setup.cfg");
    if (!File.Exists(path))
    {
        return;
    }

    try
    {
        var content = await File.ReadAllTextAsync(path, cancellationToken).ConfigureAwait(false);
        var match = PythonRequiresCfgPattern().Match(content);
        if (match.Success &&
            TryParseVersionConstraint(match.Groups["version"].Value, out var version, out var isMinimum))
        {
            _versionTargets.Add(new PythonVersionTarget(
                version,
                "setup.cfg",
                PythonVersionConfidence.High,
                isMinimum));
        }
    }
    catch (IOException)
    {
        // Best effort: ignore read errors.
    }
    catch (UnauthorizedAccessException)
    {
        // Best effort: ignore files we are not allowed to read.
    }
}
|
||||
|
||||
/// <summary>
/// Reads <c>runtime.txt</c> (Heroku format, e.g. <c>python-3.11.4</c>) and records the version
/// as a definitive target when the content matches.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the file read.</param>
private async Task ParseRuntimeTxtAsync(CancellationToken cancellationToken)
{
    var runtimeFile = Path.Combine(_rootPath, "runtime.txt");
    if (!File.Exists(runtimeFile))
    {
        return;
    }

    try
    {
        var text = await File.ReadAllTextAsync(runtimeFile, cancellationToken).ConfigureAwait(false);
        var runtimeMatch = RuntimeTxtPattern().Match(text.Trim());
        if (!runtimeMatch.Success)
        {
            return;
        }

        _versionTargets.Add(new PythonVersionTarget(
            runtimeMatch.Groups["version"].Value,
            "runtime.txt",
            PythonVersionConfidence.Definitive));
    }
    catch (IOException)
    {
        // Read failures are not fatal for detection.
    }
}
|
||||
|
||||
/// <summary>
/// Reads the root <c>Dockerfile</c> and records one version target: the tag of a
/// <c>FROM python:X.Y[.Z]</c> base image (high confidence) or, failing that, the value of
/// <c>ENV PYTHON_VERSION</c> (medium confidence).
/// </summary>
/// <param name="cancellationToken">Token forwarded to the file read.</param>
private async Task ParseDockerfileAsync(CancellationToken cancellationToken)
{
    var dockerfile = Path.Combine(_rootPath, "Dockerfile");
    if (!File.Exists(dockerfile))
    {
        return;
    }

    try
    {
        var text = await File.ReadAllTextAsync(dockerfile, cancellationToken).ConfigureAwait(false);

        // Ordered by preference; the first pattern that matches decides.
        var probes = new (Regex Pattern, PythonVersionConfidence Confidence)[]
        {
            (DockerFromPythonPattern(), PythonVersionConfidence.High),
            (DockerEnvPythonPattern(), PythonVersionConfidence.Medium),
        };

        foreach (var (pattern, confidence) in probes)
        {
            var hit = pattern.Match(text);
            if (!hit.Success)
            {
                continue;
            }

            _versionTargets.Add(new PythonVersionTarget(
                hit.Groups["version"].Value,
                "Dockerfile",
                confidence));
            return;
        }
    }
    catch (IOException)
    {
        // Read failures are not fatal for detection.
    }
}
|
||||
|
||||
/// <summary>
/// Reads <c>tox.ini</c> and records a medium-confidence version target for every
/// <c>pyNN</c>/<c>pyNNN</c> environment named in <c>envlist</c> (e.g. <c>py311</c> becomes <c>3.11</c>).
/// </summary>
/// <param name="cancellationToken">Token forwarded to the file read.</param>
private async Task ParseToxIniAsync(CancellationToken cancellationToken)
{
    var toxFile = Path.Combine(_rootPath, "tox.ini");
    if (!File.Exists(toxFile))
    {
        return;
    }

    try
    {
        var text = await File.ReadAllTextAsync(toxFile, cancellationToken).ConfigureAwait(false);
        var envListMatch = ToxEnvListPattern().Match(text);
        if (!envListMatch.Success)
        {
            return;
        }

        var environments = envListMatch.Groups["envs"].Value;
        for (var env = ToxPythonEnvPattern().Match(environments); env.Success; env = env.NextMatch())
        {
            var digits = env.Groups["version"].Value;
            if (digits.Length < 2)
            {
                continue;
            }

            // First digit is the major version, the rest is the minor version.
            var dotted = digits[..1] + "." + digits[1..];
            _versionTargets.Add(new PythonVersionTarget(
                dotted,
                "tox.ini",
                PythonVersionConfidence.Medium));
        }
    }
    catch (IOException)
    {
        // Read failures are not fatal for detection.
    }
}
|
||||
|
||||
/// <summary>
/// Derives medium-confidence version targets from <c>pythonX.Y</c> directory names found
/// under the venv <c>lib</c>, root <c>lib</c>, <c>usr/local/lib</c> and <c>usr/lib</c> directories.
/// </summary>
private void DetectVersionFromSitePackages()
{
    var libRoots = new List<string>(4);
    if (!string.IsNullOrEmpty(_venvPath))
    {
        libRoots.Add(Path.Combine(_venvPath, "lib"));
    }

    libRoots.AddRange(new[]
    {
        Path.Combine(_rootPath, "lib"),
        Path.Combine(_rootPath, "usr", "local", "lib"),
        Path.Combine(_rootPath, "usr", "lib"),
    });

    foreach (var libRoot in libRoots.Where(Directory.Exists))
    {
        try
        {
            foreach (var pythonDir in Directory.EnumerateDirectories(libRoot, "python*", SafeEnumeration))
            {
                var directoryName = Path.GetFileName(pythonDir);
                var nameMatch = LibPythonDirPattern().Match(directoryName);
                if (!nameMatch.Success)
                {
                    continue;
                }

                _versionTargets.Add(new PythonVersionTarget(
                    nameMatch.Groups["version"].Value,
                    $"lib/{directoryName}",
                    PythonVersionConfidence.Medium));
            }
        }
        catch (UnauthorizedAccessException)
        {
            // Directories we cannot list are skipped.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Appends <c>&lt;libDir&gt;/pythonX.Y/site-packages</c> to <paramref name="candidates"/> for every
/// <c>python*</c> directory under <paramref name="libDir"/>, in ascending version order.
/// </summary>
/// <remarks>
/// Directory names that do not parse as a version sort first, by ordinal name.
/// Existence of the resulting site-packages path is checked by the caller.
/// </remarks>
private static void AddVersionedSitePackages(List<string> candidates, string libDir)
{
    static Version VersionOf(string pythonDir)
    {
        var name = Path.GetFileName(pythonDir) ?? string.Empty;
        var suffix = name.Length > "python".Length ? name["python".Length..] : string.Empty;
        return Version.TryParse(suffix, out var parsed) ? parsed : new Version(0, 0);
    }

    if (!Directory.Exists(libDir))
    {
        return;
    }

    List<string> pythonDirs;
    try
    {
        // Materialized inside the try block: enumeration itself may throw.
        pythonDirs = Directory.EnumerateDirectories(libDir, "python*", SafeEnumeration).ToList();
    }
    catch (UnauthorizedAccessException)
    {
        return;
    }
    catch (IOException)
    {
        return;
    }

    // Numeric ordering keeps 3.9 before 3.10, so the order is deterministic and
    // independent of the order in which the filesystem returns entries.
    foreach (var pythonDir in pythonDirs.OrderBy(VersionOf).ThenBy(static dir => dir, StringComparer.Ordinal))
    {
        candidates.Add(Path.Combine(pythonDir, "site-packages"));
    }
}

/// <summary>
/// Collects the site-packages directories that exist on disk: virtualenv, Lambda layer,
/// container and root-level locations, in that order, without duplicates.
/// </summary>
/// <remarks>
/// Versioned directories are discovered by listing <c>python*</c> folders instead of probing a
/// fixed list of interpreter versions, so any installed interpreter version is found.
/// </remarks>
private void DetectSitePackages()
{
    var candidates = new List<string>();

    // Virtualenv site-packages
    if (!string.IsNullOrEmpty(_venvPath))
    {
        // Windows layout
        candidates.Add(Path.Combine(_venvPath, "Lib", "site-packages"));

        // Unix layout: lib/pythonX.Y/site-packages
        AddVersionedSitePackages(candidates, Path.Combine(_venvPath, "lib"));
    }

    // Lambda layer paths
    if (_layout == PythonLayoutKind.Lambda)
    {
        AddVersionedSitePackages(candidates, Path.Combine(_rootPath, "python", "lib"));
        candidates.Add(Path.Combine(_rootPath, "python"));
    }

    // Container paths
    AddVersionedSitePackages(candidates, Path.Combine(_rootPath, "usr", "local", "lib"));
    candidates.Add(Path.Combine(_rootPath, "usr", "lib", "python3", "dist-packages"));

    // Root site-packages (common for some Docker images)
    candidates.Add(Path.Combine(_rootPath, "site-packages"));

    foreach (var candidate in candidates)
    {
        if (Directory.Exists(candidate) && !_sitePackagesPaths.Contains(candidate, StringComparer.OrdinalIgnoreCase))
        {
            _sitePackagesPaths.Add(candidate);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Collects <c>*.whl</c> files from the <c>dist</c>, <c>wheels</c> and <c>.wheels</c> sub-directories
/// and from the root itself, skipping paths already recorded (case-insensitive).
/// </summary>
private void DetectWheels()
{
    string[] wheelDirectories =
    {
        Path.Combine(_rootPath, "dist"),
        Path.Combine(_rootPath, "wheels"),
        Path.Combine(_rootPath, ".wheels"),
        _rootPath,
    };

    foreach (var wheelDirectory in wheelDirectories.Where(Directory.Exists))
    {
        try
        {
            foreach (var wheelFile in Directory.EnumerateFiles(wheelDirectory, "*.whl", SafeEnumeration))
            {
                var alreadyKnown = _wheelPaths.Contains(wheelFile, StringComparer.OrdinalIgnoreCase);
                if (alreadyKnown)
                {
                    continue;
                }

                _wheelPaths.Add(wheelFile);
            }
        }
        catch (UnauthorizedAccessException)
        {
            // Directories we cannot list are skipped.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Collects zipapp archives (<c>*.pyz</c>, then <c>*.pyzw</c>) located directly in the root directory.
/// </summary>
private void DetectZipapps()
{
    if (!Directory.Exists(_rootPath))
    {
        return;
    }

    try
    {
        // One shared try block: a failure on the first pattern also skips the second.
        foreach (var pattern in new[] { "*.pyz", "*.pyzw" })
        {
            foreach (var archive in Directory.EnumerateFiles(_rootPath, pattern, SafeEnumeration))
            {
                _zipappPaths.Add(archive);
            }
        }
    }
    catch (UnauthorizedAccessException)
    {
        // A root we cannot list yields no zipapps.
    }
}
|
||||
|
||||
/// <summary>
/// Finds editable (development mode) installs referenced from the detected site-packages
/// directories: legacy <c>.egg-link</c> files first, then <c>direct_url.json</c> files inside
/// <c>*.dist-info</c> directories.
/// </summary>
/// <remarks>
/// Detection is best effort. Unreadable files, malformed JSON and JSON with unexpected value
/// kinds are skipped one file at a time instead of aborting the scan.
/// </remarks>
/// <param name="cancellationToken">Token forwarded to the file reads.</param>
private async Task DetectEditablesAsync(CancellationToken cancellationToken)
{
    // All egg-links are processed before any direct_url.json, across every site-packages.
    foreach (var sitePackagesPath in _sitePackagesPaths)
    {
        await DetectEggLinkEditablesAsync(sitePackagesPath, cancellationToken).ConfigureAwait(false);
    }

    foreach (var sitePackagesPath in _sitePackagesPaths)
    {
        await DetectDirectUrlEditablesAsync(sitePackagesPath, cancellationToken).ConfigureAwait(false);
    }
}

/// <summary>
/// Records an editable install for every <c>*.egg-link</c> file in
/// <paramref name="sitePackagesPath"/> whose first non-empty line names an existing directory.
/// </summary>
private async Task DetectEggLinkEditablesAsync(string sitePackagesPath, CancellationToken cancellationToken)
{
    List<string> eggLinks;
    try
    {
        eggLinks = Directory.EnumerateFiles(sitePackagesPath, "*.egg-link", SafeEnumeration).ToList();
    }
    catch (UnauthorizedAccessException)
    {
        return;
    }
    catch (IOException)
    {
        return;
    }

    foreach (var eggLink in eggLinks)
    {
        string content;
        try
        {
            content = await File.ReadAllTextAsync(eggLink, cancellationToken).ConfigureAwait(false);
        }
        catch (IOException)
        {
            continue;
        }
        catch (UnauthorizedAccessException)
        {
            continue;
        }

        var lines = content.Split('\n', StringSplitOptions.RemoveEmptyEntries);
        if (lines.Length == 0)
        {
            continue;
        }

        var editablePath = lines[0].Trim();
        if (!Directory.Exists(editablePath))
        {
            continue;
        }

        // "pkg.egg-link" -> "pkg"
        var packageName = Path.GetFileNameWithoutExtension(eggLink);
        _editablePaths.Add((editablePath, packageName));
    }
}

/// <summary>
/// Records an editable install for every <c>*.dist-info/direct_url.json</c> in
/// <paramref name="sitePackagesPath"/> that has <c>dir_info.editable == true</c> and a
/// <c>file://</c> URL pointing at an existing directory.
/// </summary>
private async Task DetectDirectUrlEditablesAsync(string sitePackagesPath, CancellationToken cancellationToken)
{
    List<string> distInfoDirectories;
    try
    {
        distInfoDirectories = Directory.EnumerateDirectories(sitePackagesPath, "*.dist-info", SafeEnumeration).ToList();
    }
    catch (UnauthorizedAccessException)
    {
        return;
    }
    catch (IOException)
    {
        return;
    }

    foreach (var distInfo in distInfoDirectories)
    {
        var directUrlPath = Path.Combine(distInfo, "direct_url.json");
        if (!File.Exists(directUrlPath))
        {
            continue;
        }

        try
        {
            var content = await File.ReadAllTextAsync(directUrlPath, cancellationToken).ConfigureAwait(false);
            using var doc = JsonDocument.Parse(content);
            var root = doc.RootElement;

            // Value kinds are checked explicitly: TryGetProperty, GetBoolean and GetString
            // throw InvalidOperationException when the element has a different kind.
            if (root.ValueKind != JsonValueKind.Object ||
                !root.TryGetProperty("dir_info", out var dirInfo) ||
                dirInfo.ValueKind != JsonValueKind.Object ||
                !dirInfo.TryGetProperty("editable", out var editable) ||
                editable.ValueKind != JsonValueKind.True ||
                !root.TryGetProperty("url", out var urlElement) ||
                urlElement.ValueKind != JsonValueKind.String)
            {
                continue;
            }

            var url = urlElement.GetString();
            if (string.IsNullOrEmpty(url) || !url.StartsWith("file://", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // The URL is percent-encoded ("my%20project"); decode it before touching the disk.
            var editablePath = Uri.UnescapeDataString(url["file://".Length..]);
            if (OperatingSystem.IsWindows() && editablePath.StartsWith('/'))
            {
                // "file:///C:/x" leaves "/C:/x"; drop the leading slash on Windows.
                editablePath = editablePath[1..];
            }

            if (!Directory.Exists(editablePath))
            {
                continue;
            }

            // "name-1.0.dist-info" -> "name"
            var packageName = Path.GetFileName(distInfo).Split('-')[0];
            _editablePaths.Add((editablePath, packageName));
        }
        catch (JsonException)
        {
            // Invalid JSON - skip this distribution.
        }
        catch (IOException)
        {
            // Unreadable file - skip this distribution.
        }
        catch (UnauthorizedAccessException)
        {
            // Inaccessible file - skip this distribution.
        }
    }
}
|
||||
|
||||
// requires-python = ">=3.8" (pyproject.toml); the value may be quoted or bare.
[GeneratedRegex(@"requires-python\s*=\s*[""']?(?<version>[^""'\n]+)", RegexOptions.IgnoreCase)]
private static partial Regex RequiresPythonPattern();

// python = "^3.8" as a standalone key at the start of a line (Poetry dependency entry).
// Anchoring to the line start keeps it from matching inside "requires-python = ...".
[GeneratedRegex(@"^[ \t]*python\s*=\s*[""'](?<version>[^""']+)[""']", RegexOptions.IgnoreCase | RegexOptions.Multiline)]
private static partial Regex PythonVersionTomlPattern();

// python_requires=">=3.8" (setup.py keyword argument with a quoted value).
[GeneratedRegex(@"python_requires\s*=\s*[""'](?<version>[^""']+)[""']", RegexOptions.IgnoreCase)]
private static partial Regex PythonRequiresPattern();

// python_requires = >= 3.8 (setup.cfg, unquoted). Captures the first clause only: an
// optional operator, optional blanks, then a version that starts with a digit. A space
// after the operator therefore no longer truncates the capture to the bare operator.
[GeneratedRegex(@"python_requires\s*=\s*(?<version>[><=!~]*[ \t]*\d[^\s,;#]*)", RegexOptions.IgnoreCase)]
private static partial Regex PythonRequiresCfgPattern();

// python-3.11.4 (runtime.txt, Heroku format).
[GeneratedRegex(@"^python-(?<version>\d+\.\d+(?:\.\d+)?)", RegexOptions.IgnoreCase)]
private static partial Regex RuntimeTxtPattern();

// FROM [registry/]python:X.Y[.Z] (Dockerfile base image).
[GeneratedRegex(@"FROM\s+(?:.*\/)?python:(?<version>\d+\.\d+(?:\.\d+)?)", RegexOptions.IgnoreCase)]
private static partial Regex DockerFromPythonPattern();

// ENV PYTHON_VERSION=X.Y[.Z] or ENV PYTHON_VERSION X.Y[.Z] (Dockerfile).
[GeneratedRegex(@"ENV\s+PYTHON_VERSION\s*=?\s*(?<version>\d+\.\d+(?:\.\d+)?)", RegexOptions.IgnoreCase)]
private static partial Regex DockerEnvPythonPattern();

// envlist = py39,py310 (tox.ini); the capture stops at a newline or '['.
[GeneratedRegex(@"envlist\s*=\s*(?<envs>[^\n\[]+)", RegexOptions.IgnoreCase)]
private static partial Regex ToxEnvListPattern();

// py39 / py311 tokens inside a tox environment list.
[GeneratedRegex(@"py(?<version>\d{2,3})", RegexOptions.IgnoreCase)]
private static partial Regex ToxPythonEnvPattern();

// pythonX.Y directory names under a lib directory.
[GeneratedRegex(@"^python(?<version>\d+\.\d+)$", RegexOptions.IgnoreCase)]
private static partial Regex LibPythonDirPattern();
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
/// <summary>
/// Classifies the kind of Python installation or project layout.
/// </summary>
/// <remarks>
/// Numeric values are spelled out and match the implicit declaration order.
/// </remarks>
internal enum PythonLayoutKind
{
    /// <summary>Layout is unknown or was not detected.</summary>
    Unknown = 0,

    /// <summary>Standard virtual environment (venv or virtualenv).</summary>
    Virtualenv = 1,

    /// <summary>Project managed by Poetry.</summary>
    Poetry = 2,

    /// <summary>Project managed by Pipenv.</summary>
    Pipenv = 3,

    /// <summary>Conda environment.</summary>
    Conda = 4,

    /// <summary>System-wide Python installation.</summary>
    System = 5,

    /// <summary>Container image that ships Python.</summary>
    Container = 6,

    /// <summary>AWS Lambda function.</summary>
    Lambda = 7,

    /// <summary>Azure Functions.</summary>
    AzureFunction = 8,

    /// <summary>Google Cloud Function.</summary>
    CloudFunction = 9,

    /// <summary>Application managed by pipx.</summary>
    Pipx = 10,

    /// <summary>PyInstaller or similar frozen application.</summary>
    Frozen = 11
}
|
||||
@@ -0,0 +1,122 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
/// <summary>
/// Result of Python project analysis containing layout detection,
/// version targets, and the virtual filesystem.
/// </summary>
/// <remarks>
/// Instances are created only through <see cref="AnalyzeAsync"/>; all properties are
/// assigned once in the constructor.
/// </remarks>
internal sealed class PythonProjectAnalysis
{
    private PythonProjectAnalysis(
        PythonLayoutKind layout,
        IReadOnlyList<PythonVersionTarget> versionTargets,
        IReadOnlyList<string> sitePackagesPaths,
        string? venvPath,
        string? binPath,
        PythonVirtualFileSystem virtualFileSystem)
    {
        Layout = layout;
        VersionTargets = versionTargets;
        SitePackagesPaths = sitePackagesPaths;
        VenvPath = venvPath;
        BinPath = binPath;
        VirtualFileSystem = virtualFileSystem;
    }

    /// <summary>
    /// Gets the detected layout kind.
    /// </summary>
    public PythonLayoutKind Layout { get; }

    /// <summary>
    /// Gets all detected version targets.
    /// </summary>
    public IReadOnlyList<PythonVersionTarget> VersionTargets { get; }

    /// <summary>
    /// Gets the primary version target: the one with the highest confidence, with ties
    /// broken by the ordinal order of <see cref="PythonVersionTarget.Source"/>.
    /// Returns <c>null</c> when no version target was detected.
    /// </summary>
    /// <remarks>
    /// The value is recomputed (sorted) on every read; callers that need it several
    /// times should capture it in a local.
    /// </remarks>
    public PythonVersionTarget? PrimaryVersionTarget =>
        VersionTargets
            .OrderByDescending(static v => v.Confidence)
            .ThenBy(static v => v.Source, StringComparer.Ordinal)
            .FirstOrDefault();

    /// <summary>
    /// Gets all detected site-packages paths.
    /// </summary>
    public IReadOnlyList<string> SitePackagesPaths { get; }

    /// <summary>
    /// Gets the virtual environment path if detected.
    /// </summary>
    public string? VenvPath { get; }

    /// <summary>
    /// Gets the bin/Scripts directory path if detected.
    /// </summary>
    public string? BinPath { get; }

    /// <summary>
    /// Gets the virtual filesystem built from all detected sources.
    /// </summary>
    public PythonVirtualFileSystem VirtualFileSystem { get; }

    /// <summary>
    /// Analyzes a Python project and builds the virtual filesystem.
    /// </summary>
    /// <param name="rootPath">Root path handed to the <c>PythonInputNormalizer</c>.</param>
    /// <param name="cancellationToken">Token forwarded to the normalizer's analysis.</param>
    /// <returns>
    /// An analysis holding the normalizer's layout, venv/bin paths, snapshots of its
    /// version targets and site-packages paths, and the virtual filesystem it built.
    /// </returns>
    public static async Task<PythonProjectAnalysis> AnalyzeAsync(
        string rootPath,
        CancellationToken cancellationToken = default)
    {
        var normalizer = new PythonInputNormalizer(rootPath);
        await normalizer.AnalyzeAsync(cancellationToken).ConfigureAwait(false);

        var vfs = normalizer.BuildVirtualFileSystem();

        // ToList() snapshots the normalizer's collections so the result does not
        // alias the normalizer's own state.
        return new PythonProjectAnalysis(
            normalizer.Layout,
            normalizer.VersionTargets.ToList(),
            normalizer.SitePackagesPaths.ToList(),
            normalizer.VenvPath,
            normalizer.BinPath,
            vfs);
    }

    /// <summary>
    /// Generates metadata entries for the analysis result.
    /// </summary>
    /// <returns>
    /// A lazily produced sequence that always contains <c>layout</c> and
    /// <c>vfsFileCount</c>, plus the <c>pythonVersion*</c>, <c>pythonVersionsDetected</c>,
    /// <c>venvPath</c> and <c>sitePackagesCount</c> entries when the corresponding data
    /// is present. Numeric values are formatted with the invariant culture.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata()
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        yield return new KeyValuePair<string, string?>("layout", Layout.ToString());

        // PrimaryVersionTarget sorts VersionTargets on every read, so resolve it once.
        var primary = PrimaryVersionTarget;
        if (primary is not null)
        {
            yield return new KeyValuePair<string, string?>("pythonVersion", primary.Version);
            yield return new KeyValuePair<string, string?>("pythonVersionSource", primary.Source);
            yield return new KeyValuePair<string, string?>("pythonVersionConfidence", primary.Confidence.ToString());

            if (primary.IsMinimum)
            {
                yield return new KeyValuePair<string, string?>("pythonVersionIsMinimum", "true");
            }
        }

        if (VersionTargets.Count > 1)
        {
            var versions = string.Join(';', VersionTargets.Select(static v => $"{v.Version}@{v.Source}"));
            yield return new KeyValuePair<string, string?>("pythonVersionsDetected", versions);
        }

        if (!string.IsNullOrEmpty(VenvPath))
        {
            yield return new KeyValuePair<string, string?>("venvPath", VenvPath);
        }

        if (SitePackagesPaths.Count > 0)
        {
            yield return new KeyValuePair<string, string?>("sitePackagesCount", SitePackagesPaths.Count.ToString(invariant));
        }

        yield return new KeyValuePair<string, string?>("vfsFileCount", VirtualFileSystem.FileCount.ToString(invariant));
    }
}
|
||||
@@ -0,0 +1,71 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
/// <summary>
/// Represents a detected Python version target.
/// </summary>
/// <param name="Version">The detected version string (e.g., "3.11", "3.12.1").</param>
/// <param name="Source">The source file where the version was detected.</param>
/// <param name="Confidence">Confidence level of the detection.</param>
/// <param name="IsMinimum">True if this is a minimum version requirement.</param>
internal sealed record PythonVersionTarget(
    string Version,
    string Source,
    PythonVersionConfidence Confidence,
    bool IsMinimum = false)
{
    /// <summary>
    /// Gets the major version number, or <c>null</c> if it cannot be parsed.
    /// </summary>
    public int? Major => TryParsePart(0);

    /// <summary>
    /// Gets the minor version number, or <c>null</c> if absent or unparsable.
    /// </summary>
    public int? Minor => TryParsePart(1);

    /// <summary>
    /// Gets the patch version number, or <c>null</c> if absent or unparsable.
    /// </summary>
    public int? Patch => TryParsePart(2);

    /// <summary>
    /// Parses the numeric value of the dot-separated component at <paramref name="index"/>.
    /// </summary>
    /// <param name="index">Zero-based index of the component within <see cref="Version"/>.</param>
    /// <returns>
    /// The first run of ASCII digits in the component as an integer, or <c>null</c>
    /// when the component does not exist, contains no digits, or overflows <see cref="int"/>.
    /// </returns>
    private int? TryParsePart(int index)
    {
        if (string.IsNullOrEmpty(Version))
        {
            return null;
        }

        var parts = Version.Split('.');
        if (index >= parts.Length)
        {
            return null;
        }

        var part = parts[index];

        // Skip decoration in front of the number, e.g. the ">=" in ">=3".
        var start = 0;
        while (start < part.Length && !IsAsciiDigit(part[start]))
        {
            start++;
        }

        // Take only the first contiguous digit run. Concatenating every digit in the
        // component would turn "1rc1" into 11 and "8,<4" into 84.
        var end = start;
        while (end < part.Length && IsAsciiDigit(part[end]))
        {
            end++;
        }

        if (end == start)
        {
            return null;
        }

        return int.TryParse(
            part.AsSpan(start, end - start),
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out var value)
            ? value
            : null;

        static bool IsAsciiDigit(char c) => c is >= '0' and <= '9';
    }
}

/// <summary>
/// Confidence level for Python version detection, declared from lowest to highest.
/// </summary>
internal enum PythonVersionConfidence
{
    /// <summary>
    /// Low confidence - inferred from heuristics.
    /// </summary>
    Low,

    /// <summary>
    /// Medium confidence - from configuration files.
    /// </summary>
    Medium,

    /// <summary>
    /// High confidence - explicit declaration.
    /// </summary>
    High,

    /// <summary>
    /// Definitive - from runtime detection.
    /// </summary>
    Definitive
}
|
||||
@@ -0,0 +1,62 @@
|
||||
using System.Collections.Frozen;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
/// <summary>
/// A single file entry in the Python virtual filesystem.
/// </summary>
/// <param name="VirtualPath">Normalized virtual path with forward slashes, relative to the VFS root.</param>
/// <param name="AbsolutePath">Absolute filesystem path or archive entry path.</param>
/// <param name="Source">Origin of the file.</param>
/// <param name="LayerDigest">Container layer digest if from a container layer.</param>
/// <param name="ArchivePath">Path to the containing archive if from wheel/sdist/zipapp.</param>
/// <param name="Size">File size in bytes, if known.</param>
/// <param name="Sha256">SHA-256 hash of the file content, if computed.</param>
/// <param name="Metadata">Additional metadata key-value pairs.</param>
internal sealed record PythonVirtualFile(
    string VirtualPath,
    string AbsolutePath,
    PythonFileSource Source,
    string? LayerDigest = null,
    string? ArchivePath = null,
    long? Size = null,
    string? Sha256 = null,
    FrozenDictionary<string, string>? Metadata = null)
{
    /// <summary>
    /// Gets the lower-cased extension of <see cref="VirtualPath"/> without the leading
    /// dot, or an empty string when the path has no extension.
    /// </summary>
    public string Extension
    {
        get
        {
            var dotted = Path.GetExtension(VirtualPath);
            if (string.IsNullOrEmpty(dotted))
            {
                return string.Empty;
            }

            // Drop the leading '.' before lower-casing.
            return dotted.Substring(1).ToLowerInvariant();
        }
    }

    /// <summary>
    /// Gets the final segment of <see cref="VirtualPath"/> (the file name without its directory).
    /// </summary>
    public string FileName => Path.GetFileName(VirtualPath);

    /// <summary>
    /// Gets a value indicating whether the extension is <c>py</c> or <c>pyw</c>.
    /// </summary>
    public bool IsPythonSource => Extension switch
    {
        "py" or "pyw" => true,
        _ => false,
    };

    /// <summary>
    /// Gets a value indicating whether the extension is <c>pyc</c> or <c>pyo</c>.
    /// </summary>
    public bool IsBytecode => Extension switch
    {
        "pyc" or "pyo" => true,
        _ => false,
    };

    /// <summary>
    /// Gets a value indicating whether the extension is <c>so</c>, <c>pyd</c> or <c>dll</c>.
    /// </summary>
    public bool IsNativeExtension => Extension switch
    {
        "so" or "pyd" or "dll" => true,
        _ => false,
    };

    /// <summary>
    /// Gets a value indicating whether <see cref="ArchivePath"/> is set to a non-empty value.
    /// </summary>
    public bool IsFromArchive => ArchivePath is { Length: > 0 };
}
|
||||
@@ -0,0 +1,579 @@
|
||||
using System.Collections.Frozen;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.IO.Compression;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
/// <summary>
/// A virtual filesystem that normalizes access to Python project files across
/// wheels, sdists, editable installs, zipapps, site-packages trees, and container roots.
/// </summary>
/// <remarks>
/// Instances are immutable and are produced by <see cref="Builder"/>. Virtual paths use
/// forward slashes, carry no leading or trailing slash, and are keyed ordinally
/// (case-sensitive). The empty string denotes the root directory.
/// </remarks>
internal sealed partial class PythonVirtualFileSystem
{
    // Single-directory enumeration that skips devices and reparse points (symlinks/junctions).
    private static readonly EnumerationOptions SafeEnumeration = new()
    {
        RecurseSubdirectories = false,
        IgnoreInaccessible = true,
        AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
    };

    // Same filters as SafeEnumeration, but descends into subdirectories.
    private static readonly EnumerationOptions RecursiveEnumeration = new()
    {
        RecurseSubdirectories = true,
        IgnoreInaccessible = true,
        AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
    };

    private readonly FrozenDictionary<string, PythonVirtualFile> _files;
    private readonly FrozenDictionary<string, FrozenSet<string>> _directories;
    private readonly FrozenSet<string> _sourceTreeRoots;
    private readonly FrozenSet<string> _sitePackagesPaths;
    private readonly FrozenSet<string> _editablePaths;
    private readonly FrozenSet<string> _zipArchivePaths;

    private PythonVirtualFileSystem(
        FrozenDictionary<string, PythonVirtualFile> files,
        FrozenDictionary<string, FrozenSet<string>> directories,
        FrozenSet<string> sourceTreeRoots,
        FrozenSet<string> sitePackagesPaths,
        FrozenSet<string> editablePaths,
        FrozenSet<string> zipArchivePaths)
    {
        _files = files;
        _directories = directories;
        _sourceTreeRoots = sourceTreeRoots;
        _sitePackagesPaths = sitePackagesPaths;
        _editablePaths = editablePaths;
        _zipArchivePaths = zipArchivePaths;
    }

    /// <summary>
    /// Gets the total number of files in the virtual filesystem.
    /// </summary>
    public int FileCount => _files.Count;

    /// <summary>
    /// Gets all files in the virtual filesystem (in no particular order).
    /// </summary>
    public IEnumerable<PythonVirtualFile> Files => _files.Values;

    /// <summary>
    /// Gets all virtual paths in ordinal sorted order.
    /// </summary>
    public IEnumerable<string> Paths => _files.Keys.OrderBy(static p => p, StringComparer.Ordinal);

    /// <summary>
    /// Gets the source tree root paths.
    /// </summary>
    public IReadOnlySet<string> SourceTreeRoots => _sourceTreeRoots;

    /// <summary>
    /// Gets the site-packages directory paths.
    /// </summary>
    public IReadOnlySet<string> SitePackagesPaths => _sitePackagesPaths;

    /// <summary>
    /// Gets the editable install paths.
    /// </summary>
    public IReadOnlySet<string> EditablePaths => _editablePaths;

    /// <summary>
    /// Gets the zip archive paths (wheels, zipapps, sdists).
    /// </summary>
    public IReadOnlySet<string> ZipArchivePaths => _zipArchivePaths;

    /// <summary>
    /// Gets a file by its virtual path, or null if not found.
    /// </summary>
    /// <param name="virtualPath">Virtual path; backslashes and surrounding slashes are normalized.</param>
    public PythonVirtualFile? GetFile(string virtualPath)
    {
        var normalized = NormalizePath(virtualPath);
        return _files.GetValueOrDefault(normalized);
    }

    /// <summary>
    /// Tries to get a file by its virtual path.
    /// </summary>
    /// <param name="virtualPath">Virtual path; backslashes and surrounding slashes are normalized.</param>
    /// <param name="file">The file when found; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when a file exists at the path.</returns>
    public bool TryGetFile(string virtualPath, [NotNullWhen(true)] out PythonVirtualFile? file)
    {
        var normalized = NormalizePath(virtualPath);
        return _files.TryGetValue(normalized, out file);
    }

    /// <summary>
    /// Checks if a virtual path exists as a file.
    /// </summary>
    public bool FileExists(string virtualPath)
    {
        var normalized = NormalizePath(virtualPath);
        return _files.ContainsKey(normalized);
    }

    /// <summary>
    /// Checks if a virtual path exists as a directory. The root (empty path) always exists.
    /// </summary>
    public bool DirectoryExists(string virtualPath)
    {
        var normalized = NormalizePath(virtualPath);
        if (normalized.Length == 0)
        {
            return true;
        }

        return _directories.ContainsKey(normalized);
    }

    /// <summary>
    /// Enumerates the files directly inside a directory (non-recursive), ordered
    /// ordinally by name. Yields nothing when the directory does not exist.
    /// </summary>
    public IEnumerable<PythonVirtualFile> EnumerateFiles(string virtualPath)
    {
        var normalized = NormalizePath(virtualPath);
        if (!_directories.TryGetValue(normalized, out var entries))
        {
            yield break;
        }

        foreach (var entry in entries.OrderBy(static e => e, StringComparer.Ordinal))
        {
            var fullPath = normalized.Length == 0 ? entry : $"{normalized}/{entry}";

            // Directory entries include sub-directory names; only names that resolve
            // to a file are yielded.
            if (_files.TryGetValue(fullPath, out var file))
            {
                yield return file;
            }
        }
    }

    /// <summary>
    /// Enumerates all files below <paramref name="virtualPath"/> whose path relative to
    /// that directory matches a glob pattern, ordered ordinally by virtual path.
    /// </summary>
    /// <param name="virtualPath">Directory to search under; empty means the root.</param>
    /// <param name="pattern">
    /// Glob where <c>**</c> matches any characters including '/', <c>*</c> matches any
    /// characters except '/', and <c>?</c> matches one character except '/'. Matching is
    /// case-insensitive. Note that <c>**/</c> translates to <c>.*/</c> and therefore
    /// requires at least one '/' in the relative path.
    /// </param>
    public IEnumerable<PythonVirtualFile> EnumerateFiles(string virtualPath, string pattern)
    {
        var regex = GlobToRegex(pattern);
        var normalized = NormalizePath(virtualPath);
        var prefix = normalized.Length == 0 ? string.Empty : normalized + "/";

        // Frozen dictionary iteration order is an implementation detail; sort so
        // results are deterministic like the other enumeration members.
        var matches = _files
            .Where(kvp => kvp.Key.StartsWith(prefix, StringComparison.Ordinal)
                && regex.IsMatch(kvp.Key[prefix.Length..]))
            .OrderBy(static kvp => kvp.Key, StringComparer.Ordinal);

        foreach (var match in matches)
        {
            yield return match.Value;
        }
    }

    /// <summary>
    /// Gets all files from a specific source, ordered ordinally by virtual path.
    /// </summary>
    public IEnumerable<PythonVirtualFile> GetFilesBySource(PythonFileSource source)
    {
        return _files.Values
            .Where(f => f.Source == source)
            .OrderBy(static f => f.VirtualPath, StringComparer.Ordinal);
    }

    /// <summary>
    /// Creates a new builder for constructing a virtual filesystem.
    /// </summary>
    public static Builder CreateBuilder() => new();

    /// <summary>
    /// Converts backslashes to forward slashes and trims leading/trailing slashes.
    /// Null, whitespace and "." all normalize to the empty (root) path.
    /// </summary>
    private static string NormalizePath(string path)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            return string.Empty;
        }

        var normalized = path.Replace('\\', '/').Trim('/');
        return normalized == "." ? string.Empty : normalized;
    }

    [GeneratedRegex(@"^[a-zA-Z0-9_.\-/]+$")]
    private static partial Regex SafePathPattern();

    /// <summary>
    /// Translates a glob pattern into an anchored, case-insensitive regular expression.
    /// </summary>
    private static Regex GlobToRegex(string pattern)
    {
        // "**" must be rewritten before "*", otherwise it would be consumed as two
        // single-segment wildcards.
        var escaped = Regex.Escape(pattern)
            .Replace(@"\*\*", ".*")
            .Replace(@"\*", "[^/]*")
            .Replace(@"\?", "[^/]");

        // The regex is built per call and used for a single scan, so paying for
        // RegexOptions.Compiled would cost more than it saves. CultureInvariant keeps
        // case-insensitive matching independent of the current thread culture.
        return new Regex($"^{escaped}$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    }

    /// <summary>
    /// Builder for constructing a <see cref="PythonVirtualFileSystem"/>.
    /// </summary>
    /// <remarks>
    /// When the same virtual path is added more than once, the most recent addition wins.
    /// </remarks>
    internal sealed class Builder
    {
        private readonly Dictionary<string, PythonVirtualFile> _files = new(StringComparer.Ordinal);
        private readonly HashSet<string> _processedArchives = new(StringComparer.OrdinalIgnoreCase);
        private readonly HashSet<string> _sourceTreeRoots = new(StringComparer.Ordinal);
        private readonly HashSet<string> _sitePackagesPaths = new(StringComparer.Ordinal);
        private readonly HashSet<string> _editablePaths = new(StringComparer.Ordinal);
        private readonly HashSet<string> _zipArchivePaths = new(StringComparer.Ordinal);

        /// <summary>
        /// Adds files from a site-packages directory, mapped at the VFS root.
        /// Does nothing when the directory does not exist.
        /// </summary>
        public Builder AddSitePackages(string sitePackagesPath, string? layerDigest = null)
        {
            if (!Directory.Exists(sitePackagesPath))
            {
                return this;
            }

            var basePath = Path.GetFullPath(sitePackagesPath);
            _sitePackagesPaths.Add(string.Empty); // Root of the VFS
            AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SitePackages, layerDigest);
            return this;
        }

        /// <summary>
        /// Adds files from a wheel archive (.whl). Missing, already processed,
        /// unreadable or corrupted archives are skipped.
        /// </summary>
        public Builder AddWheel(string wheelPath)
        {
            if (!File.Exists(wheelPath) || !_processedArchives.Add(wheelPath))
            {
                return this;
            }

            _zipArchivePaths.Add(wheelPath);

            try
            {
                using var archive = ZipFile.OpenRead(wheelPath);
                AddArchiveEntries(archive, wheelPath, PythonFileSource.Wheel);
            }
            catch (Exception ex) when (IsSkippableArchiveError(ex))
            {
                // Corrupted or unreadable archive - skip
            }

            return this;
        }

        /// <summary>
        /// Adds files from a zipapp (.pyz, .pyzw). Missing, already processed,
        /// unreadable or corrupted archives are skipped, as are files with no ZIP
        /// local-file signature in their first 4 KiB.
        /// </summary>
        public Builder AddZipapp(string zipappPath)
        {
            if (!File.Exists(zipappPath) || !_processedArchives.Add(zipappPath))
            {
                return this;
            }

            _zipArchivePaths.Add(zipappPath);

            try
            {
                using var stream = File.OpenRead(zipappPath);

                // Zipapps start with a shebang line, need to find ZIP signature
                var offset = FindZipOffset(stream);
                if (offset < 0)
                {
                    return this;
                }

                stream.Position = offset;
                using var archive = new ZipArchive(stream, ZipArchiveMode.Read);
                AddArchiveEntries(archive, zipappPath, PythonFileSource.Zipapp);
            }
            catch (Exception ex) when (IsSkippableArchiveError(ex))
            {
                // Corrupted or unreadable archive - skip
            }

            return this;
        }

        /// <summary>
        /// Adds files from a source distribution archive. Only <c>.zip</c> sdists are
        /// read; other formats (e.g. <c>.tar.gz</c>) are recorded in the archive set
        /// but contribute no files.
        /// </summary>
        public Builder AddSdist(string sdistPath)
        {
            if (!File.Exists(sdistPath) || !_processedArchives.Add(sdistPath))
            {
                return this;
            }

            _zipArchivePaths.Add(sdistPath);

            try
            {
                if (sdistPath.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
                {
                    using var archive = ZipFile.OpenRead(sdistPath);
                    AddArchiveEntries(archive, sdistPath, PythonFileSource.Sdist);
                }

                // Note: .tar.gz support would require TarReader from System.Formats.Tar
                // For now, we handle the common .zip case
            }
            catch (Exception ex) when (IsSkippableArchiveError(ex))
            {
                // Corrupted or unreadable archive - skip
            }

            return this;
        }

        /// <summary>
        /// Adds files from an editable install path, mapped under
        /// <paramref name="packageName"/> (or the VFS root when no name is given).
        /// </summary>
        public Builder AddEditable(string editablePath, string? packageName = null)
        {
            if (!Directory.Exists(editablePath))
            {
                return this;
            }

            var basePath = Path.GetFullPath(editablePath);
            var prefix = string.IsNullOrEmpty(packageName) ? string.Empty : packageName + "/";
            _editablePaths.Add(prefix.TrimEnd('/'));
            AddDirectoryRecursive(basePath, prefix.TrimEnd('/'), PythonFileSource.Editable, layerDigest: null);
            return this;
        }

        /// <summary>
        /// Adds files from a source tree, mapped at the VFS root.
        /// </summary>
        public Builder AddSourceTree(string sourcePath)
        {
            if (!Directory.Exists(sourcePath))
            {
                return this;
            }

            var basePath = Path.GetFullPath(sourcePath);
            _sourceTreeRoots.Add(string.Empty); // Root of the VFS
            AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SourceTree, layerDigest: null);
            return this;
        }

        /// <summary>
        /// Adds the files directly inside a virtual environment's bin/Scripts directory
        /// under the virtual <c>bin/</c> directory.
        /// </summary>
        public Builder AddVenvBin(string binPath)
        {
            if (!Directory.Exists(binPath))
            {
                return this;
            }

            var basePath = Path.GetFullPath(binPath);

            try
            {
                foreach (var file in Directory.EnumerateFiles(basePath, "*", SafeEnumeration))
                {
                    var fileName = Path.GetFileName(file);
                    var virtualPath = $"bin/{fileName}";
                    AddFile(virtualPath, file, PythonFileSource.VenvBin, layerDigest: null, archivePath: null);
                }
            }
            catch (UnauthorizedAccessException)
            {
                // Skip inaccessible directory, matching AddDirectoryRecursive
            }
            catch (DirectoryNotFoundException)
            {
                // Directory was removed between the existence check and enumeration - skip
            }

            return this;
        }

        /// <summary>
        /// Adds a single file with explicit parameters. Paths that normalize to the
        /// empty string are ignored; an existing entry at the same path is replaced.
        /// </summary>
        public Builder AddFile(
            string virtualPath,
            string absolutePath,
            PythonFileSource source,
            string? layerDigest = null,
            string? archivePath = null,
            long? size = null,
            string? sha256 = null,
            FrozenDictionary<string, string>? metadata = null)
        {
            var normalized = NormalizePath(virtualPath);
            if (string.IsNullOrEmpty(normalized))
            {
                return this;
            }

            var file = new PythonVirtualFile(
                normalized,
                absolutePath,
                source,
                layerDigest,
                archivePath,
                size,
                sha256,
                metadata);

            // Later additions override earlier ones (layer precedence)
            _files[normalized] = file;
            return this;
        }

        /// <summary>
        /// Builds the immutable virtual filesystem from everything added so far.
        /// </summary>
        public PythonVirtualFileSystem Build()
        {
            var files = _files.ToFrozenDictionary(StringComparer.Ordinal);
            var directories = BuildDirectoryIndex(files);

            return new PythonVirtualFileSystem(
                files,
                directories,
                _sourceTreeRoots.ToFrozenSet(StringComparer.Ordinal),
                _sitePackagesPaths.ToFrozenSet(StringComparer.Ordinal),
                _editablePaths.ToFrozenSet(StringComparer.Ordinal),
                _zipArchivePaths.ToFrozenSet(StringComparer.Ordinal));
        }

        /// <summary>
        /// Returns true for the failures that mean "this archive cannot be read" and
        /// should cause the archive to be skipped rather than abort the build.
        /// </summary>
        private static bool IsSkippableArchiveError(Exception exception) =>
            exception is InvalidDataException or IOException or UnauthorizedAccessException;

        /// <summary>
        /// Walks <paramref name="basePath"/> recursively and adds each file under
        /// <paramref name="virtualPrefix"/>, skipping <c>__pycache__</c> content and
        /// files whose name starts with a dot.
        /// </summary>
        private void AddDirectoryRecursive(
            string basePath,
            string virtualPrefix,
            PythonFileSource source,
            string? layerDigest)
        {
            try
            {
                foreach (var file in Directory.EnumerateFiles(basePath, "*", RecursiveEnumeration))
                {
                    var relativePath = Path.GetRelativePath(basePath, file);
                    var normalizedRelative = relativePath.Replace('\\', '/');
                    var virtualPath = string.IsNullOrEmpty(virtualPrefix)
                        ? normalizedRelative
                        : $"{virtualPrefix}/{normalizedRelative}";

                    // Skip __pycache__ and hidden files
                    if (normalizedRelative.Contains("/__pycache__/", StringComparison.Ordinal) ||
                        normalizedRelative.StartsWith("__pycache__/", StringComparison.Ordinal) ||
                        Path.GetFileName(file).StartsWith('.'))
                    {
                        continue;
                    }

                    long? size = null;
                    try
                    {
                        var info = new FileInfo(file);
                        size = info.Length;
                    }
                    catch
                    {
                        // Size is best-effort metadata: the file is still recorded when
                        // its length cannot be read.
                    }

                    AddFile(virtualPath, file, source, layerDigest, archivePath: null, size: size);
                }
            }
            catch (UnauthorizedAccessException)
            {
                // Skip inaccessible directories
            }
            catch (DirectoryNotFoundException)
            {
                // Directory was removed - skip
            }
        }

        /// <summary>
        /// Adds every file entry of <paramref name="archive"/> using the entry's full
        /// name as the virtual path, skipping directory entries and <c>__pycache__</c> content.
        /// </summary>
        private void AddArchiveEntries(ZipArchive archive, string archivePath, PythonFileSource source)
        {
            foreach (var entry in archive.Entries)
            {
                // Skip directories
                if (string.IsNullOrEmpty(entry.Name) || entry.FullName.EndsWith('/'))
                {
                    continue;
                }

                var virtualPath = entry.FullName.Replace('\\', '/');

                // Skip __pycache__ in archives too
                if (virtualPath.Contains("/__pycache__/", StringComparison.Ordinal) ||
                    virtualPath.StartsWith("__pycache__/", StringComparison.Ordinal))
                {
                    continue;
                }

                AddFile(
                    virtualPath,
                    entry.FullName,
                    source,
                    layerDigest: null,
                    archivePath: archivePath,
                    size: entry.Length);
            }
        }

        /// <summary>
        /// Finds the offset of the first ZIP local-file signature (<c>PK\x03\x04</c>)
        /// within the first 4096 bytes read from <paramref name="stream"/>.
        /// </summary>
        /// <returns>The zero-based offset within the bytes read, or -1 when not found.</returns>
        private static long FindZipOffset(Stream stream)
        {
            var buffer = new byte[4096];

            // A single Stream.Read call may return fewer bytes than requested even when
            // more data is available; ReadAtLeast keeps reading until the buffer is
            // full or the stream ends.
            var bytesRead = stream.ReadAtLeast(buffer, buffer.Length, throwOnEndOfStream: false);

            ReadOnlySpan<byte> signature = new byte[] { 0x50, 0x4B, 0x03, 0x04 };
            return buffer.AsSpan(0, bytesRead).IndexOf(signature);
        }

        /// <summary>
        /// Builds a map from each directory path to the names of its immediate children
        /// (files and sub-directories). The root is always present under the empty key.
        /// </summary>
        private static FrozenDictionary<string, FrozenSet<string>> BuildDirectoryIndex(
            FrozenDictionary<string, PythonVirtualFile> files)
        {
            var directories = new Dictionary<string, HashSet<string>>(StringComparer.Ordinal)
            {
                [string.Empty] = new(StringComparer.Ordinal)
            };

            foreach (var path in files.Keys)
            {
                var parts = path.Split('/');
                var current = string.Empty;

                for (var i = 0; i < parts.Length; i++)
                {
                    var part = parts[i];
                    var isLast = i == parts.Length - 1;

                    if (!directories.TryGetValue(current, out var entries))
                    {
                        entries = new HashSet<string>(StringComparer.Ordinal);
                        directories[current] = entries;
                    }

                    entries.Add(part);

                    // Descend only through directory segments; the last segment is the file.
                    if (!isLast)
                    {
                        current = current.Length == 0 ? part : $"{current}/{part}";
                    }
                }
            }

            return directories.ToFrozenDictionary(
                static kvp => kvp.Key,
                static kvp => kvp.Value.ToFrozenSet(StringComparer.Ordinal),
                StringComparer.Ordinal);
        }
    }
}
|
||||
@@ -1,64 +1,71 @@
|
||||
using System.Linq;
|
||||
using System.Text.Json;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python;
|
||||
|
||||
public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
{
|
||||
private static readonly EnumerationOptions Enumeration = new()
|
||||
{
|
||||
RecurseSubdirectories = true,
|
||||
IgnoreInaccessible = true,
|
||||
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
|
||||
};
|
||||
|
||||
public string Id => "python";
|
||||
|
||||
public string DisplayName => "Python Analyzer";
|
||||
|
||||
public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(context);
|
||||
ArgumentNullException.ThrowIfNull(writer);
|
||||
|
||||
return AnalyzeInternalAsync(context, writer, cancellationToken);
|
||||
}
|
||||
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python;
|
||||
|
||||
public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
{
|
||||
private static readonly EnumerationOptions Enumeration = new()
|
||||
{
|
||||
RecurseSubdirectories = true,
|
||||
IgnoreInaccessible = true,
|
||||
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
|
||||
};
|
||||
|
||||
public string Id => "python";
|
||||
|
||||
public string DisplayName => "Python Analyzer";
|
||||
|
||||
public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(context);
|
||||
ArgumentNullException.ThrowIfNull(writer);
|
||||
|
||||
return AnalyzeInternalAsync(context, writer, cancellationToken);
|
||||
}
|
||||
|
||||
private static async ValueTask AnalyzeInternalAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
|
||||
{
|
||||
var lockData = await PythonLockFileCollector.LoadAsync(context, cancellationToken).ConfigureAwait(false);
|
||||
var matchedLocks = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
var hasLockEntries = lockData.Entries.Count > 0;
|
||||
|
||||
var distInfoDirectories = Directory
|
||||
.EnumerateDirectories(context.RootPath, "*.dist-info", Enumeration)
|
||||
.OrderBy(static path => path, StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
// Detect Python runtime in container layers
|
||||
var runtimeInfo = PythonContainerAdapter.DetectRuntime(context.RootPath);
|
||||
|
||||
// Detect environment variables (PYTHONPATH/PYTHONHOME)
|
||||
var environment = await PythonEnvironmentDetector.DetectAsync(context.RootPath, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
// Detect startup hooks (sitecustomize.py, usercustomize.py, .pth files)
|
||||
var startupHooks = PythonStartupHookDetector.Detect(context.RootPath);
|
||||
|
||||
// Collect dist-info directories from both root and container layers
|
||||
var distInfoDirectories = CollectDistInfoDirectories(context.RootPath);
|
||||
|
||||
foreach (var distInfoPath in distInfoDirectories)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
PythonDistribution? distribution;
|
||||
try
|
||||
{
|
||||
distribution = await PythonDistributionLoader.LoadAsync(context, distInfoPath, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (IOException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
catch (JsonException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
catch (UnauthorizedAccessException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (distribution is null)
|
||||
try
|
||||
{
|
||||
distribution = await PythonDistributionLoader.LoadAsync(context, distInfoPath, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (IOException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
catch (JsonException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
catch (UnauthorizedAccessException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (distribution is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@@ -75,6 +82,19 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
metadata.Add(new KeyValuePair<string, string?>("lockMissing", "true"));
|
||||
}
|
||||
|
||||
// Append runtime information
|
||||
AppendRuntimeMetadata(metadata, runtimeInfo);
|
||||
|
||||
// Append environment variables (PYTHONPATH/PYTHONHOME)
|
||||
AppendEnvironmentMetadata(metadata, environment);
|
||||
|
||||
// Append startup hooks warnings
|
||||
AppendStartupHooksMetadata(metadata, startupHooks);
|
||||
|
||||
// Collect evidence including startup hooks
|
||||
var evidence = distribution.SortedEvidence.ToList();
|
||||
evidence.AddRange(startupHooks.ToEvidence(context));
|
||||
|
||||
writer.AddFromPurl(
|
||||
analyzerId: "python",
|
||||
purl: distribution.Purl,
|
||||
@@ -82,7 +102,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
version: distribution.Version,
|
||||
type: "pypi",
|
||||
metadata: metadata,
|
||||
evidence: distribution.SortedEvidence,
|
||||
evidence: evidence,
|
||||
usedByEntrypoint: distribution.UsedByEntrypoint);
|
||||
}
|
||||
|
||||
@@ -159,4 +179,98 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
metadata.Add(new KeyValuePair<string, string?>("lockEditablePath", entry.EditablePath));
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends a summary of the detected Python runtime to a component's metadata.
/// </summary>
/// <param name="metadata">Metadata list to append to; it is modified in place.</param>
/// <param name="runtimeInfo">
/// Detected runtime information, or <c>null</c>, in which case nothing is appended.
/// </param>
private static void AppendRuntimeMetadata(List<KeyValuePair<string, string?>> metadata, PythonRuntimeInfo? runtimeInfo)
{
    if (runtimeInfo is null)
    {
        return;
    }

    // Counts are machine-readable metadata, so they are formatted with the
    // invariant culture rather than whatever culture the host happens to use.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    if (runtimeInfo.Versions.Count > 0)
    {
        // All detected versions are emitted as a single ';'-separated value.
        metadata.Add(new KeyValuePair<string, string?>("runtime.versions", string.Join(';', runtimeInfo.Versions)));
    }

    if (runtimeInfo.Binaries.Count > 0)
    {
        metadata.Add(new KeyValuePair<string, string?>("runtime.binaries.count", runtimeInfo.Binaries.Count.ToString(invariant)));
    }

    if (runtimeInfo.LibPaths.Count > 0)
    {
        metadata.Add(new KeyValuePair<string, string?>("runtime.libPaths.count", runtimeInfo.LibPaths.Count.ToString(invariant)));
    }
}
|
||||
|
||||
/// <summary>
/// Appends PYTHONPATH / PYTHONHOME information from the detected environment to a
/// component's metadata, together with a warning entry for each variable that is set.
/// </summary>
/// <param name="metadata">Metadata list to append to; it is modified in place.</param>
/// <param name="environment">Detected Python environment settings.</param>
private static void AppendEnvironmentMetadata(List<KeyValuePair<string, string?>> metadata, PythonEnvironment environment)
{
    if (environment.HasPythonPath)
    {
        metadata.Add(new KeyValuePair<string, string?>("env.pythonpath", environment.PythonPath));
        metadata.Add(new KeyValuePair<string, string?>("env.pythonpath.warning", "PYTHONPATH is set; may affect module resolution"));
    }

    if (environment.HasPythonHome)
    {
        metadata.Add(new KeyValuePair<string, string?>("env.pythonhome", environment.PythonHome));
        metadata.Add(new KeyValuePair<string, string?>("env.pythonhome.warning", "PYTHONHOME is set; may affect interpreter behavior"));
    }

    if (environment.Sources.Count > 0)
    {
        // The count is machine-readable metadata, so it is formatted with the
        // invariant culture rather than the host's current culture.
        metadata.Add(new KeyValuePair<string, string?>(
            "env.sources.count",
            environment.Sources.Count.ToString(System.Globalization.CultureInfo.InvariantCulture)));
    }
}
|
||||
|
||||
/// <summary>
/// Appends startup-hook findings (detection flags, hook count and warnings) to a
/// component's metadata.
/// </summary>
/// <param name="metadata">Metadata list to append to; it is modified in place.</param>
/// <param name="startupHooks">Result of startup hook detection.</param>
private static void AppendStartupHooksMetadata(List<KeyValuePair<string, string?>> metadata, PythonStartupHooks startupHooks)
{
    if (startupHooks.HasStartupHooks)
    {
        metadata.Add(new KeyValuePair<string, string?>("startupHooks.detected", "true"));

        // The count is machine-readable metadata, so it is formatted with the
        // invariant culture rather than the host's current culture.
        metadata.Add(new KeyValuePair<string, string?>(
            "startupHooks.count",
            startupHooks.Hooks.Count.ToString(System.Globalization.CultureInfo.InvariantCulture)));
    }

    if (startupHooks.HasPthFilesWithImports)
    {
        metadata.Add(new KeyValuePair<string, string?>("pthFiles.withImports.detected", "true"));
    }

    // NOTE(review): every warning is added under the same "startupHooks.warning" key,
    // so the list can contain repeated keys — confirm downstream consumers accept that.
    foreach (var warning in startupHooks.Warnings)
    {
        metadata.Add(new KeyValuePair<string, string?>("startupHooks.warning", warning));
    }
}
|
||||
|
||||
/// <summary>
/// Gathers the <c>*.dist-info</c> directories to analyze: those enumerated on disk
/// under <paramref name="rootPath"/> plus those reported by
/// <c>PythonContainerAdapter.DiscoverDistInfoDirectories</c>.
/// </summary>
/// <param name="rootPath">Root directory of the scan.</param>
/// <returns>
/// The de-duplicated directory paths (compared case-insensitively), sorted with
/// ordinal comparison.
/// </returns>
private static IReadOnlyCollection<string> CollectDistInfoDirectories(string rootPath)
{
    // NOTE(review): de-duplication ignores case while the final sort is case-sensitive;
    // confirm that collapsing paths differing only by case is intended on
    // case-sensitive file systems.
    var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Directories on disk under the root; search depth is governed by the
    // Enumeration options declared elsewhere in this class.
    try
    {
        found.UnionWith(Directory.EnumerateDirectories(rootPath, "*.dist-info", Enumeration));
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: keep whatever was collected before the enumeration failed.
    }

    // Directories contributed by the container adapter (OCI container layers).
    foreach (var layerDirectory in PythonContainerAdapter.DiscoverDistInfoDirectories(rootPath))
    {
        found.Add(layerDirectory);
    }

    // Ordinal sort gives a stable result regardless of enumeration order.
    var ordered = new string[found.Count];
    found.CopyTo(ordered);
    Array.Sort(ordered, StringComparer.Ordinal);
    return ordered;
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
<EnableDefaultItems>false</EnableDefaultItems>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<InternalsVisibleTo Include="StellaOps.Scanner.Analyzers.Lang.Python.Tests" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
|
||||
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />
|
||||
|
||||
@@ -12,3 +12,9 @@
|
||||
|
||||
| 6 | SCANNER-ANALYZERS-LANG-10-309P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-308P | Package plug-in (manifest, DI registration) and document Offline Kit bundling of Python stdlib metadata if needed. | Manifest copied to `plugins/scanner/analyzers/lang/`; Worker loads analyzer; Offline Kit doc updated. |
|
||||
|
||||
| 7 | SCANNER-ANALYZERS-PYTHON-23-001 | DONE (2025-11-27) | — | Build input normalizer & virtual filesystem for wheels, sdists, editable installs, zipapps, site-packages trees, and container roots. Detect Python version targets (`pyproject.toml`, `runtime.txt`, Dockerfile) + virtualenv layout deterministically. | Created `PythonVirtualFileSystem` with builder pattern, `PythonInputNormalizer` for layout/version detection. Supports VFS for wheels, sdists, zipapps. Detects layouts (Virtualenv, Poetry, Pipenv, Conda, Lambda, Container) and version from multiple sources. Files: `Internal/VirtualFileSystem/`. Tests: `VirtualFileSystem/PythonVirtualFileSystemTests.cs`, `VirtualFileSystem/PythonInputNormalizerTests.cs`. |
|
||||
|
||||
| 8 | SCANNER-ANALYZERS-PYTHON-23-002 | DONE (2025-11-27) | SCANNER-ANALYZERS-PYTHON-23-001 | Entrypoint discovery: module `__main__`, console_scripts entry points, `scripts`, zipapp main, `manage.py`/gunicorn/celery patterns. Capture invocation context (module vs package, argv wrappers). | Created `PythonEntrypointDiscovery` with support for: PackageMain (`__main__.py`), ConsoleScript/GuiScript (`entry_points.txt`), Script (`bin/`), ZipappMain, DjangoManage, WsgiApp/AsgiApp, CeleryWorker, LambdaHandler, AzureFunctionHandler, CloudFunctionHandler, CliApp (Click/Typer), StandaloneScript. Includes invocation context with command and arguments. Files: `Internal/Entrypoints/`. Tests: `Entrypoints/PythonEntrypointDiscoveryTests.cs`. |
|
||||
|
||||
| 9 | SCANNER-ANALYZERS-PYTHON-23-003 | DONE (2025-11-27) | SCANNER-ANALYZERS-PYTHON-23-002 | Static import graph builder using AST and bytecode fallback. Support `import`, `from ... import`, relative imports, `importlib.import_module`, `__import__` with literal args, `pkgutil.extend_path`. | Created `PythonSourceImportExtractor` with regex-based AST-like import extraction supporting: standard imports, from imports, star imports, relative imports (all levels), future imports, conditional imports (try/except), lazy imports (inside functions), TYPE_CHECKING imports, `importlib.import_module()`, `__import__()`, `pkgutil.extend_path()`. Created `PythonBytecodeImportExtractor` for .pyc fallback supporting Python 3.8-3.13 magic numbers. Created `PythonImportGraph` for dependency graph with: forward/reverse edges, cycle detection, topological ordering, relative import resolution. Created `PythonImportAnalysis` wrapper categorizing imports as stdlib/third-party/local with transitive dependency analysis. Files: `Internal/Imports/`. Tests: `Imports/PythonImportExtractorTests.cs`, `Imports/PythonImportGraphTests.cs`. |
|
||||
|
||||
|
||||
Reference in New Issue
Block a user