up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1,200 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
|
||||
|
||||
/// <summary>
/// Represents a detected runtime capability in Python code.
/// </summary>
/// <param name="Kind">The type of capability.</param>
/// <param name="SourceFile">The file where this capability was detected.</param>
/// <param name="LineNumber">The line number (if available).</param>
/// <param name="Evidence">The code pattern that indicated this capability.</param>
/// <param name="Confidence">Confidence level for this detection.</param>
/// <param name="Context">Additional context about the capability. Not emitted by <see cref="ToMetadata"/>.</param>
internal sealed record PythonCapability(
    PythonCapabilityKind Kind,
    string SourceFile,
    int? LineNumber,
    string Evidence,
    PythonCapabilityConfidence Confidence,
    ImmutableDictionary<string, string>? Context = null)
{
    /// <summary>
    /// Gets whether this is a security-sensitive capability.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <see cref="PythonCapabilityKind.Ctypes"/>, <see cref="PythonCapabilityKind.Cffi"/> and
    /// <see cref="PythonCapabilityKind.ImportHook"/> are not in this list - confirm that is intended.
    /// </remarks>
    public bool IsSecuritySensitive => Kind is
        PythonCapabilityKind.ProcessExecution or
        PythonCapabilityKind.CodeExecution or
        PythonCapabilityKind.FileSystemAccess or
        PythonCapabilityKind.NetworkAccess or
        PythonCapabilityKind.NativeCodeExecution or
        PythonCapabilityKind.EnvironmentAccess or
        PythonCapabilityKind.Deserialization;

    /// <summary>
    /// Generates metadata entries for this capability.
    /// </summary>
    /// <param name="prefix">Key prefix; every emitted key has the form <c>{prefix}.{name}</c>.</param>
    /// <returns>
    /// The <c>kind</c>, <c>file</c>, <c>evidence</c>, <c>confidence</c> and <c>securitySensitive</c> entries,
    /// plus a <c>line</c> entry when <see cref="LineNumber"/> has a value.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return new($"{prefix}.kind", Kind.ToString());
        yield return new($"{prefix}.file", SourceFile);

        if (LineNumber is int lineNumber)
        {
            // Metadata is machine-readable, so the number is formatted independently of the current culture.
            yield return new(
                $"{prefix}.line",
                lineNumber.ToString(System.Globalization.CultureInfo.InvariantCulture));
        }

        yield return new($"{prefix}.evidence", Evidence);
        yield return new($"{prefix}.confidence", Confidence.ToString());
        yield return new($"{prefix}.securitySensitive", IsSecuritySensitive.ToString());
    }
}
|
||||
|
||||
/// <summary>
/// Categories of runtime capability that can be reported for Python code.
/// </summary>
internal enum PythonCapabilityKind
{
    /// <summary>Capability could not be classified.</summary>
    Unknown = 0,

    // ---- Process and execution ----

    /// <summary>Launching processes (subprocess, os.system, etc.).</summary>
    ProcessExecution = 1,

    /// <summary>Executing code built at run time (eval, exec, compile).</summary>
    CodeExecution = 2,

    /// <summary>Running native code through ctypes/cffi.</summary>
    NativeCodeExecution = 3,

    // ---- Concurrency ----

    /// <summary>Use of multiprocessing.</summary>
    Multiprocessing = 4,

    /// <summary>Use of threading.</summary>
    Threading = 5,

    /// <summary>Use of async/await.</summary>
    AsyncAwait = 6,

    // ---- I/O and resources ----

    /// <summary>Access to the file system.</summary>
    FileSystemAccess = 7,

    /// <summary>Access to the network (sockets, http).</summary>
    NetworkAccess = 8,

    /// <summary>Access to a database.</summary>
    DatabaseAccess = 9,

    // ---- System interaction ----

    /// <summary>Reading environment variables.</summary>
    EnvironmentAccess = 10,

    /// <summary>Handling of signals.</summary>
    SignalHandling = 11,

    /// <summary>Querying system information.</summary>
    SystemInfo = 12,

    // ---- Security sensitive ----

    /// <summary>Cryptographic operations.</summary>
    Cryptography = 13,

    /// <summary>Deserialization (pickle, marshal).</summary>
    Deserialization = 14,

    /// <summary>Modification of import hooks.</summary>
    ImportHook = 15,

    // ---- FFI/Native ----

    /// <summary>Use of ctypes.</summary>
    Ctypes = 16,

    /// <summary>Use of CFFI.</summary>
    Cffi = 17,

    /// <summary>Use of WebAssembly.</summary>
    Wasm = 18,

    // ---- Frameworks ----

    /// <summary>A web framework (Django, Flask, FastAPI).</summary>
    WebFramework = 19,

    /// <summary>A task queue (Celery, RQ).</summary>
    TaskQueue = 20,

    /// <summary>A machine learning framework.</summary>
    MachineLearning = 21
}
|
||||
|
||||
/// <summary>
/// How certain a capability detection is. The numeric values increase with certainty.
/// </summary>
internal enum PythonCapabilityConfidence
{
    /// <summary>Heuristic match only.</summary>
    Low = 0,

    /// <summary>Usage is likely.</summary>
    Medium = 1,

    /// <summary>A clear usage pattern was found.</summary>
    High = 2,

    /// <summary>A direct API call was detected.</summary>
    Definitive = 3
}
|
||||
@@ -0,0 +1,335 @@
|
||||
using System.Collections.Frozen;
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Detects runtime capabilities from Python source code.
|
||||
/// </summary>
|
||||
internal sealed partial class PythonCapabilityDetector
|
||||
{
|
||||
// Maps an imported module name (ordinal, case-sensitive) to the capability it implies and the
// confidence of that inference. Dotted keys such as "os.system" are also matched by prefix in
// DetectImportCapabilities.
private static readonly FrozenDictionary<string, (PythonCapabilityKind Kind, PythonCapabilityConfidence Confidence)> ImportCapabilities =
    new Dictionary<string, (PythonCapabilityKind, PythonCapabilityConfidence)>(StringComparer.Ordinal)
    {
        // Process execution
        ["subprocess"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.Definitive),
        ["os.system"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["os.popen"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["os.spawn"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["os.exec"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["commands"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.High),
        ["pexpect"] = (PythonCapabilityKind.ProcessExecution, PythonCapabilityConfidence.Definitive),

        // Multiprocessing/Threading
        ["multiprocessing"] = (PythonCapabilityKind.Multiprocessing, PythonCapabilityConfidence.Definitive),
        ["concurrent.futures"] = (PythonCapabilityKind.Multiprocessing, PythonCapabilityConfidence.High),
        ["threading"] = (PythonCapabilityKind.Threading, PythonCapabilityConfidence.Definitive),
        ["_thread"] = (PythonCapabilityKind.Threading, PythonCapabilityConfidence.Definitive),
        ["asyncio"] = (PythonCapabilityKind.AsyncAwait, PythonCapabilityConfidence.Definitive),
        ["trio"] = (PythonCapabilityKind.AsyncAwait, PythonCapabilityConfidence.Definitive),
        ["anyio"] = (PythonCapabilityKind.AsyncAwait, PythonCapabilityConfidence.Definitive),

        // FFI/Native
        ["ctypes"] = (PythonCapabilityKind.Ctypes, PythonCapabilityConfidence.Definitive),
        ["cffi"] = (PythonCapabilityKind.Cffi, PythonCapabilityConfidence.Definitive),
        ["_ctypes"] = (PythonCapabilityKind.Ctypes, PythonCapabilityConfidence.Definitive),
        ["wasmtime"] = (PythonCapabilityKind.Wasm, PythonCapabilityConfidence.Definitive),
        ["wasmer"] = (PythonCapabilityKind.Wasm, PythonCapabilityConfidence.Definitive),

        // Network
        ["socket"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["http.client"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),
        ["urllib"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),
        ["urllib3"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),
        ["requests"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["httpx"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["aiohttp"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["websocket"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["paramiko"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.Definitive),
        ["ftplib"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),
        ["smtplib"] = (PythonCapabilityKind.NetworkAccess, PythonCapabilityConfidence.High),

        // File system
        ["pathlib"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.Medium),
        ["shutil"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.High),
        ["glob"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.Medium),
        ["tempfile"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.High),
        ["watchdog"] = (PythonCapabilityKind.FileSystemAccess, PythonCapabilityConfidence.Definitive),

        // Database
        ["sqlite3"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["psycopg2"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["pymysql"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["pymongo"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["sqlalchemy"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),
        ["redis"] = (PythonCapabilityKind.DatabaseAccess, PythonCapabilityConfidence.Definitive),

        // Security sensitive
        ["pickle"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.Definitive),
        ["marshal"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.High),
        ["shelve"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.High),
        ["dill"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.Definitive),
        ["cloudpickle"] = (PythonCapabilityKind.Deserialization, PythonCapabilityConfidence.Definitive),
        ["cryptography"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Definitive),
        ["pycryptodome"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Definitive),
        // "pycryptodome" is the distribution name; the packages actually imported are
        // "Crypto" (pycryptodome) and "Cryptodome" (pycryptodomex).
        ["Crypto"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Definitive),
        ["Cryptodome"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Definitive),
        ["hashlib"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Medium),
        ["ssl"] = (PythonCapabilityKind.Cryptography, PythonCapabilityConfidence.Medium),

        // System
        ["signal"] = (PythonCapabilityKind.SignalHandling, PythonCapabilityConfidence.Definitive),
        ["platform"] = (PythonCapabilityKind.SystemInfo, PythonCapabilityConfidence.Low),
        ["sys"] = (PythonCapabilityKind.SystemInfo, PythonCapabilityConfidence.Low),

        // Web frameworks
        ["django"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["flask"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["fastapi"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["starlette"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["tornado"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["bottle"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),
        ["pyramid"] = (PythonCapabilityKind.WebFramework, PythonCapabilityConfidence.Definitive),

        // Task queues
        ["celery"] = (PythonCapabilityKind.TaskQueue, PythonCapabilityConfidence.Definitive),
        ["rq"] = (PythonCapabilityKind.TaskQueue, PythonCapabilityConfidence.Definitive),
        ["huey"] = (PythonCapabilityKind.TaskQueue, PythonCapabilityConfidence.Definitive),
        ["dramatiq"] = (PythonCapabilityKind.TaskQueue, PythonCapabilityConfidence.Definitive),

        // ML
        ["tensorflow"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["torch"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["pytorch"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["sklearn"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["scikit-learn"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["keras"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
        ["transformers"] = (PythonCapabilityKind.MachineLearning, PythonCapabilityConfidence.Definitive),
    }.ToFrozenDictionary(StringComparer.Ordinal);
|
||||
|
||||
// Patterns for code execution detection.
// The (?<!\.) look-behind rejects attribute/method calls such as re.compile(...), model.eval()
// or cursor.exec(...), which are not the Python built-ins these patterns are meant to flag.
// Trade-off: a qualified call like builtins.eval(...) is no longer matched.
[GeneratedRegex(@"(?<!\.)\beval\s*\(", RegexOptions.Compiled)]
private static partial Regex EvalPattern();

[GeneratedRegex(@"(?<!\.)\bexec\s*\(", RegexOptions.Compiled)]
private static partial Regex ExecPattern();

[GeneratedRegex(@"(?<!\.)\bcompile\s*\(", RegexOptions.Compiled)]
private static partial Regex CompilePattern();

// Matches a call to the __import__ built-in.
[GeneratedRegex(@"__import__\s*\(", RegexOptions.Compiled)]
private static partial Regex DynamicImportPattern();

// Pattern for environment access (os.environ, os.getenv, environ.get)
[GeneratedRegex(@"os\.environ|os\.getenv|environ\.get", RegexOptions.Compiled)]
private static partial Regex EnvironmentPattern();

// Pattern for file operations: an open( call whose argument list contains a quote
// immediately followed by r, w or a.
[GeneratedRegex(@"\bopen\s*\([^)]*[""'][rwa]", RegexOptions.Compiled)]
private static partial Regex FileOpenPattern();
|
||||
|
||||
/// <summary>
/// Scans every <c>.py</c> file in the virtual file system and reports the capabilities found.
/// </summary>
/// <param name="vfs">The virtual file system to scan.</param>
/// <param name="cancellationToken">Checked before each file is processed.</param>
/// <returns>
/// One capability per (kind, source file) pair; when several detections share a pair,
/// the one with the highest confidence is kept.
/// </returns>
public async Task<ImmutableArray<PythonCapability>> DetectAsync(
    PythonVirtualFileSystem vfs,
    CancellationToken cancellationToken = default)
{
    // Snapshot the candidate files before any asynchronous work starts.
    var sources = vfs.Files
        .Where(candidate => candidate.VirtualPath.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
        .ToList();

    var detected = new List<PythonCapability>();

    foreach (var source in sources)
    {
        cancellationToken.ThrowIfCancellationRequested();

        detected.AddRange(
            await DetectInFileAsync(vfs, source, cancellationToken).ConfigureAwait(false));
    }

    // Collapse duplicates: keep the strongest detection for each kind within each file.
    var strongestPerKindAndFile =
        from capability in detected
        group capability by (capability.Kind, capability.SourceFile) into bucket
        select bucket.OrderByDescending(entry => entry.Confidence).First();

    return strongestPerKindAndFile.ToImmutableArray();
}
|
||||
|
||||
/// <summary>
/// Reads one Python file and reports the capabilities suggested by its import statements
/// and by the line-level regex patterns.
/// </summary>
/// <param name="vfs">The virtual file system used to open the file.</param>
/// <param name="file">The file to inspect.</param>
/// <param name="cancellationToken">Checked once per line and passed to the read calls.</param>
/// <returns>
/// All detections, possibly with duplicates. The result is empty when the stream cannot be
/// opened, and holds whatever was collected so far when an <see cref="IOException"/> occurs.
/// </returns>
private async Task<IEnumerable<PythonCapability>> DetectInFileAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    var found = new List<PythonCapability>();

    // Regex rules applied, in this order, to every line that is not a full-line comment.
    (Regex Pattern, PythonCapabilityKind Kind, string Evidence, PythonCapabilityConfidence Confidence)[] rules =
    {
        (EvalPattern(), PythonCapabilityKind.CodeExecution, "eval()", PythonCapabilityConfidence.Definitive),
        (ExecPattern(), PythonCapabilityKind.CodeExecution, "exec()", PythonCapabilityConfidence.Definitive),
        (CompilePattern(), PythonCapabilityKind.CodeExecution, "compile()", PythonCapabilityConfidence.High),
        (DynamicImportPattern(), PythonCapabilityKind.ImportHook, "__import__()", PythonCapabilityConfidence.High),
        (EnvironmentPattern(), PythonCapabilityKind.EnvironmentAccess, "os.environ", PythonCapabilityConfidence.High),
        (FileOpenPattern(), PythonCapabilityKind.FileSystemAccess, "open()", PythonCapabilityConfidence.High),
    };

    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return found;
        }

        using var reader = new StreamReader(stream);
        var text = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
        var rows = text.Split('\n');

        for (var index = 0; index < rows.Length; index++)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var raw = rows[index];
            var unindented = raw.TrimStart();
            var oneBasedLine = index + 1;

            // Full-line comments are ignored entirely.
            if (unindented.StartsWith('#'))
            {
                continue;
            }

            var looksLikeImport =
                unindented.StartsWith("import ", StringComparison.Ordinal) ||
                unindented.StartsWith("from ", StringComparison.Ordinal);

            if (looksLikeImport)
            {
                found.AddRange(DetectImportCapabilities(unindented, file.VirtualPath, oneBasedLine));
            }

            // The patterns run against the untrimmed line.
            foreach (var rule in rules)
            {
                if (!rule.Pattern.IsMatch(raw))
                {
                    continue;
                }

                found.Add(new PythonCapability(
                    Kind: rule.Kind,
                    SourceFile: file.VirtualPath,
                    LineNumber: oneBasedLine,
                    Evidence: rule.Evidence,
                    Confidence: rule.Confidence));
            }
        }
    }
    catch (IOException)
    {
        // Unreadable files are skipped; anything gathered before the failure is still returned.
    }

    return found;
}
|
||||
|
||||
/// <summary>
/// Maps the modules named on an import line to capabilities using <c>ImportCapabilities</c>.
/// </summary>
/// <param name="line">The import statement text.</param>
/// <param name="sourceFile">The file recorded on each capability.</param>
/// <param name="lineNumber">The line number recorded on each capability.</param>
/// <returns>
/// One capability for an exact key match, plus one for every key that is a dotted child of the
/// module (e.g. key "os.system" for module "os") or a dotted parent of it. Duplicates are not removed.
/// </returns>
private static IEnumerable<PythonCapability> DetectImportCapabilities(string line, string sourceFile, int lineNumber)
{
    foreach (var module in ParseImportLine(line))
    {
        var evidence = $"import {module}";
        var childPrefix = module + ".";

        PythonCapability Report(PythonCapabilityKind kind, PythonCapabilityConfidence confidence) =>
            new(
                Kind: kind,
                SourceFile: sourceFile,
                LineNumber: lineNumber,
                Evidence: evidence,
                Confidence: confidence);

        // Exact module name.
        if (ImportCapabilities.TryGetValue(module, out var exact))
        {
            yield return Report(exact.Kind, exact.Confidence);
        }

        // Dotted relationship in either direction between the module and a table key.
        foreach (var entry in ImportCapabilities)
        {
            if (entry.Key.StartsWith(childPrefix, StringComparison.Ordinal) ||
                module.StartsWith(entry.Key + ".", StringComparison.Ordinal))
            {
                yield return Report(entry.Value.Kind, entry.Value.Confidence);
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Extracts the module names referenced by a single-line Python import statement.
/// </summary>
/// <param name="line">The statement text; surrounding whitespace is ignored.</param>
/// <returns>
/// For <c>import a, b as c</c>: each module name (<c>a</c>, <c>b</c>), aliases dropped.
/// For <c>from a import b</c>: the source module (<c>a</c>) only.
/// Nothing for any other line, and nothing for empty segments such as the trailing
/// comma in <c>import a,</c>.
/// </returns>
private static IEnumerable<string> ParseImportLine(string line)
{
    var trimmed = line.Trim();

    if (trimmed.StartsWith("import ", StringComparison.Ordinal))
    {
        // import a, b, c
        // import a as x
        var whitespace = new[] { ' ', '\t' };

        foreach (var segment in trimmed[7..].Split(','))
        {
            // The first whitespace-delimited token is the module; anything after it
            // ("as alias", a trailing comment) is ignored. An empty segment yields no
            // tokens and is skipped instead of being indexed.
            var tokens = segment.Trim().Split(whitespace, StringSplitOptions.RemoveEmptyEntries);
            if (tokens.Length > 0)
            {
                yield return tokens[0];
            }
        }
    }
    else if (trimmed.StartsWith("from ", StringComparison.Ordinal))
    {
        // from a import b, c
        var rest = trimmed[5..];
        var importAt = rest.IndexOf(" import ", StringComparison.Ordinal);

        // Only the text before " import " names the module; without that keyword the
        // whole remainder is used.
        var module = (importAt >= 0 ? rest[..importAt] : rest).Trim();
        if (module.Length > 0)
        {
            yield return module;
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,130 @@
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
|
||||
|
||||
/// <summary>
/// Represents a detected native extension in a Python project.
/// </summary>
/// <param name="ModuleName">The Python module name for this extension.</param>
/// <param name="Path">The virtual path to the extension file.</param>
/// <param name="Kind">The type of native extension.</param>
/// <param name="Platform">The target platform (if detectable).</param>
/// <param name="Architecture">The target architecture (if detectable).</param>
/// <param name="Source">Where this extension came from.</param>
/// <param name="PackageName">The package this extension belongs to (if known).</param>
/// <param name="Dependencies">Native library dependencies (if detectable).</param>
internal sealed record PythonNativeExtension(
    string ModuleName,
    string Path,
    PythonNativeExtensionKind Kind,
    string? Platform,
    string? Architecture,
    PythonFileSource Source,
    string? PackageName,
    ImmutableArray<string> Dependencies)
{
    /// <summary>
    /// Gets the lower-cased file extension of <see cref="Path"/>, including the leading dot.
    /// </summary>
    public string FileExtension => System.IO.Path.GetExtension(Path).ToLowerInvariant();

    /// <summary>
    /// Gets whether this is a Linux extension: the platform mentions "linux", or the file is a
    /// <c>.so</c> whose platform does not identify it as macOS (macOS builds also use <c>.so</c>).
    /// </summary>
    public bool IsLinux => PlatformContains("linux") || (FileExtension == ".so" && !IsMacOS);

    /// <summary>
    /// Gets whether this is a Windows extension: a <c>.pyd</c> file, or a platform that mentions "win".
    /// </summary>
    /// <remarks>
    /// macOS platforms are excluded first because "darwin" itself contains "win".
    /// </remarks>
    public bool IsWindows => FileExtension == ".pyd" || (!IsMacOS && PlatformContains("win"));

    /// <summary>
    /// Gets whether this is a macOS extension: the platform mentions "darwin" or "macos".
    /// </summary>
    public bool IsMacOS => PlatformContains("darwin") || PlatformContains("macos");

    /// <summary>
    /// Generates metadata entries for this extension.
    /// </summary>
    /// <param name="prefix">Key prefix; every emitted key has the form <c>{prefix}.{name}</c>.</param>
    /// <returns>
    /// The <c>module</c>, <c>path</c> and <c>kind</c> entries, followed by <c>platform</c>, <c>arch</c>,
    /// <c>package</c> and a comma-joined <c>dependencies</c> entry when the corresponding value is present.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return new($"{prefix}.module", ModuleName);
        yield return new($"{prefix}.path", Path);
        yield return new($"{prefix}.kind", Kind.ToString());

        if (Platform is not null)
        {
            yield return new($"{prefix}.platform", Platform);
        }

        if (Architecture is not null)
        {
            yield return new($"{prefix}.arch", Architecture);
        }

        if (PackageName is not null)
        {
            yield return new($"{prefix}.package", PackageName);
        }

        // IsDefaultOrEmpty also covers an uninitialized (default) array, whose Length would throw.
        if (!Dependencies.IsDefaultOrEmpty)
        {
            yield return new($"{prefix}.dependencies", string.Join(",", Dependencies));
        }
    }

    // Case-insensitive substring test against Platform; false when Platform is null.
    private bool PlatformContains(string token) =>
        Platform?.Contains(token, StringComparison.OrdinalIgnoreCase) == true;
}
|
||||
|
||||
/// <summary>
/// Classifies how a native extension was produced.
/// </summary>
internal enum PythonNativeExtensionKind
{
    /// <summary>The extension type is not known.</summary>
    Unknown = 0,

    /// <summary>A standard C extension module (.so, .pyd).</summary>
    CExtension = 1,

    /// <summary>A module compiled with Cython.</summary>
    Cython = 2,

    /// <summary>A CFFI extension.</summary>
    Cffi = 3,

    /// <summary>A pybind11 extension.</summary>
    Pybind11 = 4,

    /// <summary>An extension generated by SWIG.</summary>
    Swig = 5,

    /// <summary>A Rust extension (PyO3/maturin).</summary>
    Rust = 6,

    /// <summary>A NumPy extension.</summary>
    Numpy = 7,

    /// <summary>A WebAssembly module.</summary>
    Wasm = 8
}
|
||||
@@ -0,0 +1,269 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Scans for native extensions in a Python project.
|
||||
/// </summary>
|
||||
internal sealed partial class PythonNativeExtensionScanner
|
||||
{
|
||||
// Pattern to extract module name and platform info from extension file names (not full paths).
// The module part is a single identifier without dots. The optional tag is either the POSIX
// form ".cpython-<ver>[-<platform>]" or the Windows form ".cp<ver>[-<platform>]".
// Examples: _multiarray_umath.cpython-311-x86_64-linux-gnu.so
//           _ssl.cpython-311-darwin.so
//           _ssl.cp311-win_amd64.pyd
//           _ctypes.pyd
[GeneratedRegex(
    @"^(?<module>[a-zA-Z_][a-zA-Z0-9_]*)(?:\.(?:cpython-|cp)(?<pyver>\d+)(?:-(?<platform>[^.]+))?)?\.(?<ext>so|pyd)$",
    RegexOptions.Compiled | RegexOptions.IgnoreCase)]
private static partial Regex ExtensionFilePattern();

// Pattern to detect Cython modules from source: a "# cython:" directive comment at the start of a line
[GeneratedRegex(@"^\s*#\s*cython:", RegexOptions.Compiled | RegexOptions.Multiline)]
private static partial Regex CythonMarkerPattern();

// Pattern for CFFI: an "ffi = FFI()" assignment or an "ffi.cdef(" call
[GeneratedRegex(@"ffi\s*=\s*FFI\(\)|ffi\.cdef\s*\(", RegexOptions.Compiled)]
private static partial Regex CffiPattern();

// Pattern for pybind11
[GeneratedRegex(@"PYBIND11_MODULE|pybind11::module", RegexOptions.Compiled)]
private static partial Regex Pybind11Pattern();

// Pattern for SWIG: a "%module <name>" directive or the SWIG_init symbol
[GeneratedRegex(@"%module\s+\w+|SWIG_init", RegexOptions.Compiled)]
private static partial Regex SwigPattern();

// Pattern for PyO3/Rust
[GeneratedRegex(@"#\[pymodule\]|#\[pyfunction\]|use pyo3::", RegexOptions.Compiled)]
private static partial Regex PyO3Pattern();
|
||||
|
||||
/// <summary>
/// Lazily enumerates the native extensions present in the virtual file system:
/// first every <c>.so</c>/<c>.pyd</c> file, then every <c>.wasm</c> file.
/// </summary>
/// <param name="vfs">The virtual file system to inspect.</param>
/// <returns>One <see cref="PythonNativeExtension"/> per matching file.</returns>
public IEnumerable<PythonNativeExtension> Scan(PythonVirtualFileSystem vfs)
{
    static bool EndsWithIgnoreCase(string path, string suffix) =>
        path.EndsWith(suffix, StringComparison.OrdinalIgnoreCase);

    // Shared-library style extensions (.so / .pyd).
    var sharedLibraries = vfs.Files
        .Where(entry => EndsWithIgnoreCase(entry.VirtualPath, ".so") ||
                        EndsWithIgnoreCase(entry.VirtualPath, ".pyd"))
        .ToList();

    foreach (var library in sharedLibraries)
    {
        if (ParseExtensionFile(library) is { } parsed)
        {
            yield return parsed;
        }
    }

    // WebAssembly modules: reported with a fixed architecture and no platform.
    var wasmModules = vfs.Files
        .Where(entry => EndsWithIgnoreCase(entry.VirtualPath, ".wasm"))
        .ToList();

    foreach (var wasm in wasmModules)
    {
        yield return new PythonNativeExtension(
            ModuleName: Path.GetFileNameWithoutExtension(wasm.VirtualPath),
            Path: wasm.VirtualPath,
            Kind: PythonNativeExtensionKind.Wasm,
            Platform: null,
            Architecture: "wasm32",
            Source: wasm.Source,
            PackageName: ExtractPackageName(wasm.VirtualPath),
            Dependencies: ImmutableArray<string>.Empty);
    }
}
|
||||
|
||||
/// <summary>
/// Infers the kind of native extension from build files found under a package path.
/// </summary>
/// <param name="vfs">The virtual file system to inspect.</param>
/// <param name="packagePath">The package directory; build files are looked up directly beneath it.</param>
/// <param name="cancellationToken">Passed to the file read operations.</param>
/// <returns>
/// <see cref="PythonNativeExtensionKind.Cython"/> when any <c>*.pyx</c> file exists; otherwise the first
/// match in <c>setup.py</c> (CFFI, then pybind11, then numpy.distutils); otherwise
/// <see cref="PythonNativeExtensionKind.Rust"/> when <c>Cargo.toml</c> mentions pyo3; otherwise
/// <see cref="PythonNativeExtensionKind.CExtension"/>.
/// </returns>
public async Task<PythonNativeExtensionKind> DetectExtensionKindAsync(
    PythonVirtualFileSystem vfs,
    string packagePath,
    CancellationToken cancellationToken = default)
{
    // Cython sources take precedence over every other hint.
    if (vfs.EnumerateFiles(packagePath, "*.pyx").Any())
    {
        return PythonNativeExtensionKind.Cython;
    }

    // setup.py hints (only setup.py is inspected here).
    var setupScript = await ReadIfPresentAsync($"{packagePath}/setup.py").ConfigureAwait(false);
    if (setupScript is not null)
    {
        if (CffiPattern().IsMatch(setupScript))
        {
            return PythonNativeExtensionKind.Cffi;
        }

        if (setupScript.Contains("pybind11", StringComparison.OrdinalIgnoreCase))
        {
            return PythonNativeExtensionKind.Pybind11;
        }

        if (setupScript.Contains("numpy.distutils", StringComparison.OrdinalIgnoreCase))
        {
            return PythonNativeExtensionKind.Numpy;
        }
    }

    // Rust/PyO3: a Cargo manifest that mentions pyo3.
    var cargoManifest = await ReadIfPresentAsync($"{packagePath}/Cargo.toml").ConfigureAwait(false);
    if (cargoManifest is not null &&
        cargoManifest.Contains("pyo3", StringComparison.OrdinalIgnoreCase))
    {
        return PythonNativeExtensionKind.Rust;
    }

    return PythonNativeExtensionKind.CExtension;

    // Returns the file's full text, or null when the file is absent or cannot be opened.
    async Task<string?> ReadIfPresentAsync(string path)
    {
        if (!vfs.FileExists(path))
        {
            return null;
        }

        using var stream = await vfs.OpenReadAsync(path, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return null;
        }

        using var reader = new StreamReader(stream);
        return await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
    }
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="PythonNativeExtension"/> description from an extension file's name.
/// </summary>
/// <param name="file">The <c>.so</c>/<c>.pyd</c> file to describe.</param>
/// <returns>
/// The extension description. The module name and platform tag come from
/// <c>ExtensionFilePattern</c> when the file name matches it; otherwise the module name is the
/// text before the first dot. Without a platform tag, the platform falls back to <c>win32</c>
/// for <c>.pyd</c> and <c>posix</c> for anything else.
/// </returns>
private static PythonNativeExtension? ParseExtensionFile(PythonVirtualFile file)
{
    var fileName = Path.GetFileName(file.VirtualPath);
    var parsed = ExtensionFilePattern().Match(fileName);

    string moduleName;
    string? platform = null;
    string? architecture = null;

    if (!parsed.Success)
    {
        // Fallback: keep only the part before the first dot (e.g. "module.cpython-311" -> "module").
        var stem = Path.GetFileNameWithoutExtension(fileName);
        var firstDot = stem.IndexOf('.');
        moduleName = firstDot < 0 ? stem : stem[..firstDot];
    }
    else
    {
        moduleName = parsed.Groups["module"].Value;

        if (parsed.Groups["platform"] is { Success: true } platformTag)
        {
            (platform, architecture) = ParsePlatformString(platformTag.Value);
        }
    }

    // No usable platform tag: infer the platform from the file extension alone.
    platform ??= Path.GetExtension(fileName).ToLowerInvariant() == ".pyd" ? "win32" : "posix";

    return new PythonNativeExtension(
        ModuleName: moduleName,
        Path: file.VirtualPath,
        Kind: InferExtensionKind(file.VirtualPath, moduleName),
        Platform: platform,
        Architecture: architecture,
        Source: file.Source,
        PackageName: ExtractPackageName(file.VirtualPath),
        Dependencies: ImmutableArray<string>.Empty);
}
|
||||
|
||||
/// <summary>
/// Splits a platform tag such as <c>x86_64-linux-gnu</c>, <c>darwin</c> or <c>win_amd64</c>
/// into an operating-system name and a CPU architecture name.
/// </summary>
/// <param name="platformStr">The raw platform tag; matching is case-insensitive.</param>
/// <returns>
/// The platform (<c>linux</c>, <c>darwin</c>, <c>win32</c>) and architecture
/// (<c>x86_64</c>, <c>aarch64</c>, <c>x86</c>); either part is <c>null</c> when not recognised.
/// </returns>
private static (string? Platform, string? Architecture) ParsePlatformString(string platformStr)
{
    bool Mentions(string token) => platformStr.Contains(token, StringComparison.OrdinalIgnoreCase);

    // "darwin" is tested before "win" because the former contains the latter.
    string? os =
        Mentions("linux") ? "linux"
        : Mentions("darwin") ? "darwin"
        : Mentions("win") ? "win32"
        : null;

    // 64-bit names are tested before the bare "x86" substring they contain.
    string? cpu;
    if (Mentions("x86_64") || Mentions("amd64"))
    {
        cpu = "x86_64";
    }
    else if (Mentions("aarch64") || Mentions("arm64"))
    {
        cpu = "aarch64";
    }
    else if (Mentions("i686") || Mentions("x86"))
    {
        cpu = "x86";
    }
    else
    {
        cpu = null;
    }

    return (os, cpu);
}
|
||||
|
||||
/// <summary>
/// Guesses the kind of a native extension from substrings of its path and from
/// well-known module-name prefixes.
/// </summary>
/// <param name="path">The virtual path of the extension file.</param>
/// <param name="moduleName">The module name derived from the file name.</param>
/// <returns>
/// <c>Numpy</c>, <c>Cffi</c> or <c>Cython</c> when a marker is found (checked in that order);
/// otherwise <c>CExtension</c>.
/// </returns>
private static PythonNativeExtensionKind InferExtensionKind(string path, string moduleName)
{
    var loweredPath = path.ToLowerInvariant();

    bool ModuleStartsWith(string prefix) =>
        moduleName.StartsWith(prefix, StringComparison.OrdinalIgnoreCase);

    var looksLikeNumpy = loweredPath.Contains("numpy") || ModuleStartsWith("_multiarray");
    if (looksLikeNumpy)
    {
        return PythonNativeExtensionKind.Numpy;
    }

    var looksLikeCffi = loweredPath.Contains("cffi") || ModuleStartsWith("_cffi");
    if (looksLikeCffi)
    {
        return PythonNativeExtensionKind.Cffi;
    }

    return ModuleStartsWith("_cython")
        ? PythonNativeExtensionKind.Cython
        : PythonNativeExtensionKind.CExtension;
}
|
||||
|
||||
/// <summary>
/// Returns the first path segment of a virtual path, e.g. <c>numpy</c> for
/// <c>numpy/core/_multiarray.so</c>.
/// </summary>
/// <param name="virtualPath">A '/'-separated virtual path.</param>
/// <returns>The text before the first '/', or <c>null</c> when the path has no '/'.</returns>
private static string? ExtractPackageName(string virtualPath)
{
    var firstSeparator = virtualPath.IndexOf('/');
    if (firstSeparator < 0)
    {
        return null;
    }

    return virtualPath[..firstSeparator];
}
|
||||
}
|
||||
@@ -0,0 +1,182 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.Json;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
|
||||
|
||||
/// <summary>
|
||||
/// Adapter for Conda package installations.
|
||||
/// </summary>
|
||||
internal sealed class CondaAdapter : IPythonPackagingAdapter
|
||||
{
|
||||
public string Name => "conda";
|
||||
public int Priority => 20; // Lower priority than pip/dist-info
|
||||
|
||||
/// <summary>
/// Reports whether a conda-meta directory containing at least one JSON record can be
/// located relative to <paramref name="path"/>.
/// </summary>
/// <param name="vfs">The virtual file system to probe.</param>
/// <param name="path">The path from which conda-meta candidates are derived.</param>
/// <returns><c>true</c> when a conda-meta directory with <c>*.json</c> files is found.</returns>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
{
    if (FindCondaMetaPath(vfs, path) is not { } metaDirectory)
    {
        return false;
    }

    return vfs.EnumerateFiles(metaDirectory, "*.json").Any();
}
|
||||
|
||||
/// <summary>
/// Streams one <see cref="PythonPackageInfo"/> per parseable JSON record in the conda-meta
/// directory located relative to <paramref name="path"/>.
/// </summary>
/// <param name="vfs">The virtual file system to read from.</param>
/// <param name="path">The path from which the conda-meta directory is resolved.</param>
/// <param name="cancellationToken">Checked before each record is processed.</param>
/// <returns>The packages whose records could be parsed; unparseable records are skipped.</returns>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    if (FindCondaMetaPath(vfs, path) is not { } metaDirectory)
    {
        yield break;
    }

    // Snapshot the listing so enumeration is not interleaved with the awaits below.
    var records = vfs.EnumerateFiles(metaDirectory, "*.json").ToList();

    foreach (var record in records)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // The conda "history" file is not a package record.
        var isHistory = record.VirtualPath.EndsWith("history", StringComparison.OrdinalIgnoreCase);
        if (isHistory)
        {
            continue;
        }

        var parsed = await ParseCondaMetaJsonAsync(vfs, record, path, cancellationToken).ConfigureAwait(false);
        if (parsed is null)
        {
            continue;
        }

        yield return parsed;
    }
}
|
||||
|
||||
/// <summary>
/// Probes the usual conda-meta locations (beside, one level above and two levels above
/// <paramref name="path"/>) and returns the first one that contains JSON files.
/// </summary>
/// <param name="vfs">The virtual file system to probe.</param>
/// <param name="path">The starting path.</param>
/// <returns>The first candidate holding <c>*.json</c> files, or <c>null</c> when none does.</returns>
private static string? FindCondaMetaPath(PythonVirtualFileSystem vfs, string path)
{
    string[] locations =
    {
        $"{path}/conda-meta",
        $"{path}/../conda-meta",
        $"{path}/../../conda-meta"
    };

    // Candidates are tried in order; doubled slashes (e.g. from a trailing '/' on path)
    // are collapsed before probing.
    return locations
        .Select(location => location.Replace("//", "/"))
        .FirstOrDefault(location => vfs.EnumerateFiles(location, "*.json").Any());
}
|
||||
|
||||
/// <summary>
/// Parses one <c>conda-meta/*.json</c> record into a <see cref="PythonPackageInfo"/>.
/// </summary>
/// <remarks>
/// Parsing is best-effort: a record that cannot be opened, is not valid JSON, is not a JSON
/// object, or has no non-empty string <c>name</c> yields <c>null</c>. Properties with an
/// unexpected JSON type are treated as absent instead of throwing, so a single malformed
/// record cannot abort package discovery.
/// </remarks>
/// <param name="vfs">The virtual file system used to open the record.</param>
/// <param name="file">The conda-meta JSON file.</param>
/// <param name="sitePackagesPath">Reported as the package location.</param>
/// <param name="cancellationToken">Flows into the stream and JSON reads.</param>
/// <returns>The package description, or <c>null</c> when the record is unusable.</returns>
private static async Task<PythonPackageInfo?> ParseCondaMetaJsonAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    string sitePackagesPath,
    CancellationToken cancellationToken)
{
    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return null;
        }

        using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
        var root = doc.RootElement;

        // JsonElement.TryGetProperty throws InvalidOperationException on non-object values.
        if (root.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        var name = ReadCondaString(root, "name");
        if (string.IsNullOrEmpty(name))
        {
            return null;
        }

        var version = ReadCondaString(root, "version");
        var dependencies = ReadCondaStringArray(root, "depends").ToImmutableArray();

        // Derive top-level modules from the recorded .py files.
        var modules = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var recordedFile in ReadCondaStringArray(root, "files"))
        {
            if (!recordedFile.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            var module = ResolveCondaTopLevelModule(recordedFile);
            if (!string.IsNullOrEmpty(module))
            {
                modules.Add(module);
            }
        }

        var topLevelModules = modules.ToImmutableArray();

        // "requested": true marks a package the user asked for explicitly.
        var isDirect = root.TryGetProperty("requested", out var requestedElement)
            && requestedElement.ValueKind == JsonValueKind.True;

        return new PythonPackageInfo(
            Name: name,
            Version: version,
            Kind: PythonPackageKind.Conda,
            Location: sitePackagesPath,
            MetadataPath: file.VirtualPath,
            TopLevelModules: topLevelModules,
            Dependencies: dependencies,
            Extras: ImmutableArray<string>.Empty,
            RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
            InstallerTool: "conda",
            EditableTarget: null,
            IsDirectDependency: isDirect,
            Confidence: PythonPackageConfidence.High);
    }
    catch (JsonException)
    {
        return null;
    }
    catch (IOException)
    {
        return null;
    }
}

/// <summary>Returns a property's value when it is a JSON string; otherwise <c>null</c>.</summary>
private static string? ReadCondaString(JsonElement obj, string propertyName)
{
    return obj.TryGetProperty(propertyName, out var element) && element.ValueKind == JsonValueKind.String
        ? element.GetString()
        : null;
}

/// <summary>Returns the non-empty string items of a JSON array property; empty when absent or not an array.</summary>
private static List<string> ReadCondaStringArray(JsonElement obj, string propertyName)
{
    var items = new List<string>();

    if (!obj.TryGetProperty(propertyName, out var element) || element.ValueKind != JsonValueKind.Array)
    {
        return items;
    }

    foreach (var item in element.EnumerateArray())
    {
        // GetString throws on numbers/objects, so only string items are read.
        if (item.ValueKind == JsonValueKind.String && item.GetString() is { Length: > 0 } text)
        {
            items.Add(text);
        }
    }

    return items;
}

/// <summary>Maps a recorded '/'-separated .py file path to its top-level module name, or <c>null</c> to skip it.</summary>
private static string? ResolveCondaTopLevelModule(string recordedFile)
{
    var segments = recordedFile.Split('/');

    // Installed packages are recorded as ".../site-packages/<module>/..."; the module is the
    // segment right after site-packages.
    var siteIndex = Array.FindIndex(
        segments,
        segment => segment.Equals("site-packages", StringComparison.OrdinalIgnoreCase));

    string candidate;
    if (siteIndex >= 0 && siteIndex + 1 < segments.Length)
    {
        candidate = segments[siteIndex + 1];
    }
    else
    {
        candidate = segments[0];

        // Outside site-packages, ignore interpreter/standard-library trees.
        if (candidate.Equals("lib", StringComparison.OrdinalIgnoreCase) ||
            candidate.Equals("bin", StringComparison.OrdinalIgnoreCase) ||
            candidate.StartsWith("python", StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }
    }

    // Strip only a trailing ".py" (single-file modules), never interior occurrences.
    return candidate.EndsWith(".py", StringComparison.OrdinalIgnoreCase)
        ? candidate[..^3]
        : candidate;
}
|
||||
}
|
||||
@@ -0,0 +1,196 @@
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
|
||||
|
||||
/// <summary>
|
||||
/// Adapter for container layer overlays that may contain Python packages.
|
||||
/// Handles whiteout files and layer ordering.
|
||||
/// </summary>
|
||||
internal sealed class ContainerLayerAdapter : IPythonPackagingAdapter
|
||||
{
|
||||
public string Name => "container-layer";
|
||||
public int Priority => 100; // Lowest priority - use other adapters first
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="path"/> looks like a container layer root: either it
/// holds whiteout files (<c>.wh.*</c>) or it has typical container layout markers.
/// </summary>
/// <param name="vfs">The virtual file system to probe.</param>
/// <param name="path">The candidate layer root.</param>
/// <returns><c>true</c> when either indicator is present.</returns>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
{
    var hasWhiteouts = vfs.EnumerateFiles(path, ".wh.*").Any();
    if (hasWhiteouts)
    {
        return true;
    }

    return HasContainerLayoutMarkers(vfs, path);
}
|
||||
|
||||
/// <summary>
/// Streams packages found inside a container layer: first wheel installs discovered through
/// <see cref="DistInfoAdapter"/> in each site-packages directory, then vendored packages from
/// the well-known application directories.
/// </summary>
/// <param name="vfs">The virtual file system to scan.</param>
/// <param name="path">The container layer root.</param>
/// <param name="cancellationToken">Checked before each directory is processed.</param>
/// <returns>
/// Wheel packages re-located to their site-packages directory with container-adjusted
/// confidence, followed by vendored packages.
/// </returns>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    var sitePackagesRoots = FindPythonPathsInContainer(vfs, path);
    var wheelAdapter = new DistInfoAdapter();

    foreach (var sitePackages in sitePackagesRoots)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (wheelAdapter.CanHandle(vfs, sitePackages))
        {
            await foreach (var package in wheelAdapter.DiscoverPackagesAsync(vfs, sitePackages, cancellationToken).ConfigureAwait(false))
            {
                // Re-home the package to this layer's site-packages and temper its confidence.
                var adjusted = AdjustConfidenceForContainer(package.Confidence);
                yield return package with
                {
                    Location = sitePackages,
                    Confidence = adjusted
                };
            }
        }
    }

    // Vendored code under /app, /opt, ... is reported after the installed packages.
    foreach (var vendorRoot in FindVendoredPathsInContainer(vfs, path))
    {
        cancellationToken.ThrowIfCancellationRequested();

        await foreach (var vendored in DiscoverVendoredPackagesAsync(vfs, vendorRoot, cancellationToken).ConfigureAwait(false))
        {
            yield return vendored;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Checks whether any location typical of a container root file system
/// (<c>etc/os-release</c>, <c>usr/lib</c>, <c>usr/local/lib</c>, <c>app</c>, <c>opt</c>)
/// yields at least one file under <paramref name="path"/>.
/// </summary>
/// <param name="vfs">The virtual file system to probe.</param>
/// <param name="path">The candidate container root.</param>
/// <returns><c>true</c> as soon as one marker location yields a file.</returns>
private static bool HasContainerLayoutMarkers(PythonVirtualFileSystem vfs, string path)
{
    string[] markerLocations =
    {
        $"{path}/etc/os-release",
        $"{path}/usr/lib",
        $"{path}/usr/local/lib",
        $"{path}/app",
        $"{path}/opt"
    };

    foreach (var location in markerLocations)
    {
        if (vfs.EnumerateFiles(location, "*").Any())
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Collects the site-packages directories found under <paramref name="path"/>.
/// </summary>
/// <remarks>
/// Directories are derived from the parent of every file matched by the
/// <c>site-packages/*</c> pattern; only parents whose path ends in <c>site-packages</c> are
/// kept. The result is de-duplicated case-insensitively. The unused array of hard-coded
/// install-path patterns that the previous version allocated on every call was removed; it
/// never took part in the search.
/// </remarks>
/// <param name="vfs">The virtual file system to scan.</param>
/// <param name="path">The container layer root.</param>
/// <returns>The distinct site-packages directories, fully materialized.</returns>
private static IEnumerable<string> FindPythonPathsInContainer(PythonVirtualFileSystem vfs, string path)
{
    var sitePackagesDirs = vfs.EnumerateFiles(path, "site-packages/*")
        .Select(f => GetParentDirectory(f.VirtualPath))
        .Where(p => p is not null && p.EndsWith("site-packages", StringComparison.OrdinalIgnoreCase))
        .Cast<string>();

    // The case-insensitive set performs the de-duplication and is built eagerly, so callers
    // can enumerate the result repeatedly without re-querying the file system.
    return new HashSet<string>(sitePackagesDirs, StringComparer.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Returns the well-known vendoring directories under <paramref name="path"/>
/// (<c>app/vendor</c>, <c>app/lib</c>, <c>app/third_party</c>, <c>opt/app/vendor</c>) that
/// contain at least one <c>*.py</c> file.
/// </summary>
/// <param name="vfs">The virtual file system to probe.</param>
/// <param name="path">The container layer root.</param>
/// <returns>The matching directories, in the order listed above.</returns>
private static IEnumerable<string> FindVendoredPathsInContainer(PythonVirtualFileSystem vfs, string path)
{
    string[] knownVendorDirs =
    {
        $"{path}/app/vendor",
        $"{path}/app/lib",
        $"{path}/app/third_party",
        $"{path}/opt/app/vendor"
    };

    return knownVendorDirs
        .Where(dir => vfs.EnumerateFiles(dir, "*.py").Any())
        .ToList();
}
|
||||
|
||||
/// <summary>
/// Reports every directory that holds an <c>__init__.py</c> under <paramref name="path"/> as
/// a low-confidence vendored package named after that directory.
/// </summary>
/// <param name="vfs">The virtual file system to scan.</param>
/// <param name="path">The vendoring directory; also reported as the package location.</param>
/// <param name="cancellationToken">Checked before each <c>__init__.py</c> is processed.</param>
/// <returns>One package per distinct (case-insensitive) directory name.</returns>
private static async IAsyncEnumerable<PythonPackageInfo> DiscoverVendoredPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
{
    var packageMarkers = vfs.EnumerateFiles(path, "__init__.py").ToList();
    var seenNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var marker in packageMarkers)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (GetParentDirectory(marker.VirtualPath) is not { } containingDir)
        {
            continue;
        }

        var vendoredName = Path.GetFileName(containingDir);

        // HashSet.Add returns false for names already reported, so each name is emitted once.
        if (string.IsNullOrEmpty(vendoredName) || !seenNames.Add(vendoredName))
        {
            continue;
        }

        yield return new PythonPackageInfo(
            Name: vendoredName,
            Version: null,
            Kind: PythonPackageKind.Vendored,
            Location: path,
            MetadataPath: null,
            TopLevelModules: ImmutableArray.Create(vendoredName),
            Dependencies: ImmutableArray<string>.Empty,
            Extras: ImmutableArray<string>.Empty,
            RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
            InstallerTool: null,
            EditableTarget: null,
            IsDirectDependency: true,
            Confidence: PythonPackageConfidence.Low);
    }
}
|
||||
|
||||
/// <summary>
/// Caps package confidence for container layers, whose files may be incomplete or overlaid:
/// <c>Definitive</c> becomes <c>High</c>; every other level is returned unchanged.
/// </summary>
/// <param name="confidence">The confidence reported by the underlying adapter.</param>
/// <returns>The adjusted confidence.</returns>
private static PythonPackageConfidence AdjustConfidenceForContainer(PythonPackageConfidence confidence)
{
    if (confidence == PythonPackageConfidence.Definitive)
    {
        return PythonPackageConfidence.High;
    }

    return confidence;
}
|
||||
|
||||
/// <summary>
/// Returns the portion of a '/'-separated path before its last separator.
/// </summary>
/// <param name="path">The virtual path.</param>
/// <returns>
/// The parent path, or <c>null</c> when there is no separator or the only separator is the
/// leading character.
/// </returns>
private static string? GetParentDirectory(string path)
{
    var cut = path.LastIndexOf('/');
    if (cut <= 0)
    {
        return null;
    }

    return path.Substring(0, cut);
}
|
||||
}
|
||||
@@ -0,0 +1,316 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
|
||||
|
||||
/// <summary>
|
||||
/// Adapter for standard .dist-info metadata (wheel installations).
|
||||
/// </summary>
|
||||
internal sealed partial class DistInfoAdapter : IPythonPackagingAdapter
|
||||
{
|
||||
public string Name => "dist-info";
|
||||
public int Priority => 10;
|
||||
|
||||
[GeneratedRegex(@"^(?<name>[A-Za-z0-9][\w.-]*)-(?<version>[\d.]+(?:\.dev\d*|a\d*|b\d*|rc\d*|post\d*)?)\.dist-info$",
|
||||
RegexOptions.Compiled | RegexOptions.IgnoreCase)]
|
||||
private static partial Regex DistInfoDirPattern();
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="path"/> contains at least one
/// <c>*.dist-info/METADATA</c> file.
/// </summary>
/// <param name="vfs">The virtual file system to probe.</param>
/// <param name="path">The directory to inspect.</param>
/// <returns><c>true</c> when a METADATA file is found.</returns>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
{
    var metadataFiles = vfs.EnumerateFiles(path, "*.dist-info/METADATA");
    return metadataFiles.Any();
}
|
||||
|
||||
/// <summary>
/// Streams one <see cref="PythonPackageInfo"/> per <c>*.dist-info</c> directory found under
/// <paramref name="path"/>.
/// </summary>
/// <remarks>
/// Name and version are seeded from the directory name and then overridden by the
/// <c>Name</c>/<c>Version</c> METADATA headers when present. A directory whose name does not
/// match the naming pattern (for example a local version such as <c>2.1.0+cu118</c>) is still
/// reported as long as METADATA supplies a name; previously such packages were dropped. A
/// directory is skipped only when neither source yields a package name.
/// </remarks>
/// <param name="vfs">The virtual file system to read from.</param>
/// <param name="path">The directory containing the <c>*.dist-info</c> directories.</param>
/// <param name="cancellationToken">Checked before each package and passed to all reads.</param>
/// <returns>The discovered wheel packages.</returns>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    // Find all .dist-info directories
    var metadataFiles = vfs.EnumerateFiles(path, "*.dist-info/METADATA").ToList();

    foreach (var metadataFile in metadataFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var distInfoPath = Path.GetDirectoryName(metadataFile.VirtualPath) ?? string.Empty;
        var distInfoName = Path.GetFileName(distInfoPath);

        // Seed name/version from "<name>-<version>.dist-info" when the directory follows it.
        string? packageName = null;
        string? version = null;

        var match = DistInfoDirPattern().Match(distInfoName);
        if (match.Success)
        {
            packageName = match.Groups["name"].Value;
            version = match.Groups["version"].Value;
        }

        // Read METADATA file
        var metadata = await ReadMetadataAsync(vfs, metadataFile, cancellationToken).ConfigureAwait(false);

        // Use metadata name if available (more accurate)
        if (metadata.TryGetValue("Name", out var metadataName) && !string.IsNullOrEmpty(metadataName))
        {
            packageName = metadataName;
        }

        if (metadata.TryGetValue("Version", out var metadataVersion) && !string.IsNullOrEmpty(metadataVersion))
        {
            version = metadataVersion;
        }

        // Neither the directory name nor METADATA identified the package.
        if (string.IsNullOrEmpty(packageName))
        {
            continue;
        }

        // Read top_level.txt
        var topLevelModules = await ReadTopLevelAsync(vfs, distInfoPath, cancellationToken).ConfigureAwait(false);

        // Read RECORD
        var recordEntries = await ReadRecordAsync(vfs, distInfoPath, cancellationToken).ConfigureAwait(false);

        // Read INSTALLER
        var installer = await ReadInstallerAsync(vfs, distInfoPath, cancellationToken).ConfigureAwait(false);

        // Extract dependencies and extras from metadata
        var dependencies = ExtractDependencies(metadata);
        var extras = ExtractExtras(metadata);

        // Determine if it's a direct dependency (from direct_url.json or REQUESTED)
        var isDirect = await IsDependencyDirectAsync(vfs, distInfoPath, cancellationToken).ConfigureAwait(false);

        // A non-empty RECORD raises confidence to Definitive.
        var confidence = recordEntries.Length > 0
            ? PythonPackageConfidence.Definitive
            : PythonPackageConfidence.High;

        yield return new PythonPackageInfo(
            Name: packageName,
            Version: version,
            Kind: PythonPackageKind.Wheel,
            Location: path,
            MetadataPath: distInfoPath,
            TopLevelModules: topLevelModules,
            Dependencies: dependencies,
            Extras: extras,
            RecordFiles: recordEntries,
            InstallerTool: installer,
            EditableTarget: null,
            IsDirectDependency: isDirect,
            Confidence: confidence);
    }
}
|
||||
|
||||
/// <summary>
/// Reads the header section of a METADATA file into a case-insensitive dictionary.
/// </summary>
/// <remarks>
/// <para>Parsing stops at the first empty line, which ends the headers.</para>
/// <para>
/// A header that occurs more than once (for example <c>Requires-Dist</c>) has its values
/// joined with <c>'\n'</c> in order of appearance. This holds for every occurrence, including
/// the last header before the blank line or end of file; previously that final occurrence
/// either replaced the accumulated values or was dropped.
/// </para>
/// <para>
/// Lines starting with a space or tab continue the current header and are appended to its
/// value separated by a single space, so a folded value stays one entry when callers split
/// on <c>'\n'</c>.
/// </para>
/// <para>I/O errors are swallowed; whatever was parsed before the error is returned.</para>
/// </remarks>
/// <param name="vfs">The virtual file system used to open the file.</param>
/// <param name="file">The METADATA file.</param>
/// <param name="cancellationToken">Checked for every line read.</param>
/// <returns>The parsed headers; empty when the file cannot be opened.</returns>
private static async Task<Dictionary<string, string>> ReadMetadataAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    var result = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return result;
        }

        using var reader = new StreamReader(stream);
        string? currentKey = null;
        var currentValue = new System.Text.StringBuilder();

        // Stores the pending header, appending to any earlier occurrence of the same key.
        void CommitPendingHeader()
        {
            if (currentKey is null)
            {
                return;
            }

            var value = currentValue.ToString().Trim();
            result[currentKey] = result.TryGetValue(currentKey, out var existing)
                ? existing + "\n" + value
                : value;

            currentKey = null;
            currentValue.Clear();
        }

        string? line;
        while ((line = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // Empty line marks end of headers
            if (string.IsNullOrEmpty(line))
            {
                break;
            }

            // Continuation line: fold into the current header's value.
            if (line.StartsWith(' ') || line.StartsWith('\t'))
            {
                if (currentKey is not null)
                {
                    currentValue.Append(' ').Append(line.Trim());
                }

                continue;
            }

            // New header; lines without a "key:" prefix are ignored.
            var colonIdx = line.IndexOf(':');
            if (colonIdx > 0)
            {
                CommitPendingHeader();

                currentKey = line[..colonIdx].Trim();
                currentValue.Append(line[(colonIdx + 1)..].Trim());
            }
        }

        // Flush the header that was still pending at the blank line or end of file.
        CommitPendingHeader();
    }
    catch (IOException)
    {
        // Ignore read errors
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Reads <c>top_level.txt</c> from a dist-info directory and returns its non-empty, trimmed
/// lines.
/// </summary>
/// <param name="vfs">The virtual file system to read from.</param>
/// <param name="distInfoPath">The dist-info directory.</param>
/// <param name="cancellationToken">Passed to the open and read calls.</param>
/// <returns>The listed names; empty when the file is missing or unreadable.</returns>
private static async Task<ImmutableArray<string>> ReadTopLevelAsync(
    PythonVirtualFileSystem vfs,
    string distInfoPath,
    CancellationToken cancellationToken)
{
    const StringSplitOptions LineOptions =
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    try
    {
        using var source = await vfs.OpenReadAsync($"{distInfoPath}/top_level.txt", cancellationToken).ConfigureAwait(false);
        if (source is null)
        {
            return ImmutableArray<string>.Empty;
        }

        using var text = new StreamReader(source);
        var body = await text.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        var names = body.Split('\n', LineOptions);
        return names.ToImmutableArray();
    }
    catch (IOException)
    {
        return ImmutableArray<string>.Empty;
    }
}
|
||||
|
||||
/// <summary>
/// Reads the <c>RECORD</c> file of a dist-info directory and parses each non-empty line with
/// <c>PythonRecordEntry.Parse</c>, discarding lines that parse to <c>null</c>.
/// </summary>
/// <param name="vfs">The virtual file system to read from.</param>
/// <param name="distInfoPath">The dist-info directory.</param>
/// <param name="cancellationToken">Passed to the open and read calls.</param>
/// <returns>The parsed entries; empty when the file is missing or unreadable.</returns>
private static async Task<ImmutableArray<PythonRecordEntry>> ReadRecordAsync(
    PythonVirtualFileSystem vfs,
    string distInfoPath,
    CancellationToken cancellationToken)
{
    try
    {
        using var source = await vfs.OpenReadAsync($"{distInfoPath}/RECORD", cancellationToken).ConfigureAwait(false);
        if (source is null)
        {
            return ImmutableArray<PythonRecordEntry>.Empty;
        }

        using var text = new StreamReader(source);
        var body = await text.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
        var rows = body.Split('\n', StringSplitOptions.RemoveEmptyEntries);

        return rows
            .Select(PythonRecordEntry.Parse)
            .Where(entry => entry is not null)
            .Cast<PythonRecordEntry>()
            .ToImmutableArray();
    }
    catch (IOException)
    {
        return ImmutableArray<PythonRecordEntry>.Empty;
    }
}
|
||||
|
||||
/// <summary>
/// Reads the <c>INSTALLER</c> file of a dist-info directory and returns its trimmed content.
/// </summary>
/// <param name="vfs">The virtual file system to read from.</param>
/// <param name="distInfoPath">The dist-info directory.</param>
/// <param name="cancellationToken">Passed to the open and read calls.</param>
/// <returns>The trimmed file content, or <c>null</c> when the file is missing or unreadable.</returns>
private static async Task<string?> ReadInstallerAsync(
    PythonVirtualFileSystem vfs,
    string distInfoPath,
    CancellationToken cancellationToken)
{
    try
    {
        using var source = await vfs.OpenReadAsync($"{distInfoPath}/INSTALLER", cancellationToken).ConfigureAwait(false);
        if (source is null)
        {
            return null;
        }

        using var text = new StreamReader(source);
        var installerName = await text.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
        return installerName.Trim();
    }
    catch (IOException)
    {
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Decides whether a distribution was installed directly: either a <c>REQUESTED</c> marker
/// exists in the dist-info directory, or <c>direct_url.json</c> (PEP 610) exists and can be
/// opened.
/// </summary>
/// <param name="vfs">The virtual file system to probe.</param>
/// <param name="distInfoPath">The dist-info directory.</param>
/// <param name="cancellationToken">Passed to the open call.</param>
/// <returns><c>true</c> when either marker is present; <c>false</c> otherwise or on I/O error.</returns>
private static async Task<bool> IsDependencyDirectAsync(
    PythonVirtualFileSystem vfs,
    string distInfoPath,
    CancellationToken cancellationToken)
{
    // REQUESTED is pip's marker for packages the user asked for explicitly.
    if (vfs.FileExists($"{distInfoPath}/REQUESTED"))
    {
        return true;
    }

    var directUrl = $"{distInfoPath}/direct_url.json";
    if (!vfs.FileExists(directUrl))
    {
        return false;
    }

    try
    {
        // Only the ability to open the file matters; its content is not inspected.
        using var probe = await vfs.OpenReadAsync(directUrl, cancellationToken).ConfigureAwait(false);
        return probe is not null;
    }
    catch (IOException)
    {
        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Returns the <c>Requires-Dist</c> entries from parsed metadata, where multiple entries are
/// stored as one newline-separated string.
/// </summary>
/// <param name="metadata">The parsed metadata headers.</param>
/// <returns>The trimmed, non-empty entries; empty when the header is absent.</returns>
private static ImmutableArray<string> ExtractDependencies(Dictionary<string, string> metadata)
{
    const StringSplitOptions EntryOptions =
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    return metadata.TryGetValue("Requires-Dist", out var joined)
        ? joined.Split('\n', EntryOptions).ToImmutableArray()
        : ImmutableArray<string>.Empty;
}
|
||||
|
||||
/// <summary>
/// Returns the <c>Provides-Extra</c> entries from parsed metadata, where multiple entries are
/// stored as one newline-separated string.
/// </summary>
/// <param name="metadata">The parsed metadata headers.</param>
/// <returns>The trimmed, non-empty entries; empty when the header is absent.</returns>
private static ImmutableArray<string> ExtractExtras(Dictionary<string, string> metadata)
{
    const StringSplitOptions EntryOptions =
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    return metadata.TryGetValue("Provides-Extra", out var joined)
        ? joined.Split('\n', EntryOptions).ToImmutableArray()
        : ImmutableArray<string>.Empty;
}
|
||||
}
|
||||
@@ -0,0 +1,276 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
|
||||
|
||||
/// <summary>
|
||||
/// Adapter for pip editable installations (.egg-link files).
|
||||
/// </summary>
|
||||
internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
|
||||
{
|
||||
public string Name => "pip-editable";
|
||||
public int Priority => 5; // Higher priority than dist-info
|
||||
|
||||
[GeneratedRegex(@"^(?<name>[A-Za-z0-9][\w.-]*)\.egg-link$",
|
||||
RegexOptions.Compiled | RegexOptions.IgnoreCase)]
|
||||
private static partial Regex EggLinkPattern();
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="path"/> contains at least one <c>*.egg-link</c> file.
/// </summary>
/// <param name="vfs">The virtual file system to probe.</param>
/// <param name="path">The directory to inspect.</param>
/// <returns><c>true</c> when an egg-link file is found.</returns>
public bool CanHandle(PythonVirtualFileSystem vfs, string path)
{
    var eggLinks = vfs.EnumerateFiles(path, "*.egg-link");
    return eggLinks.Any();
}
|
||||
|
||||
/// <summary>
/// Streams one <see cref="PythonPackageInfo"/> per <c>*.egg-link</c> file under
/// <paramref name="path"/>, enriched with data from the linked project's <c>.egg-info</c>
/// and <c>pyproject.toml</c>.
/// </summary>
/// <remarks>
/// The package name starts as the egg-link file name and is replaced by the pyproject name
/// when one is found. The pyproject version is used only when the egg-info supplied none.
/// Egg-links that do not match the naming pattern or have no target path are skipped.
/// </remarks>
/// <param name="vfs">The virtual file system to read from.</param>
/// <param name="path">The directory containing the egg-link files.</param>
/// <param name="cancellationToken">Checked before each egg-link and passed to all reads.</param>
/// <returns>The discovered editable installs.</returns>
public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
    PythonVirtualFileSystem vfs,
    string path,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    foreach (var link in vfs.EnumerateFiles(path, "*.egg-link").ToList())
    {
        cancellationToken.ThrowIfCancellationRequested();

        var linkName = EggLinkPattern().Match(Path.GetFileName(link.VirtualPath));
        if (!linkName.Success)
        {
            continue;
        }

        var resolvedName = linkName.Groups["name"].Value;

        // The first line of the .egg-link names the project directory.
        var projectDir = await ReadEggLinkAsync(vfs, link, cancellationToken).ConfigureAwait(false);
        if (string.IsNullOrEmpty(projectDir))
        {
            continue;
        }

        var (resolvedVersion, headers, modules) =
            await ReadEggInfoAsync(vfs, projectDir, resolvedName, cancellationToken).ConfigureAwait(false);

        var project = await ReadPyprojectAsync(vfs, projectDir, cancellationToken).ConfigureAwait(false);

        resolvedName = project.Name ?? resolvedName;

        if (string.IsNullOrEmpty(resolvedVersion) && project.Version is not null)
        {
            resolvedVersion = project.Version;
        }

        ImmutableArray<string> requirements;
        if (headers.TryGetValue("Requires-Dist", out var joined))
        {
            requirements = joined
                .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
                .ToImmutableArray();
        }
        else
        {
            requirements = ImmutableArray<string>.Empty;
        }

        yield return new PythonPackageInfo(
            Name: resolvedName,
            Version: resolvedVersion,
            Kind: PythonPackageKind.PipEditable,
            Location: path,
            MetadataPath: null,
            TopLevelModules: modules,
            Dependencies: requirements,
            Extras: ImmutableArray<string>.Empty,
            RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
            InstallerTool: "pip",
            EditableTarget: projectDir,
            IsDirectDependency: true, // Editable installs are always direct
            Confidence: PythonPackageConfidence.High);
    }
}
|
||||
|
||||
/// <summary>
/// Reads the first line of an <c>.egg-link</c> file, which holds the path of the linked
/// project.
/// </summary>
/// <param name="vfs">The virtual file system to read from.</param>
/// <param name="file">The egg-link file.</param>
/// <param name="cancellationToken">Passed to the open and read calls.</param>
/// <returns>
/// The trimmed first line, or <c>null</c> when the file is missing, empty or unreadable.
/// </returns>
private static async Task<string?> ReadEggLinkAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    try
    {
        using var source = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (source is null)
        {
            return null;
        }

        using var text = new StreamReader(source);
        var target = await text.ReadLineAsync(cancellationToken).ConfigureAwait(false);
        return target is null ? null : target.Trim();
    }
    catch (IOException)
    {
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Reads version, header metadata and top-level modules from the <c>.egg-info</c> directory
/// of an editable project.
/// </summary>
/// <remarks>
/// <para>
/// The <c>PKG-INFO</c> whose directory name starts with the package name (raw or normalized)
/// is preferred; otherwise the first <c>*.egg-info/PKG-INFO</c> found is used.
/// </para>
/// <para>
/// Header parsing stops at the first empty line. Repeated headers such as
/// <c>Requires-Dist</c> are joined with <c>'\n'</c> (the caller splits on that separator);
/// previously each occurrence overwrote the last, leaving only one dependency. Lines starting
/// with a space or tab are continuations of the previous header and are skipped rather than
/// being misread as new headers when they contain a colon.
/// </para>
/// <para>I/O errors are swallowed; whatever was read before the error is returned.</para>
/// </remarks>
/// <param name="vfs">The virtual file system to read from.</param>
/// <param name="targetPath">The project directory named by the egg-link.</param>
/// <param name="packageName">The package name used to pick the matching egg-info directory.</param>
/// <param name="cancellationToken">Passed to all open and read calls.</param>
/// <returns>The version (if any), the parsed headers and the top-level module names.</returns>
private static async Task<(string? Version, Dictionary<string, string> Metadata, ImmutableArray<string> TopLevel)> ReadEggInfoAsync(
    PythonVirtualFileSystem vfs,
    string targetPath,
    string packageName,
    CancellationToken cancellationToken)
{
    var metadata = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
    string? version = null;
    var topLevel = ImmutableArray<string>.Empty;

    // Look for .egg-info directories
    var eggInfoFiles = vfs.EnumerateFiles(targetPath, "*.egg-info/PKG-INFO").ToList();

    PythonVirtualFile? pkgInfoFile = null;
    foreach (var file in eggInfoFiles)
    {
        var dirName = Path.GetFileName(Path.GetDirectoryName(file.VirtualPath) ?? string.Empty);
        if (dirName.StartsWith(packageName, StringComparison.OrdinalIgnoreCase) ||
            dirName.StartsWith(PythonPackageInfo.NormalizeName(packageName), StringComparison.OrdinalIgnoreCase))
        {
            pkgInfoFile = file;
            break;
        }
    }

    // No directory matched by name: fall back to the first egg-info found.
    if (pkgInfoFile is null && eggInfoFiles.Count > 0)
    {
        pkgInfoFile = eggInfoFiles[0];
    }

    if (pkgInfoFile is null)
    {
        return (version, metadata, topLevel);
    }

    var eggInfoPath = Path.GetDirectoryName(pkgInfoFile.VirtualPath);

    try
    {
        using var stream = await vfs.OpenReadAsync(pkgInfoFile.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is not null)
        {
            using var reader = new StreamReader(stream);
            string? metadataLine;
            while ((metadataLine = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
            {
                // Empty line marks end of headers
                if (string.IsNullOrEmpty(metadataLine))
                {
                    break;
                }

                // Folded continuation of the previous header, not a header of its own.
                if (metadataLine[0] is ' ' or '\t')
                {
                    continue;
                }

                var colonIdx = metadataLine.IndexOf(':');
                if (colonIdx <= 0)
                {
                    continue;
                }

                var key = metadataLine[..colonIdx].Trim();
                var value = metadataLine[(colonIdx + 1)..].Trim();

                // Accumulate repeated headers instead of overwriting them.
                metadata[key] = metadata.TryGetValue(key, out var existing)
                    ? existing + "\n" + value
                    : value;
            }
        }
    }
    catch (IOException)
    {
        // Ignore
    }

    if (metadata.TryGetValue("Version", out var v))
    {
        version = v;
    }

    // Read top_level.txt
    if (eggInfoPath is not null)
    {
        var topLevelPath = $"{eggInfoPath}/top_level.txt";
        try
        {
            using var stream = await vfs.OpenReadAsync(topLevelPath, cancellationToken).ConfigureAwait(false);
            if (stream is not null)
            {
                using var reader = new StreamReader(stream);
                var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
                topLevel = content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
                    .ToImmutableArray();
            }
        }
        catch (IOException)
        {
            // Ignore
        }
    }

    return (version, metadata, topLevel);
}
|
||||
|
||||
/// <summary>
/// Reads the project name and version from <c>pyproject.toml</c> under <paramref name="targetPath"/>.
/// </summary>
/// <remarks>
/// This is a line-oriented scan, not a TOML parser. Only the <c>[project]</c> and <c>[tool.poetry]</c>
/// tables are consulted and the first value found for each key is kept, so unrelated tables that also
/// carry a <c>name</c> or <c>version</c> key (for example <c>[[tool.poetry.source]]</c>) cannot
/// overwrite the project's own values.
/// </remarks>
/// <param name="vfs">Virtual filesystem used to open the file.</param>
/// <param name="targetPath">Virtual directory expected to contain <c>pyproject.toml</c>.</param>
/// <param name="cancellationToken">Token passed to the read.</param>
/// <returns>The name and version; either is <c>null</c> when not found or when the file cannot be read.</returns>
private static async Task<(string? Name, string? Version)> ReadPyprojectAsync(
    PythonVirtualFileSystem vfs,
    string targetPath,
    CancellationToken cancellationToken)
{
    string content;
    try
    {
        using var stream = await vfs.OpenReadAsync($"{targetPath}/pyproject.toml", cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return (null, null);
        }

        using var reader = new StreamReader(stream);
        content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
    }
    catch (IOException)
    {
        return (null, null);
    }

    string? name = null;
    string? version = null;
    var inProjectTable = false;

    foreach (var rawLine in content.Split('\n'))
    {
        var line = rawLine.Trim();

        if (line.StartsWith('['))
        {
            // Table header: "[project]", "[tool.poetry]", "[[tool.poetry.source]]", ...
            var closeIdx = line.IndexOf(']');
            var table = (closeIdx > 0 ? line[..closeIdx] : line).TrimStart('[').Trim();
            inProjectTable = table is "project" or "tool.poetry";
            continue;
        }

        if (!inProjectTable)
        {
            continue;
        }

        var eqIdx = line.IndexOf('=');
        if (eqIdx <= 0)
        {
            continue;
        }

        // Compare the whole key so that e.g. "namespace_packages" is not mistaken for "name".
        switch (line[..eqIdx].Trim())
        {
            case "name":
                name ??= ExtractTomlValue(line);
                break;
            case "version":
                version ??= ExtractTomlValue(line);
                break;
        }
    }

    return (name, version);
}
|
||||
|
||||
/// <summary>
/// Extracts the value from a single-line TOML <c>key = value</c> assignment.
/// </summary>
/// <remarks>
/// Quoted values are returned without their quotes and without anything after the closing quote
/// (such as a trailing <c># comment</c>). Escape sequences are not decoded. An unterminated quoted
/// value is returned unchanged rather than throwing.
/// </remarks>
/// <param name="line">The assignment line.</param>
/// <returns>The value text, or <c>null</c> when the line contains no <c>=</c>.</returns>
private static string? ExtractTomlValue(string line)
{
    var eqIdx = line.IndexOf('=');
    if (eqIdx < 0)
    {
        return null;
    }

    var value = line[(eqIdx + 1)..].Trim();
    if (value.Length == 0)
    {
        return value;
    }

    var quote = value[0];
    if (quote is '"' or '\'')
    {
        for (var i = 1; i < value.Length; i++)
        {
            // Backslash escapes exist only in basic (double-quoted) strings; skip the escaped char.
            if (quote == '"' && value[i] == '\\')
            {
                i++;
                continue;
            }

            if (value[i] == quote)
            {
                return value[1..i];
            }
        }

        // No closing quote (including a lone quote character): hand back the raw text.
        return value;
    }

    // Bare value: drop a trailing comment if present.
    var commentIdx = value.IndexOf('#');
    return commentIdx < 0 ? value : value[..commentIdx].TrimEnd();
}
|
||||
}
|
||||
@@ -0,0 +1,305 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
|
||||
|
||||
/// <summary>
/// Adapter for Poetry project layouts and editable installs.
/// </summary>
/// <remarks>
/// A directory is handled when it contains both <c>pyproject.toml</c> and <c>poetry.lock</c>.
/// Discovery yields the project itself (read from <c>pyproject.toml</c>) followed by one entry per
/// <c>[[package]]</c> table of the lock file. Both files are read with a line-oriented scanner,
/// not a full TOML parser.
/// </remarks>
internal sealed partial class PoetryAdapter : IPythonPackagingAdapter
{
    private const string PoetryTableHeader = "[tool.poetry]";
    private const string PoetryDependenciesHeader = "[tool.poetry.dependencies]";
    private const string LockDependenciesHeader = "[package.dependencies]";

    /// <inheritdoc />
    public string Name => "poetry";

    /// <inheritdoc />
    public int Priority => 3; // High priority for local projects

    [GeneratedRegex(@"^\[tool\.poetry\]", RegexOptions.Multiline)]
    private static partial Regex PoetryToolSectionPattern();

    [GeneratedRegex(@"^name\s*=\s*""([^""]+)""", RegexOptions.Multiline)]
    private static partial Regex NamePattern();

    [GeneratedRegex(@"^version\s*=\s*""([^""]+)""", RegexOptions.Multiline)]
    private static partial Regex VersionPattern();

    /// <summary>
    /// Returns <c>true</c> when <paramref name="path"/> contains both <c>pyproject.toml</c> and <c>poetry.lock</c>.
    /// </summary>
    /// <remarks>
    /// File contents cannot be inspected synchronously, so the lock file stands in for the
    /// <c>[tool.poetry]</c> check; <see cref="DiscoverPackagesAsync"/> verifies the table itself.
    /// </remarks>
    public bool CanHandle(PythonVirtualFileSystem vfs, string path) =>
        vfs.FileExists($"{path}/pyproject.toml") && vfs.FileExists($"{path}/poetry.lock");

    /// <summary>
    /// Yields the Poetry project at <paramref name="path"/> and then every package listed in its lock file.
    /// </summary>
    /// <param name="vfs">Virtual filesystem to read from.</param>
    /// <param name="path">Virtual directory of the project.</param>
    /// <param name="cancellationToken">Token passed to every read.</param>
    /// <returns>
    /// Nothing when <c>pyproject.toml</c> is missing, unreadable, has no <c>[tool.poetry]</c> table,
    /// or declares no name.
    /// </returns>
    public async IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
        PythonVirtualFileSystem vfs,
        string path,
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var pyprojectPath = $"{path}/pyproject.toml";
        if (!vfs.FileExists(pyprojectPath))
        {
            yield break;
        }

        string content;
        try
        {
            using var stream = await vfs.OpenReadAsync(pyprojectPath, cancellationToken).ConfigureAwait(false);
            if (stream is null)
            {
                yield break;
            }

            using var reader = new StreamReader(stream);
            content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (IOException)
        {
            yield break;
        }

        if (!PoetryToolSectionPattern().IsMatch(content))
        {
            yield break;
        }

        // Look inside [tool.poetry] first so a "name"/"version" key of an earlier table cannot win;
        // fall back to the whole file to keep accepting layouts that declare them elsewhere.
        var poetryTable = string.Join('\n', EnumerateTableLines(content, PoetryTableHeader));
        var name = ExtractValue(poetryTable, NamePattern()) ?? ExtractValue(content, NamePattern());
        var version = ExtractValue(poetryTable, VersionPattern()) ?? ExtractValue(content, VersionPattern());

        if (string.IsNullOrEmpty(name))
        {
            yield break;
        }

        var dependencies = ExtractDependencies(content);
        var topLevelModules = await FindTopLevelModulesAsync(vfs, path, name, cancellationToken).ConfigureAwait(false);

        yield return new PythonPackageInfo(
            Name: name,
            Version: version,
            Kind: PythonPackageKind.PoetryEditable,
            Location: path,
            MetadataPath: pyprojectPath,
            TopLevelModules: topLevelModules,
            Dependencies: dependencies,
            Extras: ImmutableArray<string>.Empty,
            RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
            InstallerTool: "poetry",
            EditableTarget: path,
            IsDirectDependency: true,
            Confidence: PythonPackageConfidence.High);

        await foreach (var pkg in ParsePoetryLockAsync(vfs, path, cancellationToken).ConfigureAwait(false))
        {
            yield return pkg;
        }
    }

    /// <summary>Returns the first capture group of <paramref name="pattern"/> in <paramref name="content"/>, or <c>null</c>.</summary>
    private static string? ExtractValue(string content, Regex pattern)
    {
        var match = pattern.Match(content);
        return match.Success ? match.Groups[1].Value : null;
    }

    /// <summary>
    /// Yields the trimmed lines of the first table whose header line starts with <paramref name="tableHeader"/>,
    /// stopping at the next line that opens another table.
    /// </summary>
    private static IEnumerable<string> EnumerateTableLines(string content, string tableHeader)
    {
        var insideTable = false;

        foreach (var rawLine in content.Split('\n'))
        {
            var line = rawLine.Trim();

            if (line.StartsWith('['))
            {
                if (insideTable)
                {
                    yield break;
                }

                insideTable = line.StartsWith(tableHeader, StringComparison.OrdinalIgnoreCase);
                continue;
            }

            if (insideTable)
            {
                yield return line;
            }
        }
    }

    /// <summary>
    /// Collects the keys of <c>[tool.poetry.dependencies]</c>, excluding the <c>python</c> constraint.
    /// </summary>
    /// <remarks>
    /// The table is delimited by header lines, so an inline array such as <c>extras = ["x"]</c>
    /// inside a dependency no longer cuts the table short.
    /// </remarks>
    private static ImmutableArray<string> ExtractDependencies(string content)
    {
        var dependencies = ImmutableArray.CreateBuilder<string>();

        foreach (var line in EnumerateTableLines(content, PoetryDependenciesHeader))
        {
            if (line.Length == 0 || line.StartsWith('#'))
            {
                continue;
            }

            var eqIdx = line.IndexOf('=');
            if (eqIdx <= 0)
            {
                continue;
            }

            var depName = line[..eqIdx].Trim().Trim('"', '\'');

            // Continuation lines of multi-line values ("{version = ...}") are not dependency keys.
            if (depName.Length == 0 || !char.IsLetterOrDigit(depName[0]))
            {
                continue;
            }

            if (!depName.Equals("python", StringComparison.OrdinalIgnoreCase))
            {
                dependencies.Add(depName);
            }
        }

        return dependencies.ToImmutable();
    }

    /// <summary>
    /// Determines the importable top-level modules of the project: packages with an <c>__init__.py</c>
    /// under <c>src/</c>, a package directory named after the project, or a single-file module.
    /// </summary>
    /// <remarks>The work is synchronous; the task-returning shape is kept for the caller.</remarks>
    private static Task<ImmutableArray<string>> FindTopLevelModulesAsync(
        PythonVirtualFileSystem vfs,
        string path,
        string packageName,
        CancellationToken cancellationToken)
    {
        var modules = new List<string>();
        var normalizedName = PythonPackageInfo.NormalizeName(packageName);

        // src layout
        foreach (var initFile in vfs.EnumerateFiles($"{path}/src", "__init__.py"))
        {
            var moduleName = Path.GetFileName(Path.GetDirectoryName(initFile.VirtualPath));
            if (!string.IsNullOrEmpty(moduleName))
            {
                modules.Add(moduleName);
            }
        }

        // flat layout (package directory at the project root)
        if (vfs.FileExists($"{path}/{normalizedName}/__init__.py"))
        {
            modules.Add(normalizedName);
        }

        // single-file module
        if (vfs.FileExists($"{path}/{normalizedName}.py"))
        {
            modules.Add(normalizedName);
        }

        return Task.FromResult(modules.Distinct().ToImmutableArray());
    }

    /// <summary>
    /// Reads <c>poetry.lock</c> under <paramref name="path"/> and yields one package per <c>[[package]]</c> table.
    /// Yields nothing when the file is missing or unreadable.
    /// </summary>
    private static async IAsyncEnumerable<PythonPackageInfo> ParsePoetryLockAsync(
        PythonVirtualFileSystem vfs,
        string path,
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
    {
        var lockPath = $"{path}/poetry.lock";
        if (!vfs.FileExists(lockPath))
        {
            yield break;
        }

        string content;
        try
        {
            using var stream = await vfs.OpenReadAsync(lockPath, cancellationToken).ConfigureAwait(false);
            if (stream is null)
            {
                yield break;
            }

            using var reader = new StreamReader(stream);
            content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (IOException)
        {
            yield break;
        }

        foreach (var pkg in ParseLockPackages(content))
        {
            yield return pkg;
        }
    }

    /// <summary>
    /// Parses the <c>[[package]]</c> tables of a lock file: <c>name</c> and <c>version</c> from the
    /// package table itself, dependency names from its <c>[package.dependencies]</c> sub-table.
    /// </summary>
    private static IEnumerable<PythonPackageInfo> ParseLockPackages(string content)
    {
        var packages = new List<PythonPackageInfo>();

        // Empty entries are kept on purpose: element 0 is always "whatever precedes the first
        // [[package]]" (possibly empty), so Skip(1) never discards a real package.
        var sections = content.Split("[[package]]");

        foreach (var section in sections.Skip(1))
        {
            string? name = null;
            string? version = null;
            var dependencies = new List<string>();
            var inPackageTable = true;
            var inDependencies = false;

            foreach (var rawLine in section.Split('\n'))
            {
                var line = rawLine.Trim();
                if (line.Length == 0 || line.StartsWith('#'))
                {
                    continue;
                }

                if (line.StartsWith('['))
                {
                    // Any header ends the package's own key/value pairs.
                    inPackageTable = false;
                    inDependencies = line.StartsWith(LockDependenciesHeader, StringComparison.Ordinal);
                    continue;
                }

                var eqIdx = line.IndexOf('=');
                if (eqIdx <= 0)
                {
                    continue;
                }

                var key = line[..eqIdx].Trim().Trim('"', '\'');

                if (inPackageTable)
                {
                    if (key == "name")
                    {
                        name = ExtractQuotedValue(line);
                    }
                    else if (key == "version")
                    {
                        version = ExtractQuotedValue(line);
                    }
                }
                else if (inDependencies && key.Length > 0 && char.IsLetterOrDigit(key[0]))
                {
                    dependencies.Add(key);
                }
            }

            if (!string.IsNullOrEmpty(name))
            {
                packages.Add(new PythonPackageInfo(
                    Name: name,
                    Version: version,
                    Kind: PythonPackageKind.Wheel, // Locked packages are typically wheels
                    Location: string.Empty, // Location unknown from lock file
                    MetadataPath: null,
                    TopLevelModules: ImmutableArray<string>.Empty,
                    Dependencies: dependencies.ToImmutableArray(),
                    Extras: ImmutableArray<string>.Empty,
                    RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
                    InstallerTool: "poetry",
                    EditableTarget: null,
                    IsDirectDependency: false, // Can't determine from lock file alone
                    Confidence: PythonPackageConfidence.Medium));
            }
        }

        return packages;
    }

    /// <summary>
    /// Extracts the value of a single-line <c>key = value</c> assignment, removing surrounding quotes
    /// and anything after the closing quote. Escape sequences are not decoded; an unterminated quoted
    /// value is returned unchanged.
    /// </summary>
    /// <returns>The value text, or <c>null</c> when the line contains no <c>=</c>.</returns>
    private static string? ExtractQuotedValue(string line)
    {
        var eqIdx = line.IndexOf('=');
        if (eqIdx < 0)
        {
            return null;
        }

        var value = line[(eqIdx + 1)..].Trim();
        if (value.Length == 0)
        {
            return value;
        }

        var quote = value[0];
        if (quote is not ('"' or '\''))
        {
            return value;
        }

        for (var i = 1; i < value.Length; i++)
        {
            // Backslash escapes exist only in double-quoted strings; skip the escaped char.
            if (quote == '"' && value[i] == '\\')
            {
                i++;
                continue;
            }

            if (value[i] == quote)
            {
                return value[1..i];
            }
        }

        // No closing quote (including a lone quote character): hand back the raw text.
        return value;
    }
}
|
||||
@@ -0,0 +1,32 @@
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
|
||||
|
||||
/// <summary>
/// Contract implemented by each packaging adapter: a component that recognises one kind of
/// Python packaging layout and enumerates the packages found there.
/// </summary>
internal interface IPythonPackagingAdapter
{
    /// <summary>Name of the packaging system this adapter handles.</summary>
    string Name { get; }

    /// <summary>Ordering key among adapters; a lower value means a higher priority.</summary>
    int Priority { get; }

    /// <summary>Reports whether this adapter can handle <paramref name="path"/>.</summary>
    /// <param name="vfs">Virtual filesystem to inspect.</param>
    /// <param name="path">Virtual path being probed.</param>
    bool CanHandle(PythonVirtualFileSystem vfs, string path);

    /// <summary>Enumerates the packages found at <paramref name="path"/>.</summary>
    /// <param name="vfs">Virtual filesystem to read from.</param>
    /// <param name="path">Virtual path to search.</param>
    /// <param name="cancellationToken">Token for cancelling the enumeration.</param>
    IAsyncEnumerable<PythonPackageInfo> DiscoverPackagesAsync(
        PythonVirtualFileSystem vfs,
        string path,
        CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,294 @@
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging.Adapters;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
|
||||
|
||||
/// <summary>
/// Orchestrates Python package discovery across multiple packaging systems.
/// </summary>
/// <remarks>
/// Every search path exposed by the virtual filesystem is offered to every adapter in ascending
/// <see cref="IPythonPackagingAdapter.Priority"/> order. Results are merged by normalized package
/// name, after which a dependency graph is built and direct/transitive flags are adjusted.
/// </remarks>
internal sealed class PythonPackageDiscovery
{
    private readonly IReadOnlyList<IPythonPackagingAdapter> _adapters;

    /// <summary>Creates a discovery instance with the built-in adapters.</summary>
    public PythonPackageDiscovery()
        : this(CreateDefaultAdapters())
    {
    }

    /// <summary>Creates a discovery instance with the given adapters, ordered by ascending priority value.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="adapters"/> is <c>null</c>.</exception>
    public PythonPackageDiscovery(IEnumerable<IPythonPackagingAdapter> adapters)
    {
        ArgumentNullException.ThrowIfNull(adapters);
        _adapters = adapters.OrderBy(a => a.Priority).ToList();
    }

    /// <summary>
    /// Gets the registered adapters.
    /// </summary>
    public IReadOnlyList<IPythonPackagingAdapter> Adapters => _adapters;

    /// <summary>
    /// Discovers all packages in the given virtual filesystem.
    /// </summary>
    /// <param name="vfs">Filesystem whose site-packages, source-tree and editable paths are searched.</param>
    /// <param name="cancellationToken">Token checked before each path and each adapter.</param>
    /// <returns>The merged packages, their dependency graph and any adapter errors.</returns>
    public async Task<PythonPackageDiscoveryResult> DiscoverAsync(
        PythonVirtualFileSystem vfs,
        CancellationToken cancellationToken = default)
    {
        var packages = new Dictionary<string, PythonPackageInfo>(StringComparer.OrdinalIgnoreCase);
        var errors = new List<PythonPackageDiscoveryError>();

        var searchPaths = new List<string>();
        searchPaths.AddRange(vfs.SitePackagesPaths);
        searchPaths.AddRange(vfs.SourceTreeRoots);
        searchPaths.AddRange(vfs.EditablePaths);

        foreach (var path in searchPaths.Distinct())
        {
            cancellationToken.ThrowIfCancellationRequested();
            await DiscoverInPathAsync(vfs, path, packages, errors, cancellationToken).ConfigureAwait(false);
        }

        var dependencyGraph = BuildDependencyGraph(packages);
        MarkTransitiveDependencies(packages, dependencyGraph);

        return new PythonPackageDiscoveryResult(
            Packages: packages.Values.ToImmutableArray(),
            DependencyGraph: dependencyGraph,
            Errors: errors.ToImmutableArray());
    }

    /// <summary>
    /// Discovers packages at a specific path.
    /// </summary>
    /// <remarks>
    /// Adapter errors are collected internally but not returned by this overload, and no dependency
    /// graph or direct/transitive adjustment is performed.
    /// </remarks>
    public async Task<ImmutableArray<PythonPackageInfo>> DiscoverAtPathAsync(
        PythonVirtualFileSystem vfs,
        string path,
        CancellationToken cancellationToken = default)
    {
        var packages = new Dictionary<string, PythonPackageInfo>(StringComparer.OrdinalIgnoreCase);
        var errors = new List<PythonPackageDiscoveryError>();

        await DiscoverInPathAsync(vfs, path, packages, errors, cancellationToken).ConfigureAwait(false);

        return packages.Values.ToImmutableArray();
    }

    /// <summary>
    /// Runs every adapter that accepts <paramref name="path"/> and merges its packages into
    /// <paramref name="packages"/>. Adapter failures other than cancellation are recorded in
    /// <paramref name="errors"/> and do not stop the remaining adapters.
    /// </summary>
    private async Task DiscoverInPathAsync(
        PythonVirtualFileSystem vfs,
        string path,
        Dictionary<string, PythonPackageInfo> packages,
        List<PythonPackageDiscoveryError> errors,
        CancellationToken cancellationToken)
    {
        foreach (var adapter in _adapters)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (!adapter.CanHandle(vfs, path))
            {
                continue;
            }

            try
            {
                await foreach (var pkg in adapter.DiscoverPackagesAsync(vfs, path, cancellationToken).ConfigureAwait(false))
                {
                    var key = pkg.NormalizedName;

                    // A later sighting replaces an earlier one only when its confidence is strictly
                    // higher; on a tie the first one seen is kept.
                    if (!packages.TryGetValue(key, out var existing) ||
                        pkg.Confidence > existing.Confidence)
                    {
                        packages[key] = pkg;
                    }
                }
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                errors.Add(new PythonPackageDiscoveryError(
                    AdapterName: adapter.Name,
                    Path: path,
                    Message: ex.Message,
                    Exception: ex));
            }
        }
    }

    /// <summary>
    /// Builds a map from each package's normalized name to the normalized names of those of its
    /// declared dependencies that were themselves discovered. Each edge is listed once, in
    /// declaration order.
    /// </summary>
    private static ImmutableDictionary<string, ImmutableArray<string>> BuildDependencyGraph(
        Dictionary<string, PythonPackageInfo> packages)
    {
        var graph = new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);

        foreach (var pkg in packages.Values)
        {
            var deps = new List<string>();
            var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            foreach (var dep in pkg.Dependencies)
            {
                var depName = ParseDependencyName(dep);
                if (string.IsNullOrEmpty(depName))
                {
                    continue;
                }

                var normalizedDep = PythonPackageInfo.NormalizeName(depName);

                // The same requirement may be declared several times with different markers.
                if (packages.ContainsKey(normalizedDep) && seen.Add(normalizedDep))
                {
                    deps.Add(normalizedDep);
                }
            }

            graph[pkg.NormalizedName] = deps;
        }

        return graph.ToImmutableDictionary(
            kv => kv.Key,
            kv => kv.Value.ToImmutableArray(),
            StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Extracts the distribution name from a requirement string.
    /// </summary>
    /// <remarks>
    /// The name is the leading run of letters, digits, <c>.</c>, <c>-</c> and <c>_</c>, so every
    /// suffix form is handled uniformly: <c>package</c>, <c>package>=1.0</c>, <c>package[extra]>=1.0</c>,
    /// <c>package (>=1.0)</c>, <c>package @ https://…</c> and <c>package ; python_version >= "3.8"</c>.
    /// </remarks>
    /// <returns>The name (possibly empty), or <c>null</c> for a null or empty input.</returns>
    private static string? ParseDependencyName(string dependency)
    {
        if (string.IsNullOrEmpty(dependency))
        {
            return null;
        }

        var requirement = dependency.Trim();

        var nameLength = 0;
        while (nameLength < requirement.Length && IsNameChar(requirement[nameLength]))
        {
            nameLength++;
        }

        return requirement[..nameLength];

        static bool IsNameChar(char c) => char.IsLetterOrDigit(c) || c is '.' or '-' or '_';
    }

    /// <summary>
    /// Clears <see cref="PythonPackageInfo.IsDirectDependency"/> on every package that was reported
    /// as direct but is an immediate dependency of another package reported as direct.
    /// </summary>
    /// <remarks>
    /// A package listing itself (for example a <c>pkg[extra]</c> self-reference) does not count as
    /// being required by another package and therefore stays direct.
    /// </remarks>
    private static void MarkTransitiveDependencies(
        Dictionary<string, PythonPackageInfo> packages,
        ImmutableDictionary<string, ImmutableArray<string>> graph)
    {
        // Snapshot first: the dictionary is updated below.
        var directNames = packages.Values
            .Where(p => p.IsDirectDependency)
            .Select(p => p.NormalizedName)
            .ToList();

        var requiredByDirect = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var directName in directNames)
        {
            if (!graph.TryGetValue(directName, out var deps))
            {
                continue;
            }

            foreach (var dep in deps)
            {
                if (!dep.Equals(directName, StringComparison.OrdinalIgnoreCase))
                {
                    requiredByDirect.Add(dep);
                }
            }
        }

        foreach (var pkgName in requiredByDirect)
        {
            if (packages.TryGetValue(pkgName, out var pkg) && pkg.IsDirectDependency)
            {
                packages[pkgName] = pkg with { IsDirectDependency = false };
            }
        }
    }

    /// <summary>Creates the built-in adapter set.</summary>
    private static IReadOnlyList<IPythonPackagingAdapter> CreateDefaultAdapters() =>
        new IPythonPackagingAdapter[]
        {
            new PoetryAdapter(),
            new PipEditableAdapter(),
            new DistInfoAdapter(),
            new CondaAdapter(),
            new ContainerLayerAdapter()
        };
}
|
||||
|
||||
/// <summary>
/// Outcome of a package discovery run.
/// </summary>
/// <param name="Packages">The packages that were discovered.</param>
/// <param name="DependencyGraph">Dependency graph keyed by normalized package name; each value lists that package's dependencies.</param>
/// <param name="Errors">Errors raised while discovering.</param>
internal sealed record PythonPackageDiscoveryResult(
    ImmutableArray<PythonPackageInfo> Packages,
    ImmutableDictionary<string, ImmutableArray<string>> DependencyGraph,
    ImmutableArray<PythonPackageDiscoveryError> Errors)
{
    /// <summary>
    /// Packages indexed by normalized name (case-insensitive). The index is rebuilt on every access.
    /// </summary>
    public ImmutableDictionary<string, PythonPackageInfo> PackagesByName
    {
        get
        {
            return Packages.ToImmutableDictionary(
                package => package.NormalizedName,
                StringComparer.OrdinalIgnoreCase);
        }
    }

    /// <summary>
    /// The packages flagged as direct dependencies.
    /// </summary>
    public ImmutableArray<PythonPackageInfo> DirectDependencies => SelectByDirectFlag(true);

    /// <summary>
    /// The packages not flagged as direct dependencies.
    /// </summary>
    public ImmutableArray<PythonPackageInfo> TransitiveDependencies => SelectByDirectFlag(false);

    /// <summary>
    /// <c>true</c> when no errors were recorded.
    /// </summary>
    public bool IsSuccessful => Errors.IsEmpty;

    // Filters Packages by the IsDirectDependency flag, preserving order.
    private ImmutableArray<PythonPackageInfo> SelectByDirectFlag(bool isDirect)
    {
        var selected = ImmutableArray.CreateBuilder<PythonPackageInfo>();

        foreach (var package in Packages)
        {
            if (package.IsDirectDependency == isDirect)
            {
                selected.Add(package);
            }
        }

        return selected.ToImmutable();
    }
}
|
||||
|
||||
/// <summary>
/// Describes a failure raised by an adapter during package discovery.
/// </summary>
/// <param name="AdapterName">Name of the adapter that failed.</param>
/// <param name="Path">Path that was being searched when the failure occurred.</param>
/// <param name="Message">The error message.</param>
/// <param name="Exception">The underlying exception, when one is available.</param>
internal sealed record PythonPackageDiscoveryError(
    string AdapterName,
    string Path,
    string Message,
    Exception? Exception);
|
||||
@@ -0,0 +1,175 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
|
||||
|
||||
/// <summary>
/// Represents discovered information about an installed Python package.
/// </summary>
/// <param name="Name">The package name as reported by the discovering adapter; see <see cref="NormalizedName"/> for the comparison form.</param>
/// <param name="Version">The package version string.</param>
/// <param name="Kind">The packaging format.</param>
/// <param name="Location">The installation location (directory or archive path).</param>
/// <param name="MetadataPath">Path to the metadata directory (dist-info/egg-info).</param>
/// <param name="TopLevelModules">Top-level importable module names.</param>
/// <param name="Dependencies">Declared dependencies (requirement strings).</param>
/// <param name="Extras">Available extras.</param>
/// <param name="RecordFiles">Files listed in RECORD (if available).</param>
/// <param name="InstallerTool">The tool that installed this package (pip, poetry, conda, etc.).</param>
/// <param name="EditableTarget">For editable installs, the target directory.</param>
/// <param name="IsDirectDependency">Whether this is a direct (vs transitive) dependency.</param>
/// <param name="Confidence">Confidence level in the package discovery.</param>
internal sealed record PythonPackageInfo(
    string Name,
    string? Version,
    PythonPackageKind Kind,
    string Location,
    string? MetadataPath,
    ImmutableArray<string> TopLevelModules,
    ImmutableArray<string> Dependencies,
    ImmutableArray<string> Extras,
    ImmutableArray<PythonRecordEntry> RecordFiles,
    string? InstallerTool,
    string? EditableTarget,
    bool IsDirectDependency,
    PythonPackageConfidence Confidence)
{
    /// <summary>
    /// Gets the normalized package name (lowercase, hyphens and dots replaced by underscores).
    /// </summary>
    public string NormalizedName => NormalizeName(Name);

    /// <summary>
    /// Gets whether this is an editable installation (pip, Poetry or Flit editable).
    /// </summary>
    public bool IsEditable => Kind is PythonPackageKind.PipEditable
        or PythonPackageKind.PoetryEditable
        or PythonPackageKind.FlitEditable;

    /// <summary>
    /// Normalizes a package name for comparison: lowercases it and replaces each <c>-</c> and
    /// <c>.</c> with <c>_</c>.
    /// </summary>
    /// <remarks>
    /// This is modelled on PEP 503 but is not identical to it: the separator is <c>_</c> rather than
    /// <c>-</c>, and runs of separators are not collapsed into one.
    /// </remarks>
    public static string NormalizeName(string name) =>
        name.ToLowerInvariant().Replace('-', '_').Replace('.', '_');

    /// <summary>
    /// Generates metadata entries for this package, each key prefixed with <paramref name="prefix"/>
    /// and a dot. Optional values are emitted only when present; module and dependency lists are
    /// emitted only when non-empty (an uninitialized array counts as empty).
    /// </summary>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return new($"{prefix}.name", Name);
        yield return new($"{prefix}.normalizedName", NormalizedName);

        if (Version is not null)
        {
            yield return new($"{prefix}.version", Version);
        }

        yield return new($"{prefix}.kind", Kind.ToString());
        yield return new($"{prefix}.location", Location);

        if (MetadataPath is not null)
        {
            yield return new($"{prefix}.metadataPath", MetadataPath);
        }

        // IsDefaultOrEmpty rather than Length: Length throws on a default (uninitialized) array.
        if (!TopLevelModules.IsDefaultOrEmpty)
        {
            yield return new($"{prefix}.topLevel", string.Join(",", TopLevelModules));
        }

        if (!Dependencies.IsDefaultOrEmpty)
        {
            yield return new($"{prefix}.dependencies", string.Join(";", Dependencies));
        }

        if (InstallerTool is not null)
        {
            yield return new($"{prefix}.installer", InstallerTool);
        }

        if (EditableTarget is not null)
        {
            yield return new($"{prefix}.editableTarget", EditableTarget);
        }

        yield return new($"{prefix}.isDirect", IsDirectDependency.ToString());
        yield return new($"{prefix}.confidence", Confidence.ToString());
    }
}
|
||||
|
||||
/// <summary>
/// Represents an entry from a RECORD file.
/// </summary>
/// <param name="Path">The file path relative to site-packages.</param>
/// <param name="Hash">The hash algorithm and digest (e.g., "sha256=...").</param>
/// <param name="Size">The file size in bytes.</param>
internal sealed record PythonRecordEntry(
    string Path,
    string? Hash,
    long? Size)
{
    /// <summary>
    /// Parses one RECORD line (<c>path,hash,size</c>) into an entry.
    /// </summary>
    /// <remarks>
    /// Fields follow CSV quoting: a field that starts with <c>"</c> may contain commas, and
    /// <c>""</c> inside it stands for a literal quote. The size is parsed culture-invariantly.
    /// </remarks>
    /// <param name="line">The line to parse.</param>
    /// <returns>
    /// The entry, or <c>null</c> when the line is blank or has an empty path. A missing or blank
    /// hash, and a missing or non-numeric size, are reported as <c>null</c>.
    /// </returns>
    public static PythonRecordEntry? Parse(string line)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            return null;
        }

        var fields = SplitCsvFields(line);

        var path = fields[0].Trim();
        if (path.Length == 0)
        {
            return null;
        }

        string? hash = null;
        if (fields.Count > 1 && !string.IsNullOrWhiteSpace(fields[1]))
        {
            hash = fields[1].Trim();
        }

        long? size = null;
        if (fields.Count > 2 &&
            long.TryParse(
                fields[2].Trim(),
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out var parsedSize))
        {
            size = parsedSize;
        }

        return new PythonRecordEntry(path, hash, size);
    }

    // Splits a single CSV record into its fields; always returns at least one field.
    private static List<string> SplitCsvFields(string line)
    {
        var fields = new List<string>(3);
        var current = new System.Text.StringBuilder(line.Length);
        var inQuotes = false;

        for (var i = 0; i < line.Length; i++)
        {
            var c = line[i];

            if (inQuotes)
            {
                if (c != '"')
                {
                    current.Append(c);
                }
                else if (i + 1 < line.Length && line[i + 1] == '"')
                {
                    // Doubled quote inside a quoted field is a literal quote.
                    current.Append('"');
                    i++;
                }
                else
                {
                    inQuotes = false;
                }
            }
            else if (c == '"' && current.Length == 0)
            {
                // A quote opens a quoted field only at the start of the field; elsewhere it is data.
                inQuotes = true;
            }
            else if (c == ',')
            {
                fields.Add(current.ToString());
                current.Clear();
            }
            else
            {
                current.Append(c);
            }
        }

        fields.Add(current.ToString());
        return fields;
    }
}
|
||||
|
||||
/// <summary>
/// How much trust to place in a discovered package. Values are numbered in ascending order of confidence.
/// </summary>
internal enum PythonPackageConfidence
{
    /// <summary>Low: inferred from file structure.</summary>
    Low = 0,

    /// <summary>Medium: partial metadata was available.</summary>
    Medium = 1,

    /// <summary>High: metadata was present and validated.</summary>
    High = 2,

    /// <summary>Definitive: full metadata with RECORD validation.</summary>
    Definitive = 3
}
|
||||
@@ -0,0 +1,67 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
|
||||
|
||||
/// <summary>
/// The packaging format a Python package was found in.
/// </summary>
internal enum PythonPackageKind
{
    /// <summary>Packaging format is unknown or not recognized.</summary>
    Unknown = 0,

    /// <summary>Standard wheel installation (.whl → dist-info).</summary>
    Wheel = 1,

    /// <summary>Legacy egg installation (.egg-info).</summary>
    Egg = 2,

    /// <summary>Installation from a source distribution (sdist).</summary>
    Sdist = 3,

    /// <summary>Pip editable install (development mode via .egg-link).</summary>
    PipEditable = 4,

    /// <summary>Poetry editable install.</summary>
    PoetryEditable = 5,

    /// <summary>Flit editable install.</summary>
    FlitEditable = 6,

    /// <summary>Conda package installation.</summary>
    Conda = 7,

    /// <summary>System package, installed via the OS package manager.</summary>
    System = 8,

    /// <summary>Vendored or bundled package, copied into the project.</summary>
    Vendored = 9,

    /// <summary>Zipapp package (.pyz, .pyzw).</summary>
    Zipapp = 10,

    /// <summary>Local source package: not installed, only present on the path.</summary>
    LocalSource = 11
}
|
||||
@@ -334,7 +334,7 @@ internal sealed partial class PythonModuleResolver
|
||||
{
|
||||
if (resolution.Kind == PythonResolutionKind.NamespacePackage)
|
||||
{
|
||||
// Collect namespace package paths
|
||||
// Collect namespace package paths (already normalized)
|
||||
if (resolution.VirtualPath is not null)
|
||||
{
|
||||
namespacePaths.Add(resolution.VirtualPath);
|
||||
@@ -353,7 +353,7 @@ internal sealed partial class PythonModuleResolver
|
||||
return new PythonModuleResolution(
|
||||
ModuleName: moduleName,
|
||||
Kind: PythonResolutionKind.NamespacePackage,
|
||||
VirtualPath: namespacePaths[0],
|
||||
VirtualPath: NormalizeVirtualPath(namespacePaths[0]),
|
||||
AbsolutePath: null,
|
||||
SearchPath: _searchPaths[0].Path,
|
||||
Source: PythonFileSource.SitePackages,
|
||||
@@ -430,7 +430,7 @@ internal sealed partial class PythonModuleResolver
|
||||
return new PythonModuleResolution(
|
||||
ModuleName: moduleName,
|
||||
Kind: PythonResolutionKind.SourceModule,
|
||||
VirtualPath: modulePath,
|
||||
VirtualPath: NormalizeVirtualPath(modulePath),
|
||||
AbsolutePath: file?.AbsolutePath,
|
||||
SearchPath: searchPath.Path,
|
||||
Source: file?.Source ?? PythonFileSource.Unknown,
|
||||
@@ -447,7 +447,7 @@ internal sealed partial class PythonModuleResolver
|
||||
return new PythonModuleResolution(
|
||||
ModuleName: moduleName,
|
||||
Kind: PythonResolutionKind.BytecodeModule,
|
||||
VirtualPath: bytecodePath,
|
||||
VirtualPath: NormalizeVirtualPath(bytecodePath),
|
||||
AbsolutePath: file?.AbsolutePath,
|
||||
SearchPath: searchPath.Path,
|
||||
Source: file?.Source ?? PythonFileSource.Unknown,
|
||||
@@ -466,7 +466,7 @@ internal sealed partial class PythonModuleResolver
|
||||
return new PythonModuleResolution(
|
||||
ModuleName: moduleName,
|
||||
Kind: PythonResolutionKind.ExtensionModule,
|
||||
VirtualPath: extPath,
|
||||
VirtualPath: NormalizeVirtualPath(extPath),
|
||||
AbsolutePath: file?.AbsolutePath,
|
||||
SearchPath: searchPath.Path,
|
||||
Source: file?.Source ?? PythonFileSource.Unknown,
|
||||
@@ -484,7 +484,7 @@ internal sealed partial class PythonModuleResolver
|
||||
return new PythonModuleResolution(
|
||||
ModuleName: moduleName,
|
||||
Kind: PythonResolutionKind.Package,
|
||||
VirtualPath: packageInitPath,
|
||||
VirtualPath: NormalizeVirtualPath(packageInitPath),
|
||||
AbsolutePath: file?.AbsolutePath,
|
||||
SearchPath: searchPath.Path,
|
||||
Source: file?.Source ?? PythonFileSource.Unknown,
|
||||
@@ -499,7 +499,7 @@ internal sealed partial class PythonModuleResolver
|
||||
return new PythonModuleResolution(
|
||||
ModuleName: moduleName,
|
||||
Kind: PythonResolutionKind.NamespacePackage,
|
||||
VirtualPath: packagePath,
|
||||
VirtualPath: NormalizeVirtualPath(packagePath),
|
||||
AbsolutePath: null,
|
||||
SearchPath: searchPath.Path,
|
||||
Source: PythonFileSource.Unknown,
|
||||
@@ -517,6 +517,14 @@ internal sealed partial class PythonModuleResolver
|
||||
return _vfs.EnumerateFiles(virtualPath, "*").Any();
|
||||
}
|
||||
|
||||
/// <summary>
/// Strips every leading '/' character from a virtual path.
/// </summary>
/// <param name="path">The virtual path to normalize.</param>
/// <returns>
/// <paramref name="path"/> without its leading slashes; slashes elsewhere in the
/// path (including trailing ones) are left untouched.
/// </returns>
private static string NormalizeVirtualPath(string path) => path.TrimStart('/');
|
||||
|
||||
/// <summary>
|
||||
/// Clears the resolution cache.
|
||||
/// </summary>
|
||||
|
||||
@@ -101,6 +101,82 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
return _files.TryGetValue(normalized, out file);
|
||||
}
|
||||
|
||||
/// <summary>
/// Asynchronously opens the file registered under a virtual path for reading.
/// </summary>
/// <param name="virtualPath">Virtual path handed to <c>GetFile</c> to look the file up.</param>
/// <param name="cancellationToken">
/// Token forwarded to the archive read; it is not used when the file is opened directly from disk.
/// </param>
/// <returns>
/// A readable stream, or null when the path is not known, the backing file does not exist on disk,
/// or opening it fails with an <see cref="IOException"/> or <see cref="UnauthorizedAccessException"/>.
/// </returns>
public async Task<Stream?> OpenReadAsync(string virtualPath, CancellationToken cancellationToken = default)
{
    var entry = GetFile(virtualPath);
    if (entry is null)
    {
        return null;
    }

    try
    {
        // Archive-backed file: the helper extracts the content into memory.
        var livesInArchive = entry.IsFromArchive && entry.ArchivePath is not null;
        if (livesInArchive)
        {
            return await OpenFromArchiveAsync(entry, cancellationToken).ConfigureAwait(false);
        }

        // Otherwise the file is expected on the real filesystem.
        if (!File.Exists(entry.AbsolutePath))
        {
            return null;
        }

        return new FileStream(
            entry.AbsolutePath,
            FileMode.Open,
            FileAccess.Read,
            FileShare.Read,
            4096,
            FileOptions.Asynchronous);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: an unreadable file is reported as "not available" rather than as an error.
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Reads one entry of a zip archive fully into memory and returns it as a readable stream.
/// </summary>
/// <param name="file">
/// Virtual file whose <c>ArchivePath</c> names the zip archive on disk and whose
/// <c>AbsolutePath</c> is used as the entry name inside that archive.
/// </param>
/// <param name="cancellationToken">Token observed while the entry is copied into memory.</param>
/// <returns>
/// An in-memory stream positioned at the start of the entry's content, or null when the archive
/// path is unset, the archive file does not exist, the entry is not found, or an
/// <see cref="InvalidDataException"/> is raised while reading the archive.
/// </returns>
private static async Task<Stream?> OpenFromArchiveAsync(PythonVirtualFile file, CancellationToken cancellationToken)
{
    var archivePath = file.ArchivePath;
    if (archivePath is null)
    {
        return null;
    }

    if (!File.Exists(archivePath))
    {
        return null;
    }

    try
    {
        using var zip = ZipFile.OpenRead(archivePath);

        if (zip.GetEntry(file.AbsolutePath) is not { } zipEntry)
        {
            return null;
        }

        // The entry is buffered completely, so the returned stream does not depend on the
        // archive, which is disposed when this method returns.
        var buffer = new MemoryStream();
        using (var source = zipEntry.Open())
        {
            await source.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);
        }

        // Rewind so the caller reads from the first byte.
        buffer.Seek(0, SeekOrigin.Begin);
        return buffer;
    }
    catch (InvalidDataException)
    {
        return null;
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Checks if a virtual path exists as a file.
|
||||
/// </summary>
|
||||
|
||||
Reference in New Issue
Block a user