up
Some checks failed
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-13 09:37:15 +02:00
parent e00f6365da
commit 6e45066e37
349 changed files with 17160 additions and 1867 deletions

View File

@@ -87,25 +87,16 @@ internal sealed partial class PythonEntrypointDiscovery
{
cancellationToken.ThrowIfCancellationRequested();
var absolutePath = file.AbsolutePath;
if (file.IsFromArchive)
try
{
continue; // Can't read from archive directly yet
}
var fullPath = Path.Combine(_rootPath, absolutePath);
if (!File.Exists(fullPath))
{
fullPath = absolutePath;
if (!File.Exists(fullPath))
using var stream = await _vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
continue;
}
}
try
{
var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);
using var reader = new StreamReader(stream);
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
ParseEntryPointsTxt(content, file.VirtualPath);
}
catch (IOException)
@@ -225,7 +216,7 @@ internal sealed partial class PythonEntrypointDiscovery
{
cancellationToken.ThrowIfCancellationRequested();
if (file.VirtualPath == "__main__.py")
if (string.Equals(file.AbsolutePath, "__main__.py", StringComparison.OrdinalIgnoreCase))
{
_entrypoints.Add(new PythonEntrypoint(
Name: "__main__",

View File

@@ -48,11 +48,15 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
continue;
}
// Look for .egg-info in the target
var (version, metadata, topLevel) = await ReadEggInfoAsync(vfs, targetPath, packageName, cancellationToken).ConfigureAwait(false);
// The editable target path in .egg-link can be absolute and host-specific.
// Prefer the VFS-mounted editable tree under the packageName prefix.
var editableRoot = packageName;
// Look for .egg-info in the editable root
var (version, metadata, topLevel) = await ReadEggInfoAsync(vfs, editableRoot, packageName, cancellationToken).ConfigureAwait(false);
// Also look for pyproject.toml for additional metadata
var pyprojectInfo = await ReadPyprojectAsync(vfs, targetPath, cancellationToken).ConfigureAwait(false);
var pyprojectInfo = await ReadPyprojectAsync(vfs, editableRoot, cancellationToken).ConfigureAwait(false);
if (pyprojectInfo.Name is not null)
{
@@ -79,7 +83,7 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
Extras: ImmutableArray<string>.Empty,
RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
InstallerTool: "pip",
EditableTarget: targetPath,
EditableTarget: editableRoot,
IsDirectDependency: true, // Editable installs are always direct
Confidence: PythonPackageConfidence.High);
}
@@ -110,7 +114,7 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
private static async Task<(string? Version, Dictionary<string, string> Metadata, ImmutableArray<string> TopLevel)> ReadEggInfoAsync(
PythonVirtualFileSystem vfs,
string targetPath,
string editableRoot,
string packageName,
CancellationToken cancellationToken)
{
@@ -119,8 +123,7 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
var topLevel = ImmutableArray<string>.Empty;
// Look for .egg-info directory
var eggInfoPattern = $"{packageName}.egg-info";
var eggInfoFiles = vfs.EnumerateFiles(targetPath, "*.egg-info/PKG-INFO").ToList();
var eggInfoFiles = vfs.EnumerateFiles(editableRoot, "*.egg-info/PKG-INFO").ToList();
PythonVirtualFile? pkgInfoFile = null;
foreach (var file in eggInfoFiles)
@@ -204,10 +207,10 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
private static async Task<(string? Name, string? Version)> ReadPyprojectAsync(
PythonVirtualFileSystem vfs,
string targetPath,
string editableRoot,
CancellationToken cancellationToken)
{
var pyprojectPath = $"{targetPath}/pyproject.toml";
var pyprojectPath = $"{editableRoot}/pyproject.toml";
try
{

View File

@@ -37,12 +37,16 @@ internal sealed class PythonPackageDiscovery
var errors = new List<PythonPackageDiscoveryError>();
var searchPaths = new List<string>();
// Gather all search paths from VFS
searchPaths.AddRange(vfs.SitePackagesPaths);
searchPaths.AddRange(vfs.SourceTreeRoots);
searchPaths.AddRange(vfs.EditablePaths);
// Gather all search paths from VFS (ordered by intended precedence).
// Later paths overwrite earlier ones on equal confidence.
searchPaths.Add(string.Empty); // workspace root (pyproject/locks/etc.)
searchPaths.AddRange(vfs.SourceTreeRoots.OrderBy(static path => path, StringComparer.Ordinal));
searchPaths.AddRange(vfs.EditablePaths.OrderBy(static path => path, StringComparer.Ordinal));
searchPaths.AddRange(vfs.SitePackagesPaths.OrderBy(static path => path, StringComparer.Ordinal));
searchPaths.AddRange(vfs.ZipArchivePaths.OrderBy(static path => path, StringComparer.Ordinal));
foreach (var path in searchPaths.Distinct())
var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var path in searchPaths.Where(p => seen.Add(p)))
{
cancellationToken.ThrowIfCancellationRequested();

View File

@@ -0,0 +1,937 @@
using System.Buffers;
using System.Globalization;
using System.IO.Compression;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
using Packaging = StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
internal static class PythonDistributionVfsLoader
{
/// <summary>
/// Builds a <see cref="PythonDistribution"/> for a discovered package by reading the files of its
/// metadata directory through the virtual file system.
/// </summary>
/// <param name="context">Analyzer context; forwarded to the helpers that build evidence locators.</param>
/// <param name="vfs">Virtual file system that is expected to contain the metadata directory.</param>
/// <param name="package">Discovered package; <c>MetadataPath</c> locates its metadata directory.</param>
/// <param name="cancellationToken">Token used to cancel the load.</param>
/// <returns>
/// The loaded distribution, or <c>null</c> when the package has no metadata path, the
/// METADATA / PKG-INFO file does not exist, or neither the file nor the package yields both a
/// name and a version.
/// </returns>
public static async Task<PythonDistribution?> LoadAsync(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    Packaging.PythonPackageInfo package,
    CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(context);
    ArgumentNullException.ThrowIfNull(vfs);
    cancellationToken.ThrowIfCancellationRequested();

    var metadataDir = package.MetadataPath;
    if (string.IsNullOrWhiteSpace(metadataDir))
    {
        return null;
    }

    // Egg packages use PKG-INFO / installed-files.txt; every other kind uses METADATA / RECORD.
    var isEggInfo = package.Kind == Packaging.PythonPackageKind.Egg;
    string metadataName;
    string recordName;
    if (isEggInfo)
    {
        metadataName = "PKG-INFO";
        recordName = "installed-files.txt";
    }
    else
    {
        metadataName = "METADATA";
        recordName = "RECORD";
    }

    var metadataVirtualPath = $"{metadataDir}/{metadataName}";
    var entryPointsPath = $"{metadataDir}/entry_points.txt";
    var wheelPath = $"{metadataDir}/WHEEL";
    var installerPath = $"{metadataDir}/INSTALLER";
    var directUrlPath = $"{metadataDir}/direct_url.json";
    var recordVirtualPath = $"{metadataDir}/{recordName}";

    if (!vfs.FileExists(metadataVirtualPath))
    {
        return null;
    }

    var metadataDocument = await PythonMetadataDocumentVfs
        .LoadAsync(vfs, metadataVirtualPath, cancellationToken)
        .ConfigureAwait(false);

    // Values from the metadata file win; the discovered package info is the fallback.
    var name = (metadataDocument.GetFirst("Name") ?? package.Name)?.Trim();
    var version = (metadataDocument.GetFirst("Version") ?? package.Version)?.Trim();
    if (string.IsNullOrWhiteSpace(name) || string.IsNullOrWhiteSpace(version))
    {
        return null;
    }

    var normalizedName = PythonPathHelper.NormalizePackageName(name);
    var purl = $"pkg:pypi/{normalizedName}@{version}";

    var metadataEntries = new List<KeyValuePair<string, string?>>();
    var evidenceEntries = new List<LanguageComponentEvidence>();

    AppendMetadata(metadataEntries, "distInfoPath", NormalizeVfsPath(metadataDir));
    AppendMetadata(metadataEntries, "name", name);
    AppendMetadata(metadataEntries, "version", version);
    AppendMetadata(metadataEntries, "normalizedName", normalizedName);

    // Single-valued headers, emitted in this fixed order.
    (string Key, string Header)[] singleValueHeaders =
    {
        ("summary", "Summary"),
        ("license", "License"),
        ("licenseExpression", "License-Expression"),
        ("homePage", "Home-page"),
        ("author", "Author"),
        ("authorEmail", "Author-email"),
        ("projectUrl", "Project-URL"),
        ("requiresPython", "Requires-Python"),
    };
    foreach (var (key, header) in singleValueHeaders)
    {
        AppendMetadata(metadataEntries, key, metadataDocument.GetFirst(header));
    }

    AppendClassifiers(metadataEntries, metadataDocument);

    var requiresDist = metadataDocument.GetAll("Requires-Dist");
    if (requiresDist.Count > 0)
    {
        AppendMetadata(metadataEntries, "requiresDist", string.Join(';', requiresDist));
    }

    await AppendEntryPointsAsync(vfs, metadataEntries, entryPointsPath, cancellationToken)
        .ConfigureAwait(false);

    if (!isEggInfo)
    {
        await AppendWheelMetadataAsync(vfs, metadataEntries, wheelPath, cancellationToken)
            .ConfigureAwait(false);
    }

    var installer = await ReadSingleLineAsync(vfs, installerPath, cancellationToken).ConfigureAwait(false);
    if (!string.IsNullOrWhiteSpace(installer))
    {
        AppendMetadata(metadataEntries, "installer", installer);
    }

    // direct_url.json evidence (if any) is appended before the per-file evidence below.
    await AppendDirectUrlAsync(context, vfs, metadataEntries, evidenceEntries, directUrlPath, cancellationToken)
        .ConfigureAwait(false);

    (string Path, string Source)[] optionalEvidence =
    {
        (metadataVirtualPath, metadataName),
        (wheelPath, "WHEEL"),
        (entryPointsPath, "entry_points.txt"),
        (installerPath, "INSTALLER"),
        (recordVirtualPath, recordName),
        (directUrlPath, "direct_url.json"),
    };
    foreach (var (path, source) in optionalEvidence)
    {
        AddOptionalFileEvidence(context, vfs, evidenceEntries, path, source);
    }

    var recordEntries = await ReadRecordAsync(vfs, recordVirtualPath, cancellationToken).ConfigureAwait(false);
    var recordVerification = await VerifyRecordAsync(vfs, metadataDir, recordEntries, cancellationToken).ConfigureAwait(false);

    // Counters are always emitted, even when they are zero.
    void AddRecordCounter(string key, string formatted)
        => metadataEntries.Add(new KeyValuePair<string, string?>(key, formatted));

    AddRecordCounter("record.totalEntries", recordVerification.TotalEntries.ToString(CultureInfo.InvariantCulture));
    AddRecordCounter("record.hashedEntries", recordVerification.HashedEntries.ToString(CultureInfo.InvariantCulture));
    AddRecordCounter("record.missingFiles", recordVerification.MissingFiles.ToString(CultureInfo.InvariantCulture));
    AddRecordCounter("record.hashMismatches", recordVerification.HashMismatches.ToString(CultureInfo.InvariantCulture));
    AddRecordCounter("record.ioErrors", recordVerification.IoErrors.ToString(CultureInfo.InvariantCulture));

    if (recordVerification.UnsupportedAlgorithms.Count > 0)
    {
        var sortedAlgorithms = recordVerification.UnsupportedAlgorithms
            .OrderBy(static a => a, StringComparer.OrdinalIgnoreCase);
        AppendMetadata(metadataEntries, "record.unsupportedAlgorithms", string.Join(';', sortedAlgorithms));
    }

    evidenceEntries.AddRange(recordVerification.Evidence);

    AppendMetadata(metadataEntries, "provenance", isEggInfo ? "egg-info" : "dist-info");

    // This loader never marks a distribution as used by an entrypoint.
    return new PythonDistribution(
        name,
        version,
        purl,
        metadataEntries,
        evidenceEntries,
        false);
}
/// <summary>
/// Adds a file-evidence record for <paramref name="virtualPath"/> when the virtual file system
/// knows that file; does nothing otherwise.
/// </summary>
/// <param name="context">Analyzer context passed to <c>PythonPathHelper.NormalizeRelative</c>.</param>
/// <param name="vfs">Virtual file system used to look the file up.</param>
/// <param name="evidence">Collection that receives the evidence record.</param>
/// <param name="virtualPath">Virtual path of the file to record.</param>
/// <param name="source">Source label stored on the evidence record.</param>
private static void AddOptionalFileEvidence(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    ICollection<LanguageComponentEvidence> evidence,
    string virtualPath,
    string source)
{
    if (vfs.GetFile(virtualPath) is not { } file)
    {
        return;
    }

    string locatorSource;
    string? value;
    if (file is { IsFromArchive: true, ArchivePath: { } archivePath })
    {
        // Archive member: locate by the archive path and keep the file's AbsolutePath as the value.
        locatorSource = archivePath;
        value = file.AbsolutePath;
    }
    else
    {
        locatorSource = file.AbsolutePath;
        value = null;
    }

    evidence.Add(new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        source,
        PythonPathHelper.NormalizeRelative(context, locatorSource),
        Value: value,
        Sha256: null));
}
/// <summary>
/// Emits the document's <c>Classifier</c> values as metadata: one joined <c>classifiers</c>
/// entry, one <c>classifier[i]</c> entry per value, and one <c>license.classifier[j]</c> entry
/// for every value that starts with <c>License ::</c>.
/// </summary>
/// <param name="metadata">Collection that receives the entries.</param>
/// <param name="metadataDocument">Parsed metadata document to read classifiers from.</param>
private static void AppendClassifiers(
    ICollection<KeyValuePair<string, string?>> metadata,
    PythonMetadataDocumentVfs metadataDocument)
{
    // Trim, drop blanks, then sort ordinally so the emitted indices are stable.
    var cleaned = new List<string>();
    foreach (var raw in metadataDocument.GetAll("Classifier"))
    {
        var trimmed = raw.Trim();
        if (trimmed.Length > 0)
        {
            cleaned.Add(trimmed);
        }
    }

    if (cleaned.Count == 0)
    {
        return;
    }

    cleaned.Sort(StringComparer.Ordinal);

    AppendMetadata(metadata, "classifiers", string.Join(';', cleaned));

    var position = 0;
    var licensePosition = 0;
    foreach (var classifier in cleaned)
    {
        AppendMetadata(metadata, $"classifier[{position}]", classifier);
        position++;

        if (!classifier.StartsWith("License ::", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        AppendMetadata(metadata, $"license.classifier[{licensePosition}]", classifier);
        licensePosition++;
    }
}
/// <summary>
/// Reads an <c>entry_points.txt</c> file and emits one <c>entryPoints.&lt;group&gt;</c> metadata
/// entry per section, listing its <c>name=target</c> pairs joined with <c>;</c>.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="metadata">Collection that receives the entries.</param>
/// <param name="entryPointsVirtualPath">Virtual path of the entry points file.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <remarks>A missing file, an unopenable stream or an <see cref="IOException"/> yields no entries.</remarks>
private static async Task AppendEntryPointsAsync(
    PythonVirtualFileSystem vfs,
    ICollection<KeyValuePair<string, string?>> metadata,
    string entryPointsVirtualPath,
    CancellationToken cancellationToken)
{
    if (!vfs.FileExists(entryPointsVirtualPath))
    {
        return;
    }

    string text;
    try
    {
        await using var stream = await vfs.OpenReadAsync(entryPointsVirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return;
        }

        using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);
        text = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
    }
    catch (IOException)
    {
        return;
    }

    // Sections are keyed case-insensitively and enumerated in sorted order; the first-seen
    // casing of a section name is the one that is emitted.
    var sections = new SortedDictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);
    string? activeSection = null;

    foreach (var rawLine in text.Split('\n'))
    {
        var line = rawLine.Trim();
        if (line.Length == 0 || line[0] == '#')
        {
            continue;
        }

        if (line[0] == '[' && line[^1] == ']')
        {
            var header = line[1..^1].Trim();
            activeSection = header.Length == 0 ? null : header;
            continue;
        }

        if (activeSection is null)
        {
            // Entries that appear before any section header are ignored.
            continue;
        }

        var parts = line.Split('=', 2);
        if (parts.Length < 2)
        {
            continue;
        }

        var name = parts[0].Trim();
        var target = parts[1].Trim();
        if (name.Length == 0 || target.Length == 0)
        {
            continue;
        }

        if (!sections.TryGetValue(activeSection, out var pairs))
        {
            pairs = new List<string>();
            sections.Add(activeSection, pairs);
        }

        pairs.Add($"{name}={target}");
    }

    foreach (var (section, pairs) in sections)
    {
        AppendMetadata(metadata, $"entryPoints.{section}", string.Join(';', pairs));
    }
}
/// <summary>
/// Reads a <c>WHEEL</c> file and emits <c>wheel.version</c>, <c>wheel.tags</c>,
/// <c>wheel.rootIsPurelib</c> and <c>wheel.generator</c> metadata for the keys it contains.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="metadata">Collection that receives the entries.</param>
/// <param name="wheelVirtualPath">Virtual path of the WHEEL file.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <remarks>
/// A WHEEL file may repeat the <c>Tag</c> key once per compatibility tag. All of them are
/// collected in file order and joined with <c>;</c>; a file with a single <c>Tag</c> line
/// produces the same value as before. For every other key the last occurrence wins.
/// A missing file, an unopenable stream or an <see cref="IOException"/> yields no entries.
/// </remarks>
private static async Task AppendWheelMetadataAsync(
    PythonVirtualFileSystem vfs,
    ICollection<KeyValuePair<string, string?>> metadata,
    string wheelVirtualPath,
    CancellationToken cancellationToken)
{
    if (!vfs.FileExists(wheelVirtualPath))
    {
        return;
    }

    var values = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
    var tags = new List<string>();

    try
    {
        await using var stream = await vfs.OpenReadAsync(wheelVirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return;
        }

        using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var separator = line.IndexOf(':');
            if (separator <= 0)
            {
                continue;
            }

            var key = line[..separator].Trim();
            var value = line[(separator + 1)..].Trim();
            if (key.Length == 0 || value.Length == 0)
            {
                continue;
            }

            if (key.Equals("Tag", StringComparison.OrdinalIgnoreCase))
            {
                // Multi-valued key: keep every tag instead of overwriting with the last one.
                tags.Add(value);
                continue;
            }

            values[key] = value;
        }
    }
    catch (IOException)
    {
        return;
    }

    if (values.TryGetValue("Wheel-Version", out var wheelVersion))
    {
        AppendMetadata(metadata, "wheel.version", wheelVersion);
    }

    if (tags.Count > 0)
    {
        AppendMetadata(metadata, "wheel.tags", string.Join(';', tags));
    }

    if (values.TryGetValue("Root-Is-Purelib", out var purelib))
    {
        AppendMetadata(metadata, "wheel.rootIsPurelib", purelib);
    }

    if (values.TryGetValue("Generator", out var generator))
    {
        AppendMetadata(metadata, "wheel.generator", generator);
    }
}
/// <summary>
/// Returns the first line of a file in the virtual file system.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="virtualPath">Virtual path of the file.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <returns>
/// The first line, or <c>null</c> when the file does not exist, cannot be opened, is empty,
/// or reading raises an <see cref="IOException"/>.
/// </returns>
private static async Task<string?> ReadSingleLineAsync(
    PythonVirtualFileSystem vfs,
    string virtualPath,
    CancellationToken cancellationToken)
{
    string? firstLine = null;

    if (vfs.FileExists(virtualPath))
    {
        try
        {
            await using var stream = await vfs.OpenReadAsync(virtualPath, cancellationToken).ConfigureAwait(false);
            if (stream is not null)
            {
                using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);
                firstLine = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false);
            }
        }
        catch (IOException)
        {
            // Best effort: an unreadable file is reported the same way as a missing one.
            firstLine = null;
        }
    }

    return firstLine;
}
/// <summary>
/// Reads <c>direct_url.json</c> and emits the source URL, subdirectory, VCS and commit as
/// metadata, an <c>editable</c> flag when set, and a metadata-evidence record for the URL.
/// </summary>
/// <param name="context">Analyzer context passed to <c>PythonPathHelper.NormalizeRelative</c>.</param>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="metadata">Collection that receives the metadata entries.</param>
/// <param name="evidence">Collection that receives the evidence record.</param>
/// <param name="directUrlVirtualPath">Virtual path of the <c>direct_url.json</c> file.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <remarks>
/// The file is treated as untrusted input. Besides invalid JSON and I/O errors, well-formed JSON
/// with unexpected shapes (non-object root, non-object <c>dir_info</c> / <c>vcs_info</c>,
/// non-string or non-boolean values) is tolerated: the offending field is simply treated as
/// absent instead of letting <see cref="InvalidOperationException"/> escape from
/// <see cref="JsonElement"/>. <c>subdirectory</c> is read from the top level of the document and,
/// for compatibility with the previous lookup, from <c>dir_info</c> when the top-level key is absent.
/// </remarks>
private static async Task AppendDirectUrlAsync(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    ICollection<KeyValuePair<string, string?>> metadata,
    ICollection<LanguageComponentEvidence> evidence,
    string directUrlVirtualPath,
    CancellationToken cancellationToken)
{
    var file = vfs.GetFile(directUrlVirtualPath);
    if (file is null)
    {
        return;
    }

    try
    {
        await using var stream = await vfs.OpenReadAsync(directUrlVirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return;
        }

        using var document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
        var root = document.RootElement;
        if (root.ValueKind != JsonValueKind.Object)
        {
            // Valid JSON, but not an object: nothing to read; handle like invalid JSON.
            return;
        }

        var url = GetString(root, "url");

        var hasDirInfo = TryGetObject(root, "dir_info", out var dirInfo);
        var isEditable = hasDirInfo &&
                         dirInfo.TryGetProperty("editable", out var editableValue) &&
                         editableValue.ValueKind == JsonValueKind.True;

        var subdir = GetString(root, "subdirectory")
                     ?? (hasDirInfo ? GetString(dirInfo, "subdirectory") : null);

        string? vcs = null;
        string? commit = null;
        if (TryGetObject(root, "vcs_info", out var vcsInfo))
        {
            vcs = GetString(vcsInfo, "vcs");
            commit = GetString(vcsInfo, "commit_id");
        }

        if (isEditable)
        {
            AppendMetadata(metadata, "editable", "true");
        }

        AppendMetadata(metadata, "sourceUrl", url);
        AppendMetadata(metadata, "sourceSubdirectory", subdir);
        AppendMetadata(metadata, "sourceVcs", vcs);
        AppendMetadata(metadata, "sourceCommit", commit);

        if (!string.IsNullOrWhiteSpace(url))
        {
            var locator = file.IsFromArchive && file.ArchivePath is not null
                ? PythonPathHelper.NormalizeRelative(context, file.ArchivePath)
                : PythonPathHelper.NormalizeRelative(context, file.AbsolutePath);

            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Metadata,
                "direct_url.json",
                locator,
                url,
                Sha256: null));
        }
    }
    catch (JsonException)
    {
        // Ignore invalid JSON
    }
    catch (IOException)
    {
        // Ignore read errors
    }

    // Returns the string value of a property, or null when it is absent or not a JSON string.
    static string? GetString(JsonElement parent, string propertyName)
        => parent.TryGetProperty(propertyName, out var element) && element.ValueKind == JsonValueKind.String
            ? element.GetString()
            : null;

    // Fetches a property only when it is a JSON object, so nested lookups cannot throw.
    static bool TryGetObject(JsonElement parent, string propertyName, out JsonElement value)
        => parent.TryGetProperty(propertyName, out value) && value.ValueKind == JsonValueKind.Object;
}
/// <summary>
/// Parses a record file into entries. Files whose name ends with <c>installed-files.txt</c> are
/// delegated to <c>ReadInstalledFilesAsync</c>; anything else is read as CSV rows of
/// <c>path,algorithm=hash,size</c>.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="recordVirtualPath">Virtual path of the record file.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <returns>
/// The parsed entries; an empty list when the file is missing, cannot be opened, or reading
/// raises an <see cref="IOException"/> (rows parsed before the error are discarded).
/// </returns>
private static async Task<IReadOnlyList<PythonRecordEntry>> ReadRecordAsync(
    PythonVirtualFileSystem vfs,
    string recordVirtualPath,
    CancellationToken cancellationToken)
{
    if (!vfs.FileExists(recordVirtualPath))
    {
        return Array.Empty<PythonRecordEntry>();
    }

    var recordFileName = Path.GetFileName(recordVirtualPath);
    var isInstalledFiles = !string.IsNullOrWhiteSpace(recordFileName) &&
                           recordFileName.EndsWith("installed-files.txt", StringComparison.OrdinalIgnoreCase);
    if (isInstalledFiles)
    {
        return await ReadInstalledFilesAsync(vfs, recordVirtualPath, cancellationToken).ConfigureAwait(false);
    }

    var parsed = new List<PythonRecordEntry>();
    try
    {
        await using var stream = await vfs.OpenReadAsync(recordVirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return Array.Empty<PythonRecordEntry>();
        }

        using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        string? row;
        while ((row = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (row.Length == 0)
            {
                continue;
            }

            var columns = ParseCsvLine(row);
            if (columns.Count < 1)
            {
                continue;
            }

            string? algorithm = null;
            string? digest = null;
            if (columns.Count > 1)
            {
                (algorithm, digest) = SplitHashColumn(columns[1]);
            }

            long? size = null;
            if (columns.Count > 2 &&
                long.TryParse(columns[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsedSize))
            {
                size = parsedSize;
            }

            parsed.Add(new PythonRecordEntry(columns[0], algorithm, digest, size));
        }
    }
    catch (IOException)
    {
        return Array.Empty<PythonRecordEntry>();
    }

    return parsed;

    // Splits "algorithm=value"; both halves must be non-empty, otherwise the hash is dropped.
    static (string? Algorithm, string? Digest) SplitHashColumn(string column)
    {
        if (string.IsNullOrWhiteSpace(column))
        {
            return (null, null);
        }

        var trimmed = column.Trim();
        var equalsAt = trimmed.IndexOf('=');
        if (equalsAt <= 0 || equalsAt >= trimmed.Length - 1)
        {
            return (null, null);
        }

        return (trimmed[..equalsAt], trimmed[(equalsAt + 1)..]);
    }
}
/// <summary>
/// Reads an <c>installed-files.txt</c> style listing: one path per line, with no hash or size.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="recordVirtualPath">Virtual path of the listing.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <returns>
/// One entry per non-blank line other than <c>.</c>; an empty list when the file cannot be
/// opened or reading raises an <see cref="IOException"/>.
/// </returns>
private static async Task<IReadOnlyList<PythonRecordEntry>> ReadInstalledFilesAsync(
    PythonVirtualFileSystem vfs,
    string recordVirtualPath,
    CancellationToken cancellationToken)
{
    var listed = new List<PythonRecordEntry>();
    try
    {
        await using var stream = await vfs.OpenReadAsync(recordVirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return Array.Empty<PythonRecordEntry>();
        }

        using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        string? rawLine;
        while ((rawLine = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var entryPath = rawLine.Trim();
            var skip = entryPath.Length == 0 || entryPath == ".";
            if (!skip)
            {
                listed.Add(new PythonRecordEntry(entryPath, null, null, null));
            }
        }
    }
    catch (IOException)
    {
        return Array.Empty<PythonRecordEntry>();
    }

    return listed;
}
/// <summary>
/// Checks every record entry against the virtual file system: counts entries, missing files,
/// hashed entries, sha256 mismatches and I/O errors, and collects evidence for each problem.
/// </summary>
/// <param name="vfs">Virtual file system that holds the installed files.</param>
/// <param name="distInfoVirtualPath">Virtual path of the metadata directory; entry paths are resolved against its parent directory.</param>
/// <param name="entries">Entries parsed from the record file.</param>
/// <param name="cancellationToken">Token used to cancel the verification.</param>
/// <returns>The aggregated counters, the unsupported hash algorithms seen, and the evidence list.</returns>
/// <remarks>
/// <para>
/// RECORD digests are written as urlsafe base64 without <c>=</c> padding, while the locally
/// computed digest is standard padded base64. Both sides are brought to one canonical form
/// before comparing, so digests that differ only in alphabet or padding are not reported as
/// mismatches. Evidence text still shows the values exactly as recorded and as computed.
/// </para>
/// <para>
/// Entries whose path is rejected by <c>NormalizeRecordPath</c> (for example any path with a
/// <c>..</c> segment) are counted as missing and tagged <c>outside-root</c>.
/// NOTE(review): installed-files.txt entries may be written relative to the egg-info directory
/// using <c>..</c>; if so they all land in that bucket. Confirm against real egg-info input.
/// </para>
/// </remarks>
private static async Task<PythonRecordVerificationResult> VerifyRecordAsync(
    PythonVirtualFileSystem vfs,
    string distInfoVirtualPath,
    IReadOnlyList<PythonRecordEntry> entries,
    CancellationToken cancellationToken)
{
    if (entries.Count == 0)
    {
        return new PythonRecordVerificationResult(0, 0, 0, 0, 0, usedByEntrypoint: false, Array.Empty<string>(), Array.Empty<LanguageComponentEvidence>());
    }

    var evidence = new List<LanguageComponentEvidence>();
    var unsupported = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var root = GetParentDirectory(distInfoVirtualPath);

    var total = 0;
    var hashed = 0;
    var missing = 0;
    var mismatched = 0;
    var ioErrors = 0;

    foreach (var entry in entries)
    {
        cancellationToken.ThrowIfCancellationRequested();
        total++;

        var normalizedEntryPath = NormalizeRecordPath(entry.Path);
        if (normalizedEntryPath is null)
        {
            missing++;
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Derived,
                "RECORD",
                NormalizeVfsPath(entry.Path),
                "outside-root",
                Sha256: null));
            continue;
        }

        // When the metadata directory sits at the VFS root the parent is empty; joining with
        // "/" would then produce a path with a leading slash, so use the entry path as-is.
        var virtualPath = root.Length == 0
            ? normalizedEntryPath
            : $"{root}/{normalizedEntryPath}";

        if (!vfs.FileExists(virtualPath))
        {
            missing++;
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Derived,
                "RECORD",
                NormalizeVfsPath(virtualPath),
                "missing",
                Sha256: null));
            continue;
        }

        if (string.IsNullOrWhiteSpace(entry.HashAlgorithm) || string.IsNullOrWhiteSpace(entry.HashValue))
        {
            // Entry without a usable hash: existence was the only thing to check.
            continue;
        }

        hashed++;

        if (!string.Equals(entry.HashAlgorithm, "sha256", StringComparison.OrdinalIgnoreCase))
        {
            unsupported.Add(entry.HashAlgorithm);
            continue;
        }

        string? actualHash;
        try
        {
            actualHash = await ComputeSha256Base64Async(vfs, virtualPath, cancellationToken).ConfigureAwait(false);
        }
        catch (IOException)
        {
            ioErrors++;
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Derived,
                "RECORD",
                NormalizeVfsPath(virtualPath),
                "io-error",
                Sha256: null));
            continue;
        }

        var digestsMatch = string.Equals(
            CanonicalBase64(actualHash),
            CanonicalBase64(entry.HashValue),
            StringComparison.Ordinal);

        if (!digestsMatch)
        {
            mismatched++;
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Derived,
                "RECORD",
                NormalizeVfsPath(virtualPath),
                $"sha256 mismatch expected={entry.HashValue} actual={actualHash}",
                Sha256: actualHash));
        }
    }

    return new PythonRecordVerificationResult(
        total,
        hashed,
        missing,
        mismatched,
        ioErrors,
        usedByEntrypoint: false,
        unsupported.ToArray(),
        evidence);

    // Maps urlsafe/standard, padded/unpadded base64 spellings of a digest to one form
    // (standard alphabet, no padding) so they can be compared ordinally.
    static string CanonicalBase64(string value)
        => value.Trim().TrimEnd('=').Replace('-', '+').Replace('_', '/');
}
/// <summary>
/// Computes the SHA-256 digest of a file in the virtual file system and returns it as
/// standard (padded) base64.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="virtualPath">Virtual path of the file to hash.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <returns>The base64-encoded digest.</returns>
/// <exception cref="IOException">The file could not be opened for reading.</exception>
private static async Task<string> ComputeSha256Base64Async(
    PythonVirtualFileSystem vfs,
    string virtualPath,
    CancellationToken cancellationToken)
{
    await using var stream = await vfs.OpenReadAsync(virtualPath, cancellationToken).ConfigureAwait(false);
    if (stream is null)
    {
        throw new IOException("Unable to open file for hashing.");
    }

    // Hash the whole stream in one call instead of a hand-written block loop.
    var digest = await SHA256.HashDataAsync(stream, cancellationToken).ConfigureAwait(false);
    return Convert.ToBase64String(digest);
}
/// <summary>
/// Splits one CSV line into fields. Commas separate fields, double quotes delimit quoted
/// sections in which commas are literal, and a doubled quote inside a quoted section stands
/// for a single quote character.
/// </summary>
/// <param name="line">The line to split (without its line terminator).</param>
/// <returns>The fields in order; always at least one element, possibly empty.</returns>
private static List<string> ParseCsvLine(string line)
{
    var fields = new List<string>();
    var current = new StringBuilder();
    var quoted = false;
    var position = 0;

    while (position < line.Length)
    {
        var c = line[position];
        position++;

        if (quoted)
        {
            if (c != '"')
            {
                current.Append(c);
            }
            else if (position < line.Length && line[position] == '"')
            {
                // Escaped quote: emit one quote and skip its partner.
                current.Append('"');
                position++;
            }
            else
            {
                quoted = false;
            }

            continue;
        }

        switch (c)
        {
            case ',':
                fields.Add(current.ToString());
                current.Clear();
                break;
            case '"':
                quoted = true;
                break;
            default:
                current.Append(c);
                break;
        }
    }

    // The trailing field is always emitted, even when it is empty.
    fields.Add(current.ToString());
    return fields;
}
/// <summary>
/// Normalizes a record entry path: backslashes become forward slashes and leading slashes
/// are removed.
/// </summary>
/// <param name="path">The path as written in the record file.</param>
/// <returns>
/// The normalized path, or <c>null</c> when the input is blank, normalizes to an empty
/// string, or contains a <c>..</c> segment.
/// </returns>
private static string? NormalizeRecordPath(string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        return null;
    }

    var candidate = path.Replace('\\', '/').TrimStart('/');
    if (candidate.Length == 0)
    {
        return null;
    }

    // Any ".." segment could step out of the root, so such entries are rejected outright.
    foreach (var segment in candidate.Split('/'))
    {
        if (segment == "..")
        {
            return null;
        }
    }

    return candidate;
}
/// <summary>
/// Converts backslashes to forward slashes and strips leading and trailing slashes.
/// </summary>
/// <param name="path">The path to normalize.</param>
/// <returns>The normalized path.</returns>
private static string NormalizeVfsPath(string path)
{
    var forwardSlashed = path.Replace('\\', '/');
    return forwardSlashed.Trim('/');
}
/// <summary>
/// Returns the parent directory of a virtual path after converting backslashes to forward
/// slashes and stripping leading and trailing slashes.
/// </summary>
/// <param name="path">The virtual path.</param>
/// <returns>Everything before the last slash, or an empty string when there is no parent.</returns>
private static string GetParentDirectory(string path)
{
    var trimmed = path.Replace('\\', '/').Trim('/');
    var cut = trimmed.LastIndexOf('/');
    if (cut > 0)
    {
        return trimmed[..cut];
    }

    return string.Empty;
}
/// <summary>
/// Adds a key/value pair to <paramref name="metadata"/> with the value trimmed. Nothing is
/// added when the key or the value is null, empty or whitespace.
/// </summary>
/// <param name="metadata">Collection that receives the pair.</param>
/// <param name="key">Metadata key.</param>
/// <param name="value">Metadata value; trimmed before it is stored.</param>
private static void AppendMetadata(ICollection<KeyValuePair<string, string?>> metadata, string key, string? value)
{
    var hasKey = !string.IsNullOrWhiteSpace(key);
    if (hasKey && !string.IsNullOrWhiteSpace(value))
    {
        metadata.Add(KeyValuePair.Create<string, string?>(key, value.Trim()));
    }
}
/// <summary>
/// Parsed view of a METADATA / PKG-INFO style file: a case-insensitive map from header name
/// to the list of values seen for that header, in file order.
/// </summary>
private sealed class PythonMetadataDocumentVfs
{
// Header name -> values in the order they were read. Built with an OrdinalIgnoreCase comparer.
private readonly Dictionary<string, List<string>> _values;
private PythonMetadataDocumentVfs(Dictionary<string, List<string>> values)
{
_values = values;
}
/// <summary>
/// Reads and parses the file at <paramref name="virtualPath"/>.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="virtualPath">Virtual path of the metadata file.</param>
/// <param name="cancellationToken">Token used to cancel the read.</param>
/// <returns>
/// The parsed document; an empty document when the file is missing, cannot be opened, or
/// reading raises an <see cref="IOException"/>.
/// </returns>
/// <remarks>
/// A line of the form <c>Key: value</c> starts a header. Lines that begin with a space or tab
/// are continuations and are appended to the current header's value, separated by one space.
/// An empty line ends the current header but does not stop parsing, so later lines that
/// contain a colon are still recorded as headers.
/// </remarks>
public static async Task<PythonMetadataDocumentVfs> LoadAsync(
PythonVirtualFileSystem vfs,
string virtualPath,
CancellationToken cancellationToken)
{
if (!vfs.FileExists(virtualPath))
{
return new PythonMetadataDocumentVfs(new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase));
}
try
{
await using var stream = await vfs.OpenReadAsync(virtualPath, cancellationToken).ConfigureAwait(false);
if (stream is null)
{
return new PythonMetadataDocumentVfs(new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase));
}
using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);
var values = new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);
// Header currently being accumulated; null between headers.
string? currentKey = null;
var builder = new StringBuilder();
while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
{
cancellationToken.ThrowIfCancellationRequested();
if (line.Length == 0)
{
// Empty line: flush the pending header and keep reading.
Commit();
continue;
}
if (line.StartsWith(' ') || line.StartsWith('\t'))
{
// Continuation line; ignored when no header is open.
if (currentKey is not null)
{
if (builder.Length > 0)
{
builder.Append(' ');
}
builder.Append(line.Trim());
}
continue;
}
// A new unindented line always closes the previous header first.
Commit();
var separator = line.IndexOf(':');
if (separator <= 0)
{
// No colon, or a colon in first position: not a header line.
continue;
}
currentKey = line[..separator].Trim();
builder.Clear();
builder.Append(line[(separator + 1)..].Trim());
}
// Flush the header that was still open at end of file.
Commit();
return new PythonMetadataDocumentVfs(values);
// Stores the accumulated value under currentKey (creating the list on first use) and
// resets the accumulator. A key with an empty value still gets an (empty) list.
void Commit()
{
if (string.IsNullOrWhiteSpace(currentKey))
{
return;
}
if (!values.TryGetValue(currentKey, out var list))
{
list = new List<string>();
values[currentKey] = list;
}
var value = builder.ToString().Trim();
if (value.Length > 0)
{
list.Add(value);
}
currentKey = null;
builder.Clear();
}
}
catch (IOException)
{
return new PythonMetadataDocumentVfs(new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase));
}
}
/// <summary>
/// Returns the first value recorded for <paramref name="key"/>, or <c>null</c> when the key
/// is null, unknown, or has no non-empty value.
/// </summary>
public string? GetFirst(string key)
{
if (key is null)
{
return null;
}
return _values.TryGetValue(key, out var list) && list.Count > 0
? list[0]
: null;
}
/// <summary>
/// Returns every value recorded for <paramref name="key"/> as a read-only list, or an empty
/// list when the key is null or unknown.
/// </summary>
public IReadOnlyList<string> GetAll(string key)
{
if (key is null)
{
return Array.Empty<string>();
}
return _values.TryGetValue(key, out var list)
? list.AsReadOnly()
: Array.Empty<string>();
}
}
}

View File

@@ -1,6 +1,7 @@
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Linq;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
@@ -81,9 +82,11 @@ internal sealed partial class PythonInputNormalizer
await DetectLayoutAsync(cancellationToken).ConfigureAwait(false);
await DetectVersionTargetsAsync(cancellationToken).ConfigureAwait(false);
DetectSitePackages();
DetectLayerSitePackages();
DetectWheels();
DetectZipapps();
await DetectEditablesAsync(cancellationToken).ConfigureAwait(false);
NormalizeDetectedInputs();
return this;
}
@@ -94,6 +97,11 @@ internal sealed partial class PythonInputNormalizer
{
var builder = PythonVirtualFileSystem.CreateBuilder();
AddProjectFiles(builder);
var condaMeta = Path.Combine(_rootPath, "conda-meta");
builder.AddDirectory(condaMeta, "conda-meta", PythonFileSource.ProjectConfig, includeHiddenFiles: false);
// Add site-packages in order (later takes precedence)
foreach (var sitePackagesPath in _sitePackagesPaths)
{
@@ -582,7 +590,8 @@ internal sealed partial class PythonInputNormalizer
{
try
{
foreach (var pythonDir in Directory.EnumerateDirectories(libDir, "python*", SafeEnumeration))
foreach (var pythonDir in Directory.EnumerateDirectories(libDir, "python*", SafeEnumeration)
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
{
searchPaths.Add(Path.Combine(pythonDir, "site-packages"));
}
@@ -611,6 +620,25 @@ internal sealed partial class PythonInputNormalizer
searchPaths.Add(Path.Combine(_rootPath, "usr", "local", "lib", "python3.12", "site-packages"));
searchPaths.Add(Path.Combine(_rootPath, "usr", "lib", "python3", "dist-packages"));
// System-style lib/pythonX.Y/site-packages under the workspace root
var rootLibDir = Path.Combine(_rootPath, "lib");
if (Directory.Exists(rootLibDir))
{
try
{
foreach (var pythonDir in Directory.EnumerateDirectories(rootLibDir, "python*", SafeEnumeration)
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
{
searchPaths.Add(Path.Combine(pythonDir, "site-packages"));
searchPaths.Add(Path.Combine(pythonDir, "dist-packages"));
}
}
catch (UnauthorizedAccessException)
{
// Ignore
}
}
// Root site-packages (common for some Docker images)
searchPaths.Add(Path.Combine(_rootPath, "site-packages"));
@@ -623,6 +651,17 @@ internal sealed partial class PythonInputNormalizer
}
}
/// <summary>
/// Appends every path returned by <c>PythonContainerAdapter.DiscoverLayerSitePackages</c> for
/// the root path to the site-packages list, skipping paths already present (compared
/// case-insensitively).
/// </summary>
private void DetectLayerSitePackages()
{
    foreach (var candidate in PythonContainerAdapter.DiscoverLayerSitePackages(_rootPath))
    {
        var alreadyKnown = _sitePackagesPaths.Contains(candidate, StringComparer.OrdinalIgnoreCase);
        if (alreadyKnown)
        {
            continue;
        }

        _sitePackagesPaths.Add(candidate);
    }
}
private void DetectWheels()
{
// Look for wheels in common locations
@@ -643,7 +682,8 @@ internal sealed partial class PythonInputNormalizer
try
{
foreach (var wheel in Directory.EnumerateFiles(searchPath, "*.whl", SafeEnumeration))
foreach (var wheel in Directory.EnumerateFiles(searchPath, "*.whl", SafeEnumeration)
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
{
if (!_wheelPaths.Contains(wheel, StringComparer.OrdinalIgnoreCase))
{
@@ -700,37 +740,24 @@ internal sealed partial class PythonInputNormalizer
private void DetectZipapps()
{
if (!Directory.Exists(_rootPath))
foreach (var zipappPath in PythonZipappAdapter.DiscoverZipapps(_rootPath))
{
return;
}
try
{
foreach (var pyz in Directory.EnumerateFiles(_rootPath, "*.pyz", SafeEnumeration))
if (!_zipappPaths.Contains(zipappPath, StringComparer.OrdinalIgnoreCase))
{
_zipappPaths.Add(pyz);
_zipappPaths.Add(zipappPath);
}
foreach (var pyzw in Directory.EnumerateFiles(_rootPath, "*.pyzw", SafeEnumeration))
{
_zipappPaths.Add(pyzw);
}
}
catch (UnauthorizedAccessException)
{
// Ignore
}
}
private async Task DetectEditablesAsync(CancellationToken cancellationToken)
{
// Look for .egg-link files in site-packages
foreach (var sitePackagesPath in _sitePackagesPaths)
foreach (var sitePackagesPath in _sitePackagesPaths.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
{
try
{
foreach (var eggLink in Directory.EnumerateFiles(sitePackagesPath, "*.egg-link", SafeEnumeration))
foreach (var eggLink in Directory.EnumerateFiles(sitePackagesPath, "*.egg-link", SafeEnumeration)
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
{
var content = await File.ReadAllTextAsync(eggLink, cancellationToken).ConfigureAwait(false);
var lines = content.Split('\n', StringSplitOptions.RemoveEmptyEntries);
@@ -761,11 +788,12 @@ internal sealed partial class PythonInputNormalizer
}
// Look for direct_url.json with editable flag in dist-info directories
foreach (var sitePackagesPath in _sitePackagesPaths)
foreach (var sitePackagesPath in _sitePackagesPaths.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
{
try
{
foreach (var distInfo in Directory.EnumerateDirectories(sitePackagesPath, "*.dist-info", SafeEnumeration))
foreach (var distInfo in Directory.EnumerateDirectories(sitePackagesPath, "*.dist-info", SafeEnumeration)
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
{
var directUrlPath = Path.Combine(distInfo, "direct_url.json");
if (!File.Exists(directUrlPath))
@@ -815,6 +843,84 @@ internal sealed partial class PythonInputNormalizer
}
}
/// <summary>
/// Normalizes every detected input collection in place: the three plain path
/// lists go through <c>NormalizePathList</c>, the editable installs through
/// <c>NormalizeEditableList</c>.
/// </summary>
private void NormalizeDetectedInputs()
{
    // Same order as before: site-packages, wheels, zipapps, then editables.
    foreach (var pathList in new List<string>[] { _sitePackagesPaths, _wheelPaths, _zipappPaths })
    {
        NormalizePathList(pathList);
    }

    NormalizeEditableList(_editablePaths);
}
/// <summary>
/// Rewrites <paramref name="paths"/> in place so that it contains trimmed,
/// fully-qualified paths with blank entries removed, duplicates collapsed
/// case-insensitively (first occurrence wins) and the result sorted ordinally.
/// </summary>
/// <param name="paths">The list to normalize; it is cleared and refilled.</param>
private static void NormalizePathList(List<string> paths)
{
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var cleaned = new List<string>(paths.Count);

    foreach (var raw in paths)
    {
        if (string.IsNullOrWhiteSpace(raw))
        {
            continue;
        }

        var absolute = Path.GetFullPath(raw.Trim());
        if (seen.Add(absolute))
        {
            cleaned.Add(absolute);
        }
    }

    // Entries are already distinct ignoring case, so no two compare equal
    // ordinally and the sort result does not depend on sort stability.
    cleaned.Sort(StringComparer.Ordinal);

    // Only touch the caller's list once the replacement is fully built.
    paths.Clear();
    paths.AddRange(cleaned);
}
/// <summary>
/// Rewrites <paramref name="editables"/> in place: entries with a blank path
/// are dropped, paths are trimmed and made fully qualified, blank package
/// names become <c>null</c> (others are trimmed), duplicates of the
/// "name|path" pair are removed case-insensitively (first occurrence wins),
/// and the result is ordered by package name (ignoring case) and then by
/// path (ordinal).
/// </summary>
/// <param name="editables">The list to normalize; it is cleared and refilled.</param>
private static void NormalizeEditableList(List<(string Path, string? PackageName)> editables)
{
    // Materialize with ToList() before clearing: the pipeline reads from the
    // very list that is about to be emptied.
    var normalized = editables
        .Where(static entry => !string.IsNullOrWhiteSpace(entry.Path))
        .Select(static entry => (
            Path: Path.GetFullPath(entry.Path.Trim()),
            PackageName: string.IsNullOrWhiteSpace(entry.PackageName) ? null : entry.PackageName.Trim()))
        .DistinctBy(
            static entry => $"{entry.PackageName ?? string.Empty}|{entry.Path}",
            StringComparer.OrdinalIgnoreCase)
        .OrderBy(static entry => entry.PackageName ?? string.Empty, StringComparer.OrdinalIgnoreCase)
        .ThenBy(static entry => entry.Path, StringComparer.Ordinal)
        .ToList();

    editables.Clear();
    editables.AddRange(normalized);
}
/// <summary>
/// Offers a fixed set of well-known project configuration and lock files,
/// located directly under the root path, to the virtual filesystem builder.
/// Each candidate is handed to <c>AddProjectFile</c> in the order listed.
/// </summary>
/// <param name="builder">The builder that receives the files.</param>
private void AddProjectFiles(PythonVirtualFileSystem.Builder builder)
{
    (string RelativePath, PythonFileSource Source)[] wellKnownFiles =
    {
        ("pyproject.toml", PythonFileSource.ProjectConfig),
        ("setup.py", PythonFileSource.ProjectConfig),
        ("setup.cfg", PythonFileSource.ProjectConfig),
        ("runtime.txt", PythonFileSource.ProjectConfig),
        ("Dockerfile", PythonFileSource.ProjectConfig),
        ("tox.ini", PythonFileSource.ProjectConfig),
        ("requirements.txt", PythonFileSource.LockFile),
        ("requirements-dev.txt", PythonFileSource.LockFile),
        ("requirements.prod.txt", PythonFileSource.LockFile),
        ("Pipfile.lock", PythonFileSource.LockFile),
        ("poetry.lock", PythonFileSource.LockFile),
    };

    foreach (var (relativePath, source) in wellKnownFiles)
    {
        AddProjectFile(builder, relativePath, source);
    }
}
/// <summary>
/// Registers a single file under the root path with the builder, using
/// <paramref name="relativePath"/> as its virtual path. Does nothing when
/// the file does not exist on disk.
/// </summary>
/// <param name="builder">The builder that receives the file.</param>
/// <param name="relativePath">Path relative to the root; also the virtual path.</param>
/// <param name="source">Source classification recorded for the file.</param>
private void AddProjectFile(PythonVirtualFileSystem.Builder builder, string relativePath, PythonFileSource source)
{
    var candidate = Path.Combine(_rootPath, relativePath);
    if (File.Exists(candidate))
    {
        builder.AddFile(relativePath, candidate, source);
    }
}
/// <summary>
/// Source-generated, case-insensitive regex for a <c>requires-python = ...</c>
/// assignment. After optional whitespace around <c>=</c> and an optional single
/// or double quote, the named group <c>version</c> captures one or more
/// characters up to (not including) the next quote or newline.
/// </summary>
[GeneratedRegex(@"requires-python\s*=\s*[""']?(?<version>[^""'\n]+)", RegexOptions.IgnoreCase)]
private static partial Regex RequiresPythonPattern();

View File

@@ -54,9 +54,9 @@ internal sealed partial class PythonVirtualFileSystem
public int FileCount => _files.Count;
/// <summary>
/// Gets all files in the virtual filesystem.
/// Gets all files in the virtual filesystem, ordered deterministically by virtual path.
/// </summary>
public IEnumerable<PythonVirtualFile> Files => _files.Values;
public IEnumerable<PythonVirtualFile> Files => Paths.Select(path => _files[path]);
/// <summary>
/// Gets all virtual paths in sorted order.
@@ -230,17 +230,17 @@ internal sealed partial class PythonVirtualFileSystem
var normalized = NormalizePath(virtualPath);
var prefix = normalized.Length == 0 ? string.Empty : normalized + "/";
foreach (var kvp in _files)
foreach (var key in _files.Keys.OrderBy(static path => path, StringComparer.Ordinal))
{
if (!kvp.Key.StartsWith(prefix, StringComparison.Ordinal))
if (!key.StartsWith(prefix, StringComparison.Ordinal))
{
continue;
}
var relative = kvp.Key[prefix.Length..];
var relative = key[prefix.Length..];
if (regex.IsMatch(relative))
{
yield return kvp.Value;
yield return _files[key];
}
}
}
@@ -291,11 +291,32 @@ internal sealed partial class PythonVirtualFileSystem
{
private readonly Dictionary<string, PythonVirtualFile> _files = new(StringComparer.Ordinal);
private readonly HashSet<string> _processedArchives = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, int> _archiveAliasCounters = new(StringComparer.OrdinalIgnoreCase);
private readonly HashSet<string> _sourceTreeRoots = new(StringComparer.Ordinal);
private readonly HashSet<string> _sitePackagesPaths = new(StringComparer.Ordinal);
private readonly HashSet<string> _editablePaths = new(StringComparer.Ordinal);
private readonly HashSet<string> _zipArchivePaths = new(StringComparer.Ordinal);
/// <summary>
/// Recursively adds the files of an on-disk directory to the builder, mounted
/// under the given virtual prefix. A directory that does not exist is ignored.
/// </summary>
/// <param name="directoryPath">Directory on disk; resolved to a full path before walking.</param>
/// <param name="virtualPrefix">Virtual location for the files; normalized before use.</param>
/// <param name="source">Source classification passed on for every added file.</param>
/// <param name="layerDigest">Optional layer digest passed on for every added file.</param>
/// <param name="includeHiddenFiles">Passed on to the recursive walk, where it controls whether dot-prefixed file names are skipped.</param>
/// <returns>This builder, for chaining.</returns>
public Builder AddDirectory(
    string directoryPath,
    string virtualPrefix,
    PythonFileSource source,
    string? layerDigest = null,
    bool includeHiddenFiles = false)
{
    if (Directory.Exists(directoryPath))
    {
        AddDirectoryRecursive(
            Path.GetFullPath(directoryPath),
            NormalizePath(virtualPrefix),
            source,
            layerDigest,
            includeHiddenFiles);
    }

    return this;
}
/// <summary>
/// Adds files from a site-packages directory.
/// </summary>
@@ -308,7 +329,7 @@ internal sealed partial class PythonVirtualFileSystem
var basePath = Path.GetFullPath(sitePackagesPath);
_sitePackagesPaths.Add(string.Empty); // Root of the VFS
AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SitePackages, layerDigest);
AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SitePackages, layerDigest, includeHiddenFiles: false);
return this;
}
@@ -322,12 +343,13 @@ internal sealed partial class PythonVirtualFileSystem
return this;
}
_zipArchivePaths.Add(wheelPath);
var virtualRoot = CreateArchiveVirtualRoot("wheel", wheelPath);
_zipArchivePaths.Add(virtualRoot);
try
{
using var archive = ZipFile.OpenRead(wheelPath);
AddArchiveEntries(archive, wheelPath, PythonFileSource.Wheel);
AddArchiveEntries(archive, wheelPath, virtualRoot, PythonFileSource.Wheel);
}
catch (InvalidDataException)
{
@@ -351,7 +373,8 @@ internal sealed partial class PythonVirtualFileSystem
return this;
}
_zipArchivePaths.Add(zipappPath);
var virtualRoot = CreateArchiveVirtualRoot("zipapp", zipappPath);
_zipArchivePaths.Add(virtualRoot);
try
{
@@ -366,7 +389,7 @@ internal sealed partial class PythonVirtualFileSystem
stream.Position = offset;
using var archive = new ZipArchive(stream, ZipArchiveMode.Read);
AddArchiveEntries(archive, zipappPath, PythonFileSource.Zipapp);
AddArchiveEntries(archive, zipappPath, virtualRoot, PythonFileSource.Zipapp);
}
catch (InvalidDataException)
{
@@ -390,14 +413,15 @@ internal sealed partial class PythonVirtualFileSystem
return this;
}
_zipArchivePaths.Add(sdistPath);
var virtualRoot = CreateArchiveVirtualRoot("sdist", sdistPath);
_zipArchivePaths.Add(virtualRoot);
try
{
if (sdistPath.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
{
using var archive = ZipFile.OpenRead(sdistPath);
AddArchiveEntries(archive, sdistPath, PythonFileSource.Sdist);
AddArchiveEntries(archive, sdistPath, virtualRoot, PythonFileSource.Sdist);
}
// Note: .tar.gz support would require TarReader from System.Formats.Tar
// For now, we handle the common .zip case
@@ -427,7 +451,7 @@ internal sealed partial class PythonVirtualFileSystem
var basePath = Path.GetFullPath(editablePath);
var prefix = string.IsNullOrEmpty(packageName) ? string.Empty : packageName + "/";
_editablePaths.Add(prefix.TrimEnd('/'));
AddDirectoryRecursive(basePath, prefix.TrimEnd('/'), PythonFileSource.Editable, layerDigest: null);
AddDirectoryRecursive(basePath, prefix.TrimEnd('/'), PythonFileSource.Editable, layerDigest: null, includeHiddenFiles: false);
return this;
}
@@ -443,7 +467,7 @@ internal sealed partial class PythonVirtualFileSystem
var basePath = Path.GetFullPath(sourcePath);
_sourceTreeRoots.Add(string.Empty); // Root of the VFS
AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SourceTree, layerDigest: null);
AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SourceTree, layerDigest: null, includeHiddenFiles: false);
return this;
}
@@ -522,7 +546,8 @@ internal sealed partial class PythonVirtualFileSystem
string basePath,
string virtualPrefix,
PythonFileSource source,
string? layerDigest)
string? layerDigest,
bool includeHiddenFiles)
{
try
{
@@ -537,7 +562,7 @@ internal sealed partial class PythonVirtualFileSystem
// Skip __pycache__ and hidden files
if (normalizedRelative.Contains("/__pycache__/", StringComparison.Ordinal) ||
normalizedRelative.StartsWith("__pycache__/", StringComparison.Ordinal) ||
Path.GetFileName(file).StartsWith('.'))
(!includeHiddenFiles && Path.GetFileName(file).StartsWith('.')))
{
continue;
}
@@ -566,7 +591,7 @@ internal sealed partial class PythonVirtualFileSystem
}
}
private void AddArchiveEntries(ZipArchive archive, string archivePath, PythonFileSource source)
private void AddArchiveEntries(ZipArchive archive, string archivePath, string virtualRoot, PythonFileSource source)
{
foreach (var entry in archive.Entries)
{
@@ -576,7 +601,8 @@ internal sealed partial class PythonVirtualFileSystem
continue;
}
var virtualPath = entry.FullName.Replace('\\', '/');
var entryPath = entry.FullName.Replace('\\', '/').TrimStart('/');
var virtualPath = $"{virtualRoot}/{entryPath}";
// Skip __pycache__ in archives too
if (virtualPath.Contains("/__pycache__/", StringComparison.Ordinal) ||
@@ -587,7 +613,7 @@ internal sealed partial class PythonVirtualFileSystem
AddFile(
virtualPath,
entry.FullName,
entryPath,
source,
layerDigest: null,
archivePath: archivePath,
@@ -595,6 +621,22 @@ internal sealed partial class PythonVirtualFileSystem
}
}
/// <summary>
/// Produces the virtual root under which an archive's entries are mounted:
/// <c>archives/{kind}/{file name}</c> for the first archive with a given kind
/// and file name, and the same value suffixed with <c>~N</c> (N = 2, 3, ...)
/// for later archives that share that kind and file name.
/// </summary>
/// <param name="kind">Archive category used as a path segment (callers pass e.g. "wheel", "zipapp", "sdist").</param>
/// <param name="archivePath">Path of the archive; only its file name is used.</param>
/// <returns>The virtual root for this archive.</returns>
private string CreateArchiveVirtualRoot(string kind, string archivePath)
{
    var fileName = Path.GetFileName(archivePath);
    var counterKey = $"{kind}/{fileName}";

    if (_archiveAliasCounters.TryGetValue(counterKey, out var previous))
    {
        // Seen before: bump the counter and disambiguate with it.
        var occurrence = previous + 1;
        _archiveAliasCounters[counterKey] = occurrence;
        return $"archives/{kind}/{fileName}~{occurrence}";
    }

    _archiveAliasCounters[counterKey] = 1;
    return $"archives/{kind}/{fileName}";
}
private static long FindZipOffset(Stream stream)
{
// ZIP files start with PK\x03\x04 signature

View File

@@ -1,18 +1,13 @@
using System.Linq;
using System.Text.Json;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python;
public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
{
private static readonly EnumerationOptions Enumeration = new()
{
RecurseSubdirectories = true,
IgnoreInaccessible = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
};
public string Id => "python";
public string DisplayName => "Python Analyzer";
@@ -43,73 +38,33 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
// Analyze zipapps in workspace and container layers
var zipappAnalysis = PythonZipappAdapter.AnalyzeAll(context.RootPath);
// Collect dist-info directories from both root and container layers
var distInfoDirectories = CollectDistInfoDirectories(context.RootPath);
var projectAnalysis = await PythonProjectAnalysis.AnalyzeAsync(context.RootPath, cancellationToken).ConfigureAwait(false);
var vfs = projectAnalysis.VirtualFileSystem;
foreach (var distInfoPath in distInfoDirectories)
var packageDiscovery = new PythonPackageDiscovery();
var discoveryResult = await packageDiscovery.DiscoverAsync(vfs, cancellationToken).ConfigureAwait(false);
foreach (var package in discoveryResult.Packages
.Where(static p => !string.IsNullOrWhiteSpace(p.Version))
.OrderBy(static p => p.NormalizedName, StringComparer.Ordinal)
.ThenBy(static p => p.Version, StringComparer.Ordinal))
{
cancellationToken.ThrowIfCancellationRequested();
PythonDistribution? distribution;
try
{
distribution = await PythonDistributionLoader.LoadAsync(context, distInfoPath, cancellationToken).ConfigureAwait(false);
}
catch (IOException)
{
continue;
}
catch (JsonException)
{
continue;
}
catch (UnauthorizedAccessException)
{
continue;
}
if (distribution is null)
{
continue;
}
var metadata = distribution.SortedMetadata.ToList();
if (lockData.TryGet(distribution.Name, distribution.Version, out var lockEntry))
{
matchedLocks.Add(lockEntry!.DeclarationKey);
AppendLockMetadata(metadata, lockEntry);
}
else if (hasLockEntries)
{
metadata.Add(new KeyValuePair<string, string?>("lockMissing", "true"));
}
// Append runtime information
AppendRuntimeMetadata(metadata, runtimeInfo);
// Append environment variables (PYTHONPATH/PYTHONHOME)
AppendEnvironmentMetadata(metadata, environment);
// Append startup hooks warnings
AppendStartupHooksMetadata(metadata, startupHooks);
// Append zipapp analysis
AppendZipappMetadata(metadata, zipappAnalysis);
// Collect evidence including startup hooks
var evidence = distribution.SortedEvidence.ToList();
evidence.AddRange(startupHooks.ToEvidence(context));
writer.AddFromPurl(
analyzerId: "python",
purl: distribution.Purl,
name: distribution.Name,
version: distribution.Version,
type: "pypi",
metadata: metadata,
evidence: evidence,
usedByEntrypoint: distribution.UsedByEntrypoint);
await EmitDiscoveredPackageAsync(
context,
writer,
vfs,
package,
lockData,
matchedLocks,
hasLockEntries,
runtimeInfo,
environment,
startupHooks,
zipappAnalysis,
cancellationToken)
.ConfigureAwait(false);
}
if (lockData.Entries.Count > 0)
@@ -121,18 +76,18 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
continue;
}
var normalizedName = PythonPathHelper.NormalizePackageName(entry.Name);
var declaredMetadata = new List<KeyValuePair<string, string?>>
{
new("declaredOnly", "true"),
new("pkg.kind", "DeclaredOnly"),
new("pkg.confidence", PythonPackageConfidence.Medium.ToString()),
new("pkg.location", entry.Locator),
new("lockSource", entry.Source),
new("lockLocator", entry.Locator)
};
AppendCommonLockFields(declaredMetadata, entry);
var version = string.IsNullOrWhiteSpace(entry.Version) ? "editable" : entry.Version!;
var purl = $"pkg:pypi/{PythonPathHelper.NormalizePackageName(entry.Name)}@{version}";
var evidence = new[]
{
new LanguageComponentEvidence(
@@ -143,6 +98,49 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
Sha256: null)
};
if (string.IsNullOrWhiteSpace(entry.Version))
{
var editableSpec = NormalizeEditableSpec(context, entry.EditablePath, out var specRedacted);
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.source", entry.Source));
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.locator", entry.Locator));
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.versionSpec", editableSpec));
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.scope", "unknown"));
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.sourceType", "editable"));
if (!string.IsNullOrWhiteSpace(editableSpec))
{
declaredMetadata.Add(new KeyValuePair<string, string?>("lockEditablePath", editableSpec));
}
if (specRedacted)
{
declaredMetadata.Add(new KeyValuePair<string, string?>("lockEditablePathRedacted", "true"));
}
var componentKey = LanguageExplicitKey.Create("python", "pypi", normalizedName, editableSpec, entry.Locator);
writer.AddFromExplicitKey(
analyzerId: "python",
componentKey: componentKey,
purl: null,
name: entry.Name,
version: null,
type: "pypi",
metadata: declaredMetadata,
evidence: evidence,
usedByEntrypoint: false);
continue;
}
AppendCommonLockFields(declaredMetadata, entry);
var version = entry.Version!.Trim();
if (version.Length == 0)
{
continue;
}
var purl = $"pkg:pypi/{normalizedName}@{version}";
writer.AddFromPurl(
analyzerId: "python",
purl: purl,
@@ -156,6 +154,284 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
}
}
/// <summary>
/// Turns the editable path of a lock entry into a forward-slashed spec.
/// Relative paths are returned as written (quotes stripped). Absolute paths
/// are converted to a path relative to the analyzer context when that yields
/// a usable value; otherwise only the last path segment is returned and
/// <paramref name="redacted"/> is set.
/// </summary>
/// <param name="context">Analyzer context used to relativize absolute paths.</param>
/// <param name="editablePath">Raw editable path; may be null, blank or quoted.</param>
/// <param name="redacted">True when an absolute path was reduced to its last segment.</param>
/// <returns>The normalized spec, or an empty string when no path was supplied.</returns>
private static string NormalizeEditableSpec(LanguageAnalyzerContext context, string? editablePath, out bool redacted)
{
    redacted = false;

    // Null, whitespace-only and quotes-only inputs all collapse to "".
    var raw = editablePath?.Trim().Trim('"', '\'');
    if (string.IsNullOrEmpty(raw))
    {
        return string.Empty;
    }

    var forwardSlashed = raw.Replace('\\', '/');
    var startsWithDriveLetter = raw.Length >= 2 && char.IsLetter(raw[0]) && raw[1] == ':';
    var rooted = Path.IsPathRooted(raw)
        || startsWithDriveLetter
        || forwardSlashed.StartsWith("/", StringComparison.Ordinal)
        || forwardSlashed.StartsWith("//", StringComparison.Ordinal);

    if (!rooted)
    {
        return forwardSlashed;
    }

    try
    {
        var relative = context.GetRelativePath(raw);
        if (!string.IsNullOrWhiteSpace(relative)
            && relative != "."
            && !relative.StartsWith("..", StringComparison.Ordinal)
            && relative.IndexOf(':') < 0)
        {
            return relative.Replace('\\', '/');
        }
    }
    catch
    {
        // Best effort: any failure to relativize falls through to redaction.
    }

    // The absolute path could not be expressed relative to the context:
    // keep only the final segment.
    redacted = true;
    var withoutTrailingSlash = forwardSlashed.TrimEnd('/');
    var separatorIndex = withoutTrailingSlash.LastIndexOf('/');

    string leaf;
    if (separatorIndex >= 0 && separatorIndex < withoutTrailingSlash.Length - 1)
    {
        leaf = withoutTrailingSlash[(separatorIndex + 1)..];
    }
    else
    {
        leaf = withoutTrailingSlash;
    }

    return string.IsNullOrWhiteSpace(leaf) ? "editable" : leaf;
}
/// <summary>
/// Writes one component for a discovered package. The component is built from
/// the first of three sources that applies: (1) a physical metadata directory
/// loaded through <c>PythonDistributionLoader</c>, (2) archive-backed metadata
/// loaded through <c>PythonDistributionVfsLoader</c>, or (3) a fallback PURL
/// and evidence assembled from the discovery result itself.
/// Nothing is written when the trimmed version is empty.
/// </summary>
/// <param name="context">Analyzer context passed to the loaders and path helpers.</param>
/// <param name="writer">Receives the emitted component.</param>
/// <param name="vfs">Virtual filesystem used to locate the package's metadata file.</param>
/// <param name="package">The discovered package; its version is dereferenced with <c>!</c>, so it must not be null.</param>
/// <param name="lockData">Lock data consulted for a matching name/version entry.</param>
/// <param name="matchedLocks">Receives the declaration key of a matching lock entry.</param>
/// <param name="hasLockEntries">When true and no lock entry matches, <c>lockMissing=true</c> is added.</param>
/// <param name="runtimeInfo">Runtime information appended in the physical-metadata branch.</param>
/// <param name="environment">Environment information appended in the physical-metadata branch.</param>
/// <param name="startupHooks">Startup hooks appended as metadata and evidence in the physical-metadata branch.</param>
/// <param name="zipappAnalysis">Zipapp analysis appended in the physical-metadata branch.</param>
/// <param name="cancellationToken">Forwarded to the distribution loaders.</param>
private static async Task EmitDiscoveredPackageAsync(
LanguageAnalyzerContext context,
LanguageComponentWriter writer,
PythonVirtualFileSystem vfs,
PythonPackageInfo package,
PythonLockData lockData,
ISet<string> matchedLocks,
bool hasLockEntries,
PythonRuntimeInfo? runtimeInfo,
PythonEnvironment environment,
PythonStartupHooks startupHooks,
PythonZipappAnalysis zipappAnalysis,
CancellationToken cancellationToken)
{
// A null Version would throw here; the call site visible in this file filters out blank versions first.
var version = package.Version!.Trim();
if (version.Length == 0)
{
return;
}
// Discovery-derived metadata (pkg.kind / pkg.confidence / pkg.location) is shared by all three branches.
var metadata = new List<KeyValuePair<string, string?>>();
metadata.AddRange(BuildPackageMetadata(context, vfs, package));
// Lock correlation happens before any metadata is loaded from disk or archive.
if (lockData.TryGet(package.Name, version, out var lockEntry))
{
matchedLocks.Add(lockEntry!.DeclarationKey);
AppendLockMetadata(metadata, lockEntry);
}
else if (hasLockEntries)
{
metadata.Add(new KeyValuePair<string, string?>("lockMissing", "true"));
}
// Branch 1: metadata file exists on disk (not inside an archive).
var metadataDirectory = TryResolvePhysicalMetadataDirectory(vfs, package, out var metadataFile);
if (metadataDirectory is not null)
{
PythonDistribution? distribution;
try
{
distribution = await PythonDistributionLoader.LoadAsync(context, metadataDirectory, cancellationToken).ConfigureAwait(false);
}
// NOTE(review): on these failures (and on a null distribution below) the method returns without
// emitting anything, although the lock entry may already have been recorded in matchedLocks above;
// confirm that treating the lock entry as matched in that case is intended.
catch (IOException)
{
return;
}
catch (JsonException)
{
return;
}
catch (UnauthorizedAccessException)
{
return;
}
if (distribution is null)
{
return;
}
// Loader metadata comes first, then the discovery/lock metadata, then runtime/environment/hooks/zipapp.
var fullMetadata = distribution.SortedMetadata.ToList();
fullMetadata.AddRange(metadata);
AppendRuntimeMetadata(fullMetadata, runtimeInfo);
AppendEnvironmentMetadata(fullMetadata, environment);
AppendStartupHooksMetadata(fullMetadata, startupHooks);
AppendZipappMetadata(fullMetadata, zipappAnalysis);
var evidence = distribution.SortedEvidence.ToList();
evidence.AddRange(startupHooks.ToEvidence(context));
writer.AddFromPurl(
analyzerId: "python",
purl: distribution.Purl,
name: distribution.Name,
version: distribution.Version,
type: "pypi",
metadata: fullMetadata,
evidence: evidence,
usedByEntrypoint: distribution.UsedByEntrypoint);
return;
}
// Branch 2: metadata file lives inside an archive; load it through the VFS-based loader.
// NOTE(review): unlike branch 1, runtime/environment/startup-hook/zipapp metadata and startup-hook
// evidence are not appended here or in the fallback below — confirm this difference is intended.
if (metadataFile is not null && metadataFile.IsFromArchive)
{
var archiveDistribution = await PythonDistributionVfsLoader
.LoadAsync(context, vfs, package, cancellationToken)
.ConfigureAwait(false);
if (archiveDistribution is not null)
{
var fullMetadata = archiveDistribution.SortedMetadata.ToList();
fullMetadata.AddRange(metadata);
writer.AddFromPurl(
analyzerId: "python",
purl: archiveDistribution.Purl,
name: archiveDistribution.Name,
version: archiveDistribution.Version,
type: "pypi",
metadata: fullMetadata,
evidence: archiveDistribution.SortedEvidence,
usedByEntrypoint: archiveDistribution.UsedByEntrypoint);
return;
}
}
// Branch 3 (fallback): no loadable distribution; emit from the discovery data alone.
var purl = $"pkg:pypi/{PythonPathHelper.NormalizePackageName(package.Name)}@{version}";
var evidenceFallback = BuildPackageEvidence(context, vfs, package, metadataFile);
writer.AddFromPurl(
analyzerId: "python",
purl: purl,
name: package.Name,
version: version,
type: "pypi",
metadata: metadata,
evidence: evidenceFallback,
usedByEntrypoint: false);
}
/// <summary>
/// Looks up the package's metadata file (<c>PKG-INFO</c> for eggs,
/// <c>METADATA</c> otherwise) in the virtual filesystem and, when it is a
/// file on disk rather than an archive entry, returns the directory that
/// contains it.
/// </summary>
/// <param name="vfs">Virtual filesystem to query.</param>
/// <param name="package">Package whose metadata path and kind are used.</param>
/// <param name="metadataFile">The file found in the VFS, if any — also set when it comes from an archive.</param>
/// <returns>The physical directory of the metadata file, or null when there is none.</returns>
private static string? TryResolvePhysicalMetadataDirectory(
    PythonVirtualFileSystem vfs,
    PythonPackageInfo package,
    out PythonVirtualFile? metadataFile)
{
    metadataFile = null;

    if (string.IsNullOrWhiteSpace(package.MetadataPath))
    {
        return null;
    }

    var fileName = package.Kind == PythonPackageKind.Egg ? "PKG-INFO" : "METADATA";
    metadataFile = vfs.GetFile($"{package.MetadataPath}/{fileName}");

    var isPhysical = metadataFile is not null && !metadataFile.IsFromArchive;
    return isPhysical ? Path.GetDirectoryName(metadataFile!.AbsolutePath) : null;
}
/// <summary>
/// Lazily yields the <c>pkg.kind</c>, <c>pkg.confidence</c> and
/// <c>pkg.location</c> metadata entries for a discovered package. When the
/// package carries no location, one is derived from its metadata file: the
/// archive path for archive-backed files, otherwise the containing directory
/// (or the file path itself when no directory name is available). A location
/// that is still blank is reported as <c>"."</c>.
/// </summary>
/// <param name="context">Analyzer context used to relativize paths.</param>
/// <param name="vfs">Virtual filesystem used to find the metadata file.</param>
/// <param name="package">The discovered package.</param>
private static IEnumerable<KeyValuePair<string, string?>> BuildPackageMetadata(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    PythonPackageInfo package)
{
    var resolved = package.Location;
    var needsLookup = string.IsNullOrWhiteSpace(resolved) && !string.IsNullOrWhiteSpace(package.MetadataPath);

    if (needsLookup)
    {
        var fileName = package.Kind == PythonPackageKind.Egg ? "PKG-INFO" : "METADATA";
        var metadataFile = vfs.GetFile($"{package.MetadataPath}/{fileName}");

        if (metadataFile is not null)
        {
            if (metadataFile.IsFromArchive && metadataFile.ArchivePath is not null)
            {
                resolved = PythonPathHelper.NormalizeRelative(context, metadataFile.ArchivePath);
            }
            else
            {
                // Prefer the containing directory; fall back to the file path itself.
                var directory = Path.GetDirectoryName(metadataFile.AbsolutePath);
                resolved = directory is { Length: > 0 }
                    ? PythonPathHelper.NormalizeRelative(context, directory)
                    : PythonPathHelper.NormalizeRelative(context, metadataFile.AbsolutePath);
            }
        }
    }

    var reportedLocation = string.IsNullOrWhiteSpace(resolved) ? "." : resolved.Replace('\\', '/');

    yield return new KeyValuePair<string, string?>("pkg.kind", package.Kind.ToString());
    yield return new KeyValuePair<string, string?>("pkg.confidence", package.Confidence.ToString());
    yield return new KeyValuePair<string, string?>("pkg.location", reportedLocation);
}
/// <summary>
/// Builds the single file-evidence record for a package's metadata file
/// (<c>PKG-INFO</c> for eggs, <c>METADATA</c> otherwise). The supplied
/// <paramref name="metadataFile"/> is used when present; otherwise the file is
/// looked up in the virtual filesystem via the package's metadata path.
/// For archive-backed files the locator is the archive path and the value is
/// the file's <c>AbsolutePath</c>; for files on disk the locator is the file
/// path and the value is null.
/// </summary>
/// <param name="context">Analyzer context used to relativize the locator.</param>
/// <param name="vfs">Virtual filesystem used for the fallback lookup.</param>
/// <param name="package">The discovered package.</param>
/// <param name="metadataFile">Already-resolved metadata file, or null.</param>
/// <returns>A one-element collection, or an empty one when no metadata file is found.</returns>
private static IReadOnlyCollection<LanguageComponentEvidence> BuildPackageEvidence(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    PythonPackageInfo package,
    PythonVirtualFile? metadataFile)
{
    var evidenceName = package.Kind == PythonPackageKind.Egg ? "PKG-INFO" : "METADATA";

    // Use the caller's file if given; otherwise look it up by metadata path.
    var evidenceFile = metadataFile;
    if (evidenceFile is null && !string.IsNullOrWhiteSpace(package.MetadataPath))
    {
        evidenceFile = vfs.GetFile($"{package.MetadataPath}/{evidenceName}");
    }

    if (evidenceFile is null)
    {
        return Array.Empty<LanguageComponentEvidence>();
    }

    var locator = evidenceFile.IsFromArchive && evidenceFile.ArchivePath is not null
        ? PythonPathHelper.NormalizeRelative(context, evidenceFile.ArchivePath)
        : PythonPathHelper.NormalizeRelative(context, evidenceFile.AbsolutePath);
    var value = evidenceFile.IsFromArchive ? evidenceFile.AbsolutePath : null;

    return new[]
    {
        new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            evidenceName,
            locator,
            Value: value,
            Sha256: null)
    };
}
private static void AppendLockMetadata(List<KeyValuePair<string, string?>> metadata, PythonLockEntry entry)
{
metadata.Add(new KeyValuePair<string, string?>("lockSource", entry.Source));
@@ -286,41 +562,4 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
}
}
}
/// <summary>
/// Gathers <c>*.dist-info</c> and <c>*.egg-info</c> directories found under
/// <paramref name="rootPath"/> (using the class's <c>Enumeration</c> options)
/// together with those reported by
/// <c>PythonContainerAdapter.DiscoverDistInfoDirectories</c>, de-duplicated
/// case-insensitively and returned in ordinal order.
/// </summary>
/// <param name="rootPath">Root directory to search.</param>
/// <returns>The distinct directories, sorted ordinally.</returns>
private static IReadOnlyCollection<string> CollectDistInfoDirectories(string rootPath)
{
    var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var pattern in new[] { "*.dist-info", "*.egg-info" })
    {
        CollectMatching(rootPath, pattern, found);
    }

    // Also collect from OCI container layers
    foreach (var layerDirectory in PythonContainerAdapter.DiscoverDistInfoDirectories(rootPath))
    {
        found.Add(layerDirectory);
    }

    // Entries are distinct ignoring case, hence distinct ordinally: the sorted
    // order is fully determined.
    var ordered = found.ToArray();
    Array.Sort(ordered, StringComparer.Ordinal);
    return ordered;

    static void CollectMatching(string basePath, string pattern, ISet<string> accumulator)
    {
        try
        {
            accumulator.UnionWith(Directory.EnumerateDirectories(basePath, pattern, Enumeration));
        }
        catch (IOException)
        {
            // Ignore enumeration errors
        }
        catch (UnauthorizedAccessException)
        {
            // Ignore access errors
        }
    }
}
}

View File

@@ -0,0 +1,14 @@
# Python Analyzer Tasks
## Python Detection Gaps (Sprint 0405)
| Task ID | Status | Notes | Updated (UTC) |
| --- | --- | --- | --- |
| SCAN-PY-405-001 | DONE | Wire layout-aware VFS/discovery into `PythonLanguageAnalyzer`. | 2025-12-13 |
| SCAN-PY-405-002 | BLOCKED | Preserve dist-info/egg-info evidence; emit explicit-key components where needed (incl. editable lock entries; no `@editable` PURLs). | 2025-12-13 |
| SCAN-PY-405-003 | BLOCKED | Blocked on Action 2: lock/requirements precedence + supported formats scope. | 2025-12-13 |
| SCAN-PY-405-004 | BLOCKED | Blocked on Action 3: container overlay contract (whiteouts + ordering semantics). | 2025-12-13 |
| SCAN-PY-405-005 | BLOCKED | Blocked on Action 4: vendored deps representation contract (identity/scope vs metadata-only). | 2025-12-13 |
| SCAN-PY-405-006 | BLOCKED | Blocked on Interlock 4: "used-by-entrypoint" semantics (avoid turning heuristics into truth). | 2025-12-13 |
| SCAN-PY-405-007 | BLOCKED | Blocked on Actions 2-4: fixtures for includes/editables, overlay/whiteouts, vendoring. | 2025-12-13 |
| SCAN-PY-405-008 | DONE | Docs + deterministic offline bench for Python analyzer contract. | 2025-12-13 |