up
Some checks failed
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Some checks failed
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
This commit is contained in:
@@ -87,25 +87,16 @@ internal sealed partial class PythonEntrypointDiscovery
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var absolutePath = file.AbsolutePath;
|
||||
if (file.IsFromArchive)
|
||||
try
|
||||
{
|
||||
continue; // Can't read from archive directly yet
|
||||
}
|
||||
|
||||
var fullPath = Path.Combine(_rootPath, absolutePath);
|
||||
if (!File.Exists(fullPath))
|
||||
{
|
||||
fullPath = absolutePath;
|
||||
if (!File.Exists(fullPath))
|
||||
using var stream = await _vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
|
||||
if (stream is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var content = await File.ReadAllTextAsync(fullPath, cancellationToken).ConfigureAwait(false);
|
||||
using var reader = new StreamReader(stream);
|
||||
var content = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
|
||||
ParseEntryPointsTxt(content, file.VirtualPath);
|
||||
}
|
||||
catch (IOException)
|
||||
@@ -225,7 +216,7 @@ internal sealed partial class PythonEntrypointDiscovery
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
if (file.VirtualPath == "__main__.py")
|
||||
if (string.Equals(file.AbsolutePath, "__main__.py", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
_entrypoints.Add(new PythonEntrypoint(
|
||||
Name: "__main__",
|
||||
|
||||
@@ -48,11 +48,15 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
|
||||
continue;
|
||||
}
|
||||
|
||||
// Look for .egg-info in the target
|
||||
var (version, metadata, topLevel) = await ReadEggInfoAsync(vfs, targetPath, packageName, cancellationToken).ConfigureAwait(false);
|
||||
// The editable target path in .egg-link can be absolute and host-specific.
|
||||
// Prefer the VFS-mounted editable tree under the packageName prefix.
|
||||
var editableRoot = packageName;
|
||||
|
||||
// Look for .egg-info in the editable root
|
||||
var (version, metadata, topLevel) = await ReadEggInfoAsync(vfs, editableRoot, packageName, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
// Also look for pyproject.toml for additional metadata
|
||||
var pyprojectInfo = await ReadPyprojectAsync(vfs, targetPath, cancellationToken).ConfigureAwait(false);
|
||||
var pyprojectInfo = await ReadPyprojectAsync(vfs, editableRoot, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (pyprojectInfo.Name is not null)
|
||||
{
|
||||
@@ -79,7 +83,7 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
|
||||
Extras: ImmutableArray<string>.Empty,
|
||||
RecordFiles: ImmutableArray<PythonRecordEntry>.Empty,
|
||||
InstallerTool: "pip",
|
||||
EditableTarget: targetPath,
|
||||
EditableTarget: editableRoot,
|
||||
IsDirectDependency: true, // Editable installs are always direct
|
||||
Confidence: PythonPackageConfidence.High);
|
||||
}
|
||||
@@ -110,7 +114,7 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
|
||||
|
||||
private static async Task<(string? Version, Dictionary<string, string> Metadata, ImmutableArray<string> TopLevel)> ReadEggInfoAsync(
|
||||
PythonVirtualFileSystem vfs,
|
||||
string targetPath,
|
||||
string editableRoot,
|
||||
string packageName,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
@@ -119,8 +123,7 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
|
||||
var topLevel = ImmutableArray<string>.Empty;
|
||||
|
||||
// Look for .egg-info directory
|
||||
var eggInfoPattern = $"{packageName}.egg-info";
|
||||
var eggInfoFiles = vfs.EnumerateFiles(targetPath, "*.egg-info/PKG-INFO").ToList();
|
||||
var eggInfoFiles = vfs.EnumerateFiles(editableRoot, "*.egg-info/PKG-INFO").ToList();
|
||||
|
||||
PythonVirtualFile? pkgInfoFile = null;
|
||||
foreach (var file in eggInfoFiles)
|
||||
@@ -204,10 +207,10 @@ internal sealed partial class PipEditableAdapter : IPythonPackagingAdapter
|
||||
|
||||
private static async Task<(string? Name, string? Version)> ReadPyprojectAsync(
|
||||
PythonVirtualFileSystem vfs,
|
||||
string targetPath,
|
||||
string editableRoot,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var pyprojectPath = $"{targetPath}/pyproject.toml";
|
||||
var pyprojectPath = $"{editableRoot}/pyproject.toml";
|
||||
|
||||
try
|
||||
{
|
||||
|
||||
@@ -37,12 +37,16 @@ internal sealed class PythonPackageDiscovery
|
||||
var errors = new List<PythonPackageDiscoveryError>();
|
||||
var searchPaths = new List<string>();
|
||||
|
||||
// Gather all search paths from VFS
|
||||
searchPaths.AddRange(vfs.SitePackagesPaths);
|
||||
searchPaths.AddRange(vfs.SourceTreeRoots);
|
||||
searchPaths.AddRange(vfs.EditablePaths);
|
||||
// Gather all search paths from VFS (ordered by intended precedence).
|
||||
// Later paths overwrite earlier ones on equal confidence.
|
||||
searchPaths.Add(string.Empty); // workspace root (pyproject/locks/etc.)
|
||||
searchPaths.AddRange(vfs.SourceTreeRoots.OrderBy(static path => path, StringComparer.Ordinal));
|
||||
searchPaths.AddRange(vfs.EditablePaths.OrderBy(static path => path, StringComparer.Ordinal));
|
||||
searchPaths.AddRange(vfs.SitePackagesPaths.OrderBy(static path => path, StringComparer.Ordinal));
|
||||
searchPaths.AddRange(vfs.ZipArchivePaths.OrderBy(static path => path, StringComparer.Ordinal));
|
||||
|
||||
foreach (var path in searchPaths.Distinct())
|
||||
var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var path in searchPaths.Where(p => seen.Add(p)))
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
|
||||
@@ -0,0 +1,937 @@
|
||||
using System.Buffers;
|
||||
using System.Globalization;
|
||||
using System.IO.Compression;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
using Packaging = StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
|
||||
internal static class PythonDistributionVfsLoader
|
||||
{
|
||||
/// <summary>
/// Builds a <see cref="PythonDistribution"/> for <paramref name="package"/> by reading its
/// dist-info / egg-info files through the virtual file system.
/// </summary>
/// <param name="context">Analyzer context, used to produce relative evidence locators.</param>
/// <param name="vfs">Virtual file system that hosts the package metadata directory.</param>
/// <param name="package">Discovered package whose <c>MetadataPath</c> points at the metadata directory.</param>
/// <param name="cancellationToken">Token observed before work starts and by every read.</param>
/// <returns>
/// The loaded distribution, or <c>null</c> when the metadata path is blank, the metadata file
/// is absent, or no non-blank name and version can be determined.
/// </returns>
public static async Task<PythonDistribution?> LoadAsync(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    Packaging.PythonPackageInfo package,
    CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(context);
    ArgumentNullException.ThrowIfNull(vfs);

    cancellationToken.ThrowIfCancellationRequested();

    var metadataRoot = package.MetadataPath;
    if (string.IsNullOrWhiteSpace(metadataRoot))
    {
        return null;
    }

    // Egg installs use PKG-INFO / installed-files.txt; everything else uses METADATA / RECORD.
    var isEggInfo = package.Kind == Packaging.PythonPackageKind.Egg;
    var (metadataName, recordName) = isEggInfo
        ? ("PKG-INFO", "installed-files.txt")
        : ("METADATA", "RECORD");

    var metadataFile = $"{metadataRoot}/{metadataName}";
    var wheelFile = $"{metadataRoot}/WHEEL";
    var entryPointsFile = $"{metadataRoot}/entry_points.txt";
    var installerFile = $"{metadataRoot}/INSTALLER";
    var directUrlFile = $"{metadataRoot}/direct_url.json";
    var recordFile = $"{metadataRoot}/{recordName}";

    if (!vfs.FileExists(metadataFile))
    {
        return null;
    }

    var document = await PythonMetadataDocumentVfs.LoadAsync(vfs, metadataFile, cancellationToken).ConfigureAwait(false);

    // Values from the metadata file win; the discovered package info is the fallback.
    var name = (document.GetFirst("Name") ?? package.Name)?.Trim();
    var version = (document.GetFirst("Version") ?? package.Version)?.Trim();
    if (string.IsNullOrWhiteSpace(name) || string.IsNullOrWhiteSpace(version))
    {
        return null;
    }

    var normalizedName = PythonPathHelper.NormalizePackageName(name);
    var purl = $"pkg:pypi/{normalizedName}@{version}";

    var metadataEntries = new List<KeyValuePair<string, string?>>();
    var evidenceEntries = new List<LanguageComponentEvidence>();

    AppendMetadata(metadataEntries, "distInfoPath", NormalizeVfsPath(metadataRoot));
    AppendMetadata(metadataEntries, "name", name);
    AppendMetadata(metadataEntries, "version", version);
    AppendMetadata(metadataEntries, "normalizedName", normalizedName);

    // Single-valued headers copied through as-is; the array order is the emission order.
    (string Key, string Header)[] simpleHeaders =
    {
        ("summary", "Summary"),
        ("license", "License"),
        ("licenseExpression", "License-Expression"),
        ("homePage", "Home-page"),
        ("author", "Author"),
        ("authorEmail", "Author-email"),
        ("projectUrl", "Project-URL"),
        ("requiresPython", "Requires-Python"),
    };

    foreach (var (key, header) in simpleHeaders)
    {
        AppendMetadata(metadataEntries, key, document.GetFirst(header));
    }

    AppendClassifiers(metadataEntries, document);

    var requirements = document.GetAll("Requires-Dist");
    if (requirements.Count > 0)
    {
        AppendMetadata(metadataEntries, "requiresDist", string.Join(';', requirements));
    }

    await AppendEntryPointsAsync(vfs, metadataEntries, entryPointsFile, cancellationToken).ConfigureAwait(false);

    if (!isEggInfo)
    {
        await AppendWheelMetadataAsync(vfs, metadataEntries, wheelFile, cancellationToken).ConfigureAwait(false);
    }

    var installer = await ReadSingleLineAsync(vfs, installerFile, cancellationToken).ConfigureAwait(false);
    if (!string.IsNullOrWhiteSpace(installer))
    {
        AppendMetadata(metadataEntries, "installer", installer);
    }

    await AppendDirectUrlAsync(context, vfs, metadataEntries, evidenceEntries, directUrlFile, cancellationToken)
        .ConfigureAwait(false);

    // File evidence for every metadata file that is present; the array order is the emission order.
    (string VirtualPath, string Source)[] evidenceCandidates =
    {
        (metadataFile, metadataName),
        (wheelFile, "WHEEL"),
        (entryPointsFile, "entry_points.txt"),
        (installerFile, "INSTALLER"),
        (recordFile, recordName),
        (directUrlFile, "direct_url.json"),
    };

    foreach (var (virtualPath, source) in evidenceCandidates)
    {
        AddOptionalFileEvidence(context, vfs, evidenceEntries, virtualPath, source);
    }

    var recordEntries = await ReadRecordAsync(vfs, recordFile, cancellationToken).ConfigureAwait(false);
    var verification = await VerifyRecordAsync(vfs, metadataRoot, recordEntries, cancellationToken).ConfigureAwait(false);

    // Counters are always emitted (even when zero), so they bypass AppendMetadata.
    var invariant = CultureInfo.InvariantCulture;
    metadataEntries.AddRange(new KeyValuePair<string, string?>[]
    {
        new("record.totalEntries", verification.TotalEntries.ToString(invariant)),
        new("record.hashedEntries", verification.HashedEntries.ToString(invariant)),
        new("record.missingFiles", verification.MissingFiles.ToString(invariant)),
        new("record.hashMismatches", verification.HashMismatches.ToString(invariant)),
        new("record.ioErrors", verification.IoErrors.ToString(invariant)),
    });

    if (verification.UnsupportedAlgorithms.Count > 0)
    {
        var sortedAlgorithms = verification.UnsupportedAlgorithms
            .OrderBy(static algorithm => algorithm, StringComparer.OrdinalIgnoreCase);
        AppendMetadata(metadataEntries, "record.unsupportedAlgorithms", string.Join(';', sortedAlgorithms));
    }

    evidenceEntries.AddRange(verification.Evidence);

    AppendMetadata(metadataEntries, "provenance", isEggInfo ? "egg-info" : "dist-info");

    // The final argument is the used-by-entrypoint flag, which this loader always reports as false.
    return new PythonDistribution(
        name,
        version,
        purl,
        metadataEntries,
        evidenceEntries,
        false);
}
|
||||
|
||||
/// <summary>
/// Adds a file evidence record for <paramref name="virtualPath"/> when the virtual file system
/// knows that file; does nothing otherwise.
/// </summary>
/// <param name="context">Analyzer context used to make the locator relative.</param>
/// <param name="vfs">Virtual file system to look the file up in.</param>
/// <param name="evidence">Collection that receives the evidence record.</param>
/// <param name="virtualPath">Virtual path of the candidate file.</param>
/// <param name="source">Source label stored on the evidence record.</param>
private static void AddOptionalFileEvidence(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    ICollection<LanguageComponentEvidence> evidence,
    string virtualPath,
    string source)
{
    if (vfs.GetFile(virtualPath) is not { } entry)
    {
        return;
    }

    // For archive members the locator is the archive itself and the member path moves into
    // Value; plain files are located by their own path and carry no Value.
    var archivePath = entry.IsFromArchive ? entry.ArchivePath : null;
    var locator = PythonPathHelper.NormalizeRelative(context, archivePath ?? entry.AbsolutePath);
    var memberPath = archivePath is null ? null : entry.AbsolutePath;

    evidence.Add(new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        source,
        locator,
        Value: memberPath,
        Sha256: null));
}
|
||||
|
||||
/// <summary>
/// Emits the distribution's trove classifiers: a joined <c>classifiers</c> entry, one indexed
/// <c>classifier[i]</c> entry per value, and an indexed <c>license.classifier[j]</c> entry for
/// each value that starts with <c>License ::</c>.
/// </summary>
/// <param name="metadata">Collection that receives the entries.</param>
/// <param name="metadataDocument">Parsed metadata document providing the <c>Classifier</c> headers.</param>
private static void AppendClassifiers(
    ICollection<KeyValuePair<string, string?>> metadata,
    PythonMetadataDocumentVfs metadataDocument)
{
    var rawValues = metadataDocument.GetAll("Classifier");
    if (rawValues.Count == 0)
    {
        return;
    }

    // Trim, drop blanks, then sort ordinally so output does not depend on header order.
    var sorted = new List<string>(rawValues.Count);
    foreach (var rawValue in rawValues)
    {
        var trimmed = rawValue.Trim();
        if (trimmed.Length > 0)
        {
            sorted.Add(trimmed);
        }
    }

    if (sorted.Count == 0)
    {
        return;
    }

    sorted.Sort(StringComparer.Ordinal);

    AppendMetadata(metadata, "classifiers", string.Join(';', sorted));

    var position = 0;
    var licensePosition = 0;
    foreach (var classifier in sorted)
    {
        AppendMetadata(metadata, $"classifier[{position}]", classifier);
        position++;

        if (!classifier.StartsWith("License ::", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        AppendMetadata(metadata, $"license.classifier[{licensePosition}]", classifier);
        licensePosition++;
    }
}
|
||||
|
||||
/// <summary>
/// Parses an INI-style <c>entry_points.txt</c> and emits one <c>entryPoints.{group}</c> entry
/// per section, whose value is the section's <c>name=target</c> pairs joined with <c>;</c>.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="metadata">Collection that receives the entries.</param>
/// <param name="entryPointsVirtualPath">Virtual path of the entry points file.</param>
/// <param name="cancellationToken">Token passed to the read operations.</param>
/// <remarks>A missing file, an unopenable stream, or an <see cref="IOException"/> yields no entries.</remarks>
private static async Task AppendEntryPointsAsync(
    PythonVirtualFileSystem vfs,
    ICollection<KeyValuePair<string, string?>> metadata,
    string entryPointsVirtualPath,
    CancellationToken cancellationToken)
{
    if (!vfs.FileExists(entryPointsVirtualPath))
    {
        return;
    }

    string text;
    try
    {
        await using var input = await vfs.OpenReadAsync(entryPointsVirtualPath, cancellationToken).ConfigureAwait(false);
        if (input is null)
        {
            return;
        }

        using var textReader = new StreamReader(input, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);
        text = await textReader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
    }
    catch (IOException)
    {
        return;
    }

    // Sections are keyed case-insensitively and enumerated in sorted order; pairs inside a
    // section keep file order. A section only appears once it has at least one valid pair.
    var sections = new SortedDictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);
    string? activeSection = null;

    foreach (var physicalLine in text.Split('\n'))
    {
        var line = physicalLine.Trim();

        var isBlankOrComment = line.Length == 0 || line.StartsWith('#');
        if (isBlankOrComment)
        {
            continue;
        }

        if (line.StartsWith('[') && line.EndsWith(']'))
        {
            var header = line[1..^1].Trim();
            activeSection = header.Length > 0 ? header : null;
            continue;
        }

        // Pairs before the first usable section header are ignored.
        if (activeSection is null)
        {
            continue;
        }

        var equalsAt = line.IndexOf('=');
        if (equalsAt <= 0)
        {
            continue;
        }

        var entryName = line[..equalsAt].Trim();
        var entryTarget = line[(equalsAt + 1)..].Trim();
        if (entryName.Length == 0 || entryTarget.Length == 0)
        {
            continue;
        }

        if (!sections.TryGetValue(activeSection, out var pairs))
        {
            pairs = new List<string>();
            sections[activeSection] = pairs;
        }

        pairs.Add($"{entryName}={entryTarget}");
    }

    foreach (var (sectionName, pairs) in sections)
    {
        AppendMetadata(metadata, $"entryPoints.{sectionName}", string.Join(';', pairs));
    }
}
|
||||
|
||||
/// <summary>
/// Reads the <c>WHEEL</c> file and emits <c>wheel.version</c>, <c>wheel.tags</c>,
/// <c>wheel.rootIsPurelib</c> and <c>wheel.generator</c> entries for the headers it finds.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="metadata">Collection that receives the entries.</param>
/// <param name="wheelVirtualPath">Virtual path of the <c>WHEEL</c> file.</param>
/// <param name="cancellationToken">Token observed on every line read.</param>
/// <remarks>
/// A <c>WHEEL</c> file may carry several <c>Tag:</c> lines. All of them are collected in file
/// order and joined with <c>;</c>; a file with one tag produces the bare tag value. For every
/// other header the last occurrence wins. A missing file, an unopenable stream, or an
/// <see cref="IOException"/> yields no entries.
/// </remarks>
private static async Task AppendWheelMetadataAsync(
    PythonVirtualFileSystem vfs,
    ICollection<KeyValuePair<string, string?>> metadata,
    string wheelVirtualPath,
    CancellationToken cancellationToken)
{
    if (!vfs.FileExists(wheelVirtualPath))
    {
        return;
    }

    var values = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
    var tags = new List<string>();

    try
    {
        await using var stream = await vfs.OpenReadAsync(wheelVirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return;
        }

        using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);
        while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var separator = line.IndexOf(':');
            if (separator <= 0)
            {
                continue;
            }

            var key = line[..separator].Trim();
            var value = line[(separator + 1)..].Trim();
            if (key.Length == 0 || value.Length == 0)
            {
                continue;
            }

            // Tag is the one repeatable header; keeping it in the last-wins map would drop tags.
            if (key.Equals("Tag", StringComparison.OrdinalIgnoreCase))
            {
                tags.Add(value);
                continue;
            }

            values[key] = value;
        }
    }
    catch (IOException)
    {
        return;
    }

    if (values.TryGetValue("Wheel-Version", out var wheelVersion))
    {
        AppendMetadata(metadata, "wheel.version", wheelVersion);
    }

    if (tags.Count > 0)
    {
        AppendMetadata(metadata, "wheel.tags", string.Join(';', tags));
    }

    if (values.TryGetValue("Root-Is-Purelib", out var purelib))
    {
        AppendMetadata(metadata, "wheel.rootIsPurelib", purelib);
    }

    if (values.TryGetValue("Generator", out var generator))
    {
        AppendMetadata(metadata, "wheel.generator", generator);
    }
}
|
||||
|
||||
/// <summary>
/// Returns the first line of the file at <paramref name="virtualPath"/>.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="virtualPath">Virtual path of the file.</param>
/// <param name="cancellationToken">Token passed to the read operations.</param>
/// <returns>
/// The first line, or <c>null</c> when the file is missing, cannot be opened, is empty, or
/// reading raises an <see cref="IOException"/>.
/// </returns>
private static async Task<string?> ReadSingleLineAsync(
    PythonVirtualFileSystem vfs,
    string virtualPath,
    CancellationToken cancellationToken)
{
    if (vfs.FileExists(virtualPath))
    {
        try
        {
            await using var input = await vfs.OpenReadAsync(virtualPath, cancellationToken).ConfigureAwait(false);
            if (input is not null)
            {
                using var textReader = new StreamReader(input, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);
                return await textReader.ReadLineAsync(cancellationToken).ConfigureAwait(false);
            }
        }
        catch (IOException)
        {
            // Best effort: an unreadable file is treated like a missing one.
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Reads <c>direct_url.json</c> and emits <c>editable</c>, <c>sourceUrl</c>,
/// <c>sourceSubdirectory</c>, <c>sourceVcs</c> and <c>sourceCommit</c> entries, plus a metadata
/// evidence record when a URL is present.
/// </summary>
/// <param name="context">Analyzer context used to make the evidence locator relative.</param>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="metadata">Collection that receives the entries.</param>
/// <param name="evidence">Collection that receives the evidence record.</param>
/// <param name="directUrlVirtualPath">Virtual path of <c>direct_url.json</c>.</param>
/// <param name="cancellationToken">Token passed to the read and parse operations.</param>
/// <remarks>
/// The file is untrusted input. Every access checks <see cref="JsonElement.ValueKind"/> first,
/// because <c>TryGetProperty</c>, <c>GetString</c> and <c>GetBoolean</c> throw
/// <see cref="InvalidOperationException"/> on a kind mismatch. Values of the wrong kind are
/// treated as absent; a non-object root produces no output. Invalid JSON and read errors are
/// ignored.
/// </remarks>
private static async Task AppendDirectUrlAsync(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    ICollection<KeyValuePair<string, string?>> metadata,
    ICollection<LanguageComponentEvidence> evidence,
    string directUrlVirtualPath,
    CancellationToken cancellationToken)
{
    var file = vfs.GetFile(directUrlVirtualPath);
    if (file is null)
    {
        return;
    }

    try
    {
        await using var stream = await vfs.OpenReadAsync(directUrlVirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return;
        }

        using var document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
        var root = document.RootElement;
        if (root.ValueKind != JsonValueKind.Object)
        {
            return;
        }

        var url = GetStringProperty(root, "url");

        var hasDirInfo = TryGetObjectProperty(root, "dir_info", out var dirInfo);
        var isEditable = hasDirInfo &&
            dirInfo.TryGetProperty("editable", out var editableValue) &&
            editableValue.ValueKind == JsonValueKind.True;
        var subdir = hasDirInfo ? GetStringProperty(dirInfo, "subdirectory") : null;

        string? vcs = null;
        string? commit = null;
        if (TryGetObjectProperty(root, "vcs_info", out var vcsInfo))
        {
            vcs = GetStringProperty(vcsInfo, "vcs");
            commit = GetStringProperty(vcsInfo, "commit_id");
        }

        if (isEditable)
        {
            AppendMetadata(metadata, "editable", "true");
        }

        AppendMetadata(metadata, "sourceUrl", url);
        AppendMetadata(metadata, "sourceSubdirectory", subdir);
        AppendMetadata(metadata, "sourceVcs", vcs);
        AppendMetadata(metadata, "sourceCommit", commit);

        if (!string.IsNullOrWhiteSpace(url))
        {
            // Archive members are located by the archive that contains them.
            var locator = file.IsFromArchive && file.ArchivePath is not null
                ? PythonPathHelper.NormalizeRelative(context, file.ArchivePath)
                : PythonPathHelper.NormalizeRelative(context, file.AbsolutePath);

            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Metadata,
                "direct_url.json",
                locator,
                url,
                Sha256: null));
        }
    }
    catch (JsonException)
    {
        // Ignore invalid JSON
    }
    catch (IOException)
    {
        // Ignore read errors
    }

    // Returns the named property when the parent is an object and the property is a JSON string.
    static string? GetStringProperty(JsonElement parent, string propertyName)
    {
        if (parent.ValueKind == JsonValueKind.Object &&
            parent.TryGetProperty(propertyName, out var element) &&
            element.ValueKind == JsonValueKind.String)
        {
            return element.GetString();
        }

        return null;
    }

    // True when the named property exists on an object parent and is itself a JSON object.
    static bool TryGetObjectProperty(JsonElement parent, string propertyName, out JsonElement value)
    {
        if (parent.ValueKind == JsonValueKind.Object &&
            parent.TryGetProperty(propertyName, out value) &&
            value.ValueKind == JsonValueKind.Object)
        {
            return true;
        }

        value = default;
        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Reads the installed-file manifest at <paramref name="recordVirtualPath"/>.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="recordVirtualPath">Virtual path of the <c>RECORD</c> or <c>installed-files.txt</c> file.</param>
/// <param name="cancellationToken">Token observed on every line read.</param>
/// <returns>
/// One entry per non-empty line. A file named <c>installed-files.txt</c> is delegated to
/// <c>ReadInstalledFilesAsync</c>; any other file is parsed as CSV rows of
/// <c>path,algorithm=hash,size</c>, where hash and size are optional. The list is empty when the
/// file is missing, cannot be opened, or reading raises an <see cref="IOException"/>.
/// </returns>
private static async Task<IReadOnlyList<PythonRecordEntry>> ReadRecordAsync(
    PythonVirtualFileSystem vfs,
    string recordVirtualPath,
    CancellationToken cancellationToken)
{
    if (!vfs.FileExists(recordVirtualPath))
    {
        return Array.Empty<PythonRecordEntry>();
    }

    if (Path.GetFileName(recordVirtualPath) is { Length: > 0 } leafName &&
        leafName.EndsWith("installed-files.txt", StringComparison.OrdinalIgnoreCase))
    {
        return await ReadInstalledFilesAsync(vfs, recordVirtualPath, cancellationToken).ConfigureAwait(false);
    }

    var parsed = new List<PythonRecordEntry>();
    try
    {
        await using var input = await vfs.OpenReadAsync(recordVirtualPath, cancellationToken).ConfigureAwait(false);
        if (input is null)
        {
            return Array.Empty<PythonRecordEntry>();
        }

        using var textReader = new StreamReader(input, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        while (true)
        {
            var row = await textReader.ReadLineAsync(cancellationToken).ConfigureAwait(false);
            if (row is null)
            {
                break;
            }

            cancellationToken.ThrowIfCancellationRequested();

            if (row.Length == 0)
            {
                continue;
            }

            var columns = ParseCsvLine(row);
            if (columns.Count < 1)
            {
                continue;
            }

            // Column 1 is "algorithm=hash"; both halves must be non-empty to be recorded.
            string? algorithm = null;
            string? hashValue = null;
            if (columns.Count > 1 && !string.IsNullOrWhiteSpace(columns[1]))
            {
                var hashColumn = columns[1].Trim();
                var equalsAt = hashColumn.IndexOf('=');
                var hasBothHalves = equalsAt > 0 && equalsAt < hashColumn.Length - 1;
                if (hasBothHalves)
                {
                    algorithm = hashColumn[..equalsAt];
                    hashValue = hashColumn[(equalsAt + 1)..];
                }
            }

            // Column 2 is the size; an unparseable value is recorded as unknown.
            long? size = null;
            if (columns.Count > 2 &&
                long.TryParse(columns[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out var byteCount))
            {
                size = byteCount;
            }

            parsed.Add(new PythonRecordEntry(columns[0], algorithm, hashValue, size));
        }
    }
    catch (IOException)
    {
        // A failed read discards any partially parsed rows.
        return Array.Empty<PythonRecordEntry>();
    }

    return parsed;
}
|
||||
|
||||
/// <summary>
/// Reads an egg-info <c>installed-files.txt</c>: one path per line, with no hash or size.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="recordVirtualPath">Virtual path of the <c>installed-files.txt</c> file.</param>
/// <param name="cancellationToken">Token observed on every line read.</param>
/// <returns>
/// One entry per trimmed line, skipping blank lines and a bare <c>.</c>. The list is empty when
/// the file cannot be opened or reading raises an <see cref="IOException"/>.
/// </returns>
private static async Task<IReadOnlyList<PythonRecordEntry>> ReadInstalledFilesAsync(
    PythonVirtualFileSystem vfs,
    string recordVirtualPath,
    CancellationToken cancellationToken)
{
    var listed = new List<PythonRecordEntry>();

    try
    {
        await using var input = await vfs.OpenReadAsync(recordVirtualPath, cancellationToken).ConfigureAwait(false);
        if (input is null)
        {
            return Array.Empty<PythonRecordEntry>();
        }

        using var textReader = new StreamReader(input, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        while (true)
        {
            var rawLine = await textReader.ReadLineAsync(cancellationToken).ConfigureAwait(false);
            if (rawLine is null)
            {
                break;
            }

            cancellationToken.ThrowIfCancellationRequested();

            var installedPath = rawLine.Trim();
            var isSkippable = installedPath.Length == 0 || installedPath == ".";
            if (!isSkippable)
            {
                listed.Add(new PythonRecordEntry(installedPath, null, null, null));
            }
        }
    }
    catch (IOException)
    {
        // A failed read discards any partially collected entries.
        return Array.Empty<PythonRecordEntry>();
    }

    return listed;
}
|
||||
|
||||
/// <summary>
/// Checks every manifest entry against the virtual file system and tallies the outcome.
/// </summary>
/// <param name="vfs">Virtual file system that hosts the installed files.</param>
/// <param name="distInfoVirtualPath">Virtual path of the metadata directory; entries are resolved against its parent.</param>
/// <param name="entries">Manifest entries to verify.</param>
/// <param name="cancellationToken">Token observed once per entry and by every read.</param>
/// <returns>
/// Counts of total, hashed, missing, mismatched and unreadable entries, the set of hash
/// algorithms other than <c>sha256</c> that were encountered, and one derived evidence record
/// per problem found.
/// </returns>
/// <remarks>
/// Entries whose path is blank or contains a <c>..</c> segment are counted as missing and
/// tagged <c>outside-root</c>. Entries without a hash are only checked for existence. RECORD
/// stores digests as URL-safe base64 without padding, so both sides are normalized to that
/// form before comparing; a standard padded base64 value in the manifest is accepted as well.
/// </remarks>
private static async Task<PythonRecordVerificationResult> VerifyRecordAsync(
    PythonVirtualFileSystem vfs,
    string distInfoVirtualPath,
    IReadOnlyList<PythonRecordEntry> entries,
    CancellationToken cancellationToken)
{
    if (entries.Count == 0)
    {
        return new PythonRecordVerificationResult(0, 0, 0, 0, 0, usedByEntrypoint: false, Array.Empty<string>(), Array.Empty<LanguageComponentEvidence>());
    }

    var evidence = new List<LanguageComponentEvidence>();
    var unsupported = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var root = GetParentDirectory(distInfoVirtualPath);

    var total = 0;
    var hashed = 0;
    var missing = 0;
    var mismatched = 0;
    var ioErrors = 0;

    foreach (var entry in entries)
    {
        cancellationToken.ThrowIfCancellationRequested();
        total++;

        var normalizedEntryPath = NormalizeRecordPath(entry.Path);
        if (normalizedEntryPath is null)
        {
            missing++;
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Derived,
                "RECORD",
                NormalizeVfsPath(entry.Path),
                "outside-root",
                Sha256: null));
            continue;
        }

        // A metadata directory at the VFS root has an empty parent; joining with "/" would
        // then yield a rooted "/path" instead of a relative virtual path.
        var virtualPath = root.Length == 0 ? normalizedEntryPath : $"{root}/{normalizedEntryPath}";

        if (!vfs.FileExists(virtualPath))
        {
            missing++;
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Derived,
                "RECORD",
                NormalizeVfsPath(virtualPath),
                "missing",
                Sha256: null));
            continue;
        }

        var algorithm = entry.HashAlgorithm;
        var expectedHash = entry.HashValue;
        if (string.IsNullOrWhiteSpace(algorithm) || string.IsNullOrWhiteSpace(expectedHash))
        {
            continue;
        }

        hashed++;

        if (!string.Equals(algorithm, "sha256", StringComparison.OrdinalIgnoreCase))
        {
            unsupported.Add(algorithm);
            continue;
        }

        string? actualHash;
        try
        {
            actualHash = await ComputeSha256Base64Async(vfs, virtualPath, cancellationToken).ConfigureAwait(false);
        }
        catch (IOException)
        {
            ioErrors++;
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Derived,
                "RECORD",
                NormalizeVfsPath(virtualPath),
                "io-error",
                Sha256: null));
            continue;
        }

        var hashesMatch = string.Equals(
            NormalizeRecordHash(actualHash),
            NormalizeRecordHash(expectedHash),
            StringComparison.Ordinal);

        if (!hashesMatch)
        {
            mismatched++;
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Derived,
                "RECORD",
                NormalizeVfsPath(virtualPath),
                $"sha256 mismatch expected={expectedHash} actual={actualHash}",
                Sha256: actualHash));
        }
    }

    return new PythonRecordVerificationResult(
        total,
        hashed,
        missing,
        mismatched,
        ioErrors,
        usedByEntrypoint: false,
        unsupported.ToArray(),
        evidence);
}

/// <summary>
/// Converts a base64 digest to the URL-safe, unpadded alphabet so that standard and URL-safe
/// encodings of the same bytes compare equal.
/// </summary>
private static string NormalizeRecordHash(string hash)
    => hash.Trim().TrimEnd('=').Replace('+', '-').Replace('/', '_');
|
||||
|
||||
/// <summary>
/// Computes the SHA-256 digest of a virtual file and returns it as standard (padded) base64.
/// </summary>
/// <param name="vfs">Virtual file system to read from.</param>
/// <param name="virtualPath">Virtual path of the file to hash.</param>
/// <param name="cancellationToken">Token passed to the open and hash operations.</param>
/// <returns>The base64-encoded digest.</returns>
/// <exception cref="IOException">The virtual file system returned no stream for the path.</exception>
private static async Task<string> ComputeSha256Base64Async(
    PythonVirtualFileSystem vfs,
    string virtualPath,
    CancellationToken cancellationToken)
{
    await using var stream = await vfs.OpenReadAsync(virtualPath, cancellationToken).ConfigureAwait(false);
    if (stream is null)
    {
        throw new IOException("Unable to open file for hashing.");
    }

    // The framework streams the content itself; no hash instance or pooled buffer to manage.
    var digest = await SHA256.HashDataAsync(stream, cancellationToken).ConfigureAwait(false);
    return Convert.ToBase64String(digest);
}
|
||||
|
||||
/// <summary>
/// Splits one CSV line into its fields.
/// </summary>
/// <param name="line">A single line without its line terminator.</param>
/// <returns>
/// The fields in order; always at least one element. Commas inside double quotes do not split,
/// a doubled quote inside a quoted run yields one literal quote, and the delimiting quotes
/// themselves are dropped.
/// </returns>
private static List<string> ParseCsvLine(string line)
{
    var fields = new List<string>();
    var current = new StringBuilder();
    var quoted = false;
    var position = 0;

    while (position < line.Length)
    {
        var c = line[position++];
        switch (c)
        {
            // Escaped quote ("") inside a quoted run: emit one quote and consume both.
            case '"' when quoted && position < line.Length && line[position] == '"':
                current.Append('"');
                position++;
                break;

            // Any other quote opens or closes a quoted run and is not part of the field.
            case '"':
                quoted = !quoted;
                break;

            case ',' when !quoted:
                fields.Add(current.ToString());
                current.Clear();
                break;

            default:
                current.Append(c);
                break;
        }
    }

    // The trailing field is always emitted, even when empty.
    fields.Add(current.ToString());
    return fields;
}
|
||||
|
||||
/// <summary>
/// Converts a path to forward slashes with leading slashes removed, rejecting paths
/// that contain a <c>..</c> segment.
/// </summary>
/// <returns>
/// The normalized path, or <c>null</c> when the input is null/whitespace, normalizes to an
/// empty string, or contains a <c>..</c> path segment.
/// </returns>
private static string? NormalizeRecordPath(string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        return null;
    }

    var candidate = path.Replace('\\', '/').TrimStart('/');
    if (candidate.Length == 0)
    {
        return null;
    }

    // Any segment that is exactly ".." is a parent-directory reference.
    foreach (var segment in candidate.Split('/'))
    {
        if (segment == "..")
        {
            return null;
        }
    }

    return candidate;
}
|
||||
|
||||
/// <summary>
/// Converts backslashes to forward slashes and strips slashes from both ends of the path.
/// </summary>
private static string NormalizeVfsPath(string path)
{
    var forwardSlashed = path.Replace('\\', '/');
    return forwardSlashed.Trim('/');
}
|
||||
|
||||
/// <summary>
/// Returns everything before the last <c>/</c> of the normalized path, or an empty string
/// when the normalized path has no directory component.
/// </summary>
private static string GetParentDirectory(string path)
{
    var vfsPath = NormalizeVfsPath(path);
    var separatorIndex = vfsPath.LastIndexOf('/');

    if (separatorIndex > 0)
    {
        return vfsPath[..separatorIndex];
    }

    return string.Empty;
}
|
||||
|
||||
/// <summary>
/// Adds a key/value pair to <paramref name="metadata"/> with the value trimmed.
/// Nothing is added when either the key or the value is null or whitespace.
/// </summary>
private static void AppendMetadata(ICollection<KeyValuePair<string, string?>> metadata, string key, string? value)
{
    if (string.IsNullOrWhiteSpace(key) || string.IsNullOrWhiteSpace(value))
    {
        return;
    }

    var trimmedValue = value.Trim();
    metadata.Add(new KeyValuePair<string, string?>(key, trimmedValue));
}
|
||||
|
||||
/// <summary>
/// Header-style (<c>Key: value</c>) metadata document read through the virtual file system.
/// Keys are compared case-insensitively and a key may carry several values.
/// </summary>
private sealed class PythonMetadataDocumentVfs
{
    private readonly Dictionary<string, List<string>> _values;

    private PythonMetadataDocumentVfs(Dictionary<string, List<string>> values)
    {
        _values = values;
    }

    /// <summary>
    /// Reads and parses the document at <paramref name="virtualPath"/>.
    /// </summary>
    /// <remarks>
    /// A missing file, a null stream, or an <see cref="IOException"/> while reading all
    /// produce an empty document. Lines starting with a space or tab continue the value of
    /// the current key (joined with a single space); empty lines and new header lines
    /// finish the current key. Lines without a colon after the first character are ignored.
    /// </remarks>
    public static async Task<PythonMetadataDocumentVfs> LoadAsync(
        PythonVirtualFileSystem vfs,
        string virtualPath,
        CancellationToken cancellationToken)
    {
        if (!vfs.FileExists(virtualPath))
        {
            return Empty();
        }

        try
        {
            await using var stream = await vfs.OpenReadAsync(virtualPath, cancellationToken).ConfigureAwait(false);
            if (stream is null)
            {
                return Empty();
            }

            using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);
            var parsed = new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);
            var pendingValue = new StringBuilder();
            string? pendingKey = null;

            string? line;
            while ((line = await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
            {
                cancellationToken.ThrowIfCancellationRequested();

                if (line.Length == 0)
                {
                    Flush();
                    continue;
                }

                if (line[0] is ' ' or '\t')
                {
                    // Continuation line: only meaningful while a key is open.
                    if (pendingKey is not null)
                    {
                        if (pendingValue.Length > 0)
                        {
                            pendingValue.Append(' ');
                        }

                        pendingValue.Append(line.Trim());
                    }

                    continue;
                }

                Flush();

                var colon = line.IndexOf(':');
                if (colon <= 0)
                {
                    continue;
                }

                pendingKey = line[..colon].Trim();
                pendingValue.Clear();
                pendingValue.Append(line[(colon + 1)..].Trim());
            }

            Flush();
            return new PythonMetadataDocumentVfs(parsed);

            // Stores the open key/value pair (if any) and resets the pending state.
            // The key is registered even when its trimmed value is empty; in that case
            // no value is appended to its list.
            void Flush()
            {
                if (string.IsNullOrWhiteSpace(pendingKey))
                {
                    return;
                }

                if (!parsed.TryGetValue(pendingKey, out var bucket))
                {
                    bucket = new List<string>();
                    parsed[pendingKey] = bucket;
                }

                var text = pendingValue.ToString().Trim();
                if (text.Length > 0)
                {
                    bucket.Add(text);
                }

                pendingKey = null;
                pendingValue.Clear();
            }
        }
        catch (IOException)
        {
            return Empty();
        }
    }

    /// <summary>
    /// Returns the first value recorded for <paramref name="key"/>, or <c>null</c> when the
    /// key is null, unknown, or has no values.
    /// </summary>
    public string? GetFirst(string key)
    {
        if (key is not null && _values.TryGetValue(key, out var entries) && entries.Count > 0)
        {
            return entries[0];
        }

        return null;
    }

    /// <summary>
    /// Returns a read-only view of all values recorded for <paramref name="key"/>, or an
    /// empty array when the key is null or unknown.
    /// </summary>
    public IReadOnlyList<string> GetAll(string key)
    {
        if (key is null)
        {
            return Array.Empty<string>();
        }

        if (_values.TryGetValue(key, out var entries))
        {
            return entries.AsReadOnly();
        }

        return Array.Empty<string>();
    }

    // Builds a document with no keys, using the same case-insensitive key comparison.
    private static PythonMetadataDocumentVfs Empty()
    {
        return new PythonMetadataDocumentVfs(new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase));
    }
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Linq;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
@@ -81,9 +82,11 @@ internal sealed partial class PythonInputNormalizer
|
||||
await DetectLayoutAsync(cancellationToken).ConfigureAwait(false);
|
||||
await DetectVersionTargetsAsync(cancellationToken).ConfigureAwait(false);
|
||||
DetectSitePackages();
|
||||
DetectLayerSitePackages();
|
||||
DetectWheels();
|
||||
DetectZipapps();
|
||||
await DetectEditablesAsync(cancellationToken).ConfigureAwait(false);
|
||||
NormalizeDetectedInputs();
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -94,6 +97,11 @@ internal sealed partial class PythonInputNormalizer
|
||||
{
|
||||
var builder = PythonVirtualFileSystem.CreateBuilder();
|
||||
|
||||
AddProjectFiles(builder);
|
||||
|
||||
var condaMeta = Path.Combine(_rootPath, "conda-meta");
|
||||
builder.AddDirectory(condaMeta, "conda-meta", PythonFileSource.ProjectConfig, includeHiddenFiles: false);
|
||||
|
||||
// Add site-packages in order (later takes precedence)
|
||||
foreach (var sitePackagesPath in _sitePackagesPaths)
|
||||
{
|
||||
@@ -582,7 +590,8 @@ internal sealed partial class PythonInputNormalizer
|
||||
{
|
||||
try
|
||||
{
|
||||
foreach (var pythonDir in Directory.EnumerateDirectories(libDir, "python*", SafeEnumeration))
|
||||
foreach (var pythonDir in Directory.EnumerateDirectories(libDir, "python*", SafeEnumeration)
|
||||
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
|
||||
{
|
||||
searchPaths.Add(Path.Combine(pythonDir, "site-packages"));
|
||||
}
|
||||
@@ -611,6 +620,25 @@ internal sealed partial class PythonInputNormalizer
|
||||
searchPaths.Add(Path.Combine(_rootPath, "usr", "local", "lib", "python3.12", "site-packages"));
|
||||
searchPaths.Add(Path.Combine(_rootPath, "usr", "lib", "python3", "dist-packages"));
|
||||
|
||||
// System-style lib/pythonX.Y/site-packages under the workspace root
|
||||
var rootLibDir = Path.Combine(_rootPath, "lib");
|
||||
if (Directory.Exists(rootLibDir))
|
||||
{
|
||||
try
|
||||
{
|
||||
foreach (var pythonDir in Directory.EnumerateDirectories(rootLibDir, "python*", SafeEnumeration)
|
||||
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
|
||||
{
|
||||
searchPaths.Add(Path.Combine(pythonDir, "site-packages"));
|
||||
searchPaths.Add(Path.Combine(pythonDir, "dist-packages"));
|
||||
}
|
||||
}
|
||||
catch (UnauthorizedAccessException)
|
||||
{
|
||||
// Ignore
|
||||
}
|
||||
}
|
||||
|
||||
// Root site-packages (common for some Docker images)
|
||||
searchPaths.Add(Path.Combine(_rootPath, "site-packages"));
|
||||
|
||||
@@ -623,6 +651,17 @@ internal sealed partial class PythonInputNormalizer
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends every path reported by <c>PythonContainerAdapter.DiscoverLayerSitePackages</c>
/// for the root path to the site-packages list, skipping paths that are already present
/// (case-insensitive comparison).
/// </summary>
private void DetectLayerSitePackages()
{
    foreach (var candidate in PythonContainerAdapter.DiscoverLayerSitePackages(_rootPath))
    {
        var alreadyKnown = _sitePackagesPaths.Contains(candidate, StringComparer.OrdinalIgnoreCase);
        if (alreadyKnown)
        {
            continue;
        }

        _sitePackagesPaths.Add(candidate);
    }
}
|
||||
|
||||
private void DetectWheels()
|
||||
{
|
||||
// Look for wheels in common locations
|
||||
@@ -643,7 +682,8 @@ internal sealed partial class PythonInputNormalizer
|
||||
|
||||
try
|
||||
{
|
||||
foreach (var wheel in Directory.EnumerateFiles(searchPath, "*.whl", SafeEnumeration))
|
||||
foreach (var wheel in Directory.EnumerateFiles(searchPath, "*.whl", SafeEnumeration)
|
||||
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
|
||||
{
|
||||
if (!_wheelPaths.Contains(wheel, StringComparer.OrdinalIgnoreCase))
|
||||
{
|
||||
@@ -700,37 +740,24 @@ internal sealed partial class PythonInputNormalizer
|
||||
|
||||
/// <summary>
/// Appends every path reported by <c>PythonZipappAdapter.DiscoverZipapps</c> for the root
/// path to the zipapp list, skipping paths that are already present (case-insensitive
/// comparison).
/// </summary>
// NOTE(review): the captured text interleaved the removed root-only *.pyz / *.pyzw
// enumeration with this adapter-based replacement; only the replacement is kept here.
// Confirm against the committed file.
private void DetectZipapps()
{
    foreach (var zipappPath in PythonZipappAdapter.DiscoverZipapps(_rootPath))
    {
        if (!_zipappPaths.Contains(zipappPath, StringComparer.OrdinalIgnoreCase))
        {
            _zipappPaths.Add(zipappPath);
        }
    }
}
|
||||
|
||||
private async Task DetectEditablesAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
// Look for .egg-link files in site-packages
|
||||
foreach (var sitePackagesPath in _sitePackagesPaths)
|
||||
foreach (var sitePackagesPath in _sitePackagesPaths.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
|
||||
{
|
||||
try
|
||||
{
|
||||
foreach (var eggLink in Directory.EnumerateFiles(sitePackagesPath, "*.egg-link", SafeEnumeration))
|
||||
foreach (var eggLink in Directory.EnumerateFiles(sitePackagesPath, "*.egg-link", SafeEnumeration)
|
||||
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
|
||||
{
|
||||
var content = await File.ReadAllTextAsync(eggLink, cancellationToken).ConfigureAwait(false);
|
||||
var lines = content.Split('\n', StringSplitOptions.RemoveEmptyEntries);
|
||||
@@ -761,11 +788,12 @@ internal sealed partial class PythonInputNormalizer
|
||||
}
|
||||
|
||||
// Look for direct_url.json with editable flag in dist-info directories
|
||||
foreach (var sitePackagesPath in _sitePackagesPaths)
|
||||
foreach (var sitePackagesPath in _sitePackagesPaths.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
|
||||
{
|
||||
try
|
||||
{
|
||||
foreach (var distInfo in Directory.EnumerateDirectories(sitePackagesPath, "*.dist-info", SafeEnumeration))
|
||||
foreach (var distInfo in Directory.EnumerateDirectories(sitePackagesPath, "*.dist-info", SafeEnumeration)
|
||||
.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
|
||||
{
|
||||
var directUrlPath = Path.Combine(distInfo, "direct_url.json");
|
||||
if (!File.Exists(directUrlPath))
|
||||
@@ -815,6 +843,84 @@ internal sealed partial class PythonInputNormalizer
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Normalizes the detected site-packages, wheel and zipapp path lists in place, then the
/// editable list.
/// </summary>
private void NormalizeDetectedInputs()
{
    foreach (var pathList in new[] { _sitePackagesPaths, _wheelPaths, _zipappPaths })
    {
        NormalizePathList(pathList);
    }

    NormalizeEditableList(_editablePaths);
}
|
||||
|
||||
/// <summary>
/// Rewrites <paramref name="paths"/> in place as trimmed full paths with blank entries
/// removed, case-insensitive duplicates dropped (first occurrence wins) and the result
/// sorted by ordinal comparison.
/// </summary>
private static void NormalizePathList(List<string> paths)
{
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var cleaned = new List<string>(paths.Count);

    foreach (var raw in paths)
    {
        if (string.IsNullOrWhiteSpace(raw))
        {
            continue;
        }

        var absolute = Path.GetFullPath(raw.Trim());
        if (seen.Add(absolute))
        {
            cleaned.Add(absolute);
        }
    }

    cleaned.Sort(StringComparer.Ordinal);

    paths.Clear();
    paths.AddRange(cleaned);
}
|
||||
|
||||
/// <summary>
/// Rewrites <paramref name="editables"/> in place: entries with a blank path are dropped,
/// paths become trimmed full paths, blank package names become <c>null</c> (others are
/// trimmed), duplicates of the same name/path pair (case-insensitive) are removed, and the
/// result is ordered by package name (case-insensitive, null as empty) and then by path
/// (ordinal).
/// </summary>
private static void NormalizeEditableList(List<(string Path, string? PackageName)> editables)
{
    var keys = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var unique = new List<(string Path, string? PackageName)>();

    foreach (var editable in editables)
    {
        var rawPath = editable.Path;
        if (string.IsNullOrWhiteSpace(rawPath))
        {
            continue;
        }

        var absolute = Path.GetFullPath(rawPath.Trim());

        var rawName = editable.PackageName;
        string? trimmedName = null;
        if (!string.IsNullOrWhiteSpace(rawName))
        {
            trimmedName = rawName.Trim();
        }

        if (keys.Add($"{trimmedName ?? string.Empty}|{absolute}"))
        {
            unique.Add((absolute, trimmedName));
        }
    }

    // Entries that compare equal on both keys were already removed as duplicates above,
    // so this comparison defines a total order.
    unique.Sort(static (left, right) =>
    {
        var byName = StringComparer.OrdinalIgnoreCase.Compare(
            left.PackageName ?? string.Empty,
            right.PackageName ?? string.Empty);

        return byName != 0 ? byName : StringComparer.Ordinal.Compare(left.Path, right.Path);
    });

    editables.Clear();
    editables.AddRange(unique);
}
|
||||
|
||||
/// <summary>
/// Registers the well-known project configuration files and lock/requirements files
/// (when present under the root path) with the builder, configuration files first.
/// </summary>
private void AddProjectFiles(PythonVirtualFileSystem.Builder builder)
{
    string[] configFiles =
    {
        "pyproject.toml",
        "setup.py",
        "setup.cfg",
        "runtime.txt",
        "Dockerfile",
        "tox.ini",
    };

    string[] lockFiles =
    {
        "requirements.txt",
        "requirements-dev.txt",
        "requirements.prod.txt",
        "Pipfile.lock",
        "poetry.lock",
    };

    foreach (var fileName in configFiles)
    {
        AddProjectFile(builder, fileName, PythonFileSource.ProjectConfig);
    }

    foreach (var fileName in lockFiles)
    {
        AddProjectFile(builder, fileName, PythonFileSource.LockFile);
    }
}
|
||||
|
||||
/// <summary>
/// Adds the file at <paramref name="relativePath"/> (resolved against the root path) to
/// the builder under that same relative path; does nothing when the file does not exist.
/// </summary>
private void AddProjectFile(PythonVirtualFileSystem.Builder builder, string relativePath, PythonFileSource source)
{
    var absolutePath = Path.Combine(_rootPath, relativePath);

    if (File.Exists(absolutePath))
    {
        builder.AddFile(relativePath, absolutePath, source);
    }
}
|
||||
|
||||
[GeneratedRegex(@"requires-python\s*=\s*[""']?(?<version>[^""'\n]+)", RegexOptions.IgnoreCase)]
|
||||
private static partial Regex RequiresPythonPattern();
|
||||
|
||||
|
||||
@@ -54,9 +54,9 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
public int FileCount => _files.Count;
|
||||
|
||||
/// <summary>
|
||||
/// Gets all files in the virtual filesystem.
|
||||
/// Gets all files in the virtual filesystem, ordered deterministically by virtual path.
|
||||
/// </summary>
|
||||
public IEnumerable<PythonVirtualFile> Files => _files.Values;
|
||||
public IEnumerable<PythonVirtualFile> Files => Paths.Select(path => _files[path]);
|
||||
|
||||
/// <summary>
|
||||
/// Gets all virtual paths in sorted order.
|
||||
@@ -230,17 +230,17 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
var normalized = NormalizePath(virtualPath);
|
||||
var prefix = normalized.Length == 0 ? string.Empty : normalized + "/";
|
||||
|
||||
foreach (var kvp in _files)
|
||||
foreach (var key in _files.Keys.OrderBy(static path => path, StringComparer.Ordinal))
|
||||
{
|
||||
if (!kvp.Key.StartsWith(prefix, StringComparison.Ordinal))
|
||||
if (!key.StartsWith(prefix, StringComparison.Ordinal))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var relative = kvp.Key[prefix.Length..];
|
||||
var relative = key[prefix.Length..];
|
||||
if (regex.IsMatch(relative))
|
||||
{
|
||||
yield return kvp.Value;
|
||||
yield return _files[key];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -291,11 +291,32 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
{
|
||||
private readonly Dictionary<string, PythonVirtualFile> _files = new(StringComparer.Ordinal);
|
||||
private readonly HashSet<string> _processedArchives = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly Dictionary<string, int> _archiveAliasCounters = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly HashSet<string> _sourceTreeRoots = new(StringComparer.Ordinal);
|
||||
private readonly HashSet<string> _sitePackagesPaths = new(StringComparer.Ordinal);
|
||||
private readonly HashSet<string> _editablePaths = new(StringComparer.Ordinal);
|
||||
private readonly HashSet<string> _zipArchivePaths = new(StringComparer.Ordinal);
|
||||
|
||||
/// <summary>
/// Adds the files found under <paramref name="directoryPath"/> beneath the given virtual prefix.
/// </summary>
/// <param name="directoryPath">Directory on disk; when it does not exist nothing is added.</param>
/// <param name="virtualPrefix">Virtual location for the files; normalized before use.</param>
/// <param name="source">Source classification forwarded to the recursive add.</param>
/// <param name="layerDigest">Optional layer digest forwarded to the recursive add.</param>
/// <param name="includeHiddenFiles">Forwarded to the recursive add.</param>
/// <returns>This builder, for chaining.</returns>
public Builder AddDirectory(
    string directoryPath,
    string virtualPrefix,
    PythonFileSource source,
    string? layerDigest = null,
    bool includeHiddenFiles = false)
{
    if (Directory.Exists(directoryPath))
    {
        AddDirectoryRecursive(
            Path.GetFullPath(directoryPath),
            NormalizePath(virtualPrefix),
            source,
            layerDigest,
            includeHiddenFiles);
    }

    return this;
}
|
||||
|
||||
/// <summary>
|
||||
/// Adds files from a site-packages directory.
|
||||
/// </summary>
|
||||
@@ -308,7 +329,7 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
|
||||
var basePath = Path.GetFullPath(sitePackagesPath);
|
||||
_sitePackagesPaths.Add(string.Empty); // Root of the VFS
|
||||
AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SitePackages, layerDigest);
|
||||
AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SitePackages, layerDigest, includeHiddenFiles: false);
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -322,12 +343,13 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
return this;
|
||||
}
|
||||
|
||||
_zipArchivePaths.Add(wheelPath);
|
||||
var virtualRoot = CreateArchiveVirtualRoot("wheel", wheelPath);
|
||||
_zipArchivePaths.Add(virtualRoot);
|
||||
|
||||
try
|
||||
{
|
||||
using var archive = ZipFile.OpenRead(wheelPath);
|
||||
AddArchiveEntries(archive, wheelPath, PythonFileSource.Wheel);
|
||||
AddArchiveEntries(archive, wheelPath, virtualRoot, PythonFileSource.Wheel);
|
||||
}
|
||||
catch (InvalidDataException)
|
||||
{
|
||||
@@ -351,7 +373,8 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
return this;
|
||||
}
|
||||
|
||||
_zipArchivePaths.Add(zipappPath);
|
||||
var virtualRoot = CreateArchiveVirtualRoot("zipapp", zipappPath);
|
||||
_zipArchivePaths.Add(virtualRoot);
|
||||
|
||||
try
|
||||
{
|
||||
@@ -366,7 +389,7 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
|
||||
stream.Position = offset;
|
||||
using var archive = new ZipArchive(stream, ZipArchiveMode.Read);
|
||||
AddArchiveEntries(archive, zipappPath, PythonFileSource.Zipapp);
|
||||
AddArchiveEntries(archive, zipappPath, virtualRoot, PythonFileSource.Zipapp);
|
||||
}
|
||||
catch (InvalidDataException)
|
||||
{
|
||||
@@ -390,14 +413,15 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
return this;
|
||||
}
|
||||
|
||||
_zipArchivePaths.Add(sdistPath);
|
||||
var virtualRoot = CreateArchiveVirtualRoot("sdist", sdistPath);
|
||||
_zipArchivePaths.Add(virtualRoot);
|
||||
|
||||
try
|
||||
{
|
||||
if (sdistPath.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
using var archive = ZipFile.OpenRead(sdistPath);
|
||||
AddArchiveEntries(archive, sdistPath, PythonFileSource.Sdist);
|
||||
AddArchiveEntries(archive, sdistPath, virtualRoot, PythonFileSource.Sdist);
|
||||
}
|
||||
// Note: .tar.gz support would require TarReader from System.Formats.Tar
|
||||
// For now, we handle the common .zip case
|
||||
@@ -427,7 +451,7 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
var basePath = Path.GetFullPath(editablePath);
|
||||
var prefix = string.IsNullOrEmpty(packageName) ? string.Empty : packageName + "/";
|
||||
_editablePaths.Add(prefix.TrimEnd('/'));
|
||||
AddDirectoryRecursive(basePath, prefix.TrimEnd('/'), PythonFileSource.Editable, layerDigest: null);
|
||||
AddDirectoryRecursive(basePath, prefix.TrimEnd('/'), PythonFileSource.Editable, layerDigest: null, includeHiddenFiles: false);
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -443,7 +467,7 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
|
||||
var basePath = Path.GetFullPath(sourcePath);
|
||||
_sourceTreeRoots.Add(string.Empty); // Root of the VFS
|
||||
AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SourceTree, layerDigest: null);
|
||||
AddDirectoryRecursive(basePath, string.Empty, PythonFileSource.SourceTree, layerDigest: null, includeHiddenFiles: false);
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -522,7 +546,8 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
string basePath,
|
||||
string virtualPrefix,
|
||||
PythonFileSource source,
|
||||
string? layerDigest)
|
||||
string? layerDigest,
|
||||
bool includeHiddenFiles)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -537,7 +562,7 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
// Skip __pycache__ and hidden files
|
||||
if (normalizedRelative.Contains("/__pycache__/", StringComparison.Ordinal) ||
|
||||
normalizedRelative.StartsWith("__pycache__/", StringComparison.Ordinal) ||
|
||||
Path.GetFileName(file).StartsWith('.'))
|
||||
(!includeHiddenFiles && Path.GetFileName(file).StartsWith('.')))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@@ -566,7 +591,7 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
}
|
||||
}
|
||||
|
||||
private void AddArchiveEntries(ZipArchive archive, string archivePath, PythonFileSource source)
|
||||
private void AddArchiveEntries(ZipArchive archive, string archivePath, string virtualRoot, PythonFileSource source)
|
||||
{
|
||||
foreach (var entry in archive.Entries)
|
||||
{
|
||||
@@ -576,7 +601,8 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
continue;
|
||||
}
|
||||
|
||||
var virtualPath = entry.FullName.Replace('\\', '/');
|
||||
var entryPath = entry.FullName.Replace('\\', '/').TrimStart('/');
|
||||
var virtualPath = $"{virtualRoot}/{entryPath}";
|
||||
|
||||
// Skip __pycache__ in archives too
|
||||
if (virtualPath.Contains("/__pycache__/", StringComparison.Ordinal) ||
|
||||
@@ -587,7 +613,7 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
|
||||
AddFile(
|
||||
virtualPath,
|
||||
entry.FullName,
|
||||
entryPath,
|
||||
source,
|
||||
layerDigest: null,
|
||||
archivePath: archivePath,
|
||||
@@ -595,6 +621,22 @@ internal sealed partial class PythonVirtualFileSystem
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Produces the virtual root <c>archives/{kind}/{file name}</c> for an archive. When the
/// same kind/file-name pair has been requested before, the running count is appended as
/// <c>~N</c> (starting at <c>~2</c>) so each request gets a distinct root.
/// </summary>
private string CreateArchiveVirtualRoot(string kind, string archivePath)
{
    var fileName = Path.GetFileName(archivePath);
    var aliasKey = $"{kind}/{fileName}";

    if (_archiveAliasCounters.TryGetValue(aliasKey, out var seen))
    {
        var occurrence = seen + 1;
        _archiveAliasCounters[aliasKey] = occurrence;
        return $"archives/{kind}/{fileName}~{occurrence}";
    }

    // First request for this kind/file name: no suffix.
    _archiveAliasCounters[aliasKey] = 1;
    return $"archives/{kind}/{fileName}";
}
|
||||
|
||||
private static long FindZipOffset(Stream stream)
|
||||
{
|
||||
// ZIP files start with PK\x03\x04 signature
|
||||
|
||||
@@ -1,18 +1,13 @@
|
||||
using System.Linq;
|
||||
using System.Text.Json;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python;
|
||||
|
||||
public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
{
|
||||
private static readonly EnumerationOptions Enumeration = new()
|
||||
{
|
||||
RecurseSubdirectories = true,
|
||||
IgnoreInaccessible = true,
|
||||
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
|
||||
};
|
||||
|
||||
public string Id => "python";
|
||||
|
||||
public string DisplayName => "Python Analyzer";
|
||||
@@ -43,73 +38,33 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
// Analyze zipapps in workspace and container layers
|
||||
var zipappAnalysis = PythonZipappAdapter.AnalyzeAll(context.RootPath);
|
||||
|
||||
// Collect dist-info directories from both root and container layers
|
||||
var distInfoDirectories = CollectDistInfoDirectories(context.RootPath);
|
||||
var projectAnalysis = await PythonProjectAnalysis.AnalyzeAsync(context.RootPath, cancellationToken).ConfigureAwait(false);
|
||||
var vfs = projectAnalysis.VirtualFileSystem;
|
||||
|
||||
foreach (var distInfoPath in distInfoDirectories)
|
||||
var packageDiscovery = new PythonPackageDiscovery();
|
||||
var discoveryResult = await packageDiscovery.DiscoverAsync(vfs, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
foreach (var package in discoveryResult.Packages
|
||||
.Where(static p => !string.IsNullOrWhiteSpace(p.Version))
|
||||
.OrderBy(static p => p.NormalizedName, StringComparer.Ordinal)
|
||||
.ThenBy(static p => p.Version, StringComparer.Ordinal))
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
PythonDistribution? distribution;
|
||||
try
|
||||
{
|
||||
distribution = await PythonDistributionLoader.LoadAsync(context, distInfoPath, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (IOException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
catch (JsonException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
catch (UnauthorizedAccessException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (distribution is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var metadata = distribution.SortedMetadata.ToList();
|
||||
|
||||
if (lockData.TryGet(distribution.Name, distribution.Version, out var lockEntry))
|
||||
{
|
||||
matchedLocks.Add(lockEntry!.DeclarationKey);
|
||||
AppendLockMetadata(metadata, lockEntry);
|
||||
}
|
||||
else if (hasLockEntries)
|
||||
{
|
||||
metadata.Add(new KeyValuePair<string, string?>("lockMissing", "true"));
|
||||
}
|
||||
|
||||
// Append runtime information
|
||||
AppendRuntimeMetadata(metadata, runtimeInfo);
|
||||
|
||||
// Append environment variables (PYTHONPATH/PYTHONHOME)
|
||||
AppendEnvironmentMetadata(metadata, environment);
|
||||
|
||||
// Append startup hooks warnings
|
||||
AppendStartupHooksMetadata(metadata, startupHooks);
|
||||
|
||||
// Append zipapp analysis
|
||||
AppendZipappMetadata(metadata, zipappAnalysis);
|
||||
|
||||
// Collect evidence including startup hooks
|
||||
var evidence = distribution.SortedEvidence.ToList();
|
||||
evidence.AddRange(startupHooks.ToEvidence(context));
|
||||
|
||||
writer.AddFromPurl(
|
||||
analyzerId: "python",
|
||||
purl: distribution.Purl,
|
||||
name: distribution.Name,
|
||||
version: distribution.Version,
|
||||
type: "pypi",
|
||||
metadata: metadata,
|
||||
evidence: evidence,
|
||||
usedByEntrypoint: distribution.UsedByEntrypoint);
|
||||
await EmitDiscoveredPackageAsync(
|
||||
context,
|
||||
writer,
|
||||
vfs,
|
||||
package,
|
||||
lockData,
|
||||
matchedLocks,
|
||||
hasLockEntries,
|
||||
runtimeInfo,
|
||||
environment,
|
||||
startupHooks,
|
||||
zipappAnalysis,
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
if (lockData.Entries.Count > 0)
|
||||
@@ -121,18 +76,18 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
continue;
|
||||
}
|
||||
|
||||
var normalizedName = PythonPathHelper.NormalizePackageName(entry.Name);
|
||||
|
||||
var declaredMetadata = new List<KeyValuePair<string, string?>>
|
||||
{
|
||||
new("declaredOnly", "true"),
|
||||
new("pkg.kind", "DeclaredOnly"),
|
||||
new("pkg.confidence", PythonPackageConfidence.Medium.ToString()),
|
||||
new("pkg.location", entry.Locator),
|
||||
new("lockSource", entry.Source),
|
||||
new("lockLocator", entry.Locator)
|
||||
};
|
||||
|
||||
AppendCommonLockFields(declaredMetadata, entry);
|
||||
|
||||
var version = string.IsNullOrWhiteSpace(entry.Version) ? "editable" : entry.Version!;
|
||||
var purl = $"pkg:pypi/{PythonPathHelper.NormalizePackageName(entry.Name)}@{version}";
|
||||
|
||||
var evidence = new[]
|
||||
{
|
||||
new LanguageComponentEvidence(
|
||||
@@ -143,6 +98,49 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
Sha256: null)
|
||||
};
|
||||
|
||||
if (string.IsNullOrWhiteSpace(entry.Version))
|
||||
{
|
||||
var editableSpec = NormalizeEditableSpec(context, entry.EditablePath, out var specRedacted);
|
||||
|
||||
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.source", entry.Source));
|
||||
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.locator", entry.Locator));
|
||||
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.versionSpec", editableSpec));
|
||||
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.scope", "unknown"));
|
||||
declaredMetadata.Add(new KeyValuePair<string, string?>("declared.sourceType", "editable"));
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(editableSpec))
|
||||
{
|
||||
declaredMetadata.Add(new KeyValuePair<string, string?>("lockEditablePath", editableSpec));
|
||||
}
|
||||
|
||||
if (specRedacted)
|
||||
{
|
||||
declaredMetadata.Add(new KeyValuePair<string, string?>("lockEditablePathRedacted", "true"));
|
||||
}
|
||||
|
||||
var componentKey = LanguageExplicitKey.Create("python", "pypi", normalizedName, editableSpec, entry.Locator);
|
||||
writer.AddFromExplicitKey(
|
||||
analyzerId: "python",
|
||||
componentKey: componentKey,
|
||||
purl: null,
|
||||
name: entry.Name,
|
||||
version: null,
|
||||
type: "pypi",
|
||||
metadata: declaredMetadata,
|
||||
evidence: evidence,
|
||||
usedByEntrypoint: false);
|
||||
continue;
|
||||
}
|
||||
|
||||
AppendCommonLockFields(declaredMetadata, entry);
|
||||
|
||||
var version = entry.Version!.Trim();
|
||||
if (version.Length == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var purl = $"pkg:pypi/{normalizedName}@{version}";
|
||||
writer.AddFromPurl(
|
||||
analyzerId: "python",
|
||||
purl: purl,
|
||||
@@ -156,6 +154,284 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Turns the editable path of a lock entry into a specification string.
/// </summary>
/// <param name="context">Used to convert absolute paths into root-relative paths.</param>
/// <param name="editablePath">Raw path; surrounding whitespace and quote characters are removed.</param>
/// <param name="redacted">
/// Set to <c>true</c> only when an absolute path could not be made root-relative and was
/// reduced to its last path segment.
/// </param>
/// <returns>
/// An empty string for blank input; the forward-slashed path for non-absolute input; the
/// forward-slashed result of <c>context.GetRelativePath</c> for absolute input when that
/// result is non-blank, is not <c>.</c>, does not start with <c>..</c> and contains no
/// <c>:</c>; otherwise the last path segment (or <c>editable</c> when that is blank).
/// </returns>
private static string NormalizeEditableSpec(LanguageAnalyzerContext context, string? editablePath, out bool redacted)
{
    redacted = false;

    var spec = string.IsNullOrWhiteSpace(editablePath)
        ? string.Empty
        : editablePath.Trim().Trim('"', '\'');

    if (spec.Length == 0)
    {
        return string.Empty;
    }

    var forwardSlashed = spec.Replace('\\', '/');
    var startsWithDrive = spec.Length >= 2 && char.IsLetter(spec[0]) && spec[1] == ':';

    var rooted = Path.IsPathRooted(spec)
        || startsWithDrive
        || forwardSlashed.StartsWith("/", StringComparison.Ordinal)
        || forwardSlashed.StartsWith("//", StringComparison.Ordinal);

    if (!rooted)
    {
        return forwardSlashed;
    }

    try
    {
        var relative = context.GetRelativePath(spec);
        if (!string.IsNullOrWhiteSpace(relative) &&
            relative != "." &&
            !relative.StartsWith("..", StringComparison.Ordinal) &&
            relative.IndexOf(':') < 0)
        {
            return relative.Replace('\\', '/');
        }
    }
    catch
    {
        // Any failure while relativizing falls through to the redacted form below.
    }

    redacted = true;

    var stem = forwardSlashed.TrimEnd('/');
    var cut = stem.LastIndexOf('/');
    var leaf = cut >= 0 && cut < stem.Length - 1 ? stem[(cut + 1)..] : stem;

    return string.IsNullOrWhiteSpace(leaf) ? "editable" : leaf;
}
|
||||
|
||||
/// <summary>
/// Writes one component to <paramref name="writer"/> for a discovered Python package.
/// </summary>
/// <remarks>
/// Three sources are tried in order:
/// <list type="number">
/// <item>a metadata directory on disk, loaded through <c>PythonDistributionLoader</c>;</item>
/// <item>an archive-backed metadata file, loaded through <c>PythonDistributionVfsLoader</c>;</item>
/// <item>a fallback component built from the package name and version alone.</item>
/// </list>
/// Packages with a null or blank version are skipped. When the on-disk load throws
/// <see cref="IOException"/>, <see cref="JsonException"/> or <see cref="UnauthorizedAccessException"/>,
/// or returns null, nothing is emitted for the package.
/// </remarks>
/// <param name="context">Analyzer context passed to the loaders and helper builders.</param>
/// <param name="writer">Receives the emitted component.</param>
/// <param name="vfs">Virtual file system used to locate the package's metadata file.</param>
/// <param name="package">The discovered package.</param>
/// <param name="lockData">Lock data queried by package name and version.</param>
/// <param name="matchedLocks">Receives the declaration key of a matching lock entry.</param>
/// <param name="hasLockEntries">When true and no lock entry matches, <c>lockMissing=true</c> is recorded.</param>
/// <param name="runtimeInfo">Runtime information appended to metadata on the on-disk path.</param>
/// <param name="environment">Environment information appended to metadata on the on-disk path.</param>
/// <param name="startupHooks">Startup hooks appended to metadata and evidence on the on-disk path.</param>
/// <param name="zipappAnalysis">Zipapp analysis appended to metadata on the on-disk path.</param>
/// <param name="cancellationToken">Flows into the distribution loaders.</param>
private static async Task EmitDiscoveredPackageAsync(
    LanguageAnalyzerContext context,
    LanguageComponentWriter writer,
    PythonVirtualFileSystem vfs,
    PythonPackageInfo package,
    PythonLockData lockData,
    ISet<string> matchedLocks,
    bool hasLockEntries,
    PythonRuntimeInfo? runtimeInfo,
    PythonEnvironment environment,
    PythonStartupHooks startupHooks,
    PythonZipappAnalysis zipappAnalysis,
    CancellationToken cancellationToken)
{
    // Null-safe: a package without a version is skipped instead of throwing.
    var version = package.Version?.Trim();
    if (string.IsNullOrEmpty(version))
    {
        return;
    }

    var metadata = new List<KeyValuePair<string, string?>>();
    metadata.AddRange(BuildPackageMetadata(context, vfs, package));

    if (lockData.TryGet(package.Name, version, out var lockEntry))
    {
        matchedLocks.Add(lockEntry!.DeclarationKey);
        AppendLockMetadata(metadata, lockEntry);
    }
    else if (hasLockEntries)
    {
        metadata.Add(new KeyValuePair<string, string?>("lockMissing", "true"));
    }

    var metadataDirectory = TryResolvePhysicalMetadataDirectory(vfs, package, out var metadataFile);
    if (metadataDirectory is not null)
    {
        PythonDistribution? distribution;
        try
        {
            distribution = await PythonDistributionLoader.LoadAsync(context, metadataDirectory, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is IOException or JsonException or UnauthorizedAccessException)
        {
            // Unreadable or malformed metadata on disk: emit nothing for this package.
            return;
        }

        if (distribution is null)
        {
            return;
        }

        // Distribution metadata comes first, followed by the package/lock metadata gathered above.
        var fullMetadata = distribution.SortedMetadata.ToList();
        fullMetadata.AddRange(metadata);

        AppendRuntimeMetadata(fullMetadata, runtimeInfo);
        AppendEnvironmentMetadata(fullMetadata, environment);
        AppendStartupHooksMetadata(fullMetadata, startupHooks);
        AppendZipappMetadata(fullMetadata, zipappAnalysis);

        var evidence = distribution.SortedEvidence.ToList();
        evidence.AddRange(startupHooks.ToEvidence(context));

        writer.AddFromPurl(
            analyzerId: "python",
            purl: distribution.Purl,
            name: distribution.Name,
            version: distribution.Version,
            type: "pypi",
            metadata: fullMetadata,
            evidence: evidence,
            usedByEntrypoint: distribution.UsedByEntrypoint);

        return;
    }

    if (metadataFile is not null && metadataFile.IsFromArchive)
    {
        var archiveDistribution = await PythonDistributionVfsLoader
            .LoadAsync(context, vfs, package, cancellationToken)
            .ConfigureAwait(false);

        if (archiveDistribution is not null)
        {
            // NOTE(review): unlike the on-disk path, no runtime/environment/startup-hook/zipapp
            // metadata is appended here - confirm this is intentional.
            var fullMetadata = archiveDistribution.SortedMetadata.ToList();
            fullMetadata.AddRange(metadata);

            writer.AddFromPurl(
                analyzerId: "python",
                purl: archiveDistribution.Purl,
                name: archiveDistribution.Name,
                version: archiveDistribution.Version,
                type: "pypi",
                metadata: fullMetadata,
                evidence: archiveDistribution.SortedEvidence,
                usedByEntrypoint: archiveDistribution.UsedByEntrypoint);

            return;
        }
    }

    // Fallback: no loadable distribution, so identity is derived from name and version only.
    var purl = $"pkg:pypi/{PythonPathHelper.NormalizePackageName(package.Name)}@{version}";
    var evidenceFallback = BuildPackageEvidence(context, vfs, package, metadataFile);

    writer.AddFromPurl(
        analyzerId: "python",
        purl: purl,
        name: package.Name,
        version: version,
        type: "pypi",
        metadata: metadata,
        evidence: evidenceFallback,
        usedByEntrypoint: false);
}
|
||||
|
||||
/// <summary>
/// Looks up the package's metadata file (<c>PKG-INFO</c> for eggs, <c>METADATA</c> otherwise)
/// in the virtual file system and, when it is not archive-backed, returns the directory
/// portion of its absolute path.
/// </summary>
/// <param name="vfs">Virtual file system queried for the metadata file.</param>
/// <param name="package">Package whose <c>MetadataPath</c> and <c>Kind</c> select the file.</param>
/// <param name="metadataFile">
/// The file returned by the lookup (archive-backed or not), or null when the package has no
/// metadata path or the lookup returns null.
/// </param>
/// <returns>The containing directory for a non-archive metadata file; otherwise null.</returns>
private static string? TryResolvePhysicalMetadataDirectory(
    PythonVirtualFileSystem vfs,
    PythonPackageInfo package,
    out PythonVirtualFile? metadataFile)
{
    if (string.IsNullOrWhiteSpace(package.MetadataPath))
    {
        metadataFile = null;
        return null;
    }

    var leafName = package.Kind == PythonPackageKind.Egg ? "PKG-INFO" : "METADATA";
    metadataFile = vfs.GetFile($"{package.MetadataPath}/{leafName}");

    // Only a file that exists and is not served from an archive has a usable directory.
    return metadataFile is { IsFromArchive: false }
        ? Path.GetDirectoryName(metadataFile.AbsolutePath)
        : null;
}
|
||||
|
||||
/// <summary>
/// Lazily yields the <c>pkg.kind</c>, <c>pkg.confidence</c> and <c>pkg.location</c> metadata
/// entries for a package, in that order.
/// </summary>
/// <remarks>
/// When the package has no location but does have a metadata path, the location is derived from
/// the metadata file: the archive path for archive-backed files that have one, otherwise the
/// file's directory (or the file path itself when no directory can be extracted), normalized
/// through <c>PythonPathHelper.NormalizeRelative</c>. A blank location is reported as <c>"."</c>.
/// </remarks>
/// <param name="context">Analyzer context passed to <c>PythonPathHelper.NormalizeRelative</c>.</param>
/// <param name="vfs">Virtual file system used to find the metadata file.</param>
/// <param name="package">Package being described.</param>
private static IEnumerable<KeyValuePair<string, string?>> BuildPackageMetadata(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    PythonPackageInfo package)
{
    var resolvedLocation = package.Location;

    var needsDerivedLocation =
        string.IsNullOrWhiteSpace(resolvedLocation) &&
        !string.IsNullOrWhiteSpace(package.MetadataPath);

    if (needsDerivedLocation)
    {
        var leafName = package.Kind == PythonPackageKind.Egg ? "PKG-INFO" : "METADATA";

        if (vfs.GetFile($"{package.MetadataPath}/{leafName}") is { } metadataEntry)
        {
            // Pick the path to report, then normalize it once.
            var pathToReport = metadataEntry.IsFromArchive && metadataEntry.ArchivePath is not null
                ? metadataEntry.ArchivePath
                : Path.GetDirectoryName(metadataEntry.AbsolutePath) is { Length: > 0 } parentDirectory
                    ? parentDirectory
                    : metadataEntry.AbsolutePath;

            resolvedLocation = PythonPathHelper.NormalizeRelative(context, pathToReport);
        }
    }

    var reportedLocation = string.IsNullOrWhiteSpace(resolvedLocation)
        ? "."
        : resolvedLocation.Replace('\\', '/');

    yield return new KeyValuePair<string, string?>("pkg.kind", package.Kind.ToString());
    yield return new KeyValuePair<string, string?>("pkg.confidence", package.Confidence.ToString());
    yield return new KeyValuePair<string, string?>("pkg.location", reportedLocation);
}
|
||||
|
||||
/// <summary>
/// Builds the file evidence for a package from its metadata file
/// (<c>PKG-INFO</c> for eggs, <c>METADATA</c> otherwise).
/// </summary>
/// <param name="context">Analyzer context passed to <c>PythonPathHelper.NormalizeRelative</c>.</param>
/// <param name="vfs">Virtual file system consulted when <paramref name="metadataFile"/> is null.</param>
/// <param name="package">Package whose kind and metadata path select the metadata file.</param>
/// <param name="metadataFile">Already-resolved metadata file, or null to look it up via the package's metadata path.</param>
/// <returns>
/// A single-element collection whose locator is the normalized archive path (archive-backed files
/// that have one) or the normalized absolute path, and whose value is the file's absolute path for
/// archive-backed files and null otherwise; an empty collection when no metadata file is found.
/// </returns>
private static IReadOnlyCollection<LanguageComponentEvidence> BuildPackageEvidence(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    PythonPackageInfo package,
    PythonVirtualFile? metadataFile)
{
    var label = package.Kind == PythonPackageKind.Egg ? "PKG-INFO" : "METADATA";

    // Use the caller's file when provided; otherwise fall back to a lookup by metadata path.
    var evidenceFile = metadataFile;
    if (evidenceFile is null && !string.IsNullOrWhiteSpace(package.MetadataPath))
    {
        evidenceFile = vfs.GetFile($"{package.MetadataPath}/{label}");
    }

    if (evidenceFile is null)
    {
        return Array.Empty<LanguageComponentEvidence>();
    }

    var locatorSource = evidenceFile.IsFromArchive && evidenceFile.ArchivePath is not null
        ? evidenceFile.ArchivePath
        : evidenceFile.AbsolutePath;
    var locator = PythonPathHelper.NormalizeRelative(context, locatorSource);

    var value = evidenceFile.IsFromArchive ? evidenceFile.AbsolutePath : null;

    return new[]
    {
        new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            label,
            locator,
            Value: value,
            Sha256: null)
    };
}
|
||||
|
||||
private static void AppendLockMetadata(List<KeyValuePair<string, string?>> metadata, PythonLockEntry entry)
|
||||
{
|
||||
metadata.Add(new KeyValuePair<string, string?>("lockSource", entry.Source));
|
||||
@@ -286,41 +562,4 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Gathers <c>*.dist-info</c> and <c>*.egg-info</c> directories under <paramref name="rootPath"/>,
/// plus those reported by <c>PythonContainerAdapter.DiscoverDistInfoDirectories</c>.
/// </summary>
/// <param name="rootPath">Directory to scan.</param>
/// <returns>
/// The directories, de-duplicated case-insensitively and sorted with ordinal comparison.
/// </returns>
private static IReadOnlyCollection<string> CollectDistInfoDirectories(string rootPath)
{
    var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var pattern in new[] { "*.dist-info", "*.egg-info" })
    {
        ScanInto(found, rootPath, pattern);
    }

    // Directories discovered inside OCI container layers are merged into the same set.
    foreach (var layerDirectory in PythonContainerAdapter.DiscoverDistInfoDirectories(rootPath))
    {
        found.Add(layerDirectory);
    }

    var ordered = found.ToArray();
    Array.Sort(ordered, StringComparer.Ordinal);
    return ordered;

    // Adds every directory matching the pattern to the sink. Enumeration failures are
    // ignored, so matches added before a failure are kept.
    static void ScanInto(ISet<string> sink, string basePath, string pattern)
    {
        try
        {
            foreach (var match in Directory.EnumerateDirectories(basePath, pattern, Enumeration))
            {
                sink.Add(match);
            }
        }
        catch (IOException)
        {
            // Enumeration error: skip whatever could not be listed.
        }
        catch (UnauthorizedAccessException)
        {
            // Access denied: skip whatever could not be listed.
        }
    }
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
# Python Analyzer Tasks
|
||||
|
||||
## Python Detection Gaps (Sprint 0405)
|
||||
|
||||
| Task ID | Status | Notes | Updated (UTC) |
|
||||
| --- | --- | --- | --- |
|
||||
| SCAN-PY-405-001 | DONE | Wire layout-aware VFS/discovery into `PythonLanguageAnalyzer`. | 2025-12-13 |
|
||||
| SCAN-PY-405-002 | BLOCKED | Preserve dist-info/egg-info evidence; emit explicit-key components where needed (incl. editable lock entries; no `@editable` PURLs). | 2025-12-13 |
|
||||
| SCAN-PY-405-003 | BLOCKED | Blocked on Action 2: lock/requirements precedence + supported formats scope. | 2025-12-13 |
|
||||
| SCAN-PY-405-004 | BLOCKED | Blocked on Action 3: container overlay contract (whiteouts + ordering semantics). | 2025-12-13 |
|
||||
| SCAN-PY-405-005 | BLOCKED | Blocked on Action 4: vendored deps representation contract (identity/scope vs metadata-only). | 2025-12-13 |
|
||||
| SCAN-PY-405-006 | BLOCKED | Blocked on Interlock 4: "used-by-entrypoint" semantics (avoid turning heuristics into truth). | 2025-12-13 |
|
||||
| SCAN-PY-405-007 | BLOCKED | Blocked on Actions 2-4: fixtures for includes/editables, overlay/whiteouts, vendoring. | 2025-12-13 |
|
||||
| SCAN-PY-405-008 | DONE | Docs + deterministic offline bench for Python analyzer contract. | 2025-12-13 |
|
||||
Reference in New Issue
Block a user