feat(python-analyzer): Enhance deterministic output tests and add new fixtures
- Updated TASKS.md to reflect changes in test fixtures for SCAN-PY-405-007.
- Added multiple test cases to ensure deterministic output for various Python package scenarios, including conda environments, requirements files, and vendored directories.
- Created new expected output files for conda packages (numpy, requests) and updated existing test fixtures for container whiteouts, wheel workspaces, and zipapp embedded requirements.
- Introduced helper methods to create wheel and zipapp packages for testing purposes.
- Added metadata files for new test fixtures to validate package detection and dependencies.
This commit is contained in:
@@ -88,7 +88,12 @@ internal sealed partial class ContainerOverlayHandler
|
||||
isComplete = false;
|
||||
}
|
||||
|
||||
var layerFiles = enumerateFiles(layer.Path).ToList();
|
||||
var layerRoot = NormalizePath(layer.Path);
|
||||
var layerFiles = enumerateFiles(layer.Path)
|
||||
.Select(file => NormalizeLayerRelativePath(layerRoot, file))
|
||||
.Where(static file => file is not null)
|
||||
.Cast<string>()
|
||||
.ToList();
|
||||
|
||||
// First pass: collect whiteouts and opaque markers
|
||||
var layerWhiteouts = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
@@ -121,6 +126,16 @@ internal sealed partial class ContainerOverlayHandler
|
||||
: targetName;
|
||||
|
||||
layerWhiteouts.Add(targetPath);
|
||||
|
||||
// Whiteouts can target files or directories. If a directory is whited out,
|
||||
// all previously-visible files under it must be removed.
|
||||
var toRemove = visiblePaths.Where(p => IsUnderDirectory(p, targetPath)).ToList();
|
||||
foreach (var path in toRemove)
|
||||
{
|
||||
visiblePaths.Remove(path);
|
||||
whiteoutedPaths.Add(path);
|
||||
}
|
||||
|
||||
visiblePaths.Remove(targetPath);
|
||||
whiteoutedPaths.Add(targetPath);
|
||||
}
|
||||
@@ -163,6 +178,32 @@ internal sealed partial class ContainerOverlayHandler
|
||||
warning);
|
||||
}
|
||||
|
||||
/// <summary>
/// Rewrites a path so that it is relative to the layer root.
/// </summary>
/// <param name="normalizedLayerRoot">Layer root, expected to be already normalized by the caller.</param>
/// <param name="path">Path to rewrite; it is passed through <c>NormalizePath</c> before comparison.</param>
/// <returns>
/// The remainder of the path after the layer root and its <c>/</c> separator;
/// an empty string when the normalized path has the same length as the layer root;
/// <c>null</c> when the path normalizes to an empty string or is not located under the layer root.
/// </returns>
private static string? NormalizeLayerRelativePath(string normalizedLayerRoot, string path)
{
    var candidate = NormalizePath(path);

    var sharesRootPrefix = candidate.Length != 0
        && candidate.StartsWith(normalizedLayerRoot, StringComparison.OrdinalIgnoreCase);
    if (!sharesRootPrefix)
    {
        return null;
    }

    var rootLength = normalizedLayerRoot.Length;
    if (candidate.Length == rootLength)
    {
        return string.Empty;
    }

    // The character right after the root must be a separator; otherwise the root
    // merely matched the beginning of a differently named sibling entry.
    return candidate[rootLength] == '/'
        ? candidate[(rootLength + 1)..]
        : null;
}
|
||||
|
||||
/// <summary>
|
||||
/// Checks if a path would be visible after overlay processing.
|
||||
/// </summary>
|
||||
|
||||
@@ -160,29 +160,30 @@ internal static partial class VendoredPackageDetector
|
||||
// with the same name as the package (normalized to lowercase with underscores).
|
||||
// E.g., dist-info at "site-packages/pip-23.0.dist-info" means package at "site-packages/pip/"
|
||||
|
||||
string? baseDir = null;
|
||||
var baseDir = string.Empty;
|
||||
|
||||
if (!string.IsNullOrEmpty(package.MetadataPath))
|
||||
{
|
||||
// Get the directory containing dist-info (usually site-packages)
|
||||
baseDir = Path.GetDirectoryName(package.MetadataPath);
|
||||
// In the Python VFS, MetadataPath may be just "<name>-<version>.dist-info" (rooted at VFS root).
|
||||
// Treat missing directory name as VFS root.
|
||||
baseDir = Path.GetDirectoryName(package.MetadataPath) ?? string.Empty;
|
||||
}
|
||||
else if (!string.IsNullOrEmpty(package.Location))
|
||||
{
|
||||
baseDir = package.Location;
|
||||
}
|
||||
|
||||
if (string.IsNullOrEmpty(baseDir))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// The package directory is baseDir + package module name
|
||||
// Use the first top-level module if available, otherwise use the normalized package name
|
||||
var moduleName = package.TopLevelModules.Length > 0
|
||||
? package.TopLevelModules[0]
|
||||
: package.NormalizedName;
|
||||
|
||||
if (string.IsNullOrWhiteSpace(moduleName))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return Path.Combine(baseDir, moduleName).Replace('\\', '/');
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
var matchedLocks = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
var hasLockEntries = lockData.Entries.Count > 0;
|
||||
|
||||
var containerOverlay = TryBuildContainerOverlay(context.RootPath);
|
||||
|
||||
// Detect Python runtime in container layers
|
||||
var runtimeInfo = PythonContainerAdapter.DetectRuntime(context.RootPath);
|
||||
|
||||
@@ -44,6 +46,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
|
||||
var packageDiscovery = new PythonPackageDiscovery();
|
||||
var discoveryResult = await packageDiscovery.DiscoverAsync(vfs, cancellationToken).ConfigureAwait(false);
|
||||
var vendoringByPackage = await BuildVendoringMapAsync(vfs, discoveryResult, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
foreach (var package in discoveryResult.Packages
|
||||
.Where(static p => !string.IsNullOrWhiteSpace(p.Version))
|
||||
@@ -52,6 +55,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
vendoringByPackage.TryGetValue(package.NormalizedName, out var vendoringAnalysis);
|
||||
|
||||
await EmitDiscoveredPackageAsync(
|
||||
context,
|
||||
writer,
|
||||
@@ -64,6 +69,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
environment,
|
||||
startupHooks,
|
||||
zipappAnalysis,
|
||||
containerOverlay,
|
||||
vendoringAnalysis,
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
@@ -221,6 +228,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
PythonEnvironment environment,
|
||||
PythonStartupHooks startupHooks,
|
||||
PythonZipappAnalysis zipappAnalysis,
|
||||
ContainerOverlayHandler.OverlayResult? containerOverlay,
|
||||
VendoringAnalysis? vendoringAnalysis,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var version = package.Version!.Trim();
|
||||
@@ -243,6 +252,21 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
}
|
||||
|
||||
var metadataDirectory = TryResolvePhysicalMetadataDirectory(vfs, package, out var metadataFile);
|
||||
|
||||
if (vendoringAnalysis is not null)
|
||||
{
|
||||
metadata.AddRange(VendoringMetadataBuilder.BuildParentMetadata(vendoringAnalysis));
|
||||
}
|
||||
|
||||
if (metadataFile is not null &&
|
||||
!metadataFile.IsFromArchive &&
|
||||
containerOverlay is not null &&
|
||||
TryGetContainerOverlayPath(containerOverlay, metadataFile.AbsolutePath, out var overlayPath) &&
|
||||
!ContainerOverlayHandler.IsPathVisible(containerOverlay, overlayPath))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (metadataDirectory is not null)
|
||||
{
|
||||
PythonDistribution? distribution;
|
||||
@@ -289,6 +313,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
evidence: evidence,
|
||||
usedByEntrypoint: distribution.UsedByEntrypoint);
|
||||
|
||||
EmitVendoredEmbeddedComponents(context, writer, vfs, vendoringAnalysis, version);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -313,6 +338,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
evidence: archiveDistribution.SortedEvidence,
|
||||
usedByEntrypoint: archiveDistribution.UsedByEntrypoint);
|
||||
|
||||
EmitVendoredEmbeddedComponents(context, writer, vfs, vendoringAnalysis, version);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -329,6 +355,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
metadata: metadata,
|
||||
evidence: evidenceFallback,
|
||||
usedByEntrypoint: false);
|
||||
|
||||
EmitVendoredEmbeddedComponents(context, writer, vfs, vendoringAnalysis, version);
|
||||
}
|
||||
|
||||
private static string? TryResolvePhysicalMetadataDirectory(
|
||||
@@ -363,6 +391,13 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
var location = package.Location;
|
||||
if (string.IsNullOrWhiteSpace(location) && !string.IsNullOrWhiteSpace(package.MetadataPath))
|
||||
{
|
||||
if (package.Kind == PythonPackageKind.Conda)
|
||||
{
|
||||
var normalizedPath = package.MetadataPath.Replace('\\', '/').Trim('/');
|
||||
var lastSlash = normalizedPath.LastIndexOf('/');
|
||||
location = lastSlash > 0 ? normalizedPath[..lastSlash] : normalizedPath;
|
||||
}
|
||||
|
||||
var metadataName = package.Kind == PythonPackageKind.Egg ? "PKG-INFO" : "METADATA";
|
||||
var file = vfs.GetFile($"{package.MetadataPath}/{metadataName}");
|
||||
|
||||
@@ -392,6 +427,19 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
PythonPackageInfo package,
|
||||
PythonVirtualFile? metadataFile)
|
||||
{
|
||||
if (package.Kind == PythonPackageKind.Conda && !string.IsNullOrWhiteSpace(package.MetadataPath))
|
||||
{
|
||||
return new[]
|
||||
{
|
||||
new LanguageComponentEvidence(
|
||||
LanguageEvidenceKind.File,
|
||||
"conda-meta",
|
||||
package.MetadataPath.Replace('\\', '/').TrimStart('/'),
|
||||
Value: null,
|
||||
Sha256: null)
|
||||
};
|
||||
}
|
||||
|
||||
if (metadataFile is not null)
|
||||
{
|
||||
var locator = metadataFile.IsFromArchive && metadataFile.ArchivePath is not null
|
||||
@@ -555,6 +603,27 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
metadata.Add(new KeyValuePair<string, string?>("zipapps.detected", "true"));
|
||||
metadata.Add(new KeyValuePair<string, string?>("zipapps.count", zipappAnalysis.Zipapps.Count.ToString()));
|
||||
|
||||
var embeddedDeps = zipappAnalysis.Zipapps
|
||||
.SelectMany(static z => z.EmbeddedDependencies)
|
||||
.Select(static d => d.Trim())
|
||||
.Where(static d => d.Length > 0)
|
||||
.Distinct(StringComparer.OrdinalIgnoreCase)
|
||||
.OrderBy(static d => d, StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
|
||||
if (embeddedDeps.Length > 0)
|
||||
{
|
||||
metadata.Add(new KeyValuePair<string, string?>("zipapps.embeddedDeps.count", embeddedDeps.Length.ToString()));
|
||||
|
||||
var sample = embeddedDeps.Take(12).ToArray();
|
||||
metadata.Add(new KeyValuePair<string, string?>("zipapps.embeddedDeps.sample", string.Join(';', sample)));
|
||||
|
||||
if (embeddedDeps.Length > sample.Length)
|
||||
{
|
||||
metadata.Add(new KeyValuePair<string, string?>("zipapps.embeddedDeps.sampleTruncated", "true"));
|
||||
}
|
||||
}
|
||||
|
||||
// Add version information from zipapp shebangs
|
||||
var versions = zipappAnalysis.Zipapps
|
||||
.Where(z => z.PythonVersion != null)
|
||||
@@ -583,4 +652,229 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs vendoring analysis over the discovered packages and indexes the results by normalized package name.
/// </summary>
/// <param name="vfs">Virtual file system handed to the vendoring detector.</param>
/// <param name="discoveryResult">Package discovery result handed to the vendoring detector.</param>
/// <param name="cancellationToken">Token forwarded to the vendoring detector.</param>
/// <returns>
/// A case-insensitive dictionary keyed by <c>PythonPackageInfo.NormalizeName</c> of each analysis' package name.
/// When several analyses normalize to the same key, the last one wins.
/// </returns>
private static async Task<Dictionary<string, VendoringAnalysis>> BuildVendoringMapAsync(
    PythonVirtualFileSystem vfs,
    PythonPackageDiscoveryResult discoveryResult,
    CancellationToken cancellationToken)
{
    var analyses = await VendoredPackageDetector
        .AnalyzeAllAsync(vfs, discoveryResult, cancellationToken)
        .ConfigureAwait(false);

    var byPackage = new Dictionary<string, VendoringAnalysis>(StringComparer.OrdinalIgnoreCase);

    foreach (var analysis in analyses)
    {
        // Indexer assignment (not Add) so a repeated key overwrites instead of throwing.
        byPackage[PythonPackageInfo.NormalizeName(analysis.PackageName)] = analysis;
    }

    return byPackage;
}
|
||||
|
||||
/// <summary>
/// Attempts to discover container layers under the root path and process them into an overlay result.
/// </summary>
/// <param name="rootPath">Root path passed to <c>ContainerOverlayHandler.DiscoverLayers</c>.</param>
/// <returns>
/// The processed overlay, or <c>null</c> when no layers were discovered or when
/// layer discovery or layer processing threw.
/// </returns>
private static ContainerOverlayHandler.OverlayResult? TryBuildContainerOverlay(string rootPath)
{
    IReadOnlyList<ContainerOverlayHandler.LayerInfo> discovered;

    try
    {
        discovered = ContainerOverlayHandler.DiscoverLayers(rootPath);
    }
    catch
    {
        // Best effort: a discovery failure is reported as "no overlay".
        return null;
    }

    if (discovered.Count > 0)
    {
        try
        {
            return ContainerOverlayHandler.ProcessLayers(discovered, EnumerateOverlayRelevantFiles);
        }
        catch
        {
            // Best effort: a processing failure is reported as "no overlay".
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Lists the files inside a layer directory that are relevant to overlay processing:
/// files matching <c>.wh.*</c>, <c>METADATA</c> files whose path contains <c>.dist-info</c>,
/// and <c>PKG-INFO</c> files whose path contains <c>.egg-info</c>.
/// </summary>
/// <param name="layerPath">Directory of the layer to scan recursively.</param>
/// <returns>
/// De-duplicated file paths ordered with <see cref="StringComparer.OrdinalIgnoreCase"/>;
/// empty when the path is blank or the directory does not exist.
/// </returns>
private static IEnumerable<string> EnumerateOverlayRelevantFiles(string layerPath)
{
    if (string.IsNullOrWhiteSpace(layerPath) || !Directory.Exists(layerPath))
    {
        yield break;
    }

    var enumeration = new EnumerationOptions
    {
        RecurseSubdirectories = true,
        IgnoreInaccessible = true,
        AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
    };

    var collected = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Each scan is isolated so that a failure in one pattern does not discard the others.
    Collect(collected, layerPath, ".wh.*", null, enumeration);
    Collect(collected, layerPath, "METADATA", ".dist-info", enumeration);
    Collect(collected, layerPath, "PKG-INFO", ".egg-info", enumeration);

    // Sort so the output order does not depend on file-system enumeration order.
    foreach (var path in collected.OrderBy(static p => p, StringComparer.OrdinalIgnoreCase))
    {
        yield return path;
    }

    // Adds every file matching the pattern (and, when given, containing the path fragment) to the sink.
    static void Collect(
        HashSet<string> sink,
        string root,
        string pattern,
        string? requiredFragment,
        EnumerationOptions options)
    {
        try
        {
            foreach (var match in Directory.EnumerateFiles(root, pattern, options))
            {
                if (requiredFragment is null ||
                    match.Contains(requiredFragment, StringComparison.OrdinalIgnoreCase))
                {
                    sink.Add(match);
                }
            }
        }
        catch (IOException)
        {
            // Best effort: keep whatever was collected before the failure.
        }
        catch (UnauthorizedAccessException)
        {
            // Best effort: keep whatever was collected before the failure.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Maps an absolute path to a path relative to the first processed layer whose directory contains it.
/// </summary>
/// <param name="overlay">Overlay whose <c>ProcessedLayers</c> are searched in order.</param>
/// <param name="absolutePath">Absolute path to locate; backslashes are treated as forward slashes.</param>
/// <param name="overlayPath">
/// Receives the layer-relative path on success, or an empty string when no layer contains the path.
/// </param>
/// <returns><c>true</c> when a containing layer was found; otherwise <c>false</c>.</returns>
private static bool TryGetContainerOverlayPath(ContainerOverlayHandler.OverlayResult overlay, string absolutePath, out string overlayPath)
{
    overlayPath = string.Empty;

    var target = absolutePath.Replace('\\', '/').TrimEnd('/');

    foreach (var layer in overlay.ProcessedLayers)
    {
        // The trailing separator keeps "/layers/1" from matching "/layers/10/...".
        var layerPrefix = layer.Path.Replace('\\', '/').TrimEnd('/') + "/";

        if (!target.StartsWith(layerPrefix, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        overlayPath = target[layerPrefix.Length..];
        return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Writes one component per embedded (vendored) package that
/// <c>VendoringMetadataBuilder.GetEmbeddedToEmitSeparately</c> selects for the given analysis.
/// </summary>
/// <param name="context">Analyzer context used when building evidence locators.</param>
/// <param name="writer">Writer that receives the embedded components.</param>
/// <param name="vfs">Virtual file system probed for evidence files.</param>
/// <param name="vendoringAnalysis">Vendoring analysis of the parent package; nothing is emitted when <c>null</c>.</param>
/// <param name="parentVersion">Version of the parent package, forwarded to the metadata builder.</param>
private static void EmitVendoredEmbeddedComponents(
    LanguageAnalyzerContext context,
    LanguageComponentWriter writer,
    PythonVirtualFileSystem vfs,
    VendoringAnalysis? vendoringAnalysis,
    string? parentVersion)
{
    if (vendoringAnalysis is null)
    {
        return;
    }

    var separatelyEmitted = VendoringMetadataBuilder.GetEmbeddedToEmitSeparately(vendoringAnalysis, parentVersion);
    if (separatelyEmitted.Count == 0)
    {
        return;
    }

    foreach (var vendored in separatelyEmitted)
    {
        var vendoredMetadata = VendoringMetadataBuilder.BuildEmbeddedMetadata(
            vendored,
            parentVersion,
            vendoringAnalysis.Confidence);

        var vendoredEvidence = TryBuildVendoredEvidence(context, vfs, vendored.Path);

        // The embedded path is the origin locator of the explicit key.
        var explicitKey = LanguageExplicitKey.Create(
            analyzerId: "python",
            ecosystem: "pypi",
            name: PythonPathHelper.NormalizePackageName(vendored.Name),
            spec: vendored.Version ?? string.Empty,
            originLocator: vendored.Path);

        writer.AddFromExplicitKey(
            analyzerId: "python",
            componentKey: explicitKey,
            purl: vendored.Purl,
            name: vendored.Name,
            version: vendored.Version,
            type: "pypi",
            metadata: vendoredMetadata,
            evidence: vendoredEvidence,
            usedByEntrypoint: false);
    }
}
|
||||
|
||||
/// <summary>
/// Builds a single "vendored" file evidence entry for an embedded package by probing
/// a fixed list of candidate files in the virtual file system.
/// </summary>
/// <param name="context">Analyzer context used to make the locator relative.</param>
/// <param name="vfs">Virtual file system that is probed for the candidate files.</param>
/// <param name="embeddedPath">Path of the embedded package, without a trailing slash or extension.</param>
/// <returns>
/// A one-element collection describing the first candidate found, or <c>null</c> when none exists.
/// For a file that comes from an archive with a known archive path, the locator is the archive path
/// and the value is the file's absolute path; otherwise the locator is the file's absolute path
/// and the value is <c>null</c>.
/// </returns>
private static IReadOnlyCollection<LanguageComponentEvidence>? TryBuildVendoredEvidence(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    string embeddedPath)
{
    // Probe order matters: the first path present in the VFS supplies the evidence.
    string[] probePaths =
    {
        $"{embeddedPath}/__init__.py",
        $"{embeddedPath}/_version.py",
        $"{embeddedPath}/version.py",
        $"{embeddedPath}/__version__.py",
        embeddedPath + ".py"
    };

    foreach (var probePath in probePaths)
    {
        var hit = vfs.GetFile(probePath);
        if (hit is null)
        {
            continue;
        }

        var evidence = hit.IsFromArchive && hit.ArchivePath is not null
            ? new LanguageComponentEvidence(
                LanguageEvidenceKind.File,
                "vendored",
                PythonPathHelper.NormalizeRelative(context, hit.ArchivePath),
                Value: hit.AbsolutePath,
                Sha256: null)
            : new LanguageComponentEvidence(
                LanguageEvidenceKind.File,
                "vendored",
                PythonPathHelper.NormalizeRelative(context, hit.AbsolutePath),
                Value: null,
                Sha256: null);

        return new[] { evidence };
    }

    return null;
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
| SCAN-PY-405-004 | DONE | Container overlay contract implemented: OCI whiteout semantics (`.wh.*`, `.wh..wh..opq`), deterministic layer ordering, `container.overlayIncomplete` metadata marker. | 2025-12-13 |
|
||||
| SCAN-PY-405-005 | DONE | Vendoring integration: `VendoringMetadataBuilder` for parent metadata + embedded components with High confidence. | 2025-12-13 |
|
||||
| SCAN-PY-405-006 | DONE | Scope classification added (prod/dev/docs/build) from lock sections and file names per Interlock 4. Usage signals remain default. | 2025-12-13 |
|
||||
| SCAN-PY-405-007 | DONE | Added test fixtures for includes, Pipfile.lock develop, scope classification, PEP 508 direct refs, cycle detection. | 2025-12-13 |
|
||||
| SCAN-PY-405-007 | DONE | Added deterministic fixtures + goldens: conda-meta env, requirements includes+editable, Pipfile.lock default+develop, wheel workspace, zipapp embedded requirements, container whiteouts, and vendored directories. | 2025-12-21 |
|
||||
| SCAN-PY-405-008 | DONE | Docs + deterministic offline bench for Python analyzer contract. | 2025-12-13 |
|
||||
|
||||
## Completed Contracts (Action Decisions 2025-12-13)
|
||||
|
||||
Reference in New Issue
Block a user