feat(python-analyzer): Enhance deterministic output tests and add new fixtures

- Updated TASKS.md to reflect changes in test fixtures for SCAN-PY-405-007.
- Added multiple test cases to ensure deterministic output for various Python package scenarios, including conda environments, requirements files, and vendored directories.
- Created new expected output files for conda packages (numpy, requests) and updated existing test fixtures for container whiteouts, wheel workspaces, and zipapp embedded requirements.
- Introduced helper methods to create wheel and zipapp packages for testing purposes.
- Added metadata files for new test fixtures to validate package detection and dependencies.
This commit is contained in:
StellaOps Bot
2025-12-21 17:51:19 +02:00
parent 22d67f203f
commit 292a6e94e8
29 changed files with 1043 additions and 25 deletions

View File

@@ -88,7 +88,12 @@ internal sealed partial class ContainerOverlayHandler
isComplete = false;
}
var layerFiles = enumerateFiles(layer.Path).ToList();
var layerRoot = NormalizePath(layer.Path);
var layerFiles = enumerateFiles(layer.Path)
.Select(file => NormalizeLayerRelativePath(layerRoot, file))
.Where(static file => file is not null)
.Cast<string>()
.ToList();
// First pass: collect whiteouts and opaque markers
var layerWhiteouts = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
@@ -121,6 +126,16 @@ internal sealed partial class ContainerOverlayHandler
: targetName;
layerWhiteouts.Add(targetPath);
// Whiteouts can target files or directories. If a directory is whited out,
// all previously-visible files under it must be removed.
var toRemove = visiblePaths.Where(p => IsUnderDirectory(p, targetPath)).ToList();
foreach (var path in toRemove)
{
visiblePaths.Remove(path);
whiteoutedPaths.Add(path);
}
visiblePaths.Remove(targetPath);
whiteoutedPaths.Add(targetPath);
}
@@ -163,6 +178,32 @@ internal sealed partial class ContainerOverlayHandler
warning);
}
/// <summary>
/// Converts a path reported for a layer into a path relative to that layer's root.
/// </summary>
/// <param name="normalizedLayerRoot">Layer root, already passed through <c>NormalizePath</c> by the caller.</param>
/// <param name="path">Raw path to convert; it is normalized here before comparison.</param>
/// <returns>
/// The part of the normalized path after the root and its <c>/</c> separator;
/// an empty string when the path is the root itself;
/// <c>null</c> when the normalized path is empty or does not lie under the root.
/// </returns>
private static string? NormalizeLayerRelativePath(string normalizedLayerRoot, string path)
{
    var candidate = NormalizePath(path);

    // Empty paths and paths that do not share the root prefix (case-insensitive) are rejected.
    if (candidate.Length == 0 ||
        !candidate.StartsWith(normalizedLayerRoot, StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    var rootLength = normalizedLayerRoot.Length;
    if (candidate.Length == rootLength)
    {
        // The path is the layer root itself.
        return string.Empty;
    }

    // A bare prefix match is not enough: the character right after the root must be a
    // separator, otherwise "<root>abc" would be accepted as living under "<root>".
    return candidate[rootLength] == '/'
        ? candidate[(rootLength + 1)..]
        : null;
}
/// <summary>
/// Checks if a path would be visible after overlay processing.
/// </summary>

View File

@@ -160,29 +160,30 @@ internal static partial class VendoredPackageDetector
// with the same name as the package (normalized to lowercase with underscores).
// E.g., dist-info at "site-packages/pip-23.0.dist-info" means package at "site-packages/pip/"
string? baseDir = null;
var baseDir = string.Empty;
if (!string.IsNullOrEmpty(package.MetadataPath))
{
// Get the directory containing dist-info (usually site-packages)
baseDir = Path.GetDirectoryName(package.MetadataPath);
// In the Python VFS, MetadataPath may be just "<name>-<version>.dist-info" (rooted at VFS root).
// Treat missing directory name as VFS root.
baseDir = Path.GetDirectoryName(package.MetadataPath) ?? string.Empty;
}
else if (!string.IsNullOrEmpty(package.Location))
{
baseDir = package.Location;
}
if (string.IsNullOrEmpty(baseDir))
{
return null;
}
// The package directory is baseDir + package module name
// Use the first top-level module if available, otherwise use the normalized package name
var moduleName = package.TopLevelModules.Length > 0
? package.TopLevelModules[0]
: package.NormalizedName;
if (string.IsNullOrWhiteSpace(moduleName))
{
return null;
}
return Path.Combine(baseDir, moduleName).Replace('\\', '/');
}

View File

@@ -27,6 +27,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
var matchedLocks = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var hasLockEntries = lockData.Entries.Count > 0;
var containerOverlay = TryBuildContainerOverlay(context.RootPath);
// Detect Python runtime in container layers
var runtimeInfo = PythonContainerAdapter.DetectRuntime(context.RootPath);
@@ -44,6 +46,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
var packageDiscovery = new PythonPackageDiscovery();
var discoveryResult = await packageDiscovery.DiscoverAsync(vfs, cancellationToken).ConfigureAwait(false);
var vendoringByPackage = await BuildVendoringMapAsync(vfs, discoveryResult, cancellationToken).ConfigureAwait(false);
foreach (var package in discoveryResult.Packages
.Where(static p => !string.IsNullOrWhiteSpace(p.Version))
@@ -52,6 +55,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
{
cancellationToken.ThrowIfCancellationRequested();
vendoringByPackage.TryGetValue(package.NormalizedName, out var vendoringAnalysis);
await EmitDiscoveredPackageAsync(
context,
writer,
@@ -64,6 +69,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
environment,
startupHooks,
zipappAnalysis,
containerOverlay,
vendoringAnalysis,
cancellationToken)
.ConfigureAwait(false);
}
@@ -221,6 +228,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
PythonEnvironment environment,
PythonStartupHooks startupHooks,
PythonZipappAnalysis zipappAnalysis,
ContainerOverlayHandler.OverlayResult? containerOverlay,
VendoringAnalysis? vendoringAnalysis,
CancellationToken cancellationToken)
{
var version = package.Version!.Trim();
@@ -243,6 +252,21 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
}
var metadataDirectory = TryResolvePhysicalMetadataDirectory(vfs, package, out var metadataFile);
if (vendoringAnalysis is not null)
{
metadata.AddRange(VendoringMetadataBuilder.BuildParentMetadata(vendoringAnalysis));
}
if (metadataFile is not null &&
!metadataFile.IsFromArchive &&
containerOverlay is not null &&
TryGetContainerOverlayPath(containerOverlay, metadataFile.AbsolutePath, out var overlayPath) &&
!ContainerOverlayHandler.IsPathVisible(containerOverlay, overlayPath))
{
return;
}
if (metadataDirectory is not null)
{
PythonDistribution? distribution;
@@ -289,6 +313,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
evidence: evidence,
usedByEntrypoint: distribution.UsedByEntrypoint);
EmitVendoredEmbeddedComponents(context, writer, vfs, vendoringAnalysis, version);
return;
}
@@ -313,6 +338,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
evidence: archiveDistribution.SortedEvidence,
usedByEntrypoint: archiveDistribution.UsedByEntrypoint);
EmitVendoredEmbeddedComponents(context, writer, vfs, vendoringAnalysis, version);
return;
}
}
@@ -329,6 +355,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
metadata: metadata,
evidence: evidenceFallback,
usedByEntrypoint: false);
EmitVendoredEmbeddedComponents(context, writer, vfs, vendoringAnalysis, version);
}
private static string? TryResolvePhysicalMetadataDirectory(
@@ -363,6 +391,13 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
var location = package.Location;
if (string.IsNullOrWhiteSpace(location) && !string.IsNullOrWhiteSpace(package.MetadataPath))
{
if (package.Kind == PythonPackageKind.Conda)
{
var normalizedPath = package.MetadataPath.Replace('\\', '/').Trim('/');
var lastSlash = normalizedPath.LastIndexOf('/');
location = lastSlash > 0 ? normalizedPath[..lastSlash] : normalizedPath;
}
var metadataName = package.Kind == PythonPackageKind.Egg ? "PKG-INFO" : "METADATA";
var file = vfs.GetFile($"{package.MetadataPath}/{metadataName}");
@@ -392,6 +427,19 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
PythonPackageInfo package,
PythonVirtualFile? metadataFile)
{
if (package.Kind == PythonPackageKind.Conda && !string.IsNullOrWhiteSpace(package.MetadataPath))
{
return new[]
{
new LanguageComponentEvidence(
LanguageEvidenceKind.File,
"conda-meta",
package.MetadataPath.Replace('\\', '/').TrimStart('/'),
Value: null,
Sha256: null)
};
}
if (metadataFile is not null)
{
var locator = metadataFile.IsFromArchive && metadataFile.ArchivePath is not null
@@ -555,6 +603,27 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
metadata.Add(new KeyValuePair<string, string?>("zipapps.detected", "true"));
metadata.Add(new KeyValuePair<string, string?>("zipapps.count", zipappAnalysis.Zipapps.Count.ToString()));
var embeddedDeps = zipappAnalysis.Zipapps
.SelectMany(static z => z.EmbeddedDependencies)
.Select(static d => d.Trim())
.Where(static d => d.Length > 0)
.Distinct(StringComparer.OrdinalIgnoreCase)
.OrderBy(static d => d, StringComparer.Ordinal)
.ToArray();
if (embeddedDeps.Length > 0)
{
metadata.Add(new KeyValuePair<string, string?>("zipapps.embeddedDeps.count", embeddedDeps.Length.ToString()));
var sample = embeddedDeps.Take(12).ToArray();
metadata.Add(new KeyValuePair<string, string?>("zipapps.embeddedDeps.sample", string.Join(';', sample)));
if (embeddedDeps.Length > sample.Length)
{
metadata.Add(new KeyValuePair<string, string?>("zipapps.embeddedDeps.sampleTruncated", "true"));
}
}
// Add version information from zipapp shebangs
var versions = zipappAnalysis.Zipapps
.Where(z => z.PythonVersion != null)
@@ -583,4 +652,229 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
}
}
}
/// <summary>
/// Runs vendoring analysis for the discovery result and indexes the analyses by
/// normalized package name (case-insensitive keys).
/// </summary>
/// <param name="vfs">Virtual file system handed to the vendoring detector.</param>
/// <param name="discoveryResult">Package discovery result handed to the vendoring detector.</param>
/// <param name="cancellationToken">Token forwarded to the detector.</param>
/// <returns>A dictionary from normalized package name to its vendoring analysis.</returns>
private static async Task<Dictionary<string, VendoringAnalysis>> BuildVendoringMapAsync(
    PythonVirtualFileSystem vfs,
    PythonPackageDiscoveryResult discoveryResult,
    CancellationToken cancellationToken)
{
    var byNormalizedName = new Dictionary<string, VendoringAnalysis>(StringComparer.OrdinalIgnoreCase);

    var analyses = await VendoredPackageDetector
        .AnalyzeAllAsync(vfs, discoveryResult, cancellationToken)
        .ConfigureAwait(false);

    foreach (var item in analyses)
    {
        // Indexer assignment on purpose: when two analyses normalize to the same name,
        // the later one replaces the earlier one instead of throwing.
        byNormalizedName[PythonPackageInfo.NormalizeName(item.PackageName)] = item;
    }

    return byNormalizedName;
}
/// <summary>
/// Best-effort construction of the container overlay for <paramref name="rootPath"/>.
/// </summary>
/// <param name="rootPath">Root path passed to layer discovery.</param>
/// <returns>
/// The processed overlay, or <c>null</c> when layer discovery throws, no layers are found,
/// or layer processing throws.
/// </returns>
private static ContainerOverlayHandler.OverlayResult? TryBuildContainerOverlay(string rootPath)
{
    IReadOnlyList<ContainerOverlayHandler.LayerInfo> discovered;
    try
    {
        discovered = ContainerOverlayHandler.DiscoverLayers(rootPath);
    }
    catch
    {
        // Any discovery failure is treated as "no overlay available".
        return null;
    }

    if (discovered.Count == 0)
    {
        return null;
    }

    ContainerOverlayHandler.OverlayResult? overlay;
    try
    {
        overlay = ContainerOverlayHandler.ProcessLayers(discovered, EnumerateOverlayRelevantFiles);
    }
    catch
    {
        // Any processing failure likewise yields no overlay rather than propagating.
        overlay = null;
    }

    return overlay;
}
/// <summary>
/// Lazily lists the files under a layer directory that this method treats as relevant:
/// files named <c>.wh.*</c>, files named <c>METADATA</c> whose path contains
/// <c>.dist-info</c>, and files named <c>PKG-INFO</c> whose path contains <c>.egg-info</c>.
/// </summary>
/// <param name="layerPath">Layer directory to scan recursively.</param>
/// <returns>
/// The matching paths as returned by <see cref="Directory.EnumerateFiles(string, string, EnumerationOptions)"/>,
/// de-duplicated case-insensitively and sorted with <see cref="StringComparer.OrdinalIgnoreCase"/>.
/// Yields nothing when <paramref name="layerPath"/> is null/whitespace or is not an existing directory.
/// </returns>
private static IEnumerable<string> EnumerateOverlayRelevantFiles(string layerPath)
{
    if (string.IsNullOrWhiteSpace(layerPath) || !Directory.Exists(layerPath))
    {
        yield break;
    }

    var options = new EnumerationOptions
    {
        RecurseSubdirectories = true,
        IgnoreInaccessible = true,
        AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
    };

    var results = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Each scan is independent: a failure in one pattern does not prevent the others.
    AddOverlayFilesMatching(layerPath, ".wh.*", requiredPathFragment: null, options, results);
    AddOverlayFilesMatching(layerPath, "METADATA", requiredPathFragment: ".dist-info", options, results);
    AddOverlayFilesMatching(layerPath, "PKG-INFO", requiredPathFragment: ".egg-info", options, results);

    // Sort so the sequence handed to the caller does not depend on file-system enumeration order.
    foreach (var result in results.OrderBy(static path => path, StringComparer.OrdinalIgnoreCase))
    {
        yield return result;
    }
}

/// <summary>
/// Adds every file under <paramref name="layerPath"/> matching <paramref name="searchPattern"/> to
/// <paramref name="results"/>, optionally keeping only paths that contain a given fragment.
/// </summary>
/// <param name="layerPath">Directory to enumerate.</param>
/// <param name="searchPattern">File-name pattern passed to <c>Directory.EnumerateFiles</c>.</param>
/// <param name="requiredPathFragment">
/// When not <c>null</c>, only paths containing this text (case-insensitive, anywhere in the
/// returned path) are kept.
/// </param>
/// <param name="options">Enumeration options shared by all scans.</param>
/// <param name="results">Set that receives the matching paths.</param>
private static void AddOverlayFilesMatching(
    string layerPath,
    string searchPattern,
    string? requiredPathFragment,
    EnumerationOptions options,
    HashSet<string> results)
{
    try
    {
        foreach (var file in Directory.EnumerateFiles(layerPath, searchPattern, options))
        {
            if (requiredPathFragment is null ||
                file.Contains(requiredPathFragment, StringComparison.OrdinalIgnoreCase))
            {
                results.Add(file);
            }
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: keep whatever was collected before the failure and move on.
    }
}
/// <summary>
/// Maps an absolute path onto a layer-relative overlay path by stripping the path of the
/// first processed layer that contains it.
/// </summary>
/// <param name="overlay">Overlay whose processed layers are searched in order.</param>
/// <param name="absolutePath">Path to map; backslashes are converted to <c>/</c> and trailing slashes removed.</param>
/// <param name="overlayPath">
/// On success, the part of the path after the matching layer path and its separator;
/// otherwise an empty string.
/// </param>
/// <returns><c>true</c> when some layer path is a case-insensitive directory prefix of the path.</returns>
private static bool TryGetContainerOverlayPath(ContainerOverlayHandler.OverlayResult overlay, string absolutePath, out string overlayPath)
{
    var target = absolutePath.Replace('\\', '/').TrimEnd('/');

    foreach (var layer in overlay.ProcessedLayers)
    {
        // Append the separator so "layer1" does not match "layer10/...".
        var prefix = layer.Path.Replace('\\', '/').TrimEnd('/') + "/";
        if (!target.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        overlayPath = target[prefix.Length..];
        return true;
    }

    overlayPath = string.Empty;
    return false;
}
/// <summary>
/// Writes one component per embedded (vendored) package that the vendoring metadata
/// builder selects for separate emission.
/// </summary>
/// <param name="context">Analyzer context, used when building evidence locators.</param>
/// <param name="writer">Component writer that receives the embedded components.</param>
/// <param name="vfs">Virtual file system probed for evidence files.</param>
/// <param name="vendoringAnalysis">Vendoring analysis of the parent package; nothing is emitted when <c>null</c>.</param>
/// <param name="parentVersion">Version of the parent package, forwarded to the metadata builder.</param>
private static void EmitVendoredEmbeddedComponents(
    LanguageAnalyzerContext context,
    LanguageComponentWriter writer,
    PythonVirtualFileSystem vfs,
    VendoringAnalysis? vendoringAnalysis,
    string? parentVersion)
{
    const string AnalyzerId = "python";
    const string Ecosystem = "pypi";

    if (vendoringAnalysis is null)
    {
        return;
    }

    var separatelyEmitted = VendoringMetadataBuilder.GetEmbeddedToEmitSeparately(vendoringAnalysis, parentVersion);
    if (separatelyEmitted.Count == 0)
    {
        return;
    }

    foreach (var item in separatelyEmitted)
    {
        var itemMetadata = VendoringMetadataBuilder.BuildEmbeddedMetadata(item, parentVersion, vendoringAnalysis.Confidence);
        var itemEvidence = TryBuildVendoredEvidence(context, vfs, item.Path);

        // The embedded path is the origin locator of the explicit key.
        var explicitKey = LanguageExplicitKey.Create(
            analyzerId: AnalyzerId,
            ecosystem: Ecosystem,
            name: PythonPathHelper.NormalizePackageName(item.Name),
            spec: item.Version ?? string.Empty,
            originLocator: item.Path);

        writer.AddFromExplicitKey(
            analyzerId: AnalyzerId,
            componentKey: explicitKey,
            purl: item.Purl,
            name: item.Name,
            version: item.Version,
            type: Ecosystem,
            metadata: itemMetadata,
            evidence: itemEvidence,
            usedByEntrypoint: false);
    }
}
/// <summary>
/// Builds a single "vendored" file-evidence entry for an embedded package by probing a fixed,
/// ordered list of candidate files in the virtual file system.
/// </summary>
/// <param name="context">Analyzer context used to make the locator relative.</param>
/// <param name="vfs">Virtual file system that is probed.</param>
/// <param name="embeddedPath">Path of the embedded package (directory or module, without <c>.py</c>).</param>
/// <returns>
/// A one-element collection describing the first candidate that exists, or <c>null</c> when
/// none of the candidates is found.
/// </returns>
private static IReadOnlyCollection<LanguageComponentEvidence>? TryBuildVendoredEvidence(
    LanguageAnalyzerContext context,
    PythonVirtualFileSystem vfs,
    string embeddedPath)
{
    // Probe order matters: the first existing candidate wins.
    var probes = new[]
    {
        $"{embeddedPath}/__init__.py",
        $"{embeddedPath}/_version.py",
        $"{embeddedPath}/version.py",
        $"{embeddedPath}/__version__.py",
        embeddedPath + ".py"
    };

    // Select is lazy, so probing stops at the first hit.
    var hit = probes
        .Select(probe => vfs.GetFile(probe))
        .FirstOrDefault(static found => found is not null);

    if (hit is null)
    {
        return null;
    }

    // For a file read from an archive with a known archive path, the locator is the archive
    // and the file's own path goes into Value; otherwise the locator is the file itself.
    var archivePath = hit.IsFromArchive ? hit.ArchivePath : null;
    var locator = PythonPathHelper.NormalizeRelative(context, archivePath ?? hit.AbsolutePath);
    var innerPath = archivePath is null ? null : hit.AbsolutePath;

    return new[]
    {
        new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            "vendored",
            locator,
            Value: innerPath,
            Sha256: null)
    };
}
}

View File

@@ -10,7 +10,7 @@
| SCAN-PY-405-004 | DONE | Container overlay contract implemented: OCI whiteout semantics (`.wh.*`, `.wh..wh..opq`), deterministic layer ordering, `container.overlayIncomplete` metadata marker. | 2025-12-13 |
| SCAN-PY-405-005 | DONE | Vendoring integration: `VendoringMetadataBuilder` for parent metadata + embedded components with High confidence. | 2025-12-13 |
| SCAN-PY-405-006 | DONE | Scope classification added (prod/dev/docs/build) from lock sections and file names per Interlock 4. Usage signals remain default. | 2025-12-13 |
| SCAN-PY-405-007 | DONE | Added test fixtures for includes, Pipfile.lock develop, scope classification, PEP 508 direct refs, cycle detection. | 2025-12-13 |
| SCAN-PY-405-007 | DONE | Added deterministic fixtures + goldens: conda-meta env, requirements includes+editable, Pipfile.lock default+develop, wheel workspace, zipapp embedded requirements, container whiteouts, and vendored directories. | 2025-12-21 |
| SCAN-PY-405-008 | DONE | Docs + deterministic offline bench for Python analyzer contract. | 2025-12-13 |
## Completed Contracts (Action Decisions 2025-12-13)