diff --git a/docs/implplan/archived/SPRINT_0170_0001_0001_notifications_telemetry.md b/docs/implplan/archived/SPRINT_0170_0001_0001_notifications_telemetry.md index 8b7b998dc..c234c71a1 100644 --- a/docs/implplan/archived/SPRINT_0170_0001_0001_notifications_telemetry.md +++ b/docs/implplan/archived/SPRINT_0170_0001_0001_notifications_telemetry.md @@ -96,10 +96,10 @@ ## Interlocks (External Dependencies) | Dependency | Source sprint / doc | Current state | Impact on waves | | --- | --- | --- | --- | -| Sprint 150.A – Orchestrator (wave table) | `SPRINT_150_scheduling_automation.md` | TODO | Blocks visibility of job events for Notify templates and Telemetry samples until orchestration telemetry lands. | -| ORCH-OBS-50-001 `orchestrator instrumentation` | Sprint 150 backlog | TODO | Needed for Telemetry.Core sample + Notify SLO hooks; monitor for slip. | -| POLICY-OBS-50-001 `policy instrumentation` | Sprint 150 backlog | TODO | Required before Telemetry helpers can be adopted by Policy + risk routing. | -| WEB-OBS-50-001 `gateway telemetry core adoption` | Sprint 214/215 backlogs | TODO | Ensures web/gateway emits trace IDs that Notify incident payload references. | +| Sprint 150.A – Orchestrator (wave table) | `docs/implplan/archived/SPRINT_0150_0001_0001_scheduling_automation.md` | DONE (2025-12-10) | Unblocked: orchestration baseline landed; job/telemetry events available for Notify templates and Telemetry samples. | +| ORCH-OBS-50-001 `orchestrator instrumentation` | `docs/implplan/archived/SPRINT_0151_0001_0001_orchestrator_i.md` | DONE (2025-12-10) | Telemetry.Core wiring complete; Notify SLO hooks and Telemetry.Core sample integration unblocked. | +| POLICY-OBS-50-001 `policy instrumentation` | `docs/implplan/archived/SPRINT_0127_0001_0001_policy_reasoning.md` | DONE (2025-11-27) | Telemetry helpers available for Policy + risk routing adoption. 
| +| WEB-OBS-50-001 `gateway telemetry core adoption` | `docs/implplan/archived/SPRINT_0214_0001_0001_web_iii.md` | DONE (2025-12-11) | Gateway emits trace IDs; Notify incident payloads can reference end-to-end trace context. | | POLICY-RISK-40-002 `risk profile metadata export` | Sprint 215+ (Policy) | DONE (2025-12-04) | Provides metadata enrichment for NOTIFY-RISK routes; unblocked. | ## Upcoming Checkpoints (historical) @@ -149,3 +149,4 @@ | 2025-12-05 | Merged legacy sprint content into canonical template, refreshed statuses to DONE, and reconfirmed external dependency states; legacy file stubbed to point here. | Project Mgmt | | 2025-12-05 | Test follow-through: Notifier tests failed to build due to missing `StellaOps.Notify.Storage.Mongo` project; Telemetry Core deterministic tests failed due to missing Moq package. Actions added to tracker (#2, #3); statuses remain DONE pending evidence. | Implementer | | 2025-12-06 | Telemetry Core tests verified GREEN; Moq restored from curated feed; evidence path recorded. Action tracker #3/#4 closed. | Telemetry Core Guild | +| 2025-12-21 | Refreshed Interlocks (External Dependencies) table with upstream sprint outcomes; removed stale TODO rows (Orchestrator/Policy/Web telemetry adoption now DONE). | Implementer | diff --git a/docs/implplan/archived/SPRINT_0405_0001_0001_scanner_python_detection_gaps.md b/docs/implplan/archived/SPRINT_0405_0001_0001_scanner_python_detection_gaps.md index 546d0a09e..73040febf 100644 --- a/docs/implplan/archived/SPRINT_0405_0001_0001_scanner_python_detection_gaps.md +++ b/docs/implplan/archived/SPRINT_0405_0001_0001_scanner_python_detection_gaps.md @@ -26,7 +26,7 @@ | 4 | SCAN-PY-405-004 | DONE | Whiteout/overlay semantics implemented in `ContainerOverlayHandler` + `ContainerLayerAdapter`. 
| Python Analyzer Guild (`src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python`) | **Correct container-layer inventory semantics**: when scanning raw OCI layer trees (`layers/`, `.layers/`, `layer*/`), honor whiteouts/overlay ordering so removed packages are not reported. Use/extend `Internal/Packaging/Adapters/ContainerLayerAdapter` semantics as the source of truth for precedence. Emit explicit metadata markers when inventory is partial due to missing overlay context (e.g., `container.overlayIncomplete=true`). | | 5 | SCAN-PY-405-005 | DONE | VendoredPackageDetector integrated; `VendoringMetadataBuilder` added. | Python Analyzer Guild (`src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python`) | **Surface vendored (bundled) Python deps**: integrate `VendoredPackageDetector` so known vendoring patterns (`*_vendor`, `third_party`, `requests.packages`, etc.) are detected. Emit either (a) separate "embedded" components with bounded evidence locators (preferred) or (b) a bounded metadata summary on the parent package (`vendored.detected=true`, `vendored.packages`, `vendored.paths`). Never emit unbounded file/module lists; cap to top-N deterministic samples. | | 6 | SCAN-PY-405-006 | DONE | Scope classification added from lock entries (Scope enum) per Interlock 4. | Python Analyzer Guild (`src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python`) | **Improve "used by entrypoint" and scope classification**: today `usedByEntrypoint` primarily comes from RECORD/script hints. Extend this by optionally mapping source-tree imports (`PythonImportAnalysis`) and/or runtime evidence (`PythonRuntimeEvidenceCollector`) to packages (via `TopLevelModules`) so "likely used" can be signaled deterministically (bounded, opt-in). Add `scope` metadata using `PythonScopeClassifier` (prod/dev/docs/build) based on lock sections and requirements file names. | -| 7 | SCAN-PY-405-007 | TODO | Core implementation complete; fixtures pending. 
| QA Guild (`src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests`) | **Fixtures + golden outputs**: add fixtures proving new detection paths: (a) conda env (`conda-meta/*.json`) without dist-info, (b) requirements with `-r` includes + `-e .` editable, (c) Pipfile.lock with `default` + `develop`, (d) wheel file in workspace (no extraction), (e) zipapp/pyz with embedded requirements, (f) container layers with whiteouts hiding a dist-info dir, (g) vendored dependency directory under a package. Extend `PythonLanguageAnalyzerTests.cs` to assert deterministic ordering, stable identities, and bounded metadata. | +| 7 | SCAN-PY-405-007 | DONE | Fixtures + goldens landed; tests pass. | QA Guild (`src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests`) | **Fixtures + golden outputs**: add fixtures proving new detection paths: (a) conda env (`conda-meta/*.json`) without dist-info, (b) requirements with `-r` includes + `-e .` editable, (c) Pipfile.lock with `default` + `develop`, (d) wheel file in workspace (no extraction), (e) zipapp/pyz with embedded requirements, (f) container layers with whiteouts hiding a dist-info dir, (g) vendored dependency directory under a package. Extend `PythonLanguageAnalyzerTests.cs` to assert deterministic ordering, stable identities, and bounded metadata. | | 8 | SCAN-PY-405-008 | DONE | After core behavior lands, update docs + perf guard. | Docs Guild + Bench Guild (`docs/modules/scanner`, `src/Bench/StellaOps.Bench/Scanner.Analyzers`) | **Document + benchmark Python analyzer contract**: update `docs/modules/scanner/architecture.md` (or add a Python analyzer sub-doc) describing detection sources & precedence, lock parsing rules, container overlay semantics, vendoring representation, and identity rules for non-versioned components. Add a deterministic offline bench scanning a representative fixture (many packages + lockfiles) and record baseline ceilings (time + components count). 
| ## Wave Coordination @@ -279,4 +279,6 @@ When import/runtime analysis contributes to usage signals: | 2025-12-13 | **Decided Actions 1-4 and Interlock 4** to unblock SCAN-PY-405-002 through SCAN-PY-405-007. Action 1: explicit-key identity scheme using `LanguageExplicitKey.Create`. Action 2: lock precedence order (poetry.lock > Pipfile.lock > pdm.lock > uv.lock > requirements.txt) with first-wins dedupe. Action 3: OCI whiteout semantics with deterministic layer ordering. Action 4: vendored deps emit parent metadata by default, separate components only with High confidence + known version. Interlock 4: usage/scope classification is opt-in, RECORD/entry_points signals remain default. | Implementer | | 2025-12-13 | Started implementation of SCAN-PY-405-002 through SCAN-PY-405-007 in parallel (all waves now unblocked). | Implementer | | 2025-12-13 | **Completed SCAN-PY-405-002 through SCAN-PY-405-006**: (1) `PythonLockFileCollector` upgraded with full precedence order, `-r` includes with cycle detection, PEP 508 parsing, `name @ url` direct refs, Pipenv develop section, pdm.lock/uv.lock support. (2) `ContainerOverlayHandler` + `ContainerLayerAdapter` updated with OCI whiteout semantics. (3) `VendoringMetadataBuilder` added for bounded parent metadata. (4) Scope/SourceType metadata added to analyzer. Build passes. SCAN-PY-405-007 (fixtures) remains TODO. | Implementer | +| 2025-12-21 | Started SCAN-PY-405-007 (add deterministic fixtures + update goldens). | Implementer | +| 2025-12-21 | Completed SCAN-PY-405-007: fixtures for conda env, requirements includes+editable, Pipfile.lock default+develop, wheel workspace, zipapp embedded requirements, container whiteouts, and vendored directories; updated goldens; verified `dotnet test src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests.csproj -c Release`. 
| Implementer | diff --git a/docs/implplan/archived/SPRINT_0406_0001_0001_scanner_node_detection_gaps.md b/docs/implplan/archived/SPRINT_0406_0001_0001_scanner_node_detection_gaps.md index 4414a30ae..3e6133e40 100644 --- a/docs/implplan/archived/SPRINT_0406_0001_0001_scanner_node_detection_gaps.md +++ b/docs/implplan/archived/SPRINT_0406_0001_0001_scanner_node_detection_gaps.md @@ -35,11 +35,11 @@ ## Wave Coordination | Wave | Guild owners | Shared prerequisites | Status | Notes | | --- | --- | --- | --- | --- | -| A: Declared-only & identity | Node Analyzer Guild + QA Guild | Action 1 | TODO | Emit declared-only safely; avoid invalid PURLs. | -| B: Lock fidelity | Node Analyzer Guild + QA Guild | None | TODO | Multi-version lock correctness + Yarn Berry + pnpm hardening + nested path fixes. | -| C: Workspaces & containers | Node Analyzer Guild + QA Guild | Action 2 | TODO | Workspace glob support + scope attribution + container app-root discovery. | -| D: Imports & evidence | Node Analyzer Guild + QA Guild | Action 4 | TODO | ESM/TS import correctness + bounded scanning + package.json hashing. | -| E: Docs & bench | Docs Guild + Bench Guild | Waves A–D | TODO | Contract + performance ceiling. | +| A: Declared-only & identity | Node Analyzer Guild + QA Guild | Action 1 | DONE | Emit declared-only safely; avoid invalid PURLs. | +| B: Lock fidelity | Node Analyzer Guild + QA Guild | None | DONE | Multi-version lock correctness + Yarn Berry + pnpm hardening + nested path fixes. | +| C: Workspaces & containers | Node Analyzer Guild + QA Guild | Action 2 | DONE | Workspace glob support + scope attribution + container app-root discovery. | +| D: Imports & evidence | Node Analyzer Guild + QA Guild | Action 4 | DONE | ESM/TS import correctness + bounded scanning + package.json hashing. | +| E: Docs & bench | Docs Guild + Bench Guild | Waves A–D | DONE | Contract + performance ceiling. 
| ## Wave Detail Snapshots - **Wave A:** Declared-only dependencies become visible and safely keyed (no range-as-version PURLs). @@ -70,7 +70,7 @@ | 4 | Decide import-scanning policy: default enabled/disabled, scope (workspace only vs all packages), and caps to enforce. | Project Mgmt + Node Analyzer Guild | 2025-12-13 | Done | Scope: root + workspace members only; caps + skip markers; bench exports `node.importScan.*` metrics (see `docs/modules/scanner/analyzers-node.md`). | ## Decisions & Risks -- **Decision (pending):** Declared-only identity scheme, workspace glob bounds, lock precedence, and import scanning caps (Action Tracker 1–4). +- **DECIDED (2025-12-13):** Declared-only identity scheme, workspace glob bounds, lock precedence, and import scanning caps (Action Tracker 1–4). | Risk ID | Risk | Impact | Likelihood | Mitigation | Owner | Trigger / Signal | | --- | --- | --- | --- | --- | --- | --- | @@ -92,4 +92,5 @@ | 2025-12-13 | Updated declared-only emission to use the cross-analyzer explicit-key format and expanded fixtures for `layers/`, `.layers/`, and `layer*/` discovery. | Implementer | | 2025-12-13 | Completed task 406-010 (fixtures + goldens: lock-only package-lock/yarn-berry/pnpm, workspace globs, container app-root discovery) with regression tests. | Implementer | | 2025-12-13 | Completed task 406-011 (docs + offline bench: `docs/modules/scanner/analyzers-node.md`, scenario `node_detection_gaps_fixture`, import-scan metrics) with bench/test coverage. | Implementer | +| 2025-12-21 | Normalised Wave Coordination statuses to `DONE` (they were left `TODO`); verified `dotnet test src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Node.Tests/StellaOps.Scanner.Analyzers.Lang.Node.Tests.csproj -c Release` (365/365). 
| Implementer | diff --git a/docs/implplan/archived/SPRINT_0411_0001_0001_semantic_entrypoint_engine.md b/docs/implplan/archived/SPRINT_0411_0001_0001_semantic_entrypoint_engine.md index 15fd33b76..7b504552a 100644 --- a/docs/implplan/archived/SPRINT_0411_0001_0001_semantic_entrypoint_engine.md +++ b/docs/implplan/archived/SPRINT_0411_0001_0001_semantic_entrypoint_engine.md @@ -75,10 +75,10 @@ ## Action Tracker | # | Action | Owner | Due (UTC) | Status | Notes | |---|--------|-------|-----------|--------|-------| -| 1 | Review existing entrypoint detection code | Scanner Guild | 2025-12-16 | TODO | Understand integration points | -| 2 | Draft ApplicationIntent enum with cross-team input | Scanner Guild | 2025-12-17 | TODO | Need input from all language teams | -| 3 | Create AGENTS.md for EntryTrace module | Scanner Guild | 2025-12-16 | TODO | Implementer guidance | -| 4 | Validate semantic schema against richgraph-v1 | Platform Guild | 2025-12-18 | TODO | Ensure compatibility | +| 1 | Review existing entrypoint detection code | Scanner Guild | 2025-12-16 | DONE | Covered by Delivery Tracker + sprint close notes. | +| 2 | Draft ApplicationIntent enum with cross-team input | Scanner Guild | 2025-12-17 | DONE | Covered by Delivery Tracker + sprint close notes. | +| 3 | Create AGENTS.md for EntryTrace module | Scanner Guild | 2025-12-16 | DONE | Covered by Delivery Tracker + sprint close notes. | +| 4 | Validate semantic schema against richgraph-v1 | Platform Guild | 2025-12-18 | DONE | Covered by Delivery Tracker + sprint close notes. | ## Decisions & Risks @@ -162,3 +162,4 @@ public enum CapabilityClass : long |------------|--------|-------| | 2025-12-13 | Created sprint from program sprint 0410; defined 25 tasks across schema, adapters, integration, QA/docs; included schema previews. 
| Planning | | 2025-12-13 | Completed tasks 17-25: DI registration (AddSemanticEntryTraceAnalyzer), LanguageComponentRecord semantic fields (intent, capabilities, threatVectors), verified richgraph-v1 semantic extensions and SBOM property extensions already implemented, verified test fixtures exist, created semantic-entrypoint-schema.md documentation, updated architecture.md with semantic engine section, verified CLI --semantic flag implementation. Sprint 100% complete. | Scanner Guild | +| 2025-12-21 | Normalised Action Tracker statuses to `DONE` (they were left `TODO`); no semantic changes. | Implementer | diff --git a/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/Internal/Packaging/ContainerOverlayHandler.cs b/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/Internal/Packaging/ContainerOverlayHandler.cs index 70cc220ea..594f5c565 100644 --- a/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/Internal/Packaging/ContainerOverlayHandler.cs +++ b/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/Internal/Packaging/ContainerOverlayHandler.cs @@ -88,7 +88,12 @@ internal sealed partial class ContainerOverlayHandler isComplete = false; } - var layerFiles = enumerateFiles(layer.Path).ToList(); + var layerRoot = NormalizePath(layer.Path); + var layerFiles = enumerateFiles(layer.Path) + .Select(file => NormalizeLayerRelativePath(layerRoot, file)) + .Where(static file => file is not null) + .Cast() + .ToList(); // First pass: collect whiteouts and opaque markers var layerWhiteouts = new HashSet(StringComparer.OrdinalIgnoreCase); @@ -121,6 +126,16 @@ internal sealed partial class ContainerOverlayHandler : targetName; layerWhiteouts.Add(targetPath); + + // Whiteouts can target files or directories. If a directory is whited out, + // all previously-visible files under it must be removed. 
+ var toRemove = visiblePaths.Where(p => IsUnderDirectory(p, targetPath)).ToList(); + foreach (var path in toRemove) + { + visiblePaths.Remove(path); + whiteoutedPaths.Add(path); + } + visiblePaths.Remove(targetPath); whiteoutedPaths.Add(targetPath); } @@ -163,6 +178,32 @@ internal sealed partial class ContainerOverlayHandler warning); } + private static string? NormalizeLayerRelativePath(string normalizedLayerRoot, string path) + { + var normalized = NormalizePath(path); + if (normalized.Length == 0) + { + return null; + } + + if (!normalized.StartsWith(normalizedLayerRoot, StringComparison.OrdinalIgnoreCase)) + { + return null; + } + + if (normalized.Length == normalizedLayerRoot.Length) + { + return string.Empty; + } + + if (normalized[normalizedLayerRoot.Length] != '/') + { + return null; + } + + return normalized[(normalizedLayerRoot.Length + 1)..]; + } + /// <summary> /// Checks if a path would be visible after overlay processing. /// </summary> diff --git a/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/Internal/Vendoring/VendoredPackageDetector.cs b/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/Internal/Vendoring/VendoredPackageDetector.cs index 3f4863e5e..790a98679 100644 --- a/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/Internal/Vendoring/VendoredPackageDetector.cs +++ b/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/Internal/Vendoring/VendoredPackageDetector.cs @@ -160,29 +160,30 @@ internal static partial class VendoredPackageDetector // with the same name as the package (normalized to lowercase with underscores). // E.g., dist-info at "site-packages/pip-23.0.dist-info" means package at "site-packages/pip/" - string? 
baseDir = null; + var baseDir = string.Empty; if (!string.IsNullOrEmpty(package.MetadataPath)) { - // Get the directory containing dist-info (usually site-packages) - baseDir = Path.GetDirectoryName(package.MetadataPath); + // In the Python VFS, MetadataPath may be just "<name>-<version>.dist-info" (rooted at VFS root). + // Treat missing directory name as VFS root. + baseDir = Path.GetDirectoryName(package.MetadataPath) ?? string.Empty; } else if (!string.IsNullOrEmpty(package.Location)) { baseDir = package.Location; } - if (string.IsNullOrEmpty(baseDir)) - { - return null; - } - // The package directory is baseDir + package module name // Use the first top-level module if available, otherwise use the normalized package name var moduleName = package.TopLevelModules.Length > 0 ? package.TopLevelModules[0] : package.NormalizedName; + if (string.IsNullOrWhiteSpace(moduleName)) + { + return null; + } + return Path.Combine(baseDir, moduleName).Replace('\\', '/'); } diff --git a/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/PythonLanguageAnalyzer.cs b/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/PythonLanguageAnalyzer.cs index b8d68d53b..818ec7686 100644 --- a/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/PythonLanguageAnalyzer.cs +++ b/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/PythonLanguageAnalyzer.cs @@ -27,6 +27,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer var matchedLocks = new HashSet(StringComparer.OrdinalIgnoreCase); var hasLockEntries = lockData.Entries.Count > 0; + var containerOverlay = TryBuildContainerOverlay(context.RootPath); + // Detect Python runtime in container layers var runtimeInfo = PythonContainerAdapter.DetectRuntime(context.RootPath); @@ -44,6 +46,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer var packageDiscovery = new PythonPackageDiscovery(); var discoveryResult = await packageDiscovery.DiscoverAsync(vfs, 
cancellationToken).ConfigureAwait(false); + var vendoringByPackage = await BuildVendoringMapAsync(vfs, discoveryResult, cancellationToken).ConfigureAwait(false); foreach (var package in discoveryResult.Packages .Where(static p => !string.IsNullOrWhiteSpace(p.Version)) @@ -52,6 +55,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer { cancellationToken.ThrowIfCancellationRequested(); + vendoringByPackage.TryGetValue(package.NormalizedName, out var vendoringAnalysis); + await EmitDiscoveredPackageAsync( context, writer, @@ -64,6 +69,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer environment, startupHooks, zipappAnalysis, + containerOverlay, + vendoringAnalysis, cancellationToken) .ConfigureAwait(false); } @@ -221,6 +228,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer PythonEnvironment environment, PythonStartupHooks startupHooks, PythonZipappAnalysis zipappAnalysis, + ContainerOverlayHandler.OverlayResult? containerOverlay, + VendoringAnalysis? vendoringAnalysis, CancellationToken cancellationToken) { var version = package.Version!.Trim(); @@ -243,6 +252,21 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer } var metadataDirectory = TryResolvePhysicalMetadataDirectory(vfs, package, out var metadataFile); + + if (vendoringAnalysis is not null) + { + metadata.AddRange(VendoringMetadataBuilder.BuildParentMetadata(vendoringAnalysis)); + } + + if (metadataFile is not null && + !metadataFile.IsFromArchive && + containerOverlay is not null && + TryGetContainerOverlayPath(containerOverlay, metadataFile.AbsolutePath, out var overlayPath) && + !ContainerOverlayHandler.IsPathVisible(containerOverlay, overlayPath)) + { + return; + } + if (metadataDirectory is not null) { PythonDistribution? 
distribution; @@ -289,6 +313,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer evidence: evidence, usedByEntrypoint: distribution.UsedByEntrypoint); + EmitVendoredEmbeddedComponents(context, writer, vfs, vendoringAnalysis, version); return; } @@ -313,6 +338,7 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer evidence: archiveDistribution.SortedEvidence, usedByEntrypoint: archiveDistribution.UsedByEntrypoint); + EmitVendoredEmbeddedComponents(context, writer, vfs, vendoringAnalysis, version); return; } } @@ -329,6 +355,8 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer metadata: metadata, evidence: evidenceFallback, usedByEntrypoint: false); + + EmitVendoredEmbeddedComponents(context, writer, vfs, vendoringAnalysis, version); } private static string? TryResolvePhysicalMetadataDirectory( @@ -363,6 +391,13 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer var location = package.Location; if (string.IsNullOrWhiteSpace(location) && !string.IsNullOrWhiteSpace(package.MetadataPath)) { + if (package.Kind == PythonPackageKind.Conda) + { + var normalizedPath = package.MetadataPath.Replace('\\', '/').Trim('/'); + var lastSlash = normalizedPath.LastIndexOf('/'); + location = lastSlash > 0 ? normalizedPath[..lastSlash] : normalizedPath; + } + var metadataName = package.Kind == PythonPackageKind.Egg ? "PKG-INFO" : "METADATA"; var file = vfs.GetFile($"{package.MetadataPath}/{metadataName}"); @@ -392,6 +427,19 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer PythonPackageInfo package, PythonVirtualFile? 
metadataFile) { + if (package.Kind == PythonPackageKind.Conda && !string.IsNullOrWhiteSpace(package.MetadataPath)) + { + return new[] + { + new LanguageComponentEvidence( + LanguageEvidenceKind.File, + "conda-meta", + package.MetadataPath.Replace('\\', '/').TrimStart('/'), + Value: null, + Sha256: null) + }; + } + if (metadataFile is not null) { var locator = metadataFile.IsFromArchive && metadataFile.ArchivePath is not null @@ -555,6 +603,27 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer metadata.Add(new KeyValuePair("zipapps.detected", "true")); metadata.Add(new KeyValuePair("zipapps.count", zipappAnalysis.Zipapps.Count.ToString())); + var embeddedDeps = zipappAnalysis.Zipapps + .SelectMany(static z => z.EmbeddedDependencies) + .Select(static d => d.Trim()) + .Where(static d => d.Length > 0) + .Distinct(StringComparer.OrdinalIgnoreCase) + .OrderBy(static d => d, StringComparer.Ordinal) + .ToArray(); + + if (embeddedDeps.Length > 0) + { + metadata.Add(new KeyValuePair("zipapps.embeddedDeps.count", embeddedDeps.Length.ToString())); + + var sample = embeddedDeps.Take(12).ToArray(); + metadata.Add(new KeyValuePair("zipapps.embeddedDeps.sample", string.Join(';', sample))); + + if (embeddedDeps.Length > sample.Length) + { + metadata.Add(new KeyValuePair("zipapps.embeddedDeps.sampleTruncated", "true")); + } + } + // Add version information from zipapp shebangs var versions = zipappAnalysis.Zipapps .Where(z => z.PythonVersion != null) @@ -583,4 +652,229 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer } } } + + private static async Task> BuildVendoringMapAsync( + PythonVirtualFileSystem vfs, + PythonPackageDiscoveryResult discoveryResult, + CancellationToken cancellationToken) + { + var results = await VendoredPackageDetector.AnalyzeAllAsync(vfs, discoveryResult, cancellationToken).ConfigureAwait(false); + + var map = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var analysis in results) + { + var key = 
PythonPackageInfo.NormalizeName(analysis.PackageName); + map[key] = analysis; + } + + return map; + } + + private static ContainerOverlayHandler.OverlayResult? TryBuildContainerOverlay(string rootPath) + { + IReadOnlyList layers; + try + { + layers = ContainerOverlayHandler.DiscoverLayers(rootPath); + } + catch + { + return null; + } + + if (layers.Count == 0) + { + return null; + } + + try + { + return ContainerOverlayHandler.ProcessLayers(layers, EnumerateOverlayRelevantFiles); + } + catch + { + return null; + } + } + + private static IEnumerable EnumerateOverlayRelevantFiles(string layerPath) + { + if (string.IsNullOrWhiteSpace(layerPath) || !Directory.Exists(layerPath)) + { + yield break; + } + + var options = new EnumerationOptions + { + RecurseSubdirectories = true, + IgnoreInaccessible = true, + AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint + }; + + var results = new HashSet(StringComparer.OrdinalIgnoreCase); + + try + { + foreach (var file in Directory.EnumerateFiles(layerPath, ".wh.*", options)) + { + results.Add(file); + } + } + catch (IOException) + { + } + catch (UnauthorizedAccessException) + { + } + + try + { + foreach (var file in Directory.EnumerateFiles(layerPath, "METADATA", options)) + { + if (file.Contains(".dist-info", StringComparison.OrdinalIgnoreCase)) + { + results.Add(file); + } + } + } + catch (IOException) + { + } + catch (UnauthorizedAccessException) + { + } + + try + { + foreach (var file in Directory.EnumerateFiles(layerPath, "PKG-INFO", options)) + { + if (file.Contains(".egg-info", StringComparison.OrdinalIgnoreCase)) + { + results.Add(file); + } + } + } + catch (IOException) + { + } + catch (UnauthorizedAccessException) + { + } + + foreach (var result in results.OrderBy(static path => path, StringComparer.OrdinalIgnoreCase)) + { + yield return result; + } + } + + private static bool TryGetContainerOverlayPath(ContainerOverlayHandler.OverlayResult overlay, string absolutePath, out string overlayPath) + { + 
var normalized = absolutePath.Replace('\\', '/').TrimEnd('/'); + + foreach (var layer in overlay.ProcessedLayers) + { + var layerPath = layer.Path.Replace('\\', '/').TrimEnd('/'); + + if (normalized.StartsWith(layerPath + "/", StringComparison.OrdinalIgnoreCase)) + { + overlayPath = normalized[(layerPath.Length + 1)..]; + return true; + } + } + + overlayPath = string.Empty; + return false; + } + + private static void EmitVendoredEmbeddedComponents( + LanguageAnalyzerContext context, + LanguageComponentWriter writer, + PythonVirtualFileSystem vfs, + VendoringAnalysis? vendoringAnalysis, + string? parentVersion) + { + if (vendoringAnalysis is null) + { + return; + } + + var embeddedToEmit = VendoringMetadataBuilder.GetEmbeddedToEmitSeparately(vendoringAnalysis, parentVersion); + if (embeddedToEmit.Count == 0) + { + return; + } + + foreach (var embedded in embeddedToEmit) + { + var embeddedMetadata = VendoringMetadataBuilder.BuildEmbeddedMetadata(embedded, parentVersion, vendoringAnalysis.Confidence); + var evidence = TryBuildVendoredEvidence(context, vfs, embedded.Path); + + var componentKey = LanguageExplicitKey.Create( + analyzerId: "python", + ecosystem: "pypi", + name: PythonPathHelper.NormalizePackageName(embedded.Name), + spec: embedded.Version ?? string.Empty, + originLocator: embedded.Path); + + writer.AddFromExplicitKey( + analyzerId: "python", + componentKey: componentKey, + purl: embedded.Purl, + name: embedded.Name, + version: embedded.Version, + type: "pypi", + metadata: embeddedMetadata, + evidence: evidence, + usedByEntrypoint: false); + } + } + + private static IReadOnlyCollection? 
TryBuildVendoredEvidence( + LanguageAnalyzerContext context, + PythonVirtualFileSystem vfs, + string embeddedPath) + { + var candidates = new[] + { + $"{embeddedPath}/__init__.py", + $"{embeddedPath}/_version.py", + $"{embeddedPath}/version.py", + $"{embeddedPath}/__version__.py", + embeddedPath + ".py" + }; + + foreach (var candidate in candidates) + { + var file = vfs.GetFile(candidate); + if (file is null) + { + continue; + } + + if (file.IsFromArchive && file.ArchivePath is not null) + { + return new[] + { + new LanguageComponentEvidence( + LanguageEvidenceKind.File, + "vendored", + PythonPathHelper.NormalizeRelative(context, file.ArchivePath), + Value: file.AbsolutePath, + Sha256: null) + }; + } + + return new[] + { + new LanguageComponentEvidence( + LanguageEvidenceKind.File, + "vendored", + PythonPathHelper.NormalizeRelative(context, file.AbsolutePath), + Value: null, + Sha256: null) + }; + } + + return null; + } } diff --git a/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/TASKS.md b/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/TASKS.md index 1c3fb820f..a7e069a3a 100644 --- a/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/TASKS.md +++ b/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Python/TASKS.md @@ -10,7 +10,7 @@ | SCAN-PY-405-004 | DONE | Container overlay contract implemented: OCI whiteout semantics (`.wh.*`, `.wh..wh..opq`), deterministic layer ordering, `container.overlayIncomplete` metadata marker. | 2025-12-13 | | SCAN-PY-405-005 | DONE | Vendoring integration: `VendoringMetadataBuilder` for parent metadata + embedded components with High confidence. | 2025-12-13 | | SCAN-PY-405-006 | DONE | Scope classification added (prod/dev/docs/build) from lock sections and file names per Interlock 4. Usage signals remain default. | 2025-12-13 | -| SCAN-PY-405-007 | DONE | Added test fixtures for includes, Pipfile.lock develop, scope classification, PEP 508 direct refs, cycle detection. 
| 2025-12-13 | +| SCAN-PY-405-007 | DONE | Added deterministic fixtures + goldens: conda-meta env, requirements includes+editable, Pipfile.lock default+develop, wheel workspace, zipapp embedded requirements, container whiteouts, and vendored directories. | 2025-12-21 | | SCAN-PY-405-008 | DONE | Docs + deterministic offline bench for Python analyzer contract. | 2025-12-13 | ## Completed Contracts (Action Decisions 2025-12-13) diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/conda-env/conda-meta/numpy-1.26.0-0.json b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/conda-env/conda-meta/numpy-1.26.0-0.json new file mode 100644 index 000000000..046e37901 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/conda-env/conda-meta/numpy-1.26.0-0.json @@ -0,0 +1,11 @@ +{ + "name": "numpy", + "version": "1.26.0", + "depends": [ + "python >=3.11" + ], + "files": [ + "numpy/__init__.py" + ], + "requested": true +} diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/conda-env/conda-meta/requests-2.31.0-0.json b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/conda-env/conda-meta/requests-2.31.0-0.json new file mode 100644 index 000000000..6b2a1f956 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/conda-env/conda-meta/requests-2.31.0-0.json @@ -0,0 +1,12 @@ +{ + "name": "requests", + "version": "2.31.0", + "depends": [ + "python >=3.11", + "urllib3 >=1.26" + ], + "files": [ + "requests/__init__.py" + ], + "requested": false +} diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/conda-env/expected.json b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/conda-env/expected.json new file mode 100644 index 
000000000..c9c5ccc0b --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/conda-env/expected.json @@ -0,0 +1,44 @@ +[ + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/numpy@1.26.0", + "purl": "pkg:pypi/numpy@1.26.0", + "name": "numpy", + "version": "1.26.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "pkg.confidence": "High", + "pkg.kind": "Conda", + "pkg.location": "conda-meta" + }, + "evidence": [ + { + "kind": "file", + "source": "conda-meta", + "locator": "conda-meta/numpy-1.26.0-0.json" + } + ] + }, + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/requests@2.31.0", + "purl": "pkg:pypi/requests@2.31.0", + "name": "requests", + "version": "2.31.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "pkg.confidence": "High", + "pkg.kind": "Conda", + "pkg.location": "conda-meta" + }, + "evidence": [ + { + "kind": "file", + "source": "conda-meta", + "locator": "conda-meta/requests-2.31.0-0.json" + } + ] + } +] \ No newline at end of file diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/expected.json b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/expected.json new file mode 100644 index 000000000..ee76509f3 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/expected.json @@ -0,0 +1,36 @@ +[ + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/visible@2.0.0", + "purl": "pkg:pypi/visible@2.0.0", + "name": "visible", + "version": "2.0.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "distInfoPath": "layers/layer1/usr/lib/python3.11/site-packages/visible-2.0.0.dist-info", + "name": "visible", + "normalizedName": "visible", + "pkg.confidence": "High", + "pkg.kind": "Wheel", + "pkg.location": 
"layers/layer1/usr/lib/python3.11/site-packages/visible-2.0.0.dist-info", + "provenance": "dist-info", + "record.hashMismatches": "0", + "record.hashedEntries": "0", + "record.ioErrors": "0", + "record.missingFiles": "0", + "record.totalEntries": "0", + "runtime.libPaths.count": "2", + "runtime.versions": "3.11", + "summary": "Visible after overlay", + "version": "2.0.0" + }, + "evidence": [ + { + "kind": "file", + "source": "METADATA", + "locator": "layers/layer1/usr/lib/python3.11/site-packages/visible-2.0.0.dist-info/METADATA" + } + ] + } +] \ No newline at end of file diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/layers/layer0/usr/lib/python3.11/site-packages/whited-1.0.0.dist-info/METADATA b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/layers/layer0/usr/lib/python3.11/site-packages/whited-1.0.0.dist-info/METADATA new file mode 100644 index 000000000..1a3716da6 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/layers/layer0/usr/lib/python3.11/site-packages/whited-1.0.0.dist-info/METADATA @@ -0,0 +1,5 @@ +Metadata-Version: 2.1 +Name: whited +Version: 1.0.0 +Summary: Should be removed by whiteout + diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/layers/layer1/usr/lib/python3.11/site-packages/.wh.whited-1.0.0.dist-info b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/layers/layer1/usr/lib/python3.11/site-packages/.wh.whited-1.0.0.dist-info new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/layers/layer1/usr/lib/python3.11/site-packages/.wh.whited-1.0.0.dist-info @@ -0,0 +1 @@ + diff --git 
a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/layers/layer1/usr/lib/python3.11/site-packages/visible-2.0.0.dist-info/METADATA b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/layers/layer1/usr/lib/python3.11/site-packages/visible-2.0.0.dist-info/METADATA new file mode 100644 index 000000000..73f9ce9bc --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/container-whiteouts/layers/layer1/usr/lib/python3.11/site-packages/visible-2.0.0.dist-info/METADATA @@ -0,0 +1,5 @@ +Metadata-Version: 2.1 +Name: visible +Version: 2.0.0 +Summary: Visible after overlay + diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/pipfile-lock-default-develop/Pipfile.lock b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/pipfile-lock-default-develop/Pipfile.lock new file mode 100644 index 000000000..169d2501c --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/pipfile-lock-default-develop/Pipfile.lock @@ -0,0 +1,15 @@ +{ + "_meta": { + "sources": [] + }, + "default": { + "requests": { + "version": "==2.28.0" + } + }, + "develop": { + "pytest": { + "version": "==7.0.0" + } + } +} diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/pipfile-lock-default-develop/expected.json b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/pipfile-lock-default-develop/expected.json new file mode 100644 index 000000000..08d883b01 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/pipfile-lock-default-develop/expected.json @@ -0,0 +1,52 @@ +[ + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/pytest@7.0.0", + "purl": "pkg:pypi/pytest@7.0.0", + 
"name": "pytest", + "version": "7.0.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "declaredOnly": "true", + "lockLocator": "Pipfile.lock", + "lockSource": "Pipfile.lock", + "pkg.confidence": "Medium", + "pkg.kind": "DeclaredOnly", + "pkg.location": "Pipfile.lock", + "scope": "dev" + }, + "evidence": [ + { + "kind": "metadata", + "source": "Pipfile.lock", + "locator": "Pipfile.lock" + } + ] + }, + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/requests@2.28.0", + "purl": "pkg:pypi/requests@2.28.0", + "name": "requests", + "version": "2.28.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "declaredOnly": "true", + "lockLocator": "Pipfile.lock", + "lockSource": "Pipfile.lock", + "pkg.confidence": "Medium", + "pkg.kind": "DeclaredOnly", + "pkg.location": "Pipfile.lock", + "scope": "prod" + }, + "evidence": [ + { + "kind": "metadata", + "source": "Pipfile.lock", + "locator": "Pipfile.lock" + } + ] + } +] \ No newline at end of file diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/editable-src/pyproject.toml b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/editable-src/pyproject.toml new file mode 100644 index 000000000..888ec0465 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/editable-src/pyproject.toml @@ -0,0 +1,3 @@ +[project] +name = "editable-src" +version = "0.0.0" diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/expected.json b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/expected.json new file mode 100644 index 000000000..a08f0cae0 --- /dev/null +++ 
b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/expected.json @@ -0,0 +1,106 @@ +[ + { + "analyzerId": "python", + "componentKey": "explicit::python::pypi::editable-src::sha256:5bd6cf3f7ac212830a9fcbc83b06ab72e79bafc1c94ea98a3d0560547c96c923", + "name": "editable-src", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "declared.locator": "requirements.txt", + "declared.scope": "prod", + "declared.source": "requirements.txt", + "declared.sourceType": "editable", + "declared.versionSpec": "./editable-src", + "declaredOnly": "true", + "lockEditablePath": "./editable-src", + "lockLocator": "requirements.txt", + "lockSource": "requirements.txt", + "pkg.confidence": "Medium", + "pkg.kind": "DeclaredOnly", + "pkg.location": "requirements.txt", + "scope": "prod" + }, + "evidence": [ + { + "kind": "metadata", + "source": "requirements.txt", + "locator": "requirements.txt" + } + ] + }, + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/certifi@2022.12.7", + "purl": "pkg:pypi/certifi@2022.12.7", + "name": "certifi", + "version": "2022.12.7", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "declaredOnly": "true", + "lockLocator": "requirements-base.txt", + "lockSource": "requirements-base.txt", + "pkg.confidence": "Medium", + "pkg.kind": "DeclaredOnly", + "pkg.location": "requirements-base.txt", + "scope": "prod" + }, + "evidence": [ + { + "kind": "metadata", + "source": "requirements-base.txt", + "locator": "requirements-base.txt" + } + ] + }, + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/requests@2.28.0", + "purl": "pkg:pypi/requests@2.28.0", + "name": "requests", + "version": "2.28.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "declaredOnly": "true", + "lockLocator": "requirements.txt", + "lockSource": "requirements.txt", + "pkg.confidence": "Medium", + "pkg.kind": "DeclaredOnly", + "pkg.location": "requirements.txt", 
+ "scope": "prod" + }, + "evidence": [ + { + "kind": "metadata", + "source": "requirements.txt", + "locator": "requirements.txt" + } + ] + }, + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/urllib3@1.26.0", + "purl": "pkg:pypi/urllib3@1.26.0", + "name": "urllib3", + "version": "1.26.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "declaredOnly": "true", + "lockLocator": "requirements-base.txt", + "lockSource": "requirements-base.txt", + "pkg.confidence": "Medium", + "pkg.kind": "DeclaredOnly", + "pkg.location": "requirements-base.txt", + "scope": "prod" + }, + "evidence": [ + { + "kind": "metadata", + "source": "requirements-base.txt", + "locator": "requirements-base.txt" + } + ] + } +] \ No newline at end of file diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/requirements-base.txt b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/requirements-base.txt new file mode 100644 index 000000000..7e57b6184 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/requirements-base.txt @@ -0,0 +1,2 @@ +urllib3==1.26.0 +certifi==2022.12.7 diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/requirements.txt b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/requirements.txt new file mode 100644 index 000000000..2471d523c --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/requirements-includes-editable/requirements.txt @@ -0,0 +1,3 @@ +requests==2.28.0 +-r requirements-base.txt +-e ./editable-src diff --git 
a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/expected.json b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/expected.json new file mode 100644 index 000000000..36dd6c92d --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/expected.json @@ -0,0 +1,65 @@ +[ + { + "analyzerId": "python", + "componentKey": "explicit::python::pypi::urllib3::sha256:aa29f86a6e70276f0f1bc8b4fc71abb66af19af7dbcf4bfe40b40c3c4aa08467", + "purl": "pkg:pypi/urllib3@1.26.0", + "name": "urllib3", + "version": "1.26.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "embedded": "true", + "embedded.confidence": "High", + "embedded.parentPackage": "requests", + "embedded.parentVersion": "2.0.0", + "embedded.path": "requests/_vendor/urllib3", + "embedded.versionSource": "heuristic" + }, + "evidence": [ + { + "kind": "file", + "source": "vendored", + "locator": "lib/python3.11/site-packages/requests/_vendor/urllib3/__init__.py" + } + ] + }, + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/requests@2.0.0", + "purl": "pkg:pypi/requests@2.0.0", + "name": "requests", + "version": "2.0.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "distInfoPath": "lib/python3.11/site-packages/requests-2.0.0.dist-info", + "name": "requests", + "normalizedName": "requests", + "pkg.confidence": "High", + "pkg.kind": "Wheel", + "pkg.location": "lib/python3.11/site-packages/requests-2.0.0.dist-info", + "provenance": "dist-info", + "record.hashMismatches": "0", + "record.hashedEntries": "0", + "record.ioErrors": "0", + "record.missingFiles": "0", + "record.totalEntries": "0", + "runtime.libPaths.count": "1", + "runtime.versions": "3.11", + "summary": "Parent package with vendored deps", + "vendored.confidence": "High", + "vendored.detected": "true", + "vendored.packageCount": "1", 
+ "vendored.packages": "urllib3@1.26.0", + "vendored.paths": "requests/_vendor", + "version": "2.0.0" + }, + "evidence": [ + { + "kind": "file", + "source": "METADATA", + "locator": "lib/python3.11/site-packages/requests-2.0.0.dist-info/METADATA" + } + ] + } +] \ No newline at end of file diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/lib/python3.11/site-packages/requests-2.0.0.dist-info/METADATA b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/lib/python3.11/site-packages/requests-2.0.0.dist-info/METADATA new file mode 100644 index 000000000..fecd8c514 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/lib/python3.11/site-packages/requests-2.0.0.dist-info/METADATA @@ -0,0 +1,5 @@ +Metadata-Version: 2.1 +Name: requests +Version: 2.0.0 +Summary: Parent package with vendored deps + diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/lib/python3.11/site-packages/requests/__init__.py b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/lib/python3.11/site-packages/requests/__init__.py new file mode 100644 index 000000000..8c0d5d5bb --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/lib/python3.11/site-packages/requests/__init__.py @@ -0,0 +1 @@ +__version__ = "2.0.0" diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/lib/python3.11/site-packages/requests/_vendor/urllib3/__init__.py b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/lib/python3.11/site-packages/requests/_vendor/urllib3/__init__.py new file mode 100644 index 000000000..d00934fb7 --- 
/dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/vendored-directory/lib/python3.11/site-packages/requests/_vendor/urllib3/__init__.py @@ -0,0 +1 @@ +__version__ = "1.26.0" diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/wheel-workspace/expected.json b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/wheel-workspace/expected.json new file mode 100644 index 000000000..d3e8d9761 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/wheel-workspace/expected.json @@ -0,0 +1,51 @@ +[ + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/wheelpkg@1.0.0", + "purl": "pkg:pypi/wheelpkg@1.0.0", + "name": "wheelpkg", + "version": "1.0.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "distInfoPath": "archives/wheel/wheelpkg-1.0.0-py3-none-any.whl/wheelpkg-1.0.0.dist-info", + "name": "wheelpkg", + "normalizedName": "wheelpkg", + "pkg.confidence": "Definitive", + "pkg.kind": "Wheel", + "pkg.location": "archives/wheel/wheelpkg-1.0.0-py3-none-any.whl", + "provenance": "dist-info", + "record.hashMismatches": "0", + "record.hashedEntries": "3", + "record.ioErrors": "0", + "record.missingFiles": "0", + "record.totalEntries": "4", + "summary": "Wheel fixture", + "version": "1.0.0", + "wheel.generator": "stellaops-test", + "wheel.rootIsPurelib": "true", + "wheel.tags": "py3-none-any", + "wheel.version": "1.0" + }, + "evidence": [ + { + "kind": "file", + "source": "METADATA", + "locator": "dist/wheelpkg-1.0.0-py3-none-any.whl", + "value": "wheelpkg-1.0.0.dist-info/METADATA" + }, + { + "kind": "file", + "source": "RECORD", + "locator": "dist/wheelpkg-1.0.0-py3-none-any.whl", + "value": "wheelpkg-1.0.0.dist-info/RECORD" + }, + { + "kind": "file", + "source": "WHEEL", + "locator": "dist/wheelpkg-1.0.0-py3-none-any.whl", + "value": "wheelpkg-1.0.0.dist-info/WHEEL" + } 
+ ] + } +] \ No newline at end of file diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/zipapp-embedded-requirements/expected.json b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/zipapp-embedded-requirements/expected.json new file mode 100644 index 000000000..8650277d3 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/zipapp-embedded-requirements/expected.json @@ -0,0 +1,41 @@ +[ + { + "analyzerId": "python", + "componentKey": "purl::pkg:pypi/hostpkg@0.1.0", + "purl": "pkg:pypi/hostpkg@0.1.0", + "name": "hostpkg", + "version": "0.1.0", + "type": "pypi", + "usedByEntrypoint": false, + "metadata": { + "distInfoPath": "lib/python3.11/site-packages/hostpkg-0.1.0.dist-info", + "name": "hostpkg", + "normalizedName": "hostpkg", + "pkg.confidence": "High", + "pkg.kind": "Wheel", + "pkg.location": "lib/python3.11/site-packages/hostpkg-0.1.0.dist-info", + "provenance": "dist-info", + "record.hashMismatches": "0", + "record.hashedEntries": "0", + "record.ioErrors": "0", + "record.missingFiles": "0", + "record.totalEntries": "0", + "runtime.libPaths.count": "1", + "runtime.versions": "3.11", + "summary": "Host package for zipapp fixture", + "version": "0.1.0", + "zipapps.count": "1", + "zipapps.detected": "true", + "zipapps.embeddedDeps.count": "2", + "zipapps.embeddedDeps.sample": "flask;requests", + "zipapps.pythonVersions": "3.11" + }, + "evidence": [ + { + "kind": "file", + "source": "METADATA", + "locator": "lib/python3.11/site-packages/hostpkg-0.1.0.dist-info/METADATA" + } + ] + } +] \ No newline at end of file diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/zipapp-embedded-requirements/lib/python3.11/site-packages/hostpkg-0.1.0.dist-info/METADATA 
b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/zipapp-embedded-requirements/lib/python3.11/site-packages/hostpkg-0.1.0.dist-info/METADATA new file mode 100644 index 000000000..8a5c57f86 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Fixtures/lang/python/zipapp-embedded-requirements/lib/python3.11/site-packages/hostpkg-0.1.0.dist-info/METADATA @@ -0,0 +1,5 @@ +Metadata-Version: 2.1 +Name: hostpkg +Version: 0.1.0 +Summary: Host package for zipapp fixture + diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Python/PythonLanguageAnalyzerTests.cs b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Python/PythonLanguageAnalyzerTests.cs index 2b74a09fd..5c2c065e0 100644 --- a/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Python/PythonLanguageAnalyzerTests.cs +++ b/src/Scanner/__Tests/StellaOps.Scanner.Analyzers.Lang.Python.Tests/Python/PythonLanguageAnalyzerTests.cs @@ -85,6 +85,148 @@ public sealed class PythonLanguageAnalyzerTests usageHints); } + [Fact] + public async Task CondaEnvFixtureProducesDeterministicOutputAsync() + { + var cancellationToken = TestContext.Current.CancellationToken; + var fixturePath = TestPaths.ResolveFixture("lang", "python", "conda-env"); + var goldenPath = Path.Combine(fixturePath, "expected.json"); + + var analyzers = new ILanguageAnalyzer[] + { + new PythonLanguageAnalyzer() + }; + + await LanguageAnalyzerTestHarness.AssertDeterministicAsync( + fixturePath, + goldenPath, + analyzers, + cancellationToken); + } + + [Fact] + public async Task RequirementsIncludesEditableFixtureProducesDeterministicOutputAsync() + { + var cancellationToken = TestContext.Current.CancellationToken; + var fixturePath = TestPaths.ResolveFixture("lang", "python", "requirements-includes-editable"); + var goldenPath = Path.Combine(fixturePath, "expected.json"); + + var analyzers = new ILanguageAnalyzer[] + { + new 
PythonLanguageAnalyzer() + }; + + await LanguageAnalyzerTestHarness.AssertDeterministicAsync( + fixturePath, + goldenPath, + analyzers, + cancellationToken); + } + + [Fact] + public async Task PipfileLockDefaultDevelopFixtureProducesDeterministicOutputAsync() + { + var cancellationToken = TestContext.Current.CancellationToken; + var fixturePath = TestPaths.ResolveFixture("lang", "python", "pipfile-lock-default-develop"); + var goldenPath = Path.Combine(fixturePath, "expected.json"); + + var analyzers = new ILanguageAnalyzer[] + { + new PythonLanguageAnalyzer() + }; + + await LanguageAnalyzerTestHarness.AssertDeterministicAsync( + fixturePath, + goldenPath, + analyzers, + cancellationToken); + } + + [Fact] + public async Task WheelWorkspaceFixtureProducesDeterministicOutputAsync() + { + var cancellationToken = TestContext.Current.CancellationToken; + var fixturePath = TestPaths.ResolveFixture("lang", "python", "wheel-workspace"); + var goldenPath = Path.Combine(fixturePath, "expected.json"); + + var distDir = Path.Combine(fixturePath, "dist"); + Directory.CreateDirectory(distDir); + + var wheelPath = Path.Combine(distDir, "wheelpkg-1.0.0-py3-none-any.whl"); + CreateWheelpkgWheel(wheelPath); + + var analyzers = new ILanguageAnalyzer[] + { + new PythonLanguageAnalyzer() + }; + + await LanguageAnalyzerTestHarness.AssertDeterministicAsync( + fixturePath, + goldenPath, + analyzers, + cancellationToken); + } + + [Fact] + public async Task ZipappEmbeddedRequirementsFixtureProducesDeterministicOutputAsync() + { + var cancellationToken = TestContext.Current.CancellationToken; + var fixturePath = TestPaths.ResolveFixture("lang", "python", "zipapp-embedded-requirements"); + var goldenPath = Path.Combine(fixturePath, "expected.json"); + + var zipappPath = Path.Combine(fixturePath, "myapp.pyz"); + CreateZipappWithEmbeddedRequirements(zipappPath); + + var analyzers = new ILanguageAnalyzer[] + { + new PythonLanguageAnalyzer() + }; + + await 
LanguageAnalyzerTestHarness.AssertDeterministicAsync( + fixturePath, + goldenPath, + analyzers, + cancellationToken); + } + + [Fact] + public async Task ContainerWhiteoutsFixtureProducesDeterministicOutputAsync() + { + var cancellationToken = TestContext.Current.CancellationToken; + var fixturePath = TestPaths.ResolveFixture("lang", "python", "container-whiteouts"); + var goldenPath = Path.Combine(fixturePath, "expected.json"); + + var analyzers = new ILanguageAnalyzer[] + { + new PythonLanguageAnalyzer() + }; + + await LanguageAnalyzerTestHarness.AssertDeterministicAsync( + fixturePath, + goldenPath, + analyzers, + cancellationToken); + } + + [Fact] + public async Task VendoredDirectoryFixtureProducesDeterministicOutputAsync() + { + var cancellationToken = TestContext.Current.CancellationToken; + var fixturePath = TestPaths.ResolveFixture("lang", "python", "vendored-directory"); + var goldenPath = Path.Combine(fixturePath, "expected.json"); + + var analyzers = new ILanguageAnalyzer[] + { + new PythonLanguageAnalyzer() + }; + + await LanguageAnalyzerTestHarness.AssertDeterministicAsync( + fixturePath, + goldenPath, + analyzers, + cancellationToken); + } + [Fact] public async Task LockfileCollectorEmitsDeclaredOnlyComponentsAsync() { @@ -580,6 +722,77 @@ public sealed class PythonLanguageAnalyzerTests return path; } + private static void CreateWheelpkgWheel(string wheelPath) + { + Directory.CreateDirectory(Path.GetDirectoryName(wheelPath)!); + + var initBytes = Encoding.UTF8.GetBytes("__version__ = \"1.0.0\"\n"); + var metadataBytes = Encoding.UTF8.GetBytes( + $"Metadata-Version: 2.1\nName: wheelpkg\nVersion: 1.0.0\nSummary: Wheel fixture\n{Environment.NewLine}"); + var wheelBytes = Encoding.UTF8.GetBytes( + "Wheel-Version: 1.0\nGenerator: stellaops-test\nRoot-Is-Purelib: true\nTag: py3-none-any\n"); + + var recordContent = new StringBuilder() + .AppendLine($"wheelpkg/__init__.py,sha256={ComputeSha256Base64(initBytes)},{initBytes.Length}") + 
.AppendLine($"wheelpkg-1.0.0.dist-info/METADATA,sha256={ComputeSha256Base64(metadataBytes)},{metadataBytes.Length}") + .AppendLine($"wheelpkg-1.0.0.dist-info/WHEEL,sha256={ComputeSha256Base64(wheelBytes)},{wheelBytes.Length}") + .AppendLine("wheelpkg-1.0.0.dist-info/RECORD,,") + .ToString(); + var recordBytes = Encoding.UTF8.GetBytes(recordContent); + + if (File.Exists(wheelPath)) + { + File.Delete(wheelPath); + } + + using (var stream = File.Create(wheelPath)) + using (var archive = new ZipArchive(stream, ZipArchiveMode.Create, leaveOpen: false)) + { + WriteEntry(archive, "wheelpkg/__init__.py", initBytes); + WriteEntry(archive, "wheelpkg-1.0.0.dist-info/METADATA", metadataBytes); + WriteEntry(archive, "wheelpkg-1.0.0.dist-info/WHEEL", wheelBytes); + WriteEntry(archive, "wheelpkg-1.0.0.dist-info/RECORD", recordBytes); + } + + static void WriteEntry(ZipArchive archive, string entryName, byte[] content) + { + var entry = archive.CreateEntry(entryName); + entry.LastWriteTime = new DateTimeOffset(2020, 1, 1, 0, 0, 0, TimeSpan.Zero); + using var entryStream = entry.Open(); + entryStream.Write(content, 0, content.Length); + } + + static string ComputeSha256Base64(byte[] content) + => Convert.ToBase64String(SHA256.HashData(content)); + } + + private static void CreateZipappWithEmbeddedRequirements(string zipappPath) + { + if (File.Exists(zipappPath)) + { + File.Delete(zipappPath); + } + + using var fileStream = File.Create(zipappPath); + + var shebangBytes = Encoding.UTF8.GetBytes("#!/usr/bin/python3.11\n"); + fileStream.Write(shebangBytes); + + using var archive = new ZipArchive(fileStream, ZipArchiveMode.Create, leaveOpen: true); + + WriteTextEntry(archive, "__main__.py", "print('hello')\n"); + WriteTextEntry(archive, "requirements.txt", "requests==2.28.0\nflask==2.1.0\n"); + + static void WriteTextEntry(ZipArchive archive, string name, string content) + { + var entry = archive.CreateEntry(name, CompressionLevel.NoCompression); + entry.LastWriteTime = new 
DateTimeOffset(2020, 1, 1, 0, 0, 0, TimeSpan.Zero); + using var stream = entry.Open(); + using var writer = new StreamWriter(stream, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false)); + writer.Write(content); + } + } + // ===== SCAN-PY-405-007 Fixtures ===== [Fact]