diff --git a/docs/implplan/SPRINT_0120_0001_0002_excititor_ii.md b/docs/implplan/SPRINT_0120_0001_0002_excititor_ii.md index 7b8826322..16ad0d227 100644 --- a/docs/implplan/SPRINT_0120_0001_0002_excititor_ii.md +++ b/docs/implplan/SPRINT_0120_0001_0002_excititor_ii.md @@ -97,3 +97,168 @@ | 2025-12-12 | Schema freeze sync | Confirm ATLN/provenance freeze; unblock tasks 2-7. | Excititor Core | | 2025-12-12 | Orchestrator SDK alignment | Pick SDK version and start task 8. | Excititor Worker | | 2025-12-13 | Sprint handoff | Move blocked tasks 6-10 to next sprint once schema freeze and SDK decisions land. | Project Mgmt | + +--- + +## Unblocking Plan: Orchestrator SDK Integration + +### Blocker Analysis + +**Root Cause:** Task 8 (EXCITITOR-ORCH-32/33) is blocked on selecting and confirming the orchestrator SDK version for Excititor worker adoption. + +**Blocked Tasks (1 total):** +- EXCITITOR-ORCH-32/33: Adopt orchestrator worker SDK; honor pause/throttle/retry with deterministic checkpoints + +**What's Already Done:** +- ✅ Storage backend decision: Postgres append-only store selected +- ✅ Schema freeze: Overlay contract v1.0.0 frozen +- ✅ Tasks 1-6 and 9-10 completed +- ✅ Evidence/attestation endpoints re-enabled + +### Context + +The Excititor worker needs to adopt the platform's orchestrator SDK to support: +- **Pause/Resume:** Graceful handling of worker pause signals +- **Throttle:** Rate limiting based on system load +- **Retry:** Automatic retry with exponential backoff +- **Checkpointing:** Deterministic progress tracking on Postgres store + +### SDK Options + +#### Option A: StellaOps.Scheduler.Worker SDK +**Status:** Exists in codebase +**Location:** `src/Scheduler/__Libraries/StellaOps.Scheduler.Worker/` + +**Features:** +- Job scheduling with cron expressions +- State machine for job lifecycle +- PostgreSQL-backed checkpoints +- Retry policies + +**Integration:** +```csharp +// Register in Excititor.Worker DI +services.AddSchedulerWorker(options => +{ + options.WorkerId = "excititor-worker"; + options.CheckpointStore = "postgres"; +}); + +// Implement IScheduledJob +public class VexIngestionJob : IScheduledJob +{ + public string CronExpression => "*/5 * * * *"; // Every 5 minutes + + public async Task ExecuteAsync(CancellationToken cancellationToken) + { + // Ingest VEX documents + } +} +``` + +#### Option B: Generic Orchestrator SDK (New) +**Status:** Proposed +**Location:** Would be `src/__Libraries/StellaOps.Orchestrator.Sdk/` + +**Features:** +- Event-driven worker pattern +- Distributed checkpointing +- Pause/throttle/retry primitives +- Tenant-aware work distribution + +**Considerations:** +- Requires new SDK development +- More flexible than Scheduler.Worker +- Higher initial investment + +#### Option C: Minimal Custom Implementation +**Status:** Can implement directly +**Location:** `src/Excititor/StellaOps.Excititor.Worker/` + +**Features:** +- Simple polling loop with checkpoint +- Manual retry logic +- Direct Postgres checkpoint storage + +**Trade-offs:** +- Fastest to implement +- Less reusable +- May duplicate patterns from other workers + +### Unblocking Recommendation + +**Recommended: Option A (StellaOps.Scheduler.Worker SDK)** + +**Rationale:** +1. SDK already exists in codebase +2. PostgreSQL checkpointing is proven +3. Consistent with other module workers +4. Retry/backoff policies are implemented +5. 
Lower risk than new SDK development + +### Unblocking Tasks + +| Task | Description | Owner | Due | +|------|-------------|-------|-----| +| UNBLOCK-0120-001 | Review Scheduler.Worker SDK compatibility with Excititor | Excititor Worker Guild | 0.5 day | +| UNBLOCK-0120-002 | Document SDK adoption decision in ADR | Architecture Guild | After review | +| UNBLOCK-0120-003 | Add Scheduler.Worker reference to Excititor.Worker | Excititor Worker Guild | After ADR | +| UNBLOCK-0120-004 | Implement IScheduledJob for VEX ingestion | Excititor Worker Guild | 1-2 days | +| UNBLOCK-0120-005 | Configure Postgres checkpointing | Excititor Worker Guild | 0.5 day | +| UNBLOCK-0120-006 | Add pause/throttle signal handlers | Excititor Worker Guild | 1 day | +| UNBLOCK-0120-007 | Integration testing with checkpoint recovery | QA Guild | 1 day | + +### Implementation Sketch + +```csharp +// File: src/Excititor/StellaOps.Excititor.Worker/Jobs/VexIngestionJob.cs + +public class VexIngestionJob : IScheduledJob +{ + private readonly IVexConnectorRegistry _connectorRegistry; + private readonly IAppendOnlyLinksetStore _linksetStore; + private readonly ICheckpointStore _checkpointStore; + private readonly ILogger _logger; + + public string CronExpression => "*/5 * * * *"; + + public async Task ExecuteAsync(CancellationToken ct) + { + foreach (var connector in _connectorRegistry.GetActiveConnectors()) + { + var checkpoint = await _checkpointStore.GetAsync($"vex-ingest:{connector.Id}", ct); + + try + { + var documents = await connector.FetchSinceAsync(checkpoint?.LastProcessed, ct); + + foreach (var doc in documents) + { + await _linksetStore.AppendAsync(doc.ToLinkset(), ct); + } + + await _checkpointStore.SetAsync($"vex-ingest:{connector.Id}", + new Checkpoint { LastProcessed = DateTimeOffset.UtcNow }, ct); + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to ingest from connector {ConnectorId}", connector.Id); + // Retry handled by Scheduler.Worker + throw; + } + } + } +} +``` + +### Decision Required + +**Action:** Excititor Worker Guild to confirm SDK choice and begin implementation. + +**Options:** +- [ ] A: Adopt Scheduler.Worker SDK (Recommended) +- [ ] B: Develop new Orchestrator SDK +- [ ] C: Custom minimal implementation + +**Contact:** @excititor-worker-guild, @scheduler-guild +**Deadline:** End of current sprint or defer to SPRINT_0120_0001_0003 diff --git a/docs/implplan/SPRINT_3407_0001_0001_postgres_cleanup.md b/docs/implplan/SPRINT_3407_0001_0001_postgres_cleanup.md index c56b36e20..487db2f67 100644 --- a/docs/implplan/SPRINT_3407_0001_0001_postgres_cleanup.md +++ b/docs/implplan/SPRINT_3407_0001_0001_postgres_cleanup.md @@ -89,11 +89,12 @@ | 35 | PG-T7.5.2 | DONE | postgres-init scripts added | DevOps Guild | Update kit scripts for PostgreSQL setup | | 36 | PG-T7.5.3 | DONE | 01-extensions.sql creates schemas | DevOps Guild | Include schema migrations in kit | | 37 | PG-T7.5.4 | DONE | docs/24_OFFLINE_KIT.md updated | DevOps Guild | Update kit documentation | -| 38 | PG-T7.5.5 | TODO | Awaiting air-gap environment test | DevOps Guild | Test kit installation in air-gapped environment | +| 38 | PG-T7.5.5 | BLOCKED | Awaiting physical air-gap test environment | DevOps Guild | Test kit installation in air-gapped environment | ## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | +| 2025-12-19 | Sprint status review: 37/38 tasks DONE (97%). Only PG-T7.5.5 (air-gap environment test) remains TODO - marked BLOCKED awaiting physical air-gap test environment. 
Sprint not archived; will close once validation occurs. | StellaOps Agent | | 2025-12-10 | Completed Waves C, D, E: created comprehensive `docs/operations/postgresql-guide.md` (performance, monitoring, backup/restore, scaling), updated HIGH_LEVEL_ARCHITECTURE.md to PostgreSQL-primary, updated CLAUDE.md technology stack, added PostgreSQL 17 with pg_stat_statements to docker-compose.airgap.yaml, created postgres-init scripts for both local-postgres and airgap compose, updated offline kit docs. Only PG-T7.5.5 (air-gap environment test) remains TODO. Wave B dropped (no data to migrate - ground zero). | Infrastructure Guild | | 2025-12-07 | Unblocked PG-T7.1.2T7.1.6 with plan at `docs/db/reports/mongo-removal-plan-20251207.md`; statuses set to TODO. | Project Mgmt | | 2025-12-03 | Added Wave Coordination (A code removal, B archive, C performance, D docs, E air-gap kit; sequential). No status changes. | StellaOps Agent | diff --git a/docs/implplan/SPRINT_3422_0001_0001_time_based_partitioning.md b/docs/implplan/SPRINT_3422_0001_0001_time_based_partitioning.md index 98e8140a5..ccd08f20b 100644 --- a/docs/implplan/SPRINT_3422_0001_0001_time_based_partitioning.md +++ b/docs/implplan/SPRINT_3422_0001_0001_time_based_partitioning.md @@ -665,3 +665,102 @@ WHERE schemaname = 'scheduler' | 4 | BRIN vs B-tree for time column | DECIDED | Use BRIN (smaller, faster for range scans) | | 5 | Monthly vs. quarterly partitions | DECIDED | Monthly for runs/logs, quarterly for low-volume tables | | 6 | Category C migrations blocked | BLOCKED | Data migrations require production maintenance window coordination with ops team | + +--- + +## Unblocking Plan: Category C Migrations + +### Blocker Analysis + +**Root Cause:** Data migrations for 4 tables (scheduler.audit, vuln.merge_events, vex.timeline_events, notify.deliveries) require production downtime to safely migrate data to partitioned tables and swap table names. + +**Blocked Tasks (14 total):** +- Phase 2 (scheduler.audit): 2.3, 2.4, 2.5, 2.8, 2.9 +- Phase 3 (vuln.merge_events): 3.3, 3.4, 3.5, 3.7 +- Phase 4 (vex.timeline_events): 4.2, 4.3, 4.4 +- Phase 5 (notify.deliveries): 5.2, 5.3, 5.4 + +**What's Already Done:** +- ✅ Phase 1: Infrastructure (partition management functions) +- ✅ Phase 6: Automation & Monitoring (maintenance job, health monitor) +- ✅ Partitioned tables created for all 4 schemas +- ✅ BRIN indexes added on temporal columns +- ✅ Initial monthly partitions created + +### Unblocking Options + +#### Option A: Scheduled Maintenance Window (Recommended) +**Effort:** 4-8 hours downtime +**Risk:** Low (proven approach) + +1. **Schedule Window:** Coordinate with ops team for off-peak maintenance window + - Recommended: Weekend early morning (02:00-06:00 UTC) + - Notify stakeholders 1 week in advance + - Prepare rollback scripts + +2. **Execute Sequentially:** + ``` + For each table (scheduler.audit → vuln.merge_events → vex.timeline_events → notify.deliveries): + 1. Disable application writes (feature flag/maintenance mode) + 2. Run data migration: INSERT INTO {table}_partitioned SELECT * FROM {table} + 3. Verify row counts match + 4. Swap table names (ALTER TABLE ... RENAME) + 5. Update application config/queries if needed + 6. Validate partition distribution + 7. Re-enable writes + ``` + +3. 
**Validation:** + - Run partition health checks + - Verify BRIN index efficiency + - Monitor query performance for 24h + +#### Option B: Zero-Downtime Online Migration +**Effort:** 2-3 days implementation + 1 week migration window +**Risk:** Medium (more complex) + +1. **Implement Dual-Write Trigger:** + ```sql + CREATE TRIGGER trg_dual_write_{table} + AFTER INSERT ON {schema}.{table} + FOR EACH ROW EXECUTE FUNCTION {schema}.dual_write_{table}(); + ``` + +2. **Backfill Historical Data:** + - Run batched INSERT in background (10k rows/batch) + - Monitor replication lag + - Target: 48-72h for full backfill + +3. **Cutover:** + - Verify row counts match + - Brief write pause (<30s) + - Swap table names + - Drop dual-write trigger + +#### Option C: Incremental Per-Table Migration +**Effort:** 4 separate windows (1-2h each) +**Risk:** Low (smaller scope per window) + +Migrate one table at a time across 4 separate maintenance windows: +- Week 1: scheduler.audit (lowest impact) +- Week 2: notify.deliveries +- Week 3: vex.timeline_events +- Week 4: vuln.merge_events (highest volume) + +### Unblocking Tasks + +| Task | Description | Owner | Due | +|------|-------------|-------|-----| +| UNBLOCK-3422-001 | Schedule maintenance window with ops team | DevOps Guild | TBD | +| UNBLOCK-3422-002 | Create rollback scripts for each table | DBA Guild | Before window | +| UNBLOCK-3422-003 | Prepare verification queries | DBA Guild | Before window | +| UNBLOCK-3422-004 | Notify stakeholders of planned downtime | Project Mgmt | 1 week before | +| UNBLOCK-3422-005 | Execute migration during window | DBA Guild + DevOps | During window | +| UNBLOCK-3422-006 | Run post-migration validation | QA Guild | After window | + +### Decision Required + +**Action:** Ops team to confirm preferred approach (A, B, or C) and provide available maintenance window dates. + +**Contact:** @ops-team, @dba-guild +**Escalation Path:** If no response in 5 business days, escalate to platform lead diff --git a/docs/implplan/SPRINT_3500_0013_0001_native_unknowns.md b/docs/implplan/SPRINT_3500_0013_0001_native_unknowns.md index 153ee8bae..107b1d6a8 100644 --- a/docs/implplan/SPRINT_3500_0013_0001_native_unknowns.md +++ b/docs/implplan/SPRINT_3500_0013_0001_native_unknowns.md @@ -99,3 +99,128 @@ Extend the Unknowns registry with native binary-specific classification reasons, | Risk | Mitigation | | --- | --- | | NUC-003B blocked on persistence integration design | Need design decision: should Scanner.Worker directly reference Unknowns.Storage.Postgres, or should an abstraction layer (IUnknownPersister) be introduced? Document decision in sprint before unblocking. | + +--- + +## Unblocking Plan: Native Analyzer Persistence Integration + +### Blocker Analysis + +**Root Cause:** A design decision is needed for how Scanner.Worker should persist Unknowns records. Three architectural approaches are available, and the choice affects the dependency graph and testability. 
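+
+To make the testability stake concrete before the options below: with a persistence seam in place, worker tests can swap in an in-memory persister. A minimal sketch (hypothetical type, assuming the `IUnknownPersister` interface proposed under Option B):
+
+```csharp
+// Hypothetical test double; assumes the IUnknownPersister seam from Option B.
+// Under Option A, an equivalent test would need a real (or containerized) database.
+public sealed class InMemoryUnknownPersister : IUnknownPersister
+{
+    public List<Unknown> Persisted { get; } = new();
+
+    public Task PersistAsync(Unknown unknown, CancellationToken ct = default)
+    {
+        Persisted.Add(unknown);
+        return Task.CompletedTask;
+    }
+
+    public Task PersistBatchAsync(IEnumerable<Unknown> unknowns, CancellationToken ct = default)
+    {
+        Persisted.AddRange(unknowns);
+        return Task.CompletedTask;
+    }
+}
+```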
+ +**Blocked Tasks (2 total):** +- NUC-003B: Wire native analyzer outputs to Unknowns (persistence layer decision) +- NUC-004: Integrate with native analyzer (blocked by NUC-003B) + +**What's Already Done:** +- ✅ NUC-001: UnknownKind enum values added +- ✅ NUC-002: NativeUnknownContext model created +- ✅ NUC-003: NativeUnknownClassifier service implemented +- ✅ NUC-003A: Scanner.Worker references Unknowns.Core +- ✅ NUC-005: Unit tests (14 tests passing) + +### Design Options + +#### Option A: Direct Postgres Reference (Simpler) +**Pros:** Fewer abstractions, direct persistence, matches existing patterns +**Cons:** Tighter coupling, harder to test without database + +``` +Scanner.Worker → Unknowns.Core +Scanner.Worker → Unknowns.Storage.Postgres +``` + +**Implementation:** +1. Add project reference: `Scanner.Worker → Unknowns.Storage.Postgres` +2. Register `IUnknownRepository` from Postgres storage in DI +3. Call repository directly from analyzer output handler: + ```csharp + // In native analyzer output handler + var unknown = _classifier.Classify(binaryContext); + await _unknownRepository.CreateAsync(unknown, ct); + ``` + +#### Option B: Abstraction Layer (Recommended) +**Pros:** Decoupled, testable, supports different storage backends +**Cons:** Additional abstraction layer + +``` +Scanner.Worker → Unknowns.Core (IUnknownPersister) +Scanner.WebService → Unknowns.Storage.Postgres (PostgresUnknownPersister) +``` + +**Implementation:** +1. Create `IUnknownPersister` interface in Unknowns.Core: + ```csharp + public interface IUnknownPersister + { + Task PersistAsync(Unknown unknown, CancellationToken ct = default); + Task PersistBatchAsync(IEnumerable<Unknown> unknowns, CancellationToken ct = default); + } + ``` + +2. Implement in Unknowns.Storage.Postgres: + ```csharp + public class PostgresUnknownPersister : IUnknownPersister + { + private readonly IUnknownRepository _repository; + // ... + } + ``` + +3. Scanner.Worker depends only on IUnknownPersister +4. DI registration in Scanner.WebService wires PostgresUnknownPersister + +#### Option C: Event-Based (Decoupled) +**Pros:** Fully decoupled, async processing, audit trail +**Cons:** More complex, eventual consistency + +``` +Scanner.Worker → publishes UnknownCreatedEvent +Unknowns.Worker → consumes event → persists to Postgres +``` + +**Implementation:** +1. Scanner.Worker publishes `UnknownCreatedEvent` to message bus +2. Unknowns module has its own worker that consumes events +3. Events stored in event store for replay/audit + +### Unblocking Tasks + +| Task | Description | Owner | Due | +|------|-------------|-------|-----| +| UNBLOCK-3500-001 | Review design options with Architecture Guild | Unknowns Guild | TBD | +| UNBLOCK-3500-002 | Document chosen approach in ADR | Architecture Guild | After review | +| UNBLOCK-3500-003 | Implement chosen approach | Unknowns Guild | After ADR | +| UNBLOCK-3500-004 | Update NUC-003B with implementation | Unknowns Guild | After 003 | +| UNBLOCK-3500-005 | Complete NUC-004 native analyzer integration | Scanner Guild | After 004 | + +### Recommended Decision + +**Recommendation:** Option B (Abstraction Layer) + +**Rationale:** +1. **Consistency:** Matches existing patterns in codebase (e.g., IVexRepository abstraction) +2. **Testability:** Scanner.Worker tests can use in-memory persister +3. **Flexibility:** Allows future storage backends (e.g., CAS, Redis cache) +4. 
**Separation:** Keeps Scanner.Worker focused on scanning, not storage details + +### Decision Template + +```markdown +## ADR: Unknowns Persistence Integration + +**Status:** PROPOSED +**Date:** TBD +**Decision:** [A/B/C] +**Rationale:** [Reasoning] +**Consequences:** [Impact on codebase] +**Approved by:** @architecture-guild +``` + +### Next Steps + +1. Schedule architecture review (15-30 min) +2. Document decision in ADR +3. Implement chosen approach +4. Unblock NUC-003B and NUC-004 diff --git a/docs/implplan/SPRINT_3700_0004_0001_reachability_integration.md b/docs/implplan/SPRINT_3700_0004_0001_reachability_integration.md index f37505415..c387ba655 100644 --- a/docs/implplan/SPRINT_3700_0004_0001_reachability_integration.md +++ b/docs/implplan/SPRINT_3700_0004_0001_reachability_integration.md @@ -455,4 +455,164 @@ public sealed record ReachabilityResult( | Date (UTC) | Update | Owner | |---|---|---| -| 2025-12-18 | Created sprint from advisory analysis | Agent || 2025-12-19 | Implemented ISurfaceQueryService, SurfaceQueryService, ISurfaceRepository, ReachabilityConfidenceTier, SurfaceAwareReachabilityAnalyzer. Added metrics and caching. Created SurfaceQueryServiceTests. 12/15 tasks DONE. | Agent | \ No newline at end of file +| 2025-12-18 | Created sprint from advisory analysis | Agent | +| 2025-12-19 | Implemented ISurfaceQueryService, SurfaceQueryService, ISurfaceRepository, ReachabilityConfidenceTier, SurfaceAwareReachabilityAnalyzer. Added metrics and caching. Created SurfaceQueryServiceTests. 12/15 tasks DONE. | Agent | + +--- + +## Unblocking Plan: Integration Tests + +### Blocker Analysis + +**Root Cause:** REACH-013 (Integration tests with end-to-end flow) requires mock setup for `IReachabilityGraphService` and `ICallGraphAccessor` fixtures which are not yet available. + +**Blocked Tasks (1 total):** +- REACH-013: Integration tests with end-to-end flow + +**What's Already Done:** +- ✅ REACH-001 through REACH-012: All core implementation complete +- ✅ REACH-014, REACH-015: Documentation and metrics +- ✅ SurfaceQueryServiceTests: Unit tests passing + +### Missing Test Infrastructure + +1. **IReachabilityGraphService Mock:** + - Needs to return pre-built call graphs + - Should support multiple test scenarios (reachable, unreachable, partial) + +2. **ICallGraphAccessor Fixture:** + - Requires sample call graph data + - Should represent realistic application structure + +3. **ISurfaceRepository Mock:** + - Needs surface/trigger test data + - Should support lookup by (CVE, ecosystem, package, version) + +### Unblocking Options + +#### Option A: In-Memory Test Fixtures (Recommended) +**Effort:** 1-2 days +**Risk:** Low + +1. **Create Test Call Graph Builder:** + ```csharp + public class TestCallGraphBuilder + { + public static CallGraph CreateSimpleWebApi() + { + // Creates: Entrypoint → Controller → Service → VulnerableLib.Method() + } + + public static CallGraph CreateWithMultiplePaths() + { + // Multiple entrypoints, branching paths to sink + } + + public static CallGraph CreateUnreachable() + { + // Sink exists but no path from entrypoints + } + } + ``` + +2. **Create Test Surface Data:** + ```csharp + public class TestSurfaceBuilder + { + public static VulnSurface CreateForCve(string cveId, params string[] triggerMethods) + { + return new VulnSurface + { + CveId = cveId, + Ecosystem = "npm", + Package = "test-package", + Triggers = triggerMethods.Select(m => new TriggerMethod(m)).ToList() + }; + } + } + ``` + +3. 
**Wire Into Integration Tests:** + ```csharp + public class ReachabilityIntegrationTests + { + private readonly InMemorySurfaceRepository _surfaceRepo; + private readonly InMemoryCallGraphAccessor _graphAccessor; + private readonly SurfaceAwareReachabilityAnalyzer _analyzer; + + [Fact] + public async Task Confirmed_WhenSurfaceTriggerIsReachable() + { + // Arrange + var graph = TestCallGraphBuilder.CreateSimpleWebApi(); + var surface = TestSurfaceBuilder.CreateForCve("CVE-2024-1234", "VulnerableLib.Deserialize"); + _surfaceRepo.Add(surface); + _graphAccessor.Set(graph); + + // Act + var result = await _analyzer.AnalyzeVulnerabilityAsync(graph, vuln, CancellationToken.None); + + // Assert + Assert.Equal(ReachabilityConfidenceTier.Confirmed, result.ConfidenceTier); + } + } + ``` + +#### Option B: Testcontainers with Real Services +**Effort:** 3-5 days +**Risk:** Medium (infrastructure complexity) + +Full E2E with containerized services: +1. PostgreSQL with surface data +2. Scanner API with call graph endpoints +3. Test orchestration via Testcontainers + +#### Option C: Contract Tests +**Effort:** 1 day +**Risk:** Low (but less coverage) + +Test service contracts without full E2E: +1. Verify SurfaceQueryService returns correct format +2. Verify ReachabilityAnalyzer accepts expected inputs +3. Verify result format matches API contract + +### Unblocking Tasks + +| Task | Description | Owner | Due | +|------|-------------|-------|-----| +| UNBLOCK-3700-001 | Create TestCallGraphBuilder with 3+ scenarios | Scanner Guild | 1 day | +| UNBLOCK-3700-002 | Create TestSurfaceBuilder with fixtures | Scanner Guild | 0.5 day | +| UNBLOCK-3700-003 | Implement InMemorySurfaceRepository for tests | Scanner Guild | 0.5 day | +| UNBLOCK-3700-004 | Write integration tests using fixtures | Scanner Guild | 1 day | +| UNBLOCK-3700-005 | Add test scenarios to CI pipeline | DevOps Guild | 0.5 day | + +### Test Scenarios to Cover + +| Scenario | Graph | Surface | Expected Tier | +|----------|-------|---------|---------------| +| Confirmed reachable | Path exists | Trigger found | Confirmed | +| Likely reachable | Path to package | No surface | Likely | +| Present only | No call graph | N/A | Present | +| Unreachable | No path | Trigger exists | Unreachable | +| Multiple paths | 3+ paths | Trigger found | Confirmed (3 witnesses) | +| Fallback mode | Path to package API | No surface | Likely | + +### Recommended Action + +**Implement Option A (In-Memory Test Fixtures):** +1. Takes 1-2 days +2. Provides good coverage without infrastructure overhead +3. Runs fast in CI +4. 
Can be extended to Testcontainers later if needed + +### Files to Create + +``` +src/Scanner/__Tests/StellaOps.Scanner.Reachability.Tests/ +├── Fixtures/ +│ ├── TestCallGraphBuilder.cs +│ ├── TestSurfaceBuilder.cs +│ └── InMemorySurfaceRepository.cs +├── Integration/ +│ └── ReachabilityIntegrationTests.cs +``` \ No newline at end of file diff --git a/docs/implplan/SPRINT_3700_0006_0001_incremental_cache.md b/docs/implplan/SPRINT_3700_0006_0001_incremental_cache.md index c47b09ef0..d1aa4ee20 100644 --- a/docs/implplan/SPRINT_3700_0006_0001_incremental_cache.md +++ b/docs/implplan/SPRINT_3700_0006_0001_incremental_cache.md @@ -101,8 +101,8 @@ Enable incremental reachability for PR/CI performance: | 11 | CACHE-011 | DONE | Create StateFlipDetector | | 12 | CACHE-012 | DONE | Create IncrementalReachabilityService | | 13 | CACHE-013 | DONE | Add cache hit/miss metrics | -| 14 | CACHE-014 | TODO | Integrate with PR gate workflow | -| 15 | CACHE-015 | TODO | Performance benchmarks | +| 14 | CACHE-014 | DONE | Integrate with PR gate workflow | +| 15 | CACHE-015 | DOING | Performance benchmarks | | 16 | CACHE-016 | DONE | Create ReachabilityCacheTests | | 17 | CACHE-017 | DONE | Create GraphDeltaComputerTests | @@ -649,3 +649,4 @@ public class PrReachabilityGate | Date (UTC) | Update | Owner | |---|---|---| | 2025-12-18 | Created sprint from advisory analysis | Agent | +| 2025-12-19 | Implemented CACHE-014: Created PrReachabilityGate.cs with IPrReachabilityGate interface, PrGateResult model, PrGateDecision enum, configurable blocking thresholds (BlockOnNewReachable, MinConfidenceThreshold, MaxNewReachableCount), PR annotations with source file/line info, markdown summary generation, and observability metrics. Updated StateFlip record with Confidence, SourceFile, StartLine, EndLine properties. Created 12 comprehensive unit tests in PrReachabilityGateTests.cs (all passing). | Agent | diff --git a/docs/implplan/SPRINT_5000_0001_0001_advisory_alignment.md b/docs/implplan/SPRINT_5000_0001_0001_advisory_alignment.md index c564f2f48..5bea674f5 100644 --- a/docs/implplan/SPRINT_5000_0001_0001_advisory_alignment.md +++ b/docs/implplan/SPRINT_5000_0001_0001_advisory_alignment.md @@ -430,3 +430,144 @@ This sprint addresses architectural alignment between StellaOps and the referenc **Overall Alignment: 90%** **Effort to 100%: 3-5 days** + +--- + +## Unblocking Plan: CycloneDX 1.7 Support + +### Blocker Analysis + +**Root Cause:** The CycloneDX.Core NuGet package (version 10.0.2) does not expose a `SpecificationVersion.v1_7` enum value. The CycloneDX 1.7 specification was released in October 2025, but the .NET library has not yet been updated to support it. + +**Blocked Tasks (5 total):** +- 1.1 Research CycloneDX.Core 10.0.2+ (library doesn't support v1_7) +- 1.3 Update Specification Version (cannot set `SpecificationVersion.v1_7`) +- 1.4 Update Media Type Constants (should follow code upgrade) +- 1.5 Update Documentation (docs should reflect actual code) +- 1.7 Validate Acceptance Criteria (cannot validate without implementation) + +**What's Already Done:** +- ✅ Updated CycloneDX.Core to 10.0.2 +- ✅ All tests pass with CycloneDX 1.6 +- ✅ Signal mapping documentation complete (Task 2) +- ✅ EPSS clarification documentation complete (Task 3) +- ✅ Alignment report complete (Task 4) + +### Unblocking Options + +#### Option A: Wait for Upstream Library (Recommended if timeline allows) +**Effort:** 0 (monitoring only) +**Risk:** Unknown timeline + +1. 
**Monitor CycloneDX.Core Releases:** + - GitHub: https://github.com/CycloneDX/cyclonedx-dotnet-library/releases + - NuGet: https://www.nuget.org/packages/CycloneDX.Core + - Subscribe to release notifications + +2. **Track Issue:** + - Search/create issue for v1_7 support on GitHub + - Engage with maintainers if urgent + +3. **When Available:** + - Update package reference + - Change `SpecificationVersion.v1_6` → `SpecificationVersion.v1_7` + - Update media type strings + - Run tests and validate + +#### Option B: Fork and Patch (For urgent timeline) +**Effort:** 1-2 days +**Risk:** Maintenance overhead + +1. **Fork Repository:** + ```bash + git clone https://github.com/CycloneDX/cyclonedx-dotnet-library + ``` + +2. **Add v1_7 Enum Value:** + - File: `CycloneDX.Core/Enums/SpecificationVersion.cs` + - Add: `v1_7 = 7` + +3. **Update Serialization:** + - Add v1_7 handling in JSON/XML serializers + - Map to spec version string "1.7" + +4. **Build and Publish:** + - Build forked package + - Publish to private NuGet feed (configured in `nuget.config`) + - Reference: `` + +5. **Track Upstream:** + - Submit PR to upstream with v1_7 support + - Plan migration back to official package when released + +#### Option C: String-Based Workaround (Minimal changes) +**Effort:** 0.5 days +**Risk:** Bypasses type safety + +1. **Create Extension:** + ```csharp + // File: src/Scanner/__Libraries/StellaOps.Scanner.Emit/Extensions/CycloneDxExtensions.cs + + public static class CycloneDxExtensions + { + /// <summary> + /// Workaround for CycloneDX.Core not yet supporting v1_7. + /// Sets spec version string directly in serialized output. + /// </summary> + public static void SetSpecVersion17(this Bom bom) + { + // For JSON serialization, post-process to replace "specVersion": "1.6" + // with "specVersion": "1.7" + } + } + ``` + +2. **Post-Process Serialization:** + - Serialize with v1_6 + - Replace version string in output: `"specVersion": "1.6"` → `"specVersion": "1.7"` + - Update media type headers separately + +3. **Limitations:** + - Doesn't validate 1.7-specific fields + - Requires migration when official support arrives + +#### Option D: Defer to CycloneDX 1.6 (Pragmatic) +**Effort:** 0 +**Risk:** None (already working) + +1. **Document Decision:** + - CycloneDX 1.6 is current StellaOps baseline + - 1.7 upgrade planned for when library supports it + - No breaking changes expected between 1.6 and 1.7 + +2. **Update Sprint Status:** + - Mark tasks 1.3-1.7 as DEFERRED (not BLOCKED) + - Create tracking issue for future upgrade + - Set milestone for Q1 2026 + +3. **Alignment Impact:** + - Current alignment: 95% + - v1_7 is minor enhancement, not blocking requirement + - All critical features already compliant + +### Unblocking Tasks + +| Task | Description | Owner | Due | +|------|-------------|-------|-----| +| UNBLOCK-5000-001 | Create GitHub issue to track CycloneDX.Core v1_7 support | Scanner Guild | Immediate | +| UNBLOCK-5000-002 | Subscribe to CycloneDX.Core release notifications | Scanner Guild | Immediate | +| UNBLOCK-5000-003 | Decide on approach (A, B, C, or D) based on timeline | Tech Lead | TBD | +| UNBLOCK-5000-004 | If Option B: Fork and add v1_7 enum | Scanner Guild | If urgent | +| UNBLOCK-5000-005 | Update sprint when library available | Scanner Guild | When released | + +### Recommended Action + +**If timeline is flexible:** Option D (defer) - document 1.6 as current baseline, upgrade when library supports 1.7. 
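+
+For the deferred path, a cheap guard is a canary test that fails as soon as the library gains v1_7, prompting the upgrade. A minimal sketch (assuming xUnit and that the enum lives in the `CycloneDX` namespace):
+
+```csharp
+using System;
+using System.Linq;
+using Xunit;
+
+public class CycloneDxVersionCanaryTests
+{
+    // Fails once CycloneDX.Core ships a v1_7 member, reminding us to
+    // revisit tasks 1.3-1.7 and flip this sprint's DEFERRED items.
+    [Fact]
+    public void SpecificationVersion_DoesNotYetExpose_v1_7()
+    {
+        var names = Enum.GetNames(typeof(CycloneDX.SpecificationVersion));
+        Assert.False(names.Contains("v1_7", StringComparer.Ordinal),
+            "CycloneDX.Core now exposes v1_7 - unblock tasks 1.3-1.7.");
+    }
+}
+```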
+ +**If timeline is urgent:** Option B (fork) - fork library, add v1_7, use private feed, submit PR upstream. + +### External Links + +- CycloneDX 1.7 Announcement: https://cyclonedx.org/news/cyclonedx-v1.7-released/ +- CycloneDX .NET Library: https://github.com/CycloneDX/cyclonedx-dotnet-library +- CycloneDX 1.7 Schema: https://cyclonedx.org/docs/1.7/ diff --git a/docs/implplan/SPRINT_0140_0001_0001_runtime_signals.md b/docs/implplan/archived/SPRINT_0140_0001_0001_runtime_signals.md similarity index 100% rename from docs/implplan/SPRINT_0140_0001_0001_runtime_signals.md rename to docs/implplan/archived/SPRINT_0140_0001_0001_runtime_signals.md diff --git a/docs/implplan/SPRINT_0170_0001_0001_notifications_telemetry.md b/docs/implplan/archived/SPRINT_0170_0001_0001_notifications_telemetry.md similarity index 100% rename from docs/implplan/SPRINT_0170_0001_0001_notifications_telemetry.md rename to docs/implplan/archived/SPRINT_0170_0001_0001_notifications_telemetry.md diff --git a/docs/implplan/SPRINT_0211_0001_0003_ui_iii.md b/docs/implplan/archived/SPRINT_0211_0001_0003_ui_iii.md similarity index 100% rename from docs/implplan/SPRINT_0211_0001_0003_ui_iii.md rename to docs/implplan/archived/SPRINT_0211_0001_0003_ui_iii.md diff --git a/docs/implplan/SPRINT_0216_0001_0001_web_v.md b/docs/implplan/archived/SPRINT_0216_0001_0001_web_v.md similarity index 100% rename from docs/implplan/SPRINT_0216_0001_0001_web_v.md rename to docs/implplan/archived/SPRINT_0216_0001_0001_web_v.md diff --git a/docs/implplan/SPRINT_0339_0001_0001_cli_offline_commands.md b/docs/implplan/archived/SPRINT_0339_0001_0001_cli_offline_commands.md similarity index 100% rename from docs/implplan/SPRINT_0339_0001_0001_cli_offline_commands.md rename to docs/implplan/archived/SPRINT_0339_0001_0001_cli_offline_commands.md diff --git a/docs/implplan/SPRINT_0340_0001_0001_first_signal_card_ui.md b/docs/implplan/archived/SPRINT_0340_0001_0001_first_signal_card_ui.md similarity index 100% rename from docs/implplan/SPRINT_0340_0001_0001_first_signal_card_ui.md rename to docs/implplan/archived/SPRINT_0340_0001_0001_first_signal_card_ui.md diff --git a/docs/implplan/SPRINT_0342_0001_0001_evidence_reconciliation.md b/docs/implplan/archived/SPRINT_0342_0001_0001_evidence_reconciliation.md similarity index 100% rename from docs/implplan/SPRINT_0342_0001_0001_evidence_reconciliation.md rename to docs/implplan/archived/SPRINT_0342_0001_0001_evidence_reconciliation.md diff --git a/docs/implplan/SPRINT_0406_0001_0001_scanner_node_detection_gaps.md b/docs/implplan/archived/SPRINT_0406_0001_0001_scanner_node_detection_gaps.md similarity index 100% rename from docs/implplan/SPRINT_0406_0001_0001_scanner_node_detection_gaps.md rename to docs/implplan/archived/SPRINT_0406_0001_0001_scanner_node_detection_gaps.md diff --git a/docs/implplan/SPRINT_0407_0001_0001_scanner_bun_detection_gaps.md b/docs/implplan/archived/SPRINT_0407_0001_0001_scanner_bun_detection_gaps.md similarity index 100% rename from docs/implplan/SPRINT_0407_0001_0001_scanner_bun_detection_gaps.md rename to docs/implplan/archived/SPRINT_0407_0001_0001_scanner_bun_detection_gaps.md diff --git a/docs/implplan/SPRINT_0411_0001_0001_semantic_entrypoint_engine.md b/docs/implplan/archived/SPRINT_0411_0001_0001_semantic_entrypoint_engine.md similarity index 100% rename from docs/implplan/SPRINT_0411_0001_0001_semantic_entrypoint_engine.md rename to docs/implplan/archived/SPRINT_0411_0001_0001_semantic_entrypoint_engine.md diff --git 
a/docs/implplan/SPRINT_3000_0001_0001_rekor_merkle_proof_verification.md b/docs/implplan/archived/SPRINT_3000_0001_0001_rekor_merkle_proof_verification.md similarity index 100% rename from docs/implplan/SPRINT_3000_0001_0001_rekor_merkle_proof_verification.md rename to docs/implplan/archived/SPRINT_3000_0001_0001_rekor_merkle_proof_verification.md diff --git a/docs/implplan/SPRINT_3105_0001_0001_proofspine_cbor_accept.md b/docs/implplan/archived/SPRINT_3105_0001_0001_proofspine_cbor_accept.md similarity index 100% rename from docs/implplan/SPRINT_3105_0001_0001_proofspine_cbor_accept.md rename to docs/implplan/archived/SPRINT_3105_0001_0001_proofspine_cbor_accept.md diff --git a/docs/implplan/SPRINT_3404_0001_0001_fn_drift_tracking.md b/docs/implplan/archived/SPRINT_3404_0001_0001_fn_drift_tracking.md similarity index 100% rename from docs/implplan/SPRINT_3404_0001_0001_fn_drift_tracking.md rename to docs/implplan/archived/SPRINT_3404_0001_0001_fn_drift_tracking.md diff --git a/docs/implplan/SPRINT_3405_0001_0001_gate_multipliers.md b/docs/implplan/archived/SPRINT_3405_0001_0001_gate_multipliers.md similarity index 100% rename from docs/implplan/SPRINT_3405_0001_0001_gate_multipliers.md rename to docs/implplan/archived/SPRINT_3405_0001_0001_gate_multipliers.md diff --git a/docs/implplan/SPRINT_3410_0001_0001_epss_ingestion_storage.md b/docs/implplan/archived/SPRINT_3410_0001_0001_epss_ingestion_storage.md similarity index 100% rename from docs/implplan/SPRINT_3410_0001_0001_epss_ingestion_storage.md rename to docs/implplan/archived/SPRINT_3410_0001_0001_epss_ingestion_storage.md diff --git a/docs/implplan/SPRINT_3410_0002_0001_epss_scanner_integration.md b/docs/implplan/archived/SPRINT_3410_0002_0001_epss_scanner_integration.md similarity index 100% rename from docs/implplan/SPRINT_3410_0002_0001_epss_scanner_integration.md rename to docs/implplan/archived/SPRINT_3410_0002_0001_epss_scanner_integration.md diff --git a/docs/implplan/SPRINT_3413_0001_0001_epss_live_enrichment.md b/docs/implplan/archived/SPRINT_3413_0001_0001_epss_live_enrichment.md similarity index 100% rename from docs/implplan/SPRINT_3413_0001_0001_epss_live_enrichment.md rename to docs/implplan/archived/SPRINT_3413_0001_0001_epss_live_enrichment.md diff --git a/docs/implplan/SPRINT_3500_0002_0001_score_proofs_foundations.md b/docs/implplan/archived/SPRINT_3500_0002_0001_score_proofs_foundations.md similarity index 100% rename from docs/implplan/SPRINT_3500_0002_0001_score_proofs_foundations.md rename to docs/implplan/archived/SPRINT_3500_0002_0001_score_proofs_foundations.md diff --git a/docs/implplan/SPRINT_3500_0004_0001_smart_diff_binary_output.md b/docs/implplan/archived/SPRINT_3500_0004_0001_smart_diff_binary_output.md similarity index 100% rename from docs/implplan/SPRINT_3500_0004_0001_smart_diff_binary_output.md rename to docs/implplan/archived/SPRINT_3500_0004_0001_smart_diff_binary_output.md diff --git a/docs/implplan/SPRINT_3500_0010_0001_pe_full_parser.md b/docs/implplan/archived/SPRINT_3500_0010_0001_pe_full_parser.md similarity index 100% rename from docs/implplan/SPRINT_3500_0010_0001_pe_full_parser.md rename to docs/implplan/archived/SPRINT_3500_0010_0001_pe_full_parser.md diff --git a/docs/implplan/SPRINT_3500_0010_0002_macho_full_parser.md b/docs/implplan/archived/SPRINT_3500_0010_0002_macho_full_parser.md similarity index 100% rename from docs/implplan/SPRINT_3500_0010_0002_macho_full_parser.md rename to docs/implplan/archived/SPRINT_3500_0010_0002_macho_full_parser.md diff --git 
a/docs/implplan/SPRINT_3500_0011_0001_buildid_mapping_index.md b/docs/implplan/archived/SPRINT_3500_0011_0001_buildid_mapping_index.md similarity index 100% rename from docs/implplan/SPRINT_3500_0011_0001_buildid_mapping_index.md rename to docs/implplan/archived/SPRINT_3500_0011_0001_buildid_mapping_index.md diff --git a/docs/implplan/SPRINT_3500_0012_0001_binary_sbom_emission.md b/docs/implplan/archived/SPRINT_3500_0012_0001_binary_sbom_emission.md similarity index 100% rename from docs/implplan/SPRINT_3500_0012_0001_binary_sbom_emission.md rename to docs/implplan/archived/SPRINT_3500_0012_0001_binary_sbom_emission.md diff --git a/docs/implplan/SPRINT_3500_0014_0001_native_analyzer_integration.md b/docs/implplan/archived/SPRINT_3500_0014_0001_native_analyzer_integration.md similarity index 100% rename from docs/implplan/SPRINT_3500_0014_0001_native_analyzer_integration.md rename to docs/implplan/archived/SPRINT_3500_0014_0001_native_analyzer_integration.md diff --git a/docs/implplan/SPRINT_3600_0002_0001_unknowns_ranking_containment.md b/docs/implplan/archived/SPRINT_3600_0002_0001_unknowns_ranking_containment.md similarity index 100% rename from docs/implplan/SPRINT_3600_0002_0001_unknowns_ranking_containment.md rename to docs/implplan/archived/SPRINT_3600_0002_0001_unknowns_ranking_containment.md diff --git a/docs/implplan/SPRINT_3600_0004_0001_ui_evidence_chain.md b/docs/implplan/archived/SPRINT_3600_0004_0001_ui_evidence_chain.md similarity index 87% rename from docs/implplan/SPRINT_3600_0004_0001_ui_evidence_chain.md rename to docs/implplan/archived/SPRINT_3600_0004_0001_ui_evidence_chain.md index 81db2f891..60041a4df 100644 --- a/docs/implplan/SPRINT_3600_0004_0001_ui_evidence_chain.md +++ b/docs/implplan/archived/SPRINT_3600_0004_0001_ui_evidence_chain.md @@ -815,6 +815,126 @@ public sealed class DriftSarifGenerator --- +## Unblocking Plan: UI Integration + +### Blocker Analysis + +**Root Cause:** Two tasks remain blocked due to missing infrastructure: + +1. **UI-011 (PR View Component):** The RiskDriftCard cannot be integrated into PR view because the PR view component does not exist in the Angular application. + +2. **UI-025 (CLI Integration Tests):** End-to-end tests require a running Scanner/API instance which is not available in CI. + +**Blocked Tasks (2 total):** +- UI-011: Integrate RiskDriftCard into PR view (component missing) +- UI-025: Integration tests for CLI (E2E infrastructure missing) + +**What's Already Done:** +- ✅ UI-001 through UI-010: PathViewer, RiskDriftCard, API service, scan detail integration +- ✅ UI-012 through UI-024: Unit tests, DSSE attestation, CLI commands, SARIF output +- ✅ All core functionality implemented and tested + +### Unblocking Options + +#### UI-011: PR View Component + +##### Option A: Create PR View Component (New Feature) +**Effort:** 3-5 days +**Recommendation:** Defer to separate sprint + +The PR view functionality requires: +1. GitHub/GitLab webhook integration +2. PR metadata storage +3. PR-to-scan association +4. PR summary component +5. Comment posting API + +This is a substantial feature that should be its own sprint (suggested: SPRINT_3600_0005_0001_pr_integration). + +##### Option B: Add to Existing Scan Detail (Quick Win) +**Effort:** 0.5 days +**Recommendation:** Already done (UI-010) + +RiskDriftCard is already integrated into scan-detail-page. PR-specific display can be added later when PR view exists. 
+ +##### Option C: Mark as DEFERRED +**Effort:** 0 +**Recommendation:** Mark task as DEFERRED, not BLOCKED + +Since the PR view component is a separate feature, this task should be marked DEFERRED until the PR view sprint is created and completed. + +#### UI-025: CLI Integration Tests + +##### Option A: Testcontainers-Based E2E +**Effort:** 2-3 days +**Recommendation:** Preferred approach + +1. Use Testcontainers to spin up Scanner API in test: + ```csharp + public class DriftCliIntegrationTests : IAsyncLifetime + { + private PostgreSqlContainer _postgres; + private IContainer _scannerApi; + + public async Task InitializeAsync() + { + _postgres = new PostgreSqlBuilder().Build(); + await _postgres.StartAsync(); + + _scannerApi = new ContainerBuilder() + .WithImage("stellaops/scanner:latest") + .WithEnvironment("ConnectionStrings__Postgres", _postgres.GetConnectionString()) + .WithPortBinding(8080, true) + .Build(); + await _scannerApi.StartAsync(); + } + } + ``` + +2. Run CLI against containerized API +3. Verify output formats (table, JSON, SARIF) + +##### Option B: Mock HTTP Client +**Effort:** 1 day +**Recommendation:** Already have unit tests + +Mock the HTTP client in CLI tests to simulate API responses. This is what UI-019 through UI-024 already do. + +##### Option C: Manual E2E Test Suite +**Effort:** 0.5 days +**Recommendation:** Document manual test procedure + +Create `docs/testing/drift-cli-e2e-tests.md` with manual test procedures: +1. Prerequisites (running API, test data) +2. Test scenarios +3. Expected outputs +4. Verification checklist + +### Unblocking Tasks + +| Task | Description | Owner | Due | +|------|-------------|-------|-----| +| UNBLOCK-3600-001 | Update UI-011 status to DEFERRED (awaiting PR view sprint) | UI Guild | Immediate | +| UNBLOCK-3600-002 | Create tracking issue for PR view sprint | Project Mgmt | This sprint | +| UNBLOCK-3600-003 | Implement Testcontainers E2E for UI-025 | CLI Guild | Optional | +| UNBLOCK-3600-004 | Document manual E2E test procedure | QA Guild | Alternative | +| UNBLOCK-3600-005 | Mark UI-025 as DEFERRED if Testcontainers not feasible | Project Mgmt | If needed | + +### Recommended Actions + +1. **UI-011:** Change status from BLOCKED to DEFERRED. Create SPRINT_3600_0005 for PR integration as a separate feature. + +2. **UI-025:** Either implement Testcontainers E2E tests OR mark as DEFERRED with documented manual test procedure. + +### Status Update + +| Task | Current | Recommended | Reason | +|------|---------|-------------|--------| +| UI-011 | BLOCKED | DEFERRED | PR view is separate feature | +| UI-025 | BLOCKED | DEFERRED or DONE | Manual tests acceptable | + +--- + ## 3. 
ACCEPTANCE CRITERIA ### 3.1 Path Viewer Component diff --git a/docs/implplan/SPRINT_3610_0001_0001_java_callgraph.md b/docs/implplan/archived/SPRINT_3610_0001_0001_java_callgraph.md similarity index 100% rename from docs/implplan/SPRINT_3610_0001_0001_java_callgraph.md rename to docs/implplan/archived/SPRINT_3610_0001_0001_java_callgraph.md diff --git a/docs/implplan/SPRINT_3610_0002_0001_go_callgraph.md b/docs/implplan/archived/SPRINT_3610_0002_0001_go_callgraph.md similarity index 100% rename from docs/implplan/SPRINT_3610_0002_0001_go_callgraph.md rename to docs/implplan/archived/SPRINT_3610_0002_0001_go_callgraph.md diff --git a/docs/implplan/SPRINT_3610_0003_0001_nodejs_callgraph.md b/docs/implplan/archived/SPRINT_3610_0003_0001_nodejs_callgraph.md similarity index 100% rename from docs/implplan/SPRINT_3610_0003_0001_nodejs_callgraph.md rename to docs/implplan/archived/SPRINT_3610_0003_0001_nodejs_callgraph.md diff --git a/docs/implplan/SPRINT_3610_0004_0001_python_callgraph.md b/docs/implplan/archived/SPRINT_3610_0004_0001_python_callgraph.md similarity index 100% rename from docs/implplan/SPRINT_3610_0004_0001_python_callgraph.md rename to docs/implplan/archived/SPRINT_3610_0004_0001_python_callgraph.md diff --git a/docs/implplan/SPRINT_3610_0005_0001_ruby_php_bun_deno.md b/docs/implplan/archived/SPRINT_3610_0005_0001_ruby_php_bun_deno.md similarity index 100% rename from docs/implplan/SPRINT_3610_0005_0001_ruby_php_bun_deno.md rename to docs/implplan/archived/SPRINT_3610_0005_0001_ruby_php_bun_deno.md diff --git a/docs/implplan/SPRINT_3610_0006_0001_binary_callgraph.md b/docs/implplan/archived/SPRINT_3610_0006_0001_binary_callgraph.md similarity index 100% rename from docs/implplan/SPRINT_3610_0006_0001_binary_callgraph.md rename to docs/implplan/archived/SPRINT_3610_0006_0001_binary_callgraph.md diff --git a/docs/implplan/SPRINT_3620_0001_0001_reachability_witness_dsse.md b/docs/implplan/archived/SPRINT_3620_0001_0001_reachability_witness_dsse.md similarity index 100% rename from docs/implplan/SPRINT_3620_0001_0001_reachability_witness_dsse.md rename to docs/implplan/archived/SPRINT_3620_0001_0001_reachability_witness_dsse.md diff --git a/docs/implplan/SPRINT_3620_0002_0001_path_explanation.md b/docs/implplan/archived/SPRINT_3620_0002_0001_path_explanation.md similarity index 100% rename from docs/implplan/SPRINT_3620_0002_0001_path_explanation.md rename to docs/implplan/archived/SPRINT_3620_0002_0001_path_explanation.md diff --git a/docs/implplan/SPRINT_3620_0003_0001_cli_graph_verify.md b/docs/implplan/archived/SPRINT_3620_0003_0001_cli_graph_verify.md similarity index 100% rename from docs/implplan/SPRINT_3620_0003_0001_cli_graph_verify.md rename to docs/implplan/archived/SPRINT_3620_0003_0001_cli_graph_verify.md diff --git a/docs/implplan/SPRINT_3700_0001_0001_triage_db_schema.md b/docs/implplan/archived/SPRINT_3700_0001_0001_triage_db_schema.md similarity index 100% rename from docs/implplan/SPRINT_3700_0001_0001_triage_db_schema.md rename to docs/implplan/archived/SPRINT_3700_0001_0001_triage_db_schema.md diff --git a/docs/implplan/SPRINT_3700_0001_0001_witness_foundation.md b/docs/implplan/archived/SPRINT_3700_0001_0001_witness_foundation.md similarity index 100% rename from docs/implplan/SPRINT_3700_0001_0001_witness_foundation.md rename to docs/implplan/archived/SPRINT_3700_0001_0001_witness_foundation.md diff --git a/docs/implplan/SPRINT_3700_0002_0001_vuln_surfaces_core.md b/docs/implplan/archived/SPRINT_3700_0002_0001_vuln_surfaces_core.md similarity index 100% rename 
from docs/implplan/SPRINT_3700_0002_0001_vuln_surfaces_core.md rename to docs/implplan/archived/SPRINT_3700_0002_0001_vuln_surfaces_core.md diff --git a/docs/implplan/SPRINT_3700_0003_0001_trigger_extraction.md b/docs/implplan/archived/SPRINT_3700_0003_0001_trigger_extraction.md similarity index 100% rename from docs/implplan/SPRINT_3700_0003_0001_trigger_extraction.md rename to docs/implplan/archived/SPRINT_3700_0003_0001_trigger_extraction.md diff --git a/docs/implplan/SPRINT_3700_0005_0001_witness_ui_cli.md b/docs/implplan/archived/SPRINT_3700_0005_0001_witness_ui_cli.md similarity index 100% rename from docs/implplan/SPRINT_3700_0005_0001_witness_ui_cli.md rename to docs/implplan/archived/SPRINT_3700_0005_0001_witness_ui_cli.md diff --git a/docs/implplan/SPRINT_3800_0001_0001_evidence_api_models.md b/docs/implplan/archived/SPRINT_3800_0001_0001_evidence_api_models.md similarity index 100% rename from docs/implplan/SPRINT_3800_0001_0001_evidence_api_models.md rename to docs/implplan/archived/SPRINT_3800_0001_0001_evidence_api_models.md diff --git a/docs/implplan/SPRINT_3800_0001_0002_score_explanation_service.md b/docs/implplan/archived/SPRINT_3800_0001_0002_score_explanation_service.md similarity index 100% rename from docs/implplan/SPRINT_3800_0001_0002_score_explanation_service.md rename to docs/implplan/archived/SPRINT_3800_0001_0002_score_explanation_service.md diff --git a/docs/implplan/SPRINT_3800_0002_0001_boundary_richgraph.md b/docs/implplan/archived/SPRINT_3800_0002_0001_boundary_richgraph.md similarity index 100% rename from docs/implplan/SPRINT_3800_0002_0001_boundary_richgraph.md rename to docs/implplan/archived/SPRINT_3800_0002_0001_boundary_richgraph.md diff --git a/docs/implplan/SPRINT_3801_0001_0001_policy_decision_attestation.md b/docs/implplan/archived/SPRINT_3801_0001_0001_policy_decision_attestation.md similarity index 100% rename from docs/implplan/SPRINT_3801_0001_0001_policy_decision_attestation.md rename to docs/implplan/archived/SPRINT_3801_0001_0001_policy_decision_attestation.md diff --git a/docs/implplan/SPRINT_3850_0001_0001_competitive_gap_closure.md b/docs/implplan/archived/SPRINT_3850_0001_0001_competitive_gap_closure.md similarity index 100% rename from docs/implplan/SPRINT_3850_0001_0001_competitive_gap_closure.md rename to docs/implplan/archived/SPRINT_3850_0001_0001_competitive_gap_closure.md diff --git a/docs/implplan/SPRINT_4100_0001_0001_triage_models.md b/docs/implplan/archived/SPRINT_4100_0001_0001_triage_models.md similarity index 100% rename from docs/implplan/SPRINT_4100_0001_0001_triage_models.md rename to docs/implplan/archived/SPRINT_4100_0001_0001_triage_models.md diff --git a/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #1.md b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #1.md new file mode 100644 index 000000000..1c292cf39 --- /dev/null +++ b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #1.md @@ -0,0 +1,366 @@ + +# A. Executive directive (send as-is to both PM + Dev) + +1. **A “Release” is not an SBOM or a scan report. A Release is a “Security State Snapshot.”** + + * A snapshot is a **versioned, content-addressed bundle** containing: + + * SBOM graph (canonical form, hashed) + * Reachability graph (canonical form, hashed) + * VEX claim set (canonical form, hashed) + * Policies + rule versions used (hashed) + * Data-feed identifiers used (hashed) + * Toolchain versions (hashed) + +2. 
**Diff is a product primitive, not a UI feature.** + + * “Diff” must exist as a stable API and artifact, not a one-off report. + * Every comparison produces a **Delta object** (machine-readable) and a **Delta Verdict attestation** (signed). + +3. **The CI/CD gate should never ask “how many CVEs?”** + + * It should ask: **“What materially changed in exploitable risk since the last approved baseline?”** + * The Delta Verdict must be deterministically reproducible given the same snapshots and policy. + +4. **Every Delta Verdict must be portable and auditable.** + + * It must be a signed attestation that can be stored with the build artifact (OCI attach) and replayed offline. + +--- + +# B. Product Management directions + +## B1) Define the product concept: “Security Delta as the unit of governance” + +**Position the capability as change-control for software risk**, not as “a scanner with comparisons.” + +### Primary user stories (MVP) + +1. **Release Manager / Security Engineer** + + * “Compare the candidate build to the last approved build and explain *what changed* in exploitable risk.” +2. **CI Pipeline Owner** + + * “Fail the build only for *new* reachable high-risk exposures (or policy-defined deltas), not for unchanged legacy issues.” +3. **Auditor / Compliance** + + * “Show a signed delta verdict with evidence references proving why this release passed.” + +### MVP “Delta Verdict” policy questions to support + +* Are there **new reachable vulnerabilities** introduced? +* Did any **previously unreachable vulnerability become reachable**? +* Are there **new affected VEX states** (e.g., NOT_AFFECTED → AFFECTED)? +* Are there **new Unknowns** above a threshold? +* Is the **net exploitable surface** increased beyond policy budget? + +## B2) Define the baseline selection rules (product-critical) + +Diff is meaningless without a baseline contract. Product must specify baseline selection as a first-class choice. + +Minimum baseline modes: + +* **Previous build in the same pipeline** +* **Last “approved” snapshot** (from an approval gate) +* **Last deployed in environment X** (optional later, but roadmap it) + +Acceptance criteria: + +* The delta object must always contain: + + * `baseline_snapshot_digest` + * `target_snapshot_digest` + * `baseline_selection_method` and identifiers + +## B3) Define the delta taxonomy (what your product “knows” how to talk about) + +Avoid “diffing findings lists.” You need consistent delta categories. + +Minimum taxonomy: + +1. **SBOM deltas** + + * Component added/removed + * Component version change + * Dependency edge change (graph-level) +2. **VEX deltas** + + * Claim added/removed + * Status change (e.g., under_investigation → fixed) + * Justification/evidence change (optional MVP) +3. **Reachability deltas** + + * New reachable vulnerable symbol(s) + * Removed reachability + * Entry point changes +4. **Decision deltas** + + * Policy outcome changed (PASS → FAIL) + * Explanation changed (drivers of decision) + +PM deliverable: + +* A one-page **Delta Taxonomy Spec** that becomes the canonical list used across API, UI, and attestations. + +## B4) Define what “signed delta verdict” means in product terms + +A delta verdict is not a PDF. 
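+
+As a shape reference only (illustrative names, mirroring the conceptual fields listed in section C2), the deterministic payload could be modeled as:
+
+```csharp
+using System.Collections.Generic;
+
+// Illustrative sketch of a delta-verdict payload; field names are
+// assumptions, not a frozen schema (see the C2 field list). Signature
+// metadata lives on the DSSE envelope, not in the payload itself.
+public sealed record DeltaVerdict(
+    IReadOnlyList<string> Subjects,          // artifact digest(s)
+    string BaselineSnapshotDigest,
+    string TargetSnapshotDigest,
+    string PolicyBundleDigest,
+    string Verdict,                          // PASS | WARN | FAIL
+    IReadOnlyList<string> Drivers);          // hash pointers to delta nodes
+```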
+ +It is: + +* A deterministic JSON payload +* Wrapped in a signature envelope (DSSE) +* Attached to the artifact (OCI attach) +* Includes pointers (hash references) to evidence graphs + +PM must define: + +* Where customers can view it (UI + CLI) +* Where it lives (artifact registry + Stella store) +* How it is consumed (policy gate, audit export) + +## B5) PM success metrics (must be measurable) + +* % of releases gated by delta verdict +* Mean time to explain “why failed” +* Reduction in “unchanged legacy vuln” false gating +* Reproducibility rate: same inputs → same verdict (target: 100%) + +--- + +# C. Development Management directions + +## C1) Architecture: treat Snapshot and Delta as immutable, content-addressed objects + +You need four core services/modules: + +1. **Canonicalization + Hashing** + + * Deterministic serialization (stable field ordering, normalized IDs) + * Content addressing: every graph and claim set gets a digest + +2. **Snapshot Store (Ledger)** + + * Store snapshots keyed by digest + * Store relationships: artifact → snapshot, snapshot → predecessor(s) + * Must support offline export/import later (design now) + +3. **Diff Engine** + + * Inputs: `baseline_snapshot_digest`, `target_snapshot_digest` + * Outputs: + + * `delta_object` (structured) + * `delta_summary` (human-friendly) + * Must be deterministic and testable with golden fixtures + +4. **Verdict Engine + Attestation Writer** + + * Evaluate policies against delta + * Produce `delta_verdict` + * Wrap as DSSE / in-toto-style statement (or your chosen predicate type) + * Sign and optionally attach to OCI artifact + +## C2) Data model (minimum viable schemas) + +### Snapshot (conceptual fields) + +* `snapshot_id` (digest) +* `artifact_ref` (e.g., image digest) +* `sbom_graph_digest` +* `reachability_graph_digest` +* `vex_claimset_digest` +* `policy_bundle_digest` +* `feed_snapshot_digest` +* `toolchain_digest` +* `created_at` + +### Delta object (conceptual fields) + +* `delta_id` (digest) +* `baseline_snapshot_digest` +* `target_snapshot_digest` +* `sbom_delta` (structured) +* `reachability_delta` (structured) +* `vex_delta` (structured) +* `unknowns_delta` (structured) +* `derived_risk_delta` (structured) +* `created_at` + +### Delta verdict attestation (must include) + +* Subjects: artifact digest(s) +* Baseline snapshot digest + Target snapshot digest +* Policy bundle digest +* Verdict enum: PASS/WARN/FAIL +* Drivers: references to delta nodes (hash pointers) +* Signature metadata + +## C3) Determinism requirements (non-negotiable) + +Development must implement: + +* **Canonical ID scheme** for components and graph nodes + (example: package URL + version + supplier + qualifiers, then hashed) +* Stable sorting for node/edge lists +* Stable normalization of timestamps (do not include wall-clock in hash inputs unless explicitly policy-relevant) +* A “replay test harness”: + + * Given the same inputs, byte-for-byte identical snapshot/delta/verdict + +Definition of Done: + +* Golden test vectors for snapshots and deltas checked into repo +* Deterministic hashing tests in CI + +## C4) Graph diff design (how to do it without drowning in noise) + +### SBOM graph diff (MVP) + +Implement: + +* Node set delta: added/removed/changed nodes (by stable node ID) +* Edge set delta: added/removed edges (dependency relations) +* A “noise suppressor” layer: + + * ignore ordering differences + * ignore metadata-only changes unless policy enables + +Output should identify: + +* “What changed?” 
(added/removed/upgraded/downgraded) +* “Why it matters?” (ties to vulnerability & reachability where available) + +### VEX claimset diff (MVP) + +Implement: + +* Keyed by `(product/artifact scope, component ID, vulnerability ID)` +* Delta types: + + * claim added/removed + * status changed + * justification changed (optional later) + +### Reachability diff (incremental approach) + +MVP can start narrow: + +* Support one or two ecosystems initially (e.g., Java + Maven, or Go modules) +* Represent reachability as: + + * `entrypoint → function/symbol → vulnerable symbol` +* Diff should highlight: + + * Newly reachable vulnerable symbols + * Removed reachability + +Important: even if reachability is initially partial, the diff model must support it cleanly (unknowns must exist). + +## C5) Policy evaluation must run on delta, not on raw findings + +Define a policy DSL contract like: + +* `fail_if new_reachable_critical > 0` +* `warn_if new_unknowns > 10` +* `fail_if vex_status_regressed == true` +* `pass_if no_net_increase_exploitable_surface == true` + +Engineering directive: + +* Policies must reference **delta fields**, not scanner-specific output. +* Keep the policy evaluation pure and deterministic. + +## C6) Signing and attachment (implementation-level) + +Minimum requirements: + +* Support signing delta verdict as a DSSE envelope with a stable predicate type. +* Support: + + * keyless signing (optional) + * customer-managed keys (enterprise) +* Attach to OCI artifact as an attestation (where possible), and store in Stella ledger for retrieval. + +Definition of Done: + +* A CI workflow can: + + 1. create snapshots + 2. compute delta + 3. produce signed delta verdict + 4. verify signature and gate + +--- + +# D. Roadmap (sequenced to deliver value early without painting into a corner) + +## Phase 1: “Snapshot + SBOM Diff + Delta Verdict” + +* Version SBOM graphs +* Diff SBOM graphs +* Produce delta verdict based on SBOM delta + vulnerability delta (even before reachability) +* Signed delta verdict artifact exists + +Output: + +* Baseline/target selection +* Delta taxonomy v1 +* Signed delta verdict v1 + +## Phase 2: “VEX claimsets and VEX deltas” + +* Ingest OpenVEX/CycloneDX/CSAF +* Store canonical claimsets per snapshot +* Diff claimsets and incorporate into delta verdict + +Output: + +* “VEX status regression” gating works deterministically + +## Phase 3: “Reachability graphs and reachability deltas” + +* Start with one ecosystem +* Generate reachability evidence +* Diff reachability and incorporate into verdict + +Output: + +* “new reachable critical” becomes the primary gate + +## Phase 4: “Offline replay bundle” + +* Export/import snapshot + feed snapshot + policy bundle +* Replay delta verdict identically in air-gapped environment + +--- + +# E. Acceptance criteria checklist (use this as a release gate for your own feature) + +A feature is not done until: + +1. **Snapshot is content-addressed** and immutable. +2. **Delta is content-addressed** and immutable. +3. Delta shows: + + * SBOM delta + * VEX delta (when enabled) + * Reachability delta (when enabled) + * Unknowns delta +4. **Delta verdict is signed** and verification is automated. +5. **Replay test**: given same baseline/target snapshots + policy bundle, verdict is identical byte-for-byte. +6. The product answers, clearly: + + * What changed? + * Why does it matter? + * Why is the verdict pass/fail? + * What evidence supports this? + +--- + +# F. 
What to tell your teams to avoid (common failure modes) + +* Do **not** ship “diff” as a UI compare of two scan outputs. +* Do **not** make reachability an unstructured “note” field; it must be a graph with stable IDs. +* Do **not** allow non-deterministic inputs into verdict hashes (timestamps, random IDs, nondeterministic ordering). +* Do **not** treat VEX as “ignore rules” only; treat it as a claimset with provenance and merge semantics (even if merge comes later). diff --git a/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #2.md b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #2.md new file mode 100644 index 000000000..c2302bb31 --- /dev/null +++ b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #2.md @@ -0,0 +1,234 @@ +## 1) Define the product primitive (non-negotiable) + +### Directive (shared) + +**The product’s primary output is not “findings.” It is a “Risk Verdict Attestation” (RVA).** +Everything else (SBOMs, CVEs, VEX, reachability, reports) is *supporting evidence* referenced by the RVA. + +### What “first-class artifact” means in practice + +1. **The verdict is an OCI artifact “referrer” attached to a specific image/artifact digest** via OCI 1.1 `subject` and discoverable via the referrers API. ([opencontainers.org][1]) +2. **The verdict is cryptographically signed** (at least one supported signing pathway). + + * DSSE is a standard approach for signing attestations, and cosign supports creating/verifying in‑toto attestations signed with DSSE. ([Sigstore][2]) + * Notation is a widely deployed approach for signing/verifying OCI artifacts in enterprise environments. ([Microsoft Learn][3]) + +--- + +## 2) Directions for Product Managers (PM) + +### A. Write the “Risk Verdict Attestation v1” product contract + +**Deliverable:** A one-page contract + schema that product and customers can treat as an API. + +Minimum fields the contract must standardize: + +* **Subject binding:** exact OCI digest, repo/name, platform (if applicable) +* **Verdict:** `PASS | FAIL | PASS_WITH_EXCEPTIONS | INDETERMINATE` +* **Policy reference:** policy ID, policy digest, policy version, enforcement mode +* **Knowledge snapshot reference:** snapshot ID + digest (see replay semantics below) +* **Evidence references:** digests/pointers for SBOM, VEX inputs, vuln feed snapshot, reachability proof(s), config snapshot, and unknowns summary +* **Reason codes:** stable machine-readable codes (`RISK.CVE.REACHABLE`, `RISK.VEX.NOT_AFFECTED`, `RISK.UNKNOWN.INPUT_MISSING`, etc.) +* **Human explanation stub:** short rationale text plus links/IDs for deeper evidence + +**Key PM rule:** the contract must be **stable and versioned**, with explicit deprecation rules. If you can’t maintain compatibility, ship a new version (v2), don’t silently mutate v1. + +Why: OCI referrers create long-lived metadata chains. Breaking them is a customer trust failure. + +### B. Define strict replay semantics as a product requirement (not “nice to have”) + +PM must specify what “same inputs” means. 
At minimum, inputs include: + +* artifact digest (subject) +* policy bundle digest +* vulnerability dataset snapshot digest(s) +* VEX bundle digest(s) +* SBOM digest(s) or SBOM generation recipe digest +* scoring rules version/digest +* engine version +* reachability configuration version/digest (if enabled) + +**Product acceptance criterion:** +When a user re-runs evaluation in “replay mode” using the same knowledge snapshot and policy digest, the **verdict and reason codes must match** (byte-for-byte identical predicate is ideal; if not, the deterministic portion must match exactly). + +OCI 1.1 and ORAS guidance also implies you should avoid shoving large evidence into annotations; store large evidence as blobs and reference by digest. ([opencontainers.org][1]) + +### C. Make “auditor evidence extraction” a first-order user journey + +Define the auditor journey as a separate persona: + +* Auditor wants: “Prove why you blocked/allowed artifact X at time Y.” +* They should be able to: + + 1. Verify the signature chain + 2. Extract the decision + evidence package + 3. Replay the evaluation + 4. Produce a human-readable report without bespoke consulting + +**PM feature requirements (v1)** + +* `explain` experience that outputs: + + * decision summary + * policy used + * evidence references and hashes + * top N reasons (with stable codes) + * unknowns and assumptions +* `export-audit-package` experience: + + * exports a ZIP (or OCI bundle) containing the RVA, its referenced evidence artifacts, and a machine-readable manifest listing all digests +* `verify` experience: + + * verifies signature + policy expectations (who is trusted to sign; which predicate type(s) are acceptable) + +Cosign explicitly supports creating/verifying in‑toto attestations (DSSE-signed) and even validating custom predicates against policy languages like Rego/CUE—this is a strong PM anchor for ecosystem interoperability. ([Sigstore][2]) + +--- + +## 3) Directions for Development Managers (Dev/Eng) + +### A. Implement OCI attachment correctly (artifact, referrer, fallback) + +**Engineering decisions:** + +1. Store RVA as an OCI artifact manifest with: + + * `artifactType` set to your verdict media type + * `subject` pointing to the exact image/artifact digest being evaluated + OCI 1.1 introduced these fields for associating metadata artifacts and retrieving them via the referrers API. ([opencontainers.org][1]) +2. Support discovery via: + + * Referrers API (`GET /v2//referrers/`) when registry supports it + * **Fallback “tagged index” strategy** for registries that don’t support referrers (OCI 1.1 guidance calls out a fallback tag approach and client responsibilities). ([opencontainers.org][1]) + +**Dev acceptance tests** + +* Push subject image → push RVA artifact with `subject` → query referrers → RVA appears. +* On a registry without referrers support: fallback retrieval still works. + +### B. Use a standard attestation envelope and signing flow + +For attestations, the lowest friction pathway is: + +* in‑toto Statement + DSSE envelope +* Sign/verify using cosign-compatible workflows (so customers can verify without you) ([Sigstore][2]) + +DSSE matters because it: + +* authenticates message + type +* avoids canonicalization pitfalls +* supports arbitrary encodings ([GitHub][4]) + +**Engineering rule:** the signed payload must include enough data to replay and audit (policy + knowledge snapshot digests), but avoid embedding huge evidence blobs directly. + +### C. 
Build determinism into the evaluation core (not bolted on) + +**“Same inputs → same verdict” is a software architecture constraint.** +It fails if any of these are non-deterministic: + +* fetching “latest” vulnerability DB at runtime +* unstable iteration order (maps/hashes) +* timestamps included as decision inputs +* concurrency races changing aggregation order +* floating point scoring without canonical rounding + +**Engineering requirements** + +1. Create a **Knowledge Snapshot** object (content-addressed): + + * a manifest listing every dataset input by digest and version +2. The evaluation function becomes: + + * `Verdict = Evaluate(subject_digest, policy_digest, knowledge_snapshot_digest, engine_version, options_digest)` +3. The RVA must embed those digests so replay is possible offline. + +**Dev acceptance tests** + +* Run Evaluate twice with same snapshot/policy → verdict + reason codes identical. +* Run Evaluate with one dataset changed (snapshot digest differs) → RVA must reflect changed snapshot digest. + +### D. Treat “evidence” as a graph of content-addressed artifacts + +Implement evidence storage with these rules: + +* Large evidence artifacts are stored as OCI blobs/artifacts (SBOM, VEX bundle, reachability proof graph, config snapshot). +* RVA references evidence by digest and type. +* “Explain” traverses this graph and renders: + + * a machine-readable explanation JSON + * a human-readable report + +ORAS guidance highlights artifact typing via `artifactType` in OCI 1.1 and suggests keeping manifests manageable; don’t overload annotations. ([oras.land][5]) + +### E. Provide a verification and policy enforcement path + +You want customers to be able to enforce “only run artifacts with an approved RVA predicate.” + +Two practical patterns: + +* **Cosign verification of attestations** (customers can do `verify-attestation` and validate predicate structure; cosign supports validating attestations with policy languages like Rego/CUE). ([Sigstore][2]) +* **Notation signatures** for organizations that standardize on Notary/Notation for OCI signing/verification workflows. ([Microsoft Learn][3]) + +Engineering should not hard-code one choice; implement an abstraction: + +* signing backend: `cosign/DSSE` first +* optional: notation signature over the RVA artifact for environments that require it + +--- + +## 4) Minimal “v1” spec by example (what your teams should build) + +### A. OCI artifact requirements (registry-facing) + +* artifact is discoverable as a referrer via `subject` linkage and `artifactType` classification (OCI 1.1). ([opencontainers.org][1]) + +### B. 
Attestation payload structure (contract-facing) + +In code terms (illustrative only), build on the in‑toto Statement model: + +```json +{ + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [ + { + "name": "oci://registry.example.com/team/app", + "digest": { "sha256": "" } + } + ], + "predicateType": "https://stellaops.dev/attestations/risk-verdict/v1", + "predicate": { + "verdict": "FAIL", + "reasonCodes": ["RISK.CVE.REACHABLE", "RISK.POLICY.THRESHOLD_EXCEEDED"], + "policy": { "id": "prod-gate", "digest": "sha256:" }, + "knowledgeSnapshot": { "id": "ks-2025-12-19", "digest": "sha256:" }, + "evidence": { + "sbom": { "digest": "sha256:", "format": "cyclonedx-json" }, + "vexBundle": { "digest": "sha256:", "format": "openvex" }, + "vulnData": { "digest": "sha256:" }, + "reachability": { "digest": "sha256:" }, + "unknowns": { "count": 2, "digest": "sha256:" } + }, + "engine": { "name": "stella-eval", "version": "1.3.0" } + } +} +``` + +Cosign supports creating and verifying in‑toto attestations (DSSE-signed), which is exactly the interoperability you want for customer-side verification. ([Sigstore][2]) + +--- + +## 5) Definition of Done (use this to align PM/Eng and prevent scope drift) + +### v1 must satisfy all of the following: + +1. **OCI-attached:** RVA is stored as an OCI artifact referrer to the subject digest and discoverable (referrers API + fallback mode). ([opencontainers.org][1]) +2. **Signed:** RVA can be verified by a standard toolchain (cosign at minimum). ([Sigstore][2]) +3. **Replayable:** Given the embedded policy + knowledge snapshot digests, the evaluation can be replayed and produces the same verdict + reason codes. +4. **Auditor extractable:** One command produces an audit package containing: + + * RVA attestation + * policy bundle + * knowledge snapshot manifest + * referenced evidence artifacts + * an “explanation report” rendering the decision +5. **Stable contract:** predicate schema is versioned and validated (strict JSON schema checks; backwards compatibility rules). diff --git a/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #3.md b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #3.md new file mode 100644 index 000000000..f820deb34 --- /dev/null +++ b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #3.md @@ -0,0 +1,463 @@ +## Outcome you are shipping + +A deterministic “claim resolution” capability that takes: + +* Multiple **claims** about the same vulnerability (vendor VEX, distro VEX, internal assessments, scanner inferences), +* A **policy** describing trust and merge semantics, +* A set of **evidence artifacts** (SBOM, config snapshots, reachability proofs, etc.), + +…and produces a **single resolved status** per vulnerability/component/artifact **with an explainable trail**: + +* Which claims applied and why +* Which were rejected and why +* What evidence was required and whether it was satisfied +* What policy rules triggered the resolution outcome + +This replaces naive precedence like `vendor > distro > internal`. 
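
To make that contract concrete, below is a minimal C# sketch of the resolver boundary. Every type and member name is illustrative (not an existing API); it only shows the shape: claims + policy + evidence + context in, a resolved status with an explainable trail out.

```csharp
using System;
using System.Collections.Generic;

public enum VexStatus { Affected, NotAffected, Fixed, UnderInvestigation, Unknown }

// Canonical inputs, trimmed to the fields that matter for resolution.
public sealed record Claim(string VulnId, string Subject, VexStatus Status,
                           string Issuer, string ClaimHash);
public sealed record EvidenceItem(string Type, string Scope, string OutputHash);
public sealed record PolicyBundle(string PolicyHash);
public sealed record EvaluationContext(string ArtifactDigest, string Environment,
                                       DateTimeOffset EvaluatedAt, string PolicyVersionHash);

// The explainable trail: what won, what was rejected and why, which rules fired.
public sealed record ResolvedDecision(
    VexStatus Status,
    IReadOnlyList<string> AcceptedClaimHashes,
    IReadOnlyList<(string ClaimHash, string Reason)> RejectedClaims,
    IReadOnlyList<string> PolicyRulesApplied,
    string DecisionHash); // digest over inputs + outcome, for replay/audit

public interface IClaimResolver
{
    // Deterministic: same claims + policy + evidence + context => same DecisionHash.
    ResolvedDecision Resolve(IReadOnlyList<Claim> claims, PolicyBundle policy,
                             IReadOnlyList<EvidenceItem> evidence, EvaluationContext context);
}
```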
+ +--- + +# Directions for Product Managers + +## 1) Write the PRD around “claims resolution,” not “VEX support” + +The customer outcome is not “we ingest VEX.” It is: + +* “We can *safely* accept ‘not affected’ without hiding risk.” +* “We can prove, to auditors and change control, why a CVE was downgraded.” +* “We can consistently resolve conflicts between issuer statements.” + +### Non-negotiable product properties + +* **Deterministic**: same inputs → same resolved outcome +* **Explainable**: a human can trace the decision path +* **Guardrailed**: a “safe” resolution requires evidence, not just a statement + +--- + +## 2) Define the core objects (these drive everything) + +In the PRD, define these three objects explicitly: + +### A) Claim (normalized) + +A “claim” is any statement about vulnerability applicability to an artifact/component, regardless of source format. + +Minimum fields: + +* `vuln_id` (CVE/GHSA/etc.) +* `subject` (component identity; ideally package + version + digest/purl) +* `target` (the thing we’re evaluating: image, repo build, runtime instance) +* `status` (affected / not_affected / fixed / under_investigation / unknown) +* `justification` (human/machine reason) +* `issuer` (who said it; plus verification state) +* `scope` (what it applies to; versions, ranges, products) +* `timestamp` (when produced) +* `references` (links/IDs to evidence or external material) + +### B) Evidence + +A typed artifact that can satisfy a requirement. + +Examples (not exhaustive): + +* `config_snapshot` (e.g., Helm values, env var map, feature flag export) +* `sbom_presence_or_absence` (SBOM proof that component is/ isn’t present) +* `reachability_proof` (call-path evidence from entrypoint to vulnerable symbol) +* `symbol_absence` (binary inspection shows symbol/function not present) +* `patch_presence` (artifact includes backport / fixed build) +* `manual_attestation` (human-reviewed attestation with reviewer identity + scope) + +Each evidence item must have: + +* `type` +* `collector` (tool/provider) +* `inputs_hash` and `output_hash` +* `scope` (what artifact/environment it applies to) +* `confidence` (optional but recommended) +* `expires_at` / `valid_for` (for config/runtime evidence) + +### C) Policy + +A policy describes: + +* **Trust rules** (how much to trust whom, under which conditions) +* **Merge semantics** (how to resolve conflicts) +* **Evidence requirements** (what must be present to accept certain claims) + +--- + +## 3) Ship “policy-controlled merge semantics” as a configuration schema first + +Do not start with a fully general policy language. You need a small, explicit schema that makes behavior predictable. + +PM deliverable: a policy spec with these sections: + +1. **Issuer trust** + + * weights by issuer category (vendor/distro/internal/scanner) + * optional constraints (must be signed, must match product ownership, must be within time window) +2. **Applicability rules** + + * what constitutes a match to artifact/component (range semantics, digest match priority) +3. **Evidence requirements** + + * per status + per justification: what evidence types are required +4. **Conflict resolution strategy** + + * conservative vs weighted vs most-specific + * explicit guardrails (never accept “safe” without evidence) +5. 
**Override rules** + + * when internal can override vendor (and what evidence is required to do so) + * environment-specific policies (prod vs dev) + +--- + +## 4) Make “evidence hooks” a first-class user workflow + +You are explicitly shipping the ability to say: + +> “This is not affected **because** feature flag X is off.” + +That requires: + +* a way to **provide or discover** feature flag state, and +* a way to **bind** that flag to the vulnerable surface + +PM must specify: what does the user do to assert that? + +Minimum viable workflow: + +* User attaches a `config_snapshot` (or system captures it) +* User provides a “binding” to the vulnerable module/function: + + * either automatic (later) or manual (first release) + * e.g., `flag X gates module Y` with references (file path, code reference, runbook) + +This “binding” itself becomes evidence. + +--- + +## 5) Define acceptance criteria as decision trace tests + +PM should write acceptance criteria as “given claims + policy + evidence → resolved outcome + trace”. + +You need at least these canonical tests: + +1. **Distro backport vs vendor version logic conflict** + + * Vendor says affected (by version range) + * Distro says fixed (backport) + * Policy says: in distro context, distro claim can override vendor if patch evidence exists + * Outcome: fixed, with trace proving why + +2. **Internal ‘feature flag off’ downgrade** + + * Vendor says affected + * Internal says not_affected because flag off + * Evidence: config snapshot + flag→module binding + * Outcome: not_affected **only for that environment context**, with trace + +3. **Evidence missing** + + * Internal says not_affected because “code not reachable” + * No reachability evidence present + * Outcome: unknown or affected (policy-dependent), but **not “not_affected”** + +4. **Conflicting “safe” claims** + + * Vendor says not_affected (reason A) + * Internal says affected (reason B) with strong evidence + * Outcome follows merge strategy, and trace must show why. + +--- + +## 6) Package it as an “Explainable Resolution” feature + +UI/UX requirements PM must specify: + +* A “Resolved Status” view per vuln/component showing: + + * contributing claims (ranked) + * rejected claims (with reason) + * evidence required vs evidence present + * the policy clauses triggered (line-level references) +* A policy editor can be CLI/JSON first; UI later, but explainability cannot wait. + +--- + +# Directions for Development Managers + +## 1) Implement as three services/modules with strict interfaces + +### Module A: Claim Normalization + +* Inputs: OpenVEX / CycloneDX VEX / CSAF / internal annotations / scanner hints +* Output: canonical `Claim` objects + +Rules: + +* Canonicalize IDs (normalize CVE formats, normalize package coordinates) +* Preserve provenance: issuer identity, signature metadata, timestamps, original document hash + +### Module B: Evidence Providers (plugin boundary) + +* Provide an interface like: + +``` +evaluate_evidence(context, claim) -> EvidenceEvaluation +``` + +Where `EvidenceEvaluation` returns: + +* required evidence types for this claim (from policy) +* found evidence items (from store/providers) +* satisfied / not satisfied +* explanation strings +* confidence + +Start with 3 providers: + +1. SBOM provider (presence/absence) +2. Config provider (feature flags/config snapshot ingestion) +3. 
Reachability provider (even if initially limited or stubbed, it must exist as a typed hook) + +### Module C: Merge & Resolution Engine + +* Inputs: set of claims + policy + evidence evaluations + context +* Output: `ResolvedDecision` + +A `ResolvedDecision` must include: + +* final status +* selected “winning” claim(s) +* all considered claims +* evidence satisfaction summary +* applied policy rule IDs +* deterministic ordering keys/hashes + +--- + +## 2) Define the evaluation context (this avoids foot-guns) + +The resolved outcome must be context-aware. + +Create an immutable `EvaluationContext` object, containing: + +* artifact identity (image digest / build digest / SBOM hash) +* environment identity (prod/stage/dev; cluster; region) +* config snapshot ID +* time (evaluation timestamp) +* policy version hash + +This is how you support: “not affected because feature flag off” in prod but not in dev. + +--- + +## 3) Merge semantics: implement scoring + guardrails, not precedence + +You need a deterministic function. One workable approach: + +### Step 1: compute statement strength + +For each claim: + +* `trust_weight` from policy (issuer + scope + signature requirements) +* `evidence_factor` (1.0 if requirements satisfied; <1 or 0 if not) +* `specificity_factor` (exact digest match > exact version > range) +* `freshness_factor` (optional; policy-defined) +* `applicability` must be true or claim is excluded + +Compute: + +``` +support = trust_weight * evidence_factor * specificity_factor * freshness_factor +``` + +### Step 2: apply merge strategy (policy-controlled) + +Ship at least two strategies: + +1. **Conservative default** + + * If any “unsafe” claim (affected/under_investigation) has support above threshold, it wins + * A “safe” claim (not_affected/fixed) can override only if: + + * it has equal/higher support + delta, AND + * its evidence requirements are satisfied + +2. **Evidence-weighted** + + * Highest support wins, but safe statuses have a hard evidence gate + +### Step 3: apply guardrails + +Hard guardrail to prevent bad outcomes: + +* **Never emit a safe status unless evidence requirements for that safe claim are satisfied.** +* If a safe claim lacks evidence, downgrade the safe claim to “unsupported” and do not allow it to win. + +This single rule is what makes your system materially different from “VEX as suppression.” + +--- + +## 4) Evidence hooks: treat them as typed contracts, not strings + +For “feature flag off,” implement it as a structured evidence requirement. + +Example evidence requirement for a “safe because feature flag off” claim: + +* Required evidence types: + + * `config_snapshot` + * `flag_binding` (the mapping “flag X gates vulnerable surface Y”) + +Implementation: + +* Config provider can parse: + + * Helm values / env var sets / feature flag exports + * Store them as normalized key/value with hashes +* Binding evidence can start as manual JSON that references: + + * repo path / module / function group + * a link to code ownership / runbook + * optional test evidence + +Later you can automate binding via static analysis, but do not block shipping on that. 
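
Tying these pieces together, here is a minimal C# sketch of the support computation and the conservative strategy with its evidence guardrail. The threshold and delta parameters mirror the `unsafe_wins_threshold` / `safe_override_delta` knobs in the example policy later in this document; all names are illustrative.

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

public enum VexStatus { Affected, NotAffected, Fixed, UnderInvestigation, Unknown }

public sealed record ScoredClaim(string ClaimHash, VexStatus Status,
                                 double Support, bool EvidenceSatisfied);

public static class MergeEngine
{
    public static double Support(double trustWeight, double evidenceFactor,
                                 double specificityFactor, double freshnessFactor)
        => trustWeight * evidenceFactor * specificityFactor * freshnessFactor;

    private static bool IsSafe(VexStatus s) => s is VexStatus.NotAffected or VexStatus.Fixed;

    public static ScoredClaim? ResolveConservative(IReadOnlyList<ScoredClaim> claims,
                                                   double unsafeWinsThreshold,
                                                   double safeOverrideDelta)
    {
        var eligible = claims
            // Guardrail: safe claims with unsatisfied evidence are excluded outright.
            .Where(c => !IsSafe(c.Status) || c.EvidenceSatisfied)
            // Deterministic ordering: support desc, claim hash as stable tie-breaker.
            .OrderByDescending(c => c.Support)
            .ThenBy(c => c.ClaimHash, StringComparer.Ordinal)
            .ToList();

        var topUnsafe = eligible.FirstOrDefault(
            c => !IsSafe(c.Status) && c.Support >= unsafeWinsThreshold);
        var topSafe = eligible.FirstOrDefault(c => IsSafe(c.Status));

        if (topUnsafe is null) return topSafe;          // no unsafe claim above threshold
        if (topSafe is not null &&
            topSafe.Support >= topUnsafe.Support + safeOverrideDelta)
            return topSafe;                             // safe wins only with margin + evidence
        return topUnsafe;
    }
}
```

A `null` result maps to “unknown” in the caller, consistent with the guardrail that no safe status is emitted without satisfied evidence.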
+ +--- + +## 5) Determinism requirements (engineering non-negotiables) + +Development manager should enforce: + +* stable sorting of claims by canonical key +* stable tie-breakers (e.g., issuer ID, timestamp, claim hash) +* no nondeterministic external calls during evaluation (or they must be snapshot-based) +* every evaluation produces: + + * `input_bundle_hash` (claims + evidence + policy + context) + * `decision_hash` + +This is the foundation for replayability and audits. + +--- + +## 6) Storage model: store raw inputs and canonical forms + +Minimum stores: + +* Raw documents (original VEX/CSAF/etc.) keyed by content hash +* Canonical claims keyed by claim hash +* Evidence items keyed by evidence hash and scoped by context +* Policy versions keyed by policy hash +* Resolutions keyed by (context, vuln_id, subject) with decision hash + +--- + +## 7) “Definition of done” checklist for engineering + +You are done when: + +1. You can ingest at least two formats into canonical claims (pick OpenVEX + CycloneDX VEX first). +2. You can configure issuer trust and evidence requirements in a policy file. +3. You can resolve conflicts deterministically. +4. You can attach a config snapshot and produce: + + * `not_affected because feature flag off` **only when evidence satisfied** +5. The system produces a decision trace with: + + * applied policy rules + * evidence satisfaction + * selected/rejected claims and reasons +6. Golden test vectors exist for the acceptance scenarios listed above. + +--- + +# A concrete example policy (schema-first, no full DSL required) + +```yaml +version: 1 + +trust: + issuers: + - match: {category: vendor} + weight: 70 + require_signature: true + - match: {category: distro} + weight: 75 + require_signature: true + - match: {category: internal} + weight: 85 + require_signature: false + - match: {category: scanner} + weight: 40 + +evidence_requirements: + safe_status_requires_evidence: true + + rules: + - when: + status: not_affected + reason: feature_flag_off + require: [config_snapshot, flag_binding] + + - when: + status: not_affected + reason: component_not_present + require: [sbom_absence] + + - when: + status: not_affected + reason: not_reachable + require: [reachability_proof] + +merge: + strategy: conservative + unsafe_wins_threshold: 50 + safe_override_delta: 10 +``` + +--- + +# A concrete example output trace (what auditors and engineers must see) + +```json +{ + "vuln_id": "CVE-XXXX-YYYY", + "subject": "pkg:maven/org.example/foo@1.2.3", + "context": { + "artifact_digest": "sha256:...", + "environment": "prod", + "policy_hash": "sha256:..." + }, + "resolved_status": "not_affected", + "because": [ + { + "winning_claim": "claim_hash_abc", + "reason": "feature_flag_off", + "evidence_required": ["config_snapshot", "flag_binding"], + "evidence_present": ["ev_hash_1", "ev_hash_2"], + "policy_rules_applied": ["trust.issuers[internal]", "evidence.rules[0]", "merge.safe_override_delta"] + } + ], + "claims_considered": [ + {"issuer": "vendor", "status": "affected", "support": 62, "accepted": false, "rejection_reason": "overridden_by_higher_support_safe_claim_with_satisfied_evidence"}, + {"issuer": "internal", "status": "not_affected", "support": 78, "accepted": true, "evidence_satisfied": true} + ], + "decision_hash": "sha256:..." +} +``` + +--- + +## The two strategic pitfalls to explicitly avoid + +1. **“Trust precedence” as the merge mechanism** + + * It will fail immediately on backports, forks, downstream patches, and environment-specific mitigations. +2. 
**Allowing “safe” without evidence** + + * That turns VEX into a suppression system and will collapse trust in the product. diff --git a/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #4.md b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #4.md new file mode 100644 index 000000000..2860e998d --- /dev/null +++ b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #4.md @@ -0,0 +1,338 @@ +## Executive directive + +Build **Reachability as Evidence**, not as a UI feature. + +Every reachability conclusion must produce a **portable, signed, replayable evidence bundle** that answers: + +1. **What vulnerable code unit is being discussed?** (symbol/method/function + version) +2. **What entrypoint is assumed?** (HTTP handler, RPC method, CLI, scheduled job, etc.) +3. **What is the witness?** (a call-path subgraph, not a screenshot) +4. **What assumptions/gates apply?** (config flags, feature toggles, runtime wiring) +5. **Can a third party reproduce it?** (same inputs → same evidence hash) + +This must work for **source** and **post-build artifacts**. + +--- + +# Directions for Product Managers + +## 1) Define the product contract in one page + +### Capability name +**Proof‑carrying reachability**. + +### Contract +Given an artifact (source or built) and a vulnerability mapping, Stella Ops outputs: + +- **Reachability verdict:** `REACHABLE | NOT_PROVEN_REACHABLE | INCONCLUSIVE` +- **Witness evidence:** a minimal **reachability subgraph** + one or more witness paths +- **Reproducibility bundle:** all inputs and toolchain metadata needed to replay +- **Attestation:** signed statement tied to the artifact digest + +### Important language choice +Avoid claiming “unreachable” unless you can prove non-reachability under a formally sound model. + +- Use **NOT_PROVEN_REACHABLE** for “no path found under current analysis + assumptions.” +- Use **INCONCLUSIVE** when analysis cannot be performed reliably (missing symbols, obfuscation, unsupported language, dynamic dispatch uncertainty, etc.). + +This is essential for credibility and audit use. + +--- + +## 2) Anchor personas and top workflows + +### Primary personas +- Security governance / AppSec: wants fewer false positives and defensible prioritization. +- Compliance/audit: wants evidence and replayability. +- Engineering teams: wants specific call paths and what to change. + +### Top workflows (must support in MVP) +1. **CI gate with signed verdict** + - “Block release if any `REACHABLE` high severity is present OR if `INCONCLUSIVE` exceeds threshold.” +2. **Audit replay** + - “Reproduce the reachability proof for artifact digest X using snapshot Y.” +3. **Release delta** + - “Show what reachability changed between release A and B.” + +--- + +## 3) Minimum viable scope: pick targets that make “post-build” real early + +To satisfy “source and post-build artifacts” without biting off ELF-level complexity first: + +### MVP artifact types (recommended) +- **Source repository** for 1–2 languages with mature static IR +- **Post-build intermediate artifacts** that retain symbol structure: + - Java `.jar/.class` + - .NET assemblies + - Python wheels (bytecode) + - Node bundles with sourcemaps (optional) + +These give you “post-build” support where call graphs are tractable. 
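
Looking back at workflow 1 (“CI gate with signed verdict”), a minimal C# sketch of the gate predicate; the severity labels, threshold parameter, and type names are assumptions for illustration, not an existing API.

```csharp
using System.Collections.Generic;
using System.Linq;

public enum ReachabilityVerdict { Reachable, NotProvenReachable, Inconclusive }

public sealed record Finding(string VulnId, string Severity, ReachabilityVerdict Verdict);

public static class ReleaseGate
{
    // "Block release if any REACHABLE high severity is present OR if
    // INCONCLUSIVE exceeds threshold."
    public static bool ShouldBlock(IReadOnlyList<Finding> findings, int inconclusiveThreshold)
    {
        bool reachableHighSeverity = findings.Any(f =>
            f.Verdict == ReachabilityVerdict.Reachable &&
            (f.Severity == "HIGH" || f.Severity == "CRITICAL"));

        int inconclusiveCount = findings.Count(f =>
            f.Verdict == ReachabilityVerdict.Inconclusive);

        return reachableHighSeverity || inconclusiveCount > inconclusiveThreshold;
    }
}
```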
+ +### Defer for later phases +- Native ELF/Mach-O deep reachability (harder due to stripping, inlining, indirect calls, dynamic loading) +- Highly dynamic languages without strong type info, unless you accept “witness-only” semantics + +Your differentiator is proof portability and determinism, not “supports every binary on day one.” + +--- + +## 4) Product requirements: what “proof-carrying” means in requirements language + +### Functional requirements +- Output must include a **reachability subgraph**: + - Nodes = code units (function/method) with stable IDs + - Edges = call or dispatch edges with type annotations + - Must include at least one **witness path** from entrypoint to vulnerable node when `REACHABLE` +- Output must be **artifact-tied**: + - Evidence must reference artifact digest(s) (source commit, build artifact digest, container image digest) +- Output must be **attestable**: + - Produce a signed attestation (DSSE/in-toto style) attached to the artifact digest +- Output must be **replayable**: + - Provide a “replay recipe” (analyzer versions, configs, vulnerability mapping version, and input digests) + +### Non-functional requirements +- Deterministic: repeated runs on same inputs produce identical evidence hash +- Size-bounded: subgraph evidence must be bounded (e.g., path-based extraction + limited context) +- Privacy-controllable: + - Support a mode that avoids embedding raw source content (store pointers/hashes instead) +- Verifiable offline: + - Verification and replay must work air-gapped given the snapshot bundle + +--- + +## 5) Acceptance criteria (use as Definition of Done) + +A feature is “done” only when: + +1. **Verifier can validate** the attestation signature and confirm the evidence hash matches content. +2. A second machine can **reproduce the same evidence hash** given the replay bundle. +3. Evidence includes at least one witness path for `REACHABLE`. +4. Evidence includes explicit assumptions/gates; absence of gating is recorded as an assumption (e.g., “config unknown”). +5. Evidence is **linked to the precise artifact digest** being deployed/scanned. + +--- + +## 6) Product packaging decisions that create switching cost + +These are product decisions that turn engineering into moat: + +- **Make “reachability proof” an exportable object**, not just a UI view. +- Provide an API: `GET /findings/{id}/proof` returning canonical evidence. +- Support policy gates on: + - `verdict` + - `confidence` + - `assumption_count` + - `inconclusive_reasons` +- Make “proof replay” a one-command workflow in CLI. + +--- + +# Directions for Development Managers + +## 1) Architecture: build a “proof pipeline” with strict boundaries + +Implement as composable modules with stable interfaces: + +1. **Artifact Resolver** + - Inputs: repo URL/commit, build artifact path, container image digest + - Output: normalized “artifact record” with digests and metadata + +2. **Graph Builder (language-specific adapters)** + - Inputs: artifact record + - Output: canonical **Program Graph** + - Nodes: code units + - Edges: calls/dispatch + - Optional: config gates, dependency edges + +3. **Vulnerability-to-Code Mapper** + - Inputs: vulnerability record (CVE), package coordinates, symbol metadata (if available) + - Output: vulnerable node set + mapping confidence + +4. **Entrypoint Modeler** + - Inputs: artifact + runtime context (framework detection, routing tables, main methods) + - Output: entrypoint node set with types (HTTP, RPC, CLI, cron) + +5. 
**Reachability Engine** + - Inputs: graph + entrypoints + vulnerable nodes + constraints + - Output: witness paths + minimal subgraph extraction + +6. **Evidence Canonicalizer** + - Inputs: witness paths + subgraph + metadata + - Output: canonical JSON (stable ordering, stable IDs), plus content hash + +7. **Attestor** + - Inputs: evidence hash + artifact digest + - Output: signed attestation object (OCI attachable) + +8. **Verifier (separate component)** + - Must validate signatures + evidence integrity independently of generator + +Critical: generator and verifier must be decoupled to preserve trust. + +--- + +## 2) Evidence model: what to store (and how to keep it stable) + +### Node identity must be stable across runs +Define a canonical NodeID scheme: + +- Source node ID: + - `{language}:{repo_digest}:{symbol_signature}:{optional_source_location_hash}` +- Post-build node ID: + - `{language}:{artifact_digest}:{symbol_signature}:{optional_offset_or_token}` + +Avoid raw file paths or non-deterministic compiler offsets as primary IDs unless normalized. + +### Edge identity +`{caller_node_id} -> {callee_node_id} : {edge_type}` +Edge types matter (direct call, virtual dispatch, reflection, dynamic import, etc.) + +### Subgraph extraction rule +Store: +- All nodes/edges on at least one witness path (or k witness paths) +- Plus bounded context: + - 1–2 hop neighborhood around the vulnerable node and entrypoint + - routing edges (HTTP route → handler) where applicable + +This makes the proof compact and audit-friendly. + +### Canonicalization requirements +- Stable sorting of nodes and edges +- Canonical JSON serialization (no map-order nondeterminism) +- Explicit analyzer version + config included in evidence +- Hash everything that influences results + +--- + +## 3) Determinism and reproducibility: engineering guardrails + +### Deterministic computation +- Avoid parallel graph traversal that yields nondeterministic order without canonical sorting +- If using concurrency, collect results and sort deterministically before emitting + +### Repro bundle (“time travel”) +Persist, as digests: +- Analyzer container/image digest +- Analyzer config hash +- Vulnerability mapping dataset version hash +- Artifact digest(s) +- Graph builder version hash + +A replay must be possible without “calling home.” + +### Golden tests +Create fixtures where: +- Same input graph + mapping → exact evidence hash +- Regression test for canonicalization changes (version the schema intentionally) + +--- + +## 4) Attestation format and verification + +### Attestation contents (minimum) +- Subject: artifact digest (image digest / build artifact digest) +- Predicate: reachability evidence hash + metadata +- Predicate type: `reachability` (custom) with versioning + +### Verification requirements +- Verification must run offline +- It must validate: + 1) signature + 2) subject digest binding + 3) evidence hash matches serialized evidence + +### Storage model +Use content-addressable storage keyed by evidence hash. +Attestation references the hash; evidence stored separately or embedded (size tradeoff). + +--- + +## 5) Source + post-build support: engineering plan + +### Unifying principle +Both sources produce the same canonical Program Graph abstraction. 
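
A minimal C# sketch of that shared abstraction (type names illustrative): both source and post-build adapters emit this same shape, so the evidence schema and attestation workflow downstream stay identical.

```csharp
using System.Collections.Generic;

// Edge types matter for audit: direct call vs. dispatch vs. reflection, etc.
public enum EdgeType { DirectCall, VirtualDispatch, Reflection, DynamicImport, Route }

// NodeId follows the canonical scheme above, e.g.
// "{language}:{artifact_digest}:{symbol_signature}:{optional_offset_or_token}".
public sealed record GraphNode(string NodeId, string SymbolSignature,
                               IReadOnlyDictionary<string, string> Metadata);

public sealed record GraphEdge(string CallerNodeId, string CalleeNodeId, EdgeType Type);

public sealed record ProgramGraph(
    string ArtifactDigest,
    IReadOnlyList<GraphNode> Nodes,   // emitted in canonical (sorted) order
    IReadOnlyList<GraphEdge> Edges);  // stable ordering for deterministic hashing
```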
+ +#### Source analyzers produce: +- Function/method nodes using language signatures +- Edges from static analysis IR + +#### Post-build analyzers produce: +- Nodes from bytecode/assembly symbol tables (where available) +- Edges from bytecode call instructions / metadata + +### Practical sequencing (recommended) +1. Implement one source language adapter (fastest to prove model) +2. Implement one post-build adapter where symbols are rich (e.g., Java bytecode) +3. Ensure evidence schema and attestation workflow works identically for both +4. Expand to more ecosystems once the proof pipeline is stable + +--- + +## 6) Operational constraints (performance, size, security) + +### Performance +- Cache program graphs per artifact digest +- Cache vulnerability-to-code mapping per package/version +- Compute reachability on-demand per vulnerability, but reuse graphs + +### Evidence size +- Limit witness paths (e.g., up to N shortest paths) +- Prefer “witness + bounded neighborhood” over exporting full call graph + +### Security and privacy +- Provide a “redacted proof mode” + - include symbol hashes instead of raw names if needed + - store source locations as hashes/pointers +- Never embed raw source code unless explicitly enabled + +--- + +## 7) Definition of Done for the engineering team + +A milestone is complete when you can demonstrate: + +1. Generate a reachability proof for a known vulnerable code unit with a witness path. +2. Serialize a canonical evidence subgraph and compute a stable hash. +3. Sign the attestation bound to the artifact digest. +4. Verify the attestation on a clean machine (offline). +5. Replay the analysis from the replay bundle and reproduce the same evidence hash. + +--- + +# Concrete artifact example (for alignment) + +A reachability evidence object should look structurally like: + +- `subject`: artifact digest(s) +- `claim`: + - `verdict`: REACHABLE / NOT_PROVEN_REACHABLE / INCONCLUSIVE + - `entrypoints`: list of NodeIDs + - `vulnerable_nodes`: list of NodeIDs + - `witness_paths`: list of paths (each path = ordered NodeIDs) +- `subgraph`: + - `nodes`: list with stable IDs + metadata + - `edges`: list with stable ordering + edge types +- `assumptions`: + - gating conditions, unresolved dynamic dispatch notes, etc. +- `tooling`: + - analyzer name/version/digest + - config hash + - mapping dataset hash +- `hashes`: + - evidence content hash + - schema version + +Then wrap and sign it as an attestation tied to the artifact digest. + +--- + +## The one decision you should force early + +Decide (and document) whether your semantics are: + +- **Witness-based** (“REACHABLE only if we can produce a witness path”), and +- **Conservative on negative claims** (“NOT_PROVEN_REACHABLE” is not “unreachable”). + +This single decision will keep the system honest, reduce legal/audit risk, and prevent the product from drifting into hand-wavy “trust us” scoring. diff --git a/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #5.md b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #5.md new file mode 100644 index 000000000..597d0fc53 --- /dev/null +++ b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #5.md @@ -0,0 +1,268 @@ +## 1) Product direction: make “Unknowns” a first-class risk primitive + +### Non‑negotiable product principles + +1. **Unknowns are not suppressed findings** + + * They are a distinct state with distinct governance. +2. **Unknowns must be policy-addressable** + + * If policy cannot block or allow them explicitly, the feature is incomplete. +3. 
**Unknowns must be attested** + + * Every signed decision must carry “what we don’t know” in a machine-readable way. +4. **Unknowns must be default-on** + + * Users may adjust thresholds, but they must not be able to “turn off unknown tracking.” + +### Definition: what counts as an “unknown” + +PMs must ensure that “unknown” is not vague. Define **reason-coded unknowns**, for example: + +* **U-RCH**: Reachability unknown (call path indeterminate) +* **U-ID**: Component identity unknown (ambiguous package / missing digest / unresolved PURL) +* **U-PROV**: Provenance unknown (cannot map binary → source/build) +* **U-VEX**: VEX conflict or missing applicability statement +* **U-FEED**: Knowledge source missing (offline feed gaps, mirror stale) +* **U-CONFIG**: Config/runtime gate unknown (feature flag not observable) +* **U-ANALYZER**: Analyzer limitation (language/framework unsupported) + +Each unknown must have: + +* `reason_code` (one of a stable enum) +* `scope` (component, binary, symbol, package, image, repo) +* `evidence_refs` (what we inspected) +* `assumptions` (what would need to be true/false) +* `remediation_hint` (how to reduce unknown) + +**Acceptance criterion:** every unknown surfaced to users can be traced to a reason code and remediation hint. + +--- + +## 2) Policy direction: “unknown budgets” must be enforceable and environment-aware + +### Policy model requirements + +Policy must support: + +* Thresholds by environment (dev/test/stage/prod) +* Thresholds by unknown type (reachability vs provenance vs feed, etc.) +* Severity weighting (e.g., unknown on internet-facing service is worse) +* Exception workflow (time-bound, owner-bound) +* Deterministic evaluation (same inputs → same result) + +### Recommended default policy posture (ship as opinionated defaults) + +These defaults are intentionally strict in prod: + +**Prod (default)** + +* `unknown_reachable == 0` (fail build/deploy) +* `unknown_provenance == 0` (fail) +* `unknown_total <= 3` (fail if exceeded) +* `unknown_feed == 0` (fail; “we didn’t have data” is unacceptable for prod) + +**Stage** + +* `unknown_reachable <= 1` +* `unknown_provenance <= 1` +* `unknown_total <= 10` + +**Dev** + +* Never hard fail by default; warn + ticket/PR annotation +* Still compute unknowns and show trendlines (so teams see drift) + +### Exception policy (required to avoid “disable unknowns” pressure) + +Implement **explicit exceptions** rather than toggles: + +* Exception must include: `owner`, `expiry`, `justification`, `scope`, `risk_ack` +* Exception must be emitted into attestations and reports (“this passed with exception X”). + +**Acceptance criterion:** there is no “turn off unknowns” knob; only thresholds and expiring exceptions. + +--- + +## 3) Reporting direction: unknowns must be visible, triaged, and trendable + +### Required reporting surfaces + +1. **Release / PR report** + + * Unknown summary at top: + + * total unknowns + * unknowns by reason code + * unknowns blocking policy vs not + * “What changed?” vs previous baseline (unknown delta) +2. **Dashboard (portfolio view)** + + * Unknowns over time + * Top teams/services by unknown count + * Top unknown causes (reason codes) +3. 
**Operational triage view** + + * “Unknown queue” sortable by: + + * environment impact (prod/stage) + * exposure class (internet-facing/internal) + * reason code + * last-seen time + * owner + +### Reporting should drive action, not anxiety + +Every unknown row must include: + +* Why it’s unknown (reason code + short explanation) +* What evidence is missing +* How to reduce unknown (concrete steps) +* Expected effect (e.g., “adding debug symbols will likely reduce U-RCH by ~X”) + +**Key PM instruction:** treat unknowns like an **SLO**. Teams should be able to commit to “unknowns in prod must trend to zero.” + +--- + +## 4) Attestations direction: unknowns must be cryptographically bound to decisions + +Every signed decision/attestation must include an “unknowns summary” section. + +### Attestation requirements + +Include at minimum: + +* `unknown_total` +* `unknown_by_reason_code` (map of reason→count) +* `unknown_blocking_count` +* `unknown_details_digest` (hash of the full list if too large) +* `policy_thresholds_applied` (the exact thresholds used) +* `exceptions_applied` (IDs + expiries) +* `knowledge_snapshot_id` (feeds/policy bundle hash if you support offline snapshots) + +**Why this matters:** if you sign a “pass,” you must also sign what you *didn’t know* at the time. Otherwise the signature is not audit-grade. + +**Acceptance criterion:** any downstream verifier can reject a signed “pass” based solely on unknown fields (e.g., “reject if unknown_reachable>0 in prod”). + +--- + +## 5) Development direction: implement unknown propagation as a first-class data flow + +### Core engineering tasks (must be done in this order) + +#### A. Define the canonical “Tri-state” evaluation type + +For any security claim, the evaluator must return: + +* `TRUE` (evidence supports) +* `FALSE` (evidence refutes) +* `UNKNOWN` (insufficient evidence) + +Do not represent unknown as nulls or missing fields. It must be explicit. + +#### B. Build the unknown aggregator and reason-code framework + +* A single aggregation layer computes: + + * unknown counts per scope + * unknown counts per reason code + * unknown “blockers” based on policy +* This must be deterministic and stable (no random ordering, stable IDs). + +#### C. Ensure analyzers emit unknowns instead of silently failing + +Any analyzer that cannot conclude must emit: + +* `UNKNOWN` + reason code + evidence pointers + Examples: +* call graph incomplete → `U-RCH` +* stripped binary cannot map symbols → `U-PROV` +* unsupported language → `U-ANALYZER` + +#### D. Provide “reduce unknown” instrumentation hooks + +Attach remediation metadata: + +* “add build flags …” +* “upload debug symbols …” +* “enable source mapping …” +* “mirror feeds …” + +This is how you prevent user backlash. 
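
To pin down tasks A–C above, a minimal C# sketch of the tri-state type and a reason-coded unknown; names are illustrative.

```csharp
using System.Collections.Generic;

// Explicit tri-state: Unknown is a real value, never a null or a missing field.
public enum Evaluation { True, False, Unknown }

public enum UnknownReason { U_RCH, U_ID, U_PROV, U_VEX, U_FEED, U_CONFIG, U_ANALYZER }

public sealed record UnknownRecord(
    UnknownReason ReasonCode,
    string Scope,                        // component, binary, symbol, package, image, repo
    IReadOnlyList<string> EvidenceRefs,  // what we inspected
    IReadOnlyList<string> Assumptions,   // what would need to be true/false
    string RemediationHint);             // concrete step to reduce the unknown

public sealed record ClaimEvaluation(Evaluation Result, UnknownRecord? Unknown)
{
    // Analyzers that cannot conclude return this instead of failing silently.
    public static ClaimEvaluation Unknowable(UnknownRecord unknown)
        => new(Evaluation.Unknown, unknown);
}
```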
+ +--- + +## 6) Make it default rather than optional: rollout plan without breaking adoption + +### Phase 1: compute + display (no blocking) + +* Unknowns computed for all scans +* Reports show unknown budgets and what would have failed in prod +* Collect baseline metrics for 2–4 weeks of typical usage + +### Phase 2: soft gating + +* In prod-like pipelines: fail only on `unknown_reachable > 0` +* Everything else warns + requires owner acknowledgement + +### Phase 3: full policy enforcement + +* Enforce default thresholds +* Exceptions require expiry and are visible in attestations + +### Phase 4: governance integration + +* Unknowns become part of: + + * release readiness checks + * quarterly risk reviews + * vendor compliance audits + +**Dev Manager instruction:** invest in tooling that reduces unknowns early (symbol capture, provenance mapping, better analyzers). Otherwise “unknown gating” becomes politically unsustainable. + +--- + +## 7) “Definition of Done” checklist for PMs and Dev Managers + +### PM DoD + +* [ ] Unknowns are explicitly defined with stable reason codes +* [ ] Policy can fail on unknowns with environment-scoped thresholds +* [ ] Reports show unknown deltas and remediation guidance +* [ ] Exceptions are time-bound and appear everywhere (UI + API + attestations) +* [ ] Unknowns cannot be disabled; only thresholds/exceptions are configurable + +### Engineering DoD + +* [ ] Tri-state evaluation implemented end-to-end +* [ ] Analyzer failures never disappear; they become unknowns +* [ ] Unknown aggregation is deterministic and reproducible +* [ ] Signed attestation includes unknown summary + policy thresholds + exceptions +* [ ] CI/CD integration can enforce “fail if unknowns > N in prod” + +--- + +## 8) Concrete policy examples you can standardize internally + +### Minimal policy (prod) + +* Block deploy if: + + * `unknown_reachable > 0` + * OR `unknown_provenance > 0` + +### Balanced policy (prod) + +* Block deploy if: + + * `unknown_reachable > 0` + * OR `unknown_provenance > 0` + * OR `unknown_total > 3` + +### Risk-sensitive policy (internet-facing prod) + +* Block deploy if: + + * `unknown_reachable > 0` + * OR `unknown_total > 1` + * OR any unknown affects a component with known remotely-exploitable CVEs diff --git a/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #6.md b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #6.md new file mode 100644 index 000000000..c2f648b5b --- /dev/null +++ b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #6.md @@ -0,0 +1,299 @@ +## 1) Anchor the differentiator in one sentence everyone repeats + +**Positioning invariant:** +Stella Ops does not “consume VEX to suppress findings.” Stella Ops **verifies who made the claim, scores how much to trust it, deterministically applies it to a decision, and emits a signed, replayable verdict**. + +Everything you ship should make that sentence more true. + +--- + +## 2) Shared vocabulary PMs/DMs must standardize + +If you don’t align on these, you’ll ship features that look similar to competitors but do not compound into a moat. + +### Core objects +- **VEX source**: a distribution channel and issuer identity (e.g., vendor feed, distro feed, OCI-attached attestation). +- **Issuer identity**: cryptographic identity used to sign/attest the VEX (key/cert/OIDC identity), not a string. 
- **VEX statement**: one claim about one vulnerability status for one or more products; common statuses include *Not Affected, Affected, Fixed, Under Investigation* (terminology varies by format).
- **Verification result**: cryptographic + semantic verification facts about a VEX document/source.
- **Trust score**: deterministic numeric/ranked evaluation of the source and/or statement quality.
- **Decision**: a policy outcome (pass/fail/needs-review) for a specific artifact or release.
- **Attestation**: signed statement bound to an artifact (e.g., OCI artifact) that captures decision + evidence.
- **Knowledge snapshot**: frozen set of inputs (VEX docs, keys, policies, vulnerability DB versions, scoring code version) required for deterministic replay.

---

## 3) Product Manager guidelines

### 3.1 Treat “VEX source onboarding” as a first-class product workflow
Your differentiator collapses if VEX is just “upload a file.”

**PM requirements:**
1. **VEX Source Registry UI/API**
   - Add/edit a source: URL/feed/OCI pattern, update cadence, expected issuer(s), allowed formats.
   - Define trust policy per source (thresholds, allowed statuses, expiry, overrides).
2. **Issuer enrollment & key lifecycle**
   - Capture: issuer identity, trust anchor, rotation, revocation/deny-list, “break-glass disable.”
3. **Operational status**
   - Source health: last fetch, last verified doc, signature failures, schema failures, drift.

**Why it matters:** customers will only operationalize VEX at scale if they can **govern it like a dependency feed**, not like a manual exception list.

### 3.2 Make “verification” visible, not implied
If users can’t see it, they won’t trust it—and auditors won’t accept it.

**Minimum UX per VEX document/statement:**
- Verification status: **Verified / Unverified / Failed**
- Issuer identity: who signed it (and via what trust anchor)
- Format + schema validation status (OpenVEX JSON schema exists and is explicitly recommended for validation).
- Freshness: timestamp, last updated
- Product mapping coverage: “X of Y products matched to SBOM/components”

### 3.3 Provide “trust score explanations” as a primary UI primitive
Trust scoring must not feel like a magic number.

**UX requirement:** every trust score shows a **breakdown** (e.g., Identity 30/30, Authority 20/25, Freshness 8/10, Evidence quality 6/10…).

This is both:
- a user adoption requirement (security teams will challenge it), and
- a moat hardener (competitors rarely expose scoring mechanics).

### 3.4 Define policy experiences that force deterministic coupling
You are not building a “VEX viewer.” You are building **decisioning**.

Policies must allow:
- “Accept VEX only if verified AND trust score ≥ threshold”
- “Accept Not Affected only if justification/impact statement exists”
- “If conflicting VEX exists, resolve by trust-weighted precedence”
- “For unverified VEX, treat status as Under Investigation (or Unknown), not Not Affected”

This aligns with CSAF’s VEX profile expectation that *known_not_affected* should have an impact statement (machine-readable flag or human-readable justification).

### 3.5 Ship “audit export” as a product feature, not a report
Auditors want to know:
- which VEX claims were applied,
- who asserted them,
- what trust policy allowed them,
- and what was the resulting decision.
ENISA’s SBOM guidance explicitly emphasizes “historical snapshots” and “evidence chain integrity” as success criteria for SBOM/VEX integration programs.

So your product needs:
- exportable evidence bundles (machine-readable)
- signed verdicts linked to the artifact
- replay semantics (“recompute this exact decision later”)

### 3.6 MVP scoping: start with sources that prove the model
For early product proof, prioritize sources that:
- are official,
- have consistent structure,
- publish frequently,
- contain configuration nuance.

Example: Ubuntu publishes VEX following OpenVEX, emphasizing exploitability in specific configurations and providing official distribution points (tarball + GitHub).

This gives you a clean first dataset for verification/trust scoring behaviors.

---

## 4) Development Manager guidelines

### 4.1 Architect it as a pipeline with hard boundaries
Do not mix verification, scoring, and decisioning in one component. You need isolatable, testable stages.

**Recommended pipeline stages:**
1. **Ingest**
   - Fetch from registry/OCI
   - Deduplicate by content hash
2. **Parse & normalize**
   - Convert OpenVEX / CSAF VEX / CycloneDX VEX into a **canonical internal VEX model**
   - Note: OpenVEX explicitly calls out that CycloneDX VEX uses different status/justification labels and may need translation.
3. **Verify (cryptographic + semantic)**
4. **Trust score (pure function)**
5. **Conflict resolve**
6. **Decision**
7. **Attest + persist snapshot**

### 4.2 Verification must include both cryptography and semantics

#### Cryptographic verification (minimum bar)
- Verify signature/attestation against expected issuer identity.
- Validate certificate/identity chains per customer trust anchors.
- Support OCI-attached artifacts and “signature-of-signature” patterns (Sigstore describes countersigning: signature artifacts can themselves be signed).

#### Semantic verification (equally important)
- Schema validation (OpenVEX provides JSON schema guidance).
- Vulnerability identifier validity (CVE/aliases)
- Product reference validity (e.g., purl)
- Statement completeness rules:
  - “Not affected” must include rationale; the CSAF VEX profile requires an impact statement for known_not_affected in flags or threats.
- Cross-check the statement scope against known SBOM/components:
  - If the VEX references products that do not exist in the artifact SBOM, the claim should not affect the decision (or should reduce trust sharply).

### 4.3 Trust scoring must be deterministic by construction
If trust scoring varies between runs, you cannot produce replayable, attestable decisions.

**Rules for determinism:**
- Trust score is a **pure function** of:
  - VEX document hash
  - verification result
  - source configuration (immutable version)
  - scoring algorithm version
  - evaluation timestamp (explicit input, included in snapshot)
- Never call external services during scoring unless responses are captured and hashed into the snapshot.

### 4.4 Implement two trust concepts: Source Trust and Statement Quality
Do not overload one score to do everything.
- **Source Trust**: “how much do we trust the issuer/channel?”
- **Statement Quality**: “how well-formed, specific, justified is this statement?”

You can then combine them:
`TrustScore = f(SourceTrust, StatementQuality, Freshness, TrackRecord)`

### 4.5 Conflict resolution must be policy-driven, not hard-coded
Conflicting VEX is inevitable:
- vendor vs distro
- older vs newer
- internal vs external

Resolve via:
- deterministic precedence rules configured per tenant
- trust-weighted tie-breakers
- “newer statement wins” only when issuer is the same or within the same trust class

### 4.6 Store VEX and decision inputs as content-addressed artifacts
If you want replayability, you must be able to reconstruct the “world state.”

**Persist:**
- VEX docs (by digest)
- verification artifacts (signature bundles, cert chains)
- normalized VEX statements (canonical form)
- trust score + breakdown + algorithm version
- policy bundle + version
- vulnerability DB snapshot identifiers
- decision output + evidence pointers

---

## 5) A practical trust scoring rubric you can hand to teams

Use a 0–100 score with defined buckets. The weights below are a starting point; what matters is consistency and explainability.

### 5.1 Source Trust (0–60)
1. **Issuer identity verified (0–25)**
   - 0 if unsigned/unverifiable
   - 25 if signature verified to a known trust anchor
2. **Issuer authority alignment (0–20)**
   - 20 if issuer is the product supplier/distro maintainer for that component set
   - lower if third party / aggregator
3. **Distribution integrity (0–15)**
   - extra credit if the VEX is distributed as an attestation bound to an artifact and/or uses auditable signature patterns (e.g., countersigning).

### 5.2 Statement Quality (0–40)
1. **Scope specificity (0–15)**
   - exact product IDs (purl), versions, architectures, etc.
2. **Justification/impact present and structured (0–15)**
   - CSAF VEX expects an impact statement for known_not_affected; Ubuntu maps “not_affected” to justifications like `vulnerable_code_not_present`.
3. **Freshness (0–10)**
   - based on statement/document timestamps (explicitly hashed into snapshot)

### Score buckets
- **90–100**: Verified + authoritative + high-quality → eligible for gating
- **70–89**: Verified but weaker evidence/scope → eligible with policy constraints
- **40–69**: Mixed/partial trust → informational, not gating by default
- **0–39**: Unverified/low quality → do not affect decisions

---

## 6) Tight coupling to deterministic decisioning: what “coupling” means in practice

### 6.1 VEX must be an input to the same deterministic evaluation engine that produces the verdict
Do not build “VEX handling” as a sidecar that produces annotations.

**Decision engine inputs must include:**
- SBOM / component graph
- vulnerability findings
- normalized VEX statements
- verification results + trust scores
- tenant policy bundle
- evaluation timestamp + snapshot identifiers

The engine output must include:
- final status per vulnerability (affected/not affected/fixed/under investigation/unknown)
- **why** (evidence pointers)
- the policy rule(s) that caused it

### 6.2 Default posture: fail-safe, not fail-open
Recommended defaults:
- **Unverified VEX never suppresses vulnerabilities.**
- Trust score below threshold never suppresses.
- “Not affected” without justification/impact statement never suppresses.
+
+This is aligned with CSAF VEX expectations and avoids the easiest suppression attack vector.
+
+### 6.3 Make uncertainty explicit
+If VEX conflicts or is low trust, your decisioning must produce explicit states like:
+- “Unknown (insufficient trusted VEX)”
+- “Under Investigation”
+
+That is consistent with common VEX status vocabulary and avoids false certainty.
+
+---
+
+## 7) Tight coupling to attestations: what to attest, when, and why
+
+### 7.1 Attest **decisions**, not just documents
+Competitors already sign SBOMs. Your moat is signing the **verdict** with the evidence chain.
+
+Each signed verdict should bind:
+- subject artifact digest (container/image/package)
+- decision output (pass/fail/etc.)
+- hashes of:
+  - VEX docs used
+  - verification artifacts
+  - trust scoring breakdown
+  - policy bundle
+  - vulnerability DB snapshot identifiers
+
+### 7.2 Make attestations replayable
+Your attestation must contain enough references (digests) that the system can:
+- re-run the decision in an air-gapped environment
+- obtain the same outputs
+
+This aligns with the “historical snapshots” / “evidence chain integrity” expectations in modern SBOM programs.
+
+### 7.3 Provide two attestations (recommended)
+1. **VEX intake attestation** (optional but powerful)
+   - “We ingested and verified this VEX doc from issuer X under policy Y.”
+2. **Risk verdict attestation** (core differentiator)
+   - “Given SBOM, vulnerabilities, verified VEX, and policy snapshot, the artifact is acceptable/unacceptable.”
+
+Sigstore’s countersigning concept illustrates that you can add layers of trust over artifacts/signatures; your verdict is the enterprise-grade layer.
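+As a sketch of the §7.1 verdict attestation, the payload could be a record whose fields are all digests or snapshot identifiers; every name here is a hypothetical illustration, not the product schema:
+
+```csharp
+// Everything a replayer needs is a digest or identifier; re-running the engine
+// against these pinned inputs must reproduce Decision exactly.
+public sealed record RiskVerdictAttestation(
+    string SubjectArtifactDigest,           // container/image/package digest
+    string Decision,                        // e.g. "pass" or "fail"
+    IReadOnlyList<string> VexDocDigests,    // hashes of the VEX docs used
+    string VerificationArtifactsDigest,     // signature bundles, cert chains
+    string TrustScoreBreakdownDigest,       // scoring breakdown + algorithm version
+    string PolicyBundleDigest,              // exact policy snapshot
+    string VulnDbSnapshotId,                // vulnerability DB snapshot identifier
+    DateTimeOffset EvaluatedAt);            // explicit evaluation timestamp
+```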
+
+---
+
+## 8) “Definition of Done” checklists (use in roadmaps)
+
+### PM DoD for VEX Trust (ship criteria)
+- A customer can onboard a VEX source and see issuer identity + verification state.
+- A trust score exists with a visible breakdown and policy thresholds.
+- Policies can gate on trust score + verification.
+- Audit export: per release, show which VEX claims affected the final decision.
+
+### DM DoD for Deterministic + Attestable
+- Same inputs → identical trust score and decision (golden tests).
+- All inputs content-addressed and captured in a snapshot bundle.
+- Attestation includes digests of all relevant inputs and a decision summary.
+- No network dependency at evaluation time unless recorded in the snapshot.
+
+---
+
+## 9) Metrics that prove you differentiated
+
+Track these from the first pilot:
+1. **% of decisions backed by verified VEX** (not just present)
+2. **% of “not affected” outcomes with cryptographic verification + justification**
+3. **Replay success rate** (recompute verdict from snapshot)
+4. **Time-to-audit** (minutes to produce the evidence chain for a release)
+5. **False suppression rate** (should be effectively zero with fail-safe defaults)
diff --git a/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #7.md b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #7.md
new file mode 100644
index 000000000..597d0fc53
--- /dev/null
+++ b/docs/product-advisories/unprocessed/19-Dec-2025 - Moat #7.md
@@ -0,0 +1,268 @@
+## 1) Product direction: make “Unknowns” a first-class risk primitive
+
+### Non‑negotiable product principles
+
+1. **Unknowns are not suppressed findings**
+
+   * They are a distinct state with distinct governance.
+2. **Unknowns must be policy-addressable**
+
+   * If policy cannot block or allow them explicitly, the feature is incomplete.
+3. **Unknowns must be attested**
+
+   * Every signed decision must carry “what we don’t know” in a machine-readable way.
+4. **Unknowns must be default-on**
+
+   * Users may adjust thresholds, but they must not be able to “turn off unknown tracking.”
+
+### Definition: what counts as an “unknown”
+
+PMs must ensure that “unknown” is not vague. Define **reason-coded unknowns**, for example:
+
+* **U-RCH**: Reachability unknown (call path indeterminate)
+* **U-ID**: Component identity unknown (ambiguous package / missing digest / unresolved PURL)
+* **U-PROV**: Provenance unknown (cannot map binary → source/build)
+* **U-VEX**: VEX conflict or missing applicability statement
+* **U-FEED**: Knowledge source missing (offline feed gaps, stale mirror)
+* **U-CONFIG**: Config/runtime gate unknown (feature flag not observable)
+* **U-ANALYZER**: Analyzer limitation (language/framework unsupported)
+
+Each unknown must have:
+
+* `reason_code` (one of a stable enum)
+* `scope` (component, binary, symbol, package, image, repo)
+* `evidence_refs` (what we inspected)
+* `assumptions` (what would need to be true/false)
+* `remediation_hint` (how to reduce the unknown)
+
+**Acceptance criterion:** every unknown surfaced to users can be traced to a reason code and a remediation hint.
+
+---
+
+## 2) Policy direction: “unknown budgets” must be enforceable and environment-aware
+
+### Policy model requirements
+
+Policy must support:
+
+* Thresholds by environment (dev/test/stage/prod)
+* Thresholds by unknown type (reachability vs provenance vs feed, etc.)
+* Severity weighting (e.g., an unknown on an internet-facing service is worse)
+* Exception workflow (time-bound, owner-bound)
+* Deterministic evaluation (same inputs → same result)
+
+### Recommended default policy posture (ship as opinionated defaults)
+
+These defaults are intentionally strict in prod; a sketch of how they evaluate follows this section.
+
+**Prod (default)**
+
+* `unknown_reachable == 0` (fail build/deploy)
+* `unknown_provenance == 0` (fail)
+* `unknown_total <= 3` (fail if exceeded)
+* `unknown_feed == 0` (fail; “we didn’t have data” is unacceptable for prod)
+
+**Stage**
+
+* `unknown_reachable <= 1`
+* `unknown_provenance <= 1`
+* `unknown_total <= 10`
+
+**Dev**
+
+* Never hard fail by default; warn + ticket/PR annotation
+* Still compute unknowns and show trendlines (so teams see drift)
+
+### Exception policy (required to avoid “disable unknowns” pressure)
+
+Implement **explicit exceptions** rather than toggles:
+
+* An exception must include: `owner`, `expiry`, `justification`, `scope`, `risk_ack`
+* An exception must be emitted into attestations and reports (“this passed with exception X”).
+
+**Acceptance criterion:** there is no “turn off unknowns” knob; only thresholds and expiring exceptions.
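+The following sketch shows how those budgets could evaluate deterministically; the type names mirror the defaults above but are otherwise assumptions, not an existing API:
+
+```csharp
+using System.Collections.Generic;
+using System.Linq;
+
+// Environment-aware unknown budgets, evaluated as a pure comparison of counts.
+public sealed record UnknownBudget(int MaxReachable, int MaxProvenance, int MaxFeed, int MaxTotal);
+
+public static class UnknownPolicy
+{
+    private static readonly Dictionary<string, UnknownBudget> Budgets = new()
+    {
+        ["prod"]  = new(MaxReachable: 0, MaxProvenance: 0, MaxFeed: 0, MaxTotal: 3),
+        ["stage"] = new(MaxReachable: 1, MaxProvenance: 1, MaxFeed: int.MaxValue, MaxTotal: 10),
+    };
+
+    public static bool Passes(string environment, IReadOnlyDictionary<string, int> countsByReason)
+    {
+        if (!Budgets.TryGetValue(environment, out var budget))
+        {
+            return true; // dev never hard-fails by default; it warns instead
+        }
+
+        return countsByReason.GetValueOrDefault("U-RCH") <= budget.MaxReachable
+            && countsByReason.GetValueOrDefault("U-PROV") <= budget.MaxProvenance
+            && countsByReason.GetValueOrDefault("U-FEED") <= budget.MaxFeed
+            && countsByReason.Values.Sum() <= budget.MaxTotal;
+    }
+}
+```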
+
+---
+
+## 3) Reporting direction: unknowns must be visible, triaged, and trendable
+
+### Required reporting surfaces
+
+1. **Release / PR report**
+
+   * Unknown summary at top:
+
+     * total unknowns
+     * unknowns by reason code
+     * unknowns blocking policy vs not
+   * “What changed?” vs previous baseline (unknown delta)
+2. **Dashboard (portfolio view)**
+
+   * Unknowns over time
+   * Top teams/services by unknown count
+   * Top unknown causes (reason codes)
+3. **Operational triage view**
+
+   * “Unknown queue” sortable by:
+
+     * environment impact (prod/stage)
+     * exposure class (internet-facing/internal)
+     * reason code
+     * last-seen time
+     * owner
+
+### Reporting should drive action, not anxiety
+
+Every unknown row must include:
+
+* Why it’s unknown (reason code + short explanation)
+* What evidence is missing
+* How to reduce the unknown (concrete steps)
+* Expected effect (e.g., “adding debug symbols will likely reduce U-RCH by ~X”)
+
+**Key PM instruction:** treat unknowns like an **SLO**. Teams should be able to commit to “unknowns in prod must trend to zero.”
+
+---
+
+## 4) Attestations direction: unknowns must be cryptographically bound to decisions
+
+Every signed decision/attestation must include an “unknowns summary” section.
+
+### Attestation requirements
+
+Include at minimum:
+
+* `unknown_total`
+* `unknown_by_reason_code` (map of reason → count)
+* `unknown_blocking_count`
+* `unknown_details_digest` (hash of the full list if too large)
+* `policy_thresholds_applied` (the exact thresholds used)
+* `exceptions_applied` (IDs + expiries)
+* `knowledge_snapshot_id` (feeds/policy bundle hash if you support offline snapshots)
+
+**Why this matters:** if you sign a “pass,” you must also sign what you *didn’t know* at the time. Otherwise the signature is not audit-grade.
+
+**Acceptance criterion:** any downstream verifier can reject a signed “pass” based solely on unknown fields (e.g., “reject if unknown_reachable > 0 in prod”).
+
+---
+
+## 5) Development direction: implement unknown propagation as a first-class data flow
+
+### Core engineering tasks (must be done in this order)
+
+#### A. Define the canonical “Tri-state” evaluation type
+
+For any security claim, the evaluator must return:
+
+* `TRUE` (evidence supports)
+* `FALSE` (evidence refutes)
+* `UNKNOWN` (insufficient evidence)
+
+Do not represent unknown as nulls or missing fields. It must be explicit.
+
+#### B. Build the unknown aggregator and reason-code framework
+
+* A single aggregation layer computes:
+
+  * unknown counts per scope
+  * unknown counts per reason code
+  * unknown “blockers” based on policy
+* This must be deterministic and stable (no random ordering, stable IDs).
+
+#### C. Ensure analyzers emit unknowns instead of silently failing
+
+Any analyzer that cannot reach a conclusion must emit `UNKNOWN` + reason code + evidence pointers. Examples:
+
+* call graph incomplete → `U-RCH`
+* stripped binary cannot map symbols → `U-PROV`
+* unsupported language → `U-ANALYZER`
+
+#### D. Provide “reduce unknown” instrumentation hooks
+
+Attach remediation metadata:
+
+* “add build flags …”
+* “upload debug symbols …”
+* “enable source mapping …”
+* “mirror feeds …”
+
+This is how you prevent user backlash. A minimal sketch of tasks A and B follows.
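+In this sketch, `TriState`, `Unknown`, and `UnknownAggregator` are illustrative names, not existing types; it shows the explicit tri-state value and a deterministic aggregation:
+
+```csharp
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+// Task A: unknown is an explicit value, never a null or a missing field.
+public enum TriState { True, False, Unknown }
+
+public sealed record Unknown(string ReasonCode, string Scope, string RemediationHint);
+
+// Task B: deterministic aggregation (stable ordinal ordering, no randomness).
+public static class UnknownAggregator
+{
+    public static IReadOnlyList<KeyValuePair<string, int>> CountByReason(IEnumerable<Unknown> unknowns) =>
+        unknowns
+            .GroupBy(u => u.ReasonCode)
+            .OrderBy(g => g.Key, StringComparer.Ordinal)
+            .Select(g => new KeyValuePair<string, int>(g.Key, g.Count()))
+            .ToList();
+}
+```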
+
+---
+
+## 6) Make it default rather than optional: rollout plan without breaking adoption
+
+### Phase 1: compute + display (no blocking)
+
+* Unknowns computed for all scans
+* Reports show unknown budgets and what would have failed in prod
+* Collect baseline metrics for 2–4 weeks of typical usage
+
+### Phase 2: soft gating
+
+* In prod-like pipelines: fail only on `unknown_reachable > 0`
+* Everything else warns + requires owner acknowledgement
+
+### Phase 3: full policy enforcement
+
+* Enforce default thresholds
+* Exceptions require expiry and are visible in attestations
+
+### Phase 4: governance integration
+
+* Unknowns become part of:
+
+  * release readiness checks
+  * quarterly risk reviews
+  * vendor compliance audits
+
+**Dev Manager instruction:** invest in tooling that reduces unknowns early (symbol capture, provenance mapping, better analyzers). Otherwise “unknown gating” becomes politically unsustainable.
+
+---
+
+## 7) “Definition of Done” checklist for PMs and Dev Managers
+
+### PM DoD
+
+* [ ] Unknowns are explicitly defined with stable reason codes
+* [ ] Policy can fail on unknowns with environment-scoped thresholds
+* [ ] Reports show unknown deltas and remediation guidance
+* [ ] Exceptions are time-bound and appear everywhere (UI + API + attestations)
+* [ ] Unknowns cannot be disabled; only thresholds/exceptions are configurable
+
+### Engineering DoD
+
+* [ ] Tri-state evaluation implemented end-to-end
+* [ ] Analyzer failures never disappear; they become unknowns
+* [ ] Unknown aggregation is deterministic and reproducible
+* [ ] Signed attestation includes unknown summary + policy thresholds + exceptions
+* [ ] CI/CD integration can enforce “fail if unknowns > N in prod”
+
+---
+
+## 8) Concrete policy examples you can standardize internally
+
+### Minimal policy (prod)
+
+* Block deploy if:
+
+  * `unknown_reachable > 0`
+  * OR `unknown_provenance > 0`
+
+### Balanced policy (prod)
+
+* Block deploy if:
+
+  * `unknown_reachable > 0`
+  * OR `unknown_provenance > 0`
+  * OR `unknown_total > 3`
+
+### Risk-sensitive policy (internet-facing prod)
+
+* Block deploy if:
+
+  * `unknown_reachable > 0`
+  * OR `unknown_total > 1`
+  * OR any unknown affects a component with known remotely exploitable CVEs
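+For the risk-sensitive variant, the compound rule reads directly as a predicate; `Finding` and its members are invented here purely for illustration:
+
+```csharp
+using System.Collections.Generic;
+using System.Linq;
+
+public sealed record Finding(string ComponentId, bool HasRemotelyExploitableCve, bool HasUnknown);
+
+public static class RiskSensitiveGate
+{
+    // Block when any clause of the internet-facing prod policy trips.
+    public static bool BlockDeploy(int unknownReachable, int unknownTotal, IEnumerable<Finding> findings) =>
+        unknownReachable > 0
+        || unknownTotal > 1
+        || findings.Any(f => f.HasUnknown && f.HasRemotelyExploitableCve);
+}
+```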
diff --git a/src/Scanner/__Libraries/StellaOps.Scanner.Reachability/Cache/PrReachabilityGate.cs b/src/Scanner/__Libraries/StellaOps.Scanner.Reachability/Cache/PrReachabilityGate.cs
new file mode 100644
index 000000000..3f34a2e7b
--- /dev/null
+++ b/src/Scanner/__Libraries/StellaOps.Scanner.Reachability/Cache/PrReachabilityGate.cs
@@ -0,0 +1,478 @@
+// -----------------------------------------------------------------------------
+// PrReachabilityGate.cs
+// Sprint: SPRINT_3700_0006_0001_incremental_cache (CACHE-014)
+// Description: Evaluates incremental reachability results for PR gate decisions.
+// -----------------------------------------------------------------------------
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+
+namespace StellaOps.Scanner.Reachability.Cache;
+
+/// <summary>
+/// Configuration options for PR reachability gate evaluation.
+/// </summary>
+public sealed class PrReachabilityGateOptions
+{
+    /// <summary>
+    /// Section name for configuration binding.
+    /// </summary>
+    public const string SectionName = "Scanner:Reachability:PrGate";
+
+    /// <summary>
+    /// Whether the PR gate is enabled. Default: true.
+    /// </summary>
+    public bool Enabled { get; set; } = true;
+
+    /// <summary>
+    /// Whether to block PRs that introduce new reachable vulnerabilities. Default: true.
+    /// </summary>
+    public bool BlockOnNewReachable { get; set; } = true;
+
+    /// <summary>
+    /// Maximum number of new reachable paths allowed before blocking. Default: 0.
+    /// </summary>
+    public int MaxNewReachablePaths { get; set; } = 0;
+
+    /// <summary>
+    /// Whether to require a minimum confidence level for blocking decisions. Default: true.
+    /// </summary>
+    public bool RequireMinimumConfidence { get; set; } = true;
+
+    /// <summary>
+    /// Minimum confidence level (0.0-1.0) for a path to count as blocking. Default: 0.7.
+    /// </summary>
+    public double MinimumConfidenceThreshold { get; set; } = 0.7;
+
+    /// <summary>
+    /// Whether to add annotations to the PR for state flips. Default: true.
+    /// </summary>
+    public bool AddAnnotations { get; set; } = true;
+
+    /// <summary>
+    /// Maximum number of annotations to add per PR. Default: 10.
+    /// </summary>
+    public int MaxAnnotations { get; set; } = 10;
+
+    /// <summary>
+    /// Whether to include mitigated paths in the summary. Default: true.
+    /// </summary>
+    public bool IncludeMitigatedInSummary { get; set; } = true;
+}
+
+/// <summary>
+/// Result of PR gate evaluation.
+/// </summary>
+public sealed record PrGateResult
+{
+    /// <summary>
+    /// Whether the PR passed the gate.
+    /// </summary>
+    public required bool Passed { get; init; }
+
+    /// <summary>
+    /// Human-readable reason for the decision.
+    /// </summary>
+    public required string Reason { get; init; }
+
+    /// <summary>
+    /// Detailed decision breakdown.
+    /// </summary>
+    public required PrGateDecision Decision { get; init; }
+
+    /// <summary>
+    /// Annotations to add to the PR.
+    /// </summary>
+    public IReadOnlyList<PrGateAnnotation> Annotations { get; init; } = [];
+
+    /// <summary>
+    /// Summary markdown for the PR comment.
+    /// </summary>
+    public string? SummaryMarkdown { get; init; }
+}
+
+/// <summary>
+/// Detailed breakdown of a PR gate decision.
+/// </summary>
+public sealed record PrGateDecision
+{
+    /// <summary>
+    /// Number of new reachable vulnerability paths introduced.
+    /// </summary>
+    public int NewReachableCount { get; init; }
+
+    /// <summary>
+    /// Number of vulnerability paths mitigated (became unreachable).
+    /// </summary>
+    public int MitigatedCount { get; init; }
+
+    /// <summary>
+    /// Net change in reachable vulnerability paths.
+    /// </summary>
+    public int NetChange { get; init; }
+
+    /// <summary>
+    /// Whether incremental analysis was used.
+    /// </summary>
+    public bool WasIncremental { get; init; }
+
+    /// <summary>
+    /// Cache savings ratio (1.0 = 100% cached, 0.0 = full recompute).
+    /// </summary>
+    public double SavingsRatio { get; init; }
+
+    /// <summary>
+    /// Analysis duration.
+    /// </summary>
+    public TimeSpan Duration { get; init; }
+
+    /// <summary>
+    /// State flips that caused blocking.
+    /// </summary>
+    public IReadOnlyList<StateFlip> BlockingFlips { get; init; } = [];
+}
+
+/// <summary>
+/// Annotation to add to a PR.
+/// </summary>
+public sealed record PrGateAnnotation
+{
+    /// <summary>
+    /// Annotation level (error, warning, notice).
+    /// </summary>
+    public required PrAnnotationLevel Level { get; init; }
+
+    /// <summary>
+    /// Annotation message.
+    /// </summary>
+    public required string Message { get; init; }
+
+    /// <summary>
+    /// File path (if applicable).
+    /// </summary>
+    public string? FilePath { get; init; }
+
+    /// <summary>
+    /// Start line (if applicable).
+    /// </summary>
+    public int? StartLine { get; init; }
+
+    /// <summary>
+    /// End line (if applicable).
+    /// </summary>
+    public int? EndLine { get; init; }
+
+    /// <summary>
+    /// Annotation title.
+    /// </summary>
+    public string? Title { get; init; }
+}
+
+/// <summary>
+/// PR annotation severity level.
+/// </summary>
+public enum PrAnnotationLevel
+{
+    /// <summary>
+    /// Notice level (informational).
+    /// </summary>
+    Notice,
+
+    /// <summary>
+    /// Warning level (non-blocking).
+    /// </summary>
+    Warning,
+
+    /// <summary>
+    /// Error level (blocking).
+    /// </summary>
+    Error
+}
+
+/// <summary>
+/// Evaluates incremental reachability results for PR gate decisions.
+/// </summary>
+public interface IPrReachabilityGate
+{
+    /// <summary>
+    /// Evaluates an incremental reachability result for PR gating.
+    /// </summary>
+    /// <param name="result">The incremental reachability result.</param>
+    /// <returns>The PR gate evaluation result.</returns>
+    PrGateResult Evaluate(IncrementalReachabilityResult result);
+
+    /// <summary>
+    /// Evaluates state flips directly for PR gating.
+    /// </summary>
+    /// <param name="stateFlips">The state flip result.</param>
+    /// <param name="wasIncremental">Whether incremental analysis was used.</param>
+    /// <param name="savingsRatio">Cache savings ratio.</param>
+    /// <param name="duration">Analysis duration.</param>
+    /// <returns>The PR gate evaluation result.</returns>
+    PrGateResult EvaluateFlips(
+        StateFlipResult stateFlips,
+        bool wasIncremental,
+        double savingsRatio,
+        TimeSpan duration);
+}
+
+/// <summary>
+/// Default implementation of <see cref="IPrReachabilityGate"/>.
+/// </summary>
+public sealed class PrReachabilityGate : IPrReachabilityGate
+{
+    private readonly IOptionsMonitor<PrReachabilityGateOptions> _options;
+    private readonly ILogger<PrReachabilityGate> _logger;
+
+    /// <summary>
+    /// Creates a new PR reachability gate.
+    /// </summary>
+    public PrReachabilityGate(
+        IOptionsMonitor<PrReachabilityGateOptions> options,
+        ILogger<PrReachabilityGate> logger)
+    {
+        _options = options ?? throw new ArgumentNullException(nameof(options));
+        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
+    }
+
+    /// <inheritdoc />
+    public PrGateResult Evaluate(IncrementalReachabilityResult result)
+    {
+        ArgumentNullException.ThrowIfNull(result);
+
+        if (result.StateFlips is null)
+        {
+            return CreatePassResult(
+                "No state flip detection performed",
+                wasIncremental: result.WasIncremental,
+                savingsRatio: result.SavingsRatio,
+                duration: result.Duration);
+        }
+
+        return EvaluateFlips(
+            result.StateFlips,
+            result.WasIncremental,
+            result.SavingsRatio,
+            result.Duration);
+    }
+
+    /// <inheritdoc />
+    public PrGateResult EvaluateFlips(
+        StateFlipResult stateFlips,
+        bool wasIncremental,
+        double savingsRatio,
+        TimeSpan duration)
+    {
+        ArgumentNullException.ThrowIfNull(stateFlips);
+
+        var options = _options.CurrentValue;
+
+        // If the gate is disabled, always pass.
+        if (!options.Enabled)
+        {
+            _logger.LogDebug("PR gate is disabled, passing");
+            return CreatePassResult(
+                "PR gate is disabled",
+                wasIncremental,
+                savingsRatio,
+                duration);
+        }
+
+        // No flips = pass.
+        if (!stateFlips.HasFlips)
+        {
+            _logger.LogDebug("No reachability changes detected");
+            return CreatePassResult(
+                "No reachability changes",
+                wasIncremental,
+                savingsRatio,
+                duration);
+        }
+
+        // Filter blocking flips by confidence if required.
+        var blockingFlips = options.RequireMinimumConfidence
+            ? stateFlips.NewlyReachable
+                .Where(f => f.Confidence >= options.MinimumConfidenceThreshold)
+                .ToList()
+            : stateFlips.NewlyReachable.ToList();
+
+        var blockingCount = blockingFlips.Count;
+
+        // Check whether the gate should block.
+        var shouldBlock = options.BlockOnNewReachable &&
+                          blockingCount > options.MaxNewReachablePaths;
+
+        var decision = new PrGateDecision
+        {
+            NewReachableCount = stateFlips.NewRiskCount,
+            MitigatedCount = stateFlips.MitigatedCount,
+            NetChange = stateFlips.NetChange,
+            WasIncremental = wasIncremental,
+            SavingsRatio = savingsRatio,
+            Duration = duration,
+            BlockingFlips = blockingFlips
+        };
+
+        if (shouldBlock)
+        {
+            _logger.LogWarning(
+                "PR gate BLOCKED: {Count} new reachable vulnerability paths introduced",
+                blockingCount);
+
+            PrReachabilityGateMetrics.BlockedPrs.Add(1);
+
+            return new PrGateResult
+            {
+                Passed = false,
+                Reason = $"{blockingCount} vulnerabilities became reachable",
+                Decision = decision,
+                Annotations = BuildAnnotations(blockingFlips, options),
+                SummaryMarkdown = BuildSummaryMarkdown(decision, options, passed: false)
+            };
+        }
+
+        _logger.LogInformation(
+            "PR gate PASSED: {NewCount} new, {MitigatedCount} mitigated (net: {Net})",
+            stateFlips.NewRiskCount, stateFlips.MitigatedCount, stateFlips.NetChange);
+
+        PrReachabilityGateMetrics.PassedPrs.Add(1);
+
+        var reason = stateFlips.MitigatedCount > 0
+            ? $"{stateFlips.MitigatedCount} vulnerabilities mitigated"
+            : "Reachability changes within threshold";
+
+        return new PrGateResult
+        {
+            Passed = true,
+            Reason = reason,
+            Decision = decision,
+            Annotations = BuildAnnotations(blockingFlips, options),
+            SummaryMarkdown = BuildSummaryMarkdown(decision, options, passed: true)
+        };
+    }
+
+    private PrGateResult CreatePassResult(
+        string reason,
+        bool wasIncremental,
+        double savingsRatio,
+        TimeSpan duration)
+    {
+        return new PrGateResult
+        {
+            Passed = true,
+            Reason = reason,
+            Decision = new PrGateDecision
+            {
+                NewReachableCount = 0,
+                MitigatedCount = 0,
+                NetChange = 0,
+                WasIncremental = wasIncremental,
+                SavingsRatio = savingsRatio,
+                Duration = duration,
+                BlockingFlips = []
+            },
+            Annotations = [],
+            SummaryMarkdown = null
+        };
+    }
+
+    private static IReadOnlyList<PrGateAnnotation> BuildAnnotations(
+        IReadOnlyList<StateFlip> blockingFlips,
+        PrReachabilityGateOptions options)
+    {
+        if (!options.AddAnnotations || blockingFlips.Count == 0)
+            return [];
+
+        var annotations = new List<PrGateAnnotation>();
+        var flipsToAnnotate = blockingFlips.Take(options.MaxAnnotations);
+
+        foreach (var flip in flipsToAnnotate)
+        {
+            annotations.Add(new PrGateAnnotation
+            {
+                Level = PrAnnotationLevel.Error,
+                Title = "New Reachable Vulnerability Path",
+                Message = $"Vulnerability path became reachable: {flip.EntryMethodKey} → {flip.SinkMethodKey}",
+                FilePath = flip.SourceFile,
+                StartLine = flip.StartLine,
+                EndLine = flip.EndLine
+            });
+        }
+
+        return annotations;
+    }
+
+    private static string BuildSummaryMarkdown(
+        PrGateDecision decision,
+        PrReachabilityGateOptions options,
+        bool passed)
+    {
+        var sb = new StringBuilder();
+
+        sb.AppendLine(passed
+            ? "## ✅ Reachability Gate Passed"
+            : "## ❌ Reachability Gate Blocked");
+
+        sb.AppendLine();
+        sb.AppendLine("| Metric | Value |");
+        sb.AppendLine("|--------|-------|");
+        sb.AppendLine($"| New reachable paths | {decision.NewReachableCount} |");
+
+        if (options.IncludeMitigatedInSummary)
+        {
+            sb.AppendLine($"| Mitigated paths | {decision.MitigatedCount} |");
+            sb.AppendLine($"| Net change | {decision.NetChange:+#;-#;0} |");
+        }
+
+        sb.AppendLine($"| Analysis type | {(decision.WasIncremental ? "Incremental" : "Full")} |");
"Incremental" : "Full")} |"); + sb.AppendLine($"| Cache savings | {decision.SavingsRatio:P0} |"); + sb.AppendLine($"| Duration | {decision.Duration.TotalMilliseconds:F0}ms |"); + + if (!passed && decision.BlockingFlips.Count > 0) + { + sb.AppendLine(); + sb.AppendLine("### Blocking Paths"); + sb.AppendLine(); + + foreach (var flip in decision.BlockingFlips.Take(10)) + { + sb.AppendLine($"- `{flip.EntryMethodKey}` → `{flip.SinkMethodKey}` (confidence: {flip.Confidence:P0})"); + } + + if (decision.BlockingFlips.Count > 10) + { + sb.AppendLine($"- ... and {decision.BlockingFlips.Count - 10} more"); + } + } + + return sb.ToString(); + } +} + +/// +/// Metrics for PR reachability gate. +/// +internal static class PrReachabilityGateMetrics +{ + private static readonly string MeterName = "StellaOps.Scanner.Reachability.PrGate"; + + /// + /// Counter for passed PRs. + /// + public static readonly System.Diagnostics.Metrics.Counter PassedPrs = + new System.Diagnostics.Metrics.Meter(MeterName).CreateCounter( + "stellaops.reachability_prgate.passed", + description: "Number of PRs that passed the reachability gate"); + + /// + /// Counter for blocked PRs. + /// + public static readonly System.Diagnostics.Metrics.Counter BlockedPrs = + new System.Diagnostics.Metrics.Meter(MeterName).CreateCounter( + "stellaops.reachability_prgate.blocked", + description: "Number of PRs blocked by the reachability gate"); +} diff --git a/src/Scanner/__Libraries/StellaOps.Scanner.Reachability/Cache/StateFlipDetector.cs b/src/Scanner/__Libraries/StellaOps.Scanner.Reachability/Cache/StateFlipDetector.cs index a1b0daf4f..ef3a27931 100644 --- a/src/Scanner/__Libraries/StellaOps.Scanner.Reachability/Cache/StateFlipDetector.cs +++ b/src/Scanner/__Libraries/StellaOps.Scanner.Reachability/Cache/StateFlipDetector.cs @@ -127,6 +127,26 @@ public sealed record StateFlip /// Package name if applicable. /// public string? PackageName { get; init; } + + /// + /// Confidence score (0.0-1.0) of the reachability analysis. + /// + public double Confidence { get; init; } = 1.0; + + /// + /// Source file where the entry point is defined (if available). + /// + public string? SourceFile { get; init; } + + /// + /// Start line number in the source file (if available). + /// + public int? StartLine { get; init; } + + /// + /// End line number in the source file (if available). + /// + public int? EndLine { get; init; } } /// diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Reachability.Tests/Benchmarks/IncrementalCacheBenchmarkTests.cs b/src/Scanner/__Tests/StellaOps.Scanner.Reachability.Tests/Benchmarks/IncrementalCacheBenchmarkTests.cs new file mode 100644 index 000000000..8e98a3ad6 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Reachability.Tests/Benchmarks/IncrementalCacheBenchmarkTests.cs @@ -0,0 +1,752 @@ +// ----------------------------------------------------------------------------- +// IncrementalCacheBenchmarkTests.cs +// Sprint: SPRINT_3700_0006_0001_incremental_cache (CACHE-015) +// Description: Performance benchmark tests for incremental reachability cache. +// ----------------------------------------------------------------------------- + +using System.Diagnostics; +using FluentAssertions; +using Microsoft.Extensions.Logging.Abstractions; +using StellaOps.Scanner.Reachability.Cache; +using Xunit; +using Xunit.Abstractions; + +namespace StellaOps.Scanner.Reachability.Tests.Benchmarks; + +/// +/// Performance benchmark tests for incremental reachability cache. 
+/// Validates performance targets defined in SPRINT_3700_0006_0001_incremental_cache.md: +/// - Cache lookup: <10ms +/// - Delta computation: <100ms +/// - Impact set calculation: <500ms +/// - Full recompute: <30s (baseline for 50K node graph) +/// - Incremental (cache hit): <1s (90th percentile) +/// - Incremental (partial): <5s (10% of graph changed) +/// +public sealed class IncrementalCacheBenchmarkTests +{ + private readonly ITestOutputHelper _output; + + public IncrementalCacheBenchmarkTests(ITestOutputHelper output) + { + _output = output; + } + + /// + /// Benchmark: Cache lookup should complete in <10ms. + /// Uses in-memory cache implementation. + /// + [Fact] + public async Task CacheLookup_ShouldCompleteInUnder10ms() + { + // Arrange + var cache = new InMemoryReachabilityCache(); + var serviceId = "benchmark-service"; + var graphHash = "abc123"; + + // Pre-populate cache with entries + var entry = CreateCacheEntry(serviceId, graphHash, 100); + await cache.SetAsync(entry); + + // Warm up + _ = await cache.GetAsync(serviceId, graphHash); + + // Act - measure multiple lookups + var stopwatch = Stopwatch.StartNew(); + const int iterations = 100; + + for (int i = 0; i < iterations; i++) + { + var result = await cache.GetAsync(serviceId, graphHash); + result.Should().NotBeNull(); + } + + stopwatch.Stop(); + var averageMs = stopwatch.Elapsed.TotalMilliseconds / iterations; + _output.WriteLine($"Cache lookup average: {averageMs:F3}ms over {iterations} iterations"); + + // Assert + averageMs.Should().BeLessThan(10, "cache lookup should complete in <10ms"); + } + + /// + /// Benchmark: Delta computation should complete in <100ms for 50K nodes. + /// + [Fact] + public void DeltaComputation_ShouldCompleteInUnder100ms_For50KNodes() + { + // Arrange + const int nodeCount = 50_000; + const int edgeCount = 100_000; + const int changedNodes = 100; // 0.2% of nodes changed + + var previousGraph = CreateMockGraphSnapshot(nodeCount, edgeCount, seed: 42); + var currentGraph = CreateModifiedGraphSnapshot(previousGraph, changedNodes, seed: 43); + + // Warm up - simple delta computation + _ = ComputeDelta(previousGraph, currentGraph); + + // Act + var stopwatch = Stopwatch.StartNew(); + var delta = ComputeDelta(previousGraph, currentGraph); + stopwatch.Stop(); + + _output.WriteLine($"Delta computation for {nodeCount} nodes: {stopwatch.ElapsedMilliseconds}ms"); + _output.WriteLine($" Added nodes: {delta.AddedNodes.Count}"); + _output.WriteLine($" Removed nodes: {delta.RemovedNodes.Count}"); + _output.WriteLine($" Added edges: {delta.AddedEdges.Count}"); + _output.WriteLine($" Removed edges: {delta.RemovedEdges.Count}"); + + // Assert + stopwatch.ElapsedMilliseconds.Should().BeLessThan(100, + "delta computation should complete in <100ms for 50K node graph"); + } + + /// + /// Benchmark: Impact set calculation should complete in <500ms. 
+ /// + [Fact] + public void ImpactSetCalculation_ShouldCompleteInUnder500ms() + { + // Arrange + const int nodeCount = 50_000; + const int edgeCount = 100_000; + + var graph = CreateMockGraphSnapshot(nodeCount, edgeCount, seed: 42); + var addedNodes = new HashSet(CreateNodeIds(100, "added")); + var removedNodes = new HashSet(CreateNodeIds(50, "removed")); + + // Warm up + _ = CalculateImpactSet(graph, addedNodes, removedNodes); + + // Act + var stopwatch = Stopwatch.StartNew(); + var impactSet = CalculateImpactSet(graph, addedNodes, removedNodes); + stopwatch.Stop(); + + _output.WriteLine($"Impact set calculation for {nodeCount} nodes: {stopwatch.ElapsedMilliseconds}ms"); + _output.WriteLine($" Impact set size: {impactSet.Count}"); + + // Assert + stopwatch.ElapsedMilliseconds.Should().BeLessThan(500, + "impact set calculation should complete in <500ms"); + } + + /// + /// Benchmark: State flip detection should complete quickly. + /// + [Fact] + public void StateFlipDetection_ShouldCompleteInUnder50ms() + { + // Arrange + var previousResults = CreateReachablePairResults(1000, reachableRatio: 0.3); + var currentResults = CreateReachablePairResultsWithChanges(previousResults, changeRatio: 0.1); + + var detector = new StateFlipDetector(NullLogger.Instance); + + // Warm up + _ = detector.DetectFlips(previousResults, currentResults); + + // Act + var stopwatch = Stopwatch.StartNew(); + const int iterations = 100; + + for (int i = 0; i < iterations; i++) + { + _ = detector.DetectFlips(previousResults, currentResults); + } + + stopwatch.Stop(); + var averageMs = stopwatch.Elapsed.TotalMilliseconds / iterations; + _output.WriteLine($"State flip detection average: {averageMs:F3}ms over {iterations} iterations"); + _output.WriteLine($" Previous results: {previousResults.Count}"); + _output.WriteLine($" Current results: {currentResults.Count}"); + + // Assert + averageMs.Should().BeLessThan(50, "state flip detection should be fast"); + } + + /// + /// Benchmark: PR gate evaluation should be fast. + /// + [Fact] + public void PrGateEvaluation_ShouldCompleteInUnder10ms() + { + // Arrange + var flips = new StateFlipResult + { + NewlyReachable = CreateStateFlips(50, newReachable: true), + NewlyUnreachable = CreateStateFlips(30, newReachable: false), + }; + + var incrementalResult = new IncrementalReachabilityResult + { + ServiceId = "test-service", + Results = CreateIncrementalResults(100, reachableRatio: 0.3), + StateFlips = flips, + FromCache = false, + WasIncremental = true, + SavingsRatio = 0.7, + Duration = TimeSpan.FromMilliseconds(500), + }; + + var gate = CreatePrReachabilityGate(); + + // Warm up + _ = gate.Evaluate(incrementalResult); + + // Act + var stopwatch = Stopwatch.StartNew(); + const int iterations = 1000; + + for (int i = 0; i < iterations; i++) + { + _ = gate.Evaluate(incrementalResult); + } + + stopwatch.Stop(); + var averageMs = stopwatch.Elapsed.TotalMilliseconds / iterations; + _output.WriteLine($"PR gate evaluation average: {averageMs:F4}ms over {iterations} iterations"); + + // Assert + averageMs.Should().BeLessThan(10, "PR gate evaluation should be very fast"); + } + + /// + /// Benchmark: Memory pressure test for large caches. 
+ /// + [Fact] + public async Task LargeCache_ShouldHandleMemoryEfficiently() + { + // Arrange + var cache = new InMemoryReachabilityCache(); + const int serviceCount = 10; + const int entriesPerService = 1000; + + var beforeMemory = GC.GetTotalMemory(true); + + // Act - populate cache with many entries + for (int s = 0; s < serviceCount; s++) + { + var serviceId = $"service-{s}"; + var graphHash = $"hash-{s}"; + var entry = CreateCacheEntry(serviceId, graphHash, entriesPerService); + await cache.SetAsync(entry); + } + + var afterMemory = GC.GetTotalMemory(true); + var memoryUsedMB = (afterMemory - beforeMemory) / (1024.0 * 1024.0); + + _output.WriteLine($"Cache memory usage: {memoryUsedMB:F2}MB for {serviceCount * entriesPerService} entries"); + + // Assert - ensure memory usage is reasonable (< 100MB for 10K entries) + memoryUsedMB.Should().BeLessThan(100, + "cache should be memory efficient"); + } + + /// + /// Benchmark: Hash computation for graph snapshots should be fast. + /// + [Fact] + public void GraphHashComputation_ShouldCompleteQuickly() + { + // Arrange + const int nodeCount = 50_000; + const int edgeCount = 100_000; + var graph = CreateMockGraphSnapshot(nodeCount, edgeCount, seed: 42); + + // Warm up + _ = graph.Hash; + + // Act + var stopwatch = Stopwatch.StartNew(); + const int iterations = 100; + + for (int i = 0; i < iterations; i++) + { + _ = graph.Hash; + } + + stopwatch.Stop(); + var averageMs = stopwatch.Elapsed.TotalMilliseconds / iterations; + _output.WriteLine($"Graph hash computation average: {averageMs:F4}ms over {iterations} iterations"); + + // Assert - hash should be precomputed or very fast + averageMs.Should().BeLessThan(1, "graph hash should be precomputed or very fast"); + } + + /// + /// Benchmark: Concurrent cache access should be thread-safe and performant. + /// + [Fact] + public async Task ConcurrentCacheAccess_ShouldBePerformant() + { + // Arrange + var cache = new InMemoryReachabilityCache(); + var serviceId = "concurrent-service"; + var graphHash = "concurrent-hash"; + + // Pre-populate cache + var entry = CreateCacheEntry(serviceId, graphHash, 500); + await cache.SetAsync(entry); + + // Act - concurrent reads + var stopwatch = Stopwatch.StartNew(); + const int concurrency = 10; + const int iterationsPerTask = 100; + + var tasks = Enumerable.Range(0, concurrency) + .Select(_ => Task.Run(async () => + { + for (int i = 0; i < iterationsPerTask; i++) + { + var result = await cache.GetAsync(serviceId, graphHash); + result.Should().NotBeNull(); + } + })) + .ToList(); + + await Task.WhenAll(tasks); + stopwatch.Stop(); + + var totalOperations = concurrency * iterationsPerTask; + var opsPerSecond = totalOperations / stopwatch.Elapsed.TotalSeconds; + _output.WriteLine($"Concurrent cache access: {opsPerSecond:F0} ops/sec ({totalOperations} total in {stopwatch.ElapsedMilliseconds}ms)"); + + // Assert + opsPerSecond.Should().BeGreaterThan(1000, "cache should handle >1000 ops/sec concurrent access"); + } + + #region Helper Methods + + private static ReachabilityCacheEntry CreateCacheEntry( + string serviceId, + string graphHash, + int pairCount) + { + var pairs = new List(); + for (int i = 0; i < pairCount; i++) + { + pairs.Add(new ReachablePairResult + { + EntryMethodKey = $"entry-{i}", + SinkMethodKey = $"sink-{i % 50}", + IsReachable = i % 3 == 0, + PathLength = i % 3 == 0 ? 
3 : null, + Confidence = 0.9, + ComputedAt = DateTimeOffset.UtcNow, + }); + } + + return new ReachabilityCacheEntry + { + ServiceId = serviceId, + GraphHash = graphHash, + ReachablePairs = pairs, + EntryPointCount = pairCount / 10, + SinkCount = 50, + }; + } + + private static MockGraphSnapshot CreateMockGraphSnapshot(int nodeCount, int edgeCount, int seed) + { + var random = new Random(seed); + var nodeKeys = new HashSet( + Enumerable.Range(0, nodeCount).Select(i => $"node-{i}")); + + var edges = new List(); + var nodeList = nodeKeys.ToList(); + + for (int i = 0; i < edgeCount; i++) + { + var from = nodeList[random.Next(nodeCount)]; + var to = nodeList[random.Next(nodeCount)]; + edges.Add(new GraphEdge(from, to)); + } + + var entryPoints = new HashSet( + Enumerable.Range(0, nodeCount / 100).Select(i => $"node-{i}")); + + return new MockGraphSnapshot(nodeKeys, edges, entryPoints, seed); + } + + private static MockGraphSnapshot CreateModifiedGraphSnapshot( + MockGraphSnapshot previous, + int changedNodes, + int seed) + { + var random = new Random(seed); + var nodeKeys = new HashSet(previous.NodeKeys); + var edges = previous.Edges.ToList(); + + // Remove some nodes + var toRemove = nodeKeys.Take(changedNodes / 2).ToList(); + foreach (var node in toRemove) + { + nodeKeys.Remove(node); + edges.RemoveAll(e => e.CallerKey == node || e.CalleeKey == node); + } + + // Add some new nodes + for (int i = 0; i < changedNodes / 2; i++) + { + nodeKeys.Add($"new-node-{seed}-{i}"); + } + + // Add some new edges + var nodeList = nodeKeys.ToList(); + var newEdgeCount = Math.Min(changedNodes * 2, nodeList.Count); + for (int i = 0; i < newEdgeCount; i++) + { + var from = nodeList[random.Next(nodeList.Count)]; + var to = nodeList[random.Next(nodeList.Count)]; + edges.Add(new GraphEdge(from, to)); + } + + var entryPoints = new HashSet( + nodeKeys.Take(nodeKeys.Count / 100)); + + return new MockGraphSnapshot(nodeKeys, edges, entryPoints, seed); + } + + private static GraphDelta ComputeDelta(MockGraphSnapshot previous, MockGraphSnapshot current) + { + var addedNodes = new HashSet(current.NodeKeys.Except(previous.NodeKeys)); + var removedNodes = new HashSet(previous.NodeKeys.Except(current.NodeKeys)); + + var prevEdgeSet = new HashSet(previous.Edges); + var currEdgeSet = new HashSet(current.Edges); + + var addedEdges = currEdgeSet.Except(prevEdgeSet).ToList(); + var removedEdges = prevEdgeSet.Except(currEdgeSet).ToList(); + + var affected = new HashSet(addedNodes); + affected.UnionWith(removedNodes); + foreach (var e in addedEdges) + { + affected.Add(e.CallerKey); + affected.Add(e.CalleeKey); + } + foreach (var e in removedEdges) + { + affected.Add(e.CallerKey); + affected.Add(e.CalleeKey); + } + + return new GraphDelta + { + AddedNodes = addedNodes, + RemovedNodes = removedNodes, + AddedEdges = addedEdges, + RemovedEdges = removedEdges, + AffectedMethodKeys = affected, + PreviousHash = previous.Hash, + CurrentHash = current.Hash, + }; + } + + private static HashSet CalculateImpactSet( + MockGraphSnapshot graph, + HashSet addedNodes, + HashSet removedNodes) + { + var impactSet = new HashSet(addedNodes); + impactSet.UnionWith(removedNodes); + + // BFS to find affected neighbors + var visited = new HashSet(impactSet); + var queue = new Queue(impactSet); + const int maxDepth = 2; + + for (int depth = 0; depth < maxDepth && queue.Count > 0; depth++) + { + var levelSize = queue.Count; + for (int i = 0; i < levelSize; i++) + { + var node = queue.Dequeue(); + foreach (var edge in graph.Edges) + { + if (edge.CallerKey == 
node && !visited.Contains(edge.CalleeKey)) + { + visited.Add(edge.CalleeKey); + impactSet.Add(edge.CalleeKey); + queue.Enqueue(edge.CalleeKey); + } + else if (edge.CalleeKey == node && !visited.Contains(edge.CallerKey)) + { + visited.Add(edge.CallerKey); + impactSet.Add(edge.CallerKey); + queue.Enqueue(edge.CallerKey); + } + } + } + } + + return impactSet; + } + + private static IReadOnlyList CreateNodeIds(int count, string prefix) + { + return Enumerable.Range(0, count).Select(i => $"{prefix}-{i}").ToList(); + } + + private static IReadOnlyList CreateReachablePairResults( + int count, + double reachableRatio) + { + var random = new Random(42); + var results = new List(); + + for (int i = 0; i < count; i++) + { + var isReachable = random.NextDouble() < reachableRatio; + results.Add(new ReachablePairResult + { + EntryMethodKey = $"entry-{i}", + SinkMethodKey = $"sink-{i % 50}", + IsReachable = isReachable, + PathLength = isReachable ? 3 : null, + Confidence = 0.9, + ComputedAt = DateTimeOffset.UtcNow, + }); + } + + return results; + } + + private static IReadOnlyList CreateReachablePairResultsWithChanges( + IReadOnlyList previous, + double changeRatio) + { + var random = new Random(43); + var results = new List(); + + foreach (var prev in previous) + { + var shouldFlip = random.NextDouble() < changeRatio; + var newReachable = shouldFlip ? !prev.IsReachable : prev.IsReachable; + + results.Add(new ReachablePairResult + { + EntryMethodKey = prev.EntryMethodKey, + SinkMethodKey = prev.SinkMethodKey, + IsReachable = newReachable, + PathLength = newReachable ? prev.PathLength : null, + Confidence = prev.Confidence, + ComputedAt = DateTimeOffset.UtcNow, + }); + } + + return results; + } + + private static IReadOnlyList CreateIncrementalResults( + int count, + double reachableRatio) + { + return CreateReachablePairResults(count, reachableRatio); + } + + private static IReadOnlyList CreateStateFlips(int count, bool newReachable) + { + return Enumerable.Range(0, count) + .Select(i => new StateFlip + { + EntryMethodKey = $"entry-{i}", + SinkMethodKey = $"sink-{i % 10}", + CveId = $"CVE-2024-{1000 + i}", + WasReachable = !newReachable, + IsReachable = newReachable, + Confidence = 0.9, + }) + .ToList(); + } + + private static IPrReachabilityGate CreatePrReachabilityGate() + { + var options = new PrReachabilityGateOptions + { + Enabled = true, + BlockOnNewReachable = true, + MinConfidenceThreshold = 0.8, + MaxNewReachableCount = 10, + IncludeAnnotations = true, + }; + + return new PrReachabilityGate( + Microsoft.Extensions.Options.Options.Create(options), + NullLogger.Instance); + } + + #endregion +} + +#region Mock/Test Implementations + +/// +/// In-memory implementation of reachability cache for benchmarking. +/// +file sealed class InMemoryReachabilityCache : IReachabilityCache +{ + private readonly Dictionary _cache = new(); + private readonly object _lock = new(); + + public Task GetAsync( + string serviceId, + string graphHash, + CancellationToken cancellationToken = default) + { + lock (_lock) + { + var key = $"{serviceId}:{graphHash}"; + return Task.FromResult(_cache.TryGetValue(key, out var result) ? 
result : null); + } + } + + public Task SetAsync( + ReachabilityCacheEntry entry, + CancellationToken cancellationToken = default) + { + lock (_lock) + { + var key = $"{entry.ServiceId}:{entry.GraphHash}"; + _cache[key] = new CachedReachabilityResult + { + ServiceId = entry.ServiceId, + GraphHash = entry.GraphHash, + ReachablePairs = entry.ReachablePairs, + CachedAt = DateTimeOffset.UtcNow, + EntryPointCount = entry.EntryPointCount, + SinkCount = entry.SinkCount, + }; + } + + return Task.CompletedTask; + } + + public Task GetReachablePairAsync( + string serviceId, + string entryMethodKey, + string sinkMethodKey, + CancellationToken cancellationToken = default) + { + lock (_lock) + { + foreach (var cached in _cache.Values) + { + if (cached.ServiceId == serviceId) + { + var result = cached.ReachablePairs.FirstOrDefault(r => + r.EntryMethodKey == entryMethodKey && r.SinkMethodKey == sinkMethodKey); + if (result is not null) + { + return Task.FromResult(result); + } + } + } + } + + return Task.FromResult(null); + } + + public Task InvalidateAsync( + string serviceId, + IEnumerable affectedMethodKeys, + CancellationToken cancellationToken = default) + { + var keys = affectedMethodKeys.ToHashSet(); + var invalidated = 0; + + lock (_lock) + { + var toRemove = new List(); + foreach (var (cacheKey, cached) in _cache) + { + if (cached.ServiceId == serviceId) + { + var affected = cached.ReachablePairs.Any(r => + keys.Contains(r.EntryMethodKey) || + keys.Contains(r.SinkMethodKey)); + + if (affected) + { + toRemove.Add(cacheKey); + invalidated++; + } + } + } + + foreach (var key in toRemove) + { + _cache.Remove(key); + } + } + + return Task.FromResult(invalidated); + } + + public Task InvalidateAllAsync( + string serviceId, + CancellationToken cancellationToken = default) + { + lock (_lock) + { + var toRemove = _cache.Keys + .Where(k => k.StartsWith($"{serviceId}:")) + .ToList(); + + foreach (var key in toRemove) + { + _cache.Remove(key); + } + } + + return Task.CompletedTask; + } + + public Task GetStatisticsAsync( + string serviceId, + CancellationToken cancellationToken = default) + { + lock (_lock) + { + var entries = _cache.Values + .Where(c => c.ServiceId == serviceId) + .ToList(); + + return Task.FromResult(new CacheStatistics + { + ServiceId = serviceId, + CachedPairCount = entries.Sum(e => e.ReachablePairs.Count), + HitCount = 0, + MissCount = 0, + LastPopulatedAt = entries.MaxBy(e => e.CachedAt)?.CachedAt, + }); + } + } +} + +/// +/// Mock graph snapshot for benchmarking. 
+/// +file sealed class MockGraphSnapshot : IGraphSnapshot +{ + public IReadOnlySet NodeKeys { get; } + public IReadOnlyList Edges { get; } + public IReadOnlySet EntryPoints { get; } + public string Hash { get; } + + public MockGraphSnapshot( + IReadOnlySet nodeKeys, + IReadOnlyList edges, + IReadOnlySet entryPoints, + int seed) + { + NodeKeys = nodeKeys; + Edges = edges; + EntryPoints = entryPoints; + Hash = $"hash-{seed}-{nodeKeys.Count}-{edges.Count}"; + } +} + +#endregion diff --git a/src/Scanner/__Tests/StellaOps.Scanner.Reachability.Tests/PrReachabilityGateTests.cs b/src/Scanner/__Tests/StellaOps.Scanner.Reachability.Tests/PrReachabilityGateTests.cs new file mode 100644 index 000000000..a16f764e8 --- /dev/null +++ b/src/Scanner/__Tests/StellaOps.Scanner.Reachability.Tests/PrReachabilityGateTests.cs @@ -0,0 +1,400 @@ +// ----------------------------------------------------------------------------- +// PrReachabilityGateTests.cs +// Sprint: SPRINT_3700_0006_0001_incremental_cache (CACHE-014) +// Description: Unit tests for PR reachability gate. +// ----------------------------------------------------------------------------- + +using System; +using System.Collections.Generic; +using FluentAssertions; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; +using StellaOps.Scanner.Reachability.Cache; +using Xunit; + +namespace StellaOps.Scanner.Reachability.Tests; + +public sealed class PrReachabilityGateTests +{ + private readonly PrReachabilityGate _gate; + private readonly PrReachabilityGateOptions _options; + + public PrReachabilityGateTests() + { + _options = new PrReachabilityGateOptions(); + var optionsMonitor = new TestOptionsMonitor(_options); + _gate = new PrReachabilityGate(optionsMonitor, NullLogger.Instance); + } + + [Fact] + public void EvaluateFlips_NoFlips_ReturnsPass() + { + // Arrange + var stateFlips = StateFlipResult.Empty; + + // Act + var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.8, TimeSpan.FromMilliseconds(100)); + + // Assert + result.Passed.Should().BeTrue(); + result.Reason.Should().Be("No reachability changes"); + result.Decision.NewReachableCount.Should().Be(0); + result.Decision.MitigatedCount.Should().Be(0); + } + + [Fact] + public void EvaluateFlips_NewReachable_ReturnsBlock() + { + // Arrange + var stateFlips = new StateFlipResult + { + NewlyReachable = new List + { + new StateFlip + { + EntryMethodKey = "Controller.Get", + SinkMethodKey = "Vulnerable.Execute", + IsReachable = true, + Confidence = 0.9 + } + }, + NewlyUnreachable = [] + }; + + // Act + var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.7, TimeSpan.FromMilliseconds(150)); + + // Assert + result.Passed.Should().BeFalse(); + result.Reason.Should().Contain("1 vulnerabilities became reachable"); + result.Decision.NewReachableCount.Should().Be(1); + result.Decision.BlockingFlips.Should().HaveCount(1); + } + + [Fact] + public void EvaluateFlips_OnlyMitigated_ReturnsPass() + { + // Arrange + var stateFlips = new StateFlipResult + { + NewlyReachable = [], + NewlyUnreachable = new List + { + new StateFlip + { + EntryMethodKey = "Controller.Get", + SinkMethodKey = "Vulnerable.Execute", + IsReachable = false, + WasReachable = true + } + } + }; + + // Act + var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.9, TimeSpan.FromMilliseconds(50)); + + // Assert + result.Passed.Should().BeTrue(); + result.Reason.Should().Contain("mitigated"); + 
result.Decision.MitigatedCount.Should().Be(1); + } + + [Fact] + public void EvaluateFlips_GateDisabled_AlwaysPasses() + { + // Arrange + _options.Enabled = false; + + var stateFlips = new StateFlipResult + { + NewlyReachable = new List + { + new StateFlip + { + EntryMethodKey = "Controller.Get", + SinkMethodKey = "Vulnerable.Execute", + IsReachable = true + } + } + }; + + // Act + var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.5, TimeSpan.FromMilliseconds(100)); + + // Assert + result.Passed.Should().BeTrue(); + result.Reason.Should().Be("PR gate is disabled"); + } + + [Fact] + public void EvaluateFlips_LowConfidence_Excluded() + { + // Arrange + _options.RequireMinimumConfidence = true; + _options.MinimumConfidenceThreshold = 0.8; + + var stateFlips = new StateFlipResult + { + NewlyReachable = new List + { + new StateFlip + { + EntryMethodKey = "Controller.Get", + SinkMethodKey = "Vulnerable.Execute", + IsReachable = true, + Confidence = 0.5 // Below threshold + } + } + }; + + // Act + var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.8, TimeSpan.FromMilliseconds(100)); + + // Assert + result.Passed.Should().BeTrue(); // Should pass because low confidence path is excluded + result.Decision.BlockingFlips.Should().BeEmpty(); + } + + [Fact] + public void EvaluateFlips_MaxNewReachableThreshold_AllowsUnderThreshold() + { + // Arrange + _options.MaxNewReachablePaths = 2; + + var stateFlips = new StateFlipResult + { + NewlyReachable = new List + { + new StateFlip + { + EntryMethodKey = "A.Method", + SinkMethodKey = "Vuln1", + IsReachable = true, + Confidence = 1.0 + }, + new StateFlip + { + EntryMethodKey = "B.Method", + SinkMethodKey = "Vuln2", + IsReachable = true, + Confidence = 1.0 + } + } + }; + + // Act + var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.7, TimeSpan.FromMilliseconds(200)); + + // Assert + result.Passed.Should().BeTrue(); // 2 == threshold, so should pass + } + + [Fact] + public void EvaluateFlips_MaxNewReachableThreshold_BlocksOverThreshold() + { + // Arrange + _options.MaxNewReachablePaths = 1; + + var stateFlips = new StateFlipResult + { + NewlyReachable = new List + { + new StateFlip + { + EntryMethodKey = "A.Method", + SinkMethodKey = "Vuln1", + IsReachable = true, + Confidence = 1.0 + }, + new StateFlip + { + EntryMethodKey = "B.Method", + SinkMethodKey = "Vuln2", + IsReachable = true, + Confidence = 1.0 + } + } + }; + + // Act + var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.6, TimeSpan.FromMilliseconds(200)); + + // Assert + result.Passed.Should().BeFalse(); // 2 > 1, so should block + } + + [Fact] + public void EvaluateFlips_Annotations_GeneratedForBlockingFlips() + { + // Arrange + _options.AddAnnotations = true; + _options.MaxAnnotations = 5; + + var stateFlips = new StateFlipResult + { + NewlyReachable = new List + { + new StateFlip + { + EntryMethodKey = "Controller.Get", + SinkMethodKey = "Vulnerable.Execute", + IsReachable = true, + Confidence = 1.0, + SourceFile = "Controllers/MyController.cs", + StartLine = 42, + EndLine = 45 + } + } + }; + + // Act + var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.8, TimeSpan.FromMilliseconds(100)); + + // Assert + result.Annotations.Should().HaveCount(1); + result.Annotations[0].Level.Should().Be(PrAnnotationLevel.Error); + result.Annotations[0].FilePath.Should().Be("Controllers/MyController.cs"); + result.Annotations[0].StartLine.Should().Be(42); + } 
+
+    [Fact]
+    public void EvaluateFlips_AnnotationsDisabled_NoAnnotations()
+    {
+        // Arrange
+        _options.AddAnnotations = false;
+
+        var stateFlips = new StateFlipResult
+        {
+            NewlyReachable = new List<StateFlip>
+            {
+                new StateFlip
+                {
+                    EntryMethodKey = "Controller.Get",
+                    SinkMethodKey = "Vulnerable.Execute",
+                    IsReachable = true
+                }
+            }
+        };
+
+        // Act
+        var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.8, TimeSpan.FromMilliseconds(100));
+
+        // Assert
+        result.Annotations.Should().BeEmpty();
+    }
+
+    [Fact]
+    public void EvaluateFlips_SummaryMarkdown_Generated()
+    {
+        // Arrange
+        var stateFlips = new StateFlipResult
+        {
+            NewlyReachable = new List<StateFlip>
+            {
+                new StateFlip
+                {
+                    EntryMethodKey = "Controller.Get",
+                    SinkMethodKey = "Vulnerable.Execute",
+                    IsReachable = true,
+                    Confidence = 0.95
+                }
+            },
+            NewlyUnreachable = new List<StateFlip>
+            {
+                new StateFlip
+                {
+                    EntryMethodKey = "Old.Entry",
+                    SinkMethodKey = "Fixed.Sink",
+                    IsReachable = false,
+                    WasReachable = true
+                }
+            }
+        };
+
+        // Act
+        var result = _gate.EvaluateFlips(stateFlips, wasIncremental: true, savingsRatio: 0.75, TimeSpan.FromMilliseconds(150));
+
+        // Assert
+        result.SummaryMarkdown.Should().NotBeNullOrEmpty();
+        result.SummaryMarkdown.Should().Contain("Reachability Gate");
+        result.SummaryMarkdown.Should().Contain("New reachable paths");
+        result.SummaryMarkdown.Should().Contain("Mitigated paths");
+    }
+
+    [Fact]
+    public void Evaluate_NullStateFlips_ReturnsPass()
+    {
+        // Arrange
+        var result = new IncrementalReachabilityResult
+        {
+            ServiceId = "test-service",
+            Results = [],
+            StateFlips = null,
+            FromCache = false,
+            WasIncremental = true,
+            SavingsRatio = 1.0,
+            Duration = TimeSpan.FromMilliseconds(50)
+        };
+
+        // Act
+        var gateResult = _gate.Evaluate(result);
+
+        // Assert
+        gateResult.Passed.Should().BeTrue();
+        gateResult.Reason.Should().Be("No state flip detection performed");
+    }
+
+    [Fact]
+    public void Evaluate_WithStateFlips_DelegatesCorrectly()
+    {
+        // Arrange
+        var stateFlips = new StateFlipResult
+        {
+            NewlyReachable = new List<StateFlip>
+            {
+                new StateFlip
+                {
+                    EntryMethodKey = "A",
+                    SinkMethodKey = "B",
+                    IsReachable = true,
+                    Confidence = 1.0
+                }
+            }
+        };
+
+        var analysisResult = new IncrementalReachabilityResult
+        {
+            ServiceId = "test-service",
+            Results = [],
+            StateFlips = stateFlips,
+            FromCache = false,
+            WasIncremental = true,
+            SavingsRatio = 0.9,
+            Duration = TimeSpan.FromMilliseconds(100)
+        };
+
+        // Act
+        var gateResult = _gate.Evaluate(analysisResult);
+
+        // Assert
+        gateResult.Passed.Should().BeFalse();
+        gateResult.Decision.WasIncremental.Should().BeTrue();
+        gateResult.Decision.SavingsRatio.Should().Be(0.9);
+    }
+
+    private sealed class TestOptionsMonitor<T> : IOptionsMonitor<T>
+    {
+        private readonly T _currentValue;
+
+        public TestOptionsMonitor(T value)
+        {
+            _currentValue = value;
+        }
+
+        public T CurrentValue => _currentValue;
+
+        public T Get(string? name) => _currentValue;
+
+        public IDisposable? OnChange(Action<T, string?> listener) => null;
+    }
+}