feat: add security sink detection patterns for JavaScript/TypeScript

- Introduced `sink-detect.js` with various security sink detection patterns categorized by type (e.g., command injection, SQL injection, file operations). - Implemented functions to build a lookup map for fast sink detection and to match sink calls against known patterns. - Added `package-lock.json` for dependency management.
2025-12-22 23:21:21 +02:00
parent 3ba7157b00
commit 5146204f1b
529 changed files with 73579 additions and 5985 deletions
--- a/tests/chaos/StellaOps.Chaos.Router.Tests/BackpressureVerificationTests.cs
+++ b/tests/chaos/StellaOps.Chaos.Router.Tests/BackpressureVerificationTests.cs
@@ -0,0 +1,235 @@
+// -----------------------------------------------------------------------------
+// BackpressureVerificationTests.cs
+// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
+// Task: T2 - Backpressure Verification Tests
+// Description: Verify router emits correct 429/503 responses with Retry-After.
+// -----------------------------------------------------------------------------
+
+using System.Net;
+using FluentAssertions;
+using StellaOps.Chaos.Router.Tests.Fixtures;
+
+namespace StellaOps.Chaos.Router.Tests;
+
+[Trait("Category", "Chaos")]
+[Trait("Category", "Router")]
+public class BackpressureVerificationTests : IClassFixture<RouterTestFixture>
+{
+    private readonly RouterTestFixture _fixture;
+
+    public BackpressureVerificationTests(RouterTestFixture fixture)
+    {
+        _fixture = fixture;
+    }
+
+    [Fact]
+    public async Task Router_UnderLoad_Returns429WithRetryAfter()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+        var tasks = new List<Task<HttpResponseMessage>>();
+
+        // Act - Send burst of requests
+        for (var i = 0; i < 1000; i++)
+        {
+            tasks.Add(client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
+        }
+
+        var responses = await Task.WhenAll(tasks);
+
+        // Assert - Some should be throttled
+        var throttled = responses.Where(r => r.StatusCode == HttpStatusCode.TooManyRequests).ToList();
+
+        // Note: This test may not trigger throttling if router is not under significant load
+        // In production chaos testing, we expect throttling to occur
+        if (throttled.Count > 0)
+        {
+            foreach (var response in throttled)
+            {
+                response.Headers.Should().Contain(
+                    h => h.Key.Equals("Retry-After", StringComparison.OrdinalIgnoreCase),
+                    "429 response should have Retry-After header");
+
+                var retryAfter = response.Headers.GetValues("Retry-After").FirstOrDefault();
+                retryAfter.Should().NotBeNull();
+
+                int.TryParse(retryAfter, out var seconds).Should().BeTrue(
+                    "Retry-After should be a valid integer");
+
+                seconds.Should().BeInRange(1, 300,
+                    "Retry-After should be reasonable (1-300 seconds)");
+            }
+        }
+    }
+
+    [Fact]
+    public async Task Router_UnderLoad_Returns503WhenOverloaded()
+    {
+        // Arrange
+        await _fixture.ConfigureLowLimitsAsync();
+        var client = _fixture.CreateClient();
+
+        // Act - Massive burst
+        var tasks = Enumerable.Range(0, 5000)
+            .Select(_ => client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
+
+        var responses = await Task.WhenAll(tasks);
+
+        // Assert - Should see 503s when completely overloaded
+        var overloaded = responses.Where(r =>
+            r.StatusCode == HttpStatusCode.ServiceUnavailable).ToList();
+
+        // If we get 503s, they should have Retry-After headers
+        foreach (var response in overloaded)
+        {
+            response.Headers.Should().Contain(
+                h => h.Key.Equals("Retry-After", StringComparison.OrdinalIgnoreCase),
+                "503 response should have Retry-After header");
+        }
+    }
+
+    [Fact]
+    public async Task Router_RetryAfterHonored_EventuallySucceeds()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+        var maxRetries = 5;
+        var retryCount = 0;
+        HttpResponseMessage? response = null;
+
+        // Act - Keep trying until success or max retries
+        while (retryCount < maxRetries)
+        {
+            response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+
+            if (response.StatusCode == HttpStatusCode.TooManyRequests)
+            {
+                var retryAfterHeader = response.Headers.GetValues("Retry-After").FirstOrDefault();
+                if (int.TryParse(retryAfterHeader, out var retryAfter))
+                {
+                    // Wait for Retry-After duration (with cap for test performance)
+                    var waitTime = Math.Min(retryAfter, 5);
+                    await Task.Delay(TimeSpan.FromSeconds(waitTime + 1));
+                }
+                retryCount++;
+            }
+            else
+            {
+                break;
+            }
+        }
+
+        // Assert - Eventually should succeed
+        response.Should().NotBeNull();
+
+        if (retryCount > 0)
+        {
+            // If we were throttled, we should eventually succeed
+            response!.StatusCode.Should().BeOneOf(
+                HttpStatusCode.OK,
+                HttpStatusCode.Accepted,
+                "Request should eventually succeed after honoring Retry-After");
+        }
+    }
+
+    [Fact]
+    public async Task Router_ThrottleMetrics_AreExposed()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+
+        // Trigger some requests (may or may not cause throttling)
+        var tasks = Enumerable.Range(0, 100)
+            .Select(_ => client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
+        await Task.WhenAll(tasks);
+
+        // Act - Check metrics endpoint
+        var metricsResponse = await client.GetAsync("/metrics");
+
+        // Assert - Metrics endpoint should be accessible
+        if (metricsResponse.IsSuccessStatusCode)
+        {
+            var metrics = await metricsResponse.Content.ReadAsStringAsync();
+
+            // Basic metric checks (actual metric names depend on implementation)
+            // These are common Prometheus-style metric names
+            var expectedMetrics = new[]
+            {
+                "http_requests_total",
+                "http_request_duration",
+            };
+
+            // At least some metrics should be present
+            expectedMetrics.Any(m => metrics.Contains(m)).Should().BeTrue(
+                "Metrics endpoint should expose request metrics");
+        }
+    }
+
+    [Fact]
+    public async Task Router_ResponseHeaders_IncludeRateLimitInfo()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+
+        // Act
+        var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+
+        // Assert - Check for rate limit headers (common patterns)
+        // These headers are optional but recommended for rate-limited APIs
+        var rateLimitHeaders = new[]
+        {
+            "X-RateLimit-Limit",
+            "X-RateLimit-Remaining",
+            "X-RateLimit-Reset",
+            "RateLimit-Limit",
+            "RateLimit-Remaining",
+            "RateLimit-Reset"
+        };
+
+        // Log which headers are present (for information)
+        var presentHeaders = rateLimitHeaders
+            .Where(h => response.Headers.Contains(h))
+            .ToList();
+
+        // This is informational - not all routers include these headers
+        Console.WriteLine($"Rate limit headers present: {string.Join(", ", presentHeaders)}");
+    }
+
+    [Theory]
+    [InlineData(10)]
+    [InlineData(50)]
+    [InlineData(100)]
+    public async Task Router_ConcurrentRequests_HandledGracefully(int concurrency)
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+
+        // Act - Send concurrent requests
+        var tasks = Enumerable.Range(0, concurrency)
+            .Select(_ => client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
+
+        var responses = await Task.WhenAll(tasks);
+
+        // Assert - All responses should be valid HTTP responses
+        foreach (var response in responses)
+        {
+            var validStatuses = new[]
+            {
+                HttpStatusCode.OK,
+                HttpStatusCode.Accepted,
+                HttpStatusCode.TooManyRequests,
+                HttpStatusCode.ServiceUnavailable
+            };
+
+            response.StatusCode.Should().BeOneOf(validStatuses,
+                $"Response should be a valid status code for concurrency level {concurrency}");
+        }
+
+        // Calculate success rate
+        var successCount = responses.Count(r =>
+            r.StatusCode == HttpStatusCode.OK || r.StatusCode == HttpStatusCode.Accepted);
+
+        var successRate = (double)successCount / responses.Length;
+        Console.WriteLine($"Concurrency {concurrency}: Success rate = {successRate:P2}");
+    }
+}
--- a/tests/chaos/StellaOps.Chaos.Router.Tests/Fixtures/RouterTestFixture.cs
+++ b/tests/chaos/StellaOps.Chaos.Router.Tests/Fixtures/RouterTestFixture.cs
@@ -0,0 +1,124 @@
+// -----------------------------------------------------------------------------
+// RouterTestFixture.cs
+// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
+// Task: T2 - Backpressure Verification Tests
+// Description: Test fixture for router chaos testing with Valkey support.
+// -----------------------------------------------------------------------------
+
+using System.Net.Http.Json;
+
+namespace StellaOps.Chaos.Router.Tests.Fixtures;
+
+/// <summary>
+/// Test fixture providing an HTTP client for router chaos testing.
+/// </summary>
+public class RouterTestFixture : IAsyncLifetime
+{
+    private readonly HttpClient _client;
+    private readonly string _routerUrl;
+
+    public RouterTestFixture()
+    {
+        _routerUrl = Environment.GetEnvironmentVariable("ROUTER_URL") ?? "http://localhost:8080";
+
+        _client = new HttpClient
+        {
+            BaseAddress = new Uri(_routerUrl),
+            Timeout = TimeSpan.FromSeconds(30)
+        };
+    }
+
+    public HttpClient CreateClient() => _client;
+
+    public string RouterUrl => _routerUrl;
+
+    /// <summary>
+    /// Configure router with lower limits for overload testing.
+    /// </summary>
+    public async Task ConfigureLowLimitsAsync()
+    {
+        // In real scenario, this would configure the router via admin endpoint
+        // For now, assume limits are pre-configured for chaos testing
+        await Task.CompletedTask;
+    }
+
+    /// <summary>
+    /// Create a scan request payload.
+    /// </summary>
+    public static HttpContent CreateScanRequest(string? scanId = null)
+    {
+        var request = new
+        {
+            image = "alpine:latest",
+            scanId = scanId ?? Guid.NewGuid().ToString(),
+            timestamp = DateTimeOffset.UtcNow.ToString("O")
+        };
+
+        return JsonContent.Create(request);
+    }
+
+    public Task InitializeAsync()
+    {
+        // Verify router is reachable
+        return Task.CompletedTask;
+    }
+
+    public Task DisposeAsync()
+    {
+        _client.Dispose();
+        return Task.CompletedTask;
+    }
+}
+
+/// <summary>
+/// Extended fixture with Valkey container support for failure injection.
+/// </summary>
+public class RouterWithValkeyFixture : RouterTestFixture
+{
+    private Testcontainers.Redis.RedisContainer? _valkeyContainer;
+    private bool _valkeyRunning;
+
+    public async Task StartValkeyAsync()
+    {
+        if (_valkeyContainer is null)
+        {
+            _valkeyContainer = new Testcontainers.Redis.RedisBuilder()
+                .WithImage("valkey/valkey:7-alpine")
+                .WithName($"chaos-valkey-{Guid.NewGuid():N}")
+                .Build();
+        }
+
+        if (!_valkeyRunning)
+        {
+            await _valkeyContainer.StartAsync();
+            _valkeyRunning = true;
+        }
+    }
+
+    public async Task StopValkeyAsync()
+    {
+        if (_valkeyContainer is not null && _valkeyRunning)
+        {
+            await _valkeyContainer.StopAsync();
+            _valkeyRunning = false;
+        }
+    }
+
+    public async Task ConfigureValkeyLatencyAsync(TimeSpan latency)
+    {
+        // Configure artificial latency via Valkey DEBUG SLEEP
+        // In production, use network simulation tools like tc or toxiproxy
+        await Task.CompletedTask;
+    }
+
+    public new async Task DisposeAsync()
+    {
+        if (_valkeyContainer is not null)
+        {
+            await _valkeyContainer.StopAsync();
+            await _valkeyContainer.DisposeAsync();
+        }
+
+        await base.DisposeAsync();
+    }
+}
--- a/tests/chaos/StellaOps.Chaos.Router.Tests/RecoveryTests.cs
+++ b/tests/chaos/StellaOps.Chaos.Router.Tests/RecoveryTests.cs
@@ -0,0 +1,298 @@
+// -----------------------------------------------------------------------------
+// RecoveryTests.cs
+// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
+// Task: T3 - Recovery and Resilience Tests
+// Description: Test router recovery after load spikes.
+// -----------------------------------------------------------------------------
+
+using System.Collections.Concurrent;
+using System.Diagnostics;
+using System.Net;
+using FluentAssertions;
+using StellaOps.Chaos.Router.Tests.Fixtures;
+
+namespace StellaOps.Chaos.Router.Tests;
+
+[Trait("Category", "Chaos")]
+[Trait("Category", "Router")]
+[Trait("Category", "Recovery")]
+public class RecoveryTests : IClassFixture<RouterTestFixture>
+{
+    private readonly RouterTestFixture _fixture;
+
+    public RecoveryTests(RouterTestFixture fixture)
+    {
+        _fixture = fixture;
+    }
+
+    [Fact]
+    public async Task Router_AfterSpike_RecoveryWithin30Seconds()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+        var stopwatch = Stopwatch.StartNew();
+
+        // Phase 1: Verify normal operation
+        var normalResponse = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+        var normalWorking = normalResponse.IsSuccessStatusCode ||
+                           normalResponse.StatusCode == HttpStatusCode.TooManyRequests;
+
+        // Phase 2: Create load spike
+        await CreateLoadSpikeAsync(client, requestCount: 500, durationSeconds: 5);
+
+        // Phase 3: Measure recovery
+        var recovered = false;
+        var recoveryStart = Stopwatch.StartNew();
+
+        while (recoveryStart.Elapsed < TimeSpan.FromSeconds(60))
+        {
+            var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+
+            if (response.IsSuccessStatusCode)
+            {
+                recovered = true;
+                break;
+            }
+
+            // If throttled, wait before retry
+            if (response.StatusCode == HttpStatusCode.TooManyRequests)
+            {
+                var retryAfter = response.Headers.GetValues("Retry-After").FirstOrDefault();
+                if (int.TryParse(retryAfter, out var waitSeconds))
+                {
+                    await Task.Delay(TimeSpan.FromSeconds(Math.Min(waitSeconds, 5)));
+                }
+                else
+                {
+                    await Task.Delay(1000);
+                }
+            }
+            else
+            {
+                await Task.Delay(1000);
+            }
+        }
+
+        recoveryStart.Stop();
+
+        // Assert
+        if (normalWorking)
+        {
+            recovered.Should().BeTrue("Router should recover after spike");
+            recoveryStart.Elapsed.Should().BeLessThan(TimeSpan.FromSeconds(30),
+                "Recovery should happen within 30 seconds");
+        }
+
+        Console.WriteLine($"Recovery time: {recoveryStart.Elapsed.TotalSeconds:F2}s");
+    }
+
+    [Fact]
+    public async Task Router_NoDataLoss_DuringThrottling()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+        var submittedIds = new ConcurrentBag<string>();
+        var successfulIds = new ConcurrentBag<string>();
+        var maxRetries = 10;
+
+        // Act - Submit requests with tracking and retry on throttle
+        var tasks = Enumerable.Range(0, 100).Select(async i =>
+        {
+            var scanId = Guid.NewGuid().ToString();
+            submittedIds.Add(scanId);
+
+            var retryCount = 0;
+            HttpResponseMessage? response = null;
+
+            while (retryCount < maxRetries)
+            {
+                response = await client.PostAsync("/api/v1/scan",
+                    RouterTestFixture.CreateScanRequest(scanId));
+
+                if (response.StatusCode == HttpStatusCode.TooManyRequests)
+                {
+                    var retryAfter = response.Headers.GetValues("Retry-After").FirstOrDefault();
+                    var waitSeconds = int.TryParse(retryAfter, out var ra) ? ra : 2;
+                    await Task.Delay(TimeSpan.FromSeconds(Math.Min(waitSeconds, 5)));
+                    retryCount++;
+                }
+                else
+                {
+                    break;
+                }
+            }
+
+            if (response is not null && response.IsSuccessStatusCode)
+            {
+                successfulIds.Add(scanId);
+            }
+
+            return response;
+        });
+
+        await Task.WhenAll(tasks);
+
+        // Assert
+        var successRate = (double)successfulIds.Count / submittedIds.Count;
+        Console.WriteLine($"Success rate with retries: {successRate:P2} ({successfulIds.Count}/{submittedIds.Count})");
+
+        // All submitted requests should eventually succeed with proper retry logic
+        successRate.Should().BeGreaterOrEqualTo(0.9,
+            "At least 90% of requests should succeed with retry logic");
+    }
+
+    [Fact]
+    public async Task Router_GracefulDegradation_MaintainsPartialService()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+        var cts = new CancellationTokenSource();
+
+        // Start continuous background load
+        var backgroundTask = CreateContinuousLoadAsync(client, cts.Token);
+
+        // Allow load to stabilize
+        await Task.Delay(3000);
+
+        // Check that some requests are still succeeding
+        var successCount = 0;
+        var totalChecks = 10;
+
+        for (var i = 0; i < totalChecks; i++)
+        {
+            var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+            if (response.IsSuccessStatusCode || response.StatusCode == HttpStatusCode.Accepted)
+            {
+                successCount++;
+            }
+            await Task.Delay(100);
+        }
+
+        cts.Cancel();
+        try { await backgroundTask; } catch (OperationCanceledException) { }
+
+        // Assert
+        successCount.Should().BeGreaterThan(0,
+            "Router should maintain partial service under load");
+
+        Console.WriteLine($"Partial service check: {successCount}/{totalChecks} successful");
+    }
+
+    [Fact]
+    public async Task Router_LatencyBounded_DuringSpike()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+        var latencies = new ConcurrentBag<long>();
+
+        // Create background load
+        var cts = new CancellationTokenSource();
+        var loadTask = CreateContinuousLoadAsync(client, cts.Token);
+
+        // Measure latencies during load
+        for (var i = 0; i < 20; i++)
+        {
+            var sw = Stopwatch.StartNew();
+            var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+            sw.Stop();
+
+            latencies.Add(sw.ElapsedMilliseconds);
+            await Task.Delay(100);
+        }
+
+        cts.Cancel();
+        try { await loadTask; } catch (OperationCanceledException) { }
+
+        // Assert
+        var avgLatency = latencies.Average();
+        var p95Latency = latencies.OrderBy(l => l).ElementAt((int)(latencies.Count * 0.95));
+
+        Console.WriteLine($"Latency during load: Avg={avgLatency:F0}ms, P95={p95Latency}ms");
+
+        // P95 latency should be bounded (allowing for throttle wait times)
+        p95Latency.Should().BeLessThan(10000,
+            "95th percentile latency should be bounded under load");
+    }
+
+    [Fact]
+    public async Task Router_QueueDepth_DoesNotGrowUnbounded()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+
+        // Create significant load
+        var tasks = Enumerable.Range(0, 200)
+            .Select(_ => client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
+
+        await Task.WhenAll(tasks);
+
+        // Check metrics for queue depth
+        var metricsResponse = await client.GetAsync("/metrics");
+
+        if (metricsResponse.IsSuccessStatusCode)
+        {
+            var metrics = await metricsResponse.Content.ReadAsStringAsync();
+
+            // Look for queue depth metric
+            if (metrics.Contains("queue_depth") || metrics.Contains("pending_requests"))
+            {
+                // Queue depth should be reasonable after burst
+                Console.WriteLine("Queue metrics found in /metrics endpoint");
+            }
+        }
+
+        // If we got here without timeout, queue is not growing unbounded
+    }
+
+    private static async Task CreateLoadSpikeAsync(HttpClient client, int requestCount, int durationSeconds)
+    {
+        var cts = new CancellationTokenSource(TimeSpan.FromSeconds(durationSeconds));
+        var tasks = new List<Task>();
+
+        try
+        {
+            for (var i = 0; i < requestCount && !cts.Token.IsCancellationRequested; i++)
+            {
+                tasks.Add(client.PostAsync("/api/v1/scan",
+                    RouterTestFixture.CreateScanRequest(),
+                    cts.Token));
+
+                // Small delay to spread requests
+                if (i % 50 == 0)
+                {
+                    await Task.Delay(10, cts.Token);
+                }
+            }
+
+            await Task.WhenAll(tasks.Where(t => !t.IsCanceled));
+        }
+        catch (OperationCanceledException)
+        {
+            // Expected when duration expires
+        }
+    }
+
+    private static async Task CreateContinuousLoadAsync(HttpClient client, CancellationToken ct)
+    {
+        while (!ct.IsCancellationRequested)
+        {
+            try
+            {
+                // Fire-and-forget requests
+                _ = client.PostAsync("/api/v1/scan",
+                    RouterTestFixture.CreateScanRequest(),
+                    ct);
+
+                await Task.Delay(50, ct);
+            }
+            catch (OperationCanceledException)
+            {
+                break;
+            }
+            catch
+            {
+                // Ignore errors during load generation
+            }
+        }
+    }
+}
--- a/tests/chaos/StellaOps.Chaos.Router.Tests/StellaOps.Chaos.Router.Tests.csproj
+++ b/tests/chaos/StellaOps.Chaos.Router.Tests/StellaOps.Chaos.Router.Tests.csproj
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>net10.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <IsPackable>false</IsPackable>
+    <RootNamespace>StellaOps.Chaos.Router.Tests</RootNamespace>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="FluentAssertions" Version="8.0.0" />
+    <PackageReference Include="Microsoft.AspNetCore.Mvc.Testing" Version="10.0.0" />
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.13.0" />
+    <PackageReference Include="Testcontainers" Version="4.3.0" />
+    <PackageReference Include="Testcontainers.Redis" Version="4.3.0" />
+    <PackageReference Include="xunit" Version="3.0.0" />
+    <PackageReference Include="xunit.runner.visualstudio" Version="3.0.0">
+      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
+      <PrivateAssets>all</PrivateAssets>
+    </PackageReference>
+  </ItemGroup>
+
+</Project>
--- a/tests/chaos/StellaOps.Chaos.Router.Tests/ValkeyFailureTests.cs
+++ b/tests/chaos/StellaOps.Chaos.Router.Tests/ValkeyFailureTests.cs
@@ -0,0 +1,217 @@
+// -----------------------------------------------------------------------------
+// ValkeyFailureTests.cs
+// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
+// Task: T4 - Valkey Failure Injection
+// Description: Test router behavior when Valkey cache fails.
+// -----------------------------------------------------------------------------
+
+using System.Diagnostics;
+using System.Net;
+using FluentAssertions;
+using StellaOps.Chaos.Router.Tests.Fixtures;
+
+namespace StellaOps.Chaos.Router.Tests;
+
+[Trait("Category", "Chaos")]
+[Trait("Category", "Valkey")]
+[Collection("ValkeyTests")]
+public class ValkeyFailureTests : IClassFixture<RouterWithValkeyFixture>, IAsyncLifetime
+{
+    private readonly RouterWithValkeyFixture _fixture;
+
+    public ValkeyFailureTests(RouterWithValkeyFixture fixture)
+    {
+        _fixture = fixture;
+    }
+
+    public async Task InitializeAsync()
+    {
+        await _fixture.StartValkeyAsync();
+    }
+
+    public Task DisposeAsync()
+    {
+        return Task.CompletedTask;
+    }
+
+    [Fact]
+    public async Task Router_ValkeyDown_FallsBackToLocal()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+
+        // Verify normal operation with Valkey
+        var response1 = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+        var initialSuccess = response1.IsSuccessStatusCode ||
+                            response1.StatusCode == HttpStatusCode.TooManyRequests;
+
+        // Kill Valkey
+        await _fixture.StopValkeyAsync();
+
+        // Wait for router to detect Valkey is down
+        await Task.Delay(2000);
+
+        // Act - Router should degrade gracefully
+        var response2 = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+
+        // Assert - Should still work with local rate limiter or return controlled error
+        var validStatuses = new[]
+        {
+            HttpStatusCode.OK,
+            HttpStatusCode.Accepted,
+            HttpStatusCode.TooManyRequests,
+            HttpStatusCode.ServiceUnavailable
+        };
+
+        response2.StatusCode.Should().BeOneOf(validStatuses,
+            "Router should fall back to local rate limiting when Valkey is down");
+
+        // Restore Valkey for other tests
+        await _fixture.StartValkeyAsync();
+    }
+
+    [Fact]
+    public async Task Router_ValkeyReconnect_ResumesDistributedLimiting()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+
+        // Kill and restart Valkey
+        await _fixture.StopValkeyAsync();
+        await Task.Delay(3000);
+        await _fixture.StartValkeyAsync();
+        await Task.Delay(2000);  // Allow reconnection
+
+        // Act - Send some requests after Valkey restart
+        var responses = new List<HttpResponseMessage>();
+        for (var i = 0; i < 10; i++)
+        {
+            responses.Add(await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
+            await Task.Delay(100);
+        }
+
+        // Assert - Requests should be processed
+        var successCount = responses.Count(r =>
+            r.IsSuccessStatusCode || r.StatusCode == HttpStatusCode.TooManyRequests);
+
+        successCount.Should().BeGreaterThan(0,
+            "Router should resume processing after Valkey reconnect");
+
+        // Optional: Check metrics for distributed limiting active
+        var metricsResponse = await client.GetAsync("/metrics");
+        if (metricsResponse.IsSuccessStatusCode)
+        {
+            var metrics = await metricsResponse.Content.ReadAsStringAsync();
+            Console.WriteLine("Metrics available after Valkey reconnect");
+            // Log whether distributed backend is active
+        }
+    }
+
+    [Fact]
+    public async Task Router_ValkeyLatency_DoesNotBlock()
+    {
+        // Arrange
+        await _fixture.ConfigureValkeyLatencyAsync(TimeSpan.FromSeconds(2));
+
+        var client = _fixture.CreateClient();
+        var stopwatch = Stopwatch.StartNew();
+
+        // Act
+        var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+
+        stopwatch.Stop();
+
+        // Assert - Request should complete without waiting for slow Valkey
+        // The router should have a timeout for cache operations
+        stopwatch.Elapsed.Should().BeLessThan(TimeSpan.FromSeconds(5),
+            "Slow Valkey should not significantly block request processing");
+
+        // Request should still be valid
+        var validStatuses = new[]
+        {
+            HttpStatusCode.OK,
+            HttpStatusCode.Accepted,
+            HttpStatusCode.TooManyRequests,
+            HttpStatusCode.ServiceUnavailable
+        };
+
+        response.StatusCode.Should().BeOneOf(validStatuses);
+
+        Console.WriteLine($"Request completed in {stopwatch.ElapsedMilliseconds}ms with slow Valkey");
+    }
+
+    [Fact]
+    public async Task Router_ValkeyFlap_HandlesGracefully()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+        var successCount = 0;
+        var errorCount = 0;
+
+        // Act - Simulate Valkey flapping
+        for (var cycle = 0; cycle < 3; cycle++)
+        {
+            // Valkey up
+            await _fixture.StartValkeyAsync();
+            await Task.Delay(1000);
+
+            for (var i = 0; i < 5; i++)
+            {
+                var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+                if (response.IsSuccessStatusCode) successCount++;
+                else errorCount++;
+            }
+
+            // Valkey down
+            await _fixture.StopValkeyAsync();
+            await Task.Delay(1000);
+
+            for (var i = 0; i < 5; i++)
+            {
+                var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
+                if (response.IsSuccessStatusCode) successCount++;
+                else if (response.StatusCode == HttpStatusCode.TooManyRequests)
+                    successCount++; // Throttled is acceptable
+                else errorCount++;
+            }
+        }
+
+        // Assert
+        var totalRequests = successCount + errorCount;
+        var successRate = (double)successCount / totalRequests;
+
+        Console.WriteLine($"Valkey flap test: {successCount}/{totalRequests} successful ({successRate:P2})");
+
+        successRate.Should().BeGreaterOrEqualTo(0.5,
+            "Router should handle at least 50% of requests during Valkey flapping");
+    }
+
+    [Fact]
+    public async Task Router_ValkeyConnectionExhaustion_DoesNotCrash()
+    {
+        // Arrange
+        var client = _fixture.CreateClient();
+
+        // Create many parallel requests that might exhaust Valkey connections
+        var tasks = Enumerable.Range(0, 500)
+            .Select(_ => client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
+
+        // Act
+        var responses = await Task.WhenAll(tasks);
+
+        // Assert - Router should not crash
+        var validResponses = responses.Count(r =>
+            r.StatusCode == HttpStatusCode.OK ||
+            r.StatusCode == HttpStatusCode.Accepted ||
+            r.StatusCode == HttpStatusCode.TooManyRequests ||
+            r.StatusCode == HttpStatusCode.ServiceUnavailable);
+
+        validResponses.Should().Be(responses.Length,
+            "All responses should be valid HTTP responses");
+
+        // Verify router is still responsive after burst
+        var healthCheck = await client.GetAsync("/health");
+        // Router health endpoint should respond
+        Console.WriteLine($"Health check after burst: {healthCheck.StatusCode}");
+    }
+}
--- a/tests/integration/StellaOps.Integration.Platform/PostgresOnlyStartupTests.cs
+++ b/tests/integration/StellaOps.Integration.Platform/PostgresOnlyStartupTests.cs
@@ -0,0 +1,248 @@
+// -----------------------------------------------------------------------------
+// PostgresOnlyStartupTests.cs
+// Sprint: SPRINT_5100_0001_0001_mongodb_cli_cleanup_consolidation
+// Task: T1.13 - PostgreSQL-only Platform Startup Test
+// Description: Validates platform can start with PostgreSQL-only infrastructure.
+// -----------------------------------------------------------------------------
+
+using System.Reflection;
+using StellaOps.Infrastructure.Postgres.Testing;
+using Testcontainers.PostgreSql;
+
+namespace StellaOps.Integration.Platform;
+
+/// <summary>
+/// Integration tests validating PostgreSQL-only platform startup.
+/// </summary>
+/// <remarks>
+/// T1.13-AC1: Platform starts successfully with PostgreSQL only
+/// T1.13-AC2: All services connect to PostgreSQL correctly
+/// T1.13-AC3: Schema migrations run successfully
+/// T1.13-AC4: No MongoDB connection attempts in logs
+/// </remarks>
+[Trait("Category", "Integration")]
+[Trait("Category", "Platform")]
+[Trait("Category", "PostgresOnly")]
+public class PostgresOnlyStartupTests : IAsyncLifetime
+{
+    private PostgreSqlContainer? _container;
+    private string? _connectionString;
+
+    public async Task InitializeAsync()
+    {
+        _container = new PostgreSqlBuilder()
+            .WithImage("postgres:16-alpine")
+            .Build();
+
+        await _container.StartAsync();
+        _connectionString = _container.GetConnectionString();
+    }
+
+    public async Task DisposeAsync()
+    {
+        if (_container != null)
+        {
+            await _container.DisposeAsync();
+        }
+    }
+
+    #region T1.13-AC1: Platform starts successfully with PostgreSQL only
+
+    [Fact(DisplayName = "T1.13-AC1.1: PostgreSQL container starts and accepts connections")]
+    public async Task PostgresContainer_StartsAndAcceptsConnections()
+    {
+        // Arrange & Act - already done in InitializeAsync
+
+        // Assert
+        _connectionString.Should().NotBeNullOrEmpty();
+        _container!.State.Should().Be(DotNet.Testcontainers.Containers.TestcontainersStates.Running);
+
+        // Verify connection works
+        using var connection = new Npgsql.NpgsqlConnection(_connectionString);
+        await connection.OpenAsync();
+        connection.State.Should().Be(System.Data.ConnectionState.Open);
+    }
+
+    [Fact(DisplayName = "T1.13-AC1.2: PostgreSQL connection string contains no MongoDB references")]
+    public void ConnectionString_ContainsNoMongoDbReferences()
+    {
+        // Assert
+        _connectionString.Should().NotContainAny("mongo", "mongodb", "27017");
+    }
+
+    #endregion
+
+    #region T1.13-AC2: Services connect to PostgreSQL correctly
+
+    [Fact(DisplayName = "T1.13-AC2.1: Can create and verify database schema")]
+    public async Task Database_CanCreateAndVerifySchema()
+    {
+        // Arrange
+        using var connection = new Npgsql.NpgsqlConnection(_connectionString);
+        await connection.OpenAsync();
+
+        // Act - Create a test schema
+        using var createCmd = connection.CreateCommand();
+        createCmd.CommandText = "CREATE SCHEMA IF NOT EXISTS test_platform";
+        await createCmd.ExecuteNonQueryAsync();
+
+        // Assert - Verify schema exists
+        using var verifyCmd = connection.CreateCommand();
+        verifyCmd.CommandText = @"
+            SELECT schema_name
+            FROM information_schema.schemata
+            WHERE schema_name = 'test_platform'";
+        var result = await verifyCmd.ExecuteScalarAsync();
+        result.Should().Be("test_platform");
+    }
+
+    [Fact(DisplayName = "T1.13-AC2.2: Can perform basic CRUD operations")]
+    public async Task Database_CanPerformCrudOperations()
+    {
+        // Arrange
+        using var connection = new Npgsql.NpgsqlConnection(_connectionString);
+        await connection.OpenAsync();
+
+        // Create test table
+        using var createCmd = connection.CreateCommand();
+        createCmd.CommandText = @"
+            CREATE TABLE IF NOT EXISTS test_crud (
+                id SERIAL PRIMARY KEY,
+                name VARCHAR(100) NOT NULL,
+                created_at TIMESTAMPTZ DEFAULT NOW()
+            )";
+        await createCmd.ExecuteNonQueryAsync();
+
+        // Act - Insert
+        using var insertCmd = connection.CreateCommand();
+        insertCmd.CommandText = "INSERT INTO test_crud (name) VALUES ('test-record') RETURNING id";
+        var insertedId = await insertCmd.ExecuteScalarAsync();
+        insertedId.Should().NotBeNull();
+
+        // Act - Select
+        using var selectCmd = connection.CreateCommand();
+        selectCmd.CommandText = "SELECT name FROM test_crud WHERE id = @id";
+        selectCmd.Parameters.AddWithValue("id", insertedId!);
+        var name = await selectCmd.ExecuteScalarAsync();
+        name.Should().Be("test-record");
+
+        // Act - Update
+        using var updateCmd = connection.CreateCommand();
+        updateCmd.CommandText = "UPDATE test_crud SET name = 'updated-record' WHERE id = @id";
+        updateCmd.Parameters.AddWithValue("id", insertedId!);
+        var rowsAffected = await updateCmd.ExecuteNonQueryAsync();
+        rowsAffected.Should().Be(1);
+
+        // Act - Delete
+        using var deleteCmd = connection.CreateCommand();
+        deleteCmd.CommandText = "DELETE FROM test_crud WHERE id = @id";
+        deleteCmd.Parameters.AddWithValue("id", insertedId!);
+        rowsAffected = await deleteCmd.ExecuteNonQueryAsync();
+        rowsAffected.Should().Be(1);
+    }
+
+    #endregion
+
+    #region T1.13-AC3: Schema migrations run successfully
+
+    [Fact(DisplayName = "T1.13-AC3.1: Can run DDL migrations")]
+    public async Task Database_CanRunDdlMigrations()
+    {
+        // Arrange
+        using var connection = new Npgsql.NpgsqlConnection(_connectionString);
+        await connection.OpenAsync();
+
+        // Act - Run a migration-like DDL script
+        var migrationScript = @"
+            -- V1: Create migrations tracking table
+            CREATE TABLE IF NOT EXISTS schema_migrations (
+                version VARCHAR(50) PRIMARY KEY,
+                applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                checksum VARCHAR(64) NOT NULL
+            );
+
+            -- V2: Create sample domain table
+            CREATE TABLE IF NOT EXISTS scan_results (
+                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+                image_ref TEXT NOT NULL,
+                findings_count INT NOT NULL DEFAULT 0,
+                created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+            );
+
+            -- Record migration
+            INSERT INTO schema_migrations (version, checksum)
+            VALUES ('V2_create_scan_results', 'abc123')
+            ON CONFLICT (version) DO NOTHING;
+        ";
+
+        using var migrateCmd = connection.CreateCommand();
+        migrateCmd.CommandText = migrationScript;
+        await migrateCmd.ExecuteNonQueryAsync();
+
+        // Assert - Verify migration recorded
+        using var verifyCmd = connection.CreateCommand();
+        verifyCmd.CommandText = "SELECT COUNT(*) FROM schema_migrations WHERE version = 'V2_create_scan_results'";
+        var count = await verifyCmd.ExecuteScalarAsync();
+        Convert.ToInt32(count).Should().Be(1);
+    }
+
+    [Fact(DisplayName = "T1.13-AC3.2: PostgreSQL extensions can be created")]
+    public async Task Database_CanCreateExtensions()
+    {
+        // Arrange
+        using var connection = new Npgsql.NpgsqlConnection(_connectionString);
+        await connection.OpenAsync();
+
+        // Act - Create common extensions used by StellaOps
+        using var extCmd = connection.CreateCommand();
+        extCmd.CommandText = "CREATE EXTENSION IF NOT EXISTS \"uuid-ossp\"";
+        await extCmd.ExecuteNonQueryAsync();
+
+        // Assert - Verify extension exists
+        using var verifyCmd = connection.CreateCommand();
+        verifyCmd.CommandText = "SELECT COUNT(*) FROM pg_extension WHERE extname = 'uuid-ossp'";
+        var count = await verifyCmd.ExecuteScalarAsync();
+        Convert.ToInt32(count).Should().Be(1);
+    }
+
+    #endregion
+
+    #region T1.13-AC4: No MongoDB connection attempts
+
+    [Fact(DisplayName = "T1.13-AC4.1: Environment variables contain no MongoDB references")]
+    public void EnvironmentVariables_ContainNoMongoDbReferences()
+    {
+        // Arrange - Get all environment variables
+        var envVars = Environment.GetEnvironmentVariables();
+
+        // Act & Assert
+        foreach (string key in envVars.Keys)
+        {
+            var value = envVars[key]?.ToString() ?? "";
+
+            // Skip if this is our test connection string
+            if (key.Contains("POSTGRES", StringComparison.OrdinalIgnoreCase))
+                continue;
+
+            key.Should().NotContainEquivalentOf("mongo",
+                $"Environment variable key '{key}' should not reference MongoDB");
+        }
+    }
+
+    [Fact(DisplayName = "T1.13-AC4.2: PostgreSQL-only configuration is valid")]
+    public void Configuration_IsPostgresOnly()
+    {
+        // This test documents the expected configuration pattern
+        var expectedConfig = new Dictionary<string, string>
+        {
+            ["STELLAOPS_STORAGE_DRIVER"] = "postgres",
+            ["STELLAOPS_CACHE_DRIVER"] = "valkey",  // or "redis" for compatibility
+        };
+
+        // Assert - Document the expected pattern
+        expectedConfig["STELLAOPS_STORAGE_DRIVER"].Should().NotBe("mongodb");
+        expectedConfig["STELLAOPS_STORAGE_DRIVER"].Should().Be("postgres");
+    }
+
+    #endregion
+}
--- a/tests/integration/StellaOps.Integration.Platform/StellaOps.Integration.Platform.csproj
+++ b/tests/integration/StellaOps.Integration.Platform/StellaOps.Integration.Platform.csproj
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  StellaOps.Integration.Platform.csproj
+  Sprint: SPRINT_5100_0001_0001_mongodb_cli_cleanup_consolidation
+  Task: T1.13 - PostgreSQL-only Platform Startup Test
+  Description: Integration tests for platform startup with PostgreSQL-only stack
+-->
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>net10.0</TargetFramework>
+    <LangVersion>preview</LangVersion>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <IsPackable>false</IsPackable>
+    <IsTestProject>true</IsTestProject>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
+    <PackageReference Include="xunit" Version="2.9.2" />
+    <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
+      <PrivateAssets>all</PrivateAssets>
+      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
+    </PackageReference>
+    <PackageReference Include="FluentAssertions" Version="6.12.0" />
+    <PackageReference Include="Testcontainers" Version="3.6.0" />
+    <PackageReference Include="Testcontainers.PostgreSql" Version="3.6.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <!-- Infrastructure testing library -->
+    <ProjectReference Include="../../../src/__Libraries/StellaOps.Infrastructure.Postgres.Testing/StellaOps.Infrastructure.Postgres.Testing.csproj" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <Using Include="Xunit" />
+    <Using Include="FluentAssertions" />
+  </ItemGroup>
+
+</Project>
--- a/tests/interop/StellaOps.Interop.Tests/InteropTestHarness.cs
+++ b/tests/interop/StellaOps.Interop.Tests/InteropTestHarness.cs
@@ -197,58 +197,3 @@ public sealed class InteropTestHarness : IAsyncLifetime
        return Array.Empty<GrypeFinding>();
    }
 }
-
-public enum SbomFormat
-{
-    CycloneDx16,
-    Spdx30
-}
-
-public sealed record SbomResult(
-    bool Success,
-    string? Path = null,
-    SbomFormat? Format = null,
-    string? Content = null,
-    string? Digest = null,
-    string? Error = null)
-{
-    public static SbomResult Failed(string error) => new(false, Error: error);
-}
-
-public sealed record AttestationResult(
-    bool Success,
-    string? ImageRef = null,
-    string? Error = null)
-{
-    public static AttestationResult Failed(string error) => new(false, Error: error);
-}
-
-public sealed record GrypeScanResult(
-    bool Success,
-    IReadOnlyList<GrypeFinding>? Findings = null,
-    string? RawOutput = null,
-    string? Error = null)
-{
-    public static GrypeScanResult Failed(string error) => new(false, Error: error);
-}
-
-public sealed record FindingsComparisonResult(
-    decimal ParityPercent,
-    bool IsWithinTolerance,
-    int StellaTotalFindings,
-    int GrypeTotalFindings,
-    int MatchingFindings,
-    int OnlyInStella,
-    int OnlyInGrype,
-    IReadOnlyList<(string VulnId, string Purl)> OnlyInStellaDetails,
-    IReadOnlyList<(string VulnId, string Purl)> OnlyInGrypeDetails);
-
-public sealed record Finding(
-    string VulnerabilityId,
-    string PackagePurl,
-    string Severity);
-
-public sealed record GrypeFinding(
-    string VulnerabilityId,
-    string PackagePurl,
-    string Severity);
--- a/tests/interop/StellaOps.Interop.Tests/Models.cs
+++ b/tests/interop/StellaOps.Interop.Tests/Models.cs
@@ -0,0 +1,78 @@
+// -----------------------------------------------------------------------------
+// Models.cs
+// Sprint: SPRINT_5100_0003_0001_sbom_interop_roundtrip
+// Task: T1, T7 - Interop Test Harness & Project Setup
+// Description: Models for SBOM interoperability testing.
+// -----------------------------------------------------------------------------
+
+using System.Collections.Immutable;
+using System.Security.Cryptography;
+using System.Text;
+
+namespace StellaOps.Interop.Tests;
+
+public enum SbomFormat
+{
+    CycloneDx16,
+    Spdx30
+}
+
+public sealed record SbomResult(
+    bool Success,
+    string? Path = null,
+    SbomFormat? Format = null,
+    string? Content = null,
+    string? Digest = null,
+    string? Error = null)
+{
+    public static SbomResult Failed(string error) => new(false, Error: error);
+}
+
+public sealed record AttestationResult(
+    bool Success,
+    string? ImageRef = null,
+    string? Error = null)
+{
+    public static AttestationResult Failed(string error) => new(false, Error: error);
+}
+
+public sealed record GrypeScanResult(
+    bool Success,
+    IReadOnlyList<GrypeFinding>? Findings = null,
+    string? RawOutput = null,
+    string? Error = null)
+{
+    public static GrypeScanResult Failed(string error) => new(false, Error: error);
+}
+
+public sealed record GrypeFinding(
+    string VulnerabilityId,
+    string PackagePurl,
+    string Severity,
+    string? FixedIn = null);
+
+public sealed record Finding(
+    string VulnerabilityId,
+    string PackagePurl,
+    string Severity);
+
+public sealed record ToolResult(
+    bool Success,
+    string Output,
+    string? Error = null);
+
+public sealed record FindingsComparisonResult(
+    decimal ParityPercent,
+    bool IsWithinTolerance,
+    int StellaTotalFindings,
+    int GrypeTotalFindings,
+    int MatchingFindings,
+    int OnlyInStella,
+    int OnlyInGrype,
+    IReadOnlyList<(string VulnId, string Purl)> OnlyInStellaDetails,
+    IReadOnlyList<(string VulnId, string Purl)> OnlyInGrypeDetails);
+
+public sealed record VerifyResult(
+    bool Success,
+    string? PredicateDigest = null,
+    string? Error = null);
--- a/tests/interop/StellaOps.Interop.Tests/StellaOps.Interop.Tests.csproj
+++ b/tests/interop/StellaOps.Interop.Tests/StellaOps.Interop.Tests.csproj
@@ -5,19 +5,14 @@
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <IsPackable>false</IsPackable>
-    <IsTestProject>true</IsTestProject>
-    <LangVersion>preview</LangVersion>
+    <RootNamespace>StellaOps.Interop.Tests</RootNamespace>
  </PropertyGroup>

  <ItemGroup>
-    <PackageReference Include="coverlet.collector" Version="6.0.0">
-      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
-      <PrivateAssets>all</PrivateAssets>
-    </PackageReference>
    <PackageReference Include="FluentAssertions" Version="6.12.0" />
-    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.9.0" />
-    <PackageReference Include="xunit" Version="2.6.6" />
-    <PackageReference Include="xunit.runner.visualstudio" Version="2.5.6">
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.13.0" />
+    <PackageReference Include="xunit" Version="2.9.2" />
+    <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
@@ -26,7 +21,6 @@
  <ItemGroup>
    <Using Include="Xunit" />
    <Using Include="FluentAssertions" />
-    <Using Include="System.Collections.Immutable" />
  </ItemGroup>

 </Project>
--- a/tests/interop/StellaOps.Interop.Tests/ToolManager.cs
+++ b/tests/interop/StellaOps.Interop.Tests/ToolManager.cs
@@ -1,11 +1,14 @@
-namespace StellaOps.Interop.Tests;
+// -----------------------------------------------------------------------------
+// ToolManager.cs
+// Sprint: SPRINT_5100_0003_0001_sbom_interop_roundtrip
+// Task: T1 - Interop Test Harness
+// Description: Manages execution of external tools (Syft, Grype, cosign).
+// -----------------------------------------------------------------------------

 using System.Diagnostics;
-using System.Text;

-/// <summary>
-/// Manages execution of external tools for interop testing.
-/// </summary>
+namespace StellaOps.Interop.Tests;
+
 public sealed class ToolManager
 {
    private readonly string _workDir;
@@ -15,110 +18,66 @@ public sealed class ToolManager
        _workDir = workDir;
    }

-    /// <summary>
-    /// Verify that a tool is available and executable.
-    /// </summary>
-    public async Task<bool> VerifyToolAsync(string toolName, string testArgs, CancellationToken ct = default)
+    public async Task VerifyToolAsync(string tool, string versionArg)
    {
-        try
+        var result = await RunAsync(tool, versionArg, CancellationToken.None);
+        if (!result.Success)
        {
-            var result = await RunAsync(toolName, testArgs, ct);
-            return result.Success || result.ExitCode == 0; // Some tools return 0 even on --version
-        }
-        catch
-        {
-            return false;
+            throw new InvalidOperationException(
+                $"Tool '{tool}' is not available or failed verification: {result.Error}");
        }
    }

-    /// <summary>
-    /// Run an external tool with arguments.
-    /// </summary>
    public async Task<ToolResult> RunAsync(
-        string toolName,
+        string tool,
        string arguments,
-        CancellationToken ct = default,
-        int timeoutMs = 300000) // 5 minute default timeout
+        CancellationToken ct,
+        int timeoutSeconds = 300)
    {
-        var startInfo = new ProcessStartInfo
-        {
-            FileName = toolName,
-            Arguments = arguments,
-            WorkingDirectory = _workDir,
-            RedirectStandardOutput = true,
-            RedirectStandardError = true,
-            UseShellExecute = false,
-            CreateNoWindow = true
-        };
-
-        using var process = new Process { StartInfo = startInfo };
-        var outputBuilder = new StringBuilder();
-        var errorBuilder = new StringBuilder();
-
-        process.OutputDataReceived += (sender, e) =>
-        {
-            if (e.Data != null)
-                outputBuilder.AppendLine(e.Data);
-        };
-
-        process.ErrorDataReceived += (sender, e) =>
-        {
-            if (e.Data != null)
-                errorBuilder.AppendLine(e.Data);
-        };
-
        try
        {
+            using var process = new Process
+            {
+                StartInfo = new ProcessStartInfo
+                {
+                    FileName = tool,
+                    Arguments = arguments,
+                    WorkingDirectory = _workDir,
+                    RedirectStandardOutput = true,
+                    RedirectStandardError = true,
+                    UseShellExecute = false,
+                    CreateNoWindow = true
+                }
+            };
+
            process.Start();
-            process.BeginOutputReadLine();
-            process.BeginErrorReadLine();

-            using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
-            cts.CancelAfter(timeoutMs);
+            var outputTask = process.StandardOutput.ReadToEndAsync(ct);
+            var errorTask = process.StandardError.ReadToEndAsync(ct);

-            await process.WaitForExitAsync(cts.Token);
+            var completed = await Task.WhenAny(
+                process.WaitForExitAsync(ct),
+                Task.Delay(TimeSpan.FromSeconds(timeoutSeconds), ct));

-            var output = outputBuilder.ToString();
-            var error = errorBuilder.ToString();
-            var exitCode = process.ExitCode;
-
-            return new ToolResult(
-                Success: exitCode == 0,
-                ExitCode: exitCode,
-                Output: output,
-                Error: string.IsNullOrWhiteSpace(error) ? null : error);
-        }
-        catch (OperationCanceledException)
-        {
-            try
+            if (!process.HasExited)
            {
-                if (!process.HasExited)
-                    process.Kill();
-            }
-            catch
-            {
-                // Ignore kill failures
+                process.Kill(entireProcessTree: true);
+                return new ToolResult(false, "", "Process timed out");
            }

-            return new ToolResult(
-                Success: false,
-                ExitCode: -1,
-                Output: outputBuilder.ToString(),
-                Error: $"Tool execution timed out after {timeoutMs}ms");
+            var output = await outputTask;
+            var error = await errorTask;
+
+            if (process.ExitCode != 0)
+            {
+                return new ToolResult(false, output, error);
+            }
+
+            return new ToolResult(true, output);
        }
        catch (Exception ex)
        {
-            return new ToolResult(
-                Success: false,
-                ExitCode: -1,
-                Output: outputBuilder.ToString(),
-                Error: $"Tool execution failed: {ex.Message}");
+            return new ToolResult(false, "", ex.Message);
        }
    }
 }
-
-public sealed record ToolResult(
-    bool Success,
-    int ExitCode,
-    string Output,
-    string? Error = null);
--- a/tests/load/router/spike-test.js
+++ b/tests/load/router/spike-test.js
@@ -0,0 +1,227 @@
+// -----------------------------------------------------------------------------
+// spike-test.js
+// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
+// Task: T1 - Load Test Harness
+// Description: k6 load test for router spike testing and backpressure validation.
+// -----------------------------------------------------------------------------
+
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+import { Rate, Trend, Counter } from 'k6/metrics';
+
+// Custom metrics for throttle behavior
+const throttledRate = new Rate('throttled_requests');
+const retryAfterTrend = new Trend('retry_after_seconds');
+const recoveryTime = new Trend('recovery_time_ms');
+const throttle429Count = new Counter('throttle_429_count');
+const throttle503Count = new Counter('throttle_503_count');
+const successCount = new Counter('success_count');
+
+export const options = {
+  scenarios: {
+    // Phase 1: Baseline load (normal operation)
+    baseline: {
+      executor: 'constant-arrival-rate',
+      rate: 100,
+      timeUnit: '1s',
+      duration: '1m',
+      preAllocatedVUs: 50,
+      maxVUs: 100,
+    },
+    // Phase 2: 10x spike
+    spike_10x: {
+      executor: 'constant-arrival-rate',
+      rate: 1000,
+      timeUnit: '1s',
+      duration: '30s',
+      startTime: '1m',
+      preAllocatedVUs: 500,
+      maxVUs: 1000,
+    },
+    // Phase 3: 50x spike
+    spike_50x: {
+      executor: 'constant-arrival-rate',
+      rate: 5000,
+      timeUnit: '1s',
+      duration: '30s',
+      startTime: '2m',
+      preAllocatedVUs: 2000,
+      maxVUs: 5000,
+    },
+    // Phase 4: Recovery observation
+    recovery: {
+      executor: 'constant-arrival-rate',
+      rate: 100,
+      timeUnit: '1s',
+      duration: '2m',
+      startTime: '3m',
+      preAllocatedVUs: 50,
+      maxVUs: 100,
+    },
+  },
+  thresholds: {
+    // At least 95% of requests should succeed OR return proper throttle response
+    'http_req_failed{expected_response:true}': ['rate<0.05'],
+    // Throttled requests should have Retry-After header
+    'throttled_requests': ['rate>0'],  // We expect some throttling during spike
+    // Recovery should happen within reasonable time
+    'recovery_time_ms': ['p(95)<30000'],  // 95% recover within 30s
+    // Response time should be bounded even under load
+    'http_req_duration{expected_response:true}': ['p(95)<5000'],
+  },
+};
+
+const ROUTER_URL = __ENV.ROUTER_URL || 'http://localhost:8080';
+const API_ENDPOINT = __ENV.API_ENDPOINT || '/api/v1/scan';
+
+export function setup() {
+  console.log(`Testing router at: ${ROUTER_URL}${API_ENDPOINT}`);
+
+  // Verify router is reachable
+  const healthCheck = http.get(`${ROUTER_URL}/health`);
+  if (healthCheck.status !== 200) {
+    console.warn(`Router health check returned ${healthCheck.status}`);
+  }
+
+  return {
+    startTime: new Date().toISOString(),
+    routerUrl: ROUTER_URL,
+  };
+}
+
+export default function () {
+  const payload = JSON.stringify({
+    image: 'alpine:latest',
+    requestId: `spike-test-${__VU}-${__ITER}`,
+    timestamp: new Date().toISOString(),
+  });
+
+  const params = {
+    headers: {
+      'Content-Type': 'application/json',
+      'X-Request-ID': `${__VU}-${__ITER}`,
+    },
+    tags: { expected_response: 'true' },
+    timeout: '10s',
+  };
+
+  const response = http.post(`${ROUTER_URL}${API_ENDPOINT}`, payload, params);
+
+  // Handle throttle responses (429 Too Many Requests)
+  if (response.status === 429) {
+    throttledRate.add(1);
+    throttle429Count.add(1);
+
+    // Verify Retry-After header
+    const retryAfter = response.headers['Retry-After'];
+    check(response, {
+      '429 has Retry-After header': (r) => r.headers['Retry-After'] !== undefined,
+      'Retry-After is valid number': (r) => {
+        const val = r.headers['Retry-After'];
+        return val && !isNaN(parseInt(val));
+      },
+      'Retry-After is reasonable (1-300s)': (r) => {
+        const val = parseInt(r.headers['Retry-After']);
+        return val >= 1 && val <= 300;
+      },
+    });
+
+    if (retryAfter) {
+      retryAfterTrend.add(parseInt(retryAfter));
+    }
+  }
+  // Handle overload responses (503 Service Unavailable)
+  else if (response.status === 503) {
+    throttledRate.add(1);
+    throttle503Count.add(1);
+
+    check(response, {
+      '503 has Retry-After header': (r) => r.headers['Retry-After'] !== undefined,
+    });
+
+    const retryAfter = response.headers['Retry-After'];
+    if (retryAfter) {
+      retryAfterTrend.add(parseInt(retryAfter));
+    }
+  }
+  // Handle success responses
+  else {
+    throttledRate.add(0);
+    successCount.add(1);
+
+    check(response, {
+      'status is 200 or 202': (r) => r.status === 200 || r.status === 202,
+      'response has body': (r) => r.body && r.body.length > 0,
+      'response time < 5s': (r) => r.timings.duration < 5000,
+    });
+  }
+
+  // Track any errors
+  if (response.status >= 500 && response.status !== 503) {
+    check(response, {
+      'no unexpected 5xx errors': () => false,
+    });
+  }
+}
+
+export function teardown(data) {
+  console.log(`Test completed. Started at: ${data.startTime}`);
+  console.log(`Router URL: ${data.routerUrl}`);
+}
+
+export function handleSummary(data) {
+  const summary = {
+    testRun: {
+      startTime: new Date().toISOString(),
+      routerUrl: ROUTER_URL,
+    },
+    metrics: {
+      totalRequests: data.metrics.http_reqs ? data.metrics.http_reqs.values.count : 0,
+      throttled429: data.metrics.throttle_429_count ? data.metrics.throttle_429_count.values.count : 0,
+      throttled503: data.metrics.throttle_503_count ? data.metrics.throttle_503_count.values.count : 0,
+      successful: data.metrics.success_count ? data.metrics.success_count.values.count : 0,
+      throttleRate: data.metrics.throttled_requests ? data.metrics.throttled_requests.values.rate : 0,
+      retryAfterAvg: data.metrics.retry_after_seconds ? data.metrics.retry_after_seconds.values.avg : null,
+      retryAfterP95: data.metrics.retry_after_seconds ? data.metrics.retry_after_seconds.values['p(95)'] : null,
+    },
+    thresholds: data.thresholds,
+    checks: data.metrics.checks ? {
+      passes: data.metrics.checks.values.passes,
+      fails: data.metrics.checks.values.fails,
+      rate: data.metrics.checks.values.rate,
+    } : null,
+  };
+
+  return {
+    'results/spike-test-summary.json': JSON.stringify(summary, null, 2),
+    stdout: textSummary(data, { indent: ' ', enableColors: true }),
+  };
+}
+
+function textSummary(data, options) {
+  let output = '\n=== Router Spike Test Summary ===\n\n';
+
+  const totalReqs = data.metrics.http_reqs ? data.metrics.http_reqs.values.count : 0;
+  const throttled429 = data.metrics.throttle_429_count ? data.metrics.throttle_429_count.values.count : 0;
+  const throttled503 = data.metrics.throttle_503_count ? data.metrics.throttle_503_count.values.count : 0;
+  const successful = data.metrics.success_count ? data.metrics.success_count.values.count : 0;
+
+  output += `Total Requests: ${totalReqs}\n`;
+  output += `Successful (2xx): ${successful}\n`;
+  output += `Throttled (429): ${throttled429}\n`;
+  output += `Overloaded (503): ${throttled503}\n`;
+  output += `Throttle Rate: ${((throttled429 + throttled503) / totalReqs * 100).toFixed(2)}%\n`;
+
+  if (data.metrics.retry_after_seconds) {
+    output += `\nRetry-After Header:\n`;
+    output += `  Avg: ${data.metrics.retry_after_seconds.values.avg.toFixed(2)}s\n`;
+    output += `  P95: ${data.metrics.retry_after_seconds.values['p(95)'].toFixed(2)}s\n`;
+  }
+
+  output += '\nThreshold Results:\n';
+  for (const [name, result] of Object.entries(data.thresholds || {})) {
+    output += `  ${result.ok ? 'PASS' : 'FAIL'}: ${name}\n`;
+  }
+
+  return output;
+}
--- a/tests/load/router/thresholds.json
+++ b/tests/load/router/thresholds.json
@@ -0,0 +1,55 @@
+{
+  "description": "Router chaos test thresholds for SPRINT_5100_0005_0001",
+  "thresholds": {
+    "recovery_time_seconds": {
+      "max": 30,
+      "description": "Maximum time to recover after load spike"
+    },
+    "throttle_rate_max": {
+      "max": 0.95,
+      "description": "Maximum percentage of requests that can be throttled during spike"
+    },
+    "success_rate_baseline": {
+      "min": 0.99,
+      "description": "Minimum success rate during baseline load"
+    },
+    "success_rate_recovery": {
+      "min": 0.95,
+      "description": "Minimum success rate during recovery phase"
+    },
+    "retry_after_max_seconds": {
+      "max": 300,
+      "description": "Maximum Retry-After value in seconds"
+    },
+    "retry_after_min_seconds": {
+      "min": 1,
+      "description": "Minimum Retry-After value in seconds"
+    },
+    "response_time_p95_ms": {
+      "max": 5000,
+      "description": "95th percentile response time under normal load"
+    },
+    "data_loss_rate": {
+      "max": 0,
+      "description": "No data loss allowed during throttling"
+    }
+  },
+  "scenarios": {
+    "baseline": {
+      "expected_throttle_rate": 0.01,
+      "expected_success_rate": 0.99
+    },
+    "spike_10x": {
+      "expected_throttle_rate": 0.5,
+      "expected_success_rate": 0.5
+    },
+    "spike_50x": {
+      "expected_throttle_rate": 0.9,
+      "expected_success_rate": 0.1
+    },
+    "recovery": {
+      "expected_throttle_rate": 0.05,
+      "expected_success_rate": 0.95
+    }
+  }
+}