- Introduced `sink-detect.js` with various security sink detection patterns categorized by type (e.g., command injection, SQL injection, file operations). - Implemented functions to build a lookup map for fast sink detection and to match sink calls against known patterns. - Added `package-lock.json` for dependency management.
299 lines
9.6 KiB
C#
299 lines
9.6 KiB
C#
// -----------------------------------------------------------------------------
|
|
// RecoveryTests.cs
|
|
// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
|
|
// Task: T3 - Recovery and Resilience Tests
|
|
// Description: Test router recovery after load spikes.
|
|
// -----------------------------------------------------------------------------
|
|
|
|
using System.Collections.Concurrent;
|
|
using System.Diagnostics;
|
|
using System.Net;
|
|
using FluentAssertions;
|
|
using StellaOps.Chaos.Router.Tests.Fixtures;
|
|
|
|
namespace StellaOps.Chaos.Router.Tests;
|
|
|
|
[Trait("Category", "Chaos")]
|
|
[Trait("Category", "Router")]
|
|
[Trait("Category", "Recovery")]
|
|
public class RecoveryTests : IClassFixture<RouterTestFixture>
|
|
{
|
|
private readonly RouterTestFixture _fixture;
|
|
|
|
/// <summary>
/// Stores the shared <see cref="RouterTestFixture"/> supplied by xUnit so each test can create clients from it.
/// </summary>
/// <param name="fixture">Fixture instance shared across the tests in this class.</param>
public RecoveryTests(RouterTestFixture fixture) => _fixture = fixture;
|
|
|
|
/// <summary>
/// Checks that the scan endpoint answers successfully again after a burst of traffic.
/// </summary>
/// <remarks>
/// Sends one baseline request, fires a spike of up to 500 requests within 5 seconds, then polls
/// the endpoint for at most 60 seconds until a request succeeds. Recovery is only asserted when
/// the baseline request returned a success status or 429.
/// </remarks>
[Fact]
public async Task Router_AfterSpike_RecoveryWithin30Seconds()
{
    // Arrange
    var client = _fixture.CreateClient();

    // Phase 1: Verify normal operation
    using var normalResponse = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
    var normalWorking = normalResponse.IsSuccessStatusCode ||
                        normalResponse.StatusCode == HttpStatusCode.TooManyRequests;

    // Phase 2: Create load spike
    await CreateLoadSpikeAsync(client, requestCount: 500, durationSeconds: 5);

    // Phase 3: Measure recovery
    var recovered = false;
    var recoveryStart = Stopwatch.StartNew();

    // Poll for up to 60 seconds so the elapsed time is still measured when the 30 second budget is missed.
    while (recoveryStart.Elapsed < TimeSpan.FromSeconds(60))
    {
        using var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());

        if (response.IsSuccessStatusCode)
        {
            recovered = true;
            break;
        }

        await Task.Delay(GetRetryDelay(response));
    }

    recoveryStart.Stop();

    // Assert
    if (normalWorking)
    {
        recovered.Should().BeTrue("Router should recover after spike");
        recoveryStart.Elapsed.Should().BeLessThan(TimeSpan.FromSeconds(30),
            "Recovery should happen within 30 seconds");
    }

    Console.WriteLine($"Recovery time: {recoveryStart.Elapsed.TotalSeconds:F2}s");

    // Picks the pause before the next probe: the Retry-After value (clamped to 0..5 seconds) when a
    // 429 carries a parseable one, otherwise one second.
    static TimeSpan GetRetryDelay(HttpResponseMessage failed)
    {
        // TryGetValues instead of GetValues: GetValues throws when the header is absent,
        // which would abort the test instead of using the one second fallback.
        if (failed.StatusCode == HttpStatusCode.TooManyRequests &&
            failed.Headers.TryGetValues("Retry-After", out var values) &&
            int.TryParse(values.FirstOrDefault(), out var waitSeconds))
        {
            // Clamp the lower bound too: a negative delay makes Task.Delay throw.
            return TimeSpan.FromSeconds(Math.Clamp(waitSeconds, 0, 5));
        }

        return TimeSpan.FromSeconds(1);
    }
}
|
|
|
|
/// <summary>
/// Submits 100 concurrent scan requests, retrying each one while it is throttled, and checks
/// that at least 90% of them end with a success status.
/// </summary>
/// <remarks>
/// Each request is attempted at most 10 times. A 429 response triggers a wait taken from the
/// Retry-After header (clamped to 0..5 seconds, defaulting to 2 seconds) before the next attempt;
/// any other status ends the retry loop for that request.
/// </remarks>
[Fact]
public async Task Router_NoDataLoss_DuringThrottling()
{
    // Arrange
    var client = _fixture.CreateClient();
    var submittedIds = new ConcurrentBag<string>();
    var successfulIds = new ConcurrentBag<string>();
    var maxRetries = 10;

    // Act - Submit requests with tracking and retry on throttle
    var tasks = Enumerable.Range(0, 100).Select(async _ =>
    {
        var scanId = Guid.NewGuid().ToString();
        submittedIds.Add(scanId);

        for (var attempt = 0; attempt < maxRetries; attempt++)
        {
            // Scoped to one attempt so throttled responses are released before the next try.
            using var response = await client.PostAsync("/api/v1/scan",
                RouterTestFixture.CreateScanRequest(scanId));

            if (response.StatusCode != HttpStatusCode.TooManyRequests)
            {
                if (response.IsSuccessStatusCode)
                {
                    successfulIds.Add(scanId);
                }

                return;
            }

            // TryGetValues instead of GetValues: GetValues throws when the header is absent,
            // which would fault the whole test rather than fall back to the default wait.
            var waitSeconds = response.Headers.TryGetValues("Retry-After", out var values) &&
                              int.TryParse(values.FirstOrDefault(), out var ra)
                ? ra
                : 2;

            // Clamp the lower bound too: a negative delay makes Task.Delay throw.
            await Task.Delay(TimeSpan.FromSeconds(Math.Clamp(waitSeconds, 0, 5)));
        }
    });

    await Task.WhenAll(tasks);

    // Assert
    var successRate = (double)successfulIds.Count / submittedIds.Count;
    Console.WriteLine($"Success rate with retries: {successRate:P2} ({successfulIds.Count}/{submittedIds.Count})");

    // With retries in place, at least 90% of the submitted requests must end in success
    successRate.Should().BeGreaterOrEqualTo(0.9,
        "At least 90% of requests should succeed with retry logic");
}
|
|
|
|
/// <summary>
/// Checks that at least one of ten probe requests succeeds while continuous background load is running.
/// </summary>
/// <remarks>
/// Background load is started, given 3 seconds to build up, and then ten probes are sent 100 ms apart.
/// The load is always cancelled and awaited before the assertion, including when a probe throws.
/// </remarks>
[Fact]
public async Task Router_GracefulDegradation_MaintainsPartialService()
{
    // Arrange
    var client = _fixture.CreateClient();
    using var cts = new CancellationTokenSource();

    // Start continuous background load
    var backgroundTask = CreateContinuousLoadAsync(client, cts.Token);

    var successCount = 0;
    var totalChecks = 10;

    try
    {
        // Allow load to stabilize
        await Task.Delay(3000);

        // Check that some requests are still succeeding
        for (var i = 0; i < totalChecks; i++)
        {
            using var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());

            // IsSuccessStatusCode covers every 2xx status, 202 Accepted included.
            if (response.IsSuccessStatusCode)
            {
                successCount++;
            }

            await Task.Delay(100);
        }
    }
    finally
    {
        // Stop the load even when a probe throws, so it cannot keep running after this test ends.
        cts.Cancel();
        try { await backgroundTask; } catch (OperationCanceledException) { }
    }

    // Assert
    successCount.Should().BeGreaterThan(0,
        "Router should maintain partial service under load");

    Console.WriteLine($"Partial service check: {successCount}/{totalChecks} successful");
}
|
|
|
|
/// <summary>
/// Measures the latency of 20 sequential scan requests while background load is running and
/// checks that the 95th percentile stays below 10 seconds.
/// </summary>
/// <remarks>
/// The percentile uses the nearest-rank method: for 20 samples that is the 19th smallest value,
/// so a single outlier does not decide the result.
/// </remarks>
[Fact]
public async Task Router_LatencyBounded_DuringSpike()
{
    // Arrange
    var client = _fixture.CreateClient();
    var latencies = new List<long>();

    // Create background load
    using var cts = new CancellationTokenSource();
    var loadTask = CreateContinuousLoadAsync(client, cts.Token);

    try
    {
        // Measure latencies during load
        for (var i = 0; i < 20; i++)
        {
            var sw = Stopwatch.StartNew();
            using var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
            sw.Stop();

            latencies.Add(sw.ElapsedMilliseconds);
            await Task.Delay(100);
        }
    }
    finally
    {
        // Stop the load even when a request throws, so it cannot keep running after this test ends.
        cts.Cancel();
        try { await loadTask; } catch (OperationCanceledException) { }
    }

    // Assert
    var avgLatency = latencies.Average();

    // Nearest-rank index: ceil(0.95 * N) - 1, computed in integers to avoid floating-point rounding.
    // The previous (int)(N * 0.95) yielded the last index for N = 20, i.e. the maximum.
    var sortedLatencies = latencies.OrderBy(l => l).ToList();
    var p95Index = Math.Max(0, (sortedLatencies.Count * 95 + 99) / 100 - 1);
    var p95Latency = sortedLatencies[p95Index];

    Console.WriteLine($"Latency during load: Avg={avgLatency:F0}ms, P95={p95Latency}ms");

    // P95 latency should be bounded (allowing for throttle wait times)
    p95Latency.Should().BeLessThan(10000,
        "95th percentile latency should be bounded under load");
}
|
|
|
|
/// <summary>
/// Fires 200 concurrent scan requests, waits for all of them, then reads <c>/metrics</c> and
/// logs whether a queue-related metric name appears in the output.
/// </summary>
/// <remarks>
/// The test makes no explicit assertion; it passes when the burst and the metrics request complete
/// without throwing.
/// </remarks>
[Fact]
public async Task Router_QueueDepth_DoesNotGrowUnbounded()
{
    // Arrange
    var httpClient = _fixture.CreateClient();

    // Create significant load
    var burst = new List<Task<HttpResponseMessage>>();
    for (var n = 0; n < 200; n++)
    {
        burst.Add(httpClient.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
    }

    await Task.WhenAll(burst);

    // Check metrics for queue depth
    var scrape = await httpClient.GetAsync("/metrics");
    if (!scrape.IsSuccessStatusCode)
    {
        // No metrics available; completing the burst is the only signal in that case.
        return;
    }

    var metricsText = await scrape.Content.ReadAsStringAsync();

    // Look for queue depth metric
    var exposesQueueMetric = metricsText.Contains("queue_depth") || metricsText.Contains("pending_requests");
    if (exposesQueueMetric)
    {
        // Queue depth should be reasonable after burst
        Console.WriteLine("Queue metrics found in /metrics endpoint");
    }

    // Reaching this point means the burst completed, which is what the test treats as bounded queue growth
}
|
|
|
|
/// <summary>
/// Issues up to <paramref name="requestCount"/> scan requests without awaiting them individually,
/// stopping once <paramref name="durationSeconds"/> have elapsed, then waits for every issued request to finish.
/// </summary>
/// <param name="client">Client used to post the scan requests.</param>
/// <param name="requestCount">Maximum number of requests to issue.</param>
/// <param name="durationSeconds">Time budget after which issuing stops and pending requests are cancelled.</param>
/// <returns>A task that completes when all issued requests have completed or been cancelled.</returns>
private static async Task CreateLoadSpikeAsync(HttpClient client, int requestCount, int durationSeconds)
{
    using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(durationSeconds));
    var inFlight = new List<Task<HttpResponseMessage>>();

    try
    {
        for (var i = 0; i < requestCount && !cts.Token.IsCancellationRequested; i++)
        {
            inFlight.Add(client.PostAsync("/api/v1/scan",
                RouterTestFixture.CreateScanRequest(),
                cts.Token));

            // Small delay to spread requests
            if (i % 50 == 0)
            {
                await Task.Delay(10, cts.Token);
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Duration expired while still issuing; fall through so the requests already sent are drained.
    }

    try
    {
        // Awaiting everything issued keeps cancelled or failed requests from going unobserved.
        await Task.WhenAll(inFlight);
    }
    catch (OperationCanceledException)
    {
        // Expected when duration expires
    }
    finally
    {
        // WhenAll only completes once every task has finished, so reading Result here does not block.
        foreach (var request in inFlight)
        {
            if (request.IsCompletedSuccessfully)
            {
                request.Result.Dispose();
            }
        }
    }
}
|
|
|
|
/// <summary>
/// Posts a scan request roughly every 50 ms until <paramref name="ct"/> is cancelled.
/// </summary>
/// <param name="client">Client used to post the scan requests.</param>
/// <param name="ct">Token that stops the load loop and cancels pending requests.</param>
/// <returns>A task that completes normally once cancellation has been observed.</returns>
private static async Task CreateContinuousLoadAsync(HttpClient client, CancellationToken ct)
{
    while (!ct.IsCancellationRequested)
    {
        // Fire-and-forget requests; the helper observes each outcome so failures do not
        // surface later as unobserved task exceptions.
        _ = SendAndDiscardAsync(client, ct);

        try
        {
            // Kept outside the request's error handling so a failing send can never skip the
            // pause and turn this loop into a busy spin.
            await Task.Delay(50, ct);
        }
        catch (OperationCanceledException)
        {
            break;
        }
    }

    // Sends one request, releases its response, and swallows any failure.
    static async Task SendAndDiscardAsync(HttpClient http, CancellationToken token)
    {
        try
        {
            using var response = await http.PostAsync("/api/v1/scan",
                RouterTestFixture.CreateScanRequest(),
                token);
        }
        catch (Exception)
        {
            // Ignore errors during load generation
        }
    }
}
|
|
}
|