Files
git.stella-ops.org/tests/chaos/StellaOps.Chaos.Router.Tests/RecoveryTests.cs
StellaOps Bot 5146204f1b feat: add security sink detection patterns for JavaScript/TypeScript
- Introduced `sink-detect.js` with various security sink detection patterns categorized by type (e.g., command injection, SQL injection, file operations).
- Implemented functions to build a lookup map for fast sink detection and to match sink calls against known patterns.
- Added `package-lock.json` for dependency management.
2025-12-22 23:21:21 +02:00

299 lines
9.6 KiB
C#

// -----------------------------------------------------------------------------
// RecoveryTests.cs
// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
// Task: T3 - Recovery and Resilience Tests
// Description: Test router recovery after load spikes.
// -----------------------------------------------------------------------------
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Net;
using FluentAssertions;
using StellaOps.Chaos.Router.Tests.Fixtures;
namespace StellaOps.Chaos.Router.Tests;
[Trait("Category", "Chaos")]
[Trait("Category", "Router")]
[Trait("Category", "Recovery")]
public class RecoveryTests : IClassFixture<RouterTestFixture>
{
private readonly RouterTestFixture _fixture;
public RecoveryTests(RouterTestFixture fixture)
{
_fixture = fixture;
}
[Fact]
public async Task Router_AfterSpike_RecoveryWithin30Seconds()
{
// Arrange
var client = _fixture.CreateClient();
var stopwatch = Stopwatch.StartNew();
// Phase 1: Verify normal operation
var normalResponse = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
var normalWorking = normalResponse.IsSuccessStatusCode ||
normalResponse.StatusCode == HttpStatusCode.TooManyRequests;
// Phase 2: Create load spike
await CreateLoadSpikeAsync(client, requestCount: 500, durationSeconds: 5);
// Phase 3: Measure recovery
var recovered = false;
var recoveryStart = Stopwatch.StartNew();
while (recoveryStart.Elapsed < TimeSpan.FromSeconds(60))
{
var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
if (response.IsSuccessStatusCode)
{
recovered = true;
break;
}
// If throttled, wait before retry
if (response.StatusCode == HttpStatusCode.TooManyRequests)
{
var retryAfter = response.Headers.GetValues("Retry-After").FirstOrDefault();
if (int.TryParse(retryAfter, out var waitSeconds))
{
await Task.Delay(TimeSpan.FromSeconds(Math.Min(waitSeconds, 5)));
}
else
{
await Task.Delay(1000);
}
}
else
{
await Task.Delay(1000);
}
}
recoveryStart.Stop();
// Assert
if (normalWorking)
{
recovered.Should().BeTrue("Router should recover after spike");
recoveryStart.Elapsed.Should().BeLessThan(TimeSpan.FromSeconds(30),
"Recovery should happen within 30 seconds");
}
Console.WriteLine($"Recovery time: {recoveryStart.Elapsed.TotalSeconds:F2}s");
}
[Fact]
public async Task Router_NoDataLoss_DuringThrottling()
{
// Arrange
var client = _fixture.CreateClient();
var submittedIds = new ConcurrentBag<string>();
var successfulIds = new ConcurrentBag<string>();
var maxRetries = 10;
// Act - Submit requests with tracking and retry on throttle
var tasks = Enumerable.Range(0, 100).Select(async i =>
{
var scanId = Guid.NewGuid().ToString();
submittedIds.Add(scanId);
var retryCount = 0;
HttpResponseMessage? response = null;
while (retryCount < maxRetries)
{
response = await client.PostAsync("/api/v1/scan",
RouterTestFixture.CreateScanRequest(scanId));
if (response.StatusCode == HttpStatusCode.TooManyRequests)
{
var retryAfter = response.Headers.GetValues("Retry-After").FirstOrDefault();
var waitSeconds = int.TryParse(retryAfter, out var ra) ? ra : 2;
await Task.Delay(TimeSpan.FromSeconds(Math.Min(waitSeconds, 5)));
retryCount++;
}
else
{
break;
}
}
if (response is not null && response.IsSuccessStatusCode)
{
successfulIds.Add(scanId);
}
return response;
});
await Task.WhenAll(tasks);
// Assert
var successRate = (double)successfulIds.Count / submittedIds.Count;
Console.WriteLine($"Success rate with retries: {successRate:P2} ({successfulIds.Count}/{submittedIds.Count})");
// All submitted requests should eventually succeed with proper retry logic
successRate.Should().BeGreaterOrEqualTo(0.9,
"At least 90% of requests should succeed with retry logic");
}
[Fact]
public async Task Router_GracefulDegradation_MaintainsPartialService()
{
// Arrange
var client = _fixture.CreateClient();
var cts = new CancellationTokenSource();
// Start continuous background load
var backgroundTask = CreateContinuousLoadAsync(client, cts.Token);
// Allow load to stabilize
await Task.Delay(3000);
// Check that some requests are still succeeding
var successCount = 0;
var totalChecks = 10;
for (var i = 0; i < totalChecks; i++)
{
var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
if (response.IsSuccessStatusCode || response.StatusCode == HttpStatusCode.Accepted)
{
successCount++;
}
await Task.Delay(100);
}
cts.Cancel();
try { await backgroundTask; } catch (OperationCanceledException) { }
// Assert
successCount.Should().BeGreaterThan(0,
"Router should maintain partial service under load");
Console.WriteLine($"Partial service check: {successCount}/{totalChecks} successful");
}
[Fact]
public async Task Router_LatencyBounded_DuringSpike()
{
// Arrange
var client = _fixture.CreateClient();
var latencies = new ConcurrentBag<long>();
// Create background load
var cts = new CancellationTokenSource();
var loadTask = CreateContinuousLoadAsync(client, cts.Token);
// Measure latencies during load
for (var i = 0; i < 20; i++)
{
var sw = Stopwatch.StartNew();
var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
sw.Stop();
latencies.Add(sw.ElapsedMilliseconds);
await Task.Delay(100);
}
cts.Cancel();
try { await loadTask; } catch (OperationCanceledException) { }
// Assert
var avgLatency = latencies.Average();
var p95Latency = latencies.OrderBy(l => l).ElementAt((int)(latencies.Count * 0.95));
Console.WriteLine($"Latency during load: Avg={avgLatency:F0}ms, P95={p95Latency}ms");
// P95 latency should be bounded (allowing for throttle wait times)
p95Latency.Should().BeLessThan(10000,
"95th percentile latency should be bounded under load");
}
[Fact]
public async Task Router_QueueDepth_DoesNotGrowUnbounded()
{
// Arrange
var client = _fixture.CreateClient();
// Create significant load
var tasks = Enumerable.Range(0, 200)
.Select(_ => client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
await Task.WhenAll(tasks);
// Check metrics for queue depth
var metricsResponse = await client.GetAsync("/metrics");
if (metricsResponse.IsSuccessStatusCode)
{
var metrics = await metricsResponse.Content.ReadAsStringAsync();
// Look for queue depth metric
if (metrics.Contains("queue_depth") || metrics.Contains("pending_requests"))
{
// Queue depth should be reasonable after burst
Console.WriteLine("Queue metrics found in /metrics endpoint");
}
}
// If we got here without timeout, queue is not growing unbounded
}
private static async Task CreateLoadSpikeAsync(HttpClient client, int requestCount, int durationSeconds)
{
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(durationSeconds));
var tasks = new List<Task>();
try
{
for (var i = 0; i < requestCount && !cts.Token.IsCancellationRequested; i++)
{
tasks.Add(client.PostAsync("/api/v1/scan",
RouterTestFixture.CreateScanRequest(),
cts.Token));
// Small delay to spread requests
if (i % 50 == 0)
{
await Task.Delay(10, cts.Token);
}
}
await Task.WhenAll(tasks.Where(t => !t.IsCanceled));
}
catch (OperationCanceledException)
{
// Expected when duration expires
}
}
private static async Task CreateContinuousLoadAsync(HttpClient client, CancellationToken ct)
{
while (!ct.IsCancellationRequested)
{
try
{
// Fire-and-forget requests
_ = client.PostAsync("/api/v1/scan",
RouterTestFixture.CreateScanRequest(),
ct);
await Task.Delay(50, ct);
}
catch (OperationCanceledException)
{
break;
}
catch
{
// Ignore errors during load generation
}
}
}
}