feat: add security sink detection patterns for JavaScript/TypeScript
- Introduced `sink-detect.js` with various security sink detection patterns categorized by type (e.g., command injection, SQL injection, file operations). - Implemented functions to build a lookup map for fast sink detection and to match sink calls against known patterns. - Added `package-lock.json` for dependency management.
This commit is contained in:
@@ -0,0 +1,235 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// BackpressureVerificationTests.cs
|
||||
// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
|
||||
// Task: T2 - Backpressure Verification Tests
|
||||
// Description: Verify router emits correct 429/503 responses with Retry-After.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Net;
|
||||
using FluentAssertions;
|
||||
using StellaOps.Chaos.Router.Tests.Fixtures;
|
||||
|
||||
namespace StellaOps.Chaos.Router.Tests;
|
||||
|
||||
[Trait("Category", "Chaos")]
|
||||
[Trait("Category", "Router")]
|
||||
public class BackpressureVerificationTests : IClassFixture<RouterTestFixture>
|
||||
{
|
||||
/// <summary>Router fixture shared by every test in this class.</summary>
private readonly RouterTestFixture _fixture;

/// <summary>Stores the class fixture handed in by the test framework.</summary>
/// <param name="fixture">Fixture that supplies the HTTP client used by the tests.</param>
public BackpressureVerificationTests(RouterTestFixture fixture) => _fixture = fixture;
|
||||
|
||||
/// <summary>
/// Sends 1000 scan requests at once and validates the Retry-After header on
/// every response that comes back as 429 Too Many Requests.
/// </summary>
/// <remarks>
/// Nothing is asserted when no request is throttled, so the test passes
/// vacuously against a router that is not under enough load.
/// </remarks>
[Fact]
public async Task Router_UnderLoad_Returns429WithRetryAfter()
{
    // Arrange
    var http = _fixture.CreateClient();

    // Act - launch the whole burst before awaiting anything
    var inFlight = Enumerable.Range(0, 1000)
        .Select(_ => http.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()))
        .ToList();
    var results = await Task.WhenAll(inFlight);

    // Assert - only the throttled subset is inspected; an empty subset asserts nothing
    var rejected = results
        .Where(r => r.StatusCode == HttpStatusCode.TooManyRequests)
        .ToList();

    foreach (var rejection in rejected)
    {
        rejection.Headers.Should().Contain(
            h => h.Key.Equals("Retry-After", StringComparison.OrdinalIgnoreCase),
            "429 response should have Retry-After header");

        var headerValue = rejection.Headers.GetValues("Retry-After").FirstOrDefault();
        headerValue.Should().NotBeNull();

        // Only the delay-seconds form of Retry-After is accepted here.
        var parsed = int.TryParse(headerValue, out var delaySeconds);
        parsed.Should().BeTrue("Retry-After should be a valid integer");

        delaySeconds.Should().BeInRange(1, 300,
            "Retry-After should be reasonable (1-300 seconds)");
    }
}
|
||||
|
||||
/// <summary>
/// Fires 5000 concurrent scan requests and checks that every 503 response
/// carries a Retry-After header.
/// </summary>
/// <remarks>
/// The test asserts nothing when no 503 is returned.
/// </remarks>
[Fact]
public async Task Router_UnderLoad_Returns503WhenOverloaded()
{
    // Arrange
    await _fixture.ConfigureLowLimitsAsync();
    var http = _fixture.CreateClient();

    // Act - massive burst, all requests started before the first await
    var burst = new List<Task<HttpResponseMessage>>();
    for (var n = 0; n < 5000; n++)
    {
        burst.Add(http.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
    }

    var results = await Task.WhenAll(burst);

    // Assert - any 503 must tell the caller when to come back
    var unavailable = results
        .Where(r => r.StatusCode == HttpStatusCode.ServiceUnavailable)
        .ToList();

    foreach (var rejection in unavailable)
    {
        rejection.Headers.Should().Contain(
            h => h.Key.Equals("Retry-After", StringComparison.OrdinalIgnoreCase),
            "503 response should have Retry-After header");
    }
}
|
||||
|
||||
/// <summary>
/// Posts a scan request and, while the router answers 429, waits for the advertised
/// Retry-After (capped for test speed) before retrying, up to five retries.
/// If at least one retry happened, the final response must be 200 or 202.
/// </summary>
[Fact]
public async Task Router_RetryAfterHonored_EventuallySucceeds()
{
    // Arrange
    var client = _fixture.CreateClient();
    const int maxRetries = 5;
    var retryCount = 0;
    HttpResponseMessage? response = null;

    // Act - Keep trying until success or max retries
    while (retryCount < maxRetries)
    {
        response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());

        if (response.StatusCode != HttpStatusCode.TooManyRequests)
        {
            break;
        }

        // TryGetValues instead of GetValues: GetValues throws InvalidOperationException
        // when the header is absent, which would hide the real failure.
        var waitSeconds = 0;
        if (response.Headers.TryGetValues("Retry-After", out var values)
            && int.TryParse(values.FirstOrDefault(), out var retryAfter))
        {
            // Cap at 5s for test performance; floor at 0 so a negative value
            // can never reach Task.Delay (which rejects negative delays).
            waitSeconds = Math.Clamp(retryAfter, 0, 5);
        }

        // The extra second is also the back-off used when the header is missing
        // or not an integer, so the loop never spins without waiting.
        await Task.Delay(TimeSpan.FromSeconds(waitSeconds + 1));
        retryCount++;
    }

    // Assert - Eventually should succeed
    response.Should().NotBeNull();

    if (retryCount > 0)
    {
        // If we were throttled, we should eventually succeed.
        // The expected values go in one collection so the trailing string is
        // bound to the "because" parameter rather than treated as a value.
        response!.StatusCode.Should().BeOneOf(
            new[] { HttpStatusCode.OK, HttpStatusCode.Accepted },
            "Request should eventually succeed after honoring Retry-After");
    }
}
|
||||
|
||||
/// <summary>
/// Generates 100 concurrent scan requests, then scrapes <c>/metrics</c> and, when the
/// scrape succeeds, expects at least one of the known request-metric names in the body.
/// </summary>
/// <remarks>
/// A non-success status from <c>/metrics</c> ends the test without any assertion.
/// </remarks>
[Fact]
public async Task Router_ThrottleMetrics_AreExposed()
{
    // Arrange
    var http = _fixture.CreateClient();

    // Generate some traffic first (may or may not cause throttling)
    var traffic = new List<Task<HttpResponseMessage>>();
    for (var n = 0; n < 100; n++)
    {
        traffic.Add(http.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
    }

    await Task.WhenAll(traffic);

    // Act - scrape the metrics endpoint
    var scrape = await http.GetAsync("/metrics");
    if (!scrape.IsSuccessStatusCode)
    {
        return;
    }

    var body = await scrape.Content.ReadAsStringAsync();

    // Assert - common Prometheus-style names; the actual names depend on the router
    string[] candidates =
    {
        "http_requests_total",
        "http_request_duration",
    };

    var anyPresent = false;
    foreach (var metricName in candidates)
    {
        if (body.Contains(metricName))
        {
            anyPresent = true;
            break;
        }
    }

    anyPresent.Should().BeTrue(
        "Metrics endpoint should expose request metrics");
}
|
||||
|
||||
/// <summary>
/// Sends one scan request and prints which of the well-known rate-limit headers
/// are present on the response. Purely informational: nothing is asserted.
/// </summary>
[Fact]
public async Task Router_ResponseHeaders_IncludeRateLimitInfo()
{
    // Arrange
    var http = _fixture.CreateClient();

    // Act
    var reply = await http.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());

    // Collect - both the legacy "X-" names and the unprefixed names are probed
    var found = new List<string>();
    foreach (var headerName in new[]
             {
                 "X-RateLimit-Limit",
                 "X-RateLimit-Remaining",
                 "X-RateLimit-Reset",
                 "RateLimit-Limit",
                 "RateLimit-Remaining",
                 "RateLimit-Reset"
             })
    {
        if (reply.Headers.Contains(headerName))
        {
            found.Add(headerName);
        }
    }

    // Report only - not all routers include these headers
    Console.WriteLine($"Rate limit headers present: {string.Join(", ", found)}");
}
|
||||
|
||||
/// <summary>
/// Sends <paramref name="concurrency"/> scan requests in parallel and requires every
/// response to be 200, 202, 429 or 503; the 2xx share is printed for information.
/// </summary>
/// <param name="concurrency">Number of requests started at once.</param>
[Theory]
[InlineData(10)]
[InlineData(50)]
[InlineData(100)]
public async Task Router_ConcurrentRequests_HandledGracefully(int concurrency)
{
    // Arrange
    var http = _fixture.CreateClient();
    HttpStatusCode[] allowed =
    {
        HttpStatusCode.OK,
        HttpStatusCode.Accepted,
        HttpStatusCode.TooManyRequests,
        HttpStatusCode.ServiceUnavailable
    };

    // Act - start all requests, then wait for the lot
    var inFlight = new List<Task<HttpResponseMessage>>();
    for (var n = 0; n < concurrency; n++)
    {
        inFlight.Add(http.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
    }

    var replies = await Task.WhenAll(inFlight);

    // Assert - each reply is either served or rejected in a controlled way
    foreach (var reply in replies)
    {
        reply.StatusCode.Should().BeOneOf(allowed,
            $"Response should be a valid status code for concurrency level {concurrency}");
    }

    // Report the share of requests that were actually served
    var served = replies.Count(r => r.StatusCode is HttpStatusCode.OK or HttpStatusCode.Accepted);
    var successRate = (double)served / replies.Length;
    Console.WriteLine($"Concurrency {concurrency}: Success rate = {successRate:P2}");
}
|
||||
}
|
||||
@@ -0,0 +1,124 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// RouterTestFixture.cs
|
||||
// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
|
||||
// Task: T2 - Backpressure Verification Tests
|
||||
// Description: Test fixture for router chaos testing with Valkey support.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Net.Http.Json;
|
||||
|
||||
namespace StellaOps.Chaos.Router.Tests.Fixtures;
|
||||
|
||||
/// <summary>
/// xUnit fixture that owns a single <see cref="HttpClient"/> pointed at the router under test.
/// The router address comes from the <c>ROUTER_URL</c> environment variable and falls back
/// to <c>http://localhost:8080</c>.
/// </summary>
public class RouterTestFixture : IAsyncLifetime
{
    // One client for the fixture's lifetime; disposed in DisposeAsync.
    private readonly HttpClient _http;

    /// <summary>Resolves the router URL and builds the shared client with a 30 second timeout.</summary>
    public RouterTestFixture()
    {
        RouterUrl = Environment.GetEnvironmentVariable("ROUTER_URL") ?? "http://localhost:8080";

        _http = new HttpClient
        {
            BaseAddress = new Uri(RouterUrl),
            Timeout = TimeSpan.FromSeconds(30)
        };
    }

    /// <summary>Base address the shared client talks to.</summary>
    public string RouterUrl { get; }

    /// <summary>
    /// Returns the fixture's shared client. No new instance is created, so callers
    /// must not dispose it.
    /// </summary>
    public HttpClient CreateClient() => _http;

    /// <summary>
    /// Placeholder for lowering the router limits before overload tests.
    /// Currently does nothing and completes immediately; the limits are assumed
    /// to be pre-configured for chaos testing.
    /// </summary>
    public Task ConfigureLowLimitsAsync() => Task.CompletedTask;

    /// <summary>
    /// Builds the JSON body of a scan request for <c>alpine:latest</c>.
    /// </summary>
    /// <param name="scanId">Identifier to send; a new GUID string is generated when null.</param>
    /// <returns>JSON content with <c>image</c>, <c>scanId</c> and <c>timestamp</c> properties.</returns>
    public static HttpContent CreateScanRequest(string? scanId = null)
    {
        var effectiveId = scanId ?? Guid.NewGuid().ToString();
        var issuedAt = DateTimeOffset.UtcNow.ToString("O");

        var payload = new
        {
            image = "alpine:latest",
            scanId = effectiveId,
            timestamp = issuedAt
        };

        return JsonContent.Create(payload);
    }

    /// <summary>No start-up work is performed; router reachability is not checked here.</summary>
    public Task InitializeAsync() => Task.CompletedTask;

    /// <summary>Disposes the shared client.</summary>
    public Task DisposeAsync()
    {
        _http.Dispose();
        return Task.CompletedTask;
    }
}
|
||||
|
||||
/// <summary>
/// Extended fixture with Valkey container support for failure injection.
/// </summary>
/// <remarks>
/// <see cref="IAsyncLifetime"/> is listed again on this class on purpose. The base class
/// already implements it, and a method declared with <c>new</c> only hides the base method:
/// a call made through the interface would still reach <see cref="RouterTestFixture.DisposeAsync"/>
/// and the container would never be stopped. Re-implementing the interface binds
/// <c>IAsyncLifetime.DisposeAsync</c> to the method declared here.
/// </remarks>
public class RouterWithValkeyFixture : RouterTestFixture, IAsyncLifetime
{
    // Created lazily on the first StartValkeyAsync call and reused across stop/start cycles.
    private Testcontainers.Redis.RedisContainer? _valkeyContainer;

    // Tracks whether this fixture has started the container and not yet stopped it.
    private bool _valkeyRunning;

    /// <summary>
    /// Builds the Valkey container on first use and starts it if it is not already running.
    /// Calling this while the container is running is a no-op.
    /// </summary>
    public async Task StartValkeyAsync()
    {
        _valkeyContainer ??= new Testcontainers.Redis.RedisBuilder()
            .WithImage("valkey/valkey:7-alpine")
            .WithName($"chaos-valkey-{Guid.NewGuid():N}")
            .Build();

        if (!_valkeyRunning)
        {
            await _valkeyContainer.StartAsync();
            _valkeyRunning = true;
        }
    }

    /// <summary>
    /// Stops the Valkey container if this fixture started it; otherwise does nothing.
    /// </summary>
    public async Task StopValkeyAsync()
    {
        if (_valkeyContainer is not null && _valkeyRunning)
        {
            await _valkeyContainer.StopAsync();
            _valkeyRunning = false;
        }
    }

    /// <summary>
    /// Placeholder for injecting artificial Valkey latency. Currently does nothing:
    /// <paramref name="latency"/> is ignored and the call completes immediately.
    /// </summary>
    /// <param name="latency">Requested latency (not applied yet).</param>
    public Task ConfigureValkeyLatencyAsync(TimeSpan latency)
    {
        // Configure artificial latency via Valkey DEBUG SLEEP
        // In production, use network simulation tools like tc or toxiproxy
        return Task.CompletedTask;
    }

    /// <summary>
    /// Stops and disposes the Valkey container (if one was created), then disposes the
    /// base fixture. The base dispose runs even when container teardown throws.
    /// </summary>
    public new async Task DisposeAsync()
    {
        try
        {
            if (_valkeyContainer is not null)
            {
                if (_valkeyRunning)
                {
                    await _valkeyContainer.StopAsync();
                    _valkeyRunning = false;
                }

                await _valkeyContainer.DisposeAsync();
                _valkeyContainer = null;
            }
        }
        finally
        {
            await base.DisposeAsync();
        }
    }
}
|
||||
298
tests/chaos/StellaOps.Chaos.Router.Tests/RecoveryTests.cs
Normal file
298
tests/chaos/StellaOps.Chaos.Router.Tests/RecoveryTests.cs
Normal file
@@ -0,0 +1,298 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// RecoveryTests.cs
|
||||
// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
|
||||
// Task: T3 - Recovery and Resilience Tests
|
||||
// Description: Test router recovery after load spikes.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Collections.Concurrent;
|
||||
using System.Diagnostics;
|
||||
using System.Net;
|
||||
using FluentAssertions;
|
||||
using StellaOps.Chaos.Router.Tests.Fixtures;
|
||||
|
||||
namespace StellaOps.Chaos.Router.Tests;
|
||||
|
||||
[Trait("Category", "Chaos")]
|
||||
[Trait("Category", "Router")]
|
||||
[Trait("Category", "Recovery")]
|
||||
public class RecoveryTests : IClassFixture<RouterTestFixture>
|
||||
{
|
||||
/// <summary>Router fixture shared by every test in this class.</summary>
private readonly RouterTestFixture _fixture;

/// <summary>Stores the class fixture handed in by the test framework.</summary>
/// <param name="fixture">Fixture that supplies the HTTP client used by the tests.</param>
public RecoveryTests(RouterTestFixture fixture) => _fixture = fixture;
|
||||
|
||||
/// <summary>
/// Sends one baseline request, generates a load spike, then polls the router for up to
/// 60 seconds until a request succeeds. When the baseline was healthy (2xx or 429),
/// recovery must have happened, and within 30 seconds.
/// </summary>
[Fact]
public async Task Router_AfterSpike_RecoveryWithin30Seconds()
{
    // Arrange
    var client = _fixture.CreateClient();

    // Phase 1: Verify normal operation
    var normalResponse = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
    var normalWorking = normalResponse.IsSuccessStatusCode ||
                        normalResponse.StatusCode == HttpStatusCode.TooManyRequests;

    // Phase 2: Create load spike
    await CreateLoadSpikeAsync(client, requestCount: 500, durationSeconds: 5);

    // Phase 3: Measure recovery
    var recovered = false;
    var recoveryStart = Stopwatch.StartNew();

    while (recoveryStart.Elapsed < TimeSpan.FromSeconds(60))
    {
        var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());

        if (response.IsSuccessStatusCode)
        {
            recovered = true;
            break;
        }

        // Default pause between polls; replaced by Retry-After when the router sends a usable one.
        var pause = TimeSpan.FromSeconds(1);

        // TryGetValues instead of GetValues: a 429 without Retry-After must not abort
        // the measurement with InvalidOperationException.
        if (response.StatusCode == HttpStatusCode.TooManyRequests
            && response.Headers.TryGetValues("Retry-After", out var values)
            && int.TryParse(values.FirstOrDefault(), out var waitSeconds))
        {
            // Cap at 5s to keep polling; floor at 0 because Task.Delay rejects negative delays.
            pause = TimeSpan.FromSeconds(Math.Clamp(waitSeconds, 0, 5));
        }

        await Task.Delay(pause);
    }

    recoveryStart.Stop();

    // Assert - only meaningful when the router was healthy before the spike
    if (normalWorking)
    {
        recovered.Should().BeTrue("Router should recover after spike");
        recoveryStart.Elapsed.Should().BeLessThan(TimeSpan.FromSeconds(30),
            "Recovery should happen within 30 seconds");
    }

    Console.WriteLine($"Recovery time: {recoveryStart.Elapsed.TotalSeconds:F2}s");
}
|
||||
|
||||
/// <summary>
/// Submits 100 uniquely identified scan requests in parallel, retrying each one on 429
/// (up to 10 attempts per request), and requires at least 90% of them to end with a
/// success status.
/// </summary>
[Fact]
public async Task Router_NoDataLoss_DuringThrottling()
{
    // Arrange
    var client = _fixture.CreateClient();
    var submittedIds = new ConcurrentBag<string>();
    var successfulIds = new ConcurrentBag<string>();
    const int maxRetries = 10;

    // Act - Submit requests with tracking and retry on throttle
    var tasks = Enumerable.Range(0, 100).Select(async _ =>
    {
        var scanId = Guid.NewGuid().ToString();
        submittedIds.Add(scanId);

        HttpResponseMessage? response = null;

        for (var attempt = 0; attempt < maxRetries; attempt++)
        {
            // The same scanId is re-sent on every attempt so the request stays traceable.
            response = await client.PostAsync("/api/v1/scan",
                RouterTestFixture.CreateScanRequest(scanId));

            if (response.StatusCode != HttpStatusCode.TooManyRequests)
            {
                break;
            }

            // Fall back to 2s when Retry-After is missing or not an integer.
            // TryGetValues avoids the InvalidOperationException that GetValues raises
            // for an absent header, which would fault this worker and fail the whole test.
            var waitSeconds = 2;
            if (response.Headers.TryGetValues("Retry-After", out var values)
                && int.TryParse(values.FirstOrDefault(), out var retryAfter))
            {
                waitSeconds = retryAfter;
            }

            // Cap at 5s for test duration; floor at 0 because Task.Delay rejects negative delays.
            await Task.Delay(TimeSpan.FromSeconds(Math.Clamp(waitSeconds, 0, 5)));
        }

        if (response is { IsSuccessStatusCode: true })
        {
            successfulIds.Add(scanId);
        }
    });

    await Task.WhenAll(tasks);

    // Assert
    var successRate = (double)successfulIds.Count / submittedIds.Count;
    Console.WriteLine($"Success rate with retries: {successRate:P2} ({successfulIds.Count}/{submittedIds.Count})");

    // All submitted requests should eventually succeed with proper retry logic
    successRate.Should().BeGreaterOrEqualTo(0.9,
        "At least 90% of requests should succeed with retry logic");
}
|
||||
|
||||
/// <summary>
/// Runs continuous background load, waits three seconds, then issues ten probe requests
/// and requires at least one of them to succeed.
/// </summary>
[Fact]
public async Task Router_GracefulDegradation_MaintainsPartialService()
{
    // Arrange
    var client = _fixture.CreateClient();
    using var cts = new CancellationTokenSource();

    // Start continuous background load
    var backgroundTask = CreateContinuousLoadAsync(client, cts.Token);

    var successCount = 0;
    const int totalChecks = 10;

    try
    {
        // Allow load to stabilize
        await Task.Delay(3000);

        // Check that some requests are still succeeding
        for (var i = 0; i < totalChecks; i++)
        {
            var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());

            // IsSuccessStatusCode covers the whole 2xx range, 202 Accepted included.
            if (response.IsSuccessStatusCode)
            {
                successCount++;
            }

            await Task.Delay(100);
        }
    }
    finally
    {
        // Always stop the load generator, even when a probe throws, so it cannot
        // keep sending requests while later tests run.
        cts.Cancel();
        try { await backgroundTask; } catch (OperationCanceledException) { }
    }

    // Assert
    successCount.Should().BeGreaterThan(0,
        "Router should maintain partial service under load");

    Console.WriteLine($"Partial service check: {successCount}/{totalChecks} successful");
}
|
||||
|
||||
/// <summary>
/// Measures the latency of 20 sequential scan requests while background load is running
/// and requires the 95th percentile to stay below 10 seconds.
/// </summary>
[Fact]
public async Task Router_LatencyBounded_DuringSpike()
{
    // Arrange
    var client = _fixture.CreateClient();

    // Samples are recorded sequentially by this method only, so a plain list is enough.
    var latencies = new List<long>();

    // Create background load
    using var cts = new CancellationTokenSource();
    var loadTask = CreateContinuousLoadAsync(client, cts.Token);

    try
    {
        // Measure latencies during load
        for (var i = 0; i < 20; i++)
        {
            var sw = Stopwatch.StartNew();
            using var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
            sw.Stop();

            latencies.Add(sw.ElapsedMilliseconds);
            await Task.Delay(100);
        }
    }
    finally
    {
        // Always stop the load generator, even when a request throws.
        cts.Cancel();
        try { await loadTask; } catch (OperationCanceledException) { }
    }

    // Assert
    var sorted = latencies.OrderBy(l => l).ToList();
    var avgLatency = sorted.Average();

    // Nearest-rank percentile: rank = ceil(0.95 * n), done in integer arithmetic to avoid
    // floating-point rounding. For 20 samples this is rank 19 (index 18); indexing with
    // (int)(n * 0.95) would pick index 19, i.e. the maximum instead of P95.
    var p95Index = ((sorted.Count * 95) + 99) / 100 - 1;
    var p95Latency = sorted[p95Index];

    Console.WriteLine($"Latency during load: Avg={avgLatency:F0}ms, P95={p95Latency}ms");

    // P95 latency should be bounded (allowing for throttle wait times)
    p95Latency.Should().BeLessThan(10000,
        "95th percentile latency should be bounded under load");
}
|
||||
|
||||
/// <summary>
/// Sends 200 concurrent scan requests, then scrapes <c>/metrics</c> and prints a note
/// when a queue-related metric name appears in the body.
/// </summary>
/// <remarks>
/// There is no explicit assertion: the test passes as long as every request and the
/// scrape complete without throwing.
/// </remarks>
[Fact]
public async Task Router_QueueDepth_DoesNotGrowUnbounded()
{
    // Arrange
    var http = _fixture.CreateClient();

    // Create significant load and wait for all of it to finish
    var burst = new List<Task<HttpResponseMessage>>();
    for (var n = 0; n < 200; n++)
    {
        burst.Add(http.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
    }

    await Task.WhenAll(burst);

    // Check metrics for queue depth
    var scrape = await http.GetAsync("/metrics");
    if (!scrape.IsSuccessStatusCode)
    {
        return;
    }

    var body = await scrape.Content.ReadAsStringAsync();

    // Either metric name counts as evidence that queue depth is exported
    var exposesQueueMetric = body.Contains("queue_depth") || body.Contains("pending_requests");
    if (exposesQueueMetric)
    {
        Console.WriteLine("Queue metrics found in /metrics endpoint");
    }

    // Reaching this point means the burst completed without a client timeout
}
|
||||
|
||||
/// <summary>
/// Starts up to <paramref name="requestCount"/> scan requests as fast as possible and
/// waits for all of them to finish. Requests still running after
/// <paramref name="durationSeconds"/> are cancelled.
/// </summary>
/// <param name="client">Client used to send the requests.</param>
/// <param name="requestCount">Maximum number of requests to start.</param>
/// <param name="durationSeconds">Time budget after which sending stops and pending requests are cancelled.</param>
private static async Task CreateLoadSpikeAsync(HttpClient client, int requestCount, int durationSeconds)
{
    // Disposed on exit; by then every request that observes the token has completed.
    using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(durationSeconds));
    var tasks = new List<Task>();

    try
    {
        for (var i = 0; i < requestCount && !cts.Token.IsCancellationRequested; i++)
        {
            tasks.Add(client.PostAsync("/api/v1/scan",
                RouterTestFixture.CreateScanRequest(),
                cts.Token));

            // Small delay to spread requests
            if (i % 50 == 0)
            {
                await Task.Delay(10, cts.Token);
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Expected when duration expires while requests are still being started
    }

    try
    {
        // Await every started request, including on the cancellation path, so no task
        // is left running or unobserved when the caller begins measuring recovery.
        await Task.WhenAll(tasks);
    }
    catch (OperationCanceledException)
    {
        // Expected for requests cut off by the time budget
    }
}
|
||||
|
||||
/// <summary>
/// Sends one scan request roughly every 50 ms until <paramref name="ct"/> is cancelled.
/// Requests are not awaited by the loop; their outcome is ignored.
/// </summary>
/// <param name="client">Client used to send the requests.</param>
/// <param name="ct">Token that stops the loop and cancels in-flight requests.</param>
private static async Task CreateContinuousLoadAsync(HttpClient client, CancellationToken ct)
{
    while (!ct.IsCancellationRequested)
    {
        // Not awaited so the send rate does not depend on response time; the helper
        // observes the result itself so no faulted task is left unobserved.
        _ = SendAndDiscardAsync(client, ct);

        try
        {
            await Task.Delay(50, ct);
        }
        catch (OperationCanceledException)
        {
            break;
        }
    }

    // Sends one request, disposes the response and swallows any failure.
    static async Task SendAndDiscardAsync(HttpClient http, CancellationToken token)
    {
        try
        {
            using var response = await http.PostAsync("/api/v1/scan",
                RouterTestFixture.CreateScanRequest(),
                token);
        }
        catch (Exception)
        {
            // Ignore errors during load generation (deliberately best-effort)
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <IsPackable>false</IsPackable>
    <RootNamespace>StellaOps.Chaos.Router.Tests</RootNamespace>
  </PropertyGroup>

  <ItemGroup>
    <!-- The test sources use Fact/Theory/Trait/IClassFixture/IAsyncLifetime without a using directive. -->
    <Using Include="Xunit" />
  </ItemGroup>

  <ItemGroup>
    <PackageReference Include="FluentAssertions" Version="8.0.0" />
    <PackageReference Include="Microsoft.AspNetCore.Mvc.Testing" Version="10.0.0" />
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.13.0" />
    <PackageReference Include="Testcontainers" Version="4.3.0" />
    <PackageReference Include="Testcontainers.Redis" Version="4.3.0" />
    <!-- The tests implement the Task-returning IAsyncLifetime of xUnit v2. The "xunit" package id
         has no 3.x release (v3 is published as "xunit.v3" and uses ValueTask signatures). -->
    <PackageReference Include="xunit" Version="2.9.3" />
    <PackageReference Include="xunit.runner.visualstudio" Version="3.0.0">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
  </ItemGroup>

</Project>
|
||||
217
tests/chaos/StellaOps.Chaos.Router.Tests/ValkeyFailureTests.cs
Normal file
217
tests/chaos/StellaOps.Chaos.Router.Tests/ValkeyFailureTests.cs
Normal file
@@ -0,0 +1,217 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ValkeyFailureTests.cs
|
||||
// Sprint: SPRINT_5100_0005_0001_router_chaos_suite
|
||||
// Task: T4 - Valkey Failure Injection
|
||||
// Description: Test router behavior when Valkey cache fails.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Diagnostics;
|
||||
using System.Net;
|
||||
using FluentAssertions;
|
||||
using StellaOps.Chaos.Router.Tests.Fixtures;
|
||||
|
||||
namespace StellaOps.Chaos.Router.Tests;
|
||||
|
||||
[Trait("Category", "Chaos")]
|
||||
[Trait("Category", "Valkey")]
|
||||
[Collection("ValkeyTests")]
|
||||
public class ValkeyFailureTests : IClassFixture<RouterWithValkeyFixture>, IAsyncLifetime
|
||||
{
|
||||
/// <summary>Router-plus-Valkey fixture shared by every test in this class.</summary>
private readonly RouterWithValkeyFixture _fixture;

/// <summary>Stores the class fixture handed in by the test framework.</summary>
/// <param name="fixture">Fixture that supplies the HTTP client and controls the Valkey container.</param>
public ValkeyFailureTests(RouterWithValkeyFixture fixture) => _fixture = fixture;
|
||||
|
||||
/// <summary>Makes sure the Valkey container is running before the test body executes.</summary>
public Task InitializeAsync() => _fixture.StartValkeyAsync();
|
||||
|
||||
/// <summary>No per-test cleanup; the container is owned by the fixture.</summary>
public Task DisposeAsync() => Task.CompletedTask;
|
||||
|
||||
/// <summary>
/// Confirms the router answers while Valkey is up, stops Valkey, and then requires the
/// next request to end in a served (200/202) or controlled (429/503) status.
/// Valkey is started again afterwards regardless of the outcome.
/// </summary>
[Fact]
public async Task Router_ValkeyDown_FallsBackToLocal()
{
    // Arrange
    var client = _fixture.CreateClient();

    // Verify normal operation with Valkey
    var response1 = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
    var initialSuccess = response1.IsSuccessStatusCode ||
                         response1.StatusCode == HttpStatusCode.TooManyRequests;

    // Without a healthy baseline the fallback check below would prove nothing.
    initialSuccess.Should().BeTrue(
        "Router should serve or throttle requests while Valkey is up");

    try
    {
        // Kill Valkey
        await _fixture.StopValkeyAsync();

        // Wait for router to detect Valkey is down
        await Task.Delay(2000);

        // Act - Router should degrade gracefully
        var response2 = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());

        // Assert - Should still work with local rate limiter or return controlled error
        var validStatuses = new[]
        {
            HttpStatusCode.OK,
            HttpStatusCode.Accepted,
            HttpStatusCode.TooManyRequests,
            HttpStatusCode.ServiceUnavailable
        };

        response2.StatusCode.Should().BeOneOf(validStatuses,
            "Router should fall back to local rate limiting when Valkey is down");
    }
    finally
    {
        // Restore Valkey for other tests, even when the request or the assertion fails
        await _fixture.StartValkeyAsync();
    }
}
|
||||
|
||||
/// <summary>
/// Stops Valkey, starts it again, then sends ten spaced-out scan requests and requires
/// at least one of them to be served or throttled (2xx or 429).
/// </summary>
[Fact]
public async Task Router_ValkeyReconnect_ResumesDistributedLimiting()
{
    // Arrange
    var http = _fixture.CreateClient();

    // Bounce Valkey: down for 3s, then up with 2s allowed for reconnection
    await _fixture.StopValkeyAsync();
    await Task.Delay(3000);
    await _fixture.StartValkeyAsync();
    await Task.Delay(2000);

    // Act - ten requests, 100ms apart, counting those the router handled
    var handled = 0;
    for (var attempt = 0; attempt < 10; attempt++)
    {
        var reply = await http.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
        if (reply.IsSuccessStatusCode || reply.StatusCode == HttpStatusCode.TooManyRequests)
        {
            handled++;
        }

        await Task.Delay(100);
    }

    // Assert - requests should be processed
    handled.Should().BeGreaterThan(0,
        "Router should resume processing after Valkey reconnect");

    // Optional: the metrics body is read but not inspected yet
    var scrape = await http.GetAsync("/metrics");
    if (scrape.IsSuccessStatusCode)
    {
        _ = await scrape.Content.ReadAsStringAsync();
        Console.WriteLine("Metrics available after Valkey reconnect");
    }
}
|
||||
|
||||
/// <summary>
/// Requests two seconds of Valkey latency from the fixture, sends one scan request and
/// requires it to finish in under five seconds with a 200, 202, 429 or 503 status.
/// </summary>
[Fact]
public async Task Router_ValkeyLatency_DoesNotBlock()
{
    // Arrange
    HttpStatusCode[] allowed =
    {
        HttpStatusCode.OK,
        HttpStatusCode.Accepted,
        HttpStatusCode.TooManyRequests,
        HttpStatusCode.ServiceUnavailable
    };

    await _fixture.ConfigureValkeyLatencyAsync(TimeSpan.FromSeconds(2));
    var http = _fixture.CreateClient();

    // Act - time a single request end to end
    var timer = Stopwatch.StartNew();
    var reply = await http.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());
    timer.Stop();

    // Assert - the router is expected to time out cache operations rather than wait on them
    timer.Elapsed.Should().BeLessThan(TimeSpan.FromSeconds(5),
        "Slow Valkey should not significantly block request processing");

    // The reply itself must still be a served or controlled-rejection status
    reply.StatusCode.Should().BeOneOf(allowed);

    Console.WriteLine($"Request completed in {timer.ElapsedMilliseconds}ms with slow Valkey");
}
|
||||
|
||||
/// <summary>
/// Cycles Valkey up and down three times, sending five scan requests in each state,
/// and requires at least half of all requests to be served or throttled.
/// </summary>
/// <remarks>
/// A 429 counts as handled in both states: a throttled reply is the rate limiter working,
/// whether or not Valkey is available.
/// </remarks>
[Fact]
public async Task Router_ValkeyFlap_HandlesGracefully()
{
    // Arrange
    var client = _fixture.CreateClient();
    var successCount = 0;
    var errorCount = 0;

    // Sends five requests and classifies each with the same rule in every phase.
    async Task ProbeAsync()
    {
        for (var i = 0; i < 5; i++)
        {
            var response = await client.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest());

            // Throttled is acceptable
            if (response.IsSuccessStatusCode || response.StatusCode == HttpStatusCode.TooManyRequests)
            {
                successCount++;
            }
            else
            {
                errorCount++;
            }
        }
    }

    try
    {
        // Act - Simulate Valkey flapping
        for (var cycle = 0; cycle < 3; cycle++)
        {
            // Valkey up
            await _fixture.StartValkeyAsync();
            await Task.Delay(1000);
            await ProbeAsync();

            // Valkey down
            await _fixture.StopValkeyAsync();
            await Task.Delay(1000);
            await ProbeAsync();
        }
    }
    finally
    {
        // Each cycle ends with Valkey stopped; bring it back for other tests
        await _fixture.StartValkeyAsync();
    }

    // Assert
    var totalRequests = successCount + errorCount;
    var successRate = (double)successCount / totalRequests;

    Console.WriteLine($"Valkey flap test: {successCount}/{totalRequests} successful ({successRate:P2})");

    successRate.Should().BeGreaterOrEqualTo(0.5,
        "Router should handle at least 50% of requests during Valkey flapping");
}
|
||||
|
||||
/// <summary>
/// Sends 500 concurrent scan requests and requires every response to be 200, 202, 429
/// or 503, then calls <c>/health</c> and prints the resulting status.
/// </summary>
/// <remarks>
/// The health status is only printed, not asserted.
/// </remarks>
[Fact]
public async Task Router_ValkeyConnectionExhaustion_DoesNotCrash()
{
    // Arrange
    var http = _fixture.CreateClient();
    var allowed = new HashSet<HttpStatusCode>
    {
        HttpStatusCode.OK,
        HttpStatusCode.Accepted,
        HttpStatusCode.TooManyRequests,
        HttpStatusCode.ServiceUnavailable
    };

    // Many parallel requests that might exhaust the router's Valkey connections
    var burst = new List<Task<HttpResponseMessage>>();
    for (var n = 0; n < 500; n++)
    {
        burst.Add(http.PostAsync("/api/v1/scan", RouterTestFixture.CreateScanRequest()));
    }

    // Act
    var replies = await Task.WhenAll(burst);

    // Assert - every reply is a served or controlled-rejection status
    var validResponses = 0;
    foreach (var reply in replies)
    {
        if (allowed.Contains(reply.StatusCode))
        {
            validResponses++;
        }
    }

    validResponses.Should().Be(replies.Length,
        "All responses should be valid HTTP responses");

    // Probe the health endpoint after the burst
    var healthCheck = await http.GetAsync("/health");
    Console.WriteLine($"Health check after burst: {healthCheck.StatusCode}");
}
|
||||
}
|
||||
Reference in New Issue
Block a user