using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using Microsoft.Extensions.Time.Testing;
using StellaOps.Notifier.Worker.Observability;

namespace StellaOps.Notifier.Tests.Observability;

/// <summary>
/// Unit tests for <see cref="InMemoryChaosTestRunner"/>: experiment lifecycle
/// (start / stop / list / cleanup), fault decisions returned by
/// <c>ShouldFailAsync</c> for each <see cref="ChaosFaultType"/>, and outcome
/// statistics returned by <c>GetResultsAsync</c>.
/// </summary>
/// <remarks>
/// Every test gets a fresh runner backed by a <see cref="FakeTimeProvider"/>, so
/// time only moves when a test calls <c>_timeProvider.Advance(...)</c>.
/// </remarks>
public class ChaosTestRunnerTests
{
    private readonly FakeTimeProvider _timeProvider;
    private readonly ChaosTestOptions _options;
    private readonly InMemoryChaosTestRunner _runner;

    /// <summary>
    /// Builds the shared fixture: chaos testing enabled, at most 5 concurrent
    /// experiments, a 1 hour duration cap, and no mandatory tenant target.
    /// </summary>
    public ChaosTestRunnerTests()
    {
        _timeProvider = new FakeTimeProvider(DateTimeOffset.UtcNow);
        _options = new ChaosTestOptions
        {
            Enabled = true,
            MaxConcurrentExperiments = 5,
            MaxExperimentDuration = TimeSpan.FromHours(1),
            RequireTenantTarget = false
        };
        _runner = CreateRunner(_options);
    }

    /// <summary>
    /// Creates a runner over the shared fake clock with the given options.
    /// </summary>
    /// <param name="options">Options the runner is constructed with.</param>
    /// <returns>A new runner instance with no experiments started by this class.</returns>
    private InMemoryChaosTestRunner CreateRunner(ChaosTestOptions options)
    {
        // The typed null logger implements both ILogger and ILogger<InMemoryChaosTestRunner>,
        // so it binds regardless of which of the two the runner's constructor declares.
        return new InMemoryChaosTestRunner(
            Options.Create(options),
            _timeProvider,
            NullLogger<InMemoryChaosTestRunner>.Instance);
    }

    /// <summary>Starting a valid experiment returns it in the Running state with a start time.</summary>
    [Fact]
    public async Task StartExperimentAsync_CreatesExperiment()
    {
        // Arrange
        var config = new ChaosExperimentConfig
        {
            Name = "Test Outage",
            InitiatedBy = "test-user",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.Outage,
            Duration = TimeSpan.FromMinutes(5)
        };

        // Act
        var experiment = await _runner.StartExperimentAsync(config);

        // Assert
        Assert.NotNull(experiment);
        Assert.Equal(ChaosExperimentStatus.Running, experiment.Status);
        Assert.Equal("Test Outage", experiment.Config.Name);
        Assert.NotNull(experiment.StartedAt);
    }

    /// <summary>Starting an experiment on a runner whose options have Enabled = false throws.</summary>
    [Fact]
    public async Task StartExperimentAsync_WhenDisabled_Throws()
    {
        // Arrange
        var disabledOptions = new ChaosTestOptions { Enabled = false };
        var runner = CreateRunner(disabledOptions);

        var config = new ChaosExperimentConfig
        {
            Name = "Test",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        };

        // Act & Assert
        // NOTE(review): the concrete exception type thrown by the runner is not visible
        // from this file; tighten to Assert.ThrowsAsync<TExpected> once confirmed.
        await Assert.ThrowsAnyAsync<Exception>(() => runner.StartExperimentAsync(config));
    }

    /// <summary>A requested duration above the configured MaxExperimentDuration is rejected.</summary>
    [Fact]
    public async Task StartExperimentAsync_ExceedsMaxDuration_Throws()
    {
        // Arrange
        var config = new ChaosExperimentConfig
        {
            Name = "Long Experiment",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage,
            Duration = TimeSpan.FromHours(2) // Exceeds max of 1 hour
        };

        // Act & Assert
        // NOTE(review): exception type not visible from this file; tighten once confirmed.
        await Assert.ThrowsAnyAsync<Exception>(() => _runner.StartExperimentAsync(config));
    }

    /// <summary>Once MaxConcurrentExperiments experiments are running, one more start throws.</summary>
    [Fact]
    public async Task StartExperimentAsync_MaxConcurrentReached_Throws()
    {
        // Arrange - start max number of experiments (read from the options so the
        // test stays correct if the fixture limit changes)
        for (var i = 0; i < _options.MaxConcurrentExperiments; i++)
        {
            await _runner.StartExperimentAsync(new ChaosExperimentConfig
            {
                Name = $"Experiment {i}",
                InitiatedBy = "test-user",
                FaultType = ChaosFaultType.Outage
            });
        }

        // Act & Assert
        // NOTE(review): exception type not visible from this file; tighten once confirmed.
        await Assert.ThrowsAnyAsync<Exception>(() =>
            _runner.StartExperimentAsync(new ChaosExperimentConfig
            {
                Name = "One too many",
                InitiatedBy = "test-user",
                FaultType = ChaosFaultType.Outage
            }));
    }

    /// <summary>Stopping a running experiment marks it Stopped and records an end time.</summary>
    [Fact]
    public async Task StopExperimentAsync_StopsExperiment()
    {
        // Arrange
        var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Test",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        });

        // Act
        await _runner.StopExperimentAsync(experiment.Id);

        // Assert
        var stopped = await _runner.GetExperimentAsync(experiment.Id);
        Assert.NotNull(stopped);
        Assert.Equal(ChaosExperimentStatus.Stopped, stopped.Status);
        Assert.NotNull(stopped.EndedAt);
    }

    /// <summary>An Outage experiment matching tenant and channel yields a failing decision with an error.</summary>
    [Fact]
    public async Task ShouldFailAsync_OutageFault_ReturnsFault()
    {
        // Arrange
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Email Outage",
            InitiatedBy = "test-user",
            TenantId = "tenant1",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.Outage
        });

        // Act
        var decision = await _runner.ShouldFailAsync("tenant1", "email");

        // Assert
        Assert.True(decision.ShouldFail);
        Assert.Equal(ChaosFaultType.Outage, decision.FaultType);
        Assert.NotNull(decision.InjectedError);
    }

    /// <summary>An experiment scoped to one tenant does not affect a different tenant.</summary>
    [Fact]
    public async Task ShouldFailAsync_NoMatchingExperiment_ReturnsNoFault()
    {
        // Arrange
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Email Outage",
            InitiatedBy = "test-user",
            TenantId = "tenant1",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.Outage
        });

        // Act - different tenant
        var decision = await _runner.ShouldFailAsync("tenant2", "email");

        // Assert
        Assert.False(decision.ShouldFail);
    }

    /// <summary>An experiment targeting "email" does not affect the "slack" channel type.</summary>
    [Fact]
    public async Task ShouldFailAsync_WrongChannelType_ReturnsNoFault()
    {
        // Arrange
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Email Outage",
            InitiatedBy = "test-user",
            TenantId = "tenant1",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.Outage
        });

        // Act - different channel type
        var decision = await _runner.ShouldFailAsync("tenant1", "slack");

        // Assert
        Assert.False(decision.ShouldFail);
    }

    /// <summary>A Latency experiment does not fail the call but injects a delay within the configured bounds.</summary>
    [Fact]
    public async Task ShouldFailAsync_LatencyFault_InjectsLatency()
    {
        // Arrange
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Latency Test",
            InitiatedBy = "test-user",
            TenantId = "tenant1",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.Latency,
            FaultConfig = new ChaosFaultConfig
            {
                MinLatency = TimeSpan.FromSeconds(1),
                MaxLatency = TimeSpan.FromSeconds(5)
            }
        });

        // Act
        var decision = await _runner.ShouldFailAsync("tenant1", "email");

        // Assert
        Assert.False(decision.ShouldFail); // Latency doesn't cause failure
        Assert.NotNull(decision.InjectedLatency);
        Assert.InRange(decision.InjectedLatency.Value.TotalSeconds, 1.0, 5.0);
    }

    /// <summary>With FailureRate = 0.5 and a fixed seed, roughly half of 100 calls fail.</summary>
    [Fact]
    public async Task ShouldFailAsync_PartialFailure_UsesFailureRate()
    {
        // Arrange
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Partial Failure",
            InitiatedBy = "test-user",
            TenantId = "tenant1",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.PartialFailure,
            FaultConfig = new ChaosFaultConfig
            {
                FailureRate = 0.5,
                Seed = 42 // Fixed seed for reproducibility
            }
        });

        // Act - run multiple times
        var failures = 0;
        for (var i = 0; i < 100; i++)
        {
            var decision = await _runner.ShouldFailAsync("tenant1", "email");
            if (decision.ShouldFail)
            {
                failures++;
            }
        }

        // Assert - should be roughly 50% failures (with some variance)
        Assert.InRange(failures, 30, 70);
    }

    /// <summary>With RateLimitPerMinute = 5, the first five calls pass and the sixth fails with 429.</summary>
    [Fact]
    public async Task ShouldFailAsync_RateLimit_EnforcesLimit()
    {
        // Arrange
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Rate Limit",
            InitiatedBy = "test-user",
            TenantId = "tenant1",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.RateLimit,
            FaultConfig = new ChaosFaultConfig
            {
                RateLimitPerMinute = 5
            }
        });

        // Act - first 5 should pass
        for (var i = 0; i < 5; i++)
        {
            var decision = await _runner.ShouldFailAsync("tenant1", "email");
            Assert.False(decision.ShouldFail);
        }

        // 6th should fail
        var failedDecision = await _runner.ShouldFailAsync("tenant1", "email");

        // Assert
        Assert.True(failedDecision.ShouldFail);
        Assert.Equal(429, failedDecision.InjectedStatusCode);
    }

    /// <summary>After the fake clock passes the experiment's Duration, it no longer injects faults.</summary>
    [Fact]
    public async Task ShouldFailAsync_ExperimentExpires_StopsMatching()
    {
        // Arrange
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Short Experiment",
            InitiatedBy = "test-user",
            TenantId = "tenant1",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.Outage,
            Duration = TimeSpan.FromMinutes(5)
        });

        // Act - advance time past duration
        _timeProvider.Advance(TimeSpan.FromMinutes(10));
        var decision = await _runner.ShouldFailAsync("tenant1", "email");

        // Assert
        Assert.False(decision.ShouldFail);
    }

    /// <summary>With MaxAffectedOperations = 3, the fourth matching call is no longer failed.</summary>
    [Fact]
    public async Task ShouldFailAsync_MaxOperationsReached_StopsMatching()
    {
        // Arrange
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Limited Experiment",
            InitiatedBy = "test-user",
            TenantId = "tenant1",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.Outage,
            MaxAffectedOperations = 3
        });

        // Act - consume all operations
        for (var i = 0; i < 3; i++)
        {
            var affected = await _runner.ShouldFailAsync("tenant1", "email");
            Assert.True(affected.ShouldFail);
        }

        // 4th should not match
        var decision = await _runner.ShouldFailAsync("tenant1", "email");

        // Assert
        Assert.False(decision.ShouldFail);
    }

    /// <summary>A single recorded FaultInjected outcome with fallback shows up in the results counters.</summary>
    [Fact]
    public async Task RecordOutcomeAsync_RecordsOutcome()
    {
        // Arrange
        var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Test",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        });

        // Act
        await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
        {
            Type = ChaosOutcomeType.FaultInjected,
            ChannelType = "email",
            TenantId = "tenant1",
            FallbackTriggered = true
        });

        var results = await _runner.GetResultsAsync(experiment.Id);

        // Assert
        Assert.Equal(1, results.TotalAffected);
        Assert.Equal(1, results.FailedOperations);
        Assert.Equal(1, results.FallbackTriggered);
    }

    /// <summary>
    /// Results aggregate totals, failures, fallbacks, average injected latency and
    /// per-channel counts across two latency outcomes and one fault outcome.
    /// </summary>
    [Fact]
    public async Task GetResultsAsync_CalculatesStatistics()
    {
        // Arrange
        var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Test",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Latency
        });

        // Record various outcomes
        await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
        {
            Type = ChaosOutcomeType.LatencyInjected,
            ChannelType = "email",
            Duration = TimeSpan.FromMilliseconds(100)
        });
        await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
        {
            Type = ChaosOutcomeType.LatencyInjected,
            ChannelType = "email",
            Duration = TimeSpan.FromMilliseconds(200)
        });
        await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
        {
            Type = ChaosOutcomeType.FaultInjected,
            ChannelType = "slack",
            FallbackTriggered = true
        });

        // Act
        var results = await _runner.GetResultsAsync(experiment.Id);

        // Assert
        Assert.Equal(3, results.TotalAffected);
        Assert.Equal(1, results.FailedOperations);
        Assert.Equal(1, results.FallbackTriggered);
        Assert.NotNull(results.AverageInjectedLatency);
        Assert.Equal(150, results.AverageInjectedLatency.Value.TotalMilliseconds);
        Assert.Equal(2, results.ByChannelType["email"].TotalAffected);
        Assert.Equal(1, results.ByChannelType["slack"].TotalAffected);
    }

    /// <summary>Listing by status returns only the experiments currently in that status.</summary>
    [Fact]
    public async Task ListExperimentsAsync_FiltersByStatus()
    {
        // Arrange
        var running = await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Running",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        });

        var toStop = await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "To Stop",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        });
        await _runner.StopExperimentAsync(toStop.Id);

        // Act
        var runningList = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Running);
        var stoppedList = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Stopped);

        // Assert - Assert.Single both checks the count and hands back the element
        var onlyRunning = Assert.Single(runningList);
        var onlyStopped = Assert.Single(stoppedList);
        Assert.Equal(running.Id, onlyRunning.Id);
        Assert.Equal(toStop.Id, onlyStopped.Id);
    }

    /// <summary>An experiment that ended more than the retention window ago is removed by cleanup.</summary>
    [Fact]
    public async Task CleanupAsync_RemovesOldExperiments()
    {
        // Arrange
        var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Old Experiment",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage,
            Duration = TimeSpan.FromMinutes(5)
        });

        // Complete the experiment
        _timeProvider.Advance(TimeSpan.FromMinutes(10));
        await _runner.GetExperimentAsync(experiment.Id); // Triggers status update

        // Advance time beyond cleanup threshold
        _timeProvider.Advance(TimeSpan.FromDays(10));

        // Act
        var removed = await _runner.CleanupAsync(TimeSpan.FromDays(7));

        // Assert
        Assert.Equal(1, removed);
        var result = await _runner.GetExperimentAsync(experiment.Id);
        Assert.Null(result);
    }

    /// <summary>An ErrorResponse experiment surfaces the configured status code and message.</summary>
    [Fact]
    public async Task ErrorResponseFault_ReturnsConfiguredStatusCode()
    {
        // Arrange
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "Error Response",
            InitiatedBy = "test-user",
            TenantId = "tenant1",
            TargetChannelTypes = ["email"],
            FaultType = ChaosFaultType.ErrorResponse,
            FaultConfig = new ChaosFaultConfig
            {
                ErrorStatusCode = 503,
                ErrorMessage = "Service Unavailable"
            }
        });

        // Act
        var decision = await _runner.ShouldFailAsync("tenant1", "email");

        // Assert
        Assert.True(decision.ShouldFail);
        Assert.Equal(503, decision.InjectedStatusCode);
        Assert.Contains("Service Unavailable", decision.InjectedError);
    }
}