up

2025-11-27 15:05:48 +02:00
parent 4831c7fcb0
commit e950474a77
278 changed files with 81498 additions and 672 deletions
--- a/src/Notifier/StellaOps.Notifier/StellaOps.Notifier.Tests/Observability/ChaosTestRunnerTests.cs
+++ b/src/Notifier/StellaOps.Notifier/StellaOps.Notifier.Tests/Observability/ChaosTestRunnerTests.cs
@@ -0,0 +1,492 @@
+using Microsoft.Extensions.Logging.Abstractions;
+using Microsoft.Extensions.Options;
+using Microsoft.Extensions.Time.Testing;
+using StellaOps.Notifier.Worker.Observability;
+
+namespace StellaOps.Notifier.Tests.Observability;
+
+/// <summary>
+/// Tests for <see cref="InMemoryChaosTestRunner"/>: experiment lifecycle (start, stop, list, cleanup),
+/// fault-injection decisions returned by <c>ShouldFailAsync</c>, and outcome statistics.
+/// The runner is constructed with a <see cref="FakeTimeProvider"/> so time-dependent tests
+/// move the clock explicitly instead of waiting.
+/// </summary>
+public class ChaosTestRunnerTests
+{
+    private const string TargetTenant = "tenant1";
+    private const string OtherTenant = "tenant2";
+    private const string EmailChannel = "email";
+    private const string SlackChannel = "slack";
+
+    // Fixed start instant: every run observes the same clock, and tests only move it via Advance.
+    private static readonly DateTimeOffset ClockStart = new(2025, 1, 1, 0, 0, 0, TimeSpan.Zero);
+
+    private readonly FakeTimeProvider _timeProvider;
+    private readonly ChaosTestOptions _options;
+    private readonly InMemoryChaosTestRunner _runner;
+
+    /// <summary>
+    /// Builds a fresh, enabled runner per test (xUnit creates one class instance per test method).
+    /// </summary>
+    public ChaosTestRunnerTests()
+    {
+        _timeProvider = new FakeTimeProvider(ClockStart);
+        _options = new ChaosTestOptions
+        {
+            Enabled = true,
+            MaxConcurrentExperiments = 5,
+            MaxExperimentDuration = TimeSpan.FromHours(1),
+            RequireTenantTarget = false
+        };
+        _runner = new InMemoryChaosTestRunner(
+            Options.Create(_options),
+            _timeProvider,
+            NullLogger<InMemoryChaosTestRunner>.Instance);
+    }
+
+    /// <summary>
+    /// Starting an experiment returns it in the Running state with the submitted name and a start timestamp.
+    /// </summary>
+    [Fact]
+    public async Task StartExperimentAsync_CreatesExperiment()
+    {
+        // Arrange
+        var config = new ChaosExperimentConfig
+        {
+            Name = "Test Outage",
+            InitiatedBy = "test-user",
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.Outage,
+            Duration = TimeSpan.FromMinutes(5)
+        };
+
+        // Act
+        var experiment = await _runner.StartExperimentAsync(config);
+
+        // Assert
+        Assert.NotNull(experiment);
+        Assert.Equal(ChaosExperimentStatus.Running, experiment.Status);
+        Assert.Equal("Test Outage", experiment.Config.Name);
+        Assert.NotNull(experiment.StartedAt);
+    }
+
+    /// <summary>
+    /// A runner configured with <c>Enabled = false</c> rejects new experiments.
+    /// </summary>
+    [Fact]
+    public async Task StartExperimentAsync_WhenDisabled_Throws()
+    {
+        // Arrange
+        var disabledRunner = new InMemoryChaosTestRunner(
+            Options.Create(new ChaosTestOptions { Enabled = false }),
+            _timeProvider,
+            NullLogger<InMemoryChaosTestRunner>.Instance);
+
+        var config = new ChaosExperimentConfig
+        {
+            Name = "Test",
+            InitiatedBy = "test-user",
+            FaultType = ChaosFaultType.Outage
+        };
+
+        // Act & Assert
+        await Assert.ThrowsAsync<InvalidOperationException>(
+            () => disabledRunner.StartExperimentAsync(config));
+    }
+
+    /// <summary>
+    /// An experiment whose duration is longer than the configured maximum is rejected.
+    /// </summary>
+    [Fact]
+    public async Task StartExperimentAsync_ExceedsMaxDuration_Throws()
+    {
+        // Arrange
+        var config = new ChaosExperimentConfig
+        {
+            Name = "Long Experiment",
+            InitiatedBy = "test-user",
+            FaultType = ChaosFaultType.Outage,
+            Duration = TimeSpan.FromHours(2) // Exceeds the 1 hour MaxExperimentDuration set in the constructor
+        };
+
+        // Act & Assert
+        await Assert.ThrowsAsync<InvalidOperationException>(
+            () => _runner.StartExperimentAsync(config));
+    }
+
+    /// <summary>
+    /// Once the configured number of concurrent experiments is running, one more is rejected.
+    /// </summary>
+    [Fact]
+    public async Task StartExperimentAsync_MaxConcurrentReached_Throws()
+    {
+        // Arrange - fill every slot; the bound is read from the options so it tracks the configuration
+        for (var i = 0; i < _options.MaxConcurrentExperiments; i++)
+        {
+            await _runner.StartExperimentAsync(new ChaosExperimentConfig
+            {
+                Name = $"Experiment {i}",
+                InitiatedBy = "test-user",
+                FaultType = ChaosFaultType.Outage
+            });
+        }
+
+        var oneTooMany = new ChaosExperimentConfig
+        {
+            Name = "One too many",
+            InitiatedBy = "test-user",
+            FaultType = ChaosFaultType.Outage
+        };
+
+        // Act & Assert
+        await Assert.ThrowsAsync<InvalidOperationException>(
+            () => _runner.StartExperimentAsync(oneTooMany));
+    }
+
+    /// <summary>
+    /// Stopping a running experiment marks it Stopped and records an end timestamp.
+    /// </summary>
+    [Fact]
+    public async Task StopExperimentAsync_StopsExperiment()
+    {
+        // Arrange
+        var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Test",
+            InitiatedBy = "test-user",
+            FaultType = ChaosFaultType.Outage
+        });
+
+        // Act
+        await _runner.StopExperimentAsync(experiment.Id);
+
+        // Assert
+        var stopped = await _runner.GetExperimentAsync(experiment.Id);
+        Assert.NotNull(stopped);
+        Assert.Equal(ChaosExperimentStatus.Stopped, stopped.Status);
+        Assert.NotNull(stopped.EndedAt);
+    }
+
+    /// <summary>
+    /// An outage experiment produces a failing decision, with an error, for the targeted tenant and channel.
+    /// </summary>
+    [Fact]
+    public async Task ShouldFailAsync_OutageFault_ReturnsFault()
+    {
+        // Arrange
+        await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Email Outage",
+            InitiatedBy = "test-user",
+            TenantId = TargetTenant,
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.Outage
+        });
+
+        // Act
+        var decision = await _runner.ShouldFailAsync(TargetTenant, EmailChannel);
+
+        // Assert
+        Assert.True(decision.ShouldFail);
+        Assert.Equal(ChaosFaultType.Outage, decision.FaultType);
+        Assert.NotNull(decision.InjectedError);
+    }
+
+    /// <summary>
+    /// A tenant other than the one the experiment targets is not affected.
+    /// </summary>
+    [Fact]
+    public async Task ShouldFailAsync_NoMatchingExperiment_ReturnsNoFault()
+    {
+        // Arrange
+        await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Email Outage",
+            InitiatedBy = "test-user",
+            TenantId = TargetTenant,
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.Outage
+        });
+
+        // Act - different tenant
+        var decision = await _runner.ShouldFailAsync(OtherTenant, EmailChannel);
+
+        // Assert
+        Assert.False(decision.ShouldFail);
+    }
+
+    /// <summary>
+    /// A channel type other than the one the experiment targets is not affected.
+    /// </summary>
+    [Fact]
+    public async Task ShouldFailAsync_WrongChannelType_ReturnsNoFault()
+    {
+        // Arrange
+        await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Email Outage",
+            InitiatedBy = "test-user",
+            TenantId = TargetTenant,
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.Outage
+        });
+
+        // Act - different channel type
+        var decision = await _runner.ShouldFailAsync(TargetTenant, SlackChannel);
+
+        // Assert
+        Assert.False(decision.ShouldFail);
+    }
+
+    /// <summary>
+    /// A latency experiment does not fail the operation but reports an injected latency
+    /// inside the configured min/max window.
+    /// </summary>
+    [Fact]
+    public async Task ShouldFailAsync_LatencyFault_InjectsLatency()
+    {
+        // Arrange
+        await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Latency Test",
+            InitiatedBy = "test-user",
+            TenantId = TargetTenant,
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.Latency,
+            FaultConfig = new ChaosFaultConfig
+            {
+                MinLatency = TimeSpan.FromSeconds(1),
+                MaxLatency = TimeSpan.FromSeconds(5)
+            }
+        });
+
+        // Act
+        var decision = await _runner.ShouldFailAsync(TargetTenant, EmailChannel);
+
+        // Assert
+        Assert.False(decision.ShouldFail); // Latency doesn't cause failure
+        Assert.NotNull(decision.InjectedLatency);
+        Assert.InRange(decision.InjectedLatency.Value.TotalSeconds, 1.0, 5.0);
+    }
+
+    /// <summary>
+    /// With a 50% failure rate and a fixed seed, roughly half of 100 decisions fail.
+    /// </summary>
+    [Fact]
+    public async Task ShouldFailAsync_PartialFailure_UsesFailureRate()
+    {
+        // Arrange
+        await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Partial Failure",
+            InitiatedBy = "test-user",
+            TenantId = TargetTenant,
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.PartialFailure,
+            FaultConfig = new ChaosFaultConfig
+            {
+                FailureRate = 0.5,
+                Seed = 42 // Fixed seed for reproducibility
+            }
+        });
+
+        // Act - run multiple times
+        var failures = 0;
+        for (var attempt = 0; attempt < 100; attempt++)
+        {
+            var decision = await _runner.ShouldFailAsync(TargetTenant, EmailChannel);
+            if (decision.ShouldFail)
+            {
+                failures++;
+            }
+        }
+
+        // Assert - should be roughly 50% failures (with some variance)
+        Assert.InRange(failures, 30, 70);
+    }
+
+    /// <summary>
+    /// A rate-limit experiment lets the configured number of calls through and then
+    /// fails the next one with status code 429.
+    /// </summary>
+    [Fact]
+    public async Task ShouldFailAsync_RateLimit_EnforcesLimit()
+    {
+        // Arrange - one constant drives both the configuration and the loop bound
+        const int limitPerMinute = 5;
+        await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Rate Limit",
+            InitiatedBy = "test-user",
+            TenantId = TargetTenant,
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.RateLimit,
+            FaultConfig = new ChaosFaultConfig
+            {
+                RateLimitPerMinute = limitPerMinute
+            }
+        });
+
+        // Act - calls up to the limit should pass
+        for (var call = 0; call < limitPerMinute; call++)
+        {
+            var allowed = await _runner.ShouldFailAsync(TargetTenant, EmailChannel);
+            Assert.False(allowed.ShouldFail);
+        }
+
+        // The call after the limit should fail
+        var failedDecision = await _runner.ShouldFailAsync(TargetTenant, EmailChannel);
+
+        // Assert
+        Assert.True(failedDecision.ShouldFail);
+        Assert.Equal(429, failedDecision.InjectedStatusCode);
+    }
+
+    /// <summary>
+    /// After the fake clock moves past the experiment's duration, it no longer injects faults.
+    /// </summary>
+    [Fact]
+    public async Task ShouldFailAsync_ExperimentExpires_StopsMatching()
+    {
+        // Arrange
+        await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Short Experiment",
+            InitiatedBy = "test-user",
+            TenantId = TargetTenant,
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.Outage,
+            Duration = TimeSpan.FromMinutes(5)
+        });
+
+        // Act - advance time past duration
+        _timeProvider.Advance(TimeSpan.FromMinutes(10));
+        var decision = await _runner.ShouldFailAsync(TargetTenant, EmailChannel);
+
+        // Assert
+        Assert.False(decision.ShouldFail);
+    }
+
+    /// <summary>
+    /// An experiment capped by <c>MaxAffectedOperations</c> fails exactly that many calls
+    /// and leaves the next one untouched.
+    /// </summary>
+    [Fact]
+    public async Task ShouldFailAsync_MaxOperationsReached_StopsMatching()
+    {
+        // Arrange - one constant drives both the configuration and the loop bound
+        const int maxAffected = 3;
+        await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Limited Experiment",
+            InitiatedBy = "test-user",
+            TenantId = TargetTenant,
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.Outage,
+            MaxAffectedOperations = maxAffected
+        });
+
+        // Act - consume all operations
+        for (var call = 0; call < maxAffected; call++)
+        {
+            var affected = await _runner.ShouldFailAsync(TargetTenant, EmailChannel);
+            Assert.True(affected.ShouldFail);
+        }
+
+        // The next call should not match
+        var decision = await _runner.ShouldFailAsync(TargetTenant, EmailChannel);
+
+        // Assert
+        Assert.False(decision.ShouldFail);
+    }
+
+    /// <summary>
+    /// A single recorded fault outcome with a fallback is reflected in the experiment results.
+    /// </summary>
+    [Fact]
+    public async Task RecordOutcomeAsync_RecordsOutcome()
+    {
+        // Arrange
+        var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Test",
+            InitiatedBy = "test-user",
+            FaultType = ChaosFaultType.Outage
+        });
+
+        // Act
+        await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
+        {
+            Type = ChaosOutcomeType.FaultInjected,
+            ChannelType = EmailChannel,
+            TenantId = TargetTenant,
+            FallbackTriggered = true
+        });
+
+        var results = await _runner.GetResultsAsync(experiment.Id);
+
+        // Assert
+        Assert.Equal(1, results.TotalAffected);
+        Assert.Equal(1, results.FailedOperations);
+        Assert.Equal(1, results.FallbackTriggered);
+    }
+
+    /// <summary>
+    /// Results aggregate totals, failures, fallbacks, average injected latency and per-channel counts
+    /// across several recorded outcomes.
+    /// </summary>
+    [Fact]
+    public async Task GetResultsAsync_CalculatesStatistics()
+    {
+        // Arrange
+        var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Test",
+            InitiatedBy = "test-user",
+            FaultType = ChaosFaultType.Latency
+        });
+
+        // Record various outcomes: two latency injections on email, one fault on slack
+        await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
+        {
+            Type = ChaosOutcomeType.LatencyInjected,
+            ChannelType = EmailChannel,
+            Duration = TimeSpan.FromMilliseconds(100)
+        });
+        await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
+        {
+            Type = ChaosOutcomeType.LatencyInjected,
+            ChannelType = EmailChannel,
+            Duration = TimeSpan.FromMilliseconds(200)
+        });
+        await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
+        {
+            Type = ChaosOutcomeType.FaultInjected,
+            ChannelType = SlackChannel,
+            FallbackTriggered = true
+        });
+
+        // Act
+        var results = await _runner.GetResultsAsync(experiment.Id);
+
+        // Assert
+        Assert.Equal(3, results.TotalAffected);
+        Assert.Equal(1, results.FailedOperations);
+        Assert.Equal(1, results.FallbackTriggered);
+        Assert.NotNull(results.AverageInjectedLatency);
+        // Compare with a tolerance: the average is a double and should not be matched bit-for-bit.
+        Assert.Equal(150.0, results.AverageInjectedLatency.Value.TotalMilliseconds, precision: 3);
+        Assert.Equal(2, results.ByChannelType[EmailChannel].TotalAffected);
+        Assert.Equal(1, results.ByChannelType[SlackChannel].TotalAffected);
+    }
+
+    /// <summary>
+    /// Listing by status returns only the experiments currently in that status.
+    /// </summary>
+    [Fact]
+    public async Task ListExperimentsAsync_FiltersByStatus()
+    {
+        // Arrange
+        var running = await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Running",
+            InitiatedBy = "test-user",
+            FaultType = ChaosFaultType.Outage
+        });
+
+        var toStop = await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "To Stop",
+            InitiatedBy = "test-user",
+            FaultType = ChaosFaultType.Outage
+        });
+        await _runner.StopExperimentAsync(toStop.Id);
+
+        // Act
+        var runningList = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Running);
+        var stoppedList = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Stopped);
+
+        // Assert
+        Assert.Single(runningList);
+        Assert.Single(stoppedList);
+        Assert.Equal(running.Id, runningList[0].Id);
+        Assert.Equal(toStop.Id, stoppedList[0].Id);
+    }
+
+    /// <summary>
+    /// Cleanup with a 7-day retention removes an experiment that ended more than 7 days ago.
+    /// </summary>
+    [Fact]
+    public async Task CleanupAsync_RemovesOldExperiments()
+    {
+        // Arrange
+        var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Old Experiment",
+            InitiatedBy = "test-user",
+            FaultType = ChaosFaultType.Outage,
+            Duration = TimeSpan.FromMinutes(5)
+        });
+
+        // Complete the experiment
+        _timeProvider.Advance(TimeSpan.FromMinutes(10));
+        await _runner.GetExperimentAsync(experiment.Id); // Triggers status update
+
+        // Advance time beyond cleanup threshold
+        _timeProvider.Advance(TimeSpan.FromDays(10));
+
+        // Act
+        var removed = await _runner.CleanupAsync(TimeSpan.FromDays(7));
+
+        // Assert
+        Assert.Equal(1, removed);
+        var remaining = await _runner.GetExperimentAsync(experiment.Id);
+        Assert.Null(remaining);
+    }
+
+    /// <summary>
+    /// An error-response experiment fails the call with the configured status code and message.
+    /// </summary>
+    [Fact]
+    public async Task ErrorResponseFault_ReturnsConfiguredStatusCode()
+    {
+        // Arrange
+        await _runner.StartExperimentAsync(new ChaosExperimentConfig
+        {
+            Name = "Error Response",
+            InitiatedBy = "test-user",
+            TenantId = TargetTenant,
+            TargetChannelTypes = [EmailChannel],
+            FaultType = ChaosFaultType.ErrorResponse,
+            FaultConfig = new ChaosFaultConfig
+            {
+                ErrorStatusCode = 503,
+                ErrorMessage = "Service Unavailable"
+            }
+        });
+
+        // Act
+        var decision = await _runner.ShouldFailAsync(TargetTenant, EmailChannel);
+
+        // Assert
+        Assert.True(decision.ShouldFail);
+        Assert.Equal(503, decision.InjectedStatusCode);
+        Assert.Contains("Service Unavailable", decision.InjectedError);
+    }
+}