Files
git.stella-ops.org/src/Notifier/StellaOps.Notifier/StellaOps.Notifier.Tests/Observability/ChaosTestRunnerTests.cs
StellaOps Bot 999e26a48e up
2025-12-13 02:22:15 +02:00

493 lines
15 KiB
C#

using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using Microsoft.Extensions.Time.Testing;
using StellaOps.Notifier.Worker.Observability;
namespace StellaOps.Notifier.Tests.Observability;
public class ChaosTestRunnerTests
{
// Controllable clock handed to the runner; tests move it with Advance().
private readonly FakeTimeProvider _timeProvider;

// Options the shared runner is built with (enabled, 5 concurrent, 1 hour max duration).
private readonly ChaosTestOptions _options;

// Runner under test, wired to the fake clock and a no-op logger.
private readonly InMemoryChaosTestRunner _runner;

/// <summary>
/// Creates the runner under test with chaos testing enabled, a limit of five
/// concurrent experiments, a one-hour maximum duration, and no tenant-target requirement.
/// </summary>
public ChaosTestRunnerTests()
{
    // Seed the fake clock with a fixed instant instead of the wall clock so that
    // every run starts from the same timestamp and failures are reproducible.
    _timeProvider = new FakeTimeProvider(new DateTimeOffset(2025, 1, 1, 0, 0, 0, TimeSpan.Zero));

    _options = new ChaosTestOptions
    {
        Enabled = true,
        MaxConcurrentExperiments = 5,
        MaxExperimentDuration = TimeSpan.FromHours(1),
        RequireTenantTarget = false
    };

    _runner = new InMemoryChaosTestRunner(
        Options.Create(_options),
        _timeProvider,
        NullLogger<InMemoryChaosTestRunner>.Instance);
}
/// <summary>
/// Starting an experiment returns it in the Running state, with the supplied
/// configuration attached and a start timestamp populated.
/// </summary>
[Fact]
public async Task StartExperimentAsync_CreatesExperiment()
{
    // Arrange + Act: start a five-minute outage against the email channel.
    var started = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Test Outage",
        InitiatedBy = "test-user",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromMinutes(5)
    });

    // Assert
    Assert.NotNull(started);
    Assert.Equal(ChaosExperimentStatus.Running, started.Status);
    Assert.Equal("Test Outage", started.Config.Name);
    Assert.NotNull(started.StartedAt);
}
/// <summary>
/// A runner whose options have <c>Enabled = false</c> rejects new experiments
/// with an <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task StartExperimentAsync_WhenDisabled_Throws()
{
    // Arrange: a dedicated runner built from disabled options (the shared one is enabled).
    var disabledRunner = new InMemoryChaosTestRunner(
        Options.Create(new ChaosTestOptions { Enabled = false }),
        _timeProvider,
        NullLogger<InMemoryChaosTestRunner>.Instance);

    var request = new ChaosExperimentConfig
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    };

    Func<Task> start = () => disabledRunner.StartExperimentAsync(request);

    // Act & Assert
    await Assert.ThrowsAsync<InvalidOperationException>(start);
}
/// <summary>
/// Requesting a duration longer than the configured maximum is rejected
/// with an <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task StartExperimentAsync_ExceedsMaxDuration_Throws()
{
    // Arrange: two hours, while the constructor configures a one-hour maximum.
    var tooLong = TimeSpan.FromHours(2);
    var request = new ChaosExperimentConfig
    {
        Name = "Long Experiment",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage,
        Duration = tooLong
    };

    Func<Task> start = () => _runner.StartExperimentAsync(request);

    // Act & Assert
    await Assert.ThrowsAsync<InvalidOperationException>(start);
}
/// <summary>
/// Once the configured number of concurrent experiments is running, starting
/// one more is rejected with an <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task StartExperimentAsync_MaxConcurrentReached_Throws()
{
    // Arrange - fill every slot. The bound is read from the options the runner was
    // built with, so this test stays correct if the configured limit changes.
    for (var i = 0; i < _options.MaxConcurrentExperiments; i++)
    {
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = $"Experiment {i}",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        });
    }

    // Act & Assert - the next start exceeds the limit.
    await Assert.ThrowsAsync<InvalidOperationException>(() =>
        _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "One too many",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        }));
}
/// <summary>
/// Stopping a running experiment leaves it retrievable in the Stopped state
/// with an end timestamp populated.
/// </summary>
[Fact]
public async Task StopExperimentAsync_StopsExperiment()
{
    // Arrange
    var request = new ChaosExperimentConfig
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    };
    var started = await _runner.StartExperimentAsync(request);
    var experimentId = started.Id;

    // Act
    await _runner.StopExperimentAsync(experimentId);

    // Assert: re-read the experiment to observe its state after the stop.
    var reloaded = await _runner.GetExperimentAsync(experimentId);
    Assert.NotNull(reloaded);
    Assert.Equal(ChaosExperimentStatus.Stopped, reloaded.Status);
    Assert.NotNull(reloaded.EndedAt);
}
/// <summary>
/// With an outage experiment targeting a tenant and channel type, a query for
/// that same tenant and channel type reports an outage fault with an injected error.
/// </summary>
[Fact]
public async Task ShouldFailAsync_OutageFault_ReturnsFault()
{
    // Arrange
    const string tenant = "tenant1";
    const string channel = "email";

    var outage = new ChaosExperimentConfig
    {
        Name = "Email Outage",
        InitiatedBy = "test-user",
        TenantId = tenant,
        TargetChannelTypes = [channel],
        FaultType = ChaosFaultType.Outage
    };
    await _runner.StartExperimentAsync(outage);

    // Act
    var verdict = await _runner.ShouldFailAsync(tenant, channel);

    // Assert
    Assert.True(verdict.ShouldFail);
    Assert.Equal(ChaosFaultType.Outage, verdict.FaultType);
    Assert.NotNull(verdict.InjectedError);
}
/// <summary>
/// An experiment scoped to one tenant does not inject a fault for a different tenant.
/// </summary>
[Fact]
public async Task ShouldFailAsync_NoMatchingExperiment_ReturnsNoFault()
{
    // Arrange: the outage is scoped to tenant1 only.
    const string channel = "email";

    var outage = new ChaosExperimentConfig
    {
        Name = "Email Outage",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = [channel],
        FaultType = ChaosFaultType.Outage
    };
    await _runner.StartExperimentAsync(outage);

    // Act - same channel, different tenant
    var verdict = await _runner.ShouldFailAsync("tenant2", channel);

    // Assert
    Assert.False(verdict.ShouldFail);
}
/// <summary>
/// An experiment targeting one channel type does not inject a fault for a
/// different channel type of the same tenant.
/// </summary>
[Fact]
public async Task ShouldFailAsync_WrongChannelType_ReturnsNoFault()
{
    // Arrange: the outage targets the email channel only.
    const string tenant = "tenant1";

    var outage = new ChaosExperimentConfig
    {
        Name = "Email Outage",
        InitiatedBy = "test-user",
        TenantId = tenant,
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage
    };
    await _runner.StartExperimentAsync(outage);

    // Act - same tenant, different channel type
    var verdict = await _runner.ShouldFailAsync(tenant, "slack");

    // Assert
    Assert.False(verdict.ShouldFail);
}
/// <summary>
/// A latency experiment does not fail the operation but reports an injected
/// latency within the configured minimum and maximum.
/// </summary>
/// <remarks>
/// This test was previously skipped with the reason "Requires persistent storage backend",
/// although it uses only the in-memory runner and fake clock, like every other test in
/// this class. It is enabled so that latency injection is actually verified.
/// </remarks>
[Fact]
public async Task ShouldFailAsync_LatencyFault_InjectsLatency()
{
    // Arrange
    await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Latency Test",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Latency,
        FaultConfig = new ChaosFaultConfig
        {
            MinLatency = TimeSpan.FromSeconds(1),
            MaxLatency = TimeSpan.FromSeconds(5)
        }
    });

    // Act
    var decision = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.False(decision.ShouldFail); // Latency doesn't cause failure
    Assert.NotNull(decision.InjectedLatency);
    Assert.InRange(decision.InjectedLatency.Value.TotalSeconds, 1, 5);
}
/// <summary>
/// A partial-failure experiment with a 0.5 failure rate fails roughly half of
/// 100 queries (between 30 and 70 inclusive).
/// </summary>
[Fact]
public async Task ShouldFailAsync_PartialFailure_UsesFailureRate()
{
    // Arrange
    const int attempts = 100;

    var faultSettings = new ChaosFaultConfig
    {
        FailureRate = 0.5,
        Seed = 42 // Fixed seed for reproducibility
    };
    await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Partial Failure",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.PartialFailure,
        FaultConfig = faultSettings
    });

    // Act - query repeatedly and count how many decisions report a failure.
    var failureCount = 0;
    for (var attempt = 0; attempt < attempts; attempt++)
    {
        var verdict = await _runner.ShouldFailAsync("tenant1", "email");
        if (verdict.ShouldFail)
        {
            failureCount++;
        }
    }

    // Assert - around half should fail; the band leaves room for variance.
    Assert.InRange(failureCount, 30, 70);
}
/// <summary>
/// A rate-limit experiment lets through as many queries as its per-minute limit
/// and fails the next one with status code 429.
/// </summary>
[Fact]
public async Task ShouldFailAsync_RateLimit_EnforcesLimit()
{
    // Arrange
    const int limit = 5;

    await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Rate Limit",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.RateLimit,
        FaultConfig = new ChaosFaultConfig
        {
            RateLimitPerMinute = limit
        }
    });

    // Act - every query up to the limit passes. The fake clock is never advanced,
    // so no time elapses between calls.
    for (var call = 0; call < limit; call++)
    {
        var allowed = await _runner.ShouldFailAsync("tenant1", "email");
        Assert.False(allowed.ShouldFail);
    }

    // The first query beyond the limit is rejected.
    var rejected = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.True(rejected.ShouldFail);
    Assert.Equal(429, rejected.InjectedStatusCode);
}
/// <summary>
/// After the clock moves past an experiment's duration, the experiment no
/// longer injects faults.
/// </summary>
[Fact]
public async Task ShouldFailAsync_ExperimentExpires_StopsMatching()
{
    // Arrange: a five-minute outage.
    var shortLived = new ChaosExperimentConfig
    {
        Name = "Short Experiment",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromMinutes(5)
    };
    await _runner.StartExperimentAsync(shortLived);

    // Act - move the fake clock ten minutes ahead, past the duration.
    _timeProvider.Advance(TimeSpan.FromMinutes(10));
    var verdict = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.False(verdict.ShouldFail);
}
/// <summary>
/// An experiment capped at a maximum number of affected operations injects
/// faults for exactly that many queries and none afterwards.
/// </summary>
[Fact]
public async Task ShouldFailAsync_MaxOperationsReached_StopsMatching()
{
    // Arrange
    const int operationBudget = 3;

    await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Limited Experiment",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage,
        MaxAffectedOperations = operationBudget
    });

    // Act - use up the whole budget; each of these queries must fail.
    for (var used = 0; used < operationBudget; used++)
    {
        var affected = await _runner.ShouldFailAsync("tenant1", "email");
        Assert.True(affected.ShouldFail);
    }

    // The next query falls outside the budget.
    var afterBudget = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.False(afterBudget.ShouldFail);
}
/// <summary>
/// Recording a single fault-injected outcome with fallback triggered shows up
/// as one affected, one failed, and one fallback operation in the results.
/// </summary>
[Fact]
public async Task RecordOutcomeAsync_RecordsOutcome()
{
    // Arrange
    var started = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    });

    var outcome = new ChaosOutcome
    {
        Type = ChaosOutcomeType.FaultInjected,
        ChannelType = "email",
        TenantId = "tenant1",
        FallbackTriggered = true
    };

    // Act
    await _runner.RecordOutcomeAsync(started.Id, outcome);
    var summary = await _runner.GetResultsAsync(started.Id);

    // Assert
    Assert.Equal(1, summary.TotalAffected);
    Assert.Equal(1, summary.FailedOperations);
    Assert.Equal(1, summary.FallbackTriggered);
}
/// <summary>
/// Results aggregate recorded outcomes: total affected, failed operations,
/// fallback count, average injected latency, and per-channel-type totals.
/// </summary>
[Fact]
public async Task GetResultsAsync_CalculatesStatistics()
{
    // Arrange
    var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Latency
    });

    // Record two latency outcomes on email (100 ms and 200 ms) and one fault on slack.
    await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
    {
        Type = ChaosOutcomeType.LatencyInjected,
        ChannelType = "email",
        Duration = TimeSpan.FromMilliseconds(100)
    });
    await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
    {
        Type = ChaosOutcomeType.LatencyInjected,
        ChannelType = "email",
        Duration = TimeSpan.FromMilliseconds(200)
    });
    await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
    {
        Type = ChaosOutcomeType.FaultInjected,
        ChannelType = "slack",
        FallbackTriggered = true
    });

    // Act
    var results = await _runner.GetResultsAsync(experiment.Id);

    // Assert
    Assert.Equal(3, results.TotalAffected);
    Assert.Equal(1, results.FailedOperations);
    Assert.Equal(1, results.FallbackTriggered);
    Assert.NotNull(results.AverageInjectedLatency);

    // TotalMilliseconds is a double; compare to 3 decimal places rather than exactly,
    // so the check does not hinge on how the average is rounded internally.
    Assert.Equal(150d, results.AverageInjectedLatency.Value.TotalMilliseconds, 3);

    Assert.Equal(2, results.ByChannelType["email"].TotalAffected);
    Assert.Equal(1, results.ByChannelType["slack"].TotalAffected);
}
/// <summary>
/// Listing by status returns only experiments in that status: one still
/// running and one that was stopped.
/// </summary>
[Fact]
public async Task ListExperimentsAsync_FiltersByStatus()
{
    // Arrange: start two experiments, then stop the second one.
    var keptRunning = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Running",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    });
    var halted = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "To Stop",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    });
    await _runner.StopExperimentAsync(halted.Id);

    // Act
    var runningOnly = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Running);
    var stoppedOnly = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Stopped);

    // Assert: each list holds exactly the one expected experiment.
    Assert.Single(runningOnly);
    Assert.Single(stoppedOnly);
    Assert.Equal(keptRunning.Id, runningOnly[0].Id);
    Assert.Equal(halted.Id, stoppedOnly[0].Id);
}
/// <summary>
/// Cleanup with a seven-day threshold removes an experiment whose duration
/// elapsed more than seven days earlier, after which it can no longer be retrieved.
/// </summary>
[Fact]
public async Task CleanupAsync_RemovesOldExperiments()
{
    // Arrange
    var started = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Old Experiment",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromMinutes(5)
    });
    var experimentId = started.Id;

    // Let the experiment run out, then read it back.
    _timeProvider.Advance(TimeSpan.FromMinutes(10));
    await _runner.GetExperimentAsync(experimentId); // Triggers status update

    // Move the clock well past the cleanup threshold used below.
    _timeProvider.Advance(TimeSpan.FromDays(10));

    // Act
    var threshold = TimeSpan.FromDays(7);
    var removedCount = await _runner.CleanupAsync(threshold);

    // Assert
    Assert.Equal(1, removedCount);
    var lookup = await _runner.GetExperimentAsync(experimentId);
    Assert.Null(lookup);
}
/// <summary>
/// An error-response experiment fails the operation with the configured
/// status code and an error containing the configured message.
/// </summary>
[Fact]
public async Task ErrorResponseFault_ReturnsConfiguredStatusCode()
{
    // Arrange
    const string tenant = "tenant1";
    const string channel = "email";

    var errorSettings = new ChaosFaultConfig
    {
        ErrorStatusCode = 503,
        ErrorMessage = "Service Unavailable"
    };
    await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Error Response",
        InitiatedBy = "test-user",
        TenantId = tenant,
        TargetChannelTypes = [channel],
        FaultType = ChaosFaultType.ErrorResponse,
        FaultConfig = errorSettings
    });

    // Act
    var verdict = await _runner.ShouldFailAsync(tenant, channel);

    // Assert
    Assert.True(verdict.ShouldFail);
    Assert.Equal(503, verdict.InjectedStatusCode);
    Assert.Contains("Service Unavailable", verdict.InjectedError);
}
}