493 lines
15 KiB
C#
493 lines
15 KiB
C#
using Microsoft.Extensions.Logging.Abstractions;
|
|
using Microsoft.Extensions.Options;
|
|
using Microsoft.Extensions.Time.Testing;
|
|
using StellaOps.Notifier.Worker.Observability;
|
|
|
|
namespace StellaOps.Notifier.Tests.Observability;
|
|
|
|
public class ChaosTestRunnerTests
|
|
{
|
|
// Fixed UTC instant used to seed the fake clock, so every run observes the same
// timestamps; tests move time forward only through _timeProvider.Advance().
private static readonly DateTimeOffset FixedStartTime = new(2025, 1, 1, 0, 0, 0, TimeSpan.Zero);

// Controllable clock handed to the runner.
private readonly FakeTimeProvider _timeProvider;

// Options the shared runner is built from; tests may read limits from here.
private readonly ChaosTestOptions _options;

// Runner under test.
private readonly InMemoryChaosTestRunner _runner;

/// <summary>
/// Test setup: creates the fake clock, the options (chaos enabled, at most
/// 5 concurrent experiments, 1-hour maximum duration, tenant target not required)
/// and the in-memory runner used by the tests in this class.
/// </summary>
public ChaosTestRunnerTests()
{
    _timeProvider = new FakeTimeProvider(FixedStartTime);

    _options = new ChaosTestOptions
    {
        Enabled = true,
        MaxConcurrentExperiments = 5,
        MaxExperimentDuration = TimeSpan.FromHours(1),
        RequireTenantTarget = false
    };

    _runner = new InMemoryChaosTestRunner(
        Options.Create(_options),
        _timeProvider,
        NullLogger<InMemoryChaosTestRunner>.Instance);
}
|
|
|
|
/// <summary>
/// Starting a valid experiment is expected to return an experiment in the
/// Running state that carries the submitted name and a start timestamp.
/// </summary>
[Fact]
public async Task StartExperimentAsync_CreatesExperiment()
{
    // Arrange: a five-minute outage aimed at the email channel.
    var outageConfig = new ChaosExperimentConfig
    {
        Name = "Test Outage",
        InitiatedBy = "test-user",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromMinutes(5)
    };

    // Act
    var started = await _runner.StartExperimentAsync(outageConfig);

    // Assert
    Assert.NotNull(started);
    Assert.Equal(ChaosExperimentStatus.Running, started.Status);
    Assert.Equal("Test Outage", started.Config.Name);
    Assert.NotNull(started.StartedAt);
}
|
|
|
|
/// <summary>
/// A runner built from options with Enabled = false is expected to reject a
/// new experiment with an InvalidOperationException.
/// </summary>
[Fact]
public async Task StartExperimentAsync_WhenDisabled_Throws()
{
    // Arrange: a dedicated runner whose options switch chaos testing off.
    var disabledRunner = new InMemoryChaosTestRunner(
        Options.Create(new ChaosTestOptions { Enabled = false }),
        _timeProvider,
        NullLogger<InMemoryChaosTestRunner>.Instance);

    var request = new ChaosExperimentConfig
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    };

    // Act
    Func<Task> start = () => disabledRunner.StartExperimentAsync(request);

    // Assert
    await Assert.ThrowsAsync<InvalidOperationException>(start);
}
|
|
|
|
/// <summary>
/// Requesting a two-hour experiment from a runner configured with a one-hour
/// maximum is expected to throw an InvalidOperationException.
/// </summary>
[Fact]
public async Task StartExperimentAsync_ExceedsMaxDuration_Throws()
{
    // Arrange: two hours is above the one-hour maximum set in the constructor.
    var tooLong = new ChaosExperimentConfig
    {
        Name = "Long Experiment",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromHours(2)
    };

    // Act
    Func<Task> start = () => _runner.StartExperimentAsync(tooLong);

    // Assert
    await Assert.ThrowsAsync<InvalidOperationException>(start);
}
|
|
|
|
/// <summary>
/// Once as many experiments as the configured MaxConcurrentExperiments are
/// running, starting one more is expected to throw an InvalidOperationException.
/// </summary>
[Fact]
public async Task StartExperimentAsync_MaxConcurrentReached_Throws()
{
    // Arrange: fill every slot. The bound is read from the options the runner was
    // built with, so the test stays correct if the fixture's limit is changed.
    for (var i = 0; i < _options.MaxConcurrentExperiments; i++)
    {
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = $"Experiment {i}",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        });
    }

    // Act & Assert: the next start is one over the limit.
    await Assert.ThrowsAsync<InvalidOperationException>(() =>
        _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "One too many",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        }));
}
|
|
|
|
/// <summary>
/// After stopping a started experiment, looking it up again is expected to
/// yield status Stopped with an end timestamp.
/// </summary>
[Fact]
public async Task StopExperimentAsync_StopsExperiment()
{
    // Arrange
    var config = new ChaosExperimentConfig
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    };
    var started = await _runner.StartExperimentAsync(config);

    // Act
    await _runner.StopExperimentAsync(started.Id);

    // Assert: re-read the experiment by id and inspect its state.
    var reloaded = await _runner.GetExperimentAsync(started.Id);
    Assert.NotNull(reloaded);
    Assert.Equal(ChaosExperimentStatus.Stopped, reloaded.Status);
    Assert.NotNull(reloaded.EndedAt);
}
|
|
|
|
/// <summary>
/// With an outage experiment running for tenant1/email, a decision for the same
/// tenant and channel is expected to fail with fault type Outage and an error.
/// </summary>
[Fact]
public async Task ShouldFailAsync_OutageFault_ReturnsFault()
{
    // Arrange
    var emailOutage = new ChaosExperimentConfig
    {
        Name = "Email Outage",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage
    };
    await _runner.StartExperimentAsync(emailOutage);

    // Act
    var verdict = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.True(verdict.ShouldFail);
    Assert.Equal(ChaosFaultType.Outage, verdict.FaultType);
    Assert.NotNull(verdict.InjectedError);
}
|
|
|
|
/// <summary>
/// An experiment scoped to tenant1 is expected to leave a request from tenant2
/// on the same channel unaffected.
/// </summary>
[Fact]
public async Task ShouldFailAsync_NoMatchingExperiment_ReturnsNoFault()
{
    // Arrange: the outage targets tenant1 only.
    var tenantOneOutage = new ChaosExperimentConfig
    {
        Name = "Email Outage",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage
    };
    await _runner.StartExperimentAsync(tenantOneOutage);

    // Act: ask on behalf of another tenant.
    var verdict = await _runner.ShouldFailAsync("tenant2", "email");

    // Assert
    Assert.False(verdict.ShouldFail);
}
|
|
|
|
/// <summary>
/// An experiment targeting only the email channel is expected to leave a slack
/// request for the same tenant unaffected.
/// </summary>
[Fact]
public async Task ShouldFailAsync_WrongChannelType_ReturnsNoFault()
{
    // Arrange: the outage targets the email channel only.
    var emailOnlyOutage = new ChaosExperimentConfig
    {
        Name = "Email Outage",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage
    };
    await _runner.StartExperimentAsync(emailOnlyOutage);

    // Act: same tenant, but a channel the experiment does not list.
    var verdict = await _runner.ShouldFailAsync("tenant1", "slack");

    // Assert
    Assert.False(verdict.ShouldFail);
}
|
|
|
|
/// <summary>
/// With a latency experiment configured for 1-5 seconds, a matching decision is
/// expected not to fail but to carry an injected latency inside that range.
/// </summary>
/// <remarks>
/// The test uses only the in-memory runner and the fake clock created in the
/// constructor, so it runs as a regular fact with no external dependencies.
/// </remarks>
[Fact]
public async Task ShouldFailAsync_LatencyFault_InjectsLatency()
{
    // Arrange
    await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Latency Test",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Latency,
        FaultConfig = new ChaosFaultConfig
        {
            MinLatency = TimeSpan.FromSeconds(1),
            MaxLatency = TimeSpan.FromSeconds(5)
        }
    });

    // Act
    var decision = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.False(decision.ShouldFail); // Latency doesn't cause failure
    Assert.NotNull(decision.InjectedLatency);
    Assert.InRange(decision.InjectedLatency.Value.TotalSeconds, 1, 5);
}
|
|
|
|
/// <summary>
/// With a partial-failure experiment at rate 0.5 and seed 42, the number of
/// failing decisions out of 100 is expected to fall between 30 and 70.
/// </summary>
[Fact]
public async Task ShouldFailAsync_PartialFailure_UsesFailureRate()
{
    // Arrange
    var partialFailure = new ChaosExperimentConfig
    {
        Name = "Partial Failure",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.PartialFailure,
        FaultConfig = new ChaosFaultConfig
        {
            FailureRate = 0.5,
            Seed = 42 // seed is pinned in the config
        }
    };
    await _runner.StartExperimentAsync(partialFailure);

    // Act: sample the decision repeatedly and count the failures.
    const int attempts = 100;
    var failureCount = 0;
    for (var attempt = 0; attempt < attempts; attempt++)
    {
        var verdict = await _runner.ShouldFailAsync("tenant1", "email");
        failureCount += verdict.ShouldFail ? 1 : 0;
    }

    // Assert: about half, with a generous band for variance.
    Assert.InRange(failureCount, 30, 70);
}
|
|
|
|
/// <summary>
/// With a rate-limit experiment of 5 per minute, the first five decisions are
/// expected to pass and the sixth to fail with status code 429.
/// </summary>
[Fact]
public async Task ShouldFailAsync_RateLimit_EnforcesLimit()
{
    // Arrange
    var rateLimited = new ChaosExperimentConfig
    {
        Name = "Rate Limit",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.RateLimit,
        FaultConfig = new ChaosFaultConfig
        {
            RateLimitPerMinute = 5
        }
    };
    await _runner.StartExperimentAsync(rateLimited);

    // Act: five calls within the limit must all go through.
    var allowedCalls = 0;
    while (allowedCalls < 5)
    {
        var withinLimit = await _runner.ShouldFailAsync("tenant1", "email");
        Assert.False(withinLimit.ShouldFail);
        allowedCalls++;
    }

    // The next call is over the limit.
    var overLimit = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.True(overLimit.ShouldFail);
    Assert.Equal(429, overLimit.InjectedStatusCode);
}
|
|
|
|
/// <summary>
/// After the fake clock moves 10 minutes past the start of a 5-minute outage
/// experiment, a matching request is expected not to fail.
/// </summary>
[Fact]
public async Task ShouldFailAsync_ExperimentExpires_StopsMatching()
{
    // Arrange
    var shortOutage = new ChaosExperimentConfig
    {
        Name = "Short Experiment",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromMinutes(5)
    };
    await _runner.StartExperimentAsync(shortOutage);

    // Act: move the clock beyond the configured duration, then ask.
    _timeProvider.Advance(TimeSpan.FromMinutes(10));
    var verdict = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.False(verdict.ShouldFail);
}
|
|
|
|
/// <summary>
/// With MaxAffectedOperations = 3, the first three matching decisions are
/// expected to fail and the fourth is expected not to.
/// </summary>
[Fact]
public async Task ShouldFailAsync_MaxOperationsReached_StopsMatching()
{
    // Arrange
    var cappedOutage = new ChaosExperimentConfig
    {
        Name = "Limited Experiment",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage,
        MaxAffectedOperations = 3
    };
    await _runner.StartExperimentAsync(cappedOutage);

    // Act: use up the three permitted operations.
    for (var call = 1; call <= 3; call++)
    {
        var affected = await _runner.ShouldFailAsync("tenant1", "email");
        Assert.True(affected.ShouldFail);
    }

    // One more call, beyond the cap.
    var afterCap = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.False(afterCap.ShouldFail);
}
|
|
|
|
/// <summary>
/// Recording a single FaultInjected outcome with fallback triggered is expected
/// to show up in the results as one affected, one failed and one fallback.
/// </summary>
[Fact]
public async Task RecordOutcomeAsync_RecordsOutcome()
{
    // Arrange
    var started = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    });

    var outcome = new ChaosOutcome
    {
        Type = ChaosOutcomeType.FaultInjected,
        ChannelType = "email",
        TenantId = "tenant1",
        FallbackTriggered = true
    };

    // Act
    await _runner.RecordOutcomeAsync(started.Id, outcome);
    var summary = await _runner.GetResultsAsync(started.Id);

    // Assert
    Assert.Equal(1, summary.TotalAffected);
    Assert.Equal(1, summary.FailedOperations);
    Assert.Equal(1, summary.FallbackTriggered);
}
|
|
|
|
/// <summary>
/// After recording two latency outcomes (100 ms and 200 ms, email) and one
/// fault outcome with fallback (slack), the results are expected to report
/// 3 affected, 1 failed, 1 fallback, a 150 ms average injected latency and
/// per-channel totals of 2 (email) and 1 (slack).
/// </summary>
[Fact]
public async Task GetResultsAsync_CalculatesStatistics()
{
    // Arrange
    var experiment = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Latency
    });

    // Record various outcomes
    await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
    {
        Type = ChaosOutcomeType.LatencyInjected,
        ChannelType = "email",
        Duration = TimeSpan.FromMilliseconds(100)
    });
    await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
    {
        Type = ChaosOutcomeType.LatencyInjected,
        ChannelType = "email",
        Duration = TimeSpan.FromMilliseconds(200)
    });
    await _runner.RecordOutcomeAsync(experiment.Id, new ChaosOutcome
    {
        Type = ChaosOutcomeType.FaultInjected,
        ChannelType = "slack",
        FallbackTriggered = true
    });

    // Act
    var results = await _runner.GetResultsAsync(experiment.Id);

    // Assert
    Assert.Equal(3, results.TotalAffected);
    Assert.Equal(1, results.FailedOperations);
    Assert.Equal(1, results.FallbackTriggered);
    Assert.NotNull(results.AverageInjectedLatency);

    // TotalMilliseconds is a double: compare to 3 decimal places rather than
    // requiring bit-exact equality of a computed average.
    Assert.Equal(150d, results.AverageInjectedLatency.Value.TotalMilliseconds, precision: 3);

    Assert.Equal(2, results.ByChannelType["email"].TotalAffected);
    Assert.Equal(1, results.ByChannelType["slack"].TotalAffected);
}
|
|
|
|
/// <summary>
/// With one running and one stopped experiment, listing by each status is
/// expected to return exactly the experiment in that status.
/// </summary>
[Fact]
public async Task ListExperimentsAsync_FiltersByStatus()
{
    // Arrange: start two experiments and stop the second one.
    var keptRunning = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Running",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    });

    var stoppedLater = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "To Stop",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    });
    await _runner.StopExperimentAsync(stoppedLater.Id);

    // Act
    var runningOnes = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Running);
    var stoppedOnes = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Stopped);

    // Assert: one entry per list, each with the expected id.
    var onlyRunning = Assert.Single(runningOnes);
    var onlyStopped = Assert.Single(stoppedOnes);
    Assert.Equal(keptRunning.Id, onlyRunning.Id);
    Assert.Equal(stoppedLater.Id, onlyStopped.Id);
}
|
|
|
|
/// <summary>
/// After a 5-minute experiment has elapsed and ten more days have passed on the
/// fake clock, cleanup with a 7-day threshold is expected to remove one
/// experiment, which can then no longer be looked up.
/// </summary>
[Fact]
public async Task CleanupAsync_RemovesOldExperiments()
{
    // Arrange
    var oldConfig = new ChaosExperimentConfig
    {
        Name = "Old Experiment",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromMinutes(5)
    };
    var aged = await _runner.StartExperimentAsync(oldConfig);

    // Let the experiment run past its duration; the lookup is there to
    // trigger the status update.
    _timeProvider.Advance(TimeSpan.FromMinutes(10));
    await _runner.GetExperimentAsync(aged.Id);

    // Move the clock past the cleanup threshold used below.
    _timeProvider.Advance(TimeSpan.FromDays(10));

    // Act
    var removedCount = await _runner.CleanupAsync(TimeSpan.FromDays(7));

    // Assert
    Assert.Equal(1, removedCount);
    var lookup = await _runner.GetExperimentAsync(aged.Id);
    Assert.Null(lookup);
}
|
|
|
|
/// <summary>
/// With an error-response experiment configured for status 503 and message
/// "Service Unavailable", a matching decision is expected to fail with that
/// status code and an error containing that message.
/// </summary>
[Fact]
public async Task ErrorResponseFault_ReturnsConfiguredStatusCode()
{
    // Arrange
    var errorResponse = new ChaosExperimentConfig
    {
        Name = "Error Response",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.ErrorResponse,
        FaultConfig = new ChaosFaultConfig
        {
            ErrorStatusCode = 503,
            ErrorMessage = "Service Unavailable"
        }
    };
    await _runner.StartExperimentAsync(errorResponse);

    // Act
    var verdict = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.True(verdict.ShouldFail);
    Assert.Equal(503, verdict.InjectedStatusCode);
    Assert.Contains("Service Unavailable", verdict.InjectedError);
}
|
|
}
|