up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
api-governance / spectral-lint (push) Has been cancelled
oas-ci / oas-validate (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
api-governance / spectral-lint (push) Has been cancelled
oas-ci / oas-validate (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1,492 @@
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Microsoft.Extensions.Time.Testing;
|
||||
using StellaOps.Notifier.Worker.Observability;
|
||||
|
||||
namespace StellaOps.Notifier.Tests.Observability;
|
||||
|
||||
public class ChaosTestRunnerTests
|
||||
{
|
||||
/// <summary>Controllable clock handed to the runner so tests can move time forward explicitly.</summary>
private readonly FakeTimeProvider _timeProvider;

/// <summary>Options the shared runner is constructed with; tests read limits from here.</summary>
private readonly ChaosTestOptions _options;

/// <summary>Runner under test, rebuilt for every test because xUnit creates a new class instance per test.</summary>
private readonly InMemoryChaosTestRunner _runner;

/// <summary>
/// Builds a runner with chaos enabled, at most 5 concurrent experiments,
/// a 1 hour maximum experiment duration, and no tenant-target requirement.
/// </summary>
public ChaosTestRunnerTests()
{
    // Seed the fake clock with a fixed instant rather than the wall clock so
    // every run observes identical timestamps and failures are reproducible.
    _timeProvider = new FakeTimeProvider(new DateTimeOffset(2025, 1, 1, 0, 0, 0, TimeSpan.Zero));

    _options = new ChaosTestOptions
    {
        Enabled = true,
        MaxConcurrentExperiments = 5,
        MaxExperimentDuration = TimeSpan.FromHours(1),
        RequireTenantTarget = false
    };

    _runner = new InMemoryChaosTestRunner(
        Options.Create(_options),
        _timeProvider,
        NullLogger<InMemoryChaosTestRunner>.Instance);
}
|
||||
|
||||
/// <summary>
/// Starting a valid outage experiment is expected to return a running experiment
/// that carries the configured name and a start timestamp.
/// </summary>
[Fact]
public async Task StartExperimentAsync_CreatesExperiment()
{
    // Arrange
    ChaosExperimentConfig outageConfig = new()
    {
        Name = "Test Outage",
        InitiatedBy = "test-user",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromMinutes(5)
    };

    // Act
    var started = await _runner.StartExperimentAsync(outageConfig);

    // Assert
    Assert.NotNull(started);
    Assert.Equal(ChaosExperimentStatus.Running, started.Status);
    Assert.Equal("Test Outage", started.Config.Name);
    Assert.NotNull(started.StartedAt);
}
|
||||
|
||||
/// <summary>
/// A runner built with <c>Enabled = false</c> is expected to reject new experiments
/// with an <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task StartExperimentAsync_WhenDisabled_Throws()
{
    // Arrange - a dedicated runner whose options switch chaos testing off
    var disabledRunner = new InMemoryChaosTestRunner(
        Options.Create(new ChaosTestOptions { Enabled = false }),
        _timeProvider,
        NullLogger<InMemoryChaosTestRunner>.Instance);

    ChaosExperimentConfig request = new()
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    };

    // Act & Assert
    await Assert.ThrowsAsync<InvalidOperationException>(
        () => disabledRunner.StartExperimentAsync(request));
}
|
||||
|
||||
/// <summary>
/// Requesting a 2 hour experiment against the shared runner (configured with a
/// 1 hour maximum) is expected to throw <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task StartExperimentAsync_ExceedsMaxDuration_Throws()
{
    // Arrange
    ChaosExperimentConfig tooLong = new()
    {
        Name = "Long Experiment",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromHours(2) // Exceeds max of 1 hour
    };

    // Act & Assert
    await Assert.ThrowsAsync<InvalidOperationException>(
        () => _runner.StartExperimentAsync(tooLong));
}
|
||||
|
||||
/// <summary>
/// After as many experiments as <c>MaxConcurrentExperiments</c> allows have been started,
/// one more start request is expected to throw <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task StartExperimentAsync_MaxConcurrentReached_Throws()
{
    // Arrange - fill every slot. The bound comes from the configured option rather
    // than a literal so the test keeps exercising the limit if the option changes.
    for (var i = 0; i < _options.MaxConcurrentExperiments; i++)
    {
        await _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = $"Experiment {i}",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        });
    }

    // Act & Assert
    await Assert.ThrowsAsync<InvalidOperationException>(() =>
        _runner.StartExperimentAsync(new ChaosExperimentConfig
        {
            Name = "One too many",
            InitiatedBy = "test-user",
            FaultType = ChaosFaultType.Outage
        }));
}
|
||||
|
||||
/// <summary>
/// Stopping a started experiment is expected to leave it retrievable with
/// status <c>Stopped</c> and an end timestamp.
/// </summary>
[Fact]
public async Task StopExperimentAsync_StopsExperiment()
{
    // Arrange
    ChaosExperimentConfig request = new()
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    };
    var started = await _runner.StartExperimentAsync(request);

    // Act
    await _runner.StopExperimentAsync(started.Id);

    // Assert
    var reloaded = await _runner.GetExperimentAsync(started.Id);
    Assert.NotNull(reloaded);
    Assert.Equal(ChaosExperimentStatus.Stopped, reloaded.Status);
    Assert.NotNull(reloaded.EndedAt);
}
|
||||
|
||||
/// <summary>
/// With an outage experiment targeting tenant1/email, a matching query is expected
/// to report a failure of type <c>Outage</c> with an injected error.
/// </summary>
[Fact]
public async Task ShouldFailAsync_OutageFault_ReturnsFault()
{
    // Arrange
    ChaosExperimentConfig emailOutage = new()
    {
        Name = "Email Outage",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage
    };
    await _runner.StartExperimentAsync(emailOutage);

    // Act
    var verdict = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.True(verdict.ShouldFail);
    Assert.Equal(ChaosFaultType.Outage, verdict.FaultType);
    Assert.NotNull(verdict.InjectedError);
}
|
||||
|
||||
/// <summary>
/// An experiment scoped to tenant1 is expected not to inject a fault when the
/// query is made for tenant2.
/// </summary>
[Fact]
public async Task ShouldFailAsync_NoMatchingExperiment_ReturnsNoFault()
{
    // Arrange
    ChaosExperimentConfig emailOutage = new()
    {
        Name = "Email Outage",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage
    };
    await _runner.StartExperimentAsync(emailOutage);

    // Act - different tenant
    var verdict = await _runner.ShouldFailAsync("tenant2", "email");

    // Assert
    Assert.False(verdict.ShouldFail);
}
|
||||
|
||||
/// <summary>
/// An experiment targeting only the "email" channel type is expected not to inject
/// a fault when the same tenant is queried for "slack".
/// </summary>
[Fact]
public async Task ShouldFailAsync_WrongChannelType_ReturnsNoFault()
{
    // Arrange
    ChaosExperimentConfig emailOutage = new()
    {
        Name = "Email Outage",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage
    };
    await _runner.StartExperimentAsync(emailOutage);

    // Act - different channel type
    var verdict = await _runner.ShouldFailAsync("tenant1", "slack");

    // Assert
    Assert.False(verdict.ShouldFail);
}
|
||||
|
||||
/// <summary>
/// A latency experiment configured with a 1-5 second window is expected to report
/// no failure but to supply an injected latency inside that window.
/// </summary>
[Fact]
public async Task ShouldFailAsync_LatencyFault_InjectsLatency()
{
    // Arrange
    ChaosExperimentConfig latencyConfig = new()
    {
        Name = "Latency Test",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Latency,
        FaultConfig = new ChaosFaultConfig
        {
            MinLatency = TimeSpan.FromSeconds(1),
            MaxLatency = TimeSpan.FromSeconds(5)
        }
    };
    await _runner.StartExperimentAsync(latencyConfig);

    // Act
    var verdict = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.False(verdict.ShouldFail); // Latency doesn't cause failure
    Assert.NotNull(verdict.InjectedLatency);
    Assert.InRange(verdict.InjectedLatency.Value.TotalSeconds, 1, 5);
}
|
||||
|
||||
/// <summary>
/// With a partial-failure experiment at a 0.5 failure rate and seed 42, 100 queries
/// are expected to produce between 30 and 70 failures.
/// </summary>
[Fact]
public async Task ShouldFailAsync_PartialFailure_UsesFailureRate()
{
    // Arrange
    ChaosExperimentConfig partialConfig = new()
    {
        Name = "Partial Failure",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.PartialFailure,
        FaultConfig = new ChaosFaultConfig
        {
            FailureRate = 0.5,
            Seed = 42 // Fixed seed for reproducibility
        }
    };
    await _runner.StartExperimentAsync(partialConfig);

    // Act - run multiple times and tally how many calls were told to fail
    var failedCalls = 0;
    for (var attempt = 0; attempt < 100; attempt++)
    {
        var verdict = await _runner.ShouldFailAsync("tenant1", "email");
        failedCalls += verdict.ShouldFail ? 1 : 0;
    }

    // Assert - should be roughly 50% failures (with some variance)
    Assert.InRange(failedCalls, 30, 70);
}
|
||||
|
||||
/// <summary>
/// With a rate-limit experiment of 5 per minute, the first five queries are expected
/// to pass and the sixth to fail with injected status code 429.
/// </summary>
[Fact]
public async Task ShouldFailAsync_RateLimit_EnforcesLimit()
{
    // Arrange
    const int allowedPerMinute = 5;
    ChaosExperimentConfig rateLimitConfig = new()
    {
        Name = "Rate Limit",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.RateLimit,
        FaultConfig = new ChaosFaultConfig
        {
            RateLimitPerMinute = allowedPerMinute
        }
    };
    await _runner.StartExperimentAsync(rateLimitConfig);

    // Act - first 5 should pass
    for (var call = 0; call < allowedPerMinute; call++)
    {
        var allowed = await _runner.ShouldFailAsync("tenant1", "email");
        Assert.False(allowed.ShouldFail);
    }

    // 6th should fail
    var throttled = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.True(throttled.ShouldFail);
    Assert.Equal(429, throttled.InjectedStatusCode);
}
|
||||
|
||||
/// <summary>
/// A 5 minute outage experiment is expected to stop injecting faults once the
/// fake clock has been advanced 10 minutes.
/// </summary>
[Fact]
public async Task ShouldFailAsync_ExperimentExpires_StopsMatching()
{
    // Arrange
    ChaosExperimentConfig shortLived = new()
    {
        Name = "Short Experiment",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromMinutes(5)
    };
    await _runner.StartExperimentAsync(shortLived);

    // Act - advance time past duration
    _timeProvider.Advance(TimeSpan.FromMinutes(10));
    var verdict = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.False(verdict.ShouldFail);
}
|
||||
|
||||
/// <summary>
/// With <c>MaxAffectedOperations = 3</c>, the first three matching queries are expected
/// to fail and the fourth to pass.
/// </summary>
[Fact]
public async Task ShouldFailAsync_MaxOperationsReached_StopsMatching()
{
    // Arrange
    const int operationBudget = 3;
    ChaosExperimentConfig limited = new()
    {
        Name = "Limited Experiment",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.Outage,
        MaxAffectedOperations = operationBudget
    };
    await _runner.StartExperimentAsync(limited);

    // Act - consume all operations
    for (var used = 0; used < operationBudget; used++)
    {
        var affected = await _runner.ShouldFailAsync("tenant1", "email");
        Assert.True(affected.ShouldFail);
    }

    // 4th should not match
    var afterBudget = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.False(afterBudget.ShouldFail);
}
|
||||
|
||||
/// <summary>
/// Recording one <c>FaultInjected</c> outcome with fallback triggered is expected to
/// show up in the results as one affected, one failed, and one fallback operation.
/// </summary>
[Fact]
public async Task RecordOutcomeAsync_RecordsOutcome()
{
    // Arrange
    ChaosExperimentConfig request = new()
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    };
    var started = await _runner.StartExperimentAsync(request);

    ChaosOutcome injectedFault = new()
    {
        Type = ChaosOutcomeType.FaultInjected,
        ChannelType = "email",
        TenantId = "tenant1",
        FallbackTriggered = true
    };

    // Act
    await _runner.RecordOutcomeAsync(started.Id, injectedFault);
    var summary = await _runner.GetResultsAsync(started.Id);

    // Assert
    Assert.Equal(1, summary.TotalAffected);
    Assert.Equal(1, summary.FailedOperations);
    Assert.Equal(1, summary.FallbackTriggered);
}
|
||||
|
||||
/// <summary>
/// After recording two email latency outcomes (100 ms and 200 ms) and one slack fault
/// with fallback, the results are expected to report three affected operations, one
/// failure, one fallback, a 150 ms average injected latency, and per-channel totals
/// of two (email) and one (slack).
/// </summary>
[Fact]
public async Task GetResultsAsync_CalculatesStatistics()
{
    // Arrange
    var started = await _runner.StartExperimentAsync(new ChaosExperimentConfig
    {
        Name = "Test",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Latency
    });

    // Record various outcomes: the two latency samples first, in the same order as before
    foreach (var latencyMs in new[] { 100, 200 })
    {
        await _runner.RecordOutcomeAsync(started.Id, new ChaosOutcome
        {
            Type = ChaosOutcomeType.LatencyInjected,
            ChannelType = "email",
            Duration = TimeSpan.FromMilliseconds(latencyMs)
        });
    }

    await _runner.RecordOutcomeAsync(started.Id, new ChaosOutcome
    {
        Type = ChaosOutcomeType.FaultInjected,
        ChannelType = "slack",
        FallbackTriggered = true
    });

    // Act
    var summary = await _runner.GetResultsAsync(started.Id);

    // Assert
    Assert.Equal(3, summary.TotalAffected);
    Assert.Equal(1, summary.FailedOperations);
    Assert.Equal(1, summary.FallbackTriggered);
    Assert.NotNull(summary.AverageInjectedLatency);
    Assert.Equal(150, summary.AverageInjectedLatency.Value.TotalMilliseconds);
    Assert.Equal(2, summary.ByChannelType["email"].TotalAffected);
    Assert.Equal(1, summary.ByChannelType["slack"].TotalAffected);
}
|
||||
|
||||
/// <summary>
/// With one running and one stopped experiment, listing by status is expected to
/// return exactly the matching experiment for each status.
/// </summary>
[Fact]
public async Task ListExperimentsAsync_FiltersByStatus()
{
    // Arrange
    ChaosExperimentConfig keepRunningConfig = new()
    {
        Name = "Running",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    };
    var running = await _runner.StartExperimentAsync(keepRunningConfig);

    ChaosExperimentConfig stopConfig = new()
    {
        Name = "To Stop",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage
    };
    var toStop = await _runner.StartExperimentAsync(stopConfig);
    await _runner.StopExperimentAsync(toStop.Id);

    // Act
    var runningList = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Running);
    var stoppedList = await _runner.ListExperimentsAsync(ChaosExperimentStatus.Stopped);

    // Assert - Assert.Single both checks the count and hands back the sole element
    var onlyRunning = Assert.Single(runningList);
    var onlyStopped = Assert.Single(stoppedList);
    Assert.Equal(running.Id, onlyRunning.Id);
    Assert.Equal(toStop.Id, onlyStopped.Id);
}
|
||||
|
||||
/// <summary>
/// A 5 minute experiment that is read after its duration has elapsed, and then left
/// for a further 10 days, is expected to be removed by a cleanup with a 7 day
/// threshold and to be no longer retrievable afterwards.
/// </summary>
[Fact]
public async Task CleanupAsync_RemovesOldExperiments()
{
    // Arrange
    ChaosExperimentConfig oldConfig = new()
    {
        Name = "Old Experiment",
        InitiatedBy = "test-user",
        FaultType = ChaosFaultType.Outage,
        Duration = TimeSpan.FromMinutes(5)
    };
    var started = await _runner.StartExperimentAsync(oldConfig);

    // Complete the experiment
    _timeProvider.Advance(TimeSpan.FromMinutes(10));
    _ = await _runner.GetExperimentAsync(started.Id); // Triggers status update

    // Advance time beyond cleanup threshold
    _timeProvider.Advance(TimeSpan.FromDays(10));

    // Act
    var removedCount = await _runner.CleanupAsync(TimeSpan.FromDays(7));

    // Assert
    Assert.Equal(1, removedCount);
    var afterCleanup = await _runner.GetExperimentAsync(started.Id);
    Assert.Null(afterCleanup);
}
|
||||
|
||||
/// <summary>
/// An error-response experiment configured with status 503 and message
/// "Service Unavailable" is expected to report a failure carrying that status code
/// and an injected error containing that message.
/// </summary>
[Fact]
public async Task ErrorResponseFault_ReturnsConfiguredStatusCode()
{
    // Arrange
    ChaosExperimentConfig errorConfig = new()
    {
        Name = "Error Response",
        InitiatedBy = "test-user",
        TenantId = "tenant1",
        TargetChannelTypes = ["email"],
        FaultType = ChaosFaultType.ErrorResponse,
        FaultConfig = new ChaosFaultConfig
        {
            ErrorStatusCode = 503,
            ErrorMessage = "Service Unavailable"
        }
    };
    await _runner.StartExperimentAsync(errorConfig);

    // Act
    var verdict = await _runner.ShouldFailAsync("tenant1", "email");

    // Assert
    Assert.True(verdict.ShouldFail);
    Assert.Equal(503, verdict.InjectedStatusCode);
    Assert.Contains("Service Unavailable", verdict.InjectedError);
}
|
||||
}
|
||||
Reference in New Issue
Block a user