Files
git.stella-ops.org/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/PackRunIncidentModeTests.cs
StellaOps Bot 9bd6a73926
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Implement incident mode management service and models
- Added IPackRunIncidentModeService interface for managing incident mode activation, deactivation, and status retrieval.
- Created PackRunIncidentModeService class implementing the service interface with methods for activating, deactivating, and escalating incident modes.
- Introduced incident mode status model (PackRunIncidentModeStatus) and related enums for escalation levels and activation sources.
- Developed retention policy, telemetry settings, and debug capture settings models to manage incident mode configurations.
- Implemented SLO breach notification handling to activate incident mode based on severity.
- Added in-memory store (InMemoryPackRunIncidentModeStore) for testing purposes.
- Created comprehensive unit tests for incident mode service, covering activation, deactivation, status retrieval, and SLO breach handling.
2025-12-06 22:33:00 +02:00

397 lines
14 KiB
C#

using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Time.Testing;
using StellaOps.TaskRunner.Core.Events;
using StellaOps.TaskRunner.Core.IncidentMode;
namespace StellaOps.TaskRunner.Tests;
public sealed class PackRunIncidentModeTests
{
[Fact]
public async Task ActivateAsync_ActivatesIncidentModeSuccessfully()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
var request = new IncidentModeActivationRequest(
RunId: "run-001",
TenantId: "tenant-1",
Level: IncidentEscalationLevel.Medium,
Source: IncidentModeSource.Manual,
Reason: "Debugging production issue",
DurationMinutes: 60,
RequestedBy: "admin@example.com");
var result = await service.ActivateAsync(request, TestContext.Current.CancellationToken);
Assert.True(result.Success);
Assert.True(result.Status.Active);
Assert.Equal(IncidentEscalationLevel.Medium, result.Status.Level);
Assert.Equal(IncidentModeSource.Manual, result.Status.Source);
Assert.NotNull(result.Status.ActivatedAt);
Assert.NotNull(result.Status.ExpiresAt);
}
[Fact]
public async Task ActivateAsync_WithoutDuration_CreatesIndefiniteIncidentMode()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
var request = new IncidentModeActivationRequest(
RunId: "run-002",
TenantId: "tenant-1",
Level: IncidentEscalationLevel.High,
Source: IncidentModeSource.Manual,
Reason: "Critical investigation",
DurationMinutes: null,
RequestedBy: null);
var result = await service.ActivateAsync(request, TestContext.Current.CancellationToken);
Assert.True(result.Success);
Assert.Null(result.Status.ExpiresAt);
}
[Fact]
public async Task ActivateAsync_EmitsTimelineEvent()
{
var store = new InMemoryPackRunIncidentModeStore();
var timelineSink = new InMemoryPackRunTimelineEventSink();
var emitter = new PackRunTimelineEventEmitter(
timelineSink,
TimeProvider.System,
NullLogger<PackRunTimelineEventEmitter>.Instance);
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance,
null,
emitter);
var request = new IncidentModeActivationRequest(
RunId: "run-003",
TenantId: "tenant-1",
Level: IncidentEscalationLevel.Low,
Source: IncidentModeSource.Manual,
Reason: "Test",
DurationMinutes: 30,
RequestedBy: null);
await service.ActivateAsync(request, TestContext.Current.CancellationToken);
Assert.Equal(1, timelineSink.Count);
var evt = timelineSink.GetEvents()[0];
Assert.Equal(PackRunIncidentEventTypes.IncidentModeActivated, evt.EventType);
}
[Fact]
public async Task DeactivateAsync_DeactivatesIncidentMode()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
// First activate
var activateRequest = new IncidentModeActivationRequest(
RunId: "run-004",
TenantId: "tenant-1",
Level: IncidentEscalationLevel.Medium,
Source: IncidentModeSource.Manual,
Reason: "Test",
DurationMinutes: null,
RequestedBy: null);
await service.ActivateAsync(activateRequest, TestContext.Current.CancellationToken);
// Then deactivate
var result = await service.DeactivateAsync("run-004", "Issue resolved", TestContext.Current.CancellationToken);
Assert.True(result.Success);
Assert.False(result.Status.Active);
var status = await service.GetStatusAsync("run-004", TestContext.Current.CancellationToken);
Assert.False(status.Active);
}
[Fact]
public async Task GetStatusAsync_ReturnsInactiveForUnknownRun()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
var status = await service.GetStatusAsync("unknown-run", TestContext.Current.CancellationToken);
Assert.False(status.Active);
Assert.Equal(IncidentEscalationLevel.None, status.Level);
}
[Fact]
public async Task GetStatusAsync_AutoDeactivatesExpiredIncidentMode()
{
var store = new InMemoryPackRunIncidentModeStore();
var fakeTime = new FakeTimeProvider(DateTimeOffset.UtcNow);
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance,
fakeTime);
var request = new IncidentModeActivationRequest(
RunId: "run-005",
TenantId: "tenant-1",
Level: IncidentEscalationLevel.Medium,
Source: IncidentModeSource.Manual,
Reason: "Test",
DurationMinutes: 30,
RequestedBy: null);
await service.ActivateAsync(request, TestContext.Current.CancellationToken);
// Advance time past expiration
fakeTime.Advance(TimeSpan.FromMinutes(31));
var status = await service.GetStatusAsync("run-005", TestContext.Current.CancellationToken);
Assert.False(status.Active);
}
[Fact]
public async Task HandleSloBreachAsync_ActivatesIncidentModeFromBreach()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
var breach = new SloBreachNotification(
BreachId: "breach-001",
SloName: "error_rate_5m",
Severity: "HIGH",
OccurredAt: DateTimeOffset.UtcNow,
CurrentValue: 15.5,
Threshold: 5.0,
Target: 1.0,
ResourceId: "run-006",
TenantId: "tenant-1",
Context: new Dictionary<string, string> { ["step"] = "scan" });
var result = await service.HandleSloBreachAsync(breach, TestContext.Current.CancellationToken);
Assert.True(result.Success);
Assert.True(result.Status.Active);
Assert.Equal(IncidentEscalationLevel.High, result.Status.Level);
Assert.Equal(IncidentModeSource.SloBreach, result.Status.Source);
Assert.Contains("error_rate_5m", result.Status.ActivationReason!);
}
[Fact]
public async Task HandleSloBreachAsync_MapsSeverityToLevel()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
var severityToLevel = new Dictionary<string, IncidentEscalationLevel>
{
["CRITICAL"] = IncidentEscalationLevel.Critical,
["HIGH"] = IncidentEscalationLevel.High,
["MEDIUM"] = IncidentEscalationLevel.Medium,
["LOW"] = IncidentEscalationLevel.Low
};
var runIndex = 0;
foreach (var (severity, expectedLevel) in severityToLevel)
{
var breach = new SloBreachNotification(
BreachId: $"breach-{runIndex}",
SloName: "test_slo",
Severity: severity,
OccurredAt: DateTimeOffset.UtcNow,
CurrentValue: 10.0,
Threshold: 5.0,
Target: 1.0,
ResourceId: $"run-severity-{runIndex++}",
TenantId: "tenant-1",
Context: null);
var result = await service.HandleSloBreachAsync(breach, TestContext.Current.CancellationToken);
Assert.True(result.Success);
Assert.Equal(expectedLevel, result.Status.Level);
}
}
[Fact]
public async Task HandleSloBreachAsync_ReturnsErrorForMissingResourceId()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
var breach = new SloBreachNotification(
BreachId: "breach-no-resource",
SloName: "test_slo",
Severity: "HIGH",
OccurredAt: DateTimeOffset.UtcNow,
CurrentValue: 10.0,
Threshold: 5.0,
Target: 1.0,
ResourceId: null,
TenantId: "tenant-1",
Context: null);
var result = await service.HandleSloBreachAsync(breach, TestContext.Current.CancellationToken);
Assert.False(result.Success);
Assert.Contains("No resource ID", result.Error);
}
[Fact]
public async Task EscalateAsync_IncreasesEscalationLevel()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
// First activate at Low level
var activateRequest = new IncidentModeActivationRequest(
RunId: "run-escalate",
TenantId: "tenant-1",
Level: IncidentEscalationLevel.Low,
Source: IncidentModeSource.Manual,
Reason: "Initial activation",
DurationMinutes: null,
RequestedBy: null);
await service.ActivateAsync(activateRequest, TestContext.Current.CancellationToken);
// Escalate to High
var result = await service.EscalateAsync(
"run-escalate",
IncidentEscalationLevel.High,
"Issue is more severe than expected",
TestContext.Current.CancellationToken);
Assert.True(result.Success);
Assert.Equal(IncidentEscalationLevel.High, result.Status.Level);
Assert.Contains("Escalated", result.Status.ActivationReason);
}
[Fact]
public async Task EscalateAsync_FailsWhenNotInIncidentMode()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
var result = await service.EscalateAsync(
"unknown-run",
IncidentEscalationLevel.High,
null,
TestContext.Current.CancellationToken);
Assert.False(result.Success);
Assert.Contains("not active", result.Error);
}
[Fact]
public async Task EscalateAsync_FailsWhenNewLevelIsLowerOrEqual()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
var activateRequest = new IncidentModeActivationRequest(
RunId: "run-no-deescalate",
TenantId: "tenant-1",
Level: IncidentEscalationLevel.High,
Source: IncidentModeSource.Manual,
Reason: "Test",
DurationMinutes: null,
RequestedBy: null);
await service.ActivateAsync(activateRequest, TestContext.Current.CancellationToken);
var result = await service.EscalateAsync(
"run-no-deescalate",
IncidentEscalationLevel.Medium, // Lower than High
null,
TestContext.Current.CancellationToken);
Assert.False(result.Success);
Assert.Contains("Cannot escalate", result.Error);
}
[Fact]
public void GetSettingsForLevel_ReturnsCorrectSettings()
{
var store = new InMemoryPackRunIncidentModeStore();
var service = new PackRunIncidentModeService(
store,
NullLogger<PackRunIncidentModeService>.Instance);
// Test None level
var noneSettings = service.GetSettingsForLevel(IncidentEscalationLevel.None);
Assert.False(noneSettings.TelemetrySettings.EnhancedTelemetryActive);
Assert.False(noneSettings.DebugCaptureSettings.CaptureActive);
// Test Critical level
var criticalSettings = service.GetSettingsForLevel(IncidentEscalationLevel.Critical);
Assert.True(criticalSettings.TelemetrySettings.EnhancedTelemetryActive);
Assert.Equal(IncidentLogVerbosity.Debug, criticalSettings.TelemetrySettings.LogVerbosity);
Assert.Equal(1.0, criticalSettings.TelemetrySettings.TraceSamplingRate);
Assert.True(criticalSettings.DebugCaptureSettings.CaptureActive);
Assert.True(criticalSettings.DebugCaptureSettings.CaptureHeapDumps);
Assert.Equal(365, criticalSettings.RetentionPolicy.LogRetentionDays);
}
[Fact]
public void PackRunIncidentModeStatus_Inactive_ReturnsDefaultValues()
{
var inactive = PackRunIncidentModeStatus.Inactive();
Assert.False(inactive.Active);
Assert.Equal(IncidentEscalationLevel.None, inactive.Level);
Assert.Null(inactive.ActivatedAt);
Assert.Null(inactive.ActivationReason);
Assert.Equal(IncidentModeSource.None, inactive.Source);
Assert.False(inactive.RetentionPolicy.ExtendedRetentionActive);
Assert.False(inactive.TelemetrySettings.EnhancedTelemetryActive);
Assert.False(inactive.DebugCaptureSettings.CaptureActive);
}
[Fact]
public void IncidentRetentionPolicy_Extended_HasLongerRetention()
{
var defaultPolicy = IncidentRetentionPolicy.Default();
var extendedPolicy = IncidentRetentionPolicy.Extended();
Assert.True(extendedPolicy.ExtendedRetentionActive);
Assert.True(extendedPolicy.LogRetentionDays > defaultPolicy.LogRetentionDays);
Assert.True(extendedPolicy.ArtifactRetentionDays > defaultPolicy.ArtifactRetentionDays);
}
[Fact]
public void IncidentTelemetrySettings_Enhanced_HasHigherSampling()
{
var defaultSettings = IncidentTelemetrySettings.Default();
var enhancedSettings = IncidentTelemetrySettings.Enhanced();
Assert.True(enhancedSettings.EnhancedTelemetryActive);
Assert.True(enhancedSettings.TraceSamplingRate > defaultSettings.TraceSamplingRate);
Assert.True(enhancedSettings.CaptureEnvironment);
Assert.True(enhancedSettings.CaptureStepIo);
}
}