Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Added IPackRunIncidentModeService interface for managing incident mode activation, deactivation, and status retrieval.
- Created PackRunIncidentModeService class implementing the service interface with methods for activating, deactivating, and escalating incident modes.
- Introduced incident mode status model (PackRunIncidentModeStatus) and related enums for escalation levels and activation sources.
- Developed retention policy, telemetry settings, and debug capture settings models to manage incident mode configurations.
- Implemented SLO breach notification handling to activate incident mode based on severity.
- Added in-memory store (InMemoryPackRunIncidentModeStore) for testing purposes.
- Created comprehensive unit tests for incident mode service, covering activation, deactivation, status retrieval, and SLO breach handling.
397 lines
14 KiB
C#
397 lines
14 KiB
C#
using Microsoft.Extensions.Logging.Abstractions;
|
|
using Microsoft.Extensions.Time.Testing;
|
|
using StellaOps.TaskRunner.Core.Events;
|
|
using StellaOps.TaskRunner.Core.IncidentMode;
|
|
|
|
namespace StellaOps.TaskRunner.Tests;
|
|
|
|
/// <summary>
/// Unit tests for <see cref="PackRunIncidentModeService"/> and the incident-mode
/// status/settings models, run against an <see cref="InMemoryPackRunIncidentModeStore"/>.
/// </summary>
public sealed class PackRunIncidentModeTests
{
    /// <summary>
    /// Builds a service over a fresh in-memory store with a no-op logger,
    /// leaving every optional constructor argument at its default.
    /// </summary>
    private static PackRunIncidentModeService CreateService() =>
        new(
            new InMemoryPackRunIncidentModeStore(),
            NullLogger<PackRunIncidentModeService>.Instance);

    /// <summary>
    /// Builds a manual activation request for tenant "tenant-1".
    /// </summary>
    /// <param name="runId">Run the request targets.</param>
    /// <param name="level">Escalation level to request.</param>
    /// <param name="reason">Free-text activation reason.</param>
    /// <param name="durationMinutes">Requested duration in minutes, or <c>null</c> for none.</param>
    /// <param name="requestedBy">Requesting principal, or <c>null</c> when not supplied.</param>
    private static IncidentModeActivationRequest ManualActivation(
        string runId,
        IncidentEscalationLevel level,
        string reason,
        int? durationMinutes = null,
        string? requestedBy = null) =>
        new(
            RunId: runId,
            TenantId: "tenant-1",
            Level: level,
            Source: IncidentModeSource.Manual,
            Reason: reason,
            DurationMinutes: durationMinutes,
            RequestedBy: requestedBy);

    /// <summary>
    /// A manual activation with a duration succeeds and reports an active status
    /// carrying the requested level and source plus activation and expiry timestamps.
    /// </summary>
    [Fact]
    public async Task ActivateAsync_ActivatesIncidentModeSuccessfully()
    {
        var ct = TestContext.Current.CancellationToken;
        var sut = CreateService();
        var activation = ManualActivation(
            "run-001",
            IncidentEscalationLevel.Medium,
            "Debugging production issue",
            durationMinutes: 60,
            requestedBy: "admin@example.com");

        var outcome = await sut.ActivateAsync(activation, ct);

        Assert.True(outcome.Success);

        var status = outcome.Status;
        Assert.True(status.Active);
        Assert.Equal(IncidentEscalationLevel.Medium, status.Level);
        Assert.Equal(IncidentModeSource.Manual, status.Source);
        Assert.NotNull(status.ActivatedAt);
        Assert.NotNull(status.ExpiresAt);
    }

    /// <summary>
    /// Activating without a duration succeeds and leaves the expiry timestamp unset.
    /// </summary>
    [Fact]
    public async Task ActivateAsync_WithoutDuration_CreatesIndefiniteIncidentMode()
    {
        var ct = TestContext.Current.CancellationToken;
        var sut = CreateService();
        var activation = ManualActivation(
            "run-002",
            IncidentEscalationLevel.High,
            "Critical investigation");

        var outcome = await sut.ActivateAsync(activation, ct);

        Assert.True(outcome.Success);
        Assert.Null(outcome.Status.ExpiresAt);
    }

    /// <summary>
    /// When a timeline emitter is supplied, activation writes exactly one event
    /// of type <c>IncidentModeActivated</c> to the sink.
    /// </summary>
    [Fact]
    public async Task ActivateAsync_EmitsTimelineEvent()
    {
        var ct = TestContext.Current.CancellationToken;
        var sink = new InMemoryPackRunTimelineEventSink();
        var timelineEmitter = new PackRunTimelineEventEmitter(
            sink,
            TimeProvider.System,
            NullLogger<PackRunTimelineEventEmitter>.Instance);

        // Third constructor argument is passed as null so only the emitter is customised.
        var sut = new PackRunIncidentModeService(
            new InMemoryPackRunIncidentModeStore(),
            NullLogger<PackRunIncidentModeService>.Instance,
            null,
            timelineEmitter);

        var activation = ManualActivation(
            "run-003",
            IncidentEscalationLevel.Low,
            "Test",
            durationMinutes: 30);

        await sut.ActivateAsync(activation, ct);

        Assert.Equal(1, sink.Count);
        var recorded = sink.GetEvents()[0];
        Assert.Equal(PackRunIncidentEventTypes.IncidentModeActivated, recorded.EventType);
    }

    /// <summary>
    /// Deactivating an active run succeeds, and both the returned status and a
    /// subsequent status lookup report the run as inactive.
    /// </summary>
    [Fact]
    public async Task DeactivateAsync_DeactivatesIncidentMode()
    {
        var ct = TestContext.Current.CancellationToken;
        var sut = CreateService();

        // Arrange: the run must be in incident mode before it can be deactivated.
        await sut.ActivateAsync(
            ManualActivation("run-004", IncidentEscalationLevel.Medium, "Test"),
            ct);

        var outcome = await sut.DeactivateAsync("run-004", "Issue resolved", ct);

        Assert.True(outcome.Success);
        Assert.False(outcome.Status.Active);

        var persisted = await sut.GetStatusAsync("run-004", ct);
        Assert.False(persisted.Active);
    }

    /// <summary>
    /// Looking up a run that was never activated yields an inactive status at level <c>None</c>.
    /// </summary>
    [Fact]
    public async Task GetStatusAsync_ReturnsInactiveForUnknownRun()
    {
        var sut = CreateService();

        var status = await sut.GetStatusAsync("unknown-run", TestContext.Current.CancellationToken);

        Assert.False(status.Active);
        Assert.Equal(IncidentEscalationLevel.None, status.Level);
    }

    /// <summary>
    /// A 30-minute activation is reported as inactive once the injected clock
    /// has been advanced by 31 minutes.
    /// </summary>
    [Fact]
    public async Task GetStatusAsync_AutoDeactivatesExpiredIncidentMode()
    {
        var ct = TestContext.Current.CancellationToken;
        var clock = new FakeTimeProvider(DateTimeOffset.UtcNow);
        var sut = new PackRunIncidentModeService(
            new InMemoryPackRunIncidentModeStore(),
            NullLogger<PackRunIncidentModeService>.Instance,
            clock);

        await sut.ActivateAsync(
            ManualActivation("run-005", IncidentEscalationLevel.Medium, "Test", durationMinutes: 30),
            ct);

        // Move the fake clock one minute beyond the requested 30-minute window.
        clock.Advance(TimeSpan.FromMinutes(31));

        var status = await sut.GetStatusAsync("run-005", ct);

        Assert.False(status.Active);
    }

    /// <summary>
    /// A "HIGH" SLO breach carrying a resource ID activates incident mode at level
    /// <c>High</c> with source <c>SloBreach</c>, and the activation reason mentions the SLO name.
    /// </summary>
    [Fact]
    public async Task HandleSloBreachAsync_ActivatesIncidentModeFromBreach()
    {
        var sut = CreateService();
        var breachContext = new Dictionary<string, string> { ["step"] = "scan" };
        var notification = new SloBreachNotification(
            BreachId: "breach-001",
            SloName: "error_rate_5m",
            Severity: "HIGH",
            OccurredAt: DateTimeOffset.UtcNow,
            CurrentValue: 15.5,
            Threshold: 5.0,
            Target: 1.0,
            ResourceId: "run-006",
            TenantId: "tenant-1",
            Context: breachContext);

        var outcome = await sut.HandleSloBreachAsync(notification, TestContext.Current.CancellationToken);

        Assert.True(outcome.Success);

        var status = outcome.Status;
        Assert.True(status.Active);
        Assert.Equal(IncidentEscalationLevel.High, status.Level);
        Assert.Equal(IncidentModeSource.SloBreach, status.Source);
        Assert.Contains("error_rate_5m", status.ActivationReason!);
    }

    /// <summary>
    /// Each of the severities "CRITICAL", "HIGH", "MEDIUM" and "LOW" activates
    /// incident mode at the escalation level of the same name. All cases share
    /// one service instance and use a distinct run ID per severity.
    /// </summary>
    [Fact]
    public async Task HandleSloBreachAsync_MapsSeverityToLevel()
    {
        var ct = TestContext.Current.CancellationToken;
        var sut = CreateService();

        (string Severity, IncidentEscalationLevel Expected)[] cases =
        {
            ("CRITICAL", IncidentEscalationLevel.Critical),
            ("HIGH", IncidentEscalationLevel.High),
            ("MEDIUM", IncidentEscalationLevel.Medium),
            ("LOW", IncidentEscalationLevel.Low),
        };

        for (var index = 0; index < cases.Length; index++)
        {
            var (severity, expected) = cases[index];

            // The index keeps breach and run identifiers unique within the shared store.
            var notification = new SloBreachNotification(
                BreachId: $"breach-{index}",
                SloName: "test_slo",
                Severity: severity,
                OccurredAt: DateTimeOffset.UtcNow,
                CurrentValue: 10.0,
                Threshold: 5.0,
                Target: 1.0,
                ResourceId: $"run-severity-{index}",
                TenantId: "tenant-1",
                Context: null);

            var outcome = await sut.HandleSloBreachAsync(notification, ct);

            Assert.True(outcome.Success);
            Assert.Equal(expected, outcome.Status.Level);
        }
    }

    /// <summary>
    /// A breach without a resource ID is rejected with an error containing "No resource ID".
    /// </summary>
    [Fact]
    public async Task HandleSloBreachAsync_ReturnsErrorForMissingResourceId()
    {
        var sut = CreateService();
        var notification = new SloBreachNotification(
            BreachId: "breach-no-resource",
            SloName: "test_slo",
            Severity: "HIGH",
            OccurredAt: DateTimeOffset.UtcNow,
            CurrentValue: 10.0,
            Threshold: 5.0,
            Target: 1.0,
            ResourceId: null,
            TenantId: "tenant-1",
            Context: null);

        var outcome = await sut.HandleSloBreachAsync(notification, TestContext.Current.CancellationToken);

        Assert.False(outcome.Success);
        Assert.Contains("No resource ID", outcome.Error);
    }

    /// <summary>
    /// Escalating an active run from <c>Low</c> to <c>High</c> succeeds, raises the
    /// reported level, and leaves "Escalated" in the activation reason.
    /// </summary>
    [Fact]
    public async Task EscalateAsync_IncreasesEscalationLevel()
    {
        var ct = TestContext.Current.CancellationToken;
        var sut = CreateService();

        // Arrange: start the run at the lowest non-trivial level.
        await sut.ActivateAsync(
            ManualActivation("run-escalate", IncidentEscalationLevel.Low, "Initial activation"),
            ct);

        var outcome = await sut.EscalateAsync(
            "run-escalate",
            IncidentEscalationLevel.High,
            "Issue is more severe than expected",
            ct);

        Assert.True(outcome.Success);
        Assert.Equal(IncidentEscalationLevel.High, outcome.Status.Level);
        Assert.Contains("Escalated", outcome.Status.ActivationReason);
    }

    /// <summary>
    /// Escalating a run that is not in incident mode fails with an error containing "not active".
    /// </summary>
    [Fact]
    public async Task EscalateAsync_FailsWhenNotInIncidentMode()
    {
        var sut = CreateService();

        var outcome = await sut.EscalateAsync(
            "unknown-run",
            IncidentEscalationLevel.High,
            null,
            TestContext.Current.CancellationToken);

        Assert.False(outcome.Success);
        Assert.Contains("not active", outcome.Error);
    }

    /// <summary>
    /// Requesting <c>Medium</c> for a run already at <c>High</c> fails with an error
    /// containing "Cannot escalate".
    /// </summary>
    /// <remarks>
    /// NOTE(review): only the lower-level case is exercised here; the equal-level
    /// case implied by the test name is not covered.
    /// </remarks>
    [Fact]
    public async Task EscalateAsync_FailsWhenNewLevelIsLowerOrEqual()
    {
        var ct = TestContext.Current.CancellationToken;
        var sut = CreateService();

        await sut.ActivateAsync(
            ManualActivation("run-no-deescalate", IncidentEscalationLevel.High, "Test"),
            ct);

        var outcome = await sut.EscalateAsync(
            "run-no-deescalate",
            IncidentEscalationLevel.Medium, // Lower than High
            null,
            ct);

        Assert.False(outcome.Success);
        Assert.Contains("Cannot escalate", outcome.Error);
    }

    /// <summary>
    /// Level <c>None</c> yields settings with enhanced telemetry and debug capture off;
    /// level <c>Critical</c> yields debug-verbosity telemetry with a 1.0 trace sampling
    /// rate, debug capture including heap dumps, and 365 days of log retention.
    /// </summary>
    [Fact]
    public void GetSettingsForLevel_ReturnsCorrectSettings()
    {
        var sut = CreateService();

        // Level None: nothing enhanced.
        var baseline = sut.GetSettingsForLevel(IncidentEscalationLevel.None);
        Assert.False(baseline.TelemetrySettings.EnhancedTelemetryActive);
        Assert.False(baseline.DebugCaptureSettings.CaptureActive);

        // Level Critical: everything turned up.
        var critical = sut.GetSettingsForLevel(IncidentEscalationLevel.Critical);
        Assert.True(critical.TelemetrySettings.EnhancedTelemetryActive);
        Assert.Equal(IncidentLogVerbosity.Debug, critical.TelemetrySettings.LogVerbosity);
        Assert.Equal(1.0, critical.TelemetrySettings.TraceSamplingRate);
        Assert.True(critical.DebugCaptureSettings.CaptureActive);
        Assert.True(critical.DebugCaptureSettings.CaptureHeapDumps);
        Assert.Equal(365, critical.RetentionPolicy.LogRetentionDays);
    }

    /// <summary>
    /// <c>PackRunIncidentModeStatus.Inactive()</c> reports an inactive status at level
    /// and source <c>None</c>, with no activation timestamp or reason and with
    /// retention, telemetry and debug-capture enhancements all switched off.
    /// </summary>
    [Fact]
    public void PackRunIncidentModeStatus_Inactive_ReturnsDefaultValues()
    {
        var status = PackRunIncidentModeStatus.Inactive();

        Assert.False(status.Active);
        Assert.Equal(IncidentEscalationLevel.None, status.Level);
        Assert.Null(status.ActivatedAt);
        Assert.Null(status.ActivationReason);
        Assert.Equal(IncidentModeSource.None, status.Source);
        Assert.False(status.RetentionPolicy.ExtendedRetentionActive);
        Assert.False(status.TelemetrySettings.EnhancedTelemetryActive);
        Assert.False(status.DebugCaptureSettings.CaptureActive);
    }

    /// <summary>
    /// The extended retention policy is flagged active and keeps both logs and
    /// artifacts for more days than the default policy.
    /// </summary>
    [Fact]
    public void IncidentRetentionPolicy_Extended_HasLongerRetention()
    {
        var baseline = IncidentRetentionPolicy.Default();
        var extended = IncidentRetentionPolicy.Extended();

        Assert.True(extended.ExtendedRetentionActive);
        Assert.True(extended.LogRetentionDays > baseline.LogRetentionDays);
        Assert.True(extended.ArtifactRetentionDays > baseline.ArtifactRetentionDays);
    }

    /// <summary>
    /// The enhanced telemetry settings are flagged active, sample traces at a higher
    /// rate than the default settings, and capture both environment and step I/O.
    /// </summary>
    [Fact]
    public void IncidentTelemetrySettings_Enhanced_HasHigherSampling()
    {
        var baseline = IncidentTelemetrySettings.Default();
        var enhanced = IncidentTelemetrySettings.Enhanced();

        Assert.True(enhanced.EnhancedTelemetryActive);
        Assert.True(enhanced.TraceSamplingRate > baseline.TraceSamplingRate);
        Assert.True(enhanced.CaptureEnvironment);
        Assert.True(enhanced.CaptureStepIo);
    }
}
|