Implement incident mode management service and models
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled

- Added IPackRunIncidentModeService interface for managing incident mode activation, deactivation, and status retrieval.
- Created PackRunIncidentModeService class implementing the service interface with methods for activating, deactivating, and escalating incident modes.
- Introduced incident mode status model (PackRunIncidentModeStatus) and related enums for escalation levels and activation sources.
- Developed retention policy, telemetry settings, and debug capture settings models to manage incident mode configurations.
- Implemented SLO breach notification handling to activate incident mode based on severity.
- Added in-memory store (InMemoryPackRunIncidentModeStore) for testing purposes.
- Created comprehensive unit tests for incident mode service, covering activation, deactivation, status retrieval, and SLO breach handling.
This commit is contained in:
StellaOps Bot
2025-12-06 22:33:00 +02:00
parent 4042fc2184
commit 9bd6a73926
23 changed files with 7779 additions and 12 deletions

View File

@@ -16,6 +16,7 @@ using StellaOps.AirGap.Policy;
using StellaOps.TaskRunner.Core.AirGap;
using StellaOps.TaskRunner.Core.Attestation;
using StellaOps.TaskRunner.Core.Configuration;
using StellaOps.TaskRunner.Core.IncidentMode;
using StellaOps.TaskRunner.Core.Events;
using StellaOps.TaskRunner.Core.Execution;
using StellaOps.TaskRunner.Core.Execution.Simulation;
@@ -127,6 +128,10 @@ builder.Services.AddSingleton<IPackRunAttestationStore, InMemoryPackRunAttestati
// Attestation signer and service, registered as singletons.
builder.Services
    .AddSingleton<IPackRunAttestationSigner, StubPackRunAttestationSigner>()
    .AddSingleton<IPackRunAttestationService, PackRunAttestationService>();

// Pack run incident mode (TASKRUN-OBS-55-001): store first, then the service that is
// registered alongside it. Both are singletons.
builder.Services
    .AddSingleton<IPackRunIncidentModeStore, InMemoryPackRunIncidentModeStore>()
    .AddSingleton<IPackRunIncidentModeService, PackRunIncidentModeService>();

builder.Services.AddOpenApi();

// All registrations above must happen before the container is built.
var app = builder.Build();
@@ -230,6 +235,22 @@ app.MapGet("/api/attestations/{attestationId}/envelope", HandleGetAttestationEnv
// Attestation verification: a versioned route and an /api alias share one handler;
// the alias carries an "Api" suffix on its endpoint name.
app.MapPost("/v1/task-runner/attestations/{attestationId}/verify", HandleVerifyAttestation).WithName("VerifyAttestation");
app.MapPost("/api/attestations/{attestationId}/verify", HandleVerifyAttestation).WithName("VerifyAttestationApi");
// Incident mode endpoints (TASKRUN-OBS-55-001)
// Each operation is mapped twice - under /v1/task-runner and under /api - to the same handler,
// with the /api variant's endpoint name suffixed by "Api".
// Status query for a single run.
app.MapGet("/v1/task-runner/runs/{runId}/incident-mode", HandleGetIncidentModeStatus).WithName("GetIncidentModeStatus");
app.MapGet("/api/runs/{runId}/incident-mode", HandleGetIncidentModeStatus).WithName("GetIncidentModeStatusApi");
// Manual activation for a run.
app.MapPost("/v1/task-runner/runs/{runId}/incident-mode/activate", HandleActivateIncidentMode).WithName("ActivateIncidentMode");
app.MapPost("/api/runs/{runId}/incident-mode/activate", HandleActivateIncidentMode).WithName("ActivateIncidentModeApi");
// Deactivation for a run.
app.MapPost("/v1/task-runner/runs/{runId}/incident-mode/deactivate", HandleDeactivateIncidentMode).WithName("DeactivateIncidentMode");
app.MapPost("/api/runs/{runId}/incident-mode/deactivate", HandleDeactivateIncidentMode).WithName("DeactivateIncidentModeApi");
// Escalation to a new level for a run.
app.MapPost("/v1/task-runner/runs/{runId}/incident-mode/escalate", HandleEscalateIncidentMode).WithName("EscalateIncidentMode");
app.MapPost("/api/runs/{runId}/incident-mode/escalate", HandleEscalateIncidentMode).WithName("EscalateIncidentModeApi");
// SLO breach webhook; not scoped to a run in the route - the notification body is passed to the handler.
app.MapPost("/v1/task-runner/webhooks/slo-breach", HandleSloBreachWebhook).WithName("SloBreachWebhook");
app.MapPost("/api/webhooks/slo-breach", HandleSloBreachWebhook).WithName("SloBreachWebhookApi");
app.MapGet("/.well-known/openapi", (HttpResponse response) =>
{
var metadata = OpenApiMetadataFactory.Create("/openapi");
@@ -681,6 +702,175 @@ async Task<IResult> HandleVerifyAttestation(
}, statusCode: statusCode);
}
// Incident mode handlers (TASKRUN-OBS-55-001)
// GET handler: reports the incident mode status of a pack run.
//
// Responds 400 when runId is blank. Otherwise responds 200 with the status returned by
// IPackRunIncidentModeService.GetStatusAsync, projected into a JSON-friendly shape:
// enum values are lower-cased names and timestamps use the round-trip ("O") format.
async Task<IResult> HandleGetIncidentModeStatus(
    string runId,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(runId))
    {
        return Results.BadRequest(new { error = "runId is required." });
    }

    var status = await incidentModeService.GetStatusAsync(runId, cancellationToken).ConfigureAwait(false);

    // Build each nested section separately so the final payload reads as a flat summary.
    var retention = status.RetentionPolicy;
    var retentionPolicy = new
    {
        extendedRetentionActive = retention.ExtendedRetentionActive,
        logRetentionDays = retention.LogRetentionDays,
        artifactRetentionDays = retention.ArtifactRetentionDays
    };

    var telemetry = status.TelemetrySettings;
    var telemetrySettings = new
    {
        enhancedTelemetryActive = telemetry.EnhancedTelemetryActive,
        logVerbosity = telemetry.LogVerbosity.ToString().ToLowerInvariant(),
        traceSamplingRate = telemetry.TraceSamplingRate
    };

    var debugCapture = status.DebugCaptureSettings;
    var debugCaptureSettings = new
    {
        captureActive = debugCapture.CaptureActive,
        captureHeapDumps = debugCapture.CaptureHeapDumps,
        captureThreadDumps = debugCapture.CaptureThreadDumps
    };

    // Property order is kept stable so the serialized field order does not change.
    var payload = new
    {
        runId,
        active = status.Active,
        level = status.Level.ToString().ToLowerInvariant(),
        activatedAt = status.ActivatedAt?.ToString("O"),
        activationReason = status.ActivationReason,
        source = status.Source.ToString().ToLowerInvariant(),
        expiresAt = status.ExpiresAt?.ToString("O"),
        retentionPolicy,
        telemetrySettings,
        debugCaptureSettings
    };

    return Results.Ok(payload);
}
// POST handler: manually activates incident mode for a pack run.
//
// Parameters:
//   runId      - route value; a blank value yields 400.
//   request    - optional JSON body. Every field is optional.
//   tenantId   - X-Tenant-ID header; "default" is used when the header is absent.
//
// Level handling: an omitted (or blank) level defaults to Medium. A level that IS supplied
// must name a defined IncidentEscalationLevel member (case-insensitive); anything else is
// rejected with 400 instead of silently activating at Medium, which would hide typos and
// activate at a different severity than the caller asked for.
//
// Responds 400 with the service error when activation fails, otherwise 200 with the
// resulting status summary.
async Task<IResult> HandleActivateIncidentMode(
    string runId,
    [FromBody] ActivateIncidentModeRequest? request,
    [FromHeader(Name = "X-Tenant-ID")] string? tenantId,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(runId))
    {
        return Results.BadRequest(new { error = "runId is required." });
    }

    var level = IncidentEscalationLevel.Medium;
    var requestedLevel = request?.Level;
    if (!string.IsNullOrWhiteSpace(requestedLevel))
    {
        // Enum.TryParse also succeeds for numeric strings that map to no declared member,
        // so the parsed value is additionally checked with Enum.IsDefined.
        if (!Enum.TryParse<IncidentEscalationLevel>(requestedLevel, ignoreCase: true, out var parsedLevel)
            || !Enum.IsDefined(parsedLevel))
        {
            return Results.BadRequest(new { error = $"Invalid escalation level: {requestedLevel}" });
        }

        level = parsedLevel;
    }

    var activationRequest = new IncidentModeActivationRequest(
        RunId: runId,
        TenantId: tenantId ?? "default",
        Level: level,
        Source: StellaOps.TaskRunner.Core.IncidentMode.IncidentModeSource.Manual,
        Reason: request?.Reason ?? "Manual activation via API",
        DurationMinutes: request?.DurationMinutes,
        RequestedBy: request?.RequestedBy);

    var result = await incidentModeService.ActivateAsync(activationRequest, cancellationToken).ConfigureAwait(false);
    if (!result.Success)
    {
        return Results.BadRequest(new { error = result.Error });
    }

    return Results.Ok(new
    {
        success = result.Success,
        active = result.Status.Active,
        level = result.Status.Level.ToString().ToLowerInvariant(),
        activatedAt = result.Status.ActivatedAt?.ToString("O"),
        expiresAt = result.Status.ExpiresAt?.ToString("O")
    });
}
// POST handler: deactivates incident mode for a pack run.
//
// Responds 400 when runId is blank. Otherwise forwards the optional reason from the body to
// IPackRunIncidentModeService.DeactivateAsync and responds 200 with the outcome.
//
// NOTE(review): unlike the activate/escalate handlers, a result with Success == false is
// still returned with HTTP 200; callers have to inspect the "success" field. Confirm whether
// that is intentional.
async Task<IResult> HandleDeactivateIncidentMode(
    string runId,
    [FromBody] DeactivateIncidentModeRequest? request,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(runId))
    {
        return Results.BadRequest(new { error = "runId is required." });
    }

    // The body is optional, so the reason may legitimately be null.
    var reason = request?.Reason;
    var outcome = await incidentModeService
        .DeactivateAsync(runId, reason, cancellationToken)
        .ConfigureAwait(false);

    var payload = new
    {
        success = outcome.Success,
        active = outcome.Status.Active
    };

    return Results.Ok(payload);
}
// POST handler: escalates incident mode for a pack run to a new level.
//
// Parameters:
//   runId   - route value; a blank value yields 400.
//   request - JSON body; a missing body or blank Level yields 400.
//
// The level must name a defined IncidentEscalationLevel member (case-insensitive).
// Responds 400 with the service error when escalation fails, otherwise 200 with the
// resulting level.
async Task<IResult> HandleEscalateIncidentMode(
    string runId,
    [FromBody] EscalateIncidentModeRequest? request,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(runId))
    {
        return Results.BadRequest(new { error = "runId is required." });
    }

    if (request is null || string.IsNullOrWhiteSpace(request.Level))
    {
        return Results.BadRequest(new { error = "Level is required for escalation." });
    }

    // Enum.TryParse alone accepts numeric strings such as "99" and returns a value that is
    // not a declared member; Enum.IsDefined rejects those before they reach the service.
    if (!Enum.TryParse<IncidentEscalationLevel>(request.Level, ignoreCase: true, out var newLevel)
        || !Enum.IsDefined(newLevel))
    {
        return Results.BadRequest(new { error = $"Invalid escalation level: {request.Level}" });
    }

    var result = await incidentModeService.EscalateAsync(runId, newLevel, request.Reason, cancellationToken)
        .ConfigureAwait(false);
    if (!result.Success)
    {
        return Results.BadRequest(new { error = result.Error });
    }

    return Results.Ok(new
    {
        success = result.Success,
        level = result.Status.Level.ToString().ToLowerInvariant()
    });
}
// POST handler: receives an SLO breach notification and hands it to
// IPackRunIncidentModeService.HandleSloBreachAsync.
//
// Responds 400 when the body is missing or the service reports failure; otherwise 200 with
// the affected run id (taken from the notification's ResourceId), the resulting level and
// the activation timestamp in round-trip ("O") format.
async Task<IResult> HandleSloBreachWebhook(
    [FromBody] SloBreachNotification notification,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (notification is null)
    {
        return Results.BadRequest(new { error = "Notification body is required." });
    }

    var outcome = await incidentModeService
        .HandleSloBreachAsync(notification, cancellationToken)
        .ConfigureAwait(false);

    // Status is only read on the success path, as before.
    return outcome.Success
        ? Results.Ok(new
        {
            success = outcome.Success,
            runId = notification.ResourceId,
            level = outcome.Status.Level.ToString().ToLowerInvariant(),
            activatedAt = outcome.Status.ActivatedAt?.ToString("O")
        })
        : Results.BadRequest(new { error = outcome.Error });
}
// Start the application once all endpoints above have been mapped.
app.Run();
static IDictionary<string, JsonNode?>? ConvertInputs(JsonObject? node)
@@ -712,6 +902,17 @@ internal sealed record VerifyAttestationRequest(
// A named subject with an optional digest map.
// NOTE(review): presumably Digest maps an algorithm name to its digest value - confirm against the verify handler.
internal sealed record VerifyAttestationSubject(string Name, IReadOnlyDictionary<string, string>? Digest);
// Incident mode API request models (TASKRUN-OBS-55-001)
// Optional request body for the incident-mode "activate" endpoints; every field may be omitted.
//   Level           - escalation level name, parsed case-insensitively into IncidentEscalationLevel
//                     by the activate handler; Medium is used when omitted.
//   Reason          - free-text reason; the handler substitutes "Manual activation via API" when null.
//   DurationMinutes - passed through unchanged to the activation request.
//   RequestedBy     - passed through unchanged to the activation request.
internal sealed record ActivateIncidentModeRequest(
string? Level,
string? Reason,
int? DurationMinutes,
string? RequestedBy);
// Optional request body for the incident-mode "deactivate" endpoints.
// Reason is forwarded as-is (possibly null) to the service's DeactivateAsync call.
internal sealed record DeactivateIncidentModeRequest(string? Reason);
// Request body for the incident-mode "escalate" endpoints.
//   Level  - required escalation level name; the escalate handler rejects a blank or
//            unparseable value with 400.
//   Reason - optional; forwarded to the service's EscalateAsync call.
// NOTE(review): Level is declared non-nullable, yet the handler still guards against a blank
// value - presumably because deserialization does not enforce the annotation; confirm.
internal sealed record EscalateIncidentModeRequest(string Level, string? Reason);
internal sealed record SimulationResponse(
string PlanHash,
FailurePolicyResponse FailurePolicy,