Implement incident mode management service and models
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
- Added the IPackRunIncidentModeService interface for managing incident mode activation, deactivation, and status retrieval.
- Created the PackRunIncidentModeService class, which implements the service interface with methods for activating, deactivating, and escalating incident mode.
- Introduced the incident mode status model (PackRunIncidentModeStatus) and related enums for escalation levels and activation sources.
- Developed retention policy, telemetry settings, and debug capture settings models to manage incident mode configuration.
- Implemented SLO breach notification handling to activate incident mode based on severity.
- Added an in-memory store (InMemoryPackRunIncidentModeStore) for testing purposes.
- Created comprehensive unit tests for the incident mode service, covering activation, deactivation, status retrieval, and SLO breach handling.
This commit is contained in:
@@ -16,6 +16,7 @@ using StellaOps.AirGap.Policy;
|
||||
using StellaOps.TaskRunner.Core.AirGap;
|
||||
using StellaOps.TaskRunner.Core.Attestation;
|
||||
using StellaOps.TaskRunner.Core.Configuration;
|
||||
using StellaOps.TaskRunner.Core.IncidentMode;
|
||||
using StellaOps.TaskRunner.Core.Events;
|
||||
using StellaOps.TaskRunner.Core.Execution;
|
||||
using StellaOps.TaskRunner.Core.Execution.Simulation;
|
||||
@@ -127,6 +128,10 @@ builder.Services.AddSingleton<IPackRunAttestationStore, InMemoryPackRunAttestati
|
||||
// Attestation signing and orchestration services.
builder.Services
    .AddSingleton<IPackRunAttestationSigner, StubPackRunAttestationSigner>()
    .AddSingleton<IPackRunAttestationService, PackRunAttestationService>();

// Pack run incident mode (TASKRUN-OBS-55-001): the store is registered first,
// followed by the service that is injected into the incident mode handlers.
builder.Services
    .AddSingleton<IPackRunIncidentModeStore, InMemoryPackRunIncidentModeStore>()
    .AddSingleton<IPackRunIncidentModeService, PackRunIncidentModeService>();

builder.Services.AddOpenApi();

var app = builder.Build();
|
||||
@@ -230,6 +235,22 @@ app.MapGet("/api/attestations/{attestationId}/envelope", HandleGetAttestationEnv
|
||||
app.MapPost("/v1/task-runner/attestations/{attestationId}/verify", HandleVerifyAttestation)
    .WithName("VerifyAttestation");
app.MapPost("/api/attestations/{attestationId}/verify", HandleVerifyAttestation)
    .WithName("VerifyAttestationApi");

// Incident mode endpoints (TASKRUN-OBS-55-001).
// Every operation is mapped twice: once under /v1/task-runner and once under /api
// (the latter with an "Api" suffix on the endpoint name).

// Status
app.MapGet("/v1/task-runner/runs/{runId}/incident-mode", HandleGetIncidentModeStatus)
    .WithName("GetIncidentModeStatus");
app.MapGet("/api/runs/{runId}/incident-mode", HandleGetIncidentModeStatus)
    .WithName("GetIncidentModeStatusApi");

// Activation
app.MapPost("/v1/task-runner/runs/{runId}/incident-mode/activate", HandleActivateIncidentMode)
    .WithName("ActivateIncidentMode");
app.MapPost("/api/runs/{runId}/incident-mode/activate", HandleActivateIncidentMode)
    .WithName("ActivateIncidentModeApi");

// Deactivation
app.MapPost("/v1/task-runner/runs/{runId}/incident-mode/deactivate", HandleDeactivateIncidentMode)
    .WithName("DeactivateIncidentMode");
app.MapPost("/api/runs/{runId}/incident-mode/deactivate", HandleDeactivateIncidentMode)
    .WithName("DeactivateIncidentModeApi");

// Escalation
app.MapPost("/v1/task-runner/runs/{runId}/incident-mode/escalate", HandleEscalateIncidentMode)
    .WithName("EscalateIncidentMode");
app.MapPost("/api/runs/{runId}/incident-mode/escalate", HandleEscalateIncidentMode)
    .WithName("EscalateIncidentModeApi");

// SLO breach webhook
app.MapPost("/v1/task-runner/webhooks/slo-breach", HandleSloBreachWebhook)
    .WithName("SloBreachWebhook");
app.MapPost("/api/webhooks/slo-breach", HandleSloBreachWebhook)
    .WithName("SloBreachWebhookApi");
|
||||
|
||||
app.MapGet("/.well-known/openapi", (HttpResponse response) =>
|
||||
{
|
||||
var metadata = OpenApiMetadataFactory.Create("/openapi");
|
||||
@@ -681,6 +702,175 @@ async Task<IResult> HandleVerifyAttestation(
|
||||
}, statusCode: statusCode);
|
||||
}
|
||||
|
||||
// Incident mode handlers (TASKRUN-OBS-55-001)
|
||||
// Returns the current incident mode status of a pack run.
//
// Responds with 400 when runId is blank. Otherwise responds with 200 and an
// anonymous object built from the status returned by
// IPackRunIncidentModeService.GetStatusAsync: enum values are lower-cased and
// the optional timestamps are rendered with the round-trip "O" format.
async Task<IResult> HandleGetIncidentModeStatus(
    string runId,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(runId))
    {
        return Results.BadRequest(new { error = "runId is required." });
    }

    var snapshot = await incidentModeService
        .GetStatusAsync(runId, cancellationToken)
        .ConfigureAwait(false);

    // Project each nested settings object separately to keep the final payload readable.
    var retention = snapshot.RetentionPolicy;
    var retentionPolicy = new
    {
        extendedRetentionActive = retention.ExtendedRetentionActive,
        logRetentionDays = retention.LogRetentionDays,
        artifactRetentionDays = retention.ArtifactRetentionDays
    };

    var telemetry = snapshot.TelemetrySettings;
    var telemetrySettings = new
    {
        enhancedTelemetryActive = telemetry.EnhancedTelemetryActive,
        logVerbosity = telemetry.LogVerbosity.ToString().ToLowerInvariant(),
        traceSamplingRate = telemetry.TraceSamplingRate
    };

    var capture = snapshot.DebugCaptureSettings;
    var debugCaptureSettings = new
    {
        captureActive = capture.CaptureActive,
        captureHeapDumps = capture.CaptureHeapDumps,
        captureThreadDumps = capture.CaptureThreadDumps
    };

    var payload = new
    {
        runId,
        active = snapshot.Active,
        level = snapshot.Level.ToString().ToLowerInvariant(),
        activatedAt = snapshot.ActivatedAt?.ToString("O"),
        activationReason = snapshot.ActivationReason,
        source = snapshot.Source.ToString().ToLowerInvariant(),
        expiresAt = snapshot.ExpiresAt?.ToString("O"),
        retentionPolicy,
        telemetrySettings,
        debugCaptureSettings
    };

    return Results.Ok(payload);
}
|
||||
|
||||
// Manually activates incident mode for a pack run.
//
// Parameters:
//   runId     - route value identifying the run; blank values yield 400.
//   request   - optional JSON body; every field is optional.
//   tenantId  - optional X-Tenant-ID header; "default" is used when missing or blank.
//
// Responses:
//   400 - runId is blank, the supplied level is not a defined IncidentEscalationLevel,
//         or the service reports a failure.
//   200 - activation succeeded; the payload echoes the resulting status.
async Task<IResult> HandleActivateIncidentMode(
    string runId,
    [FromBody] ActivateIncidentModeRequest? request,
    [FromHeader(Name = "X-Tenant-ID")] string? tenantId,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(runId))
    {
        return Results.BadRequest(new { error = "runId is required." });
    }

    // The level is optional and defaults to Medium, but a level that WAS supplied and
    // cannot be understood must be rejected rather than silently downgraded or upgraded.
    var level = IncidentEscalationLevel.Medium;
    if (request?.Level is { } requestedLevel && !string.IsNullOrWhiteSpace(requestedLevel))
    {
        // Enum.TryParse also succeeds for numeric strings such as "99" and for
        // comma-separated combinations, so the parsed value is checked with IsDefined.
        if (!Enum.TryParse<IncidentEscalationLevel>(requestedLevel, ignoreCase: true, out var parsedLevel)
            || !Enum.IsDefined(parsedLevel))
        {
            return Results.BadRequest(new { error = $"Invalid escalation level: {requestedLevel}" });
        }

        level = parsedLevel;
    }

    var activationRequest = new IncidentModeActivationRequest(
        RunId: runId,
        // A header that is present but empty should not create an empty tenant.
        TenantId: string.IsNullOrWhiteSpace(tenantId) ? "default" : tenantId,
        Level: level,
        Source: StellaOps.TaskRunner.Core.IncidentMode.IncidentModeSource.Manual,
        Reason: request?.Reason ?? "Manual activation via API",
        DurationMinutes: request?.DurationMinutes,
        RequestedBy: request?.RequestedBy);

    var result = await incidentModeService.ActivateAsync(activationRequest, cancellationToken).ConfigureAwait(false);

    if (!result.Success)
    {
        return Results.BadRequest(new { error = result.Error });
    }

    return Results.Ok(new
    {
        success = result.Success,
        active = result.Status.Active,
        level = result.Status.Level.ToString().ToLowerInvariant(),
        activatedAt = result.Status.ActivatedAt?.ToString("O"),
        expiresAt = result.Status.ExpiresAt?.ToString("O")
    });
}
|
||||
|
||||
// Deactivates incident mode for a pack run.
//
// Parameters:
//   runId   - route value identifying the run; blank values yield 400.
//   request - optional JSON body carrying a free-form deactivation reason.
//
// Responses:
//   400 - runId is blank or the service reports a failure (same convention as the
//         activate and escalate handlers).
//   200 - deactivation succeeded; the payload reports the resulting active flag.
async Task<IResult> HandleDeactivateIncidentMode(
    string runId,
    [FromBody] DeactivateIncidentModeRequest? request,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(runId))
    {
        return Results.BadRequest(new { error = "runId is required." });
    }

    var result = await incidentModeService.DeactivateAsync(runId, request?.Reason, cancellationToken)
        .ConfigureAwait(false);

    // A failed deactivation must not be reported as 200 OK; surface the service error
    // the same way the sibling incident mode handlers do.
    if (!result.Success)
    {
        return Results.BadRequest(new { error = result.Error });
    }

    return Results.Ok(new
    {
        success = result.Success,
        active = result.Status.Active
    });
}
|
||||
|
||||
// Escalates incident mode for a pack run to a new level.
//
// Parameters:
//   runId   - route value identifying the run; blank values yield 400.
//   request - JSON body; Level is mandatory, Reason is optional.
//
// Responses:
//   400 - runId is blank, Level is missing, Level is not a defined
//         IncidentEscalationLevel, or the service reports a failure.
//   200 - escalation succeeded; the payload reports the resulting level.
async Task<IResult> HandleEscalateIncidentMode(
    string runId,
    [FromBody] EscalateIncidentModeRequest? request,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(runId))
    {
        return Results.BadRequest(new { error = "runId is required." });
    }

    if (request is null || string.IsNullOrWhiteSpace(request.Level))
    {
        return Results.BadRequest(new { error = "Level is required for escalation." });
    }

    // Enum.TryParse also succeeds for numeric strings such as "99" and for
    // comma-separated combinations, which would hand an undefined level to the service.
    // IsDefined restricts the input to the declared enum members.
    if (!Enum.TryParse<IncidentEscalationLevel>(request.Level, ignoreCase: true, out var newLevel)
        || !Enum.IsDefined(newLevel))
    {
        return Results.BadRequest(new { error = $"Invalid escalation level: {request.Level}" });
    }

    var result = await incidentModeService.EscalateAsync(runId, newLevel, request.Reason, cancellationToken)
        .ConfigureAwait(false);

    if (!result.Success)
    {
        return Results.BadRequest(new { error = result.Error });
    }

    return Results.Ok(new
    {
        success = result.Success,
        level = result.Status.Level.ToString().ToLowerInvariant()
    });
}
|
||||
|
||||
// Webhook receiver for SLO breach notifications.
//
// Forwards the notification to IPackRunIncidentModeService.HandleSloBreachAsync and
// maps the outcome to an HTTP result:
//   400 - the body is null or the service reports a failure.
//   200 - success; the payload echoes the notification's ResourceId as runId together
//         with the resulting level and activation timestamp.
async Task<IResult> HandleSloBreachWebhook(
    [FromBody] SloBreachNotification notification,
    IPackRunIncidentModeService incidentModeService,
    CancellationToken cancellationToken)
{
    if (notification is null)
    {
        return Results.BadRequest(new { error = "Notification body is required." });
    }

    var outcome = await incidentModeService
        .HandleSloBreachAsync(notification, cancellationToken)
        .ConfigureAwait(false);

    return outcome.Success
        ? Results.Ok(new
        {
            success = outcome.Success,
            runId = notification.ResourceId,
            level = outcome.Status.Level.ToString().ToLowerInvariant(),
            activatedAt = outcome.Status.ActivatedAt?.ToString("O")
        })
        : Results.BadRequest(new { error = outcome.Error });
}
|
||||
|
||||
app.Run();
|
||||
|
||||
static IDictionary<string, JsonNode?>? ConvertInputs(JsonObject? node)
|
||||
@@ -712,6 +902,17 @@ internal sealed record VerifyAttestationRequest(
|
||||
|
||||
internal sealed record VerifyAttestationSubject(string Name, IReadOnlyDictionary<string, string>? Digest);
|
||||
|
||||
// Incident mode API request models (TASKRUN-OBS-55-001)
|
||||
/// <summary>
/// JSON body accepted by the incident mode activation endpoints (TASKRUN-OBS-55-001).
/// Every member is optional.
/// </summary>
/// <param name="Level">Escalation level name, parsed case-insensitively by the activation handler; Medium is used when omitted.</param>
/// <param name="Reason">Free-form activation reason; the handler substitutes "Manual activation via API" when omitted.</param>
/// <param name="DurationMinutes">Optional duration in minutes, forwarded unchanged to the activation request.</param>
/// <param name="RequestedBy">Optional requester identifier, forwarded unchanged to the activation request.</param>
internal sealed record ActivateIncidentModeRequest(string? Level, string? Reason, int? DurationMinutes, string? RequestedBy);
|
||||
|
||||
/// <summary>
/// JSON body accepted by the incident mode deactivation endpoints (TASKRUN-OBS-55-001).
/// </summary>
/// <param name="Reason">Optional free-form reason, passed through to the service's deactivation call.</param>
internal sealed record DeactivateIncidentModeRequest(
    string? Reason);
|
||||
|
||||
/// <summary>
/// JSON body accepted by the incident mode escalation endpoints (TASKRUN-OBS-55-001).
/// </summary>
/// <param name="Level">
/// Target escalation level name. Declared nullable because a JSON body may omit it;
/// the escalation handler rejects a missing or blank value with 400.
/// </param>
/// <param name="Reason">Optional free-form reason, passed through to the service's escalation call.</param>
internal sealed record EscalateIncidentModeRequest(string? Level, string? Reason);
|
||||
|
||||
internal sealed record SimulationResponse(
|
||||
string PlanHash,
|
||||
FailurePolicyResponse FailurePolicy,
|
||||
|
||||
Reference in New Issue
Block a user