Concelier: - Register Topology.Read, Topology.Manage, Topology.Admin authorization policies mapped to OrchRead/OrchOperate/PlatformContextRead/IntegrationWrite scopes. Previously these policies were referenced by endpoints but never registered, causing System.InvalidOperationException on every topology API call. Gateway routes: - Simplified targets/environments routes (removed specific sub-path routes, use catch-all patterns instead) - Changed environments base route to JobEngine (where CRUD lives) - Changed to ReverseProxy type for all topology routes KNOWN ISSUE (not yet fixed): - ReverseProxy routes don't forward the gateway's identity envelope to Concelier. The regions/targets/bindings endpoints return 401 because hasPrincipal=False — the gateway authenticates the user but doesn't pass the identity to the backend via ReverseProxy. Microservice routes use Valkey transport which includes envelope headers. Topology endpoints need either: (a) Valkey transport registration in Concelier, or (b) Concelier configured to accept raw bearer tokens on ReverseProxy paths. This is an architecture-level fix. Journey findings collected so far: - Integration wizard (Harbor + GitHub App): works end-to-end - Advisory Check All: fixed (parallel individual checks) - Mirror domain creation: works, generate-immediately fails silently - Topology wizard Step 1 (Region): blocked by auth passthrough issue - Topology wizard Step 2 (Environment): POST to JobEngine needs verify - User ID resolution: raw hashes shown everywhere Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
818 lines
29 KiB
C#
818 lines
29 KiB
C#
|
|
using Microsoft.AspNetCore.Mvc;
|
|
using Npgsql;
|
|
using StellaOps.Auth.ServerIntegration.Tenancy;
|
|
using StellaOps.JobEngine.Core.DeadLetter;
|
|
using StellaOps.JobEngine.Core.Domain;
|
|
using StellaOps.JobEngine.WebService.Services;
|
|
using System;
|
|
using System.Globalization;
|
|
using System.Text;
|
|
using static StellaOps.Localization.T;
|
|
|
|
namespace StellaOps.JobEngine.WebService.Endpoints;
|
|
|
|
/// <summary>
|
|
/// REST API endpoints for dead-letter store.
|
|
/// </summary>
|
|
public static class DeadLetterEndpoints
|
|
{
|
|
/// <summary>
/// Registers every dead-letter route under <c>/api/v1/jobengine/deadletter</c>.
/// </summary>
/// <remarks>
/// The group is tagged "Orchestrator Dead-Letter", requires <c>JobEnginePolicies.Read</c>
/// and has <c>RequireTenant()</c> applied. The replay and resolve routes additionally
/// require <c>JobEnginePolicies.Operate</c>.
/// </remarks>
/// <param name="app">Route builder the group is attached to.</param>
/// <returns>The configured route group.</returns>
public static RouteGroupBuilder MapDeadLetterEndpoints(this IEndpointRouteBuilder app)
{
    const string routePrefix = "/api/v1/jobengine/deadletter";
    const string openApiTag = "Orchestrator Dead-Letter";

    var routes = app
        .MapGroup(routePrefix)
        .WithTags(openApiTag)
        .RequireAuthorization(JobEnginePolicies.Read)
        .RequireTenant();

    // ---- Read-only entry queries (group-level Read policy only) ----
    routes
        .MapGet(string.Empty, ListEntries)
        .WithName("Orchestrator_ListDeadLetterEntries")
        .WithDescription(_t("orchestrator.dead_letter.list_description"));

    routes
        .MapGet("{entryId:guid}", GetEntry)
        .WithName("Orchestrator_GetDeadLetterEntry")
        .WithDescription(_t("orchestrator.dead_letter.get_description"));

    routes
        .MapGet("by-job/{jobId:guid}", GetEntryByJobId)
        .WithName("Orchestrator_GetDeadLetterEntryByJobId")
        .WithDescription(_t("orchestrator.dead_letter.get_by_job_description"));

    routes
        .MapGet("stats", GetStats)
        .WithName("Orchestrator_GetDeadLetterStats")
        .WithDescription(_t("orchestrator.dead_letter.stats_description"));

    routes
        .MapGet("export", ExportEntries)
        .WithName("Orchestrator_ExportDeadLetterEntries")
        .WithDescription(_t("orchestrator.dead_letter.export_description"));

    routes
        .MapGet("summary", GetActionableSummary)
        .WithName("Orchestrator_GetDeadLetterSummary")
        .WithDescription(_t("orchestrator.dead_letter.summary_description"));

    // ---- Replay operations (Operate policy on top of the group policy) ----
    routes
        .MapPost("{entryId:guid}/replay", ReplayEntry)
        .WithName("Orchestrator_ReplayDeadLetterEntry")
        .WithDescription(_t("orchestrator.dead_letter.replay_description"))
        .RequireAuthorization(JobEnginePolicies.Operate);

    routes
        .MapPost("replay/batch", ReplayBatch)
        .WithName("Orchestrator_ReplayDeadLetterBatch")
        .WithDescription(_t("orchestrator.dead_letter.replay_batch_description"))
        .RequireAuthorization(JobEnginePolicies.Operate);

    routes
        .MapPost("replay/pending", ReplayPending)
        .WithName("Orchestrator_ReplayPendingDeadLetters")
        .WithDescription(_t("orchestrator.dead_letter.replay_pending_description"))
        .RequireAuthorization(JobEnginePolicies.Operate);

    // ---- Manual resolution (Operate policy) ----
    routes
        .MapPost("{entryId:guid}/resolve", ResolveEntry)
        .WithName("Orchestrator_ResolveDeadLetterEntry")
        .WithDescription(_t("orchestrator.dead_letter.resolve_description"))
        .RequireAuthorization(JobEnginePolicies.Operate);

    routes
        .MapPost("resolve/batch", ResolveBatch)
        .WithName("Orchestrator_ResolveDeadLetterBatch")
        .WithDescription(_t("orchestrator.dead_letter.resolve_batch_description"))
        .RequireAuthorization(JobEnginePolicies.Operate);

    // ---- Error classification reference ----
    routes
        .MapGet("error-codes", ListErrorCodes)
        .WithName("Orchestrator_ListDeadLetterErrorCodes")
        .WithDescription(_t("orchestrator.dead_letter.error_codes_description"));

    // ---- Replay audit trail ----
    routes
        .MapGet("{entryId:guid}/audit", GetReplayAudit)
        .WithName("Orchestrator_GetDeadLetterReplayAudit")
        .WithDescription(_t("orchestrator.dead_letter.replay_audit_description"));

    return routes;
}
|
|
|
|
/// <summary>
/// Returns one page of dead-letter entries for the resolved tenant, plus the total
/// number of entries matching the same filter.
/// </summary>
/// <remarks>
/// <paramref name="status"/> and <paramref name="category"/> values that do not parse to
/// their enum are treated as "no filter". The next cursor is the round-trip ("O")
/// timestamp of the last entry and is only emitted when the page is full.
/// An <see cref="InvalidOperationException"/> is reported as 400; a missing dead-letter
/// table is reported as an empty page.
/// </remarks>
private static async Task<IResult> ListEntries(
    HttpContext context,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IDeadLetterRepository repository,
    [FromQuery] string? status = null,
    [FromQuery] string? category = null,
    [FromQuery] string? jobType = null,
    [FromQuery] string? errorCode = null,
    [FromQuery] Guid? sourceId = null,
    [FromQuery] Guid? runId = null,
    [FromQuery] bool? isRetryable = null,
    [FromQuery] string? createdAfter = null,
    [FromQuery] string? createdBefore = null,
    [FromQuery] int? limit = null,
    [FromQuery] string? cursor = null,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenant = tenantResolver.Resolve(context);
        var pageSize = EndpointHelpers.GetLimit(limit);

        var filter = new DeadLetterListOptions(
            Status: TryParseDeadLetterStatus(status),
            Category: TryParseErrorCategory(category),
            JobType: jobType,
            ErrorCode: errorCode,
            SourceId: sourceId,
            RunId: runId,
            IsRetryable: isRetryable,
            CreatedAfter: EndpointHelpers.TryParseDateTimeOffset(createdAfter),
            CreatedBefore: EndpointHelpers.TryParseDateTimeOffset(createdBefore),
            Cursor: cursor,
            Limit: pageSize);

        var page = await repository.ListAsync(tenant, filter, cancellationToken).ConfigureAwait(false);
        var matching = await repository.CountAsync(tenant, filter, cancellationToken).ConfigureAwait(false);

        var items = page.Select(DeadLetterEntryResponse.FromDomain).ToList();

        // A full page means there may be more; the last entry's creation time is the cursor.
        string? nextCursor = null;
        if (page.Count >= pageSize)
        {
            nextCursor = page.Last().CreatedAt.ToString("O", CultureInfo.InvariantCulture);
        }

        return Results.Ok(new DeadLetterListResponse(items, nextCursor, matching));
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
    catch (PostgresException ex) when (IsMissingDeadLetterTable(ex))
    {
        return Results.Ok(new DeadLetterListResponse(new List<DeadLetterEntryResponse>(), null, 0));
    }
}
|
|
|
|
/// <summary>
/// Fetches a single dead-letter entry by its identifier for the resolved tenant.
/// </summary>
/// <remarks>
/// Responds 404 when the repository returns no entry or the dead-letter table is missing,
/// and 400 when an <see cref="InvalidOperationException"/> is raised.
/// </remarks>
private static async Task<IResult> GetEntry(
    HttpContext context,
    [FromRoute] Guid entryId,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IDeadLetterRepository repository,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenant = tenantResolver.Resolve(context);
        var found = await repository
            .GetByIdAsync(tenant, entryId, cancellationToken)
            .ConfigureAwait(false);

        return found is null
            ? Results.NotFound()
            : Results.Ok(DeadLetterEntryDetailResponse.FromDomain(found));
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
    catch (PostgresException ex) when (IsMissingDeadLetterTable(ex))
    {
        return Results.NotFound();
    }
}
|
|
|
|
/// <summary>
/// Fetches the dead-letter entry recorded for the given original job identifier.
/// </summary>
/// <remarks>
/// Responds 404 when the repository returns no entry or the dead-letter table is missing,
/// and 400 when an <see cref="InvalidOperationException"/> is raised.
/// </remarks>
private static async Task<IResult> GetEntryByJobId(
    HttpContext context,
    [FromRoute] Guid jobId,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IDeadLetterRepository repository,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenant = tenantResolver.Resolve(context);
        var found = await repository
            .GetByOriginalJobIdAsync(tenant, jobId, cancellationToken)
            .ConfigureAwait(false);

        return found is null
            ? Results.NotFound()
            : Results.Ok(DeadLetterEntryDetailResponse.FromDomain(found));
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
    catch (PostgresException ex) when (IsMissingDeadLetterTable(ex))
    {
        return Results.NotFound();
    }
}
|
|
|
|
/// <summary>
/// Returns aggregate dead-letter statistics for the resolved tenant.
/// </summary>
/// <remarks>
/// When the dead-letter table is missing, an all-zero statistics payload is returned
/// instead of an error. An <see cref="InvalidOperationException"/> is reported as 400.
/// </remarks>
private static async Task<IResult> GetStats(
    HttpContext context,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IDeadLetterRepository repository,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenant = tenantResolver.Resolve(context);
        var snapshot = await repository.GetStatsAsync(tenant, cancellationToken).ConfigureAwait(false);
        var body = DeadLetterStatsResponse.FromDomain(snapshot);

        return Results.Ok(body);
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
    catch (PostgresException ex) when (IsMissingDeadLetterTable(ex))
    {
        var emptyBody = DeadLetterStatsResponse.FromDomain(CreateEmptyStats());
        return Results.Ok(emptyBody);
    }
}
|
|
|
|
/// <summary>
/// Exports the filtered dead-letter entries of the resolved tenant as a CSV download.
/// </summary>
/// <remarks>
/// <paramref name="limit"/> defaults to 1000 and is clamped to the range 1..10000.
/// The file is named <c>deadletter-export-{UTC yyyyMMdd-HHmmss}.csv</c>. When the
/// dead-letter table is missing, a header-only CSV is returned.
/// An <see cref="InvalidOperationException"/> is reported as 400.
/// </remarks>
private static async Task<IResult> ExportEntries(
    HttpContext context,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IDeadLetterRepository repository,
    [FromQuery] string? status = null,
    [FromQuery] string? category = null,
    [FromQuery] string? jobType = null,
    [FromQuery] string? errorCode = null,
    [FromQuery] bool? isRetryable = null,
    [FromQuery] int? limit = null,
    CancellationToken cancellationToken = default)
{
    // Used by both the normal path and the missing-table fallback so the two
    // responses share content type and file-name format.
    static IResult CsvDownload(IReadOnlyList<DeadLetterEntry> rows)
    {
        var bytes = Encoding.UTF8.GetBytes(BuildDeadLetterCsv(rows));
        var downloadName = $"deadletter-export-{DateTime.UtcNow:yyyyMMdd-HHmmss}.csv";
        return Results.File(bytes, "text/csv", downloadName);
    }

    try
    {
        var tenant = tenantResolver.Resolve(context);
        var maxRows = Math.Clamp(limit ?? 1000, 1, 10000);

        var filter = new DeadLetterListOptions(
            Status: TryParseDeadLetterStatus(status),
            Category: TryParseErrorCategory(category),
            JobType: jobType,
            ErrorCode: errorCode,
            IsRetryable: isRetryable,
            Limit: maxRows);

        var rows = await repository.ListAsync(tenant, filter, cancellationToken).ConfigureAwait(false);

        return CsvDownload(rows);
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
    catch (PostgresException ex) when (IsMissingDeadLetterTable(ex))
    {
        return CsvDownload(Array.Empty<DeadLetterEntry>());
    }
}
|
|
|
|
/// <summary>
/// Returns the actionable dead-letter summary rows (one per error code) for the resolved tenant.
/// </summary>
/// <remarks>
/// <paramref name="limit"/> defaults to 10 and is clamped to the range 1..50. A missing
/// dead-letter table yields an empty summary list; an
/// <see cref="InvalidOperationException"/> is reported as 400.
/// </remarks>
private static async Task<IResult> GetActionableSummary(
    HttpContext context,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IDeadLetterRepository repository,
    [FromQuery] int? limit = null,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenant = tenantResolver.Resolve(context);
        var top = Math.Clamp(limit ?? 10, 1, 50);

        var rows = await repository
            .GetActionableSummaryAsync(tenant, top, cancellationToken)
            .ConfigureAwait(false);

        var items = (from row in rows
                     select new DeadLetterSummaryResponse(
                         row.ErrorCode,
                         row.Category.ToString(),
                         row.EntryCount,
                         row.RetryableCount,
                         row.OldestEntry,
                         row.SampleReason)).ToList();

        return Results.Ok(new DeadLetterSummaryListResponse(items));
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
    catch (PostgresException ex) when (IsMissingDeadLetterTable(ex))
    {
        return Results.Ok(new DeadLetterSummaryListResponse(new List<DeadLetterSummaryResponse>()));
    }
}
|
|
|
|
/// <summary>
/// Replays a single dead-letter entry on behalf of the current user.
/// </summary>
/// <remarks>
/// Responds 422 with the manager's error message when the replay is not successful,
/// 400 when an <see cref="InvalidOperationException"/> is raised, and 200 with the
/// replay result otherwise.
/// </remarks>
private static async Task<IResult> ReplayEntry(
    HttpContext context,
    [FromRoute] Guid entryId,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IReplayManager replayManager,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenantId = tenantResolver.Resolve(context);
        var user = GetCurrentUser(context);

        var result = await replayManager.ReplayAsync(tenantId, entryId, user, cancellationToken)
            .ConfigureAwait(false);

        if (!result.Success)
        {
            return Results.UnprocessableEntity(new { error = result.ErrorMessage });
        }

        // The batch endpoints treat UpdatedEntry as optional; do the same here so a
        // successful result without an entry yields a null field instead of a
        // NullReferenceException (HTTP 500) inside FromDomain.
        var updatedEntry = result.UpdatedEntry is not null
            ? DeadLetterEntryResponse.FromDomain(result.UpdatedEntry)
            : null;

        return Results.Ok(new ReplayResultResponse(
            result.Success,
            result.NewJobId,
            result.ErrorMessage,
            updatedEntry));
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
}
|
|
|
|
/// <summary>
/// Replays the dead-letter entries listed in the request body on behalf of the current user.
/// </summary>
/// <remarks>
/// Always responds 200 with per-entry outcomes unless an <see cref="ArgumentException"/>
/// or <see cref="InvalidOperationException"/> is raised, which is reported as 400.
/// </remarks>
private static async Task<IResult> ReplayBatch(
    HttpContext context,
    [FromBody] ReplayBatchRequest request,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IReplayManager replayManager,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenant = tenantResolver.Resolve(context);
        var actor = GetCurrentUser(context);

        var batch = await replayManager
            .ReplayBatchAsync(tenant, request.EntryIds, actor, cancellationToken)
            .ConfigureAwait(false);

        // An individual outcome may carry no updated entry; map that to null.
        var outcomes = (from item in batch.Results
                        select new ReplayResultResponse(
                            item.Success,
                            item.NewJobId,
                            item.ErrorMessage,
                            item.UpdatedEntry is not null
                                ? DeadLetterEntryResponse.FromDomain(item.UpdatedEntry)
                                : null)).ToList();

        return Results.Ok(new BatchReplayResultResponse(
            batch.Attempted,
            batch.Succeeded,
            batch.Failed,
            outcomes));
    }
    catch (Exception ex) when (ex is ArgumentException or InvalidOperationException)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
}
|
|
|
|
/// <summary>
/// Replays pending dead-letter entries selected by optional error code and category.
/// </summary>
/// <remarks>
/// <c>MaxCount</c> defaults to 100 when omitted. A category string that does not parse
/// to the enum is passed on as "no category". An
/// <see cref="InvalidOperationException"/> is reported as 400.
/// </remarks>
private static async Task<IResult> ReplayPending(
    HttpContext context,
    [FromBody] ReplayPendingRequest request,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IReplayManager replayManager,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenant = tenantResolver.Resolve(context);
        var actor = GetCurrentUser(context);

        var batch = await replayManager
            .ReplayPendingAsync(
                tenant,
                request.ErrorCode,
                TryParseErrorCategory(request.Category),
                request.MaxCount ?? 100,
                actor,
                cancellationToken)
            .ConfigureAwait(false);

        // An individual outcome may carry no updated entry; map that to null.
        var outcomes = (from item in batch.Results
                        select new ReplayResultResponse(
                            item.Success,
                            item.NewJobId,
                            item.ErrorMessage,
                            item.UpdatedEntry is not null
                                ? DeadLetterEntryResponse.FromDomain(item.UpdatedEntry)
                                : null)).ToList();

        return Results.Ok(new BatchReplayResultResponse(
            batch.Attempted,
            batch.Succeeded,
            batch.Failed,
            outcomes));
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
}
|
|
|
|
/// <summary>
/// Marks a single dead-letter entry as resolved with the supplied notes, on behalf of
/// the current user, and returns the updated entry.
/// </summary>
/// <remarks>An <see cref="InvalidOperationException"/> is reported as 400.</remarks>
private static async Task<IResult> ResolveEntry(
    HttpContext context,
    [FromRoute] Guid entryId,
    [FromBody] ResolveEntryRequest request,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IReplayManager replayManager,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenant = tenantResolver.Resolve(context);
        var actor = GetCurrentUser(context);

        var resolved = await replayManager
            .ResolveAsync(tenant, entryId, request.Notes, actor, cancellationToken)
            .ConfigureAwait(false);
        var body = DeadLetterEntryResponse.FromDomain(resolved);

        return Results.Ok(body);
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
}
|
|
|
|
/// <summary>
/// Marks the dead-letter entries listed in the request body as resolved, on behalf of
/// the current user, and returns how many were resolved.
/// </summary>
/// <remarks>
/// Responds 200 with <c>{ resolvedCount }</c>. An <see cref="ArgumentException"/> or
/// <see cref="InvalidOperationException"/> is reported as 400, matching the batch
/// replay endpoint.
/// </remarks>
private static async Task<IResult> ResolveBatch(
    HttpContext context,
    [FromBody] ResolveBatchRequest request,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IReplayManager replayManager,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenantId = tenantResolver.Resolve(context);
        var user = GetCurrentUser(context);

        var count = await replayManager.ResolveBatchAsync(
            tenantId, request.EntryIds, request.Notes, user, cancellationToken)
            .ConfigureAwait(false);

        return Results.Ok(new { resolvedCount = count });
    }
    catch (ArgumentException ex)
    {
        // NOTE(review): assumes ResolveBatchAsync validates its arguments the way
        // ReplayBatchAsync does (e.g. a body without entryIds) — confirm. Without this
        // handler such a request would surface as HTTP 500 instead of 400.
        return Results.BadRequest(new { error = ex.Message });
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
}
|
|
|
|
/// <summary>
/// Returns the classification of every known error code, obtained by running each code
/// through the injected classifier with an empty message.
/// </summary>
/// <remarks>
/// The work is synchronous; the result is wrapped in a completed task and
/// <paramref name="cancellationToken"/> is not consulted.
/// </remarks>
private static Task<IResult> ListErrorCodes(
    [FromServices] IErrorClassifier classifier,
    CancellationToken cancellationToken = default)
{
    // Known error codes, grouped by kind. Order here is the order of the response.
    var knownCodes = new[]
    {
        // Transient
        DefaultErrorClassifier.ErrorCodes.NetworkTimeout,
        DefaultErrorClassifier.ErrorCodes.ConnectionRefused,
        DefaultErrorClassifier.ErrorCodes.DnsResolutionFailed,
        DefaultErrorClassifier.ErrorCodes.ServiceUnavailable,
        DefaultErrorClassifier.ErrorCodes.GatewayTimeout,

        // Not found
        DefaultErrorClassifier.ErrorCodes.ImageNotFound,
        DefaultErrorClassifier.ErrorCodes.SourceNotFound,
        DefaultErrorClassifier.ErrorCodes.RegistryNotFound,

        // Authentication / authorization
        DefaultErrorClassifier.ErrorCodes.InvalidCredentials,
        DefaultErrorClassifier.ErrorCodes.TokenExpired,
        DefaultErrorClassifier.ErrorCodes.InsufficientPermissions,

        // Rate limiting
        DefaultErrorClassifier.ErrorCodes.RateLimited,
        DefaultErrorClassifier.ErrorCodes.QuotaExceeded,

        // Validation
        DefaultErrorClassifier.ErrorCodes.InvalidPayload,
        DefaultErrorClassifier.ErrorCodes.InvalidConfiguration,

        // Upstream
        DefaultErrorClassifier.ErrorCodes.RegistryError,
        DefaultErrorClassifier.ErrorCodes.AdvisoryFeedError,

        // Internal
        DefaultErrorClassifier.ErrorCodes.InternalError,
        DefaultErrorClassifier.ErrorCodes.ProcessingError
    };

    var catalogue = new List<ErrorCodeResponse>();
    foreach (var code in knownCodes)
    {
        var info = classifier.Classify(code, string.Empty);
        catalogue.Add(new ErrorCodeResponse(
            info.ErrorCode,
            info.Category.ToString(),
            info.Description,
            info.RemediationHint,
            info.IsRetryable,
            info.SuggestedRetryDelay?.TotalSeconds));
    }

    return Task.FromResult(Results.Ok(new ErrorCodeListResponse(catalogue)));
}
|
|
|
|
/// <summary>
/// Returns the replay audit records stored for one dead-letter entry of the resolved tenant.
/// </summary>
/// <remarks>An <see cref="InvalidOperationException"/> is reported as 400.</remarks>
private static async Task<IResult> GetReplayAudit(
    HttpContext context,
    [FromRoute] Guid entryId,
    [FromServices] TenantResolver tenantResolver,
    [FromServices] IReplayAuditRepository auditRepository,
    CancellationToken cancellationToken = default)
{
    try
    {
        var tenant = tenantResolver.Resolve(context);
        var records = await auditRepository
            .GetByEntryAsync(tenant, entryId, cancellationToken)
            .ConfigureAwait(false);

        var items = (from record in records
                     select new ReplayAuditResponse(
                         record.AuditId,
                         record.EntryId,
                         record.AttemptNumber,
                         record.Success,
                         record.NewJobId,
                         record.ErrorMessage,
                         record.TriggeredBy,
                         record.TriggeredAt,
                         record.CompletedAt,
                         record.InitiatedBy)).ToList();

        return Results.Ok(new ReplayAuditListResponse(items));
    }
    catch (InvalidOperationException ex)
    {
        return Results.BadRequest(new { error = ex.Message });
    }
}
|
|
|
|
/// <summary>
/// Parses a status filter case-insensitively; blank or unparseable input yields <c>null</c>.
/// </summary>
private static DeadLetterStatus? TryParseDeadLetterStatus(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return null;
    }

    if (Enum.TryParse<DeadLetterStatus>(value, ignoreCase: true, out var parsed))
    {
        return parsed;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Parses a category filter case-insensitively; blank or unparseable input yields <c>null</c>.
/// </summary>
private static ErrorCategory? TryParseErrorCategory(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return null;
    }

    if (Enum.TryParse<ErrorCategory>(value, ignoreCase: true, out var parsed))
    {
        return parsed;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Returns the name of the request's identity, or "anonymous" when the user, its
/// identity, or the identity name is null.
/// </summary>
private static string GetCurrentUser(HttpContext context)
{
    var identityName = context.User?.Identity?.Name;
    return identityName ?? "anonymous";
}
|
|
|
|
/// <summary>
/// Tells whether a PostgreSQL error should be handled as "the dead-letter table is absent".
/// </summary>
/// <remarks>
/// Matches SQLSTATE 42P01 (undefined_table) and 25P02 (in_failed_sql_transaction).
/// NOTE(review): 25P02 is presumably included because it follows an earlier failed
/// statement in the same transaction — confirm.
/// </remarks>
private static bool IsMissingDeadLetterTable(PostgresException exception)
{
    return exception.SqlState is "42P01" or "25P02";
}
|
|
|
|
/// <summary>
/// Builds a statistics value with every counter at zero and every breakdown empty.
/// </summary>
private static DeadLetterStats CreateEmptyStats()
{
    var noCategories = new Dictionary<ErrorCategory, long>();
    var noErrorCodes = new Dictionary<string, long>();
    var noJobTypes = new Dictionary<string, long>();

    return new DeadLetterStats(
        TotalEntries: 0,
        PendingEntries: 0,
        ReplayingEntries: 0,
        ReplayedEntries: 0,
        ResolvedEntries: 0,
        ExhaustedEntries: 0,
        ExpiredEntries: 0,
        RetryableEntries: 0,
        ByCategory: noCategories,
        TopErrorCodes: noErrorCodes,
        TopJobTypes: noJobTypes);
}
|
|
|
|
/// <summary>
/// Renders dead-letter entries as CSV text: one header line followed by one line per entry.
/// </summary>
/// <remarks>
/// Numbers, booleans and timestamps are formatted with the invariant culture; timestamps
/// use the round-trip ("O") format. Every cell goes through <c>EscapeCsv</c>.
/// An empty input produces the header line only.
/// </remarks>
private static string BuildDeadLetterCsv(IReadOnlyList<DeadLetterEntry> entries)
{
    var invariant = CultureInfo.InvariantCulture;
    var csv = new StringBuilder();
    csv.AppendLine("entryId,jobId,status,errorCode,category,retryable,replayAttempts,maxReplayAttempts,failedAt,createdAt,resolvedAt,reason");

    foreach (var row in entries)
    {
        // Cell order must stay aligned with the header line above.
        string?[] cells =
        {
            row.EntryId.ToString(),
            row.OriginalJobId.ToString(),
            row.Status.ToString(),
            row.ErrorCode,
            row.Category.ToString(),
            row.IsRetryable.ToString(invariant),
            row.ReplayAttempts.ToString(invariant),
            row.MaxReplayAttempts.ToString(invariant),
            row.FailedAt.ToString("O", invariant),
            row.CreatedAt.ToString("O", invariant),
            row.ResolvedAt?.ToString("O", invariant),
            row.FailureReason,
        };

        csv.AppendJoin(',', cells.Select(cell => EscapeCsv(cell)));
        csv.AppendLine();
    }

    return csv.ToString();
}
|
|
|
|
/// <summary>
/// Converts a value into a CSV cell: null or empty becomes an empty cell, anything else
/// is wrapped in double quotes with embedded quotes doubled.
/// </summary>
/// <remarks>
/// Values that begin with <c>=</c>, <c>+</c>, <c>-</c>, <c>@</c>, tab or carriage return
/// are prefixed with an apostrophe so spreadsheet applications display them as text
/// instead of evaluating them as formulas (CSV/formula injection).
/// </remarks>
/// <param name="value">Raw cell text; may be null.</param>
/// <returns>The escaped cell text, never null.</returns>
private static string EscapeCsv(string? value)
{
    if (string.IsNullOrEmpty(value))
    {
        return string.Empty;
    }

    // Exported text such as failure reasons can originate from job output, so a
    // leading formula trigger character must be neutralised before quoting.
    var startsLikeFormula = value[0] is '=' or '+' or '-' or '@' or '\t' or '\r';
    var safeValue = startsLikeFormula ? "'" + value : value;

    return "\"" + safeValue.Replace("\"", "\"\"", StringComparison.Ordinal) + "\"";
}
|
|
}
|
|
|
|
// Response DTOs
|
|
|
|
/// <summary>
/// Summary view of a dead-letter entry as returned by the list, replay and resolve endpoints.
/// Status and category are carried as the enum member names.
/// </summary>
public sealed record DeadLetterEntryResponse(
    Guid EntryId,
    Guid OriginalJobId,
    Guid? RunId,
    Guid? SourceId,
    string JobType,
    string Status,
    string ErrorCode,
    string FailureReason,
    string? RemediationHint,
    string Category,
    bool IsRetryable,
    int OriginalAttempts,
    int ReplayAttempts,
    int MaxReplayAttempts,
    bool CanReplay,
    DateTimeOffset FailedAt,
    DateTimeOffset CreatedAt,
    DateTimeOffset ExpiresAt,
    DateTimeOffset? ResolvedAt)
{
    /// <summary>
    /// Copies the summary fields of a domain entry into a response, converting the
    /// status and category enums to their names.
    /// </summary>
    public static DeadLetterEntryResponse FromDomain(DeadLetterEntry entry)
    {
        return new DeadLetterEntryResponse(
            EntryId: entry.EntryId,
            OriginalJobId: entry.OriginalJobId,
            RunId: entry.RunId,
            SourceId: entry.SourceId,
            JobType: entry.JobType,
            Status: entry.Status.ToString(),
            ErrorCode: entry.ErrorCode,
            FailureReason: entry.FailureReason,
            RemediationHint: entry.RemediationHint,
            Category: entry.Category.ToString(),
            IsRetryable: entry.IsRetryable,
            OriginalAttempts: entry.OriginalAttempts,
            ReplayAttempts: entry.ReplayAttempts,
            MaxReplayAttempts: entry.MaxReplayAttempts,
            CanReplay: entry.CanReplay,
            FailedAt: entry.FailedAt,
            CreatedAt: entry.CreatedAt,
            ExpiresAt: entry.ExpiresAt,
            ResolvedAt: entry.ResolvedAt);
    }
}
|
|
|
|
/// <summary>
/// Detailed view of a dead-letter entry as returned by the get-by-id and get-by-job
/// endpoints. Adds payload, idempotency, correlation, resolution and authorship fields
/// to the summary view.
/// </summary>
public sealed record DeadLetterEntryDetailResponse(
    Guid EntryId,
    Guid OriginalJobId,
    Guid? RunId,
    Guid? SourceId,
    string JobType,
    string Payload,
    string PayloadDigest,
    string IdempotencyKey,
    string? CorrelationId,
    string Status,
    string ErrorCode,
    string FailureReason,
    string? RemediationHint,
    string Category,
    bool IsRetryable,
    int OriginalAttempts,
    int ReplayAttempts,
    int MaxReplayAttempts,
    bool CanReplay,
    DateTimeOffset FailedAt,
    DateTimeOffset CreatedAt,
    DateTimeOffset UpdatedAt,
    DateTimeOffset ExpiresAt,
    DateTimeOffset? ResolvedAt,
    string? ResolutionNotes,
    string CreatedBy,
    string UpdatedBy)
{
    /// <summary>
    /// Copies all fields of a domain entry into a detail response, converting the
    /// status and category enums to their names.
    /// </summary>
    public static DeadLetterEntryDetailResponse FromDomain(DeadLetterEntry entry)
    {
        return new DeadLetterEntryDetailResponse(
            EntryId: entry.EntryId,
            OriginalJobId: entry.OriginalJobId,
            RunId: entry.RunId,
            SourceId: entry.SourceId,
            JobType: entry.JobType,
            Payload: entry.Payload,
            PayloadDigest: entry.PayloadDigest,
            IdempotencyKey: entry.IdempotencyKey,
            CorrelationId: entry.CorrelationId,
            Status: entry.Status.ToString(),
            ErrorCode: entry.ErrorCode,
            FailureReason: entry.FailureReason,
            RemediationHint: entry.RemediationHint,
            Category: entry.Category.ToString(),
            IsRetryable: entry.IsRetryable,
            OriginalAttempts: entry.OriginalAttempts,
            ReplayAttempts: entry.ReplayAttempts,
            MaxReplayAttempts: entry.MaxReplayAttempts,
            CanReplay: entry.CanReplay,
            FailedAt: entry.FailedAt,
            CreatedAt: entry.CreatedAt,
            UpdatedAt: entry.UpdatedAt,
            ExpiresAt: entry.ExpiresAt,
            ResolvedAt: entry.ResolvedAt,
            ResolutionNotes: entry.ResolutionNotes,
            CreatedBy: entry.CreatedBy,
            UpdatedBy: entry.UpdatedBy);
    }
}
|
|
|
|
/// <summary>One page of dead-letter entries returned by the list endpoint.</summary>
/// <param name="Entries">Entries on this page.</param>
/// <param name="NextCursor">Cursor for the following page, or <c>null</c> when none is offered.</param>
/// <param name="TotalCount">Number of entries matching the filter.</param>
public sealed record DeadLetterListResponse(
    IReadOnlyList<DeadLetterEntryResponse> Entries, string? NextCursor, long TotalCount);
|
|
|
|
/// <summary>
/// Aggregate dead-letter counters and breakdowns returned by the stats endpoint.
/// <c>ByCategory</c> is keyed by the category enum member name.
/// </summary>
public sealed record DeadLetterStatsResponse(
    long TotalEntries,
    long PendingEntries,
    long ReplayingEntries,
    long ReplayedEntries,
    long ResolvedEntries,
    long ExhaustedEntries,
    long ExpiredEntries,
    long RetryableEntries,
    IDictionary<string, long> ByCategory,
    IDictionary<string, long> TopErrorCodes,
    IDictionary<string, long> TopJobTypes)
{
    /// <summary>
    /// Converts domain statistics into a response. The category breakdown is re-keyed by
    /// enum name; the error-code and job-type breakdowns are copied into new dictionaries.
    /// </summary>
    public static DeadLetterStatsResponse FromDomain(DeadLetterStats stats)
    {
        var byCategoryName = stats.ByCategory.ToDictionary(pair => pair.Key.ToString(), pair => pair.Value);
        var errorCodesCopy = new Dictionary<string, long>(stats.TopErrorCodes);
        var jobTypesCopy = new Dictionary<string, long>(stats.TopJobTypes);

        return new DeadLetterStatsResponse(
            TotalEntries: stats.TotalEntries,
            PendingEntries: stats.PendingEntries,
            ReplayingEntries: stats.ReplayingEntries,
            ReplayedEntries: stats.ReplayedEntries,
            ResolvedEntries: stats.ResolvedEntries,
            ExhaustedEntries: stats.ExhaustedEntries,
            ExpiredEntries: stats.ExpiredEntries,
            RetryableEntries: stats.RetryableEntries,
            ByCategory: byCategoryName,
            TopErrorCodes: errorCodesCopy,
            TopJobTypes: jobTypesCopy);
    }
}
|
|
|
|
/// <summary>One row of the actionable summary: counts and a sample for a single error code.</summary>
/// <param name="ErrorCode">Error code the row describes.</param>
/// <param name="Category">Error category, as the enum member name.</param>
/// <param name="EntryCount">Entry count reported for this error code.</param>
/// <param name="RetryableCount">Retryable-entry count reported for this error code.</param>
/// <param name="OldestEntry">Timestamp reported for the oldest entry.</param>
/// <param name="SampleReason">Sample failure reason, if one is available.</param>
public sealed record DeadLetterSummaryResponse(
    string ErrorCode, string Category,
    long EntryCount, long RetryableCount,
    DateTimeOffset OldestEntry, string? SampleReason);
|
|
|
|
/// <summary>Envelope for the rows returned by the summary endpoint.</summary>
/// <param name="Summaries">Summary rows; empty when there is nothing to report.</param>
public sealed record DeadLetterSummaryListResponse(IReadOnlyList<DeadLetterSummaryResponse> Summaries);
|
|
|
|
/// <summary>Outcome of replaying one dead-letter entry.</summary>
/// <param name="Success">Whether the replay succeeded.</param>
/// <param name="NewJobId">Job identifier reported by the replay, if any.</param>
/// <param name="ErrorMessage">Error message reported by the replay, if any.</param>
/// <param name="UpdatedEntry">Entry state after the replay, or <c>null</c> when not available.</param>
public sealed record ReplayResultResponse(
    bool Success, Guid? NewJobId, string? ErrorMessage, DeadLetterEntryResponse? UpdatedEntry);
|
|
|
|
/// <summary>Aggregate outcome of a batch or pending replay request.</summary>
/// <param name="Attempted">Number of replays attempted.</param>
/// <param name="Succeeded">Number of replays that succeeded.</param>
/// <param name="Failed">Number of replays that failed.</param>
/// <param name="Results">Per-entry outcomes.</param>
public sealed record BatchReplayResultResponse(
    int Attempted, int Succeeded, int Failed, IReadOnlyList<ReplayResultResponse> Results);
|
|
|
|
/// <summary>Request body of the batch replay endpoint.</summary>
/// <param name="EntryIds">Identifiers of the dead-letter entries to replay.</param>
public sealed record ReplayBatchRequest(IReadOnlyList<Guid> EntryIds);
|
|
|
|
/// <summary>Request body of the pending replay endpoint; every field is optional.</summary>
/// <param name="ErrorCode">Restrict the replay to this error code.</param>
/// <param name="Category">Restrict the replay to this error category (enum member name).</param>
/// <param name="MaxCount">Upper bound on entries to replay; the endpoint uses 100 when omitted.</param>
public sealed record ReplayPendingRequest(string? ErrorCode, string? Category, int? MaxCount);
|
|
|
|
/// <summary>Request body of the single-entry resolve endpoint.</summary>
/// <param name="Notes">Resolution notes passed to the replay manager.</param>
public sealed record ResolveEntryRequest(string Notes);
|
|
|
|
/// <summary>Request body of the batch resolve endpoint.</summary>
/// <param name="EntryIds">Identifiers of the dead-letter entries to resolve.</param>
/// <param name="Notes">Resolution notes passed along with the identifiers.</param>
public sealed record ResolveBatchRequest(IReadOnlyList<Guid> EntryIds, string Notes);
|
|
|
|
/// <summary>Classification of one error code as returned by the error-codes endpoint.</summary>
/// <param name="ErrorCode">The classified error code.</param>
/// <param name="Category">Error category, as the enum member name.</param>
/// <param name="Description">Description supplied by the classifier.</param>
/// <param name="RemediationHint">Remediation hint supplied by the classifier.</param>
/// <param name="IsRetryable">Whether the classifier marks the error as retryable.</param>
/// <param name="SuggestedRetryDelaySeconds">Suggested retry delay in seconds, if any.</param>
public sealed record ErrorCodeResponse(
    string ErrorCode, string Category,
    string Description, string RemediationHint,
    bool IsRetryable, double? SuggestedRetryDelaySeconds);
|
|
|
|
/// <summary>Envelope for the classifications returned by the error-codes endpoint.</summary>
/// <param name="ErrorCodes">One classification per known error code.</param>
public sealed record ErrorCodeListResponse(IReadOnlyList<ErrorCodeResponse> ErrorCodes);
|
|
|
|
/// <summary>One replay audit record as returned by the audit endpoint.</summary>
/// <param name="AuditId">Identifier of the audit record.</param>
/// <param name="EntryId">Dead-letter entry the record belongs to.</param>
/// <param name="AttemptNumber">Attempt number stored on the record.</param>
/// <param name="Success">Whether the recorded attempt succeeded.</param>
/// <param name="NewJobId">Job identifier stored on the record, if any.</param>
/// <param name="ErrorMessage">Error message stored on the record, if any.</param>
/// <param name="TriggeredBy">Trigger value stored on the record.</param>
/// <param name="TriggeredAt">When the attempt was triggered.</param>
/// <param name="CompletedAt">When the attempt completed, if it has.</param>
/// <param name="InitiatedBy">Initiator value stored on the record.</param>
public sealed record ReplayAuditResponse(
    Guid AuditId, Guid EntryId,
    int AttemptNumber, bool Success,
    Guid? NewJobId, string? ErrorMessage,
    string TriggeredBy, DateTimeOffset TriggeredAt,
    DateTimeOffset? CompletedAt, string InitiatedBy);
|
|
|
|
/// <summary>Envelope for the records returned by the audit endpoint.</summary>
/// <param name="Audits">Replay audit records for the requested entry.</param>
public sealed record ReplayAuditListResponse(IReadOnlyList<ReplayAuditResponse> Audits);
|