up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
sdk-generator-smoke / sdk-smoke (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
sdk-generator-smoke / sdk-smoke (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1,185 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.Notifier.Worker.DeadLetter;
|
||||
|
||||
/// <summary>
|
||||
/// Service for managing dead-letter entries for failed notification deliveries.
|
||||
/// </summary>
|
||||
public interface IDeadLetterService
|
||||
{
|
||||
/// <summary>
|
||||
/// Enqueues a failed delivery to the dead-letter queue.
|
||||
/// </summary>
|
||||
Task<DeadLetterEntry> EnqueueAsync(
|
||||
DeadLetterEnqueueRequest request,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Retrieves a dead-letter entry by ID.
|
||||
/// </summary>
|
||||
Task<DeadLetterEntry?> GetAsync(
|
||||
string tenantId,
|
||||
string entryId,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Lists dead-letter entries with optional filtering.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<DeadLetterEntry>> ListAsync(
|
||||
string tenantId,
|
||||
DeadLetterListOptions? options = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Retries a dead-letter entry.
|
||||
/// </summary>
|
||||
Task<DeadLetterRetryResult> RetryAsync(
|
||||
string tenantId,
|
||||
string entryId,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Retries multiple dead-letter entries.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<DeadLetterRetryResult>> RetryBatchAsync(
|
||||
string tenantId,
|
||||
IEnumerable<string> entryIds,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Marks a dead-letter entry as resolved/dismissed.
|
||||
/// </summary>
|
||||
Task ResolveAsync(
|
||||
string tenantId,
|
||||
string entryId,
|
||||
string resolution,
|
||||
string? resolvedBy = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Deletes old dead-letter entries based on retention policy.
|
||||
/// </summary>
|
||||
Task<int> PurgeExpiredAsync(
|
||||
string tenantId,
|
||||
TimeSpan maxAge,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets statistics about dead-letter entries.
|
||||
/// </summary>
|
||||
Task<DeadLetterStats> GetStatsAsync(
|
||||
string tenantId,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Request to enqueue a dead-letter entry.
|
||||
/// </summary>
|
||||
public sealed record DeadLetterEnqueueRequest
|
||||
{
|
||||
public required string TenantId { get; init; }
|
||||
public required string DeliveryId { get; init; }
|
||||
public required string EventId { get; init; }
|
||||
public required string ChannelId { get; init; }
|
||||
public required string ChannelType { get; init; }
|
||||
public required string FailureReason { get; init; }
|
||||
public string? FailureDetails { get; init; }
|
||||
public int AttemptCount { get; init; }
|
||||
public DateTimeOffset? LastAttemptAt { get; init; }
|
||||
public IReadOnlyDictionary<string, string>? Metadata { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Original payload for retry purposes.
|
||||
/// </summary>
|
||||
public string? OriginalPayload { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// A dead-letter queue entry.
|
||||
/// </summary>
|
||||
public sealed record DeadLetterEntry
|
||||
{
|
||||
public required string EntryId { get; init; }
|
||||
public required string TenantId { get; init; }
|
||||
public required string DeliveryId { get; init; }
|
||||
public required string EventId { get; init; }
|
||||
public required string ChannelId { get; init; }
|
||||
public required string ChannelType { get; init; }
|
||||
public required string FailureReason { get; init; }
|
||||
public string? FailureDetails { get; init; }
|
||||
public required int AttemptCount { get; init; }
|
||||
public required DateTimeOffset CreatedAt { get; init; }
|
||||
public DateTimeOffset? LastAttemptAt { get; init; }
|
||||
public required DeadLetterStatus Status { get; init; }
|
||||
public int RetryCount { get; init; }
|
||||
public DateTimeOffset? LastRetryAt { get; init; }
|
||||
public string? Resolution { get; init; }
|
||||
public string? ResolvedBy { get; init; }
|
||||
public DateTimeOffset? ResolvedAt { get; init; }
|
||||
public ImmutableDictionary<string, string> Metadata { get; init; } = ImmutableDictionary<string, string>.Empty;
|
||||
public string? OriginalPayload { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Status of a dead-letter entry.
|
||||
/// </summary>
|
||||
public enum DeadLetterStatus
|
||||
{
|
||||
/// <summary>Entry is pending retry or resolution.</summary>
|
||||
Pending,
|
||||
|
||||
/// <summary>Entry is being retried.</summary>
|
||||
Retrying,
|
||||
|
||||
/// <summary>Entry was successfully retried.</summary>
|
||||
Retried,
|
||||
|
||||
/// <summary>Entry was manually resolved/dismissed.</summary>
|
||||
Resolved,
|
||||
|
||||
/// <summary>Entry exceeded max retries.</summary>
|
||||
Exhausted
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Options for listing dead-letter entries.
|
||||
/// </summary>
|
||||
public sealed record DeadLetterListOptions
|
||||
{
|
||||
public DeadLetterStatus? Status { get; init; }
|
||||
public string? ChannelId { get; init; }
|
||||
public string? ChannelType { get; init; }
|
||||
public DateTimeOffset? Since { get; init; }
|
||||
public DateTimeOffset? Until { get; init; }
|
||||
public int Limit { get; init; } = 50;
|
||||
public int Offset { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Result of a dead-letter retry attempt.
|
||||
/// </summary>
|
||||
public sealed record DeadLetterRetryResult
|
||||
{
|
||||
public required string EntryId { get; init; }
|
||||
public required bool Success { get; init; }
|
||||
public string? Error { get; init; }
|
||||
public DateTimeOffset? RetriedAt { get; init; }
|
||||
public string? NewDeliveryId { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Statistics about dead-letter entries.
|
||||
/// </summary>
|
||||
public sealed record DeadLetterStats
|
||||
{
|
||||
public required int TotalCount { get; init; }
|
||||
public required int PendingCount { get; init; }
|
||||
public required int RetryingCount { get; init; }
|
||||
public required int RetriedCount { get; init; }
|
||||
public required int ResolvedCount { get; init; }
|
||||
public required int ExhaustedCount { get; init; }
|
||||
public required IReadOnlyDictionary<string, int> ByChannel { get; init; }
|
||||
public required IReadOnlyDictionary<string, int> ByReason { get; init; }
|
||||
public DateTimeOffset? OldestEntryAt { get; init; }
|
||||
public DateTimeOffset? NewestEntryAt { get; init; }
|
||||
}
|
||||
@@ -0,0 +1,294 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Immutable;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Notifier.Worker.Observability;
|
||||
|
||||
namespace StellaOps.Notifier.Worker.DeadLetter;
|
||||
|
||||
/// <summary>
|
||||
/// In-memory implementation of dead-letter service.
|
||||
/// For production, use a persistent storage implementation.
|
||||
/// </summary>
|
||||
public sealed class InMemoryDeadLetterService : IDeadLetterService
|
||||
{
|
||||
private readonly ConcurrentDictionary<string, DeadLetterEntry> _entries = new();
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly INotifyMetrics? _metrics;
|
||||
private readonly ILogger<InMemoryDeadLetterService> _logger;
|
||||
|
||||
public InMemoryDeadLetterService(
|
||||
TimeProvider timeProvider,
|
||||
ILogger<InMemoryDeadLetterService> logger,
|
||||
INotifyMetrics? metrics = null)
|
||||
{
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_metrics = metrics;
|
||||
}
|
||||
|
||||
public Task<DeadLetterEntry> EnqueueAsync(
|
||||
DeadLetterEnqueueRequest request,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
|
||||
var entryId = Guid.NewGuid().ToString("N");
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
|
||||
var entry = new DeadLetterEntry
|
||||
{
|
||||
EntryId = entryId,
|
||||
TenantId = request.TenantId,
|
||||
DeliveryId = request.DeliveryId,
|
||||
EventId = request.EventId,
|
||||
ChannelId = request.ChannelId,
|
||||
ChannelType = request.ChannelType,
|
||||
FailureReason = request.FailureReason,
|
||||
FailureDetails = request.FailureDetails,
|
||||
AttemptCount = request.AttemptCount,
|
||||
CreatedAt = now,
|
||||
LastAttemptAt = request.LastAttemptAt ?? now,
|
||||
Status = DeadLetterStatus.Pending,
|
||||
Metadata = request.Metadata?.ToImmutableDictionary() ?? ImmutableDictionary<string, string>.Empty,
|
||||
OriginalPayload = request.OriginalPayload
|
||||
};
|
||||
|
||||
_entries[GetKey(request.TenantId, entryId)] = entry;
|
||||
|
||||
_metrics?.RecordDeadLetter(request.TenantId, request.FailureReason, request.ChannelType);
|
||||
|
||||
_logger.LogWarning(
|
||||
"Dead-lettered delivery {DeliveryId} for tenant {TenantId}: {Reason}",
|
||||
request.DeliveryId, request.TenantId, request.FailureReason);
|
||||
|
||||
return Task.FromResult(entry);
|
||||
}
|
||||
|
||||
public Task<DeadLetterEntry?> GetAsync(
|
||||
string tenantId,
|
||||
string entryId,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(entryId);
|
||||
|
||||
_entries.TryGetValue(GetKey(tenantId, entryId), out var entry);
|
||||
return Task.FromResult(entry);
|
||||
}
|
||||
|
||||
public Task<IReadOnlyList<DeadLetterEntry>> ListAsync(
|
||||
string tenantId,
|
||||
DeadLetterListOptions? options = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
options ??= new DeadLetterListOptions();
|
||||
|
||||
var query = _entries.Values
|
||||
.Where(e => e.TenantId == tenantId);
|
||||
|
||||
if (options.Status.HasValue)
|
||||
{
|
||||
query = query.Where(e => e.Status == options.Status.Value);
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(options.ChannelId))
|
||||
{
|
||||
query = query.Where(e => e.ChannelId == options.ChannelId);
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(options.ChannelType))
|
||||
{
|
||||
query = query.Where(e => e.ChannelType == options.ChannelType);
|
||||
}
|
||||
|
||||
if (options.Since.HasValue)
|
||||
{
|
||||
query = query.Where(e => e.CreatedAt >= options.Since.Value);
|
||||
}
|
||||
|
||||
if (options.Until.HasValue)
|
||||
{
|
||||
query = query.Where(e => e.CreatedAt <= options.Until.Value);
|
||||
}
|
||||
|
||||
var result = query
|
||||
.OrderByDescending(e => e.CreatedAt)
|
||||
.Skip(options.Offset)
|
||||
.Take(options.Limit)
|
||||
.ToArray();
|
||||
|
||||
return Task.FromResult<IReadOnlyList<DeadLetterEntry>>(result);
|
||||
}
|
||||
|
||||
public Task<DeadLetterRetryResult> RetryAsync(
|
||||
string tenantId,
|
||||
string entryId,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(entryId);
|
||||
|
||||
var key = GetKey(tenantId, entryId);
|
||||
if (!_entries.TryGetValue(key, out var entry))
|
||||
{
|
||||
return Task.FromResult(new DeadLetterRetryResult
|
||||
{
|
||||
EntryId = entryId,
|
||||
Success = false,
|
||||
Error = "Entry not found"
|
||||
});
|
||||
}
|
||||
|
||||
if (entry.Status is DeadLetterStatus.Retried or DeadLetterStatus.Resolved)
|
||||
{
|
||||
return Task.FromResult(new DeadLetterRetryResult
|
||||
{
|
||||
EntryId = entryId,
|
||||
Success = false,
|
||||
Error = $"Entry is already {entry.Status}"
|
||||
});
|
||||
}
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
|
||||
// Update entry status
|
||||
var updatedEntry = entry with
|
||||
{
|
||||
Status = DeadLetterStatus.Retried,
|
||||
RetryCount = entry.RetryCount + 1,
|
||||
LastRetryAt = now
|
||||
};
|
||||
|
||||
_entries[key] = updatedEntry;
|
||||
|
||||
_logger.LogInformation(
|
||||
"Retried dead-letter entry {EntryId} for tenant {TenantId}",
|
||||
entryId, tenantId);
|
||||
|
||||
// In a real implementation, this would re-queue the delivery
|
||||
return Task.FromResult(new DeadLetterRetryResult
|
||||
{
|
||||
EntryId = entryId,
|
||||
Success = true,
|
||||
RetriedAt = now,
|
||||
NewDeliveryId = Guid.NewGuid().ToString("N")
|
||||
});
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<DeadLetterRetryResult>> RetryBatchAsync(
|
||||
string tenantId,
|
||||
IEnumerable<string> entryIds,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
ArgumentNullException.ThrowIfNull(entryIds);
|
||||
|
||||
var results = new List<DeadLetterRetryResult>();
|
||||
foreach (var entryId in entryIds)
|
||||
{
|
||||
var result = await RetryAsync(tenantId, entryId, cancellationToken).ConfigureAwait(false);
|
||||
results.Add(result);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
public Task ResolveAsync(
|
||||
string tenantId,
|
||||
string entryId,
|
||||
string resolution,
|
||||
string? resolvedBy = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(entryId);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(resolution);
|
||||
|
||||
var key = GetKey(tenantId, entryId);
|
||||
if (_entries.TryGetValue(key, out var entry))
|
||||
{
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
_entries[key] = entry with
|
||||
{
|
||||
Status = DeadLetterStatus.Resolved,
|
||||
Resolution = resolution,
|
||||
ResolvedBy = resolvedBy,
|
||||
ResolvedAt = now
|
||||
};
|
||||
|
||||
_logger.LogInformation(
|
||||
"Resolved dead-letter entry {EntryId} for tenant {TenantId}: {Resolution}",
|
||||
entryId, tenantId, resolution);
|
||||
}
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
public Task<int> PurgeExpiredAsync(
|
||||
string tenantId,
|
||||
TimeSpan maxAge,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
|
||||
var cutoff = _timeProvider.GetUtcNow() - maxAge;
|
||||
var toRemove = _entries
|
||||
.Where(kv => kv.Value.TenantId == tenantId && kv.Value.CreatedAt < cutoff)
|
||||
.Select(kv => kv.Key)
|
||||
.ToArray();
|
||||
|
||||
var count = 0;
|
||||
foreach (var key in toRemove)
|
||||
{
|
||||
if (_entries.TryRemove(key, out _))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (count > 0)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"Purged {Count} expired dead-letter entries for tenant {TenantId}",
|
||||
count, tenantId);
|
||||
}
|
||||
|
||||
return Task.FromResult(count);
|
||||
}
|
||||
|
||||
public Task<DeadLetterStats> GetStatsAsync(
|
||||
string tenantId,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
|
||||
var entries = _entries.Values.Where(e => e.TenantId == tenantId).ToArray();
|
||||
|
||||
var byChannel = entries
|
||||
.GroupBy(e => e.ChannelType)
|
||||
.ToDictionary(g => g.Key, g => g.Count());
|
||||
|
||||
var byReason = entries
|
||||
.GroupBy(e => e.FailureReason)
|
||||
.ToDictionary(g => g.Key, g => g.Count());
|
||||
|
||||
var stats = new DeadLetterStats
|
||||
{
|
||||
TotalCount = entries.Length,
|
||||
PendingCount = entries.Count(e => e.Status == DeadLetterStatus.Pending),
|
||||
RetryingCount = entries.Count(e => e.Status == DeadLetterStatus.Retrying),
|
||||
RetriedCount = entries.Count(e => e.Status == DeadLetterStatus.Retried),
|
||||
ResolvedCount = entries.Count(e => e.Status == DeadLetterStatus.Resolved),
|
||||
ExhaustedCount = entries.Count(e => e.Status == DeadLetterStatus.Exhausted),
|
||||
ByChannel = byChannel,
|
||||
ByReason = byReason,
|
||||
OldestEntryAt = entries.MinBy(e => e.CreatedAt)?.CreatedAt,
|
||||
NewestEntryAt = entries.MaxBy(e => e.CreatedAt)?.CreatedAt
|
||||
};
|
||||
|
||||
return Task.FromResult(stats);
|
||||
}
|
||||
|
||||
private static string GetKey(string tenantId, string entryId) => $"{tenantId}:{entryId}";
|
||||
}
|
||||
Reference in New Issue
Block a user