up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled
sdk-generator-smoke / sdk-smoke (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-27 08:51:10 +02:00
parent ea970ead2a
commit c34fb7256d
126 changed files with 18553 additions and 693 deletions

View File

@@ -0,0 +1,185 @@
using System.Collections.Immutable;
namespace StellaOps.Notifier.Worker.DeadLetter;
/// <summary>
/// Service for managing dead-letter entries for failed notification deliveries.
/// </summary>
public interface IDeadLetterService
{
/// <summary>
/// Enqueues a failed delivery to the dead-letter queue.
/// </summary>
Task<DeadLetterEntry> EnqueueAsync(
DeadLetterEnqueueRequest request,
CancellationToken cancellationToken = default);
/// <summary>
/// Retrieves a dead-letter entry by ID.
/// </summary>
Task<DeadLetterEntry?> GetAsync(
string tenantId,
string entryId,
CancellationToken cancellationToken = default);
/// <summary>
/// Lists dead-letter entries with optional filtering.
/// </summary>
Task<IReadOnlyList<DeadLetterEntry>> ListAsync(
string tenantId,
DeadLetterListOptions? options = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Retries a dead-letter entry.
/// </summary>
Task<DeadLetterRetryResult> RetryAsync(
string tenantId,
string entryId,
CancellationToken cancellationToken = default);
/// <summary>
/// Retries multiple dead-letter entries.
/// </summary>
Task<IReadOnlyList<DeadLetterRetryResult>> RetryBatchAsync(
string tenantId,
IEnumerable<string> entryIds,
CancellationToken cancellationToken = default);
/// <summary>
/// Marks a dead-letter entry as resolved/dismissed.
/// </summary>
Task ResolveAsync(
string tenantId,
string entryId,
string resolution,
string? resolvedBy = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Deletes old dead-letter entries based on retention policy.
/// </summary>
Task<int> PurgeExpiredAsync(
string tenantId,
TimeSpan maxAge,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets statistics about dead-letter entries.
/// </summary>
Task<DeadLetterStats> GetStatsAsync(
string tenantId,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Request to enqueue a dead-letter entry.
/// </summary>
public sealed record DeadLetterEnqueueRequest
{
public required string TenantId { get; init; }
public required string DeliveryId { get; init; }
public required string EventId { get; init; }
public required string ChannelId { get; init; }
public required string ChannelType { get; init; }
public required string FailureReason { get; init; }
public string? FailureDetails { get; init; }
public int AttemptCount { get; init; }
public DateTimeOffset? LastAttemptAt { get; init; }
public IReadOnlyDictionary<string, string>? Metadata { get; init; }
/// <summary>
/// Original payload for retry purposes.
/// </summary>
public string? OriginalPayload { get; init; }
}
/// <summary>
/// A dead-letter queue entry.
/// </summary>
public sealed record DeadLetterEntry
{
public required string EntryId { get; init; }
public required string TenantId { get; init; }
public required string DeliveryId { get; init; }
public required string EventId { get; init; }
public required string ChannelId { get; init; }
public required string ChannelType { get; init; }
public required string FailureReason { get; init; }
public string? FailureDetails { get; init; }
public required int AttemptCount { get; init; }
public required DateTimeOffset CreatedAt { get; init; }
public DateTimeOffset? LastAttemptAt { get; init; }
public required DeadLetterStatus Status { get; init; }
public int RetryCount { get; init; }
public DateTimeOffset? LastRetryAt { get; init; }
public string? Resolution { get; init; }
public string? ResolvedBy { get; init; }
public DateTimeOffset? ResolvedAt { get; init; }
public ImmutableDictionary<string, string> Metadata { get; init; } = ImmutableDictionary<string, string>.Empty;
public string? OriginalPayload { get; init; }
}
/// <summary>
/// Status of a dead-letter entry.
/// </summary>
public enum DeadLetterStatus
{
/// <summary>Entry is pending retry or resolution.</summary>
Pending,
/// <summary>Entry is being retried.</summary>
Retrying,
/// <summary>Entry was successfully retried.</summary>
Retried,
/// <summary>Entry was manually resolved/dismissed.</summary>
Resolved,
/// <summary>Entry exceeded max retries.</summary>
Exhausted
}
/// <summary>
/// Options for listing dead-letter entries.
/// </summary>
public sealed record DeadLetterListOptions
{
public DeadLetterStatus? Status { get; init; }
public string? ChannelId { get; init; }
public string? ChannelType { get; init; }
public DateTimeOffset? Since { get; init; }
public DateTimeOffset? Until { get; init; }
public int Limit { get; init; } = 50;
public int Offset { get; init; }
}
/// <summary>
/// Result of a dead-letter retry attempt.
/// </summary>
public sealed record DeadLetterRetryResult
{
public required string EntryId { get; init; }
public required bool Success { get; init; }
public string? Error { get; init; }
public DateTimeOffset? RetriedAt { get; init; }
public string? NewDeliveryId { get; init; }
}
/// <summary>
/// Statistics about dead-letter entries.
/// </summary>
public sealed record DeadLetterStats
{
public required int TotalCount { get; init; }
public required int PendingCount { get; init; }
public required int RetryingCount { get; init; }
public required int RetriedCount { get; init; }
public required int ResolvedCount { get; init; }
public required int ExhaustedCount { get; init; }
public required IReadOnlyDictionary<string, int> ByChannel { get; init; }
public required IReadOnlyDictionary<string, int> ByReason { get; init; }
public DateTimeOffset? OldestEntryAt { get; init; }
public DateTimeOffset? NewestEntryAt { get; init; }
}

View File

@@ -0,0 +1,294 @@
using System.Collections.Concurrent;
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.Notifier.Worker.Observability;
namespace StellaOps.Notifier.Worker.DeadLetter;
/// <summary>
/// In-memory implementation of dead-letter service.
/// For production, use a persistent storage implementation.
/// </summary>
public sealed class InMemoryDeadLetterService : IDeadLetterService
{
private readonly ConcurrentDictionary<string, DeadLetterEntry> _entries = new();
private readonly TimeProvider _timeProvider;
private readonly INotifyMetrics? _metrics;
private readonly ILogger<InMemoryDeadLetterService> _logger;
public InMemoryDeadLetterService(
TimeProvider timeProvider,
ILogger<InMemoryDeadLetterService> logger,
INotifyMetrics? metrics = null)
{
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_metrics = metrics;
}
public Task<DeadLetterEntry> EnqueueAsync(
DeadLetterEnqueueRequest request,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(request);
var entryId = Guid.NewGuid().ToString("N");
var now = _timeProvider.GetUtcNow();
var entry = new DeadLetterEntry
{
EntryId = entryId,
TenantId = request.TenantId,
DeliveryId = request.DeliveryId,
EventId = request.EventId,
ChannelId = request.ChannelId,
ChannelType = request.ChannelType,
FailureReason = request.FailureReason,
FailureDetails = request.FailureDetails,
AttemptCount = request.AttemptCount,
CreatedAt = now,
LastAttemptAt = request.LastAttemptAt ?? now,
Status = DeadLetterStatus.Pending,
Metadata = request.Metadata?.ToImmutableDictionary() ?? ImmutableDictionary<string, string>.Empty,
OriginalPayload = request.OriginalPayload
};
_entries[GetKey(request.TenantId, entryId)] = entry;
_metrics?.RecordDeadLetter(request.TenantId, request.FailureReason, request.ChannelType);
_logger.LogWarning(
"Dead-lettered delivery {DeliveryId} for tenant {TenantId}: {Reason}",
request.DeliveryId, request.TenantId, request.FailureReason);
return Task.FromResult(entry);
}
public Task<DeadLetterEntry?> GetAsync(
string tenantId,
string entryId,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(entryId);
_entries.TryGetValue(GetKey(tenantId, entryId), out var entry);
return Task.FromResult(entry);
}
public Task<IReadOnlyList<DeadLetterEntry>> ListAsync(
string tenantId,
DeadLetterListOptions? options = null,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
options ??= new DeadLetterListOptions();
var query = _entries.Values
.Where(e => e.TenantId == tenantId);
if (options.Status.HasValue)
{
query = query.Where(e => e.Status == options.Status.Value);
}
if (!string.IsNullOrWhiteSpace(options.ChannelId))
{
query = query.Where(e => e.ChannelId == options.ChannelId);
}
if (!string.IsNullOrWhiteSpace(options.ChannelType))
{
query = query.Where(e => e.ChannelType == options.ChannelType);
}
if (options.Since.HasValue)
{
query = query.Where(e => e.CreatedAt >= options.Since.Value);
}
if (options.Until.HasValue)
{
query = query.Where(e => e.CreatedAt <= options.Until.Value);
}
var result = query
.OrderByDescending(e => e.CreatedAt)
.Skip(options.Offset)
.Take(options.Limit)
.ToArray();
return Task.FromResult<IReadOnlyList<DeadLetterEntry>>(result);
}
public Task<DeadLetterRetryResult> RetryAsync(
string tenantId,
string entryId,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(entryId);
var key = GetKey(tenantId, entryId);
if (!_entries.TryGetValue(key, out var entry))
{
return Task.FromResult(new DeadLetterRetryResult
{
EntryId = entryId,
Success = false,
Error = "Entry not found"
});
}
if (entry.Status is DeadLetterStatus.Retried or DeadLetterStatus.Resolved)
{
return Task.FromResult(new DeadLetterRetryResult
{
EntryId = entryId,
Success = false,
Error = $"Entry is already {entry.Status}"
});
}
var now = _timeProvider.GetUtcNow();
// Update entry status
var updatedEntry = entry with
{
Status = DeadLetterStatus.Retried,
RetryCount = entry.RetryCount + 1,
LastRetryAt = now
};
_entries[key] = updatedEntry;
_logger.LogInformation(
"Retried dead-letter entry {EntryId} for tenant {TenantId}",
entryId, tenantId);
// In a real implementation, this would re-queue the delivery
return Task.FromResult(new DeadLetterRetryResult
{
EntryId = entryId,
Success = true,
RetriedAt = now,
NewDeliveryId = Guid.NewGuid().ToString("N")
});
}
public async Task<IReadOnlyList<DeadLetterRetryResult>> RetryBatchAsync(
string tenantId,
IEnumerable<string> entryIds,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentNullException.ThrowIfNull(entryIds);
var results = new List<DeadLetterRetryResult>();
foreach (var entryId in entryIds)
{
var result = await RetryAsync(tenantId, entryId, cancellationToken).ConfigureAwait(false);
results.Add(result);
}
return results;
}
public Task ResolveAsync(
string tenantId,
string entryId,
string resolution,
string? resolvedBy = null,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(entryId);
ArgumentException.ThrowIfNullOrWhiteSpace(resolution);
var key = GetKey(tenantId, entryId);
if (_entries.TryGetValue(key, out var entry))
{
var now = _timeProvider.GetUtcNow();
_entries[key] = entry with
{
Status = DeadLetterStatus.Resolved,
Resolution = resolution,
ResolvedBy = resolvedBy,
ResolvedAt = now
};
_logger.LogInformation(
"Resolved dead-letter entry {EntryId} for tenant {TenantId}: {Resolution}",
entryId, tenantId, resolution);
}
return Task.CompletedTask;
}
public Task<int> PurgeExpiredAsync(
string tenantId,
TimeSpan maxAge,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
var cutoff = _timeProvider.GetUtcNow() - maxAge;
var toRemove = _entries
.Where(kv => kv.Value.TenantId == tenantId && kv.Value.CreatedAt < cutoff)
.Select(kv => kv.Key)
.ToArray();
var count = 0;
foreach (var key in toRemove)
{
if (_entries.TryRemove(key, out _))
{
count++;
}
}
if (count > 0)
{
_logger.LogInformation(
"Purged {Count} expired dead-letter entries for tenant {TenantId}",
count, tenantId);
}
return Task.FromResult(count);
}
public Task<DeadLetterStats> GetStatsAsync(
string tenantId,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
var entries = _entries.Values.Where(e => e.TenantId == tenantId).ToArray();
var byChannel = entries
.GroupBy(e => e.ChannelType)
.ToDictionary(g => g.Key, g => g.Count());
var byReason = entries
.GroupBy(e => e.FailureReason)
.ToDictionary(g => g.Key, g => g.Count());
var stats = new DeadLetterStats
{
TotalCount = entries.Length,
PendingCount = entries.Count(e => e.Status == DeadLetterStatus.Pending),
RetryingCount = entries.Count(e => e.Status == DeadLetterStatus.Retrying),
RetriedCount = entries.Count(e => e.Status == DeadLetterStatus.Retried),
ResolvedCount = entries.Count(e => e.Status == DeadLetterStatus.Resolved),
ExhaustedCount = entries.Count(e => e.Status == DeadLetterStatus.Exhausted),
ByChannel = byChannel,
ByReason = byReason,
OldestEntryAt = entries.MinBy(e => e.CreatedAt)?.CreatedAt,
NewestEntryAt = entries.MaxBy(e => e.CreatedAt)?.CreatedAt
};
return Task.FromResult(stats);
}
private static string GetKey(string tenantId, string entryId) => $"{tenantId}:{entryId}";
}