Restructure solution layout by module
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
# StellaOps.Scheduler.Queue — Agent Charter
|
||||
|
||||
## Mission
|
||||
Provide queue abstraction (Redis Streams / NATS JetStream) for planner inputs and runner segments per `docs/ARCHITECTURE_SCHEDULER.md`.
|
||||
@@ -0,0 +1,3 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("StellaOps.Scheduler.Queue.Tests")]
|
||||
@@ -0,0 +1,9 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
|
||||
internal interface ISchedulerQueueTransportDiagnostics
|
||||
{
|
||||
ValueTask PingAsync(CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
|
||||
internal interface INatsSchedulerQueuePayload<TMessage>
|
||||
{
|
||||
string QueueName { get; }
|
||||
|
||||
string GetIdempotencyKey(TMessage message);
|
||||
|
||||
byte[] Serialize(TMessage message);
|
||||
|
||||
TMessage Deserialize(byte[] payload);
|
||||
|
||||
string GetRunId(TMessage message);
|
||||
|
||||
string GetTenantId(TMessage message);
|
||||
|
||||
string? GetScheduleId(TMessage message);
|
||||
|
||||
string? GetSegmentId(TMessage message);
|
||||
|
||||
string? GetCorrelationId(TMessage message);
|
||||
|
||||
IReadOnlyDictionary<string, string>? GetAttributes(TMessage message);
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NATS.Client.Core;
|
||||
using NATS.Client.JetStream;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
|
||||
internal sealed class NatsSchedulerPlannerQueue
|
||||
: NatsSchedulerQueueBase<PlannerQueueMessage>, ISchedulerPlannerQueue
|
||||
{
|
||||
public NatsSchedulerPlannerQueue(
|
||||
SchedulerQueueOptions queueOptions,
|
||||
SchedulerNatsQueueOptions natsOptions,
|
||||
ILogger<NatsSchedulerPlannerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
natsOptions,
|
||||
natsOptions.Planner,
|
||||
PlannerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
private sealed class PlannerPayload : INatsSchedulerQueuePayload<PlannerQueueMessage>
|
||||
{
|
||||
public static PlannerPayload Instance { get; } = new();
|
||||
|
||||
public string QueueName => "planner";
|
||||
|
||||
public string GetIdempotencyKey(PlannerQueueMessage message)
|
||||
=> message.IdempotencyKey;
|
||||
|
||||
public byte[] Serialize(PlannerQueueMessage message)
|
||||
=> Encoding.UTF8.GetBytes(CanonicalJsonSerializer.Serialize(message));
|
||||
|
||||
public PlannerQueueMessage Deserialize(byte[] payload)
|
||||
=> CanonicalJsonSerializer.Deserialize<PlannerQueueMessage>(Encoding.UTF8.GetString(payload));
|
||||
|
||||
public string GetRunId(PlannerQueueMessage message)
|
||||
=> message.Run.Id;
|
||||
|
||||
public string GetTenantId(PlannerQueueMessage message)
|
||||
=> message.Run.TenantId;
|
||||
|
||||
public string? GetScheduleId(PlannerQueueMessage message)
|
||||
=> message.ScheduleId;
|
||||
|
||||
public string? GetSegmentId(PlannerQueueMessage message)
|
||||
=> null;
|
||||
|
||||
public string? GetCorrelationId(PlannerQueueMessage message)
|
||||
=> message.CorrelationId;
|
||||
|
||||
public IReadOnlyDictionary<string, string>? GetAttributes(PlannerQueueMessage message)
|
||||
=> null;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,692 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.ObjectModel;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NATS.Client.Core;
|
||||
using NATS.Client.JetStream;
|
||||
using NATS.Client.JetStream.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
|
||||
internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMessage>, IAsyncDisposable, ISchedulerQueueTransportDiagnostics
|
||||
{
|
||||
private const string TransportName = "nats";
|
||||
|
||||
private static readonly INatsSerializer<byte[]> PayloadSerializer = NatsRawSerializer<byte[]>.Default;
|
||||
|
||||
private readonly SchedulerQueueOptions _queueOptions;
|
||||
private readonly SchedulerNatsQueueOptions _natsOptions;
|
||||
private readonly SchedulerNatsStreamOptions _streamOptions;
|
||||
private readonly INatsSchedulerQueuePayload<TMessage> _payload;
|
||||
private readonly ILogger _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly SemaphoreSlim _connectionGate = new(1, 1);
|
||||
private readonly Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>> _connectionFactory;
|
||||
|
||||
private NatsConnection? _connection;
|
||||
private NatsJSContext? _jsContext;
|
||||
private INatsJSConsumer? _consumer;
|
||||
private bool _disposed;
|
||||
private long _approximateDepth;
|
||||
|
||||
protected NatsSchedulerQueueBase(
|
||||
SchedulerQueueOptions queueOptions,
|
||||
SchedulerNatsQueueOptions natsOptions,
|
||||
SchedulerNatsStreamOptions streamOptions,
|
||||
INatsSchedulerQueuePayload<TMessage> payload,
|
||||
ILogger logger,
|
||||
TimeProvider timeProvider,
|
||||
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
|
||||
{
|
||||
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
|
||||
_natsOptions = natsOptions ?? throw new ArgumentNullException(nameof(natsOptions));
|
||||
_streamOptions = streamOptions ?? throw new ArgumentNullException(nameof(streamOptions));
|
||||
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_connectionFactory = connectionFactory ?? ((opts, cancellationToken) => new ValueTask<NatsConnection>(new NatsConnection(opts)));
|
||||
|
||||
if (string.IsNullOrWhiteSpace(_natsOptions.Url))
|
||||
{
|
||||
throw new InvalidOperationException("NATS connection URL must be configured for the scheduler queue.");
|
||||
}
|
||||
}
|
||||
|
||||
public async ValueTask<SchedulerQueueEnqueueResult> EnqueueAsync(
|
||||
TMessage message,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(message);
|
||||
|
||||
var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
|
||||
await EnsureStreamAndConsumerAsync(js, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var payloadBytes = _payload.Serialize(message);
|
||||
var idempotencyKey = _payload.GetIdempotencyKey(message);
|
||||
var headers = BuildHeaders(message, idempotencyKey);
|
||||
|
||||
var publishOptions = new NatsJSPubOpts
|
||||
{
|
||||
MsgId = idempotencyKey,
|
||||
RetryAttempts = 0
|
||||
};
|
||||
|
||||
var ack = await js.PublishAsync(
|
||||
_streamOptions.Subject,
|
||||
payloadBytes,
|
||||
PayloadSerializer,
|
||||
publishOptions,
|
||||
headers,
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
if (ack.Duplicate)
|
||||
{
|
||||
SchedulerQueueMetrics.RecordDeduplicated(TransportName, _payload.QueueName);
|
||||
_logger.LogDebug(
|
||||
"Duplicate enqueue detected for scheduler {Queue} message idempotency key {Key}; sequence {Sequence} reused.",
|
||||
_payload.QueueName,
|
||||
idempotencyKey,
|
||||
ack.Seq);
|
||||
|
||||
PublishDepth();
|
||||
return new SchedulerQueueEnqueueResult(ack.Seq.ToString(), true);
|
||||
}
|
||||
|
||||
SchedulerQueueMetrics.RecordEnqueued(TransportName, _payload.QueueName);
|
||||
_logger.LogDebug(
|
||||
"Enqueued scheduler {Queue} message into stream {Stream} with sequence {Sequence}.",
|
||||
_payload.QueueName,
|
||||
ack.Stream,
|
||||
ack.Seq);
|
||||
|
||||
IncrementDepth();
|
||||
return new SchedulerQueueEnqueueResult(ack.Seq.ToString(), false);
|
||||
}
|
||||
|
||||
public async ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> LeaseAsync(
|
||||
SchedulerQueueLeaseRequest request,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
|
||||
var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
|
||||
var consumer = await EnsureStreamAndConsumerAsync(js, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var fetchOpts = new NatsJSFetchOpts
|
||||
{
|
||||
MaxMsgs = request.BatchSize,
|
||||
Expires = request.LeaseDuration,
|
||||
IdleHeartbeat = _natsOptions.IdleHeartbeat
|
||||
};
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var leases = new List<ISchedulerQueueLease<TMessage>>(request.BatchSize);
|
||||
|
||||
await foreach (var message in consumer.FetchAsync(PayloadSerializer, fetchOpts, cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
var lease = CreateLease(message, request.Consumer, now, request.LeaseDuration);
|
||||
if (lease is not null)
|
||||
{
|
||||
leases.Add(lease);
|
||||
}
|
||||
}
|
||||
|
||||
PublishDepth();
|
||||
return leases;
|
||||
}
|
||||
|
||||
public async ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> ClaimExpiredAsync(
|
||||
SchedulerQueueClaimOptions options,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
|
||||
var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
|
||||
var consumer = await EnsureStreamAndConsumerAsync(js, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var fetchOpts = new NatsJSFetchOpts
|
||||
{
|
||||
MaxMsgs = options.BatchSize,
|
||||
Expires = options.MinIdleTime,
|
||||
IdleHeartbeat = _natsOptions.IdleHeartbeat
|
||||
};
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var leases = new List<ISchedulerQueueLease<TMessage>>(options.BatchSize);
|
||||
|
||||
await foreach (var message in consumer.FetchAsync(PayloadSerializer, fetchOpts, cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
var deliveries = (int)(message.Metadata?.NumDelivered ?? 1);
|
||||
if (deliveries <= 1)
|
||||
{
|
||||
await message.NakAsync(new AckOpts(), TimeSpan.Zero, cancellationToken).ConfigureAwait(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
var lease = CreateLease(message, options.ClaimantConsumer, now, _queueOptions.DefaultLeaseDuration);
|
||||
if (lease is not null)
|
||||
{
|
||||
leases.Add(lease);
|
||||
}
|
||||
}
|
||||
|
||||
PublishDepth();
|
||||
return leases;
|
||||
}
|
||||
|
||||
public async ValueTask DisposeAsync()
|
||||
{
|
||||
if (_disposed)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
_disposed = true;
|
||||
|
||||
if (_connection is not null)
|
||||
{
|
||||
await _connection.DisposeAsync().ConfigureAwait(false);
|
||||
}
|
||||
|
||||
_connectionGate.Dispose();
|
||||
SchedulerQueueMetrics.RemoveDepth(TransportName, _payload.QueueName);
|
||||
GC.SuppressFinalize(this);
|
||||
}
|
||||
|
||||
public async ValueTask PingAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
var connection = await EnsureConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
await connection.PingAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
internal async Task AcknowledgeAsync(NatsSchedulerQueueLease<TMessage> lease, CancellationToken cancellationToken)
|
||||
{
|
||||
if (!lease.TryBeginCompletion())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await lease.RawMessage.AckAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);
|
||||
SchedulerQueueMetrics.RecordAck(TransportName, _payload.QueueName);
|
||||
DecrementDepth();
|
||||
}
|
||||
|
||||
internal async Task RenewAsync(NatsSchedulerQueueLease<TMessage> lease, TimeSpan leaseDuration, CancellationToken cancellationToken)
|
||||
{
|
||||
await lease.RawMessage.AckProgressAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);
|
||||
lease.RefreshLease(_timeProvider.GetUtcNow().Add(leaseDuration));
|
||||
}
|
||||
|
||||
internal async Task ReleaseAsync(NatsSchedulerQueueLease<TMessage> lease, SchedulerQueueReleaseDisposition disposition, CancellationToken cancellationToken)
|
||||
{
|
||||
if (disposition == SchedulerQueueReleaseDisposition.Retry && lease.Attempt >= _queueOptions.MaxDeliveryAttempts)
|
||||
{
|
||||
await DeadLetterAsync(lease, $"max-delivery-attempts:{lease.Attempt}", cancellationToken).ConfigureAwait(false);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!lease.TryBeginCompletion())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (disposition == SchedulerQueueReleaseDisposition.Retry)
|
||||
{
|
||||
SchedulerQueueMetrics.RecordRetry(TransportName, _payload.QueueName);
|
||||
var delay = CalculateBackoff(lease.Attempt + 1);
|
||||
lease.IncrementAttempt();
|
||||
await lease.RawMessage.NakAsync(new AckOpts(), delay, cancellationToken).ConfigureAwait(false);
|
||||
_logger.LogWarning(
|
||||
"Requeued scheduler {Queue} message {RunId} with delay {Delay} (attempt {Attempt}).",
|
||||
_payload.QueueName,
|
||||
lease.RunId,
|
||||
delay,
|
||||
lease.Attempt);
|
||||
}
|
||||
else
|
||||
{
|
||||
await lease.RawMessage.AckTerminateAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);
|
||||
SchedulerQueueMetrics.RecordAck(TransportName, _payload.QueueName);
|
||||
DecrementDepth();
|
||||
_logger.LogInformation(
|
||||
"Abandoned scheduler {Queue} message {RunId} after {Attempt} attempt(s).",
|
||||
_payload.QueueName,
|
||||
lease.RunId,
|
||||
lease.Attempt);
|
||||
}
|
||||
|
||||
PublishDepth();
|
||||
}
|
||||
|
||||
internal async Task DeadLetterAsync(NatsSchedulerQueueLease<TMessage> lease, string reason, CancellationToken cancellationToken)
|
||||
{
|
||||
if (!lease.TryBeginCompletion())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await lease.RawMessage.AckAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);
|
||||
DecrementDepth();
|
||||
|
||||
var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (!_queueOptions.DeadLetterEnabled)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
"Dropped scheduler {Queue} message {RunId} after {Attempt} attempt(s); dead-letter disabled. Reason: {Reason}",
|
||||
_payload.QueueName,
|
||||
lease.RunId,
|
||||
lease.Attempt,
|
||||
reason);
|
||||
PublishDepth();
|
||||
return;
|
||||
}
|
||||
|
||||
await EnsureDeadLetterStreamAsync(js, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var headers = BuildDeadLetterHeaders(lease, reason);
|
||||
await js.PublishAsync(
|
||||
_streamOptions.DeadLetterSubject,
|
||||
lease.Payload,
|
||||
PayloadSerializer,
|
||||
new NatsJSPubOpts(),
|
||||
headers,
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
SchedulerQueueMetrics.RecordDeadLetter(TransportName, _payload.QueueName);
|
||||
_logger.LogError(
|
||||
"Dead-lettered scheduler {Queue} message {RunId} after {Attempt} attempt(s): {Reason}",
|
||||
_payload.QueueName,
|
||||
lease.RunId,
|
||||
lease.Attempt,
|
||||
reason);
|
||||
PublishDepth();
|
||||
}
|
||||
|
||||
private async Task<NatsJSContext> GetJetStreamAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
if (_jsContext is not null)
|
||||
{
|
||||
return _jsContext;
|
||||
}
|
||||
|
||||
var connection = await EnsureConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
|
||||
try
|
||||
{
|
||||
_jsContext ??= new NatsJSContext(connection);
|
||||
return _jsContext;
|
||||
}
|
||||
finally
|
||||
{
|
||||
_connectionGate.Release();
|
||||
}
|
||||
}
|
||||
|
||||
private async ValueTask<INatsJSConsumer> EnsureStreamAndConsumerAsync(NatsJSContext js, CancellationToken cancellationToken)
|
||||
{
|
||||
if (_consumer is not null)
|
||||
{
|
||||
return _consumer;
|
||||
}
|
||||
|
||||
await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
|
||||
try
|
||||
{
|
||||
if (_consumer is not null)
|
||||
{
|
||||
return _consumer;
|
||||
}
|
||||
|
||||
await EnsureStreamAsync(js, cancellationToken).ConfigureAwait(false);
|
||||
await EnsureDeadLetterStreamAsync(js, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var consumerConfig = new ConsumerConfig
|
||||
{
|
||||
DurableName = _streamOptions.DurableConsumer,
|
||||
AckPolicy = ConsumerConfigAckPolicy.Explicit,
|
||||
ReplayPolicy = ConsumerConfigReplayPolicy.Instant,
|
||||
DeliverPolicy = ConsumerConfigDeliverPolicy.All,
|
||||
AckWait = ToNanoseconds(_streamOptions.AckWait),
|
||||
MaxAckPending = Math.Max(1, _streamOptions.MaxAckPending),
|
||||
MaxDeliver = Math.Max(1, _queueOptions.MaxDeliveryAttempts),
|
||||
FilterSubjects = new[] { _streamOptions.Subject }
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
_consumer = await js.CreateConsumerAsync(
|
||||
_streamOptions.Stream,
|
||||
consumerConfig,
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
catch (NatsJSApiException apiEx)
|
||||
{
|
||||
_logger.LogDebug(apiEx,
|
||||
"CreateConsumerAsync failed with code {Code}; attempting to reuse durable {Durable}.",
|
||||
apiEx.Error?.Code,
|
||||
_streamOptions.DurableConsumer);
|
||||
|
||||
_consumer = await js.GetConsumerAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.DurableConsumer,
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
return _consumer;
|
||||
}
|
||||
finally
|
||||
{
|
||||
_connectionGate.Release();
|
||||
}
|
||||
}
|
||||
|
||||
private async Task EnsureStreamAsync(NatsJSContext js, CancellationToken cancellationToken)
|
||||
{
|
||||
try
|
||||
{
|
||||
await js.GetStreamAsync(
|
||||
_streamOptions.Stream,
|
||||
new StreamInfoRequest(),
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
catch (NatsJSApiException)
|
||||
{
|
||||
var config = new StreamConfig(
|
||||
name: _streamOptions.Stream,
|
||||
subjects: new[] { _streamOptions.Subject })
|
||||
{
|
||||
Retention = StreamConfigRetention.Workqueue,
|
||||
Storage = StreamConfigStorage.File,
|
||||
MaxConsumers = -1,
|
||||
MaxMsgs = -1,
|
||||
MaxBytes = -1,
|
||||
MaxAge = 0
|
||||
};
|
||||
|
||||
await js.CreateStreamAsync(config, cancellationToken).ConfigureAwait(false);
|
||||
_logger.LogInformation(
|
||||
"Created NATS JetStream stream {Stream} ({Subject}) for scheduler {Queue} queue.",
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.Subject,
|
||||
_payload.QueueName);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task EnsureDeadLetterStreamAsync(NatsJSContext js, CancellationToken cancellationToken)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(_streamOptions.DeadLetterStream) || string.IsNullOrWhiteSpace(_streamOptions.DeadLetterSubject))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await js.GetStreamAsync(
|
||||
_streamOptions.DeadLetterStream,
|
||||
new StreamInfoRequest(),
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
catch (NatsJSApiException)
|
||||
{
|
||||
var config = new StreamConfig(
|
||||
name: _streamOptions.DeadLetterStream,
|
||||
subjects: new[] { _streamOptions.DeadLetterSubject })
|
||||
{
|
||||
Retention = StreamConfigRetention.Workqueue,
|
||||
Storage = StreamConfigStorage.File,
|
||||
MaxConsumers = -1,
|
||||
MaxMsgs = -1,
|
||||
MaxBytes = -1,
|
||||
MaxAge = 0
|
||||
};
|
||||
|
||||
await js.CreateStreamAsync(config, cancellationToken).ConfigureAwait(false);
|
||||
_logger.LogInformation(
|
||||
"Created NATS JetStream dead-letter stream {Stream} ({Subject}) for scheduler {Queue} queue.",
|
||||
_streamOptions.DeadLetterStream,
|
||||
_streamOptions.DeadLetterSubject,
|
||||
_payload.QueueName);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<NatsConnection> EnsureConnectionAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
if (_connection is not null)
|
||||
{
|
||||
return _connection;
|
||||
}
|
||||
|
||||
await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
|
||||
try
|
||||
{
|
||||
if (_connection is not null)
|
||||
{
|
||||
return _connection;
|
||||
}
|
||||
|
||||
var options = new NatsOpts
|
||||
{
|
||||
Url = _natsOptions.Url!,
|
||||
Name = $"stellaops-scheduler-{_payload.QueueName}-queue",
|
||||
CommandTimeout = TimeSpan.FromSeconds(10),
|
||||
RequestTimeout = TimeSpan.FromSeconds(20),
|
||||
PingInterval = TimeSpan.FromSeconds(30)
|
||||
};
|
||||
|
||||
_connection = await _connectionFactory(options, cancellationToken).ConfigureAwait(false);
|
||||
await _connection.ConnectAsync().ConfigureAwait(false);
|
||||
return _connection;
|
||||
}
|
||||
finally
|
||||
{
|
||||
_connectionGate.Release();
|
||||
}
|
||||
}
|
||||
|
||||
private NatsSchedulerQueueLease<TMessage>? CreateLease(
|
||||
NatsJSMsg<byte[]> message,
|
||||
string consumer,
|
||||
DateTimeOffset now,
|
||||
TimeSpan leaseDuration)
|
||||
{
|
||||
var payload = message.Data ?? ReadOnlyMemory<byte>.Empty;
|
||||
if (payload.IsEmpty)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
TMessage deserialized;
|
||||
try
|
||||
{
|
||||
deserialized = _payload.Deserialize(payload.ToArray());
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Failed to deserialize scheduler {Queue} payload from NATS sequence {Sequence}.", _payload.QueueName, message.Metadata?.Sequence);
|
||||
return null;
|
||||
}
|
||||
|
||||
var attempt = (int)(message.Metadata?.NumDelivered ?? 1);
|
||||
if (attempt <= 0)
|
||||
{
|
||||
attempt = 1;
|
||||
}
|
||||
|
||||
var headers = message.Headers ?? new NatsHeaders();
|
||||
|
||||
var enqueuedAt = headers.TryGetValue(SchedulerQueueFields.EnqueuedAt, out var enqueuedValues) && enqueuedValues.Count > 0
|
||||
&& long.TryParse(enqueuedValues[0], out var unix)
|
||||
? DateTimeOffset.FromUnixTimeMilliseconds(unix)
|
||||
: now;
|
||||
|
||||
var leaseExpires = now.Add(leaseDuration);
|
||||
var runId = _payload.GetRunId(deserialized);
|
||||
var tenantId = _payload.GetTenantId(deserialized);
|
||||
var scheduleId = _payload.GetScheduleId(deserialized);
|
||||
var segmentId = _payload.GetSegmentId(deserialized);
|
||||
var correlationId = _payload.GetCorrelationId(deserialized);
|
||||
var attributes = _payload.GetAttributes(deserialized) ?? new Dictionary<string, string>();
|
||||
|
||||
var attributeView = attributes.Count == 0
|
||||
? EmptyReadOnlyDictionary<string, string>.Instance
|
||||
: new ReadOnlyDictionary<string, string>(new Dictionary<string, string>(attributes, StringComparer.Ordinal));
|
||||
|
||||
return new NatsSchedulerQueueLease<TMessage>(
|
||||
this,
|
||||
message,
|
||||
payload.ToArray(),
|
||||
_payload.GetIdempotencyKey(deserialized),
|
||||
runId,
|
||||
tenantId,
|
||||
scheduleId,
|
||||
segmentId,
|
||||
correlationId,
|
||||
attributeView,
|
||||
deserialized,
|
||||
attempt,
|
||||
enqueuedAt,
|
||||
leaseExpires,
|
||||
consumer);
|
||||
}
|
||||
|
||||
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey)
|
||||
{
|
||||
var headers = new NatsHeaders
|
||||
{
|
||||
{ SchedulerQueueFields.IdempotencyKey, idempotencyKey },
|
||||
{ SchedulerQueueFields.RunId, _payload.GetRunId(message) },
|
||||
{ SchedulerQueueFields.TenantId, _payload.GetTenantId(message) },
|
||||
{ SchedulerQueueFields.QueueKind, _payload.QueueName },
|
||||
{ SchedulerQueueFields.EnqueuedAt, _timeProvider.GetUtcNow().ToUnixTimeMilliseconds().ToString() }
|
||||
};
|
||||
|
||||
var scheduleId = _payload.GetScheduleId(message);
|
||||
if (!string.IsNullOrWhiteSpace(scheduleId))
|
||||
{
|
||||
headers.Add(SchedulerQueueFields.ScheduleId, scheduleId);
|
||||
}
|
||||
|
||||
var segmentId = _payload.GetSegmentId(message);
|
||||
if (!string.IsNullOrWhiteSpace(segmentId))
|
||||
{
|
||||
headers.Add(SchedulerQueueFields.SegmentId, segmentId);
|
||||
}
|
||||
|
||||
var correlationId = _payload.GetCorrelationId(message);
|
||||
if (!string.IsNullOrWhiteSpace(correlationId))
|
||||
{
|
||||
headers.Add(SchedulerQueueFields.CorrelationId, correlationId);
|
||||
}
|
||||
|
||||
var attributes = _payload.GetAttributes(message);
|
||||
if (attributes is not null)
|
||||
{
|
||||
foreach (var kvp in attributes)
|
||||
{
|
||||
headers.Add(SchedulerQueueFields.AttributePrefix + kvp.Key, kvp.Value);
|
||||
}
|
||||
}
|
||||
|
||||
return headers;
|
||||
}
|
||||
|
||||
private NatsHeaders BuildDeadLetterHeaders(NatsSchedulerQueueLease<TMessage> lease, string reason)
|
||||
{
|
||||
var headers = new NatsHeaders
|
||||
{
|
||||
{ SchedulerQueueFields.RunId, lease.RunId },
|
||||
{ SchedulerQueueFields.TenantId, lease.TenantId },
|
||||
{ SchedulerQueueFields.QueueKind, _payload.QueueName },
|
||||
{ "reason", reason }
|
||||
};
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(lease.ScheduleId))
|
||||
{
|
||||
headers.Add(SchedulerQueueFields.ScheduleId, lease.ScheduleId);
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(lease.CorrelationId))
|
||||
{
|
||||
headers.Add(SchedulerQueueFields.CorrelationId, lease.CorrelationId);
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(lease.SegmentId))
|
||||
{
|
||||
headers.Add(SchedulerQueueFields.SegmentId, lease.SegmentId);
|
||||
}
|
||||
|
||||
return headers;
|
||||
}
|
||||
|
||||
private TimeSpan CalculateBackoff(int attempt)
|
||||
{
|
||||
var initial = _queueOptions.RetryInitialBackoff > TimeSpan.Zero
|
||||
? _queueOptions.RetryInitialBackoff
|
||||
: _streamOptions.RetryDelay;
|
||||
|
||||
if (initial <= TimeSpan.Zero)
|
||||
{
|
||||
return TimeSpan.Zero;
|
||||
}
|
||||
|
||||
if (attempt <= 1)
|
||||
{
|
||||
return initial;
|
||||
}
|
||||
|
||||
var max = _queueOptions.RetryMaxBackoff > TimeSpan.Zero
|
||||
? _queueOptions.RetryMaxBackoff
|
||||
: initial;
|
||||
|
||||
var exponent = attempt - 1;
|
||||
var scaledTicks = initial.Ticks * Math.Pow(2, exponent - 1);
|
||||
var cappedTicks = Math.Min(max.Ticks, scaledTicks);
|
||||
|
||||
return TimeSpan.FromTicks((long)Math.Max(initial.Ticks, cappedTicks));
|
||||
}
|
||||
|
||||
private static long ToNanoseconds(TimeSpan value)
|
||||
=> value <= TimeSpan.Zero ? 0 : (long)(value.TotalMilliseconds * 1_000_000.0);
|
||||
|
||||
private sealed class EmptyReadOnlyDictionary<TKey, TValue>
|
||||
where TKey : notnull
|
||||
{
|
||||
public static readonly IReadOnlyDictionary<TKey, TValue> Instance =
|
||||
new ReadOnlyDictionary<TKey, TValue>(new Dictionary<TKey, TValue>(0, EqualityComparer<TKey>.Default));
|
||||
}
|
||||
|
||||
private void IncrementDepth()
|
||||
{
|
||||
var depth = Interlocked.Increment(ref _approximateDepth);
|
||||
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
|
||||
}
|
||||
|
||||
private void DecrementDepth()
|
||||
{
|
||||
var depth = Interlocked.Decrement(ref _approximateDepth);
|
||||
if (depth < 0)
|
||||
{
|
||||
depth = Interlocked.Exchange(ref _approximateDepth, 0);
|
||||
}
|
||||
|
||||
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
|
||||
}
|
||||
|
||||
private void PublishDepth()
|
||||
{
|
||||
var depth = Volatile.Read(ref _approximateDepth);
|
||||
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using NATS.Client.JetStream;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
|
||||
internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<TMessage>
|
||||
{
|
||||
private readonly NatsSchedulerQueueBase<TMessage> _queue;
|
||||
private int _completed;
|
||||
|
||||
internal NatsSchedulerQueueLease(
|
||||
NatsSchedulerQueueBase<TMessage> queue,
|
||||
NatsJSMsg<byte[]> message,
|
||||
byte[] payload,
|
||||
string idempotencyKey,
|
||||
string runId,
|
||||
string tenantId,
|
||||
string? scheduleId,
|
||||
string? segmentId,
|
||||
string? correlationId,
|
||||
IReadOnlyDictionary<string, string> attributes,
|
||||
TMessage deserialized,
|
||||
int attempt,
|
||||
DateTimeOffset enqueuedAt,
|
||||
DateTimeOffset leaseExpiresAt,
|
||||
string consumer)
|
||||
{
|
||||
_queue = queue;
|
||||
MessageId = message.Metadata?.Sequence.ToString() ?? idempotencyKey;
|
||||
RunId = runId;
|
||||
TenantId = tenantId;
|
||||
ScheduleId = scheduleId;
|
||||
SegmentId = segmentId;
|
||||
CorrelationId = correlationId;
|
||||
Attributes = attributes;
|
||||
Attempt = attempt;
|
||||
EnqueuedAt = enqueuedAt;
|
||||
LeaseExpiresAt = leaseExpiresAt;
|
||||
Consumer = consumer;
|
||||
IdempotencyKey = idempotencyKey;
|
||||
Message = deserialized;
|
||||
_message = message;
|
||||
Payload = payload;
|
||||
}
|
||||
|
||||
private readonly NatsJSMsg<byte[]> _message;
|
||||
|
||||
internal NatsJSMsg<byte[]> RawMessage => _message;
|
||||
|
||||
internal byte[] Payload { get; }
|
||||
|
||||
public string MessageId { get; }
|
||||
|
||||
public string IdempotencyKey { get; }
|
||||
|
||||
public string RunId { get; }
|
||||
|
||||
public string TenantId { get; }
|
||||
|
||||
public string? ScheduleId { get; }
|
||||
|
||||
public string? SegmentId { get; }
|
||||
|
||||
public string? CorrelationId { get; }
|
||||
|
||||
public IReadOnlyDictionary<string, string> Attributes { get; }
|
||||
|
||||
public TMessage Message { get; }
|
||||
|
||||
public int Attempt { get; private set; }
|
||||
|
||||
public DateTimeOffset EnqueuedAt { get; }
|
||||
|
||||
public DateTimeOffset LeaseExpiresAt { get; private set; }
|
||||
|
||||
public string Consumer { get; }
|
||||
|
||||
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
|
||||
=> _queue.AcknowledgeAsync(this, cancellationToken);
|
||||
|
||||
public Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default)
|
||||
=> _queue.RenewAsync(this, leaseDuration, cancellationToken);
|
||||
|
||||
public Task ReleaseAsync(SchedulerQueueReleaseDisposition disposition, CancellationToken cancellationToken = default)
|
||||
=> _queue.ReleaseAsync(this, disposition, cancellationToken);
|
||||
|
||||
public Task DeadLetterAsync(string reason, CancellationToken cancellationToken = default)
|
||||
=> _queue.DeadLetterAsync(this, reason, cancellationToken);
|
||||
|
||||
internal bool TryBeginCompletion()
|
||||
=> Interlocked.CompareExchange(ref _completed, 1, 0) == 0;
|
||||
|
||||
internal void RefreshLease(DateTimeOffset expiresAt)
|
||||
=> LeaseExpiresAt = expiresAt;
|
||||
|
||||
internal void IncrementAttempt()
|
||||
=> Attempt++;
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NATS.Client.Core;
|
||||
using NATS.Client.JetStream;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
|
||||
internal sealed class NatsSchedulerRunnerQueue
|
||||
: NatsSchedulerQueueBase<RunnerSegmentQueueMessage>, ISchedulerRunnerQueue
|
||||
{
|
||||
public NatsSchedulerRunnerQueue(
|
||||
SchedulerQueueOptions queueOptions,
|
||||
SchedulerNatsQueueOptions natsOptions,
|
||||
ILogger<NatsSchedulerRunnerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
natsOptions,
|
||||
natsOptions.Runner,
|
||||
RunnerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
private sealed class RunnerPayload : INatsSchedulerQueuePayload<RunnerSegmentQueueMessage>
|
||||
{
|
||||
public static RunnerPayload Instance { get; } = new();
|
||||
|
||||
public string QueueName => "runner";
|
||||
|
||||
public string GetIdempotencyKey(RunnerSegmentQueueMessage message)
|
||||
=> message.IdempotencyKey;
|
||||
|
||||
public byte[] Serialize(RunnerSegmentQueueMessage message)
|
||||
=> Encoding.UTF8.GetBytes(CanonicalJsonSerializer.Serialize(message));
|
||||
|
||||
public RunnerSegmentQueueMessage Deserialize(byte[] payload)
|
||||
=> CanonicalJsonSerializer.Deserialize<RunnerSegmentQueueMessage>(Encoding.UTF8.GetString(payload));
|
||||
|
||||
public string GetRunId(RunnerSegmentQueueMessage message)
|
||||
=> message.RunId;
|
||||
|
||||
public string GetTenantId(RunnerSegmentQueueMessage message)
|
||||
=> message.TenantId;
|
||||
|
||||
public string? GetScheduleId(RunnerSegmentQueueMessage message)
|
||||
=> message.ScheduleId;
|
||||
|
||||
public string? GetSegmentId(RunnerSegmentQueueMessage message)
|
||||
=> message.SegmentId;
|
||||
|
||||
public string? GetCorrelationId(RunnerSegmentQueueMessage message)
|
||||
=> message.CorrelationId;
|
||||
|
||||
public IReadOnlyDictionary<string, string>? GetAttributes(RunnerSegmentQueueMessage message)
|
||||
{
|
||||
if (message.Attributes is null || message.Attributes.Count == 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return message.Attributes.ToDictionary(kvp => kvp.Key, kvp => kvp.Value, StringComparer.Ordinal);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
# Scheduler Queue — Sprint 16 Coordination Notes
|
||||
|
||||
Queue work now has concrete contracts from `StellaOps.Scheduler.Models`:
|
||||
|
||||
* Planner inputs reference `Schedule` and `ImpactSet` samples (`samples/api/scheduler/`).
|
||||
* Runner segment payloads should carry `runId`, `scheduleId?`, `tenantId`, and the impacted digest list (mirrors `Run.Deltas`).
|
||||
* Notify fanout relies on the `DeltaSummary` shape already emitted by the model layer.
|
||||
|
||||
## Action items for SCHED-QUEUE-16-401..403
|
||||
|
||||
1. Reference `StellaOps.Scheduler.Models` so adapters can serialise `Run`/`DeltaSummary` without bespoke DTOs.
|
||||
2. Use the canonical serializer for queue messages to keep ordering consistent with API payloads.
|
||||
3. Coverage: add fixture-driven tests that enqueue the sample payloads, then dequeue and re-serialise to verify byte-for-byte stability.
|
||||
4. Expose queue depth/lease metrics with the identifiers provided by the models (`Run.Id`, `Schedule.Id`).
|
||||
|
||||
## JetStream failover notes
|
||||
|
||||
- `SchedulerQueueOptions.Kind = "nats"` will spin up `NatsSchedulerPlannerQueue` / `NatsSchedulerRunnerQueue` instances backed by JetStream.
|
||||
- `SchedulerQueueHealthCheck` pings both planner and runner transports; register via `AddSchedulerQueueHealthCheck()` to surface in `/healthz`.
|
||||
- Planner defaults:
|
||||
```yaml
|
||||
scheduler:
|
||||
queue:
|
||||
kind: nats
|
||||
deadLetterEnabled: true
|
||||
nats:
|
||||
url: "nats://nats:4222"
|
||||
planner:
|
||||
stream: SCHEDULER_PLANNER
|
||||
subject: scheduler.planner
|
||||
durableConsumer: scheduler-planners
|
||||
deadLetterStream: SCHEDULER_PLANNER_DEAD
|
||||
deadLetterSubject: scheduler.planner.dead
|
||||
runner:
|
||||
stream: SCHEDULER_RUNNER
|
||||
subject: scheduler.runner
|
||||
durableConsumer: scheduler-runners
|
||||
redis:
|
||||
deadLetterStream: scheduler:planner:dead
|
||||
idempotencyKeyPrefix: scheduler:planner:idemp:
|
||||
```
|
||||
- Retry / dead-letter semantics mirror the Redis adapter: attempts beyond `MaxDeliveryAttempts` are shipped to the configured dead-letter stream with headers describing `runId`, `scheduleId`, and failure reasons. Set `deadLetterEnabled: false` to drop exhausted messages instead.
|
||||
- Depth metrics surface through `scheduler_queue_depth{transport,queue}`; both transports publish lightweight counters to drive alerting dashboards.
|
||||
|
||||
These notes unblock the queue guild now that SCHED-MODELS-16-102 is complete.
|
||||
@@ -0,0 +1,26 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
internal interface IRedisSchedulerQueuePayload<TMessage>
|
||||
{
|
||||
string QueueName { get; }
|
||||
|
||||
string GetIdempotencyKey(TMessage message);
|
||||
|
||||
string Serialize(TMessage message);
|
||||
|
||||
TMessage Deserialize(string payload);
|
||||
|
||||
string GetRunId(TMessage message);
|
||||
|
||||
string GetTenantId(TMessage message);
|
||||
|
||||
string? GetScheduleId(TMessage message);
|
||||
|
||||
string? GetSegmentId(TMessage message);
|
||||
|
||||
string? GetCorrelationId(TMessage message);
|
||||
|
||||
IReadOnlyDictionary<string, string>? GetAttributes(TMessage message);
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
internal sealed class RedisSchedulerPlannerQueue
|
||||
: RedisSchedulerQueueBase<PlannerQueueMessage>, ISchedulerPlannerQueue
|
||||
{
|
||||
public RedisSchedulerPlannerQueue(
|
||||
SchedulerQueueOptions queueOptions,
|
||||
SchedulerRedisQueueOptions redisOptions,
|
||||
ILogger<RedisSchedulerPlannerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
redisOptions,
|
||||
redisOptions.Planner,
|
||||
PlannerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
private sealed class PlannerPayload : IRedisSchedulerQueuePayload<PlannerQueueMessage>
|
||||
{
|
||||
public static PlannerPayload Instance { get; } = new();
|
||||
|
||||
public string QueueName => "planner";
|
||||
|
||||
public string GetIdempotencyKey(PlannerQueueMessage message)
|
||||
=> message.IdempotencyKey;
|
||||
|
||||
public string Serialize(PlannerQueueMessage message)
|
||||
=> CanonicalJsonSerializer.Serialize(message);
|
||||
|
||||
public PlannerQueueMessage Deserialize(string payload)
|
||||
=> CanonicalJsonSerializer.Deserialize<PlannerQueueMessage>(payload);
|
||||
|
||||
public string GetRunId(PlannerQueueMessage message)
|
||||
=> message.Run.Id;
|
||||
|
||||
public string GetTenantId(PlannerQueueMessage message)
|
||||
=> message.Run.TenantId;
|
||||
|
||||
public string? GetScheduleId(PlannerQueueMessage message)
|
||||
=> message.ScheduleId;
|
||||
|
||||
public string? GetSegmentId(PlannerQueueMessage message)
|
||||
=> null;
|
||||
|
||||
public string? GetCorrelationId(PlannerQueueMessage message)
|
||||
=> message.CorrelationId;
|
||||
|
||||
public IReadOnlyDictionary<string, string>? GetAttributes(PlannerQueueMessage message)
|
||||
=> null;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,805 @@
|
||||
using System;
|
||||
using System.Buffers;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.ObjectModel;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StackExchange.Redis;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMessage>, IAsyncDisposable, ISchedulerQueueTransportDiagnostics
|
||||
{
|
||||
private const string TransportName = "redis";
|
||||
|
||||
private readonly SchedulerQueueOptions _queueOptions;
|
||||
private readonly SchedulerRedisQueueOptions _redisOptions;
|
||||
private readonly RedisSchedulerStreamOptions _streamOptions;
|
||||
private readonly IRedisSchedulerQueuePayload<TMessage> _payload;
|
||||
private readonly ILogger _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly Func<ConfigurationOptions, Task<IConnectionMultiplexer>> _connectionFactory;
|
||||
private readonly SemaphoreSlim _connectionLock = new(1, 1);
|
||||
private readonly SemaphoreSlim _groupInitLock = new(1, 1);
|
||||
private long _approximateDepth;
|
||||
|
||||
private IConnectionMultiplexer? _connection;
|
||||
private volatile bool _groupInitialized;
|
||||
private bool _disposed;
|
||||
|
||||
protected RedisSchedulerQueueBase(
|
||||
SchedulerQueueOptions queueOptions,
|
||||
SchedulerRedisQueueOptions redisOptions,
|
||||
RedisSchedulerStreamOptions streamOptions,
|
||||
IRedisSchedulerQueuePayload<TMessage> payload,
|
||||
ILogger logger,
|
||||
TimeProvider timeProvider,
|
||||
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
|
||||
{
|
||||
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
|
||||
_redisOptions = redisOptions ?? throw new ArgumentNullException(nameof(redisOptions));
|
||||
_streamOptions = streamOptions ?? throw new ArgumentNullException(nameof(streamOptions));
|
||||
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
_connectionFactory = connectionFactory ?? (config => Task.FromResult<IConnectionMultiplexer>(ConnectionMultiplexer.Connect(config)));
|
||||
|
||||
if (string.IsNullOrWhiteSpace(_redisOptions.ConnectionString))
|
||||
{
|
||||
throw new InvalidOperationException("Redis connection string must be configured for the scheduler queue.");
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(_streamOptions.Stream))
|
||||
{
|
||||
throw new InvalidOperationException("Redis stream name must be configured for the scheduler queue.");
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(_streamOptions.ConsumerGroup))
|
||||
{
|
||||
throw new InvalidOperationException("Redis consumer group must be configured for the scheduler queue.");
|
||||
}
|
||||
}
|
||||
|
||||
public async ValueTask<SchedulerQueueEnqueueResult> EnqueueAsync(
|
||||
TMessage message,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(message);
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
|
||||
await EnsureConsumerGroupAsync(database, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var attempt = 1;
|
||||
var entries = BuildEntries(message, now, attempt);
|
||||
|
||||
var messageId = await AddToStreamAsync(
|
||||
database,
|
||||
_streamOptions.Stream,
|
||||
entries,
|
||||
_streamOptions.ApproximateMaxLength,
|
||||
_streamOptions.ApproximateMaxLength is not null)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
var idempotencyKey = BuildIdempotencyKey(_payload.GetIdempotencyKey(message));
|
||||
var stored = await database.StringSetAsync(
|
||||
idempotencyKey,
|
||||
messageId,
|
||||
when: When.NotExists,
|
||||
expiry: _streamOptions.IdempotencyWindow)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
if (!stored)
|
||||
{
|
||||
await database.StreamDeleteAsync(_streamOptions.Stream, new RedisValue[] { messageId }).ConfigureAwait(false);
|
||||
|
||||
var existing = await database.StringGetAsync(idempotencyKey).ConfigureAwait(false);
|
||||
var reusable = existing.IsNullOrEmpty ? messageId : existing;
|
||||
|
||||
SchedulerQueueMetrics.RecordDeduplicated(TransportName, _payload.QueueName);
|
||||
_logger.LogDebug(
|
||||
"Duplicate enqueue detected for scheduler queue {Queue} with key {Key}; returning existing stream id {StreamId}.",
|
||||
_payload.QueueName,
|
||||
idempotencyKey,
|
||||
reusable.ToString());
|
||||
|
||||
PublishDepth();
|
||||
return new SchedulerQueueEnqueueResult(reusable.ToString(), true);
|
||||
}
|
||||
|
||||
SchedulerQueueMetrics.RecordEnqueued(TransportName, _payload.QueueName);
|
||||
_logger.LogDebug(
|
||||
"Enqueued {Queue} message into {Stream} with id {StreamId}.",
|
||||
_payload.QueueName,
|
||||
_streamOptions.Stream,
|
||||
messageId.ToString());
|
||||
|
||||
IncrementDepth();
|
||||
return new SchedulerQueueEnqueueResult(messageId.ToString(), false);
|
||||
}
|
||||
|
||||
public async ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> LeaseAsync(
|
||||
SchedulerQueueLeaseRequest request,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
|
||||
await EnsureConsumerGroupAsync(database, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var entries = await database.StreamReadGroupAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.ConsumerGroup,
|
||||
request.Consumer,
|
||||
position: ">",
|
||||
count: request.BatchSize,
|
||||
flags: CommandFlags.None)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
if (entries is null || entries.Length == 0)
|
||||
{
|
||||
PublishDepth();
|
||||
return Array.Empty<ISchedulerQueueLease<TMessage>>();
|
||||
}
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var leases = new List<ISchedulerQueueLease<TMessage>>(entries.Length);
|
||||
|
||||
foreach (var entry in entries)
|
||||
{
|
||||
var lease = TryMapLease(entry, request.Consumer, now, request.LeaseDuration, attemptOverride: null);
|
||||
if (lease is null)
|
||||
{
|
||||
await HandlePoisonEntryAsync(database, entry.Id).ConfigureAwait(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
leases.Add(lease);
|
||||
}
|
||||
|
||||
PublishDepth();
|
||||
return leases;
|
||||
}
|
||||
|
||||
public async ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> ClaimExpiredAsync(
|
||||
SchedulerQueueClaimOptions options,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
|
||||
await EnsureConsumerGroupAsync(database, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var pending = await database.StreamPendingMessagesAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.ConsumerGroup,
|
||||
options.BatchSize,
|
||||
RedisValue.Null,
|
||||
(long)options.MinIdleTime.TotalMilliseconds)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
if (pending is null || pending.Length == 0)
|
||||
{
|
||||
return Array.Empty<ISchedulerQueueLease<TMessage>>();
|
||||
}
|
||||
|
||||
var eligible = pending
|
||||
.Where(info => info.IdleTimeInMilliseconds >= options.MinIdleTime.TotalMilliseconds)
|
||||
.ToArray();
|
||||
|
||||
if (eligible.Length == 0)
|
||||
{
|
||||
return Array.Empty<ISchedulerQueueLease<TMessage>>();
|
||||
}
|
||||
|
||||
var messageIds = eligible
|
||||
.Select(info => (RedisValue)info.MessageId)
|
||||
.ToArray();
|
||||
|
||||
var claimed = await database.StreamClaimAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.ConsumerGroup,
|
||||
options.ClaimantConsumer,
|
||||
0,
|
||||
messageIds,
|
||||
CommandFlags.None)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
if (claimed is null || claimed.Length == 0)
|
||||
{
|
||||
PublishDepth();
|
||||
return Array.Empty<ISchedulerQueueLease<TMessage>>();
|
||||
}
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var attemptLookup = eligible.ToDictionary(
|
||||
info => info.MessageId.IsNullOrEmpty ? string.Empty : info.MessageId.ToString(),
|
||||
info => (int)Math.Max(1, info.DeliveryCount),
|
||||
StringComparer.Ordinal);
|
||||
|
||||
var leases = new List<ISchedulerQueueLease<TMessage>>(claimed.Length);
|
||||
foreach (var entry in claimed)
|
||||
{
|
||||
var entryId = entry.Id.ToString();
|
||||
attemptLookup.TryGetValue(entryId, out var attempt);
|
||||
|
||||
var lease = TryMapLease(
|
||||
entry,
|
||||
options.ClaimantConsumer,
|
||||
now,
|
||||
_queueOptions.DefaultLeaseDuration,
|
||||
attemptOverride: attempt);
|
||||
|
||||
if (lease is null)
|
||||
{
|
||||
await HandlePoisonEntryAsync(database, entry.Id).ConfigureAwait(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
leases.Add(lease);
|
||||
}
|
||||
|
||||
PublishDepth();
|
||||
return leases;
|
||||
}
|
||||
|
||||
public async ValueTask DisposeAsync()
|
||||
{
|
||||
if (_disposed)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
_disposed = true;
|
||||
|
||||
if (_connection is not null)
|
||||
{
|
||||
await _connection.CloseAsync();
|
||||
_connection.Dispose();
|
||||
}
|
||||
|
||||
_connectionLock.Dispose();
|
||||
_groupInitLock.Dispose();
|
||||
SchedulerQueueMetrics.RemoveDepth(TransportName, _payload.QueueName);
|
||||
GC.SuppressFinalize(this);
|
||||
}
|
||||
|
||||
internal async Task AcknowledgeAsync(
|
||||
RedisSchedulerQueueLease<TMessage> lease,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
if (!lease.TryBeginCompletion())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await database.StreamAcknowledgeAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.ConsumerGroup,
|
||||
new RedisValue[] { lease.MessageId })
|
||||
.ConfigureAwait(false);
|
||||
|
||||
await database.StreamDeleteAsync(
|
||||
_streamOptions.Stream,
|
||||
new RedisValue[] { lease.MessageId })
|
||||
.ConfigureAwait(false);
|
||||
|
||||
SchedulerQueueMetrics.RecordAck(TransportName, _payload.QueueName);
|
||||
DecrementDepth();
|
||||
}
|
||||
|
||||
internal async Task RenewLeaseAsync(
|
||||
RedisSchedulerQueueLease<TMessage> lease,
|
||||
TimeSpan leaseDuration,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await database.StreamClaimAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.ConsumerGroup,
|
||||
lease.Consumer,
|
||||
0,
|
||||
new RedisValue[] { lease.MessageId },
|
||||
CommandFlags.None)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
var expires = _timeProvider.GetUtcNow().Add(leaseDuration);
|
||||
lease.RefreshLease(expires);
|
||||
}
|
||||
|
||||
internal async Task ReleaseAsync(
|
||||
RedisSchedulerQueueLease<TMessage> lease,
|
||||
SchedulerQueueReleaseDisposition disposition,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
if (disposition == SchedulerQueueReleaseDisposition.Retry
|
||||
&& lease.Attempt >= _queueOptions.MaxDeliveryAttempts)
|
||||
{
|
||||
await DeadLetterAsync(
|
||||
lease,
|
||||
$"max-delivery-attempts:{lease.Attempt}",
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!lease.TryBeginCompletion())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await database.StreamAcknowledgeAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.ConsumerGroup,
|
||||
new RedisValue[] { lease.MessageId })
|
||||
.ConfigureAwait(false);
|
||||
|
||||
await database.StreamDeleteAsync(
|
||||
_streamOptions.Stream,
|
||||
new RedisValue[] { lease.MessageId })
|
||||
.ConfigureAwait(false);
|
||||
|
||||
SchedulerQueueMetrics.RecordAck(TransportName, _payload.QueueName);
|
||||
DecrementDepth();
|
||||
|
||||
if (disposition == SchedulerQueueReleaseDisposition.Retry)
|
||||
{
|
||||
SchedulerQueueMetrics.RecordRetry(TransportName, _payload.QueueName);
|
||||
|
||||
lease.IncrementAttempt();
|
||||
|
||||
var backoff = CalculateBackoff(lease.Attempt);
|
||||
if (backoff > TimeSpan.Zero)
|
||||
{
|
||||
try
|
||||
{
|
||||
await Task.Delay(backoff, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var entries = BuildEntries(lease.Message, now, lease.Attempt);
|
||||
|
||||
await AddToStreamAsync(
|
||||
database,
|
||||
_streamOptions.Stream,
|
||||
entries,
|
||||
_streamOptions.ApproximateMaxLength,
|
||||
_streamOptions.ApproximateMaxLength is not null)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
SchedulerQueueMetrics.RecordEnqueued(TransportName, _payload.QueueName);
|
||||
IncrementDepth();
|
||||
}
|
||||
}
|
||||
|
||||
internal async Task DeadLetterAsync(
|
||||
RedisSchedulerQueueLease<TMessage> lease,
|
||||
string reason,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
if (!lease.TryBeginCompletion())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await database.StreamAcknowledgeAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.ConsumerGroup,
|
||||
new RedisValue[] { lease.MessageId })
|
||||
.ConfigureAwait(false);
|
||||
|
||||
await database.StreamDeleteAsync(
|
||||
_streamOptions.Stream,
|
||||
new RedisValue[] { lease.MessageId })
|
||||
.ConfigureAwait(false);
|
||||
|
||||
DecrementDepth();
|
||||
|
||||
if (!_queueOptions.DeadLetterEnabled)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
"Dropped {Queue} message {MessageId} after {Attempt} attempt(s); dead-letter disabled. Reason: {Reason}",
|
||||
_payload.QueueName,
|
||||
lease.MessageId,
|
||||
lease.Attempt,
|
||||
reason);
|
||||
return;
|
||||
}
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var entries = BuildEntries(lease.Message, now, lease.Attempt);
|
||||
|
||||
await AddToStreamAsync(
|
||||
database,
|
||||
_streamOptions.DeadLetterStream,
|
||||
entries,
|
||||
null,
|
||||
false)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
SchedulerQueueMetrics.RecordDeadLetter(TransportName, _payload.QueueName);
|
||||
_logger.LogError(
|
||||
"Dead-lettered {Queue} message {MessageId} after {Attempt} attempt(s): {Reason}",
|
||||
_payload.QueueName,
|
||||
lease.MessageId,
|
||||
lease.Attempt,
|
||||
reason);
|
||||
}
|
||||
|
||||
public async ValueTask PingAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
|
||||
await database.ExecuteAsync("PING").ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private string BuildIdempotencyKey(string key)
|
||||
=> string.Concat(_streamOptions.IdempotencyKeyPrefix, key);
|
||||
|
||||
private TimeSpan CalculateBackoff(int attempt)
|
||||
{
|
||||
if (attempt <= 1)
|
||||
{
|
||||
return _queueOptions.RetryInitialBackoff > TimeSpan.Zero
|
||||
? _queueOptions.RetryInitialBackoff
|
||||
: TimeSpan.Zero;
|
||||
}
|
||||
|
||||
var initial = _queueOptions.RetryInitialBackoff > TimeSpan.Zero
|
||||
? _queueOptions.RetryInitialBackoff
|
||||
: TimeSpan.Zero;
|
||||
|
||||
if (initial <= TimeSpan.Zero)
|
||||
{
|
||||
return TimeSpan.Zero;
|
||||
}
|
||||
|
||||
var max = _queueOptions.RetryMaxBackoff > TimeSpan.Zero
|
||||
? _queueOptions.RetryMaxBackoff
|
||||
: initial;
|
||||
|
||||
var exponent = attempt - 1;
|
||||
var scaledTicks = initial.Ticks * Math.Pow(2, exponent - 1);
|
||||
var cappedTicks = Math.Min(max.Ticks, scaledTicks);
|
||||
|
||||
return TimeSpan.FromTicks((long)Math.Max(initial.Ticks, cappedTicks));
|
||||
}
|
||||
|
||||
private async ValueTask<IDatabase> GetDatabaseAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
if (_connection is not null)
|
||||
{
|
||||
return _connection.GetDatabase(_redisOptions.Database ?? -1);
|
||||
}
|
||||
|
||||
await _connectionLock.WaitAsync(cancellationToken).ConfigureAwait(false);
|
||||
try
|
||||
{
|
||||
if (_connection is null)
|
||||
{
|
||||
var config = ConfigurationOptions.Parse(_redisOptions.ConnectionString!);
|
||||
config.AbortOnConnectFail = false;
|
||||
config.ConnectTimeout = (int)_redisOptions.InitializationTimeout.TotalMilliseconds;
|
||||
config.ConnectRetry = 3;
|
||||
|
||||
if (_redisOptions.Database is not null)
|
||||
{
|
||||
config.DefaultDatabase = _redisOptions.Database;
|
||||
}
|
||||
|
||||
_connection = await _connectionFactory(config).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
_connectionLock.Release();
|
||||
}
|
||||
|
||||
return _connection.GetDatabase(_redisOptions.Database ?? -1);
|
||||
}
|
||||
|
||||
private async Task EnsureConsumerGroupAsync(
|
||||
IDatabase database,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
if (_groupInitialized)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await _groupInitLock.WaitAsync(cancellationToken).ConfigureAwait(false);
|
||||
try
|
||||
{
|
||||
if (_groupInitialized)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await database.StreamCreateConsumerGroupAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.ConsumerGroup,
|
||||
StreamPosition.Beginning,
|
||||
createStream: true)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
catch (RedisServerException ex) when (ex.Message.Contains("BUSYGROUP", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
// Group already exists.
|
||||
}
|
||||
|
||||
_groupInitialized = true;
|
||||
}
|
||||
finally
|
||||
{
|
||||
_groupInitLock.Release();
|
||||
}
|
||||
}
|
||||
|
||||
private NameValueEntry[] BuildEntries(
|
||||
TMessage message,
|
||||
DateTimeOffset enqueuedAt,
|
||||
int attempt)
|
||||
{
|
||||
var attributes = _payload.GetAttributes(message);
|
||||
var attributeCount = attributes?.Count ?? 0;
|
||||
var entries = ArrayPool<NameValueEntry>.Shared.Rent(10 + attributeCount);
|
||||
var index = 0;
|
||||
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.QueueKind, _payload.QueueName);
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.RunId, _payload.GetRunId(message));
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.TenantId, _payload.GetTenantId(message));
|
||||
|
||||
var scheduleId = _payload.GetScheduleId(message);
|
||||
if (!string.IsNullOrWhiteSpace(scheduleId))
|
||||
{
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.ScheduleId, scheduleId);
|
||||
}
|
||||
|
||||
var segmentId = _payload.GetSegmentId(message);
|
||||
if (!string.IsNullOrWhiteSpace(segmentId))
|
||||
{
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.SegmentId, segmentId);
|
||||
}
|
||||
|
||||
var correlationId = _payload.GetCorrelationId(message);
|
||||
if (!string.IsNullOrWhiteSpace(correlationId))
|
||||
{
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.CorrelationId, correlationId);
|
||||
}
|
||||
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.IdempotencyKey, _payload.GetIdempotencyKey(message));
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.Attempt, attempt);
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.EnqueuedAt, enqueuedAt.ToUnixTimeMilliseconds());
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.Payload, _payload.Serialize(message));
|
||||
|
||||
if (attributeCount > 0 && attributes is not null)
|
||||
{
|
||||
foreach (var kvp in attributes)
|
||||
{
|
||||
entries[index++] = new NameValueEntry(
|
||||
SchedulerQueueFields.AttributePrefix + kvp.Key,
|
||||
kvp.Value);
|
||||
}
|
||||
}
|
||||
|
||||
var result = entries.AsSpan(0, index).ToArray();
|
||||
ArrayPool<NameValueEntry>.Shared.Return(entries, clearArray: true);
|
||||
return result;
|
||||
}
|
||||
|
||||
private RedisSchedulerQueueLease<TMessage>? TryMapLease(
|
||||
StreamEntry entry,
|
||||
string consumer,
|
||||
DateTimeOffset now,
|
||||
TimeSpan leaseDuration,
|
||||
int? attemptOverride)
|
||||
{
|
||||
if (entry.Values is null || entry.Values.Length == 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
string? payload = null;
|
||||
string? runId = null;
|
||||
string? tenantId = null;
|
||||
string? scheduleId = null;
|
||||
string? segmentId = null;
|
||||
string? correlationId = null;
|
||||
string? idempotencyKey = null;
|
||||
long? enqueuedAtUnix = null;
|
||||
var attempt = attemptOverride ?? 1;
|
||||
var attributes = new Dictionary<string, string>(StringComparer.Ordinal);
|
||||
|
||||
foreach (var field in entry.Values)
|
||||
{
|
||||
var name = field.Name.ToString();
|
||||
var value = field.Value;
|
||||
|
||||
if (name.Equals(SchedulerQueueFields.Payload, StringComparison.Ordinal))
|
||||
{
|
||||
payload = value.ToString();
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.RunId, StringComparison.Ordinal))
|
||||
{
|
||||
runId = value.ToString();
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.TenantId, StringComparison.Ordinal))
|
||||
{
|
||||
tenantId = value.ToString();
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.ScheduleId, StringComparison.Ordinal))
|
||||
{
|
||||
scheduleId = NormalizeOptional(value.ToString());
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.SegmentId, StringComparison.Ordinal))
|
||||
{
|
||||
segmentId = NormalizeOptional(value.ToString());
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.CorrelationId, StringComparison.Ordinal))
|
||||
{
|
||||
correlationId = NormalizeOptional(value.ToString());
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.IdempotencyKey, StringComparison.Ordinal))
|
||||
{
|
||||
idempotencyKey = value.ToString();
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.EnqueuedAt, StringComparison.Ordinal))
|
||||
{
|
||||
if (long.TryParse(value.ToString(), out var unixMs))
|
||||
{
|
||||
enqueuedAtUnix = unixMs;
|
||||
}
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.Attempt, StringComparison.Ordinal))
|
||||
{
|
||||
if (int.TryParse(value.ToString(), out var parsedAttempt))
|
||||
{
|
||||
attempt = attemptOverride.HasValue
|
||||
? Math.Max(attemptOverride.Value, parsedAttempt)
|
||||
: Math.Max(1, parsedAttempt);
|
||||
}
|
||||
}
|
||||
else if (name.StartsWith(SchedulerQueueFields.AttributePrefix, StringComparison.Ordinal))
|
||||
{
|
||||
var key = name[SchedulerQueueFields.AttributePrefix.Length..];
|
||||
attributes[key] = value.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
if (payload is null || runId is null || tenantId is null || enqueuedAtUnix is null || idempotencyKey is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var message = _payload.Deserialize(payload);
|
||||
var enqueuedAt = DateTimeOffset.FromUnixTimeMilliseconds(enqueuedAtUnix.Value);
|
||||
var leaseExpires = now.Add(leaseDuration);
|
||||
|
||||
IReadOnlyDictionary<string, string> attributeView = attributes.Count == 0
|
||||
? EmptyReadOnlyDictionary<string, string>.Instance
|
||||
: new ReadOnlyDictionary<string, string>(attributes);
|
||||
|
||||
return new RedisSchedulerQueueLease<TMessage>(
|
||||
this,
|
||||
entry.Id.ToString(),
|
||||
idempotencyKey,
|
||||
runId,
|
||||
tenantId,
|
||||
scheduleId,
|
||||
segmentId,
|
||||
correlationId,
|
||||
attributeView,
|
||||
message,
|
||||
attempt,
|
||||
enqueuedAt,
|
||||
leaseExpires,
|
||||
consumer);
|
||||
}
|
||||
|
||||
private async Task HandlePoisonEntryAsync(IDatabase database, RedisValue entryId)
|
||||
{
|
||||
await database.StreamAcknowledgeAsync(
|
||||
_streamOptions.Stream,
|
||||
_streamOptions.ConsumerGroup,
|
||||
new RedisValue[] { entryId })
|
||||
.ConfigureAwait(false);
|
||||
|
||||
await database.StreamDeleteAsync(
|
||||
_streamOptions.Stream,
|
||||
new RedisValue[] { entryId })
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private async Task<RedisValue> AddToStreamAsync(
|
||||
IDatabase database,
|
||||
RedisKey stream,
|
||||
NameValueEntry[] entries,
|
||||
int? maxLength,
|
||||
bool useApproximateLength)
|
||||
{
|
||||
var capacity = 4 + (entries.Length * 2);
|
||||
var args = new List<object>(capacity)
|
||||
{
|
||||
stream
|
||||
};
|
||||
|
||||
if (maxLength.HasValue)
|
||||
{
|
||||
args.Add("MAXLEN");
|
||||
if (useApproximateLength)
|
||||
{
|
||||
args.Add("~");
|
||||
}
|
||||
|
||||
args.Add(maxLength.Value);
|
||||
}
|
||||
|
||||
args.Add("*");
|
||||
|
||||
for (var i = 0; i < entries.Length; i++)
|
||||
{
|
||||
args.Add(entries[i].Name);
|
||||
args.Add(entries[i].Value);
|
||||
}
|
||||
|
||||
var result = await database.ExecuteAsync("XADD", args.ToArray()).ConfigureAwait(false);
|
||||
return (RedisValue)result!;
|
||||
}
|
||||
|
||||
private void IncrementDepth()
|
||||
{
|
||||
var depth = Interlocked.Increment(ref _approximateDepth);
|
||||
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
|
||||
}
|
||||
|
||||
private void DecrementDepth()
|
||||
{
|
||||
var depth = Interlocked.Decrement(ref _approximateDepth);
|
||||
if (depth < 0)
|
||||
{
|
||||
depth = Interlocked.Exchange(ref _approximateDepth, 0);
|
||||
}
|
||||
|
||||
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
|
||||
}
|
||||
|
||||
private void PublishDepth()
|
||||
{
|
||||
var depth = Volatile.Read(ref _approximateDepth);
|
||||
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
|
||||
}
|
||||
|
||||
private static string? NormalizeOptional(string? value)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(value))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
private sealed class EmptyReadOnlyDictionary<TKey, TValue>
|
||||
where TKey : notnull
|
||||
{
|
||||
public static readonly IReadOnlyDictionary<TKey, TValue> Instance =
|
||||
new ReadOnlyDictionary<TKey, TValue>(new Dictionary<TKey, TValue>(0, EqualityComparer<TKey>.Default));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<TMessage>
|
||||
{
|
||||
private readonly RedisSchedulerQueueBase<TMessage> _queue;
|
||||
private int _completed;
|
||||
|
||||
internal RedisSchedulerQueueLease(
|
||||
RedisSchedulerQueueBase<TMessage> queue,
|
||||
string messageId,
|
||||
string idempotencyKey,
|
||||
string runId,
|
||||
string tenantId,
|
||||
string? scheduleId,
|
||||
string? segmentId,
|
||||
string? correlationId,
|
||||
IReadOnlyDictionary<string, string> attributes,
|
||||
TMessage message,
|
||||
int attempt,
|
||||
DateTimeOffset enqueuedAt,
|
||||
DateTimeOffset leaseExpiresAt,
|
||||
string consumer)
|
||||
{
|
||||
_queue = queue;
|
||||
MessageId = messageId;
|
||||
IdempotencyKey = idempotencyKey;
|
||||
RunId = runId;
|
||||
TenantId = tenantId;
|
||||
ScheduleId = scheduleId;
|
||||
SegmentId = segmentId;
|
||||
CorrelationId = correlationId;
|
||||
Attributes = attributes;
|
||||
Message = message;
|
||||
Attempt = attempt;
|
||||
EnqueuedAt = enqueuedAt;
|
||||
LeaseExpiresAt = leaseExpiresAt;
|
||||
Consumer = consumer;
|
||||
}
|
||||
|
||||
public string MessageId { get; }
|
||||
|
||||
public string IdempotencyKey { get; }
|
||||
|
||||
public string RunId { get; }
|
||||
|
||||
public string TenantId { get; }
|
||||
|
||||
public string? ScheduleId { get; }
|
||||
|
||||
public string? SegmentId { get; }
|
||||
|
||||
public string? CorrelationId { get; }
|
||||
|
||||
public IReadOnlyDictionary<string, string> Attributes { get; }
|
||||
|
||||
public TMessage Message { get; }
|
||||
|
||||
public int Attempt { get; private set; }
|
||||
|
||||
public DateTimeOffset EnqueuedAt { get; }
|
||||
|
||||
public DateTimeOffset LeaseExpiresAt { get; private set; }
|
||||
|
||||
public string Consumer { get; }
|
||||
|
||||
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
|
||||
=> _queue.AcknowledgeAsync(this, cancellationToken);
|
||||
|
||||
public Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default)
|
||||
=> _queue.RenewLeaseAsync(this, leaseDuration, cancellationToken);
|
||||
|
||||
public Task ReleaseAsync(SchedulerQueueReleaseDisposition disposition, CancellationToken cancellationToken = default)
|
||||
=> _queue.ReleaseAsync(this, disposition, cancellationToken);
|
||||
|
||||
public Task DeadLetterAsync(string reason, CancellationToken cancellationToken = default)
|
||||
=> _queue.DeadLetterAsync(this, reason, cancellationToken);
|
||||
|
||||
internal bool TryBeginCompletion()
|
||||
=> Interlocked.CompareExchange(ref _completed, 1, 0) == 0;
|
||||
|
||||
internal void RefreshLease(DateTimeOffset expiresAt)
|
||||
=> LeaseExpiresAt = expiresAt;
|
||||
|
||||
internal void IncrementAttempt()
|
||||
=> Attempt++;
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
internal sealed class RedisSchedulerRunnerQueue
|
||||
: RedisSchedulerQueueBase<RunnerSegmentQueueMessage>, ISchedulerRunnerQueue
|
||||
{
|
||||
public RedisSchedulerRunnerQueue(
|
||||
SchedulerQueueOptions queueOptions,
|
||||
SchedulerRedisQueueOptions redisOptions,
|
||||
ILogger<RedisSchedulerRunnerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
redisOptions,
|
||||
redisOptions.Runner,
|
||||
RunnerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
private sealed class RunnerPayload : IRedisSchedulerQueuePayload<RunnerSegmentQueueMessage>
|
||||
{
|
||||
public static RunnerPayload Instance { get; } = new();
|
||||
|
||||
public string QueueName => "runner";
|
||||
|
||||
public string GetIdempotencyKey(RunnerSegmentQueueMessage message)
|
||||
=> message.IdempotencyKey;
|
||||
|
||||
public string Serialize(RunnerSegmentQueueMessage message)
|
||||
=> CanonicalJsonSerializer.Serialize(message);
|
||||
|
||||
public RunnerSegmentQueueMessage Deserialize(string payload)
|
||||
=> CanonicalJsonSerializer.Deserialize<RunnerSegmentQueueMessage>(payload);
|
||||
|
||||
public string GetRunId(RunnerSegmentQueueMessage message)
|
||||
=> message.RunId;
|
||||
|
||||
public string GetTenantId(RunnerSegmentQueueMessage message)
|
||||
=> message.TenantId;
|
||||
|
||||
public string? GetScheduleId(RunnerSegmentQueueMessage message)
|
||||
=> message.ScheduleId;
|
||||
|
||||
public string? GetSegmentId(RunnerSegmentQueueMessage message)
|
||||
=> message.SegmentId;
|
||||
|
||||
public string? GetCorrelationId(RunnerSegmentQueueMessage message)
|
||||
=> message.CorrelationId;
|
||||
|
||||
public IReadOnlyDictionary<string, string>? GetAttributes(RunnerSegmentQueueMessage message)
|
||||
{
|
||||
if (message.Attributes.Count == 0 && message.ImageDigests.Count == 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Ensure digests remain accessible without deserializing the entire payload.
|
||||
var map = new Dictionary<string, string>(message.Attributes, StringComparer.Ordinal);
|
||||
map["imageDigestCount"] = message.ImageDigests.Count.ToString();
|
||||
|
||||
// populate first few digests for quick inspection (bounded)
|
||||
var take = Math.Min(message.ImageDigests.Count, 5);
|
||||
for (var i = 0; i < take; i++)
|
||||
{
|
||||
map[$"digest{i}"] = message.ImageDigests[i];
|
||||
}
|
||||
|
||||
if (message.RatePerSecond.HasValue)
|
||||
{
|
||||
map["ratePerSecond"] = message.RatePerSecond.Value.ToString();
|
||||
}
|
||||
|
||||
map["usageOnly"] = message.UsageOnly ? "true" : "false";
|
||||
|
||||
return map;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,274 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.ObjectModel;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
|
||||
public sealed class PlannerQueueMessage
|
||||
{
|
||||
[JsonConstructor]
|
||||
public PlannerQueueMessage(
|
||||
Run run,
|
||||
ImpactSet impactSet,
|
||||
Schedule? schedule = null,
|
||||
string? correlationId = null)
|
||||
{
|
||||
Run = run ?? throw new ArgumentNullException(nameof(run));
|
||||
ImpactSet = impactSet ?? throw new ArgumentNullException(nameof(impactSet));
|
||||
|
||||
if (schedule is not null && string.IsNullOrWhiteSpace(schedule.Id))
|
||||
{
|
||||
throw new ArgumentException("Schedule must have a valid identifier.", nameof(schedule));
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(correlationId))
|
||||
{
|
||||
correlationId = correlationId!.Trim();
|
||||
}
|
||||
|
||||
Schedule = schedule;
|
||||
CorrelationId = string.IsNullOrWhiteSpace(correlationId) ? null : correlationId;
|
||||
}
|
||||
|
||||
public Run Run { get; }
|
||||
|
||||
public ImpactSet ImpactSet { get; }
|
||||
|
||||
public Schedule? Schedule { get; }
|
||||
|
||||
public string? CorrelationId { get; }
|
||||
|
||||
public string IdempotencyKey => Run.Id;
|
||||
|
||||
public string TenantId => Run.TenantId;
|
||||
|
||||
public string? ScheduleId => Run.ScheduleId;
|
||||
}
|
||||
|
||||
public sealed class RunnerSegmentQueueMessage
|
||||
{
|
||||
private readonly ReadOnlyCollection<string> _imageDigests;
|
||||
private readonly IReadOnlyDictionary<string, string> _attributes;
|
||||
|
||||
[JsonConstructor]
|
||||
public RunnerSegmentQueueMessage(
|
||||
string segmentId,
|
||||
string runId,
|
||||
string tenantId,
|
||||
IReadOnlyList<string> imageDigests,
|
||||
string? scheduleId = null,
|
||||
int? ratePerSecond = null,
|
||||
bool usageOnly = true,
|
||||
IReadOnlyDictionary<string, string>? attributes = null,
|
||||
string? correlationId = null)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(segmentId))
|
||||
{
|
||||
throw new ArgumentException("Segment identifier must be provided.", nameof(segmentId));
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(runId))
|
||||
{
|
||||
throw new ArgumentException("Run identifier must be provided.", nameof(runId));
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(tenantId))
|
||||
{
|
||||
throw new ArgumentException("Tenant identifier must be provided.", nameof(tenantId));
|
||||
}
|
||||
|
||||
SegmentId = segmentId;
|
||||
RunId = runId;
|
||||
TenantId = tenantId;
|
||||
ScheduleId = string.IsNullOrWhiteSpace(scheduleId) ? null : scheduleId;
|
||||
RatePerSecond = ratePerSecond;
|
||||
UsageOnly = usageOnly;
|
||||
CorrelationId = string.IsNullOrWhiteSpace(correlationId) ? null : correlationId;
|
||||
|
||||
_imageDigests = new ReadOnlyCollection<string>(NormalizeDigests(imageDigests));
|
||||
_attributes = attributes is null
|
||||
? EmptyReadOnlyDictionary<string, string>.Instance
|
||||
: new ReadOnlyDictionary<string, string>(new Dictionary<string, string>(attributes, StringComparer.Ordinal));
|
||||
}
|
||||
|
||||
public string SegmentId { get; }
|
||||
|
||||
public string RunId { get; }
|
||||
|
||||
public string TenantId { get; }
|
||||
|
||||
public string? ScheduleId { get; }
|
||||
|
||||
public int? RatePerSecond { get; }
|
||||
|
||||
public bool UsageOnly { get; }
|
||||
|
||||
public string? CorrelationId { get; }
|
||||
|
||||
public IReadOnlyList<string> ImageDigests => _imageDigests;
|
||||
|
||||
public IReadOnlyDictionary<string, string> Attributes => _attributes;
|
||||
|
||||
public string IdempotencyKey => SegmentId;
|
||||
|
||||
private static List<string> NormalizeDigests(IReadOnlyList<string> digests)
|
||||
{
|
||||
if (digests is null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(digests));
|
||||
}
|
||||
|
||||
var list = new List<string>();
|
||||
foreach (var digest in digests)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(digest))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
list.Add(digest.Trim());
|
||||
}
|
||||
|
||||
if (list.Count == 0)
|
||||
{
|
||||
throw new ArgumentException("At least one image digest must be provided.", nameof(digests));
|
||||
}
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
private sealed class EmptyReadOnlyDictionary<TKey, TValue>
|
||||
where TKey : notnull
|
||||
{
|
||||
public static readonly IReadOnlyDictionary<TKey, TValue> Instance =
|
||||
new ReadOnlyDictionary<TKey, TValue>(new Dictionary<TKey, TValue>(0, EqualityComparer<TKey>.Default));
|
||||
}
|
||||
}
|
||||
|
||||
public readonly record struct SchedulerQueueEnqueueResult(string MessageId, bool Deduplicated);
|
||||
|
||||
public sealed class SchedulerQueueLeaseRequest
|
||||
{
|
||||
public SchedulerQueueLeaseRequest(string consumer, int batchSize, TimeSpan leaseDuration)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(consumer))
|
||||
{
|
||||
throw new ArgumentException("Consumer identifier must be provided.", nameof(consumer));
|
||||
}
|
||||
|
||||
if (batchSize <= 0)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be positive.");
|
||||
}
|
||||
|
||||
if (leaseDuration <= TimeSpan.Zero)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(leaseDuration), leaseDuration, "Lease duration must be positive.");
|
||||
}
|
||||
|
||||
Consumer = consumer;
|
||||
BatchSize = batchSize;
|
||||
LeaseDuration = leaseDuration;
|
||||
}
|
||||
|
||||
public string Consumer { get; }
|
||||
|
||||
public int BatchSize { get; }
|
||||
|
||||
public TimeSpan LeaseDuration { get; }
|
||||
}
|
||||
|
||||
public sealed class SchedulerQueueClaimOptions
|
||||
{
|
||||
public SchedulerQueueClaimOptions(string claimantConsumer, int batchSize, TimeSpan minIdleTime)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(claimantConsumer))
|
||||
{
|
||||
throw new ArgumentException("Consumer identifier must be provided.", nameof(claimantConsumer));
|
||||
}
|
||||
|
||||
if (batchSize <= 0)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be positive.");
|
||||
}
|
||||
|
||||
if (minIdleTime < TimeSpan.Zero)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(minIdleTime), minIdleTime, "Idle time cannot be negative.");
|
||||
}
|
||||
|
||||
ClaimantConsumer = claimantConsumer;
|
||||
BatchSize = batchSize;
|
||||
MinIdleTime = minIdleTime;
|
||||
}
|
||||
|
||||
public string ClaimantConsumer { get; }
|
||||
|
||||
public int BatchSize { get; }
|
||||
|
||||
public TimeSpan MinIdleTime { get; }
|
||||
}
|
||||
|
||||
public enum SchedulerQueueReleaseDisposition
|
||||
{
|
||||
Retry,
|
||||
Abandon
|
||||
}
|
||||
|
||||
public interface ISchedulerQueue<TMessage>
|
||||
{
|
||||
ValueTask<SchedulerQueueEnqueueResult> EnqueueAsync(TMessage message, CancellationToken cancellationToken = default);
|
||||
|
||||
ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> LeaseAsync(SchedulerQueueLeaseRequest request, CancellationToken cancellationToken = default);
|
||||
|
||||
ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> ClaimExpiredAsync(SchedulerQueueClaimOptions options, CancellationToken cancellationToken = default);
|
||||
}
|
||||
|
||||
public interface ISchedulerQueueLease<out TMessage>
|
||||
{
|
||||
string MessageId { get; }
|
||||
|
||||
int Attempt { get; }
|
||||
|
||||
DateTimeOffset EnqueuedAt { get; }
|
||||
|
||||
DateTimeOffset LeaseExpiresAt { get; }
|
||||
|
||||
string Consumer { get; }
|
||||
|
||||
string TenantId { get; }
|
||||
|
||||
string RunId { get; }
|
||||
|
||||
string? ScheduleId { get; }
|
||||
|
||||
string? SegmentId { get; }
|
||||
|
||||
string? CorrelationId { get; }
|
||||
|
||||
string IdempotencyKey { get; }
|
||||
|
||||
IReadOnlyDictionary<string, string> Attributes { get; }
|
||||
|
||||
TMessage Message { get; }
|
||||
|
||||
Task AcknowledgeAsync(CancellationToken cancellationToken = default);
|
||||
|
||||
Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default);
|
||||
|
||||
Task ReleaseAsync(SchedulerQueueReleaseDisposition disposition, CancellationToken cancellationToken = default);
|
||||
|
||||
Task DeadLetterAsync(string reason, CancellationToken cancellationToken = default);
|
||||
}
|
||||
|
||||
public interface ISchedulerPlannerQueue : ISchedulerQueue<PlannerQueueMessage>
|
||||
{
|
||||
}
|
||||
|
||||
public interface ISchedulerRunnerQueue : ISchedulerQueue<RunnerSegmentQueueMessage>
|
||||
{
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
|
||||
internal static class SchedulerQueueFields
|
||||
{
|
||||
public const string Payload = "payload";
|
||||
public const string Attempt = "attempt";
|
||||
public const string EnqueuedAt = "enqueuedAt";
|
||||
public const string IdempotencyKey = "idempotency";
|
||||
public const string RunId = "runId";
|
||||
public const string TenantId = "tenantId";
|
||||
public const string ScheduleId = "scheduleId";
|
||||
public const string SegmentId = "segmentId";
|
||||
public const string QueueKind = "queueKind";
|
||||
public const string CorrelationId = "correlationId";
|
||||
public const string AttributePrefix = "attr:";
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
|
||||
public sealed class SchedulerQueueHealthCheck : IHealthCheck
|
||||
{
|
||||
private readonly ISchedulerPlannerQueue _plannerQueue;
|
||||
private readonly ISchedulerRunnerQueue _runnerQueue;
|
||||
private readonly ILogger<SchedulerQueueHealthCheck> _logger;
|
||||
|
||||
public SchedulerQueueHealthCheck(
|
||||
ISchedulerPlannerQueue plannerQueue,
|
||||
ISchedulerRunnerQueue runnerQueue,
|
||||
ILogger<SchedulerQueueHealthCheck> logger)
|
||||
{
|
||||
_plannerQueue = plannerQueue ?? throw new ArgumentNullException(nameof(plannerQueue));
|
||||
_runnerQueue = runnerQueue ?? throw new ArgumentNullException(nameof(runnerQueue));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task<HealthCheckResult> CheckHealthAsync(
|
||||
HealthCheckContext context,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var failures = new List<string>();
|
||||
|
||||
if (!await ProbeAsync(_plannerQueue, "planner", cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
failures.Add("planner transport unreachable");
|
||||
}
|
||||
|
||||
if (!await ProbeAsync(_runnerQueue, "runner", cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
failures.Add("runner transport unreachable");
|
||||
}
|
||||
|
||||
if (failures.Count == 0)
|
||||
{
|
||||
return HealthCheckResult.Healthy("Scheduler queues reachable.");
|
||||
}
|
||||
|
||||
var description = string.Join("; ", failures);
|
||||
return new HealthCheckResult(
|
||||
context.Registration.FailureStatus,
|
||||
description);
|
||||
}
|
||||
|
||||
private async Task<bool> ProbeAsync(object queue, string label, CancellationToken cancellationToken)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (queue is ISchedulerQueueTransportDiagnostics diagnostics)
|
||||
{
|
||||
await diagnostics.PingAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Scheduler {Label} queue transport ping failed.", label);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.Metrics;
|
||||
using System.Linq;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
|
||||
internal static class SchedulerQueueMetrics
|
||||
{
|
||||
private const string TransportTagName = "transport";
|
||||
private const string QueueTagName = "queue";
|
||||
|
||||
private static readonly Meter Meter = new("StellaOps.Scheduler.Queue");
|
||||
private static readonly Counter<long> EnqueuedCounter = Meter.CreateCounter<long>("scheduler_queue_enqueued_total");
|
||||
private static readonly Counter<long> DeduplicatedCounter = Meter.CreateCounter<long>("scheduler_queue_deduplicated_total");
|
||||
private static readonly Counter<long> AckCounter = Meter.CreateCounter<long>("scheduler_queue_ack_total");
|
||||
private static readonly Counter<long> RetryCounter = Meter.CreateCounter<long>("scheduler_queue_retry_total");
|
||||
private static readonly Counter<long> DeadLetterCounter = Meter.CreateCounter<long>("scheduler_queue_deadletter_total");
|
||||
private static readonly ConcurrentDictionary<(string transport, string queue), long> DepthSamples = new();
|
||||
private static readonly ObservableGauge<long> DepthGauge = Meter.CreateObservableGauge<long>(
|
||||
"scheduler_queue_depth",
|
||||
ObserveDepth);
|
||||
|
||||
public static void RecordEnqueued(string transport, string queue)
|
||||
=> EnqueuedCounter.Add(1, BuildTags(transport, queue));
|
||||
|
||||
public static void RecordDeduplicated(string transport, string queue)
|
||||
=> DeduplicatedCounter.Add(1, BuildTags(transport, queue));
|
||||
|
||||
public static void RecordAck(string transport, string queue)
|
||||
=> AckCounter.Add(1, BuildTags(transport, queue));
|
||||
|
||||
public static void RecordRetry(string transport, string queue)
|
||||
=> RetryCounter.Add(1, BuildTags(transport, queue));
|
||||
|
||||
public static void RecordDeadLetter(string transport, string queue)
|
||||
=> DeadLetterCounter.Add(1, BuildTags(transport, queue));
|
||||
|
||||
public static void RecordDepth(string transport, string queue, long depth)
|
||||
=> DepthSamples[(transport, queue)] = depth;
|
||||
|
||||
public static void RemoveDepth(string transport, string queue)
|
||||
=> DepthSamples.TryRemove((transport, queue), out _);
|
||||
|
||||
internal static IReadOnlyDictionary<(string transport, string queue), long> SnapshotDepths()
|
||||
=> DepthSamples.ToDictionary(pair => pair.Key, pair => pair.Value);
|
||||
|
||||
private static KeyValuePair<string, object?>[] BuildTags(string transport, string queue)
|
||||
=> new[]
|
||||
{
|
||||
new KeyValuePair<string, object?>(TransportTagName, transport),
|
||||
new KeyValuePair<string, object?>(QueueTagName, queue)
|
||||
};
|
||||
|
||||
private static IEnumerable<Measurement<long>> ObserveDepth()
|
||||
{
|
||||
foreach (var sample in DepthSamples)
|
||||
{
|
||||
yield return new Measurement<long>(
|
||||
sample.Value,
|
||||
new KeyValuePair<string, object?>(TransportTagName, sample.Key.transport),
|
||||
new KeyValuePair<string, object?>(QueueTagName, sample.Key.queue));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,134 @@
|
||||
using System;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
|
||||
public sealed class SchedulerQueueOptions
|
||||
{
|
||||
public SchedulerQueueTransportKind Kind { get; set; } = SchedulerQueueTransportKind.Redis;
|
||||
|
||||
public SchedulerRedisQueueOptions Redis { get; set; } = new();
|
||||
|
||||
public SchedulerNatsQueueOptions Nats { get; set; } = new();
|
||||
|
||||
/// <summary>
|
||||
/// Default lease/visibility window applied when callers do not override the duration.
|
||||
/// </summary>
|
||||
public TimeSpan DefaultLeaseDuration { get; set; } = TimeSpan.FromMinutes(5);
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of deliveries before a message is shunted to the dead-letter stream.
|
||||
/// </summary>
|
||||
public int MaxDeliveryAttempts { get; set; } = 5;
|
||||
|
||||
/// <summary>
|
||||
/// Enables routing exhausted deliveries to the configured dead-letter transport.
|
||||
/// When disabled, messages exceeding the attempt budget are dropped after acknowledgement.
|
||||
/// </summary>
|
||||
public bool DeadLetterEnabled { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// Base retry delay used when a message is released for retry.
|
||||
/// </summary>
|
||||
public TimeSpan RetryInitialBackoff { get; set; } = TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <summary>
|
||||
/// Cap applied to the retry delay when exponential backoff is used.
|
||||
/// </summary>
|
||||
public TimeSpan RetryMaxBackoff { get; set; } = TimeSpan.FromMinutes(1);
|
||||
}
|
||||
|
||||
public sealed class SchedulerRedisQueueOptions
|
||||
{
|
||||
public string? ConnectionString { get; set; }
|
||||
|
||||
public int? Database { get; set; }
|
||||
|
||||
public TimeSpan InitializationTimeout { get; set; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
public RedisSchedulerStreamOptions Planner { get; set; } = RedisSchedulerStreamOptions.ForPlanner();
|
||||
|
||||
public RedisSchedulerStreamOptions Runner { get; set; } = RedisSchedulerStreamOptions.ForRunner();
|
||||
}
|
||||
|
||||
public sealed class RedisSchedulerStreamOptions
|
||||
{
|
||||
public string Stream { get; set; } = string.Empty;
|
||||
|
||||
public string ConsumerGroup { get; set; } = string.Empty;
|
||||
|
||||
public string DeadLetterStream { get; set; } = string.Empty;
|
||||
|
||||
public string IdempotencyKeyPrefix { get; set; } = string.Empty;
|
||||
|
||||
public TimeSpan IdempotencyWindow { get; set; } = TimeSpan.FromHours(12);
|
||||
|
||||
public int? ApproximateMaxLength { get; set; }
|
||||
|
||||
public static RedisSchedulerStreamOptions ForPlanner()
|
||||
=> new()
|
||||
{
|
||||
Stream = "scheduler:planner",
|
||||
ConsumerGroup = "scheduler-planners",
|
||||
DeadLetterStream = "scheduler:planner:dead",
|
||||
IdempotencyKeyPrefix = "scheduler:planner:idemp:"
|
||||
};
|
||||
|
||||
public static RedisSchedulerStreamOptions ForRunner()
|
||||
=> new()
|
||||
{
|
||||
Stream = "scheduler:runner",
|
||||
ConsumerGroup = "scheduler-runners",
|
||||
DeadLetterStream = "scheduler:runner:dead",
|
||||
IdempotencyKeyPrefix = "scheduler:runner:idemp:"
|
||||
};
|
||||
}
|
||||
|
||||
public sealed class SchedulerNatsQueueOptions
|
||||
{
|
||||
public string? Url { get; set; }
|
||||
|
||||
public TimeSpan IdleHeartbeat { get; set; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
public SchedulerNatsStreamOptions Planner { get; set; } = SchedulerNatsStreamOptions.ForPlanner();
|
||||
|
||||
public SchedulerNatsStreamOptions Runner { get; set; } = SchedulerNatsStreamOptions.ForRunner();
|
||||
}
|
||||
|
||||
public sealed class SchedulerNatsStreamOptions
|
||||
{
|
||||
public string Stream { get; set; } = string.Empty;
|
||||
|
||||
public string Subject { get; set; } = string.Empty;
|
||||
|
||||
public string DurableConsumer { get; set; } = string.Empty;
|
||||
|
||||
public string DeadLetterStream { get; set; } = string.Empty;
|
||||
|
||||
public string DeadLetterSubject { get; set; } = string.Empty;
|
||||
|
||||
public int MaxAckPending { get; set; } = 64;
|
||||
|
||||
public TimeSpan AckWait { get; set; } = TimeSpan.FromMinutes(5);
|
||||
|
||||
public TimeSpan RetryDelay { get; set; } = TimeSpan.FromSeconds(10);
|
||||
|
||||
public static SchedulerNatsStreamOptions ForPlanner()
|
||||
=> new()
|
||||
{
|
||||
Stream = "SCHEDULER_PLANNER",
|
||||
Subject = "scheduler.planner",
|
||||
DurableConsumer = "scheduler-planners",
|
||||
DeadLetterStream = "SCHEDULER_PLANNER_DEAD",
|
||||
DeadLetterSubject = "scheduler.planner.dead"
|
||||
};
|
||||
|
||||
public static SchedulerNatsStreamOptions ForRunner()
|
||||
=> new()
|
||||
{
|
||||
Stream = "SCHEDULER_RUNNER",
|
||||
Subject = "scheduler.runner",
|
||||
DurableConsumer = "scheduler-runners",
|
||||
DeadLetterStream = "SCHEDULER_RUNNER_DEAD",
|
||||
DeadLetterSubject = "scheduler.runner.dead"
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
using System;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scheduler.Queue.Nats;
|
||||
using StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
|
||||
public static class SchedulerQueueServiceCollectionExtensions
|
||||
{
|
||||
public static IServiceCollection AddSchedulerQueues(
|
||||
this IServiceCollection services,
|
||||
IConfiguration configuration,
|
||||
string sectionName = "scheduler:queue")
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(services);
|
||||
ArgumentNullException.ThrowIfNull(configuration);
|
||||
|
||||
var options = new SchedulerQueueOptions();
|
||||
configuration.GetSection(sectionName).Bind(options);
|
||||
|
||||
services.TryAddSingleton(TimeProvider.System);
|
||||
services.AddSingleton(options);
|
||||
|
||||
services.AddSingleton<ISchedulerPlannerQueue>(sp =>
|
||||
{
|
||||
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
|
||||
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
|
||||
|
||||
return options.Kind switch
|
||||
{
|
||||
SchedulerQueueTransportKind.Redis => new RedisSchedulerPlannerQueue(
|
||||
options,
|
||||
options.Redis,
|
||||
loggerFactory.CreateLogger<RedisSchedulerPlannerQueue>(),
|
||||
timeProvider),
|
||||
SchedulerQueueTransportKind.Nats => new NatsSchedulerPlannerQueue(
|
||||
options,
|
||||
options.Nats,
|
||||
loggerFactory.CreateLogger<NatsSchedulerPlannerQueue>(),
|
||||
timeProvider),
|
||||
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
|
||||
};
|
||||
});
|
||||
|
||||
services.AddSingleton<ISchedulerRunnerQueue>(sp =>
|
||||
{
|
||||
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
|
||||
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
|
||||
|
||||
return options.Kind switch
|
||||
{
|
||||
SchedulerQueueTransportKind.Redis => new RedisSchedulerRunnerQueue(
|
||||
options,
|
||||
options.Redis,
|
||||
loggerFactory.CreateLogger<RedisSchedulerRunnerQueue>(),
|
||||
timeProvider),
|
||||
SchedulerQueueTransportKind.Nats => new NatsSchedulerRunnerQueue(
|
||||
options,
|
||||
options.Nats,
|
||||
loggerFactory.CreateLogger<NatsSchedulerRunnerQueue>(),
|
||||
timeProvider),
|
||||
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
|
||||
};
|
||||
});
|
||||
|
||||
services.AddSingleton<SchedulerQueueHealthCheck>();
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
public static IHealthChecksBuilder AddSchedulerQueueHealthCheck(
|
||||
this IHealthChecksBuilder builder)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(builder);
|
||||
|
||||
builder.Services.TryAddSingleton<SchedulerQueueHealthCheck>();
|
||||
builder.AddCheck<SchedulerQueueHealthCheck>(
|
||||
name: "scheduler-queue",
|
||||
failureStatus: HealthStatus.Unhealthy,
|
||||
tags: new[] { "scheduler", "queue" });
|
||||
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
|
||||
/// <summary>
|
||||
/// Transport backends supported by the scheduler queue abstraction.
|
||||
/// </summary>
|
||||
public enum SchedulerQueueTransportKind
|
||||
{
|
||||
Redis = 0,
|
||||
Nats = 1,
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="StackExchange.Redis" Version="2.8.24" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0-rc.2.25502.107" />
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration.Abstractions" Version="10.0.0-rc.2.25502.107" />
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="10.0.0-rc.2.25502.107" />
|
||||
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.0-rc.2.25502.107" />
|
||||
<PackageReference Include="Microsoft.Extensions.Diagnostics.HealthChecks" Version="10.0.0-rc.2.25502.107" />
|
||||
<PackageReference Include="Microsoft.Extensions.Diagnostics.HealthChecks.Abstractions" Version="10.0.0-rc.2.25502.107" />
|
||||
<PackageReference Include="NATS.Client.Core" Version="2.0.0" />
|
||||
<PackageReference Include="NATS.Client.JetStream" Version="2.0.0" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Scheduler.Models\StellaOps.Scheduler.Models.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,9 @@
|
||||
# Scheduler Queue Task Board (Sprint 16)
|
||||
|
||||
> **Status note (2025-10-19):** Scheduler DTOs and sample payloads are now available (SCHED-MODELS-16-102). Queue tasks remain pending on this board.
|
||||
|
||||
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|
||||
|----|--------|----------|------------|-------------|---------------|
|
||||
| SCHED-QUEUE-16-401 | DONE (2025-10-20) | Scheduler Queue Guild | SCHED-MODELS-16-101 | Implement queue abstraction + Redis Streams adapter (planner inputs, runner segments) with ack/lease semantics. | Integration tests cover enqueue/dequeue/ack; lease renewal implemented; ordering preserved. |
|
||||
| SCHED-QUEUE-16-402 | DONE (2025-10-20) | Scheduler Queue Guild | SCHED-QUEUE-16-401 | Add NATS JetStream adapter with configuration binding, health probes, failover. | Health endpoints verified; failover documented; adapter tested. |
|
||||
| SCHED-QUEUE-16-403 | DONE (2025-10-20) | Scheduler Queue Guild | SCHED-QUEUE-16-401 | Dead-letter handling + metrics (queue depth, retry counts), configuration toggles. | Dead-letter policy tested; metrics exported; docs updated. |
|
||||
Reference in New Issue
Block a user