Restructure solution layout by module

This commit is contained in:
master
2025-10-28 15:10:40 +02:00
parent 95daa159c4
commit d870da18ce
4103 changed files with 192899 additions and 187024 deletions

View File

@@ -0,0 +1,4 @@
# StellaOps.Scheduler.Queue — Agent Charter
## Mission
Provide queue abstraction (Redis Streams / NATS JetStream) for planner inputs and runner segments per `docs/ARCHITECTURE_SCHEDULER.md`.

View File

@@ -0,0 +1,3 @@
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("StellaOps.Scheduler.Queue.Tests")]

View File

@@ -0,0 +1,9 @@
using System.Threading;
using System.Threading.Tasks;
namespace StellaOps.Scheduler.Queue;
internal interface ISchedulerQueueTransportDiagnostics
{
ValueTask PingAsync(CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,26 @@
using System.Collections.Generic;
namespace StellaOps.Scheduler.Queue.Nats;
internal interface INatsSchedulerQueuePayload<TMessage>
{
string QueueName { get; }
string GetIdempotencyKey(TMessage message);
byte[] Serialize(TMessage message);
TMessage Deserialize(byte[] payload);
string GetRunId(TMessage message);
string GetTenantId(TMessage message);
string? GetScheduleId(TMessage message);
string? GetSegmentId(TMessage message);
string? GetCorrelationId(TMessage message);
IReadOnlyDictionary<string, string>? GetAttributes(TMessage message);
}

View File

@@ -0,0 +1,66 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue.Nats;
internal sealed class NatsSchedulerPlannerQueue
: NatsSchedulerQueueBase<PlannerQueueMessage>, ISchedulerPlannerQueue
{
public NatsSchedulerPlannerQueue(
SchedulerQueueOptions queueOptions,
SchedulerNatsQueueOptions natsOptions,
ILogger<NatsSchedulerPlannerQueue> logger,
TimeProvider timeProvider,
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
: base(
queueOptions,
natsOptions,
natsOptions.Planner,
PlannerPayload.Instance,
logger,
timeProvider,
connectionFactory)
{
}
private sealed class PlannerPayload : INatsSchedulerQueuePayload<PlannerQueueMessage>
{
public static PlannerPayload Instance { get; } = new();
public string QueueName => "planner";
public string GetIdempotencyKey(PlannerQueueMessage message)
=> message.IdempotencyKey;
public byte[] Serialize(PlannerQueueMessage message)
=> Encoding.UTF8.GetBytes(CanonicalJsonSerializer.Serialize(message));
public PlannerQueueMessage Deserialize(byte[] payload)
=> CanonicalJsonSerializer.Deserialize<PlannerQueueMessage>(Encoding.UTF8.GetString(payload));
public string GetRunId(PlannerQueueMessage message)
=> message.Run.Id;
public string GetTenantId(PlannerQueueMessage message)
=> message.Run.TenantId;
public string? GetScheduleId(PlannerQueueMessage message)
=> message.ScheduleId;
public string? GetSegmentId(PlannerQueueMessage message)
=> null;
public string? GetCorrelationId(PlannerQueueMessage message)
=> message.CorrelationId;
public IReadOnlyDictionary<string, string>? GetAttributes(PlannerQueueMessage message)
=> null;
}
}

View File

@@ -0,0 +1,692 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using NATS.Client.JetStream.Models;
namespace StellaOps.Scheduler.Queue.Nats;
internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMessage>, IAsyncDisposable, ISchedulerQueueTransportDiagnostics
{
private const string TransportName = "nats";
private static readonly INatsSerializer<byte[]> PayloadSerializer = NatsRawSerializer<byte[]>.Default;
private readonly SchedulerQueueOptions _queueOptions;
private readonly SchedulerNatsQueueOptions _natsOptions;
private readonly SchedulerNatsStreamOptions _streamOptions;
private readonly INatsSchedulerQueuePayload<TMessage> _payload;
private readonly ILogger _logger;
private readonly TimeProvider _timeProvider;
private readonly SemaphoreSlim _connectionGate = new(1, 1);
private readonly Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>> _connectionFactory;
private NatsConnection? _connection;
private NatsJSContext? _jsContext;
private INatsJSConsumer? _consumer;
private bool _disposed;
private long _approximateDepth;
protected NatsSchedulerQueueBase(
SchedulerQueueOptions queueOptions,
SchedulerNatsQueueOptions natsOptions,
SchedulerNatsStreamOptions streamOptions,
INatsSchedulerQueuePayload<TMessage> payload,
ILogger logger,
TimeProvider timeProvider,
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
{
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
_natsOptions = natsOptions ?? throw new ArgumentNullException(nameof(natsOptions));
_streamOptions = streamOptions ?? throw new ArgumentNullException(nameof(streamOptions));
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? TimeProvider.System;
_connectionFactory = connectionFactory ?? ((opts, cancellationToken) => new ValueTask<NatsConnection>(new NatsConnection(opts)));
if (string.IsNullOrWhiteSpace(_natsOptions.Url))
{
throw new InvalidOperationException("NATS connection URL must be configured for the scheduler queue.");
}
}
public async ValueTask<SchedulerQueueEnqueueResult> EnqueueAsync(
TMessage message,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(message);
var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
await EnsureStreamAndConsumerAsync(js, cancellationToken).ConfigureAwait(false);
var payloadBytes = _payload.Serialize(message);
var idempotencyKey = _payload.GetIdempotencyKey(message);
var headers = BuildHeaders(message, idempotencyKey);
var publishOptions = new NatsJSPubOpts
{
MsgId = idempotencyKey,
RetryAttempts = 0
};
var ack = await js.PublishAsync(
_streamOptions.Subject,
payloadBytes,
PayloadSerializer,
publishOptions,
headers,
cancellationToken)
.ConfigureAwait(false);
if (ack.Duplicate)
{
SchedulerQueueMetrics.RecordDeduplicated(TransportName, _payload.QueueName);
_logger.LogDebug(
"Duplicate enqueue detected for scheduler {Queue} message idempotency key {Key}; sequence {Sequence} reused.",
_payload.QueueName,
idempotencyKey,
ack.Seq);
PublishDepth();
return new SchedulerQueueEnqueueResult(ack.Seq.ToString(), true);
}
SchedulerQueueMetrics.RecordEnqueued(TransportName, _payload.QueueName);
_logger.LogDebug(
"Enqueued scheduler {Queue} message into stream {Stream} with sequence {Sequence}.",
_payload.QueueName,
ack.Stream,
ack.Seq);
IncrementDepth();
return new SchedulerQueueEnqueueResult(ack.Seq.ToString(), false);
}
public async ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> LeaseAsync(
SchedulerQueueLeaseRequest request,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(request);
var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
var consumer = await EnsureStreamAndConsumerAsync(js, cancellationToken).ConfigureAwait(false);
var fetchOpts = new NatsJSFetchOpts
{
MaxMsgs = request.BatchSize,
Expires = request.LeaseDuration,
IdleHeartbeat = _natsOptions.IdleHeartbeat
};
var now = _timeProvider.GetUtcNow();
var leases = new List<ISchedulerQueueLease<TMessage>>(request.BatchSize);
await foreach (var message in consumer.FetchAsync(PayloadSerializer, fetchOpts, cancellationToken).ConfigureAwait(false))
{
var lease = CreateLease(message, request.Consumer, now, request.LeaseDuration);
if (lease is not null)
{
leases.Add(lease);
}
}
PublishDepth();
return leases;
}
public async ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> ClaimExpiredAsync(
SchedulerQueueClaimOptions options,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(options);
var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
var consumer = await EnsureStreamAndConsumerAsync(js, cancellationToken).ConfigureAwait(false);
var fetchOpts = new NatsJSFetchOpts
{
MaxMsgs = options.BatchSize,
Expires = options.MinIdleTime,
IdleHeartbeat = _natsOptions.IdleHeartbeat
};
var now = _timeProvider.GetUtcNow();
var leases = new List<ISchedulerQueueLease<TMessage>>(options.BatchSize);
await foreach (var message in consumer.FetchAsync(PayloadSerializer, fetchOpts, cancellationToken).ConfigureAwait(false))
{
var deliveries = (int)(message.Metadata?.NumDelivered ?? 1);
if (deliveries <= 1)
{
await message.NakAsync(new AckOpts(), TimeSpan.Zero, cancellationToken).ConfigureAwait(false);
continue;
}
var lease = CreateLease(message, options.ClaimantConsumer, now, _queueOptions.DefaultLeaseDuration);
if (lease is not null)
{
leases.Add(lease);
}
}
PublishDepth();
return leases;
}
public async ValueTask DisposeAsync()
{
if (_disposed)
{
return;
}
_disposed = true;
if (_connection is not null)
{
await _connection.DisposeAsync().ConfigureAwait(false);
}
_connectionGate.Dispose();
SchedulerQueueMetrics.RemoveDepth(TransportName, _payload.QueueName);
GC.SuppressFinalize(this);
}
public async ValueTask PingAsync(CancellationToken cancellationToken)
{
var connection = await EnsureConnectionAsync(cancellationToken).ConfigureAwait(false);
await connection.PingAsync(cancellationToken).ConfigureAwait(false);
}
internal async Task AcknowledgeAsync(NatsSchedulerQueueLease<TMessage> lease, CancellationToken cancellationToken)
{
if (!lease.TryBeginCompletion())
{
return;
}
await lease.RawMessage.AckAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);
SchedulerQueueMetrics.RecordAck(TransportName, _payload.QueueName);
DecrementDepth();
}
internal async Task RenewAsync(NatsSchedulerQueueLease<TMessage> lease, TimeSpan leaseDuration, CancellationToken cancellationToken)
{
await lease.RawMessage.AckProgressAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);
lease.RefreshLease(_timeProvider.GetUtcNow().Add(leaseDuration));
}
internal async Task ReleaseAsync(NatsSchedulerQueueLease<TMessage> lease, SchedulerQueueReleaseDisposition disposition, CancellationToken cancellationToken)
{
if (disposition == SchedulerQueueReleaseDisposition.Retry && lease.Attempt >= _queueOptions.MaxDeliveryAttempts)
{
await DeadLetterAsync(lease, $"max-delivery-attempts:{lease.Attempt}", cancellationToken).ConfigureAwait(false);
return;
}
if (!lease.TryBeginCompletion())
{
return;
}
if (disposition == SchedulerQueueReleaseDisposition.Retry)
{
SchedulerQueueMetrics.RecordRetry(TransportName, _payload.QueueName);
var delay = CalculateBackoff(lease.Attempt + 1);
lease.IncrementAttempt();
await lease.RawMessage.NakAsync(new AckOpts(), delay, cancellationToken).ConfigureAwait(false);
_logger.LogWarning(
"Requeued scheduler {Queue} message {RunId} with delay {Delay} (attempt {Attempt}).",
_payload.QueueName,
lease.RunId,
delay,
lease.Attempt);
}
else
{
await lease.RawMessage.AckTerminateAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);
SchedulerQueueMetrics.RecordAck(TransportName, _payload.QueueName);
DecrementDepth();
_logger.LogInformation(
"Abandoned scheduler {Queue} message {RunId} after {Attempt} attempt(s).",
_payload.QueueName,
lease.RunId,
lease.Attempt);
}
PublishDepth();
}
internal async Task DeadLetterAsync(NatsSchedulerQueueLease<TMessage> lease, string reason, CancellationToken cancellationToken)
{
if (!lease.TryBeginCompletion())
{
return;
}
await lease.RawMessage.AckAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);
DecrementDepth();
var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
if (!_queueOptions.DeadLetterEnabled)
{
_logger.LogWarning(
"Dropped scheduler {Queue} message {RunId} after {Attempt} attempt(s); dead-letter disabled. Reason: {Reason}",
_payload.QueueName,
lease.RunId,
lease.Attempt,
reason);
PublishDepth();
return;
}
await EnsureDeadLetterStreamAsync(js, cancellationToken).ConfigureAwait(false);
var headers = BuildDeadLetterHeaders(lease, reason);
await js.PublishAsync(
_streamOptions.DeadLetterSubject,
lease.Payload,
PayloadSerializer,
new NatsJSPubOpts(),
headers,
cancellationToken)
.ConfigureAwait(false);
SchedulerQueueMetrics.RecordDeadLetter(TransportName, _payload.QueueName);
_logger.LogError(
"Dead-lettered scheduler {Queue} message {RunId} after {Attempt} attempt(s): {Reason}",
_payload.QueueName,
lease.RunId,
lease.Attempt,
reason);
PublishDepth();
}
private async Task<NatsJSContext> GetJetStreamAsync(CancellationToken cancellationToken)
{
if (_jsContext is not null)
{
return _jsContext;
}
var connection = await EnsureConnectionAsync(cancellationToken).ConfigureAwait(false);
await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
_jsContext ??= new NatsJSContext(connection);
return _jsContext;
}
finally
{
_connectionGate.Release();
}
}
private async ValueTask<INatsJSConsumer> EnsureStreamAndConsumerAsync(NatsJSContext js, CancellationToken cancellationToken)
{
if (_consumer is not null)
{
return _consumer;
}
await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
if (_consumer is not null)
{
return _consumer;
}
await EnsureStreamAsync(js, cancellationToken).ConfigureAwait(false);
await EnsureDeadLetterStreamAsync(js, cancellationToken).ConfigureAwait(false);
var consumerConfig = new ConsumerConfig
{
DurableName = _streamOptions.DurableConsumer,
AckPolicy = ConsumerConfigAckPolicy.Explicit,
ReplayPolicy = ConsumerConfigReplayPolicy.Instant,
DeliverPolicy = ConsumerConfigDeliverPolicy.All,
AckWait = ToNanoseconds(_streamOptions.AckWait),
MaxAckPending = Math.Max(1, _streamOptions.MaxAckPending),
MaxDeliver = Math.Max(1, _queueOptions.MaxDeliveryAttempts),
FilterSubjects = new[] { _streamOptions.Subject }
};
try
{
_consumer = await js.CreateConsumerAsync(
_streamOptions.Stream,
consumerConfig,
cancellationToken)
.ConfigureAwait(false);
}
catch (NatsJSApiException apiEx)
{
_logger.LogDebug(apiEx,
"CreateConsumerAsync failed with code {Code}; attempting to reuse durable {Durable}.",
apiEx.Error?.Code,
_streamOptions.DurableConsumer);
_consumer = await js.GetConsumerAsync(
_streamOptions.Stream,
_streamOptions.DurableConsumer,
cancellationToken)
.ConfigureAwait(false);
}
return _consumer;
}
finally
{
_connectionGate.Release();
}
}
private async Task EnsureStreamAsync(NatsJSContext js, CancellationToken cancellationToken)
{
try
{
await js.GetStreamAsync(
_streamOptions.Stream,
new StreamInfoRequest(),
cancellationToken)
.ConfigureAwait(false);
}
catch (NatsJSApiException)
{
var config = new StreamConfig(
name: _streamOptions.Stream,
subjects: new[] { _streamOptions.Subject })
{
Retention = StreamConfigRetention.Workqueue,
Storage = StreamConfigStorage.File,
MaxConsumers = -1,
MaxMsgs = -1,
MaxBytes = -1,
MaxAge = 0
};
await js.CreateStreamAsync(config, cancellationToken).ConfigureAwait(false);
_logger.LogInformation(
"Created NATS JetStream stream {Stream} ({Subject}) for scheduler {Queue} queue.",
_streamOptions.Stream,
_streamOptions.Subject,
_payload.QueueName);
}
}
private async Task EnsureDeadLetterStreamAsync(NatsJSContext js, CancellationToken cancellationToken)
{
if (string.IsNullOrWhiteSpace(_streamOptions.DeadLetterStream) || string.IsNullOrWhiteSpace(_streamOptions.DeadLetterSubject))
{
return;
}
try
{
await js.GetStreamAsync(
_streamOptions.DeadLetterStream,
new StreamInfoRequest(),
cancellationToken)
.ConfigureAwait(false);
}
catch (NatsJSApiException)
{
var config = new StreamConfig(
name: _streamOptions.DeadLetterStream,
subjects: new[] { _streamOptions.DeadLetterSubject })
{
Retention = StreamConfigRetention.Workqueue,
Storage = StreamConfigStorage.File,
MaxConsumers = -1,
MaxMsgs = -1,
MaxBytes = -1,
MaxAge = 0
};
await js.CreateStreamAsync(config, cancellationToken).ConfigureAwait(false);
_logger.LogInformation(
"Created NATS JetStream dead-letter stream {Stream} ({Subject}) for scheduler {Queue} queue.",
_streamOptions.DeadLetterStream,
_streamOptions.DeadLetterSubject,
_payload.QueueName);
}
}
private async Task<NatsConnection> EnsureConnectionAsync(CancellationToken cancellationToken)
{
if (_connection is not null)
{
return _connection;
}
await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
if (_connection is not null)
{
return _connection;
}
var options = new NatsOpts
{
Url = _natsOptions.Url!,
Name = $"stellaops-scheduler-{_payload.QueueName}-queue",
CommandTimeout = TimeSpan.FromSeconds(10),
RequestTimeout = TimeSpan.FromSeconds(20),
PingInterval = TimeSpan.FromSeconds(30)
};
_connection = await _connectionFactory(options, cancellationToken).ConfigureAwait(false);
await _connection.ConnectAsync().ConfigureAwait(false);
return _connection;
}
finally
{
_connectionGate.Release();
}
}
private NatsSchedulerQueueLease<TMessage>? CreateLease(
NatsJSMsg<byte[]> message,
string consumer,
DateTimeOffset now,
TimeSpan leaseDuration)
{
var payload = message.Data ?? ReadOnlyMemory<byte>.Empty;
if (payload.IsEmpty)
{
return null;
}
TMessage deserialized;
try
{
deserialized = _payload.Deserialize(payload.ToArray());
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize scheduler {Queue} payload from NATS sequence {Sequence}.", _payload.QueueName, message.Metadata?.Sequence);
return null;
}
var attempt = (int)(message.Metadata?.NumDelivered ?? 1);
if (attempt <= 0)
{
attempt = 1;
}
var headers = message.Headers ?? new NatsHeaders();
var enqueuedAt = headers.TryGetValue(SchedulerQueueFields.EnqueuedAt, out var enqueuedValues) && enqueuedValues.Count > 0
&& long.TryParse(enqueuedValues[0], out var unix)
? DateTimeOffset.FromUnixTimeMilliseconds(unix)
: now;
var leaseExpires = now.Add(leaseDuration);
var runId = _payload.GetRunId(deserialized);
var tenantId = _payload.GetTenantId(deserialized);
var scheduleId = _payload.GetScheduleId(deserialized);
var segmentId = _payload.GetSegmentId(deserialized);
var correlationId = _payload.GetCorrelationId(deserialized);
var attributes = _payload.GetAttributes(deserialized) ?? new Dictionary<string, string>();
var attributeView = attributes.Count == 0
? EmptyReadOnlyDictionary<string, string>.Instance
: new ReadOnlyDictionary<string, string>(new Dictionary<string, string>(attributes, StringComparer.Ordinal));
return new NatsSchedulerQueueLease<TMessage>(
this,
message,
payload.ToArray(),
_payload.GetIdempotencyKey(deserialized),
runId,
tenantId,
scheduleId,
segmentId,
correlationId,
attributeView,
deserialized,
attempt,
enqueuedAt,
leaseExpires,
consumer);
}
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey)
{
var headers = new NatsHeaders
{
{ SchedulerQueueFields.IdempotencyKey, idempotencyKey },
{ SchedulerQueueFields.RunId, _payload.GetRunId(message) },
{ SchedulerQueueFields.TenantId, _payload.GetTenantId(message) },
{ SchedulerQueueFields.QueueKind, _payload.QueueName },
{ SchedulerQueueFields.EnqueuedAt, _timeProvider.GetUtcNow().ToUnixTimeMilliseconds().ToString() }
};
var scheduleId = _payload.GetScheduleId(message);
if (!string.IsNullOrWhiteSpace(scheduleId))
{
headers.Add(SchedulerQueueFields.ScheduleId, scheduleId);
}
var segmentId = _payload.GetSegmentId(message);
if (!string.IsNullOrWhiteSpace(segmentId))
{
headers.Add(SchedulerQueueFields.SegmentId, segmentId);
}
var correlationId = _payload.GetCorrelationId(message);
if (!string.IsNullOrWhiteSpace(correlationId))
{
headers.Add(SchedulerQueueFields.CorrelationId, correlationId);
}
var attributes = _payload.GetAttributes(message);
if (attributes is not null)
{
foreach (var kvp in attributes)
{
headers.Add(SchedulerQueueFields.AttributePrefix + kvp.Key, kvp.Value);
}
}
return headers;
}
private NatsHeaders BuildDeadLetterHeaders(NatsSchedulerQueueLease<TMessage> lease, string reason)
{
var headers = new NatsHeaders
{
{ SchedulerQueueFields.RunId, lease.RunId },
{ SchedulerQueueFields.TenantId, lease.TenantId },
{ SchedulerQueueFields.QueueKind, _payload.QueueName },
{ "reason", reason }
};
if (!string.IsNullOrWhiteSpace(lease.ScheduleId))
{
headers.Add(SchedulerQueueFields.ScheduleId, lease.ScheduleId);
}
if (!string.IsNullOrWhiteSpace(lease.CorrelationId))
{
headers.Add(SchedulerQueueFields.CorrelationId, lease.CorrelationId);
}
if (!string.IsNullOrWhiteSpace(lease.SegmentId))
{
headers.Add(SchedulerQueueFields.SegmentId, lease.SegmentId);
}
return headers;
}
private TimeSpan CalculateBackoff(int attempt)
{
var initial = _queueOptions.RetryInitialBackoff > TimeSpan.Zero
? _queueOptions.RetryInitialBackoff
: _streamOptions.RetryDelay;
if (initial <= TimeSpan.Zero)
{
return TimeSpan.Zero;
}
if (attempt <= 1)
{
return initial;
}
var max = _queueOptions.RetryMaxBackoff > TimeSpan.Zero
? _queueOptions.RetryMaxBackoff
: initial;
var exponent = attempt - 1;
var scaledTicks = initial.Ticks * Math.Pow(2, exponent - 1);
var cappedTicks = Math.Min(max.Ticks, scaledTicks);
return TimeSpan.FromTicks((long)Math.Max(initial.Ticks, cappedTicks));
}
private static long ToNanoseconds(TimeSpan value)
=> value <= TimeSpan.Zero ? 0 : (long)(value.TotalMilliseconds * 1_000_000.0);
private sealed class EmptyReadOnlyDictionary<TKey, TValue>
where TKey : notnull
{
public static readonly IReadOnlyDictionary<TKey, TValue> Instance =
new ReadOnlyDictionary<TKey, TValue>(new Dictionary<TKey, TValue>(0, EqualityComparer<TKey>.Default));
}
private void IncrementDepth()
{
var depth = Interlocked.Increment(ref _approximateDepth);
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
}
private void DecrementDepth()
{
var depth = Interlocked.Decrement(ref _approximateDepth);
if (depth < 0)
{
depth = Interlocked.Exchange(ref _approximateDepth, 0);
}
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
}
private void PublishDepth()
{
var depth = Volatile.Read(ref _approximateDepth);
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
}
}

View File

@@ -0,0 +1,101 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using NATS.Client.JetStream;
namespace StellaOps.Scheduler.Queue.Nats;
internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<TMessage>
{
private readonly NatsSchedulerQueueBase<TMessage> _queue;
private int _completed;
internal NatsSchedulerQueueLease(
NatsSchedulerQueueBase<TMessage> queue,
NatsJSMsg<byte[]> message,
byte[] payload,
string idempotencyKey,
string runId,
string tenantId,
string? scheduleId,
string? segmentId,
string? correlationId,
IReadOnlyDictionary<string, string> attributes,
TMessage deserialized,
int attempt,
DateTimeOffset enqueuedAt,
DateTimeOffset leaseExpiresAt,
string consumer)
{
_queue = queue;
MessageId = message.Metadata?.Sequence.ToString() ?? idempotencyKey;
RunId = runId;
TenantId = tenantId;
ScheduleId = scheduleId;
SegmentId = segmentId;
CorrelationId = correlationId;
Attributes = attributes;
Attempt = attempt;
EnqueuedAt = enqueuedAt;
LeaseExpiresAt = leaseExpiresAt;
Consumer = consumer;
IdempotencyKey = idempotencyKey;
Message = deserialized;
_message = message;
Payload = payload;
}
private readonly NatsJSMsg<byte[]> _message;
internal NatsJSMsg<byte[]> RawMessage => _message;
internal byte[] Payload { get; }
public string MessageId { get; }
public string IdempotencyKey { get; }
public string RunId { get; }
public string TenantId { get; }
public string? ScheduleId { get; }
public string? SegmentId { get; }
public string? CorrelationId { get; }
public IReadOnlyDictionary<string, string> Attributes { get; }
public TMessage Message { get; }
public int Attempt { get; private set; }
public DateTimeOffset EnqueuedAt { get; }
public DateTimeOffset LeaseExpiresAt { get; private set; }
public string Consumer { get; }
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
=> _queue.AcknowledgeAsync(this, cancellationToken);
public Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default)
=> _queue.RenewAsync(this, leaseDuration, cancellationToken);
public Task ReleaseAsync(SchedulerQueueReleaseDisposition disposition, CancellationToken cancellationToken = default)
=> _queue.ReleaseAsync(this, disposition, cancellationToken);
public Task DeadLetterAsync(string reason, CancellationToken cancellationToken = default)
=> _queue.DeadLetterAsync(this, reason, cancellationToken);
internal bool TryBeginCompletion()
=> Interlocked.CompareExchange(ref _completed, 1, 0) == 0;
internal void RefreshLease(DateTimeOffset expiresAt)
=> LeaseExpiresAt = expiresAt;
internal void IncrementAttempt()
=> Attempt++;
}

View File

@@ -0,0 +1,74 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue.Nats;
internal sealed class NatsSchedulerRunnerQueue
: NatsSchedulerQueueBase<RunnerSegmentQueueMessage>, ISchedulerRunnerQueue
{
public NatsSchedulerRunnerQueue(
SchedulerQueueOptions queueOptions,
SchedulerNatsQueueOptions natsOptions,
ILogger<NatsSchedulerRunnerQueue> logger,
TimeProvider timeProvider,
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
: base(
queueOptions,
natsOptions,
natsOptions.Runner,
RunnerPayload.Instance,
logger,
timeProvider,
connectionFactory)
{
}
private sealed class RunnerPayload : INatsSchedulerQueuePayload<RunnerSegmentQueueMessage>
{
public static RunnerPayload Instance { get; } = new();
public string QueueName => "runner";
public string GetIdempotencyKey(RunnerSegmentQueueMessage message)
=> message.IdempotencyKey;
public byte[] Serialize(RunnerSegmentQueueMessage message)
=> Encoding.UTF8.GetBytes(CanonicalJsonSerializer.Serialize(message));
public RunnerSegmentQueueMessage Deserialize(byte[] payload)
=> CanonicalJsonSerializer.Deserialize<RunnerSegmentQueueMessage>(Encoding.UTF8.GetString(payload));
public string GetRunId(RunnerSegmentQueueMessage message)
=> message.RunId;
public string GetTenantId(RunnerSegmentQueueMessage message)
=> message.TenantId;
public string? GetScheduleId(RunnerSegmentQueueMessage message)
=> message.ScheduleId;
public string? GetSegmentId(RunnerSegmentQueueMessage message)
=> message.SegmentId;
public string? GetCorrelationId(RunnerSegmentQueueMessage message)
=> message.CorrelationId;
public IReadOnlyDictionary<string, string>? GetAttributes(RunnerSegmentQueueMessage message)
{
if (message.Attributes is null || message.Attributes.Count == 0)
{
return null;
}
return message.Attributes.ToDictionary(kvp => kvp.Key, kvp => kvp.Value, StringComparer.Ordinal);
}
}
}

View File

@@ -0,0 +1,45 @@
# Scheduler Queue — Sprint 16 Coordination Notes
Queue work now has concrete contracts from `StellaOps.Scheduler.Models`:
* Planner inputs reference `Schedule` and `ImpactSet` samples (`samples/api/scheduler/`).
* Runner segment payloads should carry `runId`, `scheduleId?`, `tenantId`, and the impacted digest list (mirrors `Run.Deltas`).
* Notify fanout relies on the `DeltaSummary` shape already emitted by the model layer.
## Action items for SCHED-QUEUE-16-401..403
1. Reference `StellaOps.Scheduler.Models` so adapters can serialise `Run`/`DeltaSummary` without bespoke DTOs.
2. Use the canonical serializer for queue messages to keep ordering consistent with API payloads.
3. Coverage: add fixture-driven tests that enqueue the sample payloads, then dequeue and re-serialise to verify byte-for-byte stability.
4. Expose queue depth/lease metrics with the identifiers provided by the models (`Run.Id`, `Schedule.Id`).
## JetStream failover notes
- `SchedulerQueueOptions.Kind = "nats"` will spin up `NatsSchedulerPlannerQueue` / `NatsSchedulerRunnerQueue` instances backed by JetStream.
- `SchedulerQueueHealthCheck` pings both planner and runner transports; register via `AddSchedulerQueueHealthCheck()` to surface in `/healthz`.
- Planner defaults:
```yaml
scheduler:
queue:
kind: nats
deadLetterEnabled: true
nats:
url: "nats://nats:4222"
planner:
stream: SCHEDULER_PLANNER
subject: scheduler.planner
durableConsumer: scheduler-planners
deadLetterStream: SCHEDULER_PLANNER_DEAD
deadLetterSubject: scheduler.planner.dead
runner:
stream: SCHEDULER_RUNNER
subject: scheduler.runner
durableConsumer: scheduler-runners
redis:
deadLetterStream: scheduler:planner:dead
idempotencyKeyPrefix: scheduler:planner:idemp:
```
- Retry / dead-letter semantics mirror the Redis adapter: attempts beyond `MaxDeliveryAttempts` are shipped to the configured dead-letter stream with headers describing `runId`, `scheduleId`, and failure reasons. Set `deadLetterEnabled: false` to drop exhausted messages instead.
- Depth metrics surface through `scheduler_queue_depth{transport,queue}`; both transports publish lightweight counters to drive alerting dashboards.
These notes unblock the queue guild now that SCHED-MODELS-16-102 is complete.

View File

@@ -0,0 +1,26 @@
using System.Collections.Generic;
namespace StellaOps.Scheduler.Queue.Redis;
internal interface IRedisSchedulerQueuePayload<TMessage>
{
string QueueName { get; }
string GetIdempotencyKey(TMessage message);
string Serialize(TMessage message);
TMessage Deserialize(string payload);
string GetRunId(TMessage message);
string GetTenantId(TMessage message);
string? GetScheduleId(TMessage message);
string? GetSegmentId(TMessage message);
string? GetCorrelationId(TMessage message);
IReadOnlyDictionary<string, string>? GetAttributes(TMessage message);
}

View File

@@ -0,0 +1,64 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StackExchange.Redis;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue.Redis;
internal sealed class RedisSchedulerPlannerQueue
: RedisSchedulerQueueBase<PlannerQueueMessage>, ISchedulerPlannerQueue
{
public RedisSchedulerPlannerQueue(
SchedulerQueueOptions queueOptions,
SchedulerRedisQueueOptions redisOptions,
ILogger<RedisSchedulerPlannerQueue> logger,
TimeProvider timeProvider,
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
: base(
queueOptions,
redisOptions,
redisOptions.Planner,
PlannerPayload.Instance,
logger,
timeProvider,
connectionFactory)
{
}
private sealed class PlannerPayload : IRedisSchedulerQueuePayload<PlannerQueueMessage>
{
public static PlannerPayload Instance { get; } = new();
public string QueueName => "planner";
public string GetIdempotencyKey(PlannerQueueMessage message)
=> message.IdempotencyKey;
public string Serialize(PlannerQueueMessage message)
=> CanonicalJsonSerializer.Serialize(message);
public PlannerQueueMessage Deserialize(string payload)
=> CanonicalJsonSerializer.Deserialize<PlannerQueueMessage>(payload);
public string GetRunId(PlannerQueueMessage message)
=> message.Run.Id;
public string GetTenantId(PlannerQueueMessage message)
=> message.Run.TenantId;
public string? GetScheduleId(PlannerQueueMessage message)
=> message.ScheduleId;
public string? GetSegmentId(PlannerQueueMessage message)
=> null;
public string? GetCorrelationId(PlannerQueueMessage message)
=> message.CorrelationId;
public IReadOnlyDictionary<string, string>? GetAttributes(PlannerQueueMessage message)
=> null;
}
}

View File

@@ -0,0 +1,805 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StackExchange.Redis;
namespace StellaOps.Scheduler.Queue.Redis;
internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMessage>, IAsyncDisposable, ISchedulerQueueTransportDiagnostics
{
private const string TransportName = "redis";
private readonly SchedulerQueueOptions _queueOptions;
private readonly SchedulerRedisQueueOptions _redisOptions;
private readonly RedisSchedulerStreamOptions _streamOptions;
private readonly IRedisSchedulerQueuePayload<TMessage> _payload;
private readonly ILogger _logger;
private readonly TimeProvider _timeProvider;
private readonly Func<ConfigurationOptions, Task<IConnectionMultiplexer>> _connectionFactory;
private readonly SemaphoreSlim _connectionLock = new(1, 1);
private readonly SemaphoreSlim _groupInitLock = new(1, 1);
private long _approximateDepth;
private IConnectionMultiplexer? _connection;
private volatile bool _groupInitialized;
private bool _disposed;
protected RedisSchedulerQueueBase(
SchedulerQueueOptions queueOptions,
SchedulerRedisQueueOptions redisOptions,
RedisSchedulerStreamOptions streamOptions,
IRedisSchedulerQueuePayload<TMessage> payload,
ILogger logger,
TimeProvider timeProvider,
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
{
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
_redisOptions = redisOptions ?? throw new ArgumentNullException(nameof(redisOptions));
_streamOptions = streamOptions ?? throw new ArgumentNullException(nameof(streamOptions));
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_connectionFactory = connectionFactory ?? (config => Task.FromResult<IConnectionMultiplexer>(ConnectionMultiplexer.Connect(config)));
if (string.IsNullOrWhiteSpace(_redisOptions.ConnectionString))
{
throw new InvalidOperationException("Redis connection string must be configured for the scheduler queue.");
}
if (string.IsNullOrWhiteSpace(_streamOptions.Stream))
{
throw new InvalidOperationException("Redis stream name must be configured for the scheduler queue.");
}
if (string.IsNullOrWhiteSpace(_streamOptions.ConsumerGroup))
{
throw new InvalidOperationException("Redis consumer group must be configured for the scheduler queue.");
}
}
public async ValueTask<SchedulerQueueEnqueueResult> EnqueueAsync(
TMessage message,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(message);
cancellationToken.ThrowIfCancellationRequested();
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
await EnsureConsumerGroupAsync(database, cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var attempt = 1;
var entries = BuildEntries(message, now, attempt);
var messageId = await AddToStreamAsync(
database,
_streamOptions.Stream,
entries,
_streamOptions.ApproximateMaxLength,
_streamOptions.ApproximateMaxLength is not null)
.ConfigureAwait(false);
var idempotencyKey = BuildIdempotencyKey(_payload.GetIdempotencyKey(message));
var stored = await database.StringSetAsync(
idempotencyKey,
messageId,
when: When.NotExists,
expiry: _streamOptions.IdempotencyWindow)
.ConfigureAwait(false);
if (!stored)
{
await database.StreamDeleteAsync(_streamOptions.Stream, new RedisValue[] { messageId }).ConfigureAwait(false);
var existing = await database.StringGetAsync(idempotencyKey).ConfigureAwait(false);
var reusable = existing.IsNullOrEmpty ? messageId : existing;
SchedulerQueueMetrics.RecordDeduplicated(TransportName, _payload.QueueName);
_logger.LogDebug(
"Duplicate enqueue detected for scheduler queue {Queue} with key {Key}; returning existing stream id {StreamId}.",
_payload.QueueName,
idempotencyKey,
reusable.ToString());
PublishDepth();
return new SchedulerQueueEnqueueResult(reusable.ToString(), true);
}
SchedulerQueueMetrics.RecordEnqueued(TransportName, _payload.QueueName);
_logger.LogDebug(
"Enqueued {Queue} message into {Stream} with id {StreamId}.",
_payload.QueueName,
_streamOptions.Stream,
messageId.ToString());
IncrementDepth();
return new SchedulerQueueEnqueueResult(messageId.ToString(), false);
}
public async ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> LeaseAsync(
SchedulerQueueLeaseRequest request,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(request);
cancellationToken.ThrowIfCancellationRequested();
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
await EnsureConsumerGroupAsync(database, cancellationToken).ConfigureAwait(false);
var entries = await database.StreamReadGroupAsync(
_streamOptions.Stream,
_streamOptions.ConsumerGroup,
request.Consumer,
position: ">",
count: request.BatchSize,
flags: CommandFlags.None)
.ConfigureAwait(false);
if (entries is null || entries.Length == 0)
{
PublishDepth();
return Array.Empty<ISchedulerQueueLease<TMessage>>();
}
var now = _timeProvider.GetUtcNow();
var leases = new List<ISchedulerQueueLease<TMessage>>(entries.Length);
foreach (var entry in entries)
{
var lease = TryMapLease(entry, request.Consumer, now, request.LeaseDuration, attemptOverride: null);
if (lease is null)
{
await HandlePoisonEntryAsync(database, entry.Id).ConfigureAwait(false);
continue;
}
leases.Add(lease);
}
PublishDepth();
return leases;
}
public async ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> ClaimExpiredAsync(
SchedulerQueueClaimOptions options,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(options);
cancellationToken.ThrowIfCancellationRequested();
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
await EnsureConsumerGroupAsync(database, cancellationToken).ConfigureAwait(false);
var pending = await database.StreamPendingMessagesAsync(
_streamOptions.Stream,
_streamOptions.ConsumerGroup,
options.BatchSize,
RedisValue.Null,
(long)options.MinIdleTime.TotalMilliseconds)
.ConfigureAwait(false);
if (pending is null || pending.Length == 0)
{
return Array.Empty<ISchedulerQueueLease<TMessage>>();
}
var eligible = pending
.Where(info => info.IdleTimeInMilliseconds >= options.MinIdleTime.TotalMilliseconds)
.ToArray();
if (eligible.Length == 0)
{
return Array.Empty<ISchedulerQueueLease<TMessage>>();
}
var messageIds = eligible
.Select(info => (RedisValue)info.MessageId)
.ToArray();
var claimed = await database.StreamClaimAsync(
_streamOptions.Stream,
_streamOptions.ConsumerGroup,
options.ClaimantConsumer,
0,
messageIds,
CommandFlags.None)
.ConfigureAwait(false);
if (claimed is null || claimed.Length == 0)
{
PublishDepth();
return Array.Empty<ISchedulerQueueLease<TMessage>>();
}
var now = _timeProvider.GetUtcNow();
var attemptLookup = eligible.ToDictionary(
info => info.MessageId.IsNullOrEmpty ? string.Empty : info.MessageId.ToString(),
info => (int)Math.Max(1, info.DeliveryCount),
StringComparer.Ordinal);
var leases = new List<ISchedulerQueueLease<TMessage>>(claimed.Length);
foreach (var entry in claimed)
{
var entryId = entry.Id.ToString();
attemptLookup.TryGetValue(entryId, out var attempt);
var lease = TryMapLease(
entry,
options.ClaimantConsumer,
now,
_queueOptions.DefaultLeaseDuration,
attemptOverride: attempt);
if (lease is null)
{
await HandlePoisonEntryAsync(database, entry.Id).ConfigureAwait(false);
continue;
}
leases.Add(lease);
}
PublishDepth();
return leases;
}
public async ValueTask DisposeAsync()
{
if (_disposed)
{
return;
}
_disposed = true;
if (_connection is not null)
{
await _connection.CloseAsync();
_connection.Dispose();
}
_connectionLock.Dispose();
_groupInitLock.Dispose();
SchedulerQueueMetrics.RemoveDepth(TransportName, _payload.QueueName);
GC.SuppressFinalize(this);
}
internal async Task AcknowledgeAsync(
RedisSchedulerQueueLease<TMessage> lease,
CancellationToken cancellationToken)
{
if (!lease.TryBeginCompletion())
{
return;
}
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
await database.StreamAcknowledgeAsync(
_streamOptions.Stream,
_streamOptions.ConsumerGroup,
new RedisValue[] { lease.MessageId })
.ConfigureAwait(false);
await database.StreamDeleteAsync(
_streamOptions.Stream,
new RedisValue[] { lease.MessageId })
.ConfigureAwait(false);
SchedulerQueueMetrics.RecordAck(TransportName, _payload.QueueName);
DecrementDepth();
}
internal async Task RenewLeaseAsync(
RedisSchedulerQueueLease<TMessage> lease,
TimeSpan leaseDuration,
CancellationToken cancellationToken)
{
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
await database.StreamClaimAsync(
_streamOptions.Stream,
_streamOptions.ConsumerGroup,
lease.Consumer,
0,
new RedisValue[] { lease.MessageId },
CommandFlags.None)
.ConfigureAwait(false);
var expires = _timeProvider.GetUtcNow().Add(leaseDuration);
lease.RefreshLease(expires);
}
internal async Task ReleaseAsync(
RedisSchedulerQueueLease<TMessage> lease,
SchedulerQueueReleaseDisposition disposition,
CancellationToken cancellationToken)
{
if (disposition == SchedulerQueueReleaseDisposition.Retry
&& lease.Attempt >= _queueOptions.MaxDeliveryAttempts)
{
await DeadLetterAsync(
lease,
$"max-delivery-attempts:{lease.Attempt}",
cancellationToken).ConfigureAwait(false);
return;
}
if (!lease.TryBeginCompletion())
{
return;
}
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
await database.StreamAcknowledgeAsync(
_streamOptions.Stream,
_streamOptions.ConsumerGroup,
new RedisValue[] { lease.MessageId })
.ConfigureAwait(false);
await database.StreamDeleteAsync(
_streamOptions.Stream,
new RedisValue[] { lease.MessageId })
.ConfigureAwait(false);
SchedulerQueueMetrics.RecordAck(TransportName, _payload.QueueName);
DecrementDepth();
if (disposition == SchedulerQueueReleaseDisposition.Retry)
{
SchedulerQueueMetrics.RecordRetry(TransportName, _payload.QueueName);
lease.IncrementAttempt();
var backoff = CalculateBackoff(lease.Attempt);
if (backoff > TimeSpan.Zero)
{
try
{
await Task.Delay(backoff, cancellationToken).ConfigureAwait(false);
}
catch (TaskCanceledException)
{
return;
}
}
var now = _timeProvider.GetUtcNow();
var entries = BuildEntries(lease.Message, now, lease.Attempt);
await AddToStreamAsync(
database,
_streamOptions.Stream,
entries,
_streamOptions.ApproximateMaxLength,
_streamOptions.ApproximateMaxLength is not null)
.ConfigureAwait(false);
SchedulerQueueMetrics.RecordEnqueued(TransportName, _payload.QueueName);
IncrementDepth();
}
}
internal async Task DeadLetterAsync(
RedisSchedulerQueueLease<TMessage> lease,
string reason,
CancellationToken cancellationToken)
{
if (!lease.TryBeginCompletion())
{
return;
}
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
await database.StreamAcknowledgeAsync(
_streamOptions.Stream,
_streamOptions.ConsumerGroup,
new RedisValue[] { lease.MessageId })
.ConfigureAwait(false);
await database.StreamDeleteAsync(
_streamOptions.Stream,
new RedisValue[] { lease.MessageId })
.ConfigureAwait(false);
DecrementDepth();
if (!_queueOptions.DeadLetterEnabled)
{
_logger.LogWarning(
"Dropped {Queue} message {MessageId} after {Attempt} attempt(s); dead-letter disabled. Reason: {Reason}",
_payload.QueueName,
lease.MessageId,
lease.Attempt,
reason);
return;
}
var now = _timeProvider.GetUtcNow();
var entries = BuildEntries(lease.Message, now, lease.Attempt);
await AddToStreamAsync(
database,
_streamOptions.DeadLetterStream,
entries,
null,
false)
.ConfigureAwait(false);
SchedulerQueueMetrics.RecordDeadLetter(TransportName, _payload.QueueName);
_logger.LogError(
"Dead-lettered {Queue} message {MessageId} after {Attempt} attempt(s): {Reason}",
_payload.QueueName,
lease.MessageId,
lease.Attempt,
reason);
}
public async ValueTask PingAsync(CancellationToken cancellationToken)
{
var database = await GetDatabaseAsync(cancellationToken).ConfigureAwait(false);
await database.ExecuteAsync("PING").ConfigureAwait(false);
}
private string BuildIdempotencyKey(string key)
=> string.Concat(_streamOptions.IdempotencyKeyPrefix, key);
private TimeSpan CalculateBackoff(int attempt)
{
if (attempt <= 1)
{
return _queueOptions.RetryInitialBackoff > TimeSpan.Zero
? _queueOptions.RetryInitialBackoff
: TimeSpan.Zero;
}
var initial = _queueOptions.RetryInitialBackoff > TimeSpan.Zero
? _queueOptions.RetryInitialBackoff
: TimeSpan.Zero;
if (initial <= TimeSpan.Zero)
{
return TimeSpan.Zero;
}
var max = _queueOptions.RetryMaxBackoff > TimeSpan.Zero
? _queueOptions.RetryMaxBackoff
: initial;
var exponent = attempt - 1;
var scaledTicks = initial.Ticks * Math.Pow(2, exponent - 1);
var cappedTicks = Math.Min(max.Ticks, scaledTicks);
return TimeSpan.FromTicks((long)Math.Max(initial.Ticks, cappedTicks));
}
private async ValueTask<IDatabase> GetDatabaseAsync(CancellationToken cancellationToken)
{
if (_connection is not null)
{
return _connection.GetDatabase(_redisOptions.Database ?? -1);
}
await _connectionLock.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
if (_connection is null)
{
var config = ConfigurationOptions.Parse(_redisOptions.ConnectionString!);
config.AbortOnConnectFail = false;
config.ConnectTimeout = (int)_redisOptions.InitializationTimeout.TotalMilliseconds;
config.ConnectRetry = 3;
if (_redisOptions.Database is not null)
{
config.DefaultDatabase = _redisOptions.Database;
}
_connection = await _connectionFactory(config).ConfigureAwait(false);
}
}
finally
{
_connectionLock.Release();
}
return _connection.GetDatabase(_redisOptions.Database ?? -1);
}
private async Task EnsureConsumerGroupAsync(
IDatabase database,
CancellationToken cancellationToken)
{
if (_groupInitialized)
{
return;
}
await _groupInitLock.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
if (_groupInitialized)
{
return;
}
try
{
await database.StreamCreateConsumerGroupAsync(
_streamOptions.Stream,
_streamOptions.ConsumerGroup,
StreamPosition.Beginning,
createStream: true)
.ConfigureAwait(false);
}
catch (RedisServerException ex) when (ex.Message.Contains("BUSYGROUP", StringComparison.OrdinalIgnoreCase))
{
// Group already exists.
}
_groupInitialized = true;
}
finally
{
_groupInitLock.Release();
}
}
private NameValueEntry[] BuildEntries(
TMessage message,
DateTimeOffset enqueuedAt,
int attempt)
{
var attributes = _payload.GetAttributes(message);
var attributeCount = attributes?.Count ?? 0;
var entries = ArrayPool<NameValueEntry>.Shared.Rent(10 + attributeCount);
var index = 0;
entries[index++] = new NameValueEntry(SchedulerQueueFields.QueueKind, _payload.QueueName);
entries[index++] = new NameValueEntry(SchedulerQueueFields.RunId, _payload.GetRunId(message));
entries[index++] = new NameValueEntry(SchedulerQueueFields.TenantId, _payload.GetTenantId(message));
var scheduleId = _payload.GetScheduleId(message);
if (!string.IsNullOrWhiteSpace(scheduleId))
{
entries[index++] = new NameValueEntry(SchedulerQueueFields.ScheduleId, scheduleId);
}
var segmentId = _payload.GetSegmentId(message);
if (!string.IsNullOrWhiteSpace(segmentId))
{
entries[index++] = new NameValueEntry(SchedulerQueueFields.SegmentId, segmentId);
}
var correlationId = _payload.GetCorrelationId(message);
if (!string.IsNullOrWhiteSpace(correlationId))
{
entries[index++] = new NameValueEntry(SchedulerQueueFields.CorrelationId, correlationId);
}
entries[index++] = new NameValueEntry(SchedulerQueueFields.IdempotencyKey, _payload.GetIdempotencyKey(message));
entries[index++] = new NameValueEntry(SchedulerQueueFields.Attempt, attempt);
entries[index++] = new NameValueEntry(SchedulerQueueFields.EnqueuedAt, enqueuedAt.ToUnixTimeMilliseconds());
entries[index++] = new NameValueEntry(SchedulerQueueFields.Payload, _payload.Serialize(message));
if (attributeCount > 0 && attributes is not null)
{
foreach (var kvp in attributes)
{
entries[index++] = new NameValueEntry(
SchedulerQueueFields.AttributePrefix + kvp.Key,
kvp.Value);
}
}
var result = entries.AsSpan(0, index).ToArray();
ArrayPool<NameValueEntry>.Shared.Return(entries, clearArray: true);
return result;
}
private RedisSchedulerQueueLease<TMessage>? TryMapLease(
StreamEntry entry,
string consumer,
DateTimeOffset now,
TimeSpan leaseDuration,
int? attemptOverride)
{
if (entry.Values is null || entry.Values.Length == 0)
{
return null;
}
string? payload = null;
string? runId = null;
string? tenantId = null;
string? scheduleId = null;
string? segmentId = null;
string? correlationId = null;
string? idempotencyKey = null;
long? enqueuedAtUnix = null;
var attempt = attemptOverride ?? 1;
var attributes = new Dictionary<string, string>(StringComparer.Ordinal);
foreach (var field in entry.Values)
{
var name = field.Name.ToString();
var value = field.Value;
if (name.Equals(SchedulerQueueFields.Payload, StringComparison.Ordinal))
{
payload = value.ToString();
}
else if (name.Equals(SchedulerQueueFields.RunId, StringComparison.Ordinal))
{
runId = value.ToString();
}
else if (name.Equals(SchedulerQueueFields.TenantId, StringComparison.Ordinal))
{
tenantId = value.ToString();
}
else if (name.Equals(SchedulerQueueFields.ScheduleId, StringComparison.Ordinal))
{
scheduleId = NormalizeOptional(value.ToString());
}
else if (name.Equals(SchedulerQueueFields.SegmentId, StringComparison.Ordinal))
{
segmentId = NormalizeOptional(value.ToString());
}
else if (name.Equals(SchedulerQueueFields.CorrelationId, StringComparison.Ordinal))
{
correlationId = NormalizeOptional(value.ToString());
}
else if (name.Equals(SchedulerQueueFields.IdempotencyKey, StringComparison.Ordinal))
{
idempotencyKey = value.ToString();
}
else if (name.Equals(SchedulerQueueFields.EnqueuedAt, StringComparison.Ordinal))
{
if (long.TryParse(value.ToString(), out var unixMs))
{
enqueuedAtUnix = unixMs;
}
}
else if (name.Equals(SchedulerQueueFields.Attempt, StringComparison.Ordinal))
{
if (int.TryParse(value.ToString(), out var parsedAttempt))
{
attempt = attemptOverride.HasValue
? Math.Max(attemptOverride.Value, parsedAttempt)
: Math.Max(1, parsedAttempt);
}
}
else if (name.StartsWith(SchedulerQueueFields.AttributePrefix, StringComparison.Ordinal))
{
var key = name[SchedulerQueueFields.AttributePrefix.Length..];
attributes[key] = value.ToString();
}
}
if (payload is null || runId is null || tenantId is null || enqueuedAtUnix is null || idempotencyKey is null)
{
return null;
}
var message = _payload.Deserialize(payload);
var enqueuedAt = DateTimeOffset.FromUnixTimeMilliseconds(enqueuedAtUnix.Value);
var leaseExpires = now.Add(leaseDuration);
IReadOnlyDictionary<string, string> attributeView = attributes.Count == 0
? EmptyReadOnlyDictionary<string, string>.Instance
: new ReadOnlyDictionary<string, string>(attributes);
return new RedisSchedulerQueueLease<TMessage>(
this,
entry.Id.ToString(),
idempotencyKey,
runId,
tenantId,
scheduleId,
segmentId,
correlationId,
attributeView,
message,
attempt,
enqueuedAt,
leaseExpires,
consumer);
}
private async Task HandlePoisonEntryAsync(IDatabase database, RedisValue entryId)
{
await database.StreamAcknowledgeAsync(
_streamOptions.Stream,
_streamOptions.ConsumerGroup,
new RedisValue[] { entryId })
.ConfigureAwait(false);
await database.StreamDeleteAsync(
_streamOptions.Stream,
new RedisValue[] { entryId })
.ConfigureAwait(false);
}
private async Task<RedisValue> AddToStreamAsync(
IDatabase database,
RedisKey stream,
NameValueEntry[] entries,
int? maxLength,
bool useApproximateLength)
{
var capacity = 4 + (entries.Length * 2);
var args = new List<object>(capacity)
{
stream
};
if (maxLength.HasValue)
{
args.Add("MAXLEN");
if (useApproximateLength)
{
args.Add("~");
}
args.Add(maxLength.Value);
}
args.Add("*");
for (var i = 0; i < entries.Length; i++)
{
args.Add(entries[i].Name);
args.Add(entries[i].Value);
}
var result = await database.ExecuteAsync("XADD", args.ToArray()).ConfigureAwait(false);
return (RedisValue)result!;
}
private void IncrementDepth()
{
var depth = Interlocked.Increment(ref _approximateDepth);
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
}
private void DecrementDepth()
{
var depth = Interlocked.Decrement(ref _approximateDepth);
if (depth < 0)
{
depth = Interlocked.Exchange(ref _approximateDepth, 0);
}
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
}
private void PublishDepth()
{
var depth = Volatile.Read(ref _approximateDepth);
SchedulerQueueMetrics.RecordDepth(TransportName, _payload.QueueName, depth);
}
private static string? NormalizeOptional(string? value)
{
if (string.IsNullOrWhiteSpace(value))
{
return null;
}
return value;
}
private sealed class EmptyReadOnlyDictionary<TKey, TValue>
where TKey : notnull
{
public static readonly IReadOnlyDictionary<TKey, TValue> Instance =
new ReadOnlyDictionary<TKey, TValue>(new Dictionary<TKey, TValue>(0, EqualityComparer<TKey>.Default));
}
}

View File

@@ -0,0 +1,91 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace StellaOps.Scheduler.Queue.Redis;
internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<TMessage>
{
private readonly RedisSchedulerQueueBase<TMessage> _queue;
private int _completed;
internal RedisSchedulerQueueLease(
RedisSchedulerQueueBase<TMessage> queue,
string messageId,
string idempotencyKey,
string runId,
string tenantId,
string? scheduleId,
string? segmentId,
string? correlationId,
IReadOnlyDictionary<string, string> attributes,
TMessage message,
int attempt,
DateTimeOffset enqueuedAt,
DateTimeOffset leaseExpiresAt,
string consumer)
{
_queue = queue;
MessageId = messageId;
IdempotencyKey = idempotencyKey;
RunId = runId;
TenantId = tenantId;
ScheduleId = scheduleId;
SegmentId = segmentId;
CorrelationId = correlationId;
Attributes = attributes;
Message = message;
Attempt = attempt;
EnqueuedAt = enqueuedAt;
LeaseExpiresAt = leaseExpiresAt;
Consumer = consumer;
}
public string MessageId { get; }
public string IdempotencyKey { get; }
public string RunId { get; }
public string TenantId { get; }
public string? ScheduleId { get; }
public string? SegmentId { get; }
public string? CorrelationId { get; }
public IReadOnlyDictionary<string, string> Attributes { get; }
public TMessage Message { get; }
public int Attempt { get; private set; }
public DateTimeOffset EnqueuedAt { get; }
public DateTimeOffset LeaseExpiresAt { get; private set; }
public string Consumer { get; }
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
=> _queue.AcknowledgeAsync(this, cancellationToken);
public Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default)
=> _queue.RenewLeaseAsync(this, leaseDuration, cancellationToken);
public Task ReleaseAsync(SchedulerQueueReleaseDisposition disposition, CancellationToken cancellationToken = default)
=> _queue.ReleaseAsync(this, disposition, cancellationToken);
public Task DeadLetterAsync(string reason, CancellationToken cancellationToken = default)
=> _queue.DeadLetterAsync(this, reason, cancellationToken);
internal bool TryBeginCompletion()
=> Interlocked.CompareExchange(ref _completed, 1, 0) == 0;
internal void RefreshLease(DateTimeOffset expiresAt)
=> LeaseExpiresAt = expiresAt;
internal void IncrementAttempt()
=> Attempt++;
}

View File

@@ -0,0 +1,90 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StackExchange.Redis;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue.Redis;
internal sealed class RedisSchedulerRunnerQueue
: RedisSchedulerQueueBase<RunnerSegmentQueueMessage>, ISchedulerRunnerQueue
{
public RedisSchedulerRunnerQueue(
SchedulerQueueOptions queueOptions,
SchedulerRedisQueueOptions redisOptions,
ILogger<RedisSchedulerRunnerQueue> logger,
TimeProvider timeProvider,
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
: base(
queueOptions,
redisOptions,
redisOptions.Runner,
RunnerPayload.Instance,
logger,
timeProvider,
connectionFactory)
{
}
private sealed class RunnerPayload : IRedisSchedulerQueuePayload<RunnerSegmentQueueMessage>
{
public static RunnerPayload Instance { get; } = new();
public string QueueName => "runner";
public string GetIdempotencyKey(RunnerSegmentQueueMessage message)
=> message.IdempotencyKey;
public string Serialize(RunnerSegmentQueueMessage message)
=> CanonicalJsonSerializer.Serialize(message);
public RunnerSegmentQueueMessage Deserialize(string payload)
=> CanonicalJsonSerializer.Deserialize<RunnerSegmentQueueMessage>(payload);
public string GetRunId(RunnerSegmentQueueMessage message)
=> message.RunId;
public string GetTenantId(RunnerSegmentQueueMessage message)
=> message.TenantId;
public string? GetScheduleId(RunnerSegmentQueueMessage message)
=> message.ScheduleId;
public string? GetSegmentId(RunnerSegmentQueueMessage message)
=> message.SegmentId;
public string? GetCorrelationId(RunnerSegmentQueueMessage message)
=> message.CorrelationId;
public IReadOnlyDictionary<string, string>? GetAttributes(RunnerSegmentQueueMessage message)
{
if (message.Attributes.Count == 0 && message.ImageDigests.Count == 0)
{
return null;
}
// Ensure digests remain accessible without deserializing the entire payload.
var map = new Dictionary<string, string>(message.Attributes, StringComparer.Ordinal);
map["imageDigestCount"] = message.ImageDigests.Count.ToString();
// populate first few digests for quick inspection (bounded)
var take = Math.Min(message.ImageDigests.Count, 5);
for (var i = 0; i < take; i++)
{
map[$"digest{i}"] = message.ImageDigests[i];
}
if (message.RatePerSecond.HasValue)
{
map["ratePerSecond"] = message.RatePerSecond.Value.ToString();
}
map["usageOnly"] = message.UsageOnly ? "true" : "false";
return map;
}
}
}

View File

@@ -0,0 +1,274 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue;
public sealed class PlannerQueueMessage
{
[JsonConstructor]
public PlannerQueueMessage(
Run run,
ImpactSet impactSet,
Schedule? schedule = null,
string? correlationId = null)
{
Run = run ?? throw new ArgumentNullException(nameof(run));
ImpactSet = impactSet ?? throw new ArgumentNullException(nameof(impactSet));
if (schedule is not null && string.IsNullOrWhiteSpace(schedule.Id))
{
throw new ArgumentException("Schedule must have a valid identifier.", nameof(schedule));
}
if (!string.IsNullOrWhiteSpace(correlationId))
{
correlationId = correlationId!.Trim();
}
Schedule = schedule;
CorrelationId = string.IsNullOrWhiteSpace(correlationId) ? null : correlationId;
}
public Run Run { get; }
public ImpactSet ImpactSet { get; }
public Schedule? Schedule { get; }
public string? CorrelationId { get; }
public string IdempotencyKey => Run.Id;
public string TenantId => Run.TenantId;
public string? ScheduleId => Run.ScheduleId;
}
public sealed class RunnerSegmentQueueMessage
{
private readonly ReadOnlyCollection<string> _imageDigests;
private readonly IReadOnlyDictionary<string, string> _attributes;
[JsonConstructor]
public RunnerSegmentQueueMessage(
string segmentId,
string runId,
string tenantId,
IReadOnlyList<string> imageDigests,
string? scheduleId = null,
int? ratePerSecond = null,
bool usageOnly = true,
IReadOnlyDictionary<string, string>? attributes = null,
string? correlationId = null)
{
if (string.IsNullOrWhiteSpace(segmentId))
{
throw new ArgumentException("Segment identifier must be provided.", nameof(segmentId));
}
if (string.IsNullOrWhiteSpace(runId))
{
throw new ArgumentException("Run identifier must be provided.", nameof(runId));
}
if (string.IsNullOrWhiteSpace(tenantId))
{
throw new ArgumentException("Tenant identifier must be provided.", nameof(tenantId));
}
SegmentId = segmentId;
RunId = runId;
TenantId = tenantId;
ScheduleId = string.IsNullOrWhiteSpace(scheduleId) ? null : scheduleId;
RatePerSecond = ratePerSecond;
UsageOnly = usageOnly;
CorrelationId = string.IsNullOrWhiteSpace(correlationId) ? null : correlationId;
_imageDigests = new ReadOnlyCollection<string>(NormalizeDigests(imageDigests));
_attributes = attributes is null
? EmptyReadOnlyDictionary<string, string>.Instance
: new ReadOnlyDictionary<string, string>(new Dictionary<string, string>(attributes, StringComparer.Ordinal));
}
public string SegmentId { get; }
public string RunId { get; }
public string TenantId { get; }
public string? ScheduleId { get; }
public int? RatePerSecond { get; }
public bool UsageOnly { get; }
public string? CorrelationId { get; }
public IReadOnlyList<string> ImageDigests => _imageDigests;
public IReadOnlyDictionary<string, string> Attributes => _attributes;
public string IdempotencyKey => SegmentId;
private static List<string> NormalizeDigests(IReadOnlyList<string> digests)
{
if (digests is null)
{
throw new ArgumentNullException(nameof(digests));
}
var list = new List<string>();
foreach (var digest in digests)
{
if (string.IsNullOrWhiteSpace(digest))
{
continue;
}
list.Add(digest.Trim());
}
if (list.Count == 0)
{
throw new ArgumentException("At least one image digest must be provided.", nameof(digests));
}
return list;
}
private sealed class EmptyReadOnlyDictionary<TKey, TValue>
where TKey : notnull
{
public static readonly IReadOnlyDictionary<TKey, TValue> Instance =
new ReadOnlyDictionary<TKey, TValue>(new Dictionary<TKey, TValue>(0, EqualityComparer<TKey>.Default));
}
}
public readonly record struct SchedulerQueueEnqueueResult(string MessageId, bool Deduplicated);
public sealed class SchedulerQueueLeaseRequest
{
public SchedulerQueueLeaseRequest(string consumer, int batchSize, TimeSpan leaseDuration)
{
if (string.IsNullOrWhiteSpace(consumer))
{
throw new ArgumentException("Consumer identifier must be provided.", nameof(consumer));
}
if (batchSize <= 0)
{
throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be positive.");
}
if (leaseDuration <= TimeSpan.Zero)
{
throw new ArgumentOutOfRangeException(nameof(leaseDuration), leaseDuration, "Lease duration must be positive.");
}
Consumer = consumer;
BatchSize = batchSize;
LeaseDuration = leaseDuration;
}
public string Consumer { get; }
public int BatchSize { get; }
public TimeSpan LeaseDuration { get; }
}
public sealed class SchedulerQueueClaimOptions
{
public SchedulerQueueClaimOptions(string claimantConsumer, int batchSize, TimeSpan minIdleTime)
{
if (string.IsNullOrWhiteSpace(claimantConsumer))
{
throw new ArgumentException("Consumer identifier must be provided.", nameof(claimantConsumer));
}
if (batchSize <= 0)
{
throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be positive.");
}
if (minIdleTime < TimeSpan.Zero)
{
throw new ArgumentOutOfRangeException(nameof(minIdleTime), minIdleTime, "Idle time cannot be negative.");
}
ClaimantConsumer = claimantConsumer;
BatchSize = batchSize;
MinIdleTime = minIdleTime;
}
public string ClaimantConsumer { get; }
public int BatchSize { get; }
public TimeSpan MinIdleTime { get; }
}
public enum SchedulerQueueReleaseDisposition
{
Retry,
Abandon
}
public interface ISchedulerQueue<TMessage>
{
ValueTask<SchedulerQueueEnqueueResult> EnqueueAsync(TMessage message, CancellationToken cancellationToken = default);
ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> LeaseAsync(SchedulerQueueLeaseRequest request, CancellationToken cancellationToken = default);
ValueTask<IReadOnlyList<ISchedulerQueueLease<TMessage>>> ClaimExpiredAsync(SchedulerQueueClaimOptions options, CancellationToken cancellationToken = default);
}
public interface ISchedulerQueueLease<out TMessage>
{
string MessageId { get; }
int Attempt { get; }
DateTimeOffset EnqueuedAt { get; }
DateTimeOffset LeaseExpiresAt { get; }
string Consumer { get; }
string TenantId { get; }
string RunId { get; }
string? ScheduleId { get; }
string? SegmentId { get; }
string? CorrelationId { get; }
string IdempotencyKey { get; }
IReadOnlyDictionary<string, string> Attributes { get; }
TMessage Message { get; }
Task AcknowledgeAsync(CancellationToken cancellationToken = default);
Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default);
Task ReleaseAsync(SchedulerQueueReleaseDisposition disposition, CancellationToken cancellationToken = default);
Task DeadLetterAsync(string reason, CancellationToken cancellationToken = default);
}
public interface ISchedulerPlannerQueue : ISchedulerQueue<PlannerQueueMessage>
{
}
public interface ISchedulerRunnerQueue : ISchedulerQueue<RunnerSegmentQueueMessage>
{
}

View File

@@ -0,0 +1,16 @@
namespace StellaOps.Scheduler.Queue;
internal static class SchedulerQueueFields
{
public const string Payload = "payload";
public const string Attempt = "attempt";
public const string EnqueuedAt = "enqueuedAt";
public const string IdempotencyKey = "idempotency";
public const string RunId = "runId";
public const string TenantId = "tenantId";
public const string ScheduleId = "scheduleId";
public const string SegmentId = "segmentId";
public const string QueueKind = "queueKind";
public const string CorrelationId = "correlationId";
public const string AttributePrefix = "attr:";
}

View File

@@ -0,0 +1,72 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Microsoft.Extensions.Logging;
namespace StellaOps.Scheduler.Queue;
public sealed class SchedulerQueueHealthCheck : IHealthCheck
{
private readonly ISchedulerPlannerQueue _plannerQueue;
private readonly ISchedulerRunnerQueue _runnerQueue;
private readonly ILogger<SchedulerQueueHealthCheck> _logger;
public SchedulerQueueHealthCheck(
ISchedulerPlannerQueue plannerQueue,
ISchedulerRunnerQueue runnerQueue,
ILogger<SchedulerQueueHealthCheck> logger)
{
_plannerQueue = plannerQueue ?? throw new ArgumentNullException(nameof(plannerQueue));
_runnerQueue = runnerQueue ?? throw new ArgumentNullException(nameof(runnerQueue));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public async Task<HealthCheckResult> CheckHealthAsync(
HealthCheckContext context,
CancellationToken cancellationToken = default)
{
cancellationToken.ThrowIfCancellationRequested();
var failures = new List<string>();
if (!await ProbeAsync(_plannerQueue, "planner", cancellationToken).ConfigureAwait(false))
{
failures.Add("planner transport unreachable");
}
if (!await ProbeAsync(_runnerQueue, "runner", cancellationToken).ConfigureAwait(false))
{
failures.Add("runner transport unreachable");
}
if (failures.Count == 0)
{
return HealthCheckResult.Healthy("Scheduler queues reachable.");
}
var description = string.Join("; ", failures);
return new HealthCheckResult(
context.Registration.FailureStatus,
description);
}
private async Task<bool> ProbeAsync(object queue, string label, CancellationToken cancellationToken)
{
try
{
if (queue is ISchedulerQueueTransportDiagnostics diagnostics)
{
await diagnostics.PingAsync(cancellationToken).ConfigureAwait(false);
}
return true;
}
catch (Exception ex)
{
_logger.LogError(ex, "Scheduler {Label} queue transport ping failed.", label);
return false;
}
}
}

View File

@@ -0,0 +1,65 @@
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using System.Linq;
namespace StellaOps.Scheduler.Queue;
internal static class SchedulerQueueMetrics
{
private const string TransportTagName = "transport";
private const string QueueTagName = "queue";
private static readonly Meter Meter = new("StellaOps.Scheduler.Queue");
private static readonly Counter<long> EnqueuedCounter = Meter.CreateCounter<long>("scheduler_queue_enqueued_total");
private static readonly Counter<long> DeduplicatedCounter = Meter.CreateCounter<long>("scheduler_queue_deduplicated_total");
private static readonly Counter<long> AckCounter = Meter.CreateCounter<long>("scheduler_queue_ack_total");
private static readonly Counter<long> RetryCounter = Meter.CreateCounter<long>("scheduler_queue_retry_total");
private static readonly Counter<long> DeadLetterCounter = Meter.CreateCounter<long>("scheduler_queue_deadletter_total");
private static readonly ConcurrentDictionary<(string transport, string queue), long> DepthSamples = new();
private static readonly ObservableGauge<long> DepthGauge = Meter.CreateObservableGauge<long>(
"scheduler_queue_depth",
ObserveDepth);
public static void RecordEnqueued(string transport, string queue)
=> EnqueuedCounter.Add(1, BuildTags(transport, queue));
public static void RecordDeduplicated(string transport, string queue)
=> DeduplicatedCounter.Add(1, BuildTags(transport, queue));
public static void RecordAck(string transport, string queue)
=> AckCounter.Add(1, BuildTags(transport, queue));
public static void RecordRetry(string transport, string queue)
=> RetryCounter.Add(1, BuildTags(transport, queue));
public static void RecordDeadLetter(string transport, string queue)
=> DeadLetterCounter.Add(1, BuildTags(transport, queue));
public static void RecordDepth(string transport, string queue, long depth)
=> DepthSamples[(transport, queue)] = depth;
public static void RemoveDepth(string transport, string queue)
=> DepthSamples.TryRemove((transport, queue), out _);
internal static IReadOnlyDictionary<(string transport, string queue), long> SnapshotDepths()
=> DepthSamples.ToDictionary(pair => pair.Key, pair => pair.Value);
private static KeyValuePair<string, object?>[] BuildTags(string transport, string queue)
=> new[]
{
new KeyValuePair<string, object?>(TransportTagName, transport),
new KeyValuePair<string, object?>(QueueTagName, queue)
};
private static IEnumerable<Measurement<long>> ObserveDepth()
{
foreach (var sample in DepthSamples)
{
yield return new Measurement<long>(
sample.Value,
new KeyValuePair<string, object?>(TransportTagName, sample.Key.transport),
new KeyValuePair<string, object?>(QueueTagName, sample.Key.queue));
}
}
}

View File

@@ -0,0 +1,134 @@
using System;
namespace StellaOps.Scheduler.Queue;
public sealed class SchedulerQueueOptions
{
public SchedulerQueueTransportKind Kind { get; set; } = SchedulerQueueTransportKind.Redis;
public SchedulerRedisQueueOptions Redis { get; set; } = new();
public SchedulerNatsQueueOptions Nats { get; set; } = new();
/// <summary>
/// Default lease/visibility window applied when callers do not override the duration.
/// </summary>
public TimeSpan DefaultLeaseDuration { get; set; } = TimeSpan.FromMinutes(5);
/// <summary>
/// Maximum number of deliveries before a message is shunted to the dead-letter stream.
/// </summary>
public int MaxDeliveryAttempts { get; set; } = 5;
/// <summary>
/// Enables routing exhausted deliveries to the configured dead-letter transport.
/// When disabled, messages exceeding the attempt budget are dropped after acknowledgement.
/// </summary>
public bool DeadLetterEnabled { get; set; } = true;
/// <summary>
/// Base retry delay used when a message is released for retry.
/// </summary>
public TimeSpan RetryInitialBackoff { get; set; } = TimeSpan.FromSeconds(5);
/// <summary>
/// Cap applied to the retry delay when exponential backoff is used.
/// </summary>
public TimeSpan RetryMaxBackoff { get; set; } = TimeSpan.FromMinutes(1);
}
public sealed class SchedulerRedisQueueOptions
{
public string? ConnectionString { get; set; }
public int? Database { get; set; }
public TimeSpan InitializationTimeout { get; set; } = TimeSpan.FromSeconds(30);
public RedisSchedulerStreamOptions Planner { get; set; } = RedisSchedulerStreamOptions.ForPlanner();
public RedisSchedulerStreamOptions Runner { get; set; } = RedisSchedulerStreamOptions.ForRunner();
}
public sealed class RedisSchedulerStreamOptions
{
public string Stream { get; set; } = string.Empty;
public string ConsumerGroup { get; set; } = string.Empty;
public string DeadLetterStream { get; set; } = string.Empty;
public string IdempotencyKeyPrefix { get; set; } = string.Empty;
public TimeSpan IdempotencyWindow { get; set; } = TimeSpan.FromHours(12);
public int? ApproximateMaxLength { get; set; }
public static RedisSchedulerStreamOptions ForPlanner()
=> new()
{
Stream = "scheduler:planner",
ConsumerGroup = "scheduler-planners",
DeadLetterStream = "scheduler:planner:dead",
IdempotencyKeyPrefix = "scheduler:planner:idemp:"
};
public static RedisSchedulerStreamOptions ForRunner()
=> new()
{
Stream = "scheduler:runner",
ConsumerGroup = "scheduler-runners",
DeadLetterStream = "scheduler:runner:dead",
IdempotencyKeyPrefix = "scheduler:runner:idemp:"
};
}
public sealed class SchedulerNatsQueueOptions
{
public string? Url { get; set; }
public TimeSpan IdleHeartbeat { get; set; } = TimeSpan.FromSeconds(30);
public SchedulerNatsStreamOptions Planner { get; set; } = SchedulerNatsStreamOptions.ForPlanner();
public SchedulerNatsStreamOptions Runner { get; set; } = SchedulerNatsStreamOptions.ForRunner();
}
public sealed class SchedulerNatsStreamOptions
{
public string Stream { get; set; } = string.Empty;
public string Subject { get; set; } = string.Empty;
public string DurableConsumer { get; set; } = string.Empty;
public string DeadLetterStream { get; set; } = string.Empty;
public string DeadLetterSubject { get; set; } = string.Empty;
public int MaxAckPending { get; set; } = 64;
public TimeSpan AckWait { get; set; } = TimeSpan.FromMinutes(5);
public TimeSpan RetryDelay { get; set; } = TimeSpan.FromSeconds(10);
public static SchedulerNatsStreamOptions ForPlanner()
=> new()
{
Stream = "SCHEDULER_PLANNER",
Subject = "scheduler.planner",
DurableConsumer = "scheduler-planners",
DeadLetterStream = "SCHEDULER_PLANNER_DEAD",
DeadLetterSubject = "scheduler.planner.dead"
};
public static SchedulerNatsStreamOptions ForRunner()
=> new()
{
Stream = "SCHEDULER_RUNNER",
Subject = "scheduler.runner",
DurableConsumer = "scheduler-runners",
DeadLetterStream = "SCHEDULER_RUNNER_DEAD",
DeadLetterSubject = "scheduler.runner.dead"
};
}

View File

@@ -0,0 +1,88 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Microsoft.Extensions.Logging;
using StellaOps.Scheduler.Queue.Nats;
using StellaOps.Scheduler.Queue.Redis;
namespace StellaOps.Scheduler.Queue;
public static class SchedulerQueueServiceCollectionExtensions
{
public static IServiceCollection AddSchedulerQueues(
this IServiceCollection services,
IConfiguration configuration,
string sectionName = "scheduler:queue")
{
ArgumentNullException.ThrowIfNull(services);
ArgumentNullException.ThrowIfNull(configuration);
var options = new SchedulerQueueOptions();
configuration.GetSection(sectionName).Bind(options);
services.TryAddSingleton(TimeProvider.System);
services.AddSingleton(options);
services.AddSingleton<ISchedulerPlannerQueue>(sp =>
{
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
return options.Kind switch
{
SchedulerQueueTransportKind.Redis => new RedisSchedulerPlannerQueue(
options,
options.Redis,
loggerFactory.CreateLogger<RedisSchedulerPlannerQueue>(),
timeProvider),
SchedulerQueueTransportKind.Nats => new NatsSchedulerPlannerQueue(
options,
options.Nats,
loggerFactory.CreateLogger<NatsSchedulerPlannerQueue>(),
timeProvider),
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
};
});
services.AddSingleton<ISchedulerRunnerQueue>(sp =>
{
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
return options.Kind switch
{
SchedulerQueueTransportKind.Redis => new RedisSchedulerRunnerQueue(
options,
options.Redis,
loggerFactory.CreateLogger<RedisSchedulerRunnerQueue>(),
timeProvider),
SchedulerQueueTransportKind.Nats => new NatsSchedulerRunnerQueue(
options,
options.Nats,
loggerFactory.CreateLogger<NatsSchedulerRunnerQueue>(),
timeProvider),
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
};
});
services.AddSingleton<SchedulerQueueHealthCheck>();
return services;
}
public static IHealthChecksBuilder AddSchedulerQueueHealthCheck(
this IHealthChecksBuilder builder)
{
ArgumentNullException.ThrowIfNull(builder);
builder.Services.TryAddSingleton<SchedulerQueueHealthCheck>();
builder.AddCheck<SchedulerQueueHealthCheck>(
name: "scheduler-queue",
failureStatus: HealthStatus.Unhealthy,
tags: new[] { "scheduler", "queue" });
return builder;
}
}

View File

@@ -0,0 +1,10 @@
namespace StellaOps.Scheduler.Queue;
/// <summary>
/// Transport backends supported by the scheduler queue abstraction.
/// </summary>
public enum SchedulerQueueTransportKind
{
Redis = 0,
Nats = 1,
}

View File

@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="StackExchange.Redis" Version="2.8.24" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0-rc.2.25502.107" />
<PackageReference Include="Microsoft.Extensions.Configuration.Abstractions" Version="10.0.0-rc.2.25502.107" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="10.0.0-rc.2.25502.107" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.0-rc.2.25502.107" />
<PackageReference Include="Microsoft.Extensions.Diagnostics.HealthChecks" Version="10.0.0-rc.2.25502.107" />
<PackageReference Include="Microsoft.Extensions.Diagnostics.HealthChecks.Abstractions" Version="10.0.0-rc.2.25502.107" />
<PackageReference Include="NATS.Client.Core" Version="2.0.0" />
<PackageReference Include="NATS.Client.JetStream" Version="2.0.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scheduler.Models\StellaOps.Scheduler.Models.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,9 @@
# Scheduler Queue Task Board (Sprint 16)
> **Status note (2025-10-19):** Scheduler DTOs and sample payloads are now available (SCHED-MODELS-16-102). Queue tasks remain pending on this board.
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|----|--------|----------|------------|-------------|---------------|
| SCHED-QUEUE-16-401 | DONE (2025-10-20) | Scheduler Queue Guild | SCHED-MODELS-16-101 | Implement queue abstraction + Redis Streams adapter (planner inputs, runner segments) with ack/lease semantics. | Integration tests cover enqueue/dequeue/ack; lease renewal implemented; ordering preserved. |
| SCHED-QUEUE-16-402 | DONE (2025-10-20) | Scheduler Queue Guild | SCHED-QUEUE-16-401 | Add NATS JetStream adapter with configuration binding, health probes, failover. | Health endpoints verified; failover documented; adapter tested. |
| SCHED-QUEUE-16-403 | DONE (2025-10-20) | Scheduler Queue Guild | SCHED-QUEUE-16-401 | Dead-letter handling + metrics (queue depth, retry counts), configuration toggles. | Dead-letter policy tested; metrics exported; docs updated. |