using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Globalization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using NATS.Client.JetStream.Models;

namespace StellaOps.Scanner.Queue.Nats;

/// <summary>
/// <see cref="IScanQueue"/> implementation backed by NATS JetStream.
/// The connection, JetStream context, work stream, dead-letter stream and durable
/// consumer are all created lazily on first use and cached; creation is serialized
/// through a single <see cref="SemaphoreSlim"/>.
/// </summary>
internal sealed class NatsScanQueue : IScanQueue, IAsyncDisposable
{
    private const string TransportName = "nats";

    private static readonly INatsSerializer<byte[]> PayloadSerializer = NatsRawSerializer<byte[]>.Default;

    private readonly ScannerQueueOptions _queueOptions;
    private readonly NatsQueueOptions _options;
    private readonly ILogger _logger;
    private readonly TimeProvider _timeProvider;
    private readonly SemaphoreSlim _connectionGate = new(1, 1);
    private readonly Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>> _connectionFactory;

    private NatsConnection? _connection;
    private NatsJSContext? _jsContext;
    private INatsJSConsumer? _consumer;
    private bool _disposed;

    /// <summary>
    /// Creates the queue. No network activity happens here; the connection is opened on first use.
    /// </summary>
    /// <param name="queueOptions">Transport-independent queue settings (delivery attempts, backoff, dead-letter retention).</param>
    /// <param name="options">NATS-specific settings (URL, stream, subject, durable consumer, ...).</param>
    /// <param name="logger">Logger for queue diagnostics.</param>
    /// <param name="timeProvider">Clock used for timestamps; falls back to <see cref="TimeProvider.System"/> when null.</param>
    /// <param name="connectionFactory">Optional factory for the NATS connection; defaults to <c>new NatsConnection(opts)</c>.</param>
    /// <exception cref="ArgumentNullException">A required argument is null.</exception>
    /// <exception cref="InvalidOperationException">The NATS URL is missing or blank.</exception>
    public NatsScanQueue(
        ScannerQueueOptions queueOptions,
        NatsQueueOptions options,
        ILogger logger,
        TimeProvider timeProvider,
        Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
    {
        _queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? TimeProvider.System;
        _connectionFactory = connectionFactory
            ?? ((opts, _) => new ValueTask<NatsConnection>(new NatsConnection(opts)));

        if (string.IsNullOrWhiteSpace(_options.Url))
        {
            throw new InvalidOperationException("NATS connection URL must be configured for the scanner queue.");
        }
    }

    /// <summary>
    /// Publishes a job to the work subject. The idempotency key (or the job id when none is set)
    /// is sent as the JetStream message id so the server can flag duplicates.
    /// </summary>
    /// <param name="message">Job to enqueue.</param>
    /// <param name="cancellationToken">Cancels the operation.</param>
    /// <returns>The stream sequence of the message and whether the server reported it as a duplicate.</returns>
    /// <exception cref="NatsJSApiException">The server rejected the publish.</exception>
    public async ValueTask<QueueEnqueueResult> EnqueueAsync(
        ScanQueueMessage message,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(message);

        var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
        await EnsureStreamAndConsumerAsync(js, cancellationToken).ConfigureAwait(false);

        var idempotencyKey = message.IdempotencyKey ?? message.JobId;
        var headers = BuildHeaders(message, idempotencyKey);

        var publishOpts = new NatsJSPubOpts
        {
            MsgId = idempotencyKey,
            RetryAttempts = 0
        };

        var ack = await js.PublishAsync(
                _options.Subject,
                message.Payload.ToArray(),
                PayloadSerializer,
                publishOpts,
                headers,
                cancellationToken)
            .ConfigureAwait(false);

        // A publish acknowledgement can carry a server-side error; do not report such a job as enqueued.
        if (ack.Error is not null)
        {
            throw new NatsJSApiException(ack.Error);
        }

        var sequence = ack.Seq.ToString(CultureInfo.InvariantCulture);

        if (ack.Duplicate)
        {
            _logger.LogDebug(
                "Duplicate NATS enqueue detected for job {JobId} (token {Token}).",
                message.JobId,
                idempotencyKey);

            QueueMetrics.RecordDeduplicated(TransportName);
            return new QueueEnqueueResult(sequence, true);
        }

        QueueMetrics.RecordEnqueued(TransportName);
        _logger.LogDebug(
            "Enqueued job {JobId} into NATS stream {Stream} with sequence {Sequence}.",
            message.JobId,
            ack.Stream,
            ack.Seq);

        return new QueueEnqueueResult(sequence, false);
    }

    /// <summary>
    /// Fetches up to <c>request.BatchSize</c> messages from the durable consumer and wraps each in a lease.
    /// Messages that lack the job-id header are skipped (no lease is produced for them).
    /// </summary>
    /// <param name="request">Batch size, lease duration and consumer name.</param>
    /// <param name="cancellationToken">Cancels the operation.</param>
    /// <returns>The leases created from the fetched messages; may be empty.</returns>
    public async ValueTask<IReadOnlyList<IScanQueueLease>> LeaseAsync(
        QueueLeaseRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
        var consumer = await EnsureStreamAndConsumerAsync(js, cancellationToken).ConfigureAwait(false);

        var fetchOpts = new NatsJSFetchOpts
        {
            MaxMsgs = request.BatchSize,
            Expires = request.LeaseDuration,
            IdleHeartbeat = _options.IdleHeartbeat
        };

        var now = _timeProvider.GetUtcNow();
        var leases = new List<IScanQueueLease>(capacity: request.BatchSize);

        await foreach (var msg in consumer.FetchAsync(PayloadSerializer, fetchOpts, cancellationToken).ConfigureAwait(false))
        {
            var lease = CreateLease(msg, request.Consumer, now, request.LeaseDuration);
            if (lease is not null)
            {
                leases.Add(lease);
            }
        }

        return leases;
    }

    /// <summary>
    /// Fetches messages and keeps only those that have been delivered more than once.
    /// First-delivery messages are NAKed with zero delay so they go straight back to the queue.
    /// </summary>
    /// <param name="options">Batch size, fetch expiry (<c>MinIdleTime</c>) and claimant consumer name.</param>
    /// <param name="cancellationToken">Cancels the operation.</param>
    /// <returns>Leases for redelivered messages, using the configured default lease duration.</returns>
    public async ValueTask<IReadOnlyList<IScanQueueLease>> ClaimExpiredLeasesAsync(
        QueueClaimOptions options,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(options);

        var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
        var consumer = await EnsureStreamAndConsumerAsync(js, cancellationToken).ConfigureAwait(false);

        var fetchOpts = new NatsJSFetchOpts
        {
            MaxMsgs = options.BatchSize,
            Expires = options.MinIdleTime,
            IdleHeartbeat = _options.IdleHeartbeat
        };

        var now = _timeProvider.GetUtcNow();
        var leases = new List<IScanQueueLease>(options.BatchSize);

        await foreach (var msg in consumer.FetchAsync(PayloadSerializer, fetchOpts, cancellationToken).ConfigureAwait(false))
        {
            var deliveries = (int)(msg.Metadata?.NumDelivered ?? 1);
            if (deliveries <= 1)
            {
                // Fresh message; surface back to queue and continue.
                await msg.NakAsync(new AckOpts(), TimeSpan.Zero, cancellationToken).ConfigureAwait(false);
                continue;
            }

            var lease = CreateLease(msg, options.ClaimantConsumer, now, _queueOptions.DefaultLeaseDuration);
            if (lease is not null)
            {
                leases.Add(lease);
            }
        }

        return leases;
    }

    /// <summary>
    /// Disposes the NATS connection (if one was opened) and the internal gate. Safe to call more than once.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        if (_connection is not null)
        {
            await _connection.DisposeAsync().ConfigureAwait(false);
        }

        _connectionGate.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Acknowledges the leased message. Does nothing if the lease has already begun completion.
    /// </summary>
    internal async Task AcknowledgeAsync(
        NatsScanQueueLease lease,
        CancellationToken cancellationToken)
    {
        if (!lease.TryBeginCompletion())
        {
            return;
        }

        await lease.Message.AckAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);

        QueueMetrics.RecordAck(TransportName);
        _logger.LogDebug(
            "Acknowledged job {JobId} (seq {Seq}).",
            lease.JobId,
            lease.MessageId);
    }

    /// <summary>
    /// Sends an in-progress acknowledgement for the message and records the new local lease expiry
    /// (<c>now + leaseDuration</c>) on the lease.
    /// </summary>
    internal async Task RenewLeaseAsync(
        NatsScanQueueLease lease,
        TimeSpan leaseDuration,
        CancellationToken cancellationToken)
    {
        await lease.Message.AckProgressAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);

        var expires = _timeProvider.GetUtcNow().Add(leaseDuration);
        lease.RefreshLease(expires);

        _logger.LogDebug(
            "Renewed NATS lease for job {JobId} until {Expires:u}.",
            lease.JobId,
            expires);
    }

    /// <summary>
    /// Releases a lease. A retry that has reached the configured maximum delivery attempts is
    /// dead-lettered instead; other retries are NAKed with a backoff delay; any other disposition
    /// terminates the message.
    /// </summary>
    internal async Task ReleaseAsync(
        NatsScanQueueLease lease,
        QueueReleaseDisposition disposition,
        CancellationToken cancellationToken)
    {
        if (disposition == QueueReleaseDisposition.Retry
            && lease.Attempt >= _queueOptions.MaxDeliveryAttempts)
        {
            _logger.LogWarning(
                "Job {JobId} reached max delivery attempts ({Attempts}); shipping to dead-letter stream.",
                lease.JobId,
                lease.Attempt);

            await DeadLetterAsync(
                    lease,
                    $"max-delivery-attempts:{lease.Attempt}",
                    cancellationToken)
                .ConfigureAwait(false);
            return;
        }

        if (!lease.TryBeginCompletion())
        {
            return;
        }

        if (disposition == QueueReleaseDisposition.Retry)
        {
            QueueMetrics.RecordRetry(TransportName);

            var delay = CalculateBackoff(lease.Attempt);
            await lease.Message.NakAsync(new AckOpts(), delay, cancellationToken).ConfigureAwait(false);

            _logger.LogWarning(
                "Rescheduled job {JobId} via NATS NAK with delay {Delay} (attempt {Attempt}).",
                lease.JobId,
                delay,
                lease.Attempt);
        }
        else
        {
            await lease.Message.AckTerminateAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);

            QueueMetrics.RecordAck(TransportName);
            _logger.LogInformation(
                "Abandoned job {JobId} after {Attempt} attempt(s).",
                lease.JobId,
                lease.Attempt);
        }
    }

    /// <summary>
    /// Copies the leased job to the dead-letter subject and then acknowledges the original message.
    /// Does nothing if the lease has already begun completion.
    /// </summary>
    /// <param name="lease">Lease whose job is being dead-lettered.</param>
    /// <param name="reason">Reason recorded in the <c>deadletter-reason</c> header.</param>
    /// <param name="cancellationToken">Cancels the operation.</param>
    /// <exception cref="NatsJSApiException">The server rejected the dead-letter publish.</exception>
    internal async Task DeadLetterAsync(
        NatsScanQueueLease lease,
        string reason,
        CancellationToken cancellationToken)
    {
        if (!lease.TryBeginCompletion())
        {
            return;
        }

        var js = await GetJetStreamAsync(cancellationToken).ConfigureAwait(false);
        await EnsureDeadLetterStreamAsync(js, cancellationToken).ConfigureAwait(false);

        var headers = BuildDeadLetterHeaders(lease, reason);

        // Publish the dead-letter copy BEFORE acknowledging the original: acking first would
        // remove the job from the work stream, so a failed publish would lose it entirely.
        var ack = await js.PublishAsync(
                _options.DeadLetterSubject,
                lease.Payload.ToArray(),
                PayloadSerializer,
                new NatsJSPubOpts(),
                headers,
                cancellationToken)
            .ConfigureAwait(false);

        if (ack.Error is not null)
        {
            throw new NatsJSApiException(ack.Error);
        }

        await lease.Message.AckAsync(new AckOpts(), cancellationToken).ConfigureAwait(false);

        QueueMetrics.RecordDeadLetter(TransportName);
        _logger.LogError(
            "Dead-lettered job {JobId} (attempt {Attempt}): {Reason}",
            lease.JobId,
            lease.Attempt,
            reason);
    }

    /// <summary>
    /// Sends a ping over the (lazily opened) NATS connection.
    /// </summary>
    internal async ValueTask PingAsync(CancellationToken cancellationToken)
    {
        var connection = await EnsureConnectionAsync(cancellationToken).ConfigureAwait(false);
        await connection.PingAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Returns the cached JetStream context, creating it over the shared connection on first use.
    /// </summary>
    private async Task<NatsJSContext> GetJetStreamAsync(CancellationToken cancellationToken)
    {
        if (_jsContext is not null)
        {
            return _jsContext;
        }

        var connection = await EnsureConnectionAsync(cancellationToken).ConfigureAwait(false);

        await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            _jsContext ??= new NatsJSContext(connection);
            return _jsContext;
        }
        finally
        {
            _connectionGate.Release();
        }
    }

    /// <summary>
    /// Returns the cached durable consumer. On first use, makes sure the work stream and the
    /// dead-letter stream exist and then creates the consumer; if creation fails with an API
    /// error, falls back to looking up the existing durable consumer by name.
    /// </summary>
    private async ValueTask<INatsJSConsumer> EnsureStreamAndConsumerAsync(
        NatsJSContext js,
        CancellationToken cancellationToken)
    {
        if (_consumer is not null)
        {
            return _consumer;
        }

        await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            // Re-check under the gate: another caller may have finished initialization meanwhile.
            if (_consumer is not null)
            {
                return _consumer;
            }

            await EnsureStreamAsync(js, cancellationToken).ConfigureAwait(false);
            await EnsureDeadLetterStreamAsync(js, cancellationToken).ConfigureAwait(false);

            var consumerConfig = new ConsumerConfig
            {
                DurableName = _options.DurableConsumer,
                AckPolicy = ConsumerConfigAckPolicy.Explicit,
                ReplayPolicy = ConsumerConfigReplayPolicy.Instant,
                DeliverPolicy = ConsumerConfigDeliverPolicy.All,
                AckWait = ToNanoseconds(_options.AckWait),
                MaxAckPending = _options.MaxInFlight,
                MaxDeliver = Math.Max(1, _queueOptions.MaxDeliveryAttempts),
                FilterSubjects = new[] { _options.Subject }
            };

            try
            {
                _consumer = await js.CreateConsumerAsync(
                        _options.Stream,
                        consumerConfig,
                        cancellationToken)
                    .ConfigureAwait(false);
            }
            catch (NatsJSApiException apiEx)
            {
                _logger.LogDebug(
                    apiEx,
                    "CreateConsumerAsync failed with code {Code}; attempting to fetch existing durable consumer {Durable}.",
                    apiEx.Error?.Code,
                    _options.DurableConsumer);

                _consumer = await js.GetConsumerAsync(
                        _options.Stream,
                        _options.DurableConsumer,
                        cancellationToken)
                    .ConfigureAwait(false);
            }

            return _consumer;
        }
        finally
        {
            _connectionGate.Release();
        }
    }

    /// <summary>
    /// Returns the cached connection, creating and connecting it on first use.
    /// The connection is cached only after <c>ConnectAsync</c> succeeds; on failure it is
    /// disposed and the exception propagates, so a later call starts from scratch.
    /// </summary>
    private async Task<NatsConnection> EnsureConnectionAsync(CancellationToken cancellationToken)
    {
        if (_connection is not null)
        {
            return _connection;
        }

        await _connectionGate.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            if (_connection is not null)
            {
                return _connection;
            }

            var opts = new NatsOpts
            {
                Url = _options.Url!,
                Name = "stellaops-scanner-queue",
                CommandTimeout = TimeSpan.FromSeconds(10),
                RequestTimeout = TimeSpan.FromSeconds(20),
                PingInterval = TimeSpan.FromSeconds(30)
            };

            var connection = await _connectionFactory(opts, cancellationToken).ConfigureAwait(false);
            try
            {
                await connection.ConnectAsync().ConfigureAwait(false);
            }
            catch
            {
                // Do not leave a never-connected instance cached (or leaked) after a failed connect.
                await connection.DisposeAsync().ConfigureAwait(false);
                throw;
            }

            _connection = connection;
            return connection;
        }
        finally
        {
            _connectionGate.Release();
        }
    }

    /// <summary>
    /// Looks up the work stream and creates it (work-queue retention, file storage, no limits)
    /// when the lookup fails with a JetStream API error.
    /// </summary>
    private async Task EnsureStreamAsync(NatsJSContext js, CancellationToken cancellationToken)
    {
        try
        {
            await js.GetStreamAsync(
                    _options.Stream,
                    new StreamInfoRequest(),
                    cancellationToken)
                .ConfigureAwait(false);
        }
        catch (NatsJSApiException)
        {
            var config = new StreamConfig(
                name: _options.Stream,
                subjects: new[] { _options.Subject })
            {
                Retention = StreamConfigRetention.Workqueue,
                Storage = StreamConfigStorage.File,
                MaxConsumers = -1,
                MaxMsgs = -1,
                MaxBytes = -1,
                MaxAge = 0
            };

            await js.CreateStreamAsync(config, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation(
                "Created NATS JetStream stream {Stream} ({Subject}).",
                _options.Stream,
                _options.Subject);
        }
    }

    /// <summary>
    /// Looks up the dead-letter stream and creates it when the lookup fails with a JetStream API
    /// error. The stream's max age comes from the configured dead-letter retention.
    /// </summary>
    private async Task EnsureDeadLetterStreamAsync(NatsJSContext js, CancellationToken cancellationToken)
    {
        try
        {
            await js.GetStreamAsync(
                    _options.DeadLetterStream,
                    new StreamInfoRequest(),
                    cancellationToken)
                .ConfigureAwait(false);
        }
        catch (NatsJSApiException)
        {
            var config = new StreamConfig(
                name: _options.DeadLetterStream,
                subjects: new[] { _options.DeadLetterSubject })
            {
                Retention = StreamConfigRetention.Workqueue,
                Storage = StreamConfigStorage.File,
                MaxConsumers = -1,
                MaxMsgs = -1,
                MaxBytes = -1,
                MaxAge = ToNanoseconds(_queueOptions.DeadLetter.Retention)
            };

            await js.CreateStreamAsync(config, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation(
                "Created NATS dead-letter stream {Stream} ({Subject}).",
                _options.DeadLetterStream,
                _options.DeadLetterSubject);
        }
    }

    /// <summary>
    /// Builds a lease from a fetched message by reading the envelope headers.
    /// Returns null when the message has no headers or no job-id header.
    /// The attempt number is the larger of the attempt header and the server's delivery count.
    /// </summary>
    private NatsScanQueueLease? CreateLease(
        NatsJSMsg<byte[]> message,
        string consumer,
        DateTimeOffset now,
        TimeSpan leaseDuration)
    {
        var headers = message.Headers;
        if (headers is null)
        {
            return null;
        }

        if (!headers.TryGetValue(QueueEnvelopeFields.JobId, out var jobIdValues) || jobIdValues.Count == 0)
        {
            return null;
        }

        var jobId = jobIdValues[0]!;

        var idempotencyKey = headers.TryGetValue(QueueEnvelopeFields.IdempotencyKey, out var idemValues) && idemValues.Count > 0
            ? idemValues[0]
            : null;

        string? traceId = null;
        if (headers.TryGetValue(QueueEnvelopeFields.TraceId, out var traceValues)
            && traceValues.Count > 0
            && !string.IsNullOrWhiteSpace(traceValues[0]))
        {
            traceId = traceValues[0];
        }

        // Header values are machine-written, so parse them culture-invariantly.
        var enqueuedAt = headers.TryGetValue(QueueEnvelopeFields.EnqueuedAt, out var enqueuedValues)
            && enqueuedValues.Count > 0
            && long.TryParse(enqueuedValues[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out var unix)
                ? DateTimeOffset.FromUnixTimeMilliseconds(unix)
                : now;

        var attempt = headers.TryGetValue(QueueEnvelopeFields.Attempt, out var attemptValues)
            && attemptValues.Count > 0
            && int.TryParse(attemptValues[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsedAttempt)
                ? parsedAttempt
                : 1;

        if (message.Metadata?.NumDelivered is ulong delivered && delivered > 0)
        {
            var deliveredInt = delivered > int.MaxValue ? int.MaxValue : (int)delivered;
            if (deliveredInt > attempt)
            {
                attempt = deliveredInt;
            }
        }

        var leaseExpires = now.Add(leaseDuration);
        var attributes = ExtractAttributes(headers);

        // Fall back to a random id when the message carries no JetStream metadata.
        var messageId = message.Metadata?.Sequence.Stream.ToString(CultureInfo.InvariantCulture)
            ?? Guid.NewGuid().ToString("n");

        return new NatsScanQueueLease(
            this,
            message,
            messageId,
            jobId,
            message.Data ?? Array.Empty<byte>(),
            attempt,
            enqueuedAt,
            leaseExpires,
            consumer,
            idempotencyKey,
            traceId,
            attributes);
    }

    /// <summary>
    /// Collects every header whose name starts with the attribute prefix into a read-only map,
    /// keyed by the header name with the prefix removed. Only the first value of each header is used.
    /// </summary>
    private static IReadOnlyDictionary<string, string> ExtractAttributes(NatsHeaders headers)
    {
        var attributes = new Dictionary<string, string>(StringComparer.Ordinal);

        foreach (var key in headers.Keys)
        {
            if (!key.StartsWith(QueueEnvelopeFields.AttributePrefix, StringComparison.Ordinal))
            {
                continue;
            }

            if (headers.TryGetValue(key, out var values) && values.Count > 0)
            {
                attributes[key[QueueEnvelopeFields.AttributePrefix.Length..]] = values[0]!;
            }
        }

        return attributes.Count == 0
            ? EmptyReadOnlyDictionary<string, string>.Instance
            : new ReadOnlyDictionary<string, string>(attributes);
    }

    /// <summary>
    /// Builds the envelope headers for a newly enqueued job: job id, idempotency key, attempt "1",
    /// enqueue time in Unix milliseconds, optional trace id, and one prefixed header per attribute.
    /// </summary>
    private NatsHeaders BuildHeaders(ScanQueueMessage message, string idempotencyKey)
    {
        var enqueuedAt = _timeProvider.GetUtcNow().ToUnixTimeMilliseconds();

        var headers = new NatsHeaders
        {
            { QueueEnvelopeFields.JobId, message.JobId },
            { QueueEnvelopeFields.IdempotencyKey, idempotencyKey },
            { QueueEnvelopeFields.Attempt, "1" },
            { QueueEnvelopeFields.EnqueuedAt, enqueuedAt.ToString(CultureInfo.InvariantCulture) }
        };

        if (!string.IsNullOrEmpty(message.TraceId))
        {
            headers.Add(QueueEnvelopeFields.TraceId, message.TraceId!);
        }

        if (message.Attributes is not null)
        {
            foreach (var kvp in message.Attributes)
            {
                headers.Add(QueueEnvelopeFields.AttributePrefix + kvp.Key, kvp.Value);
            }
        }

        return headers;
    }

    /// <summary>
    /// Builds the headers for a dead-letter copy: the lease's envelope fields plus a
    /// <c>deadletter-reason</c> header.
    /// </summary>
    private NatsHeaders BuildDeadLetterHeaders(NatsScanQueueLease lease, string reason)
    {
        var headers = new NatsHeaders
        {
            { QueueEnvelopeFields.JobId, lease.JobId },
            { QueueEnvelopeFields.IdempotencyKey, lease.IdempotencyKey ?? lease.JobId },
            { QueueEnvelopeFields.Attempt, lease.Attempt.ToString(CultureInfo.InvariantCulture) },
            { QueueEnvelopeFields.EnqueuedAt, lease.EnqueuedAt.ToUnixTimeMilliseconds().ToString(CultureInfo.InvariantCulture) },
            { "deadletter-reason", reason }
        };

        if (!string.IsNullOrWhiteSpace(lease.TraceId))
        {
            headers.Add(QueueEnvelopeFields.TraceId, lease.TraceId!);
        }

        foreach (var kvp in lease.Attributes)
        {
            headers.Add(QueueEnvelopeFields.AttributePrefix + kvp.Key, kvp.Value);
        }

        return headers;
    }

    /// <summary>
    /// Computes the NAK delay for a retry. The initial delay is the NATS retry delay when positive,
    /// otherwise the queue's initial backoff; zero is returned when neither is positive.
    /// Attempts 1 and 2 both use the initial delay; from attempt 3 on the delay doubles per attempt
    /// (<c>initial * 2^(attempt - 2)</c>), capped at the maximum backoff and never below the initial delay.
    /// </summary>
    private TimeSpan CalculateBackoff(int attempt)
    {
        var configuredInitial = _options.RetryDelay > TimeSpan.Zero
            ? _options.RetryDelay
            : _queueOptions.RetryInitialBackoff;

        if (configuredInitial <= TimeSpan.Zero)
        {
            return TimeSpan.Zero;
        }

        if (attempt <= 1)
        {
            return configuredInitial;
        }

        var max = _queueOptions.RetryMaxBackoff > TimeSpan.Zero
            ? _queueOptions.RetryMaxBackoff
            : configuredInitial;

        var exponent = attempt - 1;
        var scaledTicks = configuredInitial.Ticks * Math.Pow(2, exponent - 1);

        // Cap in double space first so the cast to long cannot overflow.
        var cappedTicks = Math.Min(max.Ticks, scaledTicks);
        var resultTicks = Math.Max(configuredInitial.Ticks, (long)cappedTicks);
        return TimeSpan.FromTicks(resultTicks);
    }

    /// <summary>
    /// Converts a duration to nanoseconds (1 tick = 100 ns). Non-positive durations map to 0 and
    /// durations too large to represent are clamped to <see cref="long.MaxValue"/>.
    /// </summary>
    private static long ToNanoseconds(TimeSpan timeSpan)
    {
        const long NanosecondsPerTick = 100L;

        if (timeSpan <= TimeSpan.Zero)
        {
            return 0;
        }

        return timeSpan.Ticks > long.MaxValue / NanosecondsPerTick
            ? long.MaxValue
            : timeSpan.Ticks * NanosecondsPerTick;
    }

    /// <summary>
    /// Holds one shared, empty read-only dictionary per key/value type pair.
    /// </summary>
    private static class EmptyReadOnlyDictionary<TKey, TValue>
        where TKey : notnull
    {
        public static readonly IReadOnlyDictionary<TKey, TValue> Instance =
            new ReadOnlyDictionary<TKey, TValue>(new Dictionary<TKey, TValue>(0, EqualityComparer<TKey>.Default));
    }
}