save progress
This commit is contained in:
@@ -0,0 +1,235 @@
|
||||
// <copyright file="BatchSnapshotDsseSigner.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Globalization;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Configuration values for DSSE signing of scheduler batch snapshots.
/// </summary>
public sealed class BatchSnapshotDsseOptions
{
    // Defaults are named so the initial value of each property is easy to find.
    private const string DisabledMode = "none";
    private const string DefaultKeyId = "scheduler-batch-snapshot";
    private const string DefaultPayloadType = "application/vnd.stellaops.scheduler.batch-snapshot+json";

    /// <summary>
    /// Gets or sets the signing mode: "hmac" selects HMAC-SHA256, "none" (the default) disables signing.
    /// </summary>
    public string Mode { get; set; } = DisabledMode;

    /// <summary>
    /// Gets or sets the Base64-encoded HMAC secret key.
    /// Must be set when <see cref="Mode"/> is "hmac"; defaults to <c>null</c>.
    /// </summary>
    public string? SecretBase64 { get; set; }

    /// <summary>
    /// Gets or sets the identifier reported alongside signatures as the signing key.
    /// </summary>
    public string KeyId { get; set; } = DefaultKeyId;

    /// <summary>
    /// Gets or sets the DSSE envelope payload type.
    /// </summary>
    public string PayloadType { get; set; } = DefaultPayloadType;
}
|
||||
|
||||
/// <summary>
/// Contract for producing and checking DSSE signatures over batch snapshot digests.
/// </summary>
public interface IBatchSnapshotDsseSigner
{
    /// <summary>
    /// Gets a value indicating whether signing is enabled.
    /// </summary>
    bool IsEnabled { get; }

    /// <summary>
    /// Signs a batch snapshot digest.
    /// </summary>
    /// <param name="digest">Digest bytes to sign.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The key ID and signature bytes produced for the digest.</returns>
    Task<BatchSnapshotSignatureResult> SignAsync(byte[] digest, CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks a signature previously produced for a batch snapshot digest.
    /// </summary>
    /// <param name="digest">The digest bytes that were originally signed.</param>
    /// <param name="signature">Signature bytes to check.</param>
    /// <param name="keyId">Identifier of the key the signature claims to be made with.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns><c>true</c> when the signature is valid; otherwise <c>false</c>.</returns>
    Task<bool> VerifyAsync(byte[] digest, byte[] signature, string keyId, CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
|
||||
/// DSSE signer for batch snapshots using HMAC-SHA256.
|
||||
/// </summary>
|
||||
public sealed class BatchSnapshotDsseSigner : IBatchSnapshotDsseSigner
|
||||
{
|
||||
private readonly IOptions<BatchSnapshotDsseOptions> _options;
private readonly ILogger<BatchSnapshotDsseSigner> _logger;

/// <summary>
/// Initializes a new instance of the <see cref="BatchSnapshotDsseSigner"/> class.
/// </summary>
/// <param name="options">Signing options; read on each call through <c>Value</c>.</param>
/// <param name="logger">Logger used for diagnostic messages.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="options"/> or <paramref name="logger"/> is <c>null</c>.
/// </exception>
public BatchSnapshotDsseSigner(
    IOptions<BatchSnapshotDsseOptions> options,
    ILogger<BatchSnapshotDsseSigner> logger)
{
    // Validate in declaration order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(logger);

    _options = options;
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc/>
public bool IsEnabled
{
    get
    {
        // Enabled only for the "hmac" mode; the comparison ignores case.
        var mode = _options.Value.Mode;
        return string.Equals(mode, "hmac", StringComparison.OrdinalIgnoreCase);
    }
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// When signing is disabled the result carries an empty key ID and an empty signature.
/// Otherwise the signature is HMAC-SHA256 over the DSSE pre-authentication encoding of the digest.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="digest"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">
/// Signing is enabled but the configured secret is missing or is not valid Base64.
/// </exception>
public Task<BatchSnapshotSignatureResult> SignAsync(byte[] digest, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(digest);
    cancellationToken.ThrowIfCancellationRequested();

    var settings = _options.Value;

    if (!IsEnabled)
    {
        _logger.LogDebug("Batch snapshot DSSE signing is disabled");

        var unsigned = new BatchSnapshotSignatureResult(string.Empty, Array.Empty<byte>());
        return Task.FromResult(unsigned);
    }

    var encodedSecret = settings.SecretBase64;
    if (string.IsNullOrWhiteSpace(encodedSecret))
    {
        throw new InvalidOperationException("HMAC signing mode requires SecretBase64 to be configured");
    }

    byte[] key;
    try
    {
        key = Convert.FromBase64String(encodedSecret);
    }
    catch (FormatException ex)
    {
        // Surface a configuration error while keeping the decoding failure as the inner exception.
        throw new InvalidOperationException("SecretBase64 is not valid Base64", ex);
    }

    // The MAC covers the PAE bytes (payload type + digest), not the bare digest.
    var toSign = ComputePreAuthenticationEncoding(settings.PayloadType, digest);
    var mac = HMACSHA256.HashData(key, toSign);

    _logger.LogDebug(
        "Signed batch snapshot with key {KeyId}, digest length {DigestLength}, signature length {SigLength}",
        settings.KeyId,
        digest.Length,
        mac.Length);

    var signed = new BatchSnapshotSignatureResult(settings.KeyId, mac);
    return Task.FromResult(signed);
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// NOTE(review): when signing is disabled this method returns <c>true</c> without inspecting the
/// signature. Confirm that this fail-open behaviour is intended.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="digest"/>, <paramref name="signature"/> or <paramref name="keyId"/> is <c>null</c>.
/// </exception>
public Task<bool> VerifyAsync(byte[] digest, byte[] signature, string keyId, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(digest);
    ArgumentNullException.ThrowIfNull(signature);
    ArgumentNullException.ThrowIfNull(keyId);
    cancellationToken.ThrowIfCancellationRequested();

    return Task.FromResult(Check());

    // Runs the synchronous verification steps; each failure path logs its reason and yields false.
    bool Check()
    {
        var settings = _options.Value;

        if (!IsEnabled)
        {
            _logger.LogDebug("Batch snapshot DSSE verification skipped - signing is disabled");
            return true;
        }

        // Only signatures attributed to the configured key ID are considered (case-sensitive match).
        var keyMatches = string.Equals(keyId, settings.KeyId, StringComparison.Ordinal);
        if (!keyMatches)
        {
            _logger.LogWarning("Key ID mismatch: expected {Expected}, got {Actual}", settings.KeyId, keyId);
            return false;
        }

        var encodedSecret = settings.SecretBase64;
        if (string.IsNullOrWhiteSpace(encodedSecret))
        {
            _logger.LogWarning("Cannot verify signature - SecretBase64 not configured");
            return false;
        }

        byte[] key;
        try
        {
            key = Convert.FromBase64String(encodedSecret);
        }
        catch (FormatException)
        {
            _logger.LogWarning("Cannot verify signature - SecretBase64 is not valid Base64");
            return false;
        }

        // Recompute the MAC over the same PAE bytes used for signing and compare in fixed time.
        var signedBytes = ComputePreAuthenticationEncoding(settings.PayloadType, digest);
        var recomputed = HMACSHA256.HashData(key, signedBytes);
        var matches = CryptographicOperations.FixedTimeEquals(recomputed, signature);

        _logger.LogDebug(
            "Verified batch snapshot signature with key {KeyId}: {Result}",
            keyId,
            matches ? "valid" : "invalid");

        return matches;
    }
}
|
||||
|
||||
/// <summary>
/// Builds the DSSE Pre-Authentication Encoding (PAE) for a payload type and payload.
/// Format: "DSSEv1" SP len(payloadType) SP payloadType SP len(payload) SP payload
/// </summary>
/// <remarks>
/// Lengths are byte counts written as invariant-culture decimal digits; SP is a single space.
/// The payload type is encoded as UTF-8 and the payload bytes are appended unchanged.
/// </remarks>
/// <param name="payloadType">Payload type string placed in the encoding.</param>
/// <param name="payload">Payload bytes placed at the end of the encoding.</param>
/// <returns>A newly allocated array containing the encoded bytes.</returns>
internal static byte[] ComputePreAuthenticationEncoding(string payloadType, ReadOnlySpan<byte> payload)
{
    ReadOnlySpan<byte> magic = "DSSEv1"u8;
    ReadOnlySpan<byte> separator = " "u8;

    byte[] typeBytes = Encoding.UTF8.GetBytes(payloadType);
    byte[] typeLengthDigits = Encoding.UTF8.GetBytes(typeBytes.Length.ToString(CultureInfo.InvariantCulture));
    byte[] payloadLengthDigits = Encoding.UTF8.GetBytes(payload.Length.ToString(CultureInfo.InvariantCulture));

    // Four separators: one after the magic, each length, and the payload type.
    int size = magic.Length
        + typeLengthDigits.Length
        + typeBytes.Length
        + payloadLengthDigits.Length
        + payload.Length
        + (4 * separator.Length);

    var encoded = new byte[size];
    Span<byte> remaining = encoded;

    Append(ref remaining, magic);
    Append(ref remaining, separator);
    Append(ref remaining, typeLengthDigits);
    Append(ref remaining, separator);
    Append(ref remaining, typeBytes);
    Append(ref remaining, separator);
    Append(ref remaining, payloadLengthDigits);
    Append(ref remaining, separator);
    Append(ref remaining, payload);

    return encoded;

    // Copies a segment to the front of the unwritten region and advances past it.
    static void Append(ref Span<byte> destination, ReadOnlySpan<byte> segment)
    {
        segment.CopyTo(destination);
        destination = destination[segment.Length..];
    }
}
|
||||
}
|
||||
@@ -0,0 +1,312 @@
|
||||
// <copyright file="BatchSnapshotService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Canonical.Json;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Optional callback that signs a batch snapshot digest.
/// </summary>
/// <param name="digest">Digest bytes to be signed.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The signing result, carrying the key ID and the signature bytes.</returns>
public delegate Task<BatchSnapshotSignatureResult> BatchSnapshotSignerDelegate(byte[] digest, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
/// Outcome of signing a batch snapshot: which key signed it and the resulting signature.
/// </summary>
/// <param name="KeyId">Identifier of the key used for signing.</param>
/// <param name="Signature">Signature bytes.</param>
public readonly record struct BatchSnapshotSignatureResult(
    string KeyId,
    byte[] Signature);
|
||||
|
||||
/// <summary>
/// Optional callback that verifies a DSSE signature over a batch snapshot digest.
/// </summary>
/// <param name="keyId">Identifier of the key that was used for signing.</param>
/// <param name="digest">Digest bytes that were signed.</param>
/// <param name="signature">Signature bytes to check.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns><c>true</c> when the signature is valid; otherwise <c>false</c>.</returns>
public delegate Task<bool> BatchSnapshotVerifierDelegate(string keyId, byte[] digest, byte[] signature, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of batch snapshot service for audit anchoring.
|
||||
/// </summary>
|
||||
public sealed class BatchSnapshotService : IBatchSnapshotService
|
||||
{
|
||||
private readonly ISchedulerLogRepository _logRepository;
private readonly IBatchSnapshotRepository _snapshotRepository;
private readonly BatchSnapshotSignerDelegate? _signer;
private readonly BatchSnapshotVerifierDelegate? _verifier;
private readonly ILogger<BatchSnapshotService> _logger;

/// <summary>
/// Creates a new batch snapshot service.
/// </summary>
/// <param name="logRepository">Repository used to read scheduler log entries.</param>
/// <param name="snapshotRepository">Repository used to store and load snapshots.</param>
/// <param name="logger">Logger used for diagnostic messages.</param>
/// <param name="signer">Optional signing callback; may be <c>null</c>.</param>
/// <param name="verifier">Optional verification callback; may be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="logRepository"/>, <paramref name="snapshotRepository"/> or <paramref name="logger"/> is <c>null</c>.
/// </exception>
public BatchSnapshotService(
    ISchedulerLogRepository logRepository,
    IBatchSnapshotRepository snapshotRepository,
    ILogger<BatchSnapshotService> logger,
    BatchSnapshotSignerDelegate? signer = null,
    BatchSnapshotVerifierDelegate? verifier = null)
{
    // Required dependencies are checked in the same order they are assigned.
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(snapshotRepository);
    ArgumentNullException.ThrowIfNull(logger);

    _logRepository = logRepository;
    _snapshotRepository = snapshotRepository;
    _logger = logger;

    // Signing and verification are optional capabilities.
    _signer = signer;
    _verifier = verifier;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Reads the jobs in the HLC range, records the last job's link as the chain head, optionally signs
/// the snapshot digest, and persists the snapshot. A signer result with an empty key ID or an empty
/// signature is treated as "not signed", so the stored snapshot has no signer recorded.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
/// <exception cref="InvalidOperationException">No jobs exist in the requested range.</exception>
public async Task<BatchSnapshot> CreateSnapshotAsync(
    string tenantId,
    HlcTimestamp startHlc,
    HlcTimestamp endHlc,
    bool sign = false,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var startT = startHlc.ToSortableString();
    var endT = endHlc.ToSortableString();

    // Get jobs in range
    var jobs = await _logRepository.GetByHlcRangeAsync(
        tenantId,
        startT,
        endT,
        limit: 0, // No limit
        partitionKey: null,
        cancellationToken).ConfigureAwait(false);

    if (jobs.Count == 0)
    {
        throw new InvalidOperationException($"No jobs in specified HLC range [{startT}, {endT}] for tenant {tenantId}");
    }

    // Get chain head (last link in range)
    var headLink = jobs[^1].Link;

    // Create snapshot
    var snapshot = new BatchSnapshot
    {
        BatchId = Guid.NewGuid(),
        TenantId = tenantId,
        RangeStartT = startT,
        RangeEndT = endT,
        HeadLink = headLink,
        JobCount = jobs.Count,
        CreatedAt = DateTimeOffset.UtcNow
    };

    // Sign if requested and signer available
    if (sign)
    {
        if (_signer is null)
        {
            _logger.LogWarning("Signing requested but no signer configured. Snapshot will be unsigned.");
        }
        else
        {
            var digest = ComputeSnapshotDigest(snapshot, jobs);
            var signed = await _signer(digest, cancellationToken).ConfigureAwait(false);

            // A signer may answer with an empty key ID / signature (e.g. when signing is disabled).
            // Storing that would mark the snapshot as signed with an empty signature, which
            // verification rejects, so keep the snapshot unsigned instead.
            var hasSignature = !string.IsNullOrEmpty(signed.KeyId) && signed.Signature is { Length: > 0 };
            if (hasSignature)
            {
                snapshot = snapshot with
                {
                    SignedBy = signed.KeyId,
                    Signature = signed.Signature
                };
            }
            else
            {
                _logger.LogWarning("Signing requested but signer returned no signature. Snapshot will be unsigned.");
            }
        }
    }

    // Persist
    await _snapshotRepository.InsertAsync(snapshot, cancellationToken).ConfigureAwait(false);

    _logger.LogInformation(
        "Batch snapshot created. BatchId={BatchId}, TenantId={TenantId}, Range=[{Start}, {End}], JobCount={JobCount}, Signed={Signed}",
        snapshot.BatchId,
        tenantId,
        startT,
        endT,
        jobs.Count,
        snapshot.SignedBy is not null);

    return snapshot;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>Delegates directly to the snapshot repository's lookup by ID.</remarks>
public Task<BatchSnapshot?> GetSnapshotAsync(Guid batchId, CancellationToken cancellationToken = default)
    => _snapshotRepository.GetByIdAsync(batchId, cancellationToken);
|
||||
|
||||
/// <inheritdoc />
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
public Task<BatchSnapshot?> GetLatestSnapshotAsync(string tenantId, CancellationToken cancellationToken = default)
{
    // Validation runs synchronously, before the repository is touched.
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var lookup = _snapshotRepository.GetLatestAsync(tenantId, cancellationToken);
    return lookup;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Re-reads the jobs in the snapshot's HLC range and compares the job count and the last job's link
/// with the stored snapshot. When the snapshot records a signer, the signature is also checked.
/// Cancellation requested through <paramref name="cancellationToken"/> while the verifier runs is
/// rethrown instead of being reported as a signature failure.
/// NOTE(review): when no verifier is configured a signed snapshot is reported with
/// <c>SignatureValid = true</c> although nothing was checked. Confirm that this is intended.
/// </remarks>
public async Task<BatchSnapshotVerificationResult> VerifySnapshotAsync(
    Guid batchId,
    CancellationToken cancellationToken = default)
{
    var issues = new List<string>();

    var snapshot = await _snapshotRepository.GetByIdAsync(batchId, cancellationToken).ConfigureAwait(false);
    if (snapshot is null)
    {
        return new BatchSnapshotVerificationResult(
            IsValid: false,
            SnapshotFound: false,
            ChainHeadMatches: false,
            JobCountMatches: false,
            SignatureValid: null,
            Issues: ["Snapshot not found"]);
    }

    // Get current jobs in the same range
    var jobs = await _logRepository.GetByHlcRangeAsync(
        snapshot.TenantId,
        snapshot.RangeStartT,
        snapshot.RangeEndT,
        limit: 0,
        partitionKey: null,
        cancellationToken).ConfigureAwait(false);

    // Verify job count
    var jobCountMatches = jobs.Count == snapshot.JobCount;
    if (!jobCountMatches)
    {
        issues.Add($"Job count mismatch: expected {snapshot.JobCount}, found {jobs.Count}");
    }

    // Verify chain head: an empty range can never match a stored head link.
    var chainHeadMatches = jobs.Count > 0 && ByteArrayEquals(jobs[^1].Link, snapshot.HeadLink);
    if (!chainHeadMatches)
    {
        issues.Add("Chain head link does not match snapshot");
    }

    // DSSE signature verification; stays null when the snapshot records no signer.
    bool? signatureValid = null;
    if (snapshot.SignedBy is not null)
    {
        if (snapshot.Signature is null or { Length: 0 })
        {
            issues.Add("Snapshot has signer but empty signature");
            signatureValid = false;
        }
        else if (_verifier is null)
        {
            // No verifier configured - the signature is only checked for presence.
            _logger.LogDebug(
                "Signature verification skipped for BatchId={BatchId}: no verifier configured",
                batchId);
            signatureValid = true; // Assume valid if no verifier
        }
        else
        {
            // Perform DSSE signature verification over the digest recomputed from current data.
            var digest = ComputeSnapshotDigest(snapshot, jobs);
            try
            {
                signatureValid = await _verifier(
                    snapshot.SignedBy,
                    digest,
                    snapshot.Signature,
                    cancellationToken).ConfigureAwait(false);

                if (!signatureValid.Value)
                {
                    issues.Add($"DSSE signature verification failed for key {snapshot.SignedBy}");
                }
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // The caller cancelled; that says nothing about the signature, so do not
                // convert it into an "invalid" verdict.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Signature verification threw exception for BatchId={BatchId}", batchId);
                issues.Add($"Signature verification error: {ex.Message}");
                signatureValid = false;
            }
        }
    }

    // An unsigned snapshot (null) does not count against overall validity.
    var isValid = jobCountMatches && chainHeadMatches && (signatureValid ?? true);

    _logger.LogDebug(
        "Batch snapshot verification complete. BatchId={BatchId}, IsValid={IsValid}, Issues={Issues}",
        batchId,
        isValid,
        issues.Count > 0 ? string.Join("; ", issues) : "none");

    return new BatchSnapshotVerificationResult(
        IsValid: isValid,
        SnapshotFound: true,
        ChainHeadMatches: chainHeadMatches,
        JobCountMatches: jobCountMatches,
        SignatureValid: signatureValid,
        Issues: issues);
}
|
||||
|
||||
/// <summary>
/// Produces the SHA-256 digest of a canonical JSON document describing the snapshot and its jobs.
/// The same digest is used when signing and when verifying.
/// </summary>
/// <param name="snapshot">Snapshot whose identifying fields are included in the digest.</param>
/// <param name="jobs">Jobs whose ID, HLC timestamp, payload hash and link are included, in list order.</param>
/// <returns>The SHA-256 hash of the UTF-8 encoded canonical JSON.</returns>
internal static byte[] ComputeSnapshotDigest(BatchSnapshot snapshot, IReadOnlyList<SchedulerLogEntry> jobs)
{
    // Property names and order below define the signed document; do not rename them.
    // Binary fields are rendered as hex strings.
    var document = new
    {
        BatchId = snapshot.BatchId,
        TenantId = snapshot.TenantId,
        RangeStartT = snapshot.RangeStartT,
        RangeEndT = snapshot.RangeEndT,
        HeadLink = Convert.ToHexString(snapshot.HeadLink),
        JobCount = snapshot.JobCount,
        Jobs = jobs
            .Select(job => new
            {
                JobId = job.JobId,
                THlc = job.THlc,
                PayloadHash = Convert.ToHexString(job.PayloadHash),
                Link = Convert.ToHexString(job.Link)
            })
            .ToArray()
    };

    var canonicalJson = CanonJson.Serialize(document);
    var utf8 = Encoding.UTF8.GetBytes(canonicalJson);

    return SHA256.HashData(utf8);
}
|
||||
|
||||
/// <summary>
/// Compares two byte arrays by content, treating two <c>null</c> arrays as equal
/// and a <c>null</c> array as different from any non-null array.
/// </summary>
/// <param name="a">First array, may be <c>null</c>.</param>
/// <param name="b">Second array, may be <c>null</c>.</param>
/// <returns><c>true</c> when both are <c>null</c> or both hold the same byte sequence.</returns>
private static bool ByteArrayEquals(byte[]? a, byte[]? b)
    // With at least one null, the references are equal only when both are null.
    => a is null || b is null
        ? ReferenceEquals(a, b)
        : a.AsSpan().SequenceEqual(b);
|
||||
}
|
||||
@@ -0,0 +1,179 @@
|
||||
// <copyright file="HlcSchedulerDequeueService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of HLC-ordered scheduler job dequeuing.
|
||||
/// </summary>
|
||||
public sealed class HlcSchedulerDequeueService : IHlcSchedulerDequeueService
|
||||
{
|
||||
private readonly ISchedulerLogRepository _logRepository;
private readonly ILogger<HlcSchedulerDequeueService> _logger;

/// <summary>
/// Creates a new HLC scheduler dequeue service.
/// </summary>
/// <param name="logRepository">Repository used to read scheduler log entries.</param>
/// <param name="logger">Logger used for diagnostic messages.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="logRepository"/> or <paramref name="logger"/> is <c>null</c>.
/// </exception>
public HlcSchedulerDequeueService(
    ISchedulerLogRepository logRepository,
    ILogger<HlcSchedulerDequeueService> logger)
{
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(logger);

    _logRepository = logRepository;
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc />
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
public async Task<SchedulerHlcDequeueResult> DequeueAsync(
    string tenantId,
    int limit,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    var page = await _logRepository
        .GetByHlcOrderAsync(tenantId, partitionKey, limit, cancellationToken)
        .ConfigureAwait(false);

    // Count over an unbounded HLC range; returned with the page as pagination info.
    var total = await _logRepository
        .CountByHlcRangeAsync(tenantId, startTHlc: null, endTHlc: null, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    _logger.LogDebug(
        "Dequeued {Count} of {Total} entries in HLC order. TenantId={TenantId}, PartitionKey={PartitionKey}",
        page.Count,
        total,
        tenantId,
        partitionKey ?? "(all)");

    // No range was requested, so both range bounds are reported as null.
    return new SchedulerHlcDequeueResult(page, total, RangeStartHlc: null, RangeEndHlc: null);
}
|
||||
|
||||
/// <inheritdoc />
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
public async Task<SchedulerHlcDequeueResult> DequeueByRangeAsync(
    string tenantId,
    HlcTimestamp? startHlc,
    HlcTimestamp? endHlc,
    int limit,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    // A missing bound stays null and is passed to the repository as such.
    var lowerBound = startHlc?.ToSortableString();
    var upperBound = endHlc?.ToSortableString();

    var page = await _logRepository
        .GetByHlcRangeAsync(tenantId, lowerBound, upperBound, limit, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    var total = await _logRepository
        .CountByHlcRangeAsync(tenantId, lowerBound, upperBound, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    _logger.LogDebug(
        "Dequeued {Count} of {Total} entries in HLC range [{Start}, {End}]. TenantId={TenantId}",
        page.Count,
        total,
        lowerBound ?? "(unbounded)",
        upperBound ?? "(unbounded)",
        tenantId);

    // The result echoes the bounds exactly as the caller supplied them.
    return new SchedulerHlcDequeueResult(page, total, startHlc, endHlc);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// NOTE(review): the total is counted with the cursor as the range start. If the repository treats
/// that lower bound as inclusive, the total also counts the cursor entry itself. Confirm against
/// the repository implementation.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
public async Task<SchedulerHlcDequeueResult> DequeueAfterAsync(
    string tenantId,
    HlcTimestamp afterHlc,
    int limit,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    var cursor = afterHlc.ToSortableString();

    var page = await _logRepository
        .GetAfterHlcAsync(tenantId, cursor, limit, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    // Count entries from the cursor onwards, with no upper bound.
    var total = await _logRepository
        .CountByHlcRangeAsync(tenantId, cursor, endTHlc: null, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    _logger.LogDebug(
        "Dequeued {Count} entries after HLC {AfterHlc}. TenantId={TenantId}, PartitionKey={PartitionKey}",
        page.Count,
        cursor,
        tenantId,
        partitionKey ?? "(all)");

    // The cursor is reported as the range start; the range end is open.
    return new SchedulerHlcDequeueResult(page, total, afterHlc, RangeEndHlc: null);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// The repository lookup is by job ID only; an entry owned by a different tenant is logged
/// and reported as not found.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
public async Task<SchedulerLogEntry?> GetByJobIdAsync(
    string tenantId,
    Guid jobId,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var found = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
    if (found is null)
    {
        return null;
    }

    // Tenant isolation: only hand back entries that belong to the requesting tenant.
    var ownedByCaller = string.Equals(found.TenantId, tenantId, StringComparison.Ordinal);
    if (ownedByCaller)
    {
        return found;
    }

    _logger.LogWarning(
        "Job {JobId} found but belongs to different tenant. RequestedTenant={RequestedTenant}, ActualTenant={ActualTenant}",
        jobId,
        tenantId,
        found.TenantId);

    return null;
}
|
||||
}
|
||||
@@ -0,0 +1,166 @@
|
||||
// <copyright file="HlcSchedulerEnqueueService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Canonical.Json;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of HLC-ordered scheduler job enqueueing with chain linking.
|
||||
/// </summary>
|
||||
public sealed class HlcSchedulerEnqueueService : IHlcSchedulerEnqueueService
|
||||
{
|
||||
/// <summary>
/// Fixed namespace GUID mixed into the hash when deriving deterministic job IDs.
/// </summary>
private static readonly Guid JobIdNamespace = Guid.Parse("b8a7c6d5-e4f3-42a1-9b0c-1d2e3f4a5b6c");

private readonly IHybridLogicalClock _hlc;
private readonly ISchedulerLogRepository _logRepository;
private readonly IChainHeadRepository _chainHeadRepository;
private readonly ILogger<HlcSchedulerEnqueueService> _logger;

/// <summary>
/// Creates a new HLC scheduler enqueue service.
/// </summary>
/// <param name="hlc">Hybrid logical clock used to timestamp new entries.</param>
/// <param name="logRepository">Repository used to read and insert scheduler log entries.</param>
/// <param name="chainHeadRepository">Repository used to read the last chain link.</param>
/// <param name="logger">Logger used for diagnostic messages.</param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public HlcSchedulerEnqueueService(
    IHybridLogicalClock hlc,
    ISchedulerLogRepository logRepository,
    IChainHeadRepository chainHeadRepository,
    ILogger<HlcSchedulerEnqueueService> logger)
{
    ArgumentNullException.ThrowIfNull(hlc);
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(chainHeadRepository);
    ArgumentNullException.ThrowIfNull(logger);

    _hlc = hlc;
    _logRepository = logRepository;
    _chainHeadRepository = chainHeadRepository;
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>Uses the message's own idempotency key and forwards to <c>EnqueueAsync</c>.</remarks>
/// <exception cref="ArgumentNullException"><paramref name="message"/> is <c>null</c>.</exception>
public Task<SchedulerHlcEnqueueResult> EnqueuePlannerAsync(
    string tenantId,
    PlannerQueueMessage message,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(message);

    var idempotencyKey = message.IdempotencyKey;
    return EnqueueAsync(tenantId, message, idempotencyKey, partitionKey, cancellationToken);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>Uses the message's own idempotency key and forwards to <c>EnqueueAsync</c>.</remarks>
/// <exception cref="ArgumentNullException"><paramref name="message"/> is <c>null</c>.</exception>
public Task<SchedulerHlcEnqueueResult> EnqueueRunnerSegmentAsync(
    string tenantId,
    RunnerSegmentQueueMessage message,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(message);

    var idempotencyKey = message.IdempotencyKey;
    return EnqueueAsync(tenantId, message, idempotencyKey, partitionKey, cancellationToken);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// The job ID is derived from the idempotency key. If an entry with that ID already exists for the
/// tenant, it is returned with <c>Deduplicated: true</c>. Otherwise a new log entry is created with a
/// fresh HLC timestamp and a chain link computed from the previous link.
/// NOTE(review): the existing-entry lookup is by job ID only and the result is not compared against
/// <paramref name="tenantId"/>. Confirm that job IDs cannot be shared across tenants.
/// </remarks>
/// <exception cref="ArgumentException">
/// <paramref name="tenantId"/> or <paramref name="idempotencyKey"/> is null, empty or whitespace.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="payload"/> is <c>null</c>.</exception>
public async Task<SchedulerHlcEnqueueResult> EnqueueAsync<T>(
    string tenantId,
    T payload,
    string idempotencyKey,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentNullException.ThrowIfNull(payload);
    ArgumentException.ThrowIfNullOrWhiteSpace(idempotencyKey);

    // A missing partition key is stored and looked up as the empty string.
    var effectivePartitionKey = partitionKey ?? string.Empty;

    // 1. Same idempotency key => same job ID.
    var jobId = ComputeDeterministicJobId(idempotencyKey);

    // 2. Idempotency: hand back the earlier entry instead of enqueuing again.
    var alreadyEnqueued = await _logRepository.ExistsAsync(tenantId, jobId, cancellationToken).ConfigureAwait(false);
    var existing = alreadyEnqueued
        ? await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false)
        : null;

    if (existing is not null)
    {
        _logger.LogDebug(
            "Job already enqueued, returning existing entry. TenantId={TenantId}, JobId={JobId}",
            tenantId,
            jobId);

        return new SchedulerHlcEnqueueResult(
            HlcTimestamp.Parse(existing.THlc),
            existing.JobId,
            existing.Link,
            Deduplicated: true);
    }

    // 3. Timestamp the new entry.
    var tHlc = _hlc.Tick();
    var sortableHlc = tHlc.ToSortableString();

    // 4. Hash the payload.
    var payloadHash = SchedulerChainLinking.ComputePayloadHash(payload);

    // 5. Fetch the last link recorded for this tenant and partition.
    var prevLink = await _chainHeadRepository
        .GetLastLinkAsync(tenantId, effectivePartitionKey, cancellationToken)
        .ConfigureAwait(false);

    // 6. Chain the new entry onto the previous link.
    var link = SchedulerChainLinking.ComputeLink(prevLink, jobId, tHlc, payloadHash);

    // 7. Insert the log entry together with the chain head update.
    var entry = new SchedulerLogEntry
    {
        TenantId = tenantId,
        THlc = sortableHlc,
        PartitionKey = effectivePartitionKey,
        JobId = jobId,
        PayloadHash = payloadHash,
        PrevLink = prevLink,
        Link = link
    };

    await _logRepository.InsertWithChainUpdateAsync(entry, cancellationToken).ConfigureAwait(false);

    _logger.LogInformation(
        "Job enqueued with HLC ordering. TenantId={TenantId}, JobId={JobId}, THlc={THlc}, Link={Link}",
        tenantId,
        jobId,
        sortableHlc,
        SchedulerChainLinking.ToHex(link));

    return new SchedulerHlcEnqueueResult(tHlc, jobId, link, Deduplicated: false);
}
|
||||
|
||||
/// <summary>
/// Computes a deterministic GUID from the idempotency key using SHA-256.
/// </summary>
/// <remarks>
/// The namespace GUID bytes followed by the UTF-8 key are hashed, and the first 16 hash bytes become
/// the GUID. The version nibble is set to 4 and the variant bits to 10xx.
/// The bytes are interpreted in big-endian (RFC 4122 text) order so that the version nibble lands in
/// the first hex digit of the third group. <c>new Guid(byte[])</c> reads bytes 6–7 as a little-endian
/// value, which would put a version set on byte 6 in the wrong digit.
/// NOTE(review): this changes the IDs produced for a given key compared with the little-endian
/// construction. Confirm that no job IDs derived the old way have been persisted.
/// </remarks>
/// <param name="idempotencyKey">Key that identifies the job; equal keys yield equal IDs.</param>
/// <returns>The derived job ID.</returns>
private static Guid ComputeDeterministicJobId(string idempotencyKey)
{
    // Use namespace + key pattern similar to UUID v5
    var namespaceBytes = JobIdNamespace.ToByteArray();
    var keyBytes = Encoding.UTF8.GetBytes(idempotencyKey);

    var combined = new byte[namespaceBytes.Length + keyBytes.Length];
    Buffer.BlockCopy(namespaceBytes, 0, combined, 0, namespaceBytes.Length);
    Buffer.BlockCopy(keyBytes, 0, combined, namespaceBytes.Length, keyBytes.Length);

    var hash = SHA256.HashData(combined);

    // Take first 16 bytes for GUID
    Span<byte> guidBytes = stackalloc byte[16];
    hash.AsSpan(0, 16).CopyTo(guidBytes);

    // In RFC 4122 byte order, byte 6 carries the version in its high nibble
    // and byte 8 carries the variant in its top two bits.
    guidBytes[6] = (byte)((guidBytes[6] & 0x0F) | 0x40); // Version 4
    guidBytes[8] = (byte)((guidBytes[8] & 0x3F) | 0x80); // Variant 1

    return new Guid(guidBytes, bigEndian: true);
}
|
||||
}
|
||||
@@ -0,0 +1,178 @@
|
||||
// <copyright file="HlcSchedulerMetrics.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Static entry points for recording HLC scheduler measurements on the
/// <c>StellaOps.Scheduler.Hlc</c> meter.
/// </summary>
public static class HlcSchedulerMetrics
{
    private const string TenantTagName = "tenant";
    private const string PartitionTagName = "partition";
    private const string ResultTagName = "result";

    private static readonly Meter SchedulerMeter = new("StellaOps.Scheduler.Hlc");

    // ---- Enqueue instruments ----
    private static readonly Counter<long> Enqueues = SchedulerMeter.CreateCounter<long>(
        "scheduler_hlc_enqueues_total",
        unit: "{enqueue}",
        description: "Total number of HLC-ordered enqueue operations");

    private static readonly Counter<long> DeduplicatedEnqueues = SchedulerMeter.CreateCounter<long>(
        "scheduler_hlc_enqueue_deduplicated_total",
        unit: "{enqueue}",
        description: "Total number of deduplicated HLC enqueue operations");

    private static readonly Histogram<double> EnqueueDuration = SchedulerMeter.CreateHistogram<double>(
        "scheduler_hlc_enqueue_duration_seconds",
        unit: "s",
        description: "Duration of HLC enqueue operations");

    // ---- Dequeue instruments ----
    private static readonly Counter<long> Dequeues = SchedulerMeter.CreateCounter<long>(
        "scheduler_hlc_dequeues_total",
        unit: "{dequeue}",
        description: "Total number of HLC-ordered dequeue operations");

    private static readonly Counter<long> DequeuedEntries = SchedulerMeter.CreateCounter<long>(
        "scheduler_hlc_dequeued_entries_total",
        unit: "{entry}",
        description: "Total number of entries dequeued via HLC ordering");

    // ---- Chain verification instruments ----
    private static readonly Counter<long> ChainVerifications = SchedulerMeter.CreateCounter<long>(
        "scheduler_chain_verifications_total",
        unit: "{verification}",
        description: "Total number of chain verification operations");

    private static readonly Counter<long> ChainVerificationIssues = SchedulerMeter.CreateCounter<long>(
        "scheduler_chain_verification_issues_total",
        unit: "{issue}",
        description: "Total number of chain verification issues found");

    private static readonly Counter<long> ChainEntriesVerified = SchedulerMeter.CreateCounter<long>(
        "scheduler_chain_entries_verified_total",
        unit: "{entry}",
        description: "Total number of chain entries verified");

    // ---- Batch snapshot instruments ----
    private static readonly Counter<long> SnapshotsCreated = SchedulerMeter.CreateCounter<long>(
        "scheduler_batch_snapshots_created_total",
        unit: "{snapshot}",
        description: "Total number of batch snapshots created");

    private static readonly Counter<long> SnapshotsSigned = SchedulerMeter.CreateCounter<long>(
        "scheduler_batch_snapshots_signed_total",
        unit: "{snapshot}",
        description: "Total number of signed batch snapshots");

    private static readonly Counter<long> SnapshotVerifications = SchedulerMeter.CreateCounter<long>(
        "scheduler_batch_snapshot_verifications_total",
        unit: "{verification}",
        description: "Total number of batch snapshot verification operations");

    /// <summary>
    /// Counts one enqueue; a deduplicated enqueue is additionally counted on the
    /// deduplication counter with the same tags.
    /// </summary>
    /// <param name="tenantId">Tenant identifier, emitted as the <c>tenant</c> tag.</param>
    /// <param name="partitionKey">Partition key; null or empty is reported as <c>(default)</c>.</param>
    /// <param name="deduplicated">True when the enqueue was deduplicated.</param>
    public static void RecordEnqueue(string tenantId, string partitionKey, bool deduplicated)
    {
        var enqueueTags = BuildTags(tenantId, partitionKey);

        Enqueues.Add(1, enqueueTags);

        if (!deduplicated)
        {
            return;
        }

        DeduplicatedEnqueues.Add(1, enqueueTags);
    }

    /// <summary>
    /// Adds one sample to the enqueue duration histogram.
    /// </summary>
    /// <param name="tenantId">Tenant identifier, emitted as the <c>tenant</c> tag.</param>
    /// <param name="partitionKey">Partition key; null or empty is reported as <c>(default)</c>.</param>
    /// <param name="durationSeconds">Elapsed time of the enqueue, in seconds.</param>
    public static void RecordEnqueueDuration(string tenantId, string partitionKey, double durationSeconds)
    {
        var durationTags = BuildTags(tenantId, partitionKey);
        EnqueueDuration.Record(durationSeconds, durationTags);
    }

    /// <summary>
    /// Counts one dequeue operation and the number of entries it returned.
    /// </summary>
    /// <param name="tenantId">Tenant identifier, emitted as the <c>tenant</c> tag.</param>
    /// <param name="partitionKey">Partition key; null or empty is reported as <c>(default)</c>.</param>
    /// <param name="entryCount">How many entries the dequeue produced.</param>
    public static void RecordDequeue(string tenantId, string partitionKey, int entryCount)
    {
        var dequeueTags = BuildTags(tenantId, partitionKey);

        Dequeues.Add(1, dequeueTags);
        DequeuedEntries.Add(entryCount, dequeueTags);
    }

    /// <summary>
    /// Counts one chain verification (tagged valid/invalid), the entries it covered,
    /// and - only when there are any - the issues it found.
    /// </summary>
    /// <param name="tenantId">Tenant identifier, emitted as the <c>tenant</c> tag.</param>
    /// <param name="entriesVerified">Number of entries that were checked.</param>
    /// <param name="issuesFound">Number of issues detected; values of zero or less are not recorded.</param>
    /// <param name="isValid">Overall outcome, emitted as the <c>result</c> tag.</param>
    public static void RecordChainVerification(string tenantId, int entriesVerified, int issuesFound, bool isValid)
    {
        var tenant = TenantTag(tenantId);

        ChainVerifications.Add(1, tenant, ResultTag(isValid));
        ChainEntriesVerified.Add(entriesVerified, tenant);

        if (issuesFound > 0)
        {
            ChainVerificationIssues.Add(issuesFound, tenant);
        }
    }

    /// <summary>
    /// Counts one created batch snapshot, and one signed snapshot when applicable.
    /// </summary>
    /// <param name="tenantId">Tenant identifier, emitted as the <c>tenant</c> tag.</param>
    /// <param name="jobCount">Number of jobs in the snapshot (not currently recorded on any instrument).</param>
    /// <param name="signed">True when the snapshot carries a signature.</param>
    public static void RecordSnapshotCreated(string tenantId, int jobCount, bool signed)
    {
        var tenant = TenantTag(tenantId);

        SnapshotsCreated.Add(1, tenant);

        if (!signed)
        {
            return;
        }

        SnapshotsSigned.Add(1, tenant);
    }

    /// <summary>
    /// Counts one batch snapshot verification, tagged with tenant and valid/invalid result.
    /// </summary>
    /// <param name="tenantId">Tenant identifier, emitted as the <c>tenant</c> tag.</param>
    /// <param name="isValid">Verification outcome, emitted as the <c>result</c> tag.</param>
    public static void RecordSnapshotVerification(string tenantId, bool isValid)
    {
        SnapshotVerifications.Add(1, TenantTag(tenantId), ResultTag(isValid));
    }

    // Tenant + partition tag pair shared by the enqueue/dequeue instruments.
    private static KeyValuePair<string, object?>[] BuildTags(string tenantId, string partitionKey)
    {
        string partitionLabel = partitionKey;
        if (string.IsNullOrEmpty(partitionLabel))
        {
            partitionLabel = "(default)";
        }

        return new[]
        {
            TenantTag(tenantId),
            new KeyValuePair<string, object?>(PartitionTagName, partitionLabel),
        };
    }

    private static KeyValuePair<string, object?> TenantTag(string tenantId)
    {
        return new KeyValuePair<string, object?>(TenantTagName, tenantId);
    }

    private static KeyValuePair<string, object?> ResultTag(bool isValid)
    {
        string outcome = isValid ? "valid" : "invalid";
        return new KeyValuePair<string, object?>(ResultTagName, outcome);
    }
}
|
||||
@@ -0,0 +1,103 @@
|
||||
// <copyright file="HlcSchedulerServiceCollectionExtensions.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Extension methods for registering HLC scheduler services.
/// </summary>
public static class HlcSchedulerServiceCollectionExtensions
{
    /// <summary>
    /// Adds HLC-ordered scheduler services to the service collection.
    /// </summary>
    /// <remarks>
    /// Every registration uses <c>TryAdd*</c>, so services already registered by the caller are kept.
    /// </remarks>
    /// <param name="services">The service collection.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public static IServiceCollection AddHlcSchedulerServices(this IServiceCollection services)
    {
        ArgumentNullException.ThrowIfNull(services);

        // Repositories (scoped for per-request database connections)
        services.TryAddScoped<ISchedulerLogRepository, PostgresSchedulerLogRepository>();
        services.TryAddScoped<IChainHeadRepository, PostgresChainHeadRepository>();
        services.TryAddScoped<IBatchSnapshotRepository, PostgresBatchSnapshotRepository>();

        // Services (scoped to align with repository lifetime)
        services.TryAddScoped<IHlcSchedulerEnqueueService, HlcSchedulerEnqueueService>();
        services.TryAddScoped<IHlcSchedulerDequeueService, HlcSchedulerDequeueService>();
        services.TryAddScoped<IBatchSnapshotService, BatchSnapshotService>();
        services.TryAddScoped<ISchedulerChainVerifier, SchedulerChainVerifier>();

        // DSSE signer (disabled by default)
        services.TryAddSingleton<IBatchSnapshotDsseSigner, BatchSnapshotDsseSigner>();

        return services;
    }

    /// <summary>
    /// Adds HLC-ordered scheduler services with DSSE signing support.
    /// </summary>
    /// <remarks>
    /// Binds <see cref="BatchSnapshotDsseOptions"/> from the
    /// <c>Scheduler:Queue:Hlc:DsseSigning</c> section and registers an
    /// <see cref="IBatchSnapshotService"/> that signs through the registered
    /// <see cref="IBatchSnapshotDsseSigner"/> when that signer reports itself enabled.
    /// </remarks>
    /// <param name="services">The service collection.</param>
    /// <param name="configuration">Configuration root containing the DSSE signing section.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configuration"/> is null.
    /// </exception>
    public static IServiceCollection AddHlcSchedulerServicesWithDsseSigning(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Configure DSSE options
        services.AddOptions<BatchSnapshotDsseOptions>()
            .Bind(configuration.GetSection("Scheduler:Queue:Hlc:DsseSigning"))
            .ValidateDataAnnotations()
            .ValidateOnStart();

        // Add base services
        services.AddHlcSchedulerServices();

        // Wire up DSSE signer to BatchSnapshotService
        ReplaceBatchSnapshotService(services, sp =>
        {
            var dsseSigner = sp.GetRequiredService<IBatchSnapshotDsseSigner>();

            BatchSnapshotSignerDelegate? signer = dsseSigner.IsEnabled
                ? dsseSigner.SignAsync
                : null;

            return signer;
        });

        return services;
    }

    /// <summary>
    /// Adds HLC-ordered scheduler services with a custom signer delegate.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="signerFactory">Factory to create the signer delegate; invoked once per resolved service instance.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="signerFactory"/> is null.
    /// </exception>
    public static IServiceCollection AddHlcSchedulerServices(
        this IServiceCollection services,
        Func<IServiceProvider, BatchSnapshotSignerDelegate> signerFactory)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(signerFactory);

        services.AddHlcSchedulerServices();

        // Override BatchSnapshotService registration to include signer
        ReplaceBatchSnapshotService(services, signerFactory);

        return services;
    }

    // Swaps the IBatchSnapshotService registration for a factory that injects the resolved signer.
    // Replace (rather than a second AddScoped) keeps a single descriptor in the collection, so
    // IEnumerable<IBatchSnapshotService> resolution does not also yield the unsigned default.
    private static void ReplaceBatchSnapshotService(
        IServiceCollection services,
        Func<IServiceProvider, BatchSnapshotSignerDelegate?> resolveSigner)
    {
        services.Replace(ServiceDescriptor.Scoped<IBatchSnapshotService>(sp =>
        {
            var logRepository = sp.GetRequiredService<ISchedulerLogRepository>();
            var snapshotRepository = sp.GetRequiredService<IBatchSnapshotRepository>();
            var logger = sp.GetRequiredService<Microsoft.Extensions.Logging.ILogger<BatchSnapshotService>>();
            var signer = resolveSigner(sp);

            return new BatchSnapshotService(logRepository, snapshotRepository, logger, signer);
        }));
    }
}
|
||||
@@ -0,0 +1,82 @@
|
||||
// <copyright file="IBatchSnapshotService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Creates, retrieves, and verifies batch snapshots of the scheduler chain.
/// </summary>
/// <remarks>
/// A batch snapshot is an audit anchor: it captures the chain head for a specific
/// HLC range and can optionally be signed with DSSE for attestation purposes.
/// </remarks>
public interface IBatchSnapshotService
{
    /// <summary>
    /// Creates a batch snapshot covering the given HLC range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startHlc">First HLC timestamp in the range (inclusive).</param>
    /// <param name="endHlc">Last HLC timestamp in the range (inclusive).</param>
    /// <param name="sign">True to sign the snapshot with DSSE; defaults to false.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The snapshot that was created.</returns>
    Task<BatchSnapshot> CreateSnapshotAsync(string tenantId, HlcTimestamp startHlc, HlcTimestamp endHlc, bool sign = false, CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up a batch snapshot by its identifier.
    /// </summary>
    /// <param name="batchId">The batch identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The snapshot if found.</returns>
    Task<BatchSnapshot?> GetSnapshotAsync(Guid batchId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up the most recent batch snapshot of a tenant.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The most recent snapshot if found.</returns>
    Task<BatchSnapshot?> GetLatestSnapshotAsync(string tenantId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks a batch snapshot against the current chain state.
    /// </summary>
    /// <param name="batchId">Identifier of the batch snapshot to verify.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The verification result.</returns>
    Task<BatchSnapshotVerificationResult> VerifySnapshotAsync(Guid batchId, CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// Outcome of verifying a batch snapshot.
/// </summary>
/// <param name="IsValid">True when the snapshot is valid.</param>
/// <param name="SnapshotFound">True when the snapshot was found.</param>
/// <param name="ChainHeadMatches">True when the chain head matches the snapshot.</param>
/// <param name="JobCountMatches">True when the job count matches.</param>
/// <param name="SignatureValid">DSSE signature validity; null when the snapshot is unsigned.</param>
/// <param name="Issues">Verification issues reported when the snapshot is invalid.</param>
public readonly record struct BatchSnapshotVerificationResult(bool IsValid, bool SnapshotFound, bool ChainHeadMatches, bool JobCountMatches, bool? SignatureValid, IReadOnlyList<string> Issues);
|
||||
@@ -0,0 +1,77 @@
|
||||
// <copyright file="IHlcSchedulerDequeueService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Reads scheduler log entries back in HLC order.
/// </summary>
/// <remarks>
/// Retrieval is deterministic and HLC-ordered; the HLC ordering guarantees causal
/// consistency across distributed nodes.
/// </remarks>
public interface IHlcSchedulerDequeueService
{
    /// <summary>
    /// Returns scheduler log entries in HLC order.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="limit">Upper bound on the number of entries returned.</param>
    /// <param name="partitionKey">Partition key to filter by, or null for no filter.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The dequeue result, with entries in HLC order.</returns>
    Task<SchedulerHlcDequeueResult> DequeueAsync(string tenantId, int limit, string? partitionKey = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns scheduler log entries that fall inside an HLC range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startHlc">Range start (inclusive); null leaves the start unbounded.</param>
    /// <param name="endHlc">Range end (inclusive); null leaves the end unbounded.</param>
    /// <param name="limit">Upper bound on the number of entries returned.</param>
    /// <param name="partitionKey">Partition key to filter by, or null for no filter.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The dequeue result, with entries in HLC order.</returns>
    Task<SchedulerHlcDequeueResult> DequeueByRangeAsync(string tenantId, HlcTimestamp? startHlc, HlcTimestamp? endHlc, int limit, string? partitionKey = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns scheduler log entries that come after a given HLC timestamp (cursor-based).
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="afterHlc">Cursor position; entries at this timestamp are excluded.</param>
    /// <param name="limit">Upper bound on the number of entries returned.</param>
    /// <param name="partitionKey">Partition key to filter by, or null for no filter.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The dequeue result, with entries in HLC order.</returns>
    Task<SchedulerHlcDequeueResult> DequeueAfterAsync(string tenantId, HlcTimestamp afterHlc, int limit, string? partitionKey = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Fetches the single scheduler log entry recorded for a job.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="jobId">The job identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The matching entry, or null when none is found.</returns>
    Task<Persistence.Postgres.Models.SchedulerLogEntry?> GetByJobIdAsync(string tenantId, Guid jobId, CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// <copyright file="IHlcSchedulerEnqueueService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Enqueues scheduler jobs with HLC ordering and chain linking.
/// </summary>
/// <remarks>
/// Each enqueue is wrapped with:
/// <list type="bullet">
/// <item><description>assignment of an HLC timestamp for global ordering,</description></item>
/// <item><description>computation of a chain link for audit proofs,</description></item>
/// <item><description>persistence to scheduler_log for replay.</description></item>
/// </list>
/// </remarks>
public interface IHlcSchedulerEnqueueService
{
    /// <summary>
    /// Enqueues a planner message.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="message">The planner queue message.</param>
    /// <param name="partitionKey">Partition key used for chain separation; optional.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The enqueue result, carrying the HLC timestamp and chain link.</returns>
    Task<SchedulerHlcEnqueueResult> EnqueuePlannerAsync(string tenantId, PlannerQueueMessage message, string? partitionKey = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Enqueues a runner segment message.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="message">The runner segment queue message.</param>
    /// <param name="partitionKey">Partition key used for chain separation; optional.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The enqueue result, carrying the HLC timestamp and chain link.</returns>
    Task<SchedulerHlcEnqueueResult> EnqueueRunnerSegmentAsync(string tenantId, RunnerSegmentQueueMessage message, string? partitionKey = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Enqueues an arbitrary payload.
    /// </summary>
    /// <typeparam name="T">Payload type.</typeparam>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="payload">The payload to enqueue.</param>
    /// <param name="idempotencyKey">Key used for deduplication.</param>
    /// <param name="partitionKey">Partition key used for chain separation; optional.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The enqueue result, carrying the HLC timestamp and chain link.</returns>
    Task<SchedulerHlcEnqueueResult> EnqueueAsync<T>(string tenantId, T payload, string idempotencyKey, string? partitionKey = null, CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,292 @@
|
||||
// <copyright file="SchedulerChainVerifier.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Verifies the integrity of the scheduler chain.
/// </summary>
public interface ISchedulerChainVerifier
{
    /// <summary>
    /// Checks the integrity of the scheduler chain inside an HLC range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startHlc">Range start (inclusive); null leaves the start unbounded.</param>
    /// <param name="endHlc">Range end (inclusive); null leaves the end unbounded.</param>
    /// <param name="partitionKey">Partition to verify; null means all partitions.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The verification result.</returns>
    Task<ChainVerificationResult> VerifyAsync(string tenantId, HlcTimestamp? startHlc = null, HlcTimestamp? endHlc = null, string? partitionKey = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks a single chain link.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="jobId">Identifier of the job whose entry is verified.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The verification result for that one entry.</returns>
    Task<ChainVerificationResult> VerifyEntryAsync(string tenantId, Guid jobId, CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// Outcome of a chain verification run.
/// </summary>
/// <param name="IsValid">True when the chain is valid.</param>
/// <param name="EntriesChecked">How many entries were checked.</param>
/// <param name="Issues">The verification issues that were found.</param>
public readonly record struct ChainVerificationResult(bool IsValid, int EntriesChecked, IReadOnlyList<ChainVerificationIssue> Issues);
|
||||
|
||||
/// <summary>
/// One concrete problem detected while verifying the chain.
/// </summary>
/// <param name="JobId">Job ID of the entry where the issue was found.</param>
/// <param name="THlc">HLC timestamp of the problematic entry.</param>
/// <param name="IssueType">Kind of issue that was found.</param>
/// <param name="Description">Human-readable explanation of the issue.</param>
public readonly record struct ChainVerificationIssue(Guid JobId, string THlc, string IssueType, string Description);
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of scheduler chain verification.
|
||||
/// </summary>
|
||||
public sealed class SchedulerChainVerifier : ISchedulerChainVerifier
|
||||
{
|
||||
private readonly ISchedulerLogRepository _logRepository;
|
||||
private readonly ILogger<SchedulerChainVerifier> _logger;
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new chain verifier.
|
||||
/// </summary>
|
||||
public SchedulerChainVerifier(
|
||||
ISchedulerLogRepository logRepository,
|
||||
ILogger<SchedulerChainVerifier> logger)
|
||||
{
|
||||
_logRepository = logRepository ?? throw new ArgumentNullException(nameof(logRepository));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<ChainVerificationResult> VerifyAsync(
|
||||
string tenantId,
|
||||
HlcTimestamp? startHlc = null,
|
||||
HlcTimestamp? endHlc = null,
|
||||
string? partitionKey = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
|
||||
var startT = startHlc?.ToSortableString();
|
||||
var endT = endHlc?.ToSortableString();
|
||||
|
||||
var entries = await _logRepository.GetByHlcRangeAsync(
|
||||
tenantId,
|
||||
startT,
|
||||
endT,
|
||||
limit: 0, // No limit
|
||||
partitionKey,
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (entries.Count == 0)
|
||||
{
|
||||
_logger.LogDebug(
|
||||
"No entries to verify in range [{Start}, {End}] for tenant {TenantId}",
|
||||
startT ?? "(unbounded)",
|
||||
endT ?? "(unbounded)",
|
||||
tenantId);
|
||||
|
||||
return new ChainVerificationResult(IsValid: true, EntriesChecked: 0, Issues: []);
|
||||
}
|
||||
|
||||
var issues = new List<ChainVerificationIssue>();
|
||||
byte[]? expectedPrevLink = null;
|
||||
|
||||
// If starting mid-chain, we need to get the previous entry's link
|
||||
if (startHlc is not null)
|
||||
{
|
||||
var previousEntries = await _logRepository.GetByHlcRangeAsync(
|
||||
tenantId,
|
||||
startTHlc: null,
|
||||
startT,
|
||||
limit: 1,
|
||||
partitionKey,
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (previousEntries.Count > 0 && previousEntries[0].THlc != startT)
|
||||
{
|
||||
expectedPrevLink = previousEntries[0].Link;
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var entry in entries)
|
||||
{
|
||||
// Verify prev_link matches expected
|
||||
if (!ByteArrayEquals(entry.PrevLink, expectedPrevLink))
|
||||
{
|
||||
issues.Add(new ChainVerificationIssue(
|
||||
entry.JobId,
|
||||
entry.THlc,
|
||||
"PrevLinkMismatch",
|
||||
$"Expected {ToHex(expectedPrevLink)}, got {ToHex(entry.PrevLink)}"));
|
||||
}
|
||||
|
||||
// Recompute link and verify
|
||||
var computed = SchedulerChainLinking.ComputeLink(
|
||||
entry.PrevLink,
|
||||
entry.JobId,
|
||||
HlcTimestamp.Parse(entry.THlc),
|
||||
entry.PayloadHash);
|
||||
|
||||
if (!ByteArrayEquals(entry.Link, computed))
|
||||
{
|
||||
issues.Add(new ChainVerificationIssue(
|
||||
entry.JobId,
|
||||
entry.THlc,
|
||||
"LinkMismatch",
|
||||
$"Stored link doesn't match computed. Stored={ToHex(entry.Link)}, Computed={ToHex(computed)}"));
|
||||
}
|
||||
|
||||
expectedPrevLink = entry.Link;
|
||||
}
|
||||
|
||||
var isValid = issues.Count == 0;
|
||||
|
||||
_logger.LogInformation(
|
||||
"Chain verification complete. TenantId={TenantId}, Range=[{Start}, {End}], EntriesChecked={Count}, IsValid={IsValid}, IssueCount={IssueCount}",
|
||||
tenantId,
|
||||
startT ?? "(unbounded)",
|
||||
endT ?? "(unbounded)",
|
||||
entries.Count,
|
||||
isValid,
|
||||
issues.Count);
|
||||
|
||||
return new ChainVerificationResult(isValid, entries.Count, issues);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<ChainVerificationResult> VerifyEntryAsync(
|
||||
string tenantId,
|
||||
Guid jobId,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
|
||||
|
||||
var entry = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
|
||||
if (entry is null)
|
||||
{
|
||||
return new ChainVerificationResult(
|
||||
IsValid: false,
|
||||
EntriesChecked: 0,
|
||||
Issues: [new ChainVerificationIssue(jobId, string.Empty, "NotFound", "Entry not found")]);
|
||||
}
|
||||
|
||||
// Verify tenant isolation
|
||||
if (!string.Equals(entry.TenantId, tenantId, StringComparison.Ordinal))
|
||||
{
|
||||
return new ChainVerificationResult(
|
||||
IsValid: false,
|
||||
EntriesChecked: 0,
|
||||
Issues: [new ChainVerificationIssue(jobId, entry.THlc, "TenantMismatch", "Entry belongs to different tenant")]);
|
||||
}
|
||||
|
||||
var issues = new List<ChainVerificationIssue>();
|
||||
|
||||
// Recompute link and verify
|
||||
var computed = SchedulerChainLinking.ComputeLink(
|
||||
entry.PrevLink,
|
||||
entry.JobId,
|
||||
HlcTimestamp.Parse(entry.THlc),
|
||||
entry.PayloadHash);
|
||||
|
||||
if (!ByteArrayEquals(entry.Link, computed))
|
||||
{
|
||||
issues.Add(new ChainVerificationIssue(
|
||||
entry.JobId,
|
||||
entry.THlc,
|
||||
"LinkMismatch",
|
||||
$"Stored link doesn't match computed"));
|
||||
}
|
||||
|
||||
// If there's a prev_link, verify it exists and matches
|
||||
if (entry.PrevLink is { Length: > 0 })
|
||||
{
|
||||
// Find the previous entry
|
||||
var allEntries = await _logRepository.GetByHlcRangeAsync(
|
||||
tenantId,
|
||||
startTHlc: null,
|
||||
entry.THlc,
|
||||
limit: 0,
|
||||
partitionKey: entry.PartitionKey,
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var prevEntry = allEntries
|
||||
.Where(e => e.THlc != entry.THlc)
|
||||
.OrderByDescending(e => e.THlc)
|
||||
.FirstOrDefault();
|
||||
|
||||
if (prevEntry is null)
|
||||
{
|
||||
issues.Add(new ChainVerificationIssue(
|
||||
entry.JobId,
|
||||
entry.THlc,
|
||||
"PrevEntryNotFound",
|
||||
"Entry has prev_link but no previous entry found"));
|
||||
}
|
||||
else if (!ByteArrayEquals(prevEntry.Link, entry.PrevLink))
|
||||
{
|
||||
issues.Add(new ChainVerificationIssue(
|
||||
entry.JobId,
|
||||
entry.THlc,
|
||||
"PrevLinkMismatch",
|
||||
$"prev_link doesn't match previous entry's link"));
|
||||
}
|
||||
}
|
||||
|
||||
return new ChainVerificationResult(issues.Count == 0, 1, issues);
|
||||
}
|
||||
|
||||
/// <summary>
/// Compares two optional byte arrays for content equality.
/// </summary>
/// <param name="a">First array; may be null.</param>
/// <param name="b">Second array; may be null.</param>
/// <returns>
/// <c>true</c> when both arguments are null, or when both are non-null and contain the same
/// bytes in the same order (two empty arrays are equal); <c>false</c> when exactly one
/// argument is null or the contents differ.
/// </returns>
private static bool ByteArrayEquals(byte[]? a, byte[]? b)
{
    if (a is null || b is null)
    {
        // Equal only when both sides are absent; null is not treated as an empty array.
        return a is null && b is null;
    }

    // SequenceEqual checks lengths as part of the comparison, so the empty/empty case
    // needs no dedicated branch.
    return a.AsSpan().SequenceEqual(b);
}
|
||||
|
||||
/// <summary>
/// Renders a byte array as a hexadecimal string.
/// </summary>
/// <param name="bytes">Bytes to format; may be null.</param>
/// <returns>The literal <c>(null)</c> when <paramref name="bytes"/> is null; otherwise the hex encoding produced by <see cref="Convert.ToHexString(byte[])"/>.</returns>
private static string ToHex(byte[]? bytes)
    => bytes is { } value ? Convert.ToHexString(value) : "(null)";
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
// <copyright file="SchedulerDequeueResult.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Immutable result of a scheduler dequeue that returns log entries in HLC order.
/// </summary>
/// <param name="Entries">Scheduler log entries that were dequeued, in HLC order.</param>
/// <param name="TotalAvailable">Total number of entries available in the queried range.</param>
/// <param name="RangeStartHlc">HLC lower bound of the queried range, or null when the range has no lower bound.</param>
/// <param name="RangeEndHlc">HLC upper bound of the queried range, or null when the range has no upper bound.</param>
public readonly record struct SchedulerHlcDequeueResult(
    IReadOnlyList<SchedulerLogEntry> Entries,
    int TotalAvailable,
    HlcTimestamp? RangeStartHlc,
    HlcTimestamp? RangeEndHlc);
|
||||
@@ -0,0 +1,20 @@
|
||||
// <copyright file="SchedulerEnqueueResult.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Immutable result of a scheduler enqueue performed with HLC ordering.
/// </summary>
/// <param name="THlc">HLC timestamp assigned to the job.</param>
/// <param name="JobId">Deterministic identifier of the job.</param>
/// <param name="Link">Chain link computed for this entry.</param>
/// <param name="Deduplicated">True when the job had already been enqueued, i.e. the call was an idempotent repeat.</param>
/// <remarks>
/// <paramref name="Link"/> is a <c>byte[]</c>, so the equality members synthesized for this
/// record compare it by reference, not by content. Compare the bytes explicitly when two
/// results need to be checked for the same link value.
/// </remarks>
public readonly record struct SchedulerHlcEnqueueResult(
    HlcTimestamp THlc,
    Guid JobId,
    byte[] Link,
    bool Deduplicated);
|
||||
@@ -6,6 +6,7 @@ using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NATS.Client.Core;
|
||||
using NATS.Client.JetStream;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
@@ -18,6 +19,7 @@ internal sealed class NatsSchedulerPlannerQueue
|
||||
SchedulerNatsQueueOptions natsOptions,
|
||||
ILogger<NatsSchedulerPlannerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
@@ -26,6 +28,7 @@ internal sealed class NatsSchedulerPlannerQueue
|
||||
PlannerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
hlc,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ using Microsoft.Extensions.Logging;
|
||||
using NATS.Client.Core;
|
||||
using NATS.Client.JetStream;
|
||||
using NATS.Client.JetStream.Models;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
|
||||
@@ -24,6 +25,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
private readonly INatsSchedulerQueuePayload<TMessage> _payload;
|
||||
private readonly ILogger _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly IHybridLogicalClock? _hlc;
|
||||
private readonly SemaphoreSlim _connectionGate = new(1, 1);
|
||||
private readonly Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>> _connectionFactory;
|
||||
|
||||
@@ -40,6 +42,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
INatsSchedulerQueuePayload<TMessage> payload,
|
||||
ILogger logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
|
||||
{
|
||||
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
|
||||
@@ -48,6 +51,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_hlc = hlc;
|
||||
_connectionFactory = connectionFactory ?? ((opts, cancellationToken) => new ValueTask<NatsConnection>(new NatsConnection(opts)));
|
||||
|
||||
if (string.IsNullOrWhiteSpace(_natsOptions.Url))
|
||||
@@ -67,7 +71,11 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
|
||||
var payloadBytes = _payload.Serialize(message);
|
||||
var idempotencyKey = _payload.GetIdempotencyKey(message);
|
||||
var headers = BuildHeaders(message, idempotencyKey);
|
||||
|
||||
// Generate HLC timestamp if clock is available
|
||||
var hlcTimestamp = _hlc?.Tick();
|
||||
|
||||
var headers = BuildHeaders(message, idempotencyKey, hlcTimestamp);
|
||||
|
||||
var publishOptions = new NatsJSPubOpts
|
||||
{
|
||||
@@ -531,6 +539,14 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
? DateTimeOffset.FromUnixTimeMilliseconds(unix)
|
||||
: now;
|
||||
|
||||
// Parse HLC timestamp if present
|
||||
HlcTimestamp? hlcTimestamp = null;
|
||||
if (headers.TryGetValue(SchedulerQueueFields.HlcTimestamp, out var hlcValues) && hlcValues.Count > 0
|
||||
&& HlcTimestamp.TryParse(hlcValues[0], out var parsedHlc))
|
||||
{
|
||||
hlcTimestamp = parsedHlc;
|
||||
}
|
||||
|
||||
var leaseExpires = now.Add(leaseDuration);
|
||||
var runId = _payload.GetRunId(deserialized);
|
||||
var tenantId = _payload.GetTenantId(deserialized);
|
||||
@@ -558,10 +574,11 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
attempt,
|
||||
enqueuedAt,
|
||||
leaseExpires,
|
||||
consumer);
|
||||
consumer,
|
||||
hlcTimestamp);
|
||||
}
|
||||
|
||||
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey)
|
||||
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey, HlcTimestamp? hlcTimestamp = null)
|
||||
{
|
||||
var headers = new NatsHeaders
|
||||
{
|
||||
@@ -572,6 +589,12 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
{ SchedulerQueueFields.EnqueuedAt, _timeProvider.GetUtcNow().ToUnixTimeMilliseconds().ToString() }
|
||||
};
|
||||
|
||||
// Include HLC timestamp if available
|
||||
if (hlcTimestamp.HasValue)
|
||||
{
|
||||
headers.Add(SchedulerQueueFields.HlcTimestamp, hlcTimestamp.Value.ToSortableString());
|
||||
}
|
||||
|
||||
var scheduleId = _payload.GetScheduleId(message);
|
||||
if (!string.IsNullOrWhiteSpace(scheduleId))
|
||||
{
|
||||
|
||||
@@ -3,6 +3,7 @@ using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using NATS.Client.JetStream;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
|
||||
@@ -26,7 +27,8 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
|
||||
int attempt,
|
||||
DateTimeOffset enqueuedAt,
|
||||
DateTimeOffset leaseExpiresAt,
|
||||
string consumer)
|
||||
string consumer,
|
||||
HlcTimestamp? hlcTimestamp = null)
|
||||
{
|
||||
_queue = queue;
|
||||
MessageId = message.Metadata?.Sequence.ToString() ?? idempotencyKey;
|
||||
@@ -44,6 +46,7 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
|
||||
Message = deserialized;
|
||||
_message = message;
|
||||
Payload = payload;
|
||||
HlcTimestamp = hlcTimestamp;
|
||||
}
|
||||
|
||||
private readonly NatsJSMsg<byte[]> _message;
|
||||
@@ -78,6 +81,8 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
|
||||
|
||||
public string Consumer { get; }
|
||||
|
||||
public HlcTimestamp? HlcTimestamp { get; }
|
||||
|
||||
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
|
||||
=> _queue.AcknowledgeAsync(this, cancellationToken);
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NATS.Client.Core;
|
||||
using NATS.Client.JetStream;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
@@ -19,6 +20,7 @@ internal sealed class NatsSchedulerRunnerQueue
|
||||
SchedulerNatsQueueOptions natsOptions,
|
||||
ILogger<NatsSchedulerRunnerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
@@ -27,6 +29,7 @@ internal sealed class NatsSchedulerRunnerQueue
|
||||
RunnerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
hlc,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
@@ -16,6 +17,7 @@ internal sealed class RedisSchedulerPlannerQueue
|
||||
SchedulerRedisQueueOptions redisOptions,
|
||||
ILogger<RedisSchedulerPlannerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
@@ -24,6 +26,7 @@ internal sealed class RedisSchedulerPlannerQueue
|
||||
PlannerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
hlc,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StackExchange.Redis;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Redis;
|
||||
@@ -20,6 +21,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
private readonly IRedisSchedulerQueuePayload<TMessage> _payload;
|
||||
private readonly ILogger _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly IHybridLogicalClock? _hlc;
|
||||
private readonly Func<ConfigurationOptions, Task<IConnectionMultiplexer>> _connectionFactory;
|
||||
private readonly SemaphoreSlim _connectionLock = new(1, 1);
|
||||
private readonly SemaphoreSlim _groupInitLock = new(1, 1);
|
||||
@@ -36,6 +38,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
IRedisSchedulerQueuePayload<TMessage> payload,
|
||||
ILogger logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
|
||||
{
|
||||
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
|
||||
@@ -44,6 +47,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
_hlc = hlc;
|
||||
_connectionFactory = connectionFactory ?? (config => Task.FromResult<IConnectionMultiplexer>(ConnectionMultiplexer.Connect(config)));
|
||||
|
||||
if (string.IsNullOrWhiteSpace(_redisOptions.ConnectionString))
|
||||
@@ -74,7 +78,11 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var attempt = 1;
|
||||
var entries = BuildEntries(message, now, attempt);
|
||||
|
||||
// Generate HLC timestamp if clock is available
|
||||
var hlcTimestamp = _hlc?.Tick();
|
||||
|
||||
var entries = BuildEntries(message, now, attempt, hlcTimestamp);
|
||||
|
||||
var messageId = await AddToStreamAsync(
|
||||
database,
|
||||
@@ -555,11 +563,12 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
private NameValueEntry[] BuildEntries(
|
||||
TMessage message,
|
||||
DateTimeOffset enqueuedAt,
|
||||
int attempt)
|
||||
int attempt,
|
||||
HlcTimestamp? hlcTimestamp = null)
|
||||
{
|
||||
var attributes = _payload.GetAttributes(message);
|
||||
var attributeCount = attributes?.Count ?? 0;
|
||||
var entries = ArrayPool<NameValueEntry>.Shared.Rent(10 + attributeCount);
|
||||
var entries = ArrayPool<NameValueEntry>.Shared.Rent(11 + attributeCount);
|
||||
var index = 0;
|
||||
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.QueueKind, _payload.QueueName);
|
||||
@@ -589,6 +598,12 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.EnqueuedAt, enqueuedAt.ToUnixTimeMilliseconds());
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.Payload, _payload.Serialize(message));
|
||||
|
||||
// Include HLC timestamp if available
|
||||
if (hlcTimestamp.HasValue)
|
||||
{
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.HlcTimestamp, hlcTimestamp.Value.ToSortableString());
|
||||
}
|
||||
|
||||
if (attributeCount > 0 && attributes is not null)
|
||||
{
|
||||
foreach (var kvp in attributes)
|
||||
@@ -623,6 +638,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
string? segmentId = null;
|
||||
string? correlationId = null;
|
||||
string? idempotencyKey = null;
|
||||
string? hlcTimestampStr = null;
|
||||
long? enqueuedAtUnix = null;
|
||||
var attempt = attemptOverride ?? 1;
|
||||
var attributes = new Dictionary<string, string>(StringComparer.Ordinal);
|
||||
@@ -676,6 +692,10 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
: Math.Max(1, parsedAttempt);
|
||||
}
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.HlcTimestamp, StringComparison.Ordinal))
|
||||
{
|
||||
hlcTimestampStr = NormalizeOptional(value.ToString());
|
||||
}
|
||||
else if (name.StartsWith(SchedulerQueueFields.AttributePrefix, StringComparison.Ordinal))
|
||||
{
|
||||
var key = name[SchedulerQueueFields.AttributePrefix.Length..];
|
||||
@@ -692,6 +712,14 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
var enqueuedAt = DateTimeOffset.FromUnixTimeMilliseconds(enqueuedAtUnix.Value);
|
||||
var leaseExpires = now.Add(leaseDuration);
|
||||
|
||||
// Parse HLC timestamp if present
|
||||
HlcTimestamp? hlcTimestamp = null;
|
||||
if (!string.IsNullOrEmpty(hlcTimestampStr) &&
|
||||
HlcTimestamp.TryParse(hlcTimestampStr, out var parsedHlc))
|
||||
{
|
||||
hlcTimestamp = parsedHlc;
|
||||
}
|
||||
|
||||
IReadOnlyDictionary<string, string> attributeView = attributes.Count == 0
|
||||
? EmptyReadOnlyDictionary<string, string>.Instance
|
||||
: new ReadOnlyDictionary<string, string>(attributes);
|
||||
@@ -710,7 +738,8 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
attempt,
|
||||
enqueuedAt,
|
||||
leaseExpires,
|
||||
consumer);
|
||||
consumer,
|
||||
hlcTimestamp);
|
||||
}
|
||||
|
||||
private async Task HandlePoisonEntryAsync(IDatabase database, RedisValue entryId)
|
||||
|
||||
@@ -2,6 +2,7 @@ using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
@@ -24,7 +25,8 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
|
||||
int attempt,
|
||||
DateTimeOffset enqueuedAt,
|
||||
DateTimeOffset leaseExpiresAt,
|
||||
string consumer)
|
||||
string consumer,
|
||||
HlcTimestamp? hlcTimestamp = null)
|
||||
{
|
||||
_queue = queue;
|
||||
MessageId = messageId;
|
||||
@@ -40,6 +42,7 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
|
||||
EnqueuedAt = enqueuedAt;
|
||||
LeaseExpiresAt = leaseExpiresAt;
|
||||
Consumer = consumer;
|
||||
HlcTimestamp = hlcTimestamp;
|
||||
}
|
||||
|
||||
public string MessageId { get; }
|
||||
@@ -68,6 +71,8 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
|
||||
|
||||
public string Consumer { get; }
|
||||
|
||||
public HlcTimestamp? HlcTimestamp { get; }
|
||||
|
||||
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
|
||||
=> _queue.AcknowledgeAsync(this, cancellationToken);
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
@@ -17,6 +18,7 @@ internal sealed class RedisSchedulerRunnerQueue
|
||||
SchedulerRedisQueueOptions redisOptions,
|
||||
ILogger<RedisSchedulerRunnerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
@@ -25,6 +27,7 @@ internal sealed class RedisSchedulerRunnerQueue
|
||||
RunnerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
hlc,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ using System.Collections.ObjectModel;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
@@ -284,6 +285,13 @@ public interface ISchedulerQueueLease<out TMessage>
|
||||
|
||||
TMessage Message { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the Hybrid Logical Clock timestamp assigned at enqueue time.
|
||||
/// Provides deterministic ordering across distributed nodes.
|
||||
/// Null if HLC was not enabled when the message was enqueued.
|
||||
/// </summary>
|
||||
HlcTimestamp? HlcTimestamp { get; }
|
||||
|
||||
Task AcknowledgeAsync(CancellationToken cancellationToken = default);
|
||||
|
||||
Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default);
|
||||
|
||||
@@ -13,4 +13,10 @@ internal static class SchedulerQueueFields
|
||||
public const string QueueKind = "queueKind";
|
||||
public const string CorrelationId = "correlationId";
|
||||
public const string AttributePrefix = "attr:";
|
||||
|
||||
/// <summary>
|
||||
/// Hybrid Logical Clock timestamp for deterministic ordering.
|
||||
/// Stored as sortable string format: {PhysicalTime:D13}-{NodeId}-{LogicalCounter:D6}
|
||||
/// </summary>
|
||||
public const string HlcTimestamp = "hlcTimestamp";
|
||||
}
|
||||
|
||||
@@ -35,6 +35,54 @@ public sealed class SchedulerQueueOptions
|
||||
/// Cap applied to the retry delay when exponential backoff is used.
|
||||
/// </summary>
|
||||
public TimeSpan RetryMaxBackoff { get; set; } = TimeSpan.FromMinutes(1);
|
||||
|
||||
/// <summary>
|
||||
/// HLC (Hybrid Logical Clock) ordering options.
|
||||
/// </summary>
|
||||
public SchedulerHlcOptions Hlc { get; set; } = new();
|
||||
}
|
||||
|
||||
/// <summary>
/// Options for HLC-based queue ordering and chain linking.
/// </summary>
public sealed class SchedulerHlcOptions
{
    /// <summary>
    /// Default clock drift tolerance: one minute, expressed in milliseconds.
    /// </summary>
    private const int DefaultMaxClockDriftMs = 60_000;

    /// <summary>
    /// Enable HLC-based ordering with chain linking.
    /// When false, uses legacy (priority, created_at) ordering. Defaults to false.
    /// </summary>
    /// <remarks>
    /// When enabled, all enqueue operations will:
    /// - Assign an HLC timestamp for global ordering
    /// - Compute and store chain links for audit proofs
    /// - Persist entries to the scheduler_log table
    /// </remarks>
    public bool EnableHlcOrdering { get; set; }

    /// <summary>
    /// When true, writes to both legacy and HLC tables during migration.
    /// This allows gradual migration from legacy ordering to HLC ordering. Defaults to false.
    /// </summary>
    /// <remarks>
    /// Migration path:
    /// 1. Deploy with DualWriteMode = true (writes to both tables)
    /// 2. Backfill scheduler_log from existing scheduler.jobs
    /// 3. Enable EnableHlcOrdering = true for reads
    /// 4. Disable DualWriteMode, deprecate legacy ordering
    /// </remarks>
    public bool DualWriteMode { get; set; }

    /// <summary>
    /// Enable automatic chain verification on dequeue.
    /// When enabled, each dequeued batch is verified for chain integrity. Defaults to false.
    /// </summary>
    public bool VerifyOnDequeue { get; set; }

    /// <summary>
    /// Maximum clock drift tolerance in milliseconds.
    /// HLC timestamps from messages with drift exceeding this value will be rejected.
    /// Defaults to 60000 (one minute).
    /// </summary>
    public int MaxClockDriftMs { get; set; } = DefaultMaxClockDriftMs;
}
|
||||
|
||||
public sealed class SchedulerRedisQueueOptions
|
||||
|
||||
@@ -4,6 +4,7 @@ using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Queue.Nats;
|
||||
using StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
@@ -29,6 +30,7 @@ public static class SchedulerQueueServiceCollectionExtensions
|
||||
{
|
||||
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
|
||||
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
|
||||
var hlc = sp.GetService<IHybridLogicalClock>();
|
||||
|
||||
return options.Kind switch
|
||||
{
|
||||
@@ -36,12 +38,14 @@ public static class SchedulerQueueServiceCollectionExtensions
|
||||
options,
|
||||
options.Redis,
|
||||
loggerFactory.CreateLogger<RedisSchedulerPlannerQueue>(),
|
||||
timeProvider),
|
||||
timeProvider,
|
||||
hlc),
|
||||
SchedulerQueueTransportKind.Nats => new NatsSchedulerPlannerQueue(
|
||||
options,
|
||||
options.Nats,
|
||||
loggerFactory.CreateLogger<NatsSchedulerPlannerQueue>(),
|
||||
timeProvider),
|
||||
timeProvider,
|
||||
hlc),
|
||||
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
|
||||
};
|
||||
});
|
||||
@@ -50,6 +54,7 @@ public static class SchedulerQueueServiceCollectionExtensions
|
||||
{
|
||||
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
|
||||
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
|
||||
var hlc = sp.GetService<IHybridLogicalClock>();
|
||||
|
||||
return options.Kind switch
|
||||
{
|
||||
@@ -57,12 +62,14 @@ public static class SchedulerQueueServiceCollectionExtensions
|
||||
options,
|
||||
options.Redis,
|
||||
loggerFactory.CreateLogger<RedisSchedulerRunnerQueue>(),
|
||||
timeProvider),
|
||||
timeProvider,
|
||||
hlc),
|
||||
SchedulerQueueTransportKind.Nats => new NatsSchedulerRunnerQueue(
|
||||
options,
|
||||
options.Nats,
|
||||
loggerFactory.CreateLogger<NatsSchedulerRunnerQueue>(),
|
||||
timeProvider),
|
||||
timeProvider,
|
||||
hlc),
|
||||
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
|
||||
};
|
||||
});
|
||||
|
||||
@@ -18,5 +18,8 @@
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Scheduler.Models\StellaOps.Scheduler.Models.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.Scheduler.Persistence\StellaOps.Scheduler.Persistence.csproj" />
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.HybridLogicalClock\StellaOps.HybridLogicalClock.csproj" />
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Canonical.Json\StellaOps.Canonical.Json.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
||||
Reference in New Issue
Block a user