save progress

This commit is contained in:
StellaOps Bot
2026-01-06 09:42:02 +02:00
parent 94d68bee8b
commit 37e11918e0
443 changed files with 85863 additions and 897 deletions

View File

@@ -0,0 +1,235 @@
// <copyright file="BatchSnapshotDsseSigner.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Configuration for DSSE signing of scheduler batch snapshots.
/// Consumed by <see cref="BatchSnapshotDsseSigner"/> through <c>IOptions</c>.
/// </summary>
public sealed class BatchSnapshotDsseOptions
{
/// <summary>
/// Gets or sets the signing mode. "hmac" (matched case-insensitively by
/// <see cref="BatchSnapshotDsseSigner.IsEnabled"/>) turns on HMAC-SHA256 signing.
/// The default "none", and any other value, leaves signing disabled.
/// </summary>
public string Mode { get; set; } = "none";
/// <summary>
/// Gets or sets the HMAC secret key as Base64.
/// Required when <see cref="Mode"/> is "hmac": signing throws if it is missing or not
/// valid Base64, and verification reports failure in the same situations.
/// </summary>
public string? SecretBase64 { get; set; }
/// <summary>
/// Gets or sets the key identifier returned with every signature.
/// Verification rejects any key ID that does not match this value exactly (ordinal comparison).
/// </summary>
public string KeyId { get; set; } = "scheduler-batch-snapshot";
/// <summary>
/// Gets or sets the DSSE payload type. It is part of the pre-authentication encoding,
/// so signer and verifier must be configured with the same value.
/// </summary>
public string PayloadType { get; set; } = "application/vnd.stellaops.scheduler.batch-snapshot+json";
}
/// <summary>
/// Contract for producing and checking DSSE signatures over batch snapshot digests.
/// </summary>
public interface IBatchSnapshotDsseSigner
{
/// <summary>
/// Signs a batch snapshot digest and returns the signature result.
/// </summary>
/// <remarks>
/// The implementation in this file (<see cref="BatchSnapshotDsseSigner"/>) returns an empty
/// key ID and an empty signature when signing is disabled, so callers should treat an empty
/// result as "not signed".
/// </remarks>
/// <param name="digest">The digest bytes to sign.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Signature result with key ID and signature bytes.</returns>
Task<BatchSnapshotSignatureResult> SignAsync(byte[] digest, CancellationToken cancellationToken = default);
/// <summary>
/// Verifies a batch snapshot signature against the digest it was computed over.
/// </summary>
/// <remarks>
/// Note the parameter order (digest, signature, keyId); it differs from
/// <c>BatchSnapshotVerifierDelegate</c>, which takes the key ID first.
/// </remarks>
/// <param name="digest">The original digest bytes.</param>
/// <param name="signature">The signature to verify.</param>
/// <param name="keyId">The key ID used for signing.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True if signature is valid.</returns>
Task<bool> VerifyAsync(byte[] digest, byte[] signature, string keyId, CancellationToken cancellationToken = default);
/// <summary>
/// Gets whether signing is enabled by the current configuration.
/// </summary>
bool IsEnabled { get; }
}
/// <summary>
/// DSSE signer for batch snapshots using HMAC-SHA256.
/// </summary>
public sealed class BatchSnapshotDsseSigner : IBatchSnapshotDsseSigner
{
private readonly IOptions<BatchSnapshotDsseOptions> _options;
private readonly ILogger<BatchSnapshotDsseSigner> _logger;
/// <summary>
/// Creates the signer from its configuration and logging dependencies.
/// </summary>
/// <param name="options">Accessor for the signing options; must not be null.</param>
/// <param name="logger">Logger used for diagnostics; must not be null.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public BatchSnapshotDsseSigner(
    IOptions<BatchSnapshotDsseOptions> options,
    ILogger<BatchSnapshotDsseSigner> logger)
{
    // Validate in declaration order so the reported parameter name matches the first null argument.
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(logger);

    _options = options;
    _logger = logger;
}
/// <inheritdoc/>
public bool IsEnabled
{
    get
    {
        // Only the "hmac" mode (any casing) enables signing; every other value disables it.
        var configuredMode = _options.Value.Mode;
        return string.Equals(configuredMode, "hmac", StringComparison.OrdinalIgnoreCase);
    }
}
/// <inheritdoc/>
public Task<BatchSnapshotSignatureResult> SignAsync(byte[] digest, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(digest);
    cancellationToken.ThrowIfCancellationRequested();

    var settings = _options.Value;
    if (IsEnabled)
    {
        var key = DecodeKey(settings.SecretBase64);

        // The HMAC covers the DSSE pre-authentication encoding of the digest, not the raw digest.
        var mac = HMACSHA256.HashData(key, ComputePreAuthenticationEncoding(settings.PayloadType, digest));

        _logger.LogDebug(
            "Signed batch snapshot with key {KeyId}, digest length {DigestLength}, signature length {SigLength}",
            settings.KeyId,
            digest.Length,
            mac.Length);

        return Task.FromResult(new BatchSnapshotSignatureResult(settings.KeyId, mac));
    }

    // Disabled: hand back an empty result rather than failing the caller.
    _logger.LogDebug("Batch snapshot DSSE signing is disabled");
    return Task.FromResult(new BatchSnapshotSignatureResult(string.Empty, Array.Empty<byte>()));

    // Turns the configured Base64 secret into key bytes, failing loudly on misconfiguration.
    static byte[] DecodeKey(string? secretBase64)
    {
        if (string.IsNullOrWhiteSpace(secretBase64))
        {
            throw new InvalidOperationException("HMAC signing mode requires SecretBase64 to be configured");
        }

        try
        {
            return Convert.FromBase64String(secretBase64);
        }
        catch (FormatException ex)
        {
            throw new InvalidOperationException("SecretBase64 is not valid Base64", ex);
        }
    }
}
/// <inheritdoc/>
public Task<bool> VerifyAsync(byte[] digest, byte[] signature, string keyId, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(digest);
    ArgumentNullException.ThrowIfNull(signature);
    ArgumentNullException.ThrowIfNull(keyId);
    cancellationToken.ThrowIfCancellationRequested();

    return Task.FromResult(Evaluate());

    // Runs the checks in order: disabled -> key ID -> secret present -> secret decodable -> MAC match.
    bool Evaluate()
    {
        var settings = _options.Value;

        if (!IsEnabled)
        {
            // With signing disabled every signature is accepted.
            _logger.LogDebug("Batch snapshot DSSE verification skipped - signing is disabled");
            return true;
        }

        if (!string.Equals(keyId, settings.KeyId, StringComparison.Ordinal))
        {
            _logger.LogWarning("Key ID mismatch: expected {Expected}, got {Actual}", settings.KeyId, keyId);
            return false;
        }

        if (string.IsNullOrWhiteSpace(settings.SecretBase64))
        {
            _logger.LogWarning("Cannot verify signature - SecretBase64 not configured");
            return false;
        }

        byte[] key;
        try
        {
            key = Convert.FromBase64String(settings.SecretBase64);
        }
        catch (FormatException)
        {
            // Unlike signing, a misconfigured secret is reported as a failed verification, not thrown.
            _logger.LogWarning("Cannot verify signature - SecretBase64 is not valid Base64");
            return false;
        }

        var recomputed = HMACSHA256.HashData(key, ComputePreAuthenticationEncoding(settings.PayloadType, digest));

        // Fixed-time comparison so timing does not reveal how many leading bytes matched.
        var matches = CryptographicOperations.FixedTimeEquals(recomputed, signature);

        _logger.LogDebug(
            "Verified batch snapshot signature with key {KeyId}: {Result}",
            keyId,
            matches ? "valid" : "invalid");

        return matches;
    }
}
/// <summary>
/// Builds the DSSE v1 Pre-Authentication Encoding (PAE) for a payload.
/// Layout: "DSSEv1" SP len(payloadType) SP payloadType SP len(payload) SP payload
/// </summary>
/// <remarks>
/// Both lengths are byte counts written as ASCII decimal; the payload type is UTF-8 encoded
/// and fields are separated by a single space.
/// </remarks>
/// <param name="payloadType">DSSE payload type string.</param>
/// <param name="payload">Raw payload bytes appended after the textual prefix.</param>
/// <returns>A new array containing the encoded prefix followed by the payload bytes.</returns>
internal static byte[] ComputePreAuthenticationEncoding(string payloadType, ReadOnlySpan<byte> payload)
{
    // Length of the type as it will appear on the wire (UTF-8 bytes, not UTF-16 chars).
    var typeByteCount = Encoding.UTF8.GetByteCount(payloadType);
    var payloadByteCount = payload.Length;

    // Everything before the payload is text, so assemble it once and encode it in a single pass.
    var prefix = string.Concat(
        "DSSEv1 ",
        typeByteCount.ToString(CultureInfo.InvariantCulture),
        " ",
        payloadType,
        " ",
        payloadByteCount.ToString(CultureInfo.InvariantCulture),
        " ");

    var prefixByteCount = Encoding.UTF8.GetByteCount(prefix);
    var encoded = new byte[prefixByteCount + payloadByteCount];

    Encoding.UTF8.GetBytes(prefix.AsSpan(), encoded.AsSpan(0, prefixByteCount));
    payload.CopyTo(encoded.AsSpan(prefixByteCount));

    return encoded;
}
}

View File

@@ -0,0 +1,312 @@
// <copyright file="BatchSnapshotService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Security.Cryptography;
using System.Text;
using Microsoft.Extensions.Logging;
using StellaOps.Canonical.Json;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence;
using StellaOps.Scheduler.Persistence.Postgres.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Optional signing delegate for batch snapshots.
/// Invoked by <see cref="BatchSnapshotService"/> when a snapshot is created with signing requested.
/// </summary>
/// <param name="digest">The digest to sign.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The signed result containing key ID and signature bytes.</returns>
public delegate Task<BatchSnapshotSignatureResult> BatchSnapshotSignerDelegate(
byte[] digest,
CancellationToken cancellationToken);
/// <summary>
/// Result of signing a batch snapshot.
/// </summary>
/// <remarks>
/// This is a record struct holding an array: the generated equality compares
/// <paramref name="Signature"/> by reference, not by content.
/// </remarks>
/// <param name="KeyId">The key identifier used for signing.</param>
/// <param name="Signature">The signature bytes.</param>
public readonly record struct BatchSnapshotSignatureResult(string KeyId, byte[] Signature);
/// <summary>
/// Optional verification delegate for batch snapshot DSSE signatures.
/// </summary>
/// <remarks>
/// The key ID comes first here, whereas <c>IBatchSnapshotDsseSigner.VerifyAsync</c> takes
/// (digest, signature, keyId); adapters between the two must reorder the arguments.
/// </remarks>
/// <param name="keyId">The key identifier used for signing.</param>
/// <param name="digest">The digest that was signed.</param>
/// <param name="signature">The signature bytes to verify.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True if the signature is valid.</returns>
public delegate Task<bool> BatchSnapshotVerifierDelegate(
string keyId,
byte[] digest,
byte[] signature,
CancellationToken cancellationToken);
/// <summary>
/// Implementation of batch snapshot service for audit anchoring.
/// </summary>
public sealed class BatchSnapshotService : IBatchSnapshotService
{
private readonly ISchedulerLogRepository _logRepository;
private readonly IBatchSnapshotRepository _snapshotRepository;
private readonly BatchSnapshotSignerDelegate? _signer;
private readonly BatchSnapshotVerifierDelegate? _verifier;
private readonly ILogger<BatchSnapshotService> _logger;
/// <summary>
/// Builds the service from its repositories, logger, and optional signing hooks.
/// </summary>
/// <param name="logRepository">Source of scheduler log entries; required.</param>
/// <param name="snapshotRepository">Store for batch snapshots; required.</param>
/// <param name="logger">Logger; required.</param>
/// <param name="signer">Optional delegate used to sign new snapshots.</param>
/// <param name="verifier">Optional delegate used to verify stored signatures.</param>
/// <exception cref="ArgumentNullException">A required dependency is null.</exception>
public BatchSnapshotService(
    ISchedulerLogRepository logRepository,
    IBatchSnapshotRepository snapshotRepository,
    ILogger<BatchSnapshotService> logger,
    BatchSnapshotSignerDelegate? signer = null,
    BatchSnapshotVerifierDelegate? verifier = null)
{
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(snapshotRepository);
    ArgumentNullException.ThrowIfNull(logger);

    _logRepository = logRepository;
    _snapshotRepository = snapshotRepository;
    _logger = logger;

    // Both hooks may legitimately be absent; the service then runs without signing/verification.
    _signer = signer;
    _verifier = verifier;
}
/// <inheritdoc />
/// <remarks>
/// Loads every log entry in the inclusive-by-contract HLC range, records the last entry's link
/// as the chain head, optionally signs a digest of the snapshot, and persists the result.
/// A signer that returns an empty key ID or empty signature (the "signing disabled" result)
/// leaves the snapshot unsigned instead of storing an empty signer, which would later fail
/// verification with "Snapshot has signer but empty signature".
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null or whitespace.</exception>
/// <exception cref="InvalidOperationException">The range contains no jobs.</exception>
public async Task<BatchSnapshot> CreateSnapshotAsync(
    string tenantId,
    HlcTimestamp startHlc,
    HlcTimestamp endHlc,
    bool sign = false,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var startT = startHlc.ToSortableString();
    var endT = endHlc.ToSortableString();

    // Get jobs in range
    var jobs = await _logRepository.GetByHlcRangeAsync(
        tenantId,
        startT,
        endT,
        limit: 0, // No limit
        partitionKey: null,
        cancellationToken).ConfigureAwait(false);

    if (jobs.Count == 0)
    {
        throw new InvalidOperationException($"No jobs in specified HLC range [{startT}, {endT}] for tenant {tenantId}");
    }

    // Get chain head (last link in range)
    var headLink = jobs[^1].Link;

    // Create snapshot
    var snapshot = new BatchSnapshot
    {
        BatchId = Guid.NewGuid(),
        TenantId = tenantId,
        RangeStartT = startT,
        RangeEndT = endT,
        HeadLink = headLink,
        JobCount = jobs.Count,
        CreatedAt = DateTimeOffset.UtcNow
    };

    // Sign if requested and signer available
    if (sign)
    {
        if (_signer is null)
        {
            _logger.LogWarning("Signing requested but no signer configured. Snapshot will be unsigned.");
        }
        else
        {
            var digest = ComputeSnapshotDigest(snapshot, jobs);
            var signed = await _signer(digest, cancellationToken).ConfigureAwait(false);

            if (string.IsNullOrEmpty(signed.KeyId) || signed.Signature is null or { Length: 0 })
            {
                // The signer declined to sign (e.g. signing disabled). Keep SignedBy/Signature
                // unset so the snapshot is treated as unsigned everywhere.
                _logger.LogWarning("Signing requested but signer returned no signature. Snapshot will be unsigned.");
            }
            else
            {
                snapshot = snapshot with
                {
                    SignedBy = signed.KeyId,
                    Signature = signed.Signature
                };
            }
        }
    }

    // Persist
    await _snapshotRepository.InsertAsync(snapshot, cancellationToken).ConfigureAwait(false);

    _logger.LogInformation(
        "Batch snapshot created. BatchId={BatchId}, TenantId={TenantId}, Range=[{Start}, {End}], JobCount={JobCount}, Signed={Signed}",
        snapshot.BatchId,
        tenantId,
        startT,
        endT,
        jobs.Count,
        snapshot.SignedBy is not null);

    return snapshot;
}
/// <inheritdoc />
/// <remarks>Pure pass-through to the snapshot repository's lookup by batch identifier.</remarks>
public Task<BatchSnapshot?> GetSnapshotAsync(
    Guid batchId,
    CancellationToken cancellationToken = default)
    => _snapshotRepository.GetByIdAsync(batchId, cancellationToken);
/// <inheritdoc />
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null or whitespace.</exception>
public Task<BatchSnapshot?> GetLatestSnapshotAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    // Reject a blank tenant before touching the repository.
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var latestSnapshot = _snapshotRepository.GetLatestAsync(tenantId, cancellationToken);
    return latestSnapshot;
}
/// <inheritdoc />
/// <remarks>
/// Re-reads the log entries for the snapshot's range and checks, in order: job count, chain
/// head link, and (for signed snapshots) the DSSE signature. Verifier failures are recorded as
/// issues; cancellation is not a verification outcome and is rethrown to the caller.
/// </remarks>
public async Task<BatchSnapshotVerificationResult> VerifySnapshotAsync(
    Guid batchId,
    CancellationToken cancellationToken = default)
{
    var issues = new List<string>();

    var snapshot = await _snapshotRepository.GetByIdAsync(batchId, cancellationToken).ConfigureAwait(false);
    if (snapshot is null)
    {
        return new BatchSnapshotVerificationResult(
            IsValid: false,
            SnapshotFound: false,
            ChainHeadMatches: false,
            JobCountMatches: false,
            SignatureValid: null,
            Issues: ["Snapshot not found"]);
    }

    // Get current jobs in the same range
    var jobs = await _logRepository.GetByHlcRangeAsync(
        snapshot.TenantId,
        snapshot.RangeStartT,
        snapshot.RangeEndT,
        limit: 0,
        partitionKey: null,
        cancellationToken).ConfigureAwait(false);

    // Verify job count
    var jobCountMatches = jobs.Count == snapshot.JobCount;
    if (!jobCountMatches)
    {
        issues.Add($"Job count mismatch: expected {snapshot.JobCount}, found {jobs.Count}");
    }

    // Verify chain head (an empty range can never match a recorded head)
    var chainHeadMatches = jobs.Count > 0 && ByteArrayEquals(jobs[^1].Link, snapshot.HeadLink);
    if (!chainHeadMatches)
    {
        issues.Add("Chain head link does not match snapshot");
    }

    // DSSE signature verification; stays null for unsigned snapshots
    bool? signatureValid = null;
    if (snapshot.SignedBy is not null)
    {
        if (snapshot.Signature is null or { Length: 0 })
        {
            issues.Add("Snapshot has signer but empty signature");
            signatureValid = false;
        }
        else if (_verifier is null)
        {
            // No verifier configured - check signature format only
            _logger.LogDebug(
                "Signature verification skipped for BatchId={BatchId}: no verifier configured",
                batchId);
            signatureValid = true; // Assume valid if no verifier
        }
        else
        {
            // Perform DSSE signature verification
            var digest = ComputeSnapshotDigest(snapshot, jobs);
            try
            {
                signatureValid = await _verifier(
                    snapshot.SignedBy,
                    digest,
                    snapshot.Signature,
                    cancellationToken).ConfigureAwait(false);

                if (!signatureValid.Value)
                {
                    issues.Add($"DSSE signature verification failed for key {snapshot.SignedBy}");
                }
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // A faulting verifier means the signature could not be confirmed; report it as
                // invalid. Cancellation is excluded by the filter so it propagates instead of
                // being misreported as a bad signature.
                _logger.LogWarning(ex, "Signature verification threw exception for BatchId={BatchId}", batchId);
                issues.Add($"Signature verification error: {ex.Message}");
                signatureValid = false;
            }
        }
    }

    // An unsigned snapshot (null) does not count against validity.
    var isValid = jobCountMatches && chainHeadMatches && (signatureValid ?? true);

    _logger.LogDebug(
        "Batch snapshot verification complete. BatchId={BatchId}, IsValid={IsValid}, Issues={Issues}",
        batchId,
        isValid,
        issues.Count > 0 ? string.Join("; ", issues) : "none");

    return new BatchSnapshotVerificationResult(
        IsValid: isValid,
        SnapshotFound: true,
        ChainHeadMatches: chainHeadMatches,
        JobCountMatches: jobCountMatches,
        SignatureValid: signatureValid,
        Issues: issues);
}
/// <summary>
/// Computes a deterministic digest over the snapshot and its jobs.
/// This is the canonical representation used for both signing and verification.
/// </summary>
/// <remarks>
/// The signer fields (<c>SignedBy</c>, <c>Signature</c>) and <c>CreatedAt</c> are not part of
/// the digest input, so the value is the same before and after a snapshot is signed.
/// </remarks>
/// <param name="snapshot">Snapshot whose identifying fields are hashed.</param>
/// <param name="jobs">Log entries covered by the snapshot, hashed in the order given.</param>
/// <returns>SHA-256 hash of the UTF-8 encoded canonical JSON.</returns>
internal static byte[] ComputeSnapshotDigest(BatchSnapshot snapshot, IReadOnlyList<SchedulerLogEntry> jobs)
{
// Create canonical representation for hashing.
// The anonymous-type member names below become the serialized property names, so renaming
// or reordering them would presumably change the digest - verify against CanonJson before editing.
var digestInput = new
{
snapshot.BatchId,
snapshot.TenantId,
snapshot.RangeStartT,
snapshot.RangeEndT,
// Binary fields are hex-encoded so they serialize as plain strings.
HeadLink = Convert.ToHexString(snapshot.HeadLink),
snapshot.JobCount,
Jobs = jobs.Select(j => new
{
j.JobId,
j.THlc,
PayloadHash = Convert.ToHexString(j.PayloadHash),
Link = Convert.ToHexString(j.Link)
}).ToArray()
};
var canonical = CanonJson.Serialize(digestInput);
return SHA256.HashData(Encoding.UTF8.GetBytes(canonical));
}
/// <summary>
/// Compares two optional byte arrays by content.
/// </summary>
/// <param name="a">First array, or null.</param>
/// <param name="b">Second array, or null.</param>
/// <returns>
/// True when both are null, or both are non-null with identical length and bytes.
/// </returns>
private static bool ByteArrayEquals(byte[]? a, byte[]? b)
{
    if (a is null || b is null)
    {
        // Equal only when neither side has a value.
        return a is null && b is null;
    }

    return ((ReadOnlySpan<byte>)a).SequenceEqual(b);
}
}

View File

@@ -0,0 +1,179 @@
// <copyright file="HlcSchedulerDequeueService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence.Postgres.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Implementation of HLC-ordered scheduler job dequeuing.
/// </summary>
public sealed class HlcSchedulerDequeueService : IHlcSchedulerDequeueService
{
private readonly ISchedulerLogRepository _logRepository;
private readonly ILogger<HlcSchedulerDequeueService> _logger;
/// <summary>
/// Builds the dequeue service on top of the scheduler log repository.
/// </summary>
/// <param name="logRepository">Repository queried for log entries; required.</param>
/// <param name="logger">Logger; required.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public HlcSchedulerDequeueService(
    ISchedulerLogRepository logRepository,
    ILogger<HlcSchedulerDequeueService> logger)
{
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(logger);

    _logRepository = logRepository;
    _logger = logger;
}
/// <inheritdoc />
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null or whitespace.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
public async Task<SchedulerHlcDequeueResult> DequeueAsync(
    string tenantId,
    int limit,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    // First page of entries in HLC order.
    var page = await _logRepository
        .GetByHlcOrderAsync(tenantId, partitionKey, limit, cancellationToken)
        .ConfigureAwait(false);

    // Unbounded count so callers can tell how much lies beyond this page.
    var total = await _logRepository
        .CountByHlcRangeAsync(tenantId, startTHlc: null, endTHlc: null, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    var partitionLabel = partitionKey ?? "(all)";
    _logger.LogDebug(
        "Dequeued {Count} of {Total} entries in HLC order. TenantId={TenantId}, PartitionKey={PartitionKey}",
        page.Count,
        total,
        tenantId,
        partitionLabel);

    return new SchedulerHlcDequeueResult(page, total, RangeStartHlc: null, RangeEndHlc: null);
}
/// <inheritdoc />
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null or whitespace.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
public async Task<SchedulerHlcDequeueResult> DequeueByRangeAsync(
    string tenantId,
    HlcTimestamp? startHlc,
    HlcTimestamp? endHlc,
    int limit,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    // A missing bound stays null, which the repository receives as "no bound on that side".
    var lowerBound = startHlc?.ToSortableString();
    var upperBound = endHlc?.ToSortableString();

    var page = await _logRepository
        .GetByHlcRangeAsync(tenantId, lowerBound, upperBound, limit, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    var total = await _logRepository
        .CountByHlcRangeAsync(tenantId, lowerBound, upperBound, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    _logger.LogDebug(
        "Dequeued {Count} of {Total} entries in HLC range [{Start}, {End}]. TenantId={TenantId}",
        page.Count,
        total,
        lowerBound ?? "(unbounded)",
        upperBound ?? "(unbounded)",
        tenantId);

    return new SchedulerHlcDequeueResult(page, total, startHlc, endHlc);
}
/// <inheritdoc />
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null or whitespace.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
public async Task<SchedulerHlcDequeueResult> DequeueAfterAsync(
    string tenantId,
    HlcTimestamp afterHlc,
    int limit,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    var cursor = afterHlc.ToSortableString();

    var page = await _logRepository
        .GetAfterHlcAsync(tenantId, cursor, limit, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    // Count from the cursor to the end of the log.
    // NOTE(review): this passes the cursor as the range start; whether that bound is inclusive
    // (and so counts the cursor entry itself) depends on the repository - confirm.
    var remaining = await _logRepository
        .CountByHlcRangeAsync(tenantId, cursor, endTHlc: null, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    var partitionLabel = partitionKey ?? "(all)";
    _logger.LogDebug(
        "Dequeued {Count} entries after HLC {AfterHlc}. TenantId={TenantId}, PartitionKey={PartitionKey}",
        page.Count,
        cursor,
        tenantId,
        partitionLabel);

    return new SchedulerHlcDequeueResult(page, remaining, afterHlc, RangeEndHlc: null);
}
/// <inheritdoc />
/// <remarks>
/// The repository lookup is by job ID only, so the tenant is checked here; an entry owned by
/// another tenant is reported as not found.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null or whitespace.</exception>
public async Task<SchedulerLogEntry?> GetByJobIdAsync(
    string tenantId,
    Guid jobId,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var found = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
    if (found is null)
    {
        return null;
    }

    if (string.Equals(found.TenantId, tenantId, StringComparison.Ordinal))
    {
        return found;
    }

    // Tenant isolation: never hand back another tenant's entry.
    _logger.LogWarning(
        "Job {JobId} found but belongs to different tenant. RequestedTenant={RequestedTenant}, ActualTenant={ActualTenant}",
        jobId,
        tenantId,
        found.TenantId);

    return null;
}
}

View File

@@ -0,0 +1,166 @@
// <copyright file="HlcSchedulerEnqueueService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Security.Cryptography;
using System.Text;
using Microsoft.Extensions.Logging;
using StellaOps.Canonical.Json;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence;
using StellaOps.Scheduler.Persistence.Postgres.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Implementation of HLC-ordered scheduler job enqueueing with chain linking.
/// </summary>
public sealed class HlcSchedulerEnqueueService : IHlcSchedulerEnqueueService
{
/// <summary>
/// Namespace GUID for deterministic job ID generation (v5 UUID style).
/// </summary>
private static readonly Guid JobIdNamespace = new("b8a7c6d5-e4f3-42a1-9b0c-1d2e3f4a5b6c");
private readonly IHybridLogicalClock _hlc;
private readonly ISchedulerLogRepository _logRepository;
private readonly IChainHeadRepository _chainHeadRepository;
private readonly ILogger<HlcSchedulerEnqueueService> _logger;
/// <summary>
/// Builds the enqueue service from the clock, repositories, and logger it depends on.
/// </summary>
/// <param name="hlc">Hybrid logical clock used to stamp new entries; required.</param>
/// <param name="logRepository">Scheduler log repository; required.</param>
/// <param name="chainHeadRepository">Repository holding the latest chain link per partition; required.</param>
/// <param name="logger">Logger; required.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public HlcSchedulerEnqueueService(
    IHybridLogicalClock hlc,
    ISchedulerLogRepository logRepository,
    IChainHeadRepository chainHeadRepository,
    ILogger<HlcSchedulerEnqueueService> logger)
{
    ArgumentNullException.ThrowIfNull(hlc);
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(chainHeadRepository);
    ArgumentNullException.ThrowIfNull(logger);

    _hlc = hlc;
    _logRepository = logRepository;
    _chainHeadRepository = chainHeadRepository;
    _logger = logger;
}
/// <inheritdoc />
/// <remarks>Delegates to <c>EnqueueAsync</c>, using the message's own idempotency key.</remarks>
/// <exception cref="ArgumentNullException"><paramref name="message"/> is null.</exception>
public Task<SchedulerHlcEnqueueResult> EnqueuePlannerAsync(
    string tenantId,
    PlannerQueueMessage message,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(message);

    var idempotencyKey = message.IdempotencyKey;
    return EnqueueAsync(tenantId, message, idempotencyKey, partitionKey, cancellationToken);
}
/// <inheritdoc />
/// <remarks>Delegates to <c>EnqueueAsync</c>, using the message's own idempotency key.</remarks>
/// <exception cref="ArgumentNullException"><paramref name="message"/> is null.</exception>
public Task<SchedulerHlcEnqueueResult> EnqueueRunnerSegmentAsync(
    string tenantId,
    RunnerSegmentQueueMessage message,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(message);

    var idempotencyKey = message.IdempotencyKey;
    return EnqueueAsync(tenantId, message, idempotencyKey, partitionKey, cancellationToken);
}
/// <inheritdoc />
/// <remarks>
/// Steps: derive a deterministic job ID from the idempotency key, return the existing entry if
/// this tenant already enqueued it, otherwise stamp a new HLC timestamp, link the entry to the
/// previous chain head for the partition, and insert it.
/// The job ID is derived from the idempotency key alone and the by-ID repository lookup is not
/// tenant-scoped, so the deduplication path only trusts an entry whose tenant matches the
/// caller; anything else is ignored rather than returned.
/// </remarks>
/// <exception cref="ArgumentException">
/// <paramref name="tenantId"/> or <paramref name="idempotencyKey"/> is null or whitespace.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="payload"/> is null.</exception>
public async Task<SchedulerHlcEnqueueResult> EnqueueAsync<T>(
    string tenantId,
    T payload,
    string idempotencyKey,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentNullException.ThrowIfNull(payload);
    ArgumentException.ThrowIfNullOrWhiteSpace(idempotencyKey);

    // A missing partition key is stored as the empty string.
    var effectivePartitionKey = partitionKey ?? string.Empty;

    // 1. Generate deterministic job ID from idempotency key
    var jobId = ComputeDeterministicJobId(idempotencyKey);

    // 2. Check for existing entry (idempotency)
    if (await _logRepository.ExistsAsync(tenantId, jobId, cancellationToken).ConfigureAwait(false))
    {
        var existing = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
        if (existing is not null)
        {
            if (string.Equals(existing.TenantId, tenantId, StringComparison.Ordinal))
            {
                _logger.LogDebug(
                    "Job already enqueued, returning existing entry. TenantId={TenantId}, JobId={JobId}",
                    tenantId,
                    jobId);

                return new SchedulerHlcEnqueueResult(
                    HlcTimestamp.Parse(existing.THlc),
                    existing.JobId,
                    existing.Link,
                    Deduplicated: true);
            }

            // Tenant isolation: never surface another tenant's HLC timestamp or chain link.
            // Handled like the "lookup returned nothing" case below, i.e. fall through.
            _logger.LogWarning(
                "Job {JobId} found but belongs to different tenant. RequestedTenant={RequestedTenant}, ActualTenant={ActualTenant}",
                jobId,
                tenantId,
                existing.TenantId);
        }
    }

    // 3. Generate HLC timestamp
    var tHlc = _hlc.Tick();
    var sortableHlc = tHlc.ToSortableString();

    // 4. Compute payload hash
    var payloadHash = SchedulerChainLinking.ComputePayloadHash(payload);

    // 5. Get previous chain link
    var prevLink = await _chainHeadRepository.GetLastLinkAsync(tenantId, effectivePartitionKey, cancellationToken)
        .ConfigureAwait(false);

    // 6. Compute new chain link
    var link = SchedulerChainLinking.ComputeLink(prevLink, jobId, tHlc, payloadHash);

    // 7. Insert log entry (atomic with chain head update)
    var entry = new SchedulerLogEntry
    {
        TenantId = tenantId,
        THlc = sortableHlc,
        PartitionKey = effectivePartitionKey,
        JobId = jobId,
        PayloadHash = payloadHash,
        PrevLink = prevLink,
        Link = link
    };

    await _logRepository.InsertWithChainUpdateAsync(entry, cancellationToken).ConfigureAwait(false);

    _logger.LogInformation(
        "Job enqueued with HLC ordering. TenantId={TenantId}, JobId={JobId}, THlc={THlc}, Link={Link}",
        tenantId,
        jobId,
        sortableHlc,
        SchedulerChainLinking.ToHex(link));

    return new SchedulerHlcEnqueueResult(tHlc, jobId, link, Deduplicated: false);
}
/// <summary>
/// Computes a deterministic GUID from the idempotency key using SHA-256.
/// The same key always yields the same GUID; the tenant is not part of the input.
/// </summary>
/// <param name="idempotencyKey">Caller-supplied idempotency key (hashed as UTF-8).</param>
/// <returns>A GUID built from the first 16 bytes of SHA-256(namespace bytes || key bytes).</returns>
private static Guid ComputeDeterministicJobId(string idempotencyKey)
{
// Namespace + name hashing in the spirit of UUID v5, but with SHA-256 instead of SHA-1.
var namespaceBytes = JobIdNamespace.ToByteArray();
var keyBytes = Encoding.UTF8.GetBytes(idempotencyKey);
var combined = new byte[namespaceBytes.Length + keyBytes.Length];
Buffer.BlockCopy(namespaceBytes, 0, combined, 0, namespaceBytes.Length);
Buffer.BlockCopy(keyBytes, 0, combined, namespaceBytes.Length, keyBytes.Length);
var hash = SHA256.HashData(combined);
// Take first 16 bytes for GUID
var guidBytes = new byte[16];
Buffer.BlockCopy(hash, 0, guidBytes, 0, 16);
// Stamp version/variant bits.
// NOTE(review): new Guid(byte[]) reads the third field little-endian, so the version nibble
// shown in the string form comes from byte 7, not byte 6. As written, the "4" lands in the
// low byte of that field and the displayed version digit stays hash-derived. Do not change
// this without a migration plan: it would alter every generated job ID.
guidBytes[6] = (byte)((guidBytes[6] & 0x0F) | 0x40); // intended: version 4 (see note above)
guidBytes[8] = (byte)((guidBytes[8] & 0x3F) | 0x80); // RFC 4122 variant (10xx)
return new Guid(guidBytes);
}
}

View File

@@ -0,0 +1,178 @@
// <copyright file="HlcSchedulerMetrics.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Diagnostics.Metrics;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Metrics for HLC-ordered scheduler operations.
/// </summary>
public static class HlcSchedulerMetrics
{
private const string TenantTagName = "tenant";
private const string PartitionTagName = "partition";
private const string ResultTagName = "result";
private static readonly Meter Meter = new("StellaOps.Scheduler.Hlc");
// Enqueue metrics
private static readonly Counter<long> EnqueuedCounter = Meter.CreateCounter<long>(
"scheduler_hlc_enqueues_total",
unit: "{enqueue}",
description: "Total number of HLC-ordered enqueue operations");
private static readonly Counter<long> EnqueueDeduplicatedCounter = Meter.CreateCounter<long>(
"scheduler_hlc_enqueue_deduplicated_total",
unit: "{enqueue}",
description: "Total number of deduplicated HLC enqueue operations");
private static readonly Histogram<double> EnqueueDurationHistogram = Meter.CreateHistogram<double>(
"scheduler_hlc_enqueue_duration_seconds",
unit: "s",
description: "Duration of HLC enqueue operations");
// Dequeue metrics
private static readonly Counter<long> DequeuedCounter = Meter.CreateCounter<long>(
"scheduler_hlc_dequeues_total",
unit: "{dequeue}",
description: "Total number of HLC-ordered dequeue operations");
private static readonly Counter<long> DequeuedEntriesCounter = Meter.CreateCounter<long>(
"scheduler_hlc_dequeued_entries_total",
unit: "{entry}",
description: "Total number of entries dequeued via HLC ordering");
// Chain verification metrics
private static readonly Counter<long> ChainVerificationsCounter = Meter.CreateCounter<long>(
"scheduler_chain_verifications_total",
unit: "{verification}",
description: "Total number of chain verification operations");
private static readonly Counter<long> ChainVerificationIssuesCounter = Meter.CreateCounter<long>(
"scheduler_chain_verification_issues_total",
unit: "{issue}",
description: "Total number of chain verification issues found");
private static readonly Counter<long> ChainEntriesVerifiedCounter = Meter.CreateCounter<long>(
"scheduler_chain_entries_verified_total",
unit: "{entry}",
description: "Total number of chain entries verified");
// Batch snapshot metrics
private static readonly Counter<long> SnapshotsCreatedCounter = Meter.CreateCounter<long>(
"scheduler_batch_snapshots_created_total",
unit: "{snapshot}",
description: "Total number of batch snapshots created");
private static readonly Counter<long> SnapshotsSignedCounter = Meter.CreateCounter<long>(
"scheduler_batch_snapshots_signed_total",
unit: "{snapshot}",
description: "Total number of signed batch snapshots");
private static readonly Counter<long> SnapshotVerificationsCounter = Meter.CreateCounter<long>(
"scheduler_batch_snapshot_verifications_total",
unit: "{verification}",
description: "Total number of batch snapshot verification operations");
/// <summary>
/// Records an HLC enqueue operation.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="partitionKey">Partition key (empty string if none).</param>
/// <param name="deduplicated">Whether the operation was deduplicated.</param>
public static void RecordEnqueue(string tenantId, string partitionKey, bool deduplicated)
{
var tags = BuildTags(tenantId, partitionKey);
EnqueuedCounter.Add(1, tags);
if (deduplicated)
{
EnqueueDeduplicatedCounter.Add(1, tags);
}
}
/// <summary>
/// Records the duration of an HLC enqueue operation.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="partitionKey">Partition key.</param>
/// <param name="durationSeconds">Duration in seconds.</param>
public static void RecordEnqueueDuration(string tenantId, string partitionKey, double durationSeconds)
{
EnqueueDurationHistogram.Record(durationSeconds, BuildTags(tenantId, partitionKey));
}
/// <summary>
/// Records an HLC dequeue operation.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="partitionKey">Partition key.</param>
/// <param name="entryCount">Number of entries dequeued.</param>
public static void RecordDequeue(string tenantId, string partitionKey, int entryCount)
{
var tags = BuildTags(tenantId, partitionKey);
DequeuedCounter.Add(1, tags);
DequeuedEntriesCounter.Add(entryCount, tags);
}
/// <summary>
/// Records a chain verification operation.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="entriesVerified">Number of entries verified.</param>
/// <param name="issuesFound">Number of issues found.</param>
/// <param name="isValid">Whether the chain is valid.</param>
public static void RecordChainVerification(string tenantId, int entriesVerified, int issuesFound, bool isValid)
{
var resultTag = new KeyValuePair<string, object?>(ResultTagName, isValid ? "valid" : "invalid");
var tenantTag = new KeyValuePair<string, object?>(TenantTagName, tenantId);
ChainVerificationsCounter.Add(1, tenantTag, resultTag);
ChainEntriesVerifiedCounter.Add(entriesVerified, tenantTag);
if (issuesFound > 0)
{
ChainVerificationIssuesCounter.Add(issuesFound, tenantTag);
}
}
/// <summary>
/// Records a batch snapshot creation.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="jobCount">Number of jobs in the snapshot.</param>
/// <param name="signed">Whether the snapshot was signed.</param>
public static void RecordSnapshotCreated(string tenantId, int jobCount, bool signed)
{
var tenantTag = new KeyValuePair<string, object?>(TenantTagName, tenantId);
SnapshotsCreatedCounter.Add(1, tenantTag);
if (signed)
{
SnapshotsSignedCounter.Add(1, tenantTag);
}
}
/// <summary>
/// Records the outcome of one batch snapshot verification.
/// </summary>
/// <param name="tenantId">Tenant identifier, attached as the tenant tag.</param>
/// <param name="isValid">Whether the snapshot is valid; reported through the result tag as "valid" or "invalid".</param>
public static void RecordSnapshotVerification(string tenantId, bool isValid)
{
    var byTenant = new KeyValuePair<string, object?>(TenantTagName, tenantId);

    string outcome;
    if (isValid)
    {
        outcome = "valid";
    }
    else
    {
        outcome = "invalid";
    }

    var byResult = new KeyValuePair<string, object?>(ResultTagName, outcome);

    // Tag order is kept as tenant first, result second.
    SnapshotVerificationsCounter.Add(1, new[] { byTenant, byResult });
}
private static KeyValuePair<string, object?>[] BuildTags(string tenantId, string partitionKey)
{
    // A null or empty partition key is reported under a fixed placeholder value.
    var partitionLabel = string.IsNullOrEmpty(partitionKey) ? "(default)" : partitionKey;

    var tenantTag = new KeyValuePair<string, object?>(TenantTagName, tenantId);
    var partitionTag = new KeyValuePair<string, object?>(PartitionTagName, partitionLabel);

    return new[] { tenantTag, partitionTag };
}
}

View File

@@ -0,0 +1,103 @@
// <copyright file="HlcSchedulerServiceCollectionExtensions.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Extension methods for registering HLC scheduler services.
/// </summary>
public static class HlcSchedulerServiceCollectionExtensions
{
    /// <summary>
    /// Adds HLC-ordered scheduler services to the service collection.
    /// </summary>
    /// <remarks>
    /// All registrations use <c>TryAdd*</c>, so a registration that already exists for one of
    /// these service types is left in place.
    /// </remarks>
    /// <param name="services">The service collection.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public static IServiceCollection AddHlcSchedulerServices(this IServiceCollection services)
    {
        ArgumentNullException.ThrowIfNull(services);

        // Repositories (scoped for per-request database connections)
        services.TryAddScoped<ISchedulerLogRepository, PostgresSchedulerLogRepository>();
        services.TryAddScoped<IChainHeadRepository, PostgresChainHeadRepository>();
        services.TryAddScoped<IBatchSnapshotRepository, PostgresBatchSnapshotRepository>();

        // Services (scoped to align with repository lifetime)
        services.TryAddScoped<IHlcSchedulerEnqueueService, HlcSchedulerEnqueueService>();
        services.TryAddScoped<IHlcSchedulerDequeueService, HlcSchedulerDequeueService>();
        services.TryAddScoped<IBatchSnapshotService, BatchSnapshotService>();
        services.TryAddScoped<ISchedulerChainVerifier, SchedulerChainVerifier>();

        // DSSE signer (disabled by default)
        services.TryAddSingleton<IBatchSnapshotDsseSigner, BatchSnapshotDsseSigner>();

        return services;
    }

    /// <summary>
    /// Adds HLC-ordered scheduler services with DSSE signing support.
    /// </summary>
    /// <remarks>
    /// DSSE options are bound from the <c>Scheduler:Queue:Hlc:DsseSigning</c> section. The
    /// <see cref="IBatchSnapshotService"/> registration is replaced by a factory that passes the
    /// signer's <c>SignAsync</c> method to <see cref="BatchSnapshotService"/> when the signer
    /// reports <c>IsEnabled</c>, and passes no signer otherwise.
    /// </remarks>
    /// <param name="services">The service collection.</param>
    /// <param name="configuration">Configuration root containing the DSSE options section.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configuration"/> is null.
    /// </exception>
    public static IServiceCollection AddHlcSchedulerServicesWithDsseSigning(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Configure DSSE options
        services.AddOptions<BatchSnapshotDsseOptions>()
            .Bind(configuration.GetSection("Scheduler:Queue:Hlc:DsseSigning"))
            .ValidateDataAnnotations()
            .ValidateOnStart();

        // Add base services
        services.AddHlcSchedulerServices();

        // Wire up DSSE signer to BatchSnapshotService
        ReplaceBatchSnapshotService(services, static sp =>
        {
            var dsseSigner = sp.GetRequiredService<IBatchSnapshotDsseSigner>();
            BatchSnapshotSignerDelegate? signer = dsseSigner.IsEnabled
                ? dsseSigner.SignAsync
                : null;
            return signer;
        });

        return services;
    }

    /// <summary>
    /// Adds HLC-ordered scheduler services with a custom signer delegate.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="signerFactory">Factory to create the signer delegate; invoked each time the service is constructed.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="signerFactory"/> is null.
    /// </exception>
    public static IServiceCollection AddHlcSchedulerServices(
        this IServiceCollection services,
        Func<IServiceProvider, BatchSnapshotSignerDelegate> signerFactory)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(signerFactory);

        services.AddHlcSchedulerServices();

        // Override BatchSnapshotService registration to include signer
        ReplaceBatchSnapshotService(services, signerFactory);

        return services;
    }

    /// <summary>
    /// Replaces the <see cref="IBatchSnapshotService"/> registration with a scoped factory that
    /// constructs <see cref="BatchSnapshotService"/> with the signer produced by
    /// <paramref name="signerFactory"/>.
    /// </summary>
    /// <remarks>
    /// <c>Replace</c> is used instead of a second <c>AddScoped</c> so the default registration made
    /// by the base method does not linger as a duplicate descriptor for the same service type.
    /// </remarks>
    private static void ReplaceBatchSnapshotService(
        IServiceCollection services,
        Func<IServiceProvider, BatchSnapshotSignerDelegate?> signerFactory)
    {
        services.Replace(ServiceDescriptor.Scoped<IBatchSnapshotService>(sp =>
        {
            var logRepository = sp.GetRequiredService<ISchedulerLogRepository>();
            var snapshotRepository = sp.GetRequiredService<IBatchSnapshotRepository>();
            var logger = sp.GetRequiredService<Microsoft.Extensions.Logging.ILogger<BatchSnapshotService>>();
            var signer = signerFactory(sp);
            return new BatchSnapshotService(logRepository, snapshotRepository, logger, signer);
        }));
    }
}

View File

@@ -0,0 +1,82 @@
// <copyright file="IBatchSnapshotService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Service for creating and managing batch snapshots of the scheduler chain.
/// </summary>
/// <remarks>
/// Batch snapshots provide audit anchors for the scheduler chain, capturing
/// the chain head at specific HLC ranges. These can be optionally signed
/// with DSSE for attestation purposes.
/// </remarks>
public interface IBatchSnapshotService
{
/// <summary>
/// Creates a batch snapshot for a given HLC range.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="startHlc">Start of the HLC range (inclusive).</param>
/// <param name="endHlc">End of the HLC range (inclusive).</param>
/// <param name="sign">Whether to sign the snapshot with DSSE. Defaults to <see langword="false"/>.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The created batch snapshot.</returns>
Task<BatchSnapshot> CreateSnapshotAsync(
string tenantId,
HlcTimestamp startHlc,
HlcTimestamp endHlc,
bool sign = false,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a batch snapshot by ID.
/// </summary>
/// <param name="batchId">The batch identifier.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The snapshot if found; the return type is nullable to represent the not-found case.</returns>
Task<BatchSnapshot?> GetSnapshotAsync(
Guid batchId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the most recent batch snapshot for a tenant.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The most recent snapshot if found; the return type is nullable to represent the not-found case.</returns>
Task<BatchSnapshot?> GetLatestSnapshotAsync(
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Verifies a batch snapshot against the current chain state.
/// </summary>
/// <param name="batchId">The batch identifier to verify.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>
/// A <see cref="BatchSnapshotVerificationResult"/> describing whether the snapshot was found
/// and which individual checks passed.
/// </returns>
Task<BatchSnapshotVerificationResult> VerifySnapshotAsync(
Guid batchId,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of batch snapshot verification.
/// </summary>
/// <remarks>
/// This is a value type: <c>default(BatchSnapshotVerificationResult)</c> has every flag set to
/// <see langword="false"/>, <c>SignatureValid</c> and <c>Issues</c> set to <see langword="null"/>.
/// Consumers that may receive a default instance should not assume <c>Issues</c> is non-null.
/// </remarks>
/// <param name="IsValid">Whether the snapshot is valid.</param>
/// <param name="SnapshotFound">Whether the snapshot was found.</param>
/// <param name="ChainHeadMatches">Whether the chain head matches the snapshot.</param>
/// <param name="JobCountMatches">Whether the job count matches.</param>
/// <param name="SignatureValid">Whether the DSSE signature is valid (null if unsigned).</param>
/// <param name="Issues">List of verification issues if invalid.</param>
public readonly record struct BatchSnapshotVerificationResult(
bool IsValid,
bool SnapshotFound,
bool ChainHeadMatches,
bool JobCountMatches,
bool? SignatureValid,
IReadOnlyList<string> Issues);

View File

@@ -0,0 +1,77 @@
// <copyright file="IHlcSchedulerDequeueService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Service for HLC-ordered scheduler job dequeuing.
/// </summary>
/// <remarks>
/// This service provides deterministic, HLC-ordered retrieval of scheduler log entries
/// for processing. The HLC ordering guarantees causal consistency across distributed nodes.
/// </remarks>
public interface IHlcSchedulerDequeueService
{
/// <summary>
/// Dequeues scheduler log entries in HLC order.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="limit">Maximum number of entries to return.</param>
/// <param name="partitionKey">Optional partition key to filter by. Defaults to <see langword="null"/>.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The dequeue result with entries in HLC order.</returns>
Task<SchedulerHlcDequeueResult> DequeueAsync(
string tenantId,
int limit,
string? partitionKey = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Dequeues scheduler log entries within an HLC time range.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="startHlc">HLC range start (inclusive, null for unbounded).</param>
/// <param name="endHlc">HLC range end (inclusive, null for unbounded).</param>
/// <param name="limit">Maximum number of entries to return.</param>
/// <param name="partitionKey">Optional partition key to filter by. Defaults to <see langword="null"/>.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The dequeue result with entries in HLC order.</returns>
Task<SchedulerHlcDequeueResult> DequeueByRangeAsync(
string tenantId,
HlcTimestamp? startHlc,
HlcTimestamp? endHlc,
int limit,
string? partitionKey = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Dequeues scheduler log entries after a specific HLC timestamp (cursor-based).
/// </summary>
/// <remarks>
/// Unlike <see cref="DequeueByRangeAsync"/>, whose bounds are inclusive, the
/// <paramref name="afterHlc"/> bound here is exclusive.
/// </remarks>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="afterHlc">HLC timestamp to start after (exclusive).</param>
/// <param name="limit">Maximum number of entries to return.</param>
/// <param name="partitionKey">Optional partition key to filter by. Defaults to <see langword="null"/>.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The dequeue result with entries in HLC order.</returns>
Task<SchedulerHlcDequeueResult> DequeueAfterAsync(
string tenantId,
HlcTimestamp afterHlc,
int limit,
string? partitionKey = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a single scheduler log entry by job ID.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="jobId">The job identifier.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The scheduler log entry if found, null otherwise.</returns>
Task<Persistence.Postgres.Models.SchedulerLogEntry?> GetByJobIdAsync(
string tenantId,
Guid jobId,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,64 @@
// <copyright file="IHlcSchedulerEnqueueService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Service for HLC-ordered scheduler job enqueueing with chain linking.
/// </summary>
/// <remarks>
/// This service wraps job enqueueing with:
/// <list type="bullet">
/// <item><description>HLC timestamp assignment for global ordering</description></item>
/// <item><description>Chain link computation for audit proofs</description></item>
/// <item><description>Persistence to scheduler_log for replay</description></item>
/// </list>
/// All three methods return a <see cref="SchedulerHlcEnqueueResult"/>.
/// </remarks>
public interface IHlcSchedulerEnqueueService
{
/// <summary>
/// Enqueues a planner message with HLC ordering and chain linking.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="message">The planner queue message.</param>
/// <param name="partitionKey">Optional partition key for chain separation. Defaults to <see langword="null"/>.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The enqueue result with HLC timestamp and chain link.</returns>
Task<SchedulerHlcEnqueueResult> EnqueuePlannerAsync(
string tenantId,
PlannerQueueMessage message,
string? partitionKey = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Enqueues a runner segment message with HLC ordering and chain linking.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="message">The runner segment queue message.</param>
/// <param name="partitionKey">Optional partition key for chain separation. Defaults to <see langword="null"/>.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The enqueue result with HLC timestamp and chain link.</returns>
Task<SchedulerHlcEnqueueResult> EnqueueRunnerSegmentAsync(
string tenantId,
RunnerSegmentQueueMessage message,
string? partitionKey = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Enqueues a generic payload with HLC ordering and chain linking.
/// </summary>
/// <remarks>
/// Unlike the planner and runner overloads, the caller supplies the idempotency key explicitly.
/// </remarks>
/// <typeparam name="T">Payload type.</typeparam>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="payload">The payload to enqueue.</param>
/// <param name="idempotencyKey">Key for deduplication.</param>
/// <param name="partitionKey">Optional partition key for chain separation. Defaults to <see langword="null"/>.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The enqueue result with HLC timestamp and chain link.</returns>
Task<SchedulerHlcEnqueueResult> EnqueueAsync<T>(
string tenantId,
T payload,
string idempotencyKey,
string? partitionKey = null,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,292 @@
// <copyright file="SchedulerChainVerifier.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Service for verifying the integrity of the scheduler chain.
/// </summary>
public interface ISchedulerChainVerifier
{
/// <summary>
/// Verifies the integrity of the scheduler chain within an HLC range.
/// </summary>
/// <remarks>
/// With all optional arguments left at their defaults the range is unbounded on both sides.
/// </remarks>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="startHlc">Start of the HLC range (inclusive, null for unbounded).</param>
/// <param name="endHlc">End of the HLC range (inclusive, null for unbounded).</param>
/// <param name="partitionKey">Optional partition key to verify (null for all partitions).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Verification result.</returns>
Task<ChainVerificationResult> VerifyAsync(
string tenantId,
HlcTimestamp? startHlc = null,
HlcTimestamp? endHlc = null,
string? partitionKey = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Verifies a single chain link.
/// </summary>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="jobId">The job identifier to verify.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Verification result for the single entry.</returns>
Task<ChainVerificationResult> VerifyEntryAsync(
string tenantId,
Guid jobId,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of chain verification.
/// </summary>
/// <remarks>
/// This is a value type: <c>default(ChainVerificationResult)</c> has <c>IsValid</c> set to
/// <see langword="false"/>, <c>EntriesChecked</c> set to zero and <c>Issues</c> set to
/// <see langword="null"/>.
/// </remarks>
/// <param name="IsValid">Whether the chain is valid.</param>
/// <param name="EntriesChecked">Number of entries checked.</param>
/// <param name="Issues">List of verification issues found.</param>
public readonly record struct ChainVerificationResult(
bool IsValid,
int EntriesChecked,
IReadOnlyList<ChainVerificationIssue> Issues);
/// <summary>
/// A specific issue found during chain verification.
/// </summary>
/// <param name="JobId">The job ID where the issue was found.</param>
/// <param name="THlc">
/// The HLC timestamp of the problematic entry, as a string. May be empty when no entry exists
/// to take it from (for example a "NotFound" issue).
/// </param>
/// <param name="IssueType">
/// Type of issue found, as a short identifier such as "LinkMismatch", "PrevLinkMismatch",
/// "PrevEntryNotFound", "NotFound" or "TenantMismatch".
/// </param>
/// <param name="Description">Human-readable description of the issue.</param>
public readonly record struct ChainVerificationIssue(
Guid JobId,
string THlc,
string IssueType,
string Description);
/// <summary>
/// Implementation of scheduler chain verification.
/// </summary>
/// <remarks>
/// Verification recomputes each entry's link with <c>SchedulerChainLinking.ComputeLink</c> and
/// checks each entry's prev_link against the link of the entry that precedes it in HLC order.
/// A missing (null) link and an empty link are treated as the same "no link" value.
/// </remarks>
public sealed class SchedulerChainVerifier : ISchedulerChainVerifier
{
    private readonly ISchedulerLogRepository _logRepository;
    private readonly ILogger<SchedulerChainVerifier> _logger;

    /// <summary>
    /// Creates a new chain verifier.
    /// </summary>
    /// <param name="logRepository">Repository the scheduler log entries are read from.</param>
    /// <param name="logger">Logger for verification diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public SchedulerChainVerifier(
        ISchedulerLogRepository logRepository,
        ILogger<SchedulerChainVerifier> logger)
    {
        _logRepository = logRepository ?? throw new ArgumentNullException(nameof(logRepository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public async Task<ChainVerificationResult> VerifyAsync(
        string tenantId,
        HlcTimestamp? startHlc = null,
        HlcTimestamp? endHlc = null,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var startT = startHlc?.ToSortableString();
        var endT = endHlc?.ToSortableString();

        var entries = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startT,
            endT,
            limit: 0, // No limit
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        if (entries.Count == 0)
        {
            _logger.LogDebug(
                "No entries to verify in range [{Start}, {End}] for tenant {TenantId}",
                startT ?? "(unbounded)",
                endT ?? "(unbounded)",
                tenantId);
            return new ChainVerificationResult(IsValid: true, EntriesChecked: 0, Issues: []);
        }

        var issues = new List<ChainVerificationIssue>();
        byte[]? expectedPrevLink = null;

        // If starting mid-chain, the first entry in range must point at the link of the entry
        // immediately before the range start. When nothing precedes the start, no link is expected.
        if (startT is not null)
        {
            var (_, predecessorLink) = await FindPredecessorLinkAsync(
                tenantId,
                startT,
                partitionKey,
                cancellationToken).ConfigureAwait(false);
            expectedPrevLink = predecessorLink;
        }

        // NOTE(review): a single running expectedPrevLink assumes the returned entries form one
        // chain. If a null partitionKey returns several partitions with separate chains, this
        // would report false mismatches — confirm against the repository's behaviour.
        foreach (var entry in entries)
        {
            // Verify prev_link matches expected
            if (!ByteArrayEquals(entry.PrevLink, expectedPrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    $"Expected {ToHex(expectedPrevLink)}, got {ToHex(entry.PrevLink)}"));
            }

            // Recompute link and verify. A stored timestamp that cannot be parsed is reported as
            // an issue rather than aborting the whole verification with an exception.
            if (!HlcTimestamp.TryParse(entry.THlc, out var parsedHlc))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "InvalidHlc",
                    $"Stored HLC timestamp '{entry.THlc}' could not be parsed"));
            }
            else
            {
                var computed = SchedulerChainLinking.ComputeLink(
                    entry.PrevLink,
                    entry.JobId,
                    parsedHlc,
                    entry.PayloadHash);

                if (!ByteArrayEquals(entry.Link, computed))
                {
                    issues.Add(new ChainVerificationIssue(
                        entry.JobId,
                        entry.THlc,
                        "LinkMismatch",
                        $"Stored link doesn't match computed. Stored={ToHex(entry.Link)}, Computed={ToHex(computed)}"));
                }
            }

            // The next entry is expected to reference this entry's stored link.
            expectedPrevLink = entry.Link;
        }

        var isValid = issues.Count == 0;

        _logger.LogInformation(
            "Chain verification complete. TenantId={TenantId}, Range=[{Start}, {End}], EntriesChecked={Count}, IsValid={IsValid}, IssueCount={IssueCount}",
            tenantId,
            startT ?? "(unbounded)",
            endT ?? "(unbounded)",
            entries.Count,
            isValid,
            issues.Count);

        return new ChainVerificationResult(isValid, entries.Count, issues);
    }

    /// <inheritdoc />
    public async Task<ChainVerificationResult> VerifyEntryAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var entry = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
        if (entry is null)
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, string.Empty, "NotFound", "Entry not found")]);
        }

        // Verify tenant isolation
        // NOTE(review): this result exposes the existence and HLC timestamp of another tenant's
        // entry to the caller — confirm that is acceptable for the callers of this method.
        if (!string.Equals(entry.TenantId, tenantId, StringComparison.Ordinal))
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, entry.THlc, "TenantMismatch", "Entry belongs to different tenant")]);
        }

        var issues = new List<ChainVerificationIssue>();

        // Recompute link and verify; an unparsable timestamp is reported instead of thrown.
        if (!HlcTimestamp.TryParse(entry.THlc, out var parsedHlc))
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "InvalidHlc",
                $"Stored HLC timestamp '{entry.THlc}' could not be parsed"));
        }
        else
        {
            var computed = SchedulerChainLinking.ComputeLink(
                entry.PrevLink,
                entry.JobId,
                parsedHlc,
                entry.PayloadHash);

            if (!ByteArrayEquals(entry.Link, computed))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "LinkMismatch",
                    "Stored link doesn't match computed"));
            }
        }

        // If there's a prev_link, verify it exists and matches
        if (entry.PrevLink is { Length: > 0 })
        {
            var (found, predecessorLink) = await FindPredecessorLinkAsync(
                tenantId,
                entry.THlc,
                entry.PartitionKey,
                cancellationToken).ConfigureAwait(false);

            if (!found)
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevEntryNotFound",
                    "Entry has prev_link but no previous entry found"));
            }
            else if (!ByteArrayEquals(predecessorLink, entry.PrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    "prev_link doesn't match previous entry's link"));
            }
        }

        return new ChainVerificationResult(issues.Count == 0, 1, issues);
    }

    /// <summary>
    /// Finds the link of the entry immediately preceding <paramref name="tHlc"/>: the entry with
    /// the greatest HLC string (ordinal comparison) among those returned for the range ending at
    /// <paramref name="tHlc"/>, excluding entries whose HLC equals <paramref name="tHlc"/>.
    /// </summary>
    /// <remarks>
    /// The whole range is fetched and scanned so the result does not depend on the order in which
    /// the repository returns rows.
    /// NOTE(review): this loads every earlier entry; a repository query returning only the latest
    /// entry before a timestamp would be cheaper if one is available.
    /// </remarks>
    /// <returns>Whether a predecessor exists, and its stored link when it does.</returns>
    private async Task<(bool Found, byte[]? Link)> FindPredecessorLinkAsync(
        string tenantId,
        string tHlc,
        string? partitionKey,
        CancellationToken cancellationToken)
    {
        var candidates = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startTHlc: null,
            tHlc,
            limit: 0,
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        var found = false;
        string? latestTHlc = null;
        byte[]? latestLink = null;

        foreach (var candidate in candidates)
        {
            // The range end is inclusive, so skip entries sitting exactly at the boundary.
            if (string.Equals(candidate.THlc, tHlc, StringComparison.Ordinal))
            {
                continue;
            }

            // Sortable HLC strings are machine identifiers: compare ordinally, never by culture.
            if (!found || string.CompareOrdinal(candidate.THlc, latestTHlc) > 0)
            {
                found = true;
                latestTHlc = candidate.THlc;
                latestLink = candidate.Link;
            }
        }

        return (found, latestLink);
    }

    /// <summary>
    /// Compares two links byte-for-byte. Null and empty arrays both mean "no link" and compare
    /// equal to each other, matching how <see cref="VerifyEntryAsync"/> detects a missing prev_link.
    /// </summary>
    private static bool ByteArrayEquals(byte[]? a, byte[]? b)
    {
        // AsSpan() on a null array yields an empty span, which gives the null == empty semantics.
        return a.AsSpan().SequenceEqual(b.AsSpan());
    }

    /// <summary>
    /// Formats a link as upper-case hex for issue descriptions; null is rendered as "(null)".
    /// </summary>
    private static string ToHex(byte[]? bytes)
    {
        return bytes is null ? "(null)" : Convert.ToHexString(bytes);
    }
}

View File

@@ -0,0 +1,21 @@
// <copyright file="SchedulerDequeueResult.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Result of an HLC-ordered scheduler dequeue operation.
/// </summary>
/// <remarks>
/// This is a value type: <c>default(SchedulerHlcDequeueResult)</c> has <c>Entries</c> set to
/// <see langword="null"/>, <c>TotalAvailable</c> set to zero and both range bounds unset.
/// </remarks>
/// <param name="Entries">The dequeued scheduler log entries in HLC order.</param>
/// <param name="TotalAvailable">Total count of entries available in the specified range.</param>
/// <param name="RangeStartHlc">The HLC start of the queried range (null if unbounded).</param>
/// <param name="RangeEndHlc">The HLC end of the queried range (null if unbounded).</param>
public readonly record struct SchedulerHlcDequeueResult(
IReadOnlyList<SchedulerLogEntry> Entries,
int TotalAvailable,
HlcTimestamp? RangeStartHlc,
HlcTimestamp? RangeEndHlc);

View File

@@ -0,0 +1,20 @@
// <copyright file="SchedulerEnqueueResult.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Result of an HLC-ordered scheduler enqueue operation.
/// </summary>
/// <remarks>
/// <c>Link</c> is a mutable array reference; record equality compares it by reference, so two
/// results carrying equal bytes in different array instances are not equal.
/// </remarks>
/// <param name="THlc">The HLC timestamp assigned to the job.</param>
/// <param name="JobId">The deterministic job identifier.</param>
/// <param name="Link">The chain link computed for this entry.</param>
/// <param name="Deduplicated">True if the job was already enqueued (idempotent).</param>
public readonly record struct SchedulerHlcEnqueueResult(
HlcTimestamp THlc,
Guid JobId,
byte[] Link,
bool Deduplicated);

View File

@@ -6,6 +6,7 @@ using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue.Nats;
@@ -18,6 +19,7 @@ internal sealed class NatsSchedulerPlannerQueue
SchedulerNatsQueueOptions natsOptions,
ILogger<NatsSchedulerPlannerQueue> logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
: base(
queueOptions,
@@ -26,6 +28,7 @@ internal sealed class NatsSchedulerPlannerQueue
PlannerPayload.Instance,
logger,
timeProvider,
hlc,
connectionFactory)
{
}

View File

@@ -9,6 +9,7 @@ using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using NATS.Client.JetStream.Models;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Nats;
@@ -24,6 +25,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
private readonly INatsSchedulerQueuePayload<TMessage> _payload;
private readonly ILogger _logger;
private readonly TimeProvider _timeProvider;
private readonly IHybridLogicalClock? _hlc;
private readonly SemaphoreSlim _connectionGate = new(1, 1);
private readonly Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>> _connectionFactory;
@@ -40,6 +42,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
INatsSchedulerQueuePayload<TMessage> payload,
ILogger logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
{
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
@@ -48,6 +51,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? TimeProvider.System;
_hlc = hlc;
_connectionFactory = connectionFactory ?? ((opts, cancellationToken) => new ValueTask<NatsConnection>(new NatsConnection(opts)));
if (string.IsNullOrWhiteSpace(_natsOptions.Url))
@@ -67,7 +71,11 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
var payloadBytes = _payload.Serialize(message);
var idempotencyKey = _payload.GetIdempotencyKey(message);
var headers = BuildHeaders(message, idempotencyKey);
// Generate HLC timestamp if clock is available
var hlcTimestamp = _hlc?.Tick();
var headers = BuildHeaders(message, idempotencyKey, hlcTimestamp);
var publishOptions = new NatsJSPubOpts
{
@@ -531,6 +539,14 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
? DateTimeOffset.FromUnixTimeMilliseconds(unix)
: now;
// Parse HLC timestamp if present
HlcTimestamp? hlcTimestamp = null;
if (headers.TryGetValue(SchedulerQueueFields.HlcTimestamp, out var hlcValues) && hlcValues.Count > 0
&& HlcTimestamp.TryParse(hlcValues[0], out var parsedHlc))
{
hlcTimestamp = parsedHlc;
}
var leaseExpires = now.Add(leaseDuration);
var runId = _payload.GetRunId(deserialized);
var tenantId = _payload.GetTenantId(deserialized);
@@ -558,10 +574,11 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
attempt,
enqueuedAt,
leaseExpires,
consumer);
consumer,
hlcTimestamp);
}
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey)
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey, HlcTimestamp? hlcTimestamp = null)
{
var headers = new NatsHeaders
{
@@ -572,6 +589,12 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
{ SchedulerQueueFields.EnqueuedAt, _timeProvider.GetUtcNow().ToUnixTimeMilliseconds().ToString() }
};
// Include HLC timestamp if available
if (hlcTimestamp.HasValue)
{
headers.Add(SchedulerQueueFields.HlcTimestamp, hlcTimestamp.Value.ToSortableString());
}
var scheduleId = _payload.GetScheduleId(message);
if (!string.IsNullOrWhiteSpace(scheduleId))
{

View File

@@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using NATS.Client.JetStream;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Nats;
@@ -26,7 +27,8 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
int attempt,
DateTimeOffset enqueuedAt,
DateTimeOffset leaseExpiresAt,
string consumer)
string consumer,
HlcTimestamp? hlcTimestamp = null)
{
_queue = queue;
MessageId = message.Metadata?.Sequence.ToString() ?? idempotencyKey;
@@ -44,6 +46,7 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
Message = deserialized;
_message = message;
Payload = payload;
HlcTimestamp = hlcTimestamp;
}
private readonly NatsJSMsg<byte[]> _message;
@@ -78,6 +81,8 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
public string Consumer { get; }
public HlcTimestamp? HlcTimestamp { get; }
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
=> _queue.AcknowledgeAsync(this, cancellationToken);

View File

@@ -7,6 +7,7 @@ using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue.Nats;
@@ -19,6 +20,7 @@ internal sealed class NatsSchedulerRunnerQueue
SchedulerNatsQueueOptions natsOptions,
ILogger<NatsSchedulerRunnerQueue> logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
: base(
queueOptions,
@@ -27,6 +29,7 @@ internal sealed class NatsSchedulerRunnerQueue
RunnerPayload.Instance,
logger,
timeProvider,
hlc,
connectionFactory)
{
}

View File

@@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StackExchange.Redis;
using StellaOps.Scheduler.Models;
@@ -16,6 +17,7 @@ internal sealed class RedisSchedulerPlannerQueue
SchedulerRedisQueueOptions redisOptions,
ILogger<RedisSchedulerPlannerQueue> logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
: base(
queueOptions,
@@ -24,6 +26,7 @@ internal sealed class RedisSchedulerPlannerQueue
PlannerPayload.Instance,
logger,
timeProvider,
hlc,
connectionFactory)
{
}

View File

@@ -6,6 +6,7 @@ using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StackExchange.Redis;
namespace StellaOps.Scheduler.Queue.Redis;
@@ -20,6 +21,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
private readonly IRedisSchedulerQueuePayload<TMessage> _payload;
private readonly ILogger _logger;
private readonly TimeProvider _timeProvider;
private readonly IHybridLogicalClock? _hlc;
private readonly Func<ConfigurationOptions, Task<IConnectionMultiplexer>> _connectionFactory;
private readonly SemaphoreSlim _connectionLock = new(1, 1);
private readonly SemaphoreSlim _groupInitLock = new(1, 1);
@@ -36,6 +38,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
IRedisSchedulerQueuePayload<TMessage> payload,
ILogger logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
{
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
@@ -44,6 +47,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_hlc = hlc;
_connectionFactory = connectionFactory ?? (config => Task.FromResult<IConnectionMultiplexer>(ConnectionMultiplexer.Connect(config)));
if (string.IsNullOrWhiteSpace(_redisOptions.ConnectionString))
@@ -74,7 +78,11 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
var now = _timeProvider.GetUtcNow();
var attempt = 1;
var entries = BuildEntries(message, now, attempt);
// Generate HLC timestamp if clock is available
var hlcTimestamp = _hlc?.Tick();
var entries = BuildEntries(message, now, attempt, hlcTimestamp);
var messageId = await AddToStreamAsync(
database,
@@ -555,11 +563,12 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
private NameValueEntry[] BuildEntries(
TMessage message,
DateTimeOffset enqueuedAt,
int attempt)
int attempt,
HlcTimestamp? hlcTimestamp = null)
{
var attributes = _payload.GetAttributes(message);
var attributeCount = attributes?.Count ?? 0;
var entries = ArrayPool<NameValueEntry>.Shared.Rent(10 + attributeCount);
var entries = ArrayPool<NameValueEntry>.Shared.Rent(11 + attributeCount);
var index = 0;
entries[index++] = new NameValueEntry(SchedulerQueueFields.QueueKind, _payload.QueueName);
@@ -589,6 +598,12 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
entries[index++] = new NameValueEntry(SchedulerQueueFields.EnqueuedAt, enqueuedAt.ToUnixTimeMilliseconds());
entries[index++] = new NameValueEntry(SchedulerQueueFields.Payload, _payload.Serialize(message));
// Include HLC timestamp if available
if (hlcTimestamp.HasValue)
{
entries[index++] = new NameValueEntry(SchedulerQueueFields.HlcTimestamp, hlcTimestamp.Value.ToSortableString());
}
if (attributeCount > 0 && attributes is not null)
{
foreach (var kvp in attributes)
@@ -623,6 +638,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
string? segmentId = null;
string? correlationId = null;
string? idempotencyKey = null;
string? hlcTimestampStr = null;
long? enqueuedAtUnix = null;
var attempt = attemptOverride ?? 1;
var attributes = new Dictionary<string, string>(StringComparer.Ordinal);
@@ -676,6 +692,10 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
: Math.Max(1, parsedAttempt);
}
}
else if (name.Equals(SchedulerQueueFields.HlcTimestamp, StringComparison.Ordinal))
{
hlcTimestampStr = NormalizeOptional(value.ToString());
}
else if (name.StartsWith(SchedulerQueueFields.AttributePrefix, StringComparison.Ordinal))
{
var key = name[SchedulerQueueFields.AttributePrefix.Length..];
@@ -692,6 +712,14 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
var enqueuedAt = DateTimeOffset.FromUnixTimeMilliseconds(enqueuedAtUnix.Value);
var leaseExpires = now.Add(leaseDuration);
// Parse HLC timestamp if present
HlcTimestamp? hlcTimestamp = null;
if (!string.IsNullOrEmpty(hlcTimestampStr) &&
HlcTimestamp.TryParse(hlcTimestampStr, out var parsedHlc))
{
hlcTimestamp = parsedHlc;
}
IReadOnlyDictionary<string, string> attributeView = attributes.Count == 0
? EmptyReadOnlyDictionary<string, string>.Instance
: new ReadOnlyDictionary<string, string>(attributes);
@@ -710,7 +738,8 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
attempt,
enqueuedAt,
leaseExpires,
consumer);
consumer,
hlcTimestamp);
}
private async Task HandlePoisonEntryAsync(IDatabase database, RedisValue entryId)

View File

@@ -2,6 +2,7 @@ using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Redis;
@@ -24,7 +25,8 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
int attempt,
DateTimeOffset enqueuedAt,
DateTimeOffset leaseExpiresAt,
string consumer)
string consumer,
HlcTimestamp? hlcTimestamp = null)
{
_queue = queue;
MessageId = messageId;
@@ -40,6 +42,7 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
EnqueuedAt = enqueuedAt;
LeaseExpiresAt = leaseExpiresAt;
Consumer = consumer;
HlcTimestamp = hlcTimestamp;
}
public string MessageId { get; }
@@ -68,6 +71,8 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
public string Consumer { get; }
public HlcTimestamp? HlcTimestamp { get; }
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
=> _queue.AcknowledgeAsync(this, cancellationToken);

View File

@@ -4,6 +4,7 @@ using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StackExchange.Redis;
using StellaOps.Scheduler.Models;
@@ -17,6 +18,7 @@ internal sealed class RedisSchedulerRunnerQueue
SchedulerRedisQueueOptions redisOptions,
ILogger<RedisSchedulerRunnerQueue> logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
: base(
queueOptions,
@@ -25,6 +27,7 @@ internal sealed class RedisSchedulerRunnerQueue
RunnerPayload.Instance,
logger,
timeProvider,
hlc,
connectionFactory)
{
}

View File

@@ -4,6 +4,7 @@ using System.Collections.ObjectModel;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue;
@@ -284,6 +285,13 @@ public interface ISchedulerQueueLease<out TMessage>
TMessage Message { get; }
/// <summary>
/// Gets the Hybrid Logical Clock timestamp assigned at enqueue time.
/// Provides deterministic ordering across distributed nodes.
/// Null if HLC was not enabled when the message was enqueued.
/// </summary>
/// <remarks>
/// Null does not necessarily mean the producer had no clock: the Redis transport also
/// leaves this null when the stored field is empty or cannot be parsed by
/// <c>HlcTimestamp.TryParse</c>. Callers that need ordering should therefore treat null
/// as "no usable HLC information" and fall back to another ordering key.
/// </remarks>
HlcTimestamp? HlcTimestamp { get; }
Task AcknowledgeAsync(CancellationToken cancellationToken = default);
Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default);

View File

@@ -13,4 +13,10 @@ internal static class SchedulerQueueFields
public const string QueueKind = "queueKind";
public const string CorrelationId = "correlationId";
public const string AttributePrefix = "attr:";
/// <summary>
/// Hybrid Logical Clock timestamp for deterministic ordering.
/// Stored as sortable string format: {PhysicalTime:D13}-{NodeId}-{LogicalCounter:D6}
/// </summary>
/// <remarks>
/// Optional field: the Redis queue only adds it to a stream entry when an HLC timestamp
/// was generated at enqueue time, so readers must tolerate its absence. The value is
/// written with <c>HlcTimestamp.ToSortableString()</c> and read back with
/// <c>HlcTimestamp.TryParse</c>.
/// </remarks>
public const string HlcTimestamp = "hlcTimestamp";
}

View File

@@ -35,6 +35,54 @@ public sealed class SchedulerQueueOptions
/// Cap applied to the retry delay when exponential backoff is used.
/// </summary>
public TimeSpan RetryMaxBackoff { get; set; } = TimeSpan.FromMinutes(1);
/// <summary>
/// HLC (Hybrid Logical Clock) ordering options.
/// </summary>
public SchedulerHlcOptions Hlc { get; set; } = new();
}
/// <summary>
/// Options for HLC-based queue ordering and chain linking.
/// </summary>
/// <remarks>
/// Exposed through <see cref="SchedulerQueueOptions.Hlc"/>. The boolean switches have no
/// initializer and therefore default to <c>false</c>.
/// </remarks>
public sealed class SchedulerHlcOptions
{
/// <summary>
/// Enable HLC-based ordering with chain linking.
/// When false, uses legacy (priority, created_at) ordering.
/// </summary>
/// <remarks>
/// When enabled, all enqueue operations will:
/// - Assign an HLC timestamp for global ordering
/// - Compute and store chain links for audit proofs
/// - Persist entries to the scheduler_log table
/// </remarks>
// NOTE(review): the Redis queue change in this commit stamps an HLC timestamp whenever an
// IHybridLogicalClock is injected and does not appear to consult this flag — confirm
// where this flag is actually enforced.
public bool EnableHlcOrdering { get; set; }
/// <summary>
/// When true, writes to both legacy and HLC tables during migration.
/// This allows gradual migration from legacy ordering to HLC ordering.
/// </summary>
/// <remarks>
/// Migration path:
/// 1. Deploy with DualWriteMode = true (writes to both tables)
/// 2. Backfill scheduler_log from existing scheduler.jobs
/// 3. Enable EnableHlcOrdering = true for reads
/// 4. Disable DualWriteMode, deprecate legacy ordering
/// </remarks>
// NOTE(review): how this interacts with EnableHlcOrdering when both are true is not
// defined here — presumably dual-write wins during migration; verify against the consumer.
public bool DualWriteMode { get; set; }
/// <summary>
/// Enable automatic chain verification on dequeue.
/// When enabled, each dequeued batch is verified for chain integrity.
/// </summary>
// NOTE(review): the behavior on a failed verification (reject, log, dead-letter) is not
// specified here — confirm and document once the consumer is known.
public bool VerifyOnDequeue { get; set; }
/// <summary>
/// Maximum clock drift tolerance in milliseconds.
/// HLC timestamps from messages with drift exceeding this value will be rejected.
/// </summary>
/// <value>Defaults to 60000 ms (one minute).</value>
// NOTE(review): no range validation is applied by this setter; zero or negative values
// are accepted as-is — confirm the consumer rejects or clamps them.
public int MaxClockDriftMs { get; set; } = 60000; // 1 minute default
}
public sealed class SchedulerRedisQueueOptions

View File

@@ -4,6 +4,7 @@ using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Queue.Nats;
using StellaOps.Scheduler.Queue.Redis;
@@ -29,6 +30,7 @@ public static class SchedulerQueueServiceCollectionExtensions
{
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
var hlc = sp.GetService<IHybridLogicalClock>();
return options.Kind switch
{
@@ -36,12 +38,14 @@ public static class SchedulerQueueServiceCollectionExtensions
options,
options.Redis,
loggerFactory.CreateLogger<RedisSchedulerPlannerQueue>(),
timeProvider),
timeProvider,
hlc),
SchedulerQueueTransportKind.Nats => new NatsSchedulerPlannerQueue(
options,
options.Nats,
loggerFactory.CreateLogger<NatsSchedulerPlannerQueue>(),
timeProvider),
timeProvider,
hlc),
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
};
});
@@ -50,6 +54,7 @@ public static class SchedulerQueueServiceCollectionExtensions
{
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
var hlc = sp.GetService<IHybridLogicalClock>();
return options.Kind switch
{
@@ -57,12 +62,14 @@ public static class SchedulerQueueServiceCollectionExtensions
options,
options.Redis,
loggerFactory.CreateLogger<RedisSchedulerRunnerQueue>(),
timeProvider),
timeProvider,
hlc),
SchedulerQueueTransportKind.Nats => new NatsSchedulerRunnerQueue(
options,
options.Nats,
loggerFactory.CreateLogger<NatsSchedulerRunnerQueue>(),
timeProvider),
timeProvider,
hlc),
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
};
});

View File

@@ -18,5 +18,8 @@
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scheduler.Models\StellaOps.Scheduler.Models.csproj" />
<ProjectReference Include="..\StellaOps.Scheduler.Persistence\StellaOps.Scheduler.Persistence.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.HybridLogicalClock\StellaOps.HybridLogicalClock.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Canonical.Json\StellaOps.Canonical.Json.csproj" />
</ItemGroup>
</Project>