save progress

This commit is contained in:
StellaOps Bot
2026-01-06 09:42:02 +02:00
parent 94d68bee8b
commit 37e11918e0
443 changed files with 85863 additions and 897 deletions

View File

@@ -0,0 +1,177 @@
-- HLC Queue Chain: Hybrid Logical Clock Ordering with Cryptographic Sequence Proofs
-- SPRINT_20260105_002_002_SCHEDULER: SQC-002, SQC-003, SQC-004
--
-- Adds HLC-based ordering with hash chain at enqueue time for audit-safe job queue ordering.
-- See: Product Advisory "Audit-safe job queue ordering using monotonic timestamps"
BEGIN;

-- ============================================================================
-- SECTION 1: Scheduler Log Table (SQC-002)
-- ============================================================================
-- HLC-ordered, chain-linked job entries; this table is the authoritative
-- ordering record. Entries are linked via:
--   link_i = Hash(link_{i-1} || job_id || t_hlc || payload_hash)
CREATE TABLE IF NOT EXISTS scheduler.scheduler_log (
    seq_bigint    BIGSERIAL PRIMARY KEY,      -- Storage order only (not authoritative)
    tenant_id     TEXT NOT NULL,
    t_hlc         TEXT NOT NULL,              -- HLC timestamp, e.g. "0001704067200000-node-1-000042"
    partition_key TEXT NOT NULL DEFAULT '',   -- Optional queue partition
    job_id        UUID NOT NULL,
    payload_hash  BYTEA NOT NULL,             -- SHA-256 of canonical payload JSON
    prev_link     BYTEA,                      -- Previous chain link (NULL for first entry)
    link          BYTEA NOT NULL,             -- Hash(prev_link || job_id || t_hlc || payload_hash)
    created_at    TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    -- HLC order must be unique within a tenant/partition.
    CONSTRAINT uq_scheduler_log_order UNIQUE (tenant_id, partition_key, t_hlc, job_id)
);

COMMENT ON TABLE scheduler.scheduler_log IS
    'HLC-ordered job log with cryptographic chain linking for audit-safe ordering';
COMMENT ON COLUMN scheduler.scheduler_log.t_hlc IS
    'Hybrid Logical Clock timestamp in sortable string format';
COMMENT ON COLUMN scheduler.scheduler_log.link IS
    'SHA-256 chain link: Hash(prev_link || job_id || t_hlc || payload_hash)';

-- Primary query path: tenant-scoped reads in HLC order.
CREATE INDEX IF NOT EXISTS idx_scheduler_log_tenant_hlc
    ON scheduler.scheduler_log(tenant_id, t_hlc);

-- Partition-scoped ordered reads.
CREATE INDEX IF NOT EXISTS idx_scheduler_log_partition
    ON scheduler.scheduler_log(tenant_id, partition_key, t_hlc);

-- Point lookups by job_id (idempotency checks).
CREATE INDEX IF NOT EXISTS idx_scheduler_log_job_id
    ON scheduler.scheduler_log(job_id);
-- ============================================================================
-- SECTION 2: Batch Snapshot Table (SQC-003)
-- ============================================================================
-- Audit anchors: each row freezes the chain head for an inclusive HLC range so
-- the range can later be re-verified or attested (optionally DSSE-signed).
CREATE TABLE IF NOT EXISTS scheduler.batch_snapshot (
    batch_id      UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id     TEXT NOT NULL,
    range_start_t TEXT NOT NULL,              -- HLC range start (inclusive)
    range_end_t   TEXT NOT NULL,              -- HLC range end (inclusive)
    head_link     BYTEA NOT NULL,             -- Chain link at range_end_t
    job_count     INT NOT NULL,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    signed_by     TEXT,                       -- Optional: DSSE signing key ID
    signature     BYTEA                       -- Optional: DSSE signature bytes
);

COMMENT ON TABLE scheduler.batch_snapshot IS
    'Audit anchors capturing chain state at specific HLC ranges';
COMMENT ON COLUMN scheduler.batch_snapshot.head_link IS
    'The chain link at range_end_t - can be used to verify chain integrity';

-- Latest-snapshot lookups per tenant.
CREATE INDEX IF NOT EXISTS idx_batch_snapshot_tenant
    ON scheduler.batch_snapshot(tenant_id, created_at DESC);

-- Range-containment queries over HLC timestamps.
CREATE INDEX IF NOT EXISTS idx_batch_snapshot_hlc_range
    ON scheduler.batch_snapshot(tenant_id, range_start_t, range_end_t);
-- ============================================================================
-- SECTION 3: Chain Heads Table (SQC-004)
-- ============================================================================
-- One row per (tenant, partition): the most recent chain link, so appends do
-- not have to scan scheduler_log for the previous link.
CREATE TABLE IF NOT EXISTS scheduler.chain_heads (
    tenant_id     TEXT NOT NULL,
    partition_key TEXT NOT NULL DEFAULT '',
    last_link     BYTEA NOT NULL,
    last_t_hlc    TEXT NOT NULL,
    last_job_id   UUID NOT NULL,
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    PRIMARY KEY (tenant_id, partition_key)
);

COMMENT ON TABLE scheduler.chain_heads IS
    'Per-partition chain head tracking for efficient chain append operations';

-- Keep updated_at current on every modification.
-- NOTE(review): assumes scheduler.update_updated_at() is defined by an earlier
-- migration; also, CREATE OR REPLACE TRIGGER requires PostgreSQL 14+ - confirm
-- the minimum supported server version.
CREATE OR REPLACE TRIGGER update_chain_heads_updated_at
    BEFORE UPDATE ON scheduler.chain_heads
    FOR EACH ROW
    EXECUTE FUNCTION scheduler.update_updated_at();
-- ============================================================================
-- SECTION 4: Helper Functions
-- ============================================================================
-- Returns the current chain head (link, HLC timestamp, job id) for a
-- tenant/partition, or zero rows when no chain exists yet.
CREATE OR REPLACE FUNCTION scheduler.get_chain_head(
    p_tenant_id TEXT,
    p_partition_key TEXT DEFAULT ''
)
RETURNS TABLE (
    last_link BYTEA,
    last_t_hlc TEXT,
    last_job_id UUID
)
LANGUAGE sql STABLE
AS $$
    SELECT ch.last_link, ch.last_t_hlc, ch.last_job_id
    FROM scheduler.chain_heads ch
    WHERE ch.tenant_id = p_tenant_id
      AND ch.partition_key = p_partition_key;
$$;
-- Atomically appends a log entry and advances the chain head.
--
-- Parameters: tenant/partition identify the chain; p_prev_link must equal the
-- chain's current head (NULL for the first entry); p_link is the new head.
-- Returns: seq_bigint of the inserted row (storage order, not authoritative).
-- Raises: serialization_failure when p_prev_link does not match the current
-- head, so callers can re-read the head and retry.
CREATE OR REPLACE FUNCTION scheduler.insert_log_with_chain_update(
    p_tenant_id TEXT,
    p_t_hlc TEXT,
    p_partition_key TEXT,
    p_job_id UUID,
    p_payload_hash BYTEA,
    p_prev_link BYTEA,
    p_link BYTEA
)
RETURNS BIGINT
LANGUAGE plpgsql
AS $$
DECLARE
    v_seq BIGINT;
    v_head_link BYTEA;
BEGIN
    -- Serialize appends per (tenant, partition). Without this, two enqueuers
    -- that both read the same head would each insert a link derived from it,
    -- forking the chain; the lock is released at transaction end.
    PERFORM pg_advisory_xact_lock(hashtextextended(p_tenant_id || '|' || p_partition_key, 0));

    SELECT ch.last_link INTO v_head_link
    FROM scheduler.chain_heads ch
    WHERE ch.tenant_id = p_tenant_id
      AND ch.partition_key = p_partition_key;

    -- Reject stale appends instead of silently overwriting the head.
    -- IS DISTINCT FROM treats NULL = NULL as a match (first entry in a chain).
    IF v_head_link IS DISTINCT FROM p_prev_link THEN
        RAISE EXCEPTION 'chain head mismatch for tenant %, partition %: prev_link does not match current head',
            p_tenant_id, p_partition_key
            USING ERRCODE = 'serialization_failure';
    END IF;

    -- Append the log entry (authoritative order is t_hlc, not seq_bigint).
    INSERT INTO scheduler.scheduler_log (
        tenant_id, t_hlc, partition_key, job_id,
        payload_hash, prev_link, link
    )
    VALUES (
        p_tenant_id, p_t_hlc, p_partition_key, p_job_id,
        p_payload_hash, p_prev_link, p_link
    )
    RETURNING seq_bigint INTO v_seq;

    -- Advance the chain head for this partition.
    INSERT INTO scheduler.chain_heads (
        tenant_id, partition_key, last_link, last_t_hlc, last_job_id
    )
    VALUES (
        p_tenant_id, p_partition_key, p_link, p_t_hlc, p_job_id
    )
    ON CONFLICT (tenant_id, partition_key)
    DO UPDATE SET
        last_link = EXCLUDED.last_link,
        last_t_hlc = EXCLUDED.last_t_hlc,
        last_job_id = EXCLUDED.last_job_id,
        updated_at = NOW();

    RETURN v_seq;
END;
$$;

COMMENT ON FUNCTION scheduler.insert_log_with_chain_update IS
    'Atomically inserts a scheduler log entry and updates the chain head';
COMMIT;

View File

@@ -0,0 +1,56 @@
// <copyright file="BatchSnapshot.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Represents an audit anchor capturing chain state at a specific HLC range.
/// Maps to a scheduler.batch_snapshot row; the optional DSSE fields allow the
/// anchor to be signed for external attestation.
/// </summary>
public sealed record BatchSnapshot
{
    /// <summary>
    /// Unique batch identifier (primary key).
    /// </summary>
    public Guid BatchId { get; init; }

    /// <summary>
    /// Tenant identifier.
    /// </summary>
    public required string TenantId { get; init; }

    /// <summary>
    /// HLC range start (inclusive), in the sortable HLC string format.
    /// </summary>
    public required string RangeStartT { get; init; }

    /// <summary>
    /// HLC range end (inclusive), in the sortable HLC string format.
    /// </summary>
    public required string RangeEndT { get; init; }

    /// <summary>
    /// Chain head link at snapshot time; usable to verify chain integrity
    /// up to <see cref="RangeEndT"/>.
    /// </summary>
    public required byte[] HeadLink { get; init; }

    /// <summary>
    /// Number of jobs in the range.
    /// </summary>
    public int JobCount { get; init; }

    /// <summary>
    /// Timestamp when the snapshot was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// Optional: signing key identifier for DSSE.
    /// </summary>
    public string? SignedBy { get; init; }

    /// <summary>
    /// Optional: DSSE signature bytes.
    /// </summary>
    public byte[]? Signature { get; init; }
}

View File

@@ -0,0 +1,41 @@
// <copyright file="ChainHead.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Represents the current chain head for a tenant/partition.
/// Maps to a scheduler.chain_heads row, which callers read to obtain the
/// previous link when appending a new log entry.
/// </summary>
public sealed record ChainHead
{
    /// <summary>
    /// Tenant identifier.
    /// </summary>
    public required string TenantId { get; init; }

    /// <summary>
    /// Partition key (empty string for the default partition).
    /// </summary>
    public string PartitionKey { get; init; } = string.Empty;

    /// <summary>
    /// Last chain link bytes.
    /// </summary>
    public required byte[] LastLink { get; init; }

    /// <summary>
    /// Last HLC timestamp, in the sortable HLC string format.
    /// </summary>
    public required string LastTHlc { get; init; }

    /// <summary>
    /// Last job identifier.
    /// </summary>
    public required Guid LastJobId { get; init; }

    /// <summary>
    /// Timestamp when the chain head was last updated.
    /// </summary>
    public DateTimeOffset UpdatedAt { get; init; }
}

View File

@@ -0,0 +1,56 @@
// <copyright file="SchedulerLogEntry.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Represents an HLC-ordered, chain-linked scheduler log entry
/// (a scheduler.scheduler_log row).
/// </summary>
public sealed record SchedulerLogEntry
{
    /// <summary>
    /// Storage sequence number (not authoritative for ordering; order by
    /// <see cref="THlc"/> instead).
    /// </summary>
    public long SeqBigint { get; init; }

    /// <summary>
    /// Tenant identifier.
    /// </summary>
    public required string TenantId { get; init; }

    /// <summary>
    /// HLC timestamp in sortable string format.
    /// </summary>
    public required string THlc { get; init; }

    /// <summary>
    /// Optional queue partition key (empty string for the default partition).
    /// </summary>
    public string PartitionKey { get; init; } = string.Empty;

    /// <summary>
    /// Job identifier (deterministic from payload).
    /// </summary>
    public required Guid JobId { get; init; }

    /// <summary>
    /// SHA-256 hash of the canonical payload JSON.
    /// </summary>
    public required byte[] PayloadHash { get; init; }

    /// <summary>
    /// Previous chain link (null for the first entry in a chain).
    /// </summary>
    public byte[]? PrevLink { get; init; }

    /// <summary>
    /// Chain link: Hash(prev_link || job_id || t_hlc || payload_hash).
    /// </summary>
    public required byte[] Link { get; init; }

    /// <summary>
    /// Timestamp when the entry was created.
    /// </summary>
    public DateTimeOffset CreatedAt { get; init; }
}

View File

@@ -0,0 +1,65 @@
// <copyright file="IBatchSnapshotRepository.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for batch snapshot operations (audit anchors over
/// HLC ranges of the scheduler log).
/// </summary>
public interface IBatchSnapshotRepository
{
    /// <summary>
    /// Inserts a new batch snapshot.
    /// </summary>
    /// <param name="snapshot">The snapshot to insert.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task representing the operation.</returns>
    Task InsertAsync(BatchSnapshot snapshot, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a batch snapshot by ID.
    /// </summary>
    /// <remarks>
    /// NOTE(review): keyed by batch id alone, with no tenant parameter —
    /// confirm cross-tenant lookup is intended here.
    /// </remarks>
    /// <param name="batchId">The batch identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The snapshot if found; otherwise null.</returns>
    Task<BatchSnapshot?> GetByIdAsync(Guid batchId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the most recent batch snapshot for a tenant.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The most recent snapshot if found; otherwise null.</returns>
    Task<BatchSnapshot?> GetLatestAsync(string tenantId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets batch snapshots for a tenant within a time range (by creation time).
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startTime">Start time (inclusive).</param>
    /// <param name="endTime">End time (inclusive).</param>
    /// <param name="limit">Maximum snapshots to return.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Snapshots in the specified range.</returns>
    Task<IReadOnlyList<BatchSnapshot>> GetByTimeRangeAsync(
        string tenantId,
        DateTimeOffset startTime,
        DateTimeOffset endTime,
        int limit = 100,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets batch snapshots whose inclusive HLC range contains a specific
    /// HLC timestamp.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="tHlc">The HLC timestamp to search for.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Snapshots containing the timestamp.</returns>
    Task<IReadOnlyList<BatchSnapshot>> GetContainingHlcAsync(
        string tenantId,
        string tHlc,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,47 @@
// <copyright file="IChainHeadRepository.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for chain head operations (per-tenant/partition
/// tracking of the latest scheduler-log chain link).
/// </summary>
public interface IChainHeadRepository
{
    /// <summary>
    /// Gets the last chain link for a tenant/partition. Lighter-weight than
    /// <see cref="GetAsync"/> when only the link bytes are needed.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="partitionKey">Partition key (empty string for default).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The last link bytes, or null if no chain exists.</returns>
    Task<byte[]?> GetLastLinkAsync(
        string tenantId,
        string partitionKey,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets the full chain head for a tenant/partition.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="partitionKey">Partition key (empty string for default).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The chain head, or null if no chain exists.</returns>
    Task<ChainHead?> GetAsync(
        string tenantId,
        string partitionKey,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all chain heads for a tenant (one per partition).
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>All chain heads for the tenant.</returns>
    Task<IReadOnlyList<ChainHead>> GetAllForTenantAsync(
        string tenantId,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,109 @@
// <copyright file="ISchedulerLogRepository.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for HLC-ordered scheduler log operations.
/// Ordering throughout is by the sortable HLC timestamp string, which is the
/// authoritative order (not the storage sequence number).
/// </summary>
public interface ISchedulerLogRepository
{
    /// <summary>
    /// Inserts a log entry and atomically updates the chain head.
    /// </summary>
    /// <param name="entry">The log entry to insert.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The sequence number of the inserted entry.</returns>
    Task<long> InsertWithChainUpdateAsync(
        SchedulerLogEntry entry,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets log entries ordered by HLC timestamp.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="partitionKey">Optional partition key (null for all partitions).</param>
    /// <param name="limit">Maximum entries to return.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Log entries in HLC order.</returns>
    Task<IReadOnlyList<SchedulerLogEntry>> GetByHlcOrderAsync(
        string tenantId,
        string? partitionKey,
        int limit,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets log entries within an HLC timestamp range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startTHlc">Start timestamp (inclusive, null for unbounded).</param>
    /// <param name="endTHlc">End timestamp (inclusive, null for unbounded).</param>
    /// <param name="limit">Maximum entries to return (0 for unlimited).</param>
    /// <param name="partitionKey">Optional partition key (null for all partitions).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Log entries in the specified range.</returns>
    Task<IReadOnlyList<SchedulerLogEntry>> GetByHlcRangeAsync(
        string tenantId,
        string? startTHlc,
        string? endTHlc,
        int limit = 0,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets log entries after an HLC timestamp (cursor-based pagination;
    /// the cursor is exclusive).
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="afterTHlc">Start after this timestamp (exclusive).</param>
    /// <param name="limit">Maximum entries to return.</param>
    /// <param name="partitionKey">Optional partition key (null for all partitions).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Log entries after the specified timestamp.</returns>
    Task<IReadOnlyList<SchedulerLogEntry>> GetAfterHlcAsync(
        string tenantId,
        string afterTHlc,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Counts log entries within an HLC timestamp range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startTHlc">Start timestamp (inclusive, null for unbounded).</param>
    /// <param name="endTHlc">End timestamp (inclusive, null for unbounded).</param>
    /// <param name="partitionKey">Optional partition key (null for all partitions).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Count of entries in the range.</returns>
    Task<int> CountByHlcRangeAsync(
        string tenantId,
        string? startTHlc,
        string? endTHlc,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a log entry by job ID.
    /// </summary>
    /// <remarks>
    /// NOTE(review): not tenant-scoped, unlike <see cref="ExistsAsync"/> —
    /// confirm callers cannot read entries across tenants through this method.
    /// </remarks>
    /// <param name="jobId">Job identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The log entry if found; otherwise null.</returns>
    Task<SchedulerLogEntry?> GetByJobIdAsync(
        Guid jobId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks if a job ID already exists in the log (idempotency check).
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="jobId">Job identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if the job exists.</returns>
    Task<bool> ExistsAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default);
}

View File

@@ -1,4 +1,5 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.Determinism;
using StellaOps.Infrastructure.Postgres.Repositories;
@@ -13,6 +14,7 @@ public sealed class JobRepository : RepositoryBase<SchedulerDataSource>, IJobRep
{
private readonly TimeProvider _timeProvider;
private readonly IGuidProvider _guidProvider;
private readonly bool _enableHlcOrdering;
/// <summary>
/// Creates a new job repository.
@@ -20,12 +22,14 @@ public sealed class JobRepository : RepositoryBase<SchedulerDataSource>, IJobRep
public JobRepository(
SchedulerDataSource dataSource,
ILogger<JobRepository> logger,
IOptions<JobRepositoryOptions>? options = null,
TimeProvider? timeProvider = null,
IGuidProvider? guidProvider = null)
: base(dataSource, logger)
{
_timeProvider = timeProvider ?? TimeProvider.System;
_guidProvider = guidProvider ?? SystemGuidProvider.Instance;
_enableHlcOrdering = options?.Value.EnableHlcOrdering ?? false;
}
/// <inheritdoc />
@@ -102,15 +106,28 @@ public sealed class JobRepository : RepositoryBase<SchedulerDataSource>, IJobRep
int limit = 10,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.jobs
WHERE tenant_id = @tenant_id
AND status = 'scheduled'
AND (not_before IS NULL OR not_before <= NOW())
AND job_type = ANY(@job_types)
ORDER BY priority DESC, created_at
LIMIT @limit
""";
// When HLC ordering is enabled, join with scheduler_log and order by t_hlc
// This provides deterministic global ordering based on Hybrid Logical Clock timestamps
var sql = _enableHlcOrdering
? """
SELECT j.* FROM scheduler.jobs j
INNER JOIN scheduler.scheduler_log sl ON j.id = sl.job_id AND j.tenant_id = sl.tenant_id
WHERE j.tenant_id = @tenant_id
AND j.status = 'scheduled'
AND (j.not_before IS NULL OR j.not_before <= NOW())
AND j.job_type = ANY(@job_types)
ORDER BY sl.t_hlc
LIMIT @limit
"""
: """
SELECT * FROM scheduler.jobs
WHERE tenant_id = @tenant_id
AND status = 'scheduled'
AND (not_before IS NULL OR not_before <= NOW())
AND job_type = ANY(@job_types)
ORDER BY priority DESC, created_at
LIMIT @limit
""";
return await QueryAsync(
tenantId,
@@ -350,12 +367,22 @@ public sealed class JobRepository : RepositoryBase<SchedulerDataSource>, IJobRep
int offset = 0,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.jobs
WHERE tenant_id = @tenant_id AND status = @status::scheduler.job_status
ORDER BY created_at DESC, id
LIMIT @limit OFFSET @offset
""";
// When HLC ordering is enabled, join with scheduler_log and order by t_hlc DESC
// This maintains consistent ordering across all job retrieval methods
var sql = _enableHlcOrdering
? """
SELECT j.* FROM scheduler.jobs j
LEFT JOIN scheduler.scheduler_log sl ON j.id = sl.job_id AND j.tenant_id = sl.tenant_id
WHERE j.tenant_id = @tenant_id AND j.status = @status::scheduler.job_status
ORDER BY COALESCE(sl.t_hlc, to_char(j.created_at AT TIME ZONE 'UTC', 'YYYYMMDDHH24MISS')) DESC, j.id
LIMIT @limit OFFSET @offset
"""
: """
SELECT * FROM scheduler.jobs
WHERE tenant_id = @tenant_id AND status = @status::scheduler.job_status
ORDER BY created_at DESC, id
LIMIT @limit OFFSET @offset
""";
return await QueryAsync(
tenantId,

View File

@@ -0,0 +1,18 @@
// <copyright file="JobRepositoryOptions.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Options for job repository behavior.
/// </summary>
public sealed class JobRepositoryOptions
{
    /// <summary>
    /// Gets or sets whether to use HLC (Hybrid Logical Clock) ordering for job retrieval.
    /// When enabled, jobs are ordered by their HLC timestamp from the scheduler_log table.
    /// When disabled (the default), legacy (priority, created_at) ordering is used.
    /// </summary>
    public bool EnableHlcOrdering { get; set; }
}

View File

@@ -0,0 +1,183 @@
// <copyright file="PostgresBatchSnapshotRepository.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for batch snapshot operations over
/// scheduler.batch_snapshot. All queries select the same column list so that
/// <see cref="MapSnapshot"/> can map by ordinal.
/// </summary>
public sealed class PostgresBatchSnapshotRepository : RepositoryBase<SchedulerDataSource>, IBatchSnapshotRepository
{
    /// <summary>
    /// Creates a new batch snapshot repository.
    /// </summary>
    public PostgresBatchSnapshotRepository(SchedulerDataSource dataSource, ILogger<PostgresBatchSnapshotRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task InsertAsync(BatchSnapshot snapshot, CancellationToken cancellationToken = default)
    {
        const string sql = """
            INSERT INTO scheduler.batch_snapshot (
                batch_id, tenant_id, range_start_t, range_end_t, head_link,
                job_count, created_at, signed_by, signature
            ) VALUES (
                @batch_id, @tenant_id, @range_start_t, @range_end_t, @head_link,
                @job_count, @created_at, @signed_by, @signature
            )
            """;
        // Tenant-scoped writer connection; nullable fields are bound as DBNull.
        await using var connection = await DataSource.OpenConnectionAsync(snapshot.TenantId, "writer", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "batch_id", snapshot.BatchId);
        AddParameter(command, "tenant_id", snapshot.TenantId);
        AddParameter(command, "range_start_t", snapshot.RangeStartT);
        AddParameter(command, "range_end_t", snapshot.RangeEndT);
        AddParameter(command, "head_link", snapshot.HeadLink);
        AddParameter(command, "job_count", snapshot.JobCount);
        AddParameter(command, "created_at", snapshot.CreatedAt);
        AddParameter(command, "signed_by", snapshot.SignedBy ?? (object)DBNull.Value);
        AddParameter(command, "signature", snapshot.Signature ?? (object)DBNull.Value);
        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<BatchSnapshot?> GetByIdAsync(Guid batchId, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT batch_id, tenant_id, range_start_t, range_end_t, head_link,
                   job_count, created_at, signed_by, signature
            FROM scheduler.batch_snapshot
            WHERE batch_id = @batch_id
            """;
        // NOTE(review): this is the only method in the class using a system
        // (tenant-agnostic) connection rather than a tenant-scoped "reader"
        // connection — only the batch id is known here. Confirm cross-tenant
        // lookup by batch id is intended.
        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "batch_id", batchId);
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        return await reader.ReadAsync(cancellationToken).ConfigureAwait(false) ? MapSnapshot(reader) : null;
    }

    /// <inheritdoc />
    public async Task<BatchSnapshot?> GetLatestAsync(string tenantId, CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT batch_id, tenant_id, range_start_t, range_end_t, head_link,
                   job_count, created_at, signed_by, signature
            FROM scheduler.batch_snapshot
            WHERE tenant_id = @tenant_id
            ORDER BY created_at DESC
            LIMIT 1
            """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        return await reader.ReadAsync(cancellationToken).ConfigureAwait(false) ? MapSnapshot(reader) : null;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<BatchSnapshot>> GetByTimeRangeAsync(
        string tenantId,
        DateTimeOffset startTime,
        DateTimeOffset endTime,
        int limit = 100,
        CancellationToken cancellationToken = default)
    {
        // Closed interval on created_at, newest first.
        const string sql = """
            SELECT batch_id, tenant_id, range_start_t, range_end_t, head_link,
                   job_count, created_at, signed_by, signature
            FROM scheduler.batch_snapshot
            WHERE tenant_id = @tenant_id
              AND created_at >= @start_time
              AND created_at <= @end_time
            ORDER BY created_at DESC
            LIMIT @limit
            """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "start_time", startTime);
        AddParameter(command, "end_time", endTime);
        AddParameter(command, "limit", limit);
        var snapshots = new List<BatchSnapshot>();
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            snapshots.Add(MapSnapshot(reader));
        }
        return snapshots;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<BatchSnapshot>> GetContainingHlcAsync(
        string tenantId,
        string tHlc,
        CancellationToken cancellationToken = default)
    {
        // Inclusive range containment; relies on HLC strings sorting lexically.
        const string sql = """
            SELECT batch_id, tenant_id, range_start_t, range_end_t, head_link,
                   job_count, created_at, signed_by, signature
            FROM scheduler.batch_snapshot
            WHERE tenant_id = @tenant_id
              AND range_start_t <= @t_hlc
              AND range_end_t >= @t_hlc
            ORDER BY created_at DESC
            """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "t_hlc", tHlc);
        var snapshots = new List<BatchSnapshot>();
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            snapshots.Add(MapSnapshot(reader));
        }
        return snapshots;
    }

    // Maps a row to a BatchSnapshot. Ordinals (0..8) must match the shared
    // SELECT column list used by every query in this class.
    private static BatchSnapshot MapSnapshot(NpgsqlDataReader reader)
    {
        return new BatchSnapshot
        {
            BatchId = reader.GetGuid(0),
            TenantId = reader.GetString(1),
            RangeStartT = reader.GetString(2),
            RangeEndT = reader.GetString(3),
            HeadLink = reader.GetFieldValue<byte[]>(4),
            JobCount = reader.GetInt32(5),
            CreatedAt = reader.GetFieldValue<DateTimeOffset>(6),
            SignedBy = reader.IsDBNull(7) ? null : reader.GetString(7),
            Signature = reader.IsDBNull(8) ? null : reader.GetFieldValue<byte[]>(8)
        };
    }
}

View File

@@ -0,0 +1,111 @@
// <copyright file="PostgresChainHeadRepository.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for chain head operations over scheduler.chain_heads.
/// </summary>
public sealed class PostgresChainHeadRepository : RepositoryBase<SchedulerDataSource>, IChainHeadRepository
{
    /// <summary>
    /// Creates a new chain head repository.
    /// </summary>
    public PostgresChainHeadRepository(SchedulerDataSource dataSource, ILogger<PostgresChainHeadRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task<byte[]?> GetLastLinkAsync(
        string tenantId,
        string partitionKey,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT last_link
            FROM scheduler.chain_heads
            WHERE tenant_id = @tenant_id AND partition_key = @partition_key
            """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "partition_key", partitionKey);
        // ExecuteScalarAsync yields null when no row matches; last_link is
        // declared NOT NULL in the schema, so "as byte[]" only yields null
        // for the missing-row case.
        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return result as byte[];
    }

    /// <inheritdoc />
    public async Task<ChainHead?> GetAsync(
        string tenantId,
        string partitionKey,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT tenant_id, partition_key, last_link, last_t_hlc, last_job_id, updated_at
            FROM scheduler.chain_heads
            WHERE tenant_id = @tenant_id AND partition_key = @partition_key
            """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "partition_key", partitionKey);
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        // (tenant_id, partition_key) is the primary key, so at most one row.
        return await reader.ReadAsync(cancellationToken).ConfigureAwait(false) ? MapChainHead(reader) : null;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<ChainHead>> GetAllForTenantAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT tenant_id, partition_key, last_link, last_t_hlc, last_job_id, updated_at
            FROM scheduler.chain_heads
            WHERE tenant_id = @tenant_id
            ORDER BY partition_key
            """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        var heads = new List<ChainHead>();
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            heads.Add(MapChainHead(reader));
        }
        return heads;
    }

    // Maps a row to a ChainHead. Ordinals (0..5) must match the shared
    // SELECT column list used by GetAsync/GetAllForTenantAsync.
    private static ChainHead MapChainHead(NpgsqlDataReader reader)
    {
        return new ChainHead
        {
            TenantId = reader.GetString(0),
            PartitionKey = reader.GetString(1),
            LastLink = reader.GetFieldValue<byte[]>(2),
            LastTHlc = reader.GetString(3),
            LastJobId = reader.GetGuid(4),
            UpdatedAt = reader.GetFieldValue<DateTimeOffset>(5)
        };
    }
}

View File

@@ -0,0 +1,335 @@
// <copyright file="PostgresSchedulerLogRepository.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for HLC-ordered scheduler log operations.
/// </summary>
/// <remarks>
/// The HLC timestamp column <c>t_hlc</c> is a lexicographically sortable string, so
/// ordering by it yields causal order. Because the schema only guarantees uniqueness
/// of (tenant_id, partition_key, t_hlc, job_id), two rows may share a <c>t_hlc</c>
/// value; every ordered read therefore adds <c>job_id</c> as a tie-breaker so result
/// order is fully deterministic — a requirement for audit-safe replay.
/// </remarks>
public sealed class PostgresSchedulerLogRepository : RepositoryBase<SchedulerDataSource>, ISchedulerLogRepository
{
    /// <summary>
    /// Creates a new scheduler log repository.
    /// </summary>
    /// <param name="dataSource">Tenant-aware scheduler data source.</param>
    /// <param name="logger">Logger instance.</param>
    public PostgresSchedulerLogRepository(SchedulerDataSource dataSource, ILogger<PostgresSchedulerLogRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task<long> InsertWithChainUpdateAsync(
        SchedulerLogEntry entry,
        CancellationToken cancellationToken = default)
    {
        // Use the stored function for atomic insert + chain head update, so the
        // log row and the chain_heads row can never diverge.
        const string sql = """
            SELECT scheduler.insert_log_with_chain_update(
                @tenant_id,
                @t_hlc,
                @partition_key,
                @job_id,
                @payload_hash,
                @prev_link,
                @link
            )
            """;
        await using var connection = await DataSource.OpenConnectionAsync(entry.TenantId, "writer", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", entry.TenantId);
        AddParameter(command, "t_hlc", entry.THlc);
        AddParameter(command, "partition_key", entry.PartitionKey);
        AddParameter(command, "job_id", entry.JobId);
        AddParameter(command, "payload_hash", entry.PayloadHash);
        // prev_link is NULL only for the first entry of a chain.
        AddParameter(command, "prev_link", entry.PrevLink ?? (object)DBNull.Value);
        AddParameter(command, "link", entry.Link);
        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt64(result);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<SchedulerLogEntry>> GetByHlcOrderAsync(
        string tenantId,
        string? partitionKey,
        int limit,
        CancellationToken cancellationToken = default)
    {
        // job_id tie-breaker keeps the order deterministic when t_hlc values collide.
        var sql = partitionKey is null
            ? """
              SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
                     payload_hash, prev_link, link, created_at
              FROM scheduler.scheduler_log
              WHERE tenant_id = @tenant_id
              ORDER BY t_hlc ASC, job_id ASC
              LIMIT @limit
              """
            : """
              SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
                     payload_hash, prev_link, link, created_at
              FROM scheduler.scheduler_log
              WHERE tenant_id = @tenant_id AND partition_key = @partition_key
              ORDER BY t_hlc ASC, job_id ASC
              LIMIT @limit
              """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "limit", limit);
        if (partitionKey is not null)
        {
            AddParameter(command, "partition_key", partitionKey);
        }
        var entries = new List<SchedulerLogEntry>();
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<SchedulerLogEntry>> GetByHlcRangeAsync(
        string tenantId,
        string? startTHlc,
        string? endTHlc,
        int limit = 0,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        // Range is inclusive on both ends; null bounds leave the side open.
        var conditions = new List<string> { "tenant_id = @tenant_id" };
        if (startTHlc is not null)
        {
            conditions.Add("t_hlc >= @start_t_hlc");
        }
        if (endTHlc is not null)
        {
            conditions.Add("t_hlc <= @end_t_hlc");
        }
        if (partitionKey is not null)
        {
            conditions.Add("partition_key = @partition_key");
        }
        // limit is an int, so interpolating it cannot inject SQL; 0 means unbounded.
        var limitClause = limit > 0 ? $"LIMIT {limit}" : string.Empty;
        var sql = $"""
            SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
                   payload_hash, prev_link, link, created_at
            FROM scheduler.scheduler_log
            WHERE {string.Join(" AND ", conditions)}
            ORDER BY t_hlc ASC, job_id ASC
            {limitClause}
            """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        if (startTHlc is not null)
        {
            AddParameter(command, "start_t_hlc", startTHlc);
        }
        if (endTHlc is not null)
        {
            AddParameter(command, "end_t_hlc", endTHlc);
        }
        if (partitionKey is not null)
        {
            AddParameter(command, "partition_key", partitionKey);
        }
        var entries = new List<SchedulerLogEntry>();
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<SchedulerLogEntry>> GetAfterHlcAsync(
        string tenantId,
        string afterTHlc,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        // Cursor pagination: strictly greater than the supplied HLC timestamp.
        var sql = partitionKey is null
            ? """
              SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
                     payload_hash, prev_link, link, created_at
              FROM scheduler.scheduler_log
              WHERE tenant_id = @tenant_id AND t_hlc > @after_t_hlc
              ORDER BY t_hlc ASC, job_id ASC
              LIMIT @limit
              """
            : """
              SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
                     payload_hash, prev_link, link, created_at
              FROM scheduler.scheduler_log
              WHERE tenant_id = @tenant_id AND t_hlc > @after_t_hlc AND partition_key = @partition_key
              ORDER BY t_hlc ASC, job_id ASC
              LIMIT @limit
              """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "after_t_hlc", afterTHlc);
        AddParameter(command, "limit", limit);
        if (partitionKey is not null)
        {
            AddParameter(command, "partition_key", partitionKey);
        }
        var entries = new List<SchedulerLogEntry>();
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }
        return entries;
    }

    /// <inheritdoc />
    public async Task<int> CountByHlcRangeAsync(
        string tenantId,
        string? startTHlc,
        string? endTHlc,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        // Same inclusive-bound semantics as GetByHlcRangeAsync, without fetching rows.
        var conditions = new List<string> { "tenant_id = @tenant_id" };
        if (startTHlc is not null)
        {
            conditions.Add("t_hlc >= @start_t_hlc");
        }
        if (endTHlc is not null)
        {
            conditions.Add("t_hlc <= @end_t_hlc");
        }
        if (partitionKey is not null)
        {
            conditions.Add("partition_key = @partition_key");
        }
        var sql = $"""
            SELECT COUNT(*)
            FROM scheduler.scheduler_log
            WHERE {string.Join(" AND ", conditions)}
            """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        if (startTHlc is not null)
        {
            AddParameter(command, "start_t_hlc", startTHlc);
        }
        if (endTHlc is not null)
        {
            AddParameter(command, "end_t_hlc", endTHlc);
        }
        if (partitionKey is not null)
        {
            AddParameter(command, "partition_key", partitionKey);
        }
        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt32(result);
    }

    /// <inheritdoc />
    public async Task<SchedulerLogEntry?> GetByJobIdAsync(
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        // Deliberately not tenant-scoped (system connection): lookup by job_id
        // alone. NOTE(review): callers appear responsible for tenant-isolation
        // checks on the returned entry — confirm against call sites.
        const string sql = """
            SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
                   payload_hash, prev_link, link, created_at
            FROM scheduler.scheduler_log
            WHERE job_id = @job_id
            """;
        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "job_id", jobId);
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        return await reader.ReadAsync(cancellationToken).ConfigureAwait(false) ? MapEntry(reader) : null;
    }

    /// <inheritdoc />
    public async Task<bool> ExistsAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        // EXISTS stops at the first match — cheaper than COUNT(*) > 0.
        const string sql = """
            SELECT EXISTS(
                SELECT 1 FROM scheduler.scheduler_log
                WHERE tenant_id = @tenant_id AND job_id = @job_id
            )
            """;
        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "job_id", jobId);
        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return result is true;
    }

    /// <summary>
    /// Materializes one <see cref="SchedulerLogEntry"/> from the current reader row.
    /// Ordinals mirror the shared SELECT list used by every query in this class.
    /// </summary>
    private static SchedulerLogEntry MapEntry(NpgsqlDataReader reader)
    {
        return new SchedulerLogEntry
        {
            SeqBigint = reader.GetInt64(0),
            TenantId = reader.GetString(1),
            THlc = reader.GetString(2),
            PartitionKey = reader.GetString(3),
            JobId = reader.GetGuid(4),
            PayloadHash = reader.GetFieldValue<byte[]>(5),
            PrevLink = reader.IsDBNull(6) ? null : reader.GetFieldValue<byte[]>(6),
            Link = reader.GetFieldValue<byte[]>(7),
            CreatedAt = reader.GetFieldValue<DateTimeOffset>(8)
        };
    }
}

View File

@@ -0,0 +1,123 @@
// <copyright file="SchedulerChainLinking.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Security.Cryptography;
using System.Text;
using StellaOps.Canonical.Json;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Persistence;
/// <summary>
/// Chain linking utilities for scheduler audit-safe ordering.
/// Implements: link_i = Hash(link_{i-1} || job_id || t_hlc || payload_hash)
/// </summary>
public static class SchedulerChainLinking
{
    /// <summary>
    /// Size of a chain link in bytes (SHA-256).
    /// </summary>
    public const int LinkSizeBytes = 32;

    /// <summary>
    /// Zero link used as prev_link for the first entry in a chain.
    /// NOTE: exposed as a mutable array for compatibility — callers must never
    /// write to it, or every subsequent genesis link would be corrupted.
    /// </summary>
    public static readonly byte[] ZeroLink = new byte[LinkSizeBytes];

    /// <summary>
    /// Compute chain link per advisory specification:
    /// link_i = Hash(link_{i-1} || job_id || t_hlc || payload_hash)
    /// </summary>
    /// <param name="prevLink">Previous chain link (null or empty for first entry).</param>
    /// <param name="jobId">Job identifier.</param>
    /// <param name="tHlc">HLC timestamp.</param>
    /// <param name="payloadHash">SHA-256 hash of canonical payload.</param>
    /// <returns>The computed chain link (32 bytes).</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when <paramref name="prevLink"/> is non-empty but not exactly
    /// <see cref="LinkSizeBytes"/> bytes.
    /// </exception>
    public static byte[] ComputeLink(
        byte[]? prevLink,
        Guid jobId,
        HlcTimestamp tHlc,
        byte[] payloadHash)
    {
        ArgumentNullException.ThrowIfNull(payloadHash);
        // Reject malformed previous links instead of silently treating them as the
        // genesis link: a truncated or corrupt prev_link would otherwise yield a
        // "valid-looking" link that breaks chain verification downstream.
        if (prevLink is { Length: > 0 } && prevLink.Length != LinkSizeBytes)
        {
            throw new ArgumentException(
                $"prevLink must be {LinkSizeBytes} bytes when provided (got {prevLink.Length}).",
                nameof(prevLink));
        }
        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
        // Previous link (or 32 zero bytes for the first entry)
        hasher.AppendData(prevLink is { Length: LinkSizeBytes } ? prevLink : ZeroLink);
        // Job ID via Guid.ToByteArray() — .NET's mixed-endian GUID layout, which is
        // stable across .NET platforms and therefore deterministic for our purposes.
        hasher.AppendData(jobId.ToByteArray());
        // HLC timestamp as UTF-8 bytes of its sortable string form
        hasher.AppendData(Encoding.UTF8.GetBytes(tHlc.ToSortableString()));
        // Payload hash
        hasher.AppendData(payloadHash);
        return hasher.GetHashAndReset();
    }

    /// <summary>
    /// Compute chain link from string HLC timestamp.
    /// </summary>
    public static byte[] ComputeLink(
        byte[]? prevLink,
        Guid jobId,
        string tHlcString,
        byte[] payloadHash)
    {
        var tHlc = HlcTimestamp.Parse(tHlcString);
        return ComputeLink(prevLink, jobId, tHlc, payloadHash);
    }

    /// <summary>
    /// Compute deterministic payload hash from canonical JSON.
    /// </summary>
    /// <typeparam name="T">Payload type.</typeparam>
    /// <param name="payload">The payload object.</param>
    /// <returns>SHA-256 hash of the canonical JSON representation.</returns>
    public static byte[] ComputePayloadHash<T>(T payload)
    {
        // CanonJson guarantees a stable byte representation for equal payloads.
        var canonical = CanonJson.Serialize(payload);
        return SHA256.HashData(Encoding.UTF8.GetBytes(canonical));
    }

    /// <summary>
    /// Compute payload hash from raw bytes.
    /// </summary>
    /// <param name="payloadBytes">Raw payload bytes.</param>
    /// <returns>SHA-256 hash of the bytes.</returns>
    public static byte[] ComputePayloadHash(byte[] payloadBytes)
    {
        return SHA256.HashData(payloadBytes);
    }

    /// <summary>
    /// Verify that a chain link matches the expected computation.
    /// Uses a constant-time comparison to avoid leaking mismatch position.
    /// </summary>
    public static bool VerifyLink(
        byte[] storedLink,
        byte[]? prevLink,
        Guid jobId,
        HlcTimestamp tHlc,
        byte[] payloadHash)
    {
        var computed = ComputeLink(prevLink, jobId, tHlc, payloadHash);
        return CryptographicOperations.FixedTimeEquals(storedLink, computed);
    }

    /// <summary>
    /// Convert link bytes to hex string for display.
    /// </summary>
    public static string ToHex(byte[]? link)
    {
        if (link is null or { Length: 0 })
        {
            return "(null)";
        }
        return Convert.ToHexString(link).ToLowerInvariant();
    }
}

View File

@@ -27,6 +27,8 @@
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Determinism.Abstractions\StellaOps.Determinism.Abstractions.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.EfCore\StellaOps.Infrastructure.EfCore.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.HybridLogicalClock\StellaOps.HybridLogicalClock.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Canonical.Json\StellaOps.Canonical.Json.csproj" />
</ItemGroup>
<!-- Embed SQL migrations as resources -->

View File

@@ -0,0 +1,235 @@
// <copyright file="BatchSnapshotDsseSigner.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Configuration for DSSE signing of scheduler batch snapshots.
/// </summary>
public sealed class BatchSnapshotDsseOptions
{
    /// <summary>
    /// Gets or sets the signing mode. "hmac" enables HMAC-SHA256 signing;
    /// "none" disables signing entirely.
    /// </summary>
    public string Mode { get; set; } = "none";

    /// <summary>
    /// Gets or sets the Base64-encoded HMAC secret key.
    /// Must be set whenever <see cref="Mode"/> is "hmac".
    /// </summary>
    public string? SecretBase64 { get; set; }

    /// <summary>
    /// Gets or sets the key identifier recorded alongside produced signatures.
    /// </summary>
    public string KeyId { get; set; } = "scheduler-batch-snapshot";

    /// <summary>
    /// Gets or sets the payload type string embedded in the DSSE envelope PAE.
    /// </summary>
    public string PayloadType { get; set; } = "application/vnd.stellaops.scheduler.batch-snapshot+json";
}
/// <summary>
/// Produces and checks DSSE signatures over batch snapshot digests.
/// </summary>
public interface IBatchSnapshotDsseSigner
{
    /// <summary>
    /// Gets whether signing is enabled.
    /// </summary>
    bool IsEnabled { get; }

    /// <summary>
    /// Signs a batch snapshot and returns the signature result.
    /// </summary>
    /// <param name="digest">The digest bytes to sign.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Signature result with key ID and signature bytes.</returns>
    Task<BatchSnapshotSignatureResult> SignAsync(byte[] digest, CancellationToken cancellationToken = default);

    /// <summary>
    /// Verifies a batch snapshot signature.
    /// </summary>
    /// <param name="digest">The original digest bytes.</param>
    /// <param name="signature">The signature to verify.</param>
    /// <param name="keyId">The key ID used for signing.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if signature is valid.</returns>
    Task<bool> VerifyAsync(byte[] digest, byte[] signature, string keyId, CancellationToken cancellationToken = default);
}
/// <summary>
/// DSSE signer for batch snapshots using HMAC-SHA256.
/// The decoded secret is zeroed immediately after use so key material does not
/// linger on the managed heap longer than necessary.
/// </summary>
public sealed class BatchSnapshotDsseSigner : IBatchSnapshotDsseSigner
{
    private readonly IOptions<BatchSnapshotDsseOptions> _options;
    private readonly ILogger<BatchSnapshotDsseSigner> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="BatchSnapshotDsseSigner"/> class.
    /// </summary>
    /// <param name="options">Signing options.</param>
    /// <param name="logger">Logger instance.</param>
    public BatchSnapshotDsseSigner(
        IOptions<BatchSnapshotDsseOptions> options,
        ILogger<BatchSnapshotDsseSigner> logger)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc/>
    public bool IsEnabled => string.Equals(_options.Value.Mode, "hmac", StringComparison.OrdinalIgnoreCase);

    /// <inheritdoc/>
    public Task<BatchSnapshotSignatureResult> SignAsync(byte[] digest, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(digest);
        cancellationToken.ThrowIfCancellationRequested();
        var opts = _options.Value;
        if (!IsEnabled)
        {
            // Disabled mode returns an empty result rather than throwing, so
            // callers can sign unconditionally.
            _logger.LogDebug("Batch snapshot DSSE signing is disabled");
            return Task.FromResult(new BatchSnapshotSignatureResult(string.Empty, Array.Empty<byte>()));
        }
        if (string.IsNullOrWhiteSpace(opts.SecretBase64))
        {
            throw new InvalidOperationException("HMAC signing mode requires SecretBase64 to be configured");
        }
        byte[] secret;
        try
        {
            secret = Convert.FromBase64String(opts.SecretBase64);
        }
        catch (FormatException ex)
        {
            throw new InvalidOperationException("SecretBase64 is not valid Base64", ex);
        }
        byte[] signature;
        try
        {
            // Compute PAE (Pre-Authentication Encoding) for DSSE, then MAC it.
            var pae = ComputePreAuthenticationEncoding(opts.PayloadType, digest);
            signature = HMACSHA256.HashData(secret, pae);
        }
        finally
        {
            // Scrub the decoded key material as soon as it has been used.
            CryptographicOperations.ZeroMemory(secret);
        }
        _logger.LogDebug(
            "Signed batch snapshot with key {KeyId}, digest length {DigestLength}, signature length {SigLength}",
            opts.KeyId, digest.Length, signature.Length);
        return Task.FromResult(new BatchSnapshotSignatureResult(opts.KeyId, signature));
    }

    /// <inheritdoc/>
    public Task<bool> VerifyAsync(byte[] digest, byte[] signature, string keyId, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(digest);
        ArgumentNullException.ThrowIfNull(signature);
        ArgumentNullException.ThrowIfNull(keyId);
        cancellationToken.ThrowIfCancellationRequested();
        var opts = _options.Value;
        if (!IsEnabled)
        {
            // When signing is off there is nothing to verify against; treat as valid.
            _logger.LogDebug("Batch snapshot DSSE verification skipped - signing is disabled");
            return Task.FromResult(true);
        }
        if (!string.Equals(keyId, opts.KeyId, StringComparison.Ordinal))
        {
            _logger.LogWarning("Key ID mismatch: expected {Expected}, got {Actual}", opts.KeyId, keyId);
            return Task.FromResult(false);
        }
        if (string.IsNullOrWhiteSpace(opts.SecretBase64))
        {
            _logger.LogWarning("Cannot verify signature - SecretBase64 not configured");
            return Task.FromResult(false);
        }
        byte[] secret;
        try
        {
            secret = Convert.FromBase64String(opts.SecretBase64);
        }
        catch (FormatException)
        {
            // Verification is best-effort: report invalid instead of throwing.
            _logger.LogWarning("Cannot verify signature - SecretBase64 is not valid Base64");
            return Task.FromResult(false);
        }
        bool isValid;
        try
        {
            var pae = ComputePreAuthenticationEncoding(opts.PayloadType, digest);
            var expected = HMACSHA256.HashData(secret, pae);
            // Constant-time compare avoids leaking how many leading bytes matched.
            isValid = CryptographicOperations.FixedTimeEquals(expected, signature);
        }
        finally
        {
            CryptographicOperations.ZeroMemory(secret);
        }
        _logger.LogDebug(
            "Verified batch snapshot signature with key {KeyId}: {Result}",
            keyId, isValid ? "valid" : "invalid");
        return Task.FromResult(isValid);
    }

    /// <summary>
    /// Computes DSSE Pre-Authentication Encoding (PAE).
    /// Format: "DSSEv1" SP len(payloadType) SP payloadType SP len(payload) SP payload
    /// </summary>
    /// <remarks>
    /// Follows DSSE v1 specification with ASCII decimal lengths and space separators.
    /// </remarks>
    internal static byte[] ComputePreAuthenticationEncoding(string payloadType, ReadOnlySpan<byte> payload)
    {
        var header = "DSSEv1"u8;
        var pt = Encoding.UTF8.GetBytes(payloadType);
        // Lengths are rendered as invariant ASCII decimal per the DSSE spec.
        var lenPt = Encoding.UTF8.GetBytes(pt.Length.ToString(CultureInfo.InvariantCulture));
        var lenPayload = Encoding.UTF8.GetBytes(payload.Length.ToString(CultureInfo.InvariantCulture));
        var space = " "u8;
        var totalLength = header.Length + space.Length + lenPt.Length + space.Length + pt.Length +
                          space.Length + lenPayload.Length + space.Length + payload.Length;
        var buffer = new byte[totalLength];
        var offset = 0;
        header.CopyTo(buffer.AsSpan(offset));
        offset += header.Length;
        space.CopyTo(buffer.AsSpan(offset));
        offset += space.Length;
        lenPt.CopyTo(buffer.AsSpan(offset));
        offset += lenPt.Length;
        space.CopyTo(buffer.AsSpan(offset));
        offset += space.Length;
        pt.CopyTo(buffer.AsSpan(offset));
        offset += pt.Length;
        space.CopyTo(buffer.AsSpan(offset));
        offset += space.Length;
        lenPayload.CopyTo(buffer.AsSpan(offset));
        offset += lenPayload.Length;
        space.CopyTo(buffer.AsSpan(offset));
        offset += space.Length;
        payload.CopyTo(buffer.AsSpan(offset));
        return buffer;
    }
}

View File

@@ -0,0 +1,312 @@
// <copyright file="BatchSnapshotService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Security.Cryptography;
using System.Text;
using Microsoft.Extensions.Logging;
using StellaOps.Canonical.Json;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence;
using StellaOps.Scheduler.Persistence.Postgres.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Optional signing delegate for batch snapshots. When supplied to the snapshot
/// service, it is invoked with the canonical snapshot digest at creation time.
/// </summary>
/// <param name="digest">The digest to sign (SHA-256 of the canonical snapshot JSON).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The signed result containing key ID and signature bytes.</returns>
public delegate Task<BatchSnapshotSignatureResult> BatchSnapshotSignerDelegate(
    byte[] digest,
    CancellationToken cancellationToken);
/// <summary>
/// Result of signing a batch snapshot.
/// </summary>
/// <param name="KeyId">The key identifier used for signing.</param>
/// <param name="Signature">The raw signature bytes.</param>
public readonly record struct BatchSnapshotSignatureResult(string KeyId, byte[] Signature);
/// <summary>
/// Optional verification delegate for batch snapshot DSSE signatures. Invoked
/// during snapshot verification when a stored snapshot carries a signer key ID.
/// </summary>
/// <param name="keyId">The key identifier used for signing.</param>
/// <param name="digest">The digest that was signed.</param>
/// <param name="signature">The signature bytes to verify.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True if the signature is valid.</returns>
public delegate Task<bool> BatchSnapshotVerifierDelegate(
    string keyId,
    byte[] digest,
    byte[] signature,
    CancellationToken cancellationToken);
/// <summary>
/// Implementation of batch snapshot service for audit anchoring.
/// A snapshot pins an inclusive HLC range to the chain link of the last job in
/// that range, optionally DSSE-signed, so the log can later be re-read and
/// compared against the anchored state.
/// </summary>
public sealed class BatchSnapshotService : IBatchSnapshotService
{
    private readonly ISchedulerLogRepository _logRepository;
    private readonly IBatchSnapshotRepository _snapshotRepository;
    // Signing and verification are optional: either delegate may be null.
    private readonly BatchSnapshotSignerDelegate? _signer;
    private readonly BatchSnapshotVerifierDelegate? _verifier;
    private readonly ILogger<BatchSnapshotService> _logger;
    /// <summary>
    /// Creates a new batch snapshot service.
    /// </summary>
    /// <param name="logRepository">HLC-ordered scheduler log reader.</param>
    /// <param name="snapshotRepository">Snapshot persistence.</param>
    /// <param name="logger">Logger instance.</param>
    /// <param name="signer">Optional signing delegate; when null, sign requests are logged and skipped.</param>
    /// <param name="verifier">Optional verification delegate; when null, stored signatures are not checked.</param>
    public BatchSnapshotService(
        ISchedulerLogRepository logRepository,
        IBatchSnapshotRepository snapshotRepository,
        ILogger<BatchSnapshotService> logger,
        BatchSnapshotSignerDelegate? signer = null,
        BatchSnapshotVerifierDelegate? verifier = null)
    {
        _logRepository = logRepository ?? throw new ArgumentNullException(nameof(logRepository));
        _snapshotRepository = snapshotRepository ?? throw new ArgumentNullException(nameof(snapshotRepository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _signer = signer;
        _verifier = verifier;
    }
    /// <inheritdoc />
    /// <exception cref="InvalidOperationException">
    /// Thrown when the HLC range contains no jobs for the tenant.
    /// </exception>
    public async Task<BatchSnapshot> CreateSnapshotAsync(
        string tenantId,
        HlcTimestamp startHlc,
        HlcTimestamp endHlc,
        bool sign = false,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        var startT = startHlc.ToSortableString();
        var endT = endHlc.ToSortableString();
        // Get jobs in range (inclusive bounds, no limit)
        var jobs = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startT,
            endT,
            limit: 0, // No limit
            partitionKey: null,
            cancellationToken).ConfigureAwait(false);
        if (jobs.Count == 0)
        {
            // An empty range would have no head link to anchor; refuse it.
            throw new InvalidOperationException($"No jobs in specified HLC range [{startT}, {endT}] for tenant {tenantId}");
        }
        // Get chain head (last link in range)
        var headLink = jobs[^1].Link;
        // Create snapshot
        var snapshot = new BatchSnapshot
        {
            BatchId = Guid.NewGuid(),
            TenantId = tenantId,
            RangeStartT = startT,
            RangeEndT = endT,
            HeadLink = headLink,
            JobCount = jobs.Count,
            CreatedAt = DateTimeOffset.UtcNow
        };
        // Sign if requested and signer available
        if (sign)
        {
            if (_signer is null)
            {
                // Best-effort: an unsigned snapshot is still useful for anchoring.
                _logger.LogWarning("Signing requested but no signer configured. Snapshot will be unsigned.");
            }
            else
            {
                // The digest covers both the snapshot metadata and every job row,
                // so the same computation is reused at verification time.
                var digest = ComputeSnapshotDigest(snapshot, jobs);
                var signed = await _signer(digest, cancellationToken).ConfigureAwait(false);
                snapshot = snapshot with
                {
                    SignedBy = signed.KeyId,
                    Signature = signed.Signature
                };
            }
        }
        // Persist
        await _snapshotRepository.InsertAsync(snapshot, cancellationToken).ConfigureAwait(false);
        _logger.LogInformation(
            "Batch snapshot created. BatchId={BatchId}, TenantId={TenantId}, Range=[{Start}, {End}], JobCount={JobCount}, Signed={Signed}",
            snapshot.BatchId,
            tenantId,
            startT,
            endT,
            jobs.Count,
            snapshot.SignedBy is not null);
        return snapshot;
    }
    /// <inheritdoc />
    public Task<BatchSnapshot?> GetSnapshotAsync(
        Guid batchId,
        CancellationToken cancellationToken = default)
    {
        return _snapshotRepository.GetByIdAsync(batchId, cancellationToken);
    }
    /// <inheritdoc />
    public Task<BatchSnapshot?> GetLatestSnapshotAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        return _snapshotRepository.GetLatestAsync(tenantId, cancellationToken);
    }
    /// <inheritdoc />
    public async Task<BatchSnapshotVerificationResult> VerifySnapshotAsync(
        Guid batchId,
        CancellationToken cancellationToken = default)
    {
        var issues = new List<string>();
        var snapshot = await _snapshotRepository.GetByIdAsync(batchId, cancellationToken).ConfigureAwait(false);
        if (snapshot is null)
        {
            return new BatchSnapshotVerificationResult(
                IsValid: false,
                SnapshotFound: false,
                ChainHeadMatches: false,
                JobCountMatches: false,
                SignatureValid: null,
                Issues: ["Snapshot not found"]);
        }
        // Get current jobs in the same range; any later tampering or loss in the
        // log shows up as a count or head-link mismatch below.
        var jobs = await _logRepository.GetByHlcRangeAsync(
            snapshot.TenantId,
            snapshot.RangeStartT,
            snapshot.RangeEndT,
            limit: 0,
            partitionKey: null,
            cancellationToken).ConfigureAwait(false);
        // Verify job count
        var jobCountMatches = jobs.Count == snapshot.JobCount;
        if (!jobCountMatches)
        {
            issues.Add($"Job count mismatch: expected {snapshot.JobCount}, found {jobs.Count}");
        }
        // Verify chain head (also fails when the range is now empty)
        var chainHeadMatches = jobs.Count > 0 && ByteArrayEquals(jobs[^1].Link, snapshot.HeadLink);
        if (!chainHeadMatches)
        {
            issues.Add("Chain head link does not match snapshot");
        }
        // DSSE signature verification; null means "snapshot was never signed".
        bool? signatureValid = null;
        if (snapshot.SignedBy is not null)
        {
            if (snapshot.Signature is null or { Length: 0 })
            {
                issues.Add("Snapshot has signer but empty signature");
                signatureValid = false;
            }
            else if (_verifier is null)
            {
                // No verifier configured - check signature format only
                // NOTE(review): reporting true here marks an unverified signature
                // as valid; confirm callers accept this optimistic semantics.
                _logger.LogDebug(
                    "Signature verification skipped for BatchId={BatchId}: no verifier configured",
                    batchId);
                signatureValid = true; // Assume valid if no verifier
            }
            else
            {
                // Perform DSSE signature verification over the recomputed digest
                var digest = ComputeSnapshotDigest(snapshot, jobs);
                try
                {
                    signatureValid = await _verifier(
                        snapshot.SignedBy,
                        digest,
                        snapshot.Signature,
                        cancellationToken).ConfigureAwait(false);
                    if (!signatureValid.Value)
                    {
                        issues.Add($"DSSE signature verification failed for key {snapshot.SignedBy}");
                    }
                }
                catch (Exception ex)
                {
                    // A throwing verifier counts as a failed verification, not a crash.
                    _logger.LogWarning(ex, "Signature verification threw exception for BatchId={BatchId}", batchId);
                    issues.Add($"Signature verification error: {ex.Message}");
                    signatureValid = false;
                }
            }
        }
        // An unsigned snapshot (signatureValid == null) can still be fully valid.
        var isValid = jobCountMatches && chainHeadMatches && (signatureValid ?? true);
        _logger.LogDebug(
            "Batch snapshot verification complete. BatchId={BatchId}, IsValid={IsValid}, Issues={Issues}",
            batchId,
            isValid,
            issues.Count > 0 ? string.Join("; ", issues) : "none");
        return new BatchSnapshotVerificationResult(
            IsValid: isValid,
            SnapshotFound: true,
            ChainHeadMatches: chainHeadMatches,
            JobCountMatches: jobCountMatches,
            SignatureValid: signatureValid,
            Issues: issues);
    }
    /// <summary>
    /// Computes a deterministic digest over the snapshot and its jobs.
    /// This is the canonical representation used for both signing and verification.
    /// </summary>
    internal static byte[] ComputeSnapshotDigest(BatchSnapshot snapshot, IReadOnlyList<SchedulerLogEntry> jobs)
    {
        // Create canonical representation for hashing; byte arrays are hex-encoded
        // so the structure serializes to stable JSON via CanonJson.
        var digestInput = new
        {
            snapshot.BatchId,
            snapshot.TenantId,
            snapshot.RangeStartT,
            snapshot.RangeEndT,
            HeadLink = Convert.ToHexString(snapshot.HeadLink),
            snapshot.JobCount,
            Jobs = jobs.Select(j => new
            {
                j.JobId,
                j.THlc,
                PayloadHash = Convert.ToHexString(j.PayloadHash),
                Link = Convert.ToHexString(j.Link)
            }).ToArray()
        };
        var canonical = CanonJson.Serialize(digestInput);
        return SHA256.HashData(Encoding.UTF8.GetBytes(canonical));
    }
    // Null-tolerant structural equality for link byte arrays.
    private static bool ByteArrayEquals(byte[]? a, byte[]? b)
    {
        if (a is null && b is null)
        {
            return true;
        }
        if (a is null || b is null)
        {
            return false;
        }
        return a.AsSpan().SequenceEqual(b);
    }
}

View File

@@ -0,0 +1,179 @@
// <copyright file="HlcSchedulerDequeueService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence.Postgres.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Implementation of HLC-ordered scheduler job dequeuing.
/// </summary>
public sealed class HlcSchedulerDequeueService : IHlcSchedulerDequeueService
{
    private readonly ISchedulerLogRepository _logRepository;
    private readonly ILogger<HlcSchedulerDequeueService> _logger;

    /// <summary>
    /// Creates a new HLC scheduler dequeue service.
    /// </summary>
    /// <param name="logRepository">Repository over the HLC-ordered scheduler log.</param>
    /// <param name="logger">Diagnostic logger.</param>
    public HlcSchedulerDequeueService(
        ISchedulerLogRepository logRepository,
        ILogger<HlcSchedulerDequeueService> logger)
    {
        ArgumentNullException.ThrowIfNull(logRepository);
        ArgumentNullException.ThrowIfNull(logger);
        _logRepository = logRepository;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<SchedulerHlcDequeueResult> DequeueAsync(
        string tenantId,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

        // First page of entries, ordered by HLC timestamp.
        var page = await _logRepository.GetByHlcOrderAsync(
            tenantId,
            partitionKey,
            limit,
            cancellationToken).ConfigureAwait(false);

        // Unbounded count so callers can see the total backlog for pagination.
        var total = await _logRepository.CountByHlcRangeAsync(
            tenantId,
            startTHlc: null,
            endTHlc: null,
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        _logger.LogDebug(
            "Dequeued {Count} of {Total} entries in HLC order. TenantId={TenantId}, PartitionKey={PartitionKey}",
            page.Count,
            total,
            tenantId,
            partitionKey ?? "(all)");

        return new SchedulerHlcDequeueResult(page, total, RangeStartHlc: null, RangeEndHlc: null);
    }

    /// <inheritdoc />
    public async Task<SchedulerHlcDequeueResult> DequeueByRangeAsync(
        string tenantId,
        HlcTimestamp? startHlc,
        HlcTimestamp? endHlc,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

        // Null bounds pass through to the repository as unbounded range edges.
        var lower = startHlc?.ToSortableString();
        var upper = endHlc?.ToSortableString();

        var page = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            lower,
            upper,
            limit,
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        var total = await _logRepository.CountByHlcRangeAsync(
            tenantId,
            lower,
            upper,
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        _logger.LogDebug(
            "Dequeued {Count} of {Total} entries in HLC range [{Start}, {End}]. TenantId={TenantId}",
            page.Count,
            total,
            lower ?? "(unbounded)",
            upper ?? "(unbounded)",
            tenantId);

        return new SchedulerHlcDequeueResult(page, total, startHlc, endHlc);
    }

    /// <inheritdoc />
    public async Task<SchedulerHlcDequeueResult> DequeueAfterAsync(
        string tenantId,
        HlcTimestamp afterHlc,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

        var cursor = afterHlc.ToSortableString();

        // Entries strictly after the cursor, in HLC order.
        var page = await _logRepository.GetAfterHlcAsync(
            tenantId,
            cursor,
            limit,
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        // Remaining entries from the cursor onward.
        // NOTE(review): if CountByHlcRangeAsync treats its start bound as inclusive,
        // this count includes the cursor entry itself while GetAfterHlcAsync excludes
        // it — confirm the repository's bound semantics.
        var total = await _logRepository.CountByHlcRangeAsync(
            tenantId,
            cursor,
            endTHlc: null,
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        _logger.LogDebug(
            "Dequeued {Count} entries after HLC {AfterHlc}. TenantId={TenantId}, PartitionKey={PartitionKey}",
            page.Count,
            cursor,
            tenantId,
            partitionKey ?? "(all)");

        return new SchedulerHlcDequeueResult(page, total, afterHlc, RangeEndHlc: null);
    }

    /// <inheritdoc />
    public async Task<SchedulerLogEntry?> GetByJobIdAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var entry = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);

        // The repository lookup is not tenant-scoped; enforce isolation here and
        // report "not found" when the requesting tenant does not own the entry.
        if (entry is not null && !string.Equals(entry.TenantId, tenantId, StringComparison.Ordinal))
        {
            _logger.LogWarning(
                "Job {JobId} found but belongs to different tenant. RequestedTenant={RequestedTenant}, ActualTenant={ActualTenant}",
                jobId,
                tenantId,
                entry.TenantId);
            return null;
        }

        return entry;
    }
}

View File

@@ -0,0 +1,166 @@
// <copyright file="HlcSchedulerEnqueueService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Security.Cryptography;
using System.Text;
using Microsoft.Extensions.Logging;
using StellaOps.Canonical.Json;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence;
using StellaOps.Scheduler.Persistence.Postgres.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Implementation of HLC-ordered scheduler job enqueueing with chain linking.
/// </summary>
public sealed class HlcSchedulerEnqueueService : IHlcSchedulerEnqueueService
{
    /// <summary>
    /// Namespace GUID for deterministic job ID generation (v5 UUID style).
    /// </summary>
    private static readonly Guid JobIdNamespace = new("b8a7c6d5-e4f3-42a1-9b0c-1d2e3f4a5b6c");

    private readonly IHybridLogicalClock _hlc;
    private readonly ISchedulerLogRepository _logRepository;
    private readonly IChainHeadRepository _chainHeadRepository;
    private readonly ILogger<HlcSchedulerEnqueueService> _logger;

    /// <summary>
    /// Creates a new HLC scheduler enqueue service.
    /// </summary>
    /// <param name="hlc">Hybrid logical clock used to stamp new entries.</param>
    /// <param name="logRepository">Repository over the HLC-ordered scheduler log.</param>
    /// <param name="chainHeadRepository">Repository tracking the per-partition chain head.</param>
    /// <param name="logger">Diagnostic logger.</param>
    public HlcSchedulerEnqueueService(
        IHybridLogicalClock hlc,
        ISchedulerLogRepository logRepository,
        IChainHeadRepository chainHeadRepository,
        ILogger<HlcSchedulerEnqueueService> logger)
    {
        _hlc = hlc ?? throw new ArgumentNullException(nameof(hlc));
        _logRepository = logRepository ?? throw new ArgumentNullException(nameof(logRepository));
        _chainHeadRepository = chainHeadRepository ?? throw new ArgumentNullException(nameof(chainHeadRepository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public Task<SchedulerHlcEnqueueResult> EnqueuePlannerAsync(
        string tenantId,
        PlannerQueueMessage message,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(message);
        // Delegates to the generic path using the message's own idempotency key.
        return EnqueueAsync(tenantId, message, message.IdempotencyKey, partitionKey, cancellationToken);
    }

    /// <inheritdoc />
    public Task<SchedulerHlcEnqueueResult> EnqueueRunnerSegmentAsync(
        string tenantId,
        RunnerSegmentQueueMessage message,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(message);
        // Delegates to the generic path using the message's own idempotency key.
        return EnqueueAsync(tenantId, message, message.IdempotencyKey, partitionKey, cancellationToken);
    }

    /// <inheritdoc />
    public async Task<SchedulerHlcEnqueueResult> EnqueueAsync<T>(
        string tenantId,
        T payload,
        string idempotencyKey,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
        ArgumentNullException.ThrowIfNull(payload);
        ArgumentException.ThrowIfNullOrWhiteSpace(idempotencyKey);

        var effectivePartitionKey = partitionKey ?? string.Empty;

        // 1. Generate deterministic job ID from idempotency key
        var jobId = ComputeDeterministicJobId(idempotencyKey);

        // 2. Check for existing entry (idempotency)
        if (await _logRepository.ExistsAsync(tenantId, jobId, cancellationToken).ConfigureAwait(false))
        {
            var existing = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);

            // GetByJobIdAsync is not tenant-scoped, and job IDs derive from the
            // idempotency key alone (not the tenant), so two tenants reusing the same
            // key collide on jobId. Re-check tenant ownership before short-circuiting
            // so the dedupe path can never surface another tenant's chain data.
            if (existing is not null &&
                string.Equals(existing.TenantId, tenantId, StringComparison.Ordinal))
            {
                _logger.LogDebug(
                    "Job already enqueued, returning existing entry. TenantId={TenantId}, JobId={JobId}",
                    tenantId,
                    jobId);
                return new SchedulerHlcEnqueueResult(
                    HlcTimestamp.Parse(existing.THlc),
                    existing.JobId,
                    existing.Link,
                    Deduplicated: true);
            }
        }

        // 3. Generate HLC timestamp — this fixes the entry's position in the chain.
        var tHlc = _hlc.Tick();

        // 4. Compute payload hash so the chain commits to the job contents.
        var payloadHash = SchedulerChainLinking.ComputePayloadHash(payload);

        // 5. Get previous chain link for this (tenant, partition).
        var prevLink = await _chainHeadRepository.GetLastLinkAsync(tenantId, effectivePartitionKey, cancellationToken)
            .ConfigureAwait(false);

        // 6. Compute new chain link: Hash(prev_link || job_id || t_hlc || payload_hash).
        var link = SchedulerChainLinking.ComputeLink(prevLink, jobId, tHlc, payloadHash);

        // 7. Insert log entry (atomic with chain head update)
        var entry = new SchedulerLogEntry
        {
            TenantId = tenantId,
            THlc = tHlc.ToSortableString(),
            PartitionKey = effectivePartitionKey,
            JobId = jobId,
            PayloadHash = payloadHash,
            PrevLink = prevLink,
            Link = link
        };
        await _logRepository.InsertWithChainUpdateAsync(entry, cancellationToken).ConfigureAwait(false);

        _logger.LogInformation(
            "Job enqueued with HLC ordering. TenantId={TenantId}, JobId={JobId}, THlc={THlc}, Link={Link}",
            tenantId,
            jobId,
            tHlc.ToSortableString(),
            SchedulerChainLinking.ToHex(link));

        return new SchedulerHlcEnqueueResult(tHlc, jobId, link, Deduplicated: false);
    }

    /// <summary>
    /// Computes a deterministic GUID from the idempotency key using SHA-256.
    /// </summary>
    /// <remarks>
    /// Same key always yields the same GUID, which is what the dedupe path relies on.
    /// The key is hashed together with a fixed namespace GUID (UUID v5 pattern, but
    /// with SHA-256 instead of SHA-1).
    /// </remarks>
    private static Guid ComputeDeterministicJobId(string idempotencyKey)
    {
        // Use namespace + key pattern similar to UUID v5
        var namespaceBytes = JobIdNamespace.ToByteArray();
        var keyBytes = Encoding.UTF8.GetBytes(idempotencyKey);
        var combined = new byte[namespaceBytes.Length + keyBytes.Length];
        Buffer.BlockCopy(namespaceBytes, 0, combined, 0, namespaceBytes.Length);
        Buffer.BlockCopy(keyBytes, 0, combined, namespaceBytes.Length, keyBytes.Length);

        var hash = SHA256.HashData(combined);

        // Take first 16 bytes for GUID
        var guidBytes = new byte[16];
        Buffer.BlockCopy(hash, 0, guidBytes, 0, 16);

        // Stamp RFC 4122 version/variant bits so the result is a well-formed UUID.
        // The version nibble is set to 4 for shape only — the value is content-derived
        // from SHA-256, not random.
        guidBytes[6] = (byte)((guidBytes[6] & 0x0F) | 0x40);
        guidBytes[8] = (byte)((guidBytes[8] & 0x3F) | 0x80);

        return new Guid(guidBytes);
    }
}

View File

@@ -0,0 +1,178 @@
// <copyright file="HlcSchedulerMetrics.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System.Diagnostics.Metrics;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Metrics for HLC-ordered scheduler operations.
/// </summary>
public static class HlcSchedulerMetrics
{
    private const string TenantTagName = "tenant";
    private const string PartitionTagName = "partition";
    private const string ResultTagName = "result";

    // One shared meter; the instrument names below form the stable telemetry contract.
    private static readonly Meter Meter = new("StellaOps.Scheduler.Hlc");

    // --- Enqueue instruments ---
    private static readonly Counter<long> EnqueueOps = Meter.CreateCounter<long>(
        "scheduler_hlc_enqueues_total",
        unit: "{enqueue}",
        description: "Total number of HLC-ordered enqueue operations");

    private static readonly Counter<long> EnqueueDedupes = Meter.CreateCounter<long>(
        "scheduler_hlc_enqueue_deduplicated_total",
        unit: "{enqueue}",
        description: "Total number of deduplicated HLC enqueue operations");

    private static readonly Histogram<double> EnqueueLatency = Meter.CreateHistogram<double>(
        "scheduler_hlc_enqueue_duration_seconds",
        unit: "s",
        description: "Duration of HLC enqueue operations");

    // --- Dequeue instruments ---
    private static readonly Counter<long> DequeueOps = Meter.CreateCounter<long>(
        "scheduler_hlc_dequeues_total",
        unit: "{dequeue}",
        description: "Total number of HLC-ordered dequeue operations");

    private static readonly Counter<long> DequeueEntries = Meter.CreateCounter<long>(
        "scheduler_hlc_dequeued_entries_total",
        unit: "{entry}",
        description: "Total number of entries dequeued via HLC ordering");

    // --- Chain verification instruments ---
    private static readonly Counter<long> ChainVerifyOps = Meter.CreateCounter<long>(
        "scheduler_chain_verifications_total",
        unit: "{verification}",
        description: "Total number of chain verification operations");

    private static readonly Counter<long> ChainVerifyIssues = Meter.CreateCounter<long>(
        "scheduler_chain_verification_issues_total",
        unit: "{issue}",
        description: "Total number of chain verification issues found");

    private static readonly Counter<long> ChainVerifyEntries = Meter.CreateCounter<long>(
        "scheduler_chain_entries_verified_total",
        unit: "{entry}",
        description: "Total number of chain entries verified");

    // --- Batch snapshot instruments ---
    private static readonly Counter<long> SnapshotCreates = Meter.CreateCounter<long>(
        "scheduler_batch_snapshots_created_total",
        unit: "{snapshot}",
        description: "Total number of batch snapshots created");

    private static readonly Counter<long> SnapshotSigns = Meter.CreateCounter<long>(
        "scheduler_batch_snapshots_signed_total",
        unit: "{snapshot}",
        description: "Total number of signed batch snapshots");

    private static readonly Counter<long> SnapshotVerifies = Meter.CreateCounter<long>(
        "scheduler_batch_snapshot_verifications_total",
        unit: "{verification}",
        description: "Total number of batch snapshot verification operations");

    /// <summary>
    /// Records an HLC enqueue operation.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="partitionKey">Partition key (empty string if none).</param>
    /// <param name="deduplicated">Whether the operation was deduplicated.</param>
    public static void RecordEnqueue(string tenantId, string partitionKey, bool deduplicated)
    {
        var tags = BuildTags(tenantId, partitionKey);
        EnqueueOps.Add(1, tags);

        // A dedupe still counts as an enqueue above; this tracks the subset.
        if (deduplicated)
        {
            EnqueueDedupes.Add(1, tags);
        }
    }

    /// <summary>
    /// Records the duration of an HLC enqueue operation.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="partitionKey">Partition key.</param>
    /// <param name="durationSeconds">Duration in seconds.</param>
    public static void RecordEnqueueDuration(string tenantId, string partitionKey, double durationSeconds)
        => EnqueueLatency.Record(durationSeconds, BuildTags(tenantId, partitionKey));

    /// <summary>
    /// Records an HLC dequeue operation.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="partitionKey">Partition key.</param>
    /// <param name="entryCount">Number of entries dequeued.</param>
    public static void RecordDequeue(string tenantId, string partitionKey, int entryCount)
    {
        var tags = BuildTags(tenantId, partitionKey);
        DequeueOps.Add(1, tags);
        DequeueEntries.Add(entryCount, tags);
    }

    /// <summary>
    /// Records a chain verification operation.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="entriesVerified">Number of entries verified.</param>
    /// <param name="issuesFound">Number of issues found.</param>
    /// <param name="isValid">Whether the chain is valid.</param>
    public static void RecordChainVerification(string tenantId, int entriesVerified, int issuesFound, bool isValid)
    {
        KeyValuePair<string, object?> tenantTag = new(TenantTagName, tenantId);
        KeyValuePair<string, object?> resultTag = new(ResultTagName, isValid ? "valid" : "invalid");

        ChainVerifyOps.Add(1, tenantTag, resultTag);
        ChainVerifyEntries.Add(entriesVerified, tenantTag);

        if (issuesFound > 0)
        {
            ChainVerifyIssues.Add(issuesFound, tenantTag);
        }
    }

    /// <summary>
    /// Records a batch snapshot creation.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="jobCount">Number of jobs in the snapshot.</param>
    /// <param name="signed">Whether the snapshot was signed.</param>
    public static void RecordSnapshotCreated(string tenantId, int jobCount, bool signed)
    {
        KeyValuePair<string, object?> tenantTag = new(TenantTagName, tenantId);
        SnapshotCreates.Add(1, tenantTag);

        if (signed)
        {
            SnapshotSigns.Add(1, tenantTag);
        }
    }

    /// <summary>
    /// Records a batch snapshot verification.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="isValid">Whether the snapshot is valid.</param>
    public static void RecordSnapshotVerification(string tenantId, bool isValid)
        => SnapshotVerifies.Add(
            1,
            new KeyValuePair<string, object?>(TenantTagName, tenantId),
            new KeyValuePair<string, object?>(ResultTagName, isValid ? "valid" : "invalid"));

    // Standard (tenant, partition) tag pair; empty partition keys are normalized to
    // "(default)" so the partition tag is always present.
    private static KeyValuePair<string, object?>[] BuildTags(string tenantId, string partitionKey)
        => new KeyValuePair<string, object?>[]
        {
            new(TenantTagName, tenantId),
            new(PartitionTagName, string.IsNullOrEmpty(partitionKey) ? "(default)" : partitionKey)
        };
}

View File

@@ -0,0 +1,103 @@
// <copyright file="HlcSchedulerServiceCollectionExtensions.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Extension methods for registering HLC scheduler services.
/// </summary>
public static class HlcSchedulerServiceCollectionExtensions
{
    /// <summary>
    /// Adds HLC-ordered scheduler services to the service collection.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <returns>The service collection for chaining.</returns>
    public static IServiceCollection AddHlcSchedulerServices(this IServiceCollection services)
    {
        // Repositories (scoped for per-request database connections)
        services.TryAddScoped<ISchedulerLogRepository, PostgresSchedulerLogRepository>();
        services.TryAddScoped<IChainHeadRepository, PostgresChainHeadRepository>();
        services.TryAddScoped<IBatchSnapshotRepository, PostgresBatchSnapshotRepository>();

        // Services (scoped to align with repository lifetime)
        services.TryAddScoped<IHlcSchedulerEnqueueService, HlcSchedulerEnqueueService>();
        services.TryAddScoped<IHlcSchedulerDequeueService, HlcSchedulerDequeueService>();
        services.TryAddScoped<IBatchSnapshotService, BatchSnapshotService>();
        services.TryAddScoped<ISchedulerChainVerifier, SchedulerChainVerifier>();

        // DSSE signer (disabled by default)
        services.TryAddSingleton<IBatchSnapshotDsseSigner, BatchSnapshotDsseSigner>();

        return services;
    }

    /// <summary>
    /// Adds HLC-ordered scheduler services with DSSE signing support.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="configuration">Configuration section for DSSE options.</param>
    /// <returns>The service collection for chaining.</returns>
    public static IServiceCollection AddHlcSchedulerServicesWithDsseSigning(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        // Configure DSSE options; validation runs eagerly at host start.
        services.AddOptions<BatchSnapshotDsseOptions>()
            .Bind(configuration.GetSection("Scheduler:Queue:Hlc:DsseSigning"))
            .ValidateDataAnnotations()
            .ValidateOnStart();

        // Add base services
        services.AddHlcSchedulerServices();

        // Replace (rather than add) the snapshot service: AddHlcSchedulerServices
        // already TryAdd'ed a default IBatchSnapshotService, and appending a second
        // registration would leave both in the container (both visible to
        // IEnumerable<IBatchSnapshotService> resolution). Replace keeps exactly one.
        services.Replace(ServiceDescriptor.Scoped<IBatchSnapshotService>(sp =>
        {
            var logRepository = sp.GetRequiredService<ISchedulerLogRepository>();
            var snapshotRepository = sp.GetRequiredService<IBatchSnapshotRepository>();
            var logger = sp.GetRequiredService<Microsoft.Extensions.Logging.ILogger<BatchSnapshotService>>();
            var dsseSigner = sp.GetRequiredService<IBatchSnapshotDsseSigner>();

            // Only wire the signer delegate when signing is actually enabled.
            BatchSnapshotSignerDelegate? signer = dsseSigner.IsEnabled
                ? dsseSigner.SignAsync
                : null;

            return new BatchSnapshotService(logRepository, snapshotRepository, logger, signer);
        }));

        return services;
    }

    /// <summary>
    /// Adds HLC-ordered scheduler services with a custom signer delegate.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="signerFactory">Factory to create the signer delegate.</param>
    /// <returns>The service collection for chaining.</returns>
    public static IServiceCollection AddHlcSchedulerServices(
        this IServiceCollection services,
        Func<IServiceProvider, BatchSnapshotSignerDelegate> signerFactory)
    {
        ArgumentNullException.ThrowIfNull(signerFactory);

        services.AddHlcSchedulerServices();

        // Replace the TryAdd'ed default so only the signer-aware registration remains
        // (see the rationale in AddHlcSchedulerServicesWithDsseSigning).
        services.Replace(ServiceDescriptor.Scoped<IBatchSnapshotService>(sp =>
        {
            var logRepository = sp.GetRequiredService<ISchedulerLogRepository>();
            var snapshotRepository = sp.GetRequiredService<IBatchSnapshotRepository>();
            var logger = sp.GetRequiredService<Microsoft.Extensions.Logging.ILogger<BatchSnapshotService>>();
            var signer = signerFactory(sp);

            return new BatchSnapshotService(logRepository, snapshotRepository, logger, signer);
        }));

        return services;
    }
}

View File

@@ -0,0 +1,82 @@
// <copyright file="IBatchSnapshotService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Service for creating and managing batch snapshots of the scheduler chain.
/// </summary>
/// <remarks>
/// Batch snapshots provide audit anchors for the scheduler chain, capturing
/// the chain head at specific HLC ranges. These can be optionally signed
/// with DSSE for attestation purposes.
/// </remarks>
public interface IBatchSnapshotService
{
    /// <summary>
    /// Creates a batch snapshot for a given HLC range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startHlc">Start of the HLC range (inclusive).</param>
    /// <param name="endHlc">End of the HLC range (inclusive).</param>
    /// <param name="sign">Whether to sign the snapshot with DSSE.
    /// NOTE(review): the default DSSE signer registration is disabled; presumably
    /// signing only takes effect when a signer is wired in — confirm against the
    /// service registration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The created batch snapshot.</returns>
    Task<BatchSnapshot> CreateSnapshotAsync(
        string tenantId,
        HlcTimestamp startHlc,
        HlcTimestamp endHlc,
        bool sign = false,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets a batch snapshot by ID.
    /// </summary>
    /// <param name="batchId">The batch identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The snapshot if found; null otherwise.</returns>
    Task<BatchSnapshot?> GetSnapshotAsync(
        Guid batchId,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets the most recent batch snapshot for a tenant.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The most recent snapshot if found; null otherwise.</returns>
    Task<BatchSnapshot?> GetLatestSnapshotAsync(
        string tenantId,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Verifies a batch snapshot against the current chain state.
    /// </summary>
    /// <param name="batchId">The batch identifier to verify.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Verification result; see <see cref="BatchSnapshotVerificationResult"/> for the individual checks.</returns>
    Task<BatchSnapshotVerificationResult> VerifySnapshotAsync(
        Guid batchId,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of batch snapshot verification.
/// </summary>
/// <remarks>
/// <see cref="IsValid"/> is presumably the aggregate verdict over the individual
/// checks; when it is false, <see cref="Issues"/> carries the reasons.
/// </remarks>
/// <param name="IsValid">Whether the snapshot is valid.</param>
/// <param name="SnapshotFound">Whether the snapshot was found.</param>
/// <param name="ChainHeadMatches">Whether the chain head matches the snapshot.</param>
/// <param name="JobCountMatches">Whether the job count matches.</param>
/// <param name="SignatureValid">Whether the DSSE signature is valid (null if unsigned).</param>
/// <param name="Issues">List of verification issues if invalid.</param>
public readonly record struct BatchSnapshotVerificationResult(
    bool IsValid,
    bool SnapshotFound,
    bool ChainHeadMatches,
    bool JobCountMatches,
    bool? SignatureValid,
    IReadOnlyList<string> Issues);

View File

@@ -0,0 +1,77 @@
// <copyright file="IHlcSchedulerDequeueService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Service for HLC-ordered scheduler job dequeuing.
/// </summary>
/// <remarks>
/// This service provides deterministic, HLC-ordered retrieval of scheduler log entries
/// for processing. The HLC ordering guarantees causal consistency across distributed nodes.
/// </remarks>
public interface IHlcSchedulerDequeueService
{
    /// <summary>
    /// Dequeues scheduler log entries in HLC order.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the implementation in this module performs a read-only query —
    /// entries are not removed or marked consumed. Callers that need forward progress
    /// should track a cursor and use <see cref="DequeueAfterAsync"/>; confirm this is
    /// the intended consumption model.
    /// </remarks>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="limit">Maximum number of entries to return (must be positive).</param>
    /// <param name="partitionKey">Optional partition key to filter by (null means all partitions).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The dequeue result with entries in HLC order.</returns>
    Task<SchedulerHlcDequeueResult> DequeueAsync(
        string tenantId,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Dequeues scheduler log entries within an HLC time range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startHlc">HLC range start (inclusive, null for unbounded).</param>
    /// <param name="endHlc">HLC range end (inclusive, null for unbounded).</param>
    /// <param name="limit">Maximum number of entries to return (must be positive).</param>
    /// <param name="partitionKey">Optional partition key to filter by.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The dequeue result with entries in HLC order.</returns>
    Task<SchedulerHlcDequeueResult> DequeueByRangeAsync(
        string tenantId,
        HlcTimestamp? startHlc,
        HlcTimestamp? endHlc,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Dequeues scheduler log entries after a specific HLC timestamp (cursor-based).
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="afterHlc">HLC timestamp to start after (exclusive).</param>
    /// <param name="limit">Maximum number of entries to return (must be positive).</param>
    /// <param name="partitionKey">Optional partition key to filter by.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The dequeue result with entries in HLC order.</returns>
    Task<SchedulerHlcDequeueResult> DequeueAfterAsync(
        string tenantId,
        HlcTimestamp afterHlc,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Gets a single scheduler log entry by job ID.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="jobId">The job identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// The scheduler log entry if found, null otherwise. Implementations in this
    /// module also return null when the job exists but belongs to a different tenant
    /// (tenant isolation).
    /// </returns>
    Task<Persistence.Postgres.Models.SchedulerLogEntry?> GetByJobIdAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,64 @@
// <copyright file="IHlcSchedulerEnqueueService.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Service for HLC-ordered scheduler job enqueueing with chain linking.
/// </summary>
/// <remarks>
/// This service wraps job enqueueing with:
/// <list type="bullet">
/// <item><description>HLC timestamp assignment for global ordering</description></item>
/// <item><description>Chain link computation for audit proofs</description></item>
/// <item><description>Persistence to scheduler_log for replay</description></item>
/// </list>
/// </remarks>
public interface IHlcSchedulerEnqueueService
{
    /// <summary>
    /// Enqueues a planner message with HLC ordering and chain linking.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="message">The planner queue message; its idempotency key drives deduplication.</param>
    /// <param name="partitionKey">Optional partition key for chain separation.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The enqueue result with HLC timestamp and chain link.</returns>
    Task<SchedulerHlcEnqueueResult> EnqueuePlannerAsync(
        string tenantId,
        PlannerQueueMessage message,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Enqueues a runner segment message with HLC ordering and chain linking.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="message">The runner segment queue message; its idempotency key drives deduplication.</param>
    /// <param name="partitionKey">Optional partition key for chain separation.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The enqueue result with HLC timestamp and chain link.</returns>
    Task<SchedulerHlcEnqueueResult> EnqueueRunnerSegmentAsync(
        string tenantId,
        RunnerSegmentQueueMessage message,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Enqueues a generic payload with HLC ordering and chain linking.
    /// </summary>
    /// <remarks>
    /// The job ID is derived deterministically from <paramref name="idempotencyKey"/>,
    /// so re-enqueueing the same key yields a deduplicated result.
    /// NOTE(review): the derivation does not include the tenant, so the same key used
    /// by two tenants maps to the same job ID — confirm keys are tenant-unique by
    /// convention.
    /// </remarks>
    /// <typeparam name="T">Payload type.</typeparam>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="payload">The payload to enqueue.</param>
    /// <param name="idempotencyKey">Key for deduplication.</param>
    /// <param name="partitionKey">Optional partition key for chain separation.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The enqueue result with HLC timestamp and chain link.</returns>
    Task<SchedulerHlcEnqueueResult> EnqueueAsync<T>(
        string tenantId,
        T payload,
        string idempotencyKey,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,292 @@
// <copyright file="SchedulerChainVerifier.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Service for verifying the integrity of the scheduler chain.
/// </summary>
public interface ISchedulerChainVerifier
{
    /// <summary>
    /// Verifies the integrity of the scheduler chain within an HLC range.
    /// </summary>
    /// <remarks>
    /// Verification recomputes each entry's link from (prev_link, job_id, t_hlc,
    /// payload_hash) and checks prev-link continuity between consecutive entries.
    /// NOTE(review): chain links are computed per partition at enqueue time, so
    /// verifying with a null <paramref name="partitionKey"/> spans all partitions —
    /// confirm that is meaningful for the caller.
    /// </remarks>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startHlc">Start of the HLC range (inclusive, null for unbounded).</param>
    /// <param name="endHlc">End of the HLC range (inclusive, null for unbounded).</param>
    /// <param name="partitionKey">Optional partition key to verify (null for all partitions).</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Verification result.</returns>
    Task<ChainVerificationResult> VerifyAsync(
        string tenantId,
        HlcTimestamp? startHlc = null,
        HlcTimestamp? endHlc = null,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Verifies a single chain link.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="jobId">The job identifier to verify.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Verification result for the single entry.</returns>
    Task<ChainVerificationResult> VerifyEntryAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of chain verification.
/// </summary>
/// <remarks>
/// The verifier in this module sets <see cref="IsValid"/> to true exactly when
/// <see cref="Issues"/> is empty.
/// </remarks>
/// <param name="IsValid">Whether the chain is valid.</param>
/// <param name="EntriesChecked">Number of entries checked.</param>
/// <param name="Issues">List of verification issues found.</param>
public readonly record struct ChainVerificationResult(
    bool IsValid,
    int EntriesChecked,
    IReadOnlyList<ChainVerificationIssue> Issues);
/// <summary>
/// A specific issue found during chain verification.
/// </summary>
/// <param name="JobId">The job ID where the issue was found.</param>
/// <param name="THlc">The HLC timestamp of the problematic entry (empty string when the entry was not found).</param>
/// <param name="IssueType">
/// Type of issue found. Values emitted by <see cref="SchedulerChainVerifier"/> in
/// this module include "PrevLinkMismatch", "LinkMismatch", "NotFound" and
/// "TenantMismatch".
/// </param>
/// <param name="Description">Human-readable description of the issue.</param>
public readonly record struct ChainVerificationIssue(
    Guid JobId,
    string THlc,
    string IssueType,
    string Description);
/// <summary>
/// Implementation of scheduler chain verification.
/// </summary>
public sealed class SchedulerChainVerifier : ISchedulerChainVerifier
{
private readonly ISchedulerLogRepository _logRepository;
private readonly ILogger<SchedulerChainVerifier> _logger;
/// <summary>
/// Creates a new chain verifier.
/// </summary>
public SchedulerChainVerifier(
    ISchedulerLogRepository logRepository,
    ILogger<SchedulerChainVerifier> logger)
{
    // Guard clauses keep the stored fields provably non-null.
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(logger);
    _logRepository = logRepository;
    _logger = logger;
}
/// <inheritdoc />
public async Task<ChainVerificationResult> VerifyAsync(
    string tenantId,
    HlcTimestamp? startHlc = null,
    HlcTimestamp? endHlc = null,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    // Null bounds become unbounded range edges at the repository.
    var startT = startHlc?.ToSortableString();
    var endT = endHlc?.ToSortableString();

    // NOTE(review): limit: 0 is assumed to mean "unbounded"; confirm the repository
    // does not interpret zero as "return no rows". An unbounded fetch also loads the
    // whole range into memory — acceptable for audit jobs, but confirm for very large
    // tenants.
    var entries = await _logRepository.GetByHlcRangeAsync(
        tenantId,
        startT,
        endT,
        limit: 0, // No limit
        partitionKey,
        cancellationToken).ConfigureAwait(false);

    if (entries.Count == 0)
    {
        // An empty range is vacuously valid.
        _logger.LogDebug(
            "No entries to verify in range [{Start}, {End}] for tenant {TenantId}",
            startT ?? "(unbounded)",
            endT ?? "(unbounded)",
            tenantId);
        return new ChainVerificationResult(IsValid: true, EntriesChecked: 0, Issues: []);
    }

    var issues = new List<ChainVerificationIssue>();
    byte[]? expectedPrevLink = null;

    // If starting mid-chain, we need to get the previous entry's link.
    // NOTE(review): this queries the range (unbounded, startT] with limit: 1. If
    // GetByHlcRangeAsync returns entries in ascending HLC order, element [0] is the
    // EARLIEST entry in the range, not the immediate predecessor of startT, so
    // expectedPrevLink would be wrong whenever more than one entry precedes the
    // range — confirm the repository's ordering/limit semantics.
    if (startHlc is not null)
    {
        var previousEntries = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startTHlc: null,
            startT,
            limit: 1,
            partitionKey,
            cancellationToken).ConfigureAwait(false);
        // Skip the case where the single hit is the range boundary itself.
        if (previousEntries.Count > 0 && previousEntries[0].THlc != startT)
        {
            expectedPrevLink = previousEntries[0].Link;
        }
    }

    // Walk the range in order, re-deriving every link.
    // NOTE(review): chain links are computed per (tenant, partition) at enqueue time;
    // when partitionKey is null this loop interleaves entries from all partitions as
    // if they formed a single chain, which would report spurious PrevLinkMismatch
    // issues — confirm callers always pass a concrete partition key.
    foreach (var entry in entries)
    {
        // Continuity check: stored prev_link must equal the previous entry's link.
        if (!ByteArrayEquals(entry.PrevLink, expectedPrevLink))
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "PrevLinkMismatch",
                $"Expected {ToHex(expectedPrevLink)}, got {ToHex(entry.PrevLink)}"));
        }

        // Integrity check: the stored link must equal the hash recomputed over
        // (prev_link, job_id, t_hlc, payload_hash).
        var computed = SchedulerChainLinking.ComputeLink(
            entry.PrevLink,
            entry.JobId,
            HlcTimestamp.Parse(entry.THlc),
            entry.PayloadHash);

        if (!ByteArrayEquals(entry.Link, computed))
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "LinkMismatch",
                $"Stored link doesn't match computed. Stored={ToHex(entry.Link)}, Computed={ToHex(computed)}"));
        }

        expectedPrevLink = entry.Link;
    }

    var isValid = issues.Count == 0;

    _logger.LogInformation(
        "Chain verification complete. TenantId={TenantId}, Range=[{Start}, {End}], EntriesChecked={Count}, IsValid={IsValid}, IssueCount={IssueCount}",
        tenantId,
        startT ?? "(unbounded)",
        endT ?? "(unbounded)",
        entries.Count,
        isValid,
        issues.Count);

    return new ChainVerificationResult(isValid, entries.Count, issues);
}
/// <inheritdoc />
/// <remarks>
/// Verifies a single entry: enforces tenant ownership, recomputes the chain link
/// from the entry's stored fields, and — when the entry claims a predecessor via
/// <c>prev_link</c> — checks that the predecessor exists in the same partition and
/// that its <c>link</c> matches. Returns all detected issues rather than failing fast.
/// </remarks>
public async Task<ChainVerificationResult> VerifyEntryAsync(
    string tenantId,
    Guid jobId,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var entry = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
    if (entry is null)
    {
        return new ChainVerificationResult(
            IsValid: false,
            EntriesChecked: 0,
            Issues: [new ChainVerificationIssue(jobId, string.Empty, "NotFound", "Entry not found")]);
    }

    // Verify tenant isolation: never expose chain details across tenants.
    if (!string.Equals(entry.TenantId, tenantId, StringComparison.Ordinal))
    {
        return new ChainVerificationResult(
            IsValid: false,
            EntriesChecked: 0,
            Issues: [new ChainVerificationIssue(jobId, entry.THlc, "TenantMismatch", "Entry belongs to different tenant")]);
    }

    var issues = new List<ChainVerificationIssue>();

    // Recompute link = Hash(prev_link || job_id || t_hlc || payload_hash) and
    // compare against the stored value.
    var computed = SchedulerChainLinking.ComputeLink(
        entry.PrevLink,
        entry.JobId,
        HlcTimestamp.Parse(entry.THlc),
        entry.PayloadHash);

    if (!ByteArrayEquals(entry.Link, computed))
    {
        // Include both values in hex for diagnostics, matching VerifyAsync.
        issues.Add(new ChainVerificationIssue(
            entry.JobId,
            entry.THlc,
            "LinkMismatch",
            $"Stored link doesn't match computed. Stored={ToHex(entry.Link)}, Computed={ToHex(computed)}"));
    }

    // If there's a prev_link, verify the predecessor exists and its link matches.
    if (entry.PrevLink is { Length: > 0 })
    {
        // Fetch all entries up to (and including) this HLC in the same
        // tenant/partition, then take the latest strictly-earlier one.
        // NOTE(review): ties on THlc (same HLC, different job ids) are broken
        // arbitrarily here — confirm THlc is unique per partition in practice.
        var allEntries = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startTHlc: null,
            entry.THlc,
            limit: 0,
            partitionKey: entry.PartitionKey,
            cancellationToken).ConfigureAwait(false);

        var prevEntry = allEntries
            .Where(e => e.THlc != entry.THlc)
            .OrderByDescending(e => e.THlc)
            .FirstOrDefault();

        if (prevEntry is null)
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "PrevEntryNotFound",
                "Entry has prev_link but no previous entry found"));
        }
        else if (!ByteArrayEquals(prevEntry.Link, entry.PrevLink))
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "PrevLinkMismatch",
                $"prev_link doesn't match previous entry's link. Expected={ToHex(prevEntry.Link)}, Got={ToHex(entry.PrevLink)}"));
        }
    }

    return new ChainVerificationResult(issues.Count == 0, 1, issues);
}
/// <summary>
/// Structural equality for nullable byte arrays. Two nulls (or the same instance)
/// are equal; a null and a non-null array are not — even when the array is empty;
/// otherwise contents are compared byte-for-byte.
/// </summary>
private static bool ByteArrayEquals(byte[]? a, byte[]? b)
{
    if (ReferenceEquals(a, b))
    {
        return true;
    }

    if (a is null || b is null)
    {
        return false;
    }

    // Empty arrays compare equal here too, so no special case is needed.
    return a.AsSpan().SequenceEqual(b);
}
/// <summary>
/// Renders a chain link as an uppercase hex string for diagnostics,
/// or "(null)" when the value is absent.
/// </summary>
private static string ToHex(byte[]? bytes)
{
    if (bytes is null)
    {
        return "(null)";
    }

    return Convert.ToHexString(bytes);
}
}

View File

@@ -0,0 +1,21 @@
// <copyright file="SchedulerDequeueResult.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Result of an HLC-ordered scheduler dequeue operation.
/// Immutable value carrier returned by the HLC-ordered dequeue path.
/// </summary>
/// <param name="Entries">The dequeued scheduler log entries in HLC order.</param>
/// <param name="TotalAvailable">Total count of entries available in the specified range.
/// NOTE(review): whether this can exceed <c>Entries.Count</c> (e.g. when a batch limit
/// was applied) depends on the repository's range-query semantics — confirm at call sites.</param>
/// <param name="RangeStartHlc">The HLC start of the queried range (null if unbounded).</param>
/// <param name="RangeEndHlc">The HLC end of the queried range (null if unbounded).</param>
public readonly record struct SchedulerHlcDequeueResult(
    IReadOnlyList<SchedulerLogEntry> Entries,
    int TotalAvailable,
    HlcTimestamp? RangeStartHlc,
    HlcTimestamp? RangeEndHlc);

View File

@@ -0,0 +1,20 @@
// <copyright file="SchedulerEnqueueResult.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Result of an HLC-ordered scheduler enqueue operation.
/// </summary>
/// <param name="THlc">The HLC timestamp assigned to the job at enqueue time.</param>
/// <param name="JobId">The deterministic job identifier.</param>
/// <param name="Link">The chain link computed for this entry:
/// Hash(prev_link || job_id || t_hlc || payload_hash).</param>
/// <param name="Deduplicated">True if the job was already enqueued (idempotent).</param>
public readonly record struct SchedulerHlcEnqueueResult(
    HlcTimestamp THlc,
    Guid JobId,
    byte[] Link,
    bool Deduplicated);

View File

@@ -6,6 +6,7 @@ using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue.Nats;
@@ -18,6 +19,7 @@ internal sealed class NatsSchedulerPlannerQueue
SchedulerNatsQueueOptions natsOptions,
ILogger<NatsSchedulerPlannerQueue> logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
: base(
queueOptions,
@@ -26,6 +28,7 @@ internal sealed class NatsSchedulerPlannerQueue
PlannerPayload.Instance,
logger,
timeProvider,
hlc,
connectionFactory)
{
}

View File

@@ -9,6 +9,7 @@ using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using NATS.Client.JetStream.Models;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Nats;
@@ -24,6 +25,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
private readonly INatsSchedulerQueuePayload<TMessage> _payload;
private readonly ILogger _logger;
private readonly TimeProvider _timeProvider;
private readonly IHybridLogicalClock? _hlc;
private readonly SemaphoreSlim _connectionGate = new(1, 1);
private readonly Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>> _connectionFactory;
@@ -40,6 +42,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
INatsSchedulerQueuePayload<TMessage> payload,
ILogger logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
{
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
@@ -48,6 +51,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? TimeProvider.System;
_hlc = hlc;
_connectionFactory = connectionFactory ?? ((opts, cancellationToken) => new ValueTask<NatsConnection>(new NatsConnection(opts)));
if (string.IsNullOrWhiteSpace(_natsOptions.Url))
@@ -67,7 +71,11 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
var payloadBytes = _payload.Serialize(message);
var idempotencyKey = _payload.GetIdempotencyKey(message);
var headers = BuildHeaders(message, idempotencyKey);
// Generate HLC timestamp if clock is available
var hlcTimestamp = _hlc?.Tick();
var headers = BuildHeaders(message, idempotencyKey, hlcTimestamp);
var publishOptions = new NatsJSPubOpts
{
@@ -531,6 +539,14 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
? DateTimeOffset.FromUnixTimeMilliseconds(unix)
: now;
// Parse HLC timestamp if present
HlcTimestamp? hlcTimestamp = null;
if (headers.TryGetValue(SchedulerQueueFields.HlcTimestamp, out var hlcValues) && hlcValues.Count > 0
&& HlcTimestamp.TryParse(hlcValues[0], out var parsedHlc))
{
hlcTimestamp = parsedHlc;
}
var leaseExpires = now.Add(leaseDuration);
var runId = _payload.GetRunId(deserialized);
var tenantId = _payload.GetTenantId(deserialized);
@@ -558,10 +574,11 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
attempt,
enqueuedAt,
leaseExpires,
consumer);
consumer,
hlcTimestamp);
}
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey)
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey, HlcTimestamp? hlcTimestamp = null)
{
var headers = new NatsHeaders
{
@@ -572,6 +589,12 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
{ SchedulerQueueFields.EnqueuedAt, _timeProvider.GetUtcNow().ToUnixTimeMilliseconds().ToString() }
};
// Include HLC timestamp if available
if (hlcTimestamp.HasValue)
{
headers.Add(SchedulerQueueFields.HlcTimestamp, hlcTimestamp.Value.ToSortableString());
}
var scheduleId = _payload.GetScheduleId(message);
if (!string.IsNullOrWhiteSpace(scheduleId))
{

View File

@@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using NATS.Client.JetStream;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Nats;
@@ -26,7 +27,8 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
int attempt,
DateTimeOffset enqueuedAt,
DateTimeOffset leaseExpiresAt,
string consumer)
string consumer,
HlcTimestamp? hlcTimestamp = null)
{
_queue = queue;
MessageId = message.Metadata?.Sequence.ToString() ?? idempotencyKey;
@@ -44,6 +46,7 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
Message = deserialized;
_message = message;
Payload = payload;
HlcTimestamp = hlcTimestamp;
}
private readonly NatsJSMsg<byte[]> _message;
@@ -78,6 +81,8 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
public string Consumer { get; }
public HlcTimestamp? HlcTimestamp { get; }
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
=> _queue.AcknowledgeAsync(this, cancellationToken);

View File

@@ -7,6 +7,7 @@ using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NATS.Client.Core;
using NATS.Client.JetStream;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue.Nats;
@@ -19,6 +20,7 @@ internal sealed class NatsSchedulerRunnerQueue
SchedulerNatsQueueOptions natsOptions,
ILogger<NatsSchedulerRunnerQueue> logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
: base(
queueOptions,
@@ -27,6 +29,7 @@ internal sealed class NatsSchedulerRunnerQueue
RunnerPayload.Instance,
logger,
timeProvider,
hlc,
connectionFactory)
{
}

View File

@@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StackExchange.Redis;
using StellaOps.Scheduler.Models;
@@ -16,6 +17,7 @@ internal sealed class RedisSchedulerPlannerQueue
SchedulerRedisQueueOptions redisOptions,
ILogger<RedisSchedulerPlannerQueue> logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
: base(
queueOptions,
@@ -24,6 +26,7 @@ internal sealed class RedisSchedulerPlannerQueue
PlannerPayload.Instance,
logger,
timeProvider,
hlc,
connectionFactory)
{
}

View File

@@ -6,6 +6,7 @@ using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StackExchange.Redis;
namespace StellaOps.Scheduler.Queue.Redis;
@@ -20,6 +21,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
private readonly IRedisSchedulerQueuePayload<TMessage> _payload;
private readonly ILogger _logger;
private readonly TimeProvider _timeProvider;
private readonly IHybridLogicalClock? _hlc;
private readonly Func<ConfigurationOptions, Task<IConnectionMultiplexer>> _connectionFactory;
private readonly SemaphoreSlim _connectionLock = new(1, 1);
private readonly SemaphoreSlim _groupInitLock = new(1, 1);
@@ -36,6 +38,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
IRedisSchedulerQueuePayload<TMessage> payload,
ILogger logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
{
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
@@ -44,6 +47,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_hlc = hlc;
_connectionFactory = connectionFactory ?? (config => Task.FromResult<IConnectionMultiplexer>(ConnectionMultiplexer.Connect(config)));
if (string.IsNullOrWhiteSpace(_redisOptions.ConnectionString))
@@ -74,7 +78,11 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
var now = _timeProvider.GetUtcNow();
var attempt = 1;
var entries = BuildEntries(message, now, attempt);
// Generate HLC timestamp if clock is available
var hlcTimestamp = _hlc?.Tick();
var entries = BuildEntries(message, now, attempt, hlcTimestamp);
var messageId = await AddToStreamAsync(
database,
@@ -555,11 +563,12 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
private NameValueEntry[] BuildEntries(
TMessage message,
DateTimeOffset enqueuedAt,
int attempt)
int attempt,
HlcTimestamp? hlcTimestamp = null)
{
var attributes = _payload.GetAttributes(message);
var attributeCount = attributes?.Count ?? 0;
var entries = ArrayPool<NameValueEntry>.Shared.Rent(10 + attributeCount);
var entries = ArrayPool<NameValueEntry>.Shared.Rent(11 + attributeCount);
var index = 0;
entries[index++] = new NameValueEntry(SchedulerQueueFields.QueueKind, _payload.QueueName);
@@ -589,6 +598,12 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
entries[index++] = new NameValueEntry(SchedulerQueueFields.EnqueuedAt, enqueuedAt.ToUnixTimeMilliseconds());
entries[index++] = new NameValueEntry(SchedulerQueueFields.Payload, _payload.Serialize(message));
// Include HLC timestamp if available
if (hlcTimestamp.HasValue)
{
entries[index++] = new NameValueEntry(SchedulerQueueFields.HlcTimestamp, hlcTimestamp.Value.ToSortableString());
}
if (attributeCount > 0 && attributes is not null)
{
foreach (var kvp in attributes)
@@ -623,6 +638,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
string? segmentId = null;
string? correlationId = null;
string? idempotencyKey = null;
string? hlcTimestampStr = null;
long? enqueuedAtUnix = null;
var attempt = attemptOverride ?? 1;
var attributes = new Dictionary<string, string>(StringComparer.Ordinal);
@@ -676,6 +692,10 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
: Math.Max(1, parsedAttempt);
}
}
else if (name.Equals(SchedulerQueueFields.HlcTimestamp, StringComparison.Ordinal))
{
hlcTimestampStr = NormalizeOptional(value.ToString());
}
else if (name.StartsWith(SchedulerQueueFields.AttributePrefix, StringComparison.Ordinal))
{
var key = name[SchedulerQueueFields.AttributePrefix.Length..];
@@ -692,6 +712,14 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
var enqueuedAt = DateTimeOffset.FromUnixTimeMilliseconds(enqueuedAtUnix.Value);
var leaseExpires = now.Add(leaseDuration);
// Parse HLC timestamp if present
HlcTimestamp? hlcTimestamp = null;
if (!string.IsNullOrEmpty(hlcTimestampStr) &&
HlcTimestamp.TryParse(hlcTimestampStr, out var parsedHlc))
{
hlcTimestamp = parsedHlc;
}
IReadOnlyDictionary<string, string> attributeView = attributes.Count == 0
? EmptyReadOnlyDictionary<string, string>.Instance
: new ReadOnlyDictionary<string, string>(attributes);
@@ -710,7 +738,8 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
attempt,
enqueuedAt,
leaseExpires,
consumer);
consumer,
hlcTimestamp);
}
private async Task HandlePoisonEntryAsync(IDatabase database, RedisValue entryId)

View File

@@ -2,6 +2,7 @@ using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Redis;
@@ -24,7 +25,8 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
int attempt,
DateTimeOffset enqueuedAt,
DateTimeOffset leaseExpiresAt,
string consumer)
string consumer,
HlcTimestamp? hlcTimestamp = null)
{
_queue = queue;
MessageId = messageId;
@@ -40,6 +42,7 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
EnqueuedAt = enqueuedAt;
LeaseExpiresAt = leaseExpiresAt;
Consumer = consumer;
HlcTimestamp = hlcTimestamp;
}
public string MessageId { get; }
@@ -68,6 +71,8 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
public string Consumer { get; }
public HlcTimestamp? HlcTimestamp { get; }
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
=> _queue.AcknowledgeAsync(this, cancellationToken);

View File

@@ -4,6 +4,7 @@ using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StackExchange.Redis;
using StellaOps.Scheduler.Models;
@@ -17,6 +18,7 @@ internal sealed class RedisSchedulerRunnerQueue
SchedulerRedisQueueOptions redisOptions,
ILogger<RedisSchedulerRunnerQueue> logger,
TimeProvider timeProvider,
IHybridLogicalClock? hlc = null,
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
: base(
queueOptions,
@@ -25,6 +27,7 @@ internal sealed class RedisSchedulerRunnerQueue
RunnerPayload.Instance,
logger,
timeProvider,
hlc,
connectionFactory)
{
}

View File

@@ -4,6 +4,7 @@ using System.Collections.ObjectModel;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Queue;
@@ -284,6 +285,13 @@ public interface ISchedulerQueueLease<out TMessage>
TMessage Message { get; }
/// <summary>
/// Gets the Hybrid Logical Clock timestamp assigned at enqueue time.
/// Provides deterministic ordering across distributed nodes.
/// Null if HLC was not enabled when the message was enqueued.
/// </summary>
HlcTimestamp? HlcTimestamp { get; }
Task AcknowledgeAsync(CancellationToken cancellationToken = default);
Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default);

View File

@@ -13,4 +13,10 @@ internal static class SchedulerQueueFields
public const string QueueKind = "queueKind";
public const string CorrelationId = "correlationId";
public const string AttributePrefix = "attr:";
/// <summary>
/// Hybrid Logical Clock timestamp for deterministic ordering.
/// Stored as sortable string format: {PhysicalTime:D13}-{NodeId}-{LogicalCounter:D6}
/// </summary>
public const string HlcTimestamp = "hlcTimestamp";
}

View File

@@ -35,6 +35,54 @@ public sealed class SchedulerQueueOptions
/// Cap applied to the retry delay when exponential backoff is used.
/// </summary>
public TimeSpan RetryMaxBackoff { get; set; } = TimeSpan.FromMinutes(1);
/// <summary>
/// HLC (Hybrid Logical Clock) ordering options.
/// </summary>
public SchedulerHlcOptions Hlc { get; set; } = new();
}
/// <summary>
/// Options for HLC-based queue ordering and chain linking.
/// </summary>
public sealed class SchedulerHlcOptions
{
    /// <summary>
    /// Enable HLC-based ordering with chain linking. Default: false.
    /// When false, uses legacy (priority, created_at) ordering.
    /// </summary>
    /// <remarks>
    /// When enabled, all enqueue operations will:
    /// - Assign an HLC timestamp for global ordering
    /// - Compute and store chain links for audit proofs
    /// - Persist entries to the scheduler_log table
    /// </remarks>
    public bool EnableHlcOrdering { get; set; }

    /// <summary>
    /// When true, writes to both legacy and HLC tables during migration. Default: false.
    /// This allows gradual migration from legacy ordering to HLC ordering.
    /// </summary>
    /// <remarks>
    /// Migration path:
    /// 1. Deploy with DualWriteMode = true (writes to both tables)
    /// 2. Backfill scheduler_log from existing scheduler.jobs
    /// 3. Enable EnableHlcOrdering = true for reads
    /// 4. Disable DualWriteMode, deprecate legacy ordering
    /// </remarks>
    public bool DualWriteMode { get; set; }

    /// <summary>
    /// Enable automatic chain verification on dequeue. Default: false.
    /// When enabled, each dequeued batch is verified for chain integrity.
    /// </summary>
    public bool VerifyOnDequeue { get; set; }

    /// <summary>
    /// Maximum clock drift tolerance in milliseconds. Default: 60000 (1 minute).
    /// HLC timestamps from messages with drift exceeding this value will be rejected.
    /// </summary>
    public int MaxClockDriftMs { get; set; } = 60000; // 1 minute default
}
public sealed class SchedulerRedisQueueOptions

View File

@@ -4,6 +4,7 @@ using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Queue.Nats;
using StellaOps.Scheduler.Queue.Redis;
@@ -29,6 +30,7 @@ public static class SchedulerQueueServiceCollectionExtensions
{
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
var hlc = sp.GetService<IHybridLogicalClock>();
return options.Kind switch
{
@@ -36,12 +38,14 @@ public static class SchedulerQueueServiceCollectionExtensions
options,
options.Redis,
loggerFactory.CreateLogger<RedisSchedulerPlannerQueue>(),
timeProvider),
timeProvider,
hlc),
SchedulerQueueTransportKind.Nats => new NatsSchedulerPlannerQueue(
options,
options.Nats,
loggerFactory.CreateLogger<NatsSchedulerPlannerQueue>(),
timeProvider),
timeProvider,
hlc),
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
};
});
@@ -50,6 +54,7 @@ public static class SchedulerQueueServiceCollectionExtensions
{
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
var hlc = sp.GetService<IHybridLogicalClock>();
return options.Kind switch
{
@@ -57,12 +62,14 @@ public static class SchedulerQueueServiceCollectionExtensions
options,
options.Redis,
loggerFactory.CreateLogger<RedisSchedulerRunnerQueue>(),
timeProvider),
timeProvider,
hlc),
SchedulerQueueTransportKind.Nats => new NatsSchedulerRunnerQueue(
options,
options.Nats,
loggerFactory.CreateLogger<NatsSchedulerRunnerQueue>(),
timeProvider),
timeProvider,
hlc),
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
};
});

View File

@@ -18,5 +18,8 @@
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scheduler.Models\StellaOps.Scheduler.Models.csproj" />
<ProjectReference Include="..\StellaOps.Scheduler.Persistence\StellaOps.Scheduler.Persistence.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.HybridLogicalClock\StellaOps.HybridLogicalClock.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Canonical.Json\StellaOps.Canonical.Json.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,337 @@
// <copyright file="SchedulerChainLinkingTests.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using FluentAssertions;
using StellaOps.HybridLogicalClock;
using Xunit;
namespace StellaOps.Scheduler.Persistence.Tests;
[Trait("Category", "Unit")]
public sealed class SchedulerChainLinkingTests
{
[Fact]
public void ComputeLink_WithNullPrevLink_UsesZeroLink()
{
// Arrange
var jobId = Guid.Parse("12345678-1234-1234-1234-123456789012");
var hlc = new HlcTimestamp { PhysicalTime = 1000000000000L, NodeId = "node1", LogicalCounter = 1 };
var payloadHash = new byte[32];
payloadHash[0] = 0xAB;
// Act
var link1 = SchedulerChainLinking.ComputeLink(null, jobId, hlc, payloadHash);
var link2 = SchedulerChainLinking.ComputeLink(SchedulerChainLinking.ZeroLink, jobId, hlc, payloadHash);
// Assert
link1.Should().HaveCount(32);
link1.Should().BeEquivalentTo(link2, "null prev_link should be treated as zero link");
}
[Fact]
public void ComputeLink_IsDeterministic_SameInputsSameOutput()
{
// Arrange
var prevLink = new byte[32];
prevLink[0] = 0x01;
var jobId = Guid.Parse("AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE");
var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "scheduler-1", LogicalCounter = 42 };
var payloadHash = new byte[32];
for (int i = 0; i < 32; i++) payloadHash[i] = (byte)i;
// Act
var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);
var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);
var link3 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);
// Assert
link1.Should().BeEquivalentTo(link2);
link2.Should().BeEquivalentTo(link3);
}
[Fact]
public void ComputeLink_DifferentJobIds_ProduceDifferentLinks()
{
// Arrange
var prevLink = new byte[32];
var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };
var payloadHash = new byte[32];
var jobId1 = Guid.Parse("11111111-1111-1111-1111-111111111111");
var jobId2 = Guid.Parse("22222222-2222-2222-2222-222222222222");
// Act
var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId1, hlc, payloadHash);
var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId2, hlc, payloadHash);
// Assert
link1.Should().NotBeEquivalentTo(link2);
}
[Fact]
public void ComputeLink_DifferentHlcTimestamps_ProduceDifferentLinks()
{
// Arrange
var prevLink = new byte[32];
var jobId = Guid.NewGuid();
var payloadHash = new byte[32];
var hlc1 = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };
var hlc2 = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 2 }; // Different counter
var hlc3 = new HlcTimestamp { PhysicalTime = 1704067200001L, NodeId = "node1", LogicalCounter = 1 }; // Different physical time
// Act
var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc1, payloadHash);
var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc2, payloadHash);
var link3 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc3, payloadHash);
// Assert
link1.Should().NotBeEquivalentTo(link2);
link1.Should().NotBeEquivalentTo(link3);
link2.Should().NotBeEquivalentTo(link3);
}
[Fact]
public void ComputeLink_DifferentPrevLinks_ProduceDifferentLinks()
{
// Arrange
var jobId = Guid.NewGuid();
var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };
var payloadHash = new byte[32];
var prevLink1 = new byte[32];
var prevLink2 = new byte[32];
prevLink2[0] = 0xFF;
// Act
var link1 = SchedulerChainLinking.ComputeLink(prevLink1, jobId, hlc, payloadHash);
var link2 = SchedulerChainLinking.ComputeLink(prevLink2, jobId, hlc, payloadHash);
// Assert
link1.Should().NotBeEquivalentTo(link2);
}
[Fact]
public void ComputeLink_DifferentPayloadHashes_ProduceDifferentLinks()
{
// Arrange
var prevLink = new byte[32];
var jobId = Guid.NewGuid();
var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };
var payload1 = new byte[32];
var payload2 = new byte[32];
payload2[31] = 0x01;
// Act
var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payload1);
var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payload2);
// Assert
link1.Should().NotBeEquivalentTo(link2);
}
[Fact]
public void ComputeLink_WithStringHlc_ProducesSameResultAsParsedHlc()
{
// Arrange
var prevLink = new byte[32];
var jobId = Guid.NewGuid();
var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 42 };
var hlcString = hlc.ToSortableString();
var payloadHash = new byte[32];
// Act
var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);
var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlcString, payloadHash);
// Assert
link1.Should().BeEquivalentTo(link2);
}
[Fact]
public void VerifyLink_ValidLink_ReturnsTrue()
{
// Arrange
var prevLink = new byte[32];
prevLink[0] = 0xDE;
var jobId = Guid.NewGuid();
var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "verifier", LogicalCounter = 100 };
var payloadHash = new byte[32];
payloadHash[15] = 0xAD;
var computedLink = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);
// Act
var isValid = SchedulerChainLinking.VerifyLink(computedLink, prevLink, jobId, hlc, payloadHash);
// Assert
isValid.Should().BeTrue();
}
[Fact]
public void VerifyLink_TamperedLink_ReturnsFalse()
{
// Arrange
var prevLink = new byte[32];
var jobId = Guid.NewGuid();
var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };
var payloadHash = new byte[32];
var computedLink = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);
// Tamper with the link
var tamperedLink = (byte[])computedLink.Clone();
tamperedLink[0] ^= 0xFF;
// Act
var isValid = SchedulerChainLinking.VerifyLink(tamperedLink, prevLink, jobId, hlc, payloadHash);
// Assert
isValid.Should().BeFalse();
}
[Fact]
public void ComputePayloadHash_IsDeterministic()
{
// Arrange
var payload = new { Id = 123, Name = "Test", Values = new[] { 1, 2, 3 } };
// Act
var hash1 = SchedulerChainLinking.ComputePayloadHash(payload);
var hash2 = SchedulerChainLinking.ComputePayloadHash(payload);
// Assert
hash1.Should().HaveCount(32);
hash1.Should().BeEquivalentTo(hash2);
}
[Fact]
public void ComputePayloadHash_DifferentPayloads_ProduceDifferentHashes()
{
// Arrange
var payload1 = new { Id = 1, Name = "First" };
var payload2 = new { Id = 2, Name = "Second" };
// Act
var hash1 = SchedulerChainLinking.ComputePayloadHash(payload1);
var hash2 = SchedulerChainLinking.ComputePayloadHash(payload2);
// Assert
hash1.Should().NotBeEquivalentTo(hash2);
}
[Fact]
public void ComputePayloadHash_ByteArray_ProducesConsistentHash()
{
// Arrange
var bytes = new byte[] { 0x01, 0x02, 0x03, 0x04, 0x05 };
// Act
var hash1 = SchedulerChainLinking.ComputePayloadHash(bytes);
var hash2 = SchedulerChainLinking.ComputePayloadHash(bytes);
// Assert
hash1.Should().HaveCount(32);
hash1.Should().BeEquivalentTo(hash2);
}
[Fact]
public void ToHex_NullLink_ReturnsNullString()
{
// Act
var result = SchedulerChainLinking.ToHex(null);
// Assert
result.Should().Be("(null)");
}
[Fact]
public void ToHex_EmptyLink_ReturnsNullString()
{
// Act
var result = SchedulerChainLinking.ToHex(Array.Empty<byte>());
// Assert
result.Should().Be("(null)");
}
[Fact]
public void ToHex_ValidLink_ReturnsLowercaseHex()
{
// Arrange
var link = new byte[] { 0xAB, 0xCD, 0xEF };
// Act
var result = SchedulerChainLinking.ToHex(link);
// Assert
result.Should().Be("abcdef");
}
[Fact]
public void ChainIntegrity_SequentialLinks_FormValidChain()
{
// Arrange - Simulate a chain of 5 entries
var jobIds = Enumerable.Range(1, 5).Select(i => Guid.NewGuid()).ToList();
var payloads = jobIds.Select(id => SchedulerChainLinking.ComputePayloadHash(new { JobId = id })).ToList();
var links = new List<byte[]>();
byte[]? prevLink = null;
long baseTime = 1704067200000L;
// Act - Build chain
for (int i = 0; i < 5; i++)
{
var hlc = new HlcTimestamp { PhysicalTime = baseTime + i, NodeId = "node1", LogicalCounter = i };
var link = SchedulerChainLinking.ComputeLink(prevLink, jobIds[i], hlc, payloads[i]);
links.Add(link);
prevLink = link;
}
// Assert - Verify chain integrity
byte[]? expectedPrev = null;
for (int i = 0; i < 5; i++)
{
var hlc = new HlcTimestamp { PhysicalTime = baseTime + i, NodeId = "node1", LogicalCounter = i };
var isValid = SchedulerChainLinking.VerifyLink(links[i], expectedPrev, jobIds[i], hlc, payloads[i]);
isValid.Should().BeTrue($"Link {i} should be valid");
expectedPrev = links[i];
}
}
[Fact]
public void ChainIntegrity_TamperedMiddleLink_BreaksChain()
{
    // Arrange - a three-entry chain with strictly increasing physical times.
    var ids = new[] { Guid.NewGuid(), Guid.NewGuid(), Guid.NewGuid() };
    var hashes = ids.Select(id => SchedulerChainLinking.ComputePayloadHash(new { JobId = id })).ToArray();
    var stamps = new[]
    {
        new HlcTimestamp { PhysicalTime = 1000L, NodeId = "node1", LogicalCounter = 0 },
        new HlcTimestamp { PhysicalTime = 1001L, NodeId = "node1", LogicalCounter = 0 },
        new HlcTimestamp { PhysicalTime = 1002L, NodeId = "node1", LogicalCounter = 0 }
    };
    var first = SchedulerChainLinking.ComputeLink(null, ids[0], stamps[0], hashes[0]);
    var second = SchedulerChainLinking.ComputeLink(first, ids[1], stamps[1], hashes[1]);
    var third = SchedulerChainLinking.ComputeLink(second, ids[2], stamps[2], hashes[2]);

    // Flip bits in the middle link to simulate tampering.
    var corruptedSecond = (byte[])second.Clone();
    corruptedSecond[0] ^= 0xFF;

    // Act & Assert - the untouched head still verifies.
    SchedulerChainLinking.VerifyLink(first, null, ids[0], stamps[0], hashes[0])
        .Should().BeTrue("First link should be valid");
    // The corrupted link no longer matches its recomputed hash.
    SchedulerChainLinking.VerifyLink(corruptedSecond, first, ids[1], stamps[1], hashes[1])
        .Should().BeFalse("Tampered middle link should fail verification");
    // Downstream verification breaks too, because its prev_link input is wrong.
    SchedulerChainLinking.VerifyLink(third, corruptedSecond, ids[2], stamps[2], hashes[2])
        .Should().BeFalse("Third link should fail with tampered prev_link");
}
}

View File

@@ -0,0 +1,427 @@
// <copyright file="HlcQueueIntegrationTests.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using FluentAssertions;
using Microsoft.Extensions.Logging.Abstractions;
using StackExchange.Redis;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Queue.Redis;
using StellaOps.TestKit;
using Testcontainers.Redis;
using Xunit;
using HybridLogicalClockImpl = StellaOps.HybridLogicalClock.HybridLogicalClock;
namespace StellaOps.Scheduler.Queue.Tests;
/// <summary>
/// Integration tests for HLC (Hybrid Logical Clock) integration with scheduler queues.
/// Verifies that Redis-backed planner/runner queues stamp leased messages with an HLC
/// timestamp when an HLC instance is supplied, omit it when not, and that stamped
/// timestamps are monotonically increasing and round-trip through their sortable
/// string form. Requires a Docker engine; tests self-skip when one is unavailable.
/// </summary>
[Trait("Category", TestCategories.Integration)]
public sealed class HlcQueueIntegrationTests : IAsyncLifetime
{
// Redis container backing every queue constructed by this fixture.
private readonly RedisContainer _redis;
// Non-null once container startup fails for Docker-related reasons;
// each test short-circuits via SkipIfUnavailable() instead of failing.
private string? _skipReason;
public HlcQueueIntegrationTests()
{
_redis = new RedisBuilder().Build();
}
/// <summary>
/// Starts the Redis container. When Docker is unavailable, records a skip
/// reason rather than letting the fixture throw and fail the whole class.
/// </summary>
public async ValueTask InitializeAsync()
{
try
{
await _redis.StartAsync();
}
catch (Exception ex) when (IsDockerUnavailable(ex))
{
_skipReason = $"Docker engine is not available for Redis-backed tests: {ex.Message}";
}
}
/// <summary>
/// Tears down the Redis container; nothing to dispose if startup was skipped.
/// </summary>
public async ValueTask DisposeAsync()
{
if (_skipReason is not null)
{
return;
}
await _redis.DisposeAsync().AsTask();
}
/// <summary>
/// A planner queue constructed with an HLC must attach that node's timestamp
/// to each leased message.
/// </summary>
[Fact]
public async Task PlannerQueue_WithHlc_LeasedMessageContainsHlcTimestamp()
{
if (SkipIfUnavailable())
{
return;
}
var options = CreateOptions();
var hlc = new HybridLogicalClockImpl(TimeProvider.System, "test-node-1", new InMemoryHlcStateStore());
await using var queue = new RedisSchedulerPlannerQueue(
options,
options.Redis,
NullLogger<RedisSchedulerPlannerQueue>.Instance,
TimeProvider.System,
hlc,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
var message = CreatePlannerMessage();
var enqueueResult = await queue.EnqueueAsync(message);
enqueueResult.Deduplicated.Should().BeFalse();
var leases = await queue.LeaseAsync(new SchedulerQueueLeaseRequest("planner-hlc", batchSize: 1, options.DefaultLeaseDuration));
leases.Should().ContainSingle();
var lease = leases[0];
lease.HlcTimestamp.Should().NotBeNull("HLC timestamp should be present when HLC is configured");
lease.HlcTimestamp!.Value.NodeId.Should().Be("test-node-1");
lease.HlcTimestamp.Value.PhysicalTime.Should().BeGreaterThan(0);
await lease.AcknowledgeAsync();
}
/// <summary>
/// Same contract as the planner test, but for the runner queue: an HLC-enabled
/// runner queue stamps leased messages with the runner node's timestamp.
/// </summary>
[Fact]
public async Task RunnerQueue_WithHlc_LeasedMessageContainsHlcTimestamp()
{
if (SkipIfUnavailable())
{
return;
}
var options = CreateOptions();
var hlc = new HybridLogicalClockImpl(TimeProvider.System, "runner-node-1", new InMemoryHlcStateStore());
await using var queue = new RedisSchedulerRunnerQueue(
options,
options.Redis,
NullLogger<RedisSchedulerRunnerQueue>.Instance,
TimeProvider.System,
hlc,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
var message = CreateRunnerMessage();
await queue.EnqueueAsync(message);
var leases = await queue.LeaseAsync(new SchedulerQueueLeaseRequest("runner-hlc", batchSize: 1, options.DefaultLeaseDuration));
leases.Should().ContainSingle();
var lease = leases[0];
lease.HlcTimestamp.Should().NotBeNull("HLC timestamp should be present when HLC is configured");
lease.HlcTimestamp!.Value.NodeId.Should().Be("runner-node-1");
await lease.AcknowledgeAsync();
}
/// <summary>
/// When no HLC is configured, leased messages carry no timestamp at all
/// (null, not a default value).
/// </summary>
[Fact]
public async Task PlannerQueue_WithoutHlc_LeasedMessageHasNullTimestamp()
{
if (SkipIfUnavailable())
{
return;
}
var options = CreateOptions();
// No HLC provided
await using var queue = new RedisSchedulerPlannerQueue(
options,
options.Redis,
NullLogger<RedisSchedulerPlannerQueue>.Instance,
TimeProvider.System,
hlc: null,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
var message = CreatePlannerMessage();
await queue.EnqueueAsync(message);
var leases = await queue.LeaseAsync(new SchedulerQueueLeaseRequest("planner-no-hlc", batchSize: 1, options.DefaultLeaseDuration));
leases.Should().ContainSingle();
var lease = leases[0];
lease.HlcTimestamp.Should().BeNull("HLC timestamp should be null when HLC is not configured");
await lease.AcknowledgeAsync();
}
/// <summary>
/// Timestamps assigned at enqueue time must be strictly increasing in the
/// order the messages are later leased.
/// </summary>
[Fact]
public async Task HlcTimestamp_IsMonotonicallyIncreasing_AcrossEnqueues()
{
if (SkipIfUnavailable())
{
return;
}
var options = CreateOptions();
var hlc = new HybridLogicalClockImpl(TimeProvider.System, "monotonic-test", new InMemoryHlcStateStore());
await using var queue = new RedisSchedulerPlannerQueue(
options,
options.Redis,
NullLogger<RedisSchedulerPlannerQueue>.Instance,
TimeProvider.System,
hlc,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
// Enqueue multiple messages
var messages = new List<PlannerQueueMessage>();
for (int i = 0; i < 5; i++)
{
messages.Add(CreatePlannerMessage(suffix: i.ToString()));
}
foreach (var msg in messages)
{
await queue.EnqueueAsync(msg);
}
// Lease all messages
var leases = await queue.LeaseAsync(new SchedulerQueueLeaseRequest("monotonic-consumer", batchSize: 10, options.DefaultLeaseDuration));
leases.Should().HaveCount(5);
// Verify HLC timestamps are monotonically increasing
HlcTimestamp? previousHlc = null;
foreach (var lease in leases)
{
lease.HlcTimestamp.Should().NotBeNull();
if (previousHlc.HasValue)
{
var current = lease.HlcTimestamp!.Value;
var prev = previousHlc.Value;
// Current should be greater than previous
(current > prev).Should().BeTrue(
$"HLC {current} should be greater than {prev}");
}
previousHlc = lease.HlcTimestamp;
await lease.AcknowledgeAsync();
}
}
/// <summary>
/// A leased timestamp must survive a round-trip through its sortable string
/// encoding (ToSortableString -> TryParse yields an equal value).
/// </summary>
[Fact]
public async Task HlcTimestamp_SortableString_ParsesCorrectly()
{
if (SkipIfUnavailable())
{
return;
}
var options = CreateOptions();
var hlc = new HybridLogicalClockImpl(TimeProvider.System, "parse-test-node", new InMemoryHlcStateStore());
await using var queue = new RedisSchedulerPlannerQueue(
options,
options.Redis,
NullLogger<RedisSchedulerPlannerQueue>.Instance,
TimeProvider.System,
hlc,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
var message = CreatePlannerMessage();
await queue.EnqueueAsync(message);
var leases = await queue.LeaseAsync(new SchedulerQueueLeaseRequest("parse-consumer", batchSize: 1, options.DefaultLeaseDuration));
leases.Should().ContainSingle();
var lease = leases[0];
lease.HlcTimestamp.Should().NotBeNull();
// Verify round-trip through sortable string
var hlcValue = lease.HlcTimestamp!.Value;
var sortableString = hlcValue.ToSortableString();
HlcTimestamp.TryParse(sortableString, out var parsed).Should().BeTrue();
parsed.Should().Be(hlcValue);
await lease.AcknowledgeAsync();
}
/// <summary>
/// Rapid enqueues on a single node must still produce strictly ordered,
/// duplicate-free timestamps all carrying the same node id (the logical
/// counter disambiguates equal physical times).
/// </summary>
[Fact]
public async Task HlcTimestamp_DeterministicForSameInput_OnSameNode()
{
if (SkipIfUnavailable())
{
return;
}
// This test verifies that HLC generates consistent timestamps
// by checking that timestamps from the same node use the same node ID
// and that logical counters increment correctly at same physical time
var options = CreateOptions();
var hlc = new HybridLogicalClockImpl(TimeProvider.System, "determinism-node", new InMemoryHlcStateStore());
await using var queue = new RedisSchedulerPlannerQueue(
options,
options.Redis,
NullLogger<RedisSchedulerPlannerQueue>.Instance,
TimeProvider.System,
hlc,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
// Enqueue rapidly to potentially hit same physical time
var timestamps = new List<HlcTimestamp>();
for (int i = 0; i < 10; i++)
{
var message = CreatePlannerMessage(suffix: $"determinism-{i}");
await queue.EnqueueAsync(message);
}
var leases = await queue.LeaseAsync(new SchedulerQueueLeaseRequest("determinism-consumer", batchSize: 20, options.DefaultLeaseDuration));
leases.Should().HaveCount(10);
foreach (var lease in leases)
{
lease.HlcTimestamp.Should().NotBeNull();
timestamps.Add(lease.HlcTimestamp!.Value);
await lease.AcknowledgeAsync();
}
// All timestamps should have same node ID
foreach (var ts in timestamps)
{
ts.NodeId.Should().Be("determinism-node");
}
// Verify strict ordering (no duplicates)
for (int i = 1; i < timestamps.Count; i++)
{
(timestamps[i] > timestamps[i - 1]).Should().BeTrue(
$"Timestamp {i} ({timestamps[i]}) should be greater than {i - 1} ({timestamps[i - 1]})");
}
}
/// <summary>
/// Builds queue options pointing at the test container. Stream, consumer
/// group, and idempotency key names embed a fresh GUID so concurrent tests
/// sharing the Redis instance never collide.
/// </summary>
private SchedulerQueueOptions CreateOptions()
{
var unique = Guid.NewGuid().ToString("N");
return new SchedulerQueueOptions
{
Kind = SchedulerQueueTransportKind.Redis,
DefaultLeaseDuration = TimeSpan.FromSeconds(30),
MaxDeliveryAttempts = 5,
RetryInitialBackoff = TimeSpan.FromMilliseconds(10),
RetryMaxBackoff = TimeSpan.FromMilliseconds(50),
Redis = new SchedulerRedisQueueOptions
{
ConnectionString = _redis.GetConnectionString(),
Database = 0,
InitializationTimeout = TimeSpan.FromSeconds(10),
Planner = new RedisSchedulerStreamOptions
{
Stream = $"scheduler:hlc-test:planner:{unique}",
ConsumerGroup = $"planner-hlc-{unique}",
DeadLetterStream = $"scheduler:hlc-test:planner:{unique}:dead",
IdempotencyKeyPrefix = $"scheduler:hlc-test:planner:{unique}:idemp:",
IdempotencyWindow = TimeSpan.FromMinutes(5)
},
Runner = new RedisSchedulerStreamOptions
{
Stream = $"scheduler:hlc-test:runner:{unique}",
ConsumerGroup = $"runner-hlc-{unique}",
DeadLetterStream = $"scheduler:hlc-test:runner:{unique}:dead",
IdempotencyKeyPrefix = $"scheduler:hlc-test:runner:{unique}:idemp:",
IdempotencyWindow = TimeSpan.FromMinutes(5)
}
}
};
}
/// <summary>
/// Returns true when Docker startup failed during InitializeAsync, letting
/// each test exit early instead of throwing.
/// </summary>
private bool SkipIfUnavailable()
{
if (_skipReason is not null)
{
return true;
}
return false;
}
/// <summary>
/// Heuristic: unwraps AggregateExceptions, then treats timeouts or any
/// Docker-named exception type as "Docker engine not available".
/// </summary>
private static bool IsDockerUnavailable(Exception exception)
{
while (exception is AggregateException aggregate && aggregate.InnerException is not null)
{
exception = aggregate.InnerException;
}
return exception is TimeoutException
|| exception.GetType().Name.Contains("Docker", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Builds a planner message (schedule + run + single-image impact set) for
/// tenant "tenant-hlc"; <paramref name="suffix"/> disambiguates run ids and
/// correlation ids across multi-message tests.
/// </summary>
private static PlannerQueueMessage CreatePlannerMessage(string suffix = "")
{
var id = string.IsNullOrEmpty(suffix) ? "run-hlc-test" : $"run-hlc-test-{suffix}";
var schedule = new Schedule(
id: "sch-hlc-test",
tenantId: "tenant-hlc",
name: "HLC Test",
enabled: true,
cronExpression: "0 0 * * *",
timezone: "UTC",
mode: ScheduleMode.AnalysisOnly,
selection: new Selector(SelectorScope.AllImages, tenantId: "tenant-hlc"),
onlyIf: ScheduleOnlyIf.Default,
notify: ScheduleNotify.Default,
limits: ScheduleLimits.Default,
createdAt: DateTimeOffset.UtcNow,
createdBy: "tests",
updatedAt: DateTimeOffset.UtcNow,
updatedBy: "tests");
var run = new Run(
id: id,
tenantId: "tenant-hlc",
trigger: RunTrigger.Manual,
state: RunState.Planning,
stats: RunStats.Empty,
createdAt: DateTimeOffset.UtcNow,
reason: RunReason.Empty,
scheduleId: schedule.Id);
var impactSet = new ImpactSet(
selector: new Selector(SelectorScope.AllImages, tenantId: "tenant-hlc"),
images: new[]
{
new ImpactImage(
imageDigest: "sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
registry: "registry",
repository: "repo",
namespaces: new[] { "prod" },
tags: new[] { "latest" })
},
usageOnly: true,
generatedAt: DateTimeOffset.UtcNow,
total: 1);
return new PlannerQueueMessage(run, impactSet, schedule, correlationId: $"corr-hlc-{suffix}");
}
/// <summary>
/// Builds a fixed single-digest runner segment message for tenant "tenant-hlc".
/// </summary>
private static RunnerSegmentQueueMessage CreateRunnerMessage()
{
return new RunnerSegmentQueueMessage(
segmentId: "segment-hlc-test",
runId: "run-hlc-test",
tenantId: "tenant-hlc",
imageDigests: new[]
{
"sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"
},
scheduleId: "sch-hlc-test",
ratePerSecond: 10,
usageOnly: true,
attributes: new Dictionary<string, string> { ["priority"] = "normal" },
correlationId: "corr-runner-hlc");
}
}

View File

@@ -62,7 +62,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
options.Redis,
NullLogger<RedisSchedulerPlannerQueue>.Instance,
TimeProvider.System,
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
hlc: null,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
var message = TestData.CreatePlannerMessage();
@@ -101,7 +102,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
options.Redis,
NullLogger<RedisSchedulerRunnerQueue>.Instance,
TimeProvider.System,
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
hlc: null,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
var message = TestData.CreateRunnerMessage();
@@ -136,7 +138,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
options.Redis,
NullLogger<RedisSchedulerPlannerQueue>.Instance,
TimeProvider.System,
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
hlc: null,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
var message = TestData.CreatePlannerMessage();
await queue.EnqueueAsync(message);
@@ -170,7 +173,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
options.Redis,
NullLogger<RedisSchedulerPlannerQueue>.Instance,
TimeProvider.System,
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
hlc: null,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
var message = TestData.CreatePlannerMessage();
@@ -208,7 +212,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
options.Redis,
NullLogger<RedisSchedulerRunnerQueue>.Instance,
TimeProvider.System,
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
hlc: null,
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
var message = TestData.CreateRunnerMessage();
await queue.EnqueueAsync(message);