save progress
This commit is contained in:
@@ -0,0 +1,177 @@
|
||||
-- HLC Queue Chain: Hybrid Logical Clock Ordering with Cryptographic Sequence Proofs
|
||||
-- SPRINT_20260105_002_002_SCHEDULER: SQC-002, SQC-003, SQC-004
|
||||
--
|
||||
-- Adds HLC-based ordering with hash chain at enqueue time for audit-safe job queue ordering.
|
||||
-- See: Product Advisory "Audit-safe job queue ordering using monotonic timestamps"
|
||||
|
||||
-- Open the transaction that wraps every statement of this migration (closed by the COMMIT at the end of the script).
BEGIN;
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 1: Scheduler Log Table (SQC-002)
|
||||
-- ============================================================================
|
||||
-- HLC-ordered, chain-linked job entries. This is the authoritative order.
|
||||
-- Jobs are linked via: link_i = Hash(link_{i-1} || job_id || t_hlc || payload_hash)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS scheduler.scheduler_log (
    seq_bigint    BIGSERIAL PRIMARY KEY,               -- Storage order (not authoritative)
    tenant_id     TEXT NOT NULL,
    -- HLC timestamp: "0001704067200000-node-1-000042".
    -- COLLATE "C" forces byte-wise comparison so ORDER BY / range predicates on
    -- this "sortable string" do not depend on the database locale.
    t_hlc         TEXT COLLATE "C" NOT NULL,
    partition_key TEXT NOT NULL DEFAULT '',            -- Optional queue partition
    job_id        UUID NOT NULL,
    payload_hash  BYTEA NOT NULL,                      -- SHA-256 of canonical payload JSON
    prev_link     BYTEA,                               -- Previous chain link (null for first)
    link          BYTEA NOT NULL,                      -- Hash(prev_link || job_id || t_hlc || payload_hash)
    created_at    TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- At most one row per (tenant, partition, HLC timestamp, job)
    CONSTRAINT uq_scheduler_log_order UNIQUE (tenant_id, partition_key, t_hlc, job_id)
);
|
||||
|
||||
-- Catalog descriptions for the log table and its two key columns.
COMMENT ON TABLE scheduler.scheduler_log
    IS 'HLC-ordered job log with cryptographic chain linking for audit-safe ordering';
COMMENT ON COLUMN scheduler.scheduler_log.t_hlc
    IS 'Hybrid Logical Clock timestamp in sortable string format';
COMMENT ON COLUMN scheduler.scheduler_log.link
    IS 'SHA-256 chain link: Hash(prev_link || job_id || t_hlc || payload_hash)';

-- Tenant-wide reads in HLC order (primary query path).
CREATE INDEX IF NOT EXISTS idx_scheduler_log_tenant_hlc
    ON scheduler.scheduler_log USING btree (tenant_id, t_hlc);

-- Reads restricted to a single partition of a tenant, in HLC order.
CREATE INDEX IF NOT EXISTS idx_scheduler_log_partition
    ON scheduler.scheduler_log USING btree (tenant_id, partition_key, t_hlc);

-- Point lookups by job_id (idempotency checks).
CREATE INDEX IF NOT EXISTS idx_scheduler_log_job_id
    ON scheduler.scheduler_log USING btree (job_id);
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 2: Batch Snapshot Table (SQC-003)
|
||||
-- ============================================================================
|
||||
-- Captures chain state at specific points for audit anchors and attestation.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS scheduler.batch_snapshot (
    batch_id      UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id     TEXT NOT NULL,
    -- HLC bounds use COLLATE "C" so range comparisons are byte-wise and do not
    -- depend on the database locale.
    range_start_t TEXT COLLATE "C" NOT NULL,           -- HLC range start (inclusive)
    range_end_t   TEXT COLLATE "C" NOT NULL,           -- HLC range end (inclusive)
    head_link     BYTEA NOT NULL,                      -- Chain head at snapshot time
    job_count     INT NOT NULL,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    signed_by     TEXT,                                -- Optional: signing key ID for DSSE
    signature     BYTEA,                               -- Optional: DSSE signature bytes

    -- A snapshot cannot cover a negative number of jobs.
    CONSTRAINT ck_batch_snapshot_job_count CHECK (job_count >= 0),
    -- Both bounds are inclusive, so start must not sort after end.
    CONSTRAINT ck_batch_snapshot_range CHECK (range_start_t <= range_end_t)
);
|
||||
|
||||
-- Catalog descriptions for the snapshot table.
COMMENT ON TABLE scheduler.batch_snapshot
    IS 'Audit anchors capturing chain state at specific HLC ranges';
COMMENT ON COLUMN scheduler.batch_snapshot.head_link
    IS 'The chain link at range_end_t - can be used to verify chain integrity';

-- Newest-first listing of a tenant's snapshots.
CREATE INDEX IF NOT EXISTS idx_batch_snapshot_tenant
    ON scheduler.batch_snapshot USING btree (tenant_id, created_at DESC);

-- Lookups by HLC range bounds within a tenant.
CREATE INDEX IF NOT EXISTS idx_batch_snapshot_hlc_range
    ON scheduler.batch_snapshot USING btree (tenant_id, range_start_t, range_end_t);
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 3: Chain Heads Table (SQC-004)
|
||||
-- ============================================================================
|
||||
-- Tracks the last chain link per tenant/partition for efficient append.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS scheduler.chain_heads (
    tenant_id     TEXT NOT NULL,
    partition_key TEXT NOT NULL DEFAULT '',
    last_link     BYTEA NOT NULL,
    -- COLLATE "C" keeps comparisons of HLC strings byte-wise, independent of
    -- the database locale.
    last_t_hlc    TEXT COLLATE "C" NOT NULL,
    last_job_id   UUID NOT NULL,
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Exactly one head row per tenant/partition
    PRIMARY KEY (tenant_id, partition_key)
);
|
||||
|
||||
COMMENT ON TABLE scheduler.chain_heads
    IS 'Per-partition chain head tracking for efficient chain append operations';

-- Row-level BEFORE UPDATE trigger on chain_heads. It delegates to
-- scheduler.update_updated_at(), which is not defined in this script and must
-- already exist when the migration runs.
CREATE OR REPLACE TRIGGER update_chain_heads_updated_at
    BEFORE UPDATE ON scheduler.chain_heads
    FOR EACH ROW EXECUTE FUNCTION scheduler.update_updated_at();
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 4: Helper Functions
|
||||
-- ============================================================================
|
||||
|
||||
-- Function to get the current chain head for a tenant/partition
|
||||
CREATE OR REPLACE FUNCTION scheduler.get_chain_head(
    p_tenant_id     TEXT,
    p_partition_key TEXT DEFAULT ''
)
RETURNS TABLE (
    last_link   BYTEA,
    last_t_hlc  TEXT,
    last_job_id UUID
)
LANGUAGE plpgsql STABLE
AS $$
BEGIN
    -- Yields the head row for the tenant/partition, or no rows when none exists.
    -- Columns are qualified with the table alias so they are not confused with
    -- the identically named output columns of this function.
    RETURN QUERY
    SELECT heads.last_link, heads.last_t_hlc, heads.last_job_id
    FROM scheduler.chain_heads AS heads
    WHERE (heads.tenant_id, heads.partition_key) = (p_tenant_id, p_partition_key);
END;
$$;
|
||||
|
||||
-- Function to insert log entry and update chain head atomically
|
||||
-- Appends one entry to scheduler.scheduler_log and moves the chain head of the
-- entry's tenant/partition forward, in a single function call.
--
-- Parameters:
--   p_tenant_id, p_partition_key  identify the chain
--   p_t_hlc                       HLC timestamp of the entry
--   p_job_id, p_payload_hash      job identity and payload hash
--   p_prev_link, p_link           chain links computed by the caller
-- Returns: seq_bigint of the inserted log row.
-- Raises:  unique_violation when (tenant, partition, t_hlc, job_id) already exists.
CREATE OR REPLACE FUNCTION scheduler.insert_log_with_chain_update(
    p_tenant_id     TEXT,
    p_t_hlc         TEXT,
    p_partition_key TEXT,
    p_job_id        UUID,
    p_payload_hash  BYTEA,
    p_prev_link     BYTEA,
    p_link          BYTEA
)
RETURNS BIGINT
LANGUAGE plpgsql
AS $$
DECLARE
    v_seq BIGINT;
BEGIN
    -- Insert log entry
    INSERT INTO scheduler.scheduler_log (
        tenant_id, t_hlc, partition_key, job_id,
        payload_hash, prev_link, link
    )
    VALUES (
        p_tenant_id, p_t_hlc, p_partition_key, p_job_id,
        p_payload_hash, p_prev_link, p_link
    )
    RETURNING seq_bigint INTO v_seq;

    -- Upsert chain head. The WHERE guard keeps the head monotonic: a late or
    -- concurrently committed entry with an older HLC must not move
    -- last_t_hlc/last_link backwards. The comparison is forced to byte order
    -- ("C") so it does not depend on the database locale.
    -- NOTE(review): this does not verify that p_prev_link equals the current
    -- head; callers racing on the same chain can still fork it.
    INSERT INTO scheduler.chain_heads AS ch (
        tenant_id, partition_key, last_link, last_t_hlc, last_job_id
    )
    VALUES (
        p_tenant_id, p_partition_key, p_link, p_t_hlc, p_job_id
    )
    ON CONFLICT (tenant_id, partition_key)
    DO UPDATE SET
        last_link   = EXCLUDED.last_link,
        last_t_hlc  = EXCLUDED.last_t_hlc,
        last_job_id = EXCLUDED.last_job_id,
        updated_at  = NOW()
    WHERE ch.last_t_hlc COLLATE "C" <= EXCLUDED.last_t_hlc COLLATE "C";

    RETURN v_seq;
END;
$$;

COMMENT ON FUNCTION scheduler.insert_log_with_chain_update IS
    'Atomically inserts a scheduler log entry and updates the chain head';
|
||||
|
||||
-- Close the transaction opened by BEGIN at the top of this migration.
COMMIT;
|
||||
@@ -0,0 +1,56 @@
|
||||
// <copyright file="BatchSnapshot.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
/// <summary>
/// Audit anchor that records the state of a chain over one HLC range.
/// </summary>
public sealed record BatchSnapshot
{
    /// <summary>Identifier of this snapshot batch.</summary>
    public Guid BatchId { get; init; }

    /// <summary>Tenant the snapshot belongs to.</summary>
    public required string TenantId { get; init; }

    /// <summary>First HLC timestamp covered by the snapshot (inclusive).</summary>
    public required string RangeStartT { get; init; }

    /// <summary>Last HLC timestamp covered by the snapshot (inclusive).</summary>
    public required string RangeEndT { get; init; }

    /// <summary>Link at the head of the chain when the snapshot was taken.</summary>
    public required byte[] HeadLink { get; init; }

    /// <summary>How many jobs fall inside the range.</summary>
    public int JobCount { get; init; }

    /// <summary>Creation time of the snapshot.</summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>Identifier of the DSSE signing key; <c>null</c> when unsigned.</summary>
    public string? SignedBy { get; init; }

    /// <summary>DSSE signature bytes; <c>null</c> when unsigned.</summary>
    public byte[]? Signature { get; init; }
}
|
||||
@@ -0,0 +1,41 @@
|
||||
// <copyright file="ChainHead.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
/// <summary>
/// Current head of the hash chain for one tenant/partition pair.
/// </summary>
public sealed record ChainHead
{
    /// <summary>Tenant that owns the chain.</summary>
    public required string TenantId { get; init; }

    /// <summary>Partition key; the default partition is the empty string.</summary>
    public string PartitionKey { get; init; } = "";

    /// <summary>Most recent chain link.</summary>
    public required byte[] LastLink { get; init; }

    /// <summary>HLC timestamp recorded with the head.</summary>
    public required string LastTHlc { get; init; }

    /// <summary>Job identifier recorded with the head.</summary>
    public required Guid LastJobId { get; init; }

    /// <summary>When the head was last updated.</summary>
    public DateTimeOffset UpdatedAt { get; init; }
}
|
||||
@@ -0,0 +1,56 @@
|
||||
// <copyright file="SchedulerLogEntry.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
/// <summary>
/// One entry of the scheduler log: ordered by HLC timestamp and linked to its
/// predecessor through a hash chain.
/// </summary>
public sealed record SchedulerLogEntry
{
    /// <summary>Storage sequence number; not authoritative for ordering.</summary>
    public long SeqBigint { get; init; }

    /// <summary>Tenant that owns the entry.</summary>
    public required string TenantId { get; init; }

    /// <summary>HLC timestamp in its sortable string form.</summary>
    public required string THlc { get; init; }

    /// <summary>Optional queue partition key; empty string when unset.</summary>
    public string PartitionKey { get; init; } = "";

    /// <summary>Job identifier (deterministic from payload).</summary>
    public required Guid JobId { get; init; }

    /// <summary>SHA-256 hash of the canonical payload JSON.</summary>
    public required byte[] PayloadHash { get; init; }

    /// <summary>Link of the preceding entry; <c>null</c> for the first entry in a chain.</summary>
    public byte[]? PrevLink { get; init; }

    /// <summary>Chain link: Hash(prev_link || job_id || t_hlc || payload_hash).</summary>
    public required byte[] Link { get; init; }

    /// <summary>Creation time of the entry.</summary>
    public DateTimeOffset CreatedAt { get; init; }
}
|
||||
@@ -0,0 +1,65 @@
|
||||
// <copyright file="IBatchSnapshotRepository.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
/// <summary>
/// Persistence contract for <see cref="BatchSnapshot"/> records.
/// </summary>
public interface IBatchSnapshotRepository
{
    /// <summary>Stores a new batch snapshot.</summary>
    /// <param name="snapshot">Snapshot to store.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>A task that completes when the insert has finished.</returns>
    Task InsertAsync(BatchSnapshot snapshot, CancellationToken cancellationToken = default);

    /// <summary>Looks up a single snapshot by its identifier.</summary>
    /// <param name="batchId">Identifier of the batch.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The matching snapshot, or <c>null</c> when none exists.</returns>
    Task<BatchSnapshot?> GetByIdAsync(Guid batchId, CancellationToken cancellationToken = default);

    /// <summary>Returns the most recent snapshot of a tenant.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The newest snapshot, or <c>null</c> when the tenant has none.</returns>
    Task<BatchSnapshot?> GetLatestAsync(string tenantId, CancellationToken cancellationToken = default);

    /// <summary>Lists a tenant's snapshots within a time range.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startTime">Start of the range (inclusive).</param>
    /// <param name="endTime">End of the range (inclusive).</param>
    /// <param name="limit">Upper bound on the number of snapshots returned.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The snapshots that fall inside the range.</returns>
    Task<IReadOnlyList<BatchSnapshot>> GetByTimeRangeAsync(
        string tenantId, DateTimeOffset startTime, DateTimeOffset endTime,
        int limit = 100, CancellationToken cancellationToken = default);

    /// <summary>Lists the snapshots whose HLC range contains a given timestamp.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="tHlc">HLC timestamp to look for.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The snapshots containing the timestamp.</returns>
    Task<IReadOnlyList<BatchSnapshot>> GetContainingHlcAsync(
        string tenantId, string tHlc, CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,47 @@
|
||||
// <copyright file="IChainHeadRepository.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
/// <summary>
/// Read access to the per-tenant/partition chain heads.
/// </summary>
public interface IChainHeadRepository
{
    /// <summary>Fetches only the last link of a tenant/partition chain.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="partitionKey">Partition key; pass the empty string for the default partition.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The bytes of the last link, or <c>null</c> when no chain exists.</returns>
    Task<byte[]?> GetLastLinkAsync(
        string tenantId, string partitionKey, CancellationToken cancellationToken = default);

    /// <summary>Fetches the complete chain head of a tenant/partition.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="partitionKey">Partition key; pass the empty string for the default partition.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The chain head, or <c>null</c> when no chain exists.</returns>
    Task<ChainHead?> GetAsync(
        string tenantId, string partitionKey, CancellationToken cancellationToken = default);

    /// <summary>Fetches every chain head that belongs to a tenant.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>All chain heads of the tenant.</returns>
    Task<IReadOnlyList<ChainHead>> GetAllForTenantAsync(
        string tenantId, CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,109 @@
|
||||
// <copyright file="ISchedulerLogRepository.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
/// <summary>
/// Persistence contract for the HLC-ordered scheduler log.
/// </summary>
public interface ISchedulerLogRepository
{
    /// <summary>Inserts a log entry and atomically updates the chain head.</summary>
    /// <param name="entry">Entry to insert.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>Sequence number assigned to the inserted entry.</returns>
    Task<long> InsertWithChainUpdateAsync(
        SchedulerLogEntry entry, CancellationToken cancellationToken = default);

    /// <summary>Reads log entries ordered by HLC timestamp.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="partitionKey">Partition to read, or <c>null</c> for all partitions.</param>
    /// <param name="limit">Upper bound on the number of entries returned.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>Entries in HLC order.</returns>
    Task<IReadOnlyList<SchedulerLogEntry>> GetByHlcOrderAsync(
        string tenantId, string? partitionKey, int limit,
        CancellationToken cancellationToken = default);

    /// <summary>Reads the log entries inside an HLC timestamp range.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startTHlc">Lower bound (inclusive); <c>null</c> means unbounded.</param>
    /// <param name="endTHlc">Upper bound (inclusive); <c>null</c> means unbounded.</param>
    /// <param name="limit">Upper bound on the number of entries returned; 0 means unlimited.</param>
    /// <param name="partitionKey">Partition to read, or <c>null</c> for all partitions.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>Entries in the requested range.</returns>
    Task<IReadOnlyList<SchedulerLogEntry>> GetByHlcRangeAsync(
        string tenantId, string? startTHlc, string? endTHlc,
        int limit = 0, string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>Reads the entries that follow an HLC timestamp (cursor-based pagination).</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="afterTHlc">Cursor timestamp; results start after it (exclusive).</param>
    /// <param name="limit">Upper bound on the number of entries returned.</param>
    /// <param name="partitionKey">Partition to read, or <c>null</c> for all partitions.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>Entries after the given timestamp.</returns>
    Task<IReadOnlyList<SchedulerLogEntry>> GetAfterHlcAsync(
        string tenantId, string afterTHlc, int limit,
        string? partitionKey = null, CancellationToken cancellationToken = default);

    /// <summary>Counts the log entries inside an HLC timestamp range.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startTHlc">Lower bound (inclusive); <c>null</c> means unbounded.</param>
    /// <param name="endTHlc">Upper bound (inclusive); <c>null</c> means unbounded.</param>
    /// <param name="partitionKey">Partition to count, or <c>null</c> for all partitions.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>Number of entries in the range.</returns>
    Task<int> CountByHlcRangeAsync(
        string tenantId, string? startTHlc, string? endTHlc,
        string? partitionKey = null, CancellationToken cancellationToken = default);

    /// <summary>Looks up a log entry by its job identifier.</summary>
    /// <param name="jobId">Job identifier.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The entry, or <c>null</c> when none is found.</returns>
    Task<SchedulerLogEntry?> GetByJobIdAsync(
        Guid jobId, CancellationToken cancellationToken = default);

    /// <summary>Tells whether a job identifier is already present in the log.</summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="jobId">Job identifier.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns><c>true</c> when the job exists.</returns>
    Task<bool> ExistsAsync(
        string tenantId, Guid jobId, CancellationToken cancellationToken = default);
}
|
||||
@@ -1,4 +1,5 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Npgsql;
|
||||
using StellaOps.Determinism;
|
||||
using StellaOps.Infrastructure.Postgres.Repositories;
|
||||
@@ -13,6 +14,7 @@ public sealed class JobRepository : RepositoryBase<SchedulerDataSource>, IJobRep
|
||||
{
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly IGuidProvider _guidProvider;
|
||||
private readonly bool _enableHlcOrdering;
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new job repository.
|
||||
@@ -20,12 +22,14 @@ public sealed class JobRepository : RepositoryBase<SchedulerDataSource>, IJobRep
|
||||
public JobRepository(
|
||||
SchedulerDataSource dataSource,
|
||||
ILogger<JobRepository> logger,
|
||||
IOptions<JobRepositoryOptions>? options = null,
|
||||
TimeProvider? timeProvider = null,
|
||||
IGuidProvider? guidProvider = null)
|
||||
: base(dataSource, logger)
|
||||
{
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_guidProvider = guidProvider ?? SystemGuidProvider.Instance;
|
||||
_enableHlcOrdering = options?.Value.EnableHlcOrdering ?? false;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -102,15 +106,28 @@ public sealed class JobRepository : RepositoryBase<SchedulerDataSource>, IJobRep
|
||||
int limit = 10,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
const string sql = """
|
||||
SELECT * FROM scheduler.jobs
|
||||
WHERE tenant_id = @tenant_id
|
||||
AND status = 'scheduled'
|
||||
AND (not_before IS NULL OR not_before <= NOW())
|
||||
AND job_type = ANY(@job_types)
|
||||
ORDER BY priority DESC, created_at
|
||||
LIMIT @limit
|
||||
""";
|
||||
// When HLC ordering is enabled, join with scheduler_log and order by t_hlc
|
||||
// This provides deterministic global ordering based on Hybrid Logical Clock timestamps
|
||||
var sql = _enableHlcOrdering
|
||||
? """
|
||||
SELECT j.* FROM scheduler.jobs j
|
||||
INNER JOIN scheduler.scheduler_log sl ON j.id = sl.job_id AND j.tenant_id = sl.tenant_id
|
||||
WHERE j.tenant_id = @tenant_id
|
||||
AND j.status = 'scheduled'
|
||||
AND (j.not_before IS NULL OR j.not_before <= NOW())
|
||||
AND j.job_type = ANY(@job_types)
|
||||
ORDER BY sl.t_hlc
|
||||
LIMIT @limit
|
||||
"""
|
||||
: """
|
||||
SELECT * FROM scheduler.jobs
|
||||
WHERE tenant_id = @tenant_id
|
||||
AND status = 'scheduled'
|
||||
AND (not_before IS NULL OR not_before <= NOW())
|
||||
AND job_type = ANY(@job_types)
|
||||
ORDER BY priority DESC, created_at
|
||||
LIMIT @limit
|
||||
""";
|
||||
|
||||
return await QueryAsync(
|
||||
tenantId,
|
||||
@@ -350,12 +367,22 @@ public sealed class JobRepository : RepositoryBase<SchedulerDataSource>, IJobRep
|
||||
int offset = 0,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
const string sql = """
|
||||
SELECT * FROM scheduler.jobs
|
||||
WHERE tenant_id = @tenant_id AND status = @status::scheduler.job_status
|
||||
ORDER BY created_at DESC, id
|
||||
LIMIT @limit OFFSET @offset
|
||||
""";
|
||||
// When HLC ordering is enabled, join with scheduler_log and order by t_hlc DESC
|
||||
// This maintains consistent ordering across all job retrieval methods
|
||||
var sql = _enableHlcOrdering
|
||||
? """
|
||||
SELECT j.* FROM scheduler.jobs j
|
||||
LEFT JOIN scheduler.scheduler_log sl ON j.id = sl.job_id AND j.tenant_id = sl.tenant_id
|
||||
WHERE j.tenant_id = @tenant_id AND j.status = @status::scheduler.job_status
|
||||
ORDER BY COALESCE(sl.t_hlc, to_char(j.created_at AT TIME ZONE 'UTC', 'YYYYMMDDHH24MISS')) DESC, j.id
|
||||
LIMIT @limit OFFSET @offset
|
||||
"""
|
||||
: """
|
||||
SELECT * FROM scheduler.jobs
|
||||
WHERE tenant_id = @tenant_id AND status = @status::scheduler.job_status
|
||||
ORDER BY created_at DESC, id
|
||||
LIMIT @limit OFFSET @offset
|
||||
""";
|
||||
|
||||
return await QueryAsync(
|
||||
tenantId,
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
// <copyright file="JobRepositoryOptions.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
/// <summary>
/// Settings that control how the job repository behaves.
/// </summary>
public sealed class JobRepositoryOptions
{
    /// <summary>
    /// Switches job retrieval to HLC (Hybrid Logical Clock) ordering.
    /// When <c>true</c>, jobs are ordered by their HLC timestamp from the scheduler_log table;
    /// when <c>false</c> (the default), legacy (priority, created_at) ordering is used.
    /// </summary>
    public bool EnableHlcOrdering { get; set; } = false;
}
|
||||
@@ -0,0 +1,183 @@
|
||||
// <copyright file="PostgresBatchSnapshotRepository.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Npgsql;
|
||||
using StellaOps.Infrastructure.Postgres.Repositories;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
/// <summary>
|
||||
/// PostgreSQL repository for batch snapshot operations.
|
||||
/// </summary>
|
||||
public sealed class PostgresBatchSnapshotRepository : RepositoryBase<SchedulerDataSource>, IBatchSnapshotRepository
|
||||
{
|
||||
/// <summary>
/// Initializes the repository; both arguments are forwarded unchanged to the base repository.
/// </summary>
/// <param name="dataSource">Scheduler data source used to open connections.</param>
/// <param name="logger">Logger handed to the base repository.</param>
public PostgresBatchSnapshotRepository(
    SchedulerDataSource dataSource,
    ILogger<PostgresBatchSnapshotRepository> logger)
    : base(dataSource, logger)
{
}
|
||||
|
||||
/// <inheritdoc />
public async Task InsertAsync(BatchSnapshot snapshot, CancellationToken cancellationToken = default)
{
    // Every column is written explicitly, so the table defaults for batch_id
    // and created_at are never applied by this method.
    const string sql = """
        INSERT INTO scheduler.batch_snapshot (
        batch_id, tenant_id, range_start_t, range_end_t, head_link,
        job_count, created_at, signed_by, signature
        ) VALUES (
        @batch_id, @tenant_id, @range_start_t, @range_end_t, @head_link,
        @job_count, @created_at, @signed_by, @signature
        )
        """;

    await using var connection = await DataSource.OpenConnectionAsync(snapshot.TenantId, "writer", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);

    AddParameter(command, "batch_id", snapshot.BatchId);
    AddParameter(command, "tenant_id", snapshot.TenantId);
    AddParameter(command, "range_start_t", snapshot.RangeStartT);
    AddParameter(command, "range_end_t", snapshot.RangeEndT);
    AddParameter(command, "head_link", snapshot.HeadLink);
    AddParameter(command, "job_count", snapshot.JobCount);
    // Npgsql only accepts DateTimeOffset values with offset 0 for timestamptz,
    // so normalise to UTC; the instant itself is unchanged.
    AddParameter(command, "created_at", snapshot.CreatedAt.ToUniversalTime());
    // Optional columns: a null property is sent as SQL NULL.
    AddParameter(command, "signed_by", snapshot.SignedBy ?? (object)DBNull.Value);
    AddParameter(command, "signature", snapshot.Signature ?? (object)DBNull.Value);

    await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
|
||||
|
||||
/// <inheritdoc />
public async Task<BatchSnapshot?> GetByIdAsync(Guid batchId, CancellationToken cancellationToken = default)
{
    const string query = """
        SELECT batch_id, tenant_id, range_start_t, range_end_t, head_link,
        job_count, created_at, signed_by, signature
        FROM scheduler.batch_snapshot
        WHERE batch_id = @batch_id
        """;

    // NOTE(review): the lookup takes no tenant id and uses the system connection
    // rather than a tenant connection - confirm this is intended.
    await using var conn = await DataSource.OpenSystemConnectionAsync(cancellationToken)
        .ConfigureAwait(false);
    await using var cmd = CreateCommand(query, conn);
    AddParameter(cmd, "batch_id", batchId);

    await using var rows = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

    // No row means the batch id is unknown.
    if (!await rows.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        return null;
    }

    return MapSnapshot(rows);
}
|
||||
|
||||
/// <inheritdoc />
public async Task<BatchSnapshot?> GetLatestAsync(string tenantId, CancellationToken cancellationToken = default)
{
    // Newest snapshot first; LIMIT 1 keeps only the most recent one.
    const string query = """
        SELECT batch_id, tenant_id, range_start_t, range_end_t, head_link,
        job_count, created_at, signed_by, signature
        FROM scheduler.batch_snapshot
        WHERE tenant_id = @tenant_id
        ORDER BY created_at DESC
        LIMIT 1
        """;

    await using var conn = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
        .ConfigureAwait(false);
    await using var cmd = CreateCommand(query, conn);
    AddParameter(cmd, "tenant_id", tenantId);

    await using var rows = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

    // The tenant has no snapshots when the result set is empty.
    if (!await rows.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        return null;
    }

    return MapSnapshot(rows);
}
|
||||
|
||||
/// <inheritdoc />
public async Task<IReadOnlyList<BatchSnapshot>> GetByTimeRangeAsync(
    string tenantId,
    DateTimeOffset startTime,
    DateTimeOffset endTime,
    int limit = 100,
    CancellationToken cancellationToken = default)
{
    // Both bounds are inclusive; results are returned newest first.
    const string sql = """
        SELECT batch_id, tenant_id, range_start_t, range_end_t, head_link,
        job_count, created_at, signed_by, signature
        FROM scheduler.batch_snapshot
        WHERE tenant_id = @tenant_id
        AND created_at >= @start_time
        AND created_at <= @end_time
        ORDER BY created_at DESC
        LIMIT @limit
        """;

    await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
        .ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);

    AddParameter(command, "tenant_id", tenantId);
    // Npgsql only accepts DateTimeOffset values with offset 0 for timestamptz,
    // so normalise the bounds to UTC; the instants themselves are unchanged.
    AddParameter(command, "start_time", startTime.ToUniversalTime());
    AddParameter(command, "end_time", endTime.ToUniversalTime());
    AddParameter(command, "limit", limit);

    var snapshots = new List<BatchSnapshot>();
    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

    while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        snapshots.Add(MapSnapshot(reader));
    }

    return snapshots;
}
|
||||
|
||||
/// <inheritdoc />
public async Task<IReadOnlyList<BatchSnapshot>> GetContainingHlcAsync(
    string tenantId,
    string tHlc,
    CancellationToken cancellationToken = default)
{
    // Selects every snapshot whose [range_start_t, range_end_t] interval (inclusive) contains @t_hlc.
    // NOTE(review): the comparison is done by the database on the stored column type; this presumably
    // relies on the sortable HLC string format - confirm against the batch_snapshot schema.
    const string containingSql = """
        SELECT batch_id, tenant_id, range_start_t, range_end_t, head_link,
        job_count, created_at, signed_by, signature
        FROM scheduler.batch_snapshot
        WHERE tenant_id = @tenant_id
        AND range_start_t <= @t_hlc
        AND range_end_t >= @t_hlc
        ORDER BY created_at DESC
        """;

    await using var conn = await DataSource
        .OpenConnectionAsync(tenantId, "reader", cancellationToken)
        .ConfigureAwait(false);
    await using var cmd = CreateCommand(containingSql, conn);

    AddParameter(cmd, "tenant_id", tenantId);
    AddParameter(cmd, "t_hlc", tHlc);

    var matches = new List<BatchSnapshot>();
    await using var rows = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

    for (var hasRow = await rows.ReadAsync(cancellationToken).ConfigureAwait(false);
         hasRow;
         hasRow = await rows.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        matches.Add(MapSnapshot(rows));
    }

    return matches;
}
|
||||
|
||||
// Materializes the reader's current row as a BatchSnapshot. Ordinals follow the column order of
// the snapshot SELECT statements that call this mapper:
// 0 batch_id, 1 tenant_id, 2 range_start_t, 3 range_end_t, 4 head_link,
// 5 job_count, 6 created_at, 7 signed_by, 8 signature.
private static BatchSnapshot MapSnapshot(NpgsqlDataReader reader)
{
    var batchId = reader.GetGuid(0);
    var tenantId = reader.GetString(1);
    var rangeStart = reader.GetString(2);
    var rangeEnd = reader.GetString(3);
    var headLink = reader.GetFieldValue<byte[]>(4);
    var jobCount = reader.GetInt32(5);
    var createdAt = reader.GetFieldValue<DateTimeOffset>(6);

    // signed_by and signature are the only columns read as nullable.
    string? signedBy = null;
    if (!reader.IsDBNull(7))
    {
        signedBy = reader.GetString(7);
    }

    byte[]? signature = null;
    if (!reader.IsDBNull(8))
    {
        signature = reader.GetFieldValue<byte[]>(8);
    }

    return new BatchSnapshot
    {
        BatchId = batchId,
        TenantId = tenantId,
        RangeStartT = rangeStart,
        RangeEndT = rangeEnd,
        HeadLink = headLink,
        JobCount = jobCount,
        CreatedAt = createdAt,
        SignedBy = signedBy,
        Signature = signature
    };
}
|
||||
}
|
||||
@@ -0,0 +1,111 @@
|
||||
// <copyright file="PostgresChainHeadRepository.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Npgsql;
|
||||
using StellaOps.Infrastructure.Postgres.Repositories;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
/// <summary>
/// PostgreSQL-backed reader for <c>scheduler.chain_heads</c>, which holds one row per
/// tenant/partition describing the latest chain link.
/// </summary>
public sealed class PostgresChainHeadRepository : RepositoryBase<SchedulerDataSource>, IChainHeadRepository
{
    /// <summary>
    /// Initializes the repository; both arguments are forwarded to the base repository.
    /// </summary>
    public PostgresChainHeadRepository(SchedulerDataSource dataSource, ILogger<PostgresChainHeadRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    public async Task<byte[]?> GetLastLinkAsync(
        string tenantId,
        string partitionKey,
        CancellationToken cancellationToken = default)
    {
        const string lastLinkSql = """
            SELECT last_link
            FROM scheduler.chain_heads
            WHERE tenant_id = @tenant_id AND partition_key = @partition_key
            """;

        await using var conn = await DataSource
            .OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var cmd = CreateCommand(lastLinkSql, conn);

        AddParameter(cmd, "tenant_id", tenantId);
        AddParameter(cmd, "partition_key", partitionKey);

        // Anything that is not a byte[] (no row, or a database NULL) is reported as null.
        var scalar = await cmd.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return scalar is byte[] link ? link : null;
    }

    /// <inheritdoc />
    public async Task<ChainHead?> GetAsync(
        string tenantId,
        string partitionKey,
        CancellationToken cancellationToken = default)
    {
        const string headSql = """
            SELECT tenant_id, partition_key, last_link, last_t_hlc, last_job_id, updated_at
            FROM scheduler.chain_heads
            WHERE tenant_id = @tenant_id AND partition_key = @partition_key
            """;

        await using var conn = await DataSource
            .OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var cmd = CreateCommand(headSql, conn);

        AddParameter(cmd, "tenant_id", tenantId);
        AddParameter(cmd, "partition_key", partitionKey);

        await using var row = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await row.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        return MapChainHead(row);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<ChainHead>> GetAllForTenantAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        // Heads are returned sorted by partition key.
        const string tenantHeadsSql = """
            SELECT tenant_id, partition_key, last_link, last_t_hlc, last_job_id, updated_at
            FROM scheduler.chain_heads
            WHERE tenant_id = @tenant_id
            ORDER BY partition_key
            """;

        await using var conn = await DataSource
            .OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var cmd = CreateCommand(tenantHeadsSql, conn);
        AddParameter(cmd, "tenant_id", tenantId);

        var result = new List<ChainHead>();
        await using var rows = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

        for (var hasRow = await rows.ReadAsync(cancellationToken).ConfigureAwait(false);
             hasRow;
             hasRow = await rows.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            result.Add(MapChainHead(rows));
        }

        return result;
    }

    // Materializes the current row; ordinals follow the chain_heads SELECT column order:
    // 0 tenant_id, 1 partition_key, 2 last_link, 3 last_t_hlc, 4 last_job_id, 5 updated_at.
    private static ChainHead MapChainHead(NpgsqlDataReader reader)
    {
        var tenantId = reader.GetString(0);
        var partitionKey = reader.GetString(1);
        var lastLink = reader.GetFieldValue<byte[]>(2);
        var lastTHlc = reader.GetString(3);
        var lastJobId = reader.GetGuid(4);
        var updatedAt = reader.GetFieldValue<DateTimeOffset>(5);

        return new ChainHead
        {
            TenantId = tenantId,
            PartitionKey = partitionKey,
            LastLink = lastLink,
            LastTHlc = lastTHlc,
            LastJobId = lastJobId,
            UpdatedAt = updatedAt
        };
    }
}
|
||||
@@ -0,0 +1,335 @@
|
||||
// <copyright file="PostgresSchedulerLogRepository.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Npgsql;
|
||||
using StellaOps.Infrastructure.Postgres.Repositories;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
/// <summary>
/// PostgreSQL repository for HLC-ordered scheduler log operations.
/// </summary>
/// <remarks>
/// Inserts go through the <c>scheduler.insert_log_with_chain_update</c> stored function; all reads
/// query <c>scheduler.scheduler_log</c> and, where ordered, sort by <c>t_hlc</c> ascending.
/// </remarks>
public sealed class PostgresSchedulerLogRepository : RepositoryBase<SchedulerDataSource>, ISchedulerLogRepository
{
    /// <summary>
    /// Creates a new scheduler log repository.
    /// </summary>
    public PostgresSchedulerLogRepository(SchedulerDataSource dataSource, ILogger<PostgresSchedulerLogRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="entry"/> is null.</exception>
    public async Task<long> InsertWithChainUpdateAsync(
        SchedulerLogEntry entry,
        CancellationToken cancellationToken = default)
    {
        // Fail with a descriptive exception before a connection is opened.
        ArgumentNullException.ThrowIfNull(entry);

        // Use the stored function for atomic insert + chain head update
        const string sql = """
            SELECT scheduler.insert_log_with_chain_update(
            @tenant_id,
            @t_hlc,
            @partition_key,
            @job_id,
            @payload_hash,
            @prev_link,
            @link
            )
            """;

        await using var connection = await DataSource.OpenConnectionAsync(entry.TenantId, "writer", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddParameter(command, "tenant_id", entry.TenantId);
        AddParameter(command, "t_hlc", entry.THlc);
        AddParameter(command, "partition_key", entry.PartitionKey);
        AddParameter(command, "job_id", entry.JobId);
        AddParameter(command, "payload_hash", entry.PayloadHash);
        // A missing previous link (first entry in a chain) is sent as SQL NULL.
        AddParameter(command, "prev_link", entry.PrevLink ?? (object)DBNull.Value);
        AddParameter(command, "link", entry.Link);

        // The function's scalar result is returned to the caller as a 64-bit integer.
        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt64(result);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<SchedulerLogEntry>> GetByHlcOrderAsync(
        string tenantId,
        string? partitionKey,
        int limit,
        CancellationToken cancellationToken = default)
    {
        // A null partition key means "all partitions of the tenant".
        var sql = partitionKey is null
            ? """
              SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
              payload_hash, prev_link, link, created_at
              FROM scheduler.scheduler_log
              WHERE tenant_id = @tenant_id
              ORDER BY t_hlc ASC
              LIMIT @limit
              """
            : """
              SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
              payload_hash, prev_link, link, created_at
              FROM scheduler.scheduler_log
              WHERE tenant_id = @tenant_id AND partition_key = @partition_key
              ORDER BY t_hlc ASC
              LIMIT @limit
              """;

        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "limit", limit);
        if (partitionKey is not null)
        {
            AddParameter(command, "partition_key", partitionKey);
        }

        return await ReadEntriesAsync(command, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<SchedulerLogEntry>> GetByHlcRangeAsync(
        string tenantId,
        string? startTHlc,
        string? endTHlc,
        int limit = 0,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        // A non-positive limit means "no limit". The limit is bound as a parameter, like every
        // other query in this class, instead of being interpolated into the SQL text.
        var applyLimit = limit > 0;
        var limitClause = applyLimit ? "LIMIT @limit" : string.Empty;

        var sql = $"""
            SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
            payload_hash, prev_link, link, created_at
            FROM scheduler.scheduler_log
            WHERE {BuildRangeFilter(startTHlc, endTHlc, partitionKey)}
            ORDER BY t_hlc ASC
            {limitClause}
            """;

        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddRangeParameters(command, tenantId, startTHlc, endTHlc, partitionKey);
        if (applyLimit)
        {
            AddParameter(command, "limit", limit);
        }

        return await ReadEntriesAsync(command, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<SchedulerLogEntry>> GetAfterHlcAsync(
        string tenantId,
        string afterTHlc,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        // Strictly greater-than: the entry at afterTHlc itself is excluded.
        var sql = partitionKey is null
            ? """
              SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
              payload_hash, prev_link, link, created_at
              FROM scheduler.scheduler_log
              WHERE tenant_id = @tenant_id AND t_hlc > @after_t_hlc
              ORDER BY t_hlc ASC
              LIMIT @limit
              """
            : """
              SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
              payload_hash, prev_link, link, created_at
              FROM scheduler.scheduler_log
              WHERE tenant_id = @tenant_id AND t_hlc > @after_t_hlc AND partition_key = @partition_key
              ORDER BY t_hlc ASC
              LIMIT @limit
              """;

        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "after_t_hlc", afterTHlc);
        AddParameter(command, "limit", limit);
        if (partitionKey is not null)
        {
            AddParameter(command, "partition_key", partitionKey);
        }

        return await ReadEntriesAsync(command, cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public async Task<int> CountByHlcRangeAsync(
        string tenantId,
        string? startTHlc,
        string? endTHlc,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        var sql = $"""
            SELECT COUNT(*)
            FROM scheduler.scheduler_log
            WHERE {BuildRangeFilter(startTHlc, endTHlc, partitionKey)}
            """;

        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddRangeParameters(command, tenantId, startTHlc, endTHlc, partitionKey);

        // Convert.ToInt32 throws OverflowException if the count does not fit in an int.
        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return Convert.ToInt32(result);
    }

    /// <inheritdoc />
    public async Task<SchedulerLogEntry?> GetByJobIdAsync(
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        // The lookup is by job_id alone (no tenant filter), so it runs on a system connection.
        // If several rows share the job_id, the first row the database returns is used.
        const string sql = """
            SELECT seq_bigint, tenant_id, t_hlc, partition_key, job_id,
            payload_hash, prev_link, link, created_at
            FROM scheduler.scheduler_log
            WHERE job_id = @job_id
            """;

        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "job_id", jobId);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        return await reader.ReadAsync(cancellationToken).ConfigureAwait(false) ? MapEntry(reader) : null;
    }

    /// <inheritdoc />
    public async Task<bool> ExistsAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        const string sql = """
            SELECT EXISTS(
            SELECT 1 FROM scheduler.scheduler_log
            WHERE tenant_id = @tenant_id AND job_id = @job_id
            )
            """;

        await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken)
            .ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);

        AddParameter(command, "tenant_id", tenantId);
        AddParameter(command, "job_id", jobId);

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return result is true;
    }

    // Builds the WHERE predicate shared by the range query and the range count. The tenant filter
    // is always present; each optional bound or partition adds one parameterized condition.
    private static string BuildRangeFilter(string? startTHlc, string? endTHlc, string? partitionKey)
    {
        var conditions = new List<string> { "tenant_id = @tenant_id" };

        if (startTHlc is not null)
        {
            conditions.Add("t_hlc >= @start_t_hlc");
        }

        if (endTHlc is not null)
        {
            conditions.Add("t_hlc <= @end_t_hlc");
        }

        if (partitionKey is not null)
        {
            conditions.Add("partition_key = @partition_key");
        }

        return string.Join(" AND ", conditions);
    }

    // Binds exactly the parameters referenced by the predicate from BuildRangeFilter for the same arguments.
    private void AddRangeParameters(
        NpgsqlCommand command,
        string tenantId,
        string? startTHlc,
        string? endTHlc,
        string? partitionKey)
    {
        AddParameter(command, "tenant_id", tenantId);

        if (startTHlc is not null)
        {
            AddParameter(command, "start_t_hlc", startTHlc);
        }

        if (endTHlc is not null)
        {
            AddParameter(command, "end_t_hlc", endTHlc);
        }

        if (partitionKey is not null)
        {
            AddParameter(command, "partition_key", partitionKey);
        }
    }

    // Executes the command and maps every returned row, preserving the order produced by the query.
    private static async Task<IReadOnlyList<SchedulerLogEntry>> ReadEntriesAsync(
        NpgsqlCommand command,
        CancellationToken cancellationToken)
    {
        var entries = new List<SchedulerLogEntry>();
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            entries.Add(MapEntry(reader));
        }

        return entries;
    }

    // Materializes the current row; ordinals follow the scheduler_log SELECT column order:
    // 0 seq_bigint, 1 tenant_id, 2 t_hlc, 3 partition_key, 4 job_id,
    // 5 payload_hash, 6 prev_link (nullable), 7 link, 8 created_at.
    private static SchedulerLogEntry MapEntry(NpgsqlDataReader reader)
    {
        return new SchedulerLogEntry
        {
            SeqBigint = reader.GetInt64(0),
            TenantId = reader.GetString(1),
            THlc = reader.GetString(2),
            PartitionKey = reader.GetString(3),
            JobId = reader.GetGuid(4),
            PayloadHash = reader.GetFieldValue<byte[]>(5),
            PrevLink = reader.IsDBNull(6) ? null : reader.GetFieldValue<byte[]>(6),
            Link = reader.GetFieldValue<byte[]>(7),
            CreatedAt = reader.GetFieldValue<DateTimeOffset>(8)
        };
    }
}
|
||||
@@ -0,0 +1,123 @@
|
||||
// <copyright file="SchedulerChainLinking.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using StellaOps.Canonical.Json;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence;
|
||||
|
||||
/// <summary>
/// Chain linking utilities for scheduler audit-safe ordering.
/// Implements: link_i = Hash(link_{i-1} || job_id || t_hlc || payload_hash)
/// </summary>
public static class SchedulerChainLinking
{
    /// <summary>
    /// Size of a chain link in bytes (SHA-256).
    /// </summary>
    public const int LinkSizeBytes = 32;

    /// <summary>
    /// Zero link used as prev_link for the first entry in a chain.
    /// </summary>
    public static readonly byte[] ZeroLink = new byte[LinkSizeBytes];

    /// <summary>
    /// Compute chain link per advisory specification:
    /// link_i = Hash(link_{i-1} || job_id || t_hlc || payload_hash)
    /// </summary>
    /// <param name="prevLink">
    /// Previous chain link: null or empty for the first entry, otherwise exactly
    /// <see cref="LinkSizeBytes"/> bytes.
    /// </param>
    /// <param name="jobId">Job identifier.</param>
    /// <param name="tHlc">HLC timestamp; hashed as the UTF-8 bytes of its sortable string form.</param>
    /// <param name="payloadHash">SHA-256 hash of canonical payload.</param>
    /// <returns>The computed chain link (32 bytes).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="payloadHash"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="prevLink"/> is non-empty but not <see cref="LinkSizeBytes"/> bytes long.
    /// </exception>
    public static byte[] ComputeLink(
        byte[]? prevLink,
        Guid jobId,
        HlcTimestamp tHlc,
        byte[] payloadHash)
    {
        ArgumentNullException.ThrowIfNull(payloadHash);

        // A truncated or oversized previous link must not be hashed as if it were the start of a
        // chain; that would hide corruption behind a link that looks like a first entry.
        if (!IsWellFormedPrevLink(prevLink))
        {
            throw new ArgumentException(
                $"Previous link must be null, empty, or exactly {LinkSizeBytes} bytes (got {prevLink!.Length}).",
                nameof(prevLink));
        }

        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);

        // Previous link (or 32 zero bytes for first entry)
        hasher.AppendData(prevLink is { Length: LinkSizeBytes } ? prevLink : ZeroLink);

        // Job ID in the layout produced by Guid.ToByteArray(): the first three fields are
        // little-endian and the last eight bytes are in order (this is NOT RFC 4122 big-endian).
        // Any verifier on another platform has to reproduce this exact byte order.
        hasher.AppendData(jobId.ToByteArray());

        // HLC timestamp as UTF-8 bytes
        hasher.AppendData(Encoding.UTF8.GetBytes(tHlc.ToSortableString()));

        // Payload hash
        hasher.AppendData(payloadHash);

        return hasher.GetHashAndReset();
    }

    /// <summary>
    /// Compute chain link from string HLC timestamp.
    /// </summary>
    /// <remarks>
    /// The string is parsed with <c>HlcTimestamp.Parse</c> and re-rendered by the typed overload, so
    /// the bytes that get hashed are the parsed timestamp's sortable form.
    /// </remarks>
    public static byte[] ComputeLink(
        byte[]? prevLink,
        Guid jobId,
        string tHlcString,
        byte[] payloadHash)
    {
        var tHlc = HlcTimestamp.Parse(tHlcString);
        return ComputeLink(prevLink, jobId, tHlc, payloadHash);
    }

    /// <summary>
    /// Compute deterministic payload hash from canonical JSON.
    /// </summary>
    /// <typeparam name="T">Payload type.</typeparam>
    /// <param name="payload">The payload object.</param>
    /// <returns>SHA-256 hash of the UTF-8 encoded <c>CanonJson.Serialize</c> output.</returns>
    public static byte[] ComputePayloadHash<T>(T payload)
    {
        var canonical = CanonJson.Serialize(payload);
        return SHA256.HashData(Encoding.UTF8.GetBytes(canonical));
    }

    /// <summary>
    /// Compute payload hash from raw bytes.
    /// </summary>
    /// <param name="payloadBytes">Raw payload bytes.</param>
    /// <returns>SHA-256 hash of the bytes.</returns>
    public static byte[] ComputePayloadHash(byte[] payloadBytes)
    {
        return SHA256.HashData(payloadBytes);
    }

    /// <summary>
    /// Verify that a chain link matches the expected computation.
    /// </summary>
    /// <returns>
    /// True when <paramref name="storedLink"/> equals the recomputed link. False when it differs or
    /// when <paramref name="prevLink"/> is malformed (non-empty and not <see cref="LinkSizeBytes"/> bytes).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="payloadHash"/> is null.</exception>
    public static bool VerifyLink(
        byte[] storedLink,
        byte[]? prevLink,
        Guid jobId,
        HlcTimestamp tHlc,
        byte[] payloadHash)
    {
        ArgumentNullException.ThrowIfNull(payloadHash);

        // Verification reports malformed chain data as a failed check instead of throwing.
        if (!IsWellFormedPrevLink(prevLink))
        {
            return false;
        }

        var computed = ComputeLink(prevLink, jobId, tHlc, payloadHash);

        // Constant-time comparison; spans of different lengths compare as unequal.
        return CryptographicOperations.FixedTimeEquals(storedLink, computed);
    }

    /// <summary>
    /// Convert link bytes to hex string for display.
    /// </summary>
    /// <returns>Lower-case hex, or the literal <c>(null)</c> for a null or empty link.</returns>
    public static string ToHex(byte[]? link)
    {
        if (link is null or { Length: 0 })
        {
            return "(null)";
        }

        return Convert.ToHexString(link).ToLowerInvariant();
    }

    // True when prevLink is absent (null/empty, i.e. first entry) or exactly one link long.
    private static bool IsWellFormedPrevLink(byte[]? prevLink)
    {
        return prevLink is null or { Length: 0 } or { Length: LinkSizeBytes };
    }
}
|
||||
@@ -27,6 +27,8 @@
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Determinism.Abstractions\StellaOps.Determinism.Abstractions.csproj" />
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.EfCore\StellaOps.Infrastructure.EfCore.csproj" />
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.HybridLogicalClock\StellaOps.HybridLogicalClock.csproj" />
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Canonical.Json\StellaOps.Canonical.Json.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<!-- Embed SQL migrations as resources -->
|
||||
|
||||
@@ -0,0 +1,235 @@
|
||||
// <copyright file="BatchSnapshotDsseSigner.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Globalization;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Configuration for DSSE signing of batch snapshots.
/// </summary>
public sealed class BatchSnapshotDsseOptions
{
    /// <summary>
    /// Signing mode. Use "hmac" for HMAC-SHA256 or "none" to disable signing. Defaults to "none".
    /// </summary>
    public string Mode { get; set; } = "none";

    /// <summary>
    /// Base64-encoded HMAC secret key. Must be set when <see cref="Mode"/> is "hmac".
    /// </summary>
    public string? SecretBase64 { get; set; }

    /// <summary>
    /// Identifier of the key, reported alongside each signature.
    /// </summary>
    public string KeyId { get; set; } = "scheduler-batch-snapshot";

    /// <summary>
    /// Payload type string placed in the DSSE envelope.
    /// </summary>
    public string PayloadType { get; set; } = "application/vnd.stellaops.scheduler.batch-snapshot+json";
}
|
||||
|
||||
/// <summary>
/// Contract for signing and verifying batch snapshot digests with DSSE.
/// </summary>
public interface IBatchSnapshotDsseSigner
{
    /// <summary>
    /// Produces a signature over a batch snapshot digest.
    /// </summary>
    /// <param name="digest">Digest bytes to sign.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The key identifier and signature bytes.</returns>
    Task<BatchSnapshotSignatureResult> SignAsync(
        byte[] digest,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks a signature previously produced for a batch snapshot digest.
    /// </summary>
    /// <param name="digest">Digest bytes that were originally signed.</param>
    /// <param name="signature">Signature bytes to check.</param>
    /// <param name="keyId">Identifier of the key the signature was made with.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>True when the signature is valid.</returns>
    Task<bool> VerifyAsync(
        byte[] digest,
        byte[] signature,
        string keyId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets a value indicating whether signing is enabled.
    /// </summary>
    bool IsEnabled { get; }
}
|
||||
|
||||
/// <summary>
/// DSSE signer for batch snapshots using HMAC-SHA256.
/// </summary>
/// <remarks>
/// The MAC is computed over the DSSE pre-authentication encoding of the configured payload type and
/// the supplied digest. The decoded secret is zeroed as soon as the MAC has been computed.
/// </remarks>
public sealed class BatchSnapshotDsseSigner : IBatchSnapshotDsseSigner
{
    private readonly IOptions<BatchSnapshotDsseOptions> _options;
    private readonly ILogger<BatchSnapshotDsseSigner> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="BatchSnapshotDsseSigner"/> class.
    /// </summary>
    /// <param name="options">Signing options.</param>
    /// <param name="logger">Logger instance.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public BatchSnapshotDsseSigner(
        IOptions<BatchSnapshotDsseOptions> options,
        ILogger<BatchSnapshotDsseSigner> logger)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc/>
    public bool IsEnabled => string.Equals(_options.Value.Mode, "hmac", StringComparison.OrdinalIgnoreCase);

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException"><paramref name="digest"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// Signing is enabled but the secret is missing or is not valid Base64.
    /// </exception>
    public Task<BatchSnapshotSignatureResult> SignAsync(byte[] digest, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(digest);
        cancellationToken.ThrowIfCancellationRequested();

        var opts = _options.Value;

        // When signing is disabled the result carries an empty key id and an empty signature.
        if (!IsEnabled)
        {
            _logger.LogDebug("Batch snapshot DSSE signing is disabled");
            return Task.FromResult(new BatchSnapshotSignatureResult(string.Empty, Array.Empty<byte>()));
        }

        if (string.IsNullOrWhiteSpace(opts.SecretBase64))
        {
            throw new InvalidOperationException("HMAC signing mode requires SecretBase64 to be configured");
        }

        byte[] secret;
        try
        {
            secret = Convert.FromBase64String(opts.SecretBase64);
        }
        catch (FormatException ex)
        {
            throw new InvalidOperationException("SecretBase64 is not valid Base64", ex);
        }

        // Sign the DSSE pre-authentication encoding with HMAC-SHA256; the key buffer is cleared afterwards.
        var signature = ComputeHmacAndClearKey(secret, opts.PayloadType, digest);

        _logger.LogDebug(
            "Signed batch snapshot with key {KeyId}, digest length {DigestLength}, signature length {SigLength}",
            opts.KeyId, digest.Length, signature.Length);

        return Task.FromResult(new BatchSnapshotSignatureResult(opts.KeyId, signature));
    }

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException">Any of the reference arguments is null.</exception>
    public Task<bool> VerifyAsync(byte[] digest, byte[] signature, string keyId, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(digest);
        ArgumentNullException.ThrowIfNull(signature);
        ArgumentNullException.ThrowIfNull(keyId);
        cancellationToken.ThrowIfCancellationRequested();

        var opts = _options.Value;

        // NOTE(review): with signing disabled every signature is reported as valid. Callers that
        // need to distinguish "verified" from "not checked" should consult IsEnabled first.
        if (!IsEnabled)
        {
            _logger.LogDebug("Batch snapshot DSSE verification skipped - signing is disabled");
            return Task.FromResult(true);
        }

        if (!string.Equals(keyId, opts.KeyId, StringComparison.Ordinal))
        {
            _logger.LogWarning("Key ID mismatch: expected {Expected}, got {Actual}", opts.KeyId, keyId);
            return Task.FromResult(false);
        }

        // Configuration problems make verification fail (false) rather than throw.
        if (string.IsNullOrWhiteSpace(opts.SecretBase64))
        {
            _logger.LogWarning("Cannot verify signature - SecretBase64 not configured");
            return Task.FromResult(false);
        }

        byte[] secret;
        try
        {
            secret = Convert.FromBase64String(opts.SecretBase64);
        }
        catch (FormatException)
        {
            _logger.LogWarning("Cannot verify signature - SecretBase64 is not valid Base64");
            return Task.FromResult(false);
        }

        var expected = ComputeHmacAndClearKey(secret, opts.PayloadType, digest);

        // Constant-time comparison; a signature of the wrong length compares as unequal.
        var isValid = CryptographicOperations.FixedTimeEquals(expected, signature);

        _logger.LogDebug(
            "Verified batch snapshot signature with key {KeyId}: {Result}",
            keyId, isValid ? "valid" : "invalid");

        return Task.FromResult(isValid);
    }

    // Computes HMAC-SHA256 over the DSSE PAE of (payloadType, digest), then zeroes the key buffer
    // so the decoded secret does not stay in memory after use.
    private static byte[] ComputeHmacAndClearKey(byte[] secret, string payloadType, ReadOnlySpan<byte> digest)
    {
        try
        {
            var pae = ComputePreAuthenticationEncoding(payloadType, digest);
            return HMACSHA256.HashData(secret, pae);
        }
        finally
        {
            CryptographicOperations.ZeroMemory(secret);
        }
    }

    /// <summary>
    /// Computes DSSE Pre-Authentication Encoding (PAE).
    /// Format: "DSSEv1" SP len(payloadType) SP payloadType SP len(payload) SP payload
    /// </summary>
    /// <remarks>
    /// Follows DSSE v1 specification with ASCII decimal lengths and space separators.
    /// len(payloadType) is the UTF-8 byte count of the type string, not its character count.
    /// </remarks>
    /// <param name="payloadType">Payload type string; encoded as UTF-8.</param>
    /// <param name="payload">Payload bytes, appended verbatim.</param>
    /// <returns>The encoded byte sequence.</returns>
    internal static byte[] ComputePreAuthenticationEncoding(string payloadType, ReadOnlySpan<byte> payload)
    {
        var typeBytes = Encoding.UTF8.GetBytes(payloadType);

        // The two textual segments around the payload type: "DSSEv1 <len> " and " <len> ".
        var head = Encoding.UTF8.GetBytes(
            "DSSEv1 " + typeBytes.Length.ToString(CultureInfo.InvariantCulture) + " ");
        var middle = Encoding.UTF8.GetBytes(
            " " + payload.Length.ToString(CultureInfo.InvariantCulture) + " ");

        var buffer = new byte[head.Length + typeBytes.Length + middle.Length + payload.Length];
        var cursor = buffer.AsSpan();

        head.CopyTo(cursor);
        cursor = cursor[head.Length..];

        typeBytes.CopyTo(cursor);
        cursor = cursor[typeBytes.Length..];

        middle.CopyTo(cursor);
        cursor = cursor[middle.Length..];

        payload.CopyTo(cursor);

        return buffer;
    }
}
|
||||
@@ -0,0 +1,312 @@
|
||||
// <copyright file="BatchSnapshotService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Canonical.Json;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Callback used to sign a batch snapshot digest. Supplying one is optional.
/// </summary>
/// <param name="digest">Digest bytes to be signed.</param>
/// <param name="cancellationToken">Token used to cancel the signing operation.</param>
/// <returns>
/// A task producing the identifier of the key that signed, together with the signature bytes.
/// </returns>
public delegate Task<BatchSnapshotSignatureResult> BatchSnapshotSignerDelegate(byte[] digest, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
/// Outcome of a batch snapshot signing operation.
/// </summary>
/// <param name="KeyId">Identifier of the key that produced the signature.</param>
/// <param name="Signature">Raw signature bytes.</param>
public readonly record struct BatchSnapshotSignatureResult(
    string KeyId,
    byte[] Signature);
|
||||
|
||||
/// <summary>
/// Callback used to verify the DSSE signature of a batch snapshot. Supplying one is optional.
/// </summary>
/// <param name="keyId">Identifier of the key the snapshot claims to be signed with.</param>
/// <param name="digest">Digest bytes the signature is expected to cover.</param>
/// <param name="signature">Signature bytes to check.</param>
/// <param name="cancellationToken">Token used to cancel the verification.</param>
/// <returns>A task producing <c>true</c> when the signature is valid, otherwise <c>false</c>.</returns>
public delegate Task<bool> BatchSnapshotVerifierDelegate(string keyId, byte[] digest, byte[] signature, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of batch snapshot service for audit anchoring.
|
||||
/// </summary>
|
||||
public sealed class BatchSnapshotService : IBatchSnapshotService
|
||||
{
|
||||
private readonly ISchedulerLogRepository _logRepository;
|
||||
private readonly IBatchSnapshotRepository _snapshotRepository;
|
||||
private readonly BatchSnapshotSignerDelegate? _signer;
|
||||
private readonly BatchSnapshotVerifierDelegate? _verifier;
|
||||
private readonly ILogger<BatchSnapshotService> _logger;
|
||||
|
||||
/// <summary>
/// Initializes the service with its repositories, logger, and optional signing/verification callbacks.
/// </summary>
/// <param name="logRepository">Repository used to read scheduler log entries by HLC range.</param>
/// <param name="snapshotRepository">Repository used to persist and load batch snapshots.</param>
/// <param name="logger">Logger for diagnostic output.</param>
/// <param name="signer">Optional signing callback; when <c>null</c>, snapshots are never signed.</param>
/// <param name="verifier">Optional verification callback; may be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="logRepository"/>, <paramref name="snapshotRepository"/> or <paramref name="logger"/> is <c>null</c>.
/// </exception>
public BatchSnapshotService(
    ISchedulerLogRepository logRepository,
    IBatchSnapshotRepository snapshotRepository,
    ILogger<BatchSnapshotService> logger,
    BatchSnapshotSignerDelegate? signer = null,
    BatchSnapshotVerifierDelegate? verifier = null)
{
    // Validate required dependencies up front, in declaration order.
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(snapshotRepository);
    ArgumentNullException.ThrowIfNull(logger);

    _logRepository = logRepository;
    _snapshotRepository = snapshotRepository;
    _logger = logger;

    // Both callbacks are optional and stored as-is.
    _signer = signer;
    _verifier = verifier;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Loads the log entries for the tenant within the given HLC range (all partitions), records the
/// link of the last returned entry as the chain head, optionally signs a digest of the snapshot
/// and its jobs, and persists the snapshot. Throws <see cref="InvalidOperationException"/> when
/// the range contains no entries. If signing is requested but no signer was configured, a warning
/// is logged and the snapshot is stored unsigned.
/// </remarks>
public async Task<BatchSnapshot> CreateSnapshotAsync(
    string tenantId,
    HlcTimestamp startHlc,
    HlcTimestamp endHlc,
    bool sign = false,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var rangeStart = startHlc.ToSortableString();
    var rangeEnd = endHlc.ToSortableString();

    // limit: 0 is passed to request every entry in the range.
    var entries = await _logRepository.GetByHlcRangeAsync(
        tenantId,
        rangeStart,
        rangeEnd,
        limit: 0,
        partitionKey: null,
        cancellationToken).ConfigureAwait(false);

    if (entries.Count == 0)
    {
        throw new InvalidOperationException($"No jobs in specified HLC range [{rangeStart}, {rangeEnd}] for tenant {tenantId}");
    }

    var snapshot = new BatchSnapshot
    {
        BatchId = Guid.NewGuid(),
        TenantId = tenantId,
        RangeStartT = rangeStart,
        RangeEndT = rangeEnd,
        // The chain head is the link of the last entry returned for the range.
        HeadLink = entries[^1].Link,
        JobCount = entries.Count,
        CreatedAt = DateTimeOffset.UtcNow
    };

    if (sign && _signer is null)
    {
        _logger.LogWarning("Signing requested but no signer configured. Snapshot will be unsigned.");
    }
    else if (sign)
    {
        // The signature covers the digest of the snapshot fields plus every job in range.
        var signature = await _signer!(ComputeSnapshotDigest(snapshot, entries), cancellationToken).ConfigureAwait(false);
        snapshot = snapshot with
        {
            SignedBy = signature.KeyId,
            Signature = signature.Signature
        };
    }

    await _snapshotRepository.InsertAsync(snapshot, cancellationToken).ConfigureAwait(false);

    var wasSigned = snapshot.SignedBy is not null;
    _logger.LogInformation(
        "Batch snapshot created. BatchId={BatchId}, TenantId={TenantId}, Range=[{Start}, {End}], JobCount={JobCount}, Signed={Signed}",
        snapshot.BatchId,
        tenantId,
        rangeStart,
        rangeEnd,
        entries.Count,
        wasSigned);

    return snapshot;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>Delegates directly to the snapshot repository's lookup by identifier.</remarks>
public Task<BatchSnapshot?> GetSnapshotAsync(Guid batchId, CancellationToken cancellationToken = default)
    => _snapshotRepository.GetByIdAsync(batchId, cancellationToken);
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Validates <paramref name="tenantId"/> synchronously (null, empty, or whitespace is rejected)
/// and then delegates to the snapshot repository.
/// </remarks>
public Task<BatchSnapshot?> GetLatestSnapshotAsync(string tenantId, CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var latest = _snapshotRepository.GetLatestAsync(tenantId, cancellationToken);
    return latest;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// <para>
/// Loads the snapshot, re-reads the log entries for its tenant and HLC range (all partitions), and checks:
/// the current entry count against the stored job count; the link of the last entry against the stored
/// head link; and, when the snapshot records a signer, the signature.
/// </para>
/// <para>
/// Signature handling: a signer with a null/empty signature is reported as invalid. When no verifier is
/// configured the signature is reported as valid without being checked (see the review note in the body).
/// Exceptions thrown by the verifier are recorded as a failed verification, except for cancellation
/// requested through <paramref name="cancellationToken"/>, which propagates to the caller.
/// </para>
/// </remarks>
/// <exception cref="OperationCanceledException">
/// The verifier observed cancellation requested via <paramref name="cancellationToken"/>.
/// </exception>
public async Task<BatchSnapshotVerificationResult> VerifySnapshotAsync(
    Guid batchId,
    CancellationToken cancellationToken = default)
{
    var issues = new List<string>();

    var snapshot = await _snapshotRepository.GetByIdAsync(batchId, cancellationToken).ConfigureAwait(false);
    if (snapshot is null)
    {
        return new BatchSnapshotVerificationResult(
            IsValid: false,
            SnapshotFound: false,
            ChainHeadMatches: false,
            JobCountMatches: false,
            SignatureValid: null,
            Issues: ["Snapshot not found"]);
    }

    // Get current jobs in the same range
    var jobs = await _logRepository.GetByHlcRangeAsync(
        snapshot.TenantId,
        snapshot.RangeStartT,
        snapshot.RangeEndT,
        limit: 0,
        partitionKey: null,
        cancellationToken).ConfigureAwait(false);

    // Verify job count
    var jobCountMatches = jobs.Count == snapshot.JobCount;
    if (!jobCountMatches)
    {
        issues.Add($"Job count mismatch: expected {snapshot.JobCount}, found {jobs.Count}");
    }

    // Verify chain head: an empty range can never match a stored head link.
    var chainHeadMatches = jobs.Count > 0 && ByteArrayEquals(jobs[^1].Link, snapshot.HeadLink);
    if (!chainHeadMatches)
    {
        issues.Add("Chain head link does not match snapshot");
    }

    // DSSE signature verification; stays null when the snapshot is unsigned.
    bool? signatureValid = null;
    if (snapshot.SignedBy is not null)
    {
        if (snapshot.Signature is null or { Length: 0 })
        {
            issues.Add("Snapshot has signer but empty signature");
            signatureValid = false;
        }
        else if (_verifier is null)
        {
            // NOTE(review): the signature is reported as valid here without any cryptographic
            // check. Behavior is kept as-is; confirm that callers do not treat
            // SignatureValid == true as proof of verification.
            _logger.LogDebug(
                "Signature verification skipped for BatchId={BatchId}: no verifier configured",
                batchId);
            signatureValid = true; // Assume valid if no verifier
        }
        else
        {
            // The digest is recomputed from the stored snapshot and the entries read just now.
            var digest = ComputeSnapshotDigest(snapshot, jobs);
            try
            {
                signatureValid = await _verifier(
                    snapshot.SignedBy,
                    digest,
                    snapshot.Signature,
                    cancellationToken).ConfigureAwait(false);

                if (!signatureValid.Value)
                {
                    issues.Add($"DSSE signature verification failed for key {snapshot.SignedBy}");
                }
            }
            catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
            {
                // Caller-requested cancellation is excluded by the filter above so that an aborted
                // verification is not misreported as a bad signature. Any other failure, including
                // a verifier-internal timeout, is still recorded as a failed verification.
                _logger.LogWarning(ex, "Signature verification threw exception for BatchId={BatchId}", batchId);
                issues.Add($"Signature verification error: {ex.Message}");
                signatureValid = false;
            }
        }
    }

    // An unsigned snapshot (signatureValid == null) does not affect overall validity.
    var isValid = jobCountMatches && chainHeadMatches && (signatureValid ?? true);

    _logger.LogDebug(
        "Batch snapshot verification complete. BatchId={BatchId}, IsValid={IsValid}, Issues={Issues}",
        batchId,
        isValid,
        issues.Count > 0 ? string.Join("; ", issues) : "none");

    return new BatchSnapshotVerificationResult(
        IsValid: isValid,
        SnapshotFound: true,
        ChainHeadMatches: chainHeadMatches,
        JobCountMatches: jobCountMatches,
        SignatureValid: signatureValid,
        Issues: issues);
}
|
||||
|
||||
/// <summary>
/// Produces the SHA-256 digest that is signed when a snapshot is created and re-computed when it is verified.
/// </summary>
/// <param name="snapshot">
/// Snapshot whose batch id, tenant id, range bounds, head link (hex) and job count are included.
/// </param>
/// <param name="jobs">
/// Log entries in the snapshot range; each contributes its job id, HLC string, payload hash (hex) and link (hex).
/// </param>
/// <returns>SHA-256 over the UTF-8 bytes of the <c>CanonJson</c> serialization of those fields.</returns>
internal static byte[] ComputeSnapshotDigest(BatchSnapshot snapshot, IReadOnlyList<SchedulerLogEntry> jobs)
{
    var headLinkHex = Convert.ToHexString(snapshot.HeadLink);

    // Byte fields are hex-encoded so the serialized form contains only strings and numbers.
    var jobViews = jobs
        .Select(job => new
        {
            JobId = job.JobId,
            THlc = job.THlc,
            PayloadHash = Convert.ToHexString(job.PayloadHash),
            Link = Convert.ToHexString(job.Link)
        })
        .ToArray();

    // Property names and order are part of the signed representation; keep them stable.
    var digestInput = new
    {
        BatchId = snapshot.BatchId,
        TenantId = snapshot.TenantId,
        RangeStartT = snapshot.RangeStartT,
        RangeEndT = snapshot.RangeEndT,
        HeadLink = headLinkHex,
        JobCount = snapshot.JobCount,
        Jobs = jobViews
    };

    var canonicalBytes = Encoding.UTF8.GetBytes(CanonJson.Serialize(digestInput));
    return SHA256.HashData(canonicalBytes);
}
|
||||
|
||||
/// <summary>
/// Compares two byte arrays by content, treating two <c>null</c> references as equal.
/// </summary>
/// <param name="a">First array, may be <c>null</c>.</param>
/// <param name="b">Second array, may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> when both are <c>null</c> or both hold the same bytes in the same order; otherwise <c>false</c>.
/// </returns>
private static bool ByteArrayEquals(byte[]? a, byte[]? b)
{
    if (a is null || b is null)
    {
        // With at least one side missing, they are equal only if both are missing.
        return a is null && b is null;
    }

    return a.AsSpan().SequenceEqual(b);
}
|
||||
}
|
||||
@@ -0,0 +1,179 @@
|
||||
// <copyright file="HlcSchedulerDequeueService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of HLC-ordered scheduler job dequeuing.
|
||||
/// </summary>
|
||||
public sealed class HlcSchedulerDequeueService : IHlcSchedulerDequeueService
|
||||
{
|
||||
private readonly ISchedulerLogRepository _logRepository;
|
||||
private readonly ILogger<HlcSchedulerDequeueService> _logger;
|
||||
|
||||
/// <summary>
/// Initializes the dequeue service.
/// </summary>
/// <param name="logRepository">Repository used to read scheduler log entries.</param>
/// <param name="logger">Logger for diagnostic output.</param>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public HlcSchedulerDequeueService(
    ISchedulerLogRepository logRepository,
    ILogger<HlcSchedulerDequeueService> logger)
{
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(logger);

    _logRepository = logRepository;
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Reads up to <paramref name="limit"/> entries in HLC order, then issues a separate count over the
/// unbounded range so the result can report the total. The returned result carries no range bounds.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty, or whitespace.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
public async Task<SchedulerHlcDequeueResult> DequeueAsync(
    string tenantId,
    int limit,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    var page = await _logRepository
        .GetByHlcOrderAsync(tenantId, partitionKey, limit, cancellationToken)
        .ConfigureAwait(false);

    // Total across the whole tenant/partition, for pagination info.
    var total = await _logRepository
        .CountByHlcRangeAsync(tenantId, startTHlc: null, endTHlc: null, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    var partitionLabel = partitionKey ?? "(all)";
    _logger.LogDebug(
        "Dequeued {Count} of {Total} entries in HLC order. TenantId={TenantId}, PartitionKey={PartitionKey}",
        page.Count,
        total,
        tenantId,
        partitionLabel);

    return new SchedulerHlcDequeueResult(page, total, RangeStartHlc: null, RangeEndHlc: null);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Either bound may be <c>null</c>, in which case it is passed to the repository as <c>null</c> and
/// logged as "(unbounded)". The entries and the total count are fetched with two separate repository calls.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty, or whitespace.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
public async Task<SchedulerHlcDequeueResult> DequeueByRangeAsync(
    string tenantId,
    HlcTimestamp? startHlc,
    HlcTimestamp? endHlc,
    int limit,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    // Repository works on the sortable string form of the HLC bounds.
    var lowerBound = startHlc?.ToSortableString();
    var upperBound = endHlc?.ToSortableString();

    var page = await _logRepository
        .GetByHlcRangeAsync(tenantId, lowerBound, upperBound, limit, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    var total = await _logRepository
        .CountByHlcRangeAsync(tenantId, lowerBound, upperBound, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    _logger.LogDebug(
        "Dequeued {Count} of {Total} entries in HLC range [{Start}, {End}]. TenantId={TenantId}",
        page.Count,
        total,
        lowerBound ?? "(unbounded)",
        upperBound ?? "(unbounded)",
        tenantId);

    return new SchedulerHlcDequeueResult(page, total, startHlc, endHlc);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Reads up to <paramref name="limit"/> entries following the cursor, then counts entries from the
/// cursor onward with no upper bound. The result reports the cursor as its range start and has no range end.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty, or whitespace.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
public async Task<SchedulerHlcDequeueResult> DequeueAfterAsync(
    string tenantId,
    HlcTimestamp afterHlc,
    int limit,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    var cursor = afterHlc.ToSortableString();

    var page = await _logRepository
        .GetAfterHlcAsync(tenantId, cursor, limit, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    // NOTE(review): the cursor is passed as the start bound of a range count. Whether that bound is
    // inclusive is not visible here; if it is, the total includes the cursor entry itself - confirm.
    var total = await _logRepository
        .CountByHlcRangeAsync(tenantId, cursor, endTHlc: null, partitionKey, cancellationToken)
        .ConfigureAwait(false);

    var partitionLabel = partitionKey ?? "(all)";
    _logger.LogDebug(
        "Dequeued {Count} entries after HLC {AfterHlc}. TenantId={TenantId}, PartitionKey={PartitionKey}",
        page.Count,
        cursor,
        tenantId,
        partitionLabel);

    return new SchedulerHlcDequeueResult(page, total, afterHlc, RangeEndHlc: null);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// The repository lookup is keyed by job id only, so the tenant of the returned entry is compared
/// (ordinal) with <paramref name="tenantId"/>. An entry owned by a different tenant is logged as a
/// warning and reported as not found.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty, or whitespace.</exception>
public async Task<SchedulerLogEntry?> GetByJobIdAsync(
    string tenantId,
    Guid jobId,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

    var found = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);

    // Nothing found, or found and owned by the requesting tenant: hand it back unchanged.
    if (found is null || string.Equals(found.TenantId, tenantId, StringComparison.Ordinal))
    {
        return found;
    }

    // Tenant isolation: never surface another tenant's entry.
    _logger.LogWarning(
        "Job {JobId} found but belongs to different tenant. RequestedTenant={RequestedTenant}, ActualTenant={ActualTenant}",
        jobId,
        tenantId,
        found.TenantId);

    return null;
}
|
||||
}
|
||||
@@ -0,0 +1,166 @@
|
||||
// <copyright file="HlcSchedulerEnqueueService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Canonical.Json;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of HLC-ordered scheduler job enqueueing with chain linking.
|
||||
/// </summary>
|
||||
public sealed class HlcSchedulerEnqueueService : IHlcSchedulerEnqueueService
|
||||
{
|
||||
/// <summary>
|
||||
/// Namespace GUID for deterministic job ID generation (v5 UUID style).
|
||||
/// </summary>
|
||||
private static readonly Guid JobIdNamespace = new("b8a7c6d5-e4f3-42a1-9b0c-1d2e3f4a5b6c");
|
||||
|
||||
private readonly IHybridLogicalClock _hlc;
|
||||
private readonly ISchedulerLogRepository _logRepository;
|
||||
private readonly IChainHeadRepository _chainHeadRepository;
|
||||
private readonly ILogger<HlcSchedulerEnqueueService> _logger;
|
||||
|
||||
/// <summary>
/// Initializes the enqueue service.
/// </summary>
/// <param name="hlc">Hybrid logical clock used to stamp new entries.</param>
/// <param name="logRepository">Repository used to look up and insert scheduler log entries.</param>
/// <param name="chainHeadRepository">Repository used to read the previous chain link.</param>
/// <param name="logger">Logger for diagnostic output.</param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public HlcSchedulerEnqueueService(
    IHybridLogicalClock hlc,
    ISchedulerLogRepository logRepository,
    IChainHeadRepository chainHeadRepository,
    ILogger<HlcSchedulerEnqueueService> logger)
{
    ArgumentNullException.ThrowIfNull(hlc);
    ArgumentNullException.ThrowIfNull(logRepository);
    ArgumentNullException.ThrowIfNull(chainHeadRepository);
    ArgumentNullException.ThrowIfNull(logger);

    _hlc = hlc;
    _logRepository = logRepository;
    _chainHeadRepository = chainHeadRepository;
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Rejects a <c>null</c> message synchronously, then forwards to <c>EnqueueAsync</c> using the
/// message itself as the payload and its <c>IdempotencyKey</c> as the idempotency key.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="message"/> is <c>null</c>.</exception>
public Task<SchedulerHlcEnqueueResult> EnqueuePlannerAsync(
    string tenantId,
    PlannerQueueMessage message,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(message);

    var idempotencyKey = message.IdempotencyKey;
    return EnqueueAsync(tenantId, message, idempotencyKey, partitionKey, cancellationToken);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Rejects a <c>null</c> message synchronously, then forwards to <c>EnqueueAsync</c> using the
/// message itself as the payload and its <c>IdempotencyKey</c> as the idempotency key.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="message"/> is <c>null</c>.</exception>
public Task<SchedulerHlcEnqueueResult> EnqueueRunnerSegmentAsync(
    string tenantId,
    RunnerSegmentQueueMessage message,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(message);

    var idempotencyKey = message.IdempotencyKey;
    return EnqueueAsync(tenantId, message, idempotencyKey, partitionKey, cancellationToken);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// <para>
/// Steps: derive a deterministic job id from <paramref name="idempotencyKey"/>; return the existing
/// entry (flagged as deduplicated) if this tenant already has one for that job id; otherwise tick the
/// HLC, hash the payload, read the previous chain link for the tenant/partition, compute the new link
/// and insert the entry together with the chain head update.
/// </para>
/// <para>
/// The job id depends on the idempotency key only, and the lookup used to load an existing entry is
/// keyed by job id alone. The loaded entry's tenant is therefore checked before it is returned, so
/// another tenant's HLC timestamp and link are never handed back.
/// </para>
/// </remarks>
/// <exception cref="ArgumentException">
/// <paramref name="tenantId"/> or <paramref name="idempotencyKey"/> is null, empty, or whitespace.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="payload"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">
/// An entry exists for this tenant and job id, but the lookup by job id returned an entry owned by a
/// different tenant, so the request can neither be deduplicated safely nor enqueued without duplication.
/// </exception>
public async Task<SchedulerHlcEnqueueResult> EnqueueAsync<T>(
    string tenantId,
    T payload,
    string idempotencyKey,
    string? partitionKey = null,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
    ArgumentNullException.ThrowIfNull(payload);
    ArgumentException.ThrowIfNullOrWhiteSpace(idempotencyKey);

    // A missing partition key is stored as the empty string.
    var effectivePartitionKey = partitionKey ?? string.Empty;

    // 1. Generate deterministic job ID from idempotency key
    var jobId = ComputeDeterministicJobId(idempotencyKey);

    // 2. Check for existing entry (idempotency)
    if (await _logRepository.ExistsAsync(tenantId, jobId, cancellationToken).ConfigureAwait(false))
    {
        var existing = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
        if (existing is not null)
        {
            // Tenant isolation: the lookup above is not tenant-scoped, so verify ownership
            // before exposing the entry's timestamp and chain link.
            if (!string.Equals(existing.TenantId, tenantId, StringComparison.Ordinal))
            {
                _logger.LogWarning(
                    "Job {JobId} found but belongs to different tenant. RequestedTenant={RequestedTenant}, ActualTenant={ActualTenant}",
                    jobId,
                    tenantId,
                    existing.TenantId);

                // The message deliberately omits the other tenant's identifier.
                throw new InvalidOperationException(
                    $"Job {jobId} already exists for tenant {tenantId} but could not be loaded without crossing tenant boundaries.");
            }

            _logger.LogDebug(
                "Job already enqueued, returning existing entry. TenantId={TenantId}, JobId={JobId}",
                tenantId,
                jobId);

            return new SchedulerHlcEnqueueResult(
                HlcTimestamp.Parse(existing.THlc),
                existing.JobId,
                existing.Link,
                Deduplicated: true);
        }
    }

    // 3. Generate HLC timestamp
    var tHlc = _hlc.Tick();
    var sortableHlc = tHlc.ToSortableString();

    // 4. Compute payload hash
    var payloadHash = SchedulerChainLinking.ComputePayloadHash(payload);

    // 5. Get previous chain link
    var prevLink = await _chainHeadRepository.GetLastLinkAsync(tenantId, effectivePartitionKey, cancellationToken)
        .ConfigureAwait(false);

    // 6. Compute new chain link
    var link = SchedulerChainLinking.ComputeLink(prevLink, jobId, tHlc, payloadHash);

    // 7. Insert log entry (atomic with chain head update)
    var entry = new SchedulerLogEntry
    {
        TenantId = tenantId,
        THlc = sortableHlc,
        PartitionKey = effectivePartitionKey,
        JobId = jobId,
        PayloadHash = payloadHash,
        PrevLink = prevLink,
        Link = link
    };

    await _logRepository.InsertWithChainUpdateAsync(entry, cancellationToken).ConfigureAwait(false);

    _logger.LogInformation(
        "Job enqueued with HLC ordering. TenantId={TenantId}, JobId={JobId}, THlc={THlc}, Link={Link}",
        tenantId,
        jobId,
        sortableHlc,
        SchedulerChainLinking.ToHex(link));

    return new SchedulerHlcEnqueueResult(tHlc, jobId, link, Deduplicated: false);
}
|
||||
|
||||
/// <summary>
/// Derives a repeatable <see cref="Guid"/> from an idempotency key: SHA-256 over the namespace GUID
/// bytes followed by the UTF-8 key bytes, truncated to the first 16 bytes, with two bit fields overwritten.
/// </summary>
/// <param name="idempotencyKey">Key to derive the job id from.</param>
/// <returns>The same <see cref="Guid"/> for the same key on every call.</returns>
private static Guid ComputeDeterministicJobId(string idempotencyKey)
{
    // Namespace-plus-name input, in the spirit of a name-based UUID.
    var namespaceBytes = JobIdNamespace.ToByteArray();
    var keyBytes = Encoding.UTF8.GetBytes(idempotencyKey);

    var hashInput = new byte[namespaceBytes.Length + keyBytes.Length];
    namespaceBytes.CopyTo(hashInput, 0);
    keyBytes.CopyTo(hashInput, namespaceBytes.Length);

    // Keep only the leading 16 bytes of the 32-byte digest.
    var guidBytes = SHA256.HashData(hashInput)[..16];

    // Force the high nibble of byte 6 to 0x4 and the top two bits of byte 8 to 10.
    // NOTE(review): Guid(byte[]) reads bytes 6-7 as a little-endian 16-bit field, so the 0x4 nibble
    // written to byte 6 does not land in the canonical version position of the string form.
    // Left as-is because changing it would change every derived job id - confirm intent.
    guidBytes[6] &= 0x0F;
    guidBytes[6] |= 0x40;
    guidBytes[8] &= 0x3F;
    guidBytes[8] |= 0x80;

    return new Guid(guidBytes);
}
|
||||
}
|
||||
@@ -0,0 +1,178 @@
|
||||
// <copyright file="HlcSchedulerMetrics.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
|
||||
/// Metrics for HLC-ordered scheduler operations.
|
||||
/// </summary>
|
||||
public static class HlcSchedulerMetrics
|
||||
{
|
||||
private const string TenantTagName = "tenant";
|
||||
private const string PartitionTagName = "partition";
|
||||
private const string ResultTagName = "result";
|
||||
|
||||
private static readonly Meter Meter = new("StellaOps.Scheduler.Hlc");
|
||||
|
||||
// Enqueue metrics
|
||||
private static readonly Counter<long> EnqueuedCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_hlc_enqueues_total",
|
||||
unit: "{enqueue}",
|
||||
description: "Total number of HLC-ordered enqueue operations");
|
||||
|
||||
private static readonly Counter<long> EnqueueDeduplicatedCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_hlc_enqueue_deduplicated_total",
|
||||
unit: "{enqueue}",
|
||||
description: "Total number of deduplicated HLC enqueue operations");
|
||||
|
||||
private static readonly Histogram<double> EnqueueDurationHistogram = Meter.CreateHistogram<double>(
|
||||
"scheduler_hlc_enqueue_duration_seconds",
|
||||
unit: "s",
|
||||
description: "Duration of HLC enqueue operations");
|
||||
|
||||
// Dequeue metrics
|
||||
private static readonly Counter<long> DequeuedCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_hlc_dequeues_total",
|
||||
unit: "{dequeue}",
|
||||
description: "Total number of HLC-ordered dequeue operations");
|
||||
|
||||
private static readonly Counter<long> DequeuedEntriesCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_hlc_dequeued_entries_total",
|
||||
unit: "{entry}",
|
||||
description: "Total number of entries dequeued via HLC ordering");
|
||||
|
||||
// Chain verification metrics
|
||||
private static readonly Counter<long> ChainVerificationsCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_chain_verifications_total",
|
||||
unit: "{verification}",
|
||||
description: "Total number of chain verification operations");
|
||||
|
||||
private static readonly Counter<long> ChainVerificationIssuesCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_chain_verification_issues_total",
|
||||
unit: "{issue}",
|
||||
description: "Total number of chain verification issues found");
|
||||
|
||||
private static readonly Counter<long> ChainEntriesVerifiedCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_chain_entries_verified_total",
|
||||
unit: "{entry}",
|
||||
description: "Total number of chain entries verified");
|
||||
|
||||
// Batch snapshot metrics
|
||||
private static readonly Counter<long> SnapshotsCreatedCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_batch_snapshots_created_total",
|
||||
unit: "{snapshot}",
|
||||
description: "Total number of batch snapshots created");
|
||||
|
||||
private static readonly Counter<long> SnapshotsSignedCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_batch_snapshots_signed_total",
|
||||
unit: "{snapshot}",
|
||||
description: "Total number of signed batch snapshots");
|
||||
|
||||
private static readonly Counter<long> SnapshotVerificationsCounter = Meter.CreateCounter<long>(
|
||||
"scheduler_batch_snapshot_verifications_total",
|
||||
unit: "{verification}",
|
||||
description: "Total number of batch snapshot verification operations");
|
||||
|
||||
/// <summary>
/// Counts one HLC enqueue operation, and additionally counts it as deduplicated when applicable.
/// </summary>
/// <param name="tenantId">Tenant identifier, recorded as a tag.</param>
/// <param name="partitionKey">Partition key, recorded as a tag (null or empty is tagged as "(default)").</param>
/// <param name="deduplicated">When <c>true</c>, the deduplicated counter is incremented as well.</param>
public static void RecordEnqueue(string tenantId, string partitionKey, bool deduplicated)
{
    var tags = BuildTags(tenantId, partitionKey);

    // Every call counts as an enqueue, deduplicated or not.
    EnqueuedCounter.Add(1, tags);

    if (!deduplicated)
    {
        return;
    }

    EnqueueDeduplicatedCounter.Add(1, tags);
}
|
||||
|
||||
/// <summary>
/// Records how long an HLC enqueue operation took.
/// </summary>
/// <param name="tenantId">Tenant identifier, recorded as a tag.</param>
/// <param name="partitionKey">Partition key, recorded as a tag.</param>
/// <param name="durationSeconds">Elapsed time in seconds.</param>
public static void RecordEnqueueDuration(string tenantId, string partitionKey, double durationSeconds)
{
    var tags = BuildTags(tenantId, partitionKey);
    EnqueueDurationHistogram.Record(durationSeconds, tags);
}
|
||||
|
||||
/// <summary>
/// Counts one HLC dequeue operation and adds the number of entries it returned to the entries counter.
/// </summary>
/// <param name="tenantId">Tenant identifier, recorded as a tag.</param>
/// <param name="partitionKey">Partition key, recorded as a tag.</param>
/// <param name="entryCount">Number of entries returned by the dequeue.</param>
public static void RecordDequeue(string tenantId, string partitionKey, int entryCount)
{
    var tags = BuildTags(tenantId, partitionKey);

    // One operation ...
    DequeuedCounter.Add(1, tags);

    // ... yielding entryCount entries.
    DequeuedEntriesCounter.Add(entryCount, tags);
}
|
||||
|
||||
/// <summary>
/// Records the outcome of one chain verification run.
/// </summary>
/// <param name="tenantId">Tenant identifier, recorded as a tag on every measurement.</param>
/// <param name="entriesVerified">Number of entries checked; added to the entries-verified counter.</param>
/// <param name="issuesFound">Number of issues found; added to the issues counter only when positive.</param>
/// <param name="isValid">Outcome, recorded on the verification counter as "valid" or "invalid".</param>
public static void RecordChainVerification(string tenantId, int entriesVerified, int issuesFound, bool isValid)
{
    var outcome = isValid ? "valid" : "invalid";
    var resultTag = new KeyValuePair<string, object?>(ResultTagName, outcome);
    var tenantTag = new KeyValuePair<string, object?>(TenantTagName, tenantId);

    // Only the operation counter carries the result tag.
    ChainVerificationsCounter.Add(1, tenantTag, resultTag);
    ChainEntriesVerifiedCounter.Add(entriesVerified, tenantTag);

    if (issuesFound <= 0)
    {
        return;
    }

    ChainVerificationIssuesCounter.Add(issuesFound, tenantTag);
}
|
||||
|
||||
/// <summary>
/// Counts one created batch snapshot, and additionally counts it as signed when applicable.
/// </summary>
/// <param name="tenantId">Tenant identifier, recorded as a tag.</param>
/// <param name="jobCount">Number of jobs in the snapshot. This method does not currently record it.</param>
/// <param name="signed">When <c>true</c>, the signed-snapshots counter is incremented as well.</param>
public static void RecordSnapshotCreated(string tenantId, int jobCount, bool signed)
{
    var tenantTag = new KeyValuePair<string, object?>(TenantTagName, tenantId);

    SnapshotsCreatedCounter.Add(1, tenantTag);

    if (!signed)
    {
        return;
    }

    SnapshotsSignedCounter.Add(1, tenantTag);
}
|
||||
|
||||
/// <summary>
/// Counts one batch snapshot verification, tagged with the tenant and the outcome.
/// </summary>
/// <param name="tenantId">Tenant identifier, recorded as a tag.</param>
/// <param name="isValid">Outcome, recorded as "valid" or "invalid".</param>
public static void RecordSnapshotVerification(string tenantId, bool isValid)
{
    var outcome = isValid ? "valid" : "invalid";

    var tags = new KeyValuePair<string, object?>[2];
    tags[0] = new KeyValuePair<string, object?>(TenantTagName, tenantId);
    tags[1] = new KeyValuePair<string, object?>(ResultTagName, outcome);

    SnapshotVerificationsCounter.Add(1, tags);
}
|
||||
|
||||
/// <summary>
/// Builds the tenant/partition tag pair shared by the enqueue and dequeue instruments.
/// </summary>
/// <param name="tenantId">Value for the tenant tag.</param>
/// <param name="partitionKey">Value for the partition tag; null or empty is replaced by "(default)".</param>
/// <returns>A two-element array: tenant tag first, partition tag second.</returns>
private static KeyValuePair<string, object?>[] BuildTags(string tenantId, string partitionKey)
{
    var partitionLabel = string.IsNullOrEmpty(partitionKey) ? "(default)" : partitionKey;

    return new[]
    {
        new KeyValuePair<string, object?>(TenantTagName, tenantId),
        new KeyValuePair<string, object?>(PartitionTagName, partitionLabel)
    };
}
|
||||
}
|
||||
@@ -0,0 +1,103 @@
|
||||
// <copyright file="HlcSchedulerServiceCollectionExtensions.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
|
||||
/// Extension methods for registering HLC scheduler services.
|
||||
/// </summary>
|
||||
public static class HlcSchedulerServiceCollectionExtensions
|
||||
{
|
||||
/// <summary>
/// Registers the HLC scheduler repositories, services, and the DSSE signer.
/// </summary>
/// <param name="services">Service collection to add registrations to.</param>
/// <returns>The same <paramref name="services"/> instance, to allow call chaining.</returns>
/// <remarks>
/// Every registration is a "try add": a service type that is already registered is left untouched.
/// Repositories and services are scoped; the DSSE signer is a singleton.
/// </remarks>
public static IServiceCollection AddHlcSchedulerServices(this IServiceCollection services)
{
    // Repositories: scoped, for per-request database connections.
    services.TryAdd(ServiceDescriptor.Scoped<ISchedulerLogRepository, PostgresSchedulerLogRepository>());
    services.TryAdd(ServiceDescriptor.Scoped<IChainHeadRepository, PostgresChainHeadRepository>());
    services.TryAdd(ServiceDescriptor.Scoped<IBatchSnapshotRepository, PostgresBatchSnapshotRepository>());

    // Services: scoped as well, so their lifetime matches the repositories they consume.
    services.TryAdd(ServiceDescriptor.Scoped<IHlcSchedulerEnqueueService, HlcSchedulerEnqueueService>());
    services.TryAdd(ServiceDescriptor.Scoped<IHlcSchedulerDequeueService, HlcSchedulerDequeueService>());
    services.TryAdd(ServiceDescriptor.Scoped<IBatchSnapshotService, BatchSnapshotService>());
    services.TryAdd(ServiceDescriptor.Scoped<ISchedulerChainVerifier, SchedulerChainVerifier>());

    // DSSE signer (disabled by default).
    services.TryAdd(ServiceDescriptor.Singleton<IBatchSnapshotDsseSigner, BatchSnapshotDsseSigner>());

    return services;
}
|
||||
|
||||
/// <summary>
|
||||
/// Adds HLC-ordered scheduler services with DSSE signing support.
|
||||
/// </summary>
|
||||
/// <param name="services">The service collection.</param>
|
||||
/// <param name="configuration">Configuration section for DSSE options.</param>
|
||||
/// <returns>The service collection for chaining.</returns>
|
||||
public static IServiceCollection AddHlcSchedulerServicesWithDsseSigning(
|
||||
this IServiceCollection services,
|
||||
IConfiguration configuration)
|
||||
{
|
||||
// Configure DSSE options
|
||||
services.AddOptions<BatchSnapshotDsseOptions>()
|
||||
.Bind(configuration.GetSection("Scheduler:Queue:Hlc:DsseSigning"))
|
||||
.ValidateDataAnnotations()
|
||||
.ValidateOnStart();
|
||||
|
||||
// Add base services
|
||||
services.AddHlcSchedulerServices();
|
||||
|
||||
// Wire up DSSE signer to BatchSnapshotService
|
||||
services.AddScoped<IBatchSnapshotService>(sp =>
|
||||
{
|
||||
var logRepository = sp.GetRequiredService<ISchedulerLogRepository>();
|
||||
var snapshotRepository = sp.GetRequiredService<IBatchSnapshotRepository>();
|
||||
var logger = sp.GetRequiredService<Microsoft.Extensions.Logging.ILogger<BatchSnapshotService>>();
|
||||
var dsseSigner = sp.GetRequiredService<IBatchSnapshotDsseSigner>();
|
||||
|
||||
BatchSnapshotSignerDelegate? signer = dsseSigner.IsEnabled
|
||||
? dsseSigner.SignAsync
|
||||
: null;
|
||||
|
||||
return new BatchSnapshotService(logRepository, snapshotRepository, logger, signer);
|
||||
});
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Adds HLC-ordered scheduler services with a custom signer delegate.
|
||||
/// </summary>
|
||||
/// <param name="services">The service collection.</param>
|
||||
/// <param name="signerFactory">Factory to create the signer delegate.</param>
|
||||
/// <returns>The service collection for chaining.</returns>
|
||||
public static IServiceCollection AddHlcSchedulerServices(
|
||||
this IServiceCollection services,
|
||||
Func<IServiceProvider, BatchSnapshotSignerDelegate> signerFactory)
|
||||
{
|
||||
services.AddHlcSchedulerServices();
|
||||
|
||||
// Override BatchSnapshotService registration to include signer
|
||||
services.AddScoped<IBatchSnapshotService>(sp =>
|
||||
{
|
||||
var logRepository = sp.GetRequiredService<ISchedulerLogRepository>();
|
||||
var snapshotRepository = sp.GetRequiredService<IBatchSnapshotRepository>();
|
||||
var logger = sp.GetRequiredService<Microsoft.Extensions.Logging.ILogger<BatchSnapshotService>>();
|
||||
var signer = signerFactory(sp);
|
||||
|
||||
return new BatchSnapshotService(logRepository, snapshotRepository, logger, signer);
|
||||
});
|
||||
|
||||
return services;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
// <copyright file="IBatchSnapshotService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Creates, looks up, and verifies batch snapshots of the scheduler chain.
/// </summary>
/// <remarks>
/// A batch snapshot is an audit anchor: it captures the chain head for a specific
/// HLC range. Snapshots can optionally be signed with DSSE for attestation purposes.
/// </remarks>
public interface IBatchSnapshotService
{
    /// <summary>
    /// Creates a batch snapshot covering the given HLC range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startHlc">First HLC timestamp of the range (inclusive).</param>
    /// <param name="endHlc">Last HLC timestamp of the range (inclusive).</param>
    /// <param name="sign">Whether to sign the snapshot with DSSE.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The created batch snapshot.</returns>
    Task<BatchSnapshot> CreateSnapshotAsync(
        string tenantId,
        HlcTimestamp startHlc,
        HlcTimestamp endHlc,
        bool sign = false,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up a batch snapshot by its identifier.
    /// </summary>
    /// <param name="batchId">The batch identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The snapshot if found.</returns>
    Task<BatchSnapshot?> GetSnapshotAsync(Guid batchId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up the most recent batch snapshot for a tenant.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The most recent snapshot if found.</returns>
    Task<BatchSnapshot?> GetLatestSnapshotAsync(string tenantId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks a batch snapshot against the current chain state.
    /// </summary>
    /// <param name="batchId">The batch identifier to verify.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Verification result.</returns>
    Task<BatchSnapshotVerificationResult> VerifySnapshotAsync(
        Guid batchId,
        CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// Outcome of verifying a batch snapshot.
/// </summary>
/// <param name="IsValid">Whether the snapshot is valid.</param>
/// <param name="SnapshotFound">Whether the snapshot was found.</param>
/// <param name="ChainHeadMatches">Whether the chain head matches the snapshot.</param>
/// <param name="JobCountMatches">Whether the job count matches.</param>
/// <param name="SignatureValid">Whether the DSSE signature is valid; <c>null</c> if the snapshot is unsigned.</param>
/// <param name="Issues">Verification issues reported when the snapshot is invalid.</param>
public readonly record struct BatchSnapshotVerificationResult(
    bool IsValid,
    bool SnapshotFound,
    bool ChainHeadMatches,
    bool JobCountMatches,
    bool? SignatureValid,
    IReadOnlyList<string> Issues);
|
||||
@@ -0,0 +1,77 @@
|
||||
// <copyright file="IHlcSchedulerDequeueService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Reads scheduler jobs back out of the log in HLC order.
/// </summary>
/// <remarks>
/// Retrieval of scheduler log entries is deterministic and HLC-ordered; the HLC ordering
/// guarantees causal consistency across distributed nodes.
/// </remarks>
public interface IHlcSchedulerDequeueService
{
    /// <summary>
    /// Dequeues scheduler log entries in HLC order.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="limit">Maximum number of entries to return.</param>
    /// <param name="partitionKey">Optional partition key to filter by.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The dequeue result with entries in HLC order.</returns>
    Task<SchedulerHlcDequeueResult> DequeueAsync(
        string tenantId,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Dequeues scheduler log entries that fall inside an HLC time range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startHlc">Range start (inclusive); <c>null</c> leaves the start unbounded.</param>
    /// <param name="endHlc">Range end (inclusive); <c>null</c> leaves the end unbounded.</param>
    /// <param name="limit">Maximum number of entries to return.</param>
    /// <param name="partitionKey">Optional partition key to filter by.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The dequeue result with entries in HLC order.</returns>
    Task<SchedulerHlcDequeueResult> DequeueByRangeAsync(
        string tenantId,
        HlcTimestamp? startHlc,
        HlcTimestamp? endHlc,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Dequeues scheduler log entries that come after a given HLC timestamp (cursor-based paging).
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="afterHlc">Cursor position; entries strictly after it are returned (exclusive).</param>
    /// <param name="limit">Maximum number of entries to return.</param>
    /// <param name="partitionKey">Optional partition key to filter by.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The dequeue result with entries in HLC order.</returns>
    Task<SchedulerHlcDequeueResult> DequeueAfterAsync(
        string tenantId,
        HlcTimestamp afterHlc,
        int limit,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Fetches the single scheduler log entry recorded for a job.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="jobId">The job identifier.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The scheduler log entry if found, null otherwise.</returns>
    Task<Persistence.Postgres.Models.SchedulerLogEntry?> GetByJobIdAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// <copyright file="IHlcSchedulerEnqueueService.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Enqueues scheduler jobs with HLC ordering and chain linking.
/// </summary>
/// <remarks>
/// On top of plain job enqueueing this service adds:
/// <list type="bullet">
/// <item><description>an HLC timestamp, assigned for global ordering</description></item>
/// <item><description>a chain link, computed for audit proofs</description></item>
/// <item><description>persistence to scheduler_log, for replay</description></item>
/// </list>
/// </remarks>
public interface IHlcSchedulerEnqueueService
{
    /// <summary>
    /// Enqueues a planner message with HLC ordering and chain linking.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="message">The planner queue message.</param>
    /// <param name="partitionKey">Optional partition key for chain separation.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The enqueue result with HLC timestamp and chain link.</returns>
    Task<SchedulerHlcEnqueueResult> EnqueuePlannerAsync(
        string tenantId,
        PlannerQueueMessage message,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Enqueues a runner segment message with HLC ordering and chain linking.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="message">The runner segment queue message.</param>
    /// <param name="partitionKey">Optional partition key for chain separation.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The enqueue result with HLC timestamp and chain link.</returns>
    Task<SchedulerHlcEnqueueResult> EnqueueRunnerSegmentAsync(
        string tenantId,
        RunnerSegmentQueueMessage message,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Enqueues an arbitrary payload with HLC ordering and chain linking.
    /// </summary>
    /// <typeparam name="T">Payload type.</typeparam>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="payload">The payload to enqueue.</param>
    /// <param name="idempotencyKey">Key for deduplication.</param>
    /// <param name="partitionKey">Optional partition key for chain separation.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The enqueue result with HLC timestamp and chain link.</returns>
    Task<SchedulerHlcEnqueueResult> EnqueueAsync<T>(
        string tenantId,
        T payload,
        string idempotencyKey,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,292 @@
|
||||
// <copyright file="SchedulerChainVerifier.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Checks the integrity of the scheduler chain.
/// </summary>
public interface ISchedulerChainVerifier
{
    /// <summary>
    /// Verifies the integrity of the scheduler chain across an HLC range.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="startHlc">Range start (inclusive); <c>null</c> leaves the start unbounded.</param>
    /// <param name="endHlc">Range end (inclusive); <c>null</c> leaves the end unbounded.</param>
    /// <param name="partitionKey">Partition to verify; <c>null</c> verifies all partitions.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Verification result.</returns>
    Task<ChainVerificationResult> VerifyAsync(
        string tenantId,
        HlcTimestamp? startHlc = null,
        HlcTimestamp? endHlc = null,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Verifies one chain link, identified by its job.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="jobId">The job identifier to verify.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Verification result for the single entry.</returns>
    Task<ChainVerificationResult> VerifyEntryAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// Outcome of a chain verification run.
/// </summary>
/// <param name="IsValid">Whether the chain is valid.</param>
/// <param name="EntriesChecked">How many entries were checked.</param>
/// <param name="Issues">The verification issues that were found.</param>
public readonly record struct ChainVerificationResult(
    bool IsValid,
    int EntriesChecked,
    IReadOnlyList<ChainVerificationIssue> Issues);
|
||||
|
||||
/// <summary>
/// One concrete problem detected while verifying the chain.
/// </summary>
/// <param name="JobId">Job ID of the entry where the issue was found.</param>
/// <param name="THlc">HLC timestamp of the problematic entry.</param>
/// <param name="IssueType">Type of issue found.</param>
/// <param name="Description">Human-readable description of the issue.</param>
public readonly record struct ChainVerificationIssue(
    Guid JobId,
    string THlc,
    string IssueType,
    string Description);
|
||||
|
||||
/// <summary>
/// Implementation of scheduler chain verification.
/// </summary>
/// <remarks>
/// Verification recomputes each entry's link with <c>SchedulerChainLinking.ComputeLink</c>
/// and checks that each entry's <c>PrevLink</c> equals the <c>Link</c> of the entry that
/// precedes it in the same partition. Problems are collected as
/// <see cref="ChainVerificationIssue"/> values instead of being thrown.
/// </remarks>
public sealed class SchedulerChainVerifier : ISchedulerChainVerifier
{
    private readonly ISchedulerLogRepository _logRepository;
    private readonly ILogger<SchedulerChainVerifier> _logger;

    /// <summary>
    /// Creates a new chain verifier.
    /// </summary>
    /// <param name="logRepository">Repository the scheduler log entries are read from.</param>
    /// <param name="logger">Logger for verification diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public SchedulerChainVerifier(
        ISchedulerLogRepository logRepository,
        ILogger<SchedulerChainVerifier> logger)
    {
        _logRepository = logRepository ?? throw new ArgumentNullException(nameof(logRepository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public async Task<ChainVerificationResult> VerifyAsync(
        string tenantId,
        HlcTimestamp? startHlc = null,
        HlcTimestamp? endHlc = null,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var startT = startHlc?.ToSortableString();
        var endT = endHlc?.ToSortableString();

        var entries = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startT,
            endT,
            limit: 0, // No limit
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        if (entries.Count == 0)
        {
            _logger.LogDebug(
                "No entries to verify in range [{Start}, {End}] for tenant {TenantId}",
                startT ?? "(unbounded)",
                endT ?? "(unbounded)",
                tenantId);

            return new ChainVerificationResult(IsValid: true, EntriesChecked: 0, Issues: []);
        }

        var issues = new List<ChainVerificationIssue>();

        // Chains are separated by partition key, so the expected predecessor link is tracked
        // per partition. A single running value would raise false PrevLinkMismatch issues
        // whenever entries of several partitions interleave (partitionKey == null).
        // When the range starts mid-chain, each partition is seeded with the link of the
        // entry that immediately precedes the range.
        var expectedPrevLinks = startT is null
            ? new Dictionary<string, byte[]?>(StringComparer.Ordinal)
            : await LoadPredecessorLinksAsync(tenantId, startT, partitionKey, cancellationToken)
                .ConfigureAwait(false);

        foreach (var entry in entries)
        {
            var partition = entry.PartitionKey ?? string.Empty;

            // Absent key => no predecessor known => the entry is expected to carry no prev_link.
            expectedPrevLinks.TryGetValue(partition, out var expectedPrevLink);

            // Verify prev_link matches expected
            if (!ByteArrayEquals(entry.PrevLink, expectedPrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    $"Expected {ToHex(expectedPrevLink)}, got {ToHex(entry.PrevLink)}"));
            }

            // Recompute link and verify (an unparseable HLC is reported by TryComputeLink)
            var computed = TryComputeLink(entry, issues);
            if (computed is not null && !ByteArrayEquals(entry.Link, computed))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "LinkMismatch",
                    $"Stored link doesn't match computed. Stored={ToHex(entry.Link)}, Computed={ToHex(computed)}"));
            }

            expectedPrevLinks[partition] = entry.Link;
        }

        var isValid = issues.Count == 0;

        _logger.LogInformation(
            "Chain verification complete. TenantId={TenantId}, Range=[{Start}, {End}], EntriesChecked={Count}, IsValid={IsValid}, IssueCount={IssueCount}",
            tenantId,
            startT ?? "(unbounded)",
            endT ?? "(unbounded)",
            entries.Count,
            isValid,
            issues.Count);

        return new ChainVerificationResult(isValid, entries.Count, issues);
    }

    /// <inheritdoc />
    public async Task<ChainVerificationResult> VerifyEntryAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var entry = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
        if (entry is null)
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, string.Empty, "NotFound", "Entry not found")]);
        }

        // Verify tenant isolation
        if (!string.Equals(entry.TenantId, tenantId, StringComparison.Ordinal))
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, entry.THlc, "TenantMismatch", "Entry belongs to different tenant")]);
        }

        var issues = new List<ChainVerificationIssue>();

        // Recompute link and verify (an unparseable HLC is reported by TryComputeLink)
        var computed = TryComputeLink(entry, issues);
        if (computed is not null && !ByteArrayEquals(entry.Link, computed))
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "LinkMismatch",
                "Stored link doesn't match computed"));
        }

        // If there's a prev_link, verify it exists and matches
        if (entry.PrevLink is { Length: > 0 })
        {
            // Find the previous entry
            var allEntries = await _logRepository.GetByHlcRangeAsync(
                tenantId,
                startTHlc: null,
                entry.THlc,
                limit: 0,
                partitionKey: entry.PartitionKey,
                cancellationToken).ConfigureAwait(false);

            // Sortable HLC strings are machine keys: order them ordinally, never with the
            // culture-sensitive default comparer, whose treatment of '-' varies by platform.
            var prevEntry = allEntries
                .Where(e => e.THlc != entry.THlc)
                .OrderByDescending(e => e.THlc, StringComparer.Ordinal)
                .FirstOrDefault();

            if (prevEntry is null)
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevEntryNotFound",
                    "Entry has prev_link but no previous entry found"));
            }
            else if (!ByteArrayEquals(prevEntry.Link, entry.PrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    "prev_link doesn't match previous entry's link"));
            }
        }

        return new ChainVerificationResult(issues.Count == 0, 1, issues);
    }

    /// <summary>
    /// Finds, per partition, the link of the entry that immediately precedes <paramref name="startT"/>.
    /// </summary>
    /// <remarks>
    /// The repository call is unlimited and the newest entry per partition is picked here.
    /// A <c>limit: 1</c> query would return whichever entry the repository orders first,
    /// which is the oldest entry if results are HLC-ascending (ordering is not visible here).
    /// NOTE(review): this loads the whole chain prefix; a dedicated "latest before" repository
    /// query would be cheaper if one exists.
    /// </remarks>
    /// <returns>Map from partition key to predecessor link; partitions without a predecessor are absent.</returns>
    private async Task<Dictionary<string, byte[]?>> LoadPredecessorLinksAsync(
        string tenantId,
        string startT,
        string? partitionKey,
        CancellationToken cancellationToken)
    {
        var preceding = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startTHlc: null,
            startT,
            limit: 0,
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        var newestSeen = new Dictionary<string, string>(StringComparer.Ordinal);
        var links = new Dictionary<string, byte[]?>(StringComparer.Ordinal);

        foreach (var candidate in preceding)
        {
            // The range end is inclusive; entries stamped exactly startT belong to the
            // verified range itself and are therefore not predecessors.
            if (string.Equals(candidate.THlc, startT, StringComparison.Ordinal))
            {
                continue;
            }

            var partition = candidate.PartitionKey ?? string.Empty;
            if (!newestSeen.TryGetValue(partition, out var seen)
                || string.CompareOrdinal(candidate.THlc, seen) >= 0)
            {
                newestSeen[partition] = candidate.THlc;
                links[partition] = candidate.Link;
            }
        }

        return links;
    }

    /// <summary>
    /// Recomputes the chain link of <paramref name="entry"/> from its stored fields.
    /// </summary>
    /// <remarks>
    /// A stored HLC timestamp that cannot be parsed is recorded as an <c>InvalidHlc</c> issue
    /// so that one corrupted row is reported rather than aborting the whole verification.
    /// </remarks>
    /// <returns>The recomputed link, or <c>null</c> when the stored HLC timestamp is unparseable.</returns>
    private static byte[]? TryComputeLink(
        StellaOps.Scheduler.Persistence.Postgres.Models.SchedulerLogEntry entry,
        List<ChainVerificationIssue> issues)
    {
        if (!HlcTimestamp.TryParse(entry.THlc, out var parsedHlc))
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "InvalidHlc",
                "Stored HLC timestamp could not be parsed; link cannot be recomputed"));
            return null;
        }

        return SchedulerChainLinking.ComputeLink(
            entry.PrevLink,
            entry.JobId,
            parsedHlc,
            entry.PayloadHash);
    }

    /// <summary>
    /// Compares two optional byte arrays by content. Two nulls are equal; null never equals a
    /// non-null array (including an empty one).
    /// </summary>
    private static bool ByteArrayEquals(byte[]? a, byte[]? b)
    {
        if (a is null || b is null)
        {
            return a is null && b is null;
        }

        return a.AsSpan().SequenceEqual(b);
    }

    /// <summary>
    /// Formats bytes as upper-case hex for issue descriptions; null is rendered as <c>(null)</c>.
    /// </summary>
    private static string ToHex(byte[]? bytes)
    {
        return bytes is null ? "(null)" : Convert.ToHexString(bytes);
    }
}
|
||||
@@ -0,0 +1,21 @@
|
||||
// <copyright file="SchedulerDequeueResult.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Outcome of an HLC-ordered scheduler dequeue operation.
/// </summary>
/// <param name="Entries">Dequeued scheduler log entries, in HLC order.</param>
/// <param name="TotalAvailable">Total number of entries available in the specified range.</param>
/// <param name="RangeStartHlc">HLC start of the queried range; <c>null</c> if unbounded.</param>
/// <param name="RangeEndHlc">HLC end of the queried range; <c>null</c> if unbounded.</param>
public readonly record struct SchedulerHlcDequeueResult(
    IReadOnlyList<SchedulerLogEntry> Entries,
    int TotalAvailable,
    HlcTimestamp? RangeStartHlc,
    HlcTimestamp? RangeEndHlc);
|
||||
@@ -0,0 +1,20 @@
|
||||
// <copyright file="SchedulerEnqueueResult.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Outcome of an HLC-ordered scheduler enqueue operation.
/// </summary>
/// <param name="THlc">HLC timestamp assigned to the job.</param>
/// <param name="JobId">The deterministic job identifier.</param>
/// <param name="Link">Chain link computed for this entry.</param>
/// <param name="Deduplicated">True if the job was already enqueued (idempotent).</param>
public readonly record struct SchedulerHlcEnqueueResult(
    HlcTimestamp THlc,
    Guid JobId,
    byte[] Link,
    bool Deduplicated);
|
||||
@@ -6,6 +6,7 @@ using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NATS.Client.Core;
|
||||
using NATS.Client.JetStream;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
@@ -18,6 +19,7 @@ internal sealed class NatsSchedulerPlannerQueue
|
||||
SchedulerNatsQueueOptions natsOptions,
|
||||
ILogger<NatsSchedulerPlannerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
@@ -26,6 +28,7 @@ internal sealed class NatsSchedulerPlannerQueue
|
||||
PlannerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
hlc,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ using Microsoft.Extensions.Logging;
|
||||
using NATS.Client.Core;
|
||||
using NATS.Client.JetStream;
|
||||
using NATS.Client.JetStream.Models;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
|
||||
@@ -24,6 +25,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
private readonly INatsSchedulerQueuePayload<TMessage> _payload;
|
||||
private readonly ILogger _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly IHybridLogicalClock? _hlc;
|
||||
private readonly SemaphoreSlim _connectionGate = new(1, 1);
|
||||
private readonly Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>> _connectionFactory;
|
||||
|
||||
@@ -40,6 +42,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
INatsSchedulerQueuePayload<TMessage> payload,
|
||||
ILogger logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
|
||||
{
|
||||
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
|
||||
@@ -48,6 +51,7 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_hlc = hlc;
|
||||
_connectionFactory = connectionFactory ?? ((opts, cancellationToken) => new ValueTask<NatsConnection>(new NatsConnection(opts)));
|
||||
|
||||
if (string.IsNullOrWhiteSpace(_natsOptions.Url))
|
||||
@@ -67,7 +71,11 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
|
||||
var payloadBytes = _payload.Serialize(message);
|
||||
var idempotencyKey = _payload.GetIdempotencyKey(message);
|
||||
var headers = BuildHeaders(message, idempotencyKey);
|
||||
|
||||
// Generate HLC timestamp if clock is available
|
||||
var hlcTimestamp = _hlc?.Tick();
|
||||
|
||||
var headers = BuildHeaders(message, idempotencyKey, hlcTimestamp);
|
||||
|
||||
var publishOptions = new NatsJSPubOpts
|
||||
{
|
||||
@@ -531,6 +539,14 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
? DateTimeOffset.FromUnixTimeMilliseconds(unix)
|
||||
: now;
|
||||
|
||||
// Parse HLC timestamp if present
|
||||
HlcTimestamp? hlcTimestamp = null;
|
||||
if (headers.TryGetValue(SchedulerQueueFields.HlcTimestamp, out var hlcValues) && hlcValues.Count > 0
|
||||
&& HlcTimestamp.TryParse(hlcValues[0], out var parsedHlc))
|
||||
{
|
||||
hlcTimestamp = parsedHlc;
|
||||
}
|
||||
|
||||
var leaseExpires = now.Add(leaseDuration);
|
||||
var runId = _payload.GetRunId(deserialized);
|
||||
var tenantId = _payload.GetTenantId(deserialized);
|
||||
@@ -558,10 +574,11 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
attempt,
|
||||
enqueuedAt,
|
||||
leaseExpires,
|
||||
consumer);
|
||||
consumer,
|
||||
hlcTimestamp);
|
||||
}
|
||||
|
||||
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey)
|
||||
private NatsHeaders BuildHeaders(TMessage message, string idempotencyKey, HlcTimestamp? hlcTimestamp = null)
|
||||
{
|
||||
var headers = new NatsHeaders
|
||||
{
|
||||
@@ -572,6 +589,12 @@ internal abstract class NatsSchedulerQueueBase<TMessage> : ISchedulerQueue<TMess
|
||||
{ SchedulerQueueFields.EnqueuedAt, _timeProvider.GetUtcNow().ToUnixTimeMilliseconds().ToString() }
|
||||
};
|
||||
|
||||
// Include HLC timestamp if available
|
||||
if (hlcTimestamp.HasValue)
|
||||
{
|
||||
headers.Add(SchedulerQueueFields.HlcTimestamp, hlcTimestamp.Value.ToSortableString());
|
||||
}
|
||||
|
||||
var scheduleId = _payload.GetScheduleId(message);
|
||||
if (!string.IsNullOrWhiteSpace(scheduleId))
|
||||
{
|
||||
|
||||
@@ -3,6 +3,7 @@ using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using NATS.Client.JetStream;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
|
||||
@@ -26,7 +27,8 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
|
||||
int attempt,
|
||||
DateTimeOffset enqueuedAt,
|
||||
DateTimeOffset leaseExpiresAt,
|
||||
string consumer)
|
||||
string consumer,
|
||||
HlcTimestamp? hlcTimestamp = null)
|
||||
{
|
||||
_queue = queue;
|
||||
MessageId = message.Metadata?.Sequence.ToString() ?? idempotencyKey;
|
||||
@@ -44,6 +46,7 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
|
||||
Message = deserialized;
|
||||
_message = message;
|
||||
Payload = payload;
|
||||
HlcTimestamp = hlcTimestamp;
|
||||
}
|
||||
|
||||
private readonly NatsJSMsg<byte[]> _message;
|
||||
@@ -78,6 +81,8 @@ internal sealed class NatsSchedulerQueueLease<TMessage> : ISchedulerQueueLease<T
|
||||
|
||||
public string Consumer { get; }
|
||||
|
||||
public HlcTimestamp? HlcTimestamp { get; }
|
||||
|
||||
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
|
||||
=> _queue.AcknowledgeAsync(this, cancellationToken);
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NATS.Client.Core;
|
||||
using NATS.Client.JetStream;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Nats;
|
||||
@@ -19,6 +20,7 @@ internal sealed class NatsSchedulerRunnerQueue
|
||||
SchedulerNatsQueueOptions natsOptions,
|
||||
ILogger<NatsSchedulerRunnerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<NatsOpts, CancellationToken, ValueTask<NatsConnection>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
@@ -27,6 +29,7 @@ internal sealed class NatsSchedulerRunnerQueue
|
||||
RunnerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
hlc,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
@@ -16,6 +17,7 @@ internal sealed class RedisSchedulerPlannerQueue
|
||||
SchedulerRedisQueueOptions redisOptions,
|
||||
ILogger<RedisSchedulerPlannerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
@@ -24,6 +26,7 @@ internal sealed class RedisSchedulerPlannerQueue
|
||||
PlannerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
hlc,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StackExchange.Redis;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Redis;
|
||||
@@ -20,6 +21,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
private readonly IRedisSchedulerQueuePayload<TMessage> _payload;
|
||||
private readonly ILogger _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly IHybridLogicalClock? _hlc;
|
||||
private readonly Func<ConfigurationOptions, Task<IConnectionMultiplexer>> _connectionFactory;
|
||||
private readonly SemaphoreSlim _connectionLock = new(1, 1);
|
||||
private readonly SemaphoreSlim _groupInitLock = new(1, 1);
|
||||
@@ -36,6 +38,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
IRedisSchedulerQueuePayload<TMessage> payload,
|
||||
ILogger logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
|
||||
{
|
||||
_queueOptions = queueOptions ?? throw new ArgumentNullException(nameof(queueOptions));
|
||||
@@ -44,6 +47,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
_payload = payload ?? throw new ArgumentNullException(nameof(payload));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
_hlc = hlc;
|
||||
_connectionFactory = connectionFactory ?? (config => Task.FromResult<IConnectionMultiplexer>(ConnectionMultiplexer.Connect(config)));
|
||||
|
||||
if (string.IsNullOrWhiteSpace(_redisOptions.ConnectionString))
|
||||
@@ -74,7 +78,11 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
|
||||
var now = _timeProvider.GetUtcNow();
|
||||
var attempt = 1;
|
||||
var entries = BuildEntries(message, now, attempt);
|
||||
|
||||
// Generate HLC timestamp if clock is available
|
||||
var hlcTimestamp = _hlc?.Tick();
|
||||
|
||||
var entries = BuildEntries(message, now, attempt, hlcTimestamp);
|
||||
|
||||
var messageId = await AddToStreamAsync(
|
||||
database,
|
||||
@@ -555,11 +563,12 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
private NameValueEntry[] BuildEntries(
|
||||
TMessage message,
|
||||
DateTimeOffset enqueuedAt,
|
||||
int attempt)
|
||||
int attempt,
|
||||
HlcTimestamp? hlcTimestamp = null)
|
||||
{
|
||||
var attributes = _payload.GetAttributes(message);
|
||||
var attributeCount = attributes?.Count ?? 0;
|
||||
var entries = ArrayPool<NameValueEntry>.Shared.Rent(10 + attributeCount);
|
||||
var entries = ArrayPool<NameValueEntry>.Shared.Rent(11 + attributeCount);
|
||||
var index = 0;
|
||||
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.QueueKind, _payload.QueueName);
|
||||
@@ -589,6 +598,12 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.EnqueuedAt, enqueuedAt.ToUnixTimeMilliseconds());
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.Payload, _payload.Serialize(message));
|
||||
|
||||
// Include HLC timestamp if available
|
||||
if (hlcTimestamp.HasValue)
|
||||
{
|
||||
entries[index++] = new NameValueEntry(SchedulerQueueFields.HlcTimestamp, hlcTimestamp.Value.ToSortableString());
|
||||
}
|
||||
|
||||
if (attributeCount > 0 && attributes is not null)
|
||||
{
|
||||
foreach (var kvp in attributes)
|
||||
@@ -623,6 +638,7 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
string? segmentId = null;
|
||||
string? correlationId = null;
|
||||
string? idempotencyKey = null;
|
||||
string? hlcTimestampStr = null;
|
||||
long? enqueuedAtUnix = null;
|
||||
var attempt = attemptOverride ?? 1;
|
||||
var attributes = new Dictionary<string, string>(StringComparer.Ordinal);
|
||||
@@ -676,6 +692,10 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
: Math.Max(1, parsedAttempt);
|
||||
}
|
||||
}
|
||||
else if (name.Equals(SchedulerQueueFields.HlcTimestamp, StringComparison.Ordinal))
|
||||
{
|
||||
hlcTimestampStr = NormalizeOptional(value.ToString());
|
||||
}
|
||||
else if (name.StartsWith(SchedulerQueueFields.AttributePrefix, StringComparison.Ordinal))
|
||||
{
|
||||
var key = name[SchedulerQueueFields.AttributePrefix.Length..];
|
||||
@@ -692,6 +712,14 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
var enqueuedAt = DateTimeOffset.FromUnixTimeMilliseconds(enqueuedAtUnix.Value);
|
||||
var leaseExpires = now.Add(leaseDuration);
|
||||
|
||||
// Parse HLC timestamp if present
|
||||
HlcTimestamp? hlcTimestamp = null;
|
||||
if (!string.IsNullOrEmpty(hlcTimestampStr) &&
|
||||
HlcTimestamp.TryParse(hlcTimestampStr, out var parsedHlc))
|
||||
{
|
||||
hlcTimestamp = parsedHlc;
|
||||
}
|
||||
|
||||
IReadOnlyDictionary<string, string> attributeView = attributes.Count == 0
|
||||
? EmptyReadOnlyDictionary<string, string>.Instance
|
||||
: new ReadOnlyDictionary<string, string>(attributes);
|
||||
@@ -710,7 +738,8 @@ internal abstract class RedisSchedulerQueueBase<TMessage> : ISchedulerQueue<TMes
|
||||
attempt,
|
||||
enqueuedAt,
|
||||
leaseExpires,
|
||||
consumer);
|
||||
consumer,
|
||||
hlcTimestamp);
|
||||
}
|
||||
|
||||
private async Task HandlePoisonEntryAsync(IDatabase database, RedisValue entryId)
|
||||
|
||||
@@ -2,6 +2,7 @@ using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
@@ -24,7 +25,8 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
|
||||
int attempt,
|
||||
DateTimeOffset enqueuedAt,
|
||||
DateTimeOffset leaseExpiresAt,
|
||||
string consumer)
|
||||
string consumer,
|
||||
HlcTimestamp? hlcTimestamp = null)
|
||||
{
|
||||
_queue = queue;
|
||||
MessageId = messageId;
|
||||
@@ -40,6 +42,7 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
|
||||
EnqueuedAt = enqueuedAt;
|
||||
LeaseExpiresAt = leaseExpiresAt;
|
||||
Consumer = consumer;
|
||||
HlcTimestamp = hlcTimestamp;
|
||||
}
|
||||
|
||||
public string MessageId { get; }
|
||||
@@ -68,6 +71,8 @@ internal sealed class RedisSchedulerQueueLease<TMessage> : ISchedulerQueueLease<
|
||||
|
||||
public string Consumer { get; }
|
||||
|
||||
public HlcTimestamp? HlcTimestamp { get; }
|
||||
|
||||
public Task AcknowledgeAsync(CancellationToken cancellationToken = default)
|
||||
=> _queue.AcknowledgeAsync(this, cancellationToken);
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
@@ -17,6 +18,7 @@ internal sealed class RedisSchedulerRunnerQueue
|
||||
SchedulerRedisQueueOptions redisOptions,
|
||||
ILogger<RedisSchedulerRunnerQueue> logger,
|
||||
TimeProvider timeProvider,
|
||||
IHybridLogicalClock? hlc = null,
|
||||
Func<ConfigurationOptions, Task<IConnectionMultiplexer>>? connectionFactory = null)
|
||||
: base(
|
||||
queueOptions,
|
||||
@@ -25,6 +27,7 @@ internal sealed class RedisSchedulerRunnerQueue
|
||||
RunnerPayload.Instance,
|
||||
logger,
|
||||
timeProvider,
|
||||
hlc,
|
||||
connectionFactory)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ using System.Collections.ObjectModel;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue;
|
||||
@@ -284,6 +285,13 @@ public interface ISchedulerQueueLease<out TMessage>
|
||||
|
||||
TMessage Message { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the Hybrid Logical Clock timestamp assigned at enqueue time.
|
||||
/// Provides deterministic ordering across distributed nodes.
|
||||
/// Null if HLC was not enabled when the message was enqueued.
|
||||
/// </summary>
|
||||
HlcTimestamp? HlcTimestamp { get; }
|
||||
|
||||
Task AcknowledgeAsync(CancellationToken cancellationToken = default);
|
||||
|
||||
Task RenewAsync(TimeSpan leaseDuration, CancellationToken cancellationToken = default);
|
||||
|
||||
@@ -13,4 +13,10 @@ internal static class SchedulerQueueFields
|
||||
public const string QueueKind = "queueKind";
|
||||
public const string CorrelationId = "correlationId";
|
||||
public const string AttributePrefix = "attr:";
|
||||
|
||||
/// <summary>
|
||||
/// Hybrid Logical Clock timestamp for deterministic ordering.
|
||||
/// Stored as sortable string format: {PhysicalTime:D13}-{NodeId}-{LogicalCounter:D6}
|
||||
/// </summary>
|
||||
public const string HlcTimestamp = "hlcTimestamp";
|
||||
}
|
||||
|
||||
@@ -35,6 +35,54 @@ public sealed class SchedulerQueueOptions
|
||||
/// Cap applied to the retry delay when exponential backoff is used.
|
||||
/// </summary>
|
||||
public TimeSpan RetryMaxBackoff { get; set; } = TimeSpan.FromMinutes(1);
|
||||
|
||||
/// <summary>
|
||||
/// HLC (Hybrid Logical Clock) ordering options.
|
||||
/// </summary>
|
||||
public SchedulerHlcOptions Hlc { get; set; } = new();
|
||||
}
|
||||
|
||||
/// <summary>
/// Settings controlling HLC (Hybrid Logical Clock) queue ordering and chain linking.
/// </summary>
public sealed class SchedulerHlcOptions
{
    // Default drift tolerance: one minute, expressed in milliseconds.
    private const int DefaultMaxClockDriftMs = 60_000;

    /// <summary>
    /// Turns on HLC-based ordering with chain linking.
    /// When <c>false</c> (the default), legacy (priority, created_at) ordering is used.
    /// </summary>
    /// <remarks>
    /// With this flag set, every enqueue operation is expected to:
    /// - receive an HLC timestamp used for global ordering,
    /// - have its chain link computed and stored for audit proofs,
    /// - be persisted as an entry in the scheduler_log table.
    /// </remarks>
    public bool EnableHlcOrdering { get; set; } = false;

    /// <summary>
    /// Migration switch: when <c>true</c>, writes go to both the legacy and the HLC tables,
    /// so that legacy ordering can be replaced by HLC ordering gradually.
    /// </summary>
    /// <remarks>
    /// Intended migration sequence:
    /// 1. Deploy with DualWriteMode = true (both tables are written).
    /// 2. Backfill scheduler_log from the existing scheduler.jobs rows.
    /// 3. Set EnableHlcOrdering = true so reads use HLC ordering.
    /// 4. Turn DualWriteMode off and deprecate legacy ordering.
    /// </remarks>
    public bool DualWriteMode { get; set; } = false;

    /// <summary>
    /// Enables automatic chain verification at dequeue time:
    /// each dequeued batch is checked for chain integrity.
    /// </summary>
    public bool VerifyOnDequeue { get; set; } = false;

    /// <summary>
    /// Largest tolerated clock drift, in milliseconds (default 60000, i.e. one minute).
    /// Messages whose HLC timestamp drifts by more than this value are meant to be rejected.
    /// </summary>
    public int MaxClockDriftMs { get; set; } = DefaultMaxClockDriftMs;
}
|
||||
|
||||
public sealed class SchedulerRedisQueueOptions
|
||||
|
||||
@@ -4,6 +4,7 @@ using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Queue.Nats;
|
||||
using StellaOps.Scheduler.Queue.Redis;
|
||||
|
||||
@@ -29,6 +30,7 @@ public static class SchedulerQueueServiceCollectionExtensions
|
||||
{
|
||||
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
|
||||
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
|
||||
var hlc = sp.GetService<IHybridLogicalClock>();
|
||||
|
||||
return options.Kind switch
|
||||
{
|
||||
@@ -36,12 +38,14 @@ public static class SchedulerQueueServiceCollectionExtensions
|
||||
options,
|
||||
options.Redis,
|
||||
loggerFactory.CreateLogger<RedisSchedulerPlannerQueue>(),
|
||||
timeProvider),
|
||||
timeProvider,
|
||||
hlc),
|
||||
SchedulerQueueTransportKind.Nats => new NatsSchedulerPlannerQueue(
|
||||
options,
|
||||
options.Nats,
|
||||
loggerFactory.CreateLogger<NatsSchedulerPlannerQueue>(),
|
||||
timeProvider),
|
||||
timeProvider,
|
||||
hlc),
|
||||
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
|
||||
};
|
||||
});
|
||||
@@ -50,6 +54,7 @@ public static class SchedulerQueueServiceCollectionExtensions
|
||||
{
|
||||
var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
|
||||
var timeProvider = sp.GetService<TimeProvider>() ?? TimeProvider.System;
|
||||
var hlc = sp.GetService<IHybridLogicalClock>();
|
||||
|
||||
return options.Kind switch
|
||||
{
|
||||
@@ -57,12 +62,14 @@ public static class SchedulerQueueServiceCollectionExtensions
|
||||
options,
|
||||
options.Redis,
|
||||
loggerFactory.CreateLogger<RedisSchedulerRunnerQueue>(),
|
||||
timeProvider),
|
||||
timeProvider,
|
||||
hlc),
|
||||
SchedulerQueueTransportKind.Nats => new NatsSchedulerRunnerQueue(
|
||||
options,
|
||||
options.Nats,
|
||||
loggerFactory.CreateLogger<NatsSchedulerRunnerQueue>(),
|
||||
timeProvider),
|
||||
timeProvider,
|
||||
hlc),
|
||||
_ => throw new InvalidOperationException($"Unsupported scheduler queue transport '{options.Kind}'.")
|
||||
};
|
||||
});
|
||||
|
||||
@@ -18,5 +18,8 @@
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Scheduler.Models\StellaOps.Scheduler.Models.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.Scheduler.Persistence\StellaOps.Scheduler.Persistence.csproj" />
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.HybridLogicalClock\StellaOps.HybridLogicalClock.csproj" />
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Canonical.Json\StellaOps.Canonical.Json.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
||||
@@ -0,0 +1,337 @@
|
||||
// <copyright file="SchedulerChainLinkingTests.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using FluentAssertions;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using Xunit;
|
||||
|
||||
namespace StellaOps.Scheduler.Persistence.Tests;
|
||||
|
||||
/// <summary>
/// Unit tests for <see cref="SchedulerChainLinking"/>: link computation, link verification,
/// payload hashing and hex formatting.
/// </summary>
/// <remarks>
/// Every input is a fixed value (no <c>Guid.NewGuid()</c>), so a failing run can be
/// reproduced exactly from the test source alone.
/// </remarks>
[Trait("Category", "Unit")]
public sealed class SchedulerChainLinkingTests
{
    /// <summary>
    /// Builds a stable job id from a small ordinal so tests never depend on random GUIDs.
    /// </summary>
    private static Guid JobIdFor(int ordinal) =>
        Guid.Parse($"00000000-0000-0000-0000-{ordinal:D12}");

    [Fact]
    public void ComputeLink_WithNullPrevLink_UsesZeroLink()
    {
        // Arrange
        var jobId = Guid.Parse("12345678-1234-1234-1234-123456789012");
        var hlc = new HlcTimestamp { PhysicalTime = 1000000000000L, NodeId = "node1", LogicalCounter = 1 };
        var payloadHash = new byte[32];
        payloadHash[0] = 0xAB;

        // Act
        var link1 = SchedulerChainLinking.ComputeLink(null, jobId, hlc, payloadHash);
        var link2 = SchedulerChainLinking.ComputeLink(SchedulerChainLinking.ZeroLink, jobId, hlc, payloadHash);

        // Assert
        link1.Should().HaveCount(32);
        link1.Should().BeEquivalentTo(link2, "null prev_link should be treated as zero link");
    }

    [Fact]
    public void ComputeLink_IsDeterministic_SameInputsSameOutput()
    {
        // Arrange
        var prevLink = new byte[32];
        prevLink[0] = 0x01;
        var jobId = Guid.Parse("AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE");
        var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "scheduler-1", LogicalCounter = 42 };
        var payloadHash = new byte[32];
        for (int i = 0; i < 32; i++) payloadHash[i] = (byte)i;

        // Act
        var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);
        var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);
        var link3 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);

        // Assert
        link1.Should().BeEquivalentTo(link2);
        link2.Should().BeEquivalentTo(link3);
    }

    [Fact]
    public void ComputeLink_DifferentJobIds_ProduceDifferentLinks()
    {
        // Arrange
        var prevLink = new byte[32];
        var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };
        var payloadHash = new byte[32];

        var jobId1 = Guid.Parse("11111111-1111-1111-1111-111111111111");
        var jobId2 = Guid.Parse("22222222-2222-2222-2222-222222222222");

        // Act
        var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId1, hlc, payloadHash);
        var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId2, hlc, payloadHash);

        // Assert
        link1.Should().NotBeEquivalentTo(link2);
    }

    [Fact]
    public void ComputeLink_DifferentHlcTimestamps_ProduceDifferentLinks()
    {
        // Arrange
        var prevLink = new byte[32];
        var jobId = JobIdFor(1);
        var payloadHash = new byte[32];

        var hlc1 = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };
        var hlc2 = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 2 }; // Different counter
        var hlc3 = new HlcTimestamp { PhysicalTime = 1704067200001L, NodeId = "node1", LogicalCounter = 1 }; // Different physical time

        // Act
        var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc1, payloadHash);
        var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc2, payloadHash);
        var link3 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc3, payloadHash);

        // Assert
        link1.Should().NotBeEquivalentTo(link2);
        link1.Should().NotBeEquivalentTo(link3);
        link2.Should().NotBeEquivalentTo(link3);
    }

    [Fact]
    public void ComputeLink_DifferentPrevLinks_ProduceDifferentLinks()
    {
        // Arrange
        var jobId = JobIdFor(1);
        var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };
        var payloadHash = new byte[32];

        var prevLink1 = new byte[32];
        var prevLink2 = new byte[32];
        prevLink2[0] = 0xFF;

        // Act
        var link1 = SchedulerChainLinking.ComputeLink(prevLink1, jobId, hlc, payloadHash);
        var link2 = SchedulerChainLinking.ComputeLink(prevLink2, jobId, hlc, payloadHash);

        // Assert
        link1.Should().NotBeEquivalentTo(link2);
    }

    [Fact]
    public void ComputeLink_DifferentPayloadHashes_ProduceDifferentLinks()
    {
        // Arrange
        var prevLink = new byte[32];
        var jobId = JobIdFor(1);
        var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };

        var payload1 = new byte[32];
        var payload2 = new byte[32];
        payload2[31] = 0x01;

        // Act
        var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payload1);
        var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payload2);

        // Assert
        link1.Should().NotBeEquivalentTo(link2);
    }

    [Fact]
    public void ComputeLink_WithStringHlc_ProducesSameResultAsParsedHlc()
    {
        // Arrange
        var prevLink = new byte[32];
        var jobId = JobIdFor(1);
        var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 42 };
        var hlcString = hlc.ToSortableString();
        var payloadHash = new byte[32];

        // Act - the struct overload and the sortable-string overload must agree
        var link1 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);
        var link2 = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlcString, payloadHash);

        // Assert
        link1.Should().BeEquivalentTo(link2);
    }

    [Fact]
    public void VerifyLink_ValidLink_ReturnsTrue()
    {
        // Arrange
        var prevLink = new byte[32];
        prevLink[0] = 0xDE;
        var jobId = JobIdFor(1);
        var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "verifier", LogicalCounter = 100 };
        var payloadHash = new byte[32];
        payloadHash[15] = 0xAD;

        var computedLink = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);

        // Act
        var isValid = SchedulerChainLinking.VerifyLink(computedLink, prevLink, jobId, hlc, payloadHash);

        // Assert
        isValid.Should().BeTrue();
    }

    [Fact]
    public void VerifyLink_TamperedLink_ReturnsFalse()
    {
        // Arrange
        var prevLink = new byte[32];
        var jobId = JobIdFor(1);
        var hlc = new HlcTimestamp { PhysicalTime = 1704067200000L, NodeId = "node1", LogicalCounter = 1 };
        var payloadHash = new byte[32];

        var computedLink = SchedulerChainLinking.ComputeLink(prevLink, jobId, hlc, payloadHash);

        // Tamper with the link (flip every bit of the first byte on a copy)
        var tamperedLink = (byte[])computedLink.Clone();
        tamperedLink[0] ^= 0xFF;

        // Act
        var isValid = SchedulerChainLinking.VerifyLink(tamperedLink, prevLink, jobId, hlc, payloadHash);

        // Assert
        isValid.Should().BeFalse();
    }

    [Fact]
    public void ComputePayloadHash_IsDeterministic()
    {
        // Arrange
        var payload = new { Id = 123, Name = "Test", Values = new[] { 1, 2, 3 } };

        // Act
        var hash1 = SchedulerChainLinking.ComputePayloadHash(payload);
        var hash2 = SchedulerChainLinking.ComputePayloadHash(payload);

        // Assert
        hash1.Should().HaveCount(32);
        hash1.Should().BeEquivalentTo(hash2);
    }

    [Fact]
    public void ComputePayloadHash_DifferentPayloads_ProduceDifferentHashes()
    {
        // Arrange
        var payload1 = new { Id = 1, Name = "First" };
        var payload2 = new { Id = 2, Name = "Second" };

        // Act
        var hash1 = SchedulerChainLinking.ComputePayloadHash(payload1);
        var hash2 = SchedulerChainLinking.ComputePayloadHash(payload2);

        // Assert
        hash1.Should().NotBeEquivalentTo(hash2);
    }

    [Fact]
    public void ComputePayloadHash_ByteArray_ProducesConsistentHash()
    {
        // Arrange
        var bytes = new byte[] { 0x01, 0x02, 0x03, 0x04, 0x05 };

        // Act
        var hash1 = SchedulerChainLinking.ComputePayloadHash(bytes);
        var hash2 = SchedulerChainLinking.ComputePayloadHash(bytes);

        // Assert
        hash1.Should().HaveCount(32);
        hash1.Should().BeEquivalentTo(hash2);
    }

    [Fact]
    public void ToHex_NullLink_ReturnsNullString()
    {
        // Act
        var result = SchedulerChainLinking.ToHex(null);

        // Assert
        result.Should().Be("(null)");
    }

    [Fact]
    public void ToHex_EmptyLink_ReturnsNullString()
    {
        // Act
        var result = SchedulerChainLinking.ToHex(Array.Empty<byte>());

        // Assert - an empty link is rendered the same way as a missing one
        result.Should().Be("(null)");
    }

    [Fact]
    public void ToHex_ValidLink_ReturnsLowercaseHex()
    {
        // Arrange
        var link = new byte[] { 0xAB, 0xCD, 0xEF };

        // Act
        var result = SchedulerChainLinking.ToHex(link);

        // Assert
        result.Should().Be("abcdef");
    }

    [Fact]
    public void ChainIntegrity_SequentialLinks_FormValidChain()
    {
        // Arrange - Simulate a chain of 5 entries with fixed job ids
        var jobIds = Enumerable.Range(1, 5).Select(JobIdFor).ToList();
        var payloads = jobIds.Select(id => SchedulerChainLinking.ComputePayloadHash(new { JobId = id })).ToList();

        var links = new List<byte[]>();
        byte[]? prevLink = null;
        long baseTime = 1704067200000L;

        // Act - Build chain; each link is fed in as the next entry's prev_link
        for (int i = 0; i < 5; i++)
        {
            var hlc = new HlcTimestamp { PhysicalTime = baseTime + i, NodeId = "node1", LogicalCounter = i };
            var link = SchedulerChainLinking.ComputeLink(prevLink, jobIds[i], hlc, payloads[i]);
            links.Add(link);
            prevLink = link;
        }

        // Assert - Verify chain integrity by replaying the same inputs
        byte[]? expectedPrev = null;
        for (int i = 0; i < 5; i++)
        {
            var hlc = new HlcTimestamp { PhysicalTime = baseTime + i, NodeId = "node1", LogicalCounter = i };
            var isValid = SchedulerChainLinking.VerifyLink(links[i], expectedPrev, jobIds[i], hlc, payloads[i]);
            isValid.Should().BeTrue($"Link {i} should be valid");
            expectedPrev = links[i];
        }
    }

    [Fact]
    public void ChainIntegrity_TamperedMiddleLink_BreaksChain()
    {
        // Arrange - Build a chain of 3 entries with fixed job ids
        var jobIds = new[] { JobIdFor(1), JobIdFor(2), JobIdFor(3) };
        var payloads = jobIds.Select(id => SchedulerChainLinking.ComputePayloadHash(new { JobId = id })).ToArray();
        var hlcs = new[]
        {
            new HlcTimestamp { PhysicalTime = 1000L, NodeId = "node1", LogicalCounter = 0 },
            new HlcTimestamp { PhysicalTime = 1001L, NodeId = "node1", LogicalCounter = 0 },
            new HlcTimestamp { PhysicalTime = 1002L, NodeId = "node1", LogicalCounter = 0 }
        };

        var link0 = SchedulerChainLinking.ComputeLink(null, jobIds[0], hlcs[0], payloads[0]);
        var link1 = SchedulerChainLinking.ComputeLink(link0, jobIds[1], hlcs[1], payloads[1]);
        var link2 = SchedulerChainLinking.ComputeLink(link1, jobIds[2], hlcs[2], payloads[2]);

        // Tamper with middle link
        var tamperedLink1 = (byte[])link1.Clone();
        tamperedLink1[0] ^= 0xFF;

        // Act & Assert - First link is still valid
        SchedulerChainLinking.VerifyLink(link0, null, jobIds[0], hlcs[0], payloads[0])
            .Should().BeTrue("First link should be valid");

        // Middle link verification fails
        SchedulerChainLinking.VerifyLink(tamperedLink1, link0, jobIds[1], hlcs[1], payloads[1])
            .Should().BeFalse("Tampered middle link should fail verification");

        // Third link verification fails because prev_link is wrong
        SchedulerChainLinking.VerifyLink(link2, tamperedLink1, jobIds[2], hlcs[2], payloads[2])
            .Should().BeFalse("Third link should fail with tampered prev_link");
    }
}
|
||||
@@ -0,0 +1,427 @@
|
||||
// <copyright file="HlcQueueIntegrationTests.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading.Tasks;
|
||||
using FluentAssertions;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Models;
|
||||
using StellaOps.Scheduler.Queue.Redis;
|
||||
using StellaOps.TestKit;
|
||||
using Testcontainers.Redis;
|
||||
using Xunit;
|
||||
|
||||
using HybridLogicalClockImpl = StellaOps.HybridLogicalClock.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// Integration tests for HLC (Hybrid Logical Clock) integration with scheduler queues.
|
||||
/// </summary>
|
||||
[Trait("Category", TestCategories.Integration)]
|
||||
public sealed class HlcQueueIntegrationTests : IAsyncLifetime
|
||||
{
|
||||
private readonly RedisContainer _redis;
|
||||
private string? _skipReason;
|
||||
|
||||
public HlcQueueIntegrationTests()
|
||||
{
|
||||
_redis = new RedisBuilder().Build();
|
||||
}
|
||||
|
||||
public async ValueTask InitializeAsync()
|
||||
{
|
||||
try
|
||||
{
|
||||
await _redis.StartAsync();
|
||||
}
|
||||
catch (Exception ex) when (IsDockerUnavailable(ex))
|
||||
{
|
||||
_skipReason = $"Docker engine is not available for Redis-backed tests: {ex.Message}";
|
||||
}
|
||||
}
|
||||
|
||||
public async ValueTask DisposeAsync()
|
||||
{
|
||||
if (_skipReason is not null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await _redis.DisposeAsync().AsTask();
|
||||
}
|
||||
|
||||
/// <summary>
/// Enqueues one planner message on a queue constructed with an HLC and checks that
/// the leased message carries a timestamp with the clock's node id and a positive
/// physical time.
/// </summary>
[Fact]
public async Task PlannerQueue_WithHlc_LeasedMessageContainsHlcTimestamp()
{
    if (SkipIfUnavailable())
    {
        return;
    }

    var queueOptions = CreateOptions();
    var clock = new HybridLogicalClockImpl(TimeProvider.System, "test-node-1", new InMemoryHlcStateStore());

    await using var plannerQueue = new RedisSchedulerPlannerQueue(
        queueOptions,
        queueOptions.Redis,
        NullLogger<RedisSchedulerPlannerQueue>.Instance,
        TimeProvider.System,
        clock,
        connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));

    var outgoing = CreatePlannerMessage();

    // A fresh message must not be reported as a duplicate.
    var enqueued = await plannerQueue.EnqueueAsync(outgoing);
    enqueued.Deduplicated.Should().BeFalse();

    var leaseRequest = new SchedulerQueueLeaseRequest("planner-hlc", batchSize: 1, queueOptions.DefaultLeaseDuration);
    var leased = await plannerQueue.LeaseAsync(leaseRequest);
    leased.Should().ContainSingle();

    var first = leased[0];
    first.HlcTimestamp.Should().NotBeNull("HLC timestamp should be present when HLC is configured");

    var stamp = first.HlcTimestamp!.Value;
    stamp.NodeId.Should().Be("test-node-1");
    stamp.PhysicalTime.Should().BeGreaterThan(0);

    await first.AcknowledgeAsync();
}
|
||||
|
||||
/// <summary>
/// Enqueues one runner segment message on a queue constructed with an HLC and checks
/// that the leased message carries a timestamp with the clock's node id.
/// </summary>
[Fact]
public async Task RunnerQueue_WithHlc_LeasedMessageContainsHlcTimestamp()
{
    if (SkipIfUnavailable())
    {
        return;
    }

    var queueOptions = CreateOptions();
    var clock = new HybridLogicalClockImpl(TimeProvider.System, "runner-node-1", new InMemoryHlcStateStore());

    await using var runnerQueue = new RedisSchedulerRunnerQueue(
        queueOptions,
        queueOptions.Redis,
        NullLogger<RedisSchedulerRunnerQueue>.Instance,
        TimeProvider.System,
        clock,
        connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));

    var outgoing = CreateRunnerMessage();
    await runnerQueue.EnqueueAsync(outgoing);

    var leaseRequest = new SchedulerQueueLeaseRequest("runner-hlc", batchSize: 1, queueOptions.DefaultLeaseDuration);
    var leased = await runnerQueue.LeaseAsync(leaseRequest);
    leased.Should().ContainSingle();

    var first = leased[0];
    first.HlcTimestamp.Should().NotBeNull("HLC timestamp should be present when HLC is configured");

    var stamp = first.HlcTimestamp!.Value;
    stamp.NodeId.Should().Be("runner-node-1");

    await first.AcknowledgeAsync();
}
|
||||
|
||||
/// <summary>
/// Enqueues one planner message on a queue constructed with <c>hlc: null</c> and
/// checks that the leased message exposes no HLC timestamp.
/// </summary>
[Fact]
public async Task PlannerQueue_WithoutHlc_LeasedMessageHasNullTimestamp()
{
    if (SkipIfUnavailable())
    {
        return;
    }

    var queueOptions = CreateOptions();

    // The queue is deliberately built without a clock.
    await using var plannerQueue = new RedisSchedulerPlannerQueue(
        queueOptions,
        queueOptions.Redis,
        NullLogger<RedisSchedulerPlannerQueue>.Instance,
        TimeProvider.System,
        hlc: null,
        connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));

    var outgoing = CreatePlannerMessage();
    await plannerQueue.EnqueueAsync(outgoing);

    var leaseRequest = new SchedulerQueueLeaseRequest("planner-no-hlc", batchSize: 1, queueOptions.DefaultLeaseDuration);
    var leased = await plannerQueue.LeaseAsync(leaseRequest);
    leased.Should().ContainSingle();

    var first = leased[0];
    first.HlcTimestamp.Should().BeNull("HLC timestamp should be null when HLC is not configured");

    await first.AcknowledgeAsync();
}
|
||||
|
||||
/// <summary>
/// Enqueues five planner messages through one HLC-backed queue, leases them in a
/// single batch, and checks that each leased timestamp is strictly greater than
/// the one before it. Every lease is acknowledged as it is checked.
/// </summary>
[Fact]
public async Task HlcTimestamp_IsMonotonicallyIncreasing_AcrossEnqueues()
{
    if (SkipIfUnavailable())
    {
        return;
    }

    var queueOptions = CreateOptions();
    var clock = new HybridLogicalClockImpl(TimeProvider.System, "monotonic-test", new InMemoryHlcStateStore());

    await using var plannerQueue = new RedisSchedulerPlannerQueue(
        queueOptions,
        queueOptions.Redis,
        NullLogger<RedisSchedulerPlannerQueue>.Instance,
        TimeProvider.System,
        clock,
        connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));

    // Build the whole batch first, then enqueue it in the same order.
    var batch = new PlannerQueueMessage[5];
    for (var n = 0; n < batch.Length; n++)
    {
        batch[n] = CreatePlannerMessage(suffix: n.ToString());
    }

    foreach (var outgoing in batch)
    {
        await plannerQueue.EnqueueAsync(outgoing);
    }

    // Ask for more than was enqueued so everything arrives in one lease call.
    var leaseRequest = new SchedulerQueueLeaseRequest("monotonic-consumer", batchSize: 10, queueOptions.DefaultLeaseDuration);
    var leased = await plannerQueue.LeaseAsync(leaseRequest);
    leased.Should().HaveCount(5);

    // Compare each timestamp with its predecessor; the first one has nothing to compare to.
    HlcTimestamp? lastSeen = null;
    foreach (var item in leased)
    {
        item.HlcTimestamp.Should().NotBeNull();

        if (lastSeen is { } prev)
        {
            var current = item.HlcTimestamp!.Value;

            (current > prev).Should().BeTrue(
                $"HLC {current} should be greater than {prev}");
        }

        lastSeen = item.HlcTimestamp;
        await item.AcknowledgeAsync();
    }
}
|
||||
|
||||
/// <summary>
/// Leases a single HLC-stamped planner message and checks that its timestamp
/// survives a round trip through <c>ToSortableString</c> and
/// <c>HlcTimestamp.TryParse</c> unchanged.
/// </summary>
[Fact]
public async Task HlcTimestamp_SortableString_ParsesCorrectly()
{
    if (SkipIfUnavailable())
    {
        return;
    }

    var queueOptions = CreateOptions();
    var clock = new HybridLogicalClockImpl(TimeProvider.System, "parse-test-node", new InMemoryHlcStateStore());

    await using var plannerQueue = new RedisSchedulerPlannerQueue(
        queueOptions,
        queueOptions.Redis,
        NullLogger<RedisSchedulerPlannerQueue>.Instance,
        TimeProvider.System,
        clock,
        connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));

    var outgoing = CreatePlannerMessage();
    await plannerQueue.EnqueueAsync(outgoing);

    var leaseRequest = new SchedulerQueueLeaseRequest("parse-consumer", batchSize: 1, queueOptions.DefaultLeaseDuration);
    var leased = await plannerQueue.LeaseAsync(leaseRequest);
    leased.Should().ContainSingle();

    var first = leased[0];
    first.HlcTimestamp.Should().NotBeNull();

    // Serialise to the sortable text form and parse it back.
    var original = first.HlcTimestamp!.Value;
    var asText = original.ToSortableString();

    var parsedOk = HlcTimestamp.TryParse(asText, out var roundTripped);
    parsedOk.Should().BeTrue();
    roundTripped.Should().Be(original);

    await first.AcknowledgeAsync();
}
|
||||
|
||||
/// <summary>
/// Enqueues ten planner messages back-to-back through one HLC-backed queue, then
/// checks that every leased timestamp carries the same node id and that the
/// timestamps are strictly increasing in lease order (so no two are equal).
/// </summary>
[Fact]
public async Task HlcTimestamp_DeterministicForSameInput_OnSameNode()
{
    if (SkipIfUnavailable())
    {
        return;
    }

    var queueOptions = CreateOptions();
    var clock = new HybridLogicalClockImpl(TimeProvider.System, "determinism-node", new InMemoryHlcStateStore());

    await using var plannerQueue = new RedisSchedulerPlannerQueue(
        queueOptions,
        queueOptions.Redis,
        NullLogger<RedisSchedulerPlannerQueue>.Instance,
        TimeProvider.System,
        clock,
        connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));

    // Enqueue with no delay between calls so several may share a physical clock reading.
    var observed = new List<HlcTimestamp>();
    for (var n = 0; n < 10; n++)
    {
        await plannerQueue.EnqueueAsync(CreatePlannerMessage(suffix: $"determinism-{n}"));
    }

    var leaseRequest = new SchedulerQueueLeaseRequest("determinism-consumer", batchSize: 20, queueOptions.DefaultLeaseDuration);
    var leased = await plannerQueue.LeaseAsync(leaseRequest);
    leased.Should().HaveCount(10);

    // Collect the timestamps in lease order, acknowledging as we go.
    foreach (var item in leased)
    {
        item.HlcTimestamp.Should().NotBeNull();
        observed.Add(item.HlcTimestamp!.Value);
        await item.AcknowledgeAsync();
    }

    // Every timestamp must come from the node the clock was created with.
    for (var k = 0; k < observed.Count; k++)
    {
        observed[k].NodeId.Should().Be("determinism-node");
    }

    // Strictly increasing implies there are no duplicates.
    for (var i = 1; i < observed.Count; i++)
    {
        var earlier = observed[i - 1];
        var later = observed[i];

        (later > earlier).Should().BeTrue(
            $"Timestamp {i} ({later}) should be greater than {i - 1} ({earlier})");
    }
}
|
||||
|
||||
/// <summary>
/// Builds Redis queue options pointing at the test container. Every stream,
/// consumer group, dead-letter stream and idempotency prefix embeds a fresh GUID,
/// so each call yields names that do not collide with any other call.
/// </summary>
private SchedulerQueueOptions CreateOptions()
{
    var runToken = Guid.NewGuid().ToString("N");

    // Planner and runner settings differ only in the role segment of their names.
    RedisSchedulerStreamOptions StreamsFor(string role)
    {
        var streamKey = $"scheduler:hlc-test:{role}:{runToken}";

        return new RedisSchedulerStreamOptions
        {
            Stream = streamKey,
            ConsumerGroup = $"{role}-hlc-{runToken}",
            DeadLetterStream = $"{streamKey}:dead",
            IdempotencyKeyPrefix = $"{streamKey}:idemp:",
            IdempotencyWindow = TimeSpan.FromMinutes(5)
        };
    }

    var result = new SchedulerQueueOptions
    {
        Kind = SchedulerQueueTransportKind.Redis,
        DefaultLeaseDuration = TimeSpan.FromSeconds(30),
        MaxDeliveryAttempts = 5,
        RetryInitialBackoff = TimeSpan.FromMilliseconds(10),
        RetryMaxBackoff = TimeSpan.FromMilliseconds(50),
        Redis = new SchedulerRedisQueueOptions
        {
            ConnectionString = _redis.GetConnectionString(),
            Database = 0,
            InitializationTimeout = TimeSpan.FromSeconds(10),
            Planner = StreamsFor("planner"),
            Runner = StreamsFor("runner")
        }
    };

    return result;
}
|
||||
|
||||
/// <summary>
/// Reports whether a skip reason was recorded during initialisation.
/// </summary>
/// <returns><c>true</c> when <c>_skipReason</c> is set; otherwise <c>false</c>.</returns>
private bool SkipIfUnavailable() => _skipReason is not null;
|
||||
|
||||
/// <summary>
/// Decides whether a container start-up failure should be treated as
/// "Docker is not usable here" rather than as a genuine test failure.
/// </summary>
/// <param name="exception">The exception thrown while starting the container.</param>
/// <returns>
/// <c>true</c> when the exception itself, or any exception nested inside it, is a
/// <see cref="TimeoutException"/> or has a type name containing "Docker"
/// (case-insensitive); otherwise <c>false</c>.
/// </returns>
private static bool IsDockerUnavailable(Exception exception)
{
    // Inspect the whole failure graph rather than only the first branch of an
    // AggregateException: the interesting exception may be wrapped by any
    // exception type, or sit in a later branch of an aggregate.
    var pending = new Stack<Exception>();
    pending.Push(exception);

    while (pending.Count > 0)
    {
        var current = pending.Pop();

        if (current is TimeoutException
            || current.GetType().Name.Contains("Docker", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }

        if (current is AggregateException aggregate)
        {
            // InnerExceptions includes InnerException as its first element.
            foreach (var branch in aggregate.InnerExceptions)
            {
                pending.Push(branch);
            }
        }
        else if (current.InnerException is not null)
        {
            pending.Push(current.InnerException);
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Builds a planner queue message for the fixed test tenant, made of a schedule,
/// a run in the planning state, and an impact set containing one image.
/// </summary>
/// <param name="suffix">
/// Optional discriminator. When non-empty it is appended to the run id; it is
/// always appended to the correlation id.
/// </param>
/// <returns>A new <see cref="PlannerQueueMessage"/>.</returns>
private static PlannerQueueMessage CreatePlannerMessage(string suffix = "")
{
    const string Tenant = "tenant-hlc";

    // Each caller of this helper gets its own selector instance.
    static Selector AllImages() => new Selector(SelectorScope.AllImages, tenantId: Tenant);

    var runId = "run-hlc-test";
    if (!string.IsNullOrEmpty(suffix))
    {
        runId = $"run-hlc-test-{suffix}";
    }

    var schedule = new Schedule(
        id: "sch-hlc-test",
        tenantId: Tenant,
        name: "HLC Test",
        enabled: true,
        cronExpression: "0 0 * * *",
        timezone: "UTC",
        mode: ScheduleMode.AnalysisOnly,
        selection: AllImages(),
        onlyIf: ScheduleOnlyIf.Default,
        notify: ScheduleNotify.Default,
        limits: ScheduleLimits.Default,
        createdAt: DateTimeOffset.UtcNow,
        createdBy: "tests",
        updatedAt: DateTimeOffset.UtcNow,
        updatedBy: "tests");

    var run = new Run(
        id: runId,
        tenantId: Tenant,
        trigger: RunTrigger.Manual,
        state: RunState.Planning,
        stats: RunStats.Empty,
        createdAt: DateTimeOffset.UtcNow,
        reason: RunReason.Empty,
        scheduleId: schedule.Id);

    var onlyImage = new ImpactImage(
        imageDigest: "sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
        registry: "registry",
        repository: "repo",
        namespaces: new[] { "prod" },
        tags: new[] { "latest" });

    var impactSet = new ImpactSet(
        selector: AllImages(),
        images: new[] { onlyImage },
        usageOnly: true,
        generatedAt: DateTimeOffset.UtcNow,
        total: 1);

    var correlation = $"corr-hlc-{suffix}";
    return new PlannerQueueMessage(run, impactSet, schedule, correlationId: correlation);
}
|
||||
|
||||
/// <summary>
/// Builds a runner segment message with fixed identifiers, a single image digest
/// and one <c>priority</c> attribute.
/// </summary>
/// <returns>A new <see cref="RunnerSegmentQueueMessage"/>.</returns>
private static RunnerSegmentQueueMessage CreateRunnerMessage()
{
    var digests = new[]
    {
        "sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"
    };

    var attributes = new Dictionary<string, string>
    {
        ["priority"] = "normal"
    };

    var segment = new RunnerSegmentQueueMessage(
        segmentId: "segment-hlc-test",
        runId: "run-hlc-test",
        tenantId: "tenant-hlc",
        imageDigests: digests,
        scheduleId: "sch-hlc-test",
        ratePerSecond: 10,
        usageOnly: true,
        attributes: attributes,
        correlationId: "corr-runner-hlc");

    return segment;
}
|
||||
}
|
||||
@@ -62,7 +62,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
|
||||
options.Redis,
|
||||
NullLogger<RedisSchedulerPlannerQueue>.Instance,
|
||||
TimeProvider.System,
|
||||
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
hlc: null,
|
||||
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
|
||||
var message = TestData.CreatePlannerMessage();
|
||||
|
||||
@@ -101,7 +102,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
|
||||
options.Redis,
|
||||
NullLogger<RedisSchedulerRunnerQueue>.Instance,
|
||||
TimeProvider.System,
|
||||
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
hlc: null,
|
||||
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
|
||||
var message = TestData.CreateRunnerMessage();
|
||||
|
||||
@@ -136,7 +138,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
|
||||
options.Redis,
|
||||
NullLogger<RedisSchedulerPlannerQueue>.Instance,
|
||||
TimeProvider.System,
|
||||
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
hlc: null,
|
||||
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
|
||||
var message = TestData.CreatePlannerMessage();
|
||||
await queue.EnqueueAsync(message);
|
||||
@@ -170,7 +173,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
|
||||
options.Redis,
|
||||
NullLogger<RedisSchedulerPlannerQueue>.Instance,
|
||||
TimeProvider.System,
|
||||
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
hlc: null,
|
||||
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
|
||||
var message = TestData.CreatePlannerMessage();
|
||||
|
||||
@@ -208,7 +212,8 @@ public sealed class RedisSchedulerQueueTests : IAsyncLifetime
|
||||
options.Redis,
|
||||
NullLogger<RedisSchedulerRunnerQueue>.Instance,
|
||||
TimeProvider.System,
|
||||
async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
hlc: null,
|
||||
connectionFactory: async config => (IConnectionMultiplexer)await ConnectionMultiplexer.ConnectAsync(config).ConfigureAwait(false));
|
||||
|
||||
var message = TestData.CreateRunnerMessage();
|
||||
await queue.EnqueueAsync(message);
|
||||
|
||||
Reference in New Issue
Block a user