Files
git.stella-ops.org/src/Scheduler/__Libraries/StellaOps.Scheduler.Queue/Hlc/SchedulerChainVerifier.cs
StellaOps Bot 37e11918e0 save progress
2026-01-06 09:42:20 +02:00

293 lines
9.9 KiB
C#

// <copyright file="SchedulerChainVerifier.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
using Microsoft.Extensions.Logging;
using StellaOps.HybridLogicalClock;
using StellaOps.Scheduler.Persistence;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Queue.Hlc;
/// <summary>
/// Service for verifying the integrity of the scheduler chain: each log entry stores a
/// link computed from its own fields and the previous entry's link, and verification
/// recomputes those links and checks that consecutive entries reference each other.
/// </summary>
/// <remarks>
/// The <see cref="SchedulerChainVerifier"/> implementation in this file rejects a null,
/// empty, or whitespace <c>tenantId</c> with an <see cref="ArgumentException"/> on both methods.
/// </remarks>
public interface ISchedulerChainVerifier
{
/// <summary>
/// Verifies the integrity of the scheduler chain within an HLC range.
/// </summary>
/// <remarks>
/// The <see cref="SchedulerChainVerifier"/> implementation reports two issue types:
/// <c>PrevLinkMismatch</c> (an entry's prev_link differs from the preceding entry's link)
/// and <c>LinkMismatch</c> (an entry's stored link differs from the recomputed link).
/// An empty range yields a valid result with zero entries checked.
/// </remarks>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="startHlc">Start of the HLC range (inclusive, null for unbounded).</param>
/// <param name="endHlc">End of the HLC range (inclusive, null for unbounded).</param>
/// <param name="partitionKey">Optional partition key to verify (null for all partitions).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>
/// Verification result: whether any issue was found, how many entries were examined,
/// and the list of issues.
/// </returns>
Task<ChainVerificationResult> VerifyAsync(
string tenantId,
HlcTimestamp? startHlc = null,
HlcTimestamp? endHlc = null,
string? partitionKey = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Verifies a single chain link.
/// </summary>
/// <remarks>
/// The <see cref="SchedulerChainVerifier"/> implementation returns an invalid result with
/// zero entries checked and a <c>NotFound</c> issue when no entry exists for the job, or a
/// <c>TenantMismatch</c> issue when the entry belongs to another tenant. Otherwise it checks
/// one entry and may report <c>LinkMismatch</c>, <c>PrevEntryNotFound</c>, or
/// <c>PrevLinkMismatch</c>.
/// </remarks>
/// <param name="tenantId">Tenant identifier.</param>
/// <param name="jobId">The job identifier to verify.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Verification result for the single entry.</returns>
Task<ChainVerificationResult> VerifyEntryAsync(
string tenantId,
Guid jobId,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of chain verification.
/// </summary>
/// <remarks>
/// This is a struct, so <c>default(ChainVerificationResult)</c> has a null
/// <see cref="Issues"/> list; results produced by <see cref="SchedulerChainVerifier"/>
/// always carry a non-null list.
/// </remarks>
/// <param name="IsValid">
/// Whether the chain is valid. <see cref="SchedulerChainVerifier"/> sets this to
/// <c>true</c> exactly when it recorded no issues.
/// </param>
/// <param name="EntriesChecked">
/// Number of entries checked. <see cref="SchedulerChainVerifier"/> reports 0 when the range
/// is empty or when a single-entry lookup fails (not found / tenant mismatch).
/// </param>
/// <param name="Issues">List of verification issues found; empty when the chain is valid.</param>
public readonly record struct ChainVerificationResult(
bool IsValid,
int EntriesChecked,
IReadOnlyList<ChainVerificationIssue> Issues);
/// <summary>
/// A specific issue found during chain verification.
/// </summary>
/// <param name="JobId">The job ID where the issue was found.</param>
/// <param name="THlc">
/// The HLC timestamp of the problematic entry, in the string form stored on the log entry.
/// <see cref="SchedulerChainVerifier"/> uses an empty string when the entry was not found.
/// </param>
/// <param name="IssueType">
/// Type of issue found. Values emitted by <see cref="SchedulerChainVerifier"/>:
/// <c>PrevLinkMismatch</c>, <c>LinkMismatch</c>, <c>NotFound</c>, <c>TenantMismatch</c>,
/// and <c>PrevEntryNotFound</c>.
/// </param>
/// <param name="Description">Human-readable description of the issue.</param>
public readonly record struct ChainVerificationIssue(
Guid JobId,
string THlc,
string IssueType,
string Description);
/// <summary>
/// Implementation of scheduler chain verification. Every entry's stored link must equal the
/// link recomputed from its fields, and every entry's <c>prev_link</c> must equal the link of
/// the entry that precedes it in HLC order.
/// </summary>
public sealed class SchedulerChainVerifier : ISchedulerChainVerifier
{
    private readonly ISchedulerLogRepository _logRepository;
    private readonly ILogger<SchedulerChainVerifier> _logger;

    /// <summary>
    /// Creates a new chain verifier.
    /// </summary>
    /// <param name="logRepository">Repository the scheduler log entries are read from.</param>
    /// <param name="logger">Logger used for verification diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public SchedulerChainVerifier(
        ISchedulerLogRepository logRepository,
        ILogger<SchedulerChainVerifier> logger)
    {
        _logRepository = logRepository ?? throw new ArgumentNullException(nameof(logRepository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty, or whitespace.</exception>
    public async Task<ChainVerificationResult> VerifyAsync(
        string tenantId,
        HlcTimestamp? startHlc = null,
        HlcTimestamp? endHlc = null,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var startT = startHlc?.ToSortableString();
        var endT = endHlc?.ToSortableString();

        var entries = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startT,
            endT,
            limit: 0, // No limit
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        if (entries.Count == 0)
        {
            _logger.LogDebug(
                "No entries to verify in range [{Start}, {End}] for tenant {TenantId}",
                startT ?? "(unbounded)",
                endT ?? "(unbounded)",
                tenantId);
            return new ChainVerificationResult(IsValid: true, EntriesChecked: 0, Issues: []);
        }

        var issues = new List<ChainVerificationIssue>();

        // Link the next entry is expected to reference; null means "start of chain".
        byte[]? expectedPrevLink = null;

        // If starting mid-chain, we need to get the previous entry's link.
        if (startHlc is not null)
        {
            // NOTE(review): this asks for a single entry in (unbounded, startT]. It only yields
            // the immediate predecessor if the repository returns the newest entry first for
            // this query shape; VerifyEntryAsync below fetches the whole prefix instead.
            // Confirm the ordering contract of GetByHlcRangeAsync.
            var previousEntries = await _logRepository.GetByHlcRangeAsync(
                tenantId,
                startTHlc: null,
                startT,
                limit: 1,
                partitionKey,
                cancellationToken).ConfigureAwait(false);

            // An entry sitting exactly at startT is part of the verified range, not its predecessor.
            if (previousEntries.Count > 0 && previousEntries[0].THlc != startT)
            {
                expectedPrevLink = previousEntries[0].Link;
            }
        }

        // NOTE(review): a single running expectation is used for all returned entries. If chains
        // are kept per partition and partitionKey is null, interleaved partitions would be
        // reported as mismatches — confirm against the chain-linking design.
        foreach (var entry in entries)
        {
            // Verify prev_link matches expected.
            if (!ByteArrayEquals(entry.PrevLink, expectedPrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    $"Expected {ToHex(expectedPrevLink)}, got {ToHex(entry.PrevLink)}"));
            }

            // Recompute link and verify.
            var computed = SchedulerChainLinking.ComputeLink(
                entry.PrevLink,
                entry.JobId,
                HlcTimestamp.Parse(entry.THlc),
                entry.PayloadHash);

            if (!ByteArrayEquals(entry.Link, computed))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "LinkMismatch",
                    $"Stored link doesn't match computed. Stored={ToHex(entry.Link)}, Computed={ToHex(computed)}"));
            }

            // Continue from the stored link so one corrupt entry does not cascade into
            // a PrevLinkMismatch on every entry that follows it.
            expectedPrevLink = entry.Link;
        }

        var isValid = issues.Count == 0;

        _logger.LogInformation(
            "Chain verification complete. TenantId={TenantId}, Range=[{Start}, {End}], EntriesChecked={Count}, IsValid={IsValid}, IssueCount={IssueCount}",
            tenantId,
            startT ?? "(unbounded)",
            endT ?? "(unbounded)",
            entries.Count,
            isValid,
            issues.Count);

        return new ChainVerificationResult(isValid, entries.Count, issues);
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty, or whitespace.</exception>
    public async Task<ChainVerificationResult> VerifyEntryAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var entry = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
        if (entry is null)
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, string.Empty, "NotFound", "Entry not found")]);
        }

        // Verify tenant isolation: the lookup is by job ID only, so the owner is checked here.
        if (!string.Equals(entry.TenantId, tenantId, StringComparison.Ordinal))
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, entry.THlc, "TenantMismatch", "Entry belongs to different tenant")]);
        }

        var issues = new List<ChainVerificationIssue>();

        // Recompute link and verify.
        var computed = SchedulerChainLinking.ComputeLink(
            entry.PrevLink,
            entry.JobId,
            HlcTimestamp.Parse(entry.THlc),
            entry.PayloadHash);

        if (!ByteArrayEquals(entry.Link, computed))
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "LinkMismatch",
                "Stored link doesn't match computed"));
        }

        // If there's a prev_link, verify it exists and matches.
        if (entry.PrevLink is { Length: > 0 })
        {
            // Fetch everything up to and including this entry within its partition.
            var allEntries = await _logRepository.GetByHlcRangeAsync(
                tenantId,
                startTHlc: null,
                entry.THlc,
                limit: 0,
                partitionKey: entry.PartitionKey,
                cancellationToken).ConfigureAwait(false);

            // The predecessor is the latest entry strictly before this one. The sortable HLC
            // string is a machine key, so it must be ordered ordinally; the default string
            // comparer is culture-sensitive and may rank punctuation differently.
            var prevEntry = allEntries
                .Where(e => e.THlc != entry.THlc)
                .OrderByDescending(e => e.THlc, StringComparer.Ordinal)
                .FirstOrDefault();

            if (prevEntry is null)
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevEntryNotFound",
                    "Entry has prev_link but no previous entry found"));
            }
            else if (!ByteArrayEquals(prevEntry.Link, entry.PrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    "prev_link doesn't match previous entry's link"));
            }
        }

        return new ChainVerificationResult(issues.Count == 0, 1, issues);
    }

    /// <summary>
    /// Compares two links byte-for-byte. A <c>null</c> array and an empty array both mean
    /// "no link" and compare equal, matching how <see cref="VerifyEntryAsync"/> treats an
    /// empty <c>prev_link</c>.
    /// </summary>
    /// <param name="a">First link, or <c>null</c>.</param>
    /// <param name="b">Second link, or <c>null</c>.</param>
    /// <returns><c>true</c> when both hold the same bytes or both are null/empty.</returns>
    private static bool ByteArrayEquals(byte[]? a, byte[]? b)
    {
        // The implicit array-to-span conversion maps null to an empty span.
        ReadOnlySpan<byte> left = a;
        ReadOnlySpan<byte> right = b;
        return left.SequenceEqual(right);
    }

    /// <summary>
    /// Formats a link as upper-case hexadecimal for issue descriptions.
    /// </summary>
    /// <param name="bytes">Bytes to format, or <c>null</c>.</param>
    /// <returns>The hex string, or the literal <c>(null)</c> when <paramref name="bytes"/> is null.</returns>
    private static string ToHex(byte[]? bytes)
    {
        return bytes is null ? "(null)" : Convert.ToHexString(bytes);
    }
}