// <copyright file="SchedulerChainVerifier.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
|
|
|
|
using Microsoft.Extensions.Logging;
|
|
using StellaOps.HybridLogicalClock;
|
|
using StellaOps.Scheduler.Persistence;
|
|
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
|
|
|
namespace StellaOps.Scheduler.Queue.Hlc;
|
|
|
|
/// <summary>
/// Contract for checking that scheduler chain entries are intact.
/// </summary>
public interface ISchedulerChainVerifier
{
    /// <summary>
    /// Checks every chain entry of a tenant that falls inside the given HLC range.
    /// </summary>
    /// <param name="tenantId">Identifier of the tenant whose chain is checked.</param>
    /// <param name="startHlc">Inclusive lower bound of the range; <c>null</c> leaves it open.</param>
    /// <param name="endHlc">Inclusive upper bound of the range; <c>null</c> leaves it open.</param>
    /// <param name="partitionKey">Partition to restrict the check to; <c>null</c> covers all partitions.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The outcome of the verification, including any issues found.</returns>
    Task<ChainVerificationResult> VerifyAsync(
        string tenantId,
        HlcTimestamp? startHlc = null,
        HlcTimestamp? endHlc = null,
        string? partitionKey = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks one chain link, identified by its job.
    /// </summary>
    /// <param name="tenantId">Identifier of the tenant that should own the entry.</param>
    /// <param name="jobId">Identifier of the job whose entry is checked.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The outcome of the verification for that single entry.</returns>
    Task<ChainVerificationResult> VerifyEntryAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default);
}
|
|
|
|
/// <summary>
/// Outcome of a chain verification run.
/// </summary>
/// <param name="IsValid"><c>true</c> when the verification reported the chain as valid.</param>
/// <param name="EntriesChecked">How many entries were checked.</param>
/// <param name="Issues">The verification issues that were found.</param>
public readonly record struct ChainVerificationResult(
    bool IsValid,
    int EntriesChecked,
    IReadOnlyList<ChainVerificationIssue> Issues);
|
|
|
|
/// <summary>
/// One problem detected while verifying the chain.
/// </summary>
/// <param name="JobId">Identifier of the job whose entry has the problem.</param>
/// <param name="THlc">HLC timestamp of the affected entry.</param>
/// <param name="IssueType">Kind of problem that was detected.</param>
/// <param name="Description">Description of the problem for human readers.</param>
public readonly record struct ChainVerificationIssue(
    Guid JobId,
    string THlc,
    string IssueType,
    string Description);
|
|
|
|
/// <summary>
/// Implementation of scheduler chain verification.
/// </summary>
/// <remarks>
/// Entries are read through <see cref="ISchedulerLogRepository"/>. For every entry the link is
/// recomputed with <c>SchedulerChainLinking.ComputeLink</c> and compared with the stored link, and
/// the entry's prev_link is compared with the link of the entry that precedes it.
/// </remarks>
public sealed class SchedulerChainVerifier : ISchedulerChainVerifier
{
    private readonly ISchedulerLogRepository _logRepository;
    private readonly ILogger<SchedulerChainVerifier> _logger;

    /// <summary>
    /// Creates a new chain verifier.
    /// </summary>
    /// <param name="logRepository">Repository the chain entries are read from.</param>
    /// <param name="logger">Logger for verification diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public SchedulerChainVerifier(
        ISchedulerLogRepository logRepository,
        ILogger<SchedulerChainVerifier> logger)
    {
        _logRepository = logRepository ?? throw new ArgumentNullException(nameof(logRepository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
    public async Task<ChainVerificationResult> VerifyAsync(
        string tenantId,
        HlcTimestamp? startHlc = null,
        HlcTimestamp? endHlc = null,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var startT = startHlc?.ToSortableString();
        var endT = endHlc?.ToSortableString();

        // NOTE(review): the walk below assumes the repository returns entries oldest-first
        // (ascending THlc) — confirm against ISchedulerLogRepository.
        // NOTE(review): with partitionKey == null all partitions are walked as one chain —
        // confirm that chains are not maintained per partition.
        var entries = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startT,
            endT,
            limit: 0, // No limit
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        if (entries.Count == 0)
        {
            _logger.LogDebug(
                "No entries to verify in range [{Start}, {End}] for tenant {TenantId}",
                startT ?? "(unbounded)",
                endT ?? "(unbounded)",
                tenantId);

            return new ChainVerificationResult(IsValid: true, EntriesChecked: 0, Issues: []);
        }

        var issues = new List<ChainVerificationIssue>();
        byte[]? expectedPrevLink = null;

        // If starting mid-chain, the first entry in range must point at the link of the entry
        // immediately before the range start (not at the oldest entry of the chain).
        if (startT is not null)
        {
            var (found, link) = await FindPredecessorLinkAsync(
                tenantId,
                startT,
                partitionKey,
                cancellationToken).ConfigureAwait(false);

            if (found)
            {
                expectedPrevLink = link;
            }
        }

        foreach (var entry in entries)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // Verify prev_link matches expected
            if (!ByteArrayEquals(entry.PrevLink, expectedPrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    $"Expected {ToHex(expectedPrevLink)}, got {ToHex(entry.PrevLink)}"));
            }

            // Recompute link and verify
            var computed = SchedulerChainLinking.ComputeLink(
                entry.PrevLink,
                entry.JobId,
                HlcTimestamp.Parse(entry.THlc),
                entry.PayloadHash);

            if (!ByteArrayEquals(entry.Link, computed))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "LinkMismatch",
                    $"Stored link doesn't match computed. Stored={ToHex(entry.Link)}, Computed={ToHex(computed)}"));
            }

            // The stored link (even if it failed recomputation) is what the next entry must reference.
            expectedPrevLink = entry.Link;
        }

        var isValid = issues.Count == 0;

        _logger.LogInformation(
            "Chain verification complete. TenantId={TenantId}, Range=[{Start}, {End}], EntriesChecked={Count}, IsValid={IsValid}, IssueCount={IssueCount}",
            tenantId,
            startT ?? "(unbounded)",
            endT ?? "(unbounded)",
            entries.Count,
            isValid,
            issues.Count);

        return new ChainVerificationResult(isValid, entries.Count, issues);
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
    public async Task<ChainVerificationResult> VerifyEntryAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var entry = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
        if (entry is null)
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, string.Empty, "NotFound", "Entry not found")]);
        }

        // Verify tenant isolation: the lookup is by job id only, so the owner is checked here.
        if (!string.Equals(entry.TenantId, tenantId, StringComparison.Ordinal))
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, entry.THlc, "TenantMismatch", "Entry belongs to different tenant")]);
        }

        var issues = new List<ChainVerificationIssue>();

        // Recompute link and verify
        var computed = SchedulerChainLinking.ComputeLink(
            entry.PrevLink,
            entry.JobId,
            HlcTimestamp.Parse(entry.THlc),
            entry.PayloadHash);

        if (!ByteArrayEquals(entry.Link, computed))
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "LinkMismatch",
                "Stored link doesn't match computed"));
        }

        // If there's a prev_link, verify the previous entry exists and its link matches.
        if (entry.PrevLink is { Length: > 0 })
        {
            var (found, previousLink) = await FindPredecessorLinkAsync(
                tenantId,
                entry.THlc,
                entry.PartitionKey,
                cancellationToken).ConfigureAwait(false);

            if (!found)
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevEntryNotFound",
                    "Entry has prev_link but no previous entry found"));
            }
            else if (!ByteArrayEquals(previousLink, entry.PrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    "prev_link doesn't match previous entry's link"));
            }
        }

        return new ChainVerificationResult(issues.Count == 0, 1, issues);
    }

    /// <summary>
    /// Finds the link of the entry that immediately precedes <paramref name="tHlc"/>.
    /// </summary>
    /// <param name="tenantId">Tenant whose entries are searched.</param>
    /// <param name="tHlc">Sortable HLC string; the entry carrying exactly this value is skipped.</param>
    /// <param name="partitionKey">Partition to search, or <c>null</c> to pass no partition filter.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// <c>Found</c> is <c>false</c> when no other entry exists up to <paramref name="tHlc"/>;
    /// otherwise <c>Link</c> is the stored link of the entry with the greatest THlc.
    /// </returns>
    private async Task<(bool Found, byte[]? Link)> FindPredecessorLinkAsync(
        string tenantId,
        string tHlc,
        string? partitionKey,
        CancellationToken cancellationToken)
    {
        // No "latest entry before X" query is used in this file, so everything up to tHlc is
        // read (limit 0 = no limit) and the maximum is selected here. A limit of 1 would only be
        // correct if the repository returned newest-first.
        var upToTarget = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startTHlc: null,
            tHlc,
            limit: 0,
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        // Sortable HLC strings are machine keys: compare ordinally, never with the current culture.
        var predecessor = upToTarget
            .Where(e => !string.Equals(e.THlc, tHlc, StringComparison.Ordinal))
            .MaxBy(e => e.THlc, StringComparer.Ordinal);

        if (predecessor is null)
        {
            return (false, null);
        }

        return (true, predecessor.Link);
    }

    /// <summary>
    /// Compares two links byte by byte. <c>null</c> and an empty array both mean "no link" and
    /// compare equal, matching how <see cref="VerifyEntryAsync"/> treats an empty prev_link.
    /// </summary>
    private static bool ByteArrayEquals(byte[]? a, byte[]? b)
    {
        // Converting a null array to a span yields an empty span, which unifies null and empty.
        ReadOnlySpan<byte> left = a;
        ReadOnlySpan<byte> right = b;
        return left.SequenceEqual(right);
    }

    /// <summary>
    /// Formats bytes as an upper-case hex string for issue descriptions; <c>null</c> becomes "(null)".
    /// </summary>
    private static string ToHex(byte[]? bytes)
    {
        return bytes is null ? "(null)" : Convert.ToHexString(bytes);
    }
}
|