save progress
This commit is contained in:
@@ -0,0 +1,292 @@
|
||||
// <copyright file="SchedulerChainVerifier.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Scheduler.Persistence;
|
||||
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
|
||||
|
||||
namespace StellaOps.Scheduler.Queue.Hlc;
|
||||
|
||||
/// <summary>
/// Contract for services that check the integrity of the scheduler chain.
/// </summary>
public interface ISchedulerChainVerifier
{
    /// <summary>
    /// Checks the scheduler chain of a tenant for the entries that fall inside an HLC window.
    /// </summary>
    /// <param name="tenantId">Identifier of the tenant whose chain is checked.</param>
    /// <param name="startHlc">Inclusive lower bound of the window; <c>null</c> leaves it unbounded.</param>
    /// <param name="endHlc">Inclusive upper bound of the window; <c>null</c> leaves it unbounded.</param>
    /// <param name="partitionKey">Partition to restrict the check to; <c>null</c> covers all partitions.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The outcome of the verification.</returns>
    Task<ChainVerificationResult> VerifyAsync(string tenantId, HlcTimestamp? startHlc = null, HlcTimestamp? endHlc = null, string? partitionKey = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Checks one individual link of the chain.
    /// </summary>
    /// <param name="tenantId">Identifier of the tenant that is expected to own the entry.</param>
    /// <param name="jobId">Identifier of the job whose entry is checked.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The outcome of the verification for that single entry.</returns>
    Task<ChainVerificationResult> VerifyEntryAsync(string tenantId, Guid jobId, CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// Outcome of a chain verification run.
/// </summary>
/// <param name="IsValid"><c>true</c> when the chain is valid.</param>
/// <param name="EntriesChecked">How many entries were checked.</param>
/// <param name="Issues">The verification issues that were found.</param>
public readonly record struct ChainVerificationResult(bool IsValid, int EntriesChecked, IReadOnlyList<ChainVerificationIssue> Issues);
|
||||
|
||||
/// <summary>
/// One concrete problem detected while verifying the chain.
/// </summary>
/// <param name="JobId">Job ID of the entry the issue was found on.</param>
/// <param name="THlc">HLC timestamp of the problematic entry.</param>
/// <param name="IssueType">Kind of issue that was found.</param>
/// <param name="Description">Description of the issue intended for human readers.</param>
public readonly record struct ChainVerificationIssue(Guid JobId, string THlc, string IssueType, string Description);
|
||||
|
||||
/// <summary>
/// Implementation of scheduler chain verification.
/// Entries are read from an <see cref="ISchedulerLogRepository"/>; for each entry the stored
/// <c>Link</c> is compared with a value recomputed by <c>SchedulerChainLinking.ComputeLink</c>,
/// and its <c>PrevLink</c> is compared with the <c>Link</c> of the entry that precedes it.
/// </summary>
public sealed class SchedulerChainVerifier : ISchedulerChainVerifier
{
    // Placeholder logged for an open range bound.
    private const string Unbounded = "(unbounded)";

    private readonly ISchedulerLogRepository _logRepository;
    private readonly ILogger<SchedulerChainVerifier> _logger;

    /// <summary>
    /// Creates a new chain verifier.
    /// </summary>
    /// <param name="logRepository">Repository the scheduler log entries are read from.</param>
    /// <param name="logger">Logger for verification diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public SchedulerChainVerifier(
        ISchedulerLogRepository logRepository,
        ILogger<SchedulerChainVerifier> logger)
    {
        _logRepository = logRepository ?? throw new ArgumentNullException(nameof(logRepository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <remarks>
    /// Entries are walked in the order the repository returns them; each entry's <c>PrevLink</c>
    /// must equal the <c>Link</c> of the entry walked just before it. When <paramref name="startHlc"/>
    /// is given, the expectation for the first entry is seeded from the latest entry strictly before
    /// the start of the range, if there is one.
    /// </remarks>
    /// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
    public async Task<ChainVerificationResult> VerifyAsync(
        string tenantId,
        HlcTimestamp? startHlc = null,
        HlcTimestamp? endHlc = null,
        string? partitionKey = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var startT = startHlc?.ToSortableString();
        var endT = endHlc?.ToSortableString();

        // NOTE(review): the walk below assumes the repository returns entries in ascending
        // HLC order - confirm against GetByHlcRangeAsync.
        var entries = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startT,
            endT,
            limit: 0, // No limit
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        if (entries.Count == 0)
        {
            _logger.LogDebug(
                "No entries to verify in range [{Start}, {End}] for tenant {TenantId}",
                startT ?? Unbounded,
                endT ?? Unbounded,
                tenantId);

            return new ChainVerificationResult(IsValid: true, EntriesChecked: 0, Issues: []);
        }

        var issues = new List<ChainVerificationIssue>();
        byte[]? expectedPrevLink = null;

        // If starting mid-chain, the first entry in range must point at the entry right before it.
        if (startT is not null)
        {
            var (found, previousLink) = await FindPreviousLinkAsync(
                tenantId,
                startT,
                partitionKey,
                cancellationToken).ConfigureAwait(false);

            if (found)
            {
                expectedPrevLink = previousLink;
            }
        }

        foreach (var entry in entries)
        {
            // The range is unbounded in size, so honor cancellation between entries.
            cancellationToken.ThrowIfCancellationRequested();

            // Verify prev_link matches expected
            if (!ByteArrayEquals(entry.PrevLink, expectedPrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    $"Expected {ToHex(expectedPrevLink)}, got {ToHex(entry.PrevLink)}"));
            }

            // Recompute link and verify
            var computed = SchedulerChainLinking.ComputeLink(
                entry.PrevLink,
                entry.JobId,
                HlcTimestamp.Parse(entry.THlc),
                entry.PayloadHash);

            if (!ByteArrayEquals(entry.Link, computed))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "LinkMismatch",
                    $"Stored link doesn't match computed. Stored={ToHex(entry.Link)}, Computed={ToHex(computed)}"));
            }

            // The next entry is expected to reference the link as stored, even if it was just
            // reported as mismatching, so a single bad entry does not cascade.
            expectedPrevLink = entry.Link;
        }

        var isValid = issues.Count == 0;

        _logger.LogInformation(
            "Chain verification complete. TenantId={TenantId}, Range=[{Start}, {End}], EntriesChecked={Count}, IsValid={IsValid}, IssueCount={IssueCount}",
            tenantId,
            startT ?? Unbounded,
            endT ?? Unbounded,
            entries.Count,
            isValid,
            issues.Count);

        return new ChainVerificationResult(isValid, entries.Count, issues);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns an invalid result with a single <c>NotFound</c> or <c>TenantMismatch</c> issue
    /// (and zero entries checked) when the entry does not exist or belongs to another tenant.
    /// Otherwise the entry's own link is recomputed, and - when it carries a non-empty
    /// <c>PrevLink</c> - that value is compared with the link of the preceding entry in the
    /// same partition.
    /// </remarks>
    /// <exception cref="ArgumentException"><paramref name="tenantId"/> is null, empty or whitespace.</exception>
    public async Task<ChainVerificationResult> VerifyEntryAsync(
        string tenantId,
        Guid jobId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);

        var entry = await _logRepository.GetByJobIdAsync(jobId, cancellationToken).ConfigureAwait(false);
        if (entry is null)
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, string.Empty, "NotFound", "Entry not found")]);
        }

        // Verify tenant isolation: the lookup is by job id only, so the owner is checked here.
        if (!string.Equals(entry.TenantId, tenantId, StringComparison.Ordinal))
        {
            return new ChainVerificationResult(
                IsValid: false,
                EntriesChecked: 0,
                Issues: [new ChainVerificationIssue(jobId, entry.THlc, "TenantMismatch", "Entry belongs to different tenant")]);
        }

        var issues = new List<ChainVerificationIssue>();

        // Recompute link and verify
        var computed = SchedulerChainLinking.ComputeLink(
            entry.PrevLink,
            entry.JobId,
            HlcTimestamp.Parse(entry.THlc),
            entry.PayloadHash);

        if (!ByteArrayEquals(entry.Link, computed))
        {
            issues.Add(new ChainVerificationIssue(
                entry.JobId,
                entry.THlc,
                "LinkMismatch",
                "Stored link doesn't match computed"));
        }

        // If there's a prev_link, verify the previous entry exists and its link matches.
        if (entry.PrevLink is { Length: > 0 })
        {
            var (found, previousLink) = await FindPreviousLinkAsync(
                tenantId,
                entry.THlc,
                entry.PartitionKey,
                cancellationToken).ConfigureAwait(false);

            if (!found)
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevEntryNotFound",
                    "Entry has prev_link but no previous entry found"));
            }
            else if (!ByteArrayEquals(previousLink, entry.PrevLink))
            {
                issues.Add(new ChainVerificationIssue(
                    entry.JobId,
                    entry.THlc,
                    "PrevLinkMismatch",
                    "prev_link doesn't match previous entry's link"));
            }
        }

        return new ChainVerificationResult(issues.Count == 0, 1, issues);
    }

    /// <summary>
    /// Looks up the link of the latest entry whose timestamp differs from, and sorts highest
    /// below, <paramref name="boundaryTHlc"/> within the given tenant and partition.
    /// </summary>
    /// <param name="tenantId">Tenant identifier.</param>
    /// <param name="boundaryTHlc">Sortable HLC string used as the inclusive upper bound of the query;
    /// entries carrying exactly this timestamp are ignored.</param>
    /// <param name="partitionKey">Partition key passed through to the repository.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns><c>Found</c> tells whether a preceding entry exists; <c>Link</c> is its stored link.</returns>
    private async Task<(bool Found, byte[]? Link)> FindPreviousLinkAsync(
        string tenantId,
        string boundaryTHlc,
        string? partitionKey,
        CancellationToken cancellationToken)
    {
        // Fetch without a limit and pick the maximum ourselves: a "limit: 1" query would yield
        // the oldest entry if the repository sorts ascending, not the one next to the boundary.
        // NOTE(review): this loads every earlier entry; a dedicated "latest before" repository
        // query would be cheaper if one is available.
        var candidates = await _logRepository.GetByHlcRangeAsync(
            tenantId,
            startTHlc: null,
            boundaryTHlc,
            limit: 0,
            partitionKey,
            cancellationToken).ConfigureAwait(false);

        // Sortable HLC strings are machine-generated keys, so compare them ordinally rather than
        // with the culture-sensitive default comparer.
        var previous = candidates
            .Where(e => !string.Equals(e.THlc, boundaryTHlc, StringComparison.Ordinal))
            .MaxBy(e => e.THlc, StringComparer.Ordinal);

        if (previous is null)
        {
            return (false, null);
        }

        return (true, previous.Link);
    }

    /// <summary>
    /// Compares two links byte by byte. <c>null</c> and an empty array are both treated as
    /// "no link" and therefore compare equal, in line with the <c>{ Length: &gt; 0 }</c> check
    /// used in <see cref="VerifyEntryAsync"/>.
    /// </summary>
    private static bool ByteArrayEquals(byte[]? a, byte[]? b)
    {
        // AsSpan() on a null array yields an empty span, which covers the null/empty cases.
        return a.AsSpan().SequenceEqual(b.AsSpan());
    }

    /// <summary>
    /// Formats a link as an uppercase hex string, or <c>(null)</c> when there is none.
    /// </summary>
    private static string ToHex(byte[]? bytes) =>
        bytes is null ? "(null)" : Convert.ToHexString(bytes);
}
|
||||
Reference in New Issue
Block a user