up the blocking tasks
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Manifest Integrity / Validate Schema Integrity (push) Has been cancelled
Manifest Integrity / Validate Contract Documents (push) Has been cancelled
Manifest Integrity / Validate Pack Fixtures (push) Has been cancelled
Manifest Integrity / Audit SHA256SUMS Files (push) Has been cancelled
Manifest Integrity / Verify Merkle Roots (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Risk Bundle CI / risk-bundle-build (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Risk Bundle CI / risk-bundle-offline-kit (push) Has been cancelled
Risk Bundle CI / publish-checksums (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-11 02:32:18 +02:00
parent 92bc4d3a07
commit 49922dff5a
474 changed files with 76071 additions and 12411 deletions

View File

@@ -12,6 +12,7 @@ using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Excititor.Core;
using StellaOps.Excititor.Core.Orchestration;
using StellaOps.Excititor.Core.Storage;
using StellaOps.Excititor.Worker.Options;
namespace StellaOps.Excititor.Worker.Orchestration;
@@ -19,10 +20,12 @@ namespace StellaOps.Excititor.Worker.Orchestration;
/// <summary>
/// Default implementation of <see cref="IVexWorkerOrchestratorClient"/>.
/// Stores heartbeats and artifacts locally and, when configured, mirrors them to the Orchestrator worker API.
/// Per EXCITITOR-ORCH-32/33: Uses append-only checkpoint store for deterministic persistence and replay.
/// </summary>
internal sealed class VexWorkerOrchestratorClient : IVexWorkerOrchestratorClient
{
private readonly IVexConnectorStateRepository _stateRepository;
private readonly IAppendOnlyCheckpointStore? _checkpointStore;
private readonly TimeProvider _timeProvider;
private readonly IOptions<VexWorkerOrchestratorOptions> _options;
private readonly ILogger<VexWorkerOrchestratorClient> _logger;
@@ -36,9 +39,11 @@ internal sealed class VexWorkerOrchestratorClient : IVexWorkerOrchestratorClient
TimeProvider timeProvider,
IOptions<VexWorkerOrchestratorOptions> options,
ILogger<VexWorkerOrchestratorClient> logger,
HttpClient? httpClient = null)
HttpClient? httpClient = null,
IAppendOnlyCheckpointStore? checkpointStore = null)
{
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_checkpointStore = checkpointStore;
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_options = options ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
@@ -150,6 +155,18 @@ internal sealed class VexWorkerOrchestratorClient : IVexWorkerOrchestratorClient
heartbeat.LastArtifactHash);
}
// Log to append-only checkpoint store (EXCITITOR-ORCH-32/33)
await LogCheckpointMutationAsync(
context,
CheckpointMutation.Heartbeat(
context.RunId,
timestamp,
cursor: null,
heartbeat.LastArtifactHash,
heartbeat.LastArtifactKind,
idempotencyKey: $"hb-{context.RunId}-{sequence}"),
cancellationToken).ConfigureAwait(false);
await SendRemoteHeartbeatAsync(context, heartbeat, cancellationToken).ConfigureAwait(false);
}
@@ -194,6 +211,17 @@ internal sealed class VexWorkerOrchestratorClient : IVexWorkerOrchestratorClient
artifact.Kind,
artifact.ProviderId);
// Log to append-only checkpoint store (EXCITITOR-ORCH-32/33)
await LogCheckpointMutationAsync(
context,
CheckpointMutation.Artifact(
context.RunId,
artifact.CreatedAt,
artifact.Hash,
artifact.Kind,
idempotencyKey: $"artifact-{artifact.Hash}"),
cancellationToken).ConfigureAwait(false);
await SendRemoteProgressForArtifactAsync(context, artifact, cancellationToken).ConfigureAwait(false);
}
@@ -232,6 +260,19 @@ internal sealed class VexWorkerOrchestratorClient : IVexWorkerOrchestratorClient
result.ClaimsGenerated,
duration);
// Log to append-only checkpoint store (EXCITITOR-ORCH-32/33)
await LogCheckpointMutationAsync(
context,
CheckpointMutation.Completed(
context.RunId,
result.CompletedAt,
result.LastCheckpoint,
result.DocumentsProcessed,
result.ClaimsGenerated,
result.LastArtifactHash,
idempotencyKey: $"complete-{context.RunId}"),
cancellationToken).ConfigureAwait(false);
await SendRemoteCompletionAsync(context, result, cancellationToken).ConfigureAwait(false);
}
@@ -271,6 +312,19 @@ internal sealed class VexWorkerOrchestratorClient : IVexWorkerOrchestratorClient
errorCode,
retryAfterSeconds);
// Log to append-only checkpoint store (EXCITITOR-ORCH-32/33)
await LogCheckpointMutationAsync(
context,
CheckpointMutation.Failed(
context.RunId,
now,
errorCode,
errorMessage,
retryAfterSeconds,
state.LastCheckpoint?.ToString("O"),
idempotencyKey: $"fail-{context.RunId}"),
cancellationToken).ConfigureAwait(false);
await SendRemoteCompletionAsync(
context,
new VexWorkerJobResult(0, 0, state.LastCheckpoint, state.LastArtifactHash, now),
@@ -363,6 +417,20 @@ internal sealed class VexWorkerOrchestratorClient : IVexWorkerOrchestratorClient
context.ConnectorId,
checkpoint.Cursor ?? "(none)",
checkpoint.ProcessedDigests.Length);
// Log to append-only checkpoint store (EXCITITOR-ORCH-32/33)
if (!string.IsNullOrEmpty(checkpoint.Cursor))
{
await LogCheckpointMutationAsync(
context,
CheckpointMutation.CursorUpdate(
context.RunId,
checkpoint.LastProcessedAt ?? now,
checkpoint.Cursor,
checkpoint.ProcessedDigests.Length,
idempotencyKey: $"cursor-{context.RunId}-{checkpoint.Cursor}"),
cancellationToken).ConfigureAwait(false);
}
}
public async ValueTask<VexWorkerCheckpoint?> LoadCheckpointAsync(
@@ -647,6 +715,93 @@ internal sealed class VexWorkerOrchestratorClient : IVexWorkerOrchestratorClient
/// <summary>
/// Serializes <paramref name="value"/> to a JSON string using this client's serializer options.
/// </summary>
/// <param name="value">The object to serialize.</param>
/// <returns>The JSON text produced by <see cref="JsonSerializer"/>.</returns>
private string Serialize(object value)
{
    return JsonSerializer.Serialize(value, _serializerOptions);
}
/// <summary>
/// Appends a checkpoint mutation to the append-only checkpoint store.
/// Per EXCITITOR-ORCH-32/33: All checkpoint mutations are logged for audit/replay.
/// </summary>
/// <remarks>
/// The call is a no-op when no checkpoint store was supplied. Store failures are logged as
/// warnings and swallowed so the calling operation continues. Cancellation requested through
/// <paramref name="cancellationToken"/> is propagated instead of being reported as a store failure.
/// </remarks>
/// <param name="context">Job context supplying the tenant, connector id and run id.</param>
/// <param name="mutation">The mutation to append.</param>
/// <param name="cancellationToken">Token forwarded to the store's append call.</param>
/// <exception cref="OperationCanceledException">
/// The append was cancelled because <paramref name="cancellationToken"/> was signalled.
/// </exception>
private async ValueTask LogCheckpointMutationAsync(
    VexWorkerJobContext context,
    CheckpointMutation mutation,
    CancellationToken cancellationToken)
{
    if (_checkpointStore is null)
    {
        return;
    }

    try
    {
        var result = await _checkpointStore.AppendAsync(
            context.Tenant,
            context.ConnectorId,
            mutation,
            cancellationToken).ConfigureAwait(false);

        if (_options.Value.EnableVerboseLogging)
        {
            _logger.LogDebug(
                "Checkpoint mutation logged: runId={RunId} type={Type} seq={Sequence} duplicate={IsDuplicate}",
                context.RunId,
                mutation.Type,
                result.SequenceNumber,
                result.WasDuplicate);
        }
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Caller-requested cancellation is not a store failure: let it flow to the caller
        // rather than logging a misleading warning and continuing as if nothing happened.
        throw;
    }
    catch (Exception ex)
    {
        // Best-effort: a failing audit log must not fail the operation that triggered it.
        _logger.LogWarning(ex,
            "Failed to log checkpoint mutation for connector {ConnectorId}: {Type}",
            context.ConnectorId,
            mutation.Type);
    }
}
/// <summary>
/// Reads the append-only checkpoint mutation log for a connector.
/// Per EXCITITOR-ORCH-32/33: Enables deterministic replay.
/// </summary>
/// <param name="tenant">Tenant identifier, forwarded unchanged to the store.</param>
/// <param name="connectorId">Connector identifier, forwarded unchanged to the store.</param>
/// <param name="sinceSequence">Optional sequence value, forwarded unchanged to the store.</param>
/// <param name="limit">Limit value, forwarded unchanged to the store (defaults to 100).</param>
/// <param name="cancellationToken">Token forwarded to the store call.</param>
/// <returns>
/// The events returned by the store, or an empty list when no checkpoint store was supplied.
/// </returns>
public async ValueTask<IReadOnlyList<CheckpointMutationEvent>> GetCheckpointMutationLogAsync(
    string tenant,
    string connectorId,
    long? sinceSequence = null,
    int limit = 100,
    CancellationToken cancellationToken = default)
{
    if (_checkpointStore is { } store)
    {
        var pendingLog = store.GetMutationLogAsync(
            tenant,
            connectorId,
            sinceSequence,
            limit,
            cancellationToken);

        return await pendingLog.ConfigureAwait(false);
    }

    // No store configured: there is no log to read.
    return Array.Empty<CheckpointMutationEvent>();
}
/// <summary>
/// Asks the checkpoint store to replay mutations up to a given sequence.
/// Per EXCITITOR-ORCH-32/33: Deterministic replay for audit/recovery.
/// </summary>
/// <param name="tenant">Tenant identifier, forwarded unchanged to the store.</param>
/// <param name="connectorId">Connector identifier, forwarded unchanged to the store.</param>
/// <param name="upToSequence">Sequence value, forwarded unchanged to the store.</param>
/// <param name="cancellationToken">Token forwarded to the store call.</param>
/// <returns>
/// The state returned by the store, or <c>null</c> when no checkpoint store was supplied.
/// </returns>
public async ValueTask<CheckpointState?> ReplayCheckpointToSequenceAsync(
    string tenant,
    string connectorId,
    long upToSequence,
    CancellationToken cancellationToken = default)
{
    var store = _checkpointStore;
    if (store is null)
    {
        // No store configured: nothing to replay from.
        return null;
    }

    var replayedState = await store
        .ReplayToSequenceAsync(tenant, connectorId, upToSequence, cancellationToken)
        .ConfigureAwait(false);

    return replayedState;
}
/// <summary>
/// Positional record carrying a worker id plus optional task-runner id, job type, lease seconds and idempotency key.
/// NOTE(review): presumably the request payload used when claiming a job from the Orchestrator worker API — confirm against the call site.
/// </summary>
private sealed record ClaimRequest(string WorkerId, string? TaskRunnerId, string? JobType, int? LeaseSeconds, string? IdempotencyKey);
private sealed record ClaimResponse(