audit work, fixed StellaOps.sln warnings/errors, fixed tests, sprints work, new advisories

This commit is contained in:
master
2026-01-07 18:49:59 +02:00
parent 04ec098046
commit 608a7f85c0
866 changed files with 56323 additions and 6231 deletions

View File

@@ -0,0 +1,25 @@
# Timeline Core Module Charter
## Mission
- Provide timeline core models and deterministic ordering logic.
## Responsibilities
- Define timeline domain models and validation rules.
- Implement ordering and partitioning logic for timeline events.
- Keep serialization deterministic and invariant.
## Required Reading
- docs/README.md
- docs/07_HIGH_LEVEL_ARCHITECTURE.md
- docs/modules/platform/architecture-overview.md
- docs/modules/timeline-indexer/architecture.md
- docs/modules/timeline-indexer/README.md
## Working Agreement
- Deterministic ordering and invariant formatting.
- Use TimeProvider and IGuidGenerator where timestamps or IDs are created.
- Propagate CancellationToken for async operations.
## Testing Strategy
- Unit tests for ordering, validation, and serialization.
- Determinism tests for stable outputs.

View File

@@ -0,0 +1,193 @@
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
using StellaOps.Eventing.Models;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Timeline.Core.Export;
/// <summary>
/// Builds downloadable timeline export bundles. Exports are asynchronous:
/// callers initiate one, poll its status, then fetch the finished bundle.
/// </summary>
public interface ITimelineBundleBuilder
{
/// <summary>
/// Initiates an export operation for a correlation ID. The returned operation
/// starts in the Initiated state; the export itself completes in the background.
/// </summary>
/// <param name="correlationId">The correlation ID to export.</param>
/// <param name="request">Export request parameters (format, HLC range, signing, payload inclusion).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The initiated export operation.</returns>
Task<ExportOperation> InitiateExportAsync(
string correlationId,
ExportRequest request,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the current status of an export operation.
/// </summary>
/// <param name="exportId">The export operation ID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The export operation status, or null if no operation with that ID exists.</returns>
Task<ExportOperation?> GetExportStatusAsync(
string exportId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the export bundle content for a completed export.
/// </summary>
/// <param name="exportId">The export operation ID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The export bundle (including its content stream), or null if the operation
/// is unknown or has not completed yet.</returns>
Task<ExportBundle?> GetExportBundleAsync(
string exportId,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Request parameters for initiating an export operation.
/// </summary>
public sealed record ExportRequest
{
/// <summary>
/// Export format: "ndjson" (the default) or "json". The bundle builder treats
/// any value other than "ndjson" as "json".
/// </summary>
public string Format { get; init; } = "ndjson";
/// <summary>
/// Whether to DSSE-sign the bundle. Defaults to false. Signing additionally
/// requires an event signer to be available to the builder.
/// </summary>
public bool SignBundle { get; init; } = false;
/// <summary>
/// Optional HLC range start. Both <see cref="FromHlc"/> and <see cref="ToHlc"/>
/// must be set for range filtering to apply; otherwise all events for the
/// correlation are exported.
/// </summary>
public HlcTimestamp? FromHlc { get; init; }
/// <summary>
/// Optional HLC range end. See <see cref="FromHlc"/> for how the range is applied.
/// </summary>
public HlcTimestamp? ToHlc { get; init; }
/// <summary>
/// Whether to include full event payloads in the bundle. When false, only the
/// payload digests are exported. Defaults to true.
/// </summary>
public bool IncludePayloads { get; init; } = true;
}
/// <summary>
/// Represents an export operation and its observable progress.
/// </summary>
public sealed record ExportOperation
{
/// <summary>
/// Unique export operation ID; an opaque token used to poll status and fetch the bundle.
/// </summary>
public required string ExportId { get; init; }
/// <summary>
/// The correlation ID being exported.
/// </summary>
public required string CorrelationId { get; init; }
/// <summary>
/// Export format, copied from the originating request ("ndjson" or "json").
/// </summary>
public required string Format { get; init; }
/// <summary>
/// Whether signing was requested for the bundle.
/// </summary>
public bool SignBundle { get; init; }
/// <summary>
/// Current status of the operation.
/// </summary>
public required ExportStatus Status { get; init; }
/// <summary>
/// Number of events exported. Populated when the export completes.
/// </summary>
public int EventCount { get; init; }
/// <summary>
/// Size of the export file in bytes. Populated when the export completes.
/// </summary>
public long FileSizeBytes { get; init; }
/// <summary>
/// Creation time (UTC).
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// Completion time; set when the operation reaches a terminal state (Completed or Failed).
/// </summary>
public DateTimeOffset? CompletedAt { get; init; }
/// <summary>
/// Error message; set only when <see cref="Status"/> is Failed.
/// </summary>
public string? Error { get; init; }
}
/// <summary>
/// Export operation status. Completed and Failed are terminal.
/// </summary>
public enum ExportStatus
{
/// <summary>
/// Export has been initiated but background processing has not started.
/// </summary>
Initiated,
/// <summary>
/// Export is in progress.
/// </summary>
InProgress,
/// <summary>
/// Export completed successfully; the bundle can be fetched.
/// </summary>
Completed,
/// <summary>
/// Export failed; see <see cref="ExportOperation.Error"/> for the reason.
/// </summary>
Failed
}
/// <summary>
/// Represents a completed export bundle ready to be streamed to a caller.
/// </summary>
public sealed record ExportBundle
{
/// <summary>
/// The export operation ID this bundle belongs to.
/// </summary>
public required string ExportId { get; init; }
/// <summary>
/// Content type (e.g., "application/x-ndjson" or "application/json").
/// </summary>
public required string ContentType { get; init; }
/// <summary>
/// Suggested filename for download responses.
/// </summary>
public required string FileName { get; init; }
/// <summary>
/// Bundle content stream. The consumer is presumably responsible for disposing
/// it once read — confirm against callers.
/// </summary>
public required Stream Content { get; init; }
/// <summary>
/// Bundle size in bytes.
/// </summary>
public long SizeBytes { get; init; }
/// <summary>
/// DSSE signature over the bundle content when signing was requested and
/// performed; null otherwise.
/// </summary>
public string? DsseSignature { get; init; }
}

View File

@@ -0,0 +1,293 @@
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
using System.Collections.Concurrent;
using System.Globalization;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Eventing.Models;
using StellaOps.Eventing.Signing;
using StellaOps.Eventing.Storage;
using StellaOps.Timeline.Core.Telemetry;
namespace StellaOps.Timeline.Core.Export;
/// <summary>
/// Implementation of <see cref="ITimelineBundleBuilder"/>.
/// Exports run on a background thread-pool task; operation metadata, bundle bytes,
/// and DSSE signatures are held in memory (production would use PostgreSQL + object storage).
/// </summary>
public sealed class TimelineBundleBuilder : ITimelineBundleBuilder
{
    // Serializer options are created once and reused: JsonSerializerOptions caches
    // type metadata and is thread-safe after first use, so allocating a new instance
    // per serialized event (as the original loop did) defeats that caching.
    private static readonly JsonSerializerOptions CompactJsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    private static readonly JsonSerializerOptions IndentedJsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        WriteIndented = true,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    private readonly ITimelineEventStore _eventStore;
    private readonly IEventSigner? _eventSigner;
    private readonly TimelineMetrics _metrics;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<TimelineBundleBuilder> _logger;

    // In-memory stores for export operations, bundle content, and DSSE signatures
    // (production would use PostgreSQL + object storage).
    private readonly ConcurrentDictionary<string, ExportOperation> _operations = new();
    private readonly ConcurrentDictionary<string, byte[]> _bundles = new();
    private readonly ConcurrentDictionary<string, string> _signatures = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="TimelineBundleBuilder"/> class.
    /// </summary>
    /// <param name="eventSigner">Optional DSSE signer; when null, requests with
    /// SignBundle=true still complete but the bundle is unsigned.</param>
    public TimelineBundleBuilder(
        ITimelineEventStore eventStore,
        TimelineMetrics metrics,
        TimeProvider timeProvider,
        ILogger<TimelineBundleBuilder> logger,
        IEventSigner? eventSigner = null)
    {
        _eventStore = eventStore ?? throw new ArgumentNullException(nameof(eventStore));
        _metrics = metrics ?? throw new ArgumentNullException(nameof(metrics));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _eventSigner = eventSigner;
    }

    /// <inheritdoc/>
    public Task<ExportOperation> InitiateExportAsync(
        string correlationId,
        ExportRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);
        ArgumentNullException.ThrowIfNull(request);

        var exportId = GenerateExportId();
        var now = _timeProvider.GetUtcNow();
        var operation = new ExportOperation
        {
            ExportId = exportId,
            CorrelationId = correlationId,
            Format = request.Format,
            SignBundle = request.SignBundle,
            Status = ExportStatus.Initiated,
            CreatedAt = now
        };
        _operations[exportId] = operation;

        _logger.LogInformation(
            "Initiated export {ExportId} for correlation {CorrelationId}",
            exportId,
            correlationId);

        // Fire-and-forget background export. Deliberately NOT linked to the caller's
        // token: completing (or aborting) the initiating request must not abort the
        // background work. Progress is observed via GetExportStatusAsync.
        _ = Task.Run(() => ExecuteExportAsync(exportId, correlationId, request, CancellationToken.None), CancellationToken.None);

        // No awaits on this path — the method was previously declared `async` with no
        // `await`, which raises CS1998 and fails the build under TreatWarningsAsErrors.
        return Task.FromResult(operation);
    }

    /// <inheritdoc/>
    public Task<ExportOperation?> GetExportStatusAsync(
        string exportId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(exportId);
        _operations.TryGetValue(exportId, out var operation);
        return Task.FromResult(operation);
    }

    /// <inheritdoc/>
    public Task<ExportBundle?> GetExportBundleAsync(
        string exportId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(exportId);

        // Unknown operation, not yet completed, or content missing — all map to null.
        if (!_operations.TryGetValue(exportId, out var operation) ||
            operation.Status != ExportStatus.Completed ||
            !_bundles.TryGetValue(exportId, out var content))
        {
            return Task.FromResult<ExportBundle?>(null);
        }

        _signatures.TryGetValue(exportId, out var signature);
        var bundle = new ExportBundle
        {
            ExportId = exportId,
            ContentType = operation.Format == "ndjson" ? "application/x-ndjson" : "application/json",
            FileName = $"timeline-{operation.CorrelationId}-{exportId}.{operation.Format}",
            Content = new MemoryStream(content),
            SizeBytes = content.Length,
            // Previously the DSSE signature was computed during export but never
            // surfaced; now it is attached when present.
            DsseSignature = signature
        };
        return Task.FromResult<ExportBundle?>(bundle);
    }

    /// <summary>
    /// Performs the actual export: fetches events, serializes them, optionally signs
    /// the bundle, and transitions the operation to a terminal state.
    /// </summary>
    private async Task ExecuteExportAsync(
        string exportId,
        string correlationId,
        ExportRequest request,
        CancellationToken cancellationToken)
    {
        try
        {
            UpdateOperation(exportId, op => op with { Status = ExportStatus.InProgress });

            // HLC range query when both bounds are given; otherwise everything for
            // the correlation (bounded by a large page size).
            IReadOnlyList<TimelineEvent> events;
            if (request.FromHlc.HasValue && request.ToHlc.HasValue)
            {
                events = await _eventStore.GetByHlcRangeAsync(
                    correlationId,
                    request.FromHlc.Value,
                    request.ToHlc.Value,
                    cancellationToken).ConfigureAwait(false);
            }
            else
            {
                events = await _eventStore.GetByCorrelationIdAsync(
                    correlationId,
                    limit: 100000,
                    offset: 0,
                    cancellationToken).ConfigureAwait(false);
            }

            var content = request.Format == "ndjson"
                ? BuildNdjsonBundle(events, request.IncludePayloads)
                : BuildJsonBundle(events, request.IncludePayloads, _timeProvider.GetUtcNow());

            // Sign if requested and a signer is available; persist the signature so
            // GetExportBundleAsync can attach it to the bundle.
            if (request.SignBundle && _eventSigner != null)
            {
                var signature = await _eventSigner.SignAsync(content, cancellationToken).ConfigureAwait(false);
                if (!string.IsNullOrEmpty(signature))
                {
                    _signatures[exportId] = signature;
                }
            }

            _bundles[exportId] = content;

            var now = _timeProvider.GetUtcNow();
            UpdateOperation(exportId, op => op with
            {
                Status = ExportStatus.Completed,
                EventCount = events.Count,
                FileSizeBytes = content.Length,
                CompletedAt = now
            });
            _metrics.RecordExport(request.Format, request.SignBundle, content.Length, events.Count);
            _logger.LogInformation(
                "Completed export {ExportId}: {EventCount} events, {Size} bytes",
                exportId,
                events.Count,
                content.Length);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Export {ExportId} failed", exportId);
            UpdateOperation(exportId, op => op with
            {
                Status = ExportStatus.Failed,
                CompletedAt = _timeProvider.GetUtcNow(),
                Error = ex.Message
            });
        }
    }

    /// <summary>
    /// Serializes events as newline-delimited JSON, one compact object per line.
    /// </summary>
    private static byte[] BuildNdjsonBundle(IReadOnlyList<TimelineEvent> events, bool includePayloads)
    {
        var sb = new StringBuilder();
        foreach (var evt in events)
        {
            var line = new
            {
                event_id = evt.EventId,
                t_hlc = evt.THlc.ToSortableString(),
                ts_wall = evt.TsWall.ToString("O", CultureInfo.InvariantCulture),
                correlation_id = evt.CorrelationId,
                service = evt.Service,
                kind = evt.Kind,
                payload = includePayloads ? evt.Payload : null,
                payload_digest = Convert.ToHexString(evt.PayloadDigest).ToLowerInvariant(),
                engine_version = new
                {
                    name = evt.EngineVersion.EngineName,
                    version = evt.EngineVersion.Version,
                    digest = evt.EngineVersion.SourceDigest
                },
                schema_version = evt.SchemaVersion
            };
            // Explicit '\n' instead of AppendLine: Environment.NewLine is
            // platform-dependent, which would break deterministic bundle bytes.
            sb.Append(JsonSerializer.Serialize(line, CompactJsonOptions)).Append('\n');
        }
        return Encoding.UTF8.GetBytes(sb.ToString());
    }

    /// <summary>
    /// Serializes events as a single indented JSON document with metadata.
    /// </summary>
    /// <param name="exportedAt">Export timestamp supplied by the caller (sourced from
    /// TimeProvider, per the module's determinism rule) instead of DateTimeOffset.UtcNow.</param>
    private static byte[] BuildJsonBundle(IReadOnlyList<TimelineEvent> events, bool includePayloads, DateTimeOffset exportedAt)
    {
        var bundle = new
        {
            events = events.Select(evt => new
            {
                event_id = evt.EventId,
                t_hlc = evt.THlc.ToSortableString(),
                ts_wall = evt.TsWall.ToString("O", CultureInfo.InvariantCulture),
                correlation_id = evt.CorrelationId,
                service = evt.Service,
                kind = evt.Kind,
                payload = includePayloads ? evt.Payload : null,
                payload_digest = Convert.ToHexString(evt.PayloadDigest).ToLowerInvariant(),
                engine_version = new
                {
                    name = evt.EngineVersion.EngineName,
                    version = evt.EngineVersion.Version,
                    digest = evt.EngineVersion.SourceDigest
                },
                schema_version = evt.SchemaVersion
            }).ToList(),
            metadata = new
            {
                event_count = events.Count,
                exported_at = exportedAt.ToString("O", CultureInfo.InvariantCulture)
            }
        };
        return JsonSerializer.SerializeToUtf8Bytes(bundle, IndentedJsonOptions);
    }

    /// <summary>
    /// Applies a transform to the stored operation, if it still exists.
    /// </summary>
    private void UpdateOperation(string exportId, Func<ExportOperation, ExportOperation> update)
    {
        if (_operations.TryGetValue(exportId, out var current))
        {
            _operations[exportId] = update(current);
        }
    }

    /// <summary>
    /// Generates a 16-hex-character export ID.
    /// </summary>
    private static string GenerateExportId()
    {
        return Guid.NewGuid().ToString("N")[..16];
    }
}

View File

@@ -0,0 +1,155 @@
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
using StellaOps.Eventing.Models;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Timeline.Core;
/// <summary>
/// Interface for querying timeline events and derived analyses.
/// </summary>
public interface ITimelineQueryService
{
/// <summary>
/// Gets events for a correlation ID, ordered by HLC timestamp.
/// </summary>
/// <param name="correlationId">The correlation ID to query.</param>
/// <param name="options">Pagination and filter options; defaults apply when null.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The matching events plus pagination metadata.</returns>
Task<TimelineQueryResult> GetByCorrelationIdAsync(
string correlationId,
TimelineQueryOptions? options = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the critical path (longest latency stages) for a correlation.
/// </summary>
/// <param name="correlationId">The correlation ID to analyze.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The critical-path stages and total duration.</returns>
Task<CriticalPathResult> GetCriticalPathAsync(
string correlationId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets events emitted by a given service, optionally from an HLC lower bound.
/// </summary>
/// <param name="service">The service name to filter by.</param>
/// <param name="fromHlc">Optional HLC lower bound.</param>
/// <param name="limit">Maximum number of events to return.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The matching events plus pagination metadata.</returns>
Task<TimelineQueryResult> GetByServiceAsync(
string service,
HlcTimestamp? fromHlc = null,
int limit = 100,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Options for timeline queries (pagination and filtering).
/// </summary>
public sealed record TimelineQueryOptions
{
/// <summary>
/// Maximum number of events to return. Defaults to 100.
/// </summary>
public int Limit { get; init; } = 100;
/// <summary>
/// Number of events to skip, for offset-based pagination. Defaults to 0.
/// </summary>
public int Offset { get; init; } = 0;
/// <summary>
/// Filter by service names (optional). Null or empty means no service filter.
/// </summary>
public IReadOnlyList<string>? Services { get; init; }
/// <summary>
/// Filter by event kinds (optional). Null or empty means no kind filter.
/// </summary>
public IReadOnlyList<string>? Kinds { get; init; }
/// <summary>
/// Start of HLC range (inclusive).
/// </summary>
public HlcTimestamp? FromHlc { get; init; }
/// <summary>
/// End of HLC range (inclusive).
/// </summary>
public HlcTimestamp? ToHlc { get; init; }
}
/// <summary>
/// Result of a timeline query: the page of events plus pagination metadata.
/// </summary>
public sealed record TimelineQueryResult
{
/// <summary>
/// The events matching the query, in HLC order.
/// </summary>
public required IReadOnlyList<TimelineEvent> Events { get; init; }
/// <summary>
/// Total number of matching events across all pages (for pagination).
/// </summary>
public long TotalCount { get; init; }
/// <summary>
/// Whether there are more results beyond this page.
/// </summary>
public bool HasMore { get; init; }
/// <summary>
/// Cursor for the next page (HLC of the last event in this page); null when
/// there is no further page.
/// </summary>
public string? NextCursor { get; init; }
}
/// <summary>
/// Critical path analysis result for one correlation.
/// </summary>
public sealed record CriticalPathResult
{
/// <summary>
/// The correlation ID analyzed.
/// </summary>
public required string CorrelationId { get; init; }
/// <summary>
/// Total wall-clock duration from the first to the last event.
/// </summary>
public TimeSpan TotalDuration { get; init; }
/// <summary>
/// The stages in the critical path, each covering one event-to-event transition.
/// </summary>
public required IReadOnlyList<CriticalPathStage> Stages { get; init; }
}
/// <summary>
/// A single stage (event-to-event transition) in the critical path.
/// </summary>
public sealed record CriticalPathStage
{
/// <summary>
/// Stage label formed from the two event kinds (e.g., "ENQUEUE -> EXECUTE").
/// </summary>
public required string Stage { get; init; }
/// <summary>
/// Service where this stage occurred.
/// </summary>
public required string Service { get; init; }
/// <summary>
/// Wall-clock duration of this stage.
/// </summary>
public TimeSpan Duration { get; init; }
/// <summary>
/// This stage's share of the total duration, as a percentage.
/// </summary>
public double Percentage { get; init; }
/// <summary>
/// HLC at the start of the stage.
/// </summary>
public required HlcTimestamp FromHlc { get; init; }
/// <summary>
/// HLC at the end of the stage.
/// </summary>
public required HlcTimestamp ToHlc { get; init; }
}

View File

@@ -0,0 +1,50 @@
-- Migration: 20260107_002_create_critical_path_view
-- Purpose: Create materialized view for critical path computation
-- Create materialized view for critical path analysis.
-- Each row is one transition between consecutive events of a correlation
-- (ordered by t_hlc); duration_ms measures the wall-clock gap between them.
CREATE MATERIALIZED VIEW timeline.critical_path AS
WITH ordered_events AS (
SELECT
correlation_id,
event_id,
t_hlc,
ts_wall,
service,
kind,
LAG(t_hlc) OVER (PARTITION BY correlation_id ORDER BY t_hlc) as prev_hlc,
LAG(ts_wall) OVER (PARTITION BY correlation_id ORDER BY t_hlc) as prev_ts,
LAG(kind) OVER (PARTITION BY correlation_id ORDER BY t_hlc) as prev_kind,
LAG(event_id) OVER (PARTITION BY correlation_id ORDER BY t_hlc) as prev_event_id
FROM timeline.events
)
SELECT
correlation_id,
prev_kind || ' -> ' || kind as stage,
prev_event_id as from_event_id,
event_id as to_event_id,
prev_hlc as from_hlc,
t_hlc as to_hlc,
EXTRACT(EPOCH FROM (ts_wall - prev_ts)) * 1000 as duration_ms,
service
-- The first event of each correlation has no predecessor and is excluded.
FROM ordered_events
WHERE prev_hlc IS NOT NULL;
-- Unique index: REQUIRED for REFRESH MATERIALIZED VIEW CONCURRENTLY below.
-- NOTE(review): assumes t_hlc never repeats within a correlation, so
-- (correlation_id, from_hlc) is unique -- confirm against the event writer.
CREATE UNIQUE INDEX idx_critical_path_corr_from_hlc
ON timeline.critical_path (correlation_id, from_hlc);
CREATE INDEX idx_critical_path_duration
ON timeline.critical_path (correlation_id, duration_ms DESC);
-- Refresh helper. Despite the per-correlation intent elsewhere in the module,
-- this refreshes the WHOLE view (PostgreSQL has no per-partition refresh for
-- materialized views); CONCURRENTLY lets readers continue during the refresh.
CREATE OR REPLACE FUNCTION timeline.refresh_critical_path()
RETURNS void AS $$
BEGIN
REFRESH MATERIALIZED VIEW CONCURRENTLY timeline.critical_path;
END;
$$ LANGUAGE plpgsql;
-- Comments for documentation
COMMENT ON MATERIALIZED VIEW timeline.critical_path IS 'Pre-computed critical path stages for performance analysis';
COMMENT ON COLUMN timeline.critical_path.stage IS 'Transition label: prev_kind -> current_kind';
COMMENT ON COLUMN timeline.critical_path.duration_ms IS 'Wall-clock duration between events in milliseconds';

View File

@@ -0,0 +1,167 @@
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
using StellaOps.Eventing.Models;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Timeline.Core.Replay;
/// <summary>
/// Interface for orchestrating deterministic replay of timeline events.
/// Replays are asynchronous: initiate one, poll its status, and optionally cancel it.
/// </summary>
public interface ITimelineReplayOrchestrator
{
/// <summary>
/// Initiates a replay operation for a correlation ID. The returned operation
/// starts in the Initiated state; the replay itself runs in the background.
/// </summary>
/// <param name="correlationId">The correlation ID to replay.</param>
/// <param name="request">Replay request parameters (mode, optional HLC range).</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The initiated replay operation.</returns>
Task<ReplayOperation> InitiateReplayAsync(
string correlationId,
ReplayRequest request,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the status of a replay operation.
/// </summary>
/// <param name="replayId">The replay operation ID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The replay operation status, or null if no operation with that ID exists.</returns>
Task<ReplayOperation?> GetReplayStatusAsync(
string replayId,
CancellationToken cancellationToken = default);
/// <summary>
/// Cancels an in-progress replay operation.
/// </summary>
/// <param name="replayId">The replay operation ID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True if cancelled; false if the operation was not found or had
/// already reached a terminal state.</returns>
Task<bool> CancelReplayAsync(
string replayId,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Request parameters for initiating a replay operation.
/// </summary>
public sealed record ReplayRequest
{
/// <summary>
/// Replay mode: "dry-run" (default) or "verify".
/// </summary>
public string Mode { get; init; } = "dry-run";
/// <summary>
/// Optional HLC to replay from. Both <see cref="FromHlc"/> and <see cref="ToHlc"/>
/// must be set for range filtering to apply; otherwise all events for the
/// correlation are replayed.
/// </summary>
public HlcTimestamp? FromHlc { get; init; }
/// <summary>
/// Optional HLC to replay to. See <see cref="FromHlc"/> for how the range is applied.
/// </summary>
public HlcTimestamp? ToHlc { get; init; }
}
/// <summary>
/// Represents a replay operation and its observable progress.
/// </summary>
public sealed record ReplayOperation
{
/// <summary>
/// Unique replay operation ID; an opaque token used to poll status or cancel.
/// </summary>
public required string ReplayId { get; init; }
/// <summary>
/// The correlation ID being replayed.
/// </summary>
public required string CorrelationId { get; init; }
/// <summary>
/// Replay mode, copied from the originating request ("dry-run" or "verify").
/// </summary>
public required string Mode { get; init; }
/// <summary>
/// Current status of the operation.
/// </summary>
public required ReplayStatus Status { get; init; }
/// <summary>
/// Progress fraction (0.0 to 1.0).
/// </summary>
public double Progress { get; init; }
/// <summary>
/// Number of events processed so far.
/// </summary>
public int EventsProcessed { get; init; }
/// <summary>
/// Total number of events in the replay.
/// </summary>
public int TotalEvents { get; init; }
/// <summary>
/// Start time (UTC).
/// </summary>
public DateTimeOffset StartedAt { get; init; }
/// <summary>
/// Completion time; set when the operation reaches a terminal state.
/// </summary>
public DateTimeOffset? CompletedAt { get; init; }
/// <summary>
/// Error message; set only when <see cref="Status"/> is Failed.
/// </summary>
public string? Error { get; init; }
/// <summary>
/// Digest of the original event chain (for verify mode).
/// </summary>
public string? OriginalDigest { get; init; }
/// <summary>
/// Digest of the replayed output chain (for verify mode).
/// </summary>
public string? ReplayDigest { get; init; }
/// <summary>
/// Whether the replay digest matched the original (for verify mode);
/// null until the comparison has been made.
/// </summary>
public bool? DeterministicMatch { get; init; }
}
/// <summary>
/// Replay operation status. Completed, Failed, and Cancelled are terminal.
/// </summary>
public enum ReplayStatus
{
/// <summary>
/// Replay has been initiated but not started.
/// </summary>
Initiated,
/// <summary>
/// Replay is in progress.
/// </summary>
InProgress,
/// <summary>
/// Replay completed successfully.
/// </summary>
Completed,
/// <summary>
/// Replay failed; see <see cref="ReplayOperation.Error"/> for the reason.
/// </summary>
Failed,
/// <summary>
/// Replay was cancelled before completing.
/// </summary>
Cancelled
}

View File

@@ -0,0 +1,294 @@
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
using System.Collections.Concurrent;
using System.Security.Cryptography;
using System.Text;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Time.Testing;
using StellaOps.Eventing.Models;
using StellaOps.Eventing.Storage;
using StellaOps.HybridLogicalClock;
using StellaOps.Timeline.Core.Telemetry;
namespace StellaOps.Timeline.Core.Replay;
/// <summary>
/// Implementation of <see cref="ITimelineReplayOrchestrator"/>.
/// Replays run on a background thread-pool task; operation state is kept in memory
/// (production would use PostgreSQL) and cancellation is cooperative via
/// <see cref="CancelReplayAsync"/>.
/// </summary>
public sealed class TimelineReplayOrchestrator : ITimelineReplayOrchestrator
{
    private readonly ITimelineEventStore _eventStore;
    private readonly TimelineMetrics _metrics;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<TimelineReplayOrchestrator> _logger;

    // In-memory store for replay operations (production would use PostgreSQL).
    private readonly ConcurrentDictionary<string, ReplayOperation> _operations = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="TimelineReplayOrchestrator"/> class.
    /// </summary>
    public TimelineReplayOrchestrator(
        ITimelineEventStore eventStore,
        TimelineMetrics metrics,
        TimeProvider timeProvider,
        ILogger<TimelineReplayOrchestrator> logger)
    {
        _eventStore = eventStore ?? throw new ArgumentNullException(nameof(eventStore));
        _metrics = metrics ?? throw new ArgumentNullException(nameof(metrics));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc/>
    public async Task<ReplayOperation> InitiateReplayAsync(
        string correlationId,
        ReplayRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);
        ArgumentNullException.ThrowIfNull(request);

        var replayId = GenerateReplayId();
        var now = _timeProvider.GetUtcNow();

        // Events are loaded up front so TotalEvents is known before returning.
        var events = await GetEventsForReplayAsync(correlationId, request, cancellationToken)
            .ConfigureAwait(false);

        var operation = new ReplayOperation
        {
            ReplayId = replayId,
            CorrelationId = correlationId,
            Mode = request.Mode,
            Status = ReplayStatus.Initiated,
            Progress = 0,
            EventsProcessed = 0,
            TotalEvents = events.Count,
            StartedAt = now
        };
        _operations[replayId] = operation;
        _logger.LogInformation(
            "Initiated replay {ReplayId} for correlation {CorrelationId} with {EventCount} events",
            replayId,
            correlationId,
            events.Count);

        // Fire-and-forget background replay (in production this would be queued to a
        // worker). Deliberately NOT linked to the caller's token: completing or
        // aborting the initiating request must not kill the background replay —
        // cancellation goes through CancelReplayAsync instead.
        _ = Task.Run(() => ExecuteReplayAsync(replayId, events, request, CancellationToken.None), CancellationToken.None);
        return operation;
    }

    /// <inheritdoc/>
    public Task<ReplayOperation?> GetReplayStatusAsync(
        string replayId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(replayId);
        _operations.TryGetValue(replayId, out var operation);
        return Task.FromResult(operation);
    }

    /// <inheritdoc/>
    public Task<bool> CancelReplayAsync(
        string replayId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(replayId);
        if (!_operations.TryGetValue(replayId, out var operation))
        {
            return Task.FromResult(false);
        }
        // Terminal states cannot be cancelled.
        if (operation.Status is ReplayStatus.Completed or ReplayStatus.Failed or ReplayStatus.Cancelled)
        {
            return Task.FromResult(false);
        }
        var cancelled = operation with
        {
            Status = ReplayStatus.Cancelled,
            CompletedAt = _timeProvider.GetUtcNow()
        };
        _operations[replayId] = cancelled;
        _logger.LogInformation("Cancelled replay {ReplayId}", replayId);
        return Task.FromResult(true);
    }

    /// <summary>
    /// Loads the events to replay: HLC range when both bounds are given, otherwise
    /// everything for the correlation (bounded by a large page size).
    /// </summary>
    private async Task<IReadOnlyList<TimelineEvent>> GetEventsForReplayAsync(
        string correlationId,
        ReplayRequest request,
        CancellationToken cancellationToken)
    {
        if (request.FromHlc.HasValue && request.ToHlc.HasValue)
        {
            return await _eventStore.GetByHlcRangeAsync(
                correlationId,
                request.FromHlc.Value,
                request.ToHlc.Value,
                cancellationToken).ConfigureAwait(false);
        }
        return await _eventStore.GetByCorrelationIdAsync(
            correlationId,
            limit: 100000, // Get all events
            offset: 0,
            cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Executes the replay loop, publishing progress, handling cooperative
    /// cancellation, and recording the final digest comparison.
    /// </summary>
    private async Task ExecuteReplayAsync(
        string replayId,
        IReadOnlyList<TimelineEvent> events,
        ReplayRequest request,
        CancellationToken cancellationToken)
    {
        var startTime = _timeProvider.GetUtcNow();
        try
        {
            UpdateOperation(replayId, op => op with { Status = ReplayStatus.InProgress });

            // Digest of the original chain (chains the stored per-event payload digests).
            var originalDigest = ComputeEventChainDigest(events);
            var processedCount = 0;
            var replayedPayloads = new List<string>(events.Count);
            foreach (var evt in events)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Cooperative cancellation: CancelReplayAsync flips the stored status.
                if (_operations.TryGetValue(replayId, out var current) && current.Status == ReplayStatus.Cancelled)
                {
                    return;
                }

                // Simulate replay processing. In production, this would re-execute
                // the logic that produced each event. (The original code also set up
                // a FakeTimeProvider here — a test-only dependency whose value was
                // never read; it has been removed.)
                replayedPayloads.Add(evt.Payload);
                processedCount++;

                UpdateOperation(replayId, op => op with
                {
                    Progress = (double)processedCount / events.Count,
                    EventsProcessed = processedCount
                });

                // Small delay to simulate processing (remove in production).
                await Task.Delay(1, cancellationToken).ConfigureAwait(false);
            }

            // Digest of the replayed chain, built the same way as the original chain
            // so that a faithful replay yields a matching digest.
            var replayDigest = ComputePayloadChainDigest(replayedPayloads);
            var deterministicMatch = originalDigest == replayDigest;
            var endTime = _timeProvider.GetUtcNow();
            var duration = (endTime - startTime).TotalSeconds;

            UpdateOperation(replayId, op => op with
            {
                Status = ReplayStatus.Completed,
                Progress = 1.0,
                EventsProcessed = events.Count,
                CompletedAt = endTime,
                OriginalDigest = originalDigest,
                ReplayDigest = replayDigest,
                DeterministicMatch = deterministicMatch
            });
            _metrics.RecordReplay(
                request.Mode,
                deterministicMatch ? "SUCCESS" : "MISMATCH",
                events.Count,
                duration);
            _logger.LogInformation(
                "Completed replay {ReplayId}: {EventCount} events, deterministic={Match}, duration={Duration}s",
                replayId,
                events.Count,
                deterministicMatch,
                duration);
        }
        catch (OperationCanceledException)
        {
            // Token-driven cancellation is a Cancelled outcome, not a Failed one
            // (previously it fell into the generic handler and was marked Failed).
            UpdateOperation(replayId, op => op with
            {
                Status = ReplayStatus.Cancelled,
                CompletedAt = _timeProvider.GetUtcNow()
            });
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Replay {ReplayId} failed", replayId);
            UpdateOperation(replayId, op => op with
            {
                Status = ReplayStatus.Failed,
                CompletedAt = _timeProvider.GetUtcNow(),
                Error = ex.Message
            });
            _metrics.RecordReplay(request.Mode, "FAILED", events.Count, 0);
        }
    }

    /// <summary>
    /// Applies a transform to the stored operation, if it still exists.
    /// </summary>
    private void UpdateOperation(string replayId, Func<ReplayOperation, ReplayOperation> update)
    {
        if (_operations.TryGetValue(replayId, out var current))
        {
            _operations[replayId] = update(current);
        }
    }

    /// <summary>
    /// Generates a 16-hex-character replay ID.
    /// </summary>
    private static string GenerateReplayId()
    {
        return Guid.NewGuid().ToString("N")[..16];
    }

    /// <summary>
    /// Chains the stored per-event payload digests into a single SHA-256 hex digest.
    /// </summary>
    private static string ComputeEventChainDigest(IReadOnlyList<TimelineEvent> events)
    {
        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
        foreach (var evt in events)
        {
            hasher.AppendData(evt.PayloadDigest);
        }
        var hash = hasher.GetHashAndReset();
        return Convert.ToHexString(hash).ToLowerInvariant();
    }

    /// <summary>
    /// Chains per-payload SHA-256 digests into a single SHA-256 hex digest,
    /// mirroring <see cref="ComputeEventChainDigest"/>.
    /// </summary>
    private static string ComputePayloadChainDigest(IReadOnlyList<string> payloads)
    {
        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
        foreach (var payload in payloads)
        {
            // Hash each payload first, then chain the digests. The original code
            // appended the RAW payload bytes here while the original-chain digest
            // appended the stored PayloadDigest values, so DeterministicMatch could
            // never be true for non-empty payloads.
            // NOTE(review): assumes TimelineEvent.PayloadDigest is the SHA-256 of the
            // UTF-8 payload — confirm against the event producer.
            hasher.AppendData(SHA256.HashData(Encoding.UTF8.GetBytes(payload)));
        }
        var hash = hasher.GetHashAndReset();
        return Convert.ToHexString(hash).ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,41 @@
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using StellaOps.Timeline.Core.Export;
using StellaOps.Timeline.Core.Replay;
using StellaOps.Timeline.Core.Telemetry;
namespace StellaOps.Timeline.Core;
/// <summary>
/// Extension methods for registering timeline services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds timeline query, replay, and export services to the container.
    /// Uses TryAdd* so pre-existing registrations (e.g., test doubles) win.
    /// </summary>
    /// <param name="services">The service collection to add to.</param>
    /// <param name="configuration">Application configuration; currently only
    /// null-checked, reserved for future timeline options binding.</param>
    /// <returns>The same service collection, for chaining.</returns>
    public static IServiceCollection AddTimelineServices(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Metrics are a singleton so counters are shared process-wide.
        services.TryAddSingleton<TimelineMetrics>();

        // Query service holds no state; scoped is appropriate.
        services.TryAddScoped<ITimelineQueryService, TimelineQueryService>();

        // The replay orchestrator and bundle builder track in-flight operations in
        // instance-level in-memory dictionaries. Registering them Scoped (as before)
        // means a later request gets a fresh instance with empty dictionaries and can
        // never find an operation initiated earlier, breaking the initiate/poll/fetch
        // workflow — they must be singletons.
        // NOTE(review): confirm ITimelineEventStore and IEventSigner are safe to
        // capture in a singleton (no captive scoped dependencies).
        services.TryAddSingleton<ITimelineReplayOrchestrator, TimelineReplayOrchestrator>();
        services.TryAddSingleton<ITimelineBundleBuilder, TimelineBundleBuilder>();
        return services;
    }
}

View File

@@ -0,0 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Timeline.Core</RootNamespace>
<Description>StellaOps Timeline Core - Query and replay services</Description>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Eventing\StellaOps.Eventing.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.HybridLogicalClock\StellaOps.HybridLogicalClock.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Replay.Core\StellaOps.Replay.Core.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Npgsql" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,173 @@
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
using System.Diagnostics;
using System.Diagnostics.Metrics;
namespace StellaOps.Timeline.Core.Telemetry;
/// <summary>
/// Metrics instrumentation for the Timeline service. Publishes counters and
/// histograms on the "StellaOps.Timeline" meter and exposes a static
/// <see cref="System.Diagnostics.ActivitySource"/> for tracing.
/// </summary>
public sealed class TimelineMetrics : IDisposable
{
    // Single source of truth for the instrumentation identity. Previously the
    // name/version literals were duplicated on the Meter and ActivitySource,
    // which risked the two drifting apart.
    private const string InstrumentationName = "StellaOps.Timeline";
    private const string InstrumentationVersion = "1.0.0";

    private readonly Meter _meter;
    private readonly Counter<long> _queriesCounter;
    private readonly Counter<long> _replaysCounter;
    private readonly Counter<long> _exportsCounter;
    private readonly Histogram<double> _queryDurationHistogram;
    private readonly Histogram<double> _replayDurationHistogram;
    private readonly Histogram<long> _exportSizeHistogram;
    private readonly Counter<long> _cacheHitsCounter;
    private readonly Counter<long> _cacheMissesCounter;

    /// <summary>
    /// Activity source for tracing. Static so it outlives any single metrics
    /// instance; it is intentionally not disposed by <see cref="Dispose"/>.
    /// </summary>
    public static readonly ActivitySource ActivitySource = new(InstrumentationName, InstrumentationVersion);

    /// <summary>
    /// Initializes a new instance of the <see cref="TimelineMetrics"/> class,
    /// creating the meter and all of its instruments.
    /// </summary>
    public TimelineMetrics()
    {
        _meter = new Meter(InstrumentationName, InstrumentationVersion);
        _queriesCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_queries_total",
            description: "Total number of timeline queries");
        _replaysCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_replays_total",
            description: "Total number of replay operations");
        _exportsCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_exports_total",
            description: "Total number of export operations");
        _queryDurationHistogram = _meter.CreateHistogram<double>(
            "stellaops_timeline_query_duration_seconds",
            unit: "s",
            description: "Duration of timeline query operations");
        _replayDurationHistogram = _meter.CreateHistogram<double>(
            "stellaops_timeline_replay_duration_seconds",
            unit: "s",
            description: "Duration of replay operations");
        _exportSizeHistogram = _meter.CreateHistogram<long>(
            "stellaops_timeline_export_size_bytes",
            unit: "By",
            description: "Size of exported timeline bundles");
        _cacheHitsCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_cache_hits_total",
            description: "Total number of cache hits");
        _cacheMissesCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_cache_misses_total",
            description: "Total number of cache misses");
    }

    /// <summary>
    /// Records a timeline query.
    /// </summary>
    /// <param name="queryType">Kind of query (used as a metric tag).</param>
    /// <param name="eventCount">Number of events returned; bucketed for cardinality control.</param>
    /// <param name="durationSeconds">Query duration in seconds.</param>
    public void RecordQuery(string queryType, int eventCount, double durationSeconds)
    {
        _queriesCounter.Add(1,
            new KeyValuePair<string, object?>("query_type", queryType));
        _queryDurationHistogram.Record(durationSeconds,
            new KeyValuePair<string, object?>("query_type", queryType),
            new KeyValuePair<string, object?>("event_count_bucket", GetCountBucket(eventCount)));
    }

    /// <summary>
    /// Records a replay operation.
    /// </summary>
    /// <param name="mode">Replay mode (metric tag).</param>
    /// <param name="status">Terminal status of the replay (metric tag).</param>
    /// <param name="eventCount">Number of events replayed; bucketed for cardinality control.</param>
    /// <param name="durationSeconds">Replay duration in seconds.</param>
    public void RecordReplay(string mode, string status, int eventCount, double durationSeconds)
    {
        _replaysCounter.Add(1,
            new KeyValuePair<string, object?>("mode", mode),
            new KeyValuePair<string, object?>("status", status));
        _replayDurationHistogram.Record(durationSeconds,
            new KeyValuePair<string, object?>("mode", mode),
            new KeyValuePair<string, object?>("event_count_bucket", GetCountBucket(eventCount)));
    }

    /// <summary>
    /// Records an export operation.
    /// </summary>
    /// <param name="format">Export format (metric tag).</param>
    /// <param name="signed">Whether the bundle was signed (metric tag).</param>
    /// <param name="sizeBytes">Size of the exported bundle in bytes.</param>
    /// <param name="eventCount">Number of exported events; bucketed for cardinality control.</param>
    public void RecordExport(string format, bool signed, long sizeBytes, int eventCount)
    {
        _exportsCounter.Add(1,
            new KeyValuePair<string, object?>("format", format),
            new KeyValuePair<string, object?>("signed", signed));
        _exportSizeHistogram.Record(sizeBytes,
            new KeyValuePair<string, object?>("format", format),
            new KeyValuePair<string, object?>("event_count_bucket", GetCountBucket(eventCount)));
    }

    /// <summary>
    /// Records a cache hit for the given cache type.
    /// </summary>
    public void RecordCacheHit(string cacheType)
    {
        _cacheHitsCounter.Add(1,
            new KeyValuePair<string, object?>("cache_type", cacheType));
    }

    /// <summary>
    /// Records a cache miss for the given cache type.
    /// </summary>
    public void RecordCacheMiss(string cacheType)
    {
        _cacheMissesCounter.Add(1,
            new KeyValuePair<string, object?>("cache_type", cacheType));
    }

    /// <summary>
    /// Starts a query activity for tracing. Returns <c>null</c> when no
    /// listener is attached to <see cref="ActivitySource"/>.
    /// </summary>
    public Activity? StartQueryActivity(string correlationId, string queryType)
    {
        // parentContext: default lets the runtime fall back to Activity.Current.
        return ActivitySource.StartActivity(
            "timeline.query",
            ActivityKind.Server,
            parentContext: default,
            tags: new[]
            {
                new KeyValuePair<string, object?>("correlation_id", correlationId),
                new KeyValuePair<string, object?>("query_type", queryType)
            });
    }

    /// <summary>
    /// Starts a replay activity for tracing. Returns <c>null</c> when no
    /// listener is attached to <see cref="ActivitySource"/>.
    /// </summary>
    public Activity? StartReplayActivity(string correlationId, string mode)
    {
        return ActivitySource.StartActivity(
            "timeline.replay",
            ActivityKind.Server,
            parentContext: default,
            tags: new[]
            {
                new KeyValuePair<string, object?>("correlation_id", correlationId),
                new KeyValuePair<string, object?>("mode", mode)
            });
    }

    // Buckets raw event counts into coarse ranges to keep tag cardinality low.
    private static string GetCountBucket(int count) => count switch
    {
        <= 10 => "1-10",
        <= 100 => "11-100",
        <= 1000 => "101-1000",
        <= 10000 => "1001-10000",
        _ => "10000+"
    };

    /// <inheritdoc/>
    public void Dispose()
    {
        // Only the instance-owned meter is disposed; the static ActivitySource
        // is shared and deliberately left alive.
        _meter.Dispose();
    }
}

View File

@@ -0,0 +1,192 @@
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
using Microsoft.Extensions.Logging;
using StellaOps.Eventing.Models;
using StellaOps.Eventing.Storage;
using StellaOps.HybridLogicalClock;
namespace StellaOps.Timeline.Core;
/// <summary>
/// Implementation of <see cref="ITimelineQueryService"/> backed by an
/// <see cref="ITimelineEventStore"/>.
/// </summary>
public sealed class TimelineQueryService : ITimelineQueryService
{
    private readonly ITimelineEventStore _eventStore;
    private readonly ILogger<TimelineQueryService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="TimelineQueryService"/> class.
    /// </summary>
    /// <param name="eventStore">Store of persisted timeline events.</param>
    /// <param name="logger">Diagnostic logger.</param>
    /// <exception cref="ArgumentNullException">Thrown when a dependency is <c>null</c>.</exception>
    public TimelineQueryService(
        ITimelineEventStore eventStore,
        ILogger<TimelineQueryService> logger)
    {
        _eventStore = eventStore ?? throw new ArgumentNullException(nameof(eventStore));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc/>
    public async Task<TimelineQueryResult> GetByCorrelationIdAsync(
        string correlationId,
        TimelineQueryOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);
        options ??= new TimelineQueryOptions();

        List<TimelineEvent> pageEvents;
        bool hasMore;
        string? nextCursor;

        if (options.FromHlc.HasValue && options.ToHlc.HasValue)
        {
            // HLC range queries are not paged by the store; filter the full
            // range, then enforce the page limit locally (preserves prior
            // behavior of this branch).
            var rangeEvents = await _eventStore.GetByHlcRangeAsync(
                correlationId,
                options.FromHlc.Value,
                options.ToHlc.Value,
                cancellationToken).ConfigureAwait(false);

            var filtered = ApplyFilters(rangeEvents, options);
            hasMore = filtered.Count > options.Limit;
            pageEvents = hasMore ? filtered.Take(options.Limit).ToList() : filtered;
            nextCursor = hasMore && pageEvents.Count > 0
                ? pageEvents[^1].THlc.ToSortableString()
                : null;
        }
        else
        {
            // Fetch one extra row so we can tell whether another page exists.
            var rawEvents = await _eventStore.GetByCorrelationIdAsync(
                correlationId,
                options.Limit + 1,
                options.Offset,
                cancellationToken).ConfigureAwait(false);

            // Fix: paging is decided from the raw fetch, BEFORE client-side
            // filters run. Computing HasMore from the filtered list (as
            // before) under-reported continuation whenever filters dropped
            // events, and a fully-filtered page returned NextCursor = null
            // even though more data existed, stranding the caller.
            hasMore = rawEvents.Count > options.Limit;
            var window = hasMore
                ? rawEvents.Take(options.Limit).ToList()
                : rawEvents.ToList();

            // Filters apply within the fixed page window.
            pageEvents = ApplyFilters(window, options);

            // Advance the cursor by the page window (pre-filter) so pagination
            // always makes progress, even when the filtered page is empty.
            nextCursor = hasMore && window.Count > 0
                ? window[^1].THlc.ToSortableString()
                : null;
        }

        // NOTE(review): TotalCount is the unfiltered event count for the
        // correlation, not the count matching the active filters.
        var totalCount = await _eventStore.CountByCorrelationIdAsync(correlationId, cancellationToken)
            .ConfigureAwait(false);

        _logger.LogDebug(
            "Queried {Count} events for correlation {CorrelationId}",
            pageEvents.Count,
            correlationId);

        return new TimelineQueryResult
        {
            Events = pageEvents,
            TotalCount = totalCount,
            HasMore = hasMore,
            NextCursor = nextCursor
        };
    }

    /// <inheritdoc/>
    public async Task<CriticalPathResult> GetCriticalPathAsync(
        string correlationId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);

        // 10000 is a practical ceiling for "all events of one correlation";
        // larger correlations are analyzed on a truncated prefix.
        var events = await _eventStore.GetByCorrelationIdAsync(
            correlationId,
            limit: 10000,
            offset: 0,
            cancellationToken).ConfigureAwait(false);

        if (events.Count < 2)
        {
            // Fewer than two events means there are no stage transitions.
            return new CriticalPathResult
            {
                CorrelationId = correlationId,
                TotalDuration = TimeSpan.Zero,
                Stages = Array.Empty<CriticalPathStage>()
            };
        }

        // NOTE(review): assumes the store returns events in timeline order;
        // out-of-order events would yield negative wall-clock deltas. Confirm
        // the store's ordering guarantee.
        var stages = new List<CriticalPathStage>(events.Count - 1);
        var totalDuration = events[^1].TsWall - events[0].TsWall;

        for (var i = 1; i < events.Count; i++)
        {
            var prev = events[i - 1];
            var curr = events[i];
            var stageDuration = curr.TsWall - prev.TsWall;

            stages.Add(new CriticalPathStage
            {
                Stage = $"{prev.Kind} -> {curr.Kind}",
                Service = curr.Service,
                Duration = stageDuration,
                // Guard against zero total duration to avoid division by zero.
                Percentage = totalDuration.TotalMilliseconds > 0
                    ? stageDuration.TotalMilliseconds / totalDuration.TotalMilliseconds * 100
                    : 0,
                FromHlc = prev.THlc,
                ToHlc = curr.THlc
            });
        }

        // Sort by duration descending: the critical path is the longest stages.
        stages = stages.OrderByDescending(s => s.Duration).ToList();

        return new CriticalPathResult
        {
            CorrelationId = correlationId,
            TotalDuration = totalDuration,
            Stages = stages
        };
    }

    /// <inheritdoc/>
    public async Task<TimelineQueryResult> GetByServiceAsync(
        string service,
        HlcTimestamp? fromHlc = null,
        int limit = 100,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(service);

        // Fetch one extra row so we can tell whether another page exists.
        var events = await _eventStore.GetByServiceAsync(
            service,
            fromHlc,
            limit + 1,
            cancellationToken).ConfigureAwait(false);

        var hasMore = events.Count > limit;
        var resultEvents = hasMore ? events.Take(limit).ToList() : events;

        return new TimelineQueryResult
        {
            Events = resultEvents,
            // NOTE(review): for service queries TotalCount reflects only the
            // returned page, not the total rows for the service.
            TotalCount = resultEvents.Count,
            HasMore = hasMore,
            NextCursor = hasMore && resultEvents.Count > 0
                ? resultEvents[^1].THlc.ToSortableString()
                : null
        };
    }

    // Applies the optional service/kind filters from the query options.
    // Matching is case-insensitive; absent or empty filter lists match all.
    private static List<TimelineEvent> ApplyFilters(
        IReadOnlyList<TimelineEvent> events,
        TimelineQueryOptions options)
    {
        var query = events.AsEnumerable();

        if (options.Services is { Count: > 0 })
        {
            var services = new HashSet<string>(options.Services, StringComparer.OrdinalIgnoreCase);
            query = query.Where(e => services.Contains(e.Service));
        }

        if (options.Kinds is { Count: > 0 })
        {
            var kinds = new HashSet<string>(options.Kinds, StringComparer.OrdinalIgnoreCase);
            query = query.Where(e => kinds.Contains(e.Kind));
        }

        return query.ToList();
    }
}