audit work, fixed StellaOps.sln warnings/errors, fixed tests, sprints work, new advisories
This commit is contained in:
25
src/Timeline/__Libraries/StellaOps.Timeline.Core/AGENTS.md
Normal file
25
src/Timeline/__Libraries/StellaOps.Timeline.Core/AGENTS.md
Normal file
@@ -0,0 +1,25 @@
# Timeline Core Module Charter

## Mission
- Provide timeline core models and deterministic ordering logic.

## Responsibilities
- Define timeline domain models and validation rules.
- Implement ordering and partitioning logic for timeline events.
- Keep serialization deterministic and invariant.

## Required Reading
- docs/README.md
- docs/07_HIGH_LEVEL_ARCHITECTURE.md
- docs/modules/platform/architecture-overview.md
- docs/modules/timeline-indexer/architecture.md
- docs/modules/timeline-indexer/README.md

## Working Agreement
- Deterministic ordering and invariant formatting.
- Use TimeProvider and IGuidGenerator where timestamps or IDs are created.
- Propagate CancellationToken for async operations.

## Testing Strategy
- Unit tests for ordering, validation, and serialization.
- Determinism tests for stable outputs.
@@ -0,0 +1,193 @@
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.

using StellaOps.Eventing.Models;
using StellaOps.HybridLogicalClock;

namespace StellaOps.Timeline.Core.Export;

/// <summary>
/// Builds exportable timeline bundles for correlation IDs.
/// </summary>
public interface ITimelineBundleBuilder
{
    /// <summary>
    /// Starts an export operation for the given correlation ID.
    /// </summary>
    /// <param name="correlationId">The correlation ID whose events are exported.</param>
    /// <param name="request">Parameters controlling the export.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The newly initiated export operation.</returns>
    Task<ExportOperation> InitiateExportAsync(
        string correlationId,
        ExportRequest request,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up the current status of an export operation.
    /// </summary>
    /// <param name="exportId">The export operation ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The export operation, or null when the ID is unknown.</returns>
    Task<ExportOperation?> GetExportStatusAsync(
        string exportId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Retrieves the content of a finished export bundle.
    /// </summary>
    /// <param name="exportId">The export operation ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The bundle, or null when the ID is unknown or the export is not yet ready.</returns>
    Task<ExportBundle?> GetExportBundleAsync(
        string exportId,
        CancellationToken cancellationToken = default);
}

/// <summary>
/// Parameters for initiating an export operation.
/// </summary>
public sealed record ExportRequest
{
    /// <summary>Export format: "ndjson" or "json".</summary>
    public string Format { get; init; } = "ndjson";

    /// <summary>Whether the bundle should be DSSE-signed.</summary>
    public bool SignBundle { get; init; } = false;

    /// <summary>Optional HLC range start.</summary>
    public HlcTimestamp? FromHlc { get; init; }

    /// <summary>Optional HLC range end.</summary>
    public HlcTimestamp? ToHlc { get; init; }

    /// <summary>Whether full event payloads are included in the bundle.</summary>
    public bool IncludePayloads { get; init; } = true;
}

/// <summary>
/// State of a single export operation.
/// </summary>
public sealed record ExportOperation
{
    /// <summary>Unique export operation ID.</summary>
    public required string ExportId { get; init; }

    /// <summary>The correlation ID being exported.</summary>
    public required string CorrelationId { get; init; }

    /// <summary>Export format.</summary>
    public required string Format { get; init; }

    /// <summary>Whether the bundle is signed.</summary>
    public bool SignBundle { get; init; }

    /// <summary>Current status.</summary>
    public required ExportStatus Status { get; init; }

    /// <summary>Number of events exported.</summary>
    public int EventCount { get; init; }

    /// <summary>Size of the export file in bytes.</summary>
    public long FileSizeBytes { get; init; }

    /// <summary>Creation time.</summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>Completion time (null while still running).</summary>
    public DateTimeOffset? CompletedAt { get; init; }

    /// <summary>Error message (null unless the export failed).</summary>
    public string? Error { get; init; }
}

/// <summary>
/// Lifecycle states of an export operation.
/// </summary>
public enum ExportStatus
{
    /// <summary>Export has been initiated.</summary>
    Initiated,

    /// <summary>Export is in progress.</summary>
    InProgress,

    /// <summary>Export completed successfully.</summary>
    Completed,

    /// <summary>Export failed.</summary>
    Failed
}

/// <summary>
/// A materialized export bundle ready for download.
/// </summary>
public sealed record ExportBundle
{
    /// <summary>The export operation ID.</summary>
    public required string ExportId { get; init; }

    /// <summary>Content type (e.g., "application/x-ndjson").</summary>
    public required string ContentType { get; init; }

    /// <summary>Suggested filename.</summary>
    public required string FileName { get; init; }

    /// <summary>Bundle content stream.</summary>
    public required Stream Content { get; init; }

    /// <summary>Bundle size in bytes.</summary>
    public long SizeBytes { get; init; }

    /// <summary>DSSE signature (null when the bundle is unsigned).</summary>
    public string? DsseSignature { get; init; }
}
|
||||
@@ -0,0 +1,293 @@
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.

using System.Collections.Concurrent;
using System.Globalization;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Eventing.Models;
using StellaOps.Eventing.Signing;
using StellaOps.Eventing.Storage;
using StellaOps.Timeline.Core.Telemetry;

namespace StellaOps.Timeline.Core.Export;

/// <summary>
/// Implementation of <see cref="ITimelineBundleBuilder"/>.
/// Builds NDJSON or JSON bundles from stored timeline events, optionally DSSE-signed.
/// </summary>
public sealed class TimelineBundleBuilder : ITimelineBundleBuilder
{
    // Cached serializer options (CA1869): previously a new JsonSerializerOptions
    // was allocated per event inside the NDJSON loop, defeating System.Text.Json's
    // metadata caching.
    private static readonly JsonSerializerOptions NdjsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    private static readonly JsonSerializerOptions JsonBundleOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        WriteIndented = true,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    private readonly ITimelineEventStore _eventStore;
    private readonly IEventSigner? _eventSigner;
    private readonly TimelineMetrics _metrics;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<TimelineBundleBuilder> _logger;

    // In-memory stores for export operations and bundle content
    // (production would use PostgreSQL + object storage).
    private readonly ConcurrentDictionary<string, ExportOperation> _operations = new();
    private readonly ConcurrentDictionary<string, byte[]> _bundles = new();

    // DSSE signatures keyed by export ID. Previously the signature was computed in
    // ExecuteExportAsync and then discarded, so ExportBundle.DsseSignature was never set.
    private readonly ConcurrentDictionary<string, string> _signatures = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="TimelineBundleBuilder"/> class.
    /// </summary>
    public TimelineBundleBuilder(
        ITimelineEventStore eventStore,
        TimelineMetrics metrics,
        TimeProvider timeProvider,
        ILogger<TimelineBundleBuilder> logger,
        IEventSigner? eventSigner = null)
    {
        _eventStore = eventStore ?? throw new ArgumentNullException(nameof(eventStore));
        _metrics = metrics ?? throw new ArgumentNullException(nameof(metrics));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _eventSigner = eventSigner;
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Registers the operation and kicks off the export in the background; the
    /// returned operation is always in the <see cref="ExportStatus.Initiated"/> state.
    /// </remarks>
    public Task<ExportOperation> InitiateExportAsync(
        string correlationId,
        ExportRequest request,
        CancellationToken cancellationToken = default)
    {
        // Non-async (returns Task.FromResult) because nothing here awaits; the old
        // `async` form produced compiler warning CS1998.
        ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);
        ArgumentNullException.ThrowIfNull(request);

        var exportId = GenerateExportId();
        var now = _timeProvider.GetUtcNow();

        var operation = new ExportOperation
        {
            ExportId = exportId,
            CorrelationId = correlationId,
            Format = request.Format,
            SignBundle = request.SignBundle,
            Status = ExportStatus.Initiated,
            CreatedAt = now
        };

        _operations[exportId] = operation;

        _logger.LogInformation(
            "Initiated export {ExportId} for correlation {CorrelationId}",
            exportId,
            correlationId);

        // Start export in background (production would queue this to a worker).
        // NOTE(review): the caller's token is reused for the background work, so a
        // request-scoped token would abort the export when the request ends — confirm
        // that is the intended lifetime.
        _ = Task.Run(() => ExecuteExportAsync(exportId, correlationId, request, cancellationToken), cancellationToken);

        return Task.FromResult(operation);
    }

    /// <inheritdoc/>
    public Task<ExportOperation?> GetExportStatusAsync(
        string exportId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(exportId);

        // Unknown IDs yield null rather than throwing.
        _operations.TryGetValue(exportId, out var operation);
        return Task.FromResult(operation);
    }

    /// <inheritdoc/>
    public Task<ExportBundle?> GetExportBundleAsync(
        string exportId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(exportId);

        // The bundle is only available for a known, completed export.
        if (!_operations.TryGetValue(exportId, out var operation)
            || operation.Status != ExportStatus.Completed
            || !_bundles.TryGetValue(exportId, out var content))
        {
            return Task.FromResult<ExportBundle?>(null);
        }

        var bundle = new ExportBundle
        {
            ExportId = exportId,
            ContentType = operation.Format == "ndjson" ? "application/x-ndjson" : "application/json",
            FileName = $"timeline-{operation.CorrelationId}-{exportId}.{operation.Format}",
            Content = new MemoryStream(content),
            SizeBytes = content.Length,
            // Surface the DSSE signature when the bundle was signed (bug fix: this
            // field was previously never populated).
            DsseSignature = _signatures.TryGetValue(exportId, out var signature) ? signature : null
        };

        return Task.FromResult<ExportBundle?>(bundle);
    }

    /// <summary>
    /// Runs the actual export: fetches events, serializes, optionally signs, and
    /// records the final operation state. Failures are captured on the operation.
    /// </summary>
    private async Task ExecuteExportAsync(
        string exportId,
        string correlationId,
        ExportRequest request,
        CancellationToken cancellationToken)
    {
        try
        {
            UpdateOperation(exportId, op => op with { Status = ExportStatus.InProgress });

            // Use the bounded HLC range only when both ends are supplied; otherwise
            // fetch the whole correlation in one large page.
            IReadOnlyList<TimelineEvent> events;
            if (request.FromHlc.HasValue && request.ToHlc.HasValue)
            {
                events = await _eventStore.GetByHlcRangeAsync(
                    correlationId,
                    request.FromHlc.Value,
                    request.ToHlc.Value,
                    cancellationToken).ConfigureAwait(false);
            }
            else
            {
                events = await _eventStore.GetByCorrelationIdAsync(
                    correlationId,
                    limit: 100000,
                    offset: 0,
                    cancellationToken).ConfigureAwait(false);
            }

            // Build bundle content in the requested format.
            var content = string.Equals(request.Format, "ndjson", StringComparison.Ordinal)
                ? BuildNdjsonBundle(events, request.IncludePayloads)
                : BuildJsonBundle(events, request.IncludePayloads, _timeProvider.GetUtcNow());

            // Sign if requested and a signer is configured.
            if (request.SignBundle && _eventSigner != null)
            {
                var signature = await _eventSigner.SignAsync(content, cancellationToken).ConfigureAwait(false);
                if (signature is not null)
                {
                    _signatures[exportId] = signature;
                }
            }

            _bundles[exportId] = content;

            var now = _timeProvider.GetUtcNow();
            UpdateOperation(exportId, op => op with
            {
                Status = ExportStatus.Completed,
                EventCount = events.Count,
                FileSizeBytes = content.Length,
                CompletedAt = now
            });

            _metrics.RecordExport(request.Format, request.SignBundle, content.Length, events.Count);

            _logger.LogInformation(
                "Completed export {ExportId}: {EventCount} events, {Size} bytes",
                exportId,
                events.Count,
                content.Length);
        }
        catch (Exception ex)
        {
            // Record the failure on the operation instead of crashing the background task.
            _logger.LogError(ex, "Export {ExportId} failed", exportId);

            UpdateOperation(exportId, op => op with
            {
                Status = ExportStatus.Failed,
                CompletedAt = _timeProvider.GetUtcNow(),
                Error = ex.Message
            });
        }
    }

    /// <summary>
    /// Serializes events as newline-delimited JSON (one snake_case object per line).
    /// </summary>
    private static byte[] BuildNdjsonBundle(IReadOnlyList<TimelineEvent> events, bool includePayloads)
    {
        var sb = new StringBuilder();

        foreach (var evt in events)
        {
            sb.AppendLine(JsonSerializer.Serialize(ToExportShape(evt, includePayloads), NdjsonOptions));
        }

        return Encoding.UTF8.GetBytes(sb.ToString());
    }

    /// <summary>
    /// Serializes events as a single indented JSON document with an export metadata footer.
    /// </summary>
    /// <param name="exportedAt">Export timestamp, sourced from the injected TimeProvider
    /// (previously this used DateTimeOffset.UtcNow directly).</param>
    private static byte[] BuildJsonBundle(
        IReadOnlyList<TimelineEvent> events,
        bool includePayloads,
        DateTimeOffset exportedAt)
    {
        var bundle = new
        {
            events = events.Select(evt => ToExportShape(evt, includePayloads)).ToList(),
            metadata = new
            {
                event_count = events.Count,
                exported_at = exportedAt.ToString("O", CultureInfo.InvariantCulture)
            }
        };

        return JsonSerializer.SerializeToUtf8Bytes(bundle, JsonBundleOptions);
    }

    /// <summary>
    /// Projects a timeline event into the common export shape shared by both formats
    /// (previously duplicated between the NDJSON and JSON builders).
    /// </summary>
    private static object ToExportShape(TimelineEvent evt, bool includePayloads) => new
    {
        event_id = evt.EventId,
        t_hlc = evt.THlc.ToSortableString(),
        ts_wall = evt.TsWall.ToString("O", CultureInfo.InvariantCulture),
        correlation_id = evt.CorrelationId,
        service = evt.Service,
        kind = evt.Kind,
        payload = includePayloads ? evt.Payload : null,
        payload_digest = Convert.ToHexString(evt.PayloadDigest).ToLowerInvariant(),
        engine_version = new
        {
            name = evt.EngineVersion.EngineName,
            version = evt.EngineVersion.Version,
            digest = evt.EngineVersion.SourceDigest
        },
        schema_version = evt.SchemaVersion
    };

    /// <summary>
    /// Applies a transform to a stored operation, if it exists.
    /// </summary>
    private void UpdateOperation(string exportId, Func<ExportOperation, ExportOperation> update)
    {
        if (_operations.TryGetValue(exportId, out var current))
        {
            _operations[exportId] = update(current);
        }
    }

    /// <summary>
    /// Produces a 16-hex-character export ID from a fresh GUID.
    /// </summary>
    private static string GenerateExportId()
    {
        return Guid.NewGuid().ToString("N")[..16];
    }
}
|
||||
@@ -0,0 +1,155 @@
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.

using StellaOps.Eventing.Models;
using StellaOps.HybridLogicalClock;

namespace StellaOps.Timeline.Core;

/// <summary>
/// Queries timeline events and derived analyses.
/// </summary>
public interface ITimelineQueryService
{
    /// <summary>
    /// Returns events for a correlation ID, ordered by HLC timestamp.
    /// </summary>
    Task<TimelineQueryResult> GetByCorrelationIdAsync(
        string correlationId,
        TimelineQueryOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns the critical path (longest latency stages) for a correlation.
    /// </summary>
    Task<CriticalPathResult> GetCriticalPathAsync(
        string correlationId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns events filtered by service.
    /// </summary>
    Task<TimelineQueryResult> GetByServiceAsync(
        string service,
        HlcTimestamp? fromHlc = null,
        int limit = 100,
        CancellationToken cancellationToken = default);
}

/// <summary>
/// Paging and filtering options for timeline queries.
/// </summary>
public sealed record TimelineQueryOptions
{
    /// <summary>Maximum number of events to return.</summary>
    public int Limit { get; init; } = 100;

    /// <summary>Number of events to skip.</summary>
    public int Offset { get; init; } = 0;

    /// <summary>Restrict results to these services (optional).</summary>
    public IReadOnlyList<string>? Services { get; init; }

    /// <summary>Restrict results to these event kinds (optional).</summary>
    public IReadOnlyList<string>? Kinds { get; init; }

    /// <summary>Start of HLC range (inclusive).</summary>
    public HlcTimestamp? FromHlc { get; init; }

    /// <summary>End of HLC range (inclusive).</summary>
    public HlcTimestamp? ToHlc { get; init; }
}

/// <summary>
/// A page of timeline events plus pagination metadata.
/// </summary>
public sealed record TimelineQueryResult
{
    /// <summary>The events matching the query.</summary>
    public required IReadOnlyList<TimelineEvent> Events { get; init; }

    /// <summary>Total matching count (for pagination).</summary>
    public long TotalCount { get; init; }

    /// <summary>Whether more results exist beyond this page.</summary>
    public bool HasMore { get; init; }

    /// <summary>Cursor for the next page (HLC of the last event).</summary>
    public string? NextCursor { get; init; }
}

/// <summary>
/// Result of a critical path analysis for one correlation.
/// </summary>
public sealed record CriticalPathResult
{
    /// <summary>The correlation ID analyzed.</summary>
    public required string CorrelationId { get; init; }

    /// <summary>Total duration from first to last event.</summary>
    public TimeSpan TotalDuration { get; init; }

    /// <summary>The stages making up the critical path.</summary>
    public required IReadOnlyList<CriticalPathStage> Stages { get; init; }
}

/// <summary>
/// One stage (transition between consecutive events) in the critical path.
/// </summary>
public sealed record CriticalPathStage
{
    /// <summary>Stage name (e.g., "ENQUEUE -> EXECUTE").</summary>
    public required string Stage { get; init; }

    /// <summary>Service where this stage occurred.</summary>
    public required string Service { get; init; }

    /// <summary>Duration of this stage.</summary>
    public TimeSpan Duration { get; init; }

    /// <summary>This stage's share of the total duration, as a percentage.</summary>
    public double Percentage { get; init; }

    /// <summary>HLC at the start of the stage.</summary>
    public required HlcTimestamp FromHlc { get; init; }

    /// <summary>HLC at the end of the stage.</summary>
    public required HlcTimestamp ToHlc { get; init; }
}
|
||||
@@ -0,0 +1,50 @@
|
||||
-- Migration: 20260107_002_create_critical_path_view
-- Purpose: Create materialized view for critical path computation

-- Each row pairs an event with its predecessor (by HLC order within the same
-- correlation), producing one "stage" per transition.
CREATE MATERIALIZED VIEW timeline.critical_path AS
WITH ordered_events AS (
    SELECT
        correlation_id,
        event_id,
        t_hlc,
        ts_wall,
        service,
        kind,
        LAG(t_hlc)     OVER (PARTITION BY correlation_id ORDER BY t_hlc) AS prev_hlc,
        LAG(ts_wall)   OVER (PARTITION BY correlation_id ORDER BY t_hlc) AS prev_ts,
        LAG(kind)      OVER (PARTITION BY correlation_id ORDER BY t_hlc) AS prev_kind,
        LAG(event_id)  OVER (PARTITION BY correlation_id ORDER BY t_hlc) AS prev_event_id
    FROM timeline.events
)
SELECT
    correlation_id,
    prev_kind || ' -> ' || kind AS stage,
    prev_event_id AS from_event_id,
    event_id AS to_event_id,
    prev_hlc AS from_hlc,
    t_hlc AS to_hlc,
    EXTRACT(EPOCH FROM (ts_wall - prev_ts)) * 1000 AS duration_ms,
    service
FROM ordered_events
WHERE prev_hlc IS NOT NULL;  -- drop the first event of each correlation (no predecessor)

-- Unique index: required by REFRESH MATERIALIZED VIEW CONCURRENTLY below.
CREATE UNIQUE INDEX idx_critical_path_corr_from_hlc
    ON timeline.critical_path (correlation_id, from_hlc);

-- Supports "slowest stages per correlation" queries.
CREATE INDEX idx_critical_path_duration
    ON timeline.critical_path (correlation_id, duration_ms DESC);

-- Refreshes the entire critical-path view concurrently. (A materialized view
-- cannot be refreshed per correlation; the previous comment suggesting a
-- per-correlation refresh was misleading.)
CREATE OR REPLACE FUNCTION timeline.refresh_critical_path()
RETURNS void AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY timeline.critical_path;
END;
$$ LANGUAGE plpgsql;

-- Comments for documentation
COMMENT ON MATERIALIZED VIEW timeline.critical_path IS 'Pre-computed critical path stages for performance analysis';
COMMENT ON COLUMN timeline.critical_path.stage IS 'Transition label: prev_kind -> current_kind';
COMMENT ON COLUMN timeline.critical_path.duration_ms IS 'Wall-clock duration between events in milliseconds';
|
||||
@@ -0,0 +1,167 @@
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.

using StellaOps.Eventing.Models;
using StellaOps.HybridLogicalClock;

namespace StellaOps.Timeline.Core.Replay;

/// <summary>
/// Orchestrates deterministic replay of timeline events.
/// </summary>
public interface ITimelineReplayOrchestrator
{
    /// <summary>
    /// Starts a replay operation for the given correlation ID.
    /// </summary>
    /// <param name="correlationId">The correlation ID to replay.</param>
    /// <param name="request">Parameters controlling the replay.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The newly initiated replay operation.</returns>
    Task<ReplayOperation> InitiateReplayAsync(
        string correlationId,
        ReplayRequest request,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up the current status of a replay operation.
    /// </summary>
    /// <param name="replayId">The replay operation ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>The replay operation, or null when the ID is unknown.</returns>
    Task<ReplayOperation?> GetReplayStatusAsync(
        string replayId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Cancels an in-progress replay operation.
    /// </summary>
    /// <param name="replayId">The replay operation ID.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>True if cancelled; false when unknown or already finished.</returns>
    Task<bool> CancelReplayAsync(
        string replayId,
        CancellationToken cancellationToken = default);
}

/// <summary>
/// Parameters for initiating a replay operation.
/// </summary>
public sealed record ReplayRequest
{
    /// <summary>Replay mode: "dry-run" or "verify".</summary>
    public string Mode { get; init; } = "dry-run";

    /// <summary>Optional HLC to replay from.</summary>
    public HlcTimestamp? FromHlc { get; init; }

    /// <summary>Optional HLC to replay to.</summary>
    public HlcTimestamp? ToHlc { get; init; }
}

/// <summary>
/// State of a single replay operation.
/// </summary>
public sealed record ReplayOperation
{
    /// <summary>Unique replay operation ID.</summary>
    public required string ReplayId { get; init; }

    /// <summary>The correlation ID being replayed.</summary>
    public required string CorrelationId { get; init; }

    /// <summary>Replay mode.</summary>
    public required string Mode { get; init; }

    /// <summary>Current status.</summary>
    public required ReplayStatus Status { get; init; }

    /// <summary>Progress (0.0 to 1.0).</summary>
    public double Progress { get; init; }

    /// <summary>Number of events processed so far.</summary>
    public int EventsProcessed { get; init; }

    /// <summary>Total number of events to replay.</summary>
    public int TotalEvents { get; init; }

    /// <summary>Start time.</summary>
    public DateTimeOffset StartedAt { get; init; }

    /// <summary>Completion time (null while still running).</summary>
    public DateTimeOffset? CompletedAt { get; init; }

    /// <summary>Error message (null unless the replay failed).</summary>
    public string? Error { get; init; }

    /// <summary>Original output digest (verify mode only).</summary>
    public string? OriginalDigest { get; init; }

    /// <summary>Replayed output digest (verify mode only).</summary>
    public string? ReplayDigest { get; init; }

    /// <summary>Whether the replay matched the original (verify mode only).</summary>
    public bool? DeterministicMatch { get; init; }
}

/// <summary>
/// Lifecycle states of a replay operation.
/// </summary>
public enum ReplayStatus
{
    /// <summary>Replay has been initiated but not started.</summary>
    Initiated,

    /// <summary>Replay is in progress.</summary>
    InProgress,

    /// <summary>Replay completed successfully.</summary>
    Completed,

    /// <summary>Replay failed.</summary>
    Failed,

    /// <summary>Replay was cancelled.</summary>
    Cancelled
}
|
||||
@@ -0,0 +1,294 @@
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
|
||||
using System.Collections.Concurrent;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Time.Testing;
|
||||
using StellaOps.Eventing.Models;
|
||||
using StellaOps.Eventing.Storage;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
using StellaOps.Timeline.Core.Telemetry;
|
||||
|
||||
namespace StellaOps.Timeline.Core.Replay;
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of <see cref="ITimelineReplayOrchestrator"/>.
|
||||
/// </summary>
|
||||
public sealed class TimelineReplayOrchestrator : ITimelineReplayOrchestrator
|
||||
{
|
||||
private readonly ITimelineEventStore _eventStore;
|
||||
private readonly TimelineMetrics _metrics;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly ILogger<TimelineReplayOrchestrator> _logger;
|
||||
|
||||
// In-memory store for replay operations (production would use PostgreSQL)
|
||||
private readonly ConcurrentDictionary<string, ReplayOperation> _operations = new();
|
||||
|
||||
/// <summary>
/// Initializes a new instance of the <see cref="TimelineReplayOrchestrator"/> class.
/// </summary>
public TimelineReplayOrchestrator(
    ITimelineEventStore eventStore,
    TimelineMetrics metrics,
    TimeProvider timeProvider,
    ILogger<TimelineReplayOrchestrator> logger)
{
    // Guard clauses up front; all dependencies are mandatory.
    ArgumentNullException.ThrowIfNull(eventStore);
    ArgumentNullException.ThrowIfNull(metrics);
    ArgumentNullException.ThrowIfNull(timeProvider);
    ArgumentNullException.ThrowIfNull(logger);

    _eventStore = eventStore;
    _metrics = metrics;
    _timeProvider = timeProvider;
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Registers the operation, then runs the replay itself in the background; the
/// returned operation is always in the <see cref="ReplayStatus.Initiated"/> state.
/// </remarks>
public async Task<ReplayOperation> InitiateReplayAsync(
    string correlationId,
    ReplayRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);
    ArgumentNullException.ThrowIfNull(request);

    var replayId = GenerateReplayId();
    var startedAt = _timeProvider.GetUtcNow();

    // Resolve the event set up front so the operation can report a total count.
    var replayEvents = await GetEventsForReplayAsync(correlationId, request, cancellationToken)
        .ConfigureAwait(false);

    var operation = new ReplayOperation
    {
        ReplayId = replayId,
        CorrelationId = correlationId,
        Mode = request.Mode,
        Status = ReplayStatus.Initiated,
        Progress = 0,
        EventsProcessed = 0,
        TotalEvents = replayEvents.Count,
        StartedAt = startedAt
    };
    _operations[replayId] = operation;

    _logger.LogInformation(
        "Initiated replay {ReplayId} for correlation {CorrelationId} with {EventCount} events",
        replayId,
        correlationId,
        replayEvents.Count);

    // Start replay in background (in production, this would be queued to a worker).
    _ = Task.Run(() => ExecuteReplayAsync(replayId, replayEvents, request, cancellationToken), cancellationToken);

    return operation;
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public Task<ReplayOperation?> GetReplayStatusAsync(
|
||||
string replayId,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(replayId);
|
||||
|
||||
_operations.TryGetValue(replayId, out var operation);
|
||||
return Task.FromResult(operation);
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public Task<bool> CancelReplayAsync(
|
||||
string replayId,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(replayId);
|
||||
|
||||
if (!_operations.TryGetValue(replayId, out var operation))
|
||||
{
|
||||
return Task.FromResult(false);
|
||||
}
|
||||
|
||||
if (operation.Status is ReplayStatus.Completed or ReplayStatus.Failed or ReplayStatus.Cancelled)
|
||||
{
|
||||
return Task.FromResult(false);
|
||||
}
|
||||
|
||||
var cancelled = operation with
|
||||
{
|
||||
Status = ReplayStatus.Cancelled,
|
||||
CompletedAt = _timeProvider.GetUtcNow()
|
||||
};
|
||||
|
||||
_operations[replayId] = cancelled;
|
||||
|
||||
_logger.LogInformation("Cancelled replay {ReplayId}", replayId);
|
||||
|
||||
return Task.FromResult(true);
|
||||
}
|
||||
|
||||
private async Task<IReadOnlyList<TimelineEvent>> GetEventsForReplayAsync(
|
||||
string correlationId,
|
||||
ReplayRequest request,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
if (request.FromHlc.HasValue && request.ToHlc.HasValue)
|
||||
{
|
||||
return await _eventStore.GetByHlcRangeAsync(
|
||||
correlationId,
|
||||
request.FromHlc.Value,
|
||||
request.ToHlc.Value,
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
return await _eventStore.GetByCorrelationIdAsync(
|
||||
correlationId,
|
||||
limit: 100000, // Get all events
|
||||
offset: 0,
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private async Task ExecuteReplayAsync(
|
||||
string replayId,
|
||||
IReadOnlyList<TimelineEvent> events,
|
||||
ReplayRequest request,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var startTime = _timeProvider.GetUtcNow();
|
||||
|
||||
try
|
||||
{
|
||||
// Update status to in-progress
|
||||
UpdateOperation(replayId, op => op with { Status = ReplayStatus.InProgress });
|
||||
|
||||
// Create a FakeTimeProvider for deterministic replay
|
||||
var fakeTimeProvider = new FakeTimeProvider();
|
||||
|
||||
// Compute original digest from events
|
||||
var originalDigest = ComputeEventChainDigest(events);
|
||||
|
||||
var processedCount = 0;
|
||||
var replayedPayloads = new List<string>();
|
||||
|
||||
foreach (var evt in events)
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
UpdateOperation(replayId, op => op with
|
||||
{
|
||||
Status = ReplayStatus.Cancelled,
|
||||
CompletedAt = _timeProvider.GetUtcNow()
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if cancelled
|
||||
if (_operations.TryGetValue(replayId, out var current) && current.Status == ReplayStatus.Cancelled)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Simulate replay processing
|
||||
// In production, this would re-execute the logic that produced each event
|
||||
fakeTimeProvider.SetUtcNow(evt.TsWall);
|
||||
|
||||
// For dry-run mode, we just verify we can process all events
|
||||
// For verify mode, we would re-execute and compare outputs
|
||||
replayedPayloads.Add(evt.Payload);
|
||||
|
||||
processedCount++;
|
||||
|
||||
// Update progress
|
||||
var progress = (double)processedCount / events.Count;
|
||||
UpdateOperation(replayId, op => op with
|
||||
{
|
||||
Progress = progress,
|
||||
EventsProcessed = processedCount
|
||||
});
|
||||
|
||||
// Small delay to simulate processing (remove in production)
|
||||
await Task.Delay(1, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
// Compute replayed digest
|
||||
var replayDigest = ComputePayloadChainDigest(replayedPayloads);
|
||||
var deterministicMatch = originalDigest == replayDigest;
|
||||
|
||||
var endTime = _timeProvider.GetUtcNow();
|
||||
var duration = (endTime - startTime).TotalSeconds;
|
||||
|
||||
// Update final status
|
||||
UpdateOperation(replayId, op => op with
|
||||
{
|
||||
Status = ReplayStatus.Completed,
|
||||
Progress = 1.0,
|
||||
EventsProcessed = events.Count,
|
||||
CompletedAt = endTime,
|
||||
OriginalDigest = originalDigest,
|
||||
ReplayDigest = replayDigest,
|
||||
DeterministicMatch = deterministicMatch
|
||||
});
|
||||
|
||||
_metrics.RecordReplay(
|
||||
request.Mode,
|
||||
deterministicMatch ? "SUCCESS" : "MISMATCH",
|
||||
events.Count,
|
||||
duration);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Completed replay {ReplayId}: {EventCount} events, deterministic={Match}, duration={Duration}s",
|
||||
replayId,
|
||||
events.Count,
|
||||
deterministicMatch,
|
||||
duration);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Replay {ReplayId} failed", replayId);
|
||||
|
||||
UpdateOperation(replayId, op => op with
|
||||
{
|
||||
Status = ReplayStatus.Failed,
|
||||
CompletedAt = _timeProvider.GetUtcNow(),
|
||||
Error = ex.Message
|
||||
});
|
||||
|
||||
_metrics.RecordReplay(request.Mode, "FAILED", events.Count, 0);
|
||||
}
|
||||
}
|
||||
|
||||
private void UpdateOperation(string replayId, Func<ReplayOperation, ReplayOperation> update)
|
||||
{
|
||||
if (_operations.TryGetValue(replayId, out var current))
|
||||
{
|
||||
_operations[replayId] = update(current);
|
||||
}
|
||||
}
|
||||
|
||||
private static string GenerateReplayId()
|
||||
{
|
||||
return Guid.NewGuid().ToString("N")[..16];
|
||||
}
|
||||
|
||||
private static string ComputeEventChainDigest(IReadOnlyList<TimelineEvent> events)
|
||||
{
|
||||
using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
|
||||
|
||||
foreach (var evt in events)
|
||||
{
|
||||
hasher.AppendData(evt.PayloadDigest);
|
||||
}
|
||||
|
||||
var hash = hasher.GetHashAndReset();
|
||||
return Convert.ToHexString(hash).ToLowerInvariant();
|
||||
}
|
||||
|
||||
private static string ComputePayloadChainDigest(IReadOnlyList<string> payloads)
|
||||
{
|
||||
using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256);
|
||||
|
||||
foreach (var payload in payloads)
|
||||
{
|
||||
hasher.AppendData(Encoding.UTF8.GetBytes(payload));
|
||||
}
|
||||
|
||||
var hash = hasher.GetHashAndReset();
|
||||
return Convert.ToHexString(hash).ToLowerInvariant();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using StellaOps.Timeline.Core.Export;
|
||||
using StellaOps.Timeline.Core.Replay;
|
||||
using StellaOps.Timeline.Core.Telemetry;
|
||||
|
||||
namespace StellaOps.Timeline.Core;
|
||||
|
||||
/// <summary>
/// Extension methods for registering timeline services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds timeline query and replay services. Registrations use TryAdd*, so
    /// calling this more than once (or after caller-supplied overrides) is safe.
    /// </summary>
    /// <param name="services">The service collection to register into.</param>
    /// <param name="configuration">Application configuration.
    /// NOTE(review): validated but not read by any registration below — confirm
    /// whether options binding from configuration is still pending.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddTimelineServices(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Register metrics (singleton so counters accumulate process-wide)
        services.TryAddSingleton<TimelineMetrics>();

        // Register query service
        services.TryAddScoped<ITimelineQueryService, TimelineQueryService>();

        // Register replay orchestrator
        services.TryAddScoped<ITimelineReplayOrchestrator, TimelineReplayOrchestrator>();

        // Register export bundle builder
        services.TryAddScoped<ITimelineBundleBuilder, TimelineBundleBuilder>();

        return services;
    }
}
|
||||
@@ -0,0 +1,25 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- StellaOps Timeline Core library: query, replay, and export services. -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <!-- Any compiler warning fails the build; keep the library warning-clean. -->
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <RootNamespace>StellaOps.Timeline.Core</RootNamespace>
    <Description>StellaOps Timeline Core - Query and replay services</Description>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\..\..\__Libraries\StellaOps.Eventing\StellaOps.Eventing.csproj" />
    <ProjectReference Include="..\..\..\__Libraries\StellaOps.HybridLogicalClock\StellaOps.HybridLogicalClock.csproj" />
    <ProjectReference Include="..\..\..\__Libraries\StellaOps.Replay.Core\StellaOps.Replay.Core.csproj" />
  </ItemGroup>

  <ItemGroup>
    <!-- No Version attributes: versions presumably come from central package
         management (Directory.Packages.props) — confirm at the repo root. -->
    <PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
    <PackageReference Include="Microsoft.Extensions.Options" />
    <PackageReference Include="Npgsql" />
  </ItemGroup>

</Project>
|
||||
@@ -0,0 +1,173 @@
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace StellaOps.Timeline.Core.Telemetry;
|
||||
|
||||
/// <summary>
/// Metrics instrumentation for the Timeline service. Instrument names and
/// descriptions are part of the observability contract and must not change.
/// </summary>
public sealed class TimelineMetrics : IDisposable
{
    /// <summary>
    /// Activity source for tracing.
    /// </summary>
    public static readonly ActivitySource ActivitySource = new("StellaOps.Timeline", "1.0.0");

    // The meter owns every instrument below and is disposed with this instance.
    private readonly Meter _meter = new("StellaOps.Timeline", "1.0.0");

    private readonly Counter<long> _queriesCounter;
    private readonly Counter<long> _replaysCounter;
    private readonly Counter<long> _exportsCounter;
    private readonly Histogram<double> _queryDurationHistogram;
    private readonly Histogram<double> _replayDurationHistogram;
    private readonly Histogram<long> _exportSizeHistogram;
    private readonly Counter<long> _cacheHitsCounter;
    private readonly Counter<long> _cacheMissesCounter;

    /// <summary>
    /// Initializes a new instance of the <see cref="TimelineMetrics"/> class.
    /// </summary>
    public TimelineMetrics()
    {
        _queriesCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_queries_total",
            description: "Total number of timeline queries");
        _replaysCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_replays_total",
            description: "Total number of replay operations");
        _exportsCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_exports_total",
            description: "Total number of export operations");
        _queryDurationHistogram = _meter.CreateHistogram<double>(
            "stellaops_timeline_query_duration_seconds",
            unit: "s",
            description: "Duration of timeline query operations");
        _replayDurationHistogram = _meter.CreateHistogram<double>(
            "stellaops_timeline_replay_duration_seconds",
            unit: "s",
            description: "Duration of replay operations");
        _exportSizeHistogram = _meter.CreateHistogram<long>(
            "stellaops_timeline_export_size_bytes",
            unit: "By",
            description: "Size of exported timeline bundles");
        _cacheHitsCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_cache_hits_total",
            description: "Total number of cache hits");
        _cacheMissesCounter = _meter.CreateCounter<long>(
            "stellaops_timeline_cache_misses_total",
            description: "Total number of cache misses");
    }

    /// <summary>
    /// Records a timeline query.
    /// </summary>
    public void RecordQuery(string queryType, int eventCount, double durationSeconds)
    {
        var typeTag = Tag("query_type", queryType);
        _queriesCounter.Add(1, typeTag);
        _queryDurationHistogram.Record(
            durationSeconds,
            typeTag,
            Tag("event_count_bucket", GetCountBucket(eventCount)));
    }

    /// <summary>
    /// Records a replay operation.
    /// </summary>
    public void RecordReplay(string mode, string status, int eventCount, double durationSeconds)
    {
        var modeTag = Tag("mode", mode);
        _replaysCounter.Add(1, modeTag, Tag("status", status));
        _replayDurationHistogram.Record(
            durationSeconds,
            modeTag,
            Tag("event_count_bucket", GetCountBucket(eventCount)));
    }

    /// <summary>
    /// Records an export operation.
    /// </summary>
    public void RecordExport(string format, bool signed, long sizeBytes, int eventCount)
    {
        var formatTag = Tag("format", format);
        _exportsCounter.Add(1, formatTag, Tag("signed", signed));
        _exportSizeHistogram.Record(
            sizeBytes,
            formatTag,
            Tag("event_count_bucket", GetCountBucket(eventCount)));
    }

    /// <summary>
    /// Records a cache hit.
    /// </summary>
    public void RecordCacheHit(string cacheType)
        => _cacheHitsCounter.Add(1, Tag("cache_type", cacheType));

    /// <summary>
    /// Records a cache miss.
    /// </summary>
    public void RecordCacheMiss(string cacheType)
        => _cacheMissesCounter.Add(1, Tag("cache_type", cacheType));

    /// <summary>
    /// Starts a query activity for tracing.
    /// </summary>
    public Activity? StartQueryActivity(string correlationId, string queryType)
        => ActivitySource.StartActivity(
            "timeline.query",
            ActivityKind.Server,
            parentContext: default,
            tags: new[]
            {
                Tag("correlation_id", correlationId),
                Tag("query_type", queryType)
            });

    /// <summary>
    /// Starts a replay activity for tracing.
    /// </summary>
    public Activity? StartReplayActivity(string correlationId, string mode)
        => ActivitySource.StartActivity(
            "timeline.replay",
            ActivityKind.Server,
            parentContext: default,
            tags: new[]
            {
                Tag("correlation_id", correlationId),
                Tag("mode", mode)
            });

    /// <inheritdoc/>
    public void Dispose() => _meter.Dispose();

    // Shorthand for building metric/activity tags.
    private static KeyValuePair<string, object?> Tag(string key, object? value) => new(key, value);

    private static string GetCountBucket(int count) => count switch
    {
        <= 10 => "1-10",
        <= 100 => "11-100",
        <= 1000 => "101-1000",
        <= 10000 => "1001-10000",
        _ => "10000+"
    };
}
|
||||
@@ -0,0 +1,192 @@
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Eventing.Models;
|
||||
using StellaOps.Eventing.Storage;
|
||||
using StellaOps.HybridLogicalClock;
|
||||
|
||||
namespace StellaOps.Timeline.Core;
|
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITimelineQueryService"/>.
/// Pagination semantics differ subtly per method — see the NOTE(review) comments.
/// </summary>
public sealed class TimelineQueryService : ITimelineQueryService
{
    private readonly ITimelineEventStore _eventStore;
    private readonly ILogger<TimelineQueryService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="TimelineQueryService"/> class.
    /// </summary>
    public TimelineQueryService(
        ITimelineEventStore eventStore,
        ILogger<TimelineQueryService> logger)
    {
        _eventStore = eventStore ?? throw new ArgumentNullException(nameof(eventStore));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Fetches by HLC range when both bounds are set, otherwise pages by
    /// limit/offset, then applies service/kind filters in memory.
    /// NOTE(review): HasMore relies on the limit+1 over-fetch, but (a) the HLC-range
    /// branch does not over-fetch, and (b) ApplyFilters runs after the fetch — in
    /// either case HasMore can misreport. TotalCount is the unfiltered correlation
    /// total, not the filtered count. Confirm these semantics are intended.
    /// </remarks>
    public async Task<TimelineQueryResult> GetByCorrelationIdAsync(
        string correlationId,
        TimelineQueryOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);

        options ??= new TimelineQueryOptions();

        IReadOnlyList<TimelineEvent> events;

        if (options.FromHlc.HasValue && options.ToHlc.HasValue)
        {
            events = await _eventStore.GetByHlcRangeAsync(
                correlationId,
                options.FromHlc.Value,
                options.ToHlc.Value,
                cancellationToken).ConfigureAwait(false);
        }
        else
        {
            events = await _eventStore.GetByCorrelationIdAsync(
                correlationId,
                options.Limit + 1, // Fetch one extra to check for more
                options.Offset,
                cancellationToken).ConfigureAwait(false);
        }

        // Apply additional filters (service/kind, in memory)
        var filteredEvents = ApplyFilters(events, options);

        // Check if there are more results
        var hasMore = filteredEvents.Count > options.Limit;
        if (hasMore)
        {
            filteredEvents = filteredEvents.Take(options.Limit).ToList();
        }

        var totalCount = await _eventStore.CountByCorrelationIdAsync(correlationId, cancellationToken)
            .ConfigureAwait(false);

        _logger.LogDebug(
            "Queried {Count} events for correlation {CorrelationId}",
            filteredEvents.Count,
            correlationId);

        return new TimelineQueryResult
        {
            Events = filteredEvents,
            TotalCount = totalCount,
            HasMore = hasMore,
            // Cursor = sortable HLC of the last returned event, for keyset paging.
            NextCursor = hasMore && filteredEvents.Count > 0
                ? filteredEvents[^1].THlc.ToSortableString()
                : null
        };
    }

    /// <inheritdoc/>
    /// <remarks>
    /// Computes per-stage durations between consecutive events (assumes the store
    /// returns events in timeline order) and ranks stages by duration descending.
    /// </remarks>
    public async Task<CriticalPathResult> GetCriticalPathAsync(
        string correlationId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);

        var events = await _eventStore.GetByCorrelationIdAsync(
            correlationId,
            limit: 10000, // Get all events for critical path analysis
            offset: 0,
            cancellationToken).ConfigureAwait(false);

        // Fewer than two events: no stage transitions, so nothing to analyze.
        if (events.Count < 2)
        {
            return new CriticalPathResult
            {
                CorrelationId = correlationId,
                TotalDuration = TimeSpan.Zero,
                Stages = Array.Empty<CriticalPathStage>()
            };
        }

        var stages = new List<CriticalPathStage>();
        var totalDuration = events[^1].TsWall - events[0].TsWall;

        for (int i = 1; i < events.Count; i++)
        {
            var prev = events[i - 1];
            var curr = events[i];
            var stageDuration = curr.TsWall - prev.TsWall;

            stages.Add(new CriticalPathStage
            {
                Stage = $"{prev.Kind} -> {curr.Kind}",
                Service = curr.Service,
                Duration = stageDuration,
                // Guard against zero total duration (all events share a timestamp).
                Percentage = totalDuration.TotalMilliseconds > 0
                    ? stageDuration.TotalMilliseconds / totalDuration.TotalMilliseconds * 100
                    : 0,
                FromHlc = prev.THlc,
                ToHlc = curr.THlc
            });
        }

        // Sort by duration descending (critical path = longest stages first)
        stages = stages.OrderByDescending(s => s.Duration).ToList();

        return new CriticalPathResult
        {
            CorrelationId = correlationId,
            TotalDuration = totalDuration,
            Stages = stages
        };
    }

    /// <inheritdoc/>
    /// <remarks>
    /// NOTE(review): TotalCount here is the returned page size, not the total
    /// matching rows — inconsistent with GetByCorrelationIdAsync, which reports
    /// a store-level count. Confirm whether callers rely on either semantic.
    /// </remarks>
    public async Task<TimelineQueryResult> GetByServiceAsync(
        string service,
        HlcTimestamp? fromHlc = null,
        int limit = 100,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(service);

        var events = await _eventStore.GetByServiceAsync(
            service,
            fromHlc,
            limit + 1,
            cancellationToken).ConfigureAwait(false);

        var hasMore = events.Count > limit;
        var resultEvents = hasMore ? events.Take(limit).ToList() : events;

        return new TimelineQueryResult
        {
            Events = resultEvents,
            TotalCount = resultEvents.Count,
            HasMore = hasMore,
            NextCursor = hasMore && resultEvents.Count > 0
                ? resultEvents[^1].THlc.ToSortableString()
                : null
        };
    }

    /// <summary>
    /// Applies the optional service/kind filters from <paramref name="options"/>
    /// in memory; both filters match case-insensitively.
    /// </summary>
    private static List<TimelineEvent> ApplyFilters(
        IReadOnlyList<TimelineEvent> events,
        TimelineQueryOptions options)
    {
        var query = events.AsEnumerable();

        if (options.Services is { Count: > 0 })
        {
            var services = new HashSet<string>(options.Services, StringComparer.OrdinalIgnoreCase);
            query = query.Where(e => services.Contains(e.Service));
        }

        if (options.Kinds is { Count: > 0 })
        {
            var kinds = new HashSet<string>(options.Kinds, StringComparer.OrdinalIgnoreCase);
            query = query.Where(e => kinds.Contains(e.Kind));
        }

        return query.ToList();
    }
}
|
||||
Reference in New Issue
Block a user