// -----------------------------------------------------------------------------
// ReplayTelemetry.cs
// Sprint: SPRINT_1227_0005_0004_BE_verdict_replay
// Task: T10 — Telemetry for replay outcomes
// -----------------------------------------------------------------------------
|
|
|
|
using System.Diagnostics;
|
|
using System.Diagnostics.Metrics;
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
using Microsoft.Extensions.DependencyInjection.Extensions;
|
|
|
|
namespace StellaOps.AuditPack.Services;
|
|
|
|
/// <summary>
/// Metrics and distributed-tracing instrumentation for verdict replay operations.
/// </summary>
/// <remarks>
/// Every instrument is created on a single <see cref="Meter"/> named <see cref="MeterName"/>;
/// activities are started from the shared static <c>ActivitySource</c> field.
/// NOTE(review): manifest, scan and attestation identifiers are attached as metric tags, which
/// may produce high-cardinality series — confirm the metrics backend is sized for that.
/// </remarks>
public sealed class ReplayTelemetry : IDisposable
{
    /// <summary>Service name used to identify replay telemetry.</summary>
    public const string ServiceName = "StellaOps.Replay";

    /// <summary>Name of the meter that owns every replay instrument.</summary>
    public const string MeterName = "StellaOps.Replay";

    /// <summary>Name of the activity source used for replay traces.</summary>
    public const string ActivitySourceName = "StellaOps.Replay";

    /// <summary>Shared activity source from which all replay activities are started.</summary>
    public static readonly ActivitySource ActivitySource = new(ActivitySourceName);

    private readonly Meter _meter;

    // Monotonic counters.
    private readonly Counter<long> _replayExecutionsTotal;
    private readonly Counter<long> _replayMatchesTotal;
    private readonly Counter<long> _replayDivergencesTotal;
    private readonly Counter<long> _replayErrorsTotal;
    private readonly Counter<long> _attestationsGeneratedTotal;
    private readonly Counter<long> _attestationsVerifiedTotal;
    private readonly Counter<long> _eligibilityChecksTotal;

    // Distributions.
    private readonly Histogram<double> _replayDurationMs;
    private readonly Histogram<double> _attestationGenerationDurationMs;
    private readonly Histogram<int> _driftCount;
    private readonly Histogram<double> _confidenceScore;

    // Gauge-like value, modelled as an up/down counter.
    private readonly UpDownCounter<long> _replaysInProgress;

    /// <summary>
    /// Creates the replay meter and all of its instruments.
    /// </summary>
    /// <param name="meterFactory">
    /// Optional factory used to create the meter. When it is <c>null</c> (or yields no meter),
    /// a standalone <see cref="Meter"/> named <see cref="MeterName"/> is created instead.
    /// </param>
    public ReplayTelemetry(IMeterFactory? meterFactory = null)
    {
        var meter = meterFactory?.Create(MeterName) ?? new Meter(MeterName);
        _meter = meter;

        // Counters — arguments are (name, unit, description).
        _replayExecutionsTotal = meter.CreateCounter<long>(
            "stellaops.replay.executions.total",
            "{execution}",
            "Total number of replay executions");
        _replayMatchesTotal = meter.CreateCounter<long>(
            "stellaops.replay.matches.total",
            "{match}",
            "Total number of replay matches (verdict unchanged)");
        _replayDivergencesTotal = meter.CreateCounter<long>(
            "stellaops.replay.divergences.total",
            "{divergence}",
            "Total number of replay divergences detected");
        _replayErrorsTotal = meter.CreateCounter<long>(
            "stellaops.replay.errors.total",
            "{error}",
            "Total number of replay errors");
        _attestationsGeneratedTotal = meter.CreateCounter<long>(
            "stellaops.replay.attestations.generated.total",
            "{attestation}",
            "Total number of replay attestations generated");
        _attestationsVerifiedTotal = meter.CreateCounter<long>(
            "stellaops.replay.attestations.verified.total",
            "{verification}",
            "Total number of replay attestations verified");
        _eligibilityChecksTotal = meter.CreateCounter<long>(
            "stellaops.replay.eligibility.checks.total",
            "{check}",
            "Total number of replay eligibility checks");

        // Histograms.
        _replayDurationMs = meter.CreateHistogram<double>(
            "stellaops.replay.duration.ms",
            "ms",
            "Replay execution duration in milliseconds");
        _attestationGenerationDurationMs = meter.CreateHistogram<double>(
            "stellaops.replay.attestation.generation.duration.ms",
            "ms",
            "Attestation generation duration in milliseconds");
        _driftCount = meter.CreateHistogram<int>(
            "stellaops.replay.drift.count",
            "{drift}",
            "Number of drifts detected per replay");
        _confidenceScore = meter.CreateHistogram<double>(
            "stellaops.replay.eligibility.confidence",
            "1",
            "Replay eligibility confidence score distribution");

        // In-progress gauge.
        _replaysInProgress = meter.CreateUpDownCounter<long>(
            "stellaops.replay.in_progress",
            "{replay}",
            "Number of replays currently in progress");
    }

    // ---- Replay execution metrics ----

    /// <summary>
    /// Marks a replay as started by incrementing the in-progress counter.
    /// </summary>
    /// <param name="manifestId">Value for the manifest identifier tag.</param>
    /// <param name="scanId">Value for the scan identifier tag.</param>
    public void RecordReplayStarted(string manifestId, string scanId)
        => _replaysInProgress.Add(1, ReplayIdentityTags(manifestId, scanId));

    /// <summary>
    /// Marks a replay as finished: decrements the in-progress counter, counts the execution,
    /// records its duration and updates the counter that corresponds to the outcome.
    /// </summary>
    /// <param name="manifestId">Value for the manifest identifier tag.</param>
    /// <param name="scanId">Value for the scan identifier tag.</param>
    /// <param name="outcome">Replay outcome; its lower-cased name is emitted as the outcome tag.</param>
    /// <param name="driftCount">Number of drifts; recorded only for <see cref="ReplayOutcome.Divergence"/>.</param>
    /// <param name="duration">Elapsed replay time, recorded in milliseconds.</param>
    public void RecordReplayCompleted(
        string manifestId,
        string scanId,
        ReplayOutcome outcome,
        int driftCount,
        TimeSpan duration)
    {
        var outcomeTags = ReplayIdentityTags(manifestId, scanId);
        outcomeTags.Add(ReplayTelemetryTags.Outcome, outcome.ToString().ToLowerInvariant());

        // The in-progress counter is decremented with the same two tags used at start.
        _replaysInProgress.Add(-1, ReplayIdentityTags(manifestId, scanId));

        _replayExecutionsTotal.Add(1, outcomeTags);
        _replayDurationMs.Record(duration.TotalMilliseconds, outcomeTags);

        // Outcomes other than the three below (e.g. Cancelled) update no further counter.
        if (outcome == ReplayOutcome.Match)
        {
            _replayMatchesTotal.Add(1, outcomeTags);
        }
        else if (outcome == ReplayOutcome.Divergence)
        {
            _replayDivergencesTotal.Add(1, outcomeTags);
            _driftCount.Record(driftCount, outcomeTags);
        }
        else if (outcome == ReplayOutcome.Error)
        {
            _replayErrorsTotal.Add(1, outcomeTags);
        }
    }

    /// <summary>
    /// Records a failed replay: decrements the in-progress counter and increments the error
    /// counter, tagged with the supplied error code.
    /// </summary>
    /// <param name="manifestId">Value for the manifest identifier tag.</param>
    /// <param name="scanId">Value for the scan identifier tag.</param>
    /// <param name="errorCode">Value for the error code tag.</param>
    public void RecordReplayError(
        string manifestId,
        string scanId,
        string errorCode)
    {
        var errorTags = ReplayIdentityTags(manifestId, scanId);
        errorTags.Add(ReplayTelemetryTags.ErrorCode, errorCode);

        _replaysInProgress.Add(-1, ReplayIdentityTags(manifestId, scanId));
        _replayErrorsTotal.Add(1, errorTags);
    }

    // ---- Attestation metrics ----

    /// <summary>
    /// Counts a generated attestation and records how long generation took.
    /// </summary>
    /// <param name="manifestId">Value for the manifest identifier tag.</param>
    /// <param name="match">Emitted as <c>"true"</c>/<c>"false"</c> in the match tag.</param>
    /// <param name="duration">Elapsed generation time, recorded in milliseconds.</param>
    public void RecordAttestationGenerated(
        string manifestId,
        bool match,
        TimeSpan duration)
    {
        var tags = new TagList();
        tags.Add(ReplayTelemetryTags.ManifestId, manifestId);
        tags.Add(ReplayTelemetryTags.Match, BoolTag(match));

        _attestationsGeneratedTotal.Add(1, tags);
        _attestationGenerationDurationMs.Record(duration.TotalMilliseconds, tags);
    }

    /// <summary>
    /// Counts an attestation verification.
    /// </summary>
    /// <param name="attestationId">Value for the attestation identifier tag.</param>
    /// <param name="valid">Emitted as <c>"true"</c>/<c>"false"</c> in the valid tag.</param>
    public void RecordAttestationVerified(
        string attestationId,
        bool valid)
    {
        var tags = new TagList();
        tags.Add(ReplayTelemetryTags.AttestationId, attestationId);
        tags.Add(ReplayTelemetryTags.Valid, BoolTag(valid));

        _attestationsVerifiedTotal.Add(1, tags);
    }

    // ---- Eligibility metrics ----

    /// <summary>
    /// Counts an eligibility check and records its confidence score.
    /// </summary>
    /// <param name="manifestId">Value for the manifest identifier tag.</param>
    /// <param name="eligible">Emitted as <c>"true"</c>/<c>"false"</c> in the eligible tag.</param>
    /// <param name="confidenceScore">Score recorded in the confidence histogram.</param>
    public void RecordEligibilityCheck(
        string manifestId,
        bool eligible,
        double confidenceScore)
    {
        var tags = new TagList();
        tags.Add(ReplayTelemetryTags.ManifestId, manifestId);
        tags.Add(ReplayTelemetryTags.Eligible, BoolTag(eligible));

        _eligibilityChecksTotal.Add(1, tags);
        _confidenceScore.Record(confidenceScore, tags);
    }

    // ---- Activity helpers ----

    /// <summary>
    /// Starts the <c>Replay.Execute</c> activity tagged with the manifest and scan identifiers.
    /// </summary>
    /// <returns>The started activity, or <c>null</c> when the activity source returns none.</returns>
    public static Activity? StartReplayActivity(string manifestId, string scanId)
    {
        var started = StartManifestActivity("Replay.Execute", manifestId);
        started?.SetTag(ReplayTelemetryTags.ScanId, scanId);
        return started;
    }

    /// <summary>
    /// Starts the <c>Replay.GenerateAttestation</c> activity tagged with the manifest identifier.
    /// </summary>
    /// <returns>The started activity, or <c>null</c> when the activity source returns none.</returns>
    public static Activity? StartAttestationActivity(string manifestId)
        => StartManifestActivity("Replay.GenerateAttestation", manifestId);

    /// <summary>
    /// Starts the <c>Replay.CheckEligibility</c> activity tagged with the manifest identifier.
    /// </summary>
    /// <returns>The started activity, or <c>null</c> when the activity source returns none.</returns>
    public static Activity? StartEligibilityActivity(string manifestId)
        => StartManifestActivity("Replay.CheckEligibility", manifestId);

    /// <summary>
    /// Starts the <c>Replay.DetectDivergence</c> activity tagged with the manifest identifier.
    /// </summary>
    /// <returns>The started activity, or <c>null</c> when the activity source returns none.</returns>
    public static Activity? StartDivergenceActivity(string manifestId)
        => StartManifestActivity("Replay.DetectDivergence", manifestId);

    /// <summary>Disposes the underlying meter.</summary>
    public void Dispose() => _meter.Dispose();

    // Builds the (manifest_id, scan_id) tag pair shared by the replay execution metrics.
    private static TagList ReplayIdentityTags(string manifestId, string scanId)
    {
        var tags = new TagList();
        tags.Add(ReplayTelemetryTags.ManifestId, manifestId);
        tags.Add(ReplayTelemetryTags.ScanId, scanId);
        return tags;
    }

    // Lower-case textual form of a boolean tag value.
    private static string BoolTag(bool value) => value ? "true" : "false";

    // Starts a named activity and, when one was created, tags it with the manifest identifier.
    private static Activity? StartManifestActivity(string operationName, string manifestId)
    {
        var started = ActivitySource.StartActivity(operationName);
        started?.SetTag(ReplayTelemetryTags.ManifestId, manifestId);
        return started;
    }
}
|
|
|
|
/// <summary>
/// Tag (attribute) key names used by replay telemetry.
/// </summary>
public static class ReplayTelemetryTags
{
    // Identifier keys.

    /// <summary>Key for the manifest identifier.</summary>
    public const string ManifestId = "manifest_id";

    /// <summary>Key for the scan identifier.</summary>
    public const string ScanId = "scan_id";

    /// <summary>Key for the bundle identifier.</summary>
    public const string BundleId = "bundle_id";

    /// <summary>Key for the attestation identifier.</summary>
    public const string AttestationId = "attestation_id";

    // Result keys.

    /// <summary>Key for the replay outcome.</summary>
    public const string Outcome = "outcome";

    /// <summary>Key for the match flag.</summary>
    public const string Match = "match";

    /// <summary>Key for the validity flag.</summary>
    public const string Valid = "valid";

    /// <summary>Key for the eligibility flag.</summary>
    public const string Eligible = "eligible";

    /// <summary>Key for the error code.</summary>
    public const string ErrorCode = "error_code";

    // Classification keys.

    /// <summary>Key for the divergence type.</summary>
    public const string DivergenceType = "divergence_type";

    /// <summary>Key for the drift type.</summary>
    public const string DriftType = "drift_type";

    /// <summary>Key for the severity.</summary>
    public const string Severity = "severity";
}
|
|
|
|
/// <summary>
/// Possible results of a replay execution.
/// </summary>
public enum ReplayOutcome
{
    /// <summary>The replayed verdict matched the original.</summary>
    Match = 0,

    /// <summary>The replayed verdict diverged from the original.</summary>
    Divergence = 1,

    /// <summary>The replay execution failed with an error.</summary>
    Error = 2,

    /// <summary>The replay was cancelled.</summary>
    Cancelled = 3,
}
|
|
|
|
/// <summary>
/// String constants naming the divergence severity levels.
/// </summary>
/// <remarks>
/// NOTE(review): presumably used as values for the severity tag — confirm against callers.
/// </remarks>
public static class DivergenceSeverities
{
    /// <summary>The <c>critical</c> level.</summary>
    public const string Critical = "critical";

    /// <summary>The <c>high</c> level.</summary>
    public const string High = "high";

    /// <summary>The <c>medium</c> level.</summary>
    public const string Medium = "medium";

    /// <summary>The <c>low</c> level.</summary>
    public const string Low = "low";

    /// <summary>The <c>info</c> level.</summary>
    public const string Info = "info";
}
|
|
|
|
/// <summary>
/// String constants naming the kinds of divergence.
/// </summary>
/// <remarks>
/// NOTE(review): presumably used as values for the divergence type tag — confirm against callers.
/// </remarks>
public static class DivergenceTypes
{
    /// <summary>The <c>verdict_digest</c> divergence type.</summary>
    public const string VerdictDigest = "verdict_digest";

    /// <summary>The <c>decision</c> divergence type.</summary>
    public const string Decision = "decision";

    /// <summary>The <c>confidence</c> divergence type.</summary>
    public const string Confidence = "confidence";

    /// <summary>The <c>input</c> divergence type.</summary>
    public const string Input = "input";

    /// <summary>The <c>policy</c> divergence type.</summary>
    public const string Policy = "policy";

    /// <summary>The <c>evidence</c> divergence type.</summary>
    public const string Evidence = "evidence";
}
|
|
|
|
/// <summary>
/// Dependency-injection registration helpers for replay telemetry.
/// </summary>
public static class ReplayTelemetryExtensions
{
    /// <summary>
    /// Registers <see cref="ReplayTelemetry"/> as a singleton unless a registration for it
    /// already exists.
    /// </summary>
    /// <param name="services">The service collection to add the registration to.</param>
    /// <returns>The same <paramref name="services"/> instance, to allow call chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public static IServiceCollection AddReplayTelemetry(this IServiceCollection services)
    {
        // Fail fast here so the exception names this method's own parameter rather than
        // one belonging to the inner TryAddSingleton call.
        ArgumentNullException.ThrowIfNull(services);

        services.TryAddSingleton<ReplayTelemetry>();
        return services;
    }
}
|