This commit is contained in:
StellaOps Bot
2025-12-13 02:22:15 +02:00
parent 564df71bfb
commit 999e26a48e
395 changed files with 25045 additions and 2224 deletions

View File

@@ -85,6 +85,30 @@ public static class TelemetryServiceCollectionExtensions
return services;
}
/// <summary>
/// Adds the Time-to-Evidence (TTE) metrics recorder to the container so triage
/// workflow latency can be measured.
/// </summary>
/// <param name="services">Service collection that receives the registrations.</param>
/// <param name="configureOptions">
/// Optional callback applied to <see cref="TimeToEvidenceOptions"/> (for example to adjust SLO targets).
/// When <c>null</c>, the options keep their defaults.
/// </param>
/// <returns>The same <paramref name="services"/> instance, to allow call chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
public static IServiceCollection AddTimeToEvidenceMetrics(
    this IServiceCollection services,
    Action<TimeToEvidenceOptions>? configureOptions = null)
{
    ArgumentNullException.ThrowIfNull(services);

    // The configure step is always registered; it is simply a no-op when no callback was supplied.
    var optionsBuilder = services.AddOptions<TimeToEvidenceOptions>();
    optionsBuilder.Configure(tteOptions =>
    {
        if (configureOptions is not null)
        {
            configureOptions(tteOptions);
        }
    });

    // TryAdd: an existing TimeToEvidenceMetrics registration is left untouched.
    services.TryAddSingleton<TimeToEvidenceMetrics>(provider =>
        new TimeToEvidenceMetrics(
            provider.GetRequiredService<IOptions<TimeToEvidenceOptions>>().Value));

    return services;
}
/// <summary>
/// Registers incident mode services for toggling enhanced telemetry during incidents.
/// </summary>

View File

@@ -0,0 +1,378 @@
using System.Diagnostics;
using System.Diagnostics.Metrics;
namespace StellaOps.Telemetry.Core;
/// <summary>
/// Time-to-Evidence (TTE) metrics for measuring the speed and reliability
/// of the evidence chain in vulnerability triage workflows.
/// </summary>
/// <remarks>
/// All instruments are created on a single <see cref="Meter"/> named <see cref="MeterName"/>.
/// Disposing this instance disposes that meter.
/// </remarks>
public sealed class TimeToEvidenceMetrics : IDisposable
{
    /// <summary>
    /// Default meter name for TTE metrics.
    /// </summary>
    public const string MeterName = "StellaOps.TimeToEvidence";

    private readonly Meter _meter;
    private readonly TimeToEvidenceOptions _options;
    private bool _disposed;

    private readonly Histogram<double> _phaseLatencyHistogram;
    private readonly Counter<long> _phaseCompletedCounter;
    private readonly Counter<long> _phaseFailedCounter;
    private readonly Counter<long> _sloBreachCounter;
    private readonly Counter<long> _evidenceAttachedCounter;
    private readonly Counter<long> _decisionMadeCounter;

    /// <summary>
    /// Initializes a new instance of <see cref="TimeToEvidenceMetrics"/>.
    /// </summary>
    /// <param name="options">
    /// Meter version and per-phase SLO targets. When <c>null</c>, a default
    /// <see cref="TimeToEvidenceOptions"/> instance is used.
    /// </param>
    public TimeToEvidenceMetrics(TimeToEvidenceOptions? options = null)
    {
        _options = options ?? new TimeToEvidenceOptions();
        _meter = new Meter(MeterName, _options.Version);

        _phaseLatencyHistogram = _meter.CreateHistogram<double>(
            name: "tte_phase_latency_seconds",
            unit: "s",
            description: "Latency of TTE phases in seconds.");

        _phaseCompletedCounter = _meter.CreateCounter<long>(
            name: "tte_phase_completed_total",
            unit: "{phase}",
            description: "Total number of completed TTE phases.");

        _phaseFailedCounter = _meter.CreateCounter<long>(
            name: "tte_phase_failed_total",
            unit: "{phase}",
            description: "Total number of failed TTE phases.");

        _sloBreachCounter = _meter.CreateCounter<long>(
            name: "tte_slo_breach_total",
            unit: "{breach}",
            description: "Total number of SLO breaches.");

        _evidenceAttachedCounter = _meter.CreateCounter<long>(
            name: "tte_evidence_attached_total",
            unit: "{evidence}",
            description: "Total number of evidence items attached.");

        _decisionMadeCounter = _meter.CreateCounter<long>(
            name: "tte_decision_made_total",
            unit: "{decision}",
            description: "Total number of VEX decisions made.");
    }

    /// <summary>
    /// Records a phase completion with latency, and an SLO breach when the latency
    /// exceeds the target configured for <paramref name="phase"/>.
    /// </summary>
    /// <param name="phase">Phase that completed.</param>
    /// <param name="latencySeconds">Observed latency of the phase, in seconds.</param>
    /// <param name="tenantId">Optional tenant identifier; omitted from the tags when null or empty.</param>
    /// <param name="surface">Optional surface name; omitted from the tags when null or empty.</param>
    public void RecordPhaseCompleted(TtePhase phase, double latencySeconds, string? tenantId = null, string? surface = null)
    {
        var tags = CreatePhaseTags(phase, tenantId, surface);
        _phaseLatencyHistogram.Record(latencySeconds, tags);
        _phaseCompletedCounter.Add(1, tags);

        // A null target means no SLO is configured for this phase, so no breach is counted.
        var sloTargetSeconds = GetSloTargetSeconds(phase);
        if (sloTargetSeconds.HasValue && latencySeconds > sloTargetSeconds.Value)
        {
            _sloBreachCounter.Add(1, tags);
        }
    }

    /// <summary>
    /// Records a phase failure.
    /// </summary>
    /// <param name="phase">Phase that failed.</param>
    /// <param name="errorCode">Optional error code; omitted from the tags when null or empty.</param>
    /// <param name="tenantId">Optional tenant identifier; omitted from the tags when null or empty.</param>
    /// <param name="surface">Optional surface name; omitted from the tags when null or empty.</param>
    public void RecordPhaseFailed(TtePhase phase, string? errorCode = null, string? tenantId = null, string? surface = null)
    {
        var tags = CreatePhaseTags(phase, tenantId, surface, errorCode);
        _phaseFailedCounter.Add(1, tags);
    }

    /// <summary>
    /// Records evidence attachment.
    /// </summary>
    /// <param name="evidenceType">Kind of evidence that was attached.</param>
    /// <param name="count">Number of items attached. Negative values are ignored.</param>
    /// <param name="tenantId">Optional tenant identifier; omitted from the tags when null or empty.</param>
    public void RecordEvidenceAttached(TteEvidenceType evidenceType, int count = 1, string? tenantId = null)
    {
        // The instrument is a monotonic counter; a negative delta would corrupt the total,
        // so it is dropped rather than forwarded (telemetry code should not throw).
        if (count < 0)
        {
            return;
        }

        var tags = new TagList
        {
            { "evidence_type", evidenceType.ToString().ToLowerInvariant() }
        };
        if (!string.IsNullOrEmpty(tenantId))
        {
            tags.Add("tenant_id", tenantId);
        }

        _evidenceAttachedCounter.Add(count, tags);
    }

    /// <summary>
    /// Records a VEX decision.
    /// </summary>
    /// <param name="status">Decision status that was reached.</param>
    /// <param name="tenantId">Optional tenant identifier; omitted from the tags when null or empty.</param>
    /// <param name="isAutomated">Value recorded in the <c>is_automated</c> tag.</param>
    public void RecordDecisionMade(TteDecisionStatus status, string? tenantId = null, bool isAutomated = false)
    {
        var tags = new TagList
        {
            { "decision_status", status.ToString().ToLowerInvariant() },
            { "is_automated", isAutomated }
        };
        if (!string.IsNullOrEmpty(tenantId))
        {
            tags.Add("tenant_id", tenantId);
        }

        _decisionMadeCounter.Add(1, tags);
    }

    /// <summary>
    /// Records an SLO breach directly, without consulting the configured SLO targets.
    /// </summary>
    /// <param name="phase">Phase whose SLO was breached.</param>
    /// <param name="actualSeconds">
    /// Observed latency in seconds. Accepted for interface compatibility but not emitted as a tag.
    /// </param>
    /// <param name="targetSeconds">SLO target in seconds; emitted as the <c>target_seconds</c> tag.</param>
    /// <param name="tenantId">Optional tenant identifier; omitted from the tags when null or empty.</param>
    public void RecordSloBreachDirect(TtePhase phase, double actualSeconds, double targetSeconds, string? tenantId = null)
    {
        // A raw measurement must not become a tag value: every distinct latency would create
        // a separate time series on the breach counter (unbounded cardinality).
        _ = actualSeconds;

        var tags = CreatePhaseTags(phase, tenantId, surface: null);

        // NOTE(review): target_seconds is kept on the assumption that callers pass a small
        // set of configured targets - confirm against callers.
        tags.Add("target_seconds", targetSeconds);
        _sloBreachCounter.Add(1, tags);
    }

    /// <summary>
    /// Starts a measurement scope for a TTE phase. The scope's stopwatch starts immediately
    /// and the outcome is recorded when the scope is disposed.
    /// </summary>
    /// <param name="phase">Phase being measured.</param>
    /// <param name="tenantId">Optional tenant identifier forwarded to the recorded metric.</param>
    /// <param name="surface">Optional surface name forwarded to the recorded metric.</param>
    /// <returns>A scope that must be disposed to record the measurement.</returns>
    public TtePhaseScope MeasurePhase(TtePhase phase, string? tenantId = null, string? surface = null)
    {
        return new TtePhaseScope(this, phase, tenantId, surface);
    }

    // Builds the tag set shared by phase instruments; optional values are added only when non-empty.
    private static TagList CreatePhaseTags(TtePhase phase, string? tenantId, string? surface, string? errorCode = null)
    {
        var tags = new TagList
        {
            { "phase", phase.ToString().ToLowerInvariant() }
        };
        if (!string.IsNullOrEmpty(tenantId))
        {
            tags.Add("tenant_id", tenantId);
        }

        if (!string.IsNullOrEmpty(surface))
        {
            tags.Add("surface", surface);
        }

        if (!string.IsNullOrEmpty(errorCode))
        {
            tags.Add("error_code", errorCode);
        }

        return tags;
    }

    // Maps a phase to its configured SLO target; null when none is set or the phase is unknown.
    private double? GetSloTargetSeconds(TtePhase phase)
    {
        return phase switch
        {
            TtePhase.ScanToFinding => _options.SloScanToFindingSeconds,
            TtePhase.FindingToEvidence => _options.SloFindingToEvidenceSeconds,
            TtePhase.EvidenceToDecision => _options.SloEvidenceToDecisionSeconds,
            TtePhase.DecisionToAttestation => _options.SloDecisionToAttestationSeconds,
            TtePhase.AttestationToVerification => _options.SloAttestationToVerificationSeconds,
            TtePhase.VerificationToPolicy => _options.SloVerificationToPolicySeconds,
            TtePhase.EndToEnd => _options.SloEndToEndSeconds,
            _ => null
        };
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _meter.Dispose();
    }

    /// <summary>
    /// Measurement scope for TTE phases. Unless <see cref="Complete"/> was called,
    /// disposing the scope records the phase as failed.
    /// </summary>
    public sealed class TtePhaseScope : IDisposable
    {
        private readonly TimeToEvidenceMetrics _metrics;
        private readonly TtePhase _phase;
        private readonly string? _tenantId;
        private readonly string? _surface;
        private readonly Stopwatch _stopwatch;
        private bool _completed;
        private bool _disposed;
        private string? _errorCode;

        internal TtePhaseScope(TimeToEvidenceMetrics metrics, TtePhase phase, string? tenantId, string? surface)
        {
            _metrics = metrics;
            _phase = phase;
            _tenantId = tenantId;
            _surface = surface;
            _stopwatch = Stopwatch.StartNew();
        }

        /// <summary>
        /// Marks the phase as failed with an optional error code.
        /// </summary>
        /// <param name="errorCode">Error code recorded with the failure, if any.</param>
        public void Fail(string? errorCode = null)
        {
            _errorCode = errorCode;
            _completed = false;
        }

        /// <summary>
        /// Marks the phase as successfully completed.
        /// </summary>
        public void Complete()
        {
            _completed = true;
        }

        /// <summary>
        /// Stops the stopwatch and records the outcome exactly once; repeated calls are no-ops.
        /// </summary>
        public void Dispose()
        {
            // Dispose may be invoked more than once; without this guard each extra call
            // would record the same phase outcome again.
            if (_disposed)
            {
                return;
            }

            _disposed = true;
            _stopwatch.Stop();

            if (_completed)
            {
                _metrics.RecordPhaseCompleted(_phase, _stopwatch.Elapsed.TotalSeconds, _tenantId, _surface);
            }
            else
            {
                _metrics.RecordPhaseFailed(_phase, _errorCode, _tenantId, _surface);
            }
        }
    }
}
/// <summary>
/// Configuration for TTE metrics: the meter version plus one SLO target per phase.
/// </summary>
/// <remarks>
/// Every SLO target is expressed in seconds and is nullable.
/// </remarks>
public sealed class TimeToEvidenceOptions
{
    /// <summary>
    /// Version reported by the meter. Defaults to <c>"1.0.0"</c>.
    /// </summary>
    public string Version { get; set; } = "1.0.0";

    /// <summary>
    /// Scan-to-finding SLO target, in seconds. Defaults to 30.
    /// </summary>
    public double? SloScanToFindingSeconds { get; set; } = 30.0;

    /// <summary>
    /// Finding-to-evidence SLO target, in seconds. Defaults to 5.
    /// </summary>
    public double? SloFindingToEvidenceSeconds { get; set; } = 5.0;

    /// <summary>
    /// Evidence-to-decision SLO target, in seconds. Defaults to 10.
    /// </summary>
    public double? SloEvidenceToDecisionSeconds { get; set; } = 10.0;

    /// <summary>
    /// Decision-to-attestation SLO target, in seconds. Defaults to 5.
    /// </summary>
    public double? SloDecisionToAttestationSeconds { get; set; } = 5.0;

    /// <summary>
    /// Attestation-to-verification SLO target, in seconds. Defaults to 3.
    /// </summary>
    public double? SloAttestationToVerificationSeconds { get; set; } = 3.0;

    /// <summary>
    /// Verification-to-policy SLO target, in seconds. Defaults to 2.
    /// </summary>
    public double? SloVerificationToPolicySeconds { get; set; } = 2.0;

    /// <summary>
    /// End-to-end triage SLO target, in seconds. Defaults to 60.
    /// </summary>
    public double? SloEndToEndSeconds { get; set; } = 60.0;
}
/// <summary>
/// The individual hops of the Time-to-Evidence chain, plus the overall workflow.
/// </summary>
public enum TtePhase
{
    /// <summary>
    /// Scan completion until a finding is created.
    /// </summary>
    ScanToFinding = 0,

    /// <summary>
    /// Finding creation until evidence is attached.
    /// </summary>
    FindingToEvidence = 1,

    /// <summary>
    /// Evidence attachment until a VEX decision is made.
    /// </summary>
    EvidenceToDecision = 2,

    /// <summary>
    /// VEX decision until the attestation is signed.
    /// </summary>
    DecisionToAttestation = 3,

    /// <summary>
    /// Attestation signing until verification.
    /// </summary>
    AttestationToVerification = 4,

    /// <summary>
    /// Verification until policy evaluation.
    /// </summary>
    VerificationToPolicy = 5,

    /// <summary>
    /// The complete triage workflow, start to finish.
    /// </summary>
    EndToEnd = 6
}
/// <summary>
/// Kinds of evidence that can be attached within the TTE chain.
/// </summary>
public enum TteEvidenceType
{
    /// <summary>
    /// A DSSE/in-toto attestation.
    /// </summary>
    Attestation = 0,

    /// <summary>
    /// A VEX statement or document.
    /// </summary>
    Vex = 1,

    /// <summary>
    /// An SBOM (SPDX or CycloneDX).
    /// </summary>
    Sbom = 2,

    /// <summary>
    /// The result of a policy evaluation.
    /// </summary>
    PolicyEval = 3,

    /// <summary>
    /// The result of a reachability analysis.
    /// </summary>
    Reachability = 4,

    /// <summary>
    /// A pull request that fixes the issue.
    /// </summary>
    FixPr = 5
}
/// <summary>
/// VEX decision outcomes tracked by the TTE metrics.
/// </summary>
public enum TteDecisionStatus
{
    /// <summary>
    /// The product is not affected by the vulnerability.
    /// </summary>
    NotAffected = 0,

    /// <summary>
    /// The product is affected by the vulnerability.
    /// </summary>
    Affected = 1,

    /// <summary>
    /// The vulnerability has been fixed.
    /// </summary>
    Fixed = 2,

    /// <summary>
    /// The vulnerability is still being investigated.
    /// </summary>
    UnderInvestigation = 3
}