Merge branch 'main' of https://git.stella-ops.org/stella-ops.org/git.stella-ops.org
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
api-governance / spectral-lint (push) Has been cancelled
oas-ci / oas-validate (push) Has been cancelled
Policy Simulation / policy-simulate (push) Has been cancelled
sdk-generator-smoke / sdk-smoke (push) Has been cancelled
SDK Publish & Sign / sdk-publish (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-27 21:45:32 +02:00
510 changed files with 138401 additions and 51276 deletions

View File

@@ -0,0 +1,379 @@
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.Policy.Engine.Telemetry;
/// <summary>
/// Evidence record for one policy evaluation run. It groups identifying metadata,
/// the referenced inputs, an output summary, environment details and an artifact manifest.
/// </summary>
/// <remarks>
/// Property declaration order is kept stable on purpose: it determines the order of the
/// serialized JSON members, which in turn feeds <see cref="ContentHash"/>.
/// </remarks>
public sealed class EvidenceBundle
{
    /// <summary>Identifier of this bundle.</summary>
    public required string BundleId { get; init; }

    /// <summary>Identifier of the evaluation run the bundle belongs to.</summary>
    public required string RunId { get; init; }

    /// <summary>Tenant the run was executed for.</summary>
    public required string Tenant { get; init; }

    /// <summary>Identifier of the evaluated policy.</summary>
    public required string PolicyId { get; init; }

    /// <summary>Version of the evaluated policy.</summary>
    public required string PolicyVersion { get; init; }

    /// <summary>Moment the bundle was created.</summary>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// SHA-256 digest of the bundle contents, used for integrity checks.
    /// Settable so that it can be filled in after the rest of the bundle is complete.
    /// </summary>
    public string? ContentHash { get; set; }

    /// <summary>Determinism hash reported by the evaluation run, if any.</summary>
    public string? DeterminismHash { get; init; }

    /// <summary>References to the inputs the evaluation consumed.</summary>
    public required EvidenceInputs Inputs { get; init; }

    /// <summary>Summary of what the evaluation produced.</summary>
    public required EvidenceOutputs Outputs { get; init; }

    /// <summary>Environment and configuration the evaluation ran under.</summary>
    public required EvidenceEnvironment Environment { get; init; }

    /// <summary>Listing of the artifacts contained in the bundle.</summary>
    public required EvidenceManifest Manifest { get; init; }
}
/// <summary>
/// Input references recorded for a policy evaluation. Every collection starts out empty.
/// </summary>
public sealed class EvidenceInputs
{
    /// <summary>SBOM documents, each with its content hash.</summary>
    public List<EvidenceArtifactRef> SbomRefs { get; init; } = new List<EvidenceArtifactRef>();

    /// <summary>Advisory documents (sourced from Concelier).</summary>
    public List<EvidenceArtifactRef> AdvisoryRefs { get; init; } = new List<EvidenceArtifactRef>();

    /// <summary>VEX documents (sourced from Excititor).</summary>
    public List<EvidenceArtifactRef> VexRefs { get; init; } = new List<EvidenceArtifactRef>();

    /// <summary>Reachability evidence documents.</summary>
    public List<EvidenceArtifactRef> ReachabilityRefs { get; init; } = new List<EvidenceArtifactRef>();

    /// <summary>Digest of the policy pack IR, when known.</summary>
    public string? PolicyIrDigest { get; init; }

    /// <summary>Cursor positions used by incremental evaluation, keyed by name.</summary>
    public Dictionary<string, string> Cursors { get; init; } = new Dictionary<string, string>();
}
/// <summary>
/// Aggregate figures describing the result of a policy evaluation.
/// </summary>
public sealed class EvidenceOutputs
{
    /// <summary>Number of findings that were evaluated.</summary>
    public int TotalFindings { get; init; }

    /// <summary>Finding counts keyed by verdict status. Empty by default.</summary>
    public Dictionary<string, int> FindingsByVerdict { get; init; } = new Dictionary<string, int>();

    /// <summary>Finding counts keyed by severity. Empty by default.</summary>
    public Dictionary<string, int> FindingsBySeverity { get; init; } = new Dictionary<string, int>();

    /// <summary>Number of rules that were evaluated.</summary>
    public int RulesEvaluated { get; init; }

    /// <summary>Number of rules that fired.</summary>
    public int RulesFired { get; init; }

    /// <summary>Number of VEX overrides that were applied.</summary>
    public int VexOverridesApplied { get; init; }

    /// <summary>Evaluation duration, in seconds.</summary>
    public double DurationSeconds { get; init; }

    /// <summary>Evaluation outcome (success, failure, canceled).</summary>
    public required string Outcome { get; init; }

    /// <summary>Error description accompanying a failure outcome.</summary>
    public string? ErrorDetails { get; init; }
}
/// <summary>
/// Describes the environment and configuration a policy evaluation ran under.
/// </summary>
public sealed class EvidenceEnvironment
{
    /// <summary>Version of the Policy Engine service.</summary>
    public required string ServiceVersion { get; init; }

    /// <summary>Evaluation mode (full, incremental, simulate).</summary>
    public required string Mode { get; init; }

    /// <summary><c>true</c> when sealed/air-gapped mode was active.</summary>
    public bool SealedMode { get; init; }

    /// <summary>Identifier of the host machine.</summary>
    public string? HostId { get; init; }

    /// <summary>Trace identifier used for correlation.</summary>
    public string? TraceId { get; init; }

    /// <summary>Configuration values relevant to the evaluation. Empty by default.</summary>
    public Dictionary<string, string> ConfigSnapshot { get; init; } = new Dictionary<string, string>();
}
/// <summary>
/// Table of contents for an evidence bundle: a schema version plus the contained artifacts.
/// </summary>
public sealed class EvidenceManifest
{
    /// <summary>Manifest schema version; defaults to <c>1.0.0</c>.</summary>
    public string SchemaVersion { get; init; } = "1.0.0";

    /// <summary>Artifacts contained in the bundle. Empty by default.</summary>
    public List<EvidenceArtifact> Artifacts { get; init; } = new List<EvidenceArtifact>();

    /// <summary>
    /// Appends a new entry to <see cref="Artifacts"/>.
    /// </summary>
    /// <param name="name">Name/path of the artifact inside the bundle.</param>
    /// <param name="mediaType">Media type of the artifact.</param>
    /// <param name="sizeBytes">Artifact size in bytes.</param>
    /// <param name="contentHash">Content hash of the artifact.</param>
    public void AddArtifact(string name, string mediaType, long sizeBytes, string contentHash)
    {
        var entry = new EvidenceArtifact
        {
            Name = name,
            MediaType = mediaType,
            SizeBytes = sizeBytes,
            ContentHash = contentHash,
        };

        Artifacts.Add(entry);
    }
}
/// <summary>
/// Pointer to an external artifact that served as evaluation input.
/// </summary>
public sealed class EvidenceArtifactRef
{
    /// <summary>URI or other identifier of the artifact.</summary>
    public required string Uri { get; init; }

    /// <summary>SHA-256 content hash of the artifact.</summary>
    public required string ContentHash { get; init; }

    /// <summary>Media type of the artifact, if known.</summary>
    public string? MediaType { get; init; }

    /// <summary>When the artifact was fetched, if recorded.</summary>
    public DateTimeOffset? FetchedAt { get; init; }
}
/// <summary>
/// Describes one artifact stored inside the evidence bundle.
/// </summary>
public sealed class EvidenceArtifact
{
    /// <summary>Name/path of the artifact inside the bundle.</summary>
    public required string Name { get; init; }

    /// <summary>Media type of the artifact.</summary>
    public required string MediaType { get; init; }

    /// <summary>Artifact size, in bytes.</summary>
    public long SizeBytes { get; init; }

    /// <summary>SHA-256 content hash of the artifact.</summary>
    public required string ContentHash { get; init; }
}
/// <summary>
/// Creates, finalizes and (de)serializes <see cref="EvidenceBundle"/> instances.
/// </summary>
public sealed class EvidenceBundleService
{
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Initializes the service.
    /// </summary>
    /// <param name="timeProvider">Clock used for bundle timestamps and bundle identifiers.</param>
    /// <exception cref="ArgumentNullException"><paramref name="timeProvider"/> is <c>null</c>.</exception>
    public EvidenceBundleService(TimeProvider timeProvider)
    {
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
    }

    /// <summary>
    /// Creates a new evidence bundle for a policy evaluation run. The bundle starts with empty
    /// inputs, an empty manifest and an output summary whose outcome is <c>pending</c>.
    /// </summary>
    /// <param name="runId">Run identifier; also embedded in the generated bundle id.</param>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policyId">Policy identifier.</param>
    /// <param name="policyVersion">Policy version.</param>
    /// <param name="mode">Evaluation mode recorded in the environment section.</param>
    /// <param name="serviceVersion">Service version recorded in the environment section.</param>
    /// <param name="sealedMode">Whether sealed/air-gapped mode was active.</param>
    /// <param name="traceId">Optional trace identifier for correlation.</param>
    /// <returns>The newly created bundle; its <see cref="EvidenceBundle.ContentHash"/> is not yet set.</returns>
    public EvidenceBundle CreateBundle(
        string runId,
        string tenant,
        string policyId,
        string policyVersion,
        string mode,
        string serviceVersion,
        bool sealedMode = false,
        string? traceId = null)
    {
        // Read the injected clock exactly once so that CreatedAt and the id suffix agree,
        // and so that both are reproducible when a fake TimeProvider is supplied.
        var createdAt = _timeProvider.GetUtcNow();

        return new EvidenceBundle
        {
            BundleId = GenerateBundleId(runId, createdAt),
            RunId = runId,
            Tenant = tenant,
            PolicyId = policyId,
            PolicyVersion = policyVersion,
            CreatedAt = createdAt,
            Inputs = new EvidenceInputs(),
            Outputs = new EvidenceOutputs { Outcome = "pending" },
            Environment = new EvidenceEnvironment
            {
                ServiceVersion = serviceVersion,
                Mode = mode,
                SealedMode = sealedMode,
                TraceId = traceId,
                HostId = Environment.MachineName,
            },
            Manifest = new EvidenceManifest(),
        };
    }

    /// <summary>
    /// Finalizes the bundle by computing its SHA-256 content hash (lowercase hex) over the
    /// UTF-8 bytes of the serialized bundle and storing it in <see cref="EvidenceBundle.ContentHash"/>.
    /// </summary>
    /// <param name="bundle">Bundle to finalize.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bundle"/> is <c>null</c>.</exception>
    /// <remarks>
    /// The hash is always computed with <see cref="EvidenceBundle.ContentHash"/> cleared, so
    /// calling this method again on an unchanged bundle yields the same value.
    /// NOTE(review): the serializer context is configured with indented output; the digest
    /// therefore presumably depends on the platform line ending — confirm before comparing
    /// hashes across operating systems.
    /// </remarks>
    public void FinalizeBundle(EvidenceBundle bundle)
    {
        ArgumentNullException.ThrowIfNull(bundle);

        // A previously stored hash must not feed into the new digest; null members are
        // omitted by the serializer context, so clearing it restores the pre-finalize shape.
        bundle.ContentHash = null;

        var json = JsonSerializer.Serialize(bundle, EvidenceBundleJsonContext.Default.EvidenceBundle);
        var digest = SHA256.HashData(Encoding.UTF8.GetBytes(json));
        bundle.ContentHash = Convert.ToHexStringLower(digest);
    }

    /// <summary>
    /// Serializes the bundle to JSON using the source-generated context.
    /// </summary>
    /// <param name="bundle">Bundle to serialize.</param>
    /// <returns>JSON text for the bundle.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bundle"/> is <c>null</c>.</exception>
    public string SerializeBundle(EvidenceBundle bundle)
    {
        ArgumentNullException.ThrowIfNull(bundle);
        return JsonSerializer.Serialize(bundle, EvidenceBundleJsonContext.Default.EvidenceBundle);
    }

    /// <summary>
    /// Deserializes a bundle from JSON using the source-generated context.
    /// </summary>
    /// <param name="json">JSON text; must not be null, empty or whitespace.</param>
    /// <returns>The deserialized bundle, or <c>null</c> when the serializer returns <c>null</c>.</returns>
    /// <exception cref="ArgumentException"><paramref name="json"/> is null, empty or whitespace.</exception>
    public EvidenceBundle? DeserializeBundle(string json)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(json);
        return JsonSerializer.Deserialize(json, EvidenceBundleJsonContext.Default.EvidenceBundle);
    }

    /// <summary>
    /// Builds the bundle id as <c>bundle-{runId}-{unix milliseconds in lowercase hex}</c>.
    /// </summary>
    private static string GenerateBundleId(string runId, DateTimeOffset createdAt)
    {
        var timestamp = createdAt.ToUnixTimeMilliseconds();
        return $"bundle-{runId}-{timestamp:x}";
    }
}
/// <summary>
/// Source-generation context for serializing the evidence bundle types.
/// Configured for indented, camelCase output that omits null-valued members.
/// </summary>
[JsonSourceGenerationOptions(
    WriteIndented = true,
    PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase,
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull)]
[JsonSerializable(typeof(EvidenceBundle))]
[JsonSerializable(typeof(EvidenceInputs))]
[JsonSerializable(typeof(EvidenceOutputs))]
[JsonSerializable(typeof(EvidenceEnvironment))]
[JsonSerializable(typeof(EvidenceManifest))]
[JsonSerializable(typeof(EvidenceArtifact))]
[JsonSerializable(typeof(EvidenceArtifactRef))]
internal partial class EvidenceBundleJsonContext : JsonSerializerContext
{
}

View File

@@ -0,0 +1,214 @@
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using OpenTelemetry.Trace;
namespace StellaOps.Policy.Engine.Telemetry;
/// <summary>
/// Service for managing incident mode, which enables 100% trace sampling
/// and extended retention during critical periods.
/// </summary>
/// <remarks>
/// The current state is held as a single immutable <see cref="IncidentModeState"/> reference
/// that is swapped atomically, so readers always observe a consistent snapshot.
/// </remarks>
public sealed class IncidentModeService
{
    // Shared "off" snapshot; the state record is immutable, so one instance can be reused.
    private static readonly IncidentModeState Inactive = new(false, null, null, null);

    private readonly ILogger<IncidentModeService> _logger;
    private readonly TimeProvider _timeProvider;
    private readonly IOptionsMonitor<PolicyEngineTelemetryOptions> _optionsMonitor;

    // Read through Volatile.Read and written through Interlocked so transitions are atomic.
    private IncidentModeState _state = Inactive;

    /// <summary>
    /// Initializes the service. When the current options have incident mode switched on, the
    /// service starts active with reason <c>configuration</c> and no expiry.
    /// </summary>
    /// <param name="logger">Logger for state transitions.</param>
    /// <param name="timeProvider">Clock used for activation and expiry timestamps.</param>
    /// <param name="optionsMonitor">Telemetry options source.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public IncidentModeService(
        ILogger<IncidentModeService> logger,
        TimeProvider timeProvider,
        IOptionsMonitor<PolicyEngineTelemetryOptions> optionsMonitor)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _optionsMonitor = optionsMonitor ?? throw new ArgumentNullException(nameof(optionsMonitor));

        // Initialize from configuration
        if (_optionsMonitor.CurrentValue.IncidentMode)
        {
            _state = new IncidentModeState(
                true,
                _timeProvider.GetUtcNow(),
                null,
                "configuration");
        }
    }

    /// <summary>
    /// Gets the current incident mode state.
    /// </summary>
    public IncidentModeState State => Volatile.Read(ref _state);

    /// <summary>
    /// Gets whether incident mode is currently active.
    /// </summary>
    public bool IsActive => Volatile.Read(ref _state).IsActive;

    /// <summary>
    /// Enables incident mode.
    /// </summary>
    /// <param name="reason">Reason for enabling incident mode.</param>
    /// <param name="duration">Optional duration after which incident mode auto-disables.</param>
    public void Enable(string reason, TimeSpan? duration = null)
    {
        var now = _timeProvider.GetUtcNow();
        var expiresAt = duration.HasValue ? now.Add(duration.Value) : (DateTimeOffset?)null;

        Interlocked.Exchange(ref _state, new IncidentModeState(true, now, expiresAt, reason));

        _logger.LogWarning(
            "Incident mode ENABLED. Reason: {Reason}, ExpiresAt: {ExpiresAt}",
            reason,
            expiresAt?.ToString("O") ?? "never");
        PolicyEngineTelemetry.RecordError("incident_mode_enabled", null);
    }

    /// <summary>
    /// Disables incident mode. A log entry is written only when the mode was active.
    /// </summary>
    /// <param name="reason">Reason for disabling incident mode.</param>
    public void Disable(string reason)
    {
        // Swap and inspect in one atomic step so the "was active" decision matches
        // the state that was actually replaced.
        var previous = Interlocked.Exchange(ref _state, Inactive);
        if (previous.IsActive)
        {
            _logger.LogInformation("Incident mode DISABLED. Reason: {Reason}", reason);
        }
    }

    /// <summary>
    /// Checks if incident mode should be auto-disabled due to expiration.
    /// </summary>
    /// <remarks>
    /// Only the exact snapshot that was found to be expired is cleared. If another caller
    /// enabled or disabled incident mode in the meantime, that newer state is left untouched.
    /// </remarks>
    public void CheckExpiration()
    {
        var observed = Volatile.Read(ref _state);
        if (!observed.IsActive || !observed.ExpiresAt.HasValue)
        {
            return;
        }

        if (_timeProvider.GetUtcNow() < observed.ExpiresAt.Value)
        {
            return;
        }

        // Compare-and-swap on the reference: succeeds only if nobody replaced the state.
        var replaced = Interlocked.CompareExchange(ref _state, Inactive, observed);
        if (ReferenceEquals(replaced, observed))
        {
            _logger.LogInformation("Incident mode DISABLED. Reason: {Reason}", "auto-expired");
        }
    }

    /// <summary>
    /// Gets the effective sampling ratio, considering incident mode.
    /// </summary>
    /// <returns><c>1.0</c> while incident mode is active; otherwise the configured trace sampling ratio.</returns>
    public double GetEffectiveSamplingRatio()
    {
        if (IsActive)
        {
            return 1.0; // 100% sampling during incident mode
        }

        return _optionsMonitor.CurrentValue.TraceSamplingRatio;
    }
}
/// <summary>
/// Immutable snapshot of incident mode.
/// </summary>
/// <param name="IsActive">Whether incident mode is on.</param>
/// <param name="ActivatedAt">When incident mode was switched on, if it is on.</param>
/// <param name="ExpiresAt">When incident mode should switch off automatically, if an expiry was set.</param>
/// <param name="Reason">Reason given when incident mode was switched on.</param>
public sealed record IncidentModeState(bool IsActive, DateTimeOffset? ActivatedAt, DateTimeOffset? ExpiresAt, string? Reason);
/// <summary>
/// A trace sampler that samples every trace while incident mode is active and otherwise
/// defers to a trace-id-ratio sampler built from the supplied base ratio.
/// </summary>
public sealed class IncidentModeSampler : Sampler
{
    private readonly IncidentModeService _incidentModeService;
    private readonly Sampler _baseSampler;

    /// <summary>
    /// Creates the sampler.
    /// </summary>
    /// <param name="incidentModeService">Source of the incident mode flag.</param>
    /// <param name="baseSamplingRatio">Ratio handed to the underlying <see cref="TraceIdRatioBasedSampler"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="incidentModeService"/> is <c>null</c>.</exception>
    public IncidentModeSampler(IncidentModeService incidentModeService, double baseSamplingRatio)
    {
        _incidentModeService = incidentModeService ?? throw new ArgumentNullException(nameof(incidentModeService));
        _baseSampler = new TraceIdRatioBasedSampler(baseSamplingRatio);
    }

    /// <inheritdoc />
    public override SamplingResult ShouldSample(in SamplingParameters samplingParameters)
    {
        // During incident mode, always sample.
        // The decision-only constructor is used: the sampler has no extra attributes to add,
        // and the span's links are not a valid SamplingResult constructor argument.
        if (_incidentModeService.IsActive)
        {
            return new SamplingResult(SamplingDecision.RecordAndSample);
        }

        // Otherwise, use the base sampler
        return _baseSampler.ShouldSample(samplingParameters);
    }
}
/// <summary>
/// <see cref="TracerProviderBuilder"/> helpers for wiring up incident mode.
/// </summary>
public static class IncidentModeExtensions
{
    /// <summary>
    /// Installs an <see cref="IncidentModeSampler"/> as the sampler of the tracer provider.
    /// </summary>
    /// <param name="builder">Builder to configure.</param>
    /// <param name="incidentModeService">Service that reports whether incident mode is active.</param>
    /// <param name="baseSamplingRatio">Sampling ratio used while incident mode is not active.</param>
    /// <returns>The value returned by the builder's <c>SetSampler</c> call.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="builder"/> or <paramref name="incidentModeService"/> is <c>null</c>.
    /// </exception>
    public static TracerProviderBuilder SetIncidentModeSampler(
        this TracerProviderBuilder builder,
        IncidentModeService incidentModeService,
        double baseSamplingRatio)
    {
        ArgumentNullException.ThrowIfNull(builder);
        ArgumentNullException.ThrowIfNull(incidentModeService);

        Sampler sampler = new IncidentModeSampler(incidentModeService, baseSamplingRatio);
        return builder.SetSampler(sampler);
    }
}
/// <summary>
/// Background service that periodically asks <see cref="IncidentModeService"/> to check
/// whether incident mode has expired.
/// </summary>
public sealed class IncidentModeExpirationWorker : BackgroundService
{
    // Pause used after a failed check instead of the regular interval.
    private static readonly TimeSpan ErrorBackoff = TimeSpan.FromSeconds(30);

    private readonly IncidentModeService _incidentModeService;
    private readonly ILogger<IncidentModeExpirationWorker> _logger;
    private readonly TimeSpan _checkInterval = TimeSpan.FromMinutes(1);

    /// <summary>
    /// Creates the worker.
    /// </summary>
    /// <param name="incidentModeService">Service whose expiration is checked.</param>
    /// <param name="logger">Logger for lifecycle and error messages.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public IncidentModeExpirationWorker(
        IncidentModeService incidentModeService,
        ILogger<IncidentModeExpirationWorker> logger)
    {
        _incidentModeService = incidentModeService ?? throw new ArgumentNullException(nameof(incidentModeService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Runs the check loop until <paramref name="stoppingToken"/> is cancelled: one check per
    /// minute, or 30 seconds after a check that threw.
    /// </summary>
    /// <param name="stoppingToken">Signals that the worker should stop.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogDebug("Incident mode expiration worker started.");

        try
        {
            while (!stoppingToken.IsCancellationRequested)
            {
                var delay = _checkInterval;

                try
                {
                    _incidentModeService.CheckExpiration();
                }
                catch (Exception ex) when (ex is not OperationCanceledException || !stoppingToken.IsCancellationRequested)
                {
                    _logger.LogError(ex, "Error checking incident mode expiration.");
                    delay = ErrorBackoff;
                }

                // Single wait for both the regular and the back-off case. Keeping it outside
                // the catch handler means cancellation during back-off is handled like any
                // other shutdown instead of escaping the loop as an exception.
                await Task.Delay(delay, stoppingToken);
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Shutdown requested; fall through to the stop log.
        }

        _logger.LogDebug("Incident mode expiration worker stopped.");
    }
}

View File

@@ -0,0 +1,646 @@
using System.Diagnostics;
using System.Diagnostics.Metrics;
namespace StellaOps.Policy.Engine.Telemetry;
/// <summary>
/// Telemetry instrumentation for the Policy Engine service.
/// Provides metrics, traces, and structured logging correlation.
/// </summary>
public static class PolicyEngineTelemetry
{
/// <summary>
/// The name of the meter used for Policy Engine metrics.
/// </summary>
public const string MeterName = "StellaOps.Policy.Engine";
/// <summary>
/// The name of the activity source used for Policy Engine traces.
/// </summary>
public const string ActivitySourceName = "StellaOps.Policy.Engine";
// Single meter from which every instrument below is created.
private static readonly Meter Meter = new(MeterName);
/// <summary>
/// The activity source used for Policy Engine traces.
/// </summary>
public static readonly ActivitySource ActivitySource = new(ActivitySourceName);
// Histogram: policy_run_seconds{mode,tenant,policy,outcome}
private static readonly Histogram<double> PolicyRunSecondsHistogram =
Meter.CreateHistogram<double>(
"policy_run_seconds",
unit: "s",
description: "Duration of policy evaluation runs.");
// Gauge: policy_run_queue_depth{tenant}
// Reports whatever sequence is currently stored in QueueDepthObservations.
private static readonly ObservableGauge<int> PolicyRunQueueDepthGauge =
Meter.CreateObservableGauge(
"policy_run_queue_depth",
observeValue: () => QueueDepthObservations,
unit: "jobs",
description: "Current depth of pending policy run jobs per tenant.");
// Counter: policy_rules_fired_total{policy,rule}
private static readonly Counter<long> PolicyRulesFiredCounter =
Meter.CreateCounter<long>(
"policy_rules_fired_total",
unit: "rules",
description: "Total number of policy rules that fired during evaluation.");
// Counter: policy_vex_overrides_total{policy,vendor}
private static readonly Counter<long> PolicyVexOverridesCounter =
Meter.CreateCounter<long>(
"policy_vex_overrides_total",
unit: "overrides",
description: "Total number of VEX overrides applied during policy evaluation.");
// Counter: policy_compilation_total{outcome}
private static readonly Counter<long> PolicyCompilationCounter =
Meter.CreateCounter<long>(
"policy_compilation_total",
unit: "compilations",
description: "Total number of policy compilations attempted.");
// Histogram: policy_compilation_seconds{outcome}
private static readonly Histogram<double> PolicyCompilationSecondsHistogram =
Meter.CreateHistogram<double>(
"policy_compilation_seconds",
unit: "s",
description: "Duration of policy compilation.");
// Counter: policy_simulation_total{tenant,outcome}
private static readonly Counter<long> PolicySimulationCounter =
Meter.CreateCounter<long>(
"policy_simulation_total",
unit: "simulations",
description: "Total number of policy simulations executed.");
#region Golden Signals - Latency
// Histogram: policy_api_latency_seconds{endpoint,method,status}
private static readonly Histogram<double> ApiLatencyHistogram =
Meter.CreateHistogram<double>(
"policy_api_latency_seconds",
unit: "s",
description: "API request latency by endpoint.");
// Histogram: policy_evaluation_latency_seconds{tenant,policy}
private static readonly Histogram<double> EvaluationLatencyHistogram =
Meter.CreateHistogram<double>(
"policy_evaluation_latency_seconds",
unit: "s",
description: "Policy evaluation latency per batch.");
#endregion
#region Golden Signals - Traffic
// Counter: policy_requests_total{endpoint,method}
private static readonly Counter<long> RequestsCounter =
Meter.CreateCounter<long>(
"policy_requests_total",
unit: "requests",
description: "Total API requests by endpoint and method.");
// Counter: policy_evaluations_total{tenant,policy,mode}
private static readonly Counter<long> EvaluationsCounter =
Meter.CreateCounter<long>(
"policy_evaluations_total",
unit: "evaluations",
description: "Total policy evaluations by tenant, policy, and mode.");
// Counter: policy_findings_materialized_total{tenant,policy}
private static readonly Counter<long> FindingsMaterializedCounter =
Meter.CreateCounter<long>(
"policy_findings_materialized_total",
unit: "findings",
description: "Total findings materialized during policy evaluation.");
#endregion
#region Golden Signals - Errors
// Counter: policy_errors_total{type,tenant}
private static readonly Counter<long> ErrorsCounter =
Meter.CreateCounter<long>(
"policy_errors_total",
unit: "errors",
description: "Total errors by type (compilation, evaluation, api, storage).");
// Counter: policy_api_errors_total{endpoint,status_code}
private static readonly Counter<long> ApiErrorsCounter =
Meter.CreateCounter<long>(
"policy_api_errors_total",
unit: "errors",
description: "Total API errors by endpoint and status code.");
// Counter: policy_evaluation_failures_total{tenant,policy,reason}
private static readonly Counter<long> EvaluationFailuresCounter =
Meter.CreateCounter<long>(
"policy_evaluation_failures_total",
unit: "failures",
description: "Total evaluation failures by reason (timeout, determinism, storage, canceled).");
#endregion
#region Golden Signals - Saturation
// Gauge: policy_concurrent_evaluations{tenant}
// Reports whatever sequence is currently stored in ConcurrentEvaluationsObservations.
private static readonly ObservableGauge<int> ConcurrentEvaluationsGauge =
Meter.CreateObservableGauge(
"policy_concurrent_evaluations",
observeValue: () => ConcurrentEvaluationsObservations,
unit: "evaluations",
description: "Current number of concurrent policy evaluations.");
// Gauge: policy_worker_utilization
// Reports whatever sequence is currently stored in WorkerUtilizationObservations.
private static readonly ObservableGauge<double> WorkerUtilizationGauge =
Meter.CreateObservableGauge(
"policy_worker_utilization",
observeValue: () => WorkerUtilizationObservations,
unit: "ratio",
description: "Worker pool utilization ratio (0.0 to 1.0).");
#endregion
#region SLO Metrics
// Gauge: policy_slo_burn_rate{slo_name}
// Reports whatever sequence is currently stored in SloBurnRateObservations.
private static readonly ObservableGauge<double> SloBurnRateGauge =
Meter.CreateObservableGauge(
"policy_slo_burn_rate",
observeValue: () => SloBurnRateObservations,
unit: "ratio",
description: "SLO burn rate over configured window.");
// Gauge: policy_error_budget_remaining{slo_name}
// Reports whatever sequence is currently stored in ErrorBudgetObservations.
private static readonly ObservableGauge<double> ErrorBudgetRemainingGauge =
Meter.CreateObservableGauge(
"policy_error_budget_remaining",
observeValue: () => ErrorBudgetObservations,
unit: "ratio",
description: "Remaining error budget as ratio (0.0 to 1.0).");
// Counter: policy_slo_violations_total{slo_name}
private static readonly Counter<long> SloViolationsCounter =
Meter.CreateCounter<long>(
"policy_slo_violations_total",
unit: "violations",
description: "Total SLO violations detected.");
#endregion
#region Risk Scoring Metrics
// Counter: policy_risk_scoring_jobs_created_total
// Exposed directly through the RiskScoringJobsCreated property; tags are chosen by callers.
private static readonly Counter<long> RiskScoringJobsCreatedCounter =
Meter.CreateCounter<long>(
"policy_risk_scoring_jobs_created_total",
unit: "jobs",
description: "Total risk scoring jobs created.");
// Counter: policy_risk_scoring_triggers_skipped_total
// Exposed directly through the RiskScoringTriggersSkipped property; tags are chosen by callers.
private static readonly Counter<long> RiskScoringTriggersSkippedCounter =
Meter.CreateCounter<long>(
"policy_risk_scoring_triggers_skipped_total",
unit: "triggers",
description: "Total risk scoring triggers skipped due to deduplication.");
// Histogram: policy_risk_scoring_duration_seconds{profile_id,finding_count}
private static readonly Histogram<double> RiskScoringDurationHistogram =
Meter.CreateHistogram<double>(
"policy_risk_scoring_duration_seconds",
unit: "s",
description: "Duration of risk scoring job execution.");
// Counter: policy_risk_scoring_findings_scored_total{profile_id}
private static readonly Counter<long> RiskScoringFindingsScoredCounter =
Meter.CreateCounter<long>(
"policy_risk_scoring_findings_scored_total",
unit: "findings",
description: "Total findings scored by risk scoring jobs.");
/// <summary>
/// Counter for risk scoring jobs created.
/// </summary>
public static Counter<long> RiskScoringJobsCreated => RiskScoringJobsCreatedCounter;
/// <summary>
/// Counter for risk scoring triggers skipped.
/// </summary>
public static Counter<long> RiskScoringTriggersSkipped => RiskScoringTriggersSkippedCounter;
/// <summary>
/// Records risk scoring duration.
/// </summary>
/// <param name="seconds">Duration in seconds.</param>
/// <param name="profileId">Profile identifier.</param>
/// <param name="findingCount">
/// Number of findings scored. Reported as a bounded range label in the <c>finding_count</c> tag
/// rather than the raw number, so the tag cannot create one time series per distinct count.
/// </param>
public static void RecordRiskScoringDuration(double seconds, string profileId, int findingCount)
{
    var tags = new TagList
    {
        { "profile_id", NormalizeTag(profileId) },
        { "finding_count", BucketFindingCount(findingCount) },
    };

    RiskScoringDurationHistogram.Record(seconds, tags);
}

/// <summary>
/// Maps a finding count onto one of a fixed set of range labels.
/// </summary>
private static string BucketFindingCount(int findingCount) => findingCount switch
{
    <= 0 => "0",
    <= 10 => "1-10",
    <= 100 => "11-100",
    <= 1_000 => "101-1000",
    <= 10_000 => "1001-10000",
    _ => "10001+",
};
/// <summary>
/// Adds <paramref name="count"/> to the findings-scored counter, tagged with the profile.
/// </summary>
/// <param name="profileId">Profile identifier (normalized before tagging).</param>
/// <param name="count">Number of findings scored.</param>
public static void RecordFindingsScored(string profileId, long count)
{
    var tags = new TagList();
    tags.Add("profile_id", NormalizeTag(profileId));

    RiskScoringFindingsScoredCounter.Add(count, tags);
}
#endregion
// Storage for observable gauge observations.
// Each field holds the sequence that the gauge of the same name reports through its
// observeValue callback; the matching Register*Observation method replaces the field.
// All of them start out as empty sequences.
private static IEnumerable<Measurement<int>> QueueDepthObservations = Enumerable.Empty<Measurement<int>>();
private static IEnumerable<Measurement<int>> ConcurrentEvaluationsObservations = Enumerable.Empty<Measurement<int>>();
private static IEnumerable<Measurement<double>> WorkerUtilizationObservations = Enumerable.Empty<Measurement<double>>();
private static IEnumerable<Measurement<double>> SloBurnRateObservations = Enumerable.Empty<Measurement<double>>();
private static IEnumerable<Measurement<double>> ErrorBudgetObservations = Enumerable.Empty<Measurement<double>>();
/// <summary>
/// Registers a callback to observe queue depth measurements.
/// </summary>
/// <param name="observeFunc">
/// Function that returns current queue depth measurements. It is invoked each time the
/// stored sequence is enumerated, not once at registration.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is <c>null</c>.</exception>
public static void RegisterQueueDepthObservation(Func<IEnumerable<Measurement<int>>> observeFunc)
{
    ArgumentNullException.ThrowIfNull(observeFunc);
    QueueDepthObservations = Deferred();

    // Iterator: its body runs again on every enumeration, so each enumeration
    // calls the callback afresh instead of replaying a registration-time snapshot.
    IEnumerable<Measurement<int>> Deferred()
    {
        foreach (var measurement in observeFunc() ?? Enumerable.Empty<Measurement<int>>())
        {
            yield return measurement;
        }
    }
}
/// <summary>
/// Records one policy run duration sample, tagged with mode, tenant, policy and outcome.
/// </summary>
/// <param name="seconds">Duration in seconds.</param>
/// <param name="mode">Run mode (full, incremental, simulate).</param>
/// <param name="tenant">Tenant identifier.</param>
/// <param name="policy">Policy identifier.</param>
/// <param name="outcome">Outcome of the run (success, failure, canceled).</param>
public static void RecordRunDuration(double seconds, string mode, string tenant, string policy, string outcome)
{
    var tags = new TagList();
    tags.Add("mode", NormalizeTag(mode));
    tags.Add("tenant", NormalizeTenant(tenant));
    tags.Add("policy", NormalizeTag(policy));
    tags.Add("outcome", NormalizeTag(outcome));

    PolicyRunSecondsHistogram.Record(seconds, tags);
}
/// <summary>
/// Adds to the rules-fired counter for a policy/rule pair.
/// </summary>
/// <param name="policy">Policy identifier.</param>
/// <param name="rule">Rule identifier.</param>
/// <param name="count">Number of times the rule fired; defaults to 1.</param>
public static void RecordRuleFired(string policy, string rule, long count = 1)
{
    var tags = new TagList();
    tags.Add("policy", NormalizeTag(policy));
    tags.Add("rule", NormalizeTag(rule));

    PolicyRulesFiredCounter.Add(count, tags);
}
/// <summary>
/// Adds to the VEX-overrides counter for a policy/vendor pair.
/// </summary>
/// <param name="policy">Policy identifier.</param>
/// <param name="vendor">VEX vendor identifier.</param>
/// <param name="count">Number of overrides; defaults to 1.</param>
public static void RecordVexOverride(string policy, string vendor, long count = 1)
{
    var tags = new TagList();
    tags.Add("policy", NormalizeTag(policy));
    tags.Add("vendor", NormalizeTag(vendor));

    PolicyVexOverridesCounter.Add(count, tags);
}
/// <summary>
/// Counts one compilation attempt and records its duration, both tagged with the outcome.
/// </summary>
/// <param name="outcome">Outcome (success, failure).</param>
/// <param name="seconds">Duration in seconds.</param>
public static void RecordCompilation(string outcome, double seconds)
{
    var tags = new TagList();
    tags.Add("outcome", NormalizeTag(outcome));

    PolicyCompilationCounter.Add(1, tags);
    PolicyCompilationSecondsHistogram.Record(seconds, tags);
}
/// <summary>
/// Counts one policy simulation, tagged with tenant and outcome.
/// </summary>
/// <param name="tenant">Tenant identifier.</param>
/// <param name="outcome">Outcome (success, failure).</param>
public static void RecordSimulation(string tenant, string outcome)
{
    var tags = new TagList();
    tags.Add("tenant", NormalizeTenant(tenant));
    tags.Add("outcome", NormalizeTag(outcome));

    PolicySimulationCounter.Add(1, tags);
}
#region Golden Signals - Recording Methods
/// <summary>
/// Records one API latency sample, tagged with endpoint, method and status.
/// </summary>
/// <param name="seconds">Latency in seconds.</param>
/// <param name="endpoint">API endpoint name.</param>
/// <param name="method">HTTP method.</param>
/// <param name="statusCode">HTTP status code; reported as text in the <c>status</c> tag.</param>
public static void RecordApiLatency(double seconds, string endpoint, string method, int statusCode)
{
    var tags = new TagList();
    tags.Add("endpoint", NormalizeTag(endpoint));
    tags.Add("method", NormalizeTag(method));
    tags.Add("status", statusCode.ToString());

    ApiLatencyHistogram.Record(seconds, tags);
}
/// <summary>
/// Records one batch evaluation latency sample, tagged with tenant and policy.
/// </summary>
/// <param name="seconds">Latency in seconds.</param>
/// <param name="tenant">Tenant identifier.</param>
/// <param name="policy">Policy identifier.</param>
public static void RecordEvaluationLatency(double seconds, string tenant, string policy)
{
    var tags = new TagList();
    tags.Add("tenant", NormalizeTenant(tenant));
    tags.Add("policy", NormalizeTag(policy));

    EvaluationLatencyHistogram.Record(seconds, tags);
}
/// <summary>
/// Counts one API request, tagged with endpoint and method.
/// </summary>
/// <param name="endpoint">API endpoint name.</param>
/// <param name="method">HTTP method.</param>
public static void RecordRequest(string endpoint, string method)
{
    var tags = new TagList();
    tags.Add("endpoint", NormalizeTag(endpoint));
    tags.Add("method", NormalizeTag(method));

    RequestsCounter.Add(1, tags);
}
/// <summary>
/// Counts one policy evaluation, tagged with tenant, policy and mode.
/// </summary>
/// <param name="tenant">Tenant identifier.</param>
/// <param name="policy">Policy identifier.</param>
/// <param name="mode">Evaluation mode (full, incremental, simulate).</param>
public static void RecordEvaluation(string tenant, string policy, string mode)
{
    var tags = new TagList();
    tags.Add("tenant", NormalizeTenant(tenant));
    tags.Add("policy", NormalizeTag(policy));
    tags.Add("mode", NormalizeTag(mode));

    EvaluationsCounter.Add(1, tags);
}
/// <summary>
/// Adds to the findings-materialized counter, tagged with tenant and policy.
/// </summary>
/// <param name="tenant">Tenant identifier.</param>
/// <param name="policy">Policy identifier.</param>
/// <param name="count">Number of findings materialized.</param>
public static void RecordFindingsMaterialized(string tenant, string policy, long count)
{
    var tags = new TagList();
    tags.Add("tenant", NormalizeTenant(tenant));
    tags.Add("policy", NormalizeTag(policy));

    FindingsMaterializedCounter.Add(count, tags);
}
/// <summary>
/// Counts one error, tagged with its type and the tenant.
/// </summary>
/// <param name="errorType">Error type (compilation, evaluation, api, storage).</param>
/// <param name="tenant">Tenant identifier; optional, passed through tenant normalization.</param>
public static void RecordError(string errorType, string? tenant = null)
{
    var tags = new TagList();
    tags.Add("type", NormalizeTag(errorType));
    tags.Add("tenant", NormalizeTenant(tenant));

    ErrorsCounter.Add(1, tags);
}
/// <summary>
/// Counts one API error, tagged with endpoint and status code.
/// </summary>
/// <param name="endpoint">API endpoint name.</param>
/// <param name="statusCode">HTTP status code; reported as text in the <c>status_code</c> tag.</param>
public static void RecordApiError(string endpoint, int statusCode)
{
    var tags = new TagList();
    tags.Add("endpoint", NormalizeTag(endpoint));
    tags.Add("status_code", statusCode.ToString());

    ApiErrorsCounter.Add(1, tags);
}
/// <summary>
/// Counts one evaluation failure, tagged with tenant, policy and reason.
/// </summary>
/// <param name="tenant">Tenant identifier.</param>
/// <param name="policy">Policy identifier.</param>
/// <param name="reason">Failure reason (timeout, determinism, storage, canceled).</param>
public static void RecordEvaluationFailure(string tenant, string policy, string reason)
{
    var tags = new TagList();
    tags.Add("tenant", NormalizeTenant(tenant));
    tags.Add("policy", NormalizeTag(policy));
    tags.Add("reason", NormalizeTag(reason));

    EvaluationFailuresCounter.Add(1, tags);
}
/// <summary>
/// Increments the SLO-violation counter by one.
/// </summary>
/// <param name="sloName">Name of the violated SLO; blank values are recorded as "unknown".</param>
public static void RecordSloViolation(string sloName)
{
    var dimensions = new TagList();
    dimensions.Add("slo_name", NormalizeTag(sloName));

    SloViolationsCounter.Add(1, dimensions);
}
/// <summary>
/// Invokes <paramref name="observeFunc"/> once and stores the sequence it returns as the
/// current concurrent-evaluations observations.
/// </summary>
/// <remarks>
/// NOTE(review): the delegate itself is not retained by this method, only its result is.
/// Later readings change only if the returned sequence is lazily evaluated; confirm this
/// is the intended registration behaviour.
/// </remarks>
/// <param name="observeFunc">Function that returns concurrent evaluations measurements.</param>
/// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is null.</exception>
public static void RegisterConcurrentEvaluationsObservation(Func<IEnumerable<Measurement<int>>> observeFunc)
{
    if (observeFunc is null)
    {
        throw new ArgumentNullException(nameof(observeFunc));
    }

    var observations = observeFunc();
    ConcurrentEvaluationsObservations = observations;
}
/// <summary>
/// Invokes <paramref name="observeFunc"/> once and stores the sequence it returns as the
/// current worker-utilization observations.
/// </summary>
/// <remarks>
/// NOTE(review): the delegate itself is not retained by this method, only its result is.
/// Later readings change only if the returned sequence is lazily evaluated; confirm this
/// is the intended registration behaviour.
/// </remarks>
/// <param name="observeFunc">Function that returns worker utilization measurements.</param>
/// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is null.</exception>
public static void RegisterWorkerUtilizationObservation(Func<IEnumerable<Measurement<double>>> observeFunc)
{
    if (observeFunc is null)
    {
        throw new ArgumentNullException(nameof(observeFunc));
    }

    var observations = observeFunc();
    WorkerUtilizationObservations = observations;
}
/// <summary>
/// Invokes <paramref name="observeFunc"/> once and stores the sequence it returns as the
/// current SLO burn-rate observations.
/// </summary>
/// <remarks>
/// NOTE(review): the delegate itself is not retained by this method, only its result is.
/// Later readings change only if the returned sequence is lazily evaluated; confirm this
/// is the intended registration behaviour.
/// </remarks>
/// <param name="observeFunc">Function that returns SLO burn rate measurements.</param>
/// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is null.</exception>
public static void RegisterSloBurnRateObservation(Func<IEnumerable<Measurement<double>>> observeFunc)
{
    if (observeFunc is null)
    {
        throw new ArgumentNullException(nameof(observeFunc));
    }

    var observations = observeFunc();
    SloBurnRateObservations = observations;
}
/// <summary>
/// Invokes <paramref name="observeFunc"/> once and stores the sequence it returns as the
/// current error-budget observations.
/// </summary>
/// <remarks>
/// NOTE(review): the delegate itself is not retained by this method, only its result is.
/// Later readings change only if the returned sequence is lazily evaluated; confirm this
/// is the intended registration behaviour.
/// </remarks>
/// <param name="observeFunc">Function that returns error budget measurements.</param>
/// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is null.</exception>
public static void RegisterErrorBudgetObservation(Func<IEnumerable<Measurement<double>>> observeFunc)
{
    if (observeFunc is null)
    {
        throw new ArgumentNullException(nameof(observeFunc));
    }

    var observations = observeFunc();
    ErrorBudgetObservations = observations;
}
#endregion
/// <summary>
/// Starts an internal "policy.select" activity for selection layer operations and tags it
/// with the tenant and policy.
/// </summary>
/// <param name="tenant">Tenant identifier; null or blank values are tagged as "default".</param>
/// <param name="policyId">Policy identifier; null is tagged as "unknown".</param>
/// <returns>The started activity, or null if the activity source did not create one.</returns>
public static Activity? StartSelectActivity(string? tenant, string? policyId)
{
    var span = ActivitySource.StartActivity("policy.select", ActivityKind.Internal);
    if (span is null)
    {
        return null;
    }

    span.SetTag("tenant", NormalizeTenant(tenant))
        .SetTag("policy.id", policyId ?? "unknown");
    return span;
}
/// <summary>
/// Starts an internal "policy.evaluate" activity for policy evaluation and tags it with the
/// tenant, policy and run.
/// </summary>
/// <param name="tenant">Tenant identifier; null or blank values are tagged as "default".</param>
/// <param name="policyId">Policy identifier; null is tagged as "unknown".</param>
/// <param name="runId">Run identifier; null is tagged as "unknown".</param>
/// <returns>The started activity, or null if the activity source did not create one.</returns>
public static Activity? StartEvaluateActivity(string? tenant, string? policyId, string? runId)
{
    var span = ActivitySource.StartActivity("policy.evaluate", ActivityKind.Internal);
    if (span is null)
    {
        return null;
    }

    span.SetTag("tenant", NormalizeTenant(tenant))
        .SetTag("policy.id", policyId ?? "unknown")
        .SetTag("run.id", runId ?? "unknown");
    return span;
}
/// <summary>
/// Starts an internal "policy.materialize" activity for materialization operations and tags
/// it with the tenant, policy and batch size.
/// </summary>
/// <param name="tenant">Tenant identifier; null or blank values are tagged as "default".</param>
/// <param name="policyId">Policy identifier; null is tagged as "unknown".</param>
/// <param name="batchSize">Number of items in the batch.</param>
/// <returns>The started activity, or null if the activity source did not create one.</returns>
public static Activity? StartMaterializeActivity(string? tenant, string? policyId, int batchSize)
{
    var span = ActivitySource.StartActivity("policy.materialize", ActivityKind.Internal);
    if (span is null)
    {
        return null;
    }

    span.SetTag("tenant", NormalizeTenant(tenant))
        .SetTag("policy.id", policyId ?? "unknown")
        .SetTag("batch.size", batchSize);
    return span;
}
/// <summary>
/// Starts an internal "policy.simulate" activity for simulation operations and tags it with
/// the tenant and policy.
/// </summary>
/// <param name="tenant">Tenant identifier; null or blank values are tagged as "default".</param>
/// <param name="policyId">Policy identifier; null is tagged as "unknown".</param>
/// <returns>The started activity, or null if the activity source did not create one.</returns>
public static Activity? StartSimulateActivity(string? tenant, string? policyId)
{
    var span = ActivitySource.StartActivity("policy.simulate", ActivityKind.Internal);
    if (span is null)
    {
        return null;
    }

    span.SetTag("tenant", NormalizeTenant(tenant))
        .SetTag("policy.id", policyId ?? "unknown");
    return span;
}
/// <summary>
/// Starts an internal "policy.compile" activity for compilation operations and tags it with
/// the policy identifier and version.
/// </summary>
/// <param name="policyId">Policy identifier; null is tagged as "unknown".</param>
/// <param name="version">Policy version; null is tagged as "unknown".</param>
/// <returns>The started activity, or null if the activity source did not create one.</returns>
public static Activity? StartCompileActivity(string? policyId, string? version)
{
    var span = ActivitySource.StartActivity("policy.compile", ActivityKind.Internal);
    if (span is null)
    {
        return null;
    }

    span.SetTag("policy.id", policyId ?? "unknown")
        .SetTag("policy.version", version ?? "unknown");
    return span;
}
// Returns the tenant unchanged, or "default" when it is null, empty or whitespace.
private static string NormalizeTenant(string? tenant)
{
    if (string.IsNullOrWhiteSpace(tenant))
    {
        return "default";
    }

    return tenant;
}
// Returns the value unchanged, or "unknown" when it is null, empty or whitespace.
private static string NormalizeTag(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return "unknown";
    }

    return value;
}
}

View File

@@ -0,0 +1,85 @@
namespace StellaOps.Policy.Engine.Telemetry;
/// <summary>
/// Configuration options for Policy Engine telemetry.
/// </summary>
public sealed class PolicyEngineTelemetryOptions
{
    /// <summary>
    /// Gets or sets a value indicating whether telemetry is enabled.
    /// </summary>
    public bool Enabled { get; set; } = true;

    /// <summary>
    /// Gets or sets a value indicating whether tracing is enabled.
    /// </summary>
    public bool EnableTracing { get; set; } = true;

    /// <summary>
    /// Gets or sets a value indicating whether metrics collection is enabled.
    /// </summary>
    public bool EnableMetrics { get; set; } = true;

    /// <summary>
    /// Gets or sets a value indicating whether structured logging is enabled.
    /// </summary>
    public bool EnableLogging { get; set; } = true;

    /// <summary>
    /// Gets or sets the service name used in telemetry data.
    /// </summary>
    public string? ServiceName { get; set; }

    /// <summary>
    /// Gets or sets the OTLP exporter endpoint. When set, it must be an absolute URI
    /// (enforced by <see cref="Validate"/>).
    /// </summary>
    public string? OtlpEndpoint { get; set; }

    /// <summary>
    /// Gets or sets the OTLP exporter headers.
    /// </summary>
    public Dictionary<string, string> OtlpHeaders { get; set; } = new();

    /// <summary>
    /// Gets or sets additional resource attributes for OpenTelemetry.
    /// </summary>
    public Dictionary<string, string> ResourceAttributes { get; set; } = new();

    /// <summary>
    /// Gets or sets a value indicating whether to export telemetry to console.
    /// </summary>
    public bool ExportConsole { get; set; } = false;

    /// <summary>
    /// Gets or sets the minimum log level for structured logging.
    /// </summary>
    public string MinimumLogLevel { get; set; } = "Information";

    /// <summary>
    /// Gets or sets a value indicating whether incident mode is enabled.
    /// When enabled, 100% sampling is applied and extended retention windows are used.
    /// </summary>
    public bool IncidentMode { get; set; } = false;

    /// <summary>
    /// Gets or sets the sampling ratio for traces (0.0 to 1.0).
    /// Ignored when <see cref="IncidentMode"/> is enabled.
    /// </summary>
    public double TraceSamplingRatio { get; set; } = 0.1;

    /// <summary>
    /// Validates the telemetry options.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// <see cref="OtlpEndpoint"/> is set but is not an absolute URI, or
    /// <see cref="TraceSamplingRatio"/> is NaN or outside the range 0.0 to 1.0.
    /// </exception>
    public void Validate()
    {
        if (!string.IsNullOrWhiteSpace(OtlpEndpoint) && !Uri.TryCreate(OtlpEndpoint, UriKind.Absolute, out _))
        {
            throw new InvalidOperationException("Telemetry OTLP endpoint must be a valid absolute URI.");
        }

        // Expressed as "not inside [0, 1]" rather than "below 0 or above 1": double.NaN fails
        // every ordered comparison, so the latter form would let NaN through unnoticed.
        if (TraceSamplingRatio is not (>= 0 and <= 1))
        {
            throw new InvalidOperationException("Telemetry trace sampling ratio must be between 0.0 and 1.0.");
        }
    }
}

View File

@@ -0,0 +1,347 @@
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using StellaOps.Attestor.Envelope;
namespace StellaOps.Policy.Engine.Telemetry;
/// <summary>
/// Well-known type identifiers used when building policy evaluation attestations.
/// </summary>
public static class PolicyAttestationTypes
{
    /// <summary>
    /// DSSE payload type used for in-toto statements.
    /// </summary>
    public const string InTotoPayloadType = "application/vnd.in-toto+json";

    /// <summary>
    /// Attestation (predicate) type URI for policy evaluation results, version 1.
    /// </summary>
    public const string PolicyEvaluationV1 = "https://stella-ops.org/attestation/policy-evaluation/v1";
}
/// <summary>
/// in-toto Statement structure for policy evaluation attestations.
/// </summary>
public sealed class PolicyEvaluationStatement
{
/// <summary>
/// Statement type identifier, serialized as <c>_type</c>.
/// Defaults to the in-toto Statement v1 URI.
/// </summary>
[JsonPropertyName("_type")]
public string Type { get; init; } = "https://in-toto.io/Statement/v1";
/// <summary>
/// Subjects the statement refers to, serialized as <c>subject</c>. Starts out empty.
/// </summary>
[JsonPropertyName("subject")]
public List<InTotoSubject> Subject { get; init; } = new();
/// <summary>
/// Predicate type URI, serialized as <c>predicateType</c>.
/// Defaults to <see cref="PolicyAttestationTypes.PolicyEvaluationV1"/>.
/// </summary>
[JsonPropertyName("predicateType")]
public string PredicateType { get; init; } = PolicyAttestationTypes.PolicyEvaluationV1;
/// <summary>
/// Policy evaluation details, serialized as <c>predicate</c>. Must be set on initialization.
/// </summary>
[JsonPropertyName("predicate")]
public required PolicyEvaluationPredicate Predicate { get; init; }
}
/// <summary>
/// Subject reference in an in-toto statement.
/// </summary>
public sealed class InTotoSubject
{
/// <summary>
/// Subject name, serialized as <c>name</c>.
/// </summary>
[JsonPropertyName("name")]
public required string Name { get; init; }
/// <summary>
/// Digest values keyed by digest algorithm name (for example <c>sha256</c>),
/// serialized as <c>digest</c>.
/// </summary>
[JsonPropertyName("digest")]
public required Dictionary<string, string> Digest { get; init; }
}
/// <summary>
/// Predicate containing policy evaluation details.
/// </summary>
/// <remarks>
/// Every property except <see cref="DeterminismHash"/> and <see cref="EvidenceBundle"/>
/// is <c>required</c> and must be supplied on initialization.
/// </remarks>
public sealed class PolicyEvaluationPredicate
{
/// <summary>
/// Run identifier.
/// </summary>
[JsonPropertyName("runId")]
public required string RunId { get; init; }
/// <summary>
/// Tenant identifier.
/// </summary>
[JsonPropertyName("tenant")]
public required string Tenant { get; init; }
/// <summary>
/// Policy identifier.
/// </summary>
[JsonPropertyName("policyId")]
public required string PolicyId { get; init; }
/// <summary>
/// Policy version.
/// </summary>
[JsonPropertyName("policyVersion")]
public required string PolicyVersion { get; init; }
/// <summary>
/// Evaluation mode (full, incremental, simulate).
/// </summary>
[JsonPropertyName("mode")]
public required string Mode { get; init; }
/// <summary>
/// Timestamp when evaluation started.
/// </summary>
[JsonPropertyName("startedAt")]
public required DateTimeOffset StartedAt { get; init; }
/// <summary>
/// Timestamp when evaluation completed.
/// </summary>
[JsonPropertyName("completedAt")]
public required DateTimeOffset CompletedAt { get; init; }
/// <summary>
/// Outcome of the evaluation.
/// </summary>
[JsonPropertyName("outcome")]
public required string Outcome { get; init; }
/// <summary>
/// Determinism hash for reproducibility verification. Optional; may be null.
/// </summary>
[JsonPropertyName("determinismHash")]
public string? DeterminismHash { get; init; }
/// <summary>
/// Reference to the evidence bundle. Optional; may be null.
/// </summary>
[JsonPropertyName("evidenceBundle")]
public EvidenceBundleRef? EvidenceBundle { get; init; }
/// <summary>
/// Summary metrics from the evaluation.
/// </summary>
[JsonPropertyName("metrics")]
public required PolicyEvaluationMetrics Metrics { get; init; }
/// <summary>
/// Environment information.
/// </summary>
[JsonPropertyName("environment")]
public required PolicyEvaluationEnvironment Environment { get; init; }
}
/// <summary>
/// Reference to an evidence bundle.
/// </summary>
public sealed class EvidenceBundleRef
{
/// <summary>
/// Identifier of the referenced bundle, serialized as <c>bundleId</c>.
/// </summary>
[JsonPropertyName("bundleId")]
public required string BundleId { get; init; }
/// <summary>
/// Content hash of the referenced bundle, serialized as <c>contentHash</c>.
/// <see cref="PolicyEvaluationAttestationService.CreateStatement"/> substitutes
/// <c>"unknown"</c> when the bundle carries no hash.
/// </summary>
[JsonPropertyName("contentHash")]
public required string ContentHash { get; init; }
/// <summary>
/// Optional location of the bundle, serialized as <c>uri</c>. May be null.
/// </summary>
[JsonPropertyName("uri")]
public string? Uri { get; init; }
}
/// <summary>
/// Metrics from the policy evaluation.
/// </summary>
public sealed class PolicyEvaluationMetrics
{
/// <summary>
/// Number of findings reported for the evaluation, serialized as <c>totalFindings</c>.
/// </summary>
[JsonPropertyName("totalFindings")]
public int TotalFindings { get; init; }
/// <summary>
/// Number of rules evaluated, serialized as <c>rulesEvaluated</c>.
/// </summary>
[JsonPropertyName("rulesEvaluated")]
public int RulesEvaluated { get; init; }
/// <summary>
/// Number of rules that fired, serialized as <c>rulesFired</c>.
/// </summary>
[JsonPropertyName("rulesFired")]
public int RulesFired { get; init; }
/// <summary>
/// Number of VEX overrides applied, serialized as <c>vexOverridesApplied</c>.
/// </summary>
[JsonPropertyName("vexOverridesApplied")]
public int VexOverridesApplied { get; init; }
/// <summary>
/// Evaluation duration in seconds, serialized as <c>durationSeconds</c>.
/// </summary>
[JsonPropertyName("durationSeconds")]
public double DurationSeconds { get; init; }
}
/// <summary>
/// Environment information for the evaluation.
/// </summary>
public sealed class PolicyEvaluationEnvironment
{
/// <summary>
/// Version of the service that produced the evaluation, serialized as <c>serviceVersion</c>.
/// </summary>
[JsonPropertyName("serviceVersion")]
public required string ServiceVersion { get; init; }
/// <summary>
/// Optional host identifier, serialized as <c>hostId</c>.
/// <see cref="PolicyEvaluationAttestationService.CreateStatement"/> fills it with the machine name.
/// </summary>
[JsonPropertyName("hostId")]
public string? HostId { get; init; }
/// <summary>
/// Sealed-mode flag, serialized as <c>sealedMode</c>.
/// NOTE(review): the exact meaning of sealed mode is defined outside this file.
/// </summary>
[JsonPropertyName("sealedMode")]
public bool SealedMode { get; init; }
}
/// <summary>
/// Service for creating DSSE attestations for policy evaluations.
/// </summary>
public sealed class PolicyEvaluationAttestationService
{
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="timeProvider">Clock used to stamp the completion time of statements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="timeProvider"/> is null.</exception>
    public PolicyEvaluationAttestationService(TimeProvider timeProvider)
    {
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
    }

    /// <summary>
    /// Creates an in-toto statement for a policy evaluation.
    /// </summary>
    /// <remarks>
    /// The completion time is taken from the injected <see cref="TimeProvider"/> at the moment
    /// of the call. Caller-supplied <paramref name="subjects"/> are added first, followed by a
    /// subject for the policy itself whose SHA-256 digest is computed over the text
    /// <c>policyId@policyVersion</c>.
    /// </remarks>
    /// <param name="runId">Run identifier.</param>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policyId">Policy identifier.</param>
    /// <param name="policyVersion">Policy version.</param>
    /// <param name="mode">Evaluation mode.</param>
    /// <param name="startedAt">Timestamp when the evaluation started.</param>
    /// <param name="outcome">Outcome of the evaluation.</param>
    /// <param name="serviceVersion">Version of the service producing the statement.</param>
    /// <param name="totalFindings">Number of findings.</param>
    /// <param name="rulesEvaluated">Number of rules evaluated.</param>
    /// <param name="rulesFired">Number of rules that fired.</param>
    /// <param name="vexOverridesApplied">Number of VEX overrides applied.</param>
    /// <param name="durationSeconds">Evaluation duration in seconds.</param>
    /// <param name="determinismHash">Optional determinism hash.</param>
    /// <param name="evidenceBundle">Optional evidence bundle to reference; a missing content hash is recorded as "unknown".</param>
    /// <param name="sealedMode">Sealed-mode flag copied into the environment section.</param>
    /// <param name="subjects">Optional additional subjects as (name, digest algorithm, digest value) tuples.</param>
    /// <returns>The populated statement.</returns>
    /// <exception cref="ArgumentNullException">One of the required string arguments is null.</exception>
    public PolicyEvaluationStatement CreateStatement(
        string runId,
        string tenant,
        string policyId,
        string policyVersion,
        string mode,
        DateTimeOffset startedAt,
        string outcome,
        string serviceVersion,
        int totalFindings,
        int rulesEvaluated,
        int rulesFired,
        int vexOverridesApplied,
        double durationSeconds,
        string? determinismHash = null,
        EvidenceBundle? evidenceBundle = null,
        bool sealedMode = false,
        IEnumerable<(string name, string digestAlgorithm, string digestValue)>? subjects = null)
    {
        // Fail fast: a null identifier would otherwise be dropped from the serialized
        // statement (nulls are ignored on write) and still feed the policy digest,
        // yielding a well-formed-looking but incomplete attestation.
        ArgumentNullException.ThrowIfNull(runId);
        ArgumentNullException.ThrowIfNull(tenant);
        ArgumentNullException.ThrowIfNull(policyId);
        ArgumentNullException.ThrowIfNull(policyVersion);
        ArgumentNullException.ThrowIfNull(mode);
        ArgumentNullException.ThrowIfNull(outcome);
        ArgumentNullException.ThrowIfNull(serviceVersion);

        var statement = new PolicyEvaluationStatement
        {
            Predicate = new PolicyEvaluationPredicate
            {
                RunId = runId,
                Tenant = tenant,
                PolicyId = policyId,
                PolicyVersion = policyVersion,
                Mode = mode,
                StartedAt = startedAt,
                CompletedAt = _timeProvider.GetUtcNow(),
                Outcome = outcome,
                DeterminismHash = determinismHash,
                EvidenceBundle = evidenceBundle is not null
                    ? new EvidenceBundleRef
                    {
                        BundleId = evidenceBundle.BundleId,
                        ContentHash = evidenceBundle.ContentHash ?? "unknown",
                    }
                    : null,
                Metrics = new PolicyEvaluationMetrics
                {
                    TotalFindings = totalFindings,
                    RulesEvaluated = rulesEvaluated,
                    RulesFired = rulesFired,
                    VexOverridesApplied = vexOverridesApplied,
                    DurationSeconds = durationSeconds,
                },
                Environment = new PolicyEvaluationEnvironment
                {
                    ServiceVersion = serviceVersion,
                    HostId = System.Environment.MachineName,
                    SealedMode = sealedMode,
                },
            },
        };

        // Caller-supplied subjects come first, in the order given.
        if (subjects is not null)
        {
            foreach (var (name, algorithm, value) in subjects)
            {
                statement.Subject.Add(new InTotoSubject
                {
                    Name = name,
                    Digest = new Dictionary<string, string> { [algorithm] = value },
                });
            }
        }

        // The policy itself is always the final subject.
        statement.Subject.Add(new InTotoSubject
        {
            Name = $"policy://{tenant}/{policyId}@{policyVersion}",
            Digest = new Dictionary<string, string>
            {
                ["sha256"] = ComputePolicyDigest(policyId, policyVersion),
            },
        });

        return statement;
    }

    /// <summary>
    /// Serializes an in-toto statement to UTF-8 JSON bytes for signing.
    /// </summary>
    /// <param name="statement">Statement to serialize.</param>
    /// <returns>UTF-8 encoded JSON produced with the source-generated attestation context.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="statement"/> is null.</exception>
    public byte[] SerializeStatement(PolicyEvaluationStatement statement)
    {
        ArgumentNullException.ThrowIfNull(statement);

        var json = JsonSerializer.Serialize(statement, PolicyAttestationJsonContext.Default.PolicyEvaluationStatement);
        return Encoding.UTF8.GetBytes(json);
    }

    /// <summary>
    /// Creates an unsigned DSSE envelope for the statement.
    /// This envelope can be sent to the Attestor service for signing.
    /// </summary>
    /// <param name="statement">Statement to wrap.</param>
    /// <returns>A request carrying the serialized statement both as raw bytes and as Base64 text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="statement"/> is null.</exception>
    public DsseEnvelopeRequest CreateEnvelopeRequest(PolicyEvaluationStatement statement)
    {
        var payload = SerializeStatement(statement);

        return new DsseEnvelopeRequest
        {
            PayloadType = PolicyAttestationTypes.InTotoPayloadType,
            Payload = payload,
            PayloadBase64 = Convert.ToBase64String(payload),
        };
    }

    // Lower-case hex SHA-256 of the UTF-8 text "policyId@policyVersion".
    private static string ComputePolicyDigest(string policyId, string policyVersion)
    {
        var input = $"{policyId}@{policyVersion}";
        var hash = SHA256.HashData(Encoding.UTF8.GetBytes(input));
        return Convert.ToHexStringLower(hash);
    }
}
/// <summary>
/// Request to create a DSSE envelope (to be sent to Attestor service).
/// </summary>
/// <remarks>
/// <see cref="PolicyEvaluationAttestationService.CreateEnvelopeRequest"/> fills
/// <see cref="Payload"/> and <see cref="PayloadBase64"/> from the same byte buffer.
/// Nothing in this type keeps the two in sync if they are set independently.
/// </remarks>
public sealed class DsseEnvelopeRequest
{
/// <summary>
/// DSSE payload type.
/// </summary>
public required string PayloadType { get; init; }
/// <summary>
/// Raw payload bytes.
/// </summary>
public required byte[] Payload { get; init; }
/// <summary>
/// Base64-encoded payload for transmission.
/// </summary>
public required string PayloadBase64 { get; init; }
}
/// <summary>
/// Source-generated System.Text.Json context for the attestation types in this file.
/// Configured for compact (non-indented) output that omits null-valued properties.
/// </summary>
[JsonSerializable(typeof(PolicyEvaluationStatement))]
[JsonSerializable(typeof(PolicyEvaluationPredicate))]
[JsonSerializable(typeof(InTotoSubject))]
[JsonSerializable(typeof(EvidenceBundleRef))]
[JsonSerializable(typeof(PolicyEvaluationMetrics))]
[JsonSerializable(typeof(PolicyEvaluationEnvironment))]
[JsonSourceGenerationOptions(
WriteIndented = false,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull)]
internal partial class PolicyAttestationJsonContext : JsonSerializerContext
{
}

View File

@@ -0,0 +1,471 @@
using System.Diagnostics;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
namespace StellaOps.Policy.Engine.Telemetry;
/// <summary>
/// Provides structured timeline events for policy evaluation and decision flows.
/// Events are emitted as structured logs with correlation to traces.
/// </summary>
/// <remarks>
/// Every <c>Emit*</c> method builds a <see cref="TimelineEvent"/> stamped with the injected
/// <see cref="TimeProvider"/> and the current <see cref="Activity"/> (if any), then writes it
/// through the logger. Nothing is built or serialized when the target log level is disabled.
/// </remarks>
public sealed class PolicyTimelineEvents
{
    private readonly ILogger<PolicyTimelineEvents> _logger;
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Creates the emitter.
    /// </summary>
    /// <param name="logger">Logger that receives the timeline events.</param>
    /// <param name="timeProvider">Clock used to timestamp events.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public PolicyTimelineEvents(ILogger<PolicyTimelineEvents> logger, TimeProvider timeProvider)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
    }

    #region Evaluation Flow Events

    /// <summary>
    /// Emits an event when a policy evaluation run starts.
    /// </summary>
    public void EmitRunStarted(string runId, string tenant, string policyId, string policyVersion, string mode)
    {
        Emit(
            TimelineEventType.RunStarted,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["mode"] = mode,
            },
            policyVersion: policyVersion);
    }

    /// <summary>
    /// Emits an event when a policy evaluation run completes.
    /// </summary>
    public void EmitRunCompleted(
        string runId,
        string tenant,
        string policyId,
        string outcome,
        double durationSeconds,
        int findingsCount,
        string? determinismHash = null)
    {
        Emit(
            TimelineEventType.RunCompleted,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["outcome"] = outcome,
                ["duration_seconds"] = durationSeconds,
                ["findings_count"] = findingsCount,
                ["determinism_hash"] = determinismHash,
            });
    }

    /// <summary>
    /// Emits an event when a batch selection phase starts.
    /// </summary>
    public void EmitSelectionStarted(string runId, string tenant, string policyId, int batchNumber)
    {
        Emit(
            TimelineEventType.SelectionStarted,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["batch_number"] = batchNumber,
            });
    }

    /// <summary>
    /// Emits an event when a batch selection phase completes.
    /// </summary>
    public void EmitSelectionCompleted(
        string runId,
        string tenant,
        string policyId,
        int batchNumber,
        int tupleCount,
        double durationSeconds)
    {
        Emit(
            TimelineEventType.SelectionCompleted,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["batch_number"] = batchNumber,
                ["tuple_count"] = tupleCount,
                ["duration_seconds"] = durationSeconds,
            });
    }

    /// <summary>
    /// Emits an event when batch evaluation starts.
    /// </summary>
    public void EmitEvaluationStarted(string runId, string tenant, string policyId, int batchNumber, int tupleCount)
    {
        Emit(
            TimelineEventType.EvaluationStarted,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["batch_number"] = batchNumber,
                ["tuple_count"] = tupleCount,
            });
    }

    /// <summary>
    /// Emits an event when batch evaluation completes.
    /// </summary>
    public void EmitEvaluationCompleted(
        string runId,
        string tenant,
        string policyId,
        int batchNumber,
        int rulesEvaluated,
        int rulesFired,
        double durationSeconds)
    {
        Emit(
            TimelineEventType.EvaluationCompleted,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["batch_number"] = batchNumber,
                ["rules_evaluated"] = rulesEvaluated,
                ["rules_fired"] = rulesFired,
                ["duration_seconds"] = durationSeconds,
            });
    }

    #endregion

    #region Decision Flow Events

    /// <summary>
    /// Emits an event when a rule matches during evaluation.
    /// </summary>
    public void EmitRuleMatched(
        string runId,
        string tenant,
        string policyId,
        string ruleId,
        string findingKey,
        string? severity = null)
    {
        Emit(
            TimelineEventType.RuleMatched,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["rule_id"] = ruleId,
                ["finding_key"] = findingKey,
                ["severity"] = severity,
            });
    }

    /// <summary>
    /// Emits an event when a VEX override is applied.
    /// </summary>
    public void EmitVexOverrideApplied(
        string runId,
        string tenant,
        string policyId,
        string findingKey,
        string vendor,
        string status,
        string? justification = null)
    {
        Emit(
            TimelineEventType.VexOverrideApplied,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["finding_key"] = findingKey,
                ["vendor"] = vendor,
                ["status"] = status,
                ["justification"] = justification,
            });
    }

    /// <summary>
    /// Emits an event when a final verdict is determined for a finding.
    /// </summary>
    public void EmitVerdictDetermined(
        string runId,
        string tenant,
        string policyId,
        string findingKey,
        string verdict,
        string severity,
        string? reachabilityState = null,
        IReadOnlyList<string>? contributingRules = null)
    {
        Emit(
            TimelineEventType.VerdictDetermined,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["finding_key"] = findingKey,
                ["verdict"] = verdict,
                ["severity"] = severity,
                ["reachability_state"] = reachabilityState,
                ["contributing_rules"] = contributingRules,
            });
    }

    /// <summary>
    /// Emits an event when materialization of findings starts.
    /// </summary>
    public void EmitMaterializationStarted(string runId, string tenant, string policyId, int findingsCount)
    {
        Emit(
            TimelineEventType.MaterializationStarted,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["findings_count"] = findingsCount,
            });
    }

    /// <summary>
    /// Emits an event when materialization of findings completes.
    /// </summary>
    public void EmitMaterializationCompleted(
        string runId,
        string tenant,
        string policyId,
        int findingsWritten,
        int findingsUpdated,
        double durationSeconds)
    {
        Emit(
            TimelineEventType.MaterializationCompleted,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["findings_written"] = findingsWritten,
                ["findings_updated"] = findingsUpdated,
                ["duration_seconds"] = durationSeconds,
            });
    }

    #endregion

    #region Error Events

    /// <summary>
    /// Emits an event when an error occurs during evaluation. Logged at error level.
    /// </summary>
    public void EmitError(
        string runId,
        string tenant,
        string policyId,
        string errorCode,
        string errorMessage,
        string? phase = null)
    {
        Emit(
            TimelineEventType.Error,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["error_code"] = errorCode,
                ["error_message"] = errorMessage,
                ["phase"] = phase,
            },
            LogLevel.Error);
    }

    /// <summary>
    /// Emits an event when a determinism violation is detected. Logged at warning level.
    /// </summary>
    public void EmitDeterminismViolation(
        string runId,
        string tenant,
        string policyId,
        string violationType,
        string details)
    {
        Emit(
            TimelineEventType.DeterminismViolation,
            runId,
            tenant,
            policyId,
            new Dictionary<string, object?>
            {
                ["violation_type"] = violationType,
                ["details"] = details,
            },
            LogLevel.Warning);
    }

    #endregion

    // Builds the event envelope shared by every Emit* method and hands it to the logger.
    private void Emit(
        TimelineEventType eventType,
        string runId,
        string tenant,
        string policyId,
        Dictionary<string, object?> data,
        LogLevel level = LogLevel.Information,
        string? policyVersion = null)
    {
        // Per-finding events can be very frequent; skip the timestamp lookup and the JSON
        // serialization of the payload when nobody is listening at this level.
        if (!_logger.IsEnabled(level))
        {
            return;
        }

        // Read the ambient activity once so trace and span ids come from the same activity.
        var activity = Activity.Current;

        var evt = new TimelineEvent
        {
            EventType = eventType,
            Timestamp = _timeProvider.GetUtcNow(),
            RunId = runId,
            Tenant = tenant,
            PolicyId = policyId,
            PolicyVersion = policyVersion,
            TraceId = activity?.TraceId.ToString(),
            SpanId = activity?.SpanId.ToString(),
            Data = data,
        };

        LogTimelineEvent(evt, level);
    }

    // Writes the event as one structured log entry; the payload is rendered as compact JSON.
    // NOTE(review): Timestamp and PolicyVersion are captured on the event but are not part of
    // the message template below - confirm whether they should be logged as well.
    private void LogTimelineEvent(TimelineEvent evt, LogLevel level = LogLevel.Information)
    {
        _logger.Log(
            level,
            "PolicyTimeline: {EventType} | run={RunId} tenant={Tenant} policy={PolicyId} trace={TraceId} span={SpanId} data={Data}",
            evt.EventType,
            evt.RunId,
            evt.Tenant,
            evt.PolicyId,
            evt.TraceId,
            evt.SpanId,
            JsonSerializer.Serialize(evt.Data, TimelineEventJsonContext.Default.DictionaryStringObject));
    }
}
/// <summary>
/// Types of timeline events emitted during policy evaluation.
/// </summary>
public enum TimelineEventType
{
/// <summary>A policy evaluation run started (<c>EmitRunStarted</c>).</summary>
RunStarted,
/// <summary>A policy evaluation run completed (<c>EmitRunCompleted</c>).</summary>
RunCompleted,
/// <summary>A batch selection phase started (<c>EmitSelectionStarted</c>).</summary>
SelectionStarted,
/// <summary>A batch selection phase completed (<c>EmitSelectionCompleted</c>).</summary>
SelectionCompleted,
/// <summary>Batch evaluation started (<c>EmitEvaluationStarted</c>).</summary>
EvaluationStarted,
/// <summary>Batch evaluation completed (<c>EmitEvaluationCompleted</c>).</summary>
EvaluationCompleted,
/// <summary>A rule matched during evaluation (<c>EmitRuleMatched</c>).</summary>
RuleMatched,
/// <summary>A VEX override was applied (<c>EmitVexOverrideApplied</c>).</summary>
VexOverrideApplied,
/// <summary>A final verdict was determined for a finding (<c>EmitVerdictDetermined</c>).</summary>
VerdictDetermined,
/// <summary>Materialization of findings started (<c>EmitMaterializationStarted</c>).</summary>
MaterializationStarted,
/// <summary>Materialization of findings completed (<c>EmitMaterializationCompleted</c>).</summary>
MaterializationCompleted,
/// <summary>An error occurred during evaluation (<c>EmitError</c>).</summary>
Error,
/// <summary>A determinism violation was detected (<c>EmitDeterminismViolation</c>).</summary>
DeterminismViolation,
}
/// <summary>
/// Represents a timeline event for policy evaluation flows.
/// </summary>
public sealed record TimelineEvent
{
/// <summary>Kind of event.</summary>
public required TimelineEventType EventType { get; init; }
/// <summary>Time the event was created; <see cref="PolicyTimelineEvents"/> takes it from its injected <see cref="TimeProvider"/>.</summary>
public required DateTimeOffset Timestamp { get; init; }
/// <summary>Run identifier the event belongs to.</summary>
public required string RunId { get; init; }
/// <summary>Tenant identifier.</summary>
public required string Tenant { get; init; }
/// <summary>Policy identifier.</summary>
public required string PolicyId { get; init; }
/// <summary>Policy version; optional. In this file only the run-started event supplies it.</summary>
public string? PolicyVersion { get; init; }
/// <summary>Trace id of the activity that was current when the event was created, or null if there was none.</summary>
public string? TraceId { get; init; }
/// <summary>Span id of the activity that was current when the event was created, or null if there was none.</summary>
public string? SpanId { get; init; }
/// <summary>Event-specific payload as key/value pairs; optional.</summary>
public Dictionary<string, object?>? Data { get; init; }
}
/// <summary>
/// Source-generated System.Text.Json context used to render timeline event payloads.
/// </summary>
/// <remarks>
/// Payload values are typed as <c>object</c>, so the serializer resolves each value by its
/// runtime type. A source-generated context only knows the types it was told about, so the
/// value types the timeline emitters actually store (strings, integers, doubles and string
/// lists) are registered explicitly; without them serialization fails with
/// <see cref="NotSupportedException"/> for any payload containing such a value.
/// </remarks>
[JsonSerializable(typeof(Dictionary<string, object?>))]
[JsonSerializable(typeof(string))]
[JsonSerializable(typeof(int))]
[JsonSerializable(typeof(double))]
[JsonSerializable(typeof(string[]))]
[JsonSerializable(typeof(List<string>))]
[JsonSourceGenerationOptions(WriteIndented = false)]
internal partial class TimelineEventJsonContext : JsonSerializerContext
{
}

View File

@@ -0,0 +1,239 @@
using System.Diagnostics;
using System.Reflection;
using Microsoft.Extensions.DependencyInjection;
using OpenTelemetry.Metrics;
using OpenTelemetry.Resources;
using OpenTelemetry.Trace;
using Serilog;
using Serilog.Core;
using Serilog.Events;
using StellaOps.Policy.Engine.Options;
namespace StellaOps.Policy.Engine.Telemetry;
/// <summary>
/// Extension methods for configuring Policy Engine telemetry.
/// </summary>
public static class TelemetryExtensions
{
    /// <summary>
    /// Configures Policy Engine telemetry including metrics, traces, and structured logging.
    /// </summary>
    /// <remarks>
    /// Serilog is wired up whenever logging is enabled, independently of the master
    /// <c>Enabled</c> switch. OpenTelemetry is only registered when telemetry is enabled and
    /// at least one of tracing or metrics is on.
    /// NOTE(review): this method does not configure a trace sampler from
    /// <c>TraceSamplingRatio</c> / <c>IncidentMode</c>; confirm that sampling is applied elsewhere.
    /// </remarks>
    /// <param name="builder">The web application builder.</param>
    /// <param name="options">Policy engine options containing telemetry configuration.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static void ConfigurePolicyEngineTelemetry(this WebApplicationBuilder builder, PolicyEngineOptions options)
    {
        ArgumentNullException.ThrowIfNull(builder);
        ArgumentNullException.ThrowIfNull(options);

        var telemetry = options.Telemetry ?? new PolicyEngineTelemetryOptions();

        if (telemetry.EnableLogging)
        {
            builder.Host.UseSerilog((context, services, configuration) =>
            {
                ConfigureSerilog(configuration, telemetry, builder.Environment.EnvironmentName, builder.Environment.ApplicationName);
            });
        }

        if (!telemetry.Enabled || (!telemetry.EnableTracing && !telemetry.EnableMetrics))
        {
            return;
        }

        var openTelemetry = builder.Services.AddOpenTelemetry();

        openTelemetry.ConfigureResource(resource =>
        {
            var serviceName = telemetry.ServiceName ?? builder.Environment.ApplicationName;
            var version = Assembly.GetExecutingAssembly().GetName().Version?.ToString() ?? "unknown";

            resource.AddService(serviceName, serviceVersion: version, serviceInstanceId: Environment.MachineName);
            resource.AddAttributes(new[]
            {
                new KeyValuePair<string, object>("deployment.environment", builder.Environment.EnvironmentName),
            });

            // Configured attributes with a blank key or a null value are skipped.
            foreach (var attribute in telemetry.ResourceAttributes)
            {
                if (string.IsNullOrWhiteSpace(attribute.Key) || attribute.Value is null)
                {
                    continue;
                }

                resource.AddAttributes(new[] { new KeyValuePair<string, object>(attribute.Key, attribute.Value) });
            }
        });

        if (telemetry.EnableTracing)
        {
            openTelemetry.WithTracing(tracing =>
            {
                tracing
                    .AddSource(PolicyEngineTelemetry.ActivitySourceName)
                    .AddAspNetCoreInstrumentation()
                    .AddHttpClientInstrumentation();

                ConfigureTracingExporter(telemetry, tracing);
            });
        }

        if (telemetry.EnableMetrics)
        {
            openTelemetry.WithMetrics(metrics =>
            {
                metrics
                    .AddMeter(PolicyEngineTelemetry.MeterName)
                    .AddAspNetCoreInstrumentation()
                    .AddHttpClientInstrumentation()
                    .AddRuntimeInstrumentation();

                ConfigureMetricsExporter(telemetry, metrics);
            });
        }
    }

    // Applies the Serilog pipeline: minimum level, framework overrides, enrichers and console sink.
    private static void ConfigureSerilog(
        LoggerConfiguration configuration,
        PolicyEngineTelemetryOptions telemetry,
        string environmentName,
        string applicationName)
    {
        // Enum.TryParse also succeeds for numeric text such as "9", which yields a value that
        // is not a defined level and would filter out every log event. Treat anything that is
        // not a defined level the same as an unparsable name and fall back to Information.
        if (!Enum.TryParse(telemetry.MinimumLogLevel, ignoreCase: true, out LogEventLevel level)
            || !Enum.IsDefined(level))
        {
            level = LogEventLevel.Information;
        }

        configuration
            .MinimumLevel.Is(level)
            .MinimumLevel.Override("Microsoft", LogEventLevel.Warning)
            .MinimumLevel.Override("Microsoft.Hosting.Lifetime", LogEventLevel.Information)
            .Enrich.FromLogContext()
            .Enrich.With<PolicyEngineActivityEnricher>()
            .Enrich.WithProperty("service.name", telemetry.ServiceName ?? applicationName)
            .Enrich.WithProperty("deployment.environment", environmentName)
            .WriteTo.Console(outputTemplate: "[{Timestamp:O}] [{Level:u3}] {Message:lj} {Properties}{NewLine}{Exception}");
    }

    // Adds the OTLP exporter when an endpoint is configured and, independently, the console
    // exporter when console export is requested.
    private static void ConfigureTracingExporter(PolicyEngineTelemetryOptions telemetry, TracerProviderBuilder tracing)
    {
        if (!string.IsNullOrWhiteSpace(telemetry.OtlpEndpoint))
        {
            tracing.AddOtlpExporter(options =>
            {
                options.Endpoint = new Uri(telemetry.OtlpEndpoint);
                var headers = BuildHeaders(telemetry);
                if (!string.IsNullOrEmpty(headers))
                {
                    options.Headers = headers;
                }
            });
        }

        if (telemetry.ExportConsole)
        {
            tracing.AddConsoleExporter();
        }
    }

    // Metrics counterpart of ConfigureTracingExporter, with the same exporter selection rules.
    private static void ConfigureMetricsExporter(PolicyEngineTelemetryOptions telemetry, MeterProviderBuilder metrics)
    {
        if (!string.IsNullOrWhiteSpace(telemetry.OtlpEndpoint))
        {
            metrics.AddOtlpExporter(options =>
            {
                options.Endpoint = new Uri(telemetry.OtlpEndpoint);
                var headers = BuildHeaders(telemetry);
                if (!string.IsNullOrEmpty(headers))
                {
                    options.Headers = headers;
                }
            });
        }

        if (telemetry.ExportConsole)
        {
            metrics.AddConsoleExporter();
        }
    }

    // Renders the configured OTLP headers as "key=value" pairs joined by commas, skipping
    // entries with a blank key or value. Returns null when no headers are configured; the
    // result can be an empty string when every entry was skipped.
    private static string? BuildHeaders(PolicyEngineTelemetryOptions telemetry)
    {
        if (telemetry.OtlpHeaders.Count == 0)
        {
            return null;
        }

        return string.Join(",", telemetry.OtlpHeaders
            .Where(static kvp => !string.IsNullOrWhiteSpace(kvp.Key) && !string.IsNullOrWhiteSpace(kvp.Value))
            .Select(static kvp => $"{kvp.Key}={kvp.Value}"));
    }
}
/// <summary>
/// Serilog enricher that copies context from the current <see cref="Activity"/> onto log
/// events: trace identifiers plus the policy, run and tenant tags when present.
/// </summary>
internal sealed class PolicyEngineActivityEnricher : ILogEventEnricher
{
    // Activity tag name -> log property name, in the order the properties are added.
    private static readonly (string Tag, string Property)[] TagProperties =
    {
        ("policy.id", "policy_id"),
        ("run.id", "run_id"),
        ("tenant", "tenant"),
    };

    /// <summary>
    /// Adds the activity-derived properties to <paramref name="logEvent"/>. Properties already
    /// present on the event are left untouched; nothing is added when no activity is current.
    /// </summary>
    public void Enrich(LogEvent logEvent, ILogEventPropertyFactory propertyFactory)
    {
        if (Activity.Current is not { } current)
        {
            return;
        }

        void Attach(string name, string value)
            => logEvent.AddPropertyIfAbsent(propertyFactory.CreateProperty(name, value));

        // W3C trace context: identifiers are only attached when they are not the default value.
        if (current.TraceId != default)
        {
            Attach("trace_id", current.TraceId.ToString());
        }

        if (current.SpanId != default)
        {
            Attach("span_id", current.SpanId.ToString());
        }

        if (current.ParentSpanId != default)
        {
            Attach("parent_span_id", current.ParentSpanId.ToString());
        }

        var traceState = current.TraceStateString;
        if (!string.IsNullOrEmpty(traceState))
        {
            Attach("trace_state", traceState);
        }

        // Policy Engine specific context, taken from the activity's tags when available.
        foreach (var (tag, property) in TagProperties)
        {
            var text = current.GetTagItem(tag)?.ToString();
            if (!string.IsNullOrEmpty(text))
            {
                Attach(property, text);
            }
        }
    }
}