using System.Diagnostics;
using System.Diagnostics.Metrics;

namespace StellaOps.Policy.Engine.Telemetry;

/// <summary>
/// Telemetry instrumentation for the Policy Engine service.
/// Provides metrics, traces, and structured logging correlation.
/// </summary>
/// <remarks>
/// Observable gauges invoke their registered callback each time the gauge is observed.
/// A gauge with no registered callback reports no measurements.
/// </remarks>
public static class PolicyEngineTelemetry
{
    // NOTE(review): measurement types in this class (long counters, double histograms, int gauges
    // for counts/flags, double gauges for ratios) are inferred from how each instrument is used.
    // Confirm they match the signatures existing callers compile against.

    /// <summary>
    /// The name of the meter used for Policy Engine metrics.
    /// </summary>
    public const string MeterName = "StellaOps.Policy.Engine";

    /// <summary>
    /// The name of the activity source used for Policy Engine traces.
    /// </summary>
    public const string ActivitySourceName = "StellaOps.Policy.Engine";

    private static readonly Meter Meter = new(MeterName);

    /// <summary>
    /// The activity source used for Policy Engine traces.
    /// </summary>
    public static readonly ActivitySource ActivitySource = new(ActivitySourceName);

    // Histogram: policy_run_seconds{mode,tenant,policy,outcome}
    private static readonly Histogram<double> PolicyRunSecondsHistogram =
        Meter.CreateHistogram<double>(
            "policy_run_seconds",
            unit: "s",
            description: "Duration of policy evaluation runs.");

    // Gauge: policy_run_queue_depth{tenant}
    private static readonly ObservableGauge<int> PolicyRunQueueDepthGauge =
        Meter.CreateObservableGauge<int>(
            "policy_run_queue_depth",
            observeValues: () => Observe(QueueDepthObserver),
            unit: "jobs",
            description: "Current depth of pending policy run jobs per tenant.");

    // Counter: policy_rules_fired_total{policy,rule}
    private static readonly Counter<long> PolicyRulesFiredCounter =
        Meter.CreateCounter<long>(
            "policy_rules_fired_total",
            unit: "rules",
            description: "Total number of policy rules that fired during evaluation.");

    // Counter: policy_vex_overrides_total{policy,vendor}
    private static readonly Counter<long> PolicyVexOverridesCounter =
        Meter.CreateCounter<long>(
            "policy_vex_overrides_total",
            unit: "overrides",
            description: "Total number of VEX overrides applied during policy evaluation.");

    // Counter: policy_compilation_total{outcome}
    private static readonly Counter<long> PolicyCompilationCounter =
        Meter.CreateCounter<long>(
            "policy_compilation_total",
            unit: "compilations",
            description: "Total number of policy compilations attempted.");

    // Histogram: policy_compilation_seconds{outcome}
    private static readonly Histogram<double> PolicyCompilationSecondsHistogram =
        Meter.CreateHistogram<double>(
            "policy_compilation_seconds",
            unit: "s",
            description: "Duration of policy compilation.");

    // Counter: policy_simulation_total{tenant,outcome}
    private static readonly Counter<long> PolicySimulationCounter =
        Meter.CreateCounter<long>(
            "policy_simulation_total",
            unit: "simulations",
            description: "Total number of policy simulations executed.");

    // Counter: policy_rate_limit_exceeded_total{tenant,endpoint}
    private static readonly Counter<long> RateLimitExceededCounter =
        Meter.CreateCounter<long>(
            "policy_rate_limit_exceeded_total",
            unit: "requests",
            description: "Total requests rejected due to rate limiting.");

    /// <summary>
    /// Records a rate limit exceeded event.
    /// </summary>
    /// <param name="tenant">The tenant ID; <c>null</c> is reported as "anonymous".</param>
    /// <param name="endpoint">The endpoint that was rate limited; <c>null</c> is reported as "simulation".</param>
    public static void RecordRateLimitExceeded(string? tenant = null, string? endpoint = null)
    {
        // Only null selects the named fallback; empty/whitespace values flow through
        // NormalizeTag and are reported as "unknown".
        var tags = new TagList
        {
            { "tenant", NormalizeTag(tenant ?? "anonymous") },
            { "endpoint", NormalizeTag(endpoint ?? "simulation") },
        };

        RateLimitExceededCounter.Add(1, tags);
    }

    #region Entropy Metrics

    // Counter: policy_entropy_penalty_total{outcome}
    private static readonly Counter<long> EntropyPenaltyCounter =
        Meter.CreateCounter<long>(
            "policy_entropy_penalty_total",
            unit: "penalties",
            description: "Total entropy penalties computed from scanner evidence.");

    // Histogram: policy_entropy_penalty_value{outcome}
    private static readonly Histogram<double> EntropyPenaltyHistogram =
        Meter.CreateHistogram<double>(
            "policy_entropy_penalty_value",
            unit: "ratio",
            description: "Entropy penalty values (after cap).");

    // Histogram: policy_entropy_image_opaque_ratio{outcome}
    private static readonly Histogram<double> EntropyImageOpaqueRatioHistogram =
        Meter.CreateHistogram<double>(
            "policy_entropy_image_opaque_ratio",
            unit: "ratio",
            description: "Image opaque ratios observed in layer summaries.");

    // Histogram: policy_entropy_top_file_ratio{outcome}
    private static readonly Histogram<double> EntropyTopFileRatioHistogram =
        Meter.CreateHistogram<double>(
            "policy_entropy_top_file_ratio",
            unit: "ratio",
            description: "Opaque ratio of the top offending file when present.");

    /// <summary>
    /// Records an entropy penalty computation.
    /// </summary>
    /// <param name="penalty">Penalty value to record.</param>
    /// <param name="outcome">Outcome label attached to every measurement.</param>
    /// <param name="imageOpaqueRatio">Image opaque ratio to record.</param>
    /// <param name="topFileOpaqueRatio">Opaque ratio of the top file; not recorded when <c>null</c>.</param>
    public static void RecordEntropyPenalty(
        double penalty,
        string outcome,
        double imageOpaqueRatio,
        double? topFileOpaqueRatio = null)
    {
        var tags = new TagList
        {
            { "outcome", NormalizeTag(outcome) },
        };

        EntropyPenaltyCounter.Add(1, tags);
        EntropyPenaltyHistogram.Record(penalty, tags);
        EntropyImageOpaqueRatioHistogram.Record(imageOpaqueRatio, tags);

        if (topFileOpaqueRatio.HasValue)
        {
            EntropyTopFileRatioHistogram.Record(topFileOpaqueRatio.Value, tags);
        }
    }

    #endregion

    #region Golden Signals - Latency

    // Histogram: policy_api_latency_seconds{endpoint,method,status}
    private static readonly Histogram<double> ApiLatencyHistogram =
        Meter.CreateHistogram<double>(
            "policy_api_latency_seconds",
            unit: "s",
            description: "API request latency by endpoint.");

    // Histogram: policy_evaluation_latency_seconds{tenant,policy}
    private static readonly Histogram<double> EvaluationLatencyHistogram =
        Meter.CreateHistogram<double>(
            "policy_evaluation_latency_seconds",
            unit: "s",
            description: "Policy evaluation latency per batch.");

    #endregion

    #region Golden Signals - Traffic

    // Counter: policy_requests_total{endpoint,method}
    private static readonly Counter<long> RequestsCounter =
        Meter.CreateCounter<long>(
            "policy_requests_total",
            unit: "requests",
            description: "Total API requests by endpoint and method.");

    // Counter: policy_evaluations_total{tenant,policy,mode}
    private static readonly Counter<long> EvaluationsCounter =
        Meter.CreateCounter<long>(
            "policy_evaluations_total",
            unit: "evaluations",
            description: "Total policy evaluations by tenant, policy, and mode.");

    // Counter: policy_findings_materialized_total{tenant,policy}
    private static readonly Counter<long> FindingsMaterializedCounter =
        Meter.CreateCounter<long>(
            "policy_findings_materialized_total",
            unit: "findings",
            description: "Total findings materialized during policy evaluation.");

    #endregion

    #region Golden Signals - Errors

    // Counter: policy_errors_total{type,tenant}
    private static readonly Counter<long> ErrorsCounter =
        Meter.CreateCounter<long>(
            "policy_errors_total",
            unit: "errors",
            description: "Total errors by type (compilation, evaluation, api, storage).");

    // Counter: policy_api_errors_total{endpoint,status_code}
    private static readonly Counter<long> ApiErrorsCounter =
        Meter.CreateCounter<long>(
            "policy_api_errors_total",
            unit: "errors",
            description: "Total API errors by endpoint and status code.");

    // Counter: policy_evaluation_failures_total{tenant,policy,reason}
    private static readonly Counter<long> EvaluationFailuresCounter =
        Meter.CreateCounter<long>(
            "policy_evaluation_failures_total",
            unit: "failures",
            description: "Total evaluation failures by reason (timeout, determinism, storage, canceled).");

    #endregion

    #region Golden Signals - Saturation

    // Gauge: policy_concurrent_evaluations{tenant}
    private static readonly ObservableGauge<int> ConcurrentEvaluationsGauge =
        Meter.CreateObservableGauge<int>(
            "policy_concurrent_evaluations",
            observeValues: () => Observe(ConcurrentEvaluationsObserver),
            unit: "evaluations",
            description: "Current number of concurrent policy evaluations.");

    // Gauge: policy_worker_utilization
    private static readonly ObservableGauge<double> WorkerUtilizationGauge =
        Meter.CreateObservableGauge<double>(
            "policy_worker_utilization",
            observeValues: () => Observe(WorkerUtilizationObserver),
            unit: "ratio",
            description: "Worker pool utilization ratio (0.0 to 1.0).");

    #endregion

    #region SLO Metrics

    // Gauge: policy_slo_burn_rate{slo_name}
    private static readonly ObservableGauge<double> SloBurnRateGauge =
        Meter.CreateObservableGauge<double>(
            "policy_slo_burn_rate",
            observeValues: () => Observe(SloBurnRateObserver),
            unit: "ratio",
            description: "SLO burn rate over configured window.");

    // Gauge: policy_error_budget_remaining{slo_name}
    private static readonly ObservableGauge<double> ErrorBudgetRemainingGauge =
        Meter.CreateObservableGauge<double>(
            "policy_error_budget_remaining",
            observeValues: () => Observe(ErrorBudgetObserver),
            unit: "ratio",
            description: "Remaining error budget as ratio (0.0 to 1.0).");

    // Counter: policy_slo_violations_total{slo_name}
    private static readonly Counter<long> SloViolationsCounter =
        Meter.CreateCounter<long>(
            "policy_slo_violations_total",
            unit: "violations",
            description: "Total SLO violations detected.");

    #endregion

    #region Risk Scoring Metrics

    // Counter: policy_risk_scoring_jobs_created_total
    private static readonly Counter<long> RiskScoringJobsCreatedCounter =
        Meter.CreateCounter<long>(
            "policy_risk_scoring_jobs_created_total",
            unit: "jobs",
            description: "Total risk scoring jobs created.");

    // Counter: policy_risk_scoring_triggers_skipped_total
    private static readonly Counter<long> RiskScoringTriggersSkippedCounter =
        Meter.CreateCounter<long>(
            "policy_risk_scoring_triggers_skipped_total",
            unit: "triggers",
            description: "Total risk scoring triggers skipped due to deduplication.");

    // Histogram: policy_risk_scoring_duration_seconds{profile_id,finding_count}
    private static readonly Histogram<double> RiskScoringDurationHistogram =
        Meter.CreateHistogram<double>(
            "policy_risk_scoring_duration_seconds",
            unit: "s",
            description: "Duration of risk scoring job execution.");

    // Counter: policy_risk_scoring_findings_scored_total{profile_id}
    private static readonly Counter<long> RiskScoringFindingsScoredCounter =
        Meter.CreateCounter<long>(
            "policy_risk_scoring_findings_scored_total",
            unit: "findings",
            description: "Total findings scored by risk scoring jobs.");

    /// <summary>
    /// Counter for risk scoring jobs created.
    /// </summary>
    public static Counter<long> RiskScoringJobsCreated => RiskScoringJobsCreatedCounter;

    /// <summary>
    /// Counter for risk scoring triggers skipped.
    /// </summary>
    public static Counter<long> RiskScoringTriggersSkipped => RiskScoringTriggersSkippedCounter;

    /// <summary>
    /// Records risk scoring duration.
    /// </summary>
    /// <param name="seconds">Duration in seconds.</param>
    /// <param name="profileId">Profile identifier.</param>
    /// <param name="findingCount">Number of findings scored.</param>
    public static void RecordRiskScoringDuration(double seconds, string profileId, int findingCount)
    {
        // NOTE(review): the raw finding count is emitted as a tag value, so every distinct count
        // yields a distinct tag combination. Consider bucketing if cardinality becomes a problem.
        var tags = new TagList
        {
            { "profile_id", NormalizeTag(profileId) },
            { "finding_count", ToInvariantTag(findingCount) },
        };

        RiskScoringDurationHistogram.Record(seconds, tags);
    }

    /// <summary>
    /// Records findings scored by risk scoring.
    /// </summary>
    /// <param name="profileId">Profile identifier.</param>
    /// <param name="count">Number of findings scored.</param>
    public static void RecordFindingsScored(string profileId, long count)
    {
        var tags = new TagList
        {
            { "profile_id", NormalizeTag(profileId) },
        };

        RiskScoringFindingsScoredCounter.Add(count, tags);
    }

    #endregion

    #region Risk Simulation and Events Metrics

    // Counter: policy_risk_simulations_run_total
    private static readonly Counter<long> RiskSimulationsRunCounter =
        Meter.CreateCounter<long>(
            "policy_risk_simulations_run_total",
            unit: "simulations",
            description: "Total risk simulations executed.");

    // Counter: policy_profile_events_published_total
    private static readonly Counter<long> ProfileEventsPublishedCounter =
        Meter.CreateCounter<long>(
            "policy_profile_events_published_total",
            unit: "events",
            description: "Total profile lifecycle events published.");

    /// <summary>
    /// Counter for risk simulations run.
    /// </summary>
    public static Counter<long> RiskSimulationsRun => RiskSimulationsRunCounter;

    /// <summary>
    /// Counter for profile events published.
    /// </summary>
    public static Counter<long> ProfileEventsPublished => ProfileEventsPublishedCounter;

    // Counter: policy_events_processed_total
    private static readonly Counter<long> PolicyEventsProcessedCounter =
        Meter.CreateCounter<long>(
            "policy_events_processed_total",
            unit: "events",
            description: "Total policy change events processed.");

    /// <summary>
    /// Counter for policy change events processed.
    /// </summary>
    public static Counter<long> PolicyEventsProcessed => PolicyEventsProcessedCounter;

    // Counter: policy_effective_events_published_total
    private static readonly Counter<long> PolicyEffectiveEventsPublishedCounter =
        Meter.CreateCounter<long>(
            "policy_effective_events_published_total",
            unit: "events",
            description: "Total policy.effective.* events published.");

    /// <summary>
    /// Counter for policy effective events published.
    /// </summary>
    public static Counter<long> PolicyEffectiveEventsPublished => PolicyEffectiveEventsPublishedCounter;

    // Counter: policy_reevaluation_jobs_scheduled_total
    private static readonly Counter<long> ReEvaluationJobsScheduledCounter =
        Meter.CreateCounter<long>(
            "policy_reevaluation_jobs_scheduled_total",
            unit: "jobs",
            description: "Total re-evaluation jobs scheduled.");

    /// <summary>
    /// Counter for re-evaluation jobs scheduled.
    /// </summary>
    public static Counter<long> ReEvaluationJobsScheduled => ReEvaluationJobsScheduledCounter;

    // Counter: policy_explain_traces_stored_total
    private static readonly Counter<long> ExplainTracesStoredCounter =
        Meter.CreateCounter<long>(
            "policy_explain_traces_stored_total",
            unit: "traces",
            description: "Total explain traces stored for decision audit.");

    /// <summary>
    /// Counter for explain traces stored.
    /// </summary>
    public static Counter<long> ExplainTracesStored => ExplainTracesStoredCounter;

    // Counter: policy_effective_decision_map_operations_total
    private static readonly Counter<long> EffectiveDecisionMapOperationsCounter =
        Meter.CreateCounter<long>(
            "policy_effective_decision_map_operations_total",
            unit: "operations",
            description: "Total effective decision map operations (set, get, invalidate).");

    /// <summary>
    /// Counter for effective decision map operations.
    /// </summary>
    public static Counter<long> EffectiveDecisionMapOperations => EffectiveDecisionMapOperationsCounter;

    // Counter: policy_exception_operations_total{tenant,operation}
    private static readonly Counter<long> ExceptionOperationsCounter =
        Meter.CreateCounter<long>(
            "policy_exception_operations_total",
            unit: "operations",
            description: "Total policy exception operations (create, update, revoke, review_*).");

    /// <summary>
    /// Counter for policy exception operations.
    /// </summary>
    public static Counter<long> ExceptionOperations => ExceptionOperationsCounter;

    // Counter: policy_exception_cache_operations_total{tenant,operation}
    private static readonly Counter<long> ExceptionCacheOperationsCounter =
        Meter.CreateCounter<long>(
            "policy_exception_cache_operations_total",
            unit: "operations",
            description: "Total exception cache operations (hit, miss, set, warm, invalidate).");

    // Counter: policy_exception_applications_total{tenant,effect}
    private static readonly Counter<long> ExceptionApplicationsCounter =
        Meter.CreateCounter<long>(
            "policy_exception_applications_total",
            unit: "applications",
            description: "Total applied exceptions during evaluation by effect type.");

    // Histogram: policy_exception_application_latency_seconds{tenant,effect}
    private static readonly Histogram<double> ExceptionApplicationLatencyHistogram =
        Meter.CreateHistogram<double>(
            "policy_exception_application_latency_seconds",
            unit: "s",
            description: "Latency impact of exception application during evaluation.");

    // Counter: policy_exception_lifecycle_total{tenant,event}
    private static readonly Counter<long> ExceptionLifecycleCounter =
        Meter.CreateCounter<long>(
            "policy_exception_lifecycle_total",
            unit: "events",
            description: "Lifecycle events for exceptions (activated, expired, revoked).");

    /// <summary>
    /// Counter for exception cache operations.
    /// </summary>
    public static Counter<long> ExceptionCacheOperations => ExceptionCacheOperationsCounter;

    #endregion

    #region Reachability Metrics

    // Counter: policy_reachability_applied_total{state}
    private static readonly Counter<long> ReachabilityAppliedCounter =
        Meter.CreateCounter<long>(
            "policy_reachability_applied_total",
            unit: "facts",
            description: "Total reachability facts applied during policy evaluation.");

    // Counter: policy_reachability_cache_hits_total
    private static readonly Counter<long> ReachabilityCacheHitsCounter =
        Meter.CreateCounter<long>(
            "policy_reachability_cache_hits_total",
            unit: "hits",
            description: "Total reachability facts cache hits.");

    // Counter: policy_reachability_cache_misses_total
    private static readonly Counter<long> ReachabilityCacheMissesCounter =
        Meter.CreateCounter<long>(
            "policy_reachability_cache_misses_total",
            unit: "misses",
            description: "Total reachability facts cache misses.");

    // Gauge: policy_reachability_cache_hit_ratio
    private static readonly ObservableGauge<double> ReachabilityCacheHitRatioGauge =
        Meter.CreateObservableGauge<double>(
            "policy_reachability_cache_hit_ratio",
            observeValues: () => Observe(ReachabilityCacheHitRatioObserver),
            unit: "ratio",
            description: "Reachability facts cache hit ratio (0.0 to 1.0).");

    // Counter: policy_reachability_lookups_total{outcome}
    private static readonly Counter<long> ReachabilityLookupsCounter =
        Meter.CreateCounter<long>(
            "policy_reachability_lookups_total",
            unit: "lookups",
            description: "Total reachability facts lookup operations.");

    // Histogram: policy_reachability_lookup_seconds{outcome}
    private static readonly Histogram<double> ReachabilityLookupSecondsHistogram =
        Meter.CreateHistogram<double>(
            "policy_reachability_lookup_seconds",
            unit: "s",
            description: "Duration of reachability facts lookup operations.");

    // Callback behind policy_reachability_cache_hit_ratio; null until one is registered.
    private static Func<IEnumerable<Measurement<double>>>? ReachabilityCacheHitRatioObserver;

    /// <summary>
    /// Records reachability fact applied during evaluation.
    /// </summary>
    /// <param name="state">Reachability state (reachable, unreachable, unknown, under_investigation).</param>
    /// <param name="count">Number of facts.</param>
    public static void RecordReachabilityApplied(string state, long count = 1)
    {
        var tags = new TagList
        {
            { "state", NormalizeTag(state) },
        };

        ReachabilityAppliedCounter.Add(count, tags);
    }

    /// <summary>
    /// Records reachability cache hits.
    /// </summary>
    /// <param name="count">Number of hits.</param>
    public static void RecordReachabilityCacheHits(long count)
    {
        ReachabilityCacheHitsCounter.Add(count);
    }

    /// <summary>
    /// Records reachability cache misses.
    /// </summary>
    /// <param name="count">Number of misses.</param>
    public static void RecordReachabilityCacheMisses(long count)
    {
        ReachabilityCacheMissesCounter.Add(count);
    }

    /// <summary>
    /// Records a reachability lookup operation.
    /// </summary>
    /// <param name="outcome">Outcome (found, not_found, error).</param>
    /// <param name="seconds">Duration in seconds.</param>
    /// <param name="batchSize">Number of items looked up; added to the lookups counter.</param>
    public static void RecordReachabilityLookup(string outcome, double seconds, int batchSize)
    {
        var tags = new TagList
        {
            { "outcome", NormalizeTag(outcome) },
        };

        ReachabilityLookupsCounter.Add(batchSize, tags);
        ReachabilityLookupSecondsHistogram.Record(seconds, tags);
    }

    /// <summary>
    /// Registers a callback to observe reachability cache hit ratio.
    /// </summary>
    /// <param name="observeFunc">Function that returns current cache hit ratio measurements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is <c>null</c>.</exception>
    public static void RegisterReachabilityCacheHitRatioObservation(Func<IEnumerable<Measurement<double>>> observeFunc)
    {
        ArgumentNullException.ThrowIfNull(observeFunc);

        // Keep the delegate rather than its result so each gauge observation sees current values.
        ReachabilityCacheHitRatioObserver = observeFunc;
    }

    #endregion

    #region AirGap/Staleness Metrics

    // Counter: policy_airgap_staleness_events_total{tenant,event_type}
    private static readonly Counter<long> StalenessEventsCounter =
        Meter.CreateCounter<long>(
            "policy_airgap_staleness_events_total",
            unit: "events",
            description: "Total staleness events by type (warning, breach, recovered, anchor_missing).");

    // Gauge: policy_airgap_sealed
    private static readonly ObservableGauge<int> AirGapSealedGauge =
        Meter.CreateObservableGauge<int>(
            "policy_airgap_sealed",
            observeValues: () => Observe(AirGapSealedObserver),
            unit: "boolean",
            description: "1 if sealed, 0 if unsealed.");

    // Gauge: policy_airgap_anchor_age_seconds
    // NOTE(review): whole seconds (int) assumed for the anchor age measurement — confirm.
    private static readonly ObservableGauge<int> AnchorAgeGauge =
        Meter.CreateObservableGauge<int>(
            "policy_airgap_anchor_age_seconds",
            observeValues: () => Observe(AnchorAgeObserver),
            unit: "s",
            description: "Current age of the time anchor in seconds.");

    // Callbacks behind the air-gap gauges; null until registered.
    private static Func<IEnumerable<Measurement<int>>>? AirGapSealedObserver;
    private static Func<IEnumerable<Measurement<int>>>? AnchorAgeObserver;

    /// <summary>
    /// Records a staleness event.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="eventType">Event type (warning, breach, recovered, anchor_missing).</param>
    public static void RecordStalenessEvent(string tenant, string eventType)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "event_type", NormalizeTag(eventType) },
        };

        StalenessEventsCounter.Add(1, tags);
    }

    /// <summary>
    /// Registers a callback to observe air-gap sealed state.
    /// </summary>
    /// <param name="observeFunc">Function that returns current sealed state measurements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is <c>null</c>.</exception>
    public static void RegisterAirGapSealedObservation(Func<IEnumerable<Measurement<int>>> observeFunc)
    {
        ArgumentNullException.ThrowIfNull(observeFunc);

        // Keep the delegate rather than its result so each gauge observation sees current values.
        AirGapSealedObserver = observeFunc;
    }

    /// <summary>
    /// Registers a callback to observe time anchor age.
    /// </summary>
    /// <param name="observeFunc">Function that returns current anchor age measurements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is <c>null</c>.</exception>
    public static void RegisterAnchorAgeObservation(Func<IEnumerable<Measurement<int>>> observeFunc)
    {
        ArgumentNullException.ThrowIfNull(observeFunc);

        // Keep the delegate rather than its result so each gauge observation sees current values.
        AnchorAgeObserver = observeFunc;
    }

    #endregion

    // Callbacks behind the remaining observable gauges; each stays null until registered.
    private static Func<IEnumerable<Measurement<int>>>? QueueDepthObserver;
    private static Func<IEnumerable<Measurement<int>>>? ConcurrentEvaluationsObserver;
    private static Func<IEnumerable<Measurement<double>>>? WorkerUtilizationObserver;
    private static Func<IEnumerable<Measurement<double>>>? SloBurnRateObserver;
    private static Func<IEnumerable<Measurement<double>>>? ErrorBudgetObserver;

    /// <summary>
    /// Registers a callback to observe queue depth measurements.
    /// </summary>
    /// <param name="observeFunc">Function that returns current queue depth measurements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is <c>null</c>.</exception>
    public static void RegisterQueueDepthObservation(Func<IEnumerable<Measurement<int>>> observeFunc)
    {
        ArgumentNullException.ThrowIfNull(observeFunc);

        // Keep the delegate rather than its result so each gauge observation sees current values.
        QueueDepthObserver = observeFunc;
    }

    /// <summary>
    /// Records the duration of a policy run.
    /// </summary>
    /// <param name="seconds">Duration in seconds.</param>
    /// <param name="mode">Run mode (full, incremental, simulate).</param>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policy">Policy identifier.</param>
    /// <param name="outcome">Outcome of the run (success, failure, canceled).</param>
    public static void RecordRunDuration(double seconds, string mode, string tenant, string policy, string outcome)
    {
        var tags = new TagList
        {
            { "mode", NormalizeTag(mode) },
            { "tenant", NormalizeTenant(tenant) },
            { "policy", NormalizeTag(policy) },
            { "outcome", NormalizeTag(outcome) },
        };

        PolicyRunSecondsHistogram.Record(seconds, tags);
    }

    /// <summary>
    /// Records that a policy rule fired during evaluation.
    /// </summary>
    /// <param name="policy">Policy identifier.</param>
    /// <param name="rule">Rule identifier.</param>
    /// <param name="count">Number of times the rule fired.</param>
    public static void RecordRuleFired(string policy, string rule, long count = 1)
    {
        var tags = new TagList
        {
            { "policy", NormalizeTag(policy) },
            { "rule", NormalizeTag(rule) },
        };

        PolicyRulesFiredCounter.Add(count, tags);
    }

    /// <summary>
    /// Records a VEX override applied during policy evaluation.
    /// </summary>
    /// <param name="policy">Policy identifier.</param>
    /// <param name="vendor">VEX vendor identifier.</param>
    /// <param name="count">Number of overrides.</param>
    public static void RecordVexOverride(string policy, string vendor, long count = 1)
    {
        var tags = new TagList
        {
            { "policy", NormalizeTag(policy) },
            { "vendor", NormalizeTag(vendor) },
        };

        PolicyVexOverridesCounter.Add(count, tags);
    }

    /// <summary>
    /// Records a policy compilation attempt.
    /// </summary>
    /// <param name="outcome">Outcome (success, failure).</param>
    /// <param name="seconds">Duration in seconds.</param>
    public static void RecordCompilation(string outcome, double seconds)
    {
        var tags = new TagList
        {
            { "outcome", NormalizeTag(outcome) },
        };

        PolicyCompilationCounter.Add(1, tags);
        PolicyCompilationSecondsHistogram.Record(seconds, tags);
    }

    /// <summary>
    /// Records a policy simulation execution.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="outcome">Outcome (success, failure).</param>
    public static void RecordSimulation(string tenant, string outcome)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "outcome", NormalizeTag(outcome) },
        };

        PolicySimulationCounter.Add(1, tags);
    }

    /// <summary>
    /// Records a policy exception operation.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="operation">Operation type (create, update, revoke, review_create, review_decision_*, etc.).</param>
    public static void RecordExceptionOperation(string tenant, string operation)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "operation", NormalizeTag(operation) },
        };

        ExceptionOperationsCounter.Add(1, tags);
    }

    /// <summary>
    /// Records an exception cache operation.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="operation">Operation type (hit, miss, set, warm, invalidate_*, event_*).</param>
    public static void RecordExceptionCacheOperation(string tenant, string operation)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "operation", NormalizeTag(operation) },
        };

        ExceptionCacheOperationsCounter.Add(1, tags);
    }

    /// <summary>
    /// Records that an exception was applied during evaluation.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="effectType">Effect type, emitted as the "effect" tag.</param>
    public static void RecordExceptionApplication(string tenant, string effectType)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "effect", NormalizeTag(effectType) },
        };

        ExceptionApplicationsCounter.Add(1, tags);
    }

    /// <summary>
    /// Records latency attributed to exception application during evaluation.
    /// </summary>
    /// <param name="seconds">Latency in seconds.</param>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="effectType">Effect type, emitted as the "effect" tag.</param>
    public static void RecordExceptionApplicationLatency(double seconds, string tenant, string effectType)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "effect", NormalizeTag(effectType) },
        };

        ExceptionApplicationLatencyHistogram.Record(seconds, tags);
    }

    /// <summary>
    /// Records an exception lifecycle event (activated, expired, revoked).
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="eventType">Lifecycle event type, emitted as the "event" tag.</param>
    public static void RecordExceptionLifecycle(string tenant, string eventType)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "event", NormalizeTag(eventType) },
        };

        ExceptionLifecycleCounter.Add(1, tags);
    }

    #region Golden Signals - Recording Methods

    /// <summary>
    /// Records API request latency.
    /// </summary>
    /// <param name="seconds">Latency in seconds.</param>
    /// <param name="endpoint">API endpoint name.</param>
    /// <param name="method">HTTP method.</param>
    /// <param name="statusCode">HTTP status code.</param>
    public static void RecordApiLatency(double seconds, string endpoint, string method, int statusCode)
    {
        var tags = new TagList
        {
            { "endpoint", NormalizeTag(endpoint) },
            { "method", NormalizeTag(method) },
            { "status", ToInvariantTag(statusCode) },
        };

        ApiLatencyHistogram.Record(seconds, tags);
    }

    /// <summary>
    /// Records policy evaluation latency for a batch.
    /// </summary>
    /// <param name="seconds">Latency in seconds.</param>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policy">Policy identifier.</param>
    public static void RecordEvaluationLatency(double seconds, string tenant, string policy)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "policy", NormalizeTag(policy) },
        };

        EvaluationLatencyHistogram.Record(seconds, tags);
    }

    /// <summary>
    /// Records an API request.
    /// </summary>
    /// <param name="endpoint">API endpoint name.</param>
    /// <param name="method">HTTP method.</param>
    public static void RecordRequest(string endpoint, string method)
    {
        var tags = new TagList
        {
            { "endpoint", NormalizeTag(endpoint) },
            { "method", NormalizeTag(method) },
        };

        RequestsCounter.Add(1, tags);
    }

    /// <summary>
    /// Records a policy evaluation execution.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policy">Policy identifier.</param>
    /// <param name="mode">Evaluation mode (full, incremental, simulate).</param>
    public static void RecordEvaluation(string tenant, string policy, string mode)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "policy", NormalizeTag(policy) },
            { "mode", NormalizeTag(mode) },
        };

        EvaluationsCounter.Add(1, tags);
    }

    /// <summary>
    /// Records findings materialized during policy evaluation.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policy">Policy identifier.</param>
    /// <param name="count">Number of findings materialized.</param>
    public static void RecordFindingsMaterialized(string tenant, string policy, long count)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "policy", NormalizeTag(policy) },
        };

        FindingsMaterializedCounter.Add(count, tags);
    }

    /// <summary>
    /// Records an error.
    /// </summary>
    /// <param name="errorType">Error type (compilation, evaluation, api, storage).</param>
    /// <param name="tenant">Tenant identifier; null or blank is reported as "default".</param>
    public static void RecordError(string errorType, string? tenant = null)
    {
        var tags = new TagList
        {
            { "type", NormalizeTag(errorType) },
            { "tenant", NormalizeTenant(tenant) },
        };

        ErrorsCounter.Add(1, tags);
    }

    /// <summary>
    /// Records an API error.
    /// </summary>
    /// <param name="endpoint">API endpoint name.</param>
    /// <param name="statusCode">HTTP status code.</param>
    public static void RecordApiError(string endpoint, int statusCode)
    {
        var tags = new TagList
        {
            { "endpoint", NormalizeTag(endpoint) },
            { "status_code", ToInvariantTag(statusCode) },
        };

        ApiErrorsCounter.Add(1, tags);
    }

    /// <summary>
    /// Records an evaluation failure.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policy">Policy identifier.</param>
    /// <param name="reason">Failure reason (timeout, determinism, storage, canceled).</param>
    public static void RecordEvaluationFailure(string tenant, string policy, string reason)
    {
        var tags = new TagList
        {
            { "tenant", NormalizeTenant(tenant) },
            { "policy", NormalizeTag(policy) },
            { "reason", NormalizeTag(reason) },
        };

        EvaluationFailuresCounter.Add(1, tags);
    }

    /// <summary>
    /// Records an SLO violation.
    /// </summary>
    /// <param name="sloName">Name of the SLO that was violated.</param>
    public static void RecordSloViolation(string sloName)
    {
        var tags = new TagList
        {
            { "slo_name", NormalizeTag(sloName) },
        };

        SloViolationsCounter.Add(1, tags);
    }

    /// <summary>
    /// Registers a callback to observe concurrent evaluations measurements.
    /// </summary>
    /// <param name="observeFunc">Function that returns current concurrent evaluations measurements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is <c>null</c>.</exception>
    public static void RegisterConcurrentEvaluationsObservation(Func<IEnumerable<Measurement<int>>> observeFunc)
    {
        ArgumentNullException.ThrowIfNull(observeFunc);

        // Keep the delegate rather than its result so each gauge observation sees current values.
        ConcurrentEvaluationsObserver = observeFunc;
    }

    /// <summary>
    /// Registers a callback to observe worker utilization measurements.
    /// </summary>
    /// <param name="observeFunc">Function that returns current worker utilization measurements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is <c>null</c>.</exception>
    public static void RegisterWorkerUtilizationObservation(Func<IEnumerable<Measurement<double>>> observeFunc)
    {
        ArgumentNullException.ThrowIfNull(observeFunc);

        // Keep the delegate rather than its result so each gauge observation sees current values.
        WorkerUtilizationObserver = observeFunc;
    }

    /// <summary>
    /// Registers a callback to observe SLO burn rate measurements.
    /// </summary>
    /// <param name="observeFunc">Function that returns current SLO burn rate measurements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is <c>null</c>.</exception>
    public static void RegisterSloBurnRateObservation(Func<IEnumerable<Measurement<double>>> observeFunc)
    {
        ArgumentNullException.ThrowIfNull(observeFunc);

        // Keep the delegate rather than its result so each gauge observation sees current values.
        SloBurnRateObserver = observeFunc;
    }

    /// <summary>
    /// Registers a callback to observe error budget measurements.
    /// </summary>
    /// <param name="observeFunc">Function that returns current error budget measurements.</param>
    /// <exception cref="ArgumentNullException"><paramref name="observeFunc"/> is <c>null</c>.</exception>
    public static void RegisterErrorBudgetObservation(Func<IEnumerable<Measurement<double>>> observeFunc)
    {
        ArgumentNullException.ThrowIfNull(observeFunc);

        // Keep the delegate rather than its result so each gauge observation sees current values.
        ErrorBudgetObserver = observeFunc;
    }

    #endregion

    /// <summary>
    /// Starts an activity for selection layer operations.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policyId">Policy identifier; <c>null</c> is tagged as "unknown".</param>
    /// <returns>The started activity, or null if not sampled.</returns>
    public static Activity? StartSelectActivity(string? tenant, string? policyId)
    {
        var activity = ActivitySource.StartActivity("policy.select", ActivityKind.Internal);
        activity?.SetTag("tenant", NormalizeTenant(tenant));
        activity?.SetTag("policy.id", policyId ?? "unknown");
        return activity;
    }

    /// <summary>
    /// Starts an activity for policy evaluation.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policyId">Policy identifier; <c>null</c> is tagged as "unknown".</param>
    /// <param name="runId">Run identifier; <c>null</c> is tagged as "unknown".</param>
    /// <returns>The started activity, or null if not sampled.</returns>
    public static Activity? StartEvaluateActivity(string? tenant, string? policyId, string? runId)
    {
        var activity = ActivitySource.StartActivity("policy.evaluate", ActivityKind.Internal);
        activity?.SetTag("tenant", NormalizeTenant(tenant));
        activity?.SetTag("policy.id", policyId ?? "unknown");
        activity?.SetTag("run.id", runId ?? "unknown");
        return activity;
    }

    /// <summary>
    /// Starts an activity for materialization operations.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policyId">Policy identifier; <c>null</c> is tagged as "unknown".</param>
    /// <param name="batchSize">Number of items in the batch.</param>
    /// <returns>The started activity, or null if not sampled.</returns>
    public static Activity? StartMaterializeActivity(string? tenant, string? policyId, int batchSize)
    {
        var activity = ActivitySource.StartActivity("policy.materialize", ActivityKind.Internal);
        activity?.SetTag("tenant", NormalizeTenant(tenant));
        activity?.SetTag("policy.id", policyId ?? "unknown");
        activity?.SetTag("batch.size", batchSize);
        return activity;
    }

    /// <summary>
    /// Starts an activity for simulation operations.
    /// </summary>
    /// <param name="tenant">Tenant identifier.</param>
    /// <param name="policyId">Policy identifier; <c>null</c> is tagged as "unknown".</param>
    /// <returns>The started activity, or null if not sampled.</returns>
    public static Activity? StartSimulateActivity(string? tenant, string? policyId)
    {
        var activity = ActivitySource.StartActivity("policy.simulate", ActivityKind.Internal);
        activity?.SetTag("tenant", NormalizeTenant(tenant));
        activity?.SetTag("policy.id", policyId ?? "unknown");
        return activity;
    }

    /// <summary>
    /// Starts an activity for compilation operations.
    /// </summary>
    /// <param name="policyId">Policy identifier; <c>null</c> is tagged as "unknown".</param>
    /// <param name="version">Policy version; <c>null</c> is tagged as "unknown".</param>
    /// <returns>The started activity, or null if not sampled.</returns>
    public static Activity? StartCompileActivity(string? policyId, string? version)
    {
        var activity = ActivitySource.StartActivity("policy.compile", ActivityKind.Internal);
        activity?.SetTag("policy.id", policyId ?? "unknown");
        activity?.SetTag("policy.version", version ?? "unknown");
        return activity;
    }

    /// <summary>
    /// Invokes the registered gauge callback, if any. Yields an empty sequence when no callback
    /// is registered or when the callback itself returns <c>null</c>.
    /// </summary>
    private static IEnumerable<Measurement<T>> Observe<T>(Func<IEnumerable<Measurement<T>>>? observer)
        where T : struct
        => observer?.Invoke() ?? Enumerable.Empty<Measurement<T>>();

    /// <summary>
    /// Formats an integer tag value with the invariant culture so the emitted text does not depend
    /// on the current thread culture.
    /// </summary>
    private static string ToInvariantTag(int value)
        => value.ToString(System.Globalization.CultureInfo.InvariantCulture);

    /// <summary>Maps a null, empty, or whitespace tenant to "default".</summary>
    private static string NormalizeTenant(string? tenant)
        => string.IsNullOrWhiteSpace(tenant) ? "default" : tenant;

    /// <summary>Maps a null, empty, or whitespace tag value to "unknown".</summary>
    private static string NormalizeTag(string? value)
        => string.IsNullOrWhiteSpace(value) ? "unknown" : value;
}