using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using System.Reflection;
using System.Threading;

namespace StellaOps.Findings.Ledger.Observability;

/// <summary>
/// Central registry of the instruments published on the <c>StellaOps.Findings.Ledger</c> meter,
/// together with the static helper methods used to record into them.
/// </summary>
/// <remarks>
/// Counters and histograms are recorded directly by the <c>Record*</c> methods. Observable gauges
/// report the latest values kept in the concurrent dictionaries below; those values are replaced
/// by the <c>Update*</c>, <c>Increment*</c>/<c>Decrement*</c> and <c>Connection*</c> methods.
/// </remarks>
public static class LedgerMetrics
{
    private static readonly Meter Meter = new("StellaOps.Findings.Ledger");

    // ---------------------------------------------------------------------------------------
    // Gauge backing state.
    // Static field initializers run in textual order, so everything an observable-gauge
    // callback reads is declared BEFORE the gauges themselves. That way a callback can never
    // see a null field, even if it is invoked while this type is still being initialized.
    // ---------------------------------------------------------------------------------------

    private static readonly ConcurrentDictionary<string, long> AirgapStalenessByDomain = new(StringComparer.Ordinal);

    private static readonly ConcurrentDictionary<string, double> ProjectionLagByTenant = new(StringComparer.Ordinal);

    private static readonly ConcurrentDictionary<string, long> DbConnectionsByRole = new(StringComparer.OrdinalIgnoreCase);

    private static readonly ConcurrentDictionary<string, long> BacklogByTenant = new(StringComparer.Ordinal);

    private static readonly ConcurrentDictionary<string, SeveritySnapshot> SeverityByTenantPolicy = new(StringComparer.Ordinal);

    private static readonly ConcurrentDictionary<string, double> ScoreFreshnessByTenant = new(StringComparer.Ordinal);

    private static readonly ConcurrentDictionary<string, BucketDistributionSnapshot> EwsBucketDistributionByTenant = new(StringComparer.Ordinal);

    // Written with Interlocked.Exchange and read with Interlocked.Read so 64-bit accesses are atomic.
    private static long _ingestBacklogLimit = 5000;

    private static readonly string AppVersion = Assembly.GetExecutingAssembly().GetName().Version?.ToString() ?? "0.0.0";

    private static readonly string GitSha = Environment.GetEnvironmentVariable("GIT_SHA") ?? "unknown";

    // ---------------------------------------------------------------------------------------
    // Ledger write / projection / anchoring instruments.
    // ---------------------------------------------------------------------------------------

    private static readonly Histogram<double> WriteDurationSeconds = Meter.CreateHistogram<double>(
        "ledger_write_duration_seconds",
        unit: "s",
        description: "Latency of successful ledger append operations.");

    // Compatibility with earlier drafts
    private static readonly Histogram<double> WriteLatencySeconds = Meter.CreateHistogram<double>(
        "ledger_write_latency_seconds",
        unit: "s",
        description: "Deprecated alias for ledger_write_duration_seconds.");

    private static readonly Counter<long> EventsTotal = Meter.CreateCounter<long>(
        "ledger_events_total",
        description: "Number of ledger events appended.");

    private static readonly Counter<long> BackpressureApplied = Meter.CreateCounter<long>(
        "ledger_backpressure_applied_total",
        description: "Times ingest backpressure thresholds were exceeded.");

    private static readonly Counter<long> QuotaRejections = Meter.CreateCounter<long>(
        "ledger_quota_rejections_total",
        description: "Requests rejected due to configured quotas.");

    private static readonly Histogram<double> ProjectionApplySeconds = Meter.CreateHistogram<double>(
        "ledger_projection_apply_seconds",
        unit: "s",
        description: "Duration to apply a ledger event to the finding projection.");

    private static readonly Histogram<double> ProjectionRebuildSeconds = Meter.CreateHistogram<double>(
        "ledger_projection_rebuild_seconds",
        unit: "s",
        description: "Duration of projection replay/rebuild batches.");

    private static readonly Counter<long> ProjectionEventsTotal = Meter.CreateCounter<long>(
        "ledger_projection_events_total",
        description: "Number of ledger events applied to projections.");

    private static readonly Histogram<double> MerkleAnchorDurationSeconds = Meter.CreateHistogram<double>(
        "ledger_merkle_anchor_duration_seconds",
        unit: "s",
        description: "Duration to persist Merkle anchor batches.");

    private static readonly Counter<long> MerkleAnchorFailures = Meter.CreateCounter<long>(
        "ledger_merkle_anchor_failures_total",
        description: "Count of Merkle anchor failures by reason.");

    private static readonly Counter<long> AttachmentsEncryptionFailures = Meter.CreateCounter<long>(
        "ledger_attachments_encryption_failures_total",
        description: "Count of attachment encryption/signing/upload failures.");

    private static readonly Histogram<double> AirgapStalenessSeconds = Meter.CreateHistogram<double>(
        "ledger_airgap_staleness_seconds",
        unit: "s",
        description: "Current staleness of air-gap imported data by domain.");

    private static readonly Counter<long> StalenessValidationFailures = Meter.CreateCounter<long>(
        "ledger_staleness_validation_failures_total",
        description: "Count of staleness validation failures blocking exports.");

    private static readonly Counter<long> ScoredFindingsExports = Meter.CreateCounter<long>(
        "ledger_scored_findings_exports_total",
        description: "Count of scored findings export operations.");

    private static readonly Histogram<double> ScoredFindingsExportDuration = Meter.CreateHistogram<double>(
        "ledger_scored_findings_export_duration_seconds",
        unit: "s",
        description: "Duration of scored findings export operations.");

    private static readonly ObservableGauge<long> AirgapStalenessGauge = Meter.CreateObservableGauge<long>(
        "ledger_airgap_staleness_gauge_seconds",
        ObserveAirgapStaleness,
        unit: "s",
        description: "Current staleness of air-gap data by domain.");

    private static readonly ObservableGauge<double> ProjectionLagGauge = Meter.CreateObservableGauge<double>(
        "ledger_projection_lag_seconds",
        ObserveProjectionLag,
        unit: "s",
        description: "Lag between ledger recorded_at and projection application time.");

    private static readonly ObservableGauge<long> IngestBacklogGauge = Meter.CreateObservableGauge<long>(
        "ledger_ingest_backlog_events",
        ObserveBacklog,
        description: "Number of events buffered for ingestion/anchoring per tenant.");

    private static readonly ObservableGauge<long> QuotaRemainingGauge = Meter.CreateObservableGauge<long>(
        "ledger_quota_remaining",
        ObserveQuotaRemaining,
        description: "Remaining ingest backlog capacity before backpressure applies.");

    private static readonly ObservableGauge<long> DbConnectionsGauge = Meter.CreateObservableGauge<long>(
        "ledger_db_connections_active",
        ObserveDbConnections,
        description: "Active PostgreSQL connections by role.");

    private static readonly ObservableGauge<long> AppVersionGauge = Meter.CreateObservableGauge<long>(
        "ledger_app_version_info",
        ObserveAppVersion,
        description: "Static gauge exposing build version and git sha.");

    // ---------------------------------------------------------------------------------------
    // LEDGER-RISK-69-001: Scoring metrics/dashboards
    // ---------------------------------------------------------------------------------------

    private static readonly Histogram<double> ScoringLatencySeconds = Meter.CreateHistogram<double>(
        "ledger_scoring_latency_seconds",
        unit: "s",
        description: "Latency of risk scoring operations per finding.");

    private static readonly Counter<long> ScoringOperationsTotal = Meter.CreateCounter<long>(
        "ledger_scoring_operations_total",
        description: "Total number of scoring operations by result.");

    private static readonly Counter<long> ScoringProviderGaps = Meter.CreateCounter<long>(
        "ledger_scoring_provider_gaps_total",
        description: "Count of findings where scoring provider was unavailable or returned no data.");

    private static readonly ObservableGauge<long> SeverityCriticalGauge = Meter.CreateObservableGauge<long>(
        "ledger_severity_distribution_critical",
        ObserveSeverityCritical,
        description: "Current count of critical severity findings by tenant and policy.");

    private static readonly ObservableGauge<long> SeverityHighGauge = Meter.CreateObservableGauge<long>(
        "ledger_severity_distribution_high",
        ObserveSeverityHigh,
        description: "Current count of high severity findings by tenant and policy.");

    private static readonly ObservableGauge<long> SeverityMediumGauge = Meter.CreateObservableGauge<long>(
        "ledger_severity_distribution_medium",
        ObserveSeverityMedium,
        description: "Current count of medium severity findings by tenant and policy.");

    private static readonly ObservableGauge<long> SeverityLowGauge = Meter.CreateObservableGauge<long>(
        "ledger_severity_distribution_low",
        ObserveSeverityLow,
        description: "Current count of low severity findings by tenant and policy.");

    private static readonly ObservableGauge<long> SeverityUnknownGauge = Meter.CreateObservableGauge<long>(
        "ledger_severity_distribution_unknown",
        ObserveSeverityUnknown,
        description: "Current count of unknown/unscored findings by tenant and policy.");

    private static readonly ObservableGauge<double> ScoreFreshnessGauge = Meter.CreateObservableGauge<double>(
        "ledger_score_freshness_seconds",
        ObserveScoreFreshness,
        unit: "s",
        description: "Time since last scoring operation completed by tenant.");

    // ---------------------------------------------------------------------------------------
    // SPRINT_8200_0012_0004: Evidence-Weighted Score (EWS) Metrics
    // ---------------------------------------------------------------------------------------

    private static readonly Counter<long> EwsCalculationsTotal = Meter.CreateCounter<long>(
        "ews_calculations_total",
        description: "Total number of EWS calculations by result and bucket.");

    private static readonly Histogram<double> EwsCalculationDurationSeconds = Meter.CreateHistogram<double>(
        "ews_calculation_duration_seconds",
        unit: "s",
        description: "Duration of EWS score calculations.");

    private static readonly Counter<long> EwsBatchCalculationsTotal = Meter.CreateCounter<long>(
        "ews_batch_calculations_total",
        description: "Total number of EWS batch calculations.");

    private static readonly Histogram<int> EwsBatchSizeHistogram = Meter.CreateHistogram<int>(
        "ews_batch_size",
        description: "Distribution of EWS batch sizes.");

    private static readonly Counter<long> EwsCacheHitsTotal = Meter.CreateCounter<long>(
        "ews_cache_hits_total",
        description: "Total EWS cache hits.");

    private static readonly Counter<long> EwsCacheMissesTotal = Meter.CreateCounter<long>(
        "ews_cache_misses_total",
        description: "Total EWS cache misses.");

    private static readonly Counter<long> EwsWebhooksDeliveredTotal = Meter.CreateCounter<long>(
        "ews_webhooks_delivered_total",
        description: "Total webhooks delivered by status.");

    private static readonly Histogram<double> EwsWebhookDeliveryDurationSeconds = Meter.CreateHistogram<double>(
        "ews_webhook_delivery_duration_seconds",
        unit: "s",
        description: "Duration of webhook delivery attempts.");

    private static readonly ObservableGauge<long> EwsBucketActNowGauge = Meter.CreateObservableGauge<long>(
        "ews_bucket_distribution_act_now",
        ObserveEwsBucketActNow,
        description: "Current count of findings in ActNow bucket by tenant.");

    private static readonly ObservableGauge<long> EwsBucketScheduleNextGauge = Meter.CreateObservableGauge<long>(
        "ews_bucket_distribution_schedule_next",
        ObserveEwsBucketScheduleNext,
        description: "Current count of findings in ScheduleNext bucket by tenant.");

    private static readonly ObservableGauge<long> EwsBucketInvestigateGauge = Meter.CreateObservableGauge<long>(
        "ews_bucket_distribution_investigate",
        ObserveEwsBucketInvestigate,
        description: "Current count of findings in Investigate bucket by tenant.");

    private static readonly ObservableGauge<long> EwsBucketWatchlistGauge = Meter.CreateObservableGauge<long>(
        "ews_bucket_distribution_watchlist",
        ObserveEwsBucketWatchlist,
        description: "Current count of findings in Watchlist bucket by tenant.");

    // ---------------------------------------------------------------------------------------
    // Ledger write / projection / anchoring API.
    // ---------------------------------------------------------------------------------------

    /// <summary>
    /// Records one successful ledger append: the duration goes to both the current and the
    /// deprecated write histogram, and the event counter is incremented by one.
    /// </summary>
    /// <param name="duration">Elapsed time of the append; recorded in seconds.</param>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="eventType">Value of the <c>event_type</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="source">Value of the <c>source</c> tag; <c>null</c> is reported as an empty string.</param>
    public static void RecordWriteSuccess(TimeSpan duration, string? tenantId, string? eventType, string? source)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenantId ?? string.Empty),
            new("event_type", eventType ?? string.Empty),
            new("source", source ?? string.Empty)
        };

        var seconds = duration.TotalSeconds;
        WriteDurationSeconds.Record(seconds, tags);
        WriteLatencySeconds.Record(seconds, tags);
        EventsTotal.Add(1, tags);
    }

    /// <summary>
    /// Records the application of one ledger event to a projection and updates the tenant's
    /// projection-lag gauge value.
    /// </summary>
    /// <param name="duration">Elapsed time of the apply step; recorded in seconds.</param>
    /// <param name="lagSeconds">Lag to publish for the tenant; forwarded to <see cref="UpdateProjectionLag"/>.</param>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="eventType">Value of the <c>event_type</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="policyVersion">Value of the <c>policy_version</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="evaluationStatus">Value of the <c>evaluation_status</c> tag; <c>null</c> is reported as an empty string.</param>
    public static void RecordProjectionApply(
        TimeSpan duration,
        double lagSeconds,
        string? tenantId,
        string? eventType,
        string? policyVersion,
        string? evaluationStatus)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenantId ?? string.Empty),
            new("event_type", eventType ?? string.Empty),
            new("policy_version", policyVersion ?? string.Empty),
            new("evaluation_status", evaluationStatus ?? string.Empty)
        };

        ProjectionApplySeconds.Record(duration.TotalSeconds, tags);
        ProjectionEventsTotal.Add(1, tags);
        UpdateProjectionLag(tenantId, lagSeconds);
    }

    /// <summary>Records the duration of a projection rebuild, tagged by tenant and scenario.</summary>
    /// <param name="duration">Elapsed time of the rebuild; recorded in seconds.</param>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="scenario">Value of the <c>scenario</c> tag.</param>
    public static void RecordProjectionRebuild(TimeSpan duration, string? tenantId, string scenario)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenantId ?? string.Empty),
            new("scenario", scenario)
        };

        ProjectionRebuildSeconds.Record(duration.TotalSeconds, tags);
    }

    /// <summary>Records the duration of a Merkle anchor operation, tagged by tenant and leaf count.</summary>
    /// <param name="duration">Elapsed time; recorded in seconds.</param>
    /// <param name="tenantId">Value of the <c>tenant</c> tag.</param>
    /// <param name="leafCount">Value of the <c>leaf_count</c> tag.</param>
    public static void RecordMerkleAnchorDuration(TimeSpan duration, string tenantId, int leafCount)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenantId),
            new("leaf_count", leafCount)
        };

        MerkleAnchorDurationSeconds.Record(duration.TotalSeconds, tags);
    }

    /// <summary>Increments the Merkle anchor failure counter, tagged by tenant and reason.</summary>
    /// <param name="tenantId">Value of the <c>tenant</c> tag.</param>
    /// <param name="reason">Value of the <c>reason</c> tag.</param>
    public static void RecordMerkleAnchorFailure(string tenantId, string reason)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenantId),
            new("reason", reason)
        };

        MerkleAnchorFailures.Add(1, tags);
    }

    /// <summary>Increments the attachment failure counter, tagged by tenant and stage.</summary>
    /// <param name="tenantId">Value of the <c>tenant</c> tag.</param>
    /// <param name="stage">Value of the <c>stage</c> tag.</param>
    public static void RecordAttachmentFailure(string tenantId, string stage)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenantId),
            new("stage", stage)
        };

        AttachmentsEncryptionFailures.Add(1, tags);
    }

    // ---------------------------------------------------------------------------------------
    // Quotas, backlog and connection tracking.
    // ---------------------------------------------------------------------------------------

    /// <summary>
    /// Replaces the ingest backlog limit used for backpressure accounting. Values that are not
    /// strictly positive are ignored and the previous limit stays in effect.
    /// </summary>
    /// <param name="ingestBacklogLimit">New limit (initial value is 5000).</param>
    public static void ConfigureQuotas(long ingestBacklogLimit)
    {
        if (ingestBacklogLimit <= 0)
        {
            return;
        }

        Interlocked.Exchange(ref _ingestBacklogLimit, ingestBacklogLimit);
    }

    /// <summary>
    /// Increments the tracked backlog for a tenant and, when the new value exceeds the configured
    /// limit, also increments the backpressure counter.
    /// </summary>
    /// <param name="tenantId">Tenant whose backlog grows; null/blank is tracked under the empty-string tenant.</param>
    /// <returns>The tenant's backlog after the increment.</returns>
    public static long IncrementBacklog(string? tenantId = null)
    {
        var tenant = NormalizeTenant(tenantId);
        var backlog = BacklogByTenant.AddOrUpdate(tenant, _ => 1, (_, current) => current + 1);

        // Read the limit once so the comparison and the reported "limit" tag always agree.
        var limit = Interlocked.Read(ref _ingestBacklogLimit);
        if (backlog > limit)
        {
            BackpressureApplied.Add(1, new KeyValuePair<string, object?>[]
            {
                new("tenant", tenant),
                new("reason", "ingest_backlog"),
                new("limit", limit)
            });
        }

        return backlog;
    }

    /// <summary>Increments the quota rejection counter, tagged by tenant and reason.</summary>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; null/blank is reported as an empty string.</param>
    /// <param name="reason">Value of the <c>reason</c> tag.</param>
    public static void RecordQuotaRejection(string tenantId, string reason)
    {
        QuotaRejections.Add(1, new KeyValuePair<string, object?>[]
        {
            new("tenant", NormalizeTenant(tenantId)),
            new("reason", reason)
        });
    }

    /// <summary>
    /// Decrements the tracked backlog for a tenant, never going below zero. A tenant that was
    /// not tracked yet is added with a backlog of zero.
    /// </summary>
    /// <param name="tenantId">Tenant whose backlog shrinks; null/blank is tracked under the empty-string tenant.</param>
    public static void DecrementBacklog(string? tenantId = null)
    {
        var tenant = NormalizeTenant(tenantId);
        BacklogByTenant.AddOrUpdate(tenant, _ => 0, (_, current) => Math.Max(0, current - 1));
    }

    /// <summary>Increments the active-connection count for a role.</summary>
    /// <param name="role">Connection role; lower-cased, with null/blank tracked as <c>unspecified</c>.</param>
    public static void ConnectionOpened(string role)
    {
        var normalized = NormalizeRole(role);
        DbConnectionsByRole.AddOrUpdate(normalized, _ => 1, (_, current) => current + 1);
    }

    /// <summary>Decrements the active-connection count for a role, never going below zero.</summary>
    /// <param name="role">Connection role; lower-cased, with null/blank tracked as <c>unspecified</c>.</param>
    public static void ConnectionClosed(string role)
    {
        var normalized = NormalizeRole(role);
        DbConnectionsByRole.AddOrUpdate(normalized, _ => 0, (_, current) => Math.Max(0, current - 1));
    }

    /// <summary>Alias for <see cref="ConnectionOpened"/>.</summary>
    /// <param name="role">Connection role.</param>
    public static void IncrementDbConnection(string role) => ConnectionOpened(role);

    /// <summary>Alias for <see cref="ConnectionClosed"/>.</summary>
    /// <param name="role">Connection role.</param>
    public static void DecrementDbConnection(string role) => ConnectionClosed(role);

    /// <summary>Stores the projection lag reported by the lag gauge for a tenant.</summary>
    /// <param name="tenantId">Tenant; null/blank is tracked under the empty-string tenant.</param>
    /// <param name="lagSeconds">Lag in seconds; negative values are stored as zero.</param>
    public static void UpdateProjectionLag(string? tenantId, double lagSeconds)
    {
        ProjectionLagByTenant[NormalizeTenant(tenantId)] = lagSeconds < 0 ? 0 : lagSeconds;
    }

    /// <summary>Convenience overload of <see cref="UpdateProjectionLag"/> taking a <see cref="TimeSpan"/>.</summary>
    /// <param name="lag">Lag; converted to seconds.</param>
    /// <param name="tenantId">Tenant; null/blank is tracked under the empty-string tenant.</param>
    public static void RecordProjectionLag(TimeSpan lag, string? tenantId) =>
        UpdateProjectionLag(tenantId, lag.TotalSeconds);

    // ---------------------------------------------------------------------------------------
    // Air-gap staleness and scored findings export.
    // ---------------------------------------------------------------------------------------

    /// <summary>
    /// Records a staleness sample in the histogram and stores it as the value reported by the
    /// staleness gauge for the domain.
    /// </summary>
    /// <param name="domainId">Value of the <c>domain</c> tag; null/blank is reported as <c>unknown</c>.</param>
    /// <param name="stalenessSeconds">Staleness in seconds.</param>
    public static void RecordAirgapStaleness(string? domainId, long stalenessSeconds)
    {
        var domain = NormalizeDomain(domainId);

        AirgapStalenessSeconds.Record(stalenessSeconds, new KeyValuePair<string, object?>("domain", domain));
        AirgapStalenessByDomain[domain] = stalenessSeconds;
    }

    /// <summary>Increments the staleness validation failure counter for a domain.</summary>
    /// <param name="domainId">Value of the <c>domain</c> tag; null/blank is reported as <c>unknown</c>.</param>
    public static void RecordStalenessValidationFailure(string? domainId)
    {
        StalenessValidationFailures.Add(1, new KeyValuePair<string, object?>("domain", NormalizeDomain(domainId)));
    }

    /// <summary>Records one scored-findings export: increments the counter and records its duration.</summary>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; <c>null</c> is reported as <c>unknown</c>.</param>
    /// <param name="recordCount">Value of the <c>record_count</c> tag.</param>
    /// <param name="durationSeconds">Export duration in seconds.</param>
    public static void RecordScoredFindingsExport(string? tenantId, int recordCount, double durationSeconds)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenantId ?? "unknown"),
            new("record_count", recordCount)
        };

        ScoredFindingsExports.Add(1, tags);
        ScoredFindingsExportDuration.Record(durationSeconds, tags);
    }

    // ---------------------------------------------------------------------------------------
    // Scoring API.
    // ---------------------------------------------------------------------------------------

    /// <summary>Records the latency of one scoring operation and increments the operations counter.</summary>
    /// <param name="duration">Elapsed time; recorded in seconds.</param>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="policyVersion">Value of the <c>policy_version</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="result">Value of the <c>result</c> tag.</param>
    public static void RecordScoringLatency(TimeSpan duration, string? tenantId, string? policyVersion, string result)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenantId ?? string.Empty),
            new("policy_version", policyVersion ?? string.Empty),
            new("result", result)
        };

        ScoringLatencySeconds.Record(duration.TotalSeconds, tags);
        ScoringOperationsTotal.Add(1, tags);
    }

    /// <summary>Increments the scoring provider gap counter.</summary>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="provider">Value of the <c>provider</c> tag; <c>null</c> is reported as <c>unknown</c>.</param>
    /// <param name="reason">Value of the <c>reason</c> tag.</param>
    public static void RecordScoringProviderGap(string? tenantId, string? provider, string reason)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenantId ?? string.Empty),
            new("provider", provider ?? "unknown"),
            new("reason", reason)
        };

        ScoringProviderGaps.Add(1, tags);
    }

    /// <summary>
    /// Replaces the severity counts reported by the severity gauges for a tenant/policy pair.
    /// </summary>
    /// <remarks>
    /// The tenant and policy version are normalized once and the normalized values are used both
    /// for the dictionary key and for the emitted tags, so one dictionary entry always reports
    /// the same tag values.
    /// </remarks>
    /// <param name="tenantId">Tenant; null/blank is reported as an empty string.</param>
    /// <param name="policyVersion">Policy version; null/blank is reported as <c>default</c>.</param>
    /// <param name="critical">Count reported by the critical gauge.</param>
    /// <param name="high">Count reported by the high gauge.</param>
    /// <param name="medium">Count reported by the medium gauge.</param>
    /// <param name="low">Count reported by the low gauge.</param>
    /// <param name="unknown">Count reported by the unknown gauge.</param>
    public static void UpdateSeverityDistribution(
        string tenantId,
        string? policyVersion,
        int critical,
        int high,
        int medium,
        int low,
        int unknown)
    {
        var tenant = NormalizeTenant(tenantId);
        var policy = NormalizePolicyVersion(policyVersion);

        SeverityByTenantPolicy[BuildTenantPolicyKey(tenant, policy)] =
            new SeveritySnapshot(tenant, policy, critical, high, medium, low, unknown);
    }

    /// <summary>Stores the score freshness reported by the freshness gauge for a tenant.</summary>
    /// <param name="tenantId">Tenant; null/blank is tracked under the empty-string tenant.</param>
    /// <param name="secondsSinceLastScoring">Freshness in seconds; negative values are stored as zero.</param>
    public static void UpdateScoreFreshness(string tenantId, double secondsSinceLastScoring)
    {
        ScoreFreshnessByTenant[NormalizeTenant(tenantId)] =
            secondsSinceLastScoring < 0 ? 0 : secondsSinceLastScoring;
    }

    // ---------------------------------------------------------------------------------------
    // EWS API.
    // ---------------------------------------------------------------------------------------

    /// <summary>Records an EWS calculation.</summary>
    /// <param name="duration">Elapsed time; recorded in seconds.</param>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; null/blank is reported as an empty string.</param>
    /// <param name="policyDigest">Value of the <c>policy_digest</c> tag; <c>null</c> is reported as an empty string.</param>
    /// <param name="bucket">Value of the <c>bucket</c> tag.</param>
    /// <param name="result">Value of the <c>result</c> tag.</param>
    /// <param name="fromCache">Value of the <c>from_cache</c> tag.</param>
    public static void RecordEwsCalculation(
        TimeSpan duration,
        string? tenantId,
        string? policyDigest,
        string bucket,
        string result,
        bool fromCache)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", NormalizeTenant(tenantId)),
            new("policy_digest", policyDigest ?? string.Empty),
            new("bucket", bucket),
            new("result", result),
            new("from_cache", fromCache)
        };

        EwsCalculationsTotal.Add(1, tags);
        EwsCalculationDurationSeconds.Record(duration.TotalSeconds, tags);
    }

    /// <summary>Records an EWS batch calculation.</summary>
    /// <param name="duration">Elapsed time of the batch; recorded in seconds in the EWS calculation duration histogram.</param>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; null/blank is reported as an empty string.</param>
    /// <param name="batchSize">Value recorded in the batch size histogram.</param>
    /// <param name="succeeded">Value of the <c>succeeded</c> tag.</param>
    /// <param name="failed">Value of the <c>failed</c> tag.</param>
    public static void RecordEwsBatchCalculation(
        TimeSpan duration,
        string? tenantId,
        int batchSize,
        int succeeded,
        int failed)
    {
        var tenant = NormalizeTenant(tenantId);
        var tags = new KeyValuePair<string, object?>[]
        {
            new("tenant", tenant),
            new("succeeded", succeeded),
            new("failed", failed)
        };

        EwsBatchCalculationsTotal.Add(1, tags);
        EwsBatchSizeHistogram.Record(batchSize, new KeyValuePair<string, object?>("tenant", tenant));
        EwsCalculationDurationSeconds.Record(duration.TotalSeconds, tags);
    }

    /// <summary>Records an EWS cache hit.</summary>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; null/blank is reported as an empty string.</param>
    /// <param name="findingId">Accepted for API compatibility; not emitted as a tag.</param>
    public static void RecordEwsCacheHit(string? tenantId, string findingId)
    {
        EwsCacheHitsTotal.Add(1, new KeyValuePair<string, object?>("tenant", NormalizeTenant(tenantId)));
    }

    /// <summary>Records an EWS cache miss.</summary>
    /// <param name="tenantId">Value of the <c>tenant</c> tag; null/blank is reported as an empty string.</param>
    /// <param name="findingId">Accepted for API compatibility; not emitted as a tag.</param>
    public static void RecordEwsCacheMiss(string? tenantId, string findingId)
    {
        EwsCacheMissesTotal.Add(1, new KeyValuePair<string, object?>("tenant", NormalizeTenant(tenantId)));
    }

    /// <summary>Records a webhook delivery attempt.</summary>
    /// <param name="duration">Elapsed time of the attempt; recorded in seconds.</param>
    /// <param name="webhookId">Webhook identifier; its string form is the <c>webhook_id</c> tag.</param>
    /// <param name="status">Value of the <c>status</c> tag.</param>
    /// <param name="attempt">Value of the <c>attempt</c> tag.</param>
    public static void RecordWebhookDelivery(TimeSpan duration, Guid webhookId, string status, int attempt)
    {
        var tags = new KeyValuePair<string, object?>[]
        {
            new("webhook_id", webhookId.ToString()),
            new("status", status),
            new("attempt", attempt)
        };

        EwsWebhooksDeliveredTotal.Add(1, tags);
        EwsWebhookDeliveryDurationSeconds.Record(duration.TotalSeconds, tags);
    }

    /// <summary>Updates the EWS bucket distribution for a tenant.</summary>
    /// <param name="tenantId">Tenant; null/blank is tracked under the empty-string tenant.</param>
    /// <param name="actNow">Count reported by the ActNow gauge.</param>
    /// <param name="scheduleNext">Count reported by the ScheduleNext gauge.</param>
    /// <param name="investigate">Count reported by the Investigate gauge.</param>
    /// <param name="watchlist">Count reported by the Watchlist gauge.</param>
    public static void UpdateEwsBucketDistribution(
        string tenantId,
        int actNow,
        int scheduleNext,
        int investigate,
        int watchlist)
    {
        var tenant = NormalizeTenant(tenantId);
        EwsBucketDistributionByTenant[tenant] =
            new BucketDistributionSnapshot(tenant, actNow, scheduleNext, investigate, watchlist);
    }

    // ---------------------------------------------------------------------------------------
    // Snapshot types held in the gauge dictionaries.
    // ---------------------------------------------------------------------------------------

    private sealed record SeveritySnapshot(
        string TenantId,
        string PolicyVersion,
        int Critical,
        int High,
        int Medium,
        int Low,
        int Unknown);

    private sealed record BucketDistributionSnapshot(
        string TenantId,
        int ActNow,
        int ScheduleNext,
        int Investigate,
        int Watchlist);

    // ---------------------------------------------------------------------------------------
    // Observable gauge callbacks.
    // ---------------------------------------------------------------------------------------

    private static IEnumerable<Measurement<long>> ObserveSeverityCritical() => ObserveSeverity(s => s.Critical);

    private static IEnumerable<Measurement<long>> ObserveSeverityHigh() => ObserveSeverity(s => s.High);

    private static IEnumerable<Measurement<long>> ObserveSeverityMedium() => ObserveSeverity(s => s.Medium);

    private static IEnumerable<Measurement<long>> ObserveSeverityLow() => ObserveSeverity(s => s.Low);

    private static IEnumerable<Measurement<long>> ObserveSeverityUnknown() => ObserveSeverity(s => s.Unknown);

    // Yields one measurement per tenant/policy snapshot, using the count chosen by the selector.
    private static IEnumerable<Measurement<long>> ObserveSeverity(Func<SeveritySnapshot, int> selector)
    {
        foreach (var entry in SeverityByTenantPolicy)
        {
            var snapshot = entry.Value;
            yield return new Measurement<long>(
                selector(snapshot),
                new KeyValuePair<string, object?>("tenant", snapshot.TenantId),
                new KeyValuePair<string, object?>("policy_version", snapshot.PolicyVersion));
        }
    }

    private static IEnumerable<Measurement<long>> ObserveEwsBucketActNow() => ObserveEwsBucket(s => s.ActNow);

    private static IEnumerable<Measurement<long>> ObserveEwsBucketScheduleNext() => ObserveEwsBucket(s => s.ScheduleNext);

    private static IEnumerable<Measurement<long>> ObserveEwsBucketInvestigate() => ObserveEwsBucket(s => s.Investigate);

    private static IEnumerable<Measurement<long>> ObserveEwsBucketWatchlist() => ObserveEwsBucket(s => s.Watchlist);

    // Yields one measurement per tenant snapshot, using the count chosen by the selector.
    private static IEnumerable<Measurement<long>> ObserveEwsBucket(Func<BucketDistributionSnapshot, int> selector)
    {
        foreach (var entry in EwsBucketDistributionByTenant)
        {
            var snapshot = entry.Value;
            yield return new Measurement<long>(
                selector(snapshot),
                new KeyValuePair<string, object?>("tenant", snapshot.TenantId));
        }
    }

    private static IEnumerable<Measurement<double>> ObserveScoreFreshness()
    {
        foreach (var entry in ScoreFreshnessByTenant)
        {
            yield return new Measurement<double>(entry.Value, new KeyValuePair<string, object?>("tenant", entry.Key));
        }
    }

    private static IEnumerable<Measurement<double>> ObserveProjectionLag()
    {
        foreach (var entry in ProjectionLagByTenant)
        {
            yield return new Measurement<double>(entry.Value, new KeyValuePair<string, object?>("tenant", entry.Key));
        }
    }

    private static IEnumerable<Measurement<long>> ObserveBacklog()
    {
        foreach (var entry in BacklogByTenant)
        {
            yield return new Measurement<long>(entry.Value, new KeyValuePair<string, object?>("tenant", entry.Key));
        }
    }

    private static IEnumerable<Measurement<long>> ObserveQuotaRemaining()
    {
        // One atomic read per collection pass, so every tenant is compared against the same limit.
        var limit = Interlocked.Read(ref _ingestBacklogLimit);

        foreach (var entry in BacklogByTenant)
        {
            var remaining = Math.Max(0, limit - entry.Value);
            yield return new Measurement<long>(remaining, new KeyValuePair<string, object?>("tenant", entry.Key));
        }
    }

    private static IEnumerable<Measurement<long>> ObserveDbConnections()
    {
        foreach (var entry in DbConnectionsByRole)
        {
            yield return new Measurement<long>(entry.Value, new KeyValuePair<string, object?>("role", entry.Key));
        }
    }

    private static IEnumerable<Measurement<long>> ObserveAppVersion()
    {
        yield return new Measurement<long>(
            1,
            new KeyValuePair<string, object?>("version", AppVersion),
            new KeyValuePair<string, object?>("git_sha", GitSha));
    }

    private static IEnumerable<Measurement<long>> ObserveAirgapStaleness()
    {
        foreach (var entry in AirgapStalenessByDomain)
        {
            yield return new Measurement<long>(entry.Value, new KeyValuePair<string, object?>("domain", entry.Key));
        }
    }

    // ---------------------------------------------------------------------------------------
    // Tag normalization helpers.
    // ---------------------------------------------------------------------------------------

    // Builds the SeverityByTenantPolicy key from the normalized tenant and policy version.
    private static string BuildTenantPolicyKey(string? tenantId, string? policyVersion) =>
        $"{NormalizeTenant(tenantId)}|{NormalizePolicyVersion(policyVersion)}";

    private static string NormalizePolicyVersion(string? policyVersion) =>
        string.IsNullOrWhiteSpace(policyVersion) ? "default" : policyVersion;

    private static string NormalizeDomain(string? domainId) =>
        string.IsNullOrWhiteSpace(domainId) ? "unknown" : domainId;

    private static string NormalizeRole(string role) =>
        string.IsNullOrWhiteSpace(role) ? "unspecified" : role.ToLowerInvariant();

    private static string NormalizeTenant(string? tenantId) =>
        string.IsNullOrWhiteSpace(tenantId) ? string.Empty : tenantId;
}