using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Messaging;
using StellaOps.Messaging.Abstractions;
using StellaOps.Policy.Engine.Options;
using StellaOps.Policy.Engine.Telemetry;
using StellaOps.Policy.Persistence.Postgres.Models;
using StellaOps.Policy.Persistence.Postgres.Repositories;

namespace StellaOps.Policy.Engine.ExceptionCache;

/// <summary>
/// Transport-agnostic exception effective cache using StellaOps.Messaging abstractions.
/// Works with any configured transport (Valkey, PostgreSQL, InMemory).
/// </summary>
/// <remarks>
/// Entries are stored as one list per asset key (<c>{tenant}:{asset}:{advisory|all}</c>).
/// A per-exception set index records which asset keys contain an exception so that a
/// single exception can be invalidated without scanning keys. A per-tenant version
/// counter is bumped by batch writes, single-exception warms and invalidations.
/// </remarks>
// NOTE(review): the generic type arguments in this class (cache value types, set-store
// key/element types, Task result types, ILogger/IOptions arguments, summary dictionary
// value types) were reconstructed from how each member is used in this file.
// Confirm them against IExceptionEffectiveCache and the StellaOps.Messaging abstractions.
internal sealed class MessagingExceptionEffectiveCache : IExceptionEffectiveCache
{
    private readonly IDistributedCache<List<ExceptionCacheEntry>> _entryCache;
    private readonly ISetStore<string, string> _exceptionIndex;
    private readonly IDistributedCache<long> _versionCache;
    private readonly IDistributedCache<Dictionary<string, string>> _statsCache;
    private readonly IExceptionRepository _repository;
    private readonly ILogger<MessagingExceptionEffectiveCache> _logger;
    private readonly ExceptionCacheOptions _options;
    private readonly TimeProvider _timeProvider;

    private const string EntryKeyPrefix = "exc:entry";
    private const string IndexKeyPrefix = "exc:index";
    private const string VersionKeyPrefix = "exc:version";
    private const string StatsKeyPrefix = "exc:stats";

    /// <summary>
    /// Creates the cache, obtaining the entry, version and stats caches from
    /// <paramref name="cacheFactory"/> and the exception index from <paramref name="setStoreFactory"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// Any of the factories, the repository, the logger or the time provider is null.
    /// A null <paramref name="options"/> (or null cache section) falls back to default options.
    /// </exception>
    public MessagingExceptionEffectiveCache(
        IDistributedCacheFactory cacheFactory,
        ISetStoreFactory setStoreFactory,
        IExceptionRepository repository,
        ILogger<MessagingExceptionEffectiveCache> logger,
        IOptions<PolicyEngineOptions> options,
        TimeProvider timeProvider)
    {
        ArgumentNullException.ThrowIfNull(cacheFactory);
        ArgumentNullException.ThrowIfNull(setStoreFactory);

        _entryCache = cacheFactory.Create<List<ExceptionCacheEntry>>(new CacheOptions { KeyPrefix = EntryKeyPrefix });
        _exceptionIndex = setStoreFactory.Create<string, string>("exc-exception-index");
        _versionCache = cacheFactory.Create<long>(new CacheOptions { KeyPrefix = VersionKeyPrefix });
        _statsCache = cacheFactory.Create<Dictionary<string, string>>(new CacheOptions { KeyPrefix = StatsKeyPrefix });
        _repository = repository ?? throw new ArgumentNullException(nameof(repository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value.ExceptionCache ?? new ExceptionCacheOptions();
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
    }

    /// <summary>
    /// Returns the cached exceptions that apply to an asset at <paramref name="asOf"/>.
    /// Combines the advisory-specific key (when <paramref name="advisoryId"/> is given)
    /// with the asset's "all" key, then filters by effective window and sorts by priority.
    /// </summary>
    /// <returns>
    /// The matching entries, whether at least one key was found in the cache,
    /// the current tenant cache version and the elapsed query time.
    /// </returns>
    public async Task<ExceptionCacheQueryResult> GetForAssetAsync(
        string tenantId,
        string assetId,
        string? advisoryId,
        DateTimeOffset asOf,
        CancellationToken cancellationToken = default)
    {
        var sw = Stopwatch.StartNew();
        var entries = new List<ExceptionCacheEntry>();
        var fromCache = false;

        // Try specific advisory key first
        if (advisoryId is not null)
        {
            var specificKey = GetAssetKey(tenantId, assetId, advisoryId);
            var specificResult = await _entryCache.GetAsync(specificKey, cancellationToken).ConfigureAwait(false);
            if (specificResult.HasValue && specificResult.Value is not null)
            {
                entries.AddRange(specificResult.Value);
                fromCache = true;
            }
        }

        // Also get "all" entries (exceptions without specific advisory)
        var allKey = GetAssetKey(tenantId, assetId, null);
        var allResult = await _entryCache.GetAsync(allKey, cancellationToken).ConfigureAwait(false);
        if (allResult.HasValue && allResult.Value is not null)
        {
            entries.AddRange(allResult.Value);
            fromCache = true;
        }

        var validEntries = FilterEffective(entries, asOf);
        var version = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

        sw.Stop();
        PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, fromCache ? "hit" : "miss");

        return new ExceptionCacheQueryResult
        {
            Entries = validEntries,
            FromCache = fromCache,
            CacheVersion = version,
            QueryDurationMs = sw.ElapsedMilliseconds,
        };
    }

    /// <summary>
    /// Looks up several assets in one call. Only each asset's "all" key is consulted
    /// (advisory-specific keys are not read), and <c>QueryDurationMs</c> is reported as 0.
    /// </summary>
    /// <returns>A result per requested asset id, keyed case-insensitively.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="assetIds"/> is null.</exception>
    public async Task<IReadOnlyDictionary<string, ExceptionCacheQueryResult>> GetBatchAsync(
        string tenantId,
        IReadOnlyList<string> assetIds,
        DateTimeOffset asOf,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(assetIds);

        var results = new Dictionary<string, ExceptionCacheQueryResult>(StringComparer.OrdinalIgnoreCase);
        var version = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

        foreach (var assetId in assetIds)
        {
            var entries = ImmutableArray<ExceptionCacheEntry>.Empty;
            var fromCache = false;

            var allKey = GetAssetKey(tenantId, assetId, null);
            var result = await _entryCache.GetAsync(allKey, cancellationToken).ConfigureAwait(false);
            if (result.HasValue && result.Value is not null)
            {
                entries = FilterEffective(result.Value, asOf);
                fromCache = true;
            }

            results[assetId] = new ExceptionCacheQueryResult
            {
                Entries = entries,
                FromCache = fromCache,
                CacheVersion = version,
                QueryDurationMs = 0,
            };
        }

        PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "batch_get");
        return results;
    }

    /// <summary>
    /// Adds or replaces a single entry inside its asset key, keeping the other entries
    /// already cached under that key, and records the key in the exception index.
    /// Does not change the tenant cache version.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="entry"/> is null.</exception>
    public async Task SetAsync(
        string tenantId,
        ExceptionCacheEntry entry,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(entry);

        var assetKey = GetAssetKey(tenantId, entry.AssetId, entry.AdvisoryId);
        var exceptionIndexKey = GetExceptionIndexKey(tenantId, entry.ExceptionId);

        // Get existing entries for this asset
        var existingResult = await _entryCache.GetAsync(assetKey, cancellationToken).ConfigureAwait(false);
        var entries = existingResult.HasValue && existingResult.Value is not null
            ? existingResult.Value
            : new List<ExceptionCacheEntry>();

        // Replace any previous entry for the same exception
        entries.RemoveAll(e => e.ExceptionId == entry.ExceptionId);
        entries.Add(entry);

        // The key holds every entry for the asset, so it must live as long as the
        // longest-lived one (same rule as SetBatchAsync); using only the new entry's
        // TTL could evict longer-lived siblings early.
        var ttl = entries.Max(ComputeTtl);
        var cacheOptions = new CacheEntryOptions { TimeToLive = ttl };

        await _entryCache.SetAsync(assetKey, entries, cacheOptions, cancellationToken).ConfigureAwait(false);

        // Update exception index; it is kept slightly longer than the entry key
        await _exceptionIndex.AddAsync(exceptionIndexKey, assetKey, cancellationToken).ConfigureAwait(false);
        await _exceptionIndex.SetExpirationAsync(exceptionIndexKey, ttl.Add(TimeSpan.FromMinutes(5)), cancellationToken)
            .ConfigureAwait(false);

        PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "set");
    }

    /// <summary>
    /// Writes entries grouped by asset key, REPLACING whatever list was previously stored
    /// under each key, updates the exception index and increments the tenant cache version.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="entries"/> is null.</exception>
    public async Task SetBatchAsync(
        string tenantId,
        IEnumerable<ExceptionCacheEntry> entries,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(entries);

        var count = 0;

        // Group entries by asset+advisory
        var groupedEntries = entries
            .GroupBy(e => GetAssetKey(tenantId, e.AssetId, e.AdvisoryId))
            .ToDictionary(g => g.Key, g => g.ToList());

        foreach (var (assetKey, assetEntries) in groupedEntries)
        {
            var ttl = assetEntries.Max(ComputeTtl);
            var indexTtl = ttl.Add(TimeSpan.FromMinutes(5));
            var cacheOptions = new CacheEntryOptions { TimeToLive = ttl };

            await _entryCache.SetAsync(assetKey, assetEntries, cacheOptions, cancellationToken).ConfigureAwait(false);

            // Update exception indexes
            foreach (var entry in assetEntries)
            {
                var exceptionIndexKey = GetExceptionIndexKey(tenantId, entry.ExceptionId);
                await _exceptionIndex.AddAsync(exceptionIndexKey, assetKey, cancellationToken).ConfigureAwait(false);
                await _exceptionIndex.SetExpirationAsync(exceptionIndexKey, indexTtl, cancellationToken)
                    .ConfigureAwait(false);
            }

            count += assetEntries.Count;
        }

        await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

        PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "set_batch");
        _logger.LogDebug("Set {Count} exception cache entries for tenant {TenantId}", count, tenantId);
    }

    /// <summary>
    /// Removes one exception from every asset key listed in its index, deletes the index
    /// and increments the tenant cache version.
    /// </summary>
    public async Task InvalidateExceptionAsync(
        string tenantId,
        string exceptionId,
        CancellationToken cancellationToken = default)
    {
        var exceptionIndexKey = GetExceptionIndexKey(tenantId, exceptionId);

        // Get all asset keys affected by this exception
        var assetKeys = await _exceptionIndex.GetMembersAsync(exceptionIndexKey, cancellationToken).ConfigureAwait(false);

        foreach (var assetKey in assetKeys)
        {
            var result = await _entryCache.GetAsync(assetKey, cancellationToken).ConfigureAwait(false);
            if (!result.HasValue || result.Value is null)
            {
                continue;
            }

            var entries = result.Value;
            entries.RemoveAll(e => e.ExceptionId == exceptionId);

            if (entries.Count > 0)
            {
                // Keep the survivors for as long as the longest-lived one needs, matching
                // the write paths, instead of resetting the key to the default TTL.
                var cacheOptions = new CacheEntryOptions { TimeToLive = entries.Max(ComputeTtl) };
                await _entryCache.SetAsync(assetKey, entries, cacheOptions, cancellationToken).ConfigureAwait(false);
            }
            else
            {
                await _entryCache.InvalidateAsync(assetKey, cancellationToken).ConfigureAwait(false);
            }
        }

        // Delete the exception index
        await _exceptionIndex.DeleteAsync(exceptionIndexKey, cancellationToken).ConfigureAwait(false);

        await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

        PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "invalidate_exception");
        _logger.LogInformation(
            "Invalidated exception {ExceptionId} affecting {Count} assets for tenant {TenantId}",
            exceptionId,
            assetKeys.Count,
            tenantId);
    }

    /// <summary>
    /// Invalidates every entry key of one asset by pattern and increments the tenant cache version.
    /// </summary>
    public async Task InvalidateAssetAsync(
        string tenantId,
        string assetId,
        CancellationToken cancellationToken = default)
    {
        // NOTE(review): the pattern embeds EntryKeyPrefix although _entryCache was created
        // with the same KeyPrefix and entry keys are written without it. Whether
        // InvalidateByPatternAsync expects a prefixed pattern is not visible here - confirm.
        var pattern = $"{EntryKeyPrefix}:{tenantId}:{assetId}:*";
        var count = await _entryCache.InvalidateByPatternAsync(pattern, cancellationToken).ConfigureAwait(false);

        await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

        PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "invalidate_asset");
        _logger.LogDebug("Invalidated {Count} cache keys for asset {AssetId}", count, assetId);
    }

    /// <summary>
    /// Invalidates all entry keys of a tenant by pattern and drops the tenant's
    /// version and stats records. The exception index sets are not touched here.
    /// </summary>
    public async Task InvalidateTenantAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        // NOTE(review): same prefixed-pattern question as InvalidateAssetAsync - confirm.
        var entryPattern = $"{EntryKeyPrefix}:{tenantId}:*";
        var entryCount = await _entryCache.InvalidateByPatternAsync(entryPattern, cancellationToken).ConfigureAwait(false);

        // Invalidate version and stats
        var versionKey = GetVersionKey(tenantId);
        await _versionCache.InvalidateAsync(versionKey, cancellationToken).ConfigureAwait(false);

        var statsKey = GetStatsKey(tenantId);
        await _statsCache.InvalidateAsync(statsKey, cancellationToken).ConfigureAwait(false);

        PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "invalidate_tenant");
        _logger.LogInformation("Invalidated {Count} cache keys for tenant {TenantId}", entryCount, tenantId);
    }

    /// <summary>
    /// Loads the tenant's active exceptions from the repository (up to
    /// <c>MaxEntriesPerTenant</c>) and writes them to the cache in one batch.
    /// Failures are logged, recorded in telemetry and rethrown.
    /// </summary>
    public async Task WarmAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        using var activity = PolicyEngineTelemetry.ActivitySource.StartActivity(
            "exception.cache.warm", ActivityKind.Internal);
        activity?.SetTag("tenant_id", tenantId);

        var sw = Stopwatch.StartNew();
        var now = _timeProvider.GetUtcNow();

        _logger.LogInformation("Starting cache warm for tenant {TenantId}", tenantId);

        try
        {
            var exceptions = await _repository.GetAllAsync(
                tenantId,
                ExceptionStatus.Active,
                limit: _options.MaxEntriesPerTenant,
                offset: 0,
                cancellationToken: cancellationToken).ConfigureAwait(false);

            if (exceptions.Count == 0)
            {
                _logger.LogDebug("No active exceptions to warm for tenant {TenantId}", tenantId);
                return;
            }

            var entries = new List<ExceptionCacheEntry>();
            foreach (var exception in exceptions)
            {
                entries.Add(new ExceptionCacheEntry
                {
                    ExceptionId = exception.Id.ToString(),
                    // Exceptions without a project are cached under the wildcard asset.
                    AssetId = string.IsNullOrWhiteSpace(exception.ProjectId) ? "*" : exception.ProjectId!,
                    AdvisoryId = null,
                    CveId = null,
                    DecisionOverride = "allow",
                    ExceptionType = "waiver",
                    Priority = 0,
                    EffectiveFrom = exception.CreatedAt,
                    ExpiresAt = exception.ExpiresAt,
                    CachedAt = now,
                    ExceptionName = exception.Name,
                });
            }

            // exceptions is non-empty here, so entries is too.
            await SetBatchAsync(tenantId, entries, cancellationToken).ConfigureAwait(false);

            sw.Stop();

            // Update warm stats
            await UpdateWarmStatsAsync(tenantId, now, entries.Count, cancellationToken).ConfigureAwait(false);

            PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "warm");
            _logger.LogInformation(
                "Warmed cache with {Count} entries from {ExceptionCount} exceptions for tenant {TenantId} in {Duration}ms",
                entries.Count,
                exceptions.Count,
                tenantId,
                sw.ElapsedMilliseconds);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to warm cache for tenant {TenantId}", tenantId);
            PolicyEngineTelemetry.RecordError("exception_cache_warm", tenantId);
            throw;
        }
    }

    /// <summary>
    /// Returns a placeholder summary: only <c>TenantId</c>, <c>CacheVersion</c> and
    /// <c>ComputedAt</c> carry real values; all counts are zero.
    /// </summary>
    public async Task<ExceptionCacheSummary> GetSummaryAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        var now = _timeProvider.GetUtcNow();

        // Note: Full summary requires scanning keys which isn't efficient with abstractions
        // Return placeholder data - complete implementation would need transport-specific code
        var version = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

        return new ExceptionCacheSummary
        {
            TenantId = tenantId,
            TotalEntries = 0,
            UniqueExceptions = 0,
            UniqueAssets = 0,
            ByType = new Dictionary<string, int>(),
            ByDecision = new Dictionary<string, int>(),
            ExpiringWithinHour = 0,
            CacheVersion = version,
            ComputedAt = now,
        };
    }

    /// <summary>
    /// Returns placeholder statistics (all zero / null); <paramref name="tenantId"/> is ignored.
    /// </summary>
    public Task<ExceptionCacheStats> GetStatsAsync(
        string? tenantId = null,
        CancellationToken cancellationToken = default)
    {
        // Stats require implementation-specific queries that aren't available through abstractions
        // Return placeholder stats - a complete implementation would need transport-specific code
        return Task.FromResult(new ExceptionCacheStats
        {
            TotalEntries = 0,
            TotalTenants = 0,
            MemoryUsedBytes = null,
            HitCount = 0,
            MissCount = 0,
            LastWarmAt = null,
            LastInvalidationAt = null,
        });
    }

    /// <summary>
    /// Returns the tenant's cache version, or 0 when no version is stored.
    /// </summary>
    public async Task<long> GetVersionAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        var versionKey = GetVersionKey(tenantId);
        var result = await _versionCache.GetAsync(versionKey, cancellationToken).ConfigureAwait(false);
        return result.HasValue ? result.Value : 0;
    }

    /// <summary>
    /// Applies an exception lifecycle event: "activated"/"created" warm the exception,
    /// "expired"/"revoked"/"deleted" invalidate it, "updated" invalidates then re-warms it.
    /// Unknown event types are logged as a warning. Event type matching is case-insensitive.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="exceptionEvent"/> is null.</exception>
    public async Task HandleExceptionEventAsync(
        ExceptionEvent exceptionEvent,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(exceptionEvent);

        using var activity = PolicyEngineTelemetry.ActivitySource.StartActivity(
            "exception.cache.handle_event", ActivityKind.Internal);
        activity?.SetTag("tenant_id", exceptionEvent.TenantId);
        activity?.SetTag("event_type", exceptionEvent.EventType);
        activity?.SetTag("exception_id", exceptionEvent.ExceptionId);

        _logger.LogDebug(
            "Handling exception event {EventType} for exception {ExceptionId} tenant {TenantId}",
            exceptionEvent.EventType,
            exceptionEvent.ExceptionId,
            exceptionEvent.TenantId);

        switch (exceptionEvent.EventType.ToLowerInvariant())
        {
            case "activated":
            case "created":
                await WarmExceptionAsync(exceptionEvent.TenantId, exceptionEvent.ExceptionId, cancellationToken)
                    .ConfigureAwait(false);
                break;

            case "expired":
            case "revoked":
            case "deleted":
                await InvalidateExceptionAsync(exceptionEvent.TenantId, exceptionEvent.ExceptionId, cancellationToken)
                    .ConfigureAwait(false);
                break;

            case "updated":
                await InvalidateExceptionAsync(exceptionEvent.TenantId, exceptionEvent.ExceptionId, cancellationToken)
                    .ConfigureAwait(false);
                await WarmExceptionAsync(exceptionEvent.TenantId, exceptionEvent.ExceptionId, cancellationToken)
                    .ConfigureAwait(false);
                break;

            default:
                _logger.LogWarning("Unknown exception event type: {EventType}", exceptionEvent.EventType);
                break;
        }

        PolicyEngineTelemetry.RecordExceptionCacheOperation(exceptionEvent.TenantId, $"event_{exceptionEvent.EventType}");
    }

    /// <summary>
    /// Loads one exception from the repository and, if it is active, merges it into the cache.
    /// Unparseable ids, missing exceptions and non-active exceptions are ignored.
    /// </summary>
    private async Task WarmExceptionAsync(string tenantId, string exceptionId, CancellationToken cancellationToken)
    {
        if (!Guid.TryParse(exceptionId, out var exceptionGuid))
        {
            _logger.LogWarning("Unable to parse exception id {ExceptionId} for tenant {TenantId}", exceptionId, tenantId);
            return;
        }

        var exception = await _repository.GetByIdAsync(tenantId, exceptionGuid, cancellationToken)
            .ConfigureAwait(false);
        if (exception is null || exception.Status != ExceptionStatus.Active)
        {
            return;
        }

        var now = _timeProvider.GetUtcNow();
        var entry = new ExceptionCacheEntry
        {
            ExceptionId = exception.Id.ToString(),
            AssetId = string.IsNullOrWhiteSpace(exception.ProjectId) ? "*" : exception.ProjectId!,
            AdvisoryId = null,
            CveId = null,
            DecisionOverride = "allow",
            ExceptionType = "waiver",
            Priority = 0,
            EffectiveFrom = exception.CreatedAt,
            ExpiresAt = exception.ExpiresAt,
            CachedAt = now,
            ExceptionName = exception.Name,
        };

        // Merge rather than batch-set: SetBatchAsync overwrites the whole asset key, which
        // would drop every other exception cached under the same key (e.g. "*:all").
        // SetAsync leaves the version untouched, so bump it here.
        await SetAsync(tenantId, entry, cancellationToken).ConfigureAwait(false);
        await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

        _logger.LogDebug(
            "Warmed cache with {Count} entries for exception {ExceptionId}",
            1,
            exceptionId);
    }

    /// <summary>
    /// Reads the tenant version, stores version + 1 and returns the new value.
    /// This is a read followed by a write, so it is not atomic at this layer.
    /// </summary>
    private async Task<long> IncrementVersionAsync(string tenantId, CancellationToken cancellationToken)
    {
        var versionKey = GetVersionKey(tenantId);
        var current = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);
        var newVersion = current + 1;

        var cacheOptions = new CacheEntryOptions
        {
            TimeToLive = TimeSpan.FromMinutes(_options.DefaultTtlMinutes + 10)
        };

        await _versionCache.SetAsync(versionKey, newVersion, cacheOptions, cancellationToken).ConfigureAwait(false);
        return newVersion;
    }

    /// <summary>
    /// Stores the time and entry count of the latest warm run for the tenant.
    /// </summary>
    private async Task UpdateWarmStatsAsync(string tenantId, DateTimeOffset warmAt, int count, CancellationToken cancellationToken)
    {
        var statsKey = GetStatsKey(tenantId);
        var stats = new Dictionary<string, string>
        {
            ["lastWarmAt"] = warmAt.ToString("O", CultureInfo.InvariantCulture),
            // Invariant culture keeps the stored value machine-readable on any host locale.
            ["lastWarmCount"] = count.ToString(CultureInfo.InvariantCulture),
        };

        var cacheOptions = new CacheEntryOptions
        {
            TimeToLive = TimeSpan.FromMinutes(_options.DefaultTtlMinutes + 30)
        };

        await _statsCache.SetAsync(statsKey, stats, cacheOptions, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Time until the entry expires, or the configured default TTL when the entry has
    /// no expiry or has already expired.
    /// </summary>
    private TimeSpan ComputeTtl(ExceptionCacheEntry entry)
    {
        if (entry.ExpiresAt.HasValue)
        {
            var ttl = entry.ExpiresAt.Value - _timeProvider.GetUtcNow();
            if (ttl > TimeSpan.Zero)
            {
                return ttl;
            }
        }

        return TimeSpan.FromMinutes(_options.DefaultTtlMinutes);
    }

    /// <summary>
    /// Keeps entries effective at <paramref name="asOf"/> (started, and not yet expired)
    /// and orders them by descending priority.
    /// </summary>
    private static ImmutableArray<ExceptionCacheEntry> FilterEffective(
        IEnumerable<ExceptionCacheEntry> entries,
        DateTimeOffset asOf) =>
        entries
            .Where(e => e.EffectiveFrom <= asOf && (e.ExpiresAt is null || e.ExpiresAt > asOf))
            .OrderByDescending(e => e.Priority)
            .ToImmutableArray();

    private static string GetAssetKey(string tenantId, string assetId, string? advisoryId) =>
        $"{tenantId}:{assetId}:{advisoryId ?? "all"}";

    private static string GetExceptionIndexKey(string tenantId, string exceptionId) =>
        $"{tenantId}:idx:{exceptionId}";

    // Version and stats keys are both the bare tenant id; they are stored through
    // separate caches created with different key prefixes.
    private static string GetVersionKey(string tenantId) =>
        $"{tenantId}";

    private static string GetStatsKey(string tenantId) =>
        $"{tenantId}";
}