// File: git.stella-ops.org/src/Policy/StellaOps.Policy.Engine/ExceptionCache/MessagingExceptionEffectiveCache.cs
// 586 lines, 23 KiB, C#

using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Messaging;
using StellaOps.Messaging.Abstractions;
using StellaOps.Policy.Engine.Options;
using StellaOps.Policy.Engine.Telemetry;
using StellaOps.Policy.Persistence.Postgres.Models;
using StellaOps.Policy.Persistence.Postgres.Repositories;
namespace StellaOps.Policy.Engine.ExceptionCache;
/// <summary>
/// Transport-agnostic exception effective cache using StellaOps.Messaging abstractions.
/// Works with any configured transport (Valkey, PostgreSQL, InMemory).
/// </summary>
internal sealed class MessagingExceptionEffectiveCache : IExceptionEffectiveCache
{
// Per-asset entry lists; the constructor creates this cache with KeyPrefix = EntryKeyPrefix.
private readonly IDistributedCache<string, List<ExceptionCacheEntry>> _entryCache;
// Reverse index: exception index key -> asset keys whose lists contain that exception.
private readonly ISetStore<string, string> _exceptionIndex;
// Per-tenant cache version counter; created with KeyPrefix = VersionKeyPrefix.
private readonly IDistributedCache<string, long> _versionCache;
// Per-tenant warm statistics (string key/value pairs); created with KeyPrefix = StatsKeyPrefix.
private readonly IDistributedCache<string, Dictionary<string, string>> _statsCache;
// Source of truth used when warming the cache.
private readonly IExceptionRepository _repository;
private readonly ILogger<MessagingExceptionEffectiveCache> _logger;
// Cache tuning values (DefaultTtlMinutes, MaxEntriesPerTenant are read in this class).
private readonly ExceptionCacheOptions _options;
// Injected clock used for TTL computation and timestamps.
private readonly TimeProvider _timeProvider;
// Key prefixes for the backing stores.
private const string EntryKeyPrefix = "exc:entry";
// NOTE(review): not referenced in the visible code; the set store is created by name ("exc-exception-index").
private const string IndexKeyPrefix = "exc:index";
private const string VersionKeyPrefix = "exc:version";
private const string StatsKeyPrefix = "exc:stats";
/// <summary>
/// Creates the cache and resolves its backing stores from the supplied factories.
/// </summary>
/// <param name="cacheFactory">Factory used to create the entry, version and stats caches.</param>
/// <param name="setStoreFactory">Factory used to create the exception-to-asset-key index.</param>
/// <param name="repository">Repository the cache is warmed from.</param>
/// <param name="logger">Logger for cache operations.</param>
/// <param name="options">Policy engine options; when absent, default <see cref="ExceptionCacheOptions"/> are used.</param>
/// <param name="timeProvider">Clock used for TTLs and timestamps.</param>
/// <exception cref="ArgumentNullException">
/// Any of <paramref name="cacheFactory"/>, <paramref name="setStoreFactory"/>, <paramref name="repository"/>,
/// <paramref name="logger"/> or <paramref name="timeProvider"/> is <c>null</c>.
/// </exception>
public MessagingExceptionEffectiveCache(
    IDistributedCacheFactory cacheFactory,
    ISetStoreFactory setStoreFactory,
    IExceptionRepository repository,
    ILogger<MessagingExceptionEffectiveCache> logger,
    IOptions<PolicyEngineOptions> options,
    TimeProvider timeProvider)
{
    // Validate every required dependency before touching the factories, so a bad
    // registration fails fast instead of after some backing stores were already created.
    ArgumentNullException.ThrowIfNull(cacheFactory);
    ArgumentNullException.ThrowIfNull(setStoreFactory);
    ArgumentNullException.ThrowIfNull(repository);
    ArgumentNullException.ThrowIfNull(logger);
    ArgumentNullException.ThrowIfNull(timeProvider);

    _repository = repository;
    _logger = logger;
    _timeProvider = timeProvider;

    // Options are optional: a missing wrapper, a missing Value or a missing section all fall back to defaults.
    _options = options?.Value?.ExceptionCache ?? new ExceptionCacheOptions();

    _entryCache = cacheFactory.Create<string, List<ExceptionCacheEntry>>(new CacheOptions { KeyPrefix = EntryKeyPrefix });
    _exceptionIndex = setStoreFactory.Create<string, string>("exc-exception-index");
    _versionCache = cacheFactory.Create<string, long>(new CacheOptions { KeyPrefix = VersionKeyPrefix });
    _statsCache = cacheFactory.Create<string, Dictionary<string, string>>(new CacheOptions { KeyPrefix = StatsKeyPrefix });
}
/// <summary>
/// Reads the cached exception entries for one asset. When <paramref name="advisoryId"/> is given,
/// the advisory-specific list is read first; the asset-wide ("all") list is always read as well.
/// </summary>
/// <param name="tenantId">Tenant owning the asset.</param>
/// <param name="assetId">Asset to look up.</param>
/// <param name="advisoryId">Optional advisory to narrow the lookup.</param>
/// <param name="asOf">Instant the entries must be effective at.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>
/// Entries effective at <paramref name="asOf"/>, ordered by descending priority, together with the
/// tenant cache version, whether any key was found, and the elapsed milliseconds.
/// </returns>
public async Task<ExceptionCacheQueryResult> GetForAssetAsync(
    string tenantId,
    string assetId,
    string? advisoryId,
    DateTimeOffset asOf,
    CancellationToken cancellationToken = default)
{
    var stopwatch = Stopwatch.StartNew();

    // Keys in lookup order: advisory-specific (if requested), then asset-wide.
    var lookupKeys = new List<string>(2);
    if (advisoryId is not null)
    {
        lookupKeys.Add(GetAssetKey(tenantId, assetId, advisoryId));
    }

    lookupKeys.Add(GetAssetKey(tenantId, assetId, null));

    var collected = new List<ExceptionCacheEntry>();
    var cacheHit = false;
    foreach (var key in lookupKeys)
    {
        var lookup = await _entryCache.GetAsync(key, cancellationToken).ConfigureAwait(false);
        if (!lookup.HasValue || lookup.Value is null)
        {
            continue;
        }

        collected.AddRange(lookup.Value);
        cacheHit = true;
    }

    // Keep only entries effective at asOf; highest priority first.
    var effective =
        (from candidate in collected
         where candidate.EffectiveFrom <= asOf && (candidate.ExpiresAt is null || candidate.ExpiresAt > asOf)
         orderby candidate.Priority descending
         select candidate).ToImmutableArray();

    var cacheVersion = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);
    stopwatch.Stop();

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, cacheHit ? "hit" : "miss");

    return new ExceptionCacheQueryResult
    {
        Entries = effective,
        FromCache = cacheHit,
        CacheVersion = cacheVersion,
        QueryDurationMs = stopwatch.ElapsedMilliseconds,
    };
}
/// <summary>
/// Reads the asset-wide ("all") cached entries for each asset in <paramref name="assetIds"/>.
/// Advisory-specific keys are not consulted by this method.
/// </summary>
/// <param name="tenantId">Tenant owning the assets.</param>
/// <param name="assetIds">Assets to look up; the result is keyed case-insensitively by these ids.</param>
/// <param name="asOf">Instant the entries must be effective at.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>
/// One result per asset id. Every result carries the same cache version (read once up front)
/// and a <c>QueryDurationMs</c> of 0.
/// </returns>
public async Task<IReadOnlyDictionary<string, ExceptionCacheQueryResult>> GetBatchAsync(
    string tenantId,
    IReadOnlyList<string> assetIds,
    DateTimeOffset asOf,
    CancellationToken cancellationToken = default)
{
    var byAsset = new Dictionary<string, ExceptionCacheQueryResult>(StringComparer.OrdinalIgnoreCase);
    var cacheVersion = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    foreach (var assetId in assetIds)
    {
        var lookup = await _entryCache
            .GetAsync(GetAssetKey(tenantId, assetId, null), cancellationToken)
            .ConfigureAwait(false);

        var effective = ImmutableArray<ExceptionCacheEntry>.Empty;
        var cacheHit = false;

        if (lookup.HasValue && lookup.Value is { } cached)
        {
            // Keep only entries effective at asOf; highest priority first.
            effective =
                (from candidate in cached
                 where candidate.EffectiveFrom <= asOf && (candidate.ExpiresAt is null || candidate.ExpiresAt > asOf)
                 orderby candidate.Priority descending
                 select candidate).ToImmutableArray();
            cacheHit = true;
        }

        byAsset[assetId] = new ExceptionCacheQueryResult
        {
            Entries = effective,
            FromCache = cacheHit,
            CacheVersion = cacheVersion,
            QueryDurationMs = 0,
        };
    }

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "batch_get");
    return byAsset;
}
/// <summary>
/// Adds or replaces a single entry in the cached list for its asset/advisory key and records
/// the key in the exception index so the entry can later be invalidated by exception id.
/// </summary>
/// <param name="tenantId">Tenant owning the entry.</param>
/// <param name="entry">Entry to store; any existing entry with the same <c>ExceptionId</c> under the key is replaced.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <exception cref="ArgumentNullException"><paramref name="entry"/> is <c>null</c>.</exception>
/// <remarks>This method does not change the tenant cache version.</remarks>
public async Task SetAsync(
    string tenantId,
    ExceptionCacheEntry entry,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(entry);

    var assetKey = GetAssetKey(tenantId, entry.AssetId, entry.AdvisoryId);
    var exceptionIndexKey = GetExceptionIndexKey(tenantId, entry.ExceptionId);

    var existingResult = await _entryCache.GetAsync(assetKey, cancellationToken).ConfigureAwait(false);

    // Work on a private copy so the list instance handed back by the cache is never mutated in place.
    var entries = existingResult.HasValue && existingResult.Value is not null
        ? new List<ExceptionCacheEntry>(existingResult.Value)
        : new List<ExceptionCacheEntry>();

    // Replace any previous entry for the same exception.
    entries.RemoveAll(e => e.ExceptionId == entry.ExceptionId);
    entries.Add(entry);

    // The key holds entries for several exceptions, so it must live as long as the
    // longest-lived one (same rule as SetBatchAsync). Using only the new entry's TTL
    // would evict longer-lived siblings early.
    var ttl = entries.Max(ComputeTtl);
    var cacheOptions = new CacheEntryOptions { TimeToLive = ttl };

    await _entryCache.SetAsync(assetKey, entries, cacheOptions, cancellationToken).ConfigureAwait(false);

    // Keep the index slightly longer than the entry so invalidation can still find the key.
    await _exceptionIndex.AddAsync(exceptionIndexKey, assetKey, cancellationToken).ConfigureAwait(false);
    await _exceptionIndex.SetExpirationAsync(exceptionIndexKey, ttl.Add(TimeSpan.FromMinutes(5)), cancellationToken)
        .ConfigureAwait(false);

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "set");
}
/// <summary>
/// Stores a batch of entries. Entries are grouped by asset/advisory key and each group
/// replaces whatever list was previously cached under that key. The tenant cache version
/// is incremented once at the end.
/// </summary>
/// <param name="tenantId">Tenant owning the entries.</param>
/// <param name="entries">Entries to store.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <exception cref="ArgumentNullException"><paramref name="entries"/> is <c>null</c>.</exception>
public async Task SetBatchAsync(
    string tenantId,
    IEnumerable<ExceptionCacheEntry> entries,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(entries);

    var count = 0;

    // Group entries by asset+advisory key.
    var groupedEntries = entries
        .GroupBy(e => GetAssetKey(tenantId, e.AssetId, e.AdvisoryId))
        .ToDictionary(g => g.Key, g => g.ToList());

    // Longest index expiration applied so far per exception index key. An exception can
    // appear under several asset keys; a later, shorter-lived group must not shrink the
    // index lifetime below what an earlier group needs.
    var indexExpirations = new Dictionary<string, TimeSpan>(StringComparer.Ordinal);

    foreach (var (assetKey, assetEntries) in groupedEntries)
    {
        // The key lives as long as its longest-lived entry.
        var ttl = assetEntries.Max(ComputeTtl);
        var cacheOptions = new CacheEntryOptions { TimeToLive = ttl };
        await _entryCache.SetAsync(assetKey, assetEntries, cacheOptions, cancellationToken).ConfigureAwait(false);

        foreach (var entry in assetEntries)
        {
            var exceptionIndexKey = GetExceptionIndexKey(tenantId, entry.ExceptionId);
            await _exceptionIndex.AddAsync(exceptionIndexKey, assetKey, cancellationToken).ConfigureAwait(false);

            var indexTtl = ttl.Add(TimeSpan.FromMinutes(5));
            if (indexExpirations.TryGetValue(exceptionIndexKey, out var applied) && applied > indexTtl)
            {
                indexTtl = applied;
            }

            indexExpirations[exceptionIndexKey] = indexTtl;
            await _exceptionIndex.SetExpirationAsync(exceptionIndexKey, indexTtl, cancellationToken)
                .ConfigureAwait(false);
        }

        count += assetEntries.Count;
    }

    await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "set_batch");
    _logger.LogDebug("Set {Count} exception cache entries for tenant {TenantId}", count, tenantId);
}
/// <summary>
/// Removes every cached entry belonging to <paramref name="exceptionId"/>. The asset keys to
/// visit come from the exception index; the index key is deleted afterwards and the tenant
/// cache version is incremented.
/// </summary>
/// <param name="tenantId">Tenant owning the exception.</param>
/// <param name="exceptionId">Exception whose entries are removed.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public async Task InvalidateExceptionAsync(
    string tenantId,
    string exceptionId,
    CancellationToken cancellationToken = default)
{
    var exceptionIndexKey = GetExceptionIndexKey(tenantId, exceptionId);

    // All asset keys whose lists may contain this exception.
    var assetKeys = await _exceptionIndex.GetMembersAsync(exceptionIndexKey, cancellationToken).ConfigureAwait(false);

    foreach (var assetKey in assetKeys)
    {
        var cached = await _entryCache.GetAsync(assetKey, cancellationToken).ConfigureAwait(false);
        if (!cached.HasValue || cached.Value is null)
        {
            continue;
        }

        // Build a new list rather than mutating the instance handed back by the cache.
        var remaining = cached.Value.Where(e => e.ExceptionId != exceptionId).ToList();

        if (remaining.Count == 0)
        {
            await _entryCache.InvalidateAsync(assetKey, cancellationToken).ConfigureAwait(false);
            continue;
        }

        // Surviving entries keep the lifetime they would get on a normal write
        // (longest computed TTL, as in SetBatchAsync) instead of being reset to the default TTL.
        var cacheOptions = new CacheEntryOptions { TimeToLive = remaining.Max(ComputeTtl) };
        await _entryCache.SetAsync(assetKey, remaining, cacheOptions, cancellationToken).ConfigureAwait(false);
    }

    await _exceptionIndex.DeleteAsync(exceptionIndexKey, cancellationToken).ConfigureAwait(false);

    await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "invalidate_exception");
    _logger.LogInformation(
        "Invalidated exception {ExceptionId} affecting {Count} assets for tenant {TenantId}",
        exceptionId, assetKeys.Count, tenantId);
}
/// <summary>
/// Drops every cached entry list for one asset by pattern and increments the tenant cache version.
/// The exception index is not modified by this method.
/// </summary>
/// <param name="tenantId">Tenant owning the asset.</param>
/// <param name="assetId">Asset whose keys are invalidated.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public async Task InvalidateAssetAsync(
    string tenantId,
    string assetId,
    CancellationToken cancellationToken = default)
{
    // NOTE(review): the pattern includes EntryKeyPrefix while keys passed to GetAsync/SetAsync do not;
    // confirm InvalidateByPatternAsync matches against the fully-prefixed key.
    var assetPattern = string.Join(":", EntryKeyPrefix, tenantId, assetId, "*");

    var removed = await _entryCache
        .InvalidateByPatternAsync(assetPattern, cancellationToken)
        .ConfigureAwait(false);

    await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "invalidate_asset");
    _logger.LogDebug("Invalidated {Count} cache keys for asset {AssetId}", removed, assetId);
}
/// <summary>
/// Drops all cached entry lists for a tenant by pattern, then removes the tenant's version
/// and stats keys. The exception index is not modified by this method.
/// </summary>
/// <param name="tenantId">Tenant to invalidate.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public async Task InvalidateTenantAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    // NOTE(review): the pattern includes EntryKeyPrefix while keys passed to GetAsync/SetAsync do not;
    // confirm InvalidateByPatternAsync matches against the fully-prefixed key.
    var tenantPattern = string.Concat(EntryKeyPrefix, ":", tenantId, ":*");
    var removed = await _entryCache
        .InvalidateByPatternAsync(tenantPattern, cancellationToken)
        .ConfigureAwait(false);

    // Removing the version key means GetVersionAsync reports 0 until the next increment.
    await _versionCache.InvalidateAsync(GetVersionKey(tenantId), cancellationToken).ConfigureAwait(false);
    await _statsCache.InvalidateAsync(GetStatsKey(tenantId), cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "invalidate_tenant");
    _logger.LogInformation("Invalidated {Count} cache keys for tenant {TenantId}", removed, tenantId);
}
/// <summary>
/// Loads the tenant's active exceptions from the repository (at most <c>MaxEntriesPerTenant</c>),
/// maps each one to a cache entry, stores them via <see cref="SetBatchAsync"/> and records warm stats.
/// </summary>
/// <param name="tenantId">Tenant to warm.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <remarks>
/// Any exception thrown while warming is logged, counted via telemetry and rethrown.
/// When the repository returns no active exceptions the method returns without writing stats.
/// </remarks>
public async Task WarmAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    using var activity = PolicyEngineTelemetry.ActivitySource.StartActivity(
        "exception.cache.warm", ActivityKind.Internal);
    activity?.SetTag("tenant_id", tenantId);

    var stopwatch = Stopwatch.StartNew();
    var warmStartedAt = _timeProvider.GetUtcNow();
    _logger.LogInformation("Starting cache warm for tenant {TenantId}", tenantId);

    try
    {
        var activeExceptions = await _repository.GetAllAsync(
            tenantId,
            ExceptionStatus.Active,
            limit: _options.MaxEntriesPerTenant,
            offset: 0,
            cancellationToken: cancellationToken).ConfigureAwait(false);

        if (activeExceptions.Count == 0)
        {
            _logger.LogDebug("No active exceptions to warm for tenant {TenantId}", tenantId);
            return;
        }

        var warmed = new List<ExceptionCacheEntry>(activeExceptions.Count);
        foreach (var source in activeExceptions)
        {
            // Exceptions without a project are cached under the wildcard asset "*".
            var assetScope = string.IsNullOrWhiteSpace(source.ProjectId) ? "*" : source.ProjectId!;

            var cacheEntry = new ExceptionCacheEntry
            {
                ExceptionId = source.Id.ToString(),
                AssetId = assetScope,
                AdvisoryId = null,
                CveId = null,
                DecisionOverride = "allow",
                ExceptionType = "waiver",
                Priority = 0,
                EffectiveFrom = source.CreatedAt,
                ExpiresAt = source.ExpiresAt,
                CachedAt = warmStartedAt,
                ExceptionName = source.Name,
            };
            warmed.Add(cacheEntry);
        }

        if (warmed.Count > 0)
        {
            await SetBatchAsync(tenantId, warmed, cancellationToken).ConfigureAwait(false);
        }

        stopwatch.Stop();

        await UpdateWarmStatsAsync(tenantId, warmStartedAt, warmed.Count, cancellationToken).ConfigureAwait(false);
        PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "warm");

        _logger.LogInformation(
            "Warmed cache with {Count} entries from {ExceptionCount} exceptions for tenant {TenantId} in {Duration}ms",
            warmed.Count, activeExceptions.Count, tenantId, stopwatch.ElapsedMilliseconds);
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Failed to warm cache for tenant {TenantId}", tenantId);
        PolicyEngineTelemetry.RecordError("exception_cache_warm", tenantId);
        throw;
    }
}
/// <summary>
/// Returns a placeholder summary for the tenant: all counters are zero and the breakdown
/// dictionaries are empty. Only the cache version and the computation time are real values.
/// </summary>
/// <param name="tenantId">Tenant to summarize.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A summary carrying the current cache version and the current time.</returns>
public async Task<ExceptionCacheSummary> GetSummaryAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    // A real summary would require scanning keys, which the transport-agnostic
    // abstractions do not expose efficiently; transport-specific code would be needed.
    var computedAt = _timeProvider.GetUtcNow();
    var cacheVersion = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    var summary = new ExceptionCacheSummary
    {
        TenantId = tenantId,
        TotalEntries = 0,
        UniqueExceptions = 0,
        UniqueAssets = 0,
        ByType = new Dictionary<string, int>(),
        ByDecision = new Dictionary<string, int>(),
        ExpiringWithinHour = 0,
        CacheVersion = cacheVersion,
        ComputedAt = computedAt,
    };

    return summary;
}
/// <summary>
/// Returns placeholder statistics: every counter is zero and every optional value is <c>null</c>.
/// Neither <paramref name="tenantId"/> nor <paramref name="cancellationToken"/> is used.
/// </summary>
/// <param name="tenantId">Ignored by this implementation.</param>
/// <param name="cancellationToken">Ignored by this implementation.</param>
/// <returns>A completed task holding the placeholder statistics.</returns>
public Task<ExceptionCacheStats> GetStatsAsync(
    string? tenantId = null,
    CancellationToken cancellationToken = default)
{
    // Real statistics need implementation-specific queries that the messaging
    // abstractions do not offer; transport-specific code would be required.
    var placeholder = new ExceptionCacheStats
    {
        TotalEntries = 0,
        TotalTenants = 0,
        MemoryUsedBytes = null,
        HitCount = 0,
        MissCount = 0,
        LastWarmAt = null,
        LastInvalidationAt = null,
    };

    return Task.FromResult(placeholder);
}
/// <summary>
/// Reads the tenant's cache version from the version cache.
/// </summary>
/// <param name="tenantId">Tenant whose version is read.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The stored version, or 0 when no version is stored for the tenant.</returns>
public async Task<long> GetVersionAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    var lookup = await _versionCache
        .GetAsync(GetVersionKey(tenantId), cancellationToken)
        .ConfigureAwait(false);

    if (!lookup.HasValue)
    {
        return 0;
    }

    return lookup.Value;
}
/// <summary>
/// Applies an exception lifecycle event to the cache. The event type is matched case-insensitively:
/// "activated"/"created" warm the exception, "expired"/"revoked"/"deleted" invalidate it,
/// "updated" invalidates and then re-warms it; any other type is logged as a warning.
/// </summary>
/// <param name="exceptionEvent">Event to handle.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <exception cref="ArgumentNullException"><paramref name="exceptionEvent"/> is <c>null</c>.</exception>
public async Task HandleExceptionEventAsync(
    ExceptionEvent exceptionEvent,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(exceptionEvent);

    var tenantId = exceptionEvent.TenantId;
    var exceptionId = exceptionEvent.ExceptionId;
    var eventType = exceptionEvent.EventType;

    using var activity = PolicyEngineTelemetry.ActivitySource.StartActivity(
        "exception.cache.handle_event", ActivityKind.Internal);
    activity?.SetTag("tenant_id", tenantId);
    activity?.SetTag("event_type", eventType);
    activity?.SetTag("exception_id", exceptionId);

    _logger.LogDebug(
        "Handling exception event {EventType} for exception {ExceptionId} tenant {TenantId}",
        eventType, exceptionId, tenantId);

    switch (eventType.ToLowerInvariant())
    {
        // Newly effective exceptions only need to be loaded.
        case "activated":
        case "created":
            await WarmExceptionAsync(tenantId, exceptionId, cancellationToken).ConfigureAwait(false);
            break;

        // Exceptions that stopped applying are removed.
        case "expired":
        case "revoked":
        case "deleted":
            await InvalidateExceptionAsync(tenantId, exceptionId, cancellationToken).ConfigureAwait(false);
            break;

        // Changed exceptions are removed and then reloaded.
        case "updated":
            await InvalidateExceptionAsync(tenantId, exceptionId, cancellationToken).ConfigureAwait(false);
            await WarmExceptionAsync(tenantId, exceptionId, cancellationToken).ConfigureAwait(false);
            break;

        default:
            _logger.LogWarning("Unknown exception event type: {EventType}", eventType);
            break;
    }

    // The telemetry label uses the event type exactly as received (not lower-cased).
    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "event_" + eventType);
}
/// <summary>
/// Loads a single exception from the repository and, if it is active, merges its cache entry
/// into the list stored under its asset key, then increments the tenant cache version.
/// </summary>
/// <param name="tenantId">Tenant owning the exception.</param>
/// <param name="exceptionId">Exception id; must parse as a <see cref="Guid"/>, otherwise a warning is logged and nothing happens.</param>
/// <param name="cancellationToken">Cancellation token.</param>
private async Task WarmExceptionAsync(string tenantId, string exceptionId, CancellationToken cancellationToken)
{
    if (!Guid.TryParse(exceptionId, out var exceptionGuid))
    {
        _logger.LogWarning("Unable to parse exception id {ExceptionId} for tenant {TenantId}", exceptionId, tenantId);
        return;
    }

    var exception = await _repository.GetByIdAsync(tenantId, exceptionGuid, cancellationToken)
        .ConfigureAwait(false);

    // Only active exceptions are cached.
    if (exception is null || exception.Status != ExceptionStatus.Active)
    {
        return;
    }

    var entry = new ExceptionCacheEntry
    {
        ExceptionId = exception.Id.ToString(),
        // Exceptions without a project are cached under the wildcard asset "*".
        AssetId = string.IsNullOrWhiteSpace(exception.ProjectId) ? "*" : exception.ProjectId!,
        AdvisoryId = null,
        CveId = null,
        DecisionOverride = "allow",
        ExceptionType = "waiver",
        Priority = 0,
        EffectiveFrom = exception.CreatedAt,
        ExpiresAt = exception.ExpiresAt,
        CachedAt = _timeProvider.GetUtcNow(),
        ExceptionName = exception.Name,
    };

    // SetAsync merges into the list already cached under the asset key. SetBatchAsync would
    // replace that list wholesale and silently drop every other exception sharing the key
    // (for example all project-less exceptions under "<tenant>:*:all").
    await SetAsync(tenantId, entry, cancellationToken).ConfigureAwait(false);

    // SetAsync does not bump the version; do it here so the cache change is still observable.
    await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    _logger.LogDebug(
        "Warmed cache with {Count} entries for exception {ExceptionId}",
        1, exceptionId);
}
/// <summary>
/// Reads the tenant's current cache version, adds one, and stores the result with a lifetime
/// of <c>DefaultTtlMinutes + 10</c> minutes.
/// </summary>
/// <param name="tenantId">Tenant whose version is bumped.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The newly stored version.</returns>
private async Task<long> IncrementVersionAsync(string tenantId, CancellationToken cancellationToken)
{
    // Read-then-write: no locking or atomic increment is visible in this method.
    var previous = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);
    var next = previous + 1;

    var lifetime = TimeSpan.FromMinutes(_options.DefaultTtlMinutes + 10);
    await _versionCache
        .SetAsync(GetVersionKey(tenantId), next, new CacheEntryOptions { TimeToLive = lifetime }, cancellationToken)
        .ConfigureAwait(false);

    return next;
}
/// <summary>
/// Stores the time and size of the latest warm run under the tenant's stats key, with a
/// lifetime of <c>DefaultTtlMinutes + 30</c> minutes.
/// </summary>
/// <param name="tenantId">Tenant the stats belong to.</param>
/// <param name="warmAt">Time of the warm run; stored in round-trip ("O") format.</param>
/// <param name="count">Number of entries written by the warm run.</param>
/// <param name="cancellationToken">Cancellation token.</param>
private async Task UpdateWarmStatsAsync(string tenantId, DateTimeOffset warmAt, int count, CancellationToken cancellationToken)
{
    var statsKey = GetStatsKey(tenantId);

    // Both values are machine-readable, so both are formatted with the invariant culture.
    var stats = new Dictionary<string, string>
    {
        ["lastWarmAt"] = warmAt.ToString("O", CultureInfo.InvariantCulture),
        ["lastWarmCount"] = count.ToString(CultureInfo.InvariantCulture),
    };

    var cacheOptions = new CacheEntryOptions
    {
        TimeToLive = TimeSpan.FromMinutes(_options.DefaultTtlMinutes + 30)
    };

    await _statsCache.SetAsync(statsKey, stats, cacheOptions, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Computes how long an entry should stay cached: the time remaining until its
/// <c>ExpiresAt</c> when that is in the future, otherwise <c>DefaultTtlMinutes</c>.
/// </summary>
/// <param name="entry">Entry whose lifetime is computed.</param>
/// <returns>A positive time-to-live.</returns>
private TimeSpan ComputeTtl(ExceptionCacheEntry entry)
{
    if (entry.ExpiresAt is { } expiresAt)
    {
        var remaining = expiresAt - _timeProvider.GetUtcNow();
        if (remaining > TimeSpan.Zero)
        {
            return remaining;
        }
    }

    // No expiry, or the expiry has already passed: fall back to the configured default.
    return TimeSpan.FromMinutes(_options.DefaultTtlMinutes);
}
/// <summary>
/// Builds the entry-cache key "<c>tenant:asset:advisory</c>"; a <c>null</c> advisory maps to "all".
/// </summary>
private static string GetAssetKey(string tenantId, string assetId, string? advisoryId)
{
    var advisorySegment = advisoryId ?? "all";
    return string.Concat(tenantId, ":", assetId, ":", advisorySegment);
}
/// <summary>
/// Builds the exception-index key "<c>tenant:idx:exceptionId</c>".
/// </summary>
private static string GetExceptionIndexKey(string tenantId, string exceptionId)
{
    return string.Concat(tenantId, ":idx:", exceptionId);
}
/// <summary>
/// Builds the version-cache key, which is the tenant id itself (empty string for <c>null</c>).
/// </summary>
private static string GetVersionKey(string tenantId)
{
    return tenantId ?? string.Empty;
}
/// <summary>
/// Builds the stats-cache key, which is the tenant id itself (empty string for <c>null</c>).
/// </summary>
private static string GetStatsKey(string tenantId)
{
    return tenantId ?? string.Empty;
}
}