up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-28 09:40:40 +02:00
parent 1c6730a1d2
commit 05da719048
206 changed files with 34741 additions and 1751 deletions

View File

@@ -0,0 +1,225 @@
using System.Collections.Immutable;
using System.Text.Json.Serialization;
namespace StellaOps.Policy.Engine.ExceptionCache;
/// <summary>
/// Cached exception entry for fast lookup during policy evaluation.
/// </summary>
/// <remarks>
/// One instance describes a single exception as it applies to a single asset. Every property is
/// pinned to a snake_case JSON name through <see cref="JsonPropertyNameAttribute"/>, which takes
/// precedence over any naming policy configured on the serializer.
/// </remarks>
public sealed record ExceptionCacheEntry
{
/// <summary>
/// Exception identifier. Entries are matched on this value when an exception is replaced or invalidated.
/// </summary>
[JsonPropertyName("exception_id")]
public required string ExceptionId { get; init; }
/// <summary>
/// Asset identifier this exception applies to.
/// </summary>
[JsonPropertyName("asset_id")]
public required string AssetId { get; init; }
/// <summary>
/// Advisory ID covered (null if applies to all advisories for asset).
/// The Redis cache stores null-advisory entries under the "all" key segment.
/// </summary>
[JsonPropertyName("advisory_id")]
public string? AdvisoryId { get; init; }
/// <summary>
/// CVE ID covered (null if applies to all CVEs for asset).
/// </summary>
[JsonPropertyName("cve_id")]
public string? CveId { get; init; }
/// <summary>
/// Decision override applied by this exception.
/// </summary>
[JsonPropertyName("decision_override")]
public required string DecisionOverride { get; init; }
/// <summary>
/// Exception type: waiver, override, temporary, permanent.
/// </summary>
[JsonPropertyName("exception_type")]
public required string ExceptionType { get; init; }
/// <summary>
/// Priority for conflict resolution (higher = more precedence).
/// Query results are ordered by this value, highest first.
/// </summary>
[JsonPropertyName("priority")]
public int Priority { get; init; }
/// <summary>
/// When the exception becomes effective (inclusive: an entry is applicable when this is at or before the evaluation time).
/// </summary>
[JsonPropertyName("effective_from")]
public DateTimeOffset EffectiveFrom { get; init; }
/// <summary>
/// When the exception expires (null = no expiration).
/// An entry is applicable only while this instant is strictly after the evaluation time.
/// </summary>
[JsonPropertyName("expires_at")]
public DateTimeOffset? ExpiresAt { get; init; }
/// <summary>
/// When this cache entry was created.
/// </summary>
[JsonPropertyName("cached_at")]
public DateTimeOffset CachedAt { get; init; }
/// <summary>
/// Original exception name for display.
/// </summary>
[JsonPropertyName("exception_name")]
public string? ExceptionName { get; init; }
}
/// <summary>
/// Result of querying exceptions for an asset.
/// </summary>
public sealed record ExceptionCacheQueryResult
{
/// <summary>
/// Applicable exceptions for the asset, ordered by priority (highest first).
/// Empty when nothing applicable was found.
/// </summary>
public required ImmutableArray<ExceptionCacheEntry> Entries { get; init; }
/// <summary>
/// Whether the result came from cache.
/// The Redis implementation sets this when at least one cache key existed and could be deserialized,
/// even if every entry in it was filtered out by the time window.
/// </summary>
public bool FromCache { get; init; }
/// <summary>
/// Cache version at time of query.
/// </summary>
public long CacheVersion { get; init; }
/// <summary>
/// Time taken to query in milliseconds.
/// Batch queries in the Redis implementation report 0 here.
/// </summary>
public long QueryDurationMs { get; init; }
}
/// <summary>
/// Summary of cached exceptions for a tenant.
/// </summary>
/// <remarks>
/// The Redis implementation inspects at most 1000 asset keys when aggregating, so the entry-derived
/// counts can be partial for large tenants.
/// </remarks>
public sealed record ExceptionCacheSummary
{
/// <summary>
/// Tenant identifier.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Total cached exception entries.
/// </summary>
public int TotalEntries { get; init; }
/// <summary>
/// Unique exceptions in cache.
/// </summary>
public int UniqueExceptions { get; init; }
/// <summary>
/// Unique assets with exceptions.
/// </summary>
public int UniqueAssets { get; init; }
/// <summary>
/// Counts by exception type.
/// </summary>
public required IReadOnlyDictionary<string, int> ByType { get; init; }
/// <summary>
/// Counts by decision override.
/// </summary>
public required IReadOnlyDictionary<string, int> ByDecision { get; init; }
/// <summary>
/// Entries expiring within the next hour.
/// </summary>
public int ExpiringWithinHour { get; init; }
/// <summary>
/// Cache version.
/// </summary>
public long CacheVersion { get; init; }
/// <summary>
/// When summary was computed.
/// </summary>
public DateTimeOffset ComputedAt { get; init; }
}
/// <summary>
/// Options for exception cache operations.
/// </summary>
/// <remarks>
/// Properties are settable (not init-only), unlike the other records in this namespace.
/// </remarks>
public sealed record ExceptionCacheOptions
{
/// <summary>
/// Default TTL for cache entries in minutes.
/// Used by the Redis cache when an entry has no expiry or its expiry is already in the past. Defaults to 60.
/// </summary>
public int DefaultTtlMinutes { get; set; } = 60;
/// <summary>
/// Whether to enable automatic cache warming. Defaults to true.
/// NOTE(review): not read by the cache implementation shown alongside this type; presumably consumed by a hosting service.
/// </summary>
public bool EnableAutoWarm { get; set; } = true;
/// <summary>
/// Warm interval in minutes. Defaults to 15.
/// NOTE(review): not read by the cache implementation shown alongside this type; presumably consumed by a hosting service.
/// </summary>
public int WarmIntervalMinutes { get; set; } = 15;
/// <summary>
/// Maximum entries per tenant. Defaults to 50000.
/// The Redis cache passes this as the query limit when listing active exceptions during a warm.
/// </summary>
public int MaxEntriesPerTenant { get; set; } = 50000;
/// <summary>
/// Whether to invalidate cache on exception events. Defaults to true.
/// NOTE(review): not read by the cache implementation shown alongside this type; confirm where it is enforced.
/// </summary>
public bool InvalidateOnEvents { get; set; } = true;
}
/// <summary>
/// Statistics for the exception cache.
/// </summary>
public sealed record ExceptionCacheStats
{
/// <summary>
/// Total entries in cache.
/// The Redis implementation reports the number of asset keys matched, not the number of entries inside them.
/// </summary>
public int TotalEntries { get; init; }
/// <summary>
/// Total tenants with cached data.
/// The Redis implementation counts tenant version keys.
/// </summary>
public int TotalTenants { get; init; }
/// <summary>
/// Memory used by cache in bytes (if available).
/// The Redis implementation reads the server-wide "used_memory" value, so this is not scoped to a tenant.
/// </summary>
public long? MemoryUsedBytes { get; init; }
/// <summary>
/// Cache hit count since last reset.
/// The Redis implementation currently always reports 0.
/// </summary>
public long HitCount { get; init; }
/// <summary>
/// Cache miss count since last reset.
/// The Redis implementation currently always reports 0.
/// </summary>
public long MissCount { get; init; }
/// <summary>
/// Last warm operation timestamp.
/// The Redis implementation currently always reports null.
/// </summary>
public DateTimeOffset? LastWarmAt { get; init; }
/// <summary>
/// Last invalidation timestamp.
/// The Redis implementation currently always reports null.
/// </summary>
public DateTimeOffset? LastInvalidationAt { get; init; }
}

View File

@@ -0,0 +1,156 @@
using System.Collections.Immutable;
namespace StellaOps.Policy.Engine.ExceptionCache;
/// <summary>
/// Interface for caching effective exception decisions per asset.
/// Supports warm/invalidation logic reacting to exception events.
/// </summary>
internal interface IExceptionEffectiveCache
{
/// <summary>
/// Gets applicable exceptions for an asset at a given time.
/// </summary>
/// <param name="tenantId">Tenant that owns the cached exceptions.</param>
/// <param name="assetId">Asset to look up.</param>
/// <param name="advisoryId">Advisory to narrow the lookup to, or null to consider only exceptions that apply to all advisories.</param>
/// <param name="asOf">Evaluation time used to decide which entries are in effect.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
/// <returns>The applicable entries together with cache metadata.</returns>
Task<ExceptionCacheQueryResult> GetForAssetAsync(
string tenantId,
string assetId,
string? advisoryId,
DateTimeOffset asOf,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets applicable exceptions for multiple assets.
/// </summary>
/// <param name="tenantId">Tenant that owns the cached exceptions.</param>
/// <param name="assetIds">Assets to look up.</param>
/// <param name="asOf">Evaluation time used to decide which entries are in effect.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
/// <returns>One query result per requested asset ID, keyed by asset ID.</returns>
Task<IReadOnlyDictionary<string, ExceptionCacheQueryResult>> GetBatchAsync(
string tenantId,
IReadOnlyList<string> assetIds,
DateTimeOffset asOf,
CancellationToken cancellationToken = default);
/// <summary>
/// Sets a cache entry.
/// </summary>
/// <param name="tenantId">Tenant that owns the entry.</param>
/// <param name="entry">Entry to store.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
Task SetAsync(
string tenantId,
ExceptionCacheEntry entry,
CancellationToken cancellationToken = default);
/// <summary>
/// Sets multiple cache entries in batch.
/// </summary>
/// <param name="tenantId">Tenant that owns the entries.</param>
/// <param name="entries">Entries to store.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
Task SetBatchAsync(
string tenantId,
IEnumerable<ExceptionCacheEntry> entries,
CancellationToken cancellationToken = default);
/// <summary>
/// Invalidates cache entries for an exception.
/// Called when an exception is modified/revoked/expired.
/// </summary>
/// <param name="tenantId">Tenant that owns the exception.</param>
/// <param name="exceptionId">Exception whose entries are removed.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
Task InvalidateExceptionAsync(
string tenantId,
string exceptionId,
CancellationToken cancellationToken = default);
/// <summary>
/// Invalidates cache entries for an asset.
/// Called when asset exceptions need re-evaluation.
/// </summary>
/// <param name="tenantId">Tenant that owns the asset.</param>
/// <param name="assetId">Asset whose entries are removed.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
Task InvalidateAssetAsync(
string tenantId,
string assetId,
CancellationToken cancellationToken = default);
/// <summary>
/// Invalidates all cache entries for a tenant.
/// </summary>
/// <param name="tenantId">Tenant whose cache is cleared.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
Task InvalidateTenantAsync(
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Warms the cache for a tenant by loading active exceptions from the repository.
/// </summary>
/// <param name="tenantId">Tenant whose cache is warmed.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
Task WarmAsync(
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets cache summary for a tenant.
/// </summary>
/// <param name="tenantId">Tenant to summarize.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
/// <returns>Aggregated counts for the tenant's cached entries.</returns>
Task<ExceptionCacheSummary> GetSummaryAsync(
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets cache statistics.
/// </summary>
/// <param name="tenantId">Tenant to report on, or null to report across all tenants.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
/// <returns>Cache-level statistics.</returns>
Task<ExceptionCacheStats> GetStatsAsync(
string? tenantId = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the current cache version for a tenant.
/// </summary>
/// <param name="tenantId">Tenant whose version is read.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
/// <returns>The current version number.</returns>
Task<long> GetVersionAsync(
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Processes an exception event and updates cache accordingly.
/// </summary>
/// <param name="exceptionEvent">Event describing the change; carries the tenant and exception identifiers.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
Task HandleExceptionEventAsync(
ExceptionEvent exceptionEvent,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Event representing a change to an exception.
/// </summary>
public sealed record ExceptionEvent
{
/// <summary>
/// Event type: activated, expired, revoked, updated, created, deleted.
/// The Redis cache lower-cases this value before dispatching, and logs a warning for any other value.
/// </summary>
public required string EventType { get; init; }
/// <summary>
/// Tenant identifier.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Exception identifier.
/// </summary>
public required string ExceptionId { get; init; }
/// <summary>
/// Exception name.
/// </summary>
public string? ExceptionName { get; init; }
/// <summary>
/// Exception type.
/// </summary>
public string? ExceptionType { get; init; }
/// <summary>
/// Affected asset IDs (if known). Defaults to an empty array.
/// </summary>
public ImmutableArray<string> AffectedAssetIds { get; init; } = ImmutableArray<string>.Empty;
/// <summary>
/// Affected advisory IDs (if known). Defaults to an empty array.
/// </summary>
public ImmutableArray<string> AffectedAdvisoryIds { get; init; } = ImmutableArray<string>.Empty;
/// <summary>
/// When the event occurred.
/// </summary>
public DateTimeOffset OccurredAt { get; init; }
/// <summary>
/// Correlation ID for tracing.
/// </summary>
public string? CorrelationId { get; init; }
}

View File

@@ -0,0 +1,725 @@
using System.Collections.Immutable;
using System.Diagnostics;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Policy.Engine.Options;
using StellaOps.Policy.Engine.Storage.Mongo.Repositories;
using StellaOps.Policy.Engine.Telemetry;
using StackExchange.Redis;
namespace StellaOps.Policy.Engine.ExceptionCache;
/// <summary>
/// Redis-backed exception effective cache with warm/invalidation support.
/// Key structure:
/// - Entry by asset: stellaops:exc:{tenant}:a:{asset}:{advisory|all} -> JSON array of entries
/// - Entry by exception: stellaops:exc:{tenant}:e:{exceptionId} -> JSON entry (reserved; not written by the current implementation)
/// - Index by exception: stellaops:exc:{tenant}:idx:e:{exceptionId} -> set of asset keys
/// - Version: stellaops:exc:{tenant}:v -> integer version
/// - Stats: stellaops:exc:{tenant}:stats -> hash with lastWarmAt and lastWarmCount fields
/// </summary>
internal sealed class RedisExceptionEffectiveCache : IExceptionEffectiveCache
{
// Redis connection used to obtain databases (key operations) and servers (key scans, INFO).
private readonly IConnectionMultiplexer _redis;
// Repository read when warming the cache: lists exceptions and their bindings.
private readonly IExceptionRepository _repository;
private readonly ILogger<RedisExceptionEffectiveCache> _logger;
// Cache settings (default TTL, per-tenant entry limit).
private readonly ExceptionCacheOptions _options;
// Clock abstraction used for TTL computation and timestamps.
private readonly TimeProvider _timeProvider;
// Common prefix of every Redis key this cache reads or writes.
private const string KeyPrefix = "stellaops:exc";
// Serializer settings for cached payloads. ExceptionCacheEntry pins its JSON names with
// JsonPropertyName attributes, so the camel-case policy does not rename those properties.
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
};
/// <summary>
/// Creates the cache on top of an existing Redis connection.
/// </summary>
/// <param name="redis">Redis connection multiplexer.</param>
/// <param name="repository">Repository used to load exceptions and bindings when warming.</param>
/// <param name="logger">Logger for cache diagnostics.</param>
/// <param name="options">Policy engine options; when null or when no cache section is set, default cache options are used.</param>
/// <param name="timeProvider">Clock used for TTLs and timestamps.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="redis"/>, <paramref name="repository"/>, <paramref name="logger"/> or
/// <paramref name="timeProvider"/> is null.
/// </exception>
public RedisExceptionEffectiveCache(
    IConnectionMultiplexer redis,
    IExceptionRepository repository,
    ILogger<RedisExceptionEffectiveCache> logger,
    IOptions<PolicyEngineOptions> options,
    TimeProvider timeProvider)
{
    ArgumentNullException.ThrowIfNull(redis);
    ArgumentNullException.ThrowIfNull(repository);
    ArgumentNullException.ThrowIfNull(logger);

    _redis = redis;
    _repository = repository;
    _logger = logger;

    // Options are resolved before the clock is validated, keeping the original failure order.
    ExceptionCacheOptions? configured = options is null ? null : options.Value.ExceptionCache;
    _options = configured ?? new ExceptionCacheOptions();

    ArgumentNullException.ThrowIfNull(timeProvider);
    _timeProvider = timeProvider;
}
/// <summary>
/// Looks up the exceptions in effect for one asset at <paramref name="asOf"/>.
/// </summary>
/// <remarks>
/// Reads the advisory-specific bucket (when <paramref name="advisoryId"/> is given) followed by the
/// "all advisories" bucket, merges them, keeps entries whose window contains <paramref name="asOf"/>
/// and orders them by descending priority. Records a "hit" or "miss" telemetry operation.
/// </remarks>
/// <param name="tenantId">Tenant that owns the cached exceptions.</param>
/// <param name="assetId">Asset to look up.</param>
/// <param name="advisoryId">Advisory to include, or null to read only the "all" bucket.</param>
/// <param name="asOf">Evaluation instant.</param>
/// <param name="cancellationToken">Forwarded to the version lookup.</param>
/// <returns>Matching entries plus cache metadata; <c>FromCache</c> is true when any bucket was found.</returns>
public async Task<ExceptionCacheQueryResult> GetForAssetAsync(
    string tenantId,
    string assetId,
    string? advisoryId,
    DateTimeOffset asOf,
    CancellationToken cancellationToken = default)
{
    var timer = Stopwatch.StartNew();
    var database = _redis.GetDatabase();

    var candidates = new List<ExceptionCacheEntry>();
    var foundInCache = false;

    // Reads one bucket and appends its entries; a missing or null payload leaves the state untouched.
    async Task ReadBucketAsync(string redisKey)
    {
        var payload = await database.StringGetAsync(redisKey).ConfigureAwait(false);
        if (!payload.HasValue)
        {
            return;
        }

        var cached = JsonSerializer.Deserialize<List<ExceptionCacheEntry>>((string)payload!, JsonOptions);
        if (cached is null)
        {
            return;
        }

        candidates.AddRange(cached);
        foundInCache = true;
    }

    // Advisory-specific entries first, then the entries that apply to every advisory.
    if (advisoryId is not null)
    {
        await ReadBucketAsync(GetAssetKey(tenantId, assetId, advisoryId)).ConfigureAwait(false);
    }

    await ReadBucketAsync(GetAssetKey(tenantId, assetId, null)).ConfigureAwait(false);

    bool IsInEffect(ExceptionCacheEntry candidate) =>
        candidate.EffectiveFrom <= asOf && (candidate.ExpiresAt is null || candidate.ExpiresAt > asOf);

    var applicable = candidates
        .Where(IsInEffect)
        .OrderByDescending(candidate => candidate.Priority)
        .ToImmutableArray();

    var cacheVersion = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);
    timer.Stop();

    var outcome = foundInCache ? "hit" : "miss";
    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, outcome);

    return new ExceptionCacheQueryResult
    {
        Entries = applicable,
        FromCache = foundInCache,
        CacheVersion = cacheVersion,
        QueryDurationMs = timer.ElapsedMilliseconds,
    };
}
/// <summary>
/// Looks up the "all advisories" bucket for several assets in one multi-key read.
/// </summary>
/// <remarks>
/// Advisory-specific buckets are not consulted. Results are keyed case-insensitively by asset ID,
/// every result reports a query duration of 0, and a single "batch_get" telemetry operation is recorded.
/// </remarks>
/// <param name="tenantId">Tenant that owns the cached exceptions.</param>
/// <param name="assetIds">Assets to look up.</param>
/// <param name="asOf">Evaluation instant used to filter entries.</param>
/// <param name="cancellationToken">Forwarded to the version lookup.</param>
/// <returns>One result per requested asset ID.</returns>
public async Task<IReadOnlyDictionary<string, ExceptionCacheQueryResult>> GetBatchAsync(
    string tenantId,
    IReadOnlyList<string> assetIds,
    DateTimeOffset asOf,
    CancellationToken cancellationToken = default)
{
    var byAsset = new Dictionary<string, ExceptionCacheQueryResult>(StringComparer.OrdinalIgnoreCase);
    var database = _redis.GetDatabase();

    var bucketKeys = assetIds
        .Select(assetId => (RedisKey)GetAssetKey(tenantId, assetId, null))
        .ToArray();
    var payloads = await database.StringGetAsync(bucketKeys).ConfigureAwait(false);
    var cacheVersion = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    for (var index = 0; index < assetIds.Count; index++)
    {
        var payload = payloads[index];

        // A missing key and a payload that deserializes to null are both treated as "not cached".
        List<ExceptionCacheEntry>? cached = payload.HasValue
            ? JsonSerializer.Deserialize<List<ExceptionCacheEntry>>((string)payload!, JsonOptions)
            : null;

        var applicable = ImmutableArray<ExceptionCacheEntry>.Empty;
        if (cached is not null)
        {
            applicable = cached
                .Where(e => e.EffectiveFrom <= asOf && (e.ExpiresAt is null || e.ExpiresAt > asOf))
                .OrderByDescending(e => e.Priority)
                .ToImmutableArray();
        }

        byAsset[assetIds[index]] = new ExceptionCacheQueryResult
        {
            Entries = applicable,
            FromCache = cached is not null,
            CacheVersion = cacheVersion,
            QueryDurationMs = 0,
        };
    }

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "batch_get");
    return byAsset;
}
/// <summary>
/// Adds or replaces one exception entry in the asset/advisory bucket it belongs to.
/// </summary>
/// <remarks>
/// The bucket is read, any previous entry with the same exception ID is dropped, and the merged list is
/// written back. The exception's index set is updated to point at the bucket. The read-modify-write is
/// not atomic, so concurrent writers to the same bucket can overwrite each other. The tenant cache
/// version is not changed by this method.
/// </remarks>
/// <param name="tenantId">Tenant that owns the entry.</param>
/// <param name="entry">Entry to store.</param>
/// <param name="cancellationToken">Not observed by this method.</param>
/// <exception cref="ArgumentNullException"><paramref name="entry"/> is null.</exception>
public async Task SetAsync(
    string tenantId,
    ExceptionCacheEntry entry,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(entry);

    var db = _redis.GetDatabase();
    var assetKey = GetAssetKey(tenantId, entry.AssetId, entry.AdvisoryId);
    var exceptionIndexKey = GetExceptionIndexKey(tenantId, entry.ExceptionId);

    // Load whatever is already cached for this asset/advisory bucket.
    var existingJson = await db.StringGetAsync(assetKey).ConfigureAwait(false);
    var entries = existingJson.HasValue
        ? JsonSerializer.Deserialize<List<ExceptionCacheEntry>>((string)existingJson!, JsonOptions) ?? new List<ExceptionCacheEntry>()
        : new List<ExceptionCacheEntry>();

    // Replace any previous entry for the same exception.
    entries.RemoveAll(e => e.ExceptionId == entry.ExceptionId);
    entries.Add(entry);

    // The bucket is shared by every exception covering this asset/advisory, so it has to live as long
    // as its longest-lived entry (the same rule SetBatchAsync applies). Using only the new entry's TTL
    // would evict longer-lived neighbours early.
    var ttl = entries.Max(e => ComputeTtl(e));
    var json = JsonSerializer.Serialize(entries, JsonOptions);

    var tasks = new List<Task>
    {
        db.StringSetAsync(assetKey, json, ttl),
        db.SetAddAsync(exceptionIndexKey, assetKey),
        // The index outlives the bucket slightly so invalidation can still find it.
        db.KeyExpireAsync(exceptionIndexKey, ttl + TimeSpan.FromMinutes(5)),
    };
    await Task.WhenAll(tasks).ConfigureAwait(false);

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "set");
}
/// <summary>
/// Writes many entries in one Redis batch, grouped by asset/advisory bucket.
/// </summary>
/// <remarks>
/// Each bucket named by the input is overwritten with the entries supplied for it; existing content of
/// that bucket is not merged. Bucket TTL is the longest TTL among its entries. After all writes have
/// completed, the tenant cache version is incremented once and a "set_batch" operation is recorded.
/// </remarks>
/// <param name="tenantId">Tenant that owns the entries.</param>
/// <param name="entries">Entries to store.</param>
/// <param name="cancellationToken">Forwarded to the version increment.</param>
/// <exception cref="ArgumentNullException"><paramref name="entries"/> is null.</exception>
public async Task SetBatchAsync(
    string tenantId,
    IEnumerable<ExceptionCacheEntry> entries,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(entries);

    var db = _redis.GetDatabase();
    var batch = db.CreateBatch();
    var pending = new List<Task>();
    var count = 0;

    // Group entries by asset+advisory bucket.
    var groupedEntries = entries
        .GroupBy(e => GetAssetKey(tenantId, e.AssetId, e.AdvisoryId))
        .ToDictionary(g => g.Key, g => g.ToList());

    foreach (var (assetKey, assetEntries) in groupedEntries)
    {
        var ttl = assetEntries.Max(e => ComputeTtl(e));
        var json = JsonSerializer.Serialize(assetEntries, JsonOptions);
        pending.Add(batch.StringSetAsync(assetKey, json, ttl));

        // Keep the per-exception index in sync so invalidation can find this bucket.
        foreach (var entry in assetEntries)
        {
            var exceptionIndexKey = GetExceptionIndexKey(tenantId, entry.ExceptionId);
            pending.Add(batch.SetAddAsync(exceptionIndexKey, assetKey));
            pending.Add(batch.KeyExpireAsync(exceptionIndexKey, ttl + TimeSpan.FromMinutes(5)));
        }

        count += assetEntries.Count;
    }

    // Execute only queues the commands. Awaiting the queued tasks makes write failures surface here and
    // guarantees the version bump and the log line below happen after the data is actually stored.
    batch.Execute();
    await Task.WhenAll(pending).ConfigureAwait(false);

    await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "set_batch");
    _logger.LogDebug("Set {Count} exception cache entries for tenant {TenantId}", count, tenantId);
}
/// <summary>
/// Removes every cached entry that belongs to one exception.
/// </summary>
/// <remarks>
/// The exception's index set lists the asset buckets that contain it. Each bucket is rewritten without
/// the exception's entries (or deleted when nothing remains), the index is deleted, and the tenant
/// cache version is incremented. Bucket rewrites are not atomic with respect to concurrent writers.
/// </remarks>
/// <param name="tenantId">Tenant that owns the exception.</param>
/// <param name="exceptionId">Exception to remove from the cache.</param>
/// <param name="cancellationToken">Forwarded to the version increment.</param>
public async Task InvalidateExceptionAsync(
    string tenantId,
    string exceptionId,
    CancellationToken cancellationToken = default)
{
    var db = _redis.GetDatabase();
    var exceptionIndexKey = GetExceptionIndexKey(tenantId, exceptionId);

    // Asset buckets that currently reference this exception.
    var assetKeys = await db.SetMembersAsync(exceptionIndexKey).ConfigureAwait(false);

    foreach (var member in assetKeys)
    {
        var assetKey = (string)member!;
        var json = await db.StringGetAsync(assetKey).ConfigureAwait(false);
        if (!json.HasValue)
        {
            continue;
        }

        var entries = JsonSerializer.Deserialize<List<ExceptionCacheEntry>>((string)json!, JsonOptions);
        if (entries is null)
        {
            continue;
        }

        entries.RemoveAll(e => e.ExceptionId == exceptionId);
        if (entries.Count == 0)
        {
            await db.KeyDeleteAsync(assetKey).ConfigureAwait(false);
            continue;
        }

        // Rewriting a string key without an expiry clears its TTL, which would leave the bucket in
        // Redis forever. Re-derive the TTL from the remaining entries, as the write paths do.
        var ttl = entries.Max(e => ComputeTtl(e));
        await db.StringSetAsync(assetKey, JsonSerializer.Serialize(entries, JsonOptions), ttl)
            .ConfigureAwait(false);
    }

    await db.KeyDeleteAsync(exceptionIndexKey).ConfigureAwait(false);

    await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "invalidate_exception");
    _logger.LogInformation(
        "Invalidated exception {ExceptionId} affecting {Count} assets for tenant {TenantId}",
        exceptionId, assetKeys.Length, tenantId);
}
/// <summary>
/// Deletes every cached bucket (all advisory variants) for one asset and bumps the tenant version.
/// </summary>
/// <remarks>
/// Keys are discovered by scanning the first configured endpoint with a glob pattern built from the
/// tenant and asset IDs. NOTE(review): IDs containing glob metacharacters (*, ?, [) would widen or
/// break the match; confirm IDs are constrained upstream. Exception index sets are left untouched.
/// </remarks>
/// <param name="tenantId">Tenant that owns the asset.</param>
/// <param name="assetId">Asset whose buckets are removed.</param>
/// <param name="cancellationToken">Cancels the key scan; also forwarded to the version increment.</param>
/// <exception cref="OperationCanceledException">The token was cancelled during the key scan.</exception>
public async Task InvalidateAssetAsync(
    string tenantId,
    string assetId,
    CancellationToken cancellationToken = default)
{
    var db = _redis.GetDatabase();
    var server = _redis.GetServer(_redis.GetEndPoints().First());

    // All advisory variants of this asset share the same key prefix.
    var pattern = $"{KeyPrefix}:{tenantId}:a:{assetId}:*";

    // Scan asynchronously: the synchronous Keys enumeration blocks the calling thread for every
    // SCAN page and cannot be cancelled.
    var keys = new List<RedisKey>();
    await foreach (var key in server.KeysAsync(pattern: pattern)
        .WithCancellation(cancellationToken)
        .ConfigureAwait(false))
    {
        keys.Add(key);
    }

    if (keys.Count > 0)
    {
        await db.KeyDeleteAsync(keys.ToArray()).ConfigureAwait(false);
    }

    await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "invalidate_asset");
    _logger.LogDebug("Invalidated {Count} cache keys for asset {AssetId}", keys.Count, assetId);
}
/// <summary>
/// Deletes every cache key that belongs to a tenant.
/// </summary>
/// <remarks>
/// The pattern covers all of the tenant's keys, including its version and stats keys, so the tenant's
/// version reads as 0 afterwards. Keys are discovered by scanning the first configured endpoint.
/// </remarks>
/// <param name="tenantId">Tenant whose cache is cleared.</param>
/// <param name="cancellationToken">Cancels the key scan.</param>
/// <exception cref="OperationCanceledException">The token was cancelled during the key scan.</exception>
public async Task InvalidateTenantAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    var server = _redis.GetServer(_redis.GetEndPoints().First());
    var pattern = $"{KeyPrefix}:{tenantId}:*";

    // Scan asynchronously: the synchronous Keys enumeration blocks the calling thread for every
    // SCAN page and cannot be cancelled.
    var keys = new List<RedisKey>();
    await foreach (var key in server.KeysAsync(pattern: pattern)
        .WithCancellation(cancellationToken)
        .ConfigureAwait(false))
    {
        keys.Add(key);
    }

    if (keys.Count > 0)
    {
        var db = _redis.GetDatabase();
        await db.KeyDeleteAsync(keys.ToArray()).ConfigureAwait(false);
    }

    PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "invalidate_tenant");
    _logger.LogInformation("Invalidated {Count} cache keys for tenant {TenantId}", keys.Count, tenantId);
}
/// <summary>
/// Loads the tenant's active exceptions from the repository and writes them to the cache.
/// </summary>
/// <remarks>
/// For each active exception, one entry is produced per active binding, plus one entry per scoped
/// asset/advisory combination (with decision "allow") when the exception's scope lists assets.
/// All entries are written through <see cref="SetBatchAsync"/>. When the repository returns no
/// active exceptions the method returns early without touching the cache or the warm stats.
/// Failures are logged, recorded as an error metric, and rethrown.
/// </remarks>
/// <param name="tenantId">Tenant whose cache is warmed.</param>
/// <param name="cancellationToken">Forwarded to repository calls and the batch write.</param>
public async Task WarmAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    using var activity = PolicyEngineTelemetry.ActivitySource.StartActivity(
        "exception.cache.warm", ActivityKind.Internal);
    activity?.SetTag("tenant_id", tenantId);

    var stopwatch = Stopwatch.StartNew();
    var warmedAt = _timeProvider.GetUtcNow();
    _logger.LogInformation("Starting cache warm for tenant {TenantId}", tenantId);

    try
    {
        // Only active, non-expired exceptions, capped at the configured per-tenant limit.
        var query = new ExceptionQueryOptions
        {
            Statuses = ImmutableArray.Create("active"),
            IncludeExpired = false,
            Limit = _options.MaxEntriesPerTenant,
        };
        var activeExceptions = await _repository
            .ListExceptionsAsync(tenantId, query, cancellationToken)
            .ConfigureAwait(false);

        if (activeExceptions.Length == 0)
        {
            _logger.LogDebug("No active exceptions to warm for tenant {TenantId}", tenantId);
            return;
        }

        var warmed = new List<ExceptionCacheEntry>();
        foreach (var source in activeExceptions)
        {
            var bindings = await _repository
                .GetBindingsForExceptionAsync(tenantId, source.Id, cancellationToken)
                .ConfigureAwait(false);

            // Explicit bindings: the binding supplies asset, advisory, decision and window.
            var fromBindings = bindings
                .Where(b => b.Status == "active")
                .Select(binding => new ExceptionCacheEntry
                {
                    ExceptionId = source.Id,
                    AssetId = binding.AssetId,
                    AdvisoryId = binding.AdvisoryId,
                    CveId = binding.CveId,
                    DecisionOverride = binding.DecisionOverride,
                    ExceptionType = source.ExceptionType,
                    Priority = source.Priority,
                    EffectiveFrom = binding.EffectiveFrom,
                    ExpiresAt = binding.ExpiresAt ?? source.ExpiresAt,
                    CachedAt = warmedAt,
                    ExceptionName = source.Name,
                });
            warmed.AddRange(fromBindings);

            // Scope-based coverage: every scoped asset crossed with every scoped advisory, or with a
            // single null advisory when the scope names none.
            if (source.Scope.ApplyToAll || source.Scope.AssetIds.Count > 0)
            {
                var fromScope =
                    from assetId in source.Scope.AssetIds
                    from advisoryId in source.Scope.AdvisoryIds.DefaultIfEmpty(null!)
                    select new ExceptionCacheEntry
                    {
                        ExceptionId = source.Id,
                        AssetId = assetId,
                        AdvisoryId = advisoryId,
                        CveId = null,
                        DecisionOverride = "allow",
                        ExceptionType = source.ExceptionType,
                        Priority = source.Priority,
                        EffectiveFrom = source.EffectiveFrom ?? source.CreatedAt,
                        ExpiresAt = source.ExpiresAt,
                        CachedAt = warmedAt,
                        ExceptionName = source.Name,
                    };
                warmed.AddRange(fromScope);
            }
        }

        if (warmed.Count > 0)
        {
            await SetBatchAsync(tenantId, warmed, cancellationToken).ConfigureAwait(false);
        }

        stopwatch.Stop();

        await UpdateWarmStatsAsync(tenantId, warmedAt, warmed.Count).ConfigureAwait(false);
        PolicyEngineTelemetry.RecordExceptionCacheOperation(tenantId, "warm");

        _logger.LogInformation(
            "Warmed cache with {Count} entries from {ExceptionCount} exceptions for tenant {TenantId} in {Duration}ms",
            warmed.Count, activeExceptions.Length, tenantId, stopwatch.ElapsedMilliseconds);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to warm cache for tenant {TenantId}", tenantId);
        PolicyEngineTelemetry.RecordError("exception_cache_warm", tenantId);
        throw;
    }
}
/// <summary>
/// Aggregates counts over a tenant's cached entries.
/// </summary>
/// <remarks>
/// Asset and index keys are discovered by scanning the first configured endpoint. Only the first 1000
/// asset keys are read when aggregating, so entry-derived counts can be partial; the unique-exception
/// count is the number of index keys found and is not subject to that limit.
/// </remarks>
/// <param name="tenantId">Tenant to summarize.</param>
/// <param name="cancellationToken">Forwarded to the version lookup.</param>
/// <returns>The aggregated summary, stamped with the current time and cache version.</returns>
public async Task<ExceptionCacheSummary> GetSummaryAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    var server = _redis.GetServer(_redis.GetEndPoints().First());
    var db = _redis.GetDatabase();
    var now = _timeProvider.GetUtcNow();

    var assetPattern = $"{KeyPrefix}:{tenantId}:a:*";
    var assetKeys = server.Keys(pattern: assetPattern).ToArray();

    var exceptionPattern = $"{KeyPrefix}:{tenantId}:idx:e:*";
    var exceptionKeys = server.Keys(pattern: exceptionPattern).ToArray();

    var byType = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
    var byDecision = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
    var totalEntries = 0;
    var expiringWithinHour = 0;
    var uniqueAssets = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var oneHour = TimeSpan.FromHours(1);

    foreach (var key in assetKeys.Take(1000)) // Limit the number of buckets read for performance.
    {
        var json = await db.StringGetAsync(key).ConfigureAwait(false);
        if (!json.HasValue)
        {
            continue;
        }

        var entries = JsonSerializer.Deserialize<List<ExceptionCacheEntry>>((string)json!, JsonOptions);
        if (entries is null)
        {
            continue;
        }

        foreach (var entry in entries)
        {
            totalEntries++;
            uniqueAssets.Add(entry.AssetId);

            byType.TryGetValue(entry.ExceptionType, out var typeCount);
            byType[entry.ExceptionType] = typeCount + 1;

            byDecision.TryGetValue(entry.DecisionOverride, out var decisionCount);
            byDecision[entry.DecisionOverride] = decisionCount + 1;

            // "Expiring within the next hour" means the expiry is still ahead of us. A bucket can
            // outlive some of its entries (its TTL follows the longest-lived one), so entries that
            // have already expired must not be counted here.
            if (entry.ExpiresAt is { } expiresAt && expiresAt > now && expiresAt - now <= oneHour)
            {
                expiringWithinHour++;
            }
        }
    }

    var version = await GetVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);

    return new ExceptionCacheSummary
    {
        TenantId = tenantId,
        TotalEntries = totalEntries,
        UniqueExceptions = exceptionKeys.Length,
        UniqueAssets = uniqueAssets.Count,
        ByType = byType,
        ByDecision = byDecision,
        ExpiringWithinHour = expiringWithinHour,
        CacheVersion = version,
        ComputedAt = now,
    };
}
/// <summary>
/// Reports key counts and server memory usage for one tenant or for all tenants.
/// </summary>
/// <remarks>
/// <c>TotalEntries</c> is the number of asset bucket keys matched and <c>TotalTenants</c> the number
/// of version keys matched, both found by scanning the first configured endpoint. Memory usage is the
/// server-wide "used_memory" figure and is left null when it cannot be read. Hit/miss counters and the
/// last warm/invalidation timestamps are not tracked and are returned as 0/null.
/// </remarks>
/// <param name="tenantId">Tenant to report on, or null for all tenants.</param>
/// <param name="cancellationToken">Cancels the key scans.</param>
/// <returns>The collected statistics.</returns>
/// <exception cref="OperationCanceledException">The token was cancelled during a key scan.</exception>
public async Task<ExceptionCacheStats> GetStatsAsync(
    string? tenantId = null,
    CancellationToken cancellationToken = default)
{
    var server = _redis.GetServer(_redis.GetEndPoints().First());

    var pattern = tenantId is not null
        ? $"{KeyPrefix}:{tenantId}:a:*"
        : $"{KeyPrefix}:*:a:*";
    var entryCount = await CountKeysAsync(pattern).ConfigureAwait(false);

    var tenantPattern = tenantId is not null
        ? $"{KeyPrefix}:{tenantId}:v"
        : $"{KeyPrefix}:*:v";
    var tenantCount = await CountKeysAsync(tenantPattern).ConfigureAwait(false);

    long? memoryUsed = null;
    try
    {
        var info = await server.InfoAsync("memory").ConfigureAwait(false);
        var memorySection = info.FirstOrDefault(s => s.Key == "Memory");
        if (memorySection is not null)
        {
            var usedMemory = memorySection.FirstOrDefault(p => p.Key == "used_memory");
            if (usedMemory.Key is not null && long.TryParse(usedMemory.Value, out var bytes))
            {
                memoryUsed = bytes;
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: memory info is optional, so report null rather than fail the whole call.
        _logger.LogDebug(ex, "Redis memory info not available; MemoryUsedBytes left unset");
    }

    return new ExceptionCacheStats
    {
        TotalEntries = entryCount,
        TotalTenants = tenantCount,
        MemoryUsedBytes = memoryUsed,
        HitCount = 0, // Would need to track separately
        MissCount = 0,
        LastWarmAt = null,
        LastInvalidationAt = null,
    };

    // Counts keys matching a glob pattern without blocking the calling thread on each SCAN page.
    async Task<int> CountKeysAsync(string keyPattern)
    {
        var matched = 0;
        await foreach (var _ in server.KeysAsync(pattern: keyPattern)
            .WithCancellation(cancellationToken)
            .ConfigureAwait(false))
        {
            matched++;
        }

        return matched;
    }
}
/// <summary>
/// Reads the tenant's cache version counter.
/// </summary>
/// <param name="tenantId">Tenant whose version is read.</param>
/// <param name="cancellationToken">Not observed by this method.</param>
/// <returns>The stored version, or 0 when the version key does not exist.</returns>
public async Task<long> GetVersionAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    var stored = await _redis
        .GetDatabase()
        .StringGetAsync(GetVersionKey(tenantId))
        .ConfigureAwait(false);

    if (!stored.HasValue)
    {
        return 0;
    }

    return (long)stored;
}
/// <summary>
/// Applies an exception lifecycle event to the cache.
/// </summary>
/// <remarks>
/// The event type is matched case-insensitively: "activated" warms the exception; "expired",
/// "revoked" and "deleted" invalidate it; "updated" invalidates and then re-warms it; "created"
/// warms it only when the repository reports it as active. Any other type is logged as a warning.
/// A telemetry operation named after the raw event type is recorded in every case.
/// </remarks>
/// <param name="exceptionEvent">Event to process.</param>
/// <param name="cancellationToken">Forwarded to repository and cache operations.</param>
/// <exception cref="ArgumentNullException"><paramref name="exceptionEvent"/> is null.</exception>
public async Task HandleExceptionEventAsync(
    ExceptionEvent exceptionEvent,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(exceptionEvent);

    using var activity = PolicyEngineTelemetry.ActivitySource.StartActivity(
        "exception.cache.handle_event", ActivityKind.Internal);
    activity?.SetTag("tenant_id", exceptionEvent.TenantId);
    activity?.SetTag("event_type", exceptionEvent.EventType);
    activity?.SetTag("exception_id", exceptionEvent.ExceptionId);

    _logger.LogDebug(
        "Handling exception event {EventType} for exception {ExceptionId} tenant {TenantId}",
        exceptionEvent.EventType, exceptionEvent.ExceptionId, exceptionEvent.TenantId);

    var tenant = exceptionEvent.TenantId;
    var target = exceptionEvent.ExceptionId;
    var kind = exceptionEvent.EventType.ToLowerInvariant();

    if (kind is "activated")
    {
        // Newly active: load its entries into the cache.
        await WarmExceptionAsync(tenant, target, cancellationToken).ConfigureAwait(false);
    }
    else if (kind is "expired" or "revoked" or "deleted")
    {
        // No longer in force: drop its entries.
        await InvalidateExceptionAsync(tenant, target, cancellationToken).ConfigureAwait(false);
    }
    else if (kind is "updated")
    {
        // Changed: drop the old entries, then load the new ones.
        await InvalidateExceptionAsync(tenant, target, cancellationToken).ConfigureAwait(false);
        await WarmExceptionAsync(tenant, target, cancellationToken).ConfigureAwait(false);
    }
    else if (kind is "created")
    {
        // Only warm when the exception is already active.
        var created = await _repository
            .GetExceptionAsync(tenant, target, cancellationToken)
            .ConfigureAwait(false);
        if (created?.Status == "active")
        {
            await WarmExceptionAsync(tenant, target, cancellationToken).ConfigureAwait(false);
        }
    }
    else
    {
        _logger.LogWarning("Unknown exception event type: {EventType}", exceptionEvent.EventType);
    }

    PolicyEngineTelemetry.RecordExceptionCacheOperation(exceptionEvent.TenantId, $"event_{exceptionEvent.EventType}");
}
/// <summary>
/// Loads one exception's active bindings into the cache, if the exception exists and is active.
/// </summary>
/// <remarks>
/// Entries are merged into their asset buckets one at a time through <see cref="SetAsync"/>, so
/// entries that other exceptions hold in the same bucket are preserved. The tenant cache version is
/// incremented once when at least one entry was written. Only explicit bindings are considered here.
/// </remarks>
/// <param name="tenantId">Tenant that owns the exception.</param>
/// <param name="exceptionId">Exception to warm.</param>
/// <param name="cancellationToken">Forwarded to repository and cache operations.</param>
private async Task WarmExceptionAsync(string tenantId, string exceptionId, CancellationToken cancellationToken)
{
    var exception = await _repository.GetExceptionAsync(tenantId, exceptionId, cancellationToken)
        .ConfigureAwait(false);
    if (exception is null || exception.Status != "active")
    {
        return;
    }

    var now = _timeProvider.GetUtcNow();
    var entries = new List<ExceptionCacheEntry>();

    var bindings = await _repository.GetBindingsForExceptionAsync(tenantId, exceptionId, cancellationToken)
        .ConfigureAwait(false);
    foreach (var binding in bindings.Where(b => b.Status == "active"))
    {
        entries.Add(new ExceptionCacheEntry
        {
            ExceptionId = exception.Id,
            AssetId = binding.AssetId,
            AdvisoryId = binding.AdvisoryId,
            CveId = binding.CveId,
            DecisionOverride = binding.DecisionOverride,
            ExceptionType = exception.ExceptionType,
            Priority = exception.Priority,
            EffectiveFrom = binding.EffectiveFrom,
            ExpiresAt = binding.ExpiresAt ?? exception.ExpiresAt,
            CachedAt = now,
            ExceptionName = exception.Name,
        });
    }

    // SetBatchAsync overwrites each asset bucket with exactly the entries it is given. Because this
    // method only knows about one exception, using it would erase every other exception cached for
    // the same asset/advisory. SetAsync merges into the existing bucket instead.
    foreach (var entry in entries)
    {
        await SetAsync(tenantId, entry, cancellationToken).ConfigureAwait(false);
    }

    if (entries.Count > 0)
    {
        // SetAsync does not bump the version; do it once for the whole exception.
        await IncrementVersionAsync(tenantId, cancellationToken).ConfigureAwait(false);
    }

    _logger.LogDebug(
        "Warmed cache with {Count} entries for exception {ExceptionId}",
        entries.Count, exceptionId);
}
/// <summary>
/// Increments the tenant's cache version counter and returns the new value.
/// </summary>
/// <remarks>
/// An expiry of the default TTL plus ten minutes is applied to the version key only when the key has
/// no expiry yet, so later increments do not extend it.
/// </remarks>
/// <param name="tenantId">Tenant whose version is bumped.</param>
/// <param name="cancellationToken">Not observed by this method.</param>
/// <returns>The version after the increment.</returns>
private async Task<long> IncrementVersionAsync(string tenantId, CancellationToken cancellationToken)
{
    var key = GetVersionKey(tenantId);
    var database = _redis.GetDatabase();

    var bumped = await database.StringIncrementAsync(key).ConfigureAwait(false);

    var lifetime = TimeSpan.FromMinutes(_options.DefaultTtlMinutes + 10);
    await database.KeyExpireAsync(key, lifetime, ExpireWhen.HasNoExpiry).ConfigureAwait(false);

    return bumped;
}
/// <summary>
/// Records when the tenant's cache was last warmed and how many entries were written.
/// </summary>
/// <remarks>
/// Stored as two fields ("lastWarmAt" in round-trip format, "lastWarmCount") of the tenant's stats hash.
/// </remarks>
/// <param name="tenantId">Tenant whose stats are updated.</param>
/// <param name="warmAt">Time the warm started.</param>
/// <param name="count">Number of entries produced by the warm.</param>
private async Task UpdateWarmStatsAsync(string tenantId, DateTimeOffset warmAt, int count)
{
    var fields = new HashEntry[]
    {
        new("lastWarmAt", warmAt.ToString("O")),
        new("lastWarmCount", count.ToString()),
    };

    await _redis
        .GetDatabase()
        .HashSetAsync(GetStatsKey(tenantId), fields)
        .ConfigureAwait(false);
}
/// <summary>
/// Computes how long an entry should stay in Redis.
/// </summary>
/// <param name="entry">Entry whose expiry is inspected.</param>
/// <returns>
/// The time remaining until the entry's expiry when that is in the future; otherwise the configured
/// default TTL (also used when the entry has no expiry).
/// </returns>
private TimeSpan ComputeTtl(ExceptionCacheEntry entry)
{
    if (entry.ExpiresAt is { } expiry)
    {
        var remaining = expiry - _timeProvider.GetUtcNow();
        if (remaining > TimeSpan.Zero)
        {
            return remaining;
        }
    }

    // No expiry, or the expiry has already passed.
    return TimeSpan.FromMinutes(_options.DefaultTtlMinutes);
}
/// <summary>
/// Builds the Redis key of an asset bucket; a null advisory maps to the "all" segment.
/// </summary>
private static string GetAssetKey(string tenantId, string assetId, string? advisoryId)
{
    var advisorySegment = advisoryId ?? "all";
    return string.Join(':', KeyPrefix, tenantId, "a", assetId, advisorySegment);
}
/// <summary>
/// Builds the Redis key of the set that lists the asset buckets containing an exception.
/// </summary>
private static string GetExceptionIndexKey(string tenantId, string exceptionId)
{
    return string.Join(':', KeyPrefix, tenantId, "idx", "e", exceptionId);
}
/// <summary>
/// Builds the Redis key of a tenant's cache version counter.
/// </summary>
private static string GetVersionKey(string tenantId)
{
    return string.Join(':', KeyPrefix, tenantId, "v");
}
/// <summary>
/// Builds the Redis key of a tenant's warm-statistics hash.
/// </summary>
private static string GetStatsKey(string tenantId)
{
    return string.Join(':', KeyPrefix, tenantId, "stats");
}
}