Files
git.stella-ops.org/src/Policy/StellaOps.Policy.Engine/EffectiveDecisionMap/RedisEffectiveDecisionMap.cs
StellaOps Bot 05da719048
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
up
2025-11-28 09:41:08 +02:00

502 lines
18 KiB
C#

using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Policy.Engine.Options;
using StellaOps.Policy.Engine.Telemetry;
using StackExchange.Redis;
namespace StellaOps.Policy.Engine.EffectiveDecisionMap;
/// <summary>
/// Redis-backed effective decision map with versioning and TTL-based eviction.
/// Key structure:
/// - Entry: stellaops:edm:{tenant}:{snapshot}:e:{asset} -> JSON entry
/// - Version: stellaops:edm:{tenant}:{snapshot}:v -> integer version
/// - Index: stellaops:edm:{tenant}:{snapshot}:idx -> sorted set of assets by evaluated_at
/// </summary>
internal sealed class RedisEffectiveDecisionMap : IEffectiveDecisionMap
{
// Multiplexer from which database and server handles are obtained per call.
private readonly IConnectionMultiplexer _redis;
private readonly ILogger<RedisEffectiveDecisionMap> _logger;
// Effective decision map section of the policy engine options (defaults when absent).
private readonly EffectiveDecisionMapOptions _options;
// Clock abstraction used for TTL computation and summary timestamps.
private readonly TimeProvider _timeProvider;

// Common prefix of every key built by this class.
private const string KeyPrefix = "stellaops:edm";

// Serializer settings shared by all entry reads and writes: camelCase names, compact output.
private static readonly JsonSerializerOptions JsonOptions = new()
{
    WriteIndented = false,
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
};

/// <summary>
/// Creates the Redis-backed decision map.
/// </summary>
/// <param name="redis">Redis connection multiplexer; must not be null.</param>
/// <param name="logger">Logger for diagnostic output; must not be null.</param>
/// <param name="options">
/// Policy engine options; when null, or when its effective decision map section is null,
/// a default <see cref="EffectiveDecisionMapOptions"/> instance is used.
/// </param>
/// <param name="timeProvider">Clock source; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="redis"/>, <paramref name="logger"/> or <paramref name="timeProvider"/> is null.
/// </exception>
public RedisEffectiveDecisionMap(
    IConnectionMultiplexer redis,
    ILogger<RedisEffectiveDecisionMap> logger,
    IOptions<PolicyEngineOptions> options,
    TimeProvider timeProvider)
{
    ArgumentNullException.ThrowIfNull(redis);
    ArgumentNullException.ThrowIfNull(logger);
    _redis = redis;
    _logger = logger;

    // Options are resolved before the clock is validated, matching the declared parameter order.
    _options = options?.Value.EffectiveDecisionMap ?? new EffectiveDecisionMapOptions();

    ArgumentNullException.ThrowIfNull(timeProvider);
    _timeProvider = timeProvider;
}
/// <summary>
/// Stores a single decision entry and records its asset in the snapshot index.
/// </summary>
/// <remarks>
/// The entry key expires at <c>entry.ExpiresAt</c>; when that moment has already passed,
/// the configured default TTL is used instead. The index sorted set is scored by
/// <c>entry.EvaluatedAt</c> (Unix milliseconds). This method does not change the map version.
/// </remarks>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot the entry belongs to.</param>
/// <param name="entry">Entry to store; must not be null.</param>
/// <param name="cancellationToken">Checked once before any Redis command is issued.</param>
/// <exception cref="ArgumentNullException"><paramref name="entry"/> is null.</exception>
/// <exception cref="OperationCanceledException">The token was already cancelled.</exception>
public async Task SetAsync(
    string tenantId,
    string snapshotId,
    EffectiveDecisionEntry entry,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(entry);
    cancellationToken.ThrowIfCancellationRequested();

    var db = _redis.GetDatabase();
    var entryKey = GetEntryKey(tenantId, snapshotId, entry.AssetId);
    var indexKey = GetIndexKey(tenantId, snapshotId);
    var json = JsonSerializer.Serialize(entry, JsonOptions);

    var ttl = entry.ExpiresAt - _timeProvider.GetUtcNow();
    if (ttl <= TimeSpan.Zero)
    {
        ttl = TimeSpan.FromMinutes(_options.DefaultTtlMinutes);
    }

    // Index lives slightly longer than the entry it references.
    var indexTtl = ttl + TimeSpan.FromMinutes(5);

    // The index expiry must never be shortened: an unconditional EXPIRE would let a
    // short-lived entry cut the index below the lifetime of longer-lived entries, which
    // would then disappear from snapshot listings while still being readable by key.
    // HasNoExpiry sets the TTL on a freshly created index; GreaterThanCurrentExpiry only
    // extends an existing one (a key without TTL counts as infinite for that option, so
    // both commands are needed). These EXPIRE options are the same family already used
    // for the version key in IncrementVersionAsync.
    await Task.WhenAll(
        db.StringSetAsync(entryKey, json, ttl),
        db.SortedSetAddAsync(indexKey, entry.AssetId, entry.EvaluatedAt.ToUnixTimeMilliseconds()),
        db.KeyExpireAsync(indexKey, indexTtl, ExpireWhen.HasNoExpiry),
        db.KeyExpireAsync(indexKey, indexTtl, ExpireWhen.GreaterThanCurrentExpiry)).ConfigureAwait(false);

    PolicyEngineTelemetry.EffectiveDecisionMapOperations.Add(1,
        new KeyValuePair<string, object?>("operation", "set"),
        new KeyValuePair<string, object?>("tenant_id", tenantId));
}
/// <summary>
/// Stores many decision entries through one Redis batch, updates the snapshot index and
/// then increments the snapshot's map version.
/// </summary>
/// <remarks>
/// Each entry expires at its own <c>ExpiresAt</c>, falling back to the configured default
/// TTL when that moment has already passed. The version is incremented even when
/// <paramref name="entries"/> is empty.
/// </remarks>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot the entries belong to.</param>
/// <param name="entries">Entries to store; must not be null.</param>
/// <param name="cancellationToken">Checked once up front and passed to the version increment.</param>
/// <exception cref="ArgumentNullException"><paramref name="entries"/> is null.</exception>
/// <exception cref="OperationCanceledException">The token was already cancelled.</exception>
public async Task SetBatchAsync(
    string tenantId,
    string snapshotId,
    IEnumerable<EffectiveDecisionEntry> entries,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(entries);
    cancellationToken.ThrowIfCancellationRequested();

    var db = _redis.GetDatabase();
    var batch = db.CreateBatch();
    var indexKey = GetIndexKey(tenantId, snapshotId);
    var now = _timeProvider.GetUtcNow();
    var defaultTtl = TimeSpan.FromMinutes(_options.DefaultTtlMinutes);
    var longestTtl = TimeSpan.Zero;
    var pending = new List<Task>();
    var sortedSetEntries = new List<SortedSetEntry>();

    foreach (var entry in entries)
    {
        var entryKey = GetEntryKey(tenantId, snapshotId, entry.AssetId);
        var json = JsonSerializer.Serialize(entry, JsonOptions);

        var ttl = entry.ExpiresAt - now;
        if (ttl <= TimeSpan.Zero)
        {
            ttl = defaultTtl;
        }

        if (ttl > longestTtl)
        {
            longestTtl = ttl;
        }

        pending.Add(batch.StringSetAsync(entryKey, json, ttl));
        sortedSetEntries.Add(new SortedSetEntry(entry.AssetId, entry.EvaluatedAt.ToUnixTimeMilliseconds()));
    }

    var count = sortedSetEntries.Count;
    if (count > 0)
    {
        // The index must outlive the longest-lived entry written here, and must never be
        // shortened below what earlier writes established (see the paired EXPIRE options:
        // HasNoExpiry covers a new index, GreaterThanCurrentExpiry only extends).
        var indexTtl = longestTtl + TimeSpan.FromMinutes(5);
        pending.Add(batch.SortedSetAddAsync(indexKey, sortedSetEntries.ToArray()));
        pending.Add(batch.KeyExpireAsync(indexKey, indexTtl, ExpireWhen.HasNoExpiry));
        pending.Add(batch.KeyExpireAsync(indexKey, indexTtl, ExpireWhen.GreaterThanCurrentExpiry));
    }

    // Execute() only sends the queued commands; their outcome arrives through the tasks
    // returned when they were queued. Await them so a failed write surfaces here instead
    // of becoming an unobserved task exception after the version was already bumped.
    batch.Execute();
    await Task.WhenAll(pending).ConfigureAwait(false);

    // Increment version after batch write
    await IncrementVersionAsync(tenantId, snapshotId, cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.EffectiveDecisionMapOperations.Add(count,
        new KeyValuePair<string, object?>("operation", "set_batch"),
        new KeyValuePair<string, object?>("tenant_id", tenantId));
    _logger.LogDebug("Set {Count} effective decisions for snapshot {SnapshotId}", count, snapshotId);
}
/// <summary>
/// Looks up the stored decision for one asset of a snapshot.
/// </summary>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot to read from.</param>
/// <param name="assetId">Asset whose entry is requested.</param>
/// <param name="cancellationToken">Accepted for interface compatibility; not consulted here.</param>
/// <returns>
/// The deserialized entry, or <c>null</c> when the entry key holds no value.
/// </returns>
public async Task<EffectiveDecisionEntry?> GetAsync(
    string tenantId,
    string snapshotId,
    string assetId,
    CancellationToken cancellationToken = default)
{
    var database = _redis.GetDatabase();
    var payload = await database
        .StringGetAsync(GetEntryKey(tenantId, snapshotId, assetId))
        .ConfigureAwait(false);
    var hit = payload.HasValue;

    // Hits and misses are both counted, distinguished by the cache_hit tag.
    PolicyEngineTelemetry.EffectiveDecisionMapOperations.Add(1,
        new KeyValuePair<string, object?>("operation", "get"),
        new KeyValuePair<string, object?>("tenant_id", tenantId),
        new KeyValuePair<string, object?>("cache_hit", hit));

    return hit
        ? JsonSerializer.Deserialize<EffectiveDecisionEntry>((string)payload!, JsonOptions)
        : null;
}
/// <summary>
/// Reads the entries for several assets of a snapshot in one multi-key GET and reports
/// which assets had no usable entry, together with the current map version.
/// </summary>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot to read from.</param>
/// <param name="assetIds">Assets to look up; must not be null.</param>
/// <param name="cancellationToken">Checked once up front and passed to the version lookup.</param>
/// <returns>
/// A result whose <c>Entries</c> maps each found asset ID to its entry and whose
/// <c>NotFound</c> lists every requested asset ID without a usable entry. Every requested
/// ID ends up in exactly one of the two (duplicates in the request collapse in
/// <c>Entries</c>).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="assetIds"/> is null.</exception>
/// <exception cref="OperationCanceledException">The token was already cancelled.</exception>
public async Task<EffectiveDecisionQueryResult> GetBatchAsync(
    string tenantId,
    string snapshotId,
    IReadOnlyList<string> assetIds,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(assetIds);
    cancellationToken.ThrowIfCancellationRequested();

    var db = _redis.GetDatabase();
    var keys = assetIds.Select(id => (RedisKey)GetEntryKey(tenantId, snapshotId, id)).ToArray();
    var values = await db.StringGetAsync(keys).ConfigureAwait(false);

    var entries = new Dictionary<string, EffectiveDecisionEntry>();
    var notFound = new List<string>();
    for (var i = 0; i < assetIds.Count; i++)
    {
        var assetId = assetIds[i];
        var entry = values[i].HasValue
            ? JsonSerializer.Deserialize<EffectiveDecisionEntry>((string)values[i]!, JsonOptions)
            : null;

        if (entry is null)
        {
            // Covers both a missing key and a payload that deserializes to null; the
            // latter used to vanish from the result entirely, leaving the caller unable
            // to tell that the asset still needs evaluation.
            notFound.Add(assetId);
        }
        else
        {
            entries[assetId] = entry;
        }
    }

    var version = await GetVersionAsync(tenantId, snapshotId, cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.EffectiveDecisionMapOperations.Add(assetIds.Count,
        new KeyValuePair<string, object?>("operation", "get_batch"),
        new KeyValuePair<string, object?>("tenant_id", tenantId));

    return new EffectiveDecisionQueryResult
    {
        Entries = entries,
        NotFound = notFound,
        MapVersion = version,
        FromCache = true,
    };
}
/// <summary>
/// Lists the entries of a snapshot, optionally filtered and paged.
/// </summary>
/// <remarks>
/// Asset IDs come from the snapshot index in descending rank order (the index is scored
/// by evaluation time, so the most recently evaluated asset comes first). Index members
/// whose entry key holds no value are skipped. Telemetry is recorded only when the index
/// is non-empty.
/// </remarks>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot to list.</param>
/// <param name="filter">Optional status/severity/exception/count criteria plus offset and limit.</param>
/// <param name="cancellationToken">Accepted for interface compatibility; not consulted here.</param>
/// <returns>The matching entries after offset and limit have been applied.</returns>
public async Task<IReadOnlyList<EffectiveDecisionEntry>> GetAllForSnapshotAsync(
    string tenantId,
    string snapshotId,
    EffectiveDecisionFilter? filter = null,
    CancellationToken cancellationToken = default)
{
    var db = _redis.GetDatabase();
    var indexKey = GetIndexKey(tenantId, snapshotId);

    var indexedAssets = await db.SortedSetRangeByRankAsync(indexKey, 0, -1, Order.Descending)
        .ConfigureAwait(false);
    if (indexedAssets.Length == 0)
    {
        return Array.Empty<EffectiveDecisionEntry>();
    }

    var entryKeys = indexedAssets
        .Select(id => (RedisKey)GetEntryKey(tenantId, snapshotId, id!))
        .ToArray();
    var payloads = await db.StringGetAsync(entryKeys).ConfigureAwait(false);

    var limit = filter?.Limit ?? 0;
    var offset = filter?.Offset ?? 0;
    var matched = new List<EffectiveDecisionEntry>();

    foreach (var payload in payloads)
    {
        if (!payload.HasValue)
        {
            continue;
        }

        var candidate = JsonSerializer.Deserialize<EffectiveDecisionEntry>((string)payload!, JsonOptions);
        if (candidate is null)
        {
            continue;
        }

        if (filter is not null && !PassesFilter(candidate, filter))
        {
            continue;
        }

        matched.Add(candidate);

        // Stop collecting once enough matches exist to serve offset + limit.
        if (limit > 0 && matched.Count >= limit + offset)
        {
            break;
        }
    }

    if (offset > 0)
    {
        matched = matched.Skip(offset).ToList();
    }

    if (limit > 0)
    {
        matched = matched.Take(limit).ToList();
    }

    PolicyEngineTelemetry.EffectiveDecisionMapOperations.Add(1,
        new KeyValuePair<string, object?>("operation", "get_all"),
        new KeyValuePair<string, object?>("tenant_id", tenantId));

    return matched;
}

// Returns true when the entry satisfies every criterion set on the filter
// (offset and limit are handled by the caller).
private static bool PassesFilter(EffectiveDecisionEntry entry, EffectiveDecisionFilter filter)
{
    if (filter.Statuses?.Count > 0 &&
        !filter.Statuses.Contains(entry.Status, StringComparer.OrdinalIgnoreCase))
    {
        return false;
    }

    if (filter.Severities?.Count > 0 &&
        (entry.Severity is null || !filter.Severities.Contains(entry.Severity, StringComparer.OrdinalIgnoreCase)))
    {
        return false;
    }

    // HasException == true keeps only entries with an exception; == false keeps only those without.
    if ((filter.HasException == true && entry.ExceptionId is null) ||
        (filter.HasException == false && entry.ExceptionId is not null))
    {
        return false;
    }

    if (filter.MinAdvisoryCount.HasValue && entry.AdvisoryCount < filter.MinAdvisoryCount)
    {
        return false;
    }

    if (filter.MinHighSeverityCount.HasValue && entry.HighSeverityCount < filter.MinHighSeverityCount)
    {
        return false;
    }

    return true;
}
/// <summary>
/// Builds aggregate counts over every entry currently listed for a snapshot.
/// </summary>
/// <remarks>
/// Status and severity are counted case-insensitively (ordinal); entries without a
/// severity are left out of the severity counts. The summary is stamped with the current
/// time from the injected clock and the current map version.
/// </remarks>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot to summarize.</param>
/// <param name="cancellationToken">Passed through to the entry listing and version lookup.</param>
/// <returns>Totals, per-status and per-severity counts, and the number of entries with an exception.</returns>
public async Task<EffectiveDecisionSummary> GetSummaryAsync(
    string tenantId,
    string snapshotId,
    CancellationToken cancellationToken = default)
{
    var entries = await GetAllForSnapshotAsync(tenantId, snapshotId, null, cancellationToken)
        .ConfigureAwait(false);

    var byStatus = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
    var bySeverity = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
    var withException = 0;

    // One pass over the entries; assigning through the indexer keeps the first-seen
    // spelling of each key, as grouping would.
    foreach (var entry in entries)
    {
        byStatus[entry.Status] = byStatus.GetValueOrDefault(entry.Status) + 1;

        if (entry.Severity is not null)
        {
            bySeverity[entry.Severity] = bySeverity.GetValueOrDefault(entry.Severity) + 1;
        }

        if (entry.ExceptionId is not null)
        {
            withException++;
        }
    }

    var version = await GetVersionAsync(tenantId, snapshotId, cancellationToken).ConfigureAwait(false);

    return new EffectiveDecisionSummary
    {
        SnapshotId = snapshotId,
        TotalAssets = entries.Count,
        StatusCounts = byStatus,
        SeverityCounts = bySeverity,
        ExceptionCount = withException,
        MapVersion = version,
        ComputedAt = _timeProvider.GetUtcNow(),
    };
}
/// <summary>
/// Removes one asset's entry and its index membership, then increments the snapshot's
/// map version.
/// </summary>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot the asset belongs to.</param>
/// <param name="assetId">Asset whose entry is removed.</param>
/// <param name="cancellationToken">Passed through to the version increment.</param>
public async Task InvalidateAsync(
    string tenantId,
    string snapshotId,
    string assetId,
    CancellationToken cancellationToken = default)
{
    var database = _redis.GetDatabase();

    // Both removals are issued before either is awaited.
    var dropEntry = database.KeyDeleteAsync(GetEntryKey(tenantId, snapshotId, assetId));
    var dropIndexMember = database.SortedSetRemoveAsync(GetIndexKey(tenantId, snapshotId), assetId);
    await Task.WhenAll(dropEntry, dropIndexMember).ConfigureAwait(false);

    await IncrementVersionAsync(tenantId, snapshotId, cancellationToken).ConfigureAwait(false);

    PolicyEngineTelemetry.EffectiveDecisionMapOperations.Add(1,
        new KeyValuePair<string, object?>("operation", "invalidate"),
        new KeyValuePair<string, object?>("tenant_id", tenantId));
}
/// <summary>
/// Deletes every entry listed in a snapshot's index together with the index and version
/// keys of that snapshot.
/// </summary>
/// <remarks>
/// The index and version keys are deleted even when the index is empty or already gone,
/// so a snapshot whose entries were all invalidated one by one does not leave a stale
/// version behind.
/// </remarks>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot to remove.</param>
/// <param name="cancellationToken">Checked once before any Redis command is issued.</param>
/// <exception cref="OperationCanceledException">The token was already cancelled.</exception>
public async Task InvalidateSnapshotAsync(
    string tenantId,
    string snapshotId,
    CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();

    var db = _redis.GetDatabase();
    var indexKey = GetIndexKey(tenantId, snapshotId);

    // Get all asset IDs from the index
    var assetIds = await db.SortedSetRangeByRankAsync(indexKey).ConfigureAwait(false);

    // Always include the index and version keys: deleting a missing key is a no-op,
    // whereas skipping them on an empty index leaves the version key behind.
    var keys = assetIds
        .Select(id => (RedisKey)GetEntryKey(tenantId, snapshotId, id!))
        .Append(indexKey)
        .Append(GetVersionKey(tenantId, snapshotId))
        .ToArray();
    await db.KeyDeleteAsync(keys).ConfigureAwait(false);

    PolicyEngineTelemetry.EffectiveDecisionMapOperations.Add(assetIds.Length,
        new KeyValuePair<string, object?>("operation", "invalidate_snapshot"),
        new KeyValuePair<string, object?>("tenant_id", tenantId));
    _logger.LogInformation("Invalidated {Count} entries for snapshot {SnapshotId}", assetIds.Length, snapshotId);
}
/// <summary>
/// Deletes every key stored under a tenant's prefix.
/// </summary>
/// <remarks>
/// Keys are discovered by scanning the first configured endpoint and deleted in
/// fixed-size groups as the scan proceeds, so the full key list is never held in memory.
/// Glob metacharacters in <paramref name="tenantId"/> are escaped so the scan can only
/// match that tenant's keys.
/// </remarks>
/// <param name="tenantId">Tenant whose keys are removed.</param>
/// <param name="cancellationToken">Observed while scanning for keys.</param>
public async Task InvalidateTenantAsync(
    string tenantId,
    CancellationToken cancellationToken = default)
{
    const int DeleteBatchSize = 500;

    // Backslash-escapes the characters the key-matching glob syntax treats specially,
    // so that e.g. a tenant id of "*" cannot match every tenant's keys.
    static string EscapeGlob(string? value) =>
        value is null
            ? string.Empty
            : string.Concat(value.Select(c => c is '*' or '?' or '[' or ']' or '\\' ? "\\" + c : c.ToString()));

    var server = _redis.GetServer(_redis.GetEndPoints().First());
    var db = _redis.GetDatabase();
    var pattern = $"{KeyPrefix}:{EscapeGlob(tenantId)}:*";

    var pending = new List<RedisKey>(DeleteBatchSize);
    var deleted = 0;

    // KeysAsync pages through the keyspace without blocking the calling thread.
    await foreach (var key in server.KeysAsync(pattern: pattern)
        .WithCancellation(cancellationToken)
        .ConfigureAwait(false))
    {
        pending.Add(key);
        deleted++;

        if (pending.Count >= DeleteBatchSize)
        {
            await db.KeyDeleteAsync(pending.ToArray()).ConfigureAwait(false);
            pending.Clear();
        }
    }

    if (pending.Count > 0)
    {
        await db.KeyDeleteAsync(pending.ToArray()).ConfigureAwait(false);
    }

    PolicyEngineTelemetry.EffectiveDecisionMapOperations.Add(deleted,
        new KeyValuePair<string, object?>("operation", "invalidate_tenant"),
        new KeyValuePair<string, object?>("tenant_id", tenantId));
    _logger.LogInformation("Invalidated {Count} keys for tenant {TenantId}", deleted, tenantId);
}
/// <summary>
/// Reads the current map version of a snapshot.
/// </summary>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot whose version is read.</param>
/// <param name="cancellationToken">Accepted for interface compatibility; not consulted here.</param>
/// <returns>The stored version, or 0 when the version key holds no value.</returns>
public async Task<long> GetVersionAsync(
    string tenantId,
    string snapshotId,
    CancellationToken cancellationToken = default)
{
    var database = _redis.GetDatabase();
    var stored = await database
        .StringGetAsync(GetVersionKey(tenantId, snapshotId))
        .ConfigureAwait(false);

    if (!stored.HasValue)
    {
        return 0;
    }

    return (long)stored;
}
/// <summary>
/// Increments the map version of a snapshot and returns the new value.
/// </summary>
/// <remarks>
/// After incrementing, an expiry of the default TTL plus ten minutes is requested for
/// the version key, conditioned on the key not already having one.
/// </remarks>
/// <param name="tenantId">Tenant that owns the snapshot.</param>
/// <param name="snapshotId">Snapshot whose version is incremented.</param>
/// <param name="cancellationToken">Accepted for interface compatibility; not consulted here.</param>
/// <returns>The version after the increment.</returns>
public async Task<long> IncrementVersionAsync(
    string tenantId,
    string snapshotId,
    CancellationToken cancellationToken = default)
{
    var database = _redis.GetDatabase();
    var key = GetVersionKey(tenantId, snapshotId);

    var incremented = await database.StringIncrementAsync(key).ConfigureAwait(false);

    // Only applies when the key has no expiry yet; an existing expiry is left alone.
    var versionTtl = TimeSpan.FromMinutes(_options.DefaultTtlMinutes + 10);
    await database
        .KeyExpireAsync(key, versionTtl, ExpireWhen.HasNoExpiry)
        .ConfigureAwait(false);

    return incremented;
}
/// <summary>
/// Collects entry and snapshot counts, optionally for a single tenant, plus the
/// server-reported memory usage when available.
/// </summary>
/// <remarks>
/// Counts are obtained by scanning the first configured endpoint for entry keys and
/// index keys. Memory usage is best-effort: when the server's memory information cannot
/// be read, <c>MemoryUsedBytes</c> is left null and the failure is logged at debug level.
/// <c>ExpiringWithinHour</c>, <c>LastEvictionAt</c> and <c>LastEvictionCount</c> are not
/// computed and are returned as 0 / null / 0.
/// </remarks>
/// <param name="tenantId">Tenant to restrict the counts to, or null for all tenants.</param>
/// <param name="cancellationToken">Observed while scanning for keys.</param>
/// <returns>The collected statistics.</returns>
public async Task<EffectiveDecisionMapStats> GetStatsAsync(
    string? tenantId = null,
    CancellationToken cancellationToken = default)
{
    // Backslash-escapes the characters the key-matching glob syntax treats specially,
    // so a tenant id cannot widen the scan beyond its own keys.
    static string EscapeGlob(string value) =>
        string.Concat(value.Select(c => c is '*' or '?' or '[' or ']' or '\\' ? "\\" + c : c.ToString()));

    var server = _redis.GetServer(_redis.GetEndPoints().First());

    // Counts matching keys by paging through the keyspace without blocking the caller.
    async Task<int> CountKeysAsync(string keyPattern)
    {
        var total = 0;
        await foreach (var _ in server.KeysAsync(pattern: keyPattern)
            .WithCancellation(cancellationToken)
            .ConfigureAwait(false))
        {
            total++;
        }

        return total;
    }

    var pattern = tenantId != null
        ? $"{KeyPrefix}:{EscapeGlob(tenantId)}:*:e:*"
        : $"{KeyPrefix}:*:e:*";
    var entryCount = await CountKeysAsync(pattern).ConfigureAwait(false);

    var snapshotPattern = tenantId != null
        ? $"{KeyPrefix}:{EscapeGlob(tenantId)}:*:idx"
        : $"{KeyPrefix}:*:idx";
    var snapshotCount = await CountKeysAsync(snapshotPattern).ConfigureAwait(false);

    long? memoryUsed = null;
    try
    {
        var info = await server.InfoAsync("memory").ConfigureAwait(false);
        var memorySection = info.FirstOrDefault(s => s.Key == "Memory");
        if (memorySection is not null)
        {
            var usedMemory = memorySection.FirstOrDefault(p => p.Key == "used_memory");
            if (usedMemory.Key is not null &&
                long.TryParse(
                    usedMemory.Value,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var bytes))
            {
                memoryUsed = bytes;
            }
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Still best-effort, but no longer silent: memory info is optional.
        _logger.LogDebug(ex, "Redis memory information is not available; MemoryUsedBytes left unset");
    }

    return new EffectiveDecisionMapStats
    {
        TotalEntries = entryCount,
        TotalSnapshots = snapshotCount,
        MemoryUsedBytes = memoryUsed,
        ExpiringWithinHour = 0, // Would require scanning TTLs
        LastEvictionAt = null,
        LastEvictionCount = 0,
    };
}
/// <summary>Builds the key that holds the JSON entry of one asset in a snapshot.</summary>
private static string GetEntryKey(string tenantId, string snapshotId, string assetId)
{
    return $"{KeyPrefix}:{tenantId}:{snapshotId}:e:{assetId}";
}

/// <summary>Builds the key of the sorted-set index listing a snapshot's assets.</summary>
private static string GetIndexKey(string tenantId, string snapshotId)
{
    return $"{KeyPrefix}:{tenantId}:{snapshotId}:idx";
}

/// <summary>Builds the key that holds a snapshot's integer map version.</summary>
private static string GetVersionKey(string tenantId, string snapshotId)
{
    return $"{KeyPrefix}:{tenantId}:{snapshotId}:v";
}
}
/// <summary>
/// Settings for the effective decision map.
/// </summary>
public sealed class EffectiveDecisionMapOptions
{
    // Out-of-the-box values, named so the property declarations read as intent.
    private const int StandardTtlMinutes = 60;
    private const int StandardSnapshotCapacity = 100000;
    private const int StandardEvictionIntervalMinutes = 5;

    /// <summary>
    /// Time-to-live, in minutes, applied to entries by default. Defaults to 60.
    /// </summary>
    public int DefaultTtlMinutes { get; set; } = StandardTtlMinutes;

    /// <summary>
    /// Upper bound on the number of entries per snapshot. Defaults to 100000.
    /// </summary>
    public int MaxEntriesPerSnapshot { get; set; } = StandardSnapshotCapacity;

    /// <summary>
    /// Whether expired entries are evicted automatically. Defaults to <c>true</c>.
    /// </summary>
    public bool EnableAutoEviction { get; set; } = true;

    /// <summary>
    /// Interval, in minutes, between eviction checks. Defaults to 5.
    /// </summary>
    public int EvictionIntervalMinutes { get; set; } = StandardEvictionIntervalMinutes;
}