up
Some checks failed
LNM Migration CI / build-runner (push) Has been cancelled
Ledger OpenAPI CI / deprecation-check (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Airgap Sealed CI Smoke / sealed-smoke (push) Has been cancelled
Ledger Packs CI / build-pack (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Ledger OpenAPI CI / validate-oas (push) Has been cancelled
Ledger OpenAPI CI / check-wellknown (push) Has been cancelled
Ledger Packs CI / verify-pack (push) Has been cancelled
LNM Migration CI / validate-metrics (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-14 18:33:02 +02:00
parent d233fa3529
commit 2e70c9fdb6
51 changed files with 5958 additions and 75 deletions

View File

@@ -5,10 +5,17 @@ using StellaOps.Zastava.Core.Contracts;
namespace StellaOps.Scanner.WebService.Services;
/// <summary>
/// Hierarchical rate limiter for runtime events.
/// Supports rate limiting at tenant, node, namespace, and workload levels.
/// Budget allocation: tenant → namespace → workload (when hierarchical mode enabled).
/// </summary>
internal sealed class RuntimeEventRateLimiter
{
private readonly ConcurrentDictionary<string, TokenBucket> _tenantBuckets = new(StringComparer.Ordinal);
private readonly ConcurrentDictionary<string, TokenBucket> _nodeBuckets = new(StringComparer.Ordinal);
private readonly ConcurrentDictionary<string, TokenBucket> _namespaceBuckets = new(StringComparer.Ordinal);
private readonly ConcurrentDictionary<string, TokenBucket> _workloadBuckets = new(StringComparer.Ordinal);
private readonly TimeProvider _timeProvider;
private readonly IOptionsMonitor<ScannerWebServiceOptions> _optionsMonitor;
@@ -29,33 +36,36 @@ internal sealed class RuntimeEventRateLimiter
var options = _optionsMonitor.CurrentValue.Runtime ?? new ScannerWebServiceOptions.RuntimeOptions();
var now = _timeProvider.GetUtcNow();
// Count events by scope
var tenantCounts = new Dictionary<string, int>(StringComparer.Ordinal);
var nodeCounts = new Dictionary<string, int>(StringComparer.Ordinal);
var namespaceCounts = new Dictionary<string, int>(StringComparer.Ordinal);
var workloadCounts = new Dictionary<string, int>(StringComparer.Ordinal);
foreach (var envelope in envelopes)
{
var tenant = envelope.Event.Tenant;
var node = envelope.Event.Node;
if (tenantCounts.TryGetValue(tenant, out var tenantCount))
{
tenantCounts[tenant] = tenantCount + 1;
}
else
{
tenantCounts[tenant] = 1;
}
var ns = envelope.Event.Workload?.Namespace ?? "_default";
var workloadId = GetWorkloadKey(envelope.Event);
var nodeKey = $"{tenant}|{node}";
if (nodeCounts.TryGetValue(nodeKey, out var nodeCount))
// Tenant counts
IncrementCount(tenantCounts, tenant);
// Node counts (tenant-scoped)
IncrementCount(nodeCounts, $"{tenant}|{node}");
// Namespace counts (tenant-scoped) - only used in hierarchical mode
if (options.HierarchicalRateLimitingEnabled)
{
nodeCounts[nodeKey] = nodeCount + 1;
}
else
{
nodeCounts[nodeKey] = 1;
IncrementCount(namespaceCounts, $"{tenant}|{ns}");
IncrementCount(workloadCounts, $"{tenant}|{ns}|{workloadId}");
}
}
// === Evaluate rate limits in order: tenant → node → namespace → workload ===
// 1. Tenant-level check
var tenantDecision = TryAcquire(
_tenantBuckets,
tenantCounts,
@@ -69,6 +79,7 @@ internal sealed class RuntimeEventRateLimiter
return tenantDecision;
}
// 2. Node-level check
var nodeDecision = TryAcquire(
_nodeBuckets,
nodeCounts,
@@ -77,7 +88,84 @@ internal sealed class RuntimeEventRateLimiter
now,
scope: "node");
return nodeDecision;
if (!nodeDecision.Allowed)
{
return nodeDecision;
}
// 3. Hierarchical checks (namespace → workload) - only when enabled
if (options.HierarchicalRateLimitingEnabled)
{
// 3a. Namespace-level check
var namespaceDecision = TryAcquire(
_namespaceBuckets,
namespaceCounts,
options.PerNamespaceEventsPerSecond,
options.PerNamespaceBurst,
now,
scope: "namespace");
if (!namespaceDecision.Allowed)
{
return namespaceDecision;
}
// 3b. Workload-level check
var workloadDecision = TryAcquire(
_workloadBuckets,
workloadCounts,
options.PerWorkloadEventsPerSecond,
options.PerWorkloadBurst,
now,
scope: "workload");
if (!workloadDecision.Allowed)
{
return workloadDecision;
}
}
return RateLimitDecision.Success;
}
/// <summary>
/// Builds the key used to identify a workload when counting runtime events.
/// Resolution order: pod name, then the container ID (scheme prefix removed and
/// truncated to 12 characters), then the container name, and finally "_unknown".
/// </summary>
/// <param name="evt">Runtime event whose workload information is inspected.</param>
/// <returns>A non-empty workload key; "_unknown" when nothing usable is present.</returns>
private static string GetWorkloadKey(RuntimeEvent evt)
{
    const string UnknownKey = "_unknown";
    const string SchemeSeparator = "://";
    const int MaxContainerIdLength = 12;

    var workload = evt.Workload;
    if (workload is null)
    {
        return UnknownKey;
    }

    // Prefer the pod name when one is present.
    if (!string.IsNullOrEmpty(workload.Pod))
    {
        return workload.Pod;
    }

    // Fall back to the container ID.
    if (!string.IsNullOrEmpty(workload.ContainerId))
    {
        var containerId = workload.ContainerId;

        // Single ordinal search: identifiers are not linguistic text, and the
        // culture-sensitive IndexOf(string) overload can disagree with an ordinal match.
        var separatorIndex = containerId.IndexOf(SchemeSeparator, StringComparison.Ordinal);
        if (separatorIndex >= 0)
        {
            containerId = containerId[(separatorIndex + SchemeSeparator.Length)..];
        }

        // A value that was only a scheme prefix leaves nothing to key on; in that
        // case fall through to the container name instead of returning an empty key.
        if (containerId.Length > 0)
        {
            // Truncate to keep the key length reasonable.
            return containerId.Length > MaxContainerIdLength
                ? containerId[..MaxContainerIdLength]
                : containerId;
        }
    }

    // Last resort: container name. Empty is treated like missing so the key is never blank.
    return string.IsNullOrEmpty(workload.Container) ? UnknownKey : workload.Container;
}
/// <summary>
/// Adds one to the counter stored under <paramref name="key"/>, starting from
/// zero when the key has not been seen yet.
/// </summary>
/// <param name="counts">Counter table that is updated in place.</param>
/// <param name="key">Key whose counter is incremented.</param>
private static void IncrementCount(Dictionary<string, int> counts, string key)
{
    // A missing key is seeded with 1; an existing one is bumped by one.
    counts[key] = counts.TryGetValue(key, out var current) ? current + 1 : 1;
}
private static RateLimitDecision TryAcquire(