feat(rate-limiting): Implement core rate limiting functionality with configuration, decision-making, metrics, middleware, and service registration

- Add RateLimitConfig for configuration management with YAML binding support.
- Introduce RateLimitDecision to encapsulate the result of rate limit checks.
- Implement RateLimitMetrics for OpenTelemetry metrics tracking.
- Create RateLimitMiddleware for enforcing rate limits on incoming requests.
- Develop RateLimitService to orchestrate instance and environment rate limit checks.
- Add RateLimitServiceCollectionExtensions for dependency injection registration.
This commit is contained in:
master
2025-12-17 18:02:37 +02:00
parent 394b57f6bf
commit 8bbfe4d2d2
211 changed files with 47179 additions and 1590 deletions

View File

@@ -1,5 +1,6 @@
using StellaOps.Router.Gateway.Middleware;
using StellaOps.Router.Gateway.OpenApi;
using StellaOps.Router.Gateway.RateLimit;
namespace StellaOps.Router.Gateway;
@@ -18,6 +19,9 @@ public static class ApplicationBuilderExtensions
// Enforce payload limits first
app.UseMiddleware<PayloadLimitsMiddleware>();
// Rate limiting (Sprint 1200_001_001)
app.UseRateLimiting();
// Resolve endpoints from routing state
app.UseMiddleware<EndpointResolutionMiddleware>();
@@ -30,6 +34,24 @@ public static class ApplicationBuilderExtensions
return app;
}
/// <summary>
/// Registers <see cref="RateLimitMiddleware"/> in the request pipeline, but only
/// when a <see cref="RateLimitService"/> can be resolved from the application services.
/// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
/// Task: 1.6 - Wire into Router Pipeline
/// </summary>
/// <param name="app">The application builder.</param>
/// <returns>The same application builder, for chaining.</returns>
public static IApplicationBuilder UseRateLimiting(this IApplicationBuilder app)
{
    // No registered service means rate limiting is not configured: leave the pipeline untouched.
    if (app.ApplicationServices.GetService<RateLimitService>() is null)
    {
        return app;
    }

    app.UseMiddleware<RateLimitMiddleware>();
    return app;
}
/// <summary>
/// Adds the router gateway middleware pipeline without payload limiting.
/// </summary>
@@ -37,6 +59,9 @@ public static class ApplicationBuilderExtensions
/// <returns>The application builder for chaining.</returns>
public static IApplicationBuilder UseRouterGatewayCore(this IApplicationBuilder app)
{
// Rate limiting (Sprint 1200_001_001)
app.UseRateLimiting();
// Resolve endpoints from routing state
app.UseMiddleware<EndpointResolutionMiddleware>();

View File

@@ -0,0 +1,173 @@
// -----------------------------------------------------------------------------
// CircuitBreaker.cs
// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
// Task: 1.3 - Valkey-Backed Environment Rate Limiter
// Description: Circuit breaker for resilient Valkey operations
// -----------------------------------------------------------------------------
namespace StellaOps.Router.Gateway.RateLimit;
/// <summary>
/// Circuit breaker for Valkey operations.
/// Provides fail-open behavior when Valkey is unavailable.
/// </summary>
/// <remarks>
/// State machine:
/// <list type="bullet">
/// <item>Closed → Open after <c>failureThreshold</c> consecutive failures (a success resets the count).</item>
/// <item>Open → HalfOpen once the open timeout has elapsed. The transition is lazy: it is evaluated
/// when <see cref="State"/>, <see cref="IsOpen"/> or <see cref="IsHalfOpen"/> is read.</item>
/// <item>HalfOpen → Closed on a recorded success; HalfOpen → Open on a recorded failure, or when the
/// half-open timeout passes without a success.</item>
/// </list>
/// All members are synchronized on a private lock.
/// </remarks>
public sealed class CircuitBreaker
{
    private readonly int _failureThreshold;
    private readonly TimeSpan _openTimeout;
    private readonly TimeSpan _halfOpenTimeout;
    private readonly object _lock = new();

    private CircuitState _state = CircuitState.Closed;
    private int _failureCount;
    private DateTimeOffset _openedAt;

    /// <summary>
    /// Create a circuit breaker. Every argument is clamped to a minimum of 1.
    /// </summary>
    /// <param name="failureThreshold">Consecutive failures required to open the circuit.</param>
    /// <param name="timeoutSeconds">Seconds the circuit stays open before probing (half-open).</param>
    /// <param name="halfOpenTimeout">Seconds the circuit may stay half-open without a success before it reopens.</param>
    public CircuitBreaker(int failureThreshold, int timeoutSeconds, int halfOpenTimeout)
    {
        _failureThreshold = Math.Max(1, failureThreshold);
        _openTimeout = TimeSpan.FromSeconds(Math.Max(1, timeoutSeconds));
        _halfOpenTimeout = TimeSpan.FromSeconds(Math.Max(1, halfOpenTimeout));
    }

    /// <summary>
    /// Current state of the circuit (time-based transitions are applied before returning).
    /// </summary>
    public CircuitState State
    {
        get
        {
            lock (_lock)
            {
                UpdateState();
                return _state;
            }
        }
    }

    /// <summary>
    /// Whether the circuit is open (requests should bypass Valkey).
    /// </summary>
    public bool IsOpen => State == CircuitState.Open;

    /// <summary>
    /// Whether the circuit is half-open (testing recovery).
    /// </summary>
    public bool IsHalfOpen => State == CircuitState.HalfOpen;

    /// <summary>
    /// Record a successful operation.
    /// Closes a half-open circuit and resets the consecutive-failure count.
    /// A success reported while the circuit is open is ignored.
    /// </summary>
    public void RecordSuccess()
    {
        lock (_lock)
        {
            if (_state == CircuitState.Open)
            {
                // Result of a call placed before the circuit opened; only time re-arms an open circuit.
                return;
            }

            // HalfOpen: the probe succeeded. Closed: the failure streak is broken.
            _state = CircuitState.Closed;
            _failureCount = 0;
        }
    }

    /// <summary>
    /// Record a failed operation.
    /// Opens the circuit when the failure threshold is reached, or immediately when half-open.
    /// </summary>
    public void RecordFailure()
    {
        lock (_lock)
        {
            switch (_state)
            {
                case CircuitState.Open:
                    // A failure from a call that was already in flight when the circuit opened.
                    // It must not restart the open timer, otherwise stragglers postpone recovery.
                    return;

                case CircuitState.HalfOpen:
                    // Probe failed: go straight back to open.
                    Trip();
                    return;

                default:
                    _failureCount++;
                    if (_failureCount >= _failureThreshold)
                    {
                        Trip();
                    }

                    return;
            }
        }
    }

    /// <summary>
    /// Reset the circuit breaker to the closed state with no recorded failures.
    /// </summary>
    public void Reset()
    {
        lock (_lock)
        {
            _state = CircuitState.Closed;
            _failureCount = 0;
        }
    }

    /// <summary>Move to the open state and start the open timer. Caller must hold the lock.</summary>
    private void Trip()
    {
        _state = CircuitState.Open;
        _openedAt = DateTimeOffset.UtcNow;
    }

    /// <summary>Apply time-based transitions. Caller must hold the lock.</summary>
    private void UpdateState()
    {
        if (_state == CircuitState.Closed)
        {
            return;
        }

        var timeSinceOpen = DateTimeOffset.UtcNow - _openedAt;

        if (_state == CircuitState.Open)
        {
            if (timeSinceOpen >= _openTimeout)
            {
                _state = CircuitState.HalfOpen;
            }
        }
        else if (timeSinceOpen >= _openTimeout + _halfOpenTimeout)
        {
            // Too long in half-open without a success: reopen and restart the timer.
            Trip();
        }
    }
}

/// <summary>
/// Circuit breaker state.
/// </summary>
public enum CircuitState
{
    /// <summary>Circuit is closed; the protected operation is attempted normally.</summary>
    Closed,

    /// <summary>Circuit is open; callers should skip the protected operation (fail-open).</summary>
    Open,

    /// <summary>Circuit is testing recovery; the next recorded result decides whether it closes or reopens.</summary>
    HalfOpen
}

View File

@@ -0,0 +1,182 @@
// -----------------------------------------------------------------------------
// EnvironmentRateLimiter.cs
// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
// Task: 1.3 - Valkey-Backed Environment Rate Limiter
// Description: Distributed rate limiter using Valkey for environment-level protection
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
namespace StellaOps.Router.Gateway.RateLimit;
/// <summary>
/// Valkey-backed rate limiter for environment-level protection.
/// Uses fixed-window counters with atomic Lua operations.
/// Per advisory "Designing 202 + Retry-After Backpressure Control".
/// </summary>
public sealed class EnvironmentRateLimiter : IDisposable
{
    private readonly IValkeyRateLimitStore _store;
    private readonly CircuitBreaker _circuitBreaker;
    private readonly EffectiveLimits _defaultLimits;
    private readonly ILogger<EnvironmentRateLimiter> _logger;
    private bool _disposed;

    /// <summary>
    /// Create the limiter.
    /// </summary>
    /// <param name="store">Counter store used for the increment-and-check operation.</param>
    /// <param name="circuitBreaker">Breaker that decides when the store is skipped.</param>
    /// <param name="defaultLimits">Limits applied when a call supplies none.</param>
    /// <param name="logger">Logger for skipped and failed checks.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public EnvironmentRateLimiter(
        IValkeyRateLimitStore store,
        CircuitBreaker circuitBreaker,
        EffectiveLimits defaultLimits,
        ILogger<EnvironmentRateLimiter> logger)
    {
        _store = store ?? throw new ArgumentNullException(nameof(store));
        _circuitBreaker = circuitBreaker ?? throw new ArgumentNullException(nameof(circuitBreaker));
        _defaultLimits = defaultLimits ?? throw new ArgumentNullException(nameof(defaultLimits));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Try to acquire a request slot.
    /// Returns null when no decision could be made (fail-open): the circuit breaker is open,
    /// the store call failed, or the caller cancelled the operation.
    /// </summary>
    /// <param name="microservice">Key passed to the store for the counter.</param>
    /// <param name="limits">Limits to apply; the defaults are used when null.</param>
    /// <param name="cancellationToken">Token forwarded to the store call.</param>
    /// <returns>An allow/deny decision, or null when the check was skipped or failed.</returns>
    public async Task<RateLimitDecision?> TryAcquireAsync(
        string microservice,
        EffectiveLimits? limits,
        CancellationToken cancellationToken)
    {
        if (_circuitBreaker.IsOpen)
        {
            _logger.LogWarning("Circuit breaker is open, skipping environment rate limit check");
            RateLimitMetrics.RecordCircuitBreakerTrip("open");
            return null; // Fail-open
        }

        var effectiveLimits = limits ?? _defaultLimits;

        using var latency = RateLimitMetrics.MeasureLatency(RateLimitScope.Environment);

        try
        {
            var result = await _store.IncrementAndCheckAsync(
                microservice,
                effectiveLimits.WindowSeconds,
                effectiveLimits.MaxRequests,
                cancellationToken);

            _circuitBreaker.RecordSuccess();
            RateLimitMetrics.UpdateEnvironmentCount(result.CurrentCount);

            if (result.Allowed)
            {
                return RateLimitDecision.Allow(
                    RateLimitScope.Environment,
                    result.CurrentCount,
                    effectiveLimits.MaxRequests,
                    effectiveLimits.WindowSeconds,
                    microservice);
            }

            return RateLimitDecision.Deny(
                RateLimitScope.Environment,
                result.RetryAfterSeconds,
                result.CurrentCount,
                effectiveLimits.MaxRequests,
                effectiveLimits.WindowSeconds,
                microservice);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // The caller gave up (e.g. the client aborted the request). That says nothing about
            // Valkey's health, so it must not count toward opening the circuit or be reported
            // as a Valkey error. The contract toward the caller (null = no decision) is unchanged.
            return null;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Valkey rate limit check failed for {Microservice}", microservice);
            _circuitBreaker.RecordFailure();
            RateLimitMetrics.RecordValkeyError(ex.GetType().Name);
            return null; // Fail-open
        }
    }

    /// <summary>
    /// Disposes the underlying store when it implements <see cref="IDisposable"/>.
    /// Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;
        (_store as IDisposable)?.Dispose();
    }
}
/// <summary>
/// Result of a Valkey rate limit check.
/// </summary>
/// <param name="Allowed">Whether the request fits within the limit.</param>
/// <param name="CurrentCount">Counter value for the current window, including this request.</param>
/// <param name="RetryAfterSeconds">Seconds until the window ends when denied; 0 when allowed.</param>
public sealed record ValkeyCheckResult(
    bool Allowed,
    long CurrentCount,
    int RetryAfterSeconds);

/// <summary>
/// Interface for Valkey rate limit store operations.
/// </summary>
public interface IValkeyRateLimitStore
{
    /// <summary>
    /// Atomically increment counter and check if limit is exceeded.
    /// </summary>
    /// <param name="key">Counter key.</param>
    /// <param name="windowSeconds">Length of the counting window in seconds.</param>
    /// <param name="limit">Maximum count permitted within one window.</param>
    /// <param name="cancellationToken">Token to cancel the operation.</param>
    Task<ValkeyCheckResult> IncrementAndCheckAsync(
        string key,
        int windowSeconds,
        long limit,
        CancellationToken cancellationToken);
}

/// <summary>
/// In-memory implementation for testing.
/// Fixed-window counter per key; windows are aligned to multiples of the window length
/// since the Unix epoch, so any window length (including those above 59 seconds) works.
/// </summary>
public sealed class InMemoryValkeyRateLimitStore : IValkeyRateLimitStore
{
    private readonly Dictionary<string, (long Count, DateTimeOffset WindowStart)> _counters = new();
    private readonly object _lock = new();

    /// <summary>
    /// Increment the counter for <paramref name="key"/> and report whether it is within <paramref name="limit"/>.
    /// </summary>
    /// <param name="key">Counter key.</param>
    /// <param name="windowSeconds">Window length in seconds; values below 1 are treated as 1.</param>
    /// <param name="limit">Maximum count permitted within one window.</param>
    /// <param name="cancellationToken">Unused; the operation completes synchronously.</param>
    /// <returns>A completed task with the check result.</returns>
    public Task<ValkeyCheckResult> IncrementAndCheckAsync(
        string key,
        int windowSeconds,
        long limit,
        CancellationToken cancellationToken)
    {
        // Guard against a zero/negative window (would otherwise divide by zero).
        var window = Math.Max(1, windowSeconds);

        lock (_lock)
        {
            var now = DateTimeOffset.UtcNow;

            // Align on epoch seconds rather than on the seconds-of-minute component,
            // which only produced correct windows for lengths that divide 60.
            var nowUnixSeconds = now.ToUnixTimeSeconds();
            var windowStart = DateTimeOffset.FromUnixTimeSeconds(nowUnixSeconds - (nowUnixSeconds % window));

            long count;
            if (_counters.TryGetValue(key, out var entry) && entry.WindowStart >= windowStart)
            {
                // Still inside the window this entry belongs to.
                count = entry.Count + 1;
                windowStart = entry.WindowStart;
            }
            else
            {
                // First request for the key, or the previous window expired.
                count = 1;
            }

            _counters[key] = (count, windowStart);

            var allowed = count <= limit;
            var retryAfter = 0;
            if (!allowed)
            {
                // Round up so callers never retry before the window has actually ended.
                var secondsLeft = (windowStart.AddSeconds(window) - now).TotalSeconds;
                retryAfter = Math.Max(1, (int)Math.Ceiling(secondsLeft));
            }

            return Task.FromResult(new ValkeyCheckResult(allowed, count, retryAfter));
        }
    }

    /// <summary>
    /// Remove all counters.
    /// </summary>
    public void Reset()
    {
        lock (_lock)
        {
            _counters.Clear();
        }
    }
}

View File

@@ -0,0 +1,237 @@
// -----------------------------------------------------------------------------
// InstanceRateLimiter.cs
// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
// Task: 1.2 - In-Memory Instance Rate Limiter
// Description: Sliding window rate limiter for instance-level protection
// -----------------------------------------------------------------------------
using System.Collections.Concurrent;
using System.Diagnostics;
namespace StellaOps.Router.Gateway.RateLimit;
/// <summary>
/// In-memory rate limiter for instance-level protection.
/// Keeps one <see cref="SlidingWindowCounter"/> per microservice key and removes
/// counters that report themselves stale on a one-minute timer.
/// Per advisory "Designing 202 + Retry-After Backpressure Control".
/// </summary>
public sealed class InstanceRateLimiter : IDisposable
{
    private readonly EffectiveLimits _defaultLimits;
    private readonly ConcurrentDictionary<string, SlidingWindowCounter> _counters = new();
    private readonly Timer _cleanupTimer;
    private readonly object _cleanupLock = new();
    private bool _disposed;

    /// <summary>
    /// Create instance rate limiter with default limits.
    /// </summary>
    /// <param name="defaultLimits">Limits used when a call does not supply its own.</param>
    /// <exception cref="ArgumentNullException"><paramref name="defaultLimits"/> is null.</exception>
    public InstanceRateLimiter(EffectiveLimits defaultLimits)
    {
        _defaultLimits = defaultLimits ?? throw new ArgumentNullException(nameof(defaultLimits));

        // First sweep after one minute, then every minute.
        var sweepInterval = TimeSpan.FromMinutes(1);
        _cleanupTimer = new Timer(CleanupStaleCounters, null, sweepInterval, sweepInterval);
    }

    /// <summary>
    /// Try to acquire a request slot.
    /// </summary>
    /// <param name="microservice">Target microservice name; null is counted under "default".</param>
    /// <param name="limits">Optional per-microservice limits.</param>
    /// <returns>Decision indicating whether request is allowed.</returns>
    public RateLimitDecision TryAcquire(string microservice, EffectiveLimits? limits = null)
    {
        var applied = limits ?? _defaultLimits;

        // The counter's window is fixed by whichever limits were in effect when the key was first seen.
        var counter = _counters.GetOrAdd(
            microservice ?? "default",
            _ => new SlidingWindowCounter(applied.WindowSeconds));

        var outcome = counter.TryIncrement(applied.MaxRequests);

        if (!outcome.Allowed)
        {
            var retryAfter = counter.GetRetryAfterSeconds();
            return RateLimitDecision.Deny(
                RateLimitScope.Instance,
                retryAfter,
                outcome.CurrentCount,
                applied.MaxRequests,
                applied.WindowSeconds,
                microservice);
        }

        return RateLimitDecision.Allow(
            RateLimitScope.Instance,
            outcome.CurrentCount,
            applied.MaxRequests,
            applied.WindowSeconds,
            microservice);
    }

    /// <summary>
    /// Get current request count for a microservice (0 when no counter exists for it).
    /// </summary>
    public long GetCurrentCount(string microservice)
    {
        if (_counters.TryGetValue(microservice ?? "default", out var counter))
        {
            return counter.GetCount();
        }

        return 0;
    }

    /// <summary>
    /// Reset counters (for testing).
    /// </summary>
    public void Reset() => _counters.Clear();

    /// <summary>Timer callback: drops every counter that reports itself stale.</summary>
    private void CleanupStaleCounters(object? state)
    {
        if (_disposed)
        {
            return;
        }

        lock (_cleanupLock)
        {
            // Snapshot the stale keys first, then remove them.
            var staleKeys = (from pair in _counters
                             where pair.Value.IsStale()
                             select pair.Key).ToList();

            staleKeys.ForEach(key => _counters.TryRemove(key, out _));
        }
    }

    /// <summary>
    /// Stops the cleanup timer. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (!_disposed)
        {
            _disposed = true;
            _cleanupTimer.Dispose();
        }
    }
}
/// <summary>
/// Sliding window counter for rate limiting.
/// The window is split into equal buckets arranged as a ring; a bucket's count is dropped
/// once a full window has passed since that bucket was current.
/// </summary>
/// <remarks>
/// Time is measured with <see cref="Stopwatch.GetTimestamp"/>, so every duration is kept in
/// stopwatch ticks (scaled by <see cref="Stopwatch.Frequency"/>), never in <see cref="TimeSpan"/>
/// ticks: the two units only coincide on platforms where the stopwatch runs at 10 MHz.
/// </remarks>
internal sealed class SlidingWindowCounter
{
    private readonly int _windowSeconds;
    private readonly int _bucketCount;
    private readonly long[] _buckets;
    private readonly long _bucketDurationTicks; // stopwatch ticks per bucket
    private readonly long _startTimestamp;      // stopwatch timestamp at construction
    private readonly object _lock = new();

    // Number of whole bucket durations elapsed since construction at the last rotation.
    // The ring slot of the current bucket is this value modulo the bucket count.
    private long _currentBucketNumber;

    /// <summary>
    /// Create a counter.
    /// </summary>
    /// <param name="windowSeconds">Window length in seconds; values below 1 are treated as 1.</param>
    /// <param name="bucketCount">Number of buckets the window is split into; values below 1 are treated as 1.</param>
    public SlidingWindowCounter(int windowSeconds, int bucketCount = 10)
    {
        _windowSeconds = Math.Max(1, windowSeconds);
        _bucketCount = Math.Max(1, bucketCount);
        _buckets = new long[_bucketCount];
        _bucketDurationTicks = Math.Max(
            1L,
            (long)(Stopwatch.Frequency * ((double)_windowSeconds / _bucketCount)));
        _startTimestamp = Stopwatch.GetTimestamp();
    }

    /// <summary>
    /// Try to increment the counter. Returns (allowed, currentCount).
    /// When denied, the counter is not incremented and currentCount is the existing total.
    /// </summary>
    public (bool Allowed, long CurrentCount) TryIncrement(long limit)
    {
        lock (_lock)
        {
            RotateBuckets();

            var currentCount = SumBuckets();
            if (currentCount >= limit)
            {
                return (false, currentCount);
            }

            _buckets[SlotOf(_currentBucketNumber)]++;
            return (true, currentCount + 1);
        }
    }

    /// <summary>
    /// Get current count without incrementing.
    /// </summary>
    public long GetCount()
    {
        lock (_lock)
        {
            RotateBuckets();
            return SumBuckets();
        }
    }

    /// <summary>
    /// Get seconds (rounded up, at least 1) until the oldest non-empty bucket leaves the window.
    /// Returns the full window length when only the current bucket holds requests.
    /// </summary>
    public int GetRetryAfterSeconds()
    {
        lock (_lock)
        {
            RotateBuckets();

            var elapsedTicks = Stopwatch.GetTimestamp() - _startTimestamp;

            // Slot (current + 1) is the oldest bucket, (current + 2) the next oldest, and so on.
            for (var offset = 1; offset < _bucketCount; offset++)
            {
                var expiringBucketNumber = _currentBucketNumber + offset;
                if (_buckets[SlotOf(expiringBucketNumber)] == 0)
                {
                    continue;
                }

                // That slot is cleared the moment bucket number (current + offset) begins.
                var ticksRemaining = (expiringBucketNumber * _bucketDurationTicks) - elapsedTicks;
                var seconds = (int)Math.Ceiling((double)ticksRemaining / Stopwatch.Frequency);
                return Math.Max(1, seconds);
            }

            // Everything counted so far sits in the current bucket.
            return _windowSeconds;
        }
    }

    /// <summary>
    /// Check if this counter is stale: no requests remain inside the window.
    /// </summary>
    public bool IsStale()
    {
        lock (_lock)
        {
            RotateBuckets();
            return SumBuckets() == 0;
        }
    }

    /// <summary>Clear every bucket that was entered since the last rotation. Caller holds the lock.</summary>
    private void RotateBuckets()
    {
        var bucketNumber = (Stopwatch.GetTimestamp() - _startTimestamp) / _bucketDurationTicks;
        var advanced = bucketNumber - _currentBucketNumber;
        if (advanced <= 0)
        {
            return;
        }

        if (advanced >= _bucketCount)
        {
            // A whole window (or more) went by: nothing recorded is still relevant.
            Array.Clear(_buckets, 0, _buckets.Length);
        }
        else
        {
            // Each newly entered slot still holds the count from one full window ago.
            for (long step = 1; step <= advanced; step++)
            {
                _buckets[SlotOf(_currentBucketNumber + step)] = 0;
            }
        }

        _currentBucketNumber = bucketNumber;
    }

    /// <summary>Ring slot for an absolute bucket number.</summary>
    private int SlotOf(long bucketNumber) => (int)(bucketNumber % _bucketCount);

    /// <summary>Total of all buckets. Caller holds the lock.</summary>
    private long SumBuckets()
    {
        long total = 0;
        foreach (var bucket in _buckets)
        {
            total += bucket;
        }

        return total;
    }
}

View File

@@ -0,0 +1,249 @@
// -----------------------------------------------------------------------------
// RateLimitConfig.cs
// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
// Task: 1.1 - Rate Limit Configuration Models
// Description: Root configuration class with YAML binding support
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Configuration;
namespace StellaOps.Router.Gateway.RateLimit;
/// <summary>
/// Root configuration for Router rate limiting, bound from the <c>rate_limiting</c> section.
/// Per advisory "Designing 202 + Retry-After Backpressure Control".
/// </summary>
public sealed class RateLimitConfig
{
    /// <summary>
    /// Activation gate: only check Valkey when traffic exceeds this threshold per 5 minutes.
    /// Set to 0 to always check Valkey. Default: 5000.
    /// </summary>
    [ConfigurationKeyName("process_back_pressure_when_more_than_per_5min")]
    public int ActivationThresholdPer5Min { get; set; } = 5000;

    /// <summary>
    /// Instance-level rate limits (in-memory, per router instance).
    /// </summary>
    [ConfigurationKeyName("for_instance")]
    public InstanceLimitsConfig? ForInstance { get; set; }

    /// <summary>
    /// Environment-level rate limits (Valkey-backed, across all router instances).
    /// </summary>
    [ConfigurationKeyName("for_environment")]
    public EnvironmentLimitsConfig? ForEnvironment { get; set; }

    /// <summary>
    /// Typo alias support for backwards compatibility (bound from the misspelled key).
    /// </summary>
    [ConfigurationKeyName("back_pressure_limtis")]
    public RateLimitsSection? BackPressureLimtis { get; set; }

    /// <summary>
    /// Whether rate limiting is enabled (at least one scope configured).
    /// </summary>
    public bool IsEnabled => !(ForInstance is null && ForEnvironment is null);

    /// <summary>
    /// Bind the <c>rate_limiting</c> section onto a new instance and validate it.
    /// </summary>
    /// <param name="configuration">Configuration root to read from.</param>
    /// <returns>The bound and validated configuration.</returns>
    public static RateLimitConfig Load(IConfiguration configuration)
    {
        var bound = new RateLimitConfig();
        configuration.Bind("rate_limiting", bound);
        bound.Validate();
        return bound;
    }

    /// <summary>
    /// Validate configuration values, including the instance and environment sections when present.
    /// </summary>
    /// <returns>This instance, for chaining.</returns>
    /// <exception cref="ArgumentException">A value is out of range.</exception>
    public RateLimitConfig Validate()
    {
        if (ActivationThresholdPer5Min < 0)
        {
            throw new ArgumentException("Activation threshold must be >= 0", nameof(ActivationThresholdPer5Min));
        }

        if (ForInstance is { } instanceLimits)
        {
            instanceLimits.Validate("for_instance");
        }

        if (ForEnvironment is { } environmentLimits)
        {
            environmentLimits.Validate("for_environment");
        }

        return this;
    }
}
/// <summary>
/// Instance-level rate limit configuration (in-memory).
/// </summary>
public sealed class InstanceLimitsConfig
{
    /// <summary>Time window in seconds.</summary>
    [ConfigurationKeyName("per_seconds")]
    public int PerSeconds { get; set; }

    /// <summary>Maximum requests in the time window.</summary>
    [ConfigurationKeyName("max_requests")]
    public int MaxRequests { get; set; }

    /// <summary>Burst window in seconds.</summary>
    [ConfigurationKeyName("allow_burst_for_seconds")]
    public int AllowBurstForSeconds { get; set; }

    /// <summary>Maximum burst requests.</summary>
    [ConfigurationKeyName("allow_max_burst_requests")]
    public int AllowMaxBurstRequests { get; set; }

    /// <summary>Typo alias for backwards compatibility.</summary>
    [ConfigurationKeyName("allow_max_bust_requests")]
    public int AllowMaxBustRequests { get; set; }

    /// <summary>
    /// Reject negative values, then copy the misspelled burst setting onto
    /// <see cref="AllowMaxBurstRequests"/> when only the misspelled key was supplied.
    /// </summary>
    /// <param name="path">Configuration path used as the prefix of error messages.</param>
    /// <exception cref="ArgumentException">A window or limit is negative.</exception>
    public void Validate(string path)
    {
        var hasNegativeWindowOrLimit = PerSeconds < 0 || MaxRequests < 0;
        if (hasNegativeWindowOrLimit)
        {
            throw new ArgumentException($"{path}: Window (per_seconds) and limit (max_requests) must be >= 0");
        }

        var hasNegativeBurst = AllowBurstForSeconds < 0 || AllowMaxBurstRequests < 0;
        if (hasNegativeBurst)
        {
            throw new ArgumentException($"{path}: Burst window and limit must be >= 0");
        }

        // Normalize typo alias: the correctly spelled key wins whenever it is non-zero.
        if (AllowMaxBurstRequests == 0 && AllowMaxBustRequests > 0)
        {
            AllowMaxBurstRequests = AllowMaxBustRequests;
        }
    }
}
/// <summary>
/// Environment-level rate limit configuration (Valkey-backed).
/// </summary>
public sealed class EnvironmentLimitsConfig
{
    /// <summary>Valkey connection string.</summary>
    [ConfigurationKeyName("valkey_connection")]
    public string ValkeyConnection { get; set; } = "localhost:6379";

    /// <summary>Valkey bucket/prefix for rate limit keys.</summary>
    [ConfigurationKeyName("valkey_bucket")]
    public string ValkeyBucket { get; set; } = "stella-router-rate-limit";

    /// <summary>Circuit breaker configuration.</summary>
    [ConfigurationKeyName("circuit_breaker")]
    public CircuitBreakerConfig? CircuitBreaker { get; set; }

    /// <summary>Time window in seconds.</summary>
    [ConfigurationKeyName("per_seconds")]
    public int PerSeconds { get; set; }

    /// <summary>Maximum requests in the time window.</summary>
    [ConfigurationKeyName("max_requests")]
    public int MaxRequests { get; set; }

    /// <summary>Burst window in seconds.</summary>
    [ConfigurationKeyName("allow_burst_for_seconds")]
    public int AllowBurstForSeconds { get; set; }

    /// <summary>Maximum burst requests.</summary>
    [ConfigurationKeyName("allow_max_burst_requests")]
    public int AllowMaxBurstRequests { get; set; }

    /// <summary>Per-microservice overrides.</summary>
    [ConfigurationKeyName("microservices")]
    public Dictionary<string, MicroserviceLimitsConfig>? Microservices { get; set; }

    /// <summary>
    /// Validate the connection string, window and limit, then the nested circuit breaker
    /// and per-microservice sections when present.
    /// </summary>
    /// <param name="path">Configuration path used as the prefix of error messages.</param>
    /// <exception cref="ArgumentException">The connection string is blank or a value is negative.</exception>
    public void Validate(string path)
    {
        if (string.IsNullOrWhiteSpace(ValkeyConnection))
        {
            throw new ArgumentException($"{path}: valkey_connection is required");
        }

        if (PerSeconds < 0 || MaxRequests < 0)
        {
            throw new ArgumentException($"{path}: Window and limit must be >= 0");
        }

        if (CircuitBreaker is { } breaker)
        {
            breaker.Validate($"{path}.circuit_breaker");
        }

        if (Microservices is null)
        {
            return;
        }

        foreach (var entry in Microservices)
        {
            entry.Value.Validate($"{path}.microservices.{entry.Key}");
        }
    }
}
/// <summary>
/// Per-microservice rate limit overrides.
/// </summary>
public sealed class MicroserviceLimitsConfig
{
    /// <summary>Time window in seconds.</summary>
    [ConfigurationKeyName("per_seconds")]
    public int PerSeconds { get; set; }

    /// <summary>Maximum requests in the time window.</summary>
    [ConfigurationKeyName("max_requests")]
    public int MaxRequests { get; set; }

    /// <summary>Burst window in seconds (optional).</summary>
    [ConfigurationKeyName("allow_burst_for_seconds")]
    public int? AllowBurstForSeconds { get; set; }

    /// <summary>Maximum burst requests (optional).</summary>
    [ConfigurationKeyName("allow_max_burst_requests")]
    public int? AllowMaxBurstRequests { get; set; }

    /// <summary>
    /// Reject a negative window or limit. The optional burst values are not checked here.
    /// </summary>
    /// <param name="path">Configuration path used as the prefix of the error message.</param>
    /// <exception cref="ArgumentException">The window or the limit is negative.</exception>
    public void Validate(string path)
    {
        var bothNonNegative = PerSeconds >= 0 && MaxRequests >= 0;
        if (!bothNonNegative)
        {
            throw new ArgumentException($"{path}: Window and limit must be >= 0");
        }
    }
}
/// <summary>
/// Circuit breaker configuration for Valkey resilience.
/// </summary>
public sealed class CircuitBreakerConfig
{
    /// <summary>Number of failures before opening the circuit.</summary>
    [ConfigurationKeyName("failure_threshold")]
    public int FailureThreshold { get; set; } = 5;

    /// <summary>Seconds to keep circuit open.</summary>
    [ConfigurationKeyName("timeout_seconds")]
    public int TimeoutSeconds { get; set; } = 30;

    /// <summary>Seconds in half-open state before full reset.</summary>
    [ConfigurationKeyName("half_open_timeout")]
    public int HalfOpenTimeout { get; set; } = 10;

    /// <summary>
    /// Require every setting to be at least 1. Settings are checked in declaration order
    /// and the first violation is reported.
    /// </summary>
    /// <param name="path">Configuration path used as the prefix of error messages.</param>
    /// <exception cref="ArgumentException">A setting is below 1.</exception>
    public void Validate(string path)
    {
        if (FailureThreshold is < 1)
        {
            throw new ArgumentException($"{path}: failure_threshold must be >= 1");
        }

        if (TimeoutSeconds is < 1)
        {
            throw new ArgumentException($"{path}: timeout_seconds must be >= 1");
        }

        if (HalfOpenTimeout is < 1)
        {
            throw new ArgumentException($"{path}: half_open_timeout must be >= 1");
        }
    }
}
/// <summary>
/// Generic rate limits section (for typo alias support).
/// Holds only a window and a limit, bound from the <c>per_seconds</c> and <c>max_requests</c> keys.
/// </summary>
public sealed class RateLimitsSection
{
/// <summary>Time window in seconds (config key <c>per_seconds</c>).</summary>
[ConfigurationKeyName("per_seconds")]
public int PerSeconds { get; set; }
/// <summary>Maximum requests in the time window (config key <c>max_requests</c>).</summary>
[ConfigurationKeyName("max_requests")]
public int MaxRequests { get; set; }
}

View File

@@ -0,0 +1,103 @@
// -----------------------------------------------------------------------------
// RateLimitDecision.cs
// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
// Task: 1.1 - Rate Limit Configuration Models
// Description: Decision result model for rate limit checks
// -----------------------------------------------------------------------------
namespace StellaOps.Router.Gateway.RateLimit;
/// <summary>
/// Outcome of a single rate limit check.
/// </summary>
/// <param name="Allowed">Whether the request is allowed.</param>
/// <param name="RetryAfterSeconds">Seconds to wait before retrying (0 for allowed decisions built via <see cref="Allow"/>).</param>
/// <param name="Scope">Which scope produced the decision (instance or environment).</param>
/// <param name="CurrentCount">Current request count in the window.</param>
/// <param name="Limit">The limit that was applied.</param>
/// <param name="WindowSeconds">The window size in seconds.</param>
/// <param name="Microservice">The microservice that was checked.</param>
public sealed record RateLimitDecision(
    bool Allowed,
    int RetryAfterSeconds,
    RateLimitScope Scope,
    long CurrentCount,
    long Limit,
    int WindowSeconds,
    string? Microservice = null)
{
    /// <summary>
    /// Point in time at which a retry may be attempted: the current UTC time plus
    /// <see cref="RetryAfterSeconds"/>. Evaluated on every read.
    /// </summary>
    public DateTimeOffset RetryAt
    {
        get { return DateTimeOffset.UtcNow.AddSeconds(RetryAfterSeconds); }
    }

    /// <summary>
    /// Build an "allowed" decision (retry-after is 0).
    /// </summary>
    public static RateLimitDecision Allow(RateLimitScope scope, long currentCount, long limit, int windowSeconds, string? microservice = null)
    {
        return new RateLimitDecision(
            Allowed: true,
            RetryAfterSeconds: 0,
            Scope: scope,
            CurrentCount: currentCount,
            Limit: limit,
            WindowSeconds: windowSeconds,
            Microservice: microservice);
    }

    /// <summary>
    /// Build a "denied" decision carrying the number of seconds to wait.
    /// </summary>
    public static RateLimitDecision Deny(RateLimitScope scope, int retryAfterSeconds, long currentCount, long limit, int windowSeconds, string? microservice = null)
    {
        return new RateLimitDecision(
            Allowed: false,
            RetryAfterSeconds: retryAfterSeconds,
            Scope: scope,
            CurrentCount: currentCount,
            Limit: limit,
            WindowSeconds: windowSeconds,
            Microservice: microservice);
    }
}
/// <summary>
/// Rate limit scope: identifies which limiter a decision or metric belongs to.
/// </summary>
public enum RateLimitScope
{
/// <summary>Instance-level (in-memory, per router instance).</summary>
Instance,
/// <summary>Environment-level (Valkey-backed, shared across router instances).</summary>
Environment
}
/// <summary>
/// Effective limits after inheritance resolution.
/// </summary>
/// <param name="WindowSeconds">Time window in seconds.</param>
/// <param name="MaxRequests">Maximum requests in the window.</param>
/// <param name="BurstWindowSeconds">Burst window in seconds.</param>
/// <param name="MaxBurstRequests">Maximum burst requests.</param>
public sealed record EffectiveLimits(
    int WindowSeconds,
    int MaxRequests,
    int BurstWindowSeconds,
    int MaxBurstRequests)
{
    /// <summary>
    /// Build limits from the four raw configuration values.
    /// </summary>
    public static EffectiveLimits FromConfig(int perSeconds, int maxRequests, int burstSeconds, int maxBurst)
    {
        return new EffectiveLimits(perSeconds, maxRequests, burstSeconds, maxBurst);
    }

    /// <summary>
    /// Overlay per-microservice overrides on these limits.
    /// Window and limit are overridden only by positive values; burst settings are
    /// overridden whenever the override supplies a value.
    /// </summary>
    /// <param name="msConfig">Overrides to apply; null returns this instance unchanged.</param>
    public EffectiveLimits MergeWith(MicroserviceLimitsConfig? msConfig)
    {
        if (msConfig is null)
        {
            return this;
        }

        var window = WindowSeconds;
        if (msConfig.PerSeconds > 0)
        {
            window = msConfig.PerSeconds;
        }

        var maxRequests = MaxRequests;
        if (msConfig.MaxRequests > 0)
        {
            maxRequests = msConfig.MaxRequests;
        }

        var burstWindow = msConfig.AllowBurstForSeconds ?? BurstWindowSeconds;
        var maxBurst = msConfig.AllowMaxBurstRequests ?? MaxBurstRequests;

        return new EffectiveLimits(window, maxRequests, burstWindow, maxBurst);
    }

    /// <summary>
    /// Seconds (rounded up, at least 1) from now until the window that began at
    /// <paramref name="windowStart"/> ends.
    /// </summary>
    /// <param name="currentCount">Not used by the calculation.</param>
    /// <param name="windowStart">Start of the window being measured.</param>
    public int CalculateRetryAfter(long currentCount, DateTimeOffset windowStart)
    {
        var secondsLeft = (windowStart.AddSeconds(WindowSeconds) - DateTimeOffset.UtcNow).TotalSeconds;
        var roundedUp = (int)Math.Ceiling(secondsLeft);
        return Math.Max(1, roundedUp);
    }
}

View File

@@ -0,0 +1,171 @@
// -----------------------------------------------------------------------------
// RateLimitMetrics.cs
// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
// Task: 1.5 - Metrics and Observability
// Description: OpenTelemetry metrics for rate limiting
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Diagnostics.Metrics;
namespace StellaOps.Router.Gateway.RateLimit;
/// <summary>
/// OpenTelemetry metrics for Router rate limiting.
/// All instruments hang off a single meter named <c>StellaOps.Router.Gateway.RateLimit</c>.
/// </summary>
public static class RateLimitMetrics
{
    private static readonly Meter Meter = new("StellaOps.Router.Gateway.RateLimit", "1.0.0");

    // Counters
    private static readonly Counter<long> AllowedRequests = Meter.CreateCounter<long>(
        "stellaops.router.ratelimit.allowed",
        description: "Number of requests allowed by rate limiter");

    private static readonly Counter<long> RejectedRequests = Meter.CreateCounter<long>(
        "stellaops.router.ratelimit.rejected",
        description: "Number of requests rejected by rate limiter (429)");

    private static readonly Counter<long> CircuitBreakerTrips = Meter.CreateCounter<long>(
        "stellaops.router.ratelimit.circuit_breaker.trips",
        description: "Number of circuit breaker trips");

    private static readonly Counter<long> ValkeyErrors = Meter.CreateCounter<long>(
        "stellaops.router.ratelimit.valkey.errors",
        description: "Number of Valkey errors during rate limit checks");

    // Histograms
    private static readonly Histogram<double> CheckLatency = Meter.CreateHistogram<double>(
        "stellaops.router.ratelimit.check_latency",
        unit: "ms",
        description: "Latency of rate limit checks");

    // Backing values for the observable gauges
    private static long _currentInstanceCount;
    private static long _currentEnvironmentCount;

    static RateLimitMetrics()
    {
        Meter.CreateObservableGauge(
            "stellaops.router.ratelimit.instance.current",
            () => _currentInstanceCount,
            description: "Current request count in instance limiter");

        Meter.CreateObservableGauge(
            "stellaops.router.ratelimit.environment.current",
            () => _currentEnvironmentCount,
            description: "Current request count in environment limiter");
    }

    /// <summary>
    /// Count one decision on the allowed or the rejected counter, tagged with scope and microservice.
    /// </summary>
    public static void RecordDecision(RateLimitScope scope, string microservice, bool allowed)
    {
        var counter = allowed ? AllowedRequests : RejectedRequests;
        counter.Add(1, ScopeAndMicroserviceTags(scope, microservice));
    }

    /// <summary>
    /// Count one rejection, tagged with scope and microservice.
    /// </summary>
    public static void RecordRejection(RateLimitScope scope, string microservice)
    {
        RejectedRequests.Add(1, ScopeAndMicroserviceTags(scope, microservice));
    }

    /// <summary>
    /// Record the latency of one check in milliseconds, tagged with scope.
    /// </summary>
    public static void RecordLatency(RateLimitScope scope, double milliseconds)
    {
        var tags = new TagList();
        tags.Add("scope", ScopeLabel(scope));
        CheckLatency.Record(milliseconds, tags);
    }

    /// <summary>
    /// Count one circuit breaker trip, tagged with the given reason.
    /// </summary>
    public static void RecordCircuitBreakerTrip(string reason)
    {
        var tags = new TagList();
        tags.Add("reason", reason);
        CircuitBreakerTrips.Add(1, tags);
    }

    /// <summary>
    /// Count one Valkey error, tagged with the error type.
    /// </summary>
    public static void RecordValkeyError(string errorType)
    {
        var tags = new TagList();
        tags.Add("error_type", errorType);
        ValkeyErrors.Add(1, tags);
    }

    /// <summary>
    /// Set the value reported by the instance gauge.
    /// </summary>
    public static void UpdateInstanceCount(long count) =>
        Interlocked.Exchange(ref _currentInstanceCount, count);

    /// <summary>
    /// Set the value reported by the environment gauge.
    /// </summary>
    public static void UpdateEnvironmentCount(long count) =>
        Interlocked.Exchange(ref _currentEnvironmentCount, count);

    /// <summary>
    /// Start timing a check; disposing the returned object records the elapsed time
    /// through <see cref="RecordLatency"/>.
    /// </summary>
    public static IDisposable MeasureLatency(RateLimitScope scope) => new LatencyScope(scope);

    /// <summary>Lower-cased scope name used as the "scope" tag value.</summary>
    private static string ScopeLabel(RateLimitScope scope) => scope.ToString().ToLowerInvariant();

    /// <summary>Tag set shared by the allowed and rejected counters.</summary>
    private static TagList ScopeAndMicroserviceTags(RateLimitScope scope, string microservice)
    {
        var tags = new TagList();
        tags.Add("scope", ScopeLabel(scope));
        tags.Add("microservice", microservice);
        return tags;
    }

    /// <summary>Captures a start timestamp and reports the elapsed time when disposed.</summary>
    private sealed class LatencyScope : IDisposable
    {
        private readonly RateLimitScope _scope;
        private readonly long _startTicks;

        public LatencyScope(RateLimitScope scope)
        {
            _startTicks = Stopwatch.GetTimestamp();
            _scope = scope;
        }

        public void Dispose()
        {
            RecordLatency(_scope, Stopwatch.GetElapsedTime(_startTicks).TotalMilliseconds);
        }
    }
}

View File

@@ -0,0 +1,132 @@
// -----------------------------------------------------------------------------
// RateLimitMiddleware.cs
// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
// Task: 1.4 - Rate Limit Middleware
// Description: ASP.NET Core middleware for rate limiting requests
// -----------------------------------------------------------------------------
using System.Text.Json;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Logging;
namespace StellaOps.Router.Gateway.RateLimit;
/// <summary>
/// Middleware that enforces rate limits on incoming requests.
/// Returns 429 Too Many Requests with Retry-After header when limits are exceeded.
/// </summary>
/// <remarks>
/// The middleware does not record allow/reject metrics itself:
/// <see cref="RateLimitService.CheckLimitAsync"/> already records every decision through
/// <c>RateLimitMetrics.RecordDecision</c>, so recording a rejection here as well would
/// count each rejected request twice.
/// </remarks>
public sealed class RateLimitMiddleware
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false
    };

    // Header values are machine-readable, so they are formatted independently of the current culture.
    private static readonly IFormatProvider Invariant = System.Globalization.CultureInfo.InvariantCulture;

    private readonly RequestDelegate _next;
    private readonly RateLimitService _rateLimitService;
    private readonly ILogger<RateLimitMiddleware> _logger;

    /// <summary>
    /// Creates the middleware.
    /// </summary>
    /// <param name="next">Next delegate in the request pipeline.</param>
    /// <param name="rateLimitService">Service that evaluates the rate limits.</param>
    /// <param name="logger">Logger used to report rejected requests.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public RateLimitMiddleware(
        RequestDelegate next,
        RateLimitService rateLimitService,
        ILogger<RateLimitMiddleware> logger)
    {
        _next = next ?? throw new ArgumentNullException(nameof(next));
        _rateLimitService = rateLimitService ?? throw new ArgumentNullException(nameof(rateLimitService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Checks the rate limits for the request, always adds the X-RateLimit-* headers, and either
    /// short-circuits with a 429 JSON response or invokes the next middleware.
    /// </summary>
    /// <param name="context">Current HTTP context.</param>
    public async Task InvokeAsync(HttpContext context)
    {
        // Extract microservice from routing metadata
        var microservice = ExtractMicroservice(context);

        // Check rate limits (the service records the allow/reject metrics for this decision)
        var decision = await _rateLimitService.CheckLimitAsync(microservice, context.RequestAborted);

        // Add rate limit headers (always, for visibility)
        AddRateLimitHeaders(context.Response, decision);

        if (!decision.Allowed)
        {
            _logger.LogWarning(
                "Rate limit exceeded for {Microservice}: {CurrentCount}/{Limit} in {WindowSeconds}s (scope: {Scope})",
                microservice ?? "unknown",
                decision.CurrentCount,
                decision.Limit,
                decision.WindowSeconds,
                decision.Scope);

            // Intentionally no RateLimitMetrics.RecordRejection call here: the rejection was
            // already counted by RecordDecision inside CheckLimitAsync.
            await WriteRateLimitResponse(context, decision);
            return;
        }

        await _next(context);
    }

    /// <summary>
    /// Determines the target microservice: first from <c>HttpContext.Items</c>, then from the
    /// path segment following <c>/api/</c>. Returns null when neither source yields a name.
    /// </summary>
    private static string? ExtractMicroservice(HttpContext context)
    {
        // Try to get from routing metadata
        if (context.Items.TryGetValue(RouterHttpContextKeys.TargetMicroservice, out var ms) && ms is string microservice)
        {
            return microservice;
        }

        // Try to get from request path (first segment after /api/)
        var path = context.Request.Path.Value ?? "";
        if (path.StartsWith("/api/", StringComparison.OrdinalIgnoreCase))
        {
            var segments = path.Split('/', StringSplitOptions.RemoveEmptyEntries);
            if (segments.Length > 1)
            {
                return segments[1];
            }
        }

        return null;
    }

    /// <summary>
    /// Writes X-RateLimit-Limit, X-RateLimit-Remaining (never negative) and X-RateLimit-Reset,
    /// plus Retry-After when the request is rejected.
    /// </summary>
    private static void AddRateLimitHeaders(HttpResponse response, RateLimitDecision decision)
    {
        var remaining = Math.Max(0, decision.Limit - decision.CurrentCount);

        response.Headers["X-RateLimit-Limit"] = decision.Limit.ToString(Invariant);
        response.Headers["X-RateLimit-Remaining"] = remaining.ToString(Invariant);
        response.Headers["X-RateLimit-Reset"] = decision.RetryAt.ToUnixTimeSeconds().ToString(Invariant);

        if (!decision.Allowed)
        {
            response.Headers["Retry-After"] = decision.RetryAfterSeconds.ToString(Invariant);
        }
    }

    /// <summary>
    /// Sets status 429 and serializes a <see cref="RateLimitResponse"/> describing the rejection
    /// to the response body as camelCase JSON.
    /// </summary>
    private static async Task WriteRateLimitResponse(HttpContext context, RateLimitDecision decision)
    {
        context.Response.StatusCode = StatusCodes.Status429TooManyRequests;
        context.Response.ContentType = "application/json";

        var response = new RateLimitResponse(
            Error: "rate_limit_exceeded",
            Message: $"Rate limit exceeded. Try again in {decision.RetryAfterSeconds} seconds.",
            RetryAfter: decision.RetryAfterSeconds,
            Limit: decision.Limit,
            Current: decision.CurrentCount,
            Window: decision.WindowSeconds,
            Scope: decision.Scope.ToString().ToLowerInvariant());

        await JsonSerializer.SerializeAsync(context.Response.Body, response, JsonOptions, context.RequestAborted);
    }
}
/// <summary>
/// 429 response body.
/// </summary>
/// <param name="Error">Error code; the middleware in this file sets it to "rate_limit_exceeded".</param>
/// <param name="Message">Human-readable message; the middleware includes the retry delay in seconds.</param>
/// <param name="RetryAfter">Filled from the decision's <c>RetryAfterSeconds</c>.</param>
/// <param name="Limit">Filled from the decision's <c>Limit</c>.</param>
/// <param name="Current">Filled from the decision's <c>CurrentCount</c>.</param>
/// <param name="Window">Filled from the decision's <c>WindowSeconds</c>.</param>
/// <param name="Scope">Lower-cased name of the decision's scope.</param>
internal sealed record RateLimitResponse(
string Error,
string Message,
int RetryAfter,
long Limit,
long Current,
int Window,
string Scope);

View File

@@ -0,0 +1,180 @@
// -----------------------------------------------------------------------------
// RateLimitService.cs
// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
// Task: 1.4 - Rate Limit Middleware
// Description: Orchestrates instance and environment rate limit checks
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
namespace StellaOps.Router.Gateway.RateLimit;
/// <summary>
/// Service that orchestrates rate limit checks across instance and environment scopes.
/// </summary>
public sealed class RateLimitService
{
    private readonly RateLimitConfig _config;
    private readonly InstanceRateLimiter? _instanceLimiter;
    private readonly EnvironmentRateLimiter? _environmentLimiter;
    private readonly ActivationGate _activationGate;
    private readonly ILogger<RateLimitService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="config">Rate limit configuration.</param>
    /// <param name="instanceLimiter">Instance-scope limiter, or null to skip instance checks.</param>
    /// <param name="environmentLimiter">Environment-scope limiter, or null to skip environment checks.</param>
    /// <param name="logger">Logger.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> or <paramref name="logger"/> is null.</exception>
    public RateLimitService(
        RateLimitConfig config,
        InstanceRateLimiter? instanceLimiter,
        EnvironmentRateLimiter? environmentLimiter,
        ILogger<RateLimitService> logger)
    {
        _config = config ?? throw new ArgumentNullException(nameof(config));
        _instanceLimiter = instanceLimiter;
        _environmentLimiter = environmentLimiter;
        _activationGate = new ActivationGate(config.ActivationThresholdPer5Min);
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Check rate limits for a request.
    /// </summary>
    /// <remarks>
    /// A rejecting decision is returned as soon as one scope rejects. When the request is allowed,
    /// the tightest decision that was actually evaluated is returned (so callers can report real
    /// limit/usage numbers); a synthetic allow decision with zero counts is returned only when no
    /// limiter was evaluated.
    /// </remarks>
    /// <param name="microservice">Target microservice; null is treated as "default".</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Decision indicating whether request is allowed.</returns>
    public async Task<RateLimitDecision> CheckLimitAsync(string? microservice, CancellationToken cancellationToken)
    {
        var ms = microservice ?? "default";

        // Record request for activation gate
        _activationGate.RecordRequest();

        // Tightest allowed decision seen so far; stays null until a limiter has been evaluated.
        RateLimitDecision? tightest = null;

        // Step 1: Check instance limits (always, fast)
        if (_instanceLimiter is not null)
        {
            var instanceLimits = ResolveInstanceLimits();
            var instanceDecision = _instanceLimiter.TryAcquire(ms, instanceLimits);
            RateLimitMetrics.RecordDecision(RateLimitScope.Instance, ms, instanceDecision.Allowed);

            if (!instanceDecision.Allowed)
            {
                return instanceDecision;
            }

            tightest = instanceDecision;
        }

        // Step 2: Check environment limits (if activated)
        if (_environmentLimiter is not null && _activationGate.IsActivated)
        {
            var envLimits = ResolveEnvironmentLimits(ms);
            var envDecision = await _environmentLimiter.TryAcquireAsync(ms, envLimits, cancellationToken);

            // If environment check failed (circuit breaker), allow the request
            if (envDecision is null)
            {
                _logger.LogDebug("Environment rate limit check skipped for {Microservice} (circuit breaker)", ms);
                return tightest ?? CreateAllowDecision(ms);
            }

            RateLimitMetrics.RecordDecision(RateLimitScope.Environment, ms, envDecision.Allowed);

            if (!envDecision.Allowed)
            {
                return envDecision;
            }

            tightest = PickTighter(tightest, envDecision);
        }

        return tightest ?? CreateAllowDecision(ms);
    }

    // Builds the instance-scope limits from configuration; null when no instance section exists.
    private EffectiveLimits? ResolveInstanceLimits()
    {
        if (_config.ForInstance is null)
        {
            return null;
        }

        return EffectiveLimits.FromConfig(
            _config.ForInstance.PerSeconds,
            _config.ForInstance.MaxRequests,
            _config.ForInstance.AllowBurstForSeconds,
            _config.ForInstance.AllowMaxBurstRequests);
    }

    // Builds the environment-scope limits, merged with the per-microservice override when one is configured.
    private EffectiveLimits? ResolveEnvironmentLimits(string microservice)
    {
        if (_config.ForEnvironment is null)
        {
            return null;
        }

        var baseLimits = EffectiveLimits.FromConfig(
            _config.ForEnvironment.PerSeconds,
            _config.ForEnvironment.MaxRequests,
            _config.ForEnvironment.AllowBurstForSeconds,
            _config.ForEnvironment.AllowMaxBurstRequests);

        // Check for per-microservice overrides
        if (_config.ForEnvironment.Microservices?.TryGetValue(microservice, out var msConfig) == true)
        {
            return baseLimits.MergeWith(msConfig);
        }

        return baseLimits;
    }

    // Returns whichever allowed decision leaves fewer requests before its limit is reached.
    // A decision whose Limit is not positive carries no usable limit information and never wins.
    private static RateLimitDecision PickTighter(RateLimitDecision? current, RateLimitDecision candidate)
    {
        if (current is null || current.Limit <= 0)
        {
            return candidate;
        }

        if (candidate.Limit <= 0)
        {
            return current;
        }

        var currentRemaining = current.Limit - current.CurrentCount;
        var candidateRemaining = candidate.Limit - candidate.CurrentCount;
        return candidateRemaining < currentRemaining ? candidate : current;
    }

    // Fallback allow decision (zero counts) used when no limiter produced a decision.
    private static RateLimitDecision CreateAllowDecision(string microservice)
    {
        return RateLimitDecision.Allow(RateLimitScope.Instance, 0, 0, 0, microservice);
    }
}
/// <summary>
/// Gate that activates environment rate limiting only when traffic exceeds threshold.
/// </summary>
/// <remarks>
/// Requests are counted in a fixed five-minute window that is reset lazily on the next call
/// after it has elapsed. The window is timed with <see cref="Environment.TickCount64"/>, a
/// monotonic clock, so wall-clock adjustments cannot stall or prematurely rotate it.
/// </remarks>
internal sealed class ActivationGate
{
    private const long WindowMilliseconds = 5 * 60 * 1000;

    private readonly int _thresholdPer5Min;
    private readonly object _lock = new();
    private long _requestCount;
    private long _windowStartMs;

    /// <summary>
    /// Creates the gate.
    /// </summary>
    /// <param name="thresholdPer5Min">
    /// Number of requests within one five-minute window at which the gate activates.
    /// Zero or a negative value makes the gate permanently activated.
    /// </param>
    public ActivationGate(int thresholdPer5Min)
    {
        _thresholdPer5Min = thresholdPer5Min;
        _windowStartMs = Environment.TickCount64;
    }

    /// <summary>
    /// Whether the gate is activated (the current window's request count has reached the threshold).
    /// </summary>
    public bool IsActivated
    {
        get
        {
            if (_thresholdPer5Min <= 0)
            {
                return true; // Always activated when the threshold is zero or negative
            }

            lock (_lock)
            {
                RotateWindow();
                return _requestCount >= _thresholdPer5Min;
            }
        }
    }

    /// <summary>
    /// Record a request.
    /// </summary>
    public void RecordRequest()
    {
        // The count is never consulted when the gate is permanently activated,
        // so skip the lock entirely on that path.
        if (_thresholdPer5Min <= 0)
        {
            return;
        }

        lock (_lock)
        {
            RotateWindow();
            _requestCount++;
        }
    }

    // Starts a fresh window (count reset to zero) once five minutes have elapsed. Caller must hold _lock.
    private void RotateWindow()
    {
        var now = Environment.TickCount64;
        if (now - _windowStartMs >= WindowMilliseconds)
        {
            _windowStartMs = now;
            _requestCount = 0;
        }
    }
}

View File

@@ -0,0 +1,113 @@
// -----------------------------------------------------------------------------
// RateLimitServiceCollectionExtensions.cs
// Sprint: SPRINT_1200_001_001_router_rate_limiting_core
// Task: 1.6 - Wire into Router Pipeline
// Description: DI registration for rate limiting services
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Logging;
namespace StellaOps.Router.Gateway.RateLimit;
/// <summary>
/// Extension methods for registering rate limiting services.
/// </summary>
public static class RateLimitServiceCollectionExtensions
{
    /// <summary>
    /// Adds rate limiting services to the DI container.
    /// </summary>
    /// <remarks>
    /// The loaded <see cref="RateLimitConfig"/> is always registered. When the configuration is not
    /// enabled, nothing else is registered. The in-memory store is registered only when no
    /// <see cref="IValkeyRateLimitStore"/> has been registered already, so a store added beforehand
    /// (for example by <see cref="AddRouterRateLimiting{TStore}"/>) is kept.
    /// </remarks>
    /// <param name="services">The service collection.</param>
    /// <param name="configuration">The configuration.</param>
    /// <returns>The service collection for chaining.</returns>
    public static IServiceCollection AddRouterRateLimiting(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        // Load and validate configuration
        var config = RateLimitConfig.Load(configuration);
        services.AddSingleton(config);

        if (!config.IsEnabled)
        {
            return services;
        }

        // Register instance limiter
        if (config.ForInstance is not null)
        {
            var instanceLimits = EffectiveLimits.FromConfig(
                config.ForInstance.PerSeconds,
                config.ForInstance.MaxRequests,
                config.ForInstance.AllowBurstForSeconds,
                config.ForInstance.AllowMaxBurstRequests);
            services.AddSingleton(new InstanceRateLimiter(instanceLimits));
        }

        // Register environment limiter (if configured)
        if (config.ForEnvironment is { } envConfig)
        {
            // Register Valkey store
            // Note: For production, use ValkeyRateLimitStore with StackExchange.Redis
            // For now, using in-memory store as a placeholder.
            // TryAdd (not Add): the last registration wins on resolution, so an unconditional Add
            // here would silently replace a store the caller registered earlier.
            services.TryAddSingleton<IValkeyRateLimitStore, InMemoryValkeyRateLimitStore>();

            // Register circuit breaker
            var cbConfig = envConfig.CircuitBreaker ?? new CircuitBreakerConfig();
            var circuitBreaker = new CircuitBreaker(
                cbConfig.FailureThreshold,
                cbConfig.TimeoutSeconds,
                cbConfig.HalfOpenTimeout);
            services.AddSingleton(circuitBreaker);

            // Register environment limiter
            services.AddSingleton(sp =>
            {
                var store = sp.GetRequiredService<IValkeyRateLimitStore>();
                var cb = sp.GetRequiredService<CircuitBreaker>();
                var logger = sp.GetRequiredService<ILogger<EnvironmentRateLimiter>>();
                var defaultLimits = EffectiveLimits.FromConfig(
                    envConfig.PerSeconds,
                    envConfig.MaxRequests,
                    envConfig.AllowBurstForSeconds,
                    envConfig.AllowMaxBurstRequests);
                return new EnvironmentRateLimiter(store, cb, defaultLimits, logger);
            });
        }

        // Register rate limit service (orchestrator)
        services.AddSingleton(sp =>
        {
            var rateLimitConfig = sp.GetRequiredService<RateLimitConfig>();
            var instanceLimiter = sp.GetService<InstanceRateLimiter>();
            var environmentLimiter = sp.GetService<EnvironmentRateLimiter>();
            var logger = sp.GetRequiredService<ILogger<RateLimitService>>();
            return new RateLimitService(rateLimitConfig, instanceLimiter, environmentLimiter, logger);
        });

        return services;
    }

    /// <summary>
    /// Adds rate limiting with custom Valkey store.
    /// </summary>
    /// <remarks>
    /// <typeparamref name="TStore"/> is registered first; the non-generic overload then leaves that
    /// registration in place instead of adding the in-memory placeholder.
    /// </remarks>
    /// <typeparam name="TStore">The Valkey store implementation.</typeparam>
    /// <param name="services">The service collection.</param>
    /// <param name="configuration">The configuration.</param>
    /// <returns>The service collection for chaining.</returns>
    public static IServiceCollection AddRouterRateLimiting<TStore>(
        this IServiceCollection services,
        IConfiguration configuration)
        where TStore : class, IValkeyRateLimitStore
    {
        services.AddSingleton<IValkeyRateLimitStore, TStore>();
        return services.AddRouterRateLimiting(configuration);
    }
}