Files
git.stella-ops.org/docs/router/25-Step.md
master 75f6942769
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Add integration tests for migration categories and execution
- Implemented MigrationCategoryTests to validate migration categorization for startup, release, seed, and data migrations.
- Added tests for edge cases, including null, empty, and whitespace migration names.
- Created StartupMigrationHostTests to verify the behavior of the migration host with real PostgreSQL instances using Testcontainers.
- Included tests for migration execution, schema creation, and handling of pending release migrations.
- Added SQL migration files for testing: creating a test table, adding a column, a release migration, and seeding data.
2025-12-04 19:10:54 +02:00

20 KiB

Step 25: Configuration Hot-Reload

Phase 7: Testing & Documentation | Estimated Complexity: Medium | Dependencies: All previous configuration steps


Overview

Configuration hot-reload enables dynamic updates to router and microservice configuration without restarts. This includes route definitions, rate limits, circuit breaker settings, and JWKS rotation.


Goals

  1. Support YAML configuration hot-reload
  2. Implement file watcher for configuration changes
  3. Provide atomic configuration updates
  4. Support validation before applying changes
  5. Enable rollback on invalid configuration

Configuration Watcher

namespace StellaOps.Router.Configuration;

/// <summary>
/// Watches configuration files for changes and triggers reloads.
/// </summary>
/// <remarks>
/// <para>
/// On start, one <see cref="FileSystemWatcher"/> is created for every existing file that backs a
/// <c>FileConfigurationProvider</c>. A file event is debounced, given a short settle delay,
/// checked for YAML syntax, and only then published on <see cref="Changes"/>.
/// </para>
/// <para>
/// Changes reported by the options monitor for <c>RouterConfig</c> are published as well, under the
/// "Router" section and without a file path.
/// </para>
/// <para>
/// Notifications originate from several threads (file-system events, delayed continuations and
/// options callbacks), so publication is serialised and suppressed once the watcher is disposed.
/// </para>
/// </remarks>
public sealed class ConfigurationWatcher : IHostedService, IDisposable
{
    private readonly IConfiguration _configuration;
    private readonly IOptionsMonitor<RouterConfig> _routerConfig;
    private readonly ILogger<ConfigurationWatcher> _logger;
    private readonly List<FileSystemWatcher> _watchers = new();
    private readonly Subject<ConfigurationChange> _changes = new();
    private readonly TimeSpan _debounceInterval = TimeSpan.FromMilliseconds(500);
    private readonly TimeSpan _writeSettleDelay = TimeSpan.FromMilliseconds(100);
    private readonly ConcurrentDictionary<string, DateTimeOffset> _lastChange = new();

    // Guards _changes.OnNext and the _disposed flag: observers must never be called concurrently
    // and the subject must not be used after it has been disposed.
    private readonly object _publishGate = new();

    // Registration returned by IOptionsMonitor.OnChange; must be disposed to stop the callback.
    private IDisposable? _optionsSubscription;
    private bool _disposed;

    /// <summary>
    /// Stream of validated configuration changes.
    /// </summary>
    public IObservable<ConfigurationChange> Changes => _changes;

    /// <summary>
    /// Creates the watcher.
    /// </summary>
    /// <param name="configuration">Configuration whose file-based providers determine which files are watched.</param>
    /// <param name="routerConfig">Options monitor whose change notifications are forwarded as "Router" changes.</param>
    /// <param name="logger">Logger for watch, change and failure messages.</param>
    public ConfigurationWatcher(
        IConfiguration configuration,
        IOptionsMonitor<RouterConfig> routerConfig,
        ILogger<ConfigurationWatcher> logger)
    {
        _configuration = configuration;
        _routerConfig = routerConfig;
        _logger = logger;
    }

    /// <summary>
    /// Starts watching every existing configuration file and subscribes to options changes.
    /// </summary>
    /// <param name="cancellationToken">Not used; start-up completes synchronously.</param>
    /// <returns>A completed task.</returns>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        foreach (var path in GetConfigurationFilePaths())
        {
            if (!File.Exists(path))
            {
                continue;
            }

            var directory = Path.GetDirectoryName(path)!;
            var fileName = Path.GetFileName(path);

            var watcher = new FileSystemWatcher(directory)
            {
                Filter = fileName,
                NotifyFilter = NotifyFilters.LastWrite | NotifyFilters.Size
            };

            // Track the watcher immediately so Dispose releases it even if a later step throws.
            _watchers.Add(watcher);

            // Attach the handler before enabling events so no early event is lost.
            watcher.Changed += OnConfigurationFileChanged;
            watcher.EnableRaisingEvents = true;

            _logger.LogInformation("Watching configuration file: {Path}", path);
        }

        // Also subscribe to IOptionsMonitor for programmatic changes. The registration is kept
        // so that StopAsync/Dispose can remove the callback instead of leaking it.
        _optionsSubscription?.Dispose();
        _optionsSubscription = _routerConfig.OnChange(_ => Publish(new ConfigurationChange
        {
            Section = "Router",
            ChangeType = ChangeType.Modified,
            Timestamp = DateTimeOffset.UtcNow
        }));

        return Task.CompletedTask;
    }

    /// <summary>
    /// File-system event handler: debounces rapid events for the same path and schedules validation.
    /// </summary>
    private void OnConfigurationFileChanged(object sender, FileSystemEventArgs e)
    {
        var now = DateTimeOffset.UtcNow;

        // Editors and the OS commonly raise several events per save; drop the follow-ups.
        if (_lastChange.TryGetValue(e.FullPath, out var lastChange) &&
            now - lastChange < _debounceInterval)
        {
            return;
        }

        _lastChange[e.FullPath] = now;

        _logger.LogInformation("Configuration file changed: {Path}", e.FullPath);

        // Fire-and-forget is intentional; the helper observes and logs every exception itself.
        _ = ProcessFileChangeAsync(e.FullPath, now);
    }

    /// <summary>
    /// Waits for the write to settle, validates the file and publishes the change when it is valid.
    /// </summary>
    /// <param name="fullPath">Full path of the changed file.</param>
    /// <param name="detectedAt">Time at which the file event was received.</param>
    private async Task ProcessFileChangeAsync(string fullPath, DateTimeOffset detectedAt)
    {
        try
        {
            // Delay to allow file writes to complete
            await Task.Delay(_writeSettleDelay).ConfigureAwait(false);

            // Validate configuration before notifying
            if (!ValidateConfiguration(fullPath))
            {
                return;
            }

            Publish(new ConfigurationChange
            {
                Section = DetermineSectionFromPath(fullPath),
                ChangeType = ChangeType.Modified,
                FilePath = fullPath,
                Timestamp = detectedAt
            });
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to process configuration change for {Path}", fullPath);
        }
    }

    /// <summary>
    /// Pushes a change to subscribers, one notification at a time, unless the watcher is disposed.
    /// </summary>
    private void Publish(ConfigurationChange change)
    {
        lock (_publishGate)
        {
            if (_disposed)
            {
                return;
            }

            _changes.OnNext(change);
        }
    }

    /// <summary>
    /// Checks that the file can be read and parsed as a YAML mapping.
    /// </summary>
    /// <param name="path">File to check.</param>
    /// <returns><c>true</c> when the file deserialises to a non-null document; otherwise <c>false</c>.</returns>
    private bool ValidateConfiguration(string path)
    {
        try
        {
            var yaml = File.ReadAllText(path);
            var deserializer = new DeserializerBuilder()
                .WithNamingConvention(CamelCaseNamingConvention.Instance)
                .Build();

            // Try to deserialize to validate YAML syntax
            var doc = deserializer.Deserialize<Dictionary<string, object>>(yaml);
            return doc != null;
        }
        catch (Exception ex)
        {
            // Read failures (for example a file still locked by its writer) end up here too.
            _logger.LogWarning(ex, "Invalid configuration file: {Path}", path);
            return false;
        }
    }

    /// <summary>
    /// Maps a file name (without extension, case-insensitive) to its configuration section name.
    /// </summary>
    /// <returns>The section name, or "Unknown" for unrecognised files.</returns>
    private static string DetermineSectionFromPath(string path)
    {
        // Invariant casing: the mapping must not depend on the process culture.
        var fileName = Path.GetFileNameWithoutExtension(path).ToLowerInvariant();

        return fileName switch
        {
            "router" => "Router",
            "routes" => "Routes",
            "ratelimits" => "RateLimits",
            "endpoints" => "Endpoints",
            _ => "Unknown"
        };
    }

    /// <summary>
    /// Collects the physical paths of all existing files behind file-based configuration providers.
    /// </summary>
    /// <returns>Distinct, non-empty physical paths; empty when the configuration is not a root.</returns>
    private IReadOnlyList<string> GetConfigurationFilePaths()
    {
        var paths = new List<string>();

        if (_configuration is IConfigurationRoot root)
        {
            foreach (var provider in root.Providers)
            {
                if (provider is not FileConfigurationProvider fileProvider)
                {
                    continue;
                }

                var source = fileProvider.Source;
                if (source.FileProvider?.GetFileInfo(source.Path ?? "") is { Exists: true } fileInfo &&
                    !string.IsNullOrEmpty(fileInfo.PhysicalPath))
                {
                    paths.Add(fileInfo.PhysicalPath);
                }
            }
        }

        // The same file can back more than one provider; watch it only once.
        return paths.Distinct(StringComparer.Ordinal).ToList();
    }

    /// <summary>
    /// Stops raising file events and removes the options-monitor callback.
    /// </summary>
    /// <param name="cancellationToken">Not used; shutdown completes synchronously.</param>
    /// <returns>A completed task.</returns>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        foreach (var watcher in _watchers)
        {
            watcher.EnableRaisingEvents = false;
        }

        _optionsSubscription?.Dispose();
        _optionsSubscription = null;

        return Task.CompletedTask;
    }

    /// <summary>
    /// Releases the file watchers, the options-monitor registration and the change stream.
    /// Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        lock (_publishGate)
        {
            if (_disposed)
            {
                return;
            }

            // From here on Publish is a no-op, so the subject is never used after disposal.
            _disposed = true;
        }

        _optionsSubscription?.Dispose();
        _optionsSubscription = null;

        foreach (var watcher in _watchers)
        {
            watcher.Changed -= OnConfigurationFileChanged;
            watcher.Dispose();
        }

        _watchers.Clear();
        _changes.Dispose();
    }
}

/// <summary>
/// Describes a single configuration change published by <c>ConfigurationWatcher</c>.
/// </summary>
public sealed class ConfigurationChange
{
    /// <summary>
    /// Name of the affected configuration section. The watcher uses "Router", "Routes",
    /// "RateLimits", "Endpoints" or "Unknown". Defaults to an empty string.
    /// </summary>
    public string Section { get; init; } = "";

    /// <summary>Kind of change that occurred.</summary>
    public ChangeType ChangeType { get; init; }

    /// <summary>
    /// Full path of the changed file. The watcher leaves this unset (<c>null</c>) for changes
    /// that come from the options monitor rather than from a file event.
    /// </summary>
    public string? FilePath { get; init; }

    /// <summary>Time the change was detected; the watcher supplies a UTC value.</summary>
    public DateTimeOffset Timestamp { get; init; }
}

/// <summary>
/// Kind of configuration change carried by a <c>ConfigurationChange</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the watcher sample in this document only ever publishes <see cref="Modified"/>;
/// confirm whether <see cref="Added"/> and <see cref="Removed"/> are produced elsewhere.
/// </remarks>
public enum ChangeType
{
    /// <summary>Configuration was added.</summary>
    Added,

    /// <summary>Existing configuration was modified.</summary>
    Modified,

    /// <summary>Configuration was removed.</summary>
    Removed
}

Route Configuration Reloader

namespace StellaOps.Router.Configuration;

/// <summary>
/// Hosted service that asks the route registry to reload whenever the configuration watcher
/// reports a change in the "Routes" section.
/// </summary>
public sealed class RouteConfigurationReloader : IHostedService
{
    private readonly ConfigurationWatcher _watcher;
    private readonly IRouteRegistry _routeRegistry;
    private readonly ILogger<RouteConfigurationReloader> _logger;

    // Live subscription to the watcher's change stream; null until StartAsync has run.
    private IDisposable? _subscription;

    /// <summary>
    /// Creates the reloader.
    /// </summary>
    /// <param name="watcher">Source of configuration change notifications.</param>
    /// <param name="routeRegistry">Registry that is told to reload.</param>
    /// <param name="logger">Logger for reload progress and failures.</param>
    public RouteConfigurationReloader(
        ConfigurationWatcher watcher,
        IRouteRegistry routeRegistry,
        ILogger<RouteConfigurationReloader> logger)
        => (_watcher, _routeRegistry, _logger) = (watcher, routeRegistry, logger);

    /// <summary>
    /// Subscribes to route changes.
    /// </summary>
    /// <param name="cancellationToken">Not used.</param>
    /// <returns>A completed task.</returns>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        var routeChanges = _watcher.Changes.Where(change => change.Section == "Routes");
        _subscription = routeChanges.Subscribe(OnRoutesChanged);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Reloads the route registry; a failure is logged and does not propagate to the change stream.
    /// </summary>
    /// <param name="change">The change that triggered the reload.</param>
    private void OnRoutesChanged(ConfigurationChange change)
    {
        var sourcePath = change.FilePath;
        _logger.LogInformation("Reloading routes from {Path}", sourcePath);

        try
        {
            _routeRegistry.Reload();
            _logger.LogInformation("Routes reloaded successfully");
        }
        catch (Exception reloadFailure)
        {
            _logger.LogError(reloadFailure, "Failed to reload routes, keeping previous configuration");
        }
    }

    /// <summary>
    /// Ends the subscription, if one exists.
    /// </summary>
    /// <param name="cancellationToken">Not used.</param>
    /// <returns>A completed task.</returns>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        if (_subscription is { } active)
        {
            active.Dispose();
        }

        return Task.CompletedTask;
    }
}

Rate Limit Configuration Reloader

namespace StellaOps.Router.Configuration;

/// <summary>
/// Handles hot-reload of rate limit configurations.
/// </summary>
/// <remarks>
/// Two sources trigger a refresh: "RateLimits" notifications from the configuration watcher and
/// change callbacks from the options monitor. Both registrations are kept so that
/// <see cref="StopAsync"/> can remove them; otherwise the options callback would keep this
/// instance alive and continue to fire after shutdown.
/// </remarks>
public sealed class RateLimitConfigurationReloader : IHostedService
{
    private readonly ConfigurationWatcher _watcher;
    private readonly IRateLimiter _rateLimiter;
    private readonly IOptionsMonitor<RateLimitConfig> _config;
    private readonly ILogger<RateLimitConfigurationReloader> _logger;

    // Subscription to the watcher's change stream.
    private IDisposable? _subscription;

    // Registration returned by IOptionsMonitor.OnChange.
    private IDisposable? _optionsSubscription;

    /// <summary>
    /// Creates the reloader.
    /// </summary>
    /// <param name="watcher">Source of file-based change notifications.</param>
    /// <param name="rateLimiter">Rate limiter whose cache is cleared when it supports that.</param>
    /// <param name="config">Options monitor for the rate limit configuration.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public RateLimitConfigurationReloader(
        ConfigurationWatcher watcher,
        IRateLimiter rateLimiter,
        IOptionsMonitor<RateLimitConfig> config,
        ILogger<RateLimitConfigurationReloader> logger)
    {
        _watcher = watcher;
        _rateLimiter = rateLimiter;
        _config = config;
        _logger = logger;
    }

    /// <summary>
    /// Subscribes to both change sources.
    /// </summary>
    /// <param name="cancellationToken">Not used.</param>
    /// <returns>A completed task.</returns>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        // Drop registrations from an earlier start so callbacks are never doubled.
        ReleaseSubscriptions();

        _subscription = _watcher.Changes
            .Where(c => c.Section == "RateLimits")
            .Subscribe(OnRateLimitsChanged);

        _optionsSubscription = _config.OnChange(OnRateLimitConfigChanged);

        return Task.CompletedTask;
    }

    /// <summary>Handles a "RateLimits" notification from the watcher.</summary>
    private void OnRateLimitsChanged(ConfigurationChange change)
    {
        _logger.LogInformation("Rate limit configuration changed, applying updates");
        ApplyRateLimitChanges();
    }

    /// <summary>Handles a change callback from the options monitor.</summary>
    private void OnRateLimitConfigChanged(RateLimitConfig config)
    {
        _logger.LogInformation("Rate limit options changed, applying updates");
        ApplyRateLimitChanges();
    }

    /// <summary>
    /// Clears the limiter's cache when it implements <c>ICacheableRateLimiter</c>.
    /// Failures are logged and swallowed so a bad refresh cannot break the notification source.
    /// </summary>
    private void ApplyRateLimitChanges()
    {
        try
        {
            // Rate limiter will pick up new config from IOptionsMonitor
            // Clear any cached tier information
            if (_rateLimiter is ICacheableRateLimiter cacheable)
            {
                cacheable.ClearCache();
            }

            _logger.LogInformation("Rate limit configuration applied successfully");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to apply rate limit changes");
        }
    }

    /// <summary>
    /// Removes both change registrations.
    /// </summary>
    /// <param name="cancellationToken">Not used.</param>
    /// <returns>A completed task.</returns>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        ReleaseSubscriptions();
        return Task.CompletedTask;
    }

    /// <summary>Disposes and forgets the watcher subscription and the options registration.</summary>
    private void ReleaseSubscriptions()
    {
        _subscription?.Dispose();
        _subscription = null;

        _optionsSubscription?.Dispose();
        _optionsSubscription = null;
    }
}

/// <summary>
/// Optional capability for rate limiters that hold cached state.
/// <c>RateLimitConfigurationReloader</c> calls <see cref="ClearCache"/> after a configuration
/// change when the injected rate limiter also implements this interface.
/// </summary>
public interface ICacheableRateLimiter
{
    /// <summary>
    /// Discards the limiter's cached state.
    /// NOTE(review): the caller's comment refers to "cached tier information" — confirm exactly
    /// what implementations are expected to drop.
    /// </summary>
    void ClearCache();
}

JWKS Hot-Reload

namespace StellaOps.Router.Configuration;

/// <summary>
/// Handles JWKS rotation and cache refresh.
/// </summary>
/// <remarks>
/// <para>
/// A timer triggers a refresh of the JWKS cache at the configured interval. The timer callback is
/// synchronous and merely starts an observed asynchronous refresh, so no exception can escape
/// onto the thread pool.
/// </para>
/// <para>
/// A tick that arrives while the previous refresh is still running is skipped, and an in-flight
/// refresh is cancelled when the service stops. The instance is not designed to be restarted
/// after <see cref="StopAsync"/>.
/// </para>
/// </remarks>
public sealed class JwksReloader : IHostedService, IDisposable
{
    private readonly IJwksCache _jwksCache;
    private readonly JwtAuthenticationConfig _config;
    private readonly ILogger<JwksReloader> _logger;

    // Signalled on stop/dispose. The token is captured once so it stays usable after the source
    // itself has been disposed.
    private readonly CancellationTokenSource _stopping = new();
    private readonly CancellationToken _stoppingToken;

    private Timer? _refreshTimer;

    // 0 = idle, 1 = a refresh is running. Accessed only through Interlocked.
    private int _refreshInProgress;
    private bool _disposed;

    /// <summary>
    /// Creates the reloader.
    /// </summary>
    /// <param name="jwksCache">Cache that is refreshed periodically.</param>
    /// <param name="config">Authentication options providing the refresh interval (read once).</param>
    /// <param name="logger">Logger for scheduling and refresh messages.</param>
    public JwksReloader(
        IJwksCache jwksCache,
        IOptions<JwtAuthenticationConfig> config,
        ILogger<JwksReloader> logger)
    {
        _jwksCache = jwksCache;
        _config = config.Value;
        _logger = logger;
        _stoppingToken = _stopping.Token;
    }

    /// <summary>
    /// Schedules the periodic refresh; the first refresh runs after one full interval.
    /// </summary>
    /// <param name="cancellationToken">Not used; scheduling completes synchronously.</param>
    /// <returns>A completed task.</returns>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        // Periodic refresh of JWKS
        var interval = _config.JwksRefreshInterval;

        // Release a timer left over from an earlier start instead of leaking it.
        _refreshTimer?.Dispose();
        _refreshTimer = new Timer(
            RefreshJwks,
            null,
            interval,
            interval);

        _logger.LogInformation(
            "JWKS refresh scheduled every {Interval}",
            interval);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Timer callback. Starts the asynchronous refresh, which handles its own exceptions.
    /// </summary>
    private void RefreshJwks(object? state) => _ = RefreshJwksAsync();

    /// <summary>
    /// Refreshes the cache once, unless the service is stopping or a refresh is already running.
    /// </summary>
    private async Task RefreshJwksAsync()
    {
        if (_stoppingToken.IsCancellationRequested)
        {
            return;
        }

        // Skip this tick when the previous refresh has not finished yet.
        if (Interlocked.CompareExchange(ref _refreshInProgress, 1, 0) != 0)
        {
            _logger.LogDebug("Skipping JWKS refresh, previous refresh still in progress");
            return;
        }

        try
        {
            _logger.LogDebug("Refreshing JWKS cache");

            await _jwksCache.RefreshAsync(_stoppingToken);

            _logger.LogDebug("JWKS cache refreshed successfully");
        }
        catch (OperationCanceledException) when (_stoppingToken.IsCancellationRequested)
        {
            // Expected during shutdown; not a refresh failure.
            _logger.LogDebug("JWKS refresh cancelled because the service is stopping");
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to refresh JWKS cache, will retry");
        }
        finally
        {
            Interlocked.Exchange(ref _refreshInProgress, 0);
        }
    }

    /// <summary>
    /// Stops the timer and cancels any refresh that is still running.
    /// </summary>
    /// <param name="cancellationToken">Not used; shutdown completes synchronously.</param>
    /// <returns>A completed task.</returns>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        _refreshTimer?.Dispose();
        _refreshTimer = null;

        if (!_disposed)
        {
            _stopping.Cancel();
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Releases the timer and the cancellation source even when <see cref="StopAsync"/> was never
    /// called. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        _refreshTimer?.Dispose();
        _refreshTimer = null;

        _stopping.Cancel();
        _stopping.Dispose();
    }
}

Configuration Validation

namespace StellaOps.Router.Configuration;

/// <summary>
/// Validates configuration before applying changes.
/// </summary>
public interface IConfigurationValidator
{
    /// <summary>
    /// Validates a configuration object.
    /// </summary>
    /// <typeparam name="T">Type of the configuration object; must be a reference type.</typeparam>
    /// <param name="config">Configuration object to validate.</param>
    /// <returns>A result carrying the validity flag and the list of error messages.</returns>
    ValidationResult Validate<T>(T config) where T : class;
}

/// <summary>
/// Default validator: runs data-annotation validation on the object, then extra rules for the
/// configuration types it knows about, and logs a warning when anything failed.
/// </summary>
public sealed class ConfigurationValidator : IConfigurationValidator
{
    private readonly ILogger<ConfigurationValidator> _logger;

    /// <summary>
    /// Creates the validator.
    /// </summary>
    /// <param name="logger">Logger that receives the warning for a failed validation.</param>
    public ConfigurationValidator(ILogger<ConfigurationValidator> logger) => _logger = logger;

    /// <summary>
    /// Validates <paramref name="config"/> and reports every problem found.
    /// </summary>
    /// <typeparam name="T">Type of the configuration object.</typeparam>
    /// <param name="config">Object to validate.</param>
    /// <returns>A result that is valid only when no error message was collected.</returns>
    public ValidationResult Validate<T>(T config) where T : class
    {
        var problems = new List<string>();

        // Pass 1: data annotations on all properties.
        var annotationContext = new ValidationContext(config);
        var annotationFailures = new List<System.ComponentModel.DataAnnotations.ValidationResult>();
        var annotationsPassed = Validator.TryValidateObject(
            config,
            annotationContext,
            annotationFailures,
            validateAllProperties: true);

        if (!annotationsPassed)
        {
            foreach (var failure in annotationFailures)
            {
                problems.Add(failure.ErrorMessage ?? "Unknown validation error");
            }
        }

        // Pass 2: rules specific to the known configuration types.
        if (config is RouterConfig router)
        {
            problems.AddRange(CheckRouterConfig(router));
        }
        else if (config is RateLimitConfig rateLimit)
        {
            problems.AddRange(CheckRateLimitConfig(rateLimit));
        }

        var isValid = problems.Count == 0;
        if (!isValid)
        {
            _logger.LogWarning(
                "Configuration validation failed: {Errors}",
                string.Join(", ", problems));
        }

        return new ValidationResult
        {
            IsValid = isValid,
            Errors = problems
        };
    }

    /// <summary>Router rules: payload size and request timeout must both be positive.</summary>
    private static List<string> CheckRouterConfig(RouterConfig config)
    {
        var found = new List<string>();

        if (config.MaxPayloadSize <= 0)
        {
            found.Add("MaxPayloadSize must be positive");
        }

        if (config.RequestTimeout <= TimeSpan.Zero)
        {
            found.Add("RequestTimeout must be positive");
        }

        return found;
    }

    /// <summary>Rate limit rules: every tier needs a positive requests-per-minute value.</summary>
    private static List<string> CheckRateLimitConfig(RateLimitConfig config)
    {
        var found = new List<string>();

        foreach (var (tier, limits) in config.Tiers)
        {
            if (limits.RequestsPerMinute <= 0)
            {
                found.Add($"Tier {tier}: RequestsPerMinute must be positive");
            }
        }

        return found;
    }
}

/// <summary>
/// Outcome of a configuration validation.
/// </summary>
public sealed class ValidationResult
{
    /// <summary>Whether validation passed; <c>ConfigurationValidator</c> sets it to true only when no errors were collected.</summary>
    public bool IsValid { get; init; }

    /// <summary>Validation error messages; defaults to an empty list.</summary>
    public IReadOnlyList<string> Errors { get; init; } = Array.Empty<string>();
}

Atomic Configuration Update

namespace StellaOps.Router.Configuration;

/// <summary>
/// Provides atomic configuration updates with rollback support.
/// </summary>
/// <remarks>
/// Updates are serialised with a <see cref="SemaphoreSlim"/>. A thread-affine lock such as
/// <see cref="ReaderWriterLockSlim"/> must not be held across an <c>await</c>: the continuation
/// can resume on a different thread, and releasing the lock there throws
/// <see cref="SynchronizationLockException"/>.
/// </remarks>
public sealed class AtomicConfigurationUpdater
{
    private readonly IConfigurationValidator _validator;
    private readonly ILogger<AtomicConfigurationUpdater> _logger;

    // Allows one update at a time; safe to hold across awaits.
    private readonly SemaphoreSlim _updateGate = new(1, 1);

    /// <summary>
    /// Creates the updater.
    /// </summary>
    /// <param name="validator">Validator run against the new configuration before it is applied.</param>
    /// <param name="logger">Logger for rejection, success, failure and rollback messages.</param>
    public AtomicConfigurationUpdater(
        IConfigurationValidator validator,
        ILogger<AtomicConfigurationUpdater> logger)
    {
        _validator = validator;
        _logger = logger;
    }

    /// <summary>
    /// Atomically updates configuration with validation and rollback.
    /// </summary>
    /// <typeparam name="T">Type of the configuration object.</typeparam>
    /// <param name="currentConfig">
    /// Configuration currently in effect, handed to <paramref name="rollbackAction"/> on failure.
    /// Only the reference is kept, not a copy, so it must not be mutated by
    /// <paramref name="applyAction"/>.
    /// </param>
    /// <param name="newConfig">Configuration to validate and apply.</param>
    /// <param name="applyAction">Applies the new configuration.</param>
    /// <param name="rollbackAction">Optional action that restores <paramref name="currentConfig"/> when applying fails.</param>
    /// <returns>
    /// <c>true</c> when the new configuration was applied; <c>false</c> when it failed validation
    /// or applying threw (whether or not the rollback succeeded).
    /// </returns>
    public async Task<bool> UpdateAsync<T>(
        T currentConfig,
        T newConfig,
        Func<T, Task> applyAction,
        Func<T, Task>? rollbackAction = null)
        where T : class
    {
        // Validate new configuration
        var validation = _validator.Validate(newConfig);
        if (!validation.IsValid)
        {
            _logger.LogWarning(
                "Configuration update rejected: {Errors}",
                string.Join(", ", validation.Errors));
            return false;
        }

        await _updateGate.WaitAsync();
        try
        {
            // Store current config for rollback
            var backup = currentConfig;

            try
            {
                await applyAction(newConfig);
                _logger.LogInformation("Configuration updated successfully");
                return true;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Configuration update failed, rolling back");

                if (rollbackAction != null)
                {
                    try
                    {
                        await rollbackAction(backup);
                        _logger.LogInformation("Configuration rolled back successfully");
                    }
                    catch (Exception rollbackEx)
                    {
                        _logger.LogError(rollbackEx, "Rollback failed!");
                    }
                }

                return false;
            }
        }
        finally
        {
            _updateGate.Release();
        }
    }
}

Configuration API Endpoints

namespace StellaOps.Router.Gateway;

/// <summary>
/// API endpoints for configuration management.
/// </summary>
public static class ConfigurationEndpoints
{
    /// <summary>
    /// Maps the configuration endpoints under <paramref name="basePath"/>; all of them require
    /// the "admin" authorization policy.
    /// </summary>
    /// <param name="endpoints">Route builder to add the endpoints to.</param>
    /// <param name="basePath">Path prefix for the endpoint group.</param>
    /// <returns>The same <paramref name="endpoints"/> instance, for chaining.</returns>
    public static IEndpointRouteBuilder MapConfigurationEndpoints(
        this IEndpointRouteBuilder endpoints,
        string basePath = "/api/config")
    {
        var group = endpoints.MapGroup(basePath)
            .RequireAuthorization("admin");

        group.MapGet("/", GetConfiguration);
        group.MapGet("/{section}", GetConfigurationSection);
        group.MapPost("/reload", ReloadConfiguration);
        group.MapPost("/validate", ValidateConfiguration);

        return endpoints;
    }

    /// <summary>
    /// Returns the whole configuration tree as nested objects, arrays and strings.
    /// </summary>
    /// <remarks>
    /// NOTE(review): every configuration value is returned verbatim, which would include any
    /// secrets held in configuration. Consider redaction before exposing this endpoint.
    /// </remarks>
    private static IResult GetConfiguration(
        IConfiguration configuration)
    {
        var sections = new Dictionary<string, object>();

        foreach (var child in configuration.GetChildren())
        {
            sections[child.Key] = GetSectionValue(child);
        }

        return Results.Ok(sections);
    }

    /// <summary>
    /// Converts a configuration section into a plain value: a string for a leaf, a list when
    /// every child key is an integer, otherwise a dictionary keyed by child name.
    /// </summary>
    private static object GetSectionValue(IConfigurationSection section)
    {
        var children = section.GetChildren().ToList();

        if (children.Count == 0)
        {
            return section.Value ?? "";
        }

        if (children.All(c => int.TryParse(c.Key, out _)))
        {
            // Array
            return children.Select(c => GetSectionValue(c)).ToList();
        }

        // Object
        return children.ToDictionary(c => c.Key, c => GetSectionValue(c));
    }

    /// <summary>
    /// Returns a single configuration section, or 404 when the section does not exist.
    /// </summary>
    private static IResult GetConfigurationSection(
        string section,
        IConfiguration configuration)
    {
        var configSection = configuration.GetSection(section);

        if (!configSection.Exists())
        {
            return Results.NotFound(new { error = $"Section '{section}' not found" });
        }

        return Results.Ok(GetSectionValue(configSection));
    }

    /// <summary>
    /// Forces every configuration provider to re-read its source.
    /// </summary>
    /// <remarks>
    /// Responds 200 after a successful reload, 501 when the registered configuration cannot be
    /// reloaded, and 500 when a provider fails to load.
    /// </remarks>
    private static IResult ReloadConfiguration(
        IConfiguration configuration,
        ILogger<ConfigurationWatcher> logger)
    {
        logger.LogInformation("Manual configuration reload triggered");

        if (configuration is not IConfigurationRoot root)
        {
            // Without a root there is nothing to reload; say so instead of reporting success.
            logger.LogWarning("Manual configuration reload is not supported by the registered configuration");
            return Results.Problem(
                detail: "Configuration reload is not supported",
                statusCode: StatusCodes.Status501NotImplemented);
        }

        try
        {
            root.Reload();
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "Manual configuration reload failed");
            return Results.Problem(
                detail: "Configuration reload failed",
                statusCode: StatusCodes.Status500InternalServerError);
        }

        return Results.Ok(new { message = "Configuration reload triggered" });
    }

    /// <summary>
    /// Checks that the request body is a JSON object.
    /// </summary>
    /// <remarks>
    /// Responds 400 for a missing, non-JSON or malformed body.
    /// TODO(review): only JSON syntax is checked; <paramref name="validator"/> is not used yet
    /// because the body is not bound to a typed configuration object.
    /// </remarks>
    private static async Task<IResult> ValidateConfiguration(
        HttpRequest request,
        IConfigurationValidator validator)
    {
        // ReadFromJsonAsync throws for a non-JSON content type; answer 400 rather than 500.
        if (!request.HasJsonContentType())
        {
            return Results.BadRequest(new { error = "Invalid request body" });
        }

        Dictionary<string, object>? body;
        try
        {
            body = await request.ReadFromJsonAsync<Dictionary<string, object>>();
        }
        catch (System.Text.Json.JsonException)
        {
            // Malformed JSON is a client error.
            return Results.BadRequest(new { error = "Invalid request body" });
        }

        if (body == null)
        {
            return Results.BadRequest(new { error = "Invalid request body" });
        }

        // Basic syntax validation
        return Results.Ok(new { valid = true });
    }
}

YAML Configuration

# Sample hot-reload settings.
# NOTE(review): the C# samples in this document do not show where this block is bound;
# confirm which options classes consume each key.
Configuration:
  # Enable hot-reload
  HotReload:
    Enabled: true
    # Quiet period used to collapse bursts of change events (written here as 500 ms).
    # NOTE(review): the ConfigurationWatcher sample hard-codes 500 ms and does not read this key.
    DebounceInterval: "00:00:00.500"
    ValidateBeforeApply: true

  # Files to watch
  # The file names match the sections the ConfigurationWatcher sample recognises
  # (router, routes, ratelimits).
  # NOTE(review): that sample discovers files from the registered file configuration providers
  # rather than from this list; confirm how WatchPaths is applied.
  WatchPaths:
    - "/etc/stellaops/router.yaml"
    - "/etc/stellaops/routes.yaml"
    - "/etc/stellaops/ratelimits.yaml"

  # JWKS refresh settings
  # NOTE(review): the JwksReloader sample reads JwtAuthenticationConfig.JwksRefreshInterval;
  # presumably RefreshInterval maps to it. RefreshOnError and MaxRetries are not used by that
  # sample; confirm where they take effect.
  Jwks:
    RefreshInterval: "00:05:00"
    RefreshOnError: true
    MaxRetries: 3

Deliverables

  1. StellaOps.Router.Configuration/ConfigurationWatcher.cs
  2. StellaOps.Router.Configuration/RouteConfigurationReloader.cs
  3. StellaOps.Router.Configuration/RateLimitConfigurationReloader.cs
  4. StellaOps.Router.Configuration/JwksReloader.cs
  5. StellaOps.Router.Configuration/IConfigurationValidator.cs
  6. StellaOps.Router.Configuration/ConfigurationValidator.cs
  7. StellaOps.Router.Configuration/AtomicConfigurationUpdater.cs
  8. StellaOps.Router.Gateway/ConfigurationEndpoints.cs
  9. Configuration reload tests
  10. Validation tests

Next Step

Proceed to Step 26: End-to-End Testing to implement comprehensive integration tests.