using Microsoft.Extensions.Options;
using StellaOps.Router.Common.Abstractions;
using StellaOps.Router.Common.Enums;
using StellaOps.Router.Gateway.Configuration;

namespace StellaOps.Gateway.WebService.Services;

/// <summary>
/// Background service that periodically sweeps every connection exposed by the
/// global routing state and downgrades the health status of instances whose
/// last heartbeat is older than the configured thresholds.
/// </summary>
public sealed class GatewayHealthMonitorService : BackgroundService
{
    // NOTE(review): the generic type arguments on IOptions / ILogger appear to be
    // missing in this view of the file (e.g. IOptions<TOptions>, ILogger<T>) —
    // confirm against the original source before merging.
    private readonly IGlobalRoutingState _routingState;
    private readonly IOptions _options;
    private readonly ILogger _logger;

    /// <summary>
    /// Creates the health monitor.
    /// </summary>
    /// <param name="routingState">Routing state whose connections are inspected and updated.</param>
    /// <param name="options">Options supplying the check interval and the stale/degraded thresholds.</param>
    /// <param name="logger">Logger used for lifecycle messages and status transitions.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public GatewayHealthMonitorService(
        IGlobalRoutingState routingState,
        IOptions options,
        ILogger logger)
    {
        // Fail fast at construction instead of with a NullReferenceException
        // later on the background loop.
        _routingState = routingState ?? throw new ArgumentNullException(nameof(routingState));
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Runs the monitor loop: waits for the configured check interval, then sweeps
    /// all connections, until <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    /// <param name="stoppingToken">Token signalled when the service should stop.</param>
    /// <returns>A task that completes once the loop has exited.</returns>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "Health monitor started. Stale threshold: {StaleThreshold}, Check interval: {CheckInterval}",
            _options.Value.StaleThreshold,
            _options.Value.CheckInterval);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                // The interval is re-read from options on every iteration.
                await Task.Delay(_options.Value.CheckInterval, stoppingToken);
                CheckStaleConnections();
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Only a cancellation that coincides with shutdown ends the loop.
                // Any other OperationCanceledException falls through to the general
                // handler below so it is logged and the monitor keeps running
                // instead of stopping silently.
                break;
            }
            catch (Exception ex)
            {
                // A failed sweep must not kill the monitor; log and retry after
                // the next interval.
                _logger.LogError(ex, "Error in health monitor loop");
            }
        }

        _logger.LogInformation("Health monitor stopped");
    }

    /// <summary>
    /// Performs a single sweep over all connections. Connections in
    /// <see cref="InstanceHealthStatus.Draining"/> are skipped. A connection whose
    /// heartbeat age exceeds the stale threshold is marked
    /// <see cref="InstanceHealthStatus.Unhealthy"/> (unless it already is); otherwise a
    /// <see cref="InstanceHealthStatus.Healthy"/> connection whose heartbeat age exceeds
    /// the degraded threshold is marked <see cref="InstanceHealthStatus.Degraded"/>.
    /// This method only downgrades status; it never sets a status back to Healthy.
    /// </summary>
    private void CheckStaleConnections()
    {
        var staleThreshold = _options.Value.StaleThreshold;
        var degradedThreshold = _options.Value.DegradedThreshold;

        // One timestamp for the whole sweep so every connection is judged
        // against the same instant.
        var now = DateTime.UtcNow;

        var staleCount = 0;
        var degradedCount = 0;

        foreach (var connection in _routingState.GetAllConnections())
        {
            if (connection.Status == InstanceHealthStatus.Draining)
            {
                continue;
            }

            var age = now - connection.LastHeartbeatUtc;

            if (age > staleThreshold && connection.Status != InstanceHealthStatus.Unhealthy)
            {
                _routingState.UpdateConnection(
                    connection.ConnectionId,
                    c => c.Status = InstanceHealthStatus.Unhealthy);

                _logger.LogWarning(
                    "Instance {InstanceId} ({ServiceName}/{Version}) marked Unhealthy: no heartbeat for {Age:g}",
                    connection.Instance.InstanceId,
                    connection.Instance.ServiceName,
                    connection.Instance.Version,
                    age);

                staleCount++;
            }
            else if (age > degradedThreshold && connection.Status == InstanceHealthStatus.Healthy)
            {
                _routingState.UpdateConnection(
                    connection.ConnectionId,
                    c => c.Status = InstanceHealthStatus.Degraded);

                _logger.LogWarning(
                    "Instance {InstanceId} ({ServiceName}/{Version}) marked Degraded: delayed heartbeat ({Age:g})",
                    connection.Instance.InstanceId,
                    connection.Instance.ServiceName,
                    connection.Instance.Version,
                    age);

                degradedCount++;
            }
        }

        // Emit a summary only when this sweep actually changed something.
        if (staleCount > 0 || degradedCount > 0)
        {
            _logger.LogDebug(
                "Health check completed: {StaleCount} stale, {DegradedCount} degraded",
                staleCount,
                degradedCount);
        }
    }
}