Refactor code structure for improved readability and maintainability; optimize performance in key functions.
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Router.Common.Abstractions;
|
||||
using StellaOps.Router.Common.Enums;
|
||||
using StellaOps.Router.Gateway.Configuration;
|
||||
|
||||
namespace StellaOps.Gateway.WebService.Services;
|
||||
|
||||
/// <summary>
/// Background service that periodically scans every connection returned by
/// <see cref="IGlobalRoutingState.GetAllConnections"/> and downgrades the status of
/// instances whose last heartbeat is older than the configured thresholds.
/// </summary>
public sealed class GatewayHealthMonitorService : BackgroundService
{
    private readonly IGlobalRoutingState _routingState;
    private readonly IOptions<HealthOptions> _options;
    private readonly ILogger<GatewayHealthMonitorService> _logger;

    /// <summary>
    /// Creates the monitor.
    /// </summary>
    /// <param name="routingState">Routing state whose connections are inspected and updated.</param>
    /// <param name="options">Health options supplying the check interval and the stale/degraded thresholds.</param>
    /// <param name="logger">Logger used for lifecycle and status-change messages.</param>
    public GatewayHealthMonitorService(
        IGlobalRoutingState routingState,
        IOptions<HealthOptions> options,
        ILogger<GatewayHealthMonitorService> logger)
    {
        _routingState = routingState;
        _options = options;
        _logger = logger;
    }

    /// <summary>
    /// Runs the monitor loop: waits for the configured check interval, then evaluates all
    /// connections, until <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    /// <param name="stoppingToken">Token that signals the service should stop.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        var options = _options.Value;

        _logger.LogInformation(
            "Health monitor started. Stale threshold: {StaleThreshold}, Check interval: {CheckInterval}",
            options.StaleThreshold,
            options.CheckInterval);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                // NOTE(review): an out-of-range CheckInterval makes Task.Delay throw on every
                // iteration, which would spin this loop through the generic catch below.
                // Consider validating HealthOptions at startup.
                await Task.Delay(options.CheckInterval, stoppingToken);
                CheckStaleConnections();
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Only a genuine shutdown request ends the loop. A cancellation that did not
                // originate from stoppingToken is handled (and logged) by the generic catch
                // below so that monitoring keeps running.
                break;
            }
            catch (Exception ex)
            {
                // A failed pass must not terminate the monitor; log and try again next interval.
                _logger.LogError(ex, "Error in health monitor loop");
            }
        }

        _logger.LogInformation("Health monitor stopped");
    }

    /// <summary>
    /// Performs a single pass over all connections. A connection whose heartbeat age exceeds
    /// the stale threshold is marked <see cref="InstanceHealthStatus.Unhealthy"/> (unless it
    /// already is); otherwise a <see cref="InstanceHealthStatus.Healthy"/> connection whose
    /// heartbeat age exceeds the degraded threshold is marked
    /// <see cref="InstanceHealthStatus.Degraded"/>. Draining connections are skipped.
    /// </summary>
    private void CheckStaleConnections()
    {
        var options = _options.Value;
        var staleThreshold = options.StaleThreshold;
        var degradedThreshold = options.DegradedThreshold;

        // Capture the time once so every connection in this pass is judged against the same instant.
        var now = DateTime.UtcNow;
        var staleCount = 0;
        var degradedCount = 0;

        foreach (var connection in _routingState.GetAllConnections())
        {
            // Draining instances are left untouched by the monitor.
            if (connection.Status == InstanceHealthStatus.Draining)
            {
                continue;
            }

            var age = now - connection.LastHeartbeatUtc;

            if (age > staleThreshold && connection.Status != InstanceHealthStatus.Unhealthy)
            {
                _routingState.UpdateConnection(connection.ConnectionId, c =>
                    c.Status = InstanceHealthStatus.Unhealthy);

                _logger.LogWarning(
                    "Instance {InstanceId} ({ServiceName}/{Version}) marked Unhealthy: no heartbeat for {Age:g}",
                    connection.Instance.InstanceId,
                    connection.Instance.ServiceName,
                    connection.Instance.Version,
                    age);

                staleCount++;
            }
            else if (age > degradedThreshold && connection.Status == InstanceHealthStatus.Healthy)
            {
                _routingState.UpdateConnection(connection.ConnectionId, c =>
                    c.Status = InstanceHealthStatus.Degraded);

                _logger.LogWarning(
                    "Instance {InstanceId} ({ServiceName}/{Version}) marked Degraded: delayed heartbeat ({Age:g})",
                    connection.Instance.InstanceId,
                    connection.Instance.ServiceName,
                    connection.Instance.Version,
                    age);

                degradedCount++;
            }
        }

        // Emit the summary only when this pass actually changed something.
        if (staleCount > 0 || degradedCount > 0)
        {
            _logger.LogDebug(
                "Health check completed: {StaleCount} stale, {DegradedCount} degraded",
                staleCount,
                degradedCount);
        }
    }
}
|
||||
Reference in New Issue
Block a user