Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
- Implemented MigrationCategoryTests to validate migration categorization for startup, release, seed, and data migrations. - Added tests for edge cases, including null, empty, and whitespace migration names. - Created StartupMigrationHostTests to verify the behavior of the migration host with real PostgreSQL instances using Testcontainers. - Included tests for migration execution, schema creation, and handling of pending release migrations. - Added SQL migration files for testing: creating a test table, adding a column, a release migration, and seeding data.
770 lines
21 KiB
Markdown
770 lines
21 KiB
Markdown
# Step 23: Metrics & Health Checks

**Phase 6: Observability & Resilience**
**Estimated Complexity:** Medium
**Dependencies:** Step 22 (Logging & Tracing)

---
|
|
|
|
## Overview
|
|
|
|
Metrics and health checks provide operational visibility into the router and microservices. Prometheus-compatible metrics expose request rates, latencies, error rates, and connection pool status. Health checks enable load balancers and orchestrators to route traffic appropriately.
|
|
|
|
---
|
|
|
|
## Goals

1. Expose Prometheus-compatible metrics
2. Track request/response metrics per endpoint
3. Monitor transport layer health
4. Provide liveness and readiness probes
5. Support custom health check integrations
|
|
|
|
---
|
|
|
|
## Metrics Configuration
|
|
|
|
```csharp
|
|
namespace StellaOps.Router.Common;
|
|
|
|
/// <summary>
/// Options that control metrics collection and how request metrics are labelled.
/// </summary>
public class MetricsConfig
{
    /// <summary>Turns metrics collection on or off. Defaults to <c>true</c>.</summary>
    public bool Enabled { get; set; } = true;

    /// <summary>Route of the metrics endpoint. Defaults to <c>/metrics</c>.</summary>
    public string Path { get; set; } = "/metrics";

    /// <summary>Bucket boundaries of the request duration histogram.</summary>
    public double[] DurationBuckets { get; set; } = new double[]
    {
        0.001, 0.005, 0.01, 0.025, 0.05, 0.075, 0.1,
        0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 10.0
    };

    /// <summary>Names of the labels to attach to metrics.</summary>
    public HashSet<string> IncludeLabels { get; set; } = new HashSet<string>
    {
        "method",
        "path",
        "status_code",
        "service"
    };

    /// <summary>
    /// Whether the request path is emitted as a label. Off by default because
    /// path labels may cause high cardinality.
    /// </summary>
    public bool IncludePathLabel { get; set; } = false;

    /// <summary>Maximum number of unique path labels before aggregating. Defaults to 100.</summary>
    public int MaxPathCardinality { get; set; } = 100;
}
|
|
```
|
|
|
|
---
|
|
|
|
## Core Metrics
|
|
|
|
```csharp
|
|
namespace StellaOps.Router.Common;
|
|
|
|
/// <summary>
/// Central metrics registry for Stella Router.
/// </summary>
/// <remarks>
/// All instruments are created from a single <see cref="System.Diagnostics.Metrics.Meter"/>
/// named <c>StellaOps.Router</c>.
/// </remarks>
public sealed class StellaMetrics
{
    // Static field initializers run in textual order, so the meter MUST be
    // declared before any instrument that is created from it. Declaring it
    // last leaves it null while the instruments below are being initialized.
    private static readonly Meter Meter = new("StellaOps.Router", "1.0.0");

    // Request metrics

    /// <summary>Total number of requests processed.</summary>
    public static readonly Counter<long> RequestsTotal = Meter.CreateCounter<long>(
        "stella_requests_total",
        description: "Total number of requests processed");

    /// <summary>Request processing duration in seconds.</summary>
    public static readonly Histogram<double> RequestDuration = Meter.CreateHistogram<double>(
        "stella_request_duration_seconds",
        unit: "s",
        description: "Request processing duration in seconds");

    /// <summary>Total number of request errors.</summary>
    public static readonly Counter<long> RequestErrors = Meter.CreateCounter<long>(
        "stella_request_errors_total",
        description: "Total number of request errors");

    // Transport metrics

    /// <summary>Number of currently active transport connections.</summary>
    public static readonly UpDownCounter<int> ActiveConnections = Meter.CreateUpDownCounter<int>(
        "stella_active_connections",
        description: "Number of active transport connections");

    /// <summary>Total number of transport connections.</summary>
    public static readonly Counter<long> ConnectionsTotal = Meter.CreateCounter<long>(
        "stella_connections_total",
        description: "Total number of transport connections");

    /// <summary>Total number of frames sent.</summary>
    public static readonly Counter<long> FramesSent = Meter.CreateCounter<long>(
        "stella_frames_sent_total",
        description: "Total number of frames sent");

    /// <summary>Total number of frames received.</summary>
    public static readonly Counter<long> FramesReceived = Meter.CreateCounter<long>(
        "stella_frames_received_total",
        description: "Total number of frames received");

    /// <summary>Total bytes sent.</summary>
    public static readonly Counter<long> BytesSent = Meter.CreateCounter<long>(
        "stella_bytes_sent_total",
        unit: "By",
        description: "Total bytes sent");

    /// <summary>Total bytes received.</summary>
    public static readonly Counter<long> BytesReceived = Meter.CreateCounter<long>(
        "stella_bytes_received_total",
        unit: "By",
        description: "Total bytes received");

    // Rate limiting metrics

    /// <summary>Number of requests that hit rate limits.</summary>
    public static readonly Counter<long> RateLimitHits = Meter.CreateCounter<long>(
        "stella_rate_limit_hits_total",
        description: "Number of requests that hit rate limits");

    /// <summary>Number of active rate limit buckets.</summary>
    public static readonly Gauge<int> RateLimitBuckets = Meter.CreateGauge<int>(
        "stella_rate_limit_buckets",
        description: "Number of active rate limit buckets");

    // Auth metrics

    /// <summary>Number of successful authentications.</summary>
    public static readonly Counter<long> AuthSuccesses = Meter.CreateCounter<long>(
        "stella_auth_success_total",
        description: "Number of successful authentications");

    /// <summary>Number of failed authentications.</summary>
    public static readonly Counter<long> AuthFailures = Meter.CreateCounter<long>(
        "stella_auth_failures_total",
        description: "Number of failed authentications");

    // Circuit breaker metrics

    /// <summary>Circuit breaker state (0=closed, 1=half-open, 2=open).</summary>
    public static readonly Gauge<int> CircuitBreakerState = Meter.CreateGauge<int>(
        "stella_circuit_breaker_state",
        description: "Circuit breaker state (0=closed, 1=half-open, 2=open)");
}
|
|
```
|
|
|
|
---
|
|
|
|
## Request Metrics Middleware
|
|
|
|
```csharp
|
|
namespace StellaOps.Router.Gateway;
|
|
|
|
/// <summary>
/// Middleware to collect request metrics.
/// </summary>
/// <remarks>
/// For every request (when metrics are enabled) it records one
/// <c>RequestsTotal</c> increment and one <c>RequestDuration</c> sample, plus a
/// <c>RequestErrors</c> increment when the effective status code is 400 or above.
/// </remarks>
public sealed class MetricsMiddleware
{
    // Status codes at or above this value are counted as request errors.
    private const int ErrorStatusThreshold = 400;

    // Status code reported when the pipeline threw before an error status was set.
    private const int UnhandledExceptionStatus = 500;

    private readonly RequestDelegate _next;
    private readonly MetricsConfig _config;
    private readonly PathNormalizer _pathNormalizer;

    /// <summary>Creates the middleware.</summary>
    /// <param name="next">Next delegate in the request pipeline.</param>
    /// <param name="config">Metrics options; the value is read once here.</param>
    public MetricsMiddleware(
        RequestDelegate next,
        IOptions<MetricsConfig> config)
    {
        _next = next;
        _config = config.Value;
        _pathNormalizer = new PathNormalizer(_config.MaxPathCardinality);
    }

    /// <summary>
    /// Invokes the rest of the pipeline and records request metrics afterwards.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    public async Task InvokeAsync(HttpContext context)
    {
        if (!_config.Enabled)
        {
            await _next(context);
            return;
        }

        var sw = Stopwatch.StartNew();
        var method = context.Request.Method;

        // Captured before the rest of the pipeline runs, like the method.
        string? path = _config.IncludePathLabel
            ? _pathNormalizer.Normalize(context.Request.Path)
            : null;

        var faulted = false;

        try
        {
            await _next(context);
        }
        catch
        {
            // Remember the failure so the metrics below do not report the
            // still-default status code as a success; then let it propagate.
            faulted = true;
            throw;
        }
        finally
        {
            sw.Stop();

            var statusCode = context.Response.StatusCode;
            if (faulted && statusCode < ErrorStatusThreshold)
            {
                // Whatever handles the exception upstream has not run yet, so
                // the response status is not an error status at this point.
                statusCode = UnhandledExceptionStatus;
            }

            var tags = new TagList
            {
                { "method", method },
                { "status_code", statusCode.ToString(System.Globalization.CultureInfo.InvariantCulture) }
            };

            if (path is not null)
            {
                tags.Add("path", path);
            }

            StellaMetrics.RequestsTotal.Add(1, tags);
            StellaMetrics.RequestDuration.Record(sw.Elapsed.TotalSeconds, tags);

            if (statusCode >= ErrorStatusThreshold)
            {
                StellaMetrics.RequestErrors.Add(1, tags);
            }
        }
    }
}
|
|
|
|
/// <summary>
/// Normalizes paths to prevent high cardinality.
/// </summary>
/// <remarks>
/// Segments that look like identifiers (GUIDs, integers, or anything longer than
/// 20 characters) are replaced with <c>{id}</c>. At most <c>maxCardinality</c>
/// distinct normalized labels are handed out; any further new label is reported
/// as <c>other</c>. The budget is spent per normalized label, not per raw path,
/// so <c>/users/1</c> and <c>/users/2</c> share the single label <c>/users/{id}</c>.
/// </remarks>
internal sealed class PathNormalizer
{
    private const string IdPlaceholder = "{id}";
    private const string OverflowLabel = "other";
    private const int MaxLiteralSegmentLength = 20;

    private readonly int _maxCardinality;

    // Set of normalized labels handed out so far (the value is unused).
    private readonly ConcurrentDictionary<string, byte> _knownLabels = new(StringComparer.Ordinal);

    // Serializes admission of new labels so the cap is never exceeded.
    private readonly object _gate = new();

    // Number of entries in _knownLabels; written only under _gate.
    private int _uniquePaths;

    /// <summary>Creates a normalizer.</summary>
    /// <param name="maxCardinality">
    /// Maximum number of distinct labels to return before falling back to <c>other</c>.
    /// A value of zero or less makes every path report <c>other</c>.
    /// </param>
    public PathNormalizer(int maxCardinality)
    {
        _maxCardinality = maxCardinality;
    }

    /// <summary>
    /// Returns the metric label for <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Request path, split on <c>/</c>.</param>
    /// <returns>The normalized path, or <c>other</c> once the label budget is used up.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
    public string Normalize(string path)
    {
        ArgumentNullException.ThrowIfNull(path);

        // Raw paths are deliberately not cached: they are caller-controlled,
        // and caching them is what tied the budget to raw paths.
        var segments = path.Split('/');
        for (var i = 0; i < segments.Length; i++)
        {
            if (LooksLikeIdentifier(segments[i]))
            {
                segments[i] = IdPlaceholder;
            }
        }

        var label = string.Join("/", segments);

        // Fast path: label already admitted.
        if (_knownLabels.ContainsKey(label))
        {
            return label;
        }

        // Fast path: budget exhausted, no need to take the lock.
        if (Volatile.Read(ref _uniquePaths) >= _maxCardinality)
        {
            return OverflowLabel;
        }

        lock (_gate)
        {
            // Re-check both conditions now that admissions are serialized.
            if (_knownLabels.ContainsKey(label))
            {
                return label;
            }

            if (_uniquePaths >= _maxCardinality)
            {
                return OverflowLabel;
            }

            _knownLabels[label] = 0;
            Volatile.Write(ref _uniquePaths, _uniquePaths + 1);
            return label;
        }
    }

    // True when the segment should be replaced by the {id} placeholder.
    private static bool LooksLikeIdentifier(string segment)
    {
        return Guid.TryParse(segment, out _)
            // long rather than int so numeric IDs above int.MaxValue are also collapsed.
            || long.TryParse(
                segment,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out _)
            || segment.Length > MaxLiteralSegmentLength;
    }
}
|
|
```
|
|
|
|
---
|
|
|
|
## Transport Metrics
|
|
|
|
```csharp
|
|
namespace StellaOps.Router.Transport;
|
|
|
|
/// <summary>
/// Records transport-layer activity (connections, frames, bytes) on the shared
/// <c>StellaMetrics</c> instruments.
/// </summary>
public sealed class TransportMetricsCollector
{
    /// <summary>Counts a newly opened connection and bumps the active-connection count.</summary>
    /// <param name="transport">Value of the <c>transport</c> tag.</param>
    /// <param name="serviceName">Value of the <c>service</c> tag.</param>
    public void RecordConnectionOpened(string transport, string serviceName)
    {
        var connectionTags = BuildConnectionTags(transport, serviceName);

        StellaMetrics.ConnectionsTotal.Add(1, connectionTags);
        StellaMetrics.ActiveConnections.Add(1, connectionTags);
    }

    /// <summary>Lowers the active-connection count for a closed connection.</summary>
    /// <param name="transport">Value of the <c>transport</c> tag.</param>
    /// <param name="serviceName">Value of the <c>service</c> tag.</param>
    public void RecordConnectionClosed(string transport, string serviceName)
    {
        StellaMetrics.ActiveConnections.Add(-1, BuildConnectionTags(transport, serviceName));
    }

    /// <summary>Counts one sent frame and the bytes it carried.</summary>
    /// <param name="transport">Value of the <c>transport</c> tag.</param>
    /// <param name="type">Frame type; its string form becomes the <c>frame_type</c> tag.</param>
    /// <param name="bytes">Number of bytes added to the sent-bytes counter.</param>
    public void RecordFrameSent(string transport, FrameType type, int bytes)
    {
        StellaMetrics.FramesSent.Add(1, BuildFrameTags(transport, type));
        StellaMetrics.BytesSent.Add(bytes, BuildTransportTags(transport));
    }

    /// <summary>Counts one received frame and the bytes it carried.</summary>
    /// <param name="transport">Value of the <c>transport</c> tag.</param>
    /// <param name="type">Frame type; its string form becomes the <c>frame_type</c> tag.</param>
    /// <param name="bytes">Number of bytes added to the received-bytes counter.</param>
    public void RecordFrameReceived(string transport, FrameType type, int bytes)
    {
        StellaMetrics.FramesReceived.Add(1, BuildFrameTags(transport, type));
        StellaMetrics.BytesReceived.Add(bytes, BuildTransportTags(transport));
    }

    // Tags for connection instruments: transport + service.
    private static TagList BuildConnectionTags(string transport, string serviceName)
    {
        var tags = new TagList();
        tags.Add("transport", transport);
        tags.Add("service", serviceName);
        return tags;
    }

    // Tags for frame counters: transport + frame_type.
    private static TagList BuildFrameTags(string transport, FrameType type)
    {
        var tags = new TagList();
        tags.Add("transport", transport);
        tags.Add("frame_type", type.ToString());
        return tags;
    }

    // Tags for byte counters: transport only.
    private static TagList BuildTransportTags(string transport)
    {
        var tags = new TagList();
        tags.Add("transport", transport);
        return tags;
    }
}
|
|
```
|
|
|
|
---
|
|
|
|
## Health Check System
|
|
|
|
```csharp
|
|
namespace StellaOps.Router.Common;
|
|
|
|
/// <summary>
/// Health check result.
/// </summary>
/// <remarks>
/// Declared as a record so that callers can derive a modified copy with a
/// <c>with</c> expression (the aggregating service stamps <see cref="Duration"/>
/// that way); <c>with</c> is not available on ordinary classes.
/// </remarks>
public sealed record HealthCheckResult
{
    /// <summary>Outcome of the check.</summary>
    public HealthStatus Status { get; init; }

    /// <summary>Optional human-readable explanation of the outcome.</summary>
    public string? Description { get; init; }

    /// <summary>How long the check took.</summary>
    public TimeSpan Duration { get; init; }

    /// <summary>Optional additional key/value details reported by the check.</summary>
    public IReadOnlyDictionary<string, object>? Data { get; init; }

    /// <summary>Exception associated with the outcome, if any.</summary>
    public Exception? Exception { get; init; }
}
|
|
|
|
/// <summary>
/// Health states, ordered by increasing severity. The numeric order matters:
/// aggregation picks the worst state by comparing values with <c>&gt;</c>.
/// </summary>
public enum HealthStatus
{
    /// <summary>Everything is working.</summary>
    Healthy = 0,

    /// <summary>Working, but with a problem.</summary>
    Degraded = 1,

    /// <summary>Not working.</summary>
    Unhealthy = 2
}
|
|
|
|
/// <summary>
/// Contract for a single named health check.
/// </summary>
public interface IHealthCheck
{
    /// <summary>Runs the check.</summary>
    /// <param name="cancellationToken">Token used to cancel the check.</param>
    /// <returns>The outcome of the check.</returns>
    Task<HealthCheckResult> CheckAsync(CancellationToken cancellationToken);

    /// <summary>Name under which the result is reported.</summary>
    string Name { get; }
}
|
|
|
|
/// <summary>
/// Aggregates multiple health checks.
/// </summary>
/// <remarks>
/// Checks run one after another. The overall status is the worst individual
/// status; a check that throws is reported as <see cref="HealthStatus.Unhealthy"/>.
/// </remarks>
public sealed class HealthCheckService
{
    private readonly IEnumerable<IHealthCheck> _checks;
    private readonly ILogger<HealthCheckService> _logger;

    /// <summary>Creates the service.</summary>
    /// <param name="checks">The health checks to run.</param>
    /// <param name="logger">Logger used to report failing checks.</param>
    public HealthCheckService(
        IEnumerable<IHealthCheck> checks,
        ILogger<HealthCheckService> logger)
    {
        _checks = checks;
        _logger = logger;
    }

    /// <summary>
    /// Runs every registered check and builds the combined report.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the run.</param>
    /// <returns>
    /// A report keyed by check name. <c>TotalDuration</c> is the sum of the
    /// individual durations in milliseconds.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// A check was cancelled because <paramref name="cancellationToken"/> was cancelled.
    /// </exception>
    public async Task<HealthReport> CheckHealthAsync(CancellationToken cancellationToken)
    {
        var results = new Dictionary<string, HealthCheckResult>();
        var overallStatus = HealthStatus.Healthy;

        foreach (var check in _checks)
        {
            var sw = Stopwatch.StartNew();

            try
            {
                var result = await check.CheckAsync(cancellationToken);

                // Stamp the measured duration onto a copy of the result.
                result = result with { Duration = sw.Elapsed };
                results[check.Name] = result;

                // Relies on HealthStatus being ordered by increasing severity.
                if (result.Status > overallStatus)
                {
                    overallStatus = result.Status;
                }
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // The caller gave up; that is not a failure of the check, so do
                // not log it or report the dependency as unhealthy.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Health check {Name} failed", check.Name);

                results[check.Name] = new HealthCheckResult
                {
                    Status = HealthStatus.Unhealthy,
                    Description = ex.Message,
                    Duration = sw.Elapsed,
                    Exception = ex
                };
                overallStatus = HealthStatus.Unhealthy;
            }
        }

        return new HealthReport
        {
            Status = overallStatus,
            Checks = results,
            TotalDuration = results.Values.Sum(r => r.Duration.TotalMilliseconds)
        };
    }
}
|
|
|
|
/// <summary>
/// Combined outcome of a health check run.
/// </summary>
public sealed class HealthReport
{
    /// <summary>Overall status of the run.</summary>
    public HealthStatus Status { get; init; }

    /// <summary>Individual results keyed by check name; empty by default.</summary>
    public IReadOnlyDictionary<string, HealthCheckResult> Checks { get; init; }
        = new Dictionary<string, HealthCheckResult>();

    /// <summary>Total duration of the run, in milliseconds.</summary>
    public double TotalDuration { get; init; }
}
|
|
```
|
|
|
|
---
|
|
|
|
## Built-in Health Checks
|
|
|
|
```csharp
|
|
namespace StellaOps.Router.Gateway;
|
|
|
|
/// <summary>
/// Reports unhealthy unless at least one transport connection is in the
/// <c>Connected</c> state.
/// </summary>
public sealed class TransportHealthCheck : IHealthCheck
{
    private readonly IGlobalRoutingState _routingState;

    /// <summary>Creates the check.</summary>
    /// <param name="routingState">Source of the current connections.</param>
    public TransportHealthCheck(IGlobalRoutingState routingState)
    {
        _routingState = routingState;
    }

    /// <inheritdoc />
    public string Name => "transport";

    /// <summary>Counts connected transports and maps the count to a status.</summary>
    /// <param name="cancellationToken">Not used; the check completes synchronously.</param>
    /// <returns>
    /// Healthy with the connection count, or Unhealthy when the count is zero.
    /// The count is also exposed under the <c>connections</c> data key.
    /// </returns>
    public Task<HealthCheckResult> CheckAsync(CancellationToken cancellationToken)
    {
        var connections = _routingState.GetAllConnections();
        var activeCount = connections.Count(c => c.State == ConnectionState.Connected);
        var anyActive = activeCount != 0;

        var outcome = new HealthCheckResult
        {
            Status = anyActive ? HealthStatus.Healthy : HealthStatus.Unhealthy,
            Description = anyActive
                ? $"{activeCount} active connections"
                : "No active transport connections",
            Data = new Dictionary<string, object> { ["connections"] = activeCount }
        };

        return Task.FromResult(outcome);
    }
}
|
|
|
|
/// <summary>
/// Probes the Authority service and reports whether it responds as healthy.
/// </summary>
public sealed class AuthorityHealthCheck : IHealthCheck
{
    private readonly IAuthorityClient _authority;
    private readonly TimeSpan _timeout;

    /// <summary>Creates the check.</summary>
    /// <param name="authority">Client used to probe the Authority service.</param>
    /// <param name="config">Options providing <c>HealthCheckTimeout</c>.</param>
    public AuthorityHealthCheck(
        IAuthorityClient authority,
        IOptions<AuthorityConfig> config)
    {
        _authority = authority;
        _timeout = config.Value.HealthCheckTimeout;
    }

    /// <inheritdoc />
    public string Name => "authority";

    /// <summary>
    /// Calls the Authority health probe, bounded by the configured timeout.
    /// </summary>
    /// <param name="cancellationToken">Token linked with the timeout for the probe.</param>
    /// <returns>
    /// Healthy when the probe returns <c>true</c>; Degraded when it returns
    /// <c>false</c> or throws.
    /// </returns>
    public async Task<HealthCheckResult> CheckAsync(CancellationToken cancellationToken)
    {
        bool responsive;

        try
        {
            using var timeoutSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            timeoutSource.CancelAfter(_timeout);

            responsive = await _authority.CheckHealthAsync(timeoutSource.Token);
        }
        catch (Exception ex)
        {
            return new HealthCheckResult
            {
                Status = HealthStatus.Degraded, // Degraded, not unhealthy - gateway can still work
                Description = $"Authority unreachable: {ex.Message}",
                Exception = ex
            };
        }

        if (responsive)
        {
            return new HealthCheckResult
            {
                Status = HealthStatus.Healthy,
                Description = "Authority is responsive"
            };
        }

        return new HealthCheckResult
        {
            Status = HealthStatus.Degraded,
            Description = "Authority returned unhealthy"
        };
    }
}
|
|
|
|
/// <summary>
/// Verifies that the rate limiter can serve a request by issuing a probe call.
/// </summary>
public sealed class RateLimiterHealthCheck : IHealthCheck
{
    private readonly IRateLimiter _rateLimiter;

    /// <summary>Creates the check.</summary>
    /// <param name="rateLimiter">The rate limiter to probe.</param>
    public RateLimiterHealthCheck(IRateLimiter rateLimiter)
    {
        _rateLimiter = rateLimiter;
    }

    /// <inheritdoc />
    public string Name => "rate_limiter";

    /// <summary>
    /// Performs one limit check under the reserved key <c>__health_check__</c>.
    /// </summary>
    /// <param name="cancellationToken">Token passed to the probe call.</param>
    /// <returns>Healthy when the call completes; Degraded when it throws.</returns>
    public async Task<HealthCheckResult> CheckAsync(CancellationToken cancellationToken)
    {
        try
        {
            // The probe's own result is ignored; only completing without an
            // exception matters.
            var probe = new RateLimitContext { Key = "__health_check__", Tier = RateLimitTier.Free };
            await _rateLimiter.CheckLimitAsync(probe, cancellationToken);
        }
        catch (Exception ex)
        {
            return new HealthCheckResult
            {
                Status = HealthStatus.Degraded,
                Description = $"Rate limiter error: {ex.Message}",
                Exception = ex
            };
        }

        return new HealthCheckResult
        {
            Status = HealthStatus.Healthy,
            Description = "Rate limiter is responsive"
        };
    }
}
|
|
```
|
|
|
|
---
|
|
|
|
## Health Endpoints
|
|
|
|
```csharp
|
|
namespace StellaOps.Router.Gateway;
|
|
|
|
/// <summary>
/// Health check endpoints.
/// </summary>
public static class HealthEndpoints
{
    /// <summary>
    /// Maps the liveness (<c>{basePath}/live</c>), readiness (<c>{basePath}/ready</c>)
    /// and detailed (<c>{basePath}</c>) health endpoints.
    /// </summary>
    /// <param name="endpoints">Route builder to add the endpoints to.</param>
    /// <param name="basePath">Common route prefix. Defaults to <c>/health</c>.</param>
    /// <returns>The same <paramref name="endpoints"/> instance, for chaining.</returns>
    public static IEndpointRouteBuilder MapHealthEndpoints(
        this IEndpointRouteBuilder endpoints,
        string basePath = "/health")
    {
        endpoints.MapGet(basePath + "/live", LivenessCheck);
        endpoints.MapGet(basePath + "/ready", ReadinessCheck);
        endpoints.MapGet(basePath, DetailedHealthCheck);

        return endpoints;
    }

    /// <summary>
    /// Liveness probe - is the process running?
    /// </summary>
    private static IResult LivenessCheck()
    {
        return Results.Ok(new { status = "alive" });
    }

    /// <summary>
    /// Readiness probe - can the service accept traffic?
    /// Responds 503 with per-check statuses when the overall status is Unhealthy,
    /// otherwise 200 (Degraded still counts as ready).
    /// </summary>
    private static async Task<IResult> ReadinessCheck(
        HealthCheckService healthService,
        CancellationToken cancellationToken)
    {
        var report = await healthService.CheckHealthAsync(cancellationToken);

        return report.Status == HealthStatus.Unhealthy
            ? Results.Json(new
            {
                status = "not_ready",
                checks = report.Checks.ToDictionary(c => c.Key, c => c.Value.Status.ToString())
            }, statusCode: 503)
            : Results.Ok(new { status = "ready" });
    }

    /// <summary>
    /// Detailed health report: overall status, total duration and, per check,
    /// status, description, duration and data. Responds 503 when Unhealthy and
    /// 200 otherwise.
    /// </summary>
    private static async Task<IResult> DetailedHealthCheck(
        HealthCheckService healthService,
        CancellationToken cancellationToken)
    {
        var report = await healthService.CheckHealthAsync(cancellationToken);

        var response = new
        {
            status = FormatStatus(report.Status),
            totalDuration = FormatMilliseconds(report.TotalDuration),
            checks = report.Checks.ToDictionary(c => c.Key, c => new
            {
                status = FormatStatus(c.Value.Status),
                description = c.Value.Description,
                duration = FormatMilliseconds(c.Value.Duration.TotalMilliseconds),
                data = c.Value.Data
            })
        };

        var statusCode = report.Status switch
        {
            HealthStatus.Healthy => 200,
            HealthStatus.Degraded => 200, // Still return 200 for degraded
            HealthStatus.Unhealthy => 503,
            _ => 200
        };

        return Results.Json(response, statusCode: statusCode);
    }

    // Lower-cases the status name independently of the server's culture.
    private static string FormatStatus(HealthStatus status)
    {
        return status.ToString().ToLowerInvariant();
    }

    // Formats as e.g. "12.34ms". Invariant culture keeps the decimal separator
    // a '.' so the JSON payload is the same on every server locale.
    private static string FormatMilliseconds(double milliseconds)
    {
        return milliseconds.ToString("F2", System.Globalization.CultureInfo.InvariantCulture) + "ms";
    }
}
|
|
```
|
|
|
|
---
|
|
|
|
## Prometheus Metrics Endpoint
|
|
|
|
```csharp
|
|
namespace StellaOps.Router.Gateway;
|
|
|
|
/// <summary>
/// Maps the HTTP endpoint that serves metrics as Prometheus text.
/// </summary>
public sealed class PrometheusMetricsEndpoint
{
    /// <summary>
    /// Registers a GET endpoint that writes the output of <c>PrometheusExporter</c>
    /// with content type <c>text/plain; version=0.0.4</c>.
    /// </summary>
    /// <param name="endpoints">Route builder to add the endpoint to.</param>
    /// <param name="path">Route of the endpoint. Defaults to <c>/metrics</c>.</param>
    public static void Map(IEndpointRouteBuilder endpoints, string path = "/metrics")
    {
        RequestDelegate handler = async context =>
        {
            // Resolved per request from the request's service provider.
            var exporter = context.RequestServices.GetRequiredService<PrometheusExporter>();
            var payload = await exporter.ExportAsync();

            var response = context.Response;
            response.ContentType = "text/plain; version=0.0.4";
            await response.WriteAsync(payload);
        };

        endpoints.MapGet(path, handler);
    }
}
|
|
|
|
/// <summary>
/// Simplified placeholder for a Prometheus text exporter. It currently produces
/// an empty payload; a real implementation uses OpenTelemetry.Exporter.Prometheus.
/// </summary>
public sealed class PrometheusExporter
{
    private readonly MeterProvider _meterProvider;

    /// <summary>Creates the exporter.</summary>
    /// <param name="meterProvider">Meter provider; stored but not yet used by the placeholder.</param>
    public PrometheusExporter(MeterProvider meterProvider)
    {
        _meterProvider = meterProvider;
    }

    /// <summary>Produces the metrics payload.</summary>
    /// <returns>A completed task holding the payload text (currently empty).</returns>
    public Task<string> ExportAsync()
    {
        // Use OpenTelemetry's Prometheus exporter
        // This is a simplified example: nothing is appended to the buffer yet.
        // Export would iterate over all registered metrics
        // Real implementation uses OpenTelemetry.Exporter.Prometheus
        var buffer = new StringBuilder();
        string payload = buffer.ToString();

        return Task.FromResult(payload);
    }
}
|
|
```
|
|
|
|
---
|
|
|
|
## Service Registration
|
|
|
|
```csharp
|
|
namespace StellaOps.Router.Gateway;
|
|
|
|
/// <summary>
/// Dependency-injection registration for metrics and health checks.
/// </summary>
public static class MetricsExtensions
{
    /// <summary>
    /// Binds <c>MetricsConfig</c> from the <c>Metrics</c> configuration section,
    /// wires the <c>StellaOps.Router</c> meter into OpenTelemetry with the
    /// Prometheus exporter, and registers <c>PrometheusExporter</c>.
    /// </summary>
    /// <param name="services">Service collection to add to.</param>
    /// <param name="configuration">Configuration root containing the <c>Metrics</c> section.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddStellaMetrics(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        services.Configure<MetricsConfig>(configuration.GetSection("Metrics"));

        services.AddOpenTelemetry()
            .WithMetrics(builder =>
            {
                builder
                    .AddMeter("StellaOps.Router")
                    .AddAspNetCoreInstrumentation()
                    .AddPrometheusExporter();
            });

        // The metrics endpoint resolves PrometheusExporter with
        // GetRequiredService; without this registration that call throws.
        services.AddSingleton<PrometheusExporter>();

        return services;
    }

    /// <summary>
    /// Registers <c>HealthCheckService</c> and the built-in transport, authority
    /// and rate limiter health checks as singletons.
    /// </summary>
    /// <param name="services">Service collection to add to.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddStellaHealthChecks(
        this IServiceCollection services)
    {
        services.AddSingleton<HealthCheckService>();
        services.AddSingleton<IHealthCheck, TransportHealthCheck>();
        services.AddSingleton<IHealthCheck, AuthorityHealthCheck>();
        services.AddSingleton<IHealthCheck, RateLimiterHealthCheck>();

        return services;
    }
}
|
|
```
|
|
|
|
---
|
|
|
|
## YAML Configuration
|
|
|
|
```yaml
Metrics:
  Enabled: true
  Path: "/metrics"
  IncludePathLabel: false
  MaxPathCardinality: 100
  DurationBuckets:
    - 0.005
    - 0.01
    - 0.025
    - 0.05
    - 0.1
    - 0.25
    - 0.5
    - 1
    - 2.5
    - 5
    - 10

HealthChecks:
  Enabled: true
  Path: "/health"
  CacheDuration: "00:00:05"
```
|
|
|
|
---
|
|
|
|
## Deliverables

1. `StellaOps.Router.Common/StellaMetrics.cs`
2. `StellaOps.Router.Gateway/MetricsMiddleware.cs`
3. `StellaOps.Router.Transport/TransportMetricsCollector.cs`
4. `StellaOps.Router.Common/HealthCheckService.cs`
5. `StellaOps.Router.Gateway/TransportHealthCheck.cs`
6. `StellaOps.Router.Gateway/AuthorityHealthCheck.cs`
7. `StellaOps.Router.Gateway/HealthEndpoints.cs`
8. `StellaOps.Router.Gateway/PrometheusMetricsEndpoint.cs`
9. Metrics collection tests
10. Health check tests
|
|
|
|
---
|
|
|
|
## Next Step
|
|
|
|
Proceed to [Step 24: Circuit Breaker & Retry Policies](24-Step.md) to implement resilience patterns.
|