Add integration tests for migration categories and execution
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
- Implemented MigrationCategoryTests to validate migration categorization for startup, release, seed, and data migrations. - Added tests for edge cases, including null, empty, and whitespace migration names. - Created StartupMigrationHostTests to verify the behavior of the migration host with real PostgreSQL instances using Testcontainers. - Included tests for migration execution, schema creation, and handling of pending release migrations. - Added SQL migration files for testing: creating a test table, adding a column, a release migration, and seeding data.
This commit is contained in:
769
docs/router/23-Step.md
Normal file
769
docs/router/23-Step.md
Normal file
@@ -0,0 +1,769 @@
|
||||
# Step 23: Metrics & Health Checks
|
||||
|
||||
**Phase 6: Observability & Resilience**
|
||||
**Estimated Complexity:** Medium
|
||||
**Dependencies:** Step 22 (Logging & Tracing)
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Metrics and health checks provide operational visibility into the router and microservices. Prometheus-compatible metrics expose request rates, latencies, error rates, and connection pool status. Health checks enable load balancers and orchestrators to route traffic appropriately.
|
||||
|
||||
---
|
||||
|
||||
## Goals
|
||||
|
||||
1. Expose Prometheus-compatible metrics
|
||||
2. Track request/response metrics per endpoint
|
||||
3. Monitor transport layer health
|
||||
4. Provide liveness and readiness probes
|
||||
5. Support custom health check integrations
|
||||
|
||||
---
|
||||
|
||||
## Metrics Configuration
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.Router.Common;
|
||||
|
||||
/// <summary>
/// Options that control metrics collection and the metrics endpoint.
/// </summary>
public class MetricsConfig
{
    /// <summary>Turns metrics collection on or off. Defaults to <c>true</c>.</summary>
    public bool Enabled { get; set; } = true;

    /// <summary>Request path of the metrics endpoint. Defaults to <c>/metrics</c>.</summary>
    public string Path { get; set; } = "/metrics";

    /// <summary>Bucket boundaries of the request-duration histogram.</summary>
    public double[] DurationBuckets { get; set; } = new double[]
    {
        0.001, 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 10.0
    };

    /// <summary>Names of the labels to include in metrics.</summary>
    public HashSet<string> IncludeLabels { get; set; } = new HashSet<string>
    {
        "method", "path", "status_code", "service"
    };

    /// <summary>
    /// Whether the request path is attached as a label. Off by default because
    /// per-path labels may cause high cardinality.
    /// </summary>
    public bool IncludePathLabel { get; set; } = false;

    /// <summary>Maximum number of unique path labels before aggregating. Defaults to 100.</summary>
    public int MaxPathCardinality { get; set; } = 100;
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Metrics
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.Router.Common;
|
||||
|
||||
/// <summary>
/// Central metrics registry for Stella Router. All instruments are created from a
/// single <see cref="System.Diagnostics.Metrics.Meter"/> named <c>StellaOps.Router</c>.
/// </summary>
public sealed class StellaMetrics
{
    // The meter MUST be declared before every instrument: static field initializers
    // run in textual order, so declaring it last would leave it null while the
    // instruments below are being created and the type initializer would throw.
    private static readonly Meter Meter = new("StellaOps.Router", "1.0.0");

    // Request metrics

    /// <summary>Total number of requests processed.</summary>
    public static readonly Counter<long> RequestsTotal = Meter.CreateCounter<long>(
        "stella_requests_total",
        description: "Total number of requests processed");

    /// <summary>Request processing duration, in seconds.</summary>
    public static readonly Histogram<double> RequestDuration = Meter.CreateHistogram<double>(
        "stella_request_duration_seconds",
        unit: "s",
        description: "Request processing duration in seconds");

    /// <summary>Total number of request errors.</summary>
    public static readonly Counter<long> RequestErrors = Meter.CreateCounter<long>(
        "stella_request_errors_total",
        description: "Total number of request errors");

    // Transport metrics

    /// <summary>Number of currently active transport connections.</summary>
    public static readonly UpDownCounter<int> ActiveConnections = Meter.CreateUpDownCounter<int>(
        "stella_active_connections",
        description: "Number of active transport connections");

    /// <summary>Total number of transport connections opened.</summary>
    public static readonly Counter<long> ConnectionsTotal = Meter.CreateCounter<long>(
        "stella_connections_total",
        description: "Total number of transport connections");

    /// <summary>Total number of frames sent.</summary>
    public static readonly Counter<long> FramesSent = Meter.CreateCounter<long>(
        "stella_frames_sent_total",
        description: "Total number of frames sent");

    /// <summary>Total number of frames received.</summary>
    public static readonly Counter<long> FramesReceived = Meter.CreateCounter<long>(
        "stella_frames_received_total",
        description: "Total number of frames received");

    /// <summary>Total bytes sent.</summary>
    public static readonly Counter<long> BytesSent = Meter.CreateCounter<long>(
        "stella_bytes_sent_total",
        unit: "By",
        description: "Total bytes sent");

    /// <summary>Total bytes received.</summary>
    public static readonly Counter<long> BytesReceived = Meter.CreateCounter<long>(
        "stella_bytes_received_total",
        unit: "By",
        description: "Total bytes received");

    // Rate limiting metrics

    /// <summary>Number of requests that hit rate limits.</summary>
    public static readonly Counter<long> RateLimitHits = Meter.CreateCounter<long>(
        "stella_rate_limit_hits_total",
        description: "Number of requests that hit rate limits");

    /// <summary>Number of active rate limit buckets.</summary>
    public static readonly Gauge<int> RateLimitBuckets = Meter.CreateGauge<int>(
        "stella_rate_limit_buckets",
        description: "Number of active rate limit buckets");

    // Auth metrics

    /// <summary>Number of successful authentications.</summary>
    public static readonly Counter<long> AuthSuccesses = Meter.CreateCounter<long>(
        "stella_auth_success_total",
        description: "Number of successful authentications");

    /// <summary>Number of failed authentications.</summary>
    public static readonly Counter<long> AuthFailures = Meter.CreateCounter<long>(
        "stella_auth_failures_total",
        description: "Number of failed authentications");

    // Circuit breaker metrics

    /// <summary>Circuit breaker state (0=closed, 1=half-open, 2=open).</summary>
    public static readonly Gauge<int> CircuitBreakerState = Meter.CreateGauge<int>(
        "stella_circuit_breaker_state",
        description: "Circuit breaker state (0=closed, 1=half-open, 2=open)");
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Request Metrics Middleware
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.Router.Gateway;
|
||||
|
||||
/// <summary>
/// Middleware that records the request count, duration, and error count for every
/// request passing through the pipeline.
/// </summary>
public sealed class MetricsMiddleware
{
    private readonly RequestDelegate _next;
    private readonly MetricsConfig _config;
    private readonly PathNormalizer _pathNormalizer;

    /// <summary>Creates the middleware.</summary>
    /// <param name="next">The next delegate in the pipeline.</param>
    /// <param name="config">Metrics options; the value is read once at construction.</param>
    public MetricsMiddleware(
        RequestDelegate next,
        IOptions<MetricsConfig> config)
    {
        _next = next;
        _config = config.Value;
        _pathNormalizer = new PathNormalizer(_config.MaxPathCardinality);
    }

    /// <summary>
    /// Invokes the rest of the pipeline and records metrics once it completes or throws.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <returns>A task that completes when the downstream pipeline has finished.</returns>
    public async Task InvokeAsync(HttpContext context)
    {
        if (!_config.Enabled)
        {
            await _next(context);
            return;
        }

        var sw = Stopwatch.StartNew();
        var method = context.Request.Method;
        var path = _config.IncludePathLabel
            ? _pathNormalizer.Normalize(context.Request.Path)
            : "aggregated";
        var faulted = false;

        try
        {
            await _next(context);
        }
        catch
        {
            // Remember the failure for the finally block; the exception itself is rethrown untouched.
            faulted = true;
            throw;
        }
        finally
        {
            sw.Stop();

            // When the pipeline throws, the response status is typically still the default 200,
            // which would record the failure as a success. Report such requests as 500 so they
            // show up in the error counter.
            var statusCode = context.Response.StatusCode;
            if (faulted && statusCode < 400)
            {
                statusCode = 500;
            }

            var tags = new TagList
            {
                { "method", method },
                { "status_code", statusCode.ToString(System.Globalization.CultureInfo.InvariantCulture) }
            };

            if (_config.IncludePathLabel)
            {
                tags.Add("path", path);
            }

            StellaMetrics.RequestsTotal.Add(1, tags);
            StellaMetrics.RequestDuration.Record(sw.Elapsed.TotalSeconds, tags);

            if (statusCode >= 400)
            {
                StellaMetrics.RequestErrors.Add(1, tags);
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Normalizes request paths into a bounded set of metric labels to prevent high cardinality.
/// Identifier-like segments are replaced with <c>{id}</c>; once the configured number of
/// distinct normalized labels has been reached, every new label collapses to <c>other</c>.
/// </summary>
internal sealed class PathNormalizer
{
    private const string IdPlaceholder = "{id}";
    private const string OverflowLabel = "other";

    private readonly int _maxCardinality;

    // Set of normalized labels handed out so far (the value is unused). Reads are lock-free;
    // additions happen under _gate. Labels are never removed.
    private readonly ConcurrentDictionary<string, byte> _labels = new();
    private readonly object _gate = new();
    private int _labelCount;

    /// <summary>Creates a normalizer.</summary>
    /// <param name="maxCardinality">Maximum number of distinct normalized labels to emit.</param>
    public PathNormalizer(int maxCardinality)
    {
        _maxCardinality = maxCardinality;
    }

    /// <summary>Maps a raw request path to its metric label.</summary>
    /// <param name="path">The raw request path, split on <c>/</c>.</param>
    /// <returns>
    /// The path with GUID, integer, and long (more than 20 characters) segments replaced by
    /// <c>{id}</c>, or <c>other</c> when the label budget is exhausted and the label is new.
    /// </returns>
    public string Normalize(string path)
    {
        // Fast path: normalization is idempotent, so a path that already is a known label
        // (e.g. a static route) maps to itself without any splitting.
        if (_labels.ContainsKey(path))
        {
            return path;
        }

        var segments = path.Split('/');
        for (var i = 0; i < segments.Length; i++)
        {
            if (IsIdentifier(segments[i]))
            {
                segments[i] = IdPlaceholder;
            }
        }

        var normalized = string.Join("/", segments);

        // The budget applies to normalized labels, not raw paths: /users/1 and /users/2
        // share one label and must not each consume a slot.
        if (_labels.ContainsKey(normalized))
        {
            return normalized;
        }

        // Labels are never removed, so once the cap is reached no new label can be admitted
        // and the lock can be skipped entirely.
        if (Volatile.Read(ref _labelCount) >= _maxCardinality)
        {
            return OverflowLabel;
        }

        lock (_gate)
        {
            if (_labels.ContainsKey(normalized))
            {
                return normalized;
            }

            if (_labelCount >= _maxCardinality)
            {
                return OverflowLabel;
            }

            _labels[normalized] = 0;
            Volatile.Write(ref _labelCount, _labelCount + 1);
            return normalized;
        }
    }

    // A segment is treated as a variable identifier when it is a GUID, an integer
    // (parsed as long so ids beyond the int range are caught too), or longer than 20 characters.
    private static bool IsIdentifier(string segment) =>
        Guid.TryParse(segment, out _) ||
        long.TryParse(
            segment,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out _) ||
        segment.Length > 20;
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Transport Metrics
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.Router.Transport;
|
||||
|
||||
/// <summary>
/// Records transport-layer activity (connections, frames, bytes) on the shared
/// <c>StellaMetrics</c> instruments.
/// </summary>
public sealed class TransportMetricsCollector
{
    /// <summary>Counts a newly opened connection and raises the active-connection count by one.</summary>
    /// <param name="transport">Value of the <c>transport</c> tag.</param>
    /// <param name="serviceName">Value of the <c>service</c> tag.</param>
    public void RecordConnectionOpened(string transport, string serviceName)
    {
        var connectionTags = ConnectionTags(transport, serviceName);

        StellaMetrics.ConnectionsTotal.Add(1, connectionTags);
        StellaMetrics.ActiveConnections.Add(1, connectionTags);
    }

    /// <summary>Lowers the active-connection count by one.</summary>
    /// <param name="transport">Value of the <c>transport</c> tag.</param>
    /// <param name="serviceName">Value of the <c>service</c> tag.</param>
    public void RecordConnectionClosed(string transport, string serviceName)
    {
        StellaMetrics.ActiveConnections.Add(-1, ConnectionTags(transport, serviceName));
    }

    /// <summary>Counts one sent frame and adds its size to the sent-bytes total.</summary>
    /// <param name="transport">Value of the <c>transport</c> tag.</param>
    /// <param name="type">Frame type; its <c>ToString()</c> becomes the <c>frame_type</c> tag.</param>
    /// <param name="bytes">Number of bytes to add to the byte counter.</param>
    public void RecordFrameSent(string transport, FrameType type, int bytes)
    {
        StellaMetrics.FramesSent.Add(1, FrameTags(transport, type));
        // Byte totals are tagged by transport only, not by frame type.
        StellaMetrics.BytesSent.Add(bytes, TransportTags(transport));
    }

    /// <summary>Counts one received frame and adds its size to the received-bytes total.</summary>
    /// <param name="transport">Value of the <c>transport</c> tag.</param>
    /// <param name="type">Frame type; its <c>ToString()</c> becomes the <c>frame_type</c> tag.</param>
    /// <param name="bytes">Number of bytes to add to the byte counter.</param>
    public void RecordFrameReceived(string transport, FrameType type, int bytes)
    {
        StellaMetrics.FramesReceived.Add(1, FrameTags(transport, type));
        // Byte totals are tagged by transport only, not by frame type.
        StellaMetrics.BytesReceived.Add(bytes, TransportTags(transport));
    }

    // Tags for connection instruments: transport + service.
    private static TagList ConnectionTags(string transport, string serviceName)
    {
        var tags = TransportTags(transport);
        tags.Add("service", serviceName);
        return tags;
    }

    // Tags for frame instruments: transport + frame_type.
    private static TagList FrameTags(string transport, FrameType type)
    {
        var tags = TransportTags(transport);
        tags.Add("frame_type", type.ToString());
        return tags;
    }

    // Tags carrying only the transport name.
    private static TagList TransportTags(string transport)
    {
        var tags = new TagList();
        tags.Add("transport", transport);
        return tags;
    }
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Health Check System
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.Router.Common;
|
||||
|
||||
/// <summary>
/// Outcome of a single health check: its status plus optional diagnostic details.
/// </summary>
public sealed class HealthCheckResult
{
    /// <summary>Status reported by the check.</summary>
    public HealthStatus Status { get; init; }

    /// <summary>Optional human-readable explanation of the status.</summary>
    public string? Description { get; init; }

    /// <summary>Time the check took to run.</summary>
    public TimeSpan Duration { get; init; }

    /// <summary>Optional key/value diagnostics attached by the check.</summary>
    public IReadOnlyDictionary<string, object>? Data { get; init; }

    /// <summary>Exception associated with the result, if any.</summary>
    public Exception? Exception { get; init; }
}
|
||||
|
||||
/// <summary>
/// Health states ordered by severity; a larger numeric value means a worse state.
/// </summary>
public enum HealthStatus
{
    /// <summary>Everything is working.</summary>
    Healthy = 0,

    /// <summary>Working, but with reduced capability.</summary>
    Degraded = 1,

    /// <summary>Not working.</summary>
    Unhealthy = 2
}
|
||||
|
||||
/// <summary>
/// Contract for a single named health probe.
/// </summary>
public interface IHealthCheck
{
    /// <summary>Name under which the check's result is reported.</summary>
    string Name { get; }

    /// <summary>Runs the check.</summary>
    /// <param name="cancellationToken">Token used to cancel the check.</param>
    /// <returns>The result of the check.</returns>
    Task<HealthCheckResult> CheckAsync(CancellationToken cancellationToken);
}
|
||||
|
||||
/// <summary>
/// Runs every registered <see cref="IHealthCheck"/> in sequence and aggregates the
/// results into a single <see cref="HealthReport"/>.
/// </summary>
public sealed class HealthCheckService
{
    private readonly IEnumerable<IHealthCheck> _checks;
    private readonly ILogger<HealthCheckService> _logger;

    /// <summary>Creates the service.</summary>
    /// <param name="checks">The health checks to run.</param>
    /// <param name="logger">Logger used to report checks that throw.</param>
    public HealthCheckService(
        IEnumerable<IHealthCheck> checks,
        ILogger<HealthCheckService> logger)
    {
        _checks = checks;
        _logger = logger;
    }

    /// <summary>
    /// Executes all checks one after another. The overall status is the worst individual
    /// status; a check that throws is reported as <see cref="HealthStatus.Unhealthy"/>.
    /// </summary>
    /// <param name="cancellationToken">Token passed to every check.</param>
    /// <returns>A report with one entry per check name, keyed by <see cref="IHealthCheck.Name"/>.</returns>
    /// <exception cref="OperationCanceledException">
    /// A check was cancelled because <paramref name="cancellationToken"/> was cancelled.
    /// </exception>
    public async Task<HealthReport> CheckHealthAsync(CancellationToken cancellationToken)
    {
        var results = new Dictionary<string, HealthCheckResult>();
        var overallStatus = HealthStatus.Healthy;

        foreach (var check in _checks)
        {
            var sw = Stopwatch.StartNew();
            HealthCheckResult result;

            try
            {
                var reported = await check.CheckAsync(cancellationToken);

                // HealthCheckResult is a class, so a `with` expression is not available;
                // copy it explicitly to stamp the measured duration.
                result = new HealthCheckResult
                {
                    Status = reported.Status,
                    Description = reported.Description,
                    Duration = sw.Elapsed,
                    Data = reported.Data,
                    Exception = reported.Exception
                };
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // The caller gave up; that is not a health failure of the dependency.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Health check {Name} failed", check.Name);
                result = new HealthCheckResult
                {
                    Status = HealthStatus.Unhealthy,
                    Description = ex.Message,
                    Duration = sw.Elapsed,
                    Exception = ex
                };
            }

            results[check.Name] = result;

            // HealthStatus is ordered by severity, so "greater" means "worse".
            if (result.Status > overallStatus)
            {
                overallStatus = result.Status;
            }
        }

        return new HealthReport
        {
            Status = overallStatus,
            Checks = results,
            TotalDuration = results.Values.Sum(r => r.Duration.TotalMilliseconds)
        };
    }
}
|
||||
|
||||
/// <summary>
/// Aggregated outcome of a health check run.
/// </summary>
public sealed class HealthReport
{
    /// <summary>Overall status of the run.</summary>
    public HealthStatus Status { get; init; }

    /// <summary>Individual results keyed by check name; empty by default.</summary>
    public IReadOnlyDictionary<string, HealthCheckResult> Checks { get; init; } =
        new Dictionary<string, HealthCheckResult>();

    /// <summary>Total duration of the run as a plain number.</summary>
    public double TotalDuration { get; init; }
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Built-in Health Checks
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.Router.Gateway;
|
||||
|
||||
/// <summary>
/// Reports healthy when at least one transport connection is in the connected state.
/// </summary>
public sealed class TransportHealthCheck : IHealthCheck
{
    private readonly IGlobalRoutingState _routingState;

    /// <inheritdoc />
    public string Name => "transport";

    /// <summary>Creates the check.</summary>
    /// <param name="routingState">Source of the current connection list.</param>
    public TransportHealthCheck(IGlobalRoutingState routingState) => _routingState = routingState;

    /// <summary>Counts connected transport connections and maps the count to a status.</summary>
    /// <param name="cancellationToken">Not used; the check completes synchronously.</param>
    /// <returns>
    /// Unhealthy when no connection is connected, otherwise healthy. The count is exposed
    /// under the <c>connections</c> data key in both cases.
    /// </returns>
    public Task<HealthCheckResult> CheckAsync(CancellationToken cancellationToken)
    {
        var connected = _routingState
            .GetAllConnections()
            .Count(c => c.State == ConnectionState.Connected);
        var anyConnected = connected != 0;

        var outcome = new HealthCheckResult
        {
            Status = anyConnected ? HealthStatus.Healthy : HealthStatus.Unhealthy,
            Description = anyConnected
                ? $"{connected} active connections"
                : "No active transport connections",
            Data = new Dictionary<string, object> { ["connections"] = connected }
        };

        return Task.FromResult(outcome);
    }
}
|
||||
|
||||
/// <summary>
/// Probes the Authority service, bounded by a configured timeout.
/// </summary>
public sealed class AuthorityHealthCheck : IHealthCheck
{
    private readonly IAuthorityClient _authority;
    private readonly TimeSpan _timeout;

    /// <inheritdoc />
    public string Name => "authority";

    /// <summary>Creates the check.</summary>
    /// <param name="authority">Client used to probe the Authority service.</param>
    /// <param name="config">Options supplying <c>HealthCheckTimeout</c>.</param>
    public AuthorityHealthCheck(
        IAuthorityClient authority,
        IOptions<AuthorityConfig> config)
    {
        _authority = authority;
        _timeout = config.Value.HealthCheckTimeout;
    }

    /// <summary>Asks the Authority client for its health and maps the answer to a status.</summary>
    /// <param name="cancellationToken">Caller token, linked with the configured timeout.</param>
    /// <returns>
    /// Healthy when the client reports healthy; degraded when it reports unhealthy or the
    /// probe throws. This check never returns unhealthy.
    /// </returns>
    public async Task<HealthCheckResult> CheckAsync(CancellationToken cancellationToken)
    {
        try
        {
            using var timeoutSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            timeoutSource.CancelAfter(_timeout);

            if (await _authority.CheckHealthAsync(timeoutSource.Token))
            {
                return new HealthCheckResult
                {
                    Status = HealthStatus.Healthy,
                    Description = "Authority is responsive"
                };
            }

            return new HealthCheckResult
            {
                Status = HealthStatus.Degraded,
                Description = "Authority returned unhealthy"
            };
        }
        catch (Exception failure)
        {
            // Degraded, not unhealthy - gateway can still work
            return new HealthCheckResult
            {
                Status = HealthStatus.Degraded,
                Description = $"Authority unreachable: {failure.Message}",
                Exception = failure
            };
        }
    }
}
|
||||
|
||||
/// <summary>
/// Probes the rate limiter by issuing a limit check under a dedicated health-check key.
/// </summary>
public sealed class RateLimiterHealthCheck : IHealthCheck
{
    private readonly IRateLimiter _rateLimiter;

    /// <inheritdoc />
    public string Name => "rate_limiter";

    /// <summary>Creates the check.</summary>
    /// <param name="rateLimiter">The rate limiter to probe.</param>
    public RateLimiterHealthCheck(IRateLimiter rateLimiter) => _rateLimiter = rateLimiter;

    /// <summary>Performs one limit check and reports whether it completed.</summary>
    /// <param name="cancellationToken">Token passed to the rate limiter call.</param>
    /// <returns>
    /// Healthy when the call completes (its outcome is ignored); degraded when it throws.
    /// </returns>
    public async Task<HealthCheckResult> CheckAsync(CancellationToken cancellationToken)
    {
        try
        {
            // Any completed call counts as "responsive"; the limit decision itself is irrelevant.
            var probe = new RateLimitContext { Key = "__health_check__", Tier = RateLimitTier.Free };
            await _rateLimiter.CheckLimitAsync(probe, cancellationToken);
        }
        catch (Exception failure)
        {
            return new HealthCheckResult
            {
                Status = HealthStatus.Degraded,
                Description = $"Rate limiter error: {failure.Message}",
                Exception = failure
            };
        }

        return new HealthCheckResult
        {
            Status = HealthStatus.Healthy,
            Description = "Rate limiter is responsive"
        };
    }
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Health Endpoints
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.Router.Gateway;
|
||||
|
||||
/// <summary>
/// Maps the liveness, readiness, and detailed health endpoints.
/// </summary>
public static class HealthEndpoints
{
    /// <summary>
    /// Registers <c>{basePath}/live</c>, <c>{basePath}/ready</c>, and <c>{basePath}</c>.
    /// </summary>
    /// <param name="endpoints">The route builder to add the endpoints to.</param>
    /// <param name="basePath">Common prefix of the health routes. Defaults to <c>/health</c>.</param>
    /// <returns>The same <paramref name="endpoints"/> instance, for chaining.</returns>
    public static IEndpointRouteBuilder MapHealthEndpoints(
        this IEndpointRouteBuilder endpoints,
        string basePath = "/health")
    {
        endpoints.MapGet(basePath + "/live", LivenessCheck);
        endpoints.MapGet(basePath + "/ready", ReadinessCheck);
        endpoints.MapGet(basePath, DetailedHealthCheck);

        return endpoints;
    }

    /// <summary>
    /// Liveness probe - is the process running? Runs no health checks and always returns 200.
    /// </summary>
    private static IResult LivenessCheck()
    {
        return Results.Ok(new { status = "alive" });
    }

    /// <summary>
    /// Readiness probe - can the service accept traffic? Returns 503 with per-check statuses
    /// only when the overall status is unhealthy; degraded still counts as ready.
    /// </summary>
    private static async Task<IResult> ReadinessCheck(
        HealthCheckService healthService,
        CancellationToken cancellationToken)
    {
        var report = await healthService.CheckHealthAsync(cancellationToken);

        return report.Status == HealthStatus.Unhealthy
            ? Results.Json(new
            {
                status = "not_ready",
                checks = report.Checks.ToDictionary(c => c.Key, c => c.Value.Status.ToString())
            }, statusCode: 503)
            : Results.Ok(new { status = "ready" });
    }

    /// <summary>
    /// Detailed health report with status, description, duration, and data for every check.
    /// Returns 503 when the overall status is unhealthy and 200 otherwise.
    /// </summary>
    private static async Task<IResult> DetailedHealthCheck(
        HealthCheckService healthService,
        CancellationToken cancellationToken)
    {
        var report = await healthService.CheckHealthAsync(cancellationToken);

        // Status names are machine-readable, so lower-case them with the invariant culture
        // rather than the server's current culture.
        var response = new
        {
            status = report.Status.ToString().ToLowerInvariant(),
            totalDuration = $"{report.TotalDuration:F2}ms",
            checks = report.Checks.ToDictionary(c => c.Key, c => new
            {
                status = c.Value.Status.ToString().ToLowerInvariant(),
                description = c.Value.Description,
                duration = $"{c.Value.Duration.TotalMilliseconds:F2}ms",
                data = c.Value.Data
            })
        };

        // Still return 200 for degraded; only unhealthy maps to 503.
        var statusCode = report.Status == HealthStatus.Unhealthy ? 503 : 200;

        return Results.Json(response, statusCode: statusCode);
    }
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Prometheus Metrics Endpoint
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.Router.Gateway;
|
||||
|
||||
/// <summary>
/// Maps an endpoint that serves metrics as Prometheus text.
/// </summary>
public sealed class PrometheusMetricsEndpoint
{
    /// <summary>Registers a GET endpoint that writes the exporter's output to the response.</summary>
    /// <param name="endpoints">The route builder to add the endpoint to.</param>
    /// <param name="path">Route of the endpoint. Defaults to <c>/metrics</c>.</param>
    public static void Map(IEndpointRouteBuilder endpoints, string path = "/metrics")
    {
        RequestDelegate writeMetrics = async context =>
        {
            // The exporter is resolved per request from the request's service provider.
            var payload = await context.RequestServices
                .GetRequiredService<PrometheusExporter>()
                .ExportAsync();

            context.Response.ContentType = "text/plain; version=0.0.4";
            await context.Response.WriteAsync(payload);
        };

        endpoints.MapGet(path, writeMetrics);
    }
}
|
||||
|
||||
/// <summary>
/// Placeholder exporter that produces the metrics payload served by the metrics endpoint.
/// This simplified example currently returns an empty string.
/// </summary>
public sealed class PrometheusExporter
{
    private readonly MeterProvider _meterProvider;

    /// <summary>Creates the exporter.</summary>
    /// <param name="meterProvider">The meter provider; stored but not yet read by this example.</param>
    public PrometheusExporter(MeterProvider meterProvider) => _meterProvider = meterProvider;

    /// <summary>Builds the metrics payload.</summary>
    /// <returns>A completed task holding the exported text (empty in this simplified example).</returns>
    public Task<string> ExportAsync()
    {
        // Use OpenTelemetry's Prometheus exporter
        // This is a simplified example
        var buffer = new StringBuilder();

        // Export would iterate over all registered metrics
        // Real implementation uses OpenTelemetry.Exporter.Prometheus
        var payload = buffer.ToString();

        return Task.FromResult(payload);
    }
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Service Registration
|
||||
|
||||
```csharp
|
||||
namespace StellaOps.Router.Gateway;
|
||||
|
||||
/// <summary>
/// Service-collection extensions that register metrics and health checks.
/// </summary>
public static class MetricsExtensions
{
    /// <summary>
    /// Binds <c>MetricsConfig</c> to the <c>Metrics</c> configuration section and registers
    /// OpenTelemetry metrics for the <c>StellaOps.Router</c> meter with ASP.NET Core
    /// instrumentation and the Prometheus exporter.
    /// </summary>
    /// <param name="services">The service collection to add to.</param>
    /// <param name="configuration">Configuration root containing the <c>Metrics</c> section.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddStellaMetrics(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        var metricsSection = configuration.GetSection("Metrics");
        services.Configure<MetricsConfig>(metricsSection);

        services.AddOpenTelemetry()
            .WithMetrics(metrics => metrics
                .AddMeter("StellaOps.Router")
                .AddAspNetCoreInstrumentation()
                .AddPrometheusExporter());

        return services;
    }

    /// <summary>
    /// Registers the health check service and the transport, authority, and rate limiter
    /// checks as singletons.
    /// </summary>
    /// <param name="services">The service collection to add to.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddStellaHealthChecks(
        this IServiceCollection services)
    {
        services
            .AddSingleton<HealthCheckService>()
            .AddSingleton<IHealthCheck, TransportHealthCheck>()
            .AddSingleton<IHealthCheck, AuthorityHealthCheck>()
            .AddSingleton<IHealthCheck, RateLimiterHealthCheck>();

        return services;
    }
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## YAML Configuration
|
||||
|
||||
```yaml
|
||||
Metrics:
|
||||
Enabled: true
|
||||
Path: "/metrics"
|
||||
IncludePathLabel: false
|
||||
MaxPathCardinality: 100
|
||||
DurationBuckets:
|
||||
- 0.005
|
||||
- 0.01
|
||||
- 0.025
|
||||
- 0.05
|
||||
- 0.1
|
||||
- 0.25
|
||||
- 0.5
|
||||
- 1
|
||||
- 2.5
|
||||
- 5
|
||||
- 10
|
||||
|
||||
HealthChecks:
|
||||
Enabled: true
|
||||
Path: "/health"
|
||||
CacheDuration: "00:00:05"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Deliverables
|
||||
|
||||
1. `StellaOps.Router.Common/StellaMetrics.cs`
|
||||
2. `StellaOps.Router.Gateway/MetricsMiddleware.cs`
|
||||
3. `StellaOps.Router.Transport/TransportMetricsCollector.cs`
|
||||
4. `StellaOps.Router.Common/HealthCheckService.cs`
|
||||
5. `StellaOps.Router.Gateway/TransportHealthCheck.cs`
|
||||
6. `StellaOps.Router.Gateway/AuthorityHealthCheck.cs`
|
||||
7. `StellaOps.Router.Gateway/HealthEndpoints.cs`
|
||||
8. `StellaOps.Router.Gateway/PrometheusMetricsEndpoint.cs`
|
||||
9. Metrics collection tests
|
||||
10. Health check tests
|
||||
|
||||
---
|
||||
|
||||
## Next Step
|
||||
|
||||
Proceed to [Step 24: Circuit Breaker & Retry Policies](24-Step.md) to implement resilience patterns.
|
||||
Reference in New Issue
Block a user