using Npgsql;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
using StellaOps.Doctor.Plugins.Builders;
using System.Globalization;

namespace StellaOps.Doctor.Plugins.Database.Checks;

/// <summary>
/// Checks the health of database connection pool.
/// </summary>
/// <remarks>
/// Reads session counts for the current database from <c>pg_stat_activity</c>
/// (excluding the check's own backend) and compares them with the server's
/// <c>max_connections</c> setting. The check warns when more than
/// <see cref="IdleInTransactionWarnThreshold"/> sessions are idle in transaction,
/// or when usage exceeds <see cref="UsageWarnPercent"/> percent; otherwise it passes.
/// </remarks>
public sealed class ConnectionPoolHealthCheck : DatabaseCheckBase
{
    /// <summary>Number of "idle in transaction" sessions above which the check warns.</summary>
    private const long IdleInTransactionWarnThreshold = 5;

    /// <summary>Connection usage (percent of max_connections) above which the check warns.</summary>
    private const double UsageWarnPercent = 80;

    /// <summary>Fallback used when max_connections cannot be read as a positive integer.</summary>
    private const int DefaultMaxConnections = 100;

    private const string RunbookUrl = "docs/doctor/articles/postgres/db-pool-health.md";

    private const string VerificationCommand = "stella doctor --check check.db.pool.health";

    // Single-row aggregate over pg_stat_activity for the current database.
    // NOTE(review): wait_event is presumably also non-null for idle sessions
    // (e.g. waiting on client input), so waiting_connections may include idle
    // backends — confirm whether "state = 'active'" should be part of that filter.
    private const string PoolStatsSql =
        " SELECT COUNT(*) AS total_connections, " +
        "COUNT(*) FILTER (WHERE state = 'active') AS active_connections, " +
        "COUNT(*) FILTER (WHERE state = 'idle') AS idle_connections, " +
        "COUNT(*) FILTER (WHERE state = 'idle in transaction') AS idle_in_transaction, " +
        "COUNT(*) FILTER (WHERE wait_event IS NOT NULL) AS waiting_connections, " +
        "MAX(EXTRACT(EPOCH FROM (now() - backend_start))) AS oldest_connection_seconds " +
        "FROM pg_stat_activity " +
        "WHERE datname = current_database() AND pid <> pg_backend_pid()";

    /// <inheritdoc />
    public override string CheckId => "check.db.pool.health";

    /// <inheritdoc />
    public override string Name => "Connection Pool Health";

    /// <inheritdoc />
    public override string Description => "Verifies the database connection pool is healthy";

    // NOTE(review): the generic arguments on IReadOnlyList here and on Task below
    // look like they were stripped from this view of the file (likely
    // IReadOnlyList<string> and Task<T>) — confirm against the base class.
    /// <inheritdoc />
    public override IReadOnlyList Tags => ["database", "pool", "connectivity"];

    /// <summary>
    /// Queries connection statistics and max_connections, then builds a warn, pass,
    /// or fail result with the collected numbers attached as evidence.
    /// </summary>
    /// <param name="context">Plugin context supplied by the caller; not read by this check.</param>
    /// <param name="connectionString">Connection string used to open the database connection.</param>
    /// <param name="result">Builder used to produce the check result.</param>
    /// <param name="ct">Token used to cancel the database calls.</param>
    /// <returns>
    /// A failure when the statistics query returns no row; a warning when too many
    /// sessions are idle in transaction or usage is above the threshold; a pass otherwise.
    /// </returns>
    protected override async Task ExecuteCheckAsync(
        DoctorPluginContext context,
        string connectionString,
        CheckResultBuilder result,
        CancellationToken ct)
    {
        await using var connection = await CreateConnectionAsync(connectionString, ct);

        // Get connection statistics from pg_stat_activity
        await using var cmd = new NpgsqlCommand(PoolStatsSql, connection);
        await using var reader = await cmd.ExecuteReaderAsync(ct);

        if (!await reader.ReadAsync(ct))
        {
            return result
                .Fail("Unable to retrieve connection pool statistics")
                .Build();
        }

        var totalConnections = reader.GetInt64(0);
        var activeConnections = reader.GetInt64(1);
        var idleConnections = reader.GetInt64(2);
        var idleInTransaction = reader.GetInt64(3);
        var waitingConnections = reader.GetInt64(4);
        // MAX(...) is NULL when no other session matched the filter.
        var oldestConnectionSeconds = reader.IsDBNull(5) ? 0 : reader.GetDouble(5);

        // Close the reader before issuing the next command on the same connection.
        await reader.CloseAsync();

        // Get max connections setting. Fall back to the default when the value is
        // missing, not a string, not an integer, or not positive, so the usage
        // calculation below never throws or divides by zero.
        await using var maxCmd = new NpgsqlCommand("SHOW max_connections", connection);
        var maxConnections = DefaultMaxConnections;
        if (await maxCmd.ExecuteScalarAsync(ct) is string maxConnectionsStr
            && int.TryParse(maxConnectionsStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsedMax)
            && parsedMax > 0)
        {
            maxConnections = parsedMax;
        }

        var usagePercent = (double)totalConnections / maxConnections * 100;

        // Format every reported value once, always with the invariant culture, so
        // evidence and messages do not vary with the host's regional settings.
        var invariant = CultureInfo.InvariantCulture;
        var totalText = totalConnections.ToString(invariant);
        var activeText = activeConnections.ToString(invariant);
        var idleText = idleConnections.ToString(invariant);
        var idleInTransactionText = idleInTransaction.ToString(invariant);
        var waitingText = waitingConnections.ToString(invariant);
        var maxText = maxConnections.ToString(invariant);
        var usageText = usagePercent.ToString("F1", invariant) + "%";
        var oldestAgeText = oldestConnectionSeconds.ToString("F0", invariant) + "s";

        // Check for issues
        if (idleInTransaction > IdleInTransactionWarnThreshold)
        {
            return result
                .Warn($"{idleInTransactionText} connections idle in transaction")
                .WithEvidence("Connection pool status", e => e
                    .Add("TotalConnections", totalText)
                    .Add("ActiveConnections", activeText)
                    .Add("IdleConnections", idleText)
                    .Add("IdleInTransaction", idleInTransactionText)
                    .Add("WaitingConnections", waitingText)
                    .Add("MaxConnections", maxText)
                    .Add("UsagePercent", usageText))
                .WithCauses(
                    "Long-running transactions not committed",
                    "Application not properly closing transactions",
                    "Deadlock or lock contention")
                .WithRemediation(r => r
                    .AddShellStep(1, "Find idle transactions", "psql -c \"SELECT pid, query FROM pg_stat_activity WHERE state = 'idle in transaction'\"")
                    .AddManualStep(2, "Review application code", "Ensure transactions are properly committed or rolled back")
                    .WithRunbookUrl(RunbookUrl))
                .WithVerification(VerificationCommand)
                .Build();
        }

        if (usagePercent > UsageWarnPercent)
        {
            return result
                .Warn($"Connection pool usage at {usageText}")
                .WithEvidence("Connection pool status", e => e
                    .Add("TotalConnections", totalText)
                    .Add("ActiveConnections", activeText)
                    .Add("IdleConnections", idleText)
                    .Add("IdleInTransaction", idleInTransactionText)
                    .Add("WaitingConnections", waitingText)
                    .Add("MaxConnections", maxText)
                    .Add("UsagePercent", usageText))
                .WithCauses(
                    "Connection leak in application",
                    "Too many concurrent requests",
                    "max_connections too low for workload")
                .WithRemediation(r => r
                    .AddManualStep(1, "Review connection pool settings", "Check Npgsql connection string pool size")
                    .AddManualStep(2, "Consider increasing max_connections", "Edit postgresql.conf if appropriate")
                    .WithRunbookUrl(RunbookUrl))
                .WithVerification(VerificationCommand)
                .Build();
        }

        return result
            .Pass($"Connection pool healthy: {totalText}/{maxText} connections ({usageText})")
            .WithEvidence("Connection pool status", e => e
                .Add("TotalConnections", totalText)
                .Add("ActiveConnections", activeText)
                .Add("IdleConnections", idleText)
                .Add("IdleInTransaction", idleInTransactionText)
                .Add("WaitingConnections", waitingText)
                .Add("MaxConnections", maxText)
                .Add("UsagePercent", usageText)
                .Add("OldestConnectionAge", oldestAgeText))
            .Build();
    }
}