using Npgsql;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
using StellaOps.Doctor.Plugins.Builders;
using System.Globalization;
namespace StellaOps.Doctor.Plugins.Database.Checks;
/// <summary>
/// Checks the health of the database connection pool.
/// </summary>
/// <remarks>
/// The check reads session counts for the current database from <c>pg_stat_activity</c>
/// (excluding the session used by the check itself) and compares the total against the
/// server's <c>max_connections</c> setting. It warns when too many sessions are idle inside
/// a transaction or when usage is above the configured percentage, and passes otherwise.
/// </remarks>
public sealed class ConnectionPoolHealthCheck : DatabaseCheckBase
{
    /// <summary>More than this many "idle in transaction" sessions produces a warning.</summary>
    private const long IdleInTransactionWarnThreshold = 5;

    /// <summary>Usage above this percentage of <c>max_connections</c> produces a warning.</summary>
    private const double UsageWarnThresholdPercent = 80;

    /// <summary>Runbook attached to every remediation produced by this check.</summary>
    private const string RunbookUrl = "docs/doctor/articles/postgres/db-pool-health.md";

    /// <summary>Command an operator can run to re-verify after remediation.</summary>
    private const string VerificationCommand = "stella doctor --check check.db.pool.health";

    /// <inheritdoc />
    public override string CheckId => "check.db.pool.health";

    /// <inheritdoc />
    public override string Name => "Connection Pool Health";

    /// <inheritdoc />
    public override string Description => "Verifies the database connection pool is healthy";

    /// <inheritdoc />
    public override IReadOnlyList Tags => ["database", "pool", "connectivity"];

    /// <summary>
    /// Collects connection statistics and turns them into a pass, warn or fail result.
    /// </summary>
    /// <param name="context">Plugin execution context; not read by this check.</param>
    /// <param name="connectionString">Connection string handed to <c>CreateConnectionAsync</c>.</param>
    /// <param name="result">Builder used to assemble the outcome, evidence and remediation.</param>
    /// <param name="ct">Token that cancels the database round trips.</param>
    /// <returns>
    /// A warning when more than <see cref="IdleInTransactionWarnThreshold"/> sessions are idle in a
    /// transaction or usage exceeds <see cref="UsageWarnThresholdPercent"/> percent; a failure when
    /// the statistics query yields no row; otherwise a pass.
    /// </returns>
    protected override async Task ExecuteCheckAsync(
        DoctorPluginContext context,
        string connectionString,
        CheckResultBuilder result,
        CancellationToken ct)
    {
        await using var connection = await CreateConnectionAsync(connectionString, ct);

        // Session statistics for the current database, excluding this check's own backend.
        // "Waiting" is limited to active sessions: idle sessions also carry a non-null
        // wait_event (they wait on the client), which would otherwise inflate the count.
        await using var cmd = new NpgsqlCommand(@"
            SELECT
                COUNT(*) AS total_connections,
                COUNT(*) FILTER (WHERE state = 'active') AS active_connections,
                COUNT(*) FILTER (WHERE state = 'idle') AS idle_connections,
                COUNT(*) FILTER (WHERE state = 'idle in transaction') AS idle_in_transaction,
                COUNT(*) FILTER (WHERE state = 'active' AND wait_event IS NOT NULL) AS waiting_connections,
                MAX(EXTRACT(EPOCH FROM (now() - backend_start))) AS oldest_connection_seconds
            FROM pg_stat_activity
            WHERE datname = current_database()
              AND pid <> pg_backend_pid()",
            connection);

        await using var reader = await cmd.ExecuteReaderAsync(ct);

        // An aggregate without GROUP BY normally yields exactly one row; treat anything else as a failure.
        if (!await reader.ReadAsync(ct))
        {
            return result
                .Fail("Unable to retrieve connection pool statistics")
                .Build();
        }

        var totalConnections = reader.GetInt64(0);
        var activeConnections = reader.GetInt64(1);
        var idleConnections = reader.GetInt64(2);
        var idleInTransaction = reader.GetInt64(3);
        var waitingConnections = reader.GetInt64(4);
        // MAX(...) is NULL when no other session exists.
        var oldestConnectionSeconds = reader.IsDBNull(5) ? 0 : reader.GetDouble(5);

        // The reader has to be closed before another command is issued on the same connection.
        await reader.CloseAsync();

        // Get max connections setting (falls back to 100 when the value is not returned as text).
        await using var maxCmd = new NpgsqlCommand("SHOW max_connections", connection);
        var maxConnectionsStr = await maxCmd.ExecuteScalarAsync(ct) as string ?? "100";
        var maxConnections = int.Parse(maxConnectionsStr, CultureInfo.InvariantCulture);

        // Note: the numerator only covers the current database while max_connections is
        // server-wide, so this figure is a lower bound on overall server saturation.
        var usagePercent = (double)totalConnections / maxConnections * 100;

        // Format everything once with the invariant culture so evidence and messages do not
        // change with the host's regional settings (e.g. "12,5%" under de-DE).
        var invariant = CultureInfo.InvariantCulture;
        var totalText = totalConnections.ToString(invariant);
        var activeText = activeConnections.ToString(invariant);
        var idleText = idleConnections.ToString(invariant);
        var idleInTransactionText = idleInTransaction.ToString(invariant);
        var waitingText = waitingConnections.ToString(invariant);
        var maxText = maxConnections.ToString(invariant);
        var usageText = usagePercent.ToString("F1", invariant) + "%";
        var oldestText = oldestConnectionSeconds.ToString("F0", invariant) + "s";

        // Idle-in-transaction sessions are checked first: they take priority over high usage.
        if (idleInTransaction > IdleInTransactionWarnThreshold)
        {
            return result
                .Warn($"{idleInTransactionText} connections idle in transaction")
                .WithEvidence("Connection pool status", e => e
                    .Add("TotalConnections", totalText)
                    .Add("ActiveConnections", activeText)
                    .Add("IdleConnections", idleText)
                    .Add("IdleInTransaction", idleInTransactionText)
                    .Add("WaitingConnections", waitingText)
                    .Add("MaxConnections", maxText)
                    .Add("UsagePercent", usageText))
                .WithCauses(
                    "Long-running transactions not committed",
                    "Application not properly closing transactions",
                    "Deadlock or lock contention")
                .WithRemediation(r => r
                    .AddShellStep(1, "Find idle transactions", "psql -c \"SELECT pid, query FROM pg_stat_activity WHERE state = 'idle in transaction'\"")
                    .AddManualStep(2, "Review application code", "Ensure transactions are properly committed or rolled back")
                    .WithRunbookUrl(RunbookUrl))
                .WithVerification(VerificationCommand)
                .Build();
        }

        if (usagePercent > UsageWarnThresholdPercent)
        {
            return result
                .Warn($"Connection pool usage at {usageText}")
                .WithEvidence("Connection pool status", e => e
                    .Add("TotalConnections", totalText)
                    .Add("ActiveConnections", activeText)
                    .Add("IdleConnections", idleText)
                    .Add("IdleInTransaction", idleInTransactionText)
                    .Add("WaitingConnections", waitingText)
                    .Add("MaxConnections", maxText)
                    .Add("UsagePercent", usageText))
                .WithCauses(
                    "Connection leak in application",
                    "Too many concurrent requests",
                    "max_connections too low for workload")
                .WithRemediation(r => r
                    .AddManualStep(1, "Review connection pool settings", "Check Npgsql connection string pool size")
                    .AddManualStep(2, "Consider increasing max_connections", "Edit postgresql.conf if appropriate")
                    .WithRunbookUrl(RunbookUrl))
                .WithVerification(VerificationCommand)
                .Build();
        }

        return result
            .Pass($"Connection pool healthy: {totalText}/{maxText} connections ({usageText})")
            .WithEvidence("Connection pool status", e => e
                .Add("TotalConnections", totalText)
                .Add("ActiveConnections", activeText)
                .Add("IdleConnections", idleText)
                .Add("IdleInTransaction", idleInTransactionText)
                .Add("WaitingConnections", waitingText)
                .Add("MaxConnections", maxText)
                .Add("UsagePercent", usageText)
                .Add("OldestConnectionAge", oldestText))
            .Build();
    }
}