sln build fix (again), test fixes, audit work and doctor work

This commit is contained in:
master
2026-01-12 22:15:51 +02:00
parent 9873f80830
commit 9330c64349
812 changed files with 48051 additions and 3891 deletions

View File

@@ -0,0 +1,132 @@
using System.Globalization;
using Npgsql;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
using StellaOps.Doctor.Plugins.Builders;
namespace StellaOps.Doctor.Plugins.Database.Checks;
/// <summary>
/// Checks the health of the database connection pool by inspecting
/// <c>pg_stat_activity</c> for the current database and comparing the number of
/// sessions against the server's <c>max_connections</c> setting.
/// </summary>
/// <remarks>
/// The session counts exclude the backend used by this check itself and only cover
/// sessions attached to the current database. Findings are reported in priority order:
/// too many sessions idle in transaction, then high connection usage, otherwise a pass.
/// All numeric values in messages and evidence are formatted with the invariant culture
/// so the output does not vary with the host locale.
/// </remarks>
public sealed class ConnectionPoolHealthCheck : DatabaseCheckBase
{
    /// <summary>More than this many sessions idle in transaction produces a warning.</summary>
    private const long IdleInTransactionWarnThreshold = 5;

    /// <summary>Connection usage (percent of max_connections) above which a warning is produced.</summary>
    private const double UsageWarnThresholdPercent = 80;

    /// <summary>Value assumed when <c>SHOW max_connections</c> does not return a string.</summary>
    private const string FallbackMaxConnections = "100";

    /// <summary>Command suggested to the operator to re-run this check after remediation.</summary>
    private const string VerificationCommand = "stella doctor --check check.db.pool.health";

    /// <summary>
    /// Aggregates session counts by state, plus the age in seconds of the oldest backend,
    /// for the current database, excluding the session running the query.
    /// </summary>
    private const string PoolStatisticsQuery = @"
SELECT
COUNT(*) AS total_connections,
COUNT(*) FILTER (WHERE state = 'active') AS active_connections,
COUNT(*) FILTER (WHERE state = 'idle') AS idle_connections,
COUNT(*) FILTER (WHERE state = 'idle in transaction') AS idle_in_transaction,
COUNT(*) FILTER (WHERE wait_event IS NOT NULL) AS waiting_connections,
MAX(EXTRACT(EPOCH FROM (now() - backend_start))) AS oldest_connection_seconds
FROM pg_stat_activity
WHERE datname = current_database()
AND pid <> pg_backend_pid()";

    /// <inheritdoc />
    public override string CheckId => "check.db.pool.health";

    /// <inheritdoc />
    public override string Name => "Connection Pool Health";

    /// <inheritdoc />
    public override string Description => "Verifies the database connection pool is healthy";

    /// <inheritdoc />
    public override IReadOnlyList<string> Tags => ["database", "pool", "connectivity"];

    /// <inheritdoc />
    protected override async Task<DoctorCheckResult> ExecuteCheckAsync(
        DoctorPluginContext context,
        string connectionString,
        CheckResultBuilder result,
        CancellationToken ct)
    {
        await using var connection = await CreateConnectionAsync(connectionString, ct);

        // Get connection statistics from pg_stat_activity
        await using var cmd = new NpgsqlCommand(PoolStatisticsQuery, connection);
        await using var reader = await cmd.ExecuteReaderAsync(ct);

        if (!await reader.ReadAsync(ct))
        {
            return result
                .Fail("Unable to retrieve connection pool statistics")
                .Build();
        }

        var totalConnections = reader.GetInt64(0);
        var activeConnections = reader.GetInt64(1);
        var idleConnections = reader.GetInt64(2);
        var idleInTransaction = reader.GetInt64(3);
        var waitingConnections = reader.GetInt64(4);
        // MAX() over zero rows is NULL (no other sessions on this database); report 0 then.
        var oldestConnectionSeconds = reader.IsDBNull(5) ? 0 : reader.GetDouble(5);

        // The reader must be closed before another command can run on the same connection.
        await reader.CloseAsync();

        // Get max connections setting
        await using var maxCmd = new NpgsqlCommand("SHOW max_connections", connection);
        var maxConnectionsRaw = await maxCmd.ExecuteScalarAsync(ct) as string ?? FallbackMaxConnections;

        // Report an unparseable or non-positive limit as a failed check instead of throwing
        // a FormatException or dividing by zero below.
        if (!int.TryParse(maxConnectionsRaw, NumberStyles.Integer, CultureInfo.InvariantCulture, out var maxConnections)
            || maxConnections <= 0)
        {
            return result
                .Fail($"Unexpected max_connections value: '{maxConnectionsRaw}'")
                .Build();
        }

        var usagePercent = (double)totalConnections / maxConnections * 100;

        // Format every value once, culture-invariantly, and reuse the text in messages and evidence.
        var invariant = CultureInfo.InvariantCulture;
        var totalText = totalConnections.ToString(invariant);
        var activeText = activeConnections.ToString(invariant);
        var idleText = idleConnections.ToString(invariant);
        var idleInTransactionText = idleInTransaction.ToString(invariant);
        var waitingText = waitingConnections.ToString(invariant);
        var maxText = maxConnections.ToString(invariant);
        var usageText = usagePercent.ToString("F1", invariant) + "%";
        var oldestAgeText = oldestConnectionSeconds.ToString("F0", invariant) + "s";

        // Check for issues
        if (idleInTransaction > IdleInTransactionWarnThreshold)
        {
            return result
                .Warn($"{idleInTransactionText} connections idle in transaction")
                .WithEvidence("Connection pool status", e => e
                    .Add("TotalConnections", totalText)
                    .Add("ActiveConnections", activeText)
                    .Add("IdleConnections", idleText)
                    .Add("IdleInTransaction", idleInTransactionText)
                    .Add("WaitingConnections", waitingText)
                    .Add("MaxConnections", maxText)
                    .Add("UsagePercent", usageText))
                .WithCauses(
                    "Long-running transactions not committed",
                    "Application not properly closing transactions",
                    "Deadlock or lock contention")
                .WithRemediation(r => r
                    .AddShellStep(1, "Find idle transactions", "psql -c \"SELECT pid, query FROM pg_stat_activity WHERE state = 'idle in transaction'\"")
                    .AddManualStep(2, "Review application code", "Ensure transactions are properly committed or rolled back"))
                .WithVerification(VerificationCommand)
                .Build();
        }

        if (usagePercent > UsageWarnThresholdPercent)
        {
            return result
                .Warn($"Connection pool usage at {usageText}")
                .WithEvidence("Connection pool status", e => e
                    .Add("TotalConnections", totalText)
                    .Add("ActiveConnections", activeText)
                    .Add("IdleConnections", idleText)
                    .Add("MaxConnections", maxText)
                    .Add("UsagePercent", usageText))
                .WithCauses(
                    "Connection leak in application",
                    "Too many concurrent requests",
                    "max_connections too low for workload")
                .WithRemediation(r => r
                    .AddManualStep(1, "Review connection pool settings", "Check Npgsql connection string pool size")
                    .AddManualStep(2, "Consider increasing max_connections", "Edit postgresql.conf if appropriate"))
                .WithVerification(VerificationCommand)
                .Build();
        }

        return result
            .Pass($"Connection pool healthy: {totalText}/{maxText} connections ({usageText})")
            .WithEvidence("Connection pool status", e => e
                .Add("TotalConnections", totalText)
                .Add("ActiveConnections", activeText)
                .Add("IdleConnections", idleText)
                .Add("IdleInTransaction", idleInTransactionText)
                .Add("WaitingConnections", waitingText)
                .Add("MaxConnections", maxText)
                .Add("UsagePercent", usageText)
                .Add("OldestConnectionAge", oldestAgeText))
            .Build();
    }
}