doctor enhancements, setup, enhancements, ui functionality and design consolidation and , test projects fixes , product advisory attestation/rekor and delta verfications enhancements
This commit is contained in:
@@ -1,12 +1,14 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// RekorClockSkewCheck.cs
|
||||
// Sprint: SPRINT_20260117_001_ATTESTOR_periodic_rekor_verification
|
||||
// Task: PRV-006 - Doctor check for clock skew
|
||||
// Description: Checks if system clock is synchronized for attestation validity
|
||||
// Sprint: SPRINT_20260118_015_Doctor_check_quality_improvements
|
||||
// Task: DQUAL-004 - Add discriminating evidence to RekorClockSkewCheck
|
||||
// Description: Checks if system clock is synchronized with NTP status and VM detection
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Runtime.InteropServices;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
@@ -15,6 +17,7 @@ namespace StellaOps.Doctor.Plugin.Attestor.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks if system clock is synchronized with Rekor for attestation validity.
|
||||
/// Includes NTP daemon status, VM detection, and discriminating evidence for root cause analysis.
|
||||
/// </summary>
|
||||
public sealed class RekorClockSkewCheck : IDoctorCheck
|
||||
{
|
||||
@@ -49,6 +52,10 @@ public sealed class RekorClockSkewCheck : IDoctorCheck
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, "stellaops.doctor.attestor", "Attestor");
|
||||
|
||||
// Collect NTP and VM status for discriminating evidence
|
||||
var ntpStatus = await GetNtpStatusAsync(ct);
|
||||
var vmStatus = DetectVirtualMachine();
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
@@ -65,9 +72,29 @@ public sealed class RekorClockSkewCheck : IDoctorCheck
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Skip("Could not reach time reference server")
|
||||
.WithEvidence("Clock check", eb => eb
|
||||
.Add("Note", "Rekor unavailable; cannot verify clock skew"))
|
||||
.Warn("Could not reach time reference server")
|
||||
.WithEvidence("Clock check", eb =>
|
||||
{
|
||||
eb.Add("local_time_utc", context.TimeProvider.GetUtcNow().ToString("o"));
|
||||
eb.Add("server_time_utc", "unavailable");
|
||||
eb.Add("skew_seconds", "unknown");
|
||||
eb.Add("ntp_daemon_running", ntpStatus.DaemonRunning.ToString().ToLowerInvariant());
|
||||
eb.Add("ntp_daemon_type", ntpStatus.DaemonType);
|
||||
eb.Add("ntp_servers_configured", string.Join(", ", ntpStatus.ServersConfigured));
|
||||
eb.Add("last_sync_time_utc", ntpStatus.LastSyncTime?.ToString("o") ?? "null");
|
||||
eb.Add("sync_age_seconds", ntpStatus.SyncAgeSeconds?.ToString(CultureInfo.InvariantCulture) ?? "null");
|
||||
eb.Add("is_virtual_machine", vmStatus.IsVirtualMachine.ToString().ToLowerInvariant());
|
||||
eb.Add("vm_clock_sync_enabled", vmStatus.ClockSyncEnabled.ToString().ToLowerInvariant());
|
||||
eb.Add("connection_error_type", "server_unreachable");
|
||||
})
|
||||
.WithCauses(
|
||||
"Rekor server unreachable",
|
||||
"Network connectivity issue")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check network connectivity",
|
||||
$"curl -s {rekorUrl}/api/v1/log",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
@@ -81,65 +108,511 @@ public sealed class RekorClockSkewCheck : IDoctorCheck
|
||||
{
|
||||
return builder
|
||||
.Skip("Server did not return Date header")
|
||||
.WithEvidence("Clock check", eb => eb
|
||||
.Add("Note", "Cannot determine server time"))
|
||||
.WithEvidence("Clock check", eb =>
|
||||
{
|
||||
eb.Add("local_time_utc", context.TimeProvider.GetUtcNow().ToString("o"));
|
||||
eb.Add("server_time_utc", "not_provided");
|
||||
eb.Add("note", "Cannot determine server time");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
var localTime = context.TimeProvider.GetUtcNow();
|
||||
var skew = Math.Abs((localTime - serverTime).TotalSeconds);
|
||||
var skew = (localTime - serverTime).TotalSeconds;
|
||||
var absSkew = Math.Abs(skew);
|
||||
|
||||
if (skew <= MaxSkewSeconds)
|
||||
if (absSkew <= MaxSkewSeconds)
|
||||
{
|
||||
return builder
|
||||
.Pass($"System clock synchronized (skew: {skew:F1}s)")
|
||||
.WithEvidence("Clock status", eb => eb
|
||||
.Add("LocalTime", localTime.ToString("o"))
|
||||
.Add("ServerTime", serverTime.ToString("o"))
|
||||
.Add("SkewSeconds", skew.ToString("F1", CultureInfo.InvariantCulture))
|
||||
.Add("MaxAllowedSkew", $"{MaxSkewSeconds}s"))
|
||||
.Pass($"System clock synchronized (skew: {absSkew:F1}s)")
|
||||
.WithEvidence("Clock status", eb =>
|
||||
{
|
||||
eb.Add("local_time_utc", localTime.ToString("o"));
|
||||
eb.Add("server_time_utc", serverTime.ToString("o"));
|
||||
eb.Add("skew_seconds", skew.ToString("F2", CultureInfo.InvariantCulture));
|
||||
eb.Add("max_allowed_skew", MaxSkewSeconds.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("ntp_daemon_running", ntpStatus.DaemonRunning.ToString().ToLowerInvariant());
|
||||
eb.Add("ntp_daemon_type", ntpStatus.DaemonType);
|
||||
eb.Add("ntp_servers_configured", string.Join(", ", ntpStatus.ServersConfigured));
|
||||
eb.Add("last_sync_time_utc", ntpStatus.LastSyncTime?.ToString("o") ?? "null");
|
||||
eb.Add("sync_age_seconds", ntpStatus.SyncAgeSeconds?.ToString(CultureInfo.InvariantCulture) ?? "null");
|
||||
eb.Add("is_virtual_machine", vmStatus.IsVirtualMachine.ToString().ToLowerInvariant());
|
||||
eb.Add("vm_clock_sync_enabled", vmStatus.ClockSyncEnabled.ToString().ToLowerInvariant());
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Build discriminating remediation based on evidence
|
||||
return builder
|
||||
.Fail($"System clock skew ({skew:F1}s) exceeds {MaxSkewSeconds}s threshold")
|
||||
.WithEvidence("Clock status", eb => eb
|
||||
.Add("LocalTime", localTime.ToString("o"))
|
||||
.Add("ServerTime", serverTime.ToString("o"))
|
||||
.Add("SkewSeconds", skew.ToString("F1", CultureInfo.InvariantCulture))
|
||||
.Add("MaxAllowedSkew", $"{MaxSkewSeconds}s"))
|
||||
.Fail($"System clock skew ({absSkew:F1}s) exceeds {MaxSkewSeconds}s threshold")
|
||||
.WithEvidence("Clock status", eb =>
|
||||
{
|
||||
eb.Add("local_time_utc", localTime.ToString("o"));
|
||||
eb.Add("server_time_utc", serverTime.ToString("o"));
|
||||
eb.Add("skew_seconds", skew.ToString("F2", CultureInfo.InvariantCulture));
|
||||
eb.Add("max_allowed_skew", MaxSkewSeconds.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("ntp_daemon_running", ntpStatus.DaemonRunning.ToString().ToLowerInvariant());
|
||||
eb.Add("ntp_daemon_type", ntpStatus.DaemonType);
|
||||
eb.Add("ntp_servers_configured", string.Join(", ", ntpStatus.ServersConfigured));
|
||||
eb.Add("last_sync_time_utc", ntpStatus.LastSyncTime?.ToString("o") ?? "null");
|
||||
eb.Add("sync_age_seconds", ntpStatus.SyncAgeSeconds?.ToString(CultureInfo.InvariantCulture) ?? "null");
|
||||
eb.Add("is_virtual_machine", vmStatus.IsVirtualMachine.ToString().ToLowerInvariant());
|
||||
eb.Add("vm_type", vmStatus.VmType);
|
||||
eb.Add("vm_clock_sync_enabled", vmStatus.ClockSyncEnabled.ToString().ToLowerInvariant());
|
||||
})
|
||||
.WithCauses(
|
||||
"NTP service not running",
|
||||
"NTP server unreachable",
|
||||
"System clock manually set incorrectly",
|
||||
"Virtual machine clock drift")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check NTP status",
|
||||
"timedatectl status",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Enable NTP synchronization",
|
||||
"sudo timedatectl set-ntp true",
|
||||
CommandType.Shell)
|
||||
.AddStep(3, "Force immediate sync (if using chronyd)",
|
||||
"sudo chronyc -a makestep",
|
||||
CommandType.Shell)
|
||||
.AddStep(4, "Force immediate sync (if using ntpd)",
|
||||
"sudo ntpdate -u pool.ntp.org",
|
||||
CommandType.Shell))
|
||||
.WithRemediation(rb => BuildPlatformSpecificRemediation(rb, ntpStatus, vmStatus))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (Exception ex)
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Could not verify clock skew: {ex.Message}")
|
||||
.WithEvidence("Clock check", eb => eb
|
||||
.Add("Error", ex.Message)
|
||||
.Add("Note", "Using local time only"))
|
||||
.WithEvidence("Clock check", eb =>
|
||||
{
|
||||
eb.Add("local_time_utc", context.TimeProvider.GetUtcNow().ToString("o"));
|
||||
eb.Add("error_message", ex.Message);
|
||||
eb.Add("connection_error_type", GetConnectionErrorType(ex));
|
||||
eb.Add("ntp_daemon_running", ntpStatus.DaemonRunning.ToString().ToLowerInvariant());
|
||||
eb.Add("ntp_daemon_type", ntpStatus.DaemonType);
|
||||
eb.Add("is_virtual_machine", vmStatus.IsVirtualMachine.ToString().ToLowerInvariant());
|
||||
})
|
||||
.WithCauses(
|
||||
"Network connectivity issue",
|
||||
"Reference server unavailable")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Clock skew check timed out")
|
||||
.WithEvidence("Clock check", eb =>
|
||||
{
|
||||
eb.Add("local_time_utc", context.TimeProvider.GetUtcNow().ToString("o"));
|
||||
eb.Add("error_message", "Request timed out");
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
eb.Add("timeout_seconds", "5");
|
||||
})
|
||||
.WithCauses("Network latency too high", "Reference server overloaded")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static void BuildPlatformSpecificRemediation(RemediationBuilder rb, NtpStatus ntpStatus, VmStatus vmStatus)
|
||||
{
|
||||
if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
|
||||
{
|
||||
if (!ntpStatus.DaemonRunning)
|
||||
{
|
||||
rb.AddStep(1, "Start NTP service",
|
||||
ntpStatus.DaemonType switch
|
||||
{
|
||||
"chronyd" => "sudo systemctl start chronyd",
|
||||
"ntpd" => "sudo systemctl start ntpd",
|
||||
_ => "sudo systemctl start systemd-timesyncd"
|
||||
},
|
||||
CommandType.Shell);
|
||||
}
|
||||
else
|
||||
{
|
||||
rb.AddStep(1, "Check NTP status",
|
||||
"timedatectl status",
|
||||
CommandType.Shell);
|
||||
}
|
||||
|
||||
rb.AddStep(2, "Enable NTP synchronization",
|
||||
"sudo timedatectl set-ntp true",
|
||||
CommandType.Shell);
|
||||
|
||||
if (ntpStatus.DaemonType == "chronyd")
|
||||
{
|
||||
rb.AddStep(3, "Force immediate sync",
|
||||
"sudo chronyc -a makestep",
|
||||
CommandType.Shell);
|
||||
}
|
||||
else
|
||||
{
|
||||
rb.AddStep(3, "Force immediate sync",
|
||||
"sudo ntpdate -u pool.ntp.org",
|
||||
CommandType.Shell);
|
||||
}
|
||||
}
|
||||
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
|
||||
{
|
||||
if (!ntpStatus.DaemonRunning)
|
||||
{
|
||||
rb.AddStep(1, "Start Windows Time service",
|
||||
"net start w32time",
|
||||
CommandType.Shell);
|
||||
}
|
||||
else
|
||||
{
|
||||
rb.AddStep(1, "Check Windows Time status",
|
||||
"w32tm /query /status",
|
||||
CommandType.Shell);
|
||||
}
|
||||
|
||||
rb.AddStep(2, "Force time resync",
|
||||
"w32tm /resync /nowait",
|
||||
CommandType.Shell);
|
||||
|
||||
if (vmStatus.IsVirtualMachine && !vmStatus.ClockSyncEnabled)
|
||||
{
|
||||
rb.AddStep(3, "Enable VM time sync",
|
||||
"Enable time synchronization in Hyper-V Integration Services or VMware Tools",
|
||||
CommandType.Manual);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
rb.AddStep(1, "Sync system clock",
|
||||
"Consult your OS documentation for NTP configuration",
|
||||
CommandType.Manual);
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<NtpStatus> GetNtpStatusAsync(CancellationToken ct)
|
||||
{
|
||||
var status = new NtpStatus();
|
||||
|
||||
try
|
||||
{
|
||||
if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
|
||||
{
|
||||
// Check for various NTP daemons
|
||||
status.DaemonType = await DetectLinuxNtpDaemonAsync(ct);
|
||||
status.DaemonRunning = await IsLinuxServiceRunningAsync(status.DaemonType, ct);
|
||||
|
||||
// Try to get NTP servers from configuration
|
||||
status.ServersConfigured = await GetLinuxNtpServersAsync(status.DaemonType, ct);
|
||||
|
||||
// Get last sync time if available
|
||||
var syncInfo = await GetLinuxSyncInfoAsync(status.DaemonType, ct);
|
||||
status.LastSyncTime = syncInfo.LastSync;
|
||||
status.SyncAgeSeconds = syncInfo.SyncAge;
|
||||
}
|
||||
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
|
||||
{
|
||||
status.DaemonType = "w32time";
|
||||
status.DaemonRunning = await IsWindowsTimeServiceRunningAsync(ct);
|
||||
status.ServersConfigured = await GetWindowsNtpServersAsync(ct);
|
||||
}
|
||||
else
|
||||
{
|
||||
status.DaemonType = "unknown";
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort - don't fail the check if we can't determine NTP status
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
private static async Task<string> DetectLinuxNtpDaemonAsync(CancellationToken ct)
|
||||
{
|
||||
// Check for common NTP daemons in priority order
|
||||
var daemons = new[] { "chronyd", "ntpd", "systemd-timesyncd" };
|
||||
|
||||
foreach (var daemon in daemons)
|
||||
{
|
||||
if (await IsLinuxServiceRunningAsync(daemon, ct))
|
||||
{
|
||||
return daemon;
|
||||
}
|
||||
}
|
||||
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
private static async Task<bool> IsLinuxServiceRunningAsync(string serviceName, CancellationToken ct)
|
||||
{
|
||||
try
|
||||
{
|
||||
using var process = new Process
|
||||
{
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "systemctl",
|
||||
Arguments = $"is-active {serviceName}",
|
||||
RedirectStandardOutput = true,
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true
|
||||
}
|
||||
};
|
||||
|
||||
process.Start();
|
||||
var output = await process.StandardOutput.ReadToEndAsync(ct);
|
||||
await process.WaitForExitAsync(ct);
|
||||
|
||||
return output.Trim().Equals("active", StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
catch
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<List<string>> GetLinuxNtpServersAsync(string daemonType, CancellationToken ct)
|
||||
{
|
||||
var servers = new List<string>();
|
||||
|
||||
try
|
||||
{
|
||||
if (daemonType == "chronyd")
|
||||
{
|
||||
using var process = new Process
|
||||
{
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "chronyc",
|
||||
Arguments = "sources -n",
|
||||
RedirectStandardOutput = true,
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true
|
||||
}
|
||||
};
|
||||
|
||||
process.Start();
|
||||
var output = await process.StandardOutput.ReadToEndAsync(ct);
|
||||
await process.WaitForExitAsync(ct);
|
||||
|
||||
foreach (var line in output.Split('\n').Skip(3))
|
||||
{
|
||||
var parts = line.Split(' ', StringSplitOptions.RemoveEmptyEntries);
|
||||
if (parts.Length > 1)
|
||||
{
|
||||
servers.Add(parts[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort
|
||||
}
|
||||
|
||||
return servers.Count > 0 ? servers : ["pool.ntp.org"];
|
||||
}
|
||||
|
||||
private static async Task<(DateTimeOffset? LastSync, int? SyncAge)> GetLinuxSyncInfoAsync(string daemonType, CancellationToken ct)
|
||||
{
|
||||
try
|
||||
{
|
||||
using var process = new Process
|
||||
{
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "timedatectl",
|
||||
Arguments = "show --property=NTPSynchronized",
|
||||
RedirectStandardOutput = true,
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true
|
||||
}
|
||||
};
|
||||
|
||||
process.Start();
|
||||
var output = await process.StandardOutput.ReadToEndAsync(ct);
|
||||
await process.WaitForExitAsync(ct);
|
||||
|
||||
if (output.Contains("yes", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
return (DateTimeOffset.UtcNow, 0);
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort
|
||||
}
|
||||
|
||||
return (null, null);
|
||||
}
|
||||
|
||||
private static async Task<bool> IsWindowsTimeServiceRunningAsync(CancellationToken ct)
|
||||
{
|
||||
try
|
||||
{
|
||||
using var process = new Process
|
||||
{
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "sc",
|
||||
Arguments = "query w32time",
|
||||
RedirectStandardOutput = true,
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true
|
||||
}
|
||||
};
|
||||
|
||||
process.Start();
|
||||
var output = await process.StandardOutput.ReadToEndAsync(ct);
|
||||
await process.WaitForExitAsync(ct);
|
||||
|
||||
return output.Contains("RUNNING", StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
catch
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<List<string>> GetWindowsNtpServersAsync(CancellationToken ct)
|
||||
{
|
||||
var servers = new List<string>();
|
||||
|
||||
try
|
||||
{
|
||||
using var process = new Process
|
||||
{
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = "w32tm",
|
||||
Arguments = "/query /peers",
|
||||
RedirectStandardOutput = true,
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true
|
||||
}
|
||||
};
|
||||
|
||||
process.Start();
|
||||
var output = await process.StandardOutput.ReadToEndAsync(ct);
|
||||
await process.WaitForExitAsync(ct);
|
||||
|
||||
foreach (var line in output.Split('\n'))
|
||||
{
|
||||
if (line.Contains("Peer:", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
var parts = line.Split(':');
|
||||
if (parts.Length > 1)
|
||||
{
|
||||
servers.Add(parts[1].Trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort
|
||||
}
|
||||
|
||||
return servers.Count > 0 ? servers : ["time.windows.com"];
|
||||
}
|
||||
|
||||
private static VmStatus DetectVirtualMachine()
|
||||
{
|
||||
var status = new VmStatus();
|
||||
|
||||
try
|
||||
{
|
||||
if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
|
||||
{
|
||||
// Check for VM indicators
|
||||
var dmidecodeVendor = "";
|
||||
try
|
||||
{
|
||||
if (File.Exists("/sys/class/dmi/id/sys_vendor"))
|
||||
{
|
||||
dmidecodeVendor = File.ReadAllText("/sys/class/dmi/id/sys_vendor").Trim().ToLowerInvariant();
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
|
||||
if (dmidecodeVendor.Contains("vmware"))
|
||||
{
|
||||
status.IsVirtualMachine = true;
|
||||
status.VmType = "vmware";
|
||||
status.ClockSyncEnabled = File.Exists("/usr/bin/vmware-toolbox-cmd");
|
||||
}
|
||||
else if (dmidecodeVendor.Contains("microsoft"))
|
||||
{
|
||||
status.IsVirtualMachine = true;
|
||||
status.VmType = "hyper-v";
|
||||
status.ClockSyncEnabled = Directory.Exists("/sys/bus/vmbus");
|
||||
}
|
||||
else if (dmidecodeVendor.Contains("qemu") || dmidecodeVendor.Contains("kvm"))
|
||||
{
|
||||
status.IsVirtualMachine = true;
|
||||
status.VmType = "kvm";
|
||||
}
|
||||
else if (dmidecodeVendor.Contains("xen"))
|
||||
{
|
||||
status.IsVirtualMachine = true;
|
||||
status.VmType = "xen";
|
||||
}
|
||||
else if (File.Exists("/proc/1/cgroup"))
|
||||
{
|
||||
var cgroup = File.ReadAllText("/proc/1/cgroup");
|
||||
if (cgroup.Contains("docker") || cgroup.Contains("containerd"))
|
||||
{
|
||||
status.IsVirtualMachine = true;
|
||||
status.VmType = "container";
|
||||
status.ClockSyncEnabled = true; // Containers use host clock
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
|
||||
{
|
||||
// Check Windows VM indicators via WMI or registry
|
||||
var manufacturer = Environment.GetEnvironmentVariable("COMPUTERNAME_MANUFACTURER") ?? "";
|
||||
if (manufacturer.Contains("VMware", StringComparison.OrdinalIgnoreCase) ||
|
||||
manufacturer.Contains("Microsoft", StringComparison.OrdinalIgnoreCase) ||
|
||||
manufacturer.Contains("Xen", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
status.IsVirtualMachine = true;
|
||||
status.VmType = manufacturer.ToLowerInvariant();
|
||||
}
|
||||
|
||||
// Check for Hyper-V
|
||||
if (Environment.GetEnvironmentVariable("PROCESSOR_IDENTIFIER")?.Contains("Virtual", StringComparison.OrdinalIgnoreCase) == true)
|
||||
{
|
||||
status.IsVirtualMachine = true;
|
||||
status.VmType = "hyper-v";
|
||||
}
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
private static string GetConnectionErrorType(HttpRequestException ex)
|
||||
{
|
||||
var message = ex.Message.ToLowerInvariant();
|
||||
if (message.Contains("ssl") || message.Contains("tls") || message.Contains("certificate"))
|
||||
return "ssl_error";
|
||||
if (message.Contains("name") || message.Contains("dns") || message.Contains("resolve"))
|
||||
return "dns_failure";
|
||||
if (message.Contains("refused") || message.Contains("actively refused"))
|
||||
return "refused";
|
||||
if (message.Contains("timeout"))
|
||||
return "timeout";
|
||||
return "connection_failed";
|
||||
}
|
||||
|
||||
private sealed class NtpStatus
|
||||
{
|
||||
public bool DaemonRunning { get; set; }
|
||||
public string DaemonType { get; set; } = "unknown";
|
||||
public List<string> ServersConfigured { get; set; } = [];
|
||||
public DateTimeOffset? LastSyncTime { get; set; }
|
||||
public int? SyncAgeSeconds { get; set; }
|
||||
}
|
||||
|
||||
private sealed class VmStatus
|
||||
{
|
||||
public bool IsVirtualMachine { get; set; }
|
||||
public string VmType { get; set; } = "none";
|
||||
public bool ClockSyncEnabled { get; set; }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,9 +100,9 @@ public sealed class TransparencyLogConsistencyCheck : IDoctorCheck
|
||||
.Add("CheckpointPath", checkpointPath)
|
||||
.Add("Error", "Failed to parse checkpoint JSON"))
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Remove corrupted checkpoint",
|
||||
.AddDestructiveStep(1, "Remove corrupted checkpoint",
|
||||
$"rm {checkpointPath}",
|
||||
CommandType.Shell)
|
||||
$"cat {checkpointPath}")
|
||||
.AddStep(2, "Trigger re-sync",
|
||||
"stella attestor transparency sync",
|
||||
CommandType.Shell))
|
||||
@@ -181,9 +181,9 @@ public sealed class TransparencyLogConsistencyCheck : IDoctorCheck
|
||||
.AddStep(3, "Check stored checkpoint",
|
||||
$"cat {checkpointPath} | jq .",
|
||||
CommandType.Shell)
|
||||
.AddStep(4, "If using wrong log, reset checkpoint",
|
||||
.AddDestructiveStep(4, "If using wrong log, reset checkpoint (DESTRUCTIVE)",
|
||||
$"rm {checkpointPath} && stella attestor transparency sync",
|
||||
CommandType.Shell))
|
||||
$"ls -la {checkpointPath}"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// OidcProviderConnectivityCheck.cs
|
||||
// Sprint: SPRINT_20260117_016_CLI_auth_access
|
||||
// Task: AAC-006 - Doctor checks for auth configuration
|
||||
// Description: Health check for OIDC provider connectivity
|
||||
// Sprint: SPRINT_20260118_015_Doctor_check_quality_improvements
|
||||
// Task: DQUAL-002 - Replace OidcProviderConnectivityCheck mock implementation
|
||||
// Description: Health check for OIDC provider connectivity with real HTTP calls
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
@@ -16,6 +20,11 @@ namespace StellaOps.Doctor.Plugin.Auth.Checks;
|
||||
/// </summary>
|
||||
public sealed class OidcProviderConnectivityCheck : IDoctorCheck
|
||||
{
|
||||
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
|
||||
{
|
||||
PropertyNameCaseInsensitive = true
|
||||
};
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.auth.oidc";
|
||||
|
||||
@@ -37,7 +46,11 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
return true;
|
||||
// Check if external OIDC provider is configured
|
||||
var issuerUrl = context.Configuration["Authentication:Oidc:Issuer"]
|
||||
?? context.Configuration["Auth:Oidc:Authority"]
|
||||
?? context.Configuration["Oidc:Issuer"];
|
||||
return !string.IsNullOrEmpty(issuerUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -45,29 +58,35 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, "stellaops.doctor.auth", "Auth & Access Control");
|
||||
|
||||
var oidcStatus = await CheckOidcProviderAsync(context, ct);
|
||||
var issuerUrl = context.Configuration["Authentication:Oidc:Issuer"]
|
||||
?? context.Configuration["Auth:Oidc:Authority"]
|
||||
?? context.Configuration["Oidc:Issuer"];
|
||||
|
||||
if (!oidcStatus.IsConfigured)
|
||||
if (string.IsNullOrEmpty(issuerUrl))
|
||||
{
|
||||
return builder
|
||||
.Pass("No external OIDC provider configured (using local authority)")
|
||||
.WithEvidence("OIDC Status", eb =>
|
||||
{
|
||||
eb.Add("ExternalProvider", "NOT CONFIGURED");
|
||||
eb.Add("LocalAuthority", "ACTIVE");
|
||||
eb.Add("external_provider", "NOT_CONFIGURED");
|
||||
eb.Add("local_authority", "ACTIVE");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
var oidcStatus = await CheckOidcProviderAsync(context, issuerUrl, ct);
|
||||
|
||||
if (!oidcStatus.IsReachable)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Cannot reach OIDC provider at {oidcStatus.ProviderUrl}")
|
||||
.Fail($"Cannot reach OIDC provider at {issuerUrl}")
|
||||
.WithEvidence("OIDC Status", eb =>
|
||||
{
|
||||
eb.Add("ProviderUrl", oidcStatus.ProviderUrl ?? "not set");
|
||||
eb.Add("Reachable", "NO");
|
||||
eb.Add("Error", oidcStatus.Error ?? "Connection failed");
|
||||
eb.Add("issuer_url", issuerUrl);
|
||||
eb.Add("discovery_reachable", "false");
|
||||
eb.Add("http_status_code", oidcStatus.HttpStatusCode?.ToString() ?? "null");
|
||||
eb.Add("error_message", oidcStatus.Error ?? "Connection failed");
|
||||
eb.Add("connection_error_type", oidcStatus.ConnectionErrorType ?? "unknown");
|
||||
})
|
||||
.WithCauses(
|
||||
"OIDC provider is down",
|
||||
@@ -76,9 +95,12 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
|
||||
"DNS resolution failure")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Test provider connectivity",
|
||||
"stella auth oidc test",
|
||||
$"curl -s {issuerUrl}/.well-known/openid-configuration",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Check network configuration",
|
||||
.AddStep(2, "Check DNS resolution",
|
||||
$"nslookup {new Uri(issuerUrl).Host}",
|
||||
CommandType.Shell)
|
||||
.AddStep(3, "Check network configuration",
|
||||
"stella doctor --check check.network.dns",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
@@ -91,10 +113,13 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
|
||||
.Warn("OIDC discovery document has issues")
|
||||
.WithEvidence("OIDC Status", eb =>
|
||||
{
|
||||
eb.Add("ProviderUrl", oidcStatus.ProviderUrl ?? "not set");
|
||||
eb.Add("Reachable", "YES");
|
||||
eb.Add("DiscoveryValid", "PARTIAL");
|
||||
eb.Add("Warning", oidcStatus.DiscoveryWarning ?? "");
|
||||
eb.Add("issuer_url", issuerUrl);
|
||||
eb.Add("discovery_reachable", "true");
|
||||
eb.Add("discovery_response_ms", oidcStatus.DiscoveryResponseMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("authorization_endpoint_present", oidcStatus.AuthorizationEndpointPresent.ToString().ToLowerInvariant());
|
||||
eb.Add("token_endpoint_present", oidcStatus.TokenEndpointPresent.ToString().ToLowerInvariant());
|
||||
eb.Add("jwks_uri_present", oidcStatus.JwksUriPresent.ToString().ToLowerInvariant());
|
||||
eb.Add("error_message", oidcStatus.DiscoveryWarning ?? "");
|
||||
})
|
||||
.WithCauses(
|
||||
"Discovery document missing required fields",
|
||||
@@ -102,34 +127,179 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
|
||||
"JWKS endpoint issues")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Validate discovery document",
|
||||
$"curl -s {issuerUrl}/.well-known/openid-configuration | jq .",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Check OIDC provider configuration",
|
||||
"stella auth oidc validate",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (oidcStatus.JwksKeyCount == 0)
|
||||
{
|
||||
return builder
|
||||
.Warn("JWKS has no keys - token validation may fail")
|
||||
.WithEvidence("OIDC Status", eb =>
|
||||
{
|
||||
eb.Add("issuer_url", issuerUrl);
|
||||
eb.Add("discovery_reachable", "true");
|
||||
eb.Add("discovery_response_ms", oidcStatus.DiscoveryResponseMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("authorization_endpoint_present", "true");
|
||||
eb.Add("token_endpoint_present", "true");
|
||||
eb.Add("jwks_uri_present", "true");
|
||||
eb.Add("jwks_key_count", "0");
|
||||
eb.Add("jwks_fetch_ms", oidcStatus.JwksFetchMs.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"JWKS endpoint returned empty key set",
|
||||
"Key rotation in progress",
|
||||
"OIDC provider misconfiguration")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check JWKS endpoint",
|
||||
$"curl -s {oidcStatus.JwksUri} | jq .",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass("OIDC provider is reachable and configured correctly")
|
||||
.WithEvidence("OIDC Status", eb =>
|
||||
{
|
||||
eb.Add("ProviderUrl", oidcStatus.ProviderUrl ?? "not set");
|
||||
eb.Add("Reachable", "YES");
|
||||
eb.Add("DiscoveryValid", "YES");
|
||||
eb.Add("ResponseTimeMs", oidcStatus.ResponseTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("issuer_url", issuerUrl);
|
||||
eb.Add("discovery_reachable", "true");
|
||||
eb.Add("discovery_response_ms", oidcStatus.DiscoveryResponseMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("authorization_endpoint_present", "true");
|
||||
eb.Add("token_endpoint_present", "true");
|
||||
eb.Add("jwks_uri_present", "true");
|
||||
eb.Add("jwks_key_count", oidcStatus.JwksKeyCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("jwks_fetch_ms", oidcStatus.JwksFetchMs.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
private Task<OidcStatus> CheckOidcProviderAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
private async Task<OidcStatus> CheckOidcProviderAsync(DoctorPluginContext context, string issuerUrl, CancellationToken ct)
|
||||
{
|
||||
return Task.FromResult(new OidcStatus
|
||||
var result = new OidcStatus { ProviderUrl = issuerUrl };
|
||||
|
||||
try
|
||||
{
|
||||
IsConfigured = true,
|
||||
ProviderUrl = "https://auth.example.com",
|
||||
IsReachable = true,
|
||||
DiscoveryValid = true,
|
||||
ResponseTimeMs = 85
|
||||
});
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
// Fetch discovery document
|
||||
var discoveryUrl = $"{issuerUrl.TrimEnd('/')}/.well-known/openid-configuration";
|
||||
var discoveryStopwatch = Stopwatch.StartNew();
|
||||
|
||||
HttpResponseMessage discoveryResponse;
|
||||
try
|
||||
{
|
||||
discoveryResponse = await httpClient.GetAsync(discoveryUrl, ct);
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
result.IsReachable = false;
|
||||
result.Error = ex.Message;
|
||||
result.ConnectionErrorType = GetConnectionErrorType(ex);
|
||||
return result;
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
result.IsReachable = false;
|
||||
result.Error = "Request timed out";
|
||||
result.ConnectionErrorType = "timeout";
|
||||
return result;
|
||||
}
|
||||
|
||||
discoveryStopwatch.Stop();
|
||||
result.DiscoveryResponseMs = discoveryStopwatch.ElapsedMilliseconds;
|
||||
result.HttpStatusCode = (int)discoveryResponse.StatusCode;
|
||||
|
||||
if (!discoveryResponse.IsSuccessStatusCode)
|
||||
{
|
||||
result.IsReachable = true;
|
||||
result.DiscoveryValid = false;
|
||||
result.DiscoveryWarning = $"Discovery endpoint returned HTTP {(int)discoveryResponse.StatusCode}";
|
||||
return result;
|
||||
}
|
||||
|
||||
result.IsReachable = true;
|
||||
|
||||
// Parse discovery document
|
||||
var discoveryJson = await discoveryResponse.Content.ReadAsStringAsync(ct);
|
||||
using var discoveryDoc = JsonDocument.Parse(discoveryJson);
|
||||
var root = discoveryDoc.RootElement;
|
||||
|
||||
// Check required endpoints
|
||||
result.AuthorizationEndpointPresent = root.TryGetProperty("authorization_endpoint", out _);
|
||||
result.TokenEndpointPresent = root.TryGetProperty("token_endpoint", out _);
|
||||
result.JwksUriPresent = root.TryGetProperty("jwks_uri", out var jwksUriElement);
|
||||
|
||||
if (!result.AuthorizationEndpointPresent || !result.TokenEndpointPresent || !result.JwksUriPresent)
|
||||
{
|
||||
result.DiscoveryValid = false;
|
||||
var missing = new List<string>();
|
||||
if (!result.AuthorizationEndpointPresent) missing.Add("authorization_endpoint");
|
||||
if (!result.TokenEndpointPresent) missing.Add("token_endpoint");
|
||||
if (!result.JwksUriPresent) missing.Add("jwks_uri");
|
||||
result.DiscoveryWarning = $"Missing required fields: {string.Join(", ", missing)}";
|
||||
return result;
|
||||
}
|
||||
|
||||
result.DiscoveryValid = true;
|
||||
result.JwksUri = jwksUriElement.GetString();
|
||||
|
||||
// Fetch JWKS
|
||||
if (!string.IsNullOrEmpty(result.JwksUri))
|
||||
{
|
||||
var jwksStopwatch = Stopwatch.StartNew();
|
||||
try
|
||||
{
|
||||
var jwksResponse = await httpClient.GetAsync(result.JwksUri, ct);
|
||||
jwksStopwatch.Stop();
|
||||
result.JwksFetchMs = jwksStopwatch.ElapsedMilliseconds;
|
||||
|
||||
if (jwksResponse.IsSuccessStatusCode)
|
||||
{
|
||||
var jwksJson = await jwksResponse.Content.ReadAsStringAsync(ct);
|
||||
using var jwksDoc = JsonDocument.Parse(jwksJson);
|
||||
if (jwksDoc.RootElement.TryGetProperty("keys", out var keysArray) && keysArray.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
result.JwksKeyCount = keysArray.GetArrayLength();
|
||||
}
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// JWKS fetch failed but discovery worked
|
||||
result.JwksKeyCount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
result.IsReachable = false;
|
||||
result.Error = ex.Message;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private static string GetConnectionErrorType(HttpRequestException ex)
|
||||
{
|
||||
var message = ex.Message.ToLowerInvariant();
|
||||
if (message.Contains("ssl") || message.Contains("tls") || message.Contains("certificate"))
|
||||
return "ssl_error";
|
||||
if (message.Contains("name") || message.Contains("dns") || message.Contains("resolve"))
|
||||
return "dns_failure";
|
||||
if (message.Contains("refused") || message.Contains("actively refused"))
|
||||
return "refused";
|
||||
if (message.Contains("timeout"))
|
||||
return "timeout";
|
||||
return "connection_failed";
|
||||
}
|
||||
|
||||
private sealed class OidcStatus
|
||||
@@ -139,7 +309,15 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
|
||||
public bool IsReachable { get; set; }
|
||||
public bool DiscoveryValid { get; set; }
|
||||
public string? Error { get; set; }
|
||||
public string? ConnectionErrorType { get; set; }
|
||||
public string? DiscoveryWarning { get; set; }
|
||||
public long ResponseTimeMs { get; set; }
|
||||
public long DiscoveryResponseMs { get; set; }
|
||||
public int? HttpStatusCode { get; set; }
|
||||
public bool AuthorizationEndpointPresent { get; set; }
|
||||
public bool TokenEndpointPresent { get; set; }
|
||||
public bool JwksUriPresent { get; set; }
|
||||
public string? JwksUri { get; set; }
|
||||
public int JwksKeyCount { get; set; }
|
||||
public long JwksFetchMs { get; set; }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,216 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// AttestationSigningHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
|
||||
// Task: COMPL-003 - Implement AttestationSigningHealthCheck
|
||||
// Description: Monitor attestation signing capability
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors attestation signing health and key availability.
|
||||
/// </summary>
|
||||
public sealed class AttestationSigningHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.compliance";
|
||||
private const string CategoryName = "Compliance";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.compliance.attestation-signing";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Attestation Signing Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Monitor signing key availability and attestation capability";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["compliance", "attestation", "signing", "crypto"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var attestorUrl = context.Configuration["Attestor:Url"]
|
||||
?? context.Configuration["Services:Attestor:Url"];
|
||||
return !string.IsNullOrEmpty(attestorUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var attestorUrl = context.Configuration["Attestor:Url"]
|
||||
?? context.Configuration["Services:Attestor:Url"]
|
||||
?? "http://localhost:5082";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{attestorUrl.TrimEnd('/')}/api/v1/signing/status",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Cannot retrieve signing status: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Signing Status", eb =>
|
||||
{
|
||||
eb.Add("attestor_url", attestorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Attestor service unavailable", "Authentication failure")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var status = ParseSigningStatus(json);
|
||||
|
||||
// Check key availability
|
||||
if (!status.KeyAvailable)
|
||||
{
|
||||
return builder
|
||||
.Fail("Signing key not available - cannot create attestations")
|
||||
.WithEvidence("Signing Status", eb =>
|
||||
{
|
||||
eb.Add("key_available", "false");
|
||||
eb.Add("key_type", status.KeyType ?? "unknown");
|
||||
eb.Add("last_error", status.LastError ?? "none");
|
||||
})
|
||||
.WithCauses(
|
||||
"HSM/KMS connectivity issue",
|
||||
"Key rotation in progress",
|
||||
"Key expired or revoked",
|
||||
"Permission denied")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Check key status",
|
||||
"stella attestor key status",
|
||||
CommandType.Stella);
|
||||
rb.AddStep(2, "Verify HSM/KMS connectivity",
|
||||
"stella attestor hsm test",
|
||||
CommandType.Stella);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check key expiration
|
||||
if (status.KeyExpiresAt.HasValue)
|
||||
{
|
||||
var daysUntilExpiry = (status.KeyExpiresAt.Value - DateTimeOffset.UtcNow).TotalDays;
|
||||
|
||||
if (daysUntilExpiry <= 0)
|
||||
{
|
||||
return builder
|
||||
.Fail("Signing key has expired")
|
||||
.WithEvidence("Signing Status", eb =>
|
||||
{
|
||||
eb.Add("key_expired", "true");
|
||||
eb.Add("expired_at", status.KeyExpiresAt.Value.ToString("o", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Key not rotated before expiry")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Rotate signing key", "stella attestor key rotate", CommandType.Stella))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (daysUntilExpiry <= 30)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Signing key expires in {daysUntilExpiry:F0} days")
|
||||
.WithEvidence("Signing Status", eb =>
|
||||
{
|
||||
eb.Add("key_available", "true");
|
||||
eb.Add("days_until_expiry", daysUntilExpiry.ToString("F0", CultureInfo.InvariantCulture));
|
||||
eb.Add("expires_at", status.KeyExpiresAt.Value.ToString("o", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Key approaching end of validity")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Schedule key rotation", "stella attestor key rotate --schedule", CommandType.Stella))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Signing healthy ({status.KeyType}, {status.SignaturesLast24h} signatures in 24h)")
|
||||
.WithEvidence("Signing Status", eb =>
|
||||
{
|
||||
eb.Add("key_available", "true");
|
||||
eb.Add("key_type", status.KeyType ?? "unknown");
|
||||
eb.Add("signatures_24h", status.SignaturesLast24h.ToString(CultureInfo.InvariantCulture));
|
||||
if (status.KeyExpiresAt.HasValue)
|
||||
eb.Add("expires_at", status.KeyExpiresAt.Value.ToString("o", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Cannot check signing health: {ex.Message}")
|
||||
.WithEvidence("Signing Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Signing health check timed out")
|
||||
.WithEvidence("Signing Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static SigningStatus ParseSigningStatus(string json)
|
||||
{
|
||||
var status = new SigningStatus();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
status.KeyAvailable = doc.RootElement.TryGetProperty("keyAvailable", out var ka) && ka.GetBoolean();
|
||||
status.KeyType = doc.RootElement.TryGetProperty("keyType", out var kt) ? kt.GetString() : null;
|
||||
status.SignaturesLast24h = doc.RootElement.TryGetProperty("signaturesLast24h", out var s24) ? s24.GetInt32() : 0;
|
||||
status.LastError = doc.RootElement.TryGetProperty("lastError", out var le) ? le.GetString() : null;
|
||||
|
||||
if (doc.RootElement.TryGetProperty("keyExpiresAt", out var ke) &&
|
||||
DateTimeOffset.TryParse(ke.GetString(), out var expiresAt))
|
||||
{
|
||||
status.KeyExpiresAt = expiresAt;
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
private sealed class SigningStatus
|
||||
{
|
||||
public bool KeyAvailable { get; set; }
|
||||
public string? KeyType { get; set; }
|
||||
public int SignaturesLast24h { get; set; }
|
||||
public DateTimeOffset? KeyExpiresAt { get; set; }
|
||||
public string? LastError { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,196 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// AuditReadinessCheck.cs
|
||||
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
|
||||
// Task: COMPL-005 - Implement AuditReadinessCheck
|
||||
// Description: Verify system is ready for compliance audits
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that the system is ready for compliance audits.
|
||||
/// Checks evidence availability, retention policies, and audit trails.
|
||||
/// </summary>
|
||||
public sealed class AuditReadinessCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.compliance";
|
||||
private const string CategoryName = "Compliance";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.compliance.audit-readiness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Audit Readiness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Verify system is ready for compliance audits";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["compliance", "audit", "evidence"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
|
||||
?? context.Configuration["Services:EvidenceLocker:Url"];
|
||||
return !string.IsNullOrEmpty(evidenceUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
|
||||
?? context.Configuration["Services:EvidenceLocker:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(15);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{evidenceUrl.TrimEnd('/')}/api/v1/evidence/audit-readiness",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check audit readiness: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Audit Readiness", eb =>
|
||||
{
|
||||
eb.Add("evidence_locker_url", evidenceUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var status = ParseAuditStatus(json);
|
||||
|
||||
var issues = new List<string>();
|
||||
|
||||
if (!status.RetentionPolicyConfigured)
|
||||
issues.Add("No retention policy configured");
|
||||
if (!status.AuditLogEnabled)
|
||||
issues.Add("Audit logging disabled");
|
||||
if (!status.BackupVerified)
|
||||
issues.Add("Backup not verified");
|
||||
if (status.OldestEvidenceAge < status.RequiredRetentionDays)
|
||||
issues.Add($"Evidence retention {status.OldestEvidenceAge}d < required {status.RequiredRetentionDays}d");
|
||||
|
||||
if (issues.Count >= 3)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Audit readiness critical: {issues.Count} issues")
|
||||
.WithEvidence("Audit Readiness", eb =>
|
||||
{
|
||||
eb.Add("issues_count", issues.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("retention_policy_configured", status.RetentionPolicyConfigured.ToString().ToLowerInvariant());
|
||||
eb.Add("audit_log_enabled", status.AuditLogEnabled.ToString().ToLowerInvariant());
|
||||
eb.Add("backup_verified", status.BackupVerified.ToString().ToLowerInvariant());
|
||||
eb.Add("evidence_count", status.EvidenceCount.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(issues.ToArray())
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Configure retention policy",
|
||||
"stella evidence retention set --days 365",
|
||||
CommandType.Stella);
|
||||
rb.AddStep(2, "Enable audit logging",
|
||||
"stella audit enable",
|
||||
CommandType.Stella);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (issues.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Audit readiness issues: {string.Join(", ", issues)}")
|
||||
.WithEvidence("Audit Readiness", eb =>
|
||||
{
|
||||
eb.Add("issues_count", issues.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("evidence_count", status.EvidenceCount.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(issues.ToArray())
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Audit ready ({status.EvidenceCount} records, {status.OldestEvidenceAge}d retention)")
|
||||
.WithEvidence("Audit Readiness", eb =>
|
||||
{
|
||||
eb.Add("evidence_count", status.EvidenceCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("oldest_evidence_days", status.OldestEvidenceAge.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("retention_policy_configured", "true");
|
||||
eb.Add("audit_log_enabled", "true");
|
||||
eb.Add("backup_verified", "true");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check audit readiness: {ex.Message}")
|
||||
.WithEvidence("Audit Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Audit readiness check timed out")
|
||||
.WithEvidence("Audit Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static AuditStatus ParseAuditStatus(string json)
|
||||
{
|
||||
var status = new AuditStatus();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
status.RetentionPolicyConfigured = doc.RootElement.TryGetProperty("retentionPolicyConfigured", out var rpc) && rpc.GetBoolean();
|
||||
status.AuditLogEnabled = doc.RootElement.TryGetProperty("auditLogEnabled", out var ale) && ale.GetBoolean();
|
||||
status.BackupVerified = doc.RootElement.TryGetProperty("backupVerified", out var bv) && bv.GetBoolean();
|
||||
status.EvidenceCount = doc.RootElement.TryGetProperty("evidenceCount", out var ec) ? ec.GetInt32() : 0;
|
||||
status.OldestEvidenceAge = doc.RootElement.TryGetProperty("oldestEvidenceAgeDays", out var oea) ? oea.GetInt32() : 0;
|
||||
status.RequiredRetentionDays = doc.RootElement.TryGetProperty("requiredRetentionDays", out var rrd) ? rrd.GetInt32() : 365;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
private sealed class AuditStatus
|
||||
{
|
||||
public bool RetentionPolicyConfigured { get; set; }
|
||||
public bool AuditLogEnabled { get; set; }
|
||||
public bool BackupVerified { get; set; }
|
||||
public int EvidenceCount { get; set; }
|
||||
public int OldestEvidenceAge { get; set; }
|
||||
public int RequiredRetentionDays { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,191 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ComplianceFrameworkCheck.cs
|
||||
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
|
||||
// Task: COMPL-007 - Implement ComplianceFrameworkCheck
|
||||
// Description: Verify compliance framework requirements are met
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that configured compliance framework requirements are met.
|
||||
/// Supports SOC2, FedRAMP, HIPAA, PCI-DSS, and custom frameworks.
|
||||
/// </summary>
|
||||
public sealed class ComplianceFrameworkCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.compliance";
|
||||
private const string CategoryName = "Compliance";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.compliance.framework";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Compliance Framework";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Verify compliance framework requirements are met";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["compliance", "framework", "soc2", "fedramp"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var frameworks = context.Configuration["Compliance:Frameworks"];
|
||||
return !string.IsNullOrEmpty(frameworks);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var policyUrl = context.Configuration["Policy:Url"]
|
||||
?? context.Configuration["Services:Policy:Url"]
|
||||
?? "http://localhost:5050";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(15);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{policyUrl.TrimEnd('/')}/api/v1/compliance/status",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check compliance status: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Compliance Status", eb =>
|
||||
{
|
||||
eb.Add("policy_url", policyUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var status = ParseComplianceStatus(json);
|
||||
|
||||
if (status.FailingControls > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{status.FailingControls} compliance controls failing ({status.Framework})")
|
||||
.WithEvidence("Compliance Status", eb =>
|
||||
{
|
||||
eb.Add("framework", status.Framework);
|
||||
eb.Add("total_controls", status.TotalControls.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("passing_controls", status.PassingControls.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failing_controls", status.FailingControls.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("compliance_score", status.ComplianceScore.ToString("P0", CultureInfo.InvariantCulture));
|
||||
if (status.FirstFailingControl != null)
|
||||
eb.Add("first_failing_control", status.FirstFailingControl);
|
||||
})
|
||||
.WithCauses(
|
||||
"Control requirements not implemented",
|
||||
"Evidence gaps",
|
||||
"Policy violations detected",
|
||||
"Configuration drift from baseline")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "List failing controls",
|
||||
"stella compliance audit --failing",
|
||||
CommandType.Stella);
|
||||
rb.AddStep(2, "Review remediation guidance",
|
||||
"stella compliance remediate --plan",
|
||||
CommandType.Stella);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (status.ComplianceScore < 1.0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Compliance score {status.ComplianceScore:P0} ({status.Framework})")
|
||||
.WithEvidence("Compliance Status", eb =>
|
||||
{
|
||||
eb.Add("framework", status.Framework);
|
||||
eb.Add("compliance_score", status.ComplianceScore.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("passing_controls", status.PassingControls.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_controls", status.TotalControls.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Some controls not fully implemented")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Compliance healthy ({status.Framework}: {status.PassingControls}/{status.TotalControls} controls)")
|
||||
.WithEvidence("Compliance Status", eb =>
|
||||
{
|
||||
eb.Add("framework", status.Framework);
|
||||
eb.Add("compliance_score", status.ComplianceScore.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("passing_controls", status.PassingControls.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_controls", status.TotalControls.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check compliance: {ex.Message}")
|
||||
.WithEvidence("Compliance Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Compliance check timed out")
|
||||
.WithEvidence("Compliance Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static ComplianceStatus ParseComplianceStatus(string json)
|
||||
{
|
||||
var status = new ComplianceStatus();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
status.Framework = doc.RootElement.TryGetProperty("framework", out var f) ? f.GetString() ?? "unknown" : "unknown";
|
||||
status.TotalControls = doc.RootElement.TryGetProperty("totalControls", out var tc) ? tc.GetInt32() : 0;
|
||||
status.PassingControls = doc.RootElement.TryGetProperty("passingControls", out var pc) ? pc.GetInt32() : 0;
|
||||
status.FailingControls = doc.RootElement.TryGetProperty("failingControls", out var fc) ? fc.GetInt32() : 0;
|
||||
status.ComplianceScore = doc.RootElement.TryGetProperty("complianceScore", out var cs) ? cs.GetDouble() : 0;
|
||||
status.FirstFailingControl = doc.RootElement.TryGetProperty("firstFailingControl", out var ffc) ? ffc.GetString() : null;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
private sealed class ComplianceStatus
|
||||
{
|
||||
public string Framework { get; set; } = "unknown";
|
||||
public int TotalControls { get; set; }
|
||||
public int PassingControls { get; set; }
|
||||
public int FailingControls { get; set; }
|
||||
public double ComplianceScore { get; set; }
|
||||
public string? FirstFailingControl { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EvidenceExportReadinessCheck.cs
|
||||
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
|
||||
// Task: COMPL-008 - Implement EvidenceExportReadinessCheck
|
||||
// Description: Verify evidence can be exported for auditors
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that evidence can be exported in auditor-ready formats.
|
||||
/// </summary>
|
||||
public sealed class EvidenceExportReadinessCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.compliance";
|
||||
private const string CategoryName = "Compliance";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.compliance.export-readiness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Evidence Export Readiness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Verify evidence can be exported for auditors";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["compliance", "export", "audit"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
|
||||
?? context.Configuration["Services:EvidenceLocker:Url"];
|
||||
return !string.IsNullOrEmpty(evidenceUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
|
||||
?? context.Configuration["Services:EvidenceLocker:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{evidenceUrl.TrimEnd('/')}/api/v1/evidence/export/capabilities",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check export capabilities: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Export Status", eb =>
|
||||
{
|
||||
eb.Add("evidence_locker_url", evidenceUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var status = ParseExportStatus(json);
|
||||
|
||||
var issues = new List<string>();
|
||||
|
||||
if (!status.PdfExportAvailable)
|
||||
issues.Add("PDF export not available");
|
||||
if (!status.JsonExportAvailable)
|
||||
issues.Add("JSON export not available");
|
||||
if (!status.SignedBundleAvailable)
|
||||
issues.Add("Signed bundle export not available");
|
||||
if (!status.ChainOfCustodyAvailable)
|
||||
issues.Add("Chain of custody report not available");
|
||||
|
||||
if (issues.Count >= 2)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Export capabilities limited: {string.Join(", ", issues)}")
|
||||
.WithEvidence("Export Status", eb =>
|
||||
{
|
||||
eb.Add("pdf_export", status.PdfExportAvailable.ToString().ToLowerInvariant());
|
||||
eb.Add("json_export", status.JsonExportAvailable.ToString().ToLowerInvariant());
|
||||
eb.Add("signed_bundle", status.SignedBundleAvailable.ToString().ToLowerInvariant());
|
||||
eb.Add("chain_of_custody", status.ChainOfCustodyAvailable.ToString().ToLowerInvariant());
|
||||
})
|
||||
.WithCauses(
|
||||
"Export dependencies not installed",
|
||||
"Signing keys not configured for bundles",
|
||||
"Template files missing")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Check export configuration",
|
||||
"stella evidence export --check",
|
||||
CommandType.Stella);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (issues.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Some export formats unavailable: {string.Join(", ", issues)}")
|
||||
.WithEvidence("Export Status", eb =>
|
||||
{
|
||||
eb.Add("available_formats", string.Join(", ", status.AvailableFormats));
|
||||
eb.Add("issues_count", issues.Count.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Export ready ({string.Join(", ", status.AvailableFormats)})")
|
||||
.WithEvidence("Export Status", eb =>
|
||||
{
|
||||
eb.Add("pdf_export", "true");
|
||||
eb.Add("json_export", "true");
|
||||
eb.Add("signed_bundle", "true");
|
||||
eb.Add("chain_of_custody", "true");
|
||||
eb.Add("available_formats", string.Join(", ", status.AvailableFormats));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check export readiness: {ex.Message}")
|
||||
.WithEvidence("Export Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Export readiness check timed out")
|
||||
.WithEvidence("Export Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static ExportStatus ParseExportStatus(string json)
|
||||
{
|
||||
var status = new ExportStatus();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
status.PdfExportAvailable = doc.RootElement.TryGetProperty("pdfExportAvailable", out var pdf) && pdf.GetBoolean();
|
||||
status.JsonExportAvailable = doc.RootElement.TryGetProperty("jsonExportAvailable", out var jsonExport) && jsonExport.GetBoolean();
|
||||
status.SignedBundleAvailable = doc.RootElement.TryGetProperty("signedBundleAvailable", out var sb) && sb.GetBoolean();
|
||||
status.ChainOfCustodyAvailable = doc.RootElement.TryGetProperty("chainOfCustodyAvailable", out var coc) && coc.GetBoolean();
|
||||
|
||||
if (doc.RootElement.TryGetProperty("availableFormats", out var formats) && formats.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
status.AvailableFormats = formats.EnumerateArray()
|
||||
.Select(f => f.GetString() ?? string.Empty)
|
||||
.Where(f => !string.IsNullOrEmpty(f))
|
||||
.ToList();
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
private sealed class ExportStatus
|
||||
{
|
||||
public bool PdfExportAvailable { get; set; }
|
||||
public bool JsonExportAvailable { get; set; }
|
||||
public bool SignedBundleAvailable { get; set; }
|
||||
public bool ChainOfCustodyAvailable { get; set; }
|
||||
public List<string> AvailableFormats { get; set; } = [];
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,189 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EvidenceGenerationRateCheck.cs
|
||||
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
|
||||
// Task: COMPL-002 - Implement EvidenceGenerationRateCheck
|
||||
// Description: Monitor evidence generation rate and success
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors evidence generation rate and success metrics.
|
||||
/// Tracks whether evidence is being generated at expected rates.
|
||||
/// </summary>
|
||||
public sealed class EvidenceGenerationRateCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.compliance";
|
||||
private const string CategoryName = "Compliance";
|
||||
private const double MinSuccessRate = 0.95;
|
||||
private const double WarnSuccessRate = 0.99;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.compliance.evidence-rate";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Evidence Generation Rate";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Monitor evidence generation success rate";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["compliance", "evidence", "attestation"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
|
||||
?? context.Configuration["Services:EvidenceLocker:Url"];
|
||||
return !string.IsNullOrEmpty(evidenceUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
|
||||
?? context.Configuration["Services:EvidenceLocker:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{evidenceUrl.TrimEnd('/')}/api/v1/evidence/metrics",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve evidence metrics: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Evidence Metrics", eb =>
|
||||
{
|
||||
eb.Add("evidence_locker_url", evidenceUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var metrics = ParseMetrics(json);
|
||||
|
||||
// Check success rate
|
||||
if (metrics.SuccessRate < MinSuccessRate)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Evidence generation rate critical: {metrics.SuccessRate:P1} (minimum: {MinSuccessRate:P0})")
|
||||
.WithEvidence("Evidence Metrics", eb =>
|
||||
{
|
||||
eb.Add("success_rate", metrics.SuccessRate.ToString("P2", CultureInfo.InvariantCulture));
|
||||
eb.Add("total_generated_24h", metrics.TotalGenerated.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_24h", metrics.Failed.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("pending_24h", metrics.Pending.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Evidence generation service failures",
|
||||
"Database connectivity issues",
|
||||
"Signing key unavailable",
|
||||
"Storage quota exceeded")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Check evidence locker logs",
|
||||
"stella logs evidence-locker --since 1h",
|
||||
CommandType.Stella);
|
||||
rb.AddStep(2, "Verify signing keys",
|
||||
"stella evidence keys status",
|
||||
CommandType.Stella);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (metrics.SuccessRate < WarnSuccessRate)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Evidence generation rate degraded: {metrics.SuccessRate:P1}")
|
||||
.WithEvidence("Evidence Metrics", eb =>
|
||||
{
|
||||
eb.Add("success_rate", metrics.SuccessRate.ToString("P2", CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_24h", metrics.Failed.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Intermittent failures", "High load")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Evidence generation healthy ({metrics.SuccessRate:P1} success, {metrics.TotalGenerated} in 24h)")
|
||||
.WithEvidence("Evidence Metrics", eb =>
|
||||
{
|
||||
eb.Add("success_rate", metrics.SuccessRate.ToString("P2", CultureInfo.InvariantCulture));
|
||||
eb.Add("total_generated_24h", metrics.TotalGenerated.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("avg_generation_time_ms", metrics.AvgGenerationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check evidence rate: {ex.Message}")
|
||||
.WithEvidence("Evidence Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Evidence rate check timed out")
|
||||
.WithEvidence("Evidence Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static EvidenceMetrics ParseMetrics(string json)
|
||||
{
|
||||
var metrics = new EvidenceMetrics();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
metrics.TotalGenerated = doc.RootElement.TryGetProperty("totalGenerated24h", out var tg) ? tg.GetInt32() : 0;
|
||||
metrics.Failed = doc.RootElement.TryGetProperty("failed24h", out var f) ? f.GetInt32() : 0;
|
||||
metrics.Pending = doc.RootElement.TryGetProperty("pending", out var p) ? p.GetInt32() : 0;
|
||||
metrics.AvgGenerationTimeMs = doc.RootElement.TryGetProperty("avgGenerationTimeMs", out var agt) ? agt.GetInt32() : 0;
|
||||
metrics.SuccessRate = metrics.TotalGenerated > 0
|
||||
? (double)(metrics.TotalGenerated - metrics.Failed) / metrics.TotalGenerated
|
||||
: 1.0;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return metrics;
|
||||
}
|
||||
|
||||
private sealed class EvidenceMetrics
|
||||
{
|
||||
public int TotalGenerated { get; set; }
|
||||
public int Failed { get; set; }
|
||||
public int Pending { get; set; }
|
||||
public int AvgGenerationTimeMs { get; set; }
|
||||
public double SuccessRate { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,190 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EvidenceTamperCheck.cs
|
||||
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
|
||||
// Task: COMPL-006 - Implement EvidenceTamperCheck
|
||||
// Description: Detect evidence tampering or integrity issues
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Detects evidence tampering or integrity issues.
|
||||
/// Verifies signatures and hash chains.
|
||||
/// </summary>
|
||||
public sealed class EvidenceTamperCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.compliance";
|
||||
private const string CategoryName = "Compliance";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.compliance.evidence-integrity";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Evidence Integrity";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Detect evidence tampering or integrity issues";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["compliance", "security", "integrity", "signatures"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(30);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
|
||||
?? context.Configuration["Services:EvidenceLocker:Url"];
|
||||
return !string.IsNullOrEmpty(evidenceUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
|
||||
?? context.Configuration["Services:EvidenceLocker:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(60);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{evidenceUrl.TrimEnd('/')}/api/v1/evidence/integrity-check",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot verify evidence integrity: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Integrity Check", eb =>
|
||||
{
|
||||
eb.Add("evidence_locker_url", evidenceUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var status = ParseIntegrityStatus(json);
|
||||
|
||||
if (status.TamperedCount > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"CRITICAL: {status.TamperedCount} evidence records show tampering")
|
||||
.WithEvidence("Integrity Check", eb =>
|
||||
{
|
||||
eb.Add("tampered_count", status.TamperedCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("verified_count", status.VerifiedCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_checked", status.TotalChecked.ToString(CultureInfo.InvariantCulture));
|
||||
if (status.FirstTamperedId != null)
|
||||
eb.Add("first_tampered_id", status.FirstTamperedId);
|
||||
})
|
||||
.WithCauses(
|
||||
"Evidence modification after signing",
|
||||
"Storage corruption",
|
||||
"Malicious tampering",
|
||||
"Key/certificate mismatch")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "List tampered evidence", "stella evidence audit --tampered", CommandType.Stella)
|
||||
.WithSafetyNote("DO NOT delete tampered evidence - preserve for investigation");
|
||||
rb.AddStep(2, "Investigate security incident", "Contact security team", CommandType.Manual)
|
||||
.RequireBackup();
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (status.VerificationErrors > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{status.VerificationErrors} evidence records could not be verified")
|
||||
.WithEvidence("Integrity Check", eb =>
|
||||
{
|
||||
eb.Add("verification_errors", status.VerificationErrors.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("verified_count", status.VerifiedCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_checked", status.TotalChecked.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Missing signing certificates",
|
||||
"Certificate expiration",
|
||||
"Unsupported signature algorithm")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Evidence integrity verified ({status.VerifiedCount}/{status.TotalChecked} records)")
|
||||
.WithEvidence("Integrity Check", eb =>
|
||||
{
|
||||
eb.Add("verified_count", status.VerifiedCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_checked", status.TotalChecked.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("tampered_count", "0");
|
||||
eb.Add("hash_chain_valid", status.HashChainValid.ToString().ToLowerInvariant());
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot verify evidence integrity: {ex.Message}")
|
||||
.WithEvidence("Integrity Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Evidence integrity check timed out")
|
||||
.WithEvidence("Integrity Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static IntegrityStatus ParseIntegrityStatus(string json)
|
||||
{
|
||||
var status = new IntegrityStatus();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
status.TotalChecked = doc.RootElement.TryGetProperty("totalChecked", out var tc) ? tc.GetInt32() : 0;
|
||||
status.VerifiedCount = doc.RootElement.TryGetProperty("verifiedCount", out var vc) ? vc.GetInt32() : 0;
|
||||
status.TamperedCount = doc.RootElement.TryGetProperty("tamperedCount", out var tmc) ? tmc.GetInt32() : 0;
|
||||
status.VerificationErrors = doc.RootElement.TryGetProperty("verificationErrors", out var ve) ? ve.GetInt32() : 0;
|
||||
status.HashChainValid = doc.RootElement.TryGetProperty("hashChainValid", out var hcv) && hcv.GetBoolean();
|
||||
status.FirstTamperedId = doc.RootElement.TryGetProperty("firstTamperedId", out var fti) ? fti.GetString() : null;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
private sealed class IntegrityStatus
|
||||
{
|
||||
public int TotalChecked { get; set; }
|
||||
public int VerifiedCount { get; set; }
|
||||
public int TamperedCount { get; set; }
|
||||
public int VerificationErrors { get; set; }
|
||||
public bool HashChainValid { get; set; }
|
||||
public string? FirstTamperedId { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,185 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ProvenanceCompletenessCheck.cs
|
||||
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
|
||||
// Task: COMPL-004 - Implement ProvenanceCompletenessCheck
|
||||
// Description: Verify provenance records are complete
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that provenance records are complete for releases.
|
||||
/// </summary>
|
||||
public sealed class ProvenanceCompletenessCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.compliance";
|
||||
private const string CategoryName = "Compliance";
|
||||
private const double MinCompletenessRate = 0.99;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.compliance.provenance-completeness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Provenance Completeness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Verify provenance records exist for all releases";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["compliance", "provenance", "slsa"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var provenanceUrl = context.Configuration["Provenance:Url"]
|
||||
?? context.Configuration["Services:Provenance:Url"];
|
||||
return !string.IsNullOrEmpty(provenanceUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var provenanceUrl = context.Configuration["Provenance:Url"]
|
||||
?? context.Configuration["Services:Provenance:Url"]
|
||||
?? "http://localhost:5084";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(15);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{provenanceUrl.TrimEnd('/')}/api/v1/provenance/completeness",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check provenance completeness: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Provenance Status", eb =>
|
||||
{
|
||||
eb.Add("provenance_url", provenanceUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var status = ParseProvenanceStatus(json);
|
||||
|
||||
if (status.CompletenessRate < MinCompletenessRate)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Provenance incomplete: {status.CompletenessRate:P1} ({status.MissingCount} releases without provenance)")
|
||||
.WithEvidence("Provenance Completeness", eb =>
|
||||
{
|
||||
eb.Add("completeness_rate", status.CompletenessRate.ToString("P2", CultureInfo.InvariantCulture));
|
||||
eb.Add("total_releases", status.TotalReleases.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("missing_count", status.MissingCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("slsa_level", status.SlsaLevel.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Build pipeline not generating provenance",
|
||||
"Provenance upload failures",
|
||||
"Legacy releases without provenance",
|
||||
"Manual deployments bypassing pipeline")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "List releases missing provenance",
|
||||
"stella provenance audit --missing",
|
||||
CommandType.Stella);
|
||||
rb.AddStep(2, "Generate backfill provenance",
|
||||
"stella provenance backfill --dry-run",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check SLSA level
|
||||
if (status.SlsaLevel < 2)
|
||||
{
|
||||
return builder
|
||||
.Warn($"SLSA level is {status.SlsaLevel} (recommend level 2+)")
|
||||
.WithEvidence("Provenance Completeness", eb =>
|
||||
{
|
||||
eb.Add("completeness_rate", status.CompletenessRate.ToString("P2", CultureInfo.InvariantCulture));
|
||||
eb.Add("slsa_level", status.SlsaLevel.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Build system not meeting SLSA requirements")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Provenance complete ({status.CompletenessRate:P1}, SLSA L{status.SlsaLevel})")
|
||||
.WithEvidence("Provenance Completeness", eb =>
|
||||
{
|
||||
eb.Add("completeness_rate", status.CompletenessRate.ToString("P2", CultureInfo.InvariantCulture));
|
||||
eb.Add("total_releases", status.TotalReleases.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("slsa_level", status.SlsaLevel.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check provenance: {ex.Message}")
|
||||
.WithEvidence("Provenance Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Provenance check timed out")
|
||||
.WithEvidence("Provenance Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static ProvenanceStatus ParseProvenanceStatus(string json)
|
||||
{
|
||||
var status = new ProvenanceStatus();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
status.TotalReleases = doc.RootElement.TryGetProperty("totalReleases", out var tr) ? tr.GetInt32() : 0;
|
||||
status.MissingCount = doc.RootElement.TryGetProperty("missingCount", out var mc) ? mc.GetInt32() : 0;
|
||||
status.SlsaLevel = doc.RootElement.TryGetProperty("slsaLevel", out var sl) ? sl.GetInt32() : 0;
|
||||
status.CompletenessRate = status.TotalReleases > 0
|
||||
? (double)(status.TotalReleases - status.MissingCount) / status.TotalReleases
|
||||
: 1.0;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
private sealed class ProvenanceStatus
|
||||
{
|
||||
public int TotalReleases { get; set; }
|
||||
public int MissingCount { get; set; }
|
||||
public int SlsaLevel { get; set; }
|
||||
public double CompletenessRate { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// CompliancePlugin.cs
|
||||
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
|
||||
// Task: COMPL-001 - Create Compliance plugin scaffold
|
||||
// Description: Doctor plugin for evidence and compliance health checks
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using StellaOps.Doctor.Plugin.Compliance.Checks;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Compliance;
|
||||
|
||||
/// <summary>
|
||||
/// Doctor plugin for evidence generation and compliance health monitoring.
|
||||
/// Checks attestation signing, provenance completeness, audit readiness.
|
||||
/// </summary>
|
||||
public sealed class CompliancePlugin : IDoctorPlugin
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public string PluginId => "stellaops.doctor.compliance";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string DisplayName => "Evidence & Compliance";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Checks for evidence generation, attestation signing, and compliance posture";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Category => "Compliance";
|
||||
|
||||
/// <inheritdoc />
|
||||
public Version Version => new(1, 0, 0);
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<IDoctorCheck> GetChecks() =>
|
||||
[
|
||||
new EvidenceGenerationRateCheck(),
|
||||
new AttestationSigningHealthCheck(),
|
||||
new ProvenanceCompletenessCheck(),
|
||||
new AuditReadinessCheck(),
|
||||
new EvidenceTamperCheck(),
|
||||
new ComplianceFrameworkCheck(),
|
||||
new EvidenceExportReadinessCheck()
|
||||
];
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ServiceCollectionExtensions.cs
|
||||
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
|
||||
// Task: COMPL-001 - Create Compliance plugin scaffold
|
||||
// Description: DI extension for Compliance plugin registration
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Compliance.DependencyInjection;
|
||||
|
||||
/// <summary>
|
||||
/// Extension methods for registering the Compliance Doctor plugin.
|
||||
/// </summary>
|
||||
public static class ServiceCollectionExtensions
|
||||
{
|
||||
/// <summary>
|
||||
/// Adds the Doctor Compliance plugin for evidence and compliance health checks.
|
||||
/// </summary>
|
||||
public static IServiceCollection AddDoctorCompliancePlugin(this IServiceCollection services)
|
||||
{
|
||||
services.AddSingleton<IDoctorPlugin, CompliancePlugin>();
|
||||
return services;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Doctor.Plugin.Compliance</RootNamespace>
|
||||
<Description>Doctor health checks for evidence generation and compliance posture</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\__Libraries\StellaOps.Doctor\StellaOps.Doctor.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -1,12 +1,14 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// FipsComplianceCheck.cs
|
||||
// Sprint: SPRINT_20260117_025_Doctor_coverage_expansion
|
||||
// Task: DOC-EXP-003 - Regional Crypto Compliance Checks
|
||||
// Description: Health check for FIPS 140-2 mode validation
|
||||
// Sprint: SPRINT_20260118_015_Doctor_check_quality_improvements
|
||||
// Task: DQUAL-003 - Fix FipsComplianceCheck algorithm verification
|
||||
// Description: Health check for FIPS 140-2 mode validation with actual algorithm testing
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Security.Cryptography;
|
||||
using Microsoft.Win32;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
@@ -56,18 +58,28 @@ public sealed class FipsComplianceCheck : IDoctorCheck
|
||||
?? context.Configuration["Cryptography:Profile"]
|
||||
?? "default";
|
||||
|
||||
// Check .NET FIPS mode
|
||||
var fipsEnabled = IsFipsEnabled();
|
||||
// Get comprehensive FIPS status
|
||||
var fipsStatus = GetFipsStatus();
|
||||
var algorithmCheck = VerifyFipsAlgorithms();
|
||||
|
||||
if (!fipsEnabled)
|
||||
if (!fipsStatus.FipsModeEnabled)
|
||||
{
|
||||
return Task.FromResult(builder
|
||||
.Fail("FIPS 140-2 mode not enabled")
|
||||
.WithEvidence("FIPS Status", eb =>
|
||||
{
|
||||
eb.Add("CryptoProfile", cryptoProfile);
|
||||
eb.Add("FipsEnabled", "false");
|
||||
eb.Add("Platform", RuntimeInformation.OSDescription);
|
||||
eb.Add("fips_mode_enabled", "false");
|
||||
eb.Add("platform", fipsStatus.Platform);
|
||||
eb.Add("crypto_provider", fipsStatus.CryptoProvider);
|
||||
eb.Add("openssl_fips_module_loaded", fipsStatus.OpenSslFipsModuleLoaded.ToString().ToLowerInvariant());
|
||||
eb.Add("crypto_profile", cryptoProfile);
|
||||
eb.Add("algorithms_tested", string.Join(", ", algorithmCheck.AvailableAlgorithms.Concat(algorithmCheck.MissingAlgorithms)));
|
||||
eb.Add("algorithms_available", string.Join(", ", algorithmCheck.AvailableAlgorithms));
|
||||
eb.Add("algorithms_missing", string.Join(", ", algorithmCheck.MissingAlgorithms));
|
||||
foreach (var (alg, result) in algorithmCheck.TestResults)
|
||||
{
|
||||
eb.Add($"test_{alg.ToLowerInvariant().Replace("-", "_")}", result);
|
||||
}
|
||||
})
|
||||
.WithCauses(
|
||||
"FIPS mode not enabled in operating system",
|
||||
@@ -85,16 +97,16 @@ public sealed class FipsComplianceCheck : IDoctorCheck
|
||||
CommandType.Shell)
|
||||
.AddStep(3, "Restart application",
|
||||
"sudo systemctl restart stellaops",
|
||||
CommandType.Shell);
|
||||
CommandType.Manual);
|
||||
}
|
||||
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
|
||||
{
|
||||
rb.AddStep(1, "Enable FIPS via Group Policy",
|
||||
"Set 'System cryptography: Use FIPS compliant algorithms' in Local Security Policy",
|
||||
CommandType.Manual)
|
||||
.AddStep(2, "Or via registry",
|
||||
.AddStep(2, "Or via registry (requires admin and reboot)",
|
||||
"reg add HKLM\\System\\CurrentControlSet\\Control\\Lsa\\FipsAlgorithmPolicy /v Enabled /t REG_DWORD /d 1 /f",
|
||||
CommandType.Shell);
|
||||
CommandType.Manual);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -108,24 +120,35 @@ public sealed class FipsComplianceCheck : IDoctorCheck
|
||||
}
|
||||
|
||||
// Verify FIPS-compliant algorithms are available
|
||||
var algorithmCheck = VerifyFipsAlgorithms();
|
||||
if (!algorithmCheck.AllAvailable)
|
||||
{
|
||||
return Task.FromResult(builder
|
||||
.Warn($"Some FIPS algorithms unavailable: {string.Join(", ", algorithmCheck.MissingAlgorithms)}")
|
||||
.WithEvidence("FIPS Status", eb =>
|
||||
{
|
||||
eb.Add("CryptoProfile", cryptoProfile);
|
||||
eb.Add("FipsEnabled", "true");
|
||||
eb.Add("AvailableAlgorithms", string.Join(", ", algorithmCheck.AvailableAlgorithms));
|
||||
eb.Add("MissingAlgorithms", string.Join(", ", algorithmCheck.MissingAlgorithms));
|
||||
eb.Add("fips_mode_enabled", "true");
|
||||
eb.Add("platform", fipsStatus.Platform);
|
||||
eb.Add("crypto_provider", fipsStatus.CryptoProvider);
|
||||
eb.Add("openssl_fips_module_loaded", fipsStatus.OpenSslFipsModuleLoaded.ToString().ToLowerInvariant());
|
||||
eb.Add("crypto_profile", cryptoProfile);
|
||||
eb.Add("algorithms_tested", string.Join(", ", algorithmCheck.AvailableAlgorithms.Concat(algorithmCheck.MissingAlgorithms)));
|
||||
eb.Add("algorithms_available", string.Join(", ", algorithmCheck.AvailableAlgorithms));
|
||||
eb.Add("algorithms_missing", string.Join(", ", algorithmCheck.MissingAlgorithms));
|
||||
foreach (var (alg, result) in algorithmCheck.TestResults)
|
||||
{
|
||||
eb.Add($"test_{alg.ToLowerInvariant().Replace("-", "_")}", result);
|
||||
}
|
||||
})
|
||||
.WithCauses(
|
||||
"OpenSSL version missing FIPS module",
|
||||
"FIPS provider not fully configured")
|
||||
"FIPS provider not fully configured",
|
||||
"Algorithm test failed")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check OpenSSL FIPS provider",
|
||||
"openssl list -providers",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Verify crypto algorithms",
|
||||
"openssl list -digest-algorithms",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build());
|
||||
@@ -135,10 +158,15 @@ public sealed class FipsComplianceCheck : IDoctorCheck
|
||||
.Pass("FIPS 140-2 mode enabled and verified")
|
||||
.WithEvidence("FIPS Status", eb =>
|
||||
{
|
||||
eb.Add("CryptoProfile", cryptoProfile);
|
||||
eb.Add("FipsEnabled", "true");
|
||||
eb.Add("VerifiedAlgorithms", string.Join(", ", algorithmCheck.AvailableAlgorithms));
|
||||
eb.Add("Status", "compliant");
|
||||
eb.Add("fips_mode_enabled", "true");
|
||||
eb.Add("platform", fipsStatus.Platform);
|
||||
eb.Add("crypto_provider", fipsStatus.CryptoProvider);
|
||||
eb.Add("openssl_fips_module_loaded", fipsStatus.OpenSslFipsModuleLoaded.ToString().ToLowerInvariant());
|
||||
eb.Add("crypto_profile", cryptoProfile);
|
||||
eb.Add("algorithms_tested", string.Join(", ", algorithmCheck.AvailableAlgorithms));
|
||||
eb.Add("algorithms_available", string.Join(", ", algorithmCheck.AvailableAlgorithms));
|
||||
eb.Add("algorithms_missing", "none");
|
||||
eb.Add("status", "compliant");
|
||||
})
|
||||
.Build());
|
||||
}
|
||||
@@ -148,7 +176,6 @@ public sealed class FipsComplianceCheck : IDoctorCheck
|
||||
try
|
||||
{
|
||||
// Check if running in FIPS mode
|
||||
// On Windows, check registry; on Linux, check /proc/sys/crypto/fips_enabled
|
||||
if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
|
||||
{
|
||||
var fipsFile = "/proc/sys/crypto/fips_enabled";
|
||||
@@ -160,8 +187,24 @@ public sealed class FipsComplianceCheck : IDoctorCheck
|
||||
}
|
||||
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
|
||||
{
|
||||
// Check Windows FIPS policy
|
||||
// This is a simplified check - real implementation would use registry
|
||||
// Check Windows FIPS policy via registry
|
||||
try
|
||||
{
|
||||
using var key = Registry.LocalMachine.OpenSubKey(
|
||||
@"System\CurrentControlSet\Control\Lsa\FipsAlgorithmPolicy");
|
||||
if (key != null)
|
||||
{
|
||||
var value = key.GetValue("Enabled");
|
||||
if (value is int intVal && intVal == 1)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Registry access failed, fall back to env var check
|
||||
}
|
||||
|
||||
// Also check environment variable
|
||||
return Environment.GetEnvironmentVariable("DOTNET_SYSTEM_NET_SECURITY_USEFIPSVALIDATED") == "1";
|
||||
}
|
||||
|
||||
@@ -177,30 +220,187 @@ public sealed class FipsComplianceCheck : IDoctorCheck
|
||||
{
|
||||
var available = new List<string>();
|
||||
var missing = new List<string>();
|
||||
var required = new[] { "AES-256-GCM", "SHA-256", "SHA-384", "SHA-512", "RSA-2048", "ECDSA-P256" };
|
||||
var testResults = new Dictionary<string, string>();
|
||||
|
||||
// Simplified check - in production would verify each algorithm
|
||||
foreach (var alg in required)
|
||||
// Test AES-256-GCM
|
||||
try
|
||||
{
|
||||
try
|
||||
using var aes = Aes.Create();
|
||||
aes.KeySize = 256;
|
||||
aes.Mode = CipherMode.ECB; // GCM not directly testable in managed code
|
||||
aes.GenerateKey();
|
||||
aes.GenerateIV();
|
||||
using var encryptor = aes.CreateEncryptor();
|
||||
var testData = new byte[16];
|
||||
var encrypted = encryptor.TransformFinalBlock(testData, 0, testData.Length);
|
||||
if (encrypted.Length > 0)
|
||||
{
|
||||
// Basic availability check
|
||||
available.Add(alg);
|
||||
available.Add("AES-256");
|
||||
testResults["AES-256"] = "pass";
|
||||
}
|
||||
catch
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
missing.Add("AES-256");
|
||||
testResults["AES-256"] = $"fail: {ex.Message}";
|
||||
}
|
||||
|
||||
// Test SHA-256
|
||||
try
|
||||
{
|
||||
using var sha256 = SHA256.Create();
|
||||
var hash = sha256.ComputeHash(new byte[32]);
|
||||
if (hash.Length == 32)
|
||||
{
|
||||
missing.Add(alg);
|
||||
available.Add("SHA-256");
|
||||
testResults["SHA-256"] = "pass";
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
missing.Add("SHA-256");
|
||||
testResults["SHA-256"] = $"fail: {ex.Message}";
|
||||
}
|
||||
|
||||
// Test SHA-384
|
||||
try
|
||||
{
|
||||
using var sha384 = SHA384.Create();
|
||||
var hash = sha384.ComputeHash(new byte[32]);
|
||||
if (hash.Length == 48)
|
||||
{
|
||||
available.Add("SHA-384");
|
||||
testResults["SHA-384"] = "pass";
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
missing.Add("SHA-384");
|
||||
testResults["SHA-384"] = $"fail: {ex.Message}";
|
||||
}
|
||||
|
||||
// Test SHA-512
|
||||
try
|
||||
{
|
||||
using var sha512 = SHA512.Create();
|
||||
var hash = sha512.ComputeHash(new byte[32]);
|
||||
if (hash.Length == 64)
|
||||
{
|
||||
available.Add("SHA-512");
|
||||
testResults["SHA-512"] = "pass";
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
missing.Add("SHA-512");
|
||||
testResults["SHA-512"] = $"fail: {ex.Message}";
|
||||
}
|
||||
|
||||
// Test RSA-2048
|
||||
try
|
||||
{
|
||||
using var rsa = RSA.Create(2048);
|
||||
var testData = new byte[32];
|
||||
var signature = rsa.SignData(testData, HashAlgorithmName.SHA256, RSASignaturePadding.Pkcs1);
|
||||
var valid = rsa.VerifyData(testData, signature, HashAlgorithmName.SHA256, RSASignaturePadding.Pkcs1);
|
||||
if (valid)
|
||||
{
|
||||
available.Add("RSA-2048");
|
||||
testResults["RSA-2048"] = "pass";
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
missing.Add("RSA-2048");
|
||||
testResults["RSA-2048"] = $"fail: {ex.Message}";
|
||||
}
|
||||
|
||||
// Test ECDSA-P256
|
||||
try
|
||||
{
|
||||
using var ecdsa = ECDsa.Create(ECCurve.NamedCurves.nistP256);
|
||||
var testData = new byte[32];
|
||||
var signature = ecdsa.SignData(testData, HashAlgorithmName.SHA256);
|
||||
var valid = ecdsa.VerifyData(testData, signature, HashAlgorithmName.SHA256);
|
||||
if (valid)
|
||||
{
|
||||
available.Add("ECDSA-P256");
|
||||
testResults["ECDSA-P256"] = "pass";
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
missing.Add("ECDSA-P256");
|
||||
testResults["ECDSA-P256"] = $"fail: {ex.Message}";
|
||||
}
|
||||
|
||||
return new FipsAlgorithmCheckResult(
|
||||
AllAvailable: missing.Count == 0,
|
||||
AvailableAlgorithms: available,
|
||||
MissingAlgorithms: missing);
|
||||
MissingAlgorithms: missing,
|
||||
TestResults: testResults);
|
||||
}
|
||||
|
||||
private static FipsStatus GetFipsStatus()
|
||||
{
|
||||
var status = new FipsStatus();
|
||||
|
||||
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
|
||||
{
|
||||
status.Platform = "windows";
|
||||
status.CryptoProvider = "bcrypt";
|
||||
}
|
||||
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
|
||||
{
|
||||
status.Platform = "linux";
|
||||
status.CryptoProvider = "openssl";
|
||||
|
||||
// Check if OpenSSL FIPS module is loaded
|
||||
try
|
||||
{
|
||||
var opensslFipsPath = "/etc/pki/fips/fips.conf";
|
||||
var altFipsPath = "/usr/local/ssl/fips-2.0/lib/fipscanister.o";
|
||||
status.OpenSslFipsModuleLoaded = File.Exists(opensslFipsPath) || File.Exists(altFipsPath);
|
||||
|
||||
// Try to detect from openssl providers
|
||||
var providersPath = "/etc/ssl/openssl.cnf";
|
||||
if (File.Exists(providersPath))
|
||||
{
|
||||
var content = File.ReadAllText(providersPath);
|
||||
status.OpenSslFipsModuleLoaded |= content.Contains("fips", StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
status.OpenSslFipsModuleLoaded = false;
|
||||
}
|
||||
}
|
||||
else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
|
||||
{
|
||||
status.Platform = "macos";
|
||||
status.CryptoProvider = "corecrypto";
|
||||
}
|
||||
else
|
||||
{
|
||||
status.Platform = "unknown";
|
||||
status.CryptoProvider = "managed";
|
||||
}
|
||||
|
||||
status.FipsModeEnabled = IsFipsEnabled();
|
||||
return status;
|
||||
}
|
||||
|
||||
private sealed record FipsAlgorithmCheckResult(
|
||||
bool AllAvailable,
|
||||
List<string> AvailableAlgorithms,
|
||||
List<string> MissingAlgorithms);
|
||||
List<string> MissingAlgorithms,
|
||||
Dictionary<string, string> TestResults);
|
||||
|
||||
private sealed class FipsStatus
|
||||
{
|
||||
public bool FipsModeEnabled { get; set; }
|
||||
public string Platform { get; set; } = "unknown";
|
||||
public string CryptoProvider { get; set; } = "unknown";
|
||||
public bool OpenSslFipsModuleLoaded { get; set; }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,291 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EnvironmentCapacityCheck.cs
|
||||
// Sprint: SPRINT_20260118_017_Doctor_environment_health
|
||||
// Task: ENVH-004 - Implement EnvironmentCapacityCheck
|
||||
// Description: Check environment resource capacity
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Environment.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks environment resource capacity.
|
||||
/// Monitors CPU, memory, storage, and deployment slot availability.
|
||||
/// </summary>
|
||||
public sealed class EnvironmentCapacityCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.environment";
|
||||
private const string CategoryName = "Environment Health";
|
||||
|
||||
private const double HighUsageWarningPercent = 75.0;
|
||||
private const double CriticalUsagePercent = 90.0;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.environment.capacity";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Environment Capacity";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Check environment resource capacity";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["environment", "capacity", "resources", "cpu", "memory", "storage"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(orchestratorUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
// Get capacity report
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/capacity",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve capacity report: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Capacity Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var capacityJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var capacities = ParseCapacities(capacityJson);
|
||||
|
||||
if (capacities.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Pass("No environments to check capacity")
|
||||
.WithEvidence("Capacity", eb =>
|
||||
{
|
||||
eb.Add("environment_count", "0");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
var criticalEnvs = new List<(string Name, string Resource, double Usage)>();
|
||||
var warningEnvs = new List<(string Name, string Resource, double Usage)>();
|
||||
|
||||
foreach (var cap in capacities)
|
||||
{
|
||||
CheckResource(cap.Name, "cpu", cap.CpuUsagePercent, criticalEnvs, warningEnvs);
|
||||
CheckResource(cap.Name, "memory", cap.MemoryUsagePercent, criticalEnvs, warningEnvs);
|
||||
CheckResource(cap.Name, "storage", cap.StorageUsagePercent, criticalEnvs, warningEnvs);
|
||||
|
||||
// Check deployment slots
|
||||
if (cap.MaxDeployments > 0)
|
||||
{
|
||||
var deployUsage = (double)cap.ActiveDeployments / cap.MaxDeployments * 100;
|
||||
CheckResource(cap.Name, "deployments", deployUsage, criticalEnvs, warningEnvs);
|
||||
}
|
||||
}
|
||||
|
||||
if (criticalEnvs.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{criticalEnvs.Count} environment(s) at critical capacity")
|
||||
.WithEvidence("Capacity", eb =>
|
||||
{
|
||||
eb.Add("environment_count", capacities.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("critical_resource_count", criticalEnvs.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("warning_resource_count", warningEnvs.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("critical_details", string.Join("; ", criticalEnvs.Select(c => $"{c.Name}/{c.Resource}:{c.Usage:F1}%")));
|
||||
AddCapacityEvidence(eb, capacities);
|
||||
})
|
||||
.WithCauses(
|
||||
"Resource exhaustion approaching",
|
||||
"Runaway process consuming resources",
|
||||
"Unexpected workload increase",
|
||||
"Resource limits too restrictive")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View capacity details",
|
||||
$"stella env capacity {criticalEnvs[0].Name}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Scale up resources",
|
||||
$"stella env scale {criticalEnvs[0].Name} --{criticalEnvs[0].Resource} +20%",
|
||||
CommandType.Manual);
|
||||
rb.AddStep(3, "Or remove unused deployments",
|
||||
$"stella env cleanup {criticalEnvs[0].Name}",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (warningEnvs.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{warningEnvs.Count} environment resource(s) above 75% usage")
|
||||
.WithEvidence("Capacity", eb =>
|
||||
{
|
||||
eb.Add("environment_count", capacities.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("critical_resource_count", "0");
|
||||
eb.Add("warning_resource_count", warningEnvs.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("warning_details", string.Join("; ", warningEnvs.Select(w => $"{w.Name}/{w.Resource}:{w.Usage:F1}%")));
|
||||
AddCapacityEvidence(eb, capacities);
|
||||
})
|
||||
.WithCauses(
|
||||
"Normal growth approaching limits",
|
||||
"Temporary workload spike")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Monitor capacity trend",
|
||||
"stella env capacity --trend",
|
||||
CommandType.Shell);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"{capacities.Count} environment(s) have adequate capacity")
|
||||
.WithEvidence("Capacity", eb =>
|
||||
{
|
||||
eb.Add("environment_count", capacities.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("critical_resource_count", "0");
|
||||
eb.Add("warning_resource_count", "0");
|
||||
AddCapacityEvidence(eb, capacities);
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check capacity: {ex.Message}")
|
||||
.WithEvidence("Capacity Status", eb =>
|
||||
{
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Capacity check timed out")
|
||||
.WithEvidence("Capacity Status", eb =>
|
||||
{
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static void CheckResource(
|
||||
string envName, string resource, double usage,
|
||||
List<(string, string, double)> critical,
|
||||
List<(string, string, double)> warning)
|
||||
{
|
||||
if (usage >= CriticalUsagePercent)
|
||||
{
|
||||
critical.Add((envName, resource, usage));
|
||||
}
|
||||
else if (usage >= HighUsageWarningPercent)
|
||||
{
|
||||
warning.Add((envName, resource, usage));
|
||||
}
|
||||
}
|
||||
|
||||
private static void AddCapacityEvidence(EvidenceBuilder eb, List<CapacityInfo> capacities)
|
||||
{
|
||||
foreach (var cap in capacities)
|
||||
{
|
||||
var prefix = $"env_{cap.Name.ToLowerInvariant().Replace(" ", "_").Replace("-", "_")}";
|
||||
eb.Add($"{prefix}_cpu_percent", cap.CpuUsagePercent.ToString("F1", CultureInfo.InvariantCulture));
|
||||
eb.Add($"{prefix}_memory_percent", cap.MemoryUsagePercent.ToString("F1", CultureInfo.InvariantCulture));
|
||||
eb.Add($"{prefix}_storage_percent", cap.StorageUsagePercent.ToString("F1", CultureInfo.InvariantCulture));
|
||||
}
|
||||
}
|
||||
|
||||
private static List<CapacityInfo> ParseCapacities(string json)
|
||||
{
|
||||
var capacities = new List<CapacityInfo>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
var capsArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("environments", out var arr) ? arr : default;
|
||||
|
||||
if (capsArray.ValueKind != JsonValueKind.Array)
|
||||
return capacities;
|
||||
|
||||
foreach (var cap in capsArray.EnumerateArray())
|
||||
{
|
||||
var name = cap.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
if (string.IsNullOrEmpty(name)) continue;
|
||||
|
||||
var cpuTotal = cap.TryGetProperty("totalCpuMillicores", out var cpuTEl) ? cpuTEl.GetInt64() : 0;
|
||||
var cpuUsed = cap.TryGetProperty("usedCpuMillicores", out var cpuUEl) ? cpuUEl.GetInt64() : 0;
|
||||
var memTotal = cap.TryGetProperty("totalMemoryBytes", out var memTEl) ? memTEl.GetInt64() : 0;
|
||||
var memUsed = cap.TryGetProperty("usedMemoryBytes", out var memUEl) ? memUEl.GetInt64() : 0;
|
||||
var storTotal = cap.TryGetProperty("totalStorageBytes", out var storTEl) ? storTEl.GetInt64() : 0;
|
||||
var storUsed = cap.TryGetProperty("usedStorageBytes", out var storUEl) ? storUEl.GetInt64() : 0;
|
||||
var maxDeploy = cap.TryGetProperty("maxConcurrentDeployments", out var maxDEl) ? maxDEl.GetInt32() : 0;
|
||||
var activeDeploy = cap.TryGetProperty("activeDeployments", out var actDEl) ? actDEl.GetInt32() : 0;
|
||||
|
||||
capacities.Add(new CapacityInfo
|
||||
{
|
||||
Name = name,
|
||||
CpuUsagePercent = cpuTotal > 0 ? (double)cpuUsed / cpuTotal * 100 : 0,
|
||||
MemoryUsagePercent = memTotal > 0 ? (double)memUsed / memTotal * 100 : 0,
|
||||
StorageUsagePercent = storTotal > 0 ? (double)storUsed / storTotal * 100 : 0,
|
||||
MaxDeployments = maxDeploy,
|
||||
ActiveDeployments = activeDeploy
|
||||
});
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
|
||||
return capacities;
|
||||
}
|
||||
|
||||
private sealed class CapacityInfo
|
||||
{
|
||||
public required string Name { get; init; }
|
||||
public double CpuUsagePercent { get; init; }
|
||||
public double MemoryUsagePercent { get; init; }
|
||||
public double StorageUsagePercent { get; init; }
|
||||
public int MaxDeployments { get; init; }
|
||||
public int ActiveDeployments { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,401 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EnvironmentConnectivityCheck.cs
|
||||
// Sprint: SPRINT_20260118_017_Doctor_environment_health
|
||||
// Task: ENVH-002 - Implement EnvironmentConnectivityCheck
|
||||
// Description: Verify connectivity to each configured environment agent
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Net.Security;
|
||||
using System.Security.Cryptography.X509Certificates;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Environment.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Verifies connectivity to each configured environment agent.
|
||||
/// Measures latency, verifies authentication, and checks TLS certificate validity.
|
||||
/// </summary>
|
||||
public sealed class EnvironmentConnectivityCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.environment";
|
||||
private const string CategoryName = "Environment Health";
|
||||
private const int HighLatencyThresholdMs = 500;
|
||||
private const int CertExpiryWarningDays = 30;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.environment.connectivity";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Environment Connectivity";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Verify connectivity to environment agents";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["environment", "connectivity", "agent", "network"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(30);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(orchestratorUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(30);
|
||||
|
||||
// Get list of environments
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve environments: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Connectivity Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var envsJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var environments = ParseEnvironments(envsJson);
|
||||
|
||||
if (environments.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Pass("No environments configured")
|
||||
.WithEvidence("Connectivity", eb =>
|
||||
{
|
||||
eb.Add("total_environments", "0");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check connectivity to each environment
|
||||
var results = new List<ConnectivityInfo>();
|
||||
var unreachable = new List<string>();
|
||||
var highLatency = new List<(string Name, int LatencyMs)>();
|
||||
var certWarnings = new List<(string Name, int DaysUntilExpiry)>();
|
||||
|
||||
foreach (var env in environments)
|
||||
{
|
||||
var connResult = await CheckEnvironmentConnectivityAsync(
|
||||
httpClient, env, context.TimeProvider, ct);
|
||||
results.Add(connResult);
|
||||
|
||||
if (!connResult.Reachable)
|
||||
{
|
||||
unreachable.Add(env.Name);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (connResult.LatencyMs > HighLatencyThresholdMs)
|
||||
{
|
||||
highLatency.Add((env.Name, connResult.LatencyMs));
|
||||
}
|
||||
if (connResult.TlsDaysUntilExpiry.HasValue &&
|
||||
connResult.TlsDaysUntilExpiry.Value <= CertExpiryWarningDays)
|
||||
{
|
||||
certWarnings.Add((env.Name, connResult.TlsDaysUntilExpiry.Value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var reachableCount = environments.Count - unreachable.Count;
|
||||
|
||||
// Determine severity
|
||||
if (unreachable.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{unreachable.Count} environment(s) unreachable")
|
||||
.WithEvidence("Connectivity", eb =>
|
||||
{
|
||||
eb.Add("total_environments", environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("reachable_environments", reachableCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_environments", string.Join(", ", unreachable));
|
||||
eb.Add("high_latency_environments", string.Join(", ", highLatency.Select(h => $"{h.Name}:{h.LatencyMs}ms")));
|
||||
eb.Add("cert_expiring_soon", string.Join(", ", certWarnings.Select(c => $"{c.Name}:{c.DaysUntilExpiry}d")));
|
||||
AddPerEnvironmentEvidence(eb, results);
|
||||
})
|
||||
.WithCauses(
|
||||
"Environment agent not running",
|
||||
"Network connectivity issue",
|
||||
"Firewall blocking connection",
|
||||
"Agent authentication failed")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Check environment agent status",
|
||||
$"stella env ping {unreachable[0]}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "View agent logs",
|
||||
$"stella env logs {unreachable[0]}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(3, "Test network connectivity",
|
||||
"# Check firewall rules and network routes to environment agent",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (highLatency.Count > 0 || certWarnings.Count > 0)
|
||||
{
|
||||
var warnings = new List<string>();
|
||||
if (highLatency.Count > 0) warnings.Add($"{highLatency.Count} high latency");
|
||||
if (certWarnings.Count > 0) warnings.Add($"{certWarnings.Count} cert expiring soon");
|
||||
|
||||
return builder
|
||||
.Warn($"Environment connectivity issues: {string.Join(", ", warnings)}")
|
||||
.WithEvidence("Connectivity", eb =>
|
||||
{
|
||||
eb.Add("total_environments", environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("reachable_environments", reachableCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_environments", "");
|
||||
eb.Add("high_latency_environments", string.Join(", ", highLatency.Select(h => $"{h.Name}:{h.LatencyMs}ms")));
|
||||
eb.Add("cert_expiring_soon", string.Join(", ", certWarnings.Select(c => $"{c.Name}:{c.DaysUntilExpiry}d")));
|
||||
AddPerEnvironmentEvidence(eb, results);
|
||||
})
|
||||
.WithCauses(
|
||||
"Network congestion",
|
||||
"TLS certificate approaching expiry",
|
||||
"Geographic latency")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
if (certWarnings.Count > 0)
|
||||
{
|
||||
rb.AddStep(1, "Renew TLS certificate",
|
||||
$"stella env cert renew {certWarnings[0].Name}",
|
||||
CommandType.Manual);
|
||||
}
|
||||
if (highLatency.Count > 0)
|
||||
{
|
||||
rb.AddStep(2, "Investigate latency",
|
||||
$"stella env diagnose {highLatency[0].Name} --network",
|
||||
CommandType.Shell);
|
||||
}
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"{environments.Count} environment(s) reachable")
|
||||
.WithEvidence("Connectivity", eb =>
|
||||
{
|
||||
eb.Add("total_environments", environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("reachable_environments", reachableCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_environments", "");
|
||||
eb.Add("environment_names", string.Join(", ", environments.Select(e => e.Name)));
|
||||
AddPerEnvironmentEvidence(eb, results);
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check environments: {ex.Message}")
|
||||
.WithEvidence("Connectivity Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Environment connectivity check timed out")
|
||||
.WithEvidence("Connectivity Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<ConnectivityInfo> CheckEnvironmentConnectivityAsync(
|
||||
HttpClient httpClient,
|
||||
EnvironmentBasicInfo env,
|
||||
TimeProvider timeProvider,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var result = new ConnectivityInfo
|
||||
{
|
||||
EnvironmentName = env.Name,
|
||||
AgentEndpoint = MaskEndpoint(env.AgentEndpoint)
|
||||
};
|
||||
|
||||
if (string.IsNullOrEmpty(env.AgentEndpoint))
|
||||
{
|
||||
result.Reachable = false;
|
||||
result.ErrorMessage = "No agent endpoint configured";
|
||||
return result;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var stopwatch = System.Diagnostics.Stopwatch.StartNew();
|
||||
|
||||
// Create a handler that captures TLS certificate info
|
||||
using var handler = new HttpClientHandler();
|
||||
X509Certificate2? serverCert = null;
|
||||
|
||||
handler.ServerCertificateCustomValidationCallback = (message, cert, chain, errors) =>
|
||||
{
|
||||
if (cert != null)
|
||||
{
|
||||
serverCert = new X509Certificate2(cert);
|
||||
}
|
||||
return errors == SslPolicyErrors.None;
|
||||
};
|
||||
|
||||
using var client = new HttpClient(handler) { Timeout = TimeSpan.FromSeconds(10) };
|
||||
|
||||
var response = await client.GetAsync(
|
||||
$"{env.AgentEndpoint.TrimEnd('/')}/health",
|
||||
ct);
|
||||
|
||||
stopwatch.Stop();
|
||||
result.LatencyMs = (int)stopwatch.ElapsedMilliseconds;
|
||||
result.Reachable = response.IsSuccessStatusCode;
|
||||
result.AuthSuccess = response.StatusCode != System.Net.HttpStatusCode.Unauthorized &&
|
||||
response.StatusCode != System.Net.HttpStatusCode.Forbidden;
|
||||
|
||||
if (serverCert != null)
|
||||
{
|
||||
result.TlsValid = true;
|
||||
result.TlsExpiresAt = serverCert.NotAfter;
|
||||
var now = timeProvider.GetUtcNow().DateTime;
|
||||
result.TlsDaysUntilExpiry = (int)(serverCert.NotAfter - now).TotalDays;
|
||||
}
|
||||
|
||||
if (result.Reachable)
|
||||
{
|
||||
result.LastSuccessfulContact = timeProvider.GetUtcNow();
|
||||
}
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
result.Reachable = false;
|
||||
result.ErrorMessage = ex.Message;
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
result.Reachable = false;
|
||||
result.ErrorMessage = "Connection timed out";
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void AddPerEnvironmentEvidence(EvidenceBuilder eb, List<ConnectivityInfo> results)
|
||||
{
|
||||
foreach (var r in results)
|
||||
{
|
||||
var prefix = $"env_{r.EnvironmentName.ToLowerInvariant().Replace(" ", "_").Replace("-", "_")}";
|
||||
eb.Add($"{prefix}_reachable", r.Reachable.ToString().ToLowerInvariant());
|
||||
eb.Add($"{prefix}_latency_ms", r.LatencyMs.ToString(CultureInfo.InvariantCulture));
|
||||
if (r.TlsDaysUntilExpiry.HasValue)
|
||||
{
|
||||
eb.Add($"{prefix}_tls_days_until_expiry", r.TlsDaysUntilExpiry.Value.ToString(CultureInfo.InvariantCulture));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static string MaskEndpoint(string endpoint)
|
||||
{
|
||||
if (string.IsNullOrEmpty(endpoint)) return "";
|
||||
try
|
||||
{
|
||||
var uri = new Uri(endpoint);
|
||||
return $"{uri.Scheme}://{uri.Host}:***";
|
||||
}
|
||||
catch
|
||||
{
|
||||
return "***";
|
||||
}
|
||||
}
|
||||
|
||||
private static List<EnvironmentBasicInfo> ParseEnvironments(string json)
|
||||
{
|
||||
var envs = new List<EnvironmentBasicInfo>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
var envsArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("environments", out var arr) ? arr : default;
|
||||
|
||||
if (envsArray.ValueKind != JsonValueKind.Array)
|
||||
return envs;
|
||||
|
||||
foreach (var env in envsArray.EnumerateArray())
|
||||
{
|
||||
var name = env.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
var endpoint = env.TryGetProperty("agentEndpoint", out var epEl) ? epEl.GetString() :
|
||||
env.TryGetProperty("agent_endpoint", out var ep2El) ? ep2El.GetString() : null;
|
||||
|
||||
if (!string.IsNullOrEmpty(name))
|
||||
{
|
||||
envs.Add(new EnvironmentBasicInfo { Name = name, AgentEndpoint = endpoint ?? "" });
|
||||
}
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
|
||||
return envs;
|
||||
}
|
||||
|
||||
private sealed record EnvironmentBasicInfo
|
||||
{
|
||||
public required string Name { get; init; }
|
||||
public required string AgentEndpoint { get; init; }
|
||||
}
|
||||
|
||||
private sealed class ConnectivityInfo
|
||||
{
|
||||
public string EnvironmentName { get; set; } = "";
|
||||
public string AgentEndpoint { get; set; } = "";
|
||||
public bool Reachable { get; set; }
|
||||
public int LatencyMs { get; set; }
|
||||
public bool AuthSuccess { get; set; }
|
||||
public bool TlsValid { get; set; }
|
||||
public DateTime? TlsExpiresAt { get; set; }
|
||||
public int? TlsDaysUntilExpiry { get; set; }
|
||||
public string? ErrorMessage { get; set; }
|
||||
public DateTimeOffset? LastSuccessfulContact { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,335 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EnvironmentDeploymentHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_017_Doctor_environment_health
|
||||
// Task: ENVH-005 - Implement EnvironmentDeploymentHealthCheck
|
||||
// Description: Check deployed service health within environments
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Environment.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks deployed service health within environments.
|
||||
/// Monitors service status, replica health, and deployment freshness.
|
||||
/// </summary>
|
||||
public sealed class EnvironmentDeploymentHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.environment";
|
||||
private const string CategoryName = "Environment Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.environment.deployments";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Environment Deployment Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Check deployed service health within environments";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["environment", "deployment", "services", "health"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(15);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(orchestratorUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(15);
|
||||
|
||||
// Get deployments across environments
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/deployments",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve deployments: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Deployment Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var deploymentsJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var envDeployments = ParseDeployments(deploymentsJson);
|
||||
|
||||
if (envDeployments.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Pass("No deployments to check")
|
||||
.WithEvidence("Deployments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", "0");
|
||||
eb.Add("total_services", "0");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
var failedServices = new List<(string Env, string Service, string Error)>();
|
||||
var degradedServices = new List<(string Env, string Service, int Healthy, int Total)>();
|
||||
var stoppedServices = new List<(string Env, string Service)>();
|
||||
var totalServices = 0;
|
||||
|
||||
foreach (var env in envDeployments)
|
||||
{
|
||||
foreach (var svc in env.Services)
|
||||
{
|
||||
totalServices++;
|
||||
|
||||
if (svc.Status.Equals("failed", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
failedServices.Add((env.Name, svc.Name, svc.Error ?? "Unknown error"));
|
||||
}
|
||||
else if (svc.Status.Equals("stopped", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
stoppedServices.Add((env.Name, svc.Name));
|
||||
}
|
||||
else if (svc.Status.Equals("degraded", StringComparison.OrdinalIgnoreCase) ||
|
||||
(svc.Replicas > 0 && svc.HealthyReplicas < svc.Replicas))
|
||||
{
|
||||
degradedServices.Add((env.Name, svc.Name, svc.HealthyReplicas, svc.Replicas));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Production failures are critical
|
||||
var prodFailures = failedServices.Where(f => IsProd(f.Env)).ToList();
|
||||
var hasProdIssue = prodFailures.Count > 0;
|
||||
|
||||
if (hasProdIssue)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{prodFailures.Count} production service(s) failed")
|
||||
.WithEvidence("Deployments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_service_count", failedServices.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("degraded_service_count", degradedServices.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("stopped_service_count", stoppedServices.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_services", string.Join("; ", failedServices.Select(f => $"{f.Env}/{f.Service}")));
|
||||
eb.Add("prod_failures", string.Join("; ", prodFailures.Select(f => $"{f.Service}:{f.Error}")));
|
||||
})
|
||||
.WithCauses(
|
||||
"Service crashed or failed health checks",
|
||||
"Deployment rolled out with errors",
|
||||
"Dependency unavailable",
|
||||
"Resource exhaustion")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View service logs",
|
||||
$"stella env logs {prodFailures[0].Env} --service {prodFailures[0].Service}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Restart service",
|
||||
$"stella env restart {prodFailures[0].Env} --service {prodFailures[0].Service}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(3, "Rollback if needed",
|
||||
$"stella release rollback --env {prodFailures[0].Env}",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (failedServices.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{failedServices.Count} service(s) failed")
|
||||
.WithEvidence("Deployments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_service_count", failedServices.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("degraded_service_count", degradedServices.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_services", string.Join("; ", failedServices.Select(f => $"{f.Env}/{f.Service}")));
|
||||
})
|
||||
.WithCauses("Service failure", "Deployment error")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View service logs",
|
||||
$"stella env logs {failedServices[0].Env} --service {failedServices[0].Service}",
|
||||
CommandType.Shell);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (degradedServices.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{degradedServices.Count} service(s) degraded")
|
||||
.WithEvidence("Deployments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_service_count", "0");
|
||||
eb.Add("degraded_service_count", degradedServices.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("degraded_services", string.Join("; ", degradedServices.Select(d => $"{d.Env}/{d.Service}:{d.Healthy}/{d.Total}")));
|
||||
})
|
||||
.WithCauses(
|
||||
"Replica failed health check",
|
||||
"Scaling in progress",
|
||||
"Node failure")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View service health",
|
||||
$"stella env health {degradedServices[0].Env} --service {degradedServices[0].Service}",
|
||||
CommandType.Shell);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (stoppedServices.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{stoppedServices.Count} service(s) stopped")
|
||||
.WithEvidence("Deployments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("stopped_service_count", stoppedServices.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("stopped_services", string.Join("; ", stoppedServices.Select(s => $"{s.Env}/{s.Service}")));
|
||||
})
|
||||
.WithCauses("Service intentionally stopped", "Maintenance mode")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"{totalServices} service(s) healthy across {envDeployments.Count} environment(s)")
|
||||
.WithEvidence("Deployments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_service_count", "0");
|
||||
eb.Add("degraded_service_count", "0");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check deployments: {ex.Message}")
|
||||
.WithEvidence("Deployment Status", eb =>
|
||||
{
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Deployment check timed out")
|
||||
.WithEvidence("Deployment Status", eb =>
|
||||
{
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static bool IsProd(string envName) =>
|
||||
envName.Contains("prod", StringComparison.OrdinalIgnoreCase) ||
|
||||
envName.Contains("production", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
private static List<EnvDeployments> ParseDeployments(string json)
|
||||
{
|
||||
var result = new List<EnvDeployments>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
var envsArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("environments", out var arr) ? arr : default;
|
||||
|
||||
if (envsArray.ValueKind != JsonValueKind.Array)
|
||||
return result;
|
||||
|
||||
foreach (var env in envsArray.EnumerateArray())
|
||||
{
|
||||
var name = env.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
if (string.IsNullOrEmpty(name)) continue;
|
||||
|
||||
var services = new List<ServiceInfo>();
|
||||
if (env.TryGetProperty("services", out var svcsEl) && svcsEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var svc in svcsEl.EnumerateArray())
|
||||
{
|
||||
var svcName = svc.TryGetProperty("name", out var svcNameEl) ? svcNameEl.GetString() : null;
|
||||
if (string.IsNullOrEmpty(svcName)) continue;
|
||||
|
||||
var status = svc.TryGetProperty("status", out var statEl) ? statEl.GetString() ?? "unknown" : "unknown";
|
||||
var replicas = svc.TryGetProperty("replicas", out var repEl) ? repEl.GetInt32() : 0;
|
||||
var healthy = svc.TryGetProperty("healthyReplicas", out var healthEl) ? healthEl.GetInt32() : replicas;
|
||||
var error = svc.TryGetProperty("error", out var errEl) ? errEl.GetString() : null;
|
||||
|
||||
services.Add(new ServiceInfo
|
||||
{
|
||||
Name = svcName,
|
||||
Status = status,
|
||||
Replicas = replicas,
|
||||
HealthyReplicas = healthy,
|
||||
Error = error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
result.Add(new EnvDeployments { Name = name, Services = services });
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private sealed class EnvDeployments
|
||||
{
|
||||
public required string Name { get; init; }
|
||||
public List<ServiceInfo> Services { get; init; } = [];
|
||||
}
|
||||
|
||||
private sealed class ServiceInfo
|
||||
{
|
||||
public required string Name { get; init; }
|
||||
public required string Status { get; init; }
|
||||
public int Replicas { get; init; }
|
||||
public int HealthyReplicas { get; init; }
|
||||
public string? Error { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,277 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EnvironmentDriftCheck.cs
|
||||
// Sprint: SPRINT_20260118_017_Doctor_environment_health
|
||||
// Task: ENVH-003 - Implement EnvironmentDriftCheck
|
||||
// Description: Detect configuration drift between environments
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Environment.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Detects configuration drift between environments.
|
||||
/// Compares configuration snapshots and identifies unexpected differences.
|
||||
/// </summary>
|
||||
public sealed class EnvironmentDriftCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.environment";
|
||||
private const string CategoryName = "Environment Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.environment.drift";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Environment Drift Detection";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Detect configuration drift between environments";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["environment", "drift", "configuration", "consistency"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(15);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(orchestratorUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(15);
|
||||
|
||||
// Get drift report
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/drift",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve drift report: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Drift Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var driftJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var driftReport = ParseDriftReport(driftJson);
|
||||
|
||||
if (driftReport.Environments.Count < 2)
|
||||
{
|
||||
return builder
|
||||
.Pass("Drift detection requires at least 2 environments")
|
||||
.WithEvidence("Drift", eb =>
|
||||
{
|
||||
eb.Add("environment_count", driftReport.Environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("drift_detected", "false");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
var driftedConfigs = driftReport.Drifts
|
||||
.Where(d => d.IsDrift)
|
||||
.ToList();
|
||||
|
||||
if (driftedConfigs.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Pass("No configuration drift detected")
|
||||
.WithEvidence("Drift", eb =>
|
||||
{
|
||||
eb.Add("environment_count", driftReport.Environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("drift_detected", "false");
|
||||
eb.Add("configs_checked", driftReport.Drifts.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("environments_checked", string.Join(", ", driftReport.Environments));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Categorize drifts by severity
|
||||
var criticalDrifts = driftedConfigs.Where(d => d.Severity == "critical").ToList();
|
||||
var warningDrifts = driftedConfigs.Where(d => d.Severity != "critical").ToList();
|
||||
|
||||
if (criticalDrifts.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{criticalDrifts.Count} critical drift(s) detected")
|
||||
.WithEvidence("Drift", eb =>
|
||||
{
|
||||
eb.Add("environment_count", driftReport.Environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("drift_detected", "true");
|
||||
eb.Add("total_drifts", driftedConfigs.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("critical_drifts", criticalDrifts.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("warning_drifts", warningDrifts.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("drifted_configs", string.Join(", ", driftedConfigs.Select(d => d.ConfigKey)));
|
||||
eb.Add("affected_environments", string.Join(", ", driftedConfigs.SelectMany(d => d.AffectedEnvironments).Distinct()));
|
||||
})
|
||||
.WithCauses(
|
||||
"Manual configuration change in environment",
|
||||
"Failed deployment left partial configuration",
|
||||
"Configuration sync not propagated",
|
||||
"Environment restored from outdated backup")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View drift details",
|
||||
"stella env drift show",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Reconcile configuration",
|
||||
$"stella env drift reconcile --from staging --to prod",
|
||||
CommandType.Manual);
|
||||
rb.AddStep(3, "Or accept drift as intentional",
|
||||
$"stella env drift accept {criticalDrifts[0].ConfigKey}",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Warn($"{warningDrifts.Count} configuration drift(s) detected")
|
||||
.WithEvidence("Drift", eb =>
|
||||
{
|
||||
eb.Add("environment_count", driftReport.Environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("drift_detected", "true");
|
||||
eb.Add("total_drifts", driftedConfigs.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("critical_drifts", "0");
|
||||
eb.Add("warning_drifts", warningDrifts.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("drifted_configs", string.Join(", ", driftedConfigs.Select(d => d.ConfigKey)));
|
||||
})
|
||||
.WithCauses(
|
||||
"Expected environment-specific differences",
|
||||
"Configuration update in progress",
|
||||
"Intentional environment variation")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Review drift report",
|
||||
"stella env drift show",
|
||||
CommandType.Shell);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check drift: {ex.Message}")
|
||||
.WithEvidence("Drift Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Drift check timed out")
|
||||
.WithEvidence("Drift Status", eb =>
|
||||
{
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static DriftReport ParseDriftReport(string json)
|
||||
{
|
||||
var report = new DriftReport();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
|
||||
if (doc.RootElement.TryGetProperty("environments", out var envsEl) &&
|
||||
envsEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var env in envsEl.EnumerateArray())
|
||||
{
|
||||
var name = env.GetString();
|
||||
if (!string.IsNullOrEmpty(name))
|
||||
report.Environments.Add(name);
|
||||
}
|
||||
}
|
||||
|
||||
if (doc.RootElement.TryGetProperty("drifts", out var driftsEl) &&
|
||||
driftsEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var drift in driftsEl.EnumerateArray())
|
||||
{
|
||||
var configKey = drift.TryGetProperty("configKey", out var keyEl) ? keyEl.GetString() : null;
|
||||
var isDrift = drift.TryGetProperty("isDrift", out var driftEl) && driftEl.GetBoolean();
|
||||
var severity = drift.TryGetProperty("severity", out var sevEl) ? sevEl.GetString() ?? "warning" : "warning";
|
||||
|
||||
var affected = new List<string>();
|
||||
if (drift.TryGetProperty("affectedEnvironments", out var affEl) &&
|
||||
affEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var env in affEl.EnumerateArray())
|
||||
{
|
||||
var name = env.GetString();
|
||||
if (!string.IsNullOrEmpty(name))
|
||||
affected.Add(name);
|
||||
}
|
||||
}
|
||||
|
||||
if (!string.IsNullOrEmpty(configKey))
|
||||
{
|
||||
report.Drifts.Add(new DriftInfo
|
||||
{
|
||||
ConfigKey = configKey,
|
||||
IsDrift = isDrift,
|
||||
Severity = severity,
|
||||
AffectedEnvironments = affected
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
|
||||
return report;
|
||||
}
|
||||
|
||||
private sealed class DriftReport
|
||||
{
|
||||
public List<string> Environments { get; } = [];
|
||||
public List<DriftInfo> Drifts { get; } = [];
|
||||
}
|
||||
|
||||
private sealed class DriftInfo
|
||||
{
|
||||
public required string ConfigKey { get; init; }
|
||||
public bool IsDrift { get; init; }
|
||||
public string Severity { get; init; } = "warning";
|
||||
public List<string> AffectedEnvironments { get; init; } = [];
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,328 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EnvironmentNetworkPolicyCheck.cs
|
||||
// Sprint: SPRINT_20260118_017_Doctor_environment_health
|
||||
// Task: ENVH-006 - Implement EnvironmentNetworkPolicyCheck
|
||||
// Description: Verify network policies between environments
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Environment.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Verifies network policies between environments.
|
||||
/// Checks environment isolation, allowed ingress/egress, and policy consistency.
|
||||
/// </summary>
|
||||
public sealed class EnvironmentNetworkPolicyCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.environment";
|
||||
private const string CategoryName = "Environment Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.environment.network.policy";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Environment Network Policy";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Verify network policies between environments";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["environment", "network", "policy", "security", "isolation"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(orchestratorUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
// Get network policies
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/network-policies",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve network policies: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Network Policy Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var policiesJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var policies = ParseNetworkPolicies(policiesJson);
|
||||
|
||||
if (policies.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Warn("No network policies configured")
|
||||
.WithEvidence("Network Policies", eb =>
|
||||
{
|
||||
eb.Add("policy_count", "0");
|
||||
eb.Add("isolation_enforced", "false");
|
||||
})
|
||||
.WithCauses("Network policies not yet defined")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Configure network isolation",
|
||||
"stella env network-policy create --default-deny",
|
||||
CommandType.Manual))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var violations = new List<PolicyViolation>();
|
||||
|
||||
// Check for production isolation
|
||||
var prodEnvs = policies.Where(p => IsProd(p.Environment)).ToList();
|
||||
foreach (var prod in prodEnvs)
|
||||
{
|
||||
// Production should not have ingress from dev
|
||||
var devIngress = prod.AllowedIngress.Any(i =>
|
||||
i.Contains("dev", StringComparison.OrdinalIgnoreCase) &&
|
||||
!i.Contains("devops", StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
if (devIngress)
|
||||
{
|
||||
violations.Add(new PolicyViolation
|
||||
{
|
||||
Environment = prod.Environment,
|
||||
ViolationType = "prod_dev_ingress",
|
||||
Message = "Production allows ingress from dev environment",
|
||||
Severity = "critical"
|
||||
});
|
||||
}
|
||||
|
||||
// Production should have explicit deny-all with allowlist
|
||||
if (!prod.DefaultDeny)
|
||||
{
|
||||
violations.Add(new PolicyViolation
|
||||
{
|
||||
Environment = prod.Environment,
|
||||
ViolationType = "prod_no_default_deny",
|
||||
Message = "Production does not have default-deny policy",
|
||||
Severity = "warning"
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Check for overly permissive policies
|
||||
foreach (var policy in policies)
|
||||
{
|
||||
if (policy.AllowedIngress.Any(i => i == "*" || i == "0.0.0.0/0"))
|
||||
{
|
||||
violations.Add(new PolicyViolation
|
||||
{
|
||||
Environment = policy.Environment,
|
||||
ViolationType = "open_ingress",
|
||||
Message = "Environment allows ingress from any source",
|
||||
Severity = IsProd(policy.Environment) ? "critical" : "warning"
|
||||
});
|
||||
}
|
||||
|
||||
if (policy.AllowedEgress.Any(e => e == "*" || e == "0.0.0.0/0"))
|
||||
{
|
||||
violations.Add(new PolicyViolation
|
||||
{
|
||||
Environment = policy.Environment,
|
||||
ViolationType = "open_egress",
|
||||
Message = "Environment allows egress to any destination",
|
||||
Severity = "info"
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
var criticalViolations = violations.Where(v => v.Severity == "critical").ToList();
|
||||
var warningViolations = violations.Where(v => v.Severity == "warning").ToList();
|
||||
|
||||
if (criticalViolations.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{criticalViolations.Count} critical network policy violation(s)")
|
||||
.WithEvidence("Network Policies", eb =>
|
||||
{
|
||||
eb.Add("policy_count", policies.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("violation_count", violations.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("critical_violations", criticalViolations.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("warning_violations", warningViolations.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("violations", string.Join("; ", violations.Select(v => $"{v.Environment}:{v.ViolationType}")));
|
||||
})
|
||||
.WithCauses(
|
||||
"Overly permissive network policy",
|
||||
"Production not properly isolated",
|
||||
"Legacy policy not updated")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Review network policies",
|
||||
"stella env network-policy list",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Fix production isolation",
|
||||
$"stella env network-policy update {criticalViolations[0].Environment} --default-deny --allow-from staging",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (warningViolations.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{warningViolations.Count} network policy warning(s)")
|
||||
.WithEvidence("Network Policies", eb =>
|
||||
{
|
||||
eb.Add("policy_count", policies.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("violation_count", violations.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("critical_violations", "0");
|
||||
eb.Add("warning_violations", warningViolations.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("violations", string.Join("; ", violations.Select(v => $"{v.Environment}:{v.ViolationType}")));
|
||||
})
|
||||
.WithCauses("Policy could be more restrictive")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Review policy recommendations",
|
||||
"stella env network-policy audit",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"{policies.Count} network policies configured correctly")
|
||||
.WithEvidence("Network Policies", eb =>
|
||||
{
|
||||
eb.Add("policy_count", policies.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("violation_count", "0");
|
||||
eb.Add("environments_with_default_deny", policies.Count(p => p.DefaultDeny).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check network policies: {ex.Message}")
|
||||
.WithEvidence("Network Policy Status", eb =>
|
||||
{
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Network policy check timed out")
|
||||
.WithEvidence("Network Policy Status", eb =>
|
||||
{
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static bool IsProd(string envName) =>
|
||||
envName.Contains("prod", StringComparison.OrdinalIgnoreCase) ||
|
||||
envName.Contains("production", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
private static List<NetworkPolicy> ParseNetworkPolicies(string json)
|
||||
{
|
||||
var policies = new List<NetworkPolicy>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
var policiesArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("policies", out var arr) ? arr : default;
|
||||
|
||||
if (policiesArray.ValueKind != JsonValueKind.Array)
|
||||
return policies;
|
||||
|
||||
foreach (var policy in policiesArray.EnumerateArray())
|
||||
{
|
||||
var env = policy.TryGetProperty("environment", out var envEl) ? envEl.GetString() : null;
|
||||
if (string.IsNullOrEmpty(env)) continue;
|
||||
|
||||
var defaultDeny = policy.TryGetProperty("defaultDeny", out var denyEl) && denyEl.GetBoolean();
|
||||
|
||||
var ingress = new List<string>();
|
||||
if (policy.TryGetProperty("allowedIngress", out var ingressEl) && ingressEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var item in ingressEl.EnumerateArray())
|
||||
{
|
||||
var val = item.GetString();
|
||||
if (!string.IsNullOrEmpty(val)) ingress.Add(val);
|
||||
}
|
||||
}
|
||||
|
||||
var egress = new List<string>();
|
||||
if (policy.TryGetProperty("allowedEgress", out var egressEl) && egressEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var item in egressEl.EnumerateArray())
|
||||
{
|
||||
var val = item.GetString();
|
||||
if (!string.IsNullOrEmpty(val)) egress.Add(val);
|
||||
}
|
||||
}
|
||||
|
||||
policies.Add(new NetworkPolicy
|
||||
{
|
||||
Environment = env,
|
||||
DefaultDeny = defaultDeny,
|
||||
AllowedIngress = ingress,
|
||||
AllowedEgress = egress
|
||||
});
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
|
||||
return policies;
|
||||
}
|
||||
|
||||
private sealed class NetworkPolicy
|
||||
{
|
||||
public required string Environment { get; init; }
|
||||
public bool DefaultDeny { get; init; }
|
||||
public List<string> AllowedIngress { get; init; } = [];
|
||||
public List<string> AllowedEgress { get; init; } = [];
|
||||
}
|
||||
|
||||
private sealed class PolicyViolation
|
||||
{
|
||||
public required string Environment { get; init; }
|
||||
public required string ViolationType { get; init; }
|
||||
public required string Message { get; init; }
|
||||
public string Severity { get; init; } = "warning";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,335 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EnvironmentSecretHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_017_Doctor_environment_health
|
||||
// Task: ENVH-007 - Implement EnvironmentSecretHealthCheck
|
||||
// Description: Check secrets health for environments
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Environment.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks secrets health for environments.
|
||||
/// Monitors secret expiry, rotation status, and access patterns.
|
||||
/// </summary>
|
||||
public sealed class EnvironmentSecretHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.environment";
|
||||
private const string CategoryName = "Environment Health";
|
||||
private const int ExpiryWarningDays = 30;
|
||||
private const int ExpiryFailDays = 7;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.environment.secrets";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Environment Secret Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Check secrets health for environments";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["environment", "secrets", "security", "rotation", "expiry"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(orchestratorUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
// Get secrets status (metadata only, no actual secret values)
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/secrets/status",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve secrets status: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Secret Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var secretsJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var secretsStatus = ParseSecretsStatus(secretsJson);
|
||||
|
||||
if (secretsStatus.Secrets.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Pass("No secrets configured")
|
||||
.WithEvidence("Secrets", eb =>
|
||||
{
|
||||
eb.Add("total_secrets", "0");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
var now = context.TimeProvider.GetUtcNow();
|
||||
var expired = new List<SecretInfo>();
|
||||
var expiringCritical = new List<SecretInfo>();
|
||||
var expiringWarning = new List<SecretInfo>();
|
||||
var rotationOverdue = new List<SecretInfo>();
|
||||
|
||||
foreach (var secret in secretsStatus.Secrets)
|
||||
{
|
||||
if (secret.ExpiresAt.HasValue)
|
||||
{
|
||||
var daysUntilExpiry = (secret.ExpiresAt.Value - now).TotalDays;
|
||||
|
||||
if (daysUntilExpiry <= 0)
|
||||
{
|
||||
expired.Add(secret);
|
||||
}
|
||||
else if (daysUntilExpiry <= ExpiryFailDays)
|
||||
{
|
||||
expiringCritical.Add(secret);
|
||||
}
|
||||
else if (daysUntilExpiry <= ExpiryWarningDays)
|
||||
{
|
||||
expiringWarning.Add(secret);
|
||||
}
|
||||
}
|
||||
|
||||
if (secret.RotationPolicy != null && secret.LastRotated.HasValue)
|
||||
{
|
||||
var daysSinceRotation = (now - secret.LastRotated.Value).TotalDays;
|
||||
if (daysSinceRotation > secret.RotationPolicy.RotationIntervalDays * 1.1) // 10% grace
|
||||
{
|
||||
rotationOverdue.Add(secret);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for critical issues (production secrets)
|
||||
var prodExpired = expired.Where(s => IsProd(s.Environment)).ToList();
|
||||
var prodExpiringCritical = expiringCritical.Where(s => IsProd(s.Environment)).ToList();
|
||||
|
||||
if (prodExpired.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{prodExpired.Count} production secret(s) EXPIRED")
|
||||
.WithEvidence("Secrets", eb =>
|
||||
{
|
||||
eb.Add("total_secrets", secretsStatus.Secrets.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expired_count", expired.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expiring_critical_count", expiringCritical.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expiring_warning_count", expiringWarning.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("rotation_overdue_count", rotationOverdue.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expired_secrets", string.Join(", ", expired.Select(s => $"{s.Environment}/{s.Name}")));
|
||||
})
|
||||
.WithCauses(
|
||||
"Secret expired without rotation",
|
||||
"Rotation job failed",
|
||||
"Secret provider connection lost")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Rotate expired secret immediately",
|
||||
$"stella env secrets rotate {prodExpired[0].Environment} {prodExpired[0].Name}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Check secret provider status",
|
||||
"stella secrets provider status",
|
||||
CommandType.Shell);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (expired.Count > 0 || prodExpiringCritical.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{expired.Count} expired, {prodExpiringCritical.Count} production critical")
|
||||
.WithEvidence("Secrets", eb =>
|
||||
{
|
||||
eb.Add("total_secrets", secretsStatus.Secrets.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expired_count", expired.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expiring_critical_count", expiringCritical.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expiring_warning_count", expiringWarning.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expired_secrets", string.Join(", ", expired.Select(s => $"{s.Environment}/{s.Name}")));
|
||||
})
|
||||
.WithCauses("Secrets expired or expiring soon")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
if (expired.Count > 0)
|
||||
{
|
||||
rb.AddStep(1, "Rotate expired secret",
|
||||
$"stella env secrets rotate {expired[0].Environment} {expired[0].Name}",
|
||||
CommandType.Shell);
|
||||
}
|
||||
if (expiringCritical.Count > 0)
|
||||
{
|
||||
rb.AddStep(2, "Schedule rotation for expiring secrets",
|
||||
"stella env secrets rotate-scheduled --days 7",
|
||||
CommandType.Manual);
|
||||
}
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (expiringCritical.Count > 0 || expiringWarning.Count > 0 || rotationOverdue.Count > 0)
|
||||
{
|
||||
var issues = new List<string>();
|
||||
if (expiringCritical.Count > 0) issues.Add($"{expiringCritical.Count} expiring within 7 days");
|
||||
if (expiringWarning.Count > 0) issues.Add($"{expiringWarning.Count} expiring within 30 days");
|
||||
if (rotationOverdue.Count > 0) issues.Add($"{rotationOverdue.Count} rotation overdue");
|
||||
|
||||
return builder
|
||||
.Warn(string.Join(", ", issues))
|
||||
.WithEvidence("Secrets", eb =>
|
||||
{
|
||||
eb.Add("total_secrets", secretsStatus.Secrets.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expired_count", "0");
|
||||
eb.Add("expiring_critical_count", expiringCritical.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expiring_warning_count", expiringWarning.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("rotation_overdue_count", rotationOverdue.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expiring_secrets", string.Join(", ", expiringCritical.Concat(expiringWarning).Select(s => s.Name)));
|
||||
})
|
||||
.WithCauses("Secrets expiring soon or rotation overdue")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "View secrets status",
|
||||
"stella env secrets list --expiring",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"{secretsStatus.Secrets.Count} secret(s) healthy")
|
||||
.WithEvidence("Secrets", eb =>
|
||||
{
|
||||
eb.Add("total_secrets", secretsStatus.Secrets.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("expired_count", "0");
|
||||
eb.Add("expiring_critical_count", "0");
|
||||
eb.Add("expiring_warning_count", "0");
|
||||
eb.Add("rotation_overdue_count", "0");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check secrets: {ex.Message}")
|
||||
.WithEvidence("Secret Status", eb =>
|
||||
{
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Secret check timed out")
|
||||
.WithEvidence("Secret Status", eb =>
|
||||
{
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static bool IsProd(string envName) =>
|
||||
envName.Contains("prod", StringComparison.OrdinalIgnoreCase) ||
|
||||
envName.Contains("production", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
private static SecretsStatus ParseSecretsStatus(string json)
|
||||
{
|
||||
var status = new SecretsStatus();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
var secretsArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("secrets", out var arr) ? arr : default;
|
||||
|
||||
if (secretsArray.ValueKind != JsonValueKind.Array)
|
||||
return status;
|
||||
|
||||
foreach (var secret in secretsArray.EnumerateArray())
|
||||
{
|
||||
var name = secret.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
var env = secret.TryGetProperty("environment", out var envEl) ? envEl.GetString() : null;
|
||||
if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(env)) continue;
|
||||
|
||||
var expiresAt = secret.TryGetProperty("expiresAt", out var expEl) &&
|
||||
DateTimeOffset.TryParse(expEl.GetString(), out var expDt) ? expDt : (DateTimeOffset?)null;
|
||||
var lastRotated = secret.TryGetProperty("lastRotated", out var rotEl) &&
|
||||
DateTimeOffset.TryParse(rotEl.GetString(), out var rotDt) ? rotDt : (DateTimeOffset?)null;
|
||||
|
||||
RotationPolicy? rotationPolicy = null;
|
||||
if (secret.TryGetProperty("rotationPolicy", out var policyEl) && policyEl.ValueKind == JsonValueKind.Object)
|
||||
{
|
||||
var intervalDays = policyEl.TryGetProperty("intervalDays", out var intEl) ? intEl.GetInt32() : 90;
|
||||
rotationPolicy = new RotationPolicy { RotationIntervalDays = intervalDays };
|
||||
}
|
||||
|
||||
status.Secrets.Add(new SecretInfo
|
||||
{
|
||||
Name = name,
|
||||
Environment = env,
|
||||
ExpiresAt = expiresAt,
|
||||
LastRotated = lastRotated,
|
||||
RotationPolicy = rotationPolicy
|
||||
});
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
private sealed class SecretsStatus
|
||||
{
|
||||
public List<SecretInfo> Secrets { get; } = [];
|
||||
}
|
||||
|
||||
private sealed class SecretInfo
|
||||
{
|
||||
public required string Name { get; init; }
|
||||
public required string Environment { get; init; }
|
||||
public DateTimeOffset? ExpiresAt { get; init; }
|
||||
public DateTimeOffset? LastRotated { get; init; }
|
||||
public RotationPolicy? RotationPolicy { get; init; }
|
||||
}
|
||||
|
||||
private sealed class RotationPolicy
|
||||
{
|
||||
public int RotationIntervalDays { get; init; } = 90;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EnvironmentPluginServiceCollectionExtensions.cs
|
||||
// Sprint: SPRINT_20260118_017_Doctor_environment_health
|
||||
// Task: ENVH-001 - Create Environment plugin scaffold
|
||||
// Description: Extension methods for registering the Environment plugin
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Environment.DependencyInjection;
|
||||
|
||||
/// <summary>
|
||||
/// Extension methods for registering the Environment Doctor plugin.
|
||||
/// </summary>
|
||||
public static class EnvironmentPluginServiceCollectionExtensions
|
||||
{
|
||||
/// <summary>
|
||||
/// Adds the Environment health Doctor plugin.
|
||||
/// Provides checks for environment connectivity, drift, capacity, deployments, and secrets.
|
||||
/// </summary>
|
||||
/// <param name="services">The service collection.</param>
|
||||
/// <returns>The service collection for chaining.</returns>
|
||||
public static IServiceCollection AddDoctorEnvironmentPlugin(this IServiceCollection services)
|
||||
{
|
||||
services.AddSingleton<IDoctorPlugin, EnvironmentDoctorPlugin>();
|
||||
return services;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EnvironmentDoctorPlugin.cs
|
||||
// Sprint: SPRINT_20260118_017_Doctor_environment_health
|
||||
// Task: ENVH-001 - Create Environment plugin scaffold
|
||||
// Description: Doctor plugin for per-environment health monitoring
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using StellaOps.Doctor.Plugin.Environment.Checks;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Environment;
|
||||
|
||||
/// <summary>
|
||||
/// Doctor plugin for per-environment health checks.
|
||||
/// Monitors environment connectivity, drift, capacity, deployment health, and secrets.
|
||||
/// </summary>
|
||||
public sealed class EnvironmentDoctorPlugin : IDoctorPlugin
|
||||
{
|
||||
private static readonly Version PluginVersion = new(1, 0, 0);
|
||||
private static readonly Version MinVersion = new(1, 0, 0);
|
||||
|
||||
/// <inheritdoc />
|
||||
public string PluginId => "stellaops.doctor.environment";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string DisplayName => "Environment Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorCategory Category => DoctorCategory.Environment;
|
||||
|
||||
/// <inheritdoc />
|
||||
public Version Version => PluginVersion;
|
||||
|
||||
/// <inheritdoc />
|
||||
public Version MinEngineVersion => MinVersion;
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool IsAvailable(IServiceProvider services)
|
||||
{
|
||||
// Available when environments are configured
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<IDoctorCheck> GetChecks(DoctorPluginContext context)
|
||||
{
|
||||
return new IDoctorCheck[]
|
||||
{
|
||||
new EnvironmentConnectivityCheck(),
|
||||
new EnvironmentDriftCheck(),
|
||||
new EnvironmentCapacityCheck(),
|
||||
new EnvironmentDeploymentHealthCheck(),
|
||||
new EnvironmentNetworkPolicyCheck(),
|
||||
new EnvironmentSecretHealthCheck()
|
||||
};
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public Task InitializeAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,110 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// IEnvironmentHealthClient.cs
|
||||
// Sprint: SPRINT_20260118_017_Doctor_environment_health
|
||||
// Task: ENVH-001 - Create Environment plugin scaffold
|
||||
// Description: Interface for querying environment health status
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Environment.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Client interface for querying environment health status.
|
||||
/// </summary>
|
||||
public interface IEnvironmentHealthClient
|
||||
{
|
||||
/// <summary>
|
||||
/// Gets all configured environments.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<EnvironmentInfo>> GetEnvironmentsAsync(CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Checks connectivity to an environment agent.
|
||||
/// </summary>
|
||||
Task<ConnectivityResult> CheckConnectivityAsync(string environmentId, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets deployed services for an environment.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<DeployedService>> GetDeployedServicesAsync(string environmentId, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets resource capacity for an environment.
|
||||
/// </summary>
|
||||
Task<CapacityInfo> GetCapacityAsync(string environmentId, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets configuration hash for drift detection.
|
||||
/// </summary>
|
||||
Task<ConfigurationSnapshot> GetConfigurationSnapshotAsync(string environmentId, CancellationToken ct = default);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Basic environment information.
|
||||
/// </summary>
|
||||
public sealed record EnvironmentInfo
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required string Type { get; init; } // dev, staging, prod
|
||||
public required string AgentEndpoint { get; init; }
|
||||
public bool IsActive { get; init; } = true;
|
||||
public IDictionary<string, string> Labels { get; init; } = new Dictionary<string, string>();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Result of connectivity check.
|
||||
/// </summary>
|
||||
public sealed record ConnectivityResult
|
||||
{
|
||||
public required bool Reachable { get; init; }
|
||||
public required int LatencyMs { get; init; }
|
||||
public required bool AuthSuccess { get; init; }
|
||||
public required bool TlsValid { get; init; }
|
||||
public DateTimeOffset? TlsExpiresAt { get; init; }
|
||||
public int? TlsDaysUntilExpiry { get; init; }
|
||||
public string? ErrorMessage { get; init; }
|
||||
public DateTimeOffset? LastSuccessfulContact { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Deployed service within an environment.
|
||||
/// </summary>
|
||||
public sealed record DeployedService
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required string Version { get; init; }
|
||||
public required string Status { get; init; } // running, stopped, failed, degraded
|
||||
public int Replicas { get; init; }
|
||||
public int HealthyReplicas { get; init; }
|
||||
public DateTimeOffset? LastDeployedAt { get; init; }
|
||||
public string? Error { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Resource capacity information.
|
||||
/// </summary>
|
||||
public sealed record CapacityInfo
|
||||
{
|
||||
public required long TotalCpuMillicores { get; init; }
|
||||
public required long UsedCpuMillicores { get; init; }
|
||||
public required long TotalMemoryBytes { get; init; }
|
||||
public required long UsedMemoryBytes { get; init; }
|
||||
public required long TotalStorageBytes { get; init; }
|
||||
public required long UsedStorageBytes { get; init; }
|
||||
public int TotalNodes { get; init; }
|
||||
public int HealthyNodes { get; init; }
|
||||
public int MaxConcurrentDeployments { get; init; }
|
||||
public int ActiveDeployments { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Configuration snapshot for drift detection.
|
||||
/// </summary>
|
||||
public sealed record ConfigurationSnapshot
|
||||
{
|
||||
public required string EnvironmentId { get; init; }
|
||||
public required string ConfigHash { get; init; }
|
||||
public required DateTimeOffset CapturedAt { get; init; }
|
||||
public IDictionary<string, string> ConfigValues { get; init; } = new Dictionary<string, string>();
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Doctor.Plugin.Environment</RootNamespace>
|
||||
<Description>Environment health checks for Stella Ops Doctor diagnostics</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Doctor\StellaOps.Doctor.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -1,13 +1,16 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// PolicyEngineHealthCheck.cs
|
||||
// Sprint: SPRINT_20260117_010_CLI_policy_engine
|
||||
// Task: PEN-005 - Doctor check for policy engine health
|
||||
// Sprint: SPRINT_20260118_015_Doctor_check_quality_improvements
|
||||
// Task: DQUAL-001 - Replace PolicyEngineHealthCheck mock implementation
|
||||
// Description: Health check for policy engine compilation, evaluation, and storage
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
@@ -18,6 +21,11 @@ namespace StellaOps.Doctor.Plugin.Policy.Checks;
|
||||
/// </summary>
|
||||
public sealed class PolicyEngineHealthCheck : IDoctorCheck
|
||||
{
|
||||
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
|
||||
{
|
||||
PropertyNameCaseInsensitive = true
|
||||
};
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.policy.engine";
|
||||
|
||||
@@ -39,7 +47,10 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
return true;
|
||||
// Check if policy engine URL is configured
|
||||
var policyEngineUrl = context.Configuration["Policy:Engine:Url"]
|
||||
?? context.Configuration["PolicyEngine:BaseUrl"];
|
||||
return !string.IsNullOrEmpty(policyEngineUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -47,128 +58,341 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, "stellaops.doctor.policy", "Policy");
|
||||
|
||||
var compilationResult = await CheckCompilationAsync(context, ct);
|
||||
var evaluationResult = await CheckEvaluationAsync(context, ct);
|
||||
var storageResult = await CheckStorageAsync(context, ct);
|
||||
var policyEngineUrl = context.Configuration["Policy:Engine:Url"]
|
||||
?? context.Configuration["PolicyEngine:BaseUrl"]
|
||||
?? "http://localhost:8181";
|
||||
|
||||
// Aggregate results
|
||||
var allPassed = compilationResult.Passed && evaluationResult.Passed && storageResult.Passed;
|
||||
var hasWarnings = compilationResult.HasWarnings || evaluationResult.HasWarnings || storageResult.HasWarnings;
|
||||
|
||||
if (!allPassed)
|
||||
try
|
||||
{
|
||||
var failedChecks = new List<string>();
|
||||
if (!compilationResult.Passed) failedChecks.Add("compilation");
|
||||
if (!evaluationResult.Passed) failedChecks.Add("evaluation");
|
||||
if (!storageResult.Passed) failedChecks.Add("storage");
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var compilationResult = await CheckCompilationAsync(httpClient, policyEngineUrl, ct);
|
||||
var evaluationResult = await CheckEvaluationAsync(httpClient, policyEngineUrl, ct);
|
||||
var storageResult = await CheckStorageAsync(httpClient, policyEngineUrl, ct);
|
||||
|
||||
// Aggregate results
|
||||
var allPassed = compilationResult.Passed && evaluationResult.Passed && storageResult.Passed;
|
||||
var hasWarnings = compilationResult.HasWarnings || evaluationResult.HasWarnings || storageResult.HasWarnings;
|
||||
|
||||
if (!allPassed)
|
||||
{
|
||||
var failedChecks = new List<string>();
|
||||
if (!compilationResult.Passed) failedChecks.Add("compilation");
|
||||
if (!evaluationResult.Passed) failedChecks.Add("evaluation");
|
||||
if (!storageResult.Passed) failedChecks.Add("storage");
|
||||
|
||||
return builder
|
||||
.Fail($"Policy engine health check failed: {string.Join(", ", failedChecks)}")
|
||||
.WithEvidence("Engine Status", eb =>
|
||||
{
|
||||
eb.Add("engine_type", compilationResult.EngineType ?? "unknown");
|
||||
eb.Add("engine_version", compilationResult.EngineVersion ?? "unknown");
|
||||
eb.Add("engine_url", policyEngineUrl);
|
||||
eb.Add("compilation_status", compilationResult.Passed ? "OK" : "FAILED");
|
||||
eb.Add("evaluation_status", evaluationResult.Passed ? "OK" : "FAILED");
|
||||
eb.Add("storage_status", storageResult.Passed ? "OK" : "FAILED");
|
||||
eb.Add("policy_count", compilationResult.PolicyCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("compilation_time_ms", compilationResult.CompilationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("evaluation_latency_p50_ms", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("cache_hit_ratio", compilationResult.CacheHitRatio.ToString("F2", CultureInfo.InvariantCulture));
|
||||
if (!string.IsNullOrEmpty(compilationResult.LastCompilationError))
|
||||
{
|
||||
eb.Add("last_compilation_error", compilationResult.LastCompilationError);
|
||||
}
|
||||
if (!string.IsNullOrEmpty(evaluationResult.Error))
|
||||
{
|
||||
eb.Add("evaluation_error", evaluationResult.Error);
|
||||
}
|
||||
if (!string.IsNullOrEmpty(storageResult.Error))
|
||||
{
|
||||
eb.Add("storage_error", storageResult.Error);
|
||||
}
|
||||
})
|
||||
.WithCauses(
|
||||
"Policy engine service not running",
|
||||
"Policy storage unavailable",
|
||||
"OPA/Rego compilation error",
|
||||
"Policy cache corrupted")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check policy engine service status",
|
||||
"stella policy status",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Verify policy storage connectivity",
|
||||
"stella doctor --check check.storage.postgres",
|
||||
CommandType.Shell)
|
||||
.AddStep(3, "Recompile policies",
|
||||
"stella policy compile --all",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (hasWarnings)
|
||||
{
|
||||
return builder
|
||||
.Warn("Policy engine health check passed with warnings")
|
||||
.WithEvidence("Engine Status", eb =>
|
||||
{
|
||||
eb.Add("engine_type", compilationResult.EngineType ?? "unknown");
|
||||
eb.Add("engine_version", compilationResult.EngineVersion ?? "unknown");
|
||||
eb.Add("engine_url", policyEngineUrl);
|
||||
eb.Add("compilation_status", "OK");
|
||||
eb.Add("evaluation_status", "OK");
|
||||
eb.Add("storage_status", "OK");
|
||||
eb.Add("policy_count", compilationResult.PolicyCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("evaluation_latency_p50_ms", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("cache_hit_ratio", compilationResult.CacheHitRatio.ToString("F2", CultureInfo.InvariantCulture));
|
||||
if (evaluationResult.EvaluationTimeMs > 100)
|
||||
{
|
||||
eb.Add("performance_warning", "SLOW - evaluation time exceeds 100ms threshold");
|
||||
}
|
||||
})
|
||||
.WithCauses(
|
||||
"Policy evaluation is slower than expected",
|
||||
"Policy cache may need warming")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Warm policy cache",
|
||||
"stella policy cache warm",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Check for complex policies",
|
||||
"stella policy list --complexity high",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Fail($"Policy engine health check failed: {string.Join(", ", failedChecks)}")
|
||||
.Pass("Policy engine is healthy")
|
||||
.WithEvidence("Engine Status", eb =>
|
||||
{
|
||||
eb.Add("Compilation", compilationResult.Passed ? "OK" : "FAILED");
|
||||
eb.Add("Evaluation", evaluationResult.Passed ? "OK" : "FAILED");
|
||||
eb.Add("Storage", storageResult.Passed ? "OK" : "FAILED");
|
||||
eb.Add("EvaluationTimeMs", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("engine_type", compilationResult.EngineType ?? "unknown");
|
||||
eb.Add("engine_version", compilationResult.EngineVersion ?? "unknown");
|
||||
eb.Add("engine_url", policyEngineUrl);
|
||||
eb.Add("compilation_status", "OK");
|
||||
eb.Add("evaluation_status", "OK");
|
||||
eb.Add("storage_status", "OK");
|
||||
eb.Add("policy_count", compilationResult.PolicyCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("compilation_time_ms", compilationResult.CompilationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("evaluation_latency_p50_ms", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("cache_hit_ratio", compilationResult.CacheHitRatio.ToString("F2", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Cannot reach policy engine at {policyEngineUrl}")
|
||||
.WithEvidence("Engine Status", eb =>
|
||||
{
|
||||
eb.Add("engine_url", policyEngineUrl);
|
||||
eb.Add("connection_error_type", GetConnectionErrorType(ex));
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithCauses(
|
||||
"Policy engine service not running",
|
||||
"Policy storage unavailable",
|
||||
"OPA/Rego compilation error",
|
||||
"Policy cache corrupted")
|
||||
"Network connectivity issue",
|
||||
"Firewall blocking access",
|
||||
"DNS resolution failure")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check policy engine service status",
|
||||
"stella policy status",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Verify policy storage connectivity",
|
||||
"stella doctor --check check.storage.postgres",
|
||||
CommandType.Shell)
|
||||
.AddStep(3, "Recompile policies",
|
||||
"stella policy compile --all",
|
||||
.AddStep(2, "Verify network connectivity",
|
||||
$"curl -s {policyEngineUrl}/health",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (hasWarnings)
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Policy engine health check passed with warnings")
|
||||
.Fail($"Policy engine request timed out ({policyEngineUrl})")
|
||||
.WithEvidence("Engine Status", eb =>
|
||||
{
|
||||
eb.Add("Compilation", "OK");
|
||||
eb.Add("Evaluation", "OK");
|
||||
eb.Add("Storage", "OK");
|
||||
eb.Add("EvaluationTimeMs", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
if (evaluationResult.EvaluationTimeMs > 100)
|
||||
{
|
||||
eb.Add("Performance", "SLOW - evaluation time exceeds 100ms threshold");
|
||||
}
|
||||
eb.Add("engine_url", policyEngineUrl);
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
eb.Add("timeout_seconds", "10");
|
||||
})
|
||||
.WithCauses(
|
||||
"Policy evaluation is slower than expected",
|
||||
"Policy cache may need warming")
|
||||
"Policy engine overloaded",
|
||||
"Network latency too high",
|
||||
"Policy engine deadlocked")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Warm policy cache",
|
||||
"stella policy cache warm",
|
||||
.AddStep(1, "Check policy engine metrics",
|
||||
"stella policy metrics",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Check for complex policies",
|
||||
"stella policy list --complexity high",
|
||||
CommandType.Shell))
|
||||
.AddStep(2, "Restart policy engine if needed",
|
||||
"stella policy restart",
|
||||
CommandType.Manual))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass("Policy engine is healthy")
|
||||
.WithEvidence("Engine Status", eb =>
|
||||
{
|
||||
eb.Add("Compilation", "OK");
|
||||
eb.Add("Evaluation", "OK");
|
||||
eb.Add("Storage", "OK");
|
||||
eb.Add("EvaluationTimeMs", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("PolicyCount", compilationResult.PolicyCount.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
private Task<CompilationCheckResult> CheckCompilationAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
private static string GetConnectionErrorType(HttpRequestException ex)
|
||||
{
|
||||
// Simulate compilation check
|
||||
return Task.FromResult(new CompilationCheckResult
|
||||
{
|
||||
Passed = true,
|
||||
PolicyCount = 12,
|
||||
CompilationTimeMs = 45
|
||||
});
|
||||
var message = ex.Message.ToLowerInvariant();
|
||||
if (message.Contains("ssl") || message.Contains("tls") || message.Contains("certificate"))
|
||||
return "ssl_error";
|
||||
if (message.Contains("name") || message.Contains("dns") || message.Contains("resolve"))
|
||||
return "dns_failure";
|
||||
if (message.Contains("refused") || message.Contains("actively refused"))
|
||||
return "refused";
|
||||
if (message.Contains("timeout"))
|
||||
return "timeout";
|
||||
return "connection_failed";
|
||||
}
|
||||
|
||||
private Task<EvaluationCheckResult> CheckEvaluationAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
private async Task<CompilationCheckResult> CheckCompilationAsync(HttpClient httpClient, string baseUrl, CancellationToken ct)
|
||||
{
|
||||
// Simulate evaluation check with a sample policy
|
||||
var result = new CompilationCheckResult();
|
||||
var stopwatch = Stopwatch.StartNew();
|
||||
|
||||
// In real implementation, this would evaluate a test policy
|
||||
Thread.Sleep(25); // Simulate evaluation time
|
||||
|
||||
stopwatch.Stop();
|
||||
|
||||
return Task.FromResult(new EvaluationCheckResult
|
||||
try
|
||||
{
|
||||
Passed = true,
|
||||
HasWarnings = stopwatch.ElapsedMilliseconds > 100,
|
||||
EvaluationTimeMs = stopwatch.ElapsedMilliseconds
|
||||
});
|
||||
// Check OPA health/info endpoint for engine info
|
||||
var healthResponse = await httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/health", ct);
|
||||
if (healthResponse.IsSuccessStatusCode)
|
||||
{
|
||||
result.EngineType = "opa";
|
||||
}
|
||||
|
||||
// Get policy list to count policies
|
||||
var policiesResponse = await httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/policies", ct);
|
||||
if (policiesResponse.IsSuccessStatusCode)
|
||||
{
|
||||
var policiesJson = await policiesResponse.Content.ReadAsStringAsync(ct);
|
||||
using var doc = JsonDocument.Parse(policiesJson);
|
||||
if (doc.RootElement.TryGetProperty("result", out var resultArray) && resultArray.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
result.PolicyCount = resultArray.GetArrayLength();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
result.Passed = false;
|
||||
result.LastCompilationError = $"Failed to list policies: HTTP {(int)policiesResponse.StatusCode}";
|
||||
return result;
|
||||
}
|
||||
|
||||
// Get engine info/status for version and metrics
|
||||
var statusResponse = await httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/status", ct);
|
||||
if (statusResponse.IsSuccessStatusCode)
|
||||
{
|
||||
var statusJson = await statusResponse.Content.ReadAsStringAsync(ct);
|
||||
using var statusDoc = JsonDocument.Parse(statusJson);
|
||||
|
||||
// Try to extract version
|
||||
if (statusDoc.RootElement.TryGetProperty("result", out var statusResult))
|
||||
{
|
||||
if (statusResult.TryGetProperty("version", out var versionEl))
|
||||
{
|
||||
result.EngineVersion = versionEl.GetString();
|
||||
}
|
||||
|
||||
// Try to extract cache metrics
|
||||
if (statusResult.TryGetProperty("metrics", out var metrics))
|
||||
{
|
||||
if (metrics.TryGetProperty("cache_hit_ratio", out var cacheHitEl))
|
||||
{
|
||||
result.CacheHitRatio = cacheHitEl.GetDouble();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stopwatch.Stop();
|
||||
result.CompilationTimeMs = stopwatch.ElapsedMilliseconds;
|
||||
result.Passed = true;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
result.Passed = false;
|
||||
result.LastCompilationError = ex.Message;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private Task<StorageCheckResult> CheckStorageAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
private async Task<EvaluationCheckResult> CheckEvaluationAsync(HttpClient httpClient, string baseUrl, CancellationToken ct)
|
||||
{
|
||||
// Simulate storage check
|
||||
return Task.FromResult(new StorageCheckResult
|
||||
var result = new EvaluationCheckResult();
|
||||
var stopwatch = Stopwatch.StartNew();
|
||||
|
||||
try
|
||||
{
|
||||
Passed = true,
|
||||
PolicyVersions = 34
|
||||
});
|
||||
// Evaluate a canary policy with known input/output
|
||||
// POST to OPA data endpoint with minimal input
|
||||
var canaryInput = new { input = new { doctor_check = true, timestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds() } };
|
||||
var content = new StringContent(
|
||||
JsonSerializer.Serialize(canaryInput, JsonOptions),
|
||||
System.Text.Encoding.UTF8,
|
||||
"application/json");
|
||||
|
||||
var evalResponse = await httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/data/system/health", content, ct);
|
||||
|
||||
stopwatch.Stop();
|
||||
result.EvaluationTimeMs = stopwatch.ElapsedMilliseconds;
|
||||
|
||||
// 200 or 404 (no policy at path) are both acceptable for health check
|
||||
// 500 indicates actual engine error
|
||||
if (evalResponse.IsSuccessStatusCode || evalResponse.StatusCode == System.Net.HttpStatusCode.NotFound)
|
||||
{
|
||||
result.Passed = true;
|
||||
result.HasWarnings = result.EvaluationTimeMs > 100; // Warn if slow
|
||||
}
|
||||
else
|
||||
{
|
||||
result.Passed = false;
|
||||
result.Error = $"Policy evaluation failed: HTTP {(int)evalResponse.StatusCode}";
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
result.Passed = false;
|
||||
result.Error = ex.Message;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private async Task<StorageCheckResult> CheckStorageAsync(HttpClient httpClient, string baseUrl, CancellationToken ct)
|
||||
{
|
||||
var result = new StorageCheckResult();
|
||||
|
||||
try
|
||||
{
|
||||
// Check if we can access policy data (storage is working)
|
||||
var dataResponse = await httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/data", ct);
|
||||
|
||||
if (dataResponse.IsSuccessStatusCode)
|
||||
{
|
||||
result.Passed = true;
|
||||
|
||||
var dataJson = await dataResponse.Content.ReadAsStringAsync(ct);
|
||||
using var doc = JsonDocument.Parse(dataJson);
|
||||
|
||||
// Count top-level data entries as proxy for stored policy versions
|
||||
if (doc.RootElement.TryGetProperty("result", out var resultObj) && resultObj.ValueKind == JsonValueKind.Object)
|
||||
{
|
||||
result.PolicyVersions = resultObj.EnumerateObject().Count();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
result.Passed = false;
|
||||
result.Error = $"Storage check failed: HTTP {(int)dataResponse.StatusCode}";
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
result.Passed = false;
|
||||
result.Error = ex.Message;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private sealed class CompilationCheckResult
|
||||
@@ -177,6 +401,10 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
|
||||
public bool HasWarnings { get; set; }
|
||||
public int PolicyCount { get; set; }
|
||||
public long CompilationTimeMs { get; set; }
|
||||
public string? EngineType { get; set; }
|
||||
public string? EngineVersion { get; set; }
|
||||
public double CacheHitRatio { get; set; }
|
||||
public string? LastCompilationError { get; set; }
|
||||
}
|
||||
|
||||
private sealed class EvaluationCheckResult
|
||||
@@ -184,6 +412,7 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
|
||||
public bool Passed { get; set; }
|
||||
public bool HasWarnings { get; set; }
|
||||
public long EvaluationTimeMs { get; set; }
|
||||
public string? Error { get; set; }
|
||||
}
|
||||
|
||||
private sealed class StorageCheckResult
|
||||
@@ -191,5 +420,6 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
|
||||
public bool Passed { get; set; }
|
||||
public bool HasWarnings { get; set; }
|
||||
public int PolicyVersions { get; set; }
|
||||
public string? Error { get; set; }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,376 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ActiveReleaseHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
|
||||
// Task: RELPIPE-002 - Implement ActiveReleaseHealthCheck
|
||||
// Description: Check health of currently active releases
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Release.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks health of currently active releases.
|
||||
/// Identifies releases stuck in states, with failed steps, or awaiting approval for too long.
|
||||
/// </summary>
|
||||
public sealed class ActiveReleaseHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.release";
|
||||
private const string CategoryName = "Release Pipeline";
|
||||
|
||||
// Thresholds
|
||||
private static readonly TimeSpan StuckWarningThreshold = TimeSpan.FromHours(1);
|
||||
private static readonly TimeSpan StuckFailThreshold = TimeSpan.FromHours(4);
|
||||
private static readonly TimeSpan ApprovalWarningThreshold = TimeSpan.FromHours(4);
|
||||
private static readonly TimeSpan ApprovalFailThreshold = TimeSpan.FromHours(24);
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.release.active";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Active Release Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Check health of currently active releases";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["release", "pipeline", "active", "monitoring"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
// Check if ReleaseOrchestrator is configured
|
||||
var releaseUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(releaseUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
// Query active releases
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/releases?state=active",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot reach Release Orchestrator: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Release Orchestrator Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("connection_error_type", "http_error");
|
||||
})
|
||||
.WithCauses(
|
||||
"Release Orchestrator service unavailable",
|
||||
"Authentication/authorization failure",
|
||||
"Network connectivity issue")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check Release Orchestrator health",
|
||||
$"curl -s {orchestratorUrl}/health",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Check service status",
|
||||
"stella release status",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var releasesJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var releases = ParseReleases(releasesJson);
|
||||
|
||||
var now = context.TimeProvider.GetUtcNow();
|
||||
var activeCount = releases.Count;
|
||||
var stuckReleases = new List<ReleaseInfo>();
|
||||
var failedReleases = new List<ReleaseInfo>();
|
||||
var pendingApprovals = new List<ReleaseInfo>();
|
||||
|
||||
foreach (var release in releases)
|
||||
{
|
||||
var duration = now - release.StartedAt;
|
||||
|
||||
if (!string.IsNullOrEmpty(release.Error))
|
||||
{
|
||||
failedReleases.Add(release);
|
||||
}
|
||||
else if (release.State.Equals("pending_approval", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
if (duration > ApprovalWarningThreshold)
|
||||
{
|
||||
pendingApprovals.Add(release with { Duration = duration });
|
||||
}
|
||||
}
|
||||
else if (release.State.Equals("executing", StringComparison.OrdinalIgnoreCase) ||
|
||||
release.State.Equals("pending", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
if (duration > StuckWarningThreshold)
|
||||
{
|
||||
stuckReleases.Add(release with { Duration = duration });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine severity
|
||||
var hasFailure = failedReleases.Count > 0 ||
|
||||
stuckReleases.Any(r => r.Duration > StuckFailThreshold) ||
|
||||
pendingApprovals.Any(r => r.Duration > ApprovalFailThreshold);
|
||||
|
||||
var hasWarning = stuckReleases.Count > 0 || pendingApprovals.Count > 0;
|
||||
|
||||
if (hasFailure)
|
||||
{
|
||||
return builder
|
||||
.Fail("Critical release issues detected")
|
||||
.WithEvidence("Active Releases", eb =>
|
||||
{
|
||||
eb.Add("active_release_count", activeCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("stuck_release_count", stuckReleases.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_release_count", failedReleases.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("pending_approval_count", pendingApprovals.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("oldest_active_release_age_minutes", releases.Count > 0
|
||||
? ((int)(now - releases.Min(r => r.StartedAt)).TotalMinutes).ToString(CultureInfo.InvariantCulture)
|
||||
: "0");
|
||||
AddReleaseListEvidence(eb, "stuck_releases", stuckReleases);
|
||||
AddReleaseListEvidence(eb, "failed_releases", failedReleases);
|
||||
AddReleaseListEvidence(eb, "approval_pending_releases", pendingApprovals);
|
||||
})
|
||||
.WithCauses(
|
||||
"Release workflow step failed",
|
||||
"Approval bottleneck",
|
||||
"Environment unreachable",
|
||||
"Resource contention")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
if (failedReleases.Count > 0)
|
||||
{
|
||||
rb.AddStep(1, "Inspect failed release",
|
||||
$"stella release inspect {failedReleases[0].Id}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "View release logs",
|
||||
$"stella release logs {failedReleases[0].Id}",
|
||||
CommandType.Shell);
|
||||
}
|
||||
if (stuckReleases.Count > 0)
|
||||
{
|
||||
rb.AddStep(3, "Check stuck release",
|
||||
$"stella release inspect {stuckReleases[0].Id}",
|
||||
CommandType.Shell);
|
||||
}
|
||||
if (pendingApprovals.Count > 0)
|
||||
{
|
||||
rb.AddStep(4, "Review pending approvals",
|
||||
"stella release approvals list",
|
||||
CommandType.Shell);
|
||||
}
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (hasWarning)
|
||||
{
|
||||
return builder
|
||||
.Warn("Release pipeline has items requiring attention")
|
||||
.WithEvidence("Active Releases", eb =>
|
||||
{
|
||||
eb.Add("active_release_count", activeCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("stuck_release_count", stuckReleases.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_release_count", "0");
|
||||
eb.Add("pending_approval_count", pendingApprovals.Count.ToString(CultureInfo.InvariantCulture));
|
||||
AddReleaseListEvidence(eb, "stuck_releases", stuckReleases);
|
||||
AddReleaseListEvidence(eb, "approval_pending_releases", pendingApprovals);
|
||||
})
|
||||
.WithCauses(
|
||||
"Release taking longer than expected",
|
||||
"Approval not yet provided",
|
||||
"Environment slow to respond")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
if (stuckReleases.Count > 0)
|
||||
{
|
||||
rb.AddStep(1, "Inspect slow release",
|
||||
$"stella release inspect {stuckReleases[0].Id}",
|
||||
CommandType.Shell);
|
||||
}
|
||||
if (pendingApprovals.Count > 0)
|
||||
{
|
||||
rb.AddStep(2, "Review pending approvals",
|
||||
"stella release approvals list",
|
||||
CommandType.Shell);
|
||||
}
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// All healthy
|
||||
return builder
|
||||
.Pass(activeCount == 0
|
||||
? "No active releases"
|
||||
: $"{activeCount} release(s) progressing normally")
|
||||
.WithEvidence("Active Releases", eb =>
|
||||
{
|
||||
eb.Add("active_release_count", activeCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("stuck_release_count", "0");
|
||||
eb.Add("failed_release_count", "0");
|
||||
eb.Add("pending_approval_count", "0");
|
||||
if (releases.Count > 0)
|
||||
{
|
||||
eb.Add("releases_in_progress", string.Join(", ", releases.Select(r => r.Name)));
|
||||
}
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot reach Release Orchestrator: {ex.Message}")
|
||||
.WithEvidence("Release Orchestrator Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("error_message", ex.Message);
|
||||
eb.Add("connection_error_type", GetConnectionErrorType(ex));
|
||||
})
|
||||
.WithCauses(
|
||||
"Release Orchestrator service down",
|
||||
"Network connectivity issue",
|
||||
"DNS resolution failure")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check Release Orchestrator health",
|
||||
$"curl -s {orchestratorUrl}/health",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Release Orchestrator connection timed out")
|
||||
.WithEvidence("Release Orchestrator Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("error_message", "Request timed out");
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
eb.Add("timeout_seconds", "10");
|
||||
})
|
||||
.WithCauses(
|
||||
"Release Orchestrator overloaded",
|
||||
"Network latency too high")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static List<ReleaseInfo> ParseReleases(string json)
|
||||
{
|
||||
var releases = new List<ReleaseInfo>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
|
||||
var releasesArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("releases", out var arr) ? arr : default;
|
||||
|
||||
if (releasesArray.ValueKind != JsonValueKind.Array)
|
||||
return releases;
|
||||
|
||||
foreach (var release in releasesArray.EnumerateArray())
|
||||
{
|
||||
var id = release.TryGetProperty("id", out var idEl) ? idEl.GetString() : null;
|
||||
var name = release.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
var state = release.TryGetProperty("state", out var stateEl) ? stateEl.GetString() : null;
|
||||
var startedAt = release.TryGetProperty("startedAt", out var startEl) &&
|
||||
DateTimeOffset.TryParse(startEl.GetString(), out var dt) ? dt : DateTimeOffset.UtcNow;
|
||||
var error = release.TryGetProperty("error", out var errEl) ? errEl.GetString() : null;
|
||||
var step = release.TryGetProperty("currentStep", out var stepEl) ? stepEl.GetString() : null;
|
||||
var env = release.TryGetProperty("targetEnvironment", out var envEl) ? envEl.GetString() : null;
|
||||
|
||||
if (!string.IsNullOrEmpty(id) && !string.IsNullOrEmpty(name))
|
||||
{
|
||||
releases.Add(new ReleaseInfo
|
||||
{
|
||||
Id = id,
|
||||
Name = name,
|
||||
State = state ?? "unknown",
|
||||
StartedAt = startedAt,
|
||||
Error = error,
|
||||
CurrentStep = step,
|
||||
TargetEnvironment = env
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort parsing
|
||||
}
|
||||
|
||||
return releases;
|
||||
}
|
||||
|
||||
private static void AddReleaseListEvidence(EvidenceBuilder eb, string key, List<ReleaseInfo> releases)
|
||||
{
|
||||
if (releases.Count == 0)
|
||||
{
|
||||
eb.Add(key, "[]");
|
||||
return;
|
||||
}
|
||||
|
||||
var summaries = releases.Select(r =>
|
||||
$"{r.Name}:{r.State}:{(int)r.Duration.TotalMinutes}min");
|
||||
eb.Add(key, string.Join(", ", summaries));
|
||||
}
|
||||
|
||||
private static string GetConnectionErrorType(HttpRequestException ex)
|
||||
{
|
||||
var message = ex.Message.ToLowerInvariant();
|
||||
if (message.Contains("ssl") || message.Contains("tls") || message.Contains("certificate"))
|
||||
return "ssl_error";
|
||||
if (message.Contains("name") || message.Contains("dns") || message.Contains("resolve"))
|
||||
return "dns_failure";
|
||||
if (message.Contains("refused") || message.Contains("actively refused"))
|
||||
return "refused";
|
||||
if (message.Contains("timeout"))
|
||||
return "timeout";
|
||||
return "connection_failed";
|
||||
}
|
||||
|
||||
private sealed record ReleaseInfo
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required string State { get; init; }
|
||||
public required DateTimeOffset StartedAt { get; init; }
|
||||
public string? Error { get; init; }
|
||||
public string? CurrentStep { get; init; }
|
||||
public string? TargetEnvironment { get; init; }
|
||||
public TimeSpan Duration { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,360 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// EnvironmentReadinessCheck.cs
|
||||
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
|
||||
// Task: RELPIPE-004 - Implement EnvironmentReadinessCheck
|
||||
// Description: Check health and readiness of target environments
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Release.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks health and readiness of target environments.
|
||||
/// Verifies environment reachability, resource limits, and deployment readiness.
|
||||
/// </summary>
|
||||
public sealed class EnvironmentReadinessCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.release";
|
||||
private const string CategoryName = "Release Pipeline";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.release.environment.readiness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Environment Readiness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Check health and readiness of target environments";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["release", "environment", "readiness", "deployment"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(15);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var releaseUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(releaseUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(15);
|
||||
|
||||
// Query environments
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve environments: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Environment Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Release Orchestrator unavailable", "API endpoint not found")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check Release Orchestrator health",
|
||||
$"curl -s {orchestratorUrl}/health",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var envsJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var environments = ParseEnvironments(envsJson);
|
||||
|
||||
if (environments.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Pass("No environments configured")
|
||||
.WithEvidence("Environments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", "0");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check each environment
|
||||
var unreachable = new List<EnvironmentInfo>();
|
||||
var unhealthy = new List<EnvironmentInfo>();
|
||||
var staleHealthCheck = new List<EnvironmentInfo>();
|
||||
|
||||
var now = context.TimeProvider.GetUtcNow();
|
||||
var staleThreshold = TimeSpan.FromHours(1);
|
||||
|
||||
foreach (var env in environments)
|
||||
{
|
||||
if (!env.IsReachable)
|
||||
{
|
||||
unreachable.Add(env);
|
||||
}
|
||||
else if (!env.IsHealthy)
|
||||
{
|
||||
unhealthy.Add(env);
|
||||
}
|
||||
else if (env.LastHealthCheck.HasValue &&
|
||||
now - env.LastHealthCheck.Value > staleThreshold)
|
||||
{
|
||||
staleHealthCheck.Add(env);
|
||||
}
|
||||
}
|
||||
|
||||
var devEnvs = environments.Count(e => e.Type.Equals("dev", StringComparison.OrdinalIgnoreCase));
|
||||
var stagingEnvs = environments.Count(e => e.Type.Equals("staging", StringComparison.OrdinalIgnoreCase) ||
|
||||
e.Type.Equals("stage", StringComparison.OrdinalIgnoreCase));
|
||||
var prodEnvs = environments.Count(e => e.Type.Equals("prod", StringComparison.OrdinalIgnoreCase) ||
|
||||
e.Type.Equals("production", StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
// Determine severity - production issues are critical
|
||||
var hasProdIssue = unreachable.Any(e => IsProd(e.Type)) || unhealthy.Any(e => IsProd(e.Type));
|
||||
var hasAnyIssue = unreachable.Count > 0 || unhealthy.Count > 0;
|
||||
|
||||
if (hasProdIssue)
|
||||
{
|
||||
return builder
|
||||
.Fail("Production environment issues detected")
|
||||
.WithEvidence("Environments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("dev_environments", devEnvs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("staging_environments", stagingEnvs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("prod_environments", prodEnvs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_count", unreachable.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unhealthy_count", unhealthy.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_environments", string.Join(", ", unreachable.Select(e => e.Name)));
|
||||
eb.Add("unhealthy_environments", string.Join(", ", unhealthy.Select(e => e.Name)));
|
||||
})
|
||||
.WithCauses(
|
||||
"Environment agent not responding",
|
||||
"Network connectivity issue to environment",
|
||||
"Container runtime issue in environment",
|
||||
"Resource exhaustion (disk, memory)")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
if (unreachable.Count > 0)
|
||||
{
|
||||
rb.AddStep(1, "Check environment connectivity",
|
||||
$"stella env ping {unreachable[0].Name}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "View environment agent logs",
|
||||
$"stella env logs {unreachable[0].Name}",
|
||||
CommandType.Shell);
|
||||
}
|
||||
if (unhealthy.Count > 0)
|
||||
{
|
||||
rb.AddStep(3, "Check environment health details",
|
||||
$"stella env health {unhealthy[0].Name}",
|
||||
CommandType.Shell);
|
||||
}
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (hasAnyIssue)
|
||||
{
|
||||
return builder
|
||||
.Warn("Non-production environment issues detected")
|
||||
.WithEvidence("Environments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("dev_environments", devEnvs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("staging_environments", stagingEnvs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("prod_environments", prodEnvs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_count", unreachable.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unhealthy_count", unhealthy.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_environments", string.Join(", ", unreachable.Select(e => e.Name)));
|
||||
eb.Add("unhealthy_environments", string.Join(", ", unhealthy.Select(e => e.Name)));
|
||||
})
|
||||
.WithCauses(
|
||||
"Environment agent not responding",
|
||||
"Dev/staging environment offline",
|
||||
"Resource issue in non-prod environment")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
if (unreachable.Count > 0)
|
||||
{
|
||||
rb.AddStep(1, "Check environment connectivity",
|
||||
$"stella env ping {unreachable[0].Name}",
|
||||
CommandType.Shell);
|
||||
}
|
||||
if (unhealthy.Count > 0)
|
||||
{
|
||||
rb.AddStep(2, "Check environment health",
|
||||
$"stella env health {unhealthy[0].Name}",
|
||||
CommandType.Shell);
|
||||
}
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (staleHealthCheck.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{staleHealthCheck.Count} environment(s) have stale health data")
|
||||
.WithEvidence("Environments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("stale_health_check_count", staleHealthCheck.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("stale_environments", string.Join(", ", staleHealthCheck.Select(e => e.Name)));
|
||||
})
|
||||
.WithCauses(
|
||||
"Health check scheduler not running",
|
||||
"Environment agent intermittent connectivity")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Trigger health check refresh",
|
||||
"stella env health --refresh-all",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"{environments.Count} environment(s) ready")
|
||||
.WithEvidence("Environments", eb =>
|
||||
{
|
||||
eb.Add("environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("dev_environments", devEnvs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("staging_environments", stagingEnvs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("prod_environments", prodEnvs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_count", "0");
|
||||
eb.Add("unhealthy_count", "0");
|
||||
eb.Add("environment_names", string.Join(", ", environments.Select(e => e.Name)));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check environments: {ex.Message}")
|
||||
.WithEvidence("Environment Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("error_message", ex.Message);
|
||||
eb.Add("connection_error_type", GetConnectionErrorType(ex));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Environment check timed out")
|
||||
.WithEvidence("Environment Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static bool IsProd(string envType) =>
|
||||
envType.Equals("prod", StringComparison.OrdinalIgnoreCase) ||
|
||||
envType.Equals("production", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
private static List<EnvironmentInfo> ParseEnvironments(string json)
|
||||
{
|
||||
var envs = new List<EnvironmentInfo>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
|
||||
var envsArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("environments", out var arr) ? arr : default;
|
||||
|
||||
if (envsArray.ValueKind != JsonValueKind.Array)
|
||||
return envs;
|
||||
|
||||
foreach (var env in envsArray.EnumerateArray())
|
||||
{
|
||||
var id = env.TryGetProperty("id", out var idEl) ? idEl.GetString() : null;
|
||||
var name = env.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
|
||||
if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
|
||||
continue;
|
||||
|
||||
var type = env.TryGetProperty("type", out var typeEl) ? typeEl.GetString() ?? "unknown" : "unknown";
|
||||
var isReachable = env.TryGetProperty("isReachable", out var reachEl) && reachEl.GetBoolean();
|
||||
var isHealthy = env.TryGetProperty("isHealthy", out var healthEl) && healthEl.GetBoolean();
|
||||
var currentVersion = env.TryGetProperty("currentVersion", out var verEl) ? verEl.GetString() : null;
|
||||
var error = env.TryGetProperty("error", out var errEl) ? errEl.GetString() : null;
|
||||
var lastCheck = env.TryGetProperty("lastHealthCheck", out var checkEl) &&
|
||||
DateTimeOffset.TryParse(checkEl.GetString(), out var dt) ? dt : (DateTimeOffset?)null;
|
||||
|
||||
envs.Add(new EnvironmentInfo
|
||||
{
|
||||
Id = id,
|
||||
Name = name,
|
||||
Type = type,
|
||||
IsReachable = isReachable,
|
||||
IsHealthy = isHealthy,
|
||||
CurrentVersion = currentVersion,
|
||||
Error = error,
|
||||
LastHealthCheck = lastCheck
|
||||
});
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort parsing
|
||||
}
|
||||
|
||||
return envs;
|
||||
}
|
||||
|
||||
private static string GetConnectionErrorType(HttpRequestException ex)
|
||||
{
|
||||
var message = ex.Message.ToLowerInvariant();
|
||||
if (message.Contains("ssl") || message.Contains("tls"))
|
||||
return "ssl_error";
|
||||
if (message.Contains("name") || message.Contains("dns"))
|
||||
return "dns_failure";
|
||||
if (message.Contains("refused"))
|
||||
return "refused";
|
||||
return "connection_failed";
|
||||
}
|
||||
|
||||
private sealed record EnvironmentInfo
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required string Type { get; init; }
|
||||
public bool IsReachable { get; init; }
|
||||
public bool IsHealthy { get; init; }
|
||||
public string? CurrentVersion { get; init; }
|
||||
public string? Error { get; init; }
|
||||
public DateTimeOffset? LastHealthCheck { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,447 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// PromotionGateHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
|
||||
// Task: RELPIPE-003 - Implement PromotionGateHealthCheck
|
||||
// Description: Check health of promotion gates between environments
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Release.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks health of promotion gates between environments.
|
||||
/// Verifies policy engine availability, attestation requirements, and approval configurations.
|
||||
/// </summary>
|
||||
public sealed class PromotionGateHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.release";
|
||||
private const string CategoryName = "Release Pipeline";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.release.promotion.gates";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Promotion Gate Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Check health of promotion gates between environments";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["release", "promotion", "gates", "policy", "attestation"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var releaseUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(releaseUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
// Query promotion gates configuration
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/promotion-gates",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve promotion gates: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Promotion Gates Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Release Orchestrator unavailable", "API endpoint not found")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check Release Orchestrator health",
|
||||
$"curl -s {orchestratorUrl}/health",
|
||||
CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var gatesJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var gates = ParsePromotionGates(gatesJson);
|
||||
|
||||
if (gates.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Pass("No promotion gates configured")
|
||||
.WithEvidence("Promotion Gates", eb =>
|
||||
{
|
||||
eb.Add("gate_count", "0");
|
||||
eb.Add("gates_with_policy", "0");
|
||||
eb.Add("gates_with_attestation", "0");
|
||||
eb.Add("gates_with_approval", "0");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check gate health
|
||||
var issues = new List<GateIssue>();
|
||||
|
||||
foreach (var gate in gates)
|
||||
{
|
||||
// Check if required policies are available
|
||||
if (gate.RequiresPolicyPass && gate.RequiredPolicies.Count > 0)
|
||||
{
|
||||
var policyCheck = await CheckPoliciesAvailableAsync(
|
||||
httpClient, context, gate.RequiredPolicies, ct);
|
||||
if (!policyCheck.AllAvailable)
|
||||
{
|
||||
issues.Add(new GateIssue
|
||||
{
|
||||
GateId = gate.Id,
|
||||
GateName = gate.Name,
|
||||
IssueType = "missing_policies",
|
||||
Details = $"Missing policies: {string.Join(", ", policyCheck.MissingPolicies)}"
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Check if attestation types are configured
|
||||
if (gate.RequiresAttestations && gate.RequiredAttestations.Count > 0)
|
||||
{
|
||||
// Verify attestor is reachable
|
||||
var attestorCheck = await CheckAttestorAvailableAsync(httpClient, context, ct);
|
||||
if (!attestorCheck)
|
||||
{
|
||||
issues.Add(new GateIssue
|
||||
{
|
||||
GateId = gate.Id,
|
||||
GateName = gate.Name,
|
||||
IssueType = "attestor_unavailable",
|
||||
Details = "Attestor service not reachable"
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Check approval configuration
|
||||
if (gate.RequiresApproval && gate.Approvers.Count == 0)
|
||||
{
|
||||
issues.Add(new GateIssue
|
||||
{
|
||||
GateId = gate.Id,
|
||||
GateName = gate.Name,
|
||||
IssueType = "no_approvers",
|
||||
Details = "Approval required but no approvers configured"
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
var gatesWithPolicy = gates.Count(g => g.RequiresPolicyPass);
|
||||
var gatesWithAttestation = gates.Count(g => g.RequiresAttestations);
|
||||
var gatesWithApproval = gates.Count(g => g.RequiresApproval);
|
||||
|
||||
if (issues.Count > 0)
|
||||
{
|
||||
var severity = issues.Any(i => i.IssueType == "missing_policies" || i.IssueType == "no_approvers")
|
||||
? DoctorSeverity.Fail
|
||||
: DoctorSeverity.Warn;
|
||||
|
||||
var resultBuilder = severity == DoctorSeverity.Fail
|
||||
? builder.Fail($"{issues.Count} promotion gate issue(s) detected")
|
||||
: builder.Warn($"{issues.Count} promotion gate issue(s) detected");
|
||||
|
||||
return resultBuilder
|
||||
.WithEvidence("Promotion Gates", eb =>
|
||||
{
|
||||
eb.Add("gate_count", gates.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("gates_with_policy", gatesWithPolicy.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("gates_with_attestation", gatesWithAttestation.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("gates_with_approval", gatesWithApproval.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("issue_count", issues.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("issues", string.Join("; ", issues.Select(i => $"{i.GateName}:{i.IssueType}")));
|
||||
})
|
||||
.WithCauses(
|
||||
"Required policies not loaded in policy engine",
|
||||
"Attestor service unavailable",
|
||||
"Approval workflow misconfigured",
|
||||
"Environment deleted but gate remains")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "List promotion gates",
|
||||
"stella release gates list",
|
||||
CommandType.Shell);
|
||||
|
||||
if (issues.Any(i => i.IssueType == "missing_policies"))
|
||||
{
|
||||
rb.AddStep(2, "Check policy engine",
|
||||
"stella policy list",
|
||||
CommandType.Shell);
|
||||
}
|
||||
|
||||
if (issues.Any(i => i.IssueType == "attestor_unavailable"))
|
||||
{
|
||||
rb.AddStep(3, "Check attestor health",
|
||||
"stella doctor --check check.attestation.*",
|
||||
CommandType.Shell);
|
||||
}
|
||||
|
||||
if (issues.Any(i => i.IssueType == "no_approvers"))
|
||||
{
|
||||
rb.AddStep(4, "Configure approvers",
|
||||
"stella release gates configure <gate-id> --approvers <user>",
|
||||
CommandType.Manual);
|
||||
}
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"{gates.Count} promotion gate(s) healthy")
|
||||
.WithEvidence("Promotion Gates", eb =>
|
||||
{
|
||||
eb.Add("gate_count", gates.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("gates_with_policy", gatesWithPolicy.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("gates_with_attestation", gatesWithAttestation.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("gates_with_approval", gatesWithApproval.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("gate_names", string.Join(", ", gates.Select(g => g.Name)));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check promotion gates: {ex.Message}")
|
||||
.WithEvidence("Promotion Gates Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("error_message", ex.Message);
|
||||
eb.Add("connection_error_type", GetConnectionErrorType(ex));
|
||||
})
|
||||
.WithCauses("Release Orchestrator unavailable", "Network issue")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Promotion gate check timed out")
|
||||
.WithEvidence("Promotion Gates Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<(bool AllAvailable, List<string> MissingPolicies)> CheckPoliciesAvailableAsync(
|
||||
HttpClient httpClient,
|
||||
DoctorPluginContext context,
|
||||
IReadOnlyList<string> requiredPolicies,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var policyEngineUrl = context.Configuration["Policy:Engine:Url"]
|
||||
?? context.Configuration["PolicyEngine:Url"]
|
||||
?? "http://localhost:8181";
|
||||
|
||||
try
|
||||
{
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{policyEngineUrl.TrimEnd('/')}/v1/policies",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return (false, requiredPolicies.ToList());
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
|
||||
var availablePolicies = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
if (doc.RootElement.TryGetProperty("result", out var result) &&
|
||||
result.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var policy in result.EnumerateArray())
|
||||
{
|
||||
if (policy.TryGetProperty("id", out var idEl))
|
||||
{
|
||||
var id = idEl.GetString();
|
||||
if (!string.IsNullOrEmpty(id))
|
||||
{
|
||||
availablePolicies.Add(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var missing = requiredPolicies
|
||||
.Where(p => !availablePolicies.Contains(p))
|
||||
.ToList();
|
||||
|
||||
return (missing.Count == 0, missing);
|
||||
}
|
||||
catch
|
||||
{
|
||||
return (false, requiredPolicies.ToList());
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<bool> CheckAttestorAvailableAsync(
|
||||
HttpClient httpClient,
|
||||
DoctorPluginContext context,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var attestorUrl = context.Configuration["Attestor:Url"]
|
||||
?? "http://localhost:5090";
|
||||
|
||||
try
|
||||
{
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{attestorUrl.TrimEnd('/')}/health",
|
||||
ct);
|
||||
|
||||
return response.IsSuccessStatusCode;
|
||||
}
|
||||
catch
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static List<PromotionGateInfo> ParsePromotionGates(string json)
|
||||
{
|
||||
var gates = new List<PromotionGateInfo>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
|
||||
var gatesArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("gates", out var arr) ? arr : default;
|
||||
|
||||
if (gatesArray.ValueKind != JsonValueKind.Array)
|
||||
return gates;
|
||||
|
||||
foreach (var gate in gatesArray.EnumerateArray())
|
||||
{
|
||||
var id = gate.TryGetProperty("id", out var idEl) ? idEl.GetString() : null;
|
||||
var name = gate.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
|
||||
if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
|
||||
continue;
|
||||
|
||||
var requiresPolicy = gate.TryGetProperty("requiresPolicyPass", out var policyEl) && policyEl.GetBoolean();
|
||||
var requiresAttestation = gate.TryGetProperty("requiresAttestations", out var attestEl) && attestEl.GetBoolean();
|
||||
var requiresApproval = gate.TryGetProperty("requiresApproval", out var approvalEl) && approvalEl.GetBoolean();
|
||||
|
||||
var requiredPolicies = new List<string>();
|
||||
if (gate.TryGetProperty("requiredPolicies", out var policiesEl) && policiesEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var p in policiesEl.EnumerateArray())
|
||||
{
|
||||
var policyId = p.GetString();
|
||||
if (!string.IsNullOrEmpty(policyId))
|
||||
requiredPolicies.Add(policyId);
|
||||
}
|
||||
}
|
||||
|
||||
var requiredAttestations = new List<string>();
|
||||
if (gate.TryGetProperty("requiredAttestations", out var attestationsEl) && attestationsEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var a in attestationsEl.EnumerateArray())
|
||||
{
|
||||
var attestationType = a.GetString();
|
||||
if (!string.IsNullOrEmpty(attestationType))
|
||||
requiredAttestations.Add(attestationType);
|
||||
}
|
||||
}
|
||||
|
||||
var approvers = new List<string>();
|
||||
if (gate.TryGetProperty("approvers", out var approversEl) && approversEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var approver in approversEl.EnumerateArray())
|
||||
{
|
||||
var approverId = approver.GetString();
|
||||
if (!string.IsNullOrEmpty(approverId))
|
||||
approvers.Add(approverId);
|
||||
}
|
||||
}
|
||||
|
||||
gates.Add(new PromotionGateInfo
|
||||
{
|
||||
Id = id,
|
||||
Name = name,
|
||||
RequiresPolicyPass = requiresPolicy,
|
||||
RequiresAttestations = requiresAttestation,
|
||||
RequiresApproval = requiresApproval,
|
||||
RequiredPolicies = requiredPolicies,
|
||||
RequiredAttestations = requiredAttestations,
|
||||
Approvers = approvers
|
||||
});
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort parsing
|
||||
}
|
||||
|
||||
return gates;
|
||||
}
|
||||
|
||||
private static string GetConnectionErrorType(HttpRequestException ex)
|
||||
{
|
||||
var message = ex.Message.ToLowerInvariant();
|
||||
if (message.Contains("ssl") || message.Contains("tls"))
|
||||
return "ssl_error";
|
||||
if (message.Contains("name") || message.Contains("dns"))
|
||||
return "dns_failure";
|
||||
if (message.Contains("refused"))
|
||||
return "refused";
|
||||
return "connection_failed";
|
||||
}
|
||||
|
||||
private sealed record PromotionGateInfo
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public bool RequiresPolicyPass { get; init; }
|
||||
public bool RequiresAttestations { get; init; }
|
||||
public bool RequiresApproval { get; init; }
|
||||
public IReadOnlyList<string> RequiredPolicies { get; init; } = [];
|
||||
public IReadOnlyList<string> RequiredAttestations { get; init; } = [];
|
||||
public IReadOnlyList<string> Approvers { get; init; } = [];
|
||||
}
|
||||
|
||||
private sealed record GateIssue
|
||||
{
|
||||
public required string GateId { get; init; }
|
||||
public required string GateName { get; init; }
|
||||
public required string IssueType { get; init; }
|
||||
public required string Details { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,359 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ReleaseConfigurationCheck.cs
|
||||
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
|
||||
// Task: RELPIPE-006 - Implement ReleaseConfigurationCheck
|
||||
// Description: Check validity of release configuration
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Release.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks validity of release configuration.
|
||||
/// Verifies workflow definitions, stage transitions, and required integrations.
|
||||
/// </summary>
|
||||
public sealed class ReleaseConfigurationCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.release";
|
||||
private const string CategoryName = "Release Pipeline";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.release.configuration";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Release Configuration";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Check validity of release workflow configuration";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["release", "configuration", "workflow", "validation"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var releaseUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(releaseUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
// Query workflow configurations
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/workflows",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve workflow configurations: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Configuration Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var workflowsJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var workflows = ParseWorkflows(workflowsJson);
|
||||
|
||||
if (workflows.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Warn("No release workflows configured")
|
||||
.WithEvidence("Workflows", eb =>
|
||||
{
|
||||
eb.Add("workflow_count", "0");
|
||||
})
|
||||
.WithCauses("Release workflows not yet defined")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Create a release workflow",
|
||||
"stella release workflow create --name <name> --stages dev,staging,prod",
|
||||
CommandType.Manual))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Validate each workflow
|
||||
var validationErrors = new List<ValidationError>();
|
||||
|
||||
foreach (var workflow in workflows)
|
||||
{
|
||||
// Check for empty stages
|
||||
if (workflow.Stages.Count == 0)
|
||||
{
|
||||
validationErrors.Add(new ValidationError
|
||||
{
|
||||
WorkflowId = workflow.Id,
|
||||
WorkflowName = workflow.Name,
|
||||
ErrorType = "no_stages",
|
||||
Message = "Workflow has no stages defined"
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for invalid transitions
|
||||
var validStages = workflow.Stages.Select(s => s.Name).ToHashSet(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var stage in workflow.Stages)
|
||||
{
|
||||
foreach (var nextStage in stage.NextStages)
|
||||
{
|
||||
if (!validStages.Contains(nextStage))
|
||||
{
|
||||
validationErrors.Add(new ValidationError
|
||||
{
|
||||
WorkflowId = workflow.Id,
|
||||
WorkflowName = workflow.Name,
|
||||
ErrorType = "invalid_transition",
|
||||
Message = $"Stage '{stage.Name}' references unknown stage '{nextStage}'"
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for unreachable stages (no incoming transitions)
|
||||
var reachableStages = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
if (workflow.Stages.Count > 0)
|
||||
{
|
||||
reachableStages.Add(workflow.Stages[0].Name); // First stage is entry point
|
||||
}
|
||||
foreach (var stage in workflow.Stages)
|
||||
{
|
||||
foreach (var next in stage.NextStages)
|
||||
{
|
||||
reachableStages.Add(next);
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var stage in workflow.Stages.Skip(1))
|
||||
{
|
||||
if (!reachableStages.Contains(stage.Name))
|
||||
{
|
||||
validationErrors.Add(new ValidationError
|
||||
{
|
||||
WorkflowId = workflow.Id,
|
||||
WorkflowName = workflow.Name,
|
||||
ErrorType = "unreachable_stage",
|
||||
Message = $"Stage '{stage.Name}' is unreachable (no incoming transitions)"
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Check for missing environment mapping
|
||||
foreach (var stage in workflow.Stages)
|
||||
{
|
||||
if (string.IsNullOrEmpty(stage.EnvironmentId))
|
||||
{
|
||||
validationErrors.Add(new ValidationError
|
||||
{
|
||||
WorkflowId = workflow.Id,
|
||||
WorkflowName = workflow.Name,
|
||||
ErrorType = "missing_environment",
|
||||
Message = $"Stage '{stage.Name}' has no target environment mapped"
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var activeWorkflows = workflows.Count(w => w.IsActive);
|
||||
var totalStages = workflows.Sum(w => w.Stages.Count);
|
||||
|
||||
if (validationErrors.Count > 0)
|
||||
{
|
||||
var hasBlockingErrors = validationErrors.Any(e =>
|
||||
e.ErrorType == "no_stages" ||
|
||||
e.ErrorType == "invalid_transition" ||
|
||||
e.ErrorType == "missing_environment");
|
||||
|
||||
var resultBuilder = hasBlockingErrors
|
||||
? builder.Fail($"{validationErrors.Count} workflow configuration error(s)")
|
||||
: builder.Warn($"{validationErrors.Count} workflow configuration warning(s)");
|
||||
|
||||
return resultBuilder
|
||||
.WithEvidence("Workflows", eb =>
|
||||
{
|
||||
eb.Add("workflow_count", workflows.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("active_workflow_count", activeWorkflows.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_stages", totalStages.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("validation_error_count", validationErrors.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("errors", string.Join("; ", validationErrors.Select(e => $"{e.WorkflowName}:{e.ErrorType}")));
|
||||
})
|
||||
.WithCauses(
|
||||
"Workflow configuration incomplete",
|
||||
"Stage transition misconfigured",
|
||||
"Environment deleted but workflow not updated")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View workflow details",
|
||||
$"stella release workflow show {validationErrors[0].WorkflowId}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Fix workflow configuration",
|
||||
$"stella release workflow edit {validationErrors[0].WorkflowId}",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"{workflows.Count} workflow(s) valid ({totalStages} stages)")
|
||||
.WithEvidence("Workflows", eb =>
|
||||
{
|
||||
eb.Add("workflow_count", workflows.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("active_workflow_count", activeWorkflows.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_stages", totalStages.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("validation_error_count", "0");
|
||||
eb.Add("workflow_names", string.Join(", ", workflows.Select(w => w.Name)));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check workflow configuration: {ex.Message}")
|
||||
.WithEvidence("Configuration Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Configuration check timed out")
|
||||
.WithEvidence("Configuration Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static List<WorkflowInfo> ParseWorkflows(string json)
|
||||
{
|
||||
var workflows = new List<WorkflowInfo>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
|
||||
var workflowsArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("workflows", out var arr) ? arr : default;
|
||||
|
||||
if (workflowsArray.ValueKind != JsonValueKind.Array)
|
||||
return workflows;
|
||||
|
||||
foreach (var workflow in workflowsArray.EnumerateArray())
|
||||
{
|
||||
var id = workflow.TryGetProperty("id", out var idEl) ? idEl.GetString() : null;
|
||||
var name = workflow.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
|
||||
if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
|
||||
continue;
|
||||
|
||||
var isActive = workflow.TryGetProperty("isActive", out var activeEl) && activeEl.GetBoolean();
|
||||
var stages = new List<StageInfo>();
|
||||
|
||||
if (workflow.TryGetProperty("stages", out var stagesEl) && stagesEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var stage in stagesEl.EnumerateArray())
|
||||
{
|
||||
var stageName = stage.TryGetProperty("name", out var stageNameEl) ? stageNameEl.GetString() : null;
|
||||
if (string.IsNullOrEmpty(stageName))
|
||||
continue;
|
||||
|
||||
var envId = stage.TryGetProperty("environmentId", out var envEl) ? envEl.GetString() : null;
|
||||
var nextStages = new List<string>();
|
||||
|
||||
if (stage.TryGetProperty("nextStages", out var nextEl) && nextEl.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var next in nextEl.EnumerateArray())
|
||||
{
|
||||
var nextName = next.GetString();
|
||||
if (!string.IsNullOrEmpty(nextName))
|
||||
nextStages.Add(nextName);
|
||||
}
|
||||
}
|
||||
|
||||
stages.Add(new StageInfo
|
||||
{
|
||||
Name = stageName,
|
||||
EnvironmentId = envId,
|
||||
NextStages = nextStages
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
workflows.Add(new WorkflowInfo
|
||||
{
|
||||
Id = id,
|
||||
Name = name,
|
||||
IsActive = isActive,
|
||||
Stages = stages
|
||||
});
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort parsing
|
||||
}
|
||||
|
||||
return workflows;
|
||||
}
|
||||
|
||||
private sealed record WorkflowInfo
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public bool IsActive { get; init; }
|
||||
public IReadOnlyList<StageInfo> Stages { get; init; } = [];
|
||||
}
|
||||
|
||||
private sealed record StageInfo
|
||||
{
|
||||
public required string Name { get; init; }
|
||||
public string? EnvironmentId { get; init; }
|
||||
public IReadOnlyList<string> NextStages { get; init; } = [];
|
||||
}
|
||||
|
||||
private sealed record ValidationError
|
||||
{
|
||||
public required string WorkflowId { get; init; }
|
||||
public required string WorkflowName { get; init; }
|
||||
public required string ErrorType { get; init; }
|
||||
public required string Message { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,287 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ReleaseScheduleHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
|
||||
// Task: RELPIPE-005 - Implement ReleaseScheduleHealthCheck
|
||||
// Description: Check health of scheduled releases
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Release.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks health of scheduled releases.
|
||||
/// Identifies missed schedules, conflicts, and upcoming releases requiring attention.
|
||||
/// </summary>
|
||||
public sealed class ReleaseScheduleHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.release";
|
||||
private const string CategoryName = "Release Pipeline";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.release.schedule";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Release Schedule Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Check health of scheduled releases";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Info;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["release", "schedule", "upcoming", "planning"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var releaseUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(releaseUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
// Query scheduled releases
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/releases/scheduled",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve scheduled releases: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Schedule Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var schedulesJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var schedules = ParseScheduledReleases(schedulesJson);
|
||||
|
||||
var now = context.TimeProvider.GetUtcNow();
|
||||
var upcoming24h = new List<ScheduleInfo>();
|
||||
var missedSchedules = new List<ScheduleInfo>();
|
||||
var conflicts = new List<(ScheduleInfo A, ScheduleInfo B)>();
|
||||
|
||||
foreach (var schedule in schedules)
|
||||
{
|
||||
var timeUntil = schedule.ScheduledAt - now;
|
||||
|
||||
if (timeUntil < TimeSpan.Zero && schedule.Status == "pending")
|
||||
{
|
||||
// Missed schedule
|
||||
missedSchedules.Add(schedule);
|
||||
}
|
||||
else if (timeUntil > TimeSpan.Zero && timeUntil <= TimeSpan.FromHours(24))
|
||||
{
|
||||
upcoming24h.Add(schedule);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for conflicts (same environment within 1 hour)
|
||||
var pendingSchedules = schedules.Where(s => s.Status == "pending").ToList();
|
||||
for (int i = 0; i < pendingSchedules.Count; i++)
|
||||
{
|
||||
for (int j = i + 1; j < pendingSchedules.Count; j++)
|
||||
{
|
||||
var a = pendingSchedules[i];
|
||||
var b = pendingSchedules[j];
|
||||
|
||||
if (a.TargetEnvironment == b.TargetEnvironment &&
|
||||
Math.Abs((a.ScheduledAt - b.ScheduledAt).TotalHours) < 1)
|
||||
{
|
||||
conflicts.Add((a, b));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (missedSchedules.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{missedSchedules.Count} scheduled release(s) missed")
|
||||
.WithEvidence("Release Schedule", eb =>
|
||||
{
|
||||
eb.Add("scheduled_release_count", schedules.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("upcoming_24h_count", upcoming24h.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("missed_schedule_count", missedSchedules.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("conflict_count", conflicts.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("missed_releases", string.Join(", ", missedSchedules.Select(s => s.Name)));
|
||||
})
|
||||
.WithCauses(
|
||||
"Release scheduler service not running",
|
||||
"Prerequisite not met at scheduled time",
|
||||
"Environment was unavailable")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View missed schedules",
|
||||
"stella release schedule list --missed",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Reschedule or run immediately",
|
||||
$"stella release schedule run {missedSchedules[0].Id}",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (conflicts.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{conflicts.Count} schedule conflict(s) detected")
|
||||
.WithEvidence("Release Schedule", eb =>
|
||||
{
|
||||
eb.Add("scheduled_release_count", schedules.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("upcoming_24h_count", upcoming24h.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("missed_schedule_count", "0");
|
||||
eb.Add("conflict_count", conflicts.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("conflicts", string.Join("; ", conflicts.Select(c => $"{c.A.Name} vs {c.B.Name}")));
|
||||
})
|
||||
.WithCauses(
|
||||
"Multiple releases to same environment scheduled too close",
|
||||
"Manual schedule override without checking conflicts")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "View schedule conflicts",
|
||||
"stella release schedule list --conflicts",
|
||||
CommandType.Shell)
|
||||
.AddStep(2, "Reschedule one of the conflicting releases",
|
||||
"stella release schedule update <id> --time <new-time>",
|
||||
CommandType.Manual))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (upcoming24h.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Pass($"{upcoming24h.Count} release(s) scheduled in next 24 hours")
|
||||
.WithEvidence("Release Schedule", eb =>
|
||||
{
|
||||
eb.Add("scheduled_release_count", schedules.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("upcoming_24h_count", upcoming24h.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("missed_schedule_count", "0");
|
||||
eb.Add("conflict_count", "0");
|
||||
eb.Add("upcoming_releases", string.Join(", ", upcoming24h.Select(s =>
|
||||
$"{s.Name}@{s.ScheduledAt:HH:mm}")));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass("No scheduled releases or issues")
|
||||
.WithEvidence("Release Schedule", eb =>
|
||||
{
|
||||
eb.Add("scheduled_release_count", schedules.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("upcoming_24h_count", "0");
|
||||
eb.Add("missed_schedule_count", "0");
|
||||
eb.Add("conflict_count", "0");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check release schedules: {ex.Message}")
|
||||
.WithEvidence("Schedule Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Schedule check timed out")
|
||||
.WithEvidence("Schedule Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static List<ScheduleInfo> ParseScheduledReleases(string json)
|
||||
{
|
||||
var schedules = new List<ScheduleInfo>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
|
||||
var schedulesArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("schedules", out var arr) ? arr : default;
|
||||
|
||||
if (schedulesArray.ValueKind != JsonValueKind.Array)
|
||||
return schedules;
|
||||
|
||||
foreach (var schedule in schedulesArray.EnumerateArray())
|
||||
{
|
||||
var id = schedule.TryGetProperty("id", out var idEl) ? idEl.GetString() : null;
|
||||
var name = schedule.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
|
||||
if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
|
||||
continue;
|
||||
|
||||
var scheduledAt = schedule.TryGetProperty("scheduledAt", out var timeEl) &&
|
||||
DateTimeOffset.TryParse(timeEl.GetString(), out var dt) ? dt : DateTimeOffset.UtcNow;
|
||||
var targetEnv = schedule.TryGetProperty("targetEnvironment", out var envEl) ? envEl.GetString() ?? "" : "";
|
||||
var status = schedule.TryGetProperty("status", out var statusEl) ? statusEl.GetString() ?? "pending" : "pending";
|
||||
|
||||
schedules.Add(new ScheduleInfo
|
||||
{
|
||||
Id = id,
|
||||
Name = name,
|
||||
ScheduledAt = scheduledAt,
|
||||
TargetEnvironment = targetEnv,
|
||||
Status = status
|
||||
});
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort parsing
|
||||
}
|
||||
|
||||
return schedules;
|
||||
}
|
||||
|
||||
private sealed record ScheduleInfo
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required DateTimeOffset ScheduledAt { get; init; }
|
||||
public required string TargetEnvironment { get; init; }
|
||||
public required string Status { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,331 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// RollbackReadinessCheck.cs
|
||||
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
|
||||
// Task: RELPIPE-007 - Implement RollbackReadinessCheck
|
||||
// Description: Check rollback capabilities for environments
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Release.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks rollback capabilities for environments.
|
||||
/// Verifies previous deployments are available and health probes are configured.
|
||||
/// </summary>
|
||||
public sealed class RollbackReadinessCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.release";
|
||||
private const string CategoryName = "Release Pipeline";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.release.rollback.readiness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Rollback Readiness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Check rollback capabilities for production environments";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["release", "rollback", "disaster-recovery", "production"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var releaseUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"];
|
||||
return !string.IsNullOrEmpty(releaseUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
|
||||
?? context.Configuration["Release:Orchestrator:Url"]
|
||||
?? "http://localhost:5080";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
// Query environments with rollback status
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/rollback-status",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
// Try fallback endpoint
|
||||
response = await httpClient.GetAsync(
|
||||
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments",
|
||||
ct);
|
||||
}
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve rollback status: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Rollback Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var statusJson = await response.Content.ReadAsStringAsync(ct);
|
||||
var environments = ParseRollbackStatus(statusJson);
|
||||
|
||||
// Focus on production environments
|
||||
var prodEnvs = environments
|
||||
.Where(e => IsProd(e.Type))
|
||||
.ToList();
|
||||
|
||||
if (prodEnvs.Count == 0)
|
||||
{
|
||||
return builder
|
||||
.Pass("No production environments to check")
|
||||
.WithEvidence("Rollback Readiness", eb =>
|
||||
{
|
||||
eb.Add("prod_environment_count", "0");
|
||||
eb.Add("total_environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
|
||||
var cannotRollback = new List<RollbackInfo>();
|
||||
var noHealthProbe = new List<RollbackInfo>();
|
||||
var noPreviousVersion = new List<RollbackInfo>();
|
||||
|
||||
foreach (var env in prodEnvs)
|
||||
{
|
||||
if (!env.CanRollback)
|
||||
{
|
||||
if (string.IsNullOrEmpty(env.PreviousVersion))
|
||||
{
|
||||
noPreviousVersion.Add(env);
|
||||
}
|
||||
else
|
||||
{
|
||||
cannotRollback.Add(env);
|
||||
}
|
||||
}
|
||||
|
||||
if (!env.HasHealthProbe)
|
||||
{
|
||||
noHealthProbe.Add(env);
|
||||
}
|
||||
}
|
||||
|
||||
var rollbackReady = prodEnvs.Count - cannotRollback.Count - noPreviousVersion.Count;
|
||||
|
||||
if (cannotRollback.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{cannotRollback.Count} production environment(s) cannot rollback")
|
||||
.WithEvidence("Rollback Readiness", eb =>
|
||||
{
|
||||
eb.Add("prod_environment_count", prodEnvs.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("rollback_ready_count", rollbackReady.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("cannot_rollback_count", cannotRollback.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("no_health_probe_count", noHealthProbe.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("cannot_rollback_environments", string.Join(", ", cannotRollback.Select(e => e.Name)));
|
||||
if (cannotRollback.Count > 0 && !string.IsNullOrEmpty(cannotRollback[0].RollbackBlocker))
|
||||
{
|
||||
eb.Add("rollback_blocker", cannotRollback[0].RollbackBlocker);
|
||||
}
|
||||
})
|
||||
.WithCauses(
|
||||
"Previous deployment artifacts not retained",
|
||||
"Database migration not reversible",
|
||||
"Breaking change deployed",
|
||||
"Rollback manually disabled")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View rollback blockers",
|
||||
$"stella env rollback-status {cannotRollback[0].Name}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Check deployment history",
|
||||
$"stella env history {cannotRollback[0].Name}",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(3, "Configure artifact retention",
|
||||
"stella config set Release:ArtifactRetention:Count 5",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (noPreviousVersion.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{noPreviousVersion.Count} production environment(s) have no previous version")
|
||||
.WithEvidence("Rollback Readiness", eb =>
|
||||
{
|
||||
eb.Add("prod_environment_count", prodEnvs.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("rollback_ready_count", rollbackReady.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("no_previous_version_count", noPreviousVersion.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("no_health_probe_count", noHealthProbe.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("environments_without_previous", string.Join(", ", noPreviousVersion.Select(e => e.Name)));
|
||||
})
|
||||
.WithCauses(
|
||||
"First deployment to environment",
|
||||
"Deployment history cleared",
|
||||
"Environment recently created")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "This is expected for new environments",
|
||||
"# After the next successful deployment, rollback will be available",
|
||||
CommandType.Comment))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (noHealthProbe.Count > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{noHealthProbe.Count} production environment(s) missing health probes")
|
||||
.WithEvidence("Rollback Readiness", eb =>
|
||||
{
|
||||
eb.Add("prod_environment_count", prodEnvs.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("rollback_ready_count", rollbackReady.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("no_health_probe_count", noHealthProbe.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("environments_without_probe", string.Join(", ", noHealthProbe.Select(e => e.Name)));
|
||||
})
|
||||
.WithCauses(
|
||||
"Health probe not configured",
|
||||
"Auto-rollback on failure disabled")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Configure health probe",
|
||||
$"stella env configure {noHealthProbe[0].Name} --health-probe-url <url>",
|
||||
CommandType.Manual);
|
||||
rb.AddStep(2, "Enable auto-rollback",
|
||||
$"stella env configure {noHealthProbe[0].Name} --auto-rollback-on-failure",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"{prodEnvs.Count} production environment(s) ready for rollback")
|
||||
.WithEvidence("Rollback Readiness", eb =>
|
||||
{
|
||||
eb.Add("prod_environment_count", prodEnvs.Count.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("rollback_ready_count", rollbackReady.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("cannot_rollback_count", "0");
|
||||
eb.Add("no_health_probe_count", "0");
|
||||
eb.Add("prod_environments", string.Join(", ", prodEnvs.Select(e => $"{e.Name}:{e.CurrentVersion}")));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check rollback readiness: {ex.Message}")
|
||||
.WithEvidence("Rollback Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("error_message", ex.Message);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Rollback readiness check timed out")
|
||||
.WithEvidence("Rollback Status", eb =>
|
||||
{
|
||||
eb.Add("orchestrator_url", orchestratorUrl);
|
||||
eb.Add("connection_error_type", "timeout");
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static bool IsProd(string envType) =>
|
||||
envType.Equals("prod", StringComparison.OrdinalIgnoreCase) ||
|
||||
envType.Equals("production", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
private static List<RollbackInfo> ParseRollbackStatus(string json)
|
||||
{
|
||||
var envs = new List<RollbackInfo>();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
|
||||
var envsArray = doc.RootElement.ValueKind == JsonValueKind.Array
|
||||
? doc.RootElement
|
||||
: doc.RootElement.TryGetProperty("environments", out var arr) ? arr : default;
|
||||
|
||||
if (envsArray.ValueKind != JsonValueKind.Array)
|
||||
return envs;
|
||||
|
||||
foreach (var env in envsArray.EnumerateArray())
|
||||
{
|
||||
var id = env.TryGetProperty("id", out var idEl) ? idEl.GetString() : null;
|
||||
var name = env.TryGetProperty("name", out var nameEl) ? nameEl.GetString() : null;
|
||||
|
||||
if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
|
||||
continue;
|
||||
|
||||
var type = env.TryGetProperty("type", out var typeEl) ? typeEl.GetString() ?? "unknown" : "unknown";
|
||||
var canRollback = env.TryGetProperty("canRollback", out var rollbackEl) && rollbackEl.GetBoolean();
|
||||
var previousVersion = env.TryGetProperty("previousVersion", out var prevEl) ? prevEl.GetString() : null;
|
||||
var currentVersion = env.TryGetProperty("currentVersion", out var currEl) ? currEl.GetString() : null;
|
||||
var hasHealthProbe = env.TryGetProperty("hasHealthProbe", out var probeEl) && probeEl.GetBoolean();
|
||||
var rollbackBlocker = env.TryGetProperty("rollbackBlocker", out var blockerEl) ? blockerEl.GetString() : null;
|
||||
|
||||
envs.Add(new RollbackInfo
|
||||
{
|
||||
Id = id,
|
||||
Name = name,
|
||||
Type = type,
|
||||
CanRollback = canRollback,
|
||||
PreviousVersion = previousVersion,
|
||||
CurrentVersion = currentVersion,
|
||||
HasHealthProbe = hasHealthProbe,
|
||||
RollbackBlocker = rollbackBlocker
|
||||
});
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Best effort parsing
|
||||
}
|
||||
|
||||
return envs;
|
||||
}
|
||||
|
||||
private sealed record RollbackInfo
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required string Type { get; init; }
|
||||
public bool CanRollback { get; init; }
|
||||
public string? PreviousVersion { get; init; }
|
||||
public string? CurrentVersion { get; init; }
|
||||
public bool HasHealthProbe { get; init; }
|
||||
public string? RollbackBlocker { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ReleasePluginServiceCollectionExtensions.cs
|
||||
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
|
||||
// Task: RELPIPE-001 - Create Release plugin scaffold
|
||||
// Description: Extension methods for registering the Release plugin
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Release.DependencyInjection;
|
||||
|
||||
/// <summary>
|
||||
/// Extension methods for registering the Release Doctor plugin.
|
||||
/// </summary>
|
||||
public static class ReleasePluginServiceCollectionExtensions
|
||||
{
|
||||
/// <summary>
|
||||
/// Adds the Release pipeline health Doctor plugin.
|
||||
/// Provides checks for active releases, promotion gates, environment readiness,
|
||||
/// release schedules, configuration validation, and rollback readiness.
|
||||
/// </summary>
|
||||
/// <param name="services">The service collection.</param>
|
||||
/// <returns>The service collection for chaining.</returns>
|
||||
public static IServiceCollection AddDoctorReleasePlugin(this IServiceCollection services)
|
||||
{
|
||||
services.AddSingleton<IDoctorPlugin, ReleaseDoctorPlugin>();
|
||||
return services;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ReleaseDoctorPlugin.cs
|
||||
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
|
||||
// Task: RELPIPE-001 - Create Release plugin scaffold
|
||||
// Description: Doctor plugin for release pipeline health monitoring
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using StellaOps.Doctor.Plugin.Release.Checks;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Release;
|
||||
|
||||
/// <summary>
|
||||
/// Doctor plugin for release pipeline health checks.
|
||||
/// Monitors active releases, promotion gates, environment readiness, and rollback capabilities.
|
||||
/// </summary>
|
||||
public sealed class ReleaseDoctorPlugin : IDoctorPlugin
|
||||
{
|
||||
private static readonly Version PluginVersion = new(1, 0, 0);
|
||||
private static readonly Version MinVersion = new(1, 0, 0);
|
||||
|
||||
/// <inheritdoc />
|
||||
public string PluginId => "stellaops.doctor.release";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string DisplayName => "Release Pipeline";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorCategory Category => DoctorCategory.Release;
|
||||
|
||||
/// <inheritdoc />
|
||||
public Version Version => PluginVersion;
|
||||
|
||||
/// <inheritdoc />
|
||||
public Version MinEngineVersion => MinVersion;
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool IsAvailable(IServiceProvider services)
|
||||
{
|
||||
// Available when ReleaseOrchestrator service is configured
|
||||
// Individual checks handle their own availability based on configuration
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<IDoctorCheck> GetChecks(DoctorPluginContext context)
|
||||
{
|
||||
return new IDoctorCheck[]
|
||||
{
|
||||
new ActiveReleaseHealthCheck(),
|
||||
new PromotionGateHealthCheck(),
|
||||
new EnvironmentReadinessCheck(),
|
||||
new ReleaseScheduleHealthCheck(),
|
||||
new ReleaseConfigurationCheck(),
|
||||
new RollbackReadinessCheck()
|
||||
};
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public Task InitializeAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
// No initialization required - checks are stateless
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,145 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// IReleaseHealthClient.cs
|
||||
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
|
||||
// Task: RELPIPE-001 - Create Release plugin scaffold
|
||||
// Description: Interface for querying release orchestrator health status
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Release.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Client interface for querying release orchestrator health status.
|
||||
/// </summary>
|
||||
public interface IReleaseHealthClient
|
||||
{
|
||||
/// <summary>
|
||||
/// Gets all currently active releases.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<ActiveRelease>> GetActiveReleasesAsync(CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets releases that are stuck or have issues.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<StuckRelease>> GetStuckReleasesAsync(TimeSpan stuckThreshold, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets releases awaiting approval.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<PendingApproval>> GetPendingApprovalsAsync(CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets configured environments and their status.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<EnvironmentStatus>> GetEnvironmentStatusesAsync(CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets promotion gate configurations.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<PromotionGate>> GetPromotionGatesAsync(CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets scheduled releases.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<ScheduledRelease>> GetScheduledReleasesAsync(CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets rollback capabilities for an environment.
|
||||
/// </summary>
|
||||
Task<RollbackStatus> GetRollbackStatusAsync(string environmentId, CancellationToken ct = default);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Represents an active release.
|
||||
/// </summary>
|
||||
public sealed record ActiveRelease
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required string State { get; init; }
|
||||
public required DateTimeOffset StartedAt { get; init; }
|
||||
public string? CurrentStep { get; init; }
|
||||
public string? TargetEnvironment { get; init; }
|
||||
public string? Error { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Represents a release that is stuck or has exceeded expected duration.
|
||||
/// </summary>
|
||||
public sealed record StuckRelease
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required string State { get; init; }
|
||||
public required TimeSpan StuckDuration { get; init; }
|
||||
public string? FailedStep { get; init; }
|
||||
public string? Error { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Represents a release awaiting approval.
|
||||
/// </summary>
|
||||
public sealed record PendingApproval
|
||||
{
|
||||
public required string ReleaseId { get; init; }
|
||||
public required string ReleaseName { get; init; }
|
||||
public required string ApprovalGate { get; init; }
|
||||
public required DateTimeOffset RequestedAt { get; init; }
|
||||
public IReadOnlyList<string> RequiredApprovers { get; init; } = [];
|
||||
public IReadOnlyList<string> ReceivedApprovals { get; init; } = [];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Represents the status of a target environment.
|
||||
/// </summary>
|
||||
public sealed record EnvironmentStatus
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required string Type { get; init; } // dev, staging, prod
|
||||
public required bool IsReachable { get; init; }
|
||||
public required bool IsHealthy { get; init; }
|
||||
public string? CurrentVersion { get; init; }
|
||||
public string? Error { get; init; }
|
||||
public DateTimeOffset? LastHealthCheck { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Represents a promotion gate configuration.
|
||||
/// </summary>
|
||||
public sealed record PromotionGate
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required string SourceEnvironment { get; init; }
|
||||
public required string TargetEnvironment { get; init; }
|
||||
public required bool RequiresApproval { get; init; }
|
||||
public required bool RequiresAttestations { get; init; }
|
||||
public required bool RequiresPolicyPass { get; init; }
|
||||
public IReadOnlyList<string> RequiredPolicies { get; init; } = [];
|
||||
public IReadOnlyList<string> RequiredAttestations { get; init; } = [];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Represents a scheduled release.
|
||||
/// </summary>
|
||||
public sealed record ScheduledRelease
|
||||
{
|
||||
public required string Id { get; init; }
|
||||
public required string Name { get; init; }
|
||||
public required DateTimeOffset ScheduledAt { get; init; }
|
||||
public required string TargetEnvironment { get; init; }
|
||||
public string? Status { get; init; } // pending, confirmed, cancelled
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Represents rollback capabilities for an environment.
|
||||
/// </summary>
|
||||
public sealed record RollbackStatus
|
||||
{
|
||||
public required string EnvironmentId { get; init; }
|
||||
public required bool CanRollback { get; init; }
|
||||
public string? PreviousVersion { get; init; }
|
||||
public DateTimeOffset? PreviousDeployedAt { get; init; }
|
||||
public bool HasHealthProbe { get; init; }
|
||||
public string? RollbackBlocker { get; init; }
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Doctor.Plugin.Release</RootNamespace>
|
||||
<Description>Release pipeline health checks for Stella Ops Doctor diagnostics</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Doctor\StellaOps.Doctor.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,233 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ReachabilityComputationHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
|
||||
// Task: SCAN-007 - Implement ReachabilityComputationHealthCheck
|
||||
// Description: Monitor reachability computation health
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors reachability computation health.
|
||||
/// Checks computation success rates, performance, and accuracy.
|
||||
/// </summary>
|
||||
public sealed class ReachabilityComputationHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.scanner";
|
||||
private const string CategoryName = "Scanner & Reachability";
|
||||
private const int ComputationTimeWarningMs = 5000;
|
||||
private const int ComputationTimeCriticalMs = 30000;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.scanner.reachability";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Reachability Computation Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Monitor reachability analysis performance and accuracy";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["scanner", "reachability", "analysis", "performance"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"];
|
||||
return !string.IsNullOrEmpty(scannerUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"]
|
||||
?? "http://localhost:5090";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{scannerUrl.TrimEnd('/')}/api/v1/reachability/stats",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve reachability stats: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Reachability Status", eb =>
|
||||
{
|
||||
eb.Add("scanner_url", scannerUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var stats = ParseReachabilityStats(json);
|
||||
|
||||
// Check for computation failures
|
||||
if (stats.ComputationFailures > 0)
|
||||
{
|
||||
var failureRate = stats.TotalComputations > 0
|
||||
? (double)stats.ComputationFailures / stats.TotalComputations
|
||||
: 1.0;
|
||||
|
||||
if (failureRate > 0.1)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Reachability computation failures: {stats.ComputationFailures} ({failureRate:P0})")
|
||||
.WithEvidence("Reachability", eb =>
|
||||
{
|
||||
eb.Add("total_computations", stats.TotalComputations.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("computation_failures", stats.ComputationFailures.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
eb.Add("avg_computation_time_ms", stats.AvgComputationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Invalid call graph",
|
||||
"Missing slice data",
|
||||
"Timeout on large codebases",
|
||||
"Memory exhaustion")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View computation errors",
|
||||
"stella scanner reachability failures --recent",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Retry failed computations",
|
||||
"stella scanner reachability retry --failed",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
// Check computation time
|
||||
if (stats.AvgComputationTimeMs >= ComputationTimeCriticalMs)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Reachability computation critically slow: {stats.AvgComputationTimeMs}ms avg")
|
||||
.WithEvidence("Reachability", eb =>
|
||||
{
|
||||
eb.Add("avg_computation_time_ms", stats.AvgComputationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("p95_computation_time_ms", stats.P95ComputationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_computations", stats.TotalComputations.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Large codebases",
|
||||
"Complex call graphs",
|
||||
"Insufficient resources",
|
||||
"Cache misses")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Warm slice cache",
|
||||
"stella scanner cache warm",
|
||||
CommandType.Manual);
|
||||
rb.AddStep(2, "Scale workers",
|
||||
"stella scanner workers scale --replicas 4",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (stats.AvgComputationTimeMs >= ComputationTimeWarningMs)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Reachability computation slow: {stats.AvgComputationTimeMs}ms avg")
|
||||
.WithEvidence("Reachability", eb =>
|
||||
{
|
||||
eb.Add("avg_computation_time_ms", stats.AvgComputationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_computations", stats.TotalComputations.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("reachable_vulns", stats.ReachableVulns.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_vulns", stats.UnreachableVulns.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Performance optimization needed")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Calculate vulnerability filtering effectiveness
|
||||
var totalVulns = stats.ReachableVulns + stats.UnreachableVulns;
|
||||
var filterRate = totalVulns > 0 ? (double)stats.UnreachableVulns / totalVulns : 0;
|
||||
|
||||
return builder
|
||||
.Pass($"Reachability healthy ({stats.AvgComputationTimeMs}ms avg, {filterRate:P0} filtered)")
|
||||
.WithEvidence("Reachability", eb =>
|
||||
{
|
||||
eb.Add("total_computations", stats.TotalComputations.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("avg_computation_time_ms", stats.AvgComputationTimeMs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("reachable_vulns", stats.ReachableVulns.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("unreachable_vulns", stats.UnreachableVulns.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("filter_rate", filterRate.ToString("P0", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check reachability health: {ex.Message}")
|
||||
.WithEvidence("Reachability Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Reachability health check timed out")
|
||||
.WithEvidence("Reachability Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static ReachabilityStats ParseReachabilityStats(string json)
|
||||
{
|
||||
var stats = new ReachabilityStats();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
stats.TotalComputations = doc.RootElement.TryGetProperty("totalComputations", out var tc) ? tc.GetInt32() : 0;
|
||||
stats.ComputationFailures = doc.RootElement.TryGetProperty("computationFailures", out var cf) ? cf.GetInt32() : 0;
|
||||
stats.AvgComputationTimeMs = doc.RootElement.TryGetProperty("avgComputationTimeMs", out var act) ? act.GetInt32() : 0;
|
||||
stats.P95ComputationTimeMs = doc.RootElement.TryGetProperty("p95ComputationTimeMs", out var p95) ? p95.GetInt32() : 0;
|
||||
stats.ReachableVulns = doc.RootElement.TryGetProperty("reachableVulns", out var rv) ? rv.GetInt32() : 0;
|
||||
stats.UnreachableVulns = doc.RootElement.TryGetProperty("unreachableVulns", out var uv) ? uv.GetInt32() : 0;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
private sealed class ReachabilityStats
|
||||
{
|
||||
public int TotalComputations { get; set; }
|
||||
public int ComputationFailures { get; set; }
|
||||
public int AvgComputationTimeMs { get; set; }
|
||||
public int P95ComputationTimeMs { get; set; }
|
||||
public int ReachableVulns { get; set; }
|
||||
public int UnreachableVulns { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,201 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// SbomGenerationHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
|
||||
// Task: SCAN-003 - Implement SbomGenerationHealthCheck
|
||||
// Description: Monitor SBOM generation health
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors SBOM generation health.
|
||||
/// Checks generation success rates, format compliance, and freshness.
|
||||
/// </summary>
|
||||
public sealed class SbomGenerationHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.scanner";
|
||||
private const string CategoryName = "Scanner & Reachability";
|
||||
private const double SuccessRateWarning = 0.95;
|
||||
private const double SuccessRateCritical = 0.80;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.scanner.sbom";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "SBOM Generation Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Monitor SBOM generation health and compliance";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["scanner", "sbom", "cyclonedx", "spdx", "compliance"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"];
|
||||
return !string.IsNullOrEmpty(scannerUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"]
|
||||
?? "http://localhost:5090";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{scannerUrl.TrimEnd('/')}/api/v1/sbom/stats",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve SBOM stats: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("SBOM Status", eb =>
|
||||
{
|
||||
eb.Add("scanner_url", scannerUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var stats = ParseSbomStats(json);
|
||||
|
||||
var successRate = stats.TotalGenerated > 0
|
||||
? (double)stats.SuccessfulGenerations / stats.TotalGenerated
|
||||
: 1.0;
|
||||
|
||||
if (successRate < SuccessRateCritical)
|
||||
{
|
||||
return builder
|
||||
.Fail($"SBOM generation success rate critical: {successRate:P0}")
|
||||
.WithEvidence("SBOM Generation", eb =>
|
||||
{
|
||||
eb.Add("total_generated", stats.TotalGenerated.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("successful_generations", stats.SuccessfulGenerations.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_generations", stats.FailedGenerations.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("success_rate", successRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
eb.Add("format_cyclonedx", stats.CycloneDxCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("format_spdx", stats.SpdxCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("validation_failures", stats.ValidationFailures.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Invalid source artifacts",
|
||||
"Parser errors for specific ecosystems",
|
||||
"Memory exhaustion on large projects",
|
||||
"SBOM schema validation failing")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View recent failures",
|
||||
"stella scanner sbom failures --recent",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Retry failed SBOMs",
|
||||
"stella scanner sbom retry --failed",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (successRate < SuccessRateWarning || stats.ValidationFailures > 0)
|
||||
{
|
||||
var issues = new List<string>();
|
||||
if (successRate < SuccessRateWarning) issues.Add($"success rate {successRate:P0}");
|
||||
if (stats.ValidationFailures > 0) issues.Add($"{stats.ValidationFailures} validation failures");
|
||||
|
||||
return builder
|
||||
.Warn($"SBOM generation issues: {string.Join(", ", issues)}")
|
||||
.WithEvidence("SBOM Generation", eb =>
|
||||
{
|
||||
eb.Add("total_generated", stats.TotalGenerated.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("success_rate", successRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
eb.Add("validation_failures", stats.ValidationFailures.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Minor parsing issues", "Occasional format errors")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"SBOM generation healthy ({stats.TotalGenerated} generated, {successRate:P0} success)")
|
||||
.WithEvidence("SBOM Generation", eb =>
|
||||
{
|
||||
eb.Add("total_generated", stats.TotalGenerated.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("success_rate", successRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
eb.Add("format_cyclonedx", stats.CycloneDxCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("format_spdx", stats.SpdxCount.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check SBOM health: {ex.Message}")
|
||||
.WithEvidence("SBOM Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("SBOM health check timed out")
|
||||
.WithEvidence("SBOM Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static SbomStats ParseSbomStats(string json)
|
||||
{
|
||||
var stats = new SbomStats();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
stats.TotalGenerated = doc.RootElement.TryGetProperty("totalGenerated", out var tg) ? tg.GetInt32() : 0;
|
||||
stats.SuccessfulGenerations = doc.RootElement.TryGetProperty("successfulGenerations", out var sg) ? sg.GetInt32() : 0;
|
||||
stats.FailedGenerations = doc.RootElement.TryGetProperty("failedGenerations", out var fg) ? fg.GetInt32() : 0;
|
||||
stats.CycloneDxCount = doc.RootElement.TryGetProperty("cycloneDxCount", out var cdx) ? cdx.GetInt32() : 0;
|
||||
stats.SpdxCount = doc.RootElement.TryGetProperty("spdxCount", out var spdx) ? spdx.GetInt32() : 0;
|
||||
stats.ValidationFailures = doc.RootElement.TryGetProperty("validationFailures", out var vf) ? vf.GetInt32() : 0;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
private sealed class SbomStats
|
||||
{
|
||||
public int TotalGenerated { get; set; }
|
||||
public int SuccessfulGenerations { get; set; }
|
||||
public int FailedGenerations { get; set; }
|
||||
public int CycloneDxCount { get; set; }
|
||||
public int SpdxCount { get; set; }
|
||||
public int ValidationFailures { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,232 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ScannerQueueHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
|
||||
// Task: SCAN-002 - Implement ScannerQueueHealthCheck
|
||||
// Description: Monitor scanner job queue health
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors scanner job queue health.
|
||||
/// Checks queue depth, processing rate, stuck jobs, and backlog growth.
|
||||
/// </summary>
|
||||
public sealed class ScannerQueueHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.scanner";
|
||||
private const string CategoryName = "Scanner & Reachability";
|
||||
private const int QueueDepthWarning = 100;
|
||||
private const int QueueDepthCritical = 500;
|
||||
private const double FailureRateWarning = 0.05;
|
||||
private const double FailureRateCritical = 0.15;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.scanner.queue";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Scanner Queue Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Monitor scanner job queue health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["scanner", "queue", "jobs", "processing"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"];
|
||||
return !string.IsNullOrEmpty(scannerUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"]
|
||||
?? "http://localhost:5090";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{scannerUrl.TrimEnd('/')}/api/v1/queue/stats",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve queue stats: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Queue Status", eb =>
|
||||
{
|
||||
eb.Add("scanner_url", scannerUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var stats = ParseQueueStats(json);
|
||||
|
||||
var failureRate = stats.TotalProcessed > 0
|
||||
? (double)stats.FailedJobs / stats.TotalProcessed
|
||||
: 0;
|
||||
|
||||
// Check for critical conditions
|
||||
if (stats.StuckJobs > 0)
|
||||
{
|
||||
return builder
|
||||
.Fail($"{stats.StuckJobs} stuck job(s) in queue")
|
||||
.WithEvidence("Queue", eb =>
|
||||
{
|
||||
eb.Add("queue_depth", stats.QueueDepth.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("processing_rate_per_min", stats.ProcessingRatePerMin.ToString("F1", CultureInfo.InvariantCulture));
|
||||
eb.Add("stuck_jobs", stats.StuckJobs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failed_jobs", stats.FailedJobs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
eb.Add("oldest_job_age_min", stats.OldestJobAgeMinutes.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Scanner worker crashed",
|
||||
"Job dependency unavailable",
|
||||
"Resource exhaustion",
|
||||
"Database connection lost")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View stuck jobs",
|
||||
"stella scanner queue list --status stuck",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Retry stuck jobs",
|
||||
"stella scanner queue retry --stuck",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(3, "Check worker status",
|
||||
"stella scanner workers status",
|
||||
CommandType.Shell);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (stats.QueueDepth >= QueueDepthCritical || failureRate >= FailureRateCritical)
|
||||
{
|
||||
var issues = new List<string>();
|
||||
if (stats.QueueDepth >= QueueDepthCritical) issues.Add($"queue depth {stats.QueueDepth}");
|
||||
if (failureRate >= FailureRateCritical) issues.Add($"failure rate {failureRate:P0}");
|
||||
|
||||
return builder
|
||||
.Fail($"Scanner queue critical: {string.Join(", ", issues)}")
|
||||
.WithEvidence("Queue", eb =>
|
||||
{
|
||||
eb.Add("queue_depth", stats.QueueDepth.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("processing_rate_per_min", stats.ProcessingRatePerMin.ToString("F1", CultureInfo.InvariantCulture));
|
||||
eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
eb.Add("backlog_growing", stats.BacklogGrowing.ToString().ToLowerInvariant());
|
||||
})
|
||||
.WithCauses("High volume", "Workers overwhelmed", "High error rate")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Scale workers", "stella scanner workers scale --replicas 4", CommandType.Manual))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (stats.QueueDepth >= QueueDepthWarning || failureRate >= FailureRateWarning || stats.BacklogGrowing)
|
||||
{
|
||||
var issues = new List<string>();
|
||||
if (stats.QueueDepth >= QueueDepthWarning) issues.Add($"queue depth {stats.QueueDepth}");
|
||||
if (failureRate >= FailureRateWarning) issues.Add($"failure rate {failureRate:P0}");
|
||||
if (stats.BacklogGrowing) issues.Add("backlog growing");
|
||||
|
||||
return builder
|
||||
.Warn($"Scanner queue warning: {string.Join(", ", issues)}")
|
||||
.WithEvidence("Queue", eb =>
|
||||
{
|
||||
eb.Add("queue_depth", stats.QueueDepth.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("processing_rate_per_min", stats.ProcessingRatePerMin.ToString("F1", CultureInfo.InvariantCulture));
|
||||
eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
eb.Add("backlog_growing", stats.BacklogGrowing.ToString().ToLowerInvariant());
|
||||
})
|
||||
.WithCauses("Processing slower than ingest", "Temporary spike")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Scanner queue healthy ({stats.QueueDepth} pending, {stats.ProcessingRatePerMin:F0}/min)")
|
||||
.WithEvidence("Queue", eb =>
|
||||
{
|
||||
eb.Add("queue_depth", stats.QueueDepth.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("processing_rate_per_min", stats.ProcessingRatePerMin.ToString("F1", CultureInfo.InvariantCulture));
|
||||
eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
eb.Add("backlog_growing", "false");
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check scanner queue: {ex.Message}")
|
||||
.WithEvidence("Queue Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Scanner queue check timed out")
|
||||
.WithEvidence("Queue Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static QueueStats ParseQueueStats(string json)
|
||||
{
|
||||
var stats = new QueueStats();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
stats.QueueDepth = doc.RootElement.TryGetProperty("queueDepth", out var qd) ? qd.GetInt32() : 0;
|
||||
stats.ProcessingRatePerMin = doc.RootElement.TryGetProperty("processingRatePerMin", out var pr) ? pr.GetDouble() : 0;
|
||||
stats.StuckJobs = doc.RootElement.TryGetProperty("stuckJobs", out var sj) ? sj.GetInt32() : 0;
|
||||
stats.FailedJobs = doc.RootElement.TryGetProperty("failedJobs", out var fj) ? fj.GetInt32() : 0;
|
||||
stats.TotalProcessed = doc.RootElement.TryGetProperty("totalProcessed", out var tp) ? tp.GetInt32() : 1;
|
||||
stats.BacklogGrowing = doc.RootElement.TryGetProperty("backlogGrowing", out var bg) && bg.GetBoolean();
|
||||
stats.OldestJobAgeMinutes = doc.RootElement.TryGetProperty("oldestJobAgeMinutes", out var oja) ? oja.GetInt32() : 0;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
private sealed class QueueStats
|
||||
{
|
||||
public int QueueDepth { get; set; }
|
||||
public double ProcessingRatePerMin { get; set; }
|
||||
public int StuckJobs { get; set; }
|
||||
public int FailedJobs { get; set; }
|
||||
public int TotalProcessed { get; set; } = 1;
|
||||
public bool BacklogGrowing { get; set; }
|
||||
public int OldestJobAgeMinutes { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,224 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ScannerResourceUtilizationCheck.cs
|
||||
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
|
||||
// Task: SCAN-008 - Implement ScannerResourceUtilizationCheck
|
||||
// Description: Monitor scanner resource utilization
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors scanner resource utilization.
|
||||
/// Checks CPU, memory, and worker pool health.
|
||||
/// </summary>
|
||||
public sealed class ScannerResourceUtilizationCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.scanner";
|
||||
private const string CategoryName = "Scanner & Reachability";
|
||||
private const double CpuWarning = 0.75;
|
||||
private const double CpuCritical = 0.90;
|
||||
private const double MemoryWarning = 0.80;
|
||||
private const double MemoryCritical = 0.95;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.scanner.resources";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Scanner Resource Utilization";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Monitor scanner CPU, memory, and worker health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["scanner", "resources", "cpu", "memory", "workers"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"];
|
||||
return !string.IsNullOrEmpty(scannerUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"]
|
||||
?? "http://localhost:5090";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{scannerUrl.TrimEnd('/')}/api/v1/resources/stats",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve resource stats: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Resource Status", eb =>
|
||||
{
|
||||
eb.Add("scanner_url", scannerUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var stats = ParseResourceStats(json);
|
||||
|
||||
// Check for critical resource issues
|
||||
if (stats.CpuUtilization >= CpuCritical || stats.MemoryUtilization >= MemoryCritical)
|
||||
{
|
||||
var issues = new List<string>();
|
||||
if (stats.CpuUtilization >= CpuCritical) issues.Add($"CPU {stats.CpuUtilization:P0}");
|
||||
if (stats.MemoryUtilization >= MemoryCritical) issues.Add($"Memory {stats.MemoryUtilization:P0}");
|
||||
|
||||
return builder
|
||||
.Fail($"Scanner resources critical: {string.Join(", ", issues)}")
|
||||
.WithEvidence("Resources", eb =>
|
||||
{
|
||||
eb.Add("cpu_utilization", stats.CpuUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("memory_utilization", stats.MemoryUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("memory_used_mb", (stats.MemoryUsedBytes / 1024 / 1024).ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("active_workers", stats.ActiveWorkers.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_workers", stats.TotalWorkers.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("idle_workers", stats.IdleWorkers.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"High scan volume",
|
||||
"Memory leak",
|
||||
"Large artifacts being processed",
|
||||
"Insufficient resources allocated")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Scale scanner resources",
|
||||
"stella scanner scale --memory +2G --cpu +2",
|
||||
CommandType.Manual);
|
||||
rb.AddStep(2, "Reduce concurrent jobs",
|
||||
"stella scanner config set MaxConcurrentJobs 2",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check worker pool health
|
||||
if (stats.TotalWorkers > 0 && stats.ActiveWorkers == stats.TotalWorkers && stats.IdleWorkers == 0)
|
||||
{
|
||||
return builder
|
||||
.Warn("All scanner workers are busy")
|
||||
.WithEvidence("Resources", eb =>
|
||||
{
|
||||
eb.Add("active_workers", stats.ActiveWorkers.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_workers", stats.TotalWorkers.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("idle_workers", "0");
|
||||
eb.Add("cpu_utilization", stats.CpuUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("High demand", "Consider scaling")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Scale workers", "stella scanner workers scale --replicas 4", CommandType.Manual))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check for warning-level resource usage
|
||||
if (stats.CpuUtilization >= CpuWarning || stats.MemoryUtilization >= MemoryWarning)
|
||||
{
|
||||
var issues = new List<string>();
|
||||
if (stats.CpuUtilization >= CpuWarning) issues.Add($"CPU {stats.CpuUtilization:P0}");
|
||||
if (stats.MemoryUtilization >= MemoryWarning) issues.Add($"Memory {stats.MemoryUtilization:P0}");
|
||||
|
||||
return builder
|
||||
.Warn($"Scanner resource usage elevated: {string.Join(", ", issues)}")
|
||||
.WithEvidence("Resources", eb =>
|
||||
{
|
||||
eb.Add("cpu_utilization", stats.CpuUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("memory_utilization", stats.MemoryUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("active_workers", stats.ActiveWorkers.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Approaching limits")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Scanner resources healthy (CPU {stats.CpuUtilization:P0}, Memory {stats.MemoryUtilization:P0})")
|
||||
.WithEvidence("Resources", eb =>
|
||||
{
|
||||
eb.Add("cpu_utilization", stats.CpuUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("memory_utilization", stats.MemoryUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("active_workers", stats.ActiveWorkers.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_workers", stats.TotalWorkers.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("idle_workers", stats.IdleWorkers.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check scanner resources: {ex.Message}")
|
||||
.WithEvidence("Resource Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Scanner resource check timed out")
|
||||
.WithEvidence("Resource Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static ResourceStats ParseResourceStats(string json)
|
||||
{
|
||||
var stats = new ResourceStats();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
stats.CpuUtilization = doc.RootElement.TryGetProperty("cpuUtilization", out var cpu) ? cpu.GetDouble() : 0;
|
||||
stats.MemoryUtilization = doc.RootElement.TryGetProperty("memoryUtilization", out var mem) ? mem.GetDouble() : 0;
|
||||
stats.MemoryUsedBytes = doc.RootElement.TryGetProperty("memoryUsedBytes", out var mub) ? mub.GetInt64() : 0;
|
||||
stats.TotalWorkers = doc.RootElement.TryGetProperty("totalWorkers", out var tw) ? tw.GetInt32() : 0;
|
||||
stats.ActiveWorkers = doc.RootElement.TryGetProperty("activeWorkers", out var aw) ? aw.GetInt32() : 0;
|
||||
stats.IdleWorkers = doc.RootElement.TryGetProperty("idleWorkers", out var iw) ? iw.GetInt32() : 0;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
private sealed class ResourceStats
|
||||
{
|
||||
public double CpuUtilization { get; set; }
|
||||
public double MemoryUtilization { get; set; }
|
||||
public long MemoryUsedBytes { get; set; }
|
||||
public int TotalWorkers { get; set; }
|
||||
public int ActiveWorkers { get; set; }
|
||||
public int IdleWorkers { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,234 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// SliceCacheHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
|
||||
// Task: SCAN-006 - Implement SliceCacheHealthCheck
|
||||
// Description: Monitor slice cache health
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors slice cache health.
|
||||
/// Checks cache hit rates, eviction rates, and storage utilization.
|
||||
/// </summary>
|
||||
public sealed class SliceCacheHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.scanner";
|
||||
private const string CategoryName = "Scanner & Reachability";
|
||||
private const double HitRateWarning = 0.50;
|
||||
private const double HitRateCritical = 0.20;
|
||||
private const double StorageWarning = 0.80;
|
||||
private const double StorageCritical = 0.95;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.scanner.slice.cache";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Slice Cache Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Monitor slice cache hit rates and storage";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["scanner", "cache", "slice", "performance"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"];
|
||||
return !string.IsNullOrEmpty(scannerUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"]
|
||||
?? "http://localhost:5090";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{scannerUrl.TrimEnd('/')}/api/v1/cache/stats",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve slice cache stats: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Slice Cache Status", eb =>
|
||||
{
|
||||
eb.Add("scanner_url", scannerUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var stats = ParseCacheStats(json);
|
||||
|
||||
// Check storage utilization first
|
||||
if (stats.StorageUtilization >= StorageCritical)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Slice cache storage critical: {stats.StorageUtilization:P0} full")
|
||||
.WithEvidence("Slice Cache", eb =>
|
||||
{
|
||||
eb.Add("storage_utilization", stats.StorageUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("used_bytes", stats.UsedBytes.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_bytes", stats.TotalBytes.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("entry_count", stats.EntryCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("eviction_rate", stats.EvictionRatePerHour.ToString("F1", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Cache size limit too small",
|
||||
"TTL too long",
|
||||
"Eviction not working",
|
||||
"Unexpected growth in slices")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Clear stale entries",
|
||||
"stella scanner cache prune --stale",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Increase cache size",
|
||||
"# Update Scanner:Cache:MaxSizeBytes in configuration",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check hit rate
|
||||
var hitRate = stats.Hits + stats.Misses > 0
|
||||
? (double)stats.Hits / (stats.Hits + stats.Misses)
|
||||
: 1.0;
|
||||
|
||||
if (hitRate < HitRateCritical)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Slice cache hit rate critical: {hitRate:P0}")
|
||||
.WithEvidence("Slice Cache", eb =>
|
||||
{
|
||||
eb.Add("hit_rate", hitRate.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("hits", stats.Hits.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("misses", stats.Misses.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("eviction_rate", stats.EvictionRatePerHour.ToString("F1", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Cache size too small",
|
||||
"High eviction rate",
|
||||
"Cache was recently cleared",
|
||||
"Working set larger than cache")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Increase cache size",
|
||||
"# Update Scanner:Cache:MaxSizeBytes in configuration",
|
||||
CommandType.Manual);
|
||||
rb.AddStep(2, "Warm cache",
|
||||
"stella scanner cache warm",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (stats.StorageUtilization >= StorageWarning || hitRate < HitRateWarning)
|
||||
{
|
||||
var issues = new List<string>();
|
||||
if (stats.StorageUtilization >= StorageWarning) issues.Add($"storage {stats.StorageUtilization:P0}");
|
||||
if (hitRate < HitRateWarning) issues.Add($"hit rate {hitRate:P0}");
|
||||
|
||||
return builder
|
||||
.Warn($"Slice cache: {string.Join(", ", issues)}")
|
||||
.WithEvidence("Slice Cache", eb =>
|
||||
{
|
||||
eb.Add("hit_rate", hitRate.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("storage_utilization", stats.StorageUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("entry_count", stats.EntryCount.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Approaching limits", "Consider tuning")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Slice cache healthy ({hitRate:P0} hit rate, {stats.EntryCount} entries)")
|
||||
.WithEvidence("Slice Cache", eb =>
|
||||
{
|
||||
eb.Add("hit_rate", hitRate.ToString("P0", CultureInfo.InvariantCulture));
|
||||
eb.Add("entry_count", stats.EntryCount.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("storage_utilization", stats.StorageUtilization.ToString("P0", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check slice cache health: {ex.Message}")
|
||||
.WithEvidence("Slice Cache Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Slice cache health check timed out")
|
||||
.WithEvidence("Slice Cache Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static CacheStats ParseCacheStats(string json)
|
||||
{
|
||||
var stats = new CacheStats();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
stats.Hits = doc.RootElement.TryGetProperty("hits", out var h) ? h.GetInt64() : 0;
|
||||
stats.Misses = doc.RootElement.TryGetProperty("misses", out var m) ? m.GetInt64() : 0;
|
||||
stats.EntryCount = doc.RootElement.TryGetProperty("entryCount", out var ec) ? ec.GetInt32() : 0;
|
||||
stats.UsedBytes = doc.RootElement.TryGetProperty("usedBytes", out var ub) ? ub.GetInt64() : 0;
|
||||
stats.TotalBytes = doc.RootElement.TryGetProperty("totalBytes", out var tb) ? tb.GetInt64() : 1;
|
||||
stats.EvictionRatePerHour = doc.RootElement.TryGetProperty("evictionRatePerHour", out var er) ? er.GetDouble() : 0;
|
||||
|
||||
stats.StorageUtilization = stats.TotalBytes > 0 ? (double)stats.UsedBytes / stats.TotalBytes : 0;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
private sealed class CacheStats
|
||||
{
|
||||
public long Hits { get; set; }
|
||||
public long Misses { get; set; }
|
||||
public int EntryCount { get; set; }
|
||||
public long UsedBytes { get; set; }
|
||||
public long TotalBytes { get; set; } = 1;
|
||||
public double StorageUtilization { get; set; }
|
||||
public double EvictionRatePerHour { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,218 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// VulnerabilityScanHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
|
||||
// Task: SCAN-004 - Implement VulnerabilityScanHealthCheck
|
||||
// Description: Monitor vulnerability scanning health
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors vulnerability scanning health.
|
||||
/// Checks scan success rates, database freshness, and match accuracy.
|
||||
/// </summary>
|
||||
public sealed class VulnerabilityScanHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.scanner";
|
||||
private const string CategoryName = "Scanner & Reachability";
|
||||
private const int DbStaleHours = 24;
|
||||
private const int DbCriticalHours = 72;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.scanner.vuln";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Vulnerability Scan Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Monitor vulnerability scanning and database freshness";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["scanner", "vulnerability", "cve", "database"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"];
|
||||
return !string.IsNullOrEmpty(scannerUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"]
|
||||
?? "http://localhost:5090";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{scannerUrl.TrimEnd('/')}/api/v1/vuln/stats",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve vulnerability stats: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Vulnerability Status", eb =>
|
||||
{
|
||||
eb.Add("scanner_url", scannerUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var stats = ParseVulnStats(json, context.TimeProvider);
|
||||
|
||||
// Check database freshness first - most critical
|
||||
if (stats.DatabaseAgeHours >= DbCriticalHours)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Vulnerability database critically stale ({stats.DatabaseAgeHours}h old)")
|
||||
.WithEvidence("Vulnerability Scanning", eb =>
|
||||
{
|
||||
eb.Add("database_age_hours", stats.DatabaseAgeHours.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("last_db_update", stats.LastDbUpdate?.ToString("o") ?? "unknown");
|
||||
eb.Add("total_cves", stats.TotalCves.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("scans_completed", stats.ScansCompleted.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Database sync job failed",
|
||||
"Feed source unavailable",
|
||||
"Network connectivity issue")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "Trigger database sync",
|
||||
"stella scanner db sync",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Check sync status",
|
||||
"stella scanner db status",
|
||||
CommandType.Shell);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
if (stats.DatabaseAgeHours >= DbStaleHours)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Vulnerability database stale ({stats.DatabaseAgeHours}h old)")
|
||||
.WithEvidence("Vulnerability Scanning", eb =>
|
||||
{
|
||||
eb.Add("database_age_hours", stats.DatabaseAgeHours.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("last_db_update", stats.LastDbUpdate?.ToString("o") ?? "unknown");
|
||||
eb.Add("total_cves", stats.TotalCves.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Scheduled sync delayed")
|
||||
.WithRemediation(rb => rb
|
||||
.AddStep(1, "Check sync schedule", "stella scanner db schedule", CommandType.Shell))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check scan health
|
||||
if (stats.ScanFailures > 0 && stats.ScansCompleted > 0)
|
||||
{
|
||||
var failureRate = (double)stats.ScanFailures / (stats.ScansCompleted + stats.ScanFailures);
|
||||
if (failureRate > 0.1)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Elevated scan failure rate: {failureRate:P0}")
|
||||
.WithEvidence("Vulnerability Scanning", eb =>
|
||||
{
|
||||
eb.Add("scans_completed", stats.ScansCompleted.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("scan_failures", stats.ScanFailures.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Parsing errors", "Unsupported formats")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Vulnerability scanning healthy (DB {stats.DatabaseAgeHours}h old, {stats.TotalCves} CVEs)")
|
||||
.WithEvidence("Vulnerability Scanning", eb =>
|
||||
{
|
||||
eb.Add("database_age_hours", stats.DatabaseAgeHours.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("total_cves", stats.TotalCves.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("scans_completed", stats.ScansCompleted.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("vulnerabilities_found", stats.VulnerabilitiesFound.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check vulnerability health: {ex.Message}")
|
||||
.WithEvidence("Vulnerability Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Vulnerability health check timed out")
|
||||
.WithEvidence("Vulnerability Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static VulnStats ParseVulnStats(string json, TimeProvider timeProvider)
|
||||
{
|
||||
var stats = new VulnStats();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
|
||||
if (doc.RootElement.TryGetProperty("lastDbUpdate", out var ldu) &&
|
||||
DateTimeOffset.TryParse(ldu.GetString(), out var lastUpdate))
|
||||
{
|
||||
stats.LastDbUpdate = lastUpdate;
|
||||
stats.DatabaseAgeHours = (int)(timeProvider.GetUtcNow() - lastUpdate).TotalHours;
|
||||
}
|
||||
|
||||
stats.TotalCves = doc.RootElement.TryGetProperty("totalCves", out var tc) ? tc.GetInt32() : 0;
|
||||
stats.ScansCompleted = doc.RootElement.TryGetProperty("scansCompleted", out var sc) ? sc.GetInt32() : 0;
|
||||
stats.ScanFailures = doc.RootElement.TryGetProperty("scanFailures", out var sf) ? sf.GetInt32() : 0;
|
||||
stats.VulnerabilitiesFound = doc.RootElement.TryGetProperty("vulnerabilitiesFound", out var vf) ? vf.GetInt32() : 0;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
private sealed class VulnStats
|
||||
{
|
||||
public DateTimeOffset? LastDbUpdate { get; set; }
|
||||
public int DatabaseAgeHours { get; set; }
|
||||
public int TotalCves { get; set; }
|
||||
public int ScansCompleted { get; set; }
|
||||
public int ScanFailures { get; set; }
|
||||
public int VulnerabilitiesFound { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,215 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// WitnessGraphHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
|
||||
// Task: SCAN-005 - Implement WitnessGraphHealthCheck
|
||||
// Description: Monitor witness graph construction health
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Monitors witness graph construction health.
|
||||
/// Checks graph construction success, completeness, and consistency.
|
||||
/// </summary>
|
||||
public sealed class WitnessGraphHealthCheck : IDoctorCheck
|
||||
{
|
||||
private const string PluginId = "stellaops.doctor.scanner";
|
||||
private const string CategoryName = "Scanner & Reachability";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string CheckId => "check.scanner.witness.graph";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Name => "Witness Graph Health";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Description => "Monitor witness graph construction and integrity";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Tags => ["scanner", "witness", "graph", "reachability", "evidence"];
|
||||
|
||||
/// <inheritdoc />
|
||||
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
|
||||
{
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"];
|
||||
return !string.IsNullOrEmpty(scannerUrl);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
|
||||
|
||||
var scannerUrl = context.Configuration["Scanner:Url"]
|
||||
?? context.Configuration["Services:Scanner:Url"]
|
||||
?? "http://localhost:5090";
|
||||
|
||||
try
|
||||
{
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(10);
|
||||
|
||||
var response = await httpClient.GetAsync(
|
||||
$"{scannerUrl.TrimEnd('/')}/api/v1/witness/stats",
|
||||
ct);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot retrieve witness graph stats: HTTP {(int)response.StatusCode}")
|
||||
.WithEvidence("Witness Graph Status", eb =>
|
||||
{
|
||||
eb.Add("scanner_url", scannerUrl);
|
||||
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
var json = await response.Content.ReadAsStringAsync(ct);
|
||||
var stats = ParseWitnessStats(json);
|
||||
|
||||
// Check for construction failures
|
||||
if (stats.ConstructionFailures > 0)
|
||||
{
|
||||
var failureRate = stats.TotalConstructed > 0
|
||||
? (double)stats.ConstructionFailures / (stats.TotalConstructed + stats.ConstructionFailures)
|
||||
: 1.0;
|
||||
|
||||
if (failureRate > 0.1)
|
||||
{
|
||||
return builder
|
||||
.Fail($"Witness graph construction failures: {stats.ConstructionFailures} ({failureRate:P0})")
|
||||
.WithEvidence("Witness Graph", eb =>
|
||||
{
|
||||
eb.Add("total_constructed", stats.TotalConstructed.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("construction_failures", stats.ConstructionFailures.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
|
||||
eb.Add("incomplete_graphs", stats.IncompleteGraphs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("avg_nodes_per_graph", stats.AvgNodesPerGraph.ToString("F0", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses(
|
||||
"Missing SBOM input",
|
||||
"Parser error on artifact",
|
||||
"Cyclical dependency detected",
|
||||
"Resource exhaustion")
|
||||
.WithRemediation(rb =>
|
||||
{
|
||||
rb.AddStep(1, "View construction errors",
|
||||
"stella scanner witness failures --recent",
|
||||
CommandType.Shell);
|
||||
rb.AddStep(2, "Rebuild failed graphs",
|
||||
"stella scanner witness rebuild --failed",
|
||||
CommandType.Manual);
|
||||
})
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
// Check for incomplete graphs
|
||||
if (stats.IncompleteGraphs > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{stats.IncompleteGraphs} incomplete witness graph(s)")
|
||||
.WithEvidence("Witness Graph", eb =>
|
||||
{
|
||||
eb.Add("total_constructed", stats.TotalConstructed.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("incomplete_graphs", stats.IncompleteGraphs.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("avg_completeness", stats.AvgCompleteness.ToString("P0", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Partial SBOM data", "Missing dependencies")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
// Check consistency
|
||||
if (stats.ConsistencyErrors > 0)
|
||||
{
|
||||
return builder
|
||||
.Warn($"{stats.ConsistencyErrors} graph consistency error(s)")
|
||||
.WithEvidence("Witness Graph", eb =>
|
||||
{
|
||||
eb.Add("total_constructed", stats.TotalConstructed.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("consistency_errors", stats.ConsistencyErrors.ToString(CultureInfo.InvariantCulture));
|
||||
})
|
||||
.WithCauses("Version mismatch", "Orphaned nodes")
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
|
||||
return builder
|
||||
.Pass($"Witness graph healthy ({stats.TotalConstructed} graphs, avg {stats.AvgNodesPerGraph:F0} nodes)")
|
||||
.WithEvidence("Witness Graph", eb =>
|
||||
{
|
||||
eb.Add("total_constructed", stats.TotalConstructed.ToString(CultureInfo.InvariantCulture));
|
||||
eb.Add("avg_nodes_per_graph", stats.AvgNodesPerGraph.ToString("F0", CultureInfo.InvariantCulture));
|
||||
eb.Add("avg_edges_per_graph", stats.AvgEdgesPerGraph.ToString("F0", CultureInfo.InvariantCulture));
|
||||
eb.Add("avg_completeness", stats.AvgCompleteness.ToString("P0", CultureInfo.InvariantCulture));
|
||||
})
|
||||
.Build();
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
return builder
|
||||
.Warn($"Cannot check witness graph health: {ex.Message}")
|
||||
.WithEvidence("Witness Graph Status", eb => eb.Add("error_message", ex.Message))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
catch (TaskCanceledException)
|
||||
{
|
||||
return builder
|
||||
.Warn("Witness graph health check timed out")
|
||||
.WithEvidence("Witness Graph Status", eb => eb.Add("connection_error_type", "timeout"))
|
||||
.WithVerification($"stella doctor --check {CheckId}")
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
|
||||
private static WitnessStats ParseWitnessStats(string json)
|
||||
{
|
||||
var stats = new WitnessStats();
|
||||
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
stats.TotalConstructed = doc.RootElement.TryGetProperty("totalConstructed", out var tc) ? tc.GetInt32() : 0;
|
||||
stats.ConstructionFailures = doc.RootElement.TryGetProperty("constructionFailures", out var cf) ? cf.GetInt32() : 0;
|
||||
stats.IncompleteGraphs = doc.RootElement.TryGetProperty("incompleteGraphs", out var ig) ? ig.GetInt32() : 0;
|
||||
stats.ConsistencyErrors = doc.RootElement.TryGetProperty("consistencyErrors", out var ce) ? ce.GetInt32() : 0;
|
||||
stats.AvgNodesPerGraph = doc.RootElement.TryGetProperty("avgNodesPerGraph", out var an) ? an.GetDouble() : 0;
|
||||
stats.AvgEdgesPerGraph = doc.RootElement.TryGetProperty("avgEdgesPerGraph", out var ae) ? ae.GetDouble() : 0;
|
||||
stats.AvgCompleteness = doc.RootElement.TryGetProperty("avgCompleteness", out var ac) ? ac.GetDouble() : 1.0;
|
||||
}
|
||||
catch { }
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
private sealed class WitnessStats
|
||||
{
|
||||
public int TotalConstructed { get; set; }
|
||||
public int ConstructionFailures { get; set; }
|
||||
public int IncompleteGraphs { get; set; }
|
||||
public int ConsistencyErrors { get; set; }
|
||||
public double AvgNodesPerGraph { get; set; }
|
||||
public double AvgEdgesPerGraph { get; set; }
|
||||
public double AvgCompleteness { get; set; } = 1.0;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ScannerPluginServiceCollectionExtensions.cs
|
||||
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
|
||||
// Task: SCAN-001 - Create Scanner plugin scaffold
|
||||
// Description: Extension methods for registering the Scanner plugin
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Scanner.DependencyInjection;
|
||||
|
||||
/// <summary>
|
||||
/// Extension methods for registering the Scanner Doctor plugin.
|
||||
/// </summary>
|
||||
public static class ScannerPluginServiceCollectionExtensions
|
||||
{
|
||||
/// <summary>
|
||||
/// Adds the Scanner and Reachability health Doctor plugin.
|
||||
/// Provides checks for SBOM, vulnerabilities, witness graph, and slice cache.
|
||||
/// </summary>
|
||||
/// <param name="services">The service collection.</param>
|
||||
/// <returns>The service collection for chaining.</returns>
|
||||
public static IServiceCollection AddDoctorScannerPlugin(this IServiceCollection services)
|
||||
{
|
||||
services.AddSingleton<IDoctorPlugin, ScannerDoctorPlugin>();
|
||||
return services;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// ScannerDoctorPlugin.cs
|
||||
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
|
||||
// Task: SCAN-001 - Create Scanner plugin scaffold
|
||||
// Description: Doctor plugin for scanner and reachability analysis health
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using StellaOps.Doctor.Plugin.Scanner.Checks;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Scanner;
|
||||
|
||||
/// <summary>
|
||||
/// Doctor plugin for scanner and reachability health checks.
|
||||
/// Monitors SBOM generation, vulnerability scanning, witness graphs, and slice cache.
|
||||
/// </summary>
|
||||
public sealed class ScannerDoctorPlugin : IDoctorPlugin
|
||||
{
|
||||
private static readonly Version PluginVersion = new(1, 0, 0);
|
||||
private static readonly Version MinVersion = new(1, 0, 0);
|
||||
|
||||
/// <inheritdoc />
|
||||
public string PluginId => "stellaops.doctor.scanner";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string DisplayName => "Scanner & Reachability";
|
||||
|
||||
/// <inheritdoc />
|
||||
public DoctorCategory Category => DoctorCategory.Scanner;
|
||||
|
||||
/// <inheritdoc />
|
||||
public Version Version => PluginVersion;
|
||||
|
||||
/// <inheritdoc />
|
||||
public Version MinEngineVersion => MinVersion;
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool IsAvailable(IServiceProvider services)
|
||||
{
|
||||
// Available when scanner is configured
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<IDoctorCheck> GetChecks(DoctorPluginContext context)
|
||||
{
|
||||
return new IDoctorCheck[]
|
||||
{
|
||||
new ScannerQueueHealthCheck(),
|
||||
new SbomGenerationHealthCheck(),
|
||||
new VulnerabilityScanHealthCheck(),
|
||||
new WitnessGraphHealthCheck(),
|
||||
new SliceCacheHealthCheck(),
|
||||
new ReachabilityComputationHealthCheck(),
|
||||
new ScannerResourceUtilizationCheck()
|
||||
};
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public Task InitializeAsync(DoctorPluginContext context, CancellationToken ct)
|
||||
{
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Doctor.Plugin.Scanner</RootNamespace>
|
||||
<Description>Scanner and reachability health checks for Stella Ops Doctor diagnostics</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Doctor\StellaOps.Doctor.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
Reference in New Issue
Block a user