Doctor enhancements, setup enhancements, UI functionality and design consolidation, test project fixes, product advisory attestation/Rekor and delta verification enhancements

This commit is contained in:
master
2026-01-19 09:02:59 +02:00
parent 8c4bf54aed
commit 17419ba7c4
809 changed files with 170738 additions and 12244 deletions

View File

@@ -1,12 +1,14 @@
// -----------------------------------------------------------------------------
// RekorClockSkewCheck.cs
// Sprint: SPRINT_20260117_001_ATTESTOR_periodic_rekor_verification
// Task: PRV-006 - Doctor check for clock skew
// Description: Checks if system clock is synchronized for attestation validity
// Sprint: SPRINT_20260118_015_Doctor_check_quality_improvements
// Task: DQUAL-004 - Add discriminating evidence to RekorClockSkewCheck
// Description: Checks if system clock is synchronized with NTP status and VM detection
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Globalization;
using System.Net.Http;
using System.Runtime.InteropServices;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
@@ -15,6 +17,7 @@ namespace StellaOps.Doctor.Plugin.Attestor.Checks;
/// <summary>
/// Checks if system clock is synchronized with Rekor for attestation validity.
/// Includes NTP daemon status, VM detection, and discriminating evidence for root cause analysis.
/// </summary>
public sealed class RekorClockSkewCheck : IDoctorCheck
{
@@ -49,6 +52,10 @@ public sealed class RekorClockSkewCheck : IDoctorCheck
{
var builder = context.CreateResult(CheckId, "stellaops.doctor.attestor", "Attestor");
// Collect NTP and VM status for discriminating evidence
var ntpStatus = await GetNtpStatusAsync(ct);
var vmStatus = DetectVirtualMachine();
try
{
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
@@ -65,9 +72,29 @@ public sealed class RekorClockSkewCheck : IDoctorCheck
if (!response.IsSuccessStatusCode)
{
return builder
.Skip("Could not reach time reference server")
.WithEvidence("Clock check", eb => eb
.Add("Note", "Rekor unavailable; cannot verify clock skew"))
.Warn("Could not reach time reference server")
.WithEvidence("Clock check", eb =>
{
eb.Add("local_time_utc", context.TimeProvider.GetUtcNow().ToString("o"));
eb.Add("server_time_utc", "unavailable");
eb.Add("skew_seconds", "unknown");
eb.Add("ntp_daemon_running", ntpStatus.DaemonRunning.ToString().ToLowerInvariant());
eb.Add("ntp_daemon_type", ntpStatus.DaemonType);
eb.Add("ntp_servers_configured", string.Join(", ", ntpStatus.ServersConfigured));
eb.Add("last_sync_time_utc", ntpStatus.LastSyncTime?.ToString("o") ?? "null");
eb.Add("sync_age_seconds", ntpStatus.SyncAgeSeconds?.ToString(CultureInfo.InvariantCulture) ?? "null");
eb.Add("is_virtual_machine", vmStatus.IsVirtualMachine.ToString().ToLowerInvariant());
eb.Add("vm_clock_sync_enabled", vmStatus.ClockSyncEnabled.ToString().ToLowerInvariant());
eb.Add("connection_error_type", "server_unreachable");
})
.WithCauses(
"Rekor server unreachable",
"Network connectivity issue")
.WithRemediation(rb => rb
.AddStep(1, "Check network connectivity",
$"curl -s {rekorUrl}/api/v1/log",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
@@ -81,65 +108,511 @@ public sealed class RekorClockSkewCheck : IDoctorCheck
{
return builder
.Skip("Server did not return Date header")
.WithEvidence("Clock check", eb => eb
.Add("Note", "Cannot determine server time"))
.WithEvidence("Clock check", eb =>
{
eb.Add("local_time_utc", context.TimeProvider.GetUtcNow().ToString("o"));
eb.Add("server_time_utc", "not_provided");
eb.Add("note", "Cannot determine server time");
})
.Build();
}
var localTime = context.TimeProvider.GetUtcNow();
var skew = Math.Abs((localTime - serverTime).TotalSeconds);
var skew = (localTime - serverTime).TotalSeconds;
var absSkew = Math.Abs(skew);
if (skew <= MaxSkewSeconds)
if (absSkew <= MaxSkewSeconds)
{
return builder
.Pass($"System clock synchronized (skew: {skew:F1}s)")
.WithEvidence("Clock status", eb => eb
.Add("LocalTime", localTime.ToString("o"))
.Add("ServerTime", serverTime.ToString("o"))
.Add("SkewSeconds", skew.ToString("F1", CultureInfo.InvariantCulture))
.Add("MaxAllowedSkew", $"{MaxSkewSeconds}s"))
.Pass($"System clock synchronized (skew: {absSkew:F1}s)")
.WithEvidence("Clock status", eb =>
{
eb.Add("local_time_utc", localTime.ToString("o"));
eb.Add("server_time_utc", serverTime.ToString("o"));
eb.Add("skew_seconds", skew.ToString("F2", CultureInfo.InvariantCulture));
eb.Add("max_allowed_skew", MaxSkewSeconds.ToString(CultureInfo.InvariantCulture));
eb.Add("ntp_daemon_running", ntpStatus.DaemonRunning.ToString().ToLowerInvariant());
eb.Add("ntp_daemon_type", ntpStatus.DaemonType);
eb.Add("ntp_servers_configured", string.Join(", ", ntpStatus.ServersConfigured));
eb.Add("last_sync_time_utc", ntpStatus.LastSyncTime?.ToString("o") ?? "null");
eb.Add("sync_age_seconds", ntpStatus.SyncAgeSeconds?.ToString(CultureInfo.InvariantCulture) ?? "null");
eb.Add("is_virtual_machine", vmStatus.IsVirtualMachine.ToString().ToLowerInvariant());
eb.Add("vm_clock_sync_enabled", vmStatus.ClockSyncEnabled.ToString().ToLowerInvariant());
})
.Build();
}
// Build discriminating remediation based on evidence
return builder
.Fail($"System clock skew ({skew:F1}s) exceeds {MaxSkewSeconds}s threshold")
.WithEvidence("Clock status", eb => eb
.Add("LocalTime", localTime.ToString("o"))
.Add("ServerTime", serverTime.ToString("o"))
.Add("SkewSeconds", skew.ToString("F1", CultureInfo.InvariantCulture))
.Add("MaxAllowedSkew", $"{MaxSkewSeconds}s"))
.Fail($"System clock skew ({absSkew:F1}s) exceeds {MaxSkewSeconds}s threshold")
.WithEvidence("Clock status", eb =>
{
eb.Add("local_time_utc", localTime.ToString("o"));
eb.Add("server_time_utc", serverTime.ToString("o"));
eb.Add("skew_seconds", skew.ToString("F2", CultureInfo.InvariantCulture));
eb.Add("max_allowed_skew", MaxSkewSeconds.ToString(CultureInfo.InvariantCulture));
eb.Add("ntp_daemon_running", ntpStatus.DaemonRunning.ToString().ToLowerInvariant());
eb.Add("ntp_daemon_type", ntpStatus.DaemonType);
eb.Add("ntp_servers_configured", string.Join(", ", ntpStatus.ServersConfigured));
eb.Add("last_sync_time_utc", ntpStatus.LastSyncTime?.ToString("o") ?? "null");
eb.Add("sync_age_seconds", ntpStatus.SyncAgeSeconds?.ToString(CultureInfo.InvariantCulture) ?? "null");
eb.Add("is_virtual_machine", vmStatus.IsVirtualMachine.ToString().ToLowerInvariant());
eb.Add("vm_type", vmStatus.VmType);
eb.Add("vm_clock_sync_enabled", vmStatus.ClockSyncEnabled.ToString().ToLowerInvariant());
})
.WithCauses(
"NTP service not running",
"NTP server unreachable",
"System clock manually set incorrectly",
"Virtual machine clock drift")
.WithRemediation(rb => rb
.AddStep(1, "Check NTP status",
"timedatectl status",
CommandType.Shell)
.AddStep(2, "Enable NTP synchronization",
"sudo timedatectl set-ntp true",
CommandType.Shell)
.AddStep(3, "Force immediate sync (if using chronyd)",
"sudo chronyc -a makestep",
CommandType.Shell)
.AddStep(4, "Force immediate sync (if using ntpd)",
"sudo ntpdate -u pool.ntp.org",
CommandType.Shell))
.WithRemediation(rb => BuildPlatformSpecificRemediation(rb, ntpStatus, vmStatus))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
catch (Exception ex)
catch (HttpRequestException ex)
{
return builder
.Warn($"Could not verify clock skew: {ex.Message}")
.WithEvidence("Clock check", eb => eb
.Add("Error", ex.Message)
.Add("Note", "Using local time only"))
.WithEvidence("Clock check", eb =>
{
eb.Add("local_time_utc", context.TimeProvider.GetUtcNow().ToString("o"));
eb.Add("error_message", ex.Message);
eb.Add("connection_error_type", GetConnectionErrorType(ex));
eb.Add("ntp_daemon_running", ntpStatus.DaemonRunning.ToString().ToLowerInvariant());
eb.Add("ntp_daemon_type", ntpStatus.DaemonType);
eb.Add("is_virtual_machine", vmStatus.IsVirtualMachine.ToString().ToLowerInvariant());
})
.WithCauses(
"Network connectivity issue",
"Reference server unavailable")
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
catch (TaskCanceledException)
{
return builder
.Warn("Clock skew check timed out")
.WithEvidence("Clock check", eb =>
{
eb.Add("local_time_utc", context.TimeProvider.GetUtcNow().ToString("o"));
eb.Add("error_message", "Request timed out");
eb.Add("connection_error_type", "timeout");
eb.Add("timeout_seconds", "5");
})
.WithCauses("Network latency too high", "Reference server overloaded")
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
}
/// <summary>
/// Appends clock-repair steps to <paramref name="rb"/> that match the current operating system
/// and the collected NTP / virtual-machine evidence.
/// </summary>
/// <param name="rb">Remediation builder that receives the steps.</param>
/// <param name="ntpStatus">NTP daemon evidence; only read on Linux and Windows.</param>
/// <param name="vmStatus">Virtual-machine evidence; only read on Windows.</param>
private static void BuildPlatformSpecificRemediation(RemediationBuilder rb, NtpStatus ntpStatus, VmStatus vmStatus)
{
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
    {
        // Step 1: inspect a running daemon, otherwise start the one that was detected.
        if (ntpStatus.DaemonRunning)
        {
            rb.AddStep(1, "Check NTP status", "timedatectl status", CommandType.Shell);
        }
        else
        {
            string startCommand;
            switch (ntpStatus.DaemonType)
            {
                case "chronyd":
                    startCommand = "sudo systemctl start chronyd";
                    break;
                case "ntpd":
                    startCommand = "sudo systemctl start ntpd";
                    break;
                default:
                    // Any other daemon type falls back to systemd-timesyncd.
                    startCommand = "sudo systemctl start systemd-timesyncd";
                    break;
            }

            rb.AddStep(1, "Start NTP service", startCommand, CommandType.Shell);
        }

        // Step 2 is the same for every Linux daemon.
        rb.AddStep(2, "Enable NTP synchronization", "sudo timedatectl set-ntp true", CommandType.Shell);

        // Step 3: chronyd has its own step command; everything else uses ntpdate.
        var forceSyncCommand = ntpStatus.DaemonType == "chronyd"
            ? "sudo chronyc -a makestep"
            : "sudo ntpdate -u pool.ntp.org";
        rb.AddStep(3, "Force immediate sync", forceSyncCommand, CommandType.Shell);
        return;
    }

    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        // Neither Linux nor Windows: only a manual hint can be offered.
        rb.AddStep(1, "Sync system clock",
            "Consult your OS documentation for NTP configuration",
            CommandType.Manual);
        return;
    }

    // Windows: the time service is always w32time.
    if (ntpStatus.DaemonRunning)
    {
        rb.AddStep(1, "Check Windows Time status", "w32tm /query /status", CommandType.Shell);
    }
    else
    {
        rb.AddStep(1, "Start Windows Time service", "net start w32time", CommandType.Shell);
    }

    rb.AddStep(2, "Force time resync", "w32tm /resync /nowait", CommandType.Shell);

    // The hypervisor hint is only added for a VM whose clock sync was not detected as enabled.
    var needsVmSyncHint = vmStatus.IsVirtualMachine && !vmStatus.ClockSyncEnabled;
    if (needsVmSyncHint)
    {
        rb.AddStep(3, "Enable VM time sync",
            "Enable time synchronization in Hyper-V Integration Services or VMware Tools",
            CommandType.Manual);
    }
}
/// <summary>
/// Collects best-effort evidence about the local time-synchronisation service.
/// </summary>
/// <param name="ct">Cancellation token forwarded to every probing helper.</param>
/// <returns>
/// An <see cref="NtpStatus"/> filled in as far as the probes got. Any exception stops the probing
/// and the fields assigned so far (or their defaults) are returned.
/// </returns>
private static async Task<NtpStatus> GetNtpStatusAsync(CancellationToken ct)
{
    var ntp = new NtpStatus();

    try
    {
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
        {
            // The detector reports the first active daemon it finds, or "unknown".
            var detectedDaemon = await DetectLinuxNtpDaemonAsync(ct);
            ntp.DaemonType = detectedDaemon;

            var daemonActive = await IsLinuxServiceRunningAsync(ntp.DaemonType, ct);
            ntp.DaemonRunning = daemonActive;

            // Configured time sources for the detected daemon.
            var configuredServers = await GetLinuxNtpServersAsync(ntp.DaemonType, ct);
            ntp.ServersConfigured = configuredServers;

            // Synchronisation state, when it can be determined.
            var (lastSync, syncAge) = await GetLinuxSyncInfoAsync(ntp.DaemonType, ct);
            ntp.LastSyncTime = lastSync;
            ntp.SyncAgeSeconds = syncAge;
        }
        else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            ntp.DaemonType = "w32time";

            var serviceRunning = await IsWindowsTimeServiceRunningAsync(ct);
            ntp.DaemonRunning = serviceRunning;

            var peers = await GetWindowsNtpServersAsync(ct);
            ntp.ServersConfigured = peers;
        }
        else
        {
            ntp.DaemonType = "unknown";
        }
    }
    catch
    {
        // Deliberately best effort: NTP evidence is supplementary and must never fail the check.
    }

    return ntp;
}
/// <summary>
/// Finds the first active time-synchronisation service, probing chronyd, ntpd and
/// systemd-timesyncd in that order.
/// </summary>
/// <param name="ct">Cancellation token forwarded to each service probe.</param>
/// <returns>The name of the first active service, or <c>"unknown"</c> when none is active.</returns>
private static async Task<string> DetectLinuxNtpDaemonAsync(CancellationToken ct)
{
    // Order matters: the first active candidate wins.
    string[] candidates = ["chronyd", "ntpd", "systemd-timesyncd"];

    for (var index = 0; index < candidates.Length; index++)
    {
        var candidate = candidates[index];
        var isActive = await IsLinuxServiceRunningAsync(candidate, ct);
        if (isActive)
        {
            return candidate;
        }
    }

    return "unknown";
}
/// <summary>
/// Runs <c>systemctl is-active &lt;serviceName&gt;</c> and reports whether it printed "active".
/// </summary>
/// <param name="serviceName">Unit name appended to the <c>is-active</c> arguments.</param>
/// <param name="ct">Cancellation token for reading the output and waiting for exit.</param>
/// <returns>
/// <c>true</c> when the trimmed standard output equals "active" (case-insensitive);
/// <c>false</c> otherwise, including when the command cannot be started or any exception occurs.
/// </returns>
private static async Task<bool> IsLinuxServiceRunningAsync(string serviceName, CancellationToken ct)
{
    try
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = "systemctl",
            Arguments = $"is-active {serviceName}",
            RedirectStandardOutput = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        using var systemctl = new Process { StartInfo = startInfo };
        systemctl.Start();

        var stdout = await systemctl.StandardOutput.ReadToEndAsync(ct);
        await systemctl.WaitForExitAsync(ct);

        var state = stdout.Trim();
        return string.Equals(state, "active", StringComparison.OrdinalIgnoreCase);
    }
    catch
    {
        // Any failure while running the probe is reported as "not running".
        return false;
    }
}
/// <summary>
/// Lists the time sources known to the local NTP daemon.
/// </summary>
/// <remarks>
/// Only chronyd is queried (via <c>chronyc sources -n</c>); every other daemon type yields the fallback.
/// Source rows are recognised by their leading two-character mode/state column instead of skipping a
/// fixed number of header lines, so the first source is not lost when the tool prints fewer header
/// lines than expected.
/// </remarks>
/// <param name="daemonType">Detected daemon name; only <c>"chronyd"</c> triggers a query.</param>
/// <param name="ct">Cancellation token for reading the output and waiting for exit.</param>
/// <returns>
/// The addresses from the second column of each source row, or a single-element list containing
/// <c>"pool.ntp.org"</c> when nothing could be determined.
/// </returns>
private static async Task<List<string>> GetLinuxNtpServersAsync(string daemonType, CancellationToken ct)
{
    var servers = new List<string>();

    try
    {
        if (daemonType == "chronyd")
        {
            using var process = new Process
            {
                StartInfo = new ProcessStartInfo
                {
                    FileName = "chronyc",
                    Arguments = "sources -n",
                    RedirectStandardOutput = true,
                    UseShellExecute = false,
                    CreateNoWindow = true
                }
            };

            process.Start();
            var output = await process.StandardOutput.ReadToEndAsync(ct);
            await process.WaitForExitAsync(ct);

            foreach (var line in output.Split('\n'))
            {
                var parts = line.Split(' ', StringSplitOptions.RemoveEmptyEntries);
                if (parts.Length < 2)
                {
                    continue;
                }

                // A source row starts with a two-character column: a mode character
                // (^ server, = peer, # local reference clock) followed by a state character.
                // Header text and the "=====" separator do not match this shape.
                var marker = parts[0];
                var isSourceRow = marker.Length == 2 && marker[0] is '^' or '=' or '#';
                if (isSourceRow)
                {
                    servers.Add(parts[1]);
                }
            }
        }
    }
    catch
    {
        // Best effort: evidence gathering must not fail the check.
    }

    return servers.Count > 0 ? servers : ["pool.ntp.org"];
}
/// <summary>
/// Asks <c>timedatectl show --property=NTPSynchronized</c> whether the clock is synchronised.
/// </summary>
/// <remarks>
/// When the output contains "yes" the method returns the current UTC time with an age of 0; the
/// timestamp is the moment of this probe, not a sync time read from the daemon.
/// </remarks>
/// <param name="daemonType">Not read by this method.</param>
/// <param name="ct">Cancellation token for reading the output and waiting for exit.</param>
/// <returns>
/// <c>(DateTimeOffset.UtcNow, 0)</c> when synchronised; <c>(null, null)</c> otherwise or on any exception.
/// </returns>
private static async Task<(DateTimeOffset? LastSync, int? SyncAge)> GetLinuxSyncInfoAsync(string daemonType, CancellationToken ct)
{
    try
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = "timedatectl",
            Arguments = "show --property=NTPSynchronized",
            RedirectStandardOutput = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        using var timedatectl = new Process { StartInfo = startInfo };
        timedatectl.Start();

        var reply = await timedatectl.StandardOutput.ReadToEndAsync(ct);
        await timedatectl.WaitForExitAsync(ct);

        var synchronized = reply.Contains("yes", StringComparison.OrdinalIgnoreCase);
        if (!synchronized)
        {
            return (null, null);
        }

        return (DateTimeOffset.UtcNow, 0);
    }
    catch
    {
        // Best effort: an unavailable or failing timedatectl simply yields "no information".
        return (null, null);
    }
}
/// <summary>
/// Runs <c>sc query w32time</c> and reports whether its output mentions "RUNNING".
/// </summary>
/// <param name="ct">Cancellation token for reading the output and waiting for exit.</param>
/// <returns>
/// <c>true</c> when the output contains "RUNNING" (case-insensitive); <c>false</c> otherwise,
/// including when the command cannot be started or any exception occurs.
/// </returns>
private static async Task<bool> IsWindowsTimeServiceRunningAsync(CancellationToken ct)
{
    try
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = "sc",
            Arguments = "query w32time",
            RedirectStandardOutput = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        using var serviceQuery = new Process { StartInfo = startInfo };
        serviceQuery.Start();

        var report = await serviceQuery.StandardOutput.ReadToEndAsync(ct);
        await serviceQuery.WaitForExitAsync(ct);

        var running = report.Contains("RUNNING", StringComparison.OrdinalIgnoreCase);
        return running;
    }
    catch
    {
        // Any failure while querying the service is reported as "not running".
        return false;
    }
}
/// <summary>
/// Lists the peers reported by <c>w32tm /query /peers</c>.
/// </summary>
/// <remarks>
/// Each line containing "Peer:" is split at its first colon only, so peer values that themselves
/// contain colons (IPv6 literals, <c>host:port</c>) are kept intact. Blank peer values are skipped.
/// </remarks>
/// <param name="ct">Cancellation token for reading the output and waiting for exit.</param>
/// <returns>
/// The trimmed peer values, or a single-element list containing <c>"time.windows.com"</c> when
/// nothing could be determined.
/// </returns>
private static async Task<List<string>> GetWindowsNtpServersAsync(CancellationToken ct)
{
    var servers = new List<string>();

    try
    {
        using var process = new Process
        {
            StartInfo = new ProcessStartInfo
            {
                FileName = "w32tm",
                Arguments = "/query /peers",
                RedirectStandardOutput = true,
                UseShellExecute = false,
                CreateNoWindow = true
            }
        };

        process.Start();
        var output = await process.StandardOutput.ReadToEndAsync(ct);
        await process.WaitForExitAsync(ct);

        foreach (var line in output.Split('\n'))
        {
            if (!line.Contains("Peer:", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Split into label and value at the first colon only; the value may contain more colons.
            var parts = line.Split(':', 2);
            if (parts.Length < 2)
            {
                continue;
            }

            // Trim also removes the trailing '\r' left over from CRLF line endings.
            var peer = parts[1].Trim();
            if (peer.Length > 0)
            {
                servers.Add(peer);
            }
        }
    }
    catch
    {
        // Best effort: evidence gathering must not fail the check.
    }

    return servers.Count > 0 ? servers : ["time.windows.com"];
}
/// <summary>
/// Guesses whether the process runs inside a virtual machine or container.
/// </summary>
/// <remarks>
/// Linux: classifies by <c>/sys/class/dmi/id/sys_vendor</c> (vmware, microsoft, qemu/kvm, xen) and, when
/// the vendor matches none of these, by "docker"/"containerd" in <c>/proc/1/cgroup</c>.
/// Windows: classifies by two environment variables.
/// NOTE(review): <c>COMPUTERNAME_MANUFACTURER</c> does not look like a standard Windows variable —
/// confirm something in the deployment sets it.
/// </remarks>
/// <returns>
/// A <see cref="VmStatus"/> with whatever was detected; defaults are kept when nothing matches or a
/// probe throws.
/// </returns>
private static VmStatus DetectVirtualMachine()
{
    var result = new VmStatus();

    try
    {
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
        {
            InspectLinux(result);
        }
        else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            InspectWindows(result);
        }
    }
    catch
    {
        // Best effort: values assigned before the failure are kept.
    }

    return result;

    // Reads the lower-cased DMI system vendor; returns an empty string when missing or unreadable.
    static string ReadDmiVendor()
    {
        const string vendorPath = "/sys/class/dmi/id/sys_vendor";
        var vendor = "";
        try
        {
            if (File.Exists(vendorPath))
            {
                vendor = File.ReadAllText(vendorPath).Trim().ToLowerInvariant();
            }
        }
        catch { }

        return vendor;
    }

    // Linux classification: DMI vendor first, container cgroup markers as the last resort.
    static void InspectLinux(VmStatus vm)
    {
        var vendor = ReadDmiVendor();

        if (vendor.Contains("vmware"))
        {
            vm.IsVirtualMachine = true;
            vm.VmType = "vmware";
            vm.ClockSyncEnabled = File.Exists("/usr/bin/vmware-toolbox-cmd");
            return;
        }

        if (vendor.Contains("microsoft"))
        {
            vm.IsVirtualMachine = true;
            vm.VmType = "hyper-v";
            vm.ClockSyncEnabled = Directory.Exists("/sys/bus/vmbus");
            return;
        }

        if (vendor.Contains("qemu") || vendor.Contains("kvm"))
        {
            vm.IsVirtualMachine = true;
            vm.VmType = "kvm";
            return;
        }

        if (vendor.Contains("xen"))
        {
            vm.IsVirtualMachine = true;
            vm.VmType = "xen";
            return;
        }

        if (!File.Exists("/proc/1/cgroup"))
        {
            return;
        }

        var cgroup = File.ReadAllText("/proc/1/cgroup");
        var inContainer = cgroup.Contains("docker") || cgroup.Contains("containerd");
        if (inContainer)
        {
            vm.IsVirtualMachine = true;
            vm.VmType = "container";
            vm.ClockSyncEnabled = true; // Containers use host clock
        }
    }

    // Windows classification from environment variables; the processor check runs last and
    // therefore overrides the manufacturer-derived type.
    static void InspectWindows(VmStatus vm)
    {
        var manufacturer = Environment.GetEnvironmentVariable("COMPUTERNAME_MANUFACTURER") ?? "";
        var knownHypervisorVendor =
            manufacturer.Contains("VMware", StringComparison.OrdinalIgnoreCase) ||
            manufacturer.Contains("Microsoft", StringComparison.OrdinalIgnoreCase) ||
            manufacturer.Contains("Xen", StringComparison.OrdinalIgnoreCase);
        if (knownHypervisorVendor)
        {
            vm.IsVirtualMachine = true;
            vm.VmType = manufacturer.ToLowerInvariant();
        }

        var processorId = Environment.GetEnvironmentVariable("PROCESSOR_IDENTIFIER");
        if (processorId?.Contains("Virtual", StringComparison.OrdinalIgnoreCase) == true)
        {
            vm.IsVirtualMachine = true;
            vm.VmType = "hyper-v";
        }
    }
}
/// <summary>
/// Maps an <see cref="HttpRequestException"/> to a coarse connection-error category for evidence output.
/// </summary>
/// <remarks>
/// The exception message is examined first, in the order ssl, dns, refused, timeout. If no keyword
/// matches, the inner-exception chain is inspected for an authentication or socket failure, which
/// still classifies the error when the message wording differs.
/// </remarks>
/// <param name="ex">The failed request's exception.</param>
/// <returns>
/// One of <c>"ssl_error"</c>, <c>"dns_failure"</c>, <c>"refused"</c>, <c>"timeout"</c> or
/// <c>"connection_failed"</c>.
/// </returns>
private static string GetConnectionErrorType(HttpRequestException ex)
{
    var message = ex.Message.ToLowerInvariant();

    if (message.Contains("ssl") || message.Contains("tls") || message.Contains("certificate"))
    {
        return "ssl_error";
    }

    // "no such host" covers the resolver failure text that has none of the other keywords.
    if (message.Contains("name") || message.Contains("dns") || message.Contains("resolve") ||
        message.Contains("no such host"))
    {
        return "dns_failure";
    }

    if (message.Contains("refused"))
    {
        return "refused";
    }

    // Timeouts are worded both as "timeout" and as "timed out".
    if (message.Contains("timeout") || message.Contains("timed out"))
    {
        return "timeout";
    }

    // Fall back to the underlying cause when the message text was not conclusive.
    for (var inner = ex.InnerException; inner is not null; inner = inner.InnerException)
    {
        if (inner is System.Security.Authentication.AuthenticationException)
        {
            return "ssl_error";
        }

        if (inner is System.Net.Sockets.SocketException socketFailure)
        {
            switch (socketFailure.SocketErrorCode)
            {
                case System.Net.Sockets.SocketError.HostNotFound:
                case System.Net.Sockets.SocketError.TryAgain:
                case System.Net.Sockets.SocketError.NoData:
                    return "dns_failure";
                case System.Net.Sockets.SocketError.ConnectionRefused:
                    return "refused";
                case System.Net.Sockets.SocketError.TimedOut:
                    return "timeout";
            }
        }
    }

    return "connection_failed";
}
/// <summary>
/// Evidence about the local time-synchronisation service, filled in on a best-effort basis.
/// </summary>
private sealed class NtpStatus
{
    /// <summary>Name of the detected time service; <c>"unknown"</c> until one is identified.</summary>
    public string DaemonType { get; set; } = "unknown";

    /// <summary>Whether the time service was found running.</summary>
    public bool DaemonRunning { get; set; }

    /// <summary>Time sources reported for the service; empty until populated.</summary>
    public List<string> ServersConfigured { get; set; } = new();

    /// <summary>Last synchronisation timestamp, or <c>null</c> when not determined.</summary>
    public DateTimeOffset? LastSyncTime { get; set; }

    /// <summary>Seconds since the last synchronisation, or <c>null</c> when not determined.</summary>
    public int? SyncAgeSeconds { get; set; }
}
/// <summary>
/// Evidence about whether the process runs inside a virtual machine or container.
/// </summary>
private sealed class VmStatus
{
    /// <summary>Detected virtualisation type; <c>"none"</c> until something is detected.</summary>
    public string VmType { get; set; } = "none";

    /// <summary>Whether a virtual machine or container was detected.</summary>
    public bool IsVirtualMachine { get; set; }

    /// <summary>Whether clock synchronisation with the host was detected as enabled.</summary>
    public bool ClockSyncEnabled { get; set; }
}
}

View File

@@ -100,9 +100,9 @@ public sealed class TransparencyLogConsistencyCheck : IDoctorCheck
.Add("CheckpointPath", checkpointPath)
.Add("Error", "Failed to parse checkpoint JSON"))
.WithRemediation(rb => rb
.AddStep(1, "Remove corrupted checkpoint",
.AddDestructiveStep(1, "Remove corrupted checkpoint",
$"rm {checkpointPath}",
CommandType.Shell)
$"cat {checkpointPath}")
.AddStep(2, "Trigger re-sync",
"stella attestor transparency sync",
CommandType.Shell))
@@ -181,9 +181,9 @@ public sealed class TransparencyLogConsistencyCheck : IDoctorCheck
.AddStep(3, "Check stored checkpoint",
$"cat {checkpointPath} | jq .",
CommandType.Shell)
.AddStep(4, "If using wrong log, reset checkpoint",
.AddDestructiveStep(4, "If using wrong log, reset checkpoint (DESTRUCTIVE)",
$"rm {checkpointPath} && stella attestor transparency sync",
CommandType.Shell))
$"ls -la {checkpointPath}"))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}

View File

@@ -1,11 +1,15 @@
// -----------------------------------------------------------------------------
// OidcProviderConnectivityCheck.cs
// Sprint: SPRINT_20260117_016_CLI_auth_access
// Task: AAC-006 - Doctor checks for auth configuration
// Description: Health check for OIDC provider connectivity
// Sprint: SPRINT_20260118_015_Doctor_check_quality_improvements
// Task: DQUAL-002 - Replace OidcProviderConnectivityCheck mock implementation
// Description: Health check for OIDC provider connectivity with real HTTP calls
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
@@ -16,6 +20,11 @@ namespace StellaOps.Doctor.Plugin.Auth.Checks;
/// </summary>
public sealed class OidcProviderConnectivityCheck : IDoctorCheck
{
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true
};
/// <inheritdoc />
public string CheckId => "check.auth.oidc";
@@ -37,7 +46,11 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
return true;
// Check if external OIDC provider is configured
var issuerUrl = context.Configuration["Authentication:Oidc:Issuer"]
?? context.Configuration["Auth:Oidc:Authority"]
?? context.Configuration["Oidc:Issuer"];
return !string.IsNullOrEmpty(issuerUrl);
}
/// <inheritdoc />
@@ -45,29 +58,35 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
{
var builder = context.CreateResult(CheckId, "stellaops.doctor.auth", "Auth & Access Control");
var oidcStatus = await CheckOidcProviderAsync(context, ct);
var issuerUrl = context.Configuration["Authentication:Oidc:Issuer"]
?? context.Configuration["Auth:Oidc:Authority"]
?? context.Configuration["Oidc:Issuer"];
if (!oidcStatus.IsConfigured)
if (string.IsNullOrEmpty(issuerUrl))
{
return builder
.Pass("No external OIDC provider configured (using local authority)")
.WithEvidence("OIDC Status", eb =>
{
eb.Add("ExternalProvider", "NOT CONFIGURED");
eb.Add("LocalAuthority", "ACTIVE");
eb.Add("external_provider", "NOT_CONFIGURED");
eb.Add("local_authority", "ACTIVE");
})
.Build();
}
var oidcStatus = await CheckOidcProviderAsync(context, issuerUrl, ct);
if (!oidcStatus.IsReachable)
{
return builder
.Fail($"Cannot reach OIDC provider at {oidcStatus.ProviderUrl}")
.Fail($"Cannot reach OIDC provider at {issuerUrl}")
.WithEvidence("OIDC Status", eb =>
{
eb.Add("ProviderUrl", oidcStatus.ProviderUrl ?? "not set");
eb.Add("Reachable", "NO");
eb.Add("Error", oidcStatus.Error ?? "Connection failed");
eb.Add("issuer_url", issuerUrl);
eb.Add("discovery_reachable", "false");
eb.Add("http_status_code", oidcStatus.HttpStatusCode?.ToString() ?? "null");
eb.Add("error_message", oidcStatus.Error ?? "Connection failed");
eb.Add("connection_error_type", oidcStatus.ConnectionErrorType ?? "unknown");
})
.WithCauses(
"OIDC provider is down",
@@ -76,9 +95,12 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
"DNS resolution failure")
.WithRemediation(rb => rb
.AddStep(1, "Test provider connectivity",
"stella auth oidc test",
$"curl -s {issuerUrl}/.well-known/openid-configuration",
CommandType.Shell)
.AddStep(2, "Check network configuration",
.AddStep(2, "Check DNS resolution",
$"nslookup {new Uri(issuerUrl).Host}",
CommandType.Shell)
.AddStep(3, "Check network configuration",
"stella doctor --check check.network.dns",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
@@ -91,10 +113,13 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
.Warn("OIDC discovery document has issues")
.WithEvidence("OIDC Status", eb =>
{
eb.Add("ProviderUrl", oidcStatus.ProviderUrl ?? "not set");
eb.Add("Reachable", "YES");
eb.Add("DiscoveryValid", "PARTIAL");
eb.Add("Warning", oidcStatus.DiscoveryWarning ?? "");
eb.Add("issuer_url", issuerUrl);
eb.Add("discovery_reachable", "true");
eb.Add("discovery_response_ms", oidcStatus.DiscoveryResponseMs.ToString(CultureInfo.InvariantCulture));
eb.Add("authorization_endpoint_present", oidcStatus.AuthorizationEndpointPresent.ToString().ToLowerInvariant());
eb.Add("token_endpoint_present", oidcStatus.TokenEndpointPresent.ToString().ToLowerInvariant());
eb.Add("jwks_uri_present", oidcStatus.JwksUriPresent.ToString().ToLowerInvariant());
eb.Add("error_message", oidcStatus.DiscoveryWarning ?? "");
})
.WithCauses(
"Discovery document missing required fields",
@@ -102,34 +127,179 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
"JWKS endpoint issues")
.WithRemediation(rb => rb
.AddStep(1, "Validate discovery document",
$"curl -s {issuerUrl}/.well-known/openid-configuration | jq .",
CommandType.Shell)
.AddStep(2, "Check OIDC provider configuration",
"stella auth oidc validate",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
if (oidcStatus.JwksKeyCount == 0)
{
return builder
.Warn("JWKS has no keys - token validation may fail")
.WithEvidence("OIDC Status", eb =>
{
eb.Add("issuer_url", issuerUrl);
eb.Add("discovery_reachable", "true");
eb.Add("discovery_response_ms", oidcStatus.DiscoveryResponseMs.ToString(CultureInfo.InvariantCulture));
eb.Add("authorization_endpoint_present", "true");
eb.Add("token_endpoint_present", "true");
eb.Add("jwks_uri_present", "true");
eb.Add("jwks_key_count", "0");
eb.Add("jwks_fetch_ms", oidcStatus.JwksFetchMs.ToString(CultureInfo.InvariantCulture));
})
.WithCauses(
"JWKS endpoint returned empty key set",
"Key rotation in progress",
"OIDC provider misconfiguration")
.WithRemediation(rb => rb
.AddStep(1, "Check JWKS endpoint",
$"curl -s {oidcStatus.JwksUri} | jq .",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
return builder
.Pass("OIDC provider is reachable and configured correctly")
.WithEvidence("OIDC Status", eb =>
{
eb.Add("ProviderUrl", oidcStatus.ProviderUrl ?? "not set");
eb.Add("Reachable", "YES");
eb.Add("DiscoveryValid", "YES");
eb.Add("ResponseTimeMs", oidcStatus.ResponseTimeMs.ToString(CultureInfo.InvariantCulture));
eb.Add("issuer_url", issuerUrl);
eb.Add("discovery_reachable", "true");
eb.Add("discovery_response_ms", oidcStatus.DiscoveryResponseMs.ToString(CultureInfo.InvariantCulture));
eb.Add("authorization_endpoint_present", "true");
eb.Add("token_endpoint_present", "true");
eb.Add("jwks_uri_present", "true");
eb.Add("jwks_key_count", oidcStatus.JwksKeyCount.ToString(CultureInfo.InvariantCulture));
eb.Add("jwks_fetch_ms", oidcStatus.JwksFetchMs.ToString(CultureInfo.InvariantCulture));
})
.Build();
}
private Task<OidcStatus> CheckOidcProviderAsync(DoctorPluginContext context, CancellationToken ct)
private async Task<OidcStatus> CheckOidcProviderAsync(DoctorPluginContext context, string issuerUrl, CancellationToken ct)
{
return Task.FromResult(new OidcStatus
var result = new OidcStatus { ProviderUrl = issuerUrl };
try
{
IsConfigured = true,
ProviderUrl = "https://auth.example.com",
IsReachable = true,
DiscoveryValid = true,
ResponseTimeMs = 85
});
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
httpClient.Timeout = TimeSpan.FromSeconds(10);
// Fetch discovery document
var discoveryUrl = $"{issuerUrl.TrimEnd('/')}/.well-known/openid-configuration";
var discoveryStopwatch = Stopwatch.StartNew();
HttpResponseMessage discoveryResponse;
try
{
discoveryResponse = await httpClient.GetAsync(discoveryUrl, ct);
}
catch (HttpRequestException ex)
{
result.IsReachable = false;
result.Error = ex.Message;
result.ConnectionErrorType = GetConnectionErrorType(ex);
return result;
}
catch (TaskCanceledException)
{
result.IsReachable = false;
result.Error = "Request timed out";
result.ConnectionErrorType = "timeout";
return result;
}
discoveryStopwatch.Stop();
result.DiscoveryResponseMs = discoveryStopwatch.ElapsedMilliseconds;
result.HttpStatusCode = (int)discoveryResponse.StatusCode;
if (!discoveryResponse.IsSuccessStatusCode)
{
result.IsReachable = true;
result.DiscoveryValid = false;
result.DiscoveryWarning = $"Discovery endpoint returned HTTP {(int)discoveryResponse.StatusCode}";
return result;
}
result.IsReachable = true;
// Parse discovery document
var discoveryJson = await discoveryResponse.Content.ReadAsStringAsync(ct);
using var discoveryDoc = JsonDocument.Parse(discoveryJson);
var root = discoveryDoc.RootElement;
// Check required endpoints
result.AuthorizationEndpointPresent = root.TryGetProperty("authorization_endpoint", out _);
result.TokenEndpointPresent = root.TryGetProperty("token_endpoint", out _);
result.JwksUriPresent = root.TryGetProperty("jwks_uri", out var jwksUriElement);
if (!result.AuthorizationEndpointPresent || !result.TokenEndpointPresent || !result.JwksUriPresent)
{
result.DiscoveryValid = false;
var missing = new List<string>();
if (!result.AuthorizationEndpointPresent) missing.Add("authorization_endpoint");
if (!result.TokenEndpointPresent) missing.Add("token_endpoint");
if (!result.JwksUriPresent) missing.Add("jwks_uri");
result.DiscoveryWarning = $"Missing required fields: {string.Join(", ", missing)}";
return result;
}
result.DiscoveryValid = true;
result.JwksUri = jwksUriElement.GetString();
// Fetch JWKS
if (!string.IsNullOrEmpty(result.JwksUri))
{
var jwksStopwatch = Stopwatch.StartNew();
try
{
var jwksResponse = await httpClient.GetAsync(result.JwksUri, ct);
jwksStopwatch.Stop();
result.JwksFetchMs = jwksStopwatch.ElapsedMilliseconds;
if (jwksResponse.IsSuccessStatusCode)
{
var jwksJson = await jwksResponse.Content.ReadAsStringAsync(ct);
using var jwksDoc = JsonDocument.Parse(jwksJson);
if (jwksDoc.RootElement.TryGetProperty("keys", out var keysArray) && keysArray.ValueKind == JsonValueKind.Array)
{
result.JwksKeyCount = keysArray.GetArrayLength();
}
}
}
catch
{
// JWKS fetch failed but discovery worked
result.JwksKeyCount = 0;
}
}
return result;
}
catch (Exception ex)
{
result.IsReachable = false;
result.Error = ex.Message;
return result;
}
}
/// <summary>
/// Maps an <see cref="HttpRequestException"/> to a coarse connection-error category for evidence output.
/// </summary>
/// <remarks>
/// The exception message is examined first, in the order ssl, dns, refused, timeout. If no keyword
/// matches, the inner-exception chain is inspected for an authentication or socket failure, which
/// still classifies the error when the message wording differs.
/// </remarks>
/// <param name="ex">The failed request's exception.</param>
/// <returns>
/// One of <c>"ssl_error"</c>, <c>"dns_failure"</c>, <c>"refused"</c>, <c>"timeout"</c> or
/// <c>"connection_failed"</c>.
/// </returns>
private static string GetConnectionErrorType(HttpRequestException ex)
{
    var message = ex.Message.ToLowerInvariant();

    if (message.Contains("ssl") || message.Contains("tls") || message.Contains("certificate"))
    {
        return "ssl_error";
    }

    // "no such host" covers the resolver failure text that has none of the other keywords.
    if (message.Contains("name") || message.Contains("dns") || message.Contains("resolve") ||
        message.Contains("no such host"))
    {
        return "dns_failure";
    }

    if (message.Contains("refused"))
    {
        return "refused";
    }

    // Timeouts are worded both as "timeout" and as "timed out".
    if (message.Contains("timeout") || message.Contains("timed out"))
    {
        return "timeout";
    }

    // Fall back to the underlying cause when the message text was not conclusive.
    for (var inner = ex.InnerException; inner is not null; inner = inner.InnerException)
    {
        if (inner is System.Security.Authentication.AuthenticationException)
        {
            return "ssl_error";
        }

        if (inner is System.Net.Sockets.SocketException socketFailure)
        {
            switch (socketFailure.SocketErrorCode)
            {
                case System.Net.Sockets.SocketError.HostNotFound:
                case System.Net.Sockets.SocketError.TryAgain:
                case System.Net.Sockets.SocketError.NoData:
                    return "dns_failure";
                case System.Net.Sockets.SocketError.ConnectionRefused:
                    return "refused";
                case System.Net.Sockets.SocketError.TimedOut:
                    return "timeout";
            }
        }
    }

    return "connection_failed";
}
private sealed class OidcStatus
@@ -139,7 +309,15 @@ public sealed class OidcProviderConnectivityCheck : IDoctorCheck
public bool IsReachable { get; set; }
public bool DiscoveryValid { get; set; }
public string? Error { get; set; }
public string? ConnectionErrorType { get; set; }
public string? DiscoveryWarning { get; set; }
public long ResponseTimeMs { get; set; }
public long DiscoveryResponseMs { get; set; }
public int? HttpStatusCode { get; set; }
public bool AuthorizationEndpointPresent { get; set; }
public bool TokenEndpointPresent { get; set; }
public bool JwksUriPresent { get; set; }
public string? JwksUri { get; set; }
public int JwksKeyCount { get; set; }
public long JwksFetchMs { get; set; }
}
}

View File

@@ -0,0 +1,216 @@
// -----------------------------------------------------------------------------
// AttestationSigningHealthCheck.cs
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
// Task: COMPL-003 - Implement AttestationSigningHealthCheck
// Description: Monitor attestation signing capability
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
/// <summary>
/// Monitors attestation signing health and key availability.
/// </summary>
public sealed class AttestationSigningHealthCheck : IDoctorCheck
{
private const string PluginId = "stellaops.doctor.compliance";
private const string CategoryName = "Compliance";
/// <inheritdoc />
public string CheckId => "check.compliance.attestation-signing";
/// <inheritdoc />
public string Name => "Attestation Signing Health";
/// <inheritdoc />
public string Description => "Monitor signing key availability and attestation capability";
/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;
/// <inheritdoc />
public IReadOnlyList<string> Tags => ["compliance", "attestation", "signing", "crypto"];
/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
    // The check applies only when an Attestor URL is configured. "Attestor:Url" is consulted
    // first, and "Services:Attestor:Url" is read only when the first key is absent.
    var attestorUrl = context.Configuration["Attestor:Url"];
    if (attestorUrl is null)
    {
        attestorUrl = context.Configuration["Services:Attestor:Url"];
    }

    // Runnable when the resolved value is neither null nor empty.
    return attestorUrl is { Length: > 0 };
}
/// <inheritdoc />
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
var attestorUrl = context.Configuration["Attestor:Url"]
?? context.Configuration["Services:Attestor:Url"]
?? "http://localhost:5082";
try
{
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
httpClient.Timeout = TimeSpan.FromSeconds(10);
var response = await httpClient.GetAsync(
$"{attestorUrl.TrimEnd('/')}/api/v1/signing/status",
ct);
if (!response.IsSuccessStatusCode)
{
return builder
.Fail($"Cannot retrieve signing status: HTTP {(int)response.StatusCode}")
.WithEvidence("Signing Status", eb =>
{
eb.Add("attestor_url", attestorUrl);
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
})
.WithCauses("Attestor service unavailable", "Authentication failure")
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
var json = await response.Content.ReadAsStringAsync(ct);
var status = ParseSigningStatus(json);
// Check key availability
if (!status.KeyAvailable)
{
return builder
.Fail("Signing key not available - cannot create attestations")
.WithEvidence("Signing Status", eb =>
{
eb.Add("key_available", "false");
eb.Add("key_type", status.KeyType ?? "unknown");
eb.Add("last_error", status.LastError ?? "none");
})
.WithCauses(
"HSM/KMS connectivity issue",
"Key rotation in progress",
"Key expired or revoked",
"Permission denied")
.WithRemediation(rb =>
{
rb.AddStep(1, "Check key status",
"stella attestor key status",
CommandType.Stella);
rb.AddStep(2, "Verify HSM/KMS connectivity",
"stella attestor hsm test",
CommandType.Stella);
})
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
// Check key expiration
if (status.KeyExpiresAt.HasValue)
{
var daysUntilExpiry = (status.KeyExpiresAt.Value - DateTimeOffset.UtcNow).TotalDays;
if (daysUntilExpiry <= 0)
{
return builder
.Fail("Signing key has expired")
.WithEvidence("Signing Status", eb =>
{
eb.Add("key_expired", "true");
eb.Add("expired_at", status.KeyExpiresAt.Value.ToString("o", CultureInfo.InvariantCulture));
})
.WithCauses("Key not rotated before expiry")
.WithRemediation(rb => rb
.AddStep(1, "Rotate signing key", "stella attestor key rotate", CommandType.Stella))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
if (daysUntilExpiry <= 30)
{
return builder
.Warn($"Signing key expires in {daysUntilExpiry:F0} days")
.WithEvidence("Signing Status", eb =>
{
eb.Add("key_available", "true");
eb.Add("days_until_expiry", daysUntilExpiry.ToString("F0", CultureInfo.InvariantCulture));
eb.Add("expires_at", status.KeyExpiresAt.Value.ToString("o", CultureInfo.InvariantCulture));
})
.WithCauses("Key approaching end of validity")
.WithRemediation(rb => rb
.AddStep(1, "Schedule key rotation", "stella attestor key rotate --schedule", CommandType.Stella))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
}
return builder
.Pass($"Signing healthy ({status.KeyType}, {status.SignaturesLast24h} signatures in 24h)")
.WithEvidence("Signing Status", eb =>
{
eb.Add("key_available", "true");
eb.Add("key_type", status.KeyType ?? "unknown");
eb.Add("signatures_24h", status.SignaturesLast24h.ToString(CultureInfo.InvariantCulture));
if (status.KeyExpiresAt.HasValue)
eb.Add("expires_at", status.KeyExpiresAt.Value.ToString("o", CultureInfo.InvariantCulture));
})
.Build();
}
catch (HttpRequestException ex)
{
return builder
.Fail($"Cannot check signing health: {ex.Message}")
.WithEvidence("Signing Status", eb => eb.Add("error_message", ex.Message))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
catch (TaskCanceledException)
{
return builder
.Warn("Signing health check timed out")
.WithEvidence("Signing Status", eb => eb.Add("connection_error_type", "timeout"))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
}
private static SigningStatus ParseSigningStatus(string json)
{
var status = new SigningStatus();
try
{
using var doc = JsonDocument.Parse(json);
status.KeyAvailable = doc.RootElement.TryGetProperty("keyAvailable", out var ka) && ka.GetBoolean();
status.KeyType = doc.RootElement.TryGetProperty("keyType", out var kt) ? kt.GetString() : null;
status.SignaturesLast24h = doc.RootElement.TryGetProperty("signaturesLast24h", out var s24) ? s24.GetInt32() : 0;
status.LastError = doc.RootElement.TryGetProperty("lastError", out var le) ? le.GetString() : null;
if (doc.RootElement.TryGetProperty("keyExpiresAt", out var ke) &&
DateTimeOffset.TryParse(ke.GetString(), out var expiresAt))
{
status.KeyExpiresAt = expiresAt;
}
}
catch { }
return status;
}
private sealed class SigningStatus
{
public bool KeyAvailable { get; set; }
public string? KeyType { get; set; }
public int SignaturesLast24h { get; set; }
public DateTimeOffset? KeyExpiresAt { get; set; }
public string? LastError { get; set; }
}
}

View File

@@ -0,0 +1,196 @@
// -----------------------------------------------------------------------------
// AuditReadinessCheck.cs
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
// Task: COMPL-005 - Implement AuditReadinessCheck
// Description: Verify system is ready for compliance audits
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
/// <summary>
/// Verifies that the system is ready for compliance audits.
/// Checks evidence availability, retention policies, and audit trails.
/// </summary>
/// <remarks>
/// Calls <c>/api/v1/evidence/audit-readiness</c> on the configured Evidence Locker URL and
/// collects readiness issues. Three or more issues fail the check; one or two produce a warning.
/// </remarks>
public sealed class AuditReadinessCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.compliance";
    private const string CategoryName = "Compliance";

    // Retention requirement assumed when the response does not state one.
    private const int DefaultRequiredRetentionDays = 365;

    // Number of simultaneous issues at which the result escalates from warning to failure.
    private const int CriticalIssueCount = 3;

    /// <inheritdoc />
    public string CheckId => "check.compliance.audit-readiness";

    /// <inheritdoc />
    public string Name => "Audit Readiness";

    /// <inheritdoc />
    public string Description => "Verify system is ready for compliance audits";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["compliance", "audit", "evidence"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // The check is only meaningful when an Evidence Locker URL has been configured.
        var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
            ?? context.Configuration["Services:EvidenceLocker:Url"];
        return !string.IsNullOrEmpty(evidenceUrl);
    }

    /// <inheritdoc />
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="ct"/> is cancelled by the caller; only HTTP client timeouts
    /// are converted into a "timed out" result.
    /// </exception>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
            ?? context.Configuration["Services:EvidenceLocker:Url"]
            ?? "http://localhost:5080";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(15);

            // Dispose the response so the underlying connection is released promptly.
            using var response = await httpClient.GetAsync(
                $"{evidenceUrl.TrimEnd('/')}/api/v1/evidence/audit-readiness",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                return builder
                    .Warn($"Cannot check audit readiness: HTTP {(int)response.StatusCode}")
                    .WithEvidence("Audit Readiness", eb =>
                    {
                        eb.Add("evidence_locker_url", evidenceUrl);
                        eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);

            // An unreadable body must not be mistaken for "everything is disabled".
            if (ParseAuditStatus(json) is not { } status)
            {
                return builder
                    .Warn("Cannot check audit readiness: response is not a JSON object")
                    .WithEvidence("Audit Readiness", eb =>
                    {
                        eb.Add("evidence_locker_url", evidenceUrl);
                        eb.Add("parse_error", "invalid_json");
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var issues = new List<string>();
            if (!status.RetentionPolicyConfigured)
            {
                issues.Add("No retention policy configured");
            }

            if (!status.AuditLogEnabled)
            {
                issues.Add("Audit logging disabled");
            }

            if (!status.BackupVerified)
            {
                issues.Add("Backup not verified");
            }

            if (status.OldestEvidenceAge < status.RequiredRetentionDays)
            {
                issues.Add($"Evidence retention {status.OldestEvidenceAge}d < required {status.RequiredRetentionDays}d");
            }

            if (issues.Count >= CriticalIssueCount)
            {
                return builder
                    .Fail($"Audit readiness critical: {issues.Count} issues")
                    .WithEvidence("Audit Readiness", eb =>
                    {
                        eb.Add("issues_count", issues.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("retention_policy_configured", status.RetentionPolicyConfigured.ToString().ToLowerInvariant());
                        eb.Add("audit_log_enabled", status.AuditLogEnabled.ToString().ToLowerInvariant());
                        eb.Add("backup_verified", status.BackupVerified.ToString().ToLowerInvariant());
                        eb.Add("evidence_count", status.EvidenceCount.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses(issues.ToArray())
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "Configure retention policy",
                            "stella evidence retention set --days 365",
                            CommandType.Stella);
                        rb.AddStep(2, "Enable audit logging",
                            "stella audit enable",
                            CommandType.Stella);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (issues.Count > 0)
            {
                return builder
                    .Warn($"Audit readiness issues: {string.Join(", ", issues)}")
                    .WithEvidence("Audit Readiness", eb =>
                    {
                        eb.Add("issues_count", issues.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("evidence_count", status.EvidenceCount.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses(issues.ToArray())
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"Audit ready ({status.EvidenceCount} records, {status.OldestEvidenceAge}d retention)")
                .WithEvidence("Audit Readiness", eb =>
                {
                    eb.Add("evidence_count", status.EvidenceCount.ToString(CultureInfo.InvariantCulture));
                    eb.Add("oldest_evidence_days", status.OldestEvidenceAge.ToString(CultureInfo.InvariantCulture));
                    eb.Add("retention_policy_configured", "true");
                    eb.Add("audit_log_enabled", "true");
                    eb.Add("backup_verified", "true");
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check audit readiness: {ex.Message}")
                .WithEvidence("Audit Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException) when (!ct.IsCancellationRequested)
        {
            // The caller's token was not cancelled, so this is the HttpClient timeout.
            return builder
                .Warn("Audit readiness check timed out")
                .WithEvidence("Audit Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Parses the audit readiness payload.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>
    /// The parsed status, or <c>null</c> when the body is not valid JSON or its root is not an object.
    /// Missing or wrongly typed properties fall back to their defaults individually.
    /// </returns>
    private static AuditStatus? ParseAuditStatus(string json)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            return new AuditStatus
            {
                RetentionPolicyConfigured = ReadBool(root, "retentionPolicyConfigured"),
                AuditLogEnabled = ReadBool(root, "auditLogEnabled"),
                BackupVerified = ReadBool(root, "backupVerified"),
                EvidenceCount = ReadInt(root, "evidenceCount"),
                OldestEvidenceAge = ReadInt(root, "oldestEvidenceAgeDays"),
                RequiredRetentionDays = ReadInt(root, "requiredRetentionDays", DefaultRequiredRetentionDays),
            };
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Returns true only when the property exists and is the JSON literal true.
    private static bool ReadBool(JsonElement root, string name) =>
        root.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.True;

    // Returns the property as a 32-bit integer, or the fallback when absent or not an integer.
    private static int ReadInt(JsonElement root, string name, int fallback = 0) =>
        root.TryGetProperty(name, out var value)
        && value.ValueKind == JsonValueKind.Number
        && value.TryGetInt32(out var number)
            ? number
            : fallback;

    /// <summary>Fields read from the audit readiness response.</summary>
    private sealed class AuditStatus
    {
        public bool RetentionPolicyConfigured { get; set; }
        public bool AuditLogEnabled { get; set; }
        public bool BackupVerified { get; set; }
        public int EvidenceCount { get; set; }
        public int OldestEvidenceAge { get; set; }
        public int RequiredRetentionDays { get; set; }
    }
}

View File

@@ -0,0 +1,191 @@
// -----------------------------------------------------------------------------
// ComplianceFrameworkCheck.cs
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
// Task: COMPL-007 - Implement ComplianceFrameworkCheck
// Description: Verify compliance framework requirements are met
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
/// <summary>
/// Verifies that configured compliance framework requirements are met.
/// Supports SOC2, FedRAMP, HIPAA, PCI-DSS, and custom frameworks.
/// </summary>
/// <remarks>
/// Calls <c>/api/v1/compliance/status</c> on the configured Policy URL. Any failing control
/// fails the check; a compliance score below 1.0 with no failing controls produces a warning.
/// </remarks>
public sealed class ComplianceFrameworkCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.compliance";
    private const string CategoryName = "Compliance";

    /// <inheritdoc />
    public string CheckId => "check.compliance.framework";

    /// <inheritdoc />
    public string Name => "Compliance Framework";

    /// <inheritdoc />
    public string Description => "Verify compliance framework requirements are met";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["compliance", "framework", "soc2", "fedramp"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // Only run when at least one compliance framework has been configured.
        var frameworks = context.Configuration["Compliance:Frameworks"];
        return !string.IsNullOrEmpty(frameworks);
    }

    /// <inheritdoc />
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="ct"/> is cancelled by the caller; only HTTP client timeouts
    /// are converted into a "timed out" result.
    /// </exception>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var policyUrl = context.Configuration["Policy:Url"]
            ?? context.Configuration["Services:Policy:Url"]
            ?? "http://localhost:5050";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(15);

            // Dispose the response so the underlying connection is released promptly.
            using var response = await httpClient.GetAsync(
                $"{policyUrl.TrimEnd('/')}/api/v1/compliance/status",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                return builder
                    .Warn($"Cannot check compliance status: HTTP {(int)response.StatusCode}")
                    .WithEvidence("Compliance Status", eb =>
                    {
                        eb.Add("policy_url", policyUrl);
                        eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);

            // An unreadable body must not be reported as a 0% compliance score.
            if (ParseComplianceStatus(json) is not { } status)
            {
                return builder
                    .Warn("Cannot check compliance status: response is not a JSON object")
                    .WithEvidence("Compliance Status", eb =>
                    {
                        eb.Add("policy_url", policyUrl);
                        eb.Add("parse_error", "invalid_json");
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (status.FailingControls > 0)
            {
                return builder
                    .Fail($"{status.FailingControls} compliance controls failing ({status.Framework})")
                    .WithEvidence("Compliance Status", eb =>
                    {
                        eb.Add("framework", status.Framework);
                        eb.Add("total_controls", status.TotalControls.ToString(CultureInfo.InvariantCulture));
                        eb.Add("passing_controls", status.PassingControls.ToString(CultureInfo.InvariantCulture));
                        eb.Add("failing_controls", status.FailingControls.ToString(CultureInfo.InvariantCulture));
                        eb.Add("compliance_score", status.ComplianceScore.ToString("P0", CultureInfo.InvariantCulture));
                        if (status.FirstFailingControl is { } firstFailing)
                        {
                            eb.Add("first_failing_control", firstFailing);
                        }
                    })
                    .WithCauses(
                        "Control requirements not implemented",
                        "Evidence gaps",
                        "Policy violations detected",
                        "Configuration drift from baseline")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "List failing controls",
                            "stella compliance audit --failing",
                            CommandType.Stella);
                        rb.AddStep(2, "Review remediation guidance",
                            "stella compliance remediate --plan",
                            CommandType.Stella);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (status.ComplianceScore < 1.0)
            {
                return builder
                    .Warn($"Compliance score {status.ComplianceScore:P0} ({status.Framework})")
                    .WithEvidence("Compliance Status", eb =>
                    {
                        eb.Add("framework", status.Framework);
                        eb.Add("compliance_score", status.ComplianceScore.ToString("P0", CultureInfo.InvariantCulture));
                        eb.Add("passing_controls", status.PassingControls.ToString(CultureInfo.InvariantCulture));
                        eb.Add("total_controls", status.TotalControls.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses("Some controls not fully implemented")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"Compliance healthy ({status.Framework}: {status.PassingControls}/{status.TotalControls} controls)")
                .WithEvidence("Compliance Status", eb =>
                {
                    eb.Add("framework", status.Framework);
                    eb.Add("compliance_score", status.ComplianceScore.ToString("P0", CultureInfo.InvariantCulture));
                    eb.Add("passing_controls", status.PassingControls.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_controls", status.TotalControls.ToString(CultureInfo.InvariantCulture));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check compliance: {ex.Message}")
                .WithEvidence("Compliance Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException) when (!ct.IsCancellationRequested)
        {
            // The caller's token was not cancelled, so this is the HttpClient timeout.
            return builder
                .Warn("Compliance check timed out")
                .WithEvidence("Compliance Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Parses the compliance status payload.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>
    /// The parsed status, or <c>null</c> when the body is not valid JSON or its root is not an object.
    /// Missing or wrongly typed properties fall back to their defaults individually.
    /// </returns>
    private static ComplianceStatus? ParseComplianceStatus(string json)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            return new ComplianceStatus
            {
                Framework = ReadString(root, "framework") ?? "unknown",
                TotalControls = ReadInt(root, "totalControls"),
                PassingControls = ReadInt(root, "passingControls"),
                FailingControls = ReadInt(root, "failingControls"),
                ComplianceScore = ReadDouble(root, "complianceScore"),
                FirstFailingControl = ReadString(root, "firstFailingControl"),
            };
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Returns the property as a 32-bit integer, or 0 when absent or not an integer.
    private static int ReadInt(JsonElement root, string name) =>
        root.TryGetProperty(name, out var value)
        && value.ValueKind == JsonValueKind.Number
        && value.TryGetInt32(out var number)
            ? number
            : 0;

    // Returns the property as a double, or 0 when absent or not a number.
    private static double ReadDouble(JsonElement root, string name) =>
        root.TryGetProperty(name, out var value)
        && value.ValueKind == JsonValueKind.Number
        && value.TryGetDouble(out var number)
            ? number
            : 0;

    // Returns the property as a string, or null when absent or not a JSON string.
    private static string? ReadString(JsonElement root, string name) =>
        root.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;

    /// <summary>Fields read from the compliance status response.</summary>
    private sealed class ComplianceStatus
    {
        public string Framework { get; set; } = "unknown";
        public int TotalControls { get; set; }
        public int PassingControls { get; set; }
        public int FailingControls { get; set; }
        public double ComplianceScore { get; set; }
        public string? FirstFailingControl { get; set; }
    }
}

View File

@@ -0,0 +1,198 @@
// -----------------------------------------------------------------------------
// EvidenceExportReadinessCheck.cs
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
// Task: COMPL-008 - Implement EvidenceExportReadinessCheck
// Description: Verify evidence can be exported for auditors
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
/// <summary>
/// Verifies that evidence can be exported in auditor-ready formats.
/// </summary>
/// <remarks>
/// Calls <c>/api/v1/evidence/export/capabilities</c> on the configured Evidence Locker URL.
/// Two or more unavailable export capabilities fail the check; a single one produces a warning.
/// </remarks>
public sealed class EvidenceExportReadinessCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.compliance";
    private const string CategoryName = "Compliance";

    // Number of missing capabilities at which the result escalates from warning to failure.
    private const int CriticalIssueCount = 2;

    /// <inheritdoc />
    public string CheckId => "check.compliance.export-readiness";

    /// <inheritdoc />
    public string Name => "Evidence Export Readiness";

    /// <inheritdoc />
    public string Description => "Verify evidence can be exported for auditors";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["compliance", "export", "audit"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // The check is only meaningful when an Evidence Locker URL has been configured.
        var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
            ?? context.Configuration["Services:EvidenceLocker:Url"];
        return !string.IsNullOrEmpty(evidenceUrl);
    }

    /// <inheritdoc />
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="ct"/> is cancelled by the caller; only HTTP client timeouts
    /// are converted into a "timed out" result.
    /// </exception>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
            ?? context.Configuration["Services:EvidenceLocker:Url"]
            ?? "http://localhost:5080";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Dispose the response so the underlying connection is released promptly.
            using var response = await httpClient.GetAsync(
                $"{evidenceUrl.TrimEnd('/')}/api/v1/evidence/export/capabilities",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                return builder
                    .Warn($"Cannot check export capabilities: HTTP {(int)response.StatusCode}")
                    .WithEvidence("Export Status", eb =>
                    {
                        eb.Add("evidence_locker_url", evidenceUrl);
                        eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);

            // An unreadable body must not be reported as "all export formats unavailable".
            if (ParseExportStatus(json) is not { } status)
            {
                return builder
                    .Warn("Cannot check export capabilities: response is not a JSON object")
                    .WithEvidence("Export Status", eb =>
                    {
                        eb.Add("evidence_locker_url", evidenceUrl);
                        eb.Add("parse_error", "invalid_json");
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var issues = new List<string>();
            if (!status.PdfExportAvailable)
            {
                issues.Add("PDF export not available");
            }

            if (!status.JsonExportAvailable)
            {
                issues.Add("JSON export not available");
            }

            if (!status.SignedBundleAvailable)
            {
                issues.Add("Signed bundle export not available");
            }

            if (!status.ChainOfCustodyAvailable)
            {
                issues.Add("Chain of custody report not available");
            }

            if (issues.Count >= CriticalIssueCount)
            {
                return builder
                    .Fail($"Export capabilities limited: {string.Join(", ", issues)}")
                    .WithEvidence("Export Status", eb =>
                    {
                        eb.Add("pdf_export", status.PdfExportAvailable.ToString().ToLowerInvariant());
                        eb.Add("json_export", status.JsonExportAvailable.ToString().ToLowerInvariant());
                        eb.Add("signed_bundle", status.SignedBundleAvailable.ToString().ToLowerInvariant());
                        eb.Add("chain_of_custody", status.ChainOfCustodyAvailable.ToString().ToLowerInvariant());
                    })
                    .WithCauses(
                        "Export dependencies not installed",
                        "Signing keys not configured for bundles",
                        "Template files missing")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "Check export configuration",
                            "stella evidence export --check",
                            CommandType.Stella);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (issues.Count > 0)
            {
                return builder
                    .Warn($"Some export formats unavailable: {string.Join(", ", issues)}")
                    .WithEvidence("Export Status", eb =>
                    {
                        eb.Add("available_formats", string.Join(", ", status.AvailableFormats));
                        eb.Add("issues_count", issues.Count.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"Export ready ({string.Join(", ", status.AvailableFormats)})")
                .WithEvidence("Export Status", eb =>
                {
                    eb.Add("pdf_export", "true");
                    eb.Add("json_export", "true");
                    eb.Add("signed_bundle", "true");
                    eb.Add("chain_of_custody", "true");
                    eb.Add("available_formats", string.Join(", ", status.AvailableFormats));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check export readiness: {ex.Message}")
                .WithEvidence("Export Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException) when (!ct.IsCancellationRequested)
        {
            // The caller's token was not cancelled, so this is the HttpClient timeout.
            return builder
                .Warn("Export readiness check timed out")
                .WithEvidence("Export Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Parses the export capabilities payload.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>
    /// The parsed status, or <c>null</c> when the body is not valid JSON or its root is not an object.
    /// Missing or wrongly typed properties fall back to their defaults individually.
    /// </returns>
    private static ExportStatus? ParseExportStatus(string json)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            var status = new ExportStatus
            {
                PdfExportAvailable = ReadBool(root, "pdfExportAvailable"),
                JsonExportAvailable = ReadBool(root, "jsonExportAvailable"),
                SignedBundleAvailable = ReadBool(root, "signedBundleAvailable"),
                ChainOfCustodyAvailable = ReadBool(root, "chainOfCustodyAvailable"),
            };

            if (root.TryGetProperty("availableFormats", out var formats) && formats.ValueKind == JsonValueKind.Array)
            {
                // Keep only non-empty string entries; other element kinds are ignored
                // rather than aborting the whole parse.
                status.AvailableFormats = formats.EnumerateArray()
                    .Where(item => item.ValueKind == JsonValueKind.String)
                    .Select(item => item.GetString() ?? string.Empty)
                    .Where(text => !string.IsNullOrEmpty(text))
                    .ToList();
            }

            return status;
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Returns true only when the property exists and is the JSON literal true.
    private static bool ReadBool(JsonElement root, string name) =>
        root.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.True;

    /// <summary>Fields read from the export capabilities response.</summary>
    private sealed class ExportStatus
    {
        public bool PdfExportAvailable { get; set; }
        public bool JsonExportAvailable { get; set; }
        public bool SignedBundleAvailable { get; set; }
        public bool ChainOfCustodyAvailable { get; set; }
        public List<string> AvailableFormats { get; set; } = [];
    }
}

View File

@@ -0,0 +1,189 @@
// -----------------------------------------------------------------------------
// EvidenceGenerationRateCheck.cs
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
// Task: COMPL-002 - Implement EvidenceGenerationRateCheck
// Description: Monitor evidence generation rate and success
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
/// <summary>
/// Monitors evidence generation rate and success metrics.
/// Tracks whether evidence is being generated at expected rates.
/// </summary>
/// <remarks>
/// Calls <c>/api/v1/evidence/metrics</c> on the configured Evidence Locker URL and derives the
/// success rate as <c>(total - failed) / total</c>, treating zero generated items as 100%.
/// A rate below <see cref="MinSuccessRate"/> fails; below <see cref="WarnSuccessRate"/> warns.
/// </remarks>
public sealed class EvidenceGenerationRateCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.compliance";
    private const string CategoryName = "Compliance";
    private const double MinSuccessRate = 0.95;
    private const double WarnSuccessRate = 0.99;

    /// <inheritdoc />
    public string CheckId => "check.compliance.evidence-rate";

    /// <inheritdoc />
    public string Name => "Evidence Generation Rate";

    /// <inheritdoc />
    public string Description => "Monitor evidence generation success rate";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["compliance", "evidence", "attestation"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // The check is only meaningful when an Evidence Locker URL has been configured.
        var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
            ?? context.Configuration["Services:EvidenceLocker:Url"];
        return !string.IsNullOrEmpty(evidenceUrl);
    }

    /// <inheritdoc />
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="ct"/> is cancelled by the caller; only HTTP client timeouts
    /// are converted into a "timed out" result.
    /// </exception>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
            ?? context.Configuration["Services:EvidenceLocker:Url"]
            ?? "http://localhost:5080";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Dispose the response so the underlying connection is released promptly.
            using var response = await httpClient.GetAsync(
                $"{evidenceUrl.TrimEnd('/')}/api/v1/evidence/metrics",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                return builder
                    .Warn($"Cannot retrieve evidence metrics: HTTP {(int)response.StatusCode}")
                    .WithEvidence("Evidence Metrics", eb =>
                    {
                        eb.Add("evidence_locker_url", evidenceUrl);
                        eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);

            // An unreadable body previously left the success rate at 0 and raised a false
            // "critical" failure; report the parse problem instead.
            if (ParseMetrics(json) is not { } metrics)
            {
                return builder
                    .Warn("Cannot retrieve evidence metrics: response is not a JSON object")
                    .WithEvidence("Evidence Metrics", eb =>
                    {
                        eb.Add("evidence_locker_url", evidenceUrl);
                        eb.Add("parse_error", "invalid_json");
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // Check success rate
            if (metrics.SuccessRate < MinSuccessRate)
            {
                return builder
                    .Fail($"Evidence generation rate critical: {metrics.SuccessRate:P1} (minimum: {MinSuccessRate:P0})")
                    .WithEvidence("Evidence Metrics", eb =>
                    {
                        eb.Add("success_rate", metrics.SuccessRate.ToString("P2", CultureInfo.InvariantCulture));
                        eb.Add("total_generated_24h", metrics.TotalGenerated.ToString(CultureInfo.InvariantCulture));
                        eb.Add("failed_24h", metrics.Failed.ToString(CultureInfo.InvariantCulture));
                        eb.Add("pending_24h", metrics.Pending.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses(
                        "Evidence generation service failures",
                        "Database connectivity issues",
                        "Signing key unavailable",
                        "Storage quota exceeded")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "Check evidence locker logs",
                            "stella logs evidence-locker --since 1h",
                            CommandType.Stella);
                        rb.AddStep(2, "Verify signing keys",
                            "stella evidence keys status",
                            CommandType.Stella);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (metrics.SuccessRate < WarnSuccessRate)
            {
                return builder
                    .Warn($"Evidence generation rate degraded: {metrics.SuccessRate:P1}")
                    .WithEvidence("Evidence Metrics", eb =>
                    {
                        eb.Add("success_rate", metrics.SuccessRate.ToString("P2", CultureInfo.InvariantCulture));
                        eb.Add("failed_24h", metrics.Failed.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses("Intermittent failures", "High load")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"Evidence generation healthy ({metrics.SuccessRate:P1} success, {metrics.TotalGenerated} in 24h)")
                .WithEvidence("Evidence Metrics", eb =>
                {
                    eb.Add("success_rate", metrics.SuccessRate.ToString("P2", CultureInfo.InvariantCulture));
                    eb.Add("total_generated_24h", metrics.TotalGenerated.ToString(CultureInfo.InvariantCulture));
                    eb.Add("avg_generation_time_ms", metrics.AvgGenerationTimeMs.ToString(CultureInfo.InvariantCulture));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check evidence rate: {ex.Message}")
                .WithEvidence("Evidence Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException) when (!ct.IsCancellationRequested)
        {
            // The caller's token was not cancelled, so this is the HttpClient timeout.
            return builder
                .Warn("Evidence rate check timed out")
                .WithEvidence("Evidence Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Parses the evidence metrics payload and derives the success rate.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>
    /// The parsed metrics, or <c>null</c> when the body is not valid JSON or its root is not an object.
    /// Missing or wrongly typed counters are read as 0.
    /// </returns>
    private static EvidenceMetrics? ParseMetrics(string json)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            var total = ReadInt(root, "totalGenerated24h");
            var failed = ReadInt(root, "failed24h");

            return new EvidenceMetrics
            {
                TotalGenerated = total,
                Failed = failed,
                Pending = ReadInt(root, "pending"),
                AvgGenerationTimeMs = ReadInt(root, "avgGenerationTimeMs"),
                // With nothing generated there is nothing that failed, so the rate is 100%.
                SuccessRate = total > 0 ? (double)(total - failed) / total : 1.0,
            };
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Returns the property as a 32-bit integer, or 0 when absent or not an integer.
    private static int ReadInt(JsonElement root, string name) =>
        root.TryGetProperty(name, out var value)
        && value.ValueKind == JsonValueKind.Number
        && value.TryGetInt32(out var number)
            ? number
            : 0;

    /// <summary>Counters read from the evidence metrics response plus the derived success rate.</summary>
    private sealed class EvidenceMetrics
    {
        public int TotalGenerated { get; set; }
        public int Failed { get; set; }
        public int Pending { get; set; }
        public int AvgGenerationTimeMs { get; set; }
        public double SuccessRate { get; set; }
    }
}

View File

@@ -0,0 +1,190 @@
// -----------------------------------------------------------------------------
// EvidenceTamperCheck.cs
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
// Task: COMPL-006 - Implement EvidenceTamperCheck
// Description: Detect evidence tampering or integrity issues
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
/// <summary>
/// Detects evidence tampering or integrity issues.
/// Verifies signatures and hash chains.
/// </summary>
public sealed class EvidenceTamperCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.compliance";
    private const string CategoryName = "Compliance";

    /// <inheritdoc />
    public string CheckId => "check.compliance.evidence-integrity";

    /// <inheritdoc />
    public string Name => "Evidence Integrity";

    /// <inheritdoc />
    public string Description => "Detect evidence tampering or integrity issues";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["compliance", "security", "integrity", "signatures"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(30);

    /// <summary>
    /// The check only runs when an evidence locker URL is configured under
    /// <c>EvidenceLocker:Url</c> or <c>Services:EvidenceLocker:Url</c>.
    /// </summary>
    public bool CanRun(DoctorPluginContext context)
    {
        var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
            ?? context.Configuration["Services:EvidenceLocker:Url"];
        return !string.IsNullOrEmpty(evidenceUrl);
    }

    /// <summary>
    /// Calls the evidence locker's <c>/api/v1/evidence/integrity-check</c> endpoint and maps the
    /// reported counters to a result: Fail when any record is reported as tampered, Warn when the
    /// endpoint is unreachable, returns a non-success status, returns a payload that cannot be
    /// parsed, or reports verification errors; Pass otherwise.
    /// </summary>
    /// <param name="context">Plugin context supplying configuration, services and the result builder.</param>
    /// <param name="ct">Token passed to the HTTP request and body read.</param>
    /// <returns>The populated check result.</returns>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var evidenceUrl = context.Configuration["EvidenceLocker:Url"]
            ?? context.Configuration["Services:EvidenceLocker:Url"]
            ?? "http://localhost:5080";
        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(60);

            // Dispose the response so the underlying connection/content is released promptly.
            using var response = await httpClient.GetAsync(
                $"{evidenceUrl.TrimEnd('/')}/api/v1/evidence/integrity-check",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                // Copy the code out so the evidence callback never touches the disposed response.
                var statusCode = (int)response.StatusCode;
                return builder
                    .Warn($"Cannot verify evidence integrity: HTTP {statusCode}")
                    .WithEvidence("Integrity Check", eb =>
                    {
                        eb.Add("evidence_locker_url", evidenceUrl);
                        eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);
            var status = ParseIntegrityStatus(json);

            // Tampering is evaluated first so a tamper count that was read successfully is
            // reported even if a later field of the payload turned out to be malformed.
            if (status.TamperedCount > 0)
            {
                return builder
                    .Fail($"CRITICAL: {status.TamperedCount} evidence records show tampering")
                    .WithEvidence("Integrity Check", eb =>
                    {
                        eb.Add("tampered_count", status.TamperedCount.ToString(CultureInfo.InvariantCulture));
                        eb.Add("verified_count", status.VerifiedCount.ToString(CultureInfo.InvariantCulture));
                        eb.Add("total_checked", status.TotalChecked.ToString(CultureInfo.InvariantCulture));
                        if (status.FirstTamperedId != null)
                        {
                            eb.Add("first_tampered_id", status.FirstTamperedId);
                        }
                    })
                    .WithCauses(
                        "Evidence modification after signing",
                        "Storage corruption",
                        "Malicious tampering",
                        "Key/certificate mismatch")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "List tampered evidence", "stella evidence audit --tampered", CommandType.Stella)
                            .WithSafetyNote("DO NOT delete tampered evidence - preserve for investigation");
                        rb.AddStep(2, "Investigate security incident", "Contact security team", CommandType.Manual)
                            .RequireBackup();
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // An unreadable payload must not fall through to Pass: all counters would be zero
            // and the check would claim "verified (0/0 records)" without having verified anything.
            if (!status.Parsed)
            {
                return builder
                    .Warn("Cannot verify evidence integrity: integrity-check response could not be parsed")
                    .WithEvidence("Integrity Check", eb =>
                    {
                        eb.Add("evidence_locker_url", evidenceUrl);
                        eb.Add("response_parse_error", "true");
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (status.VerificationErrors > 0)
            {
                return builder
                    .Warn($"{status.VerificationErrors} evidence records could not be verified")
                    .WithEvidence("Integrity Check", eb =>
                    {
                        eb.Add("verification_errors", status.VerificationErrors.ToString(CultureInfo.InvariantCulture));
                        eb.Add("verified_count", status.VerifiedCount.ToString(CultureInfo.InvariantCulture));
                        eb.Add("total_checked", status.TotalChecked.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses(
                        "Missing signing certificates",
                        "Certificate expiration",
                        "Unsupported signature algorithm")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"Evidence integrity verified ({status.VerifiedCount}/{status.TotalChecked} records)")
                .WithEvidence("Integrity Check", eb =>
                {
                    eb.Add("verified_count", status.VerifiedCount.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_checked", status.TotalChecked.ToString(CultureInfo.InvariantCulture));
                    eb.Add("tampered_count", "0");
                    eb.Add("hash_chain_valid", status.HashChainValid.ToString().ToLowerInvariant());
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot verify evidence integrity: {ex.Message}")
                .WithEvidence("Integrity Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            return builder
                .Warn("Evidence integrity check timed out")
                .WithEvidence("Integrity Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Reads the integrity counters from the endpoint's JSON body. Missing properties default to
    /// 0 / false / null. If the body is not valid JSON or a property has an unexpected type, the
    /// fields read so far are kept and <see cref="IntegrityStatus.Parsed"/> stays false.
    /// </summary>
    private static IntegrityStatus ParseIntegrityStatus(string json)
    {
        var status = new IntegrityStatus();
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            status.TotalChecked = root.TryGetProperty("totalChecked", out var tc) ? tc.GetInt32() : 0;
            status.VerifiedCount = root.TryGetProperty("verifiedCount", out var vc) ? vc.GetInt32() : 0;
            status.TamperedCount = root.TryGetProperty("tamperedCount", out var tmc) ? tmc.GetInt32() : 0;
            status.VerificationErrors = root.TryGetProperty("verificationErrors", out var ve) ? ve.GetInt32() : 0;
            status.HashChainValid = root.TryGetProperty("hashChainValid", out var hcv) && hcv.GetBoolean();
            status.FirstTamperedId = root.TryGetProperty("firstTamperedId", out var fti) ? fti.GetString() : null;
            status.Parsed = true;
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException or FormatException)
        {
            // JsonException: body is not JSON. InvalidOperationException: root is not an object or
            // a value has the wrong kind. FormatException: a number does not fit in Int32.
            // Parsed remains false so the caller reports the problem instead of passing.
        }
        return status;
    }

    /// <summary>Counters reported by the integrity-check endpoint.</summary>
    private sealed class IntegrityStatus
    {
        /// <summary>True only when the whole payload was read without error.</summary>
        public bool Parsed { get; set; }
        public int TotalChecked { get; set; }
        public int VerifiedCount { get; set; }
        public int TamperedCount { get; set; }
        public int VerificationErrors { get; set; }
        public bool HashChainValid { get; set; }
        public string? FirstTamperedId { get; set; }
    }
}

View File

@@ -0,0 +1,185 @@
// -----------------------------------------------------------------------------
// ProvenanceCompletenessCheck.cs
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
// Task: COMPL-004 - Implement ProvenanceCompletenessCheck
// Description: Verify provenance records are complete
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Compliance.Checks;
/// <summary>
/// Verifies that provenance records are complete for releases.
/// </summary>
public sealed class ProvenanceCompletenessCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.compliance";
    private const string CategoryName = "Compliance";

    // Completeness below this fraction (0.99 = 99%) is reported as a failure.
    private const double MinCompletenessRate = 0.99;

    /// <inheritdoc />
    public string CheckId => "check.compliance.provenance-completeness";

    /// <inheritdoc />
    public string Name => "Provenance Completeness";

    /// <inheritdoc />
    public string Description => "Verify provenance records exist for all releases";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["compliance", "provenance", "slsa"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);

    /// <summary>
    /// The check only runs when a provenance service URL is configured under
    /// <c>Provenance:Url</c> or <c>Services:Provenance:Url</c>.
    /// </summary>
    public bool CanRun(DoctorPluginContext context)
    {
        var provenanceUrl = context.Configuration["Provenance:Url"]
            ?? context.Configuration["Services:Provenance:Url"];
        return !string.IsNullOrEmpty(provenanceUrl);
    }

    /// <summary>
    /// Calls the provenance service's <c>/api/v1/provenance/completeness</c> endpoint and maps the
    /// response to a result: Warn when the endpoint is unreachable, returns a non-success status
    /// or an unparseable payload; Fail when completeness is below <see cref="MinCompletenessRate"/>;
    /// Warn when the reported SLSA level is below 2; Pass otherwise.
    /// </summary>
    /// <param name="context">Plugin context supplying configuration, services and the result builder.</param>
    /// <param name="ct">Token passed to the HTTP request and body read.</param>
    /// <returns>The populated check result.</returns>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var provenanceUrl = context.Configuration["Provenance:Url"]
            ?? context.Configuration["Services:Provenance:Url"]
            ?? "http://localhost:5084";
        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(15);

            // Dispose the response so the underlying connection/content is released promptly.
            using var response = await httpClient.GetAsync(
                $"{provenanceUrl.TrimEnd('/')}/api/v1/provenance/completeness",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                // Copy the code out so the evidence callback never touches the disposed response.
                var statusCode = (int)response.StatusCode;
                return builder
                    .Warn($"Cannot check provenance completeness: HTTP {statusCode}")
                    .WithEvidence("Provenance Status", eb =>
                    {
                        eb.Add("provenance_url", provenanceUrl);
                        eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);
            var status = ParseProvenanceStatus(json);

            // A payload that could not be read leaves CompletenessRate at 0, which would otherwise
            // be reported as "Provenance incomplete: 0.0%" even though nothing was measured.
            if (!status.Parsed)
            {
                return builder
                    .Warn("Cannot check provenance completeness: response could not be parsed")
                    .WithEvidence("Provenance Status", eb =>
                    {
                        eb.Add("provenance_url", provenanceUrl);
                        eb.Add("response_parse_error", "true");
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (status.CompletenessRate < MinCompletenessRate)
            {
                return builder
                    .Fail($"Provenance incomplete: {status.CompletenessRate:P1} ({status.MissingCount} releases without provenance)")
                    .WithEvidence("Provenance Completeness", eb =>
                    {
                        eb.Add("completeness_rate", status.CompletenessRate.ToString("P2", CultureInfo.InvariantCulture));
                        eb.Add("total_releases", status.TotalReleases.ToString(CultureInfo.InvariantCulture));
                        eb.Add("missing_count", status.MissingCount.ToString(CultureInfo.InvariantCulture));
                        eb.Add("slsa_level", status.SlsaLevel.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses(
                        "Build pipeline not generating provenance",
                        "Provenance upload failures",
                        "Legacy releases without provenance",
                        "Manual deployments bypassing pipeline")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "List releases missing provenance",
                            "stella provenance audit --missing",
                            CommandType.Stella);
                        rb.AddStep(2, "Generate backfill provenance",
                            "stella provenance backfill --dry-run",
                            CommandType.Manual);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // Check SLSA level
            if (status.SlsaLevel < 2)
            {
                return builder
                    .Warn($"SLSA level is {status.SlsaLevel} (recommend level 2+)")
                    .WithEvidence("Provenance Completeness", eb =>
                    {
                        eb.Add("completeness_rate", status.CompletenessRate.ToString("P2", CultureInfo.InvariantCulture));
                        eb.Add("slsa_level", status.SlsaLevel.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses("Build system not meeting SLSA requirements")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"Provenance complete ({status.CompletenessRate:P1}, SLSA L{status.SlsaLevel})")
                .WithEvidence("Provenance Completeness", eb =>
                {
                    eb.Add("completeness_rate", status.CompletenessRate.ToString("P2", CultureInfo.InvariantCulture));
                    eb.Add("total_releases", status.TotalReleases.ToString(CultureInfo.InvariantCulture));
                    eb.Add("slsa_level", status.SlsaLevel.ToString(CultureInfo.InvariantCulture));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check provenance: {ex.Message}")
                .WithEvidence("Provenance Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            return builder
                .Warn("Provenance check timed out")
                .WithEvidence("Provenance Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Reads the release counters from the endpoint's JSON body and derives the completeness
    /// rate as (total - missing) / total, or 1.0 when there are no releases. Missing properties
    /// default to 0. If the body is not valid JSON or a property has an unexpected type,
    /// <see cref="ProvenanceStatus.Parsed"/> stays false.
    /// </summary>
    private static ProvenanceStatus ParseProvenanceStatus(string json)
    {
        var status = new ProvenanceStatus();
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            status.TotalReleases = root.TryGetProperty("totalReleases", out var tr) ? tr.GetInt32() : 0;
            status.MissingCount = root.TryGetProperty("missingCount", out var mc) ? mc.GetInt32() : 0;
            status.SlsaLevel = root.TryGetProperty("slsaLevel", out var sl) ? sl.GetInt32() : 0;
            status.CompletenessRate = status.TotalReleases > 0
                ? (double)(status.TotalReleases - status.MissingCount) / status.TotalReleases
                : 1.0;
            status.Parsed = true;
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException or FormatException)
        {
            // JsonException: body is not JSON. InvalidOperationException: root is not an object or
            // a value has the wrong kind. FormatException: a number does not fit in Int32.
            // Parsed remains false so the caller reports the problem instead of a bogus rate.
        }
        return status;
    }

    /// <summary>Figures reported by (or derived from) the completeness endpoint.</summary>
    private sealed class ProvenanceStatus
    {
        /// <summary>True only when the whole payload was read without error.</summary>
        public bool Parsed { get; set; }
        public int TotalReleases { get; set; }
        public int MissingCount { get; set; }
        public int SlsaLevel { get; set; }
        public double CompletenessRate { get; set; }
    }
}

View File

@@ -0,0 +1,45 @@
// -----------------------------------------------------------------------------
// CompliancePlugin.cs
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
// Task: COMPL-001 - Create Compliance plugin scaffold
// Description: Doctor plugin for evidence and compliance health checks
// -----------------------------------------------------------------------------
using StellaOps.Doctor.Plugin.Compliance.Checks;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Compliance;
/// <summary>
/// Doctor plugin for evidence generation and compliance health monitoring.
/// Checks attestation signing, provenance completeness, audit readiness.
/// </summary>
public sealed class CompliancePlugin : IDoctorPlugin
{
    /// <inheritdoc />
    public string PluginId
    {
        get { return "stellaops.doctor.compliance"; }
    }

    /// <inheritdoc />
    public string DisplayName
    {
        get { return "Evidence & Compliance"; }
    }

    /// <inheritdoc />
    public string Description
    {
        get { return "Checks for evidence generation, attestation signing, and compliance posture"; }
    }

    /// <inheritdoc />
    public string Category
    {
        get { return "Compliance"; }
    }

    /// <inheritdoc />
    public Version Version
    {
        // A fresh 1.0.0 instance is produced on every read.
        get { return new(1, 0, 0); }
    }

    /// <inheritdoc />
    public IReadOnlyList<IDoctorCheck> GetChecks()
    {
        // Each call builds new check instances, listed in the order they are returned.
        return
        [
            new EvidenceGenerationRateCheck(),
            new AttestationSigningHealthCheck(),
            new ProvenanceCompletenessCheck(),
            new AuditReadinessCheck(),
            new EvidenceTamperCheck(),
            new ComplianceFrameworkCheck(),
            new EvidenceExportReadinessCheck()
        ];
    }
}

View File

@@ -0,0 +1,26 @@
// -----------------------------------------------------------------------------
// ServiceCollectionExtensions.cs
// Sprint: SPRINT_20260118_024_Doctor_evidence_compliance_health
// Task: COMPL-001 - Create Compliance plugin scaffold
// Description: DI extension for Compliance plugin registration
// -----------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Compliance.DependencyInjection;
/// <summary>
/// Extension methods for registering the Compliance Doctor plugin.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="CompliancePlugin"/> as a singleton <see cref="IDoctorPlugin"/>.
    /// </summary>
    /// <param name="services">The service collection to add the registration to.</param>
    /// <returns>The same <paramref name="services"/> instance, to allow call chaining.</returns>
    public static IServiceCollection AddDoctorCompliancePlugin(this IServiceCollection services)
        => services.AddSingleton<IDoctorPlugin, CompliancePlugin>();
}

View File

@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the StellaOps.Doctor.Plugin.Compliance assembly. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<!-- Every compiler warning fails the build. -->
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Doctor.Plugin.Compliance</RootNamespace>
<Description>Doctor health checks for evidence generation and compliance posture</Description>
</PropertyGroup>
<!-- The plugin builds against the core Doctor library, referenced by relative path. -->
<ItemGroup>
<ProjectReference Include="..\..\__Libraries\StellaOps.Doctor\StellaOps.Doctor.csproj" />
</ItemGroup>
</Project>

View File

@@ -1,12 +1,14 @@
// -----------------------------------------------------------------------------
// FipsComplianceCheck.cs
// Sprint: SPRINT_20260117_025_Doctor_coverage_expansion
// Task: DOC-EXP-003 - Regional Crypto Compliance Checks
// Description: Health check for FIPS 140-2 mode validation
// Sprint: SPRINT_20260118_015_Doctor_check_quality_improvements
// Task: DQUAL-003 - Fix FipsComplianceCheck algorithm verification
// Description: Health check for FIPS 140-2 mode validation with actual algorithm testing
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Runtime.InteropServices;
using System.Security.Cryptography;
using Microsoft.Win32;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
@@ -56,18 +58,28 @@ public sealed class FipsComplianceCheck : IDoctorCheck
?? context.Configuration["Cryptography:Profile"]
?? "default";
// Check .NET FIPS mode
var fipsEnabled = IsFipsEnabled();
// Get comprehensive FIPS status
var fipsStatus = GetFipsStatus();
var algorithmCheck = VerifyFipsAlgorithms();
if (!fipsEnabled)
if (!fipsStatus.FipsModeEnabled)
{
return Task.FromResult(builder
.Fail("FIPS 140-2 mode not enabled")
.WithEvidence("FIPS Status", eb =>
{
eb.Add("CryptoProfile", cryptoProfile);
eb.Add("FipsEnabled", "false");
eb.Add("Platform", RuntimeInformation.OSDescription);
eb.Add("fips_mode_enabled", "false");
eb.Add("platform", fipsStatus.Platform);
eb.Add("crypto_provider", fipsStatus.CryptoProvider);
eb.Add("openssl_fips_module_loaded", fipsStatus.OpenSslFipsModuleLoaded.ToString().ToLowerInvariant());
eb.Add("crypto_profile", cryptoProfile);
eb.Add("algorithms_tested", string.Join(", ", algorithmCheck.AvailableAlgorithms.Concat(algorithmCheck.MissingAlgorithms)));
eb.Add("algorithms_available", string.Join(", ", algorithmCheck.AvailableAlgorithms));
eb.Add("algorithms_missing", string.Join(", ", algorithmCheck.MissingAlgorithms));
foreach (var (alg, result) in algorithmCheck.TestResults)
{
eb.Add($"test_{alg.ToLowerInvariant().Replace("-", "_")}", result);
}
})
.WithCauses(
"FIPS mode not enabled in operating system",
@@ -85,16 +97,16 @@ public sealed class FipsComplianceCheck : IDoctorCheck
CommandType.Shell)
.AddStep(3, "Restart application",
"sudo systemctl restart stellaops",
CommandType.Shell);
CommandType.Manual);
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
rb.AddStep(1, "Enable FIPS via Group Policy",
"Set 'System cryptography: Use FIPS compliant algorithms' in Local Security Policy",
CommandType.Manual)
.AddStep(2, "Or via registry",
.AddStep(2, "Or via registry (requires admin and reboot)",
"reg add HKLM\\System\\CurrentControlSet\\Control\\Lsa\\FipsAlgorithmPolicy /v Enabled /t REG_DWORD /d 1 /f",
CommandType.Shell);
CommandType.Manual);
}
else
{
@@ -108,24 +120,35 @@ public sealed class FipsComplianceCheck : IDoctorCheck
}
// Verify FIPS-compliant algorithms are available
var algorithmCheck = VerifyFipsAlgorithms();
if (!algorithmCheck.AllAvailable)
{
return Task.FromResult(builder
.Warn($"Some FIPS algorithms unavailable: {string.Join(", ", algorithmCheck.MissingAlgorithms)}")
.WithEvidence("FIPS Status", eb =>
{
eb.Add("CryptoProfile", cryptoProfile);
eb.Add("FipsEnabled", "true");
eb.Add("AvailableAlgorithms", string.Join(", ", algorithmCheck.AvailableAlgorithms));
eb.Add("MissingAlgorithms", string.Join(", ", algorithmCheck.MissingAlgorithms));
eb.Add("fips_mode_enabled", "true");
eb.Add("platform", fipsStatus.Platform);
eb.Add("crypto_provider", fipsStatus.CryptoProvider);
eb.Add("openssl_fips_module_loaded", fipsStatus.OpenSslFipsModuleLoaded.ToString().ToLowerInvariant());
eb.Add("crypto_profile", cryptoProfile);
eb.Add("algorithms_tested", string.Join(", ", algorithmCheck.AvailableAlgorithms.Concat(algorithmCheck.MissingAlgorithms)));
eb.Add("algorithms_available", string.Join(", ", algorithmCheck.AvailableAlgorithms));
eb.Add("algorithms_missing", string.Join(", ", algorithmCheck.MissingAlgorithms));
foreach (var (alg, result) in algorithmCheck.TestResults)
{
eb.Add($"test_{alg.ToLowerInvariant().Replace("-", "_")}", result);
}
})
.WithCauses(
"OpenSSL version missing FIPS module",
"FIPS provider not fully configured")
"FIPS provider not fully configured",
"Algorithm test failed")
.WithRemediation(rb => rb
.AddStep(1, "Check OpenSSL FIPS provider",
"openssl list -providers",
CommandType.Shell)
.AddStep(2, "Verify crypto algorithms",
"openssl list -digest-algorithms",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
.Build());
@@ -135,10 +158,15 @@ public sealed class FipsComplianceCheck : IDoctorCheck
.Pass("FIPS 140-2 mode enabled and verified")
.WithEvidence("FIPS Status", eb =>
{
eb.Add("CryptoProfile", cryptoProfile);
eb.Add("FipsEnabled", "true");
eb.Add("VerifiedAlgorithms", string.Join(", ", algorithmCheck.AvailableAlgorithms));
eb.Add("Status", "compliant");
eb.Add("fips_mode_enabled", "true");
eb.Add("platform", fipsStatus.Platform);
eb.Add("crypto_provider", fipsStatus.CryptoProvider);
eb.Add("openssl_fips_module_loaded", fipsStatus.OpenSslFipsModuleLoaded.ToString().ToLowerInvariant());
eb.Add("crypto_profile", cryptoProfile);
eb.Add("algorithms_tested", string.Join(", ", algorithmCheck.AvailableAlgorithms));
eb.Add("algorithms_available", string.Join(", ", algorithmCheck.AvailableAlgorithms));
eb.Add("algorithms_missing", "none");
eb.Add("status", "compliant");
})
.Build());
}
@@ -148,7 +176,6 @@ public sealed class FipsComplianceCheck : IDoctorCheck
try
{
// Check if running in FIPS mode
// On Windows, check registry; on Linux, check /proc/sys/crypto/fips_enabled
if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
{
var fipsFile = "/proc/sys/crypto/fips_enabled";
@@ -160,8 +187,24 @@ public sealed class FipsComplianceCheck : IDoctorCheck
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
// Check Windows FIPS policy
// This is a simplified check - real implementation would use registry
// Check Windows FIPS policy via registry
try
{
using var key = Registry.LocalMachine.OpenSubKey(
@"System\CurrentControlSet\Control\Lsa\FipsAlgorithmPolicy");
if (key != null)
{
var value = key.GetValue("Enabled");
if (value is int intVal && intVal == 1)
return true;
}
}
catch
{
// Registry access failed, fall back to env var check
}
// Also check environment variable
return Environment.GetEnvironmentVariable("DOTNET_SYSTEM_NET_SECURITY_USEFIPSVALIDATED") == "1";
}
@@ -177,30 +220,187 @@ public sealed class FipsComplianceCheck : IDoctorCheck
{
var available = new List<string>();
var missing = new List<string>();
var required = new[] { "AES-256-GCM", "SHA-256", "SHA-384", "SHA-512", "RSA-2048", "ECDSA-P256" };
var testResults = new Dictionary<string, string>();
// Simplified check - in production would verify each algorithm
foreach (var alg in required)
// Test AES-256-GCM
try
{
try
using var aes = Aes.Create();
aes.KeySize = 256;
aes.Mode = CipherMode.ECB; // GCM not directly testable in managed code
aes.GenerateKey();
aes.GenerateIV();
using var encryptor = aes.CreateEncryptor();
var testData = new byte[16];
var encrypted = encryptor.TransformFinalBlock(testData, 0, testData.Length);
if (encrypted.Length > 0)
{
// Basic availability check
available.Add(alg);
available.Add("AES-256");
testResults["AES-256"] = "pass";
}
catch
}
catch (Exception ex)
{
missing.Add("AES-256");
testResults["AES-256"] = $"fail: {ex.Message}";
}
// Test SHA-256
try
{
using var sha256 = SHA256.Create();
var hash = sha256.ComputeHash(new byte[32]);
if (hash.Length == 32)
{
missing.Add(alg);
available.Add("SHA-256");
testResults["SHA-256"] = "pass";
}
}
catch (Exception ex)
{
missing.Add("SHA-256");
testResults["SHA-256"] = $"fail: {ex.Message}";
}
// Test SHA-384
try
{
using var sha384 = SHA384.Create();
var hash = sha384.ComputeHash(new byte[32]);
if (hash.Length == 48)
{
available.Add("SHA-384");
testResults["SHA-384"] = "pass";
}
}
catch (Exception ex)
{
missing.Add("SHA-384");
testResults["SHA-384"] = $"fail: {ex.Message}";
}
// Test SHA-512
try
{
using var sha512 = SHA512.Create();
var hash = sha512.ComputeHash(new byte[32]);
if (hash.Length == 64)
{
available.Add("SHA-512");
testResults["SHA-512"] = "pass";
}
}
catch (Exception ex)
{
missing.Add("SHA-512");
testResults["SHA-512"] = $"fail: {ex.Message}";
}
// Test RSA-2048
try
{
using var rsa = RSA.Create(2048);
var testData = new byte[32];
var signature = rsa.SignData(testData, HashAlgorithmName.SHA256, RSASignaturePadding.Pkcs1);
var valid = rsa.VerifyData(testData, signature, HashAlgorithmName.SHA256, RSASignaturePadding.Pkcs1);
if (valid)
{
available.Add("RSA-2048");
testResults["RSA-2048"] = "pass";
}
}
catch (Exception ex)
{
missing.Add("RSA-2048");
testResults["RSA-2048"] = $"fail: {ex.Message}";
}
// Test ECDSA-P256
try
{
using var ecdsa = ECDsa.Create(ECCurve.NamedCurves.nistP256);
var testData = new byte[32];
var signature = ecdsa.SignData(testData, HashAlgorithmName.SHA256);
var valid = ecdsa.VerifyData(testData, signature, HashAlgorithmName.SHA256);
if (valid)
{
available.Add("ECDSA-P256");
testResults["ECDSA-P256"] = "pass";
}
}
catch (Exception ex)
{
missing.Add("ECDSA-P256");
testResults["ECDSA-P256"] = $"fail: {ex.Message}";
}
return new FipsAlgorithmCheckResult(
AllAvailable: missing.Count == 0,
AvailableAlgorithms: available,
MissingAlgorithms: missing);
MissingAlgorithms: missing,
TestResults: testResults);
}
/// <summary>
/// Builds a <see cref="FipsStatus"/> snapshot: platform and crypto provider labels chosen from
/// the current OS, a best-effort OpenSSL FIPS indicator on Linux, and the FIPS mode flag
/// returned by <c>IsFipsEnabled()</c>.
/// </summary>
private static FipsStatus GetFipsStatus()
{
    var status = new FipsStatus();
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        status.Platform = "windows";
        status.CryptoProvider = "bcrypt";
    }
    else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
    {
        status.Platform = "linux";
        status.CryptoProvider = "openssl";

        // Best-effort heuristic: either a known FIPS marker file exists or the OpenSSL
        // configuration mentions FIPS in an active (non-comment) line. File.Exists does not
        // throw, and the config probe handles its own I/O errors, so a positive marker-file
        // result is never lost to a failed config read.
        // NOTE(review): a config mention does not prove the module is actually loaded.
        status.OpenSslFipsModuleLoaded =
            File.Exists("/etc/pki/fips/fips.conf")
            || File.Exists("/usr/local/ssl/fips-2.0/lib/fipscanister.o")
            || OpenSslConfigReferencesFips("/etc/ssl/openssl.cnf");
    }
    else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
    {
        status.Platform = "macos";
        status.CryptoProvider = "corecrypto";
    }
    else
    {
        status.Platform = "unknown";
        status.CryptoProvider = "managed";
    }
    status.FipsModeEnabled = IsFipsEnabled();
    return status;
}

/// <summary>
/// Returns true when <paramref name="configPath"/> exists and contains "fips" (case-insensitive)
/// outside of a '#' comment. Stock OpenSSL configuration files ship with commented-out FIPS
/// examples, so matching the raw file text would report FIPS on systems that never enabled it.
/// Returns false when the file is missing or cannot be read.
/// </summary>
private static bool OpenSslConfigReferencesFips(string configPath)
{
    try
    {
        if (!File.Exists(configPath))
        {
            return false;
        }

        foreach (var line in File.ReadLines(configPath))
        {
            // Everything from '#' to end of line is treated as a comment.
            var commentStart = line.IndexOf('#');
            var active = commentStart >= 0 ? line[..commentStart] : line;
            if (active.Contains("fips", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException or System.Security.SecurityException)
    {
        // Detection is advisory only; an unreadable config counts as "no evidence of FIPS".
        return false;
    }
}
private sealed record FipsAlgorithmCheckResult(
bool AllAvailable,
List<string> AvailableAlgorithms,
List<string> MissingAlgorithms);
List<string> MissingAlgorithms,
Dictionary<string, string> TestResults);
/// <summary>
/// Mutable snapshot of the host's FIPS-related state as assembled by GetFipsStatus.
/// </summary>
private sealed class FipsStatus
{
/// <summary>Result of IsFipsEnabled() at the time the snapshot was taken.</summary>
public bool FipsModeEnabled { get; set; }
/// <summary>Platform label set by GetFipsStatus: "windows", "linux", "macos" or "unknown".</summary>
public string Platform { get; set; } = "unknown";
/// <summary>Crypto provider label set by GetFipsStatus: "bcrypt", "openssl", "corecrypto" or "managed".</summary>
public string CryptoProvider { get; set; } = "unknown";
/// <summary>OpenSSL FIPS indicator; GetFipsStatus only assigns it on Linux, so it stays false elsewhere.</summary>
public bool OpenSslFipsModuleLoaded { get; set; }
}
}

View File

@@ -0,0 +1,291 @@
// -----------------------------------------------------------------------------
// EnvironmentCapacityCheck.cs
// Sprint: SPRINT_20260118_017_Doctor_environment_health
// Task: ENVH-004 - Implement EnvironmentCapacityCheck
// Description: Check environment resource capacity
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Environment.Checks;
/// <summary>
/// Checks environment resource capacity.
/// Monitors CPU, memory, storage, and deployment slot availability.
/// </summary>
public sealed class EnvironmentCapacityCheck : IDoctorCheck
{
// Identifiers passed to context.CreateResult when this check builds its result.
private const string PluginId = "stellaops.doctor.environment";
private const string CategoryName = "Environment Health";
// Usage percentage at or above which a resource is recorded as a warning.
private const double HighUsageWarningPercent = 75.0;
// Usage percentage at or above which a resource is recorded as critical.
private const double CriticalUsagePercent = 90.0;
/// <inheritdoc />
public string CheckId => "check.environment.capacity";
/// <inheritdoc />
public string Name => "Environment Capacity";
/// <inheritdoc />
public string Description => "Check environment resource capacity";
/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
/// <inheritdoc />
public IReadOnlyList<string> Tags => ["environment", "capacity", "resources", "cpu", "memory", "storage"];
/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
    // Runnable only when an orchestrator URL is configured under one of the two supported keys;
    // the second key is consulted only when the first one is absent.
    var configuredUrl = context.Configuration["ReleaseOrchestrator:Url"];
    configuredUrl ??= context.Configuration["Release:Orchestrator:Url"];
    return configuredUrl is { Length: > 0 };
}
/// <inheritdoc />
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    // Fetches the orchestrator's capacity report and classifies every environment resource
    // (cpu, memory, storage, deployment slots) against the warning/critical thresholds.
    // Result: Fail if any resource is critical, Warn if any is in the warning band or the
    // report cannot be retrieved, Pass otherwise.
    var builder = context.CreateResult(CheckId, PluginId, CategoryName);
    var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
        ?? context.Configuration["Release:Orchestrator:Url"]
        ?? "http://localhost:5080";
    try
    {
        var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
        var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
        httpClient.Timeout = TimeSpan.FromSeconds(10);

        // Get capacity report. The response is disposed when the method exits so the
        // underlying connection/content is released promptly.
        using var response = await httpClient.GetAsync(
            $"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/capacity",
            ct);

        if (!response.IsSuccessStatusCode)
        {
            // Copy the code out so the evidence callback never touches the disposed response.
            var statusCode = (int)response.StatusCode;
            return builder
                .Warn($"Cannot retrieve capacity report: HTTP {statusCode}")
                .WithEvidence("Capacity Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        var capacityJson = await response.Content.ReadAsStringAsync(ct);
        var capacities = ParseCapacities(capacityJson);
        if (capacities.Count == 0)
        {
            return builder
                .Pass("No environments to check capacity")
                .WithEvidence("Capacity", eb =>
                {
                    eb.Add("environment_count", "0");
                })
                .Build();
        }

        // One entry per (environment, resource) pair that crossed a threshold.
        var criticalEnvs = new List<(string Name, string Resource, double Usage)>();
        var warningEnvs = new List<(string Name, string Resource, double Usage)>();
        foreach (var cap in capacities)
        {
            CheckResource(cap.Name, "cpu", cap.CpuUsagePercent, criticalEnvs, warningEnvs);
            CheckResource(cap.Name, "memory", cap.MemoryUsagePercent, criticalEnvs, warningEnvs);
            CheckResource(cap.Name, "storage", cap.StorageUsagePercent, criticalEnvs, warningEnvs);

            // Check deployment slots (skipped when no maximum is reported, to avoid dividing by zero).
            if (cap.MaxDeployments > 0)
            {
                var deployUsage = (double)cap.ActiveDeployments / cap.MaxDeployments * 100;
                CheckResource(cap.Name, "deployments", deployUsage, criticalEnvs, warningEnvs);
            }
        }

        if (criticalEnvs.Count > 0)
        {
            // The list holds resources, so an environment with several critical resources
            // appears more than once; count distinct names for the "environment(s)" headline.
            var criticalEnvironmentCount = criticalEnvs.Select(c => c.Name).Distinct().Count();
            return builder
                .Fail($"{criticalEnvironmentCount} environment(s) at critical capacity")
                .WithEvidence("Capacity", eb =>
                {
                    eb.Add("environment_count", capacities.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("critical_resource_count", criticalEnvs.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("warning_resource_count", warningEnvs.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("critical_details", string.Join("; ", criticalEnvs.Select(c => $"{c.Name}/{c.Resource}:{c.Usage:F1}%")));
                    AddCapacityEvidence(eb, capacities);
                })
                .WithCauses(
                    "Resource exhaustion approaching",
                    "Runaway process consuming resources",
                    "Unexpected workload increase",
                    "Resource limits too restrictive")
                .WithRemediation(rb =>
                {
                    // Remediation commands target the first critical entry found.
                    rb.AddStep(1, "View capacity details",
                        $"stella env capacity {criticalEnvs[0].Name}",
                        CommandType.Shell);
                    rb.AddStep(2, "Scale up resources",
                        $"stella env scale {criticalEnvs[0].Name} --{criticalEnvs[0].Resource} +20%",
                        CommandType.Manual);
                    rb.AddStep(3, "Or remove unused deployments",
                        $"stella env cleanup {criticalEnvs[0].Name}",
                        CommandType.Manual);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (warningEnvs.Count > 0)
        {
            return builder
                // The threshold is taken from the constant so the text cannot drift from the check.
                .Warn($"{warningEnvs.Count} environment resource(s) above {HighUsageWarningPercent:F0}% usage")
                .WithEvidence("Capacity", eb =>
                {
                    eb.Add("environment_count", capacities.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("critical_resource_count", "0");
                    eb.Add("warning_resource_count", warningEnvs.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("warning_details", string.Join("; ", warningEnvs.Select(w => $"{w.Name}/{w.Resource}:{w.Usage:F1}%")));
                    AddCapacityEvidence(eb, capacities);
                })
                .WithCauses(
                    "Normal growth approaching limits",
                    "Temporary workload spike")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "Monitor capacity trend",
                        "stella env capacity --trend",
                        CommandType.Shell);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        return builder
            .Pass($"{capacities.Count} environment(s) have adequate capacity")
            .WithEvidence("Capacity", eb =>
            {
                eb.Add("environment_count", capacities.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("critical_resource_count", "0");
                eb.Add("warning_resource_count", "0");
                AddCapacityEvidence(eb, capacities);
            })
            .Build();
    }
    catch (HttpRequestException ex)
    {
        return builder
            .Warn($"Cannot check capacity: {ex.Message}")
            .WithEvidence("Capacity Status", eb =>
            {
                eb.Add("error_message", ex.Message);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
    catch (TaskCanceledException)
    {
        return builder
            .Warn("Capacity check timed out")
            .WithEvidence("Capacity Status", eb =>
            {
                eb.Add("connection_error_type", "timeout");
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
}
private static void CheckResource(
    string envName, string resource, double usage,
    List<(string, string, double)> critical,
    List<(string, string, double)> warning)
{
    // Pick the list this reading belongs to: critical takes precedence over warning,
    // and a reading below both thresholds is not recorded at all.
    var target = usage >= CriticalUsagePercent
        ? critical
        : usage >= HighUsageWarningPercent
            ? warning
            : null;

    target?.Add((envName, resource, usage));
}
/// <summary>
/// Adds CPU, memory and storage usage percentages for every environment to the evidence,
/// under keys derived from the environment name.
/// </summary>
/// <param name="eb">Evidence builder that receives the entries.</param>
/// <param name="capacities">Per-environment capacity figures to report.</param>
private static void AddCapacityEvidence(EvidenceBuilder eb, List<CapacityInfo> capacities)
{
    // One decimal place, culture-invariant, so evidence values are stable across locales.
    static string Percent(double value) => value.ToString("F1", CultureInfo.InvariantCulture);

    foreach (var capacity in capacities)
    {
        // Lower-case the name and map spaces and dashes to underscores to build the key prefix.
        var slug = capacity.Name.ToLowerInvariant().Replace(" ", "_").Replace("-", "_");
        var keyPrefix = "env_" + slug;

        eb.Add(keyPrefix + "_cpu_percent", Percent(capacity.CpuUsagePercent));
        eb.Add(keyPrefix + "_memory_percent", Percent(capacity.MemoryUsagePercent));
        eb.Add(keyPrefix + "_storage_percent", Percent(capacity.StorageUsagePercent));
    }
}
/// <summary>
/// Parses the capacity payload into per-environment usage figures.
/// </summary>
/// <remarks>
/// Accepts either a top-level JSON array or an object with an <c>environments</c> array.
/// Parsing is best-effort: entries that are not objects or have no non-empty string
/// <c>name</c> are skipped, and numeric fields that are missing or not representable as
/// integers are treated as 0. A single malformed entry therefore no longer discards the
/// entries that follow it. Invalid JSON yields an empty list.
/// </remarks>
/// <param name="json">Raw JSON response body.</param>
/// <returns>The parsed capacities; never <c>null</c>.</returns>
private static List<CapacityInfo> ParseCapacities(string json)
{
    var capacities = new List<CapacityInfo>();
    if (string.IsNullOrWhiteSpace(json))
    {
        return capacities;
    }

    // Tolerant readers: a wrong JSON type or out-of-range number falls back to 0 instead of throwing.
    static long ReadInt64(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el)
        && el.ValueKind == JsonValueKind.Number
        && el.TryGetInt64(out var value)
            ? value
            : 0;

    static int ReadInt32(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el)
        && el.ValueKind == JsonValueKind.Number
        && el.TryGetInt32(out var value)
            ? value
            : 0;

    // A zero or negative total would make the ratio meaningless, so report 0% in that case.
    static double Percent(long used, long total) => total > 0 ? (double)used / total * 100 : 0;

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        JsonElement capsArray = default;
        if (root.ValueKind == JsonValueKind.Array)
        {
            capsArray = root;
        }
        else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("environments", out var arr))
        {
            capsArray = arr;
        }

        if (capsArray.ValueKind != JsonValueKind.Array)
        {
            return capacities;
        }

        foreach (var cap in capsArray.EnumerateArray())
        {
            // TryGetProperty throws on non-object elements, so filter them out first.
            if (cap.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var name = cap.TryGetProperty("name", out var nameEl) && nameEl.ValueKind == JsonValueKind.String
                ? nameEl.GetString()
                : null;
            if (string.IsNullOrEmpty(name))
            {
                continue;
            }

            capacities.Add(new CapacityInfo
            {
                Name = name,
                CpuUsagePercent = Percent(ReadInt64(cap, "usedCpuMillicores"), ReadInt64(cap, "totalCpuMillicores")),
                MemoryUsagePercent = Percent(ReadInt64(cap, "usedMemoryBytes"), ReadInt64(cap, "totalMemoryBytes")),
                StorageUsagePercent = Percent(ReadInt64(cap, "usedStorageBytes"), ReadInt64(cap, "totalStorageBytes")),
                MaxDeployments = ReadInt32(cap, "maxConcurrentDeployments"),
                ActiveDeployments = ReadInt32(cap, "activeDeployments")
            });
        }
    }
    catch (JsonException)
    {
        // Best-effort parsing: an unparseable body is reported as "no capacity data".
    }

    return capacities;
}
/// <summary>
/// Capacity figures for one environment, as produced by <c>ParseCapacities</c>.
/// </summary>
private sealed class CapacityInfo
{
/// <summary>Environment name; entries without a name are never created.</summary>
public required string Name { get; init; }
/// <summary>Used CPU as a percentage of total CPU; 0 when the total is not positive.</summary>
public double CpuUsagePercent { get; init; }
/// <summary>Used memory as a percentage of total memory; 0 when the total is not positive.</summary>
public double MemoryUsagePercent { get; init; }
/// <summary>Used storage as a percentage of total storage; 0 when the total is not positive.</summary>
public double StorageUsagePercent { get; init; }
/// <summary>Value of the payload's <c>maxConcurrentDeployments</c> field (0 when absent).</summary>
public int MaxDeployments { get; init; }
/// <summary>Value of the payload's <c>activeDeployments</c> field (0 when absent).</summary>
public int ActiveDeployments { get; init; }
}
}

View File

@@ -0,0 +1,401 @@
// -----------------------------------------------------------------------------
// EnvironmentConnectivityCheck.cs
// Sprint: SPRINT_20260118_017_Doctor_environment_health
// Task: ENVH-002 - Implement EnvironmentConnectivityCheck
// Description: Verify connectivity to each configured environment agent
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Environment.Checks;
/// <summary>
/// Verifies connectivity to each configured environment agent.
/// Measures latency, verifies authentication, and checks TLS certificate validity.
/// </summary>
public sealed class EnvironmentConnectivityCheck : IDoctorCheck
{
// Plugin identifier passed to context.CreateResult for every result of this check.
private const string PluginId = "stellaops.doctor.environment";
// Category label passed to context.CreateResult for every result of this check.
private const string CategoryName = "Environment Health";
// A reachable agent whose health request takes longer than this (in ms) is listed as high latency.
private const int HighLatencyThresholdMs = 500;
// A server certificate with this many days or fewer until expiry is listed as expiring soon.
private const int CertExpiryWarningDays = 30;
/// <inheritdoc />
public string CheckId => "check.environment.connectivity";
/// <inheritdoc />
public string Name => "Environment Connectivity";
/// <inheritdoc />
public string Description => "Verify connectivity to environment agents";
/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
/// <inheritdoc />
public IReadOnlyList<string> Tags => ["environment", "connectivity", "agent", "network"];
/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(30);
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
    // The check is runnable only when an orchestrator URL is configured. The first key wins
    // whenever it is present (even if empty); the second key is consulted only when it is absent.
    var configuration = context.Configuration;
    var primary = configuration["ReleaseOrchestrator:Url"];
    if (primary is not null)
    {
        return primary.Length > 0;
    }

    return !string.IsNullOrEmpty(configuration["Release:Orchestrator:Url"]);
}
/// <inheritdoc />
/// <remarks>
/// Fetches the environment list from the orchestrator, probes each environment's agent in turn,
/// and reports:
/// Fail when at least one agent is unreachable; Warn when all are reachable but some show high
/// latency or a TLS certificate close to expiry; Pass otherwise (including when no environments
/// are configured). A non-success orchestrator response, an <see cref="HttpRequestException"/>
/// or a <see cref="TaskCanceledException"/> is reported as Warn.
/// </remarks>
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    var builder = context.CreateResult(CheckId, PluginId, CategoryName);
    var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
        ?? context.Configuration["Release:Orchestrator:Url"]
        ?? "http://localhost:5080";

    try
    {
        var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
        var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
        httpClient.Timeout = TimeSpan.FromSeconds(30);

        // Get list of environments. The response is disposed when the method exits.
        using var response = await httpClient.GetAsync(
            $"{orchestratorUrl.TrimEnd('/')}/api/v1/environments",
            ct);

        if (!response.IsSuccessStatusCode)
        {
            // Captured up front so the evidence callback does not depend on the response object.
            var statusCode = (int)response.StatusCode;
            return builder
                .Warn($"Cannot retrieve environments: HTTP {statusCode}")
                .WithEvidence("Connectivity Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        var envsJson = await response.Content.ReadAsStringAsync(ct);
        var environments = ParseEnvironments(envsJson);

        if (environments.Count == 0)
        {
            return builder
                .Pass("No environments configured")
                .WithEvidence("Connectivity", eb =>
                {
                    eb.Add("total_environments", "0");
                })
                .Build();
        }

        // Check connectivity to each environment
        var results = new List<ConnectivityInfo>();
        var unreachable = new List<string>();
        var highLatency = new List<(string Name, int LatencyMs)>();
        var certWarnings = new List<(string Name, int DaysUntilExpiry)>();

        foreach (var env in environments)
        {
            var connResult = await CheckEnvironmentConnectivityAsync(
                httpClient, env, context.TimeProvider, ct);
            results.Add(connResult);

            if (!connResult.Reachable)
            {
                unreachable.Add(env.Name);
                continue;
            }

            // Latency and certificate findings are only meaningful for reachable agents.
            if (connResult.LatencyMs > HighLatencyThresholdMs)
            {
                highLatency.Add((env.Name, connResult.LatencyMs));
            }

            if (connResult.TlsDaysUntilExpiry.HasValue &&
                connResult.TlsDaysUntilExpiry.Value <= CertExpiryWarningDays)
            {
                certWarnings.Add((env.Name, connResult.TlsDaysUntilExpiry.Value));
            }
        }

        var reachableCount = environments.Count - unreachable.Count;

        // Determine severity
        if (unreachable.Count > 0)
        {
            return builder
                .Fail($"{unreachable.Count} environment(s) unreachable")
                .WithEvidence("Connectivity", eb =>
                {
                    eb.Add("total_environments", environments.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("reachable_environments", reachableCount.ToString(CultureInfo.InvariantCulture));
                    eb.Add("unreachable_environments", string.Join(", ", unreachable));
                    eb.Add("high_latency_environments", string.Join(", ", highLatency.Select(h => $"{h.Name}:{h.LatencyMs}ms")));
                    eb.Add("cert_expiring_soon", string.Join(", ", certWarnings.Select(c => $"{c.Name}:{c.DaysUntilExpiry}d")));
                    AddPerEnvironmentEvidence(eb, results);
                })
                .WithCauses(
                    "Environment agent not running",
                    "Network connectivity issue",
                    "Firewall blocking connection",
                    "Agent authentication failed")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "Check environment agent status",
                        $"stella env ping {unreachable[0]}",
                        CommandType.Shell);
                    rb.AddStep(2, "View agent logs",
                        $"stella env logs {unreachable[0]}",
                        CommandType.Shell);
                    rb.AddStep(3, "Test network connectivity",
                        "# Check firewall rules and network routes to environment agent",
                        CommandType.Manual);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (highLatency.Count > 0 || certWarnings.Count > 0)
        {
            var warnings = new List<string>();
            if (highLatency.Count > 0) warnings.Add($"{highLatency.Count} high latency");
            if (certWarnings.Count > 0) warnings.Add($"{certWarnings.Count} cert expiring soon");

            return builder
                .Warn($"Environment connectivity issues: {string.Join(", ", warnings)}")
                .WithEvidence("Connectivity", eb =>
                {
                    eb.Add("total_environments", environments.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("reachable_environments", reachableCount.ToString(CultureInfo.InvariantCulture));
                    eb.Add("unreachable_environments", "");
                    eb.Add("high_latency_environments", string.Join(", ", highLatency.Select(h => $"{h.Name}:{h.LatencyMs}ms")));
                    eb.Add("cert_expiring_soon", string.Join(", ", certWarnings.Select(c => $"{c.Name}:{c.DaysUntilExpiry}d")));
                    AddPerEnvironmentEvidence(eb, results);
                })
                .WithCauses(
                    "Network congestion",
                    "TLS certificate approaching expiry",
                    "Geographic latency")
                .WithRemediation(rb =>
                {
                    // Number the steps sequentially so a latency-only warning starts at step 1
                    // instead of emitting a lone "step 2".
                    var step = 0;
                    if (certWarnings.Count > 0)
                    {
                        rb.AddStep(++step, "Renew TLS certificate",
                            $"stella env cert renew {certWarnings[0].Name}",
                            CommandType.Manual);
                    }

                    if (highLatency.Count > 0)
                    {
                        rb.AddStep(++step, "Investigate latency",
                            $"stella env diagnose {highLatency[0].Name} --network",
                            CommandType.Shell);
                    }
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        return builder
            .Pass($"{environments.Count} environment(s) reachable")
            .WithEvidence("Connectivity", eb =>
            {
                eb.Add("total_environments", environments.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("reachable_environments", reachableCount.ToString(CultureInfo.InvariantCulture));
                eb.Add("unreachable_environments", "");
                eb.Add("environment_names", string.Join(", ", environments.Select(e => e.Name)));
                AddPerEnvironmentEvidence(eb, results);
            })
            .Build();
    }
    catch (HttpRequestException ex)
    {
        return builder
            .Warn($"Cannot check environments: {ex.Message}")
            .WithEvidence("Connectivity Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("error_message", ex.Message);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
    catch (TaskCanceledException)
    {
        return builder
            .Warn("Environment connectivity check timed out")
            .WithEvidence("Connectivity Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("connection_error_type", "timeout");
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
}
/// <summary>
/// Probes one environment agent by requesting <c>{AgentEndpoint}/health</c> and records
/// reachability, latency, authentication outcome and TLS certificate expiry.
/// </summary>
/// <remarks>
/// The probe uses its own <see cref="HttpClient"/> with a certificate-capturing handler and a
/// 10 second timeout; <paramref name="httpClient"/> is not used for the request and is kept
/// only so the signature stays unchanged. Connection failures, timeouts and malformed
/// endpoints are reported through the returned object rather than thrown. Cancellation
/// requested through <paramref name="ct"/> is propagated to the caller instead of being
/// recorded as a timeout of this environment.
/// </remarks>
/// <param name="httpClient">Unused; see remarks.</param>
/// <param name="env">Environment whose agent endpoint is probed.</param>
/// <param name="timeProvider">Clock used for expiry and last-contact timestamps.</param>
/// <param name="ct">Cancellation token for the request.</param>
/// <returns>The connectivity findings for <paramref name="env"/>.</returns>
private static async Task<ConnectivityInfo> CheckEnvironmentConnectivityAsync(
    HttpClient httpClient,
    EnvironmentBasicInfo env,
    TimeProvider timeProvider,
    CancellationToken ct)
{
    var result = new ConnectivityInfo
    {
        EnvironmentName = env.Name,
        AgentEndpoint = MaskEndpoint(env.AgentEndpoint)
    };

    if (string.IsNullOrEmpty(env.AgentEndpoint))
    {
        result.Reachable = false;
        result.ErrorMessage = "No agent endpoint configured";
        return result;
    }

    // Validate the endpoint up front: a relative or malformed URL would otherwise make
    // GetAsync throw an exception type that no handler below catches.
    if (!Uri.TryCreate($"{env.AgentEndpoint.TrimEnd('/')}/health", UriKind.Absolute, out var healthUri) ||
        (healthUri.Scheme != Uri.UriSchemeHttp && healthUri.Scheme != Uri.UriSchemeHttps))
    {
        result.Reachable = false;
        result.ErrorMessage = "Agent endpoint is not a valid absolute HTTP(S) URL";
        return result;
    }

    X509Certificate2? serverCert = null;
    try
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // Create a handler that captures TLS certificate info
        using var handler = new HttpClientHandler();
        handler.ServerCertificateCustomValidationCallback = (message, cert, chain, errors) =>
        {
            if (cert != null)
            {
                // Keep a private copy; release any copy captured by an earlier invocation.
                serverCert?.Dispose();
                serverCert = new X509Certificate2(cert);
            }

            // Preserve default trust behaviour: only certificates without policy errors are accepted.
            return errors == SslPolicyErrors.None;
        };

        using var client = new HttpClient(handler) { Timeout = TimeSpan.FromSeconds(10) };
        using var response = await client.GetAsync(healthUri, ct);

        stopwatch.Stop();
        result.LatencyMs = (int)stopwatch.ElapsedMilliseconds;
        result.Reachable = response.IsSuccessStatusCode;
        result.AuthSuccess = response.StatusCode != System.Net.HttpStatusCode.Unauthorized &&
                             response.StatusCode != System.Net.HttpStatusCode.Forbidden;

        var now = timeProvider.GetUtcNow();

        if (serverCert != null)
        {
            // NotAfter is expressed in local time; convert it so both sides of the
            // subtraction are UTC and the result does not shift with the machine's time zone.
            var notAfterUtc = serverCert.NotAfter.ToUniversalTime();
            result.TlsValid = true;
            result.TlsExpiresAt = notAfterUtc;
            result.TlsDaysUntilExpiry = (int)(notAfterUtc - now.UtcDateTime).TotalDays;
        }

        if (result.Reachable)
        {
            result.LastSuccessfulContact = now;
        }
    }
    catch (HttpRequestException ex)
    {
        result.Reachable = false;
        result.ErrorMessage = ex.Message;
    }
    catch (TaskCanceledException) when (!ct.IsCancellationRequested)
    {
        // Only the probe's own timeout lands here; caller cancellation propagates.
        result.Reachable = false;
        result.ErrorMessage = "Connection timed out";
    }
    finally
    {
        serverCert?.Dispose();
    }

    return result;
}
/// <summary>
/// Adds reachability, latency and (when known) TLS days-until-expiry for every probed
/// environment to the evidence, under keys derived from the environment name.
/// </summary>
/// <param name="eb">Evidence builder that receives the entries.</param>
/// <param name="results">Per-environment probe results.</param>
private static void AddPerEnvironmentEvidence(EvidenceBuilder eb, List<ConnectivityInfo> results)
{
    foreach (var info in results)
    {
        // Lower-case the name and map spaces and dashes to underscores to build the key.
        var slug = info.EnvironmentName.ToLowerInvariant().Replace(" ", "_").Replace("-", "_");

        eb.Add($"env_{slug}_reachable", info.Reachable ? "true" : "false");
        eb.Add($"env_{slug}_latency_ms", info.LatencyMs.ToString(CultureInfo.InvariantCulture));

        // The expiry entry is emitted only when a certificate was observed.
        if (info.TlsDaysUntilExpiry is int daysLeft)
        {
            eb.Add($"env_{slug}_tls_days_until_expiry", daysLeft.ToString(CultureInfo.InvariantCulture));
        }
    }
}
/// <summary>
/// Produces a display-safe form of an agent endpoint that keeps only the scheme and host.
/// </summary>
/// <param name="endpoint">Endpoint URL as configured; may be empty.</param>
/// <returns>
/// An empty string for a null or empty endpoint, <c>scheme://host:***</c> for an absolute URI,
/// and <c>***</c> when the value cannot be parsed as an absolute URI.
/// </returns>
private static string MaskEndpoint(string endpoint)
{
    if (string.IsNullOrEmpty(endpoint))
    {
        return "";
    }

    // Port, path, query and user info are all dropped from the masked form.
    return Uri.TryCreate(endpoint, UriKind.Absolute, out var parsed)
        ? parsed.Scheme + "://" + parsed.Host + ":***"
        : "***";
}
/// <summary>
/// Parses the orchestrator's environment list into name/endpoint pairs.
/// </summary>
/// <remarks>
/// Accepts either a top-level JSON array or an object with an <c>environments</c> array.
/// Parsing is best-effort: entries that are not objects or have no non-empty string
/// <c>name</c> are skipped, so one malformed entry no longer discards the entries after it.
/// The endpoint is read from <c>agentEndpoint</c>, falling back to <c>agent_endpoint</c> when
/// the former is missing or not a string, and defaults to an empty string. Invalid JSON yields
/// an empty list.
/// </remarks>
/// <param name="json">Raw JSON response body.</param>
/// <returns>The parsed environments; never <c>null</c>.</returns>
private static List<EnvironmentBasicInfo> ParseEnvironments(string json)
{
    var envs = new List<EnvironmentBasicInfo>();
    if (string.IsNullOrWhiteSpace(json))
    {
        return envs;
    }

    // Tolerant reader: a non-string value is treated as absent instead of throwing.
    static string? ReadString(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) && el.ValueKind == JsonValueKind.String
            ? el.GetString()
            : null;

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        JsonElement envsArray = default;
        if (root.ValueKind == JsonValueKind.Array)
        {
            envsArray = root;
        }
        else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("environments", out var arr))
        {
            envsArray = arr;
        }

        if (envsArray.ValueKind != JsonValueKind.Array)
        {
            return envs;
        }

        foreach (var env in envsArray.EnumerateArray())
        {
            // TryGetProperty throws on non-object elements, so filter them out first.
            if (env.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var name = ReadString(env, "name");
            if (string.IsNullOrEmpty(name))
            {
                continue;
            }

            var endpoint = ReadString(env, "agentEndpoint") ?? ReadString(env, "agent_endpoint");
            envs.Add(new EnvironmentBasicInfo { Name = name, AgentEndpoint = endpoint ?? "" });
        }
    }
    catch (JsonException)
    {
        // Best-effort parsing: an unparseable body is reported as "no environments".
    }

    return envs;
}
/// <summary>
/// Name and agent endpoint of one environment, as produced by <c>ParseEnvironments</c>.
/// </summary>
private sealed record EnvironmentBasicInfo
{
/// <summary>Environment name; entries without a name are never created.</summary>
public required string Name { get; init; }
/// <summary>Agent endpoint URL, or an empty string when the payload did not provide one.</summary>
public required string AgentEndpoint { get; init; }
}
/// <summary>
/// Mutable result of probing one environment agent, filled in by
/// <c>CheckEnvironmentConnectivityAsync</c>.
/// </summary>
private sealed class ConnectivityInfo
{
/// <summary>Name of the probed environment.</summary>
public string EnvironmentName { get; set; } = "";
/// <summary>Masked form of the agent endpoint (output of <c>MaskEndpoint</c>).</summary>
public string AgentEndpoint { get; set; } = "";
/// <summary>True when the health request returned a success status code.</summary>
public bool Reachable { get; set; }
/// <summary>Elapsed time of the health request in milliseconds.</summary>
public int LatencyMs { get; set; }
/// <summary>False when the health request returned 401 Unauthorized or 403 Forbidden.</summary>
public bool AuthSuccess { get; set; }
/// <summary>Set to true when a server certificate was captured during the request.</summary>
public bool TlsValid { get; set; }
/// <summary>Expiry (<c>NotAfter</c>) of the captured server certificate, if any.</summary>
public DateTime? TlsExpiresAt { get; set; }
/// <summary>Whole days from the time of the probe until the captured certificate expires, if any.</summary>
public int? TlsDaysUntilExpiry { get; set; }
/// <summary>Failure description when the probe could not complete; otherwise null.</summary>
public string? ErrorMessage { get; set; }
/// <summary>Time of the probe when it found the agent reachable; otherwise null.</summary>
public DateTimeOffset? LastSuccessfulContact { get; set; }
}
}

View File

@@ -0,0 +1,335 @@
// -----------------------------------------------------------------------------
// EnvironmentDeploymentHealthCheck.cs
// Sprint: SPRINT_20260118_017_Doctor_environment_health
// Task: ENVH-005 - Implement EnvironmentDeploymentHealthCheck
// Description: Check deployed service health within environments
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Environment.Checks;
/// <summary>
/// Checks deployed service health within environments.
/// Monitors service status, replica health, and deployment freshness.
/// </summary>
public sealed class EnvironmentDeploymentHealthCheck : IDoctorCheck
{
// Plugin identifier passed to context.CreateResult for every result of this check.
private const string PluginId = "stellaops.doctor.environment";
// Category label passed to context.CreateResult for every result of this check.
private const string CategoryName = "Environment Health";
/// <inheritdoc />
public string CheckId => "check.environment.deployments";
/// <inheritdoc />
public string Name => "Environment Deployment Health";
/// <inheritdoc />
public string Description => "Check deployed service health within environments";
/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
/// <inheritdoc />
public IReadOnlyList<string> Tags => ["environment", "deployment", "services", "health"];
/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(15);
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
    // The check is runnable only when an orchestrator URL is configured. The first key wins
    // whenever it is present (even if empty); the second key is consulted only when it is absent.
    var configuration = context.Configuration;
    var primary = configuration["ReleaseOrchestrator:Url"];
    if (primary is not null)
    {
        return primary.Length > 0;
    }

    return !string.IsNullOrEmpty(configuration["Release:Orchestrator:Url"]);
}
/// <inheritdoc />
/// <remarks>
/// Fetches deployments for all environments from the orchestrator and classifies every service
/// as failed, stopped or degraded (status "degraded", or fewer healthy replicas than replicas).
/// Result precedence: Fail for failed services in a production-named environment, Fail for any
/// other failed service, Warn for degraded services, Warn for stopped services, Pass otherwise.
/// A non-success orchestrator response, an <see cref="HttpRequestException"/> or a
/// <see cref="TaskCanceledException"/> is reported as Warn.
/// </remarks>
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    var builder = context.CreateResult(CheckId, PluginId, CategoryName);
    var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
        ?? context.Configuration["Release:Orchestrator:Url"]
        ?? "http://localhost:5080";

    try
    {
        var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
        var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
        httpClient.Timeout = TimeSpan.FromSeconds(15);

        // Get deployments across environments. The response is disposed when the method exits.
        using var response = await httpClient.GetAsync(
            $"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/deployments",
            ct);

        if (!response.IsSuccessStatusCode)
        {
            // Captured up front so the evidence callback does not depend on the response object.
            var statusCode = (int)response.StatusCode;
            return builder
                .Warn($"Cannot retrieve deployments: HTTP {statusCode}")
                .WithEvidence("Deployment Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        var deploymentsJson = await response.Content.ReadAsStringAsync(ct);
        var envDeployments = ParseDeployments(deploymentsJson);

        if (envDeployments.Count == 0)
        {
            return builder
                .Pass("No deployments to check")
                .WithEvidence("Deployments", eb =>
                {
                    eb.Add("environment_count", "0");
                    eb.Add("total_services", "0");
                })
                .Build();
        }

        var failedServices = new List<(string Env, string Service, string Error)>();
        var degradedServices = new List<(string Env, string Service, int Healthy, int Total)>();
        var stoppedServices = new List<(string Env, string Service)>();
        var totalServices = 0;

        foreach (var env in envDeployments)
        {
            foreach (var svc in env.Services)
            {
                totalServices++;

                // Each service lands in at most one bucket; "failed" and "stopped" take
                // precedence over the replica-based degraded test.
                if (svc.Status.Equals("failed", StringComparison.OrdinalIgnoreCase))
                {
                    failedServices.Add((env.Name, svc.Name, svc.Error ?? "Unknown error"));
                }
                else if (svc.Status.Equals("stopped", StringComparison.OrdinalIgnoreCase))
                {
                    stoppedServices.Add((env.Name, svc.Name));
                }
                else if (svc.Status.Equals("degraded", StringComparison.OrdinalIgnoreCase) ||
                         (svc.Replicas > 0 && svc.HealthyReplicas < svc.Replicas))
                {
                    degradedServices.Add((env.Name, svc.Name, svc.HealthyReplicas, svc.Replicas));
                }
            }
        }

        // Production failures are critical
        var prodFailures = failedServices.Where(f => IsProd(f.Env)).ToList();

        if (prodFailures.Count > 0)
        {
            return builder
                .Fail($"{prodFailures.Count} production service(s) failed")
                .WithEvidence("Deployments", eb =>
                {
                    eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
                    eb.Add("failed_service_count", failedServices.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("degraded_service_count", degradedServices.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("stopped_service_count", stoppedServices.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("failed_services", string.Join("; ", failedServices.Select(f => $"{f.Env}/{f.Service}")));
                    eb.Add("prod_failures", string.Join("; ", prodFailures.Select(f => $"{f.Service}:{f.Error}")));
                })
                .WithCauses(
                    "Service crashed or failed health checks",
                    "Deployment rolled out with errors",
                    "Dependency unavailable",
                    "Resource exhaustion")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "View service logs",
                        $"stella env logs {prodFailures[0].Env} --service {prodFailures[0].Service}",
                        CommandType.Shell);
                    rb.AddStep(2, "Restart service",
                        $"stella env restart {prodFailures[0].Env} --service {prodFailures[0].Service}",
                        CommandType.Shell);
                    rb.AddStep(3, "Rollback if needed",
                        $"stella release rollback --env {prodFailures[0].Env}",
                        CommandType.Manual);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (failedServices.Count > 0)
        {
            return builder
                .Fail($"{failedServices.Count} service(s) failed")
                .WithEvidence("Deployments", eb =>
                {
                    eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
                    eb.Add("failed_service_count", failedServices.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("degraded_service_count", degradedServices.Count.ToString(CultureInfo.InvariantCulture));
                    // Reported here as well so the evidence matches the production-failure branch.
                    eb.Add("stopped_service_count", stoppedServices.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("failed_services", string.Join("; ", failedServices.Select(f => $"{f.Env}/{f.Service}")));
                })
                .WithCauses("Service failure", "Deployment error")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "View service logs",
                        $"stella env logs {failedServices[0].Env} --service {failedServices[0].Service}",
                        CommandType.Shell);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (degradedServices.Count > 0)
        {
            return builder
                .Warn($"{degradedServices.Count} service(s) degraded")
                .WithEvidence("Deployments", eb =>
                {
                    eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
                    eb.Add("failed_service_count", "0");
                    eb.Add("degraded_service_count", degradedServices.Count.ToString(CultureInfo.InvariantCulture));
                    // Stopped services can coexist with degraded ones; surface the count so they are not hidden.
                    eb.Add("stopped_service_count", stoppedServices.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("degraded_services", string.Join("; ", degradedServices.Select(d => $"{d.Env}/{d.Service}:{d.Healthy}/{d.Total}")));
                })
                .WithCauses(
                    "Replica failed health check",
                    "Scaling in progress",
                    "Node failure")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "View service health",
                        $"stella env health {degradedServices[0].Env} --service {degradedServices[0].Service}",
                        CommandType.Shell);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (stoppedServices.Count > 0)
        {
            return builder
                .Warn($"{stoppedServices.Count} service(s) stopped")
                .WithEvidence("Deployments", eb =>
                {
                    eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
                    eb.Add("stopped_service_count", stoppedServices.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("stopped_services", string.Join("; ", stoppedServices.Select(s => $"{s.Env}/{s.Service}")));
                })
                .WithCauses("Service intentionally stopped", "Maintenance mode")
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        return builder
            .Pass($"{totalServices} service(s) healthy across {envDeployments.Count} environment(s)")
            .WithEvidence("Deployments", eb =>
            {
                eb.Add("environment_count", envDeployments.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("total_services", totalServices.ToString(CultureInfo.InvariantCulture));
                eb.Add("failed_service_count", "0");
                eb.Add("degraded_service_count", "0");
            })
            .Build();
    }
    catch (HttpRequestException ex)
    {
        return builder
            .Warn($"Cannot check deployments: {ex.Message}")
            .WithEvidence("Deployment Status", eb =>
            {
                // Included so the failing endpoint is identifiable, as in the non-success branch.
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("error_message", ex.Message);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
    catch (TaskCanceledException)
    {
        return builder
            .Warn("Deployment check timed out")
            .WithEvidence("Deployment Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("connection_error_type", "timeout");
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
}
/// <summary>
/// Reports whether an environment name looks like a production environment, i.e. contains
/// "prod" (and therefore also "production") in any letter case.
/// </summary>
/// <param name="envName">Environment name to inspect.</param>
/// <returns><c>true</c> when the name contains "prod", ignoring case.</returns>
private static bool IsProd(string envName)
{
    // "production" itself contains "prod", so a single search covers both spellings.
    // NOTE(review): this is a substring match, so names such as "nonprod" or "preprod"
    // also count as production - confirm that is intended.
    return envName.IndexOf("prod", StringComparison.OrdinalIgnoreCase) >= 0;
}
/// <summary>
/// Parses the orchestrator's deployments payload into per-environment service lists.
/// </summary>
/// <remarks>
/// Accepts either a top-level JSON array or an object with an <c>environments</c> array.
/// Parsing is best-effort: environments and services that are not objects or have no
/// non-empty string <c>name</c> are skipped, so one malformed entry no longer discards the
/// entries after it. A missing or non-string <c>status</c> becomes "unknown"; a missing or
/// non-integer <c>replicas</c> becomes 0; a missing or non-integer <c>healthyReplicas</c>
/// defaults to the replica count. Invalid JSON yields an empty list.
/// </remarks>
/// <param name="json">Raw JSON response body.</param>
/// <returns>The parsed environments with their services; never <c>null</c>.</returns>
private static List<EnvDeployments> ParseDeployments(string json)
{
    var result = new List<EnvDeployments>();
    if (string.IsNullOrWhiteSpace(json))
    {
        return result;
    }

    // Tolerant readers: a value of the wrong JSON type is treated as absent instead of throwing.
    static string? ReadString(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) && el.ValueKind == JsonValueKind.String
            ? el.GetString()
            : null;

    static int? ReadInt32(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el)
        && el.ValueKind == JsonValueKind.Number
        && el.TryGetInt32(out var value)
            ? value
            : null;

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        JsonElement envsArray = default;
        if (root.ValueKind == JsonValueKind.Array)
        {
            envsArray = root;
        }
        else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("environments", out var arr))
        {
            envsArray = arr;
        }

        if (envsArray.ValueKind != JsonValueKind.Array)
        {
            return result;
        }

        foreach (var env in envsArray.EnumerateArray())
        {
            // TryGetProperty throws on non-object elements, so filter them out first.
            if (env.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var name = ReadString(env, "name");
            if (string.IsNullOrEmpty(name))
            {
                continue;
            }

            var services = new List<ServiceInfo>();
            if (env.TryGetProperty("services", out var svcsEl) && svcsEl.ValueKind == JsonValueKind.Array)
            {
                foreach (var svc in svcsEl.EnumerateArray())
                {
                    if (svc.ValueKind != JsonValueKind.Object)
                    {
                        continue;
                    }

                    var svcName = ReadString(svc, "name");
                    if (string.IsNullOrEmpty(svcName))
                    {
                        continue;
                    }

                    var replicas = ReadInt32(svc, "replicas") ?? 0;
                    services.Add(new ServiceInfo
                    {
                        Name = svcName,
                        Status = ReadString(svc, "status") ?? "unknown",
                        Replicas = replicas,
                        // Without a usable healthy count, assume every replica is healthy.
                        HealthyReplicas = ReadInt32(svc, "healthyReplicas") ?? replicas,
                        Error = ReadString(svc, "error")
                    });
                }
            }

            result.Add(new EnvDeployments { Name = name, Services = services });
        }
    }
    catch (JsonException)
    {
        // Best-effort parsing: an unparseable body is reported as "no deployments".
    }

    return result;
}
/// <summary>
/// One environment and the services deployed in it, as produced by <c>ParseDeployments</c>.
/// </summary>
private sealed class EnvDeployments
{
/// <summary>Environment name; entries without a name are never created.</summary>
public required string Name { get; init; }
/// <summary>Services parsed for this environment; empty when the payload listed none.</summary>
public List<ServiceInfo> Services { get; init; } = [];
}
/// <summary>
/// Health snapshot of one deployed service, as produced by <c>ParseDeployments</c>.
/// </summary>
private sealed class ServiceInfo
{
/// <summary>Service name; entries without a name are never created.</summary>
public required string Name { get; init; }
/// <summary>Status string from the payload, or "unknown" when none was provided.</summary>
public required string Status { get; init; }
/// <summary>Value of the payload's <c>replicas</c> field (0 when absent).</summary>
public int Replicas { get; init; }
/// <summary>Value of the payload's <c>healthyReplicas</c> field; defaults to <see cref="Replicas"/> when absent.</summary>
public int HealthyReplicas { get; init; }
/// <summary>Value of the payload's <c>error</c> field, if any.</summary>
public string? Error { get; init; }
}
}

View File

@@ -0,0 +1,277 @@
// -----------------------------------------------------------------------------
// EnvironmentDriftCheck.cs
// Sprint: SPRINT_20260118_017_Doctor_environment_health
// Task: ENVH-003 - Implement EnvironmentDriftCheck
// Description: Detect configuration drift between environments
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Environment.Checks;
/// <summary>
/// Detects configuration drift between environments.
/// Compares configuration snapshots and identifies unexpected differences.
/// </summary>
public sealed class EnvironmentDriftCheck : IDoctorCheck
{
// Plugin identifier passed to context.CreateResult for every result of this check.
private const string PluginId = "stellaops.doctor.environment";
// Category label passed to context.CreateResult for every result of this check.
private const string CategoryName = "Environment Health";
/// <inheritdoc />
public string CheckId => "check.environment.drift";
/// <inheritdoc />
public string Name => "Environment Drift Detection";
/// <inheritdoc />
public string Description => "Detect configuration drift between environments";
/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
/// <inheritdoc />
public IReadOnlyList<string> Tags => ["environment", "drift", "configuration", "consistency"];
/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(15);
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
    // The check is runnable only when an orchestrator URL is configured. The first key wins
    // whenever it is present (even if empty); the second key is consulted only when it is absent.
    var configuration = context.Configuration;
    var primary = configuration["ReleaseOrchestrator:Url"];
    if (primary is not null)
    {
        return primary.Length > 0;
    }

    return !string.IsNullOrEmpty(configuration["Release:Orchestrator:Url"]);
}
/// <inheritdoc />
/// <remarks>
/// Fetches the drift report from the orchestrator and reports: Pass when fewer than two
/// environments are listed or no entry is flagged as drift; Fail when at least one drifted
/// entry has severity "critical" (compared case-insensitively); Warn for any other drift.
/// A non-success orchestrator response, an <see cref="HttpRequestException"/> or a
/// <see cref="TaskCanceledException"/> is reported as Warn.
/// </remarks>
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    var builder = context.CreateResult(CheckId, PluginId, CategoryName);
    var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
        ?? context.Configuration["Release:Orchestrator:Url"]
        ?? "http://localhost:5080";

    // Severity strings come from an external payload; ignore letter case so "Critical"
    // is not silently downgraded to a warning.
    static bool IsCritical(DriftInfo drift) =>
        string.Equals(drift.Severity, "critical", StringComparison.OrdinalIgnoreCase);

    try
    {
        var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
        var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
        httpClient.Timeout = TimeSpan.FromSeconds(15);

        // Get drift report. The response is disposed when the method exits.
        using var response = await httpClient.GetAsync(
            $"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/drift",
            ct);

        if (!response.IsSuccessStatusCode)
        {
            // Captured up front so the evidence callback does not depend on the response object.
            var statusCode = (int)response.StatusCode;
            return builder
                .Warn($"Cannot retrieve drift report: HTTP {statusCode}")
                .WithEvidence("Drift Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        var driftJson = await response.Content.ReadAsStringAsync(ct);
        var driftReport = ParseDriftReport(driftJson);

        if (driftReport.Environments.Count < 2)
        {
            return builder
                .Pass("Drift detection requires at least 2 environments")
                .WithEvidence("Drift", eb =>
                {
                    eb.Add("environment_count", driftReport.Environments.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("drift_detected", "false");
                })
                .Build();
        }

        var driftedConfigs = driftReport.Drifts
            .Where(d => d.IsDrift)
            .ToList();

        if (driftedConfigs.Count == 0)
        {
            return builder
                .Pass("No configuration drift detected")
                .WithEvidence("Drift", eb =>
                {
                    eb.Add("environment_count", driftReport.Environments.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("drift_detected", "false");
                    eb.Add("configs_checked", driftReport.Drifts.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("environments_checked", string.Join(", ", driftReport.Environments));
                })
                .Build();
        }

        // Categorize drifts by severity
        var criticalDrifts = driftedConfigs.Where(IsCritical).ToList();
        var warningDrifts = driftedConfigs.Where(d => !IsCritical(d)).ToList();

        if (criticalDrifts.Count > 0)
        {
            return builder
                .Fail($"{criticalDrifts.Count} critical drift(s) detected")
                .WithEvidence("Drift", eb =>
                {
                    eb.Add("environment_count", driftReport.Environments.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("drift_detected", "true");
                    eb.Add("total_drifts", driftedConfigs.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("critical_drifts", criticalDrifts.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("warning_drifts", warningDrifts.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("drifted_configs", string.Join(", ", driftedConfigs.Select(d => d.ConfigKey)));
                    eb.Add("affected_environments", string.Join(", ", driftedConfigs.SelectMany(d => d.AffectedEnvironments).Distinct()));
                })
                .WithCauses(
                    "Manual configuration change in environment",
                    "Failed deployment left partial configuration",
                    "Configuration sync not propagated",
                    "Environment restored from outdated backup")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "View drift details",
                        "stella env drift show",
                        CommandType.Shell);
                    // The environment names in this manual command are fixed placeholders,
                    // not derived from the drift report.
                    rb.AddStep(2, "Reconcile configuration",
                        "stella env drift reconcile --from staging --to prod",
                        CommandType.Manual);
                    rb.AddStep(3, "Or accept drift as intentional",
                        $"stella env drift accept {criticalDrifts[0].ConfigKey}",
                        CommandType.Manual);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        return builder
            .Warn($"{warningDrifts.Count} configuration drift(s) detected")
            .WithEvidence("Drift", eb =>
            {
                eb.Add("environment_count", driftReport.Environments.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("drift_detected", "true");
                eb.Add("total_drifts", driftedConfigs.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("critical_drifts", "0");
                eb.Add("warning_drifts", warningDrifts.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("drifted_configs", string.Join(", ", driftedConfigs.Select(d => d.ConfigKey)));
            })
            .WithCauses(
                "Expected environment-specific differences",
                "Configuration update in progress",
                "Intentional environment variation")
            .WithRemediation(rb =>
            {
                rb.AddStep(1, "Review drift report",
                    "stella env drift show",
                    CommandType.Shell);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
    catch (HttpRequestException ex)
    {
        return builder
            .Warn($"Cannot check drift: {ex.Message}")
            .WithEvidence("Drift Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("error_message", ex.Message);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
    catch (TaskCanceledException)
    {
        return builder
            .Warn("Drift check timed out")
            .WithEvidence("Drift Status", eb =>
            {
                // Included so the failing endpoint is identifiable, as in the other error branches.
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("connection_error_type", "timeout");
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
}
/// <summary>
/// Parses the orchestrator's drift report into environment names and drift entries.
/// </summary>
/// <remarks>
/// Expects a JSON object with optional <c>environments</c> (array of strings) and
/// <c>drifts</c> (array of objects) properties. Parsing is best-effort: non-string environment
/// names, non-object drift entries and entries without a non-empty string <c>configKey</c> are
/// skipped, so one malformed entry no longer discards the entries after it. <c>isDrift</c> is
/// true only for a JSON <c>true</c>; a missing or non-string <c>severity</c> becomes
/// "warning". Invalid JSON or a non-object root yields an empty report.
/// </remarks>
/// <param name="json">Raw JSON response body.</param>
/// <returns>The parsed report; never <c>null</c>.</returns>
private static DriftReport ParseDriftReport(string json)
{
    var report = new DriftReport();
    if (string.IsNullOrWhiteSpace(json))
    {
        return report;
    }

    // Copies every non-empty string element of a JSON array into the target list.
    static void CollectStrings(JsonElement array, List<string> target)
    {
        foreach (var item in array.EnumerateArray())
        {
            if (item.ValueKind != JsonValueKind.String)
            {
                continue;
            }

            var text = item.GetString();
            if (!string.IsNullOrEmpty(text))
            {
                target.Add(text);
            }
        }
    }

    // Tolerant reader: a non-string value is treated as absent instead of throwing.
    static string? ReadString(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) && el.ValueKind == JsonValueKind.String
            ? el.GetString()
            : null;

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        // TryGetProperty throws on non-object elements, so bail out early for other roots.
        if (root.ValueKind != JsonValueKind.Object)
        {
            return report;
        }

        if (root.TryGetProperty("environments", out var envsEl) &&
            envsEl.ValueKind == JsonValueKind.Array)
        {
            CollectStrings(envsEl, report.Environments);
        }

        if (root.TryGetProperty("drifts", out var driftsEl) &&
            driftsEl.ValueKind == JsonValueKind.Array)
        {
            foreach (var drift in driftsEl.EnumerateArray())
            {
                if (drift.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                var configKey = ReadString(drift, "configKey");
                if (string.IsNullOrEmpty(configKey))
                {
                    continue;
                }

                var affected = new List<string>();
                if (drift.TryGetProperty("affectedEnvironments", out var affEl) &&
                    affEl.ValueKind == JsonValueKind.Array)
                {
                    CollectStrings(affEl, affected);
                }

                report.Drifts.Add(new DriftInfo
                {
                    ConfigKey = configKey,
                    IsDrift = drift.TryGetProperty("isDrift", out var driftEl) && driftEl.ValueKind == JsonValueKind.True,
                    Severity = ReadString(drift, "severity") ?? "warning",
                    AffectedEnvironments = affected
                });
            }
        }
    }
    catch (JsonException)
    {
        // Best-effort parsing: an unparseable body is reported as an empty drift report.
    }

    return report;
}
/// <summary>
/// Mutable accumulator for a parsed drift response: the environment names and the
/// per-configuration drift entries.
/// </summary>
private sealed class DriftReport
{
    /// <summary>Environment names read from the response; starts empty.</summary>
    public List<string> Environments { get; } = new();

    /// <summary>Drift entries read from the response; starts empty.</summary>
    public List<DriftInfo> Drifts { get; } = new();
}
/// <summary>
/// One configuration entry from a drift response.
/// </summary>
private sealed class DriftInfo
{
    /// <summary>Configuration key the entry refers to; must be set at construction.</summary>
    public required string ConfigKey { get; init; }

    /// <summary>Whether the entry is flagged as drifted; defaults to <c>false</c>.</summary>
    public bool IsDrift { get; init; }

    /// <summary>Severity label; defaults to <c>"warning"</c>.</summary>
    public string Severity { get; init; } = "warning";

    /// <summary>Names of the environments affected by this entry; defaults to an empty list.</summary>
    public List<string> AffectedEnvironments { get; init; } = new();
}
}

View File

@@ -0,0 +1,328 @@
// -----------------------------------------------------------------------------
// EnvironmentNetworkPolicyCheck.cs
// Sprint: SPRINT_20260118_017_Doctor_environment_health
// Task: ENVH-006 - Implement EnvironmentNetworkPolicyCheck
// Description: Verify network policies between environments
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Environment.Checks;
/// <summary>
/// Verifies network policies between environments.
/// Checks environment isolation, allowed ingress/egress, and policy consistency.
/// </summary>
/// <remarks>
/// The check fetches <c>/api/v1/environments/network-policies</c> from the configured release
/// orchestrator and evaluates every returned policy:
/// <list type="bullet">
/// <item>production allowing ingress from a "dev" source is a critical violation;</item>
/// <item>production without default-deny is a warning;</item>
/// <item>wildcard ingress is critical for production and a warning elsewhere;</item>
/// <item>wildcard egress is informational only.</item>
/// </list>
/// Any critical violation fails the check, any warning violation produces a warning, and
/// informational findings are reported in the evidence of a passing result.
/// </remarks>
public sealed class EnvironmentNetworkPolicyCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.environment";
    private const string CategoryName = "Environment Health";

    /// <inheritdoc />
    public string CheckId => "check.environment.network.policy";

    /// <inheritdoc />
    public string Name => "Environment Network Policy";

    /// <inheritdoc />
    public string Description => "Verify network policies between environments";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["environment", "network", "policy", "security", "isolation"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
            ?? context.Configuration["Release:Orchestrator:Url"];
        return !string.IsNullOrEmpty(orchestratorUrl);
    }

    /// <inheritdoc />
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
            ?? context.Configuration["Release:Orchestrator:Url"]
            ?? "http://localhost:5080";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Get network policies. The response owns the connection/content stream, so dispose it.
            using var response = await httpClient.GetAsync(
                $"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/network-policies",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                var statusCode = (int)response.StatusCode;
                return builder
                    .Warn($"Cannot retrieve network policies: HTTP {statusCode}")
                    .WithEvidence("Network Policy Status", eb =>
                    {
                        eb.Add("orchestrator_url", orchestratorUrl);
                        eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var policiesJson = await response.Content.ReadAsStringAsync(ct);
            var policies = ParseNetworkPolicies(policiesJson);

            if (policies.Count == 0)
            {
                return builder
                    .Warn("No network policies configured")
                    .WithEvidence("Network Policies", eb =>
                    {
                        eb.Add("policy_count", "0");
                        eb.Add("isolation_enforced", "false");
                    })
                    .WithCauses("Network policies not yet defined")
                    .WithRemediation(rb => rb
                        .AddStep(1, "Configure network isolation",
                            "stella env network-policy create --default-deny",
                            CommandType.Manual))
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var violations = FindViolations(policies);
            var criticalViolations = violations.Where(v => v.Severity == "critical").ToList();
            var warningViolations = violations.Where(v => v.Severity == "warning").ToList();

            if (criticalViolations.Count > 0)
            {
                return builder
                    .Fail($"{criticalViolations.Count} critical network policy violation(s)")
                    .WithEvidence("Network Policies", eb =>
                    {
                        eb.Add("policy_count", policies.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("violation_count", violations.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("critical_violations", criticalViolations.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("warning_violations", warningViolations.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("violations", FormatViolations(violations));
                    })
                    .WithCauses(
                        "Overly permissive network policy",
                        "Production not properly isolated",
                        "Legacy policy not updated")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "Review network policies",
                            "stella env network-policy list",
                            CommandType.Shell);
                        rb.AddStep(2, "Fix production isolation",
                            $"stella env network-policy update {criticalViolations[0].Environment} --default-deny --allow-from staging",
                            CommandType.Manual);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (warningViolations.Count > 0)
            {
                return builder
                    .Warn($"{warningViolations.Count} network policy warning(s)")
                    .WithEvidence("Network Policies", eb =>
                    {
                        eb.Add("policy_count", policies.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("violation_count", violations.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("critical_violations", "0");
                        eb.Add("warning_violations", warningViolations.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("violations", FormatViolations(violations));
                    })
                    .WithCauses("Policy could be more restrictive")
                    .WithRemediation(rb => rb
                        .AddStep(1, "Review policy recommendations",
                            "stella env network-policy audit",
                            CommandType.Shell))
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // Only info-level findings (open egress) can remain here. They do not affect the
            // outcome, but the evidence must still report them instead of claiming zero.
            return builder
                .Pass($"{policies.Count} network policies configured correctly")
                .WithEvidence("Network Policies", eb =>
                {
                    eb.Add("policy_count", policies.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("violation_count", violations.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("environments_with_default_deny", policies.Count(p => p.DefaultDeny).ToString(CultureInfo.InvariantCulture));
                    if (violations.Count > 0)
                    {
                        eb.Add("violations", FormatViolations(violations));
                    }
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check network policies: {ex.Message}")
                .WithEvidence("Network Policy Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("error_message", ex.Message);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            return builder
                .Warn("Network policy check timed out")
                .WithEvidence("Network Policy Status", eb =>
                {
                    eb.Add("connection_error_type", "timeout");
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Evaluates all policies and returns the violations found, production-isolation findings
    /// first and wildcard ingress/egress findings afterwards.
    /// </summary>
    private static List<PolicyViolation> FindViolations(List<NetworkPolicy> policies)
    {
        var violations = new List<PolicyViolation>();

        // Check for production isolation
        foreach (var prod in policies.Where(p => IsProd(p.Environment)))
        {
            // Production should not have ingress from dev ("devops" sources are tolerated).
            var devIngress = prod.AllowedIngress.Any(i =>
                i.Contains("dev", StringComparison.OrdinalIgnoreCase) &&
                !i.Contains("devops", StringComparison.OrdinalIgnoreCase));
            if (devIngress)
            {
                violations.Add(new PolicyViolation
                {
                    Environment = prod.Environment,
                    ViolationType = "prod_dev_ingress",
                    Message = "Production allows ingress from dev environment",
                    Severity = "critical"
                });
            }

            // Production should have explicit deny-all with allowlist
            if (!prod.DefaultDeny)
            {
                violations.Add(new PolicyViolation
                {
                    Environment = prod.Environment,
                    ViolationType = "prod_no_default_deny",
                    Message = "Production does not have default-deny policy",
                    Severity = "warning"
                });
            }
        }

        // Check for overly permissive policies
        foreach (var policy in policies)
        {
            if (policy.AllowedIngress.Any(IsWildcard))
            {
                violations.Add(new PolicyViolation
                {
                    Environment = policy.Environment,
                    ViolationType = "open_ingress",
                    Message = "Environment allows ingress from any source",
                    Severity = IsProd(policy.Environment) ? "critical" : "warning"
                });
            }

            if (policy.AllowedEgress.Any(IsWildcard))
            {
                violations.Add(new PolicyViolation
                {
                    Environment = policy.Environment,
                    ViolationType = "open_egress",
                    Message = "Environment allows egress to any destination",
                    Severity = "info"
                });
            }
        }

        return violations;
    }

    /// <summary>Returns <c>true</c> for the "any source/destination" markers <c>*</c> and <c>0.0.0.0/0</c>.</summary>
    private static bool IsWildcard(string endpoint) => endpoint == "*" || endpoint == "0.0.0.0/0";

    /// <summary>Renders violations as <c>environment:type</c> pairs separated by <c>"; "</c>.</summary>
    private static string FormatViolations(IEnumerable<PolicyViolation> violations) =>
        string.Join("; ", violations.Select(v => $"{v.Environment}:{v.ViolationType}"));

    /// <summary>
    /// Returns <c>true</c> when the environment name contains "prod" (case-insensitive); this
    /// also covers "production".
    /// </summary>
    private static bool IsProd(string envName) =>
        envName.Contains("prod", StringComparison.OrdinalIgnoreCase);

    /// <summary>
    /// Parses the policy list from either a root JSON array or an object with a
    /// <c>policies</c> array.
    /// </summary>
    /// <remarks>
    /// Best-effort: invalid JSON or an unexpected root shape yields an empty list. Entries
    /// with an unexpected JSON type are skipped individually rather than aborting the parse.
    /// </remarks>
    private static List<NetworkPolicy> ParseNetworkPolicies(string json)
    {
        var policies = new List<NetworkPolicy>();
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;

            JsonElement policiesArray;
            if (root.ValueKind == JsonValueKind.Array)
            {
                policiesArray = root;
            }
            else if (root.ValueKind == JsonValueKind.Object &&
                     root.TryGetProperty("policies", out var arr) &&
                     arr.ValueKind == JsonValueKind.Array)
            {
                policiesArray = arr;
            }
            else
            {
                return policies;
            }

            foreach (var policy in policiesArray.EnumerateArray())
            {
                if (policy.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                var env = policy.TryGetProperty("environment", out var envEl) &&
                          envEl.ValueKind == JsonValueKind.String
                    ? envEl.GetString()
                    : null;
                if (string.IsNullOrEmpty(env))
                {
                    continue;
                }

                policies.Add(new NetworkPolicy
                {
                    Environment = env,
                    // Anything other than a literal JSON true counts as "no default deny".
                    DefaultDeny = policy.TryGetProperty("defaultDeny", out var denyEl) &&
                                  denyEl.ValueKind == JsonValueKind.True,
                    AllowedIngress = ReadStringList(policy, "allowedIngress"),
                    AllowedEgress = ReadStringList(policy, "allowedEgress")
                });
            }
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException)
        {
            // Unreadable payload: report whatever was collected (normally nothing, because
            // JsonDocument.Parse validates the whole document before any entry is read).
        }

        return policies;
    }

    /// <summary>
    /// Reads the non-empty string items of the array property <paramref name="propertyName"/>;
    /// a missing property or a non-array value yields an empty list.
    /// </summary>
    private static List<string> ReadStringList(JsonElement owner, string propertyName)
    {
        var values = new List<string>();
        if (!owner.TryGetProperty(propertyName, out var arrayEl) || arrayEl.ValueKind != JsonValueKind.Array)
        {
            return values;
        }

        foreach (var item in arrayEl.EnumerateArray())
        {
            if (item.ValueKind == JsonValueKind.String && item.GetString() is { Length: > 0 } value)
            {
                values.Add(value);
            }
        }

        return values;
    }

    /// <summary>Network policy of a single environment as returned by the orchestrator.</summary>
    private sealed class NetworkPolicy
    {
        public required string Environment { get; init; }
        public bool DefaultDeny { get; init; }
        public List<string> AllowedIngress { get; init; } = [];
        public List<string> AllowedEgress { get; init; } = [];
    }

    /// <summary>A single finding produced while evaluating the policies.</summary>
    private sealed class PolicyViolation
    {
        public required string Environment { get; init; }
        public required string ViolationType { get; init; }
        public required string Message { get; init; }
        public string Severity { get; init; } = "warning";
    }
}

View File

@@ -0,0 +1,335 @@
// -----------------------------------------------------------------------------
// EnvironmentSecretHealthCheck.cs
// Sprint: SPRINT_20260118_017_Doctor_environment_health
// Task: ENVH-007 - Implement EnvironmentSecretHealthCheck
// Description: Check secrets health for environments
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Environment.Checks;
/// <summary>
/// Checks secrets health for environments.
/// Monitors secret expiry and rotation status.
/// </summary>
/// <remarks>
/// The check fetches <c>/api/v1/environments/secrets/status</c> (metadata only) from the
/// configured release orchestrator and classifies every secret:
/// <list type="bullet">
/// <item>expired: expiry is now or in the past;</item>
/// <item>expiring (critical): expires within <see cref="ExpiryFailDays"/> days;</item>
/// <item>expiring (warning): expires within <see cref="ExpiryWarningDays"/> days;</item>
/// <item>rotation overdue: last rotation is older than the policy interval plus 10% grace.</item>
/// </list>
/// Expired secrets and production secrets expiring critically fail the check; the remaining
/// findings produce a warning. A response body that is not valid JSON produces a warning
/// rather than being mistaken for "no secrets configured".
/// </remarks>
public sealed class EnvironmentSecretHealthCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.environment";
    private const string CategoryName = "Environment Health";
    private const int ExpiryWarningDays = 30;
    private const int ExpiryFailDays = 7;
    private const int DefaultRotationIntervalDays = 90;
    private const double RotationGraceFactor = 1.1; // 10% grace

    /// <inheritdoc />
    public string CheckId => "check.environment.secrets";

    /// <inheritdoc />
    public string Name => "Environment Secret Health";

    /// <inheritdoc />
    public string Description => "Check secrets health for environments";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["environment", "secrets", "security", "rotation", "expiry"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
            ?? context.Configuration["Release:Orchestrator:Url"];
        return !string.IsNullOrEmpty(orchestratorUrl);
    }

    /// <inheritdoc />
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
            ?? context.Configuration["Release:Orchestrator:Url"]
            ?? "http://localhost:5080";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Get secrets status (metadata only, no actual secret values).
            // The response owns the connection/content stream, so dispose it.
            using var response = await httpClient.GetAsync(
                $"{orchestratorUrl.TrimEnd('/')}/api/v1/environments/secrets/status",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                var statusCode = (int)response.StatusCode;
                return builder
                    .Warn($"Cannot retrieve secrets status: HTTP {statusCode}")
                    .WithEvidence("Secret Status", eb =>
                    {
                        eb.Add("orchestrator_url", orchestratorUrl);
                        eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var secretsJson = await response.Content.ReadAsStringAsync(ct);
            var secretsStatus = ParseSecretsStatus(secretsJson);

            if (secretsStatus is null)
            {
                // An unreadable payload must not be reported as a healthy "no secrets" state.
                return builder
                    .Warn("Cannot check secrets: secrets status response could not be parsed")
                    .WithEvidence("Secret Status", eb =>
                    {
                        eb.Add("orchestrator_url", orchestratorUrl);
                        eb.Add("error_message", "invalid_response_payload");
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (secretsStatus.Secrets.Count == 0)
            {
                return builder
                    .Pass("No secrets configured")
                    .WithEvidence("Secrets", eb =>
                    {
                        eb.Add("total_secrets", "0");
                    })
                    .Build();
            }

            var now = context.TimeProvider.GetUtcNow();
            var expired = new List<SecretInfo>();
            var expiringCritical = new List<SecretInfo>();
            var expiringWarning = new List<SecretInfo>();
            var rotationOverdue = new List<SecretInfo>();

            foreach (var secret in secretsStatus.Secrets)
            {
                if (secret.ExpiresAt.HasValue)
                {
                    var daysUntilExpiry = (secret.ExpiresAt.Value - now).TotalDays;
                    if (daysUntilExpiry <= 0)
                    {
                        expired.Add(secret);
                    }
                    else if (daysUntilExpiry <= ExpiryFailDays)
                    {
                        expiringCritical.Add(secret);
                    }
                    else if (daysUntilExpiry <= ExpiryWarningDays)
                    {
                        expiringWarning.Add(secret);
                    }
                }

                // A non-positive interval cannot express a rotation schedule; skipping it avoids
                // flagging every such secret as overdue.
                if (secret.RotationPolicy is { RotationIntervalDays: > 0 } policy && secret.LastRotated.HasValue)
                {
                    var daysSinceRotation = (now - secret.LastRotated.Value).TotalDays;
                    if (daysSinceRotation > policy.RotationIntervalDays * RotationGraceFactor)
                    {
                        rotationOverdue.Add(secret);
                    }
                }
            }

            // Check for critical issues (production secrets)
            var prodExpired = expired.Where(s => IsProd(s.Environment)).ToList();
            var prodExpiringCritical = expiringCritical.Where(s => IsProd(s.Environment)).ToList();

            if (prodExpired.Count > 0)
            {
                return builder
                    .Fail($"{prodExpired.Count} production secret(s) EXPIRED")
                    .WithEvidence("Secrets", eb =>
                    {
                        eb.Add("total_secrets", secretsStatus.Secrets.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expired_count", expired.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expiring_critical_count", expiringCritical.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expiring_warning_count", expiringWarning.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("rotation_overdue_count", rotationOverdue.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expired_secrets", DescribeAll(expired));
                    })
                    .WithCauses(
                        "Secret expired without rotation",
                        "Rotation job failed",
                        "Secret provider connection lost")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "Rotate expired secret immediately",
                            $"stella env secrets rotate {prodExpired[0].Environment} {prodExpired[0].Name}",
                            CommandType.Shell);
                        rb.AddStep(2, "Check secret provider status",
                            "stella secrets provider status",
                            CommandType.Shell);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (expired.Count > 0 || prodExpiringCritical.Count > 0)
            {
                return builder
                    .Fail($"{expired.Count} expired, {prodExpiringCritical.Count} production critical")
                    .WithEvidence("Secrets", eb =>
                    {
                        eb.Add("total_secrets", secretsStatus.Secrets.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expired_count", expired.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expiring_critical_count", expiringCritical.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expiring_warning_count", expiringWarning.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("rotation_overdue_count", rotationOverdue.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expired_secrets", DescribeAll(expired));
                    })
                    .WithCauses("Secrets expired or expiring soon")
                    .WithRemediation(rb =>
                    {
                        // Number the steps as they are emitted so the list never starts at 2.
                        var step = 0;
                        if (expired.Count > 0)
                        {
                            rb.AddStep(++step, "Rotate expired secret",
                                $"stella env secrets rotate {expired[0].Environment} {expired[0].Name}",
                                CommandType.Shell);
                        }

                        if (expiringCritical.Count > 0)
                        {
                            rb.AddStep(++step, "Schedule rotation for expiring secrets",
                                $"stella env secrets rotate-scheduled --days {ExpiryFailDays}",
                                CommandType.Manual);
                        }
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (expiringCritical.Count > 0 || expiringWarning.Count > 0 || rotationOverdue.Count > 0)
            {
                var issues = new List<string>();
                if (expiringCritical.Count > 0)
                {
                    issues.Add($"{expiringCritical.Count} expiring within {ExpiryFailDays} days");
                }

                if (expiringWarning.Count > 0)
                {
                    issues.Add($"{expiringWarning.Count} expiring within {ExpiryWarningDays} days");
                }

                if (rotationOverdue.Count > 0)
                {
                    issues.Add($"{rotationOverdue.Count} rotation overdue");
                }

                return builder
                    .Warn(string.Join(", ", issues))
                    .WithEvidence("Secrets", eb =>
                    {
                        eb.Add("total_secrets", secretsStatus.Secrets.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expired_count", "0");
                        eb.Add("expiring_critical_count", expiringCritical.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("expiring_warning_count", expiringWarning.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("rotation_overdue_count", rotationOverdue.Count.ToString(CultureInfo.InvariantCulture));
                        // Qualify names with the environment: the same name may exist in several.
                        eb.Add("expiring_secrets", DescribeAll(expiringCritical.Concat(expiringWarning)));
                        eb.Add("rotation_overdue_secrets", DescribeAll(rotationOverdue));
                    })
                    .WithCauses("Secrets expiring soon or rotation overdue")
                    .WithRemediation(rb => rb
                        .AddStep(1, "View secrets status",
                            "stella env secrets list --expiring",
                            CommandType.Shell))
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"{secretsStatus.Secrets.Count} secret(s) healthy")
                .WithEvidence("Secrets", eb =>
                {
                    eb.Add("total_secrets", secretsStatus.Secrets.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("expired_count", "0");
                    eb.Add("expiring_critical_count", "0");
                    eb.Add("expiring_warning_count", "0");
                    eb.Add("rotation_overdue_count", "0");
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check secrets: {ex.Message}")
                .WithEvidence("Secret Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("error_message", ex.Message);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            return builder
                .Warn("Secret check timed out")
                .WithEvidence("Secret Status", eb =>
                {
                    eb.Add("connection_error_type", "timeout");
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Returns <c>true</c> when the environment name contains "prod" (case-insensitive); this
    /// also covers "production".
    /// </summary>
    private static bool IsProd(string envName) =>
        envName.Contains("prod", StringComparison.OrdinalIgnoreCase);

    /// <summary>Renders secrets as <c>environment/name</c> pairs separated by <c>", "</c>.</summary>
    private static string DescribeAll(IEnumerable<SecretInfo> secrets) =>
        string.Join(", ", secrets.Select(s => $"{s.Environment}/{s.Name}"));

    /// <summary>
    /// Parses the secrets status from either a root JSON array or an object with a
    /// <c>secrets</c> array.
    /// </summary>
    /// <returns>
    /// The parsed status, or <c>null</c> when the payload is not valid JSON or its root is
    /// neither an array nor an object. An empty/whitespace body and an object without a
    /// <c>secrets</c> array both yield an empty (non-null) status.
    /// </returns>
    /// <remarks>
    /// Entries or fields with an unexpected JSON type are skipped individually, so one
    /// malformed secret cannot hide the secrets that follow it.
    /// </remarks>
    private static SecretsStatus? ParseSecretsStatus(string json)
    {
        var status = new SecretsStatus();
        if (string.IsNullOrWhiteSpace(json))
        {
            // e.g. HTTP 204: nothing to report.
            return status;
        }

        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;

            JsonElement secretsArray;
            if (root.ValueKind == JsonValueKind.Array)
            {
                secretsArray = root;
            }
            else if (root.ValueKind == JsonValueKind.Object)
            {
                if (!root.TryGetProperty("secrets", out var arr) || arr.ValueKind != JsonValueKind.Array)
                {
                    return status;
                }

                secretsArray = arr;
            }
            else
            {
                return null;
            }

            foreach (var secret in secretsArray.EnumerateArray())
            {
                if (secret.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                var name = ReadString(secret, "name");
                var env = ReadString(secret, "environment");
                if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(env))
                {
                    continue;
                }

                RotationPolicy? rotationPolicy = null;
                if (secret.TryGetProperty("rotationPolicy", out var policyEl) && policyEl.ValueKind == JsonValueKind.Object)
                {
                    var intervalDays = policyEl.TryGetProperty("intervalDays", out var intEl) &&
                                       intEl.ValueKind == JsonValueKind.Number &&
                                       intEl.TryGetInt32(out var parsedInterval)
                        ? parsedInterval
                        : DefaultRotationIntervalDays;
                    rotationPolicy = new RotationPolicy { RotationIntervalDays = intervalDays };
                }

                status.Secrets.Add(new SecretInfo
                {
                    Name = name,
                    Environment = env,
                    ExpiresAt = ReadTimestamp(secret, "expiresAt"),
                    LastRotated = ReadTimestamp(secret, "lastRotated"),
                    RotationPolicy = rotationPolicy
                });
            }
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException)
        {
            return null;
        }

        return status;
    }

    /// <summary>Reads a string property; a missing property or non-string value yields <c>null</c>.</summary>
    private static string? ReadString(JsonElement owner, string propertyName) =>
        owner.TryGetProperty(propertyName, out var el) && el.ValueKind == JsonValueKind.String
            ? el.GetString()
            : null;

    /// <summary>
    /// Reads a timestamp property using the invariant culture; values without an explicit
    /// offset are interpreted as UTC. Missing, non-string or unparseable values yield <c>null</c>.
    /// </summary>
    private static DateTimeOffset? ReadTimestamp(JsonElement owner, string propertyName)
    {
        var text = ReadString(owner, propertyName);
        if (DateTimeOffset.TryParse(text, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out var parsed))
        {
            return parsed;
        }

        return null;
    }

    /// <summary>Container for the secrets parsed from a status response.</summary>
    private sealed class SecretsStatus
    {
        public List<SecretInfo> Secrets { get; } = [];
    }

    /// <summary>Metadata of a single secret (never the secret value itself).</summary>
    private sealed class SecretInfo
    {
        public required string Name { get; init; }
        public required string Environment { get; init; }
        public DateTimeOffset? ExpiresAt { get; init; }
        public DateTimeOffset? LastRotated { get; init; }
        public RotationPolicy? RotationPolicy { get; init; }
    }

    /// <summary>Rotation schedule of a secret.</summary>
    private sealed class RotationPolicy
    {
        public int RotationIntervalDays { get; init; } = DefaultRotationIntervalDays;
    }
}

View File

@@ -0,0 +1,29 @@
// -----------------------------------------------------------------------------
// EnvironmentPluginServiceCollectionExtensions.cs
// Sprint: SPRINT_20260118_017_Doctor_environment_health
// Task: ENVH-001 - Create Environment plugin scaffold
// Description: Extension methods for registering the Environment plugin
// -----------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Environment.DependencyInjection;
/// <summary>
/// Dependency-injection helpers for the Environment Doctor plugin.
/// </summary>
public static class EnvironmentPluginServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="EnvironmentDoctorPlugin"/> as a singleton <see cref="IDoctorPlugin"/>.
    /// </summary>
    /// <param name="services">The service collection to add the registration to.</param>
    /// <returns>The same service collection, to allow call chaining.</returns>
    public static IServiceCollection AddDoctorEnvironmentPlugin(this IServiceCollection services) =>
        services.AddSingleton<IDoctorPlugin, EnvironmentDoctorPlugin>();
}

View File

@@ -0,0 +1,63 @@
// -----------------------------------------------------------------------------
// EnvironmentDoctorPlugin.cs
// Sprint: SPRINT_20260118_017_Doctor_environment_health
// Task: ENVH-001 - Create Environment plugin scaffold
// Description: Doctor plugin for per-environment health monitoring
// -----------------------------------------------------------------------------
using StellaOps.Doctor.Plugin.Environment.Checks;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Environment;
/// <summary>
/// Doctor plugin that groups the per-environment health checks: connectivity, drift,
/// capacity, deployment health, network policy and secrets.
/// </summary>
public sealed class EnvironmentDoctorPlugin : IDoctorPlugin
{
    private static readonly Version PluginVersion = new(1, 0, 0);
    private static readonly Version MinVersion = new(1, 0, 0);

    /// <inheritdoc />
    public string PluginId => "stellaops.doctor.environment";

    /// <inheritdoc />
    public string DisplayName => "Environment Health";

    /// <inheritdoc />
    public DoctorCategory Category => DoctorCategory.Environment;

    /// <inheritdoc />
    public Version Version => PluginVersion;

    /// <inheritdoc />
    public Version MinEngineVersion => MinVersion;

    /// <inheritdoc />
    // Always reports available; nothing is inspected on the service provider here.
    public bool IsAvailable(IServiceProvider services) => true;

    /// <inheritdoc />
    public IReadOnlyList<IDoctorCheck> GetChecks(DoctorPluginContext context)
    {
        // A fresh array of fresh check instances is produced on every call.
        IDoctorCheck[] checks =
        [
            new EnvironmentConnectivityCheck(),
            new EnvironmentDriftCheck(),
            new EnvironmentCapacityCheck(),
            new EnvironmentDeploymentHealthCheck(),
            new EnvironmentNetworkPolicyCheck(),
            new EnvironmentSecretHealthCheck()
        ];
        return checks;
    }

    /// <inheritdoc />
    // No initialization work is required.
    public Task InitializeAsync(DoctorPluginContext context, CancellationToken ct) => Task.CompletedTask;
}

View File

@@ -0,0 +1,110 @@
// -----------------------------------------------------------------------------
// IEnvironmentHealthClient.cs
// Sprint: SPRINT_20260118_017_Doctor_environment_health
// Task: ENVH-001 - Create Environment plugin scaffold
// Description: Interface for querying environment health status
// -----------------------------------------------------------------------------
namespace StellaOps.Doctor.Plugin.Environment.Services;
/// <summary>
/// Client interface for querying environment health status.
/// </summary>
/// <remarks>
/// All operations are asynchronous and accept an optional <see cref="CancellationToken"/>.
/// NOTE(review): the <c>environmentId</c> parameters presumably correspond to
/// <see cref="EnvironmentInfo.Id"/> - confirm against the implementation.
/// </remarks>
public interface IEnvironmentHealthClient
{
/// <summary>
/// Gets all configured environments.
/// </summary>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>A read-only list of <see cref="EnvironmentInfo"/> entries.</returns>
Task<IReadOnlyList<EnvironmentInfo>> GetEnvironmentsAsync(CancellationToken ct = default);
/// <summary>
/// Checks connectivity to an environment agent.
/// </summary>
/// <param name="environmentId">Identifier of the environment to probe.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>A <see cref="ConnectivityResult"/> describing the outcome of the probe.</returns>
Task<ConnectivityResult> CheckConnectivityAsync(string environmentId, CancellationToken ct = default);
/// <summary>
/// Gets deployed services for an environment.
/// </summary>
/// <param name="environmentId">Identifier of the environment to query.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>A read-only list of <see cref="DeployedService"/> entries.</returns>
Task<IReadOnlyList<DeployedService>> GetDeployedServicesAsync(string environmentId, CancellationToken ct = default);
/// <summary>
/// Gets resource capacity for an environment.
/// </summary>
/// <param name="environmentId">Identifier of the environment to query.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>A <see cref="CapacityInfo"/> with total and used resource figures.</returns>
Task<CapacityInfo> GetCapacityAsync(string environmentId, CancellationToken ct = default);
/// <summary>
/// Gets the configuration snapshot (including its hash) used for drift detection.
/// </summary>
/// <param name="environmentId">Identifier of the environment to query.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>A <see cref="ConfigurationSnapshot"/> for the environment.</returns>
Task<ConfigurationSnapshot> GetConfigurationSnapshotAsync(string environmentId, CancellationToken ct = default);
}
/// <summary>
/// Basic environment information.
/// </summary>
/// <remarks>
/// All properties are init-only. <see cref="Labels"/> is exposed as a mutable dictionary
/// interface, and record equality compares it by reference rather than by content.
/// </remarks>
public sealed record EnvironmentInfo
{
/// <summary>Environment identifier (required).</summary>
public required string Id { get; init; }
/// <summary>Environment name (required).</summary>
public required string Name { get; init; }
/// <summary>Environment type (required); the inline note lists dev, staging and prod.</summary>
public required string Type { get; init; } // dev, staging, prod
/// <summary>Endpoint of the environment agent (required). NOTE(review): format (URL vs host:port) is not shown here.</summary>
public required string AgentEndpoint { get; init; }
/// <summary>Whether the environment is active; defaults to <c>true</c>.</summary>
public bool IsActive { get; init; } = true;
/// <summary>Key/value labels; defaults to an empty dictionary.</summary>
public IDictionary<string, string> Labels { get; init; } = new Dictionary<string, string>();
}
/// <summary>
/// Result of connectivity check.
/// </summary>
/// <remarks>
/// The four <c>required</c> members must always be supplied; the remaining members are
/// optional and default to <c>null</c>.
/// </remarks>
public sealed record ConnectivityResult
{
/// <summary>Whether the target was reachable (required).</summary>
public required bool Reachable { get; init; }
/// <summary>Latency in milliseconds, per the property name (required). NOTE(review): how it is measured is not shown here.</summary>
public required int LatencyMs { get; init; }
/// <summary>Whether authentication succeeded (required).</summary>
public required bool AuthSuccess { get; init; }
/// <summary>Whether TLS was considered valid (required).</summary>
public required bool TlsValid { get; init; }
/// <summary>TLS expiry timestamp, when known.</summary>
public DateTimeOffset? TlsExpiresAt { get; init; }
/// <summary>Days until TLS expiry, when known. NOTE(review): not derived from <see cref="TlsExpiresAt"/> in this type; producers must keep the two consistent.</summary>
public int? TlsDaysUntilExpiry { get; init; }
/// <summary>Error description, when one is available.</summary>
public string? ErrorMessage { get; init; }
/// <summary>Timestamp of the last successful contact, when known.</summary>
public DateTimeOffset? LastSuccessfulContact { get; init; }
}
/// <summary>
/// Deployed service within an environment.
/// </summary>
public sealed record DeployedService
{
/// <summary>Service identifier (required).</summary>
public required string Id { get; init; }
/// <summary>Service name (required).</summary>
public required string Name { get; init; }
/// <summary>Deployed version (required).</summary>
public required string Version { get; init; }
/// <summary>Service status (required); the inline note lists running, stopped, failed and degraded.</summary>
public required string Status { get; init; } // running, stopped, failed, degraded
/// <summary>Replica count; defaults to 0. NOTE(review): presumably the desired/total count - confirm with the producer.</summary>
public int Replicas { get; init; }
/// <summary>Number of healthy replicas; defaults to 0.</summary>
public int HealthyReplicas { get; init; }
/// <summary>Timestamp of the last deployment, when known.</summary>
public DateTimeOffset? LastDeployedAt { get; init; }
/// <summary>Error description, when one is available.</summary>
public string? Error { get; init; }
}
/// <summary>
/// Resource capacity information.
/// </summary>
/// <remarks>
/// Units follow the property names (millicores, bytes). The type performs no validation,
/// so "used" values are not guaranteed to be less than or equal to their "total" counterparts.
/// </remarks>
public sealed record CapacityInfo
{
/// <summary>Total CPU in millicores (required).</summary>
public required long TotalCpuMillicores { get; init; }
/// <summary>Used CPU in millicores (required).</summary>
public required long UsedCpuMillicores { get; init; }
/// <summary>Total memory in bytes (required).</summary>
public required long TotalMemoryBytes { get; init; }
/// <summary>Used memory in bytes (required).</summary>
public required long UsedMemoryBytes { get; init; }
/// <summary>Total storage in bytes (required).</summary>
public required long TotalStorageBytes { get; init; }
/// <summary>Used storage in bytes (required).</summary>
public required long UsedStorageBytes { get; init; }
/// <summary>Total number of nodes; defaults to 0.</summary>
public int TotalNodes { get; init; }
/// <summary>Number of healthy nodes; defaults to 0.</summary>
public int HealthyNodes { get; init; }
/// <summary>Maximum number of concurrent deployments; defaults to 0. NOTE(review): whether 0 means "unlimited" or "none" is not shown here.</summary>
public int MaxConcurrentDeployments { get; init; }
/// <summary>Number of currently active deployments; defaults to 0.</summary>
public int ActiveDeployments { get; init; }
}
/// <summary>
/// Configuration snapshot for drift detection.
/// </summary>
/// <remarks>
/// <see cref="ConfigValues"/> is exposed as a mutable dictionary interface, and record
/// equality compares it by reference rather than by content.
/// </remarks>
public sealed record ConfigurationSnapshot
{
/// <summary>Identifier of the environment the snapshot belongs to (required).</summary>
public required string EnvironmentId { get; init; }
/// <summary>Hash of the configuration (required). NOTE(review): algorithm and encoding are not shown here.</summary>
public required string ConfigHash { get; init; }
/// <summary>Timestamp at which the snapshot was captured (required).</summary>
public required DateTimeOffset CapturedAt { get; init; }
/// <summary>Configuration key/value pairs; defaults to an empty dictionary.</summary>
public IDictionary<string, string> ConfigValues { get; init; } = new Dictionary<string, string>();
}

View File

@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Environment health plugin for Stella Ops Doctor. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<!-- Implicit usings are enabled, so common System.* namespaces need no explicit using directives. -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<!-- Every compiler warning (including nullable warnings) fails the build. -->
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Doctor.Plugin.Environment</RootNamespace>
<Description>Environment health checks for Stella Ops Doctor diagnostics</Description>
</PropertyGroup>
<ItemGroup>
<!-- Core Doctor library; the only project reference declared here. -->
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Doctor\StellaOps.Doctor.csproj" />
</ItemGroup>
</Project>

View File

@@ -1,13 +1,16 @@
// -----------------------------------------------------------------------------
// PolicyEngineHealthCheck.cs
// Sprint: SPRINT_20260117_010_CLI_policy_engine
// Task: PEN-005 - Doctor check for policy engine health
// Sprint: SPRINT_20260118_015_Doctor_check_quality_improvements
// Task: DQUAL-001 - Replace PolicyEngineHealthCheck mock implementation
// Description: Health check for policy engine compilation, evaluation, and storage
// -----------------------------------------------------------------------------
using System.Diagnostics;
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
@@ -18,6 +21,11 @@ namespace StellaOps.Doctor.Plugin.Policy.Checks;
/// </summary>
public sealed class PolicyEngineHealthCheck : IDoctorCheck
{
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
{
PropertyNameCaseInsensitive = true
};
/// <inheritdoc />
public string CheckId => "check.policy.engine";
@@ -39,7 +47,10 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
return true;
// Check if policy engine URL is configured
var policyEngineUrl = context.Configuration["Policy:Engine:Url"]
?? context.Configuration["PolicyEngine:BaseUrl"];
return !string.IsNullOrEmpty(policyEngineUrl);
}
/// <inheritdoc />
@@ -47,128 +58,341 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
{
var builder = context.CreateResult(CheckId, "stellaops.doctor.policy", "Policy");
var compilationResult = await CheckCompilationAsync(context, ct);
var evaluationResult = await CheckEvaluationAsync(context, ct);
var storageResult = await CheckStorageAsync(context, ct);
var policyEngineUrl = context.Configuration["Policy:Engine:Url"]
?? context.Configuration["PolicyEngine:BaseUrl"]
?? "http://localhost:8181";
// Aggregate results
var allPassed = compilationResult.Passed && evaluationResult.Passed && storageResult.Passed;
var hasWarnings = compilationResult.HasWarnings || evaluationResult.HasWarnings || storageResult.HasWarnings;
if (!allPassed)
try
{
var failedChecks = new List<string>();
if (!compilationResult.Passed) failedChecks.Add("compilation");
if (!evaluationResult.Passed) failedChecks.Add("evaluation");
if (!storageResult.Passed) failedChecks.Add("storage");
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
httpClient.Timeout = TimeSpan.FromSeconds(10);
var compilationResult = await CheckCompilationAsync(httpClient, policyEngineUrl, ct);
var evaluationResult = await CheckEvaluationAsync(httpClient, policyEngineUrl, ct);
var storageResult = await CheckStorageAsync(httpClient, policyEngineUrl, ct);
// Aggregate results
var allPassed = compilationResult.Passed && evaluationResult.Passed && storageResult.Passed;
var hasWarnings = compilationResult.HasWarnings || evaluationResult.HasWarnings || storageResult.HasWarnings;
if (!allPassed)
{
var failedChecks = new List<string>();
if (!compilationResult.Passed) failedChecks.Add("compilation");
if (!evaluationResult.Passed) failedChecks.Add("evaluation");
if (!storageResult.Passed) failedChecks.Add("storage");
return builder
.Fail($"Policy engine health check failed: {string.Join(", ", failedChecks)}")
.WithEvidence("Engine Status", eb =>
{
eb.Add("engine_type", compilationResult.EngineType ?? "unknown");
eb.Add("engine_version", compilationResult.EngineVersion ?? "unknown");
eb.Add("engine_url", policyEngineUrl);
eb.Add("compilation_status", compilationResult.Passed ? "OK" : "FAILED");
eb.Add("evaluation_status", evaluationResult.Passed ? "OK" : "FAILED");
eb.Add("storage_status", storageResult.Passed ? "OK" : "FAILED");
eb.Add("policy_count", compilationResult.PolicyCount.ToString(CultureInfo.InvariantCulture));
eb.Add("compilation_time_ms", compilationResult.CompilationTimeMs.ToString(CultureInfo.InvariantCulture));
eb.Add("evaluation_latency_p50_ms", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
eb.Add("cache_hit_ratio", compilationResult.CacheHitRatio.ToString("F2", CultureInfo.InvariantCulture));
if (!string.IsNullOrEmpty(compilationResult.LastCompilationError))
{
eb.Add("last_compilation_error", compilationResult.LastCompilationError);
}
if (!string.IsNullOrEmpty(evaluationResult.Error))
{
eb.Add("evaluation_error", evaluationResult.Error);
}
if (!string.IsNullOrEmpty(storageResult.Error))
{
eb.Add("storage_error", storageResult.Error);
}
})
.WithCauses(
"Policy engine service not running",
"Policy storage unavailable",
"OPA/Rego compilation error",
"Policy cache corrupted")
.WithRemediation(rb => rb
.AddStep(1, "Check policy engine service status",
"stella policy status",
CommandType.Shell)
.AddStep(2, "Verify policy storage connectivity",
"stella doctor --check check.storage.postgres",
CommandType.Shell)
.AddStep(3, "Recompile policies",
"stella policy compile --all",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
if (hasWarnings)
{
return builder
.Warn("Policy engine health check passed with warnings")
.WithEvidence("Engine Status", eb =>
{
eb.Add("engine_type", compilationResult.EngineType ?? "unknown");
eb.Add("engine_version", compilationResult.EngineVersion ?? "unknown");
eb.Add("engine_url", policyEngineUrl);
eb.Add("compilation_status", "OK");
eb.Add("evaluation_status", "OK");
eb.Add("storage_status", "OK");
eb.Add("policy_count", compilationResult.PolicyCount.ToString(CultureInfo.InvariantCulture));
eb.Add("evaluation_latency_p50_ms", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
eb.Add("cache_hit_ratio", compilationResult.CacheHitRatio.ToString("F2", CultureInfo.InvariantCulture));
if (evaluationResult.EvaluationTimeMs > 100)
{
eb.Add("performance_warning", "SLOW - evaluation time exceeds 100ms threshold");
}
})
.WithCauses(
"Policy evaluation is slower than expected",
"Policy cache may need warming")
.WithRemediation(rb => rb
.AddStep(1, "Warm policy cache",
"stella policy cache warm",
CommandType.Shell)
.AddStep(2, "Check for complex policies",
"stella policy list --complexity high",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
return builder
.Fail($"Policy engine health check failed: {string.Join(", ", failedChecks)}")
.Pass("Policy engine is healthy")
.WithEvidence("Engine Status", eb =>
{
eb.Add("Compilation", compilationResult.Passed ? "OK" : "FAILED");
eb.Add("Evaluation", evaluationResult.Passed ? "OK" : "FAILED");
eb.Add("Storage", storageResult.Passed ? "OK" : "FAILED");
eb.Add("EvaluationTimeMs", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
eb.Add("engine_type", compilationResult.EngineType ?? "unknown");
eb.Add("engine_version", compilationResult.EngineVersion ?? "unknown");
eb.Add("engine_url", policyEngineUrl);
eb.Add("compilation_status", "OK");
eb.Add("evaluation_status", "OK");
eb.Add("storage_status", "OK");
eb.Add("policy_count", compilationResult.PolicyCount.ToString(CultureInfo.InvariantCulture));
eb.Add("compilation_time_ms", compilationResult.CompilationTimeMs.ToString(CultureInfo.InvariantCulture));
eb.Add("evaluation_latency_p50_ms", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
eb.Add("cache_hit_ratio", compilationResult.CacheHitRatio.ToString("F2", CultureInfo.InvariantCulture));
})
.Build();
}
catch (HttpRequestException ex)
{
return builder
.Fail($"Cannot reach policy engine at {policyEngineUrl}")
.WithEvidence("Engine Status", eb =>
{
eb.Add("engine_url", policyEngineUrl);
eb.Add("connection_error_type", GetConnectionErrorType(ex));
eb.Add("error_message", ex.Message);
})
.WithCauses(
"Policy engine service not running",
"Policy storage unavailable",
"OPA/Rego compilation error",
"Policy cache corrupted")
"Network connectivity issue",
"Firewall blocking access",
"DNS resolution failure")
.WithRemediation(rb => rb
.AddStep(1, "Check policy engine service status",
"stella policy status",
CommandType.Shell)
.AddStep(2, "Verify policy storage connectivity",
"stella doctor --check check.storage.postgres",
CommandType.Shell)
.AddStep(3, "Recompile policies",
"stella policy compile --all",
.AddStep(2, "Verify network connectivity",
$"curl -s {policyEngineUrl}/health",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
if (hasWarnings)
catch (TaskCanceledException)
{
return builder
.Warn("Policy engine health check passed with warnings")
.Fail($"Policy engine request timed out ({policyEngineUrl})")
.WithEvidence("Engine Status", eb =>
{
eb.Add("Compilation", "OK");
eb.Add("Evaluation", "OK");
eb.Add("Storage", "OK");
eb.Add("EvaluationTimeMs", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
if (evaluationResult.EvaluationTimeMs > 100)
{
eb.Add("Performance", "SLOW - evaluation time exceeds 100ms threshold");
}
eb.Add("engine_url", policyEngineUrl);
eb.Add("connection_error_type", "timeout");
eb.Add("timeout_seconds", "10");
})
.WithCauses(
"Policy evaluation is slower than expected",
"Policy cache may need warming")
"Policy engine overloaded",
"Network latency too high",
"Policy engine deadlocked")
.WithRemediation(rb => rb
.AddStep(1, "Warm policy cache",
"stella policy cache warm",
.AddStep(1, "Check policy engine metrics",
"stella policy metrics",
CommandType.Shell)
.AddStep(2, "Check for complex policies",
"stella policy list --complexity high",
CommandType.Shell))
.AddStep(2, "Restart policy engine if needed",
"stella policy restart",
CommandType.Manual))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
return builder
.Pass("Policy engine is healthy")
.WithEvidence("Engine Status", eb =>
{
eb.Add("Compilation", "OK");
eb.Add("Evaluation", "OK");
eb.Add("Storage", "OK");
eb.Add("EvaluationTimeMs", evaluationResult.EvaluationTimeMs.ToString(CultureInfo.InvariantCulture));
eb.Add("PolicyCount", compilationResult.PolicyCount.ToString(CultureInfo.InvariantCulture));
})
.Build();
}
private Task<CompilationCheckResult> CheckCompilationAsync(DoctorPluginContext context, CancellationToken ct)
private static string GetConnectionErrorType(HttpRequestException ex)
{
// Simulate compilation check
return Task.FromResult(new CompilationCheckResult
{
Passed = true,
PolicyCount = 12,
CompilationTimeMs = 45
});
var message = ex.Message.ToLowerInvariant();
if (message.Contains("ssl") || message.Contains("tls") || message.Contains("certificate"))
return "ssl_error";
if (message.Contains("name") || message.Contains("dns") || message.Contains("resolve"))
return "dns_failure";
if (message.Contains("refused") || message.Contains("actively refused"))
return "refused";
if (message.Contains("timeout"))
return "timeout";
return "connection_failed";
}
private Task<EvaluationCheckResult> CheckEvaluationAsync(DoctorPluginContext context, CancellationToken ct)
private async Task<CompilationCheckResult> CheckCompilationAsync(HttpClient httpClient, string baseUrl, CancellationToken ct)
{
// Simulate evaluation check with a sample policy
var result = new CompilationCheckResult();
var stopwatch = Stopwatch.StartNew();
// In real implementation, this would evaluate a test policy
Thread.Sleep(25); // Simulate evaluation time
stopwatch.Stop();
return Task.FromResult(new EvaluationCheckResult
try
{
Passed = true,
HasWarnings = stopwatch.ElapsedMilliseconds > 100,
EvaluationTimeMs = stopwatch.ElapsedMilliseconds
});
// Check OPA health/info endpoint for engine info
var healthResponse = await httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/health", ct);
if (healthResponse.IsSuccessStatusCode)
{
result.EngineType = "opa";
}
// Get policy list to count policies
var policiesResponse = await httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/policies", ct);
if (policiesResponse.IsSuccessStatusCode)
{
var policiesJson = await policiesResponse.Content.ReadAsStringAsync(ct);
using var doc = JsonDocument.Parse(policiesJson);
if (doc.RootElement.TryGetProperty("result", out var resultArray) && resultArray.ValueKind == JsonValueKind.Array)
{
result.PolicyCount = resultArray.GetArrayLength();
}
}
else
{
result.Passed = false;
result.LastCompilationError = $"Failed to list policies: HTTP {(int)policiesResponse.StatusCode}";
return result;
}
// Get engine info/status for version and metrics
var statusResponse = await httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/status", ct);
if (statusResponse.IsSuccessStatusCode)
{
var statusJson = await statusResponse.Content.ReadAsStringAsync(ct);
using var statusDoc = JsonDocument.Parse(statusJson);
// Try to extract version
if (statusDoc.RootElement.TryGetProperty("result", out var statusResult))
{
if (statusResult.TryGetProperty("version", out var versionEl))
{
result.EngineVersion = versionEl.GetString();
}
// Try to extract cache metrics
if (statusResult.TryGetProperty("metrics", out var metrics))
{
if (metrics.TryGetProperty("cache_hit_ratio", out var cacheHitEl))
{
result.CacheHitRatio = cacheHitEl.GetDouble();
}
}
}
}
stopwatch.Stop();
result.CompilationTimeMs = stopwatch.ElapsedMilliseconds;
result.Passed = true;
}
catch (Exception ex)
{
result.Passed = false;
result.LastCompilationError = ex.Message;
}
return result;
}
private Task<StorageCheckResult> CheckStorageAsync(DoctorPluginContext context, CancellationToken ct)
private async Task<EvaluationCheckResult> CheckEvaluationAsync(HttpClient httpClient, string baseUrl, CancellationToken ct)
{
// Simulate storage check
return Task.FromResult(new StorageCheckResult
var result = new EvaluationCheckResult();
var stopwatch = Stopwatch.StartNew();
try
{
Passed = true,
PolicyVersions = 34
});
// Evaluate a canary policy with known input/output
// POST to OPA data endpoint with minimal input
var canaryInput = new { input = new { doctor_check = true, timestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds() } };
var content = new StringContent(
JsonSerializer.Serialize(canaryInput, JsonOptions),
System.Text.Encoding.UTF8,
"application/json");
var evalResponse = await httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/data/system/health", content, ct);
stopwatch.Stop();
result.EvaluationTimeMs = stopwatch.ElapsedMilliseconds;
// 200 or 404 (no policy at path) are both acceptable for health check
// 500 indicates actual engine error
if (evalResponse.IsSuccessStatusCode || evalResponse.StatusCode == System.Net.HttpStatusCode.NotFound)
{
result.Passed = true;
result.HasWarnings = result.EvaluationTimeMs > 100; // Warn if slow
}
else
{
result.Passed = false;
result.Error = $"Policy evaluation failed: HTTP {(int)evalResponse.StatusCode}";
}
}
catch (Exception ex)
{
result.Passed = false;
result.Error = ex.Message;
}
return result;
}
/// <summary>
/// Probes policy storage by requesting the engine's <c>/v1/data</c> document.
/// </summary>
/// <param name="httpClient">Client used to issue the request.</param>
/// <param name="baseUrl">Policy engine base URL; a trailing slash is tolerated.</param>
/// <param name="ct">Cancellation token passed to the request and to the body read/parse.</param>
/// <returns>
/// A result whose <c>Passed</c> is true when the endpoint answers with a success status and
/// the body parses as JSON. Any exception is captured into <c>Error</c> instead of being thrown.
/// </returns>
private async Task<StorageCheckResult> CheckStorageAsync(HttpClient httpClient, string baseUrl, CancellationToken ct)
{
    var result = new StorageCheckResult();

    try
    {
        // Check if we can access policy data (storage is working).
        // The response is disposed so the underlying connection is released promptly.
        using var dataResponse = await httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/data", ct);

        if (!dataResponse.IsSuccessStatusCode)
        {
            result.Passed = false;
            result.Error = $"Storage check failed: HTTP {(int)dataResponse.StatusCode}";
            return result;
        }

        result.Passed = true;

        // Parse straight from the response stream; the data document can be large and
        // only the number of top-level entries is needed.
        await using var body = await dataResponse.Content.ReadAsStreamAsync(ct);
        using var doc = await JsonDocument.ParseAsync(body, cancellationToken: ct);

        // Count top-level data entries as proxy for stored policy versions
        if (doc.RootElement.TryGetProperty("result", out var resultObj) && resultObj.ValueKind == JsonValueKind.Object)
        {
            result.PolicyVersions = resultObj.EnumerateObject().Count();
        }
    }
    catch (Exception ex)
    {
        // A failure after the success status (e.g. malformed JSON) also fails the check.
        result.Passed = false;
        result.Error = ex.Message;
    }

    return result;
}
private sealed class CompilationCheckResult
@@ -177,6 +401,10 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
public bool HasWarnings { get; set; }
public int PolicyCount { get; set; }
public long CompilationTimeMs { get; set; }
public string? EngineType { get; set; }
public string? EngineVersion { get; set; }
public double CacheHitRatio { get; set; }
public string? LastCompilationError { get; set; }
}
private sealed class EvaluationCheckResult
@@ -184,6 +412,7 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
public bool Passed { get; set; }
public bool HasWarnings { get; set; }
public long EvaluationTimeMs { get; set; }
public string? Error { get; set; }
}
private sealed class StorageCheckResult
@@ -191,5 +420,6 @@ public sealed class PolicyEngineHealthCheck : IDoctorCheck
public bool Passed { get; set; }
public bool HasWarnings { get; set; }
public int PolicyVersions { get; set; }
public string? Error { get; set; }
}
}

View File

@@ -0,0 +1,376 @@
// -----------------------------------------------------------------------------
// ActiveReleaseHealthCheck.cs
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
// Task: RELPIPE-002 - Implement ActiveReleaseHealthCheck
// Description: Check health of currently active releases
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Release.Checks;
/// <summary>
/// Checks health of currently active releases.
/// Identifies releases stuck in states, with failed steps, or awaiting approval for too long.
/// </summary>
public sealed class ActiveReleaseHealthCheck : IDoctorCheck
{
private const string PluginId = "stellaops.doctor.release";
private const string CategoryName = "Release Pipeline";
// Thresholds
private static readonly TimeSpan StuckWarningThreshold = TimeSpan.FromHours(1);
private static readonly TimeSpan StuckFailThreshold = TimeSpan.FromHours(4);
private static readonly TimeSpan ApprovalWarningThreshold = TimeSpan.FromHours(4);
private static readonly TimeSpan ApprovalFailThreshold = TimeSpan.FromHours(24);
/// <inheritdoc />
public string CheckId => "check.release.active";
/// <inheritdoc />
public string Name => "Active Release Health";
/// <inheritdoc />
public string Description => "Check health of currently active releases";
/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
/// <inheritdoc />
public IReadOnlyList<string> Tags => ["release", "pipeline", "active", "monitoring"];
/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
    // The check applies only when an orchestrator URL is present under one of the two
    // supported configuration keys; the second key is consulted only if the first is absent.
    var configuration = context.Configuration;
    var candidate = configuration["ReleaseOrchestrator:Url"];
    if (candidate is null)
    {
        candidate = configuration["Release:Orchestrator:Url"];
    }

    return candidate is { Length: > 0 };
}
/// <inheritdoc />
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    // Asks the Release Orchestrator for its active releases and grades them:
    //   Fail - a release reports an error, or a stuck / approval wait exceeds its fail threshold
    //   Warn - a stuck / approval wait exceeds its warning threshold, or the orchestrator
    //          could not be queried (non-success HTTP status, connection failure, timeout)
    //   Pass - everything else, including "no active releases"
    const int TimeoutSeconds = 10;

    var builder = context.CreateResult(CheckId, PluginId, CategoryName);

    var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
        ?? context.Configuration["Release:Orchestrator:Url"]
        ?? "http://localhost:5080";

    // Normalised once so request URLs and suggested commands never contain "//".
    // Evidence still reports the URL exactly as configured.
    var baseUrl = orchestratorUrl.TrimEnd('/');

    try
    {
        var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
        var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
        httpClient.Timeout = TimeSpan.FromSeconds(TimeoutSeconds);

        // Query active releases. The response is disposed when the method exits.
        using var response = await httpClient.GetAsync(
            $"{baseUrl}/api/v1/releases?state=active",
            ct);

        if (!response.IsSuccessStatusCode)
        {
            var statusCode = (int)response.StatusCode;

            return builder
                .Warn($"Cannot reach Release Orchestrator: HTTP {statusCode}")
                .WithEvidence("Release Orchestrator Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                    eb.Add("connection_error_type", "http_error");
                })
                .WithCauses(
                    "Release Orchestrator service unavailable",
                    "Authentication/authorization failure",
                    "Network connectivity issue")
                .WithRemediation(rb => rb
                    .AddStep(1, "Check Release Orchestrator health",
                        $"curl -s {baseUrl}/health",
                        CommandType.Shell)
                    .AddStep(2, "Check service status",
                        "stella release status",
                        CommandType.Shell))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        var releasesJson = await response.Content.ReadAsStringAsync(ct);
        var releases = ParseReleases(releasesJson);

        var now = context.TimeProvider.GetUtcNow();
        var activeCount = releases.Count;
        var stuckReleases = new List<ReleaseInfo>();
        var failedReleases = new List<ReleaseInfo>();
        var pendingApprovals = new List<ReleaseInfo>();

        foreach (var release in releases)
        {
            var duration = now - release.StartedAt;

            if (!string.IsNullOrEmpty(release.Error))
            {
                // Carry the age along so failed releases are not listed as "0min" in the evidence.
                failedReleases.Add(release with { Duration = duration });
            }
            else if (release.State.Equals("pending_approval", StringComparison.OrdinalIgnoreCase))
            {
                if (duration > ApprovalWarningThreshold)
                {
                    pendingApprovals.Add(release with { Duration = duration });
                }
            }
            else if (release.State.Equals("executing", StringComparison.OrdinalIgnoreCase) ||
                     release.State.Equals("pending", StringComparison.OrdinalIgnoreCase))
            {
                if (duration > StuckWarningThreshold)
                {
                    stuckReleases.Add(release with { Duration = duration });
                }
            }
        }

        // Determine severity
        var hasFailure = failedReleases.Count > 0 ||
                         stuckReleases.Any(r => r.Duration > StuckFailThreshold) ||
                         pendingApprovals.Any(r => r.Duration > ApprovalFailThreshold);

        var hasWarning = stuckReleases.Count > 0 || pendingApprovals.Count > 0;

        if (hasFailure)
        {
            return builder
                .Fail("Critical release issues detected")
                .WithEvidence("Active Releases", eb =>
                {
                    eb.Add("active_release_count", activeCount.ToString(CultureInfo.InvariantCulture));
                    eb.Add("stuck_release_count", stuckReleases.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("failed_release_count", failedReleases.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("pending_approval_count", pendingApprovals.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("oldest_active_release_age_minutes", releases.Count > 0
                        ? ((int)(now - releases.Min(r => r.StartedAt)).TotalMinutes).ToString(CultureInfo.InvariantCulture)
                        : "0");
                    AddReleaseListEvidence(eb, "stuck_releases", stuckReleases);
                    AddReleaseListEvidence(eb, "failed_releases", failedReleases);
                    AddReleaseListEvidence(eb, "approval_pending_releases", pendingApprovals);
                })
                .WithCauses(
                    "Release workflow step failed",
                    "Approval bottleneck",
                    "Environment unreachable",
                    "Resource contention")
                .WithRemediation(rb =>
                {
                    // A running counter keeps the step numbers contiguous (1, 2, ...) no matter
                    // which categories are present.
                    var step = 0;

                    if (failedReleases.Count > 0)
                    {
                        rb.AddStep(++step, "Inspect failed release",
                            $"stella release inspect {failedReleases[0].Id}",
                            CommandType.Shell);
                        rb.AddStep(++step, "View release logs",
                            $"stella release logs {failedReleases[0].Id}",
                            CommandType.Shell);
                    }

                    if (stuckReleases.Count > 0)
                    {
                        rb.AddStep(++step, "Check stuck release",
                            $"stella release inspect {stuckReleases[0].Id}",
                            CommandType.Shell);
                    }

                    if (pendingApprovals.Count > 0)
                    {
                        rb.AddStep(++step, "Review pending approvals",
                            "stella release approvals list",
                            CommandType.Shell);
                    }
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (hasWarning)
        {
            // hasFailure is false here, so there are no failed releases to report.
            return builder
                .Warn("Release pipeline has items requiring attention")
                .WithEvidence("Active Releases", eb =>
                {
                    eb.Add("active_release_count", activeCount.ToString(CultureInfo.InvariantCulture));
                    eb.Add("stuck_release_count", stuckReleases.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("failed_release_count", "0");
                    eb.Add("pending_approval_count", pendingApprovals.Count.ToString(CultureInfo.InvariantCulture));
                    AddReleaseListEvidence(eb, "stuck_releases", stuckReleases);
                    AddReleaseListEvidence(eb, "approval_pending_releases", pendingApprovals);
                })
                .WithCauses(
                    "Release taking longer than expected",
                    "Approval not yet provided",
                    "Environment slow to respond")
                .WithRemediation(rb =>
                {
                    // Same contiguous numbering as in the failure branch.
                    var step = 0;

                    if (stuckReleases.Count > 0)
                    {
                        rb.AddStep(++step, "Inspect slow release",
                            $"stella release inspect {stuckReleases[0].Id}",
                            CommandType.Shell);
                    }

                    if (pendingApprovals.Count > 0)
                    {
                        rb.AddStep(++step, "Review pending approvals",
                            "stella release approvals list",
                            CommandType.Shell);
                    }
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        // All healthy
        return builder
            .Pass(activeCount == 0
                ? "No active releases"
                : $"{activeCount} release(s) progressing normally")
            .WithEvidence("Active Releases", eb =>
            {
                eb.Add("active_release_count", activeCount.ToString(CultureInfo.InvariantCulture));
                eb.Add("stuck_release_count", "0");
                eb.Add("failed_release_count", "0");
                eb.Add("pending_approval_count", "0");
                if (releases.Count > 0)
                {
                    eb.Add("releases_in_progress", string.Join(", ", releases.Select(r => r.Name)));
                }
            })
            .Build();
    }
    catch (HttpRequestException ex)
    {
        return builder
            .Warn($"Cannot reach Release Orchestrator: {ex.Message}")
            .WithEvidence("Release Orchestrator Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("error_message", ex.Message);
                eb.Add("connection_error_type", GetConnectionErrorType(ex));
            })
            .WithCauses(
                "Release Orchestrator service down",
                "Network connectivity issue",
                "DNS resolution failure")
            .WithRemediation(rb => rb
                .AddStep(1, "Check Release Orchestrator health",
                    $"curl -s {baseUrl}/health",
                    CommandType.Shell))
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
    catch (TaskCanceledException)
    {
        // NOTE(review): HttpClient raises TaskCanceledException both when its Timeout elapses
        // and when `ct` is cancelled by the caller, so a caller-initiated cancellation is
        // reported as a timeout here. Confirm whether the host expects cancellation to propagate.
        return builder
            .Warn("Release Orchestrator connection timed out")
            .WithEvidence("Release Orchestrator Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("error_message", "Request timed out");
                eb.Add("connection_error_type", "timeout");
                eb.Add("timeout_seconds", TimeoutSeconds.ToString(CultureInfo.InvariantCulture));
            })
            .WithCauses(
                "Release Orchestrator overloaded",
                "Network latency too high")
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
}
/// <summary>
/// Parses the orchestrator's release listing into <see cref="ReleaseInfo"/> records.
/// </summary>
/// <param name="json">
/// Response body: either a bare JSON array of releases or an object with a "releases" array.
/// </param>
/// <returns>
/// The releases that carry both a non-empty "id" and "name". Parsing is best effort:
/// malformed JSON yields an empty list, and entries or fields of an unexpected JSON type
/// are skipped individually instead of aborting the whole parse.
/// </returns>
private static List<ReleaseInfo> ParseReleases(string json)
{
    var releases = new List<ReleaseInfo>();

    if (string.IsNullOrWhiteSpace(json))
    {
        return releases;
    }

    // Reads a property only when it is a JSON string; anything else counts as absent.
    static string? ReadString(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        JsonElement releasesArray = default;
        if (root.ValueKind == JsonValueKind.Array)
        {
            releasesArray = root;
        }
        else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("releases", out var arr))
        {
            releasesArray = arr;
        }

        if (releasesArray.ValueKind != JsonValueKind.Array)
        {
            return releases;
        }

        foreach (var release in releasesArray.EnumerateArray())
        {
            if (release.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var id = ReadString(release, "id");
            var name = ReadString(release, "name");

            if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
            {
                continue;
            }

            // Invariant culture keeps parsing independent of the host's regional settings.
            // Timestamps without an explicit offset are treated as UTC rather than host-local
            // time (assumption: the orchestrator emits UTC - TODO confirm against its API).
            // A missing or unparseable value falls back to "now", i.e. an age of about zero.
            var startedAt = DateTimeOffset.TryParse(
                ReadString(release, "startedAt"),
                CultureInfo.InvariantCulture,
                DateTimeStyles.AssumeUniversal,
                out var parsed)
                ? parsed
                : DateTimeOffset.UtcNow;

            releases.Add(new ReleaseInfo
            {
                Id = id,
                Name = name,
                State = ReadString(release, "state") ?? "unknown",
                StartedAt = startedAt,
                Error = ReadString(release, "error"),
                CurrentStep = ReadString(release, "currentStep"),
                TargetEnvironment = ReadString(release, "targetEnvironment")
            });
        }
    }
    catch (Exception ex) when (ex is JsonException or InvalidOperationException)
    {
        // Best effort parsing: keep whatever was collected before the failure.
    }

    return releases;
}
/// <summary>
/// Adds one evidence entry summarising a list of releases as comma-separated
/// "name:state:Nmin" items, or "[]" when the list is empty.
/// </summary>
/// <param name="eb">Evidence builder receiving the entry.</param>
/// <param name="key">Evidence key to write.</param>
/// <param name="releases">Releases to summarise; the whole-minute part of Duration is reported.</param>
private static void AddReleaseListEvidence(EvidenceBuilder eb, string key, List<ReleaseInfo> releases)
{
    var value = "[]";

    if (releases.Count != 0)
    {
        var parts = new List<string>(releases.Count);
        foreach (var release in releases)
        {
            parts.Add($"{release.Name}:{release.State}:{(int)release.Duration.TotalMinutes}min");
        }

        value = string.Join(", ", parts);
    }

    eb.Add(key, value);
}
/// <summary>
/// Maps an <see cref="HttpRequestException"/> to a coarse error category by looking for
/// keywords in its lower-cased message. Categories are tried in a fixed order
/// (ssl, dns, refused, timeout), so the first match wins.
/// </summary>
/// <param name="ex">The exception whose message is inspected.</param>
/// <returns>
/// "ssl_error", "dns_failure", "refused", "timeout", or "connection_failed" when nothing matches.
/// </returns>
private static string GetConnectionErrorType(HttpRequestException ex)
{
    var text = ex.Message.ToLowerInvariant();

    // True when the message contains at least one of the given keywords.
    bool Mentions(params string[] keywords)
    {
        foreach (var keyword in keywords)
        {
            if (text.Contains(keyword))
            {
                return true;
            }
        }

        return false;
    }

    return text switch
    {
        _ when Mentions("ssl", "tls", "certificate") => "ssl_error",
        _ when Mentions("name", "dns", "resolve") => "dns_failure",
        _ when Mentions("refused", "actively refused") => "refused",
        _ when Mentions("timeout") => "timeout",
        _ => "connection_failed",
    };
}
/// <summary>
/// Snapshot of one release as reported by the Release Orchestrator, plus the age
/// computed by this check.
/// </summary>
private sealed record ReleaseInfo
{
/// <summary>Release identifier; interpolated into the suggested "stella release" commands.</summary>
public required string Id { get; init; }
/// <summary>Release name shown in evidence listings.</summary>
public required string Name { get; init; }
/// <summary>
/// Reported state, or "unknown" when absent. The check compares it (case-insensitively)
/// against "pending_approval", "executing" and "pending".
/// </summary>
public required string State { get; init; }
/// <summary>Start time used to compute the release's age.</summary>
public required DateTimeOffset StartedAt { get; init; }
/// <summary>Error text reported for the release; a non-empty value classifies it as failed.</summary>
public string? Error { get; init; }
/// <summary>Value of the "currentStep" field, when present.</summary>
public string? CurrentStep { get; init; }
/// <summary>Value of the "targetEnvironment" field, when present.</summary>
public string? TargetEnvironment { get; init; }
/// <summary>
/// Age of the release at check time. Not populated by parsing; the check sets it through
/// a <c>with</c> expression when it classifies the release, so it is zero otherwise.
/// </summary>
public TimeSpan Duration { get; init; }
}
}

View File

@@ -0,0 +1,360 @@
// -----------------------------------------------------------------------------
// EnvironmentReadinessCheck.cs
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
// Task: RELPIPE-004 - Implement EnvironmentReadinessCheck
// Description: Check health and readiness of target environments
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Release.Checks;
/// <summary>
/// Checks health and readiness of target environments.
/// Verifies environment reachability, resource limits, and deployment readiness.
/// </summary>
public sealed class EnvironmentReadinessCheck : IDoctorCheck
{
private const string PluginId = "stellaops.doctor.release";
private const string CategoryName = "Release Pipeline";
/// <inheritdoc />
public string CheckId => "check.release.environment.readiness";
/// <inheritdoc />
public string Name => "Environment Readiness";
/// <inheritdoc />
public string Description => "Check health and readiness of target environments";
/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
/// <inheritdoc />
public IReadOnlyList<string> Tags => ["release", "environment", "readiness", "deployment"];
/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(15);
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
    // Runs only when a Release Orchestrator URL is configured; the alternate key is
    // consulted only if the primary key is absent.
    var url = context.Configuration["ReleaseOrchestrator:Url"];
    url ??= context.Configuration["Release:Orchestrator:Url"];

    if (string.IsNullOrEmpty(url))
    {
        return false;
    }

    return true;
}
/// <inheritdoc />
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
?? context.Configuration["Release:Orchestrator:Url"]
?? "http://localhost:5080";
try
{
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
httpClient.Timeout = TimeSpan.FromSeconds(15);
// Query environments
var response = await httpClient.GetAsync(
$"{orchestratorUrl.TrimEnd('/')}/api/v1/environments",
ct);
if (!response.IsSuccessStatusCode)
{
return builder
.Warn($"Cannot retrieve environments: HTTP {(int)response.StatusCode}")
.WithEvidence("Environment Status", eb =>
{
eb.Add("orchestrator_url", orchestratorUrl);
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
})
.WithCauses("Release Orchestrator unavailable", "API endpoint not found")
.WithRemediation(rb => rb
.AddStep(1, "Check Release Orchestrator health",
$"curl -s {orchestratorUrl}/health",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
var envsJson = await response.Content.ReadAsStringAsync(ct);
var environments = ParseEnvironments(envsJson);
if (environments.Count == 0)
{
return builder
.Pass("No environments configured")
.WithEvidence("Environments", eb =>
{
eb.Add("environment_count", "0");
})
.Build();
}
// Check each environment
var unreachable = new List<EnvironmentInfo>();
var unhealthy = new List<EnvironmentInfo>();
var staleHealthCheck = new List<EnvironmentInfo>();
var now = context.TimeProvider.GetUtcNow();
var staleThreshold = TimeSpan.FromHours(1);
foreach (var env in environments)
{
if (!env.IsReachable)
{
unreachable.Add(env);
}
else if (!env.IsHealthy)
{
unhealthy.Add(env);
}
else if (env.LastHealthCheck.HasValue &&
now - env.LastHealthCheck.Value > staleThreshold)
{
staleHealthCheck.Add(env);
}
}
var devEnvs = environments.Count(e => e.Type.Equals("dev", StringComparison.OrdinalIgnoreCase));
var stagingEnvs = environments.Count(e => e.Type.Equals("staging", StringComparison.OrdinalIgnoreCase) ||
e.Type.Equals("stage", StringComparison.OrdinalIgnoreCase));
var prodEnvs = environments.Count(e => e.Type.Equals("prod", StringComparison.OrdinalIgnoreCase) ||
e.Type.Equals("production", StringComparison.OrdinalIgnoreCase));
// Determine severity - production issues are critical
var hasProdIssue = unreachable.Any(e => IsProd(e.Type)) || unhealthy.Any(e => IsProd(e.Type));
var hasAnyIssue = unreachable.Count > 0 || unhealthy.Count > 0;
if (hasProdIssue)
{
return builder
.Fail("Production environment issues detected")
.WithEvidence("Environments", eb =>
{
eb.Add("environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
eb.Add("dev_environments", devEnvs.ToString(CultureInfo.InvariantCulture));
eb.Add("staging_environments", stagingEnvs.ToString(CultureInfo.InvariantCulture));
eb.Add("prod_environments", prodEnvs.ToString(CultureInfo.InvariantCulture));
eb.Add("unreachable_count", unreachable.Count.ToString(CultureInfo.InvariantCulture));
eb.Add("unhealthy_count", unhealthy.Count.ToString(CultureInfo.InvariantCulture));
eb.Add("unreachable_environments", string.Join(", ", unreachable.Select(e => e.Name)));
eb.Add("unhealthy_environments", string.Join(", ", unhealthy.Select(e => e.Name)));
})
.WithCauses(
"Environment agent not responding",
"Network connectivity issue to environment",
"Container runtime issue in environment",
"Resource exhaustion (disk, memory)")
.WithRemediation(rb =>
{
if (unreachable.Count > 0)
{
rb.AddStep(1, "Check environment connectivity",
$"stella env ping {unreachable[0].Name}",
CommandType.Shell);
rb.AddStep(2, "View environment agent logs",
$"stella env logs {unreachable[0].Name}",
CommandType.Shell);
}
if (unhealthy.Count > 0)
{
rb.AddStep(3, "Check environment health details",
$"stella env health {unhealthy[0].Name}",
CommandType.Shell);
}
})
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
if (hasAnyIssue)
{
return builder
.Warn("Non-production environment issues detected")
.WithEvidence("Environments", eb =>
{
eb.Add("environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
eb.Add("dev_environments", devEnvs.ToString(CultureInfo.InvariantCulture));
eb.Add("staging_environments", stagingEnvs.ToString(CultureInfo.InvariantCulture));
eb.Add("prod_environments", prodEnvs.ToString(CultureInfo.InvariantCulture));
eb.Add("unreachable_count", unreachable.Count.ToString(CultureInfo.InvariantCulture));
eb.Add("unhealthy_count", unhealthy.Count.ToString(CultureInfo.InvariantCulture));
eb.Add("unreachable_environments", string.Join(", ", unreachable.Select(e => e.Name)));
eb.Add("unhealthy_environments", string.Join(", ", unhealthy.Select(e => e.Name)));
})
.WithCauses(
"Environment agent not responding",
"Dev/staging environment offline",
"Resource issue in non-prod environment")
.WithRemediation(rb =>
{
if (unreachable.Count > 0)
{
rb.AddStep(1, "Check environment connectivity",
$"stella env ping {unreachable[0].Name}",
CommandType.Shell);
}
if (unhealthy.Count > 0)
{
rb.AddStep(2, "Check environment health",
$"stella env health {unhealthy[0].Name}",
CommandType.Shell);
}
})
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
if (staleHealthCheck.Count > 0)
{
return builder
.Warn($"{staleHealthCheck.Count} environment(s) have stale health data")
.WithEvidence("Environments", eb =>
{
eb.Add("environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
eb.Add("stale_health_check_count", staleHealthCheck.Count.ToString(CultureInfo.InvariantCulture));
eb.Add("stale_environments", string.Join(", ", staleHealthCheck.Select(e => e.Name)));
})
.WithCauses(
"Health check scheduler not running",
"Environment agent intermittent connectivity")
.WithRemediation(rb => rb
.AddStep(1, "Trigger health check refresh",
"stella env health --refresh-all",
CommandType.Shell))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
return builder
.Pass($"{environments.Count} environment(s) ready")
.WithEvidence("Environments", eb =>
{
eb.Add("environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
eb.Add("dev_environments", devEnvs.ToString(CultureInfo.InvariantCulture));
eb.Add("staging_environments", stagingEnvs.ToString(CultureInfo.InvariantCulture));
eb.Add("prod_environments", prodEnvs.ToString(CultureInfo.InvariantCulture));
eb.Add("unreachable_count", "0");
eb.Add("unhealthy_count", "0");
eb.Add("environment_names", string.Join(", ", environments.Select(e => e.Name)));
})
.Build();
}
catch (HttpRequestException ex)
{
return builder
.Warn($"Cannot check environments: {ex.Message}")
.WithEvidence("Environment Status", eb =>
{
eb.Add("orchestrator_url", orchestratorUrl);
eb.Add("error_message", ex.Message);
eb.Add("connection_error_type", GetConnectionErrorType(ex));
})
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
catch (TaskCanceledException)
{
return builder
.Warn("Environment check timed out")
.WithEvidence("Environment Status", eb =>
{
eb.Add("orchestrator_url", orchestratorUrl);
eb.Add("connection_error_type", "timeout");
})
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
}
/// <summary>
/// Reports whether an environment type label denotes production, i.e. equals
/// "prod" or "production" under an ordinal, case-insensitive comparison.
/// </summary>
/// <param name="envType">Environment type label to classify.</param>
/// <returns><c>true</c> for a production label; otherwise <c>false</c>.</returns>
private static bool IsProd(string envType)
{
    if (envType.Equals("prod", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    return envType.Equals("production", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Parses the orchestrator's environment listing into <see cref="EnvironmentInfo"/> records.
/// </summary>
/// <remarks>
/// Accepts either a bare JSON array or an object carrying an "environments" array.
/// Parsing is best effort and never throws: an unparseable document or an unrecognised
/// shape yields an empty list. Each entry is read independently, so one malformed entry
/// (wrong value kind, non-object element) no longer discards the entries that follow it.
/// Entries without a non-empty string "id" and "name" are skipped.
/// </remarks>
/// <param name="json">Raw response body.</param>
/// <returns>The environments that could be read; possibly empty, never null.</returns>
private static List<EnvironmentInfo> ParseEnvironments(string json)
{
    var envs = new List<EnvironmentInfo>();

    JsonDocument doc;
    try
    {
        doc = JsonDocument.Parse(json);
    }
    catch (Exception ex) when (ex is JsonException or ArgumentNullException)
    {
        // Not JSON at all (or no body): keep the historical "nothing parsed" result.
        return envs;
    }

    using (doc)
    {
        var root = doc.RootElement;
        JsonElement envsArray;
        if (root.ValueKind == JsonValueKind.Array)
        {
            envsArray = root;
        }
        else if (root.ValueKind == JsonValueKind.Object &&
                 root.TryGetProperty("environments", out var wrapped) &&
                 wrapped.ValueKind == JsonValueKind.Array)
        {
            envsArray = wrapped;
        }
        else
        {
            return envs;
        }

        foreach (var env in envsArray.EnumerateArray())
        {
            if (env.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var id = ReadString(env, "id");
            var name = ReadString(env, "name");
            if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
            {
                continue;
            }

            // Timestamps are machine-readable, so parse them culture-independently.
            DateTimeOffset? lastCheck =
                DateTimeOffset.TryParse(
                    ReadString(env, "lastHealthCheck"),
                    CultureInfo.InvariantCulture,
                    DateTimeStyles.None,
                    out var parsed)
                    ? parsed
                    : (DateTimeOffset?)null;

            envs.Add(new EnvironmentInfo
            {
                Id = id,
                Name = name,
                Type = ReadString(env, "type") ?? "unknown",
                // A missing or non-boolean flag is read as false, i.e. the environment is
                // kept and reported as unreachable/unhealthy rather than dropped.
                IsReachable = ReadFlag(env, "isReachable"),
                IsHealthy = ReadFlag(env, "isHealthy"),
                CurrentVersion = ReadString(env, "currentVersion"),
                Error = ReadString(env, "error"),
                LastHealthCheck = lastCheck
            });
        }
    }

    return envs;

    // Returns the property's text, or null when it is absent, not a JSON string, or undecodable.
    static string? ReadString(JsonElement obj, string property)
    {
        if (!obj.TryGetProperty(property, out var el) || el.ValueKind != JsonValueKind.String)
        {
            return null;
        }

        try
        {
            return el.GetString();
        }
        catch (InvalidOperationException)
        {
            return null;
        }
    }

    // True only for a literal JSON true.
    static bool ReadFlag(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) && el.ValueKind == JsonValueKind.True;
}
/// <summary>
/// Classifies a failed HTTP request into a coarse connection error category.
/// </summary>
/// <remarks>
/// Typed inner exceptions are consulted first because the exception message usually embeds
/// the target host name, which can itself contain the keywords used by the text heuristic
/// (for example "Connection refused (my-hostname:443)"). The message heuristic remains as a
/// fallback and additionally recognises the Windows DNS message "No such host is known".
/// </remarks>
/// <param name="ex">The exception raised by the HTTP client.</param>
/// <returns>One of "ssl_error", "dns_failure", "refused" or "connection_failed".</returns>
private static string GetConnectionErrorType(HttpRequestException ex)
{
    for (Exception? inner = ex.InnerException; inner is not null; inner = inner.InnerException)
    {
        switch (inner)
        {
            case System.Security.Authentication.AuthenticationException:
                return "ssl_error";

            case System.Net.Sockets.SocketException socketEx:
                switch (socketEx.SocketErrorCode)
                {
                    case System.Net.Sockets.SocketError.HostNotFound:
                    case System.Net.Sockets.SocketError.TryAgain:
                    case System.Net.Sockets.SocketError.NoData:
                        return "dns_failure";
                    case System.Net.Sockets.SocketError.ConnectionRefused:
                        return "refused";
                }

                break;
        }
    }

    // Fallback: keyword match on the message, in the original precedence order.
    var message = ex.Message;
    if (message.Contains("ssl", StringComparison.OrdinalIgnoreCase) ||
        message.Contains("tls", StringComparison.OrdinalIgnoreCase))
    {
        return "ssl_error";
    }

    if (message.Contains("name", StringComparison.OrdinalIgnoreCase) ||
        message.Contains("dns", StringComparison.OrdinalIgnoreCase) ||
        message.Contains("no such host", StringComparison.OrdinalIgnoreCase))
    {
        return "dns_failure";
    }

    if (message.Contains("refused", StringComparison.OrdinalIgnoreCase))
    {
        return "refused";
    }

    return "connection_failed";
}
/// <summary>
/// One environment entry as read by <c>ParseEnvironments</c> from the orchestrator response.
/// </summary>
private sealed record EnvironmentInfo
{
/// <summary>Value of the JSON "id" property; entries without it are not materialised.</summary>
public required string Id { get; init; }
/// <summary>Value of the JSON "name" property; used when listing affected environments.</summary>
public required string Name { get; init; }
/// <summary>Value of the JSON "type" property, or "unknown" when absent or null.</summary>
public required string Type { get; init; }
/// <summary>JSON "isReachable" flag; false when the property is absent.</summary>
public bool IsReachable { get; init; }
/// <summary>JSON "isHealthy" flag; false when the property is absent.</summary>
public bool IsHealthy { get; init; }
/// <summary>Value of the JSON "currentVersion" property, if any.</summary>
public string? CurrentVersion { get; init; }
/// <summary>Value of the JSON "error" property, if any.</summary>
public string? Error { get; init; }
/// <summary>Parsed JSON "lastHealthCheck" timestamp; null when absent or unparseable.</summary>
public DateTimeOffset? LastHealthCheck { get; init; }
}
}

View File

@@ -0,0 +1,447 @@
// -----------------------------------------------------------------------------
// PromotionGateHealthCheck.cs
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
// Task: RELPIPE-003 - Implement PromotionGateHealthCheck
// Description: Check health of promotion gates between environments
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Release.Checks;
/// <summary>
/// Checks health of promotion gates between environments.
/// Verifies policy engine availability, attestation requirements, and approval configurations.
/// </summary>
public sealed class PromotionGateHealthCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.release";
    private const string CategoryName = "Release Pipeline";

    // Issue-type identifiers. They are emitted verbatim in the "issues" evidence value and
    // drive both severity selection and which remediation steps are offered.
    private const string MissingPoliciesIssue = "missing_policies";
    private const string AttestorUnavailableIssue = "attestor_unavailable";
    private const string NoApproversIssue = "no_approvers";

    /// <inheritdoc />
    public string CheckId => "check.release.promotion.gates";

    /// <inheritdoc />
    public string Name => "Promotion Gate Health";

    /// <inheritdoc />
    public string Description => "Check health of promotion gates between environments";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["release", "promotion", "gates", "policy", "attestation"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // Only runnable when a Release Orchestrator URL is configured under either key.
        var releaseUrl = context.Configuration["ReleaseOrchestrator:Url"]
            ?? context.Configuration["Release:Orchestrator:Url"];
        return !string.IsNullOrEmpty(releaseUrl);
    }

    /// <inheritdoc />
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
            ?? context.Configuration["Release:Orchestrator:Url"]
            ?? "http://localhost:5080";

        // Normalised once so neither the request nor the suggested curl command gets a double slash.
        var orchestratorBase = orchestratorUrl.TrimEnd('/');

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Query promotion gates configuration
            using var response = await httpClient.GetAsync(
                $"{orchestratorBase}/api/v1/promotion-gates",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                var statusCode = ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture);
                return builder
                    .Warn($"Cannot retrieve promotion gates: HTTP {statusCode}")
                    .WithEvidence("Promotion Gates Status", eb =>
                    {
                        eb.Add("orchestrator_url", orchestratorUrl);
                        eb.Add("http_status_code", statusCode);
                    })
                    .WithCauses("Release Orchestrator unavailable", "API endpoint not found")
                    .WithRemediation(rb => rb
                        .AddStep(1, "Check Release Orchestrator health",
                            $"curl -s {orchestratorBase}/health",
                            CommandType.Shell))
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var gatesJson = await response.Content.ReadAsStringAsync(ct);
            var gates = ParsePromotionGates(gatesJson);

            if (gates is null)
            {
                // A body that is not JSON must not be reported as "no gates configured".
                return builder
                    .Warn("Promotion gates response could not be parsed")
                    .WithEvidence("Promotion Gates Status", eb =>
                    {
                        eb.Add("orchestrator_url", orchestratorUrl);
                        eb.Add("parse_error", "invalid_json");
                    })
                    .WithCauses(
                        "Release Orchestrator returned a non-JSON response",
                        "Proxy or gateway intercepted the request")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (gates.Count == 0)
            {
                return builder
                    .Pass("No promotion gates configured")
                    .WithEvidence("Promotion Gates", eb =>
                    {
                        eb.Add("gate_count", "0");
                        eb.Add("gates_with_policy", "0");
                        eb.Add("gates_with_attestation", "0");
                        eb.Add("gates_with_approval", "0");
                    })
                    .Build();
            }

            // Check gate health
            var issues = await EvaluateGatesAsync(gates, httpClient, context, ct);

            var gatesWithPolicy = gates.Count(g => g.RequiresPolicyPass);
            var gatesWithAttestation = gates.Count(g => g.RequiresAttestations);
            var gatesWithApproval = gates.Count(g => g.RequiresApproval);

            if (issues.Count > 0)
            {
                // Missing policies or approvers fail the check; an unreachable attestor alone only warns.
                var isBlocking = issues.Any(i => i.IssueType is MissingPoliciesIssue or NoApproversIssue);
                var summary = $"{issues.Count} promotion gate issue(s) detected";
                var resultBuilder = isBlocking ? builder.Fail(summary) : builder.Warn(summary);

                return resultBuilder
                    .WithEvidence("Promotion Gates", eb =>
                    {
                        eb.Add("gate_count", gates.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("gates_with_policy", gatesWithPolicy.ToString(CultureInfo.InvariantCulture));
                        eb.Add("gates_with_attestation", gatesWithAttestation.ToString(CultureInfo.InvariantCulture));
                        eb.Add("gates_with_approval", gatesWithApproval.ToString(CultureInfo.InvariantCulture));
                        eb.Add("issue_count", issues.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("issues", string.Join("; ", issues.Select(i => $"{i.GateName}:{i.IssueType}")));
                        // Per-issue detail (e.g. which policies are missing) for root-cause analysis.
                        eb.Add("issue_details", string.Join("; ", issues.Select(i => $"{i.GateName}: {i.Details}")));
                    })
                    .WithCauses(
                        "Required policies not loaded in policy engine",
                        "Attestor service unavailable",
                        "Approval workflow misconfigured",
                        "Environment deleted but gate remains")
                    .WithRemediation(rb =>
                    {
                        // Number steps sequentially so skipped branches leave no gaps.
                        var step = 1;
                        rb.AddStep(step++, "List promotion gates",
                            "stella release gates list",
                            CommandType.Shell);

                        if (issues.Any(i => i.IssueType == MissingPoliciesIssue))
                        {
                            rb.AddStep(step++, "Check policy engine",
                                "stella policy list",
                                CommandType.Shell);
                        }

                        if (issues.Any(i => i.IssueType == AttestorUnavailableIssue))
                        {
                            rb.AddStep(step++, "Check attestor health",
                                "stella doctor --check check.attestation.*",
                                CommandType.Shell);
                        }

                        if (issues.Any(i => i.IssueType == NoApproversIssue))
                        {
                            rb.AddStep(step, "Configure approvers",
                                "stella release gates configure <gate-id> --approvers <user>",
                                CommandType.Manual);
                        }
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"{gates.Count} promotion gate(s) healthy")
                .WithEvidence("Promotion Gates", eb =>
                {
                    eb.Add("gate_count", gates.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("gates_with_policy", gatesWithPolicy.ToString(CultureInfo.InvariantCulture));
                    eb.Add("gates_with_attestation", gatesWithAttestation.ToString(CultureInfo.InvariantCulture));
                    eb.Add("gates_with_approval", gatesWithApproval.ToString(CultureInfo.InvariantCulture));
                    eb.Add("gate_names", string.Join(", ", gates.Select(g => g.Name)));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check promotion gates: {ex.Message}")
                .WithEvidence("Promotion Gates Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("error_message", ex.Message);
                    eb.Add("connection_error_type", GetConnectionErrorType(ex));
                })
                .WithCauses("Release Orchestrator unavailable", "Network issue")
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            return builder
                .Warn("Promotion gate check timed out")
                .WithEvidence("Promotion Gates Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("connection_error_type", "timeout");
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Evaluates every gate and collects policy, attestor and approver issues.
    /// The policy catalogue and the attestor health probe do not depend on the gate, so each
    /// is requested lazily and at most once per run instead of once per gate.
    /// </summary>
    private static async Task<List<GateIssue>> EvaluateGatesAsync(
        IReadOnlyList<PromotionGateInfo> gates,
        HttpClient httpClient,
        DoctorPluginContext context,
        CancellationToken ct)
    {
        var issues = new List<GateIssue>();

        HashSet<string>? policyCatalogue = null;
        var policyCatalogueLoaded = false;
        bool? attestorReachable = null;

        foreach (var gate in gates)
        {
            // Check if required policies are available
            if (gate.RequiresPolicyPass && gate.RequiredPolicies.Count > 0)
            {
                if (!policyCatalogueLoaded)
                {
                    policyCatalogue = await GetAvailablePoliciesAsync(httpClient, context, ct);
                    policyCatalogueLoaded = true;
                }

                if (policyCatalogue is { } catalogue)
                {
                    var missing = gate.RequiredPolicies.Where(p => !catalogue.Contains(p)).ToList();
                    if (missing.Count > 0)
                    {
                        issues.Add(new GateIssue
                        {
                            GateId = gate.Id,
                            GateName = gate.Name,
                            IssueType = MissingPoliciesIssue,
                            Details = $"Missing policies: {string.Join(", ", missing)}"
                        });
                    }
                }
                else
                {
                    // Catalogue unavailable: every required policy counts as missing (as before),
                    // but the detail text states that the engine could not be queried.
                    issues.Add(new GateIssue
                    {
                        GateId = gate.Id,
                        GateName = gate.Name,
                        IssueType = MissingPoliciesIssue,
                        Details = $"Policy engine could not be queried; unverified policies: {string.Join(", ", gate.RequiredPolicies)}"
                    });
                }
            }

            // Check if attestation types are configured: verify the attestor is reachable
            if (gate.RequiresAttestations && gate.RequiredAttestations.Count > 0)
            {
                attestorReachable ??= await CheckAttestorAvailableAsync(httpClient, context, ct);
                if (!attestorReachable.Value)
                {
                    issues.Add(new GateIssue
                    {
                        GateId = gate.Id,
                        GateName = gate.Name,
                        IssueType = AttestorUnavailableIssue,
                        Details = "Attestor service not reachable"
                    });
                }
            }

            // Check approval configuration
            if (gate.RequiresApproval && gate.Approvers.Count == 0)
            {
                issues.Add(new GateIssue
                {
                    GateId = gate.Id,
                    GateName = gate.Name,
                    IssueType = NoApproversIssue,
                    Details = "Approval required but no approvers configured"
                });
            }
        }

        return issues;
    }

    /// <summary>
    /// Fetches the policy ids known to the policy engine (case-insensitive set).
    /// Returns null when the engine cannot be queried or its response cannot be read.
    /// </summary>
    private static async Task<HashSet<string>?> GetAvailablePoliciesAsync(
        HttpClient httpClient,
        DoctorPluginContext context,
        CancellationToken ct)
    {
        var policyEngineUrl = context.Configuration["Policy:Engine:Url"]
            ?? context.Configuration["PolicyEngine:Url"]
            ?? "http://localhost:8181";

        try
        {
            using var response = await httpClient.GetAsync(
                $"{policyEngineUrl.TrimEnd('/')}/v1/policies",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                return null;
            }

            var json = await response.Content.ReadAsStringAsync(ct);
            using var doc = JsonDocument.Parse(json);

            if (doc.RootElement.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            var available = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
            if (doc.RootElement.TryGetProperty("result", out var result) &&
                result.ValueKind == JsonValueKind.Array)
            {
                foreach (var policy in result.EnumerateArray())
                {
                    if (policy.ValueKind != JsonValueKind.Object)
                    {
                        continue;
                    }

                    var id = ReadString(policy, "id");
                    if (!string.IsNullOrEmpty(id))
                    {
                        available.Add(id);
                    }
                }
            }

            return available;
        }
        catch (Exception)
        {
            // Deliberate best effort: any failure means "policies could not be verified".
            return null;
        }
    }

    /// <summary>
    /// Probes the attestor's /health endpoint; any failure is reported as unavailable.
    /// </summary>
    private static async Task<bool> CheckAttestorAvailableAsync(
        HttpClient httpClient,
        DoctorPluginContext context,
        CancellationToken ct)
    {
        var attestorUrl = context.Configuration["Attestor:Url"]
            ?? "http://localhost:5090";

        try
        {
            using var response = await httpClient.GetAsync(
                $"{attestorUrl.TrimEnd('/')}/health",
                ct);
            return response.IsSuccessStatusCode;
        }
        catch (Exception)
        {
            // Deliberate best effort: an unreachable attestor is a finding, not a crash.
            return false;
        }
    }

    /// <summary>
    /// Parses the promotion gate listing (a bare array, or an object with a "gates" array).
    /// </summary>
    /// <returns>
    /// The readable gates; an empty list when the body is blank or valid JSON without a gate
    /// array; null when the body is not valid JSON. Entries are read independently, so a
    /// malformed entry does not discard the ones after it. Entries without a non-empty
    /// string "id" and "name" are skipped.
    /// </returns>
    private static List<PromotionGateInfo>? ParsePromotionGates(string json)
    {
        var gates = new List<PromotionGateInfo>();
        if (string.IsNullOrWhiteSpace(json))
        {
            return gates;
        }

        JsonDocument doc;
        try
        {
            doc = JsonDocument.Parse(json);
        }
        catch (JsonException)
        {
            return null;
        }

        using (doc)
        {
            var root = doc.RootElement;
            JsonElement gatesArray;
            if (root.ValueKind == JsonValueKind.Array)
            {
                gatesArray = root;
            }
            else if (root.ValueKind == JsonValueKind.Object &&
                     root.TryGetProperty("gates", out var wrapped) &&
                     wrapped.ValueKind == JsonValueKind.Array)
            {
                gatesArray = wrapped;
            }
            else
            {
                return gates;
            }

            foreach (var gate in gatesArray.EnumerateArray())
            {
                if (gate.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                var id = ReadString(gate, "id");
                var name = ReadString(gate, "name");
                if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
                {
                    continue;
                }

                gates.Add(new PromotionGateInfo
                {
                    Id = id,
                    Name = name,
                    RequiresPolicyPass = ReadFlag(gate, "requiresPolicyPass"),
                    RequiresAttestations = ReadFlag(gate, "requiresAttestations"),
                    RequiresApproval = ReadFlag(gate, "requiresApproval"),
                    RequiredPolicies = ReadStringList(gate, "requiredPolicies"),
                    RequiredAttestations = ReadStringList(gate, "requiredAttestations"),
                    Approvers = ReadStringList(gate, "approvers")
                });
            }
        }

        return gates;
    }

    /// <summary>Returns the element's text, or null when it is not a decodable JSON string.</summary>
    private static string? AsString(JsonElement element)
    {
        if (element.ValueKind != JsonValueKind.String)
        {
            return null;
        }

        try
        {
            return element.GetString();
        }
        catch (InvalidOperationException)
        {
            return null;
        }
    }

    /// <summary>Reads a string property; null when absent or not a string.</summary>
    private static string? ReadString(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) ? AsString(el) : null;

    /// <summary>Reads a boolean property; true only for a literal JSON true.</summary>
    private static bool ReadFlag(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) && el.ValueKind == JsonValueKind.True;

    /// <summary>Reads an array property, keeping only its non-empty string items.</summary>
    private static List<string> ReadStringList(JsonElement obj, string property)
    {
        var items = new List<string>();
        if (obj.TryGetProperty(property, out var el) && el.ValueKind == JsonValueKind.Array)
        {
            foreach (var item in el.EnumerateArray())
            {
                var text = AsString(item);
                if (!string.IsNullOrEmpty(text))
                {
                    items.Add(text);
                }
            }
        }

        return items;
    }

    /// <summary>
    /// Classifies a failed HTTP request as "ssl_error", "dns_failure", "refused" or
    /// "connection_failed". Typed inner exceptions are consulted before the message text,
    /// because the message embeds the host name and can contain the heuristic's keywords.
    /// </summary>
    private static string GetConnectionErrorType(HttpRequestException ex)
    {
        for (Exception? inner = ex.InnerException; inner is not null; inner = inner.InnerException)
        {
            switch (inner)
            {
                case System.Security.Authentication.AuthenticationException:
                    return "ssl_error";

                case System.Net.Sockets.SocketException socketEx:
                    switch (socketEx.SocketErrorCode)
                    {
                        case System.Net.Sockets.SocketError.HostNotFound:
                        case System.Net.Sockets.SocketError.TryAgain:
                        case System.Net.Sockets.SocketError.NoData:
                            return "dns_failure";
                        case System.Net.Sockets.SocketError.ConnectionRefused:
                            return "refused";
                    }

                    break;
            }
        }

        // Fallback: keyword match on the message, in the original precedence order.
        var message = ex.Message;
        if (message.Contains("ssl", StringComparison.OrdinalIgnoreCase) ||
            message.Contains("tls", StringComparison.OrdinalIgnoreCase))
        {
            return "ssl_error";
        }

        if (message.Contains("name", StringComparison.OrdinalIgnoreCase) ||
            message.Contains("dns", StringComparison.OrdinalIgnoreCase) ||
            message.Contains("no such host", StringComparison.OrdinalIgnoreCase))
        {
            return "dns_failure";
        }

        if (message.Contains("refused", StringComparison.OrdinalIgnoreCase))
        {
            return "refused";
        }

        return "connection_failed";
    }

    /// <summary>Promotion gate as read from the orchestrator response.</summary>
    private sealed record PromotionGateInfo
    {
        public required string Id { get; init; }
        public required string Name { get; init; }
        public bool RequiresPolicyPass { get; init; }
        public bool RequiresAttestations { get; init; }
        public bool RequiresApproval { get; init; }
        public IReadOnlyList<string> RequiredPolicies { get; init; } = [];
        public IReadOnlyList<string> RequiredAttestations { get; init; } = [];
        public IReadOnlyList<string> Approvers { get; init; } = [];
    }

    /// <summary>A single finding against one gate; <see cref="Details"/> is surfaced as evidence.</summary>
    private sealed record GateIssue
    {
        public required string GateId { get; init; }
        public required string GateName { get; init; }
        public required string IssueType { get; init; }
        public required string Details { get; init; }
    }
}

View File

@@ -0,0 +1,359 @@
// -----------------------------------------------------------------------------
// ReleaseConfigurationCheck.cs
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
// Task: RELPIPE-006 - Implement ReleaseConfigurationCheck
// Description: Check validity of release configuration
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Release.Checks;
/// <summary>
/// Checks validity of release configuration.
/// Verifies workflow definitions, stage transitions, and required integrations.
/// </summary>
public sealed class ReleaseConfigurationCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.release";
    private const string CategoryName = "Release Pipeline";

    // Validation error identifiers; emitted verbatim in the "errors" evidence value.
    private const string NoStagesError = "no_stages";
    private const string InvalidTransitionError = "invalid_transition";
    private const string UnreachableStageError = "unreachable_stage";
    private const string MissingEnvironmentError = "missing_environment";

    /// <inheritdoc />
    public string CheckId => "check.release.configuration";

    /// <inheritdoc />
    public string Name => "Release Configuration";

    /// <inheritdoc />
    public string Description => "Check validity of release workflow configuration";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["release", "configuration", "workflow", "validation"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // Only runnable when a Release Orchestrator URL is configured under either key.
        var releaseUrl = context.Configuration["ReleaseOrchestrator:Url"]
            ?? context.Configuration["Release:Orchestrator:Url"];
        return !string.IsNullOrEmpty(releaseUrl);
    }

    /// <inheritdoc />
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
            ?? context.Configuration["Release:Orchestrator:Url"]
            ?? "http://localhost:5080";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Query workflow configurations
            using var response = await httpClient.GetAsync(
                $"{orchestratorUrl.TrimEnd('/')}/api/v1/workflows",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                var statusCode = ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture);
                return builder
                    .Warn($"Cannot retrieve workflow configurations: HTTP {statusCode}")
                    .WithEvidence("Configuration Status", eb =>
                    {
                        eb.Add("orchestrator_url", orchestratorUrl);
                        eb.Add("http_status_code", statusCode);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var workflowsJson = await response.Content.ReadAsStringAsync(ct);
            var workflows = ParseWorkflows(workflowsJson);

            if (workflows is null)
            {
                // A body that is not JSON must not be reported as "no workflows configured".
                return builder
                    .Warn("Workflow configuration response could not be parsed")
                    .WithEvidence("Configuration Status", eb =>
                    {
                        eb.Add("orchestrator_url", orchestratorUrl);
                        eb.Add("parse_error", "invalid_json");
                    })
                    .WithCauses(
                        "Release Orchestrator returned a non-JSON response",
                        "Proxy or gateway intercepted the request")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (workflows.Count == 0)
            {
                return builder
                    .Warn("No release workflows configured")
                    .WithEvidence("Workflows", eb =>
                    {
                        eb.Add("workflow_count", "0");
                    })
                    .WithCauses("Release workflows not yet defined")
                    .WithRemediation(rb => rb
                        .AddStep(1, "Create a release workflow",
                            "stella release workflow create --name <name> --stages dev,staging,prod",
                            CommandType.Manual))
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // Validate each workflow
            var validationErrors = workflows.SelectMany(ValidateWorkflow).ToList();

            var activeWorkflows = workflows.Count(w => w.IsActive);
            var totalStages = workflows.Sum(w => w.Stages.Count);

            if (validationErrors.Count > 0)
            {
                // An unreachable stage alone is a warning; every other error type fails the check.
                var hasBlockingErrors = validationErrors.Any(e =>
                    e.ErrorType is NoStagesError or InvalidTransitionError or MissingEnvironmentError);

                var resultBuilder = hasBlockingErrors
                    ? builder.Fail($"{validationErrors.Count} workflow configuration error(s)")
                    : builder.Warn($"{validationErrors.Count} workflow configuration warning(s)");

                return resultBuilder
                    .WithEvidence("Workflows", eb =>
                    {
                        eb.Add("workflow_count", workflows.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("active_workflow_count", activeWorkflows.ToString(CultureInfo.InvariantCulture));
                        eb.Add("total_stages", totalStages.ToString(CultureInfo.InvariantCulture));
                        eb.Add("validation_error_count", validationErrors.Count.ToString(CultureInfo.InvariantCulture));
                        eb.Add("errors", string.Join("; ", validationErrors.Select(e => $"{e.WorkflowName}:{e.ErrorType}")));
                        // Human-readable messages (which stage, which transition) for root-cause analysis.
                        eb.Add("error_details", string.Join("; ", validationErrors.Select(e => $"{e.WorkflowName}: {e.Message}")));
                    })
                    .WithCauses(
                        "Workflow configuration incomplete",
                        "Stage transition misconfigured",
                        "Environment deleted but workflow not updated")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "View workflow details",
                            $"stella release workflow show {validationErrors[0].WorkflowId}",
                            CommandType.Shell);
                        rb.AddStep(2, "Fix workflow configuration",
                            $"stella release workflow edit {validationErrors[0].WorkflowId}",
                            CommandType.Manual);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"{workflows.Count} workflow(s) valid ({totalStages} stages)")
                .WithEvidence("Workflows", eb =>
                {
                    eb.Add("workflow_count", workflows.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("active_workflow_count", activeWorkflows.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_stages", totalStages.ToString(CultureInfo.InvariantCulture));
                    eb.Add("validation_error_count", "0");
                    eb.Add("workflow_names", string.Join(", ", workflows.Select(w => w.Name)));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check workflow configuration: {ex.Message}")
                .WithEvidence("Configuration Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("error_message", ex.Message);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            return builder
                .Warn("Configuration check timed out")
                .WithEvidence("Configuration Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("connection_error_type", "timeout");
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Validates one workflow. Errors are returned in a fixed order: invalid transitions,
    /// stages without incoming transitions, then stages without an environment.
    /// A workflow without stages yields a single "no_stages" error and nothing else.
    /// </summary>
    private static List<ValidationError> ValidateWorkflow(WorkflowInfo workflow)
    {
        var errors = new List<ValidationError>();

        ValidationError Error(string errorType, string message) => new()
        {
            WorkflowId = workflow.Id,
            WorkflowName = workflow.Name,
            ErrorType = errorType,
            Message = message
        };

        // Check for empty stages
        if (workflow.Stages.Count == 0)
        {
            errors.Add(Error(NoStagesError, "Workflow has no stages defined"));
            return errors;
        }

        // Check for invalid transitions (stage names compare case-insensitively)
        var validStages = workflow.Stages.Select(s => s.Name).ToHashSet(StringComparer.OrdinalIgnoreCase);
        foreach (var stage in workflow.Stages)
        {
            foreach (var nextStage in stage.NextStages)
            {
                if (!validStages.Contains(nextStage))
                {
                    errors.Add(Error(
                        InvalidTransitionError,
                        $"Stage '{stage.Name}' references unknown stage '{nextStage}'"));
                }
            }
        }

        // Check for unreachable stages (no incoming transitions).
        // The first stage is the entry point and is therefore always considered reachable.
        var reachableStages = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            workflow.Stages[0].Name
        };
        foreach (var stage in workflow.Stages)
        {
            foreach (var next in stage.NextStages)
            {
                reachableStages.Add(next);
            }
        }

        foreach (var stage in workflow.Stages.Skip(1))
        {
            if (!reachableStages.Contains(stage.Name))
            {
                errors.Add(Error(
                    UnreachableStageError,
                    $"Stage '{stage.Name}' is unreachable (no incoming transitions)"));
            }
        }

        // Check for missing environment mapping
        foreach (var stage in workflow.Stages)
        {
            if (string.IsNullOrEmpty(stage.EnvironmentId))
            {
                errors.Add(Error(
                    MissingEnvironmentError,
                    $"Stage '{stage.Name}' has no target environment mapped"));
            }
        }

        return errors;
    }

    /// <summary>
    /// Parses the workflow listing (a bare array, or an object with a "workflows" array).
    /// </summary>
    /// <returns>
    /// The readable workflows; an empty list when the body is blank or valid JSON without a
    /// workflow array; null when the body is not valid JSON. Entries are read independently,
    /// so a malformed entry does not discard the ones after it. Workflows without a non-empty
    /// string "id" and "name" are skipped.
    /// </returns>
    private static List<WorkflowInfo>? ParseWorkflows(string json)
    {
        var workflows = new List<WorkflowInfo>();
        if (string.IsNullOrWhiteSpace(json))
        {
            return workflows;
        }

        JsonDocument doc;
        try
        {
            doc = JsonDocument.Parse(json);
        }
        catch (JsonException)
        {
            return null;
        }

        using (doc)
        {
            var root = doc.RootElement;
            JsonElement workflowsArray;
            if (root.ValueKind == JsonValueKind.Array)
            {
                workflowsArray = root;
            }
            else if (root.ValueKind == JsonValueKind.Object &&
                     root.TryGetProperty("workflows", out var wrapped) &&
                     wrapped.ValueKind == JsonValueKind.Array)
            {
                workflowsArray = wrapped;
            }
            else
            {
                return workflows;
            }

            foreach (var workflow in workflowsArray.EnumerateArray())
            {
                if (workflow.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                var id = ReadString(workflow, "id");
                var name = ReadString(workflow, "name");
                if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
                {
                    continue;
                }

                workflows.Add(new WorkflowInfo
                {
                    Id = id,
                    Name = name,
                    IsActive = workflow.TryGetProperty("isActive", out var activeEl) &&
                               activeEl.ValueKind == JsonValueKind.True,
                    Stages = ParseStages(workflow)
                });
            }
        }

        return workflows;
    }

    /// <summary>Reads the "stages" array of a workflow; stages without a name are skipped.</summary>
    private static List<StageInfo> ParseStages(JsonElement workflow)
    {
        var stages = new List<StageInfo>();
        if (!workflow.TryGetProperty("stages", out var stagesEl) || stagesEl.ValueKind != JsonValueKind.Array)
        {
            return stages;
        }

        foreach (var stage in stagesEl.EnumerateArray())
        {
            if (stage.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var stageName = ReadString(stage, "name");
            if (string.IsNullOrEmpty(stageName))
            {
                continue;
            }

            var nextStages = new List<string>();
            if (stage.TryGetProperty("nextStages", out var nextEl) && nextEl.ValueKind == JsonValueKind.Array)
            {
                foreach (var next in nextEl.EnumerateArray())
                {
                    var nextName = AsString(next);
                    if (!string.IsNullOrEmpty(nextName))
                    {
                        nextStages.Add(nextName);
                    }
                }
            }

            stages.Add(new StageInfo
            {
                Name = stageName,
                EnvironmentId = ReadString(stage, "environmentId"),
                NextStages = nextStages
            });
        }

        return stages;
    }

    /// <summary>Returns the element's text, or null when it is not a decodable JSON string.</summary>
    private static string? AsString(JsonElement element)
    {
        if (element.ValueKind != JsonValueKind.String)
        {
            return null;
        }

        try
        {
            return element.GetString();
        }
        catch (InvalidOperationException)
        {
            return null;
        }
    }

    /// <summary>Reads a string property; null when absent or not a string.</summary>
    private static string? ReadString(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) ? AsString(el) : null;

    /// <summary>Workflow as read from the orchestrator response.</summary>
    private sealed record WorkflowInfo
    {
        public required string Id { get; init; }
        public required string Name { get; init; }
        public bool IsActive { get; init; }
        public IReadOnlyList<StageInfo> Stages { get; init; } = [];
    }

    /// <summary>One stage of a workflow with its outgoing transitions.</summary>
    private sealed record StageInfo
    {
        public required string Name { get; init; }
        public string? EnvironmentId { get; init; }
        public IReadOnlyList<string> NextStages { get; init; } = [];
    }

    /// <summary>A single validation finding; <see cref="Message"/> is surfaced as evidence.</summary>
    private sealed record ValidationError
    {
        public required string WorkflowId { get; init; }
        public required string WorkflowName { get; init; }
        public required string ErrorType { get; init; }
        public required string Message { get; init; }
    }
}

View File

@@ -0,0 +1,287 @@
// -----------------------------------------------------------------------------
// ReleaseScheduleHealthCheck.cs
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
// Task: RELPIPE-005 - Implement ReleaseScheduleHealthCheck
// Description: Check health of scheduled releases
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Release.Checks;
/// <summary>
/// Checks health of scheduled releases.
/// Identifies missed schedules, conflicts, and upcoming releases requiring attention.
/// </summary>
public sealed class ReleaseScheduleHealthCheck : IDoctorCheck
{
private const string PluginId = "stellaops.doctor.release";
private const string CategoryName = "Release Pipeline";
/// <inheritdoc />
public string CheckId => "check.release.schedule";
/// <inheritdoc />
public string Name => "Release Schedule Health";
/// <inheritdoc />
public string Description => "Check health of scheduled releases";
/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Info;
/// <inheritdoc />
public IReadOnlyList<string> Tags => ["release", "schedule", "upcoming", "planning"];
/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
    // Runnable only when an orchestrator URL is configured; the flat key is
    // consulted first and the nested key only when the flat one is absent (null).
    var configuredUrl = context.Configuration["ReleaseOrchestrator:Url"];
    if (configuredUrl is null)
    {
        configuredUrl = context.Configuration["Release:Orchestrator:Url"];
    }

    return configuredUrl is not null && configuredUrl.Length > 0;
}
/// <inheritdoc />
/// <remarks>
/// Requests <c>/api/v1/releases/scheduled</c> from the configured orchestrator and reports, in priority
/// order: missed pending schedules (fail), pending schedules for the same environment less than one
/// hour apart (warn), releases due within the next 24 hours (pass), otherwise a plain pass.
/// Non-success HTTP responses, transport errors, timeouts and cancellation are reported as warnings.
/// </remarks>
/// <param name="context">Supplies configuration, services, the result builder and the time provider.</param>
/// <param name="ct">Token forwarded to the HTTP calls.</param>
/// <returns>The built check result.</returns>
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    // Status values are compared case-insensitively so "Pending" and "pending" are treated alike.
    static bool HasStatus(ScheduleInfo schedule, string status) =>
        string.Equals(schedule.Status, status, StringComparison.OrdinalIgnoreCase);

    // A cancelled release is not going to run, so it must not be advertised as upcoming.
    static bool IsCancelled(ScheduleInfo schedule) =>
        HasStatus(schedule, "cancelled") || HasStatus(schedule, "canceled");

    var builder = context.CreateResult(CheckId, PluginId, CategoryName);
    var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
        ?? context.Configuration["Release:Orchestrator:Url"]
        ?? "http://localhost:5080";

    try
    {
        var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
        var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
        httpClient.Timeout = TimeSpan.FromSeconds(10);

        // Query scheduled releases. The response is disposed when the method exits.
        using var response = await httpClient.GetAsync(
            $"{orchestratorUrl.TrimEnd('/')}/api/v1/releases/scheduled",
            ct);

        if (!response.IsSuccessStatusCode)
        {
            // Read the status code up front so the evidence callback never touches the response.
            var statusCode = (int)response.StatusCode;
            return builder
                .Warn($"Cannot retrieve scheduled releases: HTTP {statusCode}")
                .WithEvidence("Schedule Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        var schedulesJson = await response.Content.ReadAsStringAsync(ct);
        var schedules = ParseScheduledReleases(schedulesJson);
        var now = context.TimeProvider.GetUtcNow();

        var upcoming24h = new List<ScheduleInfo>();
        var missedSchedules = new List<ScheduleInfo>();
        var conflicts = new List<(ScheduleInfo A, ScheduleInfo B)>();

        foreach (var schedule in schedules)
        {
            var timeUntil = schedule.ScheduledAt - now;
            if (timeUntil < TimeSpan.Zero && HasStatus(schedule, "pending"))
            {
                // Still pending although its scheduled time has passed.
                missedSchedules.Add(schedule);
            }
            else if (timeUntil > TimeSpan.Zero
                && timeUntil <= TimeSpan.FromHours(24)
                && !IsCancelled(schedule))
            {
                upcoming24h.Add(schedule);
            }
        }

        // Check for conflicts (same environment within 1 hour) among pending schedules.
        var pendingSchedules = schedules.Where(s => HasStatus(s, "pending")).ToList();
        for (int i = 0; i < pendingSchedules.Count; i++)
        {
            for (int j = i + 1; j < pendingSchedules.Count; j++)
            {
                var a = pendingSchedules[i];
                var b = pendingSchedules[j];
                if (a.TargetEnvironment == b.TargetEnvironment &&
                    Math.Abs((a.ScheduledAt - b.ScheduledAt).TotalHours) < 1)
                {
                    conflicts.Add((a, b));
                }
            }
        }

        if (missedSchedules.Count > 0)
        {
            return builder
                .Fail($"{missedSchedules.Count} scheduled release(s) missed")
                .WithEvidence("Release Schedule", eb =>
                {
                    eb.Add("scheduled_release_count", schedules.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("upcoming_24h_count", upcoming24h.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("missed_schedule_count", missedSchedules.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("conflict_count", conflicts.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("missed_releases", string.Join(", ", missedSchedules.Select(s => s.Name)));
                })
                .WithCauses(
                    "Release scheduler service not running",
                    "Prerequisite not met at scheduled time",
                    "Environment was unavailable")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "View missed schedules",
                        "stella release schedule list --missed",
                        CommandType.Shell);
                    rb.AddStep(2, "Reschedule or run immediately",
                        $"stella release schedule run {missedSchedules[0].Id}",
                        CommandType.Manual);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (conflicts.Count > 0)
        {
            return builder
                .Warn($"{conflicts.Count} schedule conflict(s) detected")
                .WithEvidence("Release Schedule", eb =>
                {
                    eb.Add("scheduled_release_count", schedules.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("upcoming_24h_count", upcoming24h.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("missed_schedule_count", "0");
                    eb.Add("conflict_count", conflicts.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("conflicts", string.Join("; ", conflicts.Select(c => $"{c.A.Name} vs {c.B.Name}")));
                })
                .WithCauses(
                    "Multiple releases to same environment scheduled too close",
                    "Manual schedule override without checking conflicts")
                .WithRemediation(rb => rb
                    .AddStep(1, "View schedule conflicts",
                        "stella release schedule list --conflicts",
                        CommandType.Shell)
                    .AddStep(2, "Reschedule one of the conflicting releases",
                        "stella release schedule update <id> --time <new-time>",
                        CommandType.Manual))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (upcoming24h.Count > 0)
        {
            return builder
                .Pass($"{upcoming24h.Count} release(s) scheduled in next 24 hours")
                .WithEvidence("Release Schedule", eb =>
                {
                    eb.Add("scheduled_release_count", schedules.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("upcoming_24h_count", upcoming24h.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("missed_schedule_count", "0");
                    eb.Add("conflict_count", "0");
                    // ":" in a custom format string is culture-specific, so format invariantly.
                    eb.Add("upcoming_releases", string.Join(", ", upcoming24h.Select(s =>
                        s.Name + "@" + s.ScheduledAt.ToString("HH:mm", CultureInfo.InvariantCulture))));
                })
                .Build();
        }

        return builder
            .Pass("No scheduled releases or issues")
            .WithEvidence("Release Schedule", eb =>
            {
                eb.Add("scheduled_release_count", schedules.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("upcoming_24h_count", "0");
                eb.Add("missed_schedule_count", "0");
                eb.Add("conflict_count", "0");
            })
            .Build();
    }
    catch (HttpRequestException ex)
    {
        return builder
            .Warn($"Cannot check release schedules: {ex.Message}")
            .WithEvidence("Schedule Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("error_message", ex.Message);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
    catch (TaskCanceledException)
    {
        // HttpClient raises TaskCanceledException both for its own timeout and when the caller's
        // token is cancelled; only the former is a timeout.
        var callerCancelled = ct.IsCancellationRequested;
        return builder
            .Warn(callerCancelled ? "Schedule check was cancelled" : "Schedule check timed out")
            .WithEvidence("Schedule Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("connection_error_type", callerCancelled ? "cancelled" : "timeout");
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
}
/// <summary>
/// Best-effort parser for the scheduled-releases payload.
/// </summary>
/// <remarks>
/// Accepts either a top-level JSON array or an object with a <c>schedules</c> array. An entry is
/// skipped when it is not an object, lacks a non-empty string <c>id</c> or <c>name</c>, or has no
/// parsable <c>scheduledAt</c>. A missing timestamp is never replaced with the current time, because
/// that would make a pending entry look like a missed release. Malformed JSON yields whatever was
/// parsed so far (possibly nothing); the method does not throw for bad payloads.
/// </remarks>
/// <param name="json">Raw response body.</param>
/// <returns>The entries that could be parsed; never null.</returns>
private static List<ScheduleInfo> ParseScheduledReleases(string json)
{
    // Returns the property only when it is a JSON string; GetString() throws for other kinds.
    static string? ReadString(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) && el.ValueKind == JsonValueKind.String
            ? el.GetString()
            : null;

    var schedules = new List<ScheduleInfo>();
    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        JsonElement schedulesArray = default;
        if (root.ValueKind == JsonValueKind.Array)
        {
            schedulesArray = root;
        }
        else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("schedules", out var arr))
        {
            schedulesArray = arr;
        }

        if (schedulesArray.ValueKind != JsonValueKind.Array)
        {
            return schedules;
        }

        foreach (var schedule in schedulesArray.EnumerateArray())
        {
            // TryGetProperty throws on non-object elements, so filter them out first.
            if (schedule.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var id = ReadString(schedule, "id");
            var name = ReadString(schedule, "name");
            if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
            {
                continue;
            }

            // Machine-readable timestamp: parse invariantly and treat offset-less values as UTC.
            if (!DateTimeOffset.TryParse(
                    ReadString(schedule, "scheduledAt"),
                    CultureInfo.InvariantCulture,
                    DateTimeStyles.AssumeUniversal,
                    out var scheduledAt))
            {
                continue;
            }

            schedules.Add(new ScheduleInfo
            {
                Id = id,
                Name = name,
                ScheduledAt = scheduledAt,
                TargetEnvironment = ReadString(schedule, "targetEnvironment") ?? "",
                Status = ReadString(schedule, "status") ?? "pending"
            });
        }
    }
    catch (Exception ex) when (ex is JsonException or InvalidOperationException)
    {
        // Best effort parsing: keep whatever was collected before the payload turned out malformed.
    }

    return schedules;
}
/// <summary>
/// One scheduled release as built by <c>ParseScheduledReleases</c>. All five members are required.
/// </summary>
private sealed record ScheduleInfo
{
public required string Id { get; init; }
public required string Name { get; init; }
public required DateTimeOffset ScheduledAt { get; init; }
// Compared between schedules to detect conflicts.
public required string TargetEnvironment { get; init; }
// Compared against "pending" when classifying schedules.
public required string Status { get; init; }
}
}

View File

@@ -0,0 +1,331 @@
// -----------------------------------------------------------------------------
// RollbackReadinessCheck.cs
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
// Task: RELPIPE-007 - Implement RollbackReadinessCheck
// Description: Check rollback capabilities for environments
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Release.Checks;
/// <summary>
/// Checks rollback capabilities for environments.
/// Verifies previous deployments are available and health probes are configured.
/// </summary>
public sealed class RollbackReadinessCheck : IDoctorCheck
{
private const string PluginId = "stellaops.doctor.release";
private const string CategoryName = "Release Pipeline";

/// <inheritdoc />
public string CheckId { get; } = "check.release.rollback.readiness";

/// <inheritdoc />
public string Name { get; } = "Rollback Readiness";

/// <inheritdoc />
public string Description { get; } = "Check rollback capabilities for production environments";

/// <inheritdoc />
public DoctorSeverity DefaultSeverity { get; } = DoctorSeverity.Warn;

/// <inheritdoc />
public IReadOnlyList<string> Tags
{
    // Evaluated on every access, matching the expression-bodied form it replaces.
    get { return ["release", "rollback", "disaster-recovery", "production"]; }
}

/// <inheritdoc />
public TimeSpan EstimatedDuration { get; } = TimeSpan.FromSeconds(10);
/// <inheritdoc />
/// <remarks>
/// The check is runnable when either <c>ReleaseOrchestrator:Url</c> or, if that key is null,
/// <c>Release:Orchestrator:Url</c> holds a non-empty value.
/// </remarks>
public bool CanRun(DoctorPluginContext context)
{
    var configuration = context.Configuration;
    var url = configuration["ReleaseOrchestrator:Url"] ?? configuration["Release:Orchestrator:Url"];
    if (string.IsNullOrEmpty(url))
    {
        return false;
    }

    return true;
}
/// <inheritdoc />
/// <remarks>
/// Requests <c>/api/v1/environments/rollback-status</c> (falling back to <c>/api/v1/environments</c>
/// when the first call is not successful), keeps only production environments, and reports in
/// priority order: environments that cannot roll back although a previous version exists (fail),
/// environments with no previous version (warn), environments without a health probe (warn),
/// otherwise pass. Non-success HTTP responses, transport errors, timeouts and cancellation are
/// reported as warnings.
/// </remarks>
/// <param name="context">Supplies configuration, services and the result builder.</param>
/// <param name="ct">Token forwarded to the HTTP calls.</param>
/// <returns>The built check result.</returns>
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    var builder = context.CreateResult(CheckId, PluginId, CategoryName);
    var orchestratorUrl = context.Configuration["ReleaseOrchestrator:Url"]
        ?? context.Configuration["Release:Orchestrator:Url"]
        ?? "http://localhost:5080";

    try
    {
        var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
        var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
        httpClient.Timeout = TimeSpan.FromSeconds(10);

        var baseUrl = orchestratorUrl.TrimEnd('/');

        // Query environments with rollback status
        var firstResponse = await httpClient.GetAsync(
            $"{baseUrl}/api/v1/environments/rollback-status",
            ct);

        if (!firstResponse.IsSuccessStatusCode)
        {
            // Release the failed response before issuing the fallback request.
            firstResponse.Dispose();

            // NOTE(review): if the fallback payload carries no canRollback/hasHealthProbe fields,
            // every production environment will be reported as not rollback-ready - confirm the
            // fallback endpoint's schema.
            firstResponse = await httpClient.GetAsync(
                $"{baseUrl}/api/v1/environments",
                ct);
        }

        // Whichever response is current is disposed when the method exits.
        using var response = firstResponse;

        if (!response.IsSuccessStatusCode)
        {
            // Read the status code up front so the evidence callback never touches the response.
            var statusCode = (int)response.StatusCode;
            return builder
                .Warn($"Cannot retrieve rollback status: HTTP {statusCode}")
                .WithEvidence("Rollback Status", eb =>
                {
                    eb.Add("orchestrator_url", orchestratorUrl);
                    eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        var statusJson = await response.Content.ReadAsStringAsync(ct);
        var environments = ParseRollbackStatus(statusJson);

        // Focus on production environments
        var prodEnvs = environments
            .Where(e => IsProd(e.Type))
            .ToList();

        if (prodEnvs.Count == 0)
        {
            return builder
                .Pass("No production environments to check")
                .WithEvidence("Rollback Readiness", eb =>
                {
                    eb.Add("prod_environment_count", "0");
                    eb.Add("total_environment_count", environments.Count.ToString(CultureInfo.InvariantCulture));
                })
                .Build();
        }

        var cannotRollback = new List<RollbackInfo>();
        var noHealthProbe = new List<RollbackInfo>();
        var noPreviousVersion = new List<RollbackInfo>();

        foreach (var env in prodEnvs)
        {
            if (!env.CanRollback)
            {
                // A missing previous version is the milder, expected case; anything else is a blocker.
                if (string.IsNullOrEmpty(env.PreviousVersion))
                {
                    noPreviousVersion.Add(env);
                }
                else
                {
                    cannotRollback.Add(env);
                }
            }

            if (!env.HasHealthProbe)
            {
                noHealthProbe.Add(env);
            }
        }

        var rollbackReady = prodEnvs.Count - cannotRollback.Count - noPreviousVersion.Count;

        if (cannotRollback.Count > 0)
        {
            return builder
                .Fail($"{cannotRollback.Count} production environment(s) cannot rollback")
                .WithEvidence("Rollback Readiness", eb =>
                {
                    eb.Add("prod_environment_count", prodEnvs.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("rollback_ready_count", rollbackReady.ToString(CultureInfo.InvariantCulture));
                    eb.Add("cannot_rollback_count", cannotRollback.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("no_health_probe_count", noHealthProbe.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("cannot_rollback_environments", string.Join(", ", cannotRollback.Select(e => e.Name)));

                    // Copy to a local so the null check narrows the value actually passed on.
                    var blocker = cannotRollback[0].RollbackBlocker;
                    if (!string.IsNullOrEmpty(blocker))
                    {
                        eb.Add("rollback_blocker", blocker);
                    }
                })
                .WithCauses(
                    "Previous deployment artifacts not retained",
                    "Database migration not reversible",
                    "Breaking change deployed",
                    "Rollback manually disabled")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "View rollback blockers",
                        $"stella env rollback-status {cannotRollback[0].Name}",
                        CommandType.Shell);
                    rb.AddStep(2, "Check deployment history",
                        $"stella env history {cannotRollback[0].Name}",
                        CommandType.Shell);
                    rb.AddStep(3, "Configure artifact retention",
                        "stella config set Release:ArtifactRetention:Count 5",
                        CommandType.Manual);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (noPreviousVersion.Count > 0)
        {
            return builder
                .Warn($"{noPreviousVersion.Count} production environment(s) have no previous version")
                .WithEvidence("Rollback Readiness", eb =>
                {
                    eb.Add("prod_environment_count", prodEnvs.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("rollback_ready_count", rollbackReady.ToString(CultureInfo.InvariantCulture));
                    eb.Add("no_previous_version_count", noPreviousVersion.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("no_health_probe_count", noHealthProbe.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("environments_without_previous", string.Join(", ", noPreviousVersion.Select(e => e.Name)));
                })
                .WithCauses(
                    "First deployment to environment",
                    "Deployment history cleared",
                    "Environment recently created")
                .WithRemediation(rb => rb
                    .AddStep(1, "This is expected for new environments",
                        "# After the next successful deployment, rollback will be available",
                        CommandType.Comment))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (noHealthProbe.Count > 0)
        {
            return builder
                .Warn($"{noHealthProbe.Count} production environment(s) missing health probes")
                .WithEvidence("Rollback Readiness", eb =>
                {
                    eb.Add("prod_environment_count", prodEnvs.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("rollback_ready_count", rollbackReady.ToString(CultureInfo.InvariantCulture));
                    eb.Add("no_health_probe_count", noHealthProbe.Count.ToString(CultureInfo.InvariantCulture));
                    eb.Add("environments_without_probe", string.Join(", ", noHealthProbe.Select(e => e.Name)));
                })
                .WithCauses(
                    "Health probe not configured",
                    "Auto-rollback on failure disabled")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "Configure health probe",
                        $"stella env configure {noHealthProbe[0].Name} --health-probe-url <url>",
                        CommandType.Manual);
                    rb.AddStep(2, "Enable auto-rollback",
                        $"stella env configure {noHealthProbe[0].Name} --auto-rollback-on-failure",
                        CommandType.Manual);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        return builder
            .Pass($"{prodEnvs.Count} production environment(s) ready for rollback")
            .WithEvidence("Rollback Readiness", eb =>
            {
                eb.Add("prod_environment_count", prodEnvs.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("rollback_ready_count", rollbackReady.ToString(CultureInfo.InvariantCulture));
                eb.Add("cannot_rollback_count", "0");
                eb.Add("no_health_probe_count", "0");
                eb.Add("prod_environments", string.Join(", ", prodEnvs.Select(e => $"{e.Name}:{e.CurrentVersion}")));
            })
            .Build();
    }
    catch (HttpRequestException ex)
    {
        return builder
            .Warn($"Cannot check rollback readiness: {ex.Message}")
            .WithEvidence("Rollback Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("error_message", ex.Message);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
    catch (TaskCanceledException)
    {
        // HttpClient raises TaskCanceledException both for its own timeout and when the caller's
        // token is cancelled; only the former is a timeout.
        var callerCancelled = ct.IsCancellationRequested;
        return builder
            .Warn(callerCancelled ? "Rollback readiness check was cancelled" : "Rollback readiness check timed out")
            .WithEvidence("Rollback Status", eb =>
            {
                eb.Add("orchestrator_url", orchestratorUrl);
                eb.Add("connection_error_type", callerCancelled ? "cancelled" : "timeout");
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
}
/// <summary>
/// Returns true when <paramref name="envType"/> is "prod" or "production", ignoring case.
/// </summary>
private static bool IsProd(string envType)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    if (envType.Equals("prod", IgnoreCase))
    {
        return true;
    }

    return envType.Equals("production", IgnoreCase);
}
/// <summary>
/// Best-effort parser for the environment rollback-status payload.
/// </summary>
/// <remarks>
/// Accepts either a top-level JSON array or an object with an <c>environments</c> array. An entry
/// is skipped when it is not an object or lacks a non-empty string <c>id</c> or <c>name</c>.
/// Fields of an unexpected JSON kind fall back to their defaults ("unknown" type, <c>false</c>
/// flags, <c>null</c> strings) so that one odd entry does not discard the environments after it.
/// Malformed JSON yields whatever was parsed so far (possibly nothing); callers cannot tell that
/// apart from a genuinely empty list.
/// </remarks>
/// <param name="json">Raw response body.</param>
/// <returns>The entries that could be parsed; never null.</returns>
private static List<RollbackInfo> ParseRollbackStatus(string json)
{
    // Returns the property only when it is a JSON string; GetString() throws for other kinds.
    static string? ReadString(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) && el.ValueKind == JsonValueKind.String
            ? el.GetString()
            : null;

    // True only for a literal JSON true; GetBoolean() would throw on null, strings or numbers.
    static bool ReadFlag(JsonElement obj, string property) =>
        obj.TryGetProperty(property, out var el) && el.ValueKind == JsonValueKind.True;

    var envs = new List<RollbackInfo>();
    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        JsonElement envsArray = default;
        if (root.ValueKind == JsonValueKind.Array)
        {
            envsArray = root;
        }
        else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("environments", out var arr))
        {
            envsArray = arr;
        }

        if (envsArray.ValueKind != JsonValueKind.Array)
        {
            return envs;
        }

        foreach (var env in envsArray.EnumerateArray())
        {
            // TryGetProperty throws on non-object elements, so filter them out first.
            if (env.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var id = ReadString(env, "id");
            var name = ReadString(env, "name");
            if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name))
            {
                continue;
            }

            envs.Add(new RollbackInfo
            {
                Id = id,
                Name = name,
                Type = ReadString(env, "type") ?? "unknown",
                CanRollback = ReadFlag(env, "canRollback"),
                PreviousVersion = ReadString(env, "previousVersion"),
                CurrentVersion = ReadString(env, "currentVersion"),
                HasHealthProbe = ReadFlag(env, "hasHealthProbe"),
                RollbackBlocker = ReadString(env, "rollbackBlocker")
            });
        }
    }
    catch (Exception ex) when (ex is JsonException or InvalidOperationException)
    {
        // Best effort parsing: keep whatever was collected before the payload turned out malformed.
    }

    return envs;
}
/// <summary>
/// Rollback-related state of one environment as built by <c>ParseRollbackStatus</c>.
/// <c>Id</c>, <c>Name</c> and <c>Type</c> are required; the remaining members are optional.
/// </summary>
private sealed record RollbackInfo
{
public required string Id { get; init; }
public required string Name { get; init; }
// Passed to IsProd to select production environments.
public required string Type { get; init; }
public bool CanRollback { get; init; }
// Null or empty is reported as "no previous version" rather than as a rollback blocker.
public string? PreviousVersion { get; init; }
public string? CurrentVersion { get; init; }
public bool HasHealthProbe { get; init; }
public string? RollbackBlocker { get; init; }
}
}

View File

@@ -0,0 +1,30 @@
// -----------------------------------------------------------------------------
// ReleasePluginServiceCollectionExtensions.cs
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
// Task: RELPIPE-001 - Create Release plugin scaffold
// Description: Extension methods for registering the Release plugin
// -----------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Release.DependencyInjection;
/// <summary>
/// Dependency-injection helpers for the Release Doctor plugin.
/// </summary>
public static class ReleasePluginServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="ReleaseDoctorPlugin"/> as a singleton <see cref="IDoctorPlugin"/>.
    /// </summary>
    /// <param name="services">The service collection to add the plugin to.</param>
    /// <returns>The same service collection, so calls can be chained.</returns>
    public static IServiceCollection AddDoctorReleasePlugin(this IServiceCollection services) =>
        services.AddSingleton<IDoctorPlugin, ReleaseDoctorPlugin>();
}

View File

@@ -0,0 +1,65 @@
// -----------------------------------------------------------------------------
// ReleaseDoctorPlugin.cs
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
// Task: RELPIPE-001 - Create Release plugin scaffold
// Description: Doctor plugin for release pipeline health monitoring
// -----------------------------------------------------------------------------
using StellaOps.Doctor.Plugin.Release.Checks;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Release;
/// <summary>
/// Doctor plugin that groups the release pipeline health checks.
/// </summary>
public sealed class ReleaseDoctorPlugin : IDoctorPlugin
{
    private static readonly Version CurrentPluginVersion = new(1, 0, 0);
    private static readonly Version RequiredEngineVersion = new(1, 0, 0);

    /// <inheritdoc />
    public string PluginId { get; } = "stellaops.doctor.release";

    /// <inheritdoc />
    public string DisplayName { get; } = "Release Pipeline";

    /// <inheritdoc />
    public DoctorCategory Category { get; } = DoctorCategory.Release;

    /// <inheritdoc />
    public Version Version
    {
        get { return CurrentPluginVersion; }
    }

    /// <inheritdoc />
    public Version MinEngineVersion
    {
        get { return RequiredEngineVersion; }
    }

    /// <inheritdoc />
    /// <remarks>Always returns true; this method performs no availability test of its own.</remarks>
    public bool IsAvailable(IServiceProvider services) => true;

    /// <inheritdoc />
    /// <remarks>Creates a fresh array with one new instance of each release check on every call.</remarks>
    public IReadOnlyList<IDoctorCheck> GetChecks(DoctorPluginContext context)
    {
        IDoctorCheck[] checks =
        [
            new ActiveReleaseHealthCheck(),
            new PromotionGateHealthCheck(),
            new EnvironmentReadinessCheck(),
            new ReleaseScheduleHealthCheck(),
            new ReleaseConfigurationCheck(),
            new RollbackReadinessCheck(),
        ];

        return checks;
    }

    /// <inheritdoc />
    /// <remarks>Nothing to initialize; completes synchronously.</remarks>
    public Task InitializeAsync(DoctorPluginContext context, CancellationToken ct) => Task.CompletedTask;
}

View File

@@ -0,0 +1,145 @@
// -----------------------------------------------------------------------------
// IReleaseHealthClient.cs
// Sprint: SPRINT_20260118_016_Doctor_release_pipeline_health
// Task: RELPIPE-001 - Create Release plugin scaffold
// Description: Interface for querying release orchestrator health status
// -----------------------------------------------------------------------------
namespace StellaOps.Doctor.Plugin.Release.Services;
/// <summary>
/// Client interface for querying release orchestrator health status.
/// </summary>
/// <remarks>
/// Every member is asynchronous and takes an optional <see cref="CancellationToken"/>.
/// </remarks>
public interface IReleaseHealthClient
{
/// <summary>
/// Gets all currently active releases.
/// </summary>
/// <param name="ct">Token used to cancel the operation.</param>
Task<IReadOnlyList<ActiveRelease>> GetActiveReleasesAsync(CancellationToken ct = default);
/// <summary>
/// Gets releases that are stuck or have issues.
/// </summary>
/// <param name="stuckThreshold">NOTE(review): presumably the minimum time in one state before a release counts as stuck - confirm against the implementation.</param>
/// <param name="ct">Token used to cancel the operation.</param>
Task<IReadOnlyList<StuckRelease>> GetStuckReleasesAsync(TimeSpan stuckThreshold, CancellationToken ct = default);
/// <summary>
/// Gets releases awaiting approval.
/// </summary>
/// <param name="ct">Token used to cancel the operation.</param>
Task<IReadOnlyList<PendingApproval>> GetPendingApprovalsAsync(CancellationToken ct = default);
/// <summary>
/// Gets configured environments and their status.
/// </summary>
/// <param name="ct">Token used to cancel the operation.</param>
Task<IReadOnlyList<EnvironmentStatus>> GetEnvironmentStatusesAsync(CancellationToken ct = default);
/// <summary>
/// Gets promotion gate configurations.
/// </summary>
/// <param name="ct">Token used to cancel the operation.</param>
Task<IReadOnlyList<PromotionGate>> GetPromotionGatesAsync(CancellationToken ct = default);
/// <summary>
/// Gets scheduled releases.
/// </summary>
/// <param name="ct">Token used to cancel the operation.</param>
Task<IReadOnlyList<ScheduledRelease>> GetScheduledReleasesAsync(CancellationToken ct = default);
/// <summary>
/// Gets rollback capabilities for an environment.
/// </summary>
/// <param name="environmentId">Identifier of the environment to query.</param>
/// <param name="ct">Token used to cancel the operation.</param>
Task<RollbackStatus> GetRollbackStatusAsync(string environmentId, CancellationToken ct = default);
}
/// <summary>
/// Represents an active release.
/// </summary>
/// <remarks>
/// <c>Id</c>, <c>Name</c>, <c>State</c> and <c>StartedAt</c> must be set at construction;
/// <c>CurrentStep</c>, <c>TargetEnvironment</c> and <c>Error</c> are optional and may be null.
/// </remarks>
public sealed record ActiveRelease
{
public required string Id { get; init; }
public required string Name { get; init; }
public required string State { get; init; }
public required DateTimeOffset StartedAt { get; init; }
public string? CurrentStep { get; init; }
public string? TargetEnvironment { get; init; }
public string? Error { get; init; }
}
/// <summary>
/// Represents a release that is stuck or has exceeded expected duration.
/// </summary>
/// <remarks>
/// <c>Id</c>, <c>Name</c>, <c>State</c> and <c>StuckDuration</c> must be set at construction;
/// <c>FailedStep</c> and <c>Error</c> are optional and may be null.
/// </remarks>
public sealed record StuckRelease
{
public required string Id { get; init; }
public required string Name { get; init; }
public required string State { get; init; }
public required TimeSpan StuckDuration { get; init; }
public string? FailedStep { get; init; }
public string? Error { get; init; }
}
/// <summary>
/// Represents a release awaiting approval.
/// </summary>
/// <remarks>
/// The release identifiers, gate name and request time are required. Both approver lists
/// default to empty lists when not set.
/// </remarks>
public sealed record PendingApproval
{
public required string ReleaseId { get; init; }
public required string ReleaseName { get; init; }
public required string ApprovalGate { get; init; }
public required DateTimeOffset RequestedAt { get; init; }
// Defaults to an empty list.
public IReadOnlyList<string> RequiredApprovers { get; init; } = [];
// Defaults to an empty list.
public IReadOnlyList<string> ReceivedApprovals { get; init; } = [];
}
/// <summary>
/// Represents the status of a target environment.
/// </summary>
/// <remarks>
/// <c>Id</c>, <c>Name</c>, <c>Type</c>, <c>IsReachable</c> and <c>IsHealthy</c> must be set at
/// construction; <c>CurrentVersion</c>, <c>Error</c> and <c>LastHealthCheck</c> are optional and may be null.
/// </remarks>
public sealed record EnvironmentStatus
{
public required string Id { get; init; }
public required string Name { get; init; }
public required string Type { get; init; } // dev, staging, prod
public required bool IsReachable { get; init; }
public required bool IsHealthy { get; init; }
public string? CurrentVersion { get; init; }
public string? Error { get; init; }
public DateTimeOffset? LastHealthCheck { get; init; }
}
/// <summary>
/// Represents a promotion gate configuration.
/// </summary>
/// <remarks>
/// The identifiers, source/target environments and the three <c>Requires*</c> flags must be set at
/// construction. <c>RequiredPolicies</c> and <c>RequiredAttestations</c> default to empty lists.
/// </remarks>
public sealed record PromotionGate
{
public required string Id { get; init; }
public required string Name { get; init; }
public required string SourceEnvironment { get; init; }
public required string TargetEnvironment { get; init; }
public required bool RequiresApproval { get; init; }
public required bool RequiresAttestations { get; init; }
public required bool RequiresPolicyPass { get; init; }
// Defaults to an empty list.
public IReadOnlyList<string> RequiredPolicies { get; init; } = [];
// Defaults to an empty list.
public IReadOnlyList<string> RequiredAttestations { get; init; } = [];
}
/// <summary>
/// Represents a scheduled release.
/// </summary>
/// <remarks>
/// <c>Id</c>, <c>Name</c>, <c>ScheduledAt</c> and <c>TargetEnvironment</c> must be set at construction;
/// <c>Status</c> is optional and may be null.
/// </remarks>
public sealed record ScheduledRelease
{
public required string Id { get; init; }
public required string Name { get; init; }
public required DateTimeOffset ScheduledAt { get; init; }
public required string TargetEnvironment { get; init; }
public string? Status { get; init; } // pending, confirmed, cancelled
}
/// <summary>
/// Represents rollback capabilities for an environment.
/// </summary>
/// <remarks>
/// <c>EnvironmentId</c> and <c>CanRollback</c> must be set at construction. <c>HasHealthProbe</c>
/// defaults to false; the remaining members are optional and may be null.
/// </remarks>
public sealed record RollbackStatus
{
public required string EnvironmentId { get; init; }
public required bool CanRollback { get; init; }
public string? PreviousVersion { get; init; }
public DateTimeOffset? PreviousDeployedAt { get; init; }
public bool HasHealthProbe { get; init; }
public string? RollbackBlocker { get; init; }
}

View File

@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the Release pipeline Doctor plugin. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<!-- Warnings (including nullable-reference warnings) fail the build. -->
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Doctor.Plugin.Release</RootNamespace>
<Description>Release pipeline health checks for Stella Ops Doctor diagnostics</Description>
</PropertyGroup>
<ItemGroup>
<!-- The only project dependency: the shared Doctor library. -->
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Doctor\StellaOps.Doctor.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,233 @@
// -----------------------------------------------------------------------------
// ReachabilityComputationHealthCheck.cs
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
// Task: SCAN-007 - Implement ReachabilityComputationHealthCheck
// Description: Monitor reachability computation health
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
/// <summary>
/// Monitors reachability computation health.
/// Checks computation success rates, performance, and accuracy.
/// </summary>
public sealed class ReachabilityComputationHealthCheck : IDoctorCheck
{
private const string PluginId = "stellaops.doctor.scanner";
private const string CategoryName = "Scanner & Reachability";

// Average computation time thresholds, in milliseconds.
private const int ComputationTimeWarningMs = 5_000;
private const int ComputationTimeCriticalMs = 30_000;

/// <inheritdoc />
public string CheckId { get; } = "check.scanner.reachability";

/// <inheritdoc />
public string Name { get; } = "Reachability Computation Health";

/// <inheritdoc />
public string Description { get; } = "Monitor reachability analysis performance and accuracy";

/// <inheritdoc />
public DoctorSeverity DefaultSeverity { get; } = DoctorSeverity.Warn;

/// <inheritdoc />
public IReadOnlyList<string> Tags
{
    // Evaluated on every access, matching the expression-bodied form it replaces.
    get { return ["scanner", "reachability", "analysis", "performance"]; }
}

/// <inheritdoc />
public TimeSpan EstimatedDuration { get; } = TimeSpan.FromSeconds(5);
/// <inheritdoc />
/// <remarks>
/// Runnable only when a scanner URL is configured. <c>Scanner:Url</c> is read first;
/// <c>Services:Scanner:Url</c> is consulted only when the first key is absent (null).
/// </remarks>
public bool CanRun(DoctorPluginContext context)
{
    var configuredUrl = context.Configuration["Scanner:Url"];
    if (configuredUrl is null)
    {
        configuredUrl = context.Configuration["Services:Scanner:Url"];
    }

    return configuredUrl is { Length: > 0 };
}
/// <inheritdoc />
/// <remarks>
/// Requests <c>/api/v1/reachability/stats</c> from the configured scanner and reports, in priority
/// order: a computation failure rate above 10% (fail), an average computation time at or above the
/// critical threshold (fail), at or above the warning threshold (warn), otherwise pass together
/// with the share of vulnerabilities classified unreachable. Non-success HTTP responses, transport
/// errors, timeouts and cancellation are reported as warnings.
/// </remarks>
/// <param name="context">Supplies configuration, services and the result builder.</param>
/// <param name="ct">Token forwarded to the HTTP calls.</param>
/// <returns>The built check result.</returns>
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    var builder = context.CreateResult(CheckId, PluginId, CategoryName);
    var scannerUrl = context.Configuration["Scanner:Url"]
        ?? context.Configuration["Services:Scanner:Url"]
        ?? "http://localhost:5090";

    try
    {
        var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
        var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
        httpClient.Timeout = TimeSpan.FromSeconds(10);

        // The response is disposed when the method exits.
        using var response = await httpClient.GetAsync(
            $"{scannerUrl.TrimEnd('/')}/api/v1/reachability/stats",
            ct);

        if (!response.IsSuccessStatusCode)
        {
            // Read the status code up front so the evidence callback never touches the response.
            var statusCode = (int)response.StatusCode;
            return builder
                .Warn($"Cannot retrieve reachability stats: HTTP {statusCode}")
                .WithEvidence("Reachability Status", eb =>
                {
                    eb.Add("scanner_url", scannerUrl);
                    eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        var json = await response.Content.ReadAsStringAsync(ct);
        var stats = ParseReachabilityStats(json);

        // Check for computation failures
        if (stats.ComputationFailures > 0)
        {
            // Failures reported without a total are treated as a 100% failure rate.
            var failureRate = stats.TotalComputations > 0
                ? (double)stats.ComputationFailures / stats.TotalComputations
                : 1.0;

            if (failureRate > 0.1)
            {
                // Format invariantly so the message matches the evidence regardless of host culture.
                var failureRateText = failureRate.ToString("P0", CultureInfo.InvariantCulture);
                return builder
                    .Fail($"Reachability computation failures: {stats.ComputationFailures} ({failureRateText})")
                    .WithEvidence("Reachability", eb =>
                    {
                        eb.Add("total_computations", stats.TotalComputations.ToString(CultureInfo.InvariantCulture));
                        eb.Add("computation_failures", stats.ComputationFailures.ToString(CultureInfo.InvariantCulture));
                        eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
                        eb.Add("avg_computation_time_ms", stats.AvgComputationTimeMs.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses(
                        "Invalid call graph",
                        "Missing slice data",
                        "Timeout on large codebases",
                        "Memory exhaustion")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "View computation errors",
                            "stella scanner reachability failures --recent",
                            CommandType.Shell);
                        rb.AddStep(2, "Retry failed computations",
                            "stella scanner reachability retry --failed",
                            CommandType.Manual);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }
        }

        // Check computation time
        if (stats.AvgComputationTimeMs >= ComputationTimeCriticalMs)
        {
            return builder
                .Fail($"Reachability computation critically slow: {stats.AvgComputationTimeMs}ms avg")
                .WithEvidence("Reachability", eb =>
                {
                    eb.Add("avg_computation_time_ms", stats.AvgComputationTimeMs.ToString(CultureInfo.InvariantCulture));
                    eb.Add("p95_computation_time_ms", stats.P95ComputationTimeMs.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_computations", stats.TotalComputations.ToString(CultureInfo.InvariantCulture));
                })
                .WithCauses(
                    "Large codebases",
                    "Complex call graphs",
                    "Insufficient resources",
                    "Cache misses")
                .WithRemediation(rb =>
                {
                    rb.AddStep(1, "Warm slice cache",
                        "stella scanner cache warm",
                        CommandType.Manual);
                    rb.AddStep(2, "Scale workers",
                        "stella scanner workers scale --replicas 4",
                        CommandType.Manual);
                })
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        if (stats.AvgComputationTimeMs >= ComputationTimeWarningMs)
        {
            return builder
                .Warn($"Reachability computation slow: {stats.AvgComputationTimeMs}ms avg")
                .WithEvidence("Reachability", eb =>
                {
                    eb.Add("avg_computation_time_ms", stats.AvgComputationTimeMs.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_computations", stats.TotalComputations.ToString(CultureInfo.InvariantCulture));
                    eb.Add("reachable_vulns", stats.ReachableVulns.ToString(CultureInfo.InvariantCulture));
                    eb.Add("unreachable_vulns", stats.UnreachableVulns.ToString(CultureInfo.InvariantCulture));
                })
                .WithCauses("Performance optimization needed")
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        // Calculate vulnerability filtering effectiveness
        var totalVulns = stats.ReachableVulns + stats.UnreachableVulns;
        var filterRate = totalVulns > 0 ? (double)stats.UnreachableVulns / totalVulns : 0;
        var filterRateText = filterRate.ToString("P0", CultureInfo.InvariantCulture);

        return builder
            .Pass($"Reachability healthy ({stats.AvgComputationTimeMs}ms avg, {filterRateText} filtered)")
            .WithEvidence("Reachability", eb =>
            {
                eb.Add("total_computations", stats.TotalComputations.ToString(CultureInfo.InvariantCulture));
                eb.Add("avg_computation_time_ms", stats.AvgComputationTimeMs.ToString(CultureInfo.InvariantCulture));
                eb.Add("reachable_vulns", stats.ReachableVulns.ToString(CultureInfo.InvariantCulture));
                eb.Add("unreachable_vulns", stats.UnreachableVulns.ToString(CultureInfo.InvariantCulture));
                eb.Add("filter_rate", filterRateText);
            })
            .Build();
    }
    catch (HttpRequestException ex)
    {
        return builder
            .Warn($"Cannot check reachability health: {ex.Message}")
            .WithEvidence("Reachability Status", eb =>
            {
                // Record which endpoint failed, as the non-success HTTP branch does.
                eb.Add("scanner_url", scannerUrl);
                eb.Add("error_message", ex.Message);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
    catch (TaskCanceledException)
    {
        // HttpClient raises TaskCanceledException both for its own timeout and when the caller's
        // token is cancelled; only the former is a timeout.
        var callerCancelled = ct.IsCancellationRequested;
        return builder
            .Warn(callerCancelled ? "Reachability health check was cancelled" : "Reachability health check timed out")
            .WithEvidence("Reachability Status", eb =>
            {
                eb.Add("scanner_url", scannerUrl);
                eb.Add("connection_error_type", callerCancelled ? "cancelled" : "timeout");
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }
}
/// <summary>
/// Parses the reachability statistics payload returned by the scanner.
/// </summary>
/// <param name="json">Raw JSON response body.</param>
/// <returns>
/// The parsed statistics. A property that is missing or not a JSON number is left at 0.
/// A payload that is not valid JSON, or whose root is not a JSON object, yields an all-zero instance.
/// </returns>
private static ReachabilityStats ParseReachabilityStats(string json)
{
    // Reads a single counter tolerantly, so that one unexpected field does not discard
    // every field that follows it (which is what a blanket catch used to cause).
    static int ReadCounter(JsonElement root, string propertyName)
    {
        if (!root.TryGetProperty(propertyName, out var value) || value.ValueKind != JsonValueKind.Number)
        {
            return 0;
        }

        if (value.TryGetInt32(out var whole))
        {
            return whole;
        }

        // Fractional numbers (e.g. an average reported as 812.5) are rounded;
        // values outside the int range are clamped instead of throwing.
        return value.TryGetDouble(out var fractional)
            ? (int)Math.Clamp(Math.Round(fractional), int.MinValue, int.MaxValue)
            : 0;
    }

    var stats = new ReachabilityStats();
    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        // TryGetProperty throws on non-object roots, so bail out early for arrays/scalars.
        if (root.ValueKind != JsonValueKind.Object)
        {
            return stats;
        }

        stats.TotalComputations = ReadCounter(root, "totalComputations");
        stats.ComputationFailures = ReadCounter(root, "computationFailures");
        stats.AvgComputationTimeMs = ReadCounter(root, "avgComputationTimeMs");
        stats.P95ComputationTimeMs = ReadCounter(root, "p95ComputationTimeMs");
        stats.ReachableVulns = ReadCounter(root, "reachableVulns");
        stats.UnreachableVulns = ReadCounter(root, "unreachableVulns");
    }
    catch (JsonException)
    {
        // Best effort: malformed JSON is reported to the caller as all-zero statistics.
        // Only JSON syntax errors are swallowed here; anything else propagates.
    }

    return stats;
}
/// <summary>
/// Mutable holder for the counters read from the reachability statistics payload.
/// Every property defaults to 0 until the parser assigns it.
/// </summary>
private sealed class ReachabilityStats
{
    // Computation volume.
    public int TotalComputations { get; set; }

    public int ComputationFailures { get; set; }

    // Computation timings, in milliseconds.
    public int AvgComputationTimeMs { get; set; }

    public int P95ComputationTimeMs { get; set; }

    // Vulnerability classification counts.
    public int ReachableVulns { get; set; }

    public int UnreachableVulns { get; set; }
}
}

View File

@@ -0,0 +1,201 @@
// -----------------------------------------------------------------------------
// SbomGenerationHealthCheck.cs
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
// Task: SCAN-003 - Implement SbomGenerationHealthCheck
// Description: Monitor SBOM generation health
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
/// <summary>
/// Monitors SBOM generation health.
/// Queries the scanner's <c>/api/v1/sbom/stats</c> endpoint and evaluates the generation
/// success rate and the number of validation failures it reports.
/// </summary>
public sealed class SbomGenerationHealthCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.scanner";
    private const string CategoryName = "Scanner & Reachability";

    // Success-rate thresholds as fractions: below Critical fails, below Warning warns.
    private const double SuccessRateWarning = 0.95;
    private const double SuccessRateCritical = 0.80;

    /// <inheritdoc />
    public string CheckId => "check.scanner.sbom";

    /// <inheritdoc />
    public string Name => "SBOM Generation Health";

    /// <inheritdoc />
    public string Description => "Monitor SBOM generation health and compliance";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["scanner", "sbom", "cyclonedx", "spdx", "compliance"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // The check only applies when a scanner URL is configured under either key.
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"];
        return !string.IsNullOrEmpty(scannerUrl);
    }

    /// <inheritdoc />
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"]
            ?? "http://localhost:5090";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Dispose the response so its content stream/connection is released promptly.
            using var response = await httpClient.GetAsync(
                $"{scannerUrl.TrimEnd('/')}/api/v1/sbom/stats",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                return builder
                    .Warn($"Cannot retrieve SBOM stats: HTTP {(int)response.StatusCode}")
                    .WithEvidence("SBOM Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);

            // An unreadable payload must not be reported as "healthy (0 generated, 100% success)".
            if (!TryParseSbomStats(json, out var stats))
            {
                return builder
                    .Warn("Cannot parse SBOM stats response")
                    .WithEvidence("SBOM Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("response_length", json.Length.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // With nothing generated yet there is nothing failing, so treat the rate as 100%.
            var successRate = stats.TotalGenerated > 0
                ? (double)stats.SuccessfulGenerations / stats.TotalGenerated
                : 1.0;

            if (successRate < SuccessRateCritical)
            {
                return builder
                    .Fail($"SBOM generation success rate critical: {successRate:P0}")
                    .WithEvidence("SBOM Generation", eb =>
                    {
                        eb.Add("total_generated", stats.TotalGenerated.ToString(CultureInfo.InvariantCulture));
                        eb.Add("successful_generations", stats.SuccessfulGenerations.ToString(CultureInfo.InvariantCulture));
                        eb.Add("failed_generations", stats.FailedGenerations.ToString(CultureInfo.InvariantCulture));
                        eb.Add("success_rate", successRate.ToString("P1", CultureInfo.InvariantCulture));
                        eb.Add("format_cyclonedx", stats.CycloneDxCount.ToString(CultureInfo.InvariantCulture));
                        eb.Add("format_spdx", stats.SpdxCount.ToString(CultureInfo.InvariantCulture));
                        eb.Add("validation_failures", stats.ValidationFailures.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses(
                        "Invalid source artifacts",
                        "Parser errors for specific ecosystems",
                        "Memory exhaustion on large projects",
                        "SBOM schema validation failing")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "View recent failures",
                            "stella scanner sbom failures --recent",
                            CommandType.Shell);
                        rb.AddStep(2, "Retry failed SBOMs",
                            "stella scanner sbom retry --failed",
                            CommandType.Manual);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (successRate < SuccessRateWarning || stats.ValidationFailures > 0)
            {
                var issues = new List<string>();
                if (successRate < SuccessRateWarning) issues.Add($"success rate {successRate:P0}");
                if (stats.ValidationFailures > 0) issues.Add($"{stats.ValidationFailures} validation failures");

                return builder
                    .Warn($"SBOM generation issues: {string.Join(", ", issues)}")
                    .WithEvidence("SBOM Generation", eb =>
                    {
                        eb.Add("total_generated", stats.TotalGenerated.ToString(CultureInfo.InvariantCulture));
                        eb.Add("success_rate", successRate.ToString("P1", CultureInfo.InvariantCulture));
                        eb.Add("validation_failures", stats.ValidationFailures.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses("Minor parsing issues", "Occasional format errors")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"SBOM generation healthy ({stats.TotalGenerated} generated, {successRate:P0} success)")
                .WithEvidence("SBOM Generation", eb =>
                {
                    eb.Add("total_generated", stats.TotalGenerated.ToString(CultureInfo.InvariantCulture));
                    eb.Add("success_rate", successRate.ToString("P1", CultureInfo.InvariantCulture));
                    eb.Add("format_cyclonedx", stats.CycloneDxCount.ToString(CultureInfo.InvariantCulture));
                    eb.Add("format_spdx", stats.SpdxCount.ToString(CultureInfo.InvariantCulture));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check SBOM health: {ex.Message}")
                .WithEvidence("SBOM Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            // NOTE(review): HttpClient raises TaskCanceledException both for its own timeout and
            // for cancellation of ct; both are currently reported as a timeout.
            return builder
                .Warn("SBOM health check timed out")
                .WithEvidence("SBOM Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Parses the SBOM statistics payload.
    /// </summary>
    /// <param name="json">Raw JSON response body.</param>
    /// <param name="stats">The parsed statistics; properties absent from the payload stay at 0.</param>
    /// <returns>
    /// <c>true</c> when the payload is a JSON object whose known properties have the expected
    /// numeric type; <c>false</c> when it is malformed, so the caller can report it explicitly.
    /// </returns>
    private static bool TryParseSbomStats(string json, out SbomStats stats)
    {
        stats = new SbomStats();
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return false;
            }

            stats.TotalGenerated = ReadInt32(root, "totalGenerated");
            stats.SuccessfulGenerations = ReadInt32(root, "successfulGenerations");
            stats.FailedGenerations = ReadInt32(root, "failedGenerations");
            stats.CycloneDxCount = ReadInt32(root, "cycloneDxCount");
            stats.SpdxCount = ReadInt32(root, "spdxCount");
            stats.ValidationFailures = ReadInt32(root, "validationFailures");
            return true;
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException or FormatException)
        {
            // JsonException: invalid JSON; InvalidOperationException: wrong value kind;
            // FormatException: number not representable as Int32.
            return false;
        }
    }

    // Returns the named property as an Int32, or 0 when the property is absent.
    private static int ReadInt32(JsonElement root, string propertyName) =>
        root.TryGetProperty(propertyName, out var value) ? value.GetInt32() : 0;

    /// <summary>Counters read from the SBOM statistics payload.</summary>
    private sealed class SbomStats
    {
        public int TotalGenerated { get; set; }
        public int SuccessfulGenerations { get; set; }
        public int FailedGenerations { get; set; }
        public int CycloneDxCount { get; set; }
        public int SpdxCount { get; set; }
        public int ValidationFailures { get; set; }
    }
}

View File

@@ -0,0 +1,232 @@
// -----------------------------------------------------------------------------
// ScannerQueueHealthCheck.cs
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
// Task: SCAN-002 - Implement ScannerQueueHealthCheck
// Description: Monitor scanner job queue health
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
/// <summary>
/// Monitors scanner job queue health.
/// Queries the scanner's <c>/api/v1/queue/stats</c> endpoint and evaluates stuck jobs,
/// queue depth, failure rate, and backlog growth; the processing rate is reported as evidence.
/// </summary>
public sealed class ScannerQueueHealthCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.scanner";
    private const string CategoryName = "Scanner & Reachability";

    // Queue depth thresholds (pending jobs).
    private const int QueueDepthWarning = 100;
    private const int QueueDepthCritical = 500;

    // Failure-rate thresholds as fractions of processed jobs.
    private const double FailureRateWarning = 0.05;
    private const double FailureRateCritical = 0.15;

    /// <inheritdoc />
    public string CheckId => "check.scanner.queue";

    /// <inheritdoc />
    public string Name => "Scanner Queue Health";

    /// <inheritdoc />
    public string Description => "Monitor scanner job queue health";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["scanner", "queue", "jobs", "processing"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // The check only applies when a scanner URL is configured under either key.
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"];
        return !string.IsNullOrEmpty(scannerUrl);
    }

    /// <inheritdoc />
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"]
            ?? "http://localhost:5090";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Dispose the response so its content stream/connection is released promptly.
            using var response = await httpClient.GetAsync(
                $"{scannerUrl.TrimEnd('/')}/api/v1/queue/stats",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                return builder
                    .Warn($"Cannot retrieve queue stats: HTTP {(int)response.StatusCode}")
                    .WithEvidence("Queue Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);

            // An unreadable payload must not be reported as a healthy, empty queue.
            if (!TryParseQueueStats(json, out var stats))
            {
                return builder
                    .Warn("Cannot parse queue stats response")
                    .WithEvidence("Queue Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("response_length", json.Length.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // TotalProcessed is 0 when the payload omits it, so this guard yields a 0 rate
            // instead of dividing the failure count by a made-up total.
            var failureRate = stats.TotalProcessed > 0
                ? (double)stats.FailedJobs / stats.TotalProcessed
                : 0;

            // Stuck jobs are always critical, regardless of depth or failure rate.
            if (stats.StuckJobs > 0)
            {
                return builder
                    .Fail($"{stats.StuckJobs} stuck job(s) in queue")
                    .WithEvidence("Queue", eb =>
                    {
                        eb.Add("queue_depth", stats.QueueDepth.ToString(CultureInfo.InvariantCulture));
                        eb.Add("processing_rate_per_min", stats.ProcessingRatePerMin.ToString("F1", CultureInfo.InvariantCulture));
                        eb.Add("stuck_jobs", stats.StuckJobs.ToString(CultureInfo.InvariantCulture));
                        eb.Add("failed_jobs", stats.FailedJobs.ToString(CultureInfo.InvariantCulture));
                        eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
                        eb.Add("oldest_job_age_min", stats.OldestJobAgeMinutes.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses(
                        "Scanner worker crashed",
                        "Job dependency unavailable",
                        "Resource exhaustion",
                        "Database connection lost")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "View stuck jobs",
                            "stella scanner queue list --status stuck",
                            CommandType.Shell);
                        rb.AddStep(2, "Retry stuck jobs",
                            "stella scanner queue retry --stuck",
                            CommandType.Shell);
                        rb.AddStep(3, "Check worker status",
                            "stella scanner workers status",
                            CommandType.Shell);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (stats.QueueDepth >= QueueDepthCritical || failureRate >= FailureRateCritical)
            {
                var issues = new List<string>();
                if (stats.QueueDepth >= QueueDepthCritical) issues.Add($"queue depth {stats.QueueDepth}");
                if (failureRate >= FailureRateCritical) issues.Add($"failure rate {failureRate:P0}");

                return builder
                    .Fail($"Scanner queue critical: {string.Join(", ", issues)}")
                    .WithEvidence("Queue", eb =>
                    {
                        eb.Add("queue_depth", stats.QueueDepth.ToString(CultureInfo.InvariantCulture));
                        eb.Add("processing_rate_per_min", stats.ProcessingRatePerMin.ToString("F1", CultureInfo.InvariantCulture));
                        eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
                        eb.Add("backlog_growing", stats.BacklogGrowing.ToString().ToLowerInvariant());
                    })
                    .WithCauses("High volume", "Workers overwhelmed", "High error rate")
                    .WithRemediation(rb => rb
                        .AddStep(1, "Scale workers", "stella scanner workers scale --replicas 4", CommandType.Manual))
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (stats.QueueDepth >= QueueDepthWarning || failureRate >= FailureRateWarning || stats.BacklogGrowing)
            {
                var issues = new List<string>();
                if (stats.QueueDepth >= QueueDepthWarning) issues.Add($"queue depth {stats.QueueDepth}");
                if (failureRate >= FailureRateWarning) issues.Add($"failure rate {failureRate:P0}");
                if (stats.BacklogGrowing) issues.Add("backlog growing");

                return builder
                    .Warn($"Scanner queue warning: {string.Join(", ", issues)}")
                    .WithEvidence("Queue", eb =>
                    {
                        eb.Add("queue_depth", stats.QueueDepth.ToString(CultureInfo.InvariantCulture));
                        eb.Add("processing_rate_per_min", stats.ProcessingRatePerMin.ToString("F1", CultureInfo.InvariantCulture));
                        eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
                        eb.Add("backlog_growing", stats.BacklogGrowing.ToString().ToLowerInvariant());
                    })
                    .WithCauses("Processing slower than ingest", "Temporary spike")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"Scanner queue healthy ({stats.QueueDepth} pending, {stats.ProcessingRatePerMin:F0}/min)")
                .WithEvidence("Queue", eb =>
                {
                    eb.Add("queue_depth", stats.QueueDepth.ToString(CultureInfo.InvariantCulture));
                    eb.Add("processing_rate_per_min", stats.ProcessingRatePerMin.ToString("F1", CultureInfo.InvariantCulture));
                    eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
                    // BacklogGrowing is necessarily false here; the warning branch above handles true.
                    eb.Add("backlog_growing", "false");
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check scanner queue: {ex.Message}")
                .WithEvidence("Queue Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            // NOTE(review): HttpClient raises TaskCanceledException both for its own timeout and
            // for cancellation of ct; both are currently reported as a timeout.
            return builder
                .Warn("Scanner queue check timed out")
                .WithEvidence("Queue Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Parses the queue statistics payload.
    /// </summary>
    /// <param name="json">Raw JSON response body.</param>
    /// <param name="stats">The parsed statistics; properties absent from the payload stay at 0/false.</param>
    /// <returns>
    /// <c>true</c> when the payload is a JSON object whose known properties have the expected
    /// type; <c>false</c> when it is malformed, so the caller can report it explicitly.
    /// </returns>
    private static bool TryParseQueueStats(string json, out QueueStats stats)
    {
        stats = new QueueStats();
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return false;
            }

            stats.QueueDepth = ReadInt32(root, "queueDepth");
            stats.ProcessingRatePerMin = root.TryGetProperty("processingRatePerMin", out var rate) ? rate.GetDouble() : 0;
            stats.StuckJobs = ReadInt32(root, "stuckJobs");
            stats.FailedJobs = ReadInt32(root, "failedJobs");
            stats.TotalProcessed = ReadInt32(root, "totalProcessed");
            stats.BacklogGrowing = root.TryGetProperty("backlogGrowing", out var growing) && growing.GetBoolean();
            stats.OldestJobAgeMinutes = ReadInt32(root, "oldestJobAgeMinutes");
            return true;
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException or FormatException)
        {
            // JsonException: invalid JSON; InvalidOperationException: wrong value kind;
            // FormatException: number not representable in the target type.
            return false;
        }
    }

    // Returns the named property as an Int32, or 0 when the property is absent.
    private static int ReadInt32(JsonElement root, string propertyName) =>
        root.TryGetProperty(propertyName, out var value) ? value.GetInt32() : 0;

    /// <summary>Values read from the queue statistics payload.</summary>
    private sealed class QueueStats
    {
        public int QueueDepth { get; set; }
        public double ProcessingRatePerMin { get; set; }
        public int StuckJobs { get; set; }
        public int FailedJobs { get; set; }

        // Defaults to 0 (unknown) so the failure-rate guard in RunAsync can detect a missing total.
        public int TotalProcessed { get; set; }
        public bool BacklogGrowing { get; set; }
        public int OldestJobAgeMinutes { get; set; }
    }
}

View File

@@ -0,0 +1,224 @@
// -----------------------------------------------------------------------------
// ScannerResourceUtilizationCheck.cs
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
// Task: SCAN-008 - Implement ScannerResourceUtilizationCheck
// Description: Monitor scanner resource utilization
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
/// <summary>
/// Monitors scanner resource utilization.
/// Queries the scanner's <c>/api/v1/resources/stats</c> endpoint and evaluates CPU and
/// memory utilization and whether the worker pool is saturated.
/// </summary>
public sealed class ScannerResourceUtilizationCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.scanner";
    private const string CategoryName = "Scanner & Reachability";

    // Utilization thresholds, compared against the fractions reported by the scanner.
    private const double CpuWarning = 0.75;
    private const double CpuCritical = 0.90;
    private const double MemoryWarning = 0.80;
    private const double MemoryCritical = 0.95;

    /// <inheritdoc />
    public string CheckId => "check.scanner.resources";

    /// <inheritdoc />
    public string Name => "Scanner Resource Utilization";

    /// <inheritdoc />
    public string Description => "Monitor scanner CPU, memory, and worker health";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["scanner", "resources", "cpu", "memory", "workers"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // The check only applies when a scanner URL is configured under either key.
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"];
        return !string.IsNullOrEmpty(scannerUrl);
    }

    /// <inheritdoc />
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"]
            ?? "http://localhost:5090";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Dispose the response so its content stream/connection is released promptly.
            using var response = await httpClient.GetAsync(
                $"{scannerUrl.TrimEnd('/')}/api/v1/resources/stats",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                return builder
                    .Warn($"Cannot retrieve resource stats: HTTP {(int)response.StatusCode}")
                    .WithEvidence("Resource Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);

            // An unreadable payload must not be reported as "healthy (CPU 0%, Memory 0%)".
            if (!TryParseResourceStats(json, out var stats))
            {
                return builder
                    .Warn("Cannot parse resource stats response")
                    .WithEvidence("Resource Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("response_length", json.Length.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // Check for critical resource issues
            if (stats.CpuUtilization >= CpuCritical || stats.MemoryUtilization >= MemoryCritical)
            {
                var issues = new List<string>();
                if (stats.CpuUtilization >= CpuCritical) issues.Add($"CPU {stats.CpuUtilization:P0}");
                if (stats.MemoryUtilization >= MemoryCritical) issues.Add($"Memory {stats.MemoryUtilization:P0}");

                return builder
                    .Fail($"Scanner resources critical: {string.Join(", ", issues)}")
                    .WithEvidence("Resources", eb =>
                    {
                        eb.Add("cpu_utilization", stats.CpuUtilization.ToString("P0", CultureInfo.InvariantCulture));
                        eb.Add("memory_utilization", stats.MemoryUtilization.ToString("P0", CultureInfo.InvariantCulture));
                        eb.Add("memory_used_mb", (stats.MemoryUsedBytes / 1024 / 1024).ToString(CultureInfo.InvariantCulture));
                        eb.Add("active_workers", stats.ActiveWorkers.ToString(CultureInfo.InvariantCulture));
                        eb.Add("total_workers", stats.TotalWorkers.ToString(CultureInfo.InvariantCulture));
                        eb.Add("idle_workers", stats.IdleWorkers.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses(
                        "High scan volume",
                        "Memory leak",
                        "Large artifacts being processed",
                        "Insufficient resources allocated")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "Scale scanner resources",
                            "stella scanner scale --memory +2G --cpu +2",
                            CommandType.Manual);
                        rb.AddStep(2, "Reduce concurrent jobs",
                            "stella scanner config set MaxConcurrentJobs 2",
                            CommandType.Manual);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // Check worker pool health: saturated when every worker is active and none is idle.
            // This takes precedence over the warning-level CPU/memory check below.
            if (stats.TotalWorkers > 0 && stats.ActiveWorkers == stats.TotalWorkers && stats.IdleWorkers == 0)
            {
                return builder
                    .Warn("All scanner workers are busy")
                    .WithEvidence("Resources", eb =>
                    {
                        eb.Add("active_workers", stats.ActiveWorkers.ToString(CultureInfo.InvariantCulture));
                        eb.Add("total_workers", stats.TotalWorkers.ToString(CultureInfo.InvariantCulture));
                        eb.Add("idle_workers", "0");
                        eb.Add("cpu_utilization", stats.CpuUtilization.ToString("P0", CultureInfo.InvariantCulture));
                    })
                    .WithCauses("High demand", "Consider scaling")
                    .WithRemediation(rb => rb
                        .AddStep(1, "Scale workers", "stella scanner workers scale --replicas 4", CommandType.Manual))
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // Check for warning-level resource usage
            if (stats.CpuUtilization >= CpuWarning || stats.MemoryUtilization >= MemoryWarning)
            {
                var issues = new List<string>();
                if (stats.CpuUtilization >= CpuWarning) issues.Add($"CPU {stats.CpuUtilization:P0}");
                if (stats.MemoryUtilization >= MemoryWarning) issues.Add($"Memory {stats.MemoryUtilization:P0}");

                return builder
                    .Warn($"Scanner resource usage elevated: {string.Join(", ", issues)}")
                    .WithEvidence("Resources", eb =>
                    {
                        eb.Add("cpu_utilization", stats.CpuUtilization.ToString("P0", CultureInfo.InvariantCulture));
                        eb.Add("memory_utilization", stats.MemoryUtilization.ToString("P0", CultureInfo.InvariantCulture));
                        eb.Add("active_workers", stats.ActiveWorkers.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses("Approaching limits")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"Scanner resources healthy (CPU {stats.CpuUtilization:P0}, Memory {stats.MemoryUtilization:P0})")
                .WithEvidence("Resources", eb =>
                {
                    eb.Add("cpu_utilization", stats.CpuUtilization.ToString("P0", CultureInfo.InvariantCulture));
                    eb.Add("memory_utilization", stats.MemoryUtilization.ToString("P0", CultureInfo.InvariantCulture));
                    eb.Add("active_workers", stats.ActiveWorkers.ToString(CultureInfo.InvariantCulture));
                    eb.Add("total_workers", stats.TotalWorkers.ToString(CultureInfo.InvariantCulture));
                    eb.Add("idle_workers", stats.IdleWorkers.ToString(CultureInfo.InvariantCulture));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check scanner resources: {ex.Message}")
                .WithEvidence("Resource Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            // NOTE(review): HttpClient raises TaskCanceledException both for its own timeout and
            // for cancellation of ct; both are currently reported as a timeout.
            return builder
                .Warn("Scanner resource check timed out")
                .WithEvidence("Resource Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Parses the resource statistics payload.
    /// </summary>
    /// <param name="json">Raw JSON response body.</param>
    /// <param name="stats">The parsed statistics; properties absent from the payload stay at 0.</param>
    /// <returns>
    /// <c>true</c> when the payload is a JSON object whose known properties have the expected
    /// numeric type; <c>false</c> when it is malformed, so the caller can report it explicitly.
    /// </returns>
    private static bool TryParseResourceStats(string json, out ResourceStats stats)
    {
        stats = new ResourceStats();
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return false;
            }

            stats.CpuUtilization = ReadDouble(root, "cpuUtilization");
            stats.MemoryUtilization = ReadDouble(root, "memoryUtilization");
            stats.MemoryUsedBytes = root.TryGetProperty("memoryUsedBytes", out var usedBytes) ? usedBytes.GetInt64() : 0;
            stats.TotalWorkers = ReadInt32(root, "totalWorkers");
            stats.ActiveWorkers = ReadInt32(root, "activeWorkers");
            stats.IdleWorkers = ReadInt32(root, "idleWorkers");
            return true;
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException or FormatException)
        {
            // JsonException: invalid JSON; InvalidOperationException: wrong value kind;
            // FormatException: number not representable in the target type.
            return false;
        }
    }

    // Returns the named property as an Int32, or 0 when the property is absent.
    private static int ReadInt32(JsonElement root, string propertyName) =>
        root.TryGetProperty(propertyName, out var value) ? value.GetInt32() : 0;

    // Returns the named property as a Double, or 0 when the property is absent.
    private static double ReadDouble(JsonElement root, string propertyName) =>
        root.TryGetProperty(propertyName, out var value) ? value.GetDouble() : 0;

    /// <summary>Values read from the resource statistics payload.</summary>
    private sealed class ResourceStats
    {
        public double CpuUtilization { get; set; }
        public double MemoryUtilization { get; set; }
        public long MemoryUsedBytes { get; set; }
        public int TotalWorkers { get; set; }
        public int ActiveWorkers { get; set; }
        public int IdleWorkers { get; set; }
    }
}

View File

@@ -0,0 +1,234 @@
// -----------------------------------------------------------------------------
// SliceCacheHealthCheck.cs
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
// Task: SCAN-006 - Implement SliceCacheHealthCheck
// Description: Monitor slice cache health
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
/// <summary>
/// Monitors slice cache health.
/// Checks cache hit rates, eviction rates, and storage utilization.
/// </summary>
public sealed class SliceCacheHealthCheck : IDoctorCheck
{
private const string PluginId = "stellaops.doctor.scanner";
private const string CategoryName = "Scanner & Reachability";
private const double HitRateWarning = 0.50;
private const double HitRateCritical = 0.20;
private const double StorageWarning = 0.80;
private const double StorageCritical = 0.95;
/// <inheritdoc />
public string CheckId => "check.scanner.slice.cache";
/// <inheritdoc />
public string Name => "Slice Cache Health";
/// <inheritdoc />
public string Description => "Monitor slice cache hit rates and storage";
/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;
/// <inheritdoc />
public IReadOnlyList<string> Tags => ["scanner", "cache", "slice", "performance"];
/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);
/// <inheritdoc />
public bool CanRun(DoctorPluginContext context)
{
var scannerUrl = context.Configuration["Scanner:Url"]
?? context.Configuration["Services:Scanner:Url"];
return !string.IsNullOrEmpty(scannerUrl);
}
/// <inheritdoc />
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
var builder = context.CreateResult(CheckId, PluginId, CategoryName);
var scannerUrl = context.Configuration["Scanner:Url"]
?? context.Configuration["Services:Scanner:Url"]
?? "http://localhost:5090";
try
{
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
httpClient.Timeout = TimeSpan.FromSeconds(10);
var response = await httpClient.GetAsync(
$"{scannerUrl.TrimEnd('/')}/api/v1/cache/stats",
ct);
if (!response.IsSuccessStatusCode)
{
return builder
.Warn($"Cannot retrieve slice cache stats: HTTP {(int)response.StatusCode}")
.WithEvidence("Slice Cache Status", eb =>
{
eb.Add("scanner_url", scannerUrl);
eb.Add("http_status_code", ((int)response.StatusCode).ToString(CultureInfo.InvariantCulture));
})
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
var json = await response.Content.ReadAsStringAsync(ct);
var stats = ParseCacheStats(json);
// Check storage utilization first
if (stats.StorageUtilization >= StorageCritical)
{
return builder
.Fail($"Slice cache storage critical: {stats.StorageUtilization:P0} full")
.WithEvidence("Slice Cache", eb =>
{
eb.Add("storage_utilization", stats.StorageUtilization.ToString("P0", CultureInfo.InvariantCulture));
eb.Add("used_bytes", stats.UsedBytes.ToString(CultureInfo.InvariantCulture));
eb.Add("total_bytes", stats.TotalBytes.ToString(CultureInfo.InvariantCulture));
eb.Add("entry_count", stats.EntryCount.ToString(CultureInfo.InvariantCulture));
eb.Add("eviction_rate", stats.EvictionRatePerHour.ToString("F1", CultureInfo.InvariantCulture));
})
.WithCauses(
"Cache size limit too small",
"TTL too long",
"Eviction not working",
"Unexpected growth in slices")
.WithRemediation(rb =>
{
rb.AddStep(1, "Clear stale entries",
"stella scanner cache prune --stale",
CommandType.Shell);
rb.AddStep(2, "Increase cache size",
"# Update Scanner:Cache:MaxSizeBytes in configuration",
CommandType.Manual);
})
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
// Check hit rate
var hitRate = stats.Hits + stats.Misses > 0
? (double)stats.Hits / (stats.Hits + stats.Misses)
: 1.0;
if (hitRate < HitRateCritical)
{
return builder
.Fail($"Slice cache hit rate critical: {hitRate:P0}")
.WithEvidence("Slice Cache", eb =>
{
eb.Add("hit_rate", hitRate.ToString("P0", CultureInfo.InvariantCulture));
eb.Add("hits", stats.Hits.ToString(CultureInfo.InvariantCulture));
eb.Add("misses", stats.Misses.ToString(CultureInfo.InvariantCulture));
eb.Add("eviction_rate", stats.EvictionRatePerHour.ToString("F1", CultureInfo.InvariantCulture));
})
.WithCauses(
"Cache size too small",
"High eviction rate",
"Cache was recently cleared",
"Working set larger than cache")
.WithRemediation(rb =>
{
rb.AddStep(1, "Increase cache size",
"# Update Scanner:Cache:MaxSizeBytes in configuration",
CommandType.Manual);
rb.AddStep(2, "Warm cache",
"stella scanner cache warm",
CommandType.Manual);
})
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
if (stats.StorageUtilization >= StorageWarning || hitRate < HitRateWarning)
{
var issues = new List<string>();
if (stats.StorageUtilization >= StorageWarning) issues.Add($"storage {stats.StorageUtilization:P0}");
if (hitRate < HitRateWarning) issues.Add($"hit rate {hitRate:P0}");
return builder
.Warn($"Slice cache: {string.Join(", ", issues)}")
.WithEvidence("Slice Cache", eb =>
{
eb.Add("hit_rate", hitRate.ToString("P0", CultureInfo.InvariantCulture));
eb.Add("storage_utilization", stats.StorageUtilization.ToString("P0", CultureInfo.InvariantCulture));
eb.Add("entry_count", stats.EntryCount.ToString(CultureInfo.InvariantCulture));
})
.WithCauses("Approaching limits", "Consider tuning")
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
return builder
.Pass($"Slice cache healthy ({hitRate:P0} hit rate, {stats.EntryCount} entries)")
.WithEvidence("Slice Cache", eb =>
{
eb.Add("hit_rate", hitRate.ToString("P0", CultureInfo.InvariantCulture));
eb.Add("entry_count", stats.EntryCount.ToString(CultureInfo.InvariantCulture));
eb.Add("storage_utilization", stats.StorageUtilization.ToString("P0", CultureInfo.InvariantCulture));
})
.Build();
}
catch (HttpRequestException ex)
{
return builder
.Warn($"Cannot check slice cache health: {ex.Message}")
.WithEvidence("Slice Cache Status", eb => eb.Add("error_message", ex.Message))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
catch (TaskCanceledException)
{
return builder
.Warn("Slice cache health check timed out")
.WithEvidence("Slice Cache Status", eb => eb.Add("connection_error_type", "timeout"))
.WithVerification($"stella doctor --check {CheckId}")
.Build();
}
}
/// <summary>
/// Parses the slice cache statistics JSON payload into a <see cref="CacheStats"/> instance.
/// </summary>
/// <param name="json">Raw JSON body returned by the cache stats request.</param>
/// <returns>
/// The parsed statistics. Parsing is best-effort: a missing, non-numeric or out-of-range
/// field falls back to its default (0, or 1 for <c>totalBytes</c>), and a payload that is
/// empty, malformed or not a JSON object yields an all-default instance.
/// </returns>
private static CacheStats ParseCacheStats(string json)
{
    var stats = new CacheStats();
    if (string.IsNullOrWhiteSpace(json))
    {
        return stats;
    }

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;
        if (root.ValueKind != JsonValueKind.Object)
        {
            return stats;
        }

        // Each field is read independently so that one malformed value does not
        // discard the fields that follow it.
        stats.Hits = ReadInt64(root, "hits", 0);
        stats.Misses = ReadInt64(root, "misses", 0);
        stats.EntryCount = ReadInt32(root, "entryCount", 0);
        stats.UsedBytes = ReadInt64(root, "usedBytes", 0);
        stats.TotalBytes = ReadInt64(root, "totalBytes", 1);
        stats.EvictionRatePerHour = ReadDouble(root, "evictionRatePerHour", 0);

        // Guard against a zero or negative capacity so the ratio is always well-defined.
        stats.StorageUtilization = stats.TotalBytes > 0
            ? (double)stats.UsedBytes / stats.TotalBytes
            : 0;
    }
    catch (JsonException)
    {
        // Malformed JSON: keep the defaults, this helper is deliberately best-effort.
    }

    return stats;

    static long ReadInt64(JsonElement element, string name, long fallback) =>
        element.TryGetProperty(name, out var value)
            && value.ValueKind == JsonValueKind.Number
            && value.TryGetInt64(out var number)
            ? number
            : fallback;

    static int ReadInt32(JsonElement element, string name, int fallback) =>
        element.TryGetProperty(name, out var value)
            && value.ValueKind == JsonValueKind.Number
            && value.TryGetInt32(out var number)
            ? number
            : fallback;

    static double ReadDouble(JsonElement element, string name, double fallback) =>
        element.TryGetProperty(name, out var value)
            && value.ValueKind == JsonValueKind.Number
            && value.TryGetDouble(out var number)
            ? number
            : fallback;
}
/// <summary>
/// Mutable holder for the slice cache statistics populated by <c>ParseCacheStats</c>.
/// </summary>
private sealed class CacheStats
{
    /// <summary>Value of the <c>entryCount</c> field.</summary>
    public int EntryCount { get; set; }

    /// <summary>Value of the <c>hits</c> field.</summary>
    public long Hits { get; set; }

    /// <summary>Value of the <c>misses</c> field.</summary>
    public long Misses { get; set; }

    /// <summary>Value of the <c>usedBytes</c> field.</summary>
    public long UsedBytes { get; set; }

    /// <summary>
    /// Value of the <c>totalBytes</c> field. Starts at 1 rather than 0, matching the
    /// fallback the parser uses when the field is absent.
    /// </summary>
    public long TotalBytes { get; set; } = 1;

    /// <summary>Value of the <c>evictionRatePerHour</c> field.</summary>
    public double EvictionRatePerHour { get; set; }

    /// <summary>Ratio of <see cref="UsedBytes"/> to <see cref="TotalBytes"/>, set by the parser.</summary>
    public double StorageUtilization { get; set; }
}
}

View File

@@ -0,0 +1,218 @@
// -----------------------------------------------------------------------------
// VulnerabilityScanHealthCheck.cs
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
// Task: SCAN-004 - Implement VulnerabilityScanHealthCheck
// Description: Monitor vulnerability scanning health
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
/// <summary>
/// Monitors vulnerability scanning health.
/// Queries the scanner's <c>/api/v1/vuln/stats</c> endpoint and evaluates vulnerability
/// database freshness and the scan failure rate.
/// </summary>
public sealed class VulnerabilityScanHealthCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.scanner";
    private const string CategoryName = "Scanner & Reachability";

    // Database age, in hours, at which the check warns / fails.
    private const int DbStaleHours = 24;
    private const int DbCriticalHours = 72;

    // Share of failed scans above which the check warns.
    private const double FailureRateWarning = 0.1;

    /// <inheritdoc />
    public string CheckId => "check.scanner.vuln";

    /// <inheritdoc />
    public string Name => "Vulnerability Scan Health";

    /// <inheritdoc />
    public string Description => "Monitor vulnerability scanning and database freshness";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["scanner", "vulnerability", "cve", "database"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"];
        return !string.IsNullOrEmpty(scannerUrl);
    }

    /// <summary>
    /// Fetches the vulnerability statistics and maps them to a check result.
    /// </summary>
    /// <param name="context">Plugin context supplying configuration, services and the time provider.</param>
    /// <param name="ct">Token used to cancel the HTTP request.</param>
    /// <returns>
    /// Fail when the database is at least <see cref="DbCriticalHours"/> hours old; Warn when it
    /// is at least <see cref="DbStaleHours"/> hours old, when its age cannot be determined, when
    /// the scan failure rate exceeds <see cref="FailureRateWarning"/>, or when the scanner cannot
    /// be queried; Pass otherwise.
    /// </returns>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"]
            ?? "http://localhost:5090";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Dispose the response so the underlying connection is released promptly.
            using var response = await httpClient.GetAsync(
                $"{scannerUrl.TrimEnd('/')}/api/v1/vuln/stats",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                // Copy the status code so the evidence callback does not touch the response.
                var statusCode = (int)response.StatusCode;
                return builder
                    .Warn($"Cannot retrieve vulnerability stats: HTTP {statusCode}")
                    .WithEvidence("Vulnerability Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);

            // An unreadable payload must not be reported as healthy.
            if (ParseVulnStats(json, context.TimeProvider) is not { } stats)
            {
                return builder
                    .Warn("Cannot interpret vulnerability stats response")
                    .WithEvidence("Vulnerability Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("response_length", json.Length.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses("Scanner returned malformed or non-object JSON")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var totalCvesText = stats.TotalCves.ToString(CultureInfo.InvariantCulture);
            var scansCompletedText = stats.ScansCompleted.ToString(CultureInfo.InvariantCulture);

            // Without a usable timestamp the age would default to 0 hours and look fresh.
            if (stats.LastDbUpdate is not { } lastDbUpdate)
            {
                return builder
                    .Warn("Vulnerability database freshness unknown (no valid lastDbUpdate reported)")
                    .WithEvidence("Vulnerability Scanning", eb =>
                    {
                        eb.Add("last_db_update", "unknown");
                        eb.Add("total_cves", totalCvesText);
                        eb.Add("scans_completed", scansCompletedText);
                    })
                    .WithCauses(
                        "Database has never been synchronized",
                        "Scanner did not report a parseable lastDbUpdate timestamp")
                    .WithRemediation(rb => rb
                        .AddStep(1, "Check sync status", "stella scanner db status", CommandType.Shell))
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var databaseAgeText = stats.DatabaseAgeHours.ToString(CultureInfo.InvariantCulture);
            var lastDbUpdateText = lastDbUpdate.ToString("o", CultureInfo.InvariantCulture);

            // Check database freshness first - most critical
            if (stats.DatabaseAgeHours >= DbCriticalHours)
            {
                return builder
                    .Fail($"Vulnerability database critically stale ({stats.DatabaseAgeHours}h old)")
                    .WithEvidence("Vulnerability Scanning", eb =>
                    {
                        eb.Add("database_age_hours", databaseAgeText);
                        eb.Add("last_db_update", lastDbUpdateText);
                        eb.Add("total_cves", totalCvesText);
                        eb.Add("scans_completed", scansCompletedText);
                    })
                    .WithCauses(
                        "Database sync job failed",
                        "Feed source unavailable",
                        "Network connectivity issue")
                    .WithRemediation(rb =>
                    {
                        rb.AddStep(1, "Trigger database sync",
                            "stella scanner db sync",
                            CommandType.Shell);
                        rb.AddStep(2, "Check sync status",
                            "stella scanner db status",
                            CommandType.Shell);
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            if (stats.DatabaseAgeHours >= DbStaleHours)
            {
                return builder
                    .Warn($"Vulnerability database stale ({stats.DatabaseAgeHours}h old)")
                    .WithEvidence("Vulnerability Scanning", eb =>
                    {
                        eb.Add("database_age_hours", databaseAgeText);
                        eb.Add("last_db_update", lastDbUpdateText);
                        eb.Add("total_cves", totalCvesText);
                    })
                    .WithCauses("Scheduled sync delayed")
                    .WithRemediation(rb => rb
                        .AddStep(1, "Check sync schedule", "stella scanner db schedule", CommandType.Shell))
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // Check scan health. Failures are evaluated even when no scan completed,
            // so a 100% failure rate is not mistaken for a healthy scanner.
            if (stats.ScanFailures > 0)
            {
                // Sum as long so two large counters cannot overflow.
                var totalScans = (long)stats.ScansCompleted + stats.ScanFailures;
                var failureRate = totalScans > 0
                    ? (double)stats.ScanFailures / totalScans
                    : 1.0;

                if (failureRate > FailureRateWarning)
                {
                    return builder
                        .Warn($"Elevated scan failure rate: {failureRate:P0}")
                        .WithEvidence("Vulnerability Scanning", eb =>
                        {
                            eb.Add("scans_completed", scansCompletedText);
                            eb.Add("scan_failures", stats.ScanFailures.ToString(CultureInfo.InvariantCulture));
                            eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
                        })
                        .WithCauses("Parsing errors", "Unsupported formats")
                        .WithVerification($"stella doctor --check {CheckId}")
                        .Build();
                }
            }

            return builder
                .Pass($"Vulnerability scanning healthy (DB {stats.DatabaseAgeHours}h old, {stats.TotalCves} CVEs)")
                .WithEvidence("Vulnerability Scanning", eb =>
                {
                    eb.Add("database_age_hours", databaseAgeText);
                    eb.Add("total_cves", totalCvesText);
                    eb.Add("scans_completed", scansCompletedText);
                    eb.Add("vulnerabilities_found", stats.VulnerabilitiesFound.ToString(CultureInfo.InvariantCulture));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check vulnerability health: {ex.Message}")
                .WithEvidence("Vulnerability Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            // Raised both when the 10 second client timeout elapses and when ct is cancelled.
            return builder
                .Warn("Vulnerability health check timed out")
                .WithEvidence("Vulnerability Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Parses the vulnerability statistics payload.
    /// </summary>
    /// <param name="json">Raw JSON body returned by the stats endpoint.</param>
    /// <param name="timeProvider">Clock used to compute the database age.</param>
    /// <returns>
    /// The parsed statistics, or <c>null</c> when the payload is empty, malformed or not a JSON
    /// object. Individual counters that are missing or not valid 32-bit integers read as 0;
    /// <see cref="VulnStats.LastDbUpdate"/> stays <c>null</c> when no parseable timestamp is present.
    /// </returns>
    private static VulnStats? ParseVulnStats(string json, TimeProvider timeProvider)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return null;
        }

        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            var stats = new VulnStats
            {
                TotalCves = ReadInt32(root, "totalCves"),
                ScansCompleted = ReadInt32(root, "scansCompleted"),
                ScanFailures = ReadInt32(root, "scanFailures"),
                VulnerabilitiesFound = ReadInt32(root, "vulnerabilitiesFound"),
            };

            // Parse with the invariant culture so the result does not depend on the host locale.
            // NOTE(review): timestamps without an offset are treated as UTC - confirm against the scanner API.
            if (root.TryGetProperty("lastDbUpdate", out var ldu) &&
                ldu.ValueKind == JsonValueKind.String &&
                DateTimeOffset.TryParse(
                    ldu.GetString(),
                    CultureInfo.InvariantCulture,
                    DateTimeStyles.AssumeUniversal,
                    out var lastUpdate))
            {
                stats.LastDbUpdate = lastUpdate;
                stats.DatabaseAgeHours = (int)(timeProvider.GetUtcNow() - lastUpdate).TotalHours;
            }

            return stats;
        }
        catch (JsonException)
        {
            return null;
        }
    }

    /// <summary>Reads an integer property, returning 0 when it is absent or not a valid 32-bit number.</summary>
    private static int ReadInt32(JsonElement root, string propertyName) =>
        root.TryGetProperty(propertyName, out var value)
            && value.ValueKind == JsonValueKind.Number
            && value.TryGetInt32(out var number)
            ? number
            : 0;

    /// <summary>Statistics extracted from the vulnerability stats payload.</summary>
    private sealed class VulnStats
    {
        public DateTimeOffset? LastDbUpdate { get; set; }
        public int DatabaseAgeHours { get; set; }
        public int TotalCves { get; set; }
        public int ScansCompleted { get; set; }
        public int ScanFailures { get; set; }
        public int VulnerabilitiesFound { get; set; }
    }
}

View File

@@ -0,0 +1,215 @@
// -----------------------------------------------------------------------------
// WitnessGraphHealthCheck.cs
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
// Task: SCAN-005 - Implement WitnessGraphHealthCheck
// Description: Monitor witness graph construction health
// -----------------------------------------------------------------------------
using System.Globalization;
using System.Net.Http;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Models;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Scanner.Checks;
/// <summary>
/// Monitors witness graph construction health.
/// Queries the scanner's <c>/api/v1/witness/stats</c> endpoint and evaluates construction
/// failures, incomplete graphs and consistency errors.
/// </summary>
public sealed class WitnessGraphHealthCheck : IDoctorCheck
{
    private const string PluginId = "stellaops.doctor.scanner";
    private const string CategoryName = "Scanner & Reachability";

    // Share of failed constructions above which the check fails.
    private const double FailureRateThreshold = 0.1;

    /// <inheritdoc />
    public string CheckId => "check.scanner.witness.graph";

    /// <inheritdoc />
    public string Name => "Witness Graph Health";

    /// <inheritdoc />
    public string Description => "Monitor witness graph construction and integrity";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["scanner", "witness", "graph", "reachability", "evidence"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(5);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"];
        return !string.IsNullOrEmpty(scannerUrl);
    }

    /// <summary>
    /// Fetches the witness graph statistics and maps them to a check result.
    /// </summary>
    /// <param name="context">Plugin context supplying configuration and services.</param>
    /// <param name="ct">Token used to cancel the HTTP request.</param>
    /// <returns>
    /// Fail when the construction failure rate exceeds <see cref="FailureRateThreshold"/>; Warn
    /// when graphs are incomplete or inconsistent, or when the scanner cannot be queried or its
    /// response cannot be interpreted; Pass otherwise.
    /// </returns>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);
        var scannerUrl = context.Configuration["Scanner:Url"]
            ?? context.Configuration["Services:Scanner:Url"]
            ?? "http://localhost:5090";

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(10);

            // Dispose the response so the underlying connection is released promptly.
            using var response = await httpClient.GetAsync(
                $"{scannerUrl.TrimEnd('/')}/api/v1/witness/stats",
                ct);

            if (!response.IsSuccessStatusCode)
            {
                // Copy the status code so the evidence callback does not touch the response.
                var statusCode = (int)response.StatusCode;
                return builder
                    .Warn($"Cannot retrieve witness graph stats: HTTP {statusCode}")
                    .WithEvidence("Witness Graph Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("http_status_code", statusCode.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var json = await response.Content.ReadAsStringAsync(ct);

            // An unreadable payload must not be reported as "healthy (0 graphs)".
            if (ParseWitnessStats(json) is not { } stats)
            {
                return builder
                    .Warn("Cannot interpret witness graph stats response")
                    .WithEvidence("Witness Graph Status", eb =>
                    {
                        eb.Add("scanner_url", scannerUrl);
                        eb.Add("response_length", json.Length.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses("Scanner returned malformed or non-object JSON")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            var totalConstructedText = stats.TotalConstructed.ToString(CultureInfo.InvariantCulture);
            var incompleteGraphsText = stats.IncompleteGraphs.ToString(CultureInfo.InvariantCulture);

            // Check for construction failures
            if (stats.ConstructionFailures > 0)
            {
                // No successful construction at all counts as a 100% failure rate.
                // The denominator is summed as long so two large counters cannot overflow.
                var failureRate = stats.TotalConstructed > 0
                    ? (double)stats.ConstructionFailures / ((long)stats.TotalConstructed + stats.ConstructionFailures)
                    : 1.0;

                if (failureRate > FailureRateThreshold)
                {
                    return builder
                        .Fail($"Witness graph construction failures: {stats.ConstructionFailures} ({failureRate:P0})")
                        .WithEvidence("Witness Graph", eb =>
                        {
                            eb.Add("total_constructed", totalConstructedText);
                            eb.Add("construction_failures", stats.ConstructionFailures.ToString(CultureInfo.InvariantCulture));
                            eb.Add("failure_rate", failureRate.ToString("P1", CultureInfo.InvariantCulture));
                            eb.Add("incomplete_graphs", incompleteGraphsText);
                            eb.Add("avg_nodes_per_graph", stats.AvgNodesPerGraph.ToString("F0", CultureInfo.InvariantCulture));
                        })
                        .WithCauses(
                            "Missing SBOM input",
                            "Parser error on artifact",
                            "Cyclical dependency detected",
                            "Resource exhaustion")
                        .WithRemediation(rb =>
                        {
                            rb.AddStep(1, "View construction errors",
                                "stella scanner witness failures --recent",
                                CommandType.Shell);
                            rb.AddStep(2, "Rebuild failed graphs",
                                "stella scanner witness rebuild --failed",
                                CommandType.Manual);
                        })
                        .WithVerification($"stella doctor --check {CheckId}")
                        .Build();
                }
            }

            // Check for incomplete graphs
            if (stats.IncompleteGraphs > 0)
            {
                return builder
                    .Warn($"{stats.IncompleteGraphs} incomplete witness graph(s)")
                    .WithEvidence("Witness Graph", eb =>
                    {
                        eb.Add("total_constructed", totalConstructedText);
                        eb.Add("incomplete_graphs", incompleteGraphsText);
                        eb.Add("avg_completeness", stats.AvgCompleteness.ToString("P0", CultureInfo.InvariantCulture));
                    })
                    .WithCauses("Partial SBOM data", "Missing dependencies")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            // Check consistency
            if (stats.ConsistencyErrors > 0)
            {
                return builder
                    .Warn($"{stats.ConsistencyErrors} graph consistency error(s)")
                    .WithEvidence("Witness Graph", eb =>
                    {
                        eb.Add("total_constructed", totalConstructedText);
                        eb.Add("consistency_errors", stats.ConsistencyErrors.ToString(CultureInfo.InvariantCulture));
                    })
                    .WithCauses("Version mismatch", "Orphaned nodes")
                    .WithVerification($"stella doctor --check {CheckId}")
                    .Build();
            }

            return builder
                .Pass($"Witness graph healthy ({stats.TotalConstructed} graphs, avg {stats.AvgNodesPerGraph:F0} nodes)")
                .WithEvidence("Witness Graph", eb =>
                {
                    eb.Add("total_constructed", totalConstructedText);
                    eb.Add("avg_nodes_per_graph", stats.AvgNodesPerGraph.ToString("F0", CultureInfo.InvariantCulture));
                    eb.Add("avg_edges_per_graph", stats.AvgEdgesPerGraph.ToString("F0", CultureInfo.InvariantCulture));
                    eb.Add("avg_completeness", stats.AvgCompleteness.ToString("P0", CultureInfo.InvariantCulture));
                })
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot check witness graph health: {ex.Message}")
                .WithEvidence("Witness Graph Status", eb => eb.Add("error_message", ex.Message))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException)
        {
            // Raised both when the 10 second client timeout elapses and when ct is cancelled.
            return builder
                .Warn("Witness graph health check timed out")
                .WithEvidence("Witness Graph Status", eb => eb.Add("connection_error_type", "timeout"))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
    }

    /// <summary>
    /// Parses the witness graph statistics payload.
    /// </summary>
    /// <param name="json">Raw JSON body returned by the stats endpoint.</param>
    /// <returns>
    /// The parsed statistics, or <c>null</c> when the payload is empty, malformed or not a JSON
    /// object. Fields that are missing or not valid numbers fall back to their defaults
    /// (0, or 1.0 for <c>avgCompleteness</c>) without affecting the other fields.
    /// </returns>
    private static WitnessStats? ParseWitnessStats(string json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return null;
        }

        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            return new WitnessStats
            {
                TotalConstructed = ReadInt32(root, "totalConstructed"),
                ConstructionFailures = ReadInt32(root, "constructionFailures"),
                IncompleteGraphs = ReadInt32(root, "incompleteGraphs"),
                ConsistencyErrors = ReadInt32(root, "consistencyErrors"),
                AvgNodesPerGraph = ReadDouble(root, "avgNodesPerGraph", 0),
                AvgEdgesPerGraph = ReadDouble(root, "avgEdgesPerGraph", 0),
                AvgCompleteness = ReadDouble(root, "avgCompleteness", 1.0),
            };
        }
        catch (JsonException)
        {
            return null;
        }
    }

    /// <summary>Reads an integer property, returning 0 when it is absent or not a valid 32-bit number.</summary>
    private static int ReadInt32(JsonElement root, string propertyName) =>
        root.TryGetProperty(propertyName, out var value)
            && value.ValueKind == JsonValueKind.Number
            && value.TryGetInt32(out var number)
            ? number
            : 0;

    /// <summary>Reads a floating-point property, returning <paramref name="fallback"/> when it is absent or not a number.</summary>
    private static double ReadDouble(JsonElement root, string propertyName, double fallback) =>
        root.TryGetProperty(propertyName, out var value)
            && value.ValueKind == JsonValueKind.Number
            && value.TryGetDouble(out var number)
            ? number
            : fallback;

    /// <summary>Statistics extracted from the witness graph stats payload.</summary>
    private sealed class WitnessStats
    {
        public int TotalConstructed { get; set; }
        public int ConstructionFailures { get; set; }
        public int IncompleteGraphs { get; set; }
        public int ConsistencyErrors { get; set; }
        public double AvgNodesPerGraph { get; set; }
        public double AvgEdgesPerGraph { get; set; }
        public double AvgCompleteness { get; set; } = 1.0;
    }
}

View File

@@ -0,0 +1,29 @@
// -----------------------------------------------------------------------------
// ScannerPluginServiceCollectionExtensions.cs
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
// Task: SCAN-001 - Create Scanner plugin scaffold
// Description: Extension methods for registering the Scanner plugin
// -----------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Scanner.DependencyInjection;
/// <summary>
/// Dependency-injection helpers that wire the Scanner Doctor plugin into a service collection.
/// </summary>
public static class ScannerPluginServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="ScannerDoctorPlugin"/> as a singleton <see cref="IDoctorPlugin"/>.
    /// </summary>
    /// <param name="services">The service collection to add the plugin to.</param>
    /// <returns>The same service collection, so calls can be chained.</returns>
    public static IServiceCollection AddDoctorScannerPlugin(this IServiceCollection services)
        => services.AddSingleton<IDoctorPlugin, ScannerDoctorPlugin>();
}

View File

@@ -0,0 +1,64 @@
// -----------------------------------------------------------------------------
// ScannerDoctorPlugin.cs
// Sprint: SPRINT_20260118_019_Doctor_scanner_reachability_health
// Task: SCAN-001 - Create Scanner plugin scaffold
// Description: Doctor plugin for scanner and reachability analysis health
// -----------------------------------------------------------------------------
using StellaOps.Doctor.Plugin.Scanner.Checks;
using StellaOps.Doctor.Plugins;
namespace StellaOps.Doctor.Plugin.Scanner;
/// <summary>
/// Doctor plugin that bundles the scanner and reachability health checks
/// (queue, SBOM generation, vulnerability scan, witness graph, slice cache,
/// reachability computation and resource utilization).
/// </summary>
public sealed class ScannerDoctorPlugin : IDoctorPlugin
{
    private static readonly Version CurrentVersion = new(1, 0, 0);
    private static readonly Version RequiredEngineVersion = new(1, 0, 0);

    /// <inheritdoc />
    public string PluginId => "stellaops.doctor.scanner";

    /// <inheritdoc />
    public string DisplayName => "Scanner & Reachability";

    /// <inheritdoc />
    public DoctorCategory Category => DoctorCategory.Scanner;

    /// <inheritdoc />
    public Version Version => CurrentVersion;

    /// <inheritdoc />
    public Version MinEngineVersion => RequiredEngineVersion;

    /// <summary>
    /// Reports the plugin as available unconditionally; <paramref name="services"/> is not inspected.
    /// </summary>
    public bool IsAvailable(IServiceProvider services) => true;

    /// <summary>
    /// Creates a fresh instance of every check this plugin provides.
    /// </summary>
    /// <param name="context">Plugin context; not used when building the list.</param>
    /// <returns>The seven scanner and reachability checks, in a fixed order.</returns>
    public IReadOnlyList<IDoctorCheck> GetChecks(DoctorPluginContext context)
    {
        IDoctorCheck[] checks =
        [
            new ScannerQueueHealthCheck(),
            new SbomGenerationHealthCheck(),
            new VulnerabilityScanHealthCheck(),
            new WitnessGraphHealthCheck(),
            new SliceCacheHealthCheck(),
            new ReachabilityComputationHealthCheck(),
            new ScannerResourceUtilizationCheck(),
        ];

        return checks;
    }

    /// <summary>
    /// No initialization is required; completes immediately.
    /// </summary>
    public Task InitializeAsync(DoctorPluginContext context, CancellationToken ct) => Task.CompletedTask;
}

View File

@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the Scanner Doctor plugin (root namespace StellaOps.Doctor.Plugin.Scanner). -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<!-- Compiler warnings, including nullable-reference warnings, fail the build. -->
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Doctor.Plugin.Scanner</RootNamespace>
<Description>Scanner and reachability health checks for Stella Ops Doctor diagnostics</Description>
</PropertyGroup>
<ItemGroup>
<!-- Only direct reference: the StellaOps.Doctor library project. -->
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Doctor\StellaOps.Doctor.csproj" />
</ItemGroup>
</Project>