Doctor enhancements, setup enhancements, UI functionality and design consolidation, test project fixes, product advisory attestation/Rekor and delta verification enhancements
This commit is contained in:
@@ -0,0 +1,374 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// CiSystemConnectivityCheck.cs
|
||||
// Sprint: SPRINT_20260118_018_Doctor_integration_health_expansion
|
||||
// Task: INTH-003 - Implement CiSystemConnectivityCheck
|
||||
// Description: Verify connectivity to CI/CD systems (Jenkins, GitLab CI, GitHub Actions, etc.)
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugins.Integration.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Verifies connectivity to configured CI/CD systems.
|
||||
/// Checks authentication, API accessibility, and runner/agent availability.
|
||||
/// </summary>
|
||||
public sealed class CiSystemConnectivityCheck : IDoctorCheck
|
||||
{
|
||||
/// <summary>Plugin identifier passed to the result builder for every result of this check.</summary>
private const string PluginId = "stellaops.doctor.integration";

/// <summary>Category name passed to the result builder for every result of this check.</summary>
private const string CategoryName = "Integration";

/// <summary>Single shared tag list, so that reading <see cref="Tags"/> does not allocate on every access.</summary>
private static readonly IReadOnlyList<string> CheckTags = ["integration", "ci", "cd", "jenkins", "gitlab", "github"];

/// <inheritdoc />
public string CheckId => "check.integration.ci.system";

/// <inheritdoc />
public string Name => "CI System Connectivity";

/// <inheritdoc />
public string Description => "Verify connectivity to CI/CD systems (Jenkins, GitLab CI, GitHub Actions, etc.)";

/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

/// <inheritdoc />
public IReadOnlyList<string> Tags => CheckTags;

/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(15);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
{
    // The check is runnable only when a "CI" configuration section is present
    // and contains at least one child entry.
    var section = context.Configuration.GetSection("CI");
    if (!section.Exists())
    {
        return false;
    }

    return section.GetChildren().Any();
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    var resultBuilder = context.CreateResult(CheckId, PluginId, CategoryName);
    var configured = GetConfiguredCiSystems(context.Configuration);

    // Nothing to probe: report a skip rather than a pass.
    if (configured.Count == 0)
    {
        return resultBuilder
            .Skip("No CI systems configured")
            .WithEvidence("CI Systems", eb => eb.Add("configured_systems", "0"))
            .Build();
    }

    var factory = context.Services.GetService<IHttpClientFactory>();
    if (factory == null)
    {
        return resultBuilder
            .Skip("IHttpClientFactory not available")
            .Build();
    }

    var client = factory.CreateClient("DoctorHealthCheck");
    client.Timeout = TimeSpan.FromSeconds(10);

    var probes = new List<CiSystemResult>();
    var failing = new List<string>();   // unreachable or rejected credentials
    var starved = new List<string>();   // reachable, but every known runner is unavailable

    // Systems are probed one after another, in configuration order.
    foreach (var system in configured)
    {
        var probe = await CheckCiSystemAsync(client, system, ct);
        probes.Add(probe);

        var connected = probe.Reachable && probe.AuthSuccess;
        if (!connected)
        {
            failing.Add(system.Name);
            continue;
        }

        if (probe.TotalRunners > 0 && probe.AvailableRunners == 0)
        {
            starved.Add(system.Name);
        }
    }

    var totalText = configured.Count.ToString(CultureInfo.InvariantCulture);

    // Connectivity/auth failures take precedence over runner shortages.
    if (failing.Count > 0)
    {
        var firstFailing = failing[0];
        var healthyText = (configured.Count - failing.Count).ToString(CultureInfo.InvariantCulture);

        return resultBuilder
            .Fail($"{failing.Count} CI system(s) unreachable or auth failed")
            .WithEvidence("CI Systems", eb =>
            {
                eb.Add("total_systems", totalText);
                eb.Add("healthy_systems", healthyText);
                eb.Add("unhealthy_systems", string.Join(", ", failing));
                AddCiEvidence(eb, probes);
            })
            .WithCauses(
                "CI system is down",
                "Network connectivity issue",
                "API credentials expired",
                "Firewall blocking access")
            .WithRemediation(rb =>
            {
                rb.AddStep(1, "Test CI system connectivity", $"stella ci ping {firstFailing}", CommandType.Shell);
                rb.AddStep(2, "Refresh credentials", $"stella ci auth refresh {firstFailing}", CommandType.Manual);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }

    if (starved.Count > 0)
    {
        var firstStarved = starved[0];

        return resultBuilder
            .Warn($"{starved.Count} CI system(s) have no available runners")
            .WithEvidence("CI Systems", eb =>
            {
                eb.Add("total_systems", totalText);
                eb.Add("healthy_systems", totalText);
                eb.Add("no_runners_systems", string.Join(", ", starved));
                AddCiEvidence(eb, probes);
            })
            .WithCauses(
                "All runners are busy",
                "Runners are offline",
                "Runner scaling needed")
            .WithRemediation(rb =>
            {
                rb.AddStep(1, "Check runner status", $"stella ci runners {firstStarved}", CommandType.Shell);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }

    return resultBuilder
        .Pass($"{configured.Count} CI system(s) healthy")
        .WithEvidence("CI Systems", eb =>
        {
            eb.Add("total_systems", totalText);
            eb.Add("healthy_systems", totalText);
            AddCiEvidence(eb, probes);
        })
        .Build();
}
|
||||
|
||||
/// <summary>
/// Probes the health endpoint of one CI system and, when the probe succeeds,
/// makes a best-effort attempt to collect runner availability.
/// </summary>
/// <param name="client">HTTP client used for the probe.</param>
/// <param name="ci">CI system to probe.</param>
/// <param name="ct">Cancellation token forwarded to the HTTP calls.</param>
/// <returns>
/// The probe outcome. Transport errors, timeouts and unusable URLs are recorded on the
/// result (<c>Reachable = false</c> plus an error message) instead of being thrown.
/// </returns>
private static async Task<CiSystemResult> CheckCiSystemAsync(HttpClient client, CiSystemConfig ci, CancellationToken ct)
{
    var result = new CiSystemResult { Name = ci.Name, Type = ci.Type };

    try
    {
        // Request and response are disposed so the connection returns to the pool
        // promptly, even when the response body is never read.
        using var request = new HttpRequestMessage(HttpMethod.Get, GetHealthEndpoint(ci));

        if (!string.IsNullOrEmpty(ci.ApiToken))
        {
            // GitLab uses its own token header; every other type (including GitHub) gets a bearer token.
            if (ci.Type.Equals("gitlab", StringComparison.OrdinalIgnoreCase))
            {
                request.Headers.Add("PRIVATE-TOKEN", ci.ApiToken);
            }
            else
            {
                request.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", ci.ApiToken);
            }
        }

        // The GitHub REST API rejects requests without a User-Agent header; supply one
        // unless the named client is already configured with its own.
        if (ci.Type.Equals("github", StringComparison.OrdinalIgnoreCase) &&
            client.DefaultRequestHeaders.UserAgent.Count == 0)
        {
            request.Headers.UserAgent.ParseAdd("StellaOps-Doctor");
        }

        var sw = System.Diagnostics.Stopwatch.StartNew();
        using var response = await client.SendAsync(request, ct);
        sw.Stop();

        var authRejected = response.StatusCode == System.Net.HttpStatusCode.Unauthorized ||
                           response.StatusCode == System.Net.HttpStatusCode.Forbidden;

        result.LatencyMs = (int)sw.ElapsedMilliseconds;
        result.AuthSuccess = !authRejected;

        // A 401/403 proves the server answered, so it is reported as reachable with failed
        // auth rather than as unreachable. The overall verdict is unchanged (the system is
        // unhealthy either way); only the evidence becomes accurate.
        result.Reachable = response.IsSuccessStatusCode || authRejected;

        // Runner information is only requested after a fully successful probe.
        if (response.IsSuccessStatusCode)
        {
            await TryGetRunnerInfoAsync(client, ci, result, ct);
        }
    }
    catch (HttpRequestException ex)
    {
        result.Reachable = false;
        result.ErrorMessage = ex.Message;
    }
    catch (TaskCanceledException)
    {
        // HttpClient reports its own timeout as a cancellation too; tell the two apart.
        result.Reachable = false;
        result.ErrorMessage = ct.IsCancellationRequested ? "Cancelled" : "Timeout";
    }
    catch (Exception ex) when (ex is InvalidOperationException or UriFormatException)
    {
        // A relative or malformed URL in configuration must fail this one system,
        // not abort the whole check.
        result.Reachable = false;
        result.ErrorMessage = ex.Message;
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Best-effort lookup of runner/agent counts for CI types that expose them
/// (Jenkins and GitLab). Any failure leaves <paramref name="result"/> untouched.
/// </summary>
/// <param name="client">HTTP client used for the request.</param>
/// <param name="ci">CI system whose runners are queried.</param>
/// <param name="result">Result object that receives the runner counts.</param>
/// <param name="ct">Cancellation token forwarded to the HTTP calls.</param>
private static async Task TryGetRunnerInfoAsync(HttpClient client, CiSystemConfig ci, CiSystemResult result, CancellationToken ct)
{
    try
    {
        string? runnersEndpoint = ci.Type.ToLowerInvariant() switch
        {
            "jenkins" => $"{ci.Url.TrimEnd('/')}/computer/api/json",
            "gitlab" => $"{ci.Url.TrimEnd('/')}/api/v4/runners?status=online",
            "github" => null, // GitHub runners are per-repo/org, complex to aggregate
            _ => null
        };

        if (runnersEndpoint is null)
        {
            return;
        }

        // Dispose request and response so the connection is released even when the
        // body is never read (non-success status).
        using var request = new HttpRequestMessage(HttpMethod.Get, runnersEndpoint);

        if (!string.IsNullOrEmpty(ci.ApiToken))
        {
            if (ci.Type.Equals("gitlab", StringComparison.OrdinalIgnoreCase))
            {
                request.Headers.Add("PRIVATE-TOKEN", ci.ApiToken);
            }
            else
            {
                request.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", ci.ApiToken);
            }
        }

        using var response = await client.SendAsync(request, ct);
        if (!response.IsSuccessStatusCode)
        {
            return;
        }

        var json = await response.Content.ReadAsStringAsync(ct);
        ParseRunnerInfo(json, ci.Type, result);
    }
    catch (Exception)
    {
        // Runner info is optional: a failure here must never change the connectivity verdict.
    }
}
|
||||
|
||||
/// <summary>
/// Extracts runner counts from a runner-listing response and stores them on
/// <paramref name="result"/>. Unrecognised or malformed payloads leave the result unchanged.
/// </summary>
/// <param name="json">Response body returned by the runners endpoint.</param>
/// <param name="ciType">CI system type; only "jenkins" and "gitlab" are interpreted.</param>
/// <param name="result">Result object that receives the counts.</param>
private static void ParseRunnerInfo(string json, string ciType, CiSystemResult result)
{
    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        if (ciType.Equals("jenkins", StringComparison.OrdinalIgnoreCase))
        {
            if (root.ValueKind == JsonValueKind.Object &&
                root.TryGetProperty("computer", out var computers) &&
                computers.ValueKind == JsonValueKind.Array)
            {
                // Compute both numbers before touching the result: a half-applied update
                // (total set, available left at 0) would raise a false "no runners" warning.
                var total = computers.GetArrayLength();
                var available = computers.EnumerateArray().Count(node => !IsOffline(node));

                result.TotalRunners = total;
                result.AvailableRunners = available;
            }
        }
        else if (ciType.Equals("gitlab", StringComparison.OrdinalIgnoreCase))
        {
            if (root.ValueKind == JsonValueKind.Array)
            {
                var total = root.GetArrayLength();
                result.TotalRunners = total;
                result.AvailableRunners = total; // Already filtered by status=online
            }
        }
    }
    catch (JsonException)
    {
        // Body was not valid JSON; runner info is optional, so it is ignored.
    }

    // A node counts as offline only when it carries an explicit `"offline": true`.
    // Missing, null or non-boolean values are treated as "not offline" instead of throwing.
    static bool IsOffline(JsonElement node) =>
        node.ValueKind == JsonValueKind.Object &&
        node.TryGetProperty("offline", out var flag) &&
        flag.ValueKind == JsonValueKind.True;
}
|
||||
|
||||
/// <summary>
/// Builds the URL probed for a CI system, chosen by its type (compared in lower case).
/// GitHub always maps to the public API rate-limit endpoint; every other type appends a
/// type-specific path to the configured URL with trailing slashes removed.
/// </summary>
/// <param name="ci">CI system configuration.</param>
/// <returns>URL of the endpoint to probe.</returns>
private static string GetHealthEndpoint(CiSystemConfig ci)
{
    var kind = ci.Type.ToLowerInvariant();

    // GitHub is the only type that ignores the configured URL.
    if (kind == "github")
    {
        return "https://api.github.com/rate_limit";
    }

    var root = ci.Url.TrimEnd('/');
    var path = kind switch
    {
        "jenkins" => "/api/json",
        "gitlab" => "/api/v4/version",
        "azure" => "/_apis/connectionData",
        _ => "/health"
    };

    return root + path;
}
|
||||
|
||||
/// <summary>
/// Adds per-system evidence entries. Keys are prefixed with <c>ci_</c> followed by the
/// system name in lower case, with spaces and hyphens replaced by underscores.
/// </summary>
/// <param name="eb">Evidence builder receiving the entries.</param>
/// <param name="results">Probe results, one per CI system.</param>
private static void AddCiEvidence(EvidenceBuilder eb, List<CiSystemResult> results)
{
    foreach (var r in results)
    {
        var prefix = $"ci_{r.Name.ToLowerInvariant().Replace(" ", "_").Replace("-", "_")}";

        eb.Add($"{prefix}_type", r.Type);
        eb.Add($"{prefix}_reachable", r.Reachable ? "true" : "false");
        eb.Add($"{prefix}_auth_success", r.AuthSuccess ? "true" : "false");
        eb.Add($"{prefix}_latency_ms", r.LatencyMs.ToString(CultureInfo.InvariantCulture));

        // Runner counts are only reported when a runner listing was obtained.
        if (r.TotalRunners > 0)
        {
            eb.Add($"{prefix}_available_runners", r.AvailableRunners.ToString(CultureInfo.InvariantCulture));
            eb.Add($"{prefix}_total_runners", r.TotalRunners.ToString(CultureInfo.InvariantCulture));
        }

        // Surface the captured transport error; it was previously recorded but never shown.
        if (!string.IsNullOrEmpty(r.ErrorMessage))
        {
            eb.Add($"{prefix}_error", r.ErrorMessage);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reads CI systems from configuration. Entries under <c>CI:Systems</c> that have a
/// non-empty <c>Url</c> are used; if none are found, the single-system keys
/// (<c>CI:Url</c>, <c>CI:Type</c>, <c>CI:ApiToken</c>) are used as a fallback named "default".
/// </summary>
/// <param name="config">Application configuration.</param>
/// <returns>The configured CI systems; empty when nothing usable is configured.</returns>
private static List<CiSystemConfig> GetConfiguredCiSystems(IConfiguration config)
{
    var found = new List<CiSystemConfig>();

    var systemsSection = config.GetSection("CI:Systems");
    if (systemsSection.Exists())
    {
        foreach (var entry in systemsSection.GetChildren())
        {
            var url = entry.GetValue<string>("Url");

            // Entries without a URL cannot be probed and are ignored.
            if (string.IsNullOrEmpty(url))
            {
                continue;
            }

            found.Add(new CiSystemConfig
            {
                Name = entry.GetValue<string>("Name") ?? entry.Key,
                Url = url,
                Type = entry.GetValue<string>("Type") ?? "generic",
                ApiToken = entry.GetValue<string>("ApiToken")
            });
        }
    }

    // Check legacy single-system config, only when the list form yielded nothing.
    if (found.Count > 0)
    {
        return found;
    }

    var legacyUrl = config.GetValue<string>("CI:Url");
    if (string.IsNullOrEmpty(legacyUrl))
    {
        return found;
    }

    found.Add(new CiSystemConfig
    {
        Name = "default",
        Url = legacyUrl,
        Type = config.GetValue<string>("CI:Type") ?? "generic",
        ApiToken = config.GetValue<string>("CI:ApiToken")
    });

    return found;
}
|
||||
|
||||
/// <summary>Configuration of one CI system, as read from the <c>CI</c> configuration section.</summary>
private sealed class CiSystemConfig
{
    /// <summary>Display name; also used in evidence keys and remediation commands.</summary>
    public required string Name { get; init; }

    /// <summary>Base URL from which the probe endpoints are derived.</summary>
    public required string Url { get; init; }

    /// <summary>System type that selects endpoints and auth header ("generic" when not configured).</summary>
    public required string Type { get; init; }

    /// <summary>Optional API token; no auth header is sent when it is null or empty.</summary>
    public string? ApiToken { get; init; }
}
|
||||
|
||||
/// <summary>Outcome of probing one CI system.</summary>
private sealed class CiSystemResult
{
    /// <summary>Name of the probed system.</summary>
    public required string Name { get; init; }

    /// <summary>Type of the probed system.</summary>
    public required string Type { get; init; }

    /// <summary>Whether the health probe counted the system as reachable.</summary>
    public bool Reachable { get; set; }

    /// <summary>False when the probe was answered with 401 or 403 (or never completed).</summary>
    public bool AuthSuccess { get; set; }

    /// <summary>Elapsed time of the health request, in milliseconds.</summary>
    public int LatencyMs { get; set; }

    /// <summary>Number of runners listed by the system; 0 when no listing was obtained.</summary>
    public int TotalRunners { get; set; }

    /// <summary>Number of listed runners that are not offline.</summary>
    public int AvailableRunners { get; set; }

    /// <summary>Transport error text, or "Timeout", when the probe failed with an exception.</summary>
    public string? ErrorMessage { get; set; }
}
|
||||
}
|
||||
@@ -0,0 +1,306 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// IntegrationWebhookHealthCheck.cs
|
||||
// Sprint: SPRINT_20260118_018_Doctor_integration_health_expansion
|
||||
// Task: INTH-007 - Implement IntegrationWebhookHealthCheck
|
||||
// Description: Check health of all configured webhooks (inbound and outbound)
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugins.Integration.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks health of all configured webhooks.
|
||||
/// Monitors delivery success rates, endpoint availability, and recent failures.
|
||||
/// </summary>
|
||||
public sealed class IntegrationWebhookHealthCheck : IDoctorCheck
|
||||
{
|
||||
/// <summary>Plugin identifier passed to the result builder for every result of this check.</summary>
private const string PluginId = "stellaops.doctor.integration";

/// <summary>Category name passed to the result builder for every result of this check.</summary>
private const string CategoryName = "Integration";

/// <summary>Failure rate at or above which a webhook produces a warning.</summary>
private const double FailureRateWarningThreshold = 0.05; // 5%

/// <summary>Failure rate at or above which a webhook fails the check.</summary>
private const double FailureRateFailThreshold = 0.20; // 20%

/// <summary>Single shared tag list, so that reading <see cref="Tags"/> does not allocate on every access.</summary>
private static readonly IReadOnlyList<string> CheckTags = ["integration", "webhooks", "notifications", "events"];

/// <inheritdoc />
public string CheckId => "check.integration.webhooks";

/// <inheritdoc />
public string Name => "Integration Webhook Health";

/// <inheritdoc />
public string Description => "Check health of all configured webhooks (inbound and outbound)";

/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

/// <inheritdoc />
public IReadOnlyList<string> Tags => CheckTags;

/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
{
    // The check is runnable only when a "Webhooks" configuration section is present
    // and contains at least one child entry.
    var section = context.Configuration.GetSection("Webhooks");
    if (!section.Exists())
    {
        return false;
    }

    return section.GetChildren().Any();
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    // Evidence is machine-readable, so percentages are formatted with the invariant culture
    // like every other evidence value (invariant "P0" renders e.g. "25 %").
    static string Percent(double rate) => rate.ToString("P0", CultureInfo.InvariantCulture);

    var builder = context.CreateResult(CheckId, PluginId, CategoryName);

    var webhooks = GetConfiguredWebhooks(context.Configuration);

    if (webhooks.Count == 0)
    {
        return builder
            .Skip("No webhooks configured")
            .WithEvidence("Webhooks", eb => eb.Add("configured_webhooks", "0"))
            .Build();
    }

    var httpClientFactory = context.Services.GetService<IHttpClientFactory>();
    if (httpClientFactory == null)
    {
        return builder
            .Skip("IHttpClientFactory not available")
            .Build();
    }

    var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
    httpClient.Timeout = TimeSpan.FromSeconds(10);

    var results = new List<WebhookResult>();
    var unreachable = new List<string>();
    var highFailureRate = new List<(string Name, double Rate)>();

    // Webhooks are probed one after another, in configuration order.
    foreach (var webhook in webhooks)
    {
        var result = await CheckWebhookAsync(httpClient, webhook, ct);
        results.Add(result);

        if (!result.Reachable)
        {
            unreachable.Add(webhook.Name);
        }
        else if (result.FailureRate >= FailureRateFailThreshold)
        {
            highFailureRate.Add((webhook.Name, result.FailureRate));
        }
    }

    // Unreachable endpoints take precedence over delivery statistics.
    if (unreachable.Count > 0)
    {
        return builder
            .Fail($"{unreachable.Count} webhook endpoint(s) unreachable")
            .WithEvidence("Webhooks", eb =>
            {
                eb.Add("total_webhooks", webhooks.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("healthy_webhooks", (webhooks.Count - unreachable.Count).ToString(CultureInfo.InvariantCulture));
                eb.Add("unreachable_webhooks", string.Join(", ", unreachable));
                AddWebhookEvidence(eb, results);
            })
            .WithCauses(
                "Webhook endpoint is down",
                "Network connectivity issue",
                "DNS resolution failed",
                "TLS certificate issue")
            .WithRemediation(rb =>
            {
                rb.AddStep(1, "Test webhook endpoint",
                    $"stella webhooks test {unreachable[0]}",
                    CommandType.Shell);
                rb.AddStep(2, "View webhook delivery log",
                    $"stella webhooks logs {unreachable[0]}",
                    CommandType.Shell);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }

    if (highFailureRate.Count > 0)
    {
        return builder
            .Fail($"{highFailureRate.Count} webhook(s) have high failure rate (>20%)")
            .WithEvidence("Webhooks", eb =>
            {
                eb.Add("total_webhooks", webhooks.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("high_failure_webhooks", string.Join(", ", highFailureRate.Select(h => $"{h.Name}:{Percent(h.Rate)}")));
                AddWebhookEvidence(eb, results);
            })
            .WithCauses(
                "Endpoint returning errors",
                "Payload format changed",
                "Authentication issue",
                "Rate limiting")
            .WithRemediation(rb =>
            {
                rb.AddStep(1, "View recent failures",
                    $"stella webhooks logs {highFailureRate[0].Name} --status failed",
                    CommandType.Shell);
                rb.AddStep(2, "Retry failed deliveries",
                    $"stella webhooks retry {highFailureRate[0].Name}",
                    CommandType.Manual);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }

    // Check for warning-level failure rates
    var warningFailureRate = results
        .Where(r => r.FailureRate >= FailureRateWarningThreshold && r.FailureRate < FailureRateFailThreshold)
        .ToList();

    if (warningFailureRate.Count > 0)
    {
        return builder
            .Warn($"{warningFailureRate.Count} webhook(s) have elevated failure rate (>5%)")
            .WithEvidence("Webhooks", eb =>
            {
                eb.Add("total_webhooks", webhooks.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("elevated_failure_webhooks", string.Join(", ", warningFailureRate.Select(w => $"{w.Name}:{Percent(w.FailureRate)}")));
                AddWebhookEvidence(eb, results);
            })
            .WithCauses("Intermittent endpoint issues", "Occasional timeouts")
            .WithRemediation(rb => rb
                .AddStep(1, "Monitor webhook metrics",
                    "stella webhooks stats",
                    CommandType.Shell))
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }

    return builder
        .Pass($"{webhooks.Count} webhook(s) healthy")
        .WithEvidence("Webhooks", eb =>
        {
            eb.Add("total_webhooks", webhooks.Count.ToString(CultureInfo.InvariantCulture));
            eb.Add("healthy_webhooks", webhooks.Count.ToString(CultureInfo.InvariantCulture));
            AddWebhookEvidence(eb, results);
        })
        .Build();
}
|
||||
|
||||
/// <summary>
/// Evaluates one webhook: outbound endpoints are probed with an HTTP HEAD request, any
/// other direction is treated as reachable without a request. Delivery statistics from
/// configuration are copied onto the result and turned into a failure rate.
/// </summary>
/// <param name="client">HTTP client used for the reachability probe.</param>
/// <param name="webhook">Webhook to evaluate.</param>
/// <param name="ct">Cancellation token forwarded to the HTTP call.</param>
/// <returns>
/// The evaluation result. Transport errors, timeouts and unusable URLs are recorded on
/// the result (<c>Reachable = false</c> plus an error message) instead of being thrown.
/// </returns>
private static async Task<WebhookResult> CheckWebhookAsync(HttpClient client, WebhookConfig webhook, CancellationToken ct)
{
    var result = new WebhookResult { Name = webhook.Name, Direction = webhook.Direction };

    // For outbound webhooks, we can ping the endpoint.
    // The comparison ignores case so that "Outbound" in configuration is not silently
    // treated as inbound and skipped.
    if (string.Equals(webhook.Direction, "outbound", StringComparison.OrdinalIgnoreCase))
    {
        try
        {
            // Use HEAD to check reachability (don't actually send webhook).
            using var request = new HttpRequestMessage(HttpMethod.Head, webhook.Url);

            var sw = System.Diagnostics.Stopwatch.StartNew();
            using var response = await client.SendAsync(request, ct);
            sw.Stop();

            result.LatencyMs = (int)sw.ElapsedMilliseconds;

            // Any non-5xx answer (for example 405 or 401) still shows the endpoint is there.
            result.Reachable = (int)response.StatusCode < 500;
        }
        catch (HttpRequestException ex)
        {
            result.Reachable = false;
            result.ErrorMessage = ex.Message;
        }
        catch (TaskCanceledException)
        {
            // HttpClient reports its own timeout as a cancellation too; tell the two apart.
            result.Reachable = false;
            result.ErrorMessage = ct.IsCancellationRequested ? "Cancelled" : "Timeout";
        }
        catch (Exception ex) when (ex is InvalidOperationException or UriFormatException)
        {
            // A relative or malformed URL in configuration must fail this one webhook,
            // not abort the whole check.
            result.Reachable = false;
            result.ErrorMessage = ex.Message;
        }
    }
    else
    {
        // For inbound webhooks, just mark as reachable (endpoint is local)
        result.Reachable = true;
    }

    // Populate delivery stats from webhook config/history.
    var total = webhook.TotalDeliveries;
    var successful = webhook.SuccessfulDeliveries;

    // Inconsistent counters (successful > total, or negative values) must not yield a
    // negative failure count or a rate outside 0..1, so the count is clamped.
    var failed = total > 0 ? Math.Clamp(total - successful, 0, total) : 0;

    result.TotalDeliveries = total;
    result.SuccessfulDeliveries = successful;
    result.FailedDeliveries = failed;
    result.FailureRate = total > 0 ? (double)failed / total : 0;

    return result;
}
|
||||
|
||||
/// <summary>
/// Adds per-webhook evidence entries. Keys are prefixed with <c>webhook_</c> followed by
/// the webhook name in lower case, with spaces and hyphens replaced by underscores.
/// </summary>
/// <param name="eb">Evidence builder receiving the entries.</param>
/// <param name="results">Evaluation results, one per webhook.</param>
private static void AddWebhookEvidence(EvidenceBuilder eb, List<WebhookResult> results)
{
    foreach (var r in results)
    {
        var prefix = $"webhook_{r.Name.ToLowerInvariant().Replace(" ", "_").Replace("-", "_")}";

        eb.Add($"{prefix}_direction", r.Direction);
        eb.Add($"{prefix}_reachable", r.Reachable ? "true" : "false");
        eb.Add($"{prefix}_latency_ms", r.LatencyMs.ToString(CultureInfo.InvariantCulture));
        eb.Add($"{prefix}_failure_rate", r.FailureRate.ToString("P1", CultureInfo.InvariantCulture));
        eb.Add($"{prefix}_total_deliveries", r.TotalDeliveries.ToString(CultureInfo.InvariantCulture));

        // Surface the captured transport error; it was previously recorded but never shown.
        if (!string.IsNullOrEmpty(r.ErrorMessage))
        {
            eb.Add($"{prefix}_error", r.ErrorMessage);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reads webhook definitions from the <c>Webhooks:Endpoints</c> configuration section.
/// Only entries with a non-empty <c>Url</c> are returned.
/// </summary>
/// <param name="config">Application configuration.</param>
/// <returns>The configured webhooks; empty when the section is missing or has no usable entry.</returns>
private static List<WebhookConfig> GetConfiguredWebhooks(IConfiguration config)
{
    var found = new List<WebhookConfig>();

    var endpoints = config.GetSection("Webhooks:Endpoints");
    if (!endpoints.Exists())
    {
        return found;
    }

    foreach (var entry in endpoints.GetChildren())
    {
        // All values are read up front, exactly as configured; missing numbers default to 0.
        var displayName = entry.GetValue<string>("Name") ?? entry.Key;
        var url = entry.GetValue<string>("Url");
        var direction = entry.GetValue<string>("Direction") ?? "outbound";
        var totalDeliveries = entry.GetValue<int>("TotalDeliveries");
        var successfulDeliveries = entry.GetValue<int>("SuccessfulDeliveries");

        if (string.IsNullOrEmpty(url))
        {
            continue;
        }

        found.Add(new WebhookConfig
        {
            Name = displayName,
            Url = url,
            Direction = direction,
            TotalDeliveries = totalDeliveries,
            SuccessfulDeliveries = successfulDeliveries
        });
    }

    return found;
}
|
||||
|
||||
/// <summary>Configuration of one webhook, as read from <c>Webhooks:Endpoints</c>.</summary>
private sealed class WebhookConfig
{
    /// <summary>Display name; also used in evidence keys and remediation commands.</summary>
    public required string Name { get; init; }

    /// <summary>Endpoint URL; probed for outbound webhooks.</summary>
    public required string Url { get; init; }

    /// <summary>Direction of the webhook ("outbound" when not configured).</summary>
    public required string Direction { get; init; }

    /// <summary>Configured total number of deliveries.</summary>
    public int TotalDeliveries { get; init; }

    /// <summary>Configured number of successful deliveries.</summary>
    public int SuccessfulDeliveries { get; init; }
}
|
||||
|
||||
/// <summary>Outcome of evaluating one webhook.</summary>
private sealed class WebhookResult
{
    /// <summary>Name of the evaluated webhook.</summary>
    public required string Name { get; init; }

    /// <summary>Direction copied from the webhook configuration.</summary>
    public required string Direction { get; init; }

    /// <summary>Whether the endpoint counted as reachable.</summary>
    public bool Reachable { get; set; }

    /// <summary>Elapsed time of the probe request in milliseconds; 0 when no request was made.</summary>
    public int LatencyMs { get; set; }

    /// <summary>Total deliveries, copied from configuration.</summary>
    public int TotalDeliveries { get; set; }

    /// <summary>Successful deliveries, copied from configuration.</summary>
    public int SuccessfulDeliveries { get; set; }

    /// <summary>Total minus successful deliveries.</summary>
    public int FailedDeliveries { get; set; }

    /// <summary>Failed deliveries divided by total deliveries; 0 when there were none.</summary>
    public double FailureRate { get; set; }

    /// <summary>Transport error text, or "Timeout", when the probe failed with an exception.</summary>
    public string? ErrorMessage { get; set; }
}
|
||||
}
|
||||
@@ -0,0 +1,338 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// SecretsManagerConnectivityCheck.cs
|
||||
// Sprint: SPRINT_20260118_018_Doctor_integration_health_expansion
|
||||
// Task: INTH-004 - Implement SecretsManagerConnectivityCheck
|
||||
// Description: Verify connectivity to secrets managers (Vault, AWS Secrets Manager, Azure Key Vault, etc.)
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugins.Integration.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Verifies connectivity to configured secrets managers.
|
||||
/// Checks authentication, seal status (Vault), and API accessibility.
|
||||
/// </summary>
|
||||
public sealed class SecretsManagerConnectivityCheck : IDoctorCheck
|
||||
{
|
||||
/// <summary>Plugin identifier passed to the result builder for every result of this check.</summary>
private const string PluginId = "stellaops.doctor.integration";

/// <summary>Category name passed to the result builder for every result of this check.</summary>
private const string CategoryName = "Integration";

/// <summary>Single shared tag list, so that reading <see cref="Tags"/> does not allocate on every access.</summary>
private static readonly IReadOnlyList<string> CheckTags = ["integration", "secrets", "vault", "security", "keyvault"];

/// <inheritdoc />
public string CheckId => "check.integration.secrets.manager";

/// <inheritdoc />
public string Name => "Secrets Manager Connectivity";

/// <inheritdoc />
public string Description => "Verify connectivity to secrets managers (Vault, AWS Secrets Manager, Azure Key Vault)";

/// <inheritdoc />
public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;

/// <inheritdoc />
public IReadOnlyList<string> Tags => CheckTags;

/// <inheritdoc />
public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool CanRun(DoctorPluginContext context)
{
    // The check is runnable only when a "Secrets" configuration section is present
    // and contains at least one child entry.
    var section = context.Configuration.GetSection("Secrets");
    if (!section.Exists())
    {
        return false;
    }

    return section.GetChildren().Any();
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
{
    var builder = context.CreateResult(CheckId, PluginId, CategoryName);

    var managers = GetConfiguredSecretsManagers(context.Configuration);

    if (managers.Count == 0)
    {
        return builder
            .Skip("No secrets managers configured")
            .WithEvidence("Secrets Managers", eb => eb.Add("configured_managers", "0"))
            .Build();
    }

    var httpClientFactory = context.Services.GetService<IHttpClientFactory>();
    if (httpClientFactory == null)
    {
        return builder
            .Skip("IHttpClientFactory not available")
            .Build();
    }

    var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
    httpClient.Timeout = TimeSpan.FromSeconds(10);

    var results = new List<SecretsManagerResult>();
    var unhealthy = new List<string>();
    var sealed_ = new List<string>(); // 'sealed' is a keyword

    // Managers are probed one after another, in configuration order.
    foreach (var mgr in managers)
    {
        var result = await CheckSecretsManagerAsync(httpClient, mgr, ct);
        results.Add(result);

        if (!result.Reachable || !result.AuthSuccess)
        {
            unhealthy.Add(mgr.Name);
        }
        else if (result.IsSealed)
        {
            sealed_.Add(mgr.Name);
        }
    }

    // Secrets manager issues are critical - blocks deployments
    if (unhealthy.Count > 0)
    {
        return builder
            // This bucket holds both unreachable managers and rejected credentials, so the
            // message names both, matching the wording of the CI connectivity check.
            .Fail($"{unhealthy.Count} secrets manager(s) unreachable or auth failed")
            .WithEvidence("Secrets Managers", eb =>
            {
                eb.Add("total_managers", managers.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("healthy_managers", (managers.Count - unhealthy.Count).ToString(CultureInfo.InvariantCulture));
                eb.Add("unhealthy_managers", string.Join(", ", unhealthy));
                AddSecretsEvidence(eb, results);
            })
            .WithCauses(
                "Secrets manager is down",
                "Network connectivity issue",
                "Authentication token expired",
                "TLS certificate issue")
            .WithRemediation(rb =>
            {
                rb.AddStep(1, "Test secrets manager connectivity",
                    $"stella secrets ping {unhealthy[0]}",
                    CommandType.Shell);
                rb.AddStep(2, "Refresh authentication",
                    $"stella secrets auth refresh {unhealthy[0]}",
                    CommandType.Manual);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }

    if (sealed_.Count > 0)
    {
        return builder
            .Fail($"{sealed_.Count} Vault instance(s) are sealed")
            .WithEvidence("Secrets Managers", eb =>
            {
                eb.Add("total_managers", managers.Count.ToString(CultureInfo.InvariantCulture));
                eb.Add("healthy_managers", (managers.Count - sealed_.Count).ToString(CultureInfo.InvariantCulture));
                eb.Add("sealed_vaults", string.Join(", ", sealed_));
                AddSecretsEvidence(eb, results);
            })
            .WithCauses(
                "Vault was restarted and needs unseal",
                "Vault auto-seal triggered",
                "HSM connectivity lost")
            .WithRemediation(rb =>
            {
                rb.AddStep(1, "Unseal Vault",
                    "vault operator unseal",
                    CommandType.Manual);
                rb.AddStep(2, "Check seal status",
                    $"stella secrets status {sealed_[0]}",
                    CommandType.Shell);
            })
            .WithVerification($"stella doctor --check {CheckId}")
            .Build();
    }

    return builder
        .Pass($"{managers.Count} secrets manager(s) healthy")
        .WithEvidence("Secrets Managers", eb =>
        {
            eb.Add("total_managers", managers.Count.ToString(CultureInfo.InvariantCulture));
            eb.Add("healthy_managers", managers.Count.ToString(CultureInfo.InvariantCulture));
            AddSecretsEvidence(eb, results);
        })
        .Build();
}
|
||||
|
||||
/// <summary>
/// Probes the health endpoint of one secrets manager and, on a successful response,
/// parses type-specific details from the body.
/// </summary>
/// <param name="client">HTTP client used for the probe.</param>
/// <param name="mgr">Secrets manager to probe.</param>
/// <param name="ct">Cancellation token forwarded to the HTTP calls.</param>
/// <returns>
/// The probe outcome. Transport errors, timeouts and unusable URLs are recorded on the
/// result (<c>Reachable = false</c> plus an error message) instead of being thrown.
/// </returns>
private static async Task<SecretsManagerResult> CheckSecretsManagerAsync(
    HttpClient client, SecretsManagerConfig mgr, CancellationToken ct)
{
    var result = new SecretsManagerResult { Name = mgr.Name, Type = mgr.Type };

    try
    {
        // Request and response are disposed so the connection returns to the pool
        // promptly, even when the response body is never read.
        using var request = new HttpRequestMessage(HttpMethod.Get, GetHealthEndpoint(mgr));

        // Add auth headers based on type
        if (!string.IsNullOrEmpty(mgr.Token))
        {
            if (mgr.Type.Equals("vault", StringComparison.OrdinalIgnoreCase))
            {
                request.Headers.Add("X-Vault-Token", mgr.Token);
            }
            else
            {
                request.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", mgr.Token);
            }
        }

        var sw = System.Diagnostics.Stopwatch.StartNew();
        using var response = await client.SendAsync(request, ct);
        sw.Stop();

        var status = (int)response.StatusCode;

        result.LatencyMs = (int)sw.ElapsedMilliseconds;

        // A 5xx answer means the service is failing; counting it as reachable would let a
        // broken secrets manager pass as healthy. Answers below 500 still count as reachable,
        // the same rule the webhook check applies.
        result.Reachable = status < 500;
        result.AuthSuccess = response.StatusCode != System.Net.HttpStatusCode.Unauthorized &&
                             response.StatusCode != System.Net.HttpStatusCode.Forbidden;

        if (!result.Reachable)
        {
            result.ErrorMessage = "HTTP " + status.ToString(CultureInfo.InvariantCulture);
        }

        // Parse response for type-specific info
        if (response.IsSuccessStatusCode)
        {
            var json = await response.Content.ReadAsStringAsync(ct);
            ParseSecretsManagerResponse(json, mgr.Type, result);
        }
    }
    catch (HttpRequestException ex)
    {
        result.Reachable = false;
        result.ErrorMessage = ex.Message;
    }
    catch (TaskCanceledException)
    {
        // HttpClient reports its own timeout as a cancellation too; tell the two apart.
        result.Reachable = false;
        result.ErrorMessage = ct.IsCancellationRequested ? "Cancelled" : "Timeout";
    }
    catch (Exception ex) when (ex is InvalidOperationException or UriFormatException)
    {
        // A relative or malformed URL in configuration must fail this one manager,
        // not abort the whole check.
        result.Reachable = false;
        result.ErrorMessage = ex.Message;
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Extracts type-specific details from a health response body into <paramref name="result"/>.
/// Only responses of type "vault" are inspected; the <c>sealed</c>, <c>initialized</c>
/// and <c>version</c> properties are copied when present with the expected JSON type.
/// </summary>
/// <param name="json">Response body returned by the health endpoint.</param>
/// <param name="type">Secrets manager type (compared case-insensitively).</param>
/// <param name="result">Result object updated in place.</param>
/// <remarks>
/// Parsing is best-effort: a body that is not valid JSON, is not a JSON object, or
/// carries a property of an unexpected type leaves the corresponding values untouched.
/// </remarks>
private static void ParseSecretsManagerResponse(string json, string type, SecretsManagerResult result)
{
    // Nothing is extracted for non-Vault managers, so skip the parse entirely.
    if (!string.Equals(type, "vault", StringComparison.OrdinalIgnoreCase) || string.IsNullOrWhiteSpace(json))
    {
        return;
    }

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        // TryGetProperty is only valid on objects; an array or scalar body carries no health fields.
        if (root.ValueKind != JsonValueKind.Object)
        {
            return;
        }

        // Vault health endpoint returns sealed status
        if (TryReadBoolean(root, "sealed", out var isSealed))
        {
            result.IsSealed = isSealed;
        }

        if (TryReadBoolean(root, "initialized", out var isInitialized))
        {
            result.IsInitialized = isInitialized;
        }

        if (root.TryGetProperty("version", out var verEl) && verEl.ValueKind == JsonValueKind.String)
        {
            result.Version = verEl.GetString();
        }
    }
    catch (JsonException)
    {
        // Body was not valid JSON; keep the defaults already on the result.
    }

    // Reads a JSON boolean property; a missing or differently typed property yields false
    // so that one unexpected field does not prevent the remaining fields from being read.
    static bool TryReadBoolean(JsonElement obj, string name, out bool value)
    {
        if (obj.TryGetProperty(name, out var el) && el.ValueKind is JsonValueKind.True or JsonValueKind.False)
        {
            value = el.GetBoolean();
            return true;
        }

        value = false;
        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Builds the URL probed for the given secrets manager, based on its type.
/// </summary>
/// <param name="mgr">Secrets manager whose <c>Type</c> and <c>Url</c> determine the endpoint.</param>
/// <returns>
/// The configured URL unchanged for "aws" and "gcp"; otherwise the URL with trailing
/// slashes removed and a type-specific health path appended.
/// </returns>
private static string GetHealthEndpoint(SecretsManagerConfig mgr)
{
    var kind = mgr.Type.ToLowerInvariant();

    // AWS and GCP use their SDKs; the URL is just for config reference.
    if (kind is "aws" or "gcp")
    {
        return mgr.Url;
    }

    var baseUrl = mgr.Url.TrimEnd('/');

    switch (kind)
    {
        case "vault":
            // The query string asks Vault to answer 200 for standby, sealed and
            // uninitialized nodes so the body can still be read and parsed.
            return $"{baseUrl}/v1/sys/health?standbyok=true&sealedcode=200&uninitcode=200";

        case "azure":
            return $"{baseUrl}/healthstatus";

        default:
            return $"{baseUrl}/health";
    }
}
|
||||
|
||||
/// <summary>
/// Writes one group of evidence entries per probed secrets manager.
/// </summary>
/// <param name="eb">Evidence builder receiving the entries.</param>
/// <param name="results">Probe results to report.</param>
/// <remarks>
/// Keys are prefixed with <c>secrets_</c> followed by the manager name lower-cased,
/// with spaces and hyphens replaced by underscores. Vault managers additionally
/// report their sealed and initialized state.
/// </remarks>
private static void AddSecretsEvidence(EvidenceBuilder eb, List<SecretsManagerResult> results)
{
    // Evidence values are lower-case "true"/"false".
    static string Flag(bool value) => value.ToString().ToLowerInvariant();

    foreach (var entry in results)
    {
        var slug = entry.Name.ToLowerInvariant().Replace(" ", "_").Replace("-", "_");
        var key = $"secrets_{slug}";

        eb.Add($"{key}_type", entry.Type);
        eb.Add($"{key}_reachable", Flag(entry.Reachable));
        eb.Add($"{key}_auth_success", Flag(entry.AuthSuccess));
        eb.Add($"{key}_latency_ms", entry.LatencyMs.ToString(CultureInfo.InvariantCulture));

        var isVault = entry.Type.Equals("vault", StringComparison.OrdinalIgnoreCase);
        if (!isVault)
        {
            continue;
        }

        eb.Add($"{key}_sealed", Flag(entry.IsSealed));
        eb.Add($"{key}_initialized", Flag(entry.IsInitialized));
    }
}
|
||||
|
||||
/// <summary>
/// Collects the secrets managers declared in configuration.
/// </summary>
/// <param name="config">Application configuration to read from.</param>
/// <returns>
/// One entry per child of <c>Secrets:Managers</c> that has a non-empty <c>Url</c>.
/// When that yields nothing, a single "vault" entry built from the legacy
/// <c>Secrets:Vault:*</c> / <c>Vault:*</c> keys is returned if a legacy URL is set.
/// The list is empty when nothing is configured.
/// </returns>
private static List<SecretsManagerConfig> GetConfiguredSecretsManagers(IConfiguration config)
{
    var found = new List<SecretsManagerConfig>();

    var managersSection = config.GetSection("Secrets:Managers");
    if (managersSection.Exists())
    {
        foreach (var entry in managersSection.GetChildren())
        {
            // Name falls back to the section key, Type falls back to "vault".
            var entryName = child_Name(entry);
            var entryUrl = entry.GetValue<string>("Url");
            var entryType = entry.GetValue<string>("Type") ?? "vault";
            var entryToken = entry.GetValue<string>("Token");

            // Entries without a URL cannot be probed and are ignored.
            if (string.IsNullOrEmpty(entryUrl))
            {
                continue;
            }

            var manager = new SecretsManagerConfig
            {
                Name = entryName,
                Url = entryUrl,
                Type = entryType,
                Token = entryToken
            };
            found.Add(manager);
        }
    }

    // Check legacy single-manager config
    var legacyUrl = config.GetValue<string>("Secrets:Vault:Url")
        ?? config.GetValue<string>("Vault:Url");

    // The legacy keys only apply when the multi-manager section produced nothing.
    if (found.Count != 0 || string.IsNullOrEmpty(legacyUrl))
    {
        return found;
    }

    var legacyToken = config.GetValue<string>("Secrets:Vault:Token")
        ?? config.GetValue<string>("Vault:Token");

    found.Add(new SecretsManagerConfig
    {
        Name = "vault",
        Url = legacyUrl,
        Type = "vault",
        Token = legacyToken
    });

    return found;

    static string child_Name(IConfigurationSection section) =>
        section.GetValue<string>("Name") ?? section.Key;
}
|
||||
|
||||
/// <summary>
/// Connection settings for one configured secrets manager.
/// </summary>
private sealed class SecretsManagerConfig
{
    /// <summary>Display name; also used to build evidence keys.</summary>
    public required string Name { get; init; }

    /// <summary>Base URL from which the health endpoint is derived.</summary>
    public required string Url { get; init; }

    /// <summary>Manager type, e.g. "vault", "aws", "azure" or "gcp".</summary>
    public required string Type { get; init; }

    /// <summary>
    /// Optional credential. Sent as <c>X-Vault-Token</c> for Vault and as a
    /// Bearer token for every other type; no auth header is sent when empty.
    /// </summary>
    public string? Token { get; init; }
}
|
||||
|
||||
/// <summary>
/// Outcome of probing one secrets manager.
/// </summary>
private sealed class SecretsManagerResult
{
    /// <summary>Name of the probed manager.</summary>
    public required string Name { get; init; }

    /// <summary>Type of the probed manager.</summary>
    public required string Type { get; init; }

    /// <summary>True when the endpoint returned any HTTP response.</summary>
    public bool Reachable { get; set; }

    /// <summary>True when the response status was neither 401 nor 403.</summary>
    public bool AuthSuccess { get; set; }

    /// <summary>Round-trip time of the probe request, in milliseconds.</summary>
    public int LatencyMs { get; set; }

    /// <summary>Vault only: value of the <c>sealed</c> field in the health response.</summary>
    public bool IsSealed { get; set; }

    /// <summary>
    /// Vault only: value of the <c>initialized</c> field in the health response.
    /// Defaults to true when the field is not reported.
    /// </summary>
    public bool IsInitialized { get; set; } = true;

    /// <summary>Vault only: version string from the health response, if any.</summary>
    public string? Version { get; set; }

    /// <summary>Failure description when the probe could not complete.</summary>
    public string? ErrorMessage { get; set; }
}
|
||||
}
|
||||
@@ -37,7 +37,10 @@ public sealed class IntegrationPlugin : IDoctorPlugin
|
||||
new TeamsWebhookCheck(),
|
||||
new GitProviderCheck(),
|
||||
new LdapConnectivityCheck(),
|
||||
new OidcProviderCheck()
|
||||
new OidcProviderCheck(),
|
||||
new CiSystemConnectivityCheck(),
|
||||
new SecretsManagerConnectivityCheck(),
|
||||
new IntegrationWebhookHealthCheck()
|
||||
];
|
||||
|
||||
/// <inheritdoc />
|
||||
|
||||
Reference in New Issue
Block a user