audit work, doctors work
This commit is contained in:
@@ -0,0 +1,143 @@
|
||||
using System.Runtime.InteropServices;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Observability.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks if the log directory exists and is writable.
|
||||
/// </summary>
|
||||
public sealed class LogDirectoryCheck : IDoctorCheck
{
    // Plugin identity passed to the result builder for every outcome of this check.
    private const string PluginId = "stellaops.doctor.observability";
    private const string CategoryName = "Observability";

    /// <inheritdoc />
    public string CheckId => "check.logs.directory.writable";

    /// <inheritdoc />
    public string Name => "Log Directory Writable";

    /// <inheritdoc />
    public string Description => "Verify log directory exists and is writable";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Fail;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["observability", "logs", "quick"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromMilliseconds(500);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // Always run - uses default paths if not configured
        return true;
    }

    /// <summary>
    /// Verifies that the log directory exists and that a file can be created inside it.
    /// </summary>
    /// <remarks>
    /// Writability is probed by writing a uniquely named <c>.write-test-*</c> file and removing it
    /// again. Removal is best-effort: a failure to delete the probe never changes the outcome,
    /// because the write itself is what the check is about.
    /// </remarks>
    /// <param name="context">Plugin context supplying configuration and the result builder.</param>
    /// <param name="ct">Token used to cancel the probe write.</param>
    /// <returns>Pass when the probe write succeeds; Fail otherwise, with remediation steps.</returns>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var logPath = GetLogDirectory(context);
        var builder = context.CreateResult(CheckId, PluginId, CategoryName);

        // Check if directory exists
        if (!Directory.Exists(logPath))
        {
            return builder
                .Fail($"Log directory does not exist: {logPath}")
                .WithEvidence(eb => eb
                    .Add("LogPath", logPath)
                    .Add("Exists", "false"))
                .WithCauses(
                    "Log directory not created during installation",
                    "Directory was deleted",
                    "Configuration points to wrong path")
                .WithRemediation(rb => rb
                    .AddStep(1, "Create log directory",
                        BuildCreateDirectoryCommand(logPath),
                        CommandType.Shell)
                    .AddStep(2, "Set permissions",
                        BuildFixPermissionsCommand(logPath),
                        CommandType.Shell))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        // Check if directory is writable
        var testFile = Path.Combine(logPath, $".write-test-{Guid.NewGuid():N}");
        try
        {
            await File.WriteAllTextAsync(testFile, "test", ct);

            // The probe file is removed in the finally block. Deleting it here would let a
            // cleanup failure be misreported as "directory not writable" although the write
            // has already succeeded.
            return builder
                .Pass("Log directory exists and is writable")
                .WithEvidence(eb => eb
                    .Add("LogPath", logPath)
                    .Add("Exists", "true")
                    .Add("Writable", "true"))
                .Build();
        }
        catch (UnauthorizedAccessException)
        {
            return builder
                .Fail($"Log directory is not writable: {logPath}")
                .WithEvidence(eb => eb
                    .Add("LogPath", logPath)
                    .Add("Exists", "true")
                    .Add("Writable", "false"))
                .WithCauses(
                    "Insufficient permissions",
                    "Directory owned by different user",
                    "Read-only file system")
                .WithRemediation(rb => rb
                    .AddStep(1, "Fix permissions",
                        BuildFixPermissionsCommand(logPath),
                        CommandType.Shell))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (IOException ex)
        {
            return builder
                .Fail($"Cannot write to log directory: {ex.Message}")
                .WithEvidence(eb => eb
                    .Add("LogPath", logPath)
                    .Add("Error", ex.Message))
                .WithCauses(
                    "Disk full",
                    "File system error",
                    "Path too long")
                .Build();
        }
        finally
        {
            TryDeleteProbeFile(testFile);
        }
    }

    /// <summary>
    /// Removes the probe file if it is present. Deliberately best-effort: any failure is ignored
    /// so that cleanup can never replace the result (or exception) of the check itself.
    /// </summary>
    private static void TryDeleteProbeFile(string testFile)
    {
        try
        {
            if (File.Exists(testFile))
            {
                File.Delete(testFile);
            }
        }
        catch (Exception)
        {
            // ignore - a leftover probe file is harmless
        }
    }

    /// <summary>
    /// Builds the platform-specific command that creates the log directory.
    /// The path is quoted on every platform so directories containing spaces work.
    /// </summary>
    private static string BuildCreateDirectoryCommand(string logPath)
    {
        return RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
            ? $"mkdir \"{logPath}\""
            : $"sudo mkdir -p \"{logPath}\"";
    }

    /// <summary>
    /// Builds the platform-specific command that grants write access to the log directory.
    /// The path is quoted on every platform so directories containing spaces work.
    /// </summary>
    private static string BuildFixPermissionsCommand(string logPath)
    {
        return RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
            ? $"icacls \"{logPath}\" /grant Users:F"
            : $"sudo chown -R stellaops:stellaops \"{logPath}\" && sudo chmod 755 \"{logPath}\"";
    }

    /// <summary>
    /// Resolves the log directory: the <c>Logging:Path</c> setting when it is non-empty,
    /// otherwise a platform-specific default.
    /// </summary>
    private static string GetLogDirectory(DoctorPluginContext context)
    {
        var configured = context.Configuration["Logging:Path"];
        if (!string.IsNullOrEmpty(configured))
        {
            return configured;
        }

        // Platform-specific defaults
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            var appData = Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData);
            return Path.Combine(appData, "StellaOps", "logs");
        }

        return "/var/log/stellaops";
    }
}
|
||||
@@ -0,0 +1,181 @@
|
||||
using System.Globalization;
|
||||
using System.Runtime.InteropServices;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Observability.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks if log rotation is configured.
|
||||
/// </summary>
|
||||
public sealed class LogRotationCheck : IDoctorCheck
{
    private const long MaxLogSizeMb = 100; // 100 MB threshold for warning
    private const long BytesPerMb = 1024 * 1024;
    private const long MaxLogSizeBytes = MaxLogSizeMb * BytesPerMb;

    /// <inheritdoc />
    public string CheckId => "check.logs.rotation.configured";

    /// <inheritdoc />
    public string Name => "Log Rotation";

    /// <inheritdoc />
    public string Description => "Verify log rotation is configured to prevent disk exhaustion";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["observability", "logs"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(1);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        return true;
    }

    /// <summary>
    /// Reports whether log rotation appears to be configured and whether the <c>*.log</c> files
    /// directly inside the log directory are within the size threshold.
    /// </summary>
    /// <remarks>
    /// Outcomes: Skip when the directory is missing; Warn when the directory cannot be scanned,
    /// when rotation is configured but a file exceeds the threshold, or when rotation is not
    /// configured and logs are large; Pass when rotation is configured and files are small;
    /// Info when rotation is not configured but logs are small.
    /// </remarks>
    /// <param name="context">Plugin context supplying configuration and the result builder.</param>
    /// <param name="ct">Cancellation token (unused; the work is synchronous).</param>
    /// <returns>A completed task holding the check result.</returns>
    public Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, "stellaops.doctor.observability", "Observability");
        var logPath = GetLogDirectory(context);

        if (!Directory.Exists(logPath))
        {
            return Task.FromResult(builder
                .Skip("Log directory does not exist")
                .Build());
        }

        // Check for log rotation configuration
        var rotationConfigured = IsLogRotationConfigured(context);
        var rollingPolicy = context.Configuration["Logging:RollingPolicy"];

        // Check current log sizes. The scan is guarded so that an unreadable or concurrently
        // modified directory yields a result instead of an exception thrown synchronously from
        // this Task-returning method.
        List<long> fileSizes;
        try
        {
            fileSizes = ReadLogFileSizes(logPath);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            return Task.FromResult(builder
                .Warn($"Cannot inspect log directory: {ex.Message}")
                .WithEvidence(eb => eb
                    .Add("LogPath", logPath)
                    .Add("Error", ex.Message))
                .WithCauses(
                    "Insufficient permissions",
                    "Directory removed or modified during the scan")
                .Build());
        }

        var totalSizeMb = fileSizes.Sum() / BytesPerMb;
        var largeFileCount = fileSizes.Count(size => size > MaxLogSizeBytes);

        if (rotationConfigured)
        {
            if (largeFileCount > 0)
            {
                return Task.FromResult(builder
                    .Warn($"Log rotation configured but {largeFileCount} file(s) exceed {MaxLogSizeMb}MB threshold")
                    .WithEvidence(eb => eb
                        .Add("LogPath", logPath)
                        .Add("TotalSizeMb", totalSizeMb.ToString(CultureInfo.InvariantCulture))
                        .Add("LargeFileCount", largeFileCount.ToString(CultureInfo.InvariantCulture))
                        .Add("RollingPolicy", rollingPolicy ?? "configured"))
                    .WithCauses(
                        "Log rotation not triggered yet",
                        "Rotation threshold too high",
                        "Very high log volume")
                    .WithRemediation(rb => rb
                        .AddStep(1, "Force log rotation",
                            RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
                                ? "Restart-Service StellaOps"
                                : "sudo logrotate -f /etc/logrotate.d/stellaops",
                            CommandType.Shell)
                        .AddStep(2, "Adjust rotation threshold",
                            "Edit Logging:RollingPolicy in configuration",
                            CommandType.Config))
                    .Build());
            }

            return Task.FromResult(builder
                .Pass("Log rotation is configured and logs are within size limits")
                .WithEvidence(eb => eb
                    .Add("LogPath", logPath)
                    .Add("TotalSizeMb", totalSizeMb.ToString(CultureInfo.InvariantCulture))
                    .Add("FileCount", fileSizes.Count.ToString(CultureInfo.InvariantCulture))
                    .Add("RollingPolicy", rollingPolicy ?? "configured"))
                .Build());
        }

        // Not configured - check if there are large files
        if (largeFileCount > 0 || totalSizeMb > MaxLogSizeMb * 2)
        {
            return Task.FromResult(builder
                .Warn($"Log rotation not configured and logs total {totalSizeMb}MB")
                .WithEvidence(eb => eb
                    .Add("LogPath", logPath)
                    .Add("TotalSizeMb", totalSizeMb.ToString(CultureInfo.InvariantCulture))
                    .Add("LargeFileCount", largeFileCount.ToString(CultureInfo.InvariantCulture))
                    .Add("RollingPolicy", "(not configured)"))
                .WithCauses(
                    "Log rotation not configured",
                    "logrotate not installed",
                    "Application-level rotation disabled")
                .WithRemediation(rb => rb
                    .AddStep(1, "Enable application-level log rotation",
                        "Set Logging:RollingPolicy to 'Size' or 'Date' in appsettings.json",
                        CommandType.Config)
                    // NOTE(review): the Windows text is guidance, not a runnable shell command.
                    .AddStep(2, "Or configure system-level rotation",
                        RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
                            ? "Use Windows Event Log or configure log cleanup task"
                            : "sudo cp /usr/share/stellaops/logrotate.conf /etc/logrotate.d/stellaops",
                        CommandType.Shell))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build());
        }

        return Task.FromResult(builder
            .Info("Log rotation not configured but logs are small")
            .WithEvidence(eb => eb
                .Add("LogPath", logPath)
                .Add("TotalSizeMb", totalSizeMb.ToString(CultureInfo.InvariantCulture))
                .Add("RollingPolicy", "(not configured)"))
            .Build());
    }

    /// <summary>
    /// Returns the size in bytes of every <c>*.log</c> file directly inside
    /// <paramref name="logPath"/>, reading each file's length exactly once.
    /// </summary>
    /// <remarks>
    /// A file that disappears between the directory listing and the length read (for example
    /// because it was just rotated) is skipped. Other I/O or permission errors propagate to the
    /// caller.
    /// </remarks>
    private static List<long> ReadLogFileSizes(string logPath)
    {
        var sizes = new List<long>();
        foreach (var path in Directory.GetFiles(logPath, "*.log", SearchOption.TopDirectoryOnly))
        {
            try
            {
                sizes.Add(new FileInfo(path).Length);
            }
            catch (FileNotFoundException)
            {
                // File vanished after it was listed; it no longer contributes to disk usage.
            }
        }

        return sizes;
    }

    /// <summary>
    /// Returns <c>true</c> when any of the recognised rotation signals is present:
    /// the <c>Logging:RollingPolicy</c> setting, a rolling interval on the first Serilog sink,
    /// or (on non-Windows platforms) the file <c>/etc/logrotate.d/stellaops</c>.
    /// </summary>
    private static bool IsLogRotationConfigured(DoctorPluginContext context)
    {
        // Check application-level configuration
        var rollingPolicy = context.Configuration["Logging:RollingPolicy"];
        if (!string.IsNullOrEmpty(rollingPolicy))
        {
            return true;
        }

        // Check Serilog configuration.
        // NOTE(review): only the sink at index 0 is inspected; a file sink configured at another
        // index would not be detected - confirm whether that is intended.
        var serilogRolling = context.Configuration["Serilog:WriteTo:0:Args:rollingInterval"];
        if (!string.IsNullOrEmpty(serilogRolling))
        {
            return true;
        }

        // Check for system-level logrotate on Linux
        if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            if (File.Exists("/etc/logrotate.d/stellaops"))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Resolves the log directory: the <c>Logging:Path</c> setting when it is non-empty,
    /// otherwise a platform-specific default.
    /// </summary>
    private static string GetLogDirectory(DoctorPluginContext context)
    {
        var configured = context.Configuration["Logging:Path"];
        if (!string.IsNullOrEmpty(configured))
        {
            return configured;
        }

        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            var appData = Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData);
            return Path.Combine(appData, "StellaOps", "logs");
        }

        return "/var/log/stellaops";
    }
}
|
||||
@@ -0,0 +1,122 @@
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Observability.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks if the OTLP collector endpoint is reachable.
|
||||
/// </summary>
|
||||
public sealed class OtlpEndpointCheck : IDoctorCheck
{
    /// <inheritdoc />
    public string CheckId => "check.telemetry.otlp.endpoint";

    /// <inheritdoc />
    public string Name => "OTLP Endpoint";

    /// <inheritdoc />
    public string Description => "Verify OTLP collector endpoint is reachable";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["observability", "telemetry", "otlp"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(3);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // Only meaningful when an endpoint has been configured.
        var endpoint = context.Configuration["Telemetry:OtlpEndpoint"];
        return !string.IsNullOrEmpty(endpoint);
    }

    /// <summary>
    /// Issues an HTTP GET against <c>{Telemetry:OtlpEndpoint}/v1/health</c> and reports whether
    /// the collector answered with a success status.
    /// </summary>
    /// <remarks>
    /// The configured endpoint is validated first: a value that is not an absolute http/https
    /// URL (for example a bare <c>host:port</c>) produces a Warn result instead of an exception.
    /// A request timeout (5 seconds) is reported as a Warn; cancellation requested through
    /// <paramref name="ct"/> is not treated as a timeout and propagates to the caller.
    /// NOTE(review): <c>/v1/health</c> is assumed to be served by the collector - confirm against
    /// the deployed collector configuration.
    /// </remarks>
    /// <param name="context">Plugin context supplying configuration, services and the result builder.</param>
    /// <param name="ct">Token used to cancel the HTTP request.</param>
    /// <returns>Pass on a success status code; Warn for every other outcome.</returns>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var endpoint = context.Configuration["Telemetry:OtlpEndpoint"] ?? string.Empty;
        var builder = context.CreateResult(CheckId, "stellaops.doctor.observability", "Observability");

        // Try the OTLP health endpoint
        var healthUrl = endpoint.TrimEnd('/') + "/v1/health";

        // Reject anything HttpClient cannot request. The scheme test matters because values
        // such as "collector:4317" parse as an absolute URI whose scheme is "collector".
        if (!Uri.TryCreate(healthUrl, UriKind.Absolute, out var healthUri)
            || (healthUri.Scheme != Uri.UriSchemeHttp && healthUri.Scheme != Uri.UriSchemeHttps))
        {
            return builder
                .Warn($"OTLP endpoint is not a valid absolute http(s) URL: {endpoint}")
                .WithEvidence(eb => eb
                    .Add("Endpoint", endpoint)
                    .Add("Error", "Invalid endpoint URL"))
                .WithCauses(
                    "Wrong endpoint configured",
                    "Endpoint is missing the http:// or https:// scheme")
                .WithRemediation(rb => rb
                    .AddStep(1, "Correct the endpoint",
                        "Set Telemetry:OtlpEndpoint to an absolute http:// or https:// URL",
                        CommandType.Config))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(5);

            using var response = await httpClient.GetAsync(healthUri, ct);

            // Copy what the result needs so nothing refers to the response after disposal.
            var status = response.StatusCode;
            var statusCodeText = ((int)status).ToString(CultureInfo.InvariantCulture);

            if (response.IsSuccessStatusCode)
            {
                return builder
                    .Pass("OTLP collector is reachable")
                    .WithEvidence(eb => eb
                        .Add("Endpoint", endpoint)
                        .Add("StatusCode", statusCodeText))
                    .Build();
            }

            return builder
                .Warn($"OTLP collector returned {status}")
                .WithEvidence(eb => eb
                    .Add("Endpoint", endpoint)
                    .Add("StatusCode", statusCodeText))
                .WithCauses(
                    "OTLP collector not running",
                    "Network connectivity issue",
                    "Wrong endpoint configured",
                    "Health endpoint not available (may still work)")
                .WithRemediation(rb => rb
                    .AddStep(1, "Check OTLP collector status",
                        "docker logs otel-collector --tail 50",
                        CommandType.Shell)
                    // Uses the normalized URL so a trailing slash in the configured endpoint
                    // does not produce "//v1/health".
                    .AddStep(2, "Test endpoint connectivity",
                        $"curl -v {healthUrl}",
                        CommandType.Shell)
                    .AddStep(3, "Verify configuration",
                        "cat /etc/stellaops/telemetry.yaml | grep otlp",
                        CommandType.Shell))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }
        catch (TaskCanceledException) when (!ct.IsCancellationRequested)
        {
            // HttpClient.Timeout elapsed. Caller-requested cancellation is excluded by the
            // filter above so that it is not misreported as a timeout.
            return builder
                .Warn($"OTLP collector connection timed out")
                .WithEvidence(eb => eb
                    .Add("Endpoint", endpoint)
                    .Add("Error", "Connection timeout"))
                .WithCauses(
                    "OTLP collector not running",
                    "Network connectivity issue",
                    "Firewall blocking connection")
                .WithRemediation(rb => rb
                    .AddStep(1, "Check if OTLP collector is running",
                        "docker ps | grep otel",
                        CommandType.Shell)
                    .AddStep(2, "Check network connectivity",
                        $"nc -zv {healthUri.Host} {healthUri.Port}",
                        CommandType.Shell))
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot reach OTLP collector: {ex.Message}")
                .WithEvidence(eb => eb
                    .Add("Endpoint", endpoint)
                    .Add("Error", ex.Message))
                .WithCauses(
                    "OTLP collector not running",
                    "Network connectivity issue",
                    "DNS resolution failure")
                .Build();
        }
    }
}
|
||||
@@ -0,0 +1,135 @@
|
||||
using System.Globalization;
|
||||
using System.Net.Http;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using StellaOps.Doctor.Models;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Observability.Checks;
|
||||
|
||||
/// <summary>
|
||||
/// Checks if Prometheus can scrape metrics from the application.
|
||||
/// </summary>
|
||||
public sealed class PrometheusScrapeCheck : IDoctorCheck
{
    /// <inheritdoc />
    public string CheckId => "check.metrics.prometheus.scrape";

    /// <inheritdoc />
    public string Name => "Prometheus Scrape";

    /// <inheritdoc />
    public string Description => "Verify application metrics endpoint is accessible for Prometheus scraping";

    /// <inheritdoc />
    public DoctorSeverity DefaultSeverity => DoctorSeverity.Warn;

    /// <inheritdoc />
    public IReadOnlyList<string> Tags => ["observability", "metrics", "prometheus"];

    /// <inheritdoc />
    public TimeSpan EstimatedDuration => TimeSpan.FromSeconds(2);

    /// <inheritdoc />
    public bool CanRun(DoctorPluginContext context)
    {
        // Check if metrics are enabled
        var metricsEnabled = context.Configuration["Metrics:Enabled"];
        return metricsEnabled?.Equals("true", StringComparison.OrdinalIgnoreCase) ?? false;
    }

    /// <summary>
    /// Requests the application's metrics endpoint over HTTP and reports whether it answers
    /// with a success status, including a count of the sample lines returned.
    /// </summary>
    /// <remarks>
    /// The URL is built from <c>Metrics:Host</c>, <c>Metrics:Port</c> and <c>Metrics:Path</c>
    /// (defaults: <c>localhost</c>, <c>8080</c>, <c>/metrics</c>). Missing or blank values fall
    /// back to the defaults and a path without a leading slash is given one. A URL that still
    /// cannot be parsed produces a Warn result instead of an exception. A request timeout
    /// (5 seconds) is reported as a Warn; cancellation requested through <paramref name="ct"/>
    /// is not treated as a timeout and propagates to the caller.
    /// </remarks>
    /// <param name="context">Plugin context supplying configuration, services and the result builder.</param>
    /// <param name="ct">Token used to cancel the HTTP request.</param>
    /// <returns>Pass on a success status code; Warn for every other outcome.</returns>
    public async Task<DoctorCheckResult> RunAsync(DoctorPluginContext context, CancellationToken ct)
    {
        var builder = context.CreateResult(CheckId, "stellaops.doctor.observability", "Observability");

        var metricsPath = ValueOrDefault(context.Configuration["Metrics:Path"], "/metrics");
        var metricsPort = ValueOrDefault(context.Configuration["Metrics:Port"], "8080");
        var metricsHost = ValueOrDefault(context.Configuration["Metrics:Host"], "localhost");

        // "metrics" and "/metrics" must yield the same URL; without the slash the path would
        // be glued onto the port number.
        if (!metricsPath.StartsWith('/'))
        {
            metricsPath = "/" + metricsPath;
        }

        var metricsUrl = $"http://{metricsHost}:{metricsPort}{metricsPath}";

        if (!Uri.TryCreate(metricsUrl, UriKind.Absolute, out var metricsUri))
        {
            return builder
                .Warn($"Metrics endpoint URL is invalid: {metricsUrl}")
                .WithEvidence(eb => eb
                    .Add("MetricsUrl", metricsUrl)
                    .Add("Error", "Invalid metrics URL"))
                .WithCauses(
                    "Wrong port configured",
                    "Invalid Metrics:Host or Metrics:Path value")
                .WithRemediation(rb => rb
                    .AddStep(1, "Verify metrics configuration",
                        "stella config get Metrics",
                        CommandType.Shell))
                .WithVerification($"stella doctor --check {CheckId}")
                .Build();
        }

        try
        {
            var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
            var httpClient = httpClientFactory.CreateClient("DoctorHealthCheck");
            httpClient.Timeout = TimeSpan.FromSeconds(5);

            using var response = await httpClient.GetAsync(metricsUri, ct);

            // Copy what the result needs so nothing refers to the response after disposal.
            var status = response.StatusCode;
            var statusCodeText = ((int)status).ToString(CultureInfo.InvariantCulture);

            if (response.IsSuccessStatusCode)
            {
                var content = await response.Content.ReadAsStringAsync(ct);
                var metricCount = CountMetrics(content);
                var contentType = response.Content.Headers.ContentType?.ToString() ?? "unknown";

                return builder
                    .Pass($"Metrics endpoint accessible with {metricCount} metrics")
                    .WithEvidence(eb => eb
                        .Add("MetricsUrl", metricsUrl)
                        .Add("StatusCode", statusCodeText)
                        .Add("MetricCount", metricCount.ToString(CultureInfo.InvariantCulture))
                        .Add("ContentType", contentType))
                    .Build();
            }

            return builder
                .Warn($"Metrics endpoint returned {status}")
                .WithEvidence(eb => eb
                    .Add("MetricsUrl", metricsUrl)
                    .Add("StatusCode", statusCodeText))
                .WithCauses(
                    "Metrics endpoint not enabled",
                    "Wrong port configured",
                    "Authentication required")
                .WithRemediation(rb => rb
                    .AddStep(1, "Enable metrics endpoint",
                        "Set Metrics:Enabled=true in appsettings.json",
                        CommandType.Config)
                    .AddStep(2, "Verify metrics configuration",
                        "stella config get Metrics",
                        CommandType.Shell))
                .WithVerification($"curl -s {metricsUrl} | head -5")
                .Build();
        }
        catch (TaskCanceledException) when (!ct.IsCancellationRequested)
        {
            // HttpClient.Timeout elapsed. Caller-requested cancellation is excluded by the
            // filter above so that it is not misreported as a timeout.
            return builder
                .Warn("Metrics endpoint connection timed out")
                .WithEvidence(eb => eb
                    .Add("MetricsUrl", metricsUrl)
                    .Add("Error", "Connection timeout"))
                .WithCauses(
                    "Service not running",
                    "Wrong port configured",
                    "Firewall blocking connection")
                .WithRemediation(rb => rb
                    .AddStep(1, "Check service status",
                        "stella status",
                        CommandType.Shell)
                    .AddStep(2, "Check port binding",
                        $"netstat -an | grep {metricsPort}",
                        CommandType.Shell))
                .Build();
        }
        catch (HttpRequestException ex)
        {
            return builder
                .Warn($"Cannot reach metrics endpoint: {ex.Message}")
                .WithEvidence(eb => eb
                    .Add("MetricsUrl", metricsUrl)
                    .Add("Error", ex.Message))
                .WithCauses(
                    "Service not running",
                    "Metrics endpoint disabled",
                    "Network connectivity issue")
                .Build();
        }
    }

    /// <summary>
    /// Returns <paramref name="value"/> trimmed, or <paramref name="fallback"/> when the value
    /// is null, empty or whitespace.
    /// </summary>
    private static string ValueOrDefault(string? value, string fallback)
    {
        return string.IsNullOrWhiteSpace(value) ? fallback : value.Trim();
    }

    /// <summary>
    /// Counts the lines of a Prometheus text response that look like samples.
    /// </summary>
    /// <remarks>
    /// Each line is trimmed first, so CRLF line endings and indentation do not affect the
    /// result. Blank lines and lines starting with <c>#</c> are skipped; a remaining line counts
    /// when it contains a space or tab separating the metric name from its value. The number is
    /// therefore a count of sample lines, not of distinct metric names.
    /// </remarks>
    /// <param name="prometheusOutput">Body returned by the metrics endpoint.</param>
    /// <returns>The number of sample-like lines.</returns>
    private static int CountMetrics(string prometheusOutput)
    {
        var count = 0;
        foreach (var rawLine in prometheusOutput.Split('\n'))
        {
            var line = rawLine.Trim();
            if (line.Length == 0 || line[0] == '#')
            {
                continue;
            }

            if (line.Contains(' ') || line.Contains('\t'))
            {
                count++;
            }
        }

        return count;
    }
}
|
||||
@@ -0,0 +1,54 @@
|
||||
using StellaOps.Doctor.Plugin.Observability.Checks;
|
||||
using StellaOps.Doctor.Plugins;
|
||||
|
||||
namespace StellaOps.Doctor.Plugin.Observability;
|
||||
|
||||
/// <summary>
|
||||
/// Doctor plugin for observability checks (OTLP, logs, metrics).
|
||||
/// </summary>
|
||||
public sealed class ObservabilityDoctorPlugin : IDoctorPlugin
{
    // Version of this plugin and the oldest engine version it declares support for.
    private static readonly Version CurrentVersion = new(1, 0, 0);
    private static readonly Version MinimumEngineVersion = new(1, 0, 0);

    /// <inheritdoc />
    public string PluginId => "stellaops.doctor.observability";

    /// <inheritdoc />
    public string DisplayName => "Observability";

    /// <inheritdoc />
    public DoctorCategory Category => DoctorCategory.Observability;

    /// <inheritdoc />
    public Version Version => CurrentVersion;

    /// <inheritdoc />
    public Version MinEngineVersion => MinimumEngineVersion;

    /// <summary>
    /// Reports the plugin as available unconditionally; each check decides for itself
    /// whether it can run.
    /// </summary>
    /// <param name="services">Service provider (not consulted).</param>
    /// <returns>Always <c>true</c>.</returns>
    public bool IsAvailable(IServiceProvider services) => true;

    /// <summary>
    /// Creates a fresh instance of every observability check, in a fixed order:
    /// OTLP endpoint, log directory, log rotation, Prometheus scrape.
    /// </summary>
    /// <param name="context">Plugin context (not consulted).</param>
    /// <returns>An array holding the four checks.</returns>
    public IReadOnlyList<IDoctorCheck> GetChecks(DoctorPluginContext context)
    {
        IDoctorCheck[] checks =
        [
            new OtlpEndpointCheck(),
            new LogDirectoryCheck(),
            new LogRotationCheck(),
            new PrometheusScrapeCheck(),
        ];

        return checks;
    }

    /// <summary>
    /// Performs no work; this plugin needs no initialization.
    /// </summary>
    /// <param name="context">Plugin context (not consulted).</param>
    /// <param name="ct">Cancellation token (not consulted).</param>
    /// <returns>An already completed task.</returns>
    public Task InitializeAsync(DoctorPluginContext context, CancellationToken ct) => Task.CompletedTask;
}
|
||||
@@ -0,0 +1,21 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Compiler and packaging settings for the observability Doctor plugin. -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <LangVersion>preview</LangVersion>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <RootNamespace>StellaOps.Doctor.Plugin.Observability</RootNamespace>
    <Description>Observability checks for Stella Ops Doctor diagnostics - OTLP, logs, metrics</Description>
  </PropertyGroup>

  <!-- Doctor core library referenced by the checks in this project. -->
  <ItemGroup>
    <ProjectReference Include="..\..\..\..\__Libraries\StellaOps.Doctor\StellaOps.Doctor.csproj" />
  </ItemGroup>

  <!-- No Version attribute is given; presumably the version is managed centrally - confirm. -->
  <ItemGroup>
    <PackageReference Include="Microsoft.Extensions.Http" />
  </ItemGroup>

</Project>
|
||||
Reference in New Issue
Block a user