refactor: DB schema fixes + container renames + compose include + audit sprint
- FindingsLedger: change schema from public to findings (V3-01) - Add 9 migration module plugins: RiskEngine, Replay, ExportCenter, Integrations, Signer, IssuerDirectory, Workflow, PacksRegistry, OpsMemory (V4-01 to V4-09) - Remove 16 redundant inline CREATE SCHEMA patterns (V4-10) - Rename export→export-web, excititor→excititor-web for consistency - Compose stella-ops.yml: thin wrapper using include: directive - Fix dead /api/v1/jobengine/* gateway routes → release-orchestrator/packsregistry - Scheduler plugin architecture: ISchedulerJobPlugin + ScanJobPlugin + DoctorJobPlugin - Create unified audit sink sprint plan - VulnExplorer integration tests + gap analysis Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
54
src/JobEngine/StellaOps.Scheduler.plugins/AGENTS.md
Normal file
54
src/JobEngine/StellaOps.Scheduler.plugins/AGENTS.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# AGENTS.md -- Scheduler Plugins
|
||||
|
||||
## Overview
|
||||
|
||||
This directory contains **scheduler job plugins** that extend the Scheduler service
|
||||
with new job types. Each plugin implements `ISchedulerJobPlugin` from the
|
||||
`StellaOps.Scheduler.Plugin.Abstractions` library.
|
||||
|
||||
## Plugin Architecture
|
||||
|
||||
Plugins are discovered in two ways:
|
||||
1. **Built-in**: `ScanJobPlugin` is registered unconditionally in `Program.cs`.
|
||||
2. **Assembly-loaded**: The `PluginHost.LoadPlugins()` pipeline scans `plugins/scheduler/`
|
||||
for DLLs matching `StellaOps.Scheduler.Plugin.*.dll`. Any type implementing
|
||||
`ISchedulerJobPlugin` is instantiated and registered.
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
StellaOps.Scheduler.plugins/
|
||||
scheduler/ # Runtime plugin DLLs (empty in dev; populated by build)
|
||||
StellaOps.Scheduler.Plugin.Doctor/ # Doctor health check plugin (source)
|
||||
```
|
||||
|
||||
## Creating a New Plugin
|
||||
|
||||
1. Create a new class library under `StellaOps.Scheduler.plugins/`.
|
||||
2. Reference `StellaOps.Scheduler.Plugin.Abstractions`.
|
||||
3. Implement `ISchedulerJobPlugin`:
|
||||
- `JobKind`: unique string identifier (stored in `Schedule.job_kind`).
|
||||
- `CreatePlanAsync`: build an execution plan from the schedule config.
|
||||
- `ExecuteAsync`: run the plan (HTTP calls, computations, etc.).
|
||||
- `ValidateConfigAsync`: validate the `Schedule.PluginConfig` JSON.
|
||||
- `ConfigureServices`: register plugin-specific DI services.
|
||||
- `MapEndpoints`: register plugin-specific HTTP endpoints.
|
||||
4. Build the DLL and place it in `plugins/scheduler/` (or add a project reference
|
||||
in the WebService csproj for development).
|
||||
|
||||
## Existing Plugins
|
||||
|
||||
| Plugin | JobKind | Description |
|
||||
|--------|---------|-------------|
|
||||
| ScanJobPlugin | `scan` | Built-in; wraps existing scan scheduling logic |
|
||||
| DoctorJobPlugin | `doctor` | Doctor health check scheduling via HTTP to Doctor WebService |
|
||||
|
||||
## Schedule Model Extensions
|
||||
|
||||
- `Schedule.JobKind` (string, default "scan"): routes to the correct plugin.
|
||||
- `Schedule.PluginConfig` (JSONB, nullable): plugin-specific configuration.
|
||||
|
||||
## Testing
|
||||
|
||||
Plugin tests should be placed in `StellaOps.Scheduler.__Tests/` alongside
|
||||
the existing Scheduler test projects.
|
||||
@@ -0,0 +1,361 @@
|
||||
using System.Diagnostics;
|
||||
using System.Net.Http.Json;
|
||||
using System.Text.Json;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.AspNetCore.Routing;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scheduler.Models;
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Endpoints;
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Models;
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Persistence;
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Services;
|
||||
|
||||
namespace StellaOps.Scheduler.Plugin.Doctor;
|
||||
|
||||
/// <summary>
|
||||
/// Scheduler job plugin for Doctor health checks.
|
||||
/// Replaces the standalone Doctor Scheduler service by integrating Doctor
|
||||
/// scheduling, execution, trend storage, and alert evaluation directly
|
||||
/// into the Scheduler service.
|
||||
/// </summary>
|
||||
public sealed class DoctorJobPlugin : ISchedulerJobPlugin
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public string JobKind => "doctor";
|
||||
|
||||
/// <inheritdoc />
|
||||
public string DisplayName => "Doctor Health Checks";
|
||||
|
||||
/// <inheritdoc />
|
||||
public Version Version { get; } = new(1, 0, 0);
|
||||
|
||||
/// <inheritdoc />
|
||||
public Task<JobPlan> CreatePlanAsync(JobPlanContext context, CancellationToken ct)
|
||||
{
|
||||
var config = DoctorScheduleConfig.FromPluginConfig(context.Schedule.PluginConfig);
|
||||
|
||||
var payload = new Dictionary<string, object?>
|
||||
{
|
||||
["doctorMode"] = config.DoctorMode,
|
||||
["categories"] = config.Categories,
|
||||
["plugins"] = config.Plugins,
|
||||
["timeoutSeconds"] = config.TimeoutSeconds,
|
||||
["scheduleId"] = context.Schedule.Id,
|
||||
};
|
||||
|
||||
var plan = new JobPlan(
|
||||
JobKind: "doctor",
|
||||
Payload: payload,
|
||||
EstimatedSteps: 3); // trigger, poll, store trends
|
||||
|
||||
return Task.FromResult(plan);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task ExecuteAsync(JobExecutionContext context, CancellationToken ct)
|
||||
{
|
||||
var logger = context.Services.GetRequiredService<ILoggerFactory>().CreateLogger<DoctorJobPlugin>();
|
||||
var httpClientFactory = context.Services.GetRequiredService<IHttpClientFactory>();
|
||||
var trendRepository = context.Services.GetRequiredService<IDoctorTrendRepository>();
|
||||
var alertService = context.Services.GetRequiredService<IDoctorAlertService>();
|
||||
|
||||
var config = DoctorScheduleConfig.FromPluginConfig(context.Schedule.PluginConfig);
|
||||
var httpClient = httpClientFactory.CreateClient("DoctorApi");
|
||||
|
||||
await context.Reporter.TransitionStateAsync(RunState.Running, ct: ct);
|
||||
await context.Reporter.AppendLogAsync($"Starting Doctor run (mode={config.DoctorMode})", ct: ct);
|
||||
|
||||
try
|
||||
{
|
||||
// Step 1: Trigger Doctor run
|
||||
await context.Reporter.ReportProgressAsync(0, 3, "Triggering Doctor run", ct);
|
||||
var runId = await TriggerDoctorRunAsync(httpClient, config, ct);
|
||||
await context.Reporter.AppendLogAsync($"Doctor run triggered: {runId}", ct: ct);
|
||||
|
||||
// Step 2: Wait for completion
|
||||
await context.Reporter.ReportProgressAsync(1, 3, "Waiting for Doctor run completion", ct);
|
||||
var result = await WaitForRunCompletionAsync(httpClient, runId, config.TimeoutSeconds, ct);
|
||||
await context.Reporter.AppendLogAsync(
|
||||
$"Doctor run completed: {result.Status} (passed={result.PassedChecks}, warned={result.WarnedChecks}, failed={result.FailedChecks})",
|
||||
ct: ct);
|
||||
|
||||
// Step 3: Store trend data and evaluate alerts
|
||||
await context.Reporter.ReportProgressAsync(2, 3, "Storing trend data", ct);
|
||||
await StoreTrendDataAsync(httpClient, trendRepository, runId, context.Schedule.TenantId, ct);
|
||||
await alertService.EvaluateAndSendAsync(config, result, ct);
|
||||
|
||||
await context.Reporter.ReportProgressAsync(3, 3, "Completed", ct);
|
||||
await context.Reporter.TransitionStateAsync(RunState.Completed, ct: ct);
|
||||
}
|
||||
catch (Exception ex) when (!ct.IsCancellationRequested)
|
||||
{
|
||||
logger.LogError(ex, "Doctor plugin execution failed for schedule {ScheduleId}", context.Schedule.Id);
|
||||
await context.Reporter.AppendLogAsync($"Error: {ex.Message}", "error", ct);
|
||||
await context.Reporter.TransitionStateAsync(RunState.Error, ex.Message, ct);
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public Task<JobConfigValidationResult> ValidateConfigAsync(
|
||||
IReadOnlyDictionary<string, object?> pluginConfig,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var errors = new List<string>();
|
||||
|
||||
if (pluginConfig.TryGetValue("doctorMode", out var modeObj))
|
||||
{
|
||||
var mode = modeObj?.ToString()?.ToLowerInvariant();
|
||||
if (mode is not ("full" or "quick" or "categories" or "plugins"))
|
||||
{
|
||||
errors.Add($"Invalid doctorMode '{mode}'. Must be one of: full, quick, categories, plugins.");
|
||||
}
|
||||
|
||||
if (mode == "categories" &&
|
||||
(!pluginConfig.TryGetValue("categories", out var cats) || cats is null))
|
||||
{
|
||||
errors.Add("categories list is required when doctorMode is 'categories'.");
|
||||
}
|
||||
|
||||
if (mode == "plugins" &&
|
||||
(!pluginConfig.TryGetValue("plugins", out var plugins) || plugins is null))
|
||||
{
|
||||
errors.Add("plugins list is required when doctorMode is 'plugins'.");
|
||||
}
|
||||
}
|
||||
|
||||
if (pluginConfig.TryGetValue("timeoutSeconds", out var timeoutObj))
|
||||
{
|
||||
if (timeoutObj is int timeout && timeout <= 0)
|
||||
{
|
||||
errors.Add("timeoutSeconds must be a positive integer.");
|
||||
}
|
||||
}
|
||||
|
||||
return Task.FromResult(errors.Count == 0
|
||||
? JobConfigValidationResult.Success
|
||||
: new JobConfigValidationResult(false, errors));
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string? GetConfigJsonSchema()
|
||||
{
|
||||
return """
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"doctorMode": {
|
||||
"type": "string",
|
||||
"enum": ["full", "quick", "categories", "plugins"],
|
||||
"default": "full"
|
||||
},
|
||||
"categories": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
},
|
||||
"plugins": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
},
|
||||
"timeoutSeconds": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 300
|
||||
},
|
||||
"alerts": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": { "type": "boolean", "default": true },
|
||||
"alertOnFail": { "type": "boolean", "default": true },
|
||||
"alertOnWarn": { "type": "boolean", "default": false },
|
||||
"alertOnStatusChange": { "type": "boolean", "default": true },
|
||||
"channels": { "type": "array", "items": { "type": "string" } },
|
||||
"emailRecipients": { "type": "array", "items": { "type": "string" } },
|
||||
"webhookUrls": { "type": "array", "items": { "type": "string" } },
|
||||
"minSeverity": { "type": "string", "default": "Fail" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
""";
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public void ConfigureServices(IServiceCollection services, IConfiguration configuration)
|
||||
{
|
||||
// Register HttpClient for Doctor API
|
||||
var doctorApiUrl = configuration["Scheduler:Doctor:ApiUrl"] ?? "http://doctor-web.stella-ops.local";
|
||||
services.AddHttpClient("DoctorApi", client =>
|
||||
{
|
||||
client.BaseAddress = new Uri(doctorApiUrl);
|
||||
client.Timeout = TimeSpan.FromSeconds(600);
|
||||
});
|
||||
|
||||
// Register trend repository
|
||||
var connectionString = configuration["Scheduler:Storage:ConnectionString"]
|
||||
?? configuration.GetConnectionString("Default")
|
||||
?? "";
|
||||
services.AddSingleton<IDoctorTrendRepository>(sp =>
|
||||
new PostgresDoctorTrendRepository(connectionString, sp.GetRequiredService<ILogger<PostgresDoctorTrendRepository>>()));
|
||||
|
||||
// Register alert service
|
||||
services.AddSingleton<IDoctorAlertService, ConsoleAlertService>();
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public void MapEndpoints(IEndpointRouteBuilder routes)
|
||||
{
|
||||
DoctorTrendEndpoints.Map(routes);
|
||||
}
|
||||
|
||||
// --- Private helpers porting logic from ScheduleExecutor ---
|
||||
|
||||
private static async Task<string> TriggerDoctorRunAsync(
|
||||
HttpClient httpClient, DoctorScheduleConfig config, CancellationToken ct)
|
||||
{
|
||||
var request = new
|
||||
{
|
||||
mode = config.DoctorMode,
|
||||
categories = config.Categories,
|
||||
plugins = config.Plugins,
|
||||
@async = true,
|
||||
};
|
||||
|
||||
var response = await httpClient.PostAsJsonAsync("/api/v1/doctor/run", request, ct);
|
||||
response.EnsureSuccessStatusCode();
|
||||
|
||||
var result = await response.Content.ReadFromJsonAsync<RunTriggerResponse>(cancellationToken: ct);
|
||||
return result?.RunId ?? throw new InvalidOperationException("No run ID returned from Doctor API");
|
||||
}
|
||||
|
||||
private static async Task<DoctorExecutionResult> WaitForRunCompletionAsync(
|
||||
HttpClient httpClient, string runId, int timeoutSeconds, CancellationToken ct)
|
||||
{
|
||||
var timeout = TimeSpan.FromSeconds(timeoutSeconds);
|
||||
var sw = Stopwatch.StartNew();
|
||||
|
||||
while (sw.Elapsed < timeout)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
var response = await httpClient.GetAsync($"/api/v1/doctor/run/{runId}", ct);
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
await Task.Delay(TimeSpan.FromSeconds(5), ct);
|
||||
continue;
|
||||
}
|
||||
|
||||
var result = await response.Content.ReadFromJsonAsync<RunStatusResponse>(cancellationToken: ct);
|
||||
if (result?.Status == "completed")
|
||||
{
|
||||
var status = result.FailedChecks > 0 ? "failed"
|
||||
: result.WarnedChecks > 0 ? "warning"
|
||||
: "success";
|
||||
|
||||
return new DoctorExecutionResult
|
||||
{
|
||||
RunId = runId,
|
||||
Status = status,
|
||||
TotalChecks = result.TotalChecks,
|
||||
PassedChecks = result.PassedChecks,
|
||||
WarnedChecks = result.WarnedChecks,
|
||||
FailedChecks = result.FailedChecks,
|
||||
SkippedChecks = result.SkippedChecks,
|
||||
HealthScore = result.HealthScore,
|
||||
CategoriesWithIssues = result.CategoriesWithIssues ?? [],
|
||||
};
|
||||
}
|
||||
|
||||
await Task.Delay(TimeSpan.FromSeconds(2), ct);
|
||||
}
|
||||
|
||||
throw new TimeoutException($"Doctor run {runId} did not complete within {timeout.TotalSeconds}s");
|
||||
}
|
||||
|
||||
private static async Task StoreTrendDataAsync(
|
||||
HttpClient httpClient,
|
||||
IDoctorTrendRepository trendRepository,
|
||||
string runId,
|
||||
string tenantId,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var response = await httpClient.GetAsync($"/api/v1/doctor/run/{runId}/results", ct);
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var results = await response.Content.ReadFromJsonAsync<RunResultsResponse>(cancellationToken: ct);
|
||||
if (results?.Results is null || results.Results.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var timestamp = DateTimeOffset.UtcNow;
|
||||
var dataPoints = results.Results.Select(r => new DoctorTrendDataPoint
|
||||
{
|
||||
Timestamp = timestamp,
|
||||
TenantId = tenantId,
|
||||
CheckId = r.CheckId,
|
||||
PluginId = r.PluginId,
|
||||
Category = r.Category,
|
||||
RunId = runId,
|
||||
Status = r.Status,
|
||||
HealthScore = CalculateHealthScore(r.Status),
|
||||
DurationMs = r.DurationMs,
|
||||
EvidenceValues = ExtractTrendEvidence(r.Evidence),
|
||||
}).ToList();
|
||||
|
||||
await trendRepository.StoreTrendDataAsync(dataPoints, ct);
|
||||
}
|
||||
|
||||
private static int CalculateHealthScore(string status) => status.ToLowerInvariant() switch
|
||||
{
|
||||
"pass" => 100,
|
||||
"warn" => 50,
|
||||
"fail" => 0,
|
||||
"skip" => -1,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
private static IReadOnlyDictionary<string, string> ExtractTrendEvidence(
|
||||
Dictionary<string, object>? evidence)
|
||||
{
|
||||
if (evidence is null)
|
||||
{
|
||||
return new Dictionary<string, string>();
|
||||
}
|
||||
|
||||
return evidence
|
||||
.Where(kv => kv.Value is int or long or double or string or JsonElement)
|
||||
.Where(kv => !kv.Key.Contains("url", StringComparison.OrdinalIgnoreCase))
|
||||
.Where(kv => !kv.Key.Contains("message", StringComparison.OrdinalIgnoreCase))
|
||||
.Take(10)
|
||||
.ToDictionary(kv => kv.Key, kv => kv.Value?.ToString() ?? string.Empty);
|
||||
}
|
||||
|
||||
// Response DTOs for Doctor API
|
||||
private sealed record RunTriggerResponse(string RunId);
|
||||
|
||||
private sealed record RunStatusResponse(
|
||||
string Status,
|
||||
int TotalChecks,
|
||||
int PassedChecks,
|
||||
int WarnedChecks,
|
||||
int FailedChecks,
|
||||
int SkippedChecks,
|
||||
int HealthScore,
|
||||
IReadOnlyList<string>? CategoriesWithIssues);
|
||||
|
||||
private sealed record RunResultsResponse(IReadOnlyList<CheckResult>? Results);
|
||||
|
||||
private sealed record CheckResult(
|
||||
string CheckId,
|
||||
string PluginId,
|
||||
string Category,
|
||||
string Status,
|
||||
int DurationMs,
|
||||
Dictionary<string, object>? Evidence);
|
||||
}
|
||||
@@ -0,0 +1,180 @@
|
||||
using Microsoft.AspNetCore.Builder;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.AspNetCore.Routing;
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Services;
|
||||
|
||||
namespace StellaOps.Scheduler.Plugin.Doctor.Endpoints;
|
||||
|
||||
/// <summary>
|
||||
/// Registers Doctor trend HTTP endpoints in the Scheduler service.
|
||||
/// These endpoints serve the same data shapes as the former Doctor Scheduler service,
|
||||
/// enabling the Doctor UI to work without code changes.
|
||||
/// </summary>
|
||||
public static class DoctorTrendEndpoints
|
||||
{
|
||||
public static void Map(IEndpointRouteBuilder routes)
|
||||
{
|
||||
var group = routes.MapGroup("/api/v1/scheduler/doctor/trends")
|
||||
.WithTags("Doctor", "Trends");
|
||||
|
||||
group.MapGet("/", async (
|
||||
DateTimeOffset? from,
|
||||
DateTimeOffset? to,
|
||||
IDoctorTrendRepository repository,
|
||||
TimeProvider timeProvider,
|
||||
HttpContext httpContext,
|
||||
CancellationToken ct) =>
|
||||
{
|
||||
var tenantId = ResolveTenantId(httpContext);
|
||||
var window = ResolveWindow(from, to, timeProvider);
|
||||
if (window is null)
|
||||
{
|
||||
return Results.BadRequest(new { message = "Invalid time window: 'from' must be before 'to'." });
|
||||
}
|
||||
|
||||
var summaries = await repository.GetTrendSummariesAsync(tenantId, window.Value.From, window.Value.To, ct);
|
||||
return Results.Ok(new
|
||||
{
|
||||
window = new { from = window.Value.From, to = window.Value.To },
|
||||
summaries,
|
||||
});
|
||||
})
|
||||
.WithName("GetDoctorPluginTrends")
|
||||
.WithDescription("Returns aggregated health-check trend summaries across all checks for the specified time window.");
|
||||
|
||||
group.MapGet("/checks/{checkId}", async (
|
||||
string checkId,
|
||||
DateTimeOffset? from,
|
||||
DateTimeOffset? to,
|
||||
IDoctorTrendRepository repository,
|
||||
TimeProvider timeProvider,
|
||||
HttpContext httpContext,
|
||||
CancellationToken ct) =>
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(checkId))
|
||||
{
|
||||
return Results.BadRequest(new { message = "checkId is required." });
|
||||
}
|
||||
|
||||
var tenantId = ResolveTenantId(httpContext);
|
||||
var window = ResolveWindow(from, to, timeProvider);
|
||||
if (window is null)
|
||||
{
|
||||
return Results.BadRequest(new { message = "Invalid time window." });
|
||||
}
|
||||
|
||||
var data = await repository.GetTrendDataAsync(tenantId, checkId, window.Value.From, window.Value.To, ct);
|
||||
var summary = await repository.GetCheckTrendSummaryAsync(tenantId, checkId, window.Value.From, window.Value.To, ct);
|
||||
return Results.Ok(new
|
||||
{
|
||||
window = new { from = window.Value.From, to = window.Value.To },
|
||||
summary,
|
||||
dataPoints = data,
|
||||
});
|
||||
})
|
||||
.WithName("GetDoctorPluginCheckTrend")
|
||||
.WithDescription("Returns detailed trend data and summary statistics for a specific Doctor health check.");
|
||||
|
||||
group.MapGet("/categories/{category}", async (
|
||||
string category,
|
||||
DateTimeOffset? from,
|
||||
DateTimeOffset? to,
|
||||
IDoctorTrendRepository repository,
|
||||
TimeProvider timeProvider,
|
||||
HttpContext httpContext,
|
||||
CancellationToken ct) =>
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(category))
|
||||
{
|
||||
return Results.BadRequest(new { message = "category is required." });
|
||||
}
|
||||
|
||||
var tenantId = ResolveTenantId(httpContext);
|
||||
var window = ResolveWindow(from, to, timeProvider);
|
||||
if (window is null)
|
||||
{
|
||||
return Results.BadRequest(new { message = "Invalid time window." });
|
||||
}
|
||||
|
||||
var data = await repository.GetCategoryTrendDataAsync(tenantId, category, window.Value.From, window.Value.To, ct);
|
||||
return Results.Ok(new
|
||||
{
|
||||
window = new { from = window.Value.From, to = window.Value.To },
|
||||
category,
|
||||
dataPoints = data,
|
||||
});
|
||||
})
|
||||
.WithName("GetDoctorPluginCategoryTrend")
|
||||
.WithDescription("Returns trend data points for all checks within a specific Doctor check category.");
|
||||
|
||||
group.MapGet("/degrading", async (
|
||||
DateTimeOffset? from,
|
||||
DateTimeOffset? to,
|
||||
double? threshold,
|
||||
IDoctorTrendRepository repository,
|
||||
TimeProvider timeProvider,
|
||||
HttpContext httpContext,
|
||||
CancellationToken ct) =>
|
||||
{
|
||||
var tenantId = ResolveTenantId(httpContext);
|
||||
var window = ResolveWindow(from, to, timeProvider);
|
||||
if (window is null)
|
||||
{
|
||||
return Results.BadRequest(new { message = "Invalid time window." });
|
||||
}
|
||||
|
||||
var effectiveThreshold = threshold ?? 0.1d;
|
||||
if (effectiveThreshold < 0 || double.IsNaN(effectiveThreshold))
|
||||
{
|
||||
return Results.BadRequest(new { message = "threshold must be a non-negative number." });
|
||||
}
|
||||
|
||||
var degrading = await repository.GetDegradingChecksAsync(
|
||||
tenantId, window.Value.From, window.Value.To, effectiveThreshold, ct);
|
||||
return Results.Ok(new
|
||||
{
|
||||
window = new { from = window.Value.From, to = window.Value.To },
|
||||
threshold = effectiveThreshold,
|
||||
checks = degrading,
|
||||
});
|
||||
})
|
||||
.WithName("GetDoctorPluginDegradingChecks")
|
||||
.WithDescription("Returns the set of Doctor health checks that have been degrading over the specified time window.");
|
||||
}
|
||||
|
||||
private static (DateTimeOffset From, DateTimeOffset To)? ResolveWindow(
|
||||
DateTimeOffset? from,
|
||||
DateTimeOffset? to,
|
||||
TimeProvider timeProvider)
|
||||
{
|
||||
var end = to ?? timeProvider.GetUtcNow();
|
||||
var start = from ?? end.AddDays(-30);
|
||||
|
||||
if (start > end)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return (start, end);
|
||||
}
|
||||
|
||||
private static string ResolveTenantId(HttpContext httpContext)
|
||||
{
|
||||
// Try to get tenant from the StellaOps tenant context (set by middleware)
|
||||
var tenantClaim = httpContext.User?.FindFirst("stellaops:tenant")
|
||||
?? httpContext.User?.FindFirst("tenant");
|
||||
if (tenantClaim is not null && !string.IsNullOrWhiteSpace(tenantClaim.Value))
|
||||
{
|
||||
return tenantClaim.Value;
|
||||
}
|
||||
|
||||
// Fall back to header (development mode)
|
||||
if (httpContext.Request.Headers.TryGetValue("X-Tenant-Id", out var tenantHeader)
|
||||
&& !string.IsNullOrWhiteSpace(tenantHeader))
|
||||
{
|
||||
return tenantHeader.ToString();
|
||||
}
|
||||
|
||||
return "demo-prod";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
-- Doctor Trends Table: Stores health check trend data points
|
||||
-- Created by the DoctorJobPlugin (scheduler plugin architecture)
|
||||
-- Uses the scheduler schema to share the same database/schema as the Scheduler service.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS scheduler.doctor_trends (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMPTZ NOT NULL,
|
||||
tenant_id TEXT NOT NULL,
|
||||
check_id TEXT NOT NULL,
|
||||
plugin_id TEXT NOT NULL,
|
||||
category TEXT NOT NULL,
|
||||
run_id TEXT NOT NULL,
|
||||
status TEXT NOT NULL,
|
||||
health_score INT NOT NULL DEFAULT 0,
|
||||
duration_ms INT NOT NULL DEFAULT 0,
|
||||
evidence_values JSONB NOT NULL DEFAULT '{}'
|
||||
);
|
||||
|
||||
-- Performance indexes for common query patterns
|
||||
CREATE INDEX IF NOT EXISTS idx_doctor_trends_tenant_check
|
||||
ON scheduler.doctor_trends(tenant_id, check_id, timestamp DESC);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_doctor_trends_tenant_category
|
||||
ON scheduler.doctor_trends(tenant_id, category, timestamp DESC);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_doctor_trends_tenant_timestamp
|
||||
ON scheduler.doctor_trends(tenant_id, timestamp DESC);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_doctor_trends_run
|
||||
ON scheduler.doctor_trends(run_id);
|
||||
|
||||
-- Retention pruning index (used by PruneOldDataAsync)
|
||||
CREATE INDEX IF NOT EXISTS idx_doctor_trends_timestamp_prune
|
||||
ON scheduler.doctor_trends(timestamp);
|
||||
|
||||
-- Row-Level Security
|
||||
ALTER TABLE scheduler.doctor_trends ENABLE ROW LEVEL SECURITY;
|
||||
ALTER TABLE scheduler.doctor_trends FORCE ROW LEVEL SECURITY;
|
||||
DROP POLICY IF EXISTS doctor_trends_tenant_isolation ON scheduler.doctor_trends;
|
||||
CREATE POLICY doctor_trends_tenant_isolation ON scheduler.doctor_trends FOR ALL
|
||||
USING (tenant_id = scheduler_app.require_current_tenant())
|
||||
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
|
||||
|
||||
COMMENT ON TABLE scheduler.doctor_trends IS 'Health check trend data points from Doctor plugin scheduled runs. Retained per configurable retention period (default 365 days).';
|
||||
@@ -0,0 +1,86 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Scheduler.Plugin.Doctor.Models;
|
||||
|
||||
/// <summary>
|
||||
/// Plugin-specific configuration for Doctor job schedules.
|
||||
/// Deserialized from <see cref="StellaOps.Scheduler.Models.Schedule.PluginConfig"/>.
|
||||
/// </summary>
|
||||
public sealed record DoctorScheduleConfig
|
||||
{
|
||||
/// <summary>
|
||||
/// Doctor run mode: full, quick, categories, plugins.
|
||||
/// </summary>
|
||||
[JsonPropertyName("doctorMode")]
|
||||
public string DoctorMode { get; init; } = "full";
|
||||
|
||||
/// <summary>
|
||||
/// Optional list of categories to include (empty = all).
|
||||
/// </summary>
|
||||
[JsonPropertyName("categories")]
|
||||
public IReadOnlyList<string> Categories { get; init; } = [];
|
||||
|
||||
/// <summary>
|
||||
/// Optional list of specific plugins to run (empty = all).
|
||||
/// </summary>
|
||||
[JsonPropertyName("plugins")]
|
||||
public IReadOnlyList<string> Plugins { get; init; } = [];
|
||||
|
||||
/// <summary>
|
||||
/// Timeout in seconds for the Doctor run.
|
||||
/// </summary>
|
||||
[JsonPropertyName("timeoutSeconds")]
|
||||
public int TimeoutSeconds { get; init; } = 300;
|
||||
|
||||
/// <summary>
|
||||
/// Alert configuration for this schedule.
|
||||
/// </summary>
|
||||
[JsonPropertyName("alerts")]
|
||||
public DoctorAlertConfig? Alerts { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Deserializes a DoctorScheduleConfig from the Schedule's PluginConfig dictionary.
|
||||
/// </summary>
|
||||
public static DoctorScheduleConfig FromPluginConfig(IReadOnlyDictionary<string, object?>? pluginConfig)
|
||||
{
|
||||
if (pluginConfig is null || pluginConfig.Count == 0)
|
||||
{
|
||||
return new DoctorScheduleConfig();
|
||||
}
|
||||
|
||||
// Round-trip through JSON to correctly deserialize typed properties
|
||||
var json = JsonSerializer.Serialize(pluginConfig);
|
||||
return JsonSerializer.Deserialize<DoctorScheduleConfig>(json) ?? new DoctorScheduleConfig();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Alert configuration for Doctor scheduled runs.
|
||||
/// </summary>
|
||||
public sealed record DoctorAlertConfig
|
||||
{
|
||||
[JsonPropertyName("enabled")]
|
||||
public bool Enabled { get; init; } = true;
|
||||
|
||||
[JsonPropertyName("alertOnFail")]
|
||||
public bool AlertOnFail { get; init; } = true;
|
||||
|
||||
[JsonPropertyName("alertOnWarn")]
|
||||
public bool AlertOnWarn { get; init; }
|
||||
|
||||
[JsonPropertyName("alertOnStatusChange")]
|
||||
public bool AlertOnStatusChange { get; init; } = true;
|
||||
|
||||
[JsonPropertyName("channels")]
|
||||
public IReadOnlyList<string> Channels { get; init; } = [];
|
||||
|
||||
[JsonPropertyName("emailRecipients")]
|
||||
public IReadOnlyList<string> EmailRecipients { get; init; } = [];
|
||||
|
||||
[JsonPropertyName("webhookUrls")]
|
||||
public IReadOnlyList<string> WebhookUrls { get; init; } = [];
|
||||
|
||||
[JsonPropertyName("minSeverity")]
|
||||
public string MinSeverity { get; init; } = "Fail";
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
namespace StellaOps.Scheduler.Plugin.Doctor.Models;
|
||||
|
||||
/// <summary>
|
||||
/// Represents a single data point in a Doctor health trend.
|
||||
/// Stored in the scheduler.doctor_trends table.
|
||||
/// </summary>
|
||||
public sealed record DoctorTrendDataPoint
|
||||
{
|
||||
/// <summary>
|
||||
/// Timestamp of the data point.
|
||||
/// </summary>
|
||||
public DateTimeOffset Timestamp { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Check ID this data point is for.
|
||||
/// </summary>
|
||||
public required string CheckId { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Plugin ID the check belongs to.
|
||||
/// </summary>
|
||||
public required string PluginId { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Category of the check.
|
||||
/// </summary>
|
||||
public required string Category { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Run ID that generated this data point.
|
||||
/// </summary>
|
||||
public required string RunId { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Tenant ID for multi-tenant isolation.
|
||||
/// </summary>
|
||||
public required string TenantId { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Status of the check at this point (pass, warn, fail, skip).
|
||||
/// </summary>
|
||||
public required string Status { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Health score (0-100) at this point.
|
||||
/// </summary>
|
||||
public int HealthScore { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Duration of the check in milliseconds.
|
||||
/// </summary>
|
||||
public int DurationMs { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Selected evidence values for trending (stored as JSON).
|
||||
/// </summary>
|
||||
public IReadOnlyDictionary<string, string> EvidenceValues { get; init; } =
|
||||
new Dictionary<string, string>();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Aggregated trend summary over a time period.
|
||||
/// </summary>
|
||||
public sealed record DoctorTrendSummary
|
||||
{
|
||||
public required string CheckId { get; init; }
|
||||
public required string CheckName { get; init; }
|
||||
public DateTimeOffset PeriodStart { get; init; }
|
||||
public DateTimeOffset PeriodEnd { get; init; }
|
||||
public int TotalRuns { get; init; }
|
||||
public int PassCount { get; init; }
|
||||
public int WarnCount { get; init; }
|
||||
public int FailCount { get; init; }
|
||||
public double SuccessRate => TotalRuns > 0 ? (double)PassCount / TotalRuns : 0;
|
||||
public double AvgHealthScore { get; init; }
|
||||
public string Direction { get; init; } = "stable";
|
||||
public double ChangePercent { get; init; }
|
||||
public int AvgDurationMs { get; init; }
|
||||
}
|
||||
@@ -0,0 +1,236 @@
|
||||
using System.Text.Json;
|
||||
using Dapper;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Npgsql;
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Models;
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Services;
|
||||
|
||||
namespace StellaOps.Scheduler.Plugin.Doctor.Persistence;
|
||||
|
||||
/// <summary>
|
||||
/// Postgres-backed implementation of <see cref="IDoctorTrendRepository"/>.
|
||||
/// Uses the scheduler.doctor_trends table via Dapper.
|
||||
/// </summary>
|
||||
public sealed class PostgresDoctorTrendRepository : IDoctorTrendRepository
|
||||
{
|
||||
private readonly string _connectionString;
|
||||
private readonly ILogger<PostgresDoctorTrendRepository> _logger;
|
||||
|
||||
public PostgresDoctorTrendRepository(string connectionString, ILogger<PostgresDoctorTrendRepository> logger)
|
||||
{
|
||||
_connectionString = connectionString;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public async Task StoreTrendDataAsync(IEnumerable<DoctorTrendDataPoint> dataPoints, CancellationToken ct)
|
||||
{
|
||||
const string sql = """
|
||||
INSERT INTO scheduler.doctor_trends
|
||||
(timestamp, tenant_id, check_id, plugin_id, category, run_id, status, health_score, duration_ms, evidence_values)
|
||||
VALUES
|
||||
(@Timestamp, @TenantId, @CheckId, @PluginId, @Category, @RunId, @Status, @HealthScore, @DurationMs, @EvidenceValues::jsonb)
|
||||
ON CONFLICT DO NOTHING
|
||||
""";
|
||||
|
||||
await using var connection = new NpgsqlConnection(_connectionString);
|
||||
await connection.OpenAsync(ct);
|
||||
|
||||
foreach (var point in dataPoints)
|
||||
{
|
||||
var evidenceJson = JsonSerializer.Serialize(point.EvidenceValues);
|
||||
await connection.ExecuteAsync(new CommandDefinition(sql, new
|
||||
{
|
||||
point.Timestamp,
|
||||
point.TenantId,
|
||||
point.CheckId,
|
||||
point.PluginId,
|
||||
point.Category,
|
||||
point.RunId,
|
||||
point.Status,
|
||||
point.HealthScore,
|
||||
point.DurationMs,
|
||||
EvidenceValues = evidenceJson,
|
||||
}, cancellationToken: ct));
|
||||
}
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<DoctorTrendDataPoint>> GetTrendDataAsync(
|
||||
string tenantId, string checkId, DateTimeOffset from, DateTimeOffset to, CancellationToken ct)
|
||||
{
|
||||
const string sql = """
|
||||
SELECT timestamp, tenant_id, check_id, plugin_id, category, run_id, status,
|
||||
health_score, duration_ms, evidence_values
|
||||
FROM scheduler.doctor_trends
|
||||
WHERE tenant_id = @TenantId AND check_id = @CheckId
|
||||
AND timestamp >= @From AND timestamp <= @To
|
||||
ORDER BY timestamp ASC, run_id ASC
|
||||
""";
|
||||
|
||||
await using var connection = new NpgsqlConnection(_connectionString);
|
||||
await connection.OpenAsync(ct);
|
||||
|
||||
var rows = await connection.QueryAsync<TrendRow>(new CommandDefinition(sql, new
|
||||
{
|
||||
TenantId = tenantId,
|
||||
CheckId = checkId,
|
||||
From = from,
|
||||
To = to,
|
||||
}, cancellationToken: ct));
|
||||
|
||||
return rows.Select(MapToDataPoint).ToList().AsReadOnly();
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<DoctorTrendDataPoint>> GetCategoryTrendDataAsync(
|
||||
string tenantId, string category, DateTimeOffset from, DateTimeOffset to, CancellationToken ct)
|
||||
{
|
||||
const string sql = """
|
||||
SELECT timestamp, tenant_id, check_id, plugin_id, category, run_id, status,
|
||||
health_score, duration_ms, evidence_values
|
||||
FROM scheduler.doctor_trends
|
||||
WHERE tenant_id = @TenantId AND category = @Category
|
||||
AND timestamp >= @From AND timestamp <= @To
|
||||
ORDER BY timestamp ASC, check_id ASC, run_id ASC
|
||||
""";
|
||||
|
||||
await using var connection = new NpgsqlConnection(_connectionString);
|
||||
await connection.OpenAsync(ct);
|
||||
|
||||
var rows = await connection.QueryAsync<TrendRow>(new CommandDefinition(sql, new
|
||||
{
|
||||
TenantId = tenantId,
|
||||
Category = category,
|
||||
From = from,
|
||||
To = to,
|
||||
}, cancellationToken: ct));
|
||||
|
||||
return rows.Select(MapToDataPoint).ToList().AsReadOnly();
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<DoctorTrendSummary>> GetTrendSummariesAsync(
|
||||
string tenantId, DateTimeOffset from, DateTimeOffset to, CancellationToken ct)
|
||||
{
|
||||
const string sql = """
|
||||
SELECT check_id,
|
||||
check_id AS check_name,
|
||||
COUNT(*) AS total_runs,
|
||||
COUNT(*) FILTER (WHERE LOWER(status) IN ('pass','success')) AS pass_count,
|
||||
COUNT(*) FILTER (WHERE LOWER(status) IN ('warn','warning')) AS warn_count,
|
||||
COUNT(*) FILTER (WHERE LOWER(status) IN ('fail','failed','error')) AS fail_count,
|
||||
AVG(health_score) AS avg_health_score,
|
||||
AVG(duration_ms)::int AS avg_duration_ms
|
||||
FROM scheduler.doctor_trends
|
||||
WHERE tenant_id = @TenantId AND timestamp >= @From AND timestamp <= @To
|
||||
GROUP BY check_id
|
||||
ORDER BY check_id
|
||||
""";
|
||||
|
||||
await using var connection = new NpgsqlConnection(_connectionString);
|
||||
await connection.OpenAsync(ct);
|
||||
|
||||
var rows = await connection.QueryAsync<SummaryRow>(new CommandDefinition(sql, new
|
||||
{
|
||||
TenantId = tenantId,
|
||||
From = from,
|
||||
To = to,
|
||||
}, cancellationToken: ct));
|
||||
|
||||
return rows.Select(r => new DoctorTrendSummary
|
||||
{
|
||||
CheckId = r.check_id,
|
||||
CheckName = r.check_name,
|
||||
PeriodStart = from,
|
||||
PeriodEnd = to,
|
||||
TotalRuns = r.total_runs,
|
||||
PassCount = r.pass_count,
|
||||
WarnCount = r.warn_count,
|
||||
FailCount = r.fail_count,
|
||||
AvgHealthScore = r.avg_health_score,
|
||||
Direction = DetermineDirection(r),
|
||||
ChangePercent = 0, // Simplified: full implementation would compare first/last scores
|
||||
AvgDurationMs = r.avg_duration_ms,
|
||||
}).ToList().AsReadOnly();
|
||||
}
|
||||
|
||||
public async Task<DoctorTrendSummary?> GetCheckTrendSummaryAsync(
|
||||
string tenantId, string checkId, DateTimeOffset from, DateTimeOffset to, CancellationToken ct)
|
||||
{
|
||||
var summaries = await GetTrendSummariesAsync(tenantId, from, to, ct);
|
||||
return summaries.FirstOrDefault(s => string.Equals(s.CheckId, checkId, StringComparison.Ordinal));
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<DoctorTrendSummary>> GetDegradingChecksAsync(
|
||||
string tenantId, DateTimeOffset from, DateTimeOffset to, double degradationThreshold, CancellationToken ct)
|
||||
{
|
||||
var summaries = await GetTrendSummariesAsync(tenantId, from, to, ct);
|
||||
return summaries
|
||||
.Where(s => string.Equals(s.Direction, "degrading", StringComparison.OrdinalIgnoreCase))
|
||||
.ToList()
|
||||
.AsReadOnly();
|
||||
}
|
||||
|
||||
public async Task PruneOldDataAsync(DateTimeOffset olderThan, CancellationToken ct)
|
||||
{
|
||||
const string sql = "DELETE FROM scheduler.doctor_trends WHERE timestamp < @OlderThan";
|
||||
|
||||
await using var connection = new NpgsqlConnection(_connectionString);
|
||||
await connection.OpenAsync(ct);
|
||||
|
||||
var deleted = await connection.ExecuteAsync(new CommandDefinition(sql, new { OlderThan = olderThan }, cancellationToken: ct));
|
||||
_logger.LogInformation("Pruned {Count} old Doctor trend data points (older than {OlderThan}).", deleted, olderThan);
|
||||
}
|
||||
|
||||
private static string DetermineDirection(SummaryRow row)
|
||||
{
|
||||
if (row.total_runs < 2) return "stable";
|
||||
var failRate = row.total_runs > 0 ? (double)row.fail_count / row.total_runs : 0;
|
||||
return failRate > 0.3 ? "degrading" : failRate < 0.05 ? "improving" : "stable";
|
||||
}
|
||||
|
||||
private static DoctorTrendDataPoint MapToDataPoint(TrendRow row)
|
||||
{
|
||||
var evidence = string.IsNullOrWhiteSpace(row.evidence_values)
|
||||
? new Dictionary<string, string>()
|
||||
: JsonSerializer.Deserialize<Dictionary<string, string>>(row.evidence_values) ?? new Dictionary<string, string>();
|
||||
|
||||
return new DoctorTrendDataPoint
|
||||
{
|
||||
Timestamp = row.timestamp,
|
||||
TenantId = row.tenant_id,
|
||||
CheckId = row.check_id,
|
||||
PluginId = row.plugin_id,
|
||||
Category = row.category,
|
||||
RunId = row.run_id,
|
||||
Status = row.status,
|
||||
HealthScore = row.health_score,
|
||||
DurationMs = row.duration_ms,
|
||||
EvidenceValues = evidence,
|
||||
};
|
||||
}
|
||||
|
||||
// Dapper row mapping types
|
||||
private sealed record TrendRow
|
||||
{
|
||||
public DateTimeOffset timestamp { get; init; }
|
||||
public string tenant_id { get; init; } = "";
|
||||
public string check_id { get; init; } = "";
|
||||
public string plugin_id { get; init; } = "";
|
||||
public string category { get; init; } = "";
|
||||
public string run_id { get; init; } = "";
|
||||
public string status { get; init; } = "";
|
||||
public int health_score { get; init; }
|
||||
public int duration_ms { get; init; }
|
||||
public string? evidence_values { get; init; }
|
||||
}
|
||||
|
||||
private sealed record SummaryRow
|
||||
{
|
||||
public string check_id { get; init; } = "";
|
||||
public string check_name { get; init; } = "";
|
||||
public int total_runs { get; init; }
|
||||
public int pass_count { get; init; }
|
||||
public int warn_count { get; init; }
|
||||
public int fail_count { get; init; }
|
||||
public double avg_health_score { get; init; }
|
||||
public int avg_duration_ms { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Plugin.Doctor.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Console/logging-based alert service for Doctor scheduled runs.
|
||||
/// Logs alerts to the application logger. In production, this would be replaced
|
||||
/// with a notification channel (email, webhook, etc.) implementation.
|
||||
/// </summary>
|
||||
public sealed class ConsoleAlertService : IDoctorAlertService
|
||||
{
|
||||
private readonly ILogger<ConsoleAlertService> _logger;
|
||||
|
||||
public ConsoleAlertService(ILogger<ConsoleAlertService> logger)
|
||||
{
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public Task EvaluateAndSendAsync(
|
||||
DoctorScheduleConfig config,
|
||||
DoctorExecutionResult result,
|
||||
CancellationToken ct)
|
||||
{
|
||||
if (config.Alerts is null || !config.Alerts.Enabled)
|
||||
{
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
var shouldAlert = false;
|
||||
|
||||
if (config.Alerts.AlertOnFail && result.FailedChecks > 0)
|
||||
{
|
||||
shouldAlert = true;
|
||||
}
|
||||
|
||||
if (config.Alerts.AlertOnWarn && result.WarnedChecks > 0)
|
||||
{
|
||||
shouldAlert = true;
|
||||
}
|
||||
|
||||
if (!string.IsNullOrEmpty(result.ErrorMessage))
|
||||
{
|
||||
shouldAlert = true;
|
||||
}
|
||||
|
||||
if (shouldAlert)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
"Doctor alert triggered for run {RunId}: Status={Status}, " +
|
||||
"Failed={Failed}, Warned={Warned}, HealthScore={Score}, " +
|
||||
"Categories with issues: [{Categories}]",
|
||||
result.RunId,
|
||||
result.Status,
|
||||
result.FailedChecks,
|
||||
result.WarnedChecks,
|
||||
result.HealthScore,
|
||||
string.Join(", ", result.CategoriesWithIssues));
|
||||
}
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Plugin.Doctor.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Service for sending alerts based on Doctor schedule execution results.
|
||||
/// </summary>
|
||||
public interface IDoctorAlertService
|
||||
{
|
||||
/// <summary>
|
||||
/// Evaluates alert rules and sends notifications if triggered.
|
||||
/// </summary>
|
||||
Task EvaluateAndSendAsync(
|
||||
DoctorScheduleConfig config,
|
||||
DoctorExecutionResult result,
|
||||
CancellationToken ct = default);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Result of a Doctor execution for alert evaluation.
|
||||
/// </summary>
|
||||
public sealed record DoctorExecutionResult
|
||||
{
|
||||
public required string RunId { get; init; }
|
||||
public required string Status { get; init; }
|
||||
public int TotalChecks { get; init; }
|
||||
public int PassedChecks { get; init; }
|
||||
public int WarnedChecks { get; init; }
|
||||
public int FailedChecks { get; init; }
|
||||
public int SkippedChecks { get; init; }
|
||||
public int HealthScore { get; init; }
|
||||
public IReadOnlyList<string> CategoriesWithIssues { get; init; } = [];
|
||||
public string? ErrorMessage { get; init; }
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
using StellaOps.Scheduler.Plugin.Doctor.Models;
|
||||
|
||||
namespace StellaOps.Scheduler.Plugin.Doctor.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Repository for persisting and querying Doctor health trend data.
|
||||
/// Backed by the scheduler.doctor_trends table.
|
||||
/// </summary>
|
||||
public interface IDoctorTrendRepository
|
||||
{
|
||||
/// <summary>
|
||||
/// Stores trend data points from a Doctor run.
|
||||
/// </summary>
|
||||
Task StoreTrendDataAsync(IEnumerable<DoctorTrendDataPoint> dataPoints, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets trend data points for a specific check over a time range.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<DoctorTrendDataPoint>> GetTrendDataAsync(
|
||||
string tenantId,
|
||||
string checkId,
|
||||
DateTimeOffset from,
|
||||
DateTimeOffset to,
|
||||
CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets trend data points for a category over a time range.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<DoctorTrendDataPoint>> GetCategoryTrendDataAsync(
|
||||
string tenantId,
|
||||
string category,
|
||||
DateTimeOffset from,
|
||||
DateTimeOffset to,
|
||||
CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets aggregated trend summaries for all checks over a time range.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<DoctorTrendSummary>> GetTrendSummariesAsync(
|
||||
string tenantId,
|
||||
DateTimeOffset from,
|
||||
DateTimeOffset to,
|
||||
CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets trend summary for a specific check.
|
||||
/// </summary>
|
||||
Task<DoctorTrendSummary?> GetCheckTrendSummaryAsync(
|
||||
string tenantId,
|
||||
string checkId,
|
||||
DateTimeOffset from,
|
||||
DateTimeOffset to,
|
||||
CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Gets checks with degrading trends.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<DoctorTrendSummary>> GetDegradingChecksAsync(
|
||||
string tenantId,
|
||||
DateTimeOffset from,
|
||||
DateTimeOffset to,
|
||||
double degradationThreshold = 0.1,
|
||||
CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Prunes old trend data beyond retention period.
|
||||
/// </summary>
|
||||
Task PruneOldDataAsync(DateTimeOffset olderThan, CancellationToken ct = default);
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Scheduler.Plugin.Doctor</RootNamespace>
|
||||
<AssemblyName>StellaOps.Scheduler.Plugin.Doctor</AssemblyName>
|
||||
<Description>Doctor health check plugin for the StellaOps Scheduler</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<FrameworkReference Include="Microsoft.AspNetCore.App" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="../../StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/StellaOps.Scheduler.Plugin.Abstractions.csproj" />
|
||||
<ProjectReference Include="../../StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Models/StellaOps.Scheduler.Models.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Npgsql" />
|
||||
<PackageReference Include="Dapper" />
|
||||
</ItemGroup>
|
||||
|
||||
<!-- Embed SQL migrations as resources -->
|
||||
<ItemGroup>
|
||||
<EmbeddedResource Include="Migrations\**\*.sql" />
|
||||
</ItemGroup>
|
||||
|
||||
<PropertyGroup Label="StellaOpsReleaseVersion">
|
||||
<Version>1.0.0-alpha1</Version>
|
||||
<InformationalVersion>1.0.0-alpha1</InformationalVersion>
|
||||
</PropertyGroup>
|
||||
</Project>
|
||||
Reference in New Issue
Block a user