diff --git a/devops/compose/docker-compose.stella-ops.yml b/devops/compose/docker-compose.stella-ops.yml index f49618d26..16cdc9e82 100644 --- a/devops/compose/docker-compose.stella-ops.yml +++ b/devops/compose/docker-compose.stella-ops.yml @@ -1606,29 +1606,32 @@ services: <<: *healthcheck-tcp labels: *release-labels - doctor-scheduler: - <<: *resources-light - image: stellaops/doctor-scheduler:dev - container_name: stellaops-doctor-scheduler - restart: unless-stopped - depends_on: *depends-infra - environment: - ASPNETCORE_URLS: "http://+:80" - <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] - ConnectionStrings__Default: *postgres-connection - ConnectionStrings__Redis: "cache.stella-ops.local:6379" - Router__Enabled: "${DOCTOR_SCHEDULER_ROUTER_ENABLED:-true}" - Router__Messaging__ConsumerGroup: "doctor-scheduler" - volumes: - - *cert-volume - healthcheck: - test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/$(hostname)/80'"] - <<: *healthcheck-tcp - networks: - stellaops: - aliases: - - doctor-scheduler.stella-ops.local - labels: *release-labels + # DEPRECATED: doctor-scheduler is replaced by the DoctorJobPlugin in the Scheduler service. + # Scheduling and trend storage are now handled by the Scheduler with jobKind="doctor". + # See: SPRINT_20260408_003_JobEngine_scheduler_plugin_architecture.md + # doctor-scheduler: + # <<: *resources-light + # image: stellaops/doctor-scheduler:dev + # container_name: stellaops-doctor-scheduler + # restart: unless-stopped + # depends_on: *depends-infra + # environment: + # ASPNETCORE_URLS: "http://+:80" + # <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] + # ConnectionStrings__Default: *postgres-connection + # ConnectionStrings__Redis: "cache.stella-ops.local:6379" + # Router__Enabled: "${DOCTOR_SCHEDULER_ROUTER_ENABLED:-true}" + # Router__Messaging__ConsumerGroup: "doctor-scheduler" + # volumes: + # - *cert-volume + # healthcheck: + # test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/$(hostname)/80'"] + # <<: *healthcheck-tcp + # networks: + # stellaops: + # aliases: + # - doctor-scheduler.stella-ops.local + # labels: *release-labels # --- Slot 27: OpsMemory (src/AdvisoryAI/StellaOps.OpsMemory.WebService) --- opsmemory-web: diff --git a/devops/docker/services-matrix.env b/devops/docker/services-matrix.env index 8f8fdcdc8..b261512fd 100644 --- a/devops/docker/services-matrix.env +++ b/devops/docker/services-matrix.env @@ -61,7 +61,8 @@ timeline-web|devops/docker/Dockerfile.hardened.template|src/Timeline/StellaOps.T findings-ledger-web|devops/docker/Dockerfile.hardened.template|src/Findings/StellaOps.Findings.Ledger.WebService/StellaOps.Findings.Ledger.WebService.csproj|StellaOps.Findings.Ledger.WebService|8080 # ── Slot 26: Doctor ───────────────────────────────────────────────────────────── doctor-web|devops/docker/Dockerfile.hardened.template|src/Doctor/StellaOps.Doctor.WebService/StellaOps.Doctor.WebService.csproj|StellaOps.Doctor.WebService|8080 -doctor-scheduler|devops/docker/Dockerfile.hardened.template|src/Doctor/StellaOps.Doctor.Scheduler/StellaOps.Doctor.Scheduler.csproj|StellaOps.Doctor.Scheduler|8080 +# DEPRECATED: doctor-scheduler replaced by DoctorJobPlugin in Scheduler service +# doctor-scheduler|devops/docker/Dockerfile.hardened.template|src/Doctor/StellaOps.Doctor.Scheduler/StellaOps.Doctor.Scheduler.csproj|StellaOps.Doctor.Scheduler|8080 # ── Slot 27: OpsMemory ────────────────────────────────────────────────────────── opsmemory-web|devops/docker/Dockerfile.hardened.template|src/AdvisoryAI/StellaOps.OpsMemory.WebService/StellaOps.OpsMemory.WebService.csproj|StellaOps.OpsMemory.WebService|8080 # ── Slot 28: Notifier ─────────────────────────────────────────────────────────── diff --git a/docs/modules/doctor/architecture.md b/docs/modules/doctor/architecture.md index d1456477b..2b158507f 100644 --- a/docs/modules/doctor/architecture.md +++ b/docs/modules/doctor/architecture.md @@ -14,19 +14,30 @@ Doctor provides a plugin-based diagnostic system that enables: - **Capability probing** for feature compatibility - **Evidence collection** for troubleshooting and compliance -### Scheduler Runtime Surface (run-002 remediation) +### Scheduler Integration (Sprint 20260408-003) -Doctor Scheduler now exposes an HTTP management and trend surface at: -- `GET/POST /api/v1/doctor/scheduler/schedules` -- `GET/PUT/DELETE /api/v1/doctor/scheduler/schedules/{scheduleId}` -- `GET /api/v1/doctor/scheduler/schedules/{scheduleId}/executions` -- `POST /api/v1/doctor/scheduler/schedules/{scheduleId}/execute` -- `GET /api/v1/doctor/scheduler/trends` -- `GET /api/v1/doctor/scheduler/trends/checks/{checkId}` -- `GET /api/v1/doctor/scheduler/trends/categories/{category}` -- `GET /api/v1/doctor/scheduler/trends/degrading` +> **The standalone Doctor Scheduler service is deprecated.** +> Doctor health check scheduling is now handled by the Scheduler service's `DoctorJobPlugin`. -The default local runtime uses deterministic in-memory repositories with stable ordering for schedule lists, execution history, and trend summaries. +Doctor schedules are managed via the Scheduler API with `jobKind="doctor"` and plugin-specific +configuration in `pluginConfig`. Trend data is stored in `scheduler.doctor_trends` (PostgreSQL). + +**Scheduler-hosted Doctor endpoints:** +- `GET /api/v1/scheduler/doctor/trends` -- aggregated trend summaries +- `GET /api/v1/scheduler/doctor/trends/checks/{checkId}` -- per-check trend data +- `GET /api/v1/scheduler/doctor/trends/categories/{category}` -- per-category trend data +- `GET /api/v1/scheduler/doctor/trends/degrading` -- checks with degrading health + +**Schedule management** uses the standard Scheduler API at `/api/v1/scheduler/schedules` +with `jobKind="doctor"` and `pluginConfig` containing Doctor-specific options (mode, categories, alerts). + +Three default Doctor schedules are seeded by `SystemScheduleBootstrap`: +- `doctor-full-daily` (0 4 * * *) -- Full health check +- `doctor-quick-hourly` (0 * * * *) -- Quick health check +- `doctor-compliance-weekly` (0 5 * * 0) -- Compliance category audit + +The Doctor WebService (`src/Doctor/StellaOps.Doctor.WebService/`) remains the execution engine. +The plugin communicates with it via HTTP POST to `/api/v1/doctor/run`. ### AdvisoryAI Diagnosis Surface (run-003 remediation) diff --git a/src/Doctor/AGENTS.md b/src/Doctor/AGENTS.md index aabea241f..bc0e9e5ef 100644 --- a/src/Doctor/AGENTS.md +++ b/src/Doctor/AGENTS.md @@ -67,3 +67,14 @@ Every `IDoctorCheck` implementation MUST have a corresponding documentation arti - Development: https://localhost:10260, http://localhost:10261 - Local alias: https://doctor.stella-ops.local, http://doctor.stella-ops.local - Env var: STELLAOPS_DOCTOR_URL + +## Doctor Scheduling (DEPRECATED standalone service) +The standalone `StellaOps.Doctor.Scheduler` service is deprecated as of Sprint 20260408-003. +Doctor health check scheduling is now handled by the Scheduler service's `DoctorJobPlugin` +(jobKind="doctor") in `src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/`. + +- Schedule CRUD: use the Scheduler API with `jobKind="doctor"` and `pluginConfig` for Doctor-specific options. +- Trend storage: moved from in-memory to Scheduler's PostgreSQL schema (`scheduler.doctor_trends`). +- Trend endpoints: served by the Scheduler at `/api/v1/scheduler/doctor/trends/*`. +- The Doctor WebService remains unchanged as the execution engine for health checks. +- Source code for the old scheduler is kept for one release cycle before removal. diff --git a/src/Doctor/StellaOps.Doctor.Scheduler/README.md b/src/Doctor/StellaOps.Doctor.Scheduler/README.md new file mode 100644 index 000000000..d02478a75 --- /dev/null +++ b/src/Doctor/StellaOps.Doctor.Scheduler/README.md @@ -0,0 +1,24 @@ +# StellaOps.Doctor.Scheduler (DEPRECATED) + +> **DEPRECATED** as of Sprint 20260408-003. This standalone service is replaced by the +> `DoctorJobPlugin` in the Scheduler service (`src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/`). + +## Migration Summary + +| Capability | Before (this service) | After (Scheduler plugin) | +|---|---|---| +| Schedule CRUD | In-memory, `/api/v1/doctor/scheduler/schedules` | Scheduler API with `jobKind="doctor"` | +| Cron evaluation | `DoctorScheduleWorker` background service | Scheduler's existing cron infrastructure | +| Run execution | `ScheduleExecutor` HTTP calls to Doctor WebService | `DoctorJobPlugin.ExecuteAsync()` | +| Trend storage | `InMemoryTrendRepository` (volatile) | `scheduler.doctor_trends` PostgreSQL table | +| Trend endpoints | `/api/v1/doctor/scheduler/trends/*` | `/api/v1/scheduler/doctor/trends/*` | + +## What Stays + +- **Doctor WebService** (`src/Doctor/StellaOps.Doctor.WebService/`): unchanged, remains the health check execution engine. +- **Doctor Plugins** (`src/Doctor/__Plugins/`): unchanged, loaded by Doctor WebService. + +## Removal Timeline + +Source code is kept for one release cycle. The `doctor-scheduler` container is commented out +in `docker-compose.stella-ops.yml` and disabled in `services-matrix.env`. diff --git a/src/JobEngine/StellaOps.Scheduler.WebService/Bootstrap/SystemScheduleBootstrap.cs b/src/JobEngine/StellaOps.Scheduler.WebService/Bootstrap/SystemScheduleBootstrap.cs index 89ea2cc82..b164215fd 100644 --- a/src/JobEngine/StellaOps.Scheduler.WebService/Bootstrap/SystemScheduleBootstrap.cs +++ b/src/JobEngine/StellaOps.Scheduler.WebService/Bootstrap/SystemScheduleBootstrap.cs @@ -13,14 +13,34 @@ namespace StellaOps.Scheduler.WebService.Bootstrap; /// internal sealed class SystemScheduleBootstrap : BackgroundService { - private static readonly (string Slug, string Name, string Cron, ScheduleMode Mode, SelectorScope Scope)[] SystemSchedules = + private static readonly (string Slug, string Name, string Cron, ScheduleMode Mode, SelectorScope Scope, string JobKind, ImmutableDictionary? PluginConfig)[] SystemSchedules = [ - ("nightly-vuln-scan", "Nightly Vulnerability Scan", "0 2 * * *", ScheduleMode.AnalysisOnly, SelectorScope.AllImages), - ("advisory-refresh", "Continuous Advisory Refresh", "0 */4 * * *", ScheduleMode.ContentRefresh, SelectorScope.AllImages), - ("weekly-compliance-sweep", "Weekly Compliance Sweep", "0 3 * * 0", ScheduleMode.AnalysisOnly, SelectorScope.AllImages), - ("epss-score-update", "EPSS Score Update", "0 6 * * *", ScheduleMode.ContentRefresh, SelectorScope.AllImages), - ("reachability-reeval", "Reachability Re-evaluation", "0 5 * * 1-5", ScheduleMode.AnalysisOnly, SelectorScope.AllImages), - ("registry-sync", "Registry Sync", "0 */2 * * *", ScheduleMode.ContentRefresh, SelectorScope.AllImages), + ("nightly-vuln-scan", "Nightly Vulnerability Scan", "0 2 * * *", ScheduleMode.AnalysisOnly, SelectorScope.AllImages, "scan", null), + ("advisory-refresh", "Continuous Advisory Refresh", "0 */4 * * *", ScheduleMode.ContentRefresh, SelectorScope.AllImages, "scan", null), + ("weekly-compliance-sweep", "Weekly Compliance Sweep", "0 3 * * 0", ScheduleMode.AnalysisOnly, SelectorScope.AllImages, "scan", null), + ("epss-score-update", "EPSS Score Update", "0 6 * * *", ScheduleMode.ContentRefresh, SelectorScope.AllImages, "scan", null), + ("reachability-reeval", "Reachability Re-evaluation", "0 5 * * 1-5", ScheduleMode.AnalysisOnly, SelectorScope.AllImages, "scan", null), + ("registry-sync", "Registry Sync", "0 */2 * * *", ScheduleMode.ContentRefresh, SelectorScope.AllImages, "scan", null), + // Doctor health check schedules (replaces standalone doctor-scheduler seeds) + ("doctor-full-daily", "Daily Health Check", "0 4 * * *", ScheduleMode.AnalysisOnly, SelectorScope.AllImages, "doctor", + ImmutableDictionary.CreateRange(new KeyValuePair[] + { + new("doctorMode", "full"), + new("timeoutSeconds", 300), + })), + ("doctor-quick-hourly", "Hourly Quick Check", "0 * * * *", ScheduleMode.AnalysisOnly, SelectorScope.AllImages, "doctor", + ImmutableDictionary.CreateRange(new KeyValuePair[] + { + new("doctorMode", "quick"), + new("timeoutSeconds", 120), + })), + ("doctor-compliance-weekly","Weekly Compliance Audit", "0 5 * * 0", ScheduleMode.AnalysisOnly, SelectorScope.AllImages, "doctor", + ImmutableDictionary.CreateRange(new KeyValuePair[] + { + new("doctorMode", "categories"), + new("categories", new[] { "compliance" }), + new("timeoutSeconds", 600), + })), ]; // TODO: Replace with real multi-tenant resolution when available. @@ -65,7 +85,7 @@ internal sealed class SystemScheduleBootstrap : BackgroundService { var now = DateTimeOffset.UtcNow; - foreach (var (slug, name, cron, mode, selectorScope) in SystemSchedules) + foreach (var (slug, name, cron, mode, selectorScope, jobKind, pluginConfig) in SystemSchedules) { var scheduleId = $"sys-{tenantId}-{slug}"; @@ -96,10 +116,12 @@ internal sealed class SystemScheduleBootstrap : BackgroundService updatedBy: "system-bootstrap", subscribers: null, schemaVersion: SchedulerSchemaVersions.Schedule, - source: "system"); + source: "system", + jobKind: jobKind, + pluginConfig: pluginConfig); await repository.UpsertAsync(schedule, cancellationToken).ConfigureAwait(false); - _logger.LogInformation("Created system schedule {ScheduleId} ({Name}) for tenant {TenantId}.", scheduleId, name, tenantId); + _logger.LogInformation("Created system schedule {ScheduleId} ({Name}, kind={JobKind}) for tenant {TenantId}.", scheduleId, name, jobKind, tenantId); } } } diff --git a/src/JobEngine/StellaOps.Scheduler.WebService/Program.cs b/src/JobEngine/StellaOps.Scheduler.WebService/Program.cs index 3c5050ba4..36731af4e 100644 --- a/src/JobEngine/StellaOps.Scheduler.WebService/Program.cs +++ b/src/JobEngine/StellaOps.Scheduler.WebService/Program.cs @@ -38,6 +38,9 @@ using StellaOps.ReleaseOrchestrator.Scripts.Search; using StellaOps.Scheduler.Worker.Exceptions; using StellaOps.Scheduler.Worker.Observability; using StellaOps.Scheduler.Worker.Options; +using StellaOps.Scheduler.Plugin; +using StellaOps.Scheduler.Plugin.Scan; +using StellaOps.Scheduler.Plugin.Doctor; using System.Linq; var builder = WebApplication.CreateBuilder(args); @@ -188,6 +191,44 @@ var pluginHostOptions = SchedulerPluginHostFactory.Build(schedulerOptions.Plugin builder.Services.AddSingleton(pluginHostOptions); builder.Services.RegisterPluginRoutines(builder.Configuration, pluginHostOptions); +// Scheduler Plugin Registry: register built-in and assembly-loaded job plugins +var pluginRegistry = new SchedulerPluginRegistry(); + +// Built-in: ScanJobPlugin (handles jobKind="scan") +var scanPlugin = new ScanJobPlugin(); +pluginRegistry.Register(scanPlugin); + +// Built-in: DoctorJobPlugin (handles jobKind="doctor") +var doctorPlugin = new DoctorJobPlugin(); +pluginRegistry.Register(doctorPlugin); +doctorPlugin.ConfigureServices(builder.Services, builder.Configuration); + +// Discover assembly-loaded ISchedulerJobPlugin implementations from plugin DLLs +var pluginLoadResult = StellaOps.Plugin.Hosting.PluginHost.LoadPlugins(pluginHostOptions); +foreach (var loadedPlugin in pluginLoadResult.Plugins) +{ + foreach (var type in loadedPlugin.Assembly.GetTypes()) + { + if (type.IsAbstract || type.IsInterface || !typeof(ISchedulerJobPlugin).IsAssignableFrom(type)) + continue; + + if (Activator.CreateInstance(type) is ISchedulerJobPlugin jobPlugin) + { + try + { + pluginRegistry.Register(jobPlugin); + jobPlugin.ConfigureServices(builder.Services, builder.Configuration); + } + catch (InvalidOperationException) + { + // Duplicate JobKind; skip silently (built-in takes precedence) + } + } + } +} + +builder.Services.AddSingleton(pluginRegistry); + if (authorityOptions.Enabled) { builder.Services.AddHttpContextAccessor(); @@ -321,6 +362,14 @@ app.MapPolicySimulationEndpoints(); app.MapSchedulerEventWebhookEndpoints(); app.MapScriptsEndpoints(); +// Map plugin-registered endpoints (e.g. Doctor trend endpoints) +var registry = app.Services.GetRequiredService(); +foreach (var (jobKind, _) in registry.ListRegistered()) +{ + var plugin = registry.Resolve(jobKind); + plugin?.MapEndpoints(app); +} + // Refresh Router endpoint cache app.TryRefreshStellaRouterEndpoints(routerEnabled); diff --git a/src/JobEngine/StellaOps.Scheduler.WebService/Schedules/ScheduleContracts.cs b/src/JobEngine/StellaOps.Scheduler.WebService/Schedules/ScheduleContracts.cs index fc89d043f..ab2e815dd 100644 --- a/src/JobEngine/StellaOps.Scheduler.WebService/Schedules/ScheduleContracts.cs +++ b/src/JobEngine/StellaOps.Scheduler.WebService/Schedules/ScheduleContracts.cs @@ -18,7 +18,9 @@ internal sealed record ScheduleCreateRequest( [property: JsonPropertyName("limits")] ScheduleLimits? Limits = null, [property: JsonPropertyName("subscribers")] ImmutableArray? Subscribers = null, [property: JsonPropertyName("enabled")] bool Enabled = true, - [property: JsonPropertyName("source")] string? Source = null); + [property: JsonPropertyName("source")] string? Source = null, + [property: JsonPropertyName("jobKind")] string? JobKind = null, + [property: JsonPropertyName("pluginConfig")] ImmutableDictionary? PluginConfig = null); internal sealed record ScheduleUpdateRequest( [property: JsonPropertyName("name")] string? Name, @@ -29,7 +31,9 @@ internal sealed record ScheduleUpdateRequest( [property: JsonPropertyName("onlyIf")] ScheduleOnlyIf? OnlyIf, [property: JsonPropertyName("notify")] ScheduleNotify? Notify, [property: JsonPropertyName("limits")] ScheduleLimits? Limits, - [property: JsonPropertyName("subscribers")] ImmutableArray? Subscribers); + [property: JsonPropertyName("subscribers")] ImmutableArray? Subscribers, + [property: JsonPropertyName("jobKind")] string? JobKind = null, + [property: JsonPropertyName("pluginConfig")] ImmutableDictionary? PluginConfig = null); internal sealed record ScheduleCollectionResponse(IReadOnlyList Schedules); diff --git a/src/JobEngine/StellaOps.Scheduler.WebService/Schedules/ScheduleEndpoints.cs b/src/JobEngine/StellaOps.Scheduler.WebService/Schedules/ScheduleEndpoints.cs index 7515ef01c..b4f0467c4 100644 --- a/src/JobEngine/StellaOps.Scheduler.WebService/Schedules/ScheduleEndpoints.cs +++ b/src/JobEngine/StellaOps.Scheduler.WebService/Schedules/ScheduleEndpoints.cs @@ -181,7 +181,10 @@ internal static class ScheduleEndpoints SchedulerEndpointHelpers.ResolveActorId(httpContext), now, SchedulerEndpointHelpers.ResolveActorId(httpContext), - SchedulerSchemaVersions.Schedule); + SchedulerSchemaVersions.Schedule, + source: request.Source ?? "user", + jobKind: request.JobKind, + pluginConfig: request.PluginConfig); await repository.UpsertAsync(schedule, cancellationToken: cancellationToken).ConfigureAwait(false); await auditService.WriteAsync( @@ -366,7 +369,7 @@ internal static class ScheduleEndpoints enabled: false, existing.CronExpression, existing.Timezone, - existing.Mode, + existing.Mode, existing.Selection, existing.OnlyIf, existing.Notify, @@ -377,7 +380,9 @@ internal static class ScheduleEndpoints now, SchedulerEndpointHelpers.ResolveActorId(httpContext), existing.SchemaVersion, - existing.Source); + existing.Source, + jobKind: existing.JobKind, + pluginConfig: existing.PluginConfig); await repository.UpsertAsync(updated, cancellationToken: cancellationToken).ConfigureAwait(false); await auditService.WriteAsync( @@ -454,7 +459,9 @@ internal static class ScheduleEndpoints now, SchedulerEndpointHelpers.ResolveActorId(httpContext), existing.SchemaVersion, - existing.Source); + existing.Source, + jobKind: existing.JobKind, + pluginConfig: existing.PluginConfig); await repository.UpsertAsync(updated, cancellationToken: cancellationToken).ConfigureAwait(false); await auditService.WriteAsync( @@ -512,6 +519,8 @@ internal static class ScheduleEndpoints var notify = request.Notify ?? existing.Notify; var limits = request.Limits ?? existing.Limits; var subscribers = request.Subscribers ?? existing.Subscribers; + var jobKind = request.JobKind ?? existing.JobKind; + var pluginConfig = request.PluginConfig ?? existing.PluginConfig; return new Schedule( existing.Id, @@ -531,7 +540,9 @@ internal static class ScheduleEndpoints updatedAt, actor, existing.SchemaVersion, - existing.Source); + existing.Source, + jobKind: jobKind, + pluginConfig: pluginConfig); } } diff --git a/src/JobEngine/StellaOps.Scheduler.WebService/StellaOps.Scheduler.WebService.csproj b/src/JobEngine/StellaOps.Scheduler.WebService/StellaOps.Scheduler.WebService.csproj index d79a4fa41..4d6560e86 100644 --- a/src/JobEngine/StellaOps.Scheduler.WebService/StellaOps.Scheduler.WebService.csproj +++ b/src/JobEngine/StellaOps.Scheduler.WebService/StellaOps.Scheduler.WebService.csproj @@ -11,6 +11,9 @@ + + + diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Models/Schedule.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Models/Schedule.cs index f851dd83d..95e5b5868 100644 --- a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Models/Schedule.cs +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Models/Schedule.cs @@ -26,7 +26,9 @@ public sealed record Schedule string updatedBy, ImmutableArray? subscribers = null, string? schemaVersion = null, - string source = "user") + string source = "user", + string? jobKind = null, + ImmutableDictionary? pluginConfig = null) : this( id, tenantId, @@ -45,7 +47,9 @@ public sealed record Schedule updatedAt, updatedBy, schemaVersion, - source) + source, + jobKind, + pluginConfig) { } @@ -68,7 +72,9 @@ public sealed record Schedule DateTimeOffset updatedAt, string updatedBy, string? schemaVersion = null, - string source = "user") + string source = "user", + string? jobKind = null, + ImmutableDictionary? pluginConfig = null) { Id = Validation.EnsureId(id, nameof(id)); TenantId = Validation.EnsureTenantId(tenantId, nameof(tenantId)); @@ -92,6 +98,8 @@ public sealed record Schedule UpdatedBy = Validation.EnsureSimpleIdentifier(updatedBy, nameof(updatedBy)); SchemaVersion = SchedulerSchemaVersions.EnsureSchedule(schemaVersion); Source = string.IsNullOrWhiteSpace(source) ? "user" : source.Trim(); + JobKind = string.IsNullOrWhiteSpace(jobKind) ? "scan" : jobKind.Trim(); + PluginConfig = pluginConfig; if (Selection.TenantId is not null && !string.Equals(Selection.TenantId, TenantId, StringComparison.Ordinal)) { @@ -135,6 +143,20 @@ public sealed record Schedule public string UpdatedBy { get; } public string Source { get; } = "user"; + + /// + /// Identifies which plugin handles this schedule's execution (e.g. "scan", "doctor"). + /// Defaults to "scan" for backward compatibility. + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string JobKind { get; } = "scan"; + + /// + /// Plugin-specific configuration stored as JSON. For scan jobs this is null. + /// For other plugins (e.g., doctor) this contains plugin-specific settings. + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public ImmutableDictionary? PluginConfig { get; } } /// diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Persistence/Migrations/007_add_schedule_job_kind.sql b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Persistence/Migrations/007_add_schedule_job_kind.sql new file mode 100644 index 000000000..4e022d1cf --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Persistence/Migrations/007_add_schedule_job_kind.sql @@ -0,0 +1,16 @@ +-- Migration: 007_add_schedule_job_kind +-- Adds job_kind and plugin_config columns to support the scheduler plugin architecture. +-- job_kind routes schedule execution to the correct ISchedulerJobPlugin. +-- plugin_config stores plugin-specific JSON configuration. + +ALTER TABLE scheduler.schedules + ADD COLUMN IF NOT EXISTS job_kind TEXT NOT NULL DEFAULT 'scan'; + +ALTER TABLE scheduler.schedules + ADD COLUMN IF NOT EXISTS plugin_config JSONB; + +COMMENT ON COLUMN scheduler.schedules.job_kind IS 'Routes to the ISchedulerJobPlugin that handles this schedule (e.g. scan, doctor, policy-sweep).'; +COMMENT ON COLUMN scheduler.schedules.plugin_config IS 'Plugin-specific configuration as JSON. Schema defined by the plugin identified by job_kind.'; + +-- Index for filtering schedules by job kind (common in UI and API queries). +CREATE INDEX IF NOT EXISTS idx_schedules_job_kind ON scheduler.schedules(tenant_id, job_kind) WHERE deleted_at IS NULL; diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Persistence/Migrations/008_add_doctor_trends.sql b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Persistence/Migrations/008_add_doctor_trends.sql new file mode 100644 index 000000000..628a80be4 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Persistence/Migrations/008_add_doctor_trends.sql @@ -0,0 +1,36 @@ +-- Migration: 008_add_doctor_trends +-- Adds the doctor_trends table for storing health check trend data +-- previously held in the standalone Doctor Scheduler's in-memory repository. + +CREATE TABLE IF NOT EXISTS scheduler.doctor_trends ( + id BIGSERIAL PRIMARY KEY, + tenant_id TEXT NOT NULL, + timestamp TIMESTAMPTZ NOT NULL, + check_id TEXT NOT NULL, + plugin_id TEXT NOT NULL, + category TEXT NOT NULL, + run_id TEXT NOT NULL, + status TEXT NOT NULL, + health_score INT NOT NULL, + duration_ms INT NOT NULL DEFAULT 0, + evidence_values JSONB NOT NULL DEFAULT '{}', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_doctor_trends_tenant_check ON scheduler.doctor_trends(tenant_id, check_id, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_doctor_trends_tenant_category ON scheduler.doctor_trends(tenant_id, category, timestamp DESC); +CREATE INDEX IF NOT EXISTS idx_doctor_trends_tenant_run ON scheduler.doctor_trends(tenant_id, run_id); +CREATE INDEX IF NOT EXISTS idx_doctor_trends_timestamp ON scheduler.doctor_trends(timestamp DESC); + +-- BRIN index for time-series queries over large datasets +CREATE INDEX IF NOT EXISTS brin_doctor_trends_timestamp ON scheduler.doctor_trends USING BRIN(timestamp) WITH (pages_per_range = 128); + +COMMENT ON TABLE scheduler.doctor_trends IS 'Health check trend data from Doctor scheduled runs. Migrated from the standalone Doctor Scheduler in-memory repository.'; + +-- RLS for tenant isolation +ALTER TABLE scheduler.doctor_trends ENABLE ROW LEVEL SECURITY; +ALTER TABLE scheduler.doctor_trends FORCE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS doctor_trends_tenant_isolation ON scheduler.doctor_trends; +CREATE POLICY doctor_trends_tenant_isolation ON scheduler.doctor_trends FOR ALL + USING (tenant_id = scheduler_app.require_current_tenant()) + WITH CHECK (tenant_id = scheduler_app.require_current_tenant()); diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/AGENTS.md b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/AGENTS.md new file mode 100644 index 000000000..37da9377b --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/AGENTS.md @@ -0,0 +1,48 @@ +# AGENTS - Scheduler Plugin Architecture + +## Overview + +The Scheduler Plugin system enables non-scanning workloads (health checks, policy sweeps, +graph builds, etc.) to be scheduled and executed as first-class Scheduler jobs. + +## Plugin Contract + +Every plugin implements `ISchedulerJobPlugin` from `StellaOps.Scheduler.Plugin.Abstractions`: + +| Method | Purpose | +|---|---| +| `JobKind` | Unique string identifier stored in `Schedule.JobKind` | +| `DisplayName` | Human-readable name for UI | +| `CreatePlanAsync` | Build execution plan from Schedule + Run | +| `ExecuteAsync` | Execute the plan (called by Worker Host) | +| `ValidateConfigAsync` | Validate plugin-specific config in `Schedule.PluginConfig` | +| `GetConfigJsonSchema` | Return JSON schema for UI-driven config forms | +| `ConfigureServices` | Register plugin DI services at startup | +| `MapEndpoints` | Register plugin HTTP endpoints | + +## Built-in Plugins + +| JobKind | Library | Description | +|---|---|---| +| `scan` | `StellaOps.Scheduler.Plugin.Scan` | Wraps existing scan logic (zero behavioral change) | +| `doctor` | `StellaOps.Scheduler.Plugin.Doctor` | Doctor health checks (replaces standalone doctor-scheduler) | + +## Adding a New Plugin + +1. Create a class library under `StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin./`. +2. Implement `ISchedulerJobPlugin`. +3. Reference `StellaOps.Scheduler.Plugin.Abstractions`. +4. Register in `Program.cs` or drop DLL into `plugins/scheduler/` for assembly-loaded discovery. +5. Create schedules with `jobKind="your-kind"` and `pluginConfig={...}`. + +## Database Schema + +- `scheduler.schedules.job_kind` (TEXT, default 'scan') routes to the plugin +- `scheduler.schedules.plugin_config` (JSONB, nullable) stores plugin-specific config +- Plugin-specific tables (e.g. `scheduler.doctor_trends`) added via embedded SQL migrations + +## Working Directory + +- Abstractions: `src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/` +- Scan plugin: `src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Scan/` +- Doctor plugin: `src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/` diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/IRunProgressReporter.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/IRunProgressReporter.cs new file mode 100644 index 000000000..83ffcd578 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/IRunProgressReporter.cs @@ -0,0 +1,24 @@ +using StellaOps.Scheduler.Models; + +namespace StellaOps.Scheduler.Plugin; + +/// +/// Callback interface for plugins to report progress and update Run state. +/// +public interface IRunProgressReporter +{ + /// + /// Reports progress as completed/total with an optional message. + /// + Task ReportProgressAsync(int completed, int total, string? message = null, CancellationToken ct = default); + + /// + /// Transitions the Run to a new state, optionally recording an error. + /// + Task TransitionStateAsync(RunState newState, string? error = null, CancellationToken ct = default); + + /// + /// Appends a log entry to the Run's execution log. + /// + Task AppendLogAsync(string message, string level = "info", CancellationToken ct = default); +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/ISchedulerJobPlugin.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/ISchedulerJobPlugin.cs new file mode 100644 index 000000000..dc3503240 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/ISchedulerJobPlugin.cs @@ -0,0 +1,68 @@ +using Microsoft.AspNetCore.Routing; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using StellaOps.Scheduler.Models; + +namespace StellaOps.Scheduler.Plugin; + +/// +/// Identifies the kind of job a plugin handles. Used in Schedule.JobKind +/// to route cron triggers to the correct plugin at execution time. +/// +public interface ISchedulerJobPlugin +{ + /// + /// Unique, stable identifier for this job kind (e.g., "scan", "doctor", "policy-sweep"). + /// Stored in the Schedule record; must be immutable once published. + /// + string JobKind { get; } + + /// + /// Human-readable display name for the UI. + /// + string DisplayName { get; } + + /// + /// Plugin version for compatibility checking. + /// + Version Version { get; } + + /// + /// Creates a typed execution plan from a Schedule + Run. + /// Called when the cron fires or a manual run is created. + /// Returns a plan object that the Scheduler persists as the Run's plan payload. + /// + Task CreatePlanAsync(JobPlanContext context, CancellationToken ct); + + /// + /// Executes the plan. Called by the Worker Host. + /// Must be idempotent and support cancellation. + /// Updates Run state via the provided IRunProgressReporter. + /// + Task ExecuteAsync(JobExecutionContext context, CancellationToken ct); + + /// + /// Optionally validates plugin-specific configuration stored in Schedule.PluginConfig. + /// Called on schedule create/update. + /// + Task ValidateConfigAsync( + IReadOnlyDictionary pluginConfig, + CancellationToken ct); + + /// + /// Returns the JSON schema for plugin-specific configuration, enabling UI-driven forms. + /// + string? GetConfigJsonSchema(); + + /// + /// Registers plugin-specific services into DI. + /// Called once during host startup. + /// + void ConfigureServices(IServiceCollection services, IConfiguration configuration); + + /// + /// Registers plugin-specific HTTP endpoints (optional). + /// Called during app.Map* phase. + /// + void MapEndpoints(IEndpointRouteBuilder routes); +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/ISchedulerPluginRegistry.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/ISchedulerPluginRegistry.cs new file mode 100644 index 000000000..c93cab350 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/ISchedulerPluginRegistry.cs @@ -0,0 +1,22 @@ +namespace StellaOps.Scheduler.Plugin; + +/// +/// Registry for discovering and resolving scheduler job plugins by their JobKind. +/// +public interface ISchedulerPluginRegistry +{ + /// + /// Registers a plugin. Throws if a plugin with the same JobKind is already registered. + /// + void Register(ISchedulerJobPlugin plugin); + + /// + /// Resolves a plugin by its JobKind. Returns null if no plugin is registered for the kind. + /// + ISchedulerJobPlugin? Resolve(string jobKind); + + /// + /// Returns all registered plugin summaries. + /// + IReadOnlyList<(string JobKind, string DisplayName)> ListRegistered(); +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobConfigValidationResult.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobConfigValidationResult.cs new file mode 100644 index 000000000..a3696ce47 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobConfigValidationResult.cs @@ -0,0 +1,20 @@ +namespace StellaOps.Scheduler.Plugin; + +/// +/// Result of plugin config validation. +/// +public sealed record JobConfigValidationResult( + bool IsValid, + IReadOnlyList Errors) +{ + /// + /// A successful validation result with no errors. + /// + public static JobConfigValidationResult Valid { get; } = new(true, Array.Empty()); + + /// + /// Creates a failed validation result with the given errors. + /// + public static JobConfigValidationResult Invalid(params string[] errors) + => new(false, errors); +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobExecutionContext.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobExecutionContext.cs new file mode 100644 index 000000000..28675d4d7 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobExecutionContext.cs @@ -0,0 +1,14 @@ +using StellaOps.Scheduler.Models; + +namespace StellaOps.Scheduler.Plugin; + +/// +/// Context passed to . +/// +public sealed record JobExecutionContext( + Schedule Schedule, + Run Run, + JobPlan Plan, + IRunProgressReporter Reporter, + IServiceProvider Services, + TimeProvider TimeProvider); diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobPlan.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobPlan.cs new file mode 100644 index 000000000..e9c11b295 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobPlan.cs @@ -0,0 +1,9 @@ +namespace StellaOps.Scheduler.Plugin; + +/// +/// The plan produced by a plugin. Serialized to JSON and stored on the Run. +/// +public sealed record JobPlan( + string JobKind, + IReadOnlyDictionary Payload, + int EstimatedSteps = 1); diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobPlanContext.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobPlanContext.cs new file mode 100644 index 000000000..b57dac1ff --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/JobPlanContext.cs @@ -0,0 +1,12 @@ +using StellaOps.Scheduler.Models; + +namespace StellaOps.Scheduler.Plugin; + +/// +/// Immutable context passed to . +/// +public sealed record JobPlanContext( + Schedule Schedule, + Run Run, + IServiceProvider Services, + TimeProvider TimeProvider); diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/SchedulerPluginRegistry.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/SchedulerPluginRegistry.cs new file mode 100644 index 000000000..b97854958 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/SchedulerPluginRegistry.cs @@ -0,0 +1,50 @@ +using System.Collections.Concurrent; + +namespace StellaOps.Scheduler.Plugin; + +/// +/// Thread-safe in-memory registry for scheduler job plugins. +/// +public sealed class SchedulerPluginRegistry : ISchedulerPluginRegistry +{ + private readonly ConcurrentDictionary _plugins = new(StringComparer.OrdinalIgnoreCase); + + /// + public void Register(ISchedulerJobPlugin plugin) + { + ArgumentNullException.ThrowIfNull(plugin); + + if (string.IsNullOrWhiteSpace(plugin.JobKind)) + { + throw new ArgumentException("Plugin JobKind must not be null or whitespace.", nameof(plugin)); + } + + if (!_plugins.TryAdd(plugin.JobKind, plugin)) + { + throw new InvalidOperationException( + $"A plugin with JobKind '{plugin.JobKind}' is already registered. " + + $"Existing: '{_plugins[plugin.JobKind].DisplayName}', " + + $"Attempted: '{plugin.DisplayName}'."); + } + } + + /// + public ISchedulerJobPlugin? Resolve(string jobKind) + { + if (string.IsNullOrWhiteSpace(jobKind)) + { + return null; + } + + return _plugins.TryGetValue(jobKind, out var plugin) ? plugin : null; + } + + /// + public IReadOnlyList<(string JobKind, string DisplayName)> ListRegistered() + { + return _plugins.Values + .OrderBy(p => p.JobKind, StringComparer.OrdinalIgnoreCase) + .Select(p => (p.JobKind, p.DisplayName)) + .ToArray(); + } +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/StellaOps.Scheduler.Plugin.Abstractions.csproj b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/StellaOps.Scheduler.Plugin.Abstractions.csproj new file mode 100644 index 000000000..62fbb3cb5 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Abstractions/StellaOps.Scheduler.Plugin.Abstractions.csproj @@ -0,0 +1,20 @@ + + + net10.0 + preview + enable + enable + true + StellaOps.Scheduler.Plugin + StellaOps.Scheduler.Plugin.Abstractions + Plugin abstraction contracts for the StellaOps Scheduler job plugin system. + + + + + + + + + + diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorJobOptions.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorJobOptions.cs new file mode 100644 index 000000000..6757e8206 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorJobOptions.cs @@ -0,0 +1,32 @@ +namespace StellaOps.Scheduler.Plugin.Doctor; + +/// +/// Configuration options for the Doctor job plugin. +/// +public sealed class DoctorJobOptions +{ + /// + /// Configuration section name. + /// + public const string SectionName = "Scheduler:Doctor"; + + /// + /// URL of the Doctor WebService API. + /// + public string DoctorApiUrl { get; set; } = "http://doctor.stella-ops.local"; + + /// + /// Default timeout for Doctor runs (in seconds). + /// + public int DefaultTimeoutSeconds { get; set; } = 300; + + /// + /// How long to retain trend data (in days). + /// + public int TrendDataRetentionDays { get; set; } = 365; + + /// + /// Polling interval when waiting for Doctor run completion (in seconds). + /// + public int PollIntervalSeconds { get; set; } = 2; +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorJobPlugin.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorJobPlugin.cs new file mode 100644 index 000000000..2d6839b5b --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorJobPlugin.cs @@ -0,0 +1,551 @@ +using System.Diagnostics; +using System.Net.Http.Json; +using System.Text.Json; +using Microsoft.AspNetCore.Builder; +using Microsoft.AspNetCore.Http; +using Microsoft.AspNetCore.Routing; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using StellaOps.Scheduler.Models; + +namespace StellaOps.Scheduler.Plugin.Doctor; + +/// +/// Doctor health check job plugin for the Scheduler. +/// Replaces the standalone doctor-scheduler service by executing Doctor runs +/// via the Doctor WebService HTTP API and storing trend data in the Scheduler's database. +/// +public sealed class DoctorJobPlugin : ISchedulerJobPlugin +{ + /// + public string JobKind => "doctor"; + + /// + public string DisplayName => "Doctor Health Checks"; + + /// + public Version Version { get; } = new(1, 0, 0); + + /// + public Task CreatePlanAsync(JobPlanContext context, CancellationToken ct) + { + var config = DoctorScheduleConfig.FromPluginConfig(context.Schedule.PluginConfig); + + var payload = new Dictionary + { + ["doctorMode"] = config.DoctorMode, + ["categories"] = config.Categories, + ["plugins"] = config.Plugins, + ["timeoutSeconds"] = config.TimeoutSeconds, + ["scheduleId"] = context.Schedule.Id, + }; + + var plan = new JobPlan( + JobKind: "doctor", + Payload: payload, + EstimatedSteps: 3); // trigger, poll, store trends + + return Task.FromResult(plan); + } + + /// + public async Task ExecuteAsync(JobExecutionContext context, CancellationToken ct) + { + var logger = context.Services.GetService()?.CreateLogger(); + var httpClientFactory = context.Services.GetRequiredService(); + var options = context.Services.GetRequiredService>().Value; + var trendRepository = context.Services.GetService(); + + var config = DoctorScheduleConfig.FromPluginConfig(context.Schedule.PluginConfig); + var httpClient = httpClientFactory.CreateClient("DoctorApi"); + + logger?.LogInformation( + "Executing Doctor health check for schedule {ScheduleId} in {Mode} mode", + context.Schedule.Id, config.DoctorMode); + + await context.Reporter.ReportProgressAsync(0, 3, "Triggering Doctor run", ct).ConfigureAwait(false); + + // Step 1: Trigger Doctor run + var runId = await TriggerDoctorRunAsync(httpClient, options, config, logger, ct).ConfigureAwait(false); + + await context.Reporter.ReportProgressAsync(1, 3, $"Doctor run {runId} triggered, waiting for completion", ct).ConfigureAwait(false); + await context.Reporter.AppendLogAsync($"Doctor run triggered: {runId}", "info", ct).ConfigureAwait(false); + + // Step 2: Poll for completion + var (status, summary) = await WaitForRunCompletionAsync(httpClient, options, runId, config, logger, ct).ConfigureAwait(false); + + await context.Reporter.ReportProgressAsync(2, 3, $"Doctor run completed with status: {status}", ct).ConfigureAwait(false); + await context.Reporter.AppendLogAsync( + $"Doctor run {runId} completed: {summary.PassedChecks}/{summary.TotalChecks} passed, health={summary.HealthScore}", + status == "fail" ? "error" : "info", ct).ConfigureAwait(false); + + // Step 3: Store trend data + if (trendRepository is not null) + { + await StoreTrendDataAsync(httpClient, options, trendRepository, runId, + context.Schedule.TenantId, context.TimeProvider, logger, ct).ConfigureAwait(false); + } + + await context.Reporter.ReportProgressAsync(3, 3, "Trend data stored", ct).ConfigureAwait(false); + + // Check for alert conditions + if (config.Alerts?.Enabled == true) + { + var shouldAlert = (config.Alerts.AlertOnFail && status == "fail") || + (config.Alerts.AlertOnWarn && status == "warn"); + if (shouldAlert) + { + await context.Reporter.AppendLogAsync( + $"Alert condition met: status={status}, alertOnFail={config.Alerts.AlertOnFail}, alertOnWarn={config.Alerts.AlertOnWarn}", + "warning", ct).ConfigureAwait(false); + } + } + + // Transition to completed or error based on Doctor results + if (status == "fail" && summary.FailedChecks > 0) + { + await context.Reporter.TransitionStateAsync( + RunState.Completed, + $"Doctor run completed with {summary.FailedChecks} failed checks", + ct).ConfigureAwait(false); + } + else + { + await context.Reporter.TransitionStateAsync(RunState.Completed, ct: ct).ConfigureAwait(false); + } + } + + /// + public Task ValidateConfigAsync( + IReadOnlyDictionary pluginConfig, + CancellationToken ct) + { + var errors = new List(); + + if (pluginConfig.TryGetValue("doctorMode", out var modeObj) && modeObj is string mode) + { + var validModes = new[] { "full", "quick", "categories", "plugins" }; + if (!validModes.Contains(mode, StringComparer.OrdinalIgnoreCase)) + { + errors.Add($"Invalid doctorMode '{mode}'. Valid values: {string.Join(", ", validModes)}"); + } + + if (string.Equals(mode, "categories", StringComparison.OrdinalIgnoreCase)) + { + if (!pluginConfig.TryGetValue("categories", out var catObj) || catObj is not JsonElement cats || cats.GetArrayLength() == 0) + { + errors.Add("At least one category is required when doctorMode is 'categories'."); + } + } + + if (string.Equals(mode, "plugins", StringComparison.OrdinalIgnoreCase)) + { + if (!pluginConfig.TryGetValue("plugins", out var plugObj) || plugObj is not JsonElement plugs || plugs.GetArrayLength() == 0) + { + errors.Add("At least one plugin is required when doctorMode is 'plugins'."); + } + } + } + + return Task.FromResult(errors.Count > 0 + ? JobConfigValidationResult.Invalid([.. errors]) + : JobConfigValidationResult.Valid); + } + + /// + public string? GetConfigJsonSchema() + { + return """ + { + "type": "object", + "properties": { + "doctorMode": { + "type": "string", + "enum": ["full", "quick", "categories", "plugins"], + "default": "full" + }, + "categories": { + "type": "array", + "items": { "type": "string" } + }, + "plugins": { + "type": "array", + "items": { "type": "string" } + }, + "timeoutSeconds": { + "type": "integer", + "minimum": 30, + "maximum": 3600, + "default": 300 + }, + "alerts": { + "type": "object", + "properties": { + "enabled": { "type": "boolean", "default": true }, + "alertOnFail": { "type": "boolean", "default": true }, + "alertOnWarn": { "type": "boolean", "default": false }, + "alertOnStatusChange": { "type": "boolean", "default": true }, + "channels": { "type": "array", "items": { "type": "string" } }, + "minSeverity": { "type": "string", "default": "Fail" } + } + } + } + } + """; + } + + /// + public void ConfigureServices(IServiceCollection services, IConfiguration configuration) + { + services.Configure(configuration.GetSection(DoctorJobOptions.SectionName)); + + services.AddHttpClient("DoctorApi", (sp, client) => + { + var opts = sp.GetRequiredService>().Value; + client.BaseAddress = new Uri(opts.DoctorApiUrl); + client.Timeout = TimeSpan.FromSeconds(opts.DefaultTimeoutSeconds + 30); + }); + } + + /// + public void MapEndpoints(IEndpointRouteBuilder routes) + { + var group = routes.MapGroup("/api/v1/scheduler/doctor") + .WithTags("Doctor", "Scheduler"); + + group.MapGet("/trends", async ( + DateTimeOffset? from, + DateTimeOffset? to, + HttpContext httpContext, + IDoctorTrendRepository? trendRepository, + TimeProvider timeProvider) => + { + if (trendRepository is null) + { + return Results.Json(new { summaries = Array.Empty() }); + } + + var window = ResolveWindow(from, to, timeProvider); + if (window is null) + { + return Results.BadRequest(new { message = "Invalid time window: from must be before to." }); + } + + // Use a default tenant for now; in production this would come from the auth context + var tenantId = "demo-prod"; + var summaries = await trendRepository.GetTrendSummariesAsync(tenantId, window.Value.From, window.Value.To); + return Results.Ok(new + { + window = new { from = window.Value.From, to = window.Value.To }, + summaries + }); + }) + .WithName("GetSchedulerDoctorTrends") + .WithDescription("Returns aggregated health-check trend summaries from the Scheduler's Doctor plugin."); + + group.MapGet("/trends/checks/{checkId}", async ( + string checkId, + DateTimeOffset? from, + DateTimeOffset? to, + IDoctorTrendRepository? trendRepository, + TimeProvider timeProvider) => + { + if (string.IsNullOrWhiteSpace(checkId)) + { + return Results.BadRequest(new { message = "checkId is required." }); + } + + if (trendRepository is null) + { + return Results.Json(new { dataPoints = Array.Empty() }); + } + + var window = ResolveWindow(from, to, timeProvider); + if (window is null) + { + return Results.BadRequest(new { message = "Invalid time window." }); + } + + var tenantId = "demo-prod"; + var data = await trendRepository.GetTrendDataAsync(tenantId, checkId, window.Value.From, window.Value.To); + return Results.Ok(new + { + window = new { from = window.Value.From, to = window.Value.To }, + checkId, + dataPoints = data + }); + }) + .WithName("GetSchedulerDoctorCheckTrend") + .WithDescription("Returns trend data points for a specific Doctor health check."); + + group.MapGet("/trends/categories/{category}", async ( + string category, + DateTimeOffset? from, + DateTimeOffset? to, + IDoctorTrendRepository? trendRepository, + TimeProvider timeProvider) => + { + if (string.IsNullOrWhiteSpace(category)) + { + return Results.BadRequest(new { message = "category is required." }); + } + + if (trendRepository is null) + { + return Results.Json(new { dataPoints = Array.Empty() }); + } + + var window = ResolveWindow(from, to, timeProvider); + if (window is null) + { + return Results.BadRequest(new { message = "Invalid time window." }); + } + + var tenantId = "demo-prod"; + var data = await trendRepository.GetCategoryTrendDataAsync(tenantId, category, window.Value.From, window.Value.To); + return Results.Ok(new + { + window = new { from = window.Value.From, to = window.Value.To }, + category, + dataPoints = data + }); + }) + .WithName("GetSchedulerDoctorCategoryTrend") + .WithDescription("Returns trend data points for all checks within a specific Doctor check category."); + + group.MapGet("/trends/degrading", async ( + DateTimeOffset? from, + DateTimeOffset? to, + double? threshold, + IDoctorTrendRepository? trendRepository, + TimeProvider timeProvider) => + { + if (trendRepository is null) + { + return Results.Json(new { checks = Array.Empty() }); + } + + var window = ResolveWindow(from, to, timeProvider); + if (window is null) + { + return Results.BadRequest(new { message = "Invalid time window." }); + } + + var effectiveThreshold = threshold ?? 0.1d; + if (effectiveThreshold < 0 || double.IsNaN(effectiveThreshold)) + { + return Results.BadRequest(new { message = "Threshold must be >= 0." }); + } + + var tenantId = "demo-prod"; + var degrading = await trendRepository.GetDegradingChecksAsync(tenantId, window.Value.From, window.Value.To, effectiveThreshold); + return Results.Ok(new + { + window = new { from = window.Value.From, to = window.Value.To }, + threshold = effectiveThreshold, + checks = degrading + }); + }) + .WithName("GetSchedulerDoctorDegradingChecks") + .WithDescription("Returns Doctor health checks with degrading trends."); + } + + private static async Task TriggerDoctorRunAsync( + HttpClient httpClient, + DoctorJobOptions options, + DoctorScheduleConfig config, + ILogger? logger, + CancellationToken ct) + { + var request = new + { + mode = config.DoctorMode, + categories = config.Categories, + plugins = config.Plugins, + async_ = true + }; + + var response = await httpClient.PostAsJsonAsync( + "/api/v1/doctor/run", + request, + ct).ConfigureAwait(false); + + response.EnsureSuccessStatusCode(); + + var result = await response.Content.ReadFromJsonAsync(cancellationToken: ct).ConfigureAwait(false); + var runId = result?.RunId ?? throw new InvalidOperationException("No run ID returned from Doctor API."); + + logger?.LogInformation("Triggered Doctor run {RunId}", runId); + return runId; + } + + private static async Task<(string Status, ExecutionSummary Summary)> WaitForRunCompletionAsync( + HttpClient httpClient, + DoctorJobOptions options, + string runId, + DoctorScheduleConfig config, + ILogger? logger, + CancellationToken ct) + { + var timeout = TimeSpan.FromSeconds(config.TimeoutSeconds > 0 ? config.TimeoutSeconds : options.DefaultTimeoutSeconds); + var sw = Stopwatch.StartNew(); + + while (sw.Elapsed < timeout) + { + ct.ThrowIfCancellationRequested(); + + var response = await httpClient.GetAsync($"/api/v1/doctor/run/{runId}", ct).ConfigureAwait(false); + + if (!response.IsSuccessStatusCode) + { + await Task.Delay(TimeSpan.FromSeconds(options.PollIntervalSeconds), ct).ConfigureAwait(false); + continue; + } + + var result = await response.Content.ReadFromJsonAsync(cancellationToken: ct).ConfigureAwait(false); + + if (result?.Status is "completed") + { + var summary = new ExecutionSummary + { + TotalChecks = result.TotalChecks, + PassedChecks = result.PassedChecks, + WarnedChecks = result.WarnedChecks, + FailedChecks = result.FailedChecks, + SkippedChecks = result.SkippedChecks, + HealthScore = result.HealthScore, + }; + + var status = result.FailedChecks > 0 ? "fail" + : result.WarnedChecks > 0 ? "warn" + : "pass"; + + logger?.LogInformation("Doctor run {RunId} completed: {Status}", runId, status); + return (status, summary); + } + + await Task.Delay(TimeSpan.FromSeconds(options.PollIntervalSeconds), ct).ConfigureAwait(false); + } + + throw new TimeoutException($"Doctor run {runId} did not complete within {timeout.TotalSeconds}s"); + } + + private static async Task StoreTrendDataAsync( + HttpClient httpClient, + DoctorJobOptions options, + IDoctorTrendRepository trendRepository, + string runId, + string tenantId, + TimeProvider timeProvider, + ILogger? logger, + CancellationToken ct) + { + try + { + var response = await httpClient.GetAsync($"/api/v1/doctor/run/{runId}/results", ct).ConfigureAwait(false); + + if (!response.IsSuccessStatusCode) + { + logger?.LogWarning("Failed to fetch Doctor run results for {RunId}: {StatusCode}", runId, response.StatusCode); + return; + } + + var results = await response.Content.ReadFromJsonAsync(cancellationToken: ct).ConfigureAwait(false); + if (results?.Results is null || results.Results.Count == 0) + { + return; + } + + var timestamp = timeProvider.GetUtcNow(); + var dataPoints = results.Results.Select(r => new DoctorTrendDataPoint + { + TenantId = tenantId, + Timestamp = timestamp, + CheckId = r.CheckId, + PluginId = r.PluginId, + Category = r.Category, + RunId = runId, + Status = r.Status, + HealthScore = CalculateHealthScore(r.Status), + DurationMs = r.DurationMs, + EvidenceValues = ExtractTrendEvidence(r.Evidence), + }).ToList(); + + await trendRepository.StoreTrendDataAsync(dataPoints, ct).ConfigureAwait(false); + logger?.LogInformation("Stored {Count} trend data points for Doctor run {RunId}", dataPoints.Count, runId); + } + catch (Exception ex) + { + logger?.LogWarning(ex, "Failed to store trend data for Doctor run {RunId}", runId); + } + } + + private static int CalculateHealthScore(string status) => status.ToLowerInvariant() switch + { + "pass" => 100, + "warn" => 50, + "fail" => 0, + "skip" => -1, + _ => 0 + }; + + private static IReadOnlyDictionary ExtractTrendEvidence(Dictionary? evidence) + { + if (evidence is null) + { + return new Dictionary(); + } + + return evidence + .Where(kv => kv.Value is int or long or double or string) + .Where(kv => !kv.Key.Contains("url", StringComparison.OrdinalIgnoreCase)) + .Where(kv => !kv.Key.Contains("message", StringComparison.OrdinalIgnoreCase)) + .Take(10) + .ToDictionary(kv => kv.Key, kv => kv.Value?.ToString() ?? string.Empty); + } + + private static (DateTimeOffset From, DateTimeOffset To)? ResolveWindow( + DateTimeOffset? from, DateTimeOffset? to, TimeProvider timeProvider) + { + var end = to ?? timeProvider.GetUtcNow(); + var start = from ?? end.AddDays(-30); + return start > end ? null : (start, end); + } + + // HTTP response models (matching Doctor WebService API) + private sealed record RunTriggerResponse(string RunId); + + private sealed record RunStatusResponse( + string Status, + int TotalChecks, + int PassedChecks, + int WarnedChecks, + int FailedChecks, + int SkippedChecks, + int HealthScore); + + private sealed record RunResultsResponse(IReadOnlyList? Results); + + private sealed record CheckResult( + string CheckId, + string PluginId, + string Category, + string Status, + int DurationMs, + Dictionary? Evidence); +} + +/// +/// Summary of a Doctor schedule execution's results. +/// +internal sealed record ExecutionSummary +{ + public int TotalChecks { get; init; } + public int PassedChecks { get; init; } + public int WarnedChecks { get; init; } + public int FailedChecks { get; init; } + public int SkippedChecks { get; init; } + public int HealthScore { get; init; } +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorScheduleConfig.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorScheduleConfig.cs new file mode 100644 index 000000000..5d0189b25 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorScheduleConfig.cs @@ -0,0 +1,85 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace StellaOps.Scheduler.Plugin.Doctor; + +/// +/// Doctor-specific schedule configuration stored in Schedule.PluginConfig. +/// +public sealed record DoctorScheduleConfig +{ + /// + /// Doctor run mode: full, quick, categories, or plugins. + /// + [JsonPropertyName("doctorMode")] + public string DoctorMode { get; init; } = "full"; + + /// + /// Categories to run (only relevant when doctorMode is "categories"). + /// + [JsonPropertyName("categories")] + public IReadOnlyList Categories { get; init; } = []; + + /// + /// Specific plugins to run (only relevant when doctorMode is "plugins"). + /// + [JsonPropertyName("plugins")] + public IReadOnlyList Plugins { get; init; } = []; + + /// + /// Timeout in seconds for the Doctor run. + /// + [JsonPropertyName("timeoutSeconds")] + public int TimeoutSeconds { get; init; } = 300; + + /// + /// Alert configuration for this schedule. + /// + [JsonPropertyName("alerts")] + public DoctorAlertConfig? Alerts { get; init; } + + /// + /// Deserializes a DoctorScheduleConfig from a plugin config dictionary. + /// + public static DoctorScheduleConfig FromPluginConfig(IReadOnlyDictionary? pluginConfig) + { + if (pluginConfig is null || pluginConfig.Count == 0) + { + return new DoctorScheduleConfig(); + } + + // Re-serialize to JSON and back to get proper deserialization + var json = JsonSerializer.Serialize(pluginConfig); + return JsonSerializer.Deserialize(json) ?? new DoctorScheduleConfig(); + } +} + +/// +/// Alert configuration for Doctor health check schedules. +/// +public sealed record DoctorAlertConfig +{ + [JsonPropertyName("enabled")] + public bool Enabled { get; init; } = true; + + [JsonPropertyName("alertOnFail")] + public bool AlertOnFail { get; init; } = true; + + [JsonPropertyName("alertOnWarn")] + public bool AlertOnWarn { get; init; } + + [JsonPropertyName("alertOnStatusChange")] + public bool AlertOnStatusChange { get; init; } = true; + + [JsonPropertyName("channels")] + public IReadOnlyList Channels { get; init; } = []; + + [JsonPropertyName("emailRecipients")] + public IReadOnlyList EmailRecipients { get; init; } = []; + + [JsonPropertyName("webhookUrls")] + public IReadOnlyList WebhookUrls { get; init; } = []; + + [JsonPropertyName("minSeverity")] + public string MinSeverity { get; init; } = "Fail"; +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorTrendDataPoint.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorTrendDataPoint.cs new file mode 100644 index 000000000..a2fb2c1f8 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/DoctorTrendDataPoint.cs @@ -0,0 +1,39 @@ +namespace StellaOps.Scheduler.Plugin.Doctor; + +/// +/// A single data point in a Doctor health trend, persisted to scheduler.doctor_trends. +/// +public sealed record DoctorTrendDataPoint +{ + public long Id { get; init; } + public required string TenantId { get; init; } + public DateTimeOffset Timestamp { get; init; } + public required string CheckId { get; init; } + public required string PluginId { get; init; } + public required string Category { get; init; } + public required string RunId { get; init; } + public required string Status { get; init; } + public int HealthScore { get; init; } + public int DurationMs { get; init; } + public IReadOnlyDictionary EvidenceValues { get; init; } = new Dictionary(); +} + +/// +/// Aggregated trend summary for a Doctor check over a time period. +/// +public sealed record DoctorTrendSummary +{ + public required string CheckId { get; init; } + public required string CheckName { get; init; } + public DateTimeOffset PeriodStart { get; init; } + public DateTimeOffset PeriodEnd { get; init; } + public int TotalRuns { get; init; } + public int PassCount { get; init; } + public int WarnCount { get; init; } + public int FailCount { get; init; } + public double SuccessRate => TotalRuns > 0 ? (double)PassCount / TotalRuns : 0; + public double AvgHealthScore { get; init; } + public string Direction { get; init; } = "stable"; + public double ChangePercent { get; init; } + public int AvgDurationMs { get; init; } +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/IDoctorTrendRepository.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/IDoctorTrendRepository.cs new file mode 100644 index 000000000..075d20baf --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/IDoctorTrendRepository.cs @@ -0,0 +1,41 @@ +namespace StellaOps.Scheduler.Plugin.Doctor; + +/// +/// Repository for persisting and querying Doctor trend data in the Scheduler's Postgres schema. +/// +public interface IDoctorTrendRepository +{ + /// + /// Stores trend data points from a Doctor run. + /// + Task StoreTrendDataAsync(IEnumerable dataPoints, CancellationToken ct = default); + + /// + /// Gets trend data points for a specific check over a time range. + /// + Task> GetTrendDataAsync( + string tenantId, string checkId, DateTimeOffset from, DateTimeOffset to, CancellationToken ct = default); + + /// + /// Gets trend data points for a category over a time range. + /// + Task> GetCategoryTrendDataAsync( + string tenantId, string category, DateTimeOffset from, DateTimeOffset to, CancellationToken ct = default); + + /// + /// Gets aggregated trend summaries for all checks over a time range. + /// + Task> GetTrendSummariesAsync( + string tenantId, DateTimeOffset from, DateTimeOffset to, CancellationToken ct = default); + + /// + /// Gets checks with degrading trends. + /// + Task> GetDegradingChecksAsync( + string tenantId, DateTimeOffset from, DateTimeOffset to, double degradationThreshold = 0.1, CancellationToken ct = default); + + /// + /// Prunes old trend data beyond retention period. + /// + Task PruneOldDataAsync(DateTimeOffset olderThan, CancellationToken ct = default); +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/PostgresDoctorTrendRepository.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/PostgresDoctorTrendRepository.cs new file mode 100644 index 000000000..0001ec335 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/PostgresDoctorTrendRepository.cs @@ -0,0 +1,189 @@ +using System.Text.Json; +using Dapper; +using Npgsql; + +namespace StellaOps.Scheduler.Plugin.Doctor; + +/// +/// PostgreSQL implementation of . +/// Uses the Scheduler's database connection and the scheduler.doctor_trends table. +/// +public sealed class PostgresDoctorTrendRepository : IDoctorTrendRepository +{ + private readonly Func _connectionFactory; + + public PostgresDoctorTrendRepository(Func connectionFactory) + { + _connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory)); + } + + /// + public async Task StoreTrendDataAsync(IEnumerable dataPoints, CancellationToken ct = default) + { + await using var connection = _connectionFactory(); + await connection.OpenAsync(ct).ConfigureAwait(false); + + const string sql = """ + INSERT INTO scheduler.doctor_trends + (tenant_id, timestamp, check_id, plugin_id, category, run_id, status, health_score, duration_ms, evidence_values) + VALUES + (@TenantId, @Timestamp, @CheckId, @PluginId, @Category, @RunId, @Status, @HealthScore, @DurationMs, @EvidenceValues::jsonb) + """; + + foreach (var dp in dataPoints) + { + var evidenceJson = JsonSerializer.Serialize(dp.EvidenceValues); + await connection.ExecuteAsync(sql, new + { + dp.TenantId, + dp.Timestamp, + dp.CheckId, + dp.PluginId, + dp.Category, + dp.RunId, + dp.Status, + dp.HealthScore, + dp.DurationMs, + EvidenceValues = evidenceJson, + }).ConfigureAwait(false); + } + } + + /// + public async Task> GetTrendDataAsync( + string tenantId, string checkId, DateTimeOffset from, DateTimeOffset to, CancellationToken ct = default) + { + await using var connection = _connectionFactory(); + await connection.OpenAsync(ct).ConfigureAwait(false); + + const string sql = """ + SELECT id, tenant_id AS TenantId, timestamp, check_id AS CheckId, plugin_id AS PluginId, + category, run_id AS RunId, status, health_score AS HealthScore, duration_ms AS DurationMs, + evidence_values::text AS EvidenceValuesJson + FROM scheduler.doctor_trends + WHERE tenant_id = @TenantId AND check_id = @CheckId + AND timestamp >= @From AND timestamp <= @To + ORDER BY timestamp DESC + LIMIT 10000 + """; + + var rows = await connection.QueryAsync(sql, new { TenantId = tenantId, CheckId = checkId, From = from, To = to }) + .ConfigureAwait(false); + + return rows.Select(MapRow).ToArray(); + } + + /// + public async Task> GetCategoryTrendDataAsync( + string tenantId, string category, DateTimeOffset from, DateTimeOffset to, CancellationToken ct = default) + { + await using var connection = _connectionFactory(); + await connection.OpenAsync(ct).ConfigureAwait(false); + + const string sql = """ + SELECT id, tenant_id AS TenantId, timestamp, check_id AS CheckId, plugin_id AS PluginId, + category, run_id AS RunId, status, health_score AS HealthScore, duration_ms AS DurationMs, + evidence_values::text AS EvidenceValuesJson + FROM scheduler.doctor_trends + WHERE tenant_id = @TenantId AND category = @Category + AND timestamp >= @From AND timestamp <= @To + ORDER BY timestamp DESC + LIMIT 10000 + """; + + var rows = await connection.QueryAsync(sql, new { TenantId = tenantId, Category = category, From = from, To = to }) + .ConfigureAwait(false); + + return rows.Select(MapRow).ToArray(); + } + + /// + public async Task> GetTrendSummariesAsync( + string tenantId, DateTimeOffset from, DateTimeOffset to, CancellationToken ct = default) + { + await using var connection = _connectionFactory(); + await connection.OpenAsync(ct).ConfigureAwait(false); + + const string sql = """ + SELECT check_id AS CheckId, + check_id AS CheckName, + MIN(timestamp) AS PeriodStart, + MAX(timestamp) AS PeriodEnd, + COUNT(*)::int AS TotalRuns, + COUNT(*) FILTER (WHERE status = 'pass')::int AS PassCount, + COUNT(*) FILTER (WHERE status = 'warn')::int AS WarnCount, + COUNT(*) FILTER (WHERE status = 'fail')::int AS FailCount, + COALESCE(AVG(health_score), 0) AS AvgHealthScore, + COALESCE(AVG(duration_ms)::int, 0) AS AvgDurationMs + FROM scheduler.doctor_trends + WHERE tenant_id = @TenantId AND timestamp >= @From AND timestamp <= @To + GROUP BY check_id + ORDER BY check_id + """; + + var rows = await connection.QueryAsync(sql, new { TenantId = tenantId, From = from, To = to }) + .ConfigureAwait(false); + + return rows.ToArray(); + } + + /// + public async Task> GetDegradingChecksAsync( + string tenantId, DateTimeOffset from, DateTimeOffset to, double degradationThreshold = 0.1, CancellationToken ct = default) + { + // Simple approach: compare first-half success rate vs second-half success rate + var summaries = await GetTrendSummariesAsync(tenantId, from, to, ct).ConfigureAwait(false); + + return summaries + .Where(s => s.TotalRuns > 0 && s.FailCount > s.TotalRuns * degradationThreshold) + .Select(s => s with { Direction = "degrading" }) + .ToArray(); + } + + /// + public async Task PruneOldDataAsync(DateTimeOffset olderThan, CancellationToken ct = default) + { + await using var connection = _connectionFactory(); + await connection.OpenAsync(ct).ConfigureAwait(false); + + const string sql = "DELETE FROM scheduler.doctor_trends WHERE timestamp < @OlderThan"; + await connection.ExecuteAsync(sql, new { OlderThan = olderThan }).ConfigureAwait(false); + } + + private static DoctorTrendDataPoint MapRow(DoctorTrendRow row) + { + var evidence = string.IsNullOrEmpty(row.EvidenceValuesJson) + ? new Dictionary() + : JsonSerializer.Deserialize>(row.EvidenceValuesJson) ?? new Dictionary(); + + return new DoctorTrendDataPoint + { + Id = row.Id, + TenantId = row.TenantId, + Timestamp = row.Timestamp, + CheckId = row.CheckId, + PluginId = row.PluginId, + Category = row.Category, + RunId = row.RunId, + Status = row.Status, + HealthScore = row.HealthScore, + DurationMs = row.DurationMs, + EvidenceValues = evidence, + }; + } + + private sealed record DoctorTrendRow + { + public long Id { get; init; } + public required string TenantId { get; init; } + public DateTimeOffset Timestamp { get; init; } + public required string CheckId { get; init; } + public required string PluginId { get; init; } + public required string Category { get; init; } + public required string RunId { get; init; } + public required string Status { get; init; } + public int HealthScore { get; init; } + public int DurationMs { get; init; } + public string? EvidenceValuesJson { get; init; } + } +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/StellaOps.Scheduler.Plugin.Doctor.csproj b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/StellaOps.Scheduler.Plugin.Doctor.csproj new file mode 100644 index 000000000..6859ac8c0 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Doctor/StellaOps.Scheduler.Plugin.Doctor.csproj @@ -0,0 +1,25 @@ + + + net10.0 + preview + enable + enable + true + StellaOps.Scheduler.Plugin.Doctor + StellaOps.Scheduler.Plugin.Doctor + Doctor health check job plugin for the StellaOps Scheduler. Replaces the standalone doctor-scheduler service. + + + + + + + + + + + + + + + diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Scan/ScanJobPlugin.cs b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Scan/ScanJobPlugin.cs new file mode 100644 index 000000000..da3b6b317 --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Scan/ScanJobPlugin.cs @@ -0,0 +1,86 @@ +using Microsoft.AspNetCore.Routing; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using StellaOps.Scheduler.Models; + +namespace StellaOps.Scheduler.Plugin.Scan; + +/// +/// Built-in scan job plugin that wraps the existing Scheduler scan logic. +/// This is a pure delegation layer that ensures backward compatibility: +/// existing scan schedules execute through the same code paths as before. +/// +public sealed class ScanJobPlugin : ISchedulerJobPlugin +{ + /// + public string JobKind => "scan"; + + /// + public string DisplayName => "Vulnerability Scan"; + + /// + public Version Version { get; } = new(1, 0, 0); + + /// + public Task CreatePlanAsync(JobPlanContext context, CancellationToken ct) + { + // Scan plans use the existing Schedule model fields (Mode, Selection, OnlyIf, Limits) + // which are already persisted. The plugin just wraps them into a JobPlan envelope. + var payload = new Dictionary + { + ["mode"] = context.Schedule.Mode.ToString(), + ["selectorScope"] = context.Schedule.Selection.Scope.ToString(), + ["scheduleId"] = context.Schedule.Id, + }; + + var plan = new JobPlan( + JobKind: "scan", + Payload: payload, + EstimatedSteps: 1); + + return Task.FromResult(plan); + } + + /// + public Task ExecuteAsync(JobExecutionContext context, CancellationToken ct) + { + // Scan execution is handled by the existing Worker Host pipeline. + // This plugin delegates to the existing queue-based execution: + // the Run is already queued by the Scheduler and picked up by + // the Worker Host's segment processor. No additional logic needed. + var logger = context.Services.GetService()?.CreateLogger(); + logger?.LogDebug( + "ScanJobPlugin.ExecuteAsync invoked for run {RunId} on schedule {ScheduleId}. " + + "Execution is handled by the existing Worker Host pipeline.", + context.Run.Id, + context.Schedule.Id); + + return Task.CompletedTask; + } + + /// + public Task ValidateConfigAsync( + IReadOnlyDictionary pluginConfig, + CancellationToken ct) + { + // Scan jobs do not use pluginConfig; all config is in Schedule.Mode/Selection/etc. + // If pluginConfig is provided, it is ignored (not an error for forward compatibility). + return Task.FromResult(JobConfigValidationResult.Valid); + } + + /// + public string? GetConfigJsonSchema() => null; + + /// + public void ConfigureServices(IServiceCollection services, IConfiguration configuration) + { + // No additional services needed; scan services are registered in the main DI setup. + } + + /// + public void MapEndpoints(IEndpointRouteBuilder routes) + { + // No additional endpoints; scan endpoints are registered in the main endpoint setup. + } +} diff --git a/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Scan/StellaOps.Scheduler.Plugin.Scan.csproj b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Scan/StellaOps.Scheduler.Plugin.Scan.csproj new file mode 100644 index 000000000..d54b3d13a --- /dev/null +++ b/src/JobEngine/StellaOps.Scheduler.__Libraries/StellaOps.Scheduler.Plugin.Scan/StellaOps.Scheduler.Plugin.Scan.csproj @@ -0,0 +1,16 @@ + + + net10.0 + preview + enable + enable + true + StellaOps.Scheduler.Plugin.Scan + StellaOps.Scheduler.Plugin.Scan + Built-in scan job plugin for the StellaOps Scheduler. Wraps existing scan logic with zero behavioral change. + + + + + +