Build fixes and code structure improvements. New but essential UI functionality. CI improvements. Documentation improvements. AI module improvements.

StellaOps Bot
2025-12-26 21:54:17 +02:00
parent 335ff7da16
commit c2b9cd8d1f
3717 changed files with 264714 additions and 48202 deletions

View File

@@ -0,0 +1,32 @@
using Microsoft.EntityFrameworkCore;
using StellaOps.Infrastructure.EfCore.Context;
namespace StellaOps.Scheduler.Persistence.EfCore.Context;
/// <summary>
/// EF Core DbContext for the Scheduler module.
/// Placeholder for future EF Core scaffolding from PostgreSQL schema.
/// </summary>
public class SchedulerDbContext : StellaOpsDbContextBase
{
/// <summary>
/// Creates a new Scheduler DbContext.
/// </summary>
public SchedulerDbContext(DbContextOptions<SchedulerDbContext> options)
: base(options)
{
}
/// <inheritdoc />
protected override string SchemaName => "scheduler";
/// <inheritdoc />
protected override void OnModelCreating(ModelBuilder modelBuilder)
{
base.OnModelCreating(modelBuilder);
// Entity configurations will be added after scaffolding
// from the PostgreSQL database using:
// dotnet ef dbcontext scaffold
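//
// Illustrative invocation (the connection string is a placeholder, not the
// project's actual value):
//   dotnet ef dbcontext scaffold "Host=localhost;Database=stellaops" \
//       Npgsql.EntityFrameworkCore.PostgreSQL --schema scheduler --context SchedulerDbContext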
}
}

View File

@@ -0,0 +1,73 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Infrastructure.Postgres.Options;
using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Persistence.Postgres;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;
namespace StellaOps.Scheduler.Persistence.Extensions;
/// <summary>
/// Extension methods for configuring Scheduler persistence services.
/// </summary>
public static class SchedulerPersistenceExtensions
{
/// <summary>
/// Adds Scheduler PostgreSQL persistence services using configuration section.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configuration">Configuration root.</param>
/// <param name="sectionName">Configuration section name for PostgreSQL options.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddSchedulerPersistence(
this IServiceCollection services,
IConfiguration configuration,
string sectionName = "Postgres:Scheduler")
{
services.Configure<PostgresOptions>(sectionName, configuration.GetSection(sectionName));
services.AddSingleton<SchedulerDataSource>();
// Register repositories
services.AddScoped<IJobRepository, JobRepository>();
services.AddScoped<ITriggerRepository, TriggerRepository>();
services.AddScoped<IWorkerRepository, WorkerRepository>();
services.AddScoped<IDistributedLockRepository, DistributedLockRepository>();
services.AddScoped<IJobHistoryRepository, JobHistoryRepository>();
services.AddScoped<IMetricsRepository, MetricsRepository>();
services.AddScoped<IGraphJobRepository, GraphJobRepository>();
services.AddScoped<IRunRepository, RunRepository>();
services.AddScoped<IScheduleRepository, ScheduleRepository>();
services.AddScoped<IImpactSnapshotRepository, ImpactSnapshotRepository>();
services.AddScoped<IPolicyRunJobRepository, PolicyRunJobRepository>();
services.AddScoped<IFailureSignatureRepository, FailureSignatureRepository>();
services.AddSingleton<IRunSummaryService, RunSummaryService>();
return services;
}
/// <summary>
/// Adds Scheduler PostgreSQL persistence services with explicit options.
/// </summary>
/// <param name="services">Service collection.</param>
/// <param name="configureOptions">Options configuration action.</param>
/// <returns>Service collection for chaining.</returns>
public static IServiceCollection AddSchedulerPersistence(
this IServiceCollection services,
Action<PostgresOptions> configureOptions)
{
services.Configure(configureOptions);
services.AddSingleton<SchedulerDataSource>();
// Register repositories
services.AddScoped<IJobRepository, JobRepository>();
services.AddScoped<ITriggerRepository, TriggerRepository>();
services.AddScoped<IWorkerRepository, WorkerRepository>();
services.AddScoped<IDistributedLockRepository, DistributedLockRepository>();
services.AddScoped<IJobHistoryRepository, JobHistoryRepository>();
services.AddScoped<IMetricsRepository, MetricsRepository>();
services.AddScoped<IGraphJobRepository, GraphJobRepository>();
services.AddScoped<IFailureSignatureRepository, FailureSignatureRepository>();
return services;
}
}
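// Example registration (illustrative sketch; assumes a minimal-hosting Program.cs
// and leaves the PostgresOptions members unspecified):
//
//   var builder = WebApplication.CreateBuilder(args);
//   builder.Services.AddSchedulerPersistence(builder.Configuration);
//
//   // or, with explicit options instead of configuration binding:
//   builder.Services.AddSchedulerPersistence(options => { /* set connection settings */ });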

View File

@@ -0,0 +1,596 @@
-- Scheduler Schema: Consolidated Initial Schema
-- Consolidated from migrations 001-012b (pre_1.0 archived)
-- Creates the complete scheduler schema for jobs, triggers, workers, runs, and policies
BEGIN;
-- ============================================================================
-- SECTION 1: Schema Creation
-- ============================================================================
CREATE SCHEMA IF NOT EXISTS scheduler;
CREATE SCHEMA IF NOT EXISTS scheduler_app;
-- ============================================================================
-- SECTION 2: Enum Types
-- ============================================================================
DO $$ BEGIN
CREATE TYPE scheduler.job_status AS ENUM (
'pending', 'scheduled', 'leased', 'running',
'succeeded', 'failed', 'canceled', 'timed_out'
);
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
DO $$ BEGIN
CREATE TYPE scheduler.graph_job_type AS ENUM ('build', 'overlay');
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
DO $$ BEGIN
CREATE TYPE scheduler.graph_job_status AS ENUM ('pending', 'running', 'completed', 'failed', 'canceled');
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
DO $$ BEGIN
CREATE TYPE scheduler.run_state AS ENUM ('planning','queued','running','completed','error','cancelled');
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
DO $$ BEGIN
CREATE TYPE scheduler.policy_run_status AS ENUM ('pending','submitted','retrying','failed','completed','cancelled');
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
-- ============================================================================
-- SECTION 3: Helper Functions
-- ============================================================================
CREATE OR REPLACE FUNCTION scheduler.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION scheduler_app.require_current_tenant()
RETURNS TEXT
LANGUAGE plpgsql STABLE SECURITY DEFINER
AS $$
DECLARE
v_tenant TEXT;
BEGIN
v_tenant := current_setting('app.tenant_id', true);
IF v_tenant IS NULL OR v_tenant = '' THEN
RAISE EXCEPTION 'app.tenant_id session variable not set'
USING HINT = 'Set via: SELECT set_config(''app.tenant_id'', ''<tenant>'', false)',
ERRCODE = 'P0001';
END IF;
RETURN v_tenant;
END;
$$;
REVOKE ALL ON FUNCTION scheduler_app.require_current_tenant() FROM PUBLIC;
-- ============================================================================
-- SECTION 4: Core Tables
-- ============================================================================
-- Jobs table
CREATE TABLE IF NOT EXISTS scheduler.jobs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
project_id TEXT,
job_type TEXT NOT NULL,
status scheduler.job_status NOT NULL DEFAULT 'pending',
priority INT NOT NULL DEFAULT 0,
payload JSONB NOT NULL DEFAULT '{}',
payload_digest TEXT NOT NULL,
idempotency_key TEXT NOT NULL,
correlation_id TEXT,
attempt INT NOT NULL DEFAULT 0,
max_attempts INT NOT NULL DEFAULT 3,
lease_id UUID,
worker_id TEXT,
lease_until TIMESTAMPTZ,
not_before TIMESTAMPTZ,
reason TEXT,
result JSONB,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
scheduled_at TIMESTAMPTZ,
leased_at TIMESTAMPTZ,
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
created_by TEXT,
UNIQUE(tenant_id, idempotency_key)
);
CREATE INDEX idx_jobs_tenant_status ON scheduler.jobs(tenant_id, status);
CREATE INDEX idx_jobs_tenant_type ON scheduler.jobs(tenant_id, job_type);
CREATE INDEX idx_jobs_scheduled ON scheduler.jobs(tenant_id, status, not_before, priority DESC, created_at)
WHERE status = 'scheduled';
CREATE INDEX idx_jobs_leased ON scheduler.jobs(tenant_id, status, lease_until)
WHERE status = 'leased';
CREATE INDEX idx_jobs_project ON scheduler.jobs(tenant_id, project_id);
CREATE INDEX idx_jobs_correlation ON scheduler.jobs(correlation_id);
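-- Illustrative dequeue query the idx_jobs_scheduled index is shaped for
-- (an assumed worker pattern, not necessarily the repository's exact SQL):
--   SELECT id FROM scheduler.jobs
--   WHERE tenant_id = $1 AND status = 'scheduled'
--     AND (not_before IS NULL OR not_before <= NOW())
--   ORDER BY priority DESC, created_at
--   LIMIT 1
--   FOR UPDATE SKIP LOCKED;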
-- Triggers table (cron-based job triggers)
CREATE TABLE IF NOT EXISTS scheduler.triggers (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
description TEXT,
job_type TEXT NOT NULL,
job_payload JSONB NOT NULL DEFAULT '{}',
cron_expression TEXT NOT NULL,
timezone TEXT NOT NULL DEFAULT 'UTC',
enabled BOOLEAN NOT NULL DEFAULT TRUE,
next_fire_at TIMESTAMPTZ,
last_fire_at TIMESTAMPTZ,
last_job_id UUID REFERENCES scheduler.jobs(id),
fire_count BIGINT NOT NULL DEFAULT 0,
misfire_count INT NOT NULL DEFAULT 0,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT,
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_triggers_tenant_id ON scheduler.triggers(tenant_id);
CREATE INDEX idx_triggers_next_fire ON scheduler.triggers(enabled, next_fire_at) WHERE enabled = TRUE;
CREATE INDEX idx_triggers_job_type ON scheduler.triggers(tenant_id, job_type);
CREATE TRIGGER trg_triggers_updated_at
BEFORE UPDATE ON scheduler.triggers
FOR EACH ROW EXECUTE FUNCTION scheduler.update_updated_at();
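-- Illustrative firing poll matching idx_triggers_next_fire (assumed pattern):
--   SELECT id, cron_expression, timezone FROM scheduler.triggers
--   WHERE enabled AND next_fire_at <= NOW()
--   ORDER BY next_fire_at
--   FOR UPDATE SKIP LOCKED;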
-- Workers table (global, NOT RLS-protected)
CREATE TABLE IF NOT EXISTS scheduler.workers (
id TEXT PRIMARY KEY,
tenant_id TEXT,
hostname TEXT NOT NULL,
process_id INT,
job_types TEXT[] NOT NULL DEFAULT '{}',
max_concurrent_jobs INT NOT NULL DEFAULT 1,
current_jobs INT NOT NULL DEFAULT 0,
status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'draining', 'stopped')),
last_heartbeat_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
registered_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_workers_status ON scheduler.workers(status);
CREATE INDEX idx_workers_heartbeat ON scheduler.workers(last_heartbeat_at);
CREATE INDEX idx_workers_tenant ON scheduler.workers(tenant_id);
COMMENT ON TABLE scheduler.workers IS 'Global worker registry. Not RLS-protected - workers serve all tenants.';
-- Distributed locks
CREATE TABLE IF NOT EXISTS scheduler.locks (
lock_key TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
holder_id TEXT NOT NULL,
acquired_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_locks_tenant ON scheduler.locks(tenant_id);
CREATE INDEX idx_locks_expires ON scheduler.locks(expires_at);
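-- Illustrative acquire-or-steal-expired pattern (an assumed usage, not the
-- repository's verbatim SQL; zero rows affected means the lock was not won):
--   INSERT INTO scheduler.locks (lock_key, tenant_id, holder_id, expires_at)
--   VALUES ($1, $2, $3, NOW() + INTERVAL '30 seconds')
--   ON CONFLICT (lock_key) DO UPDATE
--     SET holder_id = EXCLUDED.holder_id,
--         acquired_at = NOW(),
--         expires_at = EXCLUDED.expires_at
--   WHERE scheduler.locks.expires_at < NOW();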
-- Job history
CREATE TABLE IF NOT EXISTS scheduler.job_history (
id BIGSERIAL PRIMARY KEY,
job_id UUID NOT NULL,
tenant_id TEXT NOT NULL,
project_id TEXT,
job_type TEXT NOT NULL,
status scheduler.job_status NOT NULL,
attempt INT NOT NULL,
payload_digest TEXT NOT NULL,
result JSONB,
reason TEXT,
worker_id TEXT,
duration_ms BIGINT,
created_at TIMESTAMPTZ NOT NULL,
completed_at TIMESTAMPTZ NOT NULL,
archived_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_job_history_tenant ON scheduler.job_history(tenant_id);
CREATE INDEX idx_job_history_job_id ON scheduler.job_history(job_id);
CREATE INDEX idx_job_history_type ON scheduler.job_history(tenant_id, job_type);
CREATE INDEX idx_job_history_completed ON scheduler.job_history(tenant_id, completed_at);
-- Metrics table
CREATE TABLE IF NOT EXISTS scheduler.metrics (
id BIGSERIAL PRIMARY KEY,
tenant_id TEXT NOT NULL,
job_type TEXT NOT NULL,
period_start TIMESTAMPTZ NOT NULL,
period_end TIMESTAMPTZ NOT NULL,
jobs_created BIGINT NOT NULL DEFAULT 0,
jobs_completed BIGINT NOT NULL DEFAULT 0,
jobs_failed BIGINT NOT NULL DEFAULT 0,
jobs_timed_out BIGINT NOT NULL DEFAULT 0,
avg_duration_ms BIGINT,
p50_duration_ms BIGINT,
p95_duration_ms BIGINT,
p99_duration_ms BIGINT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, job_type, period_start)
);
CREATE INDEX idx_metrics_tenant_period ON scheduler.metrics(tenant_id, period_start);
-- ============================================================================
-- SECTION 5: Schedules and Runs
-- ============================================================================
CREATE TABLE IF NOT EXISTS scheduler.schedules (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
description TEXT,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
cron_expression TEXT,
timezone TEXT NOT NULL DEFAULT 'UTC',
mode TEXT NOT NULL CHECK (mode IN ('analysisonly', 'contentrefresh')),
selection JSONB NOT NULL DEFAULT '{}',
only_if JSONB NOT NULL DEFAULT '{}',
notify JSONB NOT NULL DEFAULT '{}',
limits JSONB NOT NULL DEFAULT '{}',
subscribers TEXT[] NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT NOT NULL,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_by TEXT NOT NULL,
deleted_at TIMESTAMPTZ,
deleted_by TEXT
);
CREATE INDEX IF NOT EXISTS idx_schedules_tenant ON scheduler.schedules(tenant_id) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_schedules_enabled ON scheduler.schedules(tenant_id, enabled) WHERE deleted_at IS NULL;
CREATE UNIQUE INDEX IF NOT EXISTS uq_schedules_tenant_name_active ON scheduler.schedules(tenant_id, name) WHERE deleted_at IS NULL;
-- Runs table with generated columns for stats
CREATE TABLE IF NOT EXISTS scheduler.runs (
id TEXT NOT NULL,
tenant_id TEXT NOT NULL,
schedule_id TEXT,
trigger JSONB NOT NULL,
state scheduler.run_state NOT NULL,
stats JSONB NOT NULL,
reason JSONB NOT NULL,
created_at TIMESTAMPTZ NOT NULL,
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
error TEXT,
deltas JSONB NOT NULL,
retry_of TEXT,
schema_version TEXT,
finding_count INT GENERATED ALWAYS AS (NULLIF((stats->>'findingCount'), '')::int) STORED,
critical_count INT GENERATED ALWAYS AS (NULLIF((stats->>'criticalCount'), '')::int) STORED,
high_count INT GENERATED ALWAYS AS (NULLIF((stats->>'highCount'), '')::int) STORED,
new_finding_count INT GENERATED ALWAYS AS (NULLIF((stats->>'newFindingCount'), '')::int) STORED,
component_count INT GENERATED ALWAYS AS (NULLIF((stats->>'componentCount'), '')::int) STORED,
PRIMARY KEY (tenant_id, id)
);
CREATE INDEX IF NOT EXISTS idx_runs_state ON scheduler.runs(state);
CREATE INDEX IF NOT EXISTS idx_runs_schedule ON scheduler.runs(tenant_id, schedule_id);
CREATE INDEX IF NOT EXISTS idx_runs_created ON scheduler.runs(created_at);
CREATE INDEX IF NOT EXISTS ix_runs_with_findings ON scheduler.runs(tenant_id, created_at DESC) WHERE finding_count > 0;
CREATE INDEX IF NOT EXISTS ix_runs_critical ON scheduler.runs(tenant_id, created_at DESC, critical_count) WHERE critical_count > 0;
CREATE INDEX IF NOT EXISTS ix_runs_summary_cover ON scheduler.runs(tenant_id, state, created_at DESC) INCLUDE (finding_count, critical_count, high_count, new_finding_count);
CREATE INDEX IF NOT EXISTS ix_runs_tenant_findings ON scheduler.runs(tenant_id, finding_count DESC, created_at DESC) WHERE state = 'completed';
-- Impact snapshots
CREATE TABLE IF NOT EXISTS scheduler.impact_snapshots (
snapshot_id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
run_id TEXT,
impact JSONB NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_impact_snapshots_run ON scheduler.impact_snapshots(run_id);
-- Run summaries
CREATE TABLE IF NOT EXISTS scheduler.run_summaries (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
schedule_id TEXT REFERENCES scheduler.schedules(id),
period_start TIMESTAMPTZ NOT NULL,
period_end TIMESTAMPTZ NOT NULL,
total_runs INT NOT NULL DEFAULT 0,
successful_runs INT NOT NULL DEFAULT 0,
failed_runs INT NOT NULL DEFAULT 0,
cancelled_runs INT NOT NULL DEFAULT 0,
avg_duration_seconds NUMERIC(10,2),
max_duration_seconds INT,
min_duration_seconds INT,
total_findings_detected INT NOT NULL DEFAULT 0,
new_criticals INT NOT NULL DEFAULT 0,
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (tenant_id, schedule_id, period_start)
);
CREATE INDEX IF NOT EXISTS idx_run_summaries_tenant ON scheduler.run_summaries(tenant_id, period_start DESC);
-- Execution logs
CREATE TABLE IF NOT EXISTS scheduler.execution_logs (
id BIGSERIAL PRIMARY KEY,
run_id TEXT NOT NULL,
logged_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
level TEXT NOT NULL,
message TEXT NOT NULL,
logger TEXT,
data JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX IF NOT EXISTS idx_execution_logs_run ON scheduler.execution_logs(run_id);
-- ============================================================================
-- SECTION 6: Graph Jobs (v2 schema)
-- ============================================================================
CREATE TABLE IF NOT EXISTS scheduler.graph_jobs (
id UUID PRIMARY KEY,
tenant_id TEXT NOT NULL,
type scheduler.graph_job_type NOT NULL,
status scheduler.graph_job_status NOT NULL,
payload JSONB NOT NULL,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_graph_jobs_tenant_status ON scheduler.graph_jobs(tenant_id, status, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_graph_jobs_tenant_type_status ON scheduler.graph_jobs(tenant_id, type, status, created_at DESC);
CREATE TABLE IF NOT EXISTS scheduler.graph_job_events (
id BIGSERIAL PRIMARY KEY,
job_id UUID NOT NULL REFERENCES scheduler.graph_jobs(id) ON DELETE CASCADE,
tenant_id TEXT NOT NULL,
status scheduler.graph_job_status NOT NULL,
payload JSONB NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_graph_job_events_job ON scheduler.graph_job_events(job_id, created_at DESC);
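-- Illustrative status transition (an assumed pattern: each graph_jobs update
-- is mirrored as an append-only event for auditability):
--   UPDATE scheduler.graph_jobs
--   SET status = 'running', updated_at = NOW()
--   WHERE id = $1 AND tenant_id = $2;
--   INSERT INTO scheduler.graph_job_events (job_id, tenant_id, status, payload)
--   VALUES ($1, $2, 'running', '{}'::jsonb);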
-- ============================================================================
-- SECTION 7: Policy Run Jobs
-- ============================================================================
CREATE TABLE IF NOT EXISTS scheduler.policy_jobs (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
policy_pack_id TEXT NOT NULL,
policy_version INT,
target_type TEXT NOT NULL,
target_id TEXT NOT NULL,
status TEXT NOT NULL CHECK (status IN ('pending','queued','running','completed','failed','cancelled')),
priority INT NOT NULL DEFAULT 100,
run_id TEXT,
requested_by TEXT,
mode TEXT,
metadata JSONB NOT NULL DEFAULT '{}',
inputs JSONB NOT NULL DEFAULT '{}',
attempt_count INT NOT NULL DEFAULT 0,
max_attempts INT NOT NULL DEFAULT 3,
queued_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
available_at TIMESTAMPTZ,
submitted_at TIMESTAMPTZ,
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
cancellation_requested BOOLEAN NOT NULL DEFAULT FALSE,
cancellation_reason TEXT,
cancelled_at TIMESTAMPTZ,
last_attempt_at TIMESTAMPTZ,
last_error TEXT,
lease_owner TEXT,
lease_expires_at TIMESTAMPTZ,
correlation_id TEXT
);
CREATE INDEX IF NOT EXISTS idx_policy_jobs_tenant_status ON scheduler.policy_jobs(tenant_id, status);
CREATE INDEX IF NOT EXISTS idx_policy_jobs_run ON scheduler.policy_jobs(run_id);
CREATE TABLE IF NOT EXISTS scheduler.policy_run_jobs (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
policy_id TEXT NOT NULL,
policy_version INT,
mode TEXT NOT NULL,
priority INT NOT NULL,
priority_rank INT NOT NULL,
run_id TEXT,
requested_by TEXT,
correlation_id TEXT,
metadata JSONB,
inputs JSONB NOT NULL,
queued_at TIMESTAMPTZ,
status scheduler.policy_run_status NOT NULL,
attempt_count INT NOT NULL,
last_attempt_at TIMESTAMPTZ,
last_error TEXT,
created_at TIMESTAMPTZ NOT NULL,
updated_at TIMESTAMPTZ NOT NULL,
available_at TIMESTAMPTZ NOT NULL,
submitted_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
lease_owner TEXT,
lease_expires_at TIMESTAMPTZ,
cancellation_requested BOOLEAN NOT NULL DEFAULT FALSE,
cancellation_requested_at TIMESTAMPTZ,
cancellation_reason TEXT,
cancelled_at TIMESTAMPTZ,
schema_version TEXT
);
CREATE INDEX IF NOT EXISTS idx_policy_run_jobs_tenant ON scheduler.policy_run_jobs(tenant_id);
CREATE INDEX IF NOT EXISTS idx_policy_run_jobs_status ON scheduler.policy_run_jobs(status);
CREATE INDEX IF NOT EXISTS idx_policy_run_jobs_run ON scheduler.policy_run_jobs(run_id);
CREATE INDEX IF NOT EXISTS idx_policy_run_jobs_policy ON scheduler.policy_run_jobs(tenant_id, policy_id);
-- ============================================================================
-- SECTION 8: Partitioned Audit Table
-- ============================================================================
CREATE TABLE IF NOT EXISTS scheduler.audit (
id BIGSERIAL,
tenant_id TEXT NOT NULL,
user_id UUID,
action TEXT NOT NULL,
resource_type TEXT NOT NULL,
resource_id TEXT,
old_value JSONB,
new_value JSONB,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);
-- Create partitions dynamically
DO $$
DECLARE
v_start DATE;
v_end DATE;
v_partition_name TEXT;
BEGIN
v_start := date_trunc('month', NOW() - INTERVAL '6 months')::DATE;
WHILE v_start <= date_trunc('month', NOW() + INTERVAL '3 months')::DATE LOOP
v_end := (v_start + INTERVAL '1 month')::DATE;
v_partition_name := 'audit_' || to_char(v_start, 'YYYY_MM');
IF NOT EXISTS (
SELECT 1 FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = 'scheduler' AND c.relname = v_partition_name
) THEN
EXECUTE format(
'CREATE TABLE scheduler.%I PARTITION OF scheduler.audit FOR VALUES FROM (%L) TO (%L)',
v_partition_name, v_start, v_end
);
END IF;
v_start := v_end;
END LOOP;
END $$;
CREATE TABLE IF NOT EXISTS scheduler.audit_default PARTITION OF scheduler.audit DEFAULT;
CREATE INDEX IF NOT EXISTS ix_audit_tenant ON scheduler.audit(tenant_id);
CREATE INDEX IF NOT EXISTS ix_audit_resource ON scheduler.audit(resource_type, resource_id);
CREATE INDEX IF NOT EXISTS ix_audit_correlation ON scheduler.audit(correlation_id) WHERE correlation_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS brin_audit_created ON scheduler.audit USING BRIN(created_at) WITH (pages_per_range = 128);
COMMENT ON TABLE scheduler.audit IS 'Audit log for scheduler operations. Partitioned monthly by created_at for retention management.';
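-- Illustrative monthly maintenance (partition names are examples; a real job
-- would derive them from the current date):
--   CREATE TABLE scheduler.audit_2026_04 PARTITION OF scheduler.audit
--       FOR VALUES FROM ('2026-04-01') TO ('2026-05-01');
--   -- Retention: dropping a detached partition removes a month of audit rows
--   -- without a bulk DELETE.
--   ALTER TABLE scheduler.audit DETACH PARTITION scheduler.audit_2025_01;
--   DROP TABLE scheduler.audit_2025_01;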
-- ============================================================================
-- SECTION 9: Row-Level Security
-- ============================================================================
-- scheduler.schedules
ALTER TABLE scheduler.schedules ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.schedules FORCE ROW LEVEL SECURITY;
CREATE POLICY schedules_tenant_isolation ON scheduler.schedules FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.runs
ALTER TABLE scheduler.runs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.runs FORCE ROW LEVEL SECURITY;
CREATE POLICY runs_tenant_isolation ON scheduler.runs FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.jobs
ALTER TABLE scheduler.jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.jobs FORCE ROW LEVEL SECURITY;
CREATE POLICY jobs_tenant_isolation ON scheduler.jobs FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.triggers
ALTER TABLE scheduler.triggers ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.triggers FORCE ROW LEVEL SECURITY;
CREATE POLICY triggers_tenant_isolation ON scheduler.triggers FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.graph_jobs
ALTER TABLE scheduler.graph_jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.graph_jobs FORCE ROW LEVEL SECURITY;
CREATE POLICY graph_jobs_tenant_isolation ON scheduler.graph_jobs FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.policy_jobs
ALTER TABLE scheduler.policy_jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.policy_jobs FORCE ROW LEVEL SECURITY;
CREATE POLICY policy_jobs_tenant_isolation ON scheduler.policy_jobs FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.locks
ALTER TABLE scheduler.locks ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.locks FORCE ROW LEVEL SECURITY;
CREATE POLICY locks_tenant_isolation ON scheduler.locks FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.impact_snapshots
ALTER TABLE scheduler.impact_snapshots ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.impact_snapshots FORCE ROW LEVEL SECURITY;
CREATE POLICY impact_snapshots_tenant_isolation ON scheduler.impact_snapshots FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.run_summaries
ALTER TABLE scheduler.run_summaries ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.run_summaries FORCE ROW LEVEL SECURITY;
CREATE POLICY run_summaries_tenant_isolation ON scheduler.run_summaries FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.audit
ALTER TABLE scheduler.audit ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.audit FORCE ROW LEVEL SECURITY;
CREATE POLICY audit_tenant_isolation ON scheduler.audit FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.job_history
ALTER TABLE scheduler.job_history ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.job_history FORCE ROW LEVEL SECURITY;
CREATE POLICY job_history_tenant_isolation ON scheduler.job_history FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.metrics
ALTER TABLE scheduler.metrics ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.metrics FORCE ROW LEVEL SECURITY;
CREATE POLICY metrics_tenant_isolation ON scheduler.metrics FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.execution_logs inherits tenant scope from scheduler.runs via run_id
ALTER TABLE scheduler.execution_logs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.execution_logs FORCE ROW LEVEL SECURITY;
CREATE POLICY execution_logs_tenant_isolation ON scheduler.execution_logs FOR ALL
USING (
run_id IN (SELECT id FROM scheduler.runs WHERE tenant_id = scheduler_app.require_current_tenant())
);
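-- Illustrative application session (the set_config call is the contract the
-- policies above enforce):
--   SELECT set_config('app.tenant_id', 'tenant-a', false);
--   SELECT count(*) FROM scheduler.runs;   -- sees only tenant-a rows
-- Without the setting, require_current_tenant() raises P0001 and the query fails.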
-- ============================================================================
-- SECTION 10: Admin Bypass Role
-- ============================================================================
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'scheduler_admin') THEN
CREATE ROLE scheduler_admin WITH NOLOGIN BYPASSRLS;
END IF;
END
$$;
COMMIT;

View File

@@ -0,0 +1,361 @@
-- Scheduler Schema Migration 001: Initial Schema
-- Creates the scheduler schema for jobs, triggers, and workers
-- Create schema
CREATE SCHEMA IF NOT EXISTS scheduler;
-- Job status enum type
DO $$ BEGIN
CREATE TYPE scheduler.job_status AS ENUM (
'pending', 'scheduled', 'leased', 'running',
'succeeded', 'failed', 'canceled', 'timed_out'
);
EXCEPTION
WHEN duplicate_object THEN NULL;
END $$;
-- Jobs table
CREATE TABLE IF NOT EXISTS scheduler.jobs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
project_id TEXT,
job_type TEXT NOT NULL,
status scheduler.job_status NOT NULL DEFAULT 'pending',
priority INT NOT NULL DEFAULT 0,
payload JSONB NOT NULL DEFAULT '{}',
payload_digest TEXT NOT NULL,
idempotency_key TEXT NOT NULL,
correlation_id TEXT,
attempt INT NOT NULL DEFAULT 0,
max_attempts INT NOT NULL DEFAULT 3,
lease_id UUID,
worker_id TEXT,
lease_until TIMESTAMPTZ,
not_before TIMESTAMPTZ,
reason TEXT,
result JSONB,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
scheduled_at TIMESTAMPTZ,
leased_at TIMESTAMPTZ,
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
created_by TEXT,
UNIQUE(tenant_id, idempotency_key)
);
CREATE INDEX idx_jobs_tenant_status ON scheduler.jobs(tenant_id, status);
CREATE INDEX idx_jobs_tenant_type ON scheduler.jobs(tenant_id, job_type);
CREATE INDEX idx_jobs_scheduled ON scheduler.jobs(tenant_id, status, not_before, priority DESC, created_at)
WHERE status = 'scheduled';
CREATE INDEX idx_jobs_leased ON scheduler.jobs(tenant_id, status, lease_until)
WHERE status = 'leased';
CREATE INDEX idx_jobs_project ON scheduler.jobs(tenant_id, project_id);
CREATE INDEX idx_jobs_correlation ON scheduler.jobs(correlation_id);
-- Triggers table (cron-based job triggers)
CREATE TABLE IF NOT EXISTS scheduler.triggers (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
description TEXT,
job_type TEXT NOT NULL,
job_payload JSONB NOT NULL DEFAULT '{}',
cron_expression TEXT NOT NULL,
timezone TEXT NOT NULL DEFAULT 'UTC',
enabled BOOLEAN NOT NULL DEFAULT TRUE,
next_fire_at TIMESTAMPTZ,
last_fire_at TIMESTAMPTZ,
last_job_id UUID REFERENCES scheduler.jobs(id),
fire_count BIGINT NOT NULL DEFAULT 0,
misfire_count INT NOT NULL DEFAULT 0,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT,
UNIQUE(tenant_id, name)
);
CREATE INDEX idx_triggers_tenant_id ON scheduler.triggers(tenant_id);
CREATE INDEX idx_triggers_next_fire ON scheduler.triggers(enabled, next_fire_at)
WHERE enabled = TRUE;
CREATE INDEX idx_triggers_job_type ON scheduler.triggers(tenant_id, job_type);
-- Workers table (worker registration and heartbeat)
CREATE TABLE IF NOT EXISTS scheduler.workers (
id TEXT PRIMARY KEY,
tenant_id TEXT,
hostname TEXT NOT NULL,
process_id INT,
job_types TEXT[] NOT NULL DEFAULT '{}',
max_concurrent_jobs INT NOT NULL DEFAULT 1,
current_jobs INT NOT NULL DEFAULT 0,
status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'draining', 'stopped')),
last_heartbeat_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
registered_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_workers_status ON scheduler.workers(status);
CREATE INDEX idx_workers_heartbeat ON scheduler.workers(last_heartbeat_at);
CREATE INDEX idx_workers_tenant ON scheduler.workers(tenant_id);
-- Distributed locks using advisory locks wrapper
CREATE TABLE IF NOT EXISTS scheduler.locks (
lock_key TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
holder_id TEXT NOT NULL,
acquired_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_locks_tenant ON scheduler.locks(tenant_id);
CREATE INDEX idx_locks_expires ON scheduler.locks(expires_at);
-- Job history (completed jobs archive)
CREATE TABLE IF NOT EXISTS scheduler.job_history (
id BIGSERIAL PRIMARY KEY,
job_id UUID NOT NULL,
tenant_id TEXT NOT NULL,
project_id TEXT,
job_type TEXT NOT NULL,
status scheduler.job_status NOT NULL,
attempt INT NOT NULL,
payload_digest TEXT NOT NULL,
result JSONB,
reason TEXT,
worker_id TEXT,
duration_ms BIGINT,
created_at TIMESTAMPTZ NOT NULL,
completed_at TIMESTAMPTZ NOT NULL,
archived_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_job_history_tenant ON scheduler.job_history(tenant_id);
CREATE INDEX idx_job_history_job_id ON scheduler.job_history(job_id);
CREATE INDEX idx_job_history_type ON scheduler.job_history(tenant_id, job_type);
CREATE INDEX idx_job_history_completed ON scheduler.job_history(tenant_id, completed_at);
-- Metrics table (job execution metrics)
CREATE TABLE IF NOT EXISTS scheduler.metrics (
id BIGSERIAL PRIMARY KEY,
tenant_id TEXT NOT NULL,
job_type TEXT NOT NULL,
period_start TIMESTAMPTZ NOT NULL,
period_end TIMESTAMPTZ NOT NULL,
jobs_created BIGINT NOT NULL DEFAULT 0,
jobs_completed BIGINT NOT NULL DEFAULT 0,
jobs_failed BIGINT NOT NULL DEFAULT 0,
jobs_timed_out BIGINT NOT NULL DEFAULT 0,
avg_duration_ms BIGINT,
p50_duration_ms BIGINT,
p95_duration_ms BIGINT,
p99_duration_ms BIGINT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(tenant_id, job_type, period_start)
);
CREATE INDEX idx_metrics_tenant_period ON scheduler.metrics(tenant_id, period_start);
-- Function to update updated_at timestamp
CREATE OR REPLACE FUNCTION scheduler.update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Trigger for updated_at
CREATE TRIGGER trg_triggers_updated_at
BEFORE UPDATE ON scheduler.triggers
FOR EACH ROW EXECUTE FUNCTION scheduler.update_updated_at();
-- Schedules table (control-plane schedules)
CREATE TABLE IF NOT EXISTS scheduler.schedules (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
name TEXT NOT NULL,
description TEXT,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
cron_expression TEXT,
timezone TEXT NOT NULL DEFAULT 'UTC',
mode TEXT NOT NULL CHECK (mode IN ('analysisonly', 'contentrefresh')),
selection JSONB NOT NULL DEFAULT '{}',
only_if JSONB NOT NULL DEFAULT '{}',
notify JSONB NOT NULL DEFAULT '{}',
limits JSONB NOT NULL DEFAULT '{}',
subscribers TEXT[] NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by TEXT NOT NULL,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_by TEXT NOT NULL,
deleted_at TIMESTAMPTZ,
deleted_by TEXT
);
CREATE INDEX IF NOT EXISTS idx_schedules_tenant ON scheduler.schedules(tenant_id) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_schedules_enabled ON scheduler.schedules(tenant_id, enabled) WHERE deleted_at IS NULL;
CREATE UNIQUE INDEX IF NOT EXISTS uq_schedules_tenant_name_active ON scheduler.schedules(tenant_id, name) WHERE deleted_at IS NULL;
-- Runs table (execution records)
CREATE TABLE IF NOT EXISTS scheduler.runs (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
schedule_id TEXT REFERENCES scheduler.schedules(id),
state TEXT NOT NULL CHECK (state IN ('planning','queued','running','completed','error','cancelled')),
trigger TEXT NOT NULL,
stats JSONB NOT NULL DEFAULT '{}',
deltas JSONB NOT NULL DEFAULT '[]',
reason JSONB NOT NULL DEFAULT '{}',
retry_of TEXT REFERENCES scheduler.runs(id),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
error TEXT,
created_by TEXT,
updated_at TIMESTAMPTZ,
metadata JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX IF NOT EXISTS idx_runs_tenant_state ON scheduler.runs(tenant_id, state);
CREATE INDEX IF NOT EXISTS idx_runs_schedule ON scheduler.runs(schedule_id);
CREATE INDEX IF NOT EXISTS idx_runs_created ON scheduler.runs(created_at DESC);
-- Graph jobs table
CREATE TABLE IF NOT EXISTS scheduler.graph_jobs (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
sbom_id TEXT NOT NULL,
sbom_version_id TEXT,
sbom_digest TEXT NOT NULL,
graph_snapshot_id TEXT,
status TEXT NOT NULL CHECK (status IN ('pending','queued','running','completed','failed','cancelled')),
trigger TEXT NOT NULL CHECK (trigger IN ('sbom-version','backfill','manual')),
priority INT NOT NULL DEFAULT 100,
attempts INT NOT NULL DEFAULT 0,
max_attempts INT NOT NULL DEFAULT 3,
cartographer_job_id TEXT,
correlation_id TEXT,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
error TEXT,
error_details JSONB
);
CREATE INDEX IF NOT EXISTS idx_graph_jobs_tenant_status ON scheduler.graph_jobs(tenant_id, status);
CREATE INDEX IF NOT EXISTS idx_graph_jobs_sbom ON scheduler.graph_jobs(sbom_digest);
-- Policy run jobs table
CREATE TABLE IF NOT EXISTS scheduler.policy_jobs (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
policy_pack_id TEXT NOT NULL,
policy_version INT,
target_type TEXT NOT NULL,
target_id TEXT NOT NULL,
status TEXT NOT NULL CHECK (status IN ('pending','queued','running','completed','failed','cancelled')),
priority INT NOT NULL DEFAULT 100,
run_id TEXT,
requested_by TEXT,
mode TEXT,
metadata JSONB NOT NULL DEFAULT '{}',
inputs JSONB NOT NULL DEFAULT '{}',
attempt_count INT NOT NULL DEFAULT 0,
max_attempts INT NOT NULL DEFAULT 3,
queued_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
available_at TIMESTAMPTZ,
submitted_at TIMESTAMPTZ,
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
cancellation_requested BOOLEAN NOT NULL DEFAULT FALSE,
cancellation_reason TEXT,
cancelled_at TIMESTAMPTZ,
last_attempt_at TIMESTAMPTZ,
last_error TEXT,
lease_owner TEXT,
lease_expires_at TIMESTAMPTZ,
correlation_id TEXT
);
CREATE INDEX IF NOT EXISTS idx_policy_jobs_tenant_status ON scheduler.policy_jobs(tenant_id, status);
CREATE INDEX IF NOT EXISTS idx_policy_jobs_run ON scheduler.policy_jobs(run_id);
-- Impact snapshots table
CREATE TABLE IF NOT EXISTS scheduler.impact_snapshots (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
run_id TEXT NOT NULL REFERENCES scheduler.runs(id) ON DELETE CASCADE,
image_digest TEXT NOT NULL,
image_reference TEXT,
new_findings INT NOT NULL DEFAULT 0,
new_criticals INT NOT NULL DEFAULT 0,
new_high INT NOT NULL DEFAULT 0,
new_medium INT NOT NULL DEFAULT 0,
new_low INT NOT NULL DEFAULT 0,
total_findings INT NOT NULL DEFAULT 0,
kev_hits TEXT[] NOT NULL DEFAULT '{}',
top_findings JSONB NOT NULL DEFAULT '[]',
report_url TEXT,
attestation JSONB NOT NULL DEFAULT '{}',
detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_impact_snapshots_run ON scheduler.impact_snapshots(run_id);
CREATE INDEX IF NOT EXISTS idx_impact_snapshots_tenant ON scheduler.impact_snapshots(tenant_id, detected_at DESC);
-- Execution logs table
CREATE TABLE IF NOT EXISTS scheduler.execution_logs (
id BIGSERIAL PRIMARY KEY,
run_id TEXT NOT NULL REFERENCES scheduler.runs(id) ON DELETE CASCADE,
logged_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
level TEXT NOT NULL,
message TEXT NOT NULL,
logger TEXT,
data JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX IF NOT EXISTS idx_execution_logs_run ON scheduler.execution_logs(run_id);
-- Run summaries table
CREATE TABLE IF NOT EXISTS scheduler.run_summaries (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
schedule_id TEXT REFERENCES scheduler.schedules(id),
period_start TIMESTAMPTZ NOT NULL,
period_end TIMESTAMPTZ NOT NULL,
total_runs INT NOT NULL DEFAULT 0,
successful_runs INT NOT NULL DEFAULT 0,
failed_runs INT NOT NULL DEFAULT 0,
cancelled_runs INT NOT NULL DEFAULT 0,
avg_duration_seconds NUMERIC(10,2),
max_duration_seconds INT,
min_duration_seconds INT,
total_findings_detected INT NOT NULL DEFAULT 0,
new_criticals INT NOT NULL DEFAULT 0,
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (tenant_id, schedule_id, period_start)
);
CREATE INDEX IF NOT EXISTS idx_run_summaries_tenant ON scheduler.run_summaries(tenant_id, period_start DESC);
-- Audit table
CREATE TABLE IF NOT EXISTS scheduler.audit (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
action TEXT NOT NULL,
entity_type TEXT NOT NULL,
entity_id TEXT NOT NULL,
actor TEXT,
actor_type TEXT,
old_value JSONB,
new_value JSONB,
details JSONB NOT NULL DEFAULT '{}',
ip_address INET,
occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_audit_tenant_time ON scheduler.audit(tenant_id, occurred_at DESC);
CREATE INDEX IF NOT EXISTS idx_audit_entity ON scheduler.audit(entity_type, entity_id);

View File

@@ -0,0 +1,96 @@
-- Scheduler graph jobs schema (Postgres)
-- Legacy compatibility:
-- Earlier schema revisions shipped `scheduler.graph_jobs` as a column-per-field table with a TEXT primary key in `001_initial_schema.sql`.
-- This migration introduces a new JSON-payload based model with a `type` column and will fail on fresh installs
-- unless we either migrate or rename the legacy table first.
DO $$
DECLARE
rec RECORD;
BEGIN
IF EXISTS (
SELECT 1
FROM information_schema.tables
WHERE table_schema = 'scheduler'
AND table_name = 'graph_jobs'
) AND NOT EXISTS (
SELECT 1
FROM information_schema.columns
WHERE table_schema = 'scheduler'
AND table_name = 'graph_jobs'
AND column_name = 'type'
) THEN
-- Rename legacy table so we can create the v2 shape under the canonical name.
ALTER TABLE scheduler.graph_jobs RENAME TO graph_jobs_legacy;
-- Rename legacy constraints to avoid name collisions with the new table (e.g. graph_jobs_pkey).
FOR rec IN
SELECT c.conname
FROM pg_constraint c
JOIN pg_class rel ON rel.oid = c.conrelid
JOIN pg_namespace n ON n.oid = rel.relnamespace
WHERE n.nspname = 'scheduler'
AND rel.relname = 'graph_jobs_legacy'
LOOP
IF rec.conname LIKE 'graph_jobs%' THEN
EXECUTE format(
'ALTER TABLE scheduler.graph_jobs_legacy RENAME CONSTRAINT %I TO %I',
rec.conname,
replace(rec.conname, 'graph_jobs', 'graph_jobs_legacy'));
END IF;
END LOOP;
-- Rename legacy indexes to avoid collisions (idx_graph_jobs_* and graph_jobs_pkey).
FOR rec IN
SELECT indexname
FROM pg_indexes
WHERE schemaname = 'scheduler'
AND tablename = 'graph_jobs_legacy'
LOOP
IF rec.indexname = 'graph_jobs_pkey' THEN
EXECUTE format(
'ALTER INDEX scheduler.%I RENAME TO %I',
rec.indexname,
'graph_jobs_legacy_pkey');
ELSIF rec.indexname LIKE 'idx_graph_jobs%' THEN
EXECUTE format(
'ALTER INDEX scheduler.%I RENAME TO %I',
rec.indexname,
replace(rec.indexname, 'idx_graph_jobs', 'idx_graph_jobs_legacy'));
END IF;
END LOOP;
END IF;
END $$;
DO $$ BEGIN
CREATE TYPE scheduler.graph_job_type AS ENUM ('build', 'overlay');
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
DO $$ BEGIN
CREATE TYPE scheduler.graph_job_status AS ENUM ('pending', 'running', 'completed', 'failed', 'canceled');
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
CREATE TABLE IF NOT EXISTS scheduler.graph_jobs (
id UUID PRIMARY KEY,
tenant_id TEXT NOT NULL,
type scheduler.graph_job_type NOT NULL,
status scheduler.graph_job_status NOT NULL,
payload JSONB NOT NULL,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_graph_jobs_tenant_status ON scheduler.graph_jobs(tenant_id, status, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_graph_jobs_tenant_type_status ON scheduler.graph_jobs(tenant_id, type, status, created_at DESC);
CREATE TABLE IF NOT EXISTS scheduler.graph_job_events (
id BIGSERIAL PRIMARY KEY,
job_id UUID NOT NULL REFERENCES scheduler.graph_jobs(id) ON DELETE CASCADE,
tenant_id TEXT NOT NULL,
status scheduler.graph_job_status NOT NULL,
payload JSONB NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_graph_job_events_job ON scheduler.graph_job_events(job_id, created_at DESC);
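-- Verification (illustrative, run manually):
--   SELECT table_name FROM information_schema.tables
--   WHERE table_schema = 'scheduler' AND table_name LIKE 'graph_jobs%';
-- An install that previously ran 001_initial_schema.sql should list graph_jobs,
-- graph_jobs_legacy, and graph_job_events.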

View File

@@ -0,0 +1,75 @@
-- Scheduler Schema Migration 003: Runs, Impact Snapshots, Policy Run Jobs
DO $$ BEGIN
CREATE TYPE scheduler.run_state AS ENUM ('planning','queued','running','completed','error','cancelled');
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
DO $$ BEGIN
CREATE TYPE scheduler.policy_run_status AS ENUM ('pending','submitted','retrying','failed','completed','cancelled');
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
CREATE TABLE IF NOT EXISTS scheduler.runs (
id TEXT NOT NULL,
tenant_id TEXT NOT NULL,
schedule_id TEXT,
trigger JSONB NOT NULL,
state scheduler.run_state NOT NULL,
stats JSONB NOT NULL,
reason JSONB NOT NULL,
created_at TIMESTAMPTZ NOT NULL,
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
error TEXT,
deltas JSONB NOT NULL,
retry_of TEXT,
schema_version TEXT,
PRIMARY KEY (tenant_id, id)
);
CREATE INDEX IF NOT EXISTS idx_runs_state ON scheduler.runs(state);
CREATE INDEX IF NOT EXISTS idx_runs_schedule ON scheduler.runs(tenant_id, schedule_id);
CREATE INDEX IF NOT EXISTS idx_runs_created ON scheduler.runs(created_at);
CREATE TABLE IF NOT EXISTS scheduler.impact_snapshots (
snapshot_id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
run_id TEXT,
impact JSONB NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_impact_snapshots_run ON scheduler.impact_snapshots(run_id);
CREATE TABLE IF NOT EXISTS scheduler.policy_run_jobs (
id TEXT PRIMARY KEY,
tenant_id TEXT NOT NULL,
policy_id TEXT NOT NULL,
policy_version INT,
mode TEXT NOT NULL,
priority INT NOT NULL,
priority_rank INT NOT NULL,
run_id TEXT,
requested_by TEXT,
correlation_id TEXT,
metadata JSONB,
inputs JSONB NOT NULL,
queued_at TIMESTAMPTZ,
status scheduler.policy_run_status NOT NULL,
attempt_count INT NOT NULL,
last_attempt_at TIMESTAMPTZ,
last_error TEXT,
created_at TIMESTAMPTZ NOT NULL,
updated_at TIMESTAMPTZ NOT NULL,
available_at TIMESTAMPTZ NOT NULL,
submitted_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
lease_owner TEXT,
lease_expires_at TIMESTAMPTZ,
cancellation_requested BOOLEAN NOT NULL DEFAULT FALSE,
cancellation_requested_at TIMESTAMPTZ,
cancellation_reason TEXT,
cancelled_at TIMESTAMPTZ,
schema_version TEXT
);
CREATE INDEX IF NOT EXISTS idx_policy_run_jobs_tenant ON scheduler.policy_run_jobs(tenant_id);
CREATE INDEX IF NOT EXISTS idx_policy_run_jobs_status ON scheduler.policy_run_jobs(status);
CREATE INDEX IF NOT EXISTS idx_policy_run_jobs_run ON scheduler.policy_run_jobs(run_id);
CREATE INDEX IF NOT EXISTS idx_policy_run_jobs_policy ON scheduler.policy_run_jobs(tenant_id, policy_id);

View File

@@ -0,0 +1,68 @@
-- Scheduler Schema Migration 010: Generated Columns for Runs Stats
-- Sprint: SPRINT_3423_0001_0001 - Generated Columns for JSONB Hot Keys
-- Category: A (safe, can run at startup)
--
-- Purpose: Extract frequently-queried fields from stats JSONB as generated columns
-- to enable efficient B-tree indexing and accurate query planning statistics.
BEGIN;
-- Add generated columns for hot stats fields
-- These are computed automatically when stats JSONB is updated
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS finding_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'findingCount'), '')::int) STORED;
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS critical_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'criticalCount'), '')::int) STORED;
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS high_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'highCount'), '')::int) STORED;
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS new_finding_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'newFindingCount'), '')::int) STORED;
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS component_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'componentCount'), '')::int) STORED;
-- Commit the column additions first: CREATE INDEX CONCURRENTLY cannot run
-- inside a transaction block, so the indexes below are built outside it.
COMMIT;
-- Index for dashboard query: runs with findings
CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_runs_with_findings
ON scheduler.runs (tenant_id, created_at DESC)
WHERE finding_count > 0;
-- Index for critical findings alerting
CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_runs_critical
ON scheduler.runs (tenant_id, created_at DESC, critical_count)
WHERE critical_count > 0;
-- Covering index for run summary dashboard
-- Enables index-only scans for common dashboard queries
CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_runs_summary_cover
ON scheduler.runs (tenant_id, state, created_at DESC)
INCLUDE (finding_count, critical_count, high_count, new_finding_count);
-- Index for trend analysis queries
CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_runs_tenant_findings
ON scheduler.runs (tenant_id, finding_count DESC, created_at DESC)
WHERE state = 'completed';
-- Update statistics for query planner
ANALYZE scheduler.runs;
-- Verification query (run manually to confirm):
-- SELECT
-- id,
-- stats->>'findingCount' as json_finding_count,
-- finding_count as generated_finding_count,
-- stats->>'criticalCount' as json_critical_count,
-- critical_count as generated_critical_count
-- FROM scheduler.runs
-- WHERE stats != '{}'
-- LIMIT 10;
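-- Example dashboard aggregate the covering index ix_runs_summary_cover can
-- serve with an index-only scan (illustrative; tenant value is a placeholder):
-- SELECT state, count(*) AS runs, sum(finding_count) AS findings,
--        sum(critical_count) AS criticals
-- FROM scheduler.runs
-- WHERE tenant_id = 'tenant-a'
--   AND created_at > NOW() - INTERVAL '7 days'
-- GROUP BY state;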

View File

@@ -0,0 +1,237 @@
-- Scheduler Schema Migration 011: Row-Level Security
-- Sprint: SPRINT_3421_0001_0001 - RLS Expansion
-- Category: B (release migration, requires coordination)
--
-- Purpose: Enable Row-Level Security on all tenant-scoped tables in the scheduler
-- schema to provide database-level tenant isolation as defense-in-depth.
BEGIN;
-- ============================================================================
-- Step 1: Create helper schema and function for tenant context
-- ============================================================================
CREATE SCHEMA IF NOT EXISTS scheduler_app;
-- Tenant context helper function
-- SECURITY DEFINER ensures the function runs with owner privileges
-- to access the session variable even when RLS restricts the caller
CREATE OR REPLACE FUNCTION scheduler_app.require_current_tenant()
RETURNS TEXT
LANGUAGE plpgsql STABLE SECURITY DEFINER
AS $$
DECLARE
v_tenant TEXT;
BEGIN
v_tenant := current_setting('app.tenant_id', true);
IF v_tenant IS NULL OR v_tenant = '' THEN
RAISE EXCEPTION 'app.tenant_id session variable not set'
USING HINT = 'Set via: SELECT set_config(''app.tenant_id'', ''<tenant>'', false)',
ERRCODE = 'P0001';
END IF;
RETURN v_tenant;
END;
$$;
-- Restrict function execution to application role only
REVOKE ALL ON FUNCTION scheduler_app.require_current_tenant() FROM PUBLIC;
-- ============================================================================
-- Step 2: Enable RLS on tables with direct tenant_id column
-- ============================================================================
-- scheduler.schedules
ALTER TABLE scheduler.schedules ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.schedules FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS schedules_tenant_isolation ON scheduler.schedules;
CREATE POLICY schedules_tenant_isolation ON scheduler.schedules
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.runs
ALTER TABLE scheduler.runs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.runs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS runs_tenant_isolation ON scheduler.runs;
CREATE POLICY runs_tenant_isolation ON scheduler.runs
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.jobs
ALTER TABLE scheduler.jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.jobs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS jobs_tenant_isolation ON scheduler.jobs;
CREATE POLICY jobs_tenant_isolation ON scheduler.jobs
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.triggers
ALTER TABLE scheduler.triggers ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.triggers FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS triggers_tenant_isolation ON scheduler.triggers;
CREATE POLICY triggers_tenant_isolation ON scheduler.triggers
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.graph_jobs
ALTER TABLE scheduler.graph_jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.graph_jobs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS graph_jobs_tenant_isolation ON scheduler.graph_jobs;
CREATE POLICY graph_jobs_tenant_isolation ON scheduler.graph_jobs
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.policy_jobs
ALTER TABLE scheduler.policy_jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.policy_jobs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS policy_jobs_tenant_isolation ON scheduler.policy_jobs;
CREATE POLICY policy_jobs_tenant_isolation ON scheduler.policy_jobs
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.locks
ALTER TABLE scheduler.locks ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.locks FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS locks_tenant_isolation ON scheduler.locks;
CREATE POLICY locks_tenant_isolation ON scheduler.locks
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.impact_snapshots
ALTER TABLE scheduler.impact_snapshots ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.impact_snapshots FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS impact_snapshots_tenant_isolation ON scheduler.impact_snapshots;
CREATE POLICY impact_snapshots_tenant_isolation ON scheduler.impact_snapshots
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.run_summaries
ALTER TABLE scheduler.run_summaries ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.run_summaries FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS run_summaries_tenant_isolation ON scheduler.run_summaries;
CREATE POLICY run_summaries_tenant_isolation ON scheduler.run_summaries
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.audit
ALTER TABLE scheduler.audit ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.audit FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS audit_tenant_isolation ON scheduler.audit;
CREATE POLICY audit_tenant_isolation ON scheduler.audit
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.job_history
ALTER TABLE scheduler.job_history ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.job_history FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS job_history_tenant_isolation ON scheduler.job_history;
CREATE POLICY job_history_tenant_isolation ON scheduler.job_history
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.metrics
ALTER TABLE scheduler.metrics ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.metrics FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS metrics_tenant_isolation ON scheduler.metrics;
CREATE POLICY metrics_tenant_isolation ON scheduler.metrics
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- ============================================================================
-- Step 3: FK-based RLS for child tables (inherit tenant from parent)
-- ============================================================================
-- scheduler.execution_logs inherits tenant from scheduler.runs
ALTER TABLE scheduler.execution_logs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.execution_logs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS execution_logs_tenant_isolation ON scheduler.execution_logs;
CREATE POLICY execution_logs_tenant_isolation ON scheduler.execution_logs
FOR ALL
USING (
run_id IN (
SELECT id FROM scheduler.runs
WHERE tenant_id = scheduler_app.require_current_tenant()
)
);
-- ============================================================================
-- Step 4: Skip RLS for global tables (no tenant_id)
-- ============================================================================
-- scheduler.workers is intentionally NOT RLS-protected
-- Workers are global resources shared across tenants
-- The tenant_id column in workers is optional and for affinity only
COMMENT ON TABLE scheduler.workers IS
'Global worker registry. Not RLS-protected - workers serve all tenants.';
-- ============================================================================
-- Step 5: Create admin bypass role
-- ============================================================================
-- Create role for admin operations (migrations, cross-tenant queries)
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'scheduler_admin') THEN
CREATE ROLE scheduler_admin WITH NOLOGIN BYPASSRLS;
END IF;
END
$$;
-- Grant bypass role to admin users
-- GRANT scheduler_admin TO stellaops_admin;
-- GRANT scheduler_admin TO stellaops_migration;
COMMIT;
-- ============================================================================
-- Verification queries (run manually)
-- ============================================================================
-- Check RLS status on all scheduler tables:
/*
SELECT
schemaname,
tablename,
rowsecurity AS rls_enabled,
forcerowsecurity AS rls_forced
FROM pg_tables
WHERE schemaname = 'scheduler'
ORDER BY tablename;
*/
-- List all RLS policies:
/*
SELECT
schemaname,
tablename,
policyname,
permissive,
roles,
cmd,
qual,
with_check
FROM pg_policies
WHERE schemaname = 'scheduler'
ORDER BY tablename, policyname;
*/
-- Test RLS enforcement:
/*
-- Should fail: no tenant set
SELECT * FROM scheduler.runs LIMIT 1;
-- Should work: tenant set
SELECT set_config('app.tenant_id', 'test-tenant', false);
SELECT * FROM scheduler.runs LIMIT 1;
*/

View File

@@ -0,0 +1,158 @@
-- Scheduler Schema Migration 012: Partitioned Audit Table
-- Sprint: SPRINT_3422_0001_0001 - Time-Based Partitioning
-- Category: A (schema addition, safe to run anytime)
--
-- Purpose: Create scheduler.audit as a partitioned table for improved
-- query performance on time-range queries and easier data lifecycle management.
--
-- This creates a new partitioned audit table. If an existing non-partitioned
-- scheduler.audit table exists, run 012b_migrate_audit_data.sql to migrate data.
--
-- Partition strategy: Monthly by created_at
BEGIN;
-- ============================================================================
-- Step 1: Create partitioned audit table (or skip if already exists)
-- ============================================================================
-- Check if audit table already exists (partitioned or not)
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = 'scheduler' AND c.relname = 'audit'
) THEN
        RAISE NOTICE 'scheduler.audit already exists - skipping creation. If it is a legacy non-partitioned table, rename it and run 012b to migrate.';
RETURN;
END IF;
END
$$;
CREATE TABLE IF NOT EXISTS scheduler.audit (
id BIGSERIAL,
tenant_id TEXT NOT NULL,
user_id UUID,
action TEXT NOT NULL,
resource_type TEXT NOT NULL,
resource_id TEXT,
old_value JSONB,
new_value JSONB,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);
-- ============================================================================
-- Step 2: Create initial partitions (past 6 months + 3 months ahead)
-- ============================================================================
-- Create partitions for historical data and future
DO $$
DECLARE
v_start DATE;
v_end DATE;
v_partition_name TEXT;
BEGIN
-- Start from 6 months ago
v_start := date_trunc('month', NOW() - INTERVAL '6 months')::DATE;
-- Create partitions until 3 months ahead
WHILE v_start <= date_trunc('month', NOW() + INTERVAL '3 months')::DATE LOOP
v_end := (v_start + INTERVAL '1 month')::DATE;
v_partition_name := 'audit_' || to_char(v_start, 'YYYY_MM');
IF NOT EXISTS (
SELECT 1 FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = 'scheduler' AND c.relname = v_partition_name
) THEN
EXECUTE format(
'CREATE TABLE scheduler.%I PARTITION OF scheduler.audit
FOR VALUES FROM (%L) TO (%L)',
v_partition_name, v_start, v_end
);
RAISE NOTICE 'Created partition scheduler.%', v_partition_name;
END IF;
v_start := v_end;
END LOOP;
END
$$;
-- Create default partition for any data outside defined ranges
CREATE TABLE IF NOT EXISTS scheduler.audit_default
PARTITION OF scheduler.audit DEFAULT;
-- ============================================================================
-- Step 3: Create indexes on partitioned table
-- ============================================================================
CREATE INDEX IF NOT EXISTS ix_audit_tenant
ON scheduler.audit (tenant_id);
CREATE INDEX IF NOT EXISTS ix_audit_resource
ON scheduler.audit (resource_type, resource_id);
CREATE INDEX IF NOT EXISTS ix_audit_correlation
ON scheduler.audit (correlation_id)
WHERE correlation_id IS NOT NULL;
-- BRIN index for time-range queries (very efficient for time-series data)
CREATE INDEX IF NOT EXISTS brin_audit_created
ON scheduler.audit USING BRIN (created_at)
WITH (pages_per_range = 128);
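-- Illustrative time-range query that benefits from monthly partition pruning
-- plus the BRIN index:
/*
SELECT action, resource_type, created_at
FROM scheduler.audit
WHERE created_at >= NOW() - INTERVAL '7 days'
ORDER BY created_at DESC
LIMIT 100;
*/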
-- ============================================================================
-- Step 4: Enable RLS on audit table
-- ============================================================================
ALTER TABLE scheduler.audit ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.audit FORCE ROW LEVEL SECURITY;
-- Create tenant isolation policy (use function if available)
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_proc p
JOIN pg_namespace n ON p.pronamespace = n.oid
WHERE n.nspname = 'scheduler_app' AND p.proname = 'require_current_tenant'
) THEN
EXECUTE 'CREATE POLICY audit_tenant_isolation ON scheduler.audit
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant())';
ELSE
RAISE NOTICE 'RLS helper function not found; creating permissive policy';
EXECUTE 'CREATE POLICY audit_tenant_isolation ON scheduler.audit FOR ALL USING (true)';
END IF;
EXCEPTION
WHEN duplicate_object THEN
RAISE NOTICE 'Policy audit_tenant_isolation already exists';
END
$$;
-- ============================================================================
-- Step 5: Add comment about partitioning strategy
-- ============================================================================
COMMENT ON TABLE scheduler.audit IS
'Audit log for scheduler operations. Partitioned monthly by created_at for retention management.';
-- ============================================================================
-- Step 6: Register with partition management (if available)
-- ============================================================================
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM pg_tables WHERE schemaname = 'partition_mgmt' AND tablename = 'managed_tables') THEN
INSERT INTO partition_mgmt.managed_tables (schema_name, table_name, partition_key, partition_type, retention_months, months_ahead)
VALUES ('scheduler', 'audit', 'created_at', 'monthly', 12, 3)
ON CONFLICT (schema_name, table_name) DO UPDATE
SET retention_months = EXCLUDED.retention_months, months_ahead = EXCLUDED.months_ahead;
END IF;
END
$$;
COMMIT;

View File

@@ -0,0 +1,181 @@
-- Scheduler Schema Migration 012b: Migrate Legacy Audit Data to Partitioned Table
-- Sprint: SPRINT_3422_0001_0001 - Time-Based Partitioning
-- Task: 2.3 - Migrate data from existing non-partitioned table (if exists)
-- Category: C (data migration, requires maintenance window)
--
-- IMPORTANT: Only run this if you have an existing non-partitioned scheduler.audit table
-- that needs migration to the new partitioned schema.
--
-- If you're starting fresh (no legacy data), skip this migration entirely.
--
-- Prerequisites:
-- 1. Stop scheduler services (pause all run processing)
-- 2. Run 012_partition_audit.sql first to create the partitioned table
-- 3. Verify partitioned table exists: \d+ scheduler.audit
BEGIN;
-- ============================================================================
-- Step 1: Check if legacy migration is needed
-- ============================================================================
DO $$
DECLARE
v_has_legacy BOOLEAN := FALSE;
v_has_partitioned BOOLEAN := FALSE;
BEGIN
-- Check for legacy non-partitioned table (renamed to audit_legacy or audit_old)
SELECT EXISTS (
SELECT 1 FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = 'scheduler' AND c.relname IN ('audit_legacy', 'audit_old')
) INTO v_has_legacy;
-- Check for partitioned table
SELECT EXISTS (
SELECT 1 FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = 'scheduler' AND c.relname = 'audit'
AND c.relkind = 'p' -- 'p' = partitioned table
) INTO v_has_partitioned;
IF NOT v_has_legacy THEN
RAISE NOTICE 'No legacy audit table found (audit_legacy or audit_old). Skipping migration.';
RETURN;
END IF;
IF NOT v_has_partitioned THEN
RAISE EXCEPTION 'Partitioned scheduler.audit table not found. Run 012_partition_audit.sql first.';
END IF;
RAISE NOTICE 'Legacy audit table found. Proceeding with migration...';
END
$$;
-- ============================================================================
-- Step 2: Record row counts for verification
-- ============================================================================
DO $$
DECLARE
v_source_count BIGINT := 0;
v_source_table TEXT;
BEGIN
-- Find the legacy table
SELECT relname INTO v_source_table
FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = 'scheduler' AND c.relname IN ('audit_legacy', 'audit_old')
LIMIT 1;
IF v_source_table IS NULL THEN
RAISE NOTICE 'No legacy table found. Skipping.';
RETURN;
END IF;
EXECUTE format('SELECT COUNT(*) FROM scheduler.%I', v_source_table) INTO v_source_count;
RAISE NOTICE 'Source table (%) row count: %', v_source_table, v_source_count;
END
$$;
-- ============================================================================
-- Step 3: Migrate data from legacy table to partitioned table
-- ============================================================================
-- Try audit_legacy first, then audit_old
DO $$
DECLARE
v_source_table TEXT;
v_migrated BIGINT;
BEGIN
SELECT relname INTO v_source_table
FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = 'scheduler' AND c.relname IN ('audit_legacy', 'audit_old')
LIMIT 1;
IF v_source_table IS NULL THEN
RAISE NOTICE 'No legacy table to migrate from.';
RETURN;
END IF;
EXECUTE format(
'INSERT INTO scheduler.audit (
id, tenant_id, user_id, action, resource_type, resource_id,
old_value, new_value, correlation_id, created_at
)
SELECT
id, tenant_id, user_id, action, resource_type, resource_id,
old_value, new_value, correlation_id, created_at
FROM scheduler.%I
ON CONFLICT DO NOTHING',
v_source_table
);
GET DIAGNOSTICS v_migrated = ROW_COUNT;
RAISE NOTICE 'Migrated % rows from scheduler.% to scheduler.audit', v_migrated, v_source_table;
END
$$;
-- ============================================================================
-- Step 4: Verify row counts match
-- ============================================================================
DO $$
DECLARE
v_source_count BIGINT := 0;
v_target_count BIGINT;
v_source_table TEXT;
BEGIN
SELECT relname INTO v_source_table
FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = 'scheduler' AND c.relname IN ('audit_legacy', 'audit_old')
LIMIT 1;
IF v_source_table IS NOT NULL THEN
EXECUTE format('SELECT COUNT(*) FROM scheduler.%I', v_source_table) INTO v_source_count;
END IF;
SELECT COUNT(*) INTO v_target_count FROM scheduler.audit;
IF v_source_count > 0 AND v_source_count <> v_target_count THEN
RAISE WARNING 'Row count mismatch: source=% target=%. Check for conflicts.', v_source_count, v_target_count;
ELSE
RAISE NOTICE 'Migration complete: % rows in partitioned table', v_target_count;
END IF;
END
$$;
-- ============================================================================
-- Step 5: Update sequence to continue from max ID
-- ============================================================================
DO $$
DECLARE
v_max_id BIGINT;
BEGIN
SELECT COALESCE(MAX(id), 0) INTO v_max_id FROM scheduler.audit;
    IF EXISTS (SELECT 1 FROM pg_sequences WHERE schemaname = 'scheduler' AND sequencename LIKE 'audit%seq') THEN
        PERFORM setval(pg_get_serial_sequence('scheduler.audit', 'id'), GREATEST(v_max_id + 1, 1), false);
        RAISE NOTICE 'Sequence updated to start from %', v_max_id + 1;
    ELSE
        RAISE NOTICE 'No audit id sequence found; skipping sequence update.';
    END IF;
END
$$;
-- ============================================================================
-- Step 6: Add migration completion comment
-- ============================================================================
-- COMMENT ON does not accept expressions, so build the string dynamically.
DO $$
BEGIN
    EXECUTE format('COMMENT ON TABLE scheduler.audit IS %L',
        'Audit log for scheduler operations. Partitioned monthly by created_at. Legacy migration completed: ' || NOW()::TEXT);
END
$$;
COMMIT;
-- ============================================================================
-- Cleanup (run manually after validation - wait 24-48h)
-- ============================================================================
-- After confirming the migration is successful, drop the legacy table:
-- DROP TABLE IF EXISTS scheduler.audit_legacy;
-- DROP TABLE IF EXISTS scheduler.audit_old;
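-- Optional sanity check before dropping: per-partition row distribution
-- (run with the scheduler_admin role so RLS does not filter rows):
-- SELECT tableoid::regclass AS partition, COUNT(*) FROM scheduler.audit GROUP BY 1 ORDER BY 1;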

View File

@@ -0,0 +1,19 @@
using System.Text.Json;
namespace StellaOps.Scheduler.Persistence.Postgres;
internal static class CanonicalJsonSerializer
{
private static readonly JsonSerializerOptions Options = new(JsonSerializerDefaults.Web)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
WriteIndented = false
};
public static string Serialize<T>(T value) => JsonSerializer.Serialize(value, Options);
public static T? Deserialize<T>(string json) => JsonSerializer.Deserialize<T>(json, Options);
public static JsonSerializerOptions Settings => Options;
}
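
// Illustrative round-trip with the shared options (payload shape is hypothetical):
// var json = CanonicalJsonSerializer.Serialize(new { runId = Guid.NewGuid() });
// var map = CanonicalJsonSerializer.Deserialize<Dictionary<string, object>>(json);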

View File

@@ -0,0 +1,164 @@
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Scope type for failure signatures.
/// </summary>
public enum FailureSignatureScopeType
{
/// <summary>Repository scope.</summary>
Repo,
/// <summary>Container image scope.</summary>
Image,
/// <summary>Artifact scope.</summary>
Artifact,
/// <summary>Global scope (all tenants).</summary>
Global
}
/// <summary>
/// Error category for failure classification.
/// </summary>
public enum ErrorCategory
{
/// <summary>Network-related failure.</summary>
Network,
/// <summary>Authentication/authorization failure.</summary>
Auth,
/// <summary>Validation failure.</summary>
Validation,
/// <summary>Resource exhaustion (memory, disk, CPU).</summary>
Resource,
/// <summary>Operation timeout.</summary>
Timeout,
/// <summary>Configuration error.</summary>
Config,
/// <summary>Unknown/uncategorized error.</summary>
Unknown
}
/// <summary>
/// Resolution status for failure signatures.
/// </summary>
public enum ResolutionStatus
{
/// <summary>Issue is not yet resolved.</summary>
Unresolved,
/// <summary>Issue is being investigated.</summary>
Investigating,
/// <summary>Issue has been resolved.</summary>
Resolved,
/// <summary>Issue will not be fixed.</summary>
WontFix
}
/// <summary>
/// Predicted outcome for TTFS hints.
/// </summary>
public enum PredictedOutcome
{
/// <summary>Prediction not available.</summary>
Unknown,
/// <summary>Expected to pass.</summary>
Pass,
/// <summary>Expected to fail.</summary>
Fail,
/// <summary>Expected to be flaky.</summary>
Flaky
}
/// <summary>
/// Represents a failure signature entity for predictive TTFS hints.
/// Tracks common failure patterns by scope, toolchain, and error code.
/// </summary>
public sealed class FailureSignatureEntity
{
/// <summary>
/// Unique signature identifier.
/// </summary>
public Guid SignatureId { get; init; }
/// <summary>
/// Tenant this signature belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// When this signature was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; } = DateTimeOffset.UtcNow;
/// <summary>
/// When this signature was last updated.
/// </summary>
public DateTimeOffset UpdatedAt { get; init; } = DateTimeOffset.UtcNow;
/// <summary>
/// Type of scope for this signature.
/// </summary>
public FailureSignatureScopeType ScopeType { get; init; }
/// <summary>
/// Identifier within the scope (repo name, image digest, etc).
/// </summary>
public required string ScopeId { get; init; }
/// <summary>
/// Hash of the toolchain/build environment.
/// </summary>
public required string ToolchainHash { get; init; }
/// <summary>
/// Error code if available.
/// </summary>
public string? ErrorCode { get; init; }
/// <summary>
/// Category of error.
/// </summary>
public ErrorCategory? ErrorCategory { get; init; }
/// <summary>
/// Number of times this signature has been seen.
/// </summary>
public int OccurrenceCount { get; init; } = 1;
/// <summary>
/// When this signature was first seen.
/// </summary>
public DateTimeOffset FirstSeenAt { get; init; } = DateTimeOffset.UtcNow;
/// <summary>
/// When this signature was last seen.
/// </summary>
public DateTimeOffset LastSeenAt { get; init; } = DateTimeOffset.UtcNow;
/// <summary>
/// Current resolution status.
/// </summary>
public ResolutionStatus ResolutionStatus { get; init; } = ResolutionStatus.Unresolved;
/// <summary>
/// Notes about resolution.
/// </summary>
public string? ResolutionNotes { get; init; }
/// <summary>
/// When the issue was resolved.
/// </summary>
public DateTimeOffset? ResolvedAt { get; init; }
/// <summary>
/// Who resolved the issue.
/// </summary>
public string? ResolvedBy { get; init; }
/// <summary>
/// Predicted outcome based on this signature.
/// </summary>
public PredictedOutcome PredictedOutcome { get; init; } = PredictedOutcome.Unknown;
/// <summary>
/// Confidence score for the prediction (0.0 to 1.0).
/// </summary>
public decimal? ConfidenceScore { get; init; }
}
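
// Illustrative construction (all values hypothetical):
// var signature = new FailureSignatureEntity
// {
//     TenantId = "tenant-a",
//     ScopeType = FailureSignatureScopeType.Repo,
//     ScopeId = "git.example.com/org/app",
//     ToolchainHash = "tc-sha256-4f2a",
//     ErrorCode = "E_TIMEOUT",
//     ErrorCategory = ErrorCategory.Timeout
// };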

View File

@@ -0,0 +1,150 @@
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Job status values matching the PostgreSQL enum.
/// </summary>
public enum JobStatus
{
/// <summary>Job is pending.</summary>
Pending,
/// <summary>Job is scheduled.</summary>
Scheduled,
/// <summary>Job is leased to a worker.</summary>
Leased,
/// <summary>Job is running.</summary>
Running,
/// <summary>Job succeeded.</summary>
Succeeded,
/// <summary>Job failed.</summary>
Failed,
/// <summary>Job was canceled.</summary>
Canceled,
/// <summary>Job timed out.</summary>
TimedOut
}
/// <summary>
/// Represents a job entity in the scheduler schema.
/// </summary>
public sealed class JobEntity
{
/// <summary>
/// Unique job identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant this job belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Optional project identifier.
/// </summary>
public string? ProjectId { get; init; }
/// <summary>
/// Type of job to execute.
/// </summary>
public required string JobType { get; init; }
/// <summary>
/// Current job status.
/// </summary>
public JobStatus Status { get; init; } = JobStatus.Pending;
/// <summary>
/// Job priority (higher = more important).
/// </summary>
public int Priority { get; init; }
/// <summary>
/// Job payload as JSON.
/// </summary>
public string Payload { get; init; } = "{}";
/// <summary>
/// SHA256 digest of payload for deduplication.
/// </summary>
public required string PayloadDigest { get; init; }
/// <summary>
/// Idempotency key (unique per tenant).
/// </summary>
public required string IdempotencyKey { get; init; }
/// <summary>
/// Correlation ID for tracing.
/// </summary>
public string? CorrelationId { get; init; }
/// <summary>
/// Current attempt number.
/// </summary>
public int Attempt { get; init; }
/// <summary>
/// Maximum number of attempts.
/// </summary>
public int MaxAttempts { get; init; } = 3;
/// <summary>
/// Current lease ID if leased.
/// </summary>
public Guid? LeaseId { get; init; }
/// <summary>
/// Worker ID holding the lease.
/// </summary>
public string? WorkerId { get; init; }
/// <summary>
/// Lease expiration time.
/// </summary>
public DateTimeOffset? LeaseUntil { get; init; }
/// <summary>
/// Don't run before this time.
/// </summary>
public DateTimeOffset? NotBefore { get; init; }
/// <summary>
/// Reason for failure/cancellation.
/// </summary>
public string? Reason { get; init; }
/// <summary>
/// Job result as JSON.
/// </summary>
public string? Result { get; init; }
/// <summary>
/// When the job was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the job was scheduled.
/// </summary>
public DateTimeOffset? ScheduledAt { get; init; }
/// <summary>
/// When the job was leased.
/// </summary>
public DateTimeOffset? LeasedAt { get; init; }
/// <summary>
/// When the job started running.
/// </summary>
public DateTimeOffset? StartedAt { get; init; }
/// <summary>
/// When the job completed.
/// </summary>
public DateTimeOffset? CompletedAt { get; init; }
/// <summary>
/// User who created the job.
/// </summary>
public string? CreatedBy { get; init; }
}
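
// Illustrative payload digest for deduplication, assuming the digest is the
// SHA-256 hex of the canonical JSON payload (convention not confirmed here):
// var payload = CanonicalJsonSerializer.Serialize(new { imageDigest = "sha256:..." });
// var digest = Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(payload))).ToLowerInvariant();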

View File

@@ -0,0 +1,82 @@
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Represents a job history entity in the scheduler schema.
/// </summary>
public sealed class JobHistoryEntity
{
/// <summary>
/// Unique history entry identifier.
/// </summary>
public long Id { get; init; }
/// <summary>
/// Original job ID.
/// </summary>
public required Guid JobId { get; init; }
/// <summary>
/// Tenant this job belonged to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Optional project identifier.
/// </summary>
public string? ProjectId { get; init; }
/// <summary>
/// Type of job that was executed.
/// </summary>
public required string JobType { get; init; }
/// <summary>
/// Final job status.
/// </summary>
public JobStatus Status { get; init; }
/// <summary>
/// Attempt number when archived.
/// </summary>
public int Attempt { get; init; }
/// <summary>
/// SHA256 digest of payload.
/// </summary>
public required string PayloadDigest { get; init; }
/// <summary>
/// Job result as JSON.
/// </summary>
public string? Result { get; init; }
/// <summary>
/// Reason for failure/cancellation.
/// </summary>
public string? Reason { get; init; }
/// <summary>
/// Worker that executed the job.
/// </summary>
public string? WorkerId { get; init; }
/// <summary>
/// Duration in milliseconds.
/// </summary>
public long? DurationMs { get; init; }
/// <summary>
/// When the job was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the job completed.
/// </summary>
public DateTimeOffset CompletedAt { get; init; }
/// <summary>
/// When the job was archived to history.
/// </summary>
public DateTimeOffset ArchivedAt { get; init; }
}

View File

@@ -0,0 +1,37 @@
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Represents a distributed lock entity in the scheduler schema.
/// </summary>
public sealed class LockEntity
{
/// <summary>
/// Lock key (primary key).
/// </summary>
public required string LockKey { get; init; }
/// <summary>
/// Tenant this lock belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// ID of the holder that acquired the lock.
/// </summary>
public required string HolderId { get; init; }
/// <summary>
/// When the lock was acquired.
/// </summary>
public DateTimeOffset AcquiredAt { get; init; }
/// <summary>
/// When the lock expires.
/// </summary>
public DateTimeOffset ExpiresAt { get; init; }
/// <summary>
/// Lock metadata as JSON.
/// </summary>
public string Metadata { get; init; } = "{}";
}

View File

@@ -0,0 +1,77 @@
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Represents a metrics entity in the scheduler schema.
/// </summary>
public sealed class MetricsEntity
{
/// <summary>
/// Unique metrics entry identifier.
/// </summary>
public long Id { get; init; }
/// <summary>
/// Tenant this metric belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Job type for this metric.
/// </summary>
public required string JobType { get; init; }
/// <summary>
/// Period start time.
/// </summary>
public DateTimeOffset PeriodStart { get; init; }
/// <summary>
/// Period end time.
/// </summary>
public DateTimeOffset PeriodEnd { get; init; }
/// <summary>
/// Number of jobs created in this period.
/// </summary>
public long JobsCreated { get; init; }
/// <summary>
/// Number of jobs completed in this period.
/// </summary>
public long JobsCompleted { get; init; }
/// <summary>
/// Number of jobs failed in this period.
/// </summary>
public long JobsFailed { get; init; }
/// <summary>
/// Number of jobs timed out in this period.
/// </summary>
public long JobsTimedOut { get; init; }
/// <summary>
/// Average duration in milliseconds.
/// </summary>
public long? AvgDurationMs { get; init; }
/// <summary>
/// 50th percentile duration.
/// </summary>
public long? P50DurationMs { get; init; }
/// <summary>
/// 95th percentile duration.
/// </summary>
public long? P95DurationMs { get; init; }
/// <summary>
/// 99th percentile duration.
/// </summary>
public long? P99DurationMs { get; init; }
/// <summary>
/// When this metric was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
}

View File

@@ -0,0 +1,97 @@
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Represents a trigger entity in the scheduler schema.
/// </summary>
public sealed class TriggerEntity
{
/// <summary>
/// Unique trigger identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant this trigger belongs to.
/// </summary>
public required string TenantId { get; init; }
/// <summary>
/// Trigger name (unique per tenant).
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Optional description.
/// </summary>
public string? Description { get; init; }
/// <summary>
/// Type of job to create when trigger fires.
/// </summary>
public required string JobType { get; init; }
/// <summary>
/// Job payload as JSON.
/// </summary>
public string JobPayload { get; init; } = "{}";
/// <summary>
/// Cron expression for scheduling.
/// </summary>
public required string CronExpression { get; init; }
/// <summary>
/// Timezone for cron evaluation.
/// </summary>
public string Timezone { get; init; } = "UTC";
/// <summary>
/// Trigger is enabled.
/// </summary>
public bool Enabled { get; init; } = true;
/// <summary>
/// Next scheduled fire time.
/// </summary>
public DateTimeOffset? NextFireAt { get; init; }
/// <summary>
/// Last time the trigger fired.
/// </summary>
public DateTimeOffset? LastFireAt { get; init; }
/// <summary>
/// ID of the last job created by this trigger.
/// </summary>
public Guid? LastJobId { get; init; }
/// <summary>
/// Total number of times the trigger has fired.
/// </summary>
public long FireCount { get; init; }
/// <summary>
/// Number of misfires.
/// </summary>
public int MisfireCount { get; init; }
/// <summary>
/// Trigger metadata as JSON.
/// </summary>
public string Metadata { get; init; } = "{}";
/// <summary>
/// When the trigger was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the trigger was last updated.
/// </summary>
public DateTimeOffset UpdatedAt { get; init; }
/// <summary>
/// User who created the trigger.
/// </summary>
public string? CreatedBy { get; init; }
}

View File

@@ -0,0 +1,72 @@
namespace StellaOps.Scheduler.Persistence.Postgres.Models;
/// <summary>
/// Worker status values.
/// </summary>
public static class WorkerStatus
{
public const string Active = "active";
public const string Draining = "draining";
public const string Stopped = "stopped";
}
/// <summary>
/// Represents a worker entity in the scheduler schema.
/// </summary>
public sealed class WorkerEntity
{
/// <summary>
/// Unique worker identifier.
/// </summary>
public required string Id { get; init; }
/// <summary>
/// Optional tenant this worker is dedicated to.
/// </summary>
public string? TenantId { get; init; }
/// <summary>
/// Hostname of the worker.
/// </summary>
public required string Hostname { get; init; }
/// <summary>
/// Process ID of the worker.
/// </summary>
public int? ProcessId { get; init; }
/// <summary>
/// Job types this worker can process.
/// </summary>
public string[] JobTypes { get; init; } = [];
/// <summary>
/// Maximum concurrent jobs this worker can handle.
/// </summary>
public int MaxConcurrentJobs { get; init; } = 1;
/// <summary>
/// Current number of jobs being processed.
/// </summary>
public int CurrentJobs { get; init; }
/// <summary>
/// Worker status.
/// </summary>
public string Status { get; init; } = WorkerStatus.Active;
/// <summary>
/// Last heartbeat timestamp.
/// </summary>
public DateTimeOffset LastHeartbeatAt { get; init; }
/// <summary>
/// When the worker was registered.
/// </summary>
public DateTimeOffset RegisteredAt { get; init; }
/// <summary>
/// Worker metadata as JSON.
/// </summary>
public string Metadata { get; init; } = "{}";
}

View File

@@ -0,0 +1,145 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for distributed lock operations.
/// </summary>
public sealed class DistributedLockRepository : RepositoryBase<SchedulerDataSource>, IDistributedLockRepository
{
/// <summary>
/// Creates a new distributed lock repository.
/// </summary>
public DistributedLockRepository(SchedulerDataSource dataSource, ILogger<DistributedLockRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<bool> TryAcquireAsync(string tenantId, string lockKey, string holderId, TimeSpan duration, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO scheduler.locks (lock_key, tenant_id, holder_id, expires_at)
VALUES (@lock_key, @tenant_id, @holder_id, NOW() + @duration)
ON CONFLICT (lock_key) DO UPDATE SET
holder_id = EXCLUDED.holder_id,
tenant_id = EXCLUDED.tenant_id,
acquired_at = NOW(),
expires_at = NOW() + @duration
WHERE scheduler.locks.expires_at < NOW()
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "lock_key", lockKey);
AddParameter(command, "tenant_id", tenantId);
AddParameter(command, "holder_id", holderId);
AddParameter(command, "duration", duration);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<LockEntity?> GetAsync(string lockKey, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT lock_key, tenant_id, holder_id, acquired_at, expires_at, metadata
FROM scheduler.locks
WHERE lock_key = @lock_key AND expires_at > NOW()
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "lock_key", lockKey);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
return await reader.ReadAsync(cancellationToken).ConfigureAwait(false) ? MapLock(reader) : null;
}
/// <inheritdoc />
public async Task<bool> ExtendAsync(string lockKey, string holderId, TimeSpan extension, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.locks
SET expires_at = expires_at + @extension
WHERE lock_key = @lock_key AND holder_id = @holder_id AND expires_at > NOW()
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "lock_key", lockKey);
AddParameter(command, "holder_id", holderId);
AddParameter(command, "extension", extension);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> ReleaseAsync(string lockKey, string holderId, CancellationToken cancellationToken = default)
{
const string sql = """
DELETE FROM scheduler.locks
WHERE lock_key = @lock_key AND holder_id = @holder_id
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "lock_key", lockKey);
AddParameter(command, "holder_id", holderId);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<int> CleanupExpiredAsync(CancellationToken cancellationToken = default)
{
const string sql = "DELETE FROM scheduler.locks WHERE expires_at < NOW()";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<LockEntity>> ListByTenantAsync(string tenantId, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT lock_key, tenant_id, holder_id, acquired_at, expires_at, metadata
FROM scheduler.locks
WHERE tenant_id = @tenant_id AND expires_at > NOW()
ORDER BY acquired_at DESC
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "tenant_id", tenantId);
var results = new List<LockEntity>();
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
results.Add(MapLock(reader));
}
return results;
}
private static LockEntity MapLock(NpgsqlDataReader reader) => new()
{
LockKey = reader.GetString(reader.GetOrdinal("lock_key")),
TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
HolderId = reader.GetString(reader.GetOrdinal("holder_id")),
AcquiredAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("acquired_at")),
ExpiresAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("expires_at")),
Metadata = reader.GetString(reader.GetOrdinal("metadata"))
};
}
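
// Illustrative acquire/extend/release cycle (lock key and timings are hypothetical):
// if (await locks.TryAcquireAsync(tenantId, "jobs:reaper", workerId, TimeSpan.FromSeconds(30), ct))
// {
//     try
//     {
//         // ... critical section; renew the lease while still working:
//         await locks.ExtendAsync("jobs:reaper", workerId, TimeSpan.FromSeconds(30), ct);
//     }
//     finally
//     {
//         await locks.ReleaseAsync("jobs:reaper", workerId, ct);
//     }
// }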

View File

@@ -0,0 +1,483 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for failure signature operations.
/// </summary>
public sealed class FailureSignatureRepository : RepositoryBase<SchedulerDataSource>, IFailureSignatureRepository
{
/// <summary>
/// Creates a new failure signature repository.
/// </summary>
public FailureSignatureRepository(SchedulerDataSource dataSource, ILogger<FailureSignatureRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<FailureSignatureEntity> CreateAsync(
FailureSignatureEntity signature,
CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO scheduler.failure_signatures (
signature_id, tenant_id, scope_type, scope_id, toolchain_hash,
error_code, error_category, occurrence_count, first_seen_at, last_seen_at,
resolution_status, resolution_notes, predicted_outcome, confidence_score
)
VALUES (
@signature_id, @tenant_id, @scope_type, @scope_id, @toolchain_hash,
@error_code, @error_category, @occurrence_count, @first_seen_at, @last_seen_at,
@resolution_status, @resolution_notes, @predicted_outcome, @confidence_score
)
RETURNING *
""";
await using var connection = await DataSource.OpenConnectionAsync(signature.TenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddSignatureParameters(command, signature);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapSignature(reader);
}
/// <inheritdoc />
public async Task<FailureSignatureEntity?> GetByIdAsync(
string tenantId,
Guid signatureId,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.failure_signatures
WHERE tenant_id = @tenant_id AND signature_id = @signature_id
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "signature_id", signatureId);
},
MapSignature,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<FailureSignatureEntity?> GetByKeyAsync(
string tenantId,
FailureSignatureScopeType scopeType,
string scopeId,
string toolchainHash,
string? errorCode,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.failure_signatures
WHERE tenant_id = @tenant_id
AND scope_type = @scope_type
AND scope_id = @scope_id
AND toolchain_hash = @toolchain_hash
AND (error_code = @error_code OR (@error_code IS NULL AND error_code IS NULL))
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "scope_type", scopeType.ToString().ToLowerInvariant());
AddParameter(cmd, "scope_id", scopeId);
AddParameter(cmd, "toolchain_hash", toolchainHash);
AddParameter(cmd, "error_code", errorCode ?? (object)DBNull.Value);
},
MapSignature,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<FailureSignatureEntity>> GetByScopeAsync(
string tenantId,
FailureSignatureScopeType scopeType,
string scopeId,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.failure_signatures
WHERE tenant_id = @tenant_id
AND scope_type = @scope_type
AND scope_id = @scope_id
ORDER BY last_seen_at DESC
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "scope_type", scopeType.ToString().ToLowerInvariant());
AddParameter(cmd, "scope_id", scopeId);
},
MapSignature,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<FailureSignatureEntity>> GetUnresolvedAsync(
string tenantId,
int limit = 100,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.failure_signatures
WHERE tenant_id = @tenant_id
AND resolution_status = 'unresolved'
ORDER BY occurrence_count DESC, last_seen_at DESC
LIMIT @limit
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "limit", limit);
},
MapSignature,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<FailureSignatureEntity>> GetByPredictedOutcomeAsync(
string tenantId,
PredictedOutcome outcome,
decimal minConfidence = 0.5m,
int limit = 100,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.failure_signatures
WHERE tenant_id = @tenant_id
AND predicted_outcome = @predicted_outcome
AND confidence_score >= @min_confidence
ORDER BY confidence_score DESC, last_seen_at DESC
LIMIT @limit
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "predicted_outcome", outcome.ToString().ToLowerInvariant());
AddParameter(cmd, "min_confidence", minConfidence);
AddParameter(cmd, "limit", limit);
},
MapSignature,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<FailureSignatureEntity> UpsertOccurrenceAsync(
string tenantId,
FailureSignatureScopeType scopeType,
string scopeId,
string toolchainHash,
string? errorCode,
ErrorCategory? errorCategory,
CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO scheduler.failure_signatures (
signature_id, tenant_id, scope_type, scope_id, toolchain_hash,
error_code, error_category, occurrence_count, first_seen_at, last_seen_at
)
VALUES (
gen_random_uuid(), @tenant_id, @scope_type, @scope_id, @toolchain_hash,
@error_code, @error_category, 1, NOW(), NOW()
)
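            -- NOTE: matching NULL error_code values in ON CONFLICT assumes the
            -- unique index is declared NULLS NOT DISTINCT (PostgreSQL 15+);
            -- otherwise rows with NULL error codes will not be deduplicated.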
ON CONFLICT (tenant_id, scope_type, scope_id, toolchain_hash, error_code)
DO UPDATE SET
occurrence_count = scheduler.failure_signatures.occurrence_count + 1,
last_seen_at = NOW(),
updated_at = NOW(),
error_category = COALESCE(EXCLUDED.error_category, scheduler.failure_signatures.error_category)
RETURNING *
""";
await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "tenant_id", tenantId);
AddParameter(command, "scope_type", scopeType.ToString().ToLowerInvariant());
AddParameter(command, "scope_id", scopeId);
AddParameter(command, "toolchain_hash", toolchainHash);
AddParameter(command, "error_code", errorCode ?? (object)DBNull.Value);
AddParameter(command, "error_category", errorCategory?.ToString().ToLowerInvariant() ?? (object)DBNull.Value);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapSignature(reader);
}
/// <inheritdoc />
public async Task<bool> UpdateResolutionAsync(
string tenantId,
Guid signatureId,
ResolutionStatus status,
string? notes,
string? resolvedBy,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.failure_signatures
SET resolution_status = @resolution_status,
resolution_notes = @resolution_notes,
resolved_by = @resolved_by,
resolved_at = CASE WHEN @resolution_status = 'resolved' THEN NOW() ELSE resolved_at END,
updated_at = NOW()
WHERE tenant_id = @tenant_id AND signature_id = @signature_id
""";
await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "tenant_id", tenantId);
AddParameter(command, "signature_id", signatureId);
AddParameter(command, "resolution_status", status.ToString().ToLowerInvariant());
AddParameter(command, "resolution_notes", notes ?? (object)DBNull.Value);
AddParameter(command, "resolved_by", resolvedBy ?? (object)DBNull.Value);
var rowsAffected = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rowsAffected > 0;
}
/// <inheritdoc />
public async Task<bool> UpdatePredictionAsync(
string tenantId,
Guid signatureId,
PredictedOutcome outcome,
decimal confidence,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.failure_signatures
SET predicted_outcome = @predicted_outcome,
confidence_score = @confidence_score,
updated_at = NOW()
WHERE tenant_id = @tenant_id AND signature_id = @signature_id
""";
await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "tenant_id", tenantId);
AddParameter(command, "signature_id", signatureId);
AddParameter(command, "predicted_outcome", outcome.ToString().ToLowerInvariant());
AddParameter(command, "confidence_score", confidence);
var rowsAffected = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rowsAffected > 0;
}
/// <inheritdoc />
public async Task<bool> DeleteAsync(
string tenantId,
Guid signatureId,
CancellationToken cancellationToken = default)
{
const string sql = """
DELETE FROM scheduler.failure_signatures
WHERE tenant_id = @tenant_id AND signature_id = @signature_id
""";
await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "tenant_id", tenantId);
AddParameter(command, "signature_id", signatureId);
var rowsAffected = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rowsAffected > 0;
}
/// <inheritdoc />
public async Task<int> PruneResolvedAsync(
string tenantId,
TimeSpan olderThan,
CancellationToken cancellationToken = default)
{
const string sql = """
DELETE FROM scheduler.failure_signatures
WHERE tenant_id = @tenant_id
AND resolution_status = 'resolved'
AND resolved_at < @cutoff
""";
var cutoff = DateTimeOffset.UtcNow.Subtract(olderThan);
await using var connection = await DataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "tenant_id", tenantId);
AddParameter(command, "cutoff", cutoff);
return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<FailureSignatureEntity?> GetBestMatchAsync(
string tenantId,
FailureSignatureScopeType scopeType,
string scopeId,
string? toolchainHash = null,
CancellationToken cancellationToken = default)
{
        // Query prioritizes:
        // 1. Unresolved signatures (most actionable)
        // 2. Higher confidence scores
        // 3. More recent occurrences
        // 4. Higher occurrence counts
        // Optionally filters by toolchain hash for better precision.
const string sql = """
SELECT * FROM scheduler.failure_signatures
WHERE tenant_id = @tenant_id
AND scope_type = @scope_type
AND scope_id = @scope_id
AND resolution_status != 'resolved'
AND (@toolchain_hash IS NULL OR toolchain_hash = @toolchain_hash)
ORDER BY
CASE WHEN resolution_status = 'unresolved' THEN 0 ELSE 1 END,
confidence_score DESC NULLS LAST,
last_seen_at DESC,
occurrence_count DESC
LIMIT 1
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "scope_type", scopeType.ToString().ToLowerInvariant());
AddParameter(cmd, "scope_id", scopeId);
AddParameter(cmd, "toolchain_hash", toolchainHash ?? (object)DBNull.Value);
},
MapSignature,
cancellationToken).ConfigureAwait(false);
}
private void AddSignatureParameters(NpgsqlCommand command, FailureSignatureEntity signature)
{
AddParameter(command, "signature_id", signature.SignatureId == Guid.Empty ? Guid.NewGuid() : signature.SignatureId);
AddParameter(command, "tenant_id", signature.TenantId);
AddParameter(command, "scope_type", signature.ScopeType.ToString().ToLowerInvariant());
AddParameter(command, "scope_id", signature.ScopeId);
AddParameter(command, "toolchain_hash", signature.ToolchainHash);
AddParameter(command, "error_code", signature.ErrorCode ?? (object)DBNull.Value);
AddParameter(command, "error_category", signature.ErrorCategory?.ToString().ToLowerInvariant() ?? (object)DBNull.Value);
AddParameter(command, "occurrence_count", signature.OccurrenceCount);
AddParameter(command, "first_seen_at", signature.FirstSeenAt);
AddParameter(command, "last_seen_at", signature.LastSeenAt);
AddParameter(command, "resolution_status", signature.ResolutionStatus.ToString().ToLowerInvariant());
AddParameter(command, "resolution_notes", signature.ResolutionNotes ?? (object)DBNull.Value);
AddParameter(command, "predicted_outcome", signature.PredictedOutcome.ToString().ToLowerInvariant());
AddParameter(command, "confidence_score", signature.ConfidenceScore ?? (object)DBNull.Value);
}
private static FailureSignatureEntity MapSignature(NpgsqlDataReader reader)
{
return new FailureSignatureEntity
{
SignatureId = reader.GetGuid(reader.GetOrdinal("signature_id")),
TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at")),
UpdatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("updated_at")),
ScopeType = ParseScopeType(reader.GetString(reader.GetOrdinal("scope_type"))),
ScopeId = reader.GetString(reader.GetOrdinal("scope_id")),
ToolchainHash = reader.GetString(reader.GetOrdinal("toolchain_hash")),
ErrorCode = reader.IsDBNull(reader.GetOrdinal("error_code"))
? null
: reader.GetString(reader.GetOrdinal("error_code")),
ErrorCategory = reader.IsDBNull(reader.GetOrdinal("error_category"))
? null
: ParseErrorCategory(reader.GetString(reader.GetOrdinal("error_category"))),
OccurrenceCount = reader.GetInt32(reader.GetOrdinal("occurrence_count")),
FirstSeenAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("first_seen_at")),
LastSeenAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("last_seen_at")),
ResolutionStatus = ParseResolutionStatus(reader.GetString(reader.GetOrdinal("resolution_status"))),
ResolutionNotes = reader.IsDBNull(reader.GetOrdinal("resolution_notes"))
? null
: reader.GetString(reader.GetOrdinal("resolution_notes")),
ResolvedAt = reader.IsDBNull(reader.GetOrdinal("resolved_at"))
? null
: reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("resolved_at")),
ResolvedBy = reader.IsDBNull(reader.GetOrdinal("resolved_by"))
? null
: reader.GetString(reader.GetOrdinal("resolved_by")),
PredictedOutcome = reader.IsDBNull(reader.GetOrdinal("predicted_outcome"))
? PredictedOutcome.Unknown
: ParsePredictedOutcome(reader.GetString(reader.GetOrdinal("predicted_outcome"))),
ConfidenceScore = reader.IsDBNull(reader.GetOrdinal("confidence_score"))
? null
: reader.GetDecimal(reader.GetOrdinal("confidence_score"))
};
}
private static FailureSignatureScopeType ParseScopeType(string value) => value.ToLowerInvariant() switch
{
"repo" => FailureSignatureScopeType.Repo,
"image" => FailureSignatureScopeType.Image,
"artifact" => FailureSignatureScopeType.Artifact,
"global" => FailureSignatureScopeType.Global,
_ => throw new ArgumentException($"Unknown scope type: {value}")
};
private static ErrorCategory ParseErrorCategory(string value) => value.ToLowerInvariant() switch
{
"network" => ErrorCategory.Network,
"auth" => ErrorCategory.Auth,
"validation" => ErrorCategory.Validation,
"resource" => ErrorCategory.Resource,
"timeout" => ErrorCategory.Timeout,
"config" => ErrorCategory.Config,
_ => ErrorCategory.Unknown
};
private static ResolutionStatus ParseResolutionStatus(string value) => value.ToLowerInvariant() switch
{
"unresolved" => ResolutionStatus.Unresolved,
"investigating" => ResolutionStatus.Investigating,
"resolved" => ResolutionStatus.Resolved,
"wont_fix" or "wontfix" => ResolutionStatus.WontFix,
_ => ResolutionStatus.Unresolved
};
private static PredictedOutcome ParsePredictedOutcome(string value) => value.ToLowerInvariant() switch
{
"pass" => PredictedOutcome.Pass,
"fail" => PredictedOutcome.Fail,
"flaky" => PredictedOutcome.Flaky,
_ => PredictedOutcome.Unknown
};
}
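
// Illustrative occurrence recording on job failure (values hypothetical):
// var sig = await repo.UpsertOccurrenceAsync(
//     tenantId,
//     FailureSignatureScopeType.Image,
//     "sha256:deadbeef",
//     toolchainHash: "tc-sha256-4f2a",
//     errorCode: "OOMKilled",
//     errorCategory: ErrorCategory.Resource,
//     cancellationToken: ct);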

View File

@@ -0,0 +1,213 @@
using System.Collections.Generic;
using Dapper;
using Npgsql;
using StellaOps.Infrastructure.Postgres;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public sealed class GraphJobRepository : IGraphJobRepository
{
private readonly SchedulerDataSource _dataSource;
public GraphJobRepository(SchedulerDataSource dataSource)
{
_dataSource = dataSource;
}
public async ValueTask InsertAsync(GraphBuildJob job, CancellationToken cancellationToken)
{
const string sql = @"INSERT INTO scheduler.graph_jobs
(id, tenant_id, type, status, payload, created_at, updated_at, correlation_id)
VALUES (@Id, @TenantId, @Type, @Status, @Payload, @CreatedAt, @UpdatedAt, @CorrelationId);";
await using var conn = await _dataSource.OpenConnectionAsync(job.TenantId, cancellationToken).ConfigureAwait(false);
await conn.ExecuteAsync(sql, new
{
job.Id,
job.TenantId,
Type = (short)GraphJobQueryType.Build,
Status = (short)job.Status,
Payload = CanonicalJsonSerializer.Serialize(job),
job.CreatedAt,
UpdatedAt = job.CompletedAt ?? job.CreatedAt,
job.CorrelationId
});
}
public async ValueTask InsertAsync(GraphOverlayJob job, CancellationToken cancellationToken)
{
const string sql = @"INSERT INTO scheduler.graph_jobs
(id, tenant_id, type, status, payload, created_at, updated_at, correlation_id)
VALUES (@Id, @TenantId, @Type, @Status, @Payload, @CreatedAt, @UpdatedAt, @CorrelationId);";
await using var conn = await _dataSource.OpenConnectionAsync(job.TenantId, cancellationToken).ConfigureAwait(false);
await conn.ExecuteAsync(sql, new
{
job.Id,
job.TenantId,
Type = (short)GraphJobQueryType.Overlay,
Status = (short)job.Status,
Payload = CanonicalJsonSerializer.Serialize(job),
job.CreatedAt,
UpdatedAt = job.CompletedAt ?? job.CreatedAt,
job.CorrelationId
});
}
public async ValueTask<GraphBuildJob?> GetBuildJobAsync(string tenantId, string jobId, CancellationToken cancellationToken)
{
const string sql = "SELECT payload FROM scheduler.graph_jobs WHERE tenant_id=@TenantId AND id=@Id AND type=@Type LIMIT 1";
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, cancellationToken).ConfigureAwait(false);
var payload = await conn.ExecuteScalarAsync<string?>(sql, new { TenantId = tenantId, Id = jobId, Type = (short)GraphJobQueryType.Build });
return payload is null ? null : CanonicalJsonSerializer.Deserialize<GraphBuildJob>(payload);
}
public async ValueTask<GraphOverlayJob?> GetOverlayJobAsync(string tenantId, string jobId, CancellationToken cancellationToken)
{
const string sql = "SELECT payload FROM scheduler.graph_jobs WHERE tenant_id=@TenantId AND id=@Id AND type=@Type LIMIT 1";
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, cancellationToken).ConfigureAwait(false);
var payload = await conn.ExecuteScalarAsync<string?>(sql, new { TenantId = tenantId, Id = jobId, Type = (short)GraphJobQueryType.Overlay });
return payload is null ? null : CanonicalJsonSerializer.Deserialize<GraphOverlayJob>(payload);
}
public async ValueTask<IReadOnlyCollection<GraphBuildJob>> ListBuildJobsAsync(string tenantId, GraphJobStatus? status, int limit, CancellationToken cancellationToken)
{
var sql = "SELECT payload FROM scheduler.graph_jobs WHERE tenant_id=@TenantId AND type=@Type";
if (status is not null)
{
sql += " AND status=@Status";
}
sql += " ORDER BY created_at DESC LIMIT @Limit";
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, cancellationToken).ConfigureAwait(false);
var rows = await conn.QueryAsync<string>(sql, new
{
TenantId = tenantId,
Type = (short)GraphJobQueryType.Build,
Status = (short?)status,
Limit = limit
});
return rows
.Select(r => CanonicalJsonSerializer.Deserialize<GraphBuildJob>(r))
.Where(r => r is not null)!
.ToArray()!;
}
public async ValueTask<IReadOnlyCollection<GraphOverlayJob>> ListOverlayJobsAsync(string tenantId, GraphJobStatus? status, int limit, CancellationToken cancellationToken)
{
var sql = "SELECT payload FROM scheduler.graph_jobs WHERE tenant_id=@TenantId AND type=@Type";
if (status is not null)
{
sql += " AND status=@Status";
}
sql += " ORDER BY created_at DESC LIMIT @Limit";
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, cancellationToken).ConfigureAwait(false);
var rows = await conn.QueryAsync<string>(sql, new
{
TenantId = tenantId,
Type = (short)GraphJobQueryType.Overlay,
Status = (short?)status,
Limit = limit
});
return rows
.Select(r => CanonicalJsonSerializer.Deserialize<GraphOverlayJob>(r))
.Where(r => r is not null)!
.ToArray()!;
}
public ValueTask<IReadOnlyCollection<GraphOverlayJob>> ListOverlayJobsAsync(string tenantId, CancellationToken cancellationToken)
=> ListOverlayJobsAsync(tenantId, status: null, limit: 50, cancellationToken);
// Cross-tenant overloads for background services - scans all tenants
public async ValueTask<IReadOnlyCollection<GraphBuildJob>> ListBuildJobsAsync(GraphJobStatus? status, int limit, CancellationToken cancellationToken)
{
var sql = "SELECT payload FROM scheduler.graph_jobs WHERE type=@Type";
if (status is not null)
{
sql += " AND status=@Status";
}
sql += " ORDER BY created_at LIMIT @Limit";
await using var conn = await _dataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
var results = await conn.QueryAsync<string>(sql, new
{
Type = (short)GraphJobQueryType.Build,
Status = status is not null ? (short)status : (short?)null,
Limit = limit
});
return results
.Select(r => CanonicalJsonSerializer.Deserialize<GraphBuildJob>(r))
.Where(r => r is not null)!
.ToArray()!;
}
public async ValueTask<IReadOnlyCollection<GraphOverlayJob>> ListOverlayJobsAsync(GraphJobStatus? status, int limit, CancellationToken cancellationToken)
{
var sql = "SELECT payload FROM scheduler.graph_jobs WHERE type=@Type";
if (status is not null)
{
sql += " AND status=@Status";
}
sql += " ORDER BY created_at LIMIT @Limit";
await using var conn = await _dataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
var results = await conn.QueryAsync<string>(sql, new
{
Type = (short)GraphJobQueryType.Overlay,
Status = status is not null ? (short)status : (short?)null,
Limit = limit
});
return results
.Select(r => CanonicalJsonSerializer.Deserialize<GraphOverlayJob>(r))
.Where(r => r is not null)!
.ToArray()!;
}
public async ValueTask<bool> TryReplaceAsync(GraphBuildJob job, GraphJobStatus expectedStatus, CancellationToken cancellationToken)
{
const string sql = @"UPDATE scheduler.graph_jobs
SET status=@NewStatus, payload=@Payload, updated_at=NOW()
WHERE tenant_id=@TenantId AND id=@Id AND status=@ExpectedStatus AND type=@Type";
await using var conn = await _dataSource.OpenConnectionAsync(job.TenantId, cancellationToken).ConfigureAwait(false);
var rows = await conn.ExecuteAsync(sql, new
{
job.TenantId,
job.Id,
ExpectedStatus = (short)expectedStatus,
NewStatus = (short)job.Status,
Type = (short)GraphJobQueryType.Build,
Payload = CanonicalJsonSerializer.Serialize(job)
});
return rows == 1;
}
public async ValueTask<bool> TryReplaceOverlayAsync(GraphOverlayJob job, GraphJobStatus expectedStatus, CancellationToken cancellationToken)
{
const string sql = @"UPDATE scheduler.graph_jobs
SET status=@NewStatus, payload=@Payload, updated_at=NOW()
WHERE tenant_id=@TenantId AND id=@Id AND status=@ExpectedStatus AND type=@Type";
await using var conn = await _dataSource.OpenConnectionAsync(job.TenantId, cancellationToken).ConfigureAwait(false);
var rows = await conn.ExecuteAsync(sql, new
{
job.TenantId,
job.Id,
ExpectedStatus = (short)expectedStatus,
NewStatus = (short)job.Status,
Type = (short)GraphJobQueryType.Overlay,
Payload = CanonicalJsonSerializer.Serialize(job)
});
return rows == 1;
}
}
internal enum GraphJobQueryType : short
{
Build = 0,
Overlay = 1
}
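
// Illustrative optimistic status transition; TryReplaceAsync only wins when the
// stored status still matches expectedStatus (the record `with` mutation below
// is an assumption about the GraphBuildJob model type):
// var job = await repo.GetBuildJobAsync(tenantId, jobId, ct);
// var running = job! with { Status = GraphJobStatus.Running };
// var won = await repo.TryReplaceAsync(running, expectedStatus: GraphJobStatus.Pending, ct);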

View File

@@ -0,0 +1,39 @@
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for distributed lock operations.
/// </summary>
public interface IDistributedLockRepository
{
/// <summary>
/// Tries to acquire a lock.
/// </summary>
Task<bool> TryAcquireAsync(string tenantId, string lockKey, string holderId, TimeSpan duration, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a lock by key.
/// </summary>
Task<LockEntity?> GetAsync(string lockKey, CancellationToken cancellationToken = default);
/// <summary>
/// Extends a lock.
/// </summary>
Task<bool> ExtendAsync(string lockKey, string holderId, TimeSpan extension, CancellationToken cancellationToken = default);
/// <summary>
/// Releases a lock.
/// </summary>
Task<bool> ReleaseAsync(string lockKey, string holderId, CancellationToken cancellationToken = default);
/// <summary>
/// Cleans up expired locks.
/// </summary>
Task<int> CleanupExpiredAsync(CancellationToken cancellationToken = default);
/// <summary>
/// Lists all locks for a tenant.
/// </summary>
Task<IReadOnlyList<LockEntity>> ListByTenantAsync(string tenantId, CancellationToken cancellationToken = default);
}
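A sketch of the intended acquire/renew/release cycle, built only from the interface above; the 30-second lease and half-lease renewal cadence are illustrative choices, not values from this commit:

using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public static class LockedWork
{
    // Runs `work` only if the lock is acquired; renews the lease until the work completes.
    public static async Task<bool> RunAsync(
        IDistributedLockRepository locks,
        string tenantId,
        string lockKey,
        string holderId,
        Func<CancellationToken, Task> work,
        CancellationToken ct)
    {
        var lease = TimeSpan.FromSeconds(30);
        if (!await locks.TryAcquireAsync(tenantId, lockKey, holderId, lease, ct))
        {
            return false; // another holder owns the key
        }

        try
        {
            var workTask = work(ct);
            while (!workTask.IsCompleted)
            {
                // Renew at half the lease so a missed tick still leaves slack.
                await Task.WhenAny(workTask, Task.Delay(lease / 2, ct));
                if (!workTask.IsCompleted)
                {
                    await locks.ExtendAsync(lockKey, holderId, lease, ct);
                }
            }
            await workTask; // surface exceptions from the work itself
            return true;
        }
        finally
        {
            await locks.ReleaseAsync(lockKey, holderId, CancellationToken.None);
        }
    }
}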

View File

@@ -0,0 +1,124 @@
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for failure signature operations.
/// </summary>
public interface IFailureSignatureRepository
{
/// <summary>
/// Creates a new failure signature.
/// </summary>
Task<FailureSignatureEntity> CreateAsync(
FailureSignatureEntity signature,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a failure signature by ID.
/// </summary>
Task<FailureSignatureEntity?> GetByIdAsync(
string tenantId,
Guid signatureId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets a failure signature by its unique key (scope + toolchain + error code).
/// </summary>
Task<FailureSignatureEntity?> GetByKeyAsync(
string tenantId,
FailureSignatureScopeType scopeType,
string scopeId,
string toolchainHash,
string? errorCode,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets all failure signatures for a scope.
/// </summary>
Task<IReadOnlyList<FailureSignatureEntity>> GetByScopeAsync(
string tenantId,
FailureSignatureScopeType scopeType,
string scopeId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets unresolved failure signatures for a tenant, up to the given limit.
/// </summary>
Task<IReadOnlyList<FailureSignatureEntity>> GetUnresolvedAsync(
string tenantId,
int limit = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets failure signatures matching a predicted outcome.
/// </summary>
Task<IReadOnlyList<FailureSignatureEntity>> GetByPredictedOutcomeAsync(
string tenantId,
PredictedOutcome outcome,
decimal minConfidence = 0.5m,
int limit = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Increments the occurrence count and updates the last-seen timestamp.
/// Creates the signature if it doesn't exist (upsert).
/// </summary>
Task<FailureSignatureEntity> UpsertOccurrenceAsync(
string tenantId,
FailureSignatureScopeType scopeType,
string scopeId,
string toolchainHash,
string? errorCode,
ErrorCategory? errorCategory,
CancellationToken cancellationToken = default);
/// <summary>
/// Updates the resolution status of a signature.
/// </summary>
Task<bool> UpdateResolutionAsync(
string tenantId,
Guid signatureId,
ResolutionStatus status,
string? notes,
string? resolvedBy,
CancellationToken cancellationToken = default);
/// <summary>
/// Updates the predicted outcome for a signature.
/// </summary>
Task<bool> UpdatePredictionAsync(
string tenantId,
Guid signatureId,
PredictedOutcome outcome,
decimal confidence,
CancellationToken cancellationToken = default);
/// <summary>
/// Deletes a failure signature.
/// </summary>
Task<bool> DeleteAsync(
string tenantId,
Guid signatureId,
CancellationToken cancellationToken = default);
/// <summary>
/// Prunes old resolved signatures.
/// </summary>
Task<int> PruneResolvedAsync(
string tenantId,
TimeSpan olderThan,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets the best matching signature for a given scope.
/// Returns the highest confidence, most recent match.
/// Used by FirstSignal for LastKnownOutcome prediction.
/// </summary>
Task<FailureSignatureEntity?> GetBestMatchAsync(
string tenantId,
FailureSignatureScopeType scopeType,
string scopeId,
string? toolchainHash = null,
CancellationToken cancellationToken = default);
}
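A sketch of the expected call sequence around a job failure, assuming `FailureSignatureScopeType` has a `Job` member (name assumed): each failure is counted via the upsert, and the best match is consulted before the next attempt:

using StellaOps.Scheduler.Persistence.Postgres.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public sealed class FailurePredictor
{
    private readonly IFailureSignatureRepository _signatures;

    public FailurePredictor(IFailureSignatureRepository signatures) => _signatures = signatures;

    // Failure path: counts the occurrence, creating the signature on first sight.
    public Task RecordAsync(string tenantId, string jobId, string toolchainHash, string? errorCode, CancellationToken ct)
        => _signatures.UpsertOccurrenceAsync(
            tenantId,
            FailureSignatureScopeType.Job, // member name assumed
            jobId,
            toolchainHash,
            errorCode,
            errorCategory: null,
            ct);

    // Scheduling path: highest-confidence, most recent signature for the scope, or null.
    public Task<FailureSignatureEntity?> BestMatchAsync(string tenantId, string jobId, CancellationToken ct)
        => _signatures.GetBestMatchAsync(tenantId, FailureSignatureScopeType.Job, jobId, toolchainHash: null, ct);
}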

View File

@@ -0,0 +1,26 @@
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public interface IGraphJobRepository
{
ValueTask InsertAsync(GraphBuildJob job, CancellationToken cancellationToken);
ValueTask InsertAsync(GraphOverlayJob job, CancellationToken cancellationToken);
ValueTask<bool> TryReplaceAsync(GraphBuildJob job, GraphJobStatus expectedStatus, CancellationToken cancellationToken);
ValueTask<bool> TryReplaceOverlayAsync(GraphOverlayJob job, GraphJobStatus expectedStatus, CancellationToken cancellationToken);
ValueTask<GraphBuildJob?> GetBuildJobAsync(string tenantId, string jobId, CancellationToken cancellationToken);
ValueTask<GraphOverlayJob?> GetOverlayJobAsync(string tenantId, string jobId, CancellationToken cancellationToken);
ValueTask<IReadOnlyCollection<GraphBuildJob>> ListBuildJobsAsync(string tenantId, GraphJobStatus? status, int limit, CancellationToken cancellationToken);
ValueTask<IReadOnlyCollection<GraphOverlayJob>> ListOverlayJobsAsync(string tenantId, GraphJobStatus? status, int limit, CancellationToken cancellationToken);
ValueTask<IReadOnlyCollection<GraphOverlayJob>> ListOverlayJobsAsync(string tenantId, CancellationToken cancellationToken);
// Cross-tenant overloads for background services
ValueTask<IReadOnlyCollection<GraphBuildJob>> ListBuildJobsAsync(GraphJobStatus? status, int limit, CancellationToken cancellationToken);
ValueTask<IReadOnlyCollection<GraphOverlayJob>> ListOverlayJobsAsync(GraphJobStatus? status, int limit, CancellationToken cancellationToken);
}
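The two tenantless overloads back cross-tenant background sweeps; a minimal polling sketch (the status value and batch size are assumed for illustration):

using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public sealed class GraphJobSweeper
{
    private readonly IGraphJobRepository _repository;

    public GraphJobSweeper(IGraphJobRepository repository) => _repository = repository;

    public async Task SweepAsync(CancellationToken ct)
    {
        // No tenantId argument: this overload scans all tenants on a system connection.
        var pending = await _repository.ListBuildJobsAsync(GraphJobStatus.Pending, limit: 100, ct);
        foreach (var job in pending)
        {
            // Hand each job to the graph build executor (elided).
        }
    }
}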

View File

@@ -0,0 +1,9 @@
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public interface IImpactSnapshotRepository
{
Task UpsertAsync(ImpactSet snapshot, CancellationToken cancellationToken = default);
Task<ImpactSet?> GetBySnapshotIdAsync(string snapshotId, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,66 @@
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for job history operations.
/// </summary>
public interface IJobHistoryRepository
{
/// <summary>
/// Archives a completed job.
/// </summary>
Task<JobHistoryEntity> ArchiveAsync(JobHistoryEntity history, CancellationToken cancellationToken = default);
/// <summary>
/// Gets history for a specific job.
/// </summary>
Task<IReadOnlyList<JobHistoryEntity>> GetByJobIdAsync(Guid jobId, CancellationToken cancellationToken = default);
/// <summary>
/// Lists job history for a tenant.
/// </summary>
Task<IReadOnlyList<JobHistoryEntity>> ListAsync(
string tenantId,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
/// <summary>
/// Lists job history by type.
/// </summary>
Task<IReadOnlyList<JobHistoryEntity>> ListByJobTypeAsync(
string tenantId,
string jobType,
int limit = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Lists job history by status.
/// </summary>
Task<IReadOnlyList<JobHistoryEntity>> ListByStatusAsync(
string tenantId,
JobStatus status,
int limit = 100,
CancellationToken cancellationToken = default);
/// <summary>
/// Lists job history in a time range.
/// </summary>
Task<IReadOnlyList<JobHistoryEntity>> ListByTimeRangeAsync(
string tenantId,
DateTimeOffset from,
DateTimeOffset to,
int limit = 1000,
CancellationToken cancellationToken = default);
/// <summary>
/// Deletes old history entries.
/// </summary>
Task<int> DeleteOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default);
/// <summary>
/// Gets recent failed jobs across all tenants for background indexing.
/// </summary>
Task<IReadOnlyList<JobHistoryEntity>> GetRecentFailedAsync(int limit, CancellationToken cancellationToken = default);
}
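A retention sweep is the obvious consumer of `DeleteOlderThanAsync`; a minimal sketch with an illustrative 90-day window:

using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public sealed class HistoryRetention
{
    private readonly IJobHistoryRepository _history;

    public HistoryRetention(IJobHistoryRepository history) => _history = history;

    // Deletes entries archived before the cutoff; the 90-day window is illustrative.
    public Task<int> PruneAsync(CancellationToken ct)
        => _history.DeleteOlderThanAsync(DateTimeOffset.UtcNow - TimeSpan.FromDays(90), ct);
}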

View File

@@ -0,0 +1,101 @@
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for job operations.
/// </summary>
public interface IJobRepository
{
/// <summary>
/// Creates a new job.
/// </summary>
Task<JobEntity> CreateAsync(JobEntity job, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a job by ID.
/// </summary>
Task<JobEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a job by idempotency key.
/// </summary>
Task<JobEntity?> GetByIdempotencyKeyAsync(string tenantId, string idempotencyKey, CancellationToken cancellationToken = default);
/// <summary>
/// Gets scheduled jobs ready to run.
/// </summary>
Task<IReadOnlyList<JobEntity>> GetScheduledJobsAsync(
string tenantId,
string[] jobTypes,
int limit = 10,
CancellationToken cancellationToken = default);
/// <summary>
/// Attempts to lease a job for processing.
/// Performs an atomic conditional UPDATE so concurrent workers cannot double-lease a job.
/// </summary>
Task<JobEntity?> TryLeaseJobAsync(
string tenantId,
Guid jobId,
string workerId,
TimeSpan leaseDuration,
CancellationToken cancellationToken = default);
/// <summary>
/// Extends the lease on a job.
/// </summary>
Task<bool> ExtendLeaseAsync(
string tenantId,
Guid jobId,
Guid leaseId,
TimeSpan extension,
CancellationToken cancellationToken = default);
/// <summary>
/// Marks a job as completed successfully.
/// </summary>
Task<bool> CompleteAsync(
string tenantId,
Guid jobId,
Guid leaseId,
string? result = null,
CancellationToken cancellationToken = default);
/// <summary>
/// Marks a job as failed.
/// </summary>
Task<bool> FailAsync(
string tenantId,
Guid jobId,
Guid leaseId,
string reason,
bool retry = true,
CancellationToken cancellationToken = default);
/// <summary>
/// Cancels a job.
/// </summary>
Task<bool> CancelAsync(
string tenantId,
Guid jobId,
string reason,
CancellationToken cancellationToken = default);
/// <summary>
/// Recovers expired leases (for jobs that timed out).
/// </summary>
Task<int> RecoverExpiredLeasesAsync(
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets jobs by status.
/// </summary>
Task<IReadOnlyList<JobEntity>> GetByStatusAsync(
string tenantId,
JobStatus status,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default);
}
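Taken together these methods define the worker loop: pick scheduled jobs, lease one (racing other workers), then complete or fail under the lease. A condensed sketch; the "scan" job type and five-minute lease are illustrative:

using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public sealed class JobWorker
{
    private readonly IJobRepository _jobs;
    private readonly string _workerId = $"worker-{Environment.MachineName}";

    public JobWorker(IJobRepository jobs) => _jobs = jobs;

    public async Task PollOnceAsync(string tenantId, CancellationToken ct)
    {
        var candidates = await _jobs.GetScheduledJobsAsync(tenantId, new[] { "scan" }, limit: 1, ct);
        foreach (var candidate in candidates)
        {
            // Leasing can lose the race to another worker, hence the null checks.
            var job = await _jobs.TryLeaseJobAsync(tenantId, candidate.Id, _workerId, TimeSpan.FromMinutes(5), ct);
            if (job is null || job.LeaseId is not { } leaseId)
            {
                continue;
            }

            try
            {
                // ... execute the job, calling ExtendLeaseAsync periodically for long work ...
                await _jobs.CompleteAsync(tenantId, job.Id, leaseId, result: null, ct);
            }
            catch (Exception ex)
            {
                await _jobs.FailAsync(tenantId, job.Id, leaseId, ex.Message, retry: true, ct);
            }
        }
    }
}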

View File

@@ -0,0 +1,45 @@
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for metrics operations.
/// </summary>
public interface IMetricsRepository
{
/// <summary>
/// Records or updates metrics for a period.
/// </summary>
Task<MetricsEntity> UpsertAsync(MetricsEntity metrics, CancellationToken cancellationToken = default);
/// <summary>
/// Gets metrics for a tenant and job type.
/// </summary>
Task<IReadOnlyList<MetricsEntity>> GetAsync(
string tenantId,
string jobType,
DateTimeOffset from,
DateTimeOffset to,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets aggregated metrics for a tenant.
/// </summary>
Task<IReadOnlyList<MetricsEntity>> GetByTenantAsync(
string tenantId,
DateTimeOffset from,
DateTimeOffset to,
CancellationToken cancellationToken = default);
/// <summary>
/// Gets latest metrics for all job types.
/// </summary>
Task<IReadOnlyList<MetricsEntity>> GetLatestAsync(
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Deletes old metrics.
/// </summary>
Task<int> DeleteOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default);
}
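Because the conflict key is (tenant, job type, period start), flushing a rollup bucket is idempotent. A sketch, assuming `MetricsEntity` exposes init-style properties named after the columns (an assumption; the entity type is not shown in this diff):

using StellaOps.Scheduler.Persistence.Postgres.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public sealed class MetricsRollup
{
    private readonly IMetricsRepository _metrics;

    public MetricsRollup(IMetricsRepository metrics) => _metrics = metrics;

    // Writes one hourly bucket; repeated flushes of the same bucket simply overwrite it.
    public Task FlushHourAsync(string tenantId, string jobType, DateTimeOffset hourStart, long created, long completed, CancellationToken ct)
        => _metrics.UpsertAsync(new MetricsEntity
        {
            TenantId = tenantId,
            JobType = jobType,
            PeriodStart = hourStart,
            PeriodEnd = hourStart.AddHours(1),
            JobsCreated = created,
            JobsCompleted = completed
        }, ct);
}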

View File

@@ -0,0 +1,14 @@
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public interface IPolicyRunJobRepository
{
Task<PolicyRunJob?> GetAsync(string tenantId, string jobId, CancellationToken cancellationToken = default);
Task<PolicyRunJob?> GetByRunIdAsync(string tenantId, string runId, CancellationToken cancellationToken = default);
Task InsertAsync(PolicyRunJob job, CancellationToken cancellationToken = default);
Task<long> CountAsync(string tenantId, PolicyRunMode mode, IReadOnlyCollection<PolicyRunJobStatus> statuses, CancellationToken cancellationToken = default);
Task<PolicyRunJob?> LeaseAsync(string leaseOwner, DateTimeOffset now, TimeSpan leaseDuration, int maxAttempts, CancellationToken cancellationToken = default);
Task<bool> ReplaceAsync(PolicyRunJob job, string? expectedLeaseOwner = null, CancellationToken cancellationToken = default);
Task<IReadOnlyList<PolicyRunJob>> ListAsync(string tenantId, string? policyId = null, PolicyRunMode? mode = null, IReadOnlyCollection<PolicyRunJobStatus>? statuses = null, DateTimeOffset? queuedAfter = null, int limit = 50, CancellationToken cancellationToken = default);
}
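A sketch of the dispatcher side: lease the next runnable job, submit it, then persist the updated state guarded by the lease owner. The two-minute lease and five-attempt cap are illustrative:

using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public sealed class PolicyRunDispatcher
{
    private readonly IPolicyRunJobRepository _jobs;
    private readonly string _owner = $"dispatcher-{Guid.NewGuid():N}";

    public PolicyRunDispatcher(IPolicyRunJobRepository jobs) => _jobs = jobs;

    public async Task<bool> DispatchOneAsync(CancellationToken ct)
    {
        var job = await _jobs.LeaseAsync(_owner, DateTimeOffset.UtcNow, TimeSpan.FromMinutes(2), maxAttempts: 5, ct);
        if (job is null)
        {
            return false; // nothing leasable right now
        }

        // ... submit the run and update the job's state (elided), then persist it.
        // The expected lease owner guards against writing over a re-leased job.
        return await _jobs.ReplaceAsync(job, expectedLeaseOwner: _owner, ct);
    }
}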

View File

@@ -0,0 +1,12 @@
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public interface IRunRepository
{
Task InsertAsync(Run run, CancellationToken cancellationToken = default);
Task<bool> UpdateAsync(Run run, CancellationToken cancellationToken = default);
Task<Run?> GetAsync(string tenantId, string runId, CancellationToken cancellationToken = default);
Task<IReadOnlyList<Run>> ListAsync(string tenantId, RunQueryOptions? options = null, CancellationToken cancellationToken = default);
Task<IReadOnlyList<Run>> ListByStateAsync(RunState state, int limit = 50, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,11 @@
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public interface IScheduleRepository
{
Task UpsertAsync(Schedule schedule, CancellationToken cancellationToken = default);
Task<Schedule?> GetAsync(string tenantId, string scheduleId, CancellationToken cancellationToken = default);
Task<IReadOnlyList<Schedule>> ListAsync(string tenantId, ScheduleQueryOptions? options = null, CancellationToken cancellationToken = default);
Task<bool> SoftDeleteAsync(string tenantId, string scheduleId, string deletedBy, DateTimeOffset deletedAt, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,59 @@
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for trigger operations.
/// </summary>
public interface ITriggerRepository
{
/// <summary>
/// Gets a trigger by ID.
/// </summary>
Task<TriggerEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets a trigger by name.
/// </summary>
Task<TriggerEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default);
/// <summary>
/// Lists all triggers for a tenant.
/// </summary>
Task<IReadOnlyList<TriggerEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default);
/// <summary>
/// Gets triggers that are due to fire.
/// </summary>
Task<IReadOnlyList<TriggerEntity>> GetDueTriggersAsync(int limit = 100, CancellationToken cancellationToken = default);
/// <summary>
/// Creates a new trigger.
/// </summary>
Task<TriggerEntity> CreateAsync(TriggerEntity trigger, CancellationToken cancellationToken = default);
/// <summary>
/// Updates a trigger.
/// </summary>
Task<bool> UpdateAsync(TriggerEntity trigger, CancellationToken cancellationToken = default);
/// <summary>
/// Records a trigger fire event.
/// </summary>
Task<bool> RecordFireAsync(string tenantId, Guid triggerId, Guid jobId, DateTimeOffset? nextFireAt, CancellationToken cancellationToken = default);
/// <summary>
/// Records a trigger misfire.
/// </summary>
Task<bool> RecordMisfireAsync(string tenantId, Guid triggerId, CancellationToken cancellationToken = default);
/// <summary>
/// Enables or disables a trigger.
/// </summary>
Task<bool> SetEnabledAsync(string tenantId, Guid id, bool enabled, CancellationToken cancellationToken = default);
/// <summary>
/// Deletes a trigger.
/// </summary>
Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
}
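A scheduler tick sketch over this interface, assuming `TriggerEntity` exposes `TenantId` and `Id` properties (names assumed); cron evaluation and job enqueueing are elided:

using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public sealed class TriggerTicker
{
    private readonly ITriggerRepository _triggers;

    public TriggerTicker(ITriggerRepository triggers) => _triggers = triggers;

    public async Task TickAsync(CancellationToken ct)
    {
        foreach (var trigger in await _triggers.GetDueTriggersAsync(limit: 100, ct))
        {
            var jobId = Guid.NewGuid(); // the job this fire produces (enqueue elided)
            var nextFireAt = DateTimeOffset.UtcNow.AddMinutes(5); // cron evaluation elided

            // Advances the trigger; a false return would indicate a concurrent update.
            await _triggers.RecordFireAsync(trigger.TenantId, trigger.Id, jobId, nextFireAt, ct);
        }
    }
}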

View File

@@ -0,0 +1,54 @@
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// Repository interface for worker operations.
/// </summary>
public interface IWorkerRepository
{
/// <summary>
/// Gets a worker by ID.
/// </summary>
Task<WorkerEntity?> GetByIdAsync(string id, CancellationToken cancellationToken = default);
/// <summary>
/// Lists all workers.
/// </summary>
Task<IReadOnlyList<WorkerEntity>> ListAsync(CancellationToken cancellationToken = default);
/// <summary>
/// Lists workers by status.
/// </summary>
Task<IReadOnlyList<WorkerEntity>> ListByStatusAsync(string status, CancellationToken cancellationToken = default);
/// <summary>
/// Gets workers for a specific tenant.
/// </summary>
Task<IReadOnlyList<WorkerEntity>> GetByTenantIdAsync(string tenantId, CancellationToken cancellationToken = default);
/// <summary>
/// Registers a new worker or updates existing.
/// </summary>
Task<WorkerEntity> UpsertAsync(WorkerEntity worker, CancellationToken cancellationToken = default);
/// <summary>
/// Updates worker heartbeat.
/// </summary>
Task<bool> HeartbeatAsync(string id, int currentJobs, CancellationToken cancellationToken = default);
/// <summary>
/// Updates worker status.
/// </summary>
Task<bool> SetStatusAsync(string id, string status, CancellationToken cancellationToken = default);
/// <summary>
/// Removes a worker.
/// </summary>
Task<bool> DeleteAsync(string id, CancellationToken cancellationToken = default);
/// <summary>
/// Gets stale workers (no heartbeat in duration).
/// </summary>
Task<IReadOnlyList<WorkerEntity>> GetStaleWorkersAsync(TimeSpan staleDuration, CancellationToken cancellationToken = default);
}
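A reaper sketch pairing `GetStaleWorkersAsync` with `SetStatusAsync`; the two-minute threshold and the "offline" status string are illustrative, since the interface keeps status as a plain string:

using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public sealed class WorkerJanitor
{
    private readonly IWorkerRepository _workers;

    public WorkerJanitor(IWorkerRepository workers) => _workers = workers;

    // Marks workers with no recent heartbeat as offline; values illustrative.
    public async Task ReapAsync(CancellationToken ct)
    {
        foreach (var worker in await _workers.GetStaleWorkersAsync(TimeSpan.FromMinutes(2), ct))
        {
            await _workers.SetStatusAsync(worker.Id, "offline", ct);
        }
    }
}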

View File

@@ -0,0 +1,53 @@
using System.Text.Json;
using Dapper;
using StellaOps.Scheduler.Models;
using StellaOps.Infrastructure.Postgres.Connections;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public sealed class ImpactSnapshotRepository : IImpactSnapshotRepository
{
private readonly SchedulerDataSource _dataSource;
private readonly JsonSerializerOptions _serializer = CanonicalJsonSerializer.Settings;
public ImpactSnapshotRepository(SchedulerDataSource dataSource)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
}
public async Task UpsertAsync(ImpactSet snapshot, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(snapshot);
var tenantId = snapshot.Selector?.TenantId ?? string.Empty;
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken);
const string sql = """
INSERT INTO scheduler.impact_snapshots (snapshot_id, tenant_id, impact, created_at)
VALUES (@SnapshotId, @TenantId, @Impact, NOW())
ON CONFLICT (snapshot_id) DO UPDATE SET impact = EXCLUDED.impact;
""";
        await conn.ExecuteAsync(new CommandDefinition(sql, new
        {
            SnapshotId = snapshot.SnapshotId ?? $"impact::{Guid.NewGuid():N}",
            TenantId = tenantId,
            Impact = JsonSerializer.Serialize(snapshot, _serializer)
        }, cancellationToken: cancellationToken));
}
public async Task<ImpactSet?> GetBySnapshotIdAsync(string snapshotId, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(snapshotId);
await using var conn = await _dataSource.OpenSystemConnectionAsync(cancellationToken);
const string sql = """
SELECT impact
FROM scheduler.impact_snapshots
WHERE snapshot_id = @SnapshotId
LIMIT 1;
""";
        var json = await conn.ExecuteScalarAsync<string?>(new CommandDefinition(sql, new { SnapshotId = snapshotId }, cancellationToken: cancellationToken));
return json is null ? null : JsonSerializer.Deserialize<ImpactSet>(json, _serializer);
}
}
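A round-trip sketch against the repository above; it assumes the caller populated `SnapshotId`, since the upsert generates a random one otherwise and the caller would not know it:

using StellaOps.Scheduler.Models;
using StellaOps.Scheduler.Persistence.Postgres.Repositories;

public static class ImpactSnapshotExample
{
    public static async Task<ImpactSet?> SaveAndLoadAsync(
        IImpactSnapshotRepository snapshots, ImpactSet impact, CancellationToken ct)
    {
        // The write is an upsert keyed on snapshot_id, so replays are harmless.
        await snapshots.UpsertAsync(impact, ct);
        return await snapshots.GetBySnapshotIdAsync(impact.SnapshotId!, ct);
    }
}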

View File

@@ -0,0 +1,269 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for job history operations.
/// </summary>
public sealed class JobHistoryRepository : RepositoryBase<SchedulerDataSource>, IJobHistoryRepository
{
/// <summary>
/// Creates a new job history repository.
/// </summary>
public JobHistoryRepository(SchedulerDataSource dataSource, ILogger<JobHistoryRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<JobHistoryEntity> ArchiveAsync(JobHistoryEntity history, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO scheduler.job_history (
job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
result, reason, worker_id, duration_ms, created_at, completed_at
)
VALUES (
@job_id, @tenant_id, @project_id, @job_type, @status::scheduler.job_status, @attempt, @payload_digest,
@result::jsonb, @reason, @worker_id, @duration_ms, @created_at, @completed_at
)
RETURNING *
""";
await using var connection = await DataSource.OpenConnectionAsync(history.TenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "job_id", history.JobId);
AddParameter(command, "tenant_id", history.TenantId);
AddParameter(command, "project_id", history.ProjectId);
AddParameter(command, "job_type", history.JobType);
AddParameter(command, "status", history.Status.ToString().ToLowerInvariant());
AddParameter(command, "attempt", history.Attempt);
AddParameter(command, "payload_digest", history.PayloadDigest);
AddJsonbParameter(command, "result", history.Result);
AddParameter(command, "reason", history.Reason);
AddParameter(command, "worker_id", history.WorkerId);
AddParameter(command, "duration_ms", history.DurationMs);
AddParameter(command, "created_at", history.CreatedAt);
AddParameter(command, "completed_at", history.CompletedAt);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapJobHistory(reader);
}
/// <inheritdoc />
public async Task<IReadOnlyList<JobHistoryEntity>> GetByJobIdAsync(Guid jobId, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
result, reason, worker_id, duration_ms, created_at, completed_at, archived_at
FROM scheduler.job_history
WHERE job_id = @job_id
ORDER BY archived_at DESC
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "job_id", jobId);
var results = new List<JobHistoryEntity>();
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
results.Add(MapJobHistory(reader));
}
return results;
}
/// <inheritdoc />
public async Task<IReadOnlyList<JobHistoryEntity>> ListAsync(
string tenantId,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
result, reason, worker_id, duration_ms, created_at, completed_at, archived_at
FROM scheduler.job_history
WHERE tenant_id = @tenant_id
ORDER BY completed_at DESC
LIMIT @limit OFFSET @offset
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapJobHistory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<JobHistoryEntity>> ListByJobTypeAsync(
string tenantId,
string jobType,
int limit = 100,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
result, reason, worker_id, duration_ms, created_at, completed_at, archived_at
FROM scheduler.job_history
WHERE tenant_id = @tenant_id AND job_type = @job_type
ORDER BY completed_at DESC
LIMIT @limit
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "job_type", jobType);
AddParameter(cmd, "limit", limit);
},
MapJobHistory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<JobHistoryEntity>> ListByStatusAsync(
string tenantId,
JobStatus status,
int limit = 100,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
result, reason, worker_id, duration_ms, created_at, completed_at, archived_at
FROM scheduler.job_history
WHERE tenant_id = @tenant_id AND status = @status::scheduler.job_status
ORDER BY completed_at DESC
LIMIT @limit
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "status", status.ToString().ToLowerInvariant());
AddParameter(cmd, "limit", limit);
},
MapJobHistory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<JobHistoryEntity>> ListByTimeRangeAsync(
string tenantId,
DateTimeOffset from,
DateTimeOffset to,
int limit = 1000,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
result, reason, worker_id, duration_ms, created_at, completed_at, archived_at
FROM scheduler.job_history
WHERE tenant_id = @tenant_id AND completed_at >= @from AND completed_at < @to
ORDER BY completed_at DESC
LIMIT @limit
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "from", from);
AddParameter(cmd, "to", to);
AddParameter(cmd, "limit", limit);
},
MapJobHistory,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<int> DeleteOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default)
{
const string sql = "DELETE FROM scheduler.job_history WHERE archived_at < @cutoff";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "cutoff", cutoff);
return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<JobHistoryEntity>> GetRecentFailedAsync(int limit, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
result, reason, worker_id, duration_ms, created_at, completed_at, archived_at
FROM scheduler.job_history
WHERE status = 'failed'::scheduler.job_status OR status = 'timed_out'::scheduler.job_status
ORDER BY completed_at DESC
LIMIT @limit
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "limit", limit);
var results = new List<JobHistoryEntity>();
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
results.Add(MapJobHistory(reader));
}
return results;
}
private static JobHistoryEntity MapJobHistory(NpgsqlDataReader reader) => new()
{
Id = reader.GetInt64(reader.GetOrdinal("id")),
JobId = reader.GetGuid(reader.GetOrdinal("job_id")),
TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
ProjectId = GetNullableString(reader, reader.GetOrdinal("project_id")),
JobType = reader.GetString(reader.GetOrdinal("job_type")),
Status = ParseJobStatus(reader.GetString(reader.GetOrdinal("status"))),
Attempt = reader.GetInt32(reader.GetOrdinal("attempt")),
PayloadDigest = reader.GetString(reader.GetOrdinal("payload_digest")),
Result = GetNullableString(reader, reader.GetOrdinal("result")),
Reason = GetNullableString(reader, reader.GetOrdinal("reason")),
WorkerId = GetNullableString(reader, reader.GetOrdinal("worker_id")),
DurationMs = reader.IsDBNull(reader.GetOrdinal("duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("duration_ms")),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at")),
CompletedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("completed_at")),
ArchivedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("archived_at"))
};
private static JobStatus ParseJobStatus(string status) => status switch
{
"pending" => JobStatus.Pending,
"scheduled" => JobStatus.Scheduled,
"leased" => JobStatus.Leased,
"running" => JobStatus.Running,
"succeeded" => JobStatus.Succeeded,
"failed" => JobStatus.Failed,
"canceled" => JobStatus.Canceled,
"timed_out" => JobStatus.TimedOut,
_ => throw new ArgumentException($"Unknown job status: {status}", nameof(status))
};
}

View File

@@ -0,0 +1,421 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for job operations.
/// </summary>
public sealed class JobRepository : RepositoryBase<SchedulerDataSource>, IJobRepository
{
/// <summary>
/// Creates a new job repository.
/// </summary>
public JobRepository(SchedulerDataSource dataSource, ILogger<JobRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<JobEntity> CreateAsync(JobEntity job, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO scheduler.jobs (
id, tenant_id, project_id, job_type, status, priority, payload, payload_digest,
idempotency_key, correlation_id, max_attempts, not_before, created_by
)
VALUES (
@id, @tenant_id, @project_id, @job_type, @status::scheduler.job_status, @priority, @payload::jsonb, @payload_digest,
@idempotency_key, @correlation_id, @max_attempts, @not_before, @created_by
)
RETURNING *
""";
await using var connection = await DataSource.OpenConnectionAsync(job.TenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddJobParameters(command, job);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapJob(reader);
}
/// <inheritdoc />
public async Task<JobEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.jobs
WHERE tenant_id = @tenant_id AND id = @id
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
MapJob,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<JobEntity?> GetByIdempotencyKeyAsync(string tenantId, string idempotencyKey, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.jobs
WHERE tenant_id = @tenant_id AND idempotency_key = @idempotency_key
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "idempotency_key", idempotencyKey);
},
MapJob,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<JobEntity>> GetScheduledJobsAsync(
string tenantId,
string[] jobTypes,
int limit = 10,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.jobs
WHERE tenant_id = @tenant_id
AND status = 'scheduled'
AND (not_before IS NULL OR not_before <= NOW())
AND job_type = ANY(@job_types)
ORDER BY priority DESC, created_at
LIMIT @limit
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddTextArrayParameter(cmd, "job_types", jobTypes);
AddParameter(cmd, "limit", limit);
},
MapJob,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<JobEntity?> TryLeaseJobAsync(
string tenantId,
Guid jobId,
string workerId,
TimeSpan leaseDuration,
CancellationToken cancellationToken = default)
{
var leaseId = Guid.NewGuid();
var leaseUntil = DateTimeOffset.UtcNow.Add(leaseDuration);
const string sql = """
UPDATE scheduler.jobs
SET status = 'leased'::scheduler.job_status,
lease_id = @lease_id,
worker_id = @worker_id,
lease_until = @lease_until,
leased_at = NOW(),
attempt = attempt + 1
WHERE tenant_id = @tenant_id
AND id = @job_id
AND status = 'scheduled'
AND (not_before IS NULL OR not_before <= NOW())
RETURNING *
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "job_id", jobId);
AddParameter(cmd, "lease_id", leaseId);
AddParameter(cmd, "worker_id", workerId);
AddParameter(cmd, "lease_until", leaseUntil);
},
MapJob,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<bool> ExtendLeaseAsync(
string tenantId,
Guid jobId,
Guid leaseId,
TimeSpan extension,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.jobs
SET lease_until = lease_until + @extension
WHERE tenant_id = @tenant_id
AND id = @job_id
AND lease_id = @lease_id
AND status = 'leased'
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "job_id", jobId);
AddParameter(cmd, "lease_id", leaseId);
AddParameter(cmd, "extension", extension);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> CompleteAsync(
string tenantId,
Guid jobId,
Guid leaseId,
string? result = null,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.jobs
SET status = 'succeeded'::scheduler.job_status,
result = @result::jsonb,
completed_at = NOW()
WHERE tenant_id = @tenant_id
AND id = @job_id
AND lease_id = @lease_id
AND status IN ('leased', 'running')
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "job_id", jobId);
AddParameter(cmd, "lease_id", leaseId);
AddJsonbParameter(cmd, "result", result);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> FailAsync(
string tenantId,
Guid jobId,
Guid leaseId,
string reason,
bool retry = true,
CancellationToken cancellationToken = default)
{
    // If retry is allowed and attempts remain, reschedule; otherwise mark the job failed.
var sql = retry
? """
UPDATE scheduler.jobs
SET status = CASE
WHEN attempt < max_attempts THEN 'scheduled'::scheduler.job_status
ELSE 'failed'::scheduler.job_status
END,
reason = @reason,
lease_id = NULL,
worker_id = NULL,
lease_until = NULL,
completed_at = CASE WHEN attempt >= max_attempts THEN NOW() ELSE NULL END
WHERE tenant_id = @tenant_id
AND id = @job_id
AND lease_id = @lease_id
"""
: """
UPDATE scheduler.jobs
SET status = 'failed'::scheduler.job_status,
reason = @reason,
completed_at = NOW()
WHERE tenant_id = @tenant_id
AND id = @job_id
AND lease_id = @lease_id
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "job_id", jobId);
AddParameter(cmd, "lease_id", leaseId);
AddParameter(cmd, "reason", reason);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> CancelAsync(
string tenantId,
Guid jobId,
string reason,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.jobs
SET status = 'canceled'::scheduler.job_status,
reason = @reason,
completed_at = NOW()
WHERE tenant_id = @tenant_id
AND id = @job_id
AND status IN ('pending', 'scheduled')
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "job_id", jobId);
AddParameter(cmd, "reason", reason);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<int> RecoverExpiredLeasesAsync(
string tenantId,
CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.jobs
SET status = CASE
WHEN attempt < max_attempts THEN 'scheduled'::scheduler.job_status
ELSE 'timed_out'::scheduler.job_status
END,
reason = 'Lease expired',
lease_id = NULL,
worker_id = NULL,
lease_until = NULL,
completed_at = CASE WHEN attempt >= max_attempts THEN NOW() ELSE NULL END
WHERE tenant_id = @tenant_id
AND status = 'leased'
AND lease_until < NOW()
""";
return await ExecuteAsync(
tenantId,
sql,
cmd => AddParameter(cmd, "tenant_id", tenantId),
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<JobEntity>> GetByStatusAsync(
string tenantId,
JobStatus status,
int limit = 100,
int offset = 0,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT * FROM scheduler.jobs
WHERE tenant_id = @tenant_id AND status = @status::scheduler.job_status
ORDER BY created_at DESC, id
LIMIT @limit OFFSET @offset
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "status", status.ToString().ToLowerInvariant());
AddParameter(cmd, "limit", limit);
AddParameter(cmd, "offset", offset);
},
MapJob,
cancellationToken).ConfigureAwait(false);
}
private static void AddJobParameters(NpgsqlCommand command, JobEntity job)
{
AddParameter(command, "id", job.Id);
AddParameter(command, "tenant_id", job.TenantId);
AddParameter(command, "project_id", job.ProjectId);
AddParameter(command, "job_type", job.JobType);
AddParameter(command, "status", job.Status.ToString().ToLowerInvariant());
AddParameter(command, "priority", job.Priority);
AddJsonbParameter(command, "payload", job.Payload);
AddParameter(command, "payload_digest", job.PayloadDigest);
AddParameter(command, "idempotency_key", job.IdempotencyKey);
AddParameter(command, "correlation_id", job.CorrelationId);
AddParameter(command, "max_attempts", job.MaxAttempts);
AddParameter(command, "not_before", job.NotBefore);
AddParameter(command, "created_by", job.CreatedBy);
}
private static JobEntity MapJob(NpgsqlDataReader reader) => new()
{
Id = reader.GetGuid(reader.GetOrdinal("id")),
TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
ProjectId = GetNullableString(reader, reader.GetOrdinal("project_id")),
JobType = reader.GetString(reader.GetOrdinal("job_type")),
Status = ParseJobStatus(reader.GetString(reader.GetOrdinal("status"))),
Priority = reader.GetInt32(reader.GetOrdinal("priority")),
Payload = reader.GetString(reader.GetOrdinal("payload")),
PayloadDigest = reader.GetString(reader.GetOrdinal("payload_digest")),
IdempotencyKey = reader.GetString(reader.GetOrdinal("idempotency_key")),
CorrelationId = GetNullableString(reader, reader.GetOrdinal("correlation_id")),
Attempt = reader.GetInt32(reader.GetOrdinal("attempt")),
MaxAttempts = reader.GetInt32(reader.GetOrdinal("max_attempts")),
LeaseId = GetNullableGuid(reader, reader.GetOrdinal("lease_id")),
WorkerId = GetNullableString(reader, reader.GetOrdinal("worker_id")),
LeaseUntil = GetNullableDateTimeOffset(reader, reader.GetOrdinal("lease_until")),
NotBefore = GetNullableDateTimeOffset(reader, reader.GetOrdinal("not_before")),
Reason = GetNullableString(reader, reader.GetOrdinal("reason")),
Result = GetNullableString(reader, reader.GetOrdinal("result")),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at")),
ScheduledAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("scheduled_at")),
LeasedAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("leased_at")),
StartedAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("started_at")),
CompletedAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("completed_at")),
CreatedBy = GetNullableString(reader, reader.GetOrdinal("created_by"))
};
private static JobStatus ParseJobStatus(string status) => status switch
{
"pending" => JobStatus.Pending,
"scheduled" => JobStatus.Scheduled,
"leased" => JobStatus.Leased,
"running" => JobStatus.Running,
"succeeded" => JobStatus.Succeeded,
"failed" => JobStatus.Failed,
"canceled" => JobStatus.Canceled,
"timed_out" => JobStatus.TimedOut,
_ => throw new ArgumentException($"Unknown job status: {status}", nameof(status))
};
}

View File

@@ -0,0 +1,178 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for metrics operations.
/// </summary>
public sealed class MetricsRepository : RepositoryBase<SchedulerDataSource>, IMetricsRepository
{
/// <summary>
/// Creates a new metrics repository.
/// </summary>
public MetricsRepository(SchedulerDataSource dataSource, ILogger<MetricsRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<MetricsEntity> UpsertAsync(MetricsEntity metrics, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO scheduler.metrics (
tenant_id, job_type, period_start, period_end, jobs_created, jobs_completed,
jobs_failed, jobs_timed_out, avg_duration_ms, p50_duration_ms, p95_duration_ms, p99_duration_ms
)
VALUES (
@tenant_id, @job_type, @period_start, @period_end, @jobs_created, @jobs_completed,
@jobs_failed, @jobs_timed_out, @avg_duration_ms, @p50_duration_ms, @p95_duration_ms, @p99_duration_ms
)
ON CONFLICT (tenant_id, job_type, period_start) DO UPDATE SET
period_end = EXCLUDED.period_end,
jobs_created = EXCLUDED.jobs_created,
jobs_completed = EXCLUDED.jobs_completed,
jobs_failed = EXCLUDED.jobs_failed,
jobs_timed_out = EXCLUDED.jobs_timed_out,
avg_duration_ms = EXCLUDED.avg_duration_ms,
p50_duration_ms = EXCLUDED.p50_duration_ms,
p95_duration_ms = EXCLUDED.p95_duration_ms,
p99_duration_ms = EXCLUDED.p99_duration_ms
RETURNING *
""";
await using var connection = await DataSource.OpenConnectionAsync(metrics.TenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "tenant_id", metrics.TenantId);
AddParameter(command, "job_type", metrics.JobType);
AddParameter(command, "period_start", metrics.PeriodStart);
AddParameter(command, "period_end", metrics.PeriodEnd);
AddParameter(command, "jobs_created", metrics.JobsCreated);
AddParameter(command, "jobs_completed", metrics.JobsCompleted);
AddParameter(command, "jobs_failed", metrics.JobsFailed);
AddParameter(command, "jobs_timed_out", metrics.JobsTimedOut);
AddParameter(command, "avg_duration_ms", metrics.AvgDurationMs);
AddParameter(command, "p50_duration_ms", metrics.P50DurationMs);
AddParameter(command, "p95_duration_ms", metrics.P95DurationMs);
AddParameter(command, "p99_duration_ms", metrics.P99DurationMs);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapMetrics(reader);
}
/// <inheritdoc />
public async Task<IReadOnlyList<MetricsEntity>> GetAsync(
string tenantId,
string jobType,
DateTimeOffset from,
DateTimeOffset to,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, job_type, period_start, period_end, jobs_created, jobs_completed,
jobs_failed, jobs_timed_out, avg_duration_ms, p50_duration_ms, p95_duration_ms, p99_duration_ms, created_at
FROM scheduler.metrics
WHERE tenant_id = @tenant_id AND job_type = @job_type
AND period_start >= @from AND period_start < @to
ORDER BY period_start
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "job_type", jobType);
AddParameter(cmd, "from", from);
AddParameter(cmd, "to", to);
},
MapMetrics,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<MetricsEntity>> GetByTenantAsync(
string tenantId,
DateTimeOffset from,
DateTimeOffset to,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, job_type, period_start, period_end, jobs_created, jobs_completed,
jobs_failed, jobs_timed_out, avg_duration_ms, p50_duration_ms, p95_duration_ms, p99_duration_ms, created_at
FROM scheduler.metrics
WHERE tenant_id = @tenant_id AND period_start >= @from AND period_start < @to
ORDER BY period_start, job_type
""";
return await QueryAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "from", from);
AddParameter(cmd, "to", to);
},
MapMetrics,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<MetricsEntity>> GetLatestAsync(
string tenantId,
CancellationToken cancellationToken = default)
{
const string sql = """
SELECT DISTINCT ON (job_type) id, tenant_id, job_type, period_start, period_end,
jobs_created, jobs_completed, jobs_failed, jobs_timed_out,
avg_duration_ms, p50_duration_ms, p95_duration_ms, p99_duration_ms, created_at
FROM scheduler.metrics
WHERE tenant_id = @tenant_id
ORDER BY job_type, period_start DESC
""";
return await QueryAsync(
tenantId,
sql,
cmd => AddParameter(cmd, "tenant_id", tenantId),
MapMetrics,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<int> DeleteOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default)
{
const string sql = "DELETE FROM scheduler.metrics WHERE period_end < @cutoff";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "cutoff", cutoff);
return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
private static MetricsEntity MapMetrics(NpgsqlDataReader reader) => new()
{
Id = reader.GetInt64(reader.GetOrdinal("id")),
TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
JobType = reader.GetString(reader.GetOrdinal("job_type")),
PeriodStart = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("period_start")),
PeriodEnd = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("period_end")),
JobsCreated = reader.GetInt64(reader.GetOrdinal("jobs_created")),
JobsCompleted = reader.GetInt64(reader.GetOrdinal("jobs_completed")),
JobsFailed = reader.GetInt64(reader.GetOrdinal("jobs_failed")),
JobsTimedOut = reader.GetInt64(reader.GetOrdinal("jobs_timed_out")),
AvgDurationMs = reader.IsDBNull(reader.GetOrdinal("avg_duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("avg_duration_ms")),
P50DurationMs = reader.IsDBNull(reader.GetOrdinal("p50_duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("p50_duration_ms")),
P95DurationMs = reader.IsDBNull(reader.GetOrdinal("p95_duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("p95_duration_ms")),
P99DurationMs = reader.IsDBNull(reader.GetOrdinal("p99_duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("p99_duration_ms")),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at"))
};
}

View File

@@ -0,0 +1,258 @@
using System.Collections.Immutable;
using System.Text.Json;
using Dapper;
using StellaOps.Scheduler.Models;
using StellaOps.Infrastructure.Postgres.Connections;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public sealed class PolicyRunJobRepository : IPolicyRunJobRepository
{
private readonly SchedulerDataSource _dataSource;
private readonly JsonSerializerOptions _serializer = CanonicalJsonSerializer.Settings;
public PolicyRunJobRepository(SchedulerDataSource dataSource)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
}
public async Task<PolicyRunJob?> GetAsync(string tenantId, string jobId, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(jobId);
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken);
const string sql = "SELECT * FROM scheduler.policy_run_jobs WHERE tenant_id = @TenantId AND id = @Id LIMIT 1;";
var row = await conn.QuerySingleOrDefaultAsync(sql, new { TenantId = tenantId, Id = jobId });
return row is null ? null : Map(row);
}
public async Task<PolicyRunJob?> GetByRunIdAsync(string tenantId, string runId, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(runId);
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken);
const string sql = "SELECT * FROM scheduler.policy_run_jobs WHERE tenant_id = @TenantId AND run_id = @RunId LIMIT 1;";
var row = await conn.QuerySingleOrDefaultAsync(sql, new { TenantId = tenantId, RunId = runId });
return row is null ? null : Map(row);
}
public async Task InsertAsync(PolicyRunJob job, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(job);
await using var conn = await _dataSource.OpenConnectionAsync(job.TenantId, "writer", cancellationToken);
const string sql = """
INSERT INTO scheduler.policy_run_jobs (
id, tenant_id, policy_id, policy_version, mode, priority, priority_rank, run_id, requested_by, correlation_id,
metadata, inputs, queued_at, status, attempt_count, last_attempt_at, last_error,
created_at, updated_at, available_at, submitted_at, completed_at, lease_owner, lease_expires_at,
cancellation_requested, cancellation_requested_at, cancellation_reason, cancelled_at, schema_version)
VALUES (
@Id, @TenantId, @PolicyId, @PolicyVersion, @Mode, @Priority, @PriorityRank, @RunId, @RequestedBy, @CorrelationId,
@Metadata, @Inputs, @QueuedAt, @Status, @AttemptCount, @LastAttemptAt, @LastError,
@CreatedAt, @UpdatedAt, @AvailableAt, @SubmittedAt, @CompletedAt, @LeaseOwner, @LeaseExpiresAt,
@CancellationRequested, @CancellationRequestedAt, @CancellationReason, @CancelledAt, @SchemaVersion)
ON CONFLICT (id) DO NOTHING;
""";
await conn.ExecuteAsync(sql, MapParams(job));
}
public async Task<long> CountAsync(string tenantId, PolicyRunMode mode, IReadOnlyCollection<PolicyRunJobStatus> statuses, CancellationToken cancellationToken = default)
{
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken);
const string sql = """
SELECT COUNT(*) FROM scheduler.policy_run_jobs
WHERE tenant_id = @TenantId AND mode = @Mode AND status = ANY(@Statuses);
""";
return await conn.ExecuteScalarAsync<long>(sql, new
{
TenantId = tenantId,
Mode = mode.ToString().ToLowerInvariant(),
Statuses = statuses.Select(s => s.ToString().ToLowerInvariant()).ToArray()
});
}
public async Task<PolicyRunJob?> LeaseAsync(string leaseOwner, DateTimeOffset now, TimeSpan leaseDuration, int maxAttempts, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(leaseOwner);
await using var conn = await _dataSource.OpenSystemConnectionAsync(cancellationToken);
const string sql = """
WITH candidate AS (
SELECT *
FROM scheduler.policy_run_jobs
WHERE status IN ('pending','retrying')
ORDER BY available_at ASC, priority_rank DESC, created_at ASC
FOR UPDATE SKIP LOCKED
LIMIT 1
)
UPDATE scheduler.policy_run_jobs j
SET lease_owner = @LeaseOwner,
lease_expires_at = @LeaseExpires,
attempt_count = j.attempt_count + 1,
last_attempt_at = @Now,
status = CASE WHEN j.status = 'pending' THEN 'submitted' ELSE 'retrying' END,
updated_at = @Now
FROM candidate c
WHERE j.id = c.id
AND j.attempt_count < @MaxAttempts
RETURNING j.*;
""";
var row = await conn.QuerySingleOrDefaultAsync(sql, new
{
LeaseOwner = leaseOwner,
LeaseExpires = now.Add(leaseDuration),
Now = now,
MaxAttempts = maxAttempts
});
return row is null ? null : Map(row);
}
public async Task<bool> ReplaceAsync(PolicyRunJob job, string? expectedLeaseOwner = null, CancellationToken cancellationToken = default)
{
await using var conn = await _dataSource.OpenConnectionAsync(job.TenantId, "writer", cancellationToken);
var matchLease = string.IsNullOrWhiteSpace(expectedLeaseOwner)
? ""
: "AND lease_owner = @ExpectedLeaseOwner";
var sql = $"""
UPDATE scheduler.policy_run_jobs
SET policy_version = @PolicyVersion,
status = @Status,
attempt_count = @AttemptCount,
last_attempt_at = @LastAttemptAt,
last_error = @LastError,
available_at = @AvailableAt,
submitted_at = @SubmittedAt,
completed_at = @CompletedAt,
lease_owner = @LeaseOwner,
lease_expires_at = @LeaseExpiresAt,
cancellation_requested = @CancellationRequested,
cancellation_requested_at = @CancellationRequestedAt,
cancellation_reason = @CancellationReason,
cancelled_at = @CancelledAt,
updated_at = @UpdatedAt,
run_id = @RunId
WHERE id = @Id {matchLease};
""";
var affected = await conn.ExecuteAsync(sql, MapParams(job, expectedLeaseOwner));
return affected > 0;
}
public async Task<IReadOnlyList<PolicyRunJob>> ListAsync(
string tenantId,
string? policyId = null,
PolicyRunMode? mode = null,
IReadOnlyCollection<PolicyRunJobStatus>? statuses = null,
DateTimeOffset? queuedAfter = null,
int limit = 50,
CancellationToken cancellationToken = default)
{
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken);
var filters = new List<string> { "tenant_id = @TenantId" };
if (!string.IsNullOrWhiteSpace(policyId)) filters.Add("policy_id = @PolicyId");
if (mode is not null) filters.Add("mode = @Mode");
if (statuses is not null && statuses.Count > 0) filters.Add("status = ANY(@Statuses)");
if (queuedAfter is not null) filters.Add("queued_at > @QueuedAfter");
var sql = $"""
SELECT *
FROM scheduler.policy_run_jobs
WHERE {string.Join(" AND ", filters)}
ORDER BY created_at DESC
LIMIT @Limit;
""";
var rows = await conn.QueryAsync(sql, new
{
TenantId = tenantId,
PolicyId = policyId,
Mode = mode?.ToString().ToLowerInvariant(),
Statuses = statuses?.Select(s => s.ToString().ToLowerInvariant()).ToArray(),
QueuedAfter = queuedAfter,
Limit = limit
});
return rows.Select(Map).ToList();
}
private object MapParams(PolicyRunJob job, string? expectedLeaseOwner = null) => new
{
job.Id,
job.TenantId,
job.PolicyId,
job.PolicyVersion,
Mode = job.Mode.ToString().ToLowerInvariant(),
Priority = (int)job.Priority,
job.PriorityRank,
job.RunId,
job.RequestedBy,
job.CorrelationId,
Metadata = job.Metadata is null ? null : JsonSerializer.Serialize(job.Metadata, _serializer),
Inputs = JsonSerializer.Serialize(job.Inputs, _serializer),
job.QueuedAt,
Status = job.Status.ToString().ToLowerInvariant(),
job.AttemptCount,
job.LastAttemptAt,
job.LastError,
job.CreatedAt,
job.UpdatedAt,
job.AvailableAt,
job.SubmittedAt,
job.CompletedAt,
job.LeaseOwner,
job.LeaseExpiresAt,
job.CancellationRequested,
job.CancellationRequestedAt,
job.CancellationReason,
job.CancelledAt,
job.SchemaVersion,
ExpectedLeaseOwner = expectedLeaseOwner
};
private PolicyRunJob Map(dynamic row)
{
var metadata = row.metadata is null
? null
: JsonSerializer.Deserialize<ImmutableSortedDictionary<string, string>>((string)row.metadata, _serializer);
var inputs = JsonSerializer.Deserialize<PolicyRunInputs>((string)row.inputs, _serializer)!;
return new PolicyRunJob(
(string?)row.schema_version ?? SchedulerSchemaVersions.PolicyRunJob,
(string)row.id,
(string)row.tenant_id,
(string)row.policy_id,
(int?)row.policy_version,
Enum.Parse<PolicyRunMode>((string)row.mode, true),
(PolicyRunPriority)row.priority,
(int)row.priority_rank,
(string?)row.run_id,
(string?)row.requested_by,
(string?)row.correlation_id,
metadata,
inputs,
row.queued_at is null ? null : DateTime.SpecifyKind(row.queued_at, DateTimeKind.Utc),
Enum.Parse<PolicyRunJobStatus>((string)row.status, true),
(int)row.attempt_count,
row.last_attempt_at is null ? null : DateTime.SpecifyKind(row.last_attempt_at, DateTimeKind.Utc),
(string?)row.last_error,
DateTime.SpecifyKind(row.created_at, DateTimeKind.Utc),
DateTime.SpecifyKind(row.updated_at, DateTimeKind.Utc),
DateTime.SpecifyKind(row.available_at, DateTimeKind.Utc),
row.submitted_at is null ? null : DateTime.SpecifyKind(row.submitted_at, DateTimeKind.Utc),
row.completed_at is null ? null : DateTime.SpecifyKind(row.completed_at, DateTimeKind.Utc),
(string?)row.lease_owner,
row.lease_expires_at is null ? null : DateTime.SpecifyKind(row.lease_expires_at, DateTimeKind.Utc),
(bool)row.cancellation_requested,
row.cancellation_requested_at is null ? null : DateTime.SpecifyKind(row.cancellation_requested_at, DateTimeKind.Utc),
(string?)row.cancellation_reason,
row.cancelled_at is null ? null : DateTime.SpecifyKind(row.cancelled_at, DateTimeKind.Utc));
}
}

View File

@@ -0,0 +1,14 @@
using System.Collections.Immutable;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public sealed class RunQueryOptions
{
public string? ScheduleId { get; init; }
public ImmutableArray<RunState> States { get; init; } = ImmutableArray<RunState>.Empty;
public DateTimeOffset? CreatedAfter { get; init; }
public RunListCursor? Cursor { get; init; }
public bool SortAscending { get; init; } = false;
public int? Limit { get; init; }
}

View File

@@ -0,0 +1,199 @@
using System.Text.Json;
using Dapper;
using StellaOps.Scheduler.Models;
using StellaOps.Infrastructure.Postgres.Connections;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public sealed class RunRepository : IRunRepository
{
private readonly SchedulerDataSource _dataSource;
private readonly JsonSerializerOptions _serializer = CanonicalJsonSerializer.Settings;
public RunRepository(SchedulerDataSource dataSource)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
}
public async Task InsertAsync(Run run, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(run);
await using var conn = await _dataSource.OpenConnectionAsync(run.TenantId, "writer", cancellationToken);
const string sql = """
INSERT INTO scheduler.runs (
id, tenant_id, schedule_id, trigger, state, stats, reason, created_at, started_at, finished_at,
error, deltas, retry_of, schema_version)
VALUES (@Id, @TenantId, @ScheduleId, @Trigger, @State, @Stats, @Reason, @CreatedAt, @StartedAt, @FinishedAt,
@Error, @Deltas, @RetryOf, @SchemaVersion)
ON CONFLICT (tenant_id, id) DO NOTHING;
""";
var payload = MapParams(run);
await conn.ExecuteAsync(new CommandDefinition(sql, payload, cancellationToken: cancellationToken));
}
public async Task<bool> UpdateAsync(Run run, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(run);
await using var conn = await _dataSource.OpenConnectionAsync(run.TenantId, "writer", cancellationToken);
const string sql = """
UPDATE scheduler.runs
SET state = @State,
stats = @Stats,
reason = @Reason,
started_at = @StartedAt,
finished_at = @FinishedAt,
error = @Error,
deltas = @Deltas,
retry_of = @RetryOf,
schema_version = @SchemaVersion
WHERE tenant_id = @TenantId AND id = @Id;
""";
var payload = MapParams(run);
var affected = await conn.ExecuteAsync(new CommandDefinition(sql, payload, cancellationToken: cancellationToken));
return affected > 0;
}
public async Task<Run?> GetAsync(string tenantId, string runId, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(runId);
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken);
const string sql = """
SELECT *
FROM scheduler.runs
WHERE tenant_id = @TenantId AND id = @RunId
LIMIT 1;
""";
var row = await conn.QuerySingleOrDefaultAsync(new CommandDefinition(sql, new { TenantId = tenantId, RunId = runId }, cancellationToken: cancellationToken));
return row is null ? null : MapRun(row);
}
public async Task<IReadOnlyList<Run>> ListAsync(string tenantId, RunQueryOptions? options = null, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
options ??= new RunQueryOptions();
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken);
var filters = new List<string> { "tenant_id = @TenantId" };
if (!string.IsNullOrWhiteSpace(options.ScheduleId))
{
filters.Add("schedule_id = @ScheduleId");
}
if (!options.States.IsDefaultOrEmpty)
{
filters.Add("state = ANY(@States)");
}
if (options.CreatedAfter is not null)
{
filters.Add("created_at > @CreatedAfter");
}
if (options.Cursor is not null)
{
filters.Add(options.SortAscending
? "(created_at, id) > (@CursorCreatedAt, @CursorId)"
: "(created_at, id) < (@CursorCreatedAt, @CursorId)");
}
var order = options.SortAscending ? "created_at ASC, id ASC" : "created_at DESC, id DESC";
var limit = options.Limit.GetValueOrDefault(50);
var sql = $"""
SELECT *
FROM scheduler.runs
WHERE {string.Join(" AND ", filters)}
ORDER BY {order}
LIMIT @Limit;
""";
var rows = await conn.QueryAsync(new CommandDefinition(sql, new
{
TenantId = tenantId,
ScheduleId = options.ScheduleId,
States = options.States.Select(s => s.ToString().ToLowerInvariant()).ToArray(),
CreatedAfter = options.CreatedAfter?.UtcDateTime,
CursorCreatedAt = options.Cursor?.CreatedAt.UtcDateTime,
CursorId = options.Cursor?.RunId,
Limit = limit
}, cancellationToken: cancellationToken));
return rows.Select(MapRun).ToList();
}
public async Task<IReadOnlyList<Run>> ListByStateAsync(RunState state, int limit = 50, CancellationToken cancellationToken = default)
{
await using var conn = await _dataSource.OpenSystemConnectionAsync(cancellationToken);
const string sql = """
SELECT *
FROM scheduler.runs
WHERE state = @State
ORDER BY created_at ASC
LIMIT @Limit;
""";
var rows = await conn.QueryAsync(new CommandDefinition(sql, new { State = state.ToString().ToLowerInvariant(), Limit = limit }, cancellationToken: cancellationToken));
return rows.Select(MapRun).ToList();
}
private object MapParams(Run run) => new
{
run.Id,
run.TenantId,
run.ScheduleId,
Trigger = Serialize(run.Trigger),
State = run.State.ToString().ToLowerInvariant(),
Stats = Serialize(run.Stats),
Reason = Serialize(run.Reason),
run.CreatedAt,
run.StartedAt,
run.FinishedAt,
run.Error,
Deltas = Serialize(run.Deltas),
run.RetryOf,
run.SchemaVersion
};
private Run MapRun(dynamic row)
{
var trigger = Deserialize<RunTrigger>(row.trigger);
var state = Enum.Parse<RunState>(row.state, true);
var stats = Deserialize<RunStats>(row.stats);
var reason = Deserialize<RunReason>(row.reason);
var deltas = Deserialize<IEnumerable<DeltaSummary>>(row.deltas) ?? Enumerable.Empty<DeltaSummary>();
return new Run(
(string)row.id,
(string)row.tenant_id,
trigger,
state,
stats,
reason,
(string?)row.schedule_id,
DateTime.SpecifyKind(row.created_at, DateTimeKind.Utc),
row.started_at is null ? null : DateTime.SpecifyKind(row.started_at, DateTimeKind.Utc),
row.finished_at is null ? null : DateTime.SpecifyKind(row.finished_at, DateTimeKind.Utc),
(string?)row.error,
deltas,
(string?)row.retry_of,
(string?)row.schema_version);
}
private string Serialize<T>(T value) =>
JsonSerializer.Serialize(value, _serializer);
private T? Deserialize<T>(string? json) =>
json is null ? default : JsonSerializer.Deserialize<T>(json, _serializer);
}
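Usage note: ListAsync pages with the `(created_at, id)` row comparison, which stays stable even when several runs share a timestamp. A hedged consumer sketch using the simpler CreatedAfter watermark (the repository instance, tenant id, and states are illustrative; `ImmutableArray` comes from System.Collections.Immutable):

// Poll for terminal runs created after a watermark, oldest first.
var runs = await runRepository.ListAsync("tenant-a", new RunQueryOptions
{
    States = ImmutableArray.Create(RunState.Completed, RunState.Error),
    CreatedAfter = DateTimeOffset.UtcNow.AddMinutes(-5),
    SortAscending = true,
    Limit = 100
});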

View File

@@ -0,0 +1,56 @@
using System.Collections.Concurrent;
using System.Collections.Immutable;
using StellaOps.Scheduler.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
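/// <summary>
/// In-memory run summary projection keyed by (tenant, schedule).
/// Note: each <see cref="ProjectAsync"/> call replaces the stored projection with
/// counters derived from that single run, and nothing survives a process restart.
/// </summary>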
public sealed class RunSummaryService : IRunSummaryService
{
private readonly ConcurrentDictionary<(string TenantId, string ScheduleId), RunSummaryProjection> _projections = new();
public Task<RunSummaryProjection> ProjectAsync(Run run, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(run);
var scheduleId = run.ScheduleId ?? string.Empty;
var updatedAt = run.FinishedAt ?? run.StartedAt ?? run.CreatedAt;
var counters = new RunSummaryCounters(
Total: 1,
Planning: run.State == RunState.Planning ? 1 : 0,
Queued: run.State == RunState.Queued ? 1 : 0,
Running: run.State == RunState.Running ? 1 : 0,
Completed: run.State == RunState.Completed ? 1 : 0,
Error: run.State == RunState.Error ? 1 : 0,
Cancelled: run.State == RunState.Cancelled ? 1 : 0,
TotalDeltas: run.Stats.Deltas,
TotalNewCriticals: run.Stats.NewCriticals,
TotalNewHigh: run.Stats.NewHigh,
TotalNewMedium: run.Stats.NewMedium,
TotalNewLow: run.Stats.NewLow);
var projection = new RunSummaryProjection(
run.TenantId,
scheduleId,
updatedAt,
run.Id,
ImmutableArray<RunSummarySnapshot>.Empty,
counters);
_projections[(run.TenantId, scheduleId)] = projection;
return Task.FromResult(projection);
}
public Task<RunSummaryProjection?> GetAsync(string tenantId, string scheduleId, CancellationToken cancellationToken = default)
{
_projections.TryGetValue((tenantId, scheduleId), out var projection);
return Task.FromResult<RunSummaryProjection?>(projection);
}
public Task<IReadOnlyList<RunSummaryProjection>> ListAsync(string tenantId, CancellationToken cancellationToken = default)
{
var results = _projections.Values
.Where(p => string.Equals(p.TenantId, tenantId, StringComparison.Ordinal))
.ToList();
return Task.FromResult<IReadOnlyList<RunSummaryProjection>>(results);
}
}

View File

@@ -0,0 +1,8 @@
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public sealed class ScheduleQueryOptions
{
public bool IncludeDisabled { get; init; } = false;
public bool IncludeDeleted { get; init; } = false;
public int? Limit { get; init; }
}

View File

@@ -0,0 +1,160 @@
using System.Text.Json;
using Dapper;
using StellaOps.Scheduler.Models;
using StellaOps.Infrastructure.Postgres.Connections;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
public sealed class ScheduleRepository : IScheduleRepository
{
private readonly SchedulerDataSource _dataSource;
private readonly JsonSerializerOptions _serializer = CanonicalJsonSerializer.Settings;
public ScheduleRepository(SchedulerDataSource dataSource)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
}
public async Task UpsertAsync(Schedule schedule, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(schedule);
await using var conn = await _dataSource.OpenConnectionAsync(schedule.TenantId, "writer", cancellationToken);
const string sql = """
INSERT INTO scheduler.schedules (
id, tenant_id, name, description, enabled, cron_expression, timezone, mode,
selection, only_if, notify, limits, subscribers, created_at, created_by,
updated_at, updated_by, deleted_at, deleted_by, schema_version)
VALUES (
@Id, @TenantId, @Name, @Description, @Enabled, @CronExpression, @Timezone, @Mode,
@Selection, @OnlyIf, @Notify, @Limits, @Subscribers, @CreatedAt, @CreatedBy,
@UpdatedAt, @UpdatedBy, NULL, NULL, @SchemaVersion)
ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
description = EXCLUDED.description,
enabled = EXCLUDED.enabled,
cron_expression = EXCLUDED.cron_expression,
timezone = EXCLUDED.timezone,
mode = EXCLUDED.mode,
selection = EXCLUDED.selection,
only_if = EXCLUDED.only_if,
notify = EXCLUDED.notify,
limits = EXCLUDED.limits,
subscribers = EXCLUDED.subscribers,
updated_at = EXCLUDED.updated_at,
updated_by = EXCLUDED.updated_by,
schema_version = EXCLUDED.schema_version,
deleted_at = NULL,
deleted_by = NULL;
""";
await conn.ExecuteAsync(new CommandDefinition(sql, new
{
schedule.Id,
schedule.TenantId,
schedule.Name,
// The Schedule model carries no description field yet; persist NULL.
Description = (string?)null,
schedule.Enabled,
schedule.CronExpression,
schedule.Timezone,
Mode = schedule.Mode.ToString().ToLowerInvariant(),
Selection = JsonSerializer.Serialize(schedule.Selection, _serializer),
OnlyIf = JsonSerializer.Serialize(schedule.OnlyIf, _serializer),
Notify = JsonSerializer.Serialize(schedule.Notify, _serializer),
Limits = JsonSerializer.Serialize(schedule.Limits, _serializer),
// Bound as a PostgreSQL text[] parameter, not JSON.
Subscribers = schedule.Subscribers.ToArray(),
schedule.CreatedAt,
schedule.CreatedBy,
schedule.UpdatedAt,
schedule.UpdatedBy,
schedule.SchemaVersion
}, cancellationToken: cancellationToken));
}
public async Task<Schedule?> GetAsync(string tenantId, string scheduleId, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(scheduleId);
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken);
const string sql = """
SELECT *
FROM scheduler.schedules
WHERE tenant_id = @TenantId AND id = @ScheduleId AND deleted_at IS NULL
LIMIT 1;
""";
var row = await conn.QuerySingleOrDefaultAsync(new CommandDefinition(sql, new { TenantId = tenantId, ScheduleId = scheduleId }, cancellationToken: cancellationToken));
return row is null ? null : Map(row);
}
public async Task<IReadOnlyList<Schedule>> ListAsync(string tenantId, ScheduleQueryOptions? options = null, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
options ??= new ScheduleQueryOptions();
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken);
var where = options.IncludeDeleted
? "tenant_id = @TenantId"
: "tenant_id = @TenantId AND deleted_at IS NULL";
if (!options.IncludeDisabled)
{
where += " AND enabled = TRUE";
}
var limit = options.Limit.GetValueOrDefault(200);
var sql = $"""
SELECT *
FROM scheduler.schedules
WHERE {where}
ORDER BY name ASC
LIMIT @Limit;
""";
var rows = await conn.QueryAsync(new CommandDefinition(sql, new { TenantId = tenantId, Limit = limit }, cancellationToken: cancellationToken));
return rows.Select(Map).ToList();
}
public async Task<bool> SoftDeleteAsync(string tenantId, string scheduleId, string deletedBy, DateTimeOffset deletedAt, CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrWhiteSpace(tenantId);
ArgumentException.ThrowIfNullOrWhiteSpace(scheduleId);
await using var conn = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken);
const string sql = """
UPDATE scheduler.schedules
SET deleted_at = @DeletedAt, deleted_by = @DeletedBy
WHERE tenant_id = @TenantId AND id = @ScheduleId AND deleted_at IS NULL;
""";
var affected = await conn.ExecuteAsync(new CommandDefinition(sql, new
{
TenantId = tenantId,
ScheduleId = scheduleId,
DeletedBy = deletedBy,
DeletedAt = deletedAt
}, cancellationToken: cancellationToken));
return affected > 0;
}
private Schedule Map(dynamic row)
{
return new Schedule(
(string)row.id,
(string)row.tenant_id,
(string)row.name,
(bool)row.enabled,
(string)row.cron_expression,
(string)row.timezone,
Enum.Parse<ScheduleMode>((string)row.mode, true),
JsonSerializer.Deserialize<Selector>((string)row.selection, _serializer)!,
JsonSerializer.Deserialize<ScheduleOnlyIf>((string)row.only_if, _serializer)!,
JsonSerializer.Deserialize<ScheduleNotify>((string)row.notify, _serializer)!,
JsonSerializer.Deserialize<ScheduleLimits>((string)row.limits, _serializer)!,
// subscribers round-trips as text[] (see UpsertAsync), so read the array directly rather than as JSON.
System.Collections.Immutable.ImmutableArray.Create((string[])row.subscribers),
DateTime.SpecifyKind(row.created_at, DateTimeKind.Utc),
(string)row.created_by,
DateTime.SpecifyKind(row.updated_at, DateTimeKind.Utc),
(string)row.updated_by,
(string?)row.schema_version);
}
}
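Usage note: soft deletion tombstones a row (deleted_at/deleted_by) so it stays auditable while GetAsync and the default ListAsync filter it out. A hedged sketch (ids and actor are illustrative):

// Tombstone a schedule, then opt back into seeing it.
await scheduleRepository.SoftDeleteAsync("tenant-a", "sched-1", "ops@example.com", DateTimeOffset.UtcNow);
var active = await scheduleRepository.ListAsync("tenant-a");           // enabled, non-deleted only
var everything = await scheduleRepository.ListAsync("tenant-a", new ScheduleQueryOptions
{
    IncludeDisabled = true,
    IncludeDeleted = true
});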

View File

@@ -0,0 +1,301 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for trigger operations.
/// </summary>
public sealed class TriggerRepository : RepositoryBase<SchedulerDataSource>, ITriggerRepository
{
/// <summary>
/// Creates a new trigger repository.
/// </summary>
public TriggerRepository(SchedulerDataSource dataSource, ILogger<TriggerRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<TriggerEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
enabled, next_fire_at, last_fire_at, last_job_id, fire_count, misfire_count,
metadata, created_at, updated_at, created_by
FROM scheduler.triggers
WHERE tenant_id = @tenant_id AND id = @id
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
MapTrigger,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<TriggerEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
enabled, next_fire_at, last_fire_at, last_job_id, fire_count, misfire_count,
metadata, created_at, updated_at, created_by
FROM scheduler.triggers
WHERE tenant_id = @tenant_id AND name = @name
""";
return await QuerySingleOrDefaultAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "name", name);
},
MapTrigger,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<TriggerEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
enabled, next_fire_at, last_fire_at, last_job_id, fire_count, misfire_count,
metadata, created_at, updated_at, created_by
FROM scheduler.triggers
WHERE tenant_id = @tenant_id
ORDER BY name
""";
return await QueryAsync(
tenantId,
sql,
cmd => AddParameter(cmd, "tenant_id", tenantId),
MapTrigger,
cancellationToken).ConfigureAwait(false);
}
/// <inheritdoc />
public async Task<IReadOnlyList<TriggerEntity>> GetDueTriggersAsync(int limit = 100, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
enabled, next_fire_at, last_fire_at, last_job_id, fire_count, misfire_count,
metadata, created_at, updated_at, created_by
FROM scheduler.triggers
WHERE enabled = TRUE AND next_fire_at <= NOW()
ORDER BY next_fire_at, tenant_id, id
LIMIT @limit
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "limit", limit);
var results = new List<TriggerEntity>();
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
results.Add(MapTrigger(reader));
}
return results;
}
/// <inheritdoc />
public async Task<TriggerEntity> CreateAsync(TriggerEntity trigger, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO scheduler.triggers (
id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
enabled, next_fire_at, metadata, created_by
)
VALUES (
@id, @tenant_id, @name, @description, @job_type, @job_payload::jsonb, @cron_expression, @timezone,
@enabled, @next_fire_at, @metadata::jsonb, @created_by
)
RETURNING *
""";
var id = trigger.Id == Guid.Empty ? Guid.NewGuid() : trigger.Id;
await using var connection = await DataSource.OpenConnectionAsync(trigger.TenantId, "writer", cancellationToken)
.ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "id", id);
AddParameter(command, "tenant_id", trigger.TenantId);
AddParameter(command, "name", trigger.Name);
AddParameter(command, "description", trigger.Description);
AddParameter(command, "job_type", trigger.JobType);
AddJsonbParameter(command, "job_payload", trigger.JobPayload);
AddParameter(command, "cron_expression", trigger.CronExpression);
AddParameter(command, "timezone", trigger.Timezone);
AddParameter(command, "enabled", trigger.Enabled);
AddParameter(command, "next_fire_at", trigger.NextFireAt);
AddJsonbParameter(command, "metadata", trigger.Metadata);
AddParameter(command, "created_by", trigger.CreatedBy);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapTrigger(reader);
}
/// <inheritdoc />
public async Task<bool> UpdateAsync(TriggerEntity trigger, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.triggers
SET name = @name,
description = @description,
job_type = @job_type,
job_payload = @job_payload::jsonb,
cron_expression = @cron_expression,
timezone = @timezone,
enabled = @enabled,
next_fire_at = @next_fire_at,
metadata = @metadata::jsonb
WHERE tenant_id = @tenant_id AND id = @id
""";
var rows = await ExecuteAsync(
trigger.TenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", trigger.TenantId);
AddParameter(cmd, "id", trigger.Id);
AddParameter(cmd, "name", trigger.Name);
AddParameter(cmd, "description", trigger.Description);
AddParameter(cmd, "job_type", trigger.JobType);
AddJsonbParameter(cmd, "job_payload", trigger.JobPayload);
AddParameter(cmd, "cron_expression", trigger.CronExpression);
AddParameter(cmd, "timezone", trigger.Timezone);
AddParameter(cmd, "enabled", trigger.Enabled);
AddParameter(cmd, "next_fire_at", trigger.NextFireAt);
AddJsonbParameter(cmd, "metadata", trigger.Metadata);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> RecordFireAsync(string tenantId, Guid triggerId, Guid jobId, DateTimeOffset? nextFireAt, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.triggers
SET last_fire_at = NOW(),
last_job_id = @job_id,
next_fire_at = @next_fire_at,
fire_count = fire_count + 1
WHERE tenant_id = @tenant_id AND id = @id
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", triggerId);
AddParameter(cmd, "job_id", jobId);
AddParameter(cmd, "next_fire_at", nextFireAt);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> RecordMisfireAsync(string tenantId, Guid triggerId, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.triggers
SET misfire_count = misfire_count + 1
WHERE tenant_id = @tenant_id AND id = @id
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", triggerId);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> SetEnabledAsync(string tenantId, Guid id, bool enabled, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.triggers
SET enabled = @enabled
WHERE tenant_id = @tenant_id AND id = @id
""";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
AddParameter(cmd, "enabled", enabled);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
{
const string sql = "DELETE FROM scheduler.triggers WHERE tenant_id = @tenant_id AND id = @id";
var rows = await ExecuteAsync(
tenantId,
sql,
cmd =>
{
AddParameter(cmd, "tenant_id", tenantId);
AddParameter(cmd, "id", id);
},
cancellationToken).ConfigureAwait(false);
return rows > 0;
}
private static TriggerEntity MapTrigger(NpgsqlDataReader reader) => new()
{
Id = reader.GetGuid(reader.GetOrdinal("id")),
TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
Name = reader.GetString(reader.GetOrdinal("name")),
Description = GetNullableString(reader, reader.GetOrdinal("description")),
JobType = reader.GetString(reader.GetOrdinal("job_type")),
JobPayload = reader.GetString(reader.GetOrdinal("job_payload")),
CronExpression = reader.GetString(reader.GetOrdinal("cron_expression")),
Timezone = reader.GetString(reader.GetOrdinal("timezone")),
Enabled = reader.GetBoolean(reader.GetOrdinal("enabled")),
NextFireAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("next_fire_at")),
LastFireAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("last_fire_at")),
LastJobId = GetNullableGuid(reader, reader.GetOrdinal("last_job_id")),
FireCount = reader.GetInt64(reader.GetOrdinal("fire_count")),
MisfireCount = reader.GetInt32(reader.GetOrdinal("misfire_count")),
Metadata = reader.GetString(reader.GetOrdinal("metadata")),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at")),
UpdatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("updated_at")),
CreatedBy = GetNullableString(reader, reader.GetOrdinal("created_by"))
};
}
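Usage note: GetDueTriggersAsync reads across tenants on the system connection, so one dispatcher loop can service every tenant. A hedged sketch of that loop; `TryEnqueueJob` and `ComputeNextFireAt` are hypothetical helpers (the latter would derive the next occurrence from CronExpression/Timezone, e.g. via a cron library), and `ct` is an ambient CancellationToken:

var due = await triggerRepository.GetDueTriggersAsync(100, ct);
foreach (var trigger in due)
{
    Guid? jobId = TryEnqueueJob(trigger);                 // hypothetical: null when enqueueing fails
    if (jobId is Guid id)
    {
        var next = ComputeNextFireAt(trigger);            // hypothetical: next cron occurrence, or null
        await triggerRepository.RecordFireAsync(trigger.TenantId, trigger.Id, id, next, ct);
    }
    else
    {
        await triggerRepository.RecordMisfireAsync(trigger.TenantId, trigger.Id, ct);
    }
}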

View File

@@ -0,0 +1,230 @@
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Scheduler.Persistence.Postgres.Models;
namespace StellaOps.Scheduler.Persistence.Postgres.Repositories;
/// <summary>
/// PostgreSQL repository for worker operations.
/// </summary>
public sealed class WorkerRepository : RepositoryBase<SchedulerDataSource>, IWorkerRepository
{
/// <summary>
/// Creates a new worker repository.
/// </summary>
public WorkerRepository(SchedulerDataSource dataSource, ILogger<WorkerRepository> logger)
: base(dataSource, logger)
{
}
/// <inheritdoc />
public async Task<WorkerEntity?> GetByIdAsync(string id, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
status, last_heartbeat_at, registered_at, metadata
FROM scheduler.workers
WHERE id = @id
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "id", id);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
return await reader.ReadAsync(cancellationToken).ConfigureAwait(false) ? MapWorker(reader) : null;
}
/// <inheritdoc />
public async Task<IReadOnlyList<WorkerEntity>> ListAsync(CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
status, last_heartbeat_at, registered_at, metadata
FROM scheduler.workers
ORDER BY registered_at DESC
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
var results = new List<WorkerEntity>();
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
results.Add(MapWorker(reader));
}
return results;
}
/// <inheritdoc />
public async Task<IReadOnlyList<WorkerEntity>> ListByStatusAsync(string status, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
status, last_heartbeat_at, registered_at, metadata
FROM scheduler.workers
WHERE status = @status
ORDER BY registered_at DESC
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "status", status);
var results = new List<WorkerEntity>();
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
results.Add(MapWorker(reader));
}
return results;
}
/// <inheritdoc />
public async Task<IReadOnlyList<WorkerEntity>> GetByTenantIdAsync(string tenantId, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
status, last_heartbeat_at, registered_at, metadata
FROM scheduler.workers
WHERE tenant_id = @tenant_id OR tenant_id IS NULL
ORDER BY registered_at DESC
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "tenant_id", tenantId);
var results = new List<WorkerEntity>();
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
results.Add(MapWorker(reader));
}
return results;
}
/// <inheritdoc />
public async Task<WorkerEntity> UpsertAsync(WorkerEntity worker, CancellationToken cancellationToken = default)
{
const string sql = """
INSERT INTO scheduler.workers (id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, metadata)
VALUES (@id, @tenant_id, @hostname, @process_id, @job_types, @max_concurrent_jobs, @metadata::jsonb)
ON CONFLICT (id) DO UPDATE SET
tenant_id = EXCLUDED.tenant_id,
hostname = EXCLUDED.hostname,
process_id = EXCLUDED.process_id,
job_types = EXCLUDED.job_types,
max_concurrent_jobs = EXCLUDED.max_concurrent_jobs,
metadata = EXCLUDED.metadata,
last_heartbeat_at = NOW(),
status = 'active'
RETURNING *
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "id", worker.Id);
AddParameter(command, "tenant_id", worker.TenantId);
AddParameter(command, "hostname", worker.Hostname);
AddParameter(command, "process_id", worker.ProcessId);
AddTextArrayParameter(command, "job_types", worker.JobTypes);
AddParameter(command, "max_concurrent_jobs", worker.MaxConcurrentJobs);
AddJsonbParameter(command, "metadata", worker.Metadata);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
return MapWorker(reader);
}
/// <inheritdoc />
public async Task<bool> HeartbeatAsync(string id, int currentJobs, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.workers
SET last_heartbeat_at = NOW(), current_jobs = @current_jobs
WHERE id = @id
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "id", id);
AddParameter(command, "current_jobs", currentJobs);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> SetStatusAsync(string id, string status, CancellationToken cancellationToken = default)
{
const string sql = """
UPDATE scheduler.workers
SET status = @status
WHERE id = @id
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "id", id);
AddParameter(command, "status", status);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<bool> DeleteAsync(string id, CancellationToken cancellationToken = default)
{
const string sql = "DELETE FROM scheduler.workers WHERE id = @id";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "id", id);
var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
return rows > 0;
}
/// <inheritdoc />
public async Task<IReadOnlyList<WorkerEntity>> GetStaleWorkersAsync(TimeSpan staleDuration, CancellationToken cancellationToken = default)
{
const string sql = """
SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
status, last_heartbeat_at, registered_at, metadata
FROM scheduler.workers
WHERE status = 'active' AND last_heartbeat_at < NOW() - @stale_duration
ORDER BY last_heartbeat_at
""";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "stale_duration", staleDuration);
var results = new List<WorkerEntity>();
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
results.Add(MapWorker(reader));
}
return results;
}
private static WorkerEntity MapWorker(NpgsqlDataReader reader) => new()
{
Id = reader.GetString(reader.GetOrdinal("id")),
TenantId = GetNullableString(reader, reader.GetOrdinal("tenant_id")),
Hostname = reader.GetString(reader.GetOrdinal("hostname")),
ProcessId = reader.IsDBNull(reader.GetOrdinal("process_id")) ? null : reader.GetInt32(reader.GetOrdinal("process_id")),
JobTypes = reader.IsDBNull(reader.GetOrdinal("job_types")) ? [] : reader.GetFieldValue<string[]>(reader.GetOrdinal("job_types")),
MaxConcurrentJobs = reader.GetInt32(reader.GetOrdinal("max_concurrent_jobs")),
CurrentJobs = reader.GetInt32(reader.GetOrdinal("current_jobs")),
Status = reader.GetString(reader.GetOrdinal("status")),
LastHeartbeatAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("last_heartbeat_at")),
RegisteredAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("registered_at")),
Metadata = reader.GetString(reader.GetOrdinal("metadata"))
};
}
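Usage note: UpsertAsync doubles as registration and first heartbeat (it resets status to 'active' and stamps last_heartbeat_at); workers then call HeartbeatAsync periodically, and a reaper sweeps the rest. A hedged sketch of both sides; `ct` is an ambient CancellationToken and the 'offline' status literal is an assumption beyond what this file shows:

// Worker side: refresh the heartbeat with the current in-flight job count.
await workerRepository.HeartbeatAsync("worker-1", currentJobs: 3, ct);

// Reaper side: flag workers that have been silent for over two minutes.
var stale = await workerRepository.GetStaleWorkersAsync(TimeSpan.FromMinutes(2), ct);
foreach (var worker in stale)
{
    await workerRepository.SetStatusAsync(worker.Id, "offline", ct);   // status literal assumed
}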

View File

@@ -0,0 +1,38 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Infrastructure.Postgres.Connections;
using StellaOps.Infrastructure.Postgres.Options;
namespace StellaOps.Scheduler.Persistence.Postgres;
/// <summary>
/// PostgreSQL data source for the Scheduler module.
/// Manages connections with tenant context for job scheduling and queue management.
/// </summary>
public sealed class SchedulerDataSource : DataSourceBase
{
/// <summary>
/// Default schema name for Scheduler tables.
/// </summary>
public const string DefaultSchemaName = "scheduler";
/// <summary>
/// Creates a new Scheduler data source.
/// </summary>
public SchedulerDataSource(IOptions<PostgresOptions> options, ILogger<SchedulerDataSource> logger)
: base(CreateOptions(options.Value), logger)
{
}
/// <inheritdoc />
protected override string ModuleName => "Scheduler";
private static PostgresOptions CreateOptions(PostgresOptions baseOptions)
{
if (string.IsNullOrWhiteSpace(baseOptions.SchemaName))
{
baseOptions.SchemaName = DefaultSchemaName;
}
return baseOptions;
}
}

View File

@@ -0,0 +1,36 @@
<?xml version="1.0" ?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Scheduler.Persistence</RootNamespace>
<AssemblyName>StellaOps.Scheduler.Persistence</AssemblyName>
<Description>Consolidated persistence layer for StellaOps Scheduler module (EF Core + Raw SQL)</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Dapper" />
<PackageReference Include="Microsoft.EntityFrameworkCore" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" PrivateAssets="all" />
<PackageReference Include="Npgsql" />
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Configuration.Abstractions" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Scheduler.Models\StellaOps.Scheduler.Models.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.EfCore\StellaOps.Infrastructure.EfCore.csproj" />
</ItemGroup>
<!-- Embed SQL migrations as resources -->
<ItemGroup>
<EmbeddedResource Include="Migrations\**\*.sql" />
</ItemGroup>
</Project>
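Usage note: because the migration scripts are embedded resources, a runner can enumerate them straight from the assembly, and ordinal ordering of resource names doubles as execution order when the files carry sortable prefixes. A minimal sketch (actual execution against the database elided):

var assembly = typeof(SchedulerDataSource).Assembly;
var scripts = assembly.GetManifestResourceNames()
    .Where(name => name.EndsWith(".sql", StringComparison.OrdinalIgnoreCase))
    .OrderBy(name => name, StringComparer.Ordinal);
foreach (var name in scripts)
{
    using var stream = assembly.GetManifestResourceStream(name)!;
    using var reader = new StreamReader(stream);
    var sql = reader.ReadToEnd();
    // hand the script to the migration runner here
}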