This commit is contained in:
StellaOps Bot
2025-12-14 23:20:14 +02:00
parent 3411e825cd
commit b058dbe031
356 changed files with 68310 additions and 1108 deletions

View File

@@ -0,0 +1,68 @@
-- Scheduler Schema Migration 010: Generated Columns for Runs Stats
-- Sprint: SPRINT_3423_0001_0001 - Generated Columns for JSONB Hot Keys
-- Category: A (safe, can run at startup)
--
-- Purpose: Extract frequently-queried fields from stats JSONB as generated columns
-- to enable efficient B-tree indexing and accurate query planning statistics.
BEGIN;
-- Add generated columns for hot stats fields
-- These are computed automatically when stats JSONB is updated
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS finding_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'findingCount'), '')::int) STORED;
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS critical_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'criticalCount'), '')::int) STORED;
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS high_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'highCount'), '')::int) STORED;
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS new_finding_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'newFindingCount'), '')::int) STORED;
ALTER TABLE scheduler.runs
ADD COLUMN IF NOT EXISTS component_count INT
GENERATED ALWAYS AS (NULLIF((stats->>'componentCount'), '')::int) STORED;
-- Index for dashboard query: runs with findings
CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_runs_with_findings
ON scheduler.runs (tenant_id, created_at DESC)
WHERE finding_count > 0;
-- Index for critical findings alerting
CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_runs_critical
ON scheduler.runs (tenant_id, created_at DESC, critical_count)
WHERE critical_count > 0;
-- Covering index for run summary dashboard
-- Enables index-only scans for common dashboard queries
CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_runs_summary_cover
ON scheduler.runs (tenant_id, state, created_at DESC)
INCLUDE (finding_count, critical_count, high_count, new_finding_count);
-- Index for trend analysis queries
CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_runs_tenant_findings
ON scheduler.runs (tenant_id, finding_count DESC, created_at DESC)
WHERE state = 'completed';
-- Update statistics for query planner
ANALYZE scheduler.runs;
COMMIT;
-- Verification query (run manually to confirm):
-- SELECT
-- id,
-- stats->>'findingCount' as json_finding_count,
-- finding_count as generated_finding_count,
-- stats->>'criticalCount' as json_critical_count,
-- critical_count as generated_critical_count
-- FROM scheduler.runs
-- WHERE stats != '{}'
-- LIMIT 10;

View File

@@ -0,0 +1,237 @@
-- Scheduler Schema Migration 011: Row-Level Security
-- Sprint: SPRINT_3421_0001_0001 - RLS Expansion
-- Category: B (release migration, requires coordination)
--
-- Purpose: Enable Row-Level Security on all tenant-scoped tables in the scheduler
-- schema to provide database-level tenant isolation as defense-in-depth.
BEGIN;
-- ============================================================================
-- Step 1: Create helper schema and function for tenant context
-- ============================================================================
CREATE SCHEMA IF NOT EXISTS scheduler_app;
-- Tenant context helper function
-- SECURITY DEFINER ensures the function runs with owner privileges
-- to access the session variable even when RLS restricts the caller
CREATE OR REPLACE FUNCTION scheduler_app.require_current_tenant()
RETURNS TEXT
LANGUAGE plpgsql STABLE SECURITY DEFINER
AS $$
DECLARE
v_tenant TEXT;
BEGIN
v_tenant := current_setting('app.tenant_id', true);
IF v_tenant IS NULL OR v_tenant = '' THEN
RAISE EXCEPTION 'app.tenant_id session variable not set'
USING HINT = 'Set via: SELECT set_config(''app.tenant_id'', ''<tenant>'', false)',
ERRCODE = 'P0001';
END IF;
RETURN v_tenant;
END;
$$;
-- Restrict function execution to application role only
REVOKE ALL ON FUNCTION scheduler_app.require_current_tenant() FROM PUBLIC;
-- ============================================================================
-- Step 2: Enable RLS on tables with direct tenant_id column
-- ============================================================================
-- scheduler.schedules
ALTER TABLE scheduler.schedules ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.schedules FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS schedules_tenant_isolation ON scheduler.schedules;
CREATE POLICY schedules_tenant_isolation ON scheduler.schedules
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.runs
ALTER TABLE scheduler.runs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.runs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS runs_tenant_isolation ON scheduler.runs;
CREATE POLICY runs_tenant_isolation ON scheduler.runs
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.jobs
ALTER TABLE scheduler.jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.jobs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS jobs_tenant_isolation ON scheduler.jobs;
CREATE POLICY jobs_tenant_isolation ON scheduler.jobs
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.triggers
ALTER TABLE scheduler.triggers ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.triggers FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS triggers_tenant_isolation ON scheduler.triggers;
CREATE POLICY triggers_tenant_isolation ON scheduler.triggers
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.graph_jobs
ALTER TABLE scheduler.graph_jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.graph_jobs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS graph_jobs_tenant_isolation ON scheduler.graph_jobs;
CREATE POLICY graph_jobs_tenant_isolation ON scheduler.graph_jobs
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.policy_jobs
ALTER TABLE scheduler.policy_jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.policy_jobs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS policy_jobs_tenant_isolation ON scheduler.policy_jobs;
CREATE POLICY policy_jobs_tenant_isolation ON scheduler.policy_jobs
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.locks
ALTER TABLE scheduler.locks ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.locks FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS locks_tenant_isolation ON scheduler.locks;
CREATE POLICY locks_tenant_isolation ON scheduler.locks
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.impact_snapshots
ALTER TABLE scheduler.impact_snapshots ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.impact_snapshots FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS impact_snapshots_tenant_isolation ON scheduler.impact_snapshots;
CREATE POLICY impact_snapshots_tenant_isolation ON scheduler.impact_snapshots
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.run_summaries
ALTER TABLE scheduler.run_summaries ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.run_summaries FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS run_summaries_tenant_isolation ON scheduler.run_summaries;
CREATE POLICY run_summaries_tenant_isolation ON scheduler.run_summaries
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.audit
ALTER TABLE scheduler.audit ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.audit FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS audit_tenant_isolation ON scheduler.audit;
CREATE POLICY audit_tenant_isolation ON scheduler.audit
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.job_history
ALTER TABLE scheduler.job_history ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.job_history FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS job_history_tenant_isolation ON scheduler.job_history;
CREATE POLICY job_history_tenant_isolation ON scheduler.job_history
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- scheduler.metrics
ALTER TABLE scheduler.metrics ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.metrics FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS metrics_tenant_isolation ON scheduler.metrics;
CREATE POLICY metrics_tenant_isolation ON scheduler.metrics
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- ============================================================================
-- Step 3: FK-based RLS for child tables (inherit tenant from parent)
-- ============================================================================
-- scheduler.execution_logs inherits tenant from scheduler.runs
ALTER TABLE scheduler.execution_logs ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.execution_logs FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS execution_logs_tenant_isolation ON scheduler.execution_logs;
CREATE POLICY execution_logs_tenant_isolation ON scheduler.execution_logs
FOR ALL
USING (
run_id IN (
SELECT id FROM scheduler.runs
WHERE tenant_id = scheduler_app.require_current_tenant()
)
);
-- ============================================================================
-- Step 4: Skip RLS for global tables (no tenant_id)
-- ============================================================================
-- scheduler.workers is intentionally NOT RLS-protected
-- Workers are global resources shared across tenants
-- The tenant_id column in workers is optional and for affinity only
COMMENT ON TABLE scheduler.workers IS
'Global worker registry. Not RLS-protected - workers serve all tenants.';
-- ============================================================================
-- Step 5: Create admin bypass role
-- ============================================================================
-- Create role for admin operations (migrations, cross-tenant queries)
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'scheduler_admin') THEN
CREATE ROLE scheduler_admin WITH NOLOGIN BYPASSRLS;
END IF;
END
$$;
-- Grant bypass role to admin users
-- GRANT scheduler_admin TO stellaops_admin;
-- GRANT scheduler_admin TO stellaops_migration;
COMMIT;
-- ============================================================================
-- Verification queries (run manually)
-- ============================================================================
-- Check RLS status on all scheduler tables:
/*
SELECT
schemaname,
tablename,
rowsecurity AS rls_enabled,
forcerowsecurity AS rls_forced
FROM pg_tables
WHERE schemaname = 'scheduler'
ORDER BY tablename;
*/
-- List all RLS policies:
/*
SELECT
schemaname,
tablename,
policyname,
permissive,
roles,
cmd,
qual,
with_check
FROM pg_policies
WHERE schemaname = 'scheduler'
ORDER BY tablename, policyname;
*/
-- Test RLS enforcement:
/*
-- Should fail: no tenant set
SELECT * FROM scheduler.runs LIMIT 1;
-- Should work: tenant set
SELECT set_config('app.tenant_id', 'test-tenant', false);
SELECT * FROM scheduler.runs LIMIT 1;
*/

View File

@@ -0,0 +1,159 @@
-- Scheduler Schema Migration 012: Partition Audit Table
-- Sprint: SPRINT_3422_0001_0001 - Time-Based Partitioning
-- Category: C (infrastructure change, requires maintenance window)
--
-- Purpose: Convert scheduler.audit to a partitioned table for improved
-- query performance on time-range queries and easier data lifecycle management.
--
-- IMPORTANT: This migration requires a maintenance window. It will:
-- 1. Create a new partitioned table
-- 2. Migrate existing data
-- 3. Rename tables to swap
--
-- Partition strategy: Monthly by created_at
BEGIN;
-- ============================================================================
-- Step 1: Create partitioned audit table
-- ============================================================================
CREATE TABLE IF NOT EXISTS scheduler.audit_partitioned (
id BIGSERIAL,
tenant_id TEXT NOT NULL,
user_id UUID,
action TEXT NOT NULL,
resource_type TEXT NOT NULL,
resource_id TEXT,
old_value JSONB,
new_value JSONB,
correlation_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);
-- ============================================================================
-- Step 2: Create initial partitions (past 6 months + 3 months ahead)
-- ============================================================================
-- Create partitions for historical data and future
DO $$
DECLARE
v_start DATE;
v_end DATE;
v_partition_name TEXT;
BEGIN
-- Start from 6 months ago
v_start := date_trunc('month', NOW() - INTERVAL '6 months')::DATE;
-- Create partitions until 3 months ahead
WHILE v_start <= date_trunc('month', NOW() + INTERVAL '3 months')::DATE LOOP
v_end := (v_start + INTERVAL '1 month')::DATE;
v_partition_name := 'audit_' || to_char(v_start, 'YYYY_MM');
IF NOT EXISTS (
SELECT 1 FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = 'scheduler' AND c.relname = v_partition_name
) THEN
EXECUTE format(
'CREATE TABLE scheduler.%I PARTITION OF scheduler.audit_partitioned
FOR VALUES FROM (%L) TO (%L)',
v_partition_name, v_start, v_end
);
RAISE NOTICE 'Created partition scheduler.%', v_partition_name;
END IF;
v_start := v_end;
END LOOP;
END
$$;
-- Create default partition for any data outside defined ranges
CREATE TABLE IF NOT EXISTS scheduler.audit_default
PARTITION OF scheduler.audit_partitioned DEFAULT;
-- ============================================================================
-- Step 3: Create indexes on partitioned table
-- ============================================================================
CREATE INDEX IF NOT EXISTS ix_audit_part_tenant
ON scheduler.audit_partitioned (tenant_id);
CREATE INDEX IF NOT EXISTS ix_audit_part_resource
ON scheduler.audit_partitioned (resource_type, resource_id);
CREATE INDEX IF NOT EXISTS ix_audit_part_correlation
ON scheduler.audit_partitioned (correlation_id)
WHERE correlation_id IS NOT NULL;
-- BRIN index for time-range queries (very efficient for time-series data)
CREATE INDEX IF NOT EXISTS brin_audit_part_created
ON scheduler.audit_partitioned USING BRIN (created_at)
WITH (pages_per_range = 128);
-- ============================================================================
-- Step 4: Migrate data from old table to partitioned table
-- ============================================================================
-- Note: This uses INSERT ... SELECT which is efficient for bulk operations
-- For very large tables, consider batched migration in a separate script
INSERT INTO scheduler.audit_partitioned (
id, tenant_id, user_id, action, resource_type, resource_id,
old_value, new_value, correlation_id, created_at
)
SELECT
id, tenant_id, user_id, action, resource_type, resource_id,
old_value, new_value, correlation_id, created_at
FROM scheduler.audit
ON CONFLICT DO NOTHING;
-- ============================================================================
-- Step 5: Swap tables
-- ============================================================================
-- Rename old table to backup
ALTER TABLE IF EXISTS scheduler.audit RENAME TO audit_old;
-- Rename partitioned table to production name
ALTER TABLE scheduler.audit_partitioned RENAME TO audit;
-- Update sequence to continue from max ID
DO $$
DECLARE
v_max_id BIGINT;
BEGIN
SELECT COALESCE(MAX(id), 0) INTO v_max_id FROM scheduler.audit;
PERFORM setval('scheduler.audit_id_seq', v_max_id + 1, false);
END
$$;
-- ============================================================================
-- Step 6: Re-enable RLS on new partitioned table
-- ============================================================================
ALTER TABLE scheduler.audit ENABLE ROW LEVEL SECURITY;
ALTER TABLE scheduler.audit FORCE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS audit_tenant_isolation ON scheduler.audit;
CREATE POLICY audit_tenant_isolation ON scheduler.audit
FOR ALL
USING (tenant_id = scheduler_app.require_current_tenant())
WITH CHECK (tenant_id = scheduler_app.require_current_tenant());
-- ============================================================================
-- Step 7: Add comment about partitioning strategy
-- ============================================================================
COMMENT ON TABLE scheduler.audit IS
'Audit log for scheduler operations. Partitioned monthly by created_at for retention management.';
COMMIT;
-- ============================================================================
-- Cleanup (run manually after validation)
-- ============================================================================
-- After confirming the migration is successful, drop the old table:
-- DROP TABLE IF EXISTS scheduler.audit_old;