-- ============================================================================ -- StellaOps EPSS v4 Integration Schema Migration -- ============================================================================ -- Database: concelier -- Schema Version: epss-v1 -- Created: 2025-12-17 -- Sprint: SPRINT_3410_0001_0001_epss_ingestion_storage -- -- Purpose: -- EPSS (Exploit Prediction Scoring System) v4 daily ingestion and storage. -- Provides time-series EPSS scores (0.0-1.0 probability) and percentiles -- for CVE vulnerability prioritization alongside CVSS v4. -- -- Architecture: -- - Append-only time-series (epss_scores) partitioned by month -- - Latest projection (epss_current) for fast lookups -- - Delta tracking (epss_changes) for enrichment targeting -- - Provenance (epss_import_runs) for audit trail -- -- Data Source: -- FIRST.org daily CSV: https://epss.empiricalsecurity.com/epss_scores-YYYY-MM-DD.csv.gz -- ~300k CVEs, ~15MB compressed, published daily ~00:00 UTC -- ============================================================================ BEGIN; -- ============================================================================ -- 1. EPSS Import Runs (Provenance) -- ============================================================================ -- Tracks each EPSS data import with full provenance for deterministic replay CREATE TABLE IF NOT EXISTS concelier.epss_import_runs ( -- Identity import_run_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- Temporal model_date DATE NOT NULL, -- EPSS model scoring date (YYYY-MM-DD) retrieved_at TIMESTAMPTZ NOT NULL, -- When we fetched/imported created_at TIMESTAMPTZ NOT NULL DEFAULT now(), -- Source Provenance source_uri TEXT NOT NULL, -- URL or "bundle://path/to/file.csv.gz" source_type TEXT NOT NULL DEFAULT 'online' CHECK (source_type IN ('online', 'bundle', 'backfill')), -- File Integrity file_sha256 TEXT NOT NULL, -- SHA-256 of compressed file decompressed_sha256 TEXT NULL, -- SHA-256 of decompressed CSV (optional) row_count INT NOT NULL CHECK (row_count >= 0), -- EPSS Model Metadata (from CSV comment line: "# model: v2025.03.14, published: 2025-03-14") model_version_tag TEXT NULL, -- e.g., "v2025.03.14" published_date DATE NULL, -- Date FIRST published this model -- Status status TEXT NOT NULL DEFAULT 'IN_PROGRESS' CHECK (status IN ('IN_PROGRESS', 'SUCCEEDED', 'FAILED')), error TEXT NULL, -- Error message if FAILED -- Constraints UNIQUE (model_date) -- Only one successful import per date ); COMMENT ON TABLE concelier.epss_import_runs IS 'Provenance tracking for EPSS data imports. Each row represents one daily EPSS snapshot ingestion.'; COMMENT ON COLUMN concelier.epss_import_runs.model_date IS 'The date for which EPSS scores were computed by FIRST.org model. Used as partition key and determinism anchor.'; COMMENT ON COLUMN concelier.epss_import_runs.model_version_tag IS 'EPSS model version extracted from CSV comment line (e.g., v2025.03.14). Null if not present in source.'; -- Indexes CREATE INDEX idx_epss_import_runs_status_date ON concelier.epss_import_runs (status, model_date DESC); CREATE INDEX idx_epss_import_runs_created ON concelier.epss_import_runs (created_at DESC); -- ============================================================================ -- 2. EPSS Scores (Time-Series, Partitioned by Month) -- ============================================================================ -- Immutable time-series of daily EPSS scores. Append-only for audit trail. -- Partitioned by month for query performance and retention management. CREATE TABLE IF NOT EXISTS concelier.epss_scores ( -- Temporal (partition key) model_date DATE NOT NULL, -- Identity cve_id TEXT NOT NULL, -- e.g., "CVE-2024-12345" -- EPSS Metrics epss_score DOUBLE PRECISION NOT NULL CHECK (epss_score >= 0.0 AND epss_score <= 1.0), percentile DOUBLE PRECISION NOT NULL CHECK (percentile >= 0.0 AND percentile <= 1.0), -- Provenance import_run_id UUID NOT NULL REFERENCES concelier.epss_import_runs(import_run_id) ON DELETE CASCADE, -- Primary Key PRIMARY KEY (model_date, cve_id) ) PARTITION BY RANGE (model_date); COMMENT ON TABLE concelier.epss_scores IS 'Immutable time-series of daily EPSS scores. Partitioned by month. Append-only for deterministic replay.'; COMMENT ON COLUMN concelier.epss_scores.epss_score IS 'EPSS probability score (0.0-1.0). Represents likelihood of CVE exploitation within next 30 days.'; COMMENT ON COLUMN concelier.epss_scores.percentile IS 'Percentile ranking (0.0-1.0) of this CVE relative to all scored CVEs on this model_date.'; -- Indexes (applied to each partition) CREATE INDEX idx_epss_scores_cve_date ON concelier.epss_scores (cve_id, model_date DESC); CREATE INDEX idx_epss_scores_score_desc ON concelier.epss_scores (model_date, epss_score DESC); CREATE INDEX idx_epss_scores_percentile_desc ON concelier.epss_scores (model_date, percentile DESC); CREATE INDEX idx_epss_scores_import_run ON concelier.epss_scores (import_run_id); -- ============================================================================ -- 3. EPSS Current (Latest Projection, Fast Lookup) -- ============================================================================ -- Materialized view of latest EPSS score per CVE. -- Updated after each successful import. Used for fast bulk queries. CREATE TABLE IF NOT EXISTS concelier.epss_current ( -- Identity cve_id TEXT PRIMARY KEY, -- Latest Metrics epss_score DOUBLE PRECISION NOT NULL CHECK (epss_score >= 0.0 AND epss_score <= 1.0), percentile DOUBLE PRECISION NOT NULL CHECK (percentile >= 0.0 AND percentile <= 1.0), -- Provenance model_date DATE NOT NULL, import_run_id UUID NOT NULL, -- Temporal updated_at TIMESTAMPTZ NOT NULL DEFAULT now() ); COMMENT ON TABLE concelier.epss_current IS 'Latest EPSS score per CVE. Materialized projection for fast bulk queries. Updated after each import.'; -- Indexes for sorting and filtering CREATE INDEX idx_epss_current_score_desc ON concelier.epss_current (epss_score DESC); CREATE INDEX idx_epss_current_percentile_desc ON concelier.epss_current (percentile DESC); CREATE INDEX idx_epss_current_model_date ON concelier.epss_current (model_date); CREATE INDEX idx_epss_current_updated_at ON concelier.epss_current (updated_at DESC); -- ============================================================================ -- 4. EPSS Changes (Delta Tracking, Partitioned by Month) -- ============================================================================ -- Tracks daily EPSS score changes for enrichment targeting. -- Only populated for CVEs where score/percentile changed materially. CREATE TABLE IF NOT EXISTS concelier.epss_changes ( -- Temporal (partition key) model_date DATE NOT NULL, -- Identity cve_id TEXT NOT NULL, -- Previous State (NULL if newly scored) old_score DOUBLE PRECISION NULL CHECK (old_score IS NULL OR (old_score >= 0.0 AND old_score <= 1.0)), old_percentile DOUBLE PRECISION NULL CHECK (old_percentile IS NULL OR (old_percentile >= 0.0 AND old_percentile <= 1.0)), -- New State new_score DOUBLE PRECISION NOT NULL CHECK (new_score >= 0.0 AND new_score <= 1.0), new_percentile DOUBLE PRECISION NOT NULL CHECK (new_percentile >= 0.0 AND new_percentile <= 1.0), -- Computed Deltas delta_score DOUBLE PRECISION NULL, -- new_score - old_score delta_percentile DOUBLE PRECISION NULL, -- new_percentile - old_percentile -- Change Classification Flags (bitmask) -- 1=NEW_SCORED, 2=CROSSED_HIGH, 4=BIG_JUMP, 8=DROPPED_LOW, 16=SCORE_INCREASED, 32=SCORE_DECREASED flags INT NOT NULL DEFAULT 0, -- Temporal created_at TIMESTAMPTZ NOT NULL DEFAULT now(), -- Primary Key PRIMARY KEY (model_date, cve_id) ) PARTITION BY RANGE (model_date); COMMENT ON TABLE concelier.epss_changes IS 'Delta tracking for EPSS score changes. Used to efficiently target enrichment jobs for impacted vulnerabilities.'; COMMENT ON COLUMN concelier.epss_changes.flags IS 'Bitmask: 1=NEW_SCORED, 2=CROSSED_HIGH (≥95th), 4=BIG_JUMP (Δ≥0.10), 8=DROPPED_LOW (<50th), 16=INCREASED, 32=DECREASED'; -- Indexes for enrichment queries CREATE INDEX idx_epss_changes_flags ON concelier.epss_changes (model_date, flags) WHERE flags > 0; CREATE INDEX idx_epss_changes_big_delta ON concelier.epss_changes (model_date, ABS(delta_score) DESC NULLS LAST); CREATE INDEX idx_epss_changes_new_scored ON concelier.epss_changes (model_date) WHERE (flags & 1) = 1; -- NEW_SCORED flag CREATE INDEX idx_epss_changes_crossed_high ON concelier.epss_changes (model_date) WHERE (flags & 2) = 2; -- CROSSED_HIGH flag -- ============================================================================ -- 5. Partition Management Helper Functions -- ============================================================================ -- Function: Create monthly partition for epss_scores CREATE OR REPLACE FUNCTION concelier.create_epss_scores_partition(partition_date DATE) RETURNS TEXT AS $$ DECLARE partition_name TEXT; start_date DATE; end_date DATE; BEGIN -- Calculate partition bounds (first day of month to first day of next month) start_date := DATE_TRUNC('month', partition_date)::DATE; end_date := (DATE_TRUNC('month', partition_date) + INTERVAL '1 month')::DATE; -- Generate partition name: epss_scores_YYYY_MM partition_name := 'epss_scores_' || TO_CHAR(start_date, 'YYYY_MM'); -- Create partition if not exists EXECUTE format( 'CREATE TABLE IF NOT EXISTS concelier.%I PARTITION OF concelier.epss_scores FOR VALUES FROM (%L) TO (%L)', partition_name, start_date, end_date ); RETURN partition_name; END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION concelier.create_epss_scores_partition IS 'Creates a monthly partition for epss_scores table. Safe to call multiple times (idempotent).'; -- Function: Create monthly partition for epss_changes CREATE OR REPLACE FUNCTION concelier.create_epss_changes_partition(partition_date DATE) RETURNS TEXT AS $$ DECLARE partition_name TEXT; start_date DATE; end_date DATE; BEGIN start_date := DATE_TRUNC('month', partition_date)::DATE; end_date := (DATE_TRUNC('month', partition_date) + INTERVAL '1 month')::DATE; partition_name := 'epss_changes_' || TO_CHAR(start_date, 'YYYY_MM'); EXECUTE format( 'CREATE TABLE IF NOT EXISTS concelier.%I PARTITION OF concelier.epss_changes FOR VALUES FROM (%L) TO (%L)', partition_name, start_date, end_date ); RETURN partition_name; END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION concelier.create_epss_changes_partition IS 'Creates a monthly partition for epss_changes table. Safe to call multiple times (idempotent).'; -- Function: Auto-create partitions for next N months CREATE OR REPLACE FUNCTION concelier.ensure_epss_partitions_exist(months_ahead INT DEFAULT 3) RETURNS TABLE(partition_name TEXT, partition_type TEXT) AS $$ DECLARE current_month DATE := DATE_TRUNC('month', CURRENT_DATE)::DATE; i INT; BEGIN FOR i IN 0..months_ahead LOOP RETURN QUERY SELECT concelier.create_epss_scores_partition(current_month + (i || ' months')::INTERVAL), 'epss_scores'::TEXT; RETURN QUERY SELECT concelier.create_epss_changes_partition(current_month + (i || ' months')::INTERVAL), 'epss_changes'::TEXT; END LOOP; END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION concelier.ensure_epss_partitions_exist IS 'Ensures partitions exist for current month and N months ahead. Safe to run daily.'; -- ============================================================================ -- 6. Initial Partition Creation -- ============================================================================ -- Create partitions for current month + next 3 months SELECT concelier.ensure_epss_partitions_exist(3); -- ============================================================================ -- 7. Maintenance Views -- ============================================================================ -- View: EPSS model staleness CREATE OR REPLACE VIEW concelier.epss_model_staleness AS SELECT MAX(model_date) AS latest_model_date, MAX(created_at) AS latest_import_at, CURRENT_DATE - MAX(model_date) AS days_stale, CASE WHEN CURRENT_DATE - MAX(model_date) <= 1 THEN 'FRESH' WHEN CURRENT_DATE - MAX(model_date) <= 7 THEN 'ACCEPTABLE' WHEN CURRENT_DATE - MAX(model_date) <= 14 THEN 'STALE' ELSE 'VERY_STALE' END AS staleness_status FROM concelier.epss_import_runs WHERE status = 'SUCCEEDED'; COMMENT ON VIEW concelier.epss_model_staleness IS 'Reports EPSS data freshness. Alert if days_stale > 7.'; -- View: EPSS coverage stats CREATE OR REPLACE VIEW concelier.epss_coverage_stats AS SELECT model_date, COUNT(*) AS cve_count, COUNT(*) FILTER (WHERE percentile >= 0.99) AS top_1_percent_count, COUNT(*) FILTER (WHERE percentile >= 0.95) AS top_5_percent_count, COUNT(*) FILTER (WHERE percentile >= 0.90) AS top_10_percent_count, COUNT(*) FILTER (WHERE epss_score >= 0.50) AS high_score_count, ROUND(AVG(epss_score)::NUMERIC, 6) AS avg_score, ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY epss_score)::NUMERIC, 6) AS median_score, ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY epss_score)::NUMERIC, 6) AS p95_score FROM concelier.epss_scores WHERE model_date IN ( SELECT model_date FROM concelier.epss_import_runs WHERE status = 'SUCCEEDED' ORDER BY model_date DESC LIMIT 1 ) GROUP BY model_date; COMMENT ON VIEW concelier.epss_coverage_stats IS 'Statistics for latest EPSS model: CVE count, distribution, percentiles.'; -- View: Recent EPSS changes summary CREATE OR REPLACE VIEW concelier.epss_recent_changes_summary AS SELECT model_date, COUNT(*) AS total_changes, COUNT(*) FILTER (WHERE (flags & 1) = 1) AS new_scored, COUNT(*) FILTER (WHERE (flags & 2) = 2) AS crossed_high, COUNT(*) FILTER (WHERE (flags & 4) = 4) AS big_jump, COUNT(*) FILTER (WHERE (flags & 8) = 8) AS dropped_low, COUNT(*) FILTER (WHERE (flags & 16) = 16) AS score_increased, COUNT(*) FILTER (WHERE (flags & 32) = 32) AS score_decreased, ROUND(AVG(ABS(delta_score))::NUMERIC, 6) AS avg_abs_delta_score, ROUND(MAX(ABS(delta_score))::NUMERIC, 6) AS max_abs_delta_score FROM concelier.epss_changes WHERE model_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY model_date ORDER BY model_date DESC; COMMENT ON VIEW concelier.epss_recent_changes_summary IS 'Summary of EPSS changes over last 30 days. Used for monitoring and alerting.'; -- ============================================================================ -- 8. Sample Queries (Documentation) -- ============================================================================ COMMENT ON SCHEMA concelier IS E' StellaOps Concelier Schema - EPSS v4 Integration Sample Queries: -- Get latest EPSS score for a CVE SELECT cve_id, epss_score, percentile, model_date FROM concelier.epss_current WHERE cve_id = ''CVE-2024-12345''; -- Bulk query EPSS for multiple CVEs (Scanner use case) SELECT cve_id, epss_score, percentile, model_date, import_run_id FROM concelier.epss_current WHERE cve_id = ANY(ARRAY[''CVE-2024-1'', ''CVE-2024-2'', ''CVE-2024-3'']); -- Get EPSS history for a CVE (last 180 days) SELECT model_date, epss_score, percentile FROM concelier.epss_scores WHERE cve_id = ''CVE-2024-12345'' AND model_date >= CURRENT_DATE - INTERVAL ''180 days'' ORDER BY model_date DESC; -- Find top 100 CVEs by EPSS score (current) SELECT cve_id, epss_score, percentile FROM concelier.epss_current ORDER BY epss_score DESC LIMIT 100; -- Find CVEs that crossed 95th percentile today SELECT c.cve_id, c.old_percentile, c.new_percentile, c.delta_percentile FROM concelier.epss_changes c WHERE c.model_date = CURRENT_DATE AND (c.flags & 2) = 2 -- CROSSED_HIGH flag ORDER BY c.new_percentile DESC; -- Get all changes with big jumps (Δ ≥ 0.10) SELECT cve_id, old_score, new_score, delta_score, model_date FROM concelier.epss_changes WHERE (flags & 4) = 4 -- BIG_JUMP flag AND model_date >= CURRENT_DATE - INTERVAL ''7 days'' ORDER BY ABS(delta_score) DESC; -- Check model staleness SELECT * FROM concelier.epss_model_staleness; -- Get coverage stats for latest model SELECT * FROM concelier.epss_coverage_stats; '; -- ============================================================================ -- 9. Permissions (Role-Based Access Control) -- ============================================================================ -- Grant read-only access to scanner service GRANT SELECT ON concelier.epss_current TO scanner_service; GRANT SELECT ON concelier.epss_scores TO scanner_service; -- Grant read-write access to concelier worker (ingestion) GRANT SELECT, INSERT, UPDATE ON concelier.epss_import_runs TO concelier_worker; GRANT SELECT, INSERT ON concelier.epss_scores TO concelier_worker; GRANT SELECT, INSERT, UPDATE, DELETE ON concelier.epss_current TO concelier_worker; GRANT SELECT, INSERT ON concelier.epss_changes TO concelier_worker; GRANT EXECUTE ON FUNCTION concelier.create_epss_scores_partition TO concelier_worker; GRANT EXECUTE ON FUNCTION concelier.create_epss_changes_partition TO concelier_worker; GRANT EXECUTE ON FUNCTION concelier.ensure_epss_partitions_exist TO concelier_worker; -- Grant read access to policy engine GRANT SELECT ON concelier.epss_current TO policy_engine; GRANT SELECT ON concelier.epss_scores TO policy_engine; -- Grant read access to notify service GRANT SELECT ON concelier.epss_current TO notify_service; GRANT SELECT ON concelier.epss_changes TO notify_service; -- ============================================================================ -- 10. Migration Metadata -- ============================================================================ -- Track this migration INSERT INTO concelier.schema_migrations (version, description, applied_at) VALUES ('epss-v1', 'EPSS v4 Integration Schema', NOW()) ON CONFLICT (version) DO NOTHING; COMMIT; -- ============================================================================ -- Post-Migration Verification -- ============================================================================ -- Verify tables created DO $$ BEGIN ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_import_runs') = 1, 'epss_import_runs table not created'; ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_scores') = 1, 'epss_scores table not created'; ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_current') = 1, 'epss_current table not created'; ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_changes') = 1, 'epss_changes table not created'; RAISE NOTICE 'EPSS schema migration completed successfully!'; END; $$; -- List created partitions SELECT schemaname, tablename, pg_size_pretty(pg_total_relation_size(schemaname || '.' || tablename)) AS size FROM pg_tables WHERE schemaname = 'concelier' AND (tablename LIKE 'epss_scores_%' OR tablename LIKE 'epss_changes_%') ORDER BY tablename;