Files
git.stella-ops.org/docs/db/migrations/concelier-epss-schema-v1.sql
master 8bbfe4d2d2 feat(rate-limiting): Implement core rate limiting functionality with configuration, decision-making, metrics, middleware, and service registration
- Add RateLimitConfig for configuration management with YAML binding support.
- Introduce RateLimitDecision to encapsulate the result of rate limit checks.
- Implement RateLimitMetrics for OpenTelemetry metrics tracking.
- Create RateLimitMiddleware for enforcing rate limits on incoming requests.
- Develop RateLimitService to orchestrate instance and environment rate limit checks.
- Add RateLimitServiceCollectionExtensions for dependency injection registration.
2025-12-17 18:02:37 +02:00

497 lines
19 KiB
PL/PgSQL

-- ============================================================================
-- StellaOps EPSS v4 Integration Schema Migration
-- ============================================================================
-- Database: concelier
-- Schema Version: epss-v1
-- Created: 2025-12-17
-- Sprint: SPRINT_3410_0001_0001_epss_ingestion_storage
--
-- Purpose:
-- EPSS (Exploit Prediction Scoring System) v4 daily ingestion and storage.
-- Provides time-series EPSS scores (0.0-1.0 probability) and percentiles
-- for CVE vulnerability prioritization alongside CVSS v4.
--
-- Architecture:
-- - Append-only time-series (epss_scores) partitioned by month
-- - Latest projection (epss_current) for fast lookups
-- - Delta tracking (epss_changes) for enrichment targeting
-- - Provenance (epss_import_runs) for audit trail
--
-- Data Source:
-- FIRST.org daily CSV: https://epss.empiricalsecurity.com/epss_scores-YYYY-MM-DD.csv.gz
-- ~300k CVEs, ~15MB compressed, published daily ~00:00 UTC
-- ============================================================================
BEGIN;
-- ============================================================================
-- 1. EPSS Import Runs (Provenance)
-- ============================================================================
-- Tracks each EPSS data import with full provenance for deterministic replay
CREATE TABLE IF NOT EXISTS concelier.epss_import_runs (
-- Identity
import_run_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Temporal
model_date DATE NOT NULL, -- EPSS model scoring date (YYYY-MM-DD)
retrieved_at TIMESTAMPTZ NOT NULL, -- When we fetched/imported
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-- Source Provenance
source_uri TEXT NOT NULL, -- URL or "bundle://path/to/file.csv.gz"
source_type TEXT NOT NULL DEFAULT 'online' CHECK (source_type IN ('online', 'bundle', 'backfill')),
-- File Integrity
file_sha256 TEXT NOT NULL, -- SHA-256 of compressed file
decompressed_sha256 TEXT NULL, -- SHA-256 of decompressed CSV (optional)
row_count INT NOT NULL CHECK (row_count >= 0),
-- EPSS Model Metadata (from CSV comment line: "# model: v2025.03.14, published: 2025-03-14")
model_version_tag TEXT NULL, -- e.g., "v2025.03.14"
published_date DATE NULL, -- Date FIRST published this model
-- Status
status TEXT NOT NULL DEFAULT 'IN_PROGRESS' CHECK (status IN ('IN_PROGRESS', 'SUCCEEDED', 'FAILED')),
error TEXT NULL, -- Error message if FAILED
-- Constraints
UNIQUE (model_date) -- Only one successful import per date
);
COMMENT ON TABLE concelier.epss_import_runs IS
'Provenance tracking for EPSS data imports. Each row represents one daily EPSS snapshot ingestion.';
COMMENT ON COLUMN concelier.epss_import_runs.model_date IS
'The date for which EPSS scores were computed by FIRST.org model. Used as partition key and determinism anchor.';
COMMENT ON COLUMN concelier.epss_import_runs.model_version_tag IS
'EPSS model version extracted from CSV comment line (e.g., v2025.03.14). Null if not present in source.';
-- Indexes
CREATE INDEX idx_epss_import_runs_status_date
ON concelier.epss_import_runs (status, model_date DESC);
CREATE INDEX idx_epss_import_runs_created
ON concelier.epss_import_runs (created_at DESC);
-- ============================================================================
-- 2. EPSS Scores (Time-Series, Partitioned by Month)
-- ============================================================================
-- Immutable time-series of daily EPSS scores. Append-only for audit trail.
-- Partitioned by month for query performance and retention management.
CREATE TABLE IF NOT EXISTS concelier.epss_scores (
-- Temporal (partition key)
model_date DATE NOT NULL,
-- Identity
cve_id TEXT NOT NULL, -- e.g., "CVE-2024-12345"
-- EPSS Metrics
epss_score DOUBLE PRECISION NOT NULL CHECK (epss_score >= 0.0 AND epss_score <= 1.0),
percentile DOUBLE PRECISION NOT NULL CHECK (percentile >= 0.0 AND percentile <= 1.0),
-- Provenance
import_run_id UUID NOT NULL REFERENCES concelier.epss_import_runs(import_run_id) ON DELETE CASCADE,
-- Primary Key
PRIMARY KEY (model_date, cve_id)
) PARTITION BY RANGE (model_date);
COMMENT ON TABLE concelier.epss_scores IS
'Immutable time-series of daily EPSS scores. Partitioned by month. Append-only for deterministic replay.';
COMMENT ON COLUMN concelier.epss_scores.epss_score IS
'EPSS probability score (0.0-1.0). Represents likelihood of CVE exploitation within next 30 days.';
COMMENT ON COLUMN concelier.epss_scores.percentile IS
'Percentile ranking (0.0-1.0) of this CVE relative to all scored CVEs on this model_date.';
-- Indexes (applied to each partition)
CREATE INDEX idx_epss_scores_cve_date
ON concelier.epss_scores (cve_id, model_date DESC);
CREATE INDEX idx_epss_scores_score_desc
ON concelier.epss_scores (model_date, epss_score DESC);
CREATE INDEX idx_epss_scores_percentile_desc
ON concelier.epss_scores (model_date, percentile DESC);
CREATE INDEX idx_epss_scores_import_run
ON concelier.epss_scores (import_run_id);
-- ============================================================================
-- 3. EPSS Current (Latest Projection, Fast Lookup)
-- ============================================================================
-- Materialized view of latest EPSS score per CVE.
-- Updated after each successful import. Used for fast bulk queries.
CREATE TABLE IF NOT EXISTS concelier.epss_current (
-- Identity
cve_id TEXT PRIMARY KEY,
-- Latest Metrics
epss_score DOUBLE PRECISION NOT NULL CHECK (epss_score >= 0.0 AND epss_score <= 1.0),
percentile DOUBLE PRECISION NOT NULL CHECK (percentile >= 0.0 AND percentile <= 1.0),
-- Provenance
model_date DATE NOT NULL,
import_run_id UUID NOT NULL,
-- Temporal
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
COMMENT ON TABLE concelier.epss_current IS
'Latest EPSS score per CVE. Materialized projection for fast bulk queries. Updated after each import.';
-- Indexes for sorting and filtering
CREATE INDEX idx_epss_current_score_desc
ON concelier.epss_current (epss_score DESC);
CREATE INDEX idx_epss_current_percentile_desc
ON concelier.epss_current (percentile DESC);
CREATE INDEX idx_epss_current_model_date
ON concelier.epss_current (model_date);
CREATE INDEX idx_epss_current_updated_at
ON concelier.epss_current (updated_at DESC);
-- ============================================================================
-- 4. EPSS Changes (Delta Tracking, Partitioned by Month)
-- ============================================================================
-- Tracks daily EPSS score changes for enrichment targeting.
-- Only populated for CVEs where score/percentile changed materially.
CREATE TABLE IF NOT EXISTS concelier.epss_changes (
-- Temporal (partition key)
model_date DATE NOT NULL,
-- Identity
cve_id TEXT NOT NULL,
-- Previous State (NULL if newly scored)
old_score DOUBLE PRECISION NULL CHECK (old_score IS NULL OR (old_score >= 0.0 AND old_score <= 1.0)),
old_percentile DOUBLE PRECISION NULL CHECK (old_percentile IS NULL OR (old_percentile >= 0.0 AND old_percentile <= 1.0)),
-- New State
new_score DOUBLE PRECISION NOT NULL CHECK (new_score >= 0.0 AND new_score <= 1.0),
new_percentile DOUBLE PRECISION NOT NULL CHECK (new_percentile >= 0.0 AND new_percentile <= 1.0),
-- Computed Deltas
delta_score DOUBLE PRECISION NULL, -- new_score - old_score
delta_percentile DOUBLE PRECISION NULL, -- new_percentile - old_percentile
-- Change Classification Flags (bitmask)
-- 1=NEW_SCORED, 2=CROSSED_HIGH, 4=BIG_JUMP, 8=DROPPED_LOW, 16=SCORE_INCREASED, 32=SCORE_DECREASED
flags INT NOT NULL DEFAULT 0,
-- Temporal
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-- Primary Key
PRIMARY KEY (model_date, cve_id)
) PARTITION BY RANGE (model_date);
COMMENT ON TABLE concelier.epss_changes IS
'Delta tracking for EPSS score changes. Used to efficiently target enrichment jobs for impacted vulnerabilities.';
COMMENT ON COLUMN concelier.epss_changes.flags IS
'Bitmask: 1=NEW_SCORED, 2=CROSSED_HIGH (≥95th), 4=BIG_JUMP (Δ≥0.10), 8=DROPPED_LOW (<50th), 16=INCREASED, 32=DECREASED';
-- Indexes for enrichment queries
CREATE INDEX idx_epss_changes_flags
ON concelier.epss_changes (model_date, flags)
WHERE flags > 0;
CREATE INDEX idx_epss_changes_big_delta
ON concelier.epss_changes (model_date, ABS(delta_score) DESC NULLS LAST);
CREATE INDEX idx_epss_changes_new_scored
ON concelier.epss_changes (model_date)
WHERE (flags & 1) = 1; -- NEW_SCORED flag
CREATE INDEX idx_epss_changes_crossed_high
ON concelier.epss_changes (model_date)
WHERE (flags & 2) = 2; -- CROSSED_HIGH flag
-- ============================================================================
-- 5. Partition Management Helper Functions
-- ============================================================================
-- Function: Create monthly partition for epss_scores
CREATE OR REPLACE FUNCTION concelier.create_epss_scores_partition(partition_date DATE)
RETURNS TEXT AS $$
DECLARE
partition_name TEXT;
start_date DATE;
end_date DATE;
BEGIN
-- Calculate partition bounds (first day of month to first day of next month)
start_date := DATE_TRUNC('month', partition_date)::DATE;
end_date := (DATE_TRUNC('month', partition_date) + INTERVAL '1 month')::DATE;
-- Generate partition name: epss_scores_YYYY_MM
partition_name := 'epss_scores_' || TO_CHAR(start_date, 'YYYY_MM');
-- Create partition if not exists
EXECUTE format(
'CREATE TABLE IF NOT EXISTS concelier.%I PARTITION OF concelier.epss_scores FOR VALUES FROM (%L) TO (%L)',
partition_name,
start_date,
end_date
);
RETURN partition_name;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION concelier.create_epss_scores_partition IS
'Creates a monthly partition for epss_scores table. Safe to call multiple times (idempotent).';
-- Function: Create monthly partition for epss_changes
CREATE OR REPLACE FUNCTION concelier.create_epss_changes_partition(partition_date DATE)
RETURNS TEXT AS $$
DECLARE
partition_name TEXT;
start_date DATE;
end_date DATE;
BEGIN
start_date := DATE_TRUNC('month', partition_date)::DATE;
end_date := (DATE_TRUNC('month', partition_date) + INTERVAL '1 month')::DATE;
partition_name := 'epss_changes_' || TO_CHAR(start_date, 'YYYY_MM');
EXECUTE format(
'CREATE TABLE IF NOT EXISTS concelier.%I PARTITION OF concelier.epss_changes FOR VALUES FROM (%L) TO (%L)',
partition_name,
start_date,
end_date
);
RETURN partition_name;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION concelier.create_epss_changes_partition IS
'Creates a monthly partition for epss_changes table. Safe to call multiple times (idempotent).';
-- Function: Auto-create partitions for next N months
CREATE OR REPLACE FUNCTION concelier.ensure_epss_partitions_exist(months_ahead INT DEFAULT 3)
RETURNS TABLE(partition_name TEXT, partition_type TEXT) AS $$
DECLARE
current_month DATE := DATE_TRUNC('month', CURRENT_DATE)::DATE;
i INT;
BEGIN
FOR i IN 0..months_ahead LOOP
RETURN QUERY SELECT
concelier.create_epss_scores_partition(current_month + (i || ' months')::INTERVAL),
'epss_scores'::TEXT;
RETURN QUERY SELECT
concelier.create_epss_changes_partition(current_month + (i || ' months')::INTERVAL),
'epss_changes'::TEXT;
END LOOP;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION concelier.ensure_epss_partitions_exist IS
'Ensures partitions exist for current month and N months ahead. Safe to run daily.';
-- ============================================================================
-- 6. Initial Partition Creation
-- ============================================================================
-- Create partitions for current month + next 3 months
SELECT concelier.ensure_epss_partitions_exist(3);
-- ============================================================================
-- 7. Maintenance Views
-- ============================================================================
-- View: EPSS model staleness
CREATE OR REPLACE VIEW concelier.epss_model_staleness AS
SELECT
MAX(model_date) AS latest_model_date,
MAX(created_at) AS latest_import_at,
CURRENT_DATE - MAX(model_date) AS days_stale,
CASE
WHEN CURRENT_DATE - MAX(model_date) <= 1 THEN 'FRESH'
WHEN CURRENT_DATE - MAX(model_date) <= 7 THEN 'ACCEPTABLE'
WHEN CURRENT_DATE - MAX(model_date) <= 14 THEN 'STALE'
ELSE 'VERY_STALE'
END AS staleness_status
FROM concelier.epss_import_runs
WHERE status = 'SUCCEEDED';
COMMENT ON VIEW concelier.epss_model_staleness IS
'Reports EPSS data freshness. Alert if days_stale > 7.';
-- View: EPSS coverage stats
CREATE OR REPLACE VIEW concelier.epss_coverage_stats AS
SELECT
model_date,
COUNT(*) AS cve_count,
COUNT(*) FILTER (WHERE percentile >= 0.99) AS top_1_percent_count,
COUNT(*) FILTER (WHERE percentile >= 0.95) AS top_5_percent_count,
COUNT(*) FILTER (WHERE percentile >= 0.90) AS top_10_percent_count,
COUNT(*) FILTER (WHERE epss_score >= 0.50) AS high_score_count,
ROUND(AVG(epss_score)::NUMERIC, 6) AS avg_score,
ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY epss_score)::NUMERIC, 6) AS median_score,
ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY epss_score)::NUMERIC, 6) AS p95_score
FROM concelier.epss_scores
WHERE model_date IN (
SELECT model_date
FROM concelier.epss_import_runs
WHERE status = 'SUCCEEDED'
ORDER BY model_date DESC
LIMIT 1
)
GROUP BY model_date;
COMMENT ON VIEW concelier.epss_coverage_stats IS
'Statistics for latest EPSS model: CVE count, distribution, percentiles.';
-- View: Recent EPSS changes summary
CREATE OR REPLACE VIEW concelier.epss_recent_changes_summary AS
SELECT
model_date,
COUNT(*) AS total_changes,
COUNT(*) FILTER (WHERE (flags & 1) = 1) AS new_scored,
COUNT(*) FILTER (WHERE (flags & 2) = 2) AS crossed_high,
COUNT(*) FILTER (WHERE (flags & 4) = 4) AS big_jump,
COUNT(*) FILTER (WHERE (flags & 8) = 8) AS dropped_low,
COUNT(*) FILTER (WHERE (flags & 16) = 16) AS score_increased,
COUNT(*) FILTER (WHERE (flags & 32) = 32) AS score_decreased,
ROUND(AVG(ABS(delta_score))::NUMERIC, 6) AS avg_abs_delta_score,
ROUND(MAX(ABS(delta_score))::NUMERIC, 6) AS max_abs_delta_score
FROM concelier.epss_changes
WHERE model_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY model_date
ORDER BY model_date DESC;
COMMENT ON VIEW concelier.epss_recent_changes_summary IS
'Summary of EPSS changes over last 30 days. Used for monitoring and alerting.';
-- ============================================================================
-- 8. Sample Queries (Documentation)
-- ============================================================================
COMMENT ON SCHEMA concelier IS E'
StellaOps Concelier Schema - EPSS v4 Integration
Sample Queries:
-- Get latest EPSS score for a CVE
SELECT cve_id, epss_score, percentile, model_date
FROM concelier.epss_current
WHERE cve_id = ''CVE-2024-12345'';
-- Bulk query EPSS for multiple CVEs (Scanner use case)
SELECT cve_id, epss_score, percentile, model_date, import_run_id
FROM concelier.epss_current
WHERE cve_id = ANY(ARRAY[''CVE-2024-1'', ''CVE-2024-2'', ''CVE-2024-3'']);
-- Get EPSS history for a CVE (last 180 days)
SELECT model_date, epss_score, percentile
FROM concelier.epss_scores
WHERE cve_id = ''CVE-2024-12345''
AND model_date >= CURRENT_DATE - INTERVAL ''180 days''
ORDER BY model_date DESC;
-- Find top 100 CVEs by EPSS score (current)
SELECT cve_id, epss_score, percentile
FROM concelier.epss_current
ORDER BY epss_score DESC
LIMIT 100;
-- Find CVEs that crossed 95th percentile today
SELECT c.cve_id, c.old_percentile, c.new_percentile, c.delta_percentile
FROM concelier.epss_changes c
WHERE c.model_date = CURRENT_DATE
AND (c.flags & 2) = 2 -- CROSSED_HIGH flag
ORDER BY c.new_percentile DESC;
-- Get all changes with big jumps (Δ ≥ 0.10)
SELECT cve_id, old_score, new_score, delta_score, model_date
FROM concelier.epss_changes
WHERE (flags & 4) = 4 -- BIG_JUMP flag
AND model_date >= CURRENT_DATE - INTERVAL ''7 days''
ORDER BY ABS(delta_score) DESC;
-- Check model staleness
SELECT * FROM concelier.epss_model_staleness;
-- Get coverage stats for latest model
SELECT * FROM concelier.epss_coverage_stats;
';
-- ============================================================================
-- 9. Permissions (Role-Based Access Control)
-- ============================================================================
-- Grant read-only access to scanner service
GRANT SELECT ON concelier.epss_current TO scanner_service;
GRANT SELECT ON concelier.epss_scores TO scanner_service;
-- Grant read-write access to concelier worker (ingestion)
GRANT SELECT, INSERT, UPDATE ON concelier.epss_import_runs TO concelier_worker;
GRANT SELECT, INSERT ON concelier.epss_scores TO concelier_worker;
GRANT SELECT, INSERT, UPDATE, DELETE ON concelier.epss_current TO concelier_worker;
GRANT SELECT, INSERT ON concelier.epss_changes TO concelier_worker;
GRANT EXECUTE ON FUNCTION concelier.create_epss_scores_partition TO concelier_worker;
GRANT EXECUTE ON FUNCTION concelier.create_epss_changes_partition TO concelier_worker;
GRANT EXECUTE ON FUNCTION concelier.ensure_epss_partitions_exist TO concelier_worker;
-- Grant read access to policy engine
GRANT SELECT ON concelier.epss_current TO policy_engine;
GRANT SELECT ON concelier.epss_scores TO policy_engine;
-- Grant read access to notify service
GRANT SELECT ON concelier.epss_current TO notify_service;
GRANT SELECT ON concelier.epss_changes TO notify_service;
-- ============================================================================
-- 10. Migration Metadata
-- ============================================================================
-- Track this migration
INSERT INTO concelier.schema_migrations (version, description, applied_at)
VALUES ('epss-v1', 'EPSS v4 Integration Schema', NOW())
ON CONFLICT (version) DO NOTHING;
COMMIT;
-- ============================================================================
-- Post-Migration Verification
-- ============================================================================
-- Verify tables created
DO $$
BEGIN
ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_import_runs') = 1,
'epss_import_runs table not created';
ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_scores') = 1,
'epss_scores table not created';
ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_current') = 1,
'epss_current table not created';
ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_changes') = 1,
'epss_changes table not created';
RAISE NOTICE 'EPSS schema migration completed successfully!';
END;
$$;
-- List created partitions
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname || '.' || tablename)) AS size
FROM pg_tables
WHERE schemaname = 'concelier'
AND (tablename LIKE 'epss_scores_%' OR tablename LIKE 'epss_changes_%')
ORDER BY tablename;