feat(rate-limiting): Implement core rate limiting functionality with configuration, decision-making, metrics, middleware, and service registration
- Add RateLimitConfig for configuration management with YAML binding support.
- Introduce RateLimitDecision to encapsulate the result of rate limit checks.
- Implement RateLimitMetrics for OpenTelemetry metrics tracking.
- Create RateLimitMiddleware for enforcing rate limits on incoming requests.
- Develop RateLimitService to orchestrate instance and environment rate limit checks.
- Add RateLimitServiceCollectionExtensions for dependency injection registration.
docs/db/migrations/concelier-epss-schema-v1.sql (new file, 496 lines)
@@ -0,0 +1,496 @@
-- ============================================================================
-- StellaOps EPSS v4 Integration Schema Migration
-- ============================================================================
-- Database: concelier
-- Schema Version: epss-v1
-- Created: 2025-12-17
-- Sprint: SPRINT_3410_0001_0001_epss_ingestion_storage
--
-- Purpose:
--   EPSS (Exploit Prediction Scoring System) v4 daily ingestion and storage.
--   Provides time-series EPSS scores (0.0-1.0 probability) and percentiles
--   for CVE vulnerability prioritization alongside CVSS v4.
--
-- Architecture:
--   - Append-only time-series (epss_scores) partitioned by month
--   - Latest projection (epss_current) for fast lookups
--   - Delta tracking (epss_changes) for enrichment targeting
--   - Provenance (epss_import_runs) for audit trail
--
-- Data Source:
--   FIRST.org daily CSV: https://epss.empiricalsecurity.com/epss_scores-YYYY-MM-DD.csv.gz
--   ~300k CVEs, ~15MB compressed, published daily ~00:00 UTC
-- ============================================================================

BEGIN;

-- ============================================================================
-- 1. EPSS Import Runs (Provenance)
-- ============================================================================
-- Tracks each EPSS data import with full provenance for deterministic replay

CREATE TABLE IF NOT EXISTS concelier.epss_import_runs (
    -- Identity
    import_run_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Temporal
    model_date DATE NOT NULL, -- EPSS model scoring date (YYYY-MM-DD)
    retrieved_at TIMESTAMPTZ NOT NULL, -- When we fetched/imported
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Source Provenance
    source_uri TEXT NOT NULL, -- URL or "bundle://path/to/file.csv.gz"
    source_type TEXT NOT NULL DEFAULT 'online' CHECK (source_type IN ('online', 'bundle', 'backfill')),

    -- File Integrity
    file_sha256 TEXT NOT NULL, -- SHA-256 of compressed file
    decompressed_sha256 TEXT NULL, -- SHA-256 of decompressed CSV (optional)
    row_count INT NOT NULL CHECK (row_count >= 0),

    -- EPSS Model Metadata (from CSV comment line: "# model: v2025.03.14, published: 2025-03-14")
    model_version_tag TEXT NULL, -- e.g., "v2025.03.14"
    published_date DATE NULL, -- Date FIRST published this model

    -- Status
    status TEXT NOT NULL DEFAULT 'IN_PROGRESS' CHECK (status IN ('IN_PROGRESS', 'SUCCEEDED', 'FAILED')),
    error TEXT NULL, -- Error message if FAILED

    -- Constraints
    UNIQUE (model_date) -- Only one successful import per date
);

COMMENT ON TABLE concelier.epss_import_runs IS
    'Provenance tracking for EPSS data imports. Each row represents one daily EPSS snapshot ingestion.';

COMMENT ON COLUMN concelier.epss_import_runs.model_date IS
    'The date for which EPSS scores were computed by FIRST.org model. Used as partition key and determinism anchor.';

COMMENT ON COLUMN concelier.epss_import_runs.model_version_tag IS
    'EPSS model version extracted from CSV comment line (e.g., v2025.03.14). Null if not present in source.';

-- Indexes
CREATE INDEX idx_epss_import_runs_status_date
    ON concelier.epss_import_runs (status, model_date DESC);

CREATE INDEX idx_epss_import_runs_created
    ON concelier.epss_import_runs (created_at DESC);
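
-- Illustrative usage (kept commented out; not executed by this migration):
-- a worker would typically open an import run before loading a daily snapshot,
-- then update row_count/status once the CSV is processed. Values are made up.
--   INSERT INTO concelier.epss_import_runs
--       (model_date, retrieved_at, source_uri, source_type, file_sha256, row_count)
--   VALUES
--       (DATE '2025-12-17', now(),
--        'https://epss.empiricalsecurity.com/epss_scores-2025-12-17.csv.gz',
--        'online', '<sha256-of-downloaded-file>', 0)
--   RETURNING import_run_id;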

-- ============================================================================
-- 2. EPSS Scores (Time-Series, Partitioned by Month)
-- ============================================================================
-- Immutable time-series of daily EPSS scores. Append-only for audit trail.
-- Partitioned by month for query performance and retention management.

CREATE TABLE IF NOT EXISTS concelier.epss_scores (
    -- Temporal (partition key)
    model_date DATE NOT NULL,

    -- Identity
    cve_id TEXT NOT NULL, -- e.g., "CVE-2024-12345"

    -- EPSS Metrics
    epss_score DOUBLE PRECISION NOT NULL CHECK (epss_score >= 0.0 AND epss_score <= 1.0),
    percentile DOUBLE PRECISION NOT NULL CHECK (percentile >= 0.0 AND percentile <= 1.0),

    -- Provenance
    import_run_id UUID NOT NULL REFERENCES concelier.epss_import_runs(import_run_id) ON DELETE CASCADE,

    -- Primary Key
    PRIMARY KEY (model_date, cve_id)

) PARTITION BY RANGE (model_date);

COMMENT ON TABLE concelier.epss_scores IS
    'Immutable time-series of daily EPSS scores. Partitioned by month. Append-only for deterministic replay.';

COMMENT ON COLUMN concelier.epss_scores.epss_score IS
    'EPSS probability score (0.0-1.0). Represents likelihood of CVE exploitation within next 30 days.';

COMMENT ON COLUMN concelier.epss_scores.percentile IS
    'Percentile ranking (0.0-1.0) of this CVE relative to all scored CVEs on this model_date.';

-- Indexes (applied to each partition)
CREATE INDEX idx_epss_scores_cve_date
    ON concelier.epss_scores (cve_id, model_date DESC);

CREATE INDEX idx_epss_scores_score_desc
    ON concelier.epss_scores (model_date, epss_score DESC);

CREATE INDEX idx_epss_scores_percentile_desc
    ON concelier.epss_scores (model_date, percentile DESC);

CREATE INDEX idx_epss_scores_import_run
    ON concelier.epss_scores (import_run_id);

-- ============================================================================
-- 3. EPSS Current (Latest Projection, Fast Lookup)
-- ============================================================================
-- Materialized projection (plain table) of the latest EPSS score per CVE.
-- Updated after each successful import. Used for fast bulk queries.

CREATE TABLE IF NOT EXISTS concelier.epss_current (
    -- Identity
    cve_id TEXT PRIMARY KEY,

    -- Latest Metrics
    epss_score DOUBLE PRECISION NOT NULL CHECK (epss_score >= 0.0 AND epss_score <= 1.0),
    percentile DOUBLE PRECISION NOT NULL CHECK (percentile >= 0.0 AND percentile <= 1.0),

    -- Provenance
    model_date DATE NOT NULL,
    import_run_id UUID NOT NULL,

    -- Temporal
    updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

COMMENT ON TABLE concelier.epss_current IS
    'Latest EPSS score per CVE. Materialized projection for fast bulk queries. Updated after each import.';
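
-- Illustrative upsert (kept commented out; not executed by this migration):
-- after a successful import, the ingestion worker is expected to refresh the
-- projection roughly like this. The exact worker SQL may differ.
--   INSERT INTO concelier.epss_current (cve_id, epss_score, percentile, model_date, import_run_id, updated_at)
--   SELECT s.cve_id, s.epss_score, s.percentile, s.model_date, s.import_run_id, now()
--   FROM concelier.epss_scores s
--   WHERE s.model_date = DATE '2025-12-17'
--   ON CONFLICT (cve_id) DO UPDATE
--       SET epss_score    = EXCLUDED.epss_score,
--           percentile    = EXCLUDED.percentile,
--           model_date    = EXCLUDED.model_date,
--           import_run_id = EXCLUDED.import_run_id,
--           updated_at    = EXCLUDED.updated_at;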

-- Indexes for sorting and filtering
CREATE INDEX idx_epss_current_score_desc
    ON concelier.epss_current (epss_score DESC);

CREATE INDEX idx_epss_current_percentile_desc
    ON concelier.epss_current (percentile DESC);

CREATE INDEX idx_epss_current_model_date
    ON concelier.epss_current (model_date);

CREATE INDEX idx_epss_current_updated_at
    ON concelier.epss_current (updated_at DESC);

-- ============================================================================
-- 4. EPSS Changes (Delta Tracking, Partitioned by Month)
-- ============================================================================
-- Tracks daily EPSS score changes for enrichment targeting.
-- Only populated for CVEs where score/percentile changed materially.

CREATE TABLE IF NOT EXISTS concelier.epss_changes (
    -- Temporal (partition key)
    model_date DATE NOT NULL,

    -- Identity
    cve_id TEXT NOT NULL,

    -- Previous State (NULL if newly scored)
    old_score DOUBLE PRECISION NULL CHECK (old_score IS NULL OR (old_score >= 0.0 AND old_score <= 1.0)),
    old_percentile DOUBLE PRECISION NULL CHECK (old_percentile IS NULL OR (old_percentile >= 0.0 AND old_percentile <= 1.0)),

    -- New State
    new_score DOUBLE PRECISION NOT NULL CHECK (new_score >= 0.0 AND new_score <= 1.0),
    new_percentile DOUBLE PRECISION NOT NULL CHECK (new_percentile >= 0.0 AND new_percentile <= 1.0),

    -- Computed Deltas
    delta_score DOUBLE PRECISION NULL, -- new_score - old_score
    delta_percentile DOUBLE PRECISION NULL, -- new_percentile - old_percentile

    -- Change Classification Flags (bitmask)
    -- 1=NEW_SCORED, 2=CROSSED_HIGH, 4=BIG_JUMP, 8=DROPPED_LOW, 16=SCORE_INCREASED, 32=SCORE_DECREASED
    flags INT NOT NULL DEFAULT 0,

    -- Temporal
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Primary Key
    PRIMARY KEY (model_date, cve_id)

) PARTITION BY RANGE (model_date);

COMMENT ON TABLE concelier.epss_changes IS
    'Delta tracking for EPSS score changes. Used to efficiently target enrichment jobs for impacted vulnerabilities.';

COMMENT ON COLUMN concelier.epss_changes.flags IS
    'Bitmask: 1=NEW_SCORED, 2=CROSSED_HIGH (≥95th), 4=BIG_JUMP (Δ≥0.10), 8=DROPPED_LOW (<50th), 16=INCREASED, 32=DECREASED';
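
-- Illustrative delta computation (kept commented out; not executed by this migration):
-- one way the worker could populate epss_changes by comparing a new snapshot
-- against epss_current. Thresholds mirror the flag definitions above; the real
-- worker logic may differ (only NEW_SCORED, CROSSED_HIGH, BIG_JUMP shown).
--   INSERT INTO concelier.epss_changes
--       (model_date, cve_id, old_score, old_percentile, new_score, new_percentile,
--        delta_score, delta_percentile, flags)
--   SELECT s.model_date, s.cve_id, c.epss_score, c.percentile, s.epss_score, s.percentile,
--          s.epss_score - c.epss_score, s.percentile - c.percentile,
--            (CASE WHEN c.cve_id IS NULL THEN 1 ELSE 0 END)
--          | (CASE WHEN s.percentile >= 0.95 AND COALESCE(c.percentile, 0) < 0.95 THEN 2 ELSE 0 END)
--          | (CASE WHEN ABS(s.epss_score - COALESCE(c.epss_score, s.epss_score)) >= 0.10 THEN 4 ELSE 0 END)
--   FROM concelier.epss_scores s
--   LEFT JOIN concelier.epss_current c USING (cve_id)
--   WHERE s.model_date = DATE '2025-12-17';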

-- Indexes for enrichment queries
CREATE INDEX idx_epss_changes_flags
    ON concelier.epss_changes (model_date, flags)
    WHERE flags > 0;

CREATE INDEX idx_epss_changes_big_delta
    ON concelier.epss_changes (model_date, ABS(delta_score) DESC NULLS LAST);

CREATE INDEX idx_epss_changes_new_scored
    ON concelier.epss_changes (model_date)
    WHERE (flags & 1) = 1; -- NEW_SCORED flag

CREATE INDEX idx_epss_changes_crossed_high
    ON concelier.epss_changes (model_date)
    WHERE (flags & 2) = 2; -- CROSSED_HIGH flag

-- ============================================================================
-- 5. Partition Management Helper Functions
-- ============================================================================

-- Function: Create monthly partition for epss_scores
CREATE OR REPLACE FUNCTION concelier.create_epss_scores_partition(partition_date DATE)
RETURNS TEXT AS $$
DECLARE
    partition_name TEXT;
    start_date DATE;
    end_date DATE;
BEGIN
    -- Calculate partition bounds (first day of month to first day of next month)
    start_date := DATE_TRUNC('month', partition_date)::DATE;
    end_date := (DATE_TRUNC('month', partition_date) + INTERVAL '1 month')::DATE;

    -- Generate partition name: epss_scores_YYYY_MM
    partition_name := 'epss_scores_' || TO_CHAR(start_date, 'YYYY_MM');

    -- Create partition if not exists
    EXECUTE format(
        'CREATE TABLE IF NOT EXISTS concelier.%I PARTITION OF concelier.epss_scores FOR VALUES FROM (%L) TO (%L)',
        partition_name,
        start_date,
        end_date
    );

    RETURN partition_name;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION concelier.create_epss_scores_partition IS
    'Creates a monthly partition for epss_scores table. Safe to call multiple times (idempotent).';
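
-- Illustrative usage (kept commented out; not executed by this migration):
--   SELECT concelier.create_epss_scores_partition(DATE '2025-12-17');
--   -- returns 'epss_scores_2025_12' and creates concelier.epss_scores_2025_12 if missing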

-- Function: Create monthly partition for epss_changes
CREATE OR REPLACE FUNCTION concelier.create_epss_changes_partition(partition_date DATE)
RETURNS TEXT AS $$
DECLARE
    partition_name TEXT;
    start_date DATE;
    end_date DATE;
BEGIN
    start_date := DATE_TRUNC('month', partition_date)::DATE;
    end_date := (DATE_TRUNC('month', partition_date) + INTERVAL '1 month')::DATE;
    partition_name := 'epss_changes_' || TO_CHAR(start_date, 'YYYY_MM');

    EXECUTE format(
        'CREATE TABLE IF NOT EXISTS concelier.%I PARTITION OF concelier.epss_changes FOR VALUES FROM (%L) TO (%L)',
        partition_name,
        start_date,
        end_date
    );

    RETURN partition_name;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION concelier.create_epss_changes_partition IS
    'Creates a monthly partition for epss_changes table. Safe to call multiple times (idempotent).';

-- Function: Auto-create partitions for next N months
CREATE OR REPLACE FUNCTION concelier.ensure_epss_partitions_exist(months_ahead INT DEFAULT 3)
RETURNS TABLE(partition_name TEXT, partition_type TEXT) AS $$
DECLARE
    current_month DATE := DATE_TRUNC('month', CURRENT_DATE)::DATE;
    i INT;
BEGIN
    FOR i IN 0..months_ahead LOOP
        RETURN QUERY SELECT
            concelier.create_epss_scores_partition((current_month + (i || ' months')::INTERVAL)::DATE),
            'epss_scores'::TEXT;

        RETURN QUERY SELECT
            concelier.create_epss_changes_partition((current_month + (i || ' months')::INTERVAL)::DATE),
            'epss_changes'::TEXT;
    END LOOP;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION concelier.ensure_epss_partitions_exist IS
    'Ensures partitions exist for current month and N months ahead. Safe to run daily.';

-- ============================================================================
-- 6. Initial Partition Creation
-- ============================================================================
-- Create partitions for current month + next 3 months

SELECT concelier.ensure_epss_partitions_exist(3);

-- ============================================================================
-- 7. Maintenance Views
-- ============================================================================

-- View: EPSS model staleness
CREATE OR REPLACE VIEW concelier.epss_model_staleness AS
SELECT
    MAX(model_date) AS latest_model_date,
    MAX(created_at) AS latest_import_at,
    CURRENT_DATE - MAX(model_date) AS days_stale,
    CASE
        WHEN CURRENT_DATE - MAX(model_date) <= 1 THEN 'FRESH'
        WHEN CURRENT_DATE - MAX(model_date) <= 7 THEN 'ACCEPTABLE'
        WHEN CURRENT_DATE - MAX(model_date) <= 14 THEN 'STALE'
        ELSE 'VERY_STALE'
    END AS staleness_status
FROM concelier.epss_import_runs
WHERE status = 'SUCCEEDED';

COMMENT ON VIEW concelier.epss_model_staleness IS
    'Reports EPSS data freshness. Alert if days_stale > 7.';

-- View: EPSS coverage stats
CREATE OR REPLACE VIEW concelier.epss_coverage_stats AS
SELECT
    model_date,
    COUNT(*) AS cve_count,
    COUNT(*) FILTER (WHERE percentile >= 0.99) AS top_1_percent_count,
    COUNT(*) FILTER (WHERE percentile >= 0.95) AS top_5_percent_count,
    COUNT(*) FILTER (WHERE percentile >= 0.90) AS top_10_percent_count,
    COUNT(*) FILTER (WHERE epss_score >= 0.50) AS high_score_count,
    ROUND(AVG(epss_score)::NUMERIC, 6) AS avg_score,
    ROUND((PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY epss_score))::NUMERIC, 6) AS median_score,
    ROUND((PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY epss_score))::NUMERIC, 6) AS p95_score
FROM concelier.epss_scores
WHERE model_date IN (
    SELECT model_date
    FROM concelier.epss_import_runs
    WHERE status = 'SUCCEEDED'
    ORDER BY model_date DESC
    LIMIT 1
)
GROUP BY model_date;

COMMENT ON VIEW concelier.epss_coverage_stats IS
    'Statistics for latest EPSS model: CVE count, distribution, percentiles.';

-- View: Recent EPSS changes summary
CREATE OR REPLACE VIEW concelier.epss_recent_changes_summary AS
SELECT
    model_date,
    COUNT(*) AS total_changes,
    COUNT(*) FILTER (WHERE (flags & 1) = 1) AS new_scored,
    COUNT(*) FILTER (WHERE (flags & 2) = 2) AS crossed_high,
    COUNT(*) FILTER (WHERE (flags & 4) = 4) AS big_jump,
    COUNT(*) FILTER (WHERE (flags & 8) = 8) AS dropped_low,
    COUNT(*) FILTER (WHERE (flags & 16) = 16) AS score_increased,
    COUNT(*) FILTER (WHERE (flags & 32) = 32) AS score_decreased,
    ROUND(AVG(ABS(delta_score))::NUMERIC, 6) AS avg_abs_delta_score,
    ROUND(MAX(ABS(delta_score))::NUMERIC, 6) AS max_abs_delta_score
FROM concelier.epss_changes
WHERE model_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY model_date
ORDER BY model_date DESC;

COMMENT ON VIEW concelier.epss_recent_changes_summary IS
    'Summary of EPSS changes over last 30 days. Used for monitoring and alerting.';
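
-- Illustrative monitoring query (kept commented out; not executed by this migration):
-- surface recent model dates with noteworthy movement, e.g. to drive notifications.
--   SELECT model_date, crossed_high, big_jump
--   FROM concelier.epss_recent_changes_summary
--   WHERE crossed_high > 0 OR big_jump > 0;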

-- ============================================================================
-- 8. Sample Queries (Documentation)
-- ============================================================================

COMMENT ON SCHEMA concelier IS E'
StellaOps Concelier Schema - EPSS v4 Integration

Sample Queries:

-- Get latest EPSS score for a CVE
SELECT cve_id, epss_score, percentile, model_date
FROM concelier.epss_current
WHERE cve_id = ''CVE-2024-12345'';

-- Bulk query EPSS for multiple CVEs (Scanner use case)
SELECT cve_id, epss_score, percentile, model_date, import_run_id
FROM concelier.epss_current
WHERE cve_id = ANY(ARRAY[''CVE-2024-1'', ''CVE-2024-2'', ''CVE-2024-3'']);

-- Get EPSS history for a CVE (last 180 days)
SELECT model_date, epss_score, percentile
FROM concelier.epss_scores
WHERE cve_id = ''CVE-2024-12345''
  AND model_date >= CURRENT_DATE - INTERVAL ''180 days''
ORDER BY model_date DESC;

-- Find top 100 CVEs by EPSS score (current)
SELECT cve_id, epss_score, percentile
FROM concelier.epss_current
ORDER BY epss_score DESC
LIMIT 100;

-- Find CVEs that crossed 95th percentile today
SELECT c.cve_id, c.old_percentile, c.new_percentile, c.delta_percentile
FROM concelier.epss_changes c
WHERE c.model_date = CURRENT_DATE
  AND (c.flags & 2) = 2 -- CROSSED_HIGH flag
ORDER BY c.new_percentile DESC;

-- Get all changes with big jumps (Δ ≥ 0.10)
SELECT cve_id, old_score, new_score, delta_score, model_date
FROM concelier.epss_changes
WHERE (flags & 4) = 4 -- BIG_JUMP flag
  AND model_date >= CURRENT_DATE - INTERVAL ''7 days''
ORDER BY ABS(delta_score) DESC;

-- Check model staleness
SELECT * FROM concelier.epss_model_staleness;

-- Get coverage stats for latest model
SELECT * FROM concelier.epss_coverage_stats;
';

-- ============================================================================
-- 9. Permissions (Role-Based Access Control)
-- ============================================================================

-- Grant read-only access to scanner service
GRANT SELECT ON concelier.epss_current TO scanner_service;
GRANT SELECT ON concelier.epss_scores TO scanner_service;

-- Grant read-write access to concelier worker (ingestion)
GRANT SELECT, INSERT, UPDATE ON concelier.epss_import_runs TO concelier_worker;
GRANT SELECT, INSERT ON concelier.epss_scores TO concelier_worker;
GRANT SELECT, INSERT, UPDATE, DELETE ON concelier.epss_current TO concelier_worker;
GRANT SELECT, INSERT ON concelier.epss_changes TO concelier_worker;
GRANT EXECUTE ON FUNCTION concelier.create_epss_scores_partition TO concelier_worker;
GRANT EXECUTE ON FUNCTION concelier.create_epss_changes_partition TO concelier_worker;
GRANT EXECUTE ON FUNCTION concelier.ensure_epss_partitions_exist TO concelier_worker;

-- Grant read access to policy engine
GRANT SELECT ON concelier.epss_current TO policy_engine;
GRANT SELECT ON concelier.epss_scores TO policy_engine;

-- Grant read access to notify service
GRANT SELECT ON concelier.epss_current TO notify_service;
GRANT SELECT ON concelier.epss_changes TO notify_service;

-- ============================================================================
-- 10. Migration Metadata
-- ============================================================================

-- Track this migration
INSERT INTO concelier.schema_migrations (version, description, applied_at)
VALUES ('epss-v1', 'EPSS v4 Integration Schema', NOW())
ON CONFLICT (version) DO NOTHING;

COMMIT;

-- ============================================================================
-- Post-Migration Verification
-- ============================================================================

-- Verify tables created
DO $$
BEGIN
    ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_import_runs') = 1,
        'epss_import_runs table not created';
    ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_scores') = 1,
        'epss_scores table not created';
    ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_current') = 1,
        'epss_current table not created';
    ASSERT (SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'concelier' AND tablename = 'epss_changes') = 1,
        'epss_changes table not created';

    RAISE NOTICE 'EPSS schema migration completed successfully!';
END;
$$;

-- List created partitions
SELECT
    schemaname,
    tablename,
    pg_size_pretty(pg_total_relation_size(schemaname || '.' || tablename)) AS size
FROM pg_tables
WHERE schemaname = 'concelier'
  AND (tablename LIKE 'epss_scores_%' OR tablename LIKE 'epss_changes_%')
ORDER BY tablename;
docs/db/schemas/scanner_schema_specification.md (new file, 468 lines)
@@ -0,0 +1,468 @@
# Scanner Schema Specification

**Schema**: `scanner`
**Owner**: Scanner.WebService
**Purpose**: Scan orchestration, call-graphs, proof bundles, reachability analysis
**Sprint**: SPRINT_3500_0002_0001, SPRINT_3500_0003_0002

---

## Overview

The `scanner` schema contains all tables related to:
1. Scan manifests and deterministic replay
2. Proof bundles (content-addressed storage metadata)
3. Call-graph nodes and edges (reachability analysis)
4. Entrypoints (framework-specific entry discovery)
5. Runtime samples (profiling data for reachability validation)

**Design Principles**:
- All tables use `scan_id` as primary partition key for scan isolation
- Deterministic data only (no timestamps in core algorithms)
- Content-addressed references (hashes, not paths)
- Forward-only schema evolution

---

## Tables

### 1. scan_manifest

**Purpose**: Stores immutable scan manifests capturing all inputs for deterministic replay.

**Schema**:

| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `scan_id` | `text` | NOT NULL | Primary key; UUID format |
| `created_at_utc` | `timestamptz` | NOT NULL | Scan creation timestamp |
| `artifact_digest` | `text` | NOT NULL | Image/artifact digest (sha256:...) |
| `artifact_purl` | `text` | NULL | PURL identifier (pkg:oci/...) |
| `scanner_version` | `text` | NOT NULL | Scanner.WebService version |
| `worker_version` | `text` | NOT NULL | Scanner.Worker version |
| `concelier_snapshot_hash` | `text` | NOT NULL | Concelier feed snapshot digest |
| `excititor_snapshot_hash` | `text` | NOT NULL | Excititor VEX snapshot digest |
| `lattice_policy_hash` | `text` | NOT NULL | Policy bundle digest |
| `deterministic` | `boolean` | NOT NULL | Whether scan used deterministic mode |
| `seed` | `bytea` | NOT NULL | 32-byte deterministic seed |
| `knobs` | `jsonb` | NULL | Configuration knobs (depth limits, etc.) |
| `manifest_hash` | `text` | NOT NULL | SHA-256 of canonical manifest JSON (UNIQUE) |
| `manifest_json` | `jsonb` | NOT NULL | Canonical JSON manifest |
| `manifest_dsse_json` | `jsonb` | NOT NULL | DSSE signature envelope |

**Indexes**:

```sql
CREATE INDEX idx_scan_manifest_artifact ON scanner.scan_manifest(artifact_digest);
CREATE INDEX idx_scan_manifest_snapshots ON scanner.scan_manifest(concelier_snapshot_hash, excititor_snapshot_hash);
CREATE INDEX idx_scan_manifest_created ON scanner.scan_manifest(created_at_utc DESC);
CREATE UNIQUE INDEX idx_scan_manifest_hash ON scanner.scan_manifest(manifest_hash);
```

**Constraints**:
- `manifest_hash` format: `sha256:[0-9a-f]{64}`
- `seed` must be exactly 32 bytes
- `scan_id` format: UUID v4

**Partitioning**: None (lookup table, <100k rows expected)

**Retention**: 180 days (drop scans older than 180 days)
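
For orientation, a minimal DDL sketch assembled from the column table above; the authoritative definition ships in `010_scanner_schema.sql` and may differ (the CHECK constraints shown here are illustrative):

```sql
-- Sketch only: derived from the column table above, not the shipped migration.
CREATE TABLE IF NOT EXISTS scanner.scan_manifest (
    scan_id                  text        NOT NULL PRIMARY KEY,
    created_at_utc           timestamptz NOT NULL,
    artifact_digest          text        NOT NULL,
    artifact_purl            text        NULL,
    scanner_version          text        NOT NULL,
    worker_version           text        NOT NULL,
    concelier_snapshot_hash  text        NOT NULL,
    excititor_snapshot_hash  text        NOT NULL,
    lattice_policy_hash      text        NOT NULL,
    deterministic            boolean     NOT NULL,
    seed                     bytea       NOT NULL CHECK (octet_length(seed) = 32),
    knobs                    jsonb       NULL,
    manifest_hash            text        NOT NULL UNIQUE CHECK (manifest_hash ~ '^sha256:[0-9a-f]{64}$'),
    manifest_json            jsonb       NOT NULL,
    manifest_dsse_json       jsonb       NOT NULL
);
```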

---

### 2. proof_bundle

**Purpose**: Metadata for content-addressed proof bundles (zip archives).

**Schema**:

| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `scan_id` | `text` | NOT NULL | Foreign key to `scan_manifest.scan_id` |
| `root_hash` | `text` | NOT NULL | Merkle root hash of bundle contents |
| `bundle_uri` | `text` | NOT NULL | File path or S3 URI to bundle zip |
| `proof_root_dsse_json` | `jsonb` | NOT NULL | DSSE signature of root hash |
| `created_at_utc` | `timestamptz` | NOT NULL | Bundle creation timestamp |

**Primary Key**: `(scan_id, root_hash)`

**Indexes**:

```sql
CREATE INDEX idx_proof_bundle_scan ON scanner.proof_bundle(scan_id);
CREATE INDEX idx_proof_bundle_created ON scanner.proof_bundle(created_at_utc DESC);
```

**Constraints**:
- `root_hash` format: `sha256:[0-9a-f]{64}`
- `bundle_uri` must be an accessible file path or S3 URI

**Partitioning**: None (<100k rows expected)

**Retention**: 365 days (compliance requirement for signed bundles)

---

### 3. cg_node (call-graph nodes)

**Purpose**: Stores call-graph nodes (methods/functions) extracted from artifacts.

**Schema**:

| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `scan_id` | `text` | NOT NULL | Partition key |
| `node_id` | `text` | NOT NULL | Deterministic node ID (hash-based) |
| `artifact_key` | `text` | NOT NULL | Artifact identifier (assembly name, JAR, etc.) |
| `symbol_key` | `text` | NOT NULL | Canonical symbol name (Namespace.Type::Method) |
| `visibility` | `text` | NOT NULL | `public`, `internal`, `private`, `unknown` |
| `flags` | `integer` | NOT NULL | Bitfield: `IS_ENTRYPOINT_CANDIDATE=1`, `IS_VIRTUAL=2`, etc. |

**Primary Key**: `(scan_id, node_id)`

**Indexes**:

```sql
CREATE INDEX idx_cg_node_artifact ON scanner.cg_node(scan_id, artifact_key);
CREATE INDEX idx_cg_node_symbol ON scanner.cg_node(scan_id, symbol_key);
CREATE INDEX idx_cg_node_flags ON scanner.cg_node(scan_id, flags) WHERE (flags & 1) = 1; -- Entrypoint candidates
```

**Constraints**:
- `node_id` format: `sha256:[0-9a-f]{64}` (deterministic hash)
- `visibility` must be one of: `public`, `internal`, `private`, `unknown`

**Partitioning**: Hash partition by `scan_id` (for scans with >100k nodes)

**Retention**: 90 days (call-graphs recomputed on rescan)

---

### 4. cg_edge (call-graph edges)

**Purpose**: Stores call-graph edges (invocations) between nodes.

**Schema**:

| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `scan_id` | `text` | NOT NULL | Partition key |
| `from_node_id` | `text` | NOT NULL | Caller node ID |
| `to_node_id` | `text` | NOT NULL | Callee node ID |
| `kind` | `smallint` | NOT NULL | `1=static`, `2=heuristic` |
| `reason` | `smallint` | NOT NULL | `1=direct_call`, `2=virtual_call`, `3=reflection_string`, etc. |
| `weight` | `real` | NOT NULL | Edge confidence weight (0.0-1.0) |

**Primary Key**: `(scan_id, from_node_id, to_node_id, kind, reason)`

**Indexes**:

```sql
CREATE INDEX idx_cg_edge_from ON scanner.cg_edge(scan_id, from_node_id);
CREATE INDEX idx_cg_edge_to ON scanner.cg_edge(scan_id, to_node_id);
CREATE INDEX idx_cg_edge_static ON scanner.cg_edge(scan_id, kind) WHERE kind = 1;
CREATE INDEX idx_cg_edge_heuristic ON scanner.cg_edge(scan_id, kind) WHERE kind = 2;
```

**Constraints**:
- `kind` must be 1 (static) or 2 (heuristic)
- `reason` must be in range 1-10 (enum defined in code)
- `weight` must be in range [0.0, 1.0]

**Partitioning**: Hash partition by `scan_id` (for scans with >500k edges)

**Retention**: 90 days

**Notes**:
- High-volume table (1M+ rows per large scan)
- Use partial indexes for `kind` to optimize static-only queries
- Consider a composite index on `(from_node_id, to_node_id)` for bidirectional BFS (a multicolumn GIN over these scalar columns would require the `btree_gin` extension)
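
For orientation, a minimal DDL sketch of the two call-graph tables assembled from the column tables above; the authoritative definitions ship in `011_call_graph_tables.sql` and may differ (the hash-partitioning syntax shown here is illustrative):

```sql
-- Sketch only: inferred from the column tables above, not the shipped migration.
CREATE TABLE IF NOT EXISTS scanner.cg_node (
    scan_id      text     NOT NULL,
    node_id      text     NOT NULL,
    artifact_key text     NOT NULL,
    symbol_key   text     NOT NULL,
    visibility   text     NOT NULL CHECK (visibility IN ('public', 'internal', 'private', 'unknown')),
    flags        integer  NOT NULL DEFAULT 0,
    PRIMARY KEY (scan_id, node_id)
) PARTITION BY HASH (scan_id);

CREATE TABLE IF NOT EXISTS scanner.cg_edge (
    scan_id      text     NOT NULL,
    from_node_id text     NOT NULL,
    to_node_id   text     NOT NULL,
    kind         smallint NOT NULL CHECK (kind IN (1, 2)),
    reason       smallint NOT NULL CHECK (reason BETWEEN 1 AND 10),
    weight       real     NOT NULL CHECK (weight >= 0.0 AND weight <= 1.0),
    PRIMARY KEY (scan_id, from_node_id, to_node_id, kind, reason)
) PARTITION BY HASH (scan_id);

-- Hash-partitioned parents need one child per remainder, e.g. (modulus illustrative):
-- CREATE TABLE scanner.cg_edge_p0 PARTITION OF scanner.cg_edge FOR VALUES WITH (MODULUS 8, REMAINDER 0);
-- ... repeat for remainders 1-7.
```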

---

### 5. entrypoint

**Purpose**: Stores discovered entrypoints (HTTP routes, CLI commands, background jobs).

**Schema**:

| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `scan_id` | `text` | NOT NULL | Partition key |
| `node_id` | `text` | NOT NULL | Reference to `cg_node.node_id` |
| `kind` | `text` | NOT NULL | `http`, `grpc`, `cli`, `job`, `event`, `unknown` |
| `framework` | `text` | NOT NULL | `aspnetcore`, `spring`, `express`, etc. |
| `route` | `text` | NULL | HTTP route pattern (e.g., `/api/orders/{id}`) |
| `metadata` | `jsonb` | NULL | Framework-specific metadata |

**Primary Key**: `(scan_id, node_id, kind, framework, route)`

**Indexes**:

```sql
CREATE INDEX idx_entrypoint_scan ON scanner.entrypoint(scan_id);
CREATE INDEX idx_entrypoint_kind ON scanner.entrypoint(scan_id, kind);
CREATE INDEX idx_entrypoint_framework ON scanner.entrypoint(scan_id, framework);
```

**Constraints**:
- `kind` must be one of: `http`, `grpc`, `cli`, `job`, `event`, `unknown`
- `route` required for `kind='http'` or `kind='grpc'`

**Partitioning**: None (<10k rows per scan)

**Retention**: 90 days

---

### 6. runtime_sample

**Purpose**: Stores runtime profiling samples (stack traces) for reachability validation.

**Schema**:

| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `scan_id` | `text` | NOT NULL | Partition key (links to scan) |
| `collected_at` | `timestamptz` | NOT NULL | Sample collection timestamp |
| `env_hash` | `text` | NOT NULL | Environment hash (k8s ns+pod+container) |
| `sample_id` | `bigserial` | NOT NULL | Auto-incrementing sample ID |
| `timestamp` | `timestamptz` | NOT NULL | Sample timestamp |
| `pid` | `integer` | NOT NULL | Process ID |
| `thread_id` | `integer` | NOT NULL | Thread ID |
| `frames` | `text[]` | NOT NULL | Array of node IDs (stack trace) |
| `weight` | `real` | NOT NULL | Sample weight (1.0 for discrete samples) |

**Primary Key**: `(scan_id, sample_id, collected_at)` (PostgreSQL requires the range partition key `collected_at` to be included in any primary key on a partitioned table)

**Indexes**:

```sql
CREATE INDEX idx_runtime_sample_scan ON scanner.runtime_sample(scan_id, collected_at DESC);
CREATE INDEX idx_runtime_sample_frames ON scanner.runtime_sample USING GIN(frames);
CREATE INDEX idx_runtime_sample_env ON scanner.runtime_sample(scan_id, env_hash);
```

**Constraints**:
- `frames` array length must be >0 and <1000
- `weight` must be >0.0

**Partitioning**: **TIME-BASED** (monthly partitions by `collected_at`)

```sql
CREATE TABLE scanner.runtime_sample_2025_01 PARTITION OF scanner.runtime_sample
    FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');
```

**Retention**: 90 days (drop old partitions automatically)

**Notes**:
- **Highest volume table** (10M+ rows for long-running services)
- GIN index on `frames[]` enables fast "find samples containing node X" queries
- Partition pruning critical for performance

---

## Enums (Defined in Code)

### cg_edge.kind

| Value | Name | Description |
|-------|------|-------------|
| 1 | `static` | Statically proven call edge |
| 2 | `heuristic` | Heuristic/inferred edge (reflection, DI, dynamic) |

### cg_edge.reason

| Value | Name | Description |
|-------|------|-------------|
| 1 | `direct_call` | Direct method invocation |
| 2 | `virtual_call` | Virtual/interface dispatch |
| 3 | `reflection_string` | Reflection with string name |
| 4 | `di_binding` | Dependency injection registration |
| 5 | `dynamic_import` | Dynamic module import (JS/Python) |
| 6 | `delegate_invoke` | Delegate/lambda invocation |
| 7 | `async_await` | Async method call |
| 8 | `constructor` | Object constructor invocation |
| 9 | `plt_got` | PLT/GOT indirect call (native binaries) |
| 10 | `unknown` | Unknown edge type |

### cg_node.flags (Bitfield)

| Bit | Flag | Description |
|-----|------|-------------|
| 0 | `IS_ENTRYPOINT_CANDIDATE` | Node could be an entrypoint |
| 1 | `IS_VIRTUAL` | Virtual or interface method |
| 2 | `IS_ASYNC` | Async method |
| 3 | `IS_CONSTRUCTOR` | Constructor method |
| 4 | `IS_EXPORTED` | Publicly exported (for native binaries) |

---

## Schema Evolution

### Migration Categories

Per `docs/db/SPECIFICATION.md`:

| Category | Prefix | Execution | Description |
|----------|--------|-----------|-------------|
| Startup (A) | `001-099` | Automatic at boot | Non-breaking DDL (CREATE IF NOT EXISTS) |
| Release (B) | `100-199` | Manual via CLI | Breaking changes (requires maintenance window) |
| Seed | `S001-S999` | After schema | Reference data with ON CONFLICT DO NOTHING |
| Data (C) | `DM001-DM999` | Background job | Batched data transformations |

### Upcoming Migrations

| Migration | Category | Sprint | Description |
|-----------|----------|--------|-------------|
| `010_scanner_schema.sql` | Startup (A) | 3500.0002.0001 | Create scanner schema, scan_manifest, proof_bundle |
| `011_call_graph_tables.sql` | Startup (A) | 3500.0003.0002 | Create cg_node, cg_edge, entrypoint |
| `012_runtime_sample_partitions.sql` | Startup (A) | 3500.0003.0004 | Create runtime_sample with monthly partitions |
| `S001_seed_edge_reasons.sql` | Seed | 3500.0003.0002 | Seed edge reason lookup table |

---

## Performance Considerations

### Query Patterns

**High-frequency queries**:

1. **Scan manifest lookup by artifact**:
   ```sql
   SELECT * FROM scanner.scan_manifest
   WHERE artifact_digest = $1
   ORDER BY created_at_utc DESC LIMIT 1;
   ```
   - Index: `idx_scan_manifest_artifact`

2. **Reachability BFS (forward)**:
   ```sql
   SELECT to_node_id FROM scanner.cg_edge
   WHERE scan_id = $1 AND from_node_id = ANY($2) AND kind = 1;
   ```
   - Index: `idx_cg_edge_from`

3. **Reachability BFS (backward)**:
   ```sql
   SELECT from_node_id FROM scanner.cg_edge
   WHERE scan_id = $1 AND to_node_id = $2 AND kind = 1;
   ```
   - Index: `idx_cg_edge_to`

4. **Find runtime samples containing node**:
   ```sql
   SELECT * FROM scanner.runtime_sample
   WHERE scan_id = $1 AND frames @> ARRAY[$2];
   ```
   - Index: `idx_runtime_sample_frames` (GIN; the `@>` containment operator is GIN-indexable, whereas `$2 = ANY(frames)` would not use this index)
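
The BFS queries above fetch a single hop. A full forward pass can also be expressed as a recursive CTE; this is a sketch only, assuming the traversal is seeded from `entrypoint` nodes (the service may instead iterate hops in application code):

```sql
-- Sketch: forward reachability over static edges, seeded from entrypoints of one scan.
WITH RECURSIVE reachable(node_id) AS (
    SELECT e.node_id
    FROM scanner.entrypoint e
    WHERE e.scan_id = $1
  UNION
    SELECT c.to_node_id
    FROM scanner.cg_edge c
    JOIN reachable r ON r.node_id = c.from_node_id
    WHERE c.scan_id = $1 AND c.kind = 1
)
SELECT node_id FROM reachable;
```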

### Index Maintenance

**Reindex schedule**:
- `cg_edge` indexes: Weekly (high churn)
- `runtime_sample` GIN index: Monthly (after partition drops)

**Vacuum**:
- Autovacuum enabled for all tables
- Manual VACUUM ANALYZE after bulk inserts (>1M rows)

### Partition Management

**Automated partition creation** (cron job):

```sql
-- Create next month's partition 7 days in advance
CREATE TABLE IF NOT EXISTS scanner.runtime_sample_2025_02 PARTITION OF scanner.runtime_sample
    FOR VALUES FROM ('2025-02-01') TO ('2025-03-01');
```

**Automated partition dropping** (90-day retention):

```sql
DROP TABLE IF EXISTS scanner.runtime_sample_2024_10; -- Older than 90 days
```

---

## Compliance & Auditing

### DSSE Signatures

All proof bundles and manifests include DSSE signatures:
- `manifest_dsse_json` in `scan_manifest`
- `proof_root_dsse_json` in `proof_bundle`

**Verification**:
- Signatures verified on read using `IContentSigner.Verify`
- Invalid signatures → reject proof bundle

### Immutability

**Immutable tables**:
- `scan_manifest` — No updates allowed after insert
- `proof_bundle` — No updates allowed after insert

**Enforcement**: Application-level (no UPDATE grants in production)

### Retention Policies

| Table | Retention | Enforcement |
|-------|-----------|-------------|
| `scan_manifest` | 180 days | DELETE WHERE created_at_utc < NOW() - INTERVAL '180 days' |
| `proof_bundle` | 365 days | DELETE WHERE created_at_utc < NOW() - INTERVAL '365 days' |
| `cg_node` | 90 days | CASCADE delete on scan_manifest |
| `cg_edge` | 90 days | CASCADE delete on scan_manifest |
| `runtime_sample` | 90 days | DROP PARTITION (monthly) |

---

## Monitoring

### Key Metrics

1. **Table sizes**:
   ```sql
   SELECT schemaname, tablename, pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename))
   FROM pg_tables WHERE schemaname = 'scanner';
   ```

2. **Index usage**:
   ```sql
   SELECT indexrelname, idx_scan, idx_tup_read, idx_tup_fetch
   FROM pg_stat_user_indexes
   WHERE schemaname = 'scanner'
   ORDER BY idx_scan DESC;
   ```

3. **Partition sizes**:
   ```sql
   SELECT tablename, pg_size_pretty(pg_total_relation_size('scanner.'||tablename))
   FROM pg_tables
   WHERE schemaname = 'scanner' AND tablename LIKE 'runtime_sample_%'
   ORDER BY tablename DESC;
   ```

### Alerts

- **Table growth**: Alert if `cg_edge` >10GB per scan
- **Index bloat**: Alert if index size >2x expected
- **Partition creation**: Alert if next month's partition not created 7 days in advance
- **Vacuum lag**: Alert if last autovacuum >7 days

---

## References

- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` — Schema isolation design
- `docs/db/SPECIFICATION.md` — Database specification
- `docs/operations/postgresql-guide.md` — Operations guide
- `SPRINT_3500_0002_0001_score_proofs_foundations.md` — Implementation sprint
- `SPRINT_3500_0003_0002_reachability_dotnet_call_graphs.md` — Call-graph implementation

---

**Last Updated**: 2025-12-17
**Schema Version**: 1.0
**Next Review**: Sprint 3500.0003.0004 (partition strategy)