Files
git.stella-ops.org/docs/db/schemas/signals.sql
StellaOps Bot b058dbe031 up
2025-12-14 23:20:14 +02:00

445 lines
17 KiB
PL/PgSQL

-- =============================================================================
-- SIGNALS SCHEMA - Call Graph Relational Tables
-- Version: V3102_001
-- Sprint: SPRINT_3102_0001_0001
-- =============================================================================
CREATE SCHEMA IF NOT EXISTS signals;
-- =============================================================================
-- SCAN TRACKING
-- =============================================================================
-- Tracks scan context for call graph analysis
CREATE TABLE signals.scans (
scan_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
artifact_digest TEXT NOT NULL,
repo_uri TEXT,
commit_sha TEXT,
sbom_digest TEXT,
policy_digest TEXT,
status TEXT NOT NULL DEFAULT 'pending'
CHECK (status IN ('pending', 'processing', 'completed', 'failed')),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
completed_at TIMESTAMPTZ,
error_message TEXT,
-- Composite index for cache lookups
CONSTRAINT scans_artifact_sbom_unique UNIQUE (artifact_digest, sbom_digest)
);
CREATE INDEX idx_scans_status ON signals.scans(status);
CREATE INDEX idx_scans_artifact ON signals.scans(artifact_digest);
CREATE INDEX idx_scans_commit ON signals.scans(commit_sha) WHERE commit_sha IS NOT NULL;
CREATE INDEX idx_scans_created ON signals.scans(created_at DESC);
-- =============================================================================
-- ARTIFACTS
-- =============================================================================
-- Individual artifacts (assemblies, JARs, modules) within a scan
CREATE TABLE signals.artifacts (
artifact_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
artifact_key TEXT NOT NULL,
kind TEXT NOT NULL CHECK (kind IN ('assembly', 'jar', 'module', 'binary', 'script')),
sha256 TEXT NOT NULL,
purl TEXT,
build_id TEXT,
file_path TEXT,
size_bytes BIGINT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT artifacts_scan_key_unique UNIQUE (scan_id, artifact_key)
);
CREATE INDEX idx_artifacts_scan ON signals.artifacts(scan_id);
CREATE INDEX idx_artifacts_sha256 ON signals.artifacts(sha256);
CREATE INDEX idx_artifacts_purl ON signals.artifacts(purl) WHERE purl IS NOT NULL;
CREATE INDEX idx_artifacts_build_id ON signals.artifacts(build_id) WHERE build_id IS NOT NULL;
-- =============================================================================
-- CALL GRAPH NODES
-- =============================================================================
-- Individual nodes (symbols) in call graphs
CREATE TABLE signals.cg_nodes (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
node_id TEXT NOT NULL,
artifact_key TEXT,
symbol_key TEXT NOT NULL,
visibility TEXT NOT NULL DEFAULT 'unknown'
CHECK (visibility IN ('public', 'internal', 'protected', 'private', 'unknown')),
is_entrypoint_candidate BOOLEAN NOT NULL DEFAULT FALSE,
purl TEXT,
symbol_digest TEXT,
flags INT NOT NULL DEFAULT 0,
attributes JSONB,
CONSTRAINT cg_nodes_scan_node_unique UNIQUE (scan_id, node_id)
);
-- Primary lookup indexes
CREATE INDEX idx_cg_nodes_scan ON signals.cg_nodes(scan_id);
CREATE INDEX idx_cg_nodes_symbol_key ON signals.cg_nodes(symbol_key);
CREATE INDEX idx_cg_nodes_purl ON signals.cg_nodes(purl) WHERE purl IS NOT NULL;
CREATE INDEX idx_cg_nodes_entrypoint ON signals.cg_nodes(scan_id, is_entrypoint_candidate)
WHERE is_entrypoint_candidate = TRUE;
-- Full-text search on symbol keys
CREATE INDEX idx_cg_nodes_symbol_fts ON signals.cg_nodes
USING gin(to_tsvector('simple', symbol_key));
-- =============================================================================
-- CALL GRAPH EDGES
-- =============================================================================
-- Call edges between nodes
CREATE TABLE signals.cg_edges (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
from_node_id TEXT NOT NULL,
to_node_id TEXT NOT NULL,
kind SMALLINT NOT NULL DEFAULT 0, -- 0=static, 1=heuristic, 2=runtime
reason SMALLINT NOT NULL DEFAULT 0, -- EdgeReason enum value
weight REAL NOT NULL DEFAULT 1.0,
offset_bytes INT,
is_resolved BOOLEAN NOT NULL DEFAULT TRUE,
provenance TEXT,
-- Composite unique constraint
CONSTRAINT cg_edges_unique UNIQUE (scan_id, from_node_id, to_node_id, kind, reason)
);
-- Traversal indexes (critical for reachability queries)
CREATE INDEX idx_cg_edges_scan ON signals.cg_edges(scan_id);
CREATE INDEX idx_cg_edges_from ON signals.cg_edges(scan_id, from_node_id);
CREATE INDEX idx_cg_edges_to ON signals.cg_edges(scan_id, to_node_id);
-- Covering index for common traversal pattern
CREATE INDEX idx_cg_edges_traversal ON signals.cg_edges(scan_id, from_node_id)
INCLUDE (to_node_id, kind, weight);
-- =============================================================================
-- ENTRYPOINTS
-- =============================================================================
-- Framework-aware entrypoints
CREATE TABLE signals.entrypoints (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
node_id TEXT NOT NULL,
kind TEXT NOT NULL CHECK (kind IN (
'http', 'grpc', 'cli', 'job', 'event', 'message_queue',
'timer', 'test', 'main', 'module_init', 'static_constructor', 'unknown'
)),
framework TEXT,
route TEXT,
http_method TEXT,
phase TEXT NOT NULL DEFAULT 'runtime'
CHECK (phase IN ('module_init', 'app_start', 'runtime', 'shutdown')),
order_idx INT NOT NULL DEFAULT 0,
CONSTRAINT entrypoints_scan_node_unique UNIQUE (scan_id, node_id, kind)
);
CREATE INDEX idx_entrypoints_scan ON signals.entrypoints(scan_id);
CREATE INDEX idx_entrypoints_kind ON signals.entrypoints(kind);
CREATE INDEX idx_entrypoints_route ON signals.entrypoints(route) WHERE route IS NOT NULL;
-- =============================================================================
-- SYMBOL-TO-COMPONENT MAPPING
-- =============================================================================
-- Maps symbols to SBOM components (for vuln correlation)
CREATE TABLE signals.symbol_component_map (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
node_id TEXT NOT NULL,
purl TEXT NOT NULL,
mapping_kind TEXT NOT NULL CHECK (mapping_kind IN (
'exact', 'assembly', 'namespace', 'heuristic'
)),
confidence REAL NOT NULL DEFAULT 1.0,
evidence JSONB,
CONSTRAINT symbol_component_map_unique UNIQUE (scan_id, node_id, purl)
);
CREATE INDEX idx_symbol_component_scan ON signals.symbol_component_map(scan_id);
CREATE INDEX idx_symbol_component_purl ON signals.symbol_component_map(purl);
CREATE INDEX idx_symbol_component_node ON signals.symbol_component_map(scan_id, node_id);
-- =============================================================================
-- REACHABILITY RESULTS
-- =============================================================================
-- Component-level reachability status
CREATE TABLE signals.reachability_components (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
purl TEXT NOT NULL,
status SMALLINT NOT NULL DEFAULT 0, -- ReachabilityStatus enum
lattice_state TEXT,
confidence REAL NOT NULL DEFAULT 0,
why JSONB,
evidence JSONB,
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT reachability_components_unique UNIQUE (scan_id, purl)
);
CREATE INDEX idx_reachability_components_scan ON signals.reachability_components(scan_id);
CREATE INDEX idx_reachability_components_purl ON signals.reachability_components(purl);
CREATE INDEX idx_reachability_components_status ON signals.reachability_components(status);
-- CVE-level reachability findings
CREATE TABLE signals.reachability_findings (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
cve_id TEXT NOT NULL,
purl TEXT NOT NULL,
status SMALLINT NOT NULL DEFAULT 0,
lattice_state TEXT,
confidence REAL NOT NULL DEFAULT 0,
path_witness TEXT[],
why JSONB,
evidence JSONB,
spine_id UUID, -- Reference to proof spine
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT reachability_findings_unique UNIQUE (scan_id, cve_id, purl)
);
CREATE INDEX idx_reachability_findings_scan ON signals.reachability_findings(scan_id);
CREATE INDEX idx_reachability_findings_cve ON signals.reachability_findings(cve_id);
CREATE INDEX idx_reachability_findings_purl ON signals.reachability_findings(purl);
CREATE INDEX idx_reachability_findings_status ON signals.reachability_findings(status);
-- =============================================================================
-- RUNTIME SAMPLES
-- =============================================================================
-- Stack trace samples from runtime evidence
CREATE TABLE signals.runtime_samples (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
collected_at TIMESTAMPTZ NOT NULL,
env_hash TEXT,
timestamp TIMESTAMPTZ NOT NULL,
pid INT,
thread_id INT,
frames TEXT[] NOT NULL,
weight REAL NOT NULL DEFAULT 1.0,
container_id TEXT,
pod_name TEXT
);
CREATE INDEX idx_runtime_samples_scan ON signals.runtime_samples(scan_id);
CREATE INDEX idx_runtime_samples_collected ON signals.runtime_samples(collected_at DESC);
-- GIN index for frame array searches
CREATE INDEX idx_runtime_samples_frames ON signals.runtime_samples USING gin(frames);
-- =============================================================================
-- DEPLOYMENT REFERENCES (for popularity scoring)
-- =============================================================================
CREATE TABLE signals.deploy_refs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
purl TEXT NOT NULL,
image_id TEXT NOT NULL,
environment TEXT,
first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (purl, image_id, environment)
);
CREATE INDEX idx_deploy_refs_purl ON signals.deploy_refs(purl);
CREATE INDEX idx_deploy_refs_last_seen ON signals.deploy_refs(last_seen_at);
-- =============================================================================
-- GRAPH METRICS (for centrality scoring)
-- =============================================================================
CREATE TABLE signals.graph_metrics (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
symbol_id TEXT NOT NULL,
callgraph_id TEXT NOT NULL,
degree INT NOT NULL DEFAULT 0,
betweenness FLOAT NOT NULL DEFAULT 0,
last_computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (symbol_id, callgraph_id)
);
CREATE INDEX idx_graph_metrics_symbol ON signals.graph_metrics(symbol_id);
-- =============================================================================
-- UNKNOWNS TRACKING (enhanced for scoring)
-- =============================================================================
CREATE TABLE signals.unknowns (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
subject_key TEXT NOT NULL,
callgraph_id TEXT,
symbol_id TEXT,
code_id TEXT,
purl TEXT,
purl_version TEXT,
edge_from TEXT,
edge_to TEXT,
reason TEXT,
-- Scoring factors
popularity_score FLOAT DEFAULT 0,
deployment_count INT DEFAULT 0,
exploit_potential_score FLOAT DEFAULT 0,
uncertainty_score FLOAT DEFAULT 0,
centrality_score FLOAT DEFAULT 0,
degree_centrality INT DEFAULT 0,
betweenness_centrality FLOAT DEFAULT 0,
staleness_score FLOAT DEFAULT 0,
days_since_last_analysis INT DEFAULT 0,
-- Composite score and band
score FLOAT DEFAULT 0,
band TEXT DEFAULT 'cold' CHECK (band IN ('hot', 'warm', 'cold')),
-- Flags and traces
flags JSONB DEFAULT '{}',
normalization_trace JSONB,
graph_slice_hash TEXT,
evidence_set_hash TEXT,
callgraph_attempt_hash TEXT,
-- Rescan tracking
rescan_attempts INT DEFAULT 0,
last_rescan_result TEXT,
next_scheduled_rescan TIMESTAMPTZ,
last_analyzed_at TIMESTAMPTZ,
-- Timestamps
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_unknowns_subject ON signals.unknowns(subject_key);
CREATE INDEX idx_unknowns_band ON signals.unknowns(band);
CREATE INDEX idx_unknowns_score ON signals.unknowns(score DESC);
CREATE INDEX idx_unknowns_next_rescan ON signals.unknowns(next_scheduled_rescan) WHERE next_scheduled_rescan IS NOT NULL;
-- =============================================================================
-- MATERIALIZED VIEWS FOR ANALYTICS
-- =============================================================================
-- Daily scan statistics
CREATE MATERIALIZED VIEW signals.scan_stats_daily AS
SELECT
DATE_TRUNC('day', created_at) AS day,
COUNT(*) AS total_scans,
COUNT(*) FILTER (WHERE status = 'completed') AS completed_scans,
COUNT(*) FILTER (WHERE status = 'failed') AS failed_scans,
AVG(EXTRACT(EPOCH FROM (completed_at - created_at))) FILTER (WHERE status = 'completed') AS avg_duration_seconds
FROM signals.scans
GROUP BY DATE_TRUNC('day', created_at)
ORDER BY day DESC;
CREATE UNIQUE INDEX idx_scan_stats_daily_day ON signals.scan_stats_daily(day);
-- CVE reachability summary
CREATE MATERIALIZED VIEW signals.cve_reachability_summary AS
SELECT
cve_id,
COUNT(DISTINCT scan_id) AS affected_scans,
COUNT(DISTINCT purl) AS affected_components,
COUNT(*) FILTER (WHERE status = 2) AS reachable_count, -- REACHABLE_STATIC
COUNT(*) FILTER (WHERE status = 3) AS proven_count, -- REACHABLE_PROVEN
COUNT(*) FILTER (WHERE status = 0) AS unreachable_count,
AVG(confidence) AS avg_confidence,
MAX(computed_at) AS last_updated
FROM signals.reachability_findings
GROUP BY cve_id;
CREATE UNIQUE INDEX idx_cve_reachability_summary_cve ON signals.cve_reachability_summary(cve_id);
-- Refresh function
CREATE OR REPLACE FUNCTION signals.refresh_analytics_views()
RETURNS void AS $$
BEGIN
REFRESH MATERIALIZED VIEW CONCURRENTLY signals.scan_stats_daily;
REFRESH MATERIALIZED VIEW CONCURRENTLY signals.cve_reachability_summary;
END;
$$ LANGUAGE plpgsql;
-- =============================================================================
-- PROOF SPINE TABLES (SPRINT_3100)
-- =============================================================================
-- Schema for proof spine storage
CREATE SCHEMA IF NOT EXISTS scanner;
-- Main proof spine table
CREATE TABLE scanner.proof_spines (
spine_id TEXT PRIMARY KEY,
artifact_id TEXT NOT NULL,
vuln_id TEXT NOT NULL,
policy_profile_id TEXT NOT NULL,
verdict TEXT NOT NULL CHECK (verdict IN ('not_affected', 'affected', 'fixed', 'under_investigation')),
verdict_reason TEXT,
root_hash TEXT NOT NULL,
scan_run_id UUID NOT NULL,
segment_count INT NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
superseded_by_spine_id TEXT REFERENCES scanner.proof_spines(spine_id),
-- Deterministic spine ID = hash(artifact_id + vuln_id + policy_profile_id + root_hash)
CONSTRAINT proof_spines_unique_decision UNIQUE (artifact_id, vuln_id, policy_profile_id, root_hash)
);
-- Composite index for common lookups
CREATE INDEX idx_proof_spines_lookup
ON scanner.proof_spines(artifact_id, vuln_id, policy_profile_id);
CREATE INDEX idx_proof_spines_scan_run
ON scanner.proof_spines(scan_run_id);
CREATE INDEX idx_proof_spines_created
ON scanner.proof_spines(created_at DESC);
-- Individual segments within a spine
CREATE TABLE scanner.proof_segments (
segment_id TEXT PRIMARY KEY,
spine_id TEXT NOT NULL REFERENCES scanner.proof_spines(spine_id) ON DELETE CASCADE,
idx INT NOT NULL,
segment_type TEXT NOT NULL CHECK (segment_type IN (
'SBOM_SLICE', 'MATCH', 'REACHABILITY',
'GUARD_ANALYSIS', 'RUNTIME_OBSERVATION', 'POLICY_EVAL'
)),
input_hash TEXT NOT NULL,
result_hash TEXT NOT NULL,
prev_segment_hash TEXT,
envelope BYTEA NOT NULL, -- DSSE envelope (JSON or CBOR)
tool_id TEXT NOT NULL,
tool_version TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN (
'pending', 'verified', 'partial', 'invalid', 'untrusted'
)),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT proof_segments_unique_idx UNIQUE (spine_id, idx)
);
CREATE INDEX idx_proof_segments_spine ON scanner.proof_segments(spine_id);
CREATE INDEX idx_proof_segments_type ON scanner.proof_segments(segment_type);
-- Audit trail for spine supersession
CREATE TABLE scanner.proof_spine_history (
history_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
spine_id TEXT NOT NULL REFERENCES scanner.proof_spines(spine_id),
action TEXT NOT NULL CHECK (action IN ('created', 'superseded', 'verified', 'invalidated')),
actor TEXT,
reason TEXT,
occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_proof_spine_history_spine ON scanner.proof_spine_history(spine_id);