-- ============================================================================= -- SIGNALS SCHEMA - Call Graph Relational Tables -- Version: V3102_001 -- Sprint: SPRINT_3102_0001_0001 -- ============================================================================= CREATE SCHEMA IF NOT EXISTS signals; -- ============================================================================= -- SCAN TRACKING -- ============================================================================= -- Tracks scan context for call graph analysis CREATE TABLE signals.scans ( scan_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), artifact_digest TEXT NOT NULL, repo_uri TEXT, commit_sha TEXT, sbom_digest TEXT, policy_digest TEXT, status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'processing', 'completed', 'failed')), created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), completed_at TIMESTAMPTZ, error_message TEXT, -- Composite index for cache lookups CONSTRAINT scans_artifact_sbom_unique UNIQUE (artifact_digest, sbom_digest) ); CREATE INDEX idx_scans_status ON signals.scans(status); CREATE INDEX idx_scans_artifact ON signals.scans(artifact_digest); CREATE INDEX idx_scans_commit ON signals.scans(commit_sha) WHERE commit_sha IS NOT NULL; CREATE INDEX idx_scans_created ON signals.scans(created_at DESC); -- ============================================================================= -- ARTIFACTS -- ============================================================================= -- Individual artifacts (assemblies, JARs, modules) within a scan CREATE TABLE signals.artifacts ( artifact_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE, artifact_key TEXT NOT NULL, kind TEXT NOT NULL CHECK (kind IN ('assembly', 'jar', 'module', 'binary', 'script')), sha256 TEXT NOT NULL, purl TEXT, build_id TEXT, file_path TEXT, size_bytes BIGINT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), CONSTRAINT artifacts_scan_key_unique UNIQUE (scan_id, artifact_key) ); CREATE INDEX idx_artifacts_scan ON signals.artifacts(scan_id); CREATE INDEX idx_artifacts_sha256 ON signals.artifacts(sha256); CREATE INDEX idx_artifacts_purl ON signals.artifacts(purl) WHERE purl IS NOT NULL; CREATE INDEX idx_artifacts_build_id ON signals.artifacts(build_id) WHERE build_id IS NOT NULL; -- ============================================================================= -- CALL GRAPH NODES -- ============================================================================= -- Individual nodes (symbols) in call graphs CREATE TABLE signals.cg_nodes ( id BIGSERIAL PRIMARY KEY, scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE, node_id TEXT NOT NULL, artifact_key TEXT, symbol_key TEXT NOT NULL, visibility TEXT NOT NULL DEFAULT 'unknown' CHECK (visibility IN ('public', 'internal', 'protected', 'private', 'unknown')), is_entrypoint_candidate BOOLEAN NOT NULL DEFAULT FALSE, purl TEXT, symbol_digest TEXT, flags INT NOT NULL DEFAULT 0, attributes JSONB, CONSTRAINT cg_nodes_scan_node_unique UNIQUE (scan_id, node_id) ); -- Primary lookup indexes CREATE INDEX idx_cg_nodes_scan ON signals.cg_nodes(scan_id); CREATE INDEX idx_cg_nodes_symbol_key ON signals.cg_nodes(symbol_key); CREATE INDEX idx_cg_nodes_purl ON signals.cg_nodes(purl) WHERE purl IS NOT NULL; CREATE INDEX idx_cg_nodes_entrypoint ON signals.cg_nodes(scan_id, is_entrypoint_candidate) WHERE is_entrypoint_candidate = TRUE; -- Full-text search on symbol keys CREATE INDEX idx_cg_nodes_symbol_fts ON signals.cg_nodes USING gin(to_tsvector('simple', symbol_key)); -- ============================================================================= -- CALL GRAPH EDGES -- ============================================================================= -- Call edges between nodes CREATE TABLE signals.cg_edges ( id BIGSERIAL PRIMARY KEY, scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE, from_node_id TEXT NOT NULL, to_node_id TEXT NOT NULL, kind SMALLINT NOT NULL DEFAULT 0, -- 0=static, 1=heuristic, 2=runtime reason SMALLINT NOT NULL DEFAULT 0, -- EdgeReason enum value weight REAL NOT NULL DEFAULT 1.0, offset_bytes INT, is_resolved BOOLEAN NOT NULL DEFAULT TRUE, provenance TEXT, -- Composite unique constraint CONSTRAINT cg_edges_unique UNIQUE (scan_id, from_node_id, to_node_id, kind, reason) ); -- Traversal indexes (critical for reachability queries) CREATE INDEX idx_cg_edges_scan ON signals.cg_edges(scan_id); CREATE INDEX idx_cg_edges_from ON signals.cg_edges(scan_id, from_node_id); CREATE INDEX idx_cg_edges_to ON signals.cg_edges(scan_id, to_node_id); -- Covering index for common traversal pattern CREATE INDEX idx_cg_edges_traversal ON signals.cg_edges(scan_id, from_node_id) INCLUDE (to_node_id, kind, weight); -- ============================================================================= -- ENTRYPOINTS -- ============================================================================= -- Framework-aware entrypoints CREATE TABLE signals.entrypoints ( id BIGSERIAL PRIMARY KEY, scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE, node_id TEXT NOT NULL, kind TEXT NOT NULL CHECK (kind IN ( 'http', 'grpc', 'cli', 'job', 'event', 'message_queue', 'timer', 'test', 'main', 'module_init', 'static_constructor', 'unknown' )), framework TEXT, route TEXT, http_method TEXT, phase TEXT NOT NULL DEFAULT 'runtime' CHECK (phase IN ('module_init', 'app_start', 'runtime', 'shutdown')), order_idx INT NOT NULL DEFAULT 0, CONSTRAINT entrypoints_scan_node_unique UNIQUE (scan_id, node_id, kind) ); CREATE INDEX idx_entrypoints_scan ON signals.entrypoints(scan_id); CREATE INDEX idx_entrypoints_kind ON signals.entrypoints(kind); CREATE INDEX idx_entrypoints_route ON signals.entrypoints(route) WHERE route IS NOT NULL; -- ============================================================================= -- SYMBOL-TO-COMPONENT MAPPING -- ============================================================================= -- Maps symbols to SBOM components (for vuln correlation) CREATE TABLE signals.symbol_component_map ( id BIGSERIAL PRIMARY KEY, scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE, node_id TEXT NOT NULL, purl TEXT NOT NULL, mapping_kind TEXT NOT NULL CHECK (mapping_kind IN ( 'exact', 'assembly', 'namespace', 'heuristic' )), confidence REAL NOT NULL DEFAULT 1.0, evidence JSONB, CONSTRAINT symbol_component_map_unique UNIQUE (scan_id, node_id, purl) ); CREATE INDEX idx_symbol_component_scan ON signals.symbol_component_map(scan_id); CREATE INDEX idx_symbol_component_purl ON signals.symbol_component_map(purl); CREATE INDEX idx_symbol_component_node ON signals.symbol_component_map(scan_id, node_id); -- ============================================================================= -- REACHABILITY RESULTS -- ============================================================================= -- Component-level reachability status CREATE TABLE signals.reachability_components ( id BIGSERIAL PRIMARY KEY, scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE, purl TEXT NOT NULL, status SMALLINT NOT NULL DEFAULT 0, -- ReachabilityStatus enum lattice_state TEXT, confidence REAL NOT NULL DEFAULT 0, why JSONB, evidence JSONB, computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), CONSTRAINT reachability_components_unique UNIQUE (scan_id, purl) ); CREATE INDEX idx_reachability_components_scan ON signals.reachability_components(scan_id); CREATE INDEX idx_reachability_components_purl ON signals.reachability_components(purl); CREATE INDEX idx_reachability_components_status ON signals.reachability_components(status); -- CVE-level reachability findings CREATE TABLE signals.reachability_findings ( id BIGSERIAL PRIMARY KEY, scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE, cve_id TEXT NOT NULL, purl TEXT NOT NULL, status SMALLINT NOT NULL DEFAULT 0, lattice_state TEXT, confidence REAL NOT NULL DEFAULT 0, path_witness TEXT[], why JSONB, evidence JSONB, spine_id UUID, -- Reference to proof spine computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), CONSTRAINT reachability_findings_unique UNIQUE (scan_id, cve_id, purl) ); CREATE INDEX idx_reachability_findings_scan ON signals.reachability_findings(scan_id); CREATE INDEX idx_reachability_findings_cve ON signals.reachability_findings(cve_id); CREATE INDEX idx_reachability_findings_purl ON signals.reachability_findings(purl); CREATE INDEX idx_reachability_findings_status ON signals.reachability_findings(status); -- ============================================================================= -- RUNTIME SAMPLES -- ============================================================================= -- Stack trace samples from runtime evidence CREATE TABLE signals.runtime_samples ( id BIGSERIAL PRIMARY KEY, scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE, collected_at TIMESTAMPTZ NOT NULL, env_hash TEXT, timestamp TIMESTAMPTZ NOT NULL, pid INT, thread_id INT, frames TEXT[] NOT NULL, weight REAL NOT NULL DEFAULT 1.0, container_id TEXT, pod_name TEXT ); CREATE INDEX idx_runtime_samples_scan ON signals.runtime_samples(scan_id); CREATE INDEX idx_runtime_samples_collected ON signals.runtime_samples(collected_at DESC); -- GIN index for frame array searches CREATE INDEX idx_runtime_samples_frames ON signals.runtime_samples USING gin(frames); -- ============================================================================= -- DEPLOYMENT REFERENCES (for popularity scoring) -- ============================================================================= CREATE TABLE signals.deploy_refs ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), purl TEXT NOT NULL, image_id TEXT NOT NULL, environment TEXT, first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), UNIQUE (purl, image_id, environment) ); CREATE INDEX idx_deploy_refs_purl ON signals.deploy_refs(purl); CREATE INDEX idx_deploy_refs_last_seen ON signals.deploy_refs(last_seen_at); -- ============================================================================= -- GRAPH METRICS (for centrality scoring) -- ============================================================================= CREATE TABLE signals.graph_metrics ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), symbol_id TEXT NOT NULL, callgraph_id TEXT NOT NULL, degree INT NOT NULL DEFAULT 0, betweenness FLOAT NOT NULL DEFAULT 0, last_computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), UNIQUE (symbol_id, callgraph_id) ); CREATE INDEX idx_graph_metrics_symbol ON signals.graph_metrics(symbol_id); -- ============================================================================= -- UNKNOWNS TRACKING (enhanced for scoring) -- ============================================================================= CREATE TABLE signals.unknowns ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), subject_key TEXT NOT NULL, callgraph_id TEXT, symbol_id TEXT, code_id TEXT, purl TEXT, purl_version TEXT, edge_from TEXT, edge_to TEXT, reason TEXT, -- Scoring factors popularity_score FLOAT DEFAULT 0, deployment_count INT DEFAULT 0, exploit_potential_score FLOAT DEFAULT 0, uncertainty_score FLOAT DEFAULT 0, centrality_score FLOAT DEFAULT 0, degree_centrality INT DEFAULT 0, betweenness_centrality FLOAT DEFAULT 0, staleness_score FLOAT DEFAULT 0, days_since_last_analysis INT DEFAULT 0, -- Composite score and band score FLOAT DEFAULT 0, band TEXT DEFAULT 'cold' CHECK (band IN ('hot', 'warm', 'cold')), -- Flags and traces flags JSONB DEFAULT '{}', normalization_trace JSONB, graph_slice_hash TEXT, evidence_set_hash TEXT, callgraph_attempt_hash TEXT, -- Rescan tracking rescan_attempts INT DEFAULT 0, last_rescan_result TEXT, next_scheduled_rescan TIMESTAMPTZ, last_analyzed_at TIMESTAMPTZ, -- Timestamps created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE INDEX idx_unknowns_subject ON signals.unknowns(subject_key); CREATE INDEX idx_unknowns_band ON signals.unknowns(band); CREATE INDEX idx_unknowns_score ON signals.unknowns(score DESC); CREATE INDEX idx_unknowns_next_rescan ON signals.unknowns(next_scheduled_rescan) WHERE next_scheduled_rescan IS NOT NULL; -- ============================================================================= -- MATERIALIZED VIEWS FOR ANALYTICS -- ============================================================================= -- Daily scan statistics CREATE MATERIALIZED VIEW signals.scan_stats_daily AS SELECT DATE_TRUNC('day', created_at) AS day, COUNT(*) AS total_scans, COUNT(*) FILTER (WHERE status = 'completed') AS completed_scans, COUNT(*) FILTER (WHERE status = 'failed') AS failed_scans, AVG(EXTRACT(EPOCH FROM (completed_at - created_at))) FILTER (WHERE status = 'completed') AS avg_duration_seconds FROM signals.scans GROUP BY DATE_TRUNC('day', created_at) ORDER BY day DESC; CREATE UNIQUE INDEX idx_scan_stats_daily_day ON signals.scan_stats_daily(day); -- CVE reachability summary CREATE MATERIALIZED VIEW signals.cve_reachability_summary AS SELECT cve_id, COUNT(DISTINCT scan_id) AS affected_scans, COUNT(DISTINCT purl) AS affected_components, COUNT(*) FILTER (WHERE status = 2) AS reachable_count, -- REACHABLE_STATIC COUNT(*) FILTER (WHERE status = 3) AS proven_count, -- REACHABLE_PROVEN COUNT(*) FILTER (WHERE status = 0) AS unreachable_count, AVG(confidence) AS avg_confidence, MAX(computed_at) AS last_updated FROM signals.reachability_findings GROUP BY cve_id; CREATE UNIQUE INDEX idx_cve_reachability_summary_cve ON signals.cve_reachability_summary(cve_id); -- Refresh function CREATE OR REPLACE FUNCTION signals.refresh_analytics_views() RETURNS void AS $$ BEGIN REFRESH MATERIALIZED VIEW CONCURRENTLY signals.scan_stats_daily; REFRESH MATERIALIZED VIEW CONCURRENTLY signals.cve_reachability_summary; END; $$ LANGUAGE plpgsql; -- ============================================================================= -- PROOF SPINE TABLES (SPRINT_3100) -- ============================================================================= -- Schema for proof spine storage CREATE SCHEMA IF NOT EXISTS scanner; -- Main proof spine table CREATE TABLE scanner.proof_spines ( spine_id TEXT PRIMARY KEY, artifact_id TEXT NOT NULL, vuln_id TEXT NOT NULL, policy_profile_id TEXT NOT NULL, verdict TEXT NOT NULL CHECK (verdict IN ('not_affected', 'affected', 'fixed', 'under_investigation')), verdict_reason TEXT, root_hash TEXT NOT NULL, scan_run_id UUID NOT NULL, segment_count INT NOT NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), superseded_by_spine_id TEXT REFERENCES scanner.proof_spines(spine_id), -- Deterministic spine ID = hash(artifact_id + vuln_id + policy_profile_id + root_hash) CONSTRAINT proof_spines_unique_decision UNIQUE (artifact_id, vuln_id, policy_profile_id, root_hash) ); -- Composite index for common lookups CREATE INDEX idx_proof_spines_lookup ON scanner.proof_spines(artifact_id, vuln_id, policy_profile_id); CREATE INDEX idx_proof_spines_scan_run ON scanner.proof_spines(scan_run_id); CREATE INDEX idx_proof_spines_created ON scanner.proof_spines(created_at DESC); -- Individual segments within a spine CREATE TABLE scanner.proof_segments ( segment_id TEXT PRIMARY KEY, spine_id TEXT NOT NULL REFERENCES scanner.proof_spines(spine_id) ON DELETE CASCADE, idx INT NOT NULL, segment_type TEXT NOT NULL CHECK (segment_type IN ( 'SBOM_SLICE', 'MATCH', 'REACHABILITY', 'GUARD_ANALYSIS', 'RUNTIME_OBSERVATION', 'POLICY_EVAL' )), input_hash TEXT NOT NULL, result_hash TEXT NOT NULL, prev_segment_hash TEXT, envelope BYTEA NOT NULL, -- DSSE envelope (JSON or CBOR) tool_id TEXT NOT NULL, tool_version TEXT NOT NULL, status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ( 'pending', 'verified', 'partial', 'invalid', 'untrusted' )), created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), CONSTRAINT proof_segments_unique_idx UNIQUE (spine_id, idx) ); CREATE INDEX idx_proof_segments_spine ON scanner.proof_segments(spine_id); CREATE INDEX idx_proof_segments_type ON scanner.proof_segments(segment_type); -- Audit trail for spine supersession CREATE TABLE scanner.proof_spine_history ( history_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), spine_id TEXT NOT NULL REFERENCES scanner.proof_spines(spine_id), action TEXT NOT NULL CHECK (action IN ('created', 'superseded', 'verified', 'invalidated')), actor TEXT, reason TEXT, occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE INDEX idx_proof_spine_history_spine ON scanner.proof_spine_history(spine_id);