445 lines
17 KiB
PL/PgSQL
445 lines
17 KiB
PL/PgSQL
-- =============================================================================
|
|
-- SIGNALS SCHEMA - Call Graph Relational Tables
|
|
-- Version: V3102_001
|
|
-- Sprint: SPRINT_3102_0001_0001
|
|
-- =============================================================================
|
|
|
|
CREATE SCHEMA IF NOT EXISTS signals;
|
|
|
|
-- =============================================================================
|
|
-- SCAN TRACKING
|
|
-- =============================================================================
|
|
|
|
-- Tracks scan context for call graph analysis
|
|
CREATE TABLE signals.scans (
|
|
scan_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
artifact_digest TEXT NOT NULL,
|
|
repo_uri TEXT,
|
|
commit_sha TEXT,
|
|
sbom_digest TEXT,
|
|
policy_digest TEXT,
|
|
status TEXT NOT NULL DEFAULT 'pending'
|
|
CHECK (status IN ('pending', 'processing', 'completed', 'failed')),
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
completed_at TIMESTAMPTZ,
|
|
error_message TEXT,
|
|
|
|
-- Composite index for cache lookups
|
|
CONSTRAINT scans_artifact_sbom_unique UNIQUE (artifact_digest, sbom_digest)
|
|
);
|
|
|
|
CREATE INDEX idx_scans_status ON signals.scans(status);
|
|
CREATE INDEX idx_scans_artifact ON signals.scans(artifact_digest);
|
|
CREATE INDEX idx_scans_commit ON signals.scans(commit_sha) WHERE commit_sha IS NOT NULL;
|
|
CREATE INDEX idx_scans_created ON signals.scans(created_at DESC);
|
|
|
|
-- =============================================================================
|
|
-- ARTIFACTS
|
|
-- =============================================================================
|
|
|
|
-- Individual artifacts (assemblies, JARs, modules) within a scan
|
|
CREATE TABLE signals.artifacts (
|
|
artifact_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
|
|
artifact_key TEXT NOT NULL,
|
|
kind TEXT NOT NULL CHECK (kind IN ('assembly', 'jar', 'module', 'binary', 'script')),
|
|
sha256 TEXT NOT NULL,
|
|
purl TEXT,
|
|
build_id TEXT,
|
|
file_path TEXT,
|
|
size_bytes BIGINT,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
|
|
CONSTRAINT artifacts_scan_key_unique UNIQUE (scan_id, artifact_key)
|
|
);
|
|
|
|
CREATE INDEX idx_artifacts_scan ON signals.artifacts(scan_id);
|
|
CREATE INDEX idx_artifacts_sha256 ON signals.artifacts(sha256);
|
|
CREATE INDEX idx_artifacts_purl ON signals.artifacts(purl) WHERE purl IS NOT NULL;
|
|
CREATE INDEX idx_artifacts_build_id ON signals.artifacts(build_id) WHERE build_id IS NOT NULL;
|
|
|
|
-- =============================================================================
|
|
-- CALL GRAPH NODES
|
|
-- =============================================================================
|
|
|
|
-- Individual nodes (symbols) in call graphs
|
|
CREATE TABLE signals.cg_nodes (
|
|
id BIGSERIAL PRIMARY KEY,
|
|
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
|
|
node_id TEXT NOT NULL,
|
|
artifact_key TEXT,
|
|
symbol_key TEXT NOT NULL,
|
|
visibility TEXT NOT NULL DEFAULT 'unknown'
|
|
CHECK (visibility IN ('public', 'internal', 'protected', 'private', 'unknown')),
|
|
is_entrypoint_candidate BOOLEAN NOT NULL DEFAULT FALSE,
|
|
purl TEXT,
|
|
symbol_digest TEXT,
|
|
flags INT NOT NULL DEFAULT 0,
|
|
attributes JSONB,
|
|
|
|
CONSTRAINT cg_nodes_scan_node_unique UNIQUE (scan_id, node_id)
|
|
);
|
|
|
|
-- Primary lookup indexes
|
|
CREATE INDEX idx_cg_nodes_scan ON signals.cg_nodes(scan_id);
|
|
CREATE INDEX idx_cg_nodes_symbol_key ON signals.cg_nodes(symbol_key);
|
|
CREATE INDEX idx_cg_nodes_purl ON signals.cg_nodes(purl) WHERE purl IS NOT NULL;
|
|
CREATE INDEX idx_cg_nodes_entrypoint ON signals.cg_nodes(scan_id, is_entrypoint_candidate)
|
|
WHERE is_entrypoint_candidate = TRUE;
|
|
|
|
-- Full-text search on symbol keys
|
|
CREATE INDEX idx_cg_nodes_symbol_fts ON signals.cg_nodes
|
|
USING gin(to_tsvector('simple', symbol_key));
|
|
|
|
-- =============================================================================
|
|
-- CALL GRAPH EDGES
|
|
-- =============================================================================
|
|
|
|
-- Call edges between nodes
|
|
CREATE TABLE signals.cg_edges (
|
|
id BIGSERIAL PRIMARY KEY,
|
|
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
|
|
from_node_id TEXT NOT NULL,
|
|
to_node_id TEXT NOT NULL,
|
|
kind SMALLINT NOT NULL DEFAULT 0, -- 0=static, 1=heuristic, 2=runtime
|
|
reason SMALLINT NOT NULL DEFAULT 0, -- EdgeReason enum value
|
|
weight REAL NOT NULL DEFAULT 1.0,
|
|
offset_bytes INT,
|
|
is_resolved BOOLEAN NOT NULL DEFAULT TRUE,
|
|
provenance TEXT,
|
|
|
|
-- Composite unique constraint
|
|
CONSTRAINT cg_edges_unique UNIQUE (scan_id, from_node_id, to_node_id, kind, reason)
|
|
);
|
|
|
|
-- Traversal indexes (critical for reachability queries)
|
|
CREATE INDEX idx_cg_edges_scan ON signals.cg_edges(scan_id);
|
|
CREATE INDEX idx_cg_edges_from ON signals.cg_edges(scan_id, from_node_id);
|
|
CREATE INDEX idx_cg_edges_to ON signals.cg_edges(scan_id, to_node_id);
|
|
|
|
-- Covering index for common traversal pattern
|
|
CREATE INDEX idx_cg_edges_traversal ON signals.cg_edges(scan_id, from_node_id)
|
|
INCLUDE (to_node_id, kind, weight);
|
|
|
|
-- =============================================================================
|
|
-- ENTRYPOINTS
|
|
-- =============================================================================
|
|
|
|
-- Framework-aware entrypoints
|
|
CREATE TABLE signals.entrypoints (
|
|
id BIGSERIAL PRIMARY KEY,
|
|
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
|
|
node_id TEXT NOT NULL,
|
|
kind TEXT NOT NULL CHECK (kind IN (
|
|
'http', 'grpc', 'cli', 'job', 'event', 'message_queue',
|
|
'timer', 'test', 'main', 'module_init', 'static_constructor', 'unknown'
|
|
)),
|
|
framework TEXT,
|
|
route TEXT,
|
|
http_method TEXT,
|
|
phase TEXT NOT NULL DEFAULT 'runtime'
|
|
CHECK (phase IN ('module_init', 'app_start', 'runtime', 'shutdown')),
|
|
order_idx INT NOT NULL DEFAULT 0,
|
|
|
|
CONSTRAINT entrypoints_scan_node_unique UNIQUE (scan_id, node_id, kind)
|
|
);
|
|
|
|
CREATE INDEX idx_entrypoints_scan ON signals.entrypoints(scan_id);
|
|
CREATE INDEX idx_entrypoints_kind ON signals.entrypoints(kind);
|
|
CREATE INDEX idx_entrypoints_route ON signals.entrypoints(route) WHERE route IS NOT NULL;
|
|
|
|
-- =============================================================================
|
|
-- SYMBOL-TO-COMPONENT MAPPING
|
|
-- =============================================================================
|
|
|
|
-- Maps symbols to SBOM components (for vuln correlation)
|
|
CREATE TABLE signals.symbol_component_map (
|
|
id BIGSERIAL PRIMARY KEY,
|
|
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
|
|
node_id TEXT NOT NULL,
|
|
purl TEXT NOT NULL,
|
|
mapping_kind TEXT NOT NULL CHECK (mapping_kind IN (
|
|
'exact', 'assembly', 'namespace', 'heuristic'
|
|
)),
|
|
confidence REAL NOT NULL DEFAULT 1.0,
|
|
evidence JSONB,
|
|
|
|
CONSTRAINT symbol_component_map_unique UNIQUE (scan_id, node_id, purl)
|
|
);
|
|
|
|
CREATE INDEX idx_symbol_component_scan ON signals.symbol_component_map(scan_id);
|
|
CREATE INDEX idx_symbol_component_purl ON signals.symbol_component_map(purl);
|
|
CREATE INDEX idx_symbol_component_node ON signals.symbol_component_map(scan_id, node_id);
|
|
|
|
-- =============================================================================
|
|
-- REACHABILITY RESULTS
|
|
-- =============================================================================
|
|
|
|
-- Component-level reachability status
|
|
CREATE TABLE signals.reachability_components (
|
|
id BIGSERIAL PRIMARY KEY,
|
|
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
|
|
purl TEXT NOT NULL,
|
|
status SMALLINT NOT NULL DEFAULT 0, -- ReachabilityStatus enum
|
|
lattice_state TEXT,
|
|
confidence REAL NOT NULL DEFAULT 0,
|
|
why JSONB,
|
|
evidence JSONB,
|
|
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
|
|
CONSTRAINT reachability_components_unique UNIQUE (scan_id, purl)
|
|
);
|
|
|
|
CREATE INDEX idx_reachability_components_scan ON signals.reachability_components(scan_id);
|
|
CREATE INDEX idx_reachability_components_purl ON signals.reachability_components(purl);
|
|
CREATE INDEX idx_reachability_components_status ON signals.reachability_components(status);
|
|
|
|
-- CVE-level reachability findings
|
|
CREATE TABLE signals.reachability_findings (
|
|
id BIGSERIAL PRIMARY KEY,
|
|
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
|
|
cve_id TEXT NOT NULL,
|
|
purl TEXT NOT NULL,
|
|
status SMALLINT NOT NULL DEFAULT 0,
|
|
lattice_state TEXT,
|
|
confidence REAL NOT NULL DEFAULT 0,
|
|
path_witness TEXT[],
|
|
why JSONB,
|
|
evidence JSONB,
|
|
spine_id UUID, -- Reference to proof spine
|
|
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
|
|
CONSTRAINT reachability_findings_unique UNIQUE (scan_id, cve_id, purl)
|
|
);
|
|
|
|
CREATE INDEX idx_reachability_findings_scan ON signals.reachability_findings(scan_id);
|
|
CREATE INDEX idx_reachability_findings_cve ON signals.reachability_findings(cve_id);
|
|
CREATE INDEX idx_reachability_findings_purl ON signals.reachability_findings(purl);
|
|
CREATE INDEX idx_reachability_findings_status ON signals.reachability_findings(status);
|
|
|
|
-- =============================================================================
|
|
-- RUNTIME SAMPLES
|
|
-- =============================================================================
|
|
|
|
-- Stack trace samples from runtime evidence
|
|
CREATE TABLE signals.runtime_samples (
|
|
id BIGSERIAL PRIMARY KEY,
|
|
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
|
|
collected_at TIMESTAMPTZ NOT NULL,
|
|
env_hash TEXT,
|
|
timestamp TIMESTAMPTZ NOT NULL,
|
|
pid INT,
|
|
thread_id INT,
|
|
frames TEXT[] NOT NULL,
|
|
weight REAL NOT NULL DEFAULT 1.0,
|
|
container_id TEXT,
|
|
pod_name TEXT
|
|
);
|
|
|
|
CREATE INDEX idx_runtime_samples_scan ON signals.runtime_samples(scan_id);
|
|
CREATE INDEX idx_runtime_samples_collected ON signals.runtime_samples(collected_at DESC);
|
|
|
|
-- GIN index for frame array searches
|
|
CREATE INDEX idx_runtime_samples_frames ON signals.runtime_samples USING gin(frames);
|
|
|
|
-- =============================================================================
|
|
-- DEPLOYMENT REFERENCES (for popularity scoring)
|
|
-- =============================================================================
|
|
|
|
CREATE TABLE signals.deploy_refs (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
purl TEXT NOT NULL,
|
|
image_id TEXT NOT NULL,
|
|
environment TEXT,
|
|
first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
UNIQUE (purl, image_id, environment)
|
|
);
|
|
|
|
CREATE INDEX idx_deploy_refs_purl ON signals.deploy_refs(purl);
|
|
CREATE INDEX idx_deploy_refs_last_seen ON signals.deploy_refs(last_seen_at);
|
|
|
|
-- =============================================================================
|
|
-- GRAPH METRICS (for centrality scoring)
|
|
-- =============================================================================
|
|
|
|
CREATE TABLE signals.graph_metrics (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
symbol_id TEXT NOT NULL,
|
|
callgraph_id TEXT NOT NULL,
|
|
degree INT NOT NULL DEFAULT 0,
|
|
betweenness FLOAT NOT NULL DEFAULT 0,
|
|
last_computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
UNIQUE (symbol_id, callgraph_id)
|
|
);
|
|
|
|
CREATE INDEX idx_graph_metrics_symbol ON signals.graph_metrics(symbol_id);
|
|
|
|
-- =============================================================================
|
|
-- UNKNOWNS TRACKING (enhanced for scoring)
|
|
-- =============================================================================
|
|
|
|
CREATE TABLE signals.unknowns (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
subject_key TEXT NOT NULL,
|
|
callgraph_id TEXT,
|
|
symbol_id TEXT,
|
|
code_id TEXT,
|
|
purl TEXT,
|
|
purl_version TEXT,
|
|
edge_from TEXT,
|
|
edge_to TEXT,
|
|
reason TEXT,
|
|
|
|
-- Scoring factors
|
|
popularity_score FLOAT DEFAULT 0,
|
|
deployment_count INT DEFAULT 0,
|
|
exploit_potential_score FLOAT DEFAULT 0,
|
|
uncertainty_score FLOAT DEFAULT 0,
|
|
centrality_score FLOAT DEFAULT 0,
|
|
degree_centrality INT DEFAULT 0,
|
|
betweenness_centrality FLOAT DEFAULT 0,
|
|
staleness_score FLOAT DEFAULT 0,
|
|
days_since_last_analysis INT DEFAULT 0,
|
|
|
|
-- Composite score and band
|
|
score FLOAT DEFAULT 0,
|
|
band TEXT DEFAULT 'cold' CHECK (band IN ('hot', 'warm', 'cold')),
|
|
|
|
-- Flags and traces
|
|
flags JSONB DEFAULT '{}',
|
|
normalization_trace JSONB,
|
|
graph_slice_hash TEXT,
|
|
evidence_set_hash TEXT,
|
|
callgraph_attempt_hash TEXT,
|
|
|
|
-- Rescan tracking
|
|
rescan_attempts INT DEFAULT 0,
|
|
last_rescan_result TEXT,
|
|
next_scheduled_rescan TIMESTAMPTZ,
|
|
last_analyzed_at TIMESTAMPTZ,
|
|
|
|
-- Timestamps
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX idx_unknowns_subject ON signals.unknowns(subject_key);
|
|
CREATE INDEX idx_unknowns_band ON signals.unknowns(band);
|
|
CREATE INDEX idx_unknowns_score ON signals.unknowns(score DESC);
|
|
CREATE INDEX idx_unknowns_next_rescan ON signals.unknowns(next_scheduled_rescan) WHERE next_scheduled_rescan IS NOT NULL;
|
|
|
|
-- =============================================================================
|
|
-- MATERIALIZED VIEWS FOR ANALYTICS
|
|
-- =============================================================================
|
|
|
|
-- Daily scan statistics
|
|
CREATE MATERIALIZED VIEW signals.scan_stats_daily AS
|
|
SELECT
|
|
DATE_TRUNC('day', created_at) AS day,
|
|
COUNT(*) AS total_scans,
|
|
COUNT(*) FILTER (WHERE status = 'completed') AS completed_scans,
|
|
COUNT(*) FILTER (WHERE status = 'failed') AS failed_scans,
|
|
AVG(EXTRACT(EPOCH FROM (completed_at - created_at))) FILTER (WHERE status = 'completed') AS avg_duration_seconds
|
|
FROM signals.scans
|
|
GROUP BY DATE_TRUNC('day', created_at)
|
|
ORDER BY day DESC;
|
|
|
|
CREATE UNIQUE INDEX idx_scan_stats_daily_day ON signals.scan_stats_daily(day);
|
|
|
|
-- CVE reachability summary
|
|
CREATE MATERIALIZED VIEW signals.cve_reachability_summary AS
|
|
SELECT
|
|
cve_id,
|
|
COUNT(DISTINCT scan_id) AS affected_scans,
|
|
COUNT(DISTINCT purl) AS affected_components,
|
|
COUNT(*) FILTER (WHERE status = 2) AS reachable_count, -- REACHABLE_STATIC
|
|
COUNT(*) FILTER (WHERE status = 3) AS proven_count, -- REACHABLE_PROVEN
|
|
COUNT(*) FILTER (WHERE status = 0) AS unreachable_count,
|
|
AVG(confidence) AS avg_confidence,
|
|
MAX(computed_at) AS last_updated
|
|
FROM signals.reachability_findings
|
|
GROUP BY cve_id;
|
|
|
|
CREATE UNIQUE INDEX idx_cve_reachability_summary_cve ON signals.cve_reachability_summary(cve_id);
|
|
|
|
-- Refresh function
|
|
CREATE OR REPLACE FUNCTION signals.refresh_analytics_views()
|
|
RETURNS void AS $$
|
|
BEGIN
|
|
REFRESH MATERIALIZED VIEW CONCURRENTLY signals.scan_stats_daily;
|
|
REFRESH MATERIALIZED VIEW CONCURRENTLY signals.cve_reachability_summary;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- =============================================================================
|
|
-- PROOF SPINE TABLES (SPRINT_3100)
|
|
-- =============================================================================
|
|
|
|
-- Schema for proof spine storage
|
|
CREATE SCHEMA IF NOT EXISTS scanner;
|
|
|
|
-- Main proof spine table
|
|
CREATE TABLE scanner.proof_spines (
|
|
spine_id TEXT PRIMARY KEY,
|
|
artifact_id TEXT NOT NULL,
|
|
vuln_id TEXT NOT NULL,
|
|
policy_profile_id TEXT NOT NULL,
|
|
verdict TEXT NOT NULL CHECK (verdict IN ('not_affected', 'affected', 'fixed', 'under_investigation')),
|
|
verdict_reason TEXT,
|
|
root_hash TEXT NOT NULL,
|
|
scan_run_id UUID NOT NULL,
|
|
segment_count INT NOT NULL,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
superseded_by_spine_id TEXT REFERENCES scanner.proof_spines(spine_id),
|
|
|
|
-- Deterministic spine ID = hash(artifact_id + vuln_id + policy_profile_id + root_hash)
|
|
CONSTRAINT proof_spines_unique_decision UNIQUE (artifact_id, vuln_id, policy_profile_id, root_hash)
|
|
);
|
|
|
|
-- Composite index for common lookups
|
|
CREATE INDEX idx_proof_spines_lookup
|
|
ON scanner.proof_spines(artifact_id, vuln_id, policy_profile_id);
|
|
CREATE INDEX idx_proof_spines_scan_run
|
|
ON scanner.proof_spines(scan_run_id);
|
|
CREATE INDEX idx_proof_spines_created
|
|
ON scanner.proof_spines(created_at DESC);
|
|
|
|
-- Individual segments within a spine
|
|
CREATE TABLE scanner.proof_segments (
|
|
segment_id TEXT PRIMARY KEY,
|
|
spine_id TEXT NOT NULL REFERENCES scanner.proof_spines(spine_id) ON DELETE CASCADE,
|
|
idx INT NOT NULL,
|
|
segment_type TEXT NOT NULL CHECK (segment_type IN (
|
|
'SBOM_SLICE', 'MATCH', 'REACHABILITY',
|
|
'GUARD_ANALYSIS', 'RUNTIME_OBSERVATION', 'POLICY_EVAL'
|
|
)),
|
|
input_hash TEXT NOT NULL,
|
|
result_hash TEXT NOT NULL,
|
|
prev_segment_hash TEXT,
|
|
envelope BYTEA NOT NULL, -- DSSE envelope (JSON or CBOR)
|
|
tool_id TEXT NOT NULL,
|
|
tool_version TEXT NOT NULL,
|
|
status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN (
|
|
'pending', 'verified', 'partial', 'invalid', 'untrusted'
|
|
)),
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
|
|
CONSTRAINT proof_segments_unique_idx UNIQUE (spine_id, idx)
|
|
);
|
|
|
|
CREATE INDEX idx_proof_segments_spine ON scanner.proof_segments(spine_id);
|
|
CREATE INDEX idx_proof_segments_type ON scanner.proof_segments(segment_type);
|
|
|
|
-- Audit trail for spine supersession
|
|
CREATE TABLE scanner.proof_spine_history (
|
|
history_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
spine_id TEXT NOT NULL REFERENCES scanner.proof_spines(spine_id),
|
|
action TEXT NOT NULL CHECK (action IN ('created', 'superseded', 'verified', 'invalidated')),
|
|
actor TEXT,
|
|
reason TEXT,
|
|
occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX idx_proof_spine_history_spine ON scanner.proof_spine_history(spine_id);
|