Files
git.stella-ops.org/docs/db/schemas/vuln.sql
2025-12-25 19:09:48 +02:00

250 lines
12 KiB
SQL

-- Generated from docs/db/SPECIFICATION.md §5.2 (2025-11-28)
CREATE SCHEMA IF NOT EXISTS vuln;
CREATE TABLE IF NOT EXISTS vuln.sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
key TEXT NOT NULL UNIQUE,
display_name TEXT NOT NULL,
url TEXT,
source_type TEXT NOT NULL CHECK (source_type IN ('nvd', 'osv', 'ghsa', 'vendor', 'oval', 'custom')),
enabled BOOLEAN NOT NULL DEFAULT TRUE,
priority INT NOT NULL DEFAULT 100,
config JSONB DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS vuln.feed_snapshots (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
source_id UUID NOT NULL REFERENCES vuln.sources(id),
snapshot_id TEXT NOT NULL,
taken_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
completed_at TIMESTAMPTZ,
status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'processing', 'completed', 'failed')),
stats JSONB DEFAULT '{}',
checksum TEXT,
error TEXT,
UNIQUE (source_id, snapshot_id)
);
CREATE TABLE IF NOT EXISTS vuln.advisory_snapshots (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
source_id UUID NOT NULL REFERENCES vuln.sources(id),
source_advisory_id TEXT NOT NULL,
feed_snapshot_id UUID REFERENCES vuln.feed_snapshots(id),
imported_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
raw_payload JSONB NOT NULL,
payload_hash TEXT NOT NULL,
is_latest BOOLEAN NOT NULL DEFAULT TRUE,
UNIQUE (source_id, source_advisory_id, payload_hash)
);
CREATE TABLE IF NOT EXISTS vuln.advisories (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_key TEXT NOT NULL UNIQUE,
primary_vuln_id TEXT NOT NULL,
source_id UUID REFERENCES vuln.sources(id),
title TEXT,
summary TEXT,
description TEXT,
language TEXT DEFAULT 'en',
severity TEXT CHECK (severity IN ('critical', 'high', 'medium', 'low', 'none', 'unknown')),
exploit_known BOOLEAN NOT NULL DEFAULT FALSE,
state TEXT NOT NULL DEFAULT 'active' CHECK (state IN ('active', 'rejected', 'withdrawn', 'disputed')),
published_at TIMESTAMPTZ,
modified_at TIMESTAMPTZ,
withdrawn_at TIMESTAMPTZ,
current_snapshot_id UUID REFERENCES vuln.advisory_snapshots(id),
canonical_metric_id UUID,
provenance JSONB DEFAULT '[]',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS vuln.advisory_aliases (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
alias_type TEXT NOT NULL CHECK (alias_type IN ('cve', 'ghsa', 'osv', 'vendor', 'internal', 'other')),
alias_value TEXT NOT NULL,
provenance JSONB DEFAULT '{}',
UNIQUE (alias_type, alias_value)
);
CREATE TABLE IF NOT EXISTS vuln.advisory_cvss (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
version TEXT NOT NULL CHECK (version IN ('2.0', '3.0', '3.1', '4.0')),
vector TEXT NOT NULL,
base_score NUMERIC(3,1) NOT NULL CHECK (base_score >= 0 AND base_score <= 10),
base_severity TEXT,
temporal_score NUMERIC(3,1) CHECK (temporal_score >= 0 AND temporal_score <= 10),
environmental_score NUMERIC(3,1) CHECK (environmental_score >= 0 AND environmental_score <= 10),
source TEXT,
is_primary BOOLEAN NOT NULL DEFAULT FALSE,
provenance JSONB DEFAULT '{}'
);
CREATE TABLE IF NOT EXISTS vuln.advisory_affected (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
package_type TEXT NOT NULL CHECK (package_type IN ('rpm', 'deb', 'cpe', 'semver', 'vendor', 'ics-vendor', 'generic')),
ecosystem TEXT,
package_name TEXT NOT NULL,
package_purl TEXT,
platform TEXT,
version_ranges JSONB NOT NULL DEFAULT '[]',
statuses JSONB DEFAULT '[]',
normalized_versions JSONB DEFAULT '[]',
provenance JSONB DEFAULT '[]'
);
CREATE TABLE IF NOT EXISTS vuln.advisory_references (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
url TEXT NOT NULL,
title TEXT,
ref_type TEXT,
provenance JSONB DEFAULT '{}'
);
CREATE TABLE IF NOT EXISTS vuln.advisory_credits (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
name TEXT NOT NULL,
contact TEXT,
credit_type TEXT,
provenance JSONB DEFAULT '{}'
);
CREATE TABLE IF NOT EXISTS vuln.advisory_weaknesses (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE,
cwe_id TEXT NOT NULL,
description TEXT,
provenance JSONB DEFAULT '{}'
);
CREATE TABLE IF NOT EXISTS vuln.kev_flags (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
cve_id TEXT NOT NULL UNIQUE,
advisory_id UUID REFERENCES vuln.advisories(id),
added_date DATE NOT NULL,
due_date DATE,
vendor_project TEXT,
product TEXT,
vulnerability_name TEXT,
short_description TEXT,
required_action TEXT,
notes TEXT,
known_ransomware_campaign BOOLEAN DEFAULT FALSE,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS vuln.source_states (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
source_id UUID NOT NULL REFERENCES vuln.sources(id) UNIQUE,
cursor TEXT,
last_fetch_at TIMESTAMPTZ,
last_success_at TIMESTAMPTZ,
consecutive_failures INT DEFAULT 0,
last_error TEXT,
last_error_at TIMESTAMPTZ,
metadata JSONB DEFAULT '{}'
);
CREATE TABLE IF NOT EXISTS vuln.merge_events (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
advisory_id UUID NOT NULL REFERENCES vuln.advisories(id),
event_type TEXT NOT NULL CHECK (event_type IN ('created', 'updated', 'merged', 'superseded', 'withdrawn')),
source_id UUID REFERENCES vuln.sources(id),
changes JSONB DEFAULT '{}',
occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_advisories_primary_vuln ON vuln.advisories(primary_vuln_id);
CREATE INDEX IF NOT EXISTS idx_advisories_modified ON vuln.advisories(modified_at DESC);
CREATE INDEX IF NOT EXISTS idx_advisories_published ON vuln.advisories(published_at DESC);
CREATE INDEX IF NOT EXISTS idx_advisories_severity ON vuln.advisories(severity) WHERE state = 'active';
CREATE INDEX IF NOT EXISTS idx_advisories_state ON vuln.advisories(state);
CREATE INDEX IF NOT EXISTS idx_advisory_aliases_value ON vuln.advisory_aliases(alias_value);
CREATE INDEX IF NOT EXISTS idx_advisory_aliases_advisory ON vuln.advisory_aliases(advisory_id);
CREATE INDEX IF NOT EXISTS idx_advisory_affected_purl ON vuln.advisory_affected(package_purl) WHERE package_purl IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_advisory_affected_name ON vuln.advisory_affected(ecosystem, package_name);
CREATE INDEX IF NOT EXISTS idx_advisory_affected_advisory ON vuln.advisory_affected(advisory_id);
CREATE INDEX IF NOT EXISTS idx_advisory_snapshots_latest ON vuln.advisory_snapshots(source_id, source_advisory_id) WHERE is_latest = TRUE;
CREATE INDEX IF NOT EXISTS idx_kev_flags_cve ON vuln.kev_flags(cve_id);
CREATE INDEX IF NOT EXISTS idx_merge_events_advisory ON vuln.merge_events(advisory_id, occurred_at DESC);
-- Full-text search
CREATE INDEX IF NOT EXISTS idx_advisories_fts ON vuln.advisories USING GIN (
to_tsvector('english', COALESCE(title, '') || ' ' || COALESCE(summary, '') || ' ' || COALESCE(description, ''))
);
-- ===========================================================================
-- Canonical Advisory Deduplication (Sprint 8200.0012.0002)
-- ===========================================================================
-- Deduplicated canonical advisory records
-- Identified by semantic merge_hash computed from (cve, affects_key, version_range, weakness, patch_lineage)
CREATE TABLE IF NOT EXISTS vuln.advisory_canonical (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
cve TEXT NOT NULL, -- CVE identifier (e.g., CVE-2024-1234)
affects_key TEXT NOT NULL, -- Normalized PURL or CPE
version_range JSONB, -- Structured: { introduced, fixed, last_affected }
weakness TEXT[] NOT NULL DEFAULT '{}', -- Sorted CWE array
merge_hash TEXT NOT NULL UNIQUE, -- SHA256 of normalized identity components
status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'stub', 'withdrawn')),
severity TEXT CHECK (severity IN ('critical', 'high', 'medium', 'low', 'none', 'unknown')),
epss_score NUMERIC(5,4), -- EPSS probability (0.0000-1.0000)
exploit_known BOOLEAN NOT NULL DEFAULT FALSE,
title TEXT,
summary TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Canonical indexes
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_cve ON vuln.advisory_canonical(cve);
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_affects ON vuln.advisory_canonical(affects_key);
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_merge_hash ON vuln.advisory_canonical(merge_hash);
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_status ON vuln.advisory_canonical(status) WHERE status = 'active';
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_severity ON vuln.advisory_canonical(severity) WHERE severity IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_exploit ON vuln.advisory_canonical(exploit_known) WHERE exploit_known = TRUE;
CREATE INDEX IF NOT EXISTS idx_advisory_canonical_updated ON vuln.advisory_canonical(updated_at DESC);
-- Source edge linking canonical advisories to source documents
-- Stores DSSE signature envelopes and raw payloads for provenance
CREATE TABLE IF NOT EXISTS vuln.advisory_source_edge (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
canonical_id UUID NOT NULL REFERENCES vuln.advisory_canonical(id) ON DELETE CASCADE,
source_id UUID NOT NULL REFERENCES vuln.sources(id) ON DELETE RESTRICT,
source_advisory_id TEXT NOT NULL, -- Vendor advisory ID (DSA-5678, RHSA-2024:1234)
source_doc_hash TEXT NOT NULL, -- SHA256 of raw source document
vendor_status TEXT CHECK (vendor_status IN ('affected', 'not_affected', 'fixed', 'under_investigation')),
precedence_rank INT NOT NULL DEFAULT 100, -- Lower = higher priority (vendor=10, distro=20, osv=30, nvd=40)
dsse_envelope JSONB, -- { payloadType, payload, signatures[] }
raw_payload JSONB, -- Original advisory document
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (canonical_id, source_id, source_doc_hash)
);
-- Source edge indexes
CREATE INDEX IF NOT EXISTS idx_source_edge_canonical ON vuln.advisory_source_edge(canonical_id);
CREATE INDEX IF NOT EXISTS idx_source_edge_source ON vuln.advisory_source_edge(source_id);
CREATE INDEX IF NOT EXISTS idx_source_edge_advisory_id ON vuln.advisory_source_edge(source_advisory_id);
CREATE INDEX IF NOT EXISTS idx_source_edge_canonical_source ON vuln.advisory_source_edge(canonical_id, source_id);
CREATE INDEX IF NOT EXISTS idx_source_edge_fetched ON vuln.advisory_source_edge(fetched_at DESC);
CREATE INDEX IF NOT EXISTS idx_source_edge_dsse_gin ON vuln.advisory_source_edge USING GIN (dsse_envelope jsonb_path_ops);
-- Comments for canonical deduplication tables
COMMENT ON TABLE vuln.advisory_canonical IS 'Deduplicated canonical advisories with semantic merge_hash';
COMMENT ON COLUMN vuln.advisory_canonical.merge_hash IS 'Deterministic SHA256 hash of (cve, affects_key, version_range, weakness, patch_lineage)';
COMMENT ON COLUMN vuln.advisory_canonical.affects_key IS 'Normalized PURL or CPE identifying the affected package';
COMMENT ON COLUMN vuln.advisory_canonical.status IS 'active=full record, stub=minimal for low interest, withdrawn=no longer valid';
COMMENT ON TABLE vuln.advisory_source_edge IS 'Links canonical advisories to source documents with DSSE signatures';
COMMENT ON COLUMN vuln.advisory_source_edge.precedence_rank IS 'Source priority: vendor=10, distro=20, osv=30, nvd=40, default=100';
COMMENT ON COLUMN vuln.advisory_source_edge.dsse_envelope IS 'DSSE envelope with signature over raw_payload';