-- Generated from docs/db/SPECIFICATION.md ยง5.2 (2025-11-28) CREATE SCHEMA IF NOT EXISTS vuln; CREATE TABLE IF NOT EXISTS vuln.sources ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), key TEXT NOT NULL UNIQUE, display_name TEXT NOT NULL, url TEXT, source_type TEXT NOT NULL CHECK (source_type IN ('nvd', 'osv', 'ghsa', 'vendor', 'oval', 'custom')), enabled BOOLEAN NOT NULL DEFAULT TRUE, priority INT NOT NULL DEFAULT 100, config JSONB DEFAULT '{}', created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE TABLE IF NOT EXISTS vuln.feed_snapshots ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), source_id UUID NOT NULL REFERENCES vuln.sources(id), snapshot_id TEXT NOT NULL, taken_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), completed_at TIMESTAMPTZ, status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'processing', 'completed', 'failed')), stats JSONB DEFAULT '{}', checksum TEXT, error TEXT, UNIQUE (source_id, snapshot_id) ); CREATE TABLE IF NOT EXISTS vuln.advisory_snapshots ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), source_id UUID NOT NULL REFERENCES vuln.sources(id), source_advisory_id TEXT NOT NULL, feed_snapshot_id UUID REFERENCES vuln.feed_snapshots(id), imported_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), raw_payload JSONB NOT NULL, payload_hash TEXT NOT NULL, is_latest BOOLEAN NOT NULL DEFAULT TRUE, UNIQUE (source_id, source_advisory_id, payload_hash) ); CREATE TABLE IF NOT EXISTS vuln.advisories ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), advisory_key TEXT NOT NULL UNIQUE, primary_vuln_id TEXT NOT NULL, source_id UUID REFERENCES vuln.sources(id), title TEXT, summary TEXT, description TEXT, language TEXT DEFAULT 'en', severity TEXT CHECK (severity IN ('critical', 'high', 'medium', 'low', 'none', 'unknown')), exploit_known BOOLEAN NOT NULL DEFAULT FALSE, state TEXT NOT NULL DEFAULT 'active' CHECK (state IN ('active', 'rejected', 'withdrawn', 'disputed')), published_at TIMESTAMPTZ, modified_at TIMESTAMPTZ, withdrawn_at TIMESTAMPTZ, current_snapshot_id UUID REFERENCES vuln.advisory_snapshots(id), canonical_metric_id UUID, provenance JSONB DEFAULT '[]', created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE TABLE IF NOT EXISTS vuln.advisory_aliases ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE, alias_type TEXT NOT NULL CHECK (alias_type IN ('cve', 'ghsa', 'osv', 'vendor', 'internal', 'other')), alias_value TEXT NOT NULL, provenance JSONB DEFAULT '{}', UNIQUE (alias_type, alias_value) ); CREATE TABLE IF NOT EXISTS vuln.advisory_cvss ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE, version TEXT NOT NULL CHECK (version IN ('2.0', '3.0', '3.1', '4.0')), vector TEXT NOT NULL, base_score NUMERIC(3,1) NOT NULL CHECK (base_score >= 0 AND base_score <= 10), base_severity TEXT, temporal_score NUMERIC(3,1) CHECK (temporal_score >= 0 AND temporal_score <= 10), environmental_score NUMERIC(3,1) CHECK (environmental_score >= 0 AND environmental_score <= 10), source TEXT, is_primary BOOLEAN NOT NULL DEFAULT FALSE, provenance JSONB DEFAULT '{}' ); CREATE TABLE IF NOT EXISTS vuln.advisory_affected ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE, package_type TEXT NOT NULL CHECK (package_type IN ('rpm', 'deb', 'cpe', 'semver', 'vendor', 'ics-vendor', 'generic')), ecosystem TEXT, package_name TEXT NOT NULL, package_purl TEXT, platform TEXT, version_ranges JSONB NOT NULL DEFAULT '[]', statuses JSONB DEFAULT '[]', normalized_versions JSONB DEFAULT '[]', provenance JSONB DEFAULT '[]' ); CREATE TABLE IF NOT EXISTS vuln.advisory_references ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE, url TEXT NOT NULL, title TEXT, ref_type TEXT, provenance JSONB DEFAULT '{}' ); CREATE TABLE IF NOT EXISTS vuln.advisory_credits ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE, name TEXT NOT NULL, contact TEXT, credit_type TEXT, provenance JSONB DEFAULT '{}' ); CREATE TABLE IF NOT EXISTS vuln.advisory_weaknesses ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), advisory_id UUID NOT NULL REFERENCES vuln.advisories(id) ON DELETE CASCADE, cwe_id TEXT NOT NULL, description TEXT, provenance JSONB DEFAULT '{}' ); CREATE TABLE IF NOT EXISTS vuln.kev_flags ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), cve_id TEXT NOT NULL UNIQUE, advisory_id UUID REFERENCES vuln.advisories(id), added_date DATE NOT NULL, due_date DATE, vendor_project TEXT, product TEXT, vulnerability_name TEXT, short_description TEXT, required_action TEXT, notes TEXT, known_ransomware_campaign BOOLEAN DEFAULT FALSE, updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE TABLE IF NOT EXISTS vuln.source_states ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), source_id UUID NOT NULL REFERENCES vuln.sources(id) UNIQUE, cursor TEXT, last_fetch_at TIMESTAMPTZ, last_success_at TIMESTAMPTZ, consecutive_failures INT DEFAULT 0, last_error TEXT, last_error_at TIMESTAMPTZ, metadata JSONB DEFAULT '{}' ); CREATE TABLE IF NOT EXISTS vuln.merge_events ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), advisory_id UUID NOT NULL REFERENCES vuln.advisories(id), event_type TEXT NOT NULL CHECK (event_type IN ('created', 'updated', 'merged', 'superseded', 'withdrawn')), source_id UUID REFERENCES vuln.sources(id), changes JSONB DEFAULT '{}', occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- Indexes CREATE INDEX IF NOT EXISTS idx_advisories_primary_vuln ON vuln.advisories(primary_vuln_id); CREATE INDEX IF NOT EXISTS idx_advisories_modified ON vuln.advisories(modified_at DESC); CREATE INDEX IF NOT EXISTS idx_advisories_published ON vuln.advisories(published_at DESC); CREATE INDEX IF NOT EXISTS idx_advisories_severity ON vuln.advisories(severity) WHERE state = 'active'; CREATE INDEX IF NOT EXISTS idx_advisories_state ON vuln.advisories(state); CREATE INDEX IF NOT EXISTS idx_advisory_aliases_value ON vuln.advisory_aliases(alias_value); CREATE INDEX IF NOT EXISTS idx_advisory_aliases_advisory ON vuln.advisory_aliases(advisory_id); CREATE INDEX IF NOT EXISTS idx_advisory_affected_purl ON vuln.advisory_affected(package_purl) WHERE package_purl IS NOT NULL; CREATE INDEX IF NOT EXISTS idx_advisory_affected_name ON vuln.advisory_affected(ecosystem, package_name); CREATE INDEX IF NOT EXISTS idx_advisory_affected_advisory ON vuln.advisory_affected(advisory_id); CREATE INDEX IF NOT EXISTS idx_advisory_snapshots_latest ON vuln.advisory_snapshots(source_id, source_advisory_id) WHERE is_latest = TRUE; CREATE INDEX IF NOT EXISTS idx_kev_flags_cve ON vuln.kev_flags(cve_id); CREATE INDEX IF NOT EXISTS idx_merge_events_advisory ON vuln.merge_events(advisory_id, occurred_at DESC); -- Full-text search CREATE INDEX IF NOT EXISTS idx_advisories_fts ON vuln.advisories USING GIN ( to_tsvector('english', COALESCE(title, '') || ' ' || COALESCE(summary, '') || ' ' || COALESCE(description, '')) ); -- =========================================================================== -- Canonical Advisory Deduplication (Sprint 8200.0012.0002) -- =========================================================================== -- Deduplicated canonical advisory records -- Identified by semantic merge_hash computed from (cve, affects_key, version_range, weakness, patch_lineage) CREATE TABLE IF NOT EXISTS vuln.advisory_canonical ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), cve TEXT NOT NULL, -- CVE identifier (e.g., CVE-2024-1234) affects_key TEXT NOT NULL, -- Normalized PURL or CPE version_range JSONB, -- Structured: { introduced, fixed, last_affected } weakness TEXT[] NOT NULL DEFAULT '{}', -- Sorted CWE array merge_hash TEXT NOT NULL UNIQUE, -- SHA256 of normalized identity components status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'stub', 'withdrawn')), severity TEXT CHECK (severity IN ('critical', 'high', 'medium', 'low', 'none', 'unknown')), epss_score NUMERIC(5,4), -- EPSS probability (0.0000-1.0000) exploit_known BOOLEAN NOT NULL DEFAULT FALSE, title TEXT, summary TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- Canonical indexes CREATE INDEX IF NOT EXISTS idx_advisory_canonical_cve ON vuln.advisory_canonical(cve); CREATE INDEX IF NOT EXISTS idx_advisory_canonical_affects ON vuln.advisory_canonical(affects_key); CREATE INDEX IF NOT EXISTS idx_advisory_canonical_merge_hash ON vuln.advisory_canonical(merge_hash); CREATE INDEX IF NOT EXISTS idx_advisory_canonical_status ON vuln.advisory_canonical(status) WHERE status = 'active'; CREATE INDEX IF NOT EXISTS idx_advisory_canonical_severity ON vuln.advisory_canonical(severity) WHERE severity IS NOT NULL; CREATE INDEX IF NOT EXISTS idx_advisory_canonical_exploit ON vuln.advisory_canonical(exploit_known) WHERE exploit_known = TRUE; CREATE INDEX IF NOT EXISTS idx_advisory_canonical_updated ON vuln.advisory_canonical(updated_at DESC); -- Source edge linking canonical advisories to source documents -- Stores DSSE signature envelopes and raw payloads for provenance CREATE TABLE IF NOT EXISTS vuln.advisory_source_edge ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), canonical_id UUID NOT NULL REFERENCES vuln.advisory_canonical(id) ON DELETE CASCADE, source_id UUID NOT NULL REFERENCES vuln.sources(id) ON DELETE RESTRICT, source_advisory_id TEXT NOT NULL, -- Vendor advisory ID (DSA-5678, RHSA-2024:1234) source_doc_hash TEXT NOT NULL, -- SHA256 of raw source document vendor_status TEXT CHECK (vendor_status IN ('affected', 'not_affected', 'fixed', 'under_investigation')), precedence_rank INT NOT NULL DEFAULT 100, -- Lower = higher priority (vendor=10, distro=20, osv=30, nvd=40) dsse_envelope JSONB, -- { payloadType, payload, signatures[] } raw_payload JSONB, -- Original advisory document fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), UNIQUE (canonical_id, source_id, source_doc_hash) ); -- Source edge indexes CREATE INDEX IF NOT EXISTS idx_source_edge_canonical ON vuln.advisory_source_edge(canonical_id); CREATE INDEX IF NOT EXISTS idx_source_edge_source ON vuln.advisory_source_edge(source_id); CREATE INDEX IF NOT EXISTS idx_source_edge_advisory_id ON vuln.advisory_source_edge(source_advisory_id); CREATE INDEX IF NOT EXISTS idx_source_edge_canonical_source ON vuln.advisory_source_edge(canonical_id, source_id); CREATE INDEX IF NOT EXISTS idx_source_edge_fetched ON vuln.advisory_source_edge(fetched_at DESC); CREATE INDEX IF NOT EXISTS idx_source_edge_dsse_gin ON vuln.advisory_source_edge USING GIN (dsse_envelope jsonb_path_ops); -- Comments for canonical deduplication tables COMMENT ON TABLE vuln.advisory_canonical IS 'Deduplicated canonical advisories with semantic merge_hash'; COMMENT ON COLUMN vuln.advisory_canonical.merge_hash IS 'Deterministic SHA256 hash of (cve, affects_key, version_range, weakness, patch_lineage)'; COMMENT ON COLUMN vuln.advisory_canonical.affects_key IS 'Normalized PURL or CPE identifying the affected package'; COMMENT ON COLUMN vuln.advisory_canonical.status IS 'active=full record, stub=minimal for low interest, withdrawn=no longer valid'; COMMENT ON TABLE vuln.advisory_source_edge IS 'Links canonical advisories to source documents with DSSE signatures'; COMMENT ON COLUMN vuln.advisory_source_edge.precedence_rank IS 'Source priority: vendor=10, distro=20, osv=30, nvd=40, default=100'; COMMENT ON COLUMN vuln.advisory_source_edge.dsse_envelope IS 'DSSE envelope with signature over raw_payload';