-- ============================================================================ -- Proof System Database Schema -- ============================================================================ -- Purpose: Support patch-aware backport detection with cryptographic proofs -- Version: 1.0.0 -- Date: 2025-12-23 -- -- This schema extends the existing Concelier and Scanner schemas with proof -- infrastructure for backport detection (Tier 1-4). -- ============================================================================ -- Advisory lock for safe migrations SELECT pg_advisory_lock(hashtext('proof_system')); -- ============================================================================ -- SCHEMA: concelier (extend existing) -- ============================================================================ -- ---------------------------------------------------------------------------- -- Distro Release Catalog -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS concelier.distro_release ( release_id TEXT PRIMARY KEY, -- e.g., "ubuntu-22.04", "rhel-9.2" distro_name TEXT NOT NULL, -- e.g., "ubuntu", "rhel", "alpine" release_version TEXT NOT NULL, -- e.g., "22.04", "9.2", "3.18" codename TEXT, -- e.g., "jammy", "bookworm" release_date DATE, eol_date DATE, -- Architecture support architectures TEXT[] NOT NULL DEFAULT ARRAY['x86_64', 'aarch64'], -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT now(), updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT distro_release_unique UNIQUE(distro_name, release_version) ); CREATE INDEX idx_distro_release_name ON concelier.distro_release(distro_name); CREATE INDEX idx_distro_release_eol ON concelier.distro_release(eol_date) WHERE eol_date IS NOT NULL; COMMENT ON TABLE concelier.distro_release IS 'Catalog of distro releases for backport detection'; -- ---------------------------------------------------------------------------- -- Distro Package Catalog -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS concelier.distro_package ( package_id TEXT PRIMARY KEY, -- sha256:... release_id TEXT NOT NULL REFERENCES concelier.distro_release(release_id), -- Package identity package_name TEXT NOT NULL, package_version TEXT NOT NULL, -- Full NEVRA/EVR string architecture TEXT NOT NULL, -- Parsed version components epoch INTEGER DEFAULT 0, version TEXT NOT NULL, release TEXT, -- Build metadata build_id TEXT, -- ELF build-id if available build_date TIMESTAMPTZ, -- Source package reference source_package_name TEXT, source_package_version TEXT, -- Binary hashes file_sha256 TEXT, file_size BIGINT, -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT distro_package_unique UNIQUE(release_id, package_name, package_version, architecture) ); CREATE INDEX idx_distro_package_release ON concelier.distro_package(release_id); CREATE INDEX idx_distro_package_name ON concelier.distro_package(package_name); CREATE INDEX idx_distro_package_build_id ON concelier.distro_package(build_id) WHERE build_id IS NOT NULL; CREATE INDEX idx_distro_package_source ON concelier.distro_package(source_package_name, source_package_version); COMMENT ON TABLE concelier.distro_package IS 'Catalog of distro binary packages with build metadata'; -- ---------------------------------------------------------------------------- -- Distro Advisory Ingestion (raw) -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS concelier.distro_advisory ( advisory_id TEXT PRIMARY KEY, -- e.g., "DSA-5432-1", "RHSA-2024:1234" release_id TEXT NOT NULL REFERENCES concelier.distro_release(release_id), -- Advisory metadata advisory_type TEXT NOT NULL, -- "security" | "bugfix" | "enhancement" severity TEXT, -- "critical" | "high" | "medium" | "low" published_at TIMESTAMPTZ NOT NULL, updated_at TIMESTAMPTZ, -- Source source_url TEXT NOT NULL, source_hash TEXT NOT NULL, -- sha256 of source document -- Raw content (JSONB for flexible schema) raw_advisory JSONB NOT NULL, -- Ingestion metadata ingested_at TIMESTAMPTZ NOT NULL DEFAULT now(), snapshot_id TEXT NOT NULL, CONSTRAINT distro_advisory_unique UNIQUE(release_id, advisory_id) ); CREATE INDEX idx_distro_advisory_release ON concelier.distro_advisory(release_id); CREATE INDEX idx_distro_advisory_published ON concelier.distro_advisory(published_at DESC); CREATE INDEX idx_distro_advisory_severity ON concelier.distro_advisory(severity); CREATE INDEX idx_distro_advisory_snapshot ON concelier.distro_advisory(snapshot_id); -- GIN index for JSONB queries CREATE INDEX idx_distro_advisory_raw ON concelier.distro_advisory USING GIN(raw_advisory); COMMENT ON TABLE concelier.distro_advisory IS 'Raw distro security advisories (Tier 1 evidence)'; -- ---------------------------------------------------------------------------- -- CVE to Package Mapping (distro-specific) -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS concelier.distro_cve_affected ( mapping_id TEXT PRIMARY KEY, -- sha256:... release_id TEXT NOT NULL REFERENCES concelier.distro_release(release_id), cve_id TEXT NOT NULL, package_name TEXT NOT NULL, -- Affected range (distro-native format) range_kind TEXT NOT NULL, -- "nevra" | "evr" | "apk" range_start TEXT, -- Inclusive start version range_end TEXT, -- Exclusive end version -- Fix information fix_state TEXT NOT NULL, -- "fixed" | "not_affected" | "vulnerable" | "wontfix" | "unknown" fixed_version TEXT, -- Distro-native version string -- Evidence evidence_type TEXT NOT NULL, -- "distro_feed" | "changelog" | "patch_header" | "binary_match" evidence_source TEXT NOT NULL, -- Advisory ID or file path confidence NUMERIC(5,4) NOT NULL, -- 0.0-1.0 -- Provenance method TEXT NOT NULL, -- "security_feed" | "changelog" | "patch_header" | "binary_match" snapshot_id TEXT NOT NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT distro_cve_affected_confidence_check CHECK (confidence >= 0 AND confidence <= 1), CONSTRAINT distro_cve_affected_unique UNIQUE(release_id, cve_id, package_name, fix_state, method) ); CREATE INDEX idx_distro_cve_affected_release ON concelier.distro_cve_affected(release_id); CREATE INDEX idx_distro_cve_affected_cve ON concelier.distro_cve_affected(cve_id); CREATE INDEX idx_distro_cve_affected_package ON concelier.distro_cve_affected(package_name); CREATE INDEX idx_distro_cve_affected_confidence ON concelier.distro_cve_affected(confidence DESC); CREATE INDEX idx_distro_cve_affected_method ON concelier.distro_cve_affected(method); COMMENT ON TABLE concelier.distro_cve_affected IS 'CVE to package mappings with fix information (Tier 1-3 evidence)'; -- ---------------------------------------------------------------------------- -- Source Package Artifacts -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS concelier.source_artifact ( artifact_id TEXT PRIMARY KEY, -- sha256:... release_id TEXT NOT NULL REFERENCES concelier.distro_release(release_id), -- Source package identity source_package_name TEXT NOT NULL, source_package_version TEXT NOT NULL, -- Artifact type artifact_type TEXT NOT NULL, -- "changelog" | "patch_file" | "spec_file" | "apkbuild" artifact_path TEXT NOT NULL, -- Path within source package -- Content content_sha256 TEXT NOT NULL, content_size BIGINT NOT NULL, content BYTEA, -- May be NULL for large files (stored externally) -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT source_artifact_unique UNIQUE(release_id, source_package_name, source_package_version, artifact_path) ); CREATE INDEX idx_source_artifact_release ON concelier.source_artifact(release_id); CREATE INDEX idx_source_artifact_package ON concelier.source_artifact(source_package_name, source_package_version); CREATE INDEX idx_source_artifact_type ON concelier.source_artifact(artifact_type); COMMENT ON TABLE concelier.source_artifact IS 'Source package artifacts (changelogs, patches, specs) for Tier 2-3 analysis'; -- ---------------------------------------------------------------------------- -- Patch Signatures (HunkSig) -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS concelier.source_patch_sig ( patch_sig_id TEXT PRIMARY KEY, -- sha256:... -- Patch source cve_id TEXT, -- May be NULL for non-CVE patches upstream_repo TEXT, -- e.g., "github.com/openssl/openssl" commit_sha TEXT, -- Git commit SHA -- Normalized hunks hunks JSONB NOT NULL, -- Array of normalized hunk objects hunk_hash TEXT NOT NULL, -- sha256 of canonical hunk representation -- Function/file context affected_files TEXT[] NOT NULL, affected_functions TEXT[], -- Metadata extracted_at TIMESTAMPTZ NOT NULL DEFAULT now(), extractor_version TEXT NOT NULL, CONSTRAINT source_patch_sig_hunk_unique UNIQUE(hunk_hash) ); CREATE INDEX idx_source_patch_sig_cve ON concelier.source_patch_sig(cve_id) WHERE cve_id IS NOT NULL; CREATE INDEX idx_source_patch_sig_repo ON concelier.source_patch_sig(upstream_repo); CREATE INDEX idx_source_patch_sig_commit ON concelier.source_patch_sig(commit_sha); CREATE INDEX idx_source_patch_sig_files ON concelier.source_patch_sig USING GIN(affected_files); -- GIN index for JSONB queries CREATE INDEX idx_source_patch_sig_hunks ON concelier.source_patch_sig USING GIN(hunks); COMMENT ON TABLE concelier.source_patch_sig IS 'Upstream patch signatures (HunkSig) for equivalence matching'; -- ---------------------------------------------------------------------------- -- Build Provenance (BuildID → Package mapping) -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS concelier.build_provenance ( provenance_id TEXT PRIMARY KEY, -- sha256:... -- Binary identity build_id TEXT NOT NULL, -- ELF/PE build-id file_sha256 TEXT NOT NULL, -- Package mapping release_id TEXT NOT NULL REFERENCES concelier.distro_release(release_id), package_name TEXT NOT NULL, package_version TEXT NOT NULL, architecture TEXT NOT NULL, -- Build metadata build_date TIMESTAMPTZ, compiler TEXT, compiler_flags TEXT, -- Symbol information (optional, for advanced matching) symbols JSONB, -- Array of exported symbols -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT build_provenance_build_id_unique UNIQUE(build_id, release_id, architecture), CONSTRAINT build_provenance_file_unique UNIQUE(file_sha256, release_id) ); CREATE INDEX idx_build_provenance_build_id ON concelier.build_provenance(build_id); CREATE INDEX idx_build_provenance_file_sha ON concelier.build_provenance(file_sha256); CREATE INDEX idx_build_provenance_package ON concelier.build_provenance(package_name, package_version); -- GIN index for symbol queries CREATE INDEX idx_build_provenance_symbols ON concelier.build_provenance USING GIN(symbols) WHERE symbols IS NOT NULL; COMMENT ON TABLE concelier.build_provenance IS 'BuildID to package mapping for binary-level analysis'; -- ---------------------------------------------------------------------------- -- Binary Fingerprints (Tier 4) -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS concelier.binary_fingerprint ( fingerprint_id TEXT PRIMARY KEY, -- sha256:... -- CVE association cve_id TEXT NOT NULL, component TEXT NOT NULL, -- e.g., "openssl/libssl" architecture TEXT NOT NULL, -- Fingerprint type and value fp_type TEXT NOT NULL, -- "func_norm_hash" | "bb_multiset" | "cfg_hash" fp_value TEXT NOT NULL, -- Hash value -- Context function_hint TEXT, -- Function name if available confidence NUMERIC(5,4) NOT NULL, -- 0.0-1.0 -- Validation metrics true_positive_count INTEGER DEFAULT 0, -- Matches on known vulnerable binaries false_positive_count INTEGER DEFAULT 0, -- Matches on known fixed binaries validated_at TIMESTAMPTZ, -- Evidence reference evidence_ref TEXT NOT NULL, -- Points to reference builds + patch -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT binary_fingerprint_confidence_check CHECK (confidence >= 0 AND confidence <= 1), CONSTRAINT binary_fingerprint_unique UNIQUE(cve_id, component, architecture, fp_type, fp_value) ); CREATE INDEX idx_binary_fingerprint_cve ON concelier.binary_fingerprint(cve_id); CREATE INDEX idx_binary_fingerprint_component ON concelier.binary_fingerprint(component); CREATE INDEX idx_binary_fingerprint_type ON concelier.binary_fingerprint(fp_type); CREATE INDEX idx_binary_fingerprint_value ON concelier.binary_fingerprint(fp_value); CREATE INDEX idx_binary_fingerprint_confidence ON concelier.binary_fingerprint(confidence DESC); COMMENT ON TABLE concelier.binary_fingerprint IS 'Binary-level vulnerability fingerprints (Tier 4 evidence)'; -- ============================================================================ -- SCHEMA: scanner (extend existing) -- ============================================================================ -- ---------------------------------------------------------------------------- -- Backport Proof Blobs -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS scanner.backport_proof ( proof_id TEXT PRIMARY KEY, -- sha256:... subject_id TEXT NOT NULL, -- CVE-XXXX-YYYY:pkg:rpm/... -- Proof type and method proof_type TEXT NOT NULL, -- "backport_fixed" | "not_affected" | "vulnerable" | "unknown" method TEXT NOT NULL, -- "distro_feed" | "changelog" | "patch_header" | "binary_match" confidence NUMERIC(5,4) NOT NULL, -- 0.0-1.0 -- Scan context scan_id UUID, -- Reference to scanner.scan_manifest if part of scan -- Provenance tool_version TEXT NOT NULL, snapshot_id TEXT NOT NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT now(), -- Proof blob (JSONB) proof_blob JSONB NOT NULL, -- Proof hash (canonical hash of proof_blob, excludes this field) proof_hash TEXT NOT NULL, CONSTRAINT backport_proof_confidence_check CHECK (confidence >= 0 AND confidence <= 1), CONSTRAINT backport_proof_hash_unique UNIQUE(proof_hash) ); CREATE INDEX idx_backport_proof_subject ON scanner.backport_proof(subject_id); CREATE INDEX idx_backport_proof_type ON scanner.backport_proof(proof_type); CREATE INDEX idx_backport_proof_method ON scanner.backport_proof(method); CREATE INDEX idx_backport_proof_confidence ON scanner.backport_proof(confidence DESC); CREATE INDEX idx_backport_proof_scan ON scanner.backport_proof(scan_id) WHERE scan_id IS NOT NULL; CREATE INDEX idx_backport_proof_created ON scanner.backport_proof(created_at DESC); -- GIN index for JSONB queries CREATE INDEX idx_backport_proof_blob ON scanner.backport_proof USING GIN(proof_blob); COMMENT ON TABLE scanner.backport_proof IS 'Cryptographic proof blobs for backport detection verdicts'; -- ---------------------------------------------------------------------------- -- Proof Evidence (detailed evidence entries) -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS scanner.proof_evidence ( evidence_id TEXT PRIMARY KEY, -- sha256:... proof_id TEXT NOT NULL REFERENCES scanner.backport_proof(proof_id) ON DELETE CASCADE, -- Evidence metadata evidence_type TEXT NOT NULL, -- "distro_advisory" | "changelog_mention" | "patch_header" | "binary_fingerprint" | "version_comparison" | "build_catalog" source TEXT NOT NULL, -- Advisory ID, file path, or fingerprint ID timestamp TIMESTAMPTZ NOT NULL, -- Evidence data evidence_data JSONB NOT NULL, data_hash TEXT NOT NULL, -- sha256 of canonical evidence_data -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT proof_evidence_unique UNIQUE(proof_id, evidence_type, data_hash) ); CREATE INDEX idx_proof_evidence_proof ON scanner.proof_evidence(proof_id); CREATE INDEX idx_proof_evidence_type ON scanner.proof_evidence(evidence_type); CREATE INDEX idx_proof_evidence_source ON scanner.proof_evidence(source); -- GIN index for JSONB queries CREATE INDEX idx_proof_evidence_data ON scanner.proof_evidence USING GIN(evidence_data); COMMENT ON TABLE scanner.proof_evidence IS 'Individual evidence entries within proof blobs'; -- ============================================================================ -- SCHEMA: attestor (extend existing) -- ============================================================================ -- ---------------------------------------------------------------------------- -- Multi-Profile Signatures -- ---------------------------------------------------------------------------- CREATE TABLE IF NOT EXISTS attestor.multi_profile_signature ( signature_id TEXT PRIMARY KEY, -- sha256:... -- Signed content reference content_digest TEXT NOT NULL, -- sha256 of signed payload content_type TEXT NOT NULL, -- "proof_blob" | "vex_statement" | "sbom" | "audit_bundle" content_ref TEXT NOT NULL, -- Reference to signed content (proof_id, vex_id, etc.) -- Signatures (array of signature objects) signatures JSONB NOT NULL, -- Array: [{profile, keyId, algorithm, signature, signedAt}, ...] -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT multi_profile_signature_content_unique UNIQUE(content_digest, content_type) ); CREATE INDEX idx_multi_profile_signature_content ON attestor.multi_profile_signature(content_digest); CREATE INDEX idx_multi_profile_signature_type ON attestor.multi_profile_signature(content_type); CREATE INDEX idx_multi_profile_signature_ref ON attestor.multi_profile_signature(content_ref); CREATE INDEX idx_multi_profile_signature_created ON attestor.multi_profile_signature(created_at DESC); -- GIN index for signature queries CREATE INDEX idx_multi_profile_signature_sigs ON attestor.multi_profile_signature USING GIN(signatures); COMMENT ON TABLE attestor.multi_profile_signature IS 'Multi-profile cryptographic signatures for regional compliance'; -- ============================================================================ -- FUNCTIONS AND TRIGGERS -- ============================================================================ -- ---------------------------------------------------------------------------- -- Automatic updated_at trigger -- ---------------------------------------------------------------------------- CREATE OR REPLACE FUNCTION update_updated_at_column() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at = now(); RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE TRIGGER update_distro_release_updated_at BEFORE UPDATE ON concelier.distro_release FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); -- ============================================================================ -- VIEWS -- ============================================================================ -- ---------------------------------------------------------------------------- -- Aggregated CVE Fix Status (for querying) -- ---------------------------------------------------------------------------- CREATE OR REPLACE VIEW concelier.cve_fix_status_aggregated AS SELECT release_id, cve_id, package_name, -- Best fix state (prioritize not_affected > fixed > wontfix > vulnerable) CASE WHEN bool_or(fix_state = 'not_affected' AND confidence >= 0.9) THEN 'not_affected' WHEN bool_or(fix_state = 'fixed') THEN 'fixed' WHEN bool_or(fix_state = 'wontfix') THEN 'wontfix' ELSE 'vulnerable' END AS fix_state, -- Best fixed version (if fixed) max(fixed_version) FILTER (WHERE fix_state = 'fixed') AS fixed_version, -- Highest confidence evidence max(confidence) AS confidence, -- Methods contributing to verdict array_agg(DISTINCT method) AS methods, -- Evidence count count(*) AS evidence_count, -- Latest update max(created_at) AS latest_evidence_at FROM concelier.distro_cve_affected GROUP BY release_id, cve_id, package_name; COMMENT ON VIEW concelier.cve_fix_status_aggregated IS 'Aggregated CVE fix status with deterministic merge logic'; -- ---------------------------------------------------------------------------- -- Proof Blob Summary (for querying) -- ---------------------------------------------------------------------------- CREATE OR REPLACE VIEW scanner.backport_proof_summary AS SELECT bp.proof_id, bp.subject_id, bp.proof_type, bp.method, bp.confidence, bp.created_at, -- Evidence summary count(pe.evidence_id) AS evidence_count, array_agg(DISTINCT pe.evidence_type) AS evidence_types, -- Scan reference bp.scan_id, -- Proof hash bp.proof_hash FROM scanner.backport_proof bp LEFT JOIN scanner.proof_evidence pe ON bp.proof_id = pe.proof_id GROUP BY bp.proof_id, bp.subject_id, bp.proof_type, bp.method, bp.confidence, bp.created_at, bp.scan_id, bp.proof_hash; COMMENT ON VIEW scanner.backport_proof_summary IS 'Summary view of proof blobs with evidence counts'; -- ============================================================================ -- PARTITIONING (for large deployments) -- ============================================================================ -- Partition backport_proof by created_at (monthly) -- This is optional and should be enabled for high-volume deployments -- Example partition creation (for January 2025): -- CREATE TABLE scanner.backport_proof_2025_01 PARTITION OF scanner.backport_proof -- FOR VALUES FROM ('2025-01-01') TO ('2025-02-01'); -- ============================================================================ -- RETENTION POLICIES -- ============================================================================ -- Cleanup old proof blobs (optional, configure retention period) -- Example: Delete proofs older than 1 year that are not referenced by active scans -- CREATE OR REPLACE FUNCTION scanner.cleanup_old_proofs() -- RETURNS INTEGER AS $$ -- DECLARE -- deleted_count INTEGER; -- BEGIN -- DELETE FROM scanner.backport_proof -- WHERE created_at < now() - INTERVAL '1 year' -- AND scan_id IS NULL; -- -- GET DIAGNOSTICS deleted_count = ROW_COUNT; -- RETURN deleted_count; -- END; -- $$ LANGUAGE plpgsql; -- ============================================================================ -- VERIFICATION -- ============================================================================ DO $$ DECLARE table_count INTEGER; BEGIN SELECT COUNT(*) INTO table_count FROM information_schema.tables WHERE table_schema IN ('concelier', 'scanner', 'attestor') AND table_name IN ( 'distro_release', 'distro_package', 'distro_advisory', 'distro_cve_affected', 'source_artifact', 'source_patch_sig', 'build_provenance', 'binary_fingerprint', 'backport_proof', 'proof_evidence', 'multi_profile_signature' ); IF table_count < 11 THEN RAISE EXCEPTION 'Proof system schema incomplete: only % of 11 tables created', table_count; END IF; RAISE NOTICE 'Proof system schema verified: % tables created successfully', table_count; END; $$; -- Release advisory lock SELECT pg_advisory_unlock(hashtext('proof_system')); -- ============================================================================ -- END OF SCHEMA -- ============================================================================