Files
git.stella-ops.org/docs/db/schemas/binaries_schema_specification.md
master 53503cb407 Add reference architecture and testing strategy documentation
- Created a new document for the Stella Ops Reference Architecture outlining the system's topology, trust boundaries, artifact association, and interfaces.
- Developed a comprehensive Testing Strategy document detailing the importance of offline readiness, interoperability, determinism, and operational guardrails.
- Introduced a README for the Testing Strategy, summarizing processing details and key concepts implemented.
- Added guidance for AI agents and developers in the tests directory, including directory structure, test categories, key patterns, and rules for test development.
2025-12-22 07:59:30 +02:00

30 KiB

Binaries Schema Specification

Version: 1.0.0 | Status: DRAFT | Owner: BinaryIndex Module | Last Updated: 2025-12-21


1. Overview

The binaries schema stores binary identity, vulnerability mappings, fingerprints, and patch-aware fix status for the BinaryIndex module. This enables detection of vulnerable binaries independent of package metadata.

2. Schema Definition

-- ============================================================================
-- BINARIES SCHEMA
-- ============================================================================
-- Purpose: Binary identity, fingerprint, and vulnerability mapping for
-- the BinaryIndex module (vulnerable binaries database).
-- ============================================================================

CREATE SCHEMA IF NOT EXISTS binaries;
CREATE SCHEMA IF NOT EXISTS binaries_app;

-- ----------------------------------------------------------------------------
-- RLS Helper Function
-- ----------------------------------------------------------------------------

-- binaries_app.require_current_tenant()
-- Purpose: Returns the value of the app.tenant_id session setting so that
--          row-level security policies can compare it with tenant_id columns.
-- Returns: TEXT - the setting value exactly as stored (never NULL or empty).
-- Raises:  An exception when app.tenant_id is unset or empty, so a session
--          without tenant context fails loudly instead of matching no rows.
-- Notes:   The function runs as SECURITY DEFINER, so search_path is pinned.
--          Without the SET clause, unqualified names inside the body would be
--          resolved through the caller's search_path.
CREATE OR REPLACE FUNCTION binaries_app.require_current_tenant()
RETURNS TEXT
LANGUAGE plpgsql STABLE SECURITY DEFINER
SET search_path = pg_catalog, pg_temp
AS $$
DECLARE
    v_tenant TEXT;
BEGIN
    -- missing_ok = true: an unset setting yields NULL rather than an error,
    -- which lets the check below raise one consistent message.
    v_tenant := current_setting('app.tenant_id', true);
    IF v_tenant IS NULL OR v_tenant = '' THEN
        RAISE EXCEPTION 'app.tenant_id session variable not set'
            USING HINT = 'Set it first, e.g. SELECT set_config(''app.tenant_id'', ''<tenant uuid>'', false);';
    END IF;
    RETURN v_tenant;
END;
$$;

-- ============================================================================
-- CORE IDENTITY TABLES
-- ============================================================================

-- ----------------------------------------------------------------------------
-- Table: binary_identity
-- Purpose: Known binary identities extracted from packages
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.binary_identity (
    id                      UUID DEFAULT gen_random_uuid(),
    tenant_id               UUID NOT NULL,

    -- Identity key (a Build-ID is the preferred identifier for ELF files)
    -- NOTE(review): the original note reads "build_id || file_sha256"; confirm
    -- whether that means "Build-ID, else file hash" or a literal concatenation.
    binary_key              TEXT NOT NULL,      -- normalized identity key
    build_id                TEXT,               -- ELF GNU Build-ID, hex encoded
    build_id_type           TEXT,               -- kind of build identifier, see check below

    -- Content hashes
    file_sha256             TEXT NOT NULL,      -- SHA-256 over the whole file
    text_sha256             TEXT,               -- SHA-256 over the .text section (ELF)
    blake3_hash             TEXT,               -- optional, faster hash

    -- Binary metadata
    format                  TEXT NOT NULL,      -- container format, see check below
    architecture            TEXT NOT NULL,      -- x86-64, aarch64, arm, etc.
    osabi                   TEXT,               -- linux, windows, darwin
    binary_type             TEXT,               -- kind of artifact, see check below
    is_stripped             BOOLEAN DEFAULT false,

    -- Tracking
    first_seen_snapshot_id  UUID,
    last_seen_snapshot_id   UUID,
    created_at              TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
    updated_at              TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    -- Constraint names below are the ones PostgreSQL generates for the
    -- equivalent column-level declarations.
    CONSTRAINT binary_identity_pkey PRIMARY KEY (id),
    CONSTRAINT binary_identity_build_id_type_check
        CHECK (build_id_type IN ('gnu-build-id', 'pe-cv', 'macho-uuid')),
    CONSTRAINT binary_identity_format_check
        CHECK (format IN ('elf', 'pe', 'macho')),
    CONSTRAINT binary_identity_binary_type_check
        CHECK (binary_type IN ('executable', 'shared_library', 'static_library', 'object')),
    CONSTRAINT binary_identity_key_unique UNIQUE (tenant_id, binary_key)
);

-- ----------------------------------------------------------------------------
-- Table: binary_package_map
-- Purpose: Maps binaries to source packages (per snapshot)
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.binary_package_map (
    id                  UUID DEFAULT gen_random_uuid(),
    tenant_id           UUID NOT NULL,

    -- Which binary this row describes
    binary_identity_id  UUID NOT NULL,
    binary_key          TEXT NOT NULL,

    -- Package that ships the binary
    distro              TEXT NOT NULL,      -- debian, ubuntu, rhel, alpine
    release             TEXT NOT NULL,      -- bookworm, jammy, 9, 3.19
    source_pkg          TEXT NOT NULL,      -- source package name (e.g., openssl)
    binary_pkg          TEXT NOT NULL,      -- binary package name (e.g., libssl3)
    pkg_version         TEXT NOT NULL,      -- full distro version (e.g., 1.1.1n-0+deb11u5)
    pkg_purl            TEXT,               -- PURL, when one can be derived
    architecture        TEXT NOT NULL,

    -- Where the file lives inside the package
    file_path_in_pkg    TEXT NOT NULL,      -- /usr/lib/x86_64-linux-gnu/libssl.so.3

    -- Snapshot the mapping was observed in
    snapshot_id         UUID NOT NULL,

    -- Metadata
    created_at          TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    CONSTRAINT binary_package_map_pkey PRIMARY KEY (id),
    -- Mapping rows are removed together with their binary identity.
    CONSTRAINT binary_package_map_binary_identity_id_fkey
        FOREIGN KEY (binary_identity_id)
        REFERENCES binaries.binary_identity (id)
        ON DELETE CASCADE,
    CONSTRAINT binary_package_map_unique
        UNIQUE (binary_identity_id, snapshot_id, file_path_in_pkg)
);

-- ----------------------------------------------------------------------------
-- Table: corpus_snapshots
-- Purpose: Tracks corpus ingestion snapshots
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.corpus_snapshots (
    id                    UUID DEFAULT gen_random_uuid(),
    tenant_id             UUID NOT NULL,

    -- What the snapshot covers
    distro                TEXT NOT NULL,
    release               TEXT NOT NULL,
    architecture          TEXT NOT NULL,
    snapshot_id           TEXT NOT NULL,            -- unique snapshot identifier

    -- Ingestion counters and content digest
    packages_processed    INTEGER NOT NULL DEFAULT 0,
    binaries_indexed      INTEGER NOT NULL DEFAULT 0,
    repo_metadata_digest  TEXT,                     -- SHA-256 of repo metadata

    -- Signing
    signing_key_id        TEXT,
    dsse_envelope_ref     TEXT,                     -- RustFS reference to DSSE envelope

    -- Lifecycle state; new rows start as 'pending'
    status                TEXT NOT NULL DEFAULT 'pending',
    error                 TEXT,

    -- Timestamps
    started_at            TIMESTAMP WITH TIME ZONE,
    completed_at          TIMESTAMP WITH TIME ZONE,
    created_at            TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    CONSTRAINT corpus_snapshots_pkey PRIMARY KEY (id),
    CONSTRAINT corpus_snapshots_status_check
        CHECK (status IN ('pending', 'processing', 'completed', 'failed')),
    CONSTRAINT corpus_snapshots_unique
        UNIQUE (tenant_id, distro, release, architecture, snapshot_id)
);

-- ============================================================================
-- VULNERABILITY MAPPING TABLES
-- ============================================================================

-- ----------------------------------------------------------------------------
-- Table: vulnerable_buildids
-- Purpose: Build-IDs known to be associated with vulnerable packages
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.vulnerable_buildids (
    id             UUID DEFAULT gen_random_uuid(),
    tenant_id      UUID NOT NULL,

    -- Build identifier being catalogued
    buildid_type   TEXT NOT NULL,               -- kind of identifier, see check below
    buildid_value  TEXT NOT NULL,               -- hex string

    -- Package the build belongs to
    purl           TEXT NOT NULL,               -- Package URL
    pkg_version    TEXT NOT NULL,
    distro         TEXT,
    release        TEXT,

    -- How certain the association is; defaults to 'exact'
    confidence     TEXT NOT NULL DEFAULT 'exact',

    -- Provenance
    provenance     JSONB DEFAULT '{}',
    snapshot_id    UUID,

    -- Tracking
    indexed_at     TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
    created_at     TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    CONSTRAINT vulnerable_buildids_pkey PRIMARY KEY (id),
    CONSTRAINT vulnerable_buildids_buildid_type_check
        CHECK (buildid_type IN ('gnu-build-id', 'pe-cv', 'macho-uuid')),
    CONSTRAINT vulnerable_buildids_confidence_check
        CHECK (confidence IN ('exact', 'inferred', 'heuristic')),
    CONSTRAINT vulnerable_buildids_snapshot_id_fkey
        FOREIGN KEY (snapshot_id)
        REFERENCES binaries.corpus_snapshots (id),
    CONSTRAINT vulnerable_buildids_unique
        UNIQUE (tenant_id, buildid_value, buildid_type, purl, pkg_version)
);

-- ----------------------------------------------------------------------------
-- Table: binary_vuln_assertion
-- Purpose: CVE status assertions for specific binaries
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.binary_vuln_assertion (
    id                  UUID DEFAULT gen_random_uuid(),
    tenant_id           UUID NOT NULL,

    -- Binary the assertion is about
    binary_key          TEXT NOT NULL,
    binary_identity_id  UUID,

    -- CVE the assertion is about
    cve_id              TEXT NOT NULL,
    advisory_id         UUID,                   -- reference to vuln.advisories

    -- Asserted status, see check below
    status              TEXT NOT NULL,

    -- How the status was determined, and with what confidence (0 to 1)
    method              TEXT NOT NULL,
    confidence          NUMERIC(3,2),

    -- Evidence
    evidence_ref        TEXT,                   -- RustFS reference to evidence bundle
    evidence_digest     TEXT,                   -- SHA-256 of evidence

    -- Tracking
    evaluated_at        TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
    created_at          TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    CONSTRAINT binary_vuln_assertion_pkey PRIMARY KEY (id),
    CONSTRAINT binary_vuln_assertion_binary_identity_id_fkey
        FOREIGN KEY (binary_identity_id)
        REFERENCES binaries.binary_identity (id),
    CONSTRAINT binary_vuln_assertion_status_check
        CHECK (status IN ('affected', 'not_affected', 'fixed', 'unknown')),
    CONSTRAINT binary_vuln_assertion_method_check
        CHECK (method IN ('range_match', 'buildid_catalog', 'fingerprint_match', 'fix_index')),
    CONSTRAINT binary_vuln_assertion_confidence_check
        CHECK (confidence >= 0 AND confidence <= 1),
    -- At most one assertion per binary and CVE within a tenant.
    CONSTRAINT binary_vuln_assertion_unique UNIQUE (tenant_id, binary_key, cve_id)
);

-- ============================================================================
-- FIX INDEX TABLES (Patch-Aware Backport Handling)
-- ============================================================================

-- ----------------------------------------------------------------------------
-- Table: cve_fix_evidence
-- Purpose: Raw evidence of CVE fixes (append-only)
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.cve_fix_evidence (
    id             UUID DEFAULT gen_random_uuid(),
    tenant_id      UUID NOT NULL,

    -- What the evidence is about
    distro         TEXT NOT NULL,
    release        TEXT NOT NULL,
    source_pkg     TEXT NOT NULL,
    cve_id         TEXT NOT NULL,

    -- Observed fix state, see check below
    state          TEXT NOT NULL,
    fixed_version  TEXT,                        -- distro version string (nullable for not_affected)

    -- Where the evidence came from, and its confidence (0 to 1)
    method         TEXT NOT NULL,
    confidence     NUMERIC(3,2) NOT NULL,

    -- Method-specific evidence payload
    evidence       JSONB NOT NULL,

    -- Snapshot the evidence was collected from
    snapshot_id    UUID,

    -- Tracking
    created_at     TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    CONSTRAINT cve_fix_evidence_pkey PRIMARY KEY (id),
    CONSTRAINT cve_fix_evidence_state_check
        CHECK (state IN ('fixed', 'vulnerable', 'not_affected', 'wontfix', 'unknown')),
    CONSTRAINT cve_fix_evidence_method_check
        CHECK (method IN ('security_feed', 'changelog', 'patch_header', 'upstream_patch_match')),
    CONSTRAINT cve_fix_evidence_confidence_check
        CHECK (confidence >= 0 AND confidence <= 1),
    CONSTRAINT cve_fix_evidence_snapshot_id_fkey
        FOREIGN KEY (snapshot_id)
        REFERENCES binaries.corpus_snapshots (id)
);

-- ----------------------------------------------------------------------------
-- Table: cve_fix_index
-- Purpose: Merged best-record for CVE fix status per distro/package
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.cve_fix_index (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,

    -- Key fields
    distro TEXT NOT NULL,
    release TEXT NOT NULL,
    source_pkg TEXT NOT NULL,
    cve_id TEXT NOT NULL,
    architecture TEXT,                  -- NULL means all architectures

    -- Fix status
    state TEXT NOT NULL CHECK (state IN ('fixed', 'vulnerable', 'not_affected', 'wontfix', 'unknown')),
    fixed_version TEXT,

    -- Merge metadata
    primary_method TEXT NOT NULL,       -- Method of highest-confidence evidence
    -- Same 0..1 range that cve_fix_evidence.confidence enforces
    confidence NUMERIC(3,2) NOT NULL CHECK (confidence >= 0 AND confidence <= 1),
    evidence_ids UUID[],                -- References to cve_fix_evidence

    -- Tracking
    computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Enforces one row per key for rows with a concrete architecture.
    CONSTRAINT cve_fix_index_unique UNIQUE (tenant_id, distro, release, source_pkg, cve_id, architecture)
);

-- A UNIQUE constraint treats NULLs as distinct, so cve_fix_index_unique alone
-- allows any number of "all architectures" rows (architecture IS NULL) for the
-- same key. This partial unique index closes that gap without requiring
-- NULLS NOT DISTINCT (PostgreSQL 15+).
-- Upserts that target NULL-architecture rows must name this index's columns:
--   ON CONFLICT (tenant_id, distro, release, source_pkg, cve_id)
--       WHERE architecture IS NULL
CREATE UNIQUE INDEX idx_cve_fix_index_all_arch_unique
    ON binaries.cve_fix_index (tenant_id, distro, release, source_pkg, cve_id)
    WHERE architecture IS NULL;

-- ============================================================================
-- FINGERPRINT TABLES
-- ============================================================================

-- ----------------------------------------------------------------------------
-- Table: vulnerable_fingerprints
-- Purpose: Function fingerprints for CVE detection
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.vulnerable_fingerprints (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,

    -- CVE and component
    cve_id TEXT NOT NULL,
    component TEXT NOT NULL,            -- e.g., openssl, glibc
    purl TEXT,                          -- Package URL if applicable

    -- Fingerprint data
    algorithm TEXT NOT NULL CHECK (algorithm IN ('basic_block', 'control_flow_graph', 'string_refs', 'combined')),
    fingerprint_id TEXT NOT NULL,       -- Unique ID (e.g., "bb-abc123...")
    fingerprint_hash BYTEA NOT NULL,    -- Raw fingerprint bytes (16-32 bytes)
    architecture TEXT NOT NULL,         -- x86-64, aarch64

    -- Function hints
    function_name TEXT,                 -- Original function name if known
    source_file TEXT,                   -- Source file path
    source_line INT,

    -- Confidence and validation
    -- NUMERIC(3,2) alone admits values from -9.99 to 9.99; the threshold is
    -- range-checked the same way as confidence in this table.
    similarity_threshold NUMERIC(3,2) DEFAULT 0.95
        CHECK (similarity_threshold >= 0 AND similarity_threshold <= 1),
    confidence NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),
    validated BOOLEAN DEFAULT FALSE,
    validation_stats JSONB DEFAULT '{}',  -- precision, recall, etc.

    -- Reference builds
    vuln_build_ref TEXT,                -- RustFS ref to vulnerable reference build
    fixed_build_ref TEXT,               -- RustFS ref to fixed reference build

    -- Metadata
    notes TEXT,
    evidence_ref TEXT,                  -- RustFS ref to evidence bundle

    -- Tracking
    indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- One fingerprint per CVE, algorithm, fingerprint ID, and architecture within a tenant
    CONSTRAINT vulnerable_fingerprints_unique UNIQUE (tenant_id, cve_id, algorithm, fingerprint_id, architecture)
);

-- ----------------------------------------------------------------------------
-- Table: fingerprint_corpus_metadata
-- Purpose: Tracks which packages have been fingerprinted
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.fingerprint_corpus_metadata (
    id                    UUID DEFAULT gen_random_uuid(),
    tenant_id             UUID NOT NULL,

    -- Package that was fingerprinted
    purl                  TEXT NOT NULL,
    version               TEXT NOT NULL,

    -- How it was fingerprinted
    algorithm             TEXT NOT NULL,
    binary_digest         TEXT,                 -- sha256 of the binary analyzed

    -- Counters
    function_count        INTEGER NOT NULL DEFAULT 0,
    fingerprints_indexed  INTEGER NOT NULL DEFAULT 0,

    -- Provenance
    indexed_by            TEXT,                 -- service/user that indexed
    indexed_at            TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    -- Tracking
    created_at            TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    CONSTRAINT fingerprint_corpus_metadata_pkey PRIMARY KEY (id),
    -- One metadata row per package version and algorithm within a tenant.
    CONSTRAINT fingerprint_corpus_metadata_unique
        UNIQUE (tenant_id, purl, version, algorithm)
);

-- ============================================================================
-- MATCH RESULTS TABLES
-- ============================================================================

-- ----------------------------------------------------------------------------
-- Table: fingerprint_matches
-- Purpose: Records fingerprint matches during scans
-- ----------------------------------------------------------------------------

CREATE TABLE binaries.fingerprint_matches (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,

    -- Scan reference
    scan_id UUID NOT NULL,              -- Reference to scanner.scan_manifest

    -- Match details
    match_type TEXT NOT NULL CHECK (match_type IN ('fingerprint', 'buildid', 'hash_exact')),
    binary_key TEXT NOT NULL,
    binary_identity_id UUID REFERENCES binaries.binary_identity(id),

    -- Vulnerable package
    vulnerable_purl TEXT NOT NULL,
    vulnerable_version TEXT NOT NULL,

    -- Fingerprint match specifics (nullable for non-fingerprint matches)
    matched_fingerprint_id UUID REFERENCES binaries.vulnerable_fingerprints(id),
    matched_function TEXT,
    -- 0.00-1.00; enforced because NUMERIC(3,2) alone admits -9.99 to 9.99.
    -- NULL (non-fingerprint matches) still passes the check.
    similarity NUMERIC(3,2) CHECK (similarity >= 0 AND similarity <= 1),

    -- CVE linkage
    advisory_ids TEXT[],                -- Linked CVE/GHSA IDs

    -- Reachability (populated later by Scanner)
    reachability_status TEXT CHECK (reachability_status IN ('reachable', 'unreachable', 'unknown', 'partial')),

    -- Evidence
    evidence JSONB DEFAULT '{}',

    -- Tracking
    matched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- ============================================================================
-- INDEXES
-- ============================================================================

-- binary_identity: tenant scoping plus lookups by Build-ID, file hash, and key
CREATE INDEX idx_binary_identity_tenant
    ON binaries.binary_identity USING btree (tenant_id);
CREATE INDEX idx_binary_identity_buildid
    ON binaries.binary_identity USING btree (build_id)
    WHERE build_id IS NOT NULL;
CREATE INDEX idx_binary_identity_sha256
    ON binaries.binary_identity USING btree (file_sha256);
CREATE INDEX idx_binary_identity_key
    ON binaries.binary_identity USING btree (binary_key);

-- binary_package_map: tenant scoping plus lookups by binary, package, snapshot, and PURL
CREATE INDEX idx_binary_package_map_tenant
    ON binaries.binary_package_map USING btree (tenant_id);
CREATE INDEX idx_binary_package_map_binary
    ON binaries.binary_package_map USING btree (binary_identity_id);
CREATE INDEX idx_binary_package_map_distro
    ON binaries.binary_package_map USING btree (distro, release, source_pkg);
CREATE INDEX idx_binary_package_map_snapshot
    ON binaries.binary_package_map USING btree (snapshot_id);
CREATE INDEX idx_binary_package_map_purl
    ON binaries.binary_package_map USING btree (pkg_purl)
    WHERE pkg_purl IS NOT NULL;

-- corpus_snapshots: tenant scoping, distro lookup, and in-flight snapshots only
CREATE INDEX idx_corpus_snapshots_tenant
    ON binaries.corpus_snapshots USING btree (tenant_id);
CREATE INDEX idx_corpus_snapshots_distro
    ON binaries.corpus_snapshots USING btree (distro, release, architecture);
CREATE INDEX idx_corpus_snapshots_status
    ON binaries.corpus_snapshots USING btree (status)
    WHERE status IN ('pending', 'processing');

-- vulnerable_buildids: tenant scoping plus lookups by build identifier and PURL
CREATE INDEX idx_vulnerable_buildids_tenant
    ON binaries.vulnerable_buildids USING btree (tenant_id);
CREATE INDEX idx_vulnerable_buildids_value
    ON binaries.vulnerable_buildids USING btree (buildid_type, buildid_value);
CREATE INDEX idx_vulnerable_buildids_purl
    ON binaries.vulnerable_buildids USING btree (purl);

-- binary_vuln_assertion: tenant scoping, binary and CVE lookups, affected rows only
CREATE INDEX idx_binary_vuln_assertion_tenant
    ON binaries.binary_vuln_assertion USING btree (tenant_id);
CREATE INDEX idx_binary_vuln_assertion_binary
    ON binaries.binary_vuln_assertion USING btree (binary_key);
CREATE INDEX idx_binary_vuln_assertion_cve
    ON binaries.binary_vuln_assertion USING btree (cve_id);
CREATE INDEX idx_binary_vuln_assertion_status
    ON binaries.binary_vuln_assertion USING btree (status)
    WHERE status = 'affected';

-- cve_fix_evidence: tenant scoping plus lookup by evidence key
CREATE INDEX idx_cve_fix_evidence_tenant
    ON binaries.cve_fix_evidence USING btree (tenant_id);
CREATE INDEX idx_cve_fix_evidence_key
    ON binaries.cve_fix_evidence USING btree (distro, release, source_pkg, cve_id);

-- cve_fix_index: tenant scoping, key lookup, and fixed rows only
CREATE INDEX idx_cve_fix_index_tenant
    ON binaries.cve_fix_index USING btree (tenant_id);
CREATE INDEX idx_cve_fix_index_lookup
    ON binaries.cve_fix_index USING btree (distro, release, source_pkg, cve_id);
CREATE INDEX idx_cve_fix_index_state
    ON binaries.cve_fix_index USING btree (state)
    WHERE state = 'fixed';

-- vulnerable_fingerprints: tenant scoping, CVE and component lookups,
-- equality lookup on the raw hash, and validated rows only
CREATE INDEX idx_vulnerable_fingerprints_tenant
    ON binaries.vulnerable_fingerprints USING btree (tenant_id);
CREATE INDEX idx_vulnerable_fingerprints_cve
    ON binaries.vulnerable_fingerprints USING btree (cve_id);
CREATE INDEX idx_vulnerable_fingerprints_component
    ON binaries.vulnerable_fingerprints USING btree (component, architecture);
CREATE INDEX idx_vulnerable_fingerprints_hash
    ON binaries.vulnerable_fingerprints USING hash (fingerprint_hash);
CREATE INDEX idx_vulnerable_fingerprints_validated
    ON binaries.vulnerable_fingerprints USING btree (validated)
    WHERE validated = TRUE;

-- fingerprint_corpus_metadata: tenant scoping plus lookup by package version
CREATE INDEX idx_fingerprint_corpus_tenant
    ON binaries.fingerprint_corpus_metadata USING btree (tenant_id);
CREATE INDEX idx_fingerprint_corpus_purl
    ON binaries.fingerprint_corpus_metadata USING btree (purl, version);

-- fingerprint_matches: tenant scoping plus lookups by scan, match type, and PURL
CREATE INDEX idx_fingerprint_matches_tenant
    ON binaries.fingerprint_matches USING btree (tenant_id);
CREATE INDEX idx_fingerprint_matches_scan
    ON binaries.fingerprint_matches USING btree (scan_id);
CREATE INDEX idx_fingerprint_matches_type
    ON binaries.fingerprint_matches USING btree (match_type);
CREATE INDEX idx_fingerprint_matches_purl
    ON binaries.fingerprint_matches USING btree (vulnerable_purl);

-- ============================================================================
-- ROW-LEVEL SECURITY
-- ============================================================================

-- Enable RLS on all tenant-scoped tables.
--
-- Each table gets the same three statements:
--   1. ENABLE ROW LEVEL SECURITY - turns policy enforcement on.
--   2. FORCE ROW LEVEL SECURITY  - applies the policies to the table owner too.
--   3. A FOR ALL policy whose USING clause filters visible rows and whose
--      WITH CHECK clause validates written rows against the current tenant.
--
-- The session value is cast to UUID rather than casting the column to text.
-- This keeps the predicate usable with indexes on tenant_id, and makes the
-- comparison independent of how the UUID string was written (letter case,
-- braces). A non-UUID app.tenant_id now raises a cast error instead of
-- silently matching no rows.

ALTER TABLE binaries.binary_identity ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_identity FORCE ROW LEVEL SECURITY;
CREATE POLICY binary_identity_tenant_isolation ON binaries.binary_identity
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

ALTER TABLE binaries.binary_package_map ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_package_map FORCE ROW LEVEL SECURITY;
CREATE POLICY binary_package_map_tenant_isolation ON binaries.binary_package_map
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

ALTER TABLE binaries.corpus_snapshots ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.corpus_snapshots FORCE ROW LEVEL SECURITY;
CREATE POLICY corpus_snapshots_tenant_isolation ON binaries.corpus_snapshots
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

ALTER TABLE binaries.vulnerable_buildids ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.vulnerable_buildids FORCE ROW LEVEL SECURITY;
CREATE POLICY vulnerable_buildids_tenant_isolation ON binaries.vulnerable_buildids
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

ALTER TABLE binaries.binary_vuln_assertion ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.binary_vuln_assertion FORCE ROW LEVEL SECURITY;
CREATE POLICY binary_vuln_assertion_tenant_isolation ON binaries.binary_vuln_assertion
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

ALTER TABLE binaries.cve_fix_evidence ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.cve_fix_evidence FORCE ROW LEVEL SECURITY;
CREATE POLICY cve_fix_evidence_tenant_isolation ON binaries.cve_fix_evidence
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

ALTER TABLE binaries.cve_fix_index ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.cve_fix_index FORCE ROW LEVEL SECURITY;
CREATE POLICY cve_fix_index_tenant_isolation ON binaries.cve_fix_index
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

ALTER TABLE binaries.vulnerable_fingerprints ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.vulnerable_fingerprints FORCE ROW LEVEL SECURITY;
CREATE POLICY vulnerable_fingerprints_tenant_isolation ON binaries.vulnerable_fingerprints
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

ALTER TABLE binaries.fingerprint_corpus_metadata ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fingerprint_corpus_metadata FORCE ROW LEVEL SECURITY;
CREATE POLICY fingerprint_corpus_metadata_tenant_isolation ON binaries.fingerprint_corpus_metadata
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

ALTER TABLE binaries.fingerprint_matches ENABLE ROW LEVEL SECURITY;
ALTER TABLE binaries.fingerprint_matches FORCE ROW LEVEL SECURITY;
CREATE POLICY fingerprint_matches_tenant_isolation ON binaries.fingerprint_matches
    FOR ALL USING (tenant_id = binaries_app.require_current_tenant()::uuid)
    WITH CHECK (tenant_id = binaries_app.require_current_tenant()::uuid);

3. Table Relationships

┌─────────────────────────────────────────────────────────────────────────────┐
│                          BINARIES SCHEMA                                     │
│                                                                              │
│  ┌────────────────────┐         ┌────────────────────┐                      │
│  │ corpus_snapshots   │<────────│ binary_package_map │                      │
│  │ (ingestion state)  │         │ (binary→pkg)       │                      │
│  └─────────┬──────────┘         └────────┬───────────┘                      │
│            │                              │                                  │
│            │                              ▼                                  │
│            │                    ┌────────────────────┐                      │
│            └───────────────────>│ binary_identity    │<─────────────────┐   │
│                                 │ (Build-ID, hashes) │                  │   │
│                                 └────────┬───────────┘                  │   │
│                                          │                               │   │
│            ┌─────────────────────────────┼───────────────────────────────┤   │
│            │                             │                               │   │
│            ▼                             ▼                               │   │
│  ┌────────────────────┐        ┌─────────────────────┐      ┌──────────┴───┐
│  │ vulnerable_buildids│        │ binary_vuln_         │      │fingerprint_  │
│  │ (known vuln builds)│        │ assertion            │      │matches       │
│  └────────────────────┘        │ (CVE status)         │      │(scan results)│
│                                └─────────────────────┘      └──────────────┘
│                                                                              │
│  ┌─────────────────────────────────────────────────────────────────────────┐│
│  │                      FIX INDEX (Patch-Aware)                             ││
│  │  ┌────────────────────┐         ┌────────────────────┐                  ││
│  │  │ cve_fix_evidence   │────────>│ cve_fix_index      │                  ││
│  │  │ (raw evidence)     │  merge  │ (merged best)      │                  ││
│  │  └────────────────────┘         └────────────────────┘                  ││
│  └─────────────────────────────────────────────────────────────────────────┘│
│                                                                              │
│  ┌─────────────────────────────────────────────────────────────────────────┐│
│  │                      FINGERPRINTS                                        ││
│  │  ┌────────────────────┐         ┌──────────────────────┐                ││
│  │  │vulnerable_          │         │fingerprint_corpus_   │                ││
│  │  │fingerprints         │         │metadata              │                ││
│  │  │(CVE fingerprints)   │         │(what's indexed)      │                ││
│  │  └────────────────────┘         └──────────────────────┘                ││
│  └─────────────────────────────────────────────────────────────────────────┘│
└─────────────────────────────────────────────────────────────────────────────┘

4. Query Patterns

4.1 Lookup by Build-ID

-- Find vulnerabilities for a specific Build-ID
-- Parameters: :build_id - hex Build-ID to look up (GNU Build-IDs only).
-- binary_key is unique only per tenant, so the join also matches tenant_id.
-- This keeps the result correct for roles that bypass row-level security.
SELECT
    ba.cve_id,
    ba.status,
    ba.confidence,
    ba.method
FROM binaries.binary_vuln_assertion AS ba
INNER JOIN binaries.binary_identity AS bi
    ON bi.tenant_id = ba.tenant_id
   AND bi.binary_key = ba.binary_key
WHERE bi.build_id = :build_id
  AND bi.build_id_type = 'gnu-build-id'
  AND ba.status = 'affected';

4.2 Check Fix Status (Patch-Aware)

-- Look up the merged fix status of one CVE for a distro release and source package
SELECT
    cfi.state,
    cfi.fixed_version,
    cfi.confidence,
    cfi.primary_method
FROM binaries.cve_fix_index AS cfi
WHERE (cfi.distro, cfi.release, cfi.source_pkg, cfi.cve_id)
    = (:distro, :release, :source_pkg, :cve_id);
-- Find fingerprints with similar hash (requires application-level similarity)
-- Parameters: :algorithm, :architecture - restrict candidates to validated
--             fingerprints of one algorithm and architecture.
-- The query returns candidates only. fingerprint_hash and
-- similarity_threshold are selected (appended after the original columns) so
-- the application has the data for its similarity comparison.
SELECT
    vf.cve_id,
    vf.component,
    vf.function_name,
    vf.confidence,
    vf.fingerprint_hash,
    vf.similarity_threshold
FROM binaries.vulnerable_fingerprints AS vf
WHERE vf.algorithm = :algorithm
  AND vf.architecture = :architecture
  AND vf.validated = TRUE;
-- Application performs similarity comparison on fingerprint_hash

5. Migration Strategy

5.1 Initial Migration

-- V001__create_binaries_schema.sql
-- Creates all tables, indexes, and RLS policies

5.2 Seed Data

-- S001__seed_reference_fingerprints.sql
-- Seeds fingerprints for high-impact CVEs from golden corpus

6. Performance Considerations

6.1 Table Sizing Estimates

| Table | Expected Rows | Growth Rate |
|-------|---------------|-------------|
| binary_identity | 10M | 1M/month |
| binary_package_map | 50M | 5M/month |
| vulnerable_buildids | 1M | 100K/month |
| cve_fix_index | 500K | 50K/month |
| vulnerable_fingerprints | 100K | 10K/month |
| fingerprint_matches | 10M | 1M/month |

6.2 Partitioning Candidates

  • fingerprint_matches - Partition by matched_at (monthly)
  • cve_fix_evidence - Partition by created_at (monthly)

6.3 Index Maintenance

  • Hash index on fingerprint_hash for exact matches
  • Consider bloom filter for fingerprint similarity pre-filtering

Document Version: 1.0.0 | Last Updated: 2025-12-21