sprints work

This commit is contained in:
master
2026-01-10 20:32:13 +02:00
parent 0d5eda86fc
commit 17d0631b8e
189 changed files with 40667 additions and 497 deletions

View File

@@ -0,0 +1,220 @@
-- CVE-Symbol Mapping PostgreSQL Schema Migration
-- Version: 20260110
-- Author: StellaOps Agent
-- Sprint: SPRINT_20260109_009_003_BE_cve_symbol_mapping
-- ============================================================================
-- Reachability Schema
-- ============================================================================
CREATE SCHEMA IF NOT EXISTS reachability;
-- ============================================================================
-- CVE-Symbol Mapping Tables
-- ============================================================================
-- PostgreSQL has no CREATE TYPE ... IF NOT EXISTS. Each enum is therefore
-- created inside a DO block that traps duplicate_object, so this migration can
-- be re-run without aborting on the type definitions.
-- NOTE: an already-existing type is left as-is; its labels are not reconciled
-- with the list below.

-- Mapping source enumeration type
DO $$
BEGIN
    CREATE TYPE reachability.mapping_source AS ENUM (
        'patch_analysis',
        'osv_advisory',
        'nvd_cpe',
        'manual_curation',
        'fuzzing_corpus',
        'exploit_database',
        'unknown'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- type already exists from a previous run
END
$$;

-- Vulnerability type enumeration (for taint analysis)
DO $$
BEGIN
    CREATE TYPE reachability.vulnerability_type AS ENUM (
        'source',
        'sink',
        'gadget',
        'both_source_and_sink',
        'unknown'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- type already exists from a previous run
END
$$;
-- Main CVE-symbol mapping table: links a CVE ID and an affected package (PURL)
-- to a vulnerable symbol, together with a confidence score and provenance.
CREATE TABLE IF NOT EXISTS reachability.cve_symbol_mappings (
    mapping_id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- CVE identification; the normalized column is derived as UPPER(cve_id)
    cve_id              TEXT NOT NULL,
    cve_id_normalized   TEXT NOT NULL GENERATED ALWAYS AS (UPPER(cve_id)) STORED,

    -- Affected package (PURL format)
    purl                TEXT NOT NULL,
    affected_versions   TEXT[],  -- Version ranges like [">=1.0.0,<2.0.0"]
    fixed_versions      TEXT[],  -- Versions where fix is applied

    -- Vulnerable symbol details
    symbol_name         TEXT NOT NULL,
    canonical_id        TEXT,    -- Normalized symbol ID from canonicalization service
    file_path           TEXT,
    start_line          INTEGER,
    end_line            INTEGER,

    -- Classification metadata; confidence is constrained to the range 0..1
    source              reachability.mapping_source NOT NULL DEFAULT 'unknown',
    vulnerability_type  reachability.vulnerability_type NOT NULL DEFAULT 'unknown',
    confidence          DECIMAL(3, 2) NOT NULL DEFAULT 0.5 CHECK (confidence >= 0 AND confidence <= 1),

    -- Provenance
    evidence_uri        TEXT,    -- stella:// URI to evidence
    source_commit_url   TEXT,
    patch_url           TEXT,

    -- Timestamps; created_at and updated_at default to insertion time
    created_at          TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at          TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    verified_at         TIMESTAMPTZ,
    verified_by         TEXT,

    -- Tenant support; rows without an explicit tenant fall under 'default'
    tenant_id           TEXT NOT NULL DEFAULT 'default'
);
-- Vulnerable symbol detail records (additional symbol metadata for a mapping).
-- Each row belongs to one cve_symbol_mappings row and is removed with it
-- (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS reachability.vulnerable_symbols (
    symbol_id               UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    mapping_id              UUID NOT NULL REFERENCES reachability.cve_symbol_mappings(mapping_id) ON DELETE CASCADE,

    -- Symbol identification
    symbol_name             TEXT NOT NULL,
    canonical_id            TEXT,
    symbol_type             TEXT,  -- 'function', 'method', 'class', 'module'

    -- Location
    file_path               TEXT,
    start_line              INTEGER,
    end_line                INTEGER,

    -- Code context
    signature               TEXT,  -- Function signature
    containing_class        TEXT,
    namespace               TEXT,

    -- Vulnerability context
    vulnerability_type      reachability.vulnerability_type NOT NULL DEFAULT 'unknown',
    is_entry_point          BOOLEAN DEFAULT FALSE,
    requires_control_flow   BOOLEAN DEFAULT FALSE,

    -- Metadata
    -- Same 0..1 bound as cve_symbol_mappings.confidence; without it
    -- DECIMAL(3, 2) would accept any value from -9.99 to 9.99.
    confidence              DECIMAL(3, 2) NOT NULL DEFAULT 0.5 CHECK (confidence >= 0 AND confidence <= 1),
    notes                   TEXT,
    created_at              TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Cache of patch analysis results, keyed by the (unique) commit URL.
CREATE TABLE IF NOT EXISTS reachability.patch_analysis (
    analysis_id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Source identification
    commit_url          TEXT NOT NULL UNIQUE,
    repository_url      TEXT,
    commit_sha          TEXT,

    -- Analysis results; extracted symbols are kept as JSONB for flexibility
    -- and default to an empty JSON array
    diff_content        TEXT,
    extracted_symbols   JSONB NOT NULL DEFAULT '[]',
    language_detected   TEXT,

    -- Metadata
    analyzed_at         TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    analyzer_version    TEXT,

    -- Error tracking; new rows start in the 'pending' status
    analysis_status     TEXT NOT NULL DEFAULT 'pending',
    error_message       TEXT
);
-- ============================================================================
-- Indexes
-- ============================================================================
-- ---- reachability.cve_symbol_mappings: single-column lookup indexes ----
CREATE INDEX IF NOT EXISTS idx_cve_mapping_cve_normalized
    ON reachability.cve_symbol_mappings (cve_id_normalized);

CREATE INDEX IF NOT EXISTS idx_cve_mapping_purl
    ON reachability.cve_symbol_mappings (purl);

CREATE INDEX IF NOT EXISTS idx_cve_mapping_symbol
    ON reachability.cve_symbol_mappings (symbol_name);

-- Partial index: rows without a canonical ID are not indexed
CREATE INDEX IF NOT EXISTS idx_cve_mapping_canonical
    ON reachability.cve_symbol_mappings (canonical_id)
    WHERE canonical_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_cve_mapping_tenant
    ON reachability.cve_symbol_mappings (tenant_id);

CREATE INDEX IF NOT EXISTS idx_cve_mapping_source
    ON reachability.cve_symbol_mappings (source);

CREATE INDEX IF NOT EXISTS idx_cve_mapping_confidence
    ON reachability.cve_symbol_mappings (confidence);

CREATE INDEX IF NOT EXISTS idx_cve_mapping_created
    ON reachability.cve_symbol_mappings (created_at);

-- Composite index for lookups by CVE and package together
CREATE INDEX IF NOT EXISTS idx_cve_mapping_cve_purl
    ON reachability.cve_symbol_mappings (cve_id_normalized, purl);

-- ---- reachability.vulnerable_symbols ----
CREATE INDEX IF NOT EXISTS idx_vuln_symbol_mapping
    ON reachability.vulnerable_symbols (mapping_id);

CREATE INDEX IF NOT EXISTS idx_vuln_symbol_name
    ON reachability.vulnerable_symbols (symbol_name);

-- Partial index: rows without a canonical ID are not indexed
CREATE INDEX IF NOT EXISTS idx_vuln_symbol_canonical
    ON reachability.vulnerable_symbols (canonical_id)
    WHERE canonical_id IS NOT NULL;

-- ---- reachability.patch_analysis ----
CREATE INDEX IF NOT EXISTS idx_patch_analysis_commit
    ON reachability.patch_analysis (commit_sha);

CREATE INDEX IF NOT EXISTS idx_patch_analysis_repo
    ON reachability.patch_analysis (repository_url);
-- ============================================================================
-- Full-text search
-- ============================================================================
-- Stored generated tsvector built from symbol_name and file_path (NULLs are
-- treated as empty strings) using the 'simple' text search configuration.
ALTER TABLE reachability.cve_symbol_mappings
    ADD COLUMN IF NOT EXISTS symbol_search_vector tsvector
        GENERATED ALWAYS AS (
            to_tsvector('simple', coalesce(symbol_name, '') || ' ' || coalesce(file_path, ''))
        ) STORED;

-- GIN index backing full-text queries against the generated vector
CREATE INDEX IF NOT EXISTS idx_cve_mapping_fts
    ON reachability.cve_symbol_mappings
    USING GIN (symbol_search_vector);
-- ============================================================================
-- Trigger for updated_at
-- ============================================================================
-- Row-level trigger function: overwrites NEW.updated_at with NOW() (the start
-- time of the current transaction) and returns the modified row.
CREATE OR REPLACE FUNCTION reachability.update_modified_column()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE TRIGGER has no IF NOT EXISTS form and CREATE OR REPLACE TRIGGER needs
-- PostgreSQL 14+, so drop any trigger left by an earlier run before creating
-- it. This keeps the migration re-runnable.
DROP TRIGGER IF EXISTS update_cve_mapping_modtime ON reachability.cve_symbol_mappings;

-- Refresh updated_at on every row UPDATE of cve_symbol_mappings
CREATE TRIGGER update_cve_mapping_modtime
    BEFORE UPDATE ON reachability.cve_symbol_mappings
    FOR EACH ROW
    EXECUTE FUNCTION reachability.update_modified_column();
-- ============================================================================
-- Comments for documentation
-- ============================================================================
-- Catalog descriptions (visible via \d+ / pg_description)

-- Schema
COMMENT ON SCHEMA reachability
    IS 'Hybrid reachability analysis: CVE-symbol mappings, static/runtime evidence';

-- cve_symbol_mappings and selected columns
COMMENT ON TABLE reachability.cve_symbol_mappings
    IS 'Maps CVE IDs to vulnerable symbols with confidence scores';
COMMENT ON COLUMN reachability.cve_symbol_mappings.cve_id_normalized
    IS 'Uppercase normalized CVE ID for case-insensitive lookup';
COMMENT ON COLUMN reachability.cve_symbol_mappings.canonical_id
    IS 'Symbol canonical ID from canonicalization service';
COMMENT ON COLUMN reachability.cve_symbol_mappings.evidence_uri
    IS 'stella:// URI pointing to evidence bundle';

-- Supporting tables
COMMENT ON TABLE reachability.vulnerable_symbols
    IS 'Additional symbol details for a CVE mapping';
COMMENT ON TABLE reachability.patch_analysis
    IS 'Cached patch analysis results for commit URLs';
-- ============================================================================
-- Initial data / seed (optional well-known CVEs for testing)
-- ============================================================================
-- Seed rows for well-known CVEs.
-- The table's only unique constraint is the primary key, which is filled with
-- a random UUID, so ON CONFLICT DO NOTHING could never fire and every re-run
-- would insert the seed rows again. Instead, a row is inserted only when no
-- mapping with the same normalized CVE ID, PURL and symbol name already exists
-- for the 'default' tenant (the tenant these rows receive via the column
-- default).
INSERT INTO reachability.cve_symbol_mappings (cve_id, purl, symbol_name, file_path, source, confidence, vulnerability_type)
SELECT
    seed.cve_id,
    seed.purl,
    seed.symbol_name,
    seed.file_path,
    CAST(seed.source AS reachability.mapping_source),
    seed.confidence,
    CAST(seed.vulnerability_type AS reachability.vulnerability_type)
FROM (
    VALUES
        -- Example: Log4Shell (CVE-2021-44228)
        ('CVE-2021-44228', 'pkg:maven/org.apache.logging.log4j/log4j-core@2.14.1', 'JndiLookup.lookup', 'log4j-core/src/main/java/org/apache/logging/log4j/core/lookup/JndiLookup.java', 'manual_curation', 0.99, 'sink'),
        ('CVE-2021-44228', 'pkg:maven/org.apache.logging.log4j/log4j-core@2.14.1', 'JndiManager.lookup', 'log4j-core/src/main/java/org/apache/logging/log4j/core/net/JndiManager.java', 'manual_curation', 0.95, 'sink'),
        -- Example: Spring4Shell (CVE-2022-22965)
        ('CVE-2022-22965', 'pkg:maven/org.springframework/spring-beans@5.3.17', 'CachedIntrospectionResults.getBeanInfo', 'spring-beans/src/main/java/org/springframework/beans/CachedIntrospectionResults.java', 'patch_analysis', 0.90, 'source'),
        -- Example: polyfill.io supply chain (CVE-2024-38526); no file path recorded
        ('CVE-2024-38526', 'pkg:npm/polyfill.io', 'window.polyfill', NULL, 'manual_curation', 0.85, 'source')
) AS seed (cve_id, purl, symbol_name, file_path, source, confidence, vulnerability_type)
WHERE NOT EXISTS (
    SELECT 1
    FROM reachability.cve_symbol_mappings AS existing
    WHERE existing.cve_id_normalized = UPPER(seed.cve_id)
      AND existing.purl = seed.purl
      AND existing.symbol_name = seed.symbol_name
      AND existing.tenant_id = 'default'
);