git.stella-ops.org/deploy/database/migrations/005_validation_harness.sql

-- Validation harness schema for tracking validation runs and match results
-- Migration: 005_validation_harness.sql

-- Validation runs table
CREATE TABLE IF NOT EXISTS groundtruth.validation_runs (
    run_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name TEXT NOT NULL,
    description TEXT,
    status TEXT NOT NULL DEFAULT 'pending',

    -- Configuration (stored as JSONB)
    config JSONB NOT NULL,

    -- Timestamps
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ,

    -- Metrics (populated after completion)
    total_pairs INT,
    total_functions INT,
    true_positives INT,
    false_positives INT,
    true_negatives INT,
    false_negatives INT,
    match_rate DOUBLE PRECISION,
    precision_score DOUBLE PRECISION,
    recall_score DOUBLE PRECISION,
    f1_score DOUBLE PRECISION,
    average_match_score DOUBLE PRECISION,

    -- Mismatch counts by bucket (JSONB map)
    mismatch_counts JSONB,

    -- Metadata
    corpus_snapshot_id TEXT,
    matcher_version TEXT,
    error_message TEXT,
    tags TEXT[] DEFAULT '{}',

    -- Constraints
    CONSTRAINT valid_status CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled'))
);

-- Indexes for validation runs
CREATE INDEX IF NOT EXISTS idx_validation_runs_status ON groundtruth.validation_runs(status);
CREATE INDEX IF NOT EXISTS idx_validation_runs_created_at ON groundtruth.validation_runs(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_validation_runs_tags ON groundtruth.validation_runs USING GIN (tags);

-- Match results table
CREATE TABLE IF NOT EXISTS groundtruth.match_results (
    result_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    run_id UUID NOT NULL REFERENCES groundtruth.validation_runs(run_id) ON DELETE CASCADE,
    security_pair_id UUID NOT NULL,

    -- Source function
    source_name TEXT NOT NULL,
    source_demangled_name TEXT,
    source_address BIGINT NOT NULL,
    source_size BIGINT,
    source_build_id TEXT NOT NULL,
    source_binary_name TEXT NOT NULL,

    -- Expected target
    expected_name TEXT NOT NULL,
    expected_demangled_name TEXT,
    expected_address BIGINT NOT NULL,
    expected_size BIGINT,
    expected_build_id TEXT NOT NULL,
    expected_binary_name TEXT NOT NULL,

    -- Actual matched target (nullable if no match found)
    actual_name TEXT,
    actual_demangled_name TEXT,
    actual_address BIGINT,
    actual_size BIGINT,
    actual_build_id TEXT,
    actual_binary_name TEXT,

    -- Outcome
    outcome TEXT NOT NULL,
    match_score DOUBLE PRECISION,
    confidence TEXT,

    -- Mismatch analysis
    inferred_cause TEXT,
    mismatch_detail JSONB,

    -- Performance
    match_duration_ms DOUBLE PRECISION,

    -- Constraints
    CONSTRAINT valid_outcome CHECK (outcome IN ('true_positive', 'false_positive', 'true_negative', 'false_negative'))
);

-- Indexes for match results
CREATE INDEX IF NOT EXISTS idx_match_results_run_id ON groundtruth.match_results(run_id);
CREATE INDEX IF NOT EXISTS idx_match_results_security_pair_id ON groundtruth.match_results(security_pair_id);
CREATE INDEX IF NOT EXISTS idx_match_results_outcome ON groundtruth.match_results(outcome);
CREATE INDEX IF NOT EXISTS idx_match_results_inferred_cause ON groundtruth.match_results(inferred_cause) WHERE inferred_cause IS NOT NULL;

-- View for run summaries
CREATE OR REPLACE VIEW groundtruth.validation_run_summaries AS
SELECT
    run_id AS id,
    name,
    status,
    created_at,
    completed_at,
    match_rate,
    f1_score,
    total_pairs AS pair_count,
    total_functions AS function_count,
    tags
FROM groundtruth.validation_runs;

-- Comments
COMMENT ON TABLE groundtruth.validation_runs IS 'Validation harness runs with aggregate metrics';
COMMENT ON TABLE groundtruth.match_results IS 'Per-function match results from validation runs';
COMMENT ON VIEW groundtruth.validation_run_summaries IS 'Summary view for listing validation runs';