-- Validation harness schema for tracking validation runs and match results -- Migration: 005_validation_harness.sql -- Validation runs table CREATE TABLE IF NOT EXISTS groundtruth.validation_runs ( run_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), name TEXT NOT NULL, description TEXT, status TEXT NOT NULL DEFAULT 'pending', -- Configuration (stored as JSONB) config JSONB NOT NULL, -- Timestamps created_at TIMESTAMPTZ NOT NULL DEFAULT now(), started_at TIMESTAMPTZ, completed_at TIMESTAMPTZ, -- Metrics (populated after completion) total_pairs INT, total_functions INT, true_positives INT, false_positives INT, true_negatives INT, false_negatives INT, match_rate DOUBLE PRECISION, precision_score DOUBLE PRECISION, recall_score DOUBLE PRECISION, f1_score DOUBLE PRECISION, average_match_score DOUBLE PRECISION, -- Mismatch counts by bucket (JSONB map) mismatch_counts JSONB, -- Metadata corpus_snapshot_id TEXT, matcher_version TEXT, error_message TEXT, tags TEXT[] DEFAULT '{}', -- Constraints CONSTRAINT valid_status CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled')) ); -- Indexes for validation runs CREATE INDEX IF NOT EXISTS idx_validation_runs_status ON groundtruth.validation_runs(status); CREATE INDEX IF NOT EXISTS idx_validation_runs_created_at ON groundtruth.validation_runs(created_at DESC); CREATE INDEX IF NOT EXISTS idx_validation_runs_tags ON groundtruth.validation_runs USING GIN (tags); -- Match results table CREATE TABLE IF NOT EXISTS groundtruth.match_results ( result_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), run_id UUID NOT NULL REFERENCES groundtruth.validation_runs(run_id) ON DELETE CASCADE, security_pair_id UUID NOT NULL, -- Source function source_name TEXT NOT NULL, source_demangled_name TEXT, source_address BIGINT NOT NULL, source_size BIGINT, source_build_id TEXT NOT NULL, source_binary_name TEXT NOT NULL, -- Expected target expected_name TEXT NOT NULL, expected_demangled_name TEXT, expected_address BIGINT NOT NULL, expected_size BIGINT, expected_build_id TEXT NOT NULL, expected_binary_name TEXT NOT NULL, -- Actual matched target (nullable if no match found) actual_name TEXT, actual_demangled_name TEXT, actual_address BIGINT, actual_size BIGINT, actual_build_id TEXT, actual_binary_name TEXT, -- Outcome outcome TEXT NOT NULL, match_score DOUBLE PRECISION, confidence TEXT, -- Mismatch analysis inferred_cause TEXT, mismatch_detail JSONB, -- Performance match_duration_ms DOUBLE PRECISION, -- Constraints CONSTRAINT valid_outcome CHECK (outcome IN ('true_positive', 'false_positive', 'true_negative', 'false_negative')) ); -- Indexes for match results CREATE INDEX IF NOT EXISTS idx_match_results_run_id ON groundtruth.match_results(run_id); CREATE INDEX IF NOT EXISTS idx_match_results_security_pair_id ON groundtruth.match_results(security_pair_id); CREATE INDEX IF NOT EXISTS idx_match_results_outcome ON groundtruth.match_results(outcome); CREATE INDEX IF NOT EXISTS idx_match_results_inferred_cause ON groundtruth.match_results(inferred_cause) WHERE inferred_cause IS NOT NULL; -- View for run summaries CREATE OR REPLACE VIEW groundtruth.validation_run_summaries AS SELECT run_id AS id, name, status, created_at, completed_at, match_rate, f1_score, total_pairs AS pair_count, total_functions AS function_count, tags FROM groundtruth.validation_runs; -- Comments COMMENT ON TABLE groundtruth.validation_runs IS 'Validation harness runs with aggregate metrics'; COMMENT ON TABLE groundtruth.match_results IS 'Per-function match results from validation runs'; COMMENT ON VIEW groundtruth.validation_run_summaries IS 'Summary view for listing validation runs';