121 lines
3.9 KiB
SQL
121 lines
3.9 KiB
SQL
-- Validation harness schema for tracking validation runs and match results
|
|
-- Migration: 005_validation_harness.sql
|
|
|
|
-- Validation runs table
|
|
CREATE TABLE IF NOT EXISTS groundtruth.validation_runs (
|
|
run_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
name TEXT NOT NULL,
|
|
description TEXT,
|
|
status TEXT NOT NULL DEFAULT 'pending',
|
|
|
|
-- Configuration (stored as JSONB)
|
|
config JSONB NOT NULL,
|
|
|
|
-- Timestamps
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
started_at TIMESTAMPTZ,
|
|
completed_at TIMESTAMPTZ,
|
|
|
|
-- Metrics (populated after completion)
|
|
total_pairs INT,
|
|
total_functions INT,
|
|
true_positives INT,
|
|
false_positives INT,
|
|
true_negatives INT,
|
|
false_negatives INT,
|
|
match_rate DOUBLE PRECISION,
|
|
precision_score DOUBLE PRECISION,
|
|
recall_score DOUBLE PRECISION,
|
|
f1_score DOUBLE PRECISION,
|
|
average_match_score DOUBLE PRECISION,
|
|
|
|
-- Mismatch counts by bucket (JSONB map)
|
|
mismatch_counts JSONB,
|
|
|
|
-- Metadata
|
|
corpus_snapshot_id TEXT,
|
|
matcher_version TEXT,
|
|
error_message TEXT,
|
|
tags TEXT[] DEFAULT '{}',
|
|
|
|
-- Constraints
|
|
CONSTRAINT valid_status CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled'))
|
|
);
|
|
|
|
-- Indexes for validation runs
|
|
CREATE INDEX IF NOT EXISTS idx_validation_runs_status ON groundtruth.validation_runs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_validation_runs_created_at ON groundtruth.validation_runs(created_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_validation_runs_tags ON groundtruth.validation_runs USING GIN (tags);
|
|
|
|
-- Match results table
|
|
CREATE TABLE IF NOT EXISTS groundtruth.match_results (
|
|
result_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
run_id UUID NOT NULL REFERENCES groundtruth.validation_runs(run_id) ON DELETE CASCADE,
|
|
security_pair_id UUID NOT NULL,
|
|
|
|
-- Source function
|
|
source_name TEXT NOT NULL,
|
|
source_demangled_name TEXT,
|
|
source_address BIGINT NOT NULL,
|
|
source_size BIGINT,
|
|
source_build_id TEXT NOT NULL,
|
|
source_binary_name TEXT NOT NULL,
|
|
|
|
-- Expected target
|
|
expected_name TEXT NOT NULL,
|
|
expected_demangled_name TEXT,
|
|
expected_address BIGINT NOT NULL,
|
|
expected_size BIGINT,
|
|
expected_build_id TEXT NOT NULL,
|
|
expected_binary_name TEXT NOT NULL,
|
|
|
|
-- Actual matched target (nullable if no match found)
|
|
actual_name TEXT,
|
|
actual_demangled_name TEXT,
|
|
actual_address BIGINT,
|
|
actual_size BIGINT,
|
|
actual_build_id TEXT,
|
|
actual_binary_name TEXT,
|
|
|
|
-- Outcome
|
|
outcome TEXT NOT NULL,
|
|
match_score DOUBLE PRECISION,
|
|
confidence TEXT,
|
|
|
|
-- Mismatch analysis
|
|
inferred_cause TEXT,
|
|
mismatch_detail JSONB,
|
|
|
|
-- Performance
|
|
match_duration_ms DOUBLE PRECISION,
|
|
|
|
-- Constraints
|
|
CONSTRAINT valid_outcome CHECK (outcome IN ('true_positive', 'false_positive', 'true_negative', 'false_negative'))
|
|
);
|
|
|
|
-- Indexes for match results
|
|
CREATE INDEX IF NOT EXISTS idx_match_results_run_id ON groundtruth.match_results(run_id);
|
|
CREATE INDEX IF NOT EXISTS idx_match_results_security_pair_id ON groundtruth.match_results(security_pair_id);
|
|
CREATE INDEX IF NOT EXISTS idx_match_results_outcome ON groundtruth.match_results(outcome);
|
|
CREATE INDEX IF NOT EXISTS idx_match_results_inferred_cause ON groundtruth.match_results(inferred_cause) WHERE inferred_cause IS NOT NULL;
|
|
|
|
-- View for run summaries
|
|
CREATE OR REPLACE VIEW groundtruth.validation_run_summaries AS
|
|
SELECT
|
|
run_id AS id,
|
|
name,
|
|
status,
|
|
created_at,
|
|
completed_at,
|
|
match_rate,
|
|
f1_score,
|
|
total_pairs AS pair_count,
|
|
total_functions AS function_count,
|
|
tags
|
|
FROM groundtruth.validation_runs;
|
|
|
|
-- Comments
|
|
COMMENT ON TABLE groundtruth.validation_runs IS 'Validation harness runs with aggregate metrics';
|
|
COMMENT ON TABLE groundtruth.match_results IS 'Per-function match results from validation runs';
|
|
COMMENT ON VIEW groundtruth.validation_run_summaries IS 'Summary view for listing validation runs';
|