Files
git.stella-ops.org/deploy/database/migrations/005_validation_harness.sql
2026-01-25 23:27:41 +02:00

121 lines
3.9 KiB
SQL

-- Validation harness schema for tracking validation runs and match results
-- Migration: 005_validation_harness.sql
-- Validation runs table
CREATE TABLE IF NOT EXISTS groundtruth.validation_runs (
run_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name TEXT NOT NULL,
description TEXT,
status TEXT NOT NULL DEFAULT 'pending',
-- Configuration (stored as JSONB)
config JSONB NOT NULL,
-- Timestamps
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
-- Metrics (populated after completion)
total_pairs INT,
total_functions INT,
true_positives INT,
false_positives INT,
true_negatives INT,
false_negatives INT,
match_rate DOUBLE PRECISION,
precision_score DOUBLE PRECISION,
recall_score DOUBLE PRECISION,
f1_score DOUBLE PRECISION,
average_match_score DOUBLE PRECISION,
-- Mismatch counts by bucket (JSONB map)
mismatch_counts JSONB,
-- Metadata
corpus_snapshot_id TEXT,
matcher_version TEXT,
error_message TEXT,
tags TEXT[] DEFAULT '{}',
-- Constraints
CONSTRAINT valid_status CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled'))
);
-- Indexes for validation runs
CREATE INDEX IF NOT EXISTS idx_validation_runs_status ON groundtruth.validation_runs(status);
CREATE INDEX IF NOT EXISTS idx_validation_runs_created_at ON groundtruth.validation_runs(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_validation_runs_tags ON groundtruth.validation_runs USING GIN (tags);
-- Match results table
CREATE TABLE IF NOT EXISTS groundtruth.match_results (
result_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
run_id UUID NOT NULL REFERENCES groundtruth.validation_runs(run_id) ON DELETE CASCADE,
security_pair_id UUID NOT NULL,
-- Source function
source_name TEXT NOT NULL,
source_demangled_name TEXT,
source_address BIGINT NOT NULL,
source_size BIGINT,
source_build_id TEXT NOT NULL,
source_binary_name TEXT NOT NULL,
-- Expected target
expected_name TEXT NOT NULL,
expected_demangled_name TEXT,
expected_address BIGINT NOT NULL,
expected_size BIGINT,
expected_build_id TEXT NOT NULL,
expected_binary_name TEXT NOT NULL,
-- Actual matched target (nullable if no match found)
actual_name TEXT,
actual_demangled_name TEXT,
actual_address BIGINT,
actual_size BIGINT,
actual_build_id TEXT,
actual_binary_name TEXT,
-- Outcome
outcome TEXT NOT NULL,
match_score DOUBLE PRECISION,
confidence TEXT,
-- Mismatch analysis
inferred_cause TEXT,
mismatch_detail JSONB,
-- Performance
match_duration_ms DOUBLE PRECISION,
-- Constraints
CONSTRAINT valid_outcome CHECK (outcome IN ('true_positive', 'false_positive', 'true_negative', 'false_negative'))
);
-- Indexes for match results
CREATE INDEX IF NOT EXISTS idx_match_results_run_id ON groundtruth.match_results(run_id);
CREATE INDEX IF NOT EXISTS idx_match_results_security_pair_id ON groundtruth.match_results(security_pair_id);
CREATE INDEX IF NOT EXISTS idx_match_results_outcome ON groundtruth.match_results(outcome);
CREATE INDEX IF NOT EXISTS idx_match_results_inferred_cause ON groundtruth.match_results(inferred_cause) WHERE inferred_cause IS NOT NULL;
-- View for run summaries
CREATE OR REPLACE VIEW groundtruth.validation_run_summaries AS
SELECT
run_id AS id,
name,
status,
created_at,
completed_at,
match_rate,
f1_score,
total_pairs AS pair_count,
total_functions AS function_count,
tags
FROM groundtruth.validation_runs;
-- Comments
COMMENT ON TABLE groundtruth.validation_runs IS 'Validation harness runs with aggregate metrics';
COMMENT ON TABLE groundtruth.match_results IS 'Per-function match results from validation runs';
COMMENT ON VIEW groundtruth.validation_run_summaries IS 'Summary view for listing validation runs';