-- BSim PostgreSQL Schema Initialization -- Copyright (c) StellaOps. All rights reserved. -- Licensed under AGPL-3.0-or-later. -- -- This script creates the core BSim schema structure. -- Note: Full Ghidra BSim schema is auto-created by Ghidra tools. -- This provides a minimal functional schema for integration testing. -- Create schema comment COMMENT ON DATABASE bsim_corpus IS 'Ghidra BSim function signature database for StellaOps BinaryIndex'; -- Enable required extensions CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS "pg_trgm"; -- BSim executables table CREATE TABLE IF NOT EXISTS bsim_executables ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), name TEXT NOT NULL, architecture TEXT NOT NULL, library_name TEXT, library_version TEXT, md5_hash BYTEA, sha256_hash BYTEA, date_added TIMESTAMPTZ NOT NULL DEFAULT now(), UNIQUE (sha256_hash) ); -- BSim functions table CREATE TABLE IF NOT EXISTS bsim_functions ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), executable_id UUID NOT NULL REFERENCES bsim_executables(id) ON DELETE CASCADE, name TEXT NOT NULL, address BIGINT NOT NULL, flags INTEGER DEFAULT 0, UNIQUE (executable_id, address) ); -- BSim function vectors (feature vectors for similarity) CREATE TABLE IF NOT EXISTS bsim_vectors ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE, lsh_hash BYTEA NOT NULL, -- Locality-sensitive hash feature_count INTEGER NOT NULL, vector_data BYTEA NOT NULL, -- Serialized feature vector UNIQUE (function_id) ); -- BSim function signatures (compact fingerprints) CREATE TABLE IF NOT EXISTS bsim_signatures ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE, signature_type TEXT NOT NULL, -- 'basic', 'weighted', 'full' signature_hash BYTEA NOT NULL, significance REAL NOT NULL DEFAULT 0.0, created_at TIMESTAMPTZ NOT NULL DEFAULT now(), UNIQUE (function_id, signature_type) ); -- BSim clusters (similar function groups) CREATE TABLE IF NOT EXISTS bsim_clusters ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), name TEXT, function_count INTEGER NOT NULL DEFAULT 0, centroid_vector BYTEA, created_at TIMESTAMPTZ NOT NULL DEFAULT now() ); -- Cluster membership CREATE TABLE IF NOT EXISTS bsim_cluster_members ( cluster_id UUID NOT NULL REFERENCES bsim_clusters(id) ON DELETE CASCADE, function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE, similarity REAL NOT NULL, PRIMARY KEY (cluster_id, function_id) ); -- Ingestion tracking CREATE TABLE IF NOT EXISTS bsim_ingest_log ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), executable_id UUID REFERENCES bsim_executables(id), library_name TEXT NOT NULL, library_version TEXT, functions_ingested INTEGER NOT NULL DEFAULT 0, status TEXT NOT NULL DEFAULT 'pending', error_message TEXT, started_at TIMESTAMPTZ, completed_at TIMESTAMPTZ, ingested_at TIMESTAMPTZ NOT NULL DEFAULT now() ); -- Indexes for efficient querying CREATE INDEX IF NOT EXISTS idx_bsim_functions_executable ON bsim_functions(executable_id); CREATE INDEX IF NOT EXISTS idx_bsim_functions_name ON bsim_functions(name); CREATE INDEX IF NOT EXISTS idx_bsim_vectors_lsh ON bsim_vectors USING hash (lsh_hash); CREATE INDEX IF NOT EXISTS idx_bsim_signatures_hash ON bsim_signatures USING hash (signature_hash); CREATE INDEX IF NOT EXISTS idx_bsim_executables_library ON bsim_executables(library_name, library_version); CREATE INDEX IF NOT EXISTS idx_bsim_ingest_log_status ON bsim_ingest_log(status); -- Views for common queries CREATE OR REPLACE VIEW bsim_function_summary AS SELECT f.id AS function_id, f.name AS function_name, f.address, e.name AS executable_name, e.library_name, e.library_version, e.architecture, s.significance FROM bsim_functions f JOIN bsim_executables e ON f.executable_id = e.id LEFT JOIN bsim_signatures s ON f.id = s.function_id AND s.signature_type = 'basic'; CREATE OR REPLACE VIEW bsim_library_stats AS SELECT e.library_name, e.library_version, COUNT(DISTINCT e.id) AS executable_count, COUNT(DISTINCT f.id) AS function_count, MAX(l.ingested_at) AS last_ingested FROM bsim_executables e LEFT JOIN bsim_functions f ON e.id = f.executable_id LEFT JOIN bsim_ingest_log l ON e.id = l.executable_id WHERE e.library_name IS NOT NULL GROUP BY e.library_name, e.library_version ORDER BY e.library_name, e.library_version; -- Grant permissions GRANT ALL ON ALL TABLES IN SCHEMA public TO bsim_user; GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO bsim_user; -- Insert schema version marker INSERT INTO bsim_ingest_log (library_name, functions_ingested, status, completed_at) VALUES ('_schema_init', 0, 'completed', now()); -- Log successful initialization DO $$ BEGIN RAISE NOTICE 'BSim schema initialized successfully'; END $$;