save progress
This commit is contained in:
140
devops/docker/ghidra/scripts/init-bsim.sql
Normal file
140
devops/docker/ghidra/scripts/init-bsim.sql
Normal file
@@ -0,0 +1,140 @@
|
||||
-- BSim PostgreSQL Schema Initialization
|
||||
-- Copyright (c) StellaOps. All rights reserved.
|
||||
-- Licensed under AGPL-3.0-or-later.
|
||||
--
|
||||
-- This script creates the core BSim schema structure.
|
||||
-- Note: Full Ghidra BSim schema is auto-created by Ghidra tools.
|
||||
-- This provides a minimal functional schema for integration testing.
|
||||
|
||||
-- Create schema comment
|
||||
COMMENT ON DATABASE bsim_corpus IS 'Ghidra BSim function signature database for StellaOps BinaryIndex';
|
||||
|
||||
-- Enable required extensions
|
||||
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
||||
CREATE EXTENSION IF NOT EXISTS "pg_trgm";
|
||||
|
||||
-- BSim executables table
|
||||
CREATE TABLE IF NOT EXISTS bsim_executables (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
name TEXT NOT NULL,
|
||||
architecture TEXT NOT NULL,
|
||||
library_name TEXT,
|
||||
library_version TEXT,
|
||||
md5_hash BYTEA,
|
||||
sha256_hash BYTEA,
|
||||
date_added TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
UNIQUE (sha256_hash)
|
||||
);
|
||||
|
||||
-- BSim functions table
|
||||
CREATE TABLE IF NOT EXISTS bsim_functions (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
executable_id UUID NOT NULL REFERENCES bsim_executables(id) ON DELETE CASCADE,
|
||||
name TEXT NOT NULL,
|
||||
address BIGINT NOT NULL,
|
||||
flags INTEGER DEFAULT 0,
|
||||
UNIQUE (executable_id, address)
|
||||
);
|
||||
|
||||
-- BSim function vectors (feature vectors for similarity)
|
||||
CREATE TABLE IF NOT EXISTS bsim_vectors (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE,
|
||||
lsh_hash BYTEA NOT NULL, -- Locality-sensitive hash
|
||||
feature_count INTEGER NOT NULL,
|
||||
vector_data BYTEA NOT NULL, -- Serialized feature vector
|
||||
UNIQUE (function_id)
|
||||
);
|
||||
|
||||
-- BSim function signatures (compact fingerprints)
|
||||
CREATE TABLE IF NOT EXISTS bsim_signatures (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE,
|
||||
signature_type TEXT NOT NULL, -- 'basic', 'weighted', 'full'
|
||||
signature_hash BYTEA NOT NULL,
|
||||
significance REAL NOT NULL DEFAULT 0.0,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
UNIQUE (function_id, signature_type)
|
||||
);
|
||||
|
||||
-- BSim clusters (similar function groups)
|
||||
CREATE TABLE IF NOT EXISTS bsim_clusters (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
name TEXT,
|
||||
function_count INTEGER NOT NULL DEFAULT 0,
|
||||
centroid_vector BYTEA,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
-- Cluster membership
|
||||
CREATE TABLE IF NOT EXISTS bsim_cluster_members (
|
||||
cluster_id UUID NOT NULL REFERENCES bsim_clusters(id) ON DELETE CASCADE,
|
||||
function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE,
|
||||
similarity REAL NOT NULL,
|
||||
PRIMARY KEY (cluster_id, function_id)
|
||||
);
|
||||
|
||||
-- Ingestion tracking
|
||||
CREATE TABLE IF NOT EXISTS bsim_ingest_log (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
executable_id UUID REFERENCES bsim_executables(id),
|
||||
library_name TEXT NOT NULL,
|
||||
library_version TEXT,
|
||||
functions_ingested INTEGER NOT NULL DEFAULT 0,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
error_message TEXT,
|
||||
started_at TIMESTAMPTZ,
|
||||
completed_at TIMESTAMPTZ,
|
||||
ingested_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
-- Indexes for efficient querying
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_functions_executable ON bsim_functions(executable_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_functions_name ON bsim_functions(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_vectors_lsh ON bsim_vectors USING hash (lsh_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_signatures_hash ON bsim_signatures USING hash (signature_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_executables_library ON bsim_executables(library_name, library_version);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_ingest_log_status ON bsim_ingest_log(status);
|
||||
|
||||
-- Views for common queries
|
||||
CREATE OR REPLACE VIEW bsim_function_summary AS
|
||||
SELECT
|
||||
f.id AS function_id,
|
||||
f.name AS function_name,
|
||||
f.address,
|
||||
e.name AS executable_name,
|
||||
e.library_name,
|
||||
e.library_version,
|
||||
e.architecture,
|
||||
s.significance
|
||||
FROM bsim_functions f
|
||||
JOIN bsim_executables e ON f.executable_id = e.id
|
||||
LEFT JOIN bsim_signatures s ON f.id = s.function_id AND s.signature_type = 'basic';
|
||||
|
||||
CREATE OR REPLACE VIEW bsim_library_stats AS
|
||||
SELECT
|
||||
e.library_name,
|
||||
e.library_version,
|
||||
COUNT(DISTINCT e.id) AS executable_count,
|
||||
COUNT(DISTINCT f.id) AS function_count,
|
||||
MAX(l.ingested_at) AS last_ingested
|
||||
FROM bsim_executables e
|
||||
LEFT JOIN bsim_functions f ON e.id = f.executable_id
|
||||
LEFT JOIN bsim_ingest_log l ON e.id = l.executable_id
|
||||
WHERE e.library_name IS NOT NULL
|
||||
GROUP BY e.library_name, e.library_version
|
||||
ORDER BY e.library_name, e.library_version;
|
||||
|
||||
-- Grant permissions
|
||||
GRANT ALL ON ALL TABLES IN SCHEMA public TO bsim_user;
|
||||
GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO bsim_user;
|
||||
|
||||
-- Insert schema version marker
|
||||
INSERT INTO bsim_ingest_log (library_name, functions_ingested, status, completed_at)
|
||||
VALUES ('_schema_init', 0, 'completed', now());
|
||||
|
||||
-- Log successful initialization
|
||||
DO $$
|
||||
BEGIN
|
||||
RAISE NOTICE 'BSim schema initialized successfully';
|
||||
END $$;
|
||||
Reference in New Issue
Block a user