save progress
This commit is contained in:
84
devops/docker/ghidra/Dockerfile.headless
Normal file
84
devops/docker/ghidra/Dockerfile.headless
Normal file
@@ -0,0 +1,84 @@
|
||||
# Copyright (c) StellaOps. All rights reserved.
|
||||
# Licensed under AGPL-3.0-or-later.
|
||||
|
||||
# Ghidra Headless Analysis Server for BinaryIndex
|
||||
#
|
||||
# This image provides Ghidra headless analysis capabilities including:
|
||||
# - Ghidra Headless Analyzer (analyzeHeadless)
|
||||
# - ghidriff for automated binary diffing
|
||||
# - Version Tracking and BSim support
|
||||
#
|
||||
# Build:
|
||||
# docker build -f Dockerfile.headless -t stellaops/ghidra-headless:11.2 .
|
||||
#
|
||||
# Run:
|
||||
# docker run --rm -v /path/to/binaries:/binaries stellaops/ghidra-headless:11.2 \
|
||||
# /projects GhidraProject -import /binaries/target.exe -analyze
|
||||
|
||||
FROM eclipse-temurin:17-jdk-jammy
|
||||
|
||||
ARG GHIDRA_VERSION=11.2
|
||||
ARG GHIDRA_BUILD_DATE=20241105
|
||||
ARG GHIDRA_SHA256
|
||||
|
||||
LABEL org.opencontainers.image.title="StellaOps Ghidra Headless"
|
||||
LABEL org.opencontainers.image.description="Ghidra headless analysis server with ghidriff for BinaryIndex"
|
||||
LABEL org.opencontainers.image.version="${GHIDRA_VERSION}"
|
||||
LABEL org.opencontainers.image.licenses="AGPL-3.0-or-later"
|
||||
LABEL org.opencontainers.image.source="https://github.com/stellaops/stellaops"
|
||||
LABEL org.opencontainers.image.vendor="StellaOps"
|
||||
|
||||
# Install dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-venv \
|
||||
curl \
|
||||
unzip \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Download and verify Ghidra
|
||||
# Note: Set GHIDRA_SHA256 build arg for production builds
|
||||
RUN curl -fsSL "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${GHIDRA_VERSION}_build/ghidra_${GHIDRA_VERSION}_PUBLIC_${GHIDRA_BUILD_DATE}.zip" \
|
||||
-o /tmp/ghidra.zip \
|
||||
&& if [ -n "${GHIDRA_SHA256}" ]; then \
|
||||
echo "${GHIDRA_SHA256} /tmp/ghidra.zip" | sha256sum -c -; \
|
||||
fi \
|
||||
&& unzip -q /tmp/ghidra.zip -d /opt \
|
||||
&& rm /tmp/ghidra.zip \
|
||||
&& ln -s /opt/ghidra_${GHIDRA_VERSION}_PUBLIC /opt/ghidra \
|
||||
&& chmod +x /opt/ghidra/support/analyzeHeadless
|
||||
|
||||
# Install ghidriff in isolated virtual environment
|
||||
RUN python3 -m venv /opt/venv \
|
||||
&& /opt/venv/bin/pip install --no-cache-dir --upgrade pip \
|
||||
&& /opt/venv/bin/pip install --no-cache-dir ghidriff
|
||||
|
||||
# Set environment variables
|
||||
ENV GHIDRA_HOME=/opt/ghidra
|
||||
ENV GHIDRA_INSTALL_DIR=/opt/ghidra
|
||||
ENV JAVA_HOME=/opt/java/openjdk
|
||||
ENV PATH="${GHIDRA_HOME}/support:/opt/venv/bin:${PATH}"
|
||||
ENV MAXMEM=4G
|
||||
|
||||
# Create working directories with proper permissions
|
||||
RUN mkdir -p /projects /scripts /output \
|
||||
&& chmod 755 /projects /scripts /output
|
||||
|
||||
# Create non-root user for security
|
||||
RUN groupadd -r ghidra && useradd -r -g ghidra ghidra \
|
||||
&& chown -R ghidra:ghidra /projects /scripts /output
|
||||
|
||||
WORKDIR /projects
|
||||
|
||||
# Healthcheck - verify Ghidra is functional
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD analyzeHeadless /tmp HealthCheck -help > /dev/null 2>&1 || exit 1
|
||||
|
||||
# Switch to non-root user
|
||||
USER ghidra
|
||||
|
||||
# Default entrypoint is analyzeHeadless
|
||||
ENTRYPOINT ["analyzeHeadless"]
|
||||
CMD ["--help"]
|
||||
77
devops/docker/ghidra/docker-compose.bsim.yml
Normal file
77
devops/docker/ghidra/docker-compose.bsim.yml
Normal file
@@ -0,0 +1,77 @@
|
||||
# Copyright (c) StellaOps. All rights reserved.
|
||||
# Licensed under AGPL-3.0-or-later.
|
||||
|
||||
# BSim PostgreSQL Database and Ghidra Headless Services
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-compose.bsim.yml up -d
|
||||
#
|
||||
# Environment variables:
|
||||
# BSIM_DB_PASSWORD - PostgreSQL password for BSim database
|
||||
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
bsim-postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: stellaops-bsim-db
|
||||
environment:
|
||||
POSTGRES_DB: bsim_corpus
|
||||
POSTGRES_USER: bsim_user
|
||||
POSTGRES_PASSWORD: ${BSIM_DB_PASSWORD:-stellaops_bsim_dev}
|
||||
POSTGRES_INITDB_ARGS: "-E UTF8 --locale=C"
|
||||
volumes:
|
||||
- bsim-data:/var/lib/postgresql/data
|
||||
- ./scripts/init-bsim.sql:/docker-entrypoint-initdb.d/10-init-bsim.sql:ro
|
||||
ports:
|
||||
- "5433:5432"
|
||||
networks:
|
||||
- stellaops-bsim
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U bsim_user -d bsim_corpus"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: unless-stopped
|
||||
|
||||
# Ghidra Headless service for BSim analysis
|
||||
ghidra-headless:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.headless
|
||||
image: stellaops/ghidra-headless:11.2
|
||||
container_name: stellaops-ghidra
|
||||
depends_on:
|
||||
bsim-postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
BSIM_DB_URL: "postgresql://bsim-postgres:5432/bsim_corpus"
|
||||
BSIM_DB_USER: bsim_user
|
||||
BSIM_DB_PASSWORD: ${BSIM_DB_PASSWORD:-stellaops_bsim_dev}
|
||||
JAVA_HOME: /opt/java/openjdk
|
||||
MAXMEM: 4G
|
||||
volumes:
|
||||
- ghidra-projects:/projects
|
||||
- ghidra-scripts:/scripts
|
||||
- ghidra-output:/output
|
||||
networks:
|
||||
- stellaops-bsim
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '4'
|
||||
memory: 8G
|
||||
# Keep container running for ad-hoc analysis
|
||||
entrypoint: ["tail", "-f", "/dev/null"]
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
bsim-data:
|
||||
driver: local
|
||||
ghidra-projects:
|
||||
ghidra-scripts:
|
||||
ghidra-output:
|
||||
|
||||
networks:
|
||||
stellaops-bsim:
|
||||
driver: bridge
|
||||
140
devops/docker/ghidra/scripts/init-bsim.sql
Normal file
140
devops/docker/ghidra/scripts/init-bsim.sql
Normal file
@@ -0,0 +1,140 @@
|
||||
-- BSim PostgreSQL Schema Initialization
|
||||
-- Copyright (c) StellaOps. All rights reserved.
|
||||
-- Licensed under AGPL-3.0-or-later.
|
||||
--
|
||||
-- This script creates the core BSim schema structure.
|
||||
-- Note: Full Ghidra BSim schema is auto-created by Ghidra tools.
|
||||
-- This provides a minimal functional schema for integration testing.
|
||||
|
||||
-- Create schema comment
|
||||
COMMENT ON DATABASE bsim_corpus IS 'Ghidra BSim function signature database for StellaOps BinaryIndex';
|
||||
|
||||
-- Enable required extensions
|
||||
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
||||
CREATE EXTENSION IF NOT EXISTS "pg_trgm";
|
||||
|
||||
-- BSim executables table
|
||||
CREATE TABLE IF NOT EXISTS bsim_executables (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
name TEXT NOT NULL,
|
||||
architecture TEXT NOT NULL,
|
||||
library_name TEXT,
|
||||
library_version TEXT,
|
||||
md5_hash BYTEA,
|
||||
sha256_hash BYTEA,
|
||||
date_added TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
UNIQUE (sha256_hash)
|
||||
);
|
||||
|
||||
-- BSim functions table
|
||||
CREATE TABLE IF NOT EXISTS bsim_functions (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
executable_id UUID NOT NULL REFERENCES bsim_executables(id) ON DELETE CASCADE,
|
||||
name TEXT NOT NULL,
|
||||
address BIGINT NOT NULL,
|
||||
flags INTEGER DEFAULT 0,
|
||||
UNIQUE (executable_id, address)
|
||||
);
|
||||
|
||||
-- BSim function vectors (feature vectors for similarity)
|
||||
CREATE TABLE IF NOT EXISTS bsim_vectors (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE,
|
||||
lsh_hash BYTEA NOT NULL, -- Locality-sensitive hash
|
||||
feature_count INTEGER NOT NULL,
|
||||
vector_data BYTEA NOT NULL, -- Serialized feature vector
|
||||
UNIQUE (function_id)
|
||||
);
|
||||
|
||||
-- BSim function signatures (compact fingerprints)
|
||||
CREATE TABLE IF NOT EXISTS bsim_signatures (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE,
|
||||
signature_type TEXT NOT NULL, -- 'basic', 'weighted', 'full'
|
||||
signature_hash BYTEA NOT NULL,
|
||||
significance REAL NOT NULL DEFAULT 0.0,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
UNIQUE (function_id, signature_type)
|
||||
);
|
||||
|
||||
-- BSim clusters (similar function groups)
|
||||
CREATE TABLE IF NOT EXISTS bsim_clusters (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
name TEXT,
|
||||
function_count INTEGER NOT NULL DEFAULT 0,
|
||||
centroid_vector BYTEA,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
-- Cluster membership
|
||||
CREATE TABLE IF NOT EXISTS bsim_cluster_members (
|
||||
cluster_id UUID NOT NULL REFERENCES bsim_clusters(id) ON DELETE CASCADE,
|
||||
function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE,
|
||||
similarity REAL NOT NULL,
|
||||
PRIMARY KEY (cluster_id, function_id)
|
||||
);
|
||||
|
||||
-- Ingestion tracking
|
||||
CREATE TABLE IF NOT EXISTS bsim_ingest_log (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
executable_id UUID REFERENCES bsim_executables(id),
|
||||
library_name TEXT NOT NULL,
|
||||
library_version TEXT,
|
||||
functions_ingested INTEGER NOT NULL DEFAULT 0,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
error_message TEXT,
|
||||
started_at TIMESTAMPTZ,
|
||||
completed_at TIMESTAMPTZ,
|
||||
ingested_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
-- Indexes for efficient querying
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_functions_executable ON bsim_functions(executable_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_functions_name ON bsim_functions(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_vectors_lsh ON bsim_vectors USING hash (lsh_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_signatures_hash ON bsim_signatures USING hash (signature_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_executables_library ON bsim_executables(library_name, library_version);
|
||||
CREATE INDEX IF NOT EXISTS idx_bsim_ingest_log_status ON bsim_ingest_log(status);
|
||||
|
||||
-- Views for common queries
|
||||
CREATE OR REPLACE VIEW bsim_function_summary AS
|
||||
SELECT
|
||||
f.id AS function_id,
|
||||
f.name AS function_name,
|
||||
f.address,
|
||||
e.name AS executable_name,
|
||||
e.library_name,
|
||||
e.library_version,
|
||||
e.architecture,
|
||||
s.significance
|
||||
FROM bsim_functions f
|
||||
JOIN bsim_executables e ON f.executable_id = e.id
|
||||
LEFT JOIN bsim_signatures s ON f.id = s.function_id AND s.signature_type = 'basic';
|
||||
|
||||
CREATE OR REPLACE VIEW bsim_library_stats AS
|
||||
SELECT
|
||||
e.library_name,
|
||||
e.library_version,
|
||||
COUNT(DISTINCT e.id) AS executable_count,
|
||||
COUNT(DISTINCT f.id) AS function_count,
|
||||
MAX(l.ingested_at) AS last_ingested
|
||||
FROM bsim_executables e
|
||||
LEFT JOIN bsim_functions f ON e.id = f.executable_id
|
||||
LEFT JOIN bsim_ingest_log l ON e.id = l.executable_id
|
||||
WHERE e.library_name IS NOT NULL
|
||||
GROUP BY e.library_name, e.library_version
|
||||
ORDER BY e.library_name, e.library_version;
|
||||
|
||||
-- Grant permissions
|
||||
GRANT ALL ON ALL TABLES IN SCHEMA public TO bsim_user;
|
||||
GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO bsim_user;
|
||||
|
||||
-- Insert schema version marker
|
||||
INSERT INTO bsim_ingest_log (library_name, functions_ingested, status, completed_at)
|
||||
VALUES ('_schema_init', 0, 'completed', now());
|
||||
|
||||
-- Log successful initialization
|
||||
DO $$
|
||||
BEGIN
|
||||
RAISE NOTICE 'BSim schema initialized successfully';
|
||||
END $$;
|
||||
Reference in New Issue
Block a user