save progress

This commit is contained in:
StellaOps Bot
2026-01-06 09:42:02 +02:00
parent 94d68bee8b
commit 37e11918e0
443 changed files with 85863 additions and 897 deletions

View File

@@ -0,0 +1,84 @@
# Copyright (c) StellaOps. All rights reserved.
# Licensed under AGPL-3.0-or-later.
# Ghidra Headless Analysis Server for BinaryIndex
#
# This image provides Ghidra headless analysis capabilities including:
# - Ghidra Headless Analyzer (analyzeHeadless)
# - ghidriff for automated binary diffing
# - Version Tracking and BSim support
#
# Build:
# docker build -f Dockerfile.headless -t stellaops/ghidra-headless:11.2 .
#
# Run:
#   docker run --rm -v /path/to/binaries:/binaries stellaops/ghidra-headless:11.2 \
#     /projects GhidraProject -import /binaries/target.exe
#   (auto-analysis runs by default on import; pass -noanalysis to skip it)
# Base image: Eclipse Temurin JDK 17 on Ubuntu 22.04 (jammy).
FROM eclipse-temurin:17-jdk-jammy

# Build-time parameters selecting the Ghidra release to install.
# GHIDRA_SHA256 has no default; it is consumed by the download step.
ARG GHIDRA_VERSION=11.2
ARG GHIDRA_BUILD_DATE=20241105
ARG GHIDRA_SHA256

# OCI image metadata, declared in a single instruction.
LABEL org.opencontainers.image.title="StellaOps Ghidra Headless" \
      org.opencontainers.image.description="Ghidra headless analysis server with ghidriff for BinaryIndex" \
      org.opencontainers.image.version="${GHIDRA_VERSION}" \
      org.opencontainers.image.licenses="AGPL-3.0-or-later" \
      org.opencontainers.image.source="https://github.com/stellaops/stellaops" \
      org.opencontainers.image.vendor="StellaOps"
# OS packages: Python (with pip and venv support) for ghidriff, plus curl and
# unzip for fetching and unpacking the Ghidra release. The apt caches are
# removed in the same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        python3 \
        python3-pip \
        python3-venv \
        unzip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Download and verify Ghidra.
# GHIDRA_SHA256 stays optional so ad-hoc builds keep working, but an unverified
# download is now reported on stderr instead of being skipped silently.
# Always pass --build-arg GHIDRA_SHA256=<digest> for production builds.
RUN curl -fsSL "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${GHIDRA_VERSION}_build/ghidra_${GHIDRA_VERSION}_PUBLIC_${GHIDRA_BUILD_DATE}.zip" \
        -o /tmp/ghidra.zip \
    && if [ -n "${GHIDRA_SHA256}" ]; then \
           # Canonical "<digest><two spaces><file>" checksum line; a mismatch fails the build.
           echo "${GHIDRA_SHA256}  /tmp/ghidra.zip" | sha256sum -c -; \
       else \
           echo "WARNING: GHIDRA_SHA256 is not set; skipping checksum verification of the Ghidra download" >&2; \
       fi \
    && unzip -q /tmp/ghidra.zip -d /opt \
    && rm /tmp/ghidra.zip \
    # Version-independent path used by the ENV settings and the entrypoint.
    && ln -s "/opt/ghidra_${GHIDRA_VERSION}_PUBLIC" /opt/ghidra \
    && chmod +x /opt/ghidra/support/analyzeHeadless
# Install ghidriff in an isolated virtual environment.
# GHIDRIFF_VERSION is optional: when empty (the default) the latest release is
# installed, exactly as before; when set, that exact version is pinned so the
# image is reproducible, e.g. --build-arg GHIDRIFF_VERSION=<x.y.z>.
# NOTE(review): ghidriff releases presumably target specific Ghidra versions;
# confirm which release matches GHIDRA_VERSION before pinning.
ARG GHIDRIFF_VERSION=
RUN python3 -m venv /opt/venv \
    && /opt/venv/bin/pip install --no-cache-dir --upgrade pip \
    # ${VAR:+...} expands to "==<version>" only when GHIDRIFF_VERSION is non-empty.
    && /opt/venv/bin/pip install --no-cache-dir "ghidriff${GHIDRIFF_VERSION:+==${GHIDRIFF_VERSION}}"
# Runtime environment. Both GHIDRA_HOME and GHIDRA_INSTALL_DIR point at the
# /opt/ghidra install path.
ENV GHIDRA_HOME=/opt/ghidra \
    GHIDRA_INSTALL_DIR=/opt/ghidra \
    JAVA_HOME=/opt/java/openjdk \
    MAXMEM=4G
# Kept as its own instruction: ${GHIDRA_HOME} is only expanded if it was
# defined by an earlier ENV instruction, not by the one being evaluated.
ENV PATH="${GHIDRA_HOME}/support:/opt/venv/bin:${PATH}"
# Create working directories with proper permissions
RUN mkdir -p /projects /scripts /output \
    && chmod 755 /projects /scripts /output
# Create non-root user for security.
# -m is required: for system accounts (-r) useradd does not create a home
# directory on its own, and the tools run as this user need a writable home
# for their per-user settings and caches.
RUN groupadd -r ghidra \
    && useradd -r -m -d /home/ghidra -g ghidra ghidra \
    && chown -R ghidra:ghidra /projects /scripts /output
# Run as the unprivileged account from here on.
USER ghidra

# /projects already exists and is owned by the ghidra user.
WORKDIR /projects

# Default entrypoint is analyzeHeadless; "--help" is the argument used when the
# caller passes none.
ENTRYPOINT ["analyzeHeadless"]
CMD ["--help"]

# Healthcheck: launches analyzeHeadless, discards its output, and reports
# unhealthy when it exits non-zero.
# NOTE(review): "-help" does not appear in the documented analyzeHeadless
# options - confirm the exit status this probe actually produces.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD analyzeHeadless /tmp HealthCheck -help >/dev/null 2>&1 || exit 1

View File

@@ -0,0 +1,77 @@
# Copyright (c) StellaOps. All rights reserved.
# Licensed under AGPL-3.0-or-later.
# BSim PostgreSQL Database and Ghidra Headless Services
#
# Usage:
# docker compose -f docker-compose.bsim.yml up -d
#
# Environment variables:
# BSIM_DB_PASSWORD - PostgreSQL password for BSim database
version: "3.8"

services:
  # PostgreSQL instance holding the BSim corpus.
  bsim-postgres:
    image: postgres:16-alpine
    container_name: stellaops-bsim-db
    restart: unless-stopped
    networks:
      - stellaops-bsim
    ports:
      # Container port 5432 is published on host port 5433.
      - "5433:5432"
    environment:
      POSTGRES_USER: bsim_user
      POSTGRES_DB: bsim_corpus
      # Falls back to a development password when BSIM_DB_PASSWORD is unset.
      POSTGRES_PASSWORD: ${BSIM_DB_PASSWORD:-stellaops_bsim_dev}
      POSTGRES_INITDB_ARGS: "-E UTF8 --locale=C"
    volumes:
      - bsim-data:/var/lib/postgresql/data
      # Schema bootstrap script, mounted read-only into the init directory.
      - ./scripts/init-bsim.sql:/docker-entrypoint-initdb.d/10-init-bsim.sql:ro
    healthcheck:
      test:
        - CMD-SHELL
        - pg_isready -U bsim_user -d bsim_corpus
      interval: 10s
      timeout: 5s
      retries: 5

  # Ghidra Headless service for BSim analysis
  ghidra-headless:
    image: stellaops/ghidra-headless:11.2
    build:
      context: .
      dockerfile: Dockerfile.headless
    container_name: stellaops-ghidra
    restart: unless-stopped
    # Keep container running for ad-hoc analysis
    entrypoint:
      - tail
      - "-f"
      - /dev/null
    depends_on:
      # Start only after the database reports healthy.
      bsim-postgres:
        condition: service_healthy
    networks:
      - stellaops-bsim
    environment:
      BSIM_DB_URL: "postgresql://bsim-postgres:5432/bsim_corpus"
      BSIM_DB_USER: bsim_user
      BSIM_DB_PASSWORD: ${BSIM_DB_PASSWORD:-stellaops_bsim_dev}
      JAVA_HOME: /opt/java/openjdk
      MAXMEM: 4G
    volumes:
      - ghidra-projects:/projects
      - ghidra-scripts:/scripts
      - ghidra-output:/output
    deploy:
      resources:
        limits:
          cpus: "4"
          memory: 8G

volumes:
  bsim-data:
    driver: local
  ghidra-projects:
  ghidra-scripts:
  ghidra-output:

networks:
  stellaops-bsim:
    driver: bridge

View File

@@ -0,0 +1,140 @@
-- BSim PostgreSQL Schema Initialization
-- Copyright (c) StellaOps. All rights reserved.
-- Licensed under AGPL-3.0-or-later.
--
-- This script creates the core BSim schema structure.
-- Note: Full Ghidra BSim schema is auto-created by Ghidra tools.
-- This provides a minimal functional schema for integration testing.
-- Attach a human-readable description to the database itself.
COMMENT ON DATABASE bsim_corpus
    IS 'Ghidra BSim function signature database for StellaOps BinaryIndex';

-- Required extensions.
-- uuid-ossp provides uuid_generate_v4(), the default for the UUID keys in this script.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- NOTE(review): no object in this script visibly uses pg_trgm; confirm it is still needed.
CREATE EXTENSION IF NOT EXISTS pg_trgm;
-- BSim executables table.
-- Constraints are declared at table level under the names PostgreSQL would
-- generate by default, so the resulting catalog is unchanged.
-- sha256_hash is nullable: the UNIQUE constraint only de-duplicates rows that
-- actually carry a hash.
CREATE TABLE IF NOT EXISTS bsim_executables (
    id              UUID        DEFAULT uuid_generate_v4(),
    name            TEXT        NOT NULL,
    architecture    TEXT        NOT NULL,
    library_name    TEXT,
    library_version TEXT,
    md5_hash        BYTEA,
    sha256_hash     BYTEA,
    date_added      TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT bsim_executables_pkey PRIMARY KEY (id),
    CONSTRAINT bsim_executables_sha256_hash_key UNIQUE (sha256_hash)
);
-- BSim functions table: functions belonging to an executable.
-- Rows are removed together with their executable (ON DELETE CASCADE) and an
-- address may appear only once per executable.
-- Constraint names match PostgreSQL's defaults.
CREATE TABLE IF NOT EXISTS bsim_functions (
    id            UUID    DEFAULT uuid_generate_v4(),
    executable_id UUID    NOT NULL,
    name          TEXT    NOT NULL,
    address       BIGINT  NOT NULL,
    flags         INTEGER DEFAULT 0,
    CONSTRAINT bsim_functions_pkey PRIMARY KEY (id),
    CONSTRAINT bsim_functions_executable_id_fkey
        FOREIGN KEY (executable_id) REFERENCES bsim_executables (id) ON DELETE CASCADE,
    CONSTRAINT bsim_functions_executable_id_address_key UNIQUE (executable_id, address)
);
-- BSim function vectors (feature vectors for similarity).
-- At most one vector row per function; rows are removed with their function.
-- Constraint names match PostgreSQL's defaults.
CREATE TABLE IF NOT EXISTS bsim_vectors (
    id            UUID    DEFAULT uuid_generate_v4(),
    function_id   UUID    NOT NULL,
    lsh_hash      BYTEA   NOT NULL,  -- Locality-sensitive hash
    feature_count INTEGER NOT NULL,
    vector_data   BYTEA   NOT NULL,  -- Serialized feature vector
    CONSTRAINT bsim_vectors_pkey PRIMARY KEY (id),
    CONSTRAINT bsim_vectors_function_id_fkey
        FOREIGN KEY (function_id) REFERENCES bsim_functions (id) ON DELETE CASCADE,
    CONSTRAINT bsim_vectors_function_id_key UNIQUE (function_id)
);
-- BSim function signatures (compact fingerprints).
-- A function holds at most one signature per signature_type; rows are removed
-- with their function. Constraint names match PostgreSQL's defaults.
CREATE TABLE IF NOT EXISTS bsim_signatures (
    id             UUID        DEFAULT uuid_generate_v4(),
    function_id    UUID        NOT NULL,
    signature_type TEXT        NOT NULL,  -- 'basic', 'weighted', 'full'
    signature_hash BYTEA       NOT NULL,
    significance   REAL        NOT NULL DEFAULT 0.0,
    created_at     TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT bsim_signatures_pkey PRIMARY KEY (id),
    CONSTRAINT bsim_signatures_function_id_fkey
        FOREIGN KEY (function_id) REFERENCES bsim_functions (id) ON DELETE CASCADE,
    CONSTRAINT bsim_signatures_function_id_signature_type_key
        UNIQUE (function_id, signature_type)
);
-- BSim clusters (similar function groups).
-- function_count is a stored column defaulting to 0; nothing in this script
-- keeps it in sync with the cluster's membership rows.
CREATE TABLE IF NOT EXISTS bsim_clusters (
    id              UUID        DEFAULT uuid_generate_v4(),
    name            TEXT,
    function_count  INTEGER     NOT NULL DEFAULT 0,
    centroid_vector BYTEA,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT bsim_clusters_pkey PRIMARY KEY (id)
);
-- Cluster membership: links clusters to functions, with one similarity value
-- per (cluster, function) pair. Deleting either side removes the membership
-- row. Constraint names match PostgreSQL's defaults.
CREATE TABLE IF NOT EXISTS bsim_cluster_members (
    cluster_id  UUID NOT NULL,
    function_id UUID NOT NULL,
    similarity  REAL NOT NULL,
    CONSTRAINT bsim_cluster_members_pkey PRIMARY KEY (cluster_id, function_id),
    CONSTRAINT bsim_cluster_members_cluster_id_fkey
        FOREIGN KEY (cluster_id) REFERENCES bsim_clusters (id) ON DELETE CASCADE,
    CONSTRAINT bsim_cluster_members_function_id_fkey
        FOREIGN KEY (function_id) REFERENCES bsim_functions (id) ON DELETE CASCADE
);
-- Ingestion tracking.
-- executable_id is nullable, and library_name/library_version are stored on
-- the log row itself, so an entry remains meaningful without its executable.
-- ON DELETE SET NULL therefore keeps the log entry when an executable is
-- removed; without an explicit action the default (NO ACTION) would reject
-- deleting any executable that has ever been logged.
-- NOTE: CREATE TABLE IF NOT EXISTS does not alter an existing table; databases
-- initialised before this change keep their original foreign key.
CREATE TABLE IF NOT EXISTS bsim_ingest_log (
    id                 UUID        PRIMARY KEY DEFAULT uuid_generate_v4(),
    executable_id      UUID        REFERENCES bsim_executables (id) ON DELETE SET NULL,
    library_name       TEXT        NOT NULL,
    library_version    TEXT,
    functions_ingested INTEGER     NOT NULL DEFAULT 0,
    status             TEXT        NOT NULL DEFAULT 'pending',
    error_message      TEXT,
    started_at         TIMESTAMPTZ,
    completed_at       TIMESTAMPTZ,
    ingested_at        TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_bsim_functions_executable ON bsim_functions (executable_id);
CREATE INDEX IF NOT EXISTS idx_bsim_functions_name ON bsim_functions (name);
-- Hash indexes: these columns are opaque byte strings, so only equality lookups apply.
CREATE INDEX IF NOT EXISTS idx_bsim_vectors_lsh ON bsim_vectors USING hash (lsh_hash);
CREATE INDEX IF NOT EXISTS idx_bsim_signatures_hash ON bsim_signatures USING hash (signature_hash);
CREATE INDEX IF NOT EXISTS idx_bsim_executables_library ON bsim_executables (library_name, library_version);
CREATE INDEX IF NOT EXISTS idx_bsim_ingest_log_status ON bsim_ingest_log (status);
-- Foreign-key columns that no other index leads with. PostgreSQL does not
-- index referencing columns automatically, so without these a delete on the
-- referenced table has to scan the whole referencing table.
-- (bsim_cluster_members' primary key leads with cluster_id, not function_id.)
CREATE INDEX IF NOT EXISTS idx_bsim_cluster_members_function ON bsim_cluster_members (function_id);
-- Also serves the join on executable_id in the bsim_library_stats view.
CREATE INDEX IF NOT EXISTS idx_bsim_ingest_log_executable ON bsim_ingest_log (executable_id);
-- Views for common queries

-- One row per function with its executable's metadata. significance comes from
-- the function's 'basic' signature and is NULL when no such signature exists;
-- that filter sits in the ON clause so the outer join keeps those functions.
CREATE OR REPLACE VIEW bsim_function_summary AS
SELECT
    fn.id              AS function_id,
    fn.name            AS function_name,
    fn.address,
    ex.name            AS executable_name,
    ex.library_name,
    ex.library_version,
    ex.architecture,
    sig.significance
FROM bsim_functions AS fn
INNER JOIN bsim_executables AS ex
    ON ex.id = fn.executable_id
LEFT JOIN bsim_signatures AS sig
    ON sig.function_id = fn.id
    AND sig.signature_type = 'basic';
-- Per (library_name, library_version) totals for executables that name a library.
-- Both outer joins multiply rows per executable (functions x log entries), so
-- the counts must stay COUNT(DISTINCT ...); MAX is unaffected by the duplicates.
CREATE OR REPLACE VIEW bsim_library_stats AS
SELECT
    ex.library_name,
    ex.library_version,
    COUNT(DISTINCT ex.id)  AS executable_count,
    COUNT(DISTINCT fn.id)  AS function_count,
    MAX(lg.ingested_at)    AS last_ingested
FROM bsim_executables AS ex
LEFT JOIN bsim_functions AS fn
    ON fn.executable_id = ex.id
LEFT JOIN bsim_ingest_log AS lg
    ON lg.executable_id = ex.id
WHERE ex.library_name IS NOT NULL
GROUP BY
    ex.library_name,
    ex.library_version
ORDER BY
    ex.library_name,
    ex.library_version;
-- Grant bsim_user every privilege on the tables (views included) and sequences
-- that exist in the public schema when this statement runs.
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO bsim_user;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO bsim_user;
-- Insert schema version marker.
-- Guarded with NOT EXISTS so that re-running this script, which is otherwise
-- idempotent (IF NOT EXISTS / OR REPLACE throughout), does not add a second
-- '_schema_init' row.
INSERT INTO bsim_ingest_log (library_name, functions_ingested, status, completed_at)
SELECT '_schema_init', 0, 'completed', now()
WHERE NOT EXISTS (
    SELECT 1
    FROM bsim_ingest_log
    WHERE library_name = '_schema_init'
);
-- Emit a NOTICE once every statement above has completed.
DO LANGUAGE plpgsql $init$
BEGIN
    RAISE NOTICE 'BSim schema initialized successfully';
END
$init$;