save progress

This commit is contained in:
StellaOps Bot
2026-01-06 09:42:02 +02:00
parent 94d68bee8b
commit 37e11918e0
443 changed files with 85863 additions and 897 deletions

View File

@@ -0,0 +1,42 @@
# Copyright (c) StellaOps. All rights reserved.
# Licensed under AGPL-3.0-or-later.
# Function Behavior Corpus PostgreSQL Database
#
# Usage:
# docker compose -f docker-compose.corpus.yml up -d
#
# Environment variables:
# CORPUS_DB_PASSWORD - PostgreSQL password for corpus database
services:
  # PostgreSQL instance that stores the function behavior corpus.
  corpus-postgres:
    image: postgres:16-alpine
    container_name: stellaops-corpus-db
    environment:
      POSTGRES_DB: stellaops_corpus
      POSTGRES_USER: corpus_user
      # Uses the development default when CORPUS_DB_PASSWORD is not set.
      POSTGRES_PASSWORD: ${CORPUS_DB_PASSWORD:-stellaops_corpus_dev}
      POSTGRES_INITDB_ARGS: "-E UTF8 --locale=C"
    volumes:
      - corpus-data:/var/lib/postgresql/data
      # Init scripts, mounted read-only: schema (10-) and test data (20-).
      - ../../../docs/db/schemas/corpus.sql:/docker-entrypoint-initdb.d/10-corpus-schema.sql:ro
      - ./scripts/init-test-data.sql:/docker-entrypoint-initdb.d/20-test-data.sql:ro
    ports:
      # Host port 5435 -> container port 5432.
      - "5435:5432"
    networks:
      - stellaops-corpus
    healthcheck:
      test:
        - CMD-SHELL
        - pg_isready -U corpus_user -d stellaops_corpus
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped

volumes:
  corpus-data:
    driver: local

networks:
  stellaops-corpus:
    driver: bridge

View File

@@ -0,0 +1,220 @@
-- =============================================================================
-- CORPUS TEST DATA - Minimal corpus for integration testing
-- Copyright (c) StellaOps. All rights reserved.
-- Licensed under AGPL-3.0-or-later.
-- =============================================================================
-- Set tenant for test data
-- Stores 'test-tenant' in the custom session setting app.tenant_id for the
-- statements that follow in this session.
-- NOTE(review): the INSERTs below never supply tenant_id although most of their
-- ON CONFLICT targets include it; presumably corpus.sql derives the column from
-- this setting - confirm against the schema.
SET app.tenant_id = 'test-tenant';
-- =============================================================================
-- LIBRARIES
-- =============================================================================
-- Seed five libraries with fixed ids (a0000001-...-01 .. -05) so that later
-- statements in this script can reference them by literal UUID.
-- Rows whose (tenant_id, name) already exist are left untouched.
INSERT INTO corpus.libraries (id, name, description, homepage_url, source_repo)
VALUES
('a0000001-0000-0000-0000-000000000001', 'glibc', 'GNU C Library', 'https://www.gnu.org/software/libc/', 'https://sourceware.org/git/glibc.git'),
('a0000001-0000-0000-0000-000000000002', 'openssl', 'OpenSSL cryptographic library', 'https://www.openssl.org/', 'https://github.com/openssl/openssl.git'),
('a0000001-0000-0000-0000-000000000003', 'zlib', 'zlib compression library', 'https://zlib.net/', 'https://github.com/madler/zlib.git'),
('a0000001-0000-0000-0000-000000000004', 'curl', 'libcurl transfer library', 'https://curl.se/', 'https://github.com/curl/curl.git'),
('a0000001-0000-0000-0000-000000000005', 'sqlite', 'SQLite database engine', 'https://sqlite.org/', 'https://sqlite.org/src')
ON CONFLICT (tenant_id, name) DO NOTHING;
-- =============================================================================
-- LIBRARY VERSIONS (glibc, OpenSSL, zlib)
-- =============================================================================
-- Seed release versions for glibc, OpenSSL and zlib (curl and sqlite get none).
-- Id prefixes b0000001 / b0000002 / b0000003 group the rows by library.
-- Rows whose (tenant_id, library_id, version) already exist are left untouched.
INSERT INTO corpus.library_versions (id, library_id, version, release_date, is_security_release)
VALUES
-- glibc versions
('b0000001-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000001', '2.17', '2012-12-25', false),
('b0000001-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000001', '2.28', '2018-08-01', false),
('b0000001-0000-0000-0000-000000000003', 'a0000001-0000-0000-0000-000000000001', '2.31', '2020-02-01', false),
('b0000001-0000-0000-0000-000000000004', 'a0000001-0000-0000-0000-000000000001', '2.35', '2022-02-03', false),
('b0000001-0000-0000-0000-000000000005', 'a0000001-0000-0000-0000-000000000001', '2.38', '2023-07-31', false),
-- OpenSSL versions
('b0000002-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000002', '1.0.2u', '2019-12-20', true),
('b0000002-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000002', '1.1.1w', '2023-09-11', true),
('b0000002-0000-0000-0000-000000000003', 'a0000001-0000-0000-0000-000000000002', '3.0.12', '2023-10-24', true),
('b0000002-0000-0000-0000-000000000004', 'a0000001-0000-0000-0000-000000000002', '3.1.4', '2023-10-24', true),
-- zlib versions
('b0000003-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000003', '1.2.11', '2017-01-15', false),
('b0000003-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000003', '1.2.13', '2022-10-13', true),
('b0000003-0000-0000-0000-000000000003', 'a0000001-0000-0000-0000-000000000003', '1.3.1', '2024-01-22', false)
ON CONFLICT (tenant_id, library_id, version) DO NOTHING;
-- =============================================================================
-- BUILD VARIANTS
-- =============================================================================
-- Seed build variants for glibc 2.31, glibc 2.35 and OpenSSL 3.0.12.
-- The binary_sha256 values are synthetic repeating hex patterns, not digests
-- of real binaries.
-- Rows that collide on (tenant_id, library_version_id, architecture, abi,
-- compiler, optimization_level) are left untouched.
INSERT INTO corpus.build_variants (id, library_version_id, architecture, abi, compiler, compiler_version, optimization_level, binary_sha256)
VALUES
-- glibc 2.31 variants
('c0000001-0000-0000-0000-000000000001', 'b0000001-0000-0000-0000-000000000003', 'x86_64', 'gnu', 'gcc', '9.3.0', 'O2', 'a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2'),
('c0000001-0000-0000-0000-000000000002', 'b0000001-0000-0000-0000-000000000003', 'aarch64', 'gnu', 'gcc', '9.3.0', 'O2', 'b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3'),
('c0000001-0000-0000-0000-000000000003', 'b0000001-0000-0000-0000-000000000003', 'armhf', 'gnu', 'gcc', '9.3.0', 'O2', 'c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'),
-- glibc 2.35 variants
('c0000002-0000-0000-0000-000000000001', 'b0000001-0000-0000-0000-000000000004', 'x86_64', 'gnu', 'gcc', '11.2.0', 'O2', 'd4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5'),
('c0000002-0000-0000-0000-000000000002', 'b0000001-0000-0000-0000-000000000004', 'aarch64', 'gnu', 'gcc', '11.2.0', 'O2', 'e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6'),
-- OpenSSL 3.0.12 variants
('c0000003-0000-0000-0000-000000000001', 'b0000002-0000-0000-0000-000000000003', 'x86_64', 'gnu', 'gcc', '11.2.0', 'O2', 'f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1'),
('c0000003-0000-0000-0000-000000000002', 'b0000002-0000-0000-0000-000000000003', 'aarch64', 'gnu', 'gcc', '11.2.0', 'O2', 'a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b3')
ON CONFLICT (tenant_id, library_version_id, architecture, abi, compiler, optimization_level) DO NOTHING;
-- =============================================================================
-- FUNCTIONS (Sample functions from glibc and OpenSSL)
-- =============================================================================
-- Seed exported functions for three x86_64 build variants: glibc 2.31 (10 rows),
-- glibc 2.35 (7 rows) and OpenSSL 3.0.12 (8 rows). Only the x86_64 variants get
-- functions; the aarch64/armhf variants stay empty.
-- Rows that collide on (tenant_id, build_variant_id, name, address) are left
-- untouched.
INSERT INTO corpus.functions (id, build_variant_id, name, demangled_name, address, size_bytes, is_exported)
VALUES
-- glibc 2.31 x86_64 functions
('d0000001-0000-0000-0000-000000000001', 'c0000001-0000-0000-0000-000000000001', 'memcpy', 'memcpy', 140000, 256, true),
('d0000001-0000-0000-0000-000000000002', 'c0000001-0000-0000-0000-000000000001', 'memset', 'memset', 140256, 192, true),
('d0000001-0000-0000-0000-000000000003', 'c0000001-0000-0000-0000-000000000001', 'strlen', 'strlen', 140448, 128, true),
('d0000001-0000-0000-0000-000000000004', 'c0000001-0000-0000-0000-000000000001', 'strcmp', 'strcmp', 140576, 160, true),
('d0000001-0000-0000-0000-000000000005', 'c0000001-0000-0000-0000-000000000001', 'strcpy', 'strcpy', 140736, 144, true),
('d0000001-0000-0000-0000-000000000006', 'c0000001-0000-0000-0000-000000000001', 'malloc', 'malloc', 150000, 512, true),
('d0000001-0000-0000-0000-000000000007', 'c0000001-0000-0000-0000-000000000001', 'free', 'free', 150512, 384, true),
('d0000001-0000-0000-0000-000000000008', 'c0000001-0000-0000-0000-000000000001', 'realloc', 'realloc', 150896, 448, true),
('d0000001-0000-0000-0000-000000000009', 'c0000001-0000-0000-0000-000000000001', 'printf', 'printf', 160000, 1024, true),
('d0000001-0000-0000-0000-000000000010', 'c0000001-0000-0000-0000-000000000001', 'sprintf', 'sprintf', 161024, 896, true),
-- glibc 2.35 x86_64 functions (same functions, different addresses/sizes due to optimization)
('d0000002-0000-0000-0000-000000000001', 'c0000002-0000-0000-0000-000000000001', 'memcpy', 'memcpy', 145000, 280, true),
('d0000002-0000-0000-0000-000000000002', 'c0000002-0000-0000-0000-000000000001', 'memset', 'memset', 145280, 208, true),
('d0000002-0000-0000-0000-000000000003', 'c0000002-0000-0000-0000-000000000001', 'strlen', 'strlen', 145488, 144, true),
('d0000002-0000-0000-0000-000000000004', 'c0000002-0000-0000-0000-000000000001', 'strcmp', 'strcmp', 145632, 176, true),
('d0000002-0000-0000-0000-000000000005', 'c0000002-0000-0000-0000-000000000001', 'strcpy', 'strcpy', 145808, 160, true),
('d0000002-0000-0000-0000-000000000006', 'c0000002-0000-0000-0000-000000000001', 'malloc', 'malloc', 155000, 544, true),
('d0000002-0000-0000-0000-000000000007', 'c0000002-0000-0000-0000-000000000001', 'free', 'free', 155544, 400, true),
-- OpenSSL 3.0.12 functions
('d0000003-0000-0000-0000-000000000001', 'c0000003-0000-0000-0000-000000000001', 'EVP_DigestInit_ex', 'EVP_DigestInit_ex', 200000, 320, true),
('d0000003-0000-0000-0000-000000000002', 'c0000003-0000-0000-0000-000000000001', 'EVP_DigestUpdate', 'EVP_DigestUpdate', 200320, 256, true),
('d0000003-0000-0000-0000-000000000003', 'c0000003-0000-0000-0000-000000000001', 'EVP_DigestFinal_ex', 'EVP_DigestFinal_ex', 200576, 288, true),
('d0000003-0000-0000-0000-000000000004', 'c0000003-0000-0000-0000-000000000001', 'EVP_EncryptInit_ex', 'EVP_EncryptInit_ex', 201000, 384, true),
('d0000003-0000-0000-0000-000000000005', 'c0000003-0000-0000-0000-000000000001', 'EVP_DecryptInit_ex', 'EVP_DecryptInit_ex', 201384, 384, true),
('d0000003-0000-0000-0000-000000000006', 'c0000003-0000-0000-0000-000000000001', 'SSL_CTX_new', 'SSL_CTX_new', 300000, 512, true),
('d0000003-0000-0000-0000-000000000007', 'c0000003-0000-0000-0000-000000000001', 'SSL_new', 'SSL_new', 300512, 384, true),
('d0000003-0000-0000-0000-000000000008', 'c0000003-0000-0000-0000-000000000001', 'SSL_connect', 'SSL_connect', 300896, 1024, true)
ON CONFLICT (tenant_id, build_variant_id, name, address) DO NOTHING;
-- =============================================================================
-- FINGERPRINTS (Simulated semantic fingerprints)
-- =============================================================================
-- Seed simulated fingerprints. Each row stores a 32-byte value decoded from a
-- 64-character hex literal plus a JSON metadata document describing the
-- (made-up) graph or basic-block statistics.
-- Only glibc 2.31 memcpy carries two algorithms (semantic_ksg and
-- instruction_bb); every other fingerprinted function has semantic_ksg only.
-- Rows that collide on (tenant_id, function_id, algorithm) are left untouched.
INSERT INTO corpus.fingerprints (id, function_id, algorithm, fingerprint, metadata)
VALUES
-- memcpy fingerprints (semantic_ksg algorithm)
('e0000001-0000-0000-0000-000000000001', 'd0000001-0000-0000-0000-000000000001', 'semantic_ksg',
decode('a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f60001', 'hex'),
'{"node_count": 45, "edge_count": 72, "api_calls": ["memcpy_internal"], "complexity": 8}'::jsonb),
('e0000001-0000-0000-0000-000000000002', 'd0000001-0000-0000-0000-000000000001', 'instruction_bb',
decode('b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a10001', 'hex'),
'{"bb_count": 8, "instruction_count": 64}'::jsonb),
-- memcpy 2.35 (similar fingerprint, different version)
('e0000002-0000-0000-0000-000000000001', 'd0000002-0000-0000-0000-000000000001', 'semantic_ksg',
decode('a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f60002', 'hex'),
'{"node_count": 48, "edge_count": 76, "api_calls": ["memcpy_internal"], "complexity": 9}'::jsonb),
-- memset fingerprints
('e0000003-0000-0000-0000-000000000001', 'd0000001-0000-0000-0000-000000000002', 'semantic_ksg',
decode('c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b20001', 'hex'),
'{"node_count": 32, "edge_count": 48, "api_calls": [], "complexity": 5}'::jsonb),
-- strlen fingerprints
('e0000004-0000-0000-0000-000000000001', 'd0000001-0000-0000-0000-000000000003', 'semantic_ksg',
decode('d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c30001', 'hex'),
'{"node_count": 24, "edge_count": 32, "api_calls": [], "complexity": 4}'::jsonb),
-- malloc fingerprints
('e0000005-0000-0000-0000-000000000001', 'd0000001-0000-0000-0000-000000000006', 'semantic_ksg',
decode('e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d40001', 'hex'),
'{"node_count": 128, "edge_count": 256, "api_calls": ["sbrk", "mmap"], "complexity": 24}'::jsonb),
-- OpenSSL EVP_DigestInit_ex
('e0000006-0000-0000-0000-000000000001', 'd0000003-0000-0000-0000-000000000001', 'semantic_ksg',
decode('f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e50001', 'hex'),
'{"node_count": 56, "edge_count": 84, "api_calls": ["OPENSSL_init_crypto"], "complexity": 12}'::jsonb),
-- SSL_CTX_new
('e0000007-0000-0000-0000-000000000001', 'd0000003-0000-0000-0000-000000000006', 'semantic_ksg',
decode('a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f60003', 'hex'),
'{"node_count": 96, "edge_count": 144, "api_calls": ["CRYPTO_malloc", "SSL_CTX_set_options"], "complexity": 18}'::jsonb)
ON CONFLICT (tenant_id, function_id, algorithm) DO NOTHING;
-- =============================================================================
-- FUNCTION CLUSTERS
-- =============================================================================
-- Seed six clusters: four for glibc (f0000001-*) and two for OpenSSL (f0000002-*).
-- Rows that collide on (tenant_id, library_id, canonical_name) are left untouched.
INSERT INTO corpus.function_clusters (id, library_id, canonical_name, description)
VALUES
('f0000001-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000001', 'memcpy', 'Memory copy function across glibc versions'),
('f0000001-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000001', 'memset', 'Memory set function across glibc versions'),
('f0000001-0000-0000-0000-000000000003', 'a0000001-0000-0000-0000-000000000001', 'strlen', 'String length function across glibc versions'),
('f0000001-0000-0000-0000-000000000004', 'a0000001-0000-0000-0000-000000000001', 'malloc', 'Memory allocation function across glibc versions'),
('f0000002-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000002', 'EVP_DigestInit_ex', 'EVP digest initialization across OpenSSL versions'),
('f0000002-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000002', 'SSL_CTX_new', 'SSL context creation across OpenSSL versions')
ON CONFLICT (tenant_id, library_id, canonical_name) DO NOTHING;
-- =============================================================================
-- CLUSTER MEMBERS
-- =============================================================================
-- Populate the four glibc clusters. In each pair the glibc 2.31 function
-- (d0000001-*) has similarity 1.0 and the glibc 2.35 counterpart (d0000002-*)
-- has a lower value. The two OpenSSL clusters receive no members.
-- ON CONFLICT has no target here, so any unique or exclusion conflict skips the row.
INSERT INTO corpus.cluster_members (cluster_id, function_id, similarity_to_centroid)
VALUES
-- memcpy cluster
('f0000001-0000-0000-0000-000000000001', 'd0000001-0000-0000-0000-000000000001', 1.0),
('f0000001-0000-0000-0000-000000000001', 'd0000002-0000-0000-0000-000000000001', 0.95),
-- memset cluster
('f0000001-0000-0000-0000-000000000002', 'd0000001-0000-0000-0000-000000000002', 1.0),
('f0000001-0000-0000-0000-000000000002', 'd0000002-0000-0000-0000-000000000002', 0.92),
-- strlen cluster
('f0000001-0000-0000-0000-000000000003', 'd0000001-0000-0000-0000-000000000003', 1.0),
('f0000001-0000-0000-0000-000000000003', 'd0000002-0000-0000-0000-000000000003', 0.94),
-- malloc cluster
('f0000001-0000-0000-0000-000000000004', 'd0000001-0000-0000-0000-000000000006', 1.0),
('f0000001-0000-0000-0000-000000000004', 'd0000002-0000-0000-0000-000000000006', 0.88)
ON CONFLICT DO NOTHING;
-- =============================================================================
-- CVE ASSOCIATIONS
-- =============================================================================
-- Seed CVE associations for OpenSSL 3.0.12 x86_64 functions only.
-- Rows that collide on (tenant_id, function_id, cve_id) are left untouched.
INSERT INTO corpus.function_cves (function_id, cve_id, affected_state, confidence, evidence_type)
VALUES
-- CVE-2021-3999 affects glibc getcwd
-- Note: We don't have getcwd in our test data, so no row is inserted for it
-- CVE-2022-0778 affects OpenSSL BN_mod_sqrt (infinite loop)
-- Note: BN_mod_sqrt is not in the test data either; the two rows below attach
-- the CVE to EVP_DigestInit_ex and EVP_DigestUpdate as structural placeholders
('d0000003-0000-0000-0000-000000000001', 'CVE-2022-0778', 'fixed', 0.95, 'advisory'),
('d0000003-0000-0000-0000-000000000002', 'CVE-2022-0778', 'fixed', 0.95, 'advisory'),
-- CVE-2023-0286 affects OpenSSL X509 certificate handling
-- Note: attached here to SSL_CTX_new and SSL_new, the closest functions present
('d0000003-0000-0000-0000-000000000006', 'CVE-2023-0286', 'fixed', 0.90, 'commit'),
('d0000003-0000-0000-0000-000000000007', 'CVE-2023-0286', 'fixed', 0.90, 'commit')
ON CONFLICT (tenant_id, function_id, cve_id) DO NOTHING;
-- =============================================================================
-- INGESTION LOG
-- =============================================================================
-- Seed two completed full_ingest jobs, one for glibc and one for openssl.
-- Timestamps are relative to now(), so they differ on every initialization.
-- NOTE(review): functions_indexed is 10 for glibc although 17 glibc functions
-- are seeded above (10 for 2.31 plus 7 for 2.35) - confirm whether the counter
-- is meant to match.
INSERT INTO corpus.ingestion_jobs (id, library_id, job_type, status, functions_indexed, started_at, completed_at)
VALUES
('99000001-0000-0000-0000-000000000001', 'a0000001-0000-0000-0000-000000000001', 'full_ingest', 'completed', 10, now() - interval '1 day', now() - interval '1 day' + interval '5 minutes'),
('99000001-0000-0000-0000-000000000002', 'a0000001-0000-0000-0000-000000000002', 'full_ingest', 'completed', 8, now() - interval '12 hours', now() - interval '12 hours' + interval '3 minutes')
ON CONFLICT DO NOTHING;
-- =============================================================================
-- SUMMARY
-- =============================================================================
-- Report how many rows each core corpus table holds once seeding has finished.
DO $$
DECLARE
    n_libraries    INT;
    n_versions     INT;
    n_functions    INT;
    n_fingerprints INT;
BEGIN
    n_libraries    := (SELECT COUNT(*) FROM corpus.libraries);
    n_versions     := (SELECT COUNT(*) FROM corpus.library_versions);
    n_functions    := (SELECT COUNT(*) FROM corpus.functions);
    n_fingerprints := (SELECT COUNT(*) FROM corpus.fingerprints);

    RAISE NOTICE 'Corpus test data initialized:';
    RAISE NOTICE ' Libraries: %', n_libraries;
    RAISE NOTICE ' Versions: %', n_versions;
    RAISE NOTICE ' Functions: %', n_functions;
    RAISE NOTICE ' Fingerprints: %', n_fingerprints;
END $$;

View File

@@ -0,0 +1,84 @@
# Copyright (c) StellaOps. All rights reserved.
# Licensed under AGPL-3.0-or-later.
# Ghidra Headless Analysis Server for BinaryIndex
#
# This image provides Ghidra headless analysis capabilities including:
# - Ghidra Headless Analyzer (analyzeHeadless)
# - ghidriff for automated binary diffing
# - Version Tracking and BSim support
#
# Build:
# docker build -f Dockerfile.headless -t stellaops/ghidra-headless:11.2 .
#
# Run:
# docker run --rm -v /path/to/binaries:/binaries stellaops/ghidra-headless:11.2 \
# /projects GhidraProject -import /binaries/target.exe
# (analysis runs by default on import; pass -noanalysis to skip it)
# Ghidra 11.2 requires a JDK 21 runtime (11.1.x and earlier ran on JDK 17).
FROM eclipse-temurin:21-jdk-jammy

ARG GHIDRA_VERSION=11.2
# Date suffix of the release archive. It is specific to each Ghidra release and
# must be changed together with GHIDRA_VERSION (11.2 -> 20240926); a mismatch
# makes the download below fail with HTTP 404.
ARG GHIDRA_BUILD_DATE=20240926
# Optional SHA-256 of the release archive; verification is skipped when unset.
ARG GHIDRA_SHA256

LABEL org.opencontainers.image.title="StellaOps Ghidra Headless"
LABEL org.opencontainers.image.description="Ghidra headless analysis server with ghidriff for BinaryIndex"
LABEL org.opencontainers.image.version="${GHIDRA_VERSION}"
LABEL org.opencontainers.image.licenses="AGPL-3.0-or-later"
LABEL org.opencontainers.image.source="https://github.com/stellaops/stellaops"
LABEL org.opencontainers.image.vendor="StellaOps"

# Install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 \
    python3-pip \
    python3-venv \
    curl \
    unzip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Download and verify Ghidra
# Note: Set GHIDRA_SHA256 build arg for production builds
RUN curl -fsSL "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${GHIDRA_VERSION}_build/ghidra_${GHIDRA_VERSION}_PUBLIC_${GHIDRA_BUILD_DATE}.zip" \
    -o /tmp/ghidra.zip \
    && if [ -n "${GHIDRA_SHA256}" ]; then \
    echo "${GHIDRA_SHA256} /tmp/ghidra.zip" | sha256sum -c -; \
    fi \
    && unzip -q /tmp/ghidra.zip -d /opt \
    && rm /tmp/ghidra.zip \
    && ln -s /opt/ghidra_${GHIDRA_VERSION}_PUBLIC /opt/ghidra \
    && chmod +x /opt/ghidra/support/analyzeHeadless

# Install ghidriff in isolated virtual environment
RUN python3 -m venv /opt/venv \
    && /opt/venv/bin/pip install --no-cache-dir --upgrade pip \
    && /opt/venv/bin/pip install --no-cache-dir ghidriff

# Set environment variables
ENV GHIDRA_HOME=/opt/ghidra
ENV GHIDRA_INSTALL_DIR=/opt/ghidra
ENV JAVA_HOME=/opt/java/openjdk
ENV PATH="${GHIDRA_HOME}/support:/opt/venv/bin:${PATH}"
ENV MAXMEM=4G

# Create working directories with proper permissions
RUN mkdir -p /projects /scripts /output \
    && chmod 755 /projects /scripts /output

# Create non-root user for security.
# -m creates the home directory, which useradd skips for system (-r) accounts.
# NOTE(review): Ghidra is expected to create its per-user settings directory
# under $HOME at start-up - confirm that a missing home breaks analyzeHeadless.
RUN groupadd -r ghidra && useradd -r -m -g ghidra ghidra \
    && chown -R ghidra:ghidra /projects /scripts /output

WORKDIR /projects

# Healthcheck - verify Ghidra is functional
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD analyzeHeadless /tmp HealthCheck -help > /dev/null 2>&1 || exit 1

# Switch to non-root user
USER ghidra

# Default entrypoint is analyzeHeadless
ENTRYPOINT ["analyzeHeadless"]
CMD ["--help"]

View File

@@ -0,0 +1,77 @@
# Copyright (c) StellaOps. All rights reserved.
# Licensed under AGPL-3.0-or-later.
# BSim PostgreSQL Database and Ghidra Headless Services
#
# Usage:
# docker compose -f docker-compose.bsim.yml up -d
#
# Environment variables:
# BSIM_DB_PASSWORD - PostgreSQL password for BSim database
# The top-level `version` key is omitted on purpose: the Compose Specification
# treats it as obsolete, and this file already depends on spec-only behaviour
# (depends_on with condition: service_healthy).
services:
  # PostgreSQL instance backing the BSim signature database.
  bsim-postgres:
    image: postgres:16-alpine
    container_name: stellaops-bsim-db
    environment:
      POSTGRES_DB: bsim_corpus
      POSTGRES_USER: bsim_user
      # Uses the development default when BSIM_DB_PASSWORD is not set.
      POSTGRES_PASSWORD: ${BSIM_DB_PASSWORD:-stellaops_bsim_dev}
      POSTGRES_INITDB_ARGS: "-E UTF8 --locale=C"
    volumes:
      - bsim-data:/var/lib/postgresql/data
      - ./scripts/init-bsim.sql:/docker-entrypoint-initdb.d/10-init-bsim.sql:ro
    ports:
      # Host port 5433 -> container port 5432.
      - "5433:5432"
    networks:
      - stellaops-bsim
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U bsim_user -d bsim_corpus"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped

  # Ghidra Headless service for BSim analysis
  ghidra-headless:
    build:
      context: .
      dockerfile: Dockerfile.headless
    image: stellaops/ghidra-headless:11.2
    container_name: stellaops-ghidra
    depends_on:
      # Start only after the database reports healthy.
      bsim-postgres:
        condition: service_healthy
    environment:
      BSIM_DB_URL: "postgresql://bsim-postgres:5432/bsim_corpus"
      BSIM_DB_USER: bsim_user
      BSIM_DB_PASSWORD: ${BSIM_DB_PASSWORD:-stellaops_bsim_dev}
      JAVA_HOME: /opt/java/openjdk
      MAXMEM: 4G
    volumes:
      - ghidra-projects:/projects
      - ghidra-scripts:/scripts
      - ghidra-output:/output
    networks:
      - stellaops-bsim
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 8G
    # Keep container running for ad-hoc analysis
    # (replaces the image's analyzeHeadless entrypoint).
    entrypoint: ["tail", "-f", "/dev/null"]
    restart: unless-stopped

volumes:
  bsim-data:
    driver: local
  ghidra-projects:
  ghidra-scripts:
  ghidra-output:

networks:
  stellaops-bsim:
    driver: bridge

View File

@@ -0,0 +1,140 @@
-- BSim PostgreSQL Schema Initialization
-- Copyright (c) StellaOps. All rights reserved.
-- Licensed under AGPL-3.0-or-later.
--
-- This script creates the core BSim schema structure.
-- Note: Full Ghidra BSim schema is auto-created by Ghidra tools.
-- This provides a minimal functional schema for integration testing.
-- Attach a descriptive comment to the bsim_corpus database itself
-- (the database name is hard-coded here).
COMMENT ON DATABASE bsim_corpus IS 'Ghidra BSim function signature database for StellaOps BinaryIndex';
-- Enable required extensions
-- uuid-ossp provides uuid_generate_v4(), used as the id default by the tables below.
-- NOTE(review): nothing in this script uses pg_trgm - confirm it is still needed.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pg_trgm";
-- Executables: one row per analysed binary; sha256_hash must be unique.
CREATE TABLE IF NOT EXISTS bsim_executables (
    id              UUID        PRIMARY KEY DEFAULT uuid_generate_v4(),
    name            TEXT        NOT NULL,
    architecture    TEXT        NOT NULL,
    library_name    TEXT,
    library_version TEXT,
    md5_hash        BYTEA,
    sha256_hash     BYTEA,
    date_added      TIMESTAMPTZ NOT NULL DEFAULT now(),
    UNIQUE (sha256_hash)
);
-- Functions: belong to an executable and are removed with it; an address may
-- appear only once per executable.
CREATE TABLE IF NOT EXISTS bsim_functions (
    id            UUID    PRIMARY KEY DEFAULT uuid_generate_v4(),
    executable_id UUID    NOT NULL REFERENCES bsim_executables(id) ON DELETE CASCADE,
    name          TEXT    NOT NULL,
    address       BIGINT  NOT NULL,
    flags         INTEGER DEFAULT 0,
    UNIQUE (executable_id, address)
);
-- Function vectors (feature vectors for similarity): at most one per function,
-- removed together with the function.
CREATE TABLE IF NOT EXISTS bsim_vectors (
    id            UUID    PRIMARY KEY DEFAULT uuid_generate_v4(),
    function_id   UUID    NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE,
    lsh_hash      BYTEA   NOT NULL, -- Locality-sensitive hash
    feature_count INTEGER NOT NULL,
    vector_data   BYTEA   NOT NULL, -- Serialized feature vector
    UNIQUE (function_id)
);
-- Function signatures (compact fingerprints): one row per function and
-- signature type, removed together with the function.
CREATE TABLE IF NOT EXISTS bsim_signatures (
    id             UUID        PRIMARY KEY DEFAULT uuid_generate_v4(),
    function_id    UUID        NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE,
    signature_type TEXT        NOT NULL, -- 'basic', 'weighted', 'full'
    signature_hash BYTEA       NOT NULL,
    significance   REAL        NOT NULL DEFAULT 0.0,
    created_at     TIMESTAMPTZ NOT NULL DEFAULT now(),
    UNIQUE (function_id, signature_type)
);
-- Clusters (similar function groups); the name and centroid are optional.
CREATE TABLE IF NOT EXISTS bsim_clusters (
    id              UUID        PRIMARY KEY DEFAULT uuid_generate_v4(),
    name            TEXT,
    function_count  INTEGER     NOT NULL DEFAULT 0,
    centroid_vector BYTEA,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Cluster membership: link rows between clusters and functions; a row is
-- removed when either side is deleted.
CREATE TABLE IF NOT EXISTS bsim_cluster_members (
    cluster_id  UUID NOT NULL REFERENCES bsim_clusters(id)  ON DELETE CASCADE,
    function_id UUID NOT NULL REFERENCES bsim_functions(id) ON DELETE CASCADE,
    similarity  REAL NOT NULL,
    PRIMARY KEY (cluster_id, function_id)
);
-- Ingestion tracking: one row per ingest attempt.
-- executable_id is optional and, unlike the other foreign keys in this script,
-- declares no ON DELETE action.
CREATE TABLE IF NOT EXISTS bsim_ingest_log (
    id                 UUID        PRIMARY KEY DEFAULT uuid_generate_v4(),
    executable_id      UUID        REFERENCES bsim_executables(id),
    library_name       TEXT        NOT NULL,
    library_version    TEXT,
    functions_ingested INTEGER     NOT NULL DEFAULT 0,
    status             TEXT        NOT NULL DEFAULT 'pending',
    error_message      TEXT,
    started_at         TIMESTAMPTZ,
    completed_at       TIMESTAMPTZ,
    ingested_at        TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Lookup indexes: default (btree) on relational columns, hash on the two
-- BYTEA hash columns.
CREATE INDEX IF NOT EXISTS idx_bsim_functions_executable
    ON bsim_functions (executable_id);
CREATE INDEX IF NOT EXISTS idx_bsim_functions_name
    ON bsim_functions (name);
CREATE INDEX IF NOT EXISTS idx_bsim_vectors_lsh
    ON bsim_vectors USING hash (lsh_hash);
CREATE INDEX IF NOT EXISTS idx_bsim_signatures_hash
    ON bsim_signatures USING hash (signature_hash);
CREATE INDEX IF NOT EXISTS idx_bsim_executables_library
    ON bsim_executables (library_name, library_version);
CREATE INDEX IF NOT EXISTS idx_bsim_ingest_log_status
    ON bsim_ingest_log (status);
-- View: every function with its executable's details and, when one exists,
-- the significance of its 'basic' signature (NULL otherwise).
CREATE OR REPLACE VIEW bsim_function_summary AS
SELECT
    fn.id   AS function_id,
    fn.name AS function_name,
    fn.address,
    exe.name AS executable_name,
    exe.library_name,
    exe.library_version,
    exe.architecture,
    sig.significance
FROM bsim_functions AS fn
INNER JOIN bsim_executables AS exe
    ON fn.executable_id = exe.id
LEFT JOIN bsim_signatures AS sig
    ON fn.id = sig.function_id
   AND sig.signature_type = 'basic';
-- View: per library/version totals of executables and functions plus the most
-- recent ingest timestamp. DISTINCT counts keep the two LEFT JOINs from
-- inflating the totals; executables without a library_name are excluded.
CREATE OR REPLACE VIEW bsim_library_stats AS
SELECT
    exe.library_name,
    exe.library_version,
    COUNT(DISTINCT exe.id) AS executable_count,
    COUNT(DISTINCT fn.id)  AS function_count,
    MAX(log.ingested_at)   AS last_ingested
FROM bsim_executables AS exe
LEFT JOIN bsim_functions AS fn
    ON exe.id = fn.executable_id
LEFT JOIN bsim_ingest_log AS log
    ON exe.id = log.executable_id
WHERE exe.library_name IS NOT NULL
GROUP BY exe.library_name, exe.library_version
ORDER BY exe.library_name, exe.library_version;
-- Grant permissions on everything created so far in the public schema
GRANT ALL ON ALL TABLES IN SCHEMA public TO bsim_user;
GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO bsim_user;

-- Insert schema version marker.
-- Guarded with NOT EXISTS so the script stays re-runnable like the
-- IF NOT EXISTS / OR REPLACE statements above; without the guard every rerun
-- would append another '_schema_init' row.
INSERT INTO bsim_ingest_log (library_name, functions_ingested, status, completed_at)
SELECT '_schema_init', 0, 'completed', now()
WHERE NOT EXISTS (
    SELECT 1 FROM bsim_ingest_log WHERE library_name = '_schema_init'
);

-- Log successful initialization
DO $$
BEGIN
    RAISE NOTICE 'BSim schema initialized successfully';
END $$;

View File

@@ -0,0 +1,49 @@
# devops/docker/schema-versions/Dockerfile
# Versioned PostgreSQL container for schema evolution testing
# Sprint: SPRINT_20260105_002_005_TEST_cross_cutting
# Task: CCUT-008
#
# USAGE:
# ======
# Build for specific module and version:
# docker build --build-arg MODULE=scanner --build-arg SCHEMA_VERSION=v1.2.0 \
# -t stellaops/schema-test:scanner-v1.2.0 .
#
# Run for testing:
# docker run -d -p 5432:5432 stellaops/schema-test:scanner-v1.2.0
# PostgreSQL major version of the base image (declared before FROM so it can
# parameterize the image tag).
ARG POSTGRES_VERSION=16
FROM postgres:${POSTGRES_VERSION}-alpine

# Build arguments: module whose schema is baked in, plus version/date metadata
ARG MODULE=scanner
ARG SCHEMA_VERSION=latest
ARG SCHEMA_DATE=""

# Labels for identification
LABEL org.opencontainers.image.title="StellaOps Schema Test - ${MODULE}" \
      org.opencontainers.image.description="PostgreSQL with ${MODULE} schema version ${SCHEMA_VERSION}" \
      org.opencontainers.image.version="${SCHEMA_VERSION}" \
      org.stellaops.module="${MODULE}" \
      org.stellaops.schema.version="${SCHEMA_VERSION}" \
      org.stellaops.schema.date="${SCHEMA_DATE}"

# Database credentials, plus the module/version exported to the container
# environment as STELLAOPS_MODULE / STELLAOPS_SCHEMA_VERSION
ENV POSTGRES_USER=stellaops_test \
    POSTGRES_PASSWORD=test_password \
    POSTGRES_DB=stellaops_schema_test \
    STELLAOPS_MODULE=${MODULE} \
    STELLAOPS_SCHEMA_VERSION=${SCHEMA_VERSION}

# Copy initialization scripts
COPY docker-entrypoint-initdb.d/ /docker-entrypoint-initdb.d/

# Copy module-specific schema
COPY schemas/${MODULE}/ /schemas/${MODULE}/

# Health check
HEALTHCHECK --interval=10s --timeout=5s --start-period=30s --retries=3 \
    CMD pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB} || exit 1

# Expose PostgreSQL port
EXPOSE 5432

View File

@@ -0,0 +1,179 @@
#!/bin/bash
# build-schema-images.sh
# Build versioned PostgreSQL images for schema evolution testing
# Sprint: SPRINT_20260105_002_005_TEST_cross_cutting
# Task: CCUT-008
#
# USAGE:
# ======
# Build all versions for a module:
# ./build-schema-images.sh scanner
#
# Build specific version:
# ./build-schema-images.sh scanner v1.2.0
#
# Build all modules:
# ./build-schema-images.sh --all
# Abort on the first failing command.
set -e

# Absolute directory of this script, and the repository root three levels up.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "$SCRIPT_DIR/../../.." && pwd)

# Overridable through the SCHEMA_REGISTRY / POSTGRES_VERSION environment variables.
REGISTRY=${SCHEMA_REGISTRY:-ghcr.io/stellaops}
POSTGRES_VERSION=${POSTGRES_VERSION:-16}

# Modules with schema evolution support
MODULES=(
    "scanner"
    "concelier"
    "evidencelocker"
    "authority"
    "sbomservice"
    "policy"
)
# Print the command-line help to stdout and terminate the script with status 1.
usage() {
    cat <<EOF
Usage: $0 <module|--all> [version]

Arguments:
 module Module name (scanner, concelier, evidencelocker, authority, sbomservice, policy)
 --all Build all modules
 version Optional specific version to build (default: all versions)

Environment variables:
 SCHEMA_REGISTRY Container registry (default: ghcr.io/stellaops)
 POSTGRES_VERSION PostgreSQL version (default: 16)
 PUSH_IMAGES Set to 'true' to push images after build
EOF
    exit 1
}
# Get schema versions from git tags or migration files.
#
# Arguments:
#   $1  module name
# Output:
#   One space-separated line on stdout listing the versions to build; it always
#   ends with "latest".
#
# Tags named "<module>-schema-v*" take precedence. Without any such tag, one
# synthetic version "v1.0.<i>" is produced per *.sql file found in
# $REPO_ROOT/docs/db/migrations/<module>.
get_schema_versions() {
    local module=$1
    local versions=()
    local tags

    # Check for version tags. Query the repository this script lives in
    # (git -C) instead of whatever directory the caller happens to be in;
    # otherwise running the script from outside the checkout silently misses
    # every tag and falls through to the migration-count scheme.
    tags=$(git -C "$REPO_ROOT" tag -l "${module}-schema-v*" 2>/dev/null | sed "s/${module}-schema-//" | sort -V)
    if [ -n "$tags" ]; then
        # Intentional word splitting: one array element per tag.
        versions=($tags)
    else
        # Fall back to migration file count
        local migration_dir="$REPO_ROOT/docs/db/migrations/${module}"
        if [ -d "$migration_dir" ]; then
            local count
            count=$(ls -1 "$migration_dir"/*.sql 2>/dev/null | wc -l)
            local i
            for i in $(seq 1 $count); do
                versions+=("v1.0.$i")
            done
        fi
    fi

    # Always include 'latest'
    versions+=("latest")
    echo "${versions[@]}"
}
# Copy schema files to build context.
#
# Arguments:
#   $1  module name
#   $2  schema version (used only to name the context directory)
# Output:
#   Path of the prepared build context on stdout. Callers capture it with
#   $(...), so nothing else in this function may write to stdout.
prepare_schema_context() {
    local module=$1
    local version=$2
    local build_dir="$SCRIPT_DIR/.build/${module}/${version}"

    # Start from an empty context. A directory left behind by an earlier,
    # interrupted run could still hold migrations that have since been removed
    # from the repository, and they would end up in the image.
    rm -rf "$build_dir"
    mkdir -p "$build_dir/schemas/${module}"
    mkdir -p "$build_dir/docker-entrypoint-initdb.d"

    # Copy entrypoint scripts
    cp "$SCRIPT_DIR/docker-entrypoint-initdb.d/"*.sh "$build_dir/docker-entrypoint-initdb.d/"

    # Copy base schema
    local base_schema="$REPO_ROOT/docs/db/schemas/${module}.sql"
    if [ -f "$base_schema" ]; then
        cp "$base_schema" "$build_dir/schemas/${module}/base.sql"
    fi

    # Copy migrations directory
    local migrations_dir="$REPO_ROOT/docs/db/migrations/${module}"
    if [ -d "$migrations_dir" ]; then
        mkdir -p "$build_dir/schemas/${module}/migrations"
        # Best effort: the directory may exist without containing any *.sql file.
        cp "$migrations_dir"/*.sql "$build_dir/schemas/${module}/migrations/" 2>/dev/null || true
    fi

    echo "$build_dir"
}
# Build image for module and version.
#
# Arguments:
#   $1  module name
#   $2  schema version; becomes part of the image tag and the SCHEMA_VERSION
#       build argument
# Environment:
#   PUSH_IMAGES  when exactly "true", the image is pushed after a successful build
# Returns:
#   docker build's exit status when the build fails.
build_image() {
    local module=$1
    local version=$2
    echo "Building ${module} schema version ${version}..."

    local build_dir
    build_dir=$(prepare_schema_context "$module" "$version")
    local image_tag="${REGISTRY}/schema-test:${module}-${version}"
    local schema_date
    schema_date=$(date -u +%Y-%m-%dT%H:%M:%SZ)

    # Copy Dockerfile to build context
    cp "$SCRIPT_DIR/Dockerfile" "$build_dir/"

    # Build the image. The status is captured rather than left to `set -e`, so
    # the temporary context can be removed even when the build fails.
    local build_status=0
    docker build \
        --build-arg MODULE="$module" \
        --build-arg SCHEMA_VERSION="$version" \
        --build-arg SCHEMA_DATE="$schema_date" \
        --build-arg POSTGRES_VERSION="$POSTGRES_VERSION" \
        -t "$image_tag" \
        "$build_dir" || build_status=$?

    # Cleanup build directory (only the build needs it; the push does not)
    rm -rf "$build_dir"

    if [ "$build_status" -ne 0 ]; then
        return "$build_status"
    fi
    echo "Built: $image_tag"

    # Push if requested
    if [ "$PUSH_IMAGES" = "true" ]; then
        echo "Pushing: $image_tag"
        docker push "$image_tag"
    fi
}
# Build images for one module.
#
# Arguments:
#   $1  module name
#   $2  optional version; when given only that version is built, otherwise
#       every version reported by get_schema_versions
build_module() {
    local module=$1
    local target_version=$2

    printf '%s\n' \
        "========================================" \
        "Building schema images for: $module" \
        "========================================"

    if [ -z "$target_version" ]; then
        local versions=$(get_schema_versions "$module")
        # Intentional word splitting: the list is space-separated.
        for version in $versions; do
            build_image "$module" "$version"
        done
        return
    fi

    build_image "$module" "$target_version"
}
# Main: dispatch on the first argument.
[ $# -ge 1 ] || usage

if [ "$1" = "--all" ]; then
    # Build every known module; an optional version in $2 applies to each of them.
    for module in "${MODULES[@]}"; do
        build_module "$module" "$2"
    done
elif [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
    usage
elif [[ " ${MODULES[*]} " =~ " $1 " ]]; then
    # $1 is one of the known module names.
    build_module "$1" "$2"
else
    echo "Error: Unknown module '$1'"
    echo "Valid modules: ${MODULES[*]}"
    exit 1
fi

printf '%s\n' "" "Build complete!" "To push images, run with PUSH_IMAGES=true"

View File

@@ -0,0 +1,70 @@
#!/bin/bash
# 00-init-schema.sh
# Initialize PostgreSQL with module schema for testing
# Sprint: SPRINT_20260105_002_005_TEST_cross_cutting
# Task: CCUT-008
set -e

# Inputs (environment):
#   STELLAOPS_MODULE          module whose schema lives under /schemas/<module>
#   STELLAOPS_SCHEMA_VERSION  version label, e.g. "v1.2.0" or "latest"
#   POSTGRES_USER / POSTGRES_DB  connection target for psql
echo "Initializing schema for module: ${STELLAOPS_MODULE}"
echo "Schema version: ${STELLAOPS_SCHEMA_VERSION}"

# Create extensions
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
CREATE EXTENSION IF NOT EXISTS "btree_gist";
EOSQL

# Apply base schema if exists
BASE_SCHEMA="/schemas/${STELLAOPS_MODULE}/base.sql"
if [ -f "$BASE_SCHEMA" ]; then
    echo "Applying base schema: $BASE_SCHEMA"
    psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" -f "$BASE_SCHEMA"
fi

# Apply versioned schema if exists
VERSION_SCHEMA="/schemas/${STELLAOPS_MODULE}/${STELLAOPS_SCHEMA_VERSION}.sql"
if [ -f "$VERSION_SCHEMA" ]; then
    echo "Applying version schema: $VERSION_SCHEMA"
    psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" -f "$VERSION_SCHEMA"
fi

# Apply all migrations up to version
MIGRATIONS_DIR="/schemas/${STELLAOPS_MODULE}/migrations"
if [ -d "$MIGRATIONS_DIR" ]; then
    echo "Applying migrations from: $MIGRATIONS_DIR"

    # Get version number for comparison: drop the first "v" and all dots,
    # e.g. "v1.2.0" -> "120".
    VERSION_NUM=$(echo "$STELLAOPS_SCHEMA_VERSION" | sed 's/v//' | sed 's/\.//g')

    # Only a purely numeric value can serve as a ceiling. For labels such as
    # "latest" the numeric test below used to fail with "integer expression
    # expected" and applied every migration by accident; make that explicit.
    case "$VERSION_NUM" in
        ''|*[!0-9]*) VERSION_NUM="" ;;
    esac

    # NOTE(review): the ceiling is the concatenated version digits (v1.0.3 ->
    # 103) while the migration number is every digit in the file name. Confirm
    # the two are actually comparable; with small sequence numbers in the file
    # names no migration is ever skipped.
    for migration in $(ls -1 "$MIGRATIONS_DIR"/*.sql 2>/dev/null | sort -V); do
        MIGRATION_VERSION=$(basename "$migration" .sql | sed 's/[^0-9]//g')
        # Files without any digit in their name carry no version and are always applied.
        if [ -n "$VERSION_NUM" ] && [ -n "$MIGRATION_VERSION" ] && [ "$MIGRATION_VERSION" -gt "$VERSION_NUM" ]; then
            echo "Skipping migration $migration (version $MIGRATION_VERSION > $VERSION_NUM)"
            continue
        fi
        echo "Applying migration: $migration"
        psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" -f "$migration"
    done
fi

# Record schema version in metadata table.
# The module and version are handed to psql as variables and interpolated with
# :'name', which quotes them as SQL literals; the heredoc delimiter is quoted
# so the shell leaves the SQL text untouched.
psql -v ON_ERROR_STOP=1 \
    -v module="$STELLAOPS_MODULE" \
    -v schema_version="$STELLAOPS_SCHEMA_VERSION" \
    --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-'EOSQL'
CREATE TABLE IF NOT EXISTS _schema_metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at TIMESTAMPTZ DEFAULT NOW()
);
INSERT INTO _schema_metadata (key, value)
VALUES
('module', :'module'),
('schema_version', :'schema_version'),
('initialized_at', NOW()::TEXT)
ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value, updated_at = NOW();
EOSQL

echo "Schema initialization complete for ${STELLAOPS_MODULE} version ${STELLAOPS_SCHEMA_VERSION}"