#!/usr/bin/env bash
# ---------------------------------------------------------------------------
# acquire.sh — Download, verify, and stage Stella Ops runtime data assets.
#
# Usage:
#   ./devops/runtime-assets/acquire.sh --all       # models + JDK/Ghidra
#   ./devops/runtime-assets/acquire.sh --models    # ONNX embedding model only
#   ./devops/runtime-assets/acquire.sh --ghidra    # JDK + Ghidra only
#   ./devops/runtime-assets/acquire.sh --verify    # verify existing assets
#   ./devops/runtime-assets/acquire.sh --package   # create air-gap tarball
#
# Environment:
#   STAGING_DIR         Staging directory (default: <repo>/out/runtime-assets)
#   ONNX_MODEL_SHA256   Expected SHA-256 of the ONNX model download (optional)
#   JDK_SHA256          Expected SHA-256 of the JDK archive (optional)
#   GHIDRA_SHA256       Expected SHA-256 of the Ghidra archive (optional)
#
# The script is idempotent: re-running skips assets that are already present.
# A download is checksum-verified only when the matching *_SHA256 variable is
# set (presumably to the digest recorded in manifest.yaml); otherwise the
# digest is merely printed.
# ---------------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
MANIFEST="$SCRIPT_DIR/manifest.yaml"
STAGING_DIR="${STAGING_DIR:-$REPO_ROOT/out/runtime-assets}"

# Colors (disabled if not a terminal)
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  NC=''
fi

# Logging helpers. printf keeps the message itself literal (%s); only the
# color escape sequences are interpreted (%b). Warnings/errors go to stderr.
log_info() { printf '%b[acquire]%b %s\n' "$GREEN" "$NC" "$*"; }
log_warn() { printf '%b[acquire]%b %s\n' "$YELLOW" "$NC" "$*" >&2; }
log_error() { printf '%b[acquire]%b %s\n' "$RED" "$NC" "$*" >&2; }

# ---------------------------------------------------------------------------
# Asset paths and URLs (sourced from manifest.yaml inline)
# ---------------------------------------------------------------------------

ONNX_MODEL_URL="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx"
ONNX_MODEL_DEST="$REPO_ROOT/src/AdvisoryAI/StellaOps.AdvisoryAI/models/all-MiniLM-L6-v2.onnx"

JDK_URL="https://github.com/adoptium/temurin17-binaries/releases/download/jdk-17.0.13%2B11/OpenJDK17U-jre_x64_linux_hotspot_17.0.13_11.tar.gz"
JDK_DEST="$STAGING_DIR/jdk"

GHIDRA_URL="https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.2_build/ghidra_11.2_PUBLIC_20241105.zip"
GHIDRA_DEST="$STAGING_DIR/ghidra"

# Optional expected digests. Empty means "do not verify".
ONNX_MODEL_SHA256="${ONNX_MODEL_SHA256:-}"
JDK_SHA256="${JDK_SHA256:-}"
GHIDRA_SHA256="${GHIDRA_SHA256:-}"

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Exit with status 1 if curl or a SHA-256 tool (sha256sum / shasum) is missing.
check_prerequisites() {
  local missing=()

  command -v curl >/dev/null 2>&1 || missing+=("curl")
  command -v sha256sum >/dev/null 2>&1 || {
    # macOS uses shasum
    command -v shasum >/dev/null 2>&1 || missing+=("sha256sum or shasum")
  }

  if [[ ${#missing[@]} -gt 0 ]]; then
    log_error "Missing required tools: ${missing[*]}"
    exit 1
  fi
}

# Print the hex SHA-256 digest of file $1 on stdout.
compute_sha256() {
  local file="$1"
  if command -v sha256sum >/dev/null 2>&1; then
    sha256sum "$file" | awk '{print $1}'
  else
    shasum -a 256 "$file" | awk '{print $1}'
  fi
}

# Compare the SHA-256 of a file with an expected digest.
# Arguments: $1 - file, $2 - expected hex digest (empty = skip), $3 - label
# Returns:   0 when the digest matches or no digest was given, 1 otherwise.
verify_checksum() {
  local file="$1" expected="$2" label="$3"
  [[ -n "$expected" ]] || return 0

  local actual
  actual=$(compute_sha256 "$file") || return 1
  # Digest tools print lowercase hex; accept an uppercase expectation too.
  expected=$(printf '%s' "$expected" | tr 'A-F' 'a-f')

  if [[ "$actual" != "$expected" ]]; then
    log_error "Checksum mismatch: $label"
    log_error " Expected: $expected"
    log_error " Actual: $actual"
    return 1
  fi
  log_info " Checksum OK: $label"
}

# Return 0 when $1 is a directory containing at least one entry.
dir_has_content() {
  local dir="$1"
  [[ -d "$dir" && -n "$(ls -A "$dir" 2>/dev/null)" ]]
}

# Download a URL to a destination path.
# Arguments: $1 - URL, $2 - destination file, $3 - human-readable label,
#            $4 - expected SHA-256 (optional; empty = no verification)
# Returns:   0 on success, 1 on download failure or checksum mismatch.
# The payload is written to "<dest>.part" and moved into place only after it
# downloaded (and verified) successfully, so a failed attempt never deletes
# or truncates a file that already exists at the destination.
download_with_progress() {
  local url="$1" dest="$2" label="$3" expected_sha256="${4:-}"
  local partial="${dest}.part"

  log_info "Downloading $label..."
  log_info " URL: $url"
  log_info " Dest: $dest"

  mkdir -p "$(dirname "$dest")"

  if ! curl -fL --progress-bar -o "$partial" "$url"; then
    log_error "Download failed: $label"
    rm -f -- "$partial"
    return 1
  fi

  local size
  size=$(wc -c < "$partial" 2>/dev/null || echo "unknown")
  size="${size//[[:space:]]/}" # BSD wc pads its output with spaces
  log_info " Downloaded: $size bytes"

  if ! verify_checksum "$partial" "$expected_sha256" "$label"; then
    rm -f -- "$partial"
    return 1
  fi

  mv -f -- "$partial" "$dest"
}

# Return 0 when $1 is missing or smaller than 1000 bytes, 1 otherwise.
is_placeholder() {
  local file="$1"
  if [[ ! -f "$file" ]]; then
    return 0 # missing = placeholder
  fi

  local size
  size=$(wc -c < "$file" 2>/dev/null || echo "0")
  # The current placeholder is ~120 bytes; real model is ~80 MB
  if [[ "$size" -lt 1000 ]]; then
    return 0 # too small to be real
  fi
  return 1
}

# ---------------------------------------------------------------------------
# Acquisition functions
# ---------------------------------------------------------------------------

# Download the ONNX embedding model unless a real one is already in place.
acquire_models() {
  log_info "=== ML Models ==="

  if ! is_placeholder "$ONNX_MODEL_DEST"; then
    log_info "ONNX model already present and valid: $ONNX_MODEL_DEST"
    log_info "ML models: OK"
    return 0
  fi

  download_with_progress "$ONNX_MODEL_URL" "$ONNX_MODEL_DEST" \
    "all-MiniLM-L6-v2 ONNX model" "$ONNX_MODEL_SHA256" || return 1

  if is_placeholder "$ONNX_MODEL_DEST"; then
    log_error "Downloaded file appears to be invalid (too small)."
    return 1
  fi

  local digest
  digest=$(compute_sha256 "$ONNX_MODEL_DEST") || return 1
  log_info " SHA-256: $digest"
  log_info " Update manifest.yaml with this digest for future verification."

  log_info "ML models: OK"
}

# Download and unpack the JRE and Ghidra into the staging directory.
# Each archive is extracted into "<dest>.partial" and renamed afterwards, so
# an interrupted extraction is not mistaken for a complete install next run.
acquire_ghidra() {
  log_info "=== JDK + Ghidra ==="
  mkdir -p "$STAGING_DIR"

  # JDK
  if dir_has_content "$JDK_DEST"; then
    log_info "JDK already present: $JDK_DEST"
  else
    local jdk_archive="$STAGING_DIR/jdk.tar.gz"
    local jdk_partial="${JDK_DEST}.partial"

    download_with_progress "$JDK_URL" "$jdk_archive" \
      "Eclipse Temurin JRE 17" "$JDK_SHA256" || return 1

    rm -rf -- "${jdk_partial:?}"
    mkdir -p "$jdk_partial"
    if ! tar -xzf "$jdk_archive" -C "$jdk_partial" --strip-components=1; then
      log_error "Failed to extract JDK archive: $jdk_archive"
      rm -rf -- "${jdk_partial:?}"
      return 1
    fi
    rm -rf -- "${JDK_DEST:?}"
    mv -- "$jdk_partial" "$JDK_DEST"
    rm -f -- "$jdk_archive"
    log_info " JDK extracted to: $JDK_DEST"
  fi

  # Ghidra
  if dir_has_content "$GHIDRA_DEST"; then
    log_info "Ghidra already present: $GHIDRA_DEST"
  else
    # Check for unzip before downloading so the archive is not fetched in vain.
    if ! command -v unzip >/dev/null 2>&1; then
      log_error "unzip not found. Install unzip to extract Ghidra."
      return 1
    fi

    local ghidra_archive="$STAGING_DIR/ghidra.zip"
    local ghidra_partial="${GHIDRA_DEST}.partial"

    download_with_progress "$GHIDRA_URL" "$ghidra_archive" \
      "Ghidra 11.2" "$GHIDRA_SHA256" || return 1

    rm -rf -- "${ghidra_partial:?}"
    mkdir -p "$ghidra_partial"
    if ! unzip -q "$ghidra_archive" -d "$ghidra_partial"; then
      log_error "Failed to extract Ghidra archive: $ghidra_archive"
      rm -rf -- "${ghidra_partial:?}"
      return 1
    fi
    rm -rf -- "${GHIDRA_DEST:?}"
    mv -- "$ghidra_partial" "$GHIDRA_DEST"
    rm -f -- "$ghidra_archive"
    log_info " Ghidra extracted to: $GHIDRA_DEST"
  fi

  log_info "JDK + Ghidra: OK"
}

# ---------------------------------------------------------------------------
# Verification
# ---------------------------------------------------------------------------

# Report which runtime assets are present.
# Returns: 0 when every required asset was found, 1 when at least one warning
#          was raised. JDK and Ghidra are reported but never counted.
# Counters use var=$((var + 1)): the ((var++)) form has exit status 1 when the
# old value is 0, which terminates the script under `set -e`.
verify_assets() {
  log_info "=== Verifying runtime assets ==="
  local errors=0

  # ONNX model
  if is_placeholder "$ONNX_MODEL_DEST"; then
    log_warn "ONNX model is missing or placeholder: $ONNX_MODEL_DEST"
    log_warn " Semantic search will use degraded fallback encoder."
    errors=$((errors + 1))
  else
    local digest
    digest=$(compute_sha256 "$ONNX_MODEL_DEST")
    log_info "ONNX model: present ($digest)"
  fi

  # Search snapshots
  local snapshot_dir="$REPO_ROOT/src/AdvisoryAI/StellaOps.AdvisoryAI/UnifiedSearch/Snapshots"
  local snapshot_count=0
  local f
  for f in findings vex policy graph scanner opsmemory timeline; do
    if [[ -f "$snapshot_dir/$f.snapshot.json" ]]; then
      snapshot_count=$((snapshot_count + 1))
    fi
  done
  if [[ $snapshot_count -eq 7 ]]; then
    log_info "Search snapshots: all 7 present"
  else
    log_warn "Search snapshots: $snapshot_count/7 present in $snapshot_dir"
    errors=$((errors + 1))
  fi

  # Certificates
  if [[ -f "$REPO_ROOT/etc/authority/keys/kestrel-dev.pfx" ]]; then
    log_info "Dev certificates: present (replace for production)"
  else
    log_warn "Dev certificates: missing in etc/authority/keys/"
    errors=$((errors + 1))
  fi

  # Trust bundle
  if [[ -f "$REPO_ROOT/etc/trust-profiles/assets/ca.crt" ]]; then
    log_info "CA trust bundle: present"
  else
    log_warn "CA trust bundle: missing in etc/trust-profiles/assets/"
    errors=$((errors + 1))
  fi

  # Translations
  local i18n_dir="$REPO_ROOT/src/Web/StellaOps.Web/src/i18n"
  local locale_count=0
  local locale
  for locale in en-US de-DE bg-BG ru-RU es-ES fr-FR uk-UA zh-CN zh-TW; do
    if [[ -f "$i18n_dir/$locale.common.json" ]]; then
      locale_count=$((locale_count + 1))
    fi
  done
  if [[ $locale_count -eq 9 ]]; then
    log_info "Translations: all 9 locales present"
  else
    log_warn "Translations: $locale_count/9 locales present"
    errors=$((errors + 1))
  fi

  # License files
  if [[ -f "$REPO_ROOT/third-party-licenses/all-MiniLM-L6-v2-Apache-2.0.txt" ]]; then
    log_info "License attribution: ONNX model license present"
  else
    log_warn "License attribution: missing third-party-licenses/all-MiniLM-L6-v2-Apache-2.0.txt"
    errors=$((errors + 1))
  fi

  if [[ -f "$REPO_ROOT/NOTICE.md" ]]; then
    log_info "NOTICE.md: present"
  else
    log_warn "NOTICE.md: missing"
    errors=$((errors + 1))
  fi

  # JDK + Ghidra (optional)
  if dir_has_content "$JDK_DEST"; then
    log_info "JDK: present at $JDK_DEST"
  else
    log_info "JDK: not staged (optional — only needed for Ghidra)"
  fi

  if dir_has_content "$GHIDRA_DEST"; then
    log_info "Ghidra: present at $GHIDRA_DEST"
  else
    log_info "Ghidra: not staged (optional — only needed for binary analysis)"
  fi

  echo ""
  if [[ $errors -gt 0 ]]; then
    log_warn "Verification completed with $errors warning(s)."
    return 1
  fi
  log_info "All runtime assets verified."
  return 0
}

# ---------------------------------------------------------------------------
# Packaging (air-gap tarball)
# ---------------------------------------------------------------------------

# Bundle the staged assets, trust-profile assets, license files and the
# manifest into $STAGING_DIR/stella-ops-runtime-assets-<UTC date>.tar.gz and
# write a "<tarball>.sha256" file next to it.
# Returns: 1 when NOTICE.md or the manifest is missing.
package_assets() {
  log_info "=== Packaging runtime assets for air-gap transfer ==="

  local pkg_dir="$STAGING_DIR/package"
  local timestamp
  timestamp=$(date -u +"%Y%m%d")
  local tarball="$STAGING_DIR/stella-ops-runtime-assets-${timestamp}.tar.gz"

  rm -rf -- "${pkg_dir:?}"
  mkdir -p "$pkg_dir/models" "$pkg_dir/certificates" "$pkg_dir/licenses"

  # ONNX model
  if ! is_placeholder "$ONNX_MODEL_DEST"; then
    cp "$ONNX_MODEL_DEST" "$pkg_dir/models/all-MiniLM-L6-v2.onnx"
    log_info " Included: ONNX model"
  else
    log_warn " Skipped: ONNX model (placeholder — run --models first)"
  fi

  # JDK
  if dir_has_content "$JDK_DEST"; then
    cp -r "$JDK_DEST" "$pkg_dir/jdk"
    log_info " Included: JDK"
  fi

  # Ghidra
  if dir_has_content "$GHIDRA_DEST"; then
    cp -r "$GHIDRA_DEST" "$pkg_dir/ghidra"
    log_info " Included: Ghidra"
  fi

  # Certificates (best effort: an empty assets directory is not an error)
  if [[ -d "$REPO_ROOT/etc/trust-profiles/assets" ]]; then
    cp -r "$REPO_ROOT/etc/trust-profiles/assets/"* "$pkg_dir/certificates/" 2>/dev/null || true
    log_info " Included: trust profile assets"
  fi

  # License files. NOTICE.md is mandatory; say so instead of a bare cp error.
  if [[ ! -f "$REPO_ROOT/NOTICE.md" ]]; then
    log_error "NOTICE.md not found in $REPO_ROOT; cannot package without it."
    rm -rf -- "${pkg_dir:?}"
    return 1
  fi
  cp "$REPO_ROOT/NOTICE.md" "$pkg_dir/licenses/"
  cp -r "$REPO_ROOT/third-party-licenses/"* "$pkg_dir/licenses/" 2>/dev/null || true
  log_info " Included: license files"

  # Manifest
  if [[ ! -f "$MANIFEST" ]]; then
    log_error "Manifest not found: $MANIFEST"
    rm -rf -- "${pkg_dir:?}"
    return 1
  fi
  cp "$MANIFEST" "$pkg_dir/manifest.yaml"

  # Create tarball (deterministic: sorted, fixed mtime, uid/gid 0).
  # --sort/--mtime/--owner/--group are GNU tar options; other tar
  # implementations get a plain archive and a warning.
  if [[ "$(tar --version 2>/dev/null)" == *"GNU tar"* ]]; then
    tar --sort=name \
      --mtime='2024-01-01 00:00:00' \
      --owner=0 --group=0 \
      -czf "$tarball" \
      -C "$pkg_dir" .
  else
    log_warn " GNU tar not found; tarball will not be reproducible."
    tar -czf "$tarball" -C "$pkg_dir" .
  fi

  local digest
  digest=$(compute_sha256 "$tarball")
  # "<digest><two spaces><name>" is the format `sha256sum -c` reads.
  printf '%s  %s\n' "$digest" "$(basename "$tarball")" > "${tarball}.sha256"

  log_info "Package created: $tarball"
  log_info " SHA-256: $digest"
  log_info " Transfer this file to the air-gapped host."

  rm -rf -- "${pkg_dir:?}"
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

# Print command-line help on stdout.
usage() {
  cat <<EOF
Usage: $(basename "$0") [OPTIONS]

Options:
  --all        Acquire all downloadable assets (models + JDK/Ghidra)
  --models     Download the ONNX embedding model
  --ghidra     Download JDK + Ghidra
  --verify     Verify existing runtime assets
  --package    Create the air-gap transfer tarball
  -h, --help   Show this help

Environment variables:
  STAGING_DIR         Override staging directory (default: <repo>/out/runtime-assets)
  ONNX_MODEL_SHA256   Expected SHA-256 of the ONNX model (optional)
  JDK_SHA256          Expected SHA-256 of the JDK archive (optional)
  GHIDRA_SHA256       Expected SHA-256 of the Ghidra archive (optional)
EOF
}

# Parse options, then run the selected stages in the fixed order
# models -> ghidra -> verify -> package.
# Returns: 0 on success or when only help was shown, 1 on an unknown option.
main() {
  if [[ $# -eq 0 ]]; then
    usage
    return 0
  fi

  local do_models=false do_ghidra=false do_verify=false do_package=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --all)
        do_models=true
        do_ghidra=true
        ;;
      --models) do_models=true ;;
      --ghidra) do_ghidra=true ;;
      --verify) do_verify=true ;;
      --package) do_package=true ;;
      -h | --help)
        usage
        return 0
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        return 1
        ;;
    esac
    shift
  done

  # Checked after parsing so that --help works without curl installed.
  check_prerequisites

  log_info "Repo root: $REPO_ROOT"
  log_info "Staging dir: $STAGING_DIR"
  echo ""

  # Stages are invoked bare (not as part of an && / || list) so `set -e`
  # stays in effect inside them and a failing stage stops the run.
  if [[ "$do_models" == "true" ]]; then
    acquire_models
  fi
  if [[ "$do_ghidra" == "true" ]]; then
    acquire_ghidra
  fi
  if [[ "$do_verify" == "true" ]]; then
    verify_assets
  fi
  if [[ "$do_package" == "true" ]]; then
    package_assets
  fi

  echo ""
  log_info "Done."
}

main "$@"