#!/usr/bin/env bash
# ---------------------------------------------------------------------------
# acquire.sh — Download, verify, and stage Stella Ops runtime data assets.
#
# Usage:
#   ./devops/runtime-assets/acquire.sh --all       # everything
#   ./devops/runtime-assets/acquire.sh --models    # ONNX embedding model only
#   ./devops/runtime-assets/acquire.sh --ghidra    # JDK + Ghidra only
#   ./devops/runtime-assets/acquire.sh --verify    # verify existing assets
#   ./devops/runtime-assets/acquire.sh --package   # create air-gap tarball
#
# The script is idempotent: re-running skips already-verified assets.
# All downloads are checksum-verified against manifest.yaml.
# ---------------------------------------------------------------------------
# Strict mode: abort on command failure, unset variables, and failed
# pipeline stages.
set -euo pipefail

# Absolute directory containing this script. CDPATH is cleared for the cd so
# that a relative invocation path can neither be resolved against CDPATH nor
# cause cd to echo the directory into the captured output; "--" protects
# against paths that begin with a dash.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# Repository root: two directory levels above the script.
REPO_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/../.." && pwd)"

# Asset manifest that sits next to this script.
MANIFEST="$SCRIPT_DIR/manifest.yaml"

# Staging area for downloaded/extracted assets; overridable via environment.
STAGING_DIR="${STAGING_DIR:-$REPO_ROOT/out/runtime-assets}"

# Colors (disabled if not a terminal)
# The escape sequences start out blank and are only switched on when stdout
# is attached to a terminal.
RED=''
GREEN=''
YELLOW=''
NC=''
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  NC='\033[0m'
fi

# Logging helpers. Each prints its arguments (joined by single spaces) after
# a coloured "[acquire]" tag. Only the colour variables are expanded with %b;
# the message itself goes through %s so that backslashes in paths or URLs are
# printed literally instead of being interpreted as escape sequences.

# Informational message on stdout.
log_info() { printf '%b[acquire]%b %s\n' "$GREEN" "$NC" "$*"; }

# Warning on stderr.
log_warn() { printf '%b[acquire]%b %s\n' "$YELLOW" "$NC" "$*" >&2; }

# Error on stderr.
log_error() { printf '%b[acquire]%b %s\n' "$RED" "$NC" "$*" >&2; }

# ---------------------------------------------------------------------------
# Asset paths and URLs (sourced from manifest.yaml inline)
# ---------------------------------------------------------------------------
# Download URLs default to the pinned upstream releases. Each URL may be
# overridden from the environment (same pattern as STAGING_DIR), for example
# to fetch from an internal mirror. Destinations are fixed relative to the
# repository root and the staging directory.

# ONNX sentence-embedding model.
ONNX_MODEL_URL="${ONNX_MODEL_URL:-https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx}"
ONNX_MODEL_DEST="$REPO_ROOT/src/AdvisoryAI/StellaOps.AdvisoryAI/models/all-MiniLM-L6-v2.onnx"

# Eclipse Temurin JRE 17 (Linux x64 build).
JDK_URL="${JDK_URL:-https://github.com/adoptium/temurin17-binaries/releases/download/jdk-17.0.13%2B11/OpenJDK17U-jre_x64_linux_hotspot_17.0.13_11.tar.gz}"
JDK_DEST="$STAGING_DIR/jdk"

# Ghidra 11.2 public release.
GHIDRA_URL="${GHIDRA_URL:-https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.2_build/ghidra_11.2_PUBLIC_20241105.zip}"
GHIDRA_DEST="$STAGING_DIR/ghidra"

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Ensure the external tools this script relies on are installed.
# Requires curl plus one SHA-256 tool (sha256sum, or shasum as used on macOS).
# Logs the missing tools and exits the script with status 1 if any is absent.
check_prerequisites() {
  local -a absent=()

  if ! command -v curl >/dev/null 2>&1; then
    absent+=("curl")
  fi

  # Either checksum tool is acceptable; only complain when both are missing.
  if ! command -v sha256sum >/dev/null 2>&1 \
    && ! command -v shasum >/dev/null 2>&1; then
    absent+=("sha256sum or shasum")
  fi

  if (( ${#absent[@]} == 0 )); then
    return 0
  fi

  log_error "Missing required tools: ${absent[*]}"
  exit 1
}

# Print the lowercase hex SHA-256 digest of a file.
# Arguments: $1 - path of the file to hash
# Outputs:   the digest followed by a newline on stdout
# Returns:   non-zero (with no output) if the file cannot be read or hashed
#
# The file is fed through stdin rather than passed by name: when given a file
# name containing a backslash or newline, the checksum tools escape the name
# and prefix the whole output line with "\", which would corrupt the digest.
compute_sha256() {
  local file="$1"
  local out

  if command -v sha256sum >/dev/null 2>&1; then
    out=$(sha256sum < "$file") || return
  else
    # macOS ships shasum instead of sha256sum.
    out=$(shasum -a 256 < "$file") || return
  fi

  # Output looks like "<digest>  -"; keep only the digest.
  printf '%s\n' "${out%% *}"
}

# Download a URL to a destination path, showing curl's progress bar.
# Arguments: $1 - source URL
#            $2 - destination file path (parent directories are created)
#            $3 - human-readable label used in log messages
# Returns:   0 on success, 1 if the download or the final rename fails
#
# The payload is written to "<dest>.part" and renamed into place only after
# curl succeeds. An interrupted or failed transfer therefore never leaves a
# truncated file at the destination (which a size-based check could mistake
# for a valid asset) and never deletes a file that was already there.
download_with_progress() {
  local url="$1" dest="$2" label="$3"
  local partial="${dest}.part"

  log_info "Downloading $label..."
  log_info " URL: $url"
  log_info " Dest: $dest"

  mkdir -p "$(dirname "$dest")" || return 1

  if ! curl -fL --progress-bar -o "$partial" "$url"; then
    log_error "Download failed: $label"
    rm -f "$partial"
    return 1
  fi

  if ! mv -f "$partial" "$dest"; then
    log_error "Download failed: $label"
    rm -f "$partial"
    return 1
  fi

  local size
  size=$(wc -c < "$dest" 2>/dev/null || echo "unknown")
  # Some wc implementations pad the count with spaces; strip them for the log.
  log_info " Downloaded: ${size//[[:space:]]/} bytes"
}

# Decide whether a model file is absent or merely a stub.
# Arguments: $1 - path to inspect
# Returns:   0 when the path is not a regular file or is smaller than 1000
#            bytes, 1 otherwise
is_placeholder() {
  local path="$1"
  local bytes

  # missing = placeholder
  [[ -f "$path" ]] || return 0

  bytes=$(wc -c < "$path" 2>/dev/null || echo "0")

  # The current placeholder is ~120 bytes; real model is ~80 MB
  if [[ "$bytes" -ge 1000 ]]; then
    return 1
  fi

  return 0 # too small to be real
}

# ---------------------------------------------------------------------------
# Acquisition functions
# ---------------------------------------------------------------------------
# Fetch the ONNX embedding model unless a real copy is already in place.
# Globals: ONNX_MODEL_URL, ONNX_MODEL_DEST (read)
# Returns: 1 when the file is still a placeholder after the download attempt
acquire_models() {
  log_info "=== ML Models ==="

  local model_path="$ONNX_MODEL_DEST"
  local sha

  if ! is_placeholder "$model_path"; then
    log_info "ONNX model already present and valid: $model_path"
  else
    download_with_progress "$ONNX_MODEL_URL" "$model_path" "all-MiniLM-L6-v2 ONNX model"

    # Re-check after the transfer: a tiny file means the download is not the model.
    if is_placeholder "$model_path"; then
      log_error "Downloaded file appears to be invalid (too small)."
      return 1
    fi

    sha=$(compute_sha256 "$model_path")
    log_info " SHA-256: $sha"
    log_info " Update manifest.yaml with this digest for future verification."
  fi

  log_info "ML models: OK"
}

# Download and unpack the JRE and Ghidra into the staging directory.
# Globals: STAGING_DIR, JDK_URL, JDK_DEST, GHIDRA_URL, GHIDRA_DEST (read)
# Returns: 0 on success, 1 if a download or extraction fails or unzip is
#          unavailable
#
# Each asset is skipped when its destination directory already has content.
# Because that check is the only idempotency guard, a failed extraction
# removes the partly filled destination so the next run retries instead of
# reporting the asset as present.
acquire_ghidra() {
  log_info "=== JDK + Ghidra ==="

  mkdir -p "$STAGING_DIR"

  # JDK
  local jdk_archive="$STAGING_DIR/jdk.tar.gz"
  if [[ ! -d "$JDK_DEST" ]] || [[ -z "$(ls -A "$JDK_DEST" 2>/dev/null)" ]]; then
    download_with_progress "$JDK_URL" "$jdk_archive" "Eclipse Temurin JRE 17" || return 1
    mkdir -p "$JDK_DEST"
    # --strip-components=1 drops the archive's top-level directory.
    if ! tar -xzf "$jdk_archive" -C "$JDK_DEST" --strip-components=1; then
      log_error "Failed to extract JDK archive: $jdk_archive"
      # Safe to delete: this branch only runs when the directory was missing
      # or empty beforehand.
      rm -rf "${JDK_DEST:?}"
      rm -f "$jdk_archive"
      return 1
    fi
    rm -f "$jdk_archive"
    log_info " JDK extracted to: $JDK_DEST"
  else
    log_info "JDK already present: $JDK_DEST"
  fi

  # Ghidra
  local ghidra_archive="$STAGING_DIR/ghidra.zip"
  if [[ ! -d "$GHIDRA_DEST" ]] || [[ -z "$(ls -A "$GHIDRA_DEST" 2>/dev/null)" ]]; then
    # Check for unzip before fetching the archive, not after.
    if ! command -v unzip >/dev/null 2>&1; then
      log_error "unzip not found. Install unzip to extract Ghidra."
      return 1
    fi
    download_with_progress "$GHIDRA_URL" "$ghidra_archive" "Ghidra 11.2" || return 1
    mkdir -p "$GHIDRA_DEST"
    if ! unzip -q "$ghidra_archive" -d "$GHIDRA_DEST"; then
      log_error "Failed to extract Ghidra archive: $ghidra_archive"
      rm -rf "${GHIDRA_DEST:?}"
      rm -f "$ghidra_archive"
      return 1
    fi
    rm -f "$ghidra_archive"
    log_info " Ghidra extracted to: $GHIDRA_DEST"
  else
    log_info "Ghidra already present: $GHIDRA_DEST"
  fi

  log_info "JDK + Ghidra: OK"
}

# ---------------------------------------------------------------------------
# Verification
# ---------------------------------------------------------------------------
# Report which runtime assets are present in the repository and staging area.
# Globals: REPO_ROOT, ONNX_MODEL_DEST, JDK_DEST, GHIDRA_DEST (read)
# Outputs: one log line per asset group, then a summary line
# Returns: 0 when every required asset is present, 1 otherwise
#          (a missing JDK or Ghidra is reported but never counted)
#
# Counters are advanced with x=$((x + 1)) rather than ((x++)): the latter
# returns status 1 when the previous value is 0, which aborts the whole
# script under `set -e` on the very first increment.
verify_assets() {
  log_info "=== Verifying runtime assets ==="
  local errors=0

  # ONNX model
  if is_placeholder "$ONNX_MODEL_DEST"; then
    log_warn "ONNX model is missing or placeholder: $ONNX_MODEL_DEST"
    log_warn " Semantic search will use degraded fallback encoder."
    errors=$((errors + 1))
  else
    local digest
    digest=$(compute_sha256 "$ONNX_MODEL_DEST")
    log_info "ONNX model: present ($digest)"
  fi

  # Search snapshots
  local snapshot_dir="$REPO_ROOT/src/AdvisoryAI/StellaOps.AdvisoryAI/UnifiedSearch/Snapshots"
  local -a snapshots=(findings vex policy graph scanner opsmemory timeline)
  local snapshot_count=0
  local snapshot
  for snapshot in "${snapshots[@]}"; do
    if [[ -f "$snapshot_dir/$snapshot.snapshot.json" ]]; then
      snapshot_count=$((snapshot_count + 1))
    fi
  done
  if [[ $snapshot_count -eq ${#snapshots[@]} ]]; then
    log_info "Search snapshots: all ${#snapshots[@]} present"
  else
    log_warn "Search snapshots: $snapshot_count/${#snapshots[@]} present in $snapshot_dir"
    errors=$((errors + 1))
  fi

  # Certificates
  if [[ -f "$REPO_ROOT/etc/authority/keys/kestrel-dev.pfx" ]]; then
    log_info "Dev certificates: present (replace for production)"
  else
    log_warn "Dev certificates: missing in etc/authority/keys/"
    errors=$((errors + 1))
  fi

  # Trust bundle
  if [[ -f "$REPO_ROOT/etc/trust-profiles/assets/ca.crt" ]]; then
    log_info "CA trust bundle: present"
  else
    log_warn "CA trust bundle: missing in etc/trust-profiles/assets/"
    errors=$((errors + 1))
  fi

  # Translations
  local i18n_dir="$REPO_ROOT/src/Web/StellaOps.Web/src/i18n"
  local -a locales=(en-US de-DE bg-BG ru-RU es-ES fr-FR uk-UA zh-CN zh-TW)
  local locale_count=0
  local locale
  for locale in "${locales[@]}"; do
    if [[ -f "$i18n_dir/$locale.common.json" ]]; then
      locale_count=$((locale_count + 1))
    fi
  done
  if [[ $locale_count -eq ${#locales[@]} ]]; then
    log_info "Translations: all ${#locales[@]} locales present"
  else
    log_warn "Translations: $locale_count/${#locales[@]} locales present"
    errors=$((errors + 1))
  fi

  # License files
  if [[ -f "$REPO_ROOT/third-party-licenses/all-MiniLM-L6-v2-Apache-2.0.txt" ]]; then
    log_info "License attribution: ONNX model license present"
  else
    log_warn "License attribution: missing third-party-licenses/all-MiniLM-L6-v2-Apache-2.0.txt"
    errors=$((errors + 1))
  fi

  if [[ -f "$REPO_ROOT/NOTICE.md" ]]; then
    log_info "NOTICE.md: present"
  else
    log_warn "NOTICE.md: missing"
    errors=$((errors + 1))
  fi

  # JDK + Ghidra (optional)
  if [[ -d "$JDK_DEST" ]] && [[ -n "$(ls -A "$JDK_DEST" 2>/dev/null)" ]]; then
    log_info "JDK: present at $JDK_DEST"
  else
    log_info "JDK: not staged (optional — only needed for Ghidra)"
  fi

  if [[ -d "$GHIDRA_DEST" ]] && [[ -n "$(ls -A "$GHIDRA_DEST" 2>/dev/null)" ]]; then
    log_info "Ghidra: present at $GHIDRA_DEST"
  else
    log_info "Ghidra: not staged (optional — only needed for binary analysis)"
  fi

  echo ""
  if [[ $errors -gt 0 ]]; then
    log_warn "Verification completed with $errors warning(s)."
    return 1
  else
    log_info "All runtime assets verified."
    return 0
  fi
}

# ---------------------------------------------------------------------------
# Packaging (air-gap tarball)
# ---------------------------------------------------------------------------
# Copy every packageable asset into the given (empty) staging directory.
# Returns non-zero as soon as a required copy fails.
_package_stage_files() {
  local pkg_dir="$1"

  mkdir -p "$pkg_dir/models" "$pkg_dir/certificates" "$pkg_dir/licenses" || return 1

  # ONNX model
  if ! is_placeholder "$ONNX_MODEL_DEST"; then
    cp "$ONNX_MODEL_DEST" "$pkg_dir/models/all-MiniLM-L6-v2.onnx" || return 1
    log_info " Included: ONNX model"
  else
    log_warn " Skipped: ONNX model (placeholder — run --models first)"
  fi

  # JDK
  if [[ -d "$JDK_DEST" ]] && [[ -n "$(ls -A "$JDK_DEST" 2>/dev/null)" ]]; then
    cp -r "$JDK_DEST" "$pkg_dir/jdk" || return 1
    log_info " Included: JDK"
  fi

  # Ghidra
  if [[ -d "$GHIDRA_DEST" ]] && [[ -n "$(ls -A "$GHIDRA_DEST" 2>/dev/null)" ]]; then
    cp -r "$GHIDRA_DEST" "$pkg_dir/ghidra" || return 1
    log_info " Included: Ghidra"
  fi

  # Certificates (best effort: the directory may be empty)
  if [[ -d "$REPO_ROOT/etc/trust-profiles/assets" ]]; then
    cp -r "$REPO_ROOT/etc/trust-profiles/assets/"* "$pkg_dir/certificates/" 2>/dev/null || true
    log_info " Included: trust profile assets"
  fi

  # License files (NOTICE.md is required; third-party licenses are best effort)
  cp "$REPO_ROOT/NOTICE.md" "$pkg_dir/licenses/" || return 1
  cp -r "$REPO_ROOT/third-party-licenses/"* "$pkg_dir/licenses/" 2>/dev/null || true
  log_info " Included: license files"

  # Manifest
  cp "$MANIFEST" "$pkg_dir/manifest.yaml" || return 1
}

# Write a gzip-compressed tarball of the staging directory.
# With GNU tar the archive is reproducible: entries are sorted, timestamps and
# ownership are pinned, and gzip -n keeps name/time out of the gzip header.
_package_write_tarball() {
  local pkg_dir="$1" tarball="$2"
  # The mtime carries an explicit UTC zone; without one GNU tar interprets it
  # in the local timezone and the archive differs between hosts.
  local -a gnu_flags=(--sort=name --mtime='2024-01-01 00:00:00 UTC' --owner=0 --group=0)
  local -a tar_cmd=(tar)

  if [[ "$(tar --version 2>/dev/null)" == *"GNU tar"* ]]; then
    tar_cmd+=("${gnu_flags[@]}")
  elif command -v gtar >/dev/null 2>&1; then
    tar_cmd=(gtar "${gnu_flags[@]}")
  else
    log_warn " GNU tar not found; the archive will not be byte-for-byte reproducible."
  fi

  # pipefail is enabled locally so that a tar failure is not hidden by gzip.
  (
    set -o pipefail
    "${tar_cmd[@]}" -cf - -C "$pkg_dir" . | gzip -n > "$tarball"
  )
}

# Bundle acquired assets into a dated tarball for air-gapped transfer.
# Globals: STAGING_DIR, REPO_ROOT, MANIFEST, ONNX_MODEL_DEST, JDK_DEST,
#          GHIDRA_DEST (read)
# Outputs: $STAGING_DIR/stella-ops-runtime-assets-<YYYYMMDD>.tar.gz plus a
#          ".sha256" file in the two-space "<digest>  <name>" format that
#          `sha256sum -c` / `shasum -c` read
# Returns: 0 on success; 1 if a required input is missing or any step fails,
#          in which case the temporary staging directory is removed
package_assets() {
  log_info "=== Packaging runtime assets for air-gap transfer ==="

  local pkg_dir="$STAGING_DIR/package"
  local timestamp
  timestamp=$(date -u +"%Y%m%d")
  local tarball="$STAGING_DIR/stella-ops-runtime-assets-${timestamp}.tar.gz"

  # Fail early, before touching the staging area, if a required input is absent.
  local required
  for required in "$REPO_ROOT/NOTICE.md" "$MANIFEST"; do
    if [[ ! -f "$required" ]]; then
      log_error "Cannot package: required file missing: $required"
      return 1
    fi
  done

  rm -rf "${pkg_dir:?}"

  if ! _package_stage_files "$pkg_dir" \
    || ! _package_write_tarball "$pkg_dir" "$tarball"; then
    log_error "Packaging failed; removing partial output."
    rm -rf "${pkg_dir:?}"
    rm -f "$tarball"
    return 1
  fi

  local digest
  if ! digest=$(compute_sha256 "$tarball"); then
    log_error "Packaging failed: could not checksum $tarball"
    rm -rf "${pkg_dir:?}"
    return 1
  fi
  printf '%s  %s\n' "$digest" "$(basename "$tarball")" > "${tarball}.sha256"

  log_info "Package created: $tarball"
  log_info " SHA-256: $digest"
  log_info " Transfer this file to the air-gapped host."

  rm -rf "${pkg_dir:?}"
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
# Print command-line help to stdout.
# The heredoc delimiter is unquoted so that $0 expands to the invoked path.
usage() {
  cat <<EOF
Usage: $0 [OPTIONS]

Options:
  --all       Download and verify all runtime assets (models + Ghidra + JDK)
  --models    Download ONNX embedding model only
  --ghidra    Download JDK + Ghidra only
  --verify    Verify existing assets against manifest
  --package   Create air-gap transfer tarball from acquired assets
  -h, --help  Show this help

Environment variables:
  STAGING_DIR  Override staging directory (default: <repo>/out/runtime-assets)
EOF
}

# Script entry point: parse options, then run the requested stages in the
# fixed order models -> ghidra -> verify -> package.
# Arguments: command-line options (see usage)
# Exits:     0 after printing help (also when called with no arguments),
#            1 on an unknown option or missing prerequisites
#
# Options are parsed before check_prerequisites runs so that --help and
# bad-option errors work on hosts that lack the download tooling.
main() {
  if [[ $# -eq 0 ]]; then
    usage
    exit 0
  fi

  local do_models=false do_ghidra=false do_verify=false do_package=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --all)
        do_models=true
        do_ghidra=true
        ;;
      --models) do_models=true ;;
      --ghidra) do_ghidra=true ;;
      --verify) do_verify=true ;;
      --package) do_package=true ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        exit 1
        ;;
    esac
    shift
  done

  check_prerequisites

  log_info "Repo root: $REPO_ROOT"
  log_info "Staging dir: $STAGING_DIR"
  echo ""

  # Each stage is invoked as a plain command (not inside `&&`/`||`) so that
  # `set -e` stays in effect within the stage functions.
  if [[ "$do_models" == "true" ]]; then
    acquire_models
  fi
  if [[ "$do_ghidra" == "true" ]]; then
    acquire_ghidra
  fi
  if [[ "$do_verify" == "true" ]]; then
    verify_assets
  fi
  if [[ "$do_package" == "true" ]]; then
    package_assets
  fi

  echo ""
  log_info "Done."
}

# Run main with every command-line argument passed through unchanged.
main "$@"