enrich the setup. setup fixes. minimize the consolidation plan

This commit is contained in:
master
2026-02-26 08:46:06 +02:00
parent 63c70a6d37
commit 4fe8eb56ae
26 changed files with 1568 additions and 646 deletions

View File

@@ -0,0 +1,389 @@
#!/usr/bin/env bash
# ---------------------------------------------------------------------------
# acquire.sh — Download, verify, and stage Stella Ops runtime data assets.
#
# Usage:
# ./devops/runtime-assets/acquire.sh --all # everything
# ./devops/runtime-assets/acquire.sh --models # ONNX embedding model only
# ./devops/runtime-assets/acquire.sh --ghidra # JDK + Ghidra only
# ./devops/runtime-assets/acquire.sh --verify # verify existing assets
# ./devops/runtime-assets/acquire.sh --package # create air-gap tarball
#
# The script is idempotent: re-running skips assets that are already present.
# NOTE: downloads are not yet compared against a checksum; the ONNX model's
# SHA-256 is only printed so that it can be recorded in manifest.yaml.
# ---------------------------------------------------------------------------
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute directory containing this script (resolved in two steps).
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"

# The repository root is two levels above the script directory.
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# The manifest sits next to the script. The staging directory may be
# overridden through the STAGING_DIR environment variable; when it is unset
# or empty the default under the repository root is used.
MANIFEST="${SCRIPT_DIR}/manifest.yaml"
STAGING_DIR="${STAGING_DIR:-${REPO_ROOT}/out/runtime-assets}"
# ANSI color escapes used by the log helpers. Default to no color and switch
# the palette on only when stdout is attached to a terminal.
RED=''
GREEN=''
YELLOW=''
NC=''
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  NC='\033[0m'
fi
# Shared formatter for the log helpers.
# Arguments: $1 - color escape for the "[acquire]" tag; remaining arguments
#            are joined with spaces to form the message.
# Outputs:   one line on stdout; backslash escapes are interpreted (echo -e).
_acquire_log() {
  local tint="$1"
  shift
  echo -e "${tint}[acquire]${NC} $*"
}

# Informational message on stdout (green tag).
log_info() {
  _acquire_log "$GREEN" "$@"
}

# Warning on stderr (yellow tag).
log_warn() {
  _acquire_log "$YELLOW" "$@" >&2
}

# Error on stderr (red tag).
log_error() {
  _acquire_log "$RED" "$@" >&2
}
# ---------------------------------------------------------------------------
# Asset paths and URLs
#
# Download locations and destinations are hard-coded below.
# NOTE(review): presumably these mirror the entries in manifest.yaml — confirm,
# and keep the two in sync when bumping a version.
# ---------------------------------------------------------------------------
# ONNX embedding model. The URL resolves the repository's `main` revision, so
# the URL alone does not pin the file content. The model is written into the
# source tree rather than the staging directory.
ONNX_MODEL_URL="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx"
ONNX_MODEL_DEST="$REPO_ROOT/src/AdvisoryAI/StellaOps.AdvisoryAI/models/all-MiniLM-L6-v2.onnx"
# Temurin 17 JRE; per the archive name this is the x64 Linux build of
# 17.0.13+11. It is unpacked under the staging directory.
JDK_URL="https://github.com/adoptium/temurin17-binaries/releases/download/jdk-17.0.13%2B11/OpenJDK17U-jre_x64_linux_hotspot_17.0.13_11.tar.gz"
JDK_DEST="$STAGING_DIR/jdk"
# Ghidra 11.2 public release archive, unpacked under the staging directory.
GHIDRA_URL="https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.2_build/ghidra_11.2_PUBLIC_20241105.zip"
GHIDRA_DEST="$STAGING_DIR/ghidra"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
#######################################
# Ensure the external tools this script depends on are installed.
# Requires curl plus one SHA-256 tool: sha256sum, or shasum (macOS).
# Outputs:  an error listing the missing tools via log_error.
# Returns:  0 when everything is available; otherwise exits the script with
#           status 1.
#######################################
check_prerequisites() {
  local -a absent=()

  if ! command -v curl >/dev/null 2>&1; then
    absent+=("curl")
  fi

  # Either hashing tool is acceptable; report only when both are absent.
  if ! command -v sha256sum >/dev/null 2>&1 \
    && ! command -v shasum >/dev/null 2>&1; then
    absent+=("sha256sum or shasum")
  fi

  if [[ ${#absent[@]} -eq 0 ]]; then
    return 0
  fi

  log_error "Missing required tools: ${absent[*]}"
  exit 1
}
#######################################
# Print the SHA-256 digest of a file as lowercase hex.
# Arguments: $1 - path of the file to hash
# Outputs:   the digest (first field of the hashing tool's output) on stdout
#######################################
compute_sha256() {
  local target="$1"

  # Prefer sha256sum; fall back to shasum (macOS) when it is not installed.
  local -a hasher=(shasum -a 256)
  if command -v sha256sum >/dev/null 2>&1; then
    hasher=(sha256sum)
  fi

  "${hasher[@]}" "$target" | awk '{print $1}'
}
#######################################
# Download a URL to a destination path, showing curl's progress bar.
#
# The payload is first written to "<dest>.part" and renamed onto <dest> only
# after curl succeeds. A failed or interrupted transfer therefore never
# truncates or deletes a file that already exists at <dest>, and never leaves
# a partial file there that a later run could mistake for a complete asset.
#
# Arguments: $1 - source URL
#            $2 - destination file path (parent directories are created)
#            $3 - human-readable label used in log messages
# Returns:   0 on success; 1 if the download or the final rename failed.
#######################################
download_with_progress() {
  local url="$1" dest="$2" label="$3"
  local partial="${dest}.part"

  log_info "Downloading $label..."
  log_info " URL: $url"
  log_info " Dest: $dest"
  mkdir -p -- "$(dirname -- "$dest")"

  if ! curl -fL --progress-bar -o "$partial" "$url"; then
    log_error "Download failed: $label"
    rm -f -- "$partial"
    return 1
  fi

  # Same-directory rename: the destination appears complete or not at all.
  if ! mv -f -- "$partial" "$dest"; then
    log_error "Download failed: $label"
    rm -f -- "$partial"
    return 1
  fi

  local size
  size=$(wc -c < "$dest" 2>/dev/null || echo "unknown")
  log_info " Downloaded: $size bytes"
}
#######################################
# Size heuristic: decide whether a path still holds a placeholder rather than
# a real asset.
# Arguments: $1 - path to inspect
# Returns:   0 (placeholder) when the path is not a regular file or is
#            smaller than 1000 bytes; 1 otherwise.
#######################################
is_placeholder() {
  local candidate="$1"
  local byte_count

  # A missing file counts as a placeholder.
  [[ -f "$candidate" ]] || return 0

  byte_count=$(wc -c < "$candidate" 2>/dev/null || echo "0")

  # The current placeholder is ~120 bytes; the real model is ~80 MB.
  # The status of this comparison is the function's result.
  [[ "$byte_count" -lt 1000 ]]
}
# ---------------------------------------------------------------------------
# Acquisition functions
# ---------------------------------------------------------------------------
#######################################
# Fetch the ONNX embedding model unless a plausible copy is already in place.
# "Plausible" is the is_placeholder size heuristic; no digest comparison is
# performed here, the SHA-256 of a fresh download is only printed.
# Globals:  ONNX_MODEL_URL, ONNX_MODEL_DEST (read)
# Returns:  1 if the downloaded file still looks like a placeholder.
#######################################
acquire_models() {
  log_info "=== ML Models ==="

  if ! is_placeholder "$ONNX_MODEL_DEST"; then
    log_info "ONNX model already present and valid: $ONNX_MODEL_DEST"
  else
    download_with_progress "$ONNX_MODEL_URL" "$ONNX_MODEL_DEST" "all-MiniLM-L6-v2 ONNX model"

    # Re-apply the size heuristic to what was just downloaded.
    if is_placeholder "$ONNX_MODEL_DEST"; then
      log_error "Downloaded file appears to be invalid (too small)."
      return 1
    fi

    local model_sha
    model_sha=$(compute_sha256 "$ONNX_MODEL_DEST")
    log_info " SHA-256: $model_sha"
    log_info " Update manifest.yaml with this digest for future verification."
  fi

  log_info "ML models: OK"
}
#######################################
# Download and unpack the JRE and Ghidra into the staging directory.
#
# Each archive is extracted into a scratch directory ("<dest>.partial") that
# is renamed onto its final location only after extraction succeeds. The
# "already present" check (final directory exists and is non-empty) therefore
# never accepts a half-extracted tree left behind by a failed run.
#
# Globals:  STAGING_DIR, JDK_URL, JDK_DEST, GHIDRA_URL, GHIDRA_DEST (read)
# Returns:  1 when a download or extraction fails, or when unzip is not
#           installed (checked before the Ghidra download starts).
#######################################
acquire_ghidra() {
  log_info "=== JDK + Ghidra ==="
  mkdir -p "$STAGING_DIR"

  # JDK
  local jdk_archive="$STAGING_DIR/jdk.tar.gz"
  local jdk_partial="${JDK_DEST}.partial"
  if [[ ! -d "$JDK_DEST" ]] || [[ -z "$(ls -A "$JDK_DEST" 2>/dev/null)" ]]; then
    download_with_progress "$JDK_URL" "$jdk_archive" "Eclipse Temurin JRE 17" || return 1

    rm -rf -- "$jdk_partial"
    mkdir -p "$jdk_partial"
    # --strip-components=1 drops the archive's single top-level directory.
    if ! tar -xzf "$jdk_archive" -C "$jdk_partial" --strip-components=1; then
      log_error "Failed to extract JDK archive: $jdk_archive"
      rm -rf -- "$jdk_partial"
      rm -f -- "$jdk_archive"
      return 1
    fi

    # The final path is missing or empty at this point; replace it wholesale.
    rm -rf -- "${JDK_DEST:?}"
    mv -- "$jdk_partial" "$JDK_DEST"
    rm -f -- "$jdk_archive"
    log_info " JDK extracted to: $JDK_DEST"
  else
    log_info "JDK already present: $JDK_DEST"
  fi

  # Ghidra
  local ghidra_archive="$STAGING_DIR/ghidra.zip"
  local ghidra_partial="${GHIDRA_DEST}.partial"
  if [[ ! -d "$GHIDRA_DEST" ]] || [[ -z "$(ls -A "$GHIDRA_DEST" 2>/dev/null)" ]]; then
    # Fail fast: do not fetch the archive if it cannot be unpacked.
    if ! command -v unzip >/dev/null 2>&1; then
      log_error "unzip not found. Install unzip to extract Ghidra."
      return 1
    fi

    download_with_progress "$GHIDRA_URL" "$ghidra_archive" "Ghidra 11.2" || return 1

    rm -rf -- "$ghidra_partial"
    mkdir -p "$ghidra_partial"
    if ! unzip -q "$ghidra_archive" -d "$ghidra_partial"; then
      log_error "Failed to extract Ghidra archive: $ghidra_archive"
      rm -rf -- "$ghidra_partial"
      rm -f -- "$ghidra_archive"
      return 1
    fi

    rm -rf -- "${GHIDRA_DEST:?}"
    mv -- "$ghidra_partial" "$GHIDRA_DEST"
    rm -f -- "$ghidra_archive"
    log_info " Ghidra extracted to: $GHIDRA_DEST"
  else
    log_info "Ghidra already present: $GHIDRA_DEST"
  fi

  log_info "JDK + Ghidra: OK"
}
# ---------------------------------------------------------------------------
# Verification
# ---------------------------------------------------------------------------
#######################################
# Report which runtime assets are present and count the missing ones.
#
# Checks the ONNX model (size heuristic), search snapshots, dev certificates,
# CA trust bundle, translation files and license attribution. The staged JDK
# and Ghidra are reported but never counted as problems.
#
# Counters use x=$((x + 1)) instead of ((x++)): the latter returns status 1
# when the old value is 0, which aborts the script under `set -e` on the very
# first finding.
#
# Globals:  REPO_ROOT, ONNX_MODEL_DEST, JDK_DEST, GHIDRA_DEST (read)
# Returns:  0 when nothing is missing, 1 otherwise.
#######################################
verify_assets() {
  log_info "=== Verifying runtime assets ==="
  local errors=0

  # ONNX model
  if is_placeholder "$ONNX_MODEL_DEST"; then
    log_warn "ONNX model is missing or placeholder: $ONNX_MODEL_DEST"
    log_warn " Semantic search will use degraded fallback encoder."
    errors=$((errors + 1))
  else
    local digest
    digest=$(compute_sha256 "$ONNX_MODEL_DEST")
    log_info "ONNX model: present ($digest)"
  fi

  # Search snapshots
  local snapshot_dir="$REPO_ROOT/src/AdvisoryAI/StellaOps.AdvisoryAI/UnifiedSearch/Snapshots"
  local -a snapshot_names=(findings vex policy graph scanner opsmemory timeline)
  local snapshot_total=${#snapshot_names[@]}
  local snapshot_count=0
  local snapshot
  for snapshot in "${snapshot_names[@]}"; do
    if [[ -f "$snapshot_dir/${snapshot}.snapshot.json" ]]; then
      snapshot_count=$((snapshot_count + 1))
    fi
  done
  if [[ $snapshot_count -eq $snapshot_total ]]; then
    log_info "Search snapshots: all $snapshot_total present"
  else
    log_warn "Search snapshots: $snapshot_count/$snapshot_total present in $snapshot_dir"
    errors=$((errors + 1))
  fi

  # Certificates
  if [[ -f "$REPO_ROOT/etc/authority/keys/kestrel-dev.pfx" ]]; then
    log_info "Dev certificates: present (replace for production)"
  else
    log_warn "Dev certificates: missing in etc/authority/keys/"
    errors=$((errors + 1))
  fi

  # Trust bundle
  if [[ -f "$REPO_ROOT/etc/trust-profiles/assets/ca.crt" ]]; then
    log_info "CA trust bundle: present"
  else
    log_warn "CA trust bundle: missing in etc/trust-profiles/assets/"
    errors=$((errors + 1))
  fi

  # Translations
  local i18n_dir="$REPO_ROOT/src/Web/StellaOps.Web/src/i18n"
  local -a locale_names=(en-US de-DE bg-BG ru-RU es-ES fr-FR uk-UA zh-CN zh-TW)
  local locale_total=${#locale_names[@]}
  local locale_count=0
  local locale
  for locale in "${locale_names[@]}"; do
    if [[ -f "$i18n_dir/${locale}.common.json" ]]; then
      locale_count=$((locale_count + 1))
    fi
  done
  if [[ $locale_count -eq $locale_total ]]; then
    log_info "Translations: all $locale_total locales present"
  else
    log_warn "Translations: $locale_count/$locale_total locales present"
    errors=$((errors + 1))
  fi

  # License files
  if [[ -f "$REPO_ROOT/third-party-licenses/all-MiniLM-L6-v2-Apache-2.0.txt" ]]; then
    log_info "License attribution: ONNX model license present"
  else
    log_warn "License attribution: missing third-party-licenses/all-MiniLM-L6-v2-Apache-2.0.txt"
    errors=$((errors + 1))
  fi
  if [[ -f "$REPO_ROOT/NOTICE.md" ]]; then
    log_info "NOTICE.md: present"
  else
    log_warn "NOTICE.md: missing"
    errors=$((errors + 1))
  fi

  # JDK + Ghidra (optional)
  if [[ -d "$JDK_DEST" ]] && [[ -n "$(ls -A "$JDK_DEST" 2>/dev/null)" ]]; then
    log_info "JDK: present at $JDK_DEST"
  else
    log_info "JDK: not staged (optional — only needed for Ghidra)"
  fi
  if [[ -d "$GHIDRA_DEST" ]] && [[ -n "$(ls -A "$GHIDRA_DEST" 2>/dev/null)" ]]; then
    log_info "Ghidra: present at $GHIDRA_DEST"
  else
    log_info "Ghidra: not staged (optional — only needed for binary analysis)"
  fi

  echo ""
  if [[ $errors -gt 0 ]]; then
    log_warn "Verification completed with $errors warning(s)."
    return 1
  fi
  log_info "All runtime assets verified."
  return 0
}
# ---------------------------------------------------------------------------
# Packaging (air-gap tarball)
# ---------------------------------------------------------------------------
#######################################
# Bundle the acquired assets into a tarball for transfer to an air-gapped
# host, and write a "<tarball>.sha256" checksum file next to it.
#
# Reproducibility: entries are sorted, mtime is fixed (in UTC), ownership is
# numeric 0:0, and compression runs through `gzip -n` so that no timestamp is
# stored in the gzip header. Plain `tar -z` lets gzip record the current time
# there, which changes the digest on every run.
#
# The tar options used are GNU extensions; `gtar` is used when the default
# `tar` is not GNU tar. Detecting a tar failure in the pipeline relies on
# `pipefail`, which this script enables at the top.
#
# Globals:  STAGING_DIR, REPO_ROOT, MANIFEST, ONNX_MODEL_DEST, JDK_DEST,
#           GHIDRA_DEST (read)
# Returns:  1 if GNU tar is unavailable, a required file cannot be copied, or
#           the archive cannot be created.
#######################################
package_assets() {
  log_info "=== Packaging runtime assets for air-gap transfer ==="
  local pkg_dir="$STAGING_DIR/package"
  local timestamp
  timestamp=$(date -u +"%Y%m%d")
  local tarball="$STAGING_DIR/stella-ops-runtime-assets-${timestamp}.tar.gz"

  # Pick a GNU tar before doing any work. The version text is captured first
  # so that no pipeline (and no SIGPIPE under pipefail) is involved.
  local tar_bin="tar"
  local tar_version
  tar_version=$(tar --version 2>/dev/null || true)
  if [[ "$tar_version" != *"GNU tar"* ]]; then
    if command -v gtar >/dev/null 2>&1; then
      tar_bin="gtar"
    else
      log_error "GNU tar is required for --package (install it as 'tar' or 'gtar')."
      return 1
    fi
  fi

  rm -rf -- "${pkg_dir:?}"
  mkdir -p "$pkg_dir/models" "$pkg_dir/certificates" "$pkg_dir/licenses"

  # ONNX model
  if ! is_placeholder "$ONNX_MODEL_DEST"; then
    cp "$ONNX_MODEL_DEST" "$pkg_dir/models/all-MiniLM-L6-v2.onnx"
    log_info " Included: ONNX model"
  else
    log_warn " Skipped: ONNX model (placeholder — run --models first)"
  fi

  # JDK
  if [[ -d "$JDK_DEST" ]] && [[ -n "$(ls -A "$JDK_DEST" 2>/dev/null)" ]]; then
    cp -r "$JDK_DEST" "$pkg_dir/jdk"
    log_info " Included: JDK"
  fi

  # Ghidra
  if [[ -d "$GHIDRA_DEST" ]] && [[ -n "$(ls -A "$GHIDRA_DEST" 2>/dev/null)" ]]; then
    cp -r "$GHIDRA_DEST" "$pkg_dir/ghidra"
    log_info " Included: Ghidra"
  fi

  # Certificates (best effort: the directory may be empty)
  if [[ -d "$REPO_ROOT/etc/trust-profiles/assets" ]]; then
    cp -r "$REPO_ROOT/etc/trust-profiles/assets/"* "$pkg_dir/certificates/" 2>/dev/null || true
    log_info " Included: trust profile assets"
  fi

  # License files: NOTICE.md is mandatory, third-party licenses are best effort.
  if ! cp "$REPO_ROOT/NOTICE.md" "$pkg_dir/licenses/"; then
    log_error "Cannot package without $REPO_ROOT/NOTICE.md"
    rm -rf -- "${pkg_dir:?}"
    return 1
  fi
  cp -r "$REPO_ROOT/third-party-licenses/"* "$pkg_dir/licenses/" 2>/dev/null || true
  log_info " Included: license files"

  # Manifest
  if ! cp "$MANIFEST" "$pkg_dir/manifest.yaml"; then
    log_error "Cannot package without $MANIFEST"
    rm -rf -- "${pkg_dir:?}"
    return 1
  fi

  # Create tarball (deterministic: sorted, fixed UTC mtime, numeric 0:0 owner,
  # no timestamp in the gzip header).
  if ! "$tar_bin" --sort=name \
    --mtime='2024-01-01 00:00:00 UTC' \
    --owner=0 --group=0 --numeric-owner \
    -cf - \
    -C "$pkg_dir" . \
    | gzip -n > "$tarball"; then
    log_error "Failed to create package: $tarball"
    rm -f -- "$tarball"
    rm -rf -- "${pkg_dir:?}"
    return 1
  fi

  local digest
  digest=$(compute_sha256 "$tarball")
  # Two spaces between digest and name: the format `sha256sum -c` expects.
  printf '%s  %s\n' "$digest" "$(basename "$tarball")" > "${tarball}.sha256"

  log_info "Package created: $tarball"
  log_info " SHA-256: $digest"
  log_info " Transfer this file to the air-gapped host."
  rm -rf -- "${pkg_dir:?}"
}
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
#######################################
# Print the command-line help text.
# Outputs: usage summary on stdout; $0 is shown as the program name.
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "Options:" \
    "--all Download and verify all runtime assets (models + Ghidra + JDK)" \
    "--models Download ONNX embedding model only" \
    "--ghidra Download JDK + Ghidra only" \
    "--verify Verify existing assets against manifest" \
    "--package Create air-gap transfer tarball from acquired assets" \
    "-h, --help Show this help" \
    "Environment variables:" \
    "STAGING_DIR Override staging directory (default: <repo>/out/runtime-assets)"
}
#######################################
# Command-line entry point: parse the options, then run the requested actions
# in a fixed order (models, Ghidra/JDK, verify, package) regardless of the
# order in which the flags were given.
#
# Tool prerequisites are checked only after the options have been parsed, so
# that --help and the "unknown option" diagnostic work on a host that lacks
# curl or a SHA-256 tool.
#
# Arguments: the script's command-line arguments
# Returns:   exits 0 after printing usage (no arguments or --help) and 1 on an
#            unknown option.
#######################################
main() {
  if [[ $# -eq 0 ]]; then
    usage
    exit 0
  fi

  local do_models=false do_ghidra=false do_verify=false do_package=false
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --all)
        do_models=true
        do_ghidra=true
        ;;
      --models) do_models=true ;;
      --ghidra) do_ghidra=true ;;
      --verify) do_verify=true ;;
      --package) do_package=true ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        exit 1
        ;;
    esac
    shift
  done

  # At least one action was requested from here on.
  check_prerequisites

  log_info "Repo root: $REPO_ROOT"
  log_info "Staging dir: $STAGING_DIR"
  echo ""

  # Plain calls inside `if` bodies keep `set -e` active within each action.
  if [[ "$do_models" == "true" ]]; then
    acquire_models
  fi
  if [[ "$do_ghidra" == "true" ]]; then
    acquire_ghidra
  fi
  if [[ "$do_verify" == "true" ]]; then
    verify_assets
  fi
  if [[ "$do_package" == "true" ]]; then
    package_assets
  fi

  echo ""
  log_info "Done."
}
# Script entry point: pass every command-line argument to main unchanged.
main "$@"