Files
git.stella-ops.org/scripts/setup.sh

712 lines
23 KiB
Bash

#!/usr/bin/env bash
# Automated developer environment setup for Stella Ops (Linux/macOS).
#
# Usage:
# ./scripts/setup.sh [--skip-build] [--infra-only] [--images-only] [--skip-images]
set -euo pipefail
# ─── Parse flags ────────────────────────────────────────────────────────────
# Every optional phase runs unless one of the flags below switches it off.
SKIP_BUILD=false
INFRA_ONLY=false
IMAGES_ONLY=false
SKIP_IMAGES=false

# Walk the command-line arguments in order. Each recognised flag flips one
# switch; -h/--help prints the usage line and exits 0; anything else is
# reported on stderr and aborts with status 1.
for arg in "$@"; do
  if [[ "$arg" == "--skip-build" ]]; then
    SKIP_BUILD=true
  elif [[ "$arg" == "--infra-only" ]]; then
    INFRA_ONLY=true
  elif [[ "$arg" == "--images-only" ]]; then
    IMAGES_ONLY=true
  elif [[ "$arg" == "--skip-images" ]]; then
    SKIP_IMAGES=true
  elif [[ "$arg" == "-h" || "$arg" == "--help" ]]; then
    echo "Usage: $0 [--skip-build] [--infra-only] [--images-only] [--skip-images]"
    exit 0
  else
    echo "Unknown flag: $arg" >&2
    exit 1
  fi
done
# Locate the repository root.
# The checkout that contains this script is tried first so the result does not
# depend on the caller's working directory (running the script by absolute
# path from elsewhere used to fail, or pick up an unrelated repository).
# If the script's directory is not inside a checkout, fall back to the
# repository of the current working directory, which is what the script
# relied on before.
SETUP_SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" 2>/dev/null && pwd || true)
ROOT=''
if [[ -n "$SETUP_SCRIPT_DIR" ]]; then
  ROOT=$(git -C "$SETUP_SCRIPT_DIR" rev-parse --show-toplevel 2>/dev/null || true)
fi
if [[ -z "$ROOT" ]]; then
  ROOT=$(git rev-parse --show-toplevel 2>/dev/null || true)
fi
if [[ -z "$ROOT" ]]; then
  echo "ERROR: Not inside a git repository." >&2
  exit 1
fi
# Directory holding the compose files and the .env used by every phase below.
COMPOSE_DIR="${ROOT}/devops/compose"
# ─── Helpers ────────────────────────────────────────────────────────────────
# Shared renderer for the coloured status tags.
#   $1 - ANSI colour parameters (e.g. "0;32")
#   $2 - tag text shown in brackets
#   $3 - message
_status_line() {
  printf ' \033[%sm[%s]\033[0m %s\n' "$1" "$2" "$3"
}

# Print a bold cyan section heading, preceded by a blank line.
step() {
  printf '\n\033[1;36m>> %s\033[0m\n' "$1"
}

# Green [OK], yellow [WARN] and red [FAIL] status lines (all on stdout).
ok() {
  _status_line '0;32' 'OK' "$1"
}

warn() {
  _status_line '0;33' 'WARN' "$1"
}

fail() {
  _status_line '0;31' 'FAIL' "$1"
}

# Succeed when "$1" resolves to something the shell can run.
has_cmd() {
  command -v "$1" >/dev/null 2>&1
}
# Print the name of the first running container whose compose service label
# equals "$1". Prints nothing when no such container is running or when the
# docker call fails (its stderr is discarded).
get_running_container_by_service() {
  local compose_service="$1"
  local label_filter="label=com.docker.compose.service=${compose_service}"

  docker ps --filter "${label_filter}" --format '{{.Names}}' 2>/dev/null \
    | head -n 1
}
# Build an http:// URL that reaches a compose service through its published
# host port.
# Arguments:
#   $1 - compose service name
#   $2 - container-side TCP port
#   $3 - request path (default "/"; a leading slash is added when missing)
# Outputs:  the URL on stdout, without a trailing newline
# Returns:  1 when the service has no running container or the port is not
#           published; 0 otherwise
service_http_probe_url() {
  local service="$1"
  local container_port="$2"
  local path="${3:-/}"
  local container mapping host host_port

  container=$(get_running_container_by_service "$service") || true
  [[ -n "$container" ]] || return 1

  # Only the first published mapping is used.
  mapping=$(docker port "$container" "${container_port}/tcp" 2>/dev/null | head -n 1) || true
  [[ -n "$mapping" ]] || return 1

  host="${mapping%:*}"
  host_port="${mapping##*:}"

  # A wildcard bind address is not a usable destination, so probe loopback.
  # Docker prints the IPv6 wildcard either bare ("::") or bracketed ("[::]").
  case "$host" in
    '0.0.0.0' | '::' | '[::]') host='127.0.0.1' ;;
  esac

  if [[ "$path" != /* ]]; then
    path="/$path"
  fi
  printf 'http://%s:%s%s' "$host" "$host_port" "$path"
}
# Emit one status record per container of the given compose files.
# Arguments: compose file paths; relative paths are resolved against
#            COMPOSE_DIR, and files that do not exist are skipped.
# Outputs:   lines of the form
#              <compose path>|<service>|<container name>|<state>|<health>
#            with state and health lower-cased. Containers whose service is
#            not declared by the compose file are dropped, and a container
#            name is reported only once across all files.
# Notes:     `docker compose ps --format json` prints either one JSON array or
#            one JSON object per line depending on the Compose release; both
#            are accepted. All containers of a file are parsed by a single
#            python3 process, and lower-casing happens there so the function
#            does not need bash 4 case-conversion expansions.
get_compose_service_records() {
  local seen_names=$'\n'
  local compose_file compose_path expected_services services_json parsed
  local service name state health
  # The Python program must start at column 0; do not indent it.
  local parser='
import json, sys

def rows(text):
    try:
        doc = json.loads(text)
        return doc if isinstance(doc, list) else [doc]
    except ValueError:
        pass
    out = []
    for line in text.splitlines():
        line = line.strip()
        if line:
            try:
                out.append(json.loads(line))
            except ValueError:
                pass
    return out

for row in rows(sys.stdin.read().strip() or "[]"):
    if isinstance(row, dict):
        f = [str(row.get(k) or "") for k in ("Service", "Name", "State", "Health")]
        print("|".join([f[0], f[1], f[2].lower(), f[3].lower()]))
'

  for compose_file in "$@"; do
    if [[ "${compose_file}" == /* ]]; then
      compose_path="${compose_file}"
    else
      compose_path="${COMPOSE_DIR}/${compose_file}"
    fi
    [[ -f "${compose_path}" ]] || continue

    # Best effort: a failing docker call simply yields no records.
    expected_services="$(docker compose -f "${compose_path}" config --services 2>/dev/null || true)"
    services_json="$(docker compose -f "${compose_path}" ps --format json 2>/dev/null || true)"
    [[ -n "${services_json}" ]] || continue

    parsed="$(printf '%s\n' "${services_json}" | python3 -c "${parser}" 2>/dev/null || true)"

    while IFS='|' read -r service name state health; do
      [[ -n "${name}" ]] || continue
      # Ignore containers of services this compose file does not declare.
      if [[ -n "${expected_services}" ]] \
        && ! grep -Fxq -- "${service}" <<< "${expected_services}"; then
        continue
      fi
      # seen_names is a newline-delimited set: "\nname1\nname2\n".
      if [[ "${seen_names}" == *$'\n'"${name}"$'\n'* ]]; then
        continue
      fi
      seen_names+="${name}"$'\n'
      printf '%s|%s|%s|%s|%s\n' \
        "${compose_path}" "${service}" "${name}" "${state}" "${health}"
    done <<< "${parsed}"
  done
}
# Poll compose container state until every container is running and, where a
# health check is reported, healthy.
# Arguments:
#   $1  - message printed with ok() on success
#   $2  - "true" to restart services that are still blocking after $4 seconds
#         (default "false"); each service is restarted at most once per call
#   $3  - maximum wait in seconds (default 180)
#   $4  - seconds to wait before restarting stalled services (default 45)
#   $5+ - compose files handed to get_compose_service_records
# Returns: 0 once converged; 1 on timeout, after a warning that lists the
#          containers that are still blocking or still starting.
# Notes:   success is only declared after more than one poll interval has
#          elapsed, so the very first polls never report convergence.
wait_for_compose_convergence() {
  local success_message="$1"
  local restart_stalled="${2:-false}"
  local max_wait="${3:-180}"
  local restart_after="${4:-45}"
  # Drop at most the four option arguments. A plain `shift 4` fails when some
  # of them were omitted and would leave them in the compose-file list.
  shift "$(( $# < 4 ? $# : 4 ))"
  local -a compose_files=("$@")

  local elapsed=0
  local poll_seconds=5
  # Newline-delimited set of "<compose file>|<service>" keys already restarted.
  local restarted_services=$'\n'
  local records record compose_file service name state health
  local pending blocking restart_targets restart_key
  local compose_to_restart unique_compose_files service_to_restart
  local -a services_for_compose

  while (( elapsed < max_wait )); do
    records="$(get_compose_service_records ${compose_files[@]+"${compose_files[@]}"})"
    if [[ -n "${records}" ]]; then
      pending=''
      blocking=''
      while IFS= read -r record; do
        [[ -n "${record}" ]] || continue
        IFS='|' read -r compose_file service name state health <<< "${record}"
        if [[ "${state}" != "running" ]]; then
          blocking+="${compose_file}|${service}|${name}|state=${state}"$'\n'
        elif [[ -z "${health}" || "${health}" == "healthy" ]]; then
          : # running, and healthy or without a health check
        elif [[ "${health}" == "starting" ]]; then
          pending+="${compose_file}|${service}|${name}|health=starting"$'\n'
        else
          blocking+="${compose_file}|${service}|${name}|health=${health}"$'\n'
        fi
      done <<< "${records}"

      if [[ -z "${blocking}" && -z "${pending}" ]] && (( elapsed > poll_seconds )); then
        ok "${success_message}"
        return 0
      fi

      if [[ "${restart_stalled}" == "true" && -n "${blocking}" ]] \
        && (( elapsed >= restart_after )); then
        # Collect blocking services that have not been restarted yet.
        restart_targets=''
        while IFS= read -r record; do
          [[ -n "${record}" ]] || continue
          IFS='|' read -r compose_file service _ <<< "${record}"
          restart_key="${compose_file}|${service}"
          if [[ "${restarted_services}" == *$'\n'"${restart_key}"$'\n'* ]]; then
            continue
          fi
          restarted_services+="${restart_key}"$'\n'
          restart_targets+="${restart_key}"$'\n'
        done <<< "${blocking}"

        # Issue one `docker compose restart` per compose file.
        unique_compose_files="$(printf '%s' "${restart_targets}" \
          | awk -F'|' 'NF { print $1 }' | sort -u)"
        while IFS= read -r compose_to_restart; do
          [[ -n "${compose_to_restart}" ]] || continue
          services_for_compose=()
          while IFS= read -r service_to_restart; do
            [[ -n "${service_to_restart}" ]] || continue
            services_for_compose+=("${service_to_restart}")
          done < <(printf '%s' "${restart_targets}" \
            | awk -F'|' -v cf="${compose_to_restart}" 'NF && $1 == cf { print $2 }' \
            | sort -u)
          (( ${#services_for_compose[@]} > 0 )) || continue

          warn "Restarting stalled services from ${compose_to_restart}: ${services_for_compose[*]}"
          if (
            cd "${COMPOSE_DIR}" \
              && docker compose -f "${compose_to_restart}" restart "${services_for_compose[@]}" >/dev/null
          ); then
            ok "Restarted stalled services: ${services_for_compose[*]}"
          else
            warn "Failed to restart stalled services: ${services_for_compose[*]}"
          fi
        done <<< "${unique_compose_files}"
      fi
    fi
    sleep "${poll_seconds}"
    elapsed=$((elapsed + poll_seconds))
  done

  # Timed out: take one last snapshot and report what is still not ready.
  local final_blocking=''
  local final_pending=''
  records="$(get_compose_service_records ${compose_files[@]+"${compose_files[@]}"})"
  while IFS= read -r record; do
    [[ -n "${record}" ]] || continue
    IFS='|' read -r compose_file service name state health <<< "${record}"
    if [[ "${state}" != "running" ]]; then
      final_blocking+="${final_blocking:+, }${name} (state=${state})"
    elif [[ -n "${health}" && "${health}" != "healthy" ]]; then
      if [[ "${health}" == "starting" ]]; then
        final_pending+="${final_pending:+, }${name} (health=starting)"
      else
        final_blocking+="${final_blocking:+, }${name} (health=${health})"
      fi
    fi
  done <<< "${records}"

  if [[ -n "${final_blocking}" ]]; then
    warn "Timed out waiting for compose convergence after ${max_wait}s. Blocking services: ${final_blocking}"
  elif [[ -n "${final_pending}" ]]; then
    warn "Timed out waiting for compose convergence after ${max_wait}s. Still starting: ${final_pending}"
  else
    warn "Timed out waiting for compose convergence after ${max_wait}s."
  fi
  return 1
}
# ─── 1. Check prerequisites ────────────────────────────────────────────────
# Verify that the tools the setup relies on are installed.
# Hard requirements (any miss exits the script with status 1 after all checks
# have been reported): dotnet 10.x, node 20+, npm 10+, docker, docker compose,
# git.
# Soft requirements (warning only): curl and python3, which the smoke tests
# and the compose status parsing call.
# Version probes are allowed to fail: a failing `<tool> --version` is reported
# as an unmet requirement instead of aborting the script silently under
# `set -e`.
check_prerequisites() {
  step 'Checking prerequisites'
  local all_good=true
  local v major

  # dotnet
  if has_cmd dotnet; then
    v=$(dotnet --version 2>/dev/null || true)
    if [[ "$v" =~ ^10\. ]]; then
      ok "dotnet $v"
    else
      fail "dotnet ${v:-(version unknown)} found, but 10.x is required"
      all_good=false
    fi
  else
    fail 'dotnet SDK not found. Install .NET 10 SDK.'
    all_good=false
  fi

  # node
  if has_cmd node; then
    v=$(node --version 2>/dev/null || true)
    v="${v#v}"
    major="${v%%.*}"
    # Guard the arithmetic: an empty or non-numeric major is simply "too old".
    if [[ "$major" =~ ^[0-9]+$ ]] && (( 10#$major >= 20 )); then
      ok "node $v"
    else
      fail "node ${v:-(version unknown)} found, but 20+ is required"
      all_good=false
    fi
  else
    fail 'node not found. Install Node.js 20+.'
    all_good=false
  fi

  # npm
  if has_cmd npm; then
    v=$(npm --version 2>/dev/null || true)
    major="${v%%.*}"
    if [[ "$major" =~ ^[0-9]+$ ]] && (( 10#$major >= 10 )); then
      ok "npm $v"
    else
      fail "npm ${v:-(version unknown)} found, but 10+ is required"
      all_good=false
    fi
  else
    fail 'npm not found.'
    all_good=false
  fi

  # docker
  if has_cmd docker; then
    ok "docker: $(docker --version 2>/dev/null)"
  else
    fail 'docker not found. Install Docker.'
    all_good=false
  fi

  # docker compose
  if docker compose version &>/dev/null; then
    ok 'docker compose available'
  else
    fail 'docker compose not available. Install Compose V2.'
    all_good=false
  fi

  # git
  if has_cmd git; then
    ok "$(git --version 2>/dev/null)"
  else
    fail 'git not found.'
    all_good=false
  fi

  # curl / python3: only the readiness probes and status parsing need them,
  # so a missing tool is a warning rather than a hard failure.
  if has_cmd curl; then
    ok 'curl available'
  else
    warn 'curl not found. HTTP smoke tests will fail without it.'
  fi
  if has_cmd python3; then
    ok 'python3 available'
  else
    warn 'python3 not found. Container health parsing will not work without it.'
  fi

  if [[ "$all_good" != "true" ]]; then
    echo 'ERROR: Prerequisites not met. Install missing tools and re-run.' >&2
    exit 1
  fi
}
# ─── 2. Check and install hosts file ─────────────────────────────────────
# Make sure /etc/hosts contains the stella-ops.local entries.
# When they are missing and devops/compose/hosts.stellaops.local exists, the
# user is asked whether to append that file to /etc/hosts (directly when
# running as root, through sudo otherwise). Declining, or a failed append,
# only prints the manual command; this function never aborts the setup.
# Reads: ROOT. Input: one line from stdin (empty or starting with Y/y = yes).
check_hosts() {
  step 'Checking hosts file for stella-ops.local entries'
  local hosts_source="${ROOT}/devops/compose/hosts.stellaops.local"
  local answer=''

  if grep -q 'stella-ops\.local' /etc/hosts 2>/dev/null; then
    ok 'stella-ops.local entries found in /etc/hosts'
    return
  fi
  warn 'stella-ops.local entries NOT found in /etc/hosts.'

  if [[ ! -f "$hosts_source" ]]; then
    warn "Hosts source file not found at $hosts_source"
    echo ' Add the hosts block from docs/dev/DEV_ENVIRONMENT_SETUP.md section 2'
    echo ' to /etc/hosts (use sudo).'
    return
  fi

  echo ''
  echo ' Stella Ops needs ~50 hosts file entries for local development.'
  echo " Source: devops/compose/hosts.stellaops.local"
  echo ''
  printf ' Add entries to /etc/hosts now? (Y/n) '
  # No stdin (CI, piped run): treat as "no" instead of letting the failed
  # read abort the whole script under `set -e`.
  if ! read -r answer; then
    answer='n'
  fi

  if [[ -z "$answer" || "$answer" =~ ^[Yy] ]]; then
    if [[ "$(id -u)" -eq 0 ]]; then
      if { printf '\n' && cat -- "$hosts_source"; } >> /etc/hosts; then
        ok 'Hosts entries added successfully'
      else
        warn 'Failed to add hosts entries. Add them manually:'
        echo " sudo sh -c 'cat $hosts_source >> /etc/hosts'"
      fi
    else
      echo ''
      echo ' Adding hosts entries requires sudo...'
      # The path is passed as a positional argument so quotes or spaces in it
      # cannot change the command that runs as root.
      if sudo sh -c 'printf "\n" >> /etc/hosts && cat -- "$1" >> /etc/hosts' sh "$hosts_source"; then
        ok 'Hosts entries added successfully'
      else
        warn 'Failed to add hosts entries. Add them manually:'
        echo " sudo sh -c 'cat $hosts_source >> /etc/hosts'"
      fi
    fi
  else
    warn 'Skipped. Add them manually before accessing the platform:'
    echo " sudo sh -c 'cat $hosts_source >> /etc/hosts'"
  fi
}
# ─── 3. Ensure .env ────────────────────────────────────────────────────────
# Guarantee that ${COMPOSE_DIR}/.env exists.
# An existing file is left untouched; otherwise it is seeded from
# env/stellaops.env.example. When neither file exists the script exits with
# status 1.
ensure_env() {
  step 'Ensuring .env file exists'
  local target="${COMPOSE_DIR}/.env"
  local template="${COMPOSE_DIR}/env/stellaops.env.example"

  if [[ -f "$target" ]]; then
    ok ".env already exists at $target"
    return
  fi

  if [[ ! -f "$template" ]]; then
    fail "Neither .env nor env/stellaops.env.example found in $COMPOSE_DIR"
    exit 1
  fi

  cp "$template" "$target"
  ok "Copied $template -> $target"
  warn 'For production, change POSTGRES_PASSWORD in .env.'
}
# Print the value assigned to key "$1" in ${COMPOSE_DIR}/.env.
# The first matching assignment wins. Everything after the first "=" belongs
# to the value; padding around key and value, a trailing carriage return
# (CRLF files) and one pair of matching surrounding quotes are removed, so a
# quoted entry such as KEY="value" yields value rather than the raw text.
# Outputs: the value on stdout; nothing when the key is absent.
# Returns: 1 when the .env file does not exist.
get_compose_env_value() {
  local key="$1"
  local env_file="${COMPOSE_DIR}/.env"
  [[ -f "$env_file" ]] || return 1

  awk -v key="$key" -v sq="'" '
    {
      line = $0
      sub(/\r$/, "", line)
      eq = index(line, "=")
      if (eq == 0) next

      name = substr(line, 1, eq - 1)
      gsub(/^[ \t]+|[ \t]+$/, "", name)
      if (name != key) next

      value = substr(line, eq + 1)
      gsub(/^[ \t]+|[ \t]+$/, "", value)
      first = substr(value, 1, 1)
      if (length(value) >= 2 && (first == "\"" || first == sq) &&
          substr(value, length(value), 1) == first) {
        value = substr(value, 2, length(value) - 2)
      }
      print value
      exit
    }
  ' "$env_file"
}
# Print the name of the frontdoor docker network.
# Precedence: the FRONTDOOR_NETWORK environment variable, then the
# FRONTDOOR_NETWORK entry of the compose .env file, then the built-in default
# "stellaops_frontdoor".
get_frontdoor_network_name() {
  local resolved="${FRONTDOOR_NETWORK:-}"

  if [[ -z "$resolved" ]]; then
    # A missing .env file or key simply leaves the value empty.
    resolved="$(get_compose_env_value FRONTDOOR_NETWORK || true)"
  fi

  printf '%s\n' "${resolved:-stellaops_frontdoor}"
}
# Create the frontdoor docker network unless `docker network inspect` already
# finds it. The name comes from get_frontdoor_network_name.
ensure_frontdoor_network() {
  local net
  net="$(get_frontdoor_network_name)"

  if ! docker network inspect "$net" >/dev/null 2>&1; then
    warn "Frontdoor network missing ($net); creating it now."
    docker network create "$net" >/dev/null
    ok "Created frontdoor network ($net)"
    return
  fi

  ok "Frontdoor network available ($net)"
}
# ─── 4. Start infrastructure ───────────────────────────────────────────────
# Bring up the infrastructure stack and wait up to 120 seconds for its
# containers to converge. A convergence timeout is tolerated here. The working
# directory is COMPOSE_DIR while compose runs and ROOT afterwards.
start_infra() {
  local compose_file='docker-compose.dev.yml'

  step 'Starting infrastructure containers (docker-compose.dev.yml)'
  cd "$COMPOSE_DIR"
  docker compose -f "$compose_file" up -d

  printf '%s\n' ' Waiting for containers to become healthy...'
  wait_for_compose_convergence 'All infrastructure containers healthy' \
    false 120 45 "$compose_file" || true

  cd "$ROOT"
}
# ─── 5. Build .NET solutions ───────────────────────────────────────────────
# Run scripts/build-all-solutions.sh with --stop-repo-host-processes.
# The script is executed directly when it is executable, through bash when it
# is only a regular file, and skipped with a warning when it is absent.
build_solutions() {
  step 'Building all .NET solutions'
  local script="${ROOT}/scripts/build-all-solutions.sh"
  local -a runner=()

  if [[ -x "$script" ]]; then
    runner=("$script")
  elif [[ -f "$script" ]]; then
    runner=(bash "$script")
  fi

  if (( ${#runner[@]} == 0 )); then
    warn "Build script not found at $script. Skipping .NET build."
    return
  fi

  "${runner[@]}" --stop-repo-host-processes
  ok '.NET solutions built successfully'
}
# ─── 6. Build Docker images ────────────────────────────────────────────────
# Run devops/docker/build-all.sh with PUBLISH_NO_RESTORE set for that command
# only.
# Arguments: $1 - value for PUBLISH_NO_RESTORE (default "false")
# The script is executed directly when it is executable, through bash when it
# is only a regular file, and skipped with a warning when it is absent.
build_images() {
  local publish_no_restore="${1:-false}"
  step 'Building Docker images'
  local script="${ROOT}/devops/docker/build-all.sh"
  local -a runner=()

  if [[ -x "$script" ]]; then
    runner=("$script")
  elif [[ -f "$script" ]]; then
    runner=(bash "$script")
  fi

  if (( ${#runner[@]} == 0 )); then
    warn "Build script not found at $script. Skipping image build."
    return
  fi

  PUBLISH_NO_RESTORE="$publish_no_restore" "${runner[@]}"
  ok 'Docker images built successfully'
}
# ─── 7. Start full platform ────────────────────────────────────────────────
# Bring up the full platform stack: make sure the frontdoor network exists,
# start docker-compose.stella-ops.yml, then wait up to 180 seconds for
# convergence with stalled-service restarts enabled. A convergence timeout is
# tolerated here. The working directory is ROOT when the wait begins.
start_platform() {
  local compose_file='docker-compose.stella-ops.yml'

  step 'Starting full Stella Ops platform'
  ensure_frontdoor_network

  cd "$COMPOSE_DIR"
  docker compose -f "$compose_file" up -d
  ok 'Platform services started'
  cd "$ROOT"

  wait_for_compose_convergence 'Platform services converged from zero-state startup' \
    true 180 45 "$compose_file" || true
}
# Probe a URL with curl and print the first real HTTP status code received.
# Arguments:
#   $1 - URL to probe; an empty URL prints nothing and is not retried
#   $2 - maximum number of attempts (default 6)
#   $3 - delay between attempts in seconds (default 2)
# Outputs: the status code without a trailing newline, or nothing when no
#          attempt produced one ("000" and curl failures count as no answer).
# Returns: always 0; callers decide based on the printed code.
# Notes:   TLS certificate verification is disabled (-k).
http_status() {
  local url="$1"
  local attempts="${2:-6}"
  local delay_seconds="${3:-2}"
  local status=''
  local attempt

  # Nothing to probe (e.g. the port lookup failed): skip the retry loop
  # instead of sleeping through every attempt.
  [[ -n "$url" ]] || return 0

  for (( attempt = 1; attempt <= attempts; attempt++ )); do
    status=$(curl -sk -o /dev/null --connect-timeout 5 -w '%{http_code}' "$url" 2>/dev/null || true)
    if [[ -n "$status" && "$status" != "000" ]]; then
      printf '%s' "$status"
      return 0
    fi
    if (( attempt < attempts )); then
      sleep "$delay_seconds"
    fi
  done
  return 0
}
# Probe the unauthenticated frontdoor endpoints a first sign-in depends on.
# Each probe is "<label>|<url>|<comma-separated accepted status codes>" and is
# polled through http_status with 24 attempts, 5 seconds apart.
# Returns: 0 when every probe answered with an accepted code; 1 at the first
#          probe that did not (later probes are not attempted).
frontdoor_bootstrap_ready() {
  step 'Waiting for frontdoor bootstrap readiness'
  local probes=(
    "Frontdoor readiness|https://stella-ops.local/health/ready|200"
    "Frontdoor welcome page|https://stella-ops.local/welcome|200"
    "Frontdoor environment settings|https://stella-ops.local/envsettings.json|200"
    "Authority discovery|https://stella-ops.local/.well-known/openid-configuration|200"
    "Authority authorize bootstrap|https://stella-ops.local/connect/authorize?client_id=stella-ops-ui&redirect_uri=https%3A%2F%2Fstella-ops.local%2Fauth%2Fcallback&response_type=code&scope=openid%20profile%20email&state=setup-smoke&nonce=setup-smoke&code_challenge=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&code_challenge_method=S256|200,302,303"
  )
  local entry name url allowed status matched
  # Kept local so the loop does not overwrite variables of the same name in
  # the caller or at script level.
  local code
  local -a allowed_codes

  for entry in "${probes[@]}"; do
    IFS='|' read -r name url allowed <<<"$entry"
    status="$(http_status "$url" 24 5)"

    matched=false
    IFS=',' read -ra allowed_codes <<<"$allowed"
    for code in "${allowed_codes[@]}"; do
      if [[ "$status" == "$code" ]]; then
        matched=true
        break
      fi
    done

    if [[ "$matched" == "true" ]]; then
      ok "$name (HTTP $status)"
      continue
    fi
    fail "$name did not reach an expected status ($allowed)"
    return 1
  done

  ok 'Frontdoor bootstrap path is ready for first-user sign-in'
}
# Run the Node readiness script for authenticated frontdoor routes from the
# repository root (in a subshell, so the caller's working directory is kept).
# Returns: 0 when the script succeeds, 1 otherwise.
frontdoor_authenticated_ready() {
  step 'Waiting for authenticated frontdoor route readiness'

  if ! (cd "$ROOT" && node ./src/Web/StellaOps.Web/scripts/live-frontdoor-authenticated-readiness.mjs); then
    fail 'Authenticated frontdoor route readiness did not converge'
    return 1
  fi

  ok 'Authenticated topology, notifications admin, and promotion flows are ready for first-user QA'
  return 0
}
# ─── 8. Smoke test ─────────────────────────────────────────────────────────
# Run the post-start smoke tests and print a container health summary.
# Blocking checks: PostgreSQL (pg_isready), Valkey (PING), the RustFS S3
# endpoint (HTTP 200/403), the Zot registry endpoint (HTTP 200/401) and,
# unless INFRA_ONLY is "true", the frontdoor bootstrap probes, the
# authenticated readiness script and the platform endpoint at
# https://stella-ops.local.
# The platform endpoint check is skipped in infra-only mode because the
# platform is never started there; counting it made every infra-only run fail.
# The container health summary is informational and never blocking.
# Reads:   INFRA_ONLY, COMPOSE_DIR, ROOT. Leaves the working directory at ROOT.
# Returns: 1 when any blocking check failed, 0 otherwise.
smoke_test() {
  step 'Running smoke tests'
  local has_blocking_failures=false
  local pong rustfs_url rustfs_status registry_url registry_status
  local cf line name h
  local total=0
  local healthy=0
  local unhealthy_names=""

  # Infrastructure checks
  if docker exec stellaops-dev-postgres pg_isready -U stellaops &>/dev/null; then
    ok 'PostgreSQL'
  else
    warn 'PostgreSQL not responding'
    has_blocking_failures=true
  fi

  pong=$(docker exec stellaops-dev-valkey valkey-cli ping 2>/dev/null || true)
  if [[ "$pong" == "PONG" ]]; then
    ok 'Valkey'
  else
    warn 'Valkey not responding'
    has_blocking_failures=true
  fi

  rustfs_url=$(service_http_probe_url rustfs 8333 / || true)
  rustfs_status=$(http_status "$rustfs_url")
  if [[ "$rustfs_status" == "200" || "$rustfs_status" == "403" ]]; then
    ok "RustFS S3 endpoint (HTTP $rustfs_status)"
  else
    warn 'RustFS S3 endpoint did not respond with an expected status (wanted 200/403)'
    has_blocking_failures=true
  fi

  registry_url=$(service_http_probe_url registry 5000 /v2/ || true)
  registry_status=$(http_status "$registry_url")
  if [[ "$registry_status" == "200" || "$registry_status" == "401" ]]; then
    ok "Zot registry endpoint (HTTP $registry_status)"
  else
    warn 'Zot registry endpoint did not respond with an expected status (wanted 200/401)'
    has_blocking_failures=true
  fi

  if [[ "$INFRA_ONLY" != "true" ]]; then
    if ! frontdoor_bootstrap_ready; then
      has_blocking_failures=true
    fi
    if ! frontdoor_authenticated_ready; then
      has_blocking_failures=true
    fi
  fi

  # Platform container health summary
  step 'Container health summary'
  cd "$COMPOSE_DIR"
  for cf in docker-compose.dev.yml docker-compose.stella-ops.yml; do
    [[ -f "$cf" ]] || continue
    while IFS= read -r line; do
      [[ -n "$line" ]] || continue
      name=$(printf '%s\n' "$line" | python3 -c "import sys,json; print(json.load(sys.stdin).get('Name',''))" 2>/dev/null || true)
      h=$(printf '%s\n' "$line" | python3 -c "import sys,json; print(json.load(sys.stdin).get('Health',''))" 2>/dev/null || true)
      total=$((total + 1))
      # Containers without a health check count as healthy.
      if [[ -z "$h" || "$h" == "healthy" ]]; then
        healthy=$((healthy + 1))
      else
        # "\n" stays literal here; printf %b below expands it.
        unhealthy_names="${unhealthy_names} Unhealthy: ${name}\n"
      fi
    done < <(docker compose -f "$cf" ps --format json 2>/dev/null)
  done
  if (( total > 0 )); then
    if (( healthy == total )); then
      ok "$healthy/$total containers healthy"
    else
      warn "$healthy/$total containers healthy"
      if [[ -n "$unhealthy_names" ]]; then
        printf " \033[0;33m%b\033[0m" "$unhealthy_names"
      fi
    fi
  fi

  # Platform endpoint check (only meaningful once the platform was started)
  if [[ "$INFRA_ONLY" != "true" ]]; then
    if curl -sk --connect-timeout 5 -o /dev/null -w '' https://stella-ops.local 2>/dev/null; then
      ok 'Platform accessible at https://stella-ops.local'
    elif bash -c "echo >/dev/tcp/stella-ops.local/443" 2>/dev/null; then
      ok 'Platform listening on https://stella-ops.local (TLS handshake pending)'
    else
      warn 'Platform not yet accessible at https://stella-ops.local (may still be starting)'
      has_blocking_failures=true
    fi
  fi

  cd "$ROOT"
  if [[ "$has_blocking_failures" == "true" ]]; then
    return 1
  fi
}
# ─── Main ───────────────────────────────────────────────────────────────────
# Banner
printf '%s\n' \
  '=============================================' \
  ' Stella Ops Developer Environment Setup' \
  '============================================='

check_prerequisites
check_hosts

# --images-only: build the container images and stop.
if [[ "$IMAGES_ONLY" == "true" ]]; then
  build_images false
  printf '\n'
  printf '%s\n' 'Done (images only).'
  exit 0
fi

ensure_env
start_infra

# --infra-only: verify the infrastructure containers and stop.
if [[ "$INFRA_ONLY" == "true" ]]; then
  if ! smoke_test; then
    fail 'Infrastructure setup did not pass blocking smoke tests. Review output and docker compose logs.'
    exit 1
  fi
  printf '\n'
  printf '%s\n' 'Done (infra only). Infrastructure is running.'
  exit 0
fi

if [[ "$SKIP_BUILD" != "true" ]]; then
  build_solutions
fi

if [[ "$SKIP_IMAGES" != "true" ]]; then
  # build_images receives "true" only when the solution build ran just before;
  # the value is passed to the image build as PUBLISH_NO_RESTORE.
  case "$SKIP_BUILD" in
    true) build_images false ;;
    *) build_images true ;;
  esac
fi

start_platform

if ! smoke_test; then
  fail 'Setup did not pass blocking smoke tests. Review output and docker compose logs.'
  exit 1
fi

printf '\n'
printf '%s\n' \
  '=============================================' \
  ' Setup complete!' \
  ' Platform: https://stella-ops.local' \
  ' Docs: docs/dev/DEV_ENVIRONMENT_SETUP.md' \
  '============================================='