Refactor code structure for improved readability and maintainability
This commit is contained in:
@@ -13,7 +13,10 @@ These Compose bundles ship the minimum services required to exercise the scanner
|
||||
| `docker-compose.mirror.yaml` | Managed mirror topology for `*.stella-ops.org` distribution (Concelier + Excititor + CDN gateway). |
|
||||
| `docker-compose.telemetry.yaml` | Optional OpenTelemetry collector overlay (mutual TLS, OTLP ingest endpoints). |
|
||||
| `docker-compose.telemetry-storage.yaml` | Prometheus/Tempo/Loki storage overlay with multi-tenant defaults. |
|
||||
| `docker-compose.gpu.yaml` | Optional GPU overlay enabling NVIDIA devices for Advisory AI web/worker. Apply with `-f docker-compose.<env>.yaml -f docker-compose.gpu.yaml`. |
|
||||
| `env/*.env.example` | Seed `.env` files that document required secrets and ports per profile. |
|
||||
| `scripts/backup.sh` | Pauses workers and creates a tar.gz archive of the Mongo/MinIO/Redis volumes (deterministic snapshot). |
|
||||
| `scripts/reset.sh` | Stops the stack and removes Mongo/MinIO/Redis volumes after explicit confirmation. |
|
||||
|
||||
## Usage
|
||||
|
||||
@@ -101,4 +104,18 @@ The Helm chart mirrors these settings under `services.advisory-ai-web` / `adviso
|
||||
2. Update image digests in the relevant Compose file(s).
|
||||
3. Re-run `docker compose config` to confirm the bundle is deterministic.
|
||||
|
||||
Keep digests synchronized between Compose, Helm, and the release manifest to preserve reproducibility guarantees. `deploy/tools/validate-profiles.sh` performs a quick audit.
|
||||
Keep digests synchronized between Compose, Helm, and the release manifest to preserve reproducibility guarantees. `deploy/tools/validate-profiles.sh` performs a quick audit.
|
||||
|
||||
### GPU toggle for Advisory AI
|
||||
|
||||
GPU is disabled by default. To run inference on NVIDIA GPUs:
|
||||
|
||||
```bash
|
||||
docker compose \
|
||||
--env-file prod.env \
|
||||
-f docker-compose.prod.yaml \
|
||||
-f docker-compose.gpu.yaml \
|
||||
up -d
|
||||
```
|
||||
|
||||
The GPU overlay requests one GPU each for `advisory-ai-worker` and `advisory-ai-web` and sets `ADVISORY_AI_INFERENCE_GPU=true` on both. Ensure the host has the NVIDIA container runtime installed and that the base Compose file still sets the correct image digests.
|
||||
|
||||
26
deploy/compose/docker-compose.gpu.yaml
Normal file
26
deploy/compose/docker-compose.gpu.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
# GPU overlay for the Advisory AI services.
# Layer it on top of an environment file, for example:
#   docker compose -f docker-compose.<env>.yaml -f docker-compose.gpu.yaml up -d
# Each service below reserves one NVIDIA GPU device, selects the `nvidia`
# runtime and sets ADVISORY_AI_INFERENCE_GPU to the string "true".
version: "3.9"

services:
  advisory-ai-worker:
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
              driver: nvidia
              count: 1
    environment:
      # Quoted so the value stays the string "true" rather than a YAML boolean.
      ADVISORY_AI_INFERENCE_GPU: "true"
    runtime: nvidia

  advisory-ai-web:
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
              driver: nvidia
              count: 1
    environment:
      # Quoted so the value stays the string "true" rather than a YAML boolean.
      ADVISORY_AI_INFERENCE_GPU: "true"
    runtime: nvidia
|
||||
28
deploy/compose/scripts/backup.sh
Normal file
28
deploy/compose/scripts/backup.sh
Normal file
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env bash
# Creates a timestamped tar.gz of the Mongo, MinIO and Redis data volumes.
#
# Flow: ask for confirmation, pause the worker containers, archive the three
# named volumes (mounted read-only) from a throwaway alpine container, then
# unpause the workers.
#
# Environment:
#   BACKUP_DIR  Output directory, relative or absolute (default: "backups").
#
# Exit status: 1 when the operator declines; non-zero when a docker command
# that is not explicitly tolerated fails (set -e).
set -euo pipefail

WORKERS=(scanner-worker scheduler-worker taskrunner-worker)

# Unpause is best-effort: workers that were never paused must not fail the run.
resume_workers() {
  docker compose unpause "${WORKERS[@]}" || true
}

echo "StellaOps Compose Backup"
echo "This will create a tar.gz of Mongo, MinIO (object-store), and Redis data volumes."
read -rp "Proceed? [y/N] " ans
[[ ${ans:-N} =~ ^[Yy]$ ]] || { echo "Aborted."; exit 1; }

TS=$(date -u +%Y%m%dT%H%M%SZ)
OUT_DIR=${BACKUP_DIR:-backups}
mkdir -p "$OUT_DIR"
# Bind mounts need an absolute host path. Resolving the directory here keeps a
# relative BACKUP_DIR working and also accepts an absolute one, which the
# previous "$PWD/$OUT_DIR" concatenation would have turned into a wrong path.
OUT_ABS=$(cd "$OUT_DIR" && pwd)

# Fail early (via set -e) if the compose project cannot be queried.
docker compose ps >/dev/null

# Installed before pausing so that a failure of the archive step below (which
# aborts the script under set -e) still unpauses the workers.
trap resume_workers EXIT

echo "Pausing worker containers for consistency..."
docker compose pause "${WORKERS[@]}" || true

echo "Backing up volumes..."
docker run --rm \
  -v stellaops-mongo:/data/db:ro \
  -v stellaops-minio:/data/minio:ro \
  -v stellaops-redis:/data/redis:ro \
  -v "$OUT_ABS":/out \
  alpine sh -c "cd / && tar czf /out/stellaops-backup-$TS.tar.gz data"

# Success path: unpause now and drop the trap so it does not run a second time.
resume_workers
trap - EXIT

echo "Backup written to $OUT_DIR/stellaops-backup-$TS.tar.gz"
|
||||
15
deploy/compose/scripts/reset.sh
Normal file
15
deploy/compose/scripts/reset.sh
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env bash
# Stops the Compose stack and removes the Mongo, MinIO and Redis named volumes.
#
# The operator must type RESET exactly; any other input aborts with status 1.
# Volume removal is best-effort: a failure does not stop the script or change
# its exit status, but it is reported instead of being silently ignored.
set -euo pipefail

echo "WARNING: This will stop the stack and wipe Mongo, MinIO, and Redis volumes."
read -rp "Type 'RESET' to continue: " ans
[[ ${ans:-} == "RESET" ]] || { echo "Aborted."; exit 1; }

docker compose down

# Names of volumes whose removal failed, for the summary below.
not_removed=()
for vol in stellaops-mongo stellaops-minio stellaops-redis; do
  echo "Removing volume $vol"
  docker volume rm "$vol" || not_removed+=("$vol")
done

if ((${#not_removed[@]})); then
  # Do not claim success when docker reported an error for some volume.
  echo "WARNING: these volumes were not removed: ${not_removed[*]} (see docker output above)." >&2
else
  echo "Reset complete. Re-run compose with your env file to recreate volumes."
fi
|
||||
@@ -105,14 +105,23 @@ spec:
|
||||
securityContext:
|
||||
{{ toYaml $svc.securityContext | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- /*
  Optional container probes: each stanza is rendered once, and only when the
  service defines the corresponding value. The probe mapping is emitted with
  toYaml at an indent of 12; the left-trim marker on that action keeps the
  line's own indentation from being emitted as a whitespace-only line before
  the newline that nindent inserts.
*/}}
{{- if $svc.livenessProbe }}
          livenessProbe:
            {{- toYaml $svc.livenessProbe | nindent 12 }}
{{- end }}
{{- if $svc.readinessProbe }}
          readinessProbe:
            {{- toYaml $svc.readinessProbe | nindent 12 }}
{{- end }}
|
||||
{{- /*
  Prometheus scrape annotations.
  When the service has a `prometheus` block with `enabled` set, the four
  prometheus.io/* keys are merged into $svc.podAnnotations. Defaults: path
  "/metrics", port 8080 (rendered as a string), scheme "http". Keys already
  present in podAnnotations are kept, because they are the merge destination.

  `set` is used because Go templates cannot assign to a field of a variable
  (`$svc.podAnnotations = ...` does not parse); this assumes $svc is a dict,
  as it is when it comes from ranging over .Values.services.

  NOTE(review): this block sits between the probes and volumeMounts of the
  container spec. Annotations added here only affect template text rendered
  after this point - confirm the pod annotations are emitted later, or move
  this block above the place where they are rendered.
*/}}
{{- if $svc.prometheus }}
{{- $pr := $svc.prometheus }}
{{- if $pr.enabled }}
{{- if not $svc.podAnnotations }}
{{- $_ := set $svc "podAnnotations" (dict) }}
{{- end }}
{{- $_ := set $svc "podAnnotations" (merge $svc.podAnnotations (dict "prometheus.io/scrape" "true" "prometheus.io/path" (default "/metrics" $pr.path) "prometheus.io/port" (toString (default 8080 $pr.port)) "prometheus.io/scheme" (default "http" $pr.scheme))) }}
{{- end }}
{{- end }}
|
||||
{{- if or $svc.volumeMounts $configMounts }}
|
||||
volumeMounts:
|
||||
{{- if $svc.volumeMounts }}
|
||||
|
||||
39
deploy/helm/stellaops/templates/hpa.yaml
Normal file
39
deploy/helm/stellaops/templates/hpa.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
{{- /*
  Renders one autoscaling/v2 HorizontalPodAutoscaler per opted-in service.

  An HPA is emitted only when both the chart-wide .Values.hpa.enabled and the
  service's own hpa.enabled are truthy. It targets the apps/v1 Deployment that
  has the same "stellaops.fullname". minReplicas/maxReplicas and the CPU/memory
  utilisation targets are taken from the service's hpa block and fall back to
  .Values.hpa. A metric whose target resolves to an empty value (for example
  null) is omitted.

  Every optional sub-map is defaulted to an empty dict before a field is read,
  so a service that sets only `hpa.enabled` (no `cpu` / `memory` keys) does not
  abort rendering with a nil-pointer error.
*/}}
{{- $chartHpa := default dict .Values.hpa }}
{{- $chartCpu := default dict $chartHpa.cpu }}
{{- $chartMem := default dict $chartHpa.memory }}
{{- if and $chartHpa.enabled .Values.services }}
{{- range $name, $svc := .Values.services }}
{{- $hpa := default dict $svc.hpa }}
{{- if $hpa.enabled }}
{{- $svcCpu := default dict $hpa.cpu }}
{{- $svcMem := default dict $hpa.memory }}
{{- $cpu := coalesce $svcCpu.targetPercentage $chartCpu.targetPercentage }}
{{- $mem := coalesce $svcMem.targetPercentage $chartMem.targetPercentage }}
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "stellaops.fullname" (dict "root" $ "name" $name) }}
  labels:
    {{- include "stellaops.labels" (dict "root" $ "name" $name "svc" $svc) | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "stellaops.fullname" (dict "root" $ "name" $name) }}
  minReplicas: {{ default $chartHpa.minReplicas $hpa.minReplicas }}
  maxReplicas: {{ default $chartHpa.maxReplicas $hpa.maxReplicas }}
  {{- if or $cpu $mem }}
  metrics:
    {{- if $cpu }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ $cpu }}
    {{- end }}
    {{- if $mem }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ $mem }}
    {{- end }}
  {{- end }}
{{- end }}
{{- end }}
{{- end }}
|
||||
@@ -33,6 +33,21 @@ externalSecrets:
|
||||
enabled: false
|
||||
secrets: []
|
||||
|
||||
# Prometheus scrape settings: on/off switch plus metrics path, port and scheme.
prometheus:
  enabled: true
  path: /metrics
  port: 8080
  scheme: http

# Horizontal pod autoscaling settings: switch, replica bounds and the
# CPU / memory utilisation targets in percent.
# NOTE(review): presumably the chart-wide fallbacks read by templates/hpa.yaml
# for services that do not override them - confirm.
hpa:
  enabled: false
  minReplicas: 1
  maxReplicas: 3
  cpu:
    targetPercentage: 70
  memory:
    targetPercentage: 80
|
||||
|
||||
configMaps:
|
||||
notify-config:
|
||||
data:
|
||||
|
||||
@@ -55,6 +55,21 @@ externalSecrets:
|
||||
- key: STELLAOPS_SECRETS_ENCRYPTION_KEY
|
||||
remoteKey: prod/core/secrets-encryption-key
|
||||
|
||||
# Prometheus scrape settings: on/off switch plus metrics path, port and scheme.
prometheus:
  enabled: true
  path: /metrics
  port: 8080
  scheme: http

# Horizontal pod autoscaling settings: enabled in this profile, scaling
# between 2 and 6 replicas with CPU / memory utilisation targets in percent.
# NOTE(review): presumably the chart-wide fallbacks read by templates/hpa.yaml
# for services that do not override them - confirm.
hpa:
  enabled: true
  minReplicas: 2
  maxReplicas: 6
  cpu:
    targetPercentage: 70
  memory:
    targetPercentage: 75
|
||||
|
||||
configMaps:
|
||||
notify-config:
|
||||
data:
|
||||
|
||||
@@ -32,6 +32,21 @@ externalSecrets:
|
||||
enabled: false
|
||||
secrets: []
|
||||
|
||||
# Prometheus scrape settings: disabled in this profile; path, port and scheme
# are still listed so that enabling it only requires flipping the switch.
prometheus:
  enabled: false
  path: /metrics
  port: 8080
  scheme: http

# Horizontal pod autoscaling settings: switch, replica bounds and the
# CPU utilisation target in percent.
# NOTE(review): presumably the chart-wide fallbacks read by templates/hpa.yaml
# for services that do not override them - confirm.
hpa:
  enabled: false
  minReplicas: 1
  maxReplicas: 3
  cpu:
    targetPercentage: 75
  memory:
    # Explicit null: no memory utilisation target is set in this profile.
    targetPercentage: null
|
||||
|
||||
# Surface.Env configuration for Scanner/Zastava components
|
||||
# See docs/modules/scanner/design/surface-env.md for details
|
||||
surface:
|
||||
|
||||
Reference in New Issue
Block a user