Files
git.stella-ops.org/deploy/helm/stellaops/values-orchestrator.yaml
StellaOps Bot 7e7be4d2fd
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
up
2025-11-29 02:40:21 +02:00

210 lines
5.9 KiB
YAML

# Orchestrator Service Helm Values Overlay
# Enables job scheduling, DAG planning, and worker coordination.
#
# Usage:
# helm upgrade stellaops ./stellaops -f values.yaml -f values-orchestrator.yaml
global:
labels:
stellaops.io/component: orchestrator
# Orchestrator-specific ConfigMaps
configMaps:
orchestrator-config:
data:
orchestrator.yaml: |
Orchestrator:
# Telemetry configuration
telemetry:
minimumLogLevel: Information
enableRequestLogging: true
otelEndpoint: ""
# Authority integration (disable for standalone testing)
authority:
enabled: true
issuer: https://authority.svc.cluster.local/realms/stellaops
requireHttpsMetadata: true
audiences:
- stellaops-platform
readScope: orchestrator:read
writeScope: orchestrator:write
adminScope: orchestrator:admin
# Tenant resolution
tenantHeader: X-StellaOps-Tenant
# PostgreSQL connection
storage:
connectionString: "Host=orchestrator-postgres;Database=stellaops_orchestrator;Username=orchestrator;Password=${POSTGRES_PASSWORD}"
commandTimeoutSeconds: 60
enableSensitiveDataLogging: false
# Scheduler configuration
scheduler:
# Maximum concurrent jobs per tenant
defaultConcurrencyLimit: 100
# Default rate limit (requests per second)
defaultRateLimit: 50
# Job claim timeout before re-queue
claimTimeoutMinutes: 30
# Heartbeat interval for active jobs
heartbeatIntervalSeconds: 30
# Maximum heartbeat misses before job marked stale
maxHeartbeatMisses: 3
# Autoscaling configuration
autoscaling:
# Enable autoscaling metrics endpoint
enabled: true
# Queue depth threshold for scale-up signal
queueDepthThreshold: 10000
# Dispatch latency P95 threshold (ms)
latencyP95ThresholdMs: 150
# Scale-up cooldown period
scaleUpCooldownSeconds: 60
# Scale-down cooldown period
scaleDownCooldownSeconds: 300
# Load shedding configuration
loadShedding:
enabled: true
# Warning threshold (load factor)
warningThreshold: 0.8
# Critical threshold (load factor)
criticalThreshold: 1.0
# Emergency threshold (load factor)
emergencyThreshold: 1.5
# Recovery cooldown
recoveryCooldownSeconds: 30
# Dead letter configuration
deadLetter:
# Maximum replay attempts
maxReplayAttempts: 3
# Entry expiration (days)
expirationDays: 30
# Purge interval
purgeIntervalHours: 24
# Backfill configuration
backfill:
# Maximum concurrent backfill requests
maxConcurrentRequests: 5
# Default batch size
defaultBatchSize: 1000
# Maximum retention lookback (days)
maxRetentionDays: 90
# Service definitions
services:
orchestrator-web:
image: registry.stella-ops.org/stellaops/orchestrator-web:2025.10.0-edge
replicas: 2
service:
port: 8080
configMounts:
- name: orchestrator-config
configMap: orchestrator-config
mountPath: /app/etc/orchestrator.yaml
subPath: orchestrator.yaml
envFrom:
- secretRef:
name: orchestrator-secrets
env:
ASPNETCORE_ENVIRONMENT: Production
ORCHESTRATOR__CONFIG: /app/etc/orchestrator.yaml
ports:
- containerPort: 8080
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "1Gi"
cpu: "1000m"
readinessProbe:
httpGet:
path: /readyz
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
livenessProbe:
httpGet:
path: /livez
port: 8080
initialDelaySeconds: 10
periodSeconds: 20
timeoutSeconds: 5
failureThreshold: 3
startupProbe:
httpGet:
path: /startupz
port: 8080
initialDelaySeconds: 3
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 30
orchestrator-worker:
image: registry.stella-ops.org/stellaops/orchestrator-worker:2025.10.0-edge
replicas: 1
configMounts:
- name: orchestrator-config
configMap: orchestrator-config
mountPath: /app/etc/orchestrator.yaml
subPath: orchestrator.yaml
envFrom:
- secretRef:
name: orchestrator-secrets
env:
DOTNET_ENVIRONMENT: Production
ORCHESTRATOR__CONFIG: /app/etc/orchestrator.yaml
resources:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "512Mi"
cpu: "500m"
orchestrator-postgres:
class: infrastructure
image: docker.io/library/postgres:16-alpine
service:
port: 5432
envFrom:
- secretRef:
name: orchestrator-postgres-secrets
env:
POSTGRES_DB: stellaops_orchestrator
POSTGRES_USER: orchestrator
volumeMounts:
- name: postgres-data
mountPath: /var/lib/postgresql/data
volumeClaims:
- name: postgres-data
claimName: orchestrator-postgres-data
readinessProbe:
exec:
command:
- pg_isready
- -U
- orchestrator
- -d
- stellaops_orchestrator
initialDelaySeconds: 5
periodSeconds: 10
livenessProbe:
exec:
command:
- pg_isready
- -U
- orchestrator
- -d
- stellaops_orchestrator
initialDelaySeconds: 15
periodSeconds: 30