# Orchestrator Service Helm Values Overlay
# Enables job scheduling, DAG planning, and worker coordination.
#
# Usage:
#   helm upgrade stellaops ./stellaops -f values.yaml -f values-orchestrator.yaml
#
# Layout of this overlay:
#   global     - labels attached under the chart's global scope
#   configMaps - the orchestrator settings file, mounted into web and worker
#   services   - orchestrator-web, orchestrator-worker, orchestrator-postgres

global:
  labels:
    stellaops.io/component: orchestrator

# Orchestrator-specific ConfigMaps
configMaps:
  orchestrator-config:
    data:
      # Application settings file, embedded verbatim as a literal block scalar.
      # Everything indented beneath this key (comments included) is part of the
      # file content, not of the Helm values tree.
      # NOTE(review): nesting was reconstructed from a whitespace-collapsed
      # copy; confirm that tenantHeader belongs directly under Orchestrator
      # rather than under authority.
      # NOTE(review): Kubernetes does not expand ${POSTGRES_PASSWORD} inside
      # ConfigMap data; presumably the service substitutes it when loading the
      # file (e.g. from a variable supplied by orchestrator-secrets) - confirm.
      orchestrator.yaml: |
        Orchestrator:
          # Telemetry configuration
          telemetry:
            minimumLogLevel: Information
            enableRequestLogging: true
            otelEndpoint: ""

          # Authority integration (disable for standalone testing)
          authority:
            enabled: true
            issuer: https://authority.svc.cluster.local/realms/stellaops
            requireHttpsMetadata: true
            audiences:
              - stellaops-platform
            readScope: orchestrator:read
            writeScope: orchestrator:write
            adminScope: orchestrator:admin

          # Tenant resolution
          tenantHeader: X-StellaOps-Tenant

          # PostgreSQL connection
          storage:
            connectionString: "Host=orchestrator-postgres;Database=stellaops_orchestrator;Username=orchestrator;Password=${POSTGRES_PASSWORD}"
            commandTimeoutSeconds: 60
            enableSensitiveDataLogging: false

          # Scheduler configuration
          scheduler:
            # Maximum concurrent jobs per tenant
            defaultConcurrencyLimit: 100
            # Default rate limit (requests per second)
            defaultRateLimit: 50
            # Job claim timeout before re-queue
            claimTimeoutMinutes: 30
            # Heartbeat interval for active jobs
            heartbeatIntervalSeconds: 30
            # Maximum heartbeat misses before job marked stale
            maxHeartbeatMisses: 3

          # Autoscaling configuration
          autoscaling:
            # Enable autoscaling metrics endpoint
            enabled: true
            # Queue depth threshold for scale-up signal
            queueDepthThreshold: 10000
            # Dispatch latency P95 threshold (ms)
            latencyP95ThresholdMs: 150
            # Scale-up cooldown period
            scaleUpCooldownSeconds: 60
            # Scale-down cooldown period
            scaleDownCooldownSeconds: 300

          # Load shedding configuration
          loadShedding:
            enabled: true
            # Warning threshold (load factor)
            warningThreshold: 0.8
            # Critical threshold (load factor)
            criticalThreshold: 1.0
            # Emergency threshold (load factor)
            emergencyThreshold: 1.5
            # Recovery cooldown
            recoveryCooldownSeconds: 30

          # Dead letter configuration
          deadLetter:
            # Maximum replay attempts
            maxReplayAttempts: 3
            # Entry expiration (days)
            expirationDays: 30
            # Purge interval
            purgeIntervalHours: 24

          # Backfill configuration
          backfill:
            # Maximum concurrent backfill requests
            maxConcurrentRequests: 5
            # Default batch size
            defaultBatchSize: 1000
            # Maximum retention lookback (days)
            maxRetentionDays: 90

# Service definitions
services:
  # HTTP front end: two replicas listening on 8080, with the settings file
  # mounted from orchestrator-config and extra environment pulled from the
  # orchestrator-secrets Secret.
  orchestrator-web:
    image: registry.stella-ops.org/stellaops/orchestrator-web:2025.10.0-edge
    replicas: 2
    service:
      port: 8080
    configMounts:
      # subPath mounts only the orchestrator.yaml key as a single file.
      - name: orchestrator-config
        configMap: orchestrator-config
        mountPath: /app/etc/orchestrator.yaml
        subPath: orchestrator.yaml
    envFrom:
      - secretRef:
          name: orchestrator-secrets
    env:
      ASPNETCORE_ENVIRONMENT: Production
      # Same path as the configMounts mountPath above.
      ORCHESTRATOR__CONFIG: /app/etc/orchestrator.yaml
    ports:
      - containerPort: 8080
    resources:
      requests:
        memory: "256Mi"
        cpu: "250m"
      limits:
        memory: "1Gi"
        cpu: "1000m"
    readinessProbe:
      httpGet:
        path: /readyz
        port: 8080
      initialDelaySeconds: 5
      periodSeconds: 10
      timeoutSeconds: 5
      failureThreshold: 3
    livenessProbe:
      httpGet:
        path: /livez
        port: 8080
      initialDelaySeconds: 10
      periodSeconds: 20
      timeoutSeconds: 5
      failureThreshold: 3
    # Up to 30 failed checks at 5 s intervals are tolerated during startup.
    startupProbe:
      httpGet:
        path: /startupz
        port: 8080
      initialDelaySeconds: 3
      periodSeconds: 5
      timeoutSeconds: 3
      failureThreshold: 30

  # Background worker: single replica sharing the same settings file and
  # Secret as orchestrator-web. No service port or probes are defined here.
  orchestrator-worker:
    image: registry.stella-ops.org/stellaops/orchestrator-worker:2025.10.0-edge
    replicas: 1
    configMounts:
      - name: orchestrator-config
        configMap: orchestrator-config
        mountPath: /app/etc/orchestrator.yaml
        subPath: orchestrator.yaml
    envFrom:
      - secretRef:
          name: orchestrator-secrets
    env:
      DOTNET_ENVIRONMENT: Production
      ORCHESTRATOR__CONFIG: /app/etc/orchestrator.yaml
    resources:
      requests:
        memory: "128Mi"
        cpu: "100m"
      limits:
        memory: "512Mi"
        cpu: "500m"

  # PostgreSQL backing store. The database and user names below match the
  # Database/Username fields of the storage connection string in
  # orchestrator.yaml.
  # NOTE(review): the meaning of `class: infrastructure` is defined by the
  # chart templates, which are not visible here - confirm.
  orchestrator-postgres:
    class: infrastructure
    image: docker.io/library/postgres:16-alpine
    service:
      port: 5432
    envFrom:
      - secretRef:
          name: orchestrator-postgres-secrets
    env:
      POSTGRES_DB: stellaops_orchestrator
      POSTGRES_USER: orchestrator
    volumeMounts:
      - name: postgres-data
        mountPath: /var/lib/postgresql/data
    volumeClaims:
      - name: postgres-data
        claimName: orchestrator-postgres-data
    # Both probes run pg_isready against the orchestrator database and user.
    readinessProbe:
      exec:
        command:
          - pg_isready
          - -U
          - orchestrator
          - -d
          - stellaops_orchestrator
      initialDelaySeconds: 5
      periodSeconds: 10
    livenessProbe:
      exec:
        command:
          - pg_isready
          - -U
          - orchestrator
          - -d
          - stellaops_orchestrator
      initialDelaySeconds: 15
      periodSeconds: 30