Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
210 lines
5.9 KiB
YAML
210 lines
5.9 KiB
YAML
# Orchestrator Service Helm Values Overlay
# Enables job scheduling, DAG planning, and worker coordination.
#
# Usage:
#   helm upgrade stellaops ./stellaops -f values.yaml -f values-orchestrator.yaml

# Labels merged into the chart's global label set; this one tags rendered
# resources as belonging to the orchestrator component.
global:
  labels:
    stellaops.io/component: orchestrator

# Orchestrator-specific ConfigMaps
#
# `orchestrator-config` carries a single file, `orchestrator.yaml`, which the
# service definitions mount at /app/etc/orchestrator.yaml.
#
# NOTE(review): the nesting inside the embedded document was reconstructed
# from a flattened copy (each "# ... configuration" comment is taken to start
# a sibling section under `Orchestrator:`) -- verify against the service's
# options binding before deploying.
#
# NOTE(review): `${POSTGRES_PASSWORD}` in storage.connectionString is NOT
# expanded by Kubernetes or Helm inside ConfigMap data. Presumably the service
# substitutes it at startup from the environment populated by the
# `orchestrator-secrets` secretRef -- confirm.
configMaps:
  orchestrator-config:
    data:
      orchestrator.yaml: |
        Orchestrator:
          # Telemetry configuration
          telemetry:
            minimumLogLevel: Information
            enableRequestLogging: true
            otelEndpoint: ""

          # Authority integration (disable for standalone testing)
          authority:
            enabled: true
            issuer: https://authority.svc.cluster.local/realms/stellaops
            requireHttpsMetadata: true
            audiences:
              - stellaops-platform
            readScope: orchestrator:read
            writeScope: orchestrator:write
            adminScope: orchestrator:admin

          # Tenant resolution
          tenantHeader: X-StellaOps-Tenant

          # PostgreSQL connection
          storage:
            connectionString: "Host=orchestrator-postgres;Database=stellaops_orchestrator;Username=orchestrator;Password=${POSTGRES_PASSWORD}"
            commandTimeoutSeconds: 60
            enableSensitiveDataLogging: false

          # Scheduler configuration
          scheduler:
            # Maximum concurrent jobs per tenant
            defaultConcurrencyLimit: 100
            # Default rate limit (requests per second)
            defaultRateLimit: 50
            # Job claim timeout before re-queue
            claimTimeoutMinutes: 30
            # Heartbeat interval for active jobs
            heartbeatIntervalSeconds: 30
            # Maximum heartbeat misses before job marked stale
            maxHeartbeatMisses: 3

          # Autoscaling configuration
          autoscaling:
            # Enable autoscaling metrics endpoint
            enabled: true
            # Queue depth threshold for scale-up signal
            queueDepthThreshold: 10000
            # Dispatch latency P95 threshold (ms)
            latencyP95ThresholdMs: 150
            # Scale-up cooldown period
            scaleUpCooldownSeconds: 60
            # Scale-down cooldown period
            scaleDownCooldownSeconds: 300

          # Load shedding configuration
          loadShedding:
            enabled: true
            # Warning threshold (load factor)
            warningThreshold: 0.8
            # Critical threshold (load factor)
            criticalThreshold: 1.0
            # Emergency threshold (load factor)
            emergencyThreshold: 1.5
            # Recovery cooldown
            recoveryCooldownSeconds: 30

          # Dead letter configuration
          deadLetter:
            # Maximum replay attempts
            maxReplayAttempts: 3
            # Entry expiration (days)
            expirationDays: 30
            # Purge interval
            purgeIntervalHours: 24

          # Backfill configuration
          backfill:
            # Maximum concurrent backfill requests
            maxConcurrentRequests: 5
            # Default batch size
            defaultBatchSize: 1000
            # Maximum retention lookback (days)
            maxRetentionDays: 90

# Service definitions
#
# Three workloads are declared:
#   orchestrator-web      - HTTP front end on port 8080 (2 replicas)
#   orchestrator-worker   - background worker (1 replica, no service port)
#   orchestrator-postgres - PostgreSQL 16 instance backing the orchestrator
#
# The web and worker services mount the same `orchestrator-config` ConfigMap
# file and read additional environment from the `orchestrator-secrets` Secret.
services:
  orchestrator-web:
    image: registry.stella-ops.org/stellaops/orchestrator-web:2025.10.0-edge
    replicas: 2
    service:
      port: 8080
    configMounts:
      # subPath mounts only the single file rather than the whole ConfigMap.
      - name: orchestrator-config
        configMap: orchestrator-config
        mountPath: /app/etc/orchestrator.yaml
        subPath: orchestrator.yaml
    envFrom:
      - secretRef:
          name: orchestrator-secrets
    env:
      ASPNETCORE_ENVIRONMENT: Production
      ORCHESTRATOR__CONFIG: /app/etc/orchestrator.yaml
    ports:
      - containerPort: 8080
    resources:
      requests:
        memory: "256Mi"
        cpu: "250m"
      limits:
        memory: "1Gi"
        cpu: "1000m"
    readinessProbe:
      httpGet:
        path: /readyz
        port: 8080
      initialDelaySeconds: 5
      periodSeconds: 10
      timeoutSeconds: 5
      failureThreshold: 3
    livenessProbe:
      httpGet:
        path: /livez
        port: 8080
      initialDelaySeconds: 10
      periodSeconds: 20
      timeoutSeconds: 5
      failureThreshold: 3
    # Startup budget: 3 s initial delay + 30 failures x 5 s period, i.e. the
    # container has roughly 150 s to start before it is restarted.
    startupProbe:
      httpGet:
        path: /startupz
        port: 8080
      initialDelaySeconds: 3
      periodSeconds: 5
      timeoutSeconds: 3
      failureThreshold: 30

  # NOTE(review): no readiness/liveness probes are declared for the worker;
  # presumably it exposes no HTTP endpoint -- confirm this is intentional.
  orchestrator-worker:
    image: registry.stella-ops.org/stellaops/orchestrator-worker:2025.10.0-edge
    replicas: 1
    configMounts:
      - name: orchestrator-config
        configMap: orchestrator-config
        mountPath: /app/etc/orchestrator.yaml
        subPath: orchestrator.yaml
    envFrom:
      - secretRef:
          name: orchestrator-secrets
    env:
      DOTNET_ENVIRONMENT: Production
      ORCHESTRATOR__CONFIG: /app/etc/orchestrator.yaml
    resources:
      requests:
        memory: "128Mi"
        cpu: "100m"
      limits:
        memory: "512Mi"
        cpu: "500m"

  orchestrator-postgres:
    class: infrastructure
    image: docker.io/library/postgres:16-alpine
    service:
      port: 5432
    envFrom:
      - secretRef:
          name: orchestrator-postgres-secrets
    env:
      # Database and user names match the connection string in the
      # orchestrator-config ConfigMap and the pg_isready probes below.
      POSTGRES_DB: stellaops_orchestrator
      POSTGRES_USER: orchestrator
    volumeMounts:
      - name: postgres-data
        mountPath: /var/lib/postgresql/data
    volumeClaims:
      - name: postgres-data
        claimName: orchestrator-postgres-data
    readinessProbe:
      exec:
        command:
          - pg_isready
          - -U
          - orchestrator
          - -d
          - stellaops_orchestrator
      initialDelaySeconds: 5
      periodSeconds: 10
    livenessProbe:
      exec:
        command:
          - pg_isready
          - -U
          - orchestrator
          - -d
          - stellaops_orchestrator
      initialDelaySeconds: 15
      periodSeconds: 30