Reduce idle CPU across 62 containers (phase 1)
- Add resource limits (heavy/medium/light tiers) to all 59 .NET services
- Add .NET GC tuning (server/workstation GC, DATAS, conserve memory)
- Convert FirstSignalSnapshotWriter from 10s polling to Valkey pub/sub
- Convert EnvironmentSettingsRefreshService from 60s polling to Valkey pub/sub
- Consolidate GraphAnalytics dual timers to single timer with idle-skip
- Increase healthcheck interval from 30s to 60s (configurable)
- Reduce debug logging to Information on 4 high-traffic services

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -74,18 +74,60 @@ x-depends-infra: &depends-infra
|
||||
condition: service_healthy
|
||||
|
||||
# Shared healthcheck timing settings, exposed as the YAML anchor `healthcheck-tcp`.
# No `test` key is visible in this anchor; only the timing fields are defined here.
x-healthcheck-tcp: &healthcheck-tcp
|
||||
# Previous value (old side of the diff): fixed 30s interval.
interval: 30s
|
||||
# New value: taken from HEALTHCHECK_INTERVAL, with 60s as the default in the
# `${VAR:-default}` expression.
interval: ${HEALTHCHECK_INTERVAL:-60s}
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 15s
|
||||
|
||||
# Shared healthcheck definition exposed as the YAML anchor `healthcheck-worker`.
# Unlike the timing-only anchor, this one also fixes the probe command.
x-healthcheck-worker: &healthcheck-worker
|
||||
# Probe command: runs the healthcheck script at this path inside the container.
test: ["CMD", "/usr/local/bin/healthcheck.sh"]
|
||||
# Previous value (old side of the diff): fixed 30s interval.
interval: 30s
|
||||
# New value: taken from HEALTHCHECK_INTERVAL, with 60s as the default in the
# `${VAR:-default}` expression.
interval: ${HEALTHCHECK_INTERVAL:-60s}
|
||||
timeout: 5s
|
||||
start_period: 30s
|
||||
retries: 3
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Resource limit tiers (Workstream 1: CPU optimization)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Heavy resource tier, exposed as the YAML anchor `resources-heavy`.
# Services pull it in at service level with `<<: *resources-heavy`
# (in this file: router-gateway, platform, authority, scanner-web,
# scanner-worker, jobengine).
# NOTE(review): a service that declares its own `deploy:` key would override this
# merged one entirely rather than deep-merge with it — confirm none do.
x-resources-heavy: &resources-heavy
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
# Upper bound of one CPU for the container.
cpus: "1.0"
|
||||
# Upper bound of 2G of memory for the container.
memory: 2G
|
||||
|
||||
# Medium resource tier, exposed as the YAML anchor `resources-medium`.
# Services pull it in at service level with `<<: *resources-medium`
# (in this file, for example: concelier, excititor, policy-engine, scheduler-web,
# graph-api, notify-web).
x-resources-medium: &resources-medium
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
# Upper bound of half a CPU for the container.
cpus: "0.50"
|
||||
# Upper bound of 1G of memory for the container.
memory: 1G
|
||||
|
||||
# Light resource tier, exposed as the YAML anchor `resources-light`.
# Services pull it in at service level with `<<: *resources-light`
# (in this file, for example: gateway, attestor, vexhub-web, doctor-web, signer,
# smremote).
x-resources-light: &resources-light
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
# Upper bound of a quarter CPU for the container.
cpus: "0.25"
|
||||
# Upper bound of 512M of memory for the container.
memory: 512M
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# .NET GC tuning tiers (Workstream 6: GC configuration)
|
||||
# ---------------------------------------------------------------------------
|
||||
# GC environment variables for the heavy tier, exposed as the YAML anchor `gc-heavy`.
# Merged into a service's `environment:` map, e.g. `<<: [*kestrel-cert, *gc-heavy]`.
# In this file every service using it also uses the heavy resource tier (cpus "1.0").
x-gc-heavy: &gc-heavy
|
||||
# "1" requests Server GC.
# NOTE(review): the paired heavy resource tier caps the container at 1.0 CPU; the
# runtime may fall back to workstation GC when it sees a single processor — confirm
# that Server GC is actually in effect under this limit.
DOTNET_gcServer: "1"
|
||||
# Conserve-memory level; lowest of the three tiers (5 vs 7 vs 9).
DOTNET_GCConserveMemory: "5"
|
||||
# "1" turns on dynamic adaptation (the "DATAS" item in the commit summary).
DOTNET_GCDynamicAdaptationMode: "1"
|
||||
|
||||
# GC environment variables for the medium tier, exposed as the YAML anchor `gc-medium`.
# Merged into a service's `environment:` map, e.g. `<<: [*kestrel-cert, *gc-medium]`.
# In this file every service using it also uses the medium resource tier (cpus "0.50").
x-gc-medium: &gc-medium
|
||||
# "1" requests Server GC.
# NOTE(review): the paired medium resource tier caps the container at 0.50 CPU; the
# runtime may fall back to workstation GC when it sees a single processor — confirm
# this setting has the intended effect, or set "0" explicitly.
DOTNET_gcServer: "1"
|
||||
# Conserve-memory level; middle of the three tiers (5 vs 7 vs 9).
DOTNET_GCConserveMemory: "7"
|
||||
# "1" turns on dynamic adaptation (the "DATAS" item in the commit summary).
DOTNET_GCDynamicAdaptationMode: "1"
|
||||
|
||||
# GC environment variables for the light tier, exposed as the YAML anchor `gc-light`.
# Merged into a service's `environment:` map, e.g. `<<: [*kestrel-cert, *gc-light]`.
# In this file every service using it also uses the light resource tier (cpus "0.25").
x-gc-light: &gc-light
|
||||
# "0" disables Server GC, i.e. selects workstation GC.
DOTNET_gcServer: "0"
|
||||
# Conserve-memory level; highest of the three tiers (5 vs 7 vs 9).
DOTNET_GCConserveMemory: "9"
|
||||
# "1" turns on dynamic adaptation (the "DATAS" item in the commit summary).
# NOTE(review): DATAS is described as a Server GC feature; with gcServer "0" this
# setting presumably has no effect — confirm, and drop it from this tier if so.
DOTNET_GCDynamicAdaptationMode: "1"
|
||||
|
||||
networks:
|
||||
stellaops:
|
||||
driver: bridge
|
||||
@@ -273,6 +315,7 @@ services:
|
||||
|
||||
# --- Slot 0: Router Gateway (Front Door) -----------------------------------
|
||||
router-gateway:
|
||||
<<: *resources-heavy
|
||||
image: stellaops/router-gateway:dev
|
||||
container_name: stellaops-router-gateway
|
||||
restart: unless-stopped
|
||||
@@ -282,7 +325,7 @@ services:
|
||||
condition: service_completed_successfully
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://0.0.0.0:8080"
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-heavy]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Gateway__Auth__DpopEnabled: "false"
|
||||
@@ -307,9 +350,12 @@ services:
|
||||
Gateway__Auth__IdentityEnvelopeSigningKey: "${STELLAOPS_IDENTITY_ENVELOPE_SIGNING_KEY}"
|
||||
# Audience validation disabled until authority includes aud in access tokens
|
||||
# Gateway__Auth__Authority__Audiences__0: "stella-ops-api"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug"
|
||||
Logging__LogLevel__Microsoft.IdentityModel: "Debug"
|
||||
Logging__LogLevel__StellaOps: "Debug"
|
||||
# Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Information"
|
||||
# Logging__LogLevel__Microsoft.IdentityModel: "Debug"
|
||||
Logging__LogLevel__Microsoft.IdentityModel: "Information"
|
||||
# Logging__LogLevel__StellaOps: "Debug"
|
||||
Logging__LogLevel__StellaOps: "Information"
|
||||
volumes:
|
||||
- *cert-volume
|
||||
- console-dist:/app/wwwroot:ro
|
||||
@@ -331,13 +377,14 @@ services:
|
||||
|
||||
# --- Slot 1: Platform ------------------------------------------------------
|
||||
platform:
|
||||
<<: *resources-heavy
|
||||
image: stellaops/platform:dev
|
||||
container_name: stellaops-platform
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-heavy]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Platform__Authority__Issuer: "https://authority.stella-ops.local/"
|
||||
@@ -345,9 +392,12 @@ services:
|
||||
Platform__Authority__BypassNetworks__0: "172.0.0.0/8"
|
||||
Platform__Authority__BypassNetworks__1: "127.0.0.0/8"
|
||||
Platform__Authority__BypassNetworks__2: "::1/128"
|
||||
Logging__LogLevel__StellaOps.Auth: "Debug"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authorization: "Debug"
|
||||
# Logging__LogLevel__StellaOps.Auth: "Debug"
|
||||
Logging__LogLevel__StellaOps.Auth: "Information"
|
||||
# Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Information"
|
||||
# Logging__LogLevel__Microsoft.AspNetCore.Authorization: "Debug"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authorization: "Information"
|
||||
Platform__Storage__Driver: "postgres"
|
||||
Platform__Storage__PostgresConnectionString: *postgres-connection
|
||||
Platform__EnvironmentSettings__AuthorizeEndpoint: "https://stella-ops.local/connect/authorize"
|
||||
@@ -418,6 +468,7 @@ services:
|
||||
|
||||
# --- Slot 2: Authority -----------------------------------------------------
|
||||
authority:
|
||||
<<: *resources-heavy
|
||||
image: stellaops/authority:dev
|
||||
container_name: stellaops-authority
|
||||
restart: unless-stopped
|
||||
@@ -464,7 +515,7 @@ services:
|
||||
STELLAOPS_AUTHORITY_AUTHORITY__TENANTS__0__ID: "demo-prod"
|
||||
STELLAOPS_AUTHORITY_AUTHORITY__TENANTS__0__DISPLAYNAME: "Demo Production"
|
||||
STELLAOPS_AUTHORITY_AUTHORITY__TENANTS__0__STATUS: "active"
|
||||
<<: *router-microservice-defaults
|
||||
<<: [*router-microservice-defaults, *gc-heavy]
|
||||
Router__Enabled: "${AUTHORITY_ROUTER_ENABLED:-true}"
|
||||
Router__Messaging__ConsumerGroup: "authority"
|
||||
volumes:
|
||||
@@ -484,13 +535,14 @@ services:
|
||||
|
||||
# --- Slot 3: Gateway -------------------------------------------------------
|
||||
gateway:
|
||||
<<: *resources-light
|
||||
image: stellaops/gateway:dev
|
||||
container_name: stellaops-gateway
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:80;http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Gateway__Auth__DpopEnabled: "false"
|
||||
@@ -516,6 +568,7 @@ services:
|
||||
|
||||
# --- Slot 4: Attestor ------------------------------------------------------
|
||||
attestor:
|
||||
<<: *resources-light
|
||||
image: stellaops/attestor:dev
|
||||
container_name: stellaops-attestor
|
||||
restart: unless-stopped
|
||||
@@ -523,7 +576,7 @@ services:
|
||||
- signer
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8442"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ATTESTOR_ATTESTOR__SIGNER__BASEURL: "http://signer.stella-ops.local"
|
||||
ATTESTOR_ATTESTOR__POSTGRES__CONNECTIONSTRING: *postgres-connection
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
@@ -546,6 +599,7 @@ services:
|
||||
|
||||
# --- Slot 5: Attestor TileProxy --------------------------------------------
|
||||
attestor-tileproxy:
|
||||
<<: *resources-light
|
||||
image: stellaops/attestor-tileproxy:dev
|
||||
container_name: stellaops-attestor-tileproxy
|
||||
restart: unless-stopped
|
||||
@@ -553,7 +607,7 @@ services:
|
||||
- attestor
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-light]
|
||||
TILE_PROXY__tile_proxy__UpstreamUrl: "https://rekor.sigstore.dev"
|
||||
TILE_PROXY__tile_proxy__Origin: "stellaops-tileproxy"
|
||||
TILE_PROXY__tile_proxy__Cache__BasePath: "/var/cache/stellaops/tiles"
|
||||
@@ -573,13 +627,14 @@ services:
|
||||
|
||||
# --- Slot 6: Evidence Locker ------------------------------------------------
|
||||
evidence-locker-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/evidence-locker-web:dev
|
||||
container_name: stellaops-evidence-locker-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
EvidenceLocker__Database__ConnectionString: *postgres-connection
|
||||
EvidenceLocker__Database__ApplyMigrationsAtStartup: "true"
|
||||
EvidenceLocker__ObjectStore__Kind: "FileSystem"
|
||||
@@ -619,12 +674,13 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
evidence-locker-worker:
|
||||
<<: *resources-light
|
||||
image: stellaops/evidence-locker-worker:dev
|
||||
container_name: stellaops-evidence-locker-worker
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-light]
|
||||
EvidenceLocker__Database__ConnectionString: *postgres-connection
|
||||
EvidenceLocker__Database__ApplyMigrationsAtStartup: "true"
|
||||
EvidenceLocker__ObjectStore__Kind: "FileSystem"
|
||||
@@ -666,6 +722,7 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
scanner-web:
|
||||
<<: *resources-heavy
|
||||
image: stellaops/scanner-web:dev
|
||||
container_name: stellaops-scanner-web
|
||||
restart: unless-stopped
|
||||
@@ -680,7 +737,7 @@ services:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8444"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-heavy]
|
||||
SCANNER_SCANNER__PLUGINS__BASEDIRECTORY: "/tmp/stellaops"
|
||||
SCANNER_SCANNER__STORAGE__DRIVER: "postgres"
|
||||
SCANNER_SCANNER__STORAGE__DSN: *postgres-connection
|
||||
@@ -737,6 +794,7 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
scanner-worker:
|
||||
<<: *resources-heavy
|
||||
image: stellaops/scanner-worker:dev
|
||||
container_name: stellaops-scanner-worker
|
||||
restart: unless-stopped
|
||||
@@ -750,7 +808,7 @@ services:
|
||||
rustfs:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-heavy]
|
||||
# Scanner worker options
|
||||
Scanner__Worker__Authority__Enabled: "false"
|
||||
BinaryIndex__Enabled: "false"
|
||||
@@ -786,6 +844,7 @@ services:
|
||||
|
||||
# --- Slot 9: Concelier -----------------------------------------------------
|
||||
concelier:
|
||||
<<: *resources-medium
|
||||
image: stellaops/concelier:dev
|
||||
container_name: stellaops-concelier
|
||||
restart: unless-stopped
|
||||
@@ -798,7 +857,7 @@ services:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
CONCELIER_PLUGINS__BASEDIRECTORY: "/tmp/stellaops"
|
||||
CONCELIER_POSTGRESSTORAGE__CONNECTIONSTRING: *postgres-connection
|
||||
CONCELIER_POSTGRESSTORAGE__ENABLED: "true"
|
||||
@@ -834,13 +893,14 @@ services:
|
||||
|
||||
# --- Slot 10: Excititor ----------------------------------------------------
|
||||
excititor:
|
||||
<<: *resources-medium
|
||||
image: stellaops/excititor:dev
|
||||
container_name: stellaops-excititor
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
# Postgres options (section: Postgres:Excititor)
|
||||
Postgres__Excititor__ConnectionString: *postgres-connection
|
||||
Postgres__Excititor__SchemaName: "vex"
|
||||
@@ -869,6 +929,7 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
excititor-worker:
|
||||
<<: *resources-medium
|
||||
image: stellaops/excititor-worker:dev
|
||||
container_name: stellaops-excititor-worker
|
||||
restart: unless-stopped
|
||||
@@ -878,7 +939,7 @@ services:
|
||||
valkey:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-medium]
|
||||
# Postgres options (section: Postgres:Excititor)
|
||||
Postgres__Excititor__ConnectionString: *postgres-connection
|
||||
Postgres__Excititor__SchemaName: "vex"
|
||||
@@ -903,13 +964,14 @@ services:
|
||||
|
||||
# --- Slot 11: VexHub -------------------------------------------------------
|
||||
vexhub-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/vexhub-web:dev
|
||||
container_name: stellaops-vexhub-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Postgres__ConnectionString: *postgres-connection
|
||||
@@ -932,13 +994,14 @@ services:
|
||||
|
||||
# --- Slot 12: VexLens ------------------------------------------------------
|
||||
vexlens-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/vexlens-web:dev
|
||||
container_name: stellaops-vexlens-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${VEXLENS_ROUTER_ENABLED:-true}"
|
||||
@@ -959,13 +1022,14 @@ services:
|
||||
|
||||
# --- Slot 13: VulnExplorer (api) [src/Findings/StellaOps.VulnExplorer.Api] ---
|
||||
api:
|
||||
<<: *resources-light
|
||||
image: stellaops/api:dev
|
||||
container_name: stellaops-api
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${VULNEXPLORER_ROUTER_ENABLED:-true}"
|
||||
@@ -986,13 +1050,14 @@ services:
|
||||
|
||||
# --- Slot 14: Policy Engine ------------------------------------------------
|
||||
policy-engine:
|
||||
<<: *resources-medium
|
||||
image: stellaops/policy-engine:dev
|
||||
container_name: stellaops-policy-engine
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
STELLAOPS_POLICY_ENGINE_Postgres__Policy__ConnectionString: *postgres-connection
|
||||
STELLAOPS_POLICY_ENGINE_ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
STELLAOPS_POLICY_ENGINE_PolicyEngine__ResourceServer__Authority: "https://authority.stella-ops.local/"
|
||||
@@ -1012,8 +1077,10 @@ services:
|
||||
PolicyEngine__ResourceServer__BypassNetworks__0: "172.19.0.0/16"
|
||||
PolicyEngine__ResourceServer__BypassNetworks__1: "127.0.0.1/32"
|
||||
PolicyEngine__ResourceServer__BypassNetworks__2: "::1/128"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug"
|
||||
Logging__LogLevel__Microsoft.IdentityModel: "Debug"
|
||||
# Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Information"
|
||||
# Logging__LogLevel__Microsoft.IdentityModel: "Debug"
|
||||
Logging__LogLevel__Microsoft.IdentityModel: "Information"
|
||||
Router__Enabled: "${POLICY_ENGINE_ROUTER_ENABLED:-true}"
|
||||
Router__Messaging__ConsumerGroup: "policy-engine"
|
||||
volumes:
|
||||
@@ -1033,13 +1100,14 @@ services:
|
||||
|
||||
# --- Slot 15: Policy Gateway -----------------------------------------------
|
||||
policy:
|
||||
<<: *resources-medium
|
||||
image: stellaops/policy:dev
|
||||
container_name: stellaops-policy
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8084"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Postgres__Policy__ConnectionString: *postgres-connection
|
||||
@@ -1077,13 +1145,14 @@ services:
|
||||
|
||||
# --- Slot 16: RiskEngine [src/Findings/StellaOps.RiskEngine.*] ---------------
|
||||
riskengine-web:
|
||||
<<: *resources-medium
|
||||
image: stellaops/riskengine-web:dev
|
||||
container_name: stellaops-riskengine-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
RISKENGINE__STORAGE__DRIVER: "postgres"
|
||||
@@ -1105,12 +1174,13 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
riskengine-worker:
|
||||
<<: *resources-medium
|
||||
image: stellaops/riskengine-worker:dev
|
||||
container_name: stellaops-riskengine-worker
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-medium]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
volumes:
|
||||
@@ -1125,13 +1195,14 @@ services:
|
||||
|
||||
# --- Slot 17: Orchestrator -------------------------------------------------
|
||||
jobengine:
|
||||
<<: *resources-heavy
|
||||
image: stellaops/orchestrator:dev
|
||||
container_name: stellaops-jobengine
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-heavy]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Authority__ResourceServer__Authority: "https://authority.stella-ops.local/"
|
||||
@@ -1162,12 +1233,13 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
jobengine-worker:
|
||||
<<: *resources-medium
|
||||
image: stellaops/orchestrator-worker:dev
|
||||
container_name: stellaops-jobengine-worker
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-medium]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
volumes:
|
||||
@@ -1182,13 +1254,14 @@ services:
|
||||
|
||||
# --- Slot 18: TaskRunner ---------------------------------------------------
|
||||
taskrunner-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/taskrunner-web:dev
|
||||
container_name: stellaops-taskrunner-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
TASKRUNNER__STORAGE__DRIVER: "postgres"
|
||||
@@ -1213,12 +1286,13 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
taskrunner-worker:
|
||||
<<: *resources-light
|
||||
image: stellaops/taskrunner-worker:dev
|
||||
container_name: stellaops-taskrunner-worker
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
TASKRUNNER__STORAGE__DRIVER: "postgres"
|
||||
@@ -1245,13 +1319,14 @@ services:
|
||||
|
||||
# --- Slot 19: Scheduler ----------------------------------------------------
|
||||
scheduler-web:
|
||||
<<: *resources-medium
|
||||
image: stellaops/scheduler-web:dev
|
||||
container_name: stellaops-scheduler-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Scheduler__Authority__Enabled: "false"
|
||||
@@ -1283,6 +1358,7 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
scheduler-worker:
|
||||
<<: *resources-medium
|
||||
image: stellaops/scheduler-worker:dev
|
||||
container_name: stellaops-scheduler-worker
|
||||
restart: unless-stopped
|
||||
@@ -1292,7 +1368,7 @@ services:
|
||||
valkey:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-medium]
|
||||
# Queue config (Redis transport)
|
||||
scheduler__queue__Kind: "Redis"
|
||||
scheduler__queue__Redis__ConnectionString: "cache.stella-ops.local:6379"
|
||||
@@ -1320,13 +1396,14 @@ services:
|
||||
|
||||
# --- Slot 20: Graph API ----------------------------------------------------
|
||||
graph-api:
|
||||
<<: *resources-medium
|
||||
image: stellaops/graph-api:dev
|
||||
container_name: stellaops-graph-api
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${GRAPH_ROUTER_ENABLED:-true}"
|
||||
@@ -1347,13 +1424,14 @@ services:
|
||||
|
||||
# --- Slot 21: Cartographer -------------------------------------------------
|
||||
cartographer:
|
||||
<<: *resources-light
|
||||
image: stellaops/cartographer:dev
|
||||
container_name: stellaops-cartographer
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${CARTOGRAPHER_ROUTER_ENABLED:-true}"
|
||||
@@ -1374,13 +1452,14 @@ services:
|
||||
|
||||
# --- Slot 22: ReachGraph ---------------------------------------------------
|
||||
reachgraph-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/reachgraph-web:dev
|
||||
container_name: stellaops-reachgraph-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${REACHGRAPH_ROUTER_ENABLED:-true}"
|
||||
@@ -1401,13 +1480,14 @@ services:
|
||||
|
||||
# --- Slot 23: Timeline Indexer ---------------------------------------------
|
||||
timeline-indexer-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/timeline-indexer-web:dev
|
||||
container_name: stellaops-timeline-indexer-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
TIMELINE_Postgres__Timeline__ConnectionString: *postgres-connection
|
||||
@@ -1428,12 +1508,13 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
timeline-indexer-worker:
|
||||
<<: *resources-light
|
||||
image: stellaops/timeline-indexer-worker:dev
|
||||
container_name: stellaops-timeline-indexer-worker
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
TIMELINE_Postgres__Timeline__ConnectionString: *postgres-connection
|
||||
@@ -1449,13 +1530,14 @@ services:
|
||||
|
||||
# --- Slot 24: Timeline ----------------------------------------------------
|
||||
timeline-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/timeline-web:dev
|
||||
container_name: stellaops-timeline-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Authority__ResourceServer__Authority: "http://authority.stella-ops.local/"
|
||||
@@ -1481,13 +1563,14 @@ services:
|
||||
|
||||
# --- Slot 25: Findings Ledger ----------------------------------------------
|
||||
findings-ledger-web:
|
||||
<<: *resources-medium
|
||||
image: stellaops/findings-ledger-web:dev
|
||||
container_name: stellaops-findings-ledger-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__FindingsLedger: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
@@ -1498,8 +1581,10 @@ services:
|
||||
findings__ledger__Authority__Audiences__0: ""
|
||||
findings__ledger__Authority__RequiredScopes__0: "findings:read"
|
||||
findings__ledger__Authority__BypassNetworks__0: "172.19.0.0/16"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug"
|
||||
Logging__LogLevel__Microsoft.IdentityModel: "Debug"
|
||||
# Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug"
|
||||
Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Information"
|
||||
# Logging__LogLevel__Microsoft.IdentityModel: "Debug"
|
||||
Logging__LogLevel__Microsoft.IdentityModel: "Information"
|
||||
findings__ledger__Attachments__EncryptionKey: "IiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiI="
|
||||
findings__ledger__Attachments__SignedUrlBase: "http://findings.stella-ops.local/attachments"
|
||||
findings__ledger__Attachments__SignedUrlSecret: "dev-signed-url-secret"
|
||||
@@ -1524,13 +1609,14 @@ services:
|
||||
|
||||
# --- Slot 26: Doctor -------------------------------------------------------
|
||||
doctor-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/doctor-web:dev
|
||||
container_name: stellaops-doctor-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Doctor__Authority__Issuer: "https://authority.stella-ops.local/"
|
||||
@@ -1554,13 +1640,14 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
doctor-scheduler:
|
||||
<<: *resources-light
|
||||
image: stellaops/doctor-scheduler:dev
|
||||
container_name: stellaops-doctor-scheduler
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:80"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${DOCTOR_SCHEDULER_ROUTER_ENABLED:-true}"
|
||||
@@ -1578,13 +1665,14 @@ services:
|
||||
|
||||
# --- Slot 27: OpsMemory (src/AdvisoryAI/StellaOps.OpsMemory.WebService) ---
|
||||
opsmemory-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/opsmemory-web:dev
|
||||
container_name: stellaops-opsmemory-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${OPSMEMORY_ROUTER_ENABLED:-true}"
|
||||
@@ -1605,13 +1693,14 @@ services:
|
||||
|
||||
# --- Slot 28: Notifier ----------------------------------------------------
|
||||
notifier-web:
|
||||
<<: *resources-medium
|
||||
image: stellaops/notifier-web:dev
|
||||
container_name: stellaops-notifier-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Authority__ResourceServer__Authority: "https://authority.stella-ops.local/"
|
||||
@@ -1641,12 +1730,13 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
notifier-worker:
|
||||
<<: *resources-light
|
||||
image: stellaops/notifier-worker:dev
|
||||
container_name: stellaops-notifier-worker
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
notifier__queue__Transport: "redis"
|
||||
@@ -1664,13 +1754,14 @@ services:
|
||||
|
||||
# --- Slot 29: Notify ------------------------------------------------------
|
||||
notify-web:
|
||||
<<: *resources-medium
|
||||
image: stellaops/notify-web:dev
|
||||
container_name: stellaops-notify-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
DOTNET_ENVIRONMENT: Production
|
||||
NOTIFY_NOTIFY__STORAGE__DRIVER: "postgres"
|
||||
NOTIFY_NOTIFY__STORAGE__CONNECTIONSTRING: *postgres-connection
|
||||
@@ -1700,6 +1791,7 @@ services:
|
||||
|
||||
# --- Slot 30: Signer ------------------------------------------------------
|
||||
signer:
|
||||
<<: *resources-light
|
||||
image: stellaops/signer:dev
|
||||
container_name: stellaops-signer
|
||||
restart: unless-stopped
|
||||
@@ -1708,7 +1800,7 @@ services:
|
||||
- valkey
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8441"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__KeyManagement: *postgres-connection
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
Router__Enabled: "${SIGNER_ROUTER_ENABLED:-true}"
|
||||
@@ -1729,13 +1821,14 @@ services:
|
||||
|
||||
# --- Slot 31: SmRemote ----------------------------------------------------
|
||||
smremote:
|
||||
<<: *resources-light
|
||||
image: stellaops/smremote:dev
|
||||
container_name: stellaops-smremote
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${SMREMOTE_ROUTER_ENABLED:-true}"
|
||||
@@ -1756,13 +1849,14 @@ services:
|
||||
|
||||
# --- Slot 32: AirGap Controller --------------------------------------------
|
||||
airgap-controller:
|
||||
<<: *resources-light
|
||||
image: stellaops/airgap-controller:dev
|
||||
container_name: stellaops-airgap-controller
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${AIRGAP_CONTROLLER_ROUTER_ENABLED:-true}"
|
||||
@@ -1783,6 +1877,7 @@ services:
|
||||
|
||||
# --- Slot 33: AirGap Time -------------------------------------------------
|
||||
airgap-time:
|
||||
<<: *resources-light
|
||||
image: stellaops/airgap-time:dev
|
||||
container_name: stellaops-airgap-time
|
||||
restart: unless-stopped
|
||||
@@ -1790,7 +1885,7 @@ services:
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
Router__Enabled: "${AIRGAP_TIME_ROUTER_ENABLED:-true}"
|
||||
Router__Messaging__ConsumerGroup: "airgap-time"
|
||||
@@ -1810,13 +1905,14 @@ services:
|
||||
|
||||
# --- Slot 34: PacksRegistry -----------------------------------------------
|
||||
packsregistry-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/packsregistry-web:dev
|
||||
container_name: stellaops-packsregistry-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
PACKSREGISTRY__STORAGE__DRIVER: "postgres"
|
||||
@@ -1840,12 +1936,13 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
packsregistry-worker:
|
||||
<<: *resources-light
|
||||
image: stellaops/packsregistry-worker:dev
|
||||
container_name: stellaops-packsregistry-worker
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
volumes:
|
||||
@@ -1860,13 +1957,14 @@ services:
|
||||
|
||||
# --- Slot 35: Registry Token -----------------------------------------------
|
||||
registry-token:
|
||||
<<: *resources-light
|
||||
image: stellaops/registry-token:dev
|
||||
container_name: stellaops-registry-token
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
RegistryTokenService__Signing__Issuer: "http://registry-token.stella-ops.local"
|
||||
RegistryTokenService__Signing__KeyPath: "/app/etc/certs/kestrel-dev.pfx"
|
||||
@@ -1898,13 +1996,14 @@ services:
|
||||
|
||||
# --- Slot 36: BinaryIndex --------------------------------------------------
|
||||
binaryindex-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/binaryindex-web:dev
|
||||
container_name: stellaops-binaryindex-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${BINARYINDEX_ROUTER_ENABLED:-true}"
|
||||
@@ -1925,6 +2024,7 @@ services:
|
||||
|
||||
# --- Slot 37: Issuer Directory ---------------------------------------------
|
||||
issuer-directory:
|
||||
<<: *resources-light
|
||||
image: stellaops/issuer-directory-web:dev
|
||||
container_name: stellaops-issuer-directory
|
||||
restart: unless-stopped
|
||||
@@ -1933,7 +2033,7 @@ services:
|
||||
- authority
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ISSUERDIRECTORY__AUTHORITY__ENABLED: "true"
|
||||
ISSUERDIRECTORY__AUTHORITY__ISSUER: "${AUTHORITY_ISSUER:-http://authority.stella-ops.local}"
|
||||
ISSUERDIRECTORY__AUTHORITY__AUDIENCES__0: "api://issuer-directory"
|
||||
@@ -1960,13 +2060,14 @@ services:
|
||||
|
||||
# --- Slot 38: Symbols ------------------------------------------------------
|
||||
symbols:
|
||||
<<: *resources-light
|
||||
image: stellaops/symbols:dev
|
||||
container_name: stellaops-symbols
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Authority__ResourceServer__Authority: "https://authority.stella-ops.local/"
|
||||
@@ -1991,13 +2092,14 @@ services:
|
||||
|
||||
# --- Slot 39: SbomService --------------------------------------------------
|
||||
sbomservice:
|
||||
<<: *resources-light
|
||||
image: stellaops/sbomservice:dev
|
||||
container_name: stellaops-sbomservice
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Router__Enabled: "${SBOMSERVICE_ROUTER_ENABLED:-true}"
|
||||
@@ -2018,13 +2120,14 @@ services:
|
||||
|
||||
# --- Slot 40: ExportCenter -------------------------------------------------
|
||||
export:
|
||||
<<: *resources-light
|
||||
image: stellaops/export:dev
|
||||
container_name: stellaops-export
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Export__AllowInMemoryRepositories: "true"
|
||||
@@ -2055,12 +2158,13 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
export-worker:
|
||||
<<: *resources-light
|
||||
image: stellaops/export-worker:dev
|
||||
container_name: stellaops-export-worker
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Export__AllowInMemoryRepositories: "true"
|
||||
@@ -2082,13 +2186,14 @@ services:
|
||||
|
||||
# --- Slot 41: Replay -------------------------------------------------------
|
||||
replay-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/replay-web:dev
|
||||
container_name: stellaops-replay-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
REPLAY__STORAGE__DRIVER: "postgres"
|
||||
@@ -2113,13 +2218,14 @@ services:
|
||||
|
||||
# --- Slot 42: Integrations ------------------------------------------------
|
||||
integrations-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/integrations-web:dev
|
||||
container_name: stellaops-integrations-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__IntegrationsDb: *postgres-connection
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
@@ -2151,6 +2257,7 @@ services:
|
||||
|
||||
# --- Slot 43: Zastava Webhook ----------------------------------------------
|
||||
zastava-webhook:
|
||||
<<: *resources-light
|
||||
image: stellaops/zastava-webhook:dev
|
||||
container_name: stellaops-zastava-webhook
|
||||
restart: unless-stopped
|
||||
@@ -2159,7 +2266,7 @@ services:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-light]
|
||||
# Runtime authority (used by token provider for OIDC discovery)
|
||||
zastava__runtime__authority__Issuer: "https://authority.stella-ops.local/"
|
||||
zastava__runtime__authority__allowStaticTokenFallback: "true"
|
||||
@@ -2193,13 +2300,14 @@ services:
|
||||
|
||||
# --- Slot 44: Signals ------------------------------------------------------
|
||||
signals:
|
||||
<<: *resources-light
|
||||
image: stellaops/signals:dev
|
||||
container_name: stellaops-signals
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
Authority__ResourceServer__Authority: "https://authority.stella-ops.local/"
|
||||
@@ -2230,6 +2338,7 @@ services:
|
||||
|
||||
# --- Slot 45: Advisory AI --------------------------------------------------
|
||||
advisory-ai-web:
|
||||
<<: *resources-medium
|
||||
image: stellaops/advisory-ai-web:dev
|
||||
container_name: stellaops-advisory-ai-web
|
||||
restart: unless-stopped
|
||||
@@ -2237,7 +2346,7 @@ services:
|
||||
- scanner-web
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-medium]
|
||||
ADVISORYAI__AdvisoryAI__SbomBaseAddress: "${ADVISORY_AI_SBOM_BASEADDRESS:-http://scanner.stella-ops.local}"
|
||||
ADVISORYAI__AdvisoryAI__Queue__DirectoryPath: "/var/lib/advisory-ai/queue"
|
||||
ADVISORYAI__AdvisoryAI__Storage__PlanCacheDirectory: "/var/lib/advisory-ai/plans"
|
||||
@@ -2275,13 +2384,14 @@ services:
|
||||
labels: *release-labels
|
||||
|
||||
advisory-ai-worker:
|
||||
<<: *resources-medium
|
||||
image: stellaops/advisory-ai-worker:dev
|
||||
container_name: stellaops-advisory-ai-worker
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- scanner-web
|
||||
environment:
|
||||
<<: *kestrel-cert
|
||||
<<: [*kestrel-cert, *gc-medium]
|
||||
ADVISORYAI__AdvisoryAI__SbomBaseAddress: "${ADVISORY_AI_SBOM_BASEADDRESS:-http://scanner.stella-ops.local}"
|
||||
ADVISORYAI__AdvisoryAI__Queue__DirectoryPath: "/tmp/advisory-ai/queue"
|
||||
ADVISORYAI__AdvisoryAI__Storage__PlanCacheDirectory: "/tmp/advisory-ai/plans"
|
||||
@@ -2308,13 +2418,14 @@ services:
|
||||
|
||||
# --- Slot 46: Unknowns ----------------------------------------------------
|
||||
unknowns-web:
|
||||
<<: *resources-light
|
||||
image: stellaops/unknowns-web:dev
|
||||
container_name: stellaops-unknowns-web
|
||||
restart: unless-stopped
|
||||
depends_on: *depends-infra
|
||||
environment:
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
<<: [*kestrel-cert, *router-microservice-defaults]
|
||||
<<: [*kestrel-cert, *router-microservice-defaults, *gc-light]
|
||||
ConnectionStrings__Default: *postgres-connection
|
||||
ConnectionStrings__UnknownsDb: *postgres-connection
|
||||
ConnectionStrings__Redis: "cache.stella-ops.local:6379"
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
# Sprint 019 — Container CPU Optimization
|
||||
|
||||
## Topic & Scope
|
||||
- Reduce idle CPU pressure from 62 Docker containers by adding resource limits, tuning GC, converting polling to event-driven patterns, and reducing log verbosity.
|
||||
- Working directory: `devops/compose/`, `src/JobEngine/`, `src/Graph/`, `src/Platform/`.
|
||||
- Expected evidence: compose validation, `docker stats` showing caps, reduced idle CPU.
|
||||
|
||||
## Dependencies & Concurrency
|
||||
- No upstream sprint dependencies.
|
||||
- Workstreams 1/2/4/6 (compose-only) are independent of workstreams 3A/3B/3D (C# changes).
|
||||
- C# workstreams (3A, 3B, 3D) are independent of each other (different modules).
|
||||
|
||||
## Documentation Prerequisites
|
||||
- `docs/modules/router/architecture.md` (Valkey messaging patterns).
|
||||
|
||||
## Delivery Tracker
|
||||
|
||||
### WS-1 — Resource Limits in Docker Compose
|
||||
Status: DONE
|
||||
Dependency: none
|
||||
Owners: Developer
|
||||
Task description:
|
||||
- Add three resource tier YAML anchors (heavy/medium/light) to compose file.
|
||||
- Apply `<<: *resources-{tier}` to all 59 .NET services.
|
||||
- Infrastructure services (postgres, valkey, rustfs, registry, rekor) remain unconstrained.
|
||||
|
||||
Completion criteria:
|
||||
- [x] Three resource anchors defined
|
||||
- [x] Tier assignments: Heavy (6), Medium (16), Light (37)
|
||||
- [x] `docker compose config` validates cleanly
|
||||
- [x] Infrastructure services have no deploy limits
|
||||
|
||||
### WS-2 — Logging Debug→Information
|
||||
Status: DONE
|
||||
Dependency: none
|
||||
Owners: Developer
|
||||
Task description:
|
||||
- Change 4 services from Debug to Information logging, keeping Debug as comments.
|
||||
- Services: router-gateway, platform, policy-engine, findings-ledger-web.
|
||||
|
||||
Completion criteria:
|
||||
- [x] Debug log levels commented out with Information active
|
||||
- [x] 4 services updated
|
||||
|
||||
### WS-3A — FirstSignalSnapshotWriter Valkey Pub/Sub
|
||||
Status: DONE
|
||||
Dependency: none
|
||||
Owners: Developer
|
||||
Task description:
|
||||
- Convert 10s polling to Valkey subscription on `notify:firstsignal:dirty`.
|
||||
- Add 60s fallback timer via `FallbackPollIntervalSeconds` option.
|
||||
- Fire Valkey notification from JobEngineEventPublisher on job lifecycle events.
|
||||
|
||||
Completion criteria:
|
||||
- [x] SemaphoreSlim + Valkey subscribe pattern implemented
|
||||
- [x] 10s polling timer replaced by a 60s fallback timer (`FallbackPollIntervalSeconds`)
|
||||
- [x] Event publisher fires dirty notification on orch.jobs channel events
|
||||
- [x] Project builds with 0 errors
|
||||
|
||||
### WS-3B — GraphAnalyticsHostedService Single Timer + Idle Skip
|
||||
Status: DONE
|
||||
Dependency: none
|
||||
Owners: Developer
|
||||
Task description:
|
||||
- Consolidate dual PeriodicTimer to single timer using Min(ClusterInterval, CentralityInterval).
|
||||
- Add idle-check: skip pipeline when no pending snapshots exist.
|
||||
- Add `SkipWhenIdle` option (default: true).
|
||||
|
||||
Completion criteria:
|
||||
- [x] Single timer replaces dual timers
|
||||
- [x] Idle check via IGraphSnapshotProvider.GetPendingSnapshotsAsync
|
||||
- [x] Debug log emitted when skipping
|
||||
- [x] Project builds with 0 errors
|
||||
|
||||
### WS-3D — EnvironmentSettingsRefreshService Valkey Pub/Sub
|
||||
Status: DONE
|
||||
Dependency: none
|
||||
Owners: Developer
|
||||
Task description:
|
||||
- Register IConnectionMultiplexer in Platform DI from ConnectionStrings:Redis.
|
||||
- Publish `notify:platform:envsettings:dirty` from PostgresEnvironmentSettingsStore on set/delete.
|
||||
- Convert EnvironmentSettingsRefreshService from Task.Delay(60s) to Valkey subscription with 300s fallback.
|
||||
|
||||
Completion criteria:
|
||||
- [x] IConnectionMultiplexer registered in Platform Program.cs
|
||||
- [x] Store publishes dirty notification (fire-and-forget)
|
||||
- [x] Refresh service uses SemaphoreSlim + Valkey subscribe
|
||||
- [x] Project builds with 0 errors
|
||||
|
||||
### WS-4 — Health Check Interval 60s (Configurable)
|
||||
Status: DONE
|
||||
Dependency: none
|
||||
Owners: Developer
|
||||
Task description:
|
||||
- Change healthcheck anchors from 30s to `${HEALTHCHECK_INTERVAL:-60s}`.
|
||||
- Propagates to all ~57 services using these anchors.
|
||||
|
||||
Completion criteria:
|
||||
- [x] Both healthcheck anchors updated
|
||||
- [x] Environment variable override supported
|
||||
- [x] Rendered config shows 60s intervals
|
||||
|
||||
### WS-5 — Messaging Transport (No Changes)
|
||||
Status: DONE
|
||||
Dependency: none
|
||||
Owners: Developer
|
||||
Task description:
|
||||
- Verified Valkey messaging transport is already subscription-based with SemaphoreSlim + fallback.
|
||||
- No changes needed.
|
||||
|
||||
Completion criteria:
|
||||
- [x] Verified ValkeyMessageQueue already uses push-first pattern
|
||||
|
||||
### WS-6 — GC Configuration
|
||||
Status: DONE
|
||||
Dependency: none
|
||||
Owners: Developer
|
||||
Task description:
|
||||
- Add three GC tuning YAML anchors (heavy/medium/light) with DOTNET_gcServer, GCConserveMemory, GCDynamicAdaptationMode.
|
||||
- Merge into all 59 .NET service environments.
|
||||
|
||||
Completion criteria:
|
||||
- [x] Three GC anchors defined
|
||||
- [x] Heavy/Medium use Server GC; Light uses Workstation GC
|
||||
- [x] GCDynamicAdaptationMode=1 (DATAS) on all services
|
||||
- [x] Not applied to non-.NET infrastructure
|
||||
|
||||
## Execution Log
|
||||
| Date (UTC) | Update | Owner |
|
||||
| --- | --- | --- |
|
||||
| 2026-03-10 | Sprint created. All workstreams completed. All 3 C# projects build clean. Compose validates clean. | Developer |
|
||||
|
||||
## Decisions & Risks
|
||||
- Resource limits are dev/QA defaults; production deployments should tune per hardware.
|
||||
- GCDynamicAdaptationMode=1 requires .NET 8+; all services use .NET 8/9.
|
||||
- Healthcheck interval override via HEALTHCHECK_INTERVAL env var for operator flexibility.
|
||||
- Valkey pub/sub notifications are fire-and-forget; the fallback timers ensure correctness if a notification is missed.
|
||||
|
||||
## Next Checkpoints
|
||||
- Rebuild affected images (platform, jobengine, graph-indexer) after C# changes merge.
|
||||
- Verify `docker stats` shows resource caps in dev environment.
|
||||
@@ -8,37 +8,49 @@ namespace StellaOps.Graph.Indexer.Analytics;
|
||||
public sealed class GraphAnalyticsHostedService : BackgroundService
|
||||
{
|
||||
private readonly IGraphAnalyticsPipeline _pipeline;
|
||||
private readonly IGraphSnapshotProvider _snapshotProvider;
|
||||
private readonly GraphAnalyticsOptions _options;
|
||||
private readonly ILogger<GraphAnalyticsHostedService> _logger;
|
||||
|
||||
/// <summary>
/// Creates the hosted service and captures its collaborators.
/// </summary>
/// <param name="pipeline">Analytics pipeline stored for later use by this service.</param>
/// <param name="snapshotProvider">Snapshot provider stored for later use by this service.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is read once here and cached.</param>
/// <param name="logger">Logger stored for later use by this service.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when any argument is null, or when <paramref name="options"/> yields a null <c>Value</c>.
/// </exception>
public GraphAnalyticsHostedService(
|
||||
IGraphAnalyticsPipeline pipeline,
|
||||
IGraphSnapshotProvider snapshotProvider,
|
||||
IOptions<GraphAnalyticsOptions> options,
|
||||
ILogger<GraphAnalyticsHostedService> logger)
|
||||
{
|
||||
_pipeline = pipeline ?? throw new ArgumentNullException(nameof(pipeline));
|
||||
_snapshotProvider = snapshotProvider ?? throw new ArgumentNullException(nameof(snapshotProvider));
|
||||
// A null wrapper and a null Value are both reported against the "options" parameter.
_options = options?.Value ?? throw new ArgumentNullException(nameof(options));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
using var clusteringTimer = new PeriodicTimer(_options.ClusterInterval);
|
||||
using var centralityTimer = new PeriodicTimer(_options.CentralityInterval);
|
||||
var interval = _options.ClusterInterval < _options.CentralityInterval
|
||||
? _options.ClusterInterval
|
||||
: _options.CentralityInterval;
|
||||
|
||||
using var timer = new PeriodicTimer(interval);
|
||||
|
||||
while (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
var clusteringTask = clusteringTimer.WaitForNextTickAsync(stoppingToken).AsTask();
|
||||
var centralityTask = centralityTimer.WaitForNextTickAsync(stoppingToken).AsTask();
|
||||
|
||||
var completed = await Task.WhenAny(clusteringTask, centralityTask).ConfigureAwait(false);
|
||||
if (completed.IsCanceled || stoppingToken.IsCancellationRequested)
|
||||
if (!await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (_options.SkipWhenIdle)
|
||||
{
|
||||
var pending = await _snapshotProvider.GetPendingSnapshotsAsync(stoppingToken).ConfigureAwait(false);
|
||||
if (pending.Count == 0)
|
||||
{
|
||||
_logger.LogDebug("graph-indexer: skipping analytics pipeline, no pending snapshots");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
await _pipeline.RunAsync(new GraphAnalyticsRunContext(ForceBackfill: false), stoppingToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
|
||||
@@ -28,4 +28,9 @@ public sealed class GraphAnalyticsOptions
|
||||
/// Whether to also write cluster ids onto graph node documents (alongside overlays).
|
||||
/// </summary>
|
||||
public bool WriteClusterAssignmentsToNodes { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// When true, skips the analytics pipeline if no pending snapshots exist.
|
||||
/// </summary>
|
||||
public bool SkipWhenIdle { get; set; } = true;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.JobEngine.Core.Domain.Events;
|
||||
using StellaOps.JobEngine.Infrastructure.Services;
|
||||
using StellaOps.Messaging.Transport.Valkey;
|
||||
using StackExchange.Redis;
|
||||
|
||||
namespace StellaOps.JobEngine.Infrastructure.Events;
|
||||
|
||||
@@ -14,19 +17,22 @@ public sealed class JobEngineEventPublisher : IEventPublisher
|
||||
private readonly IEventSigner? _eventSigner;
|
||||
private readonly EventPublishOptions _options;
|
||||
private readonly ILogger<JobEngineEventPublisher> _logger;
|
||||
private readonly IServiceProvider? _serviceProvider;
|
||||
|
||||
public JobEngineEventPublisher(
|
||||
IIdempotencyStore idempotencyStore,
|
||||
INotifierBus notifierBus,
|
||||
IOptions<EventPublishOptions> options,
|
||||
ILogger<JobEngineEventPublisher> logger,
|
||||
IEventSigner? eventSigner = null)
|
||||
IEventSigner? eventSigner = null,
|
||||
IServiceProvider? serviceProvider = null)
|
||||
{
|
||||
_idempotencyStore = idempotencyStore;
|
||||
_notifierBus = notifierBus;
|
||||
_eventSigner = eventSigner;
|
||||
_options = options.Value;
|
||||
_logger = logger;
|
||||
_serviceProvider = serviceProvider;
|
||||
}
|
||||
|
||||
public async Task<bool> PublishAsync(EventEnvelope envelope, CancellationToken cancellationToken = default)
|
||||
@@ -48,6 +54,14 @@ public sealed class JobEngineEventPublisher : IEventPublisher
|
||||
|
||||
await PublishWithRetryAsync(channel, message, cancellationToken);
|
||||
|
||||
// Fire Valkey notification for job-lifecycle events to wake
|
||||
// FirstSignalSnapshotWriter immediately instead of waiting for
|
||||
// its fallback poll interval.
|
||||
if (channel == "orch.jobs")
|
||||
{
|
||||
await TryNotifyFirstSignalDirtyAsync().ConfigureAwait(false);
|
||||
}
|
||||
|
||||
JobEngineMetrics.EventPublished(envelope.TenantId, envelope.EventType.ToEventTypeName());
|
||||
|
||||
_logger.LogInformation(
|
||||
@@ -206,6 +220,40 @@ public sealed class JobEngineEventPublisher : IEventPublisher
|
||||
System.Net.Http.HttpRequestException or
|
||||
System.IO.IOException;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Fire-and-forget notification to the Valkey pub/sub channel that wakes
|
||||
/// <see cref="FirstSignalSnapshotWriter"/>. This must never fail the
|
||||
/// event publish — all exceptions are swallowed and logged.
|
||||
/// </summary>
/// <remarks>
/// Does nothing when no <see cref="IServiceProvider"/> was supplied or when it
/// cannot resolve a <c>ValkeyConnectionFactory</c>. Acquiring the subscriber is
/// awaited (without a cancellation token); only the publish itself is issued
/// with <c>CommandFlags.FireAndForget</c>. Failures are logged at Debug level.
/// </remarks>
|
||||
private async Task TryNotifyFirstSignalDirtyAsync()
|
||||
{
|
||||
try
|
||||
{
|
||||
// The service provider is an optional constructor dependency; without it there is nothing to resolve.
if (_serviceProvider is null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Resolved per call via GetService so a missing registration yields null instead of throwing.
var connectionFactory = _serviceProvider.GetService(typeof(ValkeyConnectionFactory)) as ValkeyConnectionFactory;
|
||||
if (connectionFactory is null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var subscriber = await connectionFactory.GetSubscriberAsync().ConfigureAwait(false);
|
||||
// The payload "1" is a placeholder; NOTE(review): the subscriber appears to react to message arrival only — confirm.
await subscriber.PublishAsync(
|
||||
RedisChannel.Literal(FirstSignalSnapshotWriter.NotificationChannel),
|
||||
"1",
|
||||
CommandFlags.FireAndForget).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Deliberate best-effort: the caller's event publish has already succeeded at this point.
_logger.LogDebug(
|
||||
ex,
|
||||
"Failed to publish first-signal dirty notification (fire-and-forget); snapshot writer will use fallback timer.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -28,6 +28,7 @@ public sealed class FirstSignalSnapshotWriterOptions
|
||||
public bool Enabled { get; set; }
|
||||
public string? TenantId { get; set; }
|
||||
public int PollIntervalSeconds { get; set; } = 10;
|
||||
public int FallbackPollIntervalSeconds { get; set; } = 60;
|
||||
public int MaxRunsPerTick { get; set; } = 50;
|
||||
public int LookbackMinutes { get; set; } = 60;
|
||||
}
|
||||
|
||||
@@ -7,23 +7,40 @@ using Microsoft.Extensions.Options;
|
||||
using StellaOps.JobEngine.Core.Domain;
|
||||
using StellaOps.JobEngine.Infrastructure.Options;
|
||||
using StellaOps.JobEngine.Infrastructure.Repositories;
|
||||
using StellaOps.Messaging.Transport.Valkey;
|
||||
using StackExchange.Redis;
|
||||
|
||||
namespace StellaOps.JobEngine.Infrastructure.Services;
|
||||
|
||||
public sealed class FirstSignalSnapshotWriter : BackgroundService
|
||||
{
|
||||
/// <summary>
|
||||
/// Valkey pub/sub channel used to notify this writer that new job-lifecycle
|
||||
/// data is available and it should wake up immediately.
|
||||
/// </summary>
|
||||
internal const string NotificationChannel = "notify:firstsignal:dirty";
|
||||
|
||||
private readonly IServiceScopeFactory _scopeFactory;
|
||||
private readonly IServiceProvider _serviceProvider;
|
||||
private readonly FirstSignalSnapshotWriterOptions _options;
|
||||
private readonly ILogger<FirstSignalSnapshotWriter> _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
|
||||
/// <summary>
|
||||
/// Semaphore used for notification-based wakeup. Starts at 0 permits.
|
||||
/// Released (up to 1) when a Valkey pub/sub notification arrives.
|
||||
/// </summary>
|
||||
private readonly SemaphoreSlim _notificationSignal = new(0, 1);
|
||||
|
||||
public FirstSignalSnapshotWriter(
|
||||
IServiceScopeFactory scopeFactory,
|
||||
IServiceProvider serviceProvider,
|
||||
IOptions<FirstSignalOptions> options,
|
||||
ILogger<FirstSignalSnapshotWriter> logger,
|
||||
TimeProvider? timeProvider = null)
|
||||
{
|
||||
_scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory));
|
||||
_serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider));
|
||||
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value.SnapshotWriter;
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
@@ -48,13 +65,35 @@ public sealed class FirstSignalSnapshotWriter : BackgroundService
|
||||
|
||||
var tenantId = _options.TenantId.Trim();
|
||||
var lookback = TimeSpan.FromMinutes(Math.Max(1, _options.LookbackMinutes));
|
||||
var pollInterval = TimeSpan.FromSeconds(Math.Max(1, _options.PollIntervalSeconds));
|
||||
var fallbackInterval = TimeSpan.FromSeconds(Math.Max(1, _options.FallbackPollIntervalSeconds));
|
||||
var maxRuns = Math.Max(1, _options.MaxRunsPerTick);
|
||||
|
||||
using var timer = new PeriodicTimer(pollInterval);
|
||||
// Try to subscribe to Valkey pub/sub for immediate wake-up notifications.
|
||||
await TrySubscribeToValkeyNotificationsAsync(stoppingToken).ConfigureAwait(false);
|
||||
|
||||
while (await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
|
||||
using var timer = new PeriodicTimer(fallbackInterval);
|
||||
|
||||
while (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
// Wait for either a Valkey notification or the fallback timer to fire.
|
||||
try
|
||||
{
|
||||
await Task.WhenAny(
|
||||
_notificationSignal.WaitAsync(stoppingToken),
|
||||
timer.WaitForNextTickAsync(stoppingToken).AsTask()
|
||||
).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// Drain the semaphore to avoid duplicate wakeups from queued notifications.
|
||||
while (_notificationSignal.Wait(0))
|
||||
{
|
||||
// Intentionally empty: draining any extra permits.
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await WarmTenantAsync(tenantId, lookback, maxRuns, stoppingToken).ConfigureAwait(false);
|
||||
@@ -70,6 +109,50 @@ public sealed class FirstSignalSnapshotWriter : BackgroundService
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Attempts to subscribe to the Valkey notification channel. If Valkey is
|
||||
/// unavailable, logs a warning and falls back to timer-only mode.
|
||||
/// </summary>
/// <param name="cancellationToken">
/// Passed to <c>GetSubscriberAsync</c> only; the subscribe call itself takes no token.
/// </param>
/// <remarks>
/// Never throws: every exception (including cancellation raised while acquiring
/// the subscriber) is caught and logged as a warning. The subscription handle is
/// a local and is not stored, so this method provides no way to unsubscribe.
/// </remarks>
|
||||
private async Task TrySubscribeToValkeyNotificationsAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
try
|
||||
{
|
||||
// GetService returns null when the factory is not registered, which selects timer-only mode.
var connectionFactory = _serviceProvider.GetService<ValkeyConnectionFactory>();
|
||||
if (connectionFactory is null)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
"ValkeyConnectionFactory not available; FirstSignalSnapshotWriter will use timer-only mode " +
|
||||
"(fallback interval {Interval}s).",
|
||||
_options.FallbackPollIntervalSeconds);
|
||||
return;
|
||||
}
|
||||
|
||||
var subscriber = await connectionFactory.GetSubscriberAsync(cancellationToken).ConfigureAwait(false);
|
||||
var channel = await subscriber
|
||||
.SubscribeAsync(RedisChannel.Literal(NotificationChannel))
|
||||
.ConfigureAwait(false);
|
||||
|
||||
// The message content is ignored; each message only releases the wake-up semaphore.
channel.OnMessage(_ =>
|
||||
{
|
||||
// The semaphore is capped at one permit, so a second release before the
// writer wakes throws SemaphoreFullException, which is ignored here.
try { _notificationSignal.Release(); }
|
||||
catch (SemaphoreFullException) { /* already signaled */ }
|
||||
});
|
||||
|
||||
_logger.LogInformation(
|
||||
"FirstSignalSnapshotWriter subscribed to Valkey channel {Channel} for immediate wake-up notifications.",
|
||||
NotificationChannel);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Subscription failure is non-fatal: the writer still runs on its fallback timer.
_logger.LogWarning(
|
||||
ex,
|
||||
"Failed to subscribe to Valkey channel {Channel}; FirstSignalSnapshotWriter will use timer-only mode " +
|
||||
"(fallback interval {Interval}s).",
|
||||
NotificationChannel,
|
||||
_options.FallbackPollIntervalSeconds);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task WarmTenantAsync(
|
||||
string tenantId,
|
||||
TimeSpan lookback,
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
<ProjectReference Include="..\..\..\__Libraries\StellaOps.Infrastructure.Postgres\StellaOps.Infrastructure.Postgres.csproj" />
|
||||
<ProjectReference Include="..\..\..\Telemetry\StellaOps.Telemetry.Core\StellaOps.Telemetry.Core\StellaOps.Telemetry.Core.csproj"/>
|
||||
<ProjectReference Include="..\..\..\Router/__Libraries/StellaOps.Messaging\StellaOps.Messaging.csproj" />
|
||||
<ProjectReference Include="..\..\..\Router/__Libraries/StellaOps.Messaging.Transport.Valkey\StellaOps.Messaging.Transport.Valkey.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Auth.Abstractions;
|
||||
using StellaOps.Auth.ServerIntegration;
|
||||
using StellaOps.Infrastructure.Postgres.Migrations;
|
||||
@@ -255,6 +256,15 @@ builder.Services.AddSingleton<ITranslationBundleProvider>(sp => sp.GetRequiredSe
|
||||
// Environment settings composer (3-layer merge: env vars -> YAML -> DB)
|
||||
builder.Services.AddSingleton<EnvironmentSettingsComposer>();
|
||||
builder.Services.AddSingleton<SetupStateDetector>();
|
||||
|
||||
// Valkey/Redis connection for pub/sub notifications (environment settings dirty signal).
// The connection is optional: it is only registered when a connection string is configured.
var redisCs = builder.Configuration["ConnectionStrings:Redis"];
if (!string.IsNullOrWhiteSpace(redisCs))
{
    builder.Services.AddSingleton<IConnectionMultiplexer>(_ =>
    {
        var redisOptions = ConfigurationOptions.Parse(redisCs);

        // The dirty signal is best-effort. With the default abortConnect=true,
        // Connect() throws when the server is unreachable, and that exception would
        // surface while resolving the services that depend on this singleton.
        // Let the multiplexer keep retrying in the background instead.
        redisOptions.AbortOnConnectFail = false;

        return ConnectionMultiplexer.Connect(redisOptions);
    });
}
|
||||
|
||||
builder.Services.AddHostedService<EnvironmentSettingsRefreshService>();
|
||||
|
||||
builder.Services.AddSingleton<IScoreEvaluationService, ScoreEvaluationService>();
|
||||
|
||||
@@ -4,52 +4,130 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Platform.WebService.Options;
|
||||
|
||||
namespace StellaOps.Platform.WebService.Services;
|
||||
|
||||
/// <summary>
/// Background service that invalidates the <see cref="IEnvironmentSettingsStore"/>
/// cache when notified via Valkey pub/sub or on a fallback periodic timer (default 300s).
/// </summary>
/// <remarks>
/// The Valkey subscription is best-effort: when no <see cref="IConnectionMultiplexer"/>
/// is supplied, or subscribing fails, the service runs on the fallback timer alone.
/// The fallback interval is read once when the service starts.
/// </remarks>
public sealed class EnvironmentSettingsRefreshService : BackgroundService
{
    /// <summary>Fallback interval used when the configured value is zero or negative.</summary>
    private const int DefaultFallbackSeconds = 300;

    /// <summary>Channel on which a settings change is announced.</summary>
    private static readonly RedisChannel DirtyChannel =
        RedisChannel.Literal("notify:platform:envsettings:dirty");

    private readonly IEnvironmentSettingsStore _store;
    private readonly IOptionsMonitor<PlatformServiceOptions> _optionsMonitor;
    private readonly ILogger<EnvironmentSettingsRefreshService> _logger;
    private readonly IConnectionMultiplexer? _connectionMultiplexer;

    // Capacity 1: any number of notifications collapse into a single pending wake-up.
    private readonly SemaphoreSlim _notificationSignal = new(0, 1);

    private ISubscriber? _subscriber;

    /// <summary>
    /// Creates the refresh service.
    /// </summary>
    /// <param name="store">Store whose cache is invalidated on every wake-up.</param>
    /// <param name="optionsMonitor">Source of the fallback refresh interval.</param>
    /// <param name="logger">Logger for lifecycle and diagnostic messages.</param>
    /// <param name="connectionMultiplexer">
    /// Optional Valkey/Redis connection; when <c>null</c> only the fallback timer is used.
    /// </param>
    public EnvironmentSettingsRefreshService(
        IEnvironmentSettingsStore store,
        IOptionsMonitor<PlatformServiceOptions> optionsMonitor,
        ILogger<EnvironmentSettingsRefreshService> logger,
        IConnectionMultiplexer? connectionMultiplexer = null)
    {
        _store = store;
        _optionsMonitor = optionsMonitor;
        _logger = logger;
        _connectionMultiplexer = connectionMultiplexer;
    }

    /// <summary>
    /// Subscribes to the dirty channel (best-effort) and then invalidates the store
    /// cache each time a notification arrives or the fallback timer ticks, until
    /// <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    /// <param name="stoppingToken">Signals that the host is shutting down.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation("EnvironmentSettingsRefreshService started");

        await TrySubscribeAsync().ConfigureAwait(false);

        // Determine fallback interval
        var seconds = _optionsMonitor.CurrentValue.Cache.EnvironmentSettingsRefreshSeconds;
        if (seconds <= 0)
        {
            seconds = DefaultFallbackSeconds;
        }

        using var timer = new PeriodicTimer(TimeSpan.FromSeconds(seconds));

        // Both waits are kept across iterations and only replaced once they have
        // completed. PeriodicTimer permits a single in-flight WaitForNextTickAsync,
        // and an abandoned SemaphoreSlim.WaitAsync would stay queued and consume a
        // later notification instead of waking this loop.
        Task? signalTask = null;
        Task<bool>? tickTask = null;

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                signalTask ??= _notificationSignal.WaitAsync(stoppingToken);
                tickTask ??= timer.WaitForNextTickAsync(stoppingToken).AsTask();

                var completed = await Task.WhenAny(signalTask, tickTask).ConfigureAwait(false);

                // Task.WhenAny itself never throws; awaiting the winner surfaces
                // cancellation (or an unexpected fault) from the underlying wait.
                await completed.ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                break;
            }

            if (stoppingToken.IsCancellationRequested)
            {
                break;
            }

            // Re-arm whichever wait(s) finished; the other one stays pending.
            if (signalTask is { IsCompleted: true })
            {
                signalTask = null;
            }

            if (tickTask is { IsCompleted: true })
            {
                tickTask = null;
            }

            _store.InvalidateCache();
            _logger.LogDebug("Environment settings cache invalidated");
        }

        _logger.LogInformation("EnvironmentSettingsRefreshService stopped");
    }

    /// <summary>
    /// Unsubscribes from the dirty channel (logging, not throwing, on failure) and
    /// then performs the base stop sequence.
    /// </summary>
    /// <param name="cancellationToken">Token forwarded to the base implementation.</param>
    public override async Task StopAsync(CancellationToken cancellationToken)
    {
        // Unsubscribe from Valkey channel before stopping
        if (_subscriber is not null)
        {
            try
            {
                await _subscriber.UnsubscribeAsync(DirtyChannel).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Error unsubscribing from Valkey channel during shutdown");
            }
        }

        await base.StopAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>Disposes the notification semaphore and then the base service.</summary>
    public override void Dispose()
    {
        _notificationSignal.Dispose();
        base.Dispose();
    }

    /// <summary>
    /// Subscribes to <see cref="DirtyChannel"/> when a connection is available.
    /// Failures are logged as warnings and never propagated.
    /// </summary>
    private async Task TrySubscribeAsync()
    {
        try
        {
            if (_connectionMultiplexer is null)
            {
                _logger.LogInformation(
                    "EnvironmentSettingsRefreshService running without Valkey subscription (fallback timer only)");
                return;
            }

            _subscriber = _connectionMultiplexer.GetSubscriber();
            await _subscriber
                .SubscribeAsync(DirtyChannel, (_, _) => SignalRefresh())
                .ConfigureAwait(false);

            _logger.LogInformation(
                "EnvironmentSettingsRefreshService subscribed to Valkey channel {Channel}",
                DirtyChannel);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "EnvironmentSettingsRefreshService failed to subscribe to Valkey; falling back to timer-only mode");
        }
    }

    /// <summary>
    /// Wakes the refresh loop. Does nothing when a wake-up is already pending.
    /// </summary>
    private void SignalRefresh()
    {
        // Cheap pre-check; the catch below covers the race where another
        // notification releases the semaphore between the check and Release().
        if (_notificationSignal.CurrentCount != 0)
        {
            return;
        }

        try
        {
            _notificationSignal.Release();
        }
        catch (SemaphoreFullException)
        {
            // Already signalled.
        }
        catch (ObjectDisposedException)
        {
            // A message arrived after Dispose(); there is no loop left to wake.
        }
    }
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Npgsql;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Platform.Database.EfCore.Context;
|
||||
using StellaOps.Platform.Database.Postgres;
|
||||
|
||||
@@ -19,10 +20,13 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
{
|
||||
private readonly NpgsqlDataSource _dataSource;
|
||||
private readonly ILogger<PostgresEnvironmentSettingsStore> _logger;
|
||||
private readonly ISubscriber? _subscriber;
|
||||
private volatile IReadOnlyDictionary<string, string>? _cache;
|
||||
private readonly object _cacheLock = new();
|
||||
|
||||
private const int DefaultCommandTimeoutSeconds = 30;
|
||||
private static readonly RedisChannel DirtyChannel =
|
||||
RedisChannel.Literal("notify:platform:envsettings:dirty");
|
||||
|
||||
private const string UpsertSql = """
|
||||
INSERT INTO platform.environment_settings (key, value, updated_at, updated_by)
|
||||
@@ -32,10 +36,12 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
|
||||
/// <summary>
/// Creates the store.
/// </summary>
/// <param name="dataSource">Npgsql data source; must not be <c>null</c>.</param>
/// <param name="logger">Optional logger; a no-op logger is used when omitted.</param>
/// <param name="connectionMultiplexer">
/// Optional Valkey/Redis connection. When supplied, its subscriber is kept for
/// publishing dirty notifications; when <c>null</c>, no subscriber is stored.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="dataSource"/> is <c>null</c>.</exception>
public PostgresEnvironmentSettingsStore(
    NpgsqlDataSource dataSource,
    ILogger<PostgresEnvironmentSettingsStore>? logger = null,
    IConnectionMultiplexer? connectionMultiplexer = null)
{
    _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
    _logger = logger ?? Microsoft.Extensions.Logging.Abstractions.NullLogger<PostgresEnvironmentSettingsStore>.Instance;
    _subscriber = connectionMultiplexer?.GetSubscriber();
}
|
||||
|
||||
public async Task<IReadOnlyDictionary<string, string>> GetAllAsync(CancellationToken ct = default)
|
||||
@@ -107,6 +113,7 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
ct).ConfigureAwait(false);
|
||||
|
||||
InvalidateCache();
|
||||
PublishDirtyNotification();
|
||||
|
||||
_logger.LogInformation("Environment setting {Key} updated by {UpdatedBy}", key, updatedBy);
|
||||
}
|
||||
@@ -129,6 +136,7 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
dbContext.EnvironmentSettings.Remove(entity);
|
||||
var rows = await dbContext.SaveChangesAsync(ct).ConfigureAwait(false);
|
||||
InvalidateCache();
|
||||
PublishDirtyNotification();
|
||||
|
||||
_logger.LogInformation("Environment setting {Key} deleted ({Rows} rows affected)", key, rows);
|
||||
}
|
||||
@@ -145,4 +153,17 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
_cache = null;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Publishes a fire-and-forget message on <c>DirtyChannel</c> to announce that the
/// settings changed. Does nothing when no subscriber is configured. A failure to
/// publish is logged and never propagated to the caller.
/// </summary>
private void PublishDirtyNotification()
{
    if (_subscriber is null)
    {
        return;
    }

    try
    {
        // FireAndForget: the returned task carries no result worth awaiting.
        _ = _subscriber.PublishAsync(DirtyChannel, "1", CommandFlags.FireAndForget);
    }
    catch (Exception ex)
    {
        // Valkey notification is best-effort, so the write that triggered it must
        // still succeed. Record the failure instead of hiding it.
        _logger.LogWarning(
            ex,
            "Failed to publish environment settings dirty notification to Valkey channel {Channel}",
            DirtyChannel);
    }
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.AspNetCore.OpenApi" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore" />
|
||||
<PackageReference Include="StackExchange.Redis" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
Reference in New Issue
Block a user