From 166745f9f9d6d2157cb5abe184db9d963a5e5712 Mon Sep 17 00:00:00 2001 From: master <> Date: Tue, 10 Mar 2026 02:16:19 +0200 Subject: [PATCH] Reduce idle CPU across 62 containers (phase 1) - Add resource limits (heavy/medium/light tiers) to all 59 .NET services - Add .NET GC tuning (server/workstation GC, DATAS, conserve memory) - Convert FirstSignalSnapshotWriter from 10s polling to Valkey pub/sub - Convert EnvironmentSettingsRefreshService from 60s polling to Valkey pub/sub - Consolidate GraphAnalytics dual timers to single timer with idle-skip - Increase healthcheck interval from 30s to 60s (configurable) - Reduce debug logging to Information on 4 high-traffic services Co-Authored-By: Claude Opus 4.6 --- devops/compose/docker-compose.stella-ops.yml | 253 +++++++++++++----- ...0_019_DevOps_container_cpu_optimization.md | 141 ++++++++++ .../Analytics/GraphAnalyticsHostedService.cs | 26 +- .../Analytics/GraphAnalyticsOptions.cs | 5 + .../Events/JobEngineEventPublisher.cs | 50 +++- .../Options/FirstSignalOptions.cs | 1 + .../Services/FirstSignalSnapshotWriter.cs | 89 +++++- .../StellaOps.JobEngine.Infrastructure.csproj | 1 + .../StellaOps.Platform.WebService/Program.cs | 10 + .../EnvironmentSettingsRefreshService.cs | 90 ++++++- .../PostgresEnvironmentSettingsStore.cs | 23 +- .../StellaOps.Platform.WebService.csproj | 1 + 12 files changed, 601 insertions(+), 89 deletions(-) create mode 100644 docs/implplan/SPRINT_20260310_019_DevOps_container_cpu_optimization.md diff --git a/devops/compose/docker-compose.stella-ops.yml b/devops/compose/docker-compose.stella-ops.yml index d63fb9ba5..41bc7f11f 100644 --- a/devops/compose/docker-compose.stella-ops.yml +++ b/devops/compose/docker-compose.stella-ops.yml @@ -74,18 +74,60 @@ x-depends-infra: &depends-infra condition: service_healthy x-healthcheck-tcp: &healthcheck-tcp - interval: 30s + interval: ${HEALTHCHECK_INTERVAL:-60s} timeout: 5s retries: 3 start_period: 15s x-healthcheck-worker: &healthcheck-worker test: ["CMD", 
"/usr/local/bin/healthcheck.sh"] - interval: 30s + interval: ${HEALTHCHECK_INTERVAL:-60s} timeout: 5s start_period: 30s retries: 3 +# --------------------------------------------------------------------------- +# Resource limit tiers (Workstream 1: CPU optimization) +# --------------------------------------------------------------------------- +x-resources-heavy: &resources-heavy + deploy: + resources: + limits: + cpus: "1.0" + memory: 2G + +x-resources-medium: &resources-medium + deploy: + resources: + limits: + cpus: "0.50" + memory: 1G + +x-resources-light: &resources-light + deploy: + resources: + limits: + cpus: "0.25" + memory: 512M + +# --------------------------------------------------------------------------- +# .NET GC tuning tiers (Workstream 6: GC configuration) +# --------------------------------------------------------------------------- +x-gc-heavy: &gc-heavy + DOTNET_gcServer: "1" + DOTNET_GCConserveMemory: "5" + DOTNET_GCDynamicAdaptationMode: "1" + +x-gc-medium: &gc-medium + DOTNET_gcServer: "1" + DOTNET_GCConserveMemory: "7" + DOTNET_GCDynamicAdaptationMode: "1" + +x-gc-light: &gc-light + DOTNET_gcServer: "0" + DOTNET_GCConserveMemory: "9" + DOTNET_GCDynamicAdaptationMode: "1" + networks: stellaops: driver: bridge @@ -273,6 +315,7 @@ services: # --- Slot 0: Router Gateway (Front Door) ----------------------------------- router-gateway: + <<: *resources-heavy image: stellaops/router-gateway:dev container_name: stellaops-router-gateway restart: unless-stopped @@ -282,7 +325,7 @@ services: condition: service_completed_successfully environment: ASPNETCORE_URLS: "http://0.0.0.0:8080" - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-heavy] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Gateway__Auth__DpopEnabled: "false" @@ -307,9 +350,12 @@ services: Gateway__Auth__IdentityEnvelopeSigningKey: "${STELLAOPS_IDENTITY_ENVELOPE_SIGNING_KEY}" # Audience validation disabled until authority includes aud 
in access tokens # Gateway__Auth__Authority__Audiences__0: "stella-ops-api" - Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug" - Logging__LogLevel__Microsoft.IdentityModel: "Debug" - Logging__LogLevel__StellaOps: "Debug" + # Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug" + Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Information" + # Logging__LogLevel__Microsoft.IdentityModel: "Debug" + Logging__LogLevel__Microsoft.IdentityModel: "Information" + # Logging__LogLevel__StellaOps: "Debug" + Logging__LogLevel__StellaOps: "Information" volumes: - *cert-volume - console-dist:/app/wwwroot:ro @@ -331,13 +377,14 @@ services: # --- Slot 1: Platform ------------------------------------------------------ platform: + <<: *resources-heavy image: stellaops/platform:dev container_name: stellaops-platform restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-heavy] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Platform__Authority__Issuer: "https://authority.stella-ops.local/" @@ -345,9 +392,12 @@ services: Platform__Authority__BypassNetworks__0: "172.0.0.0/8" Platform__Authority__BypassNetworks__1: "127.0.0.0/8" Platform__Authority__BypassNetworks__2: "::1/128" - Logging__LogLevel__StellaOps.Auth: "Debug" - Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug" - Logging__LogLevel__Microsoft.AspNetCore.Authorization: "Debug" + # Logging__LogLevel__StellaOps.Auth: "Debug" + Logging__LogLevel__StellaOps.Auth: "Information" + # Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug" + Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Information" + # Logging__LogLevel__Microsoft.AspNetCore.Authorization: "Debug" + Logging__LogLevel__Microsoft.AspNetCore.Authorization: "Information" Platform__Storage__Driver: 
"postgres" Platform__Storage__PostgresConnectionString: *postgres-connection Platform__EnvironmentSettings__AuthorizeEndpoint: "https://stella-ops.local/connect/authorize" @@ -418,6 +468,7 @@ services: # --- Slot 2: Authority ----------------------------------------------------- authority: + <<: *resources-heavy image: stellaops/authority:dev container_name: stellaops-authority restart: unless-stopped @@ -464,7 +515,7 @@ services: STELLAOPS_AUTHORITY_AUTHORITY__TENANTS__0__ID: "demo-prod" STELLAOPS_AUTHORITY_AUTHORITY__TENANTS__0__DISPLAYNAME: "Demo Production" STELLAOPS_AUTHORITY_AUTHORITY__TENANTS__0__STATUS: "active" - <<: *router-microservice-defaults + <<: [*router-microservice-defaults, *gc-heavy] Router__Enabled: "${AUTHORITY_ROUTER_ENABLED:-true}" Router__Messaging__ConsumerGroup: "authority" volumes: @@ -484,13 +535,14 @@ services: # --- Slot 3: Gateway ------------------------------------------------------- gateway: + <<: *resources-light image: stellaops/gateway:dev container_name: stellaops-gateway restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:80;http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Gateway__Auth__DpopEnabled: "false" @@ -516,6 +568,7 @@ services: # --- Slot 4: Attestor ------------------------------------------------------ attestor: + <<: *resources-light image: stellaops/attestor:dev container_name: stellaops-attestor restart: unless-stopped @@ -523,7 +576,7 @@ services: - signer environment: ASPNETCORE_URLS: "http://+:8442" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ATTESTOR_ATTESTOR__SIGNER__BASEURL: "http://signer.stella-ops.local" ATTESTOR_ATTESTOR__POSTGRES__CONNECTIONSTRING: *postgres-connection ConnectionStrings__Default: 
*postgres-connection @@ -546,6 +599,7 @@ services: # --- Slot 5: Attestor TileProxy -------------------------------------------- attestor-tileproxy: + <<: *resources-light image: stellaops/attestor-tileproxy:dev container_name: stellaops-attestor-tileproxy restart: unless-stopped @@ -553,7 +607,7 @@ services: - attestor environment: ASPNETCORE_URLS: "http://+:8080" - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-light] TILE_PROXY__tile_proxy__UpstreamUrl: "https://rekor.sigstore.dev" TILE_PROXY__tile_proxy__Origin: "stellaops-tileproxy" TILE_PROXY__tile_proxy__Cache__BasePath: "/var/cache/stellaops/tiles" @@ -573,13 +627,14 @@ services: # --- Slot 6: Evidence Locker ------------------------------------------------ evidence-locker-web: + <<: *resources-light image: stellaops/evidence-locker-web:dev container_name: stellaops-evidence-locker-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] EvidenceLocker__Database__ConnectionString: *postgres-connection EvidenceLocker__Database__ApplyMigrationsAtStartup: "true" EvidenceLocker__ObjectStore__Kind: "FileSystem" @@ -619,12 +674,13 @@ services: labels: *release-labels evidence-locker-worker: + <<: *resources-light image: stellaops/evidence-locker-worker:dev container_name: stellaops-evidence-locker-worker restart: unless-stopped depends_on: *depends-infra environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-light] EvidenceLocker__Database__ConnectionString: *postgres-connection EvidenceLocker__Database__ApplyMigrationsAtStartup: "true" EvidenceLocker__ObjectStore__Kind: "FileSystem" @@ -666,6 +722,7 @@ services: labels: *release-labels scanner-web: + <<: *resources-heavy image: stellaops/scanner-web:dev container_name: stellaops-scanner-web restart: unless-stopped @@ -680,7 +737,7 @@ services: condition: service_healthy environment: ASPNETCORE_URLS: 
"http://+:8444" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-heavy] SCANNER_SCANNER__PLUGINS__BASEDIRECTORY: "/tmp/stellaops" SCANNER_SCANNER__STORAGE__DRIVER: "postgres" SCANNER_SCANNER__STORAGE__DSN: *postgres-connection @@ -737,6 +794,7 @@ services: labels: *release-labels scanner-worker: + <<: *resources-heavy image: stellaops/scanner-worker:dev container_name: stellaops-scanner-worker restart: unless-stopped @@ -750,7 +808,7 @@ services: rustfs: condition: service_healthy environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-heavy] # Scanner worker options Scanner__Worker__Authority__Enabled: "false" BinaryIndex__Enabled: "false" @@ -786,6 +844,7 @@ services: # --- Slot 9: Concelier ----------------------------------------------------- concelier: + <<: *resources-medium image: stellaops/concelier:dev container_name: stellaops-concelier restart: unless-stopped @@ -798,7 +857,7 @@ services: condition: service_healthy environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] CONCELIER_PLUGINS__BASEDIRECTORY: "/tmp/stellaops" CONCELIER_POSTGRESSTORAGE__CONNECTIONSTRING: *postgres-connection CONCELIER_POSTGRESSTORAGE__ENABLED: "true" @@ -834,13 +893,14 @@ services: # --- Slot 10: Excititor ---------------------------------------------------- excititor: + <<: *resources-medium image: stellaops/excititor:dev container_name: stellaops-excititor restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] # Postgres options (section: Postgres:Excititor) Postgres__Excititor__ConnectionString: *postgres-connection Postgres__Excititor__SchemaName: "vex" @@ -869,6 +929,7 @@ services: labels: *release-labels excititor-worker: + <<: *resources-medium 
image: stellaops/excititor-worker:dev container_name: stellaops-excititor-worker restart: unless-stopped @@ -878,7 +939,7 @@ services: valkey: condition: service_healthy environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-medium] # Postgres options (section: Postgres:Excititor) Postgres__Excititor__ConnectionString: *postgres-connection Postgres__Excititor__SchemaName: "vex" @@ -903,13 +964,14 @@ services: # --- Slot 11: VexHub ------------------------------------------------------- vexhub-web: + <<: *resources-light image: stellaops/vexhub-web:dev container_name: stellaops-vexhub-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Postgres__ConnectionString: *postgres-connection @@ -932,13 +994,14 @@ services: # --- Slot 12: VexLens ------------------------------------------------------ vexlens-web: + <<: *resources-light image: stellaops/vexlens-web:dev container_name: stellaops-vexlens-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${VEXLENS_ROUTER_ENABLED:-true}" @@ -959,13 +1022,14 @@ services: # --- Slot 13: VulnExplorer (api) [src/Findings/StellaOps.VulnExplorer.Api] --- api: + <<: *resources-light image: stellaops/api:dev container_name: stellaops-api restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: 
*postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${VULNEXPLORER_ROUTER_ENABLED:-true}" @@ -986,13 +1050,14 @@ services: # --- Slot 14: Policy Engine ------------------------------------------------ policy-engine: + <<: *resources-medium image: stellaops/policy-engine:dev container_name: stellaops-policy-engine restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] STELLAOPS_POLICY_ENGINE_Postgres__Policy__ConnectionString: *postgres-connection STELLAOPS_POLICY_ENGINE_ConnectionStrings__Redis: "cache.stella-ops.local:6379" STELLAOPS_POLICY_ENGINE_PolicyEngine__ResourceServer__Authority: "https://authority.stella-ops.local/" @@ -1012,8 +1077,10 @@ services: PolicyEngine__ResourceServer__BypassNetworks__0: "172.19.0.0/16" PolicyEngine__ResourceServer__BypassNetworks__1: "127.0.0.1/32" PolicyEngine__ResourceServer__BypassNetworks__2: "::1/128" - Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug" - Logging__LogLevel__Microsoft.IdentityModel: "Debug" + # Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug" + Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Information" + # Logging__LogLevel__Microsoft.IdentityModel: "Debug" + Logging__LogLevel__Microsoft.IdentityModel: "Information" Router__Enabled: "${POLICY_ENGINE_ROUTER_ENABLED:-true}" Router__Messaging__ConsumerGroup: "policy-engine" volumes: @@ -1033,13 +1100,14 @@ services: # --- Slot 15: Policy Gateway ----------------------------------------------- policy: + <<: *resources-medium image: stellaops/policy:dev container_name: stellaops-policy restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8084" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] ConnectionStrings__Default: 
*postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Postgres__Policy__ConnectionString: *postgres-connection @@ -1077,13 +1145,14 @@ services: # --- Slot 16: RiskEngine [src/Findings/StellaOps.RiskEngine.*] --------------- riskengine-web: + <<: *resources-medium image: stellaops/riskengine-web:dev container_name: stellaops-riskengine-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" RISKENGINE__STORAGE__DRIVER: "postgres" @@ -1105,12 +1174,13 @@ services: labels: *release-labels riskengine-worker: + <<: *resources-medium image: stellaops/riskengine-worker:dev container_name: stellaops-riskengine-worker restart: unless-stopped depends_on: *depends-infra environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-medium] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" volumes: @@ -1125,13 +1195,14 @@ services: # --- Slot 17: Orchestrator ------------------------------------------------- jobengine: + <<: *resources-heavy image: stellaops/orchestrator:dev container_name: stellaops-jobengine restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-heavy] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Authority__ResourceServer__Authority: "https://authority.stella-ops.local/" @@ -1162,12 +1233,13 @@ services: labels: *release-labels jobengine-worker: + <<: *resources-medium image: stellaops/orchestrator-worker:dev container_name: stellaops-jobengine-worker restart: unless-stopped depends_on: *depends-infra environment: 
- <<: *kestrel-cert + <<: [*kestrel-cert, *gc-medium] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" volumes: @@ -1182,13 +1254,14 @@ services: # --- Slot 18: TaskRunner --------------------------------------------------- taskrunner-web: + <<: *resources-light image: stellaops/taskrunner-web:dev container_name: stellaops-taskrunner-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" TASKRUNNER__STORAGE__DRIVER: "postgres" @@ -1213,12 +1286,13 @@ services: labels: *release-labels taskrunner-worker: + <<: *resources-light image: stellaops/taskrunner-worker:dev container_name: stellaops-taskrunner-worker restart: unless-stopped depends_on: *depends-infra environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" TASKRUNNER__STORAGE__DRIVER: "postgres" @@ -1245,13 +1319,14 @@ services: # --- Slot 19: Scheduler ---------------------------------------------------- scheduler-web: + <<: *resources-medium image: stellaops/scheduler-web:dev container_name: stellaops-scheduler-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Scheduler__Authority__Enabled: "false" @@ -1283,6 +1358,7 @@ services: labels: *release-labels scheduler-worker: + <<: *resources-medium image: stellaops/scheduler-worker:dev container_name: stellaops-scheduler-worker restart: unless-stopped @@ -1292,7 
+1368,7 @@ services: valkey: condition: service_healthy environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-medium] # Queue config (Redis transport) scheduler__queue__Kind: "Redis" scheduler__queue__Redis__ConnectionString: "cache.stella-ops.local:6379" @@ -1320,13 +1396,14 @@ services: # --- Slot 20: Graph API ---------------------------------------------------- graph-api: + <<: *resources-medium image: stellaops/graph-api:dev container_name: stellaops-graph-api restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${GRAPH_ROUTER_ENABLED:-true}" @@ -1347,13 +1424,14 @@ services: # --- Slot 21: Cartographer ------------------------------------------------- cartographer: + <<: *resources-light image: stellaops/cartographer:dev container_name: stellaops-cartographer restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${CARTOGRAPHER_ROUTER_ENABLED:-true}" @@ -1374,13 +1452,14 @@ services: # --- Slot 22: ReachGraph --------------------------------------------------- reachgraph-web: + <<: *resources-light image: stellaops/reachgraph-web:dev container_name: stellaops-reachgraph-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" 
Router__Enabled: "${REACHGRAPH_ROUTER_ENABLED:-true}" @@ -1401,13 +1480,14 @@ services: # --- Slot 23: Timeline Indexer --------------------------------------------- timeline-indexer-web: + <<: *resources-light image: stellaops/timeline-indexer-web:dev container_name: stellaops-timeline-indexer-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" TIMELINE_Postgres__Timeline__ConnectionString: *postgres-connection @@ -1428,12 +1508,13 @@ services: labels: *release-labels timeline-indexer-worker: + <<: *resources-light image: stellaops/timeline-indexer-worker:dev container_name: stellaops-timeline-indexer-worker restart: unless-stopped depends_on: *depends-infra environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" TIMELINE_Postgres__Timeline__ConnectionString: *postgres-connection @@ -1449,13 +1530,14 @@ services: # --- Slot 24: Timeline ---------------------------------------------------- timeline-web: + <<: *resources-light image: stellaops/timeline-web:dev container_name: stellaops-timeline-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Authority__ResourceServer__Authority: "http://authority.stella-ops.local/" @@ -1481,13 +1563,14 @@ services: # --- Slot 25: Findings Ledger ---------------------------------------------- findings-ledger-web: + <<: *resources-medium image: stellaops/findings-ledger-web:dev 
container_name: stellaops-findings-ledger-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] ConnectionStrings__Default: *postgres-connection ConnectionStrings__FindingsLedger: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" @@ -1498,8 +1581,10 @@ services: findings__ledger__Authority__Audiences__0: "" findings__ledger__Authority__RequiredScopes__0: "findings:read" findings__ledger__Authority__BypassNetworks__0: "172.19.0.0/16" - Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug" - Logging__LogLevel__Microsoft.IdentityModel: "Debug" + # Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Debug" + Logging__LogLevel__Microsoft.AspNetCore.Authentication: "Information" + # Logging__LogLevel__Microsoft.IdentityModel: "Debug" + Logging__LogLevel__Microsoft.IdentityModel: "Information" findings__ledger__Attachments__EncryptionKey: "IiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiI=" findings__ledger__Attachments__SignedUrlBase: "http://findings.stella-ops.local/attachments" findings__ledger__Attachments__SignedUrlSecret: "dev-signed-url-secret" @@ -1524,13 +1609,14 @@ services: # --- Slot 26: Doctor ------------------------------------------------------- doctor-web: + <<: *resources-light image: stellaops/doctor-web:dev container_name: stellaops-doctor-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Doctor__Authority__Issuer: "https://authority.stella-ops.local/" @@ -1554,13 +1640,14 @@ services: labels: *release-labels doctor-scheduler: + <<: *resources-light image: stellaops/doctor-scheduler:dev 
container_name: stellaops-doctor-scheduler restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:80" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${DOCTOR_SCHEDULER_ROUTER_ENABLED:-true}" @@ -1578,13 +1665,14 @@ services: # --- Slot 27: OpsMemory (src/AdvisoryAI/StellaOps.OpsMemory.WebService) --- opsmemory-web: + <<: *resources-light image: stellaops/opsmemory-web:dev container_name: stellaops-opsmemory-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${OPSMEMORY_ROUTER_ENABLED:-true}" @@ -1605,13 +1693,14 @@ services: # --- Slot 28: Notifier ---------------------------------------------------- notifier-web: + <<: *resources-medium image: stellaops/notifier-web:dev container_name: stellaops-notifier-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Authority__ResourceServer__Authority: "https://authority.stella-ops.local/" @@ -1641,12 +1730,13 @@ services: labels: *release-labels notifier-worker: + <<: *resources-light image: stellaops/notifier-worker:dev container_name: stellaops-notifier-worker restart: unless-stopped depends_on: *depends-infra environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-light] ConnectionStrings__Default: *postgres-connection 
ConnectionStrings__Redis: "cache.stella-ops.local:6379" notifier__queue__Transport: "redis" @@ -1664,13 +1754,14 @@ services: # --- Slot 29: Notify ------------------------------------------------------ notify-web: + <<: *resources-medium image: stellaops/notify-web:dev container_name: stellaops-notify-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] DOTNET_ENVIRONMENT: Production NOTIFY_NOTIFY__STORAGE__DRIVER: "postgres" NOTIFY_NOTIFY__STORAGE__CONNECTIONSTRING: *postgres-connection @@ -1700,6 +1791,7 @@ services: # --- Slot 30: Signer ------------------------------------------------------ signer: + <<: *resources-light image: stellaops/signer:dev container_name: stellaops-signer restart: unless-stopped @@ -1708,7 +1800,7 @@ services: - valkey environment: ASPNETCORE_URLS: "http://+:8441" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__KeyManagement: *postgres-connection ConnectionStrings__Default: *postgres-connection Router__Enabled: "${SIGNER_ROUTER_ENABLED:-true}" @@ -1729,13 +1821,14 @@ services: # --- Slot 31: SmRemote ---------------------------------------------------- smremote: + <<: *resources-light image: stellaops/smremote:dev container_name: stellaops-smremote restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${SMREMOTE_ROUTER_ENABLED:-true}" @@ -1756,13 +1849,14 @@ services: # --- Slot 32: AirGap Controller -------------------------------------------- airgap-controller: + <<: *resources-light image: 
stellaops/airgap-controller:dev container_name: stellaops-airgap-controller restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${AIRGAP_CONTROLLER_ROUTER_ENABLED:-true}" @@ -1783,6 +1877,7 @@ services: # --- Slot 33: AirGap Time ------------------------------------------------- airgap-time: + <<: *resources-light image: stellaops/airgap-time:dev container_name: stellaops-airgap-time restart: unless-stopped @@ -1790,7 +1885,7 @@ services: depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection Router__Enabled: "${AIRGAP_TIME_ROUTER_ENABLED:-true}" Router__Messaging__ConsumerGroup: "airgap-time" @@ -1810,13 +1905,14 @@ services: # --- Slot 34: PacksRegistry ----------------------------------------------- packsregistry-web: + <<: *resources-light image: stellaops/packsregistry-web:dev container_name: stellaops-packsregistry-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" PACKSREGISTRY__STORAGE__DRIVER: "postgres" @@ -1840,12 +1936,13 @@ services: labels: *release-labels packsregistry-worker: + <<: *resources-light image: stellaops/packsregistry-worker:dev container_name: stellaops-packsregistry-worker restart: unless-stopped depends_on: *depends-infra environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-light] 
ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" volumes: @@ -1860,13 +1957,14 @@ services: # --- Slot 35: Registry Token ----------------------------------------------- registry-token: + <<: *resources-light image: stellaops/registry-token:dev container_name: stellaops-registry-token restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection RegistryTokenService__Signing__Issuer: "http://registry-token.stella-ops.local" RegistryTokenService__Signing__KeyPath: "/app/etc/certs/kestrel-dev.pfx" @@ -1898,13 +1996,14 @@ services: # --- Slot 36: BinaryIndex -------------------------------------------------- binaryindex-web: + <<: *resources-light image: stellaops/binaryindex-web:dev container_name: stellaops-binaryindex-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${BINARYINDEX_ROUTER_ENABLED:-true}" @@ -1925,6 +2024,7 @@ services: # --- Slot 37: Issuer Directory --------------------------------------------- issuer-directory: + <<: *resources-light image: stellaops/issuer-directory-web:dev container_name: stellaops-issuer-directory restart: unless-stopped @@ -1933,7 +2033,7 @@ services: - authority environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ISSUERDIRECTORY__AUTHORITY__ENABLED: "true" ISSUERDIRECTORY__AUTHORITY__ISSUER: "${AUTHORITY_ISSUER:-http://authority.stella-ops.local}" 
ISSUERDIRECTORY__AUTHORITY__AUDIENCES__0: "api://issuer-directory" @@ -1960,13 +2060,14 @@ services: # --- Slot 38: Symbols ------------------------------------------------------ symbols: + <<: *resources-light image: stellaops/symbols:dev container_name: stellaops-symbols restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Authority__ResourceServer__Authority: "https://authority.stella-ops.local/" @@ -1991,13 +2092,14 @@ services: # --- Slot 39: SbomService -------------------------------------------------- sbomservice: + <<: *resources-light image: stellaops/sbomservice:dev container_name: stellaops-sbomservice restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Router__Enabled: "${SBOMSERVICE_ROUTER_ENABLED:-true}" @@ -2018,13 +2120,14 @@ services: # --- Slot 40: ExportCenter ------------------------------------------------- export: + <<: *resources-light image: stellaops/export:dev container_name: stellaops-export restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Export__AllowInMemoryRepositories: "true" @@ -2055,12 +2158,13 @@ services: labels: *release-labels export-worker: + <<: *resources-light image: stellaops/export-worker:dev container_name: stellaops-export-worker 
restart: unless-stopped depends_on: *depends-infra environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Export__AllowInMemoryRepositories: "true" @@ -2082,13 +2186,14 @@ services: # --- Slot 41: Replay ------------------------------------------------------- replay-web: + <<: *resources-light image: stellaops/replay-web:dev container_name: stellaops-replay-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" REPLAY__STORAGE__DRIVER: "postgres" @@ -2113,13 +2218,14 @@ services: # --- Slot 42: Integrations ------------------------------------------------ integrations-web: + <<: *resources-light image: stellaops/integrations-web:dev container_name: stellaops-integrations-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__IntegrationsDb: *postgres-connection ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" @@ -2151,6 +2257,7 @@ services: # --- Slot 43: Zastava Webhook ---------------------------------------------- zastava-webhook: + <<: *resources-light image: stellaops/zastava-webhook:dev container_name: stellaops-zastava-webhook restart: unless-stopped @@ -2159,7 +2266,7 @@ services: condition: service_healthy environment: ASPNETCORE_URLS: "http://+:8080" - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-light] # Runtime authority (used by token provider for OIDC discovery) zastava__runtime__authority__Issuer: "https://authority.stella-ops.local/" 
zastava__runtime__authority__allowStaticTokenFallback: "true" @@ -2193,13 +2300,14 @@ services: # --- Slot 44: Signals ------------------------------------------------------ signals: + <<: *resources-light image: stellaops/signals:dev container_name: stellaops-signals restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" Authority__ResourceServer__Authority: "https://authority.stella-ops.local/" @@ -2230,6 +2338,7 @@ services: # --- Slot 45: Advisory AI -------------------------------------------------- advisory-ai-web: + <<: *resources-medium image: stellaops/advisory-ai-web:dev container_name: stellaops-advisory-ai-web restart: unless-stopped @@ -2237,7 +2346,7 @@ services: - scanner-web environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-medium] ADVISORYAI__AdvisoryAI__SbomBaseAddress: "${ADVISORY_AI_SBOM_BASEADDRESS:-http://scanner.stella-ops.local}" ADVISORYAI__AdvisoryAI__Queue__DirectoryPath: "/var/lib/advisory-ai/queue" ADVISORYAI__AdvisoryAI__Storage__PlanCacheDirectory: "/var/lib/advisory-ai/plans" @@ -2275,13 +2384,14 @@ services: labels: *release-labels advisory-ai-worker: + <<: *resources-medium image: stellaops/advisory-ai-worker:dev container_name: stellaops-advisory-ai-worker restart: unless-stopped depends_on: - scanner-web environment: - <<: *kestrel-cert + <<: [*kestrel-cert, *gc-medium] ADVISORYAI__AdvisoryAI__SbomBaseAddress: "${ADVISORY_AI_SBOM_BASEADDRESS:-http://scanner.stella-ops.local}" ADVISORYAI__AdvisoryAI__Queue__DirectoryPath: "/tmp/advisory-ai/queue" ADVISORYAI__AdvisoryAI__Storage__PlanCacheDirectory: "/tmp/advisory-ai/plans" @@ -2308,13 +2418,14 @@ services: # --- Slot 
46: Unknowns ---------------------------------------------------- unknowns-web: + <<: *resources-light image: stellaops/unknowns-web:dev container_name: stellaops-unknowns-web restart: unless-stopped depends_on: *depends-infra environment: ASPNETCORE_URLS: "http://+:8080" - <<: [*kestrel-cert, *router-microservice-defaults] + <<: [*kestrel-cert, *router-microservice-defaults, *gc-light] ConnectionStrings__Default: *postgres-connection ConnectionStrings__UnknownsDb: *postgres-connection ConnectionStrings__Redis: "cache.stella-ops.local:6379" diff --git a/docs/implplan/SPRINT_20260310_019_DevOps_container_cpu_optimization.md b/docs/implplan/SPRINT_20260310_019_DevOps_container_cpu_optimization.md new file mode 100644 index 000000000..3dcfc7a0e --- /dev/null +++ b/docs/implplan/SPRINT_20260310_019_DevOps_container_cpu_optimization.md @@ -0,0 +1,141 @@ +# Sprint 019 — Container CPU Optimization + +## Topic & Scope +- Reduce idle CPU pressure from 62 Docker containers by adding resource limits, tuning GC, converting polling to event-driven patterns, and reducing log verbosity. +- Working directory: `devops/compose/`, `src/JobEngine/`, `src/Graph/`, `src/Platform/`. +- Expected evidence: compose validation, `docker stats` showing caps, reduced idle CPU. + +## Dependencies & Concurrency +- No upstream sprint dependencies. +- Workstreams 1/2/4/6 (compose-only) are independent of workstreams 3A/3B/3D (C# changes). +- C# workstreams (3A, 3B, 3D) are independent of each other (different modules). + +## Documentation Prerequisites +- `docs/modules/router/architecture.md` (Valkey messaging patterns). + +## Delivery Tracker + +### WS-1 — Resource Limits in Docker Compose +Status: DONE +Dependency: none +Owners: Developer +Task description: +- Add three resource tier YAML anchors (heavy/medium/light) to compose file. +- Apply `<<: *resources-{tier}` to all 59 .NET services. +- Infrastructure services (postgres, valkey, rustfs, registry, rekor) remain unconstrained. 
+ +Completion criteria: +- [x] Three resource anchors defined +- [x] Tier assignments: Heavy (6), Medium (16), Light (37) +- [x] `docker compose config` validates cleanly +- [x] Infrastructure services have no deploy limits + +### WS-2 — Logging Debug→Information +Status: DONE +Dependency: none +Owners: Developer +Task description: +- Change 4 services from Debug to Information logging, keeping Debug as comments. +- Services: router-gateway, platform, policy-engine, findings-ledger-web. + +Completion criteria: +- [x] Debug log levels commented out with Information active +- [x] 4 services updated + +### WS-3A — FirstSignalSnapshotWriter Valkey Pub/Sub +Status: DONE +Dependency: none +Owners: Developer +Task description: +- Convert 10s polling to Valkey subscription on `notify:firstsignal:dirty`. +- Add 60s fallback timer via `FallbackPollIntervalSeconds` option. +- Fire Valkey notification from JobEngineEventPublisher on job lifecycle events. + +Completion criteria: +- [x] SemaphoreSlim + Valkey subscribe pattern implemented +- [x] Fallback timer extended from 10s to 60s +- [x] Event publisher fires dirty notification on orch.jobs channel events +- [x] Project builds with 0 errors + +### WS-3B — GraphAnalyticsHostedService Single Timer + Idle Skip +Status: DONE +Dependency: none +Owners: Developer +Task description: +- Consolidate dual PeriodicTimer to single timer using Min(ClusterInterval, CentralityInterval). +- Add idle-check: skip pipeline when no pending snapshots exist. +- Add `SkipWhenIdle` option (default: true). + +Completion criteria: +- [x] Single timer replaces dual timers +- [x] Idle check via IGraphSnapshotProvider.GetPendingSnapshotsAsync +- [x] Debug log emitted when skipping +- [x] Project builds with 0 errors + +### WS-3D — EnvironmentSettingsRefreshService Valkey Pub/Sub +Status: DONE +Dependency: none +Owners: Developer +Task description: +- Register IConnectionMultiplexer in Platform DI from ConnectionStrings:Redis. 
+- Publish `notify:platform:envsettings:dirty` from PostgresEnvironmentSettingsStore on set/delete. +- Convert EnvironmentSettingsRefreshService from Task.Delay(60s) to Valkey subscription with a fallback timer (interval from `Cache.EnvironmentSettingsRefreshSeconds`; 300s when that value is not positive). + +Completion criteria: +- [x] IConnectionMultiplexer registered in Platform Program.cs +- [x] Store publishes dirty notification (fire-and-forget) +- [x] Refresh service uses SemaphoreSlim + Valkey subscribe +- [x] Project builds with 0 errors + +### WS-4 — Health Check Interval 60s (Configurable) +Status: DONE +Dependency: none +Owners: Developer +Task description: +- Change healthcheck anchors from 30s to `${HEALTHCHECK_INTERVAL:-60s}`. +- Propagates to all ~57 services using these anchors. + +Completion criteria: +- [x] Both healthcheck anchors updated +- [x] Environment variable override supported +- [x] Rendered config shows 60s intervals + +### WS-5 — Messaging Transport (No Changes) +Status: DONE +Dependency: none +Owners: Developer +Task description: +- Verified Valkey messaging transport is already subscription-based with SemaphoreSlim + fallback. +- No changes needed. + +Completion criteria: +- [x] Verified ValkeyMessageQueue already uses push-first pattern + +### WS-6 — GC Configuration +Status: DONE +Dependency: none +Owners: Developer +Task description: +- Add three GC tuning YAML anchors (heavy/medium/light) with DOTNET_gcServer, GCConserveMemory, GCDynamicAdaptationMode. +- Merge into all 59 .NET service environments. + +Completion criteria: +- [x] Three GC anchors defined +- [x] Heavy/Medium use Server GC; Light uses Workstation GC +- [x] GCDynamicAdaptationMode=1 (DATAS) on all services +- [x] Not applied to non-.NET infrastructure + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2026-03-10 | Sprint created. All workstreams completed. All 3 C# projects build clean. Compose validates clean. | Developer | + +## Decisions & Risks +- Resource limits are dev/QA defaults; production deployments should tune per hardware.
+- GCDynamicAdaptationMode=1 requires .NET 8+; all services use .NET 8/9. +- Healthcheck interval override via HEALTHCHECK_INTERVAL env var for operator flexibility. +- Valkey pub/sub notifications are fire-and-forget; fallback timers ensure correctness if missed. + +## Next Checkpoints +- Rebuild affected images (platform, jobengine, graph-indexer) after C# changes merge. +- Verify `docker stats` shows resource caps in dev environment. diff --git a/src/Graph/StellaOps.Graph.Indexer/Analytics/GraphAnalyticsHostedService.cs b/src/Graph/StellaOps.Graph.Indexer/Analytics/GraphAnalyticsHostedService.cs index 1b126f868..68ecc0ba8 100644 --- a/src/Graph/StellaOps.Graph.Indexer/Analytics/GraphAnalyticsHostedService.cs +++ b/src/Graph/StellaOps.Graph.Indexer/Analytics/GraphAnalyticsHostedService.cs @@ -8,37 +8,49 @@ namespace StellaOps.Graph.Indexer.Analytics; public sealed class GraphAnalyticsHostedService : BackgroundService { private readonly IGraphAnalyticsPipeline _pipeline; + private readonly IGraphSnapshotProvider _snapshotProvider; private readonly GraphAnalyticsOptions _options; private readonly ILogger _logger; public GraphAnalyticsHostedService( IGraphAnalyticsPipeline pipeline, + IGraphSnapshotProvider snapshotProvider, IOptions options, ILogger logger) { _pipeline = pipeline ?? throw new ArgumentNullException(nameof(pipeline)); + _snapshotProvider = snapshotProvider ?? throw new ArgumentNullException(nameof(snapshotProvider)); _options = options?.Value ?? throw new ArgumentNullException(nameof(options)); _logger = logger ?? throw new ArgumentNullException(nameof(logger)); } protected override async Task ExecuteAsync(CancellationToken stoppingToken) { - using var clusteringTimer = new PeriodicTimer(_options.ClusterInterval); - using var centralityTimer = new PeriodicTimer(_options.CentralityInterval); + var interval = _options.ClusterInterval < _options.CentralityInterval + ? 
_options.ClusterInterval + : _options.CentralityInterval; + + using var timer = new PeriodicTimer(interval); while (!stoppingToken.IsCancellationRequested) { - var clusteringTask = clusteringTimer.WaitForNextTickAsync(stoppingToken).AsTask(); - var centralityTask = centralityTimer.WaitForNextTickAsync(stoppingToken).AsTask(); - - var completed = await Task.WhenAny(clusteringTask, centralityTask).ConfigureAwait(false); - if (completed.IsCanceled || stoppingToken.IsCancellationRequested) + if (!await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false)) { break; } try { + if (_options.SkipWhenIdle) + { + var pending = await _snapshotProvider.GetPendingSnapshotsAsync(stoppingToken).ConfigureAwait(false); + if (pending.Count == 0) + { + _logger.LogDebug("graph-indexer: skipping analytics pipeline, no pending snapshots"); + continue; + } + } + await _pipeline.RunAsync(new GraphAnalyticsRunContext(ForceBackfill: false), stoppingToken).ConfigureAwait(false); } catch (OperationCanceledException) diff --git a/src/Graph/StellaOps.Graph.Indexer/Analytics/GraphAnalyticsOptions.cs b/src/Graph/StellaOps.Graph.Indexer/Analytics/GraphAnalyticsOptions.cs index acbbacf91..c0847e684 100644 --- a/src/Graph/StellaOps.Graph.Indexer/Analytics/GraphAnalyticsOptions.cs +++ b/src/Graph/StellaOps.Graph.Indexer/Analytics/GraphAnalyticsOptions.cs @@ -28,4 +28,9 @@ public sealed class GraphAnalyticsOptions /// Whether to also write cluster ids onto graph node documents (alongside overlays). /// public bool WriteClusterAssignmentsToNodes { get; set; } = true; + + /// + /// When true, skips the analytics pipeline if no pending snapshots exist. 
+ /// + public bool SkipWhenIdle { get; set; } = true; } diff --git a/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Events/JobEngineEventPublisher.cs b/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Events/JobEngineEventPublisher.cs index b06d8c8df..00b1c3fea 100644 --- a/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Events/JobEngineEventPublisher.cs +++ b/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Events/JobEngineEventPublisher.cs @@ -1,6 +1,9 @@ using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using StellaOps.JobEngine.Core.Domain.Events; +using StellaOps.JobEngine.Infrastructure.Services; +using StellaOps.Messaging.Transport.Valkey; +using StackExchange.Redis; namespace StellaOps.JobEngine.Infrastructure.Events; @@ -14,19 +17,22 @@ public sealed class JobEngineEventPublisher : IEventPublisher private readonly IEventSigner? _eventSigner; private readonly EventPublishOptions _options; private readonly ILogger _logger; + private readonly IServiceProvider? _serviceProvider; public JobEngineEventPublisher( IIdempotencyStore idempotencyStore, INotifierBus notifierBus, IOptions options, ILogger logger, - IEventSigner? eventSigner = null) + IEventSigner? eventSigner = null, + IServiceProvider? serviceProvider = null) { _idempotencyStore = idempotencyStore; _notifierBus = notifierBus; _eventSigner = eventSigner; _options = options.Value; _logger = logger; + _serviceProvider = serviceProvider; } public async Task PublishAsync(EventEnvelope envelope, CancellationToken cancellationToken = default) @@ -48,6 +54,14 @@ public sealed class JobEngineEventPublisher : IEventPublisher await PublishWithRetryAsync(channel, message, cancellationToken); + // Fire Valkey notification for job-lifecycle events to wake + // FirstSignalSnapshotWriter immediately instead of waiting for + // its fallback poll interval. 
+ if (channel == "orch.jobs") + { + await TryNotifyFirstSignalDirtyAsync().ConfigureAwait(false); + } + JobEngineMetrics.EventPublished(envelope.TenantId, envelope.EventType.ToEventTypeName()); _logger.LogInformation( @@ -206,6 +220,40 @@ public sealed class JobEngineEventPublisher : IEventPublisher System.Net.Http.HttpRequestException or System.IO.IOException; } + + /// + /// Fire-and-forget notification to the Valkey pub/sub channel that wakes + /// . This must never fail the + /// event publish — all exceptions are swallowed and logged. + /// + private async Task TryNotifyFirstSignalDirtyAsync() + { + try + { + if (_serviceProvider is null) + { + return; + } + + var connectionFactory = _serviceProvider.GetService(typeof(ValkeyConnectionFactory)) as ValkeyConnectionFactory; + if (connectionFactory is null) + { + return; + } + + var subscriber = await connectionFactory.GetSubscriberAsync().ConfigureAwait(false); + await subscriber.PublishAsync( + RedisChannel.Literal(FirstSignalSnapshotWriter.NotificationChannel), + "1", + CommandFlags.FireAndForget).ConfigureAwait(false); + } + catch (Exception ex) + { + _logger.LogDebug( + ex, + "Failed to publish first-signal dirty notification (fire-and-forget); snapshot writer will use fallback timer."); + } + } } /// diff --git a/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Options/FirstSignalOptions.cs b/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Options/FirstSignalOptions.cs index 0ce9da60f..f5c4df248 100644 --- a/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Options/FirstSignalOptions.cs +++ b/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Options/FirstSignalOptions.cs @@ -28,6 +28,7 @@ public sealed class FirstSignalSnapshotWriterOptions public bool Enabled { get; set; } public string? 
TenantId { get; set; } public int PollIntervalSeconds { get; set; } = 10; + public int FallbackPollIntervalSeconds { get; set; } = 60; public int MaxRunsPerTick { get; set; } = 50; public int LookbackMinutes { get; set; } = 60; } diff --git a/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Services/FirstSignalSnapshotWriter.cs b/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Services/FirstSignalSnapshotWriter.cs index a3c5deb36..ae3183d29 100644 --- a/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Services/FirstSignalSnapshotWriter.cs +++ b/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/Services/FirstSignalSnapshotWriter.cs @@ -7,23 +7,40 @@ using Microsoft.Extensions.Options; using StellaOps.JobEngine.Core.Domain; using StellaOps.JobEngine.Infrastructure.Options; using StellaOps.JobEngine.Infrastructure.Repositories; +using StellaOps.Messaging.Transport.Valkey; +using StackExchange.Redis; namespace StellaOps.JobEngine.Infrastructure.Services; public sealed class FirstSignalSnapshotWriter : BackgroundService { + /// + /// Valkey pub/sub channel used to notify this writer that new job-lifecycle + /// data is available and it should wake up immediately. + /// + internal const string NotificationChannel = "notify:firstsignal:dirty"; + private readonly IServiceScopeFactory _scopeFactory; + private readonly IServiceProvider _serviceProvider; private readonly FirstSignalSnapshotWriterOptions _options; private readonly ILogger _logger; private readonly TimeProvider _timeProvider; + /// + /// Semaphore used for notification-based wakeup. Starts at 0 permits. + /// Released (up to 1) when a Valkey pub/sub notification arrives. + /// + private readonly SemaphoreSlim _notificationSignal = new(0, 1); + public FirstSignalSnapshotWriter( IServiceScopeFactory scopeFactory, + IServiceProvider serviceProvider, IOptions options, ILogger logger, TimeProvider? 
timeProvider = null) { _scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory)); + _serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider)); _options = (options ?? throw new ArgumentNullException(nameof(options))).Value.SnapshotWriter; _logger = logger ?? throw new ArgumentNullException(nameof(logger)); _timeProvider = timeProvider ?? TimeProvider.System; @@ -48,13 +65,35 @@ public sealed class FirstSignalSnapshotWriter : BackgroundService var tenantId = _options.TenantId.Trim(); var lookback = TimeSpan.FromMinutes(Math.Max(1, _options.LookbackMinutes)); - var pollInterval = TimeSpan.FromSeconds(Math.Max(1, _options.PollIntervalSeconds)); + var fallbackInterval = TimeSpan.FromSeconds(Math.Max(1, _options.FallbackPollIntervalSeconds)); var maxRuns = Math.Max(1, _options.MaxRunsPerTick); - using var timer = new PeriodicTimer(pollInterval); + // Try to subscribe to Valkey pub/sub for immediate wake-up notifications. + await TrySubscribeToValkeyNotificationsAsync(stoppingToken).ConfigureAwait(false); - while (await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false)) + using var timer = new PeriodicTimer(fallbackInterval); + + while (!stoppingToken.IsCancellationRequested) { + // Wait for either a Valkey notification or the fallback timer to fire. + try + { + await Task.WhenAny( + _notificationSignal.WaitAsync(stoppingToken), + timer.WaitForNextTickAsync(stoppingToken).AsTask() + ).ConfigureAwait(false); + } + catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested) + { + break; + } + + // Drain the semaphore to avoid duplicate wakeups from queued notifications. + while (_notificationSignal.Wait(0)) + { + // Intentionally empty: draining any extra permits. 
+ } + try { await WarmTenantAsync(tenantId, lookback, maxRuns, stoppingToken).ConfigureAwait(false); @@ -70,6 +109,50 @@ public sealed class FirstSignalSnapshotWriter : BackgroundService } } + /// + /// Attempts to subscribe to the Valkey notification channel. If Valkey is + /// unavailable, logs a warning and falls back to timer-only mode. + /// + private async Task TrySubscribeToValkeyNotificationsAsync(CancellationToken cancellationToken) + { + try + { + var connectionFactory = _serviceProvider.GetService(); + if (connectionFactory is null) + { + _logger.LogWarning( + "ValkeyConnectionFactory not available; FirstSignalSnapshotWriter will use timer-only mode " + + "(fallback interval {Interval}s).", + _options.FallbackPollIntervalSeconds); + return; + } + + var subscriber = await connectionFactory.GetSubscriberAsync(cancellationToken).ConfigureAwait(false); + var channel = await subscriber + .SubscribeAsync(RedisChannel.Literal(NotificationChannel)) + .ConfigureAwait(false); + + channel.OnMessage(_ => + { + try { _notificationSignal.Release(); } + catch (SemaphoreFullException) { /* already signaled */ } + }); + + _logger.LogInformation( + "FirstSignalSnapshotWriter subscribed to Valkey channel {Channel} for immediate wake-up notifications.", + NotificationChannel); + } + catch (Exception ex) + { + _logger.LogWarning( + ex, + "Failed to subscribe to Valkey channel {Channel}; FirstSignalSnapshotWriter will use timer-only mode " + + "(fallback interval {Interval}s).", + NotificationChannel, + _options.FallbackPollIntervalSeconds); + } + } + private async Task WarmTenantAsync( string tenantId, TimeSpan lookback, diff --git a/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/StellaOps.JobEngine.Infrastructure.csproj b/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/StellaOps.JobEngine.Infrastructure.csproj index 11e63fc9d..c6c26e299 100644 --- 
a/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/StellaOps.JobEngine.Infrastructure.csproj +++ b/src/JobEngine/StellaOps.JobEngine/StellaOps.JobEngine.Infrastructure/StellaOps.JobEngine.Infrastructure.csproj @@ -27,6 +27,7 @@ + diff --git a/src/Platform/StellaOps.Platform.WebService/Program.cs b/src/Platform/StellaOps.Platform.WebService/Program.cs index 490abd461..93927503d 100644 --- a/src/Platform/StellaOps.Platform.WebService/Program.cs +++ b/src/Platform/StellaOps.Platform.WebService/Program.cs @@ -1,6 +1,7 @@ using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; +using StackExchange.Redis; using StellaOps.Auth.Abstractions; using StellaOps.Auth.ServerIntegration; using StellaOps.Infrastructure.Postgres.Migrations; @@ -255,6 +256,15 @@ builder.Services.AddSingleton(sp => sp.GetRequiredSe // Environment settings composer (3-layer merge: env vars -> YAML -> DB) builder.Services.AddSingleton(); builder.Services.AddSingleton(); + +// Valkey/Redis connection for pub/sub notifications (environment settings dirty signal) +var redisCs = builder.Configuration["ConnectionStrings:Redis"]; +if (!string.IsNullOrWhiteSpace(redisCs)) +{ + builder.Services.AddSingleton( + sp => ConnectionMultiplexer.Connect(redisCs)); +} + builder.Services.AddHostedService(); builder.Services.AddSingleton(); diff --git a/src/Platform/StellaOps.Platform.WebService/Services/EnvironmentSettingsRefreshService.cs b/src/Platform/StellaOps.Platform.WebService/Services/EnvironmentSettingsRefreshService.cs index 00b6b6142..dd67ee3b4 100644 --- a/src/Platform/StellaOps.Platform.WebService/Services/EnvironmentSettingsRefreshService.cs +++ b/src/Platform/StellaOps.Platform.WebService/Services/EnvironmentSettingsRefreshService.cs @@ -4,52 +4,130 @@ using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; +using StackExchange.Redis; using StellaOps.Platform.WebService.Options; namespace 
StellaOps.Platform.WebService.Services; /// -/// Background service that periodically invalidates the -/// cache so DB-layer changes are picked up without restart. +/// Background service that invalidates the +/// cache when notified via Valkey pub/sub or on a fallback periodic timer (default 300s). /// public sealed class EnvironmentSettingsRefreshService : BackgroundService { private readonly IEnvironmentSettingsStore _store; private readonly IOptionsMonitor _optionsMonitor; private readonly ILogger _logger; + private readonly IConnectionMultiplexer? _connectionMultiplexer; + private readonly SemaphoreSlim _notificationSignal = new(0, 1); + + private const int DefaultFallbackSeconds = 300; + + private static readonly RedisChannel DirtyChannel = + RedisChannel.Literal("notify:platform:envsettings:dirty"); + + private ISubscriber? _subscriber; public EnvironmentSettingsRefreshService( IEnvironmentSettingsStore store, IOptionsMonitor optionsMonitor, - ILogger logger) + ILogger logger, + IConnectionMultiplexer? connectionMultiplexer = null) { _store = store; _optionsMonitor = optionsMonitor; _logger = logger; + _connectionMultiplexer = connectionMultiplexer; } protected override async Task ExecuteAsync(CancellationToken stoppingToken) { _logger.LogInformation("EnvironmentSettingsRefreshService started"); + // Subscribe to Valkey dirty notifications (best-effort) + try + { + if (_connectionMultiplexer is not null) + { + _subscriber = _connectionMultiplexer.GetSubscriber(); + await _subscriber.SubscribeAsync(DirtyChannel, (_, _) => + { + // Release the semaphore to wake the loop immediately. + // CurrentCount check avoids SemaphoreFullException when multiple + // notifications arrive before the loop drains. 
+ if (_notificationSignal.CurrentCount == 0) + { + try { _notificationSignal.Release(); } + catch (SemaphoreFullException) { /* already signalled */ } + } + }).ConfigureAwait(false); + + _logger.LogInformation( + "EnvironmentSettingsRefreshService subscribed to Valkey channel {Channel}", + DirtyChannel); + } + else + { + _logger.LogInformation( + "EnvironmentSettingsRefreshService running without Valkey subscription (fallback timer only)"); + } + } + catch (Exception ex) + { + _logger.LogWarning(ex, + "EnvironmentSettingsRefreshService failed to subscribe to Valkey; falling back to timer-only mode"); + } + + // Determine fallback interval + var seconds = _optionsMonitor.CurrentValue.Cache.EnvironmentSettingsRefreshSeconds; + if (seconds <= 0) seconds = DefaultFallbackSeconds; + + using var timer = new PeriodicTimer(TimeSpan.FromSeconds(seconds)); + while (!stoppingToken.IsCancellationRequested) { - var seconds = _optionsMonitor.CurrentValue.Cache.EnvironmentSettingsRefreshSeconds; - if (seconds <= 0) seconds = 60; + var semaphoreTask = _notificationSignal.WaitAsync(stoppingToken); + var timerTask = timer.WaitForNextTickAsync(stoppingToken).AsTask(); try { - await Task.Delay(TimeSpan.FromSeconds(seconds), stoppingToken).ConfigureAwait(false); + await Task.WhenAny(semaphoreTask, timerTask).ConfigureAwait(false); } catch (OperationCanceledException) { break; } + if (stoppingToken.IsCancellationRequested) break; + _store.InvalidateCache(); _logger.LogDebug("Environment settings cache invalidated"); } _logger.LogInformation("EnvironmentSettingsRefreshService stopped"); } + + public override async Task StopAsync(CancellationToken cancellationToken) + { + // Unsubscribe from Valkey channel before stopping + if (_subscriber is not null) + { + try + { + await _subscriber.UnsubscribeAsync(DirtyChannel).ConfigureAwait(false); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Error unsubscribing from Valkey channel during shutdown"); + } + } + + await 
base.StopAsync(cancellationToken).ConfigureAwait(false); + } + + public override void Dispose() + { + _notificationSignal.Dispose(); + base.Dispose(); + } } diff --git a/src/Platform/StellaOps.Platform.WebService/Services/PostgresEnvironmentSettingsStore.cs b/src/Platform/StellaOps.Platform.WebService/Services/PostgresEnvironmentSettingsStore.cs index f253b74a5..c6d46321b 100644 --- a/src/Platform/StellaOps.Platform.WebService/Services/PostgresEnvironmentSettingsStore.cs +++ b/src/Platform/StellaOps.Platform.WebService/Services/PostgresEnvironmentSettingsStore.cs @@ -4,6 +4,7 @@ using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Logging; using Npgsql; +using StackExchange.Redis; using StellaOps.Platform.Database.EfCore.Context; using StellaOps.Platform.Database.Postgres; @@ -19,10 +20,13 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore { private readonly NpgsqlDataSource _dataSource; private readonly ILogger _logger; + private readonly ISubscriber? _subscriber; private volatile IReadOnlyDictionary? _cache; private readonly object _cacheLock = new(); private const int DefaultCommandTimeoutSeconds = 30; + private static readonly RedisChannel DirtyChannel = + RedisChannel.Literal("notify:platform:envsettings:dirty"); private const string UpsertSql = """ INSERT INTO platform.environment_settings (key, value, updated_at, updated_by) @@ -32,10 +36,12 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore public PostgresEnvironmentSettingsStore( NpgsqlDataSource dataSource, - ILogger? logger = null) + ILogger? logger = null, + IConnectionMultiplexer? connectionMultiplexer = null) { _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource)); _logger = logger ?? 
Microsoft.Extensions.Logging.Abstractions.NullLogger.Instance; + _subscriber = connectionMultiplexer?.GetSubscriber(); } public async Task> GetAllAsync(CancellationToken ct = default) @@ -107,6 +113,7 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore ct).ConfigureAwait(false); InvalidateCache(); + PublishDirtyNotification(); _logger.LogInformation("Environment setting {Key} updated by {UpdatedBy}", key, updatedBy); } @@ -129,6 +136,7 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore dbContext.EnvironmentSettings.Remove(entity); var rows = await dbContext.SaveChangesAsync(ct).ConfigureAwait(false); InvalidateCache(); + PublishDirtyNotification(); _logger.LogInformation("Environment setting {Key} deleted ({Rows} rows affected)", key, rows); } @@ -145,4 +153,17 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore _cache = null; } } + + private void PublishDirtyNotification() + { + try + { + _subscriber?.PublishAsync(DirtyChannel, "1", CommandFlags.FireAndForget); + } + catch + { + // Fire-and-forget: Valkey notification is best-effort. + // The background refresh service will still pick up changes on the fallback timer. + } + } } diff --git a/src/Platform/StellaOps.Platform.WebService/StellaOps.Platform.WebService.csproj b/src/Platform/StellaOps.Platform.WebService/StellaOps.Platform.WebService.csproj index f4cbcb435..a0e02baba 100644 --- a/src/Platform/StellaOps.Platform.WebService/StellaOps.Platform.WebService.csproj +++ b/src/Platform/StellaOps.Platform.WebService/StellaOps.Platform.WebService.csproj @@ -11,6 +11,7 @@ +