Reduce idle CPU across 62 containers (phase 1)

- Add resource limits (heavy/medium/light tiers) to all 59 .NET services
- Add .NET GC tuning (server/workstation GC, DATAS, conserve memory)
- Convert FirstSignalSnapshotWriter from 10s polling to Valkey pub/sub
- Convert EnvironmentSettingsRefreshService from 60s polling to Valkey pub/sub
- Consolidate GraphAnalytics dual timers to single timer with idle-skip
- Increase healthcheck interval from 30s to 60s (configurable)
- Reduce debug logging to Information on 4 high-traffic services

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
master
2026-03-10 02:16:19 +02:00
parent c0c0267ac9
commit 166745f9f9
12 changed files with 601 additions and 89 deletions

View File

@@ -8,37 +8,49 @@ namespace StellaOps.Graph.Indexer.Analytics;
public sealed class GraphAnalyticsHostedService : BackgroundService
{
// Analytics pipeline that ExecuteAsync runs via RunAsync.
private readonly IGraphAnalyticsPipeline _pipeline;
// Queried for pending snapshots when the SkipWhenIdle option is enabled.
private readonly IGraphSnapshotProvider _snapshotProvider;
// Resolved options: cluster/centrality intervals and the SkipWhenIdle flag.
private readonly GraphAnalyticsOptions _options;
private readonly ILogger<GraphAnalyticsHostedService> _logger;
/// <summary>
/// Creates the hosted service and captures its dependencies.
/// </summary>
/// <param name="pipeline">Analytics pipeline to run.</param>
/// <param name="snapshotProvider">Source of pending snapshots.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is read once here.</param>
/// <param name="logger">Logger for this service.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when any argument is null, or when <paramref name="options"/> carries a null value.
/// </exception>
public GraphAnalyticsHostedService(
    IGraphAnalyticsPipeline pipeline,
    IGraphSnapshotProvider snapshotProvider,
    IOptions<GraphAnalyticsOptions> options,
    ILogger<GraphAnalyticsHostedService> logger)
{
    // Validate in declaration order so the first offending argument is the one reported.
    ArgumentNullException.ThrowIfNull(pipeline);
    ArgumentNullException.ThrowIfNull(snapshotProvider);

    // A null wrapper and a null wrapped value are both reported against "options".
    var resolvedOptions = options?.Value;
    ArgumentNullException.ThrowIfNull(resolvedOptions, nameof(options));

    ArgumentNullException.ThrowIfNull(logger);

    _pipeline = pipeline;
    _snapshotProvider = snapshotProvider;
    _options = resolvedOptions;
    _logger = logger;
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
using var clusteringTimer = new PeriodicTimer(_options.ClusterInterval);
using var centralityTimer = new PeriodicTimer(_options.CentralityInterval);
var interval = _options.ClusterInterval < _options.CentralityInterval
? _options.ClusterInterval
: _options.CentralityInterval;
using var timer = new PeriodicTimer(interval);
while (!stoppingToken.IsCancellationRequested)
{
var clusteringTask = clusteringTimer.WaitForNextTickAsync(stoppingToken).AsTask();
var centralityTask = centralityTimer.WaitForNextTickAsync(stoppingToken).AsTask();
var completed = await Task.WhenAny(clusteringTask, centralityTask).ConfigureAwait(false);
if (completed.IsCanceled || stoppingToken.IsCancellationRequested)
if (!await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
{
break;
}
try
{
if (_options.SkipWhenIdle)
{
var pending = await _snapshotProvider.GetPendingSnapshotsAsync(stoppingToken).ConfigureAwait(false);
if (pending.Count == 0)
{
_logger.LogDebug("graph-indexer: skipping analytics pipeline, no pending snapshots");
continue;
}
}
await _pipeline.RunAsync(new GraphAnalyticsRunContext(ForceBackfill: false), stoppingToken).ConfigureAwait(false);
}
catch (OperationCanceledException)

View File

@@ -28,4 +28,9 @@ public sealed class GraphAnalyticsOptions
/// Whether to also write cluster ids onto graph node documents (alongside overlays).
/// </summary>
public bool WriteClusterAssignmentsToNodes { get; set; } = true;
/// <summary>
/// When true, skips the analytics pipeline if no pending snapshots exist.
/// Defaults to <c>true</c>.
/// </summary>
public bool SkipWhenIdle { get; set; } = true;
}