Reduce idle CPU across 62 containers (phase 1)
- Add resource limits (heavy/medium/light tiers) to all 59 .NET services
- Add .NET GC tuning (server/workstation GC, DATAS, conserve memory)
- Convert FirstSignalSnapshotWriter from 10s polling to Valkey pub/sub
- Convert EnvironmentSettingsRefreshService from 60s polling to Valkey pub/sub
- Consolidate GraphAnalytics dual timers to single timer with idle-skip
- Increase healthcheck interval from 30s to 60s (configurable)
- Reduce debug logging to Information on 4 high-traffic services

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,37 +8,49 @@ namespace StellaOps.Graph.Indexer.Analytics;
|
||||
public sealed class GraphAnalyticsHostedService : BackgroundService
|
||||
{
|
||||
private readonly IGraphAnalyticsPipeline _pipeline;
|
||||
private readonly IGraphSnapshotProvider _snapshotProvider;
|
||||
private readonly GraphAnalyticsOptions _options;
|
||||
private readonly ILogger<GraphAnalyticsHostedService> _logger;
|
||||
|
||||
/// <summary>
/// Initializes the hosted service with its pipeline, snapshot provider, resolved options and logger.
/// </summary>
/// <param name="pipeline">Analytics pipeline stored for later runs.</param>
/// <param name="snapshotProvider">Snapshot provider stored for later pending-snapshot queries.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is read once and stored.</param>
/// <param name="logger">Logger stored for this service.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when any argument is null, or when <paramref name="options"/> yields a null <c>Value</c>.
/// </exception>
public GraphAnalyticsHostedService(
    IGraphAnalyticsPipeline pipeline,
    IGraphSnapshotProvider snapshotProvider,
    IOptions<GraphAnalyticsOptions> options,
    ILogger<GraphAnalyticsHostedService> logger)
{
    // Arguments are validated in declaration order so the first offending
    // parameter is the one reported.
    if (pipeline is null)
    {
        throw new ArgumentNullException(nameof(pipeline));
    }

    if (snapshotProvider is null)
    {
        throw new ArgumentNullException(nameof(snapshotProvider));
    }

    // A null wrapper and a null resolved value are both reported against "options".
    var resolvedOptions = options?.Value;
    if (resolvedOptions is null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    if (logger is null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    _pipeline = pipeline;
    _snapshotProvider = snapshotProvider;
    _options = resolvedOptions;
    _logger = logger;
}
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
using var clusteringTimer = new PeriodicTimer(_options.ClusterInterval);
|
||||
using var centralityTimer = new PeriodicTimer(_options.CentralityInterval);
|
||||
var interval = _options.ClusterInterval < _options.CentralityInterval
|
||||
? _options.ClusterInterval
|
||||
: _options.CentralityInterval;
|
||||
|
||||
using var timer = new PeriodicTimer(interval);
|
||||
|
||||
while (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
var clusteringTask = clusteringTimer.WaitForNextTickAsync(stoppingToken).AsTask();
|
||||
var centralityTask = centralityTimer.WaitForNextTickAsync(stoppingToken).AsTask();
|
||||
|
||||
var completed = await Task.WhenAny(clusteringTask, centralityTask).ConfigureAwait(false);
|
||||
if (completed.IsCanceled || stoppingToken.IsCancellationRequested)
|
||||
if (!await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (_options.SkipWhenIdle)
|
||||
{
|
||||
var pending = await _snapshotProvider.GetPendingSnapshotsAsync(stoppingToken).ConfigureAwait(false);
|
||||
if (pending.Count == 0)
|
||||
{
|
||||
_logger.LogDebug("graph-indexer: skipping analytics pipeline, no pending snapshots");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
await _pipeline.RunAsync(new GraphAnalyticsRunContext(ForceBackfill: false), stoppingToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
|
||||
@@ -28,4 +28,9 @@ public sealed class GraphAnalyticsOptions
|
||||
/// Whether to also write cluster ids onto graph node documents (alongside overlays).
|
||||
/// </summary>
|
||||
public bool WriteClusterAssignmentsToNodes { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// When true, GraphAnalyticsHostedService queries for pending snapshots on each timer tick and skips that tick's analytics pipeline run when none are pending. Defaults to true.
|
||||
/// </summary>
|
||||
public bool SkipWhenIdle { get; set; } = true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user