Reduce idle CPU across 62 containers (phase 1)
- Add resource limits (heavy/medium/light tiers) to all 59 .NET services
- Add .NET GC tuning (server/workstation GC, DATAS, conserve memory)
- Convert FirstSignalSnapshotWriter from 10s polling to Valkey pub/sub
- Convert EnvironmentSettingsRefreshService from 60s polling to Valkey pub/sub
- Consolidate GraphAnalytics dual timers to a single timer with idle-skip
- Increase healthcheck interval from 30s to 60s (configurable)
- Reduce debug logging to Information on 4 high-traffic services

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Auth.Abstractions;
|
||||
using StellaOps.Auth.ServerIntegration;
|
||||
using StellaOps.Infrastructure.Postgres.Migrations;
|
||||
@@ -255,6 +256,15 @@ builder.Services.AddSingleton<ITranslationBundleProvider>(sp => sp.GetRequiredSe
|
||||
// Environment settings composer (3-layer merge: env vars -> YAML -> DB)
builder.Services.AddSingleton<EnvironmentSettingsComposer>();
builder.Services.AddSingleton<SetupStateDetector>();

// Valkey/Redis connection for pub/sub notifications (environment settings dirty signal).
// The dirty signal is best-effort, so the multiplexer is created with
// AbortOnConnectFail = false: if Valkey is unreachable when the singleton is first
// resolved, Connect returns a multiplexer that keeps retrying in the background
// instead of throwing and failing construction of every service that depends on it.
var redisCs = builder.Configuration["ConnectionStrings:Redis"];
if (!string.IsNullOrWhiteSpace(redisCs))
{
    builder.Services.AddSingleton<IConnectionMultiplexer>(_ =>
    {
        var redisOptions = ConfigurationOptions.Parse(redisCs);
        redisOptions.AbortOnConnectFail = false;
        return ConnectionMultiplexer.Connect(redisOptions);
    });
}

builder.Services.AddHostedService<EnvironmentSettingsRefreshService>();

builder.Services.AddSingleton<IScoreEvaluationService, ScoreEvaluationService>();
|
||||
|
||||
@@ -4,52 +4,130 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Platform.WebService.Options;
|
||||
|
||||
namespace StellaOps.Platform.WebService.Services;
|
||||
|
||||
/// <summary>
/// Background service that invalidates the <see cref="IEnvironmentSettingsStore"/>
/// cache when notified via Valkey pub/sub, or when the fallback interval
/// (default 300s) elapses without a notification.
/// </summary>
/// <remarks>
/// The Valkey subscription is best-effort: when no <see cref="IConnectionMultiplexer"/>
/// is supplied, or subscribing fails, the service keeps running on the fallback
/// interval alone.
/// </remarks>
public sealed class EnvironmentSettingsRefreshService : BackgroundService
{
    private const int DefaultFallbackSeconds = 300;

    // SemaphoreSlim.WaitAsync rejects timeouts larger than int.MaxValue milliseconds.
    private static readonly TimeSpan MaxFallbackInterval = TimeSpan.FromMilliseconds(int.MaxValue);

    private static readonly RedisChannel DirtyChannel =
        RedisChannel.Literal("notify:platform:envsettings:dirty");

    private readonly IEnvironmentSettingsStore _store;
    private readonly IOptionsMonitor<PlatformServiceOptions> _optionsMonitor;
    private readonly ILogger<EnvironmentSettingsRefreshService> _logger;
    private readonly IConnectionMultiplexer? _connectionMultiplexer;

    // Binary signal (max count 1): any number of notifications that arrive before the
    // loop wakes up collapse into a single cache invalidation.
    private readonly SemaphoreSlim _notificationSignal = new(0, 1);

    private ISubscriber? _subscriber;

    /// <summary>
    /// Creates the refresh service.
    /// </summary>
    /// <param name="store">Store whose cache is invalidated on every wake-up.</param>
    /// <param name="optionsMonitor">Source of the fallback refresh interval.</param>
    /// <param name="logger">Logger for lifecycle and diagnostic messages.</param>
    /// <param name="connectionMultiplexer">
    /// Optional Valkey/Redis connection; when <c>null</c> the service runs on the
    /// fallback interval only.
    /// </param>
    public EnvironmentSettingsRefreshService(
        IEnvironmentSettingsStore store,
        IOptionsMonitor<PlatformServiceOptions> optionsMonitor,
        ILogger<EnvironmentSettingsRefreshService> logger,
        IConnectionMultiplexer? connectionMultiplexer = null)
    {
        _store = store;
        _optionsMonitor = optionsMonitor;
        _logger = logger;
        _connectionMultiplexer = connectionMultiplexer;
    }

    /// <summary>
    /// Subscribes to the dirty channel (best-effort) and then invalidates the store
    /// cache each time a notification arrives or the fallback interval elapses,
    /// until <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation("EnvironmentSettingsRefreshService started");

        await TrySubscribeAsync().ConfigureAwait(false);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                // One timed wait acts as both the wake-up signal and the fallback timer.
                // Exactly one waiter exists per iteration, so no pending waiter is left
                // behind to consume a later notification, and no timer wait is ever
                // started while a previous one is still in flight.
                await _notificationSignal
                    .WaitAsync(GetFallbackInterval(), stoppingToken)
                    .ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                break;
            }

            if (stoppingToken.IsCancellationRequested)
            {
                break;
            }

            _store.InvalidateCache();
            _logger.LogDebug("Environment settings cache invalidated");
        }

        _logger.LogInformation("EnvironmentSettingsRefreshService stopped");
    }

    /// <summary>
    /// Unsubscribes from the dirty channel (errors are logged, not thrown) and then
    /// stops the background loop.
    /// </summary>
    public override async Task StopAsync(CancellationToken cancellationToken)
    {
        // Unsubscribe from Valkey channel before stopping
        if (_subscriber is not null)
        {
            try
            {
                await _subscriber.UnsubscribeAsync(DirtyChannel).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Error unsubscribing from Valkey channel during shutdown");
            }
        }

        await base.StopAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public override void Dispose()
    {
        _notificationSignal.Dispose();
        base.Dispose();
    }

    /// <summary>
    /// Subscribes to <see cref="DirtyChannel"/> when a multiplexer is available.
    /// Never throws: a failed subscription is logged and the service continues on
    /// the fallback interval only.
    /// </summary>
    private async Task TrySubscribeAsync()
    {
        if (_connectionMultiplexer is null)
        {
            _logger.LogInformation(
                "EnvironmentSettingsRefreshService running without Valkey subscription (fallback timer only)");
            return;
        }

        try
        {
            _subscriber = _connectionMultiplexer.GetSubscriber();
            await _subscriber
                .SubscribeAsync(DirtyChannel, (_, _) => SignalRefresh())
                .ConfigureAwait(false);

            _logger.LogInformation(
                "EnvironmentSettingsRefreshService subscribed to Valkey channel {Channel}",
                DirtyChannel);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "EnvironmentSettingsRefreshService failed to subscribe to Valkey; falling back to timer-only mode");
        }
    }

    /// <summary>
    /// Wakes the refresh loop. Safe to call repeatedly and from any callback thread.
    /// </summary>
    private void SignalRefresh()
    {
        // Already signalled: the pending wake-up covers this notification too.
        if (_notificationSignal.CurrentCount != 0)
        {
            return;
        }

        try
        {
            _notificationSignal.Release();
        }
        catch (SemaphoreFullException)
        {
            // Another notification won the race between the count check and Release.
        }
        catch (ObjectDisposedException)
        {
            // A late notification arrived after Dispose; nothing is left to wake.
        }
    }

    /// <summary>
    /// Reads the configured fallback interval, substituting the default for
    /// non-positive values and clamping to the largest timeout the wait accepts.
    /// Read on every iteration so configuration changes apply without a restart.
    /// </summary>
    private TimeSpan GetFallbackInterval()
    {
        var seconds = _optionsMonitor.CurrentValue.Cache.EnvironmentSettingsRefreshSeconds;
        if (seconds <= 0)
        {
            seconds = DefaultFallbackSeconds;
        }

        var interval = TimeSpan.FromSeconds(seconds);
        return interval > MaxFallbackInterval ? MaxFallbackInterval : interval;
    }
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Npgsql;
|
||||
using StackExchange.Redis;
|
||||
using StellaOps.Platform.Database.EfCore.Context;
|
||||
using StellaOps.Platform.Database.Postgres;
|
||||
|
||||
@@ -19,10 +20,13 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
{
|
||||
private readonly NpgsqlDataSource _dataSource;
|
||||
private readonly ILogger<PostgresEnvironmentSettingsStore> _logger;
|
||||
private readonly ISubscriber? _subscriber;
|
||||
private volatile IReadOnlyDictionary<string, string>? _cache;
|
||||
private readonly object _cacheLock = new();
|
||||
|
||||
private const int DefaultCommandTimeoutSeconds = 30;
|
||||
private static readonly RedisChannel DirtyChannel =
|
||||
RedisChannel.Literal("notify:platform:envsettings:dirty");
|
||||
|
||||
private const string UpsertSql = """
|
||||
INSERT INTO platform.environment_settings (key, value, updated_at, updated_by)
|
||||
@@ -32,10 +36,12 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
|
||||
/// <summary>
/// Creates the PostgreSQL-backed environment settings store.
/// </summary>
/// <param name="dataSource">Npgsql data source used for database access; required.</param>
/// <param name="logger">Optional logger; a no-op logger is used when omitted.</param>
/// <param name="connectionMultiplexer">
/// Optional Valkey/Redis connection. When supplied, its subscriber is kept for
/// publishing dirty notifications; when omitted, no subscriber is available.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="dataSource"/> is <c>null</c>.</exception>
public PostgresEnvironmentSettingsStore(
    NpgsqlDataSource dataSource,
    ILogger<PostgresEnvironmentSettingsStore>? logger = null,
    IConnectionMultiplexer? connectionMultiplexer = null)
{
    _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
    _logger = logger ?? Microsoft.Extensions.Logging.Abstractions.NullLogger<PostgresEnvironmentSettingsStore>.Instance;
    _subscriber = connectionMultiplexer?.GetSubscriber();
}
|
||||
|
||||
public async Task<IReadOnlyDictionary<string, string>> GetAllAsync(CancellationToken ct = default)
|
||||
@@ -107,6 +113,7 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
ct).ConfigureAwait(false);
|
||||
|
||||
InvalidateCache();
|
||||
PublishDirtyNotification();
|
||||
|
||||
_logger.LogInformation("Environment setting {Key} updated by {UpdatedBy}", key, updatedBy);
|
||||
}
|
||||
@@ -129,6 +136,7 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
dbContext.EnvironmentSettings.Remove(entity);
|
||||
var rows = await dbContext.SaveChangesAsync(ct).ConfigureAwait(false);
|
||||
InvalidateCache();
|
||||
PublishDirtyNotification();
|
||||
|
||||
_logger.LogInformation("Environment setting {Key} deleted ({Rows} rows affected)", key, rows);
|
||||
}
|
||||
@@ -145,4 +153,17 @@ public sealed class PostgresEnvironmentSettingsStore : IEnvironmentSettingsStore
|
||||
_cache = null;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Publishes a best-effort dirty notification on the Valkey channel so that
/// subscribers can react to a settings change. Does nothing when no subscriber
/// is configured, and never throws: a failed publish is logged at Debug level.
/// </summary>
private void PublishDirtyNotification()
{
    if (_subscriber is null)
    {
        return;
    }

    try
    {
        // Fire-and-forget: the returned task is intentionally not awaited.
        _ = _subscriber.PublishAsync(DirtyChannel, "1", CommandFlags.FireAndForget);
    }
    catch (Exception ex)
    {
        // Valkey notification is best-effort.
        // The background refresh service will still pick up changes on the fallback timer.
        _logger.LogDebug(
            ex,
            "Failed to publish environment settings dirty notification to Valkey channel {Channel}",
            DirtyChannel);
    }
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.AspNetCore.OpenApi" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore" />
|
||||
<PackageReference Include="StackExchange.Redis" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
Reference in New Issue
Block a user