release orchestrator v1 draft and build fixes
This commit is contained in:
@@ -0,0 +1,60 @@
|
||||
using StellaOps.Plugin.Abstractions.Execution;
|
||||
using StellaOps.Plugin.Abstractions.Health;
|
||||
|
||||
namespace StellaOps.Plugin.Host.Health;
|
||||
|
||||
/// <summary>
/// Contract for a component that tracks the health of loaded plugins
/// and notifies subscribers when a plugin's health status changes.
/// </summary>
public interface IPluginHealthMonitor : IAsyncDisposable
{
    /// <summary>
    /// Starts the health monitoring loop.
    /// </summary>
    /// <param name="ct">Token that cancels the operation.</param>
    /// <returns>A task that represents the start operation.</returns>
    Task StartAsync(CancellationToken ct);

    /// <summary>
    /// Stops the health monitoring loop.
    /// </summary>
    /// <param name="ct">Token that cancels the operation.</param>
    /// <returns>A task that represents the stop operation.</returns>
    Task StopAsync(CancellationToken ct);

    /// <summary>
    /// Adds a plugin to the set of plugins being monitored.
    /// </summary>
    /// <param name="plugin">The loaded plugin to monitor.</param>
    void RegisterPlugin(LoadedPlugin plugin);

    /// <summary>
    /// Removes a plugin from the set of plugins being monitored.
    /// </summary>
    /// <param name="pluginId">Identifier of the plugin to remove.</param>
    void UnregisterPlugin(string pluginId);

    /// <summary>
    /// Runs a health check against a single plugin right away.
    /// </summary>
    /// <param name="pluginId">Identifier of the plugin to check.</param>
    /// <param name="ct">Token that cancels the operation.</param>
    /// <returns>The result of the health check.</returns>
    Task<HealthCheckResult> CheckHealthAsync(string pluginId, CancellationToken ct);

    /// <summary>
    /// Looks up the most recently recorded health status of a plugin.
    /// </summary>
    /// <param name="pluginId">Identifier of the plugin to look up.</param>
    /// <returns>The plugin's health status, or <c>null</c> when the plugin is not registered.</returns>
    HealthStatus? GetHealthStatus(string pluginId);

    /// <summary>
    /// Returns the health status of every monitored plugin.
    /// </summary>
    /// <returns>A dictionary that maps each plugin ID to its health status.</returns>
    IReadOnlyDictionary<string, HealthStatus> GetAllHealthStatuses();

    /// <summary>
    /// Raised whenever the health status of a plugin changes.
    /// </summary>
    event EventHandler<PluginHealthChangedEventArgs>? HealthChanged;
}
|
||||
253
src/Plugin/StellaOps.Plugin.Host/Health/PluginHealthMonitor.cs
Normal file
253
src/Plugin/StellaOps.Plugin.Host/Health/PluginHealthMonitor.cs
Normal file
@@ -0,0 +1,253 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Diagnostics;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Plugin.Abstractions.Execution;
|
||||
using StellaOps.Plugin.Abstractions.Health;
|
||||
|
||||
namespace StellaOps.Plugin.Host.Health;
|
||||
|
||||
/// <summary>
/// Monitors plugin health with periodic checks and status change notifications.
/// </summary>
/// <remarks>
/// Registered plugins are polled from a single background loop driven by a
/// <see cref="PeriodicTimer"/>. On-demand checks go through <see cref="CheckHealthAsync"/>.
/// The lifecycle members (<see cref="StartAsync"/>, <see cref="StopAsync"/>,
/// <see cref="DisposeAsync"/>) use no locking, so callers should not invoke them concurrently.
/// </remarks>
public sealed class PluginHealthMonitor : IPluginHealthMonitor
{
    private readonly ConcurrentDictionary<string, PluginHealthState> _healthStates = new();
    private readonly PluginHostOptions _options;
    private readonly ILogger<PluginHealthMonitor> _logger;
    private readonly TimeProvider _timeProvider;
    private Task? _monitorTask;
    private CancellationTokenSource? _cts;

    /// <inheritdoc />
    public event EventHandler<PluginHealthChangedEventArgs>? HealthChanged;

    /// <summary>
    /// Creates a new plugin health monitor.
    /// </summary>
    /// <param name="options">Plugin host options; supplies the check interval and the check timeout.</param>
    /// <param name="logger">Logger instance.</param>
    /// <param name="timeProvider">Time provider used for timestamps and for the polling timer.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public PluginHealthMonitor(
        IOptions<PluginHostOptions> options,
        ILogger<PluginHealthMonitor> logger,
        TimeProvider timeProvider)
    {
        // Fail fast here: a null dependency would otherwise only surface later,
        // inside the background loop, where the failure is much harder to trace.
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(logger);
        ArgumentNullException.ThrowIfNull(timeProvider);

        _options = options.Value;
        _logger = logger;
        _timeProvider = timeProvider;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Calling this while the loop is already running is a no-op. The loop stops when
    /// <paramref name="ct"/> is cancelled or when <see cref="StopAsync"/> is called.
    /// </remarks>
    public Task StartAsync(CancellationToken ct)
    {
        // A second start while the loop is alive would orphan the first loop
        // and leak its CancellationTokenSource, so treat it as already started.
        if (_monitorTask is { IsCompleted: false } && _cts is { IsCancellationRequested: false })
        {
            return Task.CompletedTask;
        }

        // Release the source left over from a previous start/stop cycle, if any.
        _cts?.Dispose();

        var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        var loopToken = cts.Token;
        _cts = cts;

        // Capture the token in a local so the loop never re-reads the mutable field.
        _monitorTask = Task.Run(() => MonitorLoopAsync(loopToken), loopToken);

        _logger.LogInformation("Plugin health monitor started with interval {Interval}",
            _options.HealthCheckInterval);

        return Task.CompletedTask;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Requests cancellation of the loop and waits for it to finish, or until
    /// <paramref name="ct"/> is cancelled. Safe to call when the monitor was never started.
    /// </remarks>
    public async Task StopAsync(CancellationToken ct)
    {
        var cts = _cts;
        if (cts is not null)
        {
            await cts.CancelAsync();
        }

        var monitorTask = _monitorTask;
        if (monitorTask is not null)
        {
            try
            {
                await monitorTask.WaitAsync(ct);
            }
            catch (OperationCanceledException)
            {
                // Expected: either the loop task ended as cancelled, or the caller's
                // token fired before the loop finished unwinding.
            }
        }

        _logger.LogInformation("Plugin health monitor stopped");
    }

    /// <inheritdoc />
    /// <remarks>
    /// The plugin starts out as <see cref="HealthStatus.Healthy"/> with the registration time
    /// recorded as its last check. Registering an ID that is already present replaces its state.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="plugin"/> is <c>null</c>.</exception>
    public void RegisterPlugin(LoadedPlugin plugin)
    {
        ArgumentNullException.ThrowIfNull(plugin);

        var state = new PluginHealthState
        {
            Plugin = plugin,
            LastCheck = _timeProvider.GetUtcNow(),
            Status = HealthStatus.Healthy,
            ConsecutiveFailures = 0
        };

        _healthStates[plugin.Info.Id] = state;
        _logger.LogDebug("Registered plugin {PluginId} for health monitoring", plugin.Info.Id);
    }

    /// <inheritdoc />
    /// <remarks>Does nothing when the plugin is not registered.</remarks>
    public void UnregisterPlugin(string pluginId)
    {
        if (_healthStates.TryRemove(pluginId, out _))
        {
            _logger.LogDebug("Unregistered plugin {PluginId} from health monitoring", pluginId);
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns an unhealthy result, without throwing, when the plugin is not registered.
    /// The outcome is recorded the same way as a periodic check.
    /// </remarks>
    public async Task<HealthCheckResult> CheckHealthAsync(string pluginId, CancellationToken ct)
    {
        if (!_healthStates.TryGetValue(pluginId, out var state))
        {
            return HealthCheckResult.Unhealthy("Plugin not registered for health monitoring");
        }

        return await PerformHealthCheckAsync(state, ct);
    }

    /// <inheritdoc />
    public HealthStatus? GetHealthStatus(string pluginId)
    {
        return _healthStates.TryGetValue(pluginId, out var state) ? state.Status : null;
    }

    /// <inheritdoc />
    /// <remarks>The returned dictionary is a copy taken at the time of the call.</remarks>
    public IReadOnlyDictionary<string, HealthStatus> GetAllHealthStatuses()
    {
        return _healthStates.ToDictionary(
            kvp => kvp.Key,
            kvp => kvp.Value.Status);
    }

    /// <summary>
    /// Background loop: on every timer tick, checks each registered plugin that is due.
    /// Runs until <paramref name="ct"/> is cancelled.
    /// </summary>
    /// <param name="ct">Token that ends the loop.</param>
    private async Task MonitorLoopAsync(CancellationToken ct)
    {
        // Dispose the timer when the loop exits. It is driven by the injected TimeProvider
        // so that ticks and the LastCheck timestamps are measured on the same clock.
        using var timer = new PeriodicTimer(_options.HealthCheckInterval, _timeProvider);

        while (!ct.IsCancellationRequested)
        {
            try
            {
                await timer.WaitForNextTickAsync(ct);
                await CheckDuePluginsAsync(ct);
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch (Exception ex)
            {
                // Keep the loop alive; one bad iteration must not end monitoring.
                _logger.LogError(ex, "Error in health monitor loop");
            }
        }
    }

    /// <summary>
    /// Runs a health check for every registered plugin that is currently due.
    /// A failure while checking one plugin is logged and does not stop the others.
    /// </summary>
    /// <param name="ct">Token that aborts the pass.</param>
    private async Task CheckDuePluginsAsync(CancellationToken ct)
    {
        foreach (var (pluginId, state) in _healthStates)
        {
            ct.ThrowIfCancellationRequested();

            if (!IsDueForCheck(state))
            {
                continue;
            }

            try
            {
                await PerformHealthCheckAsync(state, ct);
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogError(ex, "Health check failed for plugin {PluginId}", pluginId);
            }
        }
    }

    /// <summary>
    /// Decides whether a plugin should be checked on the current tick.
    /// </summary>
    /// <param name="state">Tracked state of the plugin.</param>
    /// <returns><c>true</c> when at least half an interval has passed since the last check.</returns>
    private bool IsDueForCheck(PluginHealthState state)
    {
        // LastCheck is stamped when a check completes, so at the following tick the elapsed
        // time is (interval - check duration), i.e. always slightly under a full interval.
        // Requiring a full interval therefore skipped every other tick. Half an interval
        // still skips plugins that were just registered or just checked on demand.
        var elapsed = _timeProvider.GetUtcNow() - state.LastCheck;
        return elapsed >= _options.HealthCheckInterval / 2;
    }

    /// <summary>
    /// Invokes the plugin's health check with the configured timeout and records the outcome.
    /// </summary>
    /// <param name="state">Tracked state of the plugin to check.</param>
    /// <param name="ct">Caller's token; when it is cancelled the cancellation propagates to the caller.</param>
    /// <returns>
    /// The plugin's result with the measured duration, or an unhealthy result when the
    /// check timed out or threw.
    /// </returns>
    private async Task<HealthCheckResult> PerformHealthCheckAsync(PluginHealthState state, CancellationToken ct)
    {
        var plugin = state.Plugin;
        var stopwatch = Stopwatch.StartNew();

        try
        {
            using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            timeoutCts.CancelAfter(_options.HealthCheckTimeout);

            var reported = await plugin.Instance.HealthCheckAsync(timeoutCts.Token);
            stopwatch.Stop();

            // Overwrite whatever duration the plugin reported with the one measured here.
            var result = reported with { Duration = stopwatch.Elapsed };

            UpdateHealthState(state, result);
            return result;
        }
        catch (OperationCanceledException) when (!ct.IsCancellationRequested)
        {
            // The caller did not cancel, so the cancellation is treated as the timeout firing.
            var timedOut = HealthCheckResult.Unhealthy("Health check timed out")
                .WithDuration(stopwatch.Elapsed);

            return RecordFailure(state, timedOut);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            var failed = HealthCheckResult.Unhealthy(ex)
                .WithDuration(stopwatch.Elapsed);

            return RecordFailure(state, failed);
        }
    }

    /// <summary>
    /// Counts a failed check (timeout or exception) and records its result.
    /// </summary>
    /// <param name="state">Tracked state of the plugin.</param>
    /// <param name="result">The unhealthy result describing the failure.</param>
    /// <returns><paramref name="result"/>, for convenient returning by the caller.</returns>
    private HealthCheckResult RecordFailure(PluginHealthState state, HealthCheckResult result)
    {
        state.ConsecutiveFailures++;
        UpdateHealthState(state, result);
        return result;
    }

    /// <summary>
    /// Stores the latest check result on the plugin's state and raises
    /// <see cref="HealthChanged"/> when the status differs from the previous one.
    /// </summary>
    /// <param name="state">Tracked state of the plugin.</param>
    /// <param name="result">The latest health check result.</param>
    private void UpdateHealthState(PluginHealthState state, HealthCheckResult result)
    {
        var oldStatus = state.Status;
        var newStatus = result.Status;

        state.Status = newStatus;
        state.LastCheck = _timeProvider.GetUtcNow();
        state.LastResult = result;

        if (newStatus == HealthStatus.Healthy)
        {
            state.ConsecutiveFailures = 0;
        }

        if (oldStatus == newStatus)
        {
            return;
        }

        _logger.LogInformation(
            "Plugin {PluginId} health changed from {OldStatus} to {NewStatus}",
            state.Plugin.Info.Id, oldStatus, newStatus);

        RaiseHealthChanged(new PluginHealthChangedEventArgs
        {
            PluginId = state.Plugin.Info.Id,
            OldStatus = oldStatus,
            NewStatus = newStatus,
            CheckResult = result
        });
    }

    /// <summary>
    /// Notifies each <see cref="HealthChanged"/> subscriber in isolation.
    /// </summary>
    /// <param name="args">Event data describing the status change.</param>
    private void RaiseHealthChanged(PluginHealthChangedEventArgs args)
    {
        var handlers = HealthChanged;
        if (handlers is null)
        {
            return;
        }

        // Invoke subscribers one at a time. An exception escaping from here would be caught
        // by PerformHealthCheckAsync and wrongly recorded as a failure of the plugin itself,
        // and it would also prevent the remaining subscribers from being notified.
        foreach (EventHandler<PluginHealthChangedEventArgs> handler in handlers.GetInvocationList())
        {
            try
            {
                handler(this, args);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "HealthChanged subscriber failed for plugin {PluginId}", args.PluginId);
            }
        }
    }

    /// <inheritdoc />
    /// <remarks>Stops the loop and releases the cancellation source. Safe to call more than once.</remarks>
    public async ValueTask DisposeAsync()
    {
        try
        {
            await StopAsync(CancellationToken.None);
        }
        finally
        {
            // Clear the field so that a later StopAsync/DisposeAsync does not call
            // CancelAsync on a disposed source (which throws ObjectDisposedException).
            _cts?.Dispose();
            _cts = null;
        }
    }

    /// <summary>
    /// Internal class to track plugin health state.
    /// </summary>
    private sealed class PluginHealthState
    {
        // The plugin this state belongs to.
        public required LoadedPlugin Plugin { get; init; }

        // When the plugin was registered or last checked, per the injected time provider.
        public DateTimeOffset LastCheck { get; set; }

        // Status taken from the most recent check result.
        public HealthStatus Status { get; set; }

        // Timeouts/exceptions since the last healthy result.
        public int ConsecutiveFailures { get; set; }

        // Most recent check result; null until the first check completes.
        public HealthCheckResult? LastResult { get; set; }
    }
}
|
||||
Reference in New Issue
Block a user