Files
git.stella-ops.org/src/Plugin/StellaOps.Plugin.Sandbox/ProcessSandbox.cs
2026-02-01 21:37:40 +02:00

475 lines
15 KiB
C#

using Microsoft.Extensions.Logging;
using StellaOps.Plugin.Abstractions.Health;
using StellaOps.Plugin.Abstractions.Manifest;
using StellaOps.Plugin.Sandbox.Communication;
using StellaOps.Plugin.Sandbox.Filesystem;
using StellaOps.Plugin.Sandbox.Network;
using StellaOps.Plugin.Sandbox.Process;
using StellaOps.Plugin.Sandbox.Resources;
using System.Runtime.CompilerServices;
using SystemProcess = System.Diagnostics.Process;
namespace StellaOps.Plugin.Sandbox;
/// <summary>
/// Process-based sandbox implementation for untrusted plugins.
/// </summary>
/// <remarks>
/// The sandbox starts a plugin host process through <see cref="IPluginProcessManager"/>,
/// talks to it through <see cref="IGrpcPluginBridge"/>, and applies limits and policy via
/// <see cref="IResourceLimiter"/> and <see cref="INetworkPolicyEnforcer"/>. After a
/// successful start a background task samples resource usage and raises
/// <see cref="ResourceWarning"/> when usage reaches 80% of a configured limit.
/// </remarks>
public sealed class ProcessSandbox : ISandbox
{
// Collaborators supplied through the constructor.
private readonly SandboxConfiguration _config;
private readonly IPluginProcessManager _processManager;
private readonly IGrpcPluginBridge _bridge;
private readonly IResourceLimiter _resourceLimiter;
private readonly INetworkPolicyEnforcer _networkEnforcer;
private readonly ILogger<ProcessSandbox> _logger;
// NOTE(review): stored by the constructor but not read by any member visible in this file.
private readonly TimeProvider _timeProvider;
// Host process returned by the process manager; null until StartAsync has launched it.
private SystemProcess? _process;
// Lifecycle state; changed only through TransitionState. It is also read by the monitoring
// task, and no lock or volatile modifier protects it.
private SandboxState _state = SandboxState.Created;
// Most recent usage sample; written by the monitoring task and exposed via CurrentUsage.
private ResourceUsage _currentUsage = ResourceUsage.Empty;
// Cancellation source and task of the background resource monitor; both are null until
// StartResourceMonitoring has run.
private CancellationTokenSource? _monitoringCts;
private Task? _monitoringTask;
// Directory chosen by PrepareWorkingDirectory; null until StartAsync has prepared it.
private string? _workingDirectory;
/// <inheritdoc />
public string Id { get; }
/// <inheritdoc />
public SandboxState State => _state;
/// <inheritdoc />
public ResourceUsage CurrentUsage => _currentUsage;
/// <inheritdoc />
public event EventHandler<SandboxStateChangedEventArgs>? StateChanged;
/// <inheritdoc />
public event EventHandler<ResourceWarningEventArgs>? ResourceWarning;
/// <summary>
/// Creates a new process sandbox.
/// </summary>
/// <param name="id">Identifier of this sandbox; also used to derive the default working directory and socket path.</param>
/// <param name="config">Sandbox configuration (resource limits, network policy, timeouts, environment).</param>
/// <param name="processManager">Starts and stops the plugin host process.</param>
/// <param name="bridge">Communication bridge to the plugin host process.</param>
/// <param name="resourceLimiter">Applies, measures and removes resource limits for the process.</param>
/// <param name="networkEnforcer">Applies and removes the network policy for this sandbox.</param>
/// <param name="logger">Logger for lifecycle and diagnostic messages.</param>
/// <param name="timeProvider">Time source injected for this sandbox.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
public ProcessSandbox(
    string id,
    SandboxConfiguration config,
    IPluginProcessManager processManager,
    IGrpcPluginBridge bridge,
    IResourceLimiter resourceLimiter,
    INetworkPolicyEnforcer networkEnforcer,
    ILogger<ProcessSandbox> logger,
    TimeProvider timeProvider)
{
    // Fail fast at construction time instead of with a NullReferenceException
    // somewhere in the middle of the start/stop sequence.
    ArgumentNullException.ThrowIfNull(id);
    ArgumentNullException.ThrowIfNull(config);
    ArgumentNullException.ThrowIfNull(processManager);
    ArgumentNullException.ThrowIfNull(bridge);
    ArgumentNullException.ThrowIfNull(resourceLimiter);
    ArgumentNullException.ThrowIfNull(networkEnforcer);
    ArgumentNullException.ThrowIfNull(logger);
    ArgumentNullException.ThrowIfNull(timeProvider);

    Id = id;
    _config = config;
    _processManager = processManager;
    _bridge = bridge;
    _resourceLimiter = resourceLimiter;
    _networkEnforcer = networkEnforcer;
    _logger = logger;
    _timeProvider = timeProvider;
}
/// <inheritdoc />
/// <remarks>
/// Runs the start sequence in order: prepare the working directory, build the resource
/// configuration, apply the network policy, launch the host process, apply resource limits,
/// connect the bridge, initialize the plugin and start resource monitoring. On any failure the
/// sandbox moves to <see cref="SandboxState.Failed"/>, everything that was already started
/// (monitoring, bridge connection, host process) is torn down, and the exception is rethrown.
/// </remarks>
public async Task StartAsync(PluginManifest manifest, CancellationToken ct)
{
    TransitionState(SandboxState.Starting);
    try
    {
        // 1. Create isolated working directory
        _workingDirectory = PrepareWorkingDirectory(manifest);

        // 2. Configure resource limits
        var resourceConfig = _resourceLimiter.CreateConfiguration(_config.ResourceLimits);

        // 3. Configure network policy
        await _networkEnforcer.ApplyPolicyAsync(Id, _config.NetworkPolicy, ct);

        // 4. Start the plugin host process
        var socketPath = GetSocketPath();
        _process = await _processManager.StartAsync(new ProcessStartRequest
        {
            PluginAssemblyPath = manifest.AssemblyPath!,
            EntryPoint = manifest.EntryPoint,
            WorkingDirectory = _workingDirectory,
            SocketPath = socketPath,
            ResourceConfiguration = resourceConfig,
            EnvironmentVariables = _config.EnvironmentVariables
        }, ct);

        // 5. Apply resource limits to the process
        await _resourceLimiter.ApplyLimitsAsync(_process, resourceConfig, ct);

        // 6. Wait for the process to be ready and connect
        await WaitForReadyAsync(socketPath, ct);

        // 7. Initialize the plugin
        await _bridge.InitializePluginAsync(manifest, ct);

        // 8. Start resource monitoring
        StartResourceMonitoring();

        TransitionState(SandboxState.Running);
        _logger.LogInformation("Sandbox {Id} started for plugin {PluginId}",
            Id, manifest.Info.Id);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to start sandbox {Id}", Id);
        TransitionState(SandboxState.Failed, ex.Message);

        // The process and bridge may already be live when a later step fails. Once the
        // state is Failed neither StopAsync nor DisposeAsync will stop them, so they
        // must be torn down here.
        await AbortFailedStartAsync();
        await CleanupAsync();
        throw;
    }
}

/// <summary>
/// Best-effort teardown of whatever a failed start already brought up: the monitoring task,
/// the bridge connection and the host process. Never throws; failures are logged.
/// </summary>
private async Task AbortFailedStartAsync()
{
    _monitoringCts?.Cancel();
    if (_monitoringTask is not null)
    {
        try
        {
            await _monitoringTask;
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Resource monitoring for sandbox {Id} ended with an error", Id);
        }
    }

    try
    {
        if (_bridge.IsConnected)
        {
            await _bridge.DisconnectAsync(CancellationToken.None);
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to disconnect bridge after failed start of sandbox {Id}", Id);
    }

    var process = _process;
    if (process is null)
    {
        return;
    }

    try
    {
        if (!process.HasExited)
        {
            await _processManager.StopAsync(process, _config.Timeouts.ShutdownTimeout, CancellationToken.None);
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to stop process after failed start of sandbox {Id}", Id);
    }
}
/// <inheritdoc />
/// <remarks>
/// Does nothing when the sandbox is already <see cref="SandboxState.Stopped"/>,
/// <see cref="SandboxState.Failed"/> or <see cref="SandboxState.Killed"/>. Otherwise it stops
/// monitoring, asks the plugin to shut down within <paramref name="timeout"/>, disconnects the
/// bridge, stops the process and cleans up. If any step throws, the sandbox moves to
/// <see cref="SandboxState.Failed"/>, the process tree is killed as a last resort, cleanup
/// runs, and the exception is rethrown.
/// </remarks>
public async Task StopAsync(TimeSpan timeout, CancellationToken ct)
{
    if (_state is SandboxState.Stopped or SandboxState.Failed or SandboxState.Killed)
    {
        return;
    }

    TransitionState(SandboxState.Stopping);
    try
    {
        // Stop monitoring
        _monitoringCts?.Cancel();
        if (_monitoringTask != null)
        {
            // Deliberate best-effort: a monitoring failure must not block shutdown.
            try { await _monitoringTask; } catch { /* Ignore */ }
        }

        // 1. Signal graceful shutdown via gRPC
        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        timeoutCts.CancelAfter(timeout);
        try
        {
            if (_bridge.IsConnected)
            {
                await _bridge.ShutdownPluginAsync(timeoutCts.Token);
            }
        }
        catch (OperationCanceledException)
        {
            _logger.LogWarning("Sandbox {Id} did not shutdown gracefully, killing", Id);
        }

        // 2. Disconnect bridge
        await _bridge.DisconnectAsync(ct);

        // 3. Stop the process
        if (_process != null)
        {
            await _processManager.StopAsync(_process, timeout, ct);
        }

        // 4. Cleanup resources
        await CleanupAsync();
        TransitionState(SandboxState.Stopped);
        _logger.LogInformation("Sandbox {Id} stopped", Id);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error stopping sandbox {Id}", Id);
        TransitionState(SandboxState.Failed, ex.Message);

        // Once the state is Failed, later StopAsync/DisposeAsync calls return without
        // touching the process, so this is the last chance to avoid leaving an
        // untrusted plugin process running.
        KillProcessAfterFailedStop();
        await CleanupAsync();
        throw;
    }
}

/// <summary>
/// Last-resort termination of the host process (and its children) after a failed stop.
/// Never throws; a failure to kill is logged.
/// </summary>
private void KillProcessAfterFailedStop()
{
    var process = _process;
    if (process is null)
    {
        return;
    }

    try
    {
        if (!process.HasExited)
        {
            process.Kill(entireProcessTree: true);
        }
    }
    catch (Exception killEx)
    {
        _logger.LogWarning(killEx, "Failed to kill process of sandbox {Id} after failed stop", Id);
    }
}
/// <inheritdoc />
/// <remarks>
/// Requires the sandbox to be running. The call is forwarded to the bridge with a token that
/// fires on either caller cancellation or after <paramref name="timeout"/>; only the latter is
/// translated into a <see cref="TimeoutException"/>.
/// </remarks>
public async Task<T> ExecuteAsync<T>(
    string operationName,
    object? parameters,
    TimeSpan timeout,
    CancellationToken ct)
{
    EnsureRunning();

    using var deadline = CancellationTokenSource.CreateLinkedTokenSource(ct);
    deadline.CancelAfter(timeout);

    try
    {
        var outcome = await _bridge.InvokeAsync<T>(operationName, parameters, deadline.Token);
        return outcome;
    }
    catch (OperationCanceledException) when (!ct.IsCancellationRequested && deadline.IsCancellationRequested)
    {
        // The deadline fired, not the caller's token: report it as a timeout.
        var message = $"Operation '{operationName}' timed out after {timeout}";
        throw new TimeoutException(message);
    }
}
/// <inheritdoc />
/// <remarks>
/// Requires the sandbox to be running, then relays every event produced by the bridge's
/// streaming invocation to the caller in order.
/// </remarks>
public async IAsyncEnumerable<TEvent> ExecuteStreamingAsync<TEvent>(
    string operationName,
    object? parameters,
    [EnumeratorCancellation] CancellationToken ct)
{
    EnsureRunning();

    var events = _bridge.InvokeStreamingAsync<TEvent>(operationName, parameters, ct);

    // Hand-written form of "await foreach": obtain the enumerator, pump it, dispose it.
    await using var cursor = events.GetAsyncEnumerator();
    while (await cursor.MoveNextAsync())
    {
        yield return cursor.Current;
    }
}
/// <inheritdoc />
/// <remarks>
/// Reports unhealthy without contacting the plugin unless the sandbox is running. Otherwise
/// the bridge's health result is returned with the sandbox id and the latest memory/CPU usage
/// added to its details. Any exception is converted into an unhealthy result.
/// </remarks>
public async Task<HealthCheckResult> HealthCheckAsync(CancellationToken ct)
{
    if (_state == SandboxState.Running)
    {
        try
        {
            using var deadline = CancellationTokenSource.CreateLinkedTokenSource(ct);
            deadline.CancelAfter(_config.Timeouts.HealthCheckTimeout);

            var bridgeResult = await _bridge.HealthCheckAsync(deadline.Token);

            // Copy the plugin-reported details (if any) and append sandbox-level data.
            var enriched = new Dictionary<string, object>(
                bridgeResult.Details ?? new Dictionary<string, object>());
            enriched["sandboxId"] = Id;
            enriched["memoryUsageMb"] = _currentUsage.MemoryUsageMb;
            enriched["cpuUsagePercent"] = _currentUsage.CpuUsagePercent;

            return bridgeResult with { Details = enriched };
        }
        catch (Exception ex)
        {
            return HealthCheckResult.Unhealthy(ex);
        }
    }

    return HealthCheckResult.Unhealthy($"Sandbox is in state {_state}");
}
/// <inheritdoc />
/// <remarks>
/// Stops the sandbox if it is still running, then disposes the bridge and the monitoring
/// cancellation source. Disposal of those resources happens even when stopping fails, and a
/// stop failure is logged rather than thrown so that disposal itself does not throw.
/// </remarks>
public async ValueTask DisposeAsync()
{
    try
    {
        if (_state == SandboxState.Running)
        {
            await StopAsync(_config.Timeouts.ShutdownTimeout, CancellationToken.None);
        }
    }
    catch (Exception ex)
    {
        // StopAsync has already logged the error and moved the sandbox to Failed;
        // rethrowing from disposal would only mask whatever the caller is unwinding from.
        _logger.LogWarning(ex, "Sandbox {Id} did not stop cleanly during disposal", Id);
    }
    finally
    {
        _bridge.Dispose();
        _monitoringCts?.Dispose();
    }
}
/// <summary>
/// Guards operations that need a live plugin.
/// </summary>
/// <exception cref="InvalidOperationException">The sandbox state is not <see cref="SandboxState.Running"/>.</exception>
private void EnsureRunning()
{
    if (_state == SandboxState.Running)
    {
        return;
    }

    throw new InvalidOperationException($"Sandbox is not running (state: {_state})");
}
/// <summary>
/// Records a new lifecycle state, logs the change at debug level and notifies
/// <see cref="StateChanged"/> subscribers.
/// </summary>
/// <param name="newState">State to switch to.</param>
/// <param name="reason">Optional explanation; logged as "N/A" when absent and passed to subscribers as given.</param>
private void TransitionState(SandboxState newState, string? reason = null)
{
    var previous = _state;
    _state = newState;

    _logger.LogDebug("Sandbox {Id} state changed: {OldState} -> {NewState} ({Reason})",
        Id, previous, newState, reason ?? "N/A");

    var subscribers = StateChanged;
    if (subscribers is null)
    {
        return;
    }

    var args = new SandboxStateChangedEventArgs
    {
        OldState = previous,
        NewState = newState,
        Reason = reason
    };
    subscribers(this, args);
}
/// <summary>
/// Creates an empty working directory for the sandbox and copies the plugin's files into it.
/// </summary>
/// <param name="manifest">Manifest whose <c>AssemblyPath</c> locates the plugin directory to copy.</param>
/// <returns>
/// The configured working directory, or <c>stellaops-sandbox/{Id}</c> under the system temp
/// path when none is configured.
/// </returns>
private string PrepareWorkingDirectory(PluginManifest manifest)
{
    var sandboxRoot = _config.WorkingDirectory;
    sandboxRoot ??= Path.Combine(Path.GetTempPath(), "stellaops-sandbox", Id);

    // Always start from a clean directory: drop whatever an earlier run left behind.
    if (Directory.Exists(sandboxRoot))
    {
        Directory.Delete(sandboxRoot, recursive: true);
    }
    Directory.CreateDirectory(sandboxRoot);

    // Copy plugin files to sandbox directory
    var assemblyPath = manifest.AssemblyPath;
    if (string.IsNullOrEmpty(assemblyPath))
    {
        return sandboxRoot;
    }

    var pluginDir = Path.GetDirectoryName(assemblyPath);
    if (string.IsNullOrEmpty(pluginDir) || !Directory.Exists(pluginDir))
    {
        return sandboxRoot;
    }

    CopyDirectory(pluginDir, sandboxRoot);
    return sandboxRoot;
}
/// <summary>
/// Releases what the sandbox set up: the network policy, the process resource limits (when a
/// process exists) and the working directory. Each step is best-effort; a failure is logged as
/// a warning and the remaining steps still run.
/// </summary>
private async Task CleanupAsync()
{
    await RemoveNetworkPolicyAsync();
    await RemoveResourceLimitsAsync();
    CleanupWorkingDirectory();

    async Task RemoveNetworkPolicyAsync()
    {
        try
        {
            await _networkEnforcer.RemovePolicyAsync(Id, CancellationToken.None);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to cleanup network policy for sandbox {Id}", Id);
        }
    }

    async Task RemoveResourceLimitsAsync()
    {
        if (_process == null)
        {
            return;
        }

        try
        {
            await _resourceLimiter.RemoveLimitsAsync(_process, CancellationToken.None);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to cleanup resource limits for sandbox {Id}", Id);
        }
    }
}
/// <summary>
/// Deletes the sandbox working directory if it exists. Falls back to the default
/// <c>stellaops-sandbox/{Id}</c> temp location when no directory was recorded. A failed
/// delete is logged as a warning and not rethrown.
/// </summary>
private void CleanupWorkingDirectory()
{
    var target = _workingDirectory;
    target ??= Path.Combine(Path.GetTempPath(), "stellaops-sandbox", Id);

    if (!Directory.Exists(target))
    {
        return;
    }

    try
    {
        Directory.Delete(target, recursive: true);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to cleanup sandbox directory {WorkDir}", target);
    }
}
/// <summary>
/// Builds the endpoint path handed to the host process and the bridge.
/// </summary>
/// <returns>
/// A named-pipe path on Windows; otherwise a <c>.sock</c> file path under the system temp
/// directory. Both embed the sandbox id.
/// </returns>
private string GetSocketPath() =>
    OperatingSystem.IsWindows()
        ? $"\\\\.\\pipe\\stellaops-sandbox-{Id}"
        : Path.Combine(Path.GetTempPath(), $"stellaops-sandbox-{Id}.sock");
/// <summary>
/// Repeatedly tries to connect the bridge to the plugin host until it succeeds, the process
/// exits, the caller cancels, or the configured startup timeout elapses.
/// </summary>
/// <param name="socketPath">Endpoint the bridge should connect to.</param>
/// <param name="ct">Caller's cancellation token.</param>
/// <exception cref="InvalidOperationException">The plugin process exited before a connection was made.</exception>
/// <exception cref="TimeoutException">
/// The startup timeout elapsed first; the inner exception is the last connection error, if any.
/// </exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
private async Task WaitForReadyAsync(string socketPath, CancellationToken ct)
{
    using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    timeoutCts.CancelAfter(_config.Timeouts.StartupTimeout);

    Exception? lastConnectError = null;
    try
    {
        while (!timeoutCts.IsCancellationRequested)
        {
            var process = _process;
            if (process?.HasExited == true)
            {
                throw new InvalidOperationException(
                    $"Plugin process exited with code {process.ExitCode}");
            }

            // Try to connect
            try
            {
                await _bridge.ConnectAsync(socketPath, timeoutCts.Token);
                return;
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // Not ready yet; keep the error so a timeout can report why.
                lastConnectError = ex;
            }

            await Task.Delay(100, timeoutCts.Token);
        }
    }
    catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !ct.IsCancellationRequested)
    {
        // The startup deadline fired inside ConnectAsync or Task.Delay. Fall through so it
        // is reported as a timeout, matching how ExecuteAsync treats its own deadline.
    }

    // The loop also ends when the caller cancels; that must not be reported as a timeout.
    ct.ThrowIfCancellationRequested();
    throw new TimeoutException("Plugin process did not become ready in time", lastConnectError);
}
/// <summary>
/// Starts the background task that samples the process's resource usage about once per
/// second, raises threshold warnings and logs when a limit is exceeded.
/// </summary>
/// <remarks>
/// The loop runs while the sandbox is <see cref="SandboxState.Starting"/> or
/// <see cref="SandboxState.Running"/>. StartAsync calls this method just before it switches to
/// Running, so accepting Starting keeps the task from ending before the transition is made.
/// The delay between samples is applied after failures as well, so a persistent error cannot
/// turn into a tight loop.
/// </remarks>
private void StartResourceMonitoring()
{
    var cts = new CancellationTokenSource();
    _monitoringCts = cts;

    // Capture the token once: re-reading the field inside the task would throw
    // ObjectDisposedException if the source were disposed while the loop is still running.
    var token = cts.Token;
    var sampleInterval = TimeSpan.FromMilliseconds(1000);

    _monitoringTask = Task.Run(async () =>
    {
        while (!token.IsCancellationRequested
            && (_state is SandboxState.Starting or SandboxState.Running))
        {
            try
            {
                var process = _process;
                if (process != null && !process.HasExited)
                {
                    _currentUsage = await _resourceLimiter.GetUsageAsync(process, token);

                    // Check thresholds
                    CheckResourceThreshold(ResourceType.Memory,
                        _currentUsage.MemoryUsageMb,
                        _config.ResourceLimits.MaxMemoryMb);
                    CheckResourceThreshold(ResourceType.Cpu,
                        _currentUsage.CpuUsagePercent,
                        _config.ResourceLimits.MaxCpuPercent);

                    // Check if limits exceeded
                    var limitCheck = await _resourceLimiter.CheckLimitsAsync(
                        process, _config.ResourceLimits, token);
                    if (limitCheck.IsExceeded)
                    {
                        _logger.LogWarning("Sandbox {Id} exceeded resource limit: {Message}",
                            Id, limitCheck.Message);
                    }
                }
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error monitoring resources for sandbox {Id}", Id);
            }

            // Kept outside the sampling try block so the pause also applies after an error.
            try
            {
                await Task.Delay(sampleInterval, token);
            }
            catch (OperationCanceledException)
            {
                break;
            }
        }
    });
}
/// <summary>
/// Raises <see cref="ResourceWarning"/> when <paramref name="current"/> is at least 80% of
/// <paramref name="max"/>.
/// </summary>
/// <param name="resource">Resource the measurement belongs to.</param>
/// <param name="current">Measured usage.</param>
/// <param name="max">Configured limit; values of zero or below skip the check.</param>
private void CheckResourceThreshold(ResourceType resource, double current, double max)
{
    if (max <= 0)
    {
        return;
    }

    var usedPercent = current / max * 100;
    var thresholdReached = usedPercent >= 80;
    if (!thresholdReached)
    {
        return;
    }

    var subscribers = ResourceWarning;
    if (subscribers is null)
    {
        return;
    }

    var warning = new ResourceWarningEventArgs
    {
        Resource = resource,
        CurrentUsagePercent = usedPercent,
        ThresholdPercent = 80
    };
    subscribers(this, warning);
}
/// <summary>
/// Recursively copies every directory and file under <paramref name="source"/> into
/// <paramref name="destination"/>, preserving the relative layout and overwriting files that
/// already exist.
/// </summary>
/// <param name="source">Root directory to copy from.</param>
/// <param name="destination">Root directory to copy into; created if it does not exist.</param>
private static void CopyDirectory(string source, string destination)
{
    Directory.CreateDirectory(destination);

    // Target paths are built from the path relative to the source root. A textual
    // Replace(source, destination) would also rewrite any later occurrence of the source
    // text inside the path and produce wrong destinations.
    foreach (var dir in Directory.GetDirectories(source, "*", SearchOption.AllDirectories))
    {
        var relativeDir = Path.GetRelativePath(source, dir);
        Directory.CreateDirectory(Path.Combine(destination, relativeDir));
    }

    foreach (var file in Directory.GetFiles(source, "*", SearchOption.AllDirectories))
    {
        var relativeFile = Path.GetRelativePath(source, file);
        File.Copy(file, Path.Combine(destination, relativeFile), overwrite: true);
    }
}
}