using Microsoft.Extensions.Logging;
using StellaOps.Plugin.Abstractions.Health;
using StellaOps.Plugin.Abstractions.Manifest;
using StellaOps.Plugin.Sandbox.Communication;
using StellaOps.Plugin.Sandbox.Filesystem;
using StellaOps.Plugin.Sandbox.Network;
using StellaOps.Plugin.Sandbox.Process;
using StellaOps.Plugin.Sandbox.Resources;
using System.Runtime.CompilerServices;
using SystemProcess = System.Diagnostics.Process;

namespace StellaOps.Plugin.Sandbox;

// NOTE(review): generic type arguments look like they were lost in this copy of the file
// (e.g. ExecuteAsync is declared as returning Task but returns a value, and Dictionary is
// used without type arguments). They are reproduced as found - confirm against the original.

/// <summary>
/// Process-based sandbox implementation for untrusted plugins.
/// </summary>
/// <remarks>
/// The sandbox copies the plugin into its own working directory, starts it through the
/// injected process manager, talks to it through the injected bridge, and polls the
/// resource limiter for usage while the sandbox is in the running state.
/// </remarks>
public sealed class ProcessSandbox : ISandbox
{
    /// <summary>Pause, in milliseconds, between connection attempts while the plugin starts.</summary>
    private const int ConnectRetryDelayMs = 100;

    /// <summary>Pause, in milliseconds, between two resource usage samples.</summary>
    private const int MonitoringIntervalMs = 1000;

    /// <summary>Percentage of a configured limit at which <see cref="ResourceWarning"/> is raised.</summary>
    private const int WarningThresholdPercent = 80;

    private readonly SandboxConfiguration _config;
    private readonly IPluginProcessManager _processManager;
    private readonly IGrpcPluginBridge _bridge;
    private readonly IResourceLimiter _resourceLimiter;
    private readonly INetworkPolicyEnforcer _networkEnforcer;
    private readonly ILogger _logger;
    private readonly TimeProvider _timeProvider;

    private SystemProcess? _process;
    private SandboxState _state = SandboxState.Created;
    private ResourceUsage _currentUsage = ResourceUsage.Empty;
    private CancellationTokenSource? _monitoringCts;
    private Task? _monitoringTask;
    private string? _workingDirectory;

    /// <summary>Identifier of this sandbox; also used to derive its directory and socket names.</summary>
    public string Id { get; }

    /// <summary>Current lifecycle state of the sandbox.</summary>
    public SandboxState State => _state;

    /// <summary>Most recent resource usage sample taken by the monitoring loop.</summary>
    public ResourceUsage CurrentUsage => _currentUsage;

    /// <summary>Raised on every state transition, after the new state has been stored.</summary>
    public event EventHandler? StateChanged;

    /// <summary>Raised by the monitoring loop when usage reaches 80% of a configured limit.</summary>
    public event EventHandler? ResourceWarning;

    /// <summary>
    /// Creates a new process sandbox.
    /// </summary>
    /// <param name="id">Identifier of the sandbox.</param>
    /// <param name="config">Sandbox settings (limits, network policy, timeouts, environment).</param>
    /// <param name="processManager">Starts and stops the plugin host process.</param>
    /// <param name="bridge">Communication channel to the plugin process.</param>
    /// <param name="resourceLimiter">Applies, measures and removes resource limits.</param>
    /// <param name="networkEnforcer">Applies and removes the network policy.</param>
    /// <param name="logger">Logger used for lifecycle and diagnostic messages.</param>
    /// <param name="timeProvider">Time source stored by the sandbox.</param>
    public ProcessSandbox(
        string id,
        SandboxConfiguration config,
        IPluginProcessManager processManager,
        IGrpcPluginBridge bridge,
        IResourceLimiter resourceLimiter,
        INetworkPolicyEnforcer networkEnforcer,
        ILogger logger,
        TimeProvider timeProvider)
    {
        Id = id;
        _config = config;
        _processManager = processManager;
        _bridge = bridge;
        _resourceLimiter = resourceLimiter;
        _networkEnforcer = networkEnforcer;
        _logger = logger;
        _timeProvider = timeProvider;
    }

    /// <summary>
    /// Prepares the working directory, applies the network policy and resource limits,
    /// starts the plugin process, connects to it, initializes the plugin and starts
    /// resource monitoring.
    /// </summary>
    /// <param name="manifest">Manifest of the plugin to host.</param>
    /// <param name="ct">Cancels the start sequence.</param>
    /// <remarks>
    /// On any failure the sandbox moves to the failed state, a started process and an open
    /// bridge connection are torn down on a best-effort basis, sandbox resources are
    /// cleaned up, and the original exception is rethrown.
    /// </remarks>
    public async Task StartAsync(PluginManifest manifest, CancellationToken ct)
    {
        TransitionState(SandboxState.Starting);

        try
        {
            // 1. Create isolated working directory
            _workingDirectory = PrepareWorkingDirectory(manifest);

            // 2. Configure resource limits
            var resourceConfig = _resourceLimiter.CreateConfiguration(_config.ResourceLimits);

            // 3. Configure network policy
            await _networkEnforcer.ApplyPolicyAsync(Id, _config.NetworkPolicy, ct);

            // 4. Start the plugin host process
            var socketPath = GetSocketPath();
            _process = await _processManager.StartAsync(new ProcessStartRequest
            {
                PluginAssemblyPath = manifest.AssemblyPath!,
                EntryPoint = manifest.EntryPoint,
                WorkingDirectory = _workingDirectory,
                SocketPath = socketPath,
                ResourceConfiguration = resourceConfig,
                EnvironmentVariables = _config.EnvironmentVariables
            }, ct);

            // 5. Apply resource limits to the process
            await _resourceLimiter.ApplyLimitsAsync(_process, resourceConfig, ct);

            // 6. Wait for the process to be ready and connect
            await WaitForReadyAsync(socketPath, ct);

            // 7. Initialize the plugin
            await _bridge.InitializePluginAsync(manifest, ct);

            // 8. Start resource monitoring
            StartResourceMonitoring();

            TransitionState(SandboxState.Running);

            _logger.LogInformation("Sandbox {Id} started for plugin {PluginId}", Id, manifest.Info.Id);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to start sandbox {Id}", Id);
            TransitionState(SandboxState.Failed, ex.Message);

            // StopAsync returns early once the state is Failed, so this is the only place
            // where a process started above can still be shut down.
            await TearDownAfterFailedStartAsync();
            await CleanupAsync();
            throw;
        }
    }

    /// <summary>
    /// Stops monitoring, asks the plugin to shut down, disconnects the bridge, stops the
    /// process and cleans up sandbox resources.
    /// </summary>
    /// <param name="timeout">Time allowed for the graceful shutdown request and passed to the process manager.</param>
    /// <param name="ct">Cancels the stop sequence.</param>
    /// <remarks>
    /// Does nothing when the sandbox is already stopped, failed or killed. On error the
    /// sandbox moves to the failed state, resources are cleaned up and the exception is rethrown.
    /// </remarks>
    public async Task StopAsync(TimeSpan timeout, CancellationToken ct)
    {
        if (_state is SandboxState.Stopped or SandboxState.Failed or SandboxState.Killed)
        {
            return;
        }

        TransitionState(SandboxState.Stopping);

        try
        {
            // Stop monitoring
            _monitoringCts?.Cancel();
            if (_monitoringTask != null)
            {
                try
                {
                    await _monitoringTask;
                }
                catch (Exception ex)
                {
                    // Best effort: a monitoring failure must not block shutdown.
                    _logger.LogDebug(ex, "Monitoring task for sandbox {Id} ended with an error", Id);
                }
            }

            // 1. Signal graceful shutdown via gRPC
            using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            timeoutCts.CancelAfter(timeout);

            try
            {
                if (_bridge.IsConnected)
                {
                    await _bridge.ShutdownPluginAsync(timeoutCts.Token);
                }
            }
            catch (OperationCanceledException)
            {
                _logger.LogWarning("Sandbox {Id} did not shutdown gracefully, killing", Id);
            }

            // 2. Disconnect bridge
            await _bridge.DisconnectAsync(ct);

            // 3. Stop the process
            if (_process != null)
            {
                await _processManager.StopAsync(_process, timeout, ct);
            }

            // 4. Cleanup resources
            await CleanupAsync();

            TransitionState(SandboxState.Stopped);

            _logger.LogInformation("Sandbox {Id} stopped", Id);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error stopping sandbox {Id}", Id);
            TransitionState(SandboxState.Failed, ex.Message);
            await CleanupAsync();
            throw;
        }
    }

    /// <summary>
    /// Invokes an operation on the plugin through the bridge, bounded by a timeout.
    /// </summary>
    /// <param name="operationName">Name of the operation to invoke.</param>
    /// <param name="parameters">Operation arguments, or <c>null</c>.</param>
    /// <param name="timeout">Maximum time allowed for the call.</param>
    /// <param name="ct">Cancels the call.</param>
    /// <returns>The value returned by the bridge.</returns>
    /// <exception cref="InvalidOperationException">The sandbox is not running.</exception>
    /// <exception cref="TimeoutException">The timeout elapsed before the caller's token was cancelled.</exception>
    public async Task ExecuteAsync(
        string operationName,
        object? parameters,
        TimeSpan timeout,
        CancellationToken ct)
    {
        EnsureRunning();

        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        timeoutCts.CancelAfter(timeout);

        try
        {
            return await _bridge.InvokeAsync(operationName, parameters, timeoutCts.Token);
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !ct.IsCancellationRequested)
        {
            throw new TimeoutException($"Operation '{operationName}' timed out after {timeout}");
        }
    }

    /// <summary>
    /// Invokes a streaming operation on the plugin and yields each event as it arrives.
    /// </summary>
    /// <param name="operationName">Name of the operation to invoke.</param>
    /// <param name="parameters">Operation arguments, or <c>null</c>.</param>
    /// <param name="ct">Cancels the stream.</param>
    /// <exception cref="InvalidOperationException">The sandbox is not running.</exception>
    public async IAsyncEnumerable ExecuteStreamingAsync(
        string operationName,
        object? parameters,
        [EnumeratorCancellation] CancellationToken ct)
    {
        EnsureRunning();

        await foreach (var evt in _bridge.InvokeStreamingAsync(operationName, parameters, ct))
        {
            yield return evt;
        }
    }

    /// <summary>
    /// Runs the plugin health check through the bridge and adds the sandbox id and the
    /// latest memory/CPU usage sample to the result details.
    /// </summary>
    /// <param name="ct">Cancels the health check.</param>
    /// <returns>
    /// An unhealthy result when the sandbox is not running or the check throws;
    /// otherwise the bridge result with the extra details.
    /// </returns>
    public async Task HealthCheckAsync(CancellationToken ct)
    {
        if (_state != SandboxState.Running)
        {
            return HealthCheckResult.Unhealthy($"Sandbox is in state {_state}");
        }

        try
        {
            using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            timeoutCts.CancelAfter(_config.Timeouts.HealthCheckTimeout);

            var result = await _bridge.HealthCheckAsync(timeoutCts.Token);

            // Add resource usage to details
            var details = new Dictionary(result.Details ?? new Dictionary())
            {
                ["sandboxId"] = Id,
                ["memoryUsageMb"] = _currentUsage.MemoryUsageMb,
                ["cpuUsagePercent"] = _currentUsage.CpuUsagePercent
            };

            return result with { Details = details };
        }
        catch (Exception ex)
        {
            return HealthCheckResult.Unhealthy(ex);
        }
    }

    /// <summary>
    /// Stops the sandbox if it is running, then disposes the bridge and the monitoring
    /// cancellation source.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        try
        {
            if (_state == SandboxState.Running)
            {
                await StopAsync(_config.Timeouts.ShutdownTimeout, CancellationToken.None);
            }
        }
        finally
        {
            // Release owned resources even when the stop sequence throws.
            _bridge.Dispose();
            _monitoringCts?.Dispose();
        }
    }

    /// <summary>Throws <see cref="InvalidOperationException"/> unless the sandbox is running.</summary>
    private void EnsureRunning()
    {
        if (_state != SandboxState.Running)
        {
            throw new InvalidOperationException($"Sandbox is not running (state: {_state})");
        }
    }

    /// <summary>Stores the new state, logs the transition and raises <see cref="StateChanged"/>.</summary>
    /// <param name="newState">State to move to.</param>
    /// <param name="reason">Optional explanation; logged as "N/A" when absent.</param>
    private void TransitionState(SandboxState newState, string? reason = null)
    {
        var oldState = _state;
        _state = newState;

        _logger.LogDebug("Sandbox {Id} state changed: {OldState} -> {NewState} ({Reason})",
            Id, oldState, newState, reason ?? "N/A");

        StateChanged?.Invoke(this, new SandboxStateChangedEventArgs
        {
            OldState = oldState,
            NewState = newState,
            Reason = reason
        });
    }

    /// <summary>
    /// Recreates the sandbox working directory and copies the directory that contains the
    /// plugin assembly into it.
    /// </summary>
    /// <param name="manifest">Manifest whose assembly path locates the plugin files.</param>
    /// <returns>The working directory path.</returns>
    private string PrepareWorkingDirectory(PluginManifest manifest)
    {
        var workDir = _config.WorkingDirectory
            ?? Path.Combine(Path.GetTempPath(), "stellaops-sandbox", Id);

        // Start from an empty directory: anything already at this path is removed.
        if (Directory.Exists(workDir))
        {
            Directory.Delete(workDir, recursive: true);
        }

        Directory.CreateDirectory(workDir);

        // Copy plugin files to sandbox directory
        if (!string.IsNullOrEmpty(manifest.AssemblyPath))
        {
            var pluginDir = Path.GetDirectoryName(manifest.AssemblyPath);
            if (!string.IsNullOrEmpty(pluginDir) && Directory.Exists(pluginDir))
            {
                CopyDirectory(pluginDir, workDir);
            }
        }

        return workDir;
    }

    /// <summary>
    /// Best-effort teardown used when the start sequence fails: disconnects the bridge if
    /// it is connected and stops the process if one was started. Failures are logged and
    /// swallowed so that the original start error is the one that reaches the caller.
    /// </summary>
    private async Task TearDownAfterFailedStartAsync()
    {
        try
        {
            if (_bridge.IsConnected)
            {
                await _bridge.DisconnectAsync(CancellationToken.None);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to disconnect bridge for sandbox {Id} after failed start", Id);
        }

        if (_process != null)
        {
            try
            {
                await _processManager.StopAsync(_process, _config.Timeouts.ShutdownTimeout, CancellationToken.None);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to stop process for sandbox {Id} after failed start", Id);
            }
        }
    }

    /// <summary>
    /// Removes the network policy, the resource limits (when a process exists) and the
    /// working directory. Each step logs a warning instead of throwing.
    /// </summary>
    private async Task CleanupAsync()
    {
        // Cleanup network policy
        try
        {
            await _networkEnforcer.RemovePolicyAsync(Id, CancellationToken.None);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to cleanup network policy for sandbox {Id}", Id);
        }

        // Cleanup resource limits
        if (_process != null)
        {
            try
            {
                await _resourceLimiter.RemoveLimitsAsync(_process, CancellationToken.None);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to cleanup resource limits for sandbox {Id}", Id);
            }
        }

        // Cleanup working directory
        CleanupWorkingDirectory();
    }

    /// <summary>Deletes the working directory if it exists; a failure is logged as a warning.</summary>
    private void CleanupWorkingDirectory()
    {
        var workDir = _workingDirectory
            ?? Path.Combine(Path.GetTempPath(), "stellaops-sandbox", Id);

        if (Directory.Exists(workDir))
        {
            try
            {
                Directory.Delete(workDir, recursive: true);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to cleanup sandbox directory {WorkDir}", workDir);
            }
        }
    }

    /// <summary>
    /// Builds the endpoint name for this sandbox: a named pipe path on Windows, otherwise
    /// a <c>.sock</c> file path in the temp directory.
    /// </summary>
    private string GetSocketPath()
    {
        if (OperatingSystem.IsWindows())
        {
            return $"\\\\.\\pipe\\stellaops-sandbox-{Id}";
        }
        else
        {
            return Path.Combine(Path.GetTempPath(), $"stellaops-sandbox-{Id}.sock");
        }
    }

    /// <summary>
    /// Repeatedly tries to connect the bridge to the plugin until it succeeds, the process
    /// exits, the startup timeout elapses or the caller cancels.
    /// </summary>
    /// <param name="socketPath">Endpoint to connect to.</param>
    /// <param name="ct">Cancels the wait.</param>
    /// <exception cref="InvalidOperationException">The plugin process exited before a connection was made.</exception>
    /// <exception cref="TimeoutException">The startup timeout elapsed before a connection was made.</exception>
    /// <exception cref="OperationCanceledException">The caller's token was cancelled.</exception>
    private async Task WaitForReadyAsync(string socketPath, CancellationToken ct)
    {
        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        timeoutCts.CancelAfter(_config.Timeouts.StartupTimeout);

        try
        {
            while (!timeoutCts.IsCancellationRequested)
            {
                if (_process?.HasExited == true)
                {
                    throw new InvalidOperationException(
                        $"Plugin process exited with code {_process.ExitCode}");
                }

                // Try to connect
                try
                {
                    await _bridge.ConnectAsync(socketPath, timeoutCts.Token);
                    return;
                }
                catch (Exception ex) when (ex is not OperationCanceledException)
                {
                    // Not ready yet, wait and retry
                    await Task.Delay(ConnectRetryDelayMs, timeoutCts.Token);
                }
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !ct.IsCancellationRequested)
        {
            // The startup timeout fired while connecting or waiting; it is reported below
            // as a timeout, the same way ExecuteAsync reports its own timeout.
        }

        // Caller cancellation is reported as cancellation, never as a timeout.
        ct.ThrowIfCancellationRequested();
        throw new TimeoutException("Plugin process did not become ready in time");
    }

    /// <summary>
    /// Starts a background loop that samples resource usage about once per second while
    /// the sandbox is running, raises threshold warnings and logs exceeded limits.
    /// </summary>
    private void StartResourceMonitoring()
    {
        var cts = new CancellationTokenSource();
        _monitoringCts = cts;

        // Read the token once: CancellationTokenSource.Token throws after the source has
        // been disposed, and the field itself is nullable.
        var token = cts.Token;

        _monitoringTask = Task.Run(async () =>
        {
            while (!token.IsCancellationRequested && _state == SandboxState.Running)
            {
                try
                {
                    if (_process is { HasExited: false } process)
                    {
                        _currentUsage = await _resourceLimiter.GetUsageAsync(process, token);

                        // Check thresholds
                        CheckResourceThreshold(ResourceType.Memory,
                            _currentUsage.MemoryUsageMb,
                            _config.ResourceLimits.MaxMemoryMb);

                        CheckResourceThreshold(ResourceType.Cpu,
                            _currentUsage.CpuUsagePercent,
                            _config.ResourceLimits.MaxCpuPercent);

                        // Check if limits exceeded
                        var limitCheck = await _resourceLimiter.CheckLimitsAsync(
                            process, _config.ResourceLimits, token);

                        if (limitCheck.IsExceeded)
                        {
                            _logger.LogWarning("Sandbox {Id} exceeded resource limit: {Message}",
                                Id, limitCheck.Message);
                        }
                    }
                }
                catch (OperationCanceledException)
                {
                    break;
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "Error monitoring resources for sandbox {Id}", Id);
                }

                // The pause sits outside the sampling try block so that a sampling call
                // which keeps failing is retried once per interval, not in a tight loop.
                try
                {
                    await Task.Delay(MonitoringIntervalMs, token);
                }
                catch (OperationCanceledException)
                {
                    break;
                }
            }
        });
    }

    /// <summary>
    /// Raises <see cref="ResourceWarning"/> when <paramref name="current"/> is at least 80%
    /// of <paramref name="max"/>. A non-positive maximum disables the check.
    /// </summary>
    /// <param name="resource">Resource being checked.</param>
    /// <param name="current">Current usage, in the same unit as <paramref name="max"/>.</param>
    /// <param name="max">Configured limit.</param>
    private void CheckResourceThreshold(ResourceType resource, double current, double max)
    {
        if (max <= 0)
        {
            return;
        }

        var percent = (current / max) * 100;
        if (percent >= WarningThresholdPercent)
        {
            ResourceWarning?.Invoke(this, new ResourceWarningEventArgs
            {
                Resource = resource,
                CurrentUsagePercent = percent,
                ThresholdPercent = WarningThresholdPercent
            });
        }
    }

    /// <summary>
    /// Recursively copies every directory and file under <paramref name="source"/> into
    /// <paramref name="destination"/>, overwriting existing files.
    /// </summary>
    /// <param name="source">Directory to copy from.</param>
    /// <param name="destination">Directory to copy into.</param>
    private static void CopyDirectory(string source, string destination)
    {
        // Paths are mapped through their path relative to the source root. A plain
        // string.Replace would also rewrite later occurrences of the source text inside
        // nested paths and produce wrong targets.
        foreach (var dir in Directory.GetDirectories(source, "*", SearchOption.AllDirectories))
        {
            Directory.CreateDirectory(Path.Combine(destination, Path.GetRelativePath(source, dir)));
        }

        foreach (var file in Directory.GetFiles(source, "*", SearchOption.AllDirectories))
        {
            var target = Path.Combine(destination, Path.GetRelativePath(source, file));
            File.Copy(file, target, overwrite: true);
        }
    }
}