release orchestrator v1 draft and build fixes

This commit is contained in:
master
2026-01-12 12:24:17 +02:00
parent f3de858c59
commit 9873f80830
1598 changed files with 240385 additions and 5944 deletions

View File

@@ -0,0 +1,104 @@
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Compose.Tasks;
using StellaOps.Agent.Core.Capability;
using StellaOps.Agent.Core.Exceptions;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Compose;
/// <summary>
/// Compose capability for managing docker-compose stacks.
/// </summary>
/// <remarks>
/// Each supported <c>compose.*</c> task type is routed to a dedicated <see cref="IComposeTask"/>
/// handler. Task type lookup is case-insensitive.
/// </remarks>
public sealed class ComposeCapability : IAgentCapability
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<ComposeCapability> _logger;
    private readonly Dictionary<string, IComposeTask> _taskHandlers;

    /// <inheritdoc />
    public string Name => "compose";

    /// <inheritdoc />
    public string Version => "1.0.0";

    /// <inheritdoc />
    public IReadOnlyList<string> SupportedTaskTypes => new[]
    {
        "compose.pull",
        "compose.up",
        "compose.down",
        "compose.scale",
        "compose.health-check",
        "compose.ps"
    };

    /// <summary>
    /// Creates a new instance and registers one handler per supported task type.
    /// </summary>
    /// <param name="executor">Runs the compose CLI.</param>
    /// <param name="fileManager">Resolves project directories and compose files.</param>
    /// <param name="timeProvider">Clock handed to every task handler.</param>
    /// <param name="logger">Logger shared with the task handlers.</param>
    public ComposeCapability(
        ComposeExecutor executor,
        ComposeFileManager fileManager,
        TimeProvider timeProvider,
        ILogger<ComposeCapability> logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _timeProvider = timeProvider;
        _logger = logger;
        _taskHandlers = new Dictionary<string, IComposeTask>(StringComparer.OrdinalIgnoreCase)
        {
            ["compose.pull"] = new ComposePullTask(_executor, _fileManager, logger),
            ["compose.up"] = new ComposeUpTask(_executor, _fileManager, logger),
            ["compose.down"] = new ComposeDownTask(_executor, _fileManager, logger),
            ["compose.scale"] = new ComposeScaleTask(_executor, _fileManager, logger),
            ["compose.health-check"] = new ComposeHealthCheckTask(_executor, _fileManager, logger),
            ["compose.ps"] = new ComposePsTask(_executor, _fileManager, logger)
        };
    }

    /// <summary>
    /// Probes the compose CLI by asking for its version.
    /// </summary>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// <c>true</c> when a non-empty version was reported; <c>false</c> when the probe threw or
    /// produced no version.
    /// </returns>
    public async Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        try
        {
            var version = await _executor.GetVersionAsync(ct);

            // A compose command that exits unsuccessfully may yield no version text instead of an
            // exception, so an empty answer must not be mistaken for a working installation.
            if (string.IsNullOrWhiteSpace(version))
            {
                _logger.LogError("Failed to initialize Compose capability: no version was reported");
                return false;
            }

            _logger.LogInformation("Compose capability initialized: {Version}", version);
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to initialize Compose capability");
            return false;
        }
    }

    /// <summary>
    /// Dispatches a task to the handler registered for its task type.
    /// </summary>
    /// <param name="task">Task to execute.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The result produced by the handler.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is null.</exception>
    /// <exception cref="UnsupportedTaskTypeException">No handler is registered for the task type.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        if (!_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            throw new UnsupportedTaskTypeException(task.TaskType);
        }

        _logger.LogDebug("Executing task {TaskType} with ID {TaskId}", task.TaskType, task.Id);
        return await handler.ExecuteAsync(task, _timeProvider, ct);
    }

    /// <summary>
    /// Reports whether the compose CLI currently answers a version query.
    /// </summary>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>A healthy status only when a non-empty version was reported.</returns>
    public async Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        try
        {
            var version = await _executor.GetVersionAsync(ct);

            // Same reasoning as InitializeAsync: "did not throw" is not proof that compose works.
            if (string.IsNullOrWhiteSpace(version))
            {
                return new CapabilityHealthStatus(
                    false,
                    "Docker Compose not available: version command returned no output");
            }

            return new CapabilityHealthStatus(true, "Docker Compose available");
        }
        catch (Exception ex)
        {
            return new CapabilityHealthStatus(false, $"Docker Compose not available: {ex.Message}");
        }
    }
}

View File

@@ -0,0 +1,321 @@
using System.Diagnostics;
using System.Text;
using Microsoft.Extensions.Logging;
namespace StellaOps.Agent.Compose;
/// <summary>
/// Executes docker compose commands.
/// </summary>
/// <remarks>
/// Every command runs as a child process with redirected output. Arguments are handed over through
/// <see cref="ProcessStartInfo.ArgumentList"/>, so compose file paths and service names reach the
/// CLI verbatim without any manual quoting.
/// </remarks>
public sealed class ComposeExecutor
{
    /// <summary>How long the <c>docker compose version</c> probe may run during detection.</summary>
    private const int DetectionTimeoutMilliseconds = 5000;

    private readonly string _composeCommand;
    private readonly ILogger<ComposeExecutor> _logger;

    /// <summary>
    /// Creates a new instance, detecting whether <c>docker compose</c> or <c>docker-compose</c> is available.
    /// </summary>
    /// <param name="logger">Logger for executed commands and failures.</param>
    public ComposeExecutor(ILogger<ComposeExecutor> logger)
    {
        _logger = logger;
        _composeCommand = DetectComposeCommand();
    }

    /// <summary>
    /// Creates a new instance with a specific compose command.
    /// </summary>
    /// <param name="composeCommand">
    /// Either a command starting with <c>docker compose</c> (run as <c>docker</c> with a leading
    /// <c>compose</c> argument) or the name/path of a standalone compose executable.
    /// </param>
    /// <param name="logger">Logger for executed commands and failures.</param>
    public ComposeExecutor(string composeCommand, ILogger<ComposeExecutor> logger)
    {
        _composeCommand = composeCommand;
        _logger = logger;
    }

    /// <summary>
    /// Gets the docker compose version.
    /// </summary>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The trimmed output of <c>version --short</c>.</returns>
    /// <exception cref="InvalidOperationException">The version command exited with a non-zero code.</exception>
    public async Task<string> GetVersionAsync(CancellationToken ct = default)
    {
        var result = await ExecuteAsync(new[] { "version", "--short" }, null, ct);

        // Without this check a broken installation would be reported as version "".
        if (!result.Success)
        {
            throw new InvalidOperationException(
                $"Compose version command failed with exit code {result.ExitCode}: {result.StandardError.Trim()}");
        }

        return result.StandardOutput.Trim();
    }

    /// <summary>
    /// Pulls images for a compose project.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Compose file passed via <c>-f</c>.</param>
    /// <param name="credentials">
    /// Optional credentials; <c>registry.username</c> and <c>registry.password</c> are exported to
    /// the process as <c>DOCKER_REGISTRY_USER</c> and <c>DOCKER_REGISTRY_PASSWORD</c>.
    /// </param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result.</returns>
    public async Task<ComposeResult> PullAsync(
        string projectDir,
        string composeFile,
        IReadOnlyDictionary<string, string>? credentials = null,
        CancellationToken ct = default)
    {
        var args = new List<string> { "-f", composeFile, "pull" };
        return await ExecuteAsync(args, projectDir, ct, BuildEnvironment(credentials));
    }

    /// <summary>
    /// Starts a compose project in detached mode.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Compose file passed via <c>-f</c>.</param>
    /// <param name="options">Flags, optional service subset and process environment.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result.</returns>
    public async Task<ComposeResult> UpAsync(
        string projectDir,
        string composeFile,
        ComposeUpOptions options,
        CancellationToken ct = default)
    {
        var args = new List<string> { "-f", composeFile, "up", "-d" };

        if (options.ForceRecreate)
        {
            args.Add("--force-recreate");
        }

        if (options.RemoveOrphans)
        {
            args.Add("--remove-orphans");
        }

        if (options.NoStart)
        {
            args.Add("--no-start");
        }

        if (options.Services is { Count: > 0 } services)
        {
            // One argument per service: a name can no longer be split or smuggle extra flags.
            args.AddRange(services);
        }

        return await ExecuteAsync(args, projectDir, ct, options.Environment);
    }

    /// <summary>
    /// Stops and removes a compose project.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Compose file passed via <c>-f</c>.</param>
    /// <param name="options">Flags and optional stop timeout (truncated to whole seconds).</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result.</returns>
    public async Task<ComposeResult> DownAsync(
        string projectDir,
        string composeFile,
        ComposeDownOptions options,
        CancellationToken ct = default)
    {
        var args = new List<string> { "-f", composeFile, "down" };

        if (options.RemoveVolumes)
        {
            args.Add("-v");
        }

        if (options.RemoveOrphans)
        {
            args.Add("--remove-orphans");
        }

        if (options.Timeout is { } timeout)
        {
            args.Add("-t");
            args.Add(((int)timeout.TotalSeconds).ToString(System.Globalization.CultureInfo.InvariantCulture));
        }

        return await ExecuteAsync(args, projectDir, ct);
    }

    /// <summary>
    /// Scales services in a compose project.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Compose file passed via <c>-f</c>.</param>
    /// <param name="scaling">Service name to replica count; must contain at least one entry.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="scaling"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="scaling"/> is empty.</exception>
    public async Task<ComposeResult> ScaleAsync(
        string projectDir,
        string composeFile,
        IReadOnlyDictionary<string, int> scaling,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(scaling);

        // With nothing to scale the command would degrade into a plain "up" of the whole stack.
        if (scaling.Count == 0)
        {
            throw new ArgumentException("At least one service must be specified for scaling.", nameof(scaling));
        }

        var args = new List<string> { "-f", composeFile, "up", "-d", "--no-recreate" };

        // --scale takes exactly one SERVICE=NUM pair, so the flag is repeated per service.
        // A single "--scale a=1 b=2" would make compose treat "b=2" as a service name.
        foreach (var (service, replicas) in scaling)
        {
            args.Add("--scale");
            args.Add(service + "=" + replicas.ToString(System.Globalization.CultureInfo.InvariantCulture));
        }

        return await ExecuteAsync(args, projectDir, ct);
    }

    /// <summary>
    /// Lists services in a compose project as JSON.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Compose file passed via <c>-f</c>.</param>
    /// <param name="all">When true, adds <c>-a</c> to the command.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result; standard output carries the JSON.</returns>
    public async Task<ComposeResult> PsAsync(
        string projectDir,
        string composeFile,
        bool all = false,
        CancellationToken ct = default)
    {
        var args = new List<string> { "-f", composeFile, "ps", "--format", "json" };

        if (all)
        {
            args.Add("-a");
        }

        return await ExecuteAsync(args, projectDir, ct);
    }

    /// <summary>
    /// Runs the compose CLI with the given arguments and collects its output.
    /// </summary>
    /// <param name="arguments">Arguments following the compose command, one entry per argument.</param>
    /// <param name="workingDirectory">Working directory; the current directory when null.</param>
    /// <param name="ct">Cancels the wait and terminates the child process.</param>
    /// <param name="environment">Extra environment variables for the child process.</param>
    /// <returns>Exit code and captured standard output / standard error.</returns>
    private async Task<ComposeResult> ExecuteAsync(
        IEnumerable<string> arguments,
        string? workingDirectory,
        CancellationToken ct,
        IReadOnlyDictionary<string, string>? environment = null)
    {
        var (fileName, leadingArguments) = ParseCommand(_composeCommand);

        var psi = new ProcessStartInfo
        {
            FileName = fileName,
            WorkingDirectory = workingDirectory ?? Environment.CurrentDirectory,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        foreach (var argument in leadingArguments)
        {
            psi.ArgumentList.Add(argument);
        }

        foreach (var argument in arguments)
        {
            psi.ArgumentList.Add(argument);
        }

        if (environment is not null)
        {
            foreach (var (key, value) in environment)
            {
                psi.Environment[key] = value;
            }
        }

        _logger.LogDebug("Executing: {Command} {Args}", psi.FileName, string.Join(' ', psi.ArgumentList));

        using var process = new Process { StartInfo = psi };
        var stdout = new StringBuilder();
        var stderr = new StringBuilder();

        process.OutputDataReceived += (_, e) =>
        {
            if (e.Data is not null)
            {
                stdout.AppendLine(e.Data);
            }
        };
        process.ErrorDataReceived += (_, e) =>
        {
            if (e.Data is not null)
            {
                stderr.AppendLine(e.Data);
            }
        };

        process.Start();
        process.BeginOutputReadLine();
        process.BeginErrorReadLine();

        try
        {
            await process.WaitForExitAsync(ct);
        }
        catch (OperationCanceledException)
        {
            // Cancelling the wait does not stop the child; terminate it so it is not orphaned.
            TryKill(process);
            throw;
        }

        var result = new ComposeResult(
            process.ExitCode == 0,
            process.ExitCode,
            stdout.ToString(),
            stderr.ToString());

        if (!result.Success)
        {
            _logger.LogWarning(
                "Compose command failed with exit code {ExitCode}: {Stderr}",
                result.ExitCode,
                result.StandardError);
        }

        return result;
    }

    /// <summary>
    /// Splits the configured compose command into the executable and the arguments that must precede
    /// every compose sub-command.
    /// </summary>
    /// <param name="composeCommand">Configured or detected compose command.</param>
    /// <returns>
    /// <c>docker</c> plus a leading <c>compose</c> argument for the v2 plugin form; otherwise the
    /// command itself with no leading arguments.
    /// </returns>
    private static (string FileName, IReadOnlyList<string> LeadingArguments) ParseCommand(string composeCommand)
    {
        // Handle "docker compose" vs "docker-compose"
        if (composeCommand.StartsWith("docker compose", StringComparison.OrdinalIgnoreCase))
        {
            return ("docker", new[] { "compose" });
        }

        return (composeCommand, Array.Empty<string>());
    }

    /// <summary>
    /// Picks the compose command: <c>docker compose</c> when the probe succeeds in time, otherwise
    /// <c>docker-compose</c>.
    /// </summary>
    private static string DetectComposeCommand()
    {
        // Try docker compose (v2) first
        try
        {
            var psi = new ProcessStartInfo
            {
                FileName = "docker",
                Arguments = "compose version",
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                UseShellExecute = false,
                CreateNoWindow = true
            };

            using var process = Process.Start(psi);
            if (process is not null)
            {
                if (process.WaitForExit(DetectionTimeoutMilliseconds))
                {
                    if (process.ExitCode == 0)
                    {
                        return "docker compose";
                    }
                }
                else
                {
                    // Probe hung: do not leave it running in the background.
                    TryKill(process);
                }
            }
        }
        catch (Exception)
        {
            // Best effort: docker missing or not startable means the v2 plugin is unusable.
        }

        // Fall back to docker-compose (v1)
        return "docker-compose";
    }

    /// <summary>
    /// Terminates a process and its descendants, ignoring failures because the process may already
    /// have exited on its own.
    /// </summary>
    private static void TryKill(Process process)
    {
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception ex) when (ex is InvalidOperationException
            or NotSupportedException
            or AggregateException
            or System.ComponentModel.Win32Exception)
        {
            // Nothing further can be done; the caller is already unwinding.
        }
    }

    /// <summary>
    /// Maps registry credentials onto environment variables for the compose process.
    /// </summary>
    /// <param name="credentials">Credential map, or null.</param>
    /// <returns>Null when no credentials were given; otherwise the variables that were present.</returns>
    private static IReadOnlyDictionary<string, string>? BuildEnvironment(
        IReadOnlyDictionary<string, string>? credentials)
    {
        if (credentials is null)
        {
            return null;
        }

        var env = new Dictionary<string, string>();

        if (credentials.TryGetValue("registry.username", out var user))
        {
            env["DOCKER_REGISTRY_USER"] = user;
        }

        if (credentials.TryGetValue("registry.password", out var pass))
        {
            env["DOCKER_REGISTRY_PASSWORD"] = pass;
        }

        return env;
    }
}
/// <summary>
/// Result of a compose command.
/// </summary>
/// <param name="Success">True when the compose process exited with code 0.</param>
/// <param name="ExitCode">Exit code of the compose process.</param>
/// <param name="StandardOutput">Text the process wrote to standard output.</param>
/// <param name="StandardError">Text the process wrote to standard error.</param>
public sealed record ComposeResult(
bool Success,
int ExitCode,
string StandardOutput,
string StandardError);
/// <summary>
/// Options for compose up.
/// </summary>
public sealed record ComposeUpOptions
{
/// <summary>
/// Force recreate containers (adds <c>--force-recreate</c>). Off unless set.
/// </summary>
public bool ForceRecreate { get; init; }
/// <summary>
/// Remove orphan containers (adds <c>--remove-orphans</c>). On by default.
/// </summary>
public bool RemoveOrphans { get; init; } = true;
/// <summary>
/// Don't start services after creating (adds <c>--no-start</c>). Off unless set.
/// </summary>
public bool NoStart { get; init; }
/// <summary>
/// Specific services to start. When null or empty, no service names are added to the command.
/// </summary>
public IReadOnlyList<string>? Services { get; init; }
/// <summary>
/// Environment variables for the compose process; each entry is set on the child process
/// environment. Null adds nothing.
/// </summary>
public IReadOnlyDictionary<string, string>? Environment { get; init; }
}
/// <summary>
/// Options for compose down.
/// </summary>
public sealed record ComposeDownOptions
{
/// <summary>
/// Remove volumes (adds <c>-v</c>). Off unless set.
/// </summary>
public bool RemoveVolumes { get; init; }
/// <summary>
/// Remove orphan containers (adds <c>--remove-orphans</c>). On by default.
/// </summary>
public bool RemoveOrphans { get; init; } = true;
/// <summary>
/// Timeout for stopping containers, passed as <c>-t</c> in whole seconds (fractional seconds
/// are truncated). Null omits the flag.
/// </summary>
public TimeSpan? Timeout { get; init; }
}

View File

@@ -0,0 +1,148 @@
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core;
namespace StellaOps.Agent.Compose;
/// <summary>
/// Manages compose files and deployment directories.
/// </summary>
/// <remarks>
/// Every project lives in its own directory directly beneath the deployment root. Project names are
/// validated before they are combined into a path, so that a name such as <c>""</c> or
/// <c>"../x"</c> can never address, overwrite or delete anything outside that project's directory.
/// </remarks>
public sealed class ComposeFileManager
{
    private const string ComposeFileName = "compose.stella.lock.yml";
    private const string VersionStickerFileName = "stella.version.json";

    /// <summary>Separators rejected on every platform, regardless of what the host OS treats as one.</summary>
    private static readonly char[] PathSeparators = { '/', '\\' };

    private readonly string _deploymentRoot;
    private readonly ILogger<ComposeFileManager> _logger;

    /// <summary>
    /// Creates a new instance using default deployment root.
    /// </summary>
    /// <param name="logger">Logger for file operations.</param>
    public ComposeFileManager(ILogger<ComposeFileManager> logger)
    {
        _deploymentRoot = GetDefaultDeploymentRoot();
        _logger = logger;
    }

    /// <summary>
    /// Creates a new instance with custom deployment root.
    /// </summary>
    /// <param name="deploymentRoot">Directory under which project directories are created.</param>
    /// <param name="logger">Logger for file operations.</param>
    public ComposeFileManager(string deploymentRoot, ILogger<ComposeFileManager> logger)
    {
        _deploymentRoot = deploymentRoot;
        _logger = logger;
    }

    /// <summary>
    /// Writes compose files for a project.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <param name="composeLockContent">Content of compose.stella.lock.yml.</param>
    /// <param name="versionStickerContent">Content of stella.version.json.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Project directory path.</returns>
    /// <exception cref="ArgumentException"><paramref name="projectName"/> is not a plain directory name.</exception>
    public async Task<string> WriteComposeFileAsync(
        string projectName,
        string composeLockContent,
        string versionStickerContent,
        CancellationToken ct = default)
    {
        var projectDir = GetProjectDirectory(projectName);
        Directory.CreateDirectory(projectDir);

        // Write compose.stella.lock.yml
        var composeFile = Path.Combine(projectDir, ComposeFileName);
        await File.WriteAllTextAsync(composeFile, composeLockContent, ct);
        _logger.LogDebug("Wrote compose file: {Path}", composeFile);

        // Write stella.version.json
        var versionFile = Path.Combine(projectDir, VersionStickerFileName);
        await File.WriteAllTextAsync(versionFile, versionStickerContent, ct);
        _logger.LogDebug("Wrote version sticker: {Path}", versionFile);

        return projectDir;
    }

    /// <summary>
    /// Gets the project directory path.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <returns>The deployment root combined with the project name.</returns>
    /// <exception cref="ArgumentException"><paramref name="projectName"/> is not a plain directory name.</exception>
    public string GetProjectDirectory(string projectName)
    {
        return Path.Combine(_deploymentRoot, ValidateProjectName(projectName));
    }

    /// <summary>
    /// Gets the compose file path for a project.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <returns>Path of <c>compose.stella.lock.yml</c> inside the project directory.</returns>
    /// <exception cref="ArgumentException"><paramref name="projectName"/> is not a plain directory name.</exception>
    public string GetComposeFilePath(string projectName)
    {
        return Path.Combine(GetProjectDirectory(projectName), ComposeFileName);
    }

    /// <summary>
    /// Gets the version sticker content for a project.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Content of <c>stella.version.json</c>, or null when the file does not exist.</returns>
    /// <exception cref="ArgumentException"><paramref name="projectName"/> is not a plain directory name.</exception>
    public async Task<string?> GetVersionStickerAsync(string projectName, CancellationToken ct = default)
    {
        var path = Path.Combine(GetProjectDirectory(projectName), VersionStickerFileName);
        if (!File.Exists(path))
        {
            return null;
        }

        return await File.ReadAllTextAsync(path, ct);
    }

    /// <summary>
    /// Checks if a project exists.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <returns>True when the project's compose file exists.</returns>
    /// <exception cref="ArgumentException"><paramref name="projectName"/> is not a plain directory name.</exception>
    public bool ProjectExists(string projectName)
    {
        var composeFile = GetComposeFilePath(projectName);
        return File.Exists(composeFile);
    }

    /// <summary>
    /// Backs up existing deployment before update.
    /// </summary>
    /// <remarks>
    /// Copies the files that sit directly in the project directory (skipping names that start with
    /// a dot) into <c>.backup/&lt;UTC timestamp&gt;</c> inside the project directory. Does nothing
    /// when the project directory does not exist.
    /// </remarks>
    /// <param name="projectName">Project name.</param>
    /// <param name="timeProvider">Clock used for the backup folder name.</param>
    /// <param name="ct">Cancellation token, checked between file copies.</param>
    /// <returns>A completed task; the work is done synchronously.</returns>
    /// <exception cref="ArgumentException"><paramref name="projectName"/> is not a plain directory name.</exception>
    public Task BackupExistingAsync(string projectName, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var projectDir = GetProjectDirectory(projectName);
        if (!Directory.Exists(projectDir))
        {
            return Task.CompletedTask;
        }

        // Invariant culture keeps the folder name Gregorian and ASCII on every host.
        var timestamp = timeProvider.GetUtcNow().ToString(
            "yyyyMMdd-HHmmss",
            System.Globalization.CultureInfo.InvariantCulture);
        var backupDir = Path.Combine(projectDir, ".backup", timestamp);
        Directory.CreateDirectory(backupDir);

        foreach (var file in Directory.GetFiles(projectDir, "*.*"))
        {
            ct.ThrowIfCancellationRequested();

            var fileName = Path.GetFileName(file);
            if (fileName.StartsWith('.'))
            {
                continue;
            }

            // Two backups within the same second share a folder; overwrite instead of failing.
            File.Copy(file, Path.Combine(backupDir, fileName), overwrite: true);
        }

        _logger.LogDebug("Backed up existing deployment to {BackupDir}", backupDir);
        return Task.CompletedTask;
    }

    /// <summary>
    /// Cleans up a project directory.
    /// </summary>
    /// <remarks>Recursively deletes the project directory when it exists.</remarks>
    /// <param name="projectName">Project name.</param>
    /// <param name="ct">Cancellation token (not observed; the delete is a single call).</param>
    /// <returns>A completed task; the work is done synchronously.</returns>
    /// <exception cref="ArgumentException"><paramref name="projectName"/> is not a plain directory name.</exception>
    public Task CleanupAsync(string projectName, CancellationToken ct = default)
    {
        var projectDir = GetProjectDirectory(projectName);
        if (Directory.Exists(projectDir))
        {
            Directory.Delete(projectDir, recursive: true);
            _logger.LogDebug("Cleaned up project directory: {Path}", projectDir);
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Ensures a project name is a single directory name that stays beneath the deployment root.
    /// </summary>
    /// <param name="projectName">Name to check.</param>
    /// <returns>The unchanged name when it is acceptable.</returns>
    /// <exception cref="ArgumentException">
    /// The name is null, blank, <c>.</c>, <c>..</c>, contains a path separator, or is otherwise
    /// interpreted by the platform as more than a bare file name.
    /// </exception>
    private static string ValidateProjectName(string projectName)
    {
        // An empty name would resolve to the deployment root itself, which CleanupAsync deletes recursively.
        if (string.IsNullOrWhiteSpace(projectName))
        {
            throw new ArgumentException("Project name must not be empty.", nameof(projectName));
        }

        var isPlainName =
            projectName is not ("." or "..")
            && projectName.IndexOfAny(PathSeparators) < 0
            // Catches platform-specific forms such as a drive prefix ("C:name") on Windows.
            && string.Equals(Path.GetFileName(projectName), projectName, StringComparison.Ordinal);

        if (!isPlainName)
        {
            throw new ArgumentException(
                $"Project name '{projectName}' must be a single directory name without path separators.",
                nameof(projectName));
        }

        return projectName;
    }

    /// <summary>
    /// Returns the platform-specific default deployment root.
    /// </summary>
    private static string GetDefaultDeploymentRoot()
    {
        // Platform-specific default
        if (OperatingSystem.IsWindows())
        {
            return Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData), "stella-agent", "deployments");
        }

        return "/var/lib/stella-agent/deployments";
    }
}

View File

@@ -0,0 +1,69 @@
using StellaOps.Agent.Core.Exceptions;
namespace StellaOps.Agent.Compose.Exceptions;
/// <summary>
/// Raised when the payload of a compose task is invalid.
/// </summary>
public sealed class InvalidComposePayloadException : AgentException
{
    /// <summary>
    /// Creates the exception for the given task type.
    /// </summary>
    /// <param name="taskType">Task type whose payload was rejected; quoted in the message.</param>
    public InvalidComposePayloadException(string taskType)
        : base($"Invalid payload for task type '{taskType}'")
        => TaskType = taskType;

    /// <summary>
    /// Task type whose payload was rejected.
    /// </summary>
    public string TaskType { get; }
}
/// <summary>
/// Raised when executing a compose command fails.
/// </summary>
public sealed class ComposeCommandException : AgentException
{
    /// <summary>
    /// Creates the exception from the failed command, its exit code and error text.
    /// </summary>
    /// <param name="command">Command that failed.</param>
    /// <param name="exitCode">Exit code of the command.</param>
    /// <param name="error">Error text; included in the message only.</param>
    public ComposeCommandException(string command, int exitCode, string error)
        : base($"Compose command '{command}' failed with exit code {exitCode}: {error}")
        => (Command, ExitCode) = (command, exitCode);

    /// <summary>
    /// Command that failed.
    /// </summary>
    public string Command { get; }

    /// <summary>
    /// Exit code of the failed command.
    /// </summary>
    public int ExitCode { get; }
}
/// <summary>
/// Raised when a compose project cannot be found.
/// </summary>
public sealed class ComposeProjectNotFoundException : AgentException
{
    /// <summary>
    /// Creates the exception for the given project.
    /// </summary>
    /// <param name="projectName">Name of the missing project; quoted in the message.</param>
    public ComposeProjectNotFoundException(string projectName)
        : base($"Compose project not found: '{projectName}'")
        => ProjectName = projectName;

    /// <summary>
    /// Name of the project that was not found.
    /// </summary>
    public string ProjectName { get; }
}

View File

@@ -0,0 +1,22 @@
<!-- Build definition for the Stella Agent Compose capability library. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Preview language features are enabled and every compiler warning fails the build. -->
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Agent.Compose</RootNamespace>
<Description>Stella Agent Compose Capability - manages docker-compose stacks on target hosts</Description>
</PropertyGroup>
<!-- NOTE(review): no Version attributes here; versions are presumably managed centrally (e.g. Directory.Packages.props) - confirm. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
</ItemGroup>
<!-- Agent core library that this project builds against. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,156 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Compose.Exceptions;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Compose.Tasks;
/// <summary>
/// Handles <c>compose.down</c> tasks: stops a compose stack and optionally removes its files.
/// </summary>
public sealed class ComposeDownTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for compose.down task.
    /// </summary>
    public sealed record DownPayload
    {
        /// <summary>
        /// Name of the project to stop.
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Whether volumes are removed as well.
        /// </summary>
        public bool RemoveVolumes { get; init; }

        /// <summary>
        /// Whether orphan containers are removed; on by default.
        /// </summary>
        public bool RemoveOrphans { get; init; } = true;

        /// <summary>
        /// Whether the project directory is cleaned up after a successful down.
        /// </summary>
        public bool CleanupFiles { get; init; }

        /// <summary>
        /// Timeout for stopping containers; 30 seconds unless overridden.
        /// </summary>
        public TimeSpan? Timeout { get; init; } = TimeSpan.FromSeconds(30);
    }

    /// <summary>
    /// Creates the handler.
    /// </summary>
    /// <param name="executor">Runs the compose CLI.</param>
    /// <param name="fileManager">Resolves project paths and performs file cleanup.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposeDownTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _logger = logger;
    }

    /// <summary>
    /// Stops the stack named in the task payload.
    /// </summary>
    /// <remarks>
    /// A project without a compose file is reported as a successful, skipped run. Exceptions raised
    /// after the payload has been read are logged and turned into a failed result.
    /// </remarks>
    /// <param name="task">Task carrying a <see cref="DownPayload"/>.</param>
    /// <param name="timeProvider">Clock used for completion time and duration.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The task result.</returns>
    /// <exception cref="InvalidComposePayloadException">The payload deserialized to null.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startedAt = timeProvider.GetUtcNow();

        var payload = JsonSerializer.Deserialize<DownPayload>(task.Payload);
        if (payload is null)
        {
            throw new InvalidComposePayloadException("compose.down");
        }

        var project = payload.ProjectName;
        _logger.LogInformation("Stopping compose stack: {Project}", project);

        try
        {
            var directory = _fileManager.GetProjectDirectory(project);
            var composePath = _fileManager.GetComposeFilePath(project);

            if (!_fileManager.ProjectExists(project))
            {
                _logger.LogWarning(
                    "Compose file not found for project {Project}, skipping down",
                    project);

                return Succeeded(new Dictionary<string, object>
                {
                    ["projectName"] = project,
                    ["skipped"] = true,
                    ["reason"] = "Compose file not found"
                });
            }

            var downOptions = new ComposeDownOptions
            {
                RemoveVolumes = payload.RemoveVolumes,
                RemoveOrphans = payload.RemoveOrphans,
                Timeout = payload.Timeout
            };
            var outcome = await _executor.DownAsync(directory, composePath, downOptions, ct);

            if (!outcome.Success)
            {
                return Failed($"Failed to stop stack: {outcome.StandardError}");
            }

            // File cleanup only happens once the stack has actually been brought down.
            if (payload.CleanupFiles)
            {
                await _fileManager.CleanupAsync(project, ct);
            }

            _logger.LogInformation("Stopped compose stack: {Project}", project);

            return Succeeded(new Dictionary<string, object>
            {
                ["projectName"] = project,
                ["removedVolumes"] = payload.RemoveVolumes,
                ["cleanedFiles"] = payload.CleanupFiles
            });
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to stop compose stack {Project}", project);
            return Failed(ex.Message);
        }

        // Builds a successful result stamped with the current time.
        AgentTaskResult Succeeded(Dictionary<string, object> outputs)
        {
            var finishedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = outputs,
                CompletedAt = finishedAt,
                Duration = finishedAt - startedAt
            };
        }

        // Builds a failed result stamped with the current time.
        AgentTaskResult Failed(string error)
        {
            var finishedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = error,
                CompletedAt = finishedAt,
                Duration = finishedAt - startedAt
            };
        }
    }
}

View File

@@ -0,0 +1,196 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Compose.Exceptions;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Compose.Tasks;
/// <summary>
/// Task handler for checking compose stack health.
/// </summary>
/// <remarks>
/// The stack counts as healthy only when the service listing succeeded, at least one service was
/// listed, every explicitly requested service is present, and every considered service is in
/// state <c>running</c> with a health value that is absent, empty or <c>healthy</c>.
/// </remarks>
public sealed class ComposeHealthCheckTask : IComposeTask
{
    /// <summary>Pause between two polls while waiting for the stack to become healthy.</summary>
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(5);

    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for compose.health-check task.
    /// </summary>
    public sealed record HealthCheckPayload
    {
        /// <summary>
        /// Project name.
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Specific services to check; all listed services are checked when null or empty.
        /// </summary>
        public IReadOnlyList<string>? Services { get; init; }

        /// <summary>
        /// Timeout for health check; five minutes unless overridden.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);

        /// <summary>
        /// Wait for services to become healthy. When false, a single poll decides the result.
        /// </summary>
        public bool WaitForHealthy { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="executor">Runs the compose CLI.</param>
    /// <param name="fileManager">Resolves project paths.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposeHealthCheckTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _logger = logger;
    }

    /// <summary>
    /// Polls the stack named in the payload until it is healthy, the timeout elapses, or the caller cancels.
    /// </summary>
    /// <param name="task">Task carrying a <see cref="HealthCheckPayload"/>.</param>
    /// <param name="timeProvider">Clock used for timestamps, the timeout and the poll delay.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// A successful result once the stack is healthy; otherwise a failed result describing the
    /// unhealthy services, the timeout, the cancellation or the error.
    /// </returns>
    /// <exception cref="InvalidComposePayloadException">The payload deserialized to null.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<HealthCheckPayload>(task.Payload)
            ?? throw new InvalidComposePayloadException("compose.health-check");

        _logger.LogInformation("Checking health of compose stack: {Project}", payload.ProjectName);

        try
        {
            var projectDir = _fileManager.GetProjectDirectory(payload.ProjectName);
            var composeFile = _fileManager.GetComposeFilePath(payload.ProjectName);

            if (!_fileManager.ProjectExists(payload.ProjectName))
            {
                throw new ComposeProjectNotFoundException(payload.ProjectName);
            }

            // Timeout and delay run on the injected clock so they follow the same time source as the timestamps.
            using var timeoutCts = new CancellationTokenSource(payload.Timeout, timeProvider);
            using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

            while (!linkedCts.IsCancellationRequested)
            {
                var psResult = await _executor.PsAsync(projectDir, composeFile, ct: linkedCts.Token);

                // Output of a failed listing is not trustworthy; treat it as "nothing known".
                var services = psResult.Success
                    ? ParseServices(psResult.StandardOutput)
                    : new List<ServiceStatus>();

                // Filter to requested services if specified, remembering which ones did not show up.
                var missingServices = new List<string>();
                if (payload.Services?.Count > 0)
                {
                    var requestedServices = payload.Services.ToHashSet(StringComparer.OrdinalIgnoreCase);
                    services = services.Where(s => requestedServices.Contains(s.Service)).ToList();

                    var presentServices = services
                        .Select(s => s.Service)
                        .ToHashSet(StringComparer.OrdinalIgnoreCase);
                    missingServices = requestedServices.Where(r => !presentServices.Contains(r)).ToList();
                }

                var unhealthyServices = services.Where(s => !IsHealthy(s)).ToList();

                // An empty list must never count as healthy: All() over nothing is vacuously true.
                var stackHealthy = psResult.Success
                    && services.Count > 0
                    && missingServices.Count == 0
                    && unhealthyServices.Count == 0;

                if (stackHealthy)
                {
                    _logger.LogInformation(
                        "Compose stack {Project} is healthy ({Count} services)",
                        payload.ProjectName,
                        services.Count);

                    var completedAt = timeProvider.GetUtcNow();
                    return new AgentTaskResult
                    {
                        TaskId = task.Id,
                        Success = true,
                        Outputs = new Dictionary<string, object>
                        {
                            ["projectName"] = payload.ProjectName,
                            ["serviceCount"] = services.Count,
                            ["allHealthy"] = true
                        },
                        CompletedAt = completedAt,
                        Duration = completedAt - startTime
                    };
                }

                if (!payload.WaitForHealthy)
                {
                    // Requested-but-absent services are reported alongside the unhealthy ones.
                    var failingServices = unhealthyServices
                        .Select(s => s.Service)
                        .Concat(missingServices)
                        .ToList();

                    var completedAt = timeProvider.GetUtcNow();
                    return new AgentTaskResult
                    {
                        TaskId = task.Id,
                        Success = false,
                        Error = DescribeFailure(psResult, services.Count, missingServices.Count),
                        Outputs = new Dictionary<string, object>
                        {
                            ["projectName"] = payload.ProjectName,
                            ["serviceCount"] = services.Count,
                            ["unhealthyCount"] = failingServices.Count,
                            ["unhealthyServices"] = failingServices
                        },
                        CompletedAt = completedAt,
                        Duration = completedAt - startTime
                    };
                }

                await Task.Delay(PollInterval, timeProvider, linkedCts.Token);
            }

            throw new OperationCanceledException();
        }
        catch (OperationCanceledException)
        {
            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                // The linked token fires for both causes; only the caller's token tells them apart.
                Error = ct.IsCancellationRequested
                    ? "Health check was cancelled"
                    : $"Health check timed out after {payload.Timeout}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Health check failed for stack {Project}", payload.ProjectName);
            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Tells whether one service is running and not reporting a failing health state.
    /// </summary>
    private static bool IsHealthy(ServiceStatus service)
    {
        return service.State == "running"
            && (string.IsNullOrEmpty(service.Health) || service.Health == "healthy");
    }

    /// <summary>
    /// Picks the error text for a single-poll failure.
    /// </summary>
    private static string DescribeFailure(ComposeResult psResult, int serviceCount, int missingCount)
    {
        if (!psResult.Success)
        {
            return $"Failed to list services: {psResult.StandardError}";
        }

        if (serviceCount == 0 && missingCount == 0)
        {
            return "No running services found";
        }

        return "Some services are unhealthy";
    }

    /// <summary>
    /// Parses the JSON printed by <c>ps --format json</c>.
    /// </summary>
    /// <remarks>
    /// Accepts a single JSON document (one object, or an array of objects) as well as one JSON
    /// object per line. Anything that is not valid JSON or not an object is skipped.
    /// </remarks>
    /// <param name="output">Standard output of the command.</param>
    /// <returns>The services that could be read; empty when there were none.</returns>
    private static List<ServiceStatus> ParseServices(string output)
    {
        var services = new List<ServiceStatus>();
        if (string.IsNullOrWhiteSpace(output))
        {
            return services;
        }

        // Whole output as one document covers the array form and a single-object listing.
        if (TryParseJson(output, out var root))
        {
            CollectServices(root, services);
            return services;
        }

        foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            if (TryParseJson(line, out var element))
            {
                CollectServices(element, services);
            }
        }

        return services;
    }

    /// <summary>
    /// Parses text as JSON without throwing on malformed input.
    /// </summary>
    private static bool TryParseJson(string text, out JsonElement element)
    {
        try
        {
            element = JsonSerializer.Deserialize<JsonElement>(text);
            return true;
        }
        catch (JsonException)
        {
            element = default;
            return false;
        }
    }

    /// <summary>
    /// Adds the service described by an object, or by each object of an array, to the list.
    /// </summary>
    private static void CollectServices(JsonElement element, List<ServiceStatus> services)
    {
        switch (element.ValueKind)
        {
            case JsonValueKind.Array:
                foreach (var item in element.EnumerateArray())
                {
                    CollectServices(item, services);
                }

                break;

            case JsonValueKind.Object:
                try
                {
                    services.Add(new ServiceStatus(
                        element.TryGetProperty("Name", out var name) ? name.GetString() ?? "" : "",
                        element.TryGetProperty("Service", out var svc) ? svc.GetString() ?? "" : "",
                        element.TryGetProperty("State", out var state) ? state.GetString() ?? "" : "",
                        element.TryGetProperty("Health", out var health) ? health.GetString() : null));
                }
                catch (InvalidOperationException)
                {
                    // A property held a non-string value; skip this entry like any other malformed one.
                }

                break;
        }
    }
}

View File

@@ -0,0 +1,141 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Compose.Exceptions;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Compose.Tasks;
/// <summary>
/// Task handler for listing compose services.
/// </summary>
public sealed class ComposePsTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for compose.ps task.
    /// </summary>
    public sealed record PsPayload
    {
        /// <summary>
        /// Project name.
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Include stopped containers.
        /// </summary>
        public bool All { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="executor">Runs the compose CLI.</param>
    /// <param name="fileManager">Resolves project paths.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposePsTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _logger = logger;
    }

    /// <summary>
    /// Lists the services of the stack named in the payload.
    /// </summary>
    /// <param name="task">Task carrying a <see cref="PsPayload"/>.</param>
    /// <param name="timeProvider">Clock used for completion time and duration.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// On success, outputs <c>projectName</c>, <c>serviceCount</c> and <c>services</c> (name,
    /// service, state and health per entry). Otherwise a failed result; exceptions raised after the
    /// payload has been read are logged and reported through the result.
    /// </returns>
    /// <exception cref="InvalidComposePayloadException">The payload deserialized to null.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<PsPayload>(task.Payload)
            ?? throw new InvalidComposePayloadException("compose.ps");

        _logger.LogInformation("Listing services for compose stack: {Project}", payload.ProjectName);

        try
        {
            var projectDir = _fileManager.GetProjectDirectory(payload.ProjectName);
            var composeFile = _fileManager.GetComposeFilePath(payload.ProjectName);

            if (!_fileManager.ProjectExists(payload.ProjectName))
            {
                throw new ComposeProjectNotFoundException(payload.ProjectName);
            }

            var result = await _executor.PsAsync(projectDir, composeFile, payload.All, ct);
            var completedAt = timeProvider.GetUtcNow();

            if (!result.Success)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = $"Failed to list services: {result.StandardError}",
                    CompletedAt = completedAt,
                    Duration = completedAt - startTime
                };
            }

            var services = ParseServices(result.StandardOutput);
            _logger.LogInformation("Found {Count} services in {Project}", services.Count, payload.ProjectName);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["projectName"] = payload.ProjectName,
                    ["serviceCount"] = services.Count,
                    ["services"] = services.Select(s => new Dictionary<string, object?>
                    {
                        ["name"] = s.Name,
                        ["service"] = s.Service,
                        ["state"] = s.State,
                        ["health"] = s.Health
                    }).ToList()
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to list services for {Project}", payload.ProjectName);
            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Parses the JSON printed by <c>ps --format json</c>.
    /// </summary>
    /// <remarks>
    /// Accepts a single JSON document (one object, or an array of objects) as well as one JSON
    /// object per line. Anything that is not valid JSON or not an object is skipped.
    /// </remarks>
    /// <param name="output">Standard output of the command.</param>
    /// <returns>The services that could be read; empty when there were none.</returns>
    private static List<ServiceStatus> ParseServices(string output)
    {
        var services = new List<ServiceStatus>();
        if (string.IsNullOrWhiteSpace(output))
        {
            return services;
        }

        // Whole output as one document covers the array form and a single-object listing.
        if (TryParseJson(output, out var root))
        {
            CollectServices(root, services);
            return services;
        }

        foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            if (TryParseJson(line, out var element))
            {
                CollectServices(element, services);
            }
        }

        return services;
    }

    /// <summary>
    /// Parses text as JSON without throwing on malformed input.
    /// </summary>
    private static bool TryParseJson(string text, out JsonElement element)
    {
        try
        {
            element = JsonSerializer.Deserialize<JsonElement>(text);
            return true;
        }
        catch (JsonException)
        {
            element = default;
            return false;
        }
    }

    /// <summary>
    /// Adds the service described by an object, or by each object of an array, to the list.
    /// </summary>
    private static void CollectServices(JsonElement element, List<ServiceStatus> services)
    {
        switch (element.ValueKind)
        {
            case JsonValueKind.Array:
                foreach (var item in element.EnumerateArray())
                {
                    CollectServices(item, services);
                }

                break;

            case JsonValueKind.Object:
                try
                {
                    services.Add(new ServiceStatus(
                        element.TryGetProperty("Name", out var name) ? name.GetString() ?? "" : "",
                        element.TryGetProperty("Service", out var svc) ? svc.GetString() ?? "" : "",
                        element.TryGetProperty("State", out var state) ? state.GetString() ?? "" : "",
                        element.TryGetProperty("Health", out var health) ? health.GetString() : null));
                }
                catch (InvalidOperationException)
                {
                    // A property held a non-string value; skip this entry like any other malformed one.
                }

                break;
        }
    }
}

View File

@@ -0,0 +1,101 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Compose.Exceptions;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Compose.Tasks;
/// <summary>
/// Handles <c>compose.pull</c> tasks: pulls the images of a compose stack.
/// </summary>
public sealed class ComposePullTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for compose.pull task.
    /// </summary>
    public sealed record PullPayload
    {
        /// <summary>
        /// Name of the project whose images are pulled.
        /// </summary>
        public required string ProjectName { get; init; }
    }

    /// <summary>
    /// Creates the handler.
    /// </summary>
    /// <param name="executor">Runs the compose CLI.</param>
    /// <param name="fileManager">Resolves project paths.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposePullTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _logger = logger;
    }

    /// <summary>
    /// Pulls the images for the stack named in the task payload, using the task's credentials.
    /// </summary>
    /// <remarks>
    /// Exceptions raised after the payload has been read, including a missing project, are logged
    /// and turned into a failed result.
    /// </remarks>
    /// <param name="task">Task carrying a <see cref="PullPayload"/>.</param>
    /// <param name="timeProvider">Clock used for completion time and duration.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The task result.</returns>
    /// <exception cref="InvalidComposePayloadException">The payload deserialized to null.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startedAt = timeProvider.GetUtcNow();

        var payload = JsonSerializer.Deserialize<PullPayload>(task.Payload);
        if (payload is null)
        {
            throw new InvalidComposePayloadException("compose.pull");
        }

        var project = payload.ProjectName;
        _logger.LogInformation("Pulling images for compose stack: {Project}", project);

        try
        {
            var directory = _fileManager.GetProjectDirectory(project);
            var composePath = _fileManager.GetComposeFilePath(project);

            if (!_fileManager.ProjectExists(project))
            {
                throw new ComposeProjectNotFoundException(project);
            }

            var outcome = await _executor.PullAsync(directory, composePath, task.Credentials, ct);

            // Completion time is taken right after the pull returns, before any logging.
            var finishedAt = timeProvider.GetUtcNow();

            if (outcome.Success)
            {
                _logger.LogInformation("Pulled images for compose stack: {Project}", project);

                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["projectName"] = project
                    },
                    CompletedAt = finishedAt,
                    Duration = finishedAt - startedAt
                };
            }

            return Failed($"Failed to pull images: {outcome.StandardError}", finishedAt);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to pull images for {Project}", project);
            return Failed(ex.Message, timeProvider.GetUtcNow());
        }

        // Builds a failed result for the given completion time.
        AgentTaskResult Failed(string error, DateTimeOffset completedAt)
        {
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = error,
                CompletedAt = completedAt,
                Duration = completedAt - startedAt
            };
        }
    }
}

View File

@@ -0,0 +1,110 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Compose.Exceptions;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Compose.Tasks;
/// <summary>
/// Task handler for scaling compose services.
/// </summary>
/// <remarks>
/// Handles the <c>compose.scale</c> task type: the payload names a project and the desired
/// replica count per service, which is forwarded to <see cref="ComposeExecutor"/>.
/// </remarks>
public sealed class ComposeScaleTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for compose.scale task.
    /// </summary>
    public sealed record ScalePayload
    {
        /// <summary>
        /// Project name.
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Service scaling configuration (service name -> replica count).
        /// </summary>
        public required IReadOnlyDictionary<string, int> Scaling { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="executor">Executor used to run the scale command.</param>
    /// <param name="fileManager">Resolves the project directory and compose file path.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposeScaleTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Failures are reported through the returned <see cref="AgentTaskResult"/>; cancellation of
    /// <paramref name="ct"/> is allowed to propagate so the caller can tell a timeout or cancel
    /// apart from an ordinary failure.
    /// </remarks>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();

        // A payload that deserializes to null is rejected; a malformed one surfaces as a JsonException.
        var payload = JsonSerializer.Deserialize<ScalePayload>(task.Payload)
            ?? throw new InvalidComposePayloadException("compose.scale");

        _logger.LogInformation(
            "Scaling compose stack {Project}: {Scaling}",
            payload.ProjectName,
            string.Join(", ", payload.Scaling.Select(kv => $"{kv.Key}={kv.Value}")));

        try
        {
            var projectDir = _fileManager.GetProjectDirectory(payload.ProjectName);
            var composeFile = _fileManager.GetComposeFilePath(payload.ProjectName);

            if (!_fileManager.ProjectExists(payload.ProjectName))
            {
                throw new ComposeProjectNotFoundException(payload.ProjectName);
            }

            var result = await _executor.ScaleAsync(projectDir, composeFile, payload.Scaling, ct);
            var completedAt = timeProvider.GetUtcNow();

            if (!result.Success)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = $"Failed to scale services: {result.StandardError}",
                    CompletedAt = completedAt,
                    Duration = completedAt - startTime
                };
            }

            _logger.LogInformation("Scaled compose stack: {Project}", payload.ProjectName);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["projectName"] = payload.ProjectName,
                    // Boxed copy so the output map only exposes plain object values.
                    ["scaling"] = payload.Scaling.ToDictionary(kv => kv.Key, kv => (object)kv.Value)
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        // Cancellation requested through ct must not be flattened into a generic failure:
        // TaskExecutor relies on OperationCanceledException to report "timed out" / "cancelled".
        catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
        {
            _logger.LogError(ex, "Failed to scale compose stack {Project}", payload.ProjectName);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }
}

View File

@@ -0,0 +1,238 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Compose.Exceptions;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Compose.Tasks;
/// <summary>
/// Task handler for deploying a compose stack.
/// </summary>
/// <remarks>
/// Handles the <c>compose.up</c> task type. The handler optionally backs up the existing
/// deployment, writes the compose files, pulls images, starts the stack and finally lists the
/// services that are running.
/// </remarks>
public sealed class ComposeUpTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for compose.up task.
    /// </summary>
    public sealed record UpPayload
    {
        /// <summary>
        /// Project name.
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Content of compose.stella.lock.yml.
        /// </summary>
        public required string ComposeLock { get; init; }

        /// <summary>
        /// Content of stella.version.json.
        /// </summary>
        public required string VersionSticker { get; init; }

        /// <summary>
        /// Force recreate containers. Defaults to <c>true</c>.
        /// </summary>
        public bool ForceRecreate { get; init; } = true;

        /// <summary>
        /// Remove orphan containers. Defaults to <c>true</c>.
        /// </summary>
        public bool RemoveOrphans { get; init; } = true;

        /// <summary>
        /// Specific services to deploy.
        /// </summary>
        public IReadOnlyList<string>? Services { get; init; }

        /// <summary>
        /// Environment variables. These take precedence over the task's variables on key clashes.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// Backup existing deployment before update. Defaults to <c>true</c>.
        /// </summary>
        public bool BackupExisting { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="executor">Executor used to run the pull, up and ps commands.</param>
    /// <param name="fileManager">Writes and backs up the project's compose files.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposeUpTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Failures are reported through the returned <see cref="AgentTaskResult"/>; cancellation of
    /// <paramref name="ct"/> is allowed to propagate so the caller can tell a timeout or cancel
    /// apart from an ordinary failure.
    /// </remarks>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();

        // A payload that deserializes to null is rejected; a malformed one surfaces as a JsonException.
        var payload = JsonSerializer.Deserialize<UpPayload>(task.Payload)
            ?? throw new InvalidComposePayloadException("compose.up");

        _logger.LogInformation("Deploying compose stack: {Project}", payload.ProjectName);

        try
        {
            // Backup existing deployment
            if (payload.BackupExisting)
            {
                await _fileManager.BackupExistingAsync(payload.ProjectName, timeProvider, ct);
            }

            // Write compose files
            var projectDir = await _fileManager.WriteComposeFileAsync(
                payload.ProjectName,
                payload.ComposeLock,
                payload.VersionSticker,
                ct);

            var composeFile = _fileManager.GetComposeFilePath(payload.ProjectName);

            // Pull images first so a registry problem fails the task before containers are touched.
            _logger.LogInformation("Pulling images for {Project}", payload.ProjectName);
            var pullResult = await _executor.PullAsync(
                projectDir,
                composeFile,
                task.Credentials,
                ct);

            if (!pullResult.Success)
            {
                var completedAt = timeProvider.GetUtcNow();
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = $"Failed to pull images: {pullResult.StandardError}",
                    CompletedAt = completedAt,
                    Duration = completedAt - startTime
                };
            }

            // Deploy the stack
            _logger.LogInformation("Starting compose stack: {Project}", payload.ProjectName);
            var upResult = await _executor.UpAsync(
                projectDir,
                composeFile,
                new ComposeUpOptions
                {
                    ForceRecreate = payload.ForceRecreate,
                    RemoveOrphans = payload.RemoveOrphans,
                    Services = payload.Services,
                    Environment = MergeEnvironment(payload.Environment, task.Variables)
                },
                ct);

            if (!upResult.Success)
            {
                var completedAt = timeProvider.GetUtcNow();
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = $"Failed to deploy stack: {upResult.StandardError}",
                    CompletedAt = completedAt,
                    Duration = completedAt - startTime
                };
            }

            // Get running services. The listing is informational only: its success flag is not
            // checked, and unparsable output simply yields fewer (or zero) reported services.
            var psResult = await _executor.PsAsync(projectDir, composeFile, ct: ct);
            var services = ParseServicesFromPs(psResult.StandardOutput);

            _logger.LogInformation(
                "Deployed compose stack {Project} with {Count} services",
                payload.ProjectName,
                services.Count);

            var finalCompletedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["projectName"] = payload.ProjectName,
                    ["projectDir"] = projectDir,
                    ["serviceCount"] = services.Count,
                    ["services"] = services.Select(s => s.Service).ToList()
                },
                CompletedAt = finalCompletedAt,
                Duration = finalCompletedAt - startTime
            };
        }
        // Cancellation requested through ct must not be flattened into a generic failure:
        // TaskExecutor relies on OperationCanceledException to report "timed out" / "cancelled".
        catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
        {
            _logger.LogError(ex, "Failed to deploy compose stack {Project}", payload.ProjectName);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Combines the task variables with the payload environment.
    /// </summary>
    /// <param name="env">Environment from the payload; wins on duplicate keys.</param>
    /// <param name="variables">Variables attached to the task.</param>
    /// <returns>The merged map, or <c>null</c> when there is nothing to pass.</returns>
    private static IReadOnlyDictionary<string, string>? MergeEnvironment(
        IReadOnlyDictionary<string, string>? env,
        IReadOnlyDictionary<string, string> variables)
    {
        if (env is null && variables.Count == 0)
        {
            return null;
        }

        var merged = new Dictionary<string, string>(variables);
        if (env is not null)
        {
            foreach (var (key, value) in env)
            {
                merged[key] = value;
            }
        }

        return merged;
    }

    /// <summary>
    /// Parses the JSON output of the ps command into service statuses.
    /// </summary>
    /// <param name="output">Raw standard output of the ps command.</param>
    /// <returns>The services that could be parsed; never <c>null</c>.</returns>
    /// <remarks>
    /// Accepts one JSON object per line as well as a JSON array of objects on a line
    /// (presumably the two shapes different Compose versions emit for JSON output - confirm
    /// against the flags <c>ComposeExecutor.PsAsync</c> passes). Anything else is skipped.
    /// </remarks>
    private static IReadOnlyList<ServiceStatus> ParseServicesFromPs(string output)
    {
        if (string.IsNullOrWhiteSpace(output))
        {
            return [];
        }

        var services = new List<ServiceStatus>();

        foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            JsonElement root;
            try
            {
                root = JsonSerializer.Deserialize<JsonElement>(line);
            }
            catch (JsonException)
            {
                // Skip malformed lines
                continue;
            }

            if (root.ValueKind == JsonValueKind.Array)
            {
                foreach (var item in root.EnumerateArray())
                {
                    AddServiceIfValid(services, item);
                }
            }
            else
            {
                AddServiceIfValid(services, root);
            }
        }

        return services;
    }

    /// <summary>
    /// Appends a <see cref="ServiceStatus"/> built from <paramref name="element"/> when it is a
    /// JSON object whose known properties hold strings (or null); otherwise does nothing.
    /// </summary>
    private static void AddServiceIfValid(List<ServiceStatus> services, JsonElement element)
    {
        if (element.ValueKind != JsonValueKind.Object)
        {
            return;
        }

        try
        {
            services.Add(new ServiceStatus(
                element.TryGetProperty("Name", out var name) ? name.GetString() ?? "" : "",
                element.TryGetProperty("Service", out var svc) ? svc.GetString() ?? "" : "",
                element.TryGetProperty("State", out var state) ? state.GetString() ?? "" : "",
                element.TryGetProperty("Health", out var health) ? health.GetString() : null));
        }
        catch (InvalidOperationException)
        {
            // GetString rejects non-string values; treat such an entry as malformed and skip it.
        }
    }
}

View File

@@ -0,0 +1,27 @@
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Compose.Tasks;
/// <summary>
/// Interface for Compose task handlers.
/// </summary>
/// <remarks>
/// One handler exists per compose task type (for example <c>compose.pull</c>,
/// <c>compose.scale</c> and <c>compose.up</c>).
/// </remarks>
public interface IComposeTask
{
/// <summary>
/// Executes the task.
/// </summary>
/// <remarks>
/// The handlers in this assembly read their input by deserializing the task's JSON payload and
/// use <paramref name="timeProvider"/> to stamp the result's completion time and duration.
/// </remarks>
/// <param name="task">Task information.</param>
/// <param name="timeProvider">Time provider for deterministic timestamps.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Task result.</returns>
Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default);
}
/// <summary>
/// Status of a service in a compose stack.
/// </summary>
/// <remarks>
/// <c>ComposeUpTask</c> fills these values from the <c>Name</c>, <c>Service</c>, <c>State</c> and
/// <c>Health</c> properties of the ps command's JSON output.
/// </remarks>
/// <param name="Name">Value of the <c>Name</c> property; empty when absent (presumably the container name - confirm).</param>
/// <param name="Service">Value of the <c>Service</c> property; empty when absent.</param>
/// <param name="State">Value of the <c>State</c> property; empty when absent.</param>
/// <param name="Health">Value of the <c>Health</c> property, or <c>null</c> when absent.</param>
public sealed record ServiceStatus(
string Name,
string Service,
string State,
string? Health);

View File

@@ -0,0 +1,57 @@
namespace StellaOps.Agent.Core;
/// <summary>
/// Configuration for the Stella Agent.
/// </summary>
/// <remarks>
/// Consumed by <c>AgentHost</c> through <c>IOptions&lt;AgentConfiguration&gt;</c>. Members marked
/// <c>required</c> must be assigned when an instance is created with an object initializer.
/// </remarks>
public sealed class AgentConfiguration
{
/// <summary>
/// Unique identifier for this agent.
/// </summary>
public required string AgentId { get; set; }
/// <summary>
/// Display name for this agent.
/// </summary>
public required string AgentName { get; set; }
/// <summary>
/// URL of the orchestrator to connect to.
/// </summary>
public required string OrchestratorUrl { get; set; }
/// <summary>
/// Path to the agent's TLS certificate.
/// </summary>
public required string CertificatePath { get; set; }
/// <summary>
/// Path to the agent's private key.
/// </summary>
public required string PrivateKeyPath { get; set; }
/// <summary>
/// Path to the CA certificate for verifying the orchestrator.
/// </summary>
public required string CaCertificatePath { get; set; }
/// <summary>
/// Port for the gRPC server. Default is 50051.
/// </summary>
public int GrpcPort { get; set; } = 50051;
/// <summary>
/// Interval between heartbeat messages. Default is 30 seconds.
/// </summary>
public TimeSpan HeartbeatInterval { get; set; } = TimeSpan.FromSeconds(30);
/// <summary>
/// Default timeout for task execution. Default is 30 minutes.
/// </summary>
/// <remarks>
/// NOTE(review): <c>TaskExecutor</c> enforces the timeout carried by each task; where this
/// default is applied is not visible in this file - confirm.
/// </remarks>
public TimeSpan TaskTimeout { get; set; } = TimeSpan.FromMinutes(30);
/// <summary>
/// List of capability names to enable. Default is an empty list.
/// </summary>
public IReadOnlyList<string> EnabledCapabilities { get; set; } = [];
}

View File

@@ -0,0 +1,157 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Exceptions;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Core.Capability;
/// <summary>
/// Registry for agent capabilities.
/// </summary>
/// <remarks>
/// Capabilities are indexed by name and by each task type they support; both lookups ignore
/// case. The registry uses plain dictionaries and performs no locking of its own.
/// </remarks>
public sealed class CapabilityRegistry
{
    private readonly Dictionary<string, IAgentCapability> _capabilities = new(StringComparer.OrdinalIgnoreCase);
    private readonly Dictionary<string, IAgentCapability> _taskTypeToCapability = new(StringComparer.OrdinalIgnoreCase);
    private readonly ILogger<CapabilityRegistry> _logger;

    /// <summary>
    /// Creates a new instance of <see cref="CapabilityRegistry"/>.
    /// </summary>
    /// <param name="logger">Logger for registration, initialization and health messages.</param>
    public CapabilityRegistry(ILogger<CapabilityRegistry> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Registers a capability.
    /// </summary>
    /// <param name="capability">The capability to register.</param>
    /// <exception cref="ArgumentNullException">If <paramref name="capability"/> is null.</exception>
    /// <exception cref="CapabilityAlreadyRegisteredException">If the capability is already registered.</exception>
    public void Register(IAgentCapability capability)
    {
        ArgumentNullException.ThrowIfNull(capability);

        // TryAdd performs the duplicate check and the insert with a single lookup.
        if (!_capabilities.TryAdd(capability.Name, capability))
        {
            throw new CapabilityAlreadyRegisteredException(capability.Name);
        }

        // Read the list once: implementations may build a new collection on every access.
        var taskTypes = capability.SupportedTaskTypes;

        foreach (var taskType in taskTypes)
        {
            if (_taskTypeToCapability.TryGetValue(taskType, out var existing))
            {
                // Last registration wins; the clash is surfaced as a warning rather than an error.
                _logger.LogWarning(
                    "Task type {TaskType} already registered by {ExistingCapability}, overriding with {NewCapability}",
                    taskType, existing.Name, capability.Name);
            }

            _taskTypeToCapability[taskType] = capability;
        }

        _logger.LogInformation(
            "Registered capability {Name} v{Version} with tasks: {Tasks}",
            capability.Name,
            capability.Version,
            string.Join(", ", taskTypes));
    }

    /// <summary>
    /// Gets a capability by name.
    /// </summary>
    /// <param name="name">The capability name.</param>
    /// <returns>The capability, or null if not found.</returns>
    public IAgentCapability? Get(string name)
    {
        _capabilities.TryGetValue(name, out var capability);
        return capability;
    }

    /// <summary>
    /// Gets the capability that can handle the specified task type.
    /// </summary>
    /// <param name="taskType">The task type.</param>
    /// <returns>The capability, or null if not found.</returns>
    public IAgentCapability? GetForTaskType(string taskType)
    {
        _taskTypeToCapability.TryGetValue(taskType, out var capability);
        return capability;
    }

    /// <summary>
    /// Gets information about all registered capabilities.
    /// </summary>
    /// <returns>A read-only snapshot with the name, version and task types of each capability.</returns>
    public IReadOnlyList<CapabilityInfo> GetCapabilities()
    {
        return _capabilities.Values
            .Select(c => new CapabilityInfo(
                c.Name,
                c.Version,
                c.SupportedTaskTypes.ToImmutableArray()))
            .ToList()
            .AsReadOnly();
    }

    /// <summary>
    /// Gets all registered capabilities.
    /// </summary>
    /// <returns>A live view over the registered capability instances.</returns>
    public IReadOnlyCollection<IAgentCapability> GetAll()
    {
        return _capabilities.Values;
    }

    /// <summary>
    /// Initializes all registered capabilities.
    /// </summary>
    /// <remarks>
    /// A capability that reports failure or throws is logged and does not stop the remaining
    /// capabilities from initializing. Cancellation of <paramref name="ct"/> aborts the loop.
    /// </remarks>
    /// <param name="ct">Cancellation token.</param>
    /// <exception cref="OperationCanceledException">If <paramref name="ct"/> is cancelled during initialization.</exception>
    public async Task InitializeAllAsync(CancellationToken ct = default)
    {
        foreach (var (name, capability) in _capabilities)
        {
            try
            {
                var success = await capability.InitializeAsync(ct);
                if (!success)
                {
                    _logger.LogWarning("Capability {Name} failed to initialize", name);
                }
                else
                {
                    _logger.LogDebug("Capability {Name} initialized successfully", name);
                }
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // The caller gave up: stop instead of logging one error per remaining capability.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Capability {Name} threw exception during initialization", name);
            }
        }
    }

    /// <summary>
    /// Checks health of all capabilities.
    /// </summary>
    /// <remarks>
    /// A capability whose health check throws is reported as unhealthy with the exception
    /// message. Cancellation of <paramref name="ct"/> is propagated rather than being reported
    /// as an unhealthy capability.
    /// </remarks>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Overall health status and per-capability details.</returns>
    /// <exception cref="OperationCanceledException">If <paramref name="ct"/> is cancelled during the check.</exception>
    public async Task<(bool AllHealthy, IReadOnlyDictionary<string, object> Details)> CheckHealthAsync(CancellationToken ct = default)
    {
        var details = new Dictionary<string, object>();
        var allHealthy = true;

        foreach (var (name, capability) in _capabilities)
        {
            try
            {
                var health = await capability.CheckHealthAsync(ct);
                details[name] = new { health.IsHealthy, health.Message };
                allHealthy = allHealthy && health.IsHealthy;
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                throw;
            }
            catch (Exception ex)
            {
                details[name] = new { IsHealthy = false, Message = ex.Message };
                allHealthy = false;
            }
        }

        return (allHealthy, details);
    }
}

View File

@@ -0,0 +1,46 @@
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Core.Capability;
/// <summary>
/// Interface for agent capabilities that can execute tasks.
/// </summary>
/// <remarks>
/// Capabilities are registered with <c>CapabilityRegistry</c>, which indexes them by
/// <see cref="Name"/> and by every entry of <see cref="SupportedTaskTypes"/>.
/// </remarks>
public interface IAgentCapability
{
/// <summary>
/// Name of this capability. Used as the registration key; registering the same name twice fails.
/// </summary>
string Name { get; }
/// <summary>
/// Version of this capability.
/// </summary>
string Version { get; }
/// <summary>
/// Task types this capability can handle.
/// </summary>
IReadOnlyList<string> SupportedTaskTypes { get; }
/// <summary>
/// Initializes the capability.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>True if initialization succeeded.</returns>
Task<bool> InitializeAsync(CancellationToken ct = default);
/// <summary>
/// Executes a task.
/// </summary>
/// <remarks>
/// <c>TaskExecutor</c> passes a copy of the task whose credentials have already been resolved.
/// </remarks>
/// <param name="task">The task to execute.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The task result.</returns>
Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default);
/// <summary>
/// Checks the health of this capability.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>Health status.</returns>
Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default);
}

View File

@@ -0,0 +1,91 @@
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Core.Communication;
/// <summary>
/// Client for communicating with the orchestrator.
/// </summary>
/// <remarks>
/// <c>AgentHost</c> connects the client while starting and disconnects it while stopping.
/// </remarks>
public interface IOrchestratorClient
{
/// <summary>
/// Connects to the orchestrator.
/// </summary>
/// <param name="ct">Cancellation token.</param>
Task ConnectAsync(CancellationToken ct = default);
/// <summary>
/// Disconnects from the orchestrator.
/// </summary>
/// <param name="ct">Cancellation token.</param>
Task DisconnectAsync(CancellationToken ct = default);
/// <summary>
/// Sends a heartbeat to the orchestrator.
/// </summary>
/// <param name="heartbeat">The heartbeat message to send.</param>
/// <param name="ct">Cancellation token.</param>
Task SendHeartbeatAsync(AgentHeartbeatMessage heartbeat, CancellationToken ct = default);
/// <summary>
/// Sends log entries to the orchestrator.
/// </summary>
/// <param name="logs">The log entries to send.</param>
/// <param name="ct">Cancellation token.</param>
Task SendLogsAsync(IReadOnlyList<LogEntry> logs, CancellationToken ct = default);
}
/// <summary>
/// Log entry to send to the orchestrator.
/// </summary>
/// <remarks>
/// Immutable record; every property is required and must be set when the entry is created.
/// </remarks>
public sealed record LogEntry
{
/// <summary>
/// Task that generated this log.
/// </summary>
public required Guid TaskId { get; init; }
/// <summary>
/// When the log was generated.
/// </summary>
public required DateTimeOffset Timestamp { get; init; }
/// <summary>
/// Log level.
/// </summary>
public required LogLevel Level { get; init; }
/// <summary>
/// Log message.
/// </summary>
public required string Message { get; init; }
}
/// <summary>
/// Log level for agent logs.
/// </summary>
/// <remarks>
/// Members carry their implicit values (Trace = 0 through Critical = 5), so they are ordered by
/// increasing severity. The type shares its simple name with
/// <c>Microsoft.Extensions.Logging.LogLevel</c>; qualify the name where both are in scope.
/// </remarks>
public enum LogLevel
{
/// <summary>
/// Trace level logging.
/// </summary>
Trace,
/// <summary>
/// Debug level logging.
/// </summary>
Debug,
/// <summary>
/// Information level logging.
/// </summary>
Information,
/// <summary>
/// Warning level logging.
/// </summary>
Warning,
/// <summary>
/// Error level logging.
/// </summary>
Error,
/// <summary>
/// Critical error logging.
/// </summary>
Critical
}

View File

@@ -0,0 +1,60 @@
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Core.Communication;
/// <summary>
/// In-memory <see cref="IOrchestratorClient"/> for tests: it only tracks a connected flag and
/// writes debug log lines instead of talking to an orchestrator.
/// </summary>
public sealed class StubOrchestratorClient : IOrchestratorClient
{
    private readonly ILogger<StubOrchestratorClient> _logger;
    private bool _connected;

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    public StubOrchestratorClient(ILogger<StubOrchestratorClient> logger) => _logger = logger;

    /// <inheritdoc />
    public Task ConnectAsync(CancellationToken ct = default) => SetConnected(true, "Stub: Connected to orchestrator");

    /// <inheritdoc />
    public Task DisconnectAsync(CancellationToken ct = default) => SetConnected(false, "Stub: Disconnected from orchestrator");

    /// <inheritdoc />
    public Task SendHeartbeatAsync(AgentHeartbeatMessage heartbeat, CancellationToken ct = default)
    {
        ThrowIfDisconnected();

        _logger.LogDebug(
            "Stub: Sent heartbeat - status={Status}, tasks={TaskCount}",
            heartbeat.Status,
            heartbeat.RunningTaskCount);

        return Task.CompletedTask;
    }

    /// <inheritdoc />
    public Task SendLogsAsync(IReadOnlyList<LogEntry> logs, CancellationToken ct = default)
    {
        ThrowIfDisconnected();

        _logger.LogDebug("Stub: Sent {Count} log entries", logs.Count);

        return Task.CompletedTask;
    }

    // Flips the connected flag first, then records the transition.
    private Task SetConnected(bool connected, string message)
    {
        _connected = connected;
        _logger.LogDebug(message);
        return Task.CompletedTask;
    }

    // Send operations fail synchronously until ConnectAsync has been called.
    private void ThrowIfDisconnected()
    {
        if (_connected)
        {
            return;
        }

        throw new InvalidOperationException("Not connected to orchestrator");
    }
}

View File

@@ -0,0 +1,35 @@
namespace StellaOps.Agent.Core.Credentials;
/// <summary>
/// Credential provider for the <c>env</c> scheme: the reference path is used as the name of an
/// environment variable.
/// </summary>
public sealed class EnvironmentCredentialProvider : ICredentialProvider
{
    /// <inheritdoc />
    public string Scheme
    {
        get { return "env"; }
    }

    /// <inheritdoc />
    public Task<string?> GetSecretAsync(string path, CancellationToken ct = default)
    {
        // The lookup is synchronous; an unset variable yields null.
        string? secret = Environment.GetEnvironmentVariable(path);
        return Task.FromResult(secret);
    }
}
/// <summary>
/// Credential provider that reads from files.
/// </summary>
/// <remarks>
/// Handles the <c>file</c> scheme: the reference path is a file whose content, with surrounding
/// whitespace trimmed, is the secret.
/// </remarks>
public sealed class FileCredentialProvider : ICredentialProvider
{
    /// <inheritdoc />
    public string Scheme => "file";

    /// <inheritdoc />
    public async Task<string?> GetSecretAsync(string path, CancellationToken ct = default)
    {
        if (!File.Exists(path))
        {
            return null;
        }

        try
        {
            var content = await File.ReadAllTextAsync(path, ct);
            return content.Trim();
        }
        catch (Exception ex) when (ex is FileNotFoundException or DirectoryNotFoundException)
        {
            // The file disappeared between the existence check and the read (e.g. a secret
            // being rotated); report "not found" as the interface documents instead of throwing.
            return null;
        }
    }
}

View File

@@ -0,0 +1,97 @@
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Exceptions;
namespace StellaOps.Agent.Core.Credentials;
/// <summary>
/// Turns credential references of the form <c>scheme://path</c> into secret values by
/// delegating to the <see cref="ICredentialProvider"/> registered for the scheme.
/// </summary>
public sealed partial class CredentialResolver
{
    private readonly Dictionary<string, ICredentialProvider> _providers = new(StringComparer.OrdinalIgnoreCase);
    private readonly ILogger<CredentialResolver> _logger;

    /// <summary>
    /// Creates a new instance of <see cref="CredentialResolver"/>.
    /// </summary>
    /// <param name="providers">Providers to register; a later provider replaces an earlier one with the same scheme.</param>
    /// <param name="logger">Logger for registration and resolution messages.</param>
    public CredentialResolver(IEnumerable<ICredentialProvider> providers, ILogger<CredentialResolver> logger)
    {
        _logger = logger;

        foreach (var candidate in providers)
        {
            _providers[candidate.Scheme] = candidate;
            _logger.LogDebug("Registered credential provider for scheme {Scheme}", candidate.Scheme);
        }
    }

    /// <summary>
    /// Resolves a credential reference to its actual value.
    /// </summary>
    /// <param name="reference">The credential reference (e.g., "env://DB_PASSWORD").</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// The resolved secret; an empty string for a null or empty reference; the input itself when
    /// it is not shaped like a reference.
    /// </returns>
    /// <exception cref="UnknownCredentialProviderException">If the scheme is not recognized.</exception>
    /// <exception cref="CredentialNotFoundException">If the credential cannot be found.</exception>
    public async Task<string> ResolveAsync(string reference, CancellationToken ct = default)
    {
        if (string.IsNullOrEmpty(reference))
        {
            return string.Empty;
        }

        // Not a reference, return as-is (literal value)
        if (ParseReference(reference) is not { } target)
        {
            return reference;
        }

        var provider = _providers.TryGetValue(target.Scheme, out var registered)
            ? registered
            : throw new UnknownCredentialProviderException(target.Scheme);

        var secret = await provider.GetSecretAsync(target.Path, ct)
            ?? throw new CredentialNotFoundException(reference);

        // Only the scheme is logged; the path is masked.
        _logger.LogDebug("Resolved credential reference {Scheme}://***", target.Scheme);
        return secret;
    }

    /// <summary>
    /// Resolves all credential references in a dictionary.
    /// </summary>
    /// <param name="credentials">Dictionary of credential references.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Dictionary with the same keys and the resolved values.</returns>
    public async Task<IReadOnlyDictionary<string, string>> ResolveAllAsync(
        IReadOnlyDictionary<string, string> credentials,
        CancellationToken ct = default)
    {
        var output = new Dictionary<string, string>();

        // Entries are resolved one after another; the first failure aborts the whole batch.
        foreach (var entry in credentials)
        {
            output[entry.Key] = await ResolveAsync(entry.Value, ct);
        }

        return output;
    }

    // Splits "scheme://path" into its two parts; returns null for anything else.
    private static CredentialReference? ParseReference(string reference)
    {
        var match = ReferencePattern().Match(reference);
        return match.Success
            ? new CredentialReference(match.Groups[1].Value, match.Groups[2].Value)
            : null;
    }

    [GeneratedRegex(@"^([a-z]+)://(.+)$", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex ReferencePattern();
}
/// <summary>
/// A parsed credential reference: the part before <c>://</c> and the part after it.
/// </summary>
/// <param name="Scheme">Scheme used to pick the credential provider.</param>
/// <param name="Path">Path handed to the provider's secret lookup.</param>
internal sealed record CredentialReference(string Scheme, string Path);

View File

@@ -0,0 +1,20 @@
namespace StellaOps.Agent.Core.Credentials;
/// <summary>
/// Interface for credential providers that resolve secret references.
/// </summary>
/// <remarks>
/// <c>CredentialResolver</c> selects a provider by matching the scheme of a
/// <c>scheme://path</c> reference against <see cref="Scheme"/>, ignoring case.
/// </remarks>
public interface ICredentialProvider
{
/// <summary>
/// The URI scheme this provider handles (e.g., "env", "file", "vault").
/// </summary>
string Scheme { get; }
/// <summary>
/// Retrieves a secret value.
/// </summary>
/// <param name="path">The path to the secret (the part of the reference after <c>://</c>).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The secret value, or null if not found.</returns>
Task<string?> GetSecretAsync(string path, CancellationToken ct = default);
}

View File

@@ -0,0 +1,123 @@
namespace StellaOps.Agent.Core.Exceptions;
/// <summary>
/// Base exception for Agent Core errors.
/// </summary>
/// <remarks>
/// Abstract with protected constructors, so only derived exception types can be thrown.
/// Catching this type handles every Agent Core exception declared in this namespace.
/// </remarks>
public abstract class AgentException : Exception
{
/// <summary>
/// Creates a new agent exception.
/// </summary>
/// <param name="message">Message describing the error.</param>
protected AgentException(string message) : base(message) { }
/// <summary>
/// Creates a new agent exception with inner exception.
/// </summary>
/// <param name="message">Message describing the error.</param>
/// <param name="innerException">The exception that caused this one.</param>
protected AgentException(string message, Exception innerException) : base(message, innerException) { }
}
/// <summary>
/// Raised when a capability is registered under a name that is already taken.
/// </summary>
public sealed class CapabilityAlreadyRegisteredException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given capability name.
    /// </summary>
    /// <param name="capabilityName">Name of the capability that was registered twice.</param>
    public CapabilityAlreadyRegisteredException(string capabilityName)
        : base($"Capability '{capabilityName}' is already registered")
        => CapabilityName = capabilityName;

    /// <summary>
    /// Name of the capability that was already registered.
    /// </summary>
    public string CapabilityName { get; }
}
/// <summary>
/// Raised when no registered capability can handle a task type.
/// </summary>
public sealed class UnsupportedTaskTypeException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given task type.
    /// </summary>
    /// <param name="taskType">The task type that no capability supports.</param>
    public UnsupportedTaskTypeException(string taskType)
        : base($"No capability found for task type '{taskType}'")
        => TaskType = taskType;

    /// <summary>
    /// The task type that could not be handled.
    /// </summary>
    public string TaskType { get; }
}
/// <summary>
/// Raised when a credential reference uses a scheme that has no registered provider.
/// </summary>
public sealed class UnknownCredentialProviderException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given scheme.
    /// </summary>
    /// <param name="scheme">The scheme that has no provider.</param>
    public UnknownCredentialProviderException(string scheme)
        : base($"Unknown credential provider scheme '{scheme}'")
        => Scheme = scheme;

    /// <summary>
    /// The scheme that was not recognized.
    /// </summary>
    public string Scheme { get; }
}
/// <summary>
/// Raised when a provider has no value for a credential reference.
/// </summary>
public sealed class CredentialNotFoundException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given reference.
    /// </summary>
    /// <param name="reference">The credential reference that could not be resolved.</param>
    public CredentialNotFoundException(string reference)
        : base($"Credential not found: {reference}")
        => Reference = reference;

    /// <summary>
    /// The credential reference that could not be resolved.
    /// </summary>
    public string Reference { get; }
}
/// <summary>
/// Raised when a task does not finish within its timeout.
/// </summary>
public sealed class TaskTimeoutException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given task and timeout.
    /// </summary>
    /// <param name="taskId">Identifier of the task that timed out.</param>
    /// <param name="timeout">The timeout that elapsed.</param>
    public TaskTimeoutException(Guid taskId, TimeSpan timeout)
        : base($"Task {taskId} timed out after {timeout}")
        => (TaskId, Timeout) = (taskId, timeout);

    /// <summary>
    /// Identifier of the task that timed out.
    /// </summary>
    public Guid TaskId { get; }

    /// <summary>
    /// The timeout that elapsed.
    /// </summary>
    public TimeSpan Timeout { get; }
}

View File

@@ -0,0 +1,170 @@
using System.Collections.Concurrent;
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Capability;
using StellaOps.Agent.Core.Credentials;
using StellaOps.Agent.Core.Exceptions;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Core.Execution;
/// <summary>
/// Executes tasks using the appropriate capability.
/// </summary>
/// <remarks>
/// For each task the executor picks the capability registered for the task type, resolves the
/// task's credential references, enforces the task's timeout and converts timeouts,
/// cancellations and exceptions into failed <see cref="AgentTaskResult"/> values.
/// </remarks>
public sealed class TaskExecutor
{
    private readonly CapabilityRegistry _capabilities;
    private readonly CredentialResolver _credentialResolver;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<TaskExecutor> _logger;
    private readonly ConcurrentDictionary<Guid, CancellationTokenSource> _runningTasks = new();

    /// <summary>
    /// Creates a new instance of <see cref="TaskExecutor"/>.
    /// </summary>
    /// <param name="capabilities">Registry used to find the capability for a task type.</param>
    /// <param name="credentialResolver">Resolves the task's credential references.</param>
    /// <param name="timeProvider">Source of the completion timestamps.</param>
    /// <param name="logger">Logger for execution messages.</param>
    public TaskExecutor(
        CapabilityRegistry capabilities,
        CredentialResolver credentialResolver,
        TimeProvider timeProvider,
        ILogger<TaskExecutor> logger)
    {
        _capabilities = capabilities;
        _credentialResolver = credentialResolver;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <summary>
    /// Gets the number of currently running tasks.
    /// </summary>
    public int RunningTaskCount => _runningTasks.Count;

    /// <summary>
    /// Gets the IDs of currently running tasks.
    /// </summary>
    public IReadOnlyList<Guid> RunningTaskIds => _runningTasks.Keys.ToList().AsReadOnly();

    /// <summary>
    /// Executes a task.
    /// </summary>
    /// <param name="task">The task to execute.</param>
    /// <param name="progress">Optional progress reporter.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The task result.</returns>
    /// <exception cref="UnsupportedTaskTypeException">If no capability handles the task type.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(
        AgentTaskInfo task,
        IProgress<TaskProgress>? progress = null,
        CancellationToken ct = default)
    {
        var capability = _capabilities.GetForTaskType(task.TaskType)
            ?? throw new UnsupportedTaskTypeException(task.TaskType);

        // taskCts fires on timeout; linkedCts additionally fires on caller cancellation or CancelTask.
        using var taskCts = new CancellationTokenSource(task.Timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, taskCts.Token);

        _runningTasks[task.Id] = linkedCts;

        var stopwatch = Stopwatch.StartNew();

        try
        {
            _logger.LogInformation(
                "Executing task {TaskId} of type {TaskType} using capability {Capability}",
                task.Id, task.TaskType, capability.Name);

            progress?.Report(new TaskProgress(task.Id, TaskState.Running, 0, "Starting"));

            // Resolve credentials
            var resolvedCredentials = await _credentialResolver.ResolveAllAsync(task.Credentials, linkedCts.Token);
            var resolvedTask = task with { Credentials = resolvedCredentials };

            // Execute via capability
            var result = await capability.ExecuteAsync(resolvedTask, linkedCts.Token);

            progress?.Report(new TaskProgress(
                task.Id,
                result.Success ? TaskState.Succeeded : TaskState.Failed,
                100,
                result.Success ? "Completed" : result.Error ?? "Failed"));

            _logger.LogInformation(
                "Task {TaskId} completed with status {Status} in {Duration}ms",
                task.Id,
                result.Success ? "success" : "failure",
                stopwatch.ElapsedMilliseconds);

            // The executor's own measurements replace whatever the capability reported.
            return result with
            {
                Duration = stopwatch.Elapsed,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (OperationCanceledException) when (taskCts.IsCancellationRequested)
        {
            _logger.LogWarning("Task {TaskId} timed out after {Timeout}", task.Id, task.Timeout);

            progress?.Report(new TaskProgress(task.Id, TaskState.Failed, 0, "Timeout"));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Task timed out after {task.Timeout}",
                CompletedAt = _timeProvider.GetUtcNow(),
                Duration = stopwatch.Elapsed
            };
        }
        catch (OperationCanceledException)
        {
            _logger.LogInformation("Task {TaskId} was cancelled", task.Id);

            progress?.Report(new TaskProgress(task.Id, TaskState.Cancelled, 0, "Cancelled"));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = "Task was cancelled",
                CompletedAt = _timeProvider.GetUtcNow(),
                Duration = stopwatch.Elapsed
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Task {TaskId} failed with exception", task.Id);

            progress?.Report(new TaskProgress(task.Id, TaskState.Failed, 0, ex.Message));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = _timeProvider.GetUtcNow(),
                Duration = stopwatch.Elapsed
            };
        }
        finally
        {
            // Remove only this execution's registration: if the same task id was submitted again
            // in the meantime, the newer entry must stay cancellable.
            _runningTasks.TryRemove(new KeyValuePair<Guid, CancellationTokenSource>(task.Id, linkedCts));
        }
    }

    /// <summary>
    /// Cancels a running task.
    /// </summary>
    /// <param name="taskId">The task to cancel.</param>
    /// <returns>True if the task was found and cancelled.</returns>
    public bool CancelTask(Guid taskId)
    {
        if (_runningTasks.TryGetValue(taskId, out var cts))
        {
            try
            {
                _logger.LogInformation("Cancelling task {TaskId}", taskId);
                cts.Cancel();
                return true;
            }
            catch (ObjectDisposedException)
            {
                // The task finished and disposed its token source between the lookup and the
                // Cancel call; treat it like a task that is no longer running.
            }
        }

        _logger.LogWarning("Task {TaskId} not found for cancellation", taskId);
        return false;
    }
}

View File

@@ -0,0 +1,82 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Agent.Core.Capability;
using StellaOps.Agent.Core.Communication;
namespace StellaOps.Agent.Core.Hosting;
/// <summary>
/// Main hosted service for the Stella Agent.
/// </summary>
/// <remarks>
/// On start it initializes the capabilities, connects to the orchestrator and starts the agent
/// server when one is supplied; on stop it shuts these down in reverse order.
/// </remarks>
public sealed class AgentHost : IHostedService
{
    private readonly AgentConfiguration _config;
    private readonly CapabilityRegistry _capabilities;
    private readonly IOrchestratorClient _orchestratorClient;
    private readonly IAgentServer? _agentServer;
    private readonly ILogger<AgentHost> _logger;

    /// <summary>
    /// Creates a new instance of <see cref="AgentHost"/>.
    /// </summary>
    /// <param name="config">Agent configuration.</param>
    /// <param name="capabilities">Registry of the capabilities to initialize.</param>
    /// <param name="orchestratorClient">Client used to connect to the orchestrator.</param>
    /// <param name="agentServer">Optional task server; skipped when null.</param>
    /// <param name="logger">Logger for lifecycle messages.</param>
    public AgentHost(
        IOptions<AgentConfiguration> config,
        CapabilityRegistry capabilities,
        IOrchestratorClient orchestratorClient,
        IAgentServer? agentServer,
        ILogger<AgentHost> logger)
    {
        _config = config.Value;
        _capabilities = capabilities;
        _orchestratorClient = orchestratorClient;
        _agentServer = agentServer;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        _logger.LogInformation(
            "Starting Stella Agent {Name} ({Id})",
            _config.AgentName,
            _config.AgentId);

        // Initialize capabilities
        _logger.LogDebug("Initializing capabilities...");
        await _capabilities.InitializeAllAsync(cancellationToken);

        // Connect to orchestrator
        _logger.LogDebug("Connecting to orchestrator at {Url}...", _config.OrchestratorUrl);
        await _orchestratorClient.ConnectAsync(cancellationToken);

        // Start agent server if available
        if (_agentServer is not null)
        {
            _logger.LogDebug("Starting agent server on port {Port}...", _config.GrpcPort);
            await _agentServer.StartAsync(cancellationToken);
        }

        // Build the capability snapshot once instead of once per log argument.
        var registered = _capabilities.GetCapabilities();
        _logger.LogInformation(
            "Agent started with {Count} capabilities: {Capabilities}",
            registered.Count,
            string.Join(", ", registered.Select(c => c.Name)));
    }

    /// <inheritdoc />
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        _logger.LogInformation("Stopping Stella Agent {Id}", _config.AgentId);

        try
        {
            // Stop agent server if available
            if (_agentServer is not null)
            {
                await _agentServer.StopAsync(cancellationToken);
            }
        }
        finally
        {
            // Disconnect from orchestrator even when stopping the server failed, so the
            // connection is not left open on a faulty shutdown.
            await _orchestratorClient.DisconnectAsync(cancellationToken);
        }

        _logger.LogInformation("Agent stopped");
    }
}

View File

@@ -0,0 +1,17 @@
namespace StellaOps.Agent.Core.Hosting;
/// <summary>
/// Interface for the agent's task server.
/// </summary>
/// <remarks>
/// The server is optional: <c>AgentHost</c> accepts a nullable instance, starts it after the
/// orchestrator connection is established and stops it before disconnecting.
/// </remarks>
public interface IAgentServer
{
/// <summary>
/// Starts the server.
/// </summary>
/// <param name="ct">Token used to cancel the start operation.</param>
/// <returns>A task that completes when the start operation has finished.</returns>
Task StartAsync(CancellationToken ct = default);
/// <summary>
/// Stops the server.
/// </summary>
/// <param name="ct">Token used to cancel the stop operation.</param>
/// <returns>A task that completes when the stop operation has finished.</returns>
Task StopAsync(CancellationToken ct = default);
}

View File

@@ -0,0 +1,117 @@
using System.Collections.Immutable;
namespace StellaOps.Agent.Core.Models;
/// <summary>
/// Information about an agent capability: its name, version and the task types it handles.
/// </summary>
/// <param name="Name">Name of the capability.</param>
/// <param name="Version">Version string of the capability.</param>
/// <param name="SupportedTaskTypes">Task type identifiers the capability supports.</param>
public sealed record CapabilityInfo(
string Name,
string Version,
ImmutableArray<string> SupportedTaskTypes);
/// <summary>
/// Health status of a capability.
/// </summary>
/// <param name="IsHealthy">Whether the capability is currently healthy.</param>
/// <param name="Message">Optional human-readable description of the health state.</param>
/// <param name="Details">Optional additional key/value health details.</param>
public sealed record CapabilityHealthStatus(
bool IsHealthy,
string? Message = null,
IReadOnlyDictionary<string, object>? Details = null);
/// <summary>
/// System information for heartbeats. All properties are required and init-only.
/// </summary>
public sealed record SystemInfo
{
/// <summary>
/// Hostname of the machine.
/// </summary>
public required string Hostname { get; init; }
/// <summary>
/// Operating system description.
/// </summary>
public required string OsDescription { get; init; }
/// <summary>
/// Number of processors available.
/// </summary>
public required int ProcessorCount { get; init; }
/// <summary>
/// Total available memory in bytes.
/// </summary>
/// <remarks>
/// The heartbeat service fills this from <c>GC.GetGCMemoryInfo().TotalAvailableMemoryBytes</c>,
/// i.e. the memory available to the garbage collector as reported by the runtime.
/// </remarks>
public required long MemoryBytes { get; init; }
}
/// <summary>
/// Heartbeat message sent to the orchestrator.
/// </summary>
/// <remarks>
/// Identity, timestamp, status, capabilities and system information are required;
/// the running-task count and health details are optional.
/// </remarks>
public sealed record AgentHeartbeatMessage
{
/// <summary>
/// Agent identifier.
/// </summary>
public required string AgentId { get; init; }
/// <summary>
/// When the heartbeat was generated.
/// </summary>
public required DateTimeOffset Timestamp { get; init; }
/// <summary>
/// Current agent status.
/// </summary>
public required AgentRuntimeStatus Status { get; init; }
/// <summary>
/// Available capabilities.
/// </summary>
public required IReadOnlyList<CapabilityInfo> Capabilities { get; init; }
/// <summary>
/// System information.
/// </summary>
public required SystemInfo SystemInfo { get; init; }
/// <summary>
/// Number of currently running tasks. Defaults to 0 when not set.
/// </summary>
public int RunningTaskCount { get; init; }
/// <summary>
/// Detailed health information per capability, or <see langword="null"/> when none is supplied.
/// </summary>
public IReadOnlyDictionary<string, object>? HealthDetails { get; init; }
}
/// <summary>
/// Runtime status of the agent.
/// </summary>
/// <remarks>
/// <see cref="Starting"/> is the first member and therefore the default value (0).
/// </remarks>
public enum AgentRuntimeStatus
{
/// <summary>
/// Agent is starting up.
/// </summary>
Starting,
/// <summary>
/// Agent is active and healthy.
/// </summary>
Active,
/// <summary>
/// Agent is degraded (some capabilities unhealthy).
/// </summary>
Degraded,
/// <summary>
/// Agent is shutting down.
/// </summary>
ShuttingDown,
/// <summary>
/// Agent is stopped.
/// </summary>
Stopped
}

View File

@@ -0,0 +1,118 @@
namespace StellaOps.Agent.Core.Models;
/// <summary>
/// Represents a task to be executed by the agent.
/// </summary>
/// <remarks>
/// <see cref="Id"/>, <see cref="TaskType"/> and <see cref="Payload"/> are required; the
/// remaining properties have defaults.
/// </remarks>
public sealed record AgentTaskInfo
{
/// <summary>
/// Unique identifier for the task.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Type of task to execute.
/// </summary>
public required string TaskType { get; init; }
/// <summary>
/// JSON payload containing task-specific parameters.
/// </summary>
public required string Payload { get; init; }
/// <summary>
/// Credential references to resolve. Defaults to an empty dictionary.
/// </summary>
public IReadOnlyDictionary<string, string> Credentials { get; init; } = new Dictionary<string, string>();
/// <summary>
/// Variables for template substitution. Defaults to an empty dictionary.
/// </summary>
public IReadOnlyDictionary<string, string> Variables { get; init; } = new Dictionary<string, string>();
/// <summary>
/// When the task was received by the agent.
/// </summary>
public DateTimeOffset ReceivedAt { get; init; }
/// <summary>
/// Timeout for task execution. Defaults to 30 minutes.
/// </summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(30);
}
/// <summary>
/// Result of task execution.
/// </summary>
/// <remarks>
/// <see cref="TaskId"/> and <see cref="Success"/> are required; the remaining properties are optional.
/// </remarks>
public sealed record AgentTaskResult
{
/// <summary>
/// Task that was executed.
/// </summary>
public required Guid TaskId { get; init; }
/// <summary>
/// Whether the task succeeded.
/// </summary>
public required bool Success { get; init; }
/// <summary>
/// Error message if the task failed; <see langword="null"/> when not set.
/// </summary>
public string? Error { get; init; }
/// <summary>
/// Output values from the task. Defaults to an empty dictionary.
/// </summary>
public IReadOnlyDictionary<string, object> Outputs { get; init; } = new Dictionary<string, object>();
/// <summary>
/// When the task completed.
/// </summary>
public DateTimeOffset CompletedAt { get; init; }
/// <summary>
/// How long the task took to execute.
/// </summary>
public TimeSpan Duration { get; init; }
}
/// <summary>
/// Progress update for a task.
/// </summary>
/// <param name="TaskId">Identifier of the task the update refers to.</param>
/// <param name="State">State of the task at the time of the update.</param>
/// <param name="ProgressPercent">Progress value; presumably 0-100 — the range is not enforced here.</param>
/// <param name="Message">Human-readable status message.</param>
public sealed record TaskProgress(
Guid TaskId,
TaskState State,
int ProgressPercent,
string Message);
/// <summary>
/// State of a task.
/// </summary>
/// <remarks>
/// <see cref="Pending"/> is the first member and therefore the default value (0).
/// </remarks>
public enum TaskState
{
/// <summary>
/// Task is waiting to be executed.
/// </summary>
Pending,
/// <summary>
/// Task is currently executing.
/// </summary>
Running,
/// <summary>
/// Task completed successfully.
/// </summary>
Succeeded,
/// <summary>
/// Task failed.
/// </summary>
Failed,
/// <summary>
/// Task was cancelled.
/// </summary>
Cancelled
}

View File

@@ -0,0 +1,105 @@
using System.Runtime.InteropServices;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Agent.Core.Capability;
using StellaOps.Agent.Core.Communication;
using StellaOps.Agent.Core.Execution;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Core.Services;
/// <summary>
/// Background service that sends periodic heartbeats to the orchestrator.
/// </summary>
/// <remarks>
/// A failed heartbeat is logged and the loop continues with the next interval.
/// Cancellation of the stopping token ends the loop without being reported as a failure.
/// </remarks>
public sealed class HeartbeatService : BackgroundService
{
    /// <summary>Grace period before the first heartbeat, to allow initialization.</summary>
    private static readonly TimeSpan InitialDelay = TimeSpan.FromSeconds(5);

    private readonly AgentConfiguration _config;
    private readonly CapabilityRegistry _capabilities;
    private readonly TaskExecutor _taskExecutor;
    private readonly IOrchestratorClient _orchestratorClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<HeartbeatService> _logger;

    /// <summary>
    /// Creates a new instance of <see cref="HeartbeatService"/>.
    /// </summary>
    /// <param name="config">Agent configuration options (agent id, heartbeat interval).</param>
    /// <param name="capabilities">Registry used to list capabilities and check their health.</param>
    /// <param name="taskExecutor">Executor queried for the number of running tasks.</param>
    /// <param name="orchestratorClient">Client used to send the heartbeat.</param>
    /// <param name="timeProvider">Clock used to timestamp heartbeats.</param>
    /// <param name="logger">Logger for service messages.</param>
    public HeartbeatService(
        IOptions<AgentConfiguration> config,
        CapabilityRegistry capabilities,
        TaskExecutor taskExecutor,
        IOrchestratorClient orchestratorClient,
        TimeProvider timeProvider,
        ILogger<HeartbeatService> logger)
    {
        _config = config.Value;
        _capabilities = capabilities;
        _taskExecutor = taskExecutor;
        _orchestratorClient = orchestratorClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "Heartbeat service started with interval {Interval}",
            _config.HeartbeatInterval);

        try
        {
            // Wait a bit before first heartbeat to allow initialization
            await Task.Delay(InitialDelay, stoppingToken);

            while (!stoppingToken.IsCancellationRequested)
            {
                try
                {
                    await SendHeartbeatAsync(stoppingToken);
                }
                catch (Exception ex) when (ex is not OperationCanceledException || !stoppingToken.IsCancellationRequested)
                {
                    // Only real failures are reported; a cancelled send during shutdown is not one.
                    _logger.LogWarning(ex, "Failed to send heartbeat");
                }

                await Task.Delay(_config.HeartbeatInterval, stoppingToken);
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Normal shutdown: fall through so the stop message below is logged.
        }

        _logger.LogInformation("Heartbeat service stopped");
    }

    /// <summary>
    /// Builds a heartbeat from the current capability health and task count and sends it.
    /// </summary>
    private async Task SendHeartbeatAsync(CancellationToken ct)
    {
        var capabilities = _capabilities.GetCapabilities();
        var (allHealthy, healthDetails) = await _capabilities.CheckHealthAsync(ct);

        var heartbeat = new AgentHeartbeatMessage
        {
            AgentId = _config.AgentId,
            Timestamp = _timeProvider.GetUtcNow(),
            Status = allHealthy ? AgentRuntimeStatus.Active : AgentRuntimeStatus.Degraded,
            Capabilities = capabilities,
            SystemInfo = GetSystemInfo(),
            RunningTaskCount = _taskExecutor.RunningTaskCount,
            HealthDetails = healthDetails
        };

        await _orchestratorClient.SendHeartbeatAsync(heartbeat, ct);

        _logger.LogDebug(
            "Heartbeat sent: status={Status}, tasks={TaskCount}, capabilities={CapabilityCount}",
            heartbeat.Status,
            heartbeat.RunningTaskCount,
            heartbeat.Capabilities.Count);
    }

    /// <summary>
    /// Collects host name, OS description, processor count and GC-reported available memory.
    /// </summary>
    private static SystemInfo GetSystemInfo()
    {
        return new SystemInfo
        {
            Hostname = Environment.MachineName,
            OsDescription = RuntimeInformation.OSDescription,
            ProcessorCount = Environment.ProcessorCount,
            MemoryBytes = GC.GetGCMemoryInfo().TotalAvailableMemoryBytes
        };
    }
}

View File

@@ -0,0 +1,159 @@
using System.Threading.Channels;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Communication;
using LogEntry = StellaOps.Agent.Core.Communication.LogEntry;
using LogLevel = StellaOps.Agent.Core.Communication.LogLevel;
namespace StellaOps.Agent.Core.Services;
/// <summary>
/// Streams logs from task execution to the orchestrator.
/// </summary>
/// <remarks>
/// Entries are queued in a bounded channel and sent in batches by a background loop started in
/// the constructor. When the channel is full the oldest queued entry is dropped and a warning is
/// logged. A batch that fails to send is kept and retried after a short delay.
/// </remarks>
public sealed class LogStreamer : IAsyncDisposable
{
    /// <summary>Maximum number of queued entries before the oldest is dropped.</summary>
    private const int ChannelCapacity = 10000;

    /// <summary>Maximum number of entries sent in one call to the orchestrator.</summary>
    private const int MaxBatchSize = 100;

    /// <summary>How long to keep collecting entries once the first one of a batch is available.</summary>
    private static readonly TimeSpan BatchWindow = TimeSpan.FromMilliseconds(100);

    /// <summary>Pause after a failed send so a full batch is not retried in a tight loop.</summary>
    private static readonly TimeSpan RetryDelay = TimeSpan.FromSeconds(1);

    private readonly IOrchestratorClient _orchestratorClient;
    private readonly Channel<LogEntry> _logChannel;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<LogStreamer> _logger;
    private readonly CancellationTokenSource _cts = new();
    private readonly Task _streamTask;
    private int _disposed;

    /// <summary>
    /// Creates a new instance of <see cref="LogStreamer"/> and starts the background send loop.
    /// </summary>
    /// <param name="orchestratorClient">Client used to send log batches.</param>
    /// <param name="timeProvider">Clock used to timestamp log entries.</param>
    /// <param name="logger">Logger for diagnostics of the streamer itself.</param>
    public LogStreamer(
        IOrchestratorClient orchestratorClient,
        TimeProvider timeProvider,
        ILogger<LogStreamer> logger)
    {
        _orchestratorClient = orchestratorClient;
        _timeProvider = timeProvider;
        _logger = logger;

        // In DropOldest mode TryWrite always succeeds, so drops are only observable through
        // the itemDropped callback.
        _logChannel = Channel.CreateBounded<LogEntry>(
            new BoundedChannelOptions(ChannelCapacity)
            {
                FullMode = BoundedChannelFullMode.DropOldest
            },
            dropped => _logger.LogWarning(
                "Log channel full, dropping log entry for task {TaskId}",
                dropped.TaskId));

        _streamTask = StreamLogsAsync(_cts.Token);
        _logger.LogDebug("Log streamer started");
    }

    /// <summary>
    /// Logs a message for a task.
    /// </summary>
    /// <param name="taskId">The task ID.</param>
    /// <param name="level">Log level.</param>
    /// <param name="message">Log message.</param>
    public void Log(Guid taskId, LogLevel level, string message)
    {
        var entry = new LogEntry
        {
            TaskId = taskId,
            Timestamp = _timeProvider.GetUtcNow(),
            Level = level,
            Message = message
        };

        if (!_logChannel.Writer.TryWrite(entry))
        {
            // With DropOldest a write is only rejected once the channel has been completed,
            // i.e. after the streamer was disposed.
            _logger.LogWarning("Log channel closed, dropping log entry for task {TaskId}", taskId);
        }
    }

    /// <summary>
    /// Logs a trace message.
    /// </summary>
    public void LogTrace(Guid taskId, string message) => Log(taskId, LogLevel.Trace, message);

    /// <summary>
    /// Logs a debug message.
    /// </summary>
    public void LogDebug(Guid taskId, string message) => Log(taskId, LogLevel.Debug, message);

    /// <summary>
    /// Logs an information message.
    /// </summary>
    public void LogInformation(Guid taskId, string message) => Log(taskId, LogLevel.Information, message);

    /// <summary>
    /// Logs a warning message.
    /// </summary>
    public void LogWarning(Guid taskId, string message) => Log(taskId, LogLevel.Warning, message);

    /// <summary>
    /// Logs an error message.
    /// </summary>
    public void LogError(Guid taskId, string message) => Log(taskId, LogLevel.Error, message);

    /// <summary>
    /// Background loop: collects queued entries into batches and sends them until cancelled.
    /// </summary>
    private async Task StreamLogsAsync(CancellationToken ct)
    {
        var batch = new List<LogEntry>(MaxBatchSize);

        while (!ct.IsCancellationRequested)
        {
            var channelOpen = await FillBatchAsync(batch, ct);

            if (batch.Count > 0)
            {
                try
                {
                    await _orchestratorClient.SendLogsAsync(batch, ct);
                    batch.Clear();
                }
                catch (OperationCanceledException) when (ct.IsCancellationRequested)
                {
                    // Shutting down; DisposeAsync expects and swallows this.
                    throw;
                }
                catch (Exception ex)
                {
                    _logger.LogWarning(ex, "Failed to send {Count} logs, will retry", batch.Count);

                    // The batch is kept for the retry. Without a pause, a full batch would be
                    // re-sent immediately and spin while the orchestrator is unreachable.
                    await Task.Delay(RetryDelay, ct);
                }
            }

            if (!channelOpen && batch.Count == 0)
            {
                // Channel completed and fully drained: nothing more will ever arrive.
                return;
            }
        }
    }

    /// <summary>
    /// Adds queued entries to <paramref name="batch"/> until it is full or the batch window elapses.
    /// </summary>
    /// <returns><see langword="false"/> when the channel is completed and empty; otherwise <see langword="true"/>.</returns>
    private async Task<bool> FillBatchAsync(List<LogEntry> batch, CancellationToken ct)
    {
        var reader = _logChannel.Reader;

        // While idle, wait for the first entry without a timer so no timeout exception is
        // raised every batch window when nothing is being logged.
        if (batch.Count == 0 && !await reader.WaitToReadAsync(ct))
        {
            return false;
        }

        using var windowCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        windowCts.CancelAfter(BatchWindow);

        while (batch.Count < MaxBatchSize)
        {
            if (reader.TryRead(out var entry))
            {
                batch.Add(entry);
                continue;
            }

            try
            {
                if (!await reader.WaitToReadAsync(windowCts.Token))
                {
                    return false;
                }
            }
            catch (OperationCanceledException) when (!ct.IsCancellationRequested)
            {
                // Batch window elapsed, send what we have
                break;
            }
        }

        return true;
    }

    /// <inheritdoc />
    public async ValueTask DisposeAsync()
    {
        // Make repeated disposal a no-op instead of cancelling an already disposed source.
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return;
        }

        _logChannel.Writer.TryComplete();
        _cts.Cancel();

        try
        {
            await _streamTask;
        }
        catch (OperationCanceledException)
        {
            // Expected
        }

        _cts.Dispose();
        _logger.LogDebug("Log streamer disposed");
    }
}

View File

@@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Agent.Core</RootNamespace>
<Description>Stella Agent Core Runtime - the lightweight agent process that runs on target hosts</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\__Libraries\StellaOps.ReleaseOrchestrator.Agent\StellaOps.ReleaseOrchestrator.Agent.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,127 @@
using Docker.DotNet;
using Docker.DotNet.Models;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Services;
using LogLevel = StellaOps.Agent.Core.Communication.LogLevel;
namespace StellaOps.Agent.Docker;
/// <summary>
/// Streams container logs to the orchestrator in real-time.
/// </summary>
/// <remarks>
/// Output is read in chunks from the multiplexed Docker log stream. Text is decoded and split
/// into lines separately for stdout and stderr, so a line (or a multi-byte UTF-8 sequence) that
/// spans two reads is forwarded as one intact line.
/// </remarks>
public sealed class ContainerLogStreamer
{
    /// <summary>Size of the byte buffer used for each read from the log stream.</summary>
    private const int ReadBufferSize = 81920;

    /// <summary>
    /// Upper bound for buffered text without a newline; longer partial lines are forwarded as-is
    /// so a container that never emits a newline cannot grow the buffer without limit.
    /// </summary>
    private const int MaxPendingLineChars = 64 * 1024;

    private readonly IDockerClient _dockerClient;
    private readonly LogStreamer _logStreamer;
    private readonly ILogger<ContainerLogStreamer> _logger;

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker client used to open the container log stream.</param>
    /// <param name="logStreamer">Streamer that forwards each log line to the orchestrator.</param>
    /// <param name="logger">Logger for diagnostics of the streaming itself.</param>
    public ContainerLogStreamer(
        IDockerClient dockerClient,
        LogStreamer logStreamer,
        ILogger<ContainerLogStreamer> logger)
    {
        _dockerClient = dockerClient;
        _logStreamer = logStreamer;
        _logger = logger;
    }

    /// <summary>
    /// Streams logs from a container until cancellation.
    /// </summary>
    /// <remarks>
    /// Cancellation and streaming errors are logged and not rethrown. Text still buffered
    /// without a trailing newline is forwarded when streaming ends.
    /// </remarks>
    /// <param name="taskId">Task ID to associate logs with.</param>
    /// <param name="containerId">Container ID to stream logs from.</param>
    /// <param name="ct">Cancellation token.</param>
    public async Task StreamLogsAsync(
        Guid taskId,
        string containerId,
        CancellationToken ct = default)
    {
        var stdout = new StreamState(isStderr: false);
        var stderr = new StreamState(isStderr: true);

        try
        {
            _logger.LogDebug("Starting log stream for container {ContainerId}", TruncateId(containerId));

            // The stream owns the underlying connection; dispose it when streaming ends.
            using var multiplexedStream = await _dockerClient.Containers.GetContainerLogsAsync(
                containerId,
                tty: false,
                new ContainerLogsParameters
                {
                    Follow = true,
                    ShowStdout = true,
                    ShowStderr = true,
                    Timestamps = true
                },
                ct);

            var buffer = new byte[ReadBufferSize];
            var chars = new char[System.Text.Encoding.UTF8.GetMaxCharCount(ReadBufferSize)];

            while (!ct.IsCancellationRequested)
            {
                var result = await multiplexedStream.ReadOutputAsync(buffer, 0, buffer.Length, ct);
                if (result.Count == 0)
                {
                    break;
                }

                // Anything that is not stderr is treated as stdout.
                var state = result.Target == MultiplexedStream.TargetStream.StandardError ? stderr : stdout;

                // The stateful decoder keeps incomplete UTF-8 sequences for the next read.
                var charCount = state.Decoder.GetChars(buffer, 0, result.Count, chars, 0, flush: false);
                state.Pending.Append(chars, 0, charCount);

                EmitLines(taskId, state, flushPartial: false);
            }
        }
        catch (OperationCanceledException)
        {
            // Expected when task completes
            _logger.LogDebug("Log stream cancelled for container {ContainerId}", TruncateId(containerId));
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error streaming logs for container {ContainerId}", TruncateId(containerId));
        }
        finally
        {
            // Forward any trailing text that never received a newline.
            EmitLines(taskId, stdout, flushPartial: true);
            EmitLines(taskId, stderr, flushPartial: true);
        }
    }

    /// <summary>
    /// Forwards every complete line buffered for a stream and keeps the trailing partial line,
    /// unless <paramref name="flushPartial"/> is set or the partial line exceeds the size limit.
    /// </summary>
    private void EmitLines(Guid taskId, StreamState state, bool flushPartial)
    {
        var pending = state.Pending;
        if (pending.Length == 0)
        {
            return;
        }

        var text = pending.ToString();
        pending.Clear();

        var start = 0;
        while (start < text.Length)
        {
            var newline = text.IndexOf('\n', start);
            if (newline < 0)
            {
                var remainder = text.Length - start;
                if (flushPartial || remainder >= MaxPendingLineChars)
                {
                    EmitLine(taskId, text[start..], state.IsStderr);
                }
                else
                {
                    pending.Append(text, start, remainder);
                }

                break;
            }

            EmitLine(taskId, text[start..newline], state.IsStderr);
            start = newline + 1;
        }
    }

    /// <summary>
    /// Forwards a single line, ignoring lines that are empty once a trailing '\r' is removed.
    /// </summary>
    private void EmitLine(Guid taskId, string line, bool isStderr)
    {
        var trimmedLine = line.TrimEnd('\r');
        if (trimmedLine.Length == 0)
        {
            return;
        }

        var (level, message) = ParseLogLine(trimmedLine, isStderr);
        _logStreamer.Log(taskId, level, message);
    }

    /// <summary>
    /// Derives a log level from keywords in the line; falls back to Warning for stderr and
    /// Information for stdout. The message is the line itself, unchanged.
    /// </summary>
    private static (LogLevel Level, string Message) ParseLogLine(string line, bool isStderr)
    {
        // Stderr, treat as warning/error
        var baseLevel = isStderr ? LogLevel.Warning : LogLevel.Information;

        // Simple heuristic for log level detection based on content
        if (line.Contains("ERROR", StringComparison.OrdinalIgnoreCase) ||
            line.Contains("FATAL", StringComparison.OrdinalIgnoreCase))
        {
            return (LogLevel.Error, line);
        }

        if (line.Contains("WARN", StringComparison.OrdinalIgnoreCase))
        {
            return (LogLevel.Warning, line);
        }

        if (line.Contains("DEBUG", StringComparison.OrdinalIgnoreCase))
        {
            return (LogLevel.Debug, line);
        }

        if (line.Contains("TRACE", StringComparison.OrdinalIgnoreCase))
        {
            return (LogLevel.Trace, line);
        }

        return (baseLevel, line);
    }

    /// <summary>
    /// Shortens an ID for logging: strips a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }

    /// <summary>
    /// Per-stream decoding state: a stateful UTF-8 decoder and the text not yet split into lines.
    /// </summary>
    private sealed class StreamState(bool isStderr)
    {
        public bool IsStderr { get; } = isStderr;

        public System.Text.Decoder Decoder { get; } = System.Text.Encoding.UTF8.GetDecoder();

        public System.Text.StringBuilder Pending { get; } = new();
    }
}

View File

@@ -0,0 +1,105 @@
using Docker.DotNet;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Capability;
using StellaOps.Agent.Core.Exceptions;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Docker.Tasks;
namespace StellaOps.Agent.Docker;
/// <summary>
/// Docker capability for managing containers on target hosts.
/// </summary>
/// <remarks>
/// Each supported task type is dispatched to a dedicated handler; task type lookup is
/// case-insensitive.
/// </remarks>
public sealed class DockerCapability : IAgentCapability
{
    // Built once and wrapped read-only: the property previously allocated a new mutable
    // array on every access.
    private static readonly IReadOnlyList<string> s_supportedTaskTypes = Array.AsReadOnly(new[]
    {
        "docker.pull",
        "docker.run",
        "docker.stop",
        "docker.remove",
        "docker.health-check",
        "docker.logs"
    });

    private readonly IDockerClient _dockerClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<DockerCapability> _logger;
    private readonly Dictionary<string, IDockerTask> _taskHandlers;

    /// <inheritdoc />
    public string Name => "docker";

    /// <inheritdoc />
    public string Version => "1.0.0";

    /// <inheritdoc />
    public IReadOnlyList<string> SupportedTaskTypes => s_supportedTaskTypes;

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker client shared by all task handlers.</param>
    /// <param name="timeProvider">Clock passed to handlers when a task is executed.</param>
    /// <param name="logger">Logger shared with the task handlers.</param>
    public DockerCapability(
        IDockerClient dockerClient,
        TimeProvider timeProvider,
        ILogger<DockerCapability> logger)
    {
        _dockerClient = dockerClient;
        _timeProvider = timeProvider;
        _logger = logger;

        _taskHandlers = new Dictionary<string, IDockerTask>(StringComparer.OrdinalIgnoreCase)
        {
            ["docker.pull"] = new DockerPullTask(_dockerClient, logger),
            ["docker.run"] = new DockerRunTask(_dockerClient, logger),
            ["docker.stop"] = new DockerStopTask(_dockerClient, logger),
            ["docker.remove"] = new DockerRemoveTask(_dockerClient, logger),
            ["docker.health-check"] = new DockerHealthCheckTask(_dockerClient, logger),
            ["docker.logs"] = new DockerLogsTask(_dockerClient, logger)
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Queries the Docker version; any failure is logged and reported as <see langword="false"/>.
    /// </remarks>
    public async Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        try
        {
            var version = await _dockerClient.System.GetVersionAsync(ct);

            _logger.LogInformation(
                "Docker capability initialized: Docker {Version} on {OS}",
                version.Version,
                version.Os);

            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to initialize Docker capability");
            return false;
        }
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <see langword="null"/>.</exception>
    /// <exception cref="UnsupportedTaskTypeException">No handler is registered for the task type.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        if (!_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            throw new UnsupportedTaskTypeException(task.TaskType);
        }

        _logger.LogDebug("Executing task {TaskType} with ID {TaskId}", task.TaskType, task.Id);

        return await handler.ExecuteAsync(task, _timeProvider, ct);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Pings the Docker daemon; any failure is reported as an unhealthy status rather than thrown.
    /// </remarks>
    public async Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        try
        {
            await _dockerClient.System.PingAsync(ct);
            return new CapabilityHealthStatus(true, "Docker daemon responding");
        }
        catch (Exception ex)
        {
            return new CapabilityHealthStatus(false, $"Docker daemon not responding: {ex.Message}");
        }
    }
}

View File

@@ -0,0 +1,43 @@
using Docker.DotNet;
namespace StellaOps.Agent.Docker;
/// <summary>
/// Factory for creating Docker clients.
/// </summary>
public interface IDockerClientFactory
{
/// <summary>
/// Creates a Docker client for the local Docker daemon.
/// </summary>
/// <returns>A new <see cref="IDockerClient"/> instance.</returns>
IDockerClient CreateClient();
}
/// <summary>
/// Default <see cref="IDockerClientFactory"/>: holds one Docker client configuration and
/// creates clients from it.
/// </summary>
public sealed class DockerClientFactory : IDockerClientFactory
{
    private readonly DockerClientConfiguration _configuration;

    /// <summary>
    /// Creates a factory that uses the default Docker client configuration
    /// (intended for the local Docker socket).
    /// </summary>
    public DockerClientFactory() => _configuration = new DockerClientConfiguration();

    /// <summary>
    /// Creates a factory that targets a specific Docker endpoint.
    /// </summary>
    /// <param name="endpoint">Docker endpoint URI (e.g., "unix:///var/run/docker.sock" or "npipe://./pipe/docker_engine").</param>
    public DockerClientFactory(Uri endpoint) => _configuration = new DockerClientConfiguration(endpoint);

    /// <inheritdoc />
    public IDockerClient CreateClient()
    {
        return _configuration.CreateClient();
    }
}

View File

@@ -0,0 +1,83 @@
using StellaOps.Agent.Core.Exceptions;
namespace StellaOps.Agent.Docker.Exceptions;
/// <summary>
/// Raised when the payload of a task cannot be used for its task type.
/// </summary>
/// <param name="taskType">The task type whose payload was invalid.</param>
public sealed class InvalidPayloadException(string taskType)
    : AgentException($"Invalid payload for task type '{taskType}'")
{
    /// <summary>
    /// The task type with invalid payload.
    /// </summary>
    public string TaskType { get; } = taskType;
}
/// <summary>
/// Raised when pulling an image fails.
/// </summary>
/// <param name="imageRef">Reference of the image that could not be pulled.</param>
/// <param name="message">Failure detail appended to the exception message.</param>
public sealed class ImagePullException(string imageRef, string message)
    : AgentException($"Failed to pull image '{imageRef}': {message}")
{
    /// <summary>
    /// The image reference.
    /// </summary>
    public string ImageRef { get; } = imageRef;
}
/// <summary>
/// Raised when a container cannot be started.
/// </summary>
/// <param name="containerName">Name of the container that failed to start.</param>
/// <param name="message">Failure detail appended to the exception message.</param>
public sealed class ContainerStartException(string containerName, string message)
    : AgentException($"Failed to start container '{containerName}': {message}")
{
    /// <summary>
    /// The container name.
    /// </summary>
    public string ContainerName { get; } = containerName;
}
/// <summary>
/// Raised when a container cannot be located.
/// </summary>
/// <param name="containerIdentifier">Identifier that was used to look up the container.</param>
public sealed class ContainerNotFoundException(string containerIdentifier)
    : AgentException($"Container not found: '{containerIdentifier}'")
{
    /// <summary>
    /// The container identifier.
    /// </summary>
    public string ContainerIdentifier { get; } = containerIdentifier;
}

View File

@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Agent.Docker</RootNamespace>
<Description>Stella Agent Docker Capability - manages standalone Docker containers on target hosts</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Docker.DotNet" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,233 @@
using System.Text.Json;
using Docker.DotNet;
using Docker.DotNet.Models;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Docker.Exceptions;
namespace StellaOps.Agent.Docker.Tasks;
/// <summary>
/// Task handler for checking Docker container health.
/// </summary>
/// <remarks>
/// The container is inspected repeatedly until it reports a final health state, stops running,
/// or the payload timeout elapses. Cancellation requested by the caller is propagated as an
/// <see cref="OperationCanceledException"/> and is not reported as a timeout.
/// </remarks>
public sealed class DockerHealthCheckTask : IDockerTask
{
    /// <summary>Pause between two container inspections while waiting for a final health state.</summary>
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(2);

    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.health-check task.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the payload is deserialized with default System.Text.Json options, which
    /// match property names case-sensitively — confirm the orchestrator sends PascalCase names.
    /// </remarks>
    public sealed record HealthCheckPayload
    {
        /// <summary>
        /// Container ID.
        /// </summary>
        public string? ContainerId { get; init; }

        /// <summary>
        /// Container name.
        /// </summary>
        public string? ContainerName { get; init; }

        /// <summary>
        /// Timeout for health check.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);

        /// <summary>
        /// Whether to wait for healthy status.
        /// </summary>
        public bool WaitForHealthy { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker client used to list and inspect containers.</param>
    /// <param name="logger">Logger for task messages.</param>
    public DockerHealthCheckTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <exception cref="InvalidPayloadException">The payload deserializes to <see langword="null"/>.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();

        var payload = JsonSerializer.Deserialize<HealthCheckPayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.health-check");

        // Builds a result stamped with the completion time and elapsed duration.
        AgentTaskResult Complete(bool success, string? error = null, Dictionary<string, object>? outputs = null)
        {
            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = success,
                Error = error,
                Outputs = outputs ?? new Dictionary<string, object>(),
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }

        var containerId = await ResolveContainerIdAsync(payload, ct);
        if (containerId is null)
        {
            return Complete(false, "Container not found");
        }

        _logger.LogInformation("Checking health of container {ContainerId}", TruncateId(containerId));

        try
        {
            using var timeoutCts = new CancellationTokenSource(payload.Timeout);
            using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

            while (!linkedCts.IsCancellationRequested)
            {
                var containerInfo = await _dockerClient.Containers.InspectContainerAsync(containerId, linkedCts.Token);

                if (containerInfo.State.Status != "running")
                {
                    return Complete(
                        false,
                        $"Container not running (state: {containerInfo.State.Status})",
                        new Dictionary<string, object>
                        {
                            ["state"] = containerInfo.State.Status
                        });
                }

                var health = containerInfo.State.Health;
                if (health is null)
                {
                    // No health check configured, container is running
                    return Complete(
                        true,
                        outputs: new Dictionary<string, object>
                        {
                            ["containerId"] = containerId,
                            ["state"] = "running",
                            ["healthCheck"] = "none"
                        });
                }

                if (health.Status == "healthy")
                {
                    _logger.LogInformation("Container {ContainerId} is healthy", TruncateId(containerId));
                    return Complete(
                        true,
                        outputs: new Dictionary<string, object>
                        {
                            ["containerId"] = containerId,
                            ["state"] = "running",
                            ["healthStatus"] = "healthy",
                            ["failingStreak"] = health.FailingStreak
                        });
                }

                if (health.Status == "unhealthy")
                {
                    var lastLog = health.Log?.LastOrDefault();
                    return Complete(
                        false,
                        $"Container unhealthy: {lastLog?.Output ?? "unknown"}",
                        new Dictionary<string, object>
                        {
                            ["containerId"] = containerId,
                            ["healthStatus"] = "unhealthy",
                            ["failingStreak"] = health.FailingStreak
                        });
                }

                if (!payload.WaitForHealthy)
                {
                    // Caller does not want to wait: report the current, non-final status.
                    return Complete(
                        true,
                        outputs: new Dictionary<string, object>
                        {
                            ["containerId"] = containerId,
                            ["healthStatus"] = health.Status
                        });
                }

                // Wait before checking again
                await Task.Delay(PollInterval, linkedCts.Token);
            }

            throw new OperationCanceledException(linkedCts.Token);
        }
        catch (OperationCanceledException) when (!ct.IsCancellationRequested)
        {
            // Only the internal timeout ends up here; cancellation by the caller propagates
            // so it is not misreported as a health-check timeout.
            return Complete(false, $"Health check timed out after {payload.Timeout}");
        }
    }

    /// <summary>
    /// Returns the container ID from the payload, or looks it up by exact container name.
    /// </summary>
    /// <returns>The container ID, or <see langword="null"/> when neither ID nor name yields a container.</returns>
    private async Task<string?> ResolveContainerIdAsync(HealthCheckPayload payload, CancellationToken ct)
    {
        if (!string.IsNullOrEmpty(payload.ContainerId))
        {
            return payload.ContainerId;
        }

        if (!string.IsNullOrEmpty(payload.ContainerName))
        {
            // The name filter is a regular expression: escape the name so characters such as
            // '.' match literally and cannot select a different container.
            var exactName = $"^/{System.Text.RegularExpressions.Regex.Escape(payload.ContainerName)}$";

            var containers = await _dockerClient.Containers.ListContainersAsync(
                new ContainersListParameters
                {
                    All = true,
                    Filters = new Dictionary<string, IDictionary<string, bool>>
                    {
                        ["name"] = new Dictionary<string, bool> { [exactName] = true }
                    }
                },
                ct);

            return containers.FirstOrDefault()?.ID;
        }

        return null;
    }

    /// <summary>
    /// Shortens an ID for logging: strips a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}

View File

@@ -0,0 +1,197 @@
using System.Text.Json;
using Docker.DotNet;
using Docker.DotNet.Models;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Docker.Exceptions;
namespace StellaOps.Agent.Docker.Tasks;
/// <summary>
/// Task handler for retrieving Docker container logs.
/// </summary>
public sealed class DockerLogsTask : IDockerTask
{
private readonly IDockerClient _dockerClient;
private readonly ILogger _logger;
/// <summary>
/// Payload for docker.logs task.
/// </summary>
public sealed record LogsPayload
{
/// <summary>
/// Container ID.
/// </summary>
public string? ContainerId { get; init; }
/// <summary>
/// Container name.
/// </summary>
public string? ContainerName { get; init; }
/// <summary>
/// Number of lines from the end to retrieve.
/// </summary>
public int? Tail { get; init; }
/// <summary>
/// Include timestamps.
/// </summary>
public bool Timestamps { get; init; } = true;
/// <summary>
/// Only logs since this time.
/// </summary>
public string? Since { get; init; }
/// <summary>
/// Only logs before this time.
/// </summary>
public string? Until { get; init; }
}
/// <summary>
/// Creates a new instance.
/// </summary>
public DockerLogsTask(IDockerClient dockerClient, ILogger logger)
{
_dockerClient = dockerClient;
_logger = logger;
}
/// <inheritdoc />
/// <remarks>
/// Resolves the target container, reads its stdout and stderr logs and returns them as a
/// list of lines in the <c>logs</c> output. Returns a failed result when the container
/// cannot be resolved or the Docker API reports an error.
/// </remarks>
/// <exception cref="InvalidPayloadException">The payload deserializes to <c>null</c>.</exception>
public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
{
    var startTime = timeProvider.GetUtcNow();
    var payload = JsonSerializer.Deserialize<LogsPayload>(task.Payload)
        ?? throw new InvalidPayloadException("docker.logs");

    var containerId = await ResolveContainerIdAsync(payload, ct);
    if (containerId is null)
    {
        var completedAt = timeProvider.GetUtcNow();
        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = "Container not found",
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    _logger.LogInformation("Retrieving logs for container {ContainerId}", TruncateId(containerId));

    try
    {
        var logParams = new ContainerLogsParameters
        {
            ShowStdout = true,
            ShowStderr = true,
            Timestamps = payload.Timestamps
        };

        if (payload.Tail.HasValue)
        {
            // The value is sent to the Docker API, so format it culture-independently.
            logParams.Tail = payload.Tail.Value.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }

        if (!string.IsNullOrEmpty(payload.Since))
        {
            logParams.Since = payload.Since;
        }

        if (!string.IsNullOrEmpty(payload.Until))
        {
            logParams.Until = payload.Until;
        }

        // The stream wraps the HTTP response; dispose it so the connection is released.
        using var multiplexedStream = await _dockerClient.Containers.GetContainerLogsAsync(
            containerId,
            tty: false,
            logParams,
            ct);

        var logs = new List<string>();
        var buffer = new byte[81920];

        // A stateful decoder keeps multi-byte UTF-8 sequences intact when they are split
        // across two reads; pendingLine carries an unfinished line over to the next read.
        var decoder = System.Text.Encoding.UTF8.GetDecoder();
        var chars = new char[System.Text.Encoding.UTF8.GetMaxCharCount(buffer.Length)];
        var pendingLine = new System.Text.StringBuilder();

        MultiplexedStream.ReadResult result;
        while ((result = await multiplexedStream.ReadOutputAsync(buffer, 0, buffer.Length, ct)).Count > 0)
        {
            var charCount = decoder.GetChars(buffer, 0, result.Count, chars, 0, flush: false);
            var segmentStart = 0;

            for (var i = 0; i < charCount; i++)
            {
                if (chars[i] != '\n')
                {
                    continue;
                }

                pendingLine.Append(chars, segmentStart, i - segmentStart);
                FlushLine(pendingLine, logs);
                segmentStart = i + 1;
            }

            pendingLine.Append(chars, segmentStart, charCount - segmentStart);
        }

        // Emit a final line that was not terminated by a newline.
        FlushLine(pendingLine, logs);

        var completedAt = timeProvider.GetUtcNow();
        _logger.LogInformation("Retrieved {Count} log lines from container {ContainerId}", logs.Count, TruncateId(containerId));

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = true,
            Outputs = new Dictionary<string, object>
            {
                ["containerId"] = containerId,
                ["lineCount"] = logs.Count,
                ["logs"] = logs
            },
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }
    catch (DockerApiException ex)
    {
        var completedAt = timeProvider.GetUtcNow();
        _logger.LogError(ex, "Failed to retrieve logs for container {ContainerId}", TruncateId(containerId));

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = $"Failed to retrieve logs: {ex.Message}",
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    // Moves the accumulated line into the sink (dropping a trailing CR) and resets the
    // accumulator. Completely empty lines are skipped, as before.
    static void FlushLine(System.Text.StringBuilder pending, List<string> sink)
    {
        if (pending.Length == 0)
        {
            return;
        }

        sink.Add(pending.ToString().TrimEnd('\r'));
        pending.Clear();
    }
}
/// <summary>
/// Determines which container the payload refers to.
/// </summary>
/// <param name="payload">Payload carrying a container ID and/or a container name.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// The payload's container ID when present; otherwise the ID of the container (running or
/// not) whose name matches <c>ContainerName</c>; <c>null</c> when neither yields a container.
/// </returns>
private async Task<string?> ResolveContainerIdAsync(LogsPayload payload, CancellationToken ct)
{
    if (!string.IsNullOrEmpty(payload.ContainerId))
    {
        return payload.ContainerId;
    }

    if (string.IsNullOrEmpty(payload.ContainerName))
    {
        return null;
    }

    // The name filter is a regular expression. Escape the name so characters such as '.'
    // match literally and a different, similarly named container is never selected.
    var namePattern = $"^/{System.Text.RegularExpressions.Regex.Escape(payload.ContainerName)}$";

    var containers = await _dockerClient.Containers.ListContainersAsync(
        new ContainersListParameters
        {
            All = true,
            Filters = new Dictionary<string, IDictionary<string, bool>>
            {
                ["name"] = new Dictionary<string, bool> { [namePattern] = true }
            }
        },
        ct);

    return containers.FirstOrDefault()?.ID;
}
/// <summary>
/// Shortens an identifier for logging: a leading "sha256:" (any casing) is dropped and
/// at most the first 12 remaining characters are kept.
/// </summary>
private static string TruncateId(string id)
{
    const string DigestPrefix = "sha256:";
    const int ShortLength = 12;

    var start = id.StartsWith(DigestPrefix, StringComparison.OrdinalIgnoreCase)
        ? DigestPrefix.Length
        : 0;
    var length = Math.Min(ShortLength, id.Length - start);

    return id.Substring(start, length);
}
}

View File

@@ -0,0 +1,183 @@
using System.Text.Json;
using Docker.DotNet;
using Docker.DotNet.Models;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Docker.Exceptions;
namespace StellaOps.Agent.Docker.Tasks;
/// <summary>
/// Task handler for pulling Docker images.
/// </summary>
public sealed class DockerPullTask : IDockerTask
{
    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.pull task.
    /// </summary>
    public sealed record PullPayload
    {
        /// <summary>
        /// Image name (e.g., "nginx", "myregistry.com/myapp").
        /// </summary>
        public required string Image { get; init; }

        /// <summary>
        /// Image tag (e.g., "latest", "1.0.0"). Ignored when <see cref="Digest"/> is set.
        /// </summary>
        public string? Tag { get; init; }

        /// <summary>
        /// Image digest (e.g., "sha256:abc123..."). Takes precedence over <see cref="Tag"/>.
        /// </summary>
        public string? Digest { get; init; }

        /// <summary>
        /// Registry address (e.g., "registry.example.com"). When set it is prefixed to the
        /// image name and used as the server address for registry credentials.
        /// </summary>
        public string? Registry { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Client used to pull and list images.</param>
    /// <param name="logger">Logger for progress and error messages.</param>
    public DockerPullTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Pulls the referenced image, using the task's <c>registry.username</c> and
    /// <c>registry.password</c> credentials when both are present, then confirms the image
    /// is listed locally. Docker API errors become a failed result.
    /// </remarks>
    /// <exception cref="InvalidPayloadException">The payload deserializes to <c>null</c>.</exception>
    /// <exception cref="ImagePullException">The image is not listed locally after the pull.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<PullPayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.pull");

        var imageRef = BuildImageReference(payload);
        _logger.LogInformation("Pulling image {Image}", imageRef);

        try
        {
            // Get registry credentials if provided
            AuthConfig? authConfig = null;
            if (task.Credentials.TryGetValue("registry.username", out var username) &&
                task.Credentials.TryGetValue("registry.password", out var password))
            {
                authConfig = new AuthConfig
                {
                    Username = username,
                    Password = password,
                    ServerAddress = payload.Registry ?? "https://index.docker.io/v1/"
                };
            }

            await _dockerClient.Images.CreateImageAsync(
                new ImagesCreateParameters
                {
                    FromImage = imageRef
                },
                authConfig,
                new Progress<JSONMessage>(msg =>
                {
                    if (!string.IsNullOrEmpty(msg.Status))
                    {
                        _logger.LogDebug("Pull progress: {Status}", msg.Status);
                    }
                }),
                ct);

            // Verify the image was pulled
            var images = await _dockerClient.Images.ListImagesAsync(
                new ImagesListParameters
                {
                    Filters = new Dictionary<string, IDictionary<string, bool>>
                    {
                        ["reference"] = new Dictionary<string, bool> { [imageRef] = true }
                    }
                },
                ct);

            if (images.Count == 0)
            {
                throw new ImagePullException(imageRef, "Image not found after pull");
            }

            var pulledImage = images[0];
            var completedAt = timeProvider.GetUtcNow();

            _logger.LogInformation(
                "Successfully pulled image {Image} (ID: {Id})",
                imageRef,
                TruncateId(pulledImage.ID));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["imageId"] = pulledImage.ID,
                    ["size"] = pulledImage.Size,
                    ["digest"] = payload.Digest ?? ExtractDigest(pulledImage)
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (DockerApiException ex)
        {
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogError(ex, "Failed to pull image {Image}", imageRef);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to pull image: {ex.Message}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Builds the reference to pull: optional registry prefix, then digest (preferred),
    /// explicit tag, a tag or digest already embedded in the image name, or ":latest".
    /// </summary>
    private static string BuildImageReference(PullPayload payload)
    {
        var image = payload.Image;
        if (!string.IsNullOrEmpty(payload.Registry))
        {
            image = $"{payload.Registry}/{image}";
        }

        if (!string.IsNullOrEmpty(payload.Digest))
        {
            return $"{image}@{payload.Digest}";
        }

        if (!string.IsNullOrEmpty(payload.Tag))
        {
            return $"{image}:{payload.Tag}";
        }

        // Appending ":latest" to "nginx:1.25" would produce an invalid reference, so an
        // image name that already pins a tag or digest is used unchanged.
        return HasTagOrDigest(image) ? image : $"{image}:latest";
    }

    /// <summary>
    /// Returns true when the reference already contains a digest ('@') or a tag, i.e. a ':'
    /// after the last '/' (a ':' before it is a registry port).
    /// </summary>
    private static bool HasTagOrDigest(string image)
    {
        if (image.Contains('@'))
        {
            return true;
        }

        var lastSlash = image.LastIndexOf('/');
        return image.IndexOf(':', lastSlash + 1) >= 0;
    }

    /// <summary>
    /// Returns the digest part of the image's first repo digest, or an empty string when
    /// the image has none (Docker reports no repo digests for images without a registry origin).
    /// </summary>
    private static string ExtractDigest(ImagesListResponse image)
    {
        var repoDigest = image.RepoDigests?.FirstOrDefault();
        if (string.IsNullOrEmpty(repoDigest))
        {
            return string.Empty;
        }

        var separator = repoDigest.LastIndexOf('@');
        return separator >= 0 ? repoDigest[(separator + 1)..] : repoDigest;
    }

    /// <summary>
    /// Shortens an image ID for logging.
    /// </summary>
    private static string TruncateId(string id)
    {
        // Remove "sha256:" prefix if present and take first 12 chars
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}

View File

@@ -0,0 +1,155 @@
using System.Text.Json;
using Docker.DotNet;
using Docker.DotNet.Models;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Docker.Exceptions;
namespace StellaOps.Agent.Docker.Tasks;
/// <summary>
/// Task handler for removing Docker containers. Removal is idempotent: a container that
/// does not exist is reported as a success with <c>alreadyRemoved = true</c>.
/// </summary>
public sealed class DockerRemoveTask : IDockerTask
{
    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.remove task.
    /// </summary>
    public sealed record RemovePayload
    {
        /// <summary>
        /// Container ID. When non-empty it is used directly and <see cref="ContainerName"/> is ignored.
        /// </summary>
        public string? ContainerId { get; init; }

        /// <summary>
        /// Container name, used for lookup when <see cref="ContainerId"/> is null or empty.
        /// </summary>
        public string? ContainerName { get; init; }

        /// <summary>
        /// Whether to force removal (kill if running).
        /// </summary>
        public bool Force { get; init; }

        /// <summary>
        /// Whether to remove anonymous volumes.
        /// </summary>
        public bool RemoveVolumes { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Client used to look up and remove containers.</param>
    /// <param name="logger">Logger for progress and error messages.</param>
    public DockerRemoveTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <exception cref="InvalidPayloadException">The payload deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<RemovePayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.remove");

        var containerId = await ResolveContainerIdAsync(payload, ct);
        if (containerId is null)
        {
            // Nothing resolved (this also covers a payload with neither ID nor name).
            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["alreadyRemoved"] = true
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }

        _logger.LogInformation("Removing container {ContainerId}", TruncateId(containerId));

        try
        {
            await _dockerClient.Containers.RemoveContainerAsync(
                containerId,
                new ContainerRemoveParameters
                {
                    Force = payload.Force,
                    RemoveVolumes = payload.RemoveVolumes
                },
                ct);

            var completedAt = timeProvider.GetUtcNow();
            _logger.LogInformation("Container {ContainerId} removed", TruncateId(containerId));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["containerId"] = containerId,
                    ["alreadyRemoved"] = false
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (DockerContainerNotFoundException)
        {
            // A container addressed by ID (or removed between lookup and removal) that no
            // longer exists is treated the same as a name that resolves to nothing.
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogInformation("Container {ContainerId} does not exist, nothing to remove", TruncateId(containerId));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["containerId"] = containerId,
                    ["alreadyRemoved"] = true
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (DockerApiException ex)
        {
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogError(ex, "Failed to remove container {ContainerId}", TruncateId(containerId));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to remove container: {ex.Message}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Returns the payload's container ID when present, otherwise the ID of the container
    /// (running or not) with the given name, or <c>null</c> when nothing matches.
    /// </summary>
    private async Task<string?> ResolveContainerIdAsync(RemovePayload payload, CancellationToken ct)
    {
        if (!string.IsNullOrEmpty(payload.ContainerId))
        {
            return payload.ContainerId;
        }

        if (string.IsNullOrEmpty(payload.ContainerName))
        {
            return null;
        }

        // The name filter is a regular expression. Escape the name so that e.g. "my.app"
        // can never resolve to (and remove) a different container such as "my-app".
        var namePattern = $"^/{System.Text.RegularExpressions.Regex.Escape(payload.ContainerName)}$";

        var containers = await _dockerClient.Containers.ListContainersAsync(
            new ContainersListParameters
            {
                All = true,
                Filters = new Dictionary<string, IDictionary<string, bool>>
                {
                    ["name"] = new Dictionary<string, bool> { [namePattern] = true }
                }
            },
            ct);

        return containers.FirstOrDefault()?.ID;
    }

    /// <summary>
    /// Shortens a container ID for logging: drops a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}

View File

@@ -0,0 +1,354 @@
using System.Text.Json;
using System.Text.RegularExpressions;
using Docker.DotNet;
using Docker.DotNet.Models;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Docker.Exceptions;
namespace StellaOps.Agent.Docker.Tasks;
/// <summary>
/// Task handler for running Docker containers. An existing container with the same name
/// is stopped and removed before the new one is created and started.
/// </summary>
public sealed partial class DockerRunTask : IDockerTask
{
    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.run task.
    /// </summary>
    public sealed record RunPayload
    {
        /// <summary>
        /// Image to run.
        /// </summary>
        public required string Image { get; init; }

        /// <summary>
        /// Container name.
        /// </summary>
        public required string Name { get; init; }

        /// <summary>
        /// Environment variables. <c>${name}</c> placeholders in values are replaced with
        /// the matching task variable; unknown placeholders are left as written.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// Port mappings (e.g., ["8080:80", "443:443/tcp", "127.0.0.1:8080:80"]).
        /// </summary>
        public IReadOnlyList<string>? Ports { get; init; }

        /// <summary>
        /// Volume mounts (e.g., ["/host/path:/container/path"]).
        /// </summary>
        public IReadOnlyList<string>? Volumes { get; init; }

        /// <summary>
        /// Container labels. The handler adds <c>stella.managed</c> and <c>stella.task.id</c>.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Labels { get; init; }

        /// <summary>
        /// Network mode.
        /// </summary>
        public string? Network { get; init; }

        /// <summary>
        /// Command to run.
        /// </summary>
        public IReadOnlyList<string>? Command { get; init; }

        /// <summary>
        /// Health check configuration.
        /// </summary>
        public ContainerHealthConfig? HealthCheck { get; init; }

        /// <summary>
        /// Whether to auto-remove the container when it exits.
        /// </summary>
        public bool AutoRemove { get; init; }

        /// <summary>
        /// Restart policy.
        /// </summary>
        public ContainerRestartPolicy? RestartPolicy { get; init; }
    }

    /// <summary>
    /// Health check configuration.
    /// </summary>
    public sealed record ContainerHealthConfig
    {
        /// <summary>
        /// Test command (e.g., ["CMD", "curl", "-f", "http://localhost/"]).
        /// </summary>
        public required IReadOnlyList<string> Test { get; init; }

        /// <summary>
        /// Interval between health checks. Defaults to 30 seconds.
        /// </summary>
        public TimeSpan Interval { get; init; } = TimeSpan.FromSeconds(30);

        /// <summary>
        /// Timeout for each health check. Defaults to 10 seconds.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(10);

        /// <summary>
        /// Number of retries before marking unhealthy. Defaults to 3.
        /// </summary>
        public int Retries { get; init; } = 3;

        /// <summary>
        /// Start period to wait before counting failures. Defaults to zero.
        /// </summary>
        public TimeSpan StartPeriod { get; init; } = TimeSpan.Zero;
    }

    /// <summary>
    /// Restart policy configuration.
    /// </summary>
    public sealed record ContainerRestartPolicy
    {
        /// <summary>
        /// Policy name (no, always, unless-stopped, on-failure). Unrecognized names are treated as "no".
        /// </summary>
        public string Name { get; init; } = "no";

        /// <summary>
        /// Maximum retry count for on-failure policy.
        /// </summary>
        public int MaximumRetryCount { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Client used to create, start and inspect containers.</param>
    /// <param name="logger">Logger for progress and error messages.</param>
    public DockerRunTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <exception cref="InvalidPayloadException">The payload deserializes to <c>null</c>.</exception>
    /// <exception cref="ContainerStartException">Docker reports that the container was not started.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<RunPayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.run");

        _logger.LogInformation(
            "Creating container {Name} from image {Image}",
            payload.Name,
            payload.Image);

        try
        {
            await RemoveExistingContainerAsync(payload.Name, ct);

            // Merge labels with Stella metadata
            var labels = new Dictionary<string, string>(payload.Labels ?? new Dictionary<string, string>());
            labels["stella.managed"] = "true";
            labels["stella.task.id"] = task.Id.ToString();

            // Build create parameters
            var createParams = new CreateContainerParameters
            {
                Image = payload.Image,
                Name = payload.Name,
                Env = BuildEnvironment(payload.Environment, task.Variables),
                Labels = labels,
                Cmd = payload.Command?.ToList(),
                HostConfig = new HostConfig
                {
                    PortBindings = ParsePortBindings(payload.Ports),
                    Binds = payload.Volumes?.ToList(),
                    NetworkMode = payload.Network,
                    AutoRemove = payload.AutoRemove,
                    RestartPolicy = payload.RestartPolicy is not null
                        ? new global::Docker.DotNet.Models.RestartPolicy
                        {
                            Name = ParseRestartPolicyKind(payload.RestartPolicy.Name),
                            MaximumRetryCount = payload.RestartPolicy.MaximumRetryCount
                        }
                        : null
                },
                Healthcheck = payload.HealthCheck is not null
                    ? new HealthConfig
                    {
                        Test = payload.HealthCheck.Test.ToList(),
                        Interval = payload.HealthCheck.Interval,
                        Timeout = payload.HealthCheck.Timeout,
                        Retries = payload.HealthCheck.Retries,
                        // StartPeriod is expressed in nanoseconds; one tick is 100 ns.
                        StartPeriod = payload.HealthCheck.StartPeriod.Ticks * 100
                    }
                    : null
            };

            // Create container
            var createResponse = await _dockerClient.Containers.CreateContainerAsync(createParams, ct);

            _logger.LogInformation(
                "Created container {Name} (ID: {Id})",
                payload.Name,
                TruncateId(createResponse.ID));

            // Start container
            var started = await _dockerClient.Containers.StartContainerAsync(
                createResponse.ID,
                new ContainerStartParameters(),
                ct);

            if (!started)
            {
                throw new ContainerStartException(payload.Name, "Container failed to start");
            }

            // Get container info
            var containerInfo = await _dockerClient.Containers.InspectContainerAsync(createResponse.ID, ct);
            var completedAt = timeProvider.GetUtcNow();

            _logger.LogInformation(
                "Started container {Name} (State: {State})",
                payload.Name,
                containerInfo.State.Status);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["containerId"] = createResponse.ID,
                    ["containerName"] = payload.Name,
                    ["state"] = containerInfo.State.Status,
                    ["ipAddress"] = ResolveIpAddress(containerInfo.NetworkSettings)
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (DockerApiException ex)
        {
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogError(ex, "Failed to create/start container {Name}", payload.Name);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to create/start container: {ex.Message}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Stops (when running) and removes a container that already uses the requested name,
    /// so the name is free for the container about to be created.
    /// </summary>
    private async Task RemoveExistingContainerAsync(string name, CancellationToken ct)
    {
        // The name filter is a regular expression; escape the name so only the container
        // with exactly this name is ever selected for removal.
        var existingContainers = await _dockerClient.Containers.ListContainersAsync(
            new ContainersListParameters
            {
                All = true,
                Filters = new Dictionary<string, IDictionary<string, bool>>
                {
                    ["name"] = new Dictionary<string, bool> { [$"^/{Regex.Escape(name)}$"] = true }
                }
            },
            ct);

        if (existingContainers.Count == 0)
        {
            return;
        }

        var existing = existingContainers[0];
        _logger.LogInformation(
            "Container {Name} already exists (ID: {Id}), removing",
            name,
            TruncateId(existing.ID));

        if (existing.State == "running")
        {
            // Give a running container the chance to shut down gracefully first.
            await _dockerClient.Containers.StopContainerAsync(existing.ID, new ContainerStopParameters(), ct);
        }

        // Force the removal so containers in other active states (paused, restarting)
        // do not make the replacement fail.
        await _dockerClient.Containers.RemoveContainerAsync(
            existing.ID,
            new ContainerRemoveParameters { Force = true },
            ct);
    }

    /// <summary>
    /// Converts the payload's environment map into Docker's "KEY=value" list, resolving
    /// <c>${name}</c> placeholders against the task variables.
    /// </summary>
    private static List<string> BuildEnvironment(
        IReadOnlyDictionary<string, string>? env,
        IReadOnlyDictionary<string, string> variables)
    {
        var result = new List<string>();
        if (env is not null)
        {
            foreach (var (key, value) in env)
            {
                // Substitute variables in values
                var resolvedValue = SubstituteVariables(value, variables);
                result.Add($"{key}={resolvedValue}");
            }
        }

        return result;
    }

    /// <summary>
    /// Replaces every <c>${name}</c> with the value of the variable <c>name</c>; placeholders
    /// without a matching variable are kept verbatim.
    /// </summary>
    private static string SubstituteVariables(string value, IReadOnlyDictionary<string, string> variables)
    {
        return VariablePattern().Replace(value, match =>
        {
            var varName = match.Groups[1].Value;
            return variables.TryGetValue(varName, out var varValue) ? varValue : match.Value;
        });
    }

    [GeneratedRegex(@"\$\{([^}]+)\}")]
    private static partial Regex VariablePattern();

    /// <summary>
    /// Parses port mappings into Docker port bindings keyed by "containerPort/protocol".
    /// </summary>
    /// <remarks>
    /// Accepted forms are <c>hostPort:containerPort[/protocol]</c> and
    /// <c>hostIp:hostPort:containerPort[/protocol]</c>; the protocol defaults to tcp.
    /// Entries in any other form are skipped. Several mappings for the same container
    /// port are all kept.
    /// </remarks>
    private static IDictionary<string, IList<PortBinding>> ParsePortBindings(IReadOnlyList<string>? ports)
    {
        var bindings = new Dictionary<string, IList<PortBinding>>();
        if (ports is null)
        {
            return bindings;
        }

        foreach (var port in ports)
        {
            var parts = port.Split(':');

            string? hostIp = null;
            string hostPort;
            string containerPortWithProtocol;

            switch (parts.Length)
            {
                case 2:
                    hostPort = parts[0];
                    containerPortWithProtocol = parts[1];
                    break;
                case 3:
                    hostIp = parts[0];
                    hostPort = parts[1];
                    containerPortWithProtocol = parts[2];
                    break;
                default:
                    continue;
            }

            var containerPort = containerPortWithProtocol.Contains('/')
                ? containerPortWithProtocol
                : $"{containerPortWithProtocol}/tcp";

            if (!bindings.TryGetValue(containerPort, out var hostBindings))
            {
                hostBindings = new List<PortBinding>();
                bindings[containerPort] = hostBindings;
            }

            hostBindings.Add(new PortBinding { HostIP = hostIp, HostPort = hostPort });
        }

        return bindings;
    }

    /// <summary>
    /// Returns the container's top-level IP address, falling back to the first non-empty
    /// address among its attached networks (the top-level value is empty for containers
    /// on user-defined networks). Returns an empty string when no address is available.
    /// </summary>
    private static string ResolveIpAddress(NetworkSettings? settings)
    {
        if (settings is null)
        {
            return string.Empty;
        }

        if (!string.IsNullOrEmpty(settings.IPAddress))
        {
            return settings.IPAddress;
        }

        return settings.Networks?.Values
            .Select(network => network?.IPAddress)
            .FirstOrDefault(address => !string.IsNullOrEmpty(address))
            ?? string.Empty;
    }

    /// <summary>
    /// Maps a policy name (case-insensitive) to the Docker restart policy kind; unknown names map to "no".
    /// </summary>
    private static RestartPolicyKind ParseRestartPolicyKind(string name)
    {
        return name.ToLowerInvariant() switch
        {
            "no" => RestartPolicyKind.No,
            "always" => RestartPolicyKind.Always,
            "unless-stopped" => RestartPolicyKind.UnlessStopped,
            "on-failure" => RestartPolicyKind.OnFailure,
            _ => RestartPolicyKind.No
        };
    }

    /// <summary>
    /// Shortens a container ID for logging: drops a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}

View File

@@ -0,0 +1,154 @@
using System.Text.Json;
using Docker.DotNet;
using Docker.DotNet.Models;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Docker.Exceptions;
namespace StellaOps.Agent.Docker.Tasks;
/// <summary>
/// Task handler for stopping Docker containers.
/// </summary>
public sealed class DockerStopTask : IDockerTask
{
    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.stop task.
    /// </summary>
    public sealed record StopPayload
    {
        /// <summary>
        /// Container ID. When non-empty it is used directly and <see cref="ContainerName"/> is ignored.
        /// </summary>
        public string? ContainerId { get; init; }

        /// <summary>
        /// Container name, used for lookup when <see cref="ContainerId"/> is null or empty.
        /// </summary>
        public string? ContainerName { get; init; }

        /// <summary>
        /// Timeout before killing the container. Defaults to 30 seconds; only whole seconds
        /// are sent to Docker and negative values are treated as zero.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Client used to look up and stop containers.</param>
    /// <param name="logger">Logger for progress and error messages.</param>
    public DockerStopTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// The result's <c>wasRunning</c> output is the value returned by the stop call;
    /// <c>false</c> is logged as "already stopped" and still counts as success.
    /// </remarks>
    /// <exception cref="InvalidPayloadException">The payload deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<StopPayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.stop");

        var containerId = await ResolveContainerIdAsync(payload, ct);
        if (containerId is null)
        {
            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = "Container not found",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }

        _logger.LogInformation("Stopping container {ContainerId}", TruncateId(containerId));

        try
        {
            // Clamp before converting: casting a negative or oversized double to uint does
            // not produce a meaningful value.
            var waitSeconds = (uint)Math.Clamp(payload.Timeout.TotalSeconds, 0d, uint.MaxValue);

            var stopped = await _dockerClient.Containers.StopContainerAsync(
                containerId,
                new ContainerStopParameters
                {
                    WaitBeforeKillSeconds = waitSeconds
                },
                ct);

            var completedAt = timeProvider.GetUtcNow();

            if (stopped)
            {
                _logger.LogInformation("Container {ContainerId} stopped", TruncateId(containerId));
            }
            else
            {
                _logger.LogWarning("Container {ContainerId} was already stopped", TruncateId(containerId));
            }

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["containerId"] = containerId,
                    ["wasRunning"] = stopped
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (DockerApiException ex)
        {
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogError(ex, "Failed to stop container {ContainerId}", TruncateId(containerId));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to stop container: {ex.Message}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Returns the payload's container ID when present, otherwise the ID of the container
    /// (running or not) with the given name, or <c>null</c> when nothing matches.
    /// </summary>
    private async Task<string?> ResolveContainerIdAsync(StopPayload payload, CancellationToken ct)
    {
        if (!string.IsNullOrEmpty(payload.ContainerId))
        {
            return payload.ContainerId;
        }

        if (string.IsNullOrEmpty(payload.ContainerName))
        {
            return null;
        }

        // The name filter is a regular expression. Escape the name so that e.g. "my.app"
        // can never resolve to (and stop) a different container such as "my-app".
        var namePattern = $"^/{System.Text.RegularExpressions.Regex.Escape(payload.ContainerName)}$";

        var containers = await _dockerClient.Containers.ListContainersAsync(
            new ContainersListParameters
            {
                All = true,
                Filters = new Dictionary<string, IDictionary<string, bool>>
                {
                    ["name"] = new Dictionary<string, bool> { [namePattern] = true }
                }
            },
            ct);

        return containers.FirstOrDefault()?.ID;
    }

    /// <summary>
    /// Shortens a container ID for logging: drops a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}

View File

@@ -0,0 +1,18 @@
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Docker.Tasks;
/// <summary>
/// Interface for Docker task handlers. Each handler executes one kind of Docker task
/// (for example logs, pull, remove, run or stop) described by an <see cref="AgentTaskInfo"/>.
/// </summary>
public interface IDockerTask
{
/// <summary>
/// Executes the task.
/// </summary>
/// <param name="task">Task information, including the payload the handler deserializes.</param>
/// <param name="timeProvider">Time provider for deterministic timestamps; handlers use it for start and completion times.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Task result describing success or failure, outputs, completion time and duration.</returns>
Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default);
}

View File

@@ -0,0 +1,216 @@
using Amazon.CloudWatchLogs;
using Amazon.CloudWatchLogs.Model;
using Microsoft.Extensions.Logging;
using Task = System.Threading.Tasks.Task;
namespace StellaOps.Agent.Ecs;
/// <summary>
/// Streams logs from CloudWatch Logs for ECS tasks by polling a log stream and raising
/// <see cref="LogReceived"/> for every event.
/// </summary>
public sealed class CloudWatchLogStreamer
{
    private readonly IAmazonCloudWatchLogs _logsClient;
    private readonly ILogger<CloudWatchLogStreamer> _logger;

    /// <summary>
    /// Event raised when a log message is received.
    /// </summary>
    public event EventHandler<LogMessageEventArgs>? LogReceived;

    /// <summary>
    /// Creates a new CloudWatch log streamer.
    /// </summary>
    /// <param name="logsClient">CloudWatch Logs client used to fetch events.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    public CloudWatchLogStreamer(
        IAmazonCloudWatchLogs logsClient,
        ILogger<CloudWatchLogStreamer> logger)
    {
        _logsClient = logsClient;
        _logger = logger;
    }

    /// <summary>
    /// Streams logs from a CloudWatch log group/stream until cancellation is requested.
    /// </summary>
    /// <remarks>
    /// Events are read oldest-first, starting at <paramref name="startTime"/> when given and
    /// at the beginning of the stream otherwise. When a poll returns no new page the method
    /// waits two seconds before polling again. Cancellation, a missing log stream and any
    /// other error end the stream and are logged rather than thrown.
    /// </remarks>
    /// <param name="logGroupName">The log group name.</param>
    /// <param name="logStreamName">The log stream name.</param>
    /// <param name="startTime">The start time for log retrieval.</param>
    /// <param name="ct">Cancellation token.</param>
    public async Task StreamLogsAsync(
        string logGroupName,
        string logStreamName,
        DateTimeOffset? startTime = null,
        CancellationToken ct = default)
    {
        string? nextToken = null;
        var startTimeUtc = startTime?.UtcDateTime;

        _logger.LogDebug(
            "Starting log stream from {LogGroup}/{LogStream}",
            logGroupName,
            logStreamName);

        try
        {
            while (!ct.IsCancellationRequested)
            {
                var request = new GetLogEventsRequest
                {
                    LogGroupName = logGroupName,
                    LogStreamName = logStreamName,
                    // Forward streaming must read from the head: GetLogEvents requires
                    // startFromHead = true whenever a nextForwardToken is used, and with
                    // false the first page would hold the latest events, skipping
                    // everything between the start time and that page.
                    StartFromHead = true,
                    NextToken = nextToken
                };

                if (startTimeUtc.HasValue && nextToken is null)
                {
                    request.StartTime = startTimeUtc.Value;
                }

                var response = await _logsClient.GetLogEventsAsync(request, ct);

                // The SDK may leave the collection unset when a page has no events.
                if (response.Events is not null)
                {
                    foreach (var logEvent in response.Events)
                    {
                        var message = logEvent.Message ?? string.Empty;
                        OnLogReceived(new LogMessageEventArgs(
                            logGroupName,
                            logStreamName,
                            logEvent.Timestamp ?? DateTime.UtcNow,
                            DetectLogLevel(message),
                            message));
                    }
                }

                // If token hasn't changed, no new logs - wait before polling
                if (response.NextForwardToken == nextToken)
                {
                    await Task.Delay(TimeSpan.FromSeconds(2), ct);
                }

                nextToken = response.NextForwardToken;
            }
        }
        catch (OperationCanceledException)
        {
            _logger.LogDebug("Log streaming cancelled");
        }
        catch (ResourceNotFoundException)
        {
            _logger.LogWarning(
                "Log stream {LogGroup}/{LogStream} not found",
                logGroupName,
                logStreamName);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(
                ex,
                "Error streaming logs from {LogGroup}/{LogStream}",
                logGroupName,
                logStreamName);
        }
    }

    /// <summary>
    /// Gets the log stream name for an ECS task.
    /// </summary>
    /// <param name="logStreamPrefix">The log stream prefix configured in the task definition.</param>
    /// <param name="containerName">The container name.</param>
    /// <param name="taskId">The task ID (last part of task ARN).</param>
    /// <returns>The full log stream name, in the form <c>prefix/container/taskId</c>.</returns>
    public static string GetTaskLogStreamName(
        string logStreamPrefix,
        string containerName,
        string taskId)
    {
        return $"{logStreamPrefix}/{containerName}/{taskId}";
    }

    /// <summary>
    /// Extracts the task ID from a task ARN.
    /// </summary>
    /// <param name="taskArn">The task ARN.</param>
    /// <returns>The text after the last '/', or the whole input when it contains no '/'.</returns>
    public static string ExtractTaskId(string taskArn)
    {
        var lastSlash = taskArn.LastIndexOf('/');
        return lastSlash >= 0 ? taskArn[(lastSlash + 1)..] : taskArn;
    }

    private void OnLogReceived(LogMessageEventArgs e)
    {
        LogReceived?.Invoke(this, e);
    }

    /// <summary>
    /// Guesses a log level from keywords in the message (case-insensitive). Error keywords
    /// win over warning keywords, which win over debug/trace; anything else is Information.
    /// </summary>
    private static LogLevel DetectLogLevel(string message)
    {
        if (string.IsNullOrEmpty(message))
        {
            return LogLevel.Information;
        }

        if (ContainsKeyword(message, "ERROR") || ContainsKeyword(message, "FATAL") ||
            ContainsKeyword(message, "EXCEPTION") || ContainsKeyword(message, "FAIL"))
        {
            return LogLevel.Error;
        }

        if (ContainsKeyword(message, "WARN"))
        {
            return LogLevel.Warning;
        }

        if (ContainsKeyword(message, "DEBUG") || ContainsKeyword(message, "TRACE"))
        {
            return LogLevel.Debug;
        }

        return LogLevel.Information;

        // Case-insensitive search without allocating an upper-cased copy of every message.
        static bool ContainsKeyword(string text, string keyword)
            => text.Contains(keyword, StringComparison.OrdinalIgnoreCase);
    }
}
/// <summary>
/// Event payload describing a single log message and the stream it came from.
/// </summary>
public sealed class LogMessageEventArgs : EventArgs
{
    /// <summary>
    /// Initializes the event args with the message's origin, time, level and text.
    /// </summary>
    /// <param name="logGroup">Name of the log group.</param>
    /// <param name="logStream">Name of the log stream.</param>
    /// <param name="timestamp">Timestamp of the log event.</param>
    /// <param name="level">Log level detected for the message.</param>
    /// <param name="message">The message text.</param>
    public LogMessageEventArgs(
        string logGroup,
        string logStream,
        DateTime timestamp,
        LogLevel level,
        string message)
        => (LogGroup, LogStream, Timestamp, Level, Message) = (logGroup, logStream, timestamp, level, message);

    /// <summary>Name of the log group the message belongs to.</summary>
    public string LogGroup { get; }

    /// <summary>Name of the log stream the message belongs to.</summary>
    public string LogStream { get; }

    /// <summary>Timestamp of the log event.</summary>
    public DateTime Timestamp { get; }

    /// <summary>Log level detected for the message.</summary>
    public LogLevel Level { get; }

    /// <summary>The message text.</summary>
    public string Message { get; }
}

View File

@@ -0,0 +1,222 @@
using System.Text.Json;
using Amazon.CloudWatchLogs;
using Amazon.ECS;
using Amazon.ECS.Model;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Capability;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Ecs.Tasks;
namespace StellaOps.Agent.Ecs;
/// <summary>
/// Agent capability for managing AWS ECS services and tasks.
/// </summary>
public sealed class EcsCapability : IAgentCapability, IAsyncDisposable
{
private readonly IAmazonECS _ecsClient;
private readonly IAmazonCloudWatchLogs _logsClient;
private readonly TimeProvider _timeProvider;
private readonly ILoggerFactory _loggerFactory;
private readonly ILogger<EcsCapability> _logger;
private readonly Dictionary<string, Func<AgentTaskInfo, CancellationToken, Task<AgentTaskResult>>> _taskHandlers;
/// <summary>
/// Gets the capability name.
/// </summary>
public string Name => "ecs";
/// <summary>
/// Gets the capability version.
/// </summary>
public string Version => "1.0.0";
/// <summary>
/// Gets the supported task types.
/// </summary>
public IReadOnlyList<string> SupportedTaskTypes { get; } = new[]
{
"ecs.deploy",
"ecs.run",
"ecs.stop",
"ecs.scale",
"ecs.register",
"ecs.health",
"ecs.describe"
};
/// <summary>
/// Creates a new ECS capability.
/// </summary>
/// <param name="ecsClient">The ECS client.</param>
/// <param name="logsClient">The CloudWatch Logs client.</param>
/// <param name="timeProvider">Time provider for timestamps.</param>
/// <param name="loggerFactory">Logger factory.</param>
public EcsCapability(
IAmazonECS ecsClient,
IAmazonCloudWatchLogs logsClient,
TimeProvider timeProvider,
ILoggerFactory loggerFactory)
{
_ecsClient = ecsClient ?? throw new ArgumentNullException(nameof(ecsClient));
_logsClient = logsClient ?? throw new ArgumentNullException(nameof(logsClient));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
_logger = loggerFactory.CreateLogger<EcsCapability>();
_taskHandlers = new Dictionary<string, Func<AgentTaskInfo, CancellationToken, Task<AgentTaskResult>>>
{
["ecs.deploy"] = ExecuteDeployAsync,
["ecs.run"] = ExecuteRunTaskAsync,
["ecs.stop"] = ExecuteStopTaskAsync,
["ecs.scale"] = ExecuteScaleAsync,
["ecs.register"] = ExecuteRegisterAsync,
["ecs.health"] = ExecuteHealthCheckAsync,
["ecs.describe"] = ExecuteDescribeAsync
};
}
/// <inheritdoc />
public async Task<bool> InitializeAsync(CancellationToken ct = default)
{
try
{
// Verify AWS credentials and ECS access by listing clusters
var response = await _ecsClient.ListClustersAsync(new ListClustersRequest
{
MaxResults = 1
}, ct);
_logger.LogInformation(
"ECS capability initialized, AWS API accessible");
return true;
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to initialize ECS capability - AWS API not accessible");
return false;
}
}
/// <inheritdoc />
public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
{
if (!_taskHandlers.TryGetValue(task.TaskType, out var handler))
{
throw new InvalidEcsPayloadException(task.TaskType, "Unsupported task type");
}
var startTime = _timeProvider.GetUtcNow();
try
{
var result = await handler(task, ct);
return result with
{
Duration = _timeProvider.GetUtcNow() - startTime
};
}
catch (InvalidEcsPayloadException)
{
throw;
}
catch (Exception ex)
{
_logger.LogError(ex, "ECS task {TaskType} failed", task.TaskType);
return new AgentTaskResult
{
TaskId = task.Id,
Success = false,
Error = ex.Message,
CompletedAt = _timeProvider.GetUtcNow(),
Duration = _timeProvider.GetUtcNow() - startTime
};
}
}
/// <inheritdoc />
public async Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
{
try
{
await _ecsClient.ListClustersAsync(new ListClustersRequest { MaxResults = 1 }, ct);
return new CapabilityHealthStatus(true, "ECS capability ready");
}
catch (Exception ex)
{
return new CapabilityHealthStatus(false, $"ECS API not accessible: {ex.Message}");
}
}
private async Task<AgentTaskResult> ExecuteDeployAsync(AgentTaskInfo task, CancellationToken ct)
{
var taskHandler = new EcsDeployServiceTask(
_ecsClient,
_timeProvider,
_loggerFactory.CreateLogger<EcsDeployServiceTask>());
return await taskHandler.ExecuteAsync(task, ct);
}
/// <summary>
/// Runs <paramref name="task"/> through a newly constructed <see cref="EcsRunTaskTask"/>.
/// </summary>
/// <param name="task">The agent task to execute.</param>
/// <param name="ct">Cancellation token forwarded to the handler.</param>
/// <returns>The result produced by the handler.</returns>
private async Task<AgentTaskResult> ExecuteRunTaskAsync(AgentTaskInfo task, CancellationToken ct)
{
    var handlerLogger = _loggerFactory.CreateLogger<EcsRunTaskTask>();
    var runTask = new EcsRunTaskTask(_ecsClient, _timeProvider, handlerLogger);
    return await runTask.ExecuteAsync(task, ct);
}
/// <summary>
/// Runs <paramref name="task"/> through a newly constructed <see cref="EcsStopTaskTask"/>.
/// </summary>
/// <param name="task">The agent task to execute.</param>
/// <param name="ct">Cancellation token forwarded to the handler.</param>
/// <returns>The result produced by the handler.</returns>
private async Task<AgentTaskResult> ExecuteStopTaskAsync(AgentTaskInfo task, CancellationToken ct)
{
    var handlerLogger = _loggerFactory.CreateLogger<EcsStopTaskTask>();
    var stopTask = new EcsStopTaskTask(_ecsClient, _timeProvider, handlerLogger);
    return await stopTask.ExecuteAsync(task, ct);
}
/// <summary>
/// Runs <paramref name="task"/> through a newly constructed <see cref="EcsScaleServiceTask"/>.
/// </summary>
/// <param name="task">The agent task to execute.</param>
/// <param name="ct">Cancellation token forwarded to the handler.</param>
/// <returns>The result produced by the handler.</returns>
private async Task<AgentTaskResult> ExecuteScaleAsync(AgentTaskInfo task, CancellationToken ct)
{
    var handlerLogger = _loggerFactory.CreateLogger<EcsScaleServiceTask>();
    var scaleTask = new EcsScaleServiceTask(_ecsClient, _timeProvider, handlerLogger);
    return await scaleTask.ExecuteAsync(task, ct);
}
/// <summary>
/// Runs <paramref name="task"/> through a newly constructed
/// <see cref="EcsRegisterTaskDefinitionTask"/>.
/// </summary>
/// <param name="task">The agent task to execute.</param>
/// <param name="ct">Cancellation token forwarded to the handler.</param>
/// <returns>The result produced by the handler.</returns>
private async Task<AgentTaskResult> ExecuteRegisterAsync(AgentTaskInfo task, CancellationToken ct)
{
    var handlerLogger = _loggerFactory.CreateLogger<EcsRegisterTaskDefinitionTask>();
    var registerTask = new EcsRegisterTaskDefinitionTask(_ecsClient, _timeProvider, handlerLogger);
    return await registerTask.ExecuteAsync(task, ct);
}
/// <summary>
/// Runs <paramref name="task"/> through a newly constructed <see cref="EcsHealthCheckTask"/>.
/// </summary>
/// <param name="task">The agent task to execute.</param>
/// <param name="ct">Cancellation token forwarded to the handler.</param>
/// <returns>The result produced by the handler.</returns>
private async Task<AgentTaskResult> ExecuteHealthCheckAsync(AgentTaskInfo task, CancellationToken ct)
{
    var handlerLogger = _loggerFactory.CreateLogger<EcsHealthCheckTask>();
    var healthTask = new EcsHealthCheckTask(_ecsClient, _timeProvider, handlerLogger);
    return await healthTask.ExecuteAsync(task, ct);
}
/// <summary>
/// Handler registered for the <c>ecs.describe</c> task type: runs <paramref name="task"/>
/// through a newly constructed <see cref="EcsDescribeServiceTask"/>.
/// </summary>
/// <param name="task">The agent task to execute.</param>
/// <param name="ct">Cancellation token forwarded to the handler.</param>
/// <returns>The result produced by the handler.</returns>
private async Task<AgentTaskResult> ExecuteDescribeAsync(AgentTaskInfo task, CancellationToken ct)
{
    var handlerLogger = _loggerFactory.CreateLogger<EcsDescribeServiceTask>();
    var describeTask = new EcsDescribeServiceTask(_ecsClient, _timeProvider, handlerLogger);
    return await describeTask.ExecuteAsync(task, ct);
}
/// <inheritdoc />
/// <remarks>
/// Both AWS clients are disposed synchronously. The logs client is released in a
/// <c>finally</c> block so it is not leaked if disposing the ECS client throws.
/// </remarks>
public ValueTask DisposeAsync()
{
    try
    {
        _ecsClient.Dispose();
    }
    finally
    {
        _logsClient.Dispose();
    }

    return ValueTask.CompletedTask;
}
}

View File

@@ -0,0 +1,86 @@
namespace StellaOps.Agent.Ecs;
/// <summary>
/// Root of the ECS agent exception hierarchy; the more specific ECS exceptions in this
/// namespace derive from it.
/// </summary>
public class EcsAgentException : Exception
{
    /// <summary>
    /// Creates the exception with the given message.
    /// </summary>
    /// <param name="message">Human-readable description of the failure.</param>
    public EcsAgentException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with the given message and underlying cause.
    /// </summary>
    /// <param name="message">Human-readable description of the failure.</param>
    /// <param name="innerException">The exception that caused this one.</param>
    public EcsAgentException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
/// <summary>
/// Signals that the payload of an ECS task could not be used, for example because it is
/// invalid or lacks required fields.
/// </summary>
public class InvalidEcsPayloadException : EcsAgentException
{
    /// <summary>
    /// The task type whose payload was rejected.
    /// </summary>
    public string TaskType { get; }

    /// <summary>
    /// Creates the exception for the given task type.
    /// </summary>
    /// <param name="taskType">Task type whose payload was rejected.</param>
    /// <param name="details">Optional detail text appended to the message after a colon.</param>
    public InvalidEcsPayloadException(string taskType, string? details = null)
        : base(BuildMessage(taskType, details))
    {
        TaskType = taskType;
    }

    // Appends ": <details>" only when details were supplied.
    private static string BuildMessage(string taskType, string? details)
    {
        var suffix = details is null ? "" : $": {details}";
        return $"Invalid payload for ECS task type '{taskType}'{suffix}";
    }
}
/// <summary>
/// Signals that an operation against an ECS service or task failed.
/// </summary>
public class EcsOperationException : EcsAgentException
{
    /// <summary>Name of the operation that failed.</summary>
    public string Operation { get; }

    /// <summary>Cluster the operation targeted, when known.</summary>
    public string? Cluster { get; }

    /// <summary>Resource the operation targeted, when known.</summary>
    public string? Resource { get; }

    /// <summary>
    /// Creates the exception without an underlying cause.
    /// </summary>
    /// <param name="operation">Name of the operation that failed.</param>
    /// <param name="cluster">Cluster involved, or <c>null</c> to omit it from the message.</param>
    /// <param name="resource">Resource involved, or <c>null</c> to omit it from the message.</param>
    /// <param name="message">Failure detail appended to the message.</param>
    public EcsOperationException(string operation, string? cluster, string? resource, string message)
        : base(ComposeMessage(operation, cluster, resource, message))
    {
        (Operation, Cluster, Resource) = (operation, cluster, resource);
    }

    /// <summary>
    /// Creates the exception with an underlying cause.
    /// </summary>
    /// <param name="operation">Name of the operation that failed.</param>
    /// <param name="cluster">Cluster involved, or <c>null</c> to omit it from the message.</param>
    /// <param name="resource">Resource involved, or <c>null</c> to omit it from the message.</param>
    /// <param name="message">Failure detail appended to the message.</param>
    /// <param name="innerException">The exception that caused this one.</param>
    public EcsOperationException(string operation, string? cluster, string? resource, string message, Exception innerException)
        : base(ComposeMessage(operation, cluster, resource, message), innerException)
    {
        (Operation, Cluster, Resource) = (operation, cluster, resource);
    }

    // Shared by both constructors so the message format is defined in one place.
    private static string ComposeMessage(string operation, string? cluster, string? resource, string message)
    {
        var clusterPart = cluster is null ? "" : $" in cluster '{cluster}'";
        var resourcePart = resource is null ? "" : $" for '{resource}'";
        return $"ECS {operation} failed{clusterPart}{resourcePart}: {message}";
    }
}
/// <summary>
/// Signals that a deployment did not stabilize within the allowed time.
/// </summary>
public class EcsDeploymentTimeoutException : EcsAgentException
{
    /// <summary>Cluster hosting the service.</summary>
    public string Cluster { get; }

    /// <summary>Service that failed to stabilize.</summary>
    public string ServiceName { get; }

    /// <summary>The time limit that elapsed.</summary>
    public TimeSpan Timeout { get; }

    /// <summary>
    /// Creates the exception for the given cluster, service and time limit.
    /// </summary>
    /// <param name="cluster">Cluster hosting the service.</param>
    /// <param name="serviceName">Service that failed to stabilize.</param>
    /// <param name="timeout">The time limit that elapsed.</param>
    public EcsDeploymentTimeoutException(string cluster, string serviceName, TimeSpan timeout)
        : base(BuildMessage(cluster, serviceName, timeout))
    {
        (Cluster, ServiceName, Timeout) = (cluster, serviceName, timeout);
    }

    private static string BuildMessage(string cluster, string serviceName, TimeSpan timeout)
    {
        return $"ECS deployment timed out waiting for service '{serviceName}' in cluster '{cluster}' to stabilize after {timeout}";
    }
}
/// <summary>
/// Signals that one or more ECS tasks did not complete successfully.
/// </summary>
public class EcsTaskFailedException : EcsAgentException
{
    /// <summary>Cluster in which the tasks ran.</summary>
    public string Cluster { get; }

    /// <summary>ARNs of the tasks concerned.</summary>
    public IReadOnlyList<string> TaskArns { get; }

    /// <summary>Exit codes reported for the tasks; listed in the exception message.</summary>
    public IReadOnlyList<int> ExitCodes { get; }

    /// <summary>
    /// Creates the exception for the given cluster, task ARNs and exit codes.
    /// </summary>
    /// <param name="cluster">Cluster in which the tasks ran.</param>
    /// <param name="taskArns">ARNs of the tasks concerned.</param>
    /// <param name="exitCodes">Exit codes, joined with ", " in the message.</param>
    public EcsTaskFailedException(string cluster, IReadOnlyList<string> taskArns, IReadOnlyList<int> exitCodes)
        : base(BuildMessage(cluster, exitCodes))
    {
        (Cluster, TaskArns, ExitCodes) = (cluster, taskArns, exitCodes);
    }

    private static string BuildMessage(string cluster, IReadOnlyList<int> exitCodes)
    {
        var codes = string.Join(", ", exitCodes);
        return $"ECS task(s) failed in cluster '{cluster}' with exit codes: [{codes}]";
    }
}

View File

@@ -0,0 +1,17 @@
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Ecs;
/// <summary>
/// Contract implemented by the ECS task handlers in this project (for example
/// <c>EcsDeployServiceTask</c>, <c>EcsDescribeServiceTask</c> and <c>EcsHealthCheckTask</c>).
/// </summary>
public interface IEcsTask
{
/// <summary>
/// Executes the ECS operation described by <paramref name="task"/>.
/// </summary>
/// <param name="task">The agent task to execute; implementations read its payload and id.</param>
/// <param name="ct">Cancellation token; defaults to <c>default</c> when omitted.</param>
/// <returns>The result of the task execution.</returns>
Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default);
}

View File

@@ -0,0 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Agent.Ecs</RootNamespace>
<Description>Stella Agent ECS Capability - manages AWS ECS services and tasks</Description>
<!-- AWS SDK v4 nullable annotations cause false positives with value type boxing to Dictionary<string, object> -->
<NoWarn>$(NoWarn);CS8600;CS8601;CS8620</NoWarn>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AWSSDK.ECS" />
<PackageReference Include="AWSSDK.CloudWatchLogs" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,470 @@
using System.Text.Json;
using Amazon.ECS;
using Amazon.ECS.Model;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using Task = System.Threading.Tasks.Task;
namespace StellaOps.Agent.Ecs.Tasks;
/// <summary>
/// Task handler for deploying ECS services.
/// </summary>
public sealed class EcsDeployServiceTask : IEcsTask
{
private readonly IAmazonECS _ecsClient;
private readonly TimeProvider _timeProvider;
private readonly ILogger<EcsDeployServiceTask> _logger;
/// <summary>
/// Payload for deploying an ECS service. It is deserialized from the task payload with
/// <c>JsonSerializer</c>; depending on whether the service already exists it drives either
/// a create or an update request.
/// </summary>
public sealed record DeployServicePayload
{
/// <summary>
/// Name or ARN of the ECS cluster. Required.
/// </summary>
public required string Cluster { get; init; }
/// <summary>
/// Name of the service to deploy. Required.
/// </summary>
public required string ServiceName { get; init; }
/// <summary>
/// Task definition family:revision or ARN. Required.
/// </summary>
public required string TaskDefinition { get; init; }
/// <summary>
/// Desired number of tasks. Defaults to 1.
/// </summary>
public int DesiredCount { get; init; } = 1;
/// <summary>
/// Launch type (FARGATE or EC2). Only applied when the service is created, and only
/// when the value is neither null nor empty.
/// </summary>
public string? LaunchType { get; init; }
/// <summary>
/// Network configuration for awsvpc mode. Only applied when the service is created.
/// </summary>
public NetworkConfigurationPayload? NetworkConfiguration { get; init; }
/// <summary>
/// Load balancer configuration. Only applied when the service is created.
/// </summary>
public LoadBalancerPayload? LoadBalancer { get; init; }
/// <summary>
/// Deployment configuration. Applied on both create and update when present.
/// </summary>
public DeploymentConfigPayload? DeploymentConfiguration { get; init; }
/// <summary>
/// Whether to force a new deployment. Defaults to true; only sent on the update request.
/// </summary>
public bool ForceNewDeployment { get; init; } = true;
/// <summary>
/// Timeout waiting for deployment to stabilize. Defaults to 10 minutes.
/// </summary>
public TimeSpan DeploymentTimeout { get; init; } = TimeSpan.FromMinutes(10);
/// <summary>
/// Tags to apply to the service. Only applied when the service is created.
/// </summary>
public IReadOnlyDictionary<string, string>? Tags { get; init; }
}
/// <summary>
/// Network configuration payload; mapped onto the awsvpc configuration of the
/// create-service request.
/// </summary>
public sealed record NetworkConfigurationPayload
{
/// <summary>
/// Subnet IDs. Required.
/// </summary>
public required IReadOnlyList<string> Subnets { get; init; }
/// <summary>
/// Security group IDs. Optional; passed through as null when omitted.
/// </summary>
public IReadOnlyList<string>? SecurityGroups { get; init; }
/// <summary>
/// Whether to assign a public IP. Defaults to false; true is sent as ENABLED and
/// false as DISABLED.
/// </summary>
public bool AssignPublicIp { get; init; }
}
/// <summary>
/// Load balancer configuration payload; becomes the single load balancer entry of the
/// create-service request.
/// </summary>
public sealed record LoadBalancerPayload
{
/// <summary>
/// Target group ARN. Required.
/// </summary>
public required string TargetGroupArn { get; init; }
/// <summary>
/// Name of the container that receives traffic. Required.
/// </summary>
public required string ContainerName { get; init; }
/// <summary>
/// Port on the container that receives traffic. Required.
/// </summary>
public required int ContainerPort { get; init; }
}
/// <summary>
/// Deployment configuration payload; mapped onto the deployment configuration (including
/// its circuit breaker) of the create and update requests.
/// </summary>
public sealed record DeploymentConfigPayload
{
/// <summary>
/// Maximum percent during deployment. Defaults to 200.
/// </summary>
public int MaximumPercent { get; init; } = 200;
/// <summary>
/// Minimum healthy percent. Defaults to 100.
/// </summary>
public int MinimumHealthyPercent { get; init; } = 100;
/// <summary>
/// Enable deployment circuit breaker. Defaults to true.
/// </summary>
public bool EnableCircuitBreaker { get; init; } = true;
/// <summary>
/// Enable rollback on failure; sent as the circuit breaker's rollback flag. Defaults to true.
/// </summary>
public bool EnableRollback { get; init; } = true;
}
/// <summary>
/// Initializes the deploy handler with its collaborators.
/// </summary>
/// <param name="ecsClient">ECS client used for describe, create and update calls.</param>
/// <param name="timeProvider">Clock used to stamp result completion times.</param>
/// <param name="logger">Logger for deployment progress and failures.</param>
public EcsDeployServiceTask(
    IAmazonECS ecsClient,
    TimeProvider timeProvider,
    ILogger<EcsDeployServiceTask> logger)
    => (_ecsClient, _timeProvider, _logger) = (ecsClient, timeProvider, logger);
/// <inheritdoc />
/// <remarks>
/// Updates the service when it already exists with a status other than <c>INACTIVE</c>;
/// otherwise creates it. An <see cref="AmazonECSException"/> is logged and returned as a
/// failed result; other exceptions propagate to the caller.
/// </remarks>
public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
{
    var payload = JsonSerializer.Deserialize<DeployServicePayload>(task.Payload);
    if (payload is null)
    {
        throw new InvalidEcsPayloadException("ecs.deploy", "Failed to deserialize payload");
    }

    _logger.LogInformation(
        "Deploying ECS service {Service} to cluster {Cluster} with task definition {TaskDef}",
        payload.ServiceName,
        payload.Cluster,
        payload.TaskDefinition);

    try
    {
        // Check if service exists
        var current = await GetServiceAsync(payload.Cluster, payload.ServiceName, ct);
        var updateInPlace = current is not null && current.Status != "INACTIVE";

        return updateInPlace
            ? await UpdateServiceAsync(task.Id, payload, ct)
            : await CreateServiceAsync(task.Id, payload, ct);
    }
    catch (AmazonECSException ex)
    {
        _logger.LogError(ex, "Failed to deploy ECS service {Service}", payload.ServiceName);

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = $"ECS deployment failed: {ex.Message}",
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }
}
/// <summary>
/// Looks up a service by name in the given cluster.
/// </summary>
/// <param name="cluster">Name or ARN of the cluster to query.</param>
/// <param name="serviceName">Name of the service to look up.</param>
/// <param name="ct">Cancellation token; cancellation propagates to the caller.</param>
/// <returns>
/// The first service returned by <c>DescribeServices</c>, or <c>null</c> when none was
/// returned or ECS rejected the call.
/// </returns>
private async Task<Service?> GetServiceAsync(string cluster, string serviceName, CancellationToken ct)
{
    try
    {
        var response = await _ecsClient.DescribeServicesAsync(new DescribeServicesRequest
        {
            Cluster = cluster,
            Services = new List<string> { serviceName }
        }, ct);

        return response.Services.FirstOrDefault();
    }
    catch (AmazonECSException ex)
    {
        // Best effort: an ECS-side rejection is treated as "service absent" so the caller
        // falls through to the create path, but the reason is no longer lost. Cancellation
        // and non-ECS failures are deliberately NOT caught here, so a cancelled or broken
        // lookup does not trigger a create attempt.
        _logger.LogWarning(
            ex,
            "Could not describe ECS service {Service} in cluster {Cluster}; treating it as absent",
            serviceName,
            cluster);
        return null;
    }
}
/// <summary>
/// Creates the service described by <paramref name="payload"/> and waits for it to stabilize.
/// </summary>
/// <param name="taskId">Id copied onto the returned result.</param>
/// <param name="payload">Deployment settings; optional sections are applied only when present.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// A result whose success reflects whether the service stabilized within the payload's
/// deployment timeout. The count outputs are taken from the create response, i.e. from
/// before the stabilization wait.
/// </returns>
private async Task<AgentTaskResult> CreateServiceAsync(
    Guid taskId,
    DeployServicePayload payload,
    CancellationToken ct)
{
    _logger.LogInformation("Creating new ECS service {Service}", payload.ServiceName);

    var createRequest = new CreateServiceRequest
    {
        Cluster = payload.Cluster,
        ServiceName = payload.ServiceName,
        TaskDefinition = payload.TaskDefinition,
        DesiredCount = payload.DesiredCount
    };

    if (payload.LaunchType is { Length: > 0 } launchType)
    {
        createRequest.LaunchType = new LaunchType(launchType);
    }

    if (payload.NetworkConfiguration is { } network)
    {
        var publicIp = network.AssignPublicIp
            ? AssignPublicIp.ENABLED
            : AssignPublicIp.DISABLED;

        createRequest.NetworkConfiguration = new NetworkConfiguration
        {
            AwsvpcConfiguration = new AwsVpcConfiguration
            {
                Subnets = network.Subnets.ToList(),
                SecurityGroups = network.SecurityGroups?.ToList(),
                AssignPublicIp = publicIp
            }
        };
    }

    if (payload.LoadBalancer is { } balancer)
    {
        var entry = new LoadBalancer
        {
            TargetGroupArn = balancer.TargetGroupArn,
            ContainerName = balancer.ContainerName,
            ContainerPort = balancer.ContainerPort
        };
        createRequest.LoadBalancers = new List<LoadBalancer> { entry };
    }

    if (payload.DeploymentConfiguration is { } deployment)
    {
        var circuitBreaker = new DeploymentCircuitBreaker
        {
            Enable = deployment.EnableCircuitBreaker,
            Rollback = deployment.EnableRollback
        };

        createRequest.DeploymentConfiguration = new DeploymentConfiguration
        {
            MaximumPercent = deployment.MaximumPercent,
            MinimumHealthyPercent = deployment.MinimumHealthyPercent,
            DeploymentCircuitBreaker = circuitBreaker
        };
    }

    if (payload.Tags is { } tags)
    {
        createRequest.Tags = tags.Select(kv => new Tag { Key = kv.Key, Value = kv.Value }).ToList();
    }

    var createResponse = await _ecsClient.CreateServiceAsync(createRequest, ct);
    var created = createResponse.Service;
    if (created is null)
    {
        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = false,
            Error = "Service creation returned no service object",
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    _logger.LogInformation(
        "Created ECS service {Service} (ARN: {Arn})",
        payload.ServiceName,
        created.ServiceArn);

    // Wait for deployment to stabilize
    var stabilized = await WaitForServiceStableAsync(
        payload.Cluster,
        payload.ServiceName,
        payload.DeploymentTimeout,
        ct);

    var outputs = new Dictionary<string, object>
    {
        ["serviceArn"] = created.ServiceArn ?? "",
        ["serviceName"] = created.ServiceName ?? "",
        ["taskDefinition"] = created.TaskDefinition ?? "",
        ["runningCount"] = created.RunningCount,
        ["desiredCount"] = created.DesiredCount,
        ["deploymentStatus"] = stabilized ? "COMPLETED" : "TIMED_OUT",
        ["operation"] = "create"
    };

    return new AgentTaskResult
    {
        TaskId = taskId,
        Success = stabilized,
        Error = stabilized ? null : "Service did not stabilize within timeout",
        Outputs = outputs,
        CompletedAt = _timeProvider.GetUtcNow()
    };
}
/// <summary>
/// Updates the existing service to the payload's task definition and desired count, then
/// waits for it to stabilize.
/// </summary>
/// <param name="taskId">Id copied onto the returned result.</param>
/// <param name="payload">Deployment settings; only the deployment configuration section is optional here.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// A result whose success reflects whether the service stabilized within the payload's
/// deployment timeout. The count and deployment outputs are taken from the update
/// response, i.e. from before the stabilization wait.
/// </returns>
private async Task<AgentTaskResult> UpdateServiceAsync(
    Guid taskId,
    DeployServicePayload payload,
    CancellationToken ct)
{
    _logger.LogInformation(
        "Updating existing ECS service {Service} to task definition {TaskDef}",
        payload.ServiceName,
        payload.TaskDefinition);

    var updateRequest = new UpdateServiceRequest
    {
        Cluster = payload.Cluster,
        Service = payload.ServiceName,
        TaskDefinition = payload.TaskDefinition,
        DesiredCount = payload.DesiredCount,
        ForceNewDeployment = payload.ForceNewDeployment
    };

    if (payload.DeploymentConfiguration is { } deployment)
    {
        var circuitBreaker = new DeploymentCircuitBreaker
        {
            Enable = deployment.EnableCircuitBreaker,
            Rollback = deployment.EnableRollback
        };

        updateRequest.DeploymentConfiguration = new DeploymentConfiguration
        {
            MaximumPercent = deployment.MaximumPercent,
            MinimumHealthyPercent = deployment.MinimumHealthyPercent,
            DeploymentCircuitBreaker = circuitBreaker
        };
    }

    var updateResponse = await _ecsClient.UpdateServiceAsync(updateRequest, ct);
    var updated = updateResponse.Service;
    if (updated is null)
    {
        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = false,
            Error = "Service update returned no service object",
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    // Id of the first deployment listed in the update response, if any.
    var deploymentId = updated.Deployments.FirstOrDefault()?.Id;

    _logger.LogInformation(
        "Updated ECS service {Service}, deployment ID: {DeploymentId}",
        payload.ServiceName,
        deploymentId ?? "unknown");

    // Wait for deployment to stabilize
    var stabilized = await WaitForServiceStableAsync(
        payload.Cluster,
        payload.ServiceName,
        payload.DeploymentTimeout,
        ct);

    var outputs = new Dictionary<string, object>
    {
        ["serviceArn"] = updated.ServiceArn ?? "",
        ["serviceName"] = updated.ServiceName ?? "",
        ["taskDefinition"] = updated.TaskDefinition ?? "",
        ["runningCount"] = updated.RunningCount,
        ["desiredCount"] = updated.DesiredCount,
        ["deploymentId"] = deploymentId ?? "",
        ["deploymentStatus"] = stabilized ? "COMPLETED" : "TIMED_OUT",
        ["operation"] = "update"
    };

    return new AgentTaskResult
    {
        TaskId = taskId,
        Success = stabilized,
        Error = stabilized ? null : "Service did not stabilize within timeout",
        Outputs = outputs,
        CompletedAt = _timeProvider.GetUtcNow()
    };
}
/// <summary>
/// Polls the service every 10 seconds until its PRIMARY deployment has as many running
/// tasks as desired and it is the only deployment, or until <paramref name="timeout"/> elapses.
/// </summary>
/// <param name="cluster">Name or ARN of the cluster hosting the service.</param>
/// <param name="serviceName">Name of the service to poll.</param>
/// <param name="timeout">Maximum time to wait for stabilization.</param>
/// <param name="ct">Caller's cancellation token.</param>
/// <returns>
/// <c>true</c> when the service stabilized; <c>false</c> when it disappeared or the
/// timeout elapsed.
/// </returns>
/// <exception cref="OperationCanceledException">
/// The caller's token was cancelled before the timeout elapsed.
/// </exception>
private async Task<bool> WaitForServiceStableAsync(
    string cluster,
    string serviceName,
    TimeSpan timeout,
    CancellationToken ct)
{
    _logger.LogInformation("Waiting for service {Service} to stabilize", serviceName);

    var pollInterval = TimeSpan.FromSeconds(10);

    // Both the timeout and the polling delay run on the injected TimeProvider, so the
    // wait follows the same clock as the timestamps this class reports.
    using var timeoutCts = new CancellationTokenSource(timeout, _timeProvider);
    using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

    try
    {
        while (!linkedCts.IsCancellationRequested)
        {
            var response = await _ecsClient.DescribeServicesAsync(new DescribeServicesRequest
            {
                Cluster = cluster,
                Services = new List<string> { serviceName }
            }, linkedCts.Token);

            var service = response.Services.FirstOrDefault();
            if (service is null)
            {
                _logger.LogWarning("Service {Service} not found during stabilization check", serviceName);
                return false;
            }

            var primaryDeployment = service.Deployments.FirstOrDefault(d => d.Status == "PRIMARY");
            if (primaryDeployment is not null)
            {
                if (primaryDeployment.RunningCount == primaryDeployment.DesiredCount &&
                    service.Deployments.Count == 1)
                {
                    _logger.LogInformation(
                        "Service {Service} stabilized with {Count} running tasks",
                        serviceName,
                        primaryDeployment.RunningCount);
                    return true;
                }

                _logger.LogDebug(
                    "Service {Service} not stable: running={Running}, desired={Desired}, deployments={Deployments}",
                    serviceName,
                    primaryDeployment.RunningCount,
                    primaryDeployment.DesiredCount,
                    service.Deployments.Count);
            }

            await Task.Delay(pollInterval, _timeProvider, linkedCts.Token);
        }
    }
    catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
    {
        _logger.LogWarning("Service {Service} stabilization timed out after {Timeout}", serviceName, timeout);
        return false;
    }

    // The loop can also end because cancellation was observed at the loop check rather
    // than inside an await. A caller cancellation must not be reported as a timeout.
    if (!timeoutCts.IsCancellationRequested)
    {
        ct.ThrowIfCancellationRequested();
    }

    _logger.LogWarning("Service {Service} stabilization timed out after {Timeout}", serviceName, timeout);
    return false;
}
}

View File

@@ -0,0 +1,173 @@
using System.Globalization;
using System.Text.Json;
using Amazon.ECS;
using Amazon.ECS.Model;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using Task = System.Threading.Tasks.Task;
namespace StellaOps.Agent.Ecs.Tasks;
/// <summary>
/// Task handler for describing ECS services.
/// </summary>
public sealed class EcsDescribeServiceTask : IEcsTask
{
private readonly IAmazonECS _ecsClient;
private readonly TimeProvider _timeProvider;
private readonly ILogger<EcsDescribeServiceTask> _logger;
/// <summary>
/// Payload for describing an ECS service; deserialized from the task payload with
/// <c>JsonSerializer</c>.
/// </summary>
public sealed record DescribeServicePayload
{
/// <summary>
/// Name or ARN of the ECS cluster. Required.
/// </summary>
public required string Cluster { get; init; }
/// <summary>
/// Name of the service to describe. Required.
/// </summary>
public required string ServiceName { get; init; }
/// <summary>
/// Whether to include task information. Defaults to false. When true, the service's
/// tasks are listed and described, and added to the outputs under the "tasks" key
/// (only if at least one task ARN is returned).
/// </summary>
public bool IncludeTasks { get; init; } = false;
}
/// <summary>
/// Initializes the describe handler with its collaborators.
/// </summary>
/// <param name="ecsClient">ECS client used for the describe and list calls.</param>
/// <param name="timeProvider">Clock used to stamp result completion times.</param>
/// <param name="logger">Logger for progress and failures.</param>
public EcsDescribeServiceTask(
    IAmazonECS ecsClient,
    TimeProvider timeProvider,
    ILogger<EcsDescribeServiceTask> logger)
    => (_ecsClient, _timeProvider, _logger) = (ecsClient, timeProvider, logger);
/// <inheritdoc />
/// <remarks>
/// Describes the service named in the payload and returns its state as outputs. When the
/// payload asks for tasks, a single <c>ListTasks</c> call is made (no pagination token is
/// followed) and the returned ARNs are described. An <see cref="AmazonECSException"/> is
/// logged and returned as a failed result; other exceptions propagate.
/// </remarks>
public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
{
    var payload = JsonSerializer.Deserialize<DescribeServicePayload>(task.Payload);
    if (payload is null)
    {
        throw new InvalidEcsPayloadException("ecs.describe", "Failed to deserialize payload");
    }

    _logger.LogInformation(
        "Describing ECS service {Service} in cluster {Cluster}",
        payload.ServiceName,
        payload.Cluster);

    try
    {
        var describeRequest = new DescribeServicesRequest
        {
            Cluster = payload.Cluster,
            Services = new List<string> { payload.ServiceName }
        };
        var describeResponse = await _ecsClient.DescribeServicesAsync(describeRequest, ct);

        var service = describeResponse.Services.FirstOrDefault();
        if (service is null)
        {
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Service '{payload.ServiceName}' not found",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }

        var outputs = new Dictionary<string, object>
        {
            ["serviceArn"] = service.ServiceArn ?? "",
            ["serviceName"] = service.ServiceName ?? "",
            ["clusterArn"] = service.ClusterArn ?? "",
            ["status"] = service.Status ?? "",
            ["taskDefinition"] = service.TaskDefinition ?? "",
            ["desiredCount"] = service.DesiredCount,
            ["runningCount"] = service.RunningCount,
            ["pendingCount"] = service.PendingCount,
            ["launchType"] = service.LaunchType?.Value ?? "unknown",
            ["deploymentCount"] = service.Deployments.Count,
            ["createdAt"] = ToRoundTripUtc(service.CreatedAt),
            ["deployments"] = service.Deployments.Select(DescribeDeployment).ToList()
        };

        // Include tasks if requested
        if (payload.IncludeTasks)
        {
            var listed = await _ecsClient.ListTasksAsync(new ListTasksRequest
            {
                Cluster = payload.Cluster,
                ServiceName = payload.ServiceName
            }, ct);

            if (listed.TaskArns.Count > 0)
            {
                var described = await _ecsClient.DescribeTasksAsync(new DescribeTasksRequest
                {
                    Cluster = payload.Cluster,
                    Tasks = listed.TaskArns
                }, ct);

                outputs["tasks"] = described.Tasks.Select(DescribeTask).ToList();
            }
        }

        _logger.LogInformation(
            "Described ECS service {Service}: {Running}/{Desired} running, {Deployments} deployments",
            payload.ServiceName,
            service.RunningCount,
            service.DesiredCount,
            service.Deployments.Count);

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = true,
            Outputs = outputs,
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }
    catch (AmazonECSException ex)
    {
        _logger.LogError(ex, "Failed to describe ECS service {Service}", payload.ServiceName);

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = $"Failed to describe service: {ex.Message}",
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    // Round-trip ("o") UTC rendering; a missing timestamp is rendered from the default DateTime.
    static string ToRoundTripUtc(DateTime? timestamp) =>
        timestamp.GetValueOrDefault().ToUniversalTime().ToString("o", CultureInfo.InvariantCulture);

    // Flattens one deployment into the dictionary shape exposed under "deployments".
    static Dictionary<string, object> DescribeDeployment(Deployment deployment) => new()
    {
        ["id"] = deployment.Id ?? "",
        ["status"] = deployment.Status ?? "",
        ["taskDefinition"] = deployment.TaskDefinition ?? "",
        ["desiredCount"] = deployment.DesiredCount,
        ["runningCount"] = deployment.RunningCount,
        ["pendingCount"] = deployment.PendingCount,
        ["createdAt"] = ToRoundTripUtc(deployment.CreatedAt)
    };

    // Flattens one container into the dictionary shape exposed under a task's "containers".
    static Dictionary<string, object> DescribeContainer(Container container) => new()
    {
        ["name"] = container.Name ?? "",
        ["lastStatus"] = container.LastStatus ?? "",
        ["exitCode"] = container.ExitCode ?? -1,
        ["healthStatus"] = container.HealthStatus?.Value ?? "unknown"
    };

    // Flattens one ECS task into the dictionary shape exposed under "tasks".
    static Dictionary<string, object> DescribeTask(Amazon.ECS.Model.Task ecsTask) => new()
    {
        ["taskArn"] = ecsTask.TaskArn ?? "",
        ["taskDefinitionArn"] = ecsTask.TaskDefinitionArn ?? "",
        ["lastStatus"] = ecsTask.LastStatus ?? "",
        ["desiredStatus"] = ecsTask.DesiredStatus ?? "",
        ["healthStatus"] = ecsTask.HealthStatus?.Value ?? "unknown",
        ["createdAt"] = ToRoundTripUtc(ecsTask.CreatedAt),
        ["containers"] = ecsTask.Containers.Select(DescribeContainer).ToList()
    };
}
}

View File

@@ -0,0 +1,233 @@
using System.Text.Json;
using Amazon.ECS;
using Amazon.ECS.Model;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using Task = System.Threading.Tasks.Task;
namespace StellaOps.Agent.Ecs.Tasks;
/// <summary>
/// Task handler for checking ECS service health.
/// </summary>
public sealed class EcsHealthCheckTask : IEcsTask
{
private readonly IAmazonECS _ecsClient;
private readonly TimeProvider _timeProvider;
private readonly ILogger<EcsHealthCheckTask> _logger;
/// <summary>
/// Payload for checking ECS service health; deserialized from the task payload with
/// <c>JsonSerializer</c>.
/// </summary>
public sealed record HealthCheckPayload
{
/// <summary>
/// Name or ARN of the ECS cluster. Required.
/// </summary>
public required string Cluster { get; init; }
/// <summary>
/// Name of the service to check. Required.
/// </summary>
public required string ServiceName { get; init; }
/// <summary>
/// Minimum healthy percent to consider the service healthy. Defaults to 100. Compared
/// against running * 100 / desired (integer division); a desired count of zero or less
/// is treated as 0 percent.
/// </summary>
public int MinHealthyPercent { get; init; } = 100;
/// <summary>
/// Whether to wait for the service to become healthy. Defaults to true; when false a
/// single check is performed instead of polling.
/// </summary>
public bool WaitForHealthy { get; init; } = true;
/// <summary>
/// Timeout for waiting for health. Defaults to 5 minutes; only used when waiting.
/// </summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);
}
/// <summary>
/// Initializes the health check handler with its collaborators.
/// </summary>
/// <param name="ecsClient">ECS client used to describe the service.</param>
/// <param name="timeProvider">Clock used to stamp result completion times.</param>
/// <param name="logger">Logger for progress and failures.</param>
public EcsHealthCheckTask(
    IAmazonECS ecsClient,
    TimeProvider timeProvider,
    ILogger<EcsHealthCheckTask> logger)
    => (_ecsClient, _timeProvider, _logger) = (ecsClient, timeProvider, logger);
/// <inheritdoc />
/// <remarks>
/// Performs either a single health check or a polling wait, depending on the payload's
/// <c>WaitForHealthy</c> flag. An <see cref="AmazonECSException"/> is logged and returned
/// as a failed result; other exceptions propagate.
/// </remarks>
public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
{
    var payload = JsonSerializer.Deserialize<HealthCheckPayload>(task.Payload);
    if (payload is null)
    {
        throw new InvalidEcsPayloadException("ecs.health", "Failed to deserialize payload");
    }

    _logger.LogInformation(
        "Checking health of ECS service {Service} in cluster {Cluster}",
        payload.ServiceName,
        payload.Cluster);

    try
    {
        return payload.WaitForHealthy
            ? await WaitForHealthyAsync(task.Id, payload, ct)
            : await CheckHealthOnceAsync(task.Id, payload, ct);
    }
    catch (AmazonECSException ex)
    {
        _logger.LogError(ex, "Failed to check health of ECS service {Service}", payload.ServiceName);

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = $"Health check failed: {ex.Message}",
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }
}
/// <summary>
/// Describes the service once and evaluates its health without waiting.
/// </summary>
/// <param name="taskId">Id copied onto the returned result.</param>
/// <param name="payload">Identifies the service and the minimum healthy percentage.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// A failed result when the service is not found; otherwise a result whose success is
/// true only when the running percentage meets the minimum and exactly one deployment exists.
/// </returns>
private async Task<AgentTaskResult> CheckHealthOnceAsync(
    Guid taskId,
    HealthCheckPayload payload,
    CancellationToken ct)
{
    var describeRequest = new DescribeServicesRequest
    {
        Cluster = payload.Cluster,
        Services = new List<string> { payload.ServiceName }
    };
    var describeResponse = await _ecsClient.DescribeServicesAsync(describeRequest, ct);

    if (describeResponse.Services.FirstOrDefault() is not { } service)
    {
        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = false,
            Error = $"Service '{payload.ServiceName}' not found",
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    // Integer percentage of running vs desired tasks; no desired tasks counts as 0%.
    var healthyPercent = service.DesiredCount > 0
        ? (service.RunningCount * 100) / service.DesiredCount
        : 0;

    var meetsMinimum = healthyPercent >= payload.MinHealthyPercent;
    var singleDeployment = service.Deployments.Count == 1;
    var isHealthy = meetsMinimum && singleDeployment;

    string? error = null;
    if (!isHealthy)
    {
        error = $"Service unhealthy: {healthyPercent}% running (minimum: {payload.MinHealthyPercent}%)";
    }

    var outputs = new Dictionary<string, object>
    {
        ["serviceName"] = service.ServiceName ?? "",
        ["serviceArn"] = service.ServiceArn ?? "",
        ["runningCount"] = service.RunningCount,
        ["desiredCount"] = service.DesiredCount,
        ["healthyPercent"] = healthyPercent,
        ["status"] = service.Status ?? "",
        ["deploymentCount"] = service.Deployments.Count,
        ["isHealthy"] = isHealthy
    };

    return new AgentTaskResult
    {
        TaskId = taskId,
        Success = isHealthy,
        Error = error,
        Outputs = outputs,
        CompletedAt = _timeProvider.GetUtcNow()
    };
}
/// <summary>
/// Polls the service every 10 seconds until its running percentage meets the payload's
/// minimum and exactly one deployment exists, or until the payload's timeout elapses.
/// </summary>
/// <param name="taskId">Id copied onto the returned result.</param>
/// <param name="payload">Identifies the service, the minimum healthy percentage and the timeout.</param>
/// <param name="ct">Caller's cancellation token.</param>
/// <returns>
/// A successful result once the service is healthy; a failed result when the service is
/// not found or the timeout elapses.
/// </returns>
/// <exception cref="OperationCanceledException">
/// The caller's token was cancelled before the timeout elapsed.
/// </exception>
private async Task<AgentTaskResult> WaitForHealthyAsync(
    Guid taskId,
    HealthCheckPayload payload,
    CancellationToken ct)
{
    var pollInterval = TimeSpan.FromSeconds(10);

    // Both the timeout and the polling delay run on the injected TimeProvider, so the
    // wait follows the same clock as the timestamps this class reports.
    using var timeoutCts = new CancellationTokenSource(payload.Timeout, _timeProvider);
    using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

    try
    {
        while (!linkedCts.IsCancellationRequested)
        {
            var response = await _ecsClient.DescribeServicesAsync(new DescribeServicesRequest
            {
                Cluster = payload.Cluster,
                Services = new List<string> { payload.ServiceName }
            }, linkedCts.Token);

            var service = response.Services.FirstOrDefault();
            if (service is null)
            {
                return new AgentTaskResult
                {
                    TaskId = taskId,
                    Success = false,
                    Error = $"Service '{payload.ServiceName}' not found",
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            // Integer percentage of running vs desired tasks; no desired tasks counts as 0%.
            var healthyPercent = service.DesiredCount > 0
                ? (service.RunningCount * 100) / service.DesiredCount
                : 0;

            if (healthyPercent >= payload.MinHealthyPercent && service.Deployments.Count == 1)
            {
                _logger.LogInformation(
                    "Service {Service} is healthy: {Running}/{Desired} tasks running ({Percent}%)",
                    payload.ServiceName,
                    service.RunningCount,
                    service.DesiredCount,
                    healthyPercent);

                return new AgentTaskResult
                {
                    TaskId = taskId,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["serviceName"] = service.ServiceName ?? "",
                        ["serviceArn"] = service.ServiceArn ?? "",
                        ["runningCount"] = service.RunningCount,
                        ["desiredCount"] = service.DesiredCount,
                        ["healthyPercent"] = healthyPercent,
                        ["status"] = service.Status ?? "",
                        ["isHealthy"] = true
                    },
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            _logger.LogDebug(
                "Service {Service} health check: {Running}/{Desired} ({Percent}%), waiting...",
                payload.ServiceName,
                service.RunningCount,
                service.DesiredCount,
                healthyPercent);

            await Task.Delay(pollInterval, _timeProvider, linkedCts.Token);
        }
    }
    catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
    {
        _logger.LogWarning(
            "Health check timed out after {Timeout} for service {Service}",
            payload.Timeout,
            payload.ServiceName);
    }

    // The loop can also end because cancellation was observed at the loop check rather
    // than inside an await. A caller cancellation must not be reported as a timeout.
    if (!timeoutCts.IsCancellationRequested)
    {
        ct.ThrowIfCancellationRequested();
    }

    return new AgentTaskResult
    {
        TaskId = taskId,
        Success = false,
        Error = $"Health check timed out after {payload.Timeout}",
        CompletedAt = _timeProvider.GetUtcNow()
    };
}
}

View File

@@ -0,0 +1,282 @@
using System.Text.Json;
using Amazon.ECS;
using Amazon.ECS.Model;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Ecs.Tasks;
/// <summary>
/// Task handler for registering ECS task definitions.
/// </summary>
public sealed class EcsRegisterTaskDefinitionTask : IEcsTask
{
private readonly IAmazonECS _ecsClient;
private readonly TimeProvider _timeProvider;
private readonly ILogger<EcsRegisterTaskDefinitionTask> _logger;
/// <summary>
/// Payload for registering an ECS task definition; deserialized from the task payload
/// with <c>JsonSerializer</c> and mapped onto a register-task-definition request.
/// </summary>
public sealed record RegisterTaskDefinitionPayload
{
/// <summary>
/// Family name for the task definition. Required.
/// </summary>
public required string Family { get; init; }
/// <summary>
/// Container definitions. Required; each entry is converted to an ECS container definition.
/// </summary>
public required IReadOnlyList<ContainerDefinitionPayload> ContainerDefinitions { get; init; }
/// <summary>
/// Task-level CPU, passed through as a string.
/// </summary>
public string? Cpu { get; init; }
/// <summary>
/// Task-level memory, passed through as a string.
/// </summary>
public string? Memory { get; init; }
/// <summary>
/// Network mode. Only set on the request when neither null nor empty.
/// </summary>
public string? NetworkMode { get; init; }
/// <summary>
/// Task role ARN.
/// </summary>
public string? TaskRoleArn { get; init; }
/// <summary>
/// Execution role ARN.
/// </summary>
public string? ExecutionRoleArn { get; init; }
/// <summary>
/// Required capabilities (FARGATE, EC2). Only set on the request when provided.
/// </summary>
public IReadOnlyList<string>? RequiresCompatibilities { get; init; }
/// <summary>
/// Tags to apply. Only set on the request when provided.
/// </summary>
public IReadOnlyDictionary<string, string>? Tags { get; init; }
}
/// <summary>
/// Container definition payload; one entry of a task definition's container list.
/// </summary>
public sealed record ContainerDefinitionPayload
{
/// <summary>
/// Container name. Required.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Container image. Required.
/// </summary>
public required string Image { get; init; }
/// <summary>
/// Container CPU units. Sent as 0 when omitted.
/// </summary>
public int? Cpu { get; init; }
/// <summary>
/// Container memory in MB.
/// </summary>
public int? Memory { get; init; }
/// <summary>
/// Container memory reservation in MB.
/// </summary>
public int? MemoryReservation { get; init; }
/// <summary>
/// Port mappings.
/// </summary>
public IReadOnlyList<PortMappingPayload>? PortMappings { get; init; }
/// <summary>
/// Environment variables; each entry becomes a name/value pair on the container definition.
/// </summary>
public IReadOnlyDictionary<string, string>? Environment { get; init; }
/// <summary>
/// Whether the container is essential. Defaults to true.
/// </summary>
public bool Essential { get; init; } = true;
/// <summary>
/// Entry point override.
/// </summary>
public IReadOnlyList<string>? EntryPoint { get; init; }
/// <summary>
/// Command override.
/// </summary>
public IReadOnlyList<string>? Command { get; init; }
/// <summary>
/// Log configuration. Left unset on the container definition when omitted.
/// </summary>
public LogConfigurationPayload? LogConfiguration { get; init; }
}
/// <summary>
/// Port mapping payload for a container definition.
/// </summary>
public sealed record PortMappingPayload
{
/// <summary>
/// Container port. Required.
/// </summary>
public required int ContainerPort { get; init; }
/// <summary>
/// Host port. When omitted, the container port is used as the host port.
/// </summary>
public int? HostPort { get; init; }
/// <summary>
/// Protocol (tcp or udp). Defaults to "tcp".
/// </summary>
public string Protocol { get; init; } = "tcp";
}
/// <summary>
/// Log configuration payload for a container definition.
/// </summary>
public sealed record LogConfigurationPayload
{
/// <summary>
/// Log driver. Required.
/// </summary>
public required string LogDriver { get; init; }
/// <summary>
/// Log driver options; copied into a new dictionary on the request when provided.
/// </summary>
public IReadOnlyDictionary<string, string>? Options { get; init; }
}
/// <summary>
/// Initializes the register-task-definition handler with its collaborators.
/// </summary>
/// <param name="ecsClient">ECS client used to register the task definition.</param>
/// <param name="timeProvider">Time provider stored for use by the handler.</param>
/// <param name="logger">Logger for progress and failures.</param>
public EcsRegisterTaskDefinitionTask(
    IAmazonECS ecsClient,
    TimeProvider timeProvider,
    ILogger<EcsRegisterTaskDefinitionTask> logger)
    => (_ecsClient, _timeProvider, _logger) = (ecsClient, timeProvider, logger);
/// <inheritdoc />
/// <remarks>
/// Deserializes the task payload into a <c>RegisterTaskDefinitionPayload</c>, maps it onto a
/// <c>RegisterTaskDefinitionRequest</c> and sends it to ECS. On success the result outputs are
/// <c>taskDefinitionArn</c>, <c>family</c>, <c>revision</c>, <c>status</c> and <c>containerCount</c>.
/// An <see cref="AmazonECSException"/>, or a response without a task definition, yields a failed
/// result instead of an exception. A payload that deserializes to <c>null</c> throws
/// <c>InvalidEcsPayloadException</c>.
/// </remarks>
public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
{
    var payload = JsonSerializer.Deserialize<RegisterTaskDefinitionPayload>(task.Payload)
        ?? throw new InvalidEcsPayloadException("ecs.register", "Failed to deserialize payload");

    _logger.LogInformation(
        "Registering ECS task definition for family {Family}",
        payload.Family);

    try
    {
        var request = new RegisterTaskDefinitionRequest
        {
            Family = payload.Family,
            Cpu = payload.Cpu,
            Memory = payload.Memory,
            TaskRoleArn = payload.TaskRoleArn,
            ExecutionRoleArn = payload.ExecutionRoleArn,
            ContainerDefinitions = payload.ContainerDefinitions.Select(c => new ContainerDefinition
            {
                Name = c.Name,
                Image = c.Image,
                Cpu = c.Cpu ?? 0,
                Memory = c.Memory,
                MemoryReservation = c.MemoryReservation,
                Essential = c.Essential,
                EntryPoint = c.EntryPoint?.ToList(),
                Command = c.Command?.ToList(),
                PortMappings = c.PortMappings?.Select(p => new PortMapping
                {
                    ContainerPort = p.ContainerPort,
                    // An omitted host port is mapped to the container port.
                    HostPort = p.HostPort ?? p.ContainerPort,
                    Protocol = p.Protocol
                }).ToList(),
                Environment = c.Environment?.Select(kv => new Amazon.ECS.Model.KeyValuePair
                {
                    Name = kv.Key,
                    Value = kv.Value
                }).ToList(),
                LogConfiguration = c.LogConfiguration is not null
                    ? new LogConfiguration
                    {
                        LogDriver = c.LogConfiguration.LogDriver,
                        Options = c.LogConfiguration.Options?.ToDictionary(kv => kv.Key, kv => kv.Value)
                    }
                    : null
            }).ToList()
        };

        // Optional request fields are only set when the payload supplies them.
        if (!string.IsNullOrEmpty(payload.NetworkMode))
        {
            request.NetworkMode = new NetworkMode(payload.NetworkMode);
        }

        if (payload.RequiresCompatibilities is not null)
        {
            request.RequiresCompatibilities = payload.RequiresCompatibilities.ToList();
        }

        if (payload.Tags is not null)
        {
            request.Tags = payload.Tags.Select(kv => new Tag { Key = kv.Key, Value = kv.Value }).ToList();
        }

        var response = await _ecsClient.RegisterTaskDefinitionAsync(request, ct);

        // Guard the response shape: a NullReferenceException here would bypass the
        // AmazonECSException handler below and escape the task handler.
        if (response.TaskDefinition is not { } taskDef)
        {
            _logger.LogError(
                "ECS returned no task definition for family {Family}",
                payload.Family);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = "Failed to register task definition: response contained no task definition",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }

        _logger.LogInformation(
            "Registered ECS task definition {Family}:{Revision} (ARN: {Arn})",
            taskDef.Family,
            taskDef.Revision,
            taskDef.TaskDefinitionArn);

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = true,
            Outputs = new Dictionary<string, object>
            {
                ["taskDefinitionArn"] = taskDef.TaskDefinitionArn ?? "",
                ["family"] = taskDef.Family ?? "",
                ["revision"] = taskDef.Revision,
                ["status"] = taskDef.Status?.Value ?? "",
                // Response collections may be null rather than empty; report 0 in that case.
                ["containerCount"] = taskDef.ContainerDefinitions?.Count ?? 0
            },
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }
    catch (AmazonECSException ex)
    {
        _logger.LogError(ex, "Failed to register ECS task definition for family {Family}", payload.Family);

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = $"Failed to register task definition: {ex.Message}",
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }
}
}

View File

@@ -0,0 +1,331 @@
using System.Text.Json;
using Amazon.ECS;
using Amazon.ECS.Model;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using Task = System.Threading.Tasks.Task;
namespace StellaOps.Agent.Ecs.Tasks;
/// <summary>
/// Task handler for running ECS tasks.
/// </summary>
/// <remarks>
/// Starts tasks with <c>RunTask</c> and, when the payload asks for it, polls <c>DescribeTasks</c>
/// until every started task reports <c>STOPPED</c> or the completion timeout elapses.
/// </remarks>
public sealed class EcsRunTaskTask : IEcsTask
{
    /// <summary>Delay between two <c>DescribeTasks</c> polls while waiting for completion.</summary>
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(10);

    private readonly IAmazonECS _ecsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EcsRunTaskTask> _logger;

    /// <summary>
    /// Payload for running an ECS task.
    /// </summary>
    public sealed record RunTaskPayload
    {
        /// <summary>
        /// Name or ARN of the ECS cluster.
        /// </summary>
        public required string Cluster { get; init; }

        /// <summary>
        /// Task definition family:revision or ARN.
        /// </summary>
        public required string TaskDefinition { get; init; }

        /// <summary>
        /// Number of tasks to run. Defaults to 1.
        /// </summary>
        public int Count { get; init; } = 1;

        /// <summary>
        /// Launch type (FARGATE or EC2). Left unset on the request when null or empty.
        /// </summary>
        public string? LaunchType { get; init; }

        /// <summary>
        /// Network configuration for awsvpc mode.
        /// </summary>
        public NetworkConfigurationPayload? NetworkConfiguration { get; init; }

        /// <summary>
        /// Container overrides. Ignored when null or empty.
        /// </summary>
        public IReadOnlyList<ContainerOverridePayload>? Overrides { get; init; }

        /// <summary>
        /// Task group.
        /// </summary>
        public string? Group { get; init; }

        /// <summary>
        /// Whether to wait for task completion. Defaults to true.
        /// </summary>
        public bool WaitForCompletion { get; init; } = true;

        /// <summary>
        /// Timeout for waiting for completion. Defaults to 30 minutes.
        /// </summary>
        public TimeSpan CompletionTimeout { get; init; } = TimeSpan.FromMinutes(30);

        /// <summary>
        /// Tags to apply to the task.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Tags { get; init; }
    }

    /// <summary>
    /// Network configuration payload.
    /// </summary>
    public sealed record NetworkConfigurationPayload
    {
        /// <summary>
        /// Subnet IDs.
        /// </summary>
        public required IReadOnlyList<string> Subnets { get; init; }

        /// <summary>
        /// Security group IDs.
        /// </summary>
        public IReadOnlyList<string>? SecurityGroups { get; init; }

        /// <summary>
        /// Whether to assign a public IP (mapped to ENABLED / DISABLED on the request).
        /// </summary>
        public bool AssignPublicIp { get; init; }
    }

    /// <summary>
    /// Container override payload.
    /// </summary>
    public sealed record ContainerOverridePayload
    {
        /// <summary>
        /// Container name.
        /// </summary>
        public required string Name { get; init; }

        /// <summary>
        /// Command override.
        /// </summary>
        public IReadOnlyList<string>? Command { get; init; }

        /// <summary>
        /// Environment variable overrides.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// CPU override.
        /// </summary>
        public int? Cpu { get; init; }

        /// <summary>
        /// Memory override.
        /// </summary>
        public int? Memory { get; init; }
    }

    /// <summary>
    /// Creates a new ECS run task handler.
    /// </summary>
    /// <param name="ecsClient">Amazon ECS client used for RunTask and DescribeTasks.</param>
    /// <param name="timeProvider">Clock used to stamp the completion time of results.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    public EcsRunTaskTask(
        IAmazonECS ecsClient,
        TimeProvider timeProvider,
        ILogger<EcsRunTaskTask> logger)
    {
        _ecsClient = ecsClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns a failed result when ECS reports a failure, starts no task, throws an
    /// <see cref="AmazonECSException"/>, a container exits non-zero, or the wait times out.
    /// Caller cancellation surfaces as <see cref="OperationCanceledException"/>. A payload that
    /// deserializes to <c>null</c> throws <c>InvalidEcsPayloadException</c>.
    /// </remarks>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<RunTaskPayload>(task.Payload)
            ?? throw new InvalidEcsPayloadException("ecs.run", "Failed to deserialize payload");

        _logger.LogInformation(
            "Running ECS task from definition {TaskDef} on cluster {Cluster}",
            payload.TaskDefinition,
            payload.Cluster);

        try
        {
            var runResponse = await _ecsClient.RunTaskAsync(BuildRequest(payload), ct);

            // Response collections may be null rather than empty, so test them with patterns.
            if (runResponse.Failures is { Count: > 0 } failures)
            {
                var failure = failures[0];
                _logger.LogError(
                    "Failed to run ECS task: {Reason} (ARN: {Arn})",
                    failure.Reason,
                    failure.Arn);

                return Failure(task, $"Failed to run task: {failure.Reason}");
            }

            var ecsTasks = runResponse.Tasks ?? [];
            if (ecsTasks.Count == 0)
            {
                // Neither failures nor tasks: nothing was started, so this cannot be a success.
                _logger.LogError(
                    "ECS started no task from definition {TaskDef}",
                    payload.TaskDefinition);

                return Failure(task, "Failed to run task: ECS returned no started tasks");
            }

            var taskArns = ecsTasks.Select(t => t.TaskArn).ToList();

            _logger.LogInformation(
                "Started {Count} ECS task(s): {TaskArns}",
                ecsTasks.Count,
                string.Join(", ", taskArns.Select(a => a.Split('/').Last())));

            if (!payload.WaitForCompletion)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["taskArns"] = taskArns,
                        ["taskCount"] = ecsTasks.Count,
                        ["status"] = "RUNNING"
                    },
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            var (completed, exitCodes) = await WaitForTasksAsync(
                payload.Cluster,
                taskArns,
                payload.CompletionTimeout,
                ct);

            var allSucceeded = completed && exitCodes.All(e => e == 0);

            // Distinguish "ran and failed" from "never finished in time": the latter has no exit codes.
            string? error = null;
            if (!completed)
            {
                error = $"Task(s) did not complete within {payload.CompletionTimeout}";
            }
            else if (!allSucceeded)
            {
                error = $"Task(s) failed with exit codes: [{string.Join(", ", exitCodes)}]";
            }

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = allSucceeded,
                Error = error,
                Outputs = new Dictionary<string, object>
                {
                    ["taskArns"] = taskArns,
                    ["taskCount"] = ecsTasks.Count,
                    ["exitCodes"] = exitCodes,
                    ["status"] = allSucceeded ? "SUCCEEDED" : "FAILED"
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (AmazonECSException ex)
        {
            _logger.LogError(ex, "Failed to run ECS task from {TaskDef}", payload.TaskDefinition);
            return Failure(task, $"Failed to run task: {ex.Message}");
        }
    }

    /// <summary>
    /// Maps the payload onto a <see cref="RunTaskRequest"/>; optional sections are set only when supplied.
    /// </summary>
    private static RunTaskRequest BuildRequest(RunTaskPayload payload)
    {
        var request = new RunTaskRequest
        {
            Cluster = payload.Cluster,
            TaskDefinition = payload.TaskDefinition,
            Count = payload.Count,
            Group = payload.Group
        };

        if (!string.IsNullOrEmpty(payload.LaunchType))
        {
            request.LaunchType = new LaunchType(payload.LaunchType);
        }

        if (payload.NetworkConfiguration is { } network)
        {
            request.NetworkConfiguration = new NetworkConfiguration
            {
                AwsvpcConfiguration = new AwsVpcConfiguration
                {
                    Subnets = network.Subnets.ToList(),
                    SecurityGroups = network.SecurityGroups?.ToList(),
                    AssignPublicIp = network.AssignPublicIp
                        ? AssignPublicIp.ENABLED
                        : AssignPublicIp.DISABLED
                }
            };
        }

        if (payload.Overrides is { Count: > 0 } overrides)
        {
            request.Overrides = new TaskOverride
            {
                ContainerOverrides = overrides.Select(o => new ContainerOverride
                {
                    Name = o.Name,
                    Command = o.Command?.ToList(),
                    Environment = o.Environment?.Select(kv => new Amazon.ECS.Model.KeyValuePair
                    {
                        Name = kv.Key,
                        Value = kv.Value
                    }).ToList(),
                    Cpu = o.Cpu,
                    Memory = o.Memory
                }).ToList()
            };
        }

        if (payload.Tags is not null)
        {
            request.Tags = payload.Tags.Select(kv => new Tag { Key = kv.Key, Value = kv.Value }).ToList();
        }

        return request;
    }

    /// <summary>
    /// Builds a failed result for <paramref name="task"/> stamped with the current time.
    /// </summary>
    private AgentTaskResult Failure(AgentTaskInfo task, string error) => new()
    {
        TaskId = task.Id,
        Success = false,
        Error = error,
        CompletedAt = _timeProvider.GetUtcNow()
    };

    /// <summary>
    /// Polls <c>DescribeTasks</c> until every task in <paramref name="taskArns"/> has stopped.
    /// </summary>
    /// <param name="cluster">Cluster the tasks run in.</param>
    /// <param name="taskArns">ARNs of the tasks to wait for.</param>
    /// <param name="timeout">Maximum time to wait.</param>
    /// <param name="ct">Caller cancellation token.</param>
    /// <returns>
    /// <c>(true, exitCodes)</c> once all tasks stopped, with one code per container (<c>-1</c> when
    /// ECS reports none); <c>(false, empty)</c> when the timeout elapsed first.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    private async Task<(bool Completed, List<int> ExitCodes)> WaitForTasksAsync(
        string cluster,
        List<string> taskArns,
        TimeSpan timeout,
        CancellationToken ct)
    {
        using var timeoutCts = new CancellationTokenSource(timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

        try
        {
            while (!linkedCts.IsCancellationRequested)
            {
                var response = await _ecsClient.DescribeTasksAsync(new DescribeTasksRequest
                {
                    Cluster = cluster,
                    Tasks = taskArns
                }, linkedCts.Token);

                var described = response.Tasks ?? [];

                // All() is vacuously true on an empty sequence, so also require that every
                // requested task was actually described before declaring completion.
                var allStopped = described.Count >= taskArns.Count
                    && described.All(t => t.LastStatus == "STOPPED");

                if (allStopped)
                {
                    var exitCodes = described
                        .SelectMany(t => (t.Containers ?? []).Select(c => c.ExitCode ?? -1))
                        .ToList();

                    _logger.LogInformation(
                        "ECS tasks completed with exit codes: [{ExitCodes}]",
                        string.Join(", ", exitCodes));

                    return (true, exitCodes);
                }

                await Task.Delay(PollInterval, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            // Timeout: reported below, together with the loop-condition exit path.
        }

        // The loop can also end without an exception when cancellation is seen by its condition.
        // If that was the caller's token rather than the timeout, surface it as cancellation.
        if (!timeoutCts.IsCancellationRequested)
        {
            ct.ThrowIfCancellationRequested();
        }

        _logger.LogWarning("Task completion wait timed out after {Timeout}", timeout);
        return (false, new List<int>());
    }
}

View File

@@ -0,0 +1,231 @@
using System.Text.Json;
using Amazon.ECS;
using Amazon.ECS.Model;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using Task = System.Threading.Tasks.Task;
namespace StellaOps.Agent.Ecs.Tasks;
/// <summary>
/// Task handler that changes the desired task count of an ECS service.
/// </summary>
public sealed class EcsScaleServiceTask : IEcsTask
{
    private readonly IAmazonECS _ecsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EcsScaleServiceTask> _logger;

    /// <summary>
    /// Request body understood by the scale handler.
    /// </summary>
    public sealed record ScaleServicePayload
    {
        /// <summary>
        /// Cluster name or ARN that hosts the service.
        /// </summary>
        public required string Cluster { get; init; }

        /// <summary>
        /// Service whose desired count is changed.
        /// </summary>
        public required string ServiceName { get; init; }

        /// <summary>
        /// Target number of tasks.
        /// </summary>
        public required int DesiredCount { get; init; }

        /// <summary>
        /// When true (the default), the handler polls until the service is stable.
        /// </summary>
        public bool WaitForStable { get; init; } = true;

        /// <summary>
        /// Upper bound for the stabilization wait; five minutes by default.
        /// </summary>
        public TimeSpan StabilizeTimeout { get; init; } = TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Wires up the handler's collaborators.
    /// </summary>
    /// <param name="ecsClient">Amazon ECS client used for describe and update calls.</param>
    /// <param name="timeProvider">Clock used to stamp the completion time of results.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    public EcsScaleServiceTask(
        IAmazonECS ecsClient,
        TimeProvider timeProvider,
        ILogger<EcsScaleServiceTask> logger)
        => (_ecsClient, _timeProvider, _logger) = (ecsClient, timeProvider, logger);

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var scale = JsonSerializer.Deserialize<ScaleServicePayload>(task.Payload);
        if (scale is null)
        {
            throw new InvalidEcsPayloadException("ecs.scale", "Failed to deserialize payload");
        }

        _logger.LogInformation(
            "Scaling ECS service {Service} in cluster {Cluster} to {DesiredCount} tasks",
            scale.ServiceName,
            scale.Cluster,
            scale.DesiredCount);

        try
        {
            // Look the service up first so the previous desired count can be reported.
            var lookupRequest = new DescribeServicesRequest
            {
                Cluster = scale.Cluster,
                Services = new List<string> { scale.ServiceName }
            };
            var lookup = await _ecsClient.DescribeServicesAsync(lookupRequest, ct);

            if (lookup.Services.FirstOrDefault() is not { } existing)
            {
                return Failed(task, $"Service '{scale.ServiceName}' not found in cluster '{scale.Cluster}'");
            }

            var priorDesired = existing.DesiredCount;

            // Apply the new desired count.
            var updateRequest = new UpdateServiceRequest
            {
                Cluster = scale.Cluster,
                Service = scale.ServiceName,
                DesiredCount = scale.DesiredCount
            };
            var update = await _ecsClient.UpdateServiceAsync(updateRequest, ct);

            if (update.Service is not { } updated)
            {
                return Failed(task, "Service update returned no service object");
            }

            _logger.LogInformation(
                "Updated ECS service {Service} desired count from {Previous} to {New}",
                scale.ServiceName,
                priorDesired,
                scale.DesiredCount);

            if (!scale.WaitForStable)
            {
                // Fire-and-forget mode: report the state straight after the update call.
                var scalingOutputs = new Dictionary<string, object>
                {
                    ["serviceArn"] = updated.ServiceArn ?? "",
                    ["serviceName"] = updated.ServiceName ?? "",
                    ["previousDesiredCount"] = priorDesired,
                    ["newDesiredCount"] = scale.DesiredCount,
                    ["runningCount"] = updated.RunningCount,
                    ["status"] = "SCALING"
                };

                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = scalingOutputs,
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            var isStable = await WaitForServiceStableAsync(
                scale.Cluster,
                scale.ServiceName,
                scale.DesiredCount,
                scale.StabilizeTimeout,
                ct);

            var finalOutputs = new Dictionary<string, object>
            {
                ["serviceArn"] = updated.ServiceArn ?? "",
                ["serviceName"] = updated.ServiceName ?? "",
                ["previousDesiredCount"] = priorDesired,
                ["newDesiredCount"] = scale.DesiredCount,
                ["status"] = isStable ? "STABLE" : "UNSTABLE"
            };

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = isStable,
                Error = isStable ? null : "Service did not stabilize within timeout",
                Outputs = finalOutputs,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (AmazonECSException ex)
        {
            _logger.LogError(ex, "Failed to scale ECS service {Service}", scale.ServiceName);
            return Failed(task, $"Failed to scale service: {ex.Message}");
        }
    }

    /// <summary>
    /// Builds a failed result for <paramref name="task"/> stamped with the current time.
    /// </summary>
    private AgentTaskResult Failed(AgentTaskInfo task, string message) => new()
    {
        TaskId = task.Id,
        Success = false,
        Error = message,
        CompletedAt = _timeProvider.GetUtcNow()
    };

    /// <summary>
    /// Polls the service every five seconds until its running count equals
    /// <paramref name="targetCount"/> and exactly one deployment remains.
    /// </summary>
    /// <returns>
    /// <c>true</c> once that condition holds; <c>false</c> when the service disappears from the
    /// describe response or the wait ends without reaching it.
    /// </returns>
    private async Task<bool> WaitForServiceStableAsync(
        string cluster,
        string serviceName,
        int targetCount,
        TimeSpan timeout,
        CancellationToken ct)
    {
        using var deadline = new CancellationTokenSource(timeout);
        using var combined = CancellationTokenSource.CreateLinkedTokenSource(ct, deadline.Token);
        var pollToken = combined.Token;

        try
        {
            while (!pollToken.IsCancellationRequested)
            {
                var describeRequest = new DescribeServicesRequest
                {
                    Cluster = cluster,
                    Services = new List<string> { serviceName }
                };
                var snapshot = await _ecsClient.DescribeServicesAsync(describeRequest, pollToken);

                if (snapshot.Services.FirstOrDefault() is not { } current)
                {
                    return false;
                }

                var reachedTarget = current.RunningCount == targetCount;
                if (reachedTarget && current.Deployments.Count == 1)
                {
                    _logger.LogInformation(
                        "Service {Service} scaled to {Count} running tasks",
                        serviceName,
                        targetCount);
                    return true;
                }

                _logger.LogDebug(
                    "Service {Service} scaling: running={Running}, desired={Desired}",
                    serviceName,
                    current.RunningCount,
                    targetCount);

                await Task.Delay(TimeSpan.FromSeconds(5), pollToken);
            }
        }
        catch (OperationCanceledException) when (deadline.IsCancellationRequested)
        {
            _logger.LogWarning("Service scaling stabilization timed out after {Timeout}", timeout);
        }

        return false;
    }
}

View File

@@ -0,0 +1,107 @@
using System.Text.Json;
using Amazon.ECS;
using Amazon.ECS.Model;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Ecs.Tasks;
/// <summary>
/// Task handler for stopping ECS tasks.
/// </summary>
public sealed class EcsStopTaskTask : IEcsTask
{
    /// <summary>Stop reason sent to ECS when the payload does not supply one.</summary>
    private const string DefaultStopReason = "Stopped by Stella Agent";

    private readonly IAmazonECS _ecsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EcsStopTaskTask> _logger;

    /// <summary>
    /// Payload for stopping an ECS task.
    /// </summary>
    public sealed record StopTaskPayload
    {
        /// <summary>
        /// Name or ARN of the ECS cluster.
        /// </summary>
        public required string Cluster { get; init; }

        /// <summary>
        /// Task ARN or ID to stop.
        /// </summary>
        public required string TaskArn { get; init; }

        /// <summary>
        /// Reason for stopping the task. A default reason is sent when omitted.
        /// </summary>
        public string? Reason { get; init; }
    }

    /// <summary>
    /// Creates a new ECS stop task handler.
    /// </summary>
    /// <param name="ecsClient">Amazon ECS client used to issue the stop call.</param>
    /// <param name="timeProvider">Clock used to stamp the completion time of results.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    public EcsStopTaskTask(
        IAmazonECS ecsClient,
        TimeProvider timeProvider,
        ILogger<EcsStopTaskTask> logger)
    {
        _ecsClient = ecsClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// On success the outputs are <c>taskArn</c>, <c>lastStatus</c>, <c>stoppedReason</c> and
    /// <c>stoppedAt</c> (round-trip UTC timestamp, or an empty string when ECS has not reported a
    /// stop time). An <see cref="AmazonECSException"/>, or a response without a task, yields a
    /// failed result. A payload that deserializes to <c>null</c> throws <c>InvalidEcsPayloadException</c>.
    /// </remarks>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<StopTaskPayload>(task.Payload)
            ?? throw new InvalidEcsPayloadException("ecs.stop", "Failed to deserialize payload");

        _logger.LogInformation(
            "Stopping ECS task {TaskArn} in cluster {Cluster}",
            payload.TaskArn,
            payload.Cluster);

        try
        {
            var request = new StopTaskRequest
            {
                Cluster = payload.Cluster,
                Task = payload.TaskArn,
                Reason = payload.Reason ?? DefaultStopReason
            };

            var response = await _ecsClient.StopTaskAsync(request, ct);

            // Mirror the scale handler's guard: a missing task object is reported as a failed
            // result instead of escaping as a NullReferenceException.
            if (response.Task is not { } stoppedTask)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = "Failed to stop task: response contained no task object",
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            _logger.LogInformation(
                "Stopped ECS task {TaskArn}, last status: {Status}",
                stoppedTask.TaskArn,
                stoppedTask.LastStatus);

            // StoppedAt can be absent (presumably while the task is still shutting down — TODO
            // confirm). Emit an empty value rather than a timestamp derived from DateTime.MinValue.
            var stoppedAt = stoppedTask.StoppedAt is { } at
                ? at.ToUniversalTime().ToString("o", System.Globalization.CultureInfo.InvariantCulture)
                : "";

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["taskArn"] = stoppedTask.TaskArn ?? "",
                    ["lastStatus"] = stoppedTask.LastStatus ?? "",
                    ["stoppedReason"] = stoppedTask.StoppedReason ?? payload.Reason ?? "Stopped by agent",
                    ["stoppedAt"] = stoppedAt
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (AmazonECSException ex)
        {
            _logger.LogError(ex, "Failed to stop ECS task {TaskArn}", payload.TaskArn);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to stop task: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }
}

View File

@@ -0,0 +1,137 @@
namespace StellaOps.Agent.Nomad.Client;
/// <summary>
/// Interface for Nomad API client.
/// </summary>
/// <remarks>
/// Behaviour notes in the member documentation describe the <c>NomadClient</c> implementation in
/// this namespace; other implementations may differ.
/// </remarks>
public interface INomadClient : IDisposable
{
/// <summary>
/// Gets agent self information.
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>The agent's self description.</returns>
Task<NomadAgentSelf> GetAgentSelfAsync(CancellationToken ct = default);
/// <summary>
/// Lists all jobs.
/// </summary>
/// <param name="ns">Optional namespace; <c>NomadClient</c> sends it as the <c>namespace</c> query parameter.</param>
/// <param name="region">Optional region; <c>NomadClient</c> sends it as the <c>region</c> query parameter.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The job list; <c>NomadClient</c> returns an empty list when the body deserializes to null.</returns>
Task<IReadOnlyList<NomadJobListItem>> ListJobsAsync(
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Gets a job by ID.
/// </summary>
/// <param name="jobId">Job identifier.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The job, or <c>null</c>; <c>NomadClient</c> returns <c>null</c> on HTTP 404.</returns>
Task<NomadJob?> GetJobAsync(
string jobId,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Registers (creates or updates) a job.
/// </summary>
/// <param name="job">Job definition to register.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The register response.</returns>
Task<NomadJobRegisterResponse> RegisterJobAsync(
NomadJob job,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Parses a job specification (HCL or JSON).
/// </summary>
/// <param name="jobSpec">Job specification text.</param>
/// <param name="canonicalize">Forwarded to Nomad as the parse request's <c>Canonicalize</c> flag.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The parsed job.</returns>
Task<NomadJob> ParseJobAsync(
string jobSpec,
bool canonicalize = true,
CancellationToken ct = default);
/// <summary>
/// Stops (deregisters) a job.
/// </summary>
/// <param name="jobId">Job identifier.</param>
/// <param name="purge">When true, <c>NomadClient</c> adds <c>purge=true</c> to the request.</param>
/// <param name="global">When true, <c>NomadClient</c> adds <c>global=true</c> to the request.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The deregister response.</returns>
Task<NomadJobDeregisterResponse> StopJobAsync(
string jobId,
bool purge = false,
bool global = false,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Scales a job task group.
/// </summary>
/// <param name="jobId">Job identifier.</param>
/// <param name="taskGroup">Task group to scale.</param>
/// <param name="count">Requested count for the task group.</param>
/// <param name="reason">Optional message recorded with the scale request.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The scale response.</returns>
Task<NomadJobScaleResponse> ScaleJobAsync(
string jobId,
string taskGroup,
int count,
string? reason = null,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Dispatches a parameterized batch job.
/// </summary>
/// <param name="jobId">Job identifier.</param>
/// <param name="meta">Optional metadata passed with the dispatch request.</param>
/// <param name="payload">Optional payload text; <c>NomadClient</c> sends it as base64-encoded UTF-8.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The dispatch response.</returns>
Task<NomadJobDispatchResponse> DispatchJobAsync(
string jobId,
IReadOnlyDictionary<string, string>? meta = null,
string? payload = null,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Gets allocations for a job.
/// </summary>
/// <param name="jobId">Job identifier.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The allocations; <c>NomadClient</c> returns an empty list when the body deserializes to null.</returns>
Task<IReadOnlyList<NomadAllocation>> GetJobAllocationsAsync(
string jobId,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Gets deployments for a job.
/// </summary>
/// <param name="jobId">Job identifier.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The deployments; <c>NomadClient</c> returns an empty list when the body deserializes to null.</returns>
Task<IReadOnlyList<NomadDeployment>> GetJobDeploymentsAsync(
string jobId,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Gets a specific deployment.
/// </summary>
/// <param name="deploymentId">Deployment identifier.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The deployment, or <c>null</c>; <c>NomadClient</c> returns <c>null</c> on HTTP 404.</returns>
Task<NomadDeployment?> GetDeploymentAsync(
string deploymentId,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Gets an evaluation by ID.
/// </summary>
/// <param name="evalId">Evaluation identifier.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The evaluation, or <c>null</c>; <c>NomadClient</c> returns <c>null</c> on HTTP 404.</returns>
Task<NomadEvaluation?> GetEvaluationAsync(
string evalId,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Gets an allocation by ID.
/// </summary>
/// <param name="allocId">Allocation identifier.</param>
/// <param name="ns">Optional namespace.</param>
/// <param name="region">Optional region.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The allocation, or <c>null</c>; <c>NomadClient</c> returns <c>null</c> on HTTP 404.</returns>
Task<NomadAllocation?> GetAllocationAsync(
string allocId,
string? ns = null,
string? region = null,
CancellationToken ct = default);
/// <summary>
/// Gets logs for an allocation task.
/// </summary>
/// <param name="allocId">Allocation identifier.</param>
/// <param name="taskName">Task whose logs are requested (sent as the <c>task</c> query parameter).</param>
/// <param name="logType">Log type (sent as the <c>type</c> query parameter).</param>
/// <param name="follow">When true, <c>NomadClient</c> adds <c>follow=true</c> to the request.</param>
/// <param name="offset">Optional offset (sent as the <c>offset</c> query parameter).</param>
/// <param name="origin">Optional origin (sent as the <c>origin</c> query parameter).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A stream over the response body; the caller is responsible for disposing it.</returns>
Task<Stream> GetAllocationLogsAsync(
string allocId,
string taskName,
string logType,
bool follow = false,
int? offset = null,
string? origin = null,
CancellationToken ct = default);
}

View File

@@ -0,0 +1,349 @@
using System.Net.Http.Json;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
namespace StellaOps.Agent.Nomad.Client;
/// <summary>
/// HTTP client wrapper for Nomad API.
/// </summary>
/// <remarks>
/// Every HTTP response is disposed once its body has been read. The one exception is
/// <see cref="GetAllocationLogsAsync"/>, whose response must stay alive while the caller reads
/// the returned stream; it is disposed only when the request fails.
/// </remarks>
public sealed class NomadClient : INomadClient
{
    private readonly HttpClient _httpClient;
    private readonly ILogger<NomadClient> _logger;
    private readonly JsonSerializerOptions _jsonOptions;

    /// <summary>
    /// Creates a new Nomad client.
    /// </summary>
    /// <param name="httpClient">The HTTP client; request paths are relative, so it must carry the base address.</param>
    /// <param name="logger">Logger instance.</param>
    /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
    public NomadClient(HttpClient httpClient, ILogger<NomadClient> logger)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _jsonOptions = new JsonSerializerOptions
        {
            PropertyNameCaseInsensitive = true,
            PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
        };
    }

    /// <summary>
    /// Gets agent self information from <c>/v1/agent/self</c>.
    /// </summary>
    /// <exception cref="NomadApiException">Non-success status, or the body deserializes to null.</exception>
    public async Task<NomadAgentSelf> GetAgentSelfAsync(CancellationToken ct = default)
        => await GetJsonAsync<NomadAgentSelf>("/v1/agent/self", nullOnNotFound: false, ct)
            ?? throw new NomadApiException("Failed to deserialize agent self response");

    /// <summary>
    /// Lists all jobs from <c>/v1/jobs</c>; returns an empty list when the body deserializes to null.
    /// </summary>
    public async Task<IReadOnlyList<NomadJobListItem>> ListJobsAsync(
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => await GetJsonAsync<IReadOnlyList<NomadJobListItem>>(
                BuildUrl("/v1/jobs", ns, region), nullOnNotFound: false, ct)
            ?? [];

    /// <summary>
    /// Gets a job by ID; returns <c>null</c> when Nomad answers 404.
    /// </summary>
    public Task<NomadJob?> GetJobAsync(
        string jobId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetJsonAsync<NomadJob>(
            BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}", ns, region), nullOnNotFound: true, ct);

    /// <summary>
    /// Registers (creates or updates) a job by posting it to <c>/v1/jobs</c>.
    /// </summary>
    /// <exception cref="NomadApiException">Non-success status, or the body deserializes to null.</exception>
    public async Task<NomadJobRegisterResponse> RegisterJobAsync(
        NomadJob job,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
    {
        var url = BuildUrl("/v1/jobs", ns, region);
        var request = new NomadJobRegisterRequest { Job = job };

        return await PostJsonAsync<NomadJobRegisterRequest, NomadJobRegisterResponse>(url, request, ct)
            ?? throw new NomadApiException("Failed to deserialize job register response");
    }

    /// <summary>
    /// Parses a job specification (HCL or JSON) via <c>/v1/jobs/parse</c>.
    /// </summary>
    /// <exception cref="NomadApiException">Non-success status, or the body deserializes to null.</exception>
    public async Task<NomadJob> ParseJobAsync(
        string jobSpec,
        bool canonicalize = true,
        CancellationToken ct = default)
    {
        var request = new NomadJobParseRequest
        {
            JobHCL = jobSpec,
            Canonicalize = canonicalize
        };

        return await PostJsonAsync<NomadJobParseRequest, NomadJob>("/v1/jobs/parse", request, ct)
            ?? throw new NomadApiException("Failed to deserialize parsed job");
    }

    /// <summary>
    /// Stops (deregisters) a job with an HTTP DELETE on <c>/v1/job/{jobId}</c>.
    /// </summary>
    /// <exception cref="NomadApiException">Non-success status, or the body deserializes to null.</exception>
    public async Task<NomadJobDeregisterResponse> StopJobAsync(
        string jobId,
        bool purge = false,
        bool global = false,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
    {
        var url = BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}", ns, region);
        if (purge) url += url.Contains('?') ? "&purge=true" : "?purge=true";
        if (global) url += url.Contains('?') ? "&global=true" : "?global=true";

        using var response = await _httpClient.DeleteAsync(url, ct);
        await EnsureSuccessAsync(response, ct);

        return await response.Content.ReadFromJsonAsync<NomadJobDeregisterResponse>(_jsonOptions, ct)
            ?? throw new NomadApiException("Failed to deserialize job deregister response");
    }

    /// <summary>
    /// Scales a job task group via <c>/v1/job/{jobId}/scale</c>.
    /// </summary>
    /// <exception cref="NomadApiException">Non-success status, or the body deserializes to null.</exception>
    public async Task<NomadJobScaleResponse> ScaleJobAsync(
        string jobId,
        string taskGroup,
        int count,
        string? reason = null,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
    {
        var url = BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}/scale", ns, region);
        var request = new NomadJobScaleRequest
        {
            Count = count,
            Target = new Dictionary<string, string> { ["Group"] = taskGroup },
            Message = reason
        };

        return await PostJsonAsync<NomadJobScaleRequest, NomadJobScaleResponse>(url, request, ct)
            ?? throw new NomadApiException("Failed to deserialize job scale response");
    }

    /// <summary>
    /// Dispatches a parameterized batch job via <c>/v1/job/{jobId}/dispatch</c>.
    /// </summary>
    /// <remarks>The payload text is sent as base64-encoded UTF-8.</remarks>
    /// <exception cref="NomadApiException">Non-success status, or the body deserializes to null.</exception>
    public async Task<NomadJobDispatchResponse> DispatchJobAsync(
        string jobId,
        IReadOnlyDictionary<string, string>? meta = null,
        string? payload = null,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
    {
        var url = BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}/dispatch", ns, region);
        var request = new NomadJobDispatchRequest
        {
            Meta = meta?.ToDictionary(kv => kv.Key, kv => kv.Value),
            Payload = payload is not null ? Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(payload)) : null
        };

        return await PostJsonAsync<NomadJobDispatchRequest, NomadJobDispatchResponse>(url, request, ct)
            ?? throw new NomadApiException("Failed to deserialize job dispatch response");
    }

    /// <summary>
    /// Gets allocations for a job; returns an empty list when the body deserializes to null.
    /// </summary>
    public async Task<IReadOnlyList<NomadAllocation>> GetJobAllocationsAsync(
        string jobId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => await GetJsonAsync<IReadOnlyList<NomadAllocation>>(
                BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}/allocations", ns, region),
                nullOnNotFound: false,
                ct)
            ?? [];

    /// <summary>
    /// Gets deployments for a job; returns an empty list when the body deserializes to null.
    /// </summary>
    public async Task<IReadOnlyList<NomadDeployment>> GetJobDeploymentsAsync(
        string jobId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => await GetJsonAsync<IReadOnlyList<NomadDeployment>>(
                BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}/deployments", ns, region),
                nullOnNotFound: false,
                ct)
            ?? [];

    /// <summary>
    /// Gets a specific deployment; returns <c>null</c> when Nomad answers 404.
    /// </summary>
    public Task<NomadDeployment?> GetDeploymentAsync(
        string deploymentId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetJsonAsync<NomadDeployment>(
            BuildUrl($"/v1/deployment/{Uri.EscapeDataString(deploymentId)}", ns, region),
            nullOnNotFound: true,
            ct);

    /// <summary>
    /// Gets an evaluation by ID; returns <c>null</c> when Nomad answers 404.
    /// </summary>
    public Task<NomadEvaluation?> GetEvaluationAsync(
        string evalId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetJsonAsync<NomadEvaluation>(
            BuildUrl($"/v1/evaluation/{Uri.EscapeDataString(evalId)}", ns, region),
            nullOnNotFound: true,
            ct);

    /// <summary>
    /// Gets an allocation by ID; returns <c>null</c> when Nomad answers 404.
    /// </summary>
    public Task<NomadAllocation?> GetAllocationAsync(
        string allocId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetJsonAsync<NomadAllocation>(
            BuildUrl($"/v1/allocation/{Uri.EscapeDataString(allocId)}", ns, region),
            nullOnNotFound: true,
            ct);

    /// <summary>
    /// Gets logs for an allocation task as a stream over the response body.
    /// </summary>
    /// <remarks>
    /// The request completes as soon as the headers arrive, so the body is read lazily by the
    /// caller, who owns the returned stream and must dispose it.
    /// </remarks>
    /// <exception cref="NomadApiException">Nomad answered with a non-success status.</exception>
    public async Task<Stream> GetAllocationLogsAsync(
        string allocId,
        string taskName,
        string logType,
        bool follow = false,
        int? offset = null,
        string? origin = null,
        CancellationToken ct = default)
    {
        var url = $"/v1/client/fs/logs/{Uri.EscapeDataString(allocId)}?task={Uri.EscapeDataString(taskName)}&type={Uri.EscapeDataString(logType)}";
        if (follow) url += "&follow=true";
        if (offset.HasValue) url += $"&offset={offset.Value}";
        if (origin is not null) url += $"&origin={Uri.EscapeDataString(origin)}";

        var response = await _httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, ct);
        try
        {
            await EnsureSuccessAsync(response, ct);
            return await response.Content.ReadAsStreamAsync(ct);
        }
        catch
        {
            // With ResponseHeadersRead an undisposed response keeps its connection checked out,
            // so release it on any failure path. On success the stream keeps it alive.
            response.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Sends a GET request and deserializes the JSON body, disposing the response afterwards.
    /// </summary>
    /// <param name="url">Relative request URL.</param>
    /// <param name="nullOnNotFound">When true, an HTTP 404 yields <c>null</c> instead of an exception.</param>
    /// <param name="ct">Cancellation token.</param>
    private async Task<T?> GetJsonAsync<T>(string url, bool nullOnNotFound, CancellationToken ct)
        where T : class
    {
        using var response = await _httpClient.GetAsync(url, ct);

        if (nullOnNotFound && response.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            return null;
        }

        await EnsureSuccessAsync(response, ct);
        return await response.Content.ReadFromJsonAsync<T>(_jsonOptions, ct);
    }

    /// <summary>
    /// Posts <paramref name="body"/> as JSON and deserializes the JSON reply, disposing the response afterwards.
    /// </summary>
    private async Task<TResponse?> PostJsonAsync<TRequest, TResponse>(
        string url,
        TRequest body,
        CancellationToken ct)
        where TResponse : class
    {
        using var response = await _httpClient.PostAsJsonAsync(url, body, _jsonOptions, ct);
        await EnsureSuccessAsync(response, ct);
        return await response.Content.ReadFromJsonAsync<TResponse>(_jsonOptions, ct);
    }

    /// <summary>
    /// Appends the optional <c>namespace</c> and <c>region</c> query parameters to <paramref name="path"/>.
    /// </summary>
    private static string BuildUrl(string path, string? ns, string? region)
    {
        var url = path;
        var hasQuery = false;

        if (!string.IsNullOrEmpty(ns))
        {
            url += $"?namespace={Uri.EscapeDataString(ns)}";
            hasQuery = true;
        }

        if (!string.IsNullOrEmpty(region))
        {
            url += hasQuery ? $"&region={Uri.EscapeDataString(region)}" : $"?region={Uri.EscapeDataString(region)}";
        }

        return url;
    }

    /// <summary>
    /// Throws a <see cref="NomadApiException"/> carrying the status code and response body when
    /// the response does not have a success status.
    /// </summary>
    private static async Task EnsureSuccessAsync(HttpResponseMessage response, CancellationToken ct)
    {
        if (!response.IsSuccessStatusCode)
        {
            var content = await response.Content.ReadAsStringAsync(ct);
            throw new NomadApiException(
                $"Nomad API returned {(int)response.StatusCode} {response.StatusCode}: {content}",
                (int)response.StatusCode);
        }
    }

    /// <inheritdoc />
    /// <remarks>Disposes the <see cref="HttpClient"/> that was passed to the constructor.</remarks>
    public void Dispose()
    {
        _httpClient.Dispose();
    }
}

View File

@@ -0,0 +1,576 @@
using System.Text.Json.Serialization;
namespace StellaOps.Agent.Nomad.Client;
/// <summary>
/// Nomad agent self information: the agent configuration, the agent's member entry,
/// and a nested string-keyed map of statistics.
/// </summary>
/// <remarks>
/// All properties are nullable and init-only with no initializer, so anything not set stays <c>null</c>.
/// </remarks>
public sealed record NomadAgentSelf
{
public NomadAgentConfig? Config { get; init; }
public NomadAgentMember? Member { get; init; }
// Two-level map: outer key -> (inner key -> string value).
// NOTE(review): presumably stat group -> stat name -> value; confirm against the Nomad agent API.
public IReadOnlyDictionary<string, IReadOnlyDictionary<string, string>>? Stats { get; init; }
}
/// <summary>
/// Nomad agent configuration: region, datacenter, node name and version strings.
/// </summary>
/// <remarks>
/// Every property is an optional string; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadAgentConfig
{
public string? Region { get; init; }
public string? Datacenter { get; init; }
public string? NodeName { get; init; }
public string? Version { get; init; }
}
/// <summary>
/// Nomad agent member information: name, address, port and status.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadAgentMember
{
public string? Name { get; init; }
public string? Addr { get; init; }
public int? Port { get; init; }
public string? Status { get; init; }
}
/// <summary>
/// Nomad job list item: identity, type, status and index fields of a job.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadJobListItem
{
public string? ID { get; init; }
public string? Name { get; init; }
public string? Namespace { get; init; }
public string? Type { get; init; }
public string? Status { get; init; }
public string? StatusDescription { get; init; }
public int? Priority { get; init; }
public IReadOnlyList<string>? Datacenters { get; init; }
// Bound to the JSON property "SubmitTime".
// NOTE(review): the C# name implies nanoseconds; confirm the unit against the Nomad API.
[JsonPropertyName("SubmitTime")]
public long? SubmitTimeNanos { get; init; }
public long? ModifyIndex { get; init; }
public long? JobModifyIndex { get; init; }
}
/// <summary>
/// Nomad job definition: identity, placement (region, datacenters, constraints, affinities),
/// task groups, update strategy, metadata, and status/index fields.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadJob
{
public string? ID { get; init; }
public string? Name { get; init; }
public string? Namespace { get; init; }
public string? Region { get; init; }
public string? Type { get; init; }
public int? Priority { get; init; }
public bool? AllAtOnce { get; init; }
public IReadOnlyList<string>? Datacenters { get; init; }
public IReadOnlyList<NomadTaskGroup>? TaskGroups { get; init; }
public NomadUpdateStrategy? Update { get; init; }
public IReadOnlyList<NomadConstraint>? Constraints { get; init; }
public IReadOnlyList<NomadAffinity>? Affinities { get; init; }
public IReadOnlyDictionary<string, string>? Meta { get; init; }
public string? Status { get; init; }
public string? StatusDescription { get; init; }
public bool? Stable { get; init; }
public long? Version { get; init; }
// NOTE(review): NomadJobListItem exposes the same JSON field as SubmitTimeNanos;
// presumably the same unit applies here - confirm.
public long? SubmitTime { get; init; }
public long? CreateIndex { get; init; }
public long? ModifyIndex { get; init; }
public long? JobModifyIndex { get; init; }
}
/// <summary>
/// Nomad task group definition: a named group with a count, its tasks, networks, services,
/// restart/reschedule policies, ephemeral disk, update strategy, constraints and metadata.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadTaskGroup
{
public string? Name { get; init; }
public int? Count { get; init; }
public IReadOnlyList<NomadTask>? Tasks { get; init; }
public IReadOnlyList<NomadNetwork>? Networks { get; init; }
public IReadOnlyList<NomadService>? Services { get; init; }
public NomadRestartPolicy? RestartPolicy { get; init; }
public NomadReschedulePolicy? ReschedulePolicy { get; init; }
public NomadEphemeralDisk? EphemeralDisk { get; init; }
public NomadUpdateStrategy? Update { get; init; }
public IReadOnlyList<NomadConstraint>? Constraints { get; init; }
public IReadOnlyDictionary<string, string>? Meta { get; init; }
}
/// <summary>
/// Nomad task definition: driver and driver configuration, resources, environment,
/// templates, artifacts, log configuration and kill behaviour.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadTask
{
public string? Name { get; init; }
public string? Driver { get; init; }
// Values are typed as object.
// NOTE(review): when read with System.Text.Json these presumably arrive as JsonElement - confirm before casting.
public IReadOnlyDictionary<string, object>? Config { get; init; }
public NomadResources? Resources { get; init; }
public IReadOnlyDictionary<string, string>? Env { get; init; }
public IReadOnlyList<NomadTemplate>? Templates { get; init; }
public IReadOnlyList<NomadArtifact>? Artifacts { get; init; }
public NomadLogConfig? LogConfig { get; init; }
public bool? Leader { get; init; }
public string? KillSignal { get; init; }
// Bound to the JSON property "KillTimeout"; the C# name implies nanoseconds (TODO confirm unit).
[JsonPropertyName("KillTimeout")]
public long? KillTimeoutNanos { get; init; }
}
/// <summary>
/// Nomad update strategy: parallelism, health-check mode, canary count, auto revert/promote flags
/// and several duration settings.
/// </summary>
/// <remarks>
/// The four <c>*Nanos</c> properties are bound to JSON names without the suffix
/// (<c>MinHealthyTime</c>, <c>HealthyDeadline</c>, <c>ProgressDeadline</c>, <c>Stagger</c>).
/// NOTE(review): the suffix implies nanosecond durations; confirm against the Nomad API.
/// </remarks>
public sealed record NomadUpdateStrategy
{
public int? MaxParallel { get; init; }
public string? HealthCheck { get; init; }
[JsonPropertyName("MinHealthyTime")]
public long? MinHealthyTimeNanos { get; init; }
[JsonPropertyName("HealthyDeadline")]
public long? HealthyDeadlineNanos { get; init; }
[JsonPropertyName("ProgressDeadline")]
public long? ProgressDeadlineNanos { get; init; }
public bool? AutoRevert { get; init; }
public bool? AutoPromote { get; init; }
public int? Canary { get; init; }
[JsonPropertyName("Stagger")]
public long? StaggerNanos { get; init; }
}
/// <summary>
/// Nomad resource requirements: CPU, memory, maximum memory, disk and network resources.
/// </summary>
/// <remarks>
/// Every property is optional. The <c>MB</c> suffixes come from the property names;
/// NOTE(review): the unit of <see cref="CPU"/> is not visible here (presumably MHz) - confirm.
/// </remarks>
public sealed record NomadResources
{
public int? CPU { get; init; }
public int? MemoryMB { get; init; }
public int? MemoryMaxMB { get; init; }
public int? DiskMB { get; init; }
public IReadOnlyList<NomadNetworkResource>? Networks { get; init; }
}
/// <summary>
/// Nomad network resource: mode, CIDR, bandwidth figure and reserved/dynamic port lists.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadNetworkResource
{
public string? Mode { get; init; }
public string? CIDR { get; init; }
public int? MBits { get; init; }
public IReadOnlyList<NomadPort>? ReservedPorts { get; init; }
public IReadOnlyList<NomadPort>? DynamicPorts { get; init; }
}
/// <summary>
/// Nomad port definition: a labelled port with an optional value, target and host network.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="Value"/> is presumably the host port and <see cref="To"/> the port
/// inside the task; the code here does not establish that - confirm against the Nomad API.
/// </remarks>
public sealed record NomadPort
{
public string? Label { get; init; }
public int? Value { get; init; }
public int? To { get; init; }
public string? HostNetwork { get; init; }
}
/// <summary>
/// Nomad network definition: mode, device, CIDR, IP, reserved/dynamic ports and DNS settings.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadNetwork
{
public string? Mode { get; init; }
public string? Device { get; init; }
public string? CIDR { get; init; }
public string? IP { get; init; }
public IReadOnlyList<NomadPort>? ReservedPorts { get; init; }
public IReadOnlyList<NomadPort>? DynamicPorts { get; init; }
public NomadDNSConfig? DNS { get; init; }
}
/// <summary>
/// Nomad DNS configuration: lists of servers, search entries and options.
/// </summary>
/// <remarks>
/// Each list is optional; an unset list is <c>null</c> rather than empty.
/// </remarks>
public sealed record NomadDNSConfig
{
public IReadOnlyList<string>? Servers { get; init; }
public IReadOnlyList<string>? Searches { get; init; }
public IReadOnlyList<string>? Options { get; init; }
}
/// <summary>
/// Nomad service definition: name, port label, tags (regular and canary), checks,
/// Consul Connect settings and metadata.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadService
{
public string? Name { get; init; }
public string? PortLabel { get; init; }
public IReadOnlyList<string>? Tags { get; init; }
public IReadOnlyList<string>? CanaryTags { get; init; }
public IReadOnlyList<NomadServiceCheck>? Checks { get; init; }
public NomadConsulConnect? Connect { get; init; }
public IReadOnlyDictionary<string, string>? Meta { get; init; }
}
/// <summary>
/// Nomad service check: name, type, path, protocol, port label, timing and pass/fail thresholds.
/// </summary>
/// <remarks>
/// <see cref="IntervalNanos"/> and <see cref="TimeoutNanos"/> are bound to the JSON names
/// <c>Interval</c> and <c>Timeout</c>.
/// NOTE(review): the suffix implies nanosecond durations; confirm against the Nomad API.
/// </remarks>
public sealed record NomadServiceCheck
{
public string? Name { get; init; }
public string? Type { get; init; }
public string? Path { get; init; }
public string? Protocol { get; init; }
public string? PortLabel { get; init; }
[JsonPropertyName("Interval")]
public long? IntervalNanos { get; init; }
[JsonPropertyName("Timeout")]
public long? TimeoutNanos { get; init; }
public string? InitialStatus { get; init; }
public int? SuccessBeforePassing { get; init; }
public int? FailuresBeforeCritical { get; init; }
}
/// <summary>
/// Nomad Consul Connect configuration: a native flag and an optional sidecar service.
/// </summary>
/// <remarks>
/// Both properties are optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadConsulConnect
{
public bool? Native { get; init; }
public NomadConsulSidecarService? SidecarService { get; init; }
}
/// <summary>
/// Nomad Consul sidecar service: tags, port and proxy configuration.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadConsulSidecarService
{
public IReadOnlyList<string>? Tags { get; init; }
public int? Port { get; init; }
public NomadConsulProxy? Proxy { get; init; }
}
/// <summary>
/// Nomad Consul proxy configuration: local service address/port and the list of upstreams.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadConsulProxy
{
public string? LocalServiceAddress { get; init; }
public int? LocalServicePort { get; init; }
public IReadOnlyList<NomadConsulUpstream>? Upstreams { get; init; }
}
/// <summary>
/// Nomad Consul upstream: a destination name paired with a local bind port.
/// </summary>
/// <remarks>
/// Both properties are optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadConsulUpstream
{
public string? DestinationName { get; init; }
public int? LocalBindPort { get; init; }
}
/// <summary>
/// Nomad template definition: source (path or embedded template text), destination,
/// change handling, permissions, delimiters and an env-vars flag.
/// </summary>
/// <remarks>
/// <see cref="SplayNanos"/> is bound to the JSON name <c>Splay</c>.
/// NOTE(review): the suffix implies a nanosecond duration; confirm against the Nomad API.
/// </remarks>
public sealed record NomadTemplate
{
public string? SourcePath { get; init; }
public string? DestPath { get; init; }
public string? EmbeddedTmpl { get; init; }
public string? ChangeMode { get; init; }
public string? ChangeSignal { get; init; }
[JsonPropertyName("Splay")]
public long? SplayNanos { get; init; }
public string? Perms { get; init; }
public string? LeftDelim { get; init; }
public string? RightDelim { get; init; }
public bool? Envvars { get; init; }
}
/// <summary>
/// Nomad artifact definition: getter source and mode, getter options and headers,
/// and a relative destination.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadArtifact
{
public string? GetterSource { get; init; }
public string? GetterMode { get; init; }
public IReadOnlyDictionary<string, string>? GetterOptions { get; init; }
public IReadOnlyDictionary<string, string>? GetterHeaders { get; init; }
public string? RelativeDest { get; init; }
}
/// <summary>
/// Nomad log configuration: maximum number of log files and maximum size per file (MB, per the property name).
/// </summary>
/// <remarks>
/// Both properties are optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadLogConfig
{
public int? MaxFiles { get; init; }
public int? MaxFileSizeMB { get; init; }
}
/// <summary>
/// Nomad restart policy: attempt count, interval, delay and mode.
/// </summary>
/// <remarks>
/// <see cref="IntervalNanos"/> and <see cref="DelayNanos"/> are bound to the JSON names
/// <c>Interval</c> and <c>Delay</c>.
/// NOTE(review): the suffix implies nanosecond durations; confirm against the Nomad API.
/// </remarks>
public sealed record NomadRestartPolicy
{
public int? Attempts { get; init; }
[JsonPropertyName("Interval")]
public long? IntervalNanos { get; init; }
[JsonPropertyName("Delay")]
public long? DelayNanos { get; init; }
public string? Mode { get; init; }
}
/// <summary>
/// Nomad reschedule policy: attempt count, interval, delay, delay function, maximum delay
/// and an unlimited flag.
/// </summary>
/// <remarks>
/// <see cref="IntervalNanos"/>, <see cref="DelayNanos"/> and <see cref="MaxDelayNanos"/> are bound
/// to the JSON names <c>Interval</c>, <c>Delay</c> and <c>MaxDelay</c>.
/// NOTE(review): the suffix implies nanosecond durations; confirm against the Nomad API.
/// </remarks>
public sealed record NomadReschedulePolicy
{
public int? Attempts { get; init; }
[JsonPropertyName("Interval")]
public long? IntervalNanos { get; init; }
[JsonPropertyName("Delay")]
public long? DelayNanos { get; init; }
public string? DelayFunction { get; init; }
[JsonPropertyName("MaxDelay")]
public long? MaxDelayNanos { get; init; }
public bool? Unlimited { get; init; }
}
/// <summary>
/// Nomad ephemeral disk configuration: sticky and migrate flags plus a size (MB, per the property name).
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadEphemeralDisk
{
public bool? Sticky { get; init; }
public bool? Migrate { get; init; }
public int? SizeMB { get; init; }
}
/// <summary>
/// Nomad constraint: a left target, a right target and the operand relating them.
/// </summary>
/// <remarks>
/// Every property is an optional string; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadConstraint
{
public string? LTarget { get; init; }
public string? RTarget { get; init; }
public string? Operand { get; init; }
}
/// <summary>
/// Nomad affinity: the same left target / right target / operand triple as
/// <see cref="NomadConstraint"/>, plus a weight.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadAffinity
{
public string? LTarget { get; init; }
public string? RTarget { get; init; }
public string? Operand { get; init; }
public int? Weight { get; init; }
}
/// <summary>
/// Nomad allocation: identity (allocation, evaluation, node, job, task group), desired and
/// client status, per-task states, deployment status, and index/time fields.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadAllocation
{
public string? ID { get; init; }
public string? EvalID { get; init; }
public string? Name { get; init; }
public string? Namespace { get; init; }
public string? NodeID { get; init; }
public string? NodeName { get; init; }
public string? JobID { get; init; }
public string? TaskGroup { get; init; }
public string? DesiredStatus { get; init; }
public string? DesiredDescription { get; init; }
public string? ClientStatus { get; init; }
public string? ClientDescription { get; init; }
// NOTE(review): presumably keyed by task name - confirm against the Nomad API.
public IReadOnlyDictionary<string, NomadTaskState>? TaskStates { get; init; }
public NomadDeploymentStatus? DeploymentStatus { get; init; }
public long? CreateIndex { get; init; }
public long? ModifyIndex { get; init; }
// NOTE(review): unit of the two time fields is not visible here (presumably Unix nanoseconds) - confirm.
public long? CreateTime { get; init; }
public long? ModifyTime { get; init; }
}
/// <summary>
/// Nomad task state: current state, failure flag, restart count, restart/start/finish times
/// and the list of task events.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadTaskState
{
public string? State { get; init; }
public bool? Failed { get; init; }
public int? Restarts { get; init; }
public string? LastRestart { get; init; }
// NOTE(review): LastRestart above is a string while StartedAt/FinishedAt are long. Nomad's HTTP API
// appears to emit all three as RFC 3339 timestamp strings, which would not deserialize into long -
// verify against a real allocation response.
public long? StartedAt { get; init; }
public long? FinishedAt { get; init; }
public IReadOnlyList<NomadTaskEvent>? Events { get; init; }
}
/// <summary>
/// Nomad task event: type, time, messages, detail map, and exit/signal/kill information.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadTaskEvent
{
public string? Type { get; init; }
// NOTE(review): numeric timestamp; unit not visible here (presumably Unix nanoseconds) - confirm.
public long? Time { get; init; }
public string? Message { get; init; }
public string? DisplayMessage { get; init; }
public IReadOnlyDictionary<string, string>? Details { get; init; }
public int? ExitCode { get; init; }
public int? Signal { get; init; }
public bool? KillError { get; init; }
public string? KillReason { get; init; }
}
/// <summary>
/// Nomad deployment status for an allocation: healthy and canary flags, a timestamp and a modify index.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadDeploymentStatus
{
public bool? Healthy { get; init; }
public bool? Canary { get; init; }
// NOTE(review): Nomad's HTTP API appears to emit this as an RFC 3339 timestamp string, which would
// not deserialize into long - verify against a real allocation response.
public long? Timestamp { get; init; }
// NOTE(review): typed int? here, while the other records in this file type ModifyIndex as long? -
// confirm the narrower type is intended.
public int? ModifyIndex { get; init; }
}
/// <summary>
/// Nomad deployment: identity, the job (ID, version, indexes) it belongs to, status,
/// per-task-group state, and create/modify indexes.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadDeployment
{
public string? ID { get; init; }
public string? Namespace { get; init; }
public string? JobID { get; init; }
public long? JobVersion { get; init; }
public long? JobModifyIndex { get; init; }
public long? JobCreateIndex { get; init; }
public string? Status { get; init; }
public string? StatusDescription { get; init; }
// NOTE(review): presumably keyed by task group name - confirm against the Nomad API.
public IReadOnlyDictionary<string, NomadDeploymentState>? TaskGroups { get; init; }
public long? CreateIndex { get; init; }
public long? ModifyIndex { get; init; }
}
/// <summary>
/// Nomad deployment state for a task group: revert/promote flags, desired and placed canary
/// counts, placed allocations, healthy/unhealthy counts and a progress deadline.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadDeploymentState
{
public bool? AutoRevert { get; init; }
public bool? AutoPromote { get; init; }
public bool? Promoted { get; init; }
public int? DesiredCanaries { get; init; }
public int? DesiredTotal { get; init; }
// NOTE(review): PlacedCanaries is a count (int?) while PlacedAllocs is a list of strings; Nomad's API
// appears to use the opposite shapes (canary ID list, allocation count) - verify against a real response.
public int? PlacedCanaries { get; init; }
public IReadOnlyList<string>? PlacedAllocs { get; init; }
public int? HealthyAllocs { get; init; }
public int? UnhealthyAllocs { get; init; }
// Bound to the JSON property "RequireProgressBy".
// NOTE(review): Nomad's HTTP API appears to emit this as an RFC 3339 timestamp string rather than
// a number - verify that it deserializes into long.
[JsonPropertyName("RequireProgressBy")]
public long? RequireProgressByNanos { get; init; }
}
/// <summary>
/// Nomad evaluation: identity, priority, type, trigger, the job it concerns, status,
/// links to next/previous/blocked evaluations, and index/time fields.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadEvaluation
{
public string? ID { get; init; }
public string? Namespace { get; init; }
public int? Priority { get; init; }
public string? Type { get; init; }
public string? TriggeredBy { get; init; }
public string? JobID { get; init; }
public long? JobModifyIndex { get; init; }
public string? Status { get; init; }
public string? StatusDescription { get; init; }
public string? NextEval { get; init; }
public string? PreviousEval { get; init; }
public string? BlockedEval { get; init; }
public long? CreateIndex { get; init; }
public long? ModifyIndex { get; init; }
// NOTE(review): unit of the two time fields is not visible here (presumably Unix nanoseconds) - confirm.
public long? CreateTime { get; init; }
public long? ModifyTime { get; init; }
}
// Request/Response models
/// <summary>
/// Nomad job register request: the job to register plus optional index-enforcement,
/// policy-override and preserve-counts settings.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// NOTE(review): <see cref="JobModifyIndex"/> is presumably the index checked when
/// <see cref="EnforceIndex"/> is set - confirm against the Nomad API.
/// </remarks>
public sealed record NomadJobRegisterRequest
{
public NomadJob? Job { get; init; }
public bool? EnforceIndex { get; init; }
public long? JobModifyIndex { get; init; }
public bool? PolicyOverride { get; init; }
public bool? PreserveCounts { get; init; }
}
/// <summary>
/// Nomad job register response: the evaluation ID and its create index, the job modify index,
/// and any warnings.
/// </summary>
/// <remarks>
/// Every property is optional; consumers must handle a <c>null</c> <see cref="EvalID"/>.
/// </remarks>
public sealed record NomadJobRegisterResponse
{
public string? EvalID { get; init; }
public long? EvalCreateIndex { get; init; }
public long? JobModifyIndex { get; init; }
public IReadOnlyList<string>? Warnings { get; init; }
}
/// <summary>
/// Nomad job parse request: the job text in HCL and an optional canonicalize flag.
/// </summary>
/// <remarks>
/// Both properties are optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadJobParseRequest
{
public string? JobHCL { get; init; }
public bool? Canonicalize { get; init; }
}
/// <summary>
/// Nomad job deregister response: the evaluation ID, its create index and the job modify index.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadJobDeregisterResponse
{
public string? EvalID { get; init; }
public long? EvalCreateIndex { get; init; }
public long? JobModifyIndex { get; init; }
}
/// <summary>
/// Nomad job scale request: the new count, a string map identifying the target,
/// an optional message and a policy-override flag.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// NOTE(review): the keys expected in <see cref="Target"/> are not visible here
/// (presumably the task group name) - confirm against the Nomad API.
/// </remarks>
public sealed record NomadJobScaleRequest
{
public int? Count { get; init; }
public IReadOnlyDictionary<string, string>? Target { get; init; }
public string? Message { get; init; }
public bool? PolicyOverride { get; init; }
}
/// <summary>
/// Nomad job scale response: the evaluation ID and its create index.
/// </summary>
/// <remarks>
/// Both properties are optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadJobScaleResponse
{
public string? EvalID { get; init; }
public long? EvalCreateIndex { get; init; }
}
/// <summary>
/// Nomad job dispatch request: an optional payload string and optional metadata.
/// </summary>
/// <remarks>
/// Both properties are optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadJobDispatchRequest
{
// NOTE(review): encoding of the payload is not visible here (presumably base64) - confirm against the Nomad API.
public string? Payload { get; init; }
// Note: typed as mutable Dictionary, unlike the IReadOnlyDictionary used by the other records in this file.
public Dictionary<string, string>? Meta { get; init; }
}
/// <summary>
/// Nomad job dispatch response: the ID of the dispatched job, the evaluation ID and its create index.
/// </summary>
/// <remarks>
/// Every property is optional; unset values are <c>null</c>.
/// </remarks>
public sealed record NomadJobDispatchResponse
{
public string? DispatchedJobID { get; init; }
public string? EvalID { get; init; }
public long? EvalCreateIndex { get; init; }
}

View File

@@ -0,0 +1,203 @@
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Capability;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Nomad.Client;
using StellaOps.Agent.Nomad.Tasks;
namespace StellaOps.Agent.Nomad;
/// <summary>
/// Agent capability for managing HashiCorp Nomad jobs and allocations.
/// </summary>
/// <remarks>
/// Each supported task type is routed through a handler table to a dedicated task class;
/// a fresh task-handler instance (with its own typed logger) is created for every execution.
/// </remarks>
public sealed class NomadCapability : IAgentCapability, IDisposable
{
    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ILogger<NomadCapability> _logger;
    private readonly Dictionary<string, Func<AgentTaskInfo, CancellationToken, Task<AgentTaskResult>>> _taskHandlers;

    /// <summary>
    /// Gets the capability name.
    /// </summary>
    public string Name => "nomad";

    /// <summary>
    /// Gets the capability version.
    /// </summary>
    public string Version => "1.0.0";

    /// <summary>
    /// Gets the supported task types.
    /// </summary>
    public IReadOnlyList<string> SupportedTaskTypes { get; } = new[]
    {
        "nomad.deploy",
        "nomad.stop",
        "nomad.scale",
        "nomad.dispatch",
        "nomad.status",
        "nomad.health"
    };

    /// <summary>
    /// Creates a new Nomad capability.
    /// </summary>
    /// <param name="nomadClient">The Nomad client.</param>
    /// <param name="timeProvider">Time provider for timestamps.</param>
    /// <param name="loggerFactory">Logger factory.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public NomadCapability(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILoggerFactory loggerFactory)
    {
        _nomadClient = nomadClient ?? throw new ArgumentNullException(nameof(nomadClient));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
        _logger = loggerFactory.CreateLogger<NomadCapability>();

        // Task types are identifiers, not linguistic text: match them ordinally and ignore case
        // so that e.g. "Nomad.Deploy" resolves to the same handler as "nomad.deploy".
        _taskHandlers = new Dictionary<string, Func<AgentTaskInfo, CancellationToken, Task<AgentTaskResult>>>(StringComparer.OrdinalIgnoreCase)
        {
            ["nomad.deploy"] = ExecuteDeployAsync,
            ["nomad.stop"] = ExecuteStopAsync,
            ["nomad.scale"] = ExecuteScaleAsync,
            ["nomad.dispatch"] = ExecuteDispatchAsync,
            ["nomad.status"] = ExecuteStatusAsync,
            ["nomad.health"] = ExecuteHealthCheckAsync
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Probes the Nomad agent via <c>GetAgentSelfAsync</c>. Any exception is logged and
    /// reported as <c>false</c> instead of being propagated.
    /// </remarks>
    public async Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        try
        {
            var self = await _nomadClient.GetAgentSelfAsync(ct);
            var config = self.Config;
            _logger.LogInformation(
                "Nomad capability initialized, connected to {Region}/{Datacenter} (version {Version})",
                config?.Region ?? "unknown",
                config?.Datacenter ?? "unknown",
                config?.Version ?? "unknown");
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to initialize Nomad capability - Nomad agent not accessible");
            return false;
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// The handler's result is returned with <c>Duration</c> overwritten by the time measured here.
    /// Handler failures other than <see cref="InvalidNomadPayloadException"/> are logged and
    /// converted into an unsuccessful result.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidNomadPayloadException">
    /// The task type is not supported, or the handler rejected the payload.
    /// </exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        if (!_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            throw new InvalidNomadPayloadException(task.TaskType, "Unsupported task type");
        }

        var startTime = _timeProvider.GetUtcNow();
        try
        {
            var result = await handler(task, ct);
            return result with
            {
                Duration = _timeProvider.GetUtcNow() - startTime
            };
        }
        catch (InvalidNomadPayloadException)
        {
            // Payload problems are the caller's error: surface them unchanged.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Nomad task {TaskType} failed", task.TaskType);

            // Read the clock once so CompletedAt and Duration describe the same instant.
            var completedAt = _timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Healthy when <c>GetAgentSelfAsync</c> succeeds; any exception yields an unhealthy status
    /// carrying the exception message.
    /// </remarks>
    public async Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        try
        {
            var self = await _nomadClient.GetAgentSelfAsync(ct);
            var region = self.Config?.Region ?? "unknown";
            return new CapabilityHealthStatus(true, $"Nomad capability ready ({region})");
        }
        catch (Exception ex)
        {
            return new CapabilityHealthStatus(false, $"Nomad agent not accessible: {ex.Message}");
        }
    }

    // Handler for "nomad.deploy".
    private async Task<AgentTaskResult> ExecuteDeployAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadDeployJobTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadDeployJobTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.stop".
    private async Task<AgentTaskResult> ExecuteStopAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadStopJobTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadStopJobTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.scale".
    private async Task<AgentTaskResult> ExecuteScaleAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadScaleJobTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadScaleJobTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.dispatch".
    private async Task<AgentTaskResult> ExecuteDispatchAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadDispatchJobTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadDispatchJobTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.status".
    private async Task<AgentTaskResult> ExecuteStatusAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadJobStatusTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadJobStatusTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.health".
    private async Task<AgentTaskResult> ExecuteHealthCheckAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadHealthCheckTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadHealthCheckTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    /// <inheritdoc />
    /// <remarks>Disposes the Nomad client that was passed to the constructor.</remarks>
    public void Dispose()
    {
        _nomadClient.Dispose();
    }
}

View File

@@ -0,0 +1,94 @@
namespace StellaOps.Agent.Nomad;
/// <summary>
/// Common base type for exceptions raised by Nomad agent operations.
/// </summary>
public class NomadAgentException : Exception
{
    /// <summary>Creates the exception with the given message.</summary>
    public NomadAgentException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with the given message and underlying cause.</summary>
    public NomadAgentException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
/// <summary>
/// Thrown when a Nomad task payload is invalid or missing required fields.
/// </summary>
public class InvalidNomadPayloadException : NomadAgentException
{
    /// <summary>
    /// Creates the exception for a task type, optionally appending details to the message.
    /// </summary>
    /// <param name="taskType">Task type whose payload was rejected.</param>
    /// <param name="details">Optional explanation, appended after a colon when not <c>null</c>.</param>
    public InvalidNomadPayloadException(string taskType, string? details = null)
        : base(ComposeMessage(taskType, details))
    {
        TaskType = taskType;
    }

    /// <summary>Task type whose payload was rejected.</summary>
    public string TaskType { get; }

    // Builds the exception message; the ": details" suffix is omitted when details is null.
    private static string ComposeMessage(string taskType, string? details)
    {
        var suffix = details is null ? "" : $": {details}";
        return $"Invalid payload for Nomad task type '{taskType}'{suffix}";
    }
}
/// <summary>
/// Thrown when a Nomad API call fails.
/// </summary>
public class NomadApiException : NomadAgentException
{
    /// <summary>Creates the exception with a message and an optional HTTP status code.</summary>
    public NomadApiException(string message, int? statusCode = null)
        : base(message)
        => StatusCode = statusCode;

    /// <summary>Creates the exception with a message, an HTTP status code and the underlying cause.</summary>
    public NomadApiException(string message, int statusCode, Exception innerException)
        : base(message, innerException)
        => StatusCode = statusCode;

    /// <summary>Status code supplied at construction, or <c>null</c> when none was given.</summary>
    public int? StatusCode { get; }
}
/// <summary>
/// Thrown when a Nomad job operation fails.
/// </summary>
public class NomadJobOperationException : NomadAgentException
{
    /// <summary>
    /// Creates the exception; the message names the operation, the job and the failure text.
    /// </summary>
    public NomadJobOperationException(string operation, string jobId, string message)
        : base($"Nomad {operation} failed for job '{jobId}': {message}")
    {
        (Operation, JobId) = (operation, jobId);
    }

    /// <summary>ID of the job the operation targeted.</summary>
    public string JobId { get; }

    /// <summary>Name of the operation that failed.</summary>
    public string Operation { get; }
}
/// <summary>
/// Thrown when a Nomad deployment times out waiting for completion.
/// </summary>
public class NomadDeploymentTimeoutException : NomadAgentException
{
    /// <summary>
    /// Creates the exception; the message names the job and the timeout (the deployment ID is
    /// kept on the property only).
    /// </summary>
    public NomadDeploymentTimeoutException(string jobId, string? deploymentId, TimeSpan timeout)
        : base($"Nomad deployment timed out waiting for job '{jobId}' to complete after {timeout}")
    {
        (JobId, DeploymentId, Timeout) = (jobId, deploymentId, timeout);
    }

    /// <summary>ID of the job being deployed.</summary>
    public string JobId { get; }

    /// <summary>ID of the deployment, when one was supplied.</summary>
    public string? DeploymentId { get; }

    /// <summary>The timeout supplied at construction.</summary>
    public TimeSpan Timeout { get; }
}
/// <summary>
/// Thrown when a Nomad evaluation fails.
/// </summary>
public class NomadEvaluationFailedException : NomadAgentException
{
    /// <summary>
    /// Creates the exception for an evaluation and its status, optionally appending a description.
    /// </summary>
    /// <param name="evalId">Evaluation ID.</param>
    /// <param name="status">Status reported for the evaluation.</param>
    /// <param name="description">Optional text appended after a colon when not <c>null</c>.</param>
    public NomadEvaluationFailedException(string evalId, string status, string? description = null)
        : base(ComposeMessage(evalId, status, description))
    {
        (EvalId, Status) = (evalId, status);
    }

    /// <summary>Evaluation ID.</summary>
    public string EvalId { get; }

    /// <summary>Status reported for the evaluation.</summary>
    public string Status { get; }

    // Builds the exception message; the ": description" suffix is omitted when description is null.
    private static string ComposeMessage(string evalId, string status, string? description)
    {
        var suffix = description is null ? "" : $": {description}";
        return $"Nomad evaluation '{evalId}' failed with status '{status}'{suffix}";
    }
}

View File

@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Agent.Nomad</RootNamespace>
<Description>Stella Agent Nomad Capability - manages HashiCorp Nomad jobs and allocations</Description>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,17 @@
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Nomad.Tasks;
/// <summary>
/// Interface for Nomad task handlers: one asynchronous operation that turns an
/// <see cref="AgentTaskInfo"/> into an <see cref="AgentTaskResult"/>.
/// </summary>
public interface INomadTask
{
/// <summary>
/// Executes the Nomad task.
/// </summary>
/// <param name="task">The task information.</param>
/// <param name="ct">Cancellation token; defaults to <c>default</c> when omitted.</param>
/// <returns>A task that completes with the task result.</returns>
Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default);
}

View File

@@ -0,0 +1,279 @@
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Nomad.Client;
namespace StellaOps.Agent.Nomad.Tasks;
/// <summary>
/// Task handler for deploying Nomad jobs.
/// </summary>
/// <remarks>
/// Flow: resolve the job (parse <see cref="DeployJobPayload.JobSpec"/> or take
/// <see cref="DeployJobPayload.Job"/>), register it, then, depending on the payload, either
/// return immediately (detached), wait for the evaluation only, or wait for the deployment.
/// </remarks>
public sealed class NomadDeployJobTask : INomadTask
{
    private const string TaskType = "nomad.deploy";

    // Upper bound for the evaluation-only wait (used when WaitForDeployment is false).
    private static readonly TimeSpan EvaluationTimeout = TimeSpan.FromMinutes(2);

    // Delay between evaluation status polls.
    private static readonly TimeSpan EvaluationPollInterval = TimeSpan.FromSeconds(2);

    // Delay before re-querying when the job has no deployment yet.
    private static readonly TimeSpan DeploymentMissingPollInterval = TimeSpan.FromSeconds(2);

    // Delay between status polls of a deployment that is still in progress.
    private static readonly TimeSpan DeploymentPollInterval = TimeSpan.FromSeconds(5);

    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadDeployJobTask> _logger;

    /// <summary>
    /// Payload for deploying a Nomad job.
    /// </summary>
    public sealed record DeployJobPayload
    {
        /// <summary>
        /// Job specification in HCL or JSON format.
        /// Either JobSpec or Job must be provided; a non-empty JobSpec takes precedence.
        /// </summary>
        public string? JobSpec { get; init; }

        /// <summary>
        /// Fallback job ID, used when the resolved job definition carries no ID of its own.
        /// </summary>
        public string? JobId { get; init; }

        /// <summary>
        /// Pre-parsed job definition (alternative to JobSpec).
        /// </summary>
        public NomadJob? Job { get; init; }

        /// <summary>
        /// Nomad namespace.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Whether to wait for deployment to complete. When <c>false</c>, only the evaluation is awaited.
        /// </summary>
        public bool WaitForDeployment { get; init; } = true;

        /// <summary>
        /// Timeout for deployment completion.
        /// </summary>
        public TimeSpan DeploymentTimeout { get; init; } = TimeSpan.FromMinutes(10);

        /// <summary>
        /// Whether to run in detached mode (fire and forget). Takes precedence over
        /// <see cref="WaitForDeployment"/>.
        /// </summary>
        public bool Detach { get; init; } = false;
    }

    /// <summary>
    /// Creates a new Nomad deploy job task handler.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public NomadDeployJobTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadDeployJobTask> logger)
    {
        _nomadClient = nomadClient ?? throw new ArgumentNullException(nameof(nomadClient));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidNomadPayloadException">
    /// The payload cannot be deserialized, provides neither JobSpec nor Job, or yields no job ID.
    /// </exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        var payload = JsonSerializer.Deserialize<DeployJobPayload>(task.Payload)
            ?? throw new InvalidNomadPayloadException(TaskType, "Failed to deserialize payload");

        // Resolved outside the try block: a parse failure is not reported as a deploy failure.
        var nomadJob = await ResolveJobAsync(payload, ct);
        var jobId = nomadJob.ID
            ?? payload.JobId
            ?? throw new InvalidNomadPayloadException(TaskType, "Job ID is required");

        _logger.LogInformation(
            "Deploying Nomad job {JobId} to {Region}/{Namespace}",
            jobId,
            payload.Region ?? "default",
            payload.Namespace ?? "default");

        try
        {
            var registerResponse = await _nomadClient.RegisterJobAsync(
                nomadJob,
                payload.Namespace,
                payload.Region,
                ct);
            _logger.LogInformation(
                "Registered Nomad job {JobId}, evaluation ID: {EvalId}",
                jobId,
                registerResponse.EvalID);

            var evalId = registerResponse.EvalID ?? "";

            if (payload.Detach)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["jobId"] = jobId,
                        ["evalId"] = evalId,
                        ["status"] = "DETACHED"
                    },
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            if (!payload.WaitForDeployment)
            {
                // Wait for evaluation only
                var evaluation = await WaitForEvaluationAsync(
                    evalId,
                    payload.Namespace,
                    EvaluationTimeout,
                    ct);
                var evalSuccess = evaluation?.Status == "complete";
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = evalSuccess,
                    Error = evalSuccess
                        ? null
                        : $"Evaluation failed: {DescribeStatus(evaluation?.StatusDescription, evaluation?.Status)}",
                    Outputs = new Dictionary<string, object>
                    {
                        ["jobId"] = jobId,
                        ["evalId"] = evalId,
                        ["evalStatus"] = evaluation?.Status ?? "unknown",
                        ["status"] = evalSuccess ? "EVALUATED" : "EVAL_FAILED"
                    },
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            // Wait for deployment to complete
            var deployment = await WaitForDeploymentAsync(
                jobId,
                payload.Namespace,
                payload.DeploymentTimeout,
                ct);
            var success = deployment?.Status == "successful";
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = success,
                Error = success
                    ? null
                    : $"Deployment failed: {DescribeStatus(deployment?.StatusDescription, deployment?.Status)}",
                Outputs = new Dictionary<string, object>
                {
                    ["jobId"] = jobId,
                    ["evalId"] = evalId,
                    ["deploymentId"] = deployment?.ID ?? "",
                    ["deploymentStatus"] = deployment?.Status ?? "unknown",
                    ["status"] = success ? "DEPLOYED" : "DEPLOYMENT_FAILED"
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(ex, "Failed to deploy Nomad job {JobId}", jobId);
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to deploy job: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }

    // Produces the job to register: parses JobSpec when present, otherwise uses the pre-parsed Job.
    private async Task<NomadJob> ResolveJobAsync(DeployJobPayload payload, CancellationToken ct)
    {
        if (!string.IsNullOrEmpty(payload.JobSpec))
        {
            _logger.LogInformation("Parsing Nomad job spec");
            return await _nomadClient.ParseJobAsync(payload.JobSpec, ct: ct);
        }

        return payload.Job
            ?? throw new InvalidNomadPayloadException(TaskType, "Either JobSpec or Job must be provided");
    }

    // Picks the most specific non-empty text for an error message. Falls back to "unknown" when
    // nothing is available, e.g. when the wait timed out and no evaluation/deployment was obtained.
    private static string DescribeStatus(string? description, string? status)
    {
        if (!string.IsNullOrEmpty(description))
        {
            return description;
        }

        return string.IsNullOrEmpty(status) ? "unknown" : status;
    }

    // Polls the evaluation until it reaches "complete", "failed" or "canceled".
    // Returns null when the timeout elapses first; caller cancellation propagates as an exception.
    private async Task<NomadEvaluation?> WaitForEvaluationAsync(
        string evalId,
        string? ns,
        TimeSpan timeout,
        CancellationToken ct)
    {
        using var timeoutCts = new CancellationTokenSource(timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);
        try
        {
            while (!linkedCts.IsCancellationRequested)
            {
                var evaluation = await _nomadClient.GetEvaluationAsync(evalId, ns, ct: linkedCts.Token);
                if (evaluation?.Status is "complete" or "failed" or "canceled")
                {
                    return evaluation;
                }

                _logger.LogDebug("Evaluation {EvalId} status: {Status}", evalId, evaluation?.Status ?? "unknown");
                await Task.Delay(EvaluationPollInterval, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            // Only the timeout is swallowed here; the filter lets caller cancellation through.
            _logger.LogWarning("Evaluation {EvalId} wait timed out after {Timeout}", evalId, timeout);
        }

        return null;
    }

    // Polls the job's deployments until the first one reaches "successful", "failed" or "cancelled".
    // On timeout returns the last deployment seen (possibly still in progress, or null if none appeared).
    private async Task<NomadDeployment?> WaitForDeploymentAsync(
        string jobId,
        string? ns,
        TimeSpan timeout,
        CancellationToken ct)
    {
        using var timeoutCts = new CancellationTokenSource(timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);
        NomadDeployment? deployment = null;
        try
        {
            while (!linkedCts.IsCancellationRequested)
            {
                var deployments = await _nomadClient.GetJobDeploymentsAsync(jobId, ns, ct: linkedCts.Token);

                // NOTE(review): this trusts the first entry returned for the job. If the API can list a
                // deployment of an earlier job version first (or before the new one exists), a stale
                // terminal status would be reported - confirm ordering, or filter by job version/index.
                deployment = deployments.FirstOrDefault();
                if (deployment is null)
                {
                    await Task.Delay(DeploymentMissingPollInterval, linkedCts.Token);
                    continue;
                }

                if (deployment.Status is "successful" or "failed" or "cancelled")
                {
                    return deployment;
                }

                _logger.LogDebug(
                    "Deployment {DeploymentId} for job {JobId} status: {Status}",
                    deployment.ID,
                    jobId,
                    deployment.Status);
                await Task.Delay(DeploymentPollInterval, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            // Only the timeout is swallowed here; the filter lets caller cancellation through.
            _logger.LogWarning("Deployment wait for job {JobId} timed out after {Timeout}", jobId, timeout);
        }

        return deployment;
    }
}

View File

@@ -0,0 +1,202 @@
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Nomad.Client;
namespace StellaOps.Agent.Nomad.Tasks;
/// <summary>
/// Task handler for dispatching parameterized Nomad jobs.
/// </summary>
/// <remarks>
/// The handler dispatches the job and, when <see cref="DispatchJobPayload.WaitForCompletion"/>
/// is set, polls the dispatched child job until Nomad reports it as <c>dead</c>. The result's
/// <c>status</c> output is one of <c>DISPATCHED</c>, <c>COMPLETED</c>, <c>FAILED</c>,
/// <c>NOT_FOUND</c> or <c>TIMED_OUT</c>.
/// </remarks>
public sealed class NomadDispatchJobTask : INomadTask
{
    /// <summary>Delay between two status polls of the dispatched job.</summary>
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(5);

    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadDispatchJobTask> _logger;

    /// <summary>
    /// Payload for dispatching a parameterized Nomad job.
    /// </summary>
    public sealed record DispatchJobPayload
    {
        /// <summary>
        /// Job ID to dispatch (must be a parameterized batch job).
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Nomad namespace.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Metadata to pass to the dispatched job.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Meta { get; init; }

        /// <summary>
        /// Payload data to pass to the dispatched job.
        /// </summary>
        public string? Payload { get; init; }

        /// <summary>
        /// Whether to wait for the dispatched job to complete.
        /// </summary>
        public bool WaitForCompletion { get; init; } = false;

        /// <summary>
        /// Timeout for job completion.
        /// </summary>
        public TimeSpan CompletionTimeout { get; init; } = TimeSpan.FromMinutes(30);
    }

    /// <summary>
    /// How waiting for a dispatched job ended.
    /// </summary>
    private enum CompletionOutcome
    {
        /// <summary>The job is dead and every allocation completed without a failed task.</summary>
        Succeeded,

        /// <summary>The job is dead but its allocations do not prove a successful run.</summary>
        Failed,

        /// <summary>The client returned no job for the dispatched job ID.</summary>
        NotFound,

        /// <summary>The completion timeout elapsed before the job became dead.</summary>
        TimedOut
    }

    /// <summary>
    /// Creates a new Nomad dispatch job task handler.
    /// </summary>
    /// <param name="nomadClient">Client used for all Nomad API calls.</param>
    /// <param name="timeProvider">Clock used to stamp task results.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public NomadDispatchJobTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadDispatchJobTask> logger)
    {
        _nomadClient = nomadClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<DispatchJobPayload>(task.Payload)
            ?? throw new InvalidNomadPayloadException("nomad.dispatch", "Failed to deserialize payload");

        _logger.LogInformation(
            "Dispatching Nomad parameterized job {JobId}",
            payload.JobId);

        try
        {
            var response = await _nomadClient.DispatchJobAsync(
                payload.JobId,
                payload.Meta,
                payload.Payload,
                payload.Namespace,
                payload.Region,
                ct);

            _logger.LogInformation(
                "Dispatched Nomad job {JobId}, dispatched job ID: {DispatchedJobId}, evaluation ID: {EvalId}",
                payload.JobId,
                response.DispatchedJobID,
                response.EvalID);

            if (!payload.WaitForCompletion)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["jobId"] = payload.JobId,
                        ["dispatchedJobId"] = response.DispatchedJobID ?? "",
                        ["evalId"] = response.EvalID ?? "",
                        ["status"] = "DISPATCHED"
                    },
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            // Wait for the dispatched job to complete
            var dispatchedJobId = response.DispatchedJobID
                ?? throw new NomadApiException("Dispatch response missing dispatched job ID");

            var outcome = await WaitForJobCompletionAsync(
                dispatchedJobId,
                payload.Namespace,
                payload.Region,
                payload.CompletionTimeout,
                ct);

            // Report failures and missing jobs as what they are instead of as timeouts.
            var status = outcome switch
            {
                CompletionOutcome.Succeeded => "COMPLETED",
                CompletionOutcome.Failed => "FAILED",
                CompletionOutcome.NotFound => "NOT_FOUND",
                _ => "TIMED_OUT"
            };
            string? error = outcome switch
            {
                CompletionOutcome.Succeeded => null,
                CompletionOutcome.Failed => $"Dispatched job {dispatchedJobId} finished without all allocations completing successfully",
                CompletionOutcome.NotFound => $"Dispatched job {dispatchedJobId} was not found",
                _ => $"Dispatched job {dispatchedJobId} did not complete within timeout"
            };

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = outcome == CompletionOutcome.Succeeded,
                Error = error,
                Outputs = new Dictionary<string, object>
                {
                    ["jobId"] = payload.JobId,
                    ["dispatchedJobId"] = dispatchedJobId,
                    ["evalId"] = response.EvalID ?? "",
                    ["status"] = status
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(ex, "Failed to dispatch Nomad job {JobId}", payload.JobId);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to dispatch job: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }

    /// <summary>
    /// Polls the dispatched job until Nomad reports it as <c>dead</c>, then inspects its allocations.
    /// </summary>
    /// <param name="jobId">ID of the dispatched (child) job.</param>
    /// <param name="ns">Nomad namespace the job was dispatched into.</param>
    /// <param name="region">Nomad region the job was dispatched into.</param>
    /// <param name="timeout">Maximum time to wait for the job to become dead.</param>
    /// <param name="ct">Caller token; linked with the internal timeout token.</param>
    /// <returns>How the wait ended.</returns>
    /// <exception cref="OperationCanceledException">The caller's token was cancelled.</exception>
    private async Task<CompletionOutcome> WaitForJobCompletionAsync(
        string jobId,
        string? ns,
        string? region,
        TimeSpan timeout,
        CancellationToken ct)
    {
        using var timeoutCts = new CancellationTokenSource(timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);
        var token = linkedCts.Token;

        try
        {
            while (!token.IsCancellationRequested)
            {
                // Poll in the same namespace and region the job was dispatched to.
                var job = await _nomadClient.GetJobAsync(jobId, ns, region, token);

                if (job is null)
                {
                    _logger.LogWarning("Dispatched job {JobId} not found", jobId);
                    return CompletionOutcome.NotFound;
                }

                if (job.Status == "dead")
                {
                    // Check allocations to determine success
                    var allocations = await _nomadClient.GetJobAllocationsAsync(jobId, ns, region, token);

                    // A dead job without allocations never ran anything, so it is not a success;
                    // Enumerable.All alone would be vacuously true for an empty list.
                    var allSucceeded = allocations.Count > 0 && allocations.All(a =>
                        a.ClientStatus == "complete" &&
                        a.TaskStates?.Values.All(ts => ts.Failed != true) == true);

                    _logger.LogInformation(
                        "Dispatched job {JobId} completed, success: {Success}",
                        jobId,
                        allSucceeded);

                    return allSucceeded ? CompletionOutcome.Succeeded : CompletionOutcome.Failed;
                }

                _logger.LogDebug("Dispatched job {JobId} status: {Status}", jobId, job.Status);

                await Task.Delay(PollInterval, token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            _logger.LogWarning("Dispatched job {JobId} completion wait timed out after {Timeout}", jobId, timeout);
        }

        // Leaving the loop because the caller cancelled is a cancellation, not a timeout.
        ct.ThrowIfCancellationRequested();

        return CompletionOutcome.TimedOut;
    }
}

View File

@@ -0,0 +1,220 @@
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Nomad.Client;
namespace StellaOps.Agent.Nomad.Tasks;
/// <summary>
/// Task handler for checking Nomad job health.
/// </summary>
/// <remarks>
/// An allocation counts as healthy when its client status is <c>running</c> and its deployment
/// status reports <c>Healthy == true</c>. The job is healthy once at least
/// <see cref="HealthCheckPayload.MinHealthyAllocations"/> allocations are healthy. The result's
/// <c>status</c> output is <c>HEALTHY</c>, <c>UNHEALTHY</c> or <c>TIMED_OUT</c>.
/// </remarks>
public sealed class NomadHealthCheckTask : INomadTask
{
    /// <summary>Delay between two health polls while waiting.</summary>
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(5);

    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadHealthCheckTask> _logger;

    /// <summary>
    /// Payload for checking Nomad job health.
    /// </summary>
    public sealed record HealthCheckPayload
    {
        /// <summary>
        /// Job ID to check health for.
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Nomad namespace.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Minimum number of healthy allocations required.
        /// </summary>
        public int MinHealthyAllocations { get; init; } = 1;

        /// <summary>
        /// Whether to wait for health requirements to be met.
        /// </summary>
        public bool WaitForHealthy { get; init; } = true;

        /// <summary>
        /// Timeout for health check.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Point-in-time allocation counts for a job.
    /// </summary>
    private readonly record struct HealthSnapshot(
        bool IsHealthy,
        int HealthyCount,
        int TotalAllocations,
        int RunningAllocations);

    /// <summary>
    /// Creates a new Nomad health check task handler.
    /// </summary>
    /// <param name="nomadClient">Client used for all Nomad API calls.</param>
    /// <param name="timeProvider">Clock used to stamp task results.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public NomadHealthCheckTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadHealthCheckTask> logger)
    {
        _nomadClient = nomadClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<HealthCheckPayload>(task.Payload)
            ?? throw new InvalidNomadPayloadException("nomad.health", "Failed to deserialize payload");

        _logger.LogInformation("Checking health of Nomad job {JobId}", payload.JobId);

        try
        {
            if (!payload.WaitForHealthy)
            {
                // Just check current state
                return await CheckCurrentHealthAsync(task.Id, payload, ct);
            }

            // Wait for health requirements to be met
            using var timeoutCts = new CancellationTokenSource(payload.Timeout);
            using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

            try
            {
                while (!linkedCts.IsCancellationRequested)
                {
                    var health = await GetHealthStatusAsync(payload, linkedCts.Token);

                    if (health.IsHealthy)
                    {
                        return HealthyResult(task.Id, payload, health);
                    }

                    _logger.LogDebug(
                        "Nomad job {JobId} health check: {Healthy}/{MinRequired} healthy, waiting...",
                        payload.JobId,
                        health.HealthyCount,
                        payload.MinHealthyAllocations);

                    await Task.Delay(PollInterval, linkedCts.Token);
                }
            }
            catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
            {
                _logger.LogWarning(
                    "Nomad job {JobId} health check timed out after {Timeout}",
                    payload.JobId,
                    payload.Timeout);
            }

            // Timeout - take one last snapshot with the caller's token so the result reflects current state.
            var final = await GetHealthStatusAsync(payload, ct);

            // The job may have become healthy between the last poll and the deadline; do not
            // report a failure whose own outputs say the requirement is met.
            if (final.IsHealthy)
            {
                return HealthyResult(task.Id, payload, final);
            }

            return BuildResult(
                task.Id,
                payload,
                final,
                "TIMED_OUT",
                $"Health check timed out after {payload.Timeout}");
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(ex, "Failed to check health of Nomad job {JobId}", payload.JobId);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Health check failed: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }

    /// <summary>
    /// Performs a single health evaluation without waiting.
    /// </summary>
    private async Task<AgentTaskResult> CheckCurrentHealthAsync(
        Guid taskId,
        HealthCheckPayload payload,
        CancellationToken ct)
    {
        var health = await GetHealthStatusAsync(payload, ct);

        return health.IsHealthy
            ? BuildResult(taskId, payload, health, "HEALTHY", error: null)
            : BuildResult(
                taskId,
                payload,
                health,
                "UNHEALTHY",
                $"Only {health.HealthyCount} healthy allocations, requires {payload.MinHealthyAllocations}");
    }

    /// <summary>
    /// Logs that the job met its requirement and builds the successful result.
    /// </summary>
    private AgentTaskResult HealthyResult(Guid taskId, HealthCheckPayload payload, HealthSnapshot health)
    {
        _logger.LogInformation(
            "Nomad job {JobId} is healthy: {Healthy}/{MinRequired} healthy allocations",
            payload.JobId,
            health.HealthyCount,
            payload.MinHealthyAllocations);

        return BuildResult(taskId, payload, health, "HEALTHY", error: null);
    }

    /// <summary>
    /// Builds a task result carrying the allocation counts; the result is successful
    /// exactly when <paramref name="error"/> is <c>null</c>.
    /// </summary>
    private AgentTaskResult BuildResult(
        Guid taskId,
        HealthCheckPayload payload,
        HealthSnapshot health,
        string status,
        string? error)
    {
        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = error is null,
            Error = error,
            Outputs = new Dictionary<string, object>
            {
                ["jobId"] = payload.JobId,
                ["healthyAllocations"] = health.HealthyCount,
                ["runningAllocations"] = health.RunningAllocations,
                ["totalAllocations"] = health.TotalAllocations,
                ["minRequired"] = payload.MinHealthyAllocations,
                ["status"] = status
            },
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    /// <summary>
    /// Fetches the job's allocations and counts running and healthy ones.
    /// </summary>
    private async Task<HealthSnapshot> GetHealthStatusAsync(HealthCheckPayload payload, CancellationToken ct)
    {
        var allocations = await _nomadClient.GetJobAllocationsAsync(
            payload.JobId,
            payload.Namespace,
            payload.Region,
            ct);

        var runningAllocations = allocations
            .Where(a => a.ClientStatus == "running")
            .ToList();

        // Only running allocations whose deployment status is explicitly healthy are counted.
        var healthyCount = runningAllocations
            .Count(a => a.DeploymentStatus?.Healthy == true);

        return new HealthSnapshot(
            healthyCount >= payload.MinHealthyAllocations,
            healthyCount,
            allocations.Count,
            runningAllocations.Count);
    }
}

View File

@@ -0,0 +1,186 @@
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Nomad.Client;
namespace StellaOps.Agent.Nomad.Tasks;
/// <summary>
/// Task handler that reports the current state of a Nomad job.
/// </summary>
public sealed class NomadJobStatusTask : INomadTask
{
    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadJobStatusTask> _logger;

    /// <summary>
    /// Input accepted by the job status task.
    /// </summary>
    public sealed record JobStatusPayload
    {
        /// <summary>
        /// ID of the job to inspect.
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Nomad namespace of the job.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region of the job.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// When set, an allocation summary is added to the outputs.
        /// </summary>
        public bool IncludeAllocations { get; init; } = false;

        /// <summary>
        /// When set, the first deployment returned by the client is added to the outputs.
        /// </summary>
        public bool IncludeDeployments { get; init; } = false;
    }

    /// <summary>
    /// Creates the handler.
    /// </summary>
    /// <param name="nomadClient">Client used for all Nomad API calls.</param>
    /// <param name="timeProvider">Clock used to stamp task results.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public NomadJobStatusTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadJobStatusTask> logger)
    {
        _nomadClient = nomadClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var request = JsonSerializer.Deserialize<JobStatusPayload>(task.Payload);
        if (request is null)
        {
            throw new InvalidNomadPayloadException("nomad.status", "Failed to deserialize payload");
        }

        _logger.LogInformation("Getting status for Nomad job {JobId}", request.JobId);

        // Shared shape of every unsuccessful result produced by this handler.
        AgentTaskResult Fail(string message) => new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = message,
            CompletedAt = _timeProvider.GetUtcNow()
        };

        try
        {
            var job = await _nomadClient.GetJobAsync(
                request.JobId,
                request.Namespace,
                request.Region,
                ct);

            if (job is null)
            {
                return Fail($"Job '{request.JobId}' not found");
            }

            // Core job attributes, with fallbacks for values the API left unset.
            var outputs = new Dictionary<string, object>
            {
                { "jobId", job.ID ?? request.JobId },
                { "name", job.Name ?? "" },
                { "type", job.Type ?? "service" },
                { "status", job.Status ?? "unknown" },
                { "statusDescription", job.StatusDescription ?? "" },
                { "namespace", job.Namespace ?? "default" },
                { "region", job.Region ?? "" },
                { "priority", job.Priority ?? 50 },
                { "version", job.Version ?? 0 },
                { "stable", job.Stable ?? false },
                { "datacenters", job.Datacenters ?? new List<string>() }
            };

            // One entry per task group: its name, desired count and number of tasks.
            if (job.TaskGroups is not null)
            {
                outputs["taskGroups"] =
                    (from tg in job.TaskGroups
                     select new Dictionary<string, object>
                     {
                         { "name", tg.Name ?? "" },
                         { "count", tg.Count ?? 1 },
                         { "taskCount", tg.Tasks?.Count ?? 0 }
                     }).ToList();
            }

            if (request.IncludeAllocations)
            {
                var allocations = await _nomadClient.GetJobAllocationsAsync(
                    request.JobId,
                    request.Namespace,
                    request.Region,
                    ct);

                // Tally allocations per client status; a missing status is counted as "unknown".
                var byStatus = new Dictionary<string, int>();
                foreach (var allocation in allocations)
                {
                    var key = allocation.ClientStatus ?? "unknown";
                    byStatus[key] = byStatus.TryGetValue(key, out var seen) ? seen + 1 : 1;
                }

                int CountOf(string status) => byStatus.TryGetValue(status, out var n) ? n : 0;

                outputs["allocations"] = new Dictionary<string, object>
                {
                    { "total", allocations.Count },
                    { "byStatus", byStatus },
                    { "running", CountOf("running") },
                    { "complete", CountOf("complete") },
                    { "failed", CountOf("failed") }
                };
            }

            if (request.IncludeDeployments)
            {
                var deployments = await _nomadClient.GetJobDeploymentsAsync(
                    request.JobId,
                    request.Namespace,
                    request.Region,
                    ct);

                // NOTE(review): assumes the first deployment returned is the latest — confirm.
                if (deployments.FirstOrDefault() is { } latest)
                {
                    outputs["latestDeployment"] = new Dictionary<string, object>
                    {
                        { "id", latest.ID ?? "" },
                        { "status", latest.Status ?? "unknown" },
                        { "statusDescription", latest.StatusDescription ?? "" }
                    };
                }
            }

            _logger.LogInformation(
                "Nomad job {JobId} status: {Status}",
                request.JobId,
                job.Status);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = outputs,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(ex, "Failed to get status for Nomad job {JobId}", request.JobId);
            return Fail($"Failed to get job status: {ex.Message}");
        }
    }
}

View File

@@ -0,0 +1,128 @@
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Nomad.Client;
namespace StellaOps.Agent.Nomad.Tasks;
/// <summary>
/// Task handler that changes the desired count of a Nomad job's task group.
/// </summary>
public sealed class NomadScaleJobTask : INomadTask
{
    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadScaleJobTask> _logger;

    /// <summary>
    /// Input accepted by the scale task.
    /// </summary>
    public sealed record ScaleJobPayload
    {
        /// <summary>
        /// ID of the job to scale.
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Name of the task group whose count is changed.
        /// </summary>
        public required string TaskGroup { get; init; }

        /// <summary>
        /// Desired count for the task group.
        /// </summary>
        public required int Count { get; init; }

        /// <summary>
        /// Nomad namespace of the job.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region of the job.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Reason sent with the scale request; a default naming the task is used when omitted.
        /// </summary>
        public string? Reason { get; init; }
    }

    /// <summary>
    /// Creates the handler.
    /// </summary>
    /// <param name="nomadClient">Client used for all Nomad API calls.</param>
    /// <param name="timeProvider">Clock used to stamp task results.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public NomadScaleJobTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadScaleJobTask> logger)
    {
        _nomadClient = nomadClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var request = JsonSerializer.Deserialize<ScaleJobPayload>(task.Payload);
        if (request is null)
        {
            throw new InvalidNomadPayloadException("nomad.scale", "Failed to deserialize payload");
        }

        var jobId = request.JobId;
        var group = request.TaskGroup;
        var desired = request.Count;

        _logger.LogInformation(
            "Scaling Nomad job {JobId} task group {TaskGroup} to {Count}",
            jobId,
            group,
            desired);

        try
        {
            var scaled = await _nomadClient.ScaleJobAsync(
                jobId,
                group,
                desired,
                request.Reason ?? $"Scaled by Stella Ops (task: {task.Id})",
                request.Namespace,
                request.Region,
                ct);

            var evalId = scaled.EvalID;

            _logger.LogInformation(
                "Scaled Nomad job {JobId} task group {TaskGroup} to {Count}, evaluation ID: {EvalId}",
                jobId,
                group,
                desired,
                evalId);

            var outputs = new Dictionary<string, object>
            {
                { "jobId", jobId },
                { "taskGroup", group },
                { "count", desired },
                { "evalId", evalId ?? "" }
            };

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = outputs,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(
                ex,
                "Failed to scale Nomad job {JobId} task group {TaskGroup}",
                jobId,
                group);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to scale job: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }
}

View File

@@ -0,0 +1,115 @@
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Nomad.Client;
namespace StellaOps.Agent.Nomad.Tasks;
/// <summary>
/// Task handler that stops (and optionally purges) a Nomad job.
/// </summary>
public sealed class NomadStopJobTask : INomadTask
{
    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadStopJobTask> _logger;

    /// <summary>
    /// Input accepted by the stop task.
    /// </summary>
    public sealed record StopJobPayload
    {
        /// <summary>
        /// ID of the job to stop.
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Nomad namespace of the job.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region of the job.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Whether the job should be purged (completely removed) rather than only stopped.
        /// </summary>
        public bool Purge { get; init; } = false;

        /// <summary>
        /// Whether the job should be stopped globally (all regions).
        /// </summary>
        public bool Global { get; init; } = false;
    }

    /// <summary>
    /// Creates the handler.
    /// </summary>
    /// <param name="nomadClient">Client used for all Nomad API calls.</param>
    /// <param name="timeProvider">Clock used to stamp task results.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public NomadStopJobTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadStopJobTask> logger)
    {
        _nomadClient = nomadClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var request = JsonSerializer.Deserialize<StopJobPayload>(task.Payload);
        if (request is null)
        {
            throw new InvalidNomadPayloadException("nomad.stop", "Failed to deserialize payload");
        }

        var jobId = request.JobId;
        var purge = request.Purge;

        _logger.LogInformation(
            "Stopping Nomad job {JobId} (purge: {Purge}, global: {Global})",
            jobId,
            purge,
            request.Global);

        try
        {
            var stopped = await _nomadClient.StopJobAsync(
                jobId,
                purge,
                request.Global,
                request.Namespace,
                request.Region,
                ct);

            var evalId = stopped.EvalID;

            _logger.LogInformation(
                "Stopped Nomad job {JobId}, evaluation ID: {EvalId}",
                jobId,
                evalId);

            var outputs = new Dictionary<string, object>
            {
                { "jobId", jobId },
                { "evalId", evalId ?? "" },
                { "purged", purge }
            };

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = outputs,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(ex, "Failed to stop Nomad job {JobId}", jobId);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to stop job: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }
}

View File

@@ -0,0 +1,101 @@
using StellaOps.Agent.Core.Exceptions;
namespace StellaOps.Agent.Ssh.Exceptions;
/// <summary>
/// Raised when the payload of an SSH task cannot be used.
/// </summary>
public sealed class InvalidSshPayloadException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given task type.
    /// </summary>
    /// <param name="taskType">Task type whose payload was rejected.</param>
    public InvalidSshPayloadException(string taskType)
        : base(Describe(taskType))
        => TaskType = taskType;

    /// <summary>
    /// Task type whose payload was rejected.
    /// </summary>
    public string TaskType { get; }

    // Builds the exception message.
    private static string Describe(string taskType)
        => $"Invalid payload for task type '{taskType}'";
}
/// <summary>
/// Raised when an SSH connection to a host cannot be established.
/// </summary>
public sealed class SshConnectionException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given endpoint.
    /// </summary>
    /// <param name="host">Host the connection was attempted to.</param>
    /// <param name="port">Port the connection was attempted on.</param>
    /// <param name="message">Failure detail appended to the exception message.</param>
    public SshConnectionException(string host, int port, string message)
        : base(Describe(host, port, message))
        => (Host, Port) = (host, port);

    /// <summary>
    /// Host the connection was attempted to.
    /// </summary>
    public string Host { get; }

    /// <summary>
    /// Port the connection was attempted on.
    /// </summary>
    public int Port { get; }

    // Builds the exception message.
    private static string Describe(string host, int port, string message)
        => $"Failed to connect to {host}:{port}: {message}";
}
/// <summary>
/// Raised when a command run over SSH fails.
/// </summary>
public sealed class SshCommandException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given command.
    /// </summary>
    /// <param name="command">Command that failed; stored but not included in the message.</param>
    /// <param name="exitCode">Exit code reported for the command.</param>
    /// <param name="error">Error detail appended to the exception message.</param>
    public SshCommandException(string command, int exitCode, string error)
        : base(Describe(exitCode, error))
        => (Command, ExitCode) = (command, exitCode);

    /// <summary>
    /// Command that failed.
    /// </summary>
    public string Command { get; }

    /// <summary>
    /// Exit code reported for the command.
    /// </summary>
    public int ExitCode { get; }

    // Builds the exception message.
    private static string Describe(int exitCode, string error)
        => $"SSH command failed with exit code {exitCode}: {error}";
}
/// <summary>
/// Raised when transferring a file over SSH fails.
/// </summary>
public sealed class SshFileTransferException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given pair of paths.
    /// </summary>
    /// <param name="localPath">Path on the local side of the transfer.</param>
    /// <param name="remotePath">Path on the remote side of the transfer.</param>
    /// <param name="message">Failure detail appended to the exception message.</param>
    public SshFileTransferException(string localPath, string remotePath, string message)
        : base(Describe(localPath, remotePath, message))
        => (LocalPath, RemotePath) = (localPath, remotePath);

    /// <summary>
    /// Path on the local side of the transfer.
    /// </summary>
    public string LocalPath { get; }

    /// <summary>
    /// Path on the remote side of the transfer.
    /// </summary>
    public string RemotePath { get; }

    // Builds the exception message.
    private static string Describe(string localPath, string remotePath, string message)
        => $"File transfer failed between '{localPath}' and '{remotePath}': {message}";
}

View File

@@ -0,0 +1,82 @@
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Capability;
using StellaOps.Agent.Core.Exceptions;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Ssh.Tasks;
namespace StellaOps.Agent.Ssh;
/// <summary>
/// Agent capability that routes <c>ssh.*</c> tasks to their handlers.
/// </summary>
public sealed class SshCapability : IAgentCapability
{
    private const string ExecuteTaskType = "ssh.execute";
    private const string UploadTaskType = "ssh.upload";
    private const string DownloadTaskType = "ssh.download";
    private const string TunnelTaskType = "ssh.tunnel";

    private readonly SshConnectionPool _connectionPool;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<SshCapability> _logger;
    private readonly Dictionary<string, ISshTask> _taskHandlers;

    /// <summary>
    /// Creates the capability and one handler per supported task type.
    /// </summary>
    /// <param name="connectionPool">Pool handed to every task handler.</param>
    /// <param name="timeProvider">Clock passed to handlers on each execution.</param>
    /// <param name="logger">Logger shared by the capability and its handlers.</param>
    public SshCapability(
        SshConnectionPool connectionPool,
        TimeProvider timeProvider,
        ILogger<SshCapability> logger)
    {
        _connectionPool = connectionPool;
        _timeProvider = timeProvider;
        _logger = logger;

        // Task types are matched case-insensitively.
        var handlers = new Dictionary<string, ISshTask>(StringComparer.OrdinalIgnoreCase);
        handlers[ExecuteTaskType] = new SshExecuteTask(_connectionPool, logger);
        handlers[UploadTaskType] = new SshUploadTask(_connectionPool, logger);
        handlers[DownloadTaskType] = new SshDownloadTask(_connectionPool, logger);
        handlers[TunnelTaskType] = new SshTunnelTask(_connectionPool, logger);
        _taskHandlers = handlers;
    }

    /// <inheritdoc />
    public string Name => "ssh";

    /// <inheritdoc />
    public string Version => "1.0.0";

    /// <inheritdoc />
    public IReadOnlyList<string> SupportedTaskTypes => new string[]
    {
        ExecuteTaskType,
        UploadTaskType,
        DownloadTaskType,
        TunnelTaskType
    };

    /// <inheritdoc />
    public Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        // SSH capability is always available if SSH.NET is loaded
        _logger.LogInformation("SSH capability initialized");
        return Task.FromResult(true);
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var taskType = task.TaskType;

        var handler = _taskHandlers.TryGetValue(taskType, out var registered)
            ? registered
            : throw new UnsupportedTaskTypeException(taskType);

        _logger.LogDebug("Executing task {TaskType} with ID {TaskId}", taskType, task.Id);

        return await handler.ExecuteAsync(task, _timeProvider, ct);
    }

    /// <inheritdoc />
    public Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        // SSH capability is always available (no daemon to check)
        var status = new CapabilityHealthStatus(true, "SSH capability available");
        return Task.FromResult(status);
    }
}

View File

@@ -0,0 +1,253 @@
using System.Collections.Concurrent;
using System.Text;
using Microsoft.Extensions.Logging;
using Renci.SshNet;
namespace StellaOps.Agent.Ssh;
/// <summary>
/// Connection pool for SSH connections.
/// </summary>
/// <remarks>
/// Connections are keyed by <see cref="SshConnectionInfo.GetConnectionKey"/> and shared between
/// callers that use the same key. A periodic cleanup closes connections that have not been used
/// for longer than the configured timeout. SFTP clients are not pooled; the caller owns them.
/// NOTE(review): there is no lease or reference count, so a connection idle for longer than the
/// timeout can be closed by the cleanup while a caller still holds it.
/// </remarks>
public sealed class SshConnectionPool : IAsyncDisposable
{
    /// <summary>Interval between two cleanup passes.</summary>
    private static readonly TimeSpan CleanupInterval = TimeSpan.FromMinutes(1);

    private readonly ConcurrentDictionary<string, PooledConnection> _connections = new();
    private readonly TimeSpan _connectionTimeout;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<SshConnectionPool> _logger;
    private readonly System.Threading.ITimer _cleanupTimer;

    // Read from the timer callback as well as from callers, hence volatile.
    private volatile bool _disposed;

    /// <summary>
    /// Creates a new instance with default 10-minute timeout.
    /// </summary>
    /// <param name="timeProvider">Clock used for idle tracking and the cleanup timer.</param>
    /// <param name="logger">Logger for connection lifecycle events.</param>
    public SshConnectionPool(TimeProvider timeProvider, ILogger<SshConnectionPool> logger)
        : this(TimeSpan.FromMinutes(10), timeProvider, logger)
    {
    }

    /// <summary>
    /// Creates a new instance with custom timeout.
    /// </summary>
    /// <param name="connectionTimeout">Idle time after which a pooled connection is closed.</param>
    /// <param name="timeProvider">Clock used for idle tracking and the cleanup timer.</param>
    /// <param name="logger">Logger for connection lifecycle events.</param>
    public SshConnectionPool(TimeSpan connectionTimeout, TimeProvider timeProvider, ILogger<SshConnectionPool> logger)
    {
        _connectionTimeout = connectionTimeout;
        _timeProvider = timeProvider;
        _logger = logger;

        // Schedule through the injected TimeProvider so expiry follows the same clock as LastUsed.
        _cleanupTimer = _timeProvider.CreateTimer(CleanupExpiredConnections, null, CleanupInterval, CleanupInterval);
    }

    /// <summary>
    /// Gets a connection from the pool or creates a new one.
    /// </summary>
    /// <param name="connectionInfo">Endpoint and credentials to connect with.</param>
    /// <param name="ct">Token checked before the connect attempt starts.</param>
    /// <returns>A connected client shared by all callers using the same connection key.</returns>
    /// <exception cref="ObjectDisposedException">The pool has been disposed.</exception>
    public async Task<SshClient> GetConnectionAsync(
        SshConnectionInfo connectionInfo,
        CancellationToken ct = default)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        var key = connectionInfo.GetConnectionKey();

        if (TryGetLiveClient(key) is { } pooledClient)
        {
            return pooledClient;
        }

        var client = await CreateConnectionAsync(connectionInfo, ct);

        if (_disposed)
        {
            // The pool was disposed while connecting; do not leave an untracked connection behind.
            CloseClient(client, key);
            throw new ObjectDisposedException(GetType().FullName);
        }

        var candidate = new PooledConnection(client, _timeProvider.GetUtcNow());

        while (!_connections.TryAdd(key, candidate))
        {
            // Another caller pooled a connection for this key while we were connecting.
            // Prefer it and close ours so no live connection is orphaned outside the pool.
            if (TryGetLiveClient(key) is { } winner)
            {
                CloseClient(client, key);
                return winner;
            }

            // The competing entry was stale and has been evicted (or removed); retry the add.
        }

        return client;
    }

    /// <summary>
    /// Gets an SFTP client for file operations.
    /// </summary>
    /// <param name="connectionInfo">Endpoint and credentials to connect with.</param>
    /// <param name="ct">Token checked before the connect attempt starts.</param>
    /// <returns>A connected SFTP client that is not tracked by the pool.</returns>
    /// <exception cref="ObjectDisposedException">The pool has been disposed.</exception>
    public async Task<SftpClient> GetSftpClientAsync(
        SshConnectionInfo connectionInfo,
        CancellationToken ct = default)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        var sftp = new SftpClient(BuildConnectionInfo(connectionInfo));

        try
        {
            // The token only prevents the connect from starting; an in-flight Connect is not interrupted.
            await Task.Run(() => sftp.Connect(), ct);
        }
        catch
        {
            // Release the client when the connect attempt fails or is cancelled.
            sftp.Dispose();
            throw;
        }

        _logger.LogDebug(
            "SFTP connection established to {User}@{Host}:{Port}",
            connectionInfo.Username,
            connectionInfo.Host,
            connectionInfo.Port);

        return sftp;
    }

    /// <summary>
    /// Releases a connection back to the pool.
    /// </summary>
    /// <param name="connectionKey">Key of the connection, as produced by <see cref="SshConnectionInfo.GetConnectionKey"/>.</param>
    public void ReleaseConnection(string connectionKey)
    {
        if (_connections.TryGetValue(connectionKey, out var pooled))
        {
            // Releasing only refreshes the idle timestamp; the connection stays open.
            pooled.LastUsed = _timeProvider.GetUtcNow();
        }
    }

    /// <inheritdoc />
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        await _cleanupTimer.DisposeAsync();

        foreach (var key in _connections.Keys)
        {
            if (_connections.TryRemove(key, out var pooled))
            {
                // Best effort: CloseClient logs failures and never throws.
                CloseClient(pooled.Client, key);
            }
        }
    }

    /// <summary>
    /// Returns the pooled client for <paramref name="key"/> when it is still connected and
    /// refreshes its idle timestamp; evicts and closes the entry when it is no longer connected.
    /// </summary>
    private SshClient? TryGetLiveClient(string key)
    {
        if (!_connections.TryGetValue(key, out var pooled))
        {
            return null;
        }

        if (pooled.Client.IsConnected)
        {
            pooled.LastUsed = _timeProvider.GetUtcNow();
            return pooled.Client;
        }

        // Remove exactly this entry (not a newer replacement) and release its resources.
        if (_connections.TryRemove(new KeyValuePair<string, PooledConnection>(key, pooled)))
        {
            CloseClient(pooled.Client, key);
        }

        return null;
    }

    /// <summary>
    /// Opens a new SSH connection, disposing the client when the attempt fails.
    /// </summary>
    private async Task<SshClient> CreateConnectionAsync(
        SshConnectionInfo info,
        CancellationToken ct)
    {
        var client = new SshClient(BuildConnectionInfo(info));

        try
        {
            // The token only prevents the connect from starting; an in-flight Connect is not interrupted.
            await Task.Run(() => client.Connect(), ct);
        }
        catch
        {
            client.Dispose();
            throw;
        }

        _logger.LogDebug(
            "SSH connection established to {User}@{Host}:{Port}",
            info.Username,
            info.Host,
            info.Port);

        return client;
    }

    /// <summary>
    /// Translates <see cref="SshConnectionInfo"/> into SSH.NET connection settings.
    /// </summary>
    /// <exception cref="InvalidOperationException">Neither a private key nor a password is configured.</exception>
    private static ConnectionInfo BuildConnectionInfo(SshConnectionInfo info)
    {
        var authMethods = new List<AuthenticationMethod>();

        // Private key authentication
        if (!string.IsNullOrEmpty(info.PrivateKey))
        {
            using var keyStream = new MemoryStream(Encoding.UTF8.GetBytes(info.PrivateKey));
            var keyFile = string.IsNullOrEmpty(info.PrivateKeyPassphrase)
                ? new PrivateKeyFile(keyStream)
                : new PrivateKeyFile(keyStream, info.PrivateKeyPassphrase);
            authMethods.Add(new PrivateKeyAuthenticationMethod(info.Username, keyFile));
        }

        // Password authentication
        if (!string.IsNullOrEmpty(info.Password))
        {
            authMethods.Add(new PasswordAuthenticationMethod(info.Username, info.Password));
        }

        if (authMethods.Count == 0)
        {
            throw new InvalidOperationException("No SSH authentication method configured");
        }

        return new ConnectionInfo(
            info.Host,
            info.Port,
            info.Username,
            authMethods.ToArray());
    }

    /// <summary>
    /// Timer callback that closes connections idle for longer than the configured timeout.
    /// </summary>
    private void CleanupExpiredConnections(object? state)
    {
        if (_disposed)
        {
            return;
        }

        var now = _timeProvider.GetUtcNow();

        foreach (var entry in _connections)
        {
            if (now - entry.Value.LastUsed <= _connectionTimeout)
            {
                continue;
            }

            // Compare-and-remove: skip the entry if it was replaced since this pass observed it.
            if (_connections.TryRemove(entry) && CloseClient(entry.Value.Client, entry.Key))
            {
                _logger.LogDebug("Closed expired SSH connection: {Key}", entry.Key);
            }
        }
    }

    /// <summary>
    /// Disconnects and disposes a client without throwing.
    /// </summary>
    /// <returns><c>true</c> when the client was closed without error.</returns>
    private bool CloseClient(SshClient client, string key)
    {
        try
        {
            client.Disconnect();
            client.Dispose();
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error closing SSH connection: {Key}", key);
            return false;
        }
    }

    /// <summary>
    /// A pooled client together with the time it was last handed out or released.
    /// </summary>
    private sealed class PooledConnection
    {
        public SshClient Client { get; }
        public DateTimeOffset LastUsed { get; set; }

        public PooledConnection(SshClient client, DateTimeOffset lastUsed)
        {
            Client = client;
            LastUsed = lastUsed;
        }
    }
}
/// <summary>
/// SSH connection information.
/// </summary>
/// <remarks>
/// <see cref="ToString"/> is overridden because the compiler-generated record formatting would
/// print every property, including the password, private key and passphrase.
/// </remarks>
public sealed record SshConnectionInfo
{
    /// <summary>
    /// Target host.
    /// </summary>
    public required string Host { get; init; }

    /// <summary>
    /// SSH port.
    /// </summary>
    public int Port { get; init; } = 22;

    /// <summary>
    /// Username.
    /// </summary>
    public required string Username { get; init; }

    /// <summary>
    /// Password for password authentication.
    /// </summary>
    public string? Password { get; init; }

    /// <summary>
    /// Private key content for key authentication.
    /// </summary>
    public string? PrivateKey { get; init; }

    /// <summary>
    /// Passphrase for encrypted private key.
    /// </summary>
    public string? PrivateKeyPassphrase { get; init; }

    /// <summary>
    /// Gets a unique key for connection pooling.
    /// </summary>
    /// <returns>The key in the form <c>username@host:port</c>; credentials are not part of it.</returns>
    public string GetConnectionKey() => $"{Username}@{Host}:{Port}";

    /// <summary>
    /// Returns a description that is safe to log: secrets are replaced by placeholders.
    /// </summary>
    public override string ToString()
    {
        // Shows only whether a secret is present, never its value.
        static string Redact(string? secret) => string.IsNullOrEmpty(secret) ? "<none>" : "<redacted>";

        return $"{nameof(SshConnectionInfo)} {{ Host = {Host}, Port = {Port}, Username = {Username}, " +
               $"Password = {Redact(Password)}, PrivateKey = {Redact(PrivateKey)}, " +
               $"PrivateKeyPassphrase = {Redact(PrivateKeyPassphrase)} }}";
    }
}

View File

@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Compiler and assembly settings for the StellaOps.Agent.Ssh library. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- NOTE(review): preview language features combined with warnings-as-errors below; confirm this is intentional. -->
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Agent.Ssh</RootNamespace>
<AssemblyName>StellaOps.Agent.Ssh</AssemblyName>
</PropertyGroup>
<!-- NuGet dependencies. No Version attributes are given here; presumably versions are managed centrally (e.g. Directory.Packages.props) - TODO confirm. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="SSH.NET" />
</ItemGroup>
<!-- Project dependency on the agent core library. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,14 @@
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.Ssh.Tasks;
/// <summary>
/// Interface for SSH task handlers.
/// </summary>
public interface ISshTask
{
/// <summary>
/// Executes the SSH task.
/// </summary>
/// <param name="task">The task to execute.</param>
/// <param name="timeProvider">Clock supplied by the caller for the handler to use.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The result of the task.</returns>
Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default);
}

View File

@@ -0,0 +1,156 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Renci.SshNet;
using Renci.SshNet.Common;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Ssh.Exceptions;
namespace StellaOps.Agent.Ssh.Tasks;
/// <summary>
/// Task handler for downloading files via SFTP.
/// </summary>
public sealed class SshDownloadTask : ISshTask
{
    private const string TaskType = "ssh.download";

    private readonly SshConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for ssh.download task.
    /// </summary>
    public sealed record DownloadPayload
    {
        /// <summary>
        /// Target host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// SSH port.
        /// </summary>
        public int Port { get; init; } = 22;

        /// <summary>
        /// Username.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Remote file path.
        /// </summary>
        public required string RemotePath { get; init; }

        /// <summary>
        /// Local destination path.
        /// </summary>
        public required string LocalPath { get; init; }

        /// <summary>
        /// Create parent directories if needed.
        /// </summary>
        public bool CreateDirectory { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    public SshDownloadTask(SshConnectionPool connectionPool, ILogger logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <summary>
    /// Downloads the remote file named in the task payload to the local path via SFTP.
    /// </summary>
    /// <param name="task">Task whose payload deserializes to <see cref="DownloadPayload"/>; credentials are read from the task's credential map.</param>
    /// <param name="timeProvider">Clock used for the start and completion timestamps.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>
    /// A successful result with <c>localPath</c> and <c>size</c> outputs, or a failed result carrying the error message.
    /// </returns>
    /// <exception cref="InvalidSshPayloadException">The payload is missing or is not valid JSON for <see cref="DownloadPayload"/>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = ParsePayload(task);

        var connectionInfo = new SshConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            Username = payload.Username,
            Password = task.Credentials.GetValueOrDefault("ssh.password"),
            PrivateKey = task.Credentials.GetValueOrDefault("ssh.privateKey"),
            PrivateKeyPassphrase = task.Credentials.GetValueOrDefault("ssh.passphrase")
        };

        _logger.LogInformation(
            "Downloading {User}@{Host}:{Remote} to {Local}",
            payload.Username,
            payload.Host,
            payload.RemotePath,
            payload.LocalPath);

        // True while the local file exists but does not yet hold the complete download.
        var downloadIncomplete = false;

        try
        {
            using var sftp = await _connectionPool.GetSftpClientAsync(connectionInfo, ct);

            // Create local directory if needed
            if (payload.CreateDirectory)
            {
                var localDir = Path.GetDirectoryName(payload.LocalPath);
                if (!string.IsNullOrEmpty(localDir))
                {
                    Directory.CreateDirectory(localDir);
                }
            }

            // Get remote file attributes; this runs before the local file is created,
            // so a missing remote file does not leave an empty local file behind.
            var remoteAttributes = sftp.GetAttributes(payload.RemotePath);

            // Download file; the stream is closed (and flushed) before success is reported.
            await using (var localFile = File.Create(payload.LocalPath))
            {
                downloadIncomplete = true;
                await Task.Run(() => sftp.DownloadFile(payload.RemotePath, localFile), ct);
            }

            downloadIncomplete = false;
            sftp.Disconnect();

            _logger.LogInformation(
                "Downloaded {Size} bytes to {Local}",
                remoteAttributes.Size,
                payload.LocalPath);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["localPath"] = payload.LocalPath,
                    ["size"] = remoteAttributes.Size
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (SftpPathNotFoundException)
        {
            _logger.LogWarning(
                "Remote file {Remote} not found on {Host}",
                payload.RemotePath,
                payload.Host);
            RemoveIncompleteDownload(payload.LocalPath, downloadIncomplete);
            return Failure(task, timeProvider, startTime, $"Remote file not found: {payload.RemotePath}");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to download file from {Host}", payload.Host);
            RemoveIncompleteDownload(payload.LocalPath, downloadIncomplete);
            return Failure(task, timeProvider, startTime, ex.Message);
        }
    }

    // Deserializes the payload, reporting malformed JSON as an invalid-payload error.
    private static DownloadPayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<DownloadPayload>(task.Payload)
                ?? throw new InvalidSshPayloadException(TaskType);
        }
        catch (JsonException)
        {
            throw new InvalidSshPayloadException(TaskType);
        }
    }

    // Best-effort removal of a truncated local file left behind by a failed download.
    private void RemoveIncompleteDownload(string localPath, bool downloadIncomplete)
    {
        if (!downloadIncomplete)
        {
            return;
        }

        try
        {
            File.Delete(localPath);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(ex, "Could not remove incomplete download {Local}", localPath);
        }
    }

    // Builds the failed result shared by all error paths.
    private static AgentTaskResult Failure(AgentTaskInfo task, TimeProvider timeProvider, DateTimeOffset startTime, string error)
    {
        var completedAt = timeProvider.GetUtcNow();
        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }
}

View File

@@ -0,0 +1,199 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Renci.SshNet;
using Renci.SshNet.Common;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Ssh.Exceptions;
namespace StellaOps.Agent.Ssh.Tasks;
/// <summary>
/// Task handler for executing remote commands via SSH.
/// </summary>
public sealed class SshExecuteTask : ISshTask
{
    private const string TaskType = "ssh.execute";

    private readonly SshConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for ssh.execute task.
    /// </summary>
    public sealed record ExecutePayload
    {
        /// <summary>
        /// Target host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// SSH port.
        /// </summary>
        public int Port { get; init; } = 22;

        /// <summary>
        /// Username.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Command to execute.
        /// </summary>
        public required string Command { get; init; }

        /// <summary>
        /// Environment variables. Names must consist of ASCII letters, digits and underscores
        /// and must not start with a digit.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// Working directory.
        /// </summary>
        public string? WorkingDirectory { get; init; }

        /// <summary>
        /// Command timeout.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(10);

        /// <summary>
        /// Combine stdout and stderr in output.
        /// </summary>
        public bool CombineOutput { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    public SshExecuteTask(SshConnectionPool connectionPool, ILogger logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <summary>
    /// Runs the payload's command on the remote host and reports its exit code and output.
    /// </summary>
    /// <param name="task">Task whose payload deserializes to <see cref="ExecutePayload"/>; credentials are read from the task's credential map.</param>
    /// <param name="timeProvider">Clock used for the start and completion timestamps.</param>
    /// <param name="ct">Token used to cancel the wait; on cancellation the remote command is cancelled as well.</param>
    /// <returns>
    /// A result that is successful when the exit code is 0, with <c>exitCode</c>, <c>stdout</c>,
    /// <c>stderr</c> and <c>output</c> outputs; or a failed result carrying the error message.
    /// </returns>
    /// <exception cref="InvalidSshPayloadException">The payload is missing or is not valid JSON for <see cref="ExecutePayload"/>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = ParsePayload(task);

        var connectionInfo = new SshConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            Username = payload.Username,
            Password = task.Credentials.GetValueOrDefault("ssh.password"),
            PrivateKey = task.Credentials.GetValueOrDefault("ssh.privateKey"),
            PrivateKeyPassphrase = task.Credentials.GetValueOrDefault("ssh.passphrase")
        };

        _logger.LogInformation(
            "Executing SSH command on {User}@{Host}",
            payload.Username,
            payload.Host);

        try
        {
            var client = await _connectionPool.GetConnectionAsync(connectionInfo, ct);

            // Build command with environment and working directory
            var fullCommand = BuildCommand(payload);

            using var command = client.CreateCommand(fullCommand);
            command.CommandTimeout = payload.Timeout;

            var asyncResult = command.BeginExecute();

            // Wait for completion with cancellation support
            try
            {
                while (!asyncResult.IsCompleted)
                {
                    ct.ThrowIfCancellationRequested();
                    await Task.Delay(100, ct);
                }
            }
            catch (OperationCanceledException)
            {
                // Do not leave the remote command running once the caller has given up on it.
                command.CancelAsync();
                throw;
            }

            var result = command.EndExecute(asyncResult);
            int exitCode = command.ExitStatus ?? -1;
            string stdout = result ?? string.Empty;
            string stderr = command.Error ?? string.Empty;

            _logger.LogInformation(
                "SSH command completed with exit code {ExitCode}",
                exitCode);

            // A failing command may write nothing to stderr; still report a meaningful error.
            string? error = null;
            if (exitCode != 0)
            {
                error = string.IsNullOrWhiteSpace(stderr)
                    ? $"Command exited with code {exitCode}"
                    : stderr;
            }

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = exitCode == 0,
                Error = error,
                Outputs = new Dictionary<string, object>
                {
                    ["exitCode"] = exitCode,
                    ["stdout"] = stdout,
                    ["stderr"] = stderr,
                    ["output"] = payload.CombineOutput ? $"{stdout}\n{stderr}".Trim() : stdout
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (SshException ex)
        {
            _logger.LogError(ex, "SSH command failed on {Host}", payload.Host);
            return Failure(task, timeProvider, startTime, $"SSH error: {ex.Message}");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to execute SSH command on {Host}", payload.Host);
            return Failure(task, timeProvider, startTime, ex.Message);
        }
    }

    // Deserializes the payload, reporting malformed JSON as an invalid-payload error.
    private static ExecutePayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<ExecutePayload>(task.Payload)
                ?? throw new InvalidSshPayloadException(TaskType);
        }
        catch (JsonException)
        {
            throw new InvalidSshPayloadException(TaskType);
        }
    }

    // Builds the failed result shared by all error paths.
    private static AgentTaskResult Failure(AgentTaskInfo task, TimeProvider timeProvider, DateTimeOffset startTime, string error)
    {
        var completedAt = timeProvider.GetUtcNow();
        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    // Joins "export NAME='value'", "cd 'dir'" and the command itself with "&&".
    private static string BuildCommand(ExecutePayload payload)
    {
        var parts = new List<string>();

        // Set environment variables
        if (payload.Environment is not null)
        {
            foreach (var (key, value) in payload.Environment)
            {
                // The name is emitted unquoted, so it must be a plain identifier;
                // anything else could smuggle extra shell syntax into the command line.
                if (!IsValidEnvironmentVariableName(key))
                {
                    throw new ArgumentException($"Invalid environment variable name: '{key}'", nameof(payload));
                }

                parts.Add($"export {key}='{EscapeShellString(value)}'");
            }
        }

        // Change to working directory
        if (!string.IsNullOrEmpty(payload.WorkingDirectory))
        {
            parts.Add($"cd '{EscapeShellString(payload.WorkingDirectory)}'");
        }

        parts.Add(payload.Command);

        return string.Join(" && ", parts);
    }

    // Accepts names made of ASCII letters, digits and underscores that do not start with a digit.
    private static bool IsValidEnvironmentVariableName(string name) =>
        !string.IsNullOrEmpty(name)
        && (char.IsAsciiLetter(name[0]) || name[0] == '_')
        && name.All(c => char.IsAsciiLetterOrDigit(c) || c == '_');

    // Escapes a value for use inside single quotes: close the quote, emit a
    // double-quoted single quote, then reopen the quote.
    private static string EscapeShellString(string value)
    {
        return value.Replace("'", "'\"'\"'");
    }
}

View File

@@ -0,0 +1,168 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Renci.SshNet;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Ssh.Exceptions;
namespace StellaOps.Agent.Ssh.Tasks;
/// <summary>
/// Task handler for creating SSH tunnels (port forwarding).
/// </summary>
public sealed class SshTunnelTask : ISshTask
{
    private const string TaskType = "ssh.tunnel";

    private readonly SshConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for ssh.tunnel task.
    /// </summary>
    public sealed record TunnelPayload
    {
        /// <summary>
        /// Target host for SSH connection.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// SSH port.
        /// </summary>
        public int Port { get; init; } = 22;

        /// <summary>
        /// Username.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Local port to bind.
        /// </summary>
        public required int LocalPort { get; init; }

        /// <summary>
        /// Remote host to forward to.
        /// </summary>
        public required string RemoteHost { get; init; }

        /// <summary>
        /// Remote port to forward to.
        /// </summary>
        public required int RemotePort { get; init; }

        /// <summary>
        /// Duration to keep tunnel open.
        /// </summary>
        public TimeSpan Duration { get; init; } = TimeSpan.FromMinutes(30);

        /// <summary>
        /// Local bind address (default: 127.0.0.1).
        /// </summary>
        public string LocalHost { get; init; } = "127.0.0.1";
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    public SshTunnelTask(SshConnectionPool connectionPool, ILogger logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <summary>
    /// Opens a local port forward over SSH, keeps it open for the payload's duration, then closes it.
    /// </summary>
    /// <param name="task">Task whose payload deserializes to <see cref="TunnelPayload"/>; credentials are read from the task's credential map.</param>
    /// <param name="timeProvider">Clock used for the start and completion timestamps.</param>
    /// <param name="ct">Token used to close the tunnel early.</param>
    /// <returns>
    /// A successful result describing the tunnel once the duration has elapsed, or a failed result carrying the error message.
    /// </returns>
    /// <exception cref="InvalidSshPayloadException">The payload is missing or is not valid JSON for <see cref="TunnelPayload"/>.</exception>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled; the forwarded port is stopped and removed before the exception propagates.
    /// </exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = ParsePayload(task);

        var connectionInfo = new SshConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            Username = payload.Username,
            Password = task.Credentials.GetValueOrDefault("ssh.password"),
            PrivateKey = task.Credentials.GetValueOrDefault("ssh.privateKey"),
            PrivateKeyPassphrase = task.Credentials.GetValueOrDefault("ssh.passphrase")
        };

        _logger.LogInformation(
            "Creating SSH tunnel: {LocalHost}:{LocalPort} -> {User}@{Host} -> {RemoteHost}:{RemotePort}",
            payload.LocalHost,
            payload.LocalPort,
            payload.Username,
            payload.Host,
            payload.RemoteHost,
            payload.RemotePort);

        try
        {
            var client = await _connectionPool.GetConnectionAsync(connectionInfo, ct);

            var tunnel = new ForwardedPortLocal(
                payload.LocalHost,
                (uint)payload.LocalPort,
                payload.RemoteHost,
                (uint)payload.RemotePort);

            client.AddForwardedPort(tunnel);
            try
            {
                tunnel.Start();

                _logger.LogInformation(
                    "SSH tunnel established: {LocalHost}:{LocalPort} -> {RemoteHost}:{RemotePort}",
                    payload.LocalHost,
                    payload.LocalPort,
                    payload.RemoteHost,
                    payload.RemotePort);

                // Keep tunnel open for specified duration; cancellation ends the wait early.
                await Task.Delay(payload.Duration, ct);
            }
            finally
            {
                // The client comes from the pool, so the port must be detached on every
                // exit path (normal expiry, cancellation or failure) rather than left
                // bound on a connection that other tasks may reuse.
                if (tunnel.IsStarted)
                {
                    tunnel.Stop();
                }

                client.RemoveForwardedPort(tunnel);
            }

            _logger.LogInformation("SSH tunnel closed after {Duration}", payload.Duration);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["localHost"] = payload.LocalHost,
                    ["localPort"] = payload.LocalPort,
                    ["remoteHost"] = payload.RemoteHost,
                    ["remotePort"] = payload.RemotePort,
                    ["duration"] = payload.Duration.ToString()
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogError(ex, "Failed to create SSH tunnel to {Host}", payload.Host);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    // Deserializes the payload, reporting malformed JSON as an invalid-payload error.
    private static TunnelPayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<TunnelPayload>(task.Payload)
                ?? throw new InvalidSshPayloadException(TaskType);
        }
        catch (JsonException)
        {
            throw new InvalidSshPayloadException(TaskType);
        }
    }
}

View File

@@ -0,0 +1,188 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Renci.SshNet;
using Renci.SshNet.Common;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.Ssh.Exceptions;
namespace StellaOps.Agent.Ssh.Tasks;
/// <summary>
/// Task handler for uploading files via SFTP.
/// </summary>
public sealed class SshUploadTask : ISshTask
{
    private const string TaskType = "ssh.upload";

    // Highest value expressible with the nine rwx permission bits (octal 0777).
    private const int MaxPermissionBits = 0x1FF;

    private readonly SshConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for ssh.upload task.
    /// </summary>
    public sealed record UploadPayload
    {
        /// <summary>
        /// Target host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// SSH port.
        /// </summary>
        public int Port { get; init; } = 22;

        /// <summary>
        /// Username.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Local file path.
        /// </summary>
        public required string LocalPath { get; init; }

        /// <summary>
        /// Remote destination path.
        /// </summary>
        public required string RemotePath { get; init; }

        /// <summary>
        /// Create parent directories if needed.
        /// </summary>
        public bool CreateDirectory { get; init; } = true;

        /// <summary>
        /// File permissions as a numeric mode (the default 420 is octal 0644).
        /// Valid range is 0 to 511 (octal 0777).
        /// </summary>
        public int Permissions { get; init; } = 420; // 0644
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    public SshUploadTask(SshConnectionPool connectionPool, ILogger logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <summary>
    /// Uploads the local file named in the task payload to the remote path via SFTP and sets its permissions.
    /// </summary>
    /// <param name="task">Task whose payload deserializes to <see cref="UploadPayload"/>; credentials are read from the task's credential map.</param>
    /// <param name="timeProvider">Clock used for the start and completion timestamps.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>
    /// A successful result with <c>remotePath</c>, <c>size</c> and <c>permissions</c> outputs,
    /// or a failed result carrying the error message.
    /// </returns>
    /// <exception cref="InvalidSshPayloadException">The payload is missing or is not valid JSON for <see cref="UploadPayload"/>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = ParsePayload(task);

        var connectionInfo = new SshConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            Username = payload.Username,
            Password = task.Credentials.GetValueOrDefault("ssh.password"),
            PrivateKey = task.Credentials.GetValueOrDefault("ssh.privateKey"),
            PrivateKeyPassphrase = task.Credentials.GetValueOrDefault("ssh.passphrase")
        };

        _logger.LogInformation(
            "Uploading {Local} to {User}@{Host}:{Remote}",
            payload.LocalPath,
            payload.Username,
            payload.Host,
            payload.RemotePath);

        try
        {
            // Validate the mode up front so a bad value fails before any data is transferred.
            var sftpMode = ToSftpMode(payload.Permissions);

            using var sftp = await _connectionPool.GetSftpClientAsync(connectionInfo, ct);

            // Create parent directory if needed
            if (payload.CreateDirectory)
            {
                var parentDir = Path.GetDirectoryName(payload.RemotePath)?.Replace('\\', '/');
                if (!string.IsNullOrEmpty(parentDir))
                {
                    await CreateRemoteDirectoryAsync(sftp, parentDir, ct);
                }
            }

            // Upload file
            await using var localFile = File.OpenRead(payload.LocalPath);
            await Task.Run(() => sftp.UploadFile(localFile, payload.RemotePath), ct);

            // Set permissions
            sftp.ChangePermissions(payload.RemotePath, sftpMode);

            var fileInfo = sftp.GetAttributes(payload.RemotePath);
            sftp.Disconnect();

            _logger.LogInformation(
                "Uploaded {Size} bytes to {Remote}",
                fileInfo.Size,
                payload.RemotePath);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["remotePath"] = payload.RemotePath,
                    ["size"] = fileInfo.Size,
                    ["permissions"] = payload.Permissions
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (SftpPathNotFoundException ex)
        {
            return Failure(task, timeProvider, startTime, $"Remote path not found: {ex.Message}");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to upload file to {Host}", payload.Host);
            return Failure(task, timeProvider, startTime, ex.Message);
        }
    }

    // Deserializes the payload, reporting malformed JSON as an invalid-payload error.
    private static UploadPayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<UploadPayload>(task.Payload)
                ?? throw new InvalidSshPayloadException(TaskType);
        }
        catch (JsonException)
        {
            throw new InvalidSshPayloadException(TaskType);
        }
    }

    // Builds the failed result shared by all error paths.
    private static AgentTaskResult Failure(AgentTaskInfo task, TimeProvider timeProvider, DateTimeOffset startTime, string error)
    {
        var completedAt = timeProvider.GetUtcNow();
        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    // Converts a numeric mode (e.g. 420 == octal 0644) into the form SSH.NET expects.
    // SSH.NET reads the decimal digits of the short as octal digits, so 0644 must be
    // passed as 644; passing 420 unchanged would apply mode 0420.
    private static short ToSftpMode(int permissions)
    {
        if (permissions is < 0 or > MaxPermissionBits)
        {
            throw new ArgumentOutOfRangeException(
                nameof(permissions),
                permissions,
                "Permissions must be between 0 and 511 (octal 0777).");
        }

        var owner = (permissions >> 6) & 7;
        var group = (permissions >> 3) & 7;
        var other = permissions & 7;
        return (short)((owner * 100) + (group * 10) + other);
    }

    // Creates each missing segment of the directory path in turn. Absolute paths stay
    // absolute and relative paths stay relative to the SFTP working directory.
    private static async Task CreateRemoteDirectoryAsync(SftpClient sftp, string path, CancellationToken ct)
    {
        var parts = path.Split('/').Where(p => !string.IsNullOrEmpty(p)).ToList();

        // null marks "no segment emitted yet" for a relative path, so no leading slash is added.
        string? current = path.StartsWith('/') ? string.Empty : null;

        foreach (var part in parts)
        {
            current = current is null ? part : $"{current}/{part}";
            var directory = current;

            try
            {
                var attrs = sftp.GetAttributes(directory);
                if (!attrs.IsDirectory)
                {
                    throw new InvalidOperationException($"Path exists but is not a directory: {directory}");
                }
            }
            catch (SftpPathNotFoundException)
            {
                await Task.Run(() => sftp.CreateDirectory(directory), ct);
            }
        }
    }
}

View File

@@ -0,0 +1,121 @@
using StellaOps.Agent.Core.Exceptions;
namespace StellaOps.Agent.WinRM.Exceptions;
/// <summary>
/// Raised when the payload supplied for a WinRM task cannot be used.
/// </summary>
/// <param name="taskType">Task type whose payload was rejected.</param>
public sealed class InvalidWinRmPayloadException(string taskType)
    : AgentException($"Invalid payload for task type '{taskType}'")
{
    /// <summary>
    /// Task type whose payload was rejected.
    /// </summary>
    public string TaskType { get; } = taskType;
}
/// <summary>
/// Raised when a WinRM connection cannot be established.
/// </summary>
/// <param name="host">Host the connection was attempted against.</param>
/// <param name="port">WinRM port the connection was attempted against.</param>
/// <param name="message">Underlying failure description.</param>
public sealed class WinRmConnectionException(string host, int port, string message)
    : AgentException($"Failed to connect to {host}:{port}: {message}")
{
    /// <summary>
    /// Host the connection was attempted against.
    /// </summary>
    public string Host { get; } = host;

    /// <summary>
    /// WinRM port the connection was attempted against.
    /// </summary>
    public int Port { get; } = port;
}
/// <summary>
/// Raised when running a PowerShell script fails.
/// </summary>
/// <param name="script">Script that was being executed.</param>
/// <param name="error">Failure description included in the exception message.</param>
public sealed class PowerShellExecutionException(string script, string error)
    : AgentException($"PowerShell execution failed: {error}")
{
    /// <summary>
    /// Script that was being executed.
    /// </summary>
    public string Script { get; } = script;
}
/// <summary>
/// Raised when an operation on a Windows service fails.
/// </summary>
/// <param name="serviceName">Name of the affected service.</param>
/// <param name="operation">Operation that was attempted.</param>
/// <param name="message">Underlying failure description.</param>
public sealed class WindowsServiceException(string serviceName, string operation, string message)
    : AgentException($"Windows service operation '{operation}' failed for '{serviceName}': {message}")
{
    /// <summary>
    /// Name of the affected service.
    /// </summary>
    public string ServiceName { get; } = serviceName;

    /// <summary>
    /// Operation that was attempted.
    /// </summary>
    public string Operation { get; } = operation;
}
/// <summary>
/// Raised when transferring a file over WinRM fails.
/// </summary>
/// <param name="localPath">Local side of the transfer.</param>
/// <param name="remotePath">Remote side of the transfer.</param>
/// <param name="message">Underlying failure description.</param>
public sealed class WinRmFileTransferException(string localPath, string remotePath, string message)
    : AgentException($"File transfer failed between '{localPath}' and '{remotePath}': {message}")
{
    /// <summary>
    /// Local side of the transfer.
    /// </summary>
    public string LocalPath { get; } = localPath;

    /// <summary>
    /// Remote side of the transfer.
    /// </summary>
    public string RemotePath { get; } = remotePath;
}

View File

@@ -0,0 +1,23 @@
<!-- Project file for the StellaOps.Agent.WinRM assembly: targets net10.0 with nullable reference types, implicit usings and warnings-as-errors. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>StellaOps.Agent.WinRM</RootNamespace>
<AssemblyName>StellaOps.Agent.WinRM</AssemblyName>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<!-- WS-Management over HTTP for cross-platform WinRM support. -->
<!-- NOTE(review): no WinRM client package is referenced; presumably the HTTP package above carries the WS-Management traffic - confirm. -->
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,14 @@
using StellaOps.Agent.Core.Models;
namespace StellaOps.Agent.WinRM.Tasks;
/// <summary>
/// Interface for WinRM task handlers.
/// </summary>
public interface IWinRmTask
{
/// <summary>
/// Executes the WinRM task.
/// </summary>
/// <param name="task">The task to execute.</param>
/// <param name="timeProvider">Clock supplied by the caller for the handler to use.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The result of the task.</returns>
Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default);
}

View File

@@ -0,0 +1,169 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.WinRM.Exceptions;
namespace StellaOps.Agent.WinRM.Tasks;
/// <summary>
/// Task for executing PowerShell commands via WinRM.
/// </summary>
public sealed class PowerShellTask : IWinRmTask
{
    private const string TaskType = "winrm.powershell";

    private readonly WinRmConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for PowerShell execution.
    /// </summary>
    public sealed record PowerShellPayload
    {
        /// <summary>
        /// Target Windows host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// WinRM port.
        /// </summary>
        public int Port { get; init; } = 5985;

        /// <summary>
        /// Use SSL/TLS.
        /// </summary>
        public bool UseSSL { get; init; }

        /// <summary>
        /// Username for authentication.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Password for authentication.
        /// </summary>
        public string? Password { get; init; }

        /// <summary>
        /// Windows domain.
        /// </summary>
        public string? Domain { get; init; }

        /// <summary>
        /// PowerShell script to execute.
        /// </summary>
        public required string Script { get; init; }

        /// <summary>
        /// Execution timeout.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);

        /// <summary>
        /// Whether to report the task as failed when the script run had errors.
        /// </summary>
        public bool FailOnError { get; init; } = true;
    }

    /// <summary>
    /// Creates a new PowerShell task handler.
    /// </summary>
    public PowerShellTask(WinRmConnectionPool connectionPool, ILogger<PowerShellTask> logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <summary>
    /// Runs the payload's script on the remote host and reports its exit code and output.
    /// </summary>
    /// <param name="task">Task whose payload deserializes to <see cref="PowerShellPayload"/>.</param>
    /// <param name="timeProvider">Clock used for the start and completion timestamps.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>
    /// A result with <c>exitCode</c>, <c>stdout</c> and <c>stderr</c> outputs; it is marked failed
    /// only when the run had errors and <see cref="PowerShellPayload.FailOnError"/> is set.
    /// </returns>
    /// <exception cref="InvalidWinRmPayloadException">The payload is missing or is not valid JSON for <see cref="PowerShellPayload"/>.</exception>
    /// <exception cref="WinRmConnectionException">An HTTP-level failure occurred while talking to the host.</exception>
    /// <exception cref="PowerShellExecutionException">Any other failure during execution.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = ParsePayload(task);

        var connectionInfo = new WinRmConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            UseSSL = payload.UseSSL,
            Username = payload.Username,
            Password = payload.Password,
            Domain = payload.Domain,
            Timeout = payload.Timeout
        };

        try
        {
            _logger.LogInformation(
                "Executing PowerShell script on {Host}:{Port}",
                payload.Host, payload.Port);

            var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);
            var result = await session.ExecuteAsync(payload.Script, ct);

            var completedAt = timeProvider.GetUtcNow();
            var failed = payload.FailOnError && result.HadErrors;

            if (failed)
            {
                _logger.LogWarning(
                    "PowerShell script failed with exit code {ExitCode}: {Stderr}",
                    result.ExitCode, result.Stderr);
            }
            else
            {
                _logger.LogInformation(
                    "PowerShell script completed with exit code {ExitCode}",
                    result.ExitCode);
            }

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = !failed,
                Error = failed ? result.Stderr : null,
                Outputs = new Dictionary<string, object>
                {
                    ["exitCode"] = result.ExitCode,
                    ["stdout"] = result.Stdout,
                    ["stderr"] = result.Stderr
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (HttpRequestException ex)
        {
            _logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);
            throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
        }
        // Cancellation is not an execution failure: let it propagate unchanged so callers can tell the two apart.
        catch (Exception ex) when (ex is not WinRmConnectionException
            and not InvalidWinRmPayloadException
            and not OperationCanceledException)
        {
            _logger.LogError(ex, "PowerShell execution failed on {Host}", payload.Host);
            throw new PowerShellExecutionException(payload.Script, ex.Message);
        }
    }

    // Deserializes the payload, reporting malformed JSON as an invalid-payload error.
    private static PowerShellPayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<PowerShellPayload>(task.Payload)
                ?? throw new InvalidWinRmPayloadException(TaskType);
        }
        catch (JsonException)
        {
            throw new InvalidWinRmPayloadException(TaskType);
        }
    }
}

View File

@@ -0,0 +1,360 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.WinRM.Exceptions;
namespace StellaOps.Agent.WinRM.Tasks;
/// <summary>
/// Task for file transfer via WinRM using PowerShell.
/// </summary>
public sealed class WinRmFileTransferTask : IWinRmTask
{
private readonly WinRmConnectionPool _connectionPool;
private readonly ILogger _logger;
/// <summary>
/// Payload for file upload operations.
/// </summary>
public sealed record FileUploadPayload
{
/// <summary>
/// Target Windows host.
/// </summary>
public required string Host { get; init; }
/// <summary>
/// WinRM port.
/// </summary>
public int Port { get; init; } = 5985;
/// <summary>
/// Use SSL/TLS.
/// </summary>
public bool UseSSL { get; init; }
/// <summary>
/// Username for authentication.
/// </summary>
public required string Username { get; init; }
/// <summary>
/// Password for authentication.
/// </summary>
public string? Password { get; init; }
/// <summary>
/// Windows domain.
/// </summary>
public string? Domain { get; init; }
/// <summary>
/// Local file path to upload.
/// </summary>
public required string LocalPath { get; init; }
/// <summary>
/// Remote destination path.
/// </summary>
public required string RemotePath { get; init; }
/// <summary>
/// Whether to create parent directories.
/// </summary>
public bool CreateDirectories { get; init; } = true;
/// <summary>
/// Operation timeout.
/// </summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(30);
}
/// <summary>
/// Payload for file download operations.
/// </summary>
public sealed record FileDownloadPayload
{
/// <summary>
/// Target Windows host.
/// </summary>
public required string Host { get; init; }
/// <summary>
/// WinRM port.
/// </summary>
public int Port { get; init; } = 5985;
/// <summary>
/// Use SSL/TLS.
/// </summary>
public bool UseSSL { get; init; }
/// <summary>
/// Username for authentication.
/// </summary>
public required string Username { get; init; }
/// <summary>
/// Password for authentication.
/// </summary>
public string? Password { get; init; }
/// <summary>
/// Windows domain.
/// </summary>
public string? Domain { get; init; }
/// <summary>
/// Remote file path to download.
/// </summary>
public required string RemotePath { get; init; }
/// <summary>
/// Local destination path.
/// </summary>
public required string LocalPath { get; init; }
/// <summary>
/// Operation timeout.
/// </summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(30);
}
/// <summary>
/// Creates a new file transfer task handler.
/// </summary>
public WinRmFileTransferTask(WinRmConnectionPool connectionPool, ILogger<WinRmFileTransferTask> logger)
{
_connectionPool = connectionPool;
_logger = logger;
}
/// <summary>
/// Executes a file upload operation.
/// </summary>
public async Task<AgentTaskResult> ExecuteUploadAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
{
var startTime = timeProvider.GetUtcNow();
FileUploadPayload? payload;
try
{
payload = JsonSerializer.Deserialize<FileUploadPayload>(task.Payload);
if (payload is null)
throw new InvalidWinRmPayloadException("winrm.upload");
}
catch (JsonException)
{
throw new InvalidWinRmPayloadException("winrm.upload");
}
if (!File.Exists(payload.LocalPath))
{
throw new WinRmFileTransferException(
payload.LocalPath,
payload.RemotePath,
"Local file does not exist");
}
var connectionInfo = new WinRmConnectionInfo
{
Host = payload.Host,
Port = payload.Port,
UseSSL = payload.UseSSL,
Username = payload.Username,
Password = payload.Password,
Domain = payload.Domain,
Timeout = payload.Timeout
};
try
{
_logger.LogInformation(
"Uploading file {LocalPath} to {Host}:{RemotePath}",
payload.LocalPath, payload.Host, payload.RemotePath);
var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);
// Create parent directory if needed
if (payload.CreateDirectories)
{
var remotePath = payload.RemotePath.Replace("'", "''");
var dirScript = $@"
$dir = Split-Path -Parent '{remotePath}'
if ($dir -and !(Test-Path $dir)) {{
New-Item -Path $dir -ItemType Directory -Force | Out-Null
}}
";
await session.ExecuteAsync(dirScript, ct);
}
// Read and upload file using Base64
var fileBytes = await File.ReadAllBytesAsync(payload.LocalPath, ct);
var base64 = Convert.ToBase64String(fileBytes);
var uploadScript = $@"
$bytes = [Convert]::FromBase64String('{base64}')
[System.IO.File]::WriteAllBytes('{payload.RemotePath.Replace("'", "''")}', $bytes)
Write-Output 'File uploaded successfully'
Get-Item '{payload.RemotePath.Replace("'", "''")}' | Select-Object FullName, Length, LastWriteTime | ConvertTo-Json
";
var result = await session.ExecuteAsync(uploadScript, ct);
var completedAt = timeProvider.GetUtcNow();
if (result.HadErrors)
{
throw new WinRmFileTransferException(
payload.LocalPath,
payload.RemotePath,
result.Stderr);
}
_logger.LogInformation(
"File upload completed: {LocalPath} -> {RemotePath}",
payload.LocalPath, payload.RemotePath);
return new AgentTaskResult
{
TaskId = task.Id,
Success = true,
Outputs = new Dictionary<string, object>
{
["localPath"] = payload.LocalPath,
["remotePath"] = payload.RemotePath,
["bytesTransferred"] = fileBytes.Length,
["output"] = result.Stdout
},
CompletedAt = completedAt,
Duration = completedAt - startTime
};
}
catch (HttpRequestException ex)
{
_logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);
throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
}
catch (Exception ex) when (ex is not WinRmConnectionException
and not InvalidWinRmPayloadException
and not WinRmFileTransferException)
{
_logger.LogError(ex, "File upload failed");
throw new WinRmFileTransferException(payload.LocalPath, payload.RemotePath, ex.Message);
}
}
/// <summary>
/// Downloads a single file from the remote Windows host to the local file system.
/// </summary>
/// <remarks>
/// The remote file is read in full by a generated PowerShell script and returned on stdout as
/// one Base64 string, which is decoded and written to the payload's <c>LocalPath</c>. The whole
/// file is therefore held in memory on both ends.
/// </remarks>
/// <param name="task">Task whose payload deserializes to a <see cref="FileDownloadPayload"/>.</param>
/// <param name="timeProvider">Clock used for the start and completion timestamps.</param>
/// <param name="ct">Token used to cancel the remote call and the local write.</param>
/// <returns>
/// A successful result carrying <c>localPath</c>, <c>remotePath</c> and <c>bytesTransferred</c>.
/// </returns>
/// <exception cref="InvalidWinRmPayloadException">The payload deserializes to null or is not valid JSON.</exception>
/// <exception cref="WinRmConnectionException">The WinRM HTTP request failed.</exception>
/// <exception cref="WinRmFileTransferException">The remote script reported errors, or any other step failed.</exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled by the caller.</exception>
public async Task<AgentTaskResult> ExecuteDownloadAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
{
    var startTime = timeProvider.GetUtcNow();

    FileDownloadPayload? payload;
    try
    {
        payload = JsonSerializer.Deserialize<FileDownloadPayload>(task.Payload);
        if (payload is null)
        {
            throw new InvalidWinRmPayloadException("winrm.download");
        }
    }
    catch (JsonException)
    {
        throw new InvalidWinRmPayloadException("winrm.download");
    }

    var connectionInfo = new WinRmConnectionInfo
    {
        Host = payload.Host,
        Port = payload.Port,
        UseSSL = payload.UseSSL,
        Username = payload.Username,
        Password = payload.Password,
        Domain = payload.Domain,
        Timeout = payload.Timeout
    };

    try
    {
        _logger.LogInformation(
            "Downloading file {RemotePath} from {Host} to {LocalPath}",
            payload.RemotePath, payload.Host, payload.LocalPath);

        var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);

        // Read the remote file and emit it as Base64 on stdout.
        // -LiteralPath keeps names containing wildcard characters (for example "[1]") from
        // being interpreted as patterns, which would report an existing file as missing.
        var quotedRemotePath = $"'{EscapeSingleQuoted(payload.RemotePath)}'";
        var downloadScript = string.Join(
            "\n",
            $"$path = {quotedRemotePath}",
            "if (!(Test-Path -LiteralPath $path)) {",
            "    throw \"File not found: $path\"",
            "}",
            "$bytes = [System.IO.File]::ReadAllBytes($path)",
            "[Convert]::ToBase64String($bytes)");

        var result = await session.ExecuteAsync(downloadScript, ct);
        if (result.HadErrors)
        {
            throw new WinRmFileTransferException(
                payload.LocalPath,
                payload.RemotePath,
                result.Stderr);
        }

        // Decode and write the local file.
        var fileBytes = Convert.FromBase64String(result.Stdout.Trim());

        var localDir = Path.GetDirectoryName(payload.LocalPath);
        if (!string.IsNullOrEmpty(localDir) && !Directory.Exists(localDir))
        {
            Directory.CreateDirectory(localDir);
        }

        await File.WriteAllBytesAsync(payload.LocalPath, fileBytes, ct);

        // Taken after the local write so the reported duration covers the whole transfer.
        var completedAt = timeProvider.GetUtcNow();

        _logger.LogInformation(
            "File download completed: {RemotePath} -> {LocalPath}",
            payload.RemotePath, payload.LocalPath);

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = true,
            Outputs = new Dictionary<string, object>
            {
                ["localPath"] = payload.LocalPath,
                ["remotePath"] = payload.RemotePath,
                ["bytesTransferred"] = fileBytes.Length
            },
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested)
    {
        // Caller-requested cancellation is not a transfer failure; let it propagate unchanged.
        // Timeouts (token not cancelled) still fall through to the generic handler below.
        throw;
    }
    catch (HttpRequestException ex)
    {
        _logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);
        throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
    }
    catch (Exception ex) when (ex is not WinRmConnectionException
        and not InvalidWinRmPayloadException
        and not WinRmFileTransferException)
    {
        _logger.LogError(ex, "File download failed");
        throw new WinRmFileTransferException(payload.LocalPath, payload.RemotePath, ex.Message);
    }

    // Doubles every character PowerShell accepts as a single-quote delimiter: the ASCII
    // apostrophe and the typographic quotes U+2018, U+2019, U+201A and U+201B.
    static string EscapeSingleQuoted(string value) => value
        .Replace("'", "''")
        .Replace("\u2018", "\u2018\u2018")
        .Replace("\u2019", "\u2019\u2019")
        .Replace("\u201A", "\u201A\u201A")
        .Replace("\u201B", "\u201B\u201B");
}
/// <inheritdoc />
public Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
{
    // The task type is matched case-insensitively by normalising it first.
    var normalizedType = task.TaskType.ToLowerInvariant();

    switch (normalizedType)
    {
        case "winrm.upload":
            return ExecuteUploadAsync(task, timeProvider, ct);

        case "winrm.download":
            return ExecuteDownloadAsync(task, timeProvider, ct);

        default:
            // Thrown synchronously; the original (non-normalised) task type is reported.
            throw new InvalidWinRmPayloadException(task.TaskType);
    }
}
}

View File

@@ -0,0 +1,348 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.WinRM.Exceptions;
namespace StellaOps.Agent.WinRM.Tasks;
/// <summary>
/// Task for managing Windows containers via WinRM.
/// </summary>
/// <remarks>
/// Each operation is translated into a short PowerShell script that invokes the <c>docker</c>
/// CLI on the target host. Values taken from the payload are emitted as single-quoted
/// PowerShell literals so they are never parsed as script syntax.
/// </remarks>
public sealed class WindowsContainerTask : IWinRmTask
{
    // Go-template argument that makes "docker inspect" print the container state as JSON.
    private const string StateFormat = "--format '{{json .State}}'";

    private readonly WinRmConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for Windows container operations.
    /// </summary>
    public sealed record WindowsContainerPayload
    {
        /// <summary>
        /// Target Windows host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// WinRM port. Defaults to 5985.
        /// </summary>
        public int Port { get; init; } = 5985;

        /// <summary>
        /// Use SSL/TLS.
        /// </summary>
        public bool UseSSL { get; init; }

        /// <summary>
        /// Username for authentication.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Password for authentication.
        /// </summary>
        public string? Password { get; init; }

        /// <summary>
        /// Windows domain.
        /// </summary>
        public string? Domain { get; init; }

        /// <summary>
        /// Container operation to perform.
        /// </summary>
        public required ContainerOperation Operation { get; init; }

        /// <summary>
        /// Container name or ID. Required for every operation except
        /// <see cref="ContainerOperation.Pull"/> and <see cref="ContainerOperation.List"/>.
        /// </summary>
        public string? ContainerName { get; init; }

        /// <summary>
        /// Image reference. Required for <see cref="ContainerOperation.Pull"/> and
        /// <see cref="ContainerOperation.Run"/>.
        /// </summary>
        public string? Image { get; init; }

        /// <summary>
        /// Environment variables, passed to <c>docker run</c> as <c>-e KEY=VALUE</c>.
        /// </summary>
        public Dictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// Port mappings (host:container), passed to <c>docker run</c> as <c>-p</c>.
        /// </summary>
        public List<string>? Ports { get; init; }

        /// <summary>
        /// Volume mounts (host:container), passed to <c>docker run</c> as <c>-v</c>.
        /// </summary>
        public List<string>? Volumes { get; init; }

        /// <summary>
        /// Additional Docker run arguments. Appended to the generated command line verbatim,
        /// without quoting or escaping.
        /// </summary>
        public string? ExtraArgs { get; init; }

        /// <summary>
        /// Timeout handed to the WinRM connection. Defaults to 10 minutes.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(10);

        /// <summary>
        /// Whether to remove an existing container with the same name before running.
        /// </summary>
        public bool ForceRecreate { get; init; }
    }

    /// <summary>
    /// Windows container operations.
    /// </summary>
    public enum ContainerOperation
    {
        /// <summary>
        /// Pull a container image.
        /// </summary>
        Pull,

        /// <summary>
        /// Create and start a container.
        /// </summary>
        Run,

        /// <summary>
        /// Start an existing container.
        /// </summary>
        Start,

        /// <summary>
        /// Stop a running container.
        /// </summary>
        Stop,

        /// <summary>
        /// Remove a container.
        /// </summary>
        Remove,

        /// <summary>
        /// Get container status.
        /// </summary>
        Status,

        /// <summary>
        /// List containers.
        /// </summary>
        List,

        /// <summary>
        /// Get container logs.
        /// </summary>
        Logs
    }

    /// <summary>
    /// Creates a new Windows container task handler.
    /// </summary>
    /// <param name="connectionPool">Pool that supplies WinRM sessions.</param>
    /// <param name="logger">Logger for operation progress and failures.</param>
    public WindowsContainerTask(WinRmConnectionPool connectionPool, ILogger<WindowsContainerTask> logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <summary>
    /// Runs the container operation described by the task payload on the remote host.
    /// </summary>
    /// <remarks>
    /// A remote command that reports errors yields a result with <c>Success = false</c> rather
    /// than an exception; only payload problems and WinRM HTTP failures are thrown.
    /// </remarks>
    /// <exception cref="InvalidWinRmPayloadException">
    /// The payload is null, is not valid JSON, or lacks a value the operation requires.
    /// </exception>
    /// <exception cref="WinRmConnectionException">The WinRM HTTP request failed.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();

        WindowsContainerPayload? payload;
        try
        {
            payload = JsonSerializer.Deserialize<WindowsContainerPayload>(task.Payload);
            if (payload is null)
            {
                throw new InvalidWinRmPayloadException("winrm.container");
            }
        }
        catch (JsonException)
        {
            throw new InvalidWinRmPayloadException("winrm.container");
        }

        ValidatePayload(payload);

        var connectionInfo = new WinRmConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            UseSSL = payload.UseSSL,
            Username = payload.Username,
            Password = payload.Password,
            Domain = payload.Domain,
            Timeout = payload.Timeout
        };

        try
        {
            _logger.LogInformation(
                "Performing container {Operation} on {Host}",
                payload.Operation, payload.Host);

            var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);
            var script = GenerateScript(payload);
            var result = await session.ExecuteAsync(script, ct);
            var completedAt = timeProvider.GetUtcNow();

            if (result.HadErrors)
            {
                _logger.LogWarning(
                    "Container operation {Operation} failed: {Stderr}",
                    payload.Operation, result.Stderr);

                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = result.Stderr,
                    Outputs = new Dictionary<string, object>
                    {
                        ["operation"] = payload.Operation.ToString(),
                        ["stdout"] = result.Stdout,
                        ["stderr"] = result.Stderr,
                        ["exitCode"] = result.ExitCode
                    },
                    CompletedAt = completedAt,
                    Duration = completedAt - startTime
                };
            }

            _logger.LogInformation(
                "Container operation {Operation} completed successfully",
                payload.Operation);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["operation"] = payload.Operation.ToString(),
                    ["containerName"] = payload.ContainerName ?? string.Empty,
                    ["output"] = result.Stdout,
                    ["exitCode"] = result.ExitCode
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (HttpRequestException ex)
        {
            _logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);
            throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
        }
    }

    /// <summary>
    /// Checks that the payload carries the values its operation needs.
    /// <see cref="ContainerOperation.List"/> needs none.
    /// </summary>
    /// <exception cref="InvalidWinRmPayloadException">A required value is null or empty.</exception>
    private static void ValidatePayload(WindowsContainerPayload payload)
    {
        switch (payload.Operation)
        {
            case ContainerOperation.Pull:
                if (string.IsNullOrEmpty(payload.Image))
                {
                    throw new InvalidWinRmPayloadException("winrm.container: Image required for Pull operation");
                }

                break;

            case ContainerOperation.Run:
                if (string.IsNullOrEmpty(payload.Image))
                {
                    throw new InvalidWinRmPayloadException("winrm.container: Image required for Run operation");
                }

                if (string.IsNullOrEmpty(payload.ContainerName))
                {
                    throw new InvalidWinRmPayloadException("winrm.container: ContainerName required for Run operation");
                }

                break;

            case ContainerOperation.Start:
            case ContainerOperation.Stop:
            case ContainerOperation.Remove:
            case ContainerOperation.Status:
            case ContainerOperation.Logs:
                if (string.IsNullOrEmpty(payload.ContainerName))
                {
                    throw new InvalidWinRmPayloadException($"winrm.container: ContainerName required for {payload.Operation} operation");
                }

                break;
        }
    }

    /// <summary>
    /// Builds the PowerShell script for the payload's operation.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The operation is not a known enum member.</exception>
    private static string GenerateScript(WindowsContainerPayload payload)
    {
        return payload.Operation switch
        {
            ContainerOperation.Pull => GeneratePullScript(payload),
            ContainerOperation.Run => GenerateRunScript(payload),
            ContainerOperation.Start => $"docker start {Name()}; {InspectState(Name())}",
            ContainerOperation.Stop => $"docker stop {Name()}; {InspectState(Name())}",
            ContainerOperation.Remove => $"docker rm -f {Name()}",
            ContainerOperation.Status => InspectState(Name()),
            ContainerOperation.List => "docker ps -a --format '{{json .}}'",
            ContainerOperation.Logs => $"docker logs --tail 500 {Name()}",
            _ => throw new ArgumentOutOfRangeException(nameof(payload.Operation))
        };

        // Evaluated lazily: ContainerName is only guaranteed non-null for the arms that use it.
        string Name() => QuoteArgument(payload.ContainerName!);
    }

    /// <summary>
    /// Builds the script that pulls an image and then prints its <c>docker images</c> entry as JSON.
    /// </summary>
    private static string GeneratePullScript(WindowsContainerPayload payload)
    {
        var image = QuoteArgument(payload.Image!);
        return $"docker pull {image}; docker images {image} --format '{{{{json .}}}}'";
    }

    /// <summary>
    /// Builds the <c>docker run -d</c> script, optionally preceded by removal of an existing
    /// container of the same name, and followed by a state inspection.
    /// </summary>
    private static string GenerateRunScript(WindowsContainerPayload payload)
    {
        var containerName = QuoteArgument(payload.ContainerName!);
        var parts = new List<string> { "docker run -d", $"--name {containerName}" };

        if (payload.Environment is not null)
        {
            foreach (var (key, value) in payload.Environment)
            {
                // Single-quoted so "$" and backticks in values reach docker unchanged.
                parts.Add($"-e {QuoteArgument($"{key}={value}")}");
            }
        }

        if (payload.Ports is not null)
        {
            foreach (var port in payload.Ports)
            {
                parts.Add($"-p {QuoteArgument(port)}");
            }
        }

        if (payload.Volumes is not null)
        {
            foreach (var volume in payload.Volumes)
            {
                parts.Add($"-v {QuoteArgument(volume)}");
            }
        }

        if (!string.IsNullOrEmpty(payload.ExtraArgs))
        {
            // NOTE(review): ExtraArgs is raw command-line text by design and is inserted
            // unescaped; it must only ever come from a trusted source.
            parts.Add(payload.ExtraArgs);
        }

        parts.Add(QuoteArgument(payload.Image!));

        var runCommand = string.Join(" ", parts);
        var inspect = InspectState(containerName);

        if (!payload.ForceRecreate)
        {
            return $"{runCommand}; {inspect}";
        }

        // Anchored name filter so only the exact container name matches.
        var nameFilter = QuoteArgument($"name=^{payload.ContainerName}$");
        return string.Join(
            "\n",
            $"$existing = docker ps -aq -f {nameFilter}",
            "if ($existing) {",
            "    docker rm -f $existing",
            "}",
            runCommand,
            inspect);
    }

    /// <summary>
    /// Builds the <c>docker inspect</c> command that prints a container's state as JSON.
    /// </summary>
    /// <param name="quotedName">Container name already passed through <see cref="QuoteArgument"/>.</param>
    private static string InspectState(string quotedName) => $"docker inspect {quotedName} {StateFormat}";

    /// <summary>
    /// Wraps a value in a single-quoted PowerShell string literal.
    /// </summary>
    /// <remarks>
    /// Every character PowerShell accepts as a single-quote delimiter is doubled: the ASCII
    /// apostrophe and the typographic quotes U+2018, U+2019, U+201A and U+201B. Nothing else
    /// is special inside a single-quoted literal.
    /// </remarks>
    private static string QuoteArgument(string value)
    {
        var escaped = value
            .Replace("'", "''")
            .Replace("\u2018", "\u2018\u2018")
            .Replace("\u2019", "\u2019\u2019")
            .Replace("\u201A", "\u201A\u201A")
            .Replace("\u201B", "\u201B\u201B");

        return $"'{escaped}'";
    }
}

View File

@@ -0,0 +1,241 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.WinRM.Exceptions;
namespace StellaOps.Agent.WinRM.Tasks;
/// <summary>
/// Task for managing Windows services via WinRM.
/// </summary>
/// <remarks>
/// Each operation is translated into a PowerShell script built from the service cmdlets
/// (<c>Get-Service</c>, <c>Start-Service</c>, <c>Stop-Service</c>, <c>Restart-Service</c>,
/// <c>Set-Service</c>) and ends by printing the service's state as JSON.
/// </remarks>
public sealed class WindowsServiceTask : IWinRmTask
{
    private readonly WinRmConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for Windows service operations.
    /// </summary>
    public sealed record WindowsServicePayload
    {
        /// <summary>
        /// Target Windows host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// WinRM port. Defaults to 5985.
        /// </summary>
        public int Port { get; init; } = 5985;

        /// <summary>
        /// Use SSL/TLS.
        /// </summary>
        public bool UseSSL { get; init; }

        /// <summary>
        /// Username for authentication.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Password for authentication.
        /// </summary>
        public string? Password { get; init; }

        /// <summary>
        /// Windows domain.
        /// </summary>
        public string? Domain { get; init; }

        /// <summary>
        /// Service name.
        /// </summary>
        public required string ServiceName { get; init; }

        /// <summary>
        /// Service operation to perform.
        /// </summary>
        public required ServiceOperation Operation { get; init; }

        /// <summary>
        /// Timeout handed to the WinRM connection. Defaults to 2 minutes.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(2);

        /// <summary>
        /// Wait for the service to reach its target state. Defaults to <c>true</c>.
        /// When <c>false</c>, <see cref="ServiceOperation.Stop"/> is issued with
        /// <c>-NoWait</c>; the start and restart cmdlets offer no equivalent switch, so those
        /// operations are unaffected by this flag.
        /// </summary>
        public bool WaitForState { get; init; } = true;
    }

    /// <summary>
    /// Windows service operations.
    /// </summary>
    public enum ServiceOperation
    {
        /// <summary>
        /// Start the service.
        /// </summary>
        Start,

        /// <summary>
        /// Stop the service.
        /// </summary>
        Stop,

        /// <summary>
        /// Restart the service.
        /// </summary>
        Restart,

        /// <summary>
        /// Get service status.
        /// </summary>
        Status,

        /// <summary>
        /// Enable the service (set to automatic start).
        /// </summary>
        Enable,

        /// <summary>
        /// Disable the service.
        /// </summary>
        Disable
    }

    /// <summary>
    /// Creates a new Windows service task handler.
    /// </summary>
    /// <param name="connectionPool">Pool that supplies WinRM sessions.</param>
    /// <param name="logger">Logger for operation progress and failures.</param>
    public WindowsServiceTask(WinRmConnectionPool connectionPool, ILogger<WindowsServiceTask> logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <summary>
    /// Runs the service operation described by the task payload on the remote host.
    /// </summary>
    /// <exception cref="InvalidWinRmPayloadException">The payload is null or is not valid JSON.</exception>
    /// <exception cref="WinRmConnectionException">The WinRM HTTP request failed.</exception>
    /// <exception cref="WindowsServiceException">The remote script reported errors, or any other step failed.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled by the caller.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();

        WindowsServicePayload? payload;
        try
        {
            payload = JsonSerializer.Deserialize<WindowsServicePayload>(task.Payload);
            if (payload is null)
            {
                throw new InvalidWinRmPayloadException("winrm.service");
            }
        }
        catch (JsonException)
        {
            throw new InvalidWinRmPayloadException("winrm.service");
        }

        var connectionInfo = new WinRmConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            UseSSL = payload.UseSSL,
            Username = payload.Username,
            Password = payload.Password,
            Domain = payload.Domain,
            Timeout = payload.Timeout
        };

        try
        {
            _logger.LogInformation(
                "Performing {Operation} on service {ServiceName} on {Host}",
                payload.Operation, payload.ServiceName, payload.Host);

            var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);
            var script = GenerateScript(payload);
            var result = await session.ExecuteAsync(script, ct);
            var completedAt = timeProvider.GetUtcNow();

            if (result.HadErrors)
            {
                _logger.LogWarning(
                    "Service operation {Operation} failed for {ServiceName}: {Stderr}",
                    payload.Operation, payload.ServiceName, result.Stderr);

                throw new WindowsServiceException(
                    payload.ServiceName,
                    payload.Operation.ToString(),
                    result.Stderr);
            }

            _logger.LogInformation(
                "Service operation {Operation} completed for {ServiceName}",
                payload.Operation, payload.ServiceName);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["serviceName"] = payload.ServiceName,
                    ["operation"] = payload.Operation.ToString(),
                    ["output"] = result.Stdout,
                    ["exitCode"] = result.ExitCode
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Caller-requested cancellation is not a service failure; let it propagate unchanged.
            // Timeouts (token not cancelled) still fall through to the generic handler below.
            throw;
        }
        catch (HttpRequestException ex)
        {
            _logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);
            throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
        }
        catch (Exception ex) when (ex is not WinRmConnectionException
            and not InvalidWinRmPayloadException
            and not WindowsServiceException)
        {
            _logger.LogError(ex, "Service operation failed for {ServiceName}", payload.ServiceName);
            throw new WindowsServiceException(payload.ServiceName, payload.Operation.ToString(), ex.Message);
        }
    }

    /// <summary>
    /// Builds the PowerShell script for the payload's operation.
    /// </summary>
    /// <remarks>
    /// <c>Start-Service</c>, <c>Stop-Service</c> and <c>Restart-Service</c> have no
    /// <c>-Wait</c> parameter, so none is emitted; passing one makes the cmdlet fail
    /// parameter binding. The only related switch is <c>Stop-Service -NoWait</c>
    /// (Windows PowerShell 5.0 and later), used when <c>WaitForState</c> is <c>false</c>.
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException">The operation is not a known enum member.</exception>
    private static string GenerateScript(WindowsServicePayload payload)
    {
        var name = $"'{EscapeSingleQuoted(payload.ServiceName)}'";
        var stopSwitches = payload.WaitForState ? "-Force" : "-Force -NoWait";

        // Final statement of every mutating script: report the resulting state as JSON.
        var report = $"Get-Service -Name {name} | Select-Object Name, Status, StartType | ConvertTo-Json";

        return payload.Operation switch
        {
            ServiceOperation.Start => string.Join(
                "\n",
                $"$service = Get-Service -Name {name} -ErrorAction Stop",
                "if ($service.Status -ne 'Running') {",
                $"    Start-Service -Name {name} -ErrorAction Stop",
                "}",
                report),

            ServiceOperation.Stop => string.Join(
                "\n",
                $"$service = Get-Service -Name {name} -ErrorAction Stop",
                "if ($service.Status -ne 'Stopped') {",
                $"    Stop-Service -Name {name} {stopSwitches} -ErrorAction Stop",
                "}",
                report),

            ServiceOperation.Restart => string.Join(
                "\n",
                $"Restart-Service -Name {name} -Force -ErrorAction Stop",
                report),

            ServiceOperation.Status =>
                $"Get-Service -Name {name} -ErrorAction Stop | Select-Object Name, Status, StartType, DisplayName | ConvertTo-Json",

            ServiceOperation.Enable => string.Join(
                "\n",
                $"Set-Service -Name {name} -StartupType Automatic -ErrorAction Stop",
                report),

            ServiceOperation.Disable => string.Join(
                "\n",
                $"Set-Service -Name {name} -StartupType Disabled -ErrorAction Stop",
                report),

            _ => throw new ArgumentOutOfRangeException(nameof(payload.Operation))
        };
    }

    /// <summary>
    /// Escapes a value for use inside a single-quoted PowerShell string literal by doubling
    /// every character PowerShell accepts as a single-quote delimiter: the ASCII apostrophe
    /// and the typographic quotes U+2018, U+2019, U+201A and U+201B.
    /// </summary>
    private static string EscapeSingleQuoted(string value) => value
        .Replace("'", "''")
        .Replace("\u2018", "\u2018\u2018")
        .Replace("\u2019", "\u2019\u2019")
        .Replace("\u201A", "\u201A\u201A")
        .Replace("\u201B", "\u201B\u201B");
}

View File

@@ -0,0 +1,121 @@
using Microsoft.Extensions.Logging;
using StellaOps.Agent.Core.Capability;
using StellaOps.Agent.Core.Models;
using StellaOps.Agent.WinRM.Exceptions;
using StellaOps.Agent.WinRM.Tasks;
namespace StellaOps.Agent.WinRM;
/// <summary>
/// Agent capability that performs Windows remote management over WinRM (WS-Management).
/// </summary>
/// <remarks>
/// Incoming tasks are dispatched by task type to an <see cref="IWinRmTask"/> handler; all
/// handlers created by the primary constructor share one <see cref="WinRmConnectionPool"/>.
/// </remarks>
public sealed class WinRmCapability : IAgentCapability, IAsyncDisposable
{
    private readonly WinRmConnectionPool _connectionPool;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<WinRmCapability> _logger;
    private readonly Dictionary<string, IWinRmTask> _taskHandlers;

    /// <inheritdoc />
    public string Name
    {
        get { return "winrm"; }
    }

    /// <inheritdoc />
    public string Version
    {
        get { return "1.0.0"; }
    }

    /// <inheritdoc />
    public IReadOnlyList<string> SupportedTaskTypes
    {
        get
        {
            return new string[]
            {
                "winrm.powershell",
                "winrm.service",
                "winrm.container",
                "winrm.upload",
                "winrm.download"
            };
        }
    }

    /// <summary>
    /// Creates the capability together with its own connection pool and the built-in handlers.
    /// </summary>
    /// <param name="httpClientFactory">Factory handed to the connection pool.</param>
    /// <param name="timeProvider">Clock passed to every handler invocation.</param>
    /// <param name="loggerFactory">Source of the loggers for the capability, pool and handlers.</param>
    public WinRmCapability(
        IHttpClientFactory httpClientFactory,
        TimeProvider timeProvider,
        ILoggerFactory loggerFactory)
    {
        _timeProvider = timeProvider;
        _logger = loggerFactory.CreateLogger<WinRmCapability>();

        var poolLogger = loggerFactory.CreateLogger<WinRmConnectionPool>();
        _connectionPool = new WinRmConnectionPool(httpClientFactory, poolLogger);

        IWinRmTask powerShell = new PowerShellTask(_connectionPool, loggerFactory.CreateLogger<PowerShellTask>());
        IWinRmTask service = new WindowsServiceTask(_connectionPool, loggerFactory.CreateLogger<WindowsServiceTask>());
        IWinRmTask container = new WindowsContainerTask(_connectionPool, loggerFactory.CreateLogger<WindowsContainerTask>());
        IWinRmTask fileTransfer = new WinRmFileTransferTask(_connectionPool, loggerFactory.CreateLogger<WinRmFileTransferTask>());

        // Task types are matched case-insensitively; upload and download share one handler.
        var handlers = new Dictionary<string, IWinRmTask>(StringComparer.OrdinalIgnoreCase);
        handlers.Add("winrm.powershell", powerShell);
        handlers.Add("winrm.service", service);
        handlers.Add("winrm.container", container);
        handlers.Add("winrm.upload", fileTransfer);
        handlers.Add("winrm.download", fileTransfer);
        _taskHandlers = handlers;
    }

    /// <summary>
    /// Creates the capability from pre-built dependencies (intended for tests).
    /// </summary>
    /// <param name="connectionPool">Pool that this capability disposes in <see cref="DisposeAsync"/>.</param>
    /// <param name="timeProvider">Clock passed to every handler invocation.</param>
    /// <param name="logger">Logger for the capability itself.</param>
    /// <param name="taskHandlers">Handler table used as supplied, including its key comparer.</param>
    public WinRmCapability(
        WinRmConnectionPool connectionPool,
        TimeProvider timeProvider,
        ILogger<WinRmCapability> logger,
        Dictionary<string, IWinRmTask> taskHandlers)
    {
        (_connectionPool, _timeProvider, _logger, _taskHandlers) =
            (connectionPool, timeProvider, logger, taskHandlers);
    }

    /// <inheritdoc />
    public Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        _logger.LogInformation("WinRM capability initialized");

        const bool initialized = true;
        return Task.FromResult(initialized);
    }

    /// <inheritdoc />
    public Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        // No remote host is contacted here: the capability always reports healthy, and
        // connectivity problems surface per task instead.
        var status = new CapabilityHealthStatus(true, "WinRM capability ready");
        return Task.FromResult(status);
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        if (_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            _logger.LogDebug("Executing WinRM task type: {TaskType} with ID {TaskId}", task.TaskType, task.Id);
            return await handler.ExecuteAsync(task, _timeProvider, ct);
        }

        throw new InvalidWinRmPayloadException($"Unsupported task type: {task.TaskType}");
    }

    /// <inheritdoc />
    public async ValueTask DisposeAsync()
    {
        // Disposing the pool closes every pooled session.
        await _connectionPool.DisposeAsync();
        _logger.LogDebug("WinRM capability disposed");
    }
}

View File

@@ -0,0 +1,93 @@
namespace StellaOps.Agent.WinRM;
/// <summary>
/// WinRM connection information.
/// </summary>
public sealed record WinRmConnectionInfo
{
    /// <summary>
    /// Target Windows host (DNS name or IP address).
    /// </summary>
    public required string Host { get; init; }

    /// <summary>
    /// WinRM port (5985 for HTTP, 5986 for HTTPS). Defaults to 5985.
    /// </summary>
    public int Port { get; init; } = 5985;

    /// <summary>
    /// Use SSL/TLS. Selects the <c>https</c> scheme in <see cref="GetEndpointUrl"/>.
    /// </summary>
    public bool UseSSL { get; init; }

    /// <summary>
    /// Username for authentication.
    /// </summary>
    public required string Username { get; init; }

    /// <summary>
    /// Password for authentication.
    /// </summary>
    public string? Password { get; init; }

    /// <summary>
    /// Windows domain (optional).
    /// </summary>
    public string? Domain { get; init; }

    /// <summary>
    /// Authentication mechanism. Defaults to <see cref="WinRmAuthMechanism.Negotiate"/>.
    /// </summary>
    public WinRmAuthMechanism AuthMechanism { get; init; } = WinRmAuthMechanism.Negotiate;

    /// <summary>
    /// Connection timeout. Defaults to 30 seconds.
    /// </summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);

    /// <summary>
    /// Gets the key that identifies this connection, in the form
    /// <c>[DOMAIN\]user@host:port</c>.
    /// </summary>
    /// <remarks>
    /// Only the domain, username, host and port take part in the key; the password,
    /// <see cref="UseSSL"/> and <see cref="AuthMechanism"/> do not.
    /// </remarks>
    public string GetConnectionKey()
    {
        var userPart = string.IsNullOrEmpty(Domain)
            ? Username
            : $"{Domain}\\{Username}";

        return $"{userPart}@{Host}:{Port}";
    }

    /// <summary>
    /// Gets the WinRM endpoint URL, <c>scheme://host:port/wsman</c>.
    /// </summary>
    /// <remarks>
    /// An IPv6 literal host is wrapped in square brackets, as URLs require; without them the
    /// address's own colons would be indistinguishable from the port separator.
    /// </remarks>
    public string GetEndpointUrl()
    {
        var scheme = UseSSL ? "https" : "http";

        var isBareIpv6 = !Host.StartsWith('[')
            && System.Net.IPAddress.TryParse(Host, out var address)
            && address.AddressFamily == System.Net.Sockets.AddressFamily.InterNetworkV6;
        var urlHost = isBareIpv6 ? $"[{Host}]" : Host;

        return $"{scheme}://{urlHost}:{Port}/wsman";
    }
}

/// <summary>
/// WinRM authentication mechanisms.
/// </summary>
public enum WinRmAuthMechanism
{
    /// <summary>
    /// Basic authentication (username/password over HTTPS).
    /// </summary>
    Basic,

    /// <summary>
    /// Negotiate (NTLM or Kerberos).
    /// </summary>
    Negotiate,

    /// <summary>
    /// Kerberos authentication.
    /// </summary>
    Kerberos,

    /// <summary>
    /// CredSSP (Credential Security Support Provider).
    /// </summary>
    CredSSP
}

View File

@@ -0,0 +1,172 @@
using System.Collections.Concurrent;
using Microsoft.Extensions.Logging;
namespace StellaOps.Agent.WinRM;
/// <summary>
/// Connection pool for WinRM sessions.
/// </summary>
/// <remarks>
/// Sessions are keyed by <see cref="WinRmConnectionInfo.GetConnectionKey"/> and reused until
/// they have been idle for longer than the idle timeout. A background timer, firing once per
/// idle-timeout interval, closes and removes idle sessions.
/// </remarks>
public sealed class WinRmConnectionPool : IAsyncDisposable
{
    private readonly ConcurrentDictionary<string, PooledSession> _sessions = new();
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly ILogger _logger;
    private readonly TimeSpan _idleTimeout;
    private readonly Timer _cleanupTimer;
    private bool _disposed;

    /// <summary>
    /// Creates a new WinRM connection pool.
    /// </summary>
    /// <param name="httpClientFactory">Factory for the named <c>"WinRM"</c> HTTP client.</param>
    /// <param name="logger">Logger shared by the pool and the sessions it creates.</param>
    /// <param name="idleTimeout">
    /// Idle time after which a session is no longer reused; also the cleanup interval.
    /// Defaults to 5 minutes.
    /// </param>
    public WinRmConnectionPool(
        IHttpClientFactory httpClientFactory,
        ILogger<WinRmConnectionPool> logger,
        TimeSpan? idleTimeout = null)
    {
        _httpClientFactory = httpClientFactory;
        _logger = logger;
        _idleTimeout = idleTimeout ?? TimeSpan.FromMinutes(5);
        _cleanupTimer = new Timer(CleanupIdleSessions, null, _idleTimeout, _idleTimeout);
    }

    /// <summary>
    /// Gets or creates a WinRM session for the given connection info.
    /// </summary>
    /// <param name="connectionInfo">Connection whose key selects the pooled session.</param>
    /// <param name="ct">Token used to cancel connecting a new session.</param>
    /// <returns>A connected session; the pool keeps ownership of it.</returns>
    /// <exception cref="ObjectDisposedException">The pool has been disposed.</exception>
    public async Task<WinRmSession> GetSessionAsync(WinRmConnectionInfo connectionInfo, CancellationToken ct = default)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        var key = connectionInfo.GetConnectionKey();

        if (_sessions.TryGetValue(key, out var pooled))
        {
            if (!pooled.IsExpired(_idleTimeout))
            {
                pooled.Touch();
                return pooled.Session;
            }

            // The entry is stale: evict exactly that instance and close it, so its remote
            // shell and HttpClient are released instead of being silently overwritten.
            if (_sessions.TryRemove(KeyValuePair.Create(key, pooled)))
            {
                await DisposeQuietlyAsync(key, pooled);
            }
        }

        // Create a new session; release the client if connecting fails.
        var httpClient = CreateHttpClient(connectionInfo);
        WinRmSession? session = null;
        try
        {
            session = new WinRmSession(connectionInfo, httpClient, _logger);
            await session.ConnectAsync(ct);
        }
        catch
        {
            session?.Dispose();
            httpClient.Dispose();
            throw;
        }

        var newPooled = new PooledSession(session, httpClient);

        // Another caller may have pooled a session for the same key while this one was
        // connecting. Keep the pooled one and close ours rather than leaking either.
        var winner = _sessions.GetOrAdd(key, newPooled);
        if (!ReferenceEquals(winner, newPooled))
        {
            await DisposeQuietlyAsync(key, newPooled);
            winner.Touch();
            return winner.Session;
        }

        _logger.LogDebug("Created new WinRM session for {Key}", key);
        return session;
    }

    /// <summary>
    /// Removes the session for the given connection from the pool and closes it.
    /// Does nothing when no such session is pooled.
    /// </summary>
    /// <param name="connectionInfo">Connection whose key selects the pooled session.</param>
    /// <param name="ct">Not used by the current implementation.</param>
    public async Task RemoveSessionAsync(WinRmConnectionInfo connectionInfo, CancellationToken ct = default)
    {
        var key = connectionInfo.GetConnectionKey();

        if (_sessions.TryRemove(key, out var pooled))
        {
            await pooled.DisposeAsync();
            _logger.LogDebug("Removed WinRM session for {Key}", key);
        }
    }

    /// <summary>
    /// Creates the HTTP client for a connection and applies its timeout and, for Basic
    /// authentication with a password, the <c>Authorization</c> header.
    /// </summary>
    private HttpClient CreateHttpClient(WinRmConnectionInfo connectionInfo)
    {
        var client = _httpClientFactory.CreateClient("WinRM");
        client.Timeout = connectionInfo.Timeout;

        // Only Basic authentication is configured here; no credentials are attached to the
        // client for the other mechanisms.
        // Note: In production, use HttpClientHandler with credentials.
        var credentials = CreateCredentials(connectionInfo);
        if (credentials is not null && connectionInfo.AuthMechanism == WinRmAuthMechanism.Basic)
        {
            var authValue = Convert.ToBase64String(
                System.Text.Encoding.UTF8.GetBytes(
                    $"{connectionInfo.Username}:{connectionInfo.Password}"));

            client.DefaultRequestHeaders.Authorization =
                new System.Net.Http.Headers.AuthenticationHeaderValue("Basic", authValue);
        }

        return client;
    }

    /// <summary>
    /// Builds network credentials for the connection, or <c>null</c> when no password is set.
    /// </summary>
    private static System.Net.NetworkCredential? CreateCredentials(WinRmConnectionInfo connectionInfo)
    {
        if (string.IsNullOrEmpty(connectionInfo.Password))
        {
            return null;
        }

        return new System.Net.NetworkCredential(
            connectionInfo.Username,
            connectionInfo.Password,
            connectionInfo.Domain ?? string.Empty);
    }

    /// <summary>
    /// Timer callback: removes and closes every session that has exceeded the idle timeout.
    /// </summary>
    private void CleanupIdleSessions(object? state)
    {
        if (_disposed)
        {
            return;
        }

        foreach (var kvp in _sessions)
        {
            // Remove by key AND value so a session that replaced the idle one in the
            // meantime is left alone.
            if (kvp.Value.IsExpired(_idleTimeout) && _sessions.TryRemove(kvp))
            {
                // Timer callbacks cannot await; the helper observes and logs any failure.
                _ = DisposeQuietlyAsync(kvp.Key, kvp.Value);
                _logger.LogDebug("Cleaned up idle WinRM session for {Key}", kvp.Key);
            }
        }
    }

    /// <inheritdoc />
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        await _cleanupTimer.DisposeAsync();

        foreach (var key in _sessions.Keys)
        {
            if (_sessions.TryRemove(key, out var pooled))
            {
                // One failing session must not prevent the remaining ones from closing.
                await DisposeQuietlyAsync(key, pooled);
            }
        }

        _sessions.Clear();
    }

    /// <summary>
    /// Disposes a pooled session, logging instead of propagating any failure.
    /// </summary>
    private async Task DisposeQuietlyAsync(string key, PooledSession pooled)
    {
        try
        {
            await pooled.DisposeAsync();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error disposing WinRM session for {Key}", key);
        }
    }

    /// <summary>
    /// A session plus the HTTP client it uses and the time it was last handed out.
    /// </summary>
    private sealed class PooledSession : IAsyncDisposable
    {
        private readonly HttpClient _httpClient;

        // UTC ticks of the last use. Stored as a long and accessed through Volatile because
        // the cleanup timer reads it on a different thread than the callers that update it.
        private long _lastUsedTicks;

        public WinRmSession Session { get; }

        public PooledSession(WinRmSession session, HttpClient httpClient)
        {
            Session = session;
            _httpClient = httpClient;
            _lastUsedTicks = DateTimeOffset.UtcNow.UtcTicks;
        }

        /// <summary>Marks the session as used now.</summary>
        public void Touch() => Volatile.Write(ref _lastUsedTicks, DateTimeOffset.UtcNow.UtcTicks);

        /// <summary>Whether the session has been idle for longer than <paramref name="idleTimeout"/>.</summary>
        public bool IsExpired(TimeSpan idleTimeout) =>
            DateTimeOffset.UtcNow.UtcTicks - Volatile.Read(ref _lastUsedTicks) > idleTimeout.Ticks;

        /// <summary>Closes the remote shell, then disposes the session and its HTTP client.</summary>
        public async ValueTask DisposeAsync()
        {
            await Session.CloseAsync();
            Session.Dispose();
            _httpClient.Dispose();
        }
    }
}

View File

@@ -0,0 +1,344 @@
using System.Net;
using System.Net.Http.Headers;
using System.Text;
using System.Xml.Linq;
using Microsoft.Extensions.Logging;
namespace StellaOps.Agent.WinRM;
/// <summary>
/// WinRM session for executing remote PowerShell commands.
/// Uses WS-Management protocol over HTTP(S).
/// </summary>
public sealed class WinRmSession : IDisposable
{
private static readonly XNamespace WsMan = "http://schemas.dmtf.org/wbem/wsman/1/wsman.xsd";
private static readonly XNamespace WsEnum = "http://schemas.xmlsoap.org/ws/2004/09/enumeration";
private static readonly XNamespace WsAddr = "http://schemas.xmlsoap.org/ws/2004/08/addressing";
private static readonly XNamespace WsShell = "http://schemas.microsoft.com/wbem/wsman/1/windows/shell";
private static readonly XNamespace Soap = "http://www.w3.org/2003/05/soap-envelope";
private readonly HttpClient _httpClient;
private readonly WinRmConnectionInfo _connectionInfo;
private readonly ILogger _logger;
private string? _shellId;
private bool _disposed;
/// <summary>
/// Creates a session bound to one connection. No request is sent until
/// <see cref="ConnectAsync"/> or <see cref="ExecuteAsync"/> is called.
/// </summary>
/// <param name="connectionInfo">Target host and endpoint settings.</param>
/// <param name="httpClient">Client used for every WS-Management request of this session.</param>
/// <param name="logger">Logger for session diagnostics.</param>
public WinRmSession(WinRmConnectionInfo connectionInfo, HttpClient httpClient, ILogger logger)
{
    (_connectionInfo, _httpClient, _logger) = (connectionInfo, httpClient, logger);
}
/// <summary>
/// Creates a remote shell on the WinRM service and remembers its identifier.
/// </summary>
/// <param name="ct">Token used to cancel the request.</param>
/// <exception cref="ObjectDisposedException">The session has been disposed.</exception>
public async Task ConnectAsync(CancellationToken ct = default)
{
    ObjectDisposedException.ThrowIf(_disposed, this);

    _logger.LogDebug("Creating WinRM shell on {Host}", _connectionInfo.Host);

    var createResponse = await SendRequestAsync(CreateShellEnvelope(), ct);

    // The shell identifier comes back in the create response.
    _shellId = ExtractShellId(createResponse);

    _logger.LogDebug("WinRM shell created: {ShellId}", _shellId);
}
/// <summary>
/// Runs a PowerShell command in the remote shell and collects its output.
/// </summary>
/// <remarks>
/// A shell is created first when none exists yet. The result is flagged as having errors
/// when the exit code is non-zero or anything was written to stderr.
/// </remarks>
/// <param name="command">PowerShell script text to run.</param>
/// <param name="ct">Token used to cancel the requests.</param>
/// <exception cref="ObjectDisposedException">The session has been disposed.</exception>
public async Task<PowerShellResult> ExecuteAsync(string command, CancellationToken ct = default)
{
    ObjectDisposedException.ThrowIf(_disposed, this);

    var needsShell = _shellId is null;
    if (needsShell)
    {
        await ConnectAsync(ct);
    }

    _logger.LogDebug("Executing PowerShell command on {Host}", _connectionInfo.Host);

    // Step 1: submit the command and pick up the identifier assigned to it.
    var submitResponse = await SendRequestAsync(CreateCommandEnvelope(command), ct);
    var commandId = ExtractCommandId(submitResponse);

    // Step 2: collect the command's output streams and exit code.
    var (standardOutput, standardError, code) = await ReceiveOutputAsync(commandId!, ct);

    var succeeded = code == 0 && string.IsNullOrEmpty(standardError);

    return new PowerShellResult
    {
        Stdout = standardOutput,
        Stderr = standardError,
        ExitCode = code,
        HadErrors = !succeeded
    };
}
/// <summary>
/// Deletes the remote shell, if one exists, and forgets its identifier.
/// </summary>
/// <remarks>
/// Best effort: a failure to delete the shell is logged as a warning and not rethrown, and
/// the identifier is cleared either way.
/// </remarks>
/// <param name="ct">Token used to cancel the request.</param>
public async Task CloseAsync(CancellationToken ct = default)
{
    if (_shellId is not null)
    {
        try
        {
            await SendRequestAsync(CreateDeleteEnvelope(), ct);
            _logger.LogDebug("WinRM shell deleted: {ShellId}", _shellId);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error deleting WinRM shell");
        }

        _shellId = null;
    }
}
/// <summary>
/// Posts a SOAP envelope to the WinRM endpoint and returns the response body.
/// </summary>
/// <param name="envelope">SOAP envelope to send.</param>
/// <param name="ct">Token used to cancel the request.</param>
/// <returns>The response body as text.</returns>
/// <exception cref="HttpRequestException">
/// The endpoint answered with a non-success status; the message contains the status code
/// and the response body.
/// </exception>
private async Task<string> SendRequestAsync(XDocument envelope, CancellationToken ct)
{
    // Both the request content and the response are disposed when the method exits, so the
    // underlying connection and buffers are released instead of waiting for finalization.
    using var content = new StringContent(envelope.ToString(), Encoding.UTF8, "application/soap+xml");
    content.Headers.ContentType = new MediaTypeHeaderValue("application/soap+xml")
    {
        CharSet = "UTF-8"
    };

    using var response = await _httpClient.PostAsync(_connectionInfo.GetEndpointUrl(), content, ct);

    if (!response.IsSuccessStatusCode)
    {
        var errorContent = await response.Content.ReadAsStringAsync(ct);
        throw new HttpRequestException($"WinRM request failed with status {response.StatusCode}: {errorContent}");
    }

    return await response.Content.ReadAsStringAsync(ct);
}
/// <summary>
/// Builds the SOAP envelope for the WS-Transfer <c>Create</c> request that opens a
/// <c>cmd</c> shell with stdin as input stream and stdout/stderr as output streams.
/// </summary>
private XDocument CreateShellEnvelope()
{
    var messageId = Guid.NewGuid().ToString();
    var endpoint = _connectionInfo.GetEndpointUrl();

    // Header element that carries s:mustUnderstand="true".
    XElement Mandatory(XName name, string value) =>
        new XElement(name, value, new XAttribute(Soap + "mustUnderstand", "true"));

    var replyTo = new XElement(WsAddr + "ReplyTo",
        new XElement(WsAddr + "Address", "http://schemas.xmlsoap.org/ws/2004/08/addressing/role/anonymous"));

    // Shell options: WINRS_NOPROFILE = TRUE, WINRS_CODEPAGE = 437.
    var options = new XElement(WsMan + "OptionSet",
        new XElement(WsMan + "Option", "TRUE", new XAttribute("Name", "WINRS_NOPROFILE")),
        new XElement(WsMan + "Option", "437", new XAttribute("Name", "WINRS_CODEPAGE")));

    var header = new XElement(Soap + "Header",
        new XElement(WsAddr + "To", endpoint),
        replyTo,
        Mandatory(WsMan + "ResourceURI", "http://schemas.microsoft.com/wbem/wsman/1/windows/shell/cmd"),
        Mandatory(WsAddr + "Action", "http://schemas.xmlsoap.org/ws/2004/09/transfer/Create"),
        Mandatory(WsMan + "MaxEnvelopeSize", "512000"),
        new XElement(WsAddr + "MessageID", $"uuid:{messageId}"),
        new XElement(WsMan + "OperationTimeout", "PT60S"),
        options);

    var body = new XElement(Soap + "Body",
        new XElement(WsShell + "Shell",
            new XElement(WsShell + "InputStreams", "stdin"),
            new XElement(WsShell + "OutputStreams", "stdout stderr")));

    var envelope = new XElement(Soap + "Envelope",
        new XAttribute(XNamespace.Xmlns + "s", Soap.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "a", WsAddr.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "w", WsMan.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "rsp", WsShell.NamespaceName),
        header,
        body);

    return new XDocument(envelope);
}
/// <summary>
/// Builds the SOAP envelope for the shell <c>Command</c> request that runs a PowerShell
/// script in the current shell.
/// </summary>
/// <remarks>
/// The script is encoded as Base64 over its UTF-16LE bytes and passed to
/// <c>powershell -encodedcommand</c>.
/// </remarks>
/// <param name="command">PowerShell script text to run.</param>
private XDocument CreateCommandEnvelope(string command)
{
    var messageId = Guid.NewGuid().ToString();
    var scriptBytes = Encoding.Unicode.GetBytes(command);
    var encodedCommand = Convert.ToBase64String(scriptBytes);

    // Header element that carries s:mustUnderstand="true".
    XElement Mandatory(XName name, string value) =>
        new XElement(name, value, new XAttribute(Soap + "mustUnderstand", "true"));

    // Addresses the request to the shell opened by this session.
    var shellSelector = new XElement(WsMan + "SelectorSet",
        new XElement(WsMan + "Selector", _shellId, new XAttribute("Name", "ShellId")));

    var header = new XElement(Soap + "Header",
        new XElement(WsAddr + "To", _connectionInfo.GetEndpointUrl()),
        Mandatory(WsMan + "ResourceURI", "http://schemas.microsoft.com/wbem/wsman/1/windows/shell/cmd"),
        Mandatory(WsAddr + "Action", "http://schemas.microsoft.com/wbem/wsman/1/windows/shell/Command"),
        Mandatory(WsMan + "MaxEnvelopeSize", "512000"),
        new XElement(WsAddr + "MessageID", $"uuid:{messageId}"),
        new XElement(WsMan + "OperationTimeout", "PT60S"),
        shellSelector);

    var body = new XElement(Soap + "Body",
        new XElement(WsShell + "CommandLine",
            new XElement(WsShell + "Command", $"powershell -encodedcommand {encodedCommand}")));

    var envelope = new XElement(Soap + "Envelope",
        new XAttribute(XNamespace.Xmlns + "s", Soap.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "a", WsAddr.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "w", WsMan.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "rsp", WsShell.NamespaceName),
        header,
        body);

    return new XDocument(envelope);
}
/// <summary>
/// Builds the WS-Management "Receive" SOAP envelope that requests the <c>stdout</c> and
/// <c>stderr</c> streams of a command running in the current shell.
/// </summary>
/// <param name="commandId">Written to the <c>CommandId</c> attribute of the <c>DesiredStream</c> element.</param>
/// <returns>The request document, ready to be sent to the endpoint.</returns>
private XDocument CreateReceiveEnvelope(string commandId)
{
    var requestId = Guid.NewGuid().ToString();

    // Each header element needs its own attribute instance.
    XAttribute MustUnderstand() => new XAttribute(Soap + "mustUnderstand", "true");

    var header = new XElement(Soap + "Header",
        new XElement(WsAddr + "To", _connectionInfo.GetEndpointUrl()),
        new XElement(WsMan + "ResourceURI", "http://schemas.microsoft.com/wbem/wsman/1/windows/shell/cmd", MustUnderstand()),
        new XElement(WsAddr + "Action", "http://schemas.microsoft.com/wbem/wsman/1/windows/shell/Receive", MustUnderstand()),
        new XElement(WsMan + "MaxEnvelopeSize", "512000", MustUnderstand()),
        new XElement(WsAddr + "MessageID", $"uuid:{requestId}"),
        new XElement(WsMan + "OperationTimeout", "PT60S"),
        new XElement(WsMan + "SelectorSet",
            new XElement(WsMan + "Selector", _shellId, new XAttribute("Name", "ShellId"))));

    var desiredStream = new XElement(WsShell + "DesiredStream", "stdout stderr",
        new XAttribute("CommandId", commandId));

    var body = new XElement(Soap + "Body",
        new XElement(WsShell + "Receive", desiredStream));

    var envelope = new XElement(Soap + "Envelope",
        new XAttribute(XNamespace.Xmlns + "s", Soap.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "a", WsAddr.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "w", WsMan.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "rsp", WsShell.NamespaceName),
        header,
        body);

    return new XDocument(envelope);
}
/// <summary>
/// Builds the WS-Transfer "Delete" SOAP envelope addressed at the current shell id.
/// The body is intentionally empty; the shell is selected through the header's selector set.
/// </summary>
/// <returns>The request document, ready to be sent to the endpoint.</returns>
private XDocument CreateDeleteEnvelope()
{
    var requestId = Guid.NewGuid().ToString();

    // Each header element needs its own attribute instance.
    XAttribute MustUnderstand() => new XAttribute(Soap + "mustUnderstand", "true");

    var header = new XElement(Soap + "Header",
        new XElement(WsAddr + "To", _connectionInfo.GetEndpointUrl()),
        new XElement(WsMan + "ResourceURI", "http://schemas.microsoft.com/wbem/wsman/1/windows/shell/cmd", MustUnderstand()),
        new XElement(WsAddr + "Action", "http://schemas.xmlsoap.org/ws/2004/09/transfer/Delete", MustUnderstand()),
        new XElement(WsMan + "MaxEnvelopeSize", "512000", MustUnderstand()),
        new XElement(WsAddr + "MessageID", $"uuid:{requestId}"),
        new XElement(WsMan + "OperationTimeout", "PT60S"),
        new XElement(WsMan + "SelectorSet",
            new XElement(WsMan + "Selector", _shellId, new XAttribute("Name", "ShellId"))));

    // Unlike the other envelopes, no "rsp" prefix is declared here: nothing uses it.
    var envelope = new XElement(Soap + "Envelope",
        new XAttribute(XNamespace.Xmlns + "s", Soap.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "a", WsAddr.NamespaceName),
        new XAttribute(XNamespace.Xmlns + "w", WsMan.NamespaceName),
        header,
        new XElement(Soap + "Body"));

    return new XDocument(envelope);
}
/// <summary>
/// Reads the shell id out of a response document.
/// </summary>
/// <param name="response">Response XML text.</param>
/// <returns>
/// The <c>ShellId</c> attribute of the first <c>Shell</c> element when present; otherwise the
/// text of the first <c>Selector</c> element whose <c>Name</c> is <c>ShellId</c>; otherwise <c>null</c>.
/// </returns>
private static string? ExtractShellId(string response)
{
    var document = XDocument.Parse(response);

    // Preferred source: attribute on the first Shell element.
    var shellElement = document.Descendants(WsShell + "Shell").FirstOrDefault();
    var fromAttribute = shellElement?.Attribute("ShellId")?.Value;
    if (fromAttribute is not null)
    {
        return fromAttribute;
    }

    // Fallback: a Selector named "ShellId".
    foreach (var selector in document.Descendants(WsMan + "Selector"))
    {
        if (selector.Attribute("Name")?.Value == "ShellId")
        {
            return selector.Value;
        }
    }

    return null;
}
/// <summary>
/// Reads the text of the first <c>CommandId</c> element from a response document.
/// </summary>
/// <param name="response">Response XML text.</param>
/// <returns>The command id, or <c>null</c> when the response has no <c>CommandId</c> element.</returns>
private static string? ExtractCommandId(string response)
{
    var commandIdElement = XDocument.Parse(response)
        .Descendants(WsShell + "CommandId")
        .FirstOrDefault();

    return commandIdElement is null ? null : commandIdElement.Value;
}
/// <summary>
/// Polls the remote shell with Receive requests until the command reports a state ending in
/// "Done", accumulating the Base64-encoded <c>stdout</c> and <c>stderr</c> stream chunks.
/// </summary>
/// <param name="commandId">Identifier of the command whose output is collected.</param>
/// <param name="ct">Cancels the polling loop and the in-flight request.</param>
/// <returns>
/// The decoded stdout text, the decoded stderr text and the exit code. The exit code is 0 when
/// the final response carries no parsable <c>ExitCode</c> element.
/// </returns>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
/// <exception cref="FormatException">A stream chunk is not valid Base64.</exception>
private async Task<(string Stdout, string Stderr, int ExitCode)> ReceiveOutputAsync(string commandId, CancellationToken ct)
{
    var stdout = new StringBuilder();
    var stderr = new StringBuilder();

    // Stateful decoders: a multi-byte UTF-8 sequence may be split across two chunks, and a
    // stateless Encoding.GetString per chunk would turn both halves into replacement characters.
    // NOTE(review): the shell is created with WINRS_CODEPAGE 437 while output is decoded as
    // UTF-8 — confirm the remote side really emits UTF-8.
    var stdoutDecoder = Encoding.UTF8.GetDecoder();
    var stderrDecoder = Encoding.UTF8.GetDecoder();

    var exitCode = 0;
    var done = false;

    while (!done)
    {
        // Do not start another round-trip once cancellation has been requested.
        ct.ThrowIfCancellationRequested();

        var receiveEnvelope = CreateReceiveEnvelope(commandId);
        var response = await SendRequestAsync(receiveEnvelope, ct);
        var doc = XDocument.Parse(response);

        // Extract output streams
        foreach (var stream in doc.Descendants(WsShell + "Stream"))
        {
            var content = stream.Value;
            if (string.IsNullOrEmpty(content))
            {
                continue;
            }

            var bytes = Convert.FromBase64String(content);
            switch (stream.Attribute("Name")?.Value)
            {
                case "stdout":
                    AppendDecoded(stdoutDecoder, stdout, bytes, flush: false);
                    break;
                case "stderr":
                    AppendDecoded(stderrDecoder, stderr, bytes, flush: false);
                    break;
            }
        }

        // Check for command state; the state is a URI, so compare ordinally.
        var commandState = doc.Descendants(WsShell + "CommandState").FirstOrDefault();
        if (commandState?.Attribute("State")?.Value?.EndsWith("Done", StringComparison.Ordinal) == true)
        {
            done = true;

            // Exit code is machine-readable: parse with the invariant culture.
            var exitCodeText = commandState.Element(WsShell + "ExitCode")?.Value;
            if (int.TryParse(
                    exitCodeText,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var signedCode))
            {
                exitCode = signedCode;
            }
            else if (uint.TryParse(
                         exitCodeText,
                         System.Globalization.NumberStyles.Integer,
                         System.Globalization.CultureInfo.InvariantCulture,
                         out var unsignedCode))
            {
                // Values above int.MaxValue (e.g. NTSTATUS-style codes) previously failed to
                // parse and were reported as 0, i.e. success. Keep the bit pattern instead.
                exitCode = unchecked((int)unsignedCode);
            }
        }
    }

    // Flush any incomplete trailing sequence so it surfaces as a replacement character.
    AppendDecoded(stdoutDecoder, stdout, Array.Empty<byte>(), flush: true);
    AppendDecoded(stderrDecoder, stderr, Array.Empty<byte>(), flush: true);

    return (stdout.ToString(), stderr.ToString(), exitCode);

    // Decodes one chunk with the stream's decoder and appends the characters produced.
    static void AppendDecoded(Decoder decoder, StringBuilder target, byte[] bytes, bool flush)
    {
        var buffer = new char[decoder.GetCharCount(bytes, 0, bytes.Length, flush)];
        var written = decoder.GetChars(bytes, 0, bytes.Length, buffer, 0, flush);
        target.Append(buffer, 0, written);
    }
}
/// <inheritdoc />
/// <remarks>
/// Only local state is reset here; no request is sent to the remote side.
/// Safe to call more than once.
/// </remarks>
public void Dispose()
{
    if (!_disposed)
    {
        _disposed = true;

        // Note: CloseAsync should be called before disposal for proper cleanup
        _shellId = null;
    }
}
}
/// <summary>
/// Result of PowerShell execution: the captured output streams, the exit code and an error flag.
/// </summary>
/// <remarks>
/// Every member is <c>required</c> and init-only, so an instance is fully populated at creation
/// and cannot be modified afterwards.
/// </remarks>
public sealed record PowerShellResult
{
/// <summary>
/// Text captured from standard output.
/// </summary>
public required string Stdout { get; init; }
/// <summary>
/// Text captured from standard error.
/// </summary>
public required string Stderr { get; init; }
/// <summary>
/// Exit code reported for the execution.
/// </summary>
public required int ExitCode { get; init; }
/// <summary>
/// Whether the execution had errors.
/// </summary>
/// <remarks>
/// NOTE(review): how this flag relates to <see cref="ExitCode"/> and <see cref="Stderr"/> is
/// decided by the code that builds the result — confirm against the producer.
/// </remarks>
public required bool HadErrors { get; init; }
}

View File

@@ -0,0 +1,29 @@
using StellaOps.ReleaseOrchestrator.Agent.Models;
namespace StellaOps.ReleaseOrchestrator.Agent.Certificate;
/// <summary>
/// Service for issuing and managing agent certificates.
/// </summary>
public interface IAgentCertificateService
{
/// <summary>
/// Issues a new certificate for an agent.
/// </summary>
/// <param name="agent">Agent the certificate is issued for.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The issued certificate.</returns>
Task<AgentCertificate> IssueAsync(Models.Agent agent, CancellationToken ct = default);
/// <summary>
/// Renews an agent's certificate.
/// </summary>
/// <param name="agent">Agent whose certificate is renewed.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The replacement certificate.</returns>
Task<AgentCertificate> RenewAsync(Models.Agent agent, CancellationToken ct = default);
/// <summary>
/// Revokes an agent's certificate.
/// </summary>
/// <param name="agent">Agent whose certificate is revoked.</param>
/// <param name="ct">Token used to cancel the operation.</param>
Task RevokeAsync(Models.Agent agent, CancellationToken ct = default);
/// <summary>
/// Validates a certificate thumbprint.
/// </summary>
/// <param name="thumbprint">Thumbprint to check.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns><c>true</c> when the thumbprint is accepted; otherwise <c>false</c>.</returns>
Task<bool> ValidateAsync(string thumbprint, CancellationToken ct = default);
}

View File

@@ -0,0 +1,88 @@
using System.Security.Cryptography;
using StellaOps.ReleaseOrchestrator.Agent.Models;
using StellaOps.ReleaseOrchestrator.Agent.Store;
namespace StellaOps.ReleaseOrchestrator.Agent.Certificate;
/// <summary>
/// Test double for <see cref="IAgentCertificateService"/>.
/// Produces random thumbprints and placeholder PEM blocks; no real CA or key material is involved.
/// </summary>
public sealed class StubAgentCertificateService : IAgentCertificateService
{
    /// <summary>Lifetime given to every certificate issued by the stub.</summary>
    private static readonly TimeSpan CertificateValidity = TimeSpan.FromHours(24);

    private readonly IAgentStore _store;
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Creates the stub.
    /// </summary>
    /// <param name="store">Store that records certificate thumbprint and expiry per agent.</param>
    /// <param name="timeProvider">Clock used for the validity window.</param>
    public StubAgentCertificateService(IAgentStore store, TimeProvider timeProvider)
    {
        (_store, _timeProvider) = (store, timeProvider);
    }

    /// <inheritdoc />
    public async Task<AgentCertificate> IssueAsync(Models.Agent agent, CancellationToken ct = default)
    {
        var issuedAt = _timeProvider.GetUtcNow();
        var expiresAt = issuedAt + CertificateValidity;

        // 32 random bytes rendered as hex stand in for a real thumbprint.
        var thumbprint = Convert.ToHexString(RandomNumberGenerator.GetBytes(32));
        var subject = $"CN={agent.Name}, O=StellaOps Agent, OU={agent.TenantId}";
        var certificatePem = GenerateStubPem("CERTIFICATE");
        var privateKeyPem = GenerateStubPem("RSA PRIVATE KEY");

        var issued = new AgentCertificate
        {
            Thumbprint = thumbprint,
            SubjectName = subject,
            NotBefore = issuedAt,
            NotAfter = expiresAt,
            CertificatePem = certificatePem,
            PrivateKeyPem = privateKeyPem
        };

        // Persist thumbprint and expiry on the agent record.
        await _store.UpdateCertificateAsync(agent.Id, issued.Thumbprint, expiresAt, ct);
        return issued;
    }

    /// <inheritdoc />
    public async Task<AgentCertificate> RenewAsync(Models.Agent agent, CancellationToken ct = default)
    {
        // Drop the previous certificate (if any) before issuing the replacement.
        var hasCertificate = !string.IsNullOrEmpty(agent.CertificateThumbprint);
        if (hasCertificate)
        {
            await _store.ClearCertificateAsync(agent.Id, ct);
        }

        return await IssueAsync(agent, ct);
    }

    /// <inheritdoc />
    public async Task RevokeAsync(Models.Agent agent, CancellationToken ct = default)
    {
        if (string.IsNullOrEmpty(agent.CertificateThumbprint))
        {
            // Nothing recorded, nothing to clear.
            return;
        }

        await _store.ClearCertificateAsync(agent.Id, ct);
    }

    /// <inheritdoc />
    public Task<bool> ValidateAsync(string thumbprint, CancellationToken ct = default)
    {
        // The stub accepts any non-empty thumbprint.
        var accepted = !string.IsNullOrEmpty(thumbprint);
        return Task.FromResult(accepted);
    }

    /// <summary>
    /// Wraps 64 random bytes (Base64) in BEGIN/END lines for <paramref name="label"/>.
    /// </summary>
    private static string GenerateStubPem(string label)
    {
        var payload = Convert.ToBase64String(RandomNumberGenerator.GetBytes(64));
        return $"-----BEGIN {label}-----\n{payload}\n-----END {label}-----";
    }
}

View File

@@ -0,0 +1,98 @@
namespace StellaOps.ReleaseOrchestrator.Agent.Exceptions;
/// <summary>
/// Base exception for agent-related errors.
/// </summary>
/// <remarks>
/// Abstract with protected constructors: only derived exception types can be instantiated.
/// </remarks>
public abstract class AgentException : Exception
{
/// <summary>Initializes the exception with an error message.</summary>
/// <param name="message">Message describing the error.</param>
protected AgentException(string message) : base(message) { }
/// <summary>Initializes the exception with an error message and the exception that caused it.</summary>
/// <param name="message">Message describing the error.</param>
/// <param name="innerException">Underlying cause.</param>
protected AgentException(string message, Exception innerException) : base(message, innerException) { }
}
/// <summary>
/// Raised when an agent name is already taken.
/// </summary>
public sealed class AgentAlreadyExistsException : AgentException
{
    /// <summary>Creates the exception for the conflicting <paramref name="agentName"/>.</summary>
    /// <param name="agentName">Name that is already in use; included in the message.</param>
    public AgentAlreadyExistsException(string agentName)
        : base($"Agent with name '{agentName}' already exists.")
        => AgentName = agentName;

    /// <summary>Name that is already in use.</summary>
    public string AgentName { get; }
}
/// <summary>
/// Raised when no agent exists for a given identifier.
/// </summary>
public sealed class AgentNotFoundException : AgentException
{
    /// <summary>Creates the exception for the missing <paramref name="agentId"/>.</summary>
    /// <param name="agentId">Identifier that could not be resolved; included in the message.</param>
    public AgentNotFoundException(Guid agentId)
        : base($"Agent with ID '{agentId}' was not found.")
        => AgentId = agentId;

    /// <summary>Identifier that could not be resolved.</summary>
    public Guid AgentId { get; }
}
/// <summary>
/// Raised when a registration token is presented after it has already been used.
/// </summary>
public sealed class RegistrationTokenAlreadyUsedException : AgentException
{
    /// <summary>Creates the exception for <paramref name="token"/>.</summary>
    /// <param name="token">Token value that was rejected; it is not part of the message.</param>
    public RegistrationTokenAlreadyUsedException(string token)
        : base("Registration token has already been used.")
        => Token = token;

    /// <summary>Token value that was rejected.</summary>
    public string Token { get; }
}
/// <summary>
/// Raised when a registration token is presented after its expiry.
/// </summary>
public sealed class RegistrationTokenExpiredException : AgentException
{
    /// <summary>Creates the exception for <paramref name="token"/>.</summary>
    /// <param name="token">Token value that was rejected; it is not part of the message.</param>
    public RegistrationTokenExpiredException(string token)
        : base("Registration token has expired.")
        => Token = token;

    /// <summary>Token value that was rejected.</summary>
    public string Token { get; }
}
/// <summary>
/// Raised when a presented registration token is unknown.
/// </summary>
public sealed class RegistrationTokenNotFoundException : AgentException
{
    /// <summary>Creates the exception for <paramref name="token"/>.</summary>
    /// <param name="token">Token value that was looked up; it is not part of the message.</param>
    public RegistrationTokenNotFoundException(string token)
        : base("Registration token was not found.")
        => Token = token;

    /// <summary>Token value that was looked up.</summary>
    public string Token { get; }
}
/// <summary>
/// Raised when an operation is requested on an agent whose current state does not allow it.
/// </summary>
public sealed class InvalidAgentStateException : AgentException
{
    /// <summary>Creates the exception; all three values appear in the message.</summary>
    /// <param name="agentId">Agent the operation targeted.</param>
    /// <param name="currentState">State the agent was in.</param>
    /// <param name="requestedOperation">Operation that was refused.</param>
    public InvalidAgentStateException(Guid agentId, string currentState, string requestedOperation)
        : base($"Agent '{agentId}' is in state '{currentState}' and cannot perform '{requestedOperation}'.")
        => (AgentId, CurrentState, RequestedOperation) = (agentId, currentState, requestedOperation);

    /// <summary>Agent the operation targeted.</summary>
    public Guid AgentId { get; }

    /// <summary>State the agent was in.</summary>
    public string CurrentState { get; }

    /// <summary>Operation that was refused.</summary>
    public string RequestedOperation { get; }
}

View File

@@ -0,0 +1,66 @@
using Microsoft.Extensions.Logging;
using StellaOps.ReleaseOrchestrator.Agent.Models;
using StellaOps.ReleaseOrchestrator.Agent.Store;
namespace StellaOps.ReleaseOrchestrator.Agent.Heartbeat;
/// <summary>
/// Handles incoming agent heartbeats: records them in the store and brings stale agents
/// back to the active state.
/// </summary>
public sealed class HeartbeatProcessor : IHeartbeatProcessor
{
    private readonly IAgentStore _store;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<HeartbeatProcessor> _logger;

    /// <summary>
    /// Creates the processor.
    /// </summary>
    /// <param name="store">Agent store that is read and updated.</param>
    /// <param name="timeProvider">Clock used to stamp the heartbeat.</param>
    /// <param name="logger">Logger for warnings and recovery messages.</param>
    public HeartbeatProcessor(
        IAgentStore store,
        TimeProvider timeProvider,
        ILogger<HeartbeatProcessor> logger)
    {
        (_store, _timeProvider, _logger) = (store, timeProvider, logger);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Heartbeats from unknown or revoked agents are logged and dropped. The stored heartbeat
    /// time is the processor's own clock reading, not the timestamp carried by the message.
    /// </remarks>
    public async Task ProcessAsync(AgentHeartbeat heartbeat, CancellationToken ct = default)
    {
        var agentId = heartbeat.AgentId;
        var agent = await _store.GetAsync(agentId, ct);

        if (agent is null)
        {
            _logger.LogWarning("Received heartbeat from unknown agent {AgentId}", agentId);
            return;
        }

        if (agent is { Status: AgentStatus.Revoked })
        {
            _logger.LogWarning("Received heartbeat from revoked agent {AgentName}", agent.Name);
            return;
        }

        // Record the heartbeat together with the reported resource usage.
        var receivedAt = _timeProvider.GetUtcNow();
        await _store.UpdateHeartbeatAsync(agentId, receivedAt, heartbeat.ResourceStatus, ct);

        if (agent.Status != AgentStatus.Stale)
        {
            return;
        }

        // A stale agent that reports in again becomes active.
        await _store.UpdateStatusAsync(agentId, AgentStatus.Active, ct);
        _logger.LogInformation("Agent {AgentName} recovered from stale state", agent.Name);
    }
}

View File

@@ -0,0 +1,91 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using StellaOps.ReleaseOrchestrator.Agent.Manager;
using StellaOps.ReleaseOrchestrator.Agent.Models;
namespace StellaOps.ReleaseOrchestrator.Agent.Heartbeat;
/// <summary>
/// Background service that monitors agent heartbeats and marks stale agents.
/// </summary>
/// <remarks>
/// A <see cref="Timer"/> periodically lists the active agents and marks every agent whose last
/// heartbeat is older than the configured timeout as stale. Agents that have never sent a
/// heartbeat are skipped. Checks never overlap: a tick that fires while the previous check is
/// still running is dropped.
/// </remarks>
public sealed class HeartbeatTimeoutMonitor : IHostedService, IDisposable
{
    /// <summary>Delay between <see cref="StartAsync"/> and the first check.</summary>
    private static readonly TimeSpan InitialDelay = TimeSpan.FromMinutes(1);

    private readonly IAgentManager _agentManager;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<HeartbeatTimeoutMonitor> _logger;
    private readonly TimeSpan _checkInterval;
    private readonly TimeSpan _heartbeatTimeout;
    private Timer? _timer;

    // 1 while a check is running, 0 otherwise. Timer callbacks run on thread-pool threads and
    // can fire again before the previous callback has finished.
    private int _checkInProgress;

    /// <summary>
    /// Creates the monitor.
    /// </summary>
    /// <param name="agentManager">Used to list active agents and to mark them stale.</param>
    /// <param name="timeProvider">Clock used to measure the time since the last heartbeat.</param>
    /// <param name="logger">Logger for lifecycle, warning and error messages.</param>
    /// <param name="checkInterval">Period between checks; defaults to 30 seconds.</param>
    /// <param name="heartbeatTimeout">Maximum heartbeat age before an agent is marked stale; defaults to 2 minutes.</param>
    public HeartbeatTimeoutMonitor(
        IAgentManager agentManager,
        TimeProvider timeProvider,
        ILogger<HeartbeatTimeoutMonitor> logger,
        TimeSpan? checkInterval = null,
        TimeSpan? heartbeatTimeout = null)
    {
        _agentManager = agentManager;
        _timeProvider = timeProvider;
        _logger = logger;
        _checkInterval = checkInterval ?? TimeSpan.FromSeconds(30);
        _heartbeatTimeout = heartbeatTimeout ?? TimeSpan.FromMinutes(2);
    }

    /// <summary>
    /// Starts the periodic check. The first check runs after one minute, subsequent checks
    /// every check interval.
    /// </summary>
    public Task StartAsync(CancellationToken ct)
    {
        // A repeated start must not leak the timer created by a previous start.
        _timer?.Dispose();
        _timer = new Timer(
            CheckForTimeouts,
            null,
            InitialDelay,
            _checkInterval);

        _logger.LogInformation(
            "Heartbeat timeout monitor started (check interval: {CheckInterval}, timeout: {Timeout})",
            _checkInterval,
            _heartbeatTimeout);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Stops scheduling further checks. A check that is already running is not interrupted.
    /// </summary>
    public Task StopAsync(CancellationToken ct)
    {
        _timer?.Change(Timeout.Infinite, 0);
        _logger.LogInformation("Heartbeat timeout monitor stopped");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Timer callback. It is <c>async void</c> because <see cref="TimerCallback"/> returns void,
    /// so every exception is caught and logged here rather than escaping to the thread pool.
    /// </summary>
    private async void CheckForTimeouts(object? state)
    {
        // Drop this tick if the previous check has not finished yet; otherwise two checks could
        // mark (and log) the same agents concurrently.
        if (Interlocked.CompareExchange(ref _checkInProgress, 1, 0) != 0)
        {
            _logger.LogDebug("Heartbeat timeout check skipped: previous check still running");
            return;
        }

        try
        {
            var agents = await _agentManager.ListActiveAsync();
            var now = _timeProvider.GetUtcNow();

            foreach (var agent in agents)
            {
                // No heartbeat recorded yet: nothing to measure against.
                if (agent.LastHeartbeatAt is not { } lastHeartbeat)
                {
                    continue;
                }

                if (now - lastHeartbeat > _heartbeatTimeout)
                {
                    _logger.LogWarning(
                        "Agent {AgentName} missed heartbeat (last: {LastHeartbeat})",
                        agent.Name,
                        agent.LastHeartbeatAt);

                    await _agentManager.MarkStaleAsync(agent.Id);
                }
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Heartbeat timeout check failed");
        }
        finally
        {
            Interlocked.Exchange(ref _checkInProgress, 0);
        }
    }

    /// <summary>Releases the timer.</summary>
    public void Dispose() => _timer?.Dispose();
}

View File

@@ -0,0 +1,14 @@
using StellaOps.ReleaseOrchestrator.Agent.Models;
namespace StellaOps.ReleaseOrchestrator.Agent.Heartbeat;
/// <summary>
/// Processor for agent heartbeats.
/// </summary>
public interface IHeartbeatProcessor
{
/// <summary>
/// Processes a heartbeat from an agent.
/// </summary>
/// <param name="heartbeat">Heartbeat message received from the agent.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>A task that completes when the heartbeat has been handled.</returns>
Task ProcessAsync(AgentHeartbeat heartbeat, CancellationToken ct = default);
}

View File

@@ -0,0 +1,243 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.ReleaseOrchestrator.Agent.Certificate;
using StellaOps.ReleaseOrchestrator.Agent.Exceptions;
using StellaOps.ReleaseOrchestrator.Agent.Heartbeat;
using StellaOps.ReleaseOrchestrator.Agent.Models;
using StellaOps.ReleaseOrchestrator.Agent.Registration;
using StellaOps.ReleaseOrchestrator.Agent.Store;
namespace StellaOps.ReleaseOrchestrator.Agent.Manager;
/// <summary>
/// Manager for agent lifecycle operations.
/// </summary>
/// <remarks>
/// Coordinates the agent store, the registration token service, the certificate service and the
/// heartbeat processor. Name-based lookups and listings are scoped to the tenant returned by the
/// injected tenant id provider; lookups by id go straight to the store.
/// </remarks>
public sealed class AgentManager : IAgentManager
{
    private readonly IAgentStore _store;
    private readonly RegistrationTokenService _tokenService;
    private readonly IAgentCertificateService _certificateService;
    private readonly IHeartbeatProcessor _heartbeatProcessor;
    private readonly TimeProvider _timeProvider;
    private readonly Func<Guid> _guidGenerator;
    private readonly Func<Guid> _tenantIdProvider;
    private readonly ILogger<AgentManager> _logger;

    /// <summary>
    /// Creates the manager.
    /// </summary>
    /// <param name="store">Persistence for agent records.</param>
    /// <param name="tokenService">Creates, validates and consumes registration tokens.</param>
    /// <param name="certificateService">Issues, renews and revokes agent certificates.</param>
    /// <param name="heartbeatProcessor">Handles incoming heartbeats.</param>
    /// <param name="timeProvider">Clock used for registration timestamps.</param>
    /// <param name="guidGenerator">Produces identifiers for new agents.</param>
    /// <param name="tenantIdProvider">Returns the tenant used for name lookups and listings.</param>
    /// <param name="logger">Logger for lifecycle events.</param>
    public AgentManager(
        IAgentStore store,
        RegistrationTokenService tokenService,
        IAgentCertificateService certificateService,
        IHeartbeatProcessor heartbeatProcessor,
        TimeProvider timeProvider,
        Func<Guid> guidGenerator,
        Func<Guid> tenantIdProvider,
        ILogger<AgentManager> logger)
    {
        _store = store;
        _tokenService = tokenService;
        _certificateService = certificateService;
        _heartbeatProcessor = heartbeatProcessor;
        _timeProvider = timeProvider;
        _guidGenerator = guidGenerator;
        _tenantIdProvider = tenantIdProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public Task<RegistrationToken> CreateRegistrationTokenAsync(
        CreateRegistrationTokenRequest request,
        CancellationToken ct = default)
    {
        return _tokenService.CreateAsync(request, ct);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Consumes the registration token, saves a new active agent built from the token and the
    /// request, issues its certificate and returns the reloaded agent with that certificate.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="AgentNotFoundException">The agent could not be reloaded after it was saved.</exception>
    public async Task<AgentRegistrationResult> RegisterAsync(
        AgentRegistrationRequest request,
        CancellationToken ct = default)
    {
        // Fail before the one-time token is consumed.
        ArgumentNullException.ThrowIfNull(request);

        // Validate and consume token
        var token = await _tokenService.ValidateAndConsumeAsync(request.Token, ct);

        var now = _timeProvider.GetUtcNow();

        // Create the agent. Identity and capabilities come from the token; version, hostname
        // and labels come from the registering agent.
        var agent = new Models.Agent
        {
            Id = _guidGenerator(),
            TenantId = token.TenantId,
            Name = token.AgentName,
            DisplayName = token.DisplayName,
            Version = request.AgentVersion,
            Hostname = request.Hostname,
            Status = AgentStatus.Active,
            Capabilities = token.Capabilities,
            Labels = request.Labels.ToImmutableDictionary(),
            CertificateThumbprint = null,
            CertificateExpiresAt = null,
            LastHeartbeatAt = now,
            LastResourceStatus = null,
            RegisteredAt = now,
            CreatedAt = now,
            UpdatedAt = now
        };

        await _store.SaveAsync(agent, ct);

        // Issue certificate
        var certificate = await _certificateService.IssueAsync(agent, ct);

        // Reload agent with certificate info. A missing record is reported explicitly instead of
        // surfacing as a NullReferenceException.
        var registered = await _store.GetAsync(agent.Id, ct)
            ?? throw new AgentNotFoundException(agent.Id);

        _logger.LogInformation(
            "Agent {AgentName} registered successfully",
            registered.Name);

        return new AgentRegistrationResult(registered, certificate);
    }

    /// <inheritdoc />
    public Task<Models.Agent?> GetAsync(Guid id, CancellationToken ct = default)
    {
        return _store.GetAsync(id, ct);
    }

    /// <inheritdoc />
    public Task<Models.Agent?> GetByNameAsync(string name, CancellationToken ct = default)
    {
        var tenantId = _tenantIdProvider();
        return _store.GetByNameAsync(tenantId, name, ct);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<Models.Agent>> ListAsync(AgentFilter? filter = null, CancellationToken ct = default)
    {
        var tenantId = _tenantIdProvider();
        return _store.ListAsync(tenantId, filter, ct);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<Models.Agent>> ListActiveAsync(CancellationToken ct = default)
    {
        var tenantId = _tenantIdProvider();
        return _store.ListAsync(tenantId, new AgentFilter(Status: AgentStatus.Active), ct);
    }

    /// <inheritdoc />
    /// <exception cref="AgentNotFoundException">No agent exists with <paramref name="id"/>.</exception>
    /// <exception cref="InvalidAgentStateException">The agent is revoked.</exception>
    public async Task ActivateAsync(Guid id, CancellationToken ct = default)
    {
        var agent = await GetAgentOrThrowAsync(id, ct);

        if (agent.Status == AgentStatus.Revoked)
        {
            throw new InvalidAgentStateException(id, agent.Status.ToString(), "activate");
        }

        await _store.UpdateStatusAsync(id, AgentStatus.Active, ct);

        _logger.LogInformation(
            "Agent {AgentName} activated",
            agent.Name);
    }

    /// <inheritdoc />
    /// <exception cref="AgentNotFoundException">No agent exists with <paramref name="id"/>.</exception>
    /// <exception cref="InvalidAgentStateException">The agent is revoked.</exception>
    public async Task DeactivateAsync(Guid id, CancellationToken ct = default)
    {
        var agent = await GetAgentOrThrowAsync(id, ct);

        if (agent.Status == AgentStatus.Revoked)
        {
            throw new InvalidAgentStateException(id, agent.Status.ToString(), "deactivate");
        }

        await _store.UpdateStatusAsync(id, AgentStatus.Inactive, ct);

        _logger.LogInformation(
            "Agent {AgentName} deactivated",
            agent.Name);
    }

    /// <inheritdoc />
    /// <remarks>The reason is only written to the log.</remarks>
    /// <exception cref="AgentNotFoundException">No agent exists with <paramref name="id"/>.</exception>
    public async Task RevokeAsync(Guid id, string reason, CancellationToken ct = default)
    {
        var agent = await GetAgentOrThrowAsync(id, ct);

        // Revoke certificate
        await _certificateService.RevokeAsync(agent, ct);

        // Update status
        await _store.UpdateStatusAsync(id, AgentStatus.Revoked, ct);

        _logger.LogWarning(
            "Agent {AgentName} revoked: {Reason}",
            agent.Name,
            reason);
    }

    /// <inheritdoc />
    /// <remarks>Does nothing when the agent is unknown or not currently active.</remarks>
    public async Task MarkStaleAsync(Guid id, CancellationToken ct = default)
    {
        var agent = await _store.GetAsync(id, ct);

        // Only active agents can become stale; anything else is left untouched.
        if (agent is null || agent.Status != AgentStatus.Active)
        {
            return;
        }

        await _store.UpdateStatusAsync(id, AgentStatus.Stale, ct);

        _logger.LogWarning(
            "Agent {AgentName} marked as stale",
            agent.Name);
    }

    /// <inheritdoc />
    public Task ProcessHeartbeatAsync(AgentHeartbeat heartbeat, CancellationToken ct = default)
    {
        return _heartbeatProcessor.ProcessAsync(heartbeat, ct);
    }

    /// <inheritdoc />
    /// <exception cref="AgentNotFoundException">No agent exists with <paramref name="id"/>.</exception>
    /// <exception cref="InvalidAgentStateException">The agent is revoked.</exception>
    public async Task<AgentCertificate> RenewCertificateAsync(Guid id, CancellationToken ct = default)
    {
        var agent = await GetAgentOrThrowAsync(id, ct);

        if (agent.Status == AgentStatus.Revoked)
        {
            throw new InvalidAgentStateException(id, agent.Status.ToString(), "renew certificate");
        }

        var certificate = await _certificateService.RenewAsync(agent, ct);

        _logger.LogInformation(
            "Certificate renewed for agent {AgentName}",
            agent.Name);

        return certificate;
    }

    /// <inheritdoc />
    /// <exception cref="NotImplementedException">Always; thrown synchronously, before any task is returned.</exception>
    public Task<TaskResult> ExecuteTaskAsync(
        Guid agentId,
        AgentTask task,
        CancellationToken ct = default)
    {
        // Stub implementation - task execution will be implemented in a later sprint
        throw new NotImplementedException("Task execution not yet implemented. See sprint 103_005.");
    }

    /// <summary>
    /// Loads an agent by id, throwing <see cref="AgentNotFoundException"/> when the store has no such agent.
    /// </summary>
    private async Task<Models.Agent> GetAgentOrThrowAsync(Guid id, CancellationToken ct)
    {
        return await _store.GetAsync(id, ct) ?? throw new AgentNotFoundException(id);
    }
}

View File

@@ -0,0 +1,57 @@
using StellaOps.ReleaseOrchestrator.Agent.Models;
using StellaOps.ReleaseOrchestrator.Agent.Registration;
namespace StellaOps.ReleaseOrchestrator.Agent.Manager;
/// <summary>
/// Manager for agent lifecycle operations.
/// </summary>
public interface IAgentManager
{
// Registration
/// <summary>Creates a registration token from the given request.</summary>
Task<RegistrationToken> CreateRegistrationTokenAsync(
CreateRegistrationTokenRequest request,
CancellationToken ct = default);
/// <summary>Registers an agent using a registration token.</summary>
/// <returns>The registered agent together with its certificate.</returns>
Task<AgentRegistrationResult> RegisterAsync(
AgentRegistrationRequest request,
CancellationToken ct = default);
// Lifecycle
/// <summary>Gets an agent by identifier, or <c>null</c> when there is none.</summary>
Task<Models.Agent?> GetAsync(Guid id, CancellationToken ct = default);
/// <summary>Gets an agent by name, or <c>null</c> when there is none.</summary>
Task<Models.Agent?> GetByNameAsync(string name, CancellationToken ct = default);
/// <summary>Lists agents, optionally restricted by <paramref name="filter"/>.</summary>
Task<IReadOnlyList<Models.Agent>> ListAsync(AgentFilter? filter = null, CancellationToken ct = default);
/// <summary>Lists agents whose status is <see cref="AgentStatus.Active"/>.</summary>
Task<IReadOnlyList<Models.Agent>> ListActiveAsync(CancellationToken ct = default);
/// <summary>Sets the agent's status to <see cref="AgentStatus.Active"/>.</summary>
Task ActivateAsync(Guid id, CancellationToken ct = default);
/// <summary>Sets the agent's status to <see cref="AgentStatus.Inactive"/>.</summary>
Task DeactivateAsync(Guid id, CancellationToken ct = default);
/// <summary>Revokes the agent, setting its status to <see cref="AgentStatus.Revoked"/>.</summary>
/// <param name="id">Agent to revoke.</param>
/// <param name="reason">Why the agent is being revoked.</param>
/// <param name="ct">Token used to cancel the operation.</param>
Task RevokeAsync(Guid id, string reason, CancellationToken ct = default);
/// <summary>Marks the agent as <see cref="AgentStatus.Stale"/>.</summary>
Task MarkStaleAsync(Guid id, CancellationToken ct = default);
// Heartbeat
/// <summary>Processes a heartbeat received from an agent.</summary>
Task ProcessHeartbeatAsync(AgentHeartbeat heartbeat, CancellationToken ct = default);
// Certificate
/// <summary>Renews the agent's certificate and returns the new one.</summary>
Task<AgentCertificate> RenewCertificateAsync(Guid id, CancellationToken ct = default);
// Task execution
/// <summary>Executes a task on the given agent and returns its result.</summary>
Task<TaskResult> ExecuteTaskAsync(
Guid agentId,
AgentTask task,
CancellationToken ct = default);
}
/// <summary>
/// Request to register an agent.
/// </summary>
/// <param name="Token">Registration token value presented by the agent.</param>
/// <param name="AgentVersion">Version reported by the agent.</param>
/// <param name="Hostname">Hostname reported by the agent.</param>
/// <param name="Labels">Labels to attach to the agent.</param>
public sealed record AgentRegistrationRequest(
string Token,
string AgentVersion,
string Hostname,
IReadOnlyDictionary<string, string> Labels);
/// <summary>
/// Result of agent registration.
/// </summary>
/// <param name="Agent">The registered agent.</param>
/// <param name="Certificate">The certificate issued to the agent.</param>
public sealed record AgentRegistrationResult(
Models.Agent Agent,
AgentCertificate Certificate);

View File

@@ -0,0 +1,164 @@
using System.Collections.Immutable;
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
/// <summary>
/// Represents a deployment agent.
/// </summary>
/// <remarks>
/// Immutable record: every member is init-only. Members marked <c>required</c> must be set at
/// creation; the remaining members are optional.
/// </remarks>
public sealed record Agent
{
/// <summary>
/// Unique identifier for the agent.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant this agent belongs to.
/// </summary>
public required Guid TenantId { get; init; }
/// <summary>
/// Unique name for the agent.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Display name for UI.
/// </summary>
public required string DisplayName { get; init; }
/// <summary>
/// Agent version.
/// </summary>
public required string Version { get; init; }
/// <summary>
/// Hostname where the agent runs; <c>null</c> when not set.
/// </summary>
public string? Hostname { get; init; }
/// <summary>
/// Current status of the agent.
/// </summary>
public required AgentStatus Status { get; init; }
/// <summary>
/// Capabilities this agent supports.
/// </summary>
public required ImmutableArray<AgentCapability> Capabilities { get; init; }
/// <summary>
/// Labels for agent selection. Defaults to an empty dictionary.
/// </summary>
public ImmutableDictionary<string, string> Labels { get; init; } = ImmutableDictionary<string, string>.Empty;
/// <summary>
/// Certificate thumbprint for mTLS; <c>null</c> when not set.
/// </summary>
public string? CertificateThumbprint { get; init; }
/// <summary>
/// When the certificate expires; <c>null</c> when not set.
/// </summary>
public DateTimeOffset? CertificateExpiresAt { get; init; }
/// <summary>
/// When the last heartbeat was received; <c>null</c> when not set.
/// </summary>
public DateTimeOffset? LastHeartbeatAt { get; init; }
/// <summary>
/// Last reported resource status; <c>null</c> when not set.
/// </summary>
public AgentResourceStatus? LastResourceStatus { get; init; }
/// <summary>
/// When the agent was registered; <c>null</c> when not set.
/// </summary>
public DateTimeOffset? RegisteredAt { get; init; }
/// <summary>
/// When the agent record was created.
/// </summary>
public DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// When the agent record was last updated.
/// </summary>
public DateTimeOffset UpdatedAt { get; init; }
}
/// <summary>
/// Status of an agent.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly.
/// </remarks>
public enum AgentStatus
{
/// <summary>
/// Token created, not yet registered.
/// </summary>
Pending = 0,
/// <summary>
/// Registered and healthy.
/// </summary>
Active = 1,
/// <summary>
/// Manually deactivated.
/// </summary>
Inactive = 2,
/// <summary>
/// Missed heartbeats.
/// </summary>
Stale = 3,
/// <summary>
/// Permanently disabled.
/// </summary>
Revoked = 4
}
/// <summary>
/// Capabilities an agent can have.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly.
/// </remarks>
public enum AgentCapability
{
/// <summary>
/// Docker Engine support.
/// </summary>
Docker = 0,
/// <summary>
/// Docker Compose support.
/// </summary>
Compose = 1,
/// <summary>
/// SSH support.
/// </summary>
Ssh = 2,
/// <summary>
/// WinRM support.
/// </summary>
WinRm = 3
}
/// <summary>
/// Resource status reported by an agent.
/// </summary>
/// <param name="CpuPercent">CPU usage as a percentage.</param>
/// <param name="MemoryUsedBytes">Memory in use, in bytes.</param>
/// <param name="MemoryTotalBytes">Total memory, in bytes.</param>
/// <param name="DiskUsedBytes">Disk space in use, in bytes.</param>
/// <param name="DiskTotalBytes">Total disk space, in bytes.</param>
public sealed record AgentResourceStatus(
double CpuPercent,
long MemoryUsedBytes,
long MemoryTotalBytes,
long DiskUsedBytes,
long DiskTotalBytes);
/// <summary>
/// Filter for listing agents.
/// </summary>
/// <param name="Status">Status criterion; defaults to <c>null</c>.</param>
/// <param name="Capability">Capability criterion; defaults to <c>null</c>.</param>
/// <param name="Labels">Label criterion; defaults to <c>null</c>.</param>
/// <remarks>
/// NOTE(review): presumably a <c>null</c> criterion means "do not filter on this field" —
/// confirm against the store that applies the filter.
/// </remarks>
public sealed record AgentFilter(
AgentStatus? Status = null,
AgentCapability? Capability = null,
IReadOnlyDictionary<string, string>? Labels = null);

View File

@@ -0,0 +1,37 @@
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
/// <summary>
/// Certificate issued to an agent for mTLS authentication.
/// </summary>
/// <remarks>
/// Immutable record: every member is <c>required</c> and init-only.
/// </remarks>
public sealed record AgentCertificate
{
/// <summary>
/// Certificate thumbprint (SHA-256).
/// </summary>
public required string Thumbprint { get; init; }
/// <summary>
/// Subject name from the certificate.
/// </summary>
public required string SubjectName { get; init; }
/// <summary>
/// Certificate validity start.
/// </summary>
public required DateTimeOffset NotBefore { get; init; }
/// <summary>
/// Certificate validity end.
/// </summary>
public required DateTimeOffset NotAfter { get; init; }
/// <summary>
/// PEM-encoded certificate.
/// </summary>
public required string CertificatePem { get; init; }
/// <summary>
/// PEM-encoded private key (only returned during issuance).
/// </summary>
public required string PrivateKeyPem { get; init; }
}

View File

@@ -0,0 +1,11 @@
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
/// <summary>
/// Heartbeat message sent by an agent.
/// </summary>
/// <param name="AgentId">Identifier of the sending agent.</param>
/// <param name="Version">Version reported by the agent.</param>
/// <param name="ResourceStatus">Resource usage reported with the heartbeat.</param>
/// <param name="RunningTasks">Tasks the agent reports as running.</param>
/// <param name="Timestamp">Timestamp carried by the message.</param>
public sealed record AgentHeartbeat(
Guid AgentId,
string Version,
AgentResourceStatus ResourceStatus,
IReadOnlyList<string> RunningTasks,
DateTimeOffset Timestamp);

View File

@@ -0,0 +1,67 @@
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
/// <summary>
/// Base class for tasks that can be executed by agents.
/// </summary>
/// <remarks>
/// Abstract record: concrete task types derive from it and supply <see cref="TaskType"/>.
/// </remarks>
public abstract record AgentTask
{
/// <summary>
/// Task identifier.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Task type discriminator; implemented by each derived task.
/// </summary>
public abstract string TaskType { get; }
}
/// <summary>
/// Health check task for a target.
/// </summary>
public sealed record HealthCheckTask : AgentTask
{
/// <inheritdoc />
/// <remarks>Always <c>health_check</c>.</remarks>
public override string TaskType => "health_check";
/// <summary>
/// Target to check.
/// </summary>
public required Guid TargetId { get; init; }
}
/// <summary>
/// Result of task execution.
/// </summary>
/// <remarks>
/// Only <see cref="TaskId"/> and <see cref="Success"/> are required; the other members are
/// optional and keep their type defaults when not set.
/// </remarks>
public sealed record TaskResult
{
/// <summary>
/// Task that was executed.
/// </summary>
public required Guid TaskId { get; init; }
/// <summary>
/// Whether the task succeeded.
/// </summary>
public required bool Success { get; init; }
/// <summary>
/// Result message; <c>null</c> when not set.
/// </summary>
public string? Message { get; init; }
/// <summary>
/// Result data (JSON); <c>null</c> when not set.
/// </summary>
public string? ResultData { get; init; }
/// <summary>
/// Task execution duration.
/// </summary>
public TimeSpan Duration { get; init; }
/// <summary>
/// When the task completed.
/// </summary>
public DateTimeOffset CompletedAt { get; init; }
}

View File

@@ -0,0 +1,111 @@
using System.Collections.Immutable;
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
/// <summary>
/// Task instructing an agent to deploy (or roll back) a release's containers on a target.
/// </summary>
public sealed record DeploymentAgentTask : AgentTask
{
    /// <inheritdoc />
    /// <remarks>
    /// Derived from <see cref="DeployType"/>; values outside the known set map to <c>unknown</c>.
    /// </remarks>
    public override string TaskType
    {
        get
        {
            switch (DeployType)
            {
                case DeploymentTaskType.DockerDeploy:
                    return "docker_deploy";
                case DeploymentTaskType.ComposeDeploy:
                    return "compose_deploy";
                case DeploymentTaskType.DockerRollback:
                    return "docker_rollback";
                case DeploymentTaskType.ComposeRollback:
                    return "compose_rollback";
                default:
                    return "unknown";
            }
        }
    }

    /// <summary>
    /// Which deployment operation to perform.
    /// </summary>
    public required DeploymentTaskType DeployType { get; init; }

    /// <summary>
    /// Identifier of the release being deployed.
    /// </summary>
    public required Guid ReleaseId { get; init; }

    /// <summary>
    /// Name of the release, for logging.
    /// </summary>
    public required string ReleaseName { get; init; }

    /// <summary>
    /// Components that make up the deployment.
    /// </summary>
    public required ImmutableArray<AgentDeploymentComponent> Components { get; init; }

    /// <summary>
    /// Compose lock content, used for compose deployments; <c>null</c> when not set.
    /// </summary>
    public string? ComposeLock { get; init; }

    /// <summary>
    /// Version sticker content; <c>null</c> when not set.
    /// </summary>
    public string? VersionSticker { get; init; }

    /// <summary>
    /// Deployment variables. Defaults to an empty dictionary.
    /// </summary>
    public ImmutableDictionary<string, string> Variables { get; init; } = ImmutableDictionary<string, string>.Empty;

    /// <summary>
    /// Time allowed for the deployment operation. Defaults to 30 minutes.
    /// </summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(30);
}
/// <summary>
/// Types of deployment operations.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly.
/// </remarks>
public enum DeploymentTaskType
{
/// <summary>
/// Deploy using Docker Engine.
/// </summary>
DockerDeploy = 0,
/// <summary>
/// Deploy using Docker Compose.
/// </summary>
ComposeDeploy = 1,
/// <summary>
/// Rollback using Docker Engine.
/// </summary>
DockerRollback = 2,
/// <summary>
/// Rollback using Docker Compose.
/// </summary>
ComposeRollback = 3
}
/// <summary>
/// Component to deploy via agent.
/// </summary>
/// <remarks>
/// Immutable record: every member is init-only.
/// </remarks>
public sealed record AgentDeploymentComponent
{
/// <summary>
/// Component name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Full image reference with digest.
/// </summary>
public required string Image { get; init; }
/// <summary>
/// Image digest.
/// </summary>
public required string Digest { get; init; }
/// <summary>
/// Component-specific configuration. Defaults to an empty dictionary.
/// </summary>
public ImmutableDictionary<string, string> Config { get; init; } = ImmutableDictionary<string, string>.Empty;
}

View File

@@ -0,0 +1,54 @@
using System.Collections.Immutable;
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
/// <summary>
/// One-time registration token for agent registration.
/// </summary>
/// <remarks>
/// Immutable record: every member is <c>required</c> and init-only. The token carries the
/// name, display name and capabilities intended for the agent that registers with it.
/// </remarks>
public sealed record RegistrationToken
{
/// <summary>
/// Unique identifier for the token.
/// </summary>
public required Guid Id { get; init; }
/// <summary>
/// Tenant this token belongs to.
/// </summary>
public required Guid TenantId { get; init; }
/// <summary>
/// The secure token value.
/// </summary>
public required string Token { get; init; }
/// <summary>
/// Intended agent name.
/// </summary>
public required string AgentName { get; init; }
/// <summary>
/// Intended display name.
/// </summary>
public required string DisplayName { get; init; }
/// <summary>
/// Capabilities the agent will have.
/// </summary>
public required ImmutableArray<AgentCapability> Capabilities { get; init; }
/// <summary>
/// When the token expires.
/// </summary>
public required DateTimeOffset ExpiresAt { get; init; }
/// <summary>
/// When the token was created.
/// </summary>
public required DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// Whether the token has been used.
/// </summary>
public required bool IsUsed { get; init; }
}

Some files were not shown because too many files have changed in this diff Show More