release orchestrator v1 draft and build fixes
This commit is contained in:
@@ -0,0 +1,104 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Compose.Tasks;
|
||||
using StellaOps.Agent.Core.Capability;
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Compose;
|
||||
|
||||
/// <summary>
/// Agent capability that manages docker-compose stacks by dispatching each supported
/// task type to a dedicated <see cref="IComposeTask"/> handler.
/// </summary>
public sealed class ComposeCapability : IAgentCapability
{
    // Built once and shared: the property used to allocate a fresh array on every access.
    // Wrapped read-only so callers cannot mutate the shared instance through a cast.
    private static readonly IReadOnlyList<string> s_supportedTaskTypes = Array.AsReadOnly(new[]
    {
        "compose.pull",
        "compose.up",
        "compose.down",
        "compose.scale",
        "compose.health-check",
        "compose.ps"
    });

    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<ComposeCapability> _logger;
    private readonly Dictionary<string, IComposeTask> _taskHandlers;

    /// <inheritdoc />
    public string Name => "compose";

    /// <inheritdoc />
    public string Version => "1.0.0";

    /// <inheritdoc />
    public IReadOnlyList<string> SupportedTaskTypes => s_supportedTaskTypes;

    /// <summary>
    /// Creates the capability and wires one handler per supported task type.
    /// </summary>
    /// <param name="executor">Runs the docker compose commands.</param>
    /// <param name="fileManager">Resolves and manages project directories and compose files.</param>
    /// <param name="timeProvider">Clock handed to task handlers for timestamps and durations.</param>
    /// <param name="logger">Logger shared with the task handlers.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public ComposeCapability(
        ComposeExecutor executor,
        ComposeFileManager fileManager,
        TimeProvider timeProvider,
        ILogger<ComposeCapability> logger)
    {
        ArgumentNullException.ThrowIfNull(executor);
        ArgumentNullException.ThrowIfNull(fileManager);
        ArgumentNullException.ThrowIfNull(timeProvider);
        ArgumentNullException.ThrowIfNull(logger);

        _executor = executor;
        _fileManager = fileManager;
        _timeProvider = timeProvider;
        _logger = logger;

        // Task types are matched case-insensitively.
        _taskHandlers = new Dictionary<string, IComposeTask>(StringComparer.OrdinalIgnoreCase)
        {
            ["compose.pull"] = new ComposePullTask(_executor, _fileManager, logger),
            ["compose.up"] = new ComposeUpTask(_executor, _fileManager, logger),
            ["compose.down"] = new ComposeDownTask(_executor, _fileManager, logger),
            ["compose.scale"] = new ComposeScaleTask(_executor, _fileManager, logger),
            ["compose.health-check"] = new ComposeHealthCheckTask(_executor, _fileManager, logger),
            ["compose.ps"] = new ComposePsTask(_executor, _fileManager, logger)
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns <c>false</c> (after logging the error) when the compose version query throws;
    /// no exception escapes this method.
    /// </remarks>
    public async Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        try
        {
            var version = await _executor.GetVersionAsync(ct);
            _logger.LogInformation("Compose capability initialized: {Version}", version);
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to initialize Compose capability");
            return false;
        }
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
    /// <exception cref="UnsupportedTaskTypeException">No handler is registered for the task type.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        if (!_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            throw new UnsupportedTaskTypeException(task.TaskType);
        }

        _logger.LogDebug("Executing task {TaskType} with ID {TaskId}", task.TaskType, task.Id);

        return await handler.ExecuteAsync(task, _timeProvider, ct);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Probes availability by querying the compose version; any exception is converted
    /// into an unhealthy status carrying the exception message.
    /// </remarks>
    public async Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        try
        {
            await _executor.GetVersionAsync(ct);
            return new CapabilityHealthStatus(true, "Docker Compose available");
        }
        catch (Exception ex)
        {
            return new CapabilityHealthStatus(false, $"Docker Compose not available: {ex.Message}");
        }
    }
}
|
||||
@@ -0,0 +1,321 @@
|
||||
using System.Diagnostics;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Agent.Compose;
|
||||
|
||||
/// <summary>
/// Runs Docker Compose commands as child processes and captures their exit code,
/// standard output and standard error.
/// </summary>
/// <remarks>
/// Arguments are passed to the child process as a list (never as one concatenated command
/// line), so paths and service names containing spaces or quotes stay single arguments.
/// </remarks>
public sealed class ComposeExecutor
{
    /// <summary>How long the "docker compose version" probe may run before it is abandoned.</summary>
    private const int DetectionTimeoutMilliseconds = 5000;

    private readonly string _composeCommand;
    private readonly ILogger<ComposeExecutor> _logger;

    /// <summary>
    /// Creates a new instance, auto-detecting whether <c>docker compose</c> (v2) or
    /// <c>docker-compose</c> (v1) should be used.
    /// </summary>
    /// <param name="logger">Logger for command execution diagnostics.</param>
    public ComposeExecutor(ILogger<ComposeExecutor> logger)
    {
        _logger = logger;
        _composeCommand = DetectComposeCommand();
    }

    /// <summary>
    /// Creates a new instance with a specific compose command.
    /// </summary>
    /// <param name="composeCommand">
    /// Either a value starting with <c>docker compose</c> (run as <c>docker compose …</c>)
    /// or the name/path of a compose executable such as <c>docker-compose</c>.
    /// </param>
    /// <param name="logger">Logger for command execution diagnostics.</param>
    /// <exception cref="ArgumentException"><paramref name="composeCommand"/> is null or blank.</exception>
    public ComposeExecutor(string composeCommand, ILogger<ComposeExecutor> logger)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(composeCommand);

        _composeCommand = composeCommand;
        _logger = logger;
    }

    /// <summary>
    /// Gets the docker compose version.
    /// </summary>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The trimmed output of <c>version --short</c>.</returns>
    /// <exception cref="InvalidOperationException">
    /// The version command exited with a non-zero code. Previously the (usually empty) output
    /// was returned anyway, which made availability checks report success on failure.
    /// </exception>
    public async Task<string> GetVersionAsync(CancellationToken ct = default)
    {
        var result = await ExecuteAsync(new[] { "version", "--short" }, null, ct);

        if (!result.Success)
        {
            throw new InvalidOperationException(
                $"Docker Compose version query failed with exit code {result.ExitCode}: {result.StandardError.Trim()}");
        }

        return result.StandardOutput.Trim();
    }

    /// <summary>
    /// Pulls images for a compose project.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Path of the compose file passed via <c>-f</c>.</param>
    /// <param name="credentials">
    /// Optional credentials; the <c>registry.username</c> and <c>registry.password</c> keys are
    /// exported to the child process as environment variables.
    /// </param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result.</returns>
    public async Task<ComposeResult> PullAsync(
        string projectDir,
        string composeFile,
        IReadOnlyDictionary<string, string>? credentials = null,
        CancellationToken ct = default)
    {
        var args = new[] { "-f", composeFile, "pull" };
        return await ExecuteAsync(args, projectDir, ct, BuildEnvironment(credentials));
    }

    /// <summary>
    /// Starts a compose project in detached mode.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Path of the compose file passed via <c>-f</c>.</param>
    /// <param name="options">Flags, optional service subset and environment for the command.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public async Task<ComposeResult> UpAsync(
        string projectDir,
        string composeFile,
        ComposeUpOptions options,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(options);

        var args = new List<string> { "-f", composeFile, "up", "-d" };

        if (options.ForceRecreate)
        {
            args.Add("--force-recreate");
        }

        if (options.RemoveOrphans)
        {
            args.Add("--remove-orphans");
        }

        if (options.NoStart)
        {
            args.Add("--no-start");
        }

        if (options.Services is { Count: > 0 } services)
        {
            // Each service name is its own argument; nothing is re-parsed by a shell.
            args.AddRange(services);
        }

        return await ExecuteAsync(args, projectDir, ct, options.Environment);
    }

    /// <summary>
    /// Stops and removes a compose project.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Path of the compose file passed via <c>-f</c>.</param>
    /// <param name="options">Volume/orphan removal flags and optional stop timeout.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public async Task<ComposeResult> DownAsync(
        string projectDir,
        string composeFile,
        ComposeDownOptions options,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(options);

        var args = new List<string> { "-f", composeFile, "down" };

        if (options.RemoveVolumes)
        {
            args.Add("-v");
        }

        if (options.RemoveOrphans)
        {
            args.Add("--remove-orphans");
        }

        if (options.Timeout is { } timeout)
        {
            // Whole seconds, formatted culture-independently because this is a CLI argument.
            args.Add("-t");
            args.Add(((int)timeout.TotalSeconds).ToString(System.Globalization.CultureInfo.InvariantCulture));
        }

        return await ExecuteAsync(args, projectDir, ct);
    }

    /// <summary>
    /// Scales services in a compose project without recreating existing containers.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Path of the compose file passed via <c>-f</c>.</param>
    /// <param name="scaling">Map of service name to desired instance count.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="scaling"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="scaling"/> is empty; running <c>up</c> without any scale request would
    /// start the stack instead of scaling it.
    /// </exception>
    public async Task<ComposeResult> ScaleAsync(
        string projectDir,
        string composeFile,
        IReadOnlyDictionary<string, int> scaling,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(scaling);

        if (scaling.Count == 0)
        {
            throw new ArgumentException("At least one service must be specified for scaling.", nameof(scaling));
        }

        var args = new List<string> { "-f", composeFile, "up", "-d", "--no-recreate" };

        // Compose expects one "--scale SERVICE=NUM" flag per service. A single flag followed by
        // several pairs makes every pair after the first be read as a service name to start.
        foreach (var (service, replicas) in scaling)
        {
            args.Add("--scale");
            args.Add($"{service}={replicas.ToString(System.Globalization.CultureInfo.InvariantCulture)}");
        }

        return await ExecuteAsync(args, projectDir, ct);
    }

    /// <summary>
    /// Lists services in a compose project as JSON.
    /// </summary>
    /// <param name="projectDir">Working directory for the command.</param>
    /// <param name="composeFile">Path of the compose file passed via <c>-f</c>.</param>
    /// <param name="all">When <c>true</c>, adds <c>-a</c> to the command.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The command result; standard output holds the JSON listing.</returns>
    public async Task<ComposeResult> PsAsync(
        string projectDir,
        string composeFile,
        bool all = false,
        CancellationToken ct = default)
    {
        var args = new List<string> { "-f", composeFile, "ps", "--format", "json" };

        if (all)
        {
            args.Add("-a");
        }

        return await ExecuteAsync(args, projectDir, ct);
    }

    /// <summary>
    /// Starts the compose process with the given arguments, waits for it to exit and collects
    /// its output. A non-zero exit code is logged as a warning and reported through the result.
    /// </summary>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled; the child process tree is killed before rethrowing.
    /// </exception>
    private async Task<ComposeResult> ExecuteAsync(
        IReadOnlyList<string> arguments,
        string? workingDirectory,
        CancellationToken ct,
        IReadOnlyDictionary<string, string>? environment = null)
    {
        var (fileName, leadingArguments) = ParseCommand(_composeCommand);

        var psi = new ProcessStartInfo
        {
            FileName = fileName,
            WorkingDirectory = workingDirectory ?? Environment.CurrentDirectory,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        // ArgumentList escapes each entry for the platform, unlike a hand-built Arguments string.
        foreach (var argument in leadingArguments)
        {
            psi.ArgumentList.Add(argument);
        }

        foreach (var argument in arguments)
        {
            psi.ArgumentList.Add(argument);
        }

        if (environment is not null)
        {
            foreach (var (key, value) in environment)
            {
                psi.Environment[key] = value;
            }
        }

        _logger.LogDebug("Executing: {Command} {Args}", psi.FileName, string.Join(" ", psi.ArgumentList));

        using var process = new Process { StartInfo = psi };
        var stdout = new StringBuilder();
        var stderr = new StringBuilder();

        process.OutputDataReceived += (_, e) =>
        {
            if (e.Data is not null)
            {
                stdout.AppendLine(e.Data);
            }
        };

        process.ErrorDataReceived += (_, e) =>
        {
            if (e.Data is not null)
            {
                stderr.AppendLine(e.Data);
            }
        };

        process.Start();
        process.BeginOutputReadLine();
        process.BeginErrorReadLine();

        try
        {
            await process.WaitForExitAsync(ct);
        }
        catch (OperationCanceledException)
        {
            // Cancelling the wait does not stop the child; kill it so it is not left running.
            KillQuietly(process);
            throw;
        }

        var result = new ComposeResult(
            process.ExitCode == 0,
            process.ExitCode,
            stdout.ToString(),
            stderr.ToString());

        if (!result.Success)
        {
            _logger.LogWarning(
                "Compose command failed with exit code {ExitCode}: {Stderr}",
                result.ExitCode,
                result.StandardError);
        }

        return result;
    }

    /// <summary>
    /// Best-effort termination of a child process and its descendants; failures are logged
    /// at debug level and never thrown.
    /// </summary>
    private void KillQuietly(Process process)
    {
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Failed to kill cancelled compose process");
        }
    }

    /// <summary>
    /// Splits the configured compose command into the executable and the arguments that must
    /// precede every compose argument ("docker compose" vs "docker-compose").
    /// </summary>
    private static (string FileName, string[] LeadingArguments) ParseCommand(string composeCommand)
    {
        if (composeCommand.StartsWith("docker compose", StringComparison.OrdinalIgnoreCase))
        {
            return ("docker", new[] { "compose" });
        }

        return (composeCommand, Array.Empty<string>());
    }

    /// <summary>
    /// Probes for the Compose v2 plugin by running <c>docker compose version</c>; falls back
    /// to the standalone <c>docker-compose</c> (v1) when the probe fails, times out or cannot start.
    /// </summary>
    private static string DetectComposeCommand()
    {
        try
        {
            var psi = new ProcessStartInfo
            {
                FileName = "docker",
                Arguments = "compose version",
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                UseShellExecute = false,
                CreateNoWindow = true
            };

            using var process = Process.Start(psi);
            if (process is not null)
            {
                if (!process.WaitForExit(DetectionTimeoutMilliseconds))
                {
                    // ExitCode is unavailable while the process is still running; stop the hung
                    // probe instead of leaking it and fall through to the v1 fallback.
                    process.Kill(entireProcessTree: true);
                }
                else if (process.ExitCode == 0)
                {
                    return "docker compose";
                }
            }
        }
        catch (Exception)
        {
            // Deliberate best-effort: docker is missing or could not be started/killed,
            // so the v1 command below is used.
        }

        return "docker-compose";
    }

    /// <summary>
    /// Maps registry credentials onto the environment variables handed to the pull command.
    /// Returns <c>null</c> when no credentials were supplied.
    /// </summary>
    private static IReadOnlyDictionary<string, string>? BuildEnvironment(
        IReadOnlyDictionary<string, string>? credentials)
    {
        if (credentials is null)
        {
            return null;
        }

        var env = new Dictionary<string, string>();

        if (credentials.TryGetValue("registry.username", out var user))
        {
            env["DOCKER_REGISTRY_USER"] = user;
        }

        if (credentials.TryGetValue("registry.password", out var pass))
        {
            env["DOCKER_REGISTRY_PASSWORD"] = pass;
        }

        return env;
    }
}
|
||||
|
||||
/// <summary>
/// Outcome of one compose command invocation.
/// </summary>
/// <param name="Success">Whether the command is considered successful.</param>
/// <param name="ExitCode">Exit code reported for the command.</param>
/// <param name="StandardOutput">Text captured from the command's standard output.</param>
/// <param name="StandardError">Text captured from the command's standard error.</param>
public sealed record ComposeResult(bool Success, int ExitCode, string StandardOutput, string StandardError);
|
||||
|
||||
/// <summary>
/// Settings that shape a <c>compose up</c> invocation.
/// </summary>
public sealed record ComposeUpOptions
{
    /// <summary>
    /// When set, containers are force-recreated. Defaults to <c>false</c>.
    /// </summary>
    public bool ForceRecreate { get; init; }

    /// <summary>
    /// When set, orphan containers are removed. Defaults to <c>true</c>.
    /// </summary>
    public bool RemoveOrphans { get; init; } = true;

    /// <summary>
    /// When set, services are created but not started. Defaults to <c>false</c>.
    /// </summary>
    public bool NoStart { get; init; }

    /// <summary>
    /// Optional subset of services to bring up; <c>null</c> or empty means no subset is given.
    /// </summary>
    public IReadOnlyList<string>? Services { get; init; }

    /// <summary>
    /// Optional environment variables applied to the compose process.
    /// </summary>
    public IReadOnlyDictionary<string, string>? Environment { get; init; }
}
|
||||
|
||||
/// <summary>
/// Settings that shape a <c>compose down</c> invocation.
/// </summary>
public sealed record ComposeDownOptions
{
    /// <summary>
    /// When set, volumes are removed as well. Defaults to <c>false</c>.
    /// </summary>
    public bool RemoveVolumes { get; init; }

    /// <summary>
    /// When set, orphan containers are removed. Defaults to <c>true</c>.
    /// </summary>
    public bool RemoveOrphans { get; init; } = true;

    /// <summary>
    /// Optional time allowed for containers to stop; <c>null</c> means no timeout is passed.
    /// </summary>
    public TimeSpan? Timeout { get; init; }
}
|
||||
@@ -0,0 +1,148 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core;
|
||||
|
||||
namespace StellaOps.Agent.Compose;
|
||||
|
||||
/// <summary>
/// Manages per-project deployment directories and the compose / version-sticker files
/// stored inside them, all located beneath a single deployment root.
/// </summary>
/// <remarks>
/// Every project name is validated so that its directory resolves strictly beneath the
/// deployment root. Names such as <c>..</c>, <c>.</c> or rooted paths are rejected, which
/// keeps <see cref="CleanupAsync"/> from deleting directories outside (or equal to) the root.
/// </remarks>
public sealed class ComposeFileManager
{
    private const string ComposeFileName = "compose.stella.lock.yml";
    private const string VersionStickerFileName = "stella.version.json";

    private readonly string _deploymentRoot;
    private readonly ILogger<ComposeFileManager> _logger;

    /// <summary>
    /// Creates a new instance using the platform default deployment root.
    /// </summary>
    /// <param name="logger">Logger for file operations.</param>
    public ComposeFileManager(ILogger<ComposeFileManager> logger)
    {
        _deploymentRoot = GetDefaultDeploymentRoot();
        _logger = logger;
    }

    /// <summary>
    /// Creates a new instance with a custom deployment root.
    /// </summary>
    /// <param name="deploymentRoot">Directory under which project directories are created.</param>
    /// <param name="logger">Logger for file operations.</param>
    /// <exception cref="ArgumentException"><paramref name="deploymentRoot"/> is null or blank.</exception>
    public ComposeFileManager(string deploymentRoot, ILogger<ComposeFileManager> logger)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(deploymentRoot);

        _deploymentRoot = deploymentRoot;
        _logger = logger;
    }

    /// <summary>
    /// Writes compose files for a project, creating the project directory when needed.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <param name="composeLockContent">Content of compose.stella.lock.yml.</param>
    /// <param name="versionStickerContent">Content of stella.version.json.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Project directory path.</returns>
    /// <exception cref="ArgumentException">The project name is blank or escapes the deployment root.</exception>
    public async Task<string> WriteComposeFileAsync(
        string projectName,
        string composeLockContent,
        string versionStickerContent,
        CancellationToken ct = default)
    {
        var projectDir = GetProjectDirectory(projectName);
        Directory.CreateDirectory(projectDir);

        var composeFile = Path.Combine(projectDir, ComposeFileName);
        await File.WriteAllTextAsync(composeFile, composeLockContent, ct);
        _logger.LogDebug("Wrote compose file: {Path}", composeFile);

        var versionFile = Path.Combine(projectDir, VersionStickerFileName);
        await File.WriteAllTextAsync(versionFile, versionStickerContent, ct);
        _logger.LogDebug("Wrote version sticker: {Path}", versionFile);

        return projectDir;
    }

    /// <summary>
    /// Gets the project directory path.
    /// </summary>
    /// <param name="projectName">Project name; must resolve to a location strictly beneath the deployment root.</param>
    /// <returns>The deployment root combined with <paramref name="projectName"/>.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="projectName"/> is null/blank, or resolves to the deployment root itself
    /// or to a location outside it (for example via <c>..</c> segments or a rooted path).
    /// </exception>
    public string GetProjectDirectory(string projectName)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(projectName);

        var root = Path.GetFullPath(_deploymentRoot);
        var rootPrefix = Path.EndsInDirectorySeparator(root)
            ? root
            : root + Path.DirectorySeparatorChar;
        var resolved = Path.GetFullPath(Path.Combine(root, projectName));

        // The length check rejects names that normalize back to the root itself.
        var isBeneathRoot =
            resolved.Length > rootPrefix.Length &&
            resolved.StartsWith(rootPrefix, StringComparison.Ordinal);

        if (!isBeneathRoot)
        {
            throw new ArgumentException(
                $"Project name '{projectName}' must resolve to a directory inside the deployment root.",
                nameof(projectName));
        }

        // Return the same un-normalized combination callers received before validation existed.
        return Path.Combine(_deploymentRoot, projectName);
    }

    /// <summary>
    /// Gets the compose file path for a project.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <returns>Path of compose.stella.lock.yml inside the project directory.</returns>
    /// <exception cref="ArgumentException">The project name is blank or escapes the deployment root.</exception>
    public string GetComposeFilePath(string projectName)
    {
        return Path.Combine(GetProjectDirectory(projectName), ComposeFileName);
    }

    /// <summary>
    /// Gets the version sticker content for a project.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The content of stella.version.json, or <c>null</c> when the file does not exist.</returns>
    /// <exception cref="ArgumentException">The project name is blank or escapes the deployment root.</exception>
    public async Task<string?> GetVersionStickerAsync(string projectName, CancellationToken ct = default)
    {
        var path = Path.Combine(GetProjectDirectory(projectName), VersionStickerFileName);
        if (!File.Exists(path))
        {
            return null;
        }

        return await File.ReadAllTextAsync(path, ct);
    }

    /// <summary>
    /// Checks if a project exists, i.e. whether its compose file is present.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <returns><c>true</c> when the project's compose file exists.</returns>
    /// <exception cref="ArgumentException">The project name is blank or escapes the deployment root.</exception>
    public bool ProjectExists(string projectName)
    {
        return File.Exists(GetComposeFilePath(projectName));
    }

    /// <summary>
    /// Backs up existing deployment before update by copying the project's top-level files
    /// (names starting with '.' are skipped) into <c>.backup/&lt;UTC timestamp&gt;</c>.
    /// Does nothing when the project directory does not exist.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <param name="timeProvider">Clock used for the backup folder timestamp.</param>
    /// <param name="ct">Cancellation token, checked before each file copy.</param>
    /// <returns>A completed task; the work is performed synchronously.</returns>
    /// <exception cref="ArgumentException">The project name is blank or escapes the deployment root.</exception>
    public Task BackupExistingAsync(string projectName, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var projectDir = GetProjectDirectory(projectName);
        if (!Directory.Exists(projectDir))
        {
            return Task.CompletedTask;
        }

        // Invariant culture keeps the folder name stable regardless of the host's calendar settings.
        var timestamp = timeProvider.GetUtcNow().ToString(
            "yyyyMMdd-HHmmss",
            System.Globalization.CultureInfo.InvariantCulture);
        var backupDir = Path.Combine(projectDir, ".backup", timestamp);
        Directory.CreateDirectory(backupDir);

        foreach (var file in Directory.GetFiles(projectDir, "*.*"))
        {
            ct.ThrowIfCancellationRequested();

            var fileName = Path.GetFileName(file);
            if (fileName.StartsWith('.'))
            {
                continue;
            }

            // Overwrite so a second backup within the same second does not fail on existing files.
            File.Copy(file, Path.Combine(backupDir, fileName), overwrite: true);
        }

        _logger.LogDebug("Backed up existing deployment to {BackupDir}", backupDir);
        return Task.CompletedTask;
    }

    /// <summary>
    /// Cleans up a project directory by deleting it recursively when it exists.
    /// </summary>
    /// <param name="projectName">Project name.</param>
    /// <param name="ct">Cancellation token (not observed; the deletion is synchronous).</param>
    /// <returns>A completed task.</returns>
    /// <exception cref="ArgumentException">The project name is blank or escapes the deployment root.</exception>
    public Task CleanupAsync(string projectName, CancellationToken ct = default)
    {
        var projectDir = GetProjectDirectory(projectName);
        if (Directory.Exists(projectDir))
        {
            Directory.Delete(projectDir, recursive: true);
            _logger.LogDebug("Cleaned up project directory: {Path}", projectDir);
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Returns the platform-specific default root: the common application data folder on
    /// Windows, <c>/var/lib/stella-agent/deployments</c> elsewhere.
    /// </summary>
    private static string GetDefaultDeploymentRoot()
    {
        if (OperatingSystem.IsWindows())
        {
            return Path.Combine(
                Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData),
                "stella-agent",
                "deployments");
        }

        return "/var/lib/stella-agent/deployments";
    }
}
|
||||
@@ -0,0 +1,69 @@
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Compose.Exceptions;
|
||||
|
||||
/// <summary>
/// Signals that the payload supplied for a compose task could not be used.
/// </summary>
/// <param name="taskType">Task type whose payload was rejected; also embedded in the message.</param>
public sealed class InvalidComposePayloadException(string taskType)
    : AgentException($"Invalid payload for task type '{taskType}'")
{
    /// <summary>
    /// Task type whose payload was rejected.
    /// </summary>
    public string TaskType { get; } = taskType;
}
|
||||
|
||||
/// <summary>
/// Signals that a compose command finished unsuccessfully.
/// </summary>
/// <param name="command">Command that failed.</param>
/// <param name="exitCode">Exit code the command returned.</param>
/// <param name="error">Error text; included in the exception message only.</param>
public sealed class ComposeCommandException(string command, int exitCode, string error)
    : AgentException($"Compose command '{command}' failed with exit code {exitCode}: {error}")
{
    /// <summary>
    /// Command that failed.
    /// </summary>
    public string Command { get; } = command;

    /// <summary>
    /// Exit code the command returned.
    /// </summary>
    public int ExitCode { get; } = exitCode;
}
|
||||
|
||||
/// <summary>
/// Signals that no compose project exists under the given name.
/// </summary>
/// <param name="projectName">Name of the missing project; also embedded in the message.</param>
public sealed class ComposeProjectNotFoundException(string projectName)
    : AgentException($"Compose project not found: '{projectName}'")
{
    /// <summary>
    /// Name of the missing project.
    /// </summary>
    public string ProjectName { get; } = projectName;
}
|
||||
@@ -0,0 +1,22 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Agent.Compose</RootNamespace>
|
||||
<Description>Stella Agent Compose Capability - manages docker-compose stacks on target hosts</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
<PackageReference Include="Microsoft.Extensions.Options" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,156 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Compose.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Compose.Tasks;
|
||||
|
||||
/// <summary>
/// Handles <c>compose.down</c> tasks: stops a project's compose stack and, when requested,
/// deletes the project's files afterwards.
/// </summary>
public sealed class ComposeDownTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Shape of the JSON payload accepted by the compose.down task.
    /// </summary>
    public sealed record DownPayload
    {
        /// <summary>
        /// Name of the project to stop (required).
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Whether volumes are removed too. Defaults to <c>false</c>.
        /// </summary>
        public bool RemoveVolumes { get; init; }

        /// <summary>
        /// Whether orphan containers are removed. Defaults to <c>true</c>.
        /// </summary>
        public bool RemoveOrphans { get; init; } = true;

        /// <summary>
        /// Whether the project's files are deleted after a successful down. Defaults to <c>false</c>.
        /// </summary>
        public bool CleanupFiles { get; init; }

        /// <summary>
        /// Time allowed for containers to stop. Defaults to 30 seconds.
        /// </summary>
        public TimeSpan? Timeout { get; init; } = TimeSpan.FromSeconds(30);
    }

    /// <summary>
    /// Creates the handler.
    /// </summary>
    /// <param name="executor">Runs the compose command.</param>
    /// <param name="fileManager">Resolves project paths and performs file cleanup.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposeDownTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        (_executor, _fileManager, _logger) = (executor, fileManager, logger);
    }

    /// <inheritdoc />
    /// <remarks>
    /// A project without a compose file yields a successful result flagged as skipped.
    /// Exceptions raised after the payload has been read are logged and converted into a
    /// failed result; an unreadable payload throws <see cref="InvalidComposePayloadException"/>.
    /// </remarks>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startedAt = timeProvider.GetUtcNow();

        var payload = JsonSerializer.Deserialize<DownPayload>(task.Payload);
        if (payload is null)
        {
            throw new InvalidComposePayloadException("compose.down");
        }

        _logger.LogInformation("Stopping compose stack: {Project}", payload.ProjectName);

        try
        {
            var workingDir = _fileManager.GetProjectDirectory(payload.ProjectName);
            var composePath = _fileManager.GetComposeFilePath(payload.ProjectName);

            // Nothing to stop when the compose file is absent: report success, marked as skipped.
            if (!_fileManager.ProjectExists(payload.ProjectName))
            {
                _logger.LogWarning(
                    "Compose file not found for project {Project}, skipping down",
                    payload.ProjectName);

                var skippedAt = timeProvider.GetUtcNow();
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["projectName"] = payload.ProjectName,
                        ["skipped"] = true,
                        ["reason"] = "Compose file not found"
                    },
                    CompletedAt = skippedAt,
                    Duration = skippedAt - startedAt
                };
            }

            var downOptions = new ComposeDownOptions
            {
                RemoveVolumes = payload.RemoveVolumes,
                RemoveOrphans = payload.RemoveOrphans,
                Timeout = payload.Timeout
            };

            var downResult = await _executor.DownAsync(workingDir, composePath, downOptions, ct);

            if (!downResult.Success)
            {
                var failedAt = timeProvider.GetUtcNow();
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = $"Failed to stop stack: {downResult.StandardError}",
                    CompletedAt = failedAt,
                    Duration = failedAt - startedAt
                };
            }

            // File cleanup only happens once the stack was stopped successfully.
            if (payload.CleanupFiles)
            {
                await _fileManager.CleanupAsync(payload.ProjectName, ct);
            }

            _logger.LogInformation("Stopped compose stack: {Project}", payload.ProjectName);

            var stoppedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["projectName"] = payload.ProjectName,
                    ["removedVolumes"] = payload.RemoveVolumes,
                    ["cleanedFiles"] = payload.CleanupFiles
                },
                CompletedAt = stoppedAt,
                Duration = stoppedAt - startedAt
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to stop compose stack {Project}", payload.ProjectName);

            var faultedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = faultedAt,
                Duration = faultedAt - startedAt
            };
        }
    }
}
|
||||
@@ -0,0 +1,196 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Compose.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Compose.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for checking compose stack health by polling <c>compose ps</c> until every
/// inspected service is running and healthy, the timeout elapses, or the task is cancelled.
/// </summary>
/// <remarks>
/// A stack is reported healthy only when the listing command succeeded, at least one service
/// was listed, every explicitly requested service is present, and each inspected service is
/// in state <c>running</c> with a health value that is absent, empty or <c>healthy</c>.
/// NOTE(review): the listing is requested without the "all" flag; per the Compose CLI docs
/// that shows only running containers, so when no specific services are requested a stopped
/// service does not appear in the listing at all — confirm whether that is intended.
/// </remarks>
public sealed class ComposeHealthCheckTask : IComposeTask
{
    private static readonly TimeSpan s_pollInterval = TimeSpan.FromSeconds(5);

    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for compose.health-check task.
    /// </summary>
    public sealed record HealthCheckPayload
    {
        /// <summary>
        /// Project name.
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Specific services to check; <c>null</c> or empty checks every listed service.
        /// </summary>
        public IReadOnlyList<string>? Services { get; init; }

        /// <summary>
        /// Timeout for health check. Defaults to five minutes.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);

        /// <summary>
        /// Wait for services to become healthy. When <c>false</c>, the first unhealthy
        /// observation fails the task immediately. Defaults to <c>true</c>.
        /// </summary>
        public bool WaitForHealthy { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="executor">Runs the compose listing command.</param>
    /// <param name="fileManager">Resolves project paths.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposeHealthCheckTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Failures after the payload has been read (including timeout and cancellation) are
    /// returned as a failed result rather than thrown; an unreadable payload throws
    /// <see cref="InvalidComposePayloadException"/>.
    /// </remarks>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<HealthCheckPayload>(task.Payload)
            ?? throw new InvalidComposePayloadException("compose.health-check");

        _logger.LogInformation("Checking health of compose stack: {Project}", payload.ProjectName);

        try
        {
            var projectDir = _fileManager.GetProjectDirectory(payload.ProjectName);
            var composeFile = _fileManager.GetComposeFilePath(payload.ProjectName);

            if (!_fileManager.ProjectExists(payload.ProjectName))
            {
                throw new ComposeProjectNotFoundException(payload.ProjectName);
            }

            // Timeout and poll delay both run on the injected clock so they follow the same
            // time source as the reported timestamps.
            using var timeoutCts = new CancellationTokenSource(payload.Timeout, timeProvider);
            using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

            while (!linkedCts.IsCancellationRequested)
            {
                var psResult = await _executor.PsAsync(projectDir, composeFile, ct: linkedCts.Token);
                var services = ParseServices(psResult.StandardOutput);
                var missingServices = new List<string>();

                // Filter to requested services if specified, remembering any that are not listed.
                if (payload.Services?.Count > 0)
                {
                    var requestedServices = payload.Services.ToHashSet(StringComparer.OrdinalIgnoreCase);
                    services = services.Where(s => requestedServices.Contains(s.Service)).ToList();

                    var listedServices = services
                        .Select(s => s.Service)
                        .ToHashSet(StringComparer.OrdinalIgnoreCase);
                    missingServices = requestedServices.Where(name => !listedServices.Contains(name)).ToList();
                }

                var unhealthyServices = services.Where(s => !IsHealthy(s)).ToList();

                // An empty listing must not count as healthy: All() over zero items is vacuously true.
                var stackHealthy =
                    psResult.Success &&
                    services.Count > 0 &&
                    missingServices.Count == 0 &&
                    unhealthyServices.Count == 0;

                if (stackHealthy)
                {
                    _logger.LogInformation(
                        "Compose stack {Project} is healthy ({Count} services)",
                        payload.ProjectName,
                        services.Count);

                    var completedAt = timeProvider.GetUtcNow();
                    return new AgentTaskResult
                    {
                        TaskId = task.Id,
                        Success = true,
                        Outputs = new Dictionary<string, object>
                        {
                            ["projectName"] = payload.ProjectName,
                            ["serviceCount"] = services.Count,
                            ["allHealthy"] = true
                        },
                        CompletedAt = completedAt,
                        Duration = completedAt - startTime
                    };
                }

                if (!payload.WaitForHealthy)
                {
                    string error;
                    if (!psResult.Success)
                    {
                        error = $"Failed to list services: {psResult.StandardError.Trim()}";
                    }
                    else if (services.Count == 0 && missingServices.Count == 0)
                    {
                        error = "No running services found";
                    }
                    else
                    {
                        error = "Some services are unhealthy";
                    }

                    // Requested-but-unlisted services are reported alongside the unhealthy ones.
                    var failingServices = unhealthyServices
                        .Select(s => s.Service)
                        .Concat(missingServices)
                        .ToList();

                    var completedAt = timeProvider.GetUtcNow();
                    return new AgentTaskResult
                    {
                        TaskId = task.Id,
                        Success = false,
                        Error = error,
                        Outputs = new Dictionary<string, object>
                        {
                            ["projectName"] = payload.ProjectName,
                            ["serviceCount"] = services.Count,
                            ["unhealthyCount"] = failingServices.Count,
                            ["unhealthyServices"] = failingServices
                        },
                        CompletedAt = completedAt,
                        Duration = completedAt - startTime
                    };
                }

                await Task.Delay(s_pollInterval, timeProvider, linkedCts.Token);
            }

            throw new OperationCanceledException();
        }
        catch (OperationCanceledException)
        {
            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                // Distinguish caller cancellation from the health-check timeout.
                Error = ct.IsCancellationRequested
                    ? "Health check was cancelled"
                    : $"Health check timed out after {payload.Timeout}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Health check failed for stack {Project}", payload.ProjectName);
            var completedAt = timeProvider.GetUtcNow();

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// A service is healthy when it is running and its health value is absent, empty or "healthy".
    /// </summary>
    private static bool IsHealthy(ServiceStatus service) =>
        service.State == "running" &&
        (string.IsNullOrEmpty(service.Health) || service.Health == "healthy");

    /// <summary>
    /// Parses the JSON listing, accepting either a single JSON document (array or object)
    /// or one JSON object per line. Lines that are not valid JSON are skipped.
    /// </summary>
    private static List<ServiceStatus> ParseServices(string output)
    {
        var services = new List<ServiceStatus>();
        if (string.IsNullOrWhiteSpace(output))
        {
            return services;
        }

        // Single-document form first (a JSON array, or exactly one object).
        if (TryAppendServices(output, services))
        {
            return services;
        }

        // Otherwise treat the output as newline-delimited JSON.
        foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            TryAppendServices(line, services);
        }

        return services;
    }

    /// <summary>
    /// Parses <paramref name="json"/> and appends the service(s) it describes.
    /// Returns <c>false</c> when the text is not valid JSON or is neither an object nor an array.
    /// </summary>
    private static bool TryAppendServices(string json, List<ServiceStatus> services)
    {
        try
        {
            using var document = JsonDocument.Parse(json);
            var root = document.RootElement;

            switch (root.ValueKind)
            {
                case JsonValueKind.Array:
                    foreach (var element in root.EnumerateArray())
                    {
                        if (element.ValueKind == JsonValueKind.Object)
                        {
                            services.Add(ToServiceStatus(element));
                        }
                    }

                    return true;

                case JsonValueKind.Object:
                    services.Add(ToServiceStatus(root));
                    return true;

                default:
                    return false;
            }
        }
        catch (JsonException)
        {
            return false;
        }
    }

    /// <summary>
    /// Maps one JSON object onto a <see cref="ServiceStatus"/>; missing Name/Service/State
    /// become empty strings, missing Health stays <c>null</c>.
    /// </summary>
    private static ServiceStatus ToServiceStatus(JsonElement element) =>
        new ServiceStatus(
            ReadString(element, "Name") ?? "",
            ReadString(element, "Service") ?? "",
            ReadString(element, "State") ?? "",
            ReadString(element, "Health"));

    /// <summary>
    /// Reads a string property, returning <c>null</c> when it is absent or not a JSON string.
    /// </summary>
    private static string? ReadString(JsonElement element, string propertyName) =>
        element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
}
|
||||
@@ -0,0 +1,141 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Compose.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Compose.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for the <c>compose.ps</c> task: lists the services of a compose stack.
/// </summary>
public sealed class ComposePsTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for compose.ps task.
    /// </summary>
    public sealed record PsPayload
    {
        /// <summary>
        /// Project name.
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Include stopped containers.
        /// </summary>
        public bool All { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="executor">Executor used to run the compose <c>ps</c> command.</param>
    /// <param name="fileManager">Resolves project directories and compose file paths.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposePsTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Payload deserialization happens before the try block, so its exceptions
    /// (including <see cref="InvalidComposePayloadException"/> for a null payload) reach the caller.
    /// Any exception thrown afterwards is logged and converted into a failed result.
    /// </remarks>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<PsPayload>(task.Payload)
            ?? throw new InvalidComposePayloadException("compose.ps");

        _logger.LogInformation("Listing services for compose stack: {Project}", payload.ProjectName);

        try
        {
            var projectDir = _fileManager.GetProjectDirectory(payload.ProjectName);
            var composeFile = _fileManager.GetComposeFilePath(payload.ProjectName);

            if (!_fileManager.ProjectExists(payload.ProjectName))
            {
                throw new ComposeProjectNotFoundException(payload.ProjectName);
            }

            var result = await _executor.PsAsync(projectDir, composeFile, payload.All, ct);

            // Completion time is taken right after the command returns, before parsing.
            var completedAt = timeProvider.GetUtcNow();
            if (!result.Success)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = $"Failed to list services: {result.StandardError}",
                    CompletedAt = completedAt,
                    Duration = completedAt - startTime
                };
            }

            var services = ParseServices(result.StandardOutput);
            _logger.LogInformation("Found {Count} services in {Project}", services.Count, payload.ProjectName);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["projectName"] = payload.ProjectName,
                    ["serviceCount"] = services.Count,
                    ["services"] = services.Select(s => new Dictionary<string, object?>
                    {
                        ["name"] = s.Name,
                        ["service"] = s.Service,
                        ["state"] = s.State,
                        ["health"] = s.Health
                    }).ToList()
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to list services for {Project}", payload.ProjectName);
            var completedAt = timeProvider.GetUtcNow();

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Parses the JSON output of the compose <c>ps</c> command into service statuses.
    /// </summary>
    /// <remarks>
    /// Each non-empty line may hold a single JSON object or a JSON array of objects.
    /// Lines that are not valid JSON and elements that are not JSON objects are skipped.
    /// </remarks>
    private static List<ServiceStatus> ParseServices(string output)
    {
        var services = new List<ServiceStatus>();
        if (string.IsNullOrWhiteSpace(output))
        {
            return services;
        }

        foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            JsonElement root;
            try
            {
                root = JsonSerializer.Deserialize<JsonElement>(line);
            }
            catch (JsonException)
            {
                // Best effort: ignore lines that are not JSON instead of failing the whole parse.
                continue;
            }

            if (root.ValueKind == JsonValueKind.Array)
            {
                foreach (var element in root.EnumerateArray())
                {
                    AddService(services, element);
                }
            }
            else
            {
                AddService(services, root);
            }
        }

        return services;
    }

    /// <summary>
    /// Appends one service entry; anything that is not a JSON object is ignored.
    /// </summary>
    private static void AddService(List<ServiceStatus> target, JsonElement element)
    {
        if (element.ValueKind != JsonValueKind.Object)
        {
            return;
        }

        target.Add(new ServiceStatus(
            ReadString(element, "Name") ?? "",
            ReadString(element, "Service") ?? "",
            ReadString(element, "State") ?? "",
            ReadString(element, "Health")));
    }

    /// <summary>
    /// Returns the string value of a property, or null when it is missing or not a JSON string.
    /// </summary>
    private static string? ReadString(JsonElement element, string propertyName) =>
        element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
}
|
||||
@@ -0,0 +1,101 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Compose.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Compose.Tasks;
|
||||
|
||||
/// <summary>
/// Handles the <c>compose.pull</c> task: pulls the images of an existing compose stack.
/// </summary>
public sealed class ComposePullTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// JSON payload accepted by the compose.pull task.
    /// </summary>
    public sealed record PullPayload
    {
        /// <summary>
        /// Name of the compose project whose images are pulled.
        /// </summary>
        public required string ProjectName { get; init; }
    }

    /// <summary>
    /// Initializes the handler with its collaborators.
    /// </summary>
    public ComposePullTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        (_executor, _fileManager, _logger) = (executor, fileManager, logger);
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startedAt = timeProvider.GetUtcNow();

        var payload = JsonSerializer.Deserialize<PullPayload>(task.Payload);
        if (payload is null)
        {
            throw new InvalidComposePayloadException("compose.pull");
        }

        var project = payload.ProjectName;
        _logger.LogInformation("Pulling images for compose stack: {Project}", project);

        // Every unsuccessful outcome has the same shape; only the message and timestamp vary.
        AgentTaskResult Failed(string error, DateTimeOffset finishedAt) => new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = finishedAt,
            Duration = finishedAt - startedAt
        };

        try
        {
            var directory = _fileManager.GetProjectDirectory(project);
            var composePath = _fileManager.GetComposeFilePath(project);

            if (!_fileManager.ProjectExists(project))
            {
                throw new ComposeProjectNotFoundException(project);
            }

            var pull = await _executor.PullAsync(directory, composePath, task.Credentials, ct);
            var finishedAt = timeProvider.GetUtcNow();

            if (pull.Success)
            {
                _logger.LogInformation("Pulled images for compose stack: {Project}", project);

                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["projectName"] = project
                    },
                    CompletedAt = finishedAt,
                    Duration = finishedAt - startedAt
                };
            }

            return Failed($"Failed to pull images: {pull.StandardError}", finishedAt);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to pull images for {Project}", project);
            return Failed(ex.Message, timeProvider.GetUtcNow());
        }
    }
}
|
||||
@@ -0,0 +1,110 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Compose.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Compose.Tasks;
|
||||
|
||||
/// <summary>
/// Handles the <c>compose.scale</c> task: changes replica counts of services in a compose stack.
/// </summary>
public sealed class ComposeScaleTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// JSON payload accepted by the compose.scale task.
    /// </summary>
    public sealed record ScalePayload
    {
        /// <summary>
        /// Name of the compose project to scale.
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Requested replica count per service (service name -> replica count).
        /// </summary>
        public required IReadOnlyDictionary<string, int> Scaling { get; init; }
    }

    /// <summary>
    /// Initializes the handler with its collaborators.
    /// </summary>
    public ComposeScaleTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        (_executor, _fileManager, _logger) = (executor, fileManager, logger);
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startedAt = timeProvider.GetUtcNow();

        var payload = JsonSerializer.Deserialize<ScalePayload>(task.Payload);
        if (payload is null)
        {
            throw new InvalidComposePayloadException("compose.scale");
        }

        var project = payload.ProjectName;
        var scalingSummary = string.Join(", ", payload.Scaling.Select(entry => $"{entry.Key}={entry.Value}"));
        _logger.LogInformation("Scaling compose stack {Project}: {Scaling}", project, scalingSummary);

        // Every unsuccessful outcome has the same shape; only the message and timestamp vary.
        AgentTaskResult Failed(string error, DateTimeOffset finishedAt) => new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = finishedAt,
            Duration = finishedAt - startedAt
        };

        try
        {
            var directory = _fileManager.GetProjectDirectory(project);
            var composePath = _fileManager.GetComposeFilePath(project);

            if (!_fileManager.ProjectExists(project))
            {
                throw new ComposeProjectNotFoundException(project);
            }

            var scale = await _executor.ScaleAsync(directory, composePath, payload.Scaling, ct);
            var finishedAt = timeProvider.GetUtcNow();

            if (!scale.Success)
            {
                return Failed($"Failed to scale services: {scale.StandardError}", finishedAt);
            }

            _logger.LogInformation("Scaled compose stack: {Project}", project);

            // Echo the applied scaling back with boxed values so it fits the outputs dictionary.
            var appliedScaling = new Dictionary<string, object>();
            foreach (var (serviceName, replicas) in payload.Scaling)
            {
                appliedScaling.Add(serviceName, replicas);
            }

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["projectName"] = project,
                    ["scaling"] = appliedScaling
                },
                CompletedAt = finishedAt,
                Duration = finishedAt - startedAt
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to scale compose stack {Project}", project);
            return Failed(ex.Message, timeProvider.GetUtcNow());
        }
    }
}
|
||||
@@ -0,0 +1,238 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Compose.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Compose.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for the <c>compose.up</c> task: writes the compose files, pulls images and starts the stack.
/// </summary>
public sealed class ComposeUpTask : IComposeTask
{
    private readonly ComposeExecutor _executor;
    private readonly ComposeFileManager _fileManager;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for compose.up task.
    /// </summary>
    public sealed record UpPayload
    {
        /// <summary>
        /// Project name.
        /// </summary>
        public required string ProjectName { get; init; }

        /// <summary>
        /// Content of compose.stella.lock.yml.
        /// </summary>
        public required string ComposeLock { get; init; }

        /// <summary>
        /// Content of stella.version.json.
        /// </summary>
        public required string VersionSticker { get; init; }

        /// <summary>
        /// Force recreate containers. Defaults to true.
        /// </summary>
        public bool ForceRecreate { get; init; } = true;

        /// <summary>
        /// Remove orphan containers. Defaults to true.
        /// </summary>
        public bool RemoveOrphans { get; init; } = true;

        /// <summary>
        /// Specific services to deploy.
        /// </summary>
        public IReadOnlyList<string>? Services { get; init; }

        /// <summary>
        /// Environment variables. On a key clash these win over the task's variables.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// Backup existing deployment before update. Defaults to true.
        /// </summary>
        public bool BackupExisting { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="executor">Executor used to run compose commands.</param>
    /// <param name="fileManager">Writes, backs up and locates compose files.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public ComposeUpTask(ComposeExecutor executor, ComposeFileManager fileManager, ILogger logger)
    {
        _executor = executor;
        _fileManager = fileManager;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Steps: optional backup, write compose files, pull images, bring the stack up, list services.
    /// Payload deserialization happens before the try block, so its exceptions reach the caller.
    /// Any exception thrown afterwards is logged and converted into a failed result.
    /// </remarks>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<UpPayload>(task.Payload)
            ?? throw new InvalidComposePayloadException("compose.up");

        _logger.LogInformation("Deploying compose stack: {Project}", payload.ProjectName);

        try
        {
            // Backup existing deployment
            if (payload.BackupExisting)
            {
                await _fileManager.BackupExistingAsync(payload.ProjectName, timeProvider, ct);
            }

            // Write compose files
            var projectDir = await _fileManager.WriteComposeFileAsync(
                payload.ProjectName,
                payload.ComposeLock,
                payload.VersionSticker,
                ct);

            var composeFile = _fileManager.GetComposeFilePath(payload.ProjectName);

            // Pull images first
            _logger.LogInformation("Pulling images for {Project}", payload.ProjectName);
            var pullResult = await _executor.PullAsync(projectDir, composeFile, task.Credentials, ct);
            if (!pullResult.Success)
            {
                return Failure(task, $"Failed to pull images: {pullResult.StandardError}", startTime, timeProvider.GetUtcNow());
            }

            // Deploy the stack
            _logger.LogInformation("Starting compose stack: {Project}", payload.ProjectName);
            var upResult = await _executor.UpAsync(
                projectDir,
                composeFile,
                new ComposeUpOptions
                {
                    ForceRecreate = payload.ForceRecreate,
                    RemoveOrphans = payload.RemoveOrphans,
                    Services = payload.Services,
                    Environment = MergeEnvironment(payload.Environment, task.Variables)
                },
                ct);

            if (!upResult.Success)
            {
                return Failure(task, $"Failed to deploy stack: {upResult.StandardError}", startTime, timeProvider.GetUtcNow());
            }

            // Get running services. A failed listing does not fail the deployment, but it is
            // surfaced in the log so an empty service list is not mistaken for an empty stack.
            var psResult = await _executor.PsAsync(projectDir, composeFile, ct: ct);
            if (!psResult.Success)
            {
                _logger.LogWarning(
                    "Could not list services for {Project} after deployment: {Error}",
                    payload.ProjectName,
                    psResult.StandardError);
            }

            var services = ParseServicesFromPs(psResult.StandardOutput);

            _logger.LogInformation(
                "Deployed compose stack {Project} with {Count} services",
                payload.ProjectName,
                services.Count);

            var finalCompletedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["projectName"] = payload.ProjectName,
                    ["projectDir"] = projectDir,
                    ["serviceCount"] = services.Count,
                    ["services"] = services.Select(s => s.Service).ToList()
                },
                CompletedAt = finalCompletedAt,
                Duration = finalCompletedAt - startTime
            };
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to deploy compose stack {Project}", payload.ProjectName);
            return Failure(task, ex.Message, startTime, timeProvider.GetUtcNow());
        }
    }

    /// <summary>
    /// Builds the failed result shared by every error path.
    /// </summary>
    private static AgentTaskResult Failure(AgentTaskInfo task, string error, DateTimeOffset startTime, DateTimeOffset completedAt) =>
        new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };

    /// <summary>
    /// Merges task variables with the payload environment; payload entries win on key clashes.
    /// </summary>
    /// <returns>The merged map, or null when there is no payload environment and no variables.</returns>
    private static IReadOnlyDictionary<string, string>? MergeEnvironment(
        IReadOnlyDictionary<string, string>? env,
        IReadOnlyDictionary<string, string> variables)
    {
        if (env is null && variables.Count == 0)
        {
            return null;
        }

        var merged = new Dictionary<string, string>(variables);
        if (env is not null)
        {
            foreach (var (key, value) in env)
            {
                merged[key] = value;
            }
        }

        return merged;
    }

    /// <summary>
    /// Parses the JSON output of the compose <c>ps</c> command into service statuses.
    /// </summary>
    /// <remarks>
    /// Each non-empty line may hold a single JSON object or a JSON array of objects.
    /// Lines that are not valid JSON and elements that are not JSON objects are skipped.
    /// </remarks>
    private static IReadOnlyList<ServiceStatus> ParseServicesFromPs(string output)
    {
        if (string.IsNullOrWhiteSpace(output))
        {
            return [];
        }

        var services = new List<ServiceStatus>();
        foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            JsonElement root;
            try
            {
                root = JsonSerializer.Deserialize<JsonElement>(line);
            }
            catch (JsonException)
            {
                // Best effort: ignore lines that are not JSON instead of failing the whole parse.
                continue;
            }

            if (root.ValueKind == JsonValueKind.Array)
            {
                foreach (var element in root.EnumerateArray())
                {
                    AddService(services, element);
                }
            }
            else
            {
                AddService(services, root);
            }
        }

        return services;
    }

    /// <summary>
    /// Appends one service entry; anything that is not a JSON object is ignored.
    /// </summary>
    private static void AddService(List<ServiceStatus> target, JsonElement element)
    {
        if (element.ValueKind != JsonValueKind.Object)
        {
            return;
        }

        target.Add(new ServiceStatus(
            ReadString(element, "Name") ?? "",
            ReadString(element, "Service") ?? "",
            ReadString(element, "State") ?? "",
            ReadString(element, "Health")));
    }

    /// <summary>
    /// Returns the string value of a property, or null when it is missing or not a JSON string.
    /// </summary>
    private static string? ReadString(JsonElement element, string propertyName) =>
        element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
}
|
||||
@@ -0,0 +1,27 @@
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Compose.Tasks;
|
||||
|
||||
/// <summary>
/// Contract implemented by every compose task handler.
/// </summary>
public interface IComposeTask
{
    /// <summary>
    /// Runs the task described by <paramref name="task"/>.
    /// </summary>
    /// <param name="task">Information about the task to run.</param>
    /// <param name="timeProvider">Source of timestamps, injected so results are deterministic.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The outcome of the task.</returns>
    Task<AgentTaskResult> ExecuteAsync(
        AgentTaskInfo task,
        TimeProvider timeProvider,
        CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// Snapshot of one service in a compose stack.
/// </summary>
/// <param name="Name">Value of the <c>Name</c> field reported for the service.</param>
/// <param name="Service">Value of the <c>Service</c> field reported for the service.</param>
/// <param name="State">Value of the <c>State</c> field, for example <c>running</c>.</param>
/// <param name="Health">Value of the <c>Health</c> field, or null when none was reported.</param>
public sealed record ServiceStatus(string Name, string Service, string State, string? Health);
|
||||
@@ -0,0 +1,57 @@
|
||||
namespace StellaOps.Agent.Core;
|
||||
|
||||
/// <summary>
/// Settings that configure a Stella Agent instance.
/// </summary>
public sealed class AgentConfiguration
{
    /// <summary>Unique identifier of this agent.</summary>
    public required string AgentId { get; set; }

    /// <summary>Human-readable display name of this agent.</summary>
    public required string AgentName { get; set; }

    /// <summary>URL of the orchestrator the agent connects to.</summary>
    public required string OrchestratorUrl { get; set; }

    /// <summary>Path to the agent's TLS certificate.</summary>
    public required string CertificatePath { get; set; }

    /// <summary>Path to the agent's private key.</summary>
    public required string PrivateKeyPath { get; set; }

    /// <summary>Path to the CA certificate used to verify the orchestrator.</summary>
    public required string CaCertificatePath { get; set; }

    /// <summary>Port for the gRPC server; defaults to 50051.</summary>
    public int GrpcPort { get; set; } = 50051;

    /// <summary>Interval between heartbeat messages; defaults to 30 seconds.</summary>
    public TimeSpan HeartbeatInterval { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>Default timeout for task execution; defaults to 30 minutes.</summary>
    public TimeSpan TaskTimeout { get; set; } = TimeSpan.FromMinutes(30);

    /// <summary>Names of the capabilities to enable; empty by default.</summary>
    public IReadOnlyList<string> EnabledCapabilities { get; set; } = [];
}
|
||||
@@ -0,0 +1,157 @@
|
||||
using System.Collections.Immutable;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Core.Capability;
|
||||
|
||||
/// <summary>
/// Registry for agent capabilities, indexed by capability name and by supported task type.
/// </summary>
/// <remarks>
/// Both lookups are case-insensitive. The registry uses plain dictionaries without locking.
/// </remarks>
public sealed class CapabilityRegistry
{
    private readonly Dictionary<string, IAgentCapability> _capabilities = new(StringComparer.OrdinalIgnoreCase);
    private readonly Dictionary<string, IAgentCapability> _taskTypeToCapability = new(StringComparer.OrdinalIgnoreCase);
    private readonly ILogger<CapabilityRegistry> _logger;

    /// <summary>
    /// Creates a new instance of <see cref="CapabilityRegistry"/>.
    /// </summary>
    /// <param name="logger">Logger for registration, initialization and health events.</param>
    public CapabilityRegistry(ILogger<CapabilityRegistry> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Registers a capability and maps each of its task types to it.
    /// </summary>
    /// <remarks>
    /// A task type already claimed by another capability is reassigned to the new one, with a warning.
    /// </remarks>
    /// <param name="capability">The capability to register.</param>
    /// <exception cref="ArgumentNullException">If <paramref name="capability"/> is null.</exception>
    /// <exception cref="CapabilityAlreadyRegisteredException">If the capability is already registered.</exception>
    public void Register(IAgentCapability capability)
    {
        ArgumentNullException.ThrowIfNull(capability);

        // Single lookup: TryAdd fails when the name is already present.
        if (!_capabilities.TryAdd(capability.Name, capability))
        {
            throw new CapabilityAlreadyRegisteredException(capability.Name);
        }

        // Read the list once; an implementation may build a new collection on every access.
        var taskTypes = capability.SupportedTaskTypes;

        foreach (var taskType in taskTypes)
        {
            if (_taskTypeToCapability.TryGetValue(taskType, out var existing))
            {
                _logger.LogWarning(
                    "Task type {TaskType} already registered by {ExistingCapability}, overriding with {NewCapability}",
                    taskType, existing.Name, capability.Name);
            }

            _taskTypeToCapability[taskType] = capability;
        }

        _logger.LogInformation(
            "Registered capability {Name} v{Version} with tasks: {Tasks}",
            capability.Name,
            capability.Version,
            string.Join(", ", taskTypes));
    }

    /// <summary>
    /// Gets a capability by name.
    /// </summary>
    /// <param name="name">The capability name (case-insensitive).</param>
    /// <returns>The capability, or null if not found.</returns>
    public IAgentCapability? Get(string name)
    {
        _capabilities.TryGetValue(name, out var capability);
        return capability;
    }

    /// <summary>
    /// Gets the capability that can handle the specified task type.
    /// </summary>
    /// <param name="taskType">The task type (case-insensitive).</param>
    /// <returns>The capability, or null if not found.</returns>
    public IAgentCapability? GetForTaskType(string taskType)
    {
        _taskTypeToCapability.TryGetValue(taskType, out var capability);
        return capability;
    }

    /// <summary>
    /// Gets information about all registered capabilities.
    /// </summary>
    /// <returns>A read-only snapshot with the name, version and task types of each capability.</returns>
    public IReadOnlyList<CapabilityInfo> GetCapabilities()
    {
        return _capabilities.Values
            .Select(c => new CapabilityInfo(
                c.Name,
                c.Version,
                c.SupportedTaskTypes.ToImmutableArray()))
            .ToList()
            .AsReadOnly();
    }

    /// <summary>
    /// Gets all registered capabilities.
    /// </summary>
    /// <returns>The value collection of the underlying dictionary (a live view, not a copy).</returns>
    public IReadOnlyCollection<IAgentCapability> GetAll()
    {
        return _capabilities.Values;
    }

    /// <summary>
    /// Initializes all registered capabilities.
    /// </summary>
    /// <remarks>
    /// A capability that reports failure or throws is logged and does not stop the others.
    /// Cancellation requested through <paramref name="ct"/> stops the loop and propagates.
    /// </remarks>
    /// <param name="ct">Cancellation token.</param>
    /// <exception cref="OperationCanceledException">If <paramref name="ct"/> is cancelled.</exception>
    public async Task InitializeAllAsync(CancellationToken ct = default)
    {
        foreach (var (name, capability) in _capabilities)
        {
            ct.ThrowIfCancellationRequested();

            try
            {
                var success = await capability.InitializeAsync(ct);
                if (!success)
                {
                    _logger.LogWarning("Capability {Name} failed to initialize", name);
                }
                else
                {
                    _logger.LogDebug("Capability {Name} initialized successfully", name);
                }
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // The caller asked to stop: do not log this as a capability failure and keep looping.
                throw;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Capability {Name} threw exception during initialization", name);
            }
        }
    }

    /// <summary>
    /// Checks health of all capabilities.
    /// </summary>
    /// <remarks>
    /// A capability whose check throws is reported as unhealthy with the exception message.
    /// Cancellation requested through <paramref name="ct"/> propagates instead of being
    /// recorded as an unhealthy capability.
    /// </remarks>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Overall health status and per-capability details.</returns>
    /// <exception cref="OperationCanceledException">If <paramref name="ct"/> is cancelled.</exception>
    public async Task<(bool AllHealthy, IReadOnlyDictionary<string, object> Details)> CheckHealthAsync(CancellationToken ct = default)
    {
        var details = new Dictionary<string, object>();
        var allHealthy = true;

        foreach (var (name, capability) in _capabilities)
        {
            ct.ThrowIfCancellationRequested();

            try
            {
                var health = await capability.CheckHealthAsync(ct);
                details[name] = new { health.IsHealthy, health.Message };
                allHealthy = allHealthy && health.IsHealthy;
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                throw;
            }
            catch (Exception ex)
            {
                details[name] = new { IsHealthy = false, Message = ex.Message };
                allHealthy = false;
            }
        }

        return (allHealthy, details);
    }
}
|
||||
@@ -0,0 +1,46 @@
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Core.Capability;
|
||||
|
||||
/// <summary>
/// Contract for an agent capability: a named, versioned unit that executes tasks.
/// </summary>
public interface IAgentCapability
{
    /// <summary>Name of the capability.</summary>
    string Name { get; }

    /// <summary>Version of the capability.</summary>
    string Version { get; }

    /// <summary>Task types the capability is able to handle.</summary>
    IReadOnlyList<string> SupportedTaskTypes { get; }

    /// <summary>
    /// Initializes the capability.
    /// </summary>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>True when initialization succeeded.</returns>
    Task<bool> InitializeAsync(CancellationToken ct = default);

    /// <summary>
    /// Executes the given task.
    /// </summary>
    /// <param name="task">The task to execute.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The result of the task.</returns>
    Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default);

    /// <summary>
    /// Reports the health of the capability.
    /// </summary>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The current health status.</returns>
    Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,91 @@
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Core.Communication;
|
||||
|
||||
/// <summary>
/// Contract for the agent's communication channel to the orchestrator.
/// </summary>
public interface IOrchestratorClient
{
    /// <summary>
    /// Connects to the orchestrator.
    /// </summary>
    /// <param name="ct">Token used to cancel the operation.</param>
    Task ConnectAsync(CancellationToken ct = default);

    /// <summary>
    /// Disconnects from the orchestrator.
    /// </summary>
    /// <param name="ct">Token used to cancel the operation.</param>
    Task DisconnectAsync(CancellationToken ct = default);

    /// <summary>
    /// Sends a heartbeat to the orchestrator.
    /// </summary>
    /// <param name="heartbeat">The heartbeat message to send.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    Task SendHeartbeatAsync(AgentHeartbeatMessage heartbeat, CancellationToken ct = default);

    /// <summary>
    /// Sends log entries to the orchestrator.
    /// </summary>
    /// <param name="logs">The log entries to send.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    Task SendLogsAsync(IReadOnlyList<LogEntry> logs, CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// A single log record sent to the orchestrator.
/// </summary>
public sealed record LogEntry
{
    /// <summary>Identifier of the task that generated this log.</summary>
    public required Guid TaskId { get; init; }

    /// <summary>Time at which the log was generated.</summary>
    public required DateTimeOffset Timestamp { get; init; }

    /// <summary>Severity of the log.</summary>
    public required LogLevel Level { get; init; }

    /// <summary>Text of the log message.</summary>
    public required string Message { get; init; }
}
|
||||
|
||||
/// <summary>
/// Severity levels for agent logs, ordered from least to most severe.
/// </summary>
public enum LogLevel
{
    /// <summary>Trace level logging.</summary>
    Trace = 0,

    /// <summary>Debug level logging.</summary>
    Debug = 1,

    /// <summary>Information level logging.</summary>
    Information = 2,

    /// <summary>Warning level logging.</summary>
    Warning = 3,

    /// <summary>Error level logging.</summary>
    Error = 4,

    /// <summary>Critical error logging.</summary>
    Critical = 5
}
|
||||
@@ -0,0 +1,60 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Core.Communication;
|
||||
|
||||
/// <summary>
/// Orchestrator client stub for testing: tracks a connected flag and logs each call instead of sending anything.
/// </summary>
public sealed class StubOrchestratorClient : IOrchestratorClient
{
    private readonly ILogger<StubOrchestratorClient> _logger;
    private bool _connected;

    /// <summary>
    /// Initializes the stub with the logger it reports calls to.
    /// </summary>
    public StubOrchestratorClient(ILogger<StubOrchestratorClient> logger) => _logger = logger;

    /// <inheritdoc />
    public Task ConnectAsync(CancellationToken ct = default)
    {
        _connected = true;
        _logger.LogDebug("Stub: Connected to orchestrator");
        return Task.CompletedTask;
    }

    /// <inheritdoc />
    public Task DisconnectAsync(CancellationToken ct = default)
    {
        _connected = false;
        _logger.LogDebug("Stub: Disconnected from orchestrator");
        return Task.CompletedTask;
    }

    /// <inheritdoc />
    public Task SendHeartbeatAsync(AgentHeartbeatMessage heartbeat, CancellationToken ct = default)
    {
        EnsureConnected();

        _logger.LogDebug(
            "Stub: Sent heartbeat - status={Status}, tasks={TaskCount}",
            heartbeat.Status,
            heartbeat.RunningTaskCount);

        return Task.CompletedTask;
    }

    /// <inheritdoc />
    public Task SendLogsAsync(IReadOnlyList<LogEntry> logs, CancellationToken ct = default)
    {
        EnsureConnected();

        _logger.LogDebug("Stub: Sent {Count} log entries", logs.Count);
        return Task.CompletedTask;
    }

    // Throws synchronously when a send is attempted before ConnectAsync or after DisconnectAsync.
    private void EnsureConnected()
    {
        if (_connected)
        {
            return;
        }

        throw new InvalidOperationException("Not connected to orchestrator");
    }
}
|
||||
@@ -0,0 +1,35 @@
|
||||
namespace StellaOps.Agent.Core.Credentials;
|
||||
|
||||
/// <summary>
/// Credential provider for the <c>env</c> scheme; the secret path is used as the name of
/// an environment variable.
/// </summary>
public sealed class EnvironmentCredentialProvider : ICredentialProvider
{
    /// <inheritdoc />
    public string Scheme => "env";

    /// <inheritdoc />
    /// <remarks>Completes synchronously; <paramref name="ct"/> is not observed.</remarks>
    public Task<string?> GetSecretAsync(string path, CancellationToken ct = default)
        => Task.FromResult(Environment.GetEnvironmentVariable(path));
}
|
||||
|
||||
/// <summary>
/// Credential provider for the <c>file</c> scheme; the secret path is a file system path
/// and the secret is the file's text with surrounding whitespace removed.
/// </summary>
public sealed class FileCredentialProvider : ICredentialProvider
{
    /// <inheritdoc />
    public string Scheme => "file";

    /// <summary>
    /// Reads the secret stored in the file at <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Path of the file holding the secret.</param>
    /// <param name="ct">Cancellation token for the read.</param>
    /// <returns>
    /// The trimmed file content, or <c>null</c> when the file does not exist
    /// (including when it disappears before it can be read).
    /// </returns>
    public async Task<string?> GetSecretAsync(string path, CancellationToken ct = default)
    {
        if (!File.Exists(path))
        {
            return null;
        }

        try
        {
            var content = await File.ReadAllTextAsync(path, ct);
            return content.Trim();
        }
        catch (Exception ex) when (ex is FileNotFoundException or DirectoryNotFoundException)
        {
            // The file was removed between the existence check and the read. Report it
            // the same way as a file that was never there instead of leaking an I/O error.
            return null;
        }
    }
}
|
||||
@@ -0,0 +1,97 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Core.Credentials;
|
||||
|
||||
/// <summary>
/// Resolves credential references of the form <c>scheme://path</c> to their actual values
/// by dispatching to the <see cref="ICredentialProvider"/> registered for the scheme.
/// </summary>
public sealed partial class CredentialResolver
{
    // Scheme lookup is case-insensitive, matching the case-insensitive reference pattern.
    private readonly Dictionary<string, ICredentialProvider> _providers = new(StringComparer.OrdinalIgnoreCase);
    private readonly ILogger<CredentialResolver> _logger;

    /// <summary>
    /// Creates a new instance of <see cref="CredentialResolver"/>.
    /// </summary>
    /// <param name="providers">
    /// Providers to register. When two providers report the same scheme, the later one wins.
    /// </param>
    /// <param name="logger">Logger for diagnostic messages.</param>
    /// <exception cref="ArgumentNullException">If <paramref name="providers"/> is null.</exception>
    public CredentialResolver(IEnumerable<ICredentialProvider> providers, ILogger<CredentialResolver> logger)
    {
        ArgumentNullException.ThrowIfNull(providers);

        _logger = logger;

        foreach (var provider in providers)
        {
            _providers[provider.Scheme] = provider;
            _logger.LogDebug("Registered credential provider for scheme {Scheme}", provider.Scheme);
        }
    }

    /// <summary>
    /// Resolves a credential reference to its actual value.
    /// </summary>
    /// <param name="reference">The credential reference (e.g., "env://DB_PASSWORD").</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// The resolved secret value; an empty string for a null or empty input; the input itself
    /// when it does not have the <c>scheme://path</c> shape (treated as a literal value).
    /// </returns>
    /// <exception cref="UnknownCredentialProviderException">If the scheme is not recognized.</exception>
    /// <exception cref="CredentialNotFoundException">If the credential cannot be found.</exception>
    public async Task<string> ResolveAsync(string reference, CancellationToken ct = default)
    {
        if (string.IsNullOrEmpty(reference))
        {
            return string.Empty;
        }

        var parsed = ParseReference(reference);
        if (parsed is null)
        {
            // Not a reference, return as-is (literal value)
            return reference;
        }

        if (!_providers.TryGetValue(parsed.Scheme, out var provider))
        {
            throw new UnknownCredentialProviderException(parsed.Scheme);
        }

        var value = await provider.GetSecretAsync(parsed.Path, ct);
        if (value is null)
        {
            throw new CredentialNotFoundException(reference);
        }

        // Only the scheme is logged; the path is masked.
        _logger.LogDebug("Resolved credential reference {Scheme}://***", parsed.Scheme);
        return value;
    }

    /// <summary>
    /// Resolves all credential references in a dictionary.
    /// </summary>
    /// <param name="credentials">Dictionary of credential references.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Dictionary with the same keys and resolved values.</returns>
    /// <exception cref="ArgumentNullException">If <paramref name="credentials"/> is null.</exception>
    public async Task<IReadOnlyDictionary<string, string>> ResolveAllAsync(
        IReadOnlyDictionary<string, string> credentials,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(credentials);

        var resolved = new Dictionary<string, string>(credentials.Count);

        // Resolved one at a time, in enumeration order; the first failure aborts the whole batch.
        foreach (var (key, reference) in credentials)
        {
            resolved[key] = await ResolveAsync(reference, ct);
        }

        return resolved;
    }

    /// <summary>
    /// Splits <paramref name="reference"/> into scheme and path, or returns null when it
    /// does not match <c>scheme://path</c>.
    /// </summary>
    private static CredentialReference? ParseReference(string reference)
    {
        var match = ReferencePattern().Match(reference);
        return match.Success
            ? new CredentialReference(match.Groups[1].Value, match.Groups[2].Value)
            : null;
    }

    // CultureInvariant keeps scheme matching independent of the process culture.
    // RegexOptions.Compiled is not needed: the source generator ignores it.
    [GeneratedRegex(@"^([a-z]+)://(.+)$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
    private static partial Regex ReferencePattern();
}
|
||||
|
||||
/// <summary>
/// A credential reference split into its two parts.
/// </summary>
/// <param name="Scheme">Provider scheme, the part before <c>://</c>.</param>
/// <param name="Path">Provider-specific path, the part after <c>://</c>.</param>
internal sealed record CredentialReference(
    string Scheme,
    string Path);
|
||||
@@ -0,0 +1,20 @@
|
||||
namespace StellaOps.Agent.Core.Credentials;
|
||||
|
||||
/// <summary>
/// Contract for a source of secrets that is addressed through a URI scheme.
/// </summary>
public interface ICredentialProvider
{
    /// <summary>
    /// URI scheme served by this provider (e.g., "env", "file", "vault").
    /// </summary>
    string Scheme { get; }

    /// <summary>
    /// Looks up a secret.
    /// </summary>
    /// <param name="path">Provider-specific path of the secret.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The secret value, or null if not found.</returns>
    Task<string?> GetSecretAsync(
        string path,
        CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,123 @@
|
||||
namespace StellaOps.Agent.Core.Exceptions;
|
||||
|
||||
/// <summary>
/// Common base type of the exceptions declared by Agent Core.
/// </summary>
public abstract class AgentException : Exception
{
    /// <summary>
    /// Initializes the exception with a message.
    /// </summary>
    /// <param name="message">Description of the error.</param>
    protected AgentException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Initializes the exception with a message and the exception that caused it.
    /// </summary>
    /// <param name="message">Description of the error.</param>
    /// <param name="innerException">Underlying cause.</param>
    protected AgentException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
|
||||
/// <summary>
/// Signals that a capability with the same name has already been registered.
/// </summary>
public sealed class CapabilityAlreadyRegisteredException : AgentException
{
    /// <summary>
    /// Creates the exception for the given capability name.
    /// </summary>
    /// <param name="capabilityName">Name of the capability that was registered twice.</param>
    public CapabilityAlreadyRegisteredException(string capabilityName)
        : base($"Capability '{capabilityName}' is already registered")
        => CapabilityName = capabilityName;

    /// <summary>
    /// Name of the capability.
    /// </summary>
    public string CapabilityName { get; }
}
|
||||
|
||||
/// <summary>
/// Signals that no capability handles the requested task type.
/// </summary>
public sealed class UnsupportedTaskTypeException : AgentException
{
    /// <summary>
    /// Creates the exception for the given task type.
    /// </summary>
    /// <param name="taskType">Task type that no capability supports.</param>
    public UnsupportedTaskTypeException(string taskType)
        : base($"No capability found for task type '{taskType}'")
        => TaskType = taskType;

    /// <summary>
    /// The unsupported task type.
    /// </summary>
    public string TaskType { get; }
}
|
||||
|
||||
/// <summary>
/// Signals that no credential provider is registered for a scheme.
/// </summary>
public sealed class UnknownCredentialProviderException : AgentException
{
    /// <summary>
    /// Creates the exception for the given scheme.
    /// </summary>
    /// <param name="scheme">Scheme for which no provider was found.</param>
    public UnknownCredentialProviderException(string scheme)
        : base($"Unknown credential provider scheme '{scheme}'")
        => Scheme = scheme;

    /// <summary>
    /// The unknown scheme.
    /// </summary>
    public string Scheme { get; }
}
|
||||
|
||||
/// <summary>
/// Signals that a credential reference could not be resolved to a value.
/// </summary>
public sealed class CredentialNotFoundException : AgentException
{
    /// <summary>
    /// Creates the exception for the given reference.
    /// </summary>
    /// <param name="reference">Credential reference that could not be resolved.</param>
    public CredentialNotFoundException(string reference)
        : base($"Credential not found: {reference}")
        => Reference = reference;

    /// <summary>
    /// The credential reference.
    /// </summary>
    public string Reference { get; }
}
|
||||
|
||||
/// <summary>
/// Signals that a task did not finish within its timeout.
/// </summary>
public sealed class TaskTimeoutException : AgentException
{
    /// <summary>
    /// Creates the exception for the given task and timeout.
    /// </summary>
    /// <param name="taskId">Identifier of the task that timed out.</param>
    /// <param name="timeout">Timeout that was exceeded.</param>
    public TaskTimeoutException(Guid taskId, TimeSpan timeout)
        : base($"Task {taskId} timed out after {timeout}")
    {
        (TaskId, Timeout) = (taskId, timeout);
    }

    /// <summary>
    /// Task that timed out.
    /// </summary>
    public Guid TaskId { get; }

    /// <summary>
    /// Timeout duration.
    /// </summary>
    public TimeSpan Timeout { get; }
}
|
||||
@@ -0,0 +1,170 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Diagnostics;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Capability;
|
||||
using StellaOps.Agent.Core.Credentials;
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Core.Execution;
|
||||
|
||||
/// <summary>
/// Executes tasks using the capability registered for their task type. Each execution is
/// bounded by the task's timeout and is tracked while it runs so that it can be cancelled
/// through <see cref="CancelTask"/>.
/// </summary>
public sealed class TaskExecutor
{
    private readonly CapabilityRegistry _capabilities;
    private readonly CredentialResolver _credentialResolver;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<TaskExecutor> _logger;

    // Task id -> token source that cancels the in-flight execution of that task.
    private readonly ConcurrentDictionary<Guid, CancellationTokenSource> _runningTasks = new();

    /// <summary>
    /// Creates a new instance of <see cref="TaskExecutor"/>.
    /// </summary>
    /// <param name="capabilities">Registry used to find the capability for a task type.</param>
    /// <param name="credentialResolver">Resolver applied to the task's credential references.</param>
    /// <param name="timeProvider">Clock used for the completion timestamp of results.</param>
    /// <param name="logger">Logger for execution diagnostics.</param>
    public TaskExecutor(
        CapabilityRegistry capabilities,
        CredentialResolver credentialResolver,
        TimeProvider timeProvider,
        ILogger<TaskExecutor> logger)
    {
        _capabilities = capabilities;
        _credentialResolver = credentialResolver;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <summary>
    /// Gets the number of currently running tasks.
    /// </summary>
    public int RunningTaskCount => _runningTasks.Count;

    /// <summary>
    /// Gets the IDs of currently running tasks.
    /// </summary>
    public IReadOnlyList<Guid> RunningTaskIds => _runningTasks.Keys.ToList().AsReadOnly();

    /// <summary>
    /// Executes a task.
    /// </summary>
    /// <param name="task">The task to execute.</param>
    /// <param name="progress">Optional progress reporter.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// The capability's result stamped with duration and completion time, or a failed result
    /// describing a timeout, a cancellation or an exception.
    /// </returns>
    /// <exception cref="ArgumentNullException">If <paramref name="task"/> is null.</exception>
    /// <exception cref="UnsupportedTaskTypeException">If no capability handles the task type.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(
        AgentTaskInfo task,
        IProgress<TaskProgress>? progress = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        var capability = _capabilities.GetForTaskType(task.TaskType)
            ?? throw new UnsupportedTaskTypeException(task.TaskType);

        // taskCts fires on timeout only; linkedCts additionally fires on caller cancellation
        // and on CancelTask. Keeping them apart lets the handlers below tell the cases apart.
        using var taskCts = new CancellationTokenSource(task.Timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, taskCts.Token);

        _runningTasks[task.Id] = linkedCts;

        var stopwatch = Stopwatch.StartNew();

        try
        {
            _logger.LogInformation(
                "Executing task {TaskId} of type {TaskType} using capability {Capability}",
                task.Id, task.TaskType, capability.Name);

            progress?.Report(new TaskProgress(task.Id, TaskState.Running, 0, "Starting"));

            // Resolve credentials
            var resolvedCredentials = await _credentialResolver.ResolveAllAsync(task.Credentials, linkedCts.Token);
            var resolvedTask = task with { Credentials = resolvedCredentials };

            // Execute via capability
            var result = await capability.ExecuteAsync(resolvedTask, linkedCts.Token);

            progress?.Report(new TaskProgress(
                task.Id,
                result.Success ? TaskState.Succeeded : TaskState.Failed,
                100,
                result.Success ? "Completed" : result.Error ?? "Failed"));

            _logger.LogInformation(
                "Task {TaskId} completed with status {Status} in {Duration}ms",
                task.Id,
                result.Success ? "success" : "failure",
                stopwatch.ElapsedMilliseconds);

            return result with
            {
                Duration = stopwatch.Elapsed,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (OperationCanceledException) when (taskCts.IsCancellationRequested)
        {
            _logger.LogWarning("Task {TaskId} timed out after {Timeout}", task.Id, task.Timeout);

            progress?.Report(new TaskProgress(task.Id, TaskState.Failed, 0, "Timeout"));

            return CreateFailure(task.Id, $"Task timed out after {task.Timeout}", stopwatch);
        }
        catch (OperationCanceledException)
        {
            _logger.LogInformation("Task {TaskId} was cancelled", task.Id);

            progress?.Report(new TaskProgress(task.Id, TaskState.Cancelled, 0, "Cancelled"));

            return CreateFailure(task.Id, "Task was cancelled", stopwatch);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Task {TaskId} failed with exception", task.Id);

            progress?.Report(new TaskProgress(task.Id, TaskState.Failed, 0, ex.Message));

            return CreateFailure(task.Id, ex.Message, stopwatch);
        }
        finally
        {
            // Remove only this execution's registration. If another execution with the same
            // id replaced the entry in the meantime, its registration must stay in place.
            _runningTasks.TryRemove(new KeyValuePair<Guid, CancellationTokenSource>(task.Id, linkedCts));
        }
    }

    /// <summary>
    /// Cancels a running task.
    /// </summary>
    /// <param name="taskId">The task to cancel.</param>
    /// <returns>True if the task was found and cancelled.</returns>
    public bool CancelTask(Guid taskId)
    {
        if (_runningTasks.TryGetValue(taskId, out var cts))
        {
            _logger.LogInformation("Cancelling task {TaskId}", taskId);

            try
            {
                cts.Cancel();
                return true;
            }
            catch (ObjectDisposedException)
            {
                // The task finished and disposed its token source between the lookup and
                // the Cancel call; there is nothing left to cancel.
            }
        }

        _logger.LogWarning("Task {TaskId} not found for cancellation", taskId);
        return false;
    }

    /// <summary>
    /// Builds the failed result shared by the timeout, cancellation and exception paths.
    /// </summary>
    private AgentTaskResult CreateFailure(Guid taskId, string error, Stopwatch stopwatch) => new()
    {
        TaskId = taskId,
        Success = false,
        Error = error,
        CompletedAt = _timeProvider.GetUtcNow(),
        Duration = stopwatch.Elapsed
    };
}
|
||||
@@ -0,0 +1,82 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Agent.Core.Capability;
|
||||
using StellaOps.Agent.Core.Communication;
|
||||
|
||||
namespace StellaOps.Agent.Core.Hosting;
|
||||
|
||||
/// <summary>
/// Main hosted service for the Stella Agent. On start it initializes capabilities, connects
/// to the orchestrator and starts the optional agent server; on stop it stops the server
/// and disconnects.
/// </summary>
public sealed class AgentHost : IHostedService
{
    private readonly AgentConfiguration _config;
    private readonly CapabilityRegistry _capabilities;
    private readonly IOrchestratorClient _orchestratorClient;
    private readonly IAgentServer? _agentServer;
    private readonly ILogger<AgentHost> _logger;

    /// <summary>
    /// Creates a new instance of <see cref="AgentHost"/>.
    /// </summary>
    /// <param name="config">Agent configuration options.</param>
    /// <param name="capabilities">Registry of the agent's capabilities.</param>
    /// <param name="orchestratorClient">Client used to connect to the orchestrator.</param>
    /// <param name="agentServer">Optional task server; null when the agent runs without one.</param>
    /// <param name="logger">Logger for lifecycle messages.</param>
    public AgentHost(
        IOptions<AgentConfiguration> config,
        CapabilityRegistry capabilities,
        IOrchestratorClient orchestratorClient,
        IAgentServer? agentServer,
        ILogger<AgentHost> logger)
    {
        _config = config.Value;
        _capabilities = capabilities;
        _orchestratorClient = orchestratorClient;
        _agentServer = agentServer;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        _logger.LogInformation(
            "Starting Stella Agent {Name} ({Id})",
            _config.AgentName,
            _config.AgentId);

        // Initialize capabilities
        _logger.LogDebug("Initializing capabilities...");
        await _capabilities.InitializeAllAsync(cancellationToken);

        // Connect to orchestrator
        _logger.LogDebug("Connecting to orchestrator at {Url}...", _config.OrchestratorUrl);
        await _orchestratorClient.ConnectAsync(cancellationToken);

        // Start agent server if available
        if (_agentServer is not null)
        {
            _logger.LogDebug("Starting agent server on port {Port}...", _config.GrpcPort);
            await _agentServer.StartAsync(cancellationToken);
        }

        // Query the registry once so the count and the names describe the same list.
        var capabilities = _capabilities.GetCapabilities();

        _logger.LogInformation(
            "Agent started with {Count} capabilities: {Capabilities}",
            capabilities.Count,
            string.Join(", ", capabilities.Select(c => c.Name)));
    }

    /// <inheritdoc />
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        _logger.LogInformation("Stopping Stella Agent {Id}", _config.AgentId);

        try
        {
            // Stop agent server if available
            if (_agentServer is not null)
            {
                await _agentServer.StopAsync(cancellationToken);
            }
        }
        finally
        {
            // Disconnect from orchestrator even when stopping the server failed, so the
            // connection is not left open; the server's exception still propagates.
            await _orchestratorClient.DisconnectAsync(cancellationToken);
        }

        _logger.LogInformation("Agent stopped");
    }
}
|
||||
@@ -0,0 +1,17 @@
|
||||
namespace StellaOps.Agent.Core.Hosting;
|
||||
|
||||
/// <summary>
/// Contract for the server through which the agent is handed tasks.
/// </summary>
public interface IAgentServer
{
    /// <summary>
    /// Brings the server up.
    /// </summary>
    /// <param name="ct">Cancellation token.</param>
    Task StartAsync(
        CancellationToken ct = default);

    /// <summary>
    /// Shuts the server down.
    /// </summary>
    /// <param name="ct">Cancellation token.</param>
    Task StopAsync(
        CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,117 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.Agent.Core.Models;
|
||||
|
||||
/// <summary>
/// Describes one capability of the agent.
/// </summary>
/// <param name="Name">Name of the capability.</param>
/// <param name="Version">Version of the capability.</param>
/// <param name="SupportedTaskTypes">Task types the capability supports.</param>
public sealed record CapabilityInfo(string Name, string Version, ImmutableArray<string> SupportedTaskTypes);
|
||||
|
||||
/// <summary>
/// Outcome of a capability health check.
/// </summary>
/// <param name="IsHealthy">Whether the capability is healthy.</param>
/// <param name="Message">Optional status message.</param>
/// <param name="Details">Optional additional details, keyed by name.</param>
public sealed record CapabilityHealthStatus(bool IsHealthy, string? Message = null, IReadOnlyDictionary<string, object>? Details = null);
|
||||
|
||||
/// <summary>
/// Facts about the host machine that are included in heartbeats.
/// </summary>
public sealed record SystemInfo
{
    /// <summary>
    /// Host name of the machine.
    /// </summary>
    public required string Hostname { get; init; }

    /// <summary>
    /// Description of the operating system.
    /// </summary>
    public required string OsDescription { get; init; }

    /// <summary>
    /// Number of available processors.
    /// </summary>
    public required int ProcessorCount { get; init; }

    /// <summary>
    /// Total available memory, in bytes.
    /// </summary>
    public required long MemoryBytes { get; init; }
}
|
||||
|
||||
/// <summary>
/// Payload of a heartbeat that the agent sends to the orchestrator.
/// </summary>
public sealed record AgentHeartbeatMessage
{
    /// <summary>
    /// Identifier of the sending agent.
    /// </summary>
    public required string AgentId { get; init; }

    /// <summary>
    /// Point in time at which the heartbeat was created.
    /// </summary>
    public required DateTimeOffset Timestamp { get; init; }

    /// <summary>
    /// Status of the agent at that time.
    /// </summary>
    public required AgentRuntimeStatus Status { get; init; }

    /// <summary>
    /// Capabilities the agent offers.
    /// </summary>
    public required IReadOnlyList<CapabilityInfo> Capabilities { get; init; }

    /// <summary>
    /// Information about the host system.
    /// </summary>
    public required SystemInfo SystemInfo { get; init; }

    /// <summary>
    /// Count of tasks that are currently running.
    /// </summary>
    public int RunningTaskCount { get; init; }

    /// <summary>
    /// Detailed health information per capability; may be null.
    /// </summary>
    public IReadOnlyDictionary<string, object>? HealthDetails { get; init; }
}
|
||||
|
||||
/// <summary>
/// Lifecycle and health states the agent reports about itself. The numeric values are
/// spelled out and match the declaration order.
/// </summary>
public enum AgentRuntimeStatus
{
    /// <summary>Agent is starting up.</summary>
    Starting = 0,

    /// <summary>Agent is active and healthy.</summary>
    Active = 1,

    /// <summary>Agent is degraded (some capabilities unhealthy).</summary>
    Degraded = 2,

    /// <summary>Agent is shutting down.</summary>
    ShuttingDown = 3,

    /// <summary>Agent is stopped.</summary>
    Stopped = 4
}
|
||||
@@ -0,0 +1,118 @@
|
||||
namespace StellaOps.Agent.Core.Models;
|
||||
|
||||
/// <summary>
/// Describes a unit of work that the agent is asked to execute.
/// </summary>
public sealed record AgentTaskInfo
{
    /// <summary>
    /// Unique identifier of the task.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Type of the task to execute.
    /// </summary>
    public required string TaskType { get; init; }

    /// <summary>
    /// JSON payload with the task-specific parameters.
    /// </summary>
    public required string Payload { get; init; }

    /// <summary>
    /// Credential references to resolve. Empty by default.
    /// </summary>
    public IReadOnlyDictionary<string, string> Credentials { get; init; } = new Dictionary<string, string>();

    /// <summary>
    /// Variables for template substitution. Empty by default.
    /// </summary>
    public IReadOnlyDictionary<string, string> Variables { get; init; } = new Dictionary<string, string>();

    /// <summary>
    /// Point in time at which the agent received the task.
    /// </summary>
    public DateTimeOffset ReceivedAt { get; init; }

    /// <summary>
    /// Maximum execution time; 30 minutes unless set.
    /// </summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(30);
}
|
||||
|
||||
/// <summary>
/// Outcome of executing a task.
/// </summary>
public sealed record AgentTaskResult
{
    /// <summary>
    /// Identifier of the task that was executed.
    /// </summary>
    public required Guid TaskId { get; init; }

    /// <summary>
    /// True when the task succeeded.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Error message if the task failed; null otherwise.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Values produced by the task. Empty by default.
    /// </summary>
    public IReadOnlyDictionary<string, object> Outputs { get; init; } = new Dictionary<string, object>();

    /// <summary>
    /// Point in time at which the task completed.
    /// </summary>
    public DateTimeOffset CompletedAt { get; init; }

    /// <summary>
    /// Time the task took to execute.
    /// </summary>
    public TimeSpan Duration { get; init; }
}
|
||||
|
||||
/// <summary>
/// A progress notification for a task.
/// </summary>
/// <param name="TaskId">Identifier of the task the notification is about.</param>
/// <param name="State">State of the task.</param>
/// <param name="ProgressPercent">Progress as a percentage.</param>
/// <param name="Message">Accompanying message.</param>
public sealed record TaskProgress(Guid TaskId, TaskState State, int ProgressPercent, string Message);
|
||||
|
||||
/// <summary>
/// States a task can be in. The numeric values are spelled out and match the
/// declaration order.
/// </summary>
public enum TaskState
{
    /// <summary>Task is waiting to be executed.</summary>
    Pending = 0,

    /// <summary>Task is currently executing.</summary>
    Running = 1,

    /// <summary>Task completed successfully.</summary>
    Succeeded = 2,

    /// <summary>Task failed.</summary>
    Failed = 3,

    /// <summary>Task was cancelled.</summary>
    Cancelled = 4
}
|
||||
@@ -0,0 +1,105 @@
|
||||
using System.Runtime.InteropServices;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Agent.Core.Capability;
|
||||
using StellaOps.Agent.Core.Communication;
|
||||
using StellaOps.Agent.Core.Execution;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Core.Services;
|
||||
|
||||
/// <summary>
/// Background service that sends periodic heartbeats to the orchestrator. Each heartbeat
/// carries the agent's capabilities, their health, system information and the number of
/// running tasks.
/// </summary>
public sealed class HeartbeatService : BackgroundService
{
    // Wait a bit before the first heartbeat to allow initialization.
    private static readonly TimeSpan InitialDelay = TimeSpan.FromSeconds(5);

    private readonly AgentConfiguration _config;
    private readonly CapabilityRegistry _capabilities;
    private readonly TaskExecutor _taskExecutor;
    private readonly IOrchestratorClient _orchestratorClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<HeartbeatService> _logger;

    /// <summary>
    /// Creates a new instance of <see cref="HeartbeatService"/>.
    /// </summary>
    /// <param name="config">Agent configuration (agent id and heartbeat interval are read).</param>
    /// <param name="capabilities">Registry queried for capabilities and their health.</param>
    /// <param name="taskExecutor">Executor queried for the running task count.</param>
    /// <param name="orchestratorClient">Client the heartbeats are sent through.</param>
    /// <param name="timeProvider">Clock used for the heartbeat timestamp.</param>
    /// <param name="logger">Logger for service diagnostics.</param>
    public HeartbeatService(
        IOptions<AgentConfiguration> config,
        CapabilityRegistry capabilities,
        TaskExecutor taskExecutor,
        IOrchestratorClient orchestratorClient,
        TimeProvider timeProvider,
        ILogger<HeartbeatService> logger)
    {
        _config = config.Value;
        _capabilities = capabilities;
        _taskExecutor = taskExecutor;
        _orchestratorClient = orchestratorClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "Heartbeat service started with interval {Interval}",
            _config.HeartbeatInterval);

        try
        {
            await Task.Delay(InitialDelay, stoppingToken);

            while (!stoppingToken.IsCancellationRequested)
            {
                try
                {
                    await SendHeartbeatAsync(stoppingToken);
                }
                catch (Exception ex) when (ex is not OperationCanceledException || !stoppingToken.IsCancellationRequested)
                {
                    // A failed heartbeat must not stop the loop; the next interval retries.
                    // Cancellation caused by shutdown is not a failure and is handled below.
                    _logger.LogWarning(ex, "Failed to send heartbeat");
                }

                await Task.Delay(_config.HeartbeatInterval, stoppingToken);
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Shutdown was requested while waiting or sending; leave the loop quietly so
            // that the stop message below is still logged.
        }

        _logger.LogInformation("Heartbeat service stopped");
    }

    /// <summary>
    /// Builds one heartbeat from the current agent state and sends it to the orchestrator.
    /// </summary>
    private async Task SendHeartbeatAsync(CancellationToken ct)
    {
        var capabilities = _capabilities.GetCapabilities();
        var (allHealthy, healthDetails) = await _capabilities.CheckHealthAsync(ct);

        var heartbeat = new AgentHeartbeatMessage
        {
            AgentId = _config.AgentId,
            Timestamp = _timeProvider.GetUtcNow(),
            // Any unhealthy capability downgrades the whole agent to Degraded.
            Status = allHealthy ? AgentRuntimeStatus.Active : AgentRuntimeStatus.Degraded,
            Capabilities = capabilities,
            SystemInfo = GetSystemInfo(),
            RunningTaskCount = _taskExecutor.RunningTaskCount,
            HealthDetails = healthDetails
        };

        await _orchestratorClient.SendHeartbeatAsync(heartbeat, ct);

        _logger.LogDebug(
            "Heartbeat sent: status={Status}, tasks={TaskCount}, capabilities={CapabilityCount}",
            heartbeat.Status,
            heartbeat.RunningTaskCount,
            heartbeat.Capabilities.Count);
    }

    /// <summary>
    /// Collects host name, OS description, processor count and the memory figure reported
    /// by the GC as total available memory.
    /// </summary>
    private static SystemInfo GetSystemInfo()
    {
        return new SystemInfo
        {
            Hostname = Environment.MachineName,
            OsDescription = RuntimeInformation.OSDescription,
            ProcessorCount = Environment.ProcessorCount,
            MemoryBytes = GC.GetGCMemoryInfo().TotalAvailableMemoryBytes
        };
    }
}
|
||||
@@ -0,0 +1,159 @@
|
||||
using System.Threading.Channels;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Communication;
|
||||
using LogEntry = StellaOps.Agent.Core.Communication.LogEntry;
|
||||
using LogLevel = StellaOps.Agent.Core.Communication.LogLevel;
|
||||
|
||||
namespace StellaOps.Agent.Core.Services;
|
||||
|
||||
/// <summary>
/// Streams logs from task execution to the orchestrator.
/// </summary>
/// <remarks>
/// Entries are queued in a bounded channel; when the channel is full the oldest entry is
/// discarded to make room. A background loop groups queued entries into batches and hands
/// them to <see cref="IOrchestratorClient"/>. A batch that fails to send is kept and retried
/// after a short pause. On disposal the remaining entries are flushed on a best-effort basis.
/// </remarks>
public sealed class LogStreamer : IAsyncDisposable
{
    private const int ChannelCapacity = 10000;
    private const int MaxBatchSize = 100;
    private const int DropLogInterval = 1000;

    private static readonly TimeSpan BatchWindow = TimeSpan.FromMilliseconds(100);
    private static readonly TimeSpan RetryDelay = TimeSpan.FromSeconds(1);
    private static readonly TimeSpan ShutdownFlushTimeout = TimeSpan.FromSeconds(5);

    private readonly IOrchestratorClient _orchestratorClient;
    private readonly Channel<LogEntry> _logChannel;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<LogStreamer> _logger;
    private readonly CancellationTokenSource _cts = new();
    private readonly Task _streamTask;
    private long _droppedCount;
    private int _disposed;

    /// <summary>
    /// Creates a new instance of <see cref="LogStreamer"/> and starts the background send loop.
    /// </summary>
    /// <param name="orchestratorClient">Client used to deliver log batches.</param>
    /// <param name="timeProvider">Clock used to timestamp log entries.</param>
    /// <param name="logger">Diagnostic logger for the streamer itself.</param>
    public LogStreamer(
        IOrchestratorClient orchestratorClient,
        TimeProvider timeProvider,
        ILogger<LogStreamer> logger)
    {
        _orchestratorClient = orchestratorClient;
        _timeProvider = timeProvider;
        _logger = logger;

        // With DropOldest, TryWrite always succeeds while the channel is open, so evictions
        // can only be observed through the itemDropped callback.
        _logChannel = Channel.CreateBounded<LogEntry>(
            new BoundedChannelOptions(ChannelCapacity)
            {
                FullMode = BoundedChannelFullMode.DropOldest
            },
            OnEntryDropped);

        _streamTask = StreamLogsAsync(_cts.Token);
        _logger.LogDebug("Log streamer started");
    }

    /// <summary>
    /// Logs a message for a task.
    /// </summary>
    /// <param name="taskId">The task ID.</param>
    /// <param name="level">Log level.</param>
    /// <param name="message">Log message.</param>
    public void Log(Guid taskId, LogLevel level, string message)
    {
        var entry = new LogEntry
        {
            TaskId = taskId,
            Timestamp = _timeProvider.GetUtcNow(),
            Level = level,
            Message = message
        };

        // A false result means the writer was completed, i.e. the streamer has been disposed.
        if (!_logChannel.Writer.TryWrite(entry))
        {
            _logger.LogWarning("Log streamer is shut down, discarding log entry for task {TaskId}", taskId);
        }
    }

    /// <summary>
    /// Logs a trace message.
    /// </summary>
    public void LogTrace(Guid taskId, string message) => Log(taskId, LogLevel.Trace, message);

    /// <summary>
    /// Logs a debug message.
    /// </summary>
    public void LogDebug(Guid taskId, string message) => Log(taskId, LogLevel.Debug, message);

    /// <summary>
    /// Logs an information message.
    /// </summary>
    public void LogInformation(Guid taskId, string message) => Log(taskId, LogLevel.Information, message);

    /// <summary>
    /// Logs a warning message.
    /// </summary>
    public void LogWarning(Guid taskId, string message) => Log(taskId, LogLevel.Warning, message);

    /// <summary>
    /// Logs an error message.
    /// </summary>
    public void LogError(Guid taskId, string message) => Log(taskId, LogLevel.Error, message);

    /// <summary>
    /// Invoked by the channel whenever an entry is evicted because the channel is full.
    /// Reports the first eviction and then every <see cref="DropLogInterval"/>th one so a
    /// sustained overflow does not flood the diagnostic log.
    /// </summary>
    private void OnEntryDropped(LogEntry dropped)
    {
        var total = Interlocked.Increment(ref _droppedCount);
        if (total == 1 || total % DropLogInterval == 0)
        {
            _logger.LogWarning(
                "Log channel full, {DroppedCount} log entries dropped so far (latest for task {TaskId})",
                total,
                dropped.TaskId);
        }
    }

    /// <summary>
    /// Background loop: collect a batch, send it, and pause before retrying when sending fails.
    /// Ends when <paramref name="ct"/> is cancelled or the channel is completed and drained,
    /// then attempts a final flush.
    /// </summary>
    private async Task StreamLogsAsync(CancellationToken ct)
    {
        var batch = new List<LogEntry>(MaxBatchSize);

        try
        {
            while (!ct.IsCancellationRequested)
            {
                if (!await FillBatchAsync(batch, ct))
                {
                    break;
                }

                if (batch.Count > 0 && !await TrySendAsync(batch, ct))
                {
                    // Keep the batch for the next attempt, but never spin on a failing endpoint.
                    await Task.Delay(RetryDelay, ct);
                }
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Shutdown requested; fall through to the final flush.
        }

        await FlushRemainingAsync(batch);
    }

    /// <summary>
    /// Fills <paramref name="batch"/> with up to <see cref="MaxBatchSize"/> entries, waiting at
    /// most <see cref="BatchWindow"/> once there is something to send.
    /// </summary>
    /// <returns><c>false</c> when the channel is completed and empty, otherwise <c>true</c>.</returns>
    private async Task<bool> FillBatchAsync(List<LogEntry> batch, CancellationToken ct)
    {
        var reader = _logChannel.Reader;

        // With nothing pending, wait without a timer so an idle streamer does not wake up
        // (and throw a cancellation exception) every batch window.
        if (batch.Count == 0 && !await reader.WaitToReadAsync(ct))
        {
            return false;
        }

        using var windowCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        windowCts.CancelAfter(BatchWindow);

        try
        {
            while (batch.Count < MaxBatchSize)
            {
                if (reader.TryRead(out var entry))
                {
                    batch.Add(entry);
                }
                else if (!await reader.WaitToReadAsync(windowCts.Token))
                {
                    // Writer completed: nothing more will arrive.
                    break;
                }
            }
        }
        catch (OperationCanceledException) when (!ct.IsCancellationRequested)
        {
            // Batch window elapsed; send what we have.
        }

        return true;
    }

    /// <summary>
    /// Sends <paramref name="batch"/> and clears it on success. On failure the batch is left
    /// intact so it can be retried.
    /// </summary>
    private async Task<bool> TrySendAsync(List<LogEntry> batch, CancellationToken ct)
    {
        try
        {
            await _orchestratorClient.SendLogsAsync(batch, ct);
            batch.Clear();
            return true;
        }
        catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
        {
            _logger.LogWarning(ex, "Failed to send {Count} logs, will retry", batch.Count);
            return false;
        }
    }

    /// <summary>
    /// Best-effort delivery of whatever is still queued at shutdown, bounded by
    /// <see cref="ShutdownFlushTimeout"/>. Failures are logged and the remaining entries are discarded.
    /// </summary>
    private async Task FlushRemainingAsync(List<LogEntry> batch)
    {
        var reader = _logChannel.Reader;
        using var flushCts = new CancellationTokenSource(ShutdownFlushTimeout);

        try
        {
            while (true)
            {
                while (batch.Count < MaxBatchSize && reader.TryRead(out var entry))
                {
                    batch.Add(entry);
                }

                if (batch.Count == 0)
                {
                    return;
                }

                await _orchestratorClient.SendLogsAsync(batch, flushCts.Token);
                batch.Clear();
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to flush logs during shutdown, discarding {Count} entries", batch.Count);
        }
    }

    /// <inheritdoc />
    public async ValueTask DisposeAsync()
    {
        // Guard against double disposal: cancelling a disposed CancellationTokenSource throws.
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return;
        }

        // Stop accepting entries first so the final flush sees a fixed set of items.
        _logChannel.Writer.TryComplete();
        _cts.Cancel();

        try
        {
            await _streamTask;
        }
        catch (OperationCanceledException)
        {
            // Expected
        }

        _cts.Dispose();
        _logger.LogDebug("Log streamer disposed");
    }
}
|
||||
@@ -0,0 +1,23 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Agent.Core</RootNamespace>
|
||||
<Description>Stella Agent Core Runtime - the lightweight agent process that runs on target hosts</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
<PackageReference Include="Microsoft.Extensions.Options" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\__Libraries\StellaOps.ReleaseOrchestrator.Agent\StellaOps.ReleaseOrchestrator.Agent.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,127 @@
|
||||
using Docker.DotNet;
|
||||
using Docker.DotNet.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Services;
|
||||
using LogLevel = StellaOps.Agent.Core.Communication.LogLevel;
|
||||
|
||||
namespace StellaOps.Agent.Docker;
|
||||
|
||||
/// <summary>
/// Streams container logs to the orchestrator in real-time.
/// </summary>
/// <remarks>
/// Output is read from the Docker multiplexed log stream, reassembled into whole lines per
/// output stream (stdout / stderr), classified with a simple keyword heuristic and forwarded
/// to the <see cref="LogStreamer"/>.
/// </remarks>
public sealed class ContainerLogStreamer
{
    private const int ReadBufferSize = 81920;

    private readonly IDockerClient _dockerClient;
    private readonly LogStreamer _logStreamer;
    private readonly ILogger<ContainerLogStreamer> _logger;

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker client used to open the container log stream.</param>
    /// <param name="logStreamer">Destination for the parsed log lines.</param>
    /// <param name="logger">Diagnostic logger.</param>
    public ContainerLogStreamer(
        IDockerClient dockerClient,
        LogStreamer logStreamer,
        ILogger<ContainerLogStreamer> logger)
    {
        _dockerClient = dockerClient;
        _logStreamer = logStreamer;
        _logger = logger;
    }

    /// <summary>
    /// Streams logs from a container until cancellation or until the log stream ends.
    /// Cancellation and streaming errors are logged, not thrown.
    /// </summary>
    /// <param name="taskId">Task ID to associate logs with.</param>
    /// <param name="containerId">Container ID to stream logs from.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <exception cref="ArgumentNullException"><paramref name="containerId"/> is <c>null</c>.</exception>
    public async Task StreamLogsAsync(
        Guid taskId,
        string containerId,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(containerId);

        // One assembler per output stream: frames from stdout and stderr are interleaved, and
        // a line (or a multi-byte UTF-8 character) may be split across two reads.
        var stdoutLines = new LineAssembler();
        var stderrLines = new LineAssembler();

        try
        {
            _logger.LogDebug("Starting log stream for container {ContainerId}", TruncateId(containerId));

            // The multiplexed stream owns the underlying connection and must be disposed.
            using var multiplexedStream = await _dockerClient.Containers.GetContainerLogsAsync(
                containerId,
                tty: false,
                new ContainerLogsParameters
                {
                    Follow = true,
                    ShowStdout = true,
                    ShowStderr = true,
                    Timestamps = true
                },
                ct);

            var buffer = new byte[ReadBufferSize];

            while (!ct.IsCancellationRequested)
            {
                var result = await multiplexedStream.ReadOutputAsync(buffer, 0, buffer.Length, ct);
                if (result.Count == 0)
                {
                    break;
                }

                var isStderr = result.Target == MultiplexedStream.TargetStream.StandardError;
                var assembler = isStderr ? stderrLines : stdoutLines;
                assembler.Append(buffer, result.Count, line => Forward(taskId, line, isStderr));
            }
        }
        catch (OperationCanceledException)
        {
            // Expected when task completes
            _logger.LogDebug("Log stream cancelled for container {ContainerId}", TruncateId(containerId));
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error streaming logs for container {ContainerId}", TruncateId(containerId));
        }
        finally
        {
            // Emit any trailing text that was not terminated by a newline.
            stdoutLines.Flush(line => Forward(taskId, line, isStderr: false));
            stderrLines.Flush(line => Forward(taskId, line, isStderr: true));
        }
    }

    /// <summary>
    /// Classifies one complete line and hands it to the log streamer.
    /// </summary>
    private void Forward(Guid taskId, string line, bool isStderr)
    {
        var (level, message) = ParseLogLine(line, isStderr);
        _logStreamer.Log(taskId, level, message);
    }

    /// <summary>
    /// Derives a log level from the line content. Keyword matches take precedence
    /// (ERROR/FATAL, then WARN, DEBUG, TRACE); otherwise stderr maps to Warning and
    /// everything else to Information. The message is returned unchanged.
    /// </summary>
    private static (LogLevel Level, string Message) ParseLogLine(string line, bool isStderr)
    {
        // Stderr, treat as warning/error
        var baseLevel = isStderr ? LogLevel.Warning : LogLevel.Information;

        // Simple heuristic for log level detection based on content
        if (line.Contains("ERROR", StringComparison.OrdinalIgnoreCase) ||
            line.Contains("FATAL", StringComparison.OrdinalIgnoreCase))
        {
            return (LogLevel.Error, line);
        }

        if (line.Contains("WARN", StringComparison.OrdinalIgnoreCase))
        {
            return (LogLevel.Warning, line);
        }

        if (line.Contains("DEBUG", StringComparison.OrdinalIgnoreCase))
        {
            return (LogLevel.Debug, line);
        }

        if (line.Contains("TRACE", StringComparison.OrdinalIgnoreCase))
        {
            return (LogLevel.Trace, line);
        }

        return (baseLevel, line);
    }

    /// <summary>
    /// Shortens an ID for logging: strips a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }

    /// <summary>
    /// Incrementally decodes UTF-8 bytes and yields complete lines, carrying partial lines
    /// and partial multi-byte sequences over to the next chunk.
    /// </summary>
    private sealed class LineAssembler
    {
        private readonly System.Text.Decoder _decoder = System.Text.Encoding.UTF8.GetDecoder();
        private readonly System.Text.StringBuilder _pending = new();
        private char[] _chars = Array.Empty<char>();

        /// <summary>
        /// Decodes <paramref name="count"/> bytes and invokes <paramref name="onLine"/> for
        /// every line completed by this chunk.
        /// </summary>
        public void Append(byte[] bytes, int count, Action<string> onLine)
        {
            var needed = _decoder.GetCharCount(bytes, 0, count, flush: false);
            if (_chars.Length < needed)
            {
                _chars = new char[needed];
            }

            var written = _decoder.GetChars(bytes, 0, count, _chars, 0, flush: false);
            _pending.Append(_chars, 0, written);
            Drain(onLine, includeRemainder: false);
        }

        /// <summary>
        /// Emits whatever text is still buffered as a final line.
        /// </summary>
        public void Flush(Action<string> onLine) => Drain(onLine, includeRemainder: true);

        private void Drain(Action<string> onLine, bool includeRemainder)
        {
            var text = _pending.ToString();
            _pending.Clear();

            var start = 0;
            for (var newline = text.IndexOf('\n', start); newline >= 0; newline = text.IndexOf('\n', start))
            {
                Emit(text[start..newline], onLine);
                start = newline + 1;
            }

            if (includeRemainder)
            {
                Emit(text[start..], onLine);
            }
            else
            {
                _pending.Append(text, start, text.Length - start);
            }
        }

        private static void Emit(string line, Action<string> onLine)
        {
            // Empty segments are skipped; a trailing carriage return is removed.
            if (line.Length > 0)
            {
                onLine(line.TrimEnd('\r'));
            }
        }
    }
}
|
||||
@@ -0,0 +1,105 @@
|
||||
using Docker.DotNet;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Capability;
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Docker.Tasks;
|
||||
|
||||
namespace StellaOps.Agent.Docker;
|
||||
|
||||
/// <summary>
/// Docker capability for managing containers on target hosts.
/// </summary>
/// <remarks>
/// Each supported task type is mapped to a dedicated <see cref="IDockerTask"/> handler;
/// <see cref="ExecuteAsync"/> dispatches on the task type (case-insensitively).
/// </remarks>
public sealed class DockerCapability : IAgentCapability
{
    // Built once and wrapped read-only: the list never changes, so there is no reason to
    // allocate (and expose) a fresh mutable array on every property access.
    private static readonly IReadOnlyList<string> TaskTypes = Array.AsReadOnly(new[]
    {
        "docker.pull",
        "docker.run",
        "docker.stop",
        "docker.remove",
        "docker.health-check",
        "docker.logs"
    });

    private readonly IDockerClient _dockerClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<DockerCapability> _logger;
    private readonly Dictionary<string, IDockerTask> _taskHandlers;

    /// <inheritdoc />
    public string Name => "docker";

    /// <inheritdoc />
    public string Version => "1.0.0";

    /// <inheritdoc />
    public IReadOnlyList<string> SupportedTaskTypes => TaskTypes;

    /// <summary>
    /// Creates a new instance and registers one handler per supported task type.
    /// </summary>
    /// <param name="dockerClient">Docker client shared by all task handlers.</param>
    /// <param name="timeProvider">Clock passed to handlers for timing task execution.</param>
    /// <param name="logger">Logger shared by the capability and its handlers.</param>
    public DockerCapability(
        IDockerClient dockerClient,
        TimeProvider timeProvider,
        ILogger<DockerCapability> logger)
    {
        _dockerClient = dockerClient;
        _timeProvider = timeProvider;
        _logger = logger;

        _taskHandlers = new Dictionary<string, IDockerTask>(StringComparer.OrdinalIgnoreCase)
        {
            ["docker.pull"] = new DockerPullTask(_dockerClient, logger),
            ["docker.run"] = new DockerRunTask(_dockerClient, logger),
            ["docker.stop"] = new DockerStopTask(_dockerClient, logger),
            ["docker.remove"] = new DockerRemoveTask(_dockerClient, logger),
            ["docker.health-check"] = new DockerHealthCheckTask(_dockerClient, logger),
            ["docker.logs"] = new DockerLogsTask(_dockerClient, logger)
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Queries the Docker daemon version; any failure is logged and reported as <c>false</c>.
    /// </remarks>
    public async Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        try
        {
            var version = await _dockerClient.System.GetVersionAsync(ct);
            _logger.LogInformation(
                "Docker capability initialized: Docker {Version} on {OS}",
                version.Version,
                version.Os);
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to initialize Docker capability");
            return false;
        }
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
    /// <exception cref="UnsupportedTaskTypeException">No handler is registered for the task type.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        if (!_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            throw new UnsupportedTaskTypeException(task.TaskType);
        }

        _logger.LogDebug("Executing task {TaskType} with ID {TaskId}", task.TaskType, task.Id);

        return await handler.ExecuteAsync(task, _timeProvider, ct);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Pings the Docker daemon; any exception is converted into an unhealthy status
    /// carrying the exception message.
    /// </remarks>
    public async Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        try
        {
            await _dockerClient.System.PingAsync(ct);
            return new CapabilityHealthStatus(true, "Docker daemon responding");
        }
        catch (Exception ex)
        {
            return new CapabilityHealthStatus(false, $"Docker daemon not responding: {ex.Message}");
        }
    }
}
|
||||
@@ -0,0 +1,43 @@
|
||||
using Docker.DotNet;
|
||||
|
||||
namespace StellaOps.Agent.Docker;
|
||||
|
||||
/// <summary>
/// Abstraction over the construction of <see cref="IDockerClient"/> instances.
/// </summary>
public interface IDockerClientFactory
{
    /// <summary>
    /// Builds a client for talking to the Docker daemon.
    /// </summary>
    /// <returns>A Docker client instance.</returns>
    IDockerClient CreateClient();
}
|
||||
|
||||
/// <summary>
/// Standard <see cref="IDockerClientFactory"/> backed by a single
/// <see cref="DockerClientConfiguration"/> chosen at construction time.
/// </summary>
public sealed class DockerClientFactory : IDockerClientFactory
{
    private readonly DockerClientConfiguration _configuration;

    /// <summary>
    /// Initializes the factory with the library's default configuration
    /// (no explicit endpoint is supplied).
    /// </summary>
    public DockerClientFactory()
        => _configuration = new DockerClientConfiguration();

    /// <summary>
    /// Initializes the factory for an explicitly specified Docker endpoint.
    /// </summary>
    /// <param name="endpoint">Docker endpoint URI (e.g., "unix:///var/run/docker.sock" or "npipe://./pipe/docker_engine").</param>
    public DockerClientFactory(Uri endpoint)
        => _configuration = new DockerClientConfiguration(endpoint);

    /// <inheritdoc />
    public IDockerClient CreateClient()
    {
        return _configuration.CreateClient();
    }
}
|
||||
@@ -0,0 +1,83 @@
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Docker.Exceptions;
|
||||
|
||||
/// <summary>
/// Raised when the payload supplied with a task cannot be used.
/// </summary>
public sealed class InvalidPayloadException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given task type.
    /// </summary>
    /// <param name="taskType">Task type whose payload was rejected; included in the message.</param>
    public InvalidPayloadException(string taskType)
        : base($"Invalid payload for task type '{taskType}'")
        => TaskType = taskType;

    /// <summary>
    /// Task type whose payload was rejected.
    /// </summary>
    public string TaskType { get; }
}
|
||||
|
||||
/// <summary>
/// Raised when pulling an image does not succeed.
/// </summary>
public sealed class ImagePullException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given image and failure detail.
    /// </summary>
    /// <param name="imageRef">Reference of the image that could not be pulled.</param>
    /// <param name="message">Failure detail appended to the exception message.</param>
    public ImagePullException(string imageRef, string message)
        : base($"Failed to pull image '{imageRef}': {message}")
        => ImageRef = imageRef;

    /// <summary>
    /// Reference of the image that could not be pulled.
    /// </summary>
    public string ImageRef { get; }
}
|
||||
|
||||
/// <summary>
/// Raised when starting a container does not succeed.
/// </summary>
public sealed class ContainerStartException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given container and failure detail.
    /// </summary>
    /// <param name="containerName">Name of the container that could not be started.</param>
    /// <param name="message">Failure detail appended to the exception message.</param>
    public ContainerStartException(string containerName, string message)
        : base($"Failed to start container '{containerName}': {message}")
        => ContainerName = containerName;

    /// <summary>
    /// Name of the container that could not be started.
    /// </summary>
    public string ContainerName { get; }
}
|
||||
|
||||
/// <summary>
/// Raised when a requested container does not exist.
/// </summary>
public sealed class ContainerNotFoundException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given container identifier.
    /// </summary>
    /// <param name="containerIdentifier">Identifier that was looked up; included in the message.</param>
    public ContainerNotFoundException(string containerIdentifier)
        : base($"Container not found: '{containerIdentifier}'")
        => ContainerIdentifier = containerIdentifier;

    /// <summary>
    /// Identifier that was looked up.
    /// </summary>
    public string ContainerIdentifier { get; }
}
|
||||
@@ -0,0 +1,22 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Agent.Docker</RootNamespace>
|
||||
<Description>Stella Agent Docker Capability - manages standalone Docker containers on target hosts</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Docker.DotNet" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,233 @@
|
||||
using System.Text.Json;
|
||||
using Docker.DotNet;
|
||||
using Docker.DotNet.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Docker.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Docker.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for checking Docker container health.
/// </summary>
/// <remarks>
/// The container is inspected repeatedly until it reports a final health state, it stops
/// running, the configured timeout elapses, or the caller cancels.
/// </remarks>
public sealed class DockerHealthCheckTask : IDockerTask
{
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(2);

    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.health-check task.
    /// </summary>
    public sealed record HealthCheckPayload
    {
        /// <summary>
        /// Container ID. Takes precedence over <see cref="ContainerName"/> when both are set.
        /// </summary>
        public string? ContainerId { get; init; }

        /// <summary>
        /// Container name, used to look up the ID when <see cref="ContainerId"/> is not set.
        /// </summary>
        public string? ContainerName { get; init; }

        /// <summary>
        /// Timeout for health check. Defaults to 30 seconds.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);

        /// <summary>
        /// Whether to wait for healthy status. When <c>false</c>, a container whose health is
        /// neither "healthy" nor "unhealthy" is reported as success with its current status.
        /// </summary>
        public bool WaitForHealthy { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker client used to inspect and list containers.</param>
    /// <param name="logger">Diagnostic logger.</param>
    public DockerHealthCheckTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <exception cref="InvalidPayloadException">The task payload deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<HealthCheckPayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.health-check");

        var containerId = await ResolveContainerIdAsync(payload, ct);
        if (containerId is null)
        {
            return Failed(task, timeProvider, startTime, "Container not found");
        }

        _logger.LogInformation("Checking health of container {ContainerId}", TruncateId(containerId));

        try
        {
            // Timeout and polling both run on the injected clock so the task can be driven
            // deterministically by a test time provider.
            using var timeoutCts = new CancellationTokenSource(payload.Timeout, timeProvider);
            using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

            while (true)
            {
                linkedCts.Token.ThrowIfCancellationRequested();

                var containerInfo = await _dockerClient.Containers.InspectContainerAsync(containerId, linkedCts.Token);

                if (containerInfo.State.Status != "running")
                {
                    return Failed(
                        task,
                        timeProvider,
                        startTime,
                        $"Container not running (state: {containerInfo.State.Status})",
                        new Dictionary<string, object>
                        {
                            ["state"] = containerInfo.State.Status
                        });
                }

                var health = containerInfo.State.Health;
                if (health is null)
                {
                    // No health check configured, container is running
                    return Succeeded(task, timeProvider, startTime, new Dictionary<string, object>
                    {
                        ["containerId"] = containerId,
                        ["state"] = "running",
                        ["healthCheck"] = "none"
                    });
                }

                if (health.Status == "healthy")
                {
                    _logger.LogInformation("Container {ContainerId} is healthy", TruncateId(containerId));

                    return Succeeded(task, timeProvider, startTime, new Dictionary<string, object>
                    {
                        ["containerId"] = containerId,
                        ["state"] = "running",
                        ["healthStatus"] = "healthy",
                        ["failingStreak"] = health.FailingStreak
                    });
                }

                if (health.Status == "unhealthy")
                {
                    var lastLog = health.Log?.LastOrDefault();
                    return Failed(
                        task,
                        timeProvider,
                        startTime,
                        $"Container unhealthy: {lastLog?.Output ?? "unknown"}",
                        new Dictionary<string, object>
                        {
                            ["containerId"] = containerId,
                            ["healthStatus"] = "unhealthy",
                            ["failingStreak"] = health.FailingStreak
                        });
                }

                if (!payload.WaitForHealthy)
                {
                    return Succeeded(task, timeProvider, startTime, new Dictionary<string, object>
                    {
                        ["containerId"] = containerId,
                        ["healthStatus"] = health.Status
                    });
                }

                // Wait before checking again
                await Task.Delay(PollInterval, timeProvider, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller cancelled; do not misreport this as a timeout.
            return Failed(task, timeProvider, startTime, "Health check was cancelled");
        }
        catch (OperationCanceledException)
        {
            return Failed(task, timeProvider, startTime, $"Health check timed out after {payload.Timeout}");
        }
        catch (DockerApiException ex)
        {
            // E.g. the supplied container ID does not exist.
            _logger.LogError(ex, "Failed to inspect container {ContainerId}", TruncateId(containerId));
            return Failed(task, timeProvider, startTime, $"Failed to inspect container: {ex.Message}");
        }
    }

    /// <summary>
    /// Builds a failure result without outputs, stamped with the current time.
    /// </summary>
    private static AgentTaskResult Failed(
        AgentTaskInfo task,
        TimeProvider timeProvider,
        DateTimeOffset startTime,
        string error)
    {
        var completedAt = timeProvider.GetUtcNow();
        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    /// <summary>
    /// Builds a failure result carrying diagnostic outputs, stamped with the current time.
    /// </summary>
    private static AgentTaskResult Failed(
        AgentTaskInfo task,
        TimeProvider timeProvider,
        DateTimeOffset startTime,
        string error,
        Dictionary<string, object> outputs)
    {
        var completedAt = timeProvider.GetUtcNow();
        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            Outputs = outputs,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    /// <summary>
    /// Builds a success result carrying the given outputs, stamped with the current time.
    /// </summary>
    private static AgentTaskResult Succeeded(
        AgentTaskInfo task,
        TimeProvider timeProvider,
        DateTimeOffset startTime,
        Dictionary<string, object> outputs)
    {
        var completedAt = timeProvider.GetUtcNow();
        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = true,
            Outputs = outputs,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    /// <summary>
    /// Returns the payload's container ID if set; otherwise looks the container up by name
    /// (including stopped containers). Returns <c>null</c> when neither is set or no
    /// container matches.
    /// </summary>
    private async Task<string?> ResolveContainerIdAsync(HealthCheckPayload payload, CancellationToken ct)
    {
        if (!string.IsNullOrEmpty(payload.ContainerId))
        {
            return payload.ContainerId;
        }

        if (!string.IsNullOrEmpty(payload.ContainerName))
        {
            // The name filter is anchored as a regular expression, so escape the name:
            // otherwise a '.' in "my.app" would also match e.g. "my-app".
            var escapedName = System.Text.RegularExpressions.Regex.Escape(payload.ContainerName);

            var containers = await _dockerClient.Containers.ListContainersAsync(
                new ContainersListParameters
                {
                    All = true,
                    Filters = new Dictionary<string, IDictionary<string, bool>>
                    {
                        ["name"] = new Dictionary<string, bool> { [$"^/{escapedName}$"] = true }
                    }
                },
                ct);

            return containers.FirstOrDefault()?.ID;
        }

        return null;
    }

    /// <summary>
    /// Shortens an ID for logging: strips a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}
|
||||
@@ -0,0 +1,197 @@
|
||||
using System.Text.Json;
|
||||
using Docker.DotNet;
|
||||
using Docker.DotNet.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Docker.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Docker.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for retrieving Docker container logs.
/// </summary>
/// <remarks>
/// Reads the container's stdout and stderr (without following) and returns the collected
/// lines in the task outputs. All retrieved lines are held in memory, so callers should
/// bound the request with <see cref="LogsPayload.Tail"/> or a time range for chatty containers.
/// </remarks>
public sealed class DockerLogsTask : IDockerTask
{
    private const int ReadBufferSize = 81920;

    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.logs task.
    /// </summary>
    public sealed record LogsPayload
    {
        /// <summary>
        /// Container ID. Takes precedence over <see cref="ContainerName"/> when both are set.
        /// </summary>
        public string? ContainerId { get; init; }

        /// <summary>
        /// Container name, used to look up the ID when <see cref="ContainerId"/> is not set.
        /// </summary>
        public string? ContainerName { get; init; }

        /// <summary>
        /// Number of lines from the end to retrieve.
        /// </summary>
        public int? Tail { get; init; }

        /// <summary>
        /// Include timestamps.
        /// </summary>
        public bool Timestamps { get; init; } = true;

        /// <summary>
        /// Only logs since this time. Passed to Docker unchanged.
        /// </summary>
        public string? Since { get; init; }

        /// <summary>
        /// Only logs before this time. Passed to Docker unchanged.
        /// </summary>
        public string? Until { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker client used to read logs and list containers.</param>
    /// <param name="logger">Diagnostic logger.</param>
    public DockerLogsTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <exception cref="InvalidPayloadException">The task payload deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<LogsPayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.logs");

        var containerId = await ResolveContainerIdAsync(payload, ct);
        if (containerId is null)
        {
            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = "Container not found",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }

        _logger.LogInformation("Retrieving logs for container {ContainerId}", TruncateId(containerId));

        try
        {
            var logParams = new ContainerLogsParameters
            {
                ShowStdout = true,
                ShowStderr = true,
                Timestamps = payload.Timestamps
            };

            if (payload.Tail.HasValue)
            {
                // Invariant formatting: the value is sent to the Docker API, not shown to a user.
                logParams.Tail = payload.Tail.Value.ToString(System.Globalization.CultureInfo.InvariantCulture);
            }

            if (!string.IsNullOrEmpty(payload.Since))
            {
                logParams.Since = payload.Since;
            }

            if (!string.IsNullOrEmpty(payload.Until))
            {
                logParams.Until = payload.Until;
            }

            // The multiplexed stream owns the underlying connection and must be disposed.
            using var multiplexedStream = await _dockerClient.Containers.GetContainerLogsAsync(
                containerId,
                tty: false,
                logParams,
                ct);

            var logs = new List<string>();

            // One assembler per output stream: stdout and stderr frames are interleaved, and a
            // line (or a multi-byte UTF-8 character) may be split across two reads.
            var stdoutLines = new LineAssembler();
            var stderrLines = new LineAssembler();

            var buffer = new byte[ReadBufferSize];
            MultiplexedStream.ReadResult result;
            while ((result = await multiplexedStream.ReadOutputAsync(buffer, 0, buffer.Length, ct)).Count > 0)
            {
                var assembler = result.Target == MultiplexedStream.TargetStream.StandardError
                    ? stderrLines
                    : stdoutLines;
                assembler.Append(buffer, result.Count, logs.Add);
            }

            // Emit trailing text that was not terminated by a newline.
            stdoutLines.Flush(logs.Add);
            stderrLines.Flush(logs.Add);

            var completedAt = timeProvider.GetUtcNow();
            _logger.LogInformation("Retrieved {Count} log lines from container {ContainerId}", logs.Count, TruncateId(containerId));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["containerId"] = containerId,
                    ["lineCount"] = logs.Count,
                    ["logs"] = logs
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (DockerApiException ex)
        {
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogError(ex, "Failed to retrieve logs for container {ContainerId}", TruncateId(containerId));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to retrieve logs: {ex.Message}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Returns the payload's container ID if set; otherwise looks the container up by name
    /// (including stopped containers). Returns <c>null</c> when neither is set or no
    /// container matches.
    /// </summary>
    private async Task<string?> ResolveContainerIdAsync(LogsPayload payload, CancellationToken ct)
    {
        if (!string.IsNullOrEmpty(payload.ContainerId))
        {
            return payload.ContainerId;
        }

        if (!string.IsNullOrEmpty(payload.ContainerName))
        {
            // The name filter is anchored as a regular expression, so escape the name:
            // otherwise a '.' in "my.app" would also match e.g. "my-app".
            var escapedName = System.Text.RegularExpressions.Regex.Escape(payload.ContainerName);

            var containers = await _dockerClient.Containers.ListContainersAsync(
                new ContainersListParameters
                {
                    All = true,
                    Filters = new Dictionary<string, IDictionary<string, bool>>
                    {
                        ["name"] = new Dictionary<string, bool> { [$"^/{escapedName}$"] = true }
                    }
                },
                ct);

            return containers.FirstOrDefault()?.ID;
        }

        return null;
    }

    /// <summary>
    /// Shortens an ID for logging: strips a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }

    /// <summary>
    /// Incrementally decodes UTF-8 bytes and yields complete lines, carrying partial lines
    /// and partial multi-byte sequences over to the next chunk.
    /// </summary>
    private sealed class LineAssembler
    {
        private readonly System.Text.Decoder _decoder = System.Text.Encoding.UTF8.GetDecoder();
        private readonly System.Text.StringBuilder _pending = new();
        private char[] _chars = Array.Empty<char>();

        /// <summary>
        /// Decodes <paramref name="count"/> bytes and invokes <paramref name="onLine"/> for
        /// every line completed by this chunk.
        /// </summary>
        public void Append(byte[] bytes, int count, Action<string> onLine)
        {
            var needed = _decoder.GetCharCount(bytes, 0, count, flush: false);
            if (_chars.Length < needed)
            {
                _chars = new char[needed];
            }

            var written = _decoder.GetChars(bytes, 0, count, _chars, 0, flush: false);
            _pending.Append(_chars, 0, written);
            Drain(onLine, includeRemainder: false);
        }

        /// <summary>
        /// Emits whatever text is still buffered as a final line.
        /// </summary>
        public void Flush(Action<string> onLine) => Drain(onLine, includeRemainder: true);

        private void Drain(Action<string> onLine, bool includeRemainder)
        {
            var text = _pending.ToString();
            _pending.Clear();

            var start = 0;
            for (var newline = text.IndexOf('\n', start); newline >= 0; newline = text.IndexOf('\n', start))
            {
                Emit(text[start..newline], onLine);
                start = newline + 1;
            }

            if (includeRemainder)
            {
                Emit(text[start..], onLine);
            }
            else
            {
                _pending.Append(text, start, text.Length - start);
            }
        }

        private static void Emit(string line, Action<string> onLine)
        {
            // Empty segments are skipped; a trailing carriage return is removed.
            if (line.Length > 0)
            {
                onLine(line.TrimEnd('\r'));
            }
        }
    }
}
|
||||
@@ -0,0 +1,183 @@
|
||||
using System.Text.Json;
|
||||
using Docker.DotNet;
|
||||
using Docker.DotNet.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Docker.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Docker.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for pulling Docker images (task type <c>docker.pull</c>).
/// </summary>
/// <remarks>
/// The handler builds an image reference from the payload, pulls it through the Docker client,
/// then lists local images filtered by that reference to confirm the pull and to collect the
/// image ID, size and digest reported in the task outputs.
/// </remarks>
public sealed class DockerPullTask : IDockerTask
{
    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.pull task.
    /// </summary>
    public sealed record PullPayload
    {
        /// <summary>
        /// Image name (e.g., "nginx", "myregistry.com/myapp").
        /// </summary>
        public required string Image { get; init; }

        /// <summary>
        /// Image tag (e.g., "latest", "1.0.0"). Ignored when <see cref="Digest"/> is set.
        /// </summary>
        public string? Tag { get; init; }

        /// <summary>
        /// Image digest (e.g., "sha256:abc123..."). Takes precedence over <see cref="Tag"/>.
        /// </summary>
        public string? Digest { get; init; }

        /// <summary>
        /// Registry address (e.g., "registry.example.com"). When set it is prefixed to the image name
        /// and used as the server address for registry credentials.
        /// </summary>
        public string? Registry { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker API client used to pull and list images.</param>
    /// <param name="logger">Logger for pull progress and outcome.</param>
    public DockerPullTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// A <see cref="DockerApiException"/> raised by the pull or the verification listing is logged and
    /// converted into a failed <see cref="AgentTaskResult"/>; other exceptions propagate to the caller.
    /// </remarks>
    /// <exception cref="InvalidPayloadException">The payload deserializes to <c>null</c>.</exception>
    /// <exception cref="ImagePullException">The image is not listed locally after the pull completed.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<PullPayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.pull");

        var imageRef = BuildImageReference(payload);

        _logger.LogInformation("Pulling image {Image}", imageRef);

        try
        {
            // Registry credentials are optional; both username and password must be present to be used.
            AuthConfig? authConfig = null;
            if (task.Credentials.TryGetValue("registry.username", out var username) &&
                task.Credentials.TryGetValue("registry.password", out var password))
            {
                authConfig = new AuthConfig
                {
                    Username = username,
                    Password = password,
                    ServerAddress = payload.Registry ?? "https://index.docker.io/v1/"
                };
            }

            await _dockerClient.Images.CreateImageAsync(
                new ImagesCreateParameters
                {
                    FromImage = imageRef
                },
                authConfig,
                new Progress<JSONMessage>(msg =>
                {
                    if (!string.IsNullOrEmpty(msg.Status))
                    {
                        _logger.LogDebug("Pull progress: {Status}", msg.Status);
                    }
                }),
                ct);

            // Verify the image was pulled by looking it up under the same reference.
            var images = await _dockerClient.Images.ListImagesAsync(
                new ImagesListParameters
                {
                    Filters = new Dictionary<string, IDictionary<string, bool>>
                    {
                        ["reference"] = new Dictionary<string, bool> { [imageRef] = true }
                    }
                },
                ct);

            if (images.Count == 0)
            {
                throw new ImagePullException(imageRef, "Image not found after pull");
            }

            var pulledImage = images.First();
            var completedAt = timeProvider.GetUtcNow();

            _logger.LogInformation(
                "Successfully pulled image {Image} (ID: {Id})",
                imageRef,
                TruncateId(pulledImage.ID));

            // Use the same "null or empty means not supplied" rule as BuildImageReference, so an
            // empty payload digest falls back to the digest reported for the pulled image.
            var digest = string.IsNullOrEmpty(payload.Digest)
                ? ExtractDigest(pulledImage)
                : payload.Digest;

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["imageId"] = pulledImage.ID,
                    ["size"] = pulledImage.Size,
                    ["digest"] = digest
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (DockerApiException ex)
        {
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogError(ex, "Failed to pull image {Image}", imageRef);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to pull image: {ex.Message}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Builds the reference to pull: <c>[registry/]image@digest</c> when a digest is given,
    /// otherwise <c>[registry/]image:tag</c>, defaulting the tag to <c>latest</c>.
    /// </summary>
    private static string BuildImageReference(PullPayload payload)
    {
        var image = payload.Image;

        if (!string.IsNullOrEmpty(payload.Registry))
        {
            image = $"{payload.Registry}/{image}";
        }

        if (!string.IsNullOrEmpty(payload.Digest))
        {
            return $"{image}@{payload.Digest}";
        }

        if (!string.IsNullOrEmpty(payload.Tag))
        {
            return $"{image}:{payload.Tag}";
        }

        return $"{image}:latest";
    }

    /// <summary>
    /// Returns the part after <c>@</c> of the first repo digest, or an empty string when the image
    /// has no repo digests.
    /// </summary>
    private static string ExtractDigest(ImagesListResponse image)
    {
        // RepoDigests is guarded against null so a missing list yields "" instead of a crash.
        var repoDigest = image.RepoDigests?.FirstOrDefault();
        if (string.IsNullOrEmpty(repoDigest))
        {
            return string.Empty;
        }

        return repoDigest.Split('@').LastOrDefault() ?? string.Empty;
    }

    /// <summary>
    /// Shortens an ID for log output: strips a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}
|
||||
@@ -0,0 +1,155 @@
|
||||
using System.Text.Json;
|
||||
using Docker.DotNet;
|
||||
using Docker.DotNet.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Docker.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Docker.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for removing Docker containers (task type <c>docker.remove</c>).
/// </summary>
/// <remarks>
/// Removal is idempotent: when the target container cannot be found, whether it is addressed by
/// name or by ID, the task succeeds and reports <c>alreadyRemoved = true</c>.
/// </remarks>
public sealed class DockerRemoveTask : IDockerTask
{
    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.remove task.
    /// </summary>
    public sealed record RemovePayload
    {
        /// <summary>
        /// Container ID. Takes precedence over <see cref="ContainerName"/> when both are set.
        /// </summary>
        public string? ContainerId { get; init; }

        /// <summary>
        /// Container name, used to look up the ID when <see cref="ContainerId"/> is not set.
        /// </summary>
        public string? ContainerName { get; init; }

        /// <summary>
        /// Whether to force removal (kill if running).
        /// </summary>
        public bool Force { get; init; }

        /// <summary>
        /// Whether to remove anonymous volumes.
        /// </summary>
        public bool RemoveVolumes { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker API client used to look up and remove containers.</param>
    /// <param name="logger">Logger for the removal outcome.</param>
    public DockerRemoveTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// A <see cref="DockerApiException"/> with HTTP status 404 from the remove call is treated as
    /// "already removed"; any other <see cref="DockerApiException"/> from that call produces a failed result.
    /// </remarks>
    /// <exception cref="InvalidPayloadException">The payload deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<RemovePayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.remove");

        var containerId = await ResolveContainerIdAsync(payload, ct);
        if (containerId is null)
        {
            // Nothing resolved: report success so repeated removals stay idempotent.
            return BuildSuccess(
                task,
                startTime,
                timeProvider.GetUtcNow(),
                new Dictionary<string, object>
                {
                    ["alreadyRemoved"] = true
                });
        }

        _logger.LogInformation("Removing container {ContainerId}", TruncateId(containerId));

        try
        {
            await _dockerClient.Containers.RemoveContainerAsync(
                containerId,
                new ContainerRemoveParameters
                {
                    Force = payload.Force,
                    RemoveVolumes = payload.RemoveVolumes
                },
                ct);

            var completedAt = timeProvider.GetUtcNow();
            _logger.LogInformation("Container {ContainerId} removed", TruncateId(containerId));

            return BuildSuccess(
                task,
                startTime,
                completedAt,
                new Dictionary<string, object>
                {
                    ["containerId"] = containerId,
                    ["alreadyRemoved"] = false
                });
        }
        catch (DockerApiException ex) when (ex.StatusCode == global::System.Net.HttpStatusCode.NotFound)
        {
            // The container disappeared (or never existed under this ID). Match the by-name path
            // and report it as already removed instead of failing the task.
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogInformation("Container {ContainerId} was already removed", TruncateId(containerId));

            return BuildSuccess(
                task,
                startTime,
                completedAt,
                new Dictionary<string, object>
                {
                    ["containerId"] = containerId,
                    ["alreadyRemoved"] = true
                });
        }
        catch (DockerApiException ex)
        {
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogError(ex, "Failed to remove container {ContainerId}", TruncateId(containerId));

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to remove container: {ex.Message}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Builds a successful result carrying <paramref name="outputs"/> and the elapsed duration.
    /// </summary>
    private static AgentTaskResult BuildSuccess(
        AgentTaskInfo task,
        DateTimeOffset startTime,
        DateTimeOffset completedAt,
        Dictionary<string, object> outputs)
    {
        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = true,
            Outputs = outputs,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    /// <summary>
    /// Resolves the container to remove: the payload ID when present, otherwise the ID of the first
    /// container (running or not) whose name matches the payload name, or <c>null</c> when neither applies.
    /// </summary>
    private async Task<string?> ResolveContainerIdAsync(RemovePayload payload, CancellationToken ct)
    {
        if (!string.IsNullOrEmpty(payload.ContainerId))
        {
            return payload.ContainerId;
        }

        if (string.IsNullOrEmpty(payload.ContainerName))
        {
            return null;
        }

        // The filter value is an anchored pattern, so the name is escaped to be matched literally;
        // otherwise a name such as "my.app" could select (and remove) a different container.
        var namePattern = $"^/{global::System.Text.RegularExpressions.Regex.Escape(payload.ContainerName)}$";

        var containers = await _dockerClient.Containers.ListContainersAsync(
            new ContainersListParameters
            {
                All = true,
                Filters = new Dictionary<string, IDictionary<string, bool>>
                {
                    ["name"] = new Dictionary<string, bool> { [namePattern] = true }
                }
            },
            ct);

        return containers.FirstOrDefault()?.ID;
    }

    /// <summary>
    /// Shortens an ID for log output: strips a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}
|
||||
@@ -0,0 +1,354 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using Docker.DotNet;
|
||||
using Docker.DotNet.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Docker.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Docker.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for running Docker containers (task type <c>docker.run</c>).
/// </summary>
/// <remarks>
/// Any existing container with the requested name is stopped (if running) and removed, then a new
/// container is created from the payload, started and inspected to build the task outputs.
/// </remarks>
public sealed partial class DockerRunTask : IDockerTask
{
    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.run task.
    /// </summary>
    public sealed record RunPayload
    {
        /// <summary>
        /// Image to run.
        /// </summary>
        public required string Image { get; init; }

        /// <summary>
        /// Container name.
        /// </summary>
        public required string Name { get; init; }

        /// <summary>
        /// Environment variables. Values may contain <c>${name}</c> placeholders that are replaced
        /// with matching task variables.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// Port mappings (e.g., ["8080:80", "443:443", "127.0.0.1:5432:5432/tcp"]).
        /// </summary>
        public IReadOnlyList<string>? Ports { get; init; }

        /// <summary>
        /// Volume mounts (e.g., ["/host/path:/container/path"]).
        /// </summary>
        public IReadOnlyList<string>? Volumes { get; init; }

        /// <summary>
        /// Container labels.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Labels { get; init; }

        /// <summary>
        /// Network mode.
        /// </summary>
        public string? Network { get; init; }

        /// <summary>
        /// Command to run.
        /// </summary>
        public IReadOnlyList<string>? Command { get; init; }

        /// <summary>
        /// Health check configuration.
        /// </summary>
        public ContainerHealthConfig? HealthCheck { get; init; }

        /// <summary>
        /// Whether to auto-remove the container when it exits.
        /// </summary>
        public bool AutoRemove { get; init; }

        /// <summary>
        /// Restart policy.
        /// </summary>
        public ContainerRestartPolicy? RestartPolicy { get; init; }
    }

    /// <summary>
    /// Health check configuration.
    /// </summary>
    public sealed record ContainerHealthConfig
    {
        /// <summary>
        /// Test command (e.g., ["CMD", "curl", "-f", "http://localhost/"]).
        /// </summary>
        public required IReadOnlyList<string> Test { get; init; }

        /// <summary>
        /// Interval between health checks.
        /// </summary>
        public TimeSpan Interval { get; init; } = TimeSpan.FromSeconds(30);

        /// <summary>
        /// Timeout for each health check.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(10);

        /// <summary>
        /// Number of retries before marking unhealthy.
        /// </summary>
        public int Retries { get; init; } = 3;

        /// <summary>
        /// Start period to wait before counting failures.
        /// </summary>
        public TimeSpan StartPeriod { get; init; } = TimeSpan.Zero;
    }

    /// <summary>
    /// Restart policy configuration.
    /// </summary>
    public sealed record ContainerRestartPolicy
    {
        /// <summary>
        /// Policy name (no, always, unless-stopped, on-failure). Unrecognized names are treated as "no".
        /// </summary>
        public string Name { get; init; } = "no";

        /// <summary>
        /// Maximum retry count for on-failure policy.
        /// </summary>
        public int MaximumRetryCount { get; init; }
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker API client used to manage containers.</param>
    /// <param name="logger">Logger for container lifecycle messages.</param>
    public DockerRunTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// A <see cref="DockerApiException"/> raised while replacing, creating, starting or inspecting the
    /// container is logged and converted into a failed result; other exceptions propagate.
    /// </remarks>
    /// <exception cref="InvalidPayloadException">The payload deserializes to <c>null</c>.</exception>
    /// <exception cref="ContainerStartException">The start call reports that the container was not started.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<RunPayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.run");

        _logger.LogInformation(
            "Creating container {Name} from image {Image}",
            payload.Name,
            payload.Image);

        try
        {
            await RemoveExistingContainerAsync(payload.Name, ct);

            // Create container
            var createParams = BuildCreateParameters(payload, task);
            var createResponse = await _dockerClient.Containers.CreateContainerAsync(createParams, ct);

            _logger.LogInformation(
                "Created container {Name} (ID: {Id})",
                payload.Name,
                TruncateId(createResponse.ID));

            // Start container
            var started = await _dockerClient.Containers.StartContainerAsync(
                createResponse.ID,
                new ContainerStartParameters(),
                ct);

            if (!started)
            {
                throw new ContainerStartException(payload.Name, "Container failed to start");
            }

            // Get container info
            var containerInfo = await _dockerClient.Containers.InspectContainerAsync(createResponse.ID, ct);
            var completedAt = timeProvider.GetUtcNow();

            _logger.LogInformation(
                "Started container {Name} (State: {State})",
                payload.Name,
                containerInfo.State.Status);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["containerId"] = createResponse.ID,
                    ["containerName"] = payload.Name,
                    ["state"] = containerInfo.State.Status,
                    ["ipAddress"] = containerInfo.NetworkSettings.IPAddress ?? string.Empty
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (DockerApiException ex)
        {
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogError(ex, "Failed to create/start container {Name}", payload.Name);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to create/start container: {ex.Message}",
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>
    /// Stops (when running) and removes the first container whose name matches <paramref name="name"/>, if any.
    /// </summary>
    private async Task RemoveExistingContainerAsync(string name, CancellationToken ct)
    {
        // The filter value is an anchored pattern, so the name is escaped to be matched literally;
        // otherwise a name such as "my.app" could select (and remove) a different container.
        var namePattern = $"^/{Regex.Escape(name)}$";

        var existingContainers = await _dockerClient.Containers.ListContainersAsync(
            new ContainersListParameters
            {
                All = true,
                Filters = new Dictionary<string, IDictionary<string, bool>>
                {
                    ["name"] = new Dictionary<string, bool> { [namePattern] = true }
                }
            },
            ct);

        var existing = existingContainers.FirstOrDefault();
        if (existing is null)
        {
            return;
        }

        _logger.LogInformation(
            "Container {Name} already exists (ID: {Id}), removing",
            name,
            TruncateId(existing.ID));

        if (existing.State == "running")
        {
            await _dockerClient.Containers.StopContainerAsync(existing.ID, new ContainerStopParameters(), ct);
        }

        await _dockerClient.Containers.RemoveContainerAsync(existing.ID, new ContainerRemoveParameters(), ct);
    }

    /// <summary>
    /// Translates the payload into Docker create parameters, adding the <c>stella.managed</c> and
    /// <c>stella.task.id</c> labels on top of the payload labels.
    /// </summary>
    private static CreateContainerParameters BuildCreateParameters(RunPayload payload, AgentTaskInfo task)
    {
        // Merge labels with Stella metadata (Stella keys win over payload keys of the same name).
        var labels = new Dictionary<string, string>(payload.Labels ?? new Dictionary<string, string>());
        labels["stella.managed"] = "true";
        labels["stella.task.id"] = task.Id.ToString();

        return new CreateContainerParameters
        {
            Image = payload.Image,
            Name = payload.Name,
            Env = BuildEnvironment(payload.Environment, task.Variables),
            Labels = labels,
            Cmd = payload.Command?.ToList(),
            HostConfig = new HostConfig
            {
                PortBindings = ParsePortBindings(payload.Ports),
                Binds = payload.Volumes?.ToList(),
                NetworkMode = payload.Network,
                AutoRemove = payload.AutoRemove,
                RestartPolicy = payload.RestartPolicy is not null
                    ? new global::Docker.DotNet.Models.RestartPolicy
                    {
                        Name = ParseRestartPolicyKind(payload.RestartPolicy.Name),
                        MaximumRetryCount = payload.RestartPolicy.MaximumRetryCount
                    }
                    : null
            },
            Healthcheck = payload.HealthCheck is not null
                ? new HealthConfig
                {
                    Test = payload.HealthCheck.Test.ToList(),
                    Interval = payload.HealthCheck.Interval,
                    Timeout = payload.HealthCheck.Timeout,
                    Retries = payload.HealthCheck.Retries,
                    // StartPeriod is passed as a nanosecond count.
                    StartPeriod = (long)(payload.HealthCheck.StartPeriod.TotalSeconds * 1_000_000_000)
                }
                : null
        };
    }

    /// <summary>
    /// Builds the <c>KEY=value</c> environment list, substituting task variables into each value.
    /// </summary>
    private static List<string> BuildEnvironment(
        IReadOnlyDictionary<string, string>? env,
        IReadOnlyDictionary<string, string> variables)
    {
        var result = new List<string>();

        if (env is not null)
        {
            foreach (var (key, value) in env)
            {
                // Substitute variables in values
                var resolvedValue = SubstituteVariables(value, variables);
                result.Add($"{key}={resolvedValue}");
            }
        }

        return result;
    }

    /// <summary>
    /// Replaces every <c>${name}</c> placeholder that has a matching variable; unknown placeholders are left as-is.
    /// </summary>
    private static string SubstituteVariables(string value, IReadOnlyDictionary<string, string> variables)
    {
        return VariablePattern().Replace(value, match =>
        {
            var varName = match.Groups[1].Value;
            return variables.TryGetValue(varName, out var varValue) ? varValue : match.Value;
        });
    }

    [GeneratedRegex(@"\$\{([^}]+)\}")]
    private static partial Regex VariablePattern();

    /// <summary>
    /// Parses port mappings into Docker port bindings keyed by <c>containerPort/protocol</c>.
    /// </summary>
    /// <remarks>
    /// Accepted forms are <c>hostPort:containerPort[/protocol]</c> and
    /// <c>hostIp:hostPort:containerPort[/protocol]</c>; the protocol defaults to <c>tcp</c>.
    /// Entries in any other form are skipped. Several mappings for the same container port are
    /// all kept rather than the last one replacing the earlier ones.
    /// </remarks>
    private static IDictionary<string, IList<PortBinding>> ParsePortBindings(IReadOnlyList<string>? ports)
    {
        var bindings = new Dictionary<string, IList<PortBinding>>();

        if (ports is null)
        {
            return bindings;
        }

        foreach (var port in ports)
        {
            var parts = port.Split(':');

            string? hostIp;
            string hostPort;
            string containerPortWithProtocol;

            switch (parts.Length)
            {
                case 2:
                    hostIp = null;
                    hostPort = parts[0];
                    containerPortWithProtocol = parts[1];
                    break;
                case 3:
                    hostIp = parts[0];
                    hostPort = parts[1];
                    containerPortWithProtocol = parts[2];
                    break;
                default:
                    continue;
            }

            var containerPort = containerPortWithProtocol.Contains('/')
                ? containerPortWithProtocol
                : $"{containerPortWithProtocol}/tcp";

            var binding = new PortBinding { HostPort = hostPort };
            if (hostIp is not null)
            {
                binding.HostIP = hostIp;
            }

            if (!bindings.TryGetValue(containerPort, out var portBindings))
            {
                portBindings = new List<PortBinding>();
                bindings[containerPort] = portBindings;
            }

            portBindings.Add(binding);
        }

        return bindings;
    }

    /// <summary>
    /// Maps a restart policy name (case-insensitive) to its Docker kind; unknown names map to <c>No</c>.
    /// </summary>
    private static RestartPolicyKind ParseRestartPolicyKind(string name)
    {
        return name.ToLowerInvariant() switch
        {
            "no" => RestartPolicyKind.No,
            "always" => RestartPolicyKind.Always,
            "unless-stopped" => RestartPolicyKind.UnlessStopped,
            "on-failure" => RestartPolicyKind.OnFailure,
            _ => RestartPolicyKind.No
        };
    }

    /// <summary>
    /// Shortens an ID for log output: strips a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}
|
||||
@@ -0,0 +1,154 @@
|
||||
using System.Text.Json;
|
||||
using Docker.DotNet;
|
||||
using Docker.DotNet.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Docker.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Docker.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for stopping Docker containers (task type <c>docker.stop</c>).
/// </summary>
/// <remarks>
/// The target container is addressed by ID or, failing that, looked up by name. When no container
/// can be resolved the task fails with "Container not found".
/// </remarks>
public sealed class DockerStopTask : IDockerTask
{
    private readonly IDockerClient _dockerClient;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for docker.stop task.
    /// </summary>
    public sealed record StopPayload
    {
        /// <summary>
        /// Container ID. Takes precedence over <see cref="ContainerName"/> when both are set.
        /// </summary>
        public string? ContainerId { get; init; }

        /// <summary>
        /// Container name, used to look up the ID when <see cref="ContainerId"/> is not set.
        /// </summary>
        public string? ContainerName { get; init; }

        /// <summary>
        /// Timeout before killing the container. Negative values are treated as zero.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="dockerClient">Docker API client used to look up and stop containers.</param>
    /// <param name="logger">Logger for the stop outcome.</param>
    public DockerStopTask(IDockerClient dockerClient, ILogger logger)
    {
        _dockerClient = dockerClient;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// A <see cref="DockerApiException"/> from the stop call is logged and converted into a failed result.
    /// The <c>wasRunning</c> output carries the boolean returned by the stop call.
    /// </remarks>
    /// <exception cref="InvalidPayloadException">The payload deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<StopPayload>(task.Payload)
            ?? throw new InvalidPayloadException("docker.stop");

        var containerId = await ResolveContainerIdAsync(payload, ct);
        if (containerId is null)
        {
            return BuildFailure(task, startTime, timeProvider.GetUtcNow(), "Container not found");
        }

        _logger.LogInformation("Stopping container {ContainerId}", TruncateId(containerId));

        try
        {
            // Clamp before converting: casting a negative or out-of-range double to uint is not
            // well defined, so a bad timeout must not turn into an arbitrary wait.
            var waitBeforeKillSeconds = (uint)Math.Clamp(payload.Timeout.TotalSeconds, 0d, uint.MaxValue);

            var stopped = await _dockerClient.Containers.StopContainerAsync(
                containerId,
                new ContainerStopParameters
                {
                    WaitBeforeKillSeconds = waitBeforeKillSeconds
                },
                ct);

            var completedAt = timeProvider.GetUtcNow();

            if (stopped)
            {
                _logger.LogInformation("Container {ContainerId} stopped", TruncateId(containerId));
            }
            else
            {
                _logger.LogWarning("Container {ContainerId} was already stopped", TruncateId(containerId));
            }

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["containerId"] = containerId,
                    ["wasRunning"] = stopped
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (DockerApiException ex)
        {
            var completedAt = timeProvider.GetUtcNow();
            _logger.LogError(ex, "Failed to stop container {ContainerId}", TruncateId(containerId));

            return BuildFailure(task, startTime, completedAt, $"Failed to stop container: {ex.Message}");
        }
    }

    /// <summary>
    /// Builds a failed result carrying <paramref name="error"/> and the elapsed duration.
    /// </summary>
    private static AgentTaskResult BuildFailure(
        AgentTaskInfo task,
        DateTimeOffset startTime,
        DateTimeOffset completedAt,
        string error)
    {
        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    /// <summary>
    /// Resolves the container to stop: the payload ID when present, otherwise the ID of the first
    /// container (running or not) whose name matches the payload name, or <c>null</c> when neither applies.
    /// </summary>
    private async Task<string?> ResolveContainerIdAsync(StopPayload payload, CancellationToken ct)
    {
        if (!string.IsNullOrEmpty(payload.ContainerId))
        {
            return payload.ContainerId;
        }

        if (string.IsNullOrEmpty(payload.ContainerName))
        {
            return null;
        }

        // The filter value is an anchored pattern, so the name is escaped to be matched literally;
        // otherwise a name such as "my.app" could select a different container.
        var namePattern = $"^/{global::System.Text.RegularExpressions.Regex.Escape(payload.ContainerName)}$";

        var containers = await _dockerClient.Containers.ListContainersAsync(
            new ContainersListParameters
            {
                All = true,
                Filters = new Dictionary<string, IDictionary<string, bool>>
                {
                    ["name"] = new Dictionary<string, bool> { [namePattern] = true }
                }
            },
            ct);

        return containers.FirstOrDefault()?.ID;
    }

    /// <summary>
    /// Shortens an ID for log output: strips a leading "sha256:" and keeps at most 12 characters.
    /// </summary>
    private static string TruncateId(string id)
    {
        var trimmed = id.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)
            ? id[7..]
            : id;
        return trimmed.Length > 12 ? trimmed[..12] : trimmed;
    }
}
|
||||
@@ -0,0 +1,18 @@
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Docker.Tasks;
|
||||
|
||||
/// <summary>
/// Contract implemented by every Docker task handler.
/// </summary>
public interface IDockerTask
{
    /// <summary>
    /// Runs the task described by <paramref name="task"/> and reports its outcome.
    /// </summary>
    /// <param name="task">The task to execute.</param>
    /// <param name="timeProvider">Clock used for timestamps, so results are deterministic under test.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The result of the task.</returns>
    Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,216 @@
|
||||
using Amazon.CloudWatchLogs;
|
||||
using Amazon.CloudWatchLogs.Model;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Task = System.Threading.Tasks.Task;
|
||||
|
||||
namespace StellaOps.Agent.Ecs;
|
||||
|
||||
/// <summary>
/// Streams logs from CloudWatch Logs for ECS tasks.
/// </summary>
/// <remarks>
/// Log events are fetched by polling and surfaced through <see cref="LogReceived"/>, each tagged
/// with a log level guessed from keywords in the message text.
/// </remarks>
public sealed class CloudWatchLogStreamer
{
    private readonly IAmazonCloudWatchLogs _logsClient;
    private readonly ILogger<CloudWatchLogStreamer> _logger;

    /// <summary>
    /// Event raised when a log message is received.
    /// </summary>
    public event EventHandler<LogMessageEventArgs>? LogReceived;

    /// <summary>
    /// Creates a new CloudWatch log streamer.
    /// </summary>
    /// <param name="logsClient">CloudWatch Logs client used to fetch log events.</param>
    /// <param name="logger">Logger for streaming diagnostics.</param>
    public CloudWatchLogStreamer(
        IAmazonCloudWatchLogs logsClient,
        ILogger<CloudWatchLogStreamer> logger)
    {
        _logsClient = logsClient;
        _logger = logger;
    }

    /// <summary>
    /// Streams logs from a CloudWatch log group/stream until cancellation is requested or an error occurs.
    /// </summary>
    /// <remarks>
    /// Events are read oldest-first and paged with the forward token returned by each call. When a
    /// call returns the same token it was given, the loop waits two seconds before polling again.
    /// Cancellation, a missing log stream and any other exception end the stream and are logged
    /// rather than rethrown.
    /// </remarks>
    /// <param name="logGroupName">The log group name.</param>
    /// <param name="logStreamName">The log stream name.</param>
    /// <param name="startTime">The start time for log retrieval; applied to the first request only.</param>
    /// <param name="ct">Cancellation token.</param>
    public async Task StreamLogsAsync(
        string logGroupName,
        string logStreamName,
        DateTimeOffset? startTime = null,
        CancellationToken ct = default)
    {
        string? nextToken = null;
        var startTimeUtc = startTime?.UtcDateTime;

        _logger.LogDebug(
            "Starting log stream from {LogGroup}/{LogStream}",
            logGroupName,
            logStreamName);

        try
        {
            while (!ct.IsCancellationRequested)
            {
                var request = new GetLogEventsRequest
                {
                    LogGroupName = logGroupName,
                    LogStreamName = logStreamName,
                    // Always read forward from the head: the GetLogEvents documentation requires
                    // startFromHead = true when paging with a previous nextForwardToken, and it makes
                    // a start-time request return the earliest matching events first.
                    StartFromHead = true,
                    NextToken = nextToken
                };

                if (startTimeUtc.HasValue && nextToken is null)
                {
                    request.StartTime = startTimeUtc.Value;
                }

                var response = await _logsClient.GetLogEventsAsync(request, ct);

                // Guard against a missing event list so an empty poll cannot end the stream.
                if (response.Events is not null)
                {
                    foreach (var logEvent in response.Events)
                    {
                        var message = logEvent.Message ?? string.Empty;
                        OnLogReceived(new LogMessageEventArgs(
                            logGroupName,
                            logStreamName,
                            logEvent.Timestamp ?? DateTime.UtcNow,
                            DetectLogLevel(message),
                            message));
                    }
                }

                // If token hasn't changed, no new logs - wait before polling
                if (response.NextForwardToken == nextToken)
                {
                    await Task.Delay(TimeSpan.FromSeconds(2), ct);
                }

                nextToken = response.NextForwardToken;
            }
        }
        catch (OperationCanceledException)
        {
            _logger.LogDebug("Log streaming cancelled");
        }
        catch (ResourceNotFoundException)
        {
            _logger.LogWarning(
                "Log stream {LogGroup}/{LogStream} not found",
                logGroupName,
                logStreamName);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(
                ex,
                "Error streaming logs from {LogGroup}/{LogStream}",
                logGroupName,
                logStreamName);
        }
    }

    /// <summary>
    /// Gets the log stream name for an ECS task.
    /// </summary>
    /// <param name="logStreamPrefix">The log stream prefix configured in the task definition.</param>
    /// <param name="containerName">The container name.</param>
    /// <param name="taskId">The task ID (last part of task ARN).</param>
    /// <returns>The full log stream name, in the form <c>prefix/container/taskId</c>.</returns>
    public static string GetTaskLogStreamName(
        string logStreamPrefix,
        string containerName,
        string taskId)
    {
        return $"{logStreamPrefix}/{containerName}/{taskId}";
    }

    /// <summary>
    /// Extracts the task ID from a task ARN.
    /// </summary>
    /// <param name="taskArn">The task ARN.</param>
    /// <returns>The text after the last <c>/</c>, or the whole input when it contains no <c>/</c>.</returns>
    public static string ExtractTaskId(string taskArn)
    {
        // LastIndexOf returns -1 when there is no '/', which makes the slice start at 0.
        return taskArn[(taskArn.LastIndexOf('/') + 1)..];
    }

    /// <summary>
    /// Raises <see cref="LogReceived"/> for one log event.
    /// </summary>
    private void OnLogReceived(LogMessageEventArgs e)
    {
        LogReceived?.Invoke(this, e);
    }

    /// <summary>
    /// Guesses a log level from keywords in the message, compared case-insensitively.
    /// Error keywords win over warning keywords, which win over debug keywords; a message with
    /// none of them (including an empty one) is reported as Information.
    /// </summary>
    private static LogLevel DetectLogLevel(string message)
    {
        if (string.IsNullOrEmpty(message))
        {
            return LogLevel.Information;
        }

        if (ContainsKeyword(message, "ERROR") || ContainsKeyword(message, "FATAL") ||
            ContainsKeyword(message, "EXCEPTION") || ContainsKeyword(message, "FAIL"))
        {
            return LogLevel.Error;
        }

        if (ContainsKeyword(message, "WARN"))
        {
            return LogLevel.Warning;
        }

        if (ContainsKeyword(message, "DEBUG") || ContainsKeyword(message, "TRACE"))
        {
            return LogLevel.Debug;
        }

        return LogLevel.Information;
    }

    /// <summary>
    /// Case-insensitive substring test that avoids allocating an upper-cased copy of the message.
    /// </summary>
    private static bool ContainsKeyword(string message, string keyword)
    {
        return message.Contains(keyword, StringComparison.OrdinalIgnoreCase);
    }
}
|
||||
|
||||
/// <summary>
/// Data carried by a log-message event: where the message came from, when it was logged,
/// its detected level and its text.
/// </summary>
public sealed class LogMessageEventArgs : EventArgs
{
    /// <summary>
    /// Initializes the event data with the values describing one log message.
    /// </summary>
    /// <param name="logGroup">Name of the log group the message came from.</param>
    /// <param name="logStream">Name of the log stream the message came from.</param>
    /// <param name="timestamp">Timestamp of the log event.</param>
    /// <param name="level">Log level detected for the message.</param>
    /// <param name="message">Text of the log message.</param>
    public LogMessageEventArgs(
        string logGroup,
        string logStream,
        DateTime timestamp,
        LogLevel level,
        string message)
        => (LogGroup, LogStream, Timestamp, Level, Message) = (logGroup, logStream, timestamp, level, message);

    /// <summary>
    /// Name of the log group the message came from.
    /// </summary>
    public string LogGroup { get; }

    /// <summary>
    /// Name of the log stream the message came from.
    /// </summary>
    public string LogStream { get; }

    /// <summary>
    /// Timestamp of the log event.
    /// </summary>
    public DateTime Timestamp { get; }

    /// <summary>
    /// Log level detected for the message.
    /// </summary>
    public LogLevel Level { get; }

    /// <summary>
    /// Text of the log message.
    /// </summary>
    public string Message { get; }
}
|
||||
@@ -0,0 +1,222 @@
|
||||
using System.Text.Json;
|
||||
using Amazon.CloudWatchLogs;
|
||||
using Amazon.ECS;
|
||||
using Amazon.ECS.Model;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Capability;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Ecs.Tasks;
|
||||
|
||||
namespace StellaOps.Agent.Ecs;
|
||||
|
||||
/// <summary>
/// Agent capability for managing AWS ECS services and tasks.
/// </summary>
/// <remarks>
/// Every supported task type maps to a handler class from <c>StellaOps.Agent.Ecs.Tasks</c>.
/// A new handler instance is created for each execution and shares this capability's
/// ECS client, time provider and logger factory.
/// </remarks>
public sealed class EcsCapability : IAgentCapability, IAsyncDisposable
{
    private readonly IAmazonECS _ecsClient;
    private readonly IAmazonCloudWatchLogs _logsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ILogger<EcsCapability> _logger;
    private readonly Dictionary<string, Func<AgentTaskInfo, CancellationToken, Task<AgentTaskResult>>> _taskHandlers;

    /// <summary>
    /// Gets the capability name.
    /// </summary>
    public string Name => "ecs";

    /// <summary>
    /// Gets the capability version.
    /// </summary>
    public string Version => "1.0.0";

    /// <summary>
    /// Gets the supported task types.
    /// </summary>
    public IReadOnlyList<string> SupportedTaskTypes { get; } = new[]
    {
        "ecs.deploy",
        "ecs.run",
        "ecs.stop",
        "ecs.scale",
        "ecs.register",
        "ecs.health",
        "ecs.describe"
    };

    /// <summary>
    /// Creates a new ECS capability.
    /// </summary>
    /// <param name="ecsClient">The ECS client.</param>
    /// <param name="logsClient">The CloudWatch Logs client.</param>
    /// <param name="timeProvider">Time provider for timestamps.</param>
    /// <param name="loggerFactory">Logger factory.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public EcsCapability(
        IAmazonECS ecsClient,
        IAmazonCloudWatchLogs logsClient,
        TimeProvider timeProvider,
        ILoggerFactory loggerFactory)
    {
        _ecsClient = ecsClient ?? throw new ArgumentNullException(nameof(ecsClient));
        _logsClient = logsClient ?? throw new ArgumentNullException(nameof(logsClient));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
        _logger = loggerFactory.CreateLogger<EcsCapability>();

        // Task types are identifiers, so they are matched case-insensitively
        // (ComposeCapability builds its handler table the same way).
        _taskHandlers = new Dictionary<string, Func<AgentTaskInfo, CancellationToken, Task<AgentTaskResult>>>(
            StringComparer.OrdinalIgnoreCase)
        {
            ["ecs.deploy"] = (task, ct) =>
                new EcsDeployServiceTask(_ecsClient, _timeProvider, _loggerFactory.CreateLogger<EcsDeployServiceTask>())
                    .ExecuteAsync(task, ct),
            ["ecs.run"] = (task, ct) =>
                new EcsRunTaskTask(_ecsClient, _timeProvider, _loggerFactory.CreateLogger<EcsRunTaskTask>())
                    .ExecuteAsync(task, ct),
            ["ecs.stop"] = (task, ct) =>
                new EcsStopTaskTask(_ecsClient, _timeProvider, _loggerFactory.CreateLogger<EcsStopTaskTask>())
                    .ExecuteAsync(task, ct),
            ["ecs.scale"] = (task, ct) =>
                new EcsScaleServiceTask(_ecsClient, _timeProvider, _loggerFactory.CreateLogger<EcsScaleServiceTask>())
                    .ExecuteAsync(task, ct),
            ["ecs.register"] = (task, ct) =>
                new EcsRegisterTaskDefinitionTask(_ecsClient, _timeProvider, _loggerFactory.CreateLogger<EcsRegisterTaskDefinitionTask>())
                    .ExecuteAsync(task, ct),
            ["ecs.health"] = (task, ct) =>
                new EcsHealthCheckTask(_ecsClient, _timeProvider, _loggerFactory.CreateLogger<EcsHealthCheckTask>())
                    .ExecuteAsync(task, ct),
            ["ecs.describe"] = (task, ct) =>
                new EcsDescribeServiceTask(_ecsClient, _timeProvider, _loggerFactory.CreateLogger<EcsDescribeServiceTask>())
                    .ExecuteAsync(task, ct)
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns <c>false</c> (and logs the error) instead of throwing when the probe call fails.
    /// </remarks>
    public async Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        try
        {
            // Verify AWS credentials and ECS access by listing clusters.
            await ListOneClusterAsync(ct);

            _logger.LogInformation(
                "ECS capability initialized, AWS API accessible");

            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to initialize ECS capability - AWS API not accessible");
            return false;
        }
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidEcsPayloadException">
    /// The task type has no registered handler, or a handler rejected the payload.
    /// </exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        if (!_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            throw new InvalidEcsPayloadException(task.TaskType, "Unsupported task type");
        }

        var startTime = _timeProvider.GetUtcNow();

        try
        {
            var result = await handler(task, ct);

            return result with
            {
                Duration = _timeProvider.GetUtcNow() - startTime
            };
        }
        // Payload errors propagate to the caller; every other exception (cancellation
        // included) is reported as a failed result.
        catch (Exception ex) when (ex is not InvalidEcsPayloadException)
        {
            _logger.LogError(ex, "ECS task {TaskType} failed", task.TaskType);

            // Read the clock once so CompletedAt and Duration describe the same instant.
            var completedAt = _timeProvider.GetUtcNow();

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Uses the same single-cluster listing probe as <see cref="InitializeAsync"/>.
    /// </remarks>
    public async Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        try
        {
            await ListOneClusterAsync(ct);
            return new CapabilityHealthStatus(true, "ECS capability ready");
        }
        catch (Exception ex)
        {
            return new CapabilityHealthStatus(false, $"ECS API not accessible: {ex.Message}");
        }
    }

    /// <summary>
    /// Issues a ListClusters call limited to one result; used as the connectivity probe.
    /// </summary>
    private Task<ListClustersResponse> ListOneClusterAsync(CancellationToken ct) =>
        _ecsClient.ListClustersAsync(new ListClustersRequest { MaxResults = 1 }, ct);

    /// <summary>
    /// Disposes the ECS and CloudWatch Logs clients that were passed to the constructor.
    /// </summary>
    public ValueTask DisposeAsync()
    {
        _ecsClient.Dispose();
        _logsClient.Dispose();
        return ValueTask.CompletedTask;
    }
}
|
||||
@@ -0,0 +1,86 @@
|
||||
namespace StellaOps.Agent.Ecs;
|
||||
|
||||
/// <summary>
/// Common base class for exceptions raised by ECS agent operations.
/// </summary>
public class EcsAgentException : Exception
{
    /// <summary>
    /// Initializes the exception with an error message.
    /// </summary>
    /// <param name="message">Text describing the failure.</param>
    public EcsAgentException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Initializes the exception with an error message and the exception that caused it.
    /// </summary>
    /// <param name="message">Text describing the failure.</param>
    /// <param name="innerException">The underlying cause.</param>
    public EcsAgentException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
|
||||
/// <summary>
/// Signals that the payload of an ECS task is invalid or lacks required fields.
/// </summary>
public class InvalidEcsPayloadException : EcsAgentException
{
    /// <summary>
    /// Gets the task type whose payload was rejected.
    /// </summary>
    public string TaskType { get; }

    /// <summary>
    /// Initializes the exception for the given task type.
    /// </summary>
    /// <param name="taskType">Task type whose payload was rejected.</param>
    /// <param name="details">Optional explanation appended to the message after a colon.</param>
    public InvalidEcsPayloadException(string taskType, string? details = null)
        : base(ComposeMessage(taskType, details))
    {
        TaskType = taskType;
    }

    // Builds the message text; the ": details" suffix is omitted when no details are given.
    private static string ComposeMessage(string taskType, string? details)
    {
        var suffix = details is null ? "" : $": {details}";
        return $"Invalid payload for ECS task type '{taskType}'{suffix}";
    }
}
|
||||
|
||||
/// <summary>
/// Signals that an operation on an ECS service or task failed.
/// </summary>
public class EcsOperationException : EcsAgentException
{
    /// <summary>
    /// Gets the name of the operation that failed.
    /// </summary>
    public string Operation { get; }

    /// <summary>
    /// Gets the cluster involved, when one was supplied.
    /// </summary>
    public string? Cluster { get; }

    /// <summary>
    /// Gets the resource involved, when one was supplied.
    /// </summary>
    public string? Resource { get; }

    /// <summary>
    /// Initializes the exception from the operation context and a failure description.
    /// </summary>
    /// <param name="operation">Name of the failed operation.</param>
    /// <param name="cluster">Cluster involved, or <c>null</c> to leave it out of the message.</param>
    /// <param name="resource">Resource involved, or <c>null</c> to leave it out of the message.</param>
    /// <param name="message">Description of the failure.</param>
    public EcsOperationException(string operation, string? cluster, string? resource, string message)
        : base(BuildMessage(operation, cluster, resource, message))
    {
        (Operation, Cluster, Resource) = (operation, cluster, resource);
    }

    /// <summary>
    /// Initializes the exception from the operation context, a failure description and its cause.
    /// </summary>
    /// <param name="operation">Name of the failed operation.</param>
    /// <param name="cluster">Cluster involved, or <c>null</c> to leave it out of the message.</param>
    /// <param name="resource">Resource involved, or <c>null</c> to leave it out of the message.</param>
    /// <param name="message">Description of the failure.</param>
    /// <param name="innerException">The underlying cause.</param>
    public EcsOperationException(string operation, string? cluster, string? resource, string message, Exception innerException)
        : base(BuildMessage(operation, cluster, resource, message), innerException)
    {
        (Operation, Cluster, Resource) = (operation, cluster, resource);
    }

    // Shared by both constructors so the two message formats cannot drift apart.
    private static string BuildMessage(string operation, string? cluster, string? resource, string message)
    {
        var clusterPart = cluster is null ? "" : $" in cluster '{cluster}'";
        var resourcePart = resource is null ? "" : $" for '{resource}'";
        return $"ECS {operation} failed{clusterPart}{resourcePart}: {message}";
    }
}
|
||||
|
||||
/// <summary>
/// Signals that an ECS deployment did not stabilize before its timeout elapsed.
/// </summary>
public class EcsDeploymentTimeoutException : EcsAgentException
{
    /// <summary>
    /// Gets the cluster that hosts the service.
    /// </summary>
    public string Cluster { get; }

    /// <summary>
    /// Gets the name of the service that did not stabilize.
    /// </summary>
    public string ServiceName { get; }

    /// <summary>
    /// Gets the timeout that elapsed.
    /// </summary>
    public TimeSpan Timeout { get; }

    /// <summary>
    /// Initializes the exception for a service that failed to stabilize in time.
    /// </summary>
    /// <param name="cluster">Cluster that hosts the service.</param>
    /// <param name="serviceName">Name of the service.</param>
    /// <param name="timeout">Timeout that elapsed.</param>
    public EcsDeploymentTimeoutException(string cluster, string serviceName, TimeSpan timeout)
        : base(DescribeTimeout(cluster, serviceName, timeout))
    {
        (Cluster, ServiceName, Timeout) = (cluster, serviceName, timeout);
    }

    // Builds the message text passed to the base exception.
    private static string DescribeTimeout(string cluster, string serviceName, TimeSpan timeout) =>
        $"ECS deployment timed out waiting for service '{serviceName}' in cluster '{cluster}' to stabilize after {timeout}";
}
|
||||
|
||||
/// <summary>
/// Signals that one or more ECS tasks did not complete successfully.
/// </summary>
public class EcsTaskFailedException : EcsAgentException
{
    /// <summary>
    /// Gets the cluster in which the tasks ran.
    /// </summary>
    public string Cluster { get; }

    /// <summary>
    /// Gets the ARNs of the tasks supplied to the constructor.
    /// </summary>
    public IReadOnlyList<string> TaskArns { get; }

    /// <summary>
    /// Gets the exit codes supplied to the constructor.
    /// </summary>
    public IReadOnlyList<int> ExitCodes { get; }

    /// <summary>
    /// Initializes the exception; the exit codes are listed in the message.
    /// </summary>
    /// <param name="cluster">Cluster in which the tasks ran.</param>
    /// <param name="taskArns">ARNs of the failed tasks.</param>
    /// <param name="exitCodes">Exit codes reported for the failure.</param>
    public EcsTaskFailedException(string cluster, IReadOnlyList<string> taskArns, IReadOnlyList<int> exitCodes)
        : base(DescribeFailure(cluster, exitCodes))
    {
        (Cluster, TaskArns, ExitCodes) = (cluster, taskArns, exitCodes);
    }

    // Builds the message text, joining the exit codes with ", ".
    private static string DescribeFailure(string cluster, IReadOnlyList<int> exitCodes)
    {
        var joinedCodes = string.Join(", ", exitCodes);
        return $"ECS task(s) failed in cluster '{cluster}' with exit codes: [{joinedCodes}]";
    }
}
|
||||
@@ -0,0 +1,17 @@
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Ecs;
|
||||
|
||||
/// <summary>
/// Contract implemented by the handlers that carry out individual ECS agent tasks.
/// </summary>
public interface IEcsTask
{
    /// <summary>
    /// Runs the handler for the supplied agent task.
    /// </summary>
    /// <param name="task">Agent task to run.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>Outcome of running the task.</returns>
    Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,25 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Agent.Ecs</RootNamespace>
|
||||
<Description>Stella Agent ECS Capability - manages AWS ECS services and tasks</Description>
|
||||
<!-- AWS SDK v4 nullable annotations cause false positives with value type boxing to Dictionary<string, object> -->
|
||||
<NoWarn>$(NoWarn);CS8600;CS8601;CS8620</NoWarn>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="AWSSDK.ECS" />
|
||||
<PackageReference Include="AWSSDK.CloudWatchLogs" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,470 @@
|
||||
using System.Text.Json;
|
||||
using Amazon.ECS;
|
||||
using Amazon.ECS.Model;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using Task = System.Threading.Tasks.Task;
|
||||
|
||||
namespace StellaOps.Agent.Ecs.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for deploying ECS services.
/// </summary>
/// <remarks>
/// Updates the service when it already exists with a status other than <c>INACTIVE</c>,
/// creates it otherwise, then polls until the deployment stabilizes or the payload's
/// <see cref="DeployServicePayload.DeploymentTimeout"/> elapses.
/// </remarks>
public sealed class EcsDeployServiceTask : IEcsTask
{
    private const string TaskType = "ecs.deploy";

    // Delay between two consecutive stabilization polls.
    private static readonly TimeSpan StabilizationPollInterval = TimeSpan.FromSeconds(10);

    private readonly IAmazonECS _ecsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EcsDeployServiceTask> _logger;

    /// <summary>
    /// Payload for deploying an ECS service.
    /// </summary>
    public sealed record DeployServicePayload
    {
        /// <summary>
        /// Name or ARN of the ECS cluster.
        /// </summary>
        public required string Cluster { get; init; }

        /// <summary>
        /// Name of the service to deploy.
        /// </summary>
        public required string ServiceName { get; init; }

        /// <summary>
        /// Task definition family:revision or ARN.
        /// </summary>
        public required string TaskDefinition { get; init; }

        /// <summary>
        /// Desired number of tasks.
        /// </summary>
        public int DesiredCount { get; init; } = 1;

        /// <summary>
        /// Launch type (FARGATE or EC2). Only applied when the service is created.
        /// </summary>
        public string? LaunchType { get; init; }

        /// <summary>
        /// Network configuration for awsvpc mode. Only applied when the service is created.
        /// </summary>
        public NetworkConfigurationPayload? NetworkConfiguration { get; init; }

        /// <summary>
        /// Load balancer configuration. Only applied when the service is created.
        /// </summary>
        public LoadBalancerPayload? LoadBalancer { get; init; }

        /// <summary>
        /// Deployment configuration.
        /// </summary>
        public DeploymentConfigPayload? DeploymentConfiguration { get; init; }

        /// <summary>
        /// Whether to force a new deployment. Only used when an existing service is updated.
        /// </summary>
        public bool ForceNewDeployment { get; init; } = true;

        /// <summary>
        /// Timeout waiting for deployment to stabilize.
        /// </summary>
        public TimeSpan DeploymentTimeout { get; init; } = TimeSpan.FromMinutes(10);

        /// <summary>
        /// Tags to apply to the service. Only applied when the service is created.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Tags { get; init; }
    }

    /// <summary>
    /// Network configuration payload.
    /// </summary>
    public sealed record NetworkConfigurationPayload
    {
        /// <summary>
        /// Subnet IDs.
        /// </summary>
        public required IReadOnlyList<string> Subnets { get; init; }

        /// <summary>
        /// Security group IDs.
        /// </summary>
        public IReadOnlyList<string>? SecurityGroups { get; init; }

        /// <summary>
        /// Whether to assign a public IP.
        /// </summary>
        public bool AssignPublicIp { get; init; }
    }

    /// <summary>
    /// Load balancer configuration payload.
    /// </summary>
    public sealed record LoadBalancerPayload
    {
        /// <summary>
        /// Target group ARN.
        /// </summary>
        public required string TargetGroupArn { get; init; }

        /// <summary>
        /// Container name for the target.
        /// </summary>
        public required string ContainerName { get; init; }

        /// <summary>
        /// Container port.
        /// </summary>
        public required int ContainerPort { get; init; }
    }

    /// <summary>
    /// Deployment configuration payload.
    /// </summary>
    public sealed record DeploymentConfigPayload
    {
        /// <summary>
        /// Maximum percent during deployment.
        /// </summary>
        public int MaximumPercent { get; init; } = 200;

        /// <summary>
        /// Minimum healthy percent.
        /// </summary>
        public int MinimumHealthyPercent { get; init; } = 100;

        /// <summary>
        /// Enable deployment circuit breaker.
        /// </summary>
        public bool EnableCircuitBreaker { get; init; } = true;

        /// <summary>
        /// Enable rollback on failure.
        /// </summary>
        public bool EnableRollback { get; init; } = true;
    }

    /// <summary>
    /// Creates a new ECS deploy service task handler.
    /// </summary>
    /// <param name="ecsClient">ECS client used for all service calls.</param>
    /// <param name="timeProvider">Source of timestamps, the stabilization timeout and poll delays.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public EcsDeployServiceTask(
        IAmazonECS ecsClient,
        TimeProvider timeProvider,
        ILogger<EcsDeployServiceTask> logger)
    {
        _ecsClient = ecsClient ?? throw new ArgumentNullException(nameof(ecsClient));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidEcsPayloadException">The payload is malformed or deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        var payload = ParsePayload(task);

        _logger.LogInformation(
            "Deploying ECS service {Service} to cluster {Cluster} with task definition {TaskDef}",
            payload.ServiceName,
            payload.Cluster,
            payload.TaskDefinition);

        try
        {
            // An INACTIVE service is treated like a missing one and created again.
            var existingService = await GetServiceAsync(payload.Cluster, payload.ServiceName, ct);

            return existingService is not null && existingService.Status != "INACTIVE"
                ? await UpdateServiceAsync(task.Id, payload, ct)
                : await CreateServiceAsync(task.Id, payload, ct);
        }
        catch (AmazonECSException ex)
        {
            _logger.LogError(ex, "Failed to deploy ECS service {Service}", payload.ServiceName);
            return Failure(task.Id, $"ECS deployment failed: {ex.Message}");
        }
    }

    /// <summary>
    /// Deserializes the task payload, reporting malformed JSON as an invalid-payload error.
    /// </summary>
    private static DeployServicePayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<DeployServicePayload>(task.Payload)
                ?? throw new InvalidEcsPayloadException(TaskType, "Failed to deserialize payload");
        }
        catch (JsonException ex)
        {
            throw new InvalidEcsPayloadException(TaskType, ex.Message);
        }
    }

    /// <summary>
    /// Looks up a service by name; returns <c>null</c> when the response lists no service.
    /// </summary>
    /// <remarks>
    /// Errors are not swallowed here: hiding them would make a failed lookup look like a
    /// missing service and trigger a create call. ECS errors are turned into a failed
    /// result by <see cref="ExecuteAsync"/>.
    /// </remarks>
    private async Task<Service?> GetServiceAsync(string cluster, string serviceName, CancellationToken ct)
    {
        var response = await _ecsClient.DescribeServicesAsync(new DescribeServicesRequest
        {
            Cluster = cluster,
            Services = new List<string> { serviceName }
        }, ct);

        // The collection is null-checked because the SDK may leave it unset.
        return response.Services?.FirstOrDefault();
    }

    /// <summary>
    /// Creates the service from the payload and waits for it to stabilize.
    /// </summary>
    private async Task<AgentTaskResult> CreateServiceAsync(
        Guid taskId,
        DeployServicePayload payload,
        CancellationToken ct)
    {
        _logger.LogInformation("Creating new ECS service {Service}", payload.ServiceName);

        var request = new CreateServiceRequest
        {
            Cluster = payload.Cluster,
            ServiceName = payload.ServiceName,
            TaskDefinition = payload.TaskDefinition,
            DesiredCount = payload.DesiredCount
        };

        if (!string.IsNullOrEmpty(payload.LaunchType))
        {
            request.LaunchType = new LaunchType(payload.LaunchType);
        }

        if (payload.NetworkConfiguration is { } network)
        {
            request.NetworkConfiguration = new NetworkConfiguration
            {
                AwsvpcConfiguration = new AwsVpcConfiguration
                {
                    Subnets = network.Subnets.ToList(),
                    SecurityGroups = network.SecurityGroups?.ToList(),
                    AssignPublicIp = network.AssignPublicIp
                        ? AssignPublicIp.ENABLED
                        : AssignPublicIp.DISABLED
                }
            };
        }

        if (payload.LoadBalancer is { } loadBalancer)
        {
            request.LoadBalancers = new List<LoadBalancer>
            {
                new()
                {
                    TargetGroupArn = loadBalancer.TargetGroupArn,
                    ContainerName = loadBalancer.ContainerName,
                    ContainerPort = loadBalancer.ContainerPort
                }
            };
        }

        if (payload.DeploymentConfiguration is { } deploymentConfig)
        {
            request.DeploymentConfiguration = ToDeploymentConfiguration(deploymentConfig);
        }

        if (payload.Tags is not null)
        {
            request.Tags = payload.Tags.Select(kv => new Tag { Key = kv.Key, Value = kv.Value }).ToList();
        }

        var createResponse = await _ecsClient.CreateServiceAsync(request, ct);

        if (createResponse.Service is not { } service)
        {
            return Failure(taskId, "Service creation returned no service object");
        }

        _logger.LogInformation(
            "Created ECS service {Service} (ARN: {Arn})",
            payload.ServiceName,
            service.ServiceArn);

        var (stable, latest) = await WaitForServiceStableAsync(
            payload.Cluster,
            payload.ServiceName,
            payload.DeploymentTimeout,
            ct);

        // Report the most recent snapshot: the create response predates stabilization,
        // so its running count does not reflect the final state.
        return BuildDeploymentResult(taskId, latest ?? service, stable, "create");
    }

    /// <summary>
    /// Updates the existing service to the payload's task definition and waits for it to stabilize.
    /// </summary>
    private async Task<AgentTaskResult> UpdateServiceAsync(
        Guid taskId,
        DeployServicePayload payload,
        CancellationToken ct)
    {
        _logger.LogInformation(
            "Updating existing ECS service {Service} to task definition {TaskDef}",
            payload.ServiceName,
            payload.TaskDefinition);

        var request = new UpdateServiceRequest
        {
            Cluster = payload.Cluster,
            Service = payload.ServiceName,
            TaskDefinition = payload.TaskDefinition,
            DesiredCount = payload.DesiredCount,
            ForceNewDeployment = payload.ForceNewDeployment
        };

        if (payload.DeploymentConfiguration is { } deploymentConfig)
        {
            request.DeploymentConfiguration = ToDeploymentConfiguration(deploymentConfig);
        }

        var updateResponse = await _ecsClient.UpdateServiceAsync(request, ct);

        if (updateResponse.Service is not { } service)
        {
            return Failure(taskId, "Service update returned no service object");
        }

        // Taken from the update response so it identifies the first deployment listed there.
        var deploymentId = service.Deployments?.FirstOrDefault()?.Id;

        _logger.LogInformation(
            "Updated ECS service {Service}, deployment ID: {DeploymentId}",
            payload.ServiceName,
            deploymentId ?? "unknown");

        var (stable, latest) = await WaitForServiceStableAsync(
            payload.Cluster,
            payload.ServiceName,
            payload.DeploymentTimeout,
            ct);

        return BuildDeploymentResult(taskId, latest ?? service, stable, "update", deploymentId ?? "");
    }

    /// <summary>
    /// Maps the payload's deployment settings onto the SDK request type.
    /// </summary>
    private static DeploymentConfiguration ToDeploymentConfiguration(DeploymentConfigPayload config) =>
        new()
        {
            MaximumPercent = config.MaximumPercent,
            MinimumHealthyPercent = config.MinimumHealthyPercent,
            DeploymentCircuitBreaker = new DeploymentCircuitBreaker
            {
                Enable = config.EnableCircuitBreaker,
                Rollback = config.EnableRollback
            }
        };

    /// <summary>
    /// Builds the task result shared by the create and update paths.
    /// </summary>
    /// <param name="taskId">Identifier of the agent task.</param>
    /// <param name="service">Service snapshot used for the reported outputs.</param>
    /// <param name="stable">Whether the service stabilized before the timeout.</param>
    /// <param name="operation">Value of the <c>operation</c> output ("create" or "update").</param>
    /// <param name="deploymentId">Deployment identifier; the output is omitted when <c>null</c>.</param>
    private AgentTaskResult BuildDeploymentResult(
        Guid taskId,
        Service service,
        bool stable,
        string operation,
        string? deploymentId = null)
    {
        var outputs = new Dictionary<string, object>
        {
            ["serviceArn"] = service.ServiceArn ?? "",
            ["serviceName"] = service.ServiceName ?? "",
            ["taskDefinition"] = service.TaskDefinition ?? "",
            ["runningCount"] = service.RunningCount,
            ["desiredCount"] = service.DesiredCount
        };

        if (deploymentId is not null)
        {
            outputs["deploymentId"] = deploymentId;
        }

        outputs["deploymentStatus"] = stable ? "COMPLETED" : "TIMED_OUT";
        outputs["operation"] = operation;

        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = stable,
            Error = stable ? null : "Service did not stabilize within timeout",
            Outputs = outputs,
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    /// <summary>
    /// Builds a failed result carrying only an error message.
    /// </summary>
    private AgentTaskResult Failure(Guid taskId, string error) =>
        new()
        {
            TaskId = taskId,
            Success = false,
            Error = error,
            CompletedAt = _timeProvider.GetUtcNow()
        };

    /// <summary>
    /// Polls the service until its PRIMARY deployment is the only deployment and its
    /// running count equals its desired count, or until <paramref name="timeout"/> elapses.
    /// </summary>
    /// <returns>
    /// Whether the service stabilized, together with the last service snapshot that was
    /// fetched (<c>null</c> when none was obtained).
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    private async Task<(bool Stable, Service? Latest)> WaitForServiceStableAsync(
        string cluster,
        string serviceName,
        TimeSpan timeout,
        CancellationToken ct)
    {
        _logger.LogInformation("Waiting for service {Service} to stabilize", serviceName);

        // Timeout and poll delays run on the injected TimeProvider rather than the system clock.
        using var timeoutCts = new CancellationTokenSource(timeout, _timeProvider);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

        Service? latest = null;

        try
        {
            while (!linkedCts.IsCancellationRequested)
            {
                var service = await GetServiceAsync(cluster, serviceName, linkedCts.Token);
                if (service is null)
                {
                    _logger.LogWarning("Service {Service} not found during stabilization check", serviceName);
                    return (false, latest);
                }

                latest = service;

                var deployments = service.Deployments;
                var primaryDeployment = deployments?.FirstOrDefault(d => d.Status == "PRIMARY");

                if (primaryDeployment is not null)
                {
                    var deploymentCount = deployments?.Count ?? 0;

                    // NOTE(review): a circuit-breaker rollback presumably also ends with a single
                    // PRIMARY deployment and would be reported as stable — confirm whether
                    // that case needs to be detected.
                    if (primaryDeployment.RunningCount == primaryDeployment.DesiredCount &&
                        deploymentCount == 1)
                    {
                        _logger.LogInformation(
                            "Service {Service} stabilized with {Count} running tasks",
                            serviceName,
                            primaryDeployment.RunningCount);
                        return (true, service);
                    }

                    _logger.LogDebug(
                        "Service {Service} not stable: running={Running}, desired={Desired}, deployments={Deployments}",
                        serviceName,
                        primaryDeployment.RunningCount,
                        primaryDeployment.DesiredCount,
                        deploymentCount);
                }

                await Task.Delay(StabilizationPollInterval, _timeProvider, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            // The stabilization timeout fired; reported below.
        }

        // Caller cancellation must not be reported as a deployment timeout.
        ct.ThrowIfCancellationRequested();

        _logger.LogWarning("Service {Service} stabilization timed out after {Timeout}", serviceName, timeout);
        return (false, latest);
    }
}
|
||||
@@ -0,0 +1,173 @@
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using Amazon.ECS;
|
||||
using Amazon.ECS.Model;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using Task = System.Threading.Tasks.Task;
|
||||
|
||||
namespace StellaOps.Agent.Ecs.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for describing ECS services.
/// </summary>
/// <remarks>
/// Reports the service's counts, status and deployments and, when requested by the
/// payload, the tasks that belong to the service together with their containers.
/// </remarks>
public sealed class EcsDescribeServiceTask : IEcsTask
{
    private const string TaskType = "ecs.describe";

    private readonly IAmazonECS _ecsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EcsDescribeServiceTask> _logger;

    /// <summary>
    /// Payload for describing an ECS service.
    /// </summary>
    public sealed record DescribeServicePayload
    {
        /// <summary>
        /// Name or ARN of the ECS cluster.
        /// </summary>
        public required string Cluster { get; init; }

        /// <summary>
        /// Name of the service to describe.
        /// </summary>
        public required string ServiceName { get; init; }

        /// <summary>
        /// Whether to include task information.
        /// </summary>
        public bool IncludeTasks { get; init; } = false;
    }

    /// <summary>
    /// Creates a new ECS describe service task handler.
    /// </summary>
    /// <param name="ecsClient">ECS client used for all service calls.</param>
    /// <param name="timeProvider">Source of completion timestamps.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public EcsDescribeServiceTask(
        IAmazonECS ecsClient,
        TimeProvider timeProvider,
        ILogger<EcsDescribeServiceTask> logger)
    {
        _ecsClient = ecsClient ?? throw new ArgumentNullException(nameof(ecsClient));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidEcsPayloadException">The payload is malformed or deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        var payload = ParsePayload(task);

        _logger.LogInformation(
            "Describing ECS service {Service} in cluster {Cluster}",
            payload.ServiceName,
            payload.Cluster);

        try
        {
            var response = await _ecsClient.DescribeServicesAsync(new DescribeServicesRequest
            {
                Cluster = payload.Cluster,
                Services = new List<string> { payload.ServiceName }
            }, ct);

            // Collections are null-checked because the SDK may leave them unset.
            if (response.Services?.FirstOrDefault() is not { } service)
            {
                return Failure(task.Id, $"Service '{payload.ServiceName}' not found");
            }

            var deployments = service.Deployments ?? new List<Deployment>();

            var outputs = new Dictionary<string, object>
            {
                ["serviceArn"] = service.ServiceArn ?? "",
                ["serviceName"] = service.ServiceName ?? "",
                ["clusterArn"] = service.ClusterArn ?? "",
                ["status"] = service.Status ?? "",
                ["taskDefinition"] = service.TaskDefinition ?? "",
                ["desiredCount"] = service.DesiredCount,
                ["runningCount"] = service.RunningCount,
                ["pendingCount"] = service.PendingCount,
                ["launchType"] = service.LaunchType?.Value ?? "unknown",
                ["deploymentCount"] = deployments.Count,
                ["createdAt"] = FormatTimestamp(service.CreatedAt),
                ["deployments"] = deployments.Select(d => new Dictionary<string, object>
                {
                    ["id"] = d.Id ?? "",
                    ["status"] = d.Status ?? "",
                    ["taskDefinition"] = d.TaskDefinition ?? "",
                    ["desiredCount"] = d.DesiredCount,
                    ["runningCount"] = d.RunningCount,
                    ["pendingCount"] = d.PendingCount,
                    ["createdAt"] = FormatTimestamp(d.CreatedAt)
                }).ToList()
            };

            if (payload.IncludeTasks)
            {
                var tasks = await DescribeServiceTasksAsync(payload, ct);

                // The "tasks" output is only present when the service has at least one task.
                if (tasks.Count > 0)
                {
                    outputs["tasks"] = tasks;
                }
            }

            _logger.LogInformation(
                "Described ECS service {Service}: {Running}/{Desired} running, {Deployments} deployments",
                payload.ServiceName,
                service.RunningCount,
                service.DesiredCount,
                deployments.Count);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = outputs,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (AmazonECSException ex)
        {
            _logger.LogError(ex, "Failed to describe ECS service {Service}", payload.ServiceName);
            return Failure(task.Id, $"Failed to describe service: {ex.Message}");
        }
    }

    /// <summary>
    /// Deserializes the task payload, reporting malformed JSON as an invalid-payload error.
    /// </summary>
    private static DescribeServicePayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<DescribeServicePayload>(task.Payload)
                ?? throw new InvalidEcsPayloadException(TaskType, "Failed to deserialize payload");
        }
        catch (JsonException ex)
        {
            throw new InvalidEcsPayloadException(TaskType, ex.Message);
        }
    }

    /// <summary>
    /// Lists and describes every task of the service, following <c>NextToken</c> so that
    /// services with more tasks than fit in one ListTasks page are reported completely.
    /// </summary>
    /// <returns>One output dictionary per task; empty when the service has no tasks.</returns>
    private async Task<List<Dictionary<string, object>>> DescribeServiceTasksAsync(
        DescribeServicePayload payload,
        CancellationToken ct)
    {
        var tasks = new List<Dictionary<string, object>>();
        string? nextToken = null;

        do
        {
            var page = await _ecsClient.ListTasksAsync(new ListTasksRequest
            {
                Cluster = payload.Cluster,
                ServiceName = payload.ServiceName,
                NextToken = nextToken
            }, ct);

            // Each page is described on its own so one DescribeTasks call never receives
            // more ARNs than a single ListTasks page contains.
            if (page.TaskArns is { Count: > 0 } taskArns)
            {
                var described = await _ecsClient.DescribeTasksAsync(new DescribeTasksRequest
                {
                    Cluster = payload.Cluster,
                    Tasks = taskArns
                }, ct);

                if (described.Tasks is { } describedTasks)
                {
                    tasks.AddRange(describedTasks.Select(t => new Dictionary<string, object>
                    {
                        ["taskArn"] = t.TaskArn ?? "",
                        ["taskDefinitionArn"] = t.TaskDefinitionArn ?? "",
                        ["lastStatus"] = t.LastStatus ?? "",
                        ["desiredStatus"] = t.DesiredStatus ?? "",
                        ["healthStatus"] = t.HealthStatus?.Value ?? "unknown",
                        ["createdAt"] = FormatTimestamp(t.CreatedAt),
                        ["containers"] = (t.Containers ?? new List<Container>()).Select(c => new Dictionary<string, object>
                        {
                            ["name"] = c.Name ?? "",
                            ["lastStatus"] = c.LastStatus ?? "",
                            ["exitCode"] = c.ExitCode ?? -1,
                            ["healthStatus"] = c.HealthStatus?.Value ?? "unknown"
                        }).ToList()
                    }));
                }
            }

            nextToken = page.NextToken;
        }
        while (!string.IsNullOrEmpty(nextToken));

        return tasks;
    }

    /// <summary>
    /// Formats a timestamp as a round-trip ("o") UTC string; a missing value is
    /// formatted from the default <see cref="DateTime"/>.
    /// </summary>
    private static string FormatTimestamp(DateTime? value) =>
        value.GetValueOrDefault().ToUniversalTime().ToString("o", CultureInfo.InvariantCulture);

    /// <summary>
    /// Builds a failed result carrying only an error message.
    /// </summary>
    private AgentTaskResult Failure(Guid taskId, string error) =>
        new()
        {
            TaskId = taskId,
            Success = false,
            Error = error,
            CompletedAt = _timeProvider.GetUtcNow()
        };
}
|
||||
@@ -0,0 +1,233 @@
|
||||
using System.Text.Json;
|
||||
using Amazon.ECS;
|
||||
using Amazon.ECS.Model;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using Task = System.Threading.Tasks.Task;
|
||||
|
||||
namespace StellaOps.Agent.Ecs.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for checking ECS service health.
/// </summary>
/// <remarks>
/// A service is reported healthy when the percentage of running tasks relative to the
/// desired count reaches <c>MinHealthyPercent</c> and the service has exactly one deployment.
/// A service with a desired count of zero is evaluated as 0% running.
/// </remarks>
public sealed class EcsHealthCheckTask : IEcsTask
{
    /// <summary>
    /// Delay between two consecutive polls while waiting for the service to become healthy.
    /// </summary>
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(10);

    private readonly IAmazonECS _ecsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EcsHealthCheckTask> _logger;

    /// <summary>
    /// Payload for checking ECS service health.
    /// </summary>
    public sealed record HealthCheckPayload
    {
        /// <summary>
        /// Name or ARN of the ECS cluster.
        /// </summary>
        public required string Cluster { get; init; }

        /// <summary>
        /// Name of the service to check.
        /// </summary>
        public required string ServiceName { get; init; }

        /// <summary>
        /// Minimum healthy percent to consider the service healthy.
        /// </summary>
        public int MinHealthyPercent { get; init; } = 100;

        /// <summary>
        /// Whether to wait for the service to become healthy.
        /// </summary>
        public bool WaitForHealthy { get; init; } = true;

        /// <summary>
        /// Timeout for waiting for health.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Point-in-time health figures derived from one DescribeServices result.
    /// </summary>
    private readonly record struct ServiceHealth(
        int RunningCount,
        int DesiredCount,
        int HealthyPercent,
        int DeploymentCount,
        bool IsHealthy);

    /// <summary>
    /// Creates a new ECS health check task handler.
    /// </summary>
    /// <param name="ecsClient">ECS client used to describe the service.</param>
    /// <param name="timeProvider">Clock used to stamp task results.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public EcsHealthCheckTask(
        IAmazonECS ecsClient,
        TimeProvider timeProvider,
        ILogger<EcsHealthCheckTask> logger)
    {
        _ecsClient = ecsClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<HealthCheckPayload>(task.Payload)
            ?? throw new InvalidEcsPayloadException("ecs.health", "Failed to deserialize payload");

        _logger.LogInformation(
            "Checking health of ECS service {Service} in cluster {Cluster}",
            payload.ServiceName,
            payload.Cluster);

        try
        {
            return payload.WaitForHealthy
                ? await WaitForHealthyAsync(task.Id, payload, ct)
                : await CheckHealthOnceAsync(task.Id, payload, ct);
        }
        catch (AmazonECSException ex)
        {
            _logger.LogError(ex, "Failed to check health of ECS service {Service}", payload.ServiceName);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Health check failed: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }

    /// <summary>
    /// Describes the service a single time and reports whether it is currently healthy.
    /// </summary>
    private async Task<AgentTaskResult> CheckHealthOnceAsync(
        Guid taskId,
        HealthCheckPayload payload,
        CancellationToken ct)
    {
        var service = await DescribeServiceAsync(payload, ct);
        if (service is null)
        {
            return ServiceNotFound(taskId, payload);
        }

        var health = Evaluate(service, payload.MinHealthyPercent);

        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = health.IsHealthy,
            Error = health.IsHealthy ? null : DescribeUnhealthy(health, payload.MinHealthyPercent),
            Outputs = BuildOutputs(service, health),
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    /// <summary>
    /// Polls the service every <see cref="PollInterval"/> until it is healthy, it cannot be
    /// found, or the payload timeout elapses.
    /// </summary>
    /// <exception cref="OperationCanceledException">The caller's token was cancelled.</exception>
    private async Task<AgentTaskResult> WaitForHealthyAsync(
        Guid taskId,
        HealthCheckPayload payload,
        CancellationToken ct)
    {
        using var timeoutCts = new CancellationTokenSource(payload.Timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

        try
        {
            while (!linkedCts.IsCancellationRequested)
            {
                var service = await DescribeServiceAsync(payload, linkedCts.Token);
                if (service is null)
                {
                    return ServiceNotFound(taskId, payload);
                }

                var health = Evaluate(service, payload.MinHealthyPercent);
                if (health.IsHealthy)
                {
                    _logger.LogInformation(
                        "Service {Service} is healthy: {Running}/{Desired} tasks running ({Percent}%)",
                        payload.ServiceName,
                        health.RunningCount,
                        health.DesiredCount,
                        health.HealthyPercent);

                    return new AgentTaskResult
                    {
                        TaskId = taskId,
                        Success = true,
                        Outputs = BuildOutputs(service, health),
                        CompletedAt = _timeProvider.GetUtcNow()
                    };
                }

                _logger.LogDebug(
                    "Service {Service} health check: {Running}/{Desired} ({Percent}%), waiting...",
                    payload.ServiceName,
                    health.RunningCount,
                    health.DesiredCount,
                    health.HealthyPercent);

                await Task.Delay(PollInterval, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            // Timeout is reported as a failed result below rather than as an exception.
        }

        // The loop can also end without an exception when the caller cancels between
        // polls; surface that as cancellation instead of mislabelling it as a timeout.
        if (!timeoutCts.IsCancellationRequested)
        {
            ct.ThrowIfCancellationRequested();
        }

        _logger.LogWarning(
            "Health check timed out after {Timeout} for service {Service}",
            payload.Timeout,
            payload.ServiceName);

        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = false,
            Error = $"Health check timed out after {payload.Timeout}",
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    /// <summary>
    /// Fetches the service named in the payload, or <c>null</c> when ECS returns no match.
    /// </summary>
    private async Task<Service?> DescribeServiceAsync(HealthCheckPayload payload, CancellationToken ct)
    {
        var response = await _ecsClient.DescribeServicesAsync(new DescribeServicesRequest
        {
            Cluster = payload.Cluster,
            Services = new List<string> { payload.ServiceName }
        }, ct);

        // The SDK may leave response collections null instead of empty.
        return response.Services?.FirstOrDefault();
    }

    /// <summary>
    /// Derives the health figures for a described service.
    /// </summary>
    private static ServiceHealth Evaluate(Service service, int minHealthyPercent)
    {
        var running = service.RunningCount ?? 0;
        var desired = service.DesiredCount ?? 0;
        var deployments = service.Deployments?.Count ?? 0;

        // Integer division: the percentage is truncated, never rounded up.
        var percent = desired > 0 ? running * 100 / desired : 0;
        var healthy = percent >= minHealthyPercent && deployments == 1;

        return new ServiceHealth(running, desired, percent, deployments, healthy);
    }

    /// <summary>
    /// Builds the error text for an unhealthy service, naming the condition that failed.
    /// </summary>
    private static string DescribeUnhealthy(ServiceHealth health, int minHealthyPercent)
    {
        if (health.HealthyPercent < minHealthyPercent)
        {
            return $"Service unhealthy: {health.HealthyPercent}% running (minimum: {minHealthyPercent}%)";
        }

        return $"Service unhealthy: {health.DeploymentCount} deployments present (expected exactly 1)";
    }

    /// <summary>
    /// Builds the output map shared by the single-check and polling paths.
    /// </summary>
    private static Dictionary<string, object> BuildOutputs(Service service, ServiceHealth health)
    {
        return new Dictionary<string, object>
        {
            ["serviceName"] = service.ServiceName ?? "",
            ["serviceArn"] = service.ServiceArn ?? "",
            ["runningCount"] = health.RunningCount,
            ["desiredCount"] = health.DesiredCount,
            ["healthyPercent"] = health.HealthyPercent,
            ["status"] = service.Status ?? "",
            ["deploymentCount"] = health.DeploymentCount,
            ["isHealthy"] = health.IsHealthy
        };
    }

    /// <summary>
    /// Builds the failed result returned when the service does not exist.
    /// </summary>
    private AgentTaskResult ServiceNotFound(Guid taskId, HealthCheckPayload payload)
    {
        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = false,
            Error = $"Service '{payload.ServiceName}' not found",
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }
}
|
||||
@@ -0,0 +1,282 @@
|
||||
using System.Text.Json;
|
||||
using Amazon.ECS;
|
||||
using Amazon.ECS.Model;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Ecs.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for registering ECS task definitions.
/// </summary>
public sealed class EcsRegisterTaskDefinitionTask : IEcsTask
{
    private readonly IAmazonECS _ecsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EcsRegisterTaskDefinitionTask> _logger;

    /// <summary>
    /// Payload for registering an ECS task definition.
    /// </summary>
    public sealed record RegisterTaskDefinitionPayload
    {
        /// <summary>
        /// Family name for the task definition.
        /// </summary>
        public required string Family { get; init; }

        /// <summary>
        /// Container definitions.
        /// </summary>
        public required IReadOnlyList<ContainerDefinitionPayload> ContainerDefinitions { get; init; }

        /// <summary>
        /// Task-level CPU.
        /// </summary>
        public string? Cpu { get; init; }

        /// <summary>
        /// Task-level memory.
        /// </summary>
        public string? Memory { get; init; }

        /// <summary>
        /// Network mode.
        /// </summary>
        public string? NetworkMode { get; init; }

        /// <summary>
        /// Task role ARN.
        /// </summary>
        public string? TaskRoleArn { get; init; }

        /// <summary>
        /// Execution role ARN.
        /// </summary>
        public string? ExecutionRoleArn { get; init; }

        /// <summary>
        /// Required capabilities (FARGATE, EC2).
        /// </summary>
        public IReadOnlyList<string>? RequiresCompatibilities { get; init; }

        /// <summary>
        /// Tags to apply.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Tags { get; init; }
    }

    /// <summary>
    /// Container definition payload.
    /// </summary>
    public sealed record ContainerDefinitionPayload
    {
        /// <summary>
        /// Container name.
        /// </summary>
        public required string Name { get; init; }

        /// <summary>
        /// Container image.
        /// </summary>
        public required string Image { get; init; }

        /// <summary>
        /// Container CPU units.
        /// </summary>
        public int? Cpu { get; init; }

        /// <summary>
        /// Container memory in MB.
        /// </summary>
        public int? Memory { get; init; }

        /// <summary>
        /// Container memory reservation in MB.
        /// </summary>
        public int? MemoryReservation { get; init; }

        /// <summary>
        /// Port mappings.
        /// </summary>
        public IReadOnlyList<PortMappingPayload>? PortMappings { get; init; }

        /// <summary>
        /// Environment variables.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// Whether the container is essential.
        /// </summary>
        public bool Essential { get; init; } = true;

        /// <summary>
        /// Entry point override.
        /// </summary>
        public IReadOnlyList<string>? EntryPoint { get; init; }

        /// <summary>
        /// Command override.
        /// </summary>
        public IReadOnlyList<string>? Command { get; init; }

        /// <summary>
        /// Log configuration.
        /// </summary>
        public LogConfigurationPayload? LogConfiguration { get; init; }
    }

    /// <summary>
    /// Port mapping payload.
    /// </summary>
    public sealed record PortMappingPayload
    {
        /// <summary>
        /// Container port.
        /// </summary>
        public required int ContainerPort { get; init; }

        /// <summary>
        /// Host port. When omitted, the container port is used as the host port.
        /// </summary>
        public int? HostPort { get; init; }

        /// <summary>
        /// Protocol (tcp or udp).
        /// </summary>
        public string Protocol { get; init; } = "tcp";
    }

    /// <summary>
    /// Log configuration payload.
    /// </summary>
    public sealed record LogConfigurationPayload
    {
        /// <summary>
        /// Log driver.
        /// </summary>
        public required string LogDriver { get; init; }

        /// <summary>
        /// Log driver options.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Options { get; init; }
    }

    /// <summary>
    /// Creates a new ECS register task definition handler.
    /// </summary>
    /// <param name="ecsClient">ECS client used to register the task definition.</param>
    /// <param name="timeProvider">Clock used to stamp task results.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public EcsRegisterTaskDefinitionTask(
        IAmazonECS ecsClient,
        TimeProvider timeProvider,
        ILogger<EcsRegisterTaskDefinitionTask> logger)
    {
        _ecsClient = ecsClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<RegisterTaskDefinitionPayload>(task.Payload)
            ?? throw new InvalidEcsPayloadException("ecs.register", "Failed to deserialize payload");

        _logger.LogInformation(
            "Registering ECS task definition for family {Family}",
            payload.Family);

        // "required" only guarantees the JSON property is present; an explicit null
        // (or an empty array) would otherwise fail later with a less useful error.
        if (payload.ContainerDefinitions is not { Count: > 0 })
        {
            return Failure(task.Id, "Failed to register task definition: at least one container definition is required");
        }

        try
        {
            var response = await _ecsClient.RegisterTaskDefinitionAsync(BuildRequest(payload), ct);

            if (response.TaskDefinition is not { } taskDef)
            {
                return Failure(task.Id, "Failed to register task definition: response contained no task definition");
            }

            _logger.LogInformation(
                "Registered ECS task definition {Family}:{Revision} (ARN: {Arn})",
                taskDef.Family,
                taskDef.Revision,
                taskDef.TaskDefinitionArn);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["taskDefinitionArn"] = taskDef.TaskDefinitionArn ?? "",
                    ["family"] = taskDef.Family ?? "",
                    ["revision"] = taskDef.Revision ?? 0,
                    ["status"] = taskDef.Status?.Value ?? "",
                    // The SDK may leave response collections null instead of empty.
                    ["containerCount"] = taskDef.ContainerDefinitions?.Count ?? 0
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (AmazonECSException ex)
        {
            _logger.LogError(ex, "Failed to register ECS task definition for family {Family}", payload.Family);

            return Failure(task.Id, $"Failed to register task definition: {ex.Message}");
        }
    }

    /// <summary>
    /// Maps the task payload onto the SDK request. Optional sections are only set when
    /// the payload supplies them.
    /// </summary>
    private static RegisterTaskDefinitionRequest BuildRequest(RegisterTaskDefinitionPayload payload)
    {
        var request = new RegisterTaskDefinitionRequest
        {
            Family = payload.Family,
            Cpu = payload.Cpu,
            Memory = payload.Memory,
            TaskRoleArn = payload.TaskRoleArn,
            ExecutionRoleArn = payload.ExecutionRoleArn,
            ContainerDefinitions = payload.ContainerDefinitions.Select(ToContainerDefinition).ToList()
        };

        if (!string.IsNullOrEmpty(payload.NetworkMode))
        {
            request.NetworkMode = new NetworkMode(payload.NetworkMode);
        }

        if (payload.RequiresCompatibilities is not null)
        {
            request.RequiresCompatibilities = payload.RequiresCompatibilities.ToList();
        }

        if (payload.Tags is not null)
        {
            request.Tags = payload.Tags.Select(kv => new Tag { Key = kv.Key, Value = kv.Value }).ToList();
        }

        return request;
    }

    /// <summary>
    /// Maps one container payload onto the SDK container definition.
    /// </summary>
    private static ContainerDefinition ToContainerDefinition(ContainerDefinitionPayload c)
    {
        return new ContainerDefinition
        {
            Name = c.Name,
            Image = c.Image,
            Cpu = c.Cpu ?? 0,
            Memory = c.Memory,
            MemoryReservation = c.MemoryReservation,
            Essential = c.Essential,
            EntryPoint = c.EntryPoint?.ToList(),
            Command = c.Command?.ToList(),
            PortMappings = c.PortMappings?.Select(p => new PortMapping
            {
                ContainerPort = p.ContainerPort,
                // An omitted host port falls back to the container port.
                HostPort = p.HostPort ?? p.ContainerPort,
                Protocol = p.Protocol
            }).ToList(),
            Environment = c.Environment?.Select(kv => new Amazon.ECS.Model.KeyValuePair
            {
                Name = kv.Key,
                Value = kv.Value
            }).ToList(),
            LogConfiguration = c.LogConfiguration is { } log
                ? new LogConfiguration
                {
                    LogDriver = log.LogDriver,
                    Options = log.Options?.ToDictionary(kv => kv.Key, kv => kv.Value)
                }
                : null
        };
    }

    /// <summary>
    /// Builds a failed result stamped with the current time.
    /// </summary>
    private AgentTaskResult Failure(Guid taskId, string error)
    {
        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = false,
            Error = error,
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }
}
|
||||
@@ -0,0 +1,331 @@
|
||||
using System.Text.Json;
|
||||
using Amazon.ECS;
|
||||
using Amazon.ECS.Model;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using Task = System.Threading.Tasks.Task;
|
||||
|
||||
namespace StellaOps.Agent.Ecs.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for running ECS tasks.
/// </summary>
public sealed class EcsRunTaskTask : IEcsTask
{
    /// <summary>
    /// Delay between two consecutive polls while waiting for tasks to stop.
    /// </summary>
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(10);

    private readonly IAmazonECS _ecsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EcsRunTaskTask> _logger;

    /// <summary>
    /// Payload for running an ECS task.
    /// </summary>
    public sealed record RunTaskPayload
    {
        /// <summary>
        /// Name or ARN of the ECS cluster.
        /// </summary>
        public required string Cluster { get; init; }

        /// <summary>
        /// Task definition family:revision or ARN.
        /// </summary>
        public required string TaskDefinition { get; init; }

        /// <summary>
        /// Number of tasks to run.
        /// </summary>
        public int Count { get; init; } = 1;

        /// <summary>
        /// Launch type (FARGATE or EC2).
        /// </summary>
        public string? LaunchType { get; init; }

        /// <summary>
        /// Network configuration for awsvpc mode.
        /// </summary>
        public NetworkConfigurationPayload? NetworkConfiguration { get; init; }

        /// <summary>
        /// Container overrides.
        /// </summary>
        public IReadOnlyList<ContainerOverridePayload>? Overrides { get; init; }

        /// <summary>
        /// Task group.
        /// </summary>
        public string? Group { get; init; }

        /// <summary>
        /// Whether to wait for task completion.
        /// </summary>
        public bool WaitForCompletion { get; init; } = true;

        /// <summary>
        /// Timeout for waiting for completion.
        /// </summary>
        public TimeSpan CompletionTimeout { get; init; } = TimeSpan.FromMinutes(30);

        /// <summary>
        /// Tags to apply to the task.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Tags { get; init; }
    }

    /// <summary>
    /// Network configuration payload.
    /// </summary>
    public sealed record NetworkConfigurationPayload
    {
        /// <summary>
        /// Subnet IDs.
        /// </summary>
        public required IReadOnlyList<string> Subnets { get; init; }

        /// <summary>
        /// Security group IDs.
        /// </summary>
        public IReadOnlyList<string>? SecurityGroups { get; init; }

        /// <summary>
        /// Whether to assign a public IP.
        /// </summary>
        public bool AssignPublicIp { get; init; }
    }

    /// <summary>
    /// Container override payload.
    /// </summary>
    public sealed record ContainerOverridePayload
    {
        /// <summary>
        /// Container name.
        /// </summary>
        public required string Name { get; init; }

        /// <summary>
        /// Command override.
        /// </summary>
        public IReadOnlyList<string>? Command { get; init; }

        /// <summary>
        /// Environment variable overrides.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// CPU override.
        /// </summary>
        public int? Cpu { get; init; }

        /// <summary>
        /// Memory override.
        /// </summary>
        public int? Memory { get; init; }
    }

    /// <summary>
    /// Creates a new ECS run task handler.
    /// </summary>
    /// <param name="ecsClient">ECS client used to start and describe tasks.</param>
    /// <param name="timeProvider">Clock used to stamp task results.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public EcsRunTaskTask(
        IAmazonECS ecsClient,
        TimeProvider timeProvider,
        ILogger<EcsRunTaskTask> logger)
    {
        _ecsClient = ecsClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<RunTaskPayload>(task.Payload)
            ?? throw new InvalidEcsPayloadException("ecs.run", "Failed to deserialize payload");

        _logger.LogInformation(
            "Running ECS task from definition {TaskDef} on cluster {Cluster}",
            payload.TaskDefinition,
            payload.Cluster);

        try
        {
            var runResponse = await _ecsClient.RunTaskAsync(BuildRequest(payload), ct);

            // The SDK may leave response collections null instead of empty, so a
            // successful call can come back with Failures == null.
            if (runResponse.Failures is { Count: > 0 } failures)
            {
                var failure = failures[0];
                _logger.LogError(
                    "Failed to run ECS task: {Reason} (ARN: {Arn})",
                    failure.Reason,
                    failure.Arn);

                return Failure(task.Id, $"Failed to run task: {failure.Reason}");
            }

            var taskArns = runResponse.Tasks?
                .Select(t => t.TaskArn)
                .Where(arn => !string.IsNullOrEmpty(arn))
                .ToList() ?? new List<string>();

            if (taskArns.Count == 0)
            {
                // Nothing was started and nothing was reported as failed; waiting on an
                // empty task list could never observe a meaningful result.
                return Failure(task.Id, "Failed to run task: ECS returned no tasks");
            }

            _logger.LogInformation(
                "Started {Count} ECS task(s): {TaskArns}",
                taskArns.Count,
                string.Join(", ", taskArns.Select(a => a.Split('/')[^1])));

            if (!payload.WaitForCompletion)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["taskArns"] = taskArns,
                        ["taskCount"] = taskArns.Count,
                        ["status"] = "RUNNING"
                    },
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            // Wait for tasks to complete
            var (completed, exitCodes) = await WaitForTasksAsync(
                payload.Cluster,
                taskArns,
                payload.CompletionTimeout,
                ct);

            var allSucceeded = completed && exitCodes.All(e => e == 0);

            string? error = null;
            if (!completed)
            {
                error = $"Timed out after {payload.CompletionTimeout} waiting for task(s) to stop";
            }
            else if (!allSucceeded)
            {
                error = $"Task(s) failed with exit codes: [{string.Join(", ", exitCodes)}]";
            }

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = allSucceeded,
                Error = error,
                Outputs = new Dictionary<string, object>
                {
                    ["taskArns"] = taskArns,
                    ["taskCount"] = taskArns.Count,
                    ["exitCodes"] = exitCodes,
                    ["completed"] = completed,
                    ["status"] = allSucceeded ? "SUCCEEDED" : "FAILED"
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (AmazonECSException ex)
        {
            _logger.LogError(ex, "Failed to run ECS task from {TaskDef}", payload.TaskDefinition);

            return Failure(task.Id, $"Failed to run task: {ex.Message}");
        }
    }

    /// <summary>
    /// Maps the task payload onto the SDK request. Optional sections are only set when
    /// the payload supplies them.
    /// </summary>
    private static RunTaskRequest BuildRequest(RunTaskPayload payload)
    {
        var request = new RunTaskRequest
        {
            Cluster = payload.Cluster,
            TaskDefinition = payload.TaskDefinition,
            Count = payload.Count,
            Group = payload.Group
        };

        if (!string.IsNullOrEmpty(payload.LaunchType))
        {
            request.LaunchType = new LaunchType(payload.LaunchType);
        }

        if (payload.NetworkConfiguration is { } network)
        {
            request.NetworkConfiguration = new NetworkConfiguration
            {
                AwsvpcConfiguration = new AwsVpcConfiguration
                {
                    Subnets = network.Subnets.ToList(),
                    SecurityGroups = network.SecurityGroups?.ToList(),
                    AssignPublicIp = network.AssignPublicIp
                        ? AssignPublicIp.ENABLED
                        : AssignPublicIp.DISABLED
                }
            };
        }

        if (payload.Overrides is { Count: > 0 } overrides)
        {
            request.Overrides = new TaskOverride
            {
                ContainerOverrides = overrides.Select(o => new ContainerOverride
                {
                    Name = o.Name,
                    Command = o.Command?.ToList(),
                    Environment = o.Environment?.Select(kv => new Amazon.ECS.Model.KeyValuePair
                    {
                        Name = kv.Key,
                        Value = kv.Value
                    }).ToList(),
                    Cpu = o.Cpu,
                    Memory = o.Memory
                }).ToList()
            };
        }

        if (payload.Tags is not null)
        {
            request.Tags = payload.Tags.Select(kv => new Tag { Key = kv.Key, Value = kv.Value }).ToList();
        }

        return request;
    }

    /// <summary>
    /// Polls the given tasks every <see cref="PollInterval"/> until all of them are STOPPED
    /// or the timeout elapses.
    /// </summary>
    /// <returns>
    /// <c>Completed</c> is true when every requested task was observed as STOPPED;
    /// <c>ExitCodes</c> then holds one entry per container, with -1 for containers that
    /// report no exit code. On timeout <c>Completed</c> is false and the list is empty.
    /// </returns>
    /// <exception cref="OperationCanceledException">The caller's token was cancelled.</exception>
    private async Task<(bool Completed, List<int> ExitCodes)> WaitForTasksAsync(
        string cluster,
        List<string> taskArns,
        TimeSpan timeout,
        CancellationToken ct)
    {
        using var timeoutCts = new CancellationTokenSource(timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

        try
        {
            while (!linkedCts.IsCancellationRequested)
            {
                var response = await _ecsClient.DescribeTasksAsync(new DescribeTasksRequest
                {
                    Cluster = cluster,
                    Tasks = taskArns
                }, linkedCts.Token);

                // Require every requested task to be present: All() over an empty or
                // partial result would otherwise be vacuously true and report success.
                if (response.Tasks is { } described
                    && described.Count >= taskArns.Count
                    && described.All(t => t.LastStatus == "STOPPED"))
                {
                    var exitCodes = described
                        .SelectMany(t => t.Containers?.Select(c => c.ExitCode ?? -1) ?? Enumerable.Empty<int>())
                        .ToList();

                    _logger.LogInformation(
                        "ECS tasks completed with exit codes: [{ExitCodes}]",
                        string.Join(", ", exitCodes));

                    return (true, exitCodes);
                }

                await Task.Delay(PollInterval, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            // Timeout is reported through the return value below rather than as an exception.
        }

        // The loop can also end without an exception when the caller cancels between
        // polls; surface that as cancellation instead of a failed run.
        if (!timeoutCts.IsCancellationRequested)
        {
            ct.ThrowIfCancellationRequested();
        }

        _logger.LogWarning("Task completion wait timed out after {Timeout}", timeout);

        return (false, new List<int>());
    }

    /// <summary>
    /// Builds a failed result stamped with the current time.
    /// </summary>
    private AgentTaskResult Failure(Guid taskId, string error)
    {
        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = false,
            Error = error,
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }
}
|
||||
@@ -0,0 +1,231 @@
|
||||
using System.Text.Json;
|
||||
using Amazon.ECS;
|
||||
using Amazon.ECS.Model;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using Task = System.Threading.Tasks.Task;
|
||||
|
||||
namespace StellaOps.Agent.Ecs.Tasks;
|
||||
|
||||
/// <summary>
|
||||
/// Task handler for scaling ECS services.
|
||||
/// </summary>
|
||||
public sealed class EcsScaleServiceTask : IEcsTask
|
||||
{
|
||||
private readonly IAmazonECS _ecsClient;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly ILogger<EcsScaleServiceTask> _logger;
|
||||
|
||||
/// <summary>
/// Input for a scale operation: which service to change and how to wait for the result.
/// </summary>
public sealed record ScaleServicePayload
{
    /// <summary>Cluster that hosts the service, given as a name or an ARN.</summary>
    public required string Cluster { get; init; }

    /// <summary>Service whose desired count is changed.</summary>
    public required string ServiceName { get; init; }

    /// <summary>Number of tasks the service should run after scaling.</summary>
    public required int DesiredCount { get; init; }

    /// <summary>
    /// When true (the default) the handler waits for the service to stabilize before returning.
    /// </summary>
    public bool WaitForStable { get; init; } = true;

    /// <summary>Upper bound for the stabilization wait; five minutes unless overridden.</summary>
    public TimeSpan StabilizeTimeout { get; init; } = TimeSpan.FromMinutes(5);
}
|
||||
|
||||
/// <summary>
/// Creates a new ECS scale service task handler.
/// </summary>
/// <param name="ecsClient">ECS client stored for later service calls.</param>
/// <param name="timeProvider">Clock stored for stamping task results.</param>
/// <param name="logger">Logger stored for progress and failure messages.</param>
public EcsScaleServiceTask(
    IAmazonECS ecsClient,
    TimeProvider timeProvider,
    ILogger<EcsScaleServiceTask> logger)
{
    (_ecsClient, _timeProvider, _logger) = (ecsClient, timeProvider, logger);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Reads the current desired count, updates the service to the requested count and,
/// when <c>WaitForStable</c> is set, waits for the service to stabilize. ECS client
/// errors are converted into a failed result rather than thrown.
/// </remarks>
public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
{
    var payload = JsonSerializer.Deserialize<ScaleServicePayload>(task.Payload)
        ?? throw new InvalidEcsPayloadException("ecs.scale", "Failed to deserialize payload");

    // Shared shape for every failed outcome of this method.
    AgentTaskResult Failed(string error) => new()
    {
        TaskId = task.Id,
        Success = false,
        Error = error,
        CompletedAt = _timeProvider.GetUtcNow()
    };

    // A negative count can never be satisfied; fail before touching the service.
    if (payload.DesiredCount < 0)
    {
        return Failed($"Failed to scale service: desired count must not be negative (got {payload.DesiredCount})");
    }

    _logger.LogInformation(
        "Scaling ECS service {Service} in cluster {Cluster} to {DesiredCount} tasks",
        payload.ServiceName,
        payload.Cluster,
        payload.DesiredCount);

    try
    {
        // Get current service state
        var describeResponse = await _ecsClient.DescribeServicesAsync(new DescribeServicesRequest
        {
            Cluster = payload.Cluster,
            Services = new List<string> { payload.ServiceName }
        }, ct);

        // The SDK may leave response collections null instead of empty.
        if (describeResponse.Services?.FirstOrDefault() is not { } currentService)
        {
            return Failed($"Service '{payload.ServiceName}' not found in cluster '{payload.Cluster}'");
        }

        var previousCount = currentService.DesiredCount ?? 0;

        // Update desired count
        var updateResponse = await _ecsClient.UpdateServiceAsync(new UpdateServiceRequest
        {
            Cluster = payload.Cluster,
            Service = payload.ServiceName,
            DesiredCount = payload.DesiredCount
        }, ct);

        if (updateResponse.Service is not { } service)
        {
            return Failed("Service update returned no service object");
        }

        _logger.LogInformation(
            "Updated ECS service {Service} desired count from {Previous} to {New}",
            payload.ServiceName,
            previousCount,
            payload.DesiredCount);

        if (!payload.WaitForStable)
        {
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["serviceArn"] = service.ServiceArn ?? "",
                    ["serviceName"] = service.ServiceName ?? "",
                    ["previousDesiredCount"] = previousCount,
                    ["newDesiredCount"] = payload.DesiredCount,
                    ["runningCount"] = service.RunningCount ?? 0,
                    ["status"] = "SCALING"
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }

        // Wait for stable
        var stable = await WaitForServiceStableAsync(
            payload.Cluster,
            payload.ServiceName,
            payload.DesiredCount,
            payload.StabilizeTimeout,
            ct);

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = stable,
            Error = stable ? null : "Service did not stabilize within timeout",
            Outputs = new Dictionary<string, object>
            {
                ["serviceArn"] = service.ServiceArn ?? "",
                ["serviceName"] = service.ServiceName ?? "",
                ["previousDesiredCount"] = previousCount,
                ["newDesiredCount"] = payload.DesiredCount,
                ["status"] = stable ? "STABLE" : "UNSTABLE"
            },
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }
    catch (AmazonECSException ex)
    {
        _logger.LogError(ex, "Failed to scale ECS service {Service}", payload.ServiceName);

        return Failed($"Failed to scale service: {ex.Message}");
    }
}
|
||||
|
||||
/// <summary>
/// Polls the ECS service every five seconds until it reports
/// <paramref name="targetCount"/> running tasks on exactly one deployment.
/// </summary>
/// <param name="cluster">Cluster value sent with each describe request.</param>
/// <param name="serviceName">Service to describe; also used in log messages.</param>
/// <param name="targetCount">Running-task count that marks the service as stable.</param>
/// <param name="timeout">Maximum time to keep polling before giving up.</param>
/// <param name="ct">Caller cancellation token, linked with the timeout.</param>
/// <returns>
/// <c>true</c> once the stable condition is observed; <c>false</c> when the service is
/// missing from a describe response, the timeout fires, or the loop exits because
/// cancellation was already requested.
/// </returns>
private async Task<bool> WaitForServiceStableAsync(
    string cluster,
    string serviceName,
    int targetCount,
    TimeSpan timeout,
    CancellationToken ct)
{
    using var deadlineSource = new CancellationTokenSource(timeout);
    using var pollSource = CancellationTokenSource.CreateLinkedTokenSource(ct, deadlineSource.Token);
    var pollInterval = TimeSpan.FromSeconds(5);

    try
    {
        while (!pollSource.IsCancellationRequested)
        {
            var describeRequest = new DescribeServicesRequest
            {
                Cluster = cluster,
                Services = new List<string> { serviceName }
            };
            var described = await _ecsClient.DescribeServicesAsync(describeRequest, pollSource.Token);

            if (described.Services.FirstOrDefault() is not { } current)
            {
                // The service is no longer returned; there is nothing left to wait for.
                return false;
            }

            var reachedTarget = current.RunningCount == targetCount && current.Deployments.Count == 1;
            if (!reachedTarget)
            {
                _logger.LogDebug(
                    "Service {Service} scaling: running={Running}, desired={Desired}",
                    serviceName,
                    current.RunningCount,
                    targetCount);

                await Task.Delay(pollInterval, pollSource.Token);
                continue;
            }

            _logger.LogInformation(
                "Service {Service} scaled to {Count} running tasks",
                serviceName,
                targetCount);
            return true;
        }
    }
    catch (OperationCanceledException) when (deadlineSource.IsCancellationRequested)
    {
        // Only the timeout is swallowed here; cancellation that is not attributable
        // to the timeout propagates to the caller.
        _logger.LogWarning("Service scaling stabilization timed out after {Timeout}", timeout);
    }

    return false;
}
|
||||
}
|
||||
@@ -0,0 +1,107 @@
|
||||
using System.Text.Json;
|
||||
using Amazon.ECS;
|
||||
using Amazon.ECS.Model;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Ecs.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for stopping ECS tasks. Deserializes a <see cref="StopTaskPayload"/>
/// from the agent task, issues a StopTask call, and reports the outcome as an
/// <see cref="AgentTaskResult"/>.
/// </summary>
public sealed class EcsStopTaskTask : IEcsTask
{
    private readonly IAmazonECS _ecsClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<EcsStopTaskTask> _logger;

    /// <summary>
    /// Payload for stopping an ECS task.
    /// </summary>
    public sealed record StopTaskPayload
    {
        /// <summary>
        /// Name or ARN of the ECS cluster.
        /// </summary>
        public required string Cluster { get; init; }

        /// <summary>
        /// Task ARN or ID to stop.
        /// </summary>
        public required string TaskArn { get; init; }

        /// <summary>
        /// Reason for stopping the task. When omitted, a default reason is sent.
        /// </summary>
        public string? Reason { get; init; }
    }

    /// <summary>
    /// Creates a new ECS stop task handler.
    /// </summary>
    /// <param name="ecsClient">ECS client used to issue the stop request.</param>
    /// <param name="timeProvider">Clock used to stamp the result's completion time.</param>
    /// <param name="logger">Logger instance.</param>
    public EcsStopTaskTask(
        IAmazonECS ecsClient,
        TimeProvider timeProvider,
        ILogger<EcsStopTaskTask> logger)
    {
        _ecsClient = ecsClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <summary>
    /// Stops the ECS task described by the payload of <paramref name="task"/>.
    /// </summary>
    /// <param name="task">Agent task whose payload deserializes to <see cref="StopTaskPayload"/>.</param>
    /// <param name="ct">Cancellation token passed to the ECS call.</param>
    /// <returns>
    /// A successful result carrying the stopped task's details, or a failed result when
    /// ECS raises an <see cref="AmazonECSException"/> or returns no task object.
    /// </returns>
    /// <exception cref="InvalidEcsPayloadException">The payload deserialized to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<StopTaskPayload>(task.Payload)
            ?? throw new InvalidEcsPayloadException("ecs.stop", "Failed to deserialize payload");

        _logger.LogInformation(
            "Stopping ECS task {TaskArn} in cluster {Cluster}",
            payload.TaskArn,
            payload.Cluster);

        try
        {
            var request = new StopTaskRequest
            {
                Cluster = payload.Cluster,
                Task = payload.TaskArn,
                Reason = payload.Reason ?? "Stopped by Stella Agent"
            };

            var response = await _ecsClient.StopTaskAsync(request, ct);

            // Guard against a response without a task object; dereferencing it would
            // raise a NullReferenceException that the catch below does not handle.
            if (response?.Task is not { } stoppedTask)
            {
                return Failure(task, "Stop task returned no task object");
            }

            _logger.LogInformation(
                "Stopped ECS task {TaskArn}, last status: {Status}",
                stoppedTask.TaskArn,
                stoppedTask.LastStatus);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["taskArn"] = stoppedTask.TaskArn ?? "",
                    ["lastStatus"] = stoppedTask.LastStatus ?? "",
                    ["stoppedReason"] = stoppedTask.StoppedReason ?? payload.Reason ?? "Stopped by agent",
                    // A missing StoppedAt falls back to default(DateTime) before formatting.
                    ["stoppedAt"] = stoppedTask.StoppedAt.GetValueOrDefault().ToUniversalTime().ToString("o", System.Globalization.CultureInfo.InvariantCulture)
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (AmazonECSException ex)
        {
            _logger.LogError(ex, "Failed to stop ECS task {TaskArn}", payload.TaskArn);
            return Failure(task, $"Failed to stop task: {ex.Message}");
        }
    }

    // Builds a failed result for the given task, stamped with the current time.
    private AgentTaskResult Failure(AgentTaskInfo task, string error) => new()
    {
        TaskId = task.Id,
        Success = false,
        Error = error,
        CompletedAt = _timeProvider.GetUtcNow()
    };
}
|
||||
@@ -0,0 +1,137 @@
|
||||
namespace StellaOps.Agent.Nomad.Client;
|
||||
|
||||
/// <summary>
/// Contract for a client of the Nomad API. Most operations accept an optional
/// namespace (<c>ns</c>) and <c>region</c> plus a cancellation token.
/// </summary>
public interface INomadClient : IDisposable
{
    /// <summary>Retrieves the agent's self information.</summary>
    Task<NomadAgentSelf> GetAgentSelfAsync(CancellationToken ct = default);

    /// <summary>Retrieves the list of jobs.</summary>
    Task<IReadOnlyList<NomadJobListItem>> ListJobsAsync(string? ns = null, string? region = null, CancellationToken ct = default);

    /// <summary>Retrieves a single job by its ID; the result is nullable.</summary>
    Task<NomadJob?> GetJobAsync(string jobId, string? ns = null, string? region = null, CancellationToken ct = default);

    /// <summary>Registers a job, creating it or updating an existing one.</summary>
    Task<NomadJobRegisterResponse> RegisterJobAsync(NomadJob job, string? ns = null, string? region = null, CancellationToken ct = default);

    /// <summary>Parses a job specification (HCL or JSON) into a <see cref="NomadJob"/>.</summary>
    Task<NomadJob> ParseJobAsync(string jobSpec, bool canonicalize = true, CancellationToken ct = default);

    /// <summary>Stops (deregisters) a job, optionally with the purge and global flags.</summary>
    Task<NomadJobDeregisterResponse> StopJobAsync(
        string jobId,
        bool purge = false,
        bool global = false,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default);

    /// <summary>Scales one task group of a job to the requested count.</summary>
    Task<NomadJobScaleResponse> ScaleJobAsync(
        string jobId,
        string taskGroup,
        int count,
        string? reason = null,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default);

    /// <summary>Dispatches a parameterized batch job with optional metadata and payload.</summary>
    Task<NomadJobDispatchResponse> DispatchJobAsync(
        string jobId,
        IReadOnlyDictionary<string, string>? meta = null,
        string? payload = null,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default);

    /// <summary>Retrieves the allocations that belong to a job.</summary>
    Task<IReadOnlyList<NomadAllocation>> GetJobAllocationsAsync(string jobId, string? ns = null, string? region = null, CancellationToken ct = default);

    /// <summary>Retrieves the deployments that belong to a job.</summary>
    Task<IReadOnlyList<NomadDeployment>> GetJobDeploymentsAsync(string jobId, string? ns = null, string? region = null, CancellationToken ct = default);

    /// <summary>Retrieves one deployment by its ID; the result is nullable.</summary>
    Task<NomadDeployment?> GetDeploymentAsync(string deploymentId, string? ns = null, string? region = null, CancellationToken ct = default);

    /// <summary>Retrieves one evaluation by its ID; the result is nullable.</summary>
    Task<NomadEvaluation?> GetEvaluationAsync(string evalId, string? ns = null, string? region = null, CancellationToken ct = default);

    /// <summary>Retrieves one allocation by its ID; the result is nullable.</summary>
    Task<NomadAllocation?> GetAllocationAsync(string allocId, string? ns = null, string? region = null, CancellationToken ct = default);

    /// <summary>Opens a stream over the logs of one task inside an allocation.</summary>
    Task<Stream> GetAllocationLogsAsync(
        string allocId,
        string taskName,
        string logType,
        bool follow = false,
        int? offset = null,
        string? origin = null,
        CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,349 @@
|
||||
using System.Net.Http.Json;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Agent.Nomad.Client;
|
||||
|
||||
/// <summary>
/// HTTP client wrapper for the Nomad API. Requests go through the injected
/// <see cref="HttpClient"/>; non-success responses are raised as <see cref="NomadApiException"/>.
/// </summary>
public sealed class NomadClient : INomadClient
{
    private readonly HttpClient _httpClient;
    private readonly ILogger<NomadClient> _logger;
    private readonly JsonSerializerOptions _jsonOptions;

    /// <summary>
    /// Creates a new Nomad client.
    /// </summary>
    /// <param name="httpClient">The HTTP client. It is disposed together with this instance.</param>
    /// <param name="logger">Logger instance.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public NomadClient(HttpClient httpClient, ILogger<NomadClient> logger)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        _jsonOptions = new JsonSerializerOptions
        {
            PropertyNameCaseInsensitive = true,
            PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
        };
    }

    /// <summary>
    /// Gets agent self information from <c>/v1/agent/self</c>.
    /// </summary>
    public async Task<NomadAgentSelf> GetAgentSelfAsync(CancellationToken ct = default)
    {
        using var response = await _httpClient.GetAsync("/v1/agent/self", ct);
        return await ReadRequiredAsync<NomadAgentSelf>(response, "Failed to deserialize agent self response", ct);
    }

    /// <summary>
    /// Lists all jobs. An empty list is returned when the body deserializes to <c>null</c>.
    /// </summary>
    public Task<IReadOnlyList<NomadJobListItem>> ListJobsAsync(
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetListAsync<NomadJobListItem>(BuildUrl("/v1/jobs", ns, region), ct);

    /// <summary>
    /// Gets a job by ID, or <c>null</c> when the API answers 404.
    /// </summary>
    public Task<NomadJob?> GetJobAsync(
        string jobId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetOrNullAsync<NomadJob>(BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}", ns, region), ct);

    /// <summary>
    /// Registers (creates or updates) a job.
    /// </summary>
    public async Task<NomadJobRegisterResponse> RegisterJobAsync(
        NomadJob job,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
    {
        var url = BuildUrl("/v1/jobs", ns, region);
        var request = new NomadJobRegisterRequest { Job = job };

        using var response = await _httpClient.PostAsJsonAsync(url, request, _jsonOptions, ct);
        return await ReadRequiredAsync<NomadJobRegisterResponse>(response, "Failed to deserialize job register response", ct);
    }

    /// <summary>
    /// Parses a job specification (HCL or JSON).
    /// </summary>
    public async Task<NomadJob> ParseJobAsync(
        string jobSpec,
        bool canonicalize = true,
        CancellationToken ct = default)
    {
        var request = new NomadJobParseRequest
        {
            JobHCL = jobSpec,
            Canonicalize = canonicalize
        };

        using var response = await _httpClient.PostAsJsonAsync("/v1/jobs/parse", request, _jsonOptions, ct);
        return await ReadRequiredAsync<NomadJob>(response, "Failed to deserialize parsed job", ct);
    }

    /// <summary>
    /// Stops (deregisters) a job. <paramref name="purge"/> and <paramref name="global"/>
    /// are added to the query string only when set.
    /// </summary>
    public async Task<NomadJobDeregisterResponse> StopJobAsync(
        string jobId,
        bool purge = false,
        bool global = false,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
    {
        var url = BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}", ns, region);
        if (purge)
        {
            url = AppendQuery(url, "purge=true");
        }

        if (global)
        {
            url = AppendQuery(url, "global=true");
        }

        using var response = await _httpClient.DeleteAsync(url, ct);
        return await ReadRequiredAsync<NomadJobDeregisterResponse>(response, "Failed to deserialize job deregister response", ct);
    }

    /// <summary>
    /// Scales a job task group.
    /// </summary>
    public async Task<NomadJobScaleResponse> ScaleJobAsync(
        string jobId,
        string taskGroup,
        int count,
        string? reason = null,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
    {
        var url = BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}/scale", ns, region);
        var request = new NomadJobScaleRequest
        {
            Count = count,
            Target = new Dictionary<string, string> { ["Group"] = taskGroup },
            Message = reason
        };

        using var response = await _httpClient.PostAsJsonAsync(url, request, _jsonOptions, ct);
        return await ReadRequiredAsync<NomadJobScaleResponse>(response, "Failed to deserialize job scale response", ct);
    }

    /// <summary>
    /// Dispatches a parameterized batch job. A non-null <paramref name="payload"/> is
    /// sent as the Base64 encoding of its UTF-8 bytes.
    /// </summary>
    public async Task<NomadJobDispatchResponse> DispatchJobAsync(
        string jobId,
        IReadOnlyDictionary<string, string>? meta = null,
        string? payload = null,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
    {
        var url = BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}/dispatch", ns, region);
        var request = new NomadJobDispatchRequest
        {
            Meta = meta?.ToDictionary(kv => kv.Key, kv => kv.Value),
            Payload = payload is not null ? Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(payload)) : null
        };

        using var response = await _httpClient.PostAsJsonAsync(url, request, _jsonOptions, ct);
        return await ReadRequiredAsync<NomadJobDispatchResponse>(response, "Failed to deserialize job dispatch response", ct);
    }

    /// <summary>
    /// Gets allocations for a job.
    /// </summary>
    public Task<IReadOnlyList<NomadAllocation>> GetJobAllocationsAsync(
        string jobId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetListAsync<NomadAllocation>(BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}/allocations", ns, region), ct);

    /// <summary>
    /// Gets deployments for a job.
    /// </summary>
    public Task<IReadOnlyList<NomadDeployment>> GetJobDeploymentsAsync(
        string jobId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetListAsync<NomadDeployment>(BuildUrl($"/v1/job/{Uri.EscapeDataString(jobId)}/deployments", ns, region), ct);

    /// <summary>
    /// Gets a specific deployment, or <c>null</c> when the API answers 404.
    /// </summary>
    public Task<NomadDeployment?> GetDeploymentAsync(
        string deploymentId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetOrNullAsync<NomadDeployment>(BuildUrl($"/v1/deployment/{Uri.EscapeDataString(deploymentId)}", ns, region), ct);

    /// <summary>
    /// Gets an evaluation by ID, or <c>null</c> when the API answers 404.
    /// </summary>
    public Task<NomadEvaluation?> GetEvaluationAsync(
        string evalId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetOrNullAsync<NomadEvaluation>(BuildUrl($"/v1/evaluation/{Uri.EscapeDataString(evalId)}", ns, region), ct);

    /// <summary>
    /// Gets an allocation by ID, or <c>null</c> when the API answers 404.
    /// </summary>
    public Task<NomadAllocation?> GetAllocationAsync(
        string allocId,
        string? ns = null,
        string? region = null,
        CancellationToken ct = default)
        => GetOrNullAsync<NomadAllocation>(BuildUrl($"/v1/allocation/{Uri.EscapeDataString(allocId)}", ns, region), ct);

    /// <summary>
    /// Gets logs for an allocation task as a stream. Only the response headers are
    /// awaited before the stream is handed back, so the body can be read incrementally.
    /// </summary>
    public async Task<Stream> GetAllocationLogsAsync(
        string allocId,
        string taskName,
        string logType,
        bool follow = false,
        int? offset = null,
        string? origin = null,
        CancellationToken ct = default)
    {
        var url = $"/v1/client/fs/logs/{Uri.EscapeDataString(allocId)}?task={Uri.EscapeDataString(taskName)}&type={Uri.EscapeDataString(logType)}";
        if (follow)
        {
            url += "&follow=true";
        }

        if (offset.HasValue)
        {
            url += $"&offset={offset.Value}";
        }

        if (origin is not null)
        {
            url += $"&origin={Uri.EscapeDataString(origin)}";
        }

        var response = await _httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, ct);
        try
        {
            await EnsureSuccessAsync(response, ct);
            // On success the response is deliberately left undisposed: the returned
            // stream reads from it.
            return await response.Content.ReadAsStreamAsync(ct);
        }
        catch
        {
            response.Dispose();
            throw;
        }
    }

    // GETs a JSON array; a null body becomes an empty list.
    private async Task<IReadOnlyList<T>> GetListAsync<T>(string url, CancellationToken ct)
    {
        using var response = await _httpClient.GetAsync(url, ct);
        await EnsureSuccessAsync(response, ct);
        return await response.Content.ReadFromJsonAsync<IReadOnlyList<T>>(_jsonOptions, ct)
            ?? [];
    }

    // GETs a single JSON object, mapping HTTP 404 to null.
    private async Task<T?> GetOrNullAsync<T>(string url, CancellationToken ct)
        where T : class
    {
        using var response = await _httpClient.GetAsync(url, ct);
        if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            return null;
        }

        await EnsureSuccessAsync(response, ct);
        return await response.Content.ReadFromJsonAsync<T>(_jsonOptions, ct);
    }

    // Validates the status code and deserializes a body that must not be null.
    private async Task<T> ReadRequiredAsync<T>(HttpResponseMessage response, string failureMessage, CancellationToken ct)
        where T : class
    {
        await EnsureSuccessAsync(response, ct);
        return await response.Content.ReadFromJsonAsync<T>(_jsonOptions, ct)
            ?? throw new NomadApiException(failureMessage);
    }

    // Appends the optional namespace and region query parameters to a path.
    private static string BuildUrl(string path, string? ns, string? region)
    {
        var url = path;

        if (!string.IsNullOrEmpty(ns))
        {
            url += $"?namespace={Uri.EscapeDataString(ns)}";
        }

        if (!string.IsNullOrEmpty(region))
        {
            // Joined with '&' when the namespace parameter is already present.
            url = AppendQuery(url, $"region={Uri.EscapeDataString(region)}");
        }

        return url;
    }

    // Adds one "key=value" pair using '?' or '&' depending on whether a query exists.
    private static string AppendQuery(string url, string parameter)
    {
        var separator = url.Contains('?') ? '&' : '?';
        return $"{url}{separator}{parameter}";
    }

    // Throws NomadApiException carrying the status code and body for non-success responses.
    private static async Task EnsureSuccessAsync(HttpResponseMessage response, CancellationToken ct)
    {
        if (response.IsSuccessStatusCode)
        {
            return;
        }

        var content = await response.Content.ReadAsStringAsync(ct);
        throw new NomadApiException(
            $"Nomad API returned {(int)response.StatusCode} {response.StatusCode}: {content}",
            (int)response.StatusCode);
    }

    /// <inheritdoc />
    public void Dispose()
    {
        _httpClient.Dispose();
    }
}
|
||||
@@ -0,0 +1,576 @@
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Agent.Nomad.Client;
|
||||
|
||||
/// <summary>
/// Self-description of a Nomad agent, as deserialized by the client's agent-self call.
/// All members are optional.
/// </summary>
public sealed record NomadAgentSelf
{
    /// <summary>Agent configuration section, when present.</summary>
    public NomadAgentConfig? Config { get; init; }

    /// <summary>Agent membership section, when present.</summary>
    public NomadAgentMember? Member { get; init; }

    /// <summary>Two-level string map of statistics (outer key to inner key/value pairs).</summary>
    public IReadOnlyDictionary<string, IReadOnlyDictionary<string, string>>? Stats { get; init; }
}
|
||||
|
||||
/// <summary>
/// Configuration section of a Nomad agent's self information. All members are optional.
/// </summary>
public sealed record NomadAgentConfig
{
    /// <summary>Region value reported in the configuration.</summary>
    public string? Region { get; init; }

    /// <summary>Datacenter value reported in the configuration.</summary>
    public string? Datacenter { get; init; }

    /// <summary>Node name reported in the configuration.</summary>
    public string? NodeName { get; init; }

    /// <summary>Version string reported in the configuration.</summary>
    public string? Version { get; init; }
}
|
||||
|
||||
/// <summary>
/// Membership section of a Nomad agent's self information. All members are optional.
/// </summary>
public sealed record NomadAgentMember
{
    /// <summary>Member name.</summary>
    public string? Name { get; init; }

    /// <summary>Member address, as a string.</summary>
    public string? Addr { get; init; }

    /// <summary>Member port number.</summary>
    public int? Port { get; init; }

    /// <summary>Member status text.</summary>
    public string? Status { get; init; }
}
|
||||
|
||||
/// <summary>
/// One entry of a Nomad job listing. All members are optional.
/// </summary>
public sealed record NomadJobListItem
{
    /// <summary>Job identifier.</summary>
    public string? ID { get; init; }

    /// <summary>Job name.</summary>
    public string? Name { get; init; }

    /// <summary>Namespace the job is listed under.</summary>
    public string? Namespace { get; init; }

    /// <summary>Job type text.</summary>
    public string? Type { get; init; }

    /// <summary>Job status text.</summary>
    public string? Status { get; init; }

    /// <summary>Description accompanying the status.</summary>
    public string? StatusDescription { get; init; }

    /// <summary>Job priority.</summary>
    public int? Priority { get; init; }

    /// <summary>Datacenter names listed for the job.</summary>
    public IReadOnlyList<string>? Datacenters { get; init; }

    /// <summary>
    /// Value of the JSON property <c>SubmitTime</c>; presumably nanoseconds, per the property name.
    /// </summary>
    [JsonPropertyName("SubmitTime")]
    public long? SubmitTimeNanos { get; init; }

    /// <summary>Modify index of the entry.</summary>
    public long? ModifyIndex { get; init; }

    /// <summary>Job modify index of the entry.</summary>
    public long? JobModifyIndex { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad job definition as exchanged with the jobs endpoints. All members are optional.
/// </summary>
public sealed record NomadJob
{
    /// <summary>Job identifier.</summary>
    public string? ID { get; init; }

    /// <summary>Job name.</summary>
    public string? Name { get; init; }

    /// <summary>Namespace of the job.</summary>
    public string? Namespace { get; init; }

    /// <summary>Region of the job.</summary>
    public string? Region { get; init; }

    /// <summary>Job type text.</summary>
    public string? Type { get; init; }

    /// <summary>Job priority.</summary>
    public int? Priority { get; init; }

    /// <summary>Value of the <c>AllAtOnce</c> flag.</summary>
    public bool? AllAtOnce { get; init; }

    /// <summary>Datacenter names listed for the job.</summary>
    public IReadOnlyList<string>? Datacenters { get; init; }

    /// <summary>Task groups that make up the job.</summary>
    public IReadOnlyList<NomadTaskGroup>? TaskGroups { get; init; }

    /// <summary>Job-level update strategy.</summary>
    public NomadUpdateStrategy? Update { get; init; }

    /// <summary>Job-level constraints.</summary>
    public IReadOnlyList<NomadConstraint>? Constraints { get; init; }

    /// <summary>Job-level affinities.</summary>
    public IReadOnlyList<NomadAffinity>? Affinities { get; init; }

    /// <summary>String metadata attached to the job.</summary>
    public IReadOnlyDictionary<string, string>? Meta { get; init; }

    /// <summary>Job status text.</summary>
    public string? Status { get; init; }

    /// <summary>Description accompanying the status.</summary>
    public string? StatusDescription { get; init; }

    /// <summary>Value of the <c>Stable</c> flag.</summary>
    public bool? Stable { get; init; }

    /// <summary>Job version number.</summary>
    public long? Version { get; init; }

    /// <summary>Raw <c>SubmitTime</c> value; unit not stated here (the list item names it as nanoseconds).</summary>
    public long? SubmitTime { get; init; }

    /// <summary>Create index of the job.</summary>
    public long? CreateIndex { get; init; }

    /// <summary>Modify index of the job.</summary>
    public long? ModifyIndex { get; init; }

    /// <summary>Job modify index.</summary>
    public long? JobModifyIndex { get; init; }
}
|
||||
|
||||
/// <summary>
/// Task group inside a Nomad job. All members are optional.
/// </summary>
public sealed record NomadTaskGroup
{
    /// <summary>Task group name.</summary>
    public string? Name { get; init; }

    /// <summary>Instance count of the group.</summary>
    public int? Count { get; init; }

    /// <summary>Tasks contained in the group.</summary>
    public IReadOnlyList<NomadTask>? Tasks { get; init; }

    /// <summary>Group-level network definitions.</summary>
    public IReadOnlyList<NomadNetwork>? Networks { get; init; }

    /// <summary>Group-level service definitions.</summary>
    public IReadOnlyList<NomadService>? Services { get; init; }

    /// <summary>Restart policy of the group.</summary>
    public NomadRestartPolicy? RestartPolicy { get; init; }

    /// <summary>Reschedule policy of the group.</summary>
    public NomadReschedulePolicy? ReschedulePolicy { get; init; }

    /// <summary>Ephemeral disk settings of the group.</summary>
    public NomadEphemeralDisk? EphemeralDisk { get; init; }

    /// <summary>Group-level update strategy.</summary>
    public NomadUpdateStrategy? Update { get; init; }

    /// <summary>Group-level constraints.</summary>
    public IReadOnlyList<NomadConstraint>? Constraints { get; init; }

    /// <summary>String metadata attached to the group.</summary>
    public IReadOnlyDictionary<string, string>? Meta { get; init; }
}
|
||||
|
||||
/// <summary>
/// Single task inside a Nomad task group. All members are optional.
/// </summary>
public sealed record NomadTask
{
    /// <summary>Task name.</summary>
    public string? Name { get; init; }

    /// <summary>Driver name used for the task.</summary>
    public string? Driver { get; init; }

    /// <summary>Free-form driver configuration keyed by string.</summary>
    public IReadOnlyDictionary<string, object>? Config { get; init; }

    /// <summary>Resource requirements of the task.</summary>
    public NomadResources? Resources { get; init; }

    /// <summary>Environment variables as string pairs.</summary>
    public IReadOnlyDictionary<string, string>? Env { get; init; }

    /// <summary>Template definitions of the task.</summary>
    public IReadOnlyList<NomadTemplate>? Templates { get; init; }

    /// <summary>Artifact definitions of the task.</summary>
    public IReadOnlyList<NomadArtifact>? Artifacts { get; init; }

    /// <summary>Log configuration of the task.</summary>
    public NomadLogConfig? LogConfig { get; init; }

    /// <summary>Value of the <c>Leader</c> flag.</summary>
    public bool? Leader { get; init; }

    /// <summary>Kill signal name.</summary>
    public string? KillSignal { get; init; }

    /// <summary>
    /// Value of the JSON property <c>KillTimeout</c>; presumably nanoseconds, per the property name.
    /// </summary>
    [JsonPropertyName("KillTimeout")]
    public long? KillTimeoutNanos { get; init; }
}
|
||||
|
||||
/// <summary>
/// Update strategy of a Nomad job or task group. All members are optional; the
/// <c>*Nanos</c> members map to JSON properties without that suffix and are presumably
/// durations in nanoseconds, per their names.
/// </summary>
public sealed record NomadUpdateStrategy
{
    /// <summary>Value of <c>MaxParallel</c>.</summary>
    public int? MaxParallel { get; init; }

    /// <summary>Health check mode text.</summary>
    public string? HealthCheck { get; init; }

    /// <summary>Value of the JSON property <c>MinHealthyTime</c>.</summary>
    [JsonPropertyName("MinHealthyTime")]
    public long? MinHealthyTimeNanos { get; init; }

    /// <summary>Value of the JSON property <c>HealthyDeadline</c>.</summary>
    [JsonPropertyName("HealthyDeadline")]
    public long? HealthyDeadlineNanos { get; init; }

    /// <summary>Value of the JSON property <c>ProgressDeadline</c>.</summary>
    [JsonPropertyName("ProgressDeadline")]
    public long? ProgressDeadlineNanos { get; init; }

    /// <summary>Value of the <c>AutoRevert</c> flag.</summary>
    public bool? AutoRevert { get; init; }

    /// <summary>Value of the <c>AutoPromote</c> flag.</summary>
    public bool? AutoPromote { get; init; }

    /// <summary>Value of <c>Canary</c>.</summary>
    public int? Canary { get; init; }

    /// <summary>Value of the JSON property <c>Stagger</c>.</summary>
    [JsonPropertyName("Stagger")]
    public long? StaggerNanos { get; init; }
}
|
||||
|
||||
/// <summary>
/// Resource requirements of a Nomad task. All members are optional.
/// </summary>
public sealed record NomadResources
{
    /// <summary>Value of <c>CPU</c>.</summary>
    public int? CPU { get; init; }

    /// <summary>Memory amount; megabytes per the property name.</summary>
    public int? MemoryMB { get; init; }

    /// <summary>Maximum memory amount; megabytes per the property name.</summary>
    public int? MemoryMaxMB { get; init; }

    /// <summary>Disk amount; megabytes per the property name.</summary>
    public int? DiskMB { get; init; }

    /// <summary>Network resources requested by the task.</summary>
    public IReadOnlyList<NomadNetworkResource>? Networks { get; init; }
}
|
||||
|
||||
/// <summary>
/// Network resource entry of a Nomad resources block. All members are optional.
/// </summary>
public sealed record NomadNetworkResource
{
    /// <summary>Network mode text.</summary>
    public string? Mode { get; init; }

    /// <summary>CIDR text.</summary>
    public string? CIDR { get; init; }

    /// <summary>Value of <c>MBits</c>.</summary>
    public int? MBits { get; init; }

    /// <summary>Reserved port entries.</summary>
    public IReadOnlyList<NomadPort>? ReservedPorts { get; init; }

    /// <summary>Dynamic port entries.</summary>
    public IReadOnlyList<NomadPort>? DynamicPorts { get; init; }
}
|
||||
|
||||
/// <summary>
/// Port entry used by Nomad network definitions. All members are optional.
/// </summary>
public sealed record NomadPort
{
    /// <summary>Port label.</summary>
    public string? Label { get; init; }

    /// <summary>Value of <c>Value</c> (a port number).</summary>
    public int? Value { get; init; }

    /// <summary>Value of <c>To</c> (a port number).</summary>
    public int? To { get; init; }

    /// <summary>Host network name.</summary>
    public string? HostNetwork { get; init; }
}
|
||||
|
||||
/// <summary>
/// Network definition of a Nomad task group. All members are optional.
/// </summary>
public sealed record NomadNetwork
{
    /// <summary>Network mode text.</summary>
    public string? Mode { get; init; }

    /// <summary>Device name.</summary>
    public string? Device { get; init; }

    /// <summary>CIDR text.</summary>
    public string? CIDR { get; init; }

    /// <summary>IP address text.</summary>
    public string? IP { get; init; }

    /// <summary>Reserved port entries.</summary>
    public IReadOnlyList<NomadPort>? ReservedPorts { get; init; }

    /// <summary>Dynamic port entries.</summary>
    public IReadOnlyList<NomadPort>? DynamicPorts { get; init; }

    /// <summary>DNS configuration of the network.</summary>
    public NomadDNSConfig? DNS { get; init; }
}
|
||||
|
||||
/// <summary>
/// DNS configuration of a Nomad network. All members are optional.
/// </summary>
public sealed record NomadDNSConfig
{
    /// <summary>DNS server entries.</summary>
    public IReadOnlyList<string>? Servers { get; init; }

    /// <summary>Search entries.</summary>
    public IReadOnlyList<string>? Searches { get; init; }

    /// <summary>Option entries.</summary>
    public IReadOnlyList<string>? Options { get; init; }
}
|
||||
|
||||
/// <summary>
/// Service definition of a Nomad task group. All members are optional.
/// </summary>
public sealed record NomadService
{
    /// <summary>Service name.</summary>
    public string? Name { get; init; }

    /// <summary>Label of the port the service uses.</summary>
    public string? PortLabel { get; init; }

    /// <summary>Service tags.</summary>
    public IReadOnlyList<string>? Tags { get; init; }

    /// <summary>Canary tags.</summary>
    public IReadOnlyList<string>? CanaryTags { get; init; }

    /// <summary>Checks attached to the service.</summary>
    public IReadOnlyList<NomadServiceCheck>? Checks { get; init; }

    /// <summary>Consul Connect configuration.</summary>
    public NomadConsulConnect? Connect { get; init; }

    /// <summary>String metadata attached to the service.</summary>
    public IReadOnlyDictionary<string, string>? Meta { get; init; }
}
|
||||
|
||||
/// <summary>
/// Check attached to a Nomad service. All members are optional.
/// </summary>
public sealed record NomadServiceCheck
{
    /// <summary>Check name.</summary>
    public string? Name { get; init; }

    /// <summary>Check type text.</summary>
    public string? Type { get; init; }

    /// <summary>Path used by the check.</summary>
    public string? Path { get; init; }

    /// <summary>Protocol text.</summary>
    public string? Protocol { get; init; }

    /// <summary>Label of the port the check uses.</summary>
    public string? PortLabel { get; init; }

    /// <summary>
    /// Value of the JSON property <c>Interval</c>; presumably nanoseconds, per the property name.
    /// </summary>
    [JsonPropertyName("Interval")]
    public long? IntervalNanos { get; init; }

    /// <summary>
    /// Value of the JSON property <c>Timeout</c>; presumably nanoseconds, per the property name.
    /// </summary>
    [JsonPropertyName("Timeout")]
    public long? TimeoutNanos { get; init; }

    /// <summary>Initial status text.</summary>
    public string? InitialStatus { get; init; }

    /// <summary>Value of <c>SuccessBeforePassing</c>.</summary>
    public int? SuccessBeforePassing { get; init; }

    /// <summary>Value of <c>FailuresBeforeCritical</c>.</summary>
    public int? FailuresBeforeCritical { get; init; }
}
|
||||
|
||||
/// <summary>
/// Consul Connect configuration of a Nomad service. All members are optional.
/// </summary>
public sealed record NomadConsulConnect
{
    /// <summary>Value of the <c>Native</c> flag.</summary>
    public bool? Native { get; init; }

    /// <summary>Sidecar service settings.</summary>
    public NomadConsulSidecarService? SidecarService { get; init; }
}
|
||||
|
||||
/// <summary>
/// Consul sidecar service settings. All members are optional.
/// </summary>
public sealed record NomadConsulSidecarService
{
    /// <summary>Sidecar tags.</summary>
    public IReadOnlyList<string>? Tags { get; init; }

    /// <summary>Sidecar port number.</summary>
    public int? Port { get; init; }

    /// <summary>Proxy configuration of the sidecar.</summary>
    public NomadConsulProxy? Proxy { get; init; }
}
|
||||
|
||||
/// <summary>
/// Consul proxy configuration of a sidecar service. All members are optional.
/// </summary>
public sealed record NomadConsulProxy
{
    /// <summary>Local service address text.</summary>
    public string? LocalServiceAddress { get; init; }

    /// <summary>Local service port number.</summary>
    public int? LocalServicePort { get; init; }

    /// <summary>Upstream entries of the proxy.</summary>
    public IReadOnlyList<NomadConsulUpstream>? Upstreams { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad Consul upstream: a destination name paired with a local bind port.
/// </summary>
public sealed record NomadConsulUpstream
{
    public string? DestinationName { get; init; }
    public int? LocalBindPort { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad template definition: source/destination paths, inline template text,
/// change handling, permissions and delimiters.
/// </summary>
/// <remarks>Every property is nullable and init-only.</remarks>
public sealed record NomadTemplate
{
    public string? SourcePath { get; init; }
    public string? DestPath { get; init; }
    public string? EmbeddedTmpl { get; init; }
    public string? ChangeMode { get; init; }
    public string? ChangeSignal { get; init; }

    /// <summary>Splay value; serialized under the JSON name <c>Splay</c>.</summary>
    [JsonPropertyName("Splay")]
    public long? SplayNanos { get; init; }

    public string? Perms { get; init; }
    public string? LeftDelim { get; init; }
    public string? RightDelim { get; init; }
    public bool? Envvars { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad artifact definition: getter source, mode, options, headers and relative destination.
/// </summary>
public sealed record NomadArtifact
{
    public string? GetterSource { get; init; }
    public string? GetterMode { get; init; }
    public IReadOnlyDictionary<string, string>? GetterOptions { get; init; }
    public IReadOnlyDictionary<string, string>? GetterHeaders { get; init; }
    public string? RelativeDest { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad log configuration: maximum file count and maximum file size (MB).
/// </summary>
public sealed record NomadLogConfig
{
    public int? MaxFiles { get; init; }
    public int? MaxFileSizeMB { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad restart policy: attempt count, interval, delay and mode.
/// </summary>
public sealed record NomadRestartPolicy
{
    public int? Attempts { get; init; }

    /// <summary>Restart interval; serialized under the JSON name <c>Interval</c>.</summary>
    [JsonPropertyName("Interval")]
    public long? IntervalNanos { get; init; }

    /// <summary>Restart delay; serialized under the JSON name <c>Delay</c>.</summary>
    [JsonPropertyName("Delay")]
    public long? DelayNanos { get; init; }

    public string? Mode { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad reschedule policy: attempts, interval, delay settings and the unlimited flag.
/// </summary>
public sealed record NomadReschedulePolicy
{
    public int? Attempts { get; init; }

    /// <summary>Reschedule interval; serialized under the JSON name <c>Interval</c>.</summary>
    [JsonPropertyName("Interval")]
    public long? IntervalNanos { get; init; }

    /// <summary>Reschedule delay; serialized under the JSON name <c>Delay</c>.</summary>
    [JsonPropertyName("Delay")]
    public long? DelayNanos { get; init; }

    public string? DelayFunction { get; init; }

    /// <summary>Upper bound for the delay; serialized under the JSON name <c>MaxDelay</c>.</summary>
    [JsonPropertyName("MaxDelay")]
    public long? MaxDelayNanos { get; init; }

    public bool? Unlimited { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad ephemeral disk configuration: sticky and migrate flags plus size (MB).
/// </summary>
public sealed record NomadEphemeralDisk
{
    public bool? Sticky { get; init; }
    public bool? Migrate { get; init; }
    public int? SizeMB { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad constraint expressed as a left target, a right target and an operand.
/// </summary>
public sealed record NomadConstraint
{
    public string? LTarget { get; init; }
    public string? RTarget { get; init; }
    public string? Operand { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad affinity: the same left/right/operand triple as a constraint, plus a weight.
/// </summary>
public sealed record NomadAffinity
{
    public string? LTarget { get; init; }
    public string? RTarget { get; init; }
    public string? Operand { get; init; }
    public int? Weight { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad allocation: identity, placement (node, job, task group), desired and client
/// status, per-task states, deployment status and index/time bookkeeping.
/// </summary>
/// <remarks>Every property is nullable and init-only.</remarks>
public sealed record NomadAllocation
{
    public string? ID { get; init; }
    public string? EvalID { get; init; }
    public string? Name { get; init; }
    public string? Namespace { get; init; }
    public string? NodeID { get; init; }
    public string? NodeName { get; init; }
    public string? JobID { get; init; }
    public string? TaskGroup { get; init; }
    public string? DesiredStatus { get; init; }
    public string? DesiredDescription { get; init; }
    public string? ClientStatus { get; init; }
    public string? ClientDescription { get; init; }

    /// <summary>Task states keyed by string (presumably the task name - confirm against the client).</summary>
    public IReadOnlyDictionary<string, NomadTaskState>? TaskStates { get; init; }

    /// <summary>Deployment health information for this allocation, when present.</summary>
    public NomadDeploymentStatus? DeploymentStatus { get; init; }

    public long? CreateIndex { get; init; }
    public long? ModifyIndex { get; init; }
    public long? CreateTime { get; init; }
    public long? ModifyTime { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad task state: lifecycle state, failure flag, restart bookkeeping and event history.
/// </summary>
public sealed record NomadTaskState
{
    public string? State { get; init; }
    public bool? Failed { get; init; }
    public int? Restarts { get; init; }
    public string? LastRestart { get; init; }

    // NOTE(review): LastRestart is a string while StartedAt/FinishedAt are numeric;
    // Nomad's HTTP API appears to send all three as RFC 3339 timestamps - confirm that
    // deserializing into long? works against a real allocation response.
    public long? StartedAt { get; init; }
    public long? FinishedAt { get; init; }

    /// <summary>Events recorded for the task.</summary>
    public IReadOnlyList<NomadTaskEvent>? Events { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad task event: type, time, messages, detail map and exit/kill information.
/// </summary>
public sealed record NomadTaskEvent
{
    public string? Type { get; init; }
    public long? Time { get; init; }
    public string? Message { get; init; }
    public string? DisplayMessage { get; init; }
    public IReadOnlyDictionary<string, string>? Details { get; init; }
    public int? ExitCode { get; init; }
    public int? Signal { get; init; }

    // NOTE(review): typed as a boolean here; confirm the Nomad API does not send
    // an error string for this field.
    public bool? KillError { get; init; }

    public string? KillReason { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad deployment status for an allocation: health and canary flags,
/// a timestamp and a modify index.
/// </summary>
public sealed record NomadDeploymentStatus
{
    public bool? Healthy { get; init; }
    public bool? Canary { get; init; }

    // NOTE(review): Nomad's HTTP API appears to send this as an RFC 3339 timestamp
    // string - confirm that long? deserializes correctly.
    public long? Timestamp { get; init; }

    // NOTE(review): every other ModifyIndex in this file is long?; this one is int?
    // and would overflow on large Raft indexes - consider widening.
    public int? ModifyIndex { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad deployment: identity, the job version/indexes it refers to, status text,
/// per-task-group state and create/modify indexes.
/// </summary>
public sealed record NomadDeployment
{
    public string? ID { get; init; }
    public string? Namespace { get; init; }
    public string? JobID { get; init; }
    public long? JobVersion { get; init; }
    public long? JobModifyIndex { get; init; }
    public long? JobCreateIndex { get; init; }
    public string? Status { get; init; }
    public string? StatusDescription { get; init; }

    /// <summary>Deployment state keyed by string (presumably the task group name - confirm).</summary>
    public IReadOnlyDictionary<string, NomadDeploymentState>? TaskGroups { get; init; }

    public long? CreateIndex { get; init; }
    public long? ModifyIndex { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad deployment state for a task group: revert/promote flags, desired and placed
/// counts, allocation health counters and the progress deadline.
/// </summary>
public sealed record NomadDeploymentState
{
    public bool? AutoRevert { get; init; }
    public bool? AutoPromote { get; init; }
    public bool? Promoted { get; init; }
    public int? DesiredCanaries { get; init; }
    public int? DesiredTotal { get; init; }
    public int? PlacedCanaries { get; init; }
    public IReadOnlyList<string>? PlacedAllocs { get; init; }
    public int? HealthyAllocs { get; init; }
    public int? UnhealthyAllocs { get; init; }

    /// <summary>Serialized under the JSON name <c>RequireProgressBy</c>.</summary>
    // NOTE(review): Nomad appears to send RequireProgressBy as a timestamp string,
    // not a number - confirm that long? deserializes correctly.
    [JsonPropertyName("RequireProgressBy")]
    public long? RequireProgressByNanos { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad evaluation: identity, priority, trigger, the job it refers to, status text,
/// links to related evaluations and index/time bookkeeping.
/// </summary>
/// <remarks>Every property is nullable and init-only.</remarks>
public sealed record NomadEvaluation
{
    public string? ID { get; init; }
    public string? Namespace { get; init; }
    public int? Priority { get; init; }
    public string? Type { get; init; }
    public string? TriggeredBy { get; init; }
    public string? JobID { get; init; }
    public long? JobModifyIndex { get; init; }
    public string? Status { get; init; }
    public string? StatusDescription { get; init; }
    public string? NextEval { get; init; }
    public string? PreviousEval { get; init; }
    public string? BlockedEval { get; init; }
    public long? CreateIndex { get; init; }
    public long? ModifyIndex { get; init; }
    public long? CreateTime { get; init; }
    public long? ModifyTime { get; init; }
}
|
||||
|
||||
// Request/Response models
|
||||
|
||||
/// <summary>
/// Nomad job register request: the job plus index-enforcement and override flags.
/// </summary>
public sealed record NomadJobRegisterRequest
{
    /// <summary>Job definition to register.</summary>
    public NomadJob? Job { get; init; }

    public bool? EnforceIndex { get; init; }
    public long? JobModifyIndex { get; init; }
    public bool? PolicyOverride { get; init; }
    public bool? PreserveCounts { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad job register response: evaluation ID and index, job modify index and warnings.
/// </summary>
public sealed record NomadJobRegisterResponse
{
    public string? EvalID { get; init; }
    public long? EvalCreateIndex { get; init; }
    public long? JobModifyIndex { get; init; }

    // NOTE(review): Nomad's register response appears to carry Warnings as a single
    // string, not an array - confirm this deserializes against a real response.
    public IReadOnlyList<string>? Warnings { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad job parse request: the HCL job text and a canonicalize flag.
/// </summary>
public sealed record NomadJobParseRequest
{
    public string? JobHCL { get; init; }
    public bool? Canonicalize { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad job deregister response: evaluation ID and index plus the job modify index.
/// </summary>
public sealed record NomadJobDeregisterResponse
{
    public string? EvalID { get; init; }
    public long? EvalCreateIndex { get; init; }
    public long? JobModifyIndex { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad job scale request: desired count, target map, message and policy override flag.
/// </summary>
public sealed record NomadJobScaleRequest
{
    public int? Count { get; init; }
    public IReadOnlyDictionary<string, string>? Target { get; init; }
    public string? Message { get; init; }
    public bool? PolicyOverride { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad job scale response: evaluation ID and evaluation create index.
/// </summary>
public sealed record NomadJobScaleResponse
{
    public string? EvalID { get; init; }
    public long? EvalCreateIndex { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad job dispatch request: a payload string and a metadata map.
/// </summary>
public sealed record NomadJobDispatchRequest
{
    // NOTE(review): Nomad's dispatch endpoint presumably expects the payload
    // base64-encoded - confirm where the encoding happens.
    public string? Payload { get; init; }

    // NOTE(review): the other map-typed properties in this file use
    // IReadOnlyDictionary; this one exposes a mutable Dictionary.
    public Dictionary<string, string>? Meta { get; init; }
}
|
||||
|
||||
/// <summary>
/// Nomad job dispatch response: dispatched job ID, evaluation ID and evaluation create index.
/// </summary>
public sealed record NomadJobDispatchResponse
{
    public string? DispatchedJobID { get; init; }
    public string? EvalID { get; init; }
    public long? EvalCreateIndex { get; init; }
}
|
||||
@@ -0,0 +1,203 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Capability;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Nomad.Client;
|
||||
using StellaOps.Agent.Nomad.Tasks;
|
||||
|
||||
namespace StellaOps.Agent.Nomad;
|
||||
|
||||
/// <summary>
/// Agent capability for managing HashiCorp Nomad jobs and allocations.
/// Routes each <c>nomad.*</c> task type to a dedicated task handler.
/// </summary>
/// <remarks>
/// A new task handler instance is created for every execution; all handlers share the
/// injected <see cref="INomadClient"/> and <see cref="TimeProvider"/>.
/// </remarks>
public sealed class NomadCapability : IAgentCapability, IDisposable
{
    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ILogger<NomadCapability> _logger;
    private readonly Dictionary<string, Func<AgentTaskInfo, CancellationToken, Task<AgentTaskResult>>> _taskHandlers;

    /// <summary>
    /// Gets the capability name.
    /// </summary>
    public string Name => "nomad";

    /// <summary>
    /// Gets the capability version.
    /// </summary>
    public string Version => "1.0.0";

    /// <summary>
    /// Gets the supported task types.
    /// </summary>
    public IReadOnlyList<string> SupportedTaskTypes { get; } = new[]
    {
        "nomad.deploy",
        "nomad.stop",
        "nomad.scale",
        "nomad.dispatch",
        "nomad.status",
        "nomad.health"
    };

    /// <summary>
    /// Creates a new Nomad capability.
    /// </summary>
    /// <param name="nomadClient">The Nomad client.</param>
    /// <param name="timeProvider">Time provider for timestamps.</param>
    /// <param name="loggerFactory">Logger factory.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public NomadCapability(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILoggerFactory loggerFactory)
    {
        _nomadClient = nomadClient ?? throw new ArgumentNullException(nameof(nomadClient));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
        _logger = loggerFactory.CreateLogger<NomadCapability>();

        // Task types are matched case-insensitively, consistent with the Compose capability.
        _taskHandlers = new Dictionary<string, Func<AgentTaskInfo, CancellationToken, Task<AgentTaskResult>>>(
            StringComparer.OrdinalIgnoreCase)
        {
            ["nomad.deploy"] = ExecuteDeployAsync,
            ["nomad.stop"] = ExecuteStopAsync,
            ["nomad.scale"] = ExecuteScaleAsync,
            ["nomad.dispatch"] = ExecuteDispatchAsync,
            ["nomad.status"] = ExecuteStatusAsync,
            ["nomad.health"] = ExecuteHealthCheckAsync
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Queries the Nomad agent's self endpoint; any exception is logged and reported as
    /// <c>false</c> instead of being rethrown.
    /// </remarks>
    public async Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        try
        {
            var self = await _nomadClient.GetAgentSelfAsync(ct);
            var config = self.Config;

            _logger.LogInformation(
                "Nomad capability initialized, connected to {Region}/{Datacenter} (version {Version})",
                config?.Region ?? "unknown",
                config?.Datacenter ?? "unknown",
                config?.Version ?? "unknown");

            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to initialize Nomad capability - Nomad agent not accessible");
            return false;
        }
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidNomadPayloadException">
    /// The task type is not supported, or the handler rejected the payload.
    /// </exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        if (!_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            throw new InvalidNomadPayloadException(task.TaskType, "Unsupported task type");
        }

        var startTime = _timeProvider.GetUtcNow();

        try
        {
            var result = await handler(task, ct);
            return result with
            {
                Duration = _timeProvider.GetUtcNow() - startTime
            };
        }
        catch (InvalidNomadPayloadException)
        {
            // Payload problems are surfaced to the caller unchanged.
            throw;
        }
        catch (Exception ex)
        {
            // Every other failure (including cancellation) becomes a failed result.
            _logger.LogError(ex, "Nomad task {TaskType} failed", task.TaskType);

            // Read the clock once so CompletedAt and Duration describe the same instant.
            var completedAt = _timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <inheritdoc />
    /// <remarks>Reports unhealthy (instead of throwing) when the agent self query fails.</remarks>
    public async Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        try
        {
            var self = await _nomadClient.GetAgentSelfAsync(ct);
            var region = self.Config?.Region ?? "unknown";
            return new CapabilityHealthStatus(true, $"Nomad capability ready ({region})");
        }
        catch (Exception ex)
        {
            return new CapabilityHealthStatus(false, $"Nomad agent not accessible: {ex.Message}");
        }
    }

    // Handler for "nomad.deploy".
    private async Task<AgentTaskResult> ExecuteDeployAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadDeployJobTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadDeployJobTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.stop".
    private async Task<AgentTaskResult> ExecuteStopAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadStopJobTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadStopJobTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.scale".
    private async Task<AgentTaskResult> ExecuteScaleAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadScaleJobTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadScaleJobTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.dispatch".
    private async Task<AgentTaskResult> ExecuteDispatchAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadDispatchJobTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadDispatchJobTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.status".
    private async Task<AgentTaskResult> ExecuteStatusAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadJobStatusTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadJobStatusTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    // Handler for "nomad.health".
    private async Task<AgentTaskResult> ExecuteHealthCheckAsync(AgentTaskInfo task, CancellationToken ct)
    {
        var taskHandler = new NomadHealthCheckTask(
            _nomadClient,
            _timeProvider,
            _loggerFactory.CreateLogger<NomadHealthCheckTask>());
        return await taskHandler.ExecuteAsync(task, ct);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Disposes the injected Nomad client.
    /// NOTE(review): the client is constructor-injected - confirm this capability owns its lifetime.
    /// </remarks>
    public void Dispose()
    {
        _nomadClient.Dispose();
    }
}
|
||||
@@ -0,0 +1,94 @@
|
||||
namespace StellaOps.Agent.Nomad;
|
||||
|
||||
/// <summary>
/// Root exception type for Nomad agent operations; the more specific Nomad
/// exceptions in this namespace derive from it.
/// </summary>
public class NomadAgentException : Exception
{
    /// <summary>Creates the exception with an error message.</summary>
    /// <param name="message">Error message.</param>
    public NomadAgentException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with an error message and the causing exception.</summary>
    /// <param name="message">Error message.</param>
    /// <param name="innerException">Exception that caused this one.</param>
    public NomadAgentException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
|
||||
/// <summary>
/// Thrown when a Nomad task payload is invalid or missing required fields.
/// </summary>
public class InvalidNomadPayloadException : NomadAgentException
{
    /// <summary>Task type whose payload was rejected.</summary>
    public string TaskType { get; }

    /// <summary>
    /// Creates the exception; <paramref name="details"/> is appended to the message
    /// after a colon when it is not <c>null</c>.
    /// </summary>
    /// <param name="taskType">Task type whose payload was rejected.</param>
    /// <param name="details">Optional explanation of what is wrong.</param>
    public InvalidNomadPayloadException(string taskType, string? details = null)
        : base($"Invalid payload for Nomad task type '{taskType}'{(details is not null ? $": {details}" : "")}")
        => TaskType = taskType;
}
|
||||
|
||||
/// <summary>
/// Thrown when a Nomad API call fails; optionally carries a status code.
/// </summary>
public class NomadApiException : NomadAgentException
{
    /// <summary>Status code associated with the failure, or <c>null</c> when none was supplied.</summary>
    public int? StatusCode { get; }

    /// <summary>Creates the exception with a message and an optional status code.</summary>
    /// <param name="message">Error message.</param>
    /// <param name="statusCode">Status code, if known.</param>
    public NomadApiException(string message, int? statusCode = null)
        : base(message)
        => StatusCode = statusCode;

    /// <summary>Creates the exception with a message, a status code and the causing exception.</summary>
    /// <param name="message">Error message.</param>
    /// <param name="statusCode">Status code.</param>
    /// <param name="innerException">Exception that caused this one.</param>
    public NomadApiException(string message, int statusCode, Exception innerException)
        : base(message, innerException)
        => StatusCode = statusCode;
}
|
||||
|
||||
/// <summary>
/// Thrown when a Nomad job operation fails; records the operation and the job it targeted.
/// </summary>
public class NomadJobOperationException : NomadAgentException
{
    /// <summary>ID of the job the operation targeted.</summary>
    public string JobId { get; }

    /// <summary>Name of the operation that failed.</summary>
    public string Operation { get; }

    /// <summary>Creates the exception; the message combines operation, job ID and detail text.</summary>
    /// <param name="operation">Name of the operation that failed.</param>
    /// <param name="jobId">ID of the job the operation targeted.</param>
    /// <param name="message">Failure detail appended to the message.</param>
    public NomadJobOperationException(string operation, string jobId, string message)
        : base($"Nomad {operation} failed for job '{jobId}': {message}")
        => (Operation, JobId) = (operation, jobId);
}
|
||||
|
||||
/// <summary>
/// Thrown when waiting for a Nomad deployment to complete exceeds its timeout.
/// </summary>
public class NomadDeploymentTimeoutException : NomadAgentException
{
    /// <summary>ID of the job being deployed.</summary>
    public string JobId { get; }

    /// <summary>ID of the deployment, when one was known.</summary>
    public string? DeploymentId { get; }

    /// <summary>Timeout that elapsed.</summary>
    public TimeSpan Timeout { get; }

    /// <summary>
    /// Creates the exception. The message mentions the job ID and the timeout;
    /// the deployment ID is exposed only through <see cref="DeploymentId"/>.
    /// </summary>
    /// <param name="jobId">ID of the job being deployed.</param>
    /// <param name="deploymentId">ID of the deployment, if known.</param>
    /// <param name="timeout">Timeout that elapsed.</param>
    public NomadDeploymentTimeoutException(string jobId, string? deploymentId, TimeSpan timeout)
        : base($"Nomad deployment timed out waiting for job '{jobId}' to complete after {timeout}")
        => (JobId, DeploymentId, Timeout) = (jobId, deploymentId, timeout);
}
|
||||
|
||||
/// <summary>
/// Thrown when a Nomad evaluation fails; records the evaluation ID and its status.
/// </summary>
public class NomadEvaluationFailedException : NomadAgentException
{
    /// <summary>ID of the failed evaluation.</summary>
    public string EvalId { get; }

    /// <summary>Status the evaluation ended in.</summary>
    public string Status { get; }

    /// <summary>
    /// Creates the exception; <paramref name="description"/> is appended to the message
    /// after a colon when it is not <c>null</c>.
    /// </summary>
    /// <param name="evalId">ID of the failed evaluation.</param>
    /// <param name="status">Status the evaluation ended in.</param>
    /// <param name="description">Optional status description.</param>
    public NomadEvaluationFailedException(string evalId, string status, string? description = null)
        : base($"Nomad evaluation '{evalId}' failed with status '{status}'{(description is not null ? $": {description}" : "")}")
        => (EvalId, Status) = (evalId, status);
}
|
||||
@@ -0,0 +1,22 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Agent.Nomad</RootNamespace>
|
||||
<Description>Stella Agent Nomad Capability - manages HashiCorp Nomad jobs and allocations</Description>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Http" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,17 @@
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Nomad.Tasks;
|
||||
|
||||
/// <summary>
/// Contract implemented by every Nomad task handler: take an agent task and
/// produce its result.
/// </summary>
public interface INomadTask
{
    /// <summary>
    /// Runs the Nomad task described by <paramref name="task"/>.
    /// </summary>
    /// <param name="task">The task information.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The task result.</returns>
    Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,279 @@
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Nomad.Client;
|
||||
|
||||
namespace StellaOps.Agent.Nomad.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for deploying Nomad jobs.
/// </summary>
/// <remarks>
/// Registers the job and then, depending on the payload:
/// returns immediately (<see cref="DeployJobPayload.Detach"/>), waits only for the
/// evaluation (<see cref="DeployJobPayload.WaitForDeployment"/> is <c>false</c>), or waits
/// for the deployment to reach a terminal status.
/// </remarks>
public sealed class NomadDeployJobTask : INomadTask
{
    // Task type reported in payload validation errors.
    private const string TaskTypeName = "nomad.deploy";

    // Fixed wait budget used when only the evaluation is awaited.
    private static readonly TimeSpan EvaluationTimeout = TimeSpan.FromMinutes(2);
    private static readonly TimeSpan EvaluationPollInterval = TimeSpan.FromSeconds(2);

    // Poll faster while no deployment exists yet, slower once one is being tracked.
    private static readonly TimeSpan MissingDeploymentPollInterval = TimeSpan.FromSeconds(2);
    private static readonly TimeSpan DeploymentPollInterval = TimeSpan.FromSeconds(5);

    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadDeployJobTask> _logger;

    /// <summary>
    /// Payload for deploying a Nomad job.
    /// </summary>
    public sealed record DeployJobPayload
    {
        /// <summary>
        /// Job specification in HCL or JSON format.
        /// Either JobSpec or Job must be provided.
        /// </summary>
        public string? JobSpec { get; init; }

        /// <summary>
        /// Job ID when using JobSpec.
        /// </summary>
        public string? JobId { get; init; }

        /// <summary>
        /// Pre-parsed job definition (alternative to JobSpec).
        /// </summary>
        public NomadJob? Job { get; init; }

        /// <summary>
        /// Nomad namespace.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Whether to wait for deployment to complete.
        /// </summary>
        public bool WaitForDeployment { get; init; } = true;

        /// <summary>
        /// Timeout for deployment completion.
        /// </summary>
        public TimeSpan DeploymentTimeout { get; init; } = TimeSpan.FromMinutes(10);

        /// <summary>
        /// Whether to run in detached mode (fire and forget).
        /// </summary>
        public bool Detach { get; init; } = false;
    }

    /// <summary>
    /// Creates a new Nomad deploy job task handler.
    /// </summary>
    /// <param name="nomadClient">Client used for all Nomad API calls.</param>
    /// <param name="timeProvider">Source of the completion timestamps.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public NomadDeployJobTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadDeployJobTask> logger)
    {
        _nomadClient = nomadClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <exception cref="InvalidNomadPayloadException">
    /// The payload cannot be deserialized, contains neither JobSpec nor Job, or yields no job ID.
    /// </exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        var payload = JsonSerializer.Deserialize<DeployJobPayload>(task.Payload)
            ?? throw new InvalidNomadPayloadException(TaskTypeName, "Failed to deserialize payload");

        NomadJob nomadJob;
        if (!string.IsNullOrEmpty(payload.JobSpec))
        {
            _logger.LogInformation("Parsing Nomad job spec");
            nomadJob = await _nomadClient.ParseJobAsync(payload.JobSpec, ct: ct);
        }
        else if (payload.Job is not null)
        {
            nomadJob = payload.Job;
        }
        else
        {
            throw new InvalidNomadPayloadException(TaskTypeName, "Either JobSpec or Job must be provided");
        }

        // The ID inside the job definition wins over the payload-level JobId.
        var jobId = nomadJob.ID
            ?? payload.JobId
            ?? throw new InvalidNomadPayloadException(TaskTypeName, "Job ID is required");

        _logger.LogInformation(
            "Deploying Nomad job {JobId} to {Region}/{Namespace}",
            jobId,
            payload.Region ?? "default",
            payload.Namespace ?? "default");

        try
        {
            var registerResponse = await _nomadClient.RegisterJobAsync(
                nomadJob,
                payload.Namespace,
                payload.Region,
                ct);

            _logger.LogInformation(
                "Registered Nomad job {JobId}, evaluation ID: {EvalId}",
                jobId,
                registerResponse.EvalID);

            var evalId = registerResponse.EvalID ?? "";

            if (payload.Detach)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["jobId"] = jobId,
                        ["evalId"] = evalId,
                        ["status"] = "DETACHED"
                    },
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            if (!payload.WaitForDeployment)
            {
                // Wait for evaluation only
                var evaluation = await WaitForEvaluationAsync(
                    evalId,
                    payload.Namespace,
                    EvaluationTimeout,
                    ct);

                var evalSuccess = evaluation?.Status == "complete";
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = evalSuccess,
                    // A timed-out wait yields no evaluation; report "unknown" instead of an empty reason.
                    Error = evalSuccess
                        ? null
                        : $"Evaluation failed: {DescribeStatus(evaluation?.StatusDescription, evaluation?.Status)}",
                    Outputs = new Dictionary<string, object>
                    {
                        ["jobId"] = jobId,
                        ["evalId"] = evalId,
                        ["evalStatus"] = evaluation?.Status ?? "unknown",
                        ["status"] = evalSuccess ? "EVALUATED" : "EVAL_FAILED"
                    },
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            // Wait for the deployment that belongs to the job version just registered.
            var deployment = await WaitForDeploymentAsync(
                jobId,
                payload.Namespace,
                registerResponse.JobModifyIndex,
                payload.DeploymentTimeout,
                ct);

            var success = deployment?.Status == "successful";

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = success,
                Error = success
                    ? null
                    : $"Deployment failed: {DescribeStatus(deployment?.StatusDescription, deployment?.Status)}",
                Outputs = new Dictionary<string, object>
                {
                    ["jobId"] = jobId,
                    ["evalId"] = evalId,
                    ["deploymentId"] = deployment?.ID ?? "",
                    ["deploymentStatus"] = deployment?.Status ?? "unknown",
                    ["status"] = success ? "DEPLOYED" : "DEPLOYMENT_FAILED"
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(ex, "Failed to deploy Nomad job {JobId}", jobId);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to deploy job: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }

    // Picks the most informative non-blank text for an error message.
    private static string DescribeStatus(string? description, string? status)
    {
        if (!string.IsNullOrWhiteSpace(description))
        {
            return description;
        }

        return string.IsNullOrWhiteSpace(status) ? "unknown" : status;
    }

    // Polls the evaluation until it reaches a terminal status. Returns null when the
    // timeout elapses first; cancellation through ct propagates as an exception.
    private async Task<NomadEvaluation?> WaitForEvaluationAsync(
        string evalId,
        string? ns,
        TimeSpan timeout,
        CancellationToken ct)
    {
        using var timeoutCts = new CancellationTokenSource(timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

        try
        {
            while (!linkedCts.IsCancellationRequested)
            {
                var evaluation = await _nomadClient.GetEvaluationAsync(evalId, ns, ct: linkedCts.Token);

                // Evaluations are matched on the single-l spelling "canceled";
                // the deployment wait below deliberately matches "cancelled".
                if (evaluation?.Status is "complete" or "failed" or "canceled")
                {
                    return evaluation;
                }

                _logger.LogDebug("Evaluation {EvalId} status: {Status}", evalId, evaluation?.Status ?? "unknown");
                await Task.Delay(EvaluationPollInterval, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            _logger.LogWarning("Evaluation {EvalId} wait timed out after {Timeout}", evalId, timeout);
        }

        return null;
    }

    // Polls the job's deployments until the one belonging to the registered job version
    // reaches a terminal status. On timeout the last deployment seen (possibly null) is
    // returned; cancellation through ct propagates as an exception.
    private async Task<NomadDeployment?> WaitForDeploymentAsync(
        string jobId,
        string? ns,
        long? minJobModifyIndex,
        TimeSpan timeout,
        CancellationToken ct)
    {
        using var timeoutCts = new CancellationTokenSource(timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

        NomadDeployment? deployment = null;

        try
        {
            while (!linkedCts.IsCancellationRequested)
            {
                var deployments = await _nomadClient.GetJobDeploymentsAsync(jobId, ns, ct: linkedCts.Token);

                // Ignore deployments of earlier job versions: one that already finished
                // "successful" would otherwise be reported as the result of this deploy.
                // Among the remaining candidates the most recently created one wins.
                var current = deployments
                    .Where(d => minJobModifyIndex is null
                        || d.JobModifyIndex is null
                        || d.JobModifyIndex >= minJobModifyIndex)
                    .MaxBy(d => d.CreateIndex ?? 0);

                if (current is null)
                {
                    await Task.Delay(MissingDeploymentPollInterval, linkedCts.Token);
                    continue;
                }

                deployment = current;

                // Deployments are matched on the double-l spelling "cancelled".
                if (deployment.Status is "successful" or "failed" or "cancelled")
                {
                    return deployment;
                }

                _logger.LogDebug(
                    "Deployment {DeploymentId} for job {JobId} status: {Status}",
                    deployment.ID,
                    jobId,
                    deployment.Status);

                await Task.Delay(DeploymentPollInterval, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            _logger.LogWarning("Deployment wait for job {JobId} timed out after {Timeout}", jobId, timeout);
        }

        return deployment;
    }
}
|
||||
@@ -0,0 +1,202 @@
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Nomad.Client;
|
||||
|
||||
namespace StellaOps.Agent.Nomad.Tasks;
|
||||
|
||||
/// <summary>
|
||||
/// Task handler for dispatching parameterized Nomad jobs.
|
||||
/// </summary>
|
||||
public sealed class NomadDispatchJobTask : INomadTask
{
    /// <summary>
    /// Delay between two consecutive status polls of a dispatched job.
    /// </summary>
    private static readonly TimeSpan CompletionPollInterval = TimeSpan.FromSeconds(5);

    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadDispatchJobTask> _logger;

    /// <summary>
    /// How waiting for a dispatched job ended.
    /// </summary>
    private enum JobCompletionOutcome
    {
        /// <summary>The job is dead and every allocation completed without a failed task.</summary>
        Succeeded,

        /// <summary>The job is dead but at least one allocation did not complete cleanly.</summary>
        Failed,

        /// <summary>The client returned no job for the dispatched job ID.</summary>
        NotFound,

        /// <summary>The completion timeout elapsed before the job became dead.</summary>
        TimedOut
    }

    /// <summary>
    /// Payload for dispatching a parameterized Nomad job.
    /// </summary>
    public sealed record DispatchJobPayload
    {
        /// <summary>
        /// Job ID to dispatch (must be a parameterized batch job).
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Nomad namespace.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Metadata to pass to the dispatched job.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Meta { get; init; }

        /// <summary>
        /// Payload data to pass to the dispatched job.
        /// </summary>
        public string? Payload { get; init; }

        /// <summary>
        /// Whether to wait for the dispatched job to complete.
        /// </summary>
        public bool WaitForCompletion { get; init; } = false;

        /// <summary>
        /// Timeout for job completion.
        /// </summary>
        public TimeSpan CompletionTimeout { get; init; } = TimeSpan.FromMinutes(30);
    }

    /// <summary>
    /// Creates a new Nomad dispatch job task handler.
    /// </summary>
    /// <param name="nomadClient">Client used to dispatch and poll jobs.</param>
    /// <param name="timeProvider">Clock used to stamp <c>CompletedAt</c> on results.</param>
    /// <param name="logger">Logger for this handler.</param>
    public NomadDispatchJobTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadDispatchJobTask> logger)
    {
        _nomadClient = nomadClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <summary>
    /// Dispatches the job described by the task payload and, when
    /// <see cref="DispatchJobPayload.WaitForCompletion"/> is set, waits for it to finish.
    /// </summary>
    /// <param name="task">Task whose payload deserializes to a <see cref="DispatchJobPayload"/>.</param>
    /// <param name="ct">Caller cancellation token.</param>
    /// <returns>
    /// A result whose <c>status</c> output is "DISPATCHED" (no wait requested), "COMPLETED",
    /// "FAILED" (job not found or allocations not clean) or "TIMED_OUT".
    /// <see cref="NomadApiException"/> is converted into a failed result.
    /// </returns>
    /// <exception cref="InvalidNomadPayloadException">The payload deserialized to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<DispatchJobPayload>(task.Payload)
            ?? throw new InvalidNomadPayloadException("nomad.dispatch", "Failed to deserialize payload");

        _logger.LogInformation(
            "Dispatching Nomad parameterized job {JobId}",
            payload.JobId);

        try
        {
            var response = await _nomadClient.DispatchJobAsync(
                payload.JobId,
                payload.Meta,
                payload.Payload,
                payload.Namespace,
                payload.Region,
                ct);

            _logger.LogInformation(
                "Dispatched Nomad job {JobId}, dispatched job ID: {DispatchedJobId}, evaluation ID: {EvalId}",
                payload.JobId,
                response.DispatchedJobID,
                response.EvalID);

            if (!payload.WaitForCompletion)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = true,
                    Outputs = new Dictionary<string, object>
                    {
                        ["jobId"] = payload.JobId,
                        ["dispatchedJobId"] = response.DispatchedJobID ?? "",
                        ["evalId"] = response.EvalID ?? "",
                        ["status"] = "DISPATCHED"
                    },
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            // Wait for the dispatched job to complete
            var dispatchedJobId = response.DispatchedJobID
                ?? throw new NomadApiException("Dispatch response missing dispatched job ID");

            // Poll in the same namespace AND region the job was dispatched to.
            var outcome = await WaitForJobCompletionAsync(
                dispatchedJobId,
                payload.Namespace,
                payload.Region,
                payload.CompletionTimeout,
                ct);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = outcome == JobCompletionOutcome.Succeeded,
                Error = DescribeFailure(outcome, dispatchedJobId),
                Outputs = new Dictionary<string, object>
                {
                    ["jobId"] = payload.JobId,
                    ["dispatchedJobId"] = dispatchedJobId,
                    ["evalId"] = response.EvalID ?? "",
                    ["status"] = ToStatusLabel(outcome)
                },
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(ex, "Failed to dispatch Nomad job {JobId}", payload.JobId);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to dispatch job: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }

    /// <summary>
    /// Maps a wait outcome to the error text reported on the task result.
    /// </summary>
    /// <returns><c>null</c> for a successful outcome, otherwise a message naming the actual cause.</returns>
    private static string? DescribeFailure(JobCompletionOutcome outcome, string dispatchedJobId) => outcome switch
    {
        JobCompletionOutcome.Succeeded => null,
        JobCompletionOutcome.TimedOut => $"Dispatched job {dispatchedJobId} did not complete within timeout",
        JobCompletionOutcome.NotFound => $"Dispatched job {dispatchedJobId} was not found",
        _ => $"Dispatched job {dispatchedJobId} finished with failed or incomplete allocations"
    };

    /// <summary>
    /// Maps a wait outcome to the value of the <c>status</c> output.
    /// </summary>
    private static string ToStatusLabel(JobCompletionOutcome outcome) => outcome switch
    {
        JobCompletionOutcome.Succeeded => "COMPLETED",
        JobCompletionOutcome.TimedOut => "TIMED_OUT",
        _ => "FAILED"
    };

    /// <summary>
    /// Polls a dispatched job until its status is "dead" or the timeout elapses.
    /// </summary>
    /// <param name="jobId">ID of the dispatched job.</param>
    /// <param name="ns">Nomad namespace, or <c>null</c>.</param>
    /// <param name="region">Nomad region, or <c>null</c>.</param>
    /// <param name="timeout">Maximum time to keep polling.</param>
    /// <param name="ct">Caller cancellation token.</param>
    /// <returns>The reason polling stopped.</returns>
    private async Task<JobCompletionOutcome> WaitForJobCompletionAsync(
        string jobId,
        string? ns,
        string? region,
        TimeSpan timeout,
        CancellationToken ct)
    {
        using var timeoutCts = new CancellationTokenSource(timeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

        try
        {
            while (!linkedCts.IsCancellationRequested)
            {
                var job = await _nomadClient.GetJobAsync(jobId, ns, region, linkedCts.Token);

                if (job is null)
                {
                    _logger.LogWarning("Dispatched job {JobId} not found", jobId);
                    return JobCompletionOutcome.NotFound;
                }

                if (job.Status == "dead")
                {
                    // Check allocations to determine success
                    // NOTE(review): an empty allocation list passes this check vacuously
                    // (unchanged from the previous behavior) - confirm that is intended.
                    var allocations = await _nomadClient.GetJobAllocationsAsync(jobId, ns, region, linkedCts.Token);
                    var allSucceeded = allocations.All(a =>
                        a.ClientStatus == "complete" &&
                        a.TaskStates?.Values.All(ts => !ts.Failed ?? true) == true);

                    _logger.LogInformation(
                        "Dispatched job {JobId} completed, success: {Success}",
                        jobId,
                        allSucceeded);

                    return allSucceeded ? JobCompletionOutcome.Succeeded : JobCompletionOutcome.Failed;
                }

                _logger.LogDebug("Dispatched job {JobId} status: {Status}", jobId, job.Status);
                await Task.Delay(CompletionPollInterval, linkedCts.Token);
            }
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            _logger.LogWarning("Dispatched job {JobId} completion wait timed out after {Timeout}", jobId, timeout);
            return JobCompletionOutcome.TimedOut;
        }

        // The loop condition observed cancellation without an exception being thrown.
        // Caller cancellation must not be reported as a timeout.
        ct.ThrowIfCancellationRequested();

        _logger.LogWarning("Dispatched job {JobId} completion wait timed out after {Timeout}", jobId, timeout);
        return JobCompletionOutcome.TimedOut;
    }
}
|
||||
@@ -0,0 +1,220 @@
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Nomad.Client;
|
||||
|
||||
namespace StellaOps.Agent.Nomad.Tasks;
|
||||
|
||||
/// <summary>
|
||||
/// Task handler for checking Nomad job health.
|
||||
/// </summary>
|
||||
public sealed class NomadHealthCheckTask : INomadTask
{
    /// <summary>
    /// Delay between two consecutive health polls while waiting for a job to become healthy.
    /// </summary>
    private static readonly TimeSpan HealthPollInterval = TimeSpan.FromSeconds(5);

    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadHealthCheckTask> _logger;

    /// <summary>
    /// Payload for checking Nomad job health.
    /// </summary>
    public sealed record HealthCheckPayload
    {
        /// <summary>
        /// Job ID to check health for.
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Nomad namespace.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Minimum number of healthy allocations required.
        /// </summary>
        public int MinHealthyAllocations { get; init; } = 1;

        /// <summary>
        /// Whether to wait for health requirements to be met.
        /// </summary>
        public bool WaitForHealthy { get; init; } = true;

        /// <summary>
        /// Timeout for health check.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Creates a new Nomad health check task handler.
    /// </summary>
    /// <param name="nomadClient">Client used to list job allocations.</param>
    /// <param name="timeProvider">Clock used to stamp <c>CompletedAt</c> on results.</param>
    /// <param name="logger">Logger for this handler.</param>
    public NomadHealthCheckTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadHealthCheckTask> logger)
    {
        _nomadClient = nomadClient;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <summary>
    /// Checks whether the job has at least <see cref="HealthCheckPayload.MinHealthyAllocations"/>
    /// healthy running allocations, optionally polling until it does or the timeout elapses.
    /// </summary>
    /// <param name="task">Task whose payload deserializes to a <see cref="HealthCheckPayload"/>.</param>
    /// <param name="ct">Caller cancellation token.</param>
    /// <returns>
    /// A result whose <c>status</c> output is "HEALTHY", "UNHEALTHY" (single check only) or
    /// "TIMED_OUT". <see cref="NomadApiException"/> is converted into a failed result.
    /// </returns>
    /// <exception cref="InvalidNomadPayloadException">The payload deserialized to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var payload = JsonSerializer.Deserialize<HealthCheckPayload>(task.Payload)
            ?? throw new InvalidNomadPayloadException("nomad.health", "Failed to deserialize payload");

        _logger.LogInformation("Checking health of Nomad job {JobId}", payload.JobId);

        try
        {
            if (!payload.WaitForHealthy)
            {
                // Just check current state
                return await CheckCurrentHealthAsync(task.Id, payload, ct);
            }

            // Wait for health requirements to be met
            using var timeoutCts = new CancellationTokenSource(payload.Timeout);
            using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token);

            try
            {
                while (!linkedCts.IsCancellationRequested)
                {
                    var health = await GetHealthStatusAsync(payload, linkedCts.Token);

                    if (health.IsHealthy)
                    {
                        return BuildHealthyResult(task.Id, payload, health);
                    }

                    _logger.LogDebug(
                        "Nomad job {JobId} health check: {Healthy}/{MinRequired} healthy, waiting...",
                        payload.JobId,
                        health.HealthyCount,
                        payload.MinHealthyAllocations);

                    await Task.Delay(HealthPollInterval, linkedCts.Token);
                }
            }
            catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
            {
                _logger.LogWarning(
                    "Nomad job {JobId} health check timed out after {Timeout}",
                    payload.JobId,
                    payload.Timeout);
            }

            // Timeout - take one last reading with the caller's token.
            var finalHealth = await GetHealthStatusAsync(payload, ct);

            if (finalHealth.IsHealthy)
            {
                // The job met the requirement by the time of the final reading; reporting
                // TIMED_OUT next to outputs that satisfy minRequired would be contradictory.
                return BuildHealthyResult(task.Id, payload, finalHealth);
            }

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Health check timed out after {payload.Timeout}",
                Outputs = BuildOutputs(payload, finalHealth, "TIMED_OUT"),
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(ex, "Failed to check health of Nomad job {JobId}", payload.JobId);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Health check failed: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }

    /// <summary>
    /// Performs a single health reading and reports it without waiting.
    /// </summary>
    private async Task<AgentTaskResult> CheckCurrentHealthAsync(
        Guid taskId,
        HealthCheckPayload payload,
        CancellationToken ct)
    {
        var health = await GetHealthStatusAsync(payload, ct);

        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = health.IsHealthy,
            Error = health.IsHealthy
                ? null
                : $"Only {health.HealthyCount} healthy allocations, requires {payload.MinHealthyAllocations}",
            Outputs = BuildOutputs(payload, health, health.IsHealthy ? "HEALTHY" : "UNHEALTHY"),
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    /// <summary>
    /// Logs and builds the successful result for a reading that meets the health requirement.
    /// </summary>
    private AgentTaskResult BuildHealthyResult(
        Guid taskId,
        HealthCheckPayload payload,
        (bool IsHealthy, int HealthyCount, int TotalAllocations, int RunningAllocations) health)
    {
        _logger.LogInformation(
            "Nomad job {JobId} is healthy: {Healthy}/{MinRequired} healthy allocations",
            payload.JobId,
            health.HealthyCount,
            payload.MinHealthyAllocations);

        return new AgentTaskResult
        {
            TaskId = taskId,
            Success = true,
            Outputs = BuildOutputs(payload, health, "HEALTHY"),
            CompletedAt = _timeProvider.GetUtcNow()
        };
    }

    /// <summary>
    /// Builds the outputs dictionary shared by every result that carries a health reading.
    /// </summary>
    private static Dictionary<string, object> BuildOutputs(
        HealthCheckPayload payload,
        (bool IsHealthy, int HealthyCount, int TotalAllocations, int RunningAllocations) health,
        string status)
    {
        return new Dictionary<string, object>
        {
            ["jobId"] = payload.JobId,
            ["healthyAllocations"] = health.HealthyCount,
            ["runningAllocations"] = health.RunningAllocations,
            ["totalAllocations"] = health.TotalAllocations,
            ["minRequired"] = payload.MinHealthyAllocations,
            ["status"] = status
        };
    }

    /// <summary>
    /// Lists the job's allocations and counts those that are running and marked healthy.
    /// </summary>
    /// <returns>
    /// Whether the healthy count reaches the required minimum, the healthy count, the total
    /// number of allocations returned, and the number with client status "running".
    /// </returns>
    private async Task<(bool IsHealthy, int HealthyCount, int TotalAllocations, int RunningAllocations)>
        GetHealthStatusAsync(HealthCheckPayload payload, CancellationToken ct)
    {
        var allocations = await _nomadClient.GetJobAllocationsAsync(
            payload.JobId,
            payload.Namespace,
            payload.Region,
            ct);

        var runningAllocations = allocations
            .Where(a => a.ClientStatus == "running")
            .ToList();

        // Only running allocations whose deployment status is explicitly healthy count.
        var healthyCount = runningAllocations
            .Count(a => a.DeploymentStatus?.Healthy == true);

        return (
            healthyCount >= payload.MinHealthyAllocations,
            healthyCount,
            allocations.Count,
            runningAllocations.Count
        );
    }
}
|
||||
@@ -0,0 +1,186 @@
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Nomad.Client;
|
||||
|
||||
namespace StellaOps.Agent.Nomad.Tasks;
|
||||
|
||||
/// <summary>
|
||||
/// Task handler for getting Nomad job status.
|
||||
/// </summary>
|
||||
public sealed class NomadJobStatusTask : INomadTask
{
    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadJobStatusTask> _logger;

    /// <summary>
    /// Request describing which job to inspect and how much detail to return.
    /// </summary>
    public sealed record JobStatusPayload
    {
        /// <summary>
        /// ID of the job whose status is requested.
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Nomad namespace of the job.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region of the job.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// When set, an allocation summary is added to the outputs.
        /// </summary>
        public bool IncludeAllocations { get; init; }

        /// <summary>
        /// When set, the first deployment returned by the client is added to the outputs.
        /// </summary>
        public bool IncludeDeployments { get; init; }
    }

    /// <summary>
    /// Initializes the handler with its collaborators.
    /// </summary>
    public NomadJobStatusTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadJobStatusTask> logger)
    {
        (_nomadClient, _timeProvider, _logger) = (nomadClient, timeProvider, logger);
    }

    /// <summary>
    /// Fetches the job and returns its properties, plus optional allocation and deployment
    /// details, as task outputs. A missing job or a <see cref="NomadApiException"/> yields a
    /// failed result.
    /// </summary>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var request = JsonSerializer.Deserialize<JobStatusPayload>(task.Payload);
        if (request is null)
        {
            throw new InvalidNomadPayloadException("nomad.status", "Failed to deserialize payload");
        }

        _logger.LogInformation("Getting status for Nomad job {JobId}", request.JobId);

        try
        {
            var job = await _nomadClient.GetJobAsync(
                request.JobId,
                request.Namespace,
                request.Region,
                ct);

            if (job is null)
            {
                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = $"Job '{request.JobId}' not found",
                    CompletedAt = _timeProvider.GetUtcNow()
                };
            }

            // Core job properties, with fallbacks for values the API left unset.
            var outputs = new Dictionary<string, object>();
            outputs.Add("jobId", job.ID ?? request.JobId);
            outputs.Add("name", job.Name ?? "");
            outputs.Add("type", job.Type ?? "service");
            outputs.Add("status", job.Status ?? "unknown");
            outputs.Add("statusDescription", job.StatusDescription ?? "");
            outputs.Add("namespace", job.Namespace ?? "default");
            outputs.Add("region", job.Region ?? "");
            outputs.Add("priority", job.Priority ?? 50);
            outputs.Add("version", job.Version ?? 0);
            outputs.Add("stable", job.Stable ?? false);
            outputs.Add("datacenters", job.Datacenters ?? new List<string>());

            // One summary entry per task group.
            if (job.TaskGroups is not null)
            {
                var groupSummaries = new List<Dictionary<string, object>>();
                foreach (var group in job.TaskGroups)
                {
                    groupSummaries.Add(new Dictionary<string, object>
                    {
                        ["name"] = group.Name ?? "",
                        ["count"] = group.Count ?? 1,
                        ["taskCount"] = group.Tasks?.Count ?? 0
                    });
                }

                outputs["taskGroups"] = groupSummaries;
            }

            // Allocation counts, overall and per client status.
            if (request.IncludeAllocations)
            {
                var allocations = await _nomadClient.GetJobAllocationsAsync(
                    request.JobId,
                    request.Namespace,
                    request.Region,
                    ct);

                var countsByStatus = new Dictionary<string, int>();
                foreach (var allocation in allocations)
                {
                    var statusKey = allocation.ClientStatus ?? "unknown";
                    countsByStatus[statusKey] = countsByStatus.TryGetValue(statusKey, out var seen)
                        ? seen + 1
                        : 1;
                }

                int CountOf(string status) => countsByStatus.TryGetValue(status, out var n) ? n : 0;

                outputs["allocations"] = new Dictionary<string, object>
                {
                    ["total"] = allocations.Count,
                    ["byStatus"] = countsByStatus,
                    ["running"] = CountOf("running"),
                    ["complete"] = CountOf("complete"),
                    ["failed"] = CountOf("failed")
                };
            }

            // First deployment returned by the client, if any.
            if (request.IncludeDeployments)
            {
                var deployments = await _nomadClient.GetJobDeploymentsAsync(
                    request.JobId,
                    request.Namespace,
                    request.Region,
                    ct);

                if (deployments.FirstOrDefault() is { } firstDeployment)
                {
                    outputs["latestDeployment"] = new Dictionary<string, object>
                    {
                        ["id"] = firstDeployment.ID ?? "",
                        ["status"] = firstDeployment.Status ?? "unknown",
                        ["statusDescription"] = firstDeployment.StatusDescription ?? ""
                    };
                }
            }

            _logger.LogInformation(
                "Nomad job {JobId} status: {Status}",
                request.JobId,
                job.Status);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = outputs,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException ex)
        {
            _logger.LogError(ex, "Failed to get status for Nomad job {JobId}", request.JobId);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to get job status: {ex.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }
}
|
||||
@@ -0,0 +1,128 @@
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Nomad.Client;
|
||||
|
||||
namespace StellaOps.Agent.Nomad.Tasks;
|
||||
|
||||
/// <summary>
|
||||
/// Task handler for scaling Nomad job task groups.
|
||||
/// </summary>
|
||||
public sealed class NomadScaleJobTask : INomadTask
{
    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadScaleJobTask> _logger;

    /// <summary>
    /// Request describing which task group of which job to scale, and to what count.
    /// </summary>
    public sealed record ScaleJobPayload
    {
        /// <summary>
        /// ID of the job to scale.
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Name of the task group to scale.
        /// </summary>
        public required string TaskGroup { get; init; }

        /// <summary>
        /// Target count for the task group.
        /// </summary>
        public required int Count { get; init; }

        /// <summary>
        /// Nomad namespace of the job.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region of the job.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Optional reason; a default mentioning the task ID is used when omitted.
        /// </summary>
        public string? Reason { get; init; }
    }

    /// <summary>
    /// Initializes the handler with its collaborators.
    /// </summary>
    public NomadScaleJobTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadScaleJobTask> logger)
    {
        (_nomadClient, _timeProvider, _logger) = (nomadClient, timeProvider, logger);
    }

    /// <summary>
    /// Requests the scale operation and reports the evaluation ID returned by the client.
    /// A <see cref="NomadApiException"/> yields a failed result.
    /// </summary>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var request = JsonSerializer.Deserialize<ScaleJobPayload>(task.Payload);
        if (request is null)
        {
            throw new InvalidNomadPayloadException("nomad.scale", "Failed to deserialize payload");
        }

        _logger.LogInformation(
            "Scaling Nomad job {JobId} task group {TaskGroup} to {Count}",
            request.JobId,
            request.TaskGroup,
            request.Count);

        try
        {
            var scaleResponse = await _nomadClient.ScaleJobAsync(
                request.JobId,
                request.TaskGroup,
                request.Count,
                request.Reason ?? $"Scaled by Stella Ops (task: {task.Id})",
                request.Namespace,
                request.Region,
                ct);

            _logger.LogInformation(
                "Scaled Nomad job {JobId} task group {TaskGroup} to {Count}, evaluation ID: {EvalId}",
                request.JobId,
                request.TaskGroup,
                request.Count,
                scaleResponse.EvalID);

            var outputs = new Dictionary<string, object>
            {
                ["jobId"] = request.JobId,
                ["taskGroup"] = request.TaskGroup,
                ["count"] = request.Count,
                ["evalId"] = scaleResponse.EvalID ?? ""
            };

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = outputs,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException apiError)
        {
            _logger.LogError(
                apiError,
                "Failed to scale Nomad job {JobId} task group {TaskGroup}",
                request.JobId,
                request.TaskGroup);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to scale job: {apiError.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }
}
|
||||
@@ -0,0 +1,115 @@
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Nomad.Client;
|
||||
|
||||
namespace StellaOps.Agent.Nomad.Tasks;
|
||||
|
||||
/// <summary>
|
||||
/// Task handler for stopping Nomad jobs.
|
||||
/// </summary>
|
||||
public sealed class NomadStopJobTask : INomadTask
{
    private readonly INomadClient _nomadClient;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NomadStopJobTask> _logger;

    /// <summary>
    /// Request describing which job to stop and how.
    /// </summary>
    public sealed record StopJobPayload
    {
        /// <summary>
        /// ID of the job to stop.
        /// </summary>
        public required string JobId { get; init; }

        /// <summary>
        /// Nomad namespace of the job.
        /// </summary>
        public string? Namespace { get; init; }

        /// <summary>
        /// Nomad region of the job.
        /// </summary>
        public string? Region { get; init; }

        /// <summary>
        /// Purge flag forwarded to the stop call (completely remove the job).
        /// </summary>
        public bool Purge { get; init; }

        /// <summary>
        /// Global flag forwarded to the stop call (all regions).
        /// </summary>
        public bool Global { get; init; }
    }

    /// <summary>
    /// Initializes the handler with its collaborators.
    /// </summary>
    public NomadStopJobTask(
        INomadClient nomadClient,
        TimeProvider timeProvider,
        ILogger<NomadStopJobTask> logger)
    {
        (_nomadClient, _timeProvider, _logger) = (nomadClient, timeProvider, logger);
    }

    /// <summary>
    /// Requests the stop operation and reports the evaluation ID returned by the client.
    /// A <see cref="NomadApiException"/> yields a failed result.
    /// </summary>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        var request = JsonSerializer.Deserialize<StopJobPayload>(task.Payload);
        if (request is null)
        {
            throw new InvalidNomadPayloadException("nomad.stop", "Failed to deserialize payload");
        }

        _logger.LogInformation(
            "Stopping Nomad job {JobId} (purge: {Purge}, global: {Global})",
            request.JobId,
            request.Purge,
            request.Global);

        try
        {
            var stopResponse = await _nomadClient.StopJobAsync(
                request.JobId,
                request.Purge,
                request.Global,
                request.Namespace,
                request.Region,
                ct);

            _logger.LogInformation(
                "Stopped Nomad job {JobId}, evaluation ID: {EvalId}",
                request.JobId,
                stopResponse.EvalID);

            var outputs = new Dictionary<string, object>
            {
                ["jobId"] = request.JobId,
                ["evalId"] = stopResponse.EvalID ?? "",
                ["purged"] = request.Purge
            };

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = outputs,
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
        catch (NomadApiException apiError)
        {
            _logger.LogError(apiError, "Failed to stop Nomad job {JobId}", request.JobId);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = $"Failed to stop job: {apiError.Message}",
                CompletedAt = _timeProvider.GetUtcNow()
            };
        }
    }
}
|
||||
@@ -0,0 +1,101 @@
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Ssh.Exceptions;
|
||||
|
||||
/// <summary>
|
||||
/// Thrown when an SSH task payload is invalid.
|
||||
/// </summary>
|
||||
public sealed class InvalidSshPayloadException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given task type.
    /// </summary>
    /// <param name="taskType">Task type whose payload was rejected.</param>
    public InvalidSshPayloadException(string taskType)
        : base(ComposeMessage(taskType))
    {
        TaskType = taskType;
    }

    /// <summary>
    /// Task type whose payload was rejected.
    /// </summary>
    public string TaskType { get; }

    // Builds the exception message passed to the base class.
    private static string ComposeMessage(string taskType)
    {
        return $"Invalid payload for task type '{taskType}'";
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Thrown when SSH connection fails.
|
||||
/// </summary>
|
||||
public sealed class SshConnectionException : AgentException
{
    /// <summary>
    /// Initializes the exception for a failed connection attempt.
    /// </summary>
    /// <param name="host">Host the connection was attempted to.</param>
    /// <param name="port">Port the connection was attempted on.</param>
    /// <param name="message">Failure detail appended to the exception message.</param>
    public SshConnectionException(string host, int port, string message)
        : base(ComposeMessage(host, port, message))
    {
        (Host, Port) = (host, port);
    }

    /// <summary>
    /// Host the connection was attempted to.
    /// </summary>
    public string Host { get; }

    /// <summary>
    /// Port the connection was attempted on.
    /// </summary>
    public int Port { get; }

    // Builds the exception message passed to the base class.
    private static string ComposeMessage(string host, int port, string detail)
    {
        return $"Failed to connect to {host}:{port}: {detail}";
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Thrown when SSH command execution fails.
|
||||
/// </summary>
|
||||
public sealed class SshCommandException : AgentException
{
    /// <summary>
    /// Initializes the exception for a failed command.
    /// </summary>
    /// <param name="command">Command that failed; exposed via <see cref="Command"/> only.</param>
    /// <param name="exitCode">Exit code reported for the command.</param>
    /// <param name="error">Error text appended to the exception message.</param>
    public SshCommandException(string command, int exitCode, string error)
        : base(ComposeMessage(exitCode, error))
    {
        (Command, ExitCode) = (command, exitCode);
    }

    /// <summary>
    /// Command that failed.
    /// </summary>
    public string Command { get; }

    /// <summary>
    /// Exit code reported for the command.
    /// </summary>
    public int ExitCode { get; }

    // Builds the exception message; the command text itself is not part of the message.
    private static string ComposeMessage(int exitCode, string error)
    {
        return $"SSH command failed with exit code {exitCode}: {error}";
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Thrown when file transfer fails.
|
||||
/// </summary>
|
||||
public sealed class SshFileTransferException : AgentException
{
    /// <summary>
    /// Initializes the exception for a failed transfer.
    /// </summary>
    /// <param name="localPath">Local side of the transfer.</param>
    /// <param name="remotePath">Remote side of the transfer.</param>
    /// <param name="message">Failure detail appended to the exception message.</param>
    public SshFileTransferException(string localPath, string remotePath, string message)
        : base(ComposeMessage(localPath, remotePath, message))
    {
        (LocalPath, RemotePath) = (localPath, remotePath);
    }

    /// <summary>
    /// Local side of the transfer.
    /// </summary>
    public string LocalPath { get; }

    /// <summary>
    /// Remote side of the transfer.
    /// </summary>
    public string RemotePath { get; }

    // Builds the exception message passed to the base class.
    private static string ComposeMessage(string localPath, string remotePath, string detail)
    {
        return $"File transfer failed between '{localPath}' and '{remotePath}': {detail}";
    }
}
|
||||
@@ -0,0 +1,82 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Capability;
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Ssh.Tasks;
|
||||
|
||||
namespace StellaOps.Agent.Ssh;
|
||||
|
||||
/// <summary>
|
||||
/// SSH capability for remote command execution and file transfer.
|
||||
/// </summary>
|
||||
public sealed class SshCapability : IAgentCapability
{
    private readonly SshConnectionPool _connectionPool;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<SshCapability> _logger;

    // Task type -> handler, looked up case-insensitively.
    private readonly Dictionary<string, ISshTask> _taskHandlers;

    /// <summary>
    /// Initializes the capability and registers one handler per supported task type.
    /// </summary>
    /// <param name="connectionPool">Pool handed to every task handler.</param>
    /// <param name="timeProvider">Clock passed to handlers on execution.</param>
    /// <param name="logger">Logger used by the capability and passed to its handlers.</param>
    public SshCapability(
        SshConnectionPool connectionPool,
        TimeProvider timeProvider,
        ILogger<SshCapability> logger)
    {
        (_connectionPool, _timeProvider, _logger) = (connectionPool, timeProvider, logger);

        _taskHandlers = new Dictionary<string, ISshTask>(StringComparer.OrdinalIgnoreCase)
        {
            { "ssh.execute", new SshExecuteTask(_connectionPool, logger) },
            { "ssh.upload", new SshUploadTask(_connectionPool, logger) },
            { "ssh.download", new SshDownloadTask(_connectionPool, logger) },
            { "ssh.tunnel", new SshTunnelTask(_connectionPool, logger) }
        };
    }

    /// <inheritdoc />
    public string Name
    {
        get { return "ssh"; }
    }

    /// <inheritdoc />
    public string Version
    {
        get { return "1.0.0"; }
    }

    /// <inheritdoc />
    public IReadOnlyList<string> SupportedTaskTypes
    {
        get
        {
            // A fresh array is returned on every access.
            return new string[] { "ssh.execute", "ssh.upload", "ssh.download", "ssh.tunnel" };
        }
    }

    /// <inheritdoc />
    public Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        // SSH capability is always available if SSH.NET is loaded
        _logger.LogInformation("SSH capability initialized");

        var alwaysReady = Task.FromResult(true);
        return alwaysReady;
    }

    /// <inheritdoc />
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        if (_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            _logger.LogDebug("Executing task {TaskType} with ID {TaskId}", task.TaskType, task.Id);

            return await handler.ExecuteAsync(task, _timeProvider, ct);
        }

        // No handler registered for this task type.
        throw new UnsupportedTaskTypeException(task.TaskType);
    }

    /// <inheritdoc />
    public Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        // SSH capability is always available (no daemon to check)
        var status = new CapabilityHealthStatus(true, "SSH capability available");
        return Task.FromResult(status);
    }
}
|
||||
@@ -0,0 +1,253 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Renci.SshNet;
|
||||
|
||||
namespace StellaOps.Agent.Ssh;
|
||||
|
||||
/// <summary>
/// Connection pool for SSH connections.
/// </summary>
/// <remarks>
/// <para>
/// <see cref="SshClient"/> instances are cached per connection key (see
/// <see cref="SshConnectionInfo.GetConnectionKey"/>) and closed by a periodic timer once they have
/// been idle for longer than the configured timeout.
/// </para>
/// <para>
/// SFTP clients are not pooled: every call to <see cref="GetSftpClientAsync"/> creates a new client
/// that the caller owns and must dispose.
/// </para>
/// </remarks>
public sealed class SshConnectionPool : IAsyncDisposable
{
    /// <summary>How often idle connections are scanned for expiry.</summary>
    private static readonly TimeSpan CleanupInterval = TimeSpan.FromMinutes(1);

    private readonly ConcurrentDictionary<string, PooledConnection> _connections = new();
    private readonly TimeSpan _connectionTimeout;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<SshConnectionPool> _logger;
    private readonly Timer _cleanupTimer;

    // Written by DisposeAsync and read from the timer thread, hence volatile.
    private volatile bool _disposed;

    /// <summary>
    /// Creates a new instance with default 10-minute timeout.
    /// </summary>
    /// <param name="timeProvider">Clock used to stamp and age pooled connections.</param>
    /// <param name="logger">Logger for connection lifecycle messages.</param>
    public SshConnectionPool(TimeProvider timeProvider, ILogger<SshConnectionPool> logger)
        : this(TimeSpan.FromMinutes(10), timeProvider, logger)
    {
    }

    /// <summary>
    /// Creates a new instance with custom timeout.
    /// </summary>
    /// <param name="connectionTimeout">Idle time after which a pooled connection is closed.</param>
    /// <param name="timeProvider">Clock used to stamp and age pooled connections.</param>
    /// <param name="logger">Logger for connection lifecycle messages.</param>
    public SshConnectionPool(TimeSpan connectionTimeout, TimeProvider timeProvider, ILogger<SshConnectionPool> logger)
    {
        _connectionTimeout = connectionTimeout;
        _timeProvider = timeProvider;
        _logger = logger;
        _cleanupTimer = new Timer(CleanupExpiredConnections, null, CleanupInterval, CleanupInterval);
    }

    /// <summary>
    /// Gets a connection from the pool or creates a new one.
    /// </summary>
    /// <param name="connectionInfo">Target endpoint and credentials.</param>
    /// <param name="ct">Cancels the wait before the connection attempt has started.</param>
    /// <returns>A connected client that remains owned by the pool; callers must not dispose it.</returns>
    /// <exception cref="ObjectDisposedException">The pool has been disposed.</exception>
    /// <exception cref="InvalidOperationException">No authentication method is configured.</exception>
    public async Task<SshClient> GetConnectionAsync(
        SshConnectionInfo connectionInfo,
        CancellationToken ct = default)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        var key = connectionInfo.GetConnectionKey();

        if (_connections.TryGetValue(key, out var pooled))
        {
            if (pooled.Client.IsConnected)
            {
                pooled.LastUsed = _timeProvider.GetUtcNow();
                return pooled.Client;
            }

            // The cached client has dropped its connection. Remove exactly this entry and release
            // it; simply overwriting the slot would leave the dead client undisposed.
            if (_connections.TryRemove(KeyValuePair.Create(key, pooled)))
            {
                CloseClient(pooled.Client, key);
            }
        }

        var client = await CreateConnectionAsync(connectionInfo, ct);

        if (_disposed)
        {
            // The pool was disposed while connecting; nothing would ever close this client.
            CloseClient(client, key);
            throw new ObjectDisposedException(GetType().FullName);
        }

        _connections[key] = new PooledConnection(client, _timeProvider.GetUtcNow());

        return client;
    }

    /// <summary>
    /// Gets an SFTP client for file operations.
    /// </summary>
    /// <param name="connectionInfo">Target endpoint and credentials.</param>
    /// <param name="ct">Cancels the wait before the connection attempt has started.</param>
    /// <returns>A connected, non-pooled client that the caller must dispose.</returns>
    /// <exception cref="ObjectDisposedException">The pool has been disposed.</exception>
    /// <exception cref="InvalidOperationException">No authentication method is configured.</exception>
    public async Task<SftpClient> GetSftpClientAsync(
        SshConnectionInfo connectionInfo,
        CancellationToken ct = default)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        var connInfo = BuildConnectionInfo(connectionInfo);
        var sftp = new SftpClient(connInfo);

        try
        {
            await Task.Run(() => sftp.Connect(), ct);
        }
        catch
        {
            // The caller never receives the client on failure, so release it here.
            sftp.Dispose();
            throw;
        }

        _logger.LogDebug(
            "SFTP connection established to {User}@{Host}:{Port}",
            connectionInfo.Username,
            connectionInfo.Host,
            connectionInfo.Port);

        return sftp;
    }

    /// <summary>
    /// Releases a connection back to the pool.
    /// </summary>
    /// <remarks>Only refreshes the entry's idle timestamp; unknown keys are ignored.</remarks>
    /// <param name="connectionKey">Key as produced by <see cref="SshConnectionInfo.GetConnectionKey"/>.</param>
    public void ReleaseConnection(string connectionKey)
    {
        if (_connections.TryGetValue(connectionKey, out var pooled))
        {
            pooled.LastUsed = _timeProvider.GetUtcNow();
        }
    }

    /// <inheritdoc />
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        await _cleanupTimer.DisposeAsync();

        foreach (var (key, pooled) in _connections)
        {
            // Best effort: failures are logged inside CloseClient and never thrown from disposal.
            CloseClient(pooled.Client, key);
        }

        _connections.Clear();
    }

    /// <summary>Opens a new SSH connection, disposing the client if the attempt fails.</summary>
    private async Task<SshClient> CreateConnectionAsync(
        SshConnectionInfo info,
        CancellationToken ct)
    {
        var connInfo = BuildConnectionInfo(info);
        var client = new SshClient(connInfo);

        try
        {
            await Task.Run(() => client.Connect(), ct);
        }
        catch
        {
            client.Dispose();
            throw;
        }

        _logger.LogDebug(
            "SSH connection established to {User}@{Host}:{Port}",
            info.Username,
            info.Host,
            info.Port);

        return client;
    }

    /// <summary>
    /// Translates <see cref="SshConnectionInfo"/> into SSH.NET connection settings. A private key
    /// (when present) is offered before a password (when present).
    /// </summary>
    /// <exception cref="InvalidOperationException">Neither a private key nor a password is set.</exception>
    private static ConnectionInfo BuildConnectionInfo(SshConnectionInfo info)
    {
        var authMethods = new List<AuthenticationMethod>();

        // Private key authentication
        if (!string.IsNullOrEmpty(info.PrivateKey))
        {
            // The key is parsed while the PrivateKeyFile is constructed, so the stream can be
            // released as soon as that is done.
            using var keyStream = new MemoryStream(Encoding.UTF8.GetBytes(info.PrivateKey));
            var keyFile = string.IsNullOrEmpty(info.PrivateKeyPassphrase)
                ? new PrivateKeyFile(keyStream)
                : new PrivateKeyFile(keyStream, info.PrivateKeyPassphrase);

            authMethods.Add(new PrivateKeyAuthenticationMethod(info.Username, keyFile));
        }

        // Password authentication
        if (!string.IsNullOrEmpty(info.Password))
        {
            authMethods.Add(new PasswordAuthenticationMethod(info.Username, info.Password));
        }

        if (authMethods.Count == 0)
        {
            throw new InvalidOperationException("No SSH authentication method configured");
        }

        return new ConnectionInfo(
            info.Host,
            info.Port,
            info.Username,
            authMethods.ToArray());
    }

    /// <summary>Timer callback that closes connections idle for longer than the timeout.</summary>
    private void CleanupExpiredConnections(object? state)
    {
        if (_disposed)
        {
            return;
        }

        try
        {
            var now = _timeProvider.GetUtcNow();

            foreach (var entry in _connections)
            {
                if (now - entry.Value.LastUsed <= _connectionTimeout)
                {
                    continue;
                }

                // Remove only the instance that was inspected, so a connection that replaced it
                // in the meantime is left alone.
                if (_connections.TryRemove(entry) && CloseClient(entry.Value.Client, entry.Key))
                {
                    _logger.LogDebug("Closed expired SSH connection: {Key}", entry.Key);
                }
            }
        }
        catch (Exception ex)
        {
            // An exception escaping a timer callback would be unhandled on a pool thread.
            _logger.LogError(ex, "SSH connection cleanup failed");
        }
    }

    /// <summary>
    /// Disconnects and disposes a client. Never throws; returns <c>false</c> when closing failed.
    /// </summary>
    private bool CloseClient(SshClient client, string key)
    {
        try
        {
            try
            {
                client.Disconnect();
            }
            finally
            {
                // Dispose even when Disconnect throws, otherwise the client is leaked.
                client.Dispose();
            }

            return true;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error closing SSH connection: {Key}", key);
            return false;
        }
    }

    /// <summary>A pooled client together with the time it was last handed out or released.</summary>
    private sealed class PooledConnection
    {
        public SshClient Client { get; }
        public DateTimeOffset LastUsed { get; set; }

        public PooledConnection(SshClient client, DateTimeOffset lastUsed)
        {
            Client = client;
            LastUsed = lastUsed;
        }
    }
}
|
||||
|
||||
/// <summary>
/// SSH connection information.
/// </summary>
/// <remarks>
/// The record carries credentials, so <see cref="ToString"/> is overridden to redact them; the
/// compiler-generated implementation would print every property value verbatim.
/// </remarks>
public sealed record SshConnectionInfo
{
    /// <summary>
    /// Target host.
    /// </summary>
    public required string Host { get; init; }

    /// <summary>
    /// SSH port.
    /// </summary>
    public int Port { get; init; } = 22;

    /// <summary>
    /// Username.
    /// </summary>
    public required string Username { get; init; }

    /// <summary>
    /// Password for password authentication.
    /// </summary>
    public string? Password { get; init; }

    /// <summary>
    /// Private key content for key authentication.
    /// </summary>
    public string? PrivateKey { get; init; }

    /// <summary>
    /// Passphrase for encrypted private key.
    /// </summary>
    public string? PrivateKeyPassphrase { get; init; }

    /// <summary>
    /// Gets a unique key for connection pooling.
    /// </summary>
    /// <returns>A string of the form <c>user@host:port</c>; credentials are not part of the key.</returns>
    public string GetConnectionKey() => $"{Username}@{Host}:{Port}";

    /// <summary>
    /// Returns a description that is safe to log: the endpoint is printed, while each secret is
    /// shown only as set (<c>***</c>) or not set (<c>&lt;none&gt;</c>).
    /// </summary>
    public override string ToString() =>
        $"{nameof(SshConnectionInfo)} {{ Host = {Host}, Port = {Port}, Username = {Username}, " +
        $"Password = {Mask(Password)}, PrivateKey = {Mask(PrivateKey)}, " +
        $"PrivateKeyPassphrase = {Mask(PrivateKeyPassphrase)} }}";

    /// <summary>Replaces a secret with a marker that only reveals whether it is present.</summary>
    private static string Mask(string? secret) => string.IsNullOrEmpty(secret) ? "<none>" : "***";
}
|
||||
@@ -0,0 +1,22 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Agent.Ssh</RootNamespace>
|
||||
<AssemblyName>StellaOps.Agent.Ssh</AssemblyName>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
<PackageReference Include="SSH.NET" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,14 @@
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.Ssh.Tasks;
|
||||
|
||||
/// <summary>
|
||||
/// Interface for SSH task handlers.
|
||||
/// </summary>
/// <remarks>
/// One implementation exists per SSH task type; the capability looks a handler up by task type
/// and forwards the task to <see cref="ExecuteAsync"/>.
/// </remarks>
|
||||
public interface ISshTask
|
||||
{
|
||||
/// <summary>
|
||||
/// Executes the SSH task.
|
||||
/// </summary>
/// <param name="task">The task to run; handlers read its payload and credentials.</param>
/// <param name="timeProvider">Clock the handlers use to stamp the start and completion times of the result.</param>
/// <param name="ct">Token used to request cancellation of the operation.</param>
/// <returns>The outcome of the task, including success flag, outputs and timing.</returns>
|
||||
Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default);
|
||||
}
|
||||
@@ -0,0 +1,156 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Renci.SshNet;
|
||||
using Renci.SshNet.Common;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Ssh.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Ssh.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for downloading files via SFTP.
/// </summary>
/// <remarks>
/// The remote file is first written to a staging file next to the destination
/// (<c>&lt;LocalPath&gt;.partial</c>) and moved into place only after the transfer has completed, so a
/// failed download neither truncates an existing destination file nor leaves a partial one behind.
/// </remarks>
public sealed class SshDownloadTask : ISshTask
{
    /// <summary>Suffix appended to the destination path while the transfer is in progress.</summary>
    private const string StagingSuffix = ".partial";

    private readonly SshConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for ssh.download task.
    /// </summary>
    public sealed record DownloadPayload
    {
        /// <summary>
        /// Target host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// SSH port.
        /// </summary>
        public int Port { get; init; } = 22;

        /// <summary>
        /// Username.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Remote file path.
        /// </summary>
        public required string RemotePath { get; init; }

        /// <summary>
        /// Local destination path.
        /// </summary>
        public required string LocalPath { get; init; }

        /// <summary>
        /// Create parent directories if needed.
        /// </summary>
        public bool CreateDirectory { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="connectionPool">Pool used to open the SFTP connection.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public SshDownloadTask(SshConnectionPool connectionPool, ILogger logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Transfer failures are reported through an unsuccessful <see cref="AgentTaskResult"/>; only an
    /// unusable payload results in an exception.
    /// </remarks>
    /// <exception cref="InvalidSshPayloadException">The payload deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<DownloadPayload>(task.Payload)
            ?? throw new InvalidSshPayloadException("ssh.download");

        var connectionInfo = new SshConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            Username = payload.Username,
            Password = task.Credentials.GetValueOrDefault("ssh.password"),
            PrivateKey = task.Credentials.GetValueOrDefault("ssh.privateKey"),
            PrivateKeyPassphrase = task.Credentials.GetValueOrDefault("ssh.passphrase")
        };

        _logger.LogInformation(
            "Downloading {User}@{Host}:{Remote} to {Local}",
            payload.Username,
            payload.Host,
            payload.RemotePath,
            payload.LocalPath);

        var stagingPath = payload.LocalPath + StagingSuffix;

        try
        {
            // Disposing the client at the end of the scope also closes the connection.
            using var sftp = await _connectionPool.GetSftpClientAsync(connectionInfo, ct);

            // Create local directory if needed
            if (payload.CreateDirectory)
            {
                var localDir = Path.GetDirectoryName(payload.LocalPath);
                if (!string.IsNullOrEmpty(localDir))
                {
                    Directory.CreateDirectory(localDir);
                }
            }

            // Read the attributes first: a missing remote file fails here, before anything is
            // written locally.
            var remoteAttributes = sftp.GetAttributes(payload.RemotePath);

            // The staging stream must be closed before the file can be moved into place.
            await using (var stagingFile = File.Create(stagingPath))
            {
                await Task.Run(() => sftp.DownloadFile(payload.RemotePath, stagingFile), ct);
            }

            File.Move(stagingPath, payload.LocalPath, overwrite: true);

            _logger.LogInformation(
                "Downloaded {Size} bytes to {Local}",
                remoteAttributes.Size,
                payload.LocalPath);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["localPath"] = payload.LocalPath,
                    ["size"] = remoteAttributes.Size
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (SftpPathNotFoundException ex)
        {
            _logger.LogWarning(ex, "Remote file {Remote} not found on {Host}", payload.RemotePath, payload.Host);

            return Failure(task, $"Remote file not found: {payload.RemotePath}", startTime, timeProvider);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to download file from {Host}", payload.Host);

            return Failure(task, ex.Message, startTime, timeProvider);
        }
        finally
        {
            // After a successful move the staging file no longer exists and this is a no-op.
            DeleteStagingFile(stagingPath);
        }
    }

    /// <summary>Builds the unsuccessful result shared by all failure paths.</summary>
    private static AgentTaskResult Failure(
        AgentTaskInfo task,
        string error,
        DateTimeOffset startTime,
        TimeProvider timeProvider)
    {
        var completedAt = timeProvider.GetUtcNow();

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    /// <summary>Removes a leftover staging file; problems are logged and never thrown.</summary>
    private void DeleteStagingFile(string stagingPath)
    {
        try
        {
            if (File.Exists(stagingPath))
            {
                File.Delete(stagingPath);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(ex, "Could not remove partial download {Path}", stagingPath);
        }
    }
}
|
||||
@@ -0,0 +1,199 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Renci.SshNet;
|
||||
using Renci.SshNet.Common;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Ssh.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Ssh.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for executing remote commands via SSH.
/// </summary>
/// <remarks>
/// Environment variables and the working directory are applied by prefixing the command with
/// <c>export</c> and <c>cd</c> statements joined by <c>&amp;&amp;</c>. Values are single-quoted and
/// escaped; variable names are validated because they cannot be quoted.
/// </remarks>
public sealed class SshExecuteTask : ISshTask
{
    /// <summary>Polling interval, in milliseconds, while waiting for the remote command.</summary>
    private const int PollIntervalMilliseconds = 100;

    private readonly SshConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for ssh.execute task.
    /// </summary>
    public sealed record ExecutePayload
    {
        /// <summary>
        /// Target host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// SSH port.
        /// </summary>
        public int Port { get; init; } = 22;

        /// <summary>
        /// Username.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Command to execute.
        /// </summary>
        public required string Command { get; init; }

        /// <summary>
        /// Environment variables.
        /// </summary>
        public IReadOnlyDictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// Working directory.
        /// </summary>
        public string? WorkingDirectory { get; init; }

        /// <summary>
        /// Command timeout.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(10);

        /// <summary>
        /// Combine stdout and stderr in output.
        /// </summary>
        public bool CombineOutput { get; init; } = true;
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="connectionPool">Pool supplying the SSH connection.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public SshExecuteTask(SshConnectionPool connectionPool, ILogger logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// The task succeeds only when the remote exit code is 0. Connection, validation and
    /// cancellation failures are reported through an unsuccessful <see cref="AgentTaskResult"/>.
    /// </remarks>
    /// <exception cref="InvalidSshPayloadException">The payload deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<ExecutePayload>(task.Payload)
            ?? throw new InvalidSshPayloadException("ssh.execute");

        var connectionInfo = new SshConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            Username = payload.Username,
            Password = task.Credentials.GetValueOrDefault("ssh.password"),
            PrivateKey = task.Credentials.GetValueOrDefault("ssh.privateKey"),
            PrivateKeyPassphrase = task.Credentials.GetValueOrDefault("ssh.passphrase")
        };

        _logger.LogInformation(
            "Executing SSH command on {User}@{Host}",
            payload.Username,
            payload.Host);

        try
        {
            // Build (and thereby validate) the command line before opening a connection.
            var fullCommand = BuildCommand(payload);

            var client = await _connectionPool.GetConnectionAsync(connectionInfo, ct);

            using var command = client.CreateCommand(fullCommand);
            command.CommandTimeout = payload.Timeout;

            var asyncResult = command.BeginExecute();

            try
            {
                // Wait for completion with cancellation support
                while (!asyncResult.IsCompleted)
                {
                    ct.ThrowIfCancellationRequested();
                    await Task.Delay(PollIntervalMilliseconds, ct);
                }
            }
            catch (OperationCanceledException)
            {
                // Without this the remote process would keep running after the task was cancelled.
                TryCancelRemoteCommand(command, payload.Host);
                throw;
            }

            var result = command.EndExecute(asyncResult);

            int exitCode = command.ExitStatus ?? -1;
            string stdout = result ?? string.Empty;
            string stderr = command.Error ?? string.Empty;

            _logger.LogInformation(
                "SSH command completed with exit code {ExitCode}",
                exitCode);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = exitCode == 0,
                Error = exitCode != 0 ? stderr : null,
                Outputs = new Dictionary<string, object>
                {
                    ["exitCode"] = exitCode,
                    ["stdout"] = stdout,
                    ["stderr"] = stderr,
                    ["output"] = payload.CombineOutput ? $"{stdout}\n{stderr}".Trim() : stdout
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (SshException ex)
        {
            _logger.LogError(ex, "SSH command failed on {Host}", payload.Host);

            return Failure(task, $"SSH error: {ex.Message}", startTime, timeProvider);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to execute SSH command on {Host}", payload.Host);

            return Failure(task, ex.Message, startTime, timeProvider);
        }
    }

    /// <summary>Builds the unsuccessful result shared by all failure paths.</summary>
    private static AgentTaskResult Failure(
        AgentTaskInfo task,
        string error,
        DateTimeOffset startTime,
        TimeProvider timeProvider)
    {
        var completedAt = timeProvider.GetUtcNow();

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    /// <summary>Asks the server to stop the running command; failures are logged, not thrown.</summary>
    private void TryCancelRemoteCommand(SshCommand command, string host)
    {
        try
        {
            command.CancelAsync();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to cancel SSH command on {Host}", host);
        }
    }

    /// <summary>
    /// Composes the shell line: exports, then an optional <c>cd</c>, then the command itself,
    /// joined with <c>&amp;&amp;</c> so that a failing step stops the chain.
    /// </summary>
    /// <exception cref="ArgumentException">An environment variable name is not a valid shell identifier.</exception>
    private static string BuildCommand(ExecutePayload payload)
    {
        var parts = new List<string>();

        // Set environment variables
        if (payload.Environment is not null)
        {
            foreach (var (key, value) in payload.Environment)
            {
                // The name is emitted unquoted, so anything other than a plain identifier could
                // smuggle extra shell syntax into the command line.
                if (!IsValidEnvironmentVariableName(key))
                {
                    throw new ArgumentException($"Invalid environment variable name: '{key}'");
                }

                parts.Add($"export {key}='{EscapeShellString(value)}'");
            }
        }

        // Change to working directory
        if (!string.IsNullOrEmpty(payload.WorkingDirectory))
        {
            parts.Add($"cd '{EscapeShellString(payload.WorkingDirectory)}'");
        }

        parts.Add(payload.Command);

        return string.Join(" && ", parts);
    }

    /// <summary>
    /// Returns <c>true</c> for names made of ASCII letters, digits and underscores that do not
    /// start with a digit.
    /// </summary>
    private static bool IsValidEnvironmentVariableName(string name)
    {
        if (string.IsNullOrEmpty(name) || char.IsAsciiDigit(name[0]))
        {
            return false;
        }

        foreach (var c in name)
        {
            if (!char.IsAsciiLetterOrDigit(c) && c != '_')
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Escapes a value for use inside single quotes: each <c>'</c> closes the quoted string, adds a
    /// double-quoted <c>'</c>, and reopens the single-quoted string.
    /// </summary>
    private static string EscapeShellString(string value)
    {
        return value.Replace("'", "'\"'\"'");
    }
}
|
||||
@@ -0,0 +1,168 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Renci.SshNet;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Ssh.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Ssh.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for creating SSH tunnels (port forwarding).
/// </summary>
/// <remarks>
/// The tunnel is a local port forward attached to a pooled SSH connection. It is kept open for the
/// requested duration and is always stopped, detached and disposed afterwards, including when the
/// task is cancelled or the tunnel fails to start, so nothing is left behind on the shared client.
/// </remarks>
public sealed class SshTunnelTask : ISshTask
{
    private const int MaxPort = 65535;

    private readonly SshConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for ssh.tunnel task.
    /// </summary>
    public sealed record TunnelPayload
    {
        /// <summary>
        /// Target host for SSH connection.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// SSH port.
        /// </summary>
        public int Port { get; init; } = 22;

        /// <summary>
        /// Username.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Local port to bind.
        /// </summary>
        public required int LocalPort { get; init; }

        /// <summary>
        /// Remote host to forward to.
        /// </summary>
        public required string RemoteHost { get; init; }

        /// <summary>
        /// Remote port to forward to.
        /// </summary>
        public required int RemotePort { get; init; }

        /// <summary>
        /// Duration to keep tunnel open.
        /// </summary>
        public TimeSpan Duration { get; init; } = TimeSpan.FromMinutes(30);

        /// <summary>
        /// Local bind address (default: 127.0.0.1).
        /// </summary>
        public string LocalHost { get; init; } = "127.0.0.1";
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="connectionPool">Pool supplying the SSH connection that carries the tunnel.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public SshTunnelTask(SshConnectionPool connectionPool, ILogger logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Completes successfully once the tunnel has been open for the requested duration. Failures
    /// are reported through an unsuccessful <see cref="AgentTaskResult"/>; cancellation propagates
    /// as <see cref="OperationCanceledException"/>.
    /// </remarks>
    /// <exception cref="InvalidSshPayloadException">The payload deserializes to <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<TunnelPayload>(task.Payload)
            ?? throw new InvalidSshPayloadException("ssh.tunnel");

        var connectionInfo = new SshConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            Username = payload.Username,
            Password = task.Credentials.GetValueOrDefault("ssh.password"),
            PrivateKey = task.Credentials.GetValueOrDefault("ssh.privateKey"),
            PrivateKeyPassphrase = task.Credentials.GetValueOrDefault("ssh.passphrase")
        };

        _logger.LogInformation(
            "Creating SSH tunnel: {LocalHost}:{LocalPort} -> {User}@{Host} -> {RemoteHost}:{RemotePort}",
            payload.LocalHost,
            payload.LocalPort,
            payload.Username,
            payload.Host,
            payload.RemoteHost,
            payload.RemotePort);

        try
        {
            // Reject out-of-range values explicitly; the casts to uint below would otherwise turn
            // a negative number into a huge port.
            ValidatePort(payload.LocalPort, nameof(TunnelPayload.LocalPort));
            ValidatePort(payload.RemotePort, nameof(TunnelPayload.RemotePort));

            var client = await _connectionPool.GetConnectionAsync(connectionInfo, ct);

            var tunnel = new ForwardedPortLocal(
                payload.LocalHost,
                (uint)payload.LocalPort,
                payload.RemoteHost,
                (uint)payload.RemotePort);

            try
            {
                client.AddForwardedPort(tunnel);
                tunnel.Start();

                _logger.LogInformation(
                    "SSH tunnel established: {LocalHost}:{LocalPort} -> {RemoteHost}:{RemotePort}",
                    payload.LocalHost,
                    payload.LocalPort,
                    payload.RemoteHost,
                    payload.RemotePort);

                // Keep tunnel open for specified duration. The delay finishes normally when the
                // duration elapses and throws only when the caller cancels.
                await Task.Delay(payload.Duration, timeProvider, ct);
            }
            finally
            {
                CloseTunnel(client, tunnel, payload.Host);
            }

            _logger.LogInformation("SSH tunnel closed after {Duration}", payload.Duration);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["localHost"] = payload.LocalHost,
                    ["localPort"] = payload.LocalPort,
                    ["remoteHost"] = payload.RemoteHost,
                    ["remotePort"] = payload.RemotePort,
                    ["duration"] = payload.Duration.ToString()
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogError(ex, "Failed to create SSH tunnel to {Host}", payload.Host);
            var completedAt = timeProvider.GetUtcNow();

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = false,
                Error = ex.Message,
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
    }

    /// <summary>Throws when <paramref name="port"/> is outside the range 0-65535.</summary>
    private static void ValidatePort(int port, string name)
    {
        if (port is < 0 or > MaxPort)
        {
            throw new ArgumentOutOfRangeException(name, port, $"Port must be between 0 and {MaxPort}.");
        }
    }

    /// <summary>
    /// Stops the forward, detaches it from the pooled client and disposes it. Errors are logged
    /// and never thrown, so they cannot mask the exception that triggered the cleanup.
    /// </summary>
    private void CloseTunnel(SshClient client, ForwardedPortLocal tunnel, string host)
    {
        try
        {
            if (tunnel.IsStarted)
            {
                tunnel.Stop();
            }

            client.RemoveForwardedPort(tunnel);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error while closing SSH tunnel to {Host}", host);
        }
        finally
        {
            tunnel.Dispose();
        }
    }
}
|
||||
@@ -0,0 +1,188 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Renci.SshNet;
|
||||
using Renci.SshNet.Common;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.Ssh.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.Ssh.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler for uploading files via SFTP.
/// </summary>
/// <remarks>
/// <see cref="UploadPayload.Permissions"/> holds the numeric mode value (420 is octal 0644).
/// SSH.NET's <c>ChangePermissions</c> instead expects the octal digits written as a decimal number
/// (644), so the value is converted before it is applied.
/// </remarks>
public sealed class SshUploadTask : ISshTask
{
    /// <summary>Largest mode that consists of permission bits only (octal 0777).</summary>
    private const int MaxPermissions = 0x1FF;

    private readonly SshConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for ssh.upload task.
    /// </summary>
    public sealed record UploadPayload
    {
        /// <summary>
        /// Target host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// SSH port.
        /// </summary>
        public int Port { get; init; } = 22;

        /// <summary>
        /// Username.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Local file path.
        /// </summary>
        public required string LocalPath { get; init; }

        /// <summary>
        /// Remote destination path.
        /// </summary>
        public required string RemotePath { get; init; }

        /// <summary>
        /// Create parent directories if needed.
        /// </summary>
        public bool CreateDirectory { get; init; } = true;

        /// <summary>
        /// File permissions as a numeric mode value; the default 420 equals octal 0644.
        /// </summary>
        public int Permissions { get; init; } = 420; // 0644
    }

    /// <summary>
    /// Creates a new instance.
    /// </summary>
    /// <param name="connectionPool">Pool used to open the SFTP connection.</param>
    /// <param name="logger">Logger for progress and failures.</param>
    public SshUploadTask(SshConnectionPool connectionPool, ILogger logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Transfer and validation failures are reported through an unsuccessful
    /// <see cref="AgentTaskResult"/>; only an unusable payload results in an exception.
    /// </remarks>
    /// <exception cref="InvalidSshPayloadException">The payload deserializes to <c>null</c>.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = JsonSerializer.Deserialize<UploadPayload>(task.Payload)
            ?? throw new InvalidSshPayloadException("ssh.upload");

        var connectionInfo = new SshConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            Username = payload.Username,
            Password = task.Credentials.GetValueOrDefault("ssh.password"),
            PrivateKey = task.Credentials.GetValueOrDefault("ssh.privateKey"),
            PrivateKeyPassphrase = task.Credentials.GetValueOrDefault("ssh.passphrase")
        };

        _logger.LogInformation(
            "Uploading {Local} to {User}@{Host}:{Remote}",
            payload.LocalPath,
            payload.Username,
            payload.Host,
            payload.RemotePath);

        try
        {
            // Validate up front so that a bad mode is rejected before anything is transferred.
            var mode = ToSshNetMode(payload.Permissions);

            // Disposing the client at the end of the scope also closes the connection.
            using var sftp = await _connectionPool.GetSftpClientAsync(connectionInfo, ct);

            // Create parent directory if needed
            if (payload.CreateDirectory)
            {
                var parentDir = Path.GetDirectoryName(payload.RemotePath)?.Replace('\\', '/');
                if (!string.IsNullOrEmpty(parentDir))
                {
                    await CreateRemoteDirectoryAsync(sftp, parentDir, ct);
                }
            }

            // Upload file
            await using var localFile = File.OpenRead(payload.LocalPath);
            await Task.Run(() => sftp.UploadFile(localFile, payload.RemotePath), ct);

            // Set permissions
            sftp.ChangePermissions(payload.RemotePath, mode);

            var fileInfo = sftp.GetAttributes(payload.RemotePath);

            _logger.LogInformation(
                "Uploaded {Size} bytes to {Remote}",
                fileInfo.Size,
                payload.RemotePath);

            var completedAt = timeProvider.GetUtcNow();
            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["remotePath"] = payload.RemotePath,
                    ["size"] = fileInfo.Size,
                    ["permissions"] = payload.Permissions
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (SftpPathNotFoundException ex)
        {
            _logger.LogWarning(ex, "Remote path {Remote} not found on {Host}", payload.RemotePath, payload.Host);

            return Failure(task, $"Remote path not found: {ex.Message}", startTime, timeProvider);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to upload file to {Host}", payload.Host);

            return Failure(task, ex.Message, startTime, timeProvider);
        }
    }

    /// <summary>Builds the unsuccessful result shared by all failure paths.</summary>
    private static AgentTaskResult Failure(
        AgentTaskInfo task,
        string error,
        DateTimeOffset startTime,
        TimeProvider timeProvider)
    {
        var completedAt = timeProvider.GetUtcNow();

        return new AgentTaskResult
        {
            TaskId = task.Id,
            Success = false,
            Error = error,
            CompletedAt = completedAt,
            Duration = completedAt - startTime
        };
    }

    /// <summary>
    /// Converts a numeric mode value into the form SSH.NET expects, where each decimal digit is
    /// one octal permission digit (for example 420, i.e. octal 0644, becomes 644).
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The value is negative or contains bits beyond octal 0777.
    /// </exception>
    private static short ToSshNetMode(int permissions)
    {
        if (permissions is < 0 or > MaxPermissions)
        {
            throw new ArgumentOutOfRangeException(
                nameof(permissions),
                permissions,
                "Permissions must be between 0 and 511 (octal 0777).");
        }

        var owner = (permissions >> 6) & 7;
        var group = (permissions >> 3) & 7;
        var other = permissions & 7;

        return (short)((owner * 100) + (group * 10) + other);
    }

    /// <summary>
    /// Creates every missing directory along <paramref name="path"/>, one segment at a time.
    /// A relative path stays relative instead of being re-rooted at <c>/</c>.
    /// </summary>
    /// <exception cref="InvalidOperationException">A segment exists but is not a directory.</exception>
    private static async Task CreateRemoteDirectoryAsync(SftpClient sftp, string path, CancellationToken ct)
    {
        var isAbsolute = path.StartsWith('/');
        var parts = path.Split('/').Where(p => !string.IsNullOrEmpty(p)).ToList();
        var current = string.Empty;

        foreach (var part in parts)
        {
            // Only an absolute path gets a leading slash on its first segment.
            current = current.Length == 0 && !isAbsolute ? part : $"{current}/{part}";

            try
            {
                var attrs = sftp.GetAttributes(current);
                if (!attrs.IsDirectory)
                {
                    throw new InvalidOperationException($"Path exists but is not a directory: {current}");
                }
            }
            catch (SftpPathNotFoundException)
            {
                var missing = current;
                await Task.Run(() => sftp.CreateDirectory(missing), ct);
            }
        }
    }
}
|
||||
@@ -0,0 +1,121 @@
|
||||
using StellaOps.Agent.Core.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.WinRM.Exceptions;
|
||||
|
||||
/// <summary>
/// Signals that the payload supplied for a WinRM task is invalid.
/// </summary>
public sealed class InvalidWinRmPayloadException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given task type.
    /// </summary>
    /// <param name="taskType">Task type whose payload was rejected; it is embedded in the message.</param>
    public InvalidWinRmPayloadException(string taskType)
        : base(ComposeMessage(taskType))
    {
        TaskType = taskType;
    }

    /// <summary>
    /// Gets the task type whose payload was invalid.
    /// </summary>
    public string TaskType { get; }

    /// <summary>Formats the exception message.</summary>
    private static string ComposeMessage(string taskType) =>
        $"Invalid payload for task type '{taskType}'";
}
|
||||
|
||||
/// <summary>
/// Signals that a WinRM connection could not be established.
/// </summary>
public sealed class WinRmConnectionException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given endpoint.
    /// </summary>
    /// <param name="host">Host the connection was attempted against.</param>
    /// <param name="port">WinRM port the connection was attempted against.</param>
    /// <param name="message">Failure detail appended to the exception message.</param>
    public WinRmConnectionException(string host, int port, string message)
        : base(ComposeMessage(host, port, message))
    {
        Host = host;
        Port = port;
    }

    /// <summary>
    /// Gets the target host.
    /// </summary>
    public string Host { get; }

    /// <summary>
    /// Gets the WinRM port.
    /// </summary>
    public int Port { get; }

    /// <summary>Formats the exception message.</summary>
    private static string ComposeMessage(string host, int port, string message) =>
        $"Failed to connect to {host}:{port}: {message}";
}
|
||||
|
||||
/// <summary>
/// Signals that a PowerShell script failed to execute.
/// </summary>
public sealed class PowerShellExecutionException : AgentException
{
    /// <summary>
    /// Initializes the exception for the given script.
    /// </summary>
    /// <param name="script">Script that failed; kept on <see cref="Script"/> but not put in the message.</param>
    /// <param name="error">Error text appended to the exception message.</param>
    public PowerShellExecutionException(string script, string error)
        : base(ComposeMessage(error))
    {
        Script = script;
    }

    /// <summary>
    /// Gets the script that failed.
    /// </summary>
    public string Script { get; }

    /// <summary>Formats the exception message.</summary>
    private static string ComposeMessage(string error) =>
        $"PowerShell execution failed: {error}";
}
|
||||
|
||||
/// <summary>
/// Signals that an operation on a Windows service failed.
/// </summary>
public sealed class WindowsServiceException : AgentException
{
    /// <summary>
    /// Name of the service the operation targeted.
    /// </summary>
    public string ServiceName { get; }

    /// <summary>
    /// Name of the operation that failed.
    /// </summary>
    public string Operation { get; }

    /// <summary>
    /// Initializes the exception for a failed service operation.
    /// </summary>
    /// <param name="serviceName">Service the operation targeted.</param>
    /// <param name="operation">Operation that failed.</param>
    /// <param name="message">Failure detail appended to the exception message.</param>
    public WindowsServiceException(string serviceName, string operation, string message)
        : base(BuildMessage(serviceName, operation, message))
    {
        (ServiceName, Operation) = (serviceName, operation);
    }

    // Message formatting lives here so the constructor initializer stays short.
    private static string BuildMessage(string serviceName, string operation, string message)
        => $"Windows service operation '{operation}' failed for '{serviceName}': {message}";
}
|
||||
|
||||
/// <summary>
/// Signals that a file transfer over WinRM failed.
/// </summary>
public sealed class WinRmFileTransferException : AgentException
{
    /// <summary>
    /// Path on the local (agent) side of the transfer.
    /// </summary>
    public string LocalPath { get; }

    /// <summary>
    /// Path on the remote side of the transfer.
    /// </summary>
    public string RemotePath { get; }

    /// <summary>
    /// Initializes the exception for a failed transfer.
    /// </summary>
    /// <param name="localPath">Local path involved in the transfer.</param>
    /// <param name="remotePath">Remote path involved in the transfer.</param>
    /// <param name="message">Failure detail appended to the exception message.</param>
    public WinRmFileTransferException(string localPath, string remotePath, string message)
        : base(BuildMessage(localPath, remotePath, message))
    {
        (LocalPath, RemotePath) = (localPath, remotePath);
    }

    // Message formatting lives here so the constructor initializer stays short.
    private static string BuildMessage(string localPath, string remotePath, string message)
        => $"File transfer failed between '{localPath}' and '{remotePath}': {message}";
}
|
||||
@@ -0,0 +1,23 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>StellaOps.Agent.WinRM</RootNamespace>
|
||||
<AssemblyName>StellaOps.Agent.WinRM</AssemblyName>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Http" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
<!-- WS-Management over HTTP for cross-platform WinRM support -->
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Agent.Core\StellaOps.Agent.Core.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,14 @@
|
||||
using StellaOps.Agent.Core.Models;
|
||||
|
||||
namespace StellaOps.Agent.WinRM.Tasks;
|
||||
|
||||
/// <summary>
/// Contract implemented by the WinRM task handlers.
/// </summary>
public interface IWinRmTask
{
    /// <summary>
    /// Runs the WinRM task described by <paramref name="task"/>.
    /// </summary>
    /// <param name="task">Task to execute.</param>
    /// <param name="timeProvider">Clock supplied by the caller for time measurements.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The result of the task.</returns>
    Task<AgentTaskResult> ExecuteAsync(
        AgentTaskInfo task,
        TimeProvider timeProvider,
        CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,169 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.WinRM.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.WinRM.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler that runs a PowerShell script on a remote Windows host over WinRM.
/// </summary>
public sealed class PowerShellTask : IWinRmTask
{
    // Task type reported in InvalidWinRmPayloadException when the payload is unusable.
    private const string TaskTypeName = "winrm.powershell";

    private readonly WinRmConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for PowerShell execution.
    /// </summary>
    public sealed record PowerShellPayload
    {
        /// <summary>
        /// Target Windows host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// WinRM port. Defaults to 5985.
        /// </summary>
        public int Port { get; init; } = 5985;

        /// <summary>
        /// Use SSL/TLS.
        /// </summary>
        public bool UseSSL { get; init; }

        /// <summary>
        /// Username for authentication.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Password for authentication.
        /// </summary>
        public string? Password { get; init; }

        /// <summary>
        /// Windows domain.
        /// </summary>
        public string? Domain { get; init; }

        /// <summary>
        /// PowerShell script to execute.
        /// </summary>
        public required string Script { get; init; }

        /// <summary>
        /// Execution timeout. Defaults to 5 minutes; forwarded to the connection settings.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(5);

        /// <summary>
        /// When <c>true</c> (the default), a script result that reports errors
        /// produces an unsuccessful task result.
        /// </summary>
        public bool FailOnError { get; init; } = true;
    }

    /// <summary>
    /// Creates a new PowerShell task handler.
    /// </summary>
    /// <param name="connectionPool">Pool used to obtain WinRM sessions.</param>
    /// <param name="logger">Logger for execution diagnostics.</param>
    public PowerShellTask(WinRmConnectionPool connectionPool, ILogger<PowerShellTask> logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <exception cref="InvalidWinRmPayloadException">The payload is missing or is not valid JSON for <see cref="PowerShellPayload"/>.</exception>
    /// <exception cref="WinRmConnectionException">An <see cref="HttpRequestException"/> occurred while talking to the host.</exception>
    /// <exception cref="PowerShellExecutionException">Any other failure while obtaining the session or running the script.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = ParsePayload(task);

        var connectionInfo = new WinRmConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            UseSSL = payload.UseSSL,
            Username = payload.Username,
            Password = payload.Password,
            Domain = payload.Domain,
            Timeout = payload.Timeout
        };

        try
        {
            _logger.LogInformation(
                "Executing PowerShell script on {Host}:{Port}",
                payload.Host, payload.Port);

            var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);
            var result = await session.ExecuteAsync(payload.Script, ct);

            var completedAt = timeProvider.GetUtcNow();

            // Both outcomes expose the same raw outputs; only Success/Error differ.
            Dictionary<string, object> BuildOutputs() => new()
            {
                ["exitCode"] = result.ExitCode,
                ["stdout"] = result.Stdout,
                ["stderr"] = result.Stderr
            };

            if (payload.FailOnError && result.HadErrors)
            {
                _logger.LogWarning(
                    "PowerShell script failed with exit code {ExitCode}: {Stderr}",
                    result.ExitCode, result.Stderr);

                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = result.Stderr,
                    Outputs = BuildOutputs(),
                    CompletedAt = completedAt,
                    Duration = completedAt - startTime
                };
            }

            _logger.LogInformation(
                "PowerShell script completed with exit code {ExitCode}",
                result.ExitCode);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = BuildOutputs(),
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (HttpRequestException ex)
        {
            _logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);

            throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
        }
        catch (Exception ex) when (
            ex is not WinRmConnectionException and not InvalidWinRmPayloadException
            // Caller-requested cancellation must surface as cancellation, not as a script failure.
            // Cancellations not triggered by ct (e.g. internal timeouts) are still wrapped below.
            && !(ex is OperationCanceledException && ct.IsCancellationRequested))
        {
            _logger.LogError(ex, "PowerShell execution failed on {Host}", payload.Host);

            throw new PowerShellExecutionException(payload.Script, ex.Message);
        }
    }

    // Deserializes the task payload; a null result or malformed JSON is reported as an invalid payload.
    private static PowerShellPayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<PowerShellPayload>(task.Payload)
                ?? throw new InvalidWinRmPayloadException(TaskTypeName);
        }
        catch (JsonException)
        {
            throw new InvalidWinRmPayloadException(TaskTypeName);
        }
    }
}
|
||||
@@ -0,0 +1,360 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.WinRM.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.WinRM.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler that transfers files to and from a Windows host by running
/// PowerShell scripts over WinRM. File content travels Base64-encoded inside
/// the script (upload) or the script output (download).
/// </summary>
public sealed class WinRmFileTransferTask : IWinRmTask
{
    private const string UploadTaskType = "winrm.upload";
    private const string DownloadTaskType = "winrm.download";

    // PowerShell accepts these typographic quotes as single-quote delimiters in addition
    // to the ASCII apostrophe, so all of them must be doubled inside a '...' literal.
    private static readonly string[] SingleQuoteCharacters = { "'", "\u2018", "\u2019", "\u201A", "\u201B" };

    private readonly WinRmConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for file upload operations.
    /// </summary>
    public sealed record FileUploadPayload
    {
        /// <summary>
        /// Target Windows host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// WinRM port. Defaults to 5985.
        /// </summary>
        public int Port { get; init; } = 5985;

        /// <summary>
        /// Use SSL/TLS.
        /// </summary>
        public bool UseSSL { get; init; }

        /// <summary>
        /// Username for authentication.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Password for authentication.
        /// </summary>
        public string? Password { get; init; }

        /// <summary>
        /// Windows domain.
        /// </summary>
        public string? Domain { get; init; }

        /// <summary>
        /// Local file path to upload.
        /// </summary>
        public required string LocalPath { get; init; }

        /// <summary>
        /// Remote destination path.
        /// </summary>
        public required string RemotePath { get; init; }

        /// <summary>
        /// Whether to create the remote parent directory before writing. Defaults to <c>true</c>.
        /// </summary>
        public bool CreateDirectories { get; init; } = true;

        /// <summary>
        /// Operation timeout. Defaults to 30 minutes; forwarded to the connection settings.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(30);
    }

    /// <summary>
    /// Payload for file download operations.
    /// </summary>
    public sealed record FileDownloadPayload
    {
        /// <summary>
        /// Target Windows host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// WinRM port. Defaults to 5985.
        /// </summary>
        public int Port { get; init; } = 5985;

        /// <summary>
        /// Use SSL/TLS.
        /// </summary>
        public bool UseSSL { get; init; }

        /// <summary>
        /// Username for authentication.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Password for authentication.
        /// </summary>
        public string? Password { get; init; }

        /// <summary>
        /// Windows domain.
        /// </summary>
        public string? Domain { get; init; }

        /// <summary>
        /// Remote file path to download.
        /// </summary>
        public required string RemotePath { get; init; }

        /// <summary>
        /// Local destination path.
        /// </summary>
        public required string LocalPath { get; init; }

        /// <summary>
        /// Operation timeout. Defaults to 30 minutes; forwarded to the connection settings.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(30);
    }

    /// <summary>
    /// Creates a new file transfer task handler.
    /// </summary>
    /// <param name="connectionPool">Pool used to obtain WinRM sessions.</param>
    /// <param name="logger">Logger for transfer diagnostics.</param>
    public WinRmFileTransferTask(WinRmConnectionPool connectionPool, ILogger<WinRmFileTransferTask> logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <summary>
    /// Uploads a local file to the remote host.
    /// </summary>
    /// <param name="task">Task whose payload deserializes to <see cref="FileUploadPayload"/>.</param>
    /// <param name="timeProvider">Clock used for the start/completion timestamps.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A successful result carrying the paths, byte count and script output.</returns>
    /// <exception cref="InvalidWinRmPayloadException">The payload is missing or malformed.</exception>
    /// <exception cref="WinRmConnectionException">An <see cref="HttpRequestException"/> occurred while talking to the host.</exception>
    /// <exception cref="WinRmFileTransferException">The local file is missing, the remote script reported errors, or any other failure occurred.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<AgentTaskResult> ExecuteUploadAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = ParsePayload<FileUploadPayload>(task, UploadTaskType);

        if (!File.Exists(payload.LocalPath))
        {
            throw new WinRmFileTransferException(
                payload.LocalPath,
                payload.RemotePath,
                "Local file does not exist");
        }

        var connectionInfo = new WinRmConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            UseSSL = payload.UseSSL,
            Username = payload.Username,
            Password = payload.Password,
            Domain = payload.Domain,
            Timeout = payload.Timeout
        };

        try
        {
            _logger.LogInformation(
                "Uploading file {LocalPath} to {Host}:{RemotePath}",
                payload.LocalPath, payload.Host, payload.RemotePath);

            var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);
            var remotePath = EscapeSingleQuoted(payload.RemotePath);

            // Create parent directory if needed
            if (payload.CreateDirectories)
            {
                var dirScript = $@"
$dir = Split-Path -Parent '{remotePath}'
if ($dir -and !(Test-Path $dir)) {{
New-Item -Path $dir -ItemType Directory -Force | Out-Null
}}
";
                var dirResult = await session.ExecuteAsync(dirScript, ct);
                if (dirResult.HadErrors)
                {
                    // Surface the cause here; the write below decides whether the upload fails.
                    _logger.LogWarning(
                        "Creating the parent directory of {RemotePath} on {Host} reported errors: {Stderr}",
                        payload.RemotePath, payload.Host, dirResult.Stderr);
                }
            }

            // Read and upload file using Base64.
            // NOTE(review): the whole file is embedded in one script; large files may exceed
            // WinRM message-size limits on the target — confirm, and consider chunking.
            var fileBytes = await File.ReadAllBytesAsync(payload.LocalPath, ct);
            var base64 = Convert.ToBase64String(fileBytes);

            var uploadScript = $@"
$bytes = [Convert]::FromBase64String('{base64}')
[System.IO.File]::WriteAllBytes('{remotePath}', $bytes)
Write-Output 'File uploaded successfully'
Get-Item '{remotePath}' | Select-Object FullName, Length, LastWriteTime | ConvertTo-Json
";

            var result = await session.ExecuteAsync(uploadScript, ct);
            var completedAt = timeProvider.GetUtcNow();

            if (result.HadErrors)
            {
                throw new WinRmFileTransferException(
                    payload.LocalPath,
                    payload.RemotePath,
                    result.Stderr);
            }

            _logger.LogInformation(
                "File upload completed: {LocalPath} -> {RemotePath}",
                payload.LocalPath, payload.RemotePath);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["localPath"] = payload.LocalPath,
                    ["remotePath"] = payload.RemotePath,
                    ["bytesTransferred"] = fileBytes.Length,
                    ["output"] = result.Stdout
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (HttpRequestException ex)
        {
            _logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);
            throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
        }
        catch (Exception ex) when (ShouldWrap(ex, ct))
        {
            _logger.LogError(ex, "File upload failed");
            throw new WinRmFileTransferException(payload.LocalPath, payload.RemotePath, ex.Message);
        }
    }

    /// <summary>
    /// Downloads a remote file to the local file system.
    /// </summary>
    /// <param name="task">Task whose payload deserializes to <see cref="FileDownloadPayload"/>.</param>
    /// <param name="timeProvider">Clock used for the start/completion timestamps.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A successful result carrying the paths and byte count.</returns>
    /// <exception cref="InvalidWinRmPayloadException">The payload is missing or malformed.</exception>
    /// <exception cref="WinRmConnectionException">An <see cref="HttpRequestException"/> occurred while talking to the host.</exception>
    /// <exception cref="WinRmFileTransferException">The remote script reported errors, or any other failure occurred.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<AgentTaskResult> ExecuteDownloadAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = ParsePayload<FileDownloadPayload>(task, DownloadTaskType);

        var connectionInfo = new WinRmConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            UseSSL = payload.UseSSL,
            Username = payload.Username,
            Password = payload.Password,
            Domain = payload.Domain,
            Timeout = payload.Timeout
        };

        try
        {
            _logger.LogInformation(
                "Downloading file {RemotePath} from {Host} to {LocalPath}",
                payload.RemotePath, payload.Host, payload.LocalPath);

            var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);

            // Read file as Base64
            var downloadScript = $@"
$path = '{EscapeSingleQuoted(payload.RemotePath)}'
if (!(Test-Path $path)) {{
throw ""File not found: $path""
}}
$bytes = [System.IO.File]::ReadAllBytes($path)
[Convert]::ToBase64String($bytes)
";

            var result = await session.ExecuteAsync(downloadScript, ct);

            if (result.HadErrors)
            {
                throw new WinRmFileTransferException(
                    payload.LocalPath,
                    payload.RemotePath,
                    result.Stderr);
            }

            // Decode and write local file
            var fileBytes = Convert.FromBase64String(result.Stdout.Trim());

            var localDir = Path.GetDirectoryName(payload.LocalPath);
            if (!string.IsNullOrEmpty(localDir))
            {
                // No-op when the directory already exists.
                Directory.CreateDirectory(localDir);
            }

            await File.WriteAllBytesAsync(payload.LocalPath, fileBytes, ct);

            // Taken after the local write so CompletedAt/Duration cover the whole transfer.
            var completedAt = timeProvider.GetUtcNow();

            _logger.LogInformation(
                "File download completed: {RemotePath} -> {LocalPath}",
                payload.RemotePath, payload.LocalPath);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["localPath"] = payload.LocalPath,
                    ["remotePath"] = payload.RemotePath,
                    ["bytesTransferred"] = fileBytes.Length
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (HttpRequestException ex)
        {
            _logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);
            throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
        }
        catch (Exception ex) when (ShouldWrap(ex, ct))
        {
            _logger.LogError(ex, "File download failed");
            throw new WinRmFileTransferException(payload.LocalPath, payload.RemotePath, ex.Message);
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Dispatches on the task type (case-insensitively): <c>winrm.upload</c> and
    /// <c>winrm.download</c> are supported; any other type throws
    /// <see cref="InvalidWinRmPayloadException"/> synchronously.
    /// </remarks>
    public Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        // Route based on task type
        return task.TaskType.ToLowerInvariant() switch
        {
            UploadTaskType => ExecuteUploadAsync(task, timeProvider, ct),
            DownloadTaskType => ExecuteDownloadAsync(task, timeProvider, ct),
            _ => throw new InvalidWinRmPayloadException(task.TaskType)
        };
    }

    // Deserializes the task payload; a null result or malformed JSON is reported as an invalid payload.
    private static TPayload ParsePayload<TPayload>(AgentTaskInfo task, string taskType)
        where TPayload : class
    {
        try
        {
            return JsonSerializer.Deserialize<TPayload>(task.Payload)
                ?? throw new InvalidWinRmPayloadException(taskType);
        }
        catch (JsonException)
        {
            throw new InvalidWinRmPayloadException(taskType);
        }
    }

    // Decides whether an unexpected exception is wrapped in WinRmFileTransferException.
    // Domain exceptions pass through unchanged, and so does cancellation requested via ct;
    // cancellations not triggered by ct (e.g. internal timeouts) are still wrapped.
    private static bool ShouldWrap(Exception ex, CancellationToken ct)
        => ex is not WinRmConnectionException
            and not InvalidWinRmPayloadException
            and not WinRmFileTransferException
           && !(ex is OperationCanceledException && ct.IsCancellationRequested);

    // Escapes a value for use inside a PowerShell single-quoted string by doubling every
    // character PowerShell treats as a single quote.
    private static string EscapeSingleQuoted(string value)
    {
        foreach (var quote in SingleQuoteCharacters)
        {
            value = value.Replace(quote, quote + quote, StringComparison.Ordinal);
        }

        return value;
    }
}
|
||||
@@ -0,0 +1,348 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.WinRM.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.WinRM.Tasks;
|
||||
|
||||
/// <summary>
/// Task handler that manages containers on a Windows host by running
/// <c>docker</c> commands through PowerShell over WinRM.
/// </summary>
public sealed class WindowsContainerTask : IWinRmTask
{
    private const string TaskTypeName = "winrm.container";

    // PowerShell accepts these typographic quotes as single-quote delimiters in addition
    // to the ASCII apostrophe, so all of them must be doubled inside a '...' literal.
    private static readonly string[] SingleQuoteCharacters = { "'", "\u2018", "\u2019", "\u201A", "\u201B" };

    private readonly WinRmConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for Windows container operations.
    /// </summary>
    public sealed record WindowsContainerPayload
    {
        /// <summary>
        /// Target Windows host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// WinRM port. Defaults to 5985.
        /// </summary>
        public int Port { get; init; } = 5985;

        /// <summary>
        /// Use SSL/TLS.
        /// </summary>
        public bool UseSSL { get; init; }

        /// <summary>
        /// Username for authentication.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Password for authentication.
        /// </summary>
        public string? Password { get; init; }

        /// <summary>
        /// Windows domain.
        /// </summary>
        public string? Domain { get; init; }

        /// <summary>
        /// Container operation to perform.
        /// </summary>
        public required ContainerOperation Operation { get; init; }

        /// <summary>
        /// Container name or ID. Required for every operation except Pull and List.
        /// </summary>
        public string? ContainerName { get; init; }

        /// <summary>
        /// Image reference. Required for Pull and Run.
        /// </summary>
        public string? Image { get; init; }

        /// <summary>
        /// Environment variables passed to <c>docker run</c> as <c>-e KEY=VALUE</c>.
        /// </summary>
        public Dictionary<string, string>? Environment { get; init; }

        /// <summary>
        /// Port mappings (host:container), each passed to <c>docker run -p</c>.
        /// </summary>
        public List<string>? Ports { get; init; }

        /// <summary>
        /// Volume mounts (host:container), each passed to <c>docker run -v</c>.
        /// </summary>
        public List<string>? Volumes { get; init; }

        /// <summary>
        /// Additional Docker run arguments. Inserted into the command line verbatim
        /// (no quoting or escaping), so it must come from a trusted source.
        /// </summary>
        public string? ExtraArgs { get; init; }

        /// <summary>
        /// Command timeout. Defaults to 10 minutes; forwarded to the connection settings.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(10);

        /// <summary>
        /// Whether to remove an existing container with the same name before running.
        /// </summary>
        public bool ForceRecreate { get; init; }
    }

    /// <summary>
    /// Windows container operations.
    /// </summary>
    public enum ContainerOperation
    {
        /// <summary>
        /// Pull a container image.
        /// </summary>
        Pull,

        /// <summary>
        /// Create and start a container.
        /// </summary>
        Run,

        /// <summary>
        /// Start an existing container.
        /// </summary>
        Start,

        /// <summary>
        /// Stop a running container.
        /// </summary>
        Stop,

        /// <summary>
        /// Remove a container.
        /// </summary>
        Remove,

        /// <summary>
        /// Get container status.
        /// </summary>
        Status,

        /// <summary>
        /// List containers.
        /// </summary>
        List,

        /// <summary>
        /// Get container logs.
        /// </summary>
        Logs
    }

    /// <summary>
    /// Creates a new Windows container task handler.
    /// </summary>
    /// <param name="connectionPool">Pool used to obtain WinRM sessions.</param>
    /// <param name="logger">Logger for operation diagnostics.</param>
    public WindowsContainerTask(WinRmConnectionPool connectionPool, ILogger<WindowsContainerTask> logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <exception cref="InvalidWinRmPayloadException">The payload is missing, malformed, or lacks a field the operation requires.</exception>
    /// <exception cref="WinRmConnectionException">An <see cref="HttpRequestException"/> occurred while talking to the host.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();

        var payload = ParsePayload(task);
        ValidatePayload(payload);

        var connectionInfo = new WinRmConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            UseSSL = payload.UseSSL,
            Username = payload.Username,
            Password = payload.Password,
            Domain = payload.Domain,
            Timeout = payload.Timeout
        };

        try
        {
            _logger.LogInformation(
                "Performing container {Operation} on {Host}",
                payload.Operation, payload.Host);

            var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);
            var script = GenerateScript(payload);
            var result = await session.ExecuteAsync(script, ct);

            var completedAt = timeProvider.GetUtcNow();

            if (result.HadErrors)
            {
                _logger.LogWarning(
                    "Container operation {Operation} failed: {Stderr}",
                    payload.Operation, result.Stderr);

                return new AgentTaskResult
                {
                    TaskId = task.Id,
                    Success = false,
                    Error = result.Stderr,
                    Outputs = new Dictionary<string, object>
                    {
                        ["operation"] = payload.Operation.ToString(),
                        ["stdout"] = result.Stdout,
                        ["stderr"] = result.Stderr,
                        ["exitCode"] = result.ExitCode
                    },
                    CompletedAt = completedAt,
                    Duration = completedAt - startTime
                };
            }

            _logger.LogInformation(
                "Container operation {Operation} completed successfully",
                payload.Operation);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["operation"] = payload.Operation.ToString(),
                    ["containerName"] = payload.ContainerName ?? string.Empty,
                    ["output"] = result.Stdout,
                    ["exitCode"] = result.ExitCode
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (HttpRequestException ex)
        {
            _logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);
            throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
        }
    }

    // Deserializes the task payload; a null result or malformed JSON is reported as an invalid payload.
    private static WindowsContainerPayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<WindowsContainerPayload>(task.Payload)
                ?? throw new InvalidWinRmPayloadException(TaskTypeName);
        }
        catch (JsonException)
        {
            throw new InvalidWinRmPayloadException(TaskTypeName);
        }
    }

    // Checks that the fields the requested operation needs are present.
    // Image is checked before ContainerName, so Run reports a missing image first.
    private static void ValidatePayload(WindowsContainerPayload payload)
    {
        var operation = payload.Operation;

        var needsImage = operation is ContainerOperation.Pull or ContainerOperation.Run;
        if (needsImage && string.IsNullOrEmpty(payload.Image))
        {
            throw new InvalidWinRmPayloadException($"winrm.container: Image required for {operation} operation");
        }

        var needsContainerName = operation is ContainerOperation.Run
            or ContainerOperation.Start
            or ContainerOperation.Stop
            or ContainerOperation.Remove
            or ContainerOperation.Status
            or ContainerOperation.Logs;
        if (needsContainerName && string.IsNullOrEmpty(payload.ContainerName))
        {
            throw new InvalidWinRmPayloadException($"winrm.container: ContainerName required for {operation} operation");
        }
    }

    // Builds the PowerShell script for the requested operation. Every payload-derived
    // argument (except ExtraArgs) is emitted as a single-quoted PowerShell literal.
    private static string GenerateScript(WindowsContainerPayload payload)
    {
        return payload.Operation switch
        {
            ContainerOperation.Pull => GeneratePullScript(payload),
            ContainerOperation.Run => GenerateRunScript(payload),
            ContainerOperation.Start => $"docker start {QuoteArgument(payload.ContainerName!)}; {InspectStateCommand(payload.ContainerName!)}",
            ContainerOperation.Stop => $"docker stop {QuoteArgument(payload.ContainerName!)}; {InspectStateCommand(payload.ContainerName!)}",
            ContainerOperation.Remove => $"docker rm -f {QuoteArgument(payload.ContainerName!)}",
            ContainerOperation.Status => InspectStateCommand(payload.ContainerName!),
            ContainerOperation.List => "docker ps -a --format '{{json .}}'",
            ContainerOperation.Logs => $"docker logs --tail 500 {QuoteArgument(payload.ContainerName!)}",
            _ => throw new ArgumentOutOfRangeException(nameof(payload.Operation))
        };
    }

    // docker inspect command that prints the container's State object as JSON.
    private static string InspectStateCommand(string containerName)
        => $"docker inspect {QuoteArgument(containerName)} --format '{{{{json .State}}}}'";

    private static string GeneratePullScript(WindowsContainerPayload payload)
    {
        var image = QuoteArgument(payload.Image!);
        return $"docker pull {image}; docker images {image} --format '{{{{json .}}}}'";
    }

    private static string GenerateRunScript(WindowsContainerPayload payload)
    {
        var parts = new List<string>
        {
            "docker run -d",
            $"--name {QuoteArgument(payload.ContainerName!)}"
        };

        if (payload.Environment is { } environment)
        {
            foreach (var (key, value) in environment)
            {
                // Single-quoted so PowerShell does not expand '$' or backticks in the value.
                parts.Add($"-e {QuoteArgument($"{key}={value}")}");
            }
        }

        if (payload.Ports is { } ports)
        {
            foreach (var port in ports)
            {
                parts.Add($"-p {QuoteArgument(port)}");
            }
        }

        if (payload.Volumes is { } volumes)
        {
            foreach (var volume in volumes)
            {
                parts.Add($"-v {QuoteArgument(volume)}");
            }
        }

        if (!string.IsNullOrEmpty(payload.ExtraArgs))
        {
            // Deliberately verbatim: callers supply ready-made command-line arguments here.
            parts.Add(payload.ExtraArgs);
        }

        parts.Add(QuoteArgument(payload.Image!));

        var runCommand = string.Join(" ", parts);
        var inspectCommand = InspectStateCommand(payload.ContainerName!);

        if (payload.ForceRecreate)
        {
            return $@"
$existing = docker ps -aq -f name='^{EscapeSingleQuoted(payload.ContainerName!)}$'
if ($existing) {{
docker rm -f $existing
}}
{runCommand}
{inspectCommand}
";
        }

        return $"{runCommand}; {inspectCommand}";
    }

    // Renders a value as a PowerShell single-quoted literal. Nothing inside such a literal
    // is interpreted, so the value cannot end the argument or start another command.
    private static string QuoteArgument(string value) => $"'{EscapeSingleQuoted(value)}'";

    // Escapes a value for use inside a PowerShell single-quoted string by doubling every
    // character PowerShell treats as a single quote.
    private static string EscapeSingleQuoted(string value)
    {
        foreach (var quote in SingleQuoteCharacters)
        {
            value = value.Replace(quote, quote + quote, StringComparison.Ordinal);
        }

        return value;
    }
}
|
||||
@@ -0,0 +1,241 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.WinRM.Exceptions;
|
||||
|
||||
namespace StellaOps.Agent.WinRM.Tasks;
|
||||
|
||||
/// <summary>
/// Task for managing Windows services via WinRM.
/// </summary>
/// <remarks>
/// The handler deserializes a <see cref="WindowsServicePayload"/> from the task payload, generates a
/// PowerShell script for the requested <see cref="ServiceOperation"/> and runs it on a session obtained
/// from the <see cref="WinRmConnectionPool"/>.
/// </remarks>
public sealed class WindowsServiceTask : IWinRmTask
{
    /// <summary>Identifier passed to <see cref="InvalidWinRmPayloadException"/> when the payload cannot be read.</summary>
    private const string TaskType = "winrm.service";

    private readonly WinRmConnectionPool _connectionPool;
    private readonly ILogger _logger;

    /// <summary>
    /// Payload for Windows service operations.
    /// </summary>
    public sealed record WindowsServicePayload
    {
        /// <summary>
        /// Target Windows host.
        /// </summary>
        public required string Host { get; init; }

        /// <summary>
        /// WinRM port. Defaults to 5985.
        /// </summary>
        public int Port { get; init; } = 5985;

        /// <summary>
        /// Use SSL/TLS.
        /// </summary>
        public bool UseSSL { get; init; }

        /// <summary>
        /// Username for authentication.
        /// </summary>
        public required string Username { get; init; }

        /// <summary>
        /// Password for authentication.
        /// </summary>
        public string? Password { get; init; }

        /// <summary>
        /// Windows domain.
        /// </summary>
        public string? Domain { get; init; }

        /// <summary>
        /// Service name.
        /// </summary>
        public required string ServiceName { get; init; }

        /// <summary>
        /// Service operation to perform.
        /// </summary>
        public required ServiceOperation Operation { get; init; }

        /// <summary>
        /// Timeout for service operations. Defaults to two minutes. It is used as the WinRM connection
        /// timeout and, when <see cref="WaitForState"/> is set, as the upper bound for the state wait.
        /// </summary>
        public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(2);

        /// <summary>
        /// Wait for service to reach target state. Defaults to <c>true</c>.
        /// </summary>
        public bool WaitForState { get; init; } = true;
    }

    /// <summary>
    /// Windows service operations.
    /// </summary>
    public enum ServiceOperation
    {
        /// <summary>
        /// Start the service.
        /// </summary>
        Start,

        /// <summary>
        /// Stop the service.
        /// </summary>
        Stop,

        /// <summary>
        /// Restart the service.
        /// </summary>
        Restart,

        /// <summary>
        /// Get service status.
        /// </summary>
        Status,

        /// <summary>
        /// Enable the service (set to automatic start).
        /// </summary>
        Enable,

        /// <summary>
        /// Disable the service.
        /// </summary>
        Disable
    }

    /// <summary>
    /// Creates a new Windows service task handler.
    /// </summary>
    /// <param name="connectionPool">Pool that supplies WinRM sessions.</param>
    /// <param name="logger">Logger for operation progress and failures.</param>
    public WindowsServiceTask(WinRmConnectionPool connectionPool, ILogger<WindowsServiceTask> logger)
    {
        _connectionPool = connectionPool;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <exception cref="InvalidWinRmPayloadException">The task payload is missing or is not valid JSON for <see cref="WindowsServicePayload"/>.</exception>
    /// <exception cref="WinRmConnectionException">The WinRM HTTP request failed.</exception>
    /// <exception cref="WindowsServiceException">The script reported errors or another failure occurred while running it.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, TimeProvider timeProvider, CancellationToken ct = default)
    {
        var startTime = timeProvider.GetUtcNow();
        var payload = ParsePayload(task);

        var connectionInfo = new WinRmConnectionInfo
        {
            Host = payload.Host,
            Port = payload.Port,
            UseSSL = payload.UseSSL,
            Username = payload.Username,
            Password = payload.Password,
            Domain = payload.Domain,
            Timeout = payload.Timeout
        };

        try
        {
            _logger.LogInformation(
                "Performing {Operation} on service {ServiceName} on {Host}",
                payload.Operation, payload.ServiceName, payload.Host);

            var session = await _connectionPool.GetSessionAsync(connectionInfo, ct);
            var script = GenerateScript(payload);
            var result = await session.ExecuteAsync(script, ct);

            var completedAt = timeProvider.GetUtcNow();

            if (result.HadErrors)
            {
                _logger.LogWarning(
                    "Service operation {Operation} failed for {ServiceName}: {Stderr}",
                    payload.Operation, payload.ServiceName, result.Stderr);

                throw new WindowsServiceException(
                    payload.ServiceName,
                    payload.Operation.ToString(),
                    result.Stderr);
            }

            _logger.LogInformation(
                "Service operation {Operation} completed for {ServiceName}",
                payload.Operation, payload.ServiceName);

            return new AgentTaskResult
            {
                TaskId = task.Id,
                Success = true,
                Outputs = new Dictionary<string, object>
                {
                    ["serviceName"] = payload.ServiceName,
                    ["operation"] = payload.Operation.ToString(),
                    ["output"] = result.Stdout,
                    ["exitCode"] = result.ExitCode
                },
                CompletedAt = completedAt,
                Duration = completedAt - startTime
            };
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Caller-requested cancellation is not a service failure; let it propagate unchanged.
            throw;
        }
        catch (HttpRequestException ex)
        {
            _logger.LogError(ex, "WinRM connection failed to {Host}:{Port}", payload.Host, payload.Port);
            throw new WinRmConnectionException(payload.Host, payload.Port, ex.Message);
        }
        catch (Exception ex) when (ex is not WinRmConnectionException
            and not InvalidWinRmPayloadException
            and not WindowsServiceException)
        {
            _logger.LogError(ex, "Service operation failed for {ServiceName}", payload.ServiceName);
            throw new WindowsServiceException(payload.ServiceName, payload.Operation.ToString(), ex.Message);
        }
    }

    /// <summary>
    /// Deserializes the task payload, mapping a null result or malformed JSON to
    /// <see cref="InvalidWinRmPayloadException"/>.
    /// </summary>
    private static WindowsServicePayload ParsePayload(AgentTaskInfo task)
    {
        try
        {
            return JsonSerializer.Deserialize<WindowsServicePayload>(task.Payload)
                ?? throw new InvalidWinRmPayloadException(TaskType);
        }
        catch (JsonException)
        {
            throw new InvalidWinRmPayloadException(TaskType);
        }
    }

    /// <summary>
    /// Builds the PowerShell script for the requested operation. Every script ends by emitting the
    /// service's state as JSON.
    /// </summary>
    /// <remarks>
    /// <c>Start-Service</c>, <c>Stop-Service</c> and <c>Restart-Service</c> have no <c>-Wait</c> parameter,
    /// so waiting is done with <c>ServiceController.WaitForStatus</c>, bounded by
    /// <see cref="WindowsServicePayload.Timeout"/>. When waiting is disabled, <c>Stop-Service</c> is given
    /// <c>-NoWait</c> (available from Windows PowerShell 5.0).
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException">The operation is not a defined <see cref="ServiceOperation"/>.</exception>
    private static string GenerateScript(WindowsServicePayload payload)
    {
        var serviceName = EscapeSingleQuoted(payload.ServiceName);

        // Clamp to a sane positive whole number of seconds so the literal is always valid for FromSeconds.
        var timeoutSeconds = (long)Math.Clamp(Math.Ceiling(payload.Timeout.TotalSeconds), 1d, int.MaxValue);
        var timeoutLiteral = timeoutSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture);

        string WaitFor(string status) => payload.WaitForState
            ? $"(Get-Service -Name '{serviceName}' -ErrorAction Stop).WaitForStatus('{status}', [TimeSpan]::FromSeconds({timeoutLiteral}))"
            : string.Empty;

        var stopNoWait = payload.WaitForState ? string.Empty : " -NoWait";
        var report = $"Get-Service -Name '{serviceName}' | Select-Object Name, Status, StartType | ConvertTo-Json";

        return payload.Operation switch
        {
            ServiceOperation.Start => Script(
                $"$service = Get-Service -Name '{serviceName}' -ErrorAction Stop",
                "if ($service.Status -ne 'Running') {",
                $"    Start-Service -Name '{serviceName}' -ErrorAction Stop",
                "}",
                WaitFor("Running"),
                report),

            ServiceOperation.Stop => Script(
                $"$service = Get-Service -Name '{serviceName}' -ErrorAction Stop",
                "if ($service.Status -ne 'Stopped') {",
                $"    Stop-Service -Name '{serviceName}'{stopNoWait} -Force -ErrorAction Stop",
                "}",
                WaitFor("Stopped"),
                report),

            ServiceOperation.Restart => Script(
                $"Restart-Service -Name '{serviceName}' -Force -ErrorAction Stop",
                WaitFor("Running"),
                report),

            ServiceOperation.Status => Script(
                $"Get-Service -Name '{serviceName}' -ErrorAction Stop | Select-Object Name, Status, StartType, DisplayName | ConvertTo-Json"),

            ServiceOperation.Enable => Script(
                $"Set-Service -Name '{serviceName}' -StartupType Automatic -ErrorAction Stop",
                report),

            ServiceOperation.Disable => Script(
                $"Set-Service -Name '{serviceName}' -StartupType Disabled -ErrorAction Stop",
                report),

            _ => throw new ArgumentOutOfRangeException(nameof(payload.Operation))
        };
    }

    /// <summary>
    /// Joins the non-empty lines into a newline-separated script.
    /// </summary>
    private static string Script(params string[] lines) =>
        string.Join("\n", lines.Where(line => line.Length > 0));

    /// <summary>
    /// Escapes text for use inside a single-quoted PowerShell string literal by doubling every
    /// character PowerShell accepts as a single-quote delimiter. That set includes the ASCII
    /// apostrophe and the typographic quotes U+2018, U+2019, U+201A and U+201B.
    /// </summary>
    private static string EscapeSingleQuoted(string value)
    {
        var builder = new System.Text.StringBuilder(value.Length + 2);

        foreach (var ch in value)
        {
            builder.Append(ch);

            if (ch is '\'' or '\u2018' or '\u2019' or '\u201A' or '\u201B')
            {
                builder.Append(ch);
            }
        }

        return builder.ToString();
    }
}
|
||||
@@ -0,0 +1,121 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Agent.Core.Capability;
|
||||
using StellaOps.Agent.Core.Models;
|
||||
using StellaOps.Agent.WinRM.Exceptions;
|
||||
using StellaOps.Agent.WinRM.Tasks;
|
||||
|
||||
namespace StellaOps.Agent.WinRM;
|
||||
|
||||
/// <summary>
/// WinRM capability for Windows remote management via WS-Management protocol.
/// Routes each incoming task to the <see cref="IWinRmTask"/> handler registered for its task type.
/// </summary>
public sealed class WinRmCapability : IAgentCapability, IAsyncDisposable
{
    // Built once and wrapped read-only so repeated property reads neither allocate nor expose a mutable array.
    private static readonly IReadOnlyList<string> TaskTypes = Array.AsReadOnly(new[]
    {
        "winrm.powershell",
        "winrm.service",
        "winrm.container",
        "winrm.upload",
        "winrm.download"
    });

    private readonly WinRmConnectionPool _connectionPool;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<WinRmCapability> _logger;
    private readonly Dictionary<string, IWinRmTask> _taskHandlers;

    /// <inheritdoc />
    public string Name => "winrm";

    /// <inheritdoc />
    public string Version => "1.0.0";

    /// <inheritdoc />
    public IReadOnlyList<string> SupportedTaskTypes => TaskTypes;

    /// <summary>
    /// Creates a new WinRM capability with its own connection pool and the built-in task handlers.
    /// </summary>
    /// <param name="httpClientFactory">Factory handed to the connection pool for creating HTTP clients.</param>
    /// <param name="timeProvider">Clock passed to task handlers.</param>
    /// <param name="loggerFactory">Factory used to create loggers for the capability, the pool and each handler.</param>
    /// <exception cref="ArgumentNullException"><paramref name="loggerFactory"/> is <c>null</c>.</exception>
    public WinRmCapability(
        IHttpClientFactory httpClientFactory,
        TimeProvider timeProvider,
        ILoggerFactory loggerFactory)
    {
        ArgumentNullException.ThrowIfNull(loggerFactory);

        _timeProvider = timeProvider;
        _logger = loggerFactory.CreateLogger<WinRmCapability>();
        _connectionPool = new WinRmConnectionPool(
            httpClientFactory,
            loggerFactory.CreateLogger<WinRmConnectionPool>());

        // Upload and download are served by the same handler instance.
        var fileTransferTask = new WinRmFileTransferTask(
            _connectionPool,
            loggerFactory.CreateLogger<WinRmFileTransferTask>());

        _taskHandlers = new Dictionary<string, IWinRmTask>(StringComparer.OrdinalIgnoreCase)
        {
            ["winrm.powershell"] = new PowerShellTask(
                _connectionPool,
                loggerFactory.CreateLogger<PowerShellTask>()),
            ["winrm.service"] = new WindowsServiceTask(
                _connectionPool,
                loggerFactory.CreateLogger<WindowsServiceTask>()),
            ["winrm.container"] = new WindowsContainerTask(
                _connectionPool,
                loggerFactory.CreateLogger<WindowsContainerTask>()),
            ["winrm.upload"] = fileTransferTask,
            ["winrm.download"] = fileTransferTask
        };
    }

    /// <summary>
    /// Creates a new WinRM capability with explicit dependencies for testing.
    /// </summary>
    /// <param name="connectionPool">Connection pool; it is disposed together with this capability.</param>
    /// <param name="timeProvider">Clock passed to task handlers.</param>
    /// <param name="logger">Logger for the capability.</param>
    /// <param name="taskHandlers">Handlers keyed by task type.</param>
    /// <exception cref="ArgumentNullException"><paramref name="taskHandlers"/> is <c>null</c>.</exception>
    public WinRmCapability(
        WinRmConnectionPool connectionPool,
        TimeProvider timeProvider,
        ILogger<WinRmCapability> logger,
        Dictionary<string, IWinRmTask> taskHandlers)
    {
        ArgumentNullException.ThrowIfNull(taskHandlers);

        _connectionPool = connectionPool;
        _timeProvider = timeProvider;
        _logger = logger;
        _taskHandlers = taskHandlers;
    }

    /// <inheritdoc />
    public Task<bool> InitializeAsync(CancellationToken ct = default)
    {
        _logger.LogInformation("WinRM capability initialized");
        return Task.FromResult(true);
    }

    /// <inheritdoc />
    public Task<CapabilityHealthStatus> CheckHealthAsync(CancellationToken ct = default)
    {
        // No remote call is made here; connectivity is only exercised when a task runs.
        return Task.FromResult(new CapabilityHealthStatus(true, "WinRM capability ready"));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="task"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidWinRmPayloadException">No handler is registered for the task's type.</exception>
    public async Task<AgentTaskResult> ExecuteAsync(AgentTaskInfo task, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(task);

        if (!_taskHandlers.TryGetValue(task.TaskType, out var handler))
        {
            throw new InvalidWinRmPayloadException($"Unsupported task type: {task.TaskType}");
        }

        _logger.LogDebug("Executing WinRM task type: {TaskType} with ID {TaskId}", task.TaskType, task.Id);

        return await handler.ExecuteAsync(task, _timeProvider, ct);
    }

    /// <inheritdoc />
    public async ValueTask DisposeAsync()
    {
        await _connectionPool.DisposeAsync();
        _logger.LogDebug("WinRM capability disposed");
    }
}
|
||||
@@ -0,0 +1,93 @@
|
||||
namespace StellaOps.Agent.WinRM;
|
||||
|
||||
/// <summary>
/// WinRM connection information: where to connect, how to authenticate and how long to wait.
/// </summary>
public sealed record WinRmConnectionInfo
{
    /// <summary>
    /// Target Windows host (DNS name or IP address).
    /// </summary>
    public required string Host { get; init; }

    /// <summary>
    /// WinRM port (5985 for HTTP, 5986 for HTTPS). Defaults to 5985.
    /// </summary>
    public int Port { get; init; } = 5985;

    /// <summary>
    /// Use SSL/TLS. Selects the <c>https</c> scheme in <see cref="GetEndpointUrl"/>.
    /// </summary>
    public bool UseSSL { get; init; }

    /// <summary>
    /// Username for authentication.
    /// </summary>
    public required string Username { get; init; }

    /// <summary>
    /// Password for authentication.
    /// </summary>
    public string? Password { get; init; }

    /// <summary>
    /// Windows domain (optional).
    /// </summary>
    public string? Domain { get; init; }

    /// <summary>
    /// Authentication mechanism. Defaults to <see cref="WinRmAuthMechanism.Negotiate"/>.
    /// </summary>
    public WinRmAuthMechanism AuthMechanism { get; init; } = WinRmAuthMechanism.Negotiate;

    /// <summary>
    /// Connection timeout. Defaults to 30 seconds.
    /// </summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);

    /// <summary>
    /// Gets a connection key of the form <c>user@host:port</c>, or <c>DOMAIN\user@host:port</c>
    /// when a domain is set.
    /// </summary>
    /// <returns>The key string built from domain, username, host and port.</returns>
    public string GetConnectionKey()
    {
        var userPart = string.IsNullOrEmpty(Domain)
            ? Username
            : $"{Domain}\\{Username}";

        return $"{userPart}@{Host}:{Port}";
    }

    /// <summary>
    /// Gets the WinRM endpoint URL, <c>scheme://host:port/wsman</c>.
    /// </summary>
    /// <returns>The endpoint URL; an IPv6 literal host is wrapped in square brackets.</returns>
    public string GetEndpointUrl()
    {
        var scheme = UseSSL ? "https" : "http";

        // A bare IPv6 literal contains ':' and would be ambiguous with the port separator,
        // so URI syntax requires it to be bracketed. Already-bracketed hosts are left alone.
        var host = Host.Contains(':') && !Host.StartsWith('[')
            ? $"[{Host}]"
            : Host;

        return $"{scheme}://{host}:{Port}/wsman";
    }
}
|
||||
|
||||
/// <summary>
/// Authentication schemes that can be selected for a WinRM connection.
/// </summary>
public enum WinRmAuthMechanism
{
    /// <summary>
    /// HTTP Basic authentication: username and password, intended for use over HTTPS.
    /// </summary>
    Basic,

    /// <summary>
    /// Negotiate authentication, which resolves to either NTLM or Kerberos.
    /// </summary>
    Negotiate,

    /// <summary>
    /// Kerberos authentication.
    /// </summary>
    Kerberos,

    /// <summary>
    /// CredSSP, the Credential Security Support Provider.
    /// </summary>
    CredSSP
}
|
||||
@@ -0,0 +1,172 @@
|
||||
using System.Collections.Concurrent;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Agent.WinRM;
|
||||
|
||||
/// <summary>
/// Connection pool for WinRM sessions.
/// </summary>
/// <remarks>
/// Sessions are keyed by <see cref="WinRmConnectionInfo.GetConnectionKey"/>. A session that has not been
/// requested for longer than the idle timeout is treated as expired: a periodic timer removes and
/// disposes such sessions, and a request for an expired key replaces and disposes the old session.
/// </remarks>
public sealed class WinRmConnectionPool : IAsyncDisposable
{
    private readonly ConcurrentDictionary<string, PooledSession> _sessions = new();
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly ILogger _logger;
    private readonly TimeSpan _idleTimeout;
    private readonly Timer _cleanupTimer;

    // Read from the timer thread and request threads, so it must not be cached per thread.
    private volatile bool _disposed;

    /// <summary>
    /// Creates a new WinRM connection pool.
    /// </summary>
    /// <param name="httpClientFactory">Factory for the HTTP client backing each session.</param>
    /// <param name="logger">Logger shared by the pool and the sessions it creates.</param>
    /// <param name="idleTimeout">Idle time after which a session expires; defaults to five minutes. Also used as the cleanup interval.</param>
    public WinRmConnectionPool(
        IHttpClientFactory httpClientFactory,
        ILogger<WinRmConnectionPool> logger,
        TimeSpan? idleTimeout = null)
    {
        _httpClientFactory = httpClientFactory;
        _logger = logger;
        _idleTimeout = idleTimeout ?? TimeSpan.FromMinutes(5);
        _cleanupTimer = new Timer(CleanupIdleSessions, null, _idleTimeout, _idleTimeout);
    }

    /// <summary>
    /// Gets or creates a WinRM session for the given connection info.
    /// </summary>
    /// <param name="connectionInfo">Connection settings; its connection key selects the pooled entry.</param>
    /// <param name="ct">Cancellation token for connecting a new session.</param>
    /// <returns>A connected session, either reused from the pool or newly created.</returns>
    /// <exception cref="ObjectDisposedException">The pool has been disposed.</exception>
    public async Task<WinRmSession> GetSessionAsync(WinRmConnectionInfo connectionInfo, CancellationToken ct = default)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        var key = connectionInfo.GetConnectionKey();

        if (_sessions.TryGetValue(key, out var pooled) && !pooled.IsExpired(_idleTimeout))
        {
            pooled.Touch();
            return pooled.Session;
        }

        // Create new session
        var httpClient = CreateHttpClient(connectionInfo);
        var session = new WinRmSession(connectionInfo, httpClient, _logger);

        try
        {
            await session.ConnectAsync(ct);
        }
        catch
        {
            // The session never reached the pool, so nothing else will release these.
            session.Dispose();
            httpClient.Dispose();
            throw;
        }

        var created = new PooledSession(session, httpClient);

        while (true)
        {
            if (!_sessions.TryGetValue(key, out var current))
            {
                if (_sessions.TryAdd(key, created))
                {
                    break;
                }

                continue; // another caller added an entry in between; re-evaluate
            }

            if (!current.IsExpired(_idleTimeout))
            {
                // A concurrent caller published a live session first: use theirs and release ours
                // so the remote shell we just opened is not left behind.
                await created.DisposeAsync();
                current.Touch();
                return current.Session;
            }

            if (_sessions.TryUpdate(key, created, current))
            {
                // The expired entry was replaced; close it instead of dropping it.
                await current.DisposeAsync();
                break;
            }
        }

        if (_disposed && _sessions.TryRemove(new KeyValuePair<string, PooledSession>(key, created)))
        {
            // The pool was disposed while we were connecting; do not hand out a session nobody will clean up.
            await created.DisposeAsync();
            throw new ObjectDisposedException(nameof(WinRmConnectionPool));
        }

        _logger.LogDebug("Created new WinRM session for {Key}", key);

        return session;
    }

    /// <summary>
    /// Removes a session from the pool and disposes it. Does nothing when no session is pooled for the key.
    /// </summary>
    /// <param name="connectionInfo">Connection settings identifying the session.</param>
    /// <param name="ct">Not used by the current implementation.</param>
    public async Task RemoveSessionAsync(WinRmConnectionInfo connectionInfo, CancellationToken ct = default)
    {
        var key = connectionInfo.GetConnectionKey();

        if (_sessions.TryRemove(key, out var pooled))
        {
            await pooled.DisposeAsync();
            _logger.LogDebug("Removed WinRM session for {Key}", key);
        }
    }

    /// <summary>
    /// Creates the HTTP client for a session and applies the timeout and, for Basic authentication
    /// with a password, the <c>Authorization</c> header.
    /// </summary>
    private HttpClient CreateHttpClient(WinRmConnectionInfo connectionInfo)
    {
        var client = _httpClientFactory.CreateClient("WinRM");
        client.Timeout = connectionInfo.Timeout;

        // Set up authentication based on mechanism
        var credentials = CreateCredentials(connectionInfo);
        if (credentials != null)
        {
            // Note: In production, use HttpClientHandler with credentials
            // For Basic auth, set Authorization header directly
            if (connectionInfo.AuthMechanism == WinRmAuthMechanism.Basic)
            {
                var authValue = Convert.ToBase64String(
                    System.Text.Encoding.UTF8.GetBytes(
                        $"{connectionInfo.Username}:{connectionInfo.Password}"));
                client.DefaultRequestHeaders.Authorization =
                    new System.Net.Http.Headers.AuthenticationHeaderValue("Basic", authValue);
            }
        }

        return client;
    }

    /// <summary>
    /// Builds a credential from the connection info, or returns <c>null</c> when no password is set.
    /// </summary>
    private static System.Net.NetworkCredential? CreateCredentials(WinRmConnectionInfo connectionInfo)
    {
        if (string.IsNullOrEmpty(connectionInfo.Password))
        {
            return null;
        }

        return new System.Net.NetworkCredential(
            connectionInfo.Username,
            connectionInfo.Password,
            connectionInfo.Domain ?? string.Empty);
    }

    /// <summary>
    /// Timer callback: removes every expired session and disposes it in the background.
    /// </summary>
    private void CleanupIdleSessions(object? state)
    {
        if (_disposed)
        {
            return;
        }

        foreach (var kvp in _sessions)
        {
            // Removing by key AND value ensures a session that replaced this entry meanwhile is not evicted.
            if (kvp.Value.IsExpired(_idleTimeout) && _sessions.TryRemove(kvp))
            {
                _ = DisposeInBackgroundAsync(kvp.Key, kvp.Value);
            }
        }
    }

    /// <summary>
    /// Disposes an evicted session without letting a failure escape the timer callback.
    /// </summary>
    private async Task DisposeInBackgroundAsync(string key, PooledSession pooled)
    {
        try
        {
            await pooled.DisposeAsync();
            _logger.LogDebug("Cleaned up idle WinRM session for {Key}", key);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to dispose idle WinRM session for {Key}", key);
        }
    }

    /// <inheritdoc />
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        await _cleanupTimer.DisposeAsync();

        foreach (var kvp in _sessions)
        {
            await kvp.Value.DisposeAsync();
        }

        _sessions.Clear();
    }

    /// <summary>
    /// A pooled session together with the HTTP client it uses and the time it was last handed out.
    /// </summary>
    private sealed class PooledSession : IAsyncDisposable
    {
        private readonly HttpClient _httpClient;

        // Milliseconds from Environment.TickCount64; accessed via Interlocked because the
        // cleanup timer and request threads read and write it concurrently.
        private long _lastUsedTicks;
        private int _disposeState;

        public WinRmSession Session { get; }

        public PooledSession(WinRmSession session, HttpClient httpClient)
        {
            Session = session;
            _httpClient = httpClient;
            _lastUsedTicks = Environment.TickCount64;
        }

        /// <summary>Marks the session as used now.</summary>
        public void Touch() => Interlocked.Exchange(ref _lastUsedTicks, Environment.TickCount64);

        /// <summary>Returns whether the session has been idle for longer than <paramref name="idleTimeout"/>.</summary>
        public bool IsExpired(TimeSpan idleTimeout) =>
            TimeSpan.FromMilliseconds(Environment.TickCount64 - Interlocked.Read(ref _lastUsedTicks)) > idleTimeout;

        /// <summary>Closes the remote shell and releases the session and its HTTP client; repeated calls are no-ops.</summary>
        public async ValueTask DisposeAsync()
        {
            if (Interlocked.Exchange(ref _disposeState, 1) != 0)
            {
                return;
            }

            await Session.CloseAsync();
            Session.Dispose();
            _httpClient.Dispose();
        }
    }
}
|
||||
@@ -0,0 +1,344 @@
|
||||
using System.Net;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Text;
|
||||
using System.Xml.Linq;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Agent.WinRM;
|
||||
|
||||
/// <summary>
/// WinRM session for executing remote PowerShell commands.
/// Uses WS-Management protocol over HTTP(S).
/// </summary>
/// <remarks>
/// A session owns one remote command shell. <see cref="ConnectAsync"/> creates it,
/// <see cref="ExecuteAsync"/> runs <c>powershell -encodedcommand</c> in it and collects the output,
/// and <see cref="CloseAsync"/> deletes it. The <see cref="HttpClient"/> is supplied by the caller and
/// is not disposed by this class.
/// </remarks>
public sealed class WinRmSession : IDisposable
{
    private const string ShellResourceUri = "http://schemas.microsoft.com/wbem/wsman/1/windows/shell/cmd";
    private const string AnonymousReplyAddress = "http://schemas.xmlsoap.org/ws/2004/08/addressing/role/anonymous";
    private const string CreateAction = "http://schemas.xmlsoap.org/ws/2004/09/transfer/Create";
    private const string DeleteAction = "http://schemas.xmlsoap.org/ws/2004/09/transfer/Delete";
    private const string CommandAction = "http://schemas.microsoft.com/wbem/wsman/1/windows/shell/Command";
    private const string ReceiveAction = "http://schemas.microsoft.com/wbem/wsman/1/windows/shell/Receive";

    // WSManFault code returned when a Receive hits OperationTimeout before the command produced
    // output or finished. It means "nothing yet, ask again", not that the command failed.
    private const string OperationTimeoutFaultCode = "2150858793";

    private static readonly XNamespace WsMan = "http://schemas.dmtf.org/wbem/wsman/1/wsman.xsd";
    private static readonly XNamespace WsAddr = "http://schemas.xmlsoap.org/ws/2004/08/addressing";
    private static readonly XNamespace WsShell = "http://schemas.microsoft.com/wbem/wsman/1/windows/shell";
    private static readonly XNamespace Soap = "http://www.w3.org/2003/05/soap-envelope";

    private readonly HttpClient _httpClient;
    private readonly WinRmConnectionInfo _connectionInfo;
    private readonly ILogger _logger;
    private string? _shellId;
    private bool _disposed;

    /// <summary>
    /// Creates a new WinRM session.
    /// </summary>
    /// <param name="connectionInfo">Connection settings; provides the endpoint URL.</param>
    /// <param name="httpClient">HTTP client used for every request.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    public WinRmSession(WinRmConnectionInfo connectionInfo, HttpClient httpClient, ILogger logger)
    {
        _connectionInfo = connectionInfo;
        _httpClient = httpClient;
        _logger = logger;
    }

    /// <summary>
    /// Connects to the WinRM service by creating a remote shell.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The session has been disposed.</exception>
    /// <exception cref="HttpRequestException">The server returned a non-success status.</exception>
    /// <exception cref="InvalidOperationException">The response did not contain a shell ID.</exception>
    public async Task ConnectAsync(CancellationToken ct = default)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        _logger.LogDebug("Creating WinRM shell on {Host}", _connectionInfo.Host);

        var response = await SendRequestAsync(CreateShellEnvelope(), ct);

        // Fail here rather than sending later requests with an empty shell selector.
        _shellId = ExtractShellId(response)
            ?? throw new InvalidOperationException(
                $"WinRM shell creation response from {_connectionInfo.Host} did not contain a shell ID.");

        _logger.LogDebug("WinRM shell created: {ShellId}", _shellId);
    }

    /// <summary>
    /// Executes a PowerShell command, connecting first if no shell exists yet.
    /// </summary>
    /// <param name="command">PowerShell script text; it is sent Base64-encoded as UTF-16LE.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// Collected stdout, stderr and exit code. <see cref="PowerShellResult.HadErrors"/> is set when the
    /// exit code is non-zero or stderr is non-empty.
    /// </returns>
    /// <exception cref="ObjectDisposedException">The session has been disposed.</exception>
    /// <exception cref="HttpRequestException">The server returned a non-success status.</exception>
    /// <exception cref="InvalidOperationException">The response did not contain a shell or command ID.</exception>
    public async Task<PowerShellResult> ExecuteAsync(string command, CancellationToken ct = default)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        if (_shellId is null)
        {
            await ConnectAsync(ct);
        }

        _logger.LogDebug("Executing PowerShell command on {Host}", _connectionInfo.Host);

        // Send command
        var commandResponse = await SendRequestAsync(CreateCommandEnvelope(command), ct);
        var commandId = ExtractCommandId(commandResponse)
            ?? throw new InvalidOperationException(
                $"WinRM command response from {_connectionInfo.Host} did not contain a command ID.");

        // Receive output
        var (stdout, stderr, exitCode) = await ReceiveOutputAsync(commandId, ct);

        // NOTE(review): Windows PowerShell may write progress records to stderr as CLIXML, which would
        // set HadErrors on an otherwise successful run — confirm against real hosts.
        return new PowerShellResult
        {
            Stdout = stdout,
            Stderr = stderr,
            ExitCode = exitCode,
            HadErrors = exitCode != 0 || !string.IsNullOrEmpty(stderr)
        };
    }

    /// <summary>
    /// Closes the WinRM session by deleting the remote shell. Failures are logged, not thrown.
    /// </summary>
    public async Task CloseAsync(CancellationToken ct = default)
    {
        if (_shellId is null)
        {
            return;
        }

        try
        {
            await SendRequestAsync(CreateDeleteEnvelope(), ct);
            _logger.LogDebug("WinRM shell deleted: {ShellId}", _shellId);
        }
        catch (Exception ex)
        {
            // Best effort: a shell that cannot be deleted must not break disposal paths.
            _logger.LogWarning(ex, "Error deleting WinRM shell");
        }

        _shellId = null;
    }

    /// <summary>
    /// Posts an envelope and returns the response body, throwing for any non-success status.
    /// </summary>
    private async Task<string> SendRequestAsync(XDocument envelope, CancellationToken ct)
    {
        var (status, isSuccess, body) = await PostEnvelopeAsync(envelope, ct);

        if (!isSuccess)
        {
            throw CreateRequestFailure(status, body);
        }

        return body;
    }

    /// <summary>
    /// Posts an envelope and returns status and body without interpreting them. Request content and
    /// response are disposed before returning.
    /// </summary>
    private async Task<(HttpStatusCode Status, bool IsSuccess, string Body)> PostEnvelopeAsync(
        XDocument envelope,
        CancellationToken ct)
    {
        using var content = new StringContent(envelope.ToString(), Encoding.UTF8, "application/soap+xml");
        content.Headers.ContentType = new MediaTypeHeaderValue("application/soap+xml")
        {
            CharSet = "UTF-8"
        };

        using var response = await _httpClient.PostAsync(_connectionInfo.GetEndpointUrl(), content, ct);
        var body = await response.Content.ReadAsStringAsync(ct);

        return (response.StatusCode, response.IsSuccessStatusCode, body);
    }

    private static HttpRequestException CreateRequestFailure(HttpStatusCode status, string body) =>
        new($"WinRM request failed with status {status}: {body}", null, status);

    /// <summary>
    /// Builds the SOAP envelope shared by all shell requests.
    /// </summary>
    /// <param name="action">WS-Addressing action URI.</param>
    /// <param name="body">Body content, or <c>null</c> for an empty body.</param>
    /// <param name="extraHeaders">Additional header elements appended after the common ones.</param>
    private XDocument CreateEnvelope(string action, XElement? body, params XElement[] extraHeaders)
    {
        return new XDocument(
            new XElement(Soap + "Envelope",
                new XAttribute(XNamespace.Xmlns + "s", Soap.NamespaceName),
                new XAttribute(XNamespace.Xmlns + "a", WsAddr.NamespaceName),
                new XAttribute(XNamespace.Xmlns + "w", WsMan.NamespaceName),
                new XAttribute(XNamespace.Xmlns + "rsp", WsShell.NamespaceName),
                new XElement(Soap + "Header",
                    new XElement(WsAddr + "To", _connectionInfo.GetEndpointUrl()),
                    MustUnderstand(WsMan + "ResourceURI", ShellResourceUri),
                    MustUnderstand(WsAddr + "Action", action),
                    MustUnderstand(WsMan + "MaxEnvelopeSize", "512000"),
                    new XElement(WsAddr + "MessageID", $"uuid:{Guid.NewGuid()}"),
                    new XElement(WsMan + "OperationTimeout", "PT60S"),
                    extraHeaders),
                new XElement(Soap + "Body", body)));
    }

    private static XElement MustUnderstand(XName name, string value) =>
        new(name, value, new XAttribute(Soap + "mustUnderstand", "true"));

    /// <summary>Selector header that addresses the current shell.</summary>
    private XElement ShellSelector() =>
        new(WsMan + "SelectorSet",
            new XElement(WsMan + "Selector", _shellId, new XAttribute("Name", "ShellId")));

    private XDocument CreateShellEnvelope()
    {
        return CreateEnvelope(
            CreateAction,
            new XElement(WsShell + "Shell",
                new XElement(WsShell + "InputStreams", "stdin"),
                new XElement(WsShell + "OutputStreams", "stdout stderr")),
            new XElement(WsAddr + "ReplyTo",
                new XElement(WsAddr + "Address", AnonymousReplyAddress)),
            new XElement(WsMan + "OptionSet",
                new XElement(WsMan + "Option", "TRUE", new XAttribute("Name", "WINRS_NOPROFILE")),
                // 65001 = UTF-8, matching how ReceiveOutputAsync decodes the stream bytes.
                new XElement(WsMan + "Option", "65001", new XAttribute("Name", "WINRS_CODEPAGE"))));
    }

    private XDocument CreateCommandEnvelope(string command)
    {
        // -encodedcommand expects Base64 of the UTF-16LE script text.
        var encodedCommand = Convert.ToBase64String(Encoding.Unicode.GetBytes(command));

        return CreateEnvelope(
            CommandAction,
            new XElement(WsShell + "CommandLine",
                new XElement(WsShell + "Command", $"powershell -encodedcommand {encodedCommand}")),
            ShellSelector());
    }

    private XDocument CreateReceiveEnvelope(string commandId)
    {
        return CreateEnvelope(
            ReceiveAction,
            new XElement(WsShell + "Receive",
                new XElement(WsShell + "DesiredStream", "stdout stderr",
                    new XAttribute("CommandId", commandId))),
            ShellSelector());
    }

    private XDocument CreateDeleteEnvelope() =>
        CreateEnvelope(DeleteAction, null, ShellSelector());

    /// <summary>
    /// Reads the shell ID from a Create response: first the <c>ShellId</c> attribute of the
    /// <c>Shell</c> element, then a <c>Selector</c> named <c>ShellId</c>.
    /// </summary>
    private static string? ExtractShellId(string response)
    {
        var doc = XDocument.Parse(response);

        var shellId = doc.Descendants(WsShell + "Shell")
            .FirstOrDefault()?
            .Attribute("ShellId")?
            .Value;

        // Also try getting from Selector
        shellId ??= doc.Descendants(WsMan + "Selector")
            .FirstOrDefault(e => e.Attribute("Name")?.Value == "ShellId")?
            .Value;

        return shellId;
    }

    private static string? ExtractCommandId(string response)
    {
        var doc = XDocument.Parse(response);
        return doc.Descendants(WsShell + "CommandId").FirstOrDefault()?.Value;
    }

    /// <summary>
    /// Returns whether a failed response body is the WS-Man fault that signals an operation timeout.
    /// </summary>
    private static bool IsOperationTimeoutFault(string body)
    {
        if (string.IsNullOrWhiteSpace(body))
        {
            return false;
        }

        try
        {
            return XDocument.Parse(body)
                .Descendants()
                .Any(e => e.Name.LocalName == "WSManFault"
                    && e.Attribute("Code")?.Value == OperationTimeoutFaultCode);
        }
        catch (System.Xml.XmlException)
        {
            return false; // not a SOAP fault document
        }
    }

    /// <summary>
    /// Polls the Receive operation until the command reports a state ending in <c>Done</c>, collecting
    /// stdout and stderr and the exit code.
    /// </summary>
    private async Task<(string Stdout, string Stderr, int ExitCode)> ReceiveOutputAsync(string commandId, CancellationToken ct)
    {
        // Raw bytes are accumulated and decoded once at the end, so a multi-byte character that is
        // split across two stream chunks is still decoded correctly.
        using var stdoutBytes = new MemoryStream();
        using var stderrBytes = new MemoryStream();
        var exitCode = 0;
        var done = false;

        while (!done)
        {
            var (status, isSuccess, body) = await PostEnvelopeAsync(CreateReceiveEnvelope(commandId), ct);

            if (!isSuccess)
            {
                if (IsOperationTimeoutFault(body))
                {
                    // The command is still running; poll again. Cancellation is observed by the next request.
                    continue;
                }

                throw CreateRequestFailure(status, body);
            }

            var doc = XDocument.Parse(body);

            // Extract output streams
            foreach (var stream in doc.Descendants(WsShell + "Stream"))
            {
                var content = stream.Value;
                if (string.IsNullOrEmpty(content))
                {
                    continue;
                }

                var target = stream.Attribute("Name")?.Value switch
                {
                    "stdout" => stdoutBytes,
                    "stderr" => stderrBytes,
                    _ => null
                };

                if (target is not null)
                {
                    var chunk = Convert.FromBase64String(content);
                    target.Write(chunk, 0, chunk.Length);
                }
            }

            // Check for command state
            var commandState = doc.Descendants(WsShell + "CommandState").FirstOrDefault();
            if (commandState?.Attribute("State")?.Value.EndsWith("Done", StringComparison.Ordinal) == true)
            {
                done = true;

                // Extract exit code
                var exitCodeElement = commandState.Element(WsShell + "ExitCode");
                if (exitCodeElement != null
                    && int.TryParse(
                        exitCodeElement.Value,
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out var code))
                {
                    exitCode = code;
                }
            }
        }

        return (
            Encoding.UTF8.GetString(stdoutBytes.ToArray()),
            Encoding.UTF8.GetString(stderrBytes.ToArray()),
            exitCode);
    }

    /// <inheritdoc />
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        // Note: CloseAsync should be called before disposal for proper cleanup
        _shellId = null;
    }
}
|
||||
|
||||
/// <summary>
/// Outcome of running a PowerShell command: captured streams, exit code and an error flag.
/// </summary>
public sealed record PowerShellResult
{
    /// <summary>
    /// Text captured from the standard output stream.
    /// </summary>
    public required string Stdout { get; init; }

    /// <summary>
    /// Text captured from the standard error stream.
    /// </summary>
    public required string Stderr { get; init; }

    /// <summary>
    /// Exit code reported for the command.
    /// </summary>
    public required int ExitCode { get; init; }

    /// <summary>
    /// <c>true</c> when the execution is considered to have had errors.
    /// </summary>
    public required bool HadErrors { get; init; }
}
|
||||
@@ -0,0 +1,29 @@
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Certificate;
|
||||
|
||||
/// <summary>
/// Issues, renews, revokes and validates the certificates of agents.
/// </summary>
public interface IAgentCertificateService
{
    /// <summary>Issues a new certificate for an agent.</summary>
    /// <param name="agent">Agent the certificate is issued for.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The issued certificate.</returns>
    Task<AgentCertificate> IssueAsync(Models.Agent agent, CancellationToken ct = default);

    /// <summary>Renews the certificate of an agent.</summary>
    /// <param name="agent">Agent whose certificate is renewed.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The certificate resulting from the renewal.</returns>
    Task<AgentCertificate> RenewAsync(Models.Agent agent, CancellationToken ct = default);

    /// <summary>Revokes the certificate of an agent.</summary>
    /// <param name="agent">Agent whose certificate is revoked.</param>
    /// <param name="ct">Cancellation token.</param>
    Task RevokeAsync(Models.Agent agent, CancellationToken ct = default);

    /// <summary>Validates a certificate thumbprint.</summary>
    /// <param name="thumbprint">Thumbprint to validate.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns><c>true</c> when the thumbprint is considered valid.</returns>
    Task<bool> ValidateAsync(string thumbprint, CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,88 @@
|
||||
using System.Security.Cryptography;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Store;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Certificate;
|
||||
|
||||
/// <summary>
/// Test stub for <see cref="IAgentCertificateService"/>.
/// Produces random thumbprints and placeholder PEM blocks; no real CA is involved.
/// </summary>
public sealed class StubAgentCertificateService : IAgentCertificateService
{
    private static readonly TimeSpan CertificateValidity = TimeSpan.FromHours(24);

    private readonly IAgentStore _store;
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Creates the stub on top of the given agent store and clock.
    /// </summary>
    public StubAgentCertificateService(IAgentStore store, TimeProvider timeProvider)
        => (_store, _timeProvider) = (store, timeProvider);

    /// <inheritdoc />
    public async Task<AgentCertificate> IssueAsync(Models.Agent agent, CancellationToken ct = default)
    {
        var issuedAt = _timeProvider.GetUtcNow();
        var expiresAt = issuedAt.Add(CertificateValidity);

        var issued = new AgentCertificate
        {
            // Stub thumbprint: 32 random bytes rendered as hex.
            Thumbprint = Convert.ToHexString(RandomNumberGenerator.GetBytes(32)),
            SubjectName = $"CN={agent.Name}, O=StellaOps Agent, OU={agent.TenantId}",
            NotBefore = issuedAt,
            NotAfter = expiresAt,
            CertificatePem = GenerateStubPem("CERTIFICATE"),
            PrivateKeyPem = GenerateStubPem("RSA PRIVATE KEY")
        };

        // Record the new thumbprint and expiry against the agent.
        await _store.UpdateCertificateAsync(agent.Id, issued.Thumbprint, expiresAt, ct);

        return issued;
    }

    /// <inheritdoc />
    public async Task<AgentCertificate> RenewAsync(Models.Agent agent, CancellationToken ct = default)
    {
        // Renewal = drop the currently recorded certificate (if any), then issue a fresh one.
        await RevokeAsync(agent, ct);
        return await IssueAsync(agent, ct);
    }

    /// <inheritdoc />
    public async Task RevokeAsync(Models.Agent agent, CancellationToken ct = default)
    {
        if (string.IsNullOrEmpty(agent.CertificateThumbprint))
        {
            return;
        }

        await _store.ClearCertificateAsync(agent.Id, ct);
    }

    /// <inheritdoc />
    public Task<bool> ValidateAsync(string thumbprint, CancellationToken ct = default)
    {
        // The stub accepts every non-empty thumbprint.
        var accepted = thumbprint is { Length: > 0 };
        return Task.FromResult(accepted);
    }

    /// <summary>
    /// Builds a PEM-shaped block whose payload is 64 random bytes in base64.
    /// </summary>
    private static string GenerateStubPem(string label)
    {
        var base64 = Convert.ToBase64String(RandomNumberGenerator.GetBytes(64));
        return $"-----BEGIN {label}-----\n{base64}\n-----END {label}-----";
    }
}
|
||||
@@ -0,0 +1,98 @@
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Exceptions;
|
||||
|
||||
/// <summary>
/// Common base type of the agent-related exceptions.
/// </summary>
public abstract class AgentException : Exception
{
    /// <summary>Initialises the exception with a message.</summary>
    protected AgentException(string message)
        : base(message)
    {
    }

    /// <summary>Initialises the exception with a message and the exception that caused it.</summary>
    protected AgentException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
|
||||
/// <summary>
/// Raised when an agent with the given name already exists.
/// </summary>
public sealed class AgentAlreadyExistsException : AgentException
{
    /// <summary>Name that is already taken.</summary>
    public string AgentName { get; }

    /// <summary>Creates the exception for the conflicting agent name.</summary>
    public AgentAlreadyExistsException(string agentName)
        : base($"Agent with name '{agentName}' already exists.")
        => AgentName = agentName;
}
|
||||
|
||||
/// <summary>
/// Raised when no agent exists for the requested identifier.
/// </summary>
public sealed class AgentNotFoundException : AgentException
{
    /// <summary>Identifier that could not be resolved.</summary>
    public Guid AgentId { get; }

    /// <summary>Creates the exception for the missing agent identifier.</summary>
    public AgentNotFoundException(Guid agentId)
        : base($"Agent with ID '{agentId}' was not found.")
        => AgentId = agentId;
}
|
||||
|
||||
/// <summary>
/// Raised when a registration token has already been used.
/// </summary>
public sealed class RegistrationTokenAlreadyUsedException : AgentException
{
    /// <summary>The token that was presented. It is not included in the exception message.</summary>
    public string Token { get; }

    /// <summary>Creates the exception for the already-used token.</summary>
    public RegistrationTokenAlreadyUsedException(string token)
        : base("Registration token has already been used.")
        => Token = token;
}
|
||||
|
||||
/// <summary>
/// Raised when a registration token has expired.
/// </summary>
public sealed class RegistrationTokenExpiredException : AgentException
{
    /// <summary>The token that was presented. It is not included in the exception message.</summary>
    public string Token { get; }

    /// <summary>Creates the exception for the expired token.</summary>
    public RegistrationTokenExpiredException(string token)
        : base("Registration token has expired.")
        => Token = token;
}
|
||||
|
||||
/// <summary>
/// Raised when a registration token cannot be found.
/// </summary>
public sealed class RegistrationTokenNotFoundException : AgentException
{
    /// <summary>The token that was presented. It is not included in the exception message.</summary>
    public string Token { get; }

    /// <summary>Creates the exception for the unknown token.</summary>
    public RegistrationTokenNotFoundException(string token)
        : base("Registration token was not found.")
        => Token = token;
}
|
||||
|
||||
/// <summary>
/// Raised when the requested operation is not allowed in the agent's current state.
/// </summary>
public sealed class InvalidAgentStateException : AgentException
{
    /// <summary>Identifier of the agent.</summary>
    public Guid AgentId { get; }

    /// <summary>State the agent was in when the operation was requested.</summary>
    public string CurrentState { get; }

    /// <summary>Operation that was rejected.</summary>
    public string RequestedOperation { get; }

    /// <summary>Creates the exception for the agent, its state and the rejected operation.</summary>
    public InvalidAgentStateException(Guid agentId, string currentState, string requestedOperation)
        : base($"Agent '{agentId}' is in state '{currentState}' and cannot perform '{requestedOperation}'.")
        => (AgentId, CurrentState, RequestedOperation) = (agentId, currentState, requestedOperation);
}
|
||||
@@ -0,0 +1,66 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Store;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Heartbeat;
|
||||
|
||||
/// <summary>
/// Applies incoming agent heartbeats to the agent store.
/// </summary>
public sealed class HeartbeatProcessor : IHeartbeatProcessor
{
    private readonly IAgentStore _store;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<HeartbeatProcessor> _logger;

    /// <summary>
    /// Creates the processor on top of the given store, clock and logger.
    /// </summary>
    public HeartbeatProcessor(IAgentStore store, TimeProvider timeProvider, ILogger<HeartbeatProcessor> logger)
        => (_store, _timeProvider, _logger) = (store, timeProvider, logger);

    /// <inheritdoc />
    public async Task ProcessAsync(AgentHeartbeat heartbeat, CancellationToken ct = default)
    {
        var agentId = heartbeat.AgentId;
        var agent = await _store.GetAsync(agentId, ct);

        // Heartbeats from unknown agents are logged and dropped.
        if (agent is null)
        {
            _logger.LogWarning("Received heartbeat from unknown agent {AgentId}", agentId);
            return;
        }

        // Heartbeats from revoked agents are logged and dropped.
        if (agent.Status is AgentStatus.Revoked)
        {
            _logger.LogWarning("Received heartbeat from revoked agent {AgentName}", agent.Name);
            return;
        }

        // Record the time of receipt together with the reported resource status.
        var receivedAt = _timeProvider.GetUtcNow();
        await _store.UpdateHeartbeatAsync(agentId, receivedAt, heartbeat.ResourceStatus, ct);

        if (agent.Status is not AgentStatus.Stale)
        {
            return;
        }

        // The agent had been marked stale; a fresh heartbeat makes it active again.
        await _store.UpdateStatusAsync(agentId, AgentStatus.Active, ct);
        _logger.LogInformation("Agent {AgentName} recovered from stale state", agent.Name);
    }
}
|
||||
@@ -0,0 +1,91 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Manager;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Heartbeat;
|
||||
|
||||
/// <summary>
/// Background service that periodically lists the active agents and marks those whose
/// last heartbeat is older than the configured timeout as stale.
/// </summary>
/// <remarks>
/// The timer is created through the injected <see cref="TimeProvider"/> so that a test
/// clock can drive it. A check that is still running when the next tick arrives is not
/// started a second time, and stopping the service cancels the check in flight.
/// </remarks>
public sealed class HeartbeatTimeoutMonitor : IHostedService, IDisposable
{
    // Delay between StartAsync and the first check.
    private static readonly TimeSpan InitialDelay = TimeSpan.FromMinutes(1);

    private readonly IAgentManager _agentManager;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<HeartbeatTimeoutMonitor> _logger;
    private readonly TimeSpan _checkInterval;
    private readonly TimeSpan _heartbeatTimeout;
    private readonly CancellationTokenSource _stopping = new();
    private readonly CancellationToken _stoppingToken;
    private ITimer? _timer;

    // 1 while a check is running, 0 otherwise; guards against overlapping checks.
    private int _checkInProgress;

    /// <summary>
    /// Creates the monitor.
    /// </summary>
    /// <param name="agentManager">Manager used to list active agents and mark them stale.</param>
    /// <param name="timeProvider">Clock and timer source.</param>
    /// <param name="logger">Logger.</param>
    /// <param name="checkInterval">Interval between checks; 30 seconds when omitted.</param>
    /// <param name="heartbeatTimeout">Heartbeat age after which an agent is marked stale; 2 minutes when omitted.</param>
    public HeartbeatTimeoutMonitor(
        IAgentManager agentManager,
        TimeProvider timeProvider,
        ILogger<HeartbeatTimeoutMonitor> logger,
        TimeSpan? checkInterval = null,
        TimeSpan? heartbeatTimeout = null)
    {
        _agentManager = agentManager;
        _timeProvider = timeProvider;
        _logger = logger;
        _checkInterval = checkInterval ?? TimeSpan.FromSeconds(30);
        _heartbeatTimeout = heartbeatTimeout ?? TimeSpan.FromMinutes(2);

        // Captured once so that a late timer tick never touches a disposed token source.
        _stoppingToken = _stopping.Token;
    }

    /// <summary>
    /// Starts the periodic check; the first check runs one minute after the call.
    /// </summary>
    public Task StartAsync(CancellationToken ct)
    {
        // A repeated start must not leak the previous timer.
        _timer?.Dispose();
        _timer = _timeProvider.CreateTimer(OnTimerTick, null, InitialDelay, _checkInterval);

        _logger.LogInformation(
            "Heartbeat timeout monitor started (check interval: {CheckInterval}, timeout: {Timeout})",
            _checkInterval,
            _heartbeatTimeout);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Stops scheduling further checks and cancels a check that is still running.
    /// </summary>
    public Task StopAsync(CancellationToken ct)
    {
        _timer?.Change(Timeout.InfiniteTimeSpan, Timeout.InfiniteTimeSpan);
        _stopping.Cancel();
        _logger.LogInformation("Heartbeat timeout monitor stopped");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Timer callback: starts a check unless the previous one has not finished yet.
    /// </summary>
    private void OnTimerTick(object? state)
    {
        if (Interlocked.CompareExchange(ref _checkInProgress, 1, 0) != 0)
        {
            return;
        }

        // RunCheckAsync handles its own failures, so the task is intentionally not awaited.
        _ = RunCheckAsync();
    }

    /// <summary>
    /// Runs one check, logs failures and releases the overlap guard.
    /// </summary>
    private async Task RunCheckAsync()
    {
        try
        {
            await CheckForTimeoutsAsync(_stoppingToken);
        }
        catch (OperationCanceledException) when (_stoppingToken.IsCancellationRequested)
        {
            // The service is stopping; an interrupted check is expected and not an error.
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Heartbeat timeout check failed");
        }
        finally
        {
            Interlocked.Exchange(ref _checkInProgress, 0);
        }
    }

    /// <summary>
    /// Marks every active agent whose last heartbeat is older than the timeout as stale.
    /// Agents that have no recorded heartbeat are skipped.
    /// </summary>
    private async Task CheckForTimeoutsAsync(CancellationToken ct)
    {
        var agents = await _agentManager.ListActiveAsync(ct);
        var now = _timeProvider.GetUtcNow();

        foreach (var agent in agents)
        {
            if (agent.LastHeartbeatAt is not { } lastHeartbeat)
            {
                continue;
            }

            if (now - lastHeartbeat <= _heartbeatTimeout)
            {
                continue;
            }

            _logger.LogWarning(
                "Agent {AgentName} missed heartbeat (last: {LastHeartbeat})",
                agent.Name,
                lastHeartbeat);

            await _agentManager.MarkStaleAsync(agent.Id, ct);
        }
    }

    /// <summary>
    /// Releases the timer and the cancellation source.
    /// </summary>
    public void Dispose()
    {
        _timer?.Dispose();
        _stopping.Dispose();
    }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Heartbeat;
|
||||
|
||||
/// <summary>
/// Handles heartbeat messages sent by agents.
/// </summary>
public interface IHeartbeatProcessor
{
    /// <summary>Processes one heartbeat from an agent.</summary>
    /// <param name="heartbeat">Heartbeat to process.</param>
    /// <param name="ct">Cancellation token.</param>
    Task ProcessAsync(AgentHeartbeat heartbeat, CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,243 @@
|
||||
using System.Collections.Immutable;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Certificate;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Exceptions;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Heartbeat;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Registration;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Store;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Manager;
|
||||
|
||||
/// <summary>
/// Manager for agent lifecycle operations: registration, lookup, status changes,
/// heartbeat forwarding and certificate renewal.
/// </summary>
public sealed class AgentManager : IAgentManager
{
    private readonly IAgentStore _store;
    private readonly RegistrationTokenService _tokenService;
    private readonly IAgentCertificateService _certificateService;
    private readonly IHeartbeatProcessor _heartbeatProcessor;
    private readonly TimeProvider _timeProvider;
    private readonly Func<Guid> _guidGenerator;
    private readonly Func<Guid> _tenantIdProvider;
    private readonly ILogger<AgentManager> _logger;

    /// <summary>
    /// Creates the manager.
    /// </summary>
    /// <param name="store">Agent persistence.</param>
    /// <param name="tokenService">Service that creates and consumes registration tokens.</param>
    /// <param name="certificateService">Service that issues, renews and revokes agent certificates.</param>
    /// <param name="heartbeatProcessor">Processor that heartbeats are forwarded to.</param>
    /// <param name="timeProvider">Clock used for the timestamps of new agents.</param>
    /// <param name="guidGenerator">Source of identifiers for new agents.</param>
    /// <param name="tenantIdProvider">Supplies the tenant used by the name lookup and list operations.</param>
    /// <param name="logger">Logger.</param>
    public AgentManager(
        IAgentStore store,
        RegistrationTokenService tokenService,
        IAgentCertificateService certificateService,
        IHeartbeatProcessor heartbeatProcessor,
        TimeProvider timeProvider,
        Func<Guid> guidGenerator,
        Func<Guid> tenantIdProvider,
        ILogger<AgentManager> logger)
    {
        _store = store;
        _tokenService = tokenService;
        _certificateService = certificateService;
        _heartbeatProcessor = heartbeatProcessor;
        _timeProvider = timeProvider;
        _guidGenerator = guidGenerator;
        _tenantIdProvider = tenantIdProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public Task<RegistrationToken> CreateRegistrationTokenAsync(
        CreateRegistrationTokenRequest request,
        CancellationToken ct = default)
    {
        return _tokenService.CreateAsync(request, ct);
    }

    /// <summary>
    /// Consumes the registration token, stores a new active agent built from the token and
    /// the request, issues its certificate and returns the stored agent with that certificate.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="AgentNotFoundException">The agent could not be read back from the store after it was saved.</exception>
    public async Task<AgentRegistrationResult> RegisterAsync(
        AgentRegistrationRequest request,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        // Validate and consume token
        var token = await _tokenService.ValidateAndConsumeAsync(request.Token, ct);

        var now = _timeProvider.GetUtcNow();

        // Create the agent
        var agent = new Models.Agent
        {
            Id = _guidGenerator(),
            TenantId = token.TenantId,
            Name = token.AgentName,
            DisplayName = token.DisplayName,
            Version = request.AgentVersion,
            Hostname = request.Hostname,
            Status = AgentStatus.Active,
            Capabilities = token.Capabilities,
            // A request without labels registers an agent with an empty label set.
            Labels = request.Labels?.ToImmutableDictionary() ?? ImmutableDictionary<string, string>.Empty,
            CertificateThumbprint = null,
            CertificateExpiresAt = null,
            LastHeartbeatAt = now,
            LastResourceStatus = null,
            RegisteredAt = now,
            CreatedAt = now,
            UpdatedAt = now
        };

        await _store.SaveAsync(agent, ct);

        // Issue certificate
        var certificate = await _certificateService.IssueAsync(agent, ct);

        // Reload agent with certificate info; a missing record is reported explicitly
        // instead of flowing a null agent into the result.
        var registered = await GetAgentOrThrowAsync(agent.Id, ct);

        _logger.LogInformation(
            "Agent {AgentName} registered successfully",
            registered.Name);

        return new AgentRegistrationResult(registered, certificate);
    }

    /// <inheritdoc />
    public Task<Models.Agent?> GetAsync(Guid id, CancellationToken ct = default)
    {
        return _store.GetAsync(id, ct);
    }

    /// <inheritdoc />
    public Task<Models.Agent?> GetByNameAsync(string name, CancellationToken ct = default)
    {
        var tenantId = _tenantIdProvider();
        return _store.GetByNameAsync(tenantId, name, ct);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<Models.Agent>> ListAsync(AgentFilter? filter = null, CancellationToken ct = default)
    {
        var tenantId = _tenantIdProvider();
        return _store.ListAsync(tenantId, filter, ct);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<Models.Agent>> ListActiveAsync(CancellationToken ct = default)
    {
        var tenantId = _tenantIdProvider();
        return _store.ListAsync(tenantId, new AgentFilter(Status: AgentStatus.Active), ct);
    }

    /// <summary>
    /// Sets the agent's status to <see cref="AgentStatus.Active"/>.
    /// </summary>
    /// <exception cref="AgentNotFoundException">No agent exists for <paramref name="id"/>.</exception>
    /// <exception cref="InvalidAgentStateException">The agent is revoked.</exception>
    public async Task ActivateAsync(Guid id, CancellationToken ct = default)
    {
        var agent = await GetAgentOrThrowAsync(id, ct);

        if (agent.Status == AgentStatus.Revoked)
        {
            throw new InvalidAgentStateException(id, agent.Status.ToString(), "activate");
        }

        await _store.UpdateStatusAsync(id, AgentStatus.Active, ct);

        _logger.LogInformation(
            "Agent {AgentName} activated",
            agent.Name);
    }

    /// <summary>
    /// Sets the agent's status to <see cref="AgentStatus.Inactive"/>.
    /// </summary>
    /// <exception cref="AgentNotFoundException">No agent exists for <paramref name="id"/>.</exception>
    /// <exception cref="InvalidAgentStateException">The agent is revoked.</exception>
    public async Task DeactivateAsync(Guid id, CancellationToken ct = default)
    {
        var agent = await GetAgentOrThrowAsync(id, ct);

        if (agent.Status == AgentStatus.Revoked)
        {
            throw new InvalidAgentStateException(id, agent.Status.ToString(), "deactivate");
        }

        await _store.UpdateStatusAsync(id, AgentStatus.Inactive, ct);

        _logger.LogInformation(
            "Agent {AgentName} deactivated",
            agent.Name);
    }

    /// <summary>
    /// Revokes the agent's certificate and sets its status to <see cref="AgentStatus.Revoked"/>.
    /// The reason is only written to the log.
    /// </summary>
    /// <exception cref="AgentNotFoundException">No agent exists for <paramref name="id"/>.</exception>
    public async Task RevokeAsync(Guid id, string reason, CancellationToken ct = default)
    {
        var agent = await GetAgentOrThrowAsync(id, ct);

        // Revoke certificate
        await _certificateService.RevokeAsync(agent, ct);

        // Update status
        await _store.UpdateStatusAsync(id, AgentStatus.Revoked, ct);

        _logger.LogWarning(
            "Agent {AgentName} revoked: {Reason}",
            agent.Name,
            reason);
    }

    /// <summary>
    /// Sets an active agent's status to <see cref="AgentStatus.Stale"/>.
    /// Does nothing when the agent does not exist or is not active.
    /// </summary>
    public async Task MarkStaleAsync(Guid id, CancellationToken ct = default)
    {
        var agent = await _store.GetAsync(id, ct);
        if (agent is null)
        {
            return;
        }

        if (agent.Status != AgentStatus.Active)
        {
            return;
        }

        await _store.UpdateStatusAsync(id, AgentStatus.Stale, ct);

        _logger.LogWarning(
            "Agent {AgentName} marked as stale",
            agent.Name);
    }

    /// <inheritdoc />
    public Task ProcessHeartbeatAsync(AgentHeartbeat heartbeat, CancellationToken ct = default)
    {
        return _heartbeatProcessor.ProcessAsync(heartbeat, ct);
    }

    /// <summary>
    /// Renews the certificate of an agent that is not revoked.
    /// </summary>
    /// <exception cref="AgentNotFoundException">No agent exists for <paramref name="id"/>.</exception>
    /// <exception cref="InvalidAgentStateException">The agent is revoked.</exception>
    public async Task<AgentCertificate> RenewCertificateAsync(Guid id, CancellationToken ct = default)
    {
        var agent = await GetAgentOrThrowAsync(id, ct);

        if (agent.Status == AgentStatus.Revoked)
        {
            throw new InvalidAgentStateException(id, agent.Status.ToString(), "renew certificate");
        }

        var certificate = await _certificateService.RenewAsync(agent, ct);

        _logger.LogInformation(
            "Certificate renewed for agent {AgentName}",
            agent.Name);

        return certificate;
    }

    /// <summary>
    /// Not implemented yet; always throws.
    /// </summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public Task<TaskResult> ExecuteTaskAsync(
        Guid agentId,
        AgentTask task,
        CancellationToken ct = default)
    {
        // Stub implementation - task execution will be implemented in a later sprint
        throw new NotImplementedException("Task execution not yet implemented. See sprint 103_005.");
    }

    /// <summary>
    /// Loads the agent or throws <see cref="AgentNotFoundException"/> when the store has none for the id.
    /// </summary>
    private async Task<Models.Agent> GetAgentOrThrowAsync(Guid id, CancellationToken ct)
    {
        var agent = await _store.GetAsync(id, ct);
        if (agent is null)
        {
            throw new AgentNotFoundException(id);
        }

        return agent;
    }
}
|
||||
@@ -0,0 +1,57 @@
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
using StellaOps.ReleaseOrchestrator.Agent.Registration;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Manager;
|
||||
|
||||
/// <summary>
/// Lifecycle operations for agents: registration, lookup, status changes,
/// heartbeats, certificate renewal and task execution.
/// </summary>
public interface IAgentManager
{
    // ---- Registration ----

    /// <summary>Creates a registration token for the given request.</summary>
    Task<RegistrationToken> CreateRegistrationTokenAsync(
        CreateRegistrationTokenRequest request,
        CancellationToken ct = default);

    /// <summary>Registers an agent and returns it together with its certificate.</summary>
    Task<AgentRegistrationResult> RegisterAsync(
        AgentRegistrationRequest request,
        CancellationToken ct = default);

    // ---- Lifecycle ----

    /// <summary>Gets an agent by identifier, or <c>null</c> when there is none.</summary>
    Task<Models.Agent?> GetAsync(Guid id, CancellationToken ct = default);

    /// <summary>Gets an agent by name, or <c>null</c> when there is none.</summary>
    Task<Models.Agent?> GetByNameAsync(string name, CancellationToken ct = default);

    /// <summary>Lists agents, optionally restricted by a filter.</summary>
    Task<IReadOnlyList<Models.Agent>> ListAsync(AgentFilter? filter = null, CancellationToken ct = default);

    /// <summary>Lists the active agents.</summary>
    Task<IReadOnlyList<Models.Agent>> ListActiveAsync(CancellationToken ct = default);

    /// <summary>Activates an agent.</summary>
    Task ActivateAsync(Guid id, CancellationToken ct = default);

    /// <summary>Deactivates an agent.</summary>
    Task DeactivateAsync(Guid id, CancellationToken ct = default);

    /// <summary>Revokes an agent for the stated reason.</summary>
    Task RevokeAsync(Guid id, string reason, CancellationToken ct = default);

    /// <summary>Marks an agent as stale.</summary>
    Task MarkStaleAsync(Guid id, CancellationToken ct = default);

    // ---- Heartbeat ----

    /// <summary>Processes a heartbeat sent by an agent.</summary>
    Task ProcessHeartbeatAsync(AgentHeartbeat heartbeat, CancellationToken ct = default);

    // ---- Certificate ----

    /// <summary>Renews the certificate of an agent and returns the new certificate.</summary>
    Task<AgentCertificate> RenewCertificateAsync(Guid id, CancellationToken ct = default);

    // ---- Task execution ----

    /// <summary>Executes a task on an agent and returns its result.</summary>
    Task<TaskResult> ExecuteTaskAsync(
        Guid agentId,
        AgentTask task,
        CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// Data an agent submits in order to register.
/// </summary>
/// <param name="Token">Registration token presented by the agent.</param>
/// <param name="AgentVersion">Version reported by the agent.</param>
/// <param name="Hostname">Hostname reported by the agent.</param>
/// <param name="Labels">Labels reported by the agent.</param>
public sealed record AgentRegistrationRequest(
    string Token,
    string AgentVersion,
    string Hostname,
    IReadOnlyDictionary<string, string> Labels);
|
||||
|
||||
/// <summary>
/// Outcome of a successful agent registration.
/// </summary>
/// <param name="Agent">The registered agent.</param>
/// <param name="Certificate">The certificate issued to the agent.</param>
public sealed record AgentRegistrationResult(
    Models.Agent Agent,
    AgentCertificate Certificate);
|
||||
@@ -0,0 +1,164 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
|
||||
/// <summary>
/// A deployment agent as tracked by the release orchestrator.
/// </summary>
public sealed record Agent
{
    /// <summary>Unique identifier of the agent.</summary>
    public required Guid Id { get; init; }

    /// <summary>Tenant the agent belongs to.</summary>
    public required Guid TenantId { get; init; }

    /// <summary>Unique name of the agent.</summary>
    public required string Name { get; init; }

    /// <summary>Name shown in the UI.</summary>
    public required string DisplayName { get; init; }

    /// <summary>Version of the agent.</summary>
    public required string Version { get; init; }

    /// <summary>Hostname the agent runs on, when known.</summary>
    public string? Hostname { get; init; }

    /// <summary>Current status of the agent.</summary>
    public required AgentStatus Status { get; init; }

    /// <summary>Capabilities the agent supports.</summary>
    public required ImmutableArray<AgentCapability> Capabilities { get; init; }

    /// <summary>Labels used for agent selection; empty by default.</summary>
    public ImmutableDictionary<string, string> Labels { get; init; } = ImmutableDictionary<string, string>.Empty;

    /// <summary>Thumbprint of the certificate used for mTLS, when one is recorded.</summary>
    public string? CertificateThumbprint { get; init; }

    /// <summary>Expiry time of the certificate, when one is recorded.</summary>
    public DateTimeOffset? CertificateExpiresAt { get; init; }

    /// <summary>Time the last heartbeat was received, when there has been one.</summary>
    public DateTimeOffset? LastHeartbeatAt { get; init; }

    /// <summary>Resource status reported most recently, when there has been one.</summary>
    public AgentResourceStatus? LastResourceStatus { get; init; }

    /// <summary>Time the agent was registered.</summary>
    public DateTimeOffset? RegisteredAt { get; init; }

    /// <summary>Time the agent record was created.</summary>
    public DateTimeOffset CreatedAt { get; init; }

    /// <summary>Time the agent record was last updated.</summary>
    public DateTimeOffset UpdatedAt { get; init; }
}
|
||||
|
||||
/// <summary>
/// Lifecycle status of an agent.
/// </summary>
public enum AgentStatus
{
    /// <summary>A token has been created but the agent has not registered yet.</summary>
    Pending = 0,

    /// <summary>The agent is registered and healthy.</summary>
    Active = 1,

    /// <summary>The agent was deactivated manually.</summary>
    Inactive = 2,

    /// <summary>The agent missed heartbeats.</summary>
    Stale = 3,

    /// <summary>The agent is permanently disabled.</summary>
    Revoked = 4
}
|
||||
|
||||
/// <summary>
/// Kinds of capability an agent can support.
/// </summary>
public enum AgentCapability
{
    /// <summary>Support for Docker Engine.</summary>
    Docker = 0,

    /// <summary>Support for Docker Compose.</summary>
    Compose = 1,

    /// <summary>Support for SSH.</summary>
    Ssh = 2,

    /// <summary>Support for WinRM.</summary>
    WinRm = 3
}
|
||||
|
||||
/// <summary>
/// Resource usage figures reported by an agent.
/// </summary>
/// <param name="CpuPercent">Reported CPU usage as a percentage.</param>
/// <param name="MemoryUsedBytes">Reported memory in use, in bytes.</param>
/// <param name="MemoryTotalBytes">Reported total memory, in bytes.</param>
/// <param name="DiskUsedBytes">Reported disk space in use, in bytes.</param>
/// <param name="DiskTotalBytes">Reported total disk space, in bytes.</param>
public sealed record AgentResourceStatus(
    double CpuPercent,
    long MemoryUsedBytes,
    long MemoryTotalBytes,
    long DiskUsedBytes,
    long DiskTotalBytes);
|
||||
|
||||
/// <summary>
/// Optional criteria for listing agents; every criterion defaults to <c>null</c>.
/// </summary>
/// <param name="Status">Status criterion.</param>
/// <param name="Capability">Capability criterion.</param>
/// <param name="Labels">Label criterion.</param>
public sealed record AgentFilter(
    AgentStatus? Status = null,
    AgentCapability? Capability = null,
    IReadOnlyDictionary<string, string>? Labels = null);
|
||||
@@ -0,0 +1,37 @@
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
|
||||
/// <summary>
/// Certificate issued to an agent so that it can authenticate over mTLS.
/// </summary>
public sealed record AgentCertificate
{
    /// <summary>Thumbprint of the certificate (SHA-256).</summary>
    public required string Thumbprint { get; init; }

    /// <summary>Subject name taken from the certificate.</summary>
    public required string SubjectName { get; init; }

    /// <summary>Start of the certificate's validity period.</summary>
    public required DateTimeOffset NotBefore { get; init; }

    /// <summary>End of the certificate's validity period.</summary>
    public required DateTimeOffset NotAfter { get; init; }

    /// <summary>The certificate in PEM encoding.</summary>
    public required string CertificatePem { get; init; }

    /// <summary>The private key in PEM encoding; only returned during issuance.</summary>
    public required string PrivateKeyPem { get; init; }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
|
||||
/// <summary>
/// Heartbeat message an agent sends.
/// </summary>
/// <param name="AgentId">Identifier of the sending agent.</param>
/// <param name="Version">Version carried in the heartbeat.</param>
/// <param name="ResourceStatus">Resource status carried in the heartbeat.</param>
/// <param name="RunningTasks">Running tasks listed in the heartbeat.</param>
/// <param name="Timestamp">Timestamp carried in the heartbeat.</param>
public sealed record AgentHeartbeat(
    Guid AgentId,
    string Version,
    AgentResourceStatus ResourceStatus,
    IReadOnlyList<string> RunningTasks,
    DateTimeOffset Timestamp);
|
||||
@@ -0,0 +1,67 @@
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
|
||||
/// <summary>
/// Common base of the tasks that agents can execute.
/// </summary>
public abstract record AgentTask
{
    /// <summary>Identifier of the task.</summary>
    public required Guid Id { get; init; }

    /// <summary>Discriminator string naming the kind of task; supplied by each derived record.</summary>
    public abstract string TaskType { get; }
}
|
||||
|
||||
/// <summary>
/// Task that asks an agent to health-check a target.
/// </summary>
public sealed record HealthCheckTask : AgentTask
{
    /// <inheritdoc />
    public override string TaskType
    {
        get { return "health_check"; }
    }

    /// <summary>Identifier of the target to check.</summary>
    public required Guid TargetId { get; init; }
}
|
||||
|
||||
/// <summary>
/// Outcome of executing a task.
/// </summary>
public sealed record TaskResult
{
    /// <summary>Identifier of the task that was executed.</summary>
    public required Guid TaskId { get; init; }

    /// <summary><c>true</c> when the task succeeded.</summary>
    public required bool Success { get; init; }

    /// <summary>Optional result message.</summary>
    public string? Message { get; init; }

    /// <summary>Optional result data (JSON).</summary>
    public string? ResultData { get; init; }

    /// <summary>How long the task execution took.</summary>
    public TimeSpan Duration { get; init; }

    /// <summary>Time at which the task completed.</summary>
    public DateTimeOffset CompletedAt { get; init; }
}
|
||||
@@ -0,0 +1,111 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
|
||||
/// <summary>
/// Task that asks an agent to deploy containers to a target.
/// </summary>
public sealed record DeploymentAgentTask : AgentTask
{
    /// <summary>
    /// Discriminator derived from <see cref="DeployType"/>; "unknown" for values without a mapping.
    /// </summary>
    public override string TaskType
    {
        get
        {
            switch (DeployType)
            {
                case DeploymentTaskType.DockerDeploy:
                    return "docker_deploy";
                case DeploymentTaskType.ComposeDeploy:
                    return "compose_deploy";
                case DeploymentTaskType.DockerRollback:
                    return "docker_rollback";
                case DeploymentTaskType.ComposeRollback:
                    return "compose_rollback";
                default:
                    return "unknown";
            }
        }
    }

    /// <summary>Kind of deployment operation to perform.</summary>
    public required DeploymentTaskType DeployType { get; init; }

    /// <summary>Identifier of the release being deployed.</summary>
    public required Guid ReleaseId { get; init; }

    /// <summary>Name of the release, for logging.</summary>
    public required string ReleaseName { get; init; }

    /// <summary>Components to deploy.</summary>
    public required ImmutableArray<AgentDeploymentComponent> Components { get; init; }

    /// <summary>Compose lock content (for compose deployments).</summary>
    public string? ComposeLock { get; init; }

    /// <summary>Version sticker content.</summary>
    public string? VersionSticker { get; init; }

    /// <summary>Deployment variables; empty by default.</summary>
    public ImmutableDictionary<string, string> Variables { get; init; } = ImmutableDictionary<string, string>.Empty;

    /// <summary>Timeout for the deployment operation; 30 minutes by default.</summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromMinutes(30);
}
|
||||
|
||||
/// <summary>
/// Kinds of deployment operation an agent can be asked to perform.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly; keep them stable if they are persisted
/// or sent over the wire.
/// </remarks>
public enum DeploymentTaskType
{
    /// <summary>
    /// Deploy using Docker Engine.
    /// </summary>
    DockerDeploy = 0,

    /// <summary>
    /// Deploy using Docker Compose.
    /// </summary>
    ComposeDeploy = 1,

    /// <summary>
    /// Roll back using Docker Engine.
    /// </summary>
    DockerRollback = 2,

    /// <summary>
    /// Roll back using Docker Compose.
    /// </summary>
    ComposeRollback = 3
}
|
||||
|
||||
/// <summary>
/// A single component that an agent is asked to deploy.
/// </summary>
public sealed record AgentDeploymentComponent
{
    /// <summary>
    /// Component name.
    /// </summary>
    public required string Name { get; init; }

    /// <summary>
    /// Full image reference with digest.
    /// </summary>
    public required string Image { get; init; }

    /// <summary>
    /// Image digest.
    /// </summary>
    public required string Digest { get; init; }

    /// <summary>
    /// Component-specific configuration. Defaults to an empty dictionary.
    /// </summary>
    public ImmutableDictionary<string, string> Config { get; init; } = ImmutableDictionary<string, string>.Empty;
}
|
||||
@@ -0,0 +1,54 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.ReleaseOrchestrator.Agent.Models;
|
||||
|
||||
/// <summary>
/// One-time registration token for agent registration.
/// </summary>
/// <remarks>
/// <see cref="ToString"/> is overridden so that the secret <see cref="Token"/> value
/// never appears in the record's string form (for example when the record is logged).
/// </remarks>
public sealed record RegistrationToken
{
    /// <summary>
    /// Unique identifier for the token.
    /// </summary>
    public required Guid Id { get; init; }

    /// <summary>
    /// Tenant this token belongs to.
    /// </summary>
    public required Guid TenantId { get; init; }

    /// <summary>
    /// The secure token value. Treat as a secret.
    /// </summary>
    public required string Token { get; init; }

    /// <summary>
    /// Intended agent name.
    /// </summary>
    public required string AgentName { get; init; }

    /// <summary>
    /// Intended display name.
    /// </summary>
    public required string DisplayName { get; init; }

    /// <summary>
    /// Capabilities the agent will have.
    /// </summary>
    public required ImmutableArray<AgentCapability> Capabilities { get; init; }

    /// <summary>
    /// When the token expires.
    /// </summary>
    public required DateTimeOffset ExpiresAt { get; init; }

    /// <summary>
    /// When the token was created.
    /// </summary>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// Whether the token has been used.
    /// </summary>
    public required bool IsUsed { get; init; }

    /// <summary>
    /// Returns a record-style description with the <see cref="Token"/> value redacted.
    /// </summary>
    /// <returns>
    /// A string listing the members in declaration order; the token is replaced by
    /// <c>[REDACTED]</c> and the capabilities are summarised by their count.
    /// </returns>
    public override string ToString()
    {
        // A default (uninitialised) ImmutableArray throws on Length, so guard with IsDefault.
        var capabilityCount = Capabilities.IsDefault ? 0 : Capabilities.Length;

        // The compiler-generated record ToString would print every public property,
        // including the secret token; this override deliberately leaves it out.
        return $"{nameof(RegistrationToken)} {{ {nameof(Id)} = {Id}, {nameof(TenantId)} = {TenantId}, " +
               $"{nameof(Token)} = [REDACTED], {nameof(AgentName)} = {AgentName}, " +
               $"{nameof(DisplayName)} = {DisplayName}, {nameof(Capabilities)} = [{capabilityCount}], " +
               $"{nameof(ExpiresAt)} = {ExpiresAt}, {nameof(CreatedAt)} = {CreatedAt}, " +
               $"{nameof(IsUsed)} = {IsUsed} }}";
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user