21 KiB
21 KiB
AGENTS: Deployment Agents
Purpose: Lightweight deployment agents for target execution.
Agent Types
| Agent Type | Transport | Target Types |
|---|---|---|
| agent-docker | gRPC | Docker hosts |
| agent-compose | gRPC | Docker Compose hosts |
| agent-ssh | SSH | Linux remote hosts |
| agent-winrm | WinRM | Windows remote hosts |
| agent-ecs | AWS API | AWS ECS services |
| agent-nomad | Nomad API | HashiCorp Nomad jobs |
Modules
Module: agent-core
| Aspect | Specification |
|---|---|
| Responsibility | Shared agent runtime; task execution framework |
| Protocol | gRPC for communication with Stella Core |
| Security | mTLS authentication; short-lived JWT for tasks |
Agent Lifecycle:
- Agent starts with registration token
- Agent registers with capabilities and labels
- Agent sends heartbeats (default: 30s interval)
- Agent receives tasks from Stella Core
- Agent reports task completion/failure
Agent Task Protocol:
// Task assignment (Core → Agent)
/**
 * A unit of work that Stella Core assigns to an agent.
 */
interface AgentTask {
  /** Unique identifier of this task. */
  id: UUID;

  /** Kind of operation the agent is asked to perform. */
  type: TaskType;

  /** Identifier of the target the task is directed at. */
  targetId: UUID;

  /** Operation-specific data; its shape presumably depends on `type` — confirm against `TaskPayload`. */
  payload: TaskPayload;

  /** Credentials for the task, carried in encrypted form. */
  credentials: EncryptedCredentials;

  /** Execution time limit. NOTE(review): the unit (ms vs. s) is not stated here — confirm. */
  timeout: number;

  /** Scheduling priority of the task. */
  priority: TaskPriority;

  /** Caller-supplied key; presumably used to de-duplicate repeated deliveries — confirm. */
  idempotencyKey: string;

  /** When the task was assigned. */
  assignedAt: DateTime;

  /** When the assignment stops being valid. */
  expiresAt: DateTime;
}
/**
 * Operations that can be requested through `AgentTask.type`.
 */
type TaskType =
  // Release operations
  | "deploy"
  | "rollback"
  // Target inspection
  | "health-check"
  | "inspect"
  // Generic remote actions
  | "execute-command"
  | "upload-files"
  // Version sticker handling
  | "write-sticker"
  | "read-sticker";
/**
 * Payload of a deploy task.
 */
interface DeployTaskPayload {
  /** Image reference to deploy. */
  image: string;

  /** Content digest the image must resolve to; executors run `image@digest`, not a tag. */
  digest: string;

  /** Target-specific deployment settings. */
  config: DeployConfig;

  /** Artifacts that accompany the deployment (e.g. compose lock files, deploy scripts). */
  artifacts: ArtifactReference[];

  /** Digest that was deployed before this task, when known. */
  previousDigest?: string;

  /**
   * Optional hooks run around the deployment.
   *
   * Declared optional because every executor reads it as `task.hooks?.…`,
   * i.e. payloads without a `hooks` object are already tolerated at runtime.
   */
  hooks?: {
    /** Hook run before the deployment step. */
    preDeploy?: HookConfig;
    /** Hook run after the deployment step. */
    postDeploy?: HookConfig;
  };
}
// Task result (Agent → Core)
/**
 * Outcome of a task as reported by the agent.
 */
interface TaskResult {
  /** Identifier of the task this result belongs to. */
  taskId: UUID;

  /** Whether the task succeeded. */
  success: boolean;

  /** When execution started. */
  startedAt: DateTime;

  /** When execution finished. */
  completedAt: DateTime;

  // --- Success details ---

  /** Free-form outputs produced by the task. */
  outputs?: Record<string, any>;

  /** Artifacts produced by the task. */
  artifacts?: ArtifactReference[];

  // --- Failure details ---

  /** Error message. */
  error?: string;

  /** Classification of the error. */
  errorType?: string;

  /** Whether the failure is flagged as retriable. */
  retriable?: boolean;

  // --- Logs ---

  /** Log text for the task. */
  logs: string;

  // --- Metrics ---

  /** Phase durations; milliseconds, going by the `Ms` suffix of the field names. */
  metrics: {
    pullDurationMs?: number;
    deployDurationMs?: number;
    healthCheckDurationMs?: number;
  };
}
Module: agent-docker
| Aspect | Specification |
|---|---|
| Responsibility | Docker container deployment |
| Dependencies | Docker Engine API |
| Capabilities | docker.deploy, docker.rollback, docker.inspect |
Docker Agent Implementation:
/**
 * Target executor that deploys a single container through the Docker client
 * held in `docker`.
 *
 * Helpers such as `log`, `getImageDigest`, `findContainer`, `runHook`,
 * `waitForHealthy`, `rollbackContainer`, the `build*` methods and `config`
 * are used here but not defined in this class; confirm their exact behavior
 * against their definitions.
 */
class DockerAgent implements TargetExecutor {
  private docker: Docker;

  /**
   * Deploys `image@digest` as the container named `config.containerName`.
   *
   * Steps: pull and verify the digest, run the pre-deploy hook, stop and
   * rename any existing container, create/start/health-check the new one,
   * run the post-deploy hook, then remove the previous container unless
   * `config.cleanupPrevious` is `false`.
   *
   * @param task Deploy payload (image, digest, config, optional hooks).
   * @returns Result carrying the new container id and the payload's `previousDigest`.
   * @throws DigestMismatchError when the pulled image's digest differs from `digest`.
   * @throws HealthCheckFailedError when the new container does not become healthy.
   */
  async deploy(task: DeployTaskPayload): Promise<DeployResult> {
    const { image, digest, config, previousDigest } = task;
    const containerName = config.containerName;

    // 1. Pull image and verify digest
    this.log(`Pulling image ${image}@${digest}`);
    await this.docker.pull(image, { digest });
    const pulledDigest = await this.getImageDigest(image);
    if (pulledDigest !== digest) {
      throw new DigestMismatchError(
        `Expected digest ${digest}, got ${pulledDigest}. Possible tampering detected.`
      );
    }

    // 2. Run pre-deploy hook
    if (task.hooks?.preDeploy) {
      await this.runHook(task.hooks.preDeploy, "pre-deploy");
    }

    // 3. Stop and rename existing container
    const existingContainer = await this.findContainer(containerName);
    if (existingContainer) {
      this.log(`Stopping existing container ${containerName}`);
      await existingContainer.stop({ t: 10 });
      await existingContainer.rename(`${containerName}-previous-${Date.now()}`);
    }

    // 4-6. Create, start and health-check the replacement. Any failure in
    // this window triggers a rollback, because the old container has already
    // been stopped and renamed at this point.
    const container = await this.launchContainer(task, containerName, existingContainer);

    // 7. Run post-deploy hook
    if (task.hooks?.postDeploy) {
      await this.runHook(task.hooks.postDeploy, "post-deploy");
    }

    // 8. Cleanup previous container
    if (existingContainer && config.cleanupPrevious !== false) {
      this.log(`Removing previous container`);
      await existingContainer.remove({ force: true });
    }

    return {
      success: true,
      containerId: container.id,
      previousDigest: previousDigest,
    };
  }

  /**
   * Creates, starts and (when configured) health-checks the new container.
   *
   * On any failure `rollbackContainer` is invoked before the original error
   * is rethrown. A failure of the rollback itself is logged and does not
   * replace the original error.
   */
  private async launchContainer(
    task: DeployTaskPayload,
    containerName: string,
    existingContainer: unknown
  ) {
    const { image, digest, config } = task;
    try {
      // 4. Create new container
      this.log(`Creating container ${containerName} from ${image}@${digest}`);
      const container = await this.docker.createContainer({
        name: containerName,
        Image: `${image}@${digest}`, // Always use digest, not tag
        Env: this.buildEnvVars(config.environment),
        HostConfig: {
          PortBindings: this.buildPortBindings(config.ports),
          Binds: this.buildBindMounts(config.volumes),
          RestartPolicy: { Name: config.restartPolicy || "unless-stopped" },
          Memory: config.memoryLimit,
          CpuQuota: config.cpuLimit,
        },
        Labels: {
          "stella.release.id": config.releaseId,
          "stella.release.name": config.releaseName,
          "stella.digest": digest,
          "stella.deployed.at": new Date().toISOString(),
        },
      });

      // 5. Start container
      this.log(`Starting container ${containerName}`);
      await container.start();

      // 6. Wait for container to be healthy
      if (config.healthCheck) {
        this.log(`Waiting for container health check`);
        const healthy = await this.waitForHealthy(container, config.healthCheck.timeout);
        if (!healthy) {
          throw new HealthCheckFailedError(`Container ${containerName} failed health check`);
        }
      }

      return container;
    } catch (error: unknown) {
      try {
        await this.rollbackContainer(containerName, existingContainer);
      } catch (rollbackError: unknown) {
        const reason =
          rollbackError instanceof Error ? rollbackError.message : String(rollbackError);
        this.log(`Rollback of container ${containerName} failed: ${reason}`);
      }
      throw error;
    }
  }

  /**
   * Rolls the target back.
   *
   * With `targetDigest` set, this is a regular deploy of that digest.
   * Otherwise the container found under `<containerName>-previous-*` is
   * renamed back to `containerName` and started, after the current container
   * (if any) has been stopped and renamed to `<containerName>-failed-<timestamp>`.
   *
   * NOTE(review): the deploy path relies on `task` also carrying the fields
   * `deploy` needs (`image`, `config`, …), and the restore path relies on
   * `findContainer` accepting a `*` pattern — confirm both.
   *
   * @throws RollbackError when no previous container is found.
   */
  async rollback(task: RollbackTaskPayload): Promise<DeployResult> {
    const { containerName, targetDigest } = task;

    if (targetDigest) {
      // Deploy specific digest
      return this.deploy({ ...task, digest: targetDigest });
    }

    // Find and restore previous container
    const previousContainer = await this.findContainer(`${containerName}-previous-*`);
    if (!previousContainer) {
      throw new RollbackError(`No previous container found for ${containerName}`);
    }

    const currentContainer = await this.findContainer(containerName);
    if (currentContainer) {
      await currentContainer.stop({ t: 10 });
      await currentContainer.rename(`${containerName}-failed-${Date.now()}`);
    }

    await previousContainer.rename(containerName);
    await previousContainer.start();

    return { success: true, containerId: previousContainer.id };
  }

  /**
   * Writes the version sticker as pretty-printed JSON.
   *
   * With `config.stickerLocation === "volume"` the file is written by a
   * short-lived `alpine` container that has `config.stickerVolume` bound at
   * `/var/stella`; otherwise it is written to the local filesystem.
   *
   * NOTE(review): the volume is always bound at `/var/stella`, so a custom
   * `config.stickerPath` outside that directory would not land in the volume.
   */
  async writeSticker(sticker: VersionSticker): Promise<void> {
    const stickerPath = this.config.stickerPath || "/var/stella/version.json";
    const stickerContent = JSON.stringify(sticker, null, 2);

    if (this.config.stickerLocation === "volume") {
      // Both values are shell-quoted: sticker fields may contain single
      // quotes, which would otherwise end the quoted string and let the rest
      // be interpreted as shell syntax. printf '%s\n' emits the content
      // verbatim plus a trailing newline, independent of how the shell's
      // echo treats backslashes.
      const quotedContent = DockerAgent.shellQuote(stickerContent);
      const quotedPath = DockerAgent.shellQuote(stickerPath);
      await this.docker.run("alpine", [
        "sh", "-c",
        `printf '%s\\n' ${quotedContent} > ${quotedPath}`
      ], {
        HostConfig: { Binds: [`${this.config.stickerVolume}:/var/stella`] }
      });
    } else {
      fs.writeFileSync(stickerPath, stickerContent);
    }
  }

  /** Wraps `value` in single quotes for POSIX sh, escaping embedded single quotes. */
  private static shellQuote(value: string): string {
    return "'" + value.replace(/'/g, "'\\''") + "'";
  }
}
Module: agent-compose
| Aspect | Specification |
|---|---|
| Responsibility | Docker Compose stack deployment |
| Dependencies | Docker Compose CLI |
| Capabilities | compose.deploy, compose.rollback, compose.inspect |
Compose Agent Implementation:
/**
 * Target executor that deploys a Docker Compose stack from a digest-pinned
 * compose lock file.
 *
 * Helpers such as `log`, `fetchArtifact`, `runHook`, `runCompose`,
 * `waitForServicesHealthy`, `rollbackToBackup` and `writeSticker` are used
 * here but not defined in this class; confirm their behavior against their
 * definitions.
 */
class ComposeAgent implements TargetExecutor {
  /**
   * Deploys the stack described by the payload's `compose_lock` artifact
   * into `config.deploymentDirectory`.
   *
   * Steps: write the lock file, run the pre-deploy hook, verify pinned
   * digests, pull, `up -d`, optional health wait, post-deploy hook, write
   * the version sticker.
   *
   * @throws Error when no `compose_lock` artifact is supplied, or when the
   *   lock file fails structural checks in `verifyDigests`.
   * @throws DigestMismatchError when a pinned digest differs from the expected one.
   * @throws HealthCheckFailedError when services do not become healthy.
   */
  async deploy(task: DeployTaskPayload): Promise<DeployResult> {
    const { artifacts, config } = task;
    const deployDir = config.deploymentDirectory;

    // 1. Write compose lock file
    const composeLock = artifacts.find(a => a.type === "compose_lock");
    if (!composeLock) {
      // Fail with a clear message instead of passing `undefined` to fetchArtifact.
      throw new Error(`No compose_lock artifact supplied for deployment to ${deployDir}`);
    }
    const composeContent = await this.fetchArtifact(composeLock);
    const composePath = path.join(deployDir, "compose.stella.lock.yml");
    await fs.writeFile(composePath, composeContent);

    // 2. Run pre-deploy hook
    if (task.hooks?.preDeploy) {
      await this.runHook(task.hooks.preDeploy, deployDir);
    }

    // 3. Verify digests. This only inspects the lock file, so it runs before
    // the pull: an unpinned or mismatched image is rejected without being
    // fetched. It stays after the pre-deploy hook so the file is checked in
    // the state that will actually be deployed.
    await this.verifyDigests(composePath, config.expectedDigests);

    // 4. Pull images
    this.log("Pulling images...");
    await this.runCompose(deployDir, ["pull"]);

    // 5. Deploy
    this.log("Deploying services...");
    await this.runCompose(deployDir, ["up", "-d", "--remove-orphans", "--force-recreate"]);

    // 6. Wait for services to be healthy
    if (config.healthCheck) {
      const healthy = await this.waitForServicesHealthy(deployDir, config.healthCheck.timeout);
      if (!healthy) {
        // NOTE(review): this method does not create a backup itself; confirm
        // where the backup restored by rollbackToBackup comes from.
        await this.rollbackToBackup(deployDir);
        throw new HealthCheckFailedError("Services failed health check");
      }
    }

    // 7. Run post-deploy hook
    if (task.hooks?.postDeploy) {
      await this.runHook(task.hooks.postDeploy, deployDir);
    }

    // 8. Write version sticker
    await this.writeSticker(config.sticker, deployDir);

    return { success: true };
  }

  /**
   * Checks that each service named in `expectedDigests` exists in the compose
   * file, has an image pinned with `@sha256:`, and that the pinned digest
   * equals the expected one.
   *
   * Services that are not listed in `expectedDigests` are not inspected.
   *
   * @param composePath Path of the compose file to read.
   * @param expectedDigests Map of service name to expected digest.
   * @throws Error when a service is missing, has no image, or is not digest-pinned.
   * @throws DigestMismatchError when the pinned digest differs from the expected one.
   */
  private async verifyDigests(
    composePath: string,
    expectedDigests: Record<string, string>
  ): Promise<void> {
    const composeContent = yaml.parse(await fs.readFile(composePath, "utf-8"));
    // An empty file or one without a `services` map is treated as having no services.
    const services = composeContent?.services ?? {};

    for (const [service, expectedDigest] of Object.entries(expectedDigests)) {
      const serviceConfig = services[service];
      if (!serviceConfig) {
        throw new Error(`Service ${service} not found in compose file`);
      }

      const image: unknown = serviceConfig.image;
      if (typeof image !== "string") {
        // e.g. a service defined only through `build:`; previously a TypeError.
        throw new Error(`Service ${service} has no image defined in compose file`);
      }
      if (!image.includes("@sha256:")) {
        throw new Error(`Service ${service} image not pinned to digest: ${image}`);
      }

      const actualDigest = image.split("@")[1];
      if (actualDigest !== expectedDigest) {
        throw new DigestMismatchError(
          `Service ${service}: expected ${expectedDigest}, got ${actualDigest}`
        );
      }
    }
  }
}
Module: agent-ssh
| Aspect | Specification |
|---|---|
| Responsibility | SSH remote execution (agentless) |
| Dependencies | SSH client library |
| Capabilities | ssh.deploy, ssh.execute, ssh.upload |
SSH Remote Executor:
/**
 * Target executor that deploys to a remote host over SSH.
 *
 * Helpers such as `exec`, `fetchArtifact`, `uploadFile`, `runRemoteHook`,
 * `runHealthCheck`, `rollback`, `writeSticker` and the `secrets` store are
 * used here but not defined in this class; confirm their behavior against
 * their definitions.
 */
class SSHRemoteExecutor implements TargetExecutor {
  /**
   * Opens the SSH connection used by subsequent operations.
   *
   * The private key is resolved from the secret store via
   * `config.privateKeyRef`. Port defaults to 22 and the ready timeout to
   * 30000 when not configured.
   */
  async connect(config: SSHConnectionConfig): Promise<void> {
    const privateKey = await this.secrets.getSecret(config.privateKeyRef);

    this.ssh = new SSHClient();
    await this.ssh.connect({
      host: config.host,
      port: config.port || 22,
      username: config.username,
      privateKey: privateKey.value,
      readyTimeout: config.connectionTimeout || 30000,
    });
  }

  /**
   * Deploys the payload's artifacts into `config.deploymentDirectory` on the
   * remote host.
   *
   * Steps: prepare directories, back up the current contents, upload
   * artifacts, pre-deploy hook, run the `deploy_script` artifact (if any),
   * post-deploy hook, optional health check, write the version sticker,
   * remove the backup. The SSH connection is closed in all cases.
   *
   * Remote paths are built with POSIX separators regardless of the agent's
   * own OS, and are single-quoted wherever they are placed in a shell command.
   *
   * @throws DeploymentError when the deploy script exits non-zero.
   * @throws HealthCheckFailedError when the health check fails.
   */
  async deploy(task: DeployTaskPayload): Promise<DeployResult> {
    const { artifacts, config } = task;
    const deployDir = config.deploymentDirectory;
    const backupDir = path.posix.join(deployDir, ".stella-backup");
    const quotedDeployDir = SSHRemoteExecutor.shellQuote(deployDir);
    const quotedBackupDir = SSHRemoteExecutor.shellQuote(backupDir);

    try {
      // 1. Ensure deployment directory exists. A backup left behind by an
      // earlier failed run is removed first so the new backup is a clean
      // snapshot rather than a merge of two states.
      await this.exec(`rm -rf ${quotedBackupDir}`);
      await this.exec(`mkdir -p ${quotedDeployDir}`);
      await this.exec(`mkdir -p ${quotedBackupDir}`);

      // 2. Backup current deployment (the glob stays outside the quotes so
      // the remote shell still expands it)
      await this.exec(`cp -r ${quotedDeployDir}/* ${quotedBackupDir}/ 2>/dev/null || true`);

      // 3. Upload artifacts
      for (const artifact of artifacts) {
        const content = await this.fetchArtifact(artifact);
        const remotePath = path.posix.join(deployDir, artifact.name);
        await this.uploadFile(content, remotePath);
      }

      // 4. Run pre-deploy hook
      if (task.hooks?.preDeploy) {
        await this.runRemoteHook(task.hooks.preDeploy, deployDir);
      }

      // 5. Execute deployment script
      const deployScript = artifacts.find(a => a.type === "deploy_script");
      if (deployScript) {
        const scriptPath = path.posix.join(deployDir, deployScript.name);
        const quotedScriptPath = SSHRemoteExecutor.shellQuote(scriptPath);
        await this.exec(`chmod +x ${quotedScriptPath}`);
        const result = await this.exec(quotedScriptPath, { cwd: deployDir, timeout: config.deploymentTimeout });
        if (result.exitCode !== 0) {
          throw new DeploymentError(`Deploy script failed: ${result.stderr}`);
        }
      }

      // 6. Run post-deploy hook
      if (task.hooks?.postDeploy) {
        await this.runRemoteHook(task.hooks.postDeploy, deployDir);
      }

      // 7. Health check
      if (config.healthCheck) {
        const healthy = await this.runHealthCheck(config.healthCheck);
        if (!healthy) {
          await this.rollback(task);
          throw new HealthCheckFailedError("Health check failed");
        }
      }

      // 8. Write version sticker
      await this.writeSticker(config.sticker, deployDir);

      // 9. Cleanup backup
      await this.exec(`rm -rf ${quotedBackupDir}`);

      return { success: true };
    } finally {
      // `ssh` is unset when connect() was never called or failed; the guard
      // keeps the cleanup from replacing the real error with a TypeError.
      this.ssh?.end();
    }
  }

  /** Wraps `value` in single quotes for POSIX sh, escaping embedded single quotes. */
  private static shellQuote(value: string): string {
    return "'" + value.replace(/'/g, "'\\''") + "'";
  }
}
Module: agent-winrm
| Aspect | Specification |
|---|---|
| Responsibility | WinRM remote execution (agentless) |
| Dependencies | WinRM client library |
| Capabilities | winrm.deploy, winrm.execute, winrm.upload |
| Authentication | NTLM, Kerberos, Basic |
Module: agent-ecs
| Aspect | Specification |
|---|---|
| Responsibility | AWS ECS service deployment |
| Dependencies | AWS SDK |
| Capabilities | ecs.deploy, ecs.rollback, ecs.inspect |
Module: agent-nomad
| Aspect | Specification |
|---|---|
| Responsibility | HashiCorp Nomad job deployment |
| Dependencies | Nomad API client |
| Capabilities | nomad.deploy, nomad.rollback, nomad.inspect |
Agent Security Model
Registration Flow
┌─────────────────────────────────────────────────────────────────────────────┐
│ AGENT REGISTRATION FLOW │
│ │
│ 1. Admin generates registration token (one-time use) │
│ POST /api/v1/admin/agent-tokens │
│ → { token: "reg_xxx", expiresAt: "..." } │
│ │
│ 2. Agent starts with registration token │
│ ./stella-agent --register --token=reg_xxx │
│ │
│ 3. Agent requests mTLS certificate │
│ POST /api/v1/agents/register │
│ Headers: X-Registration-Token: reg_xxx │
│ Body: { name, version, capabilities, csr } │
│ → { agentId, certificate, caCertificate } │
│ │
│ 4. Agent establishes mTLS connection │
│ Uses issued certificate for all subsequent requests │
│ │
│ 5. Agent requests short-lived JWT for task execution │
│ POST /api/v1/agents/token (over mTLS) │
│ → { token, expiresIn: 3600 } // 1 hour │
│ │
│ 6. Agent refreshes token before expiration │
│ Token refresh only over mTLS connection │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
Communication Security
┌─────────────────────────────────────────────────────────────────────────────┐
│ AGENT COMMUNICATION SECURITY │
│ │
│ ┌──────────────┐ ┌──────────────┐ │
│ │ AGENT │ │ STELLA CORE │ │
│ └──────┬───────┘ └──────┬───────┘ │
│ │ │ │
│ │ mTLS (mutual TLS) │ │
│ │ - Agent cert signed by Stella CA │ │
│ │ - Server cert verified by Agent │ │
│ │ - TLS 1.3 only │ │
│ │ - Perfect forward secrecy │ │
│ │◄───────────────────────────────────────►│ │
│ │ │ │
│ │ Encrypted payload │ │
│ │ - Task payloads encrypted with │ │
│ │ agent-specific key │ │
│ │ - Logs encrypted in transit │ │
│ │◄───────────────────────────────────────►│ │
│ │ │ │
│ │ Heartbeat + capability refresh │ │
│ │ - Every 30 seconds │ │
│ │ - Signed with agent key │ │
│ │─────────────────────────────────────────►│ │
│ │ │ │
│ │ Task assignment │ │
│ │ - Contains short-lived credentials │ │
│ │ - Scoped to specific target │ │
│ │ - Expires after task timeout │ │
│ │◄─────────────────────────────────────────│ │
│ │ │ │
└─────────────────────────────────────────────────────────────────────────────┘
Database Schema
-- Agents
-- One row per registered agent; agent names are unique within a tenant.
CREATE TABLE release.agents (
    -- Identity
    id                      UUID          PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id               UUID          NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,  -- rows are removed with their tenant
    name                    VARCHAR(255)  NOT NULL,
    version                 VARCHAR(50)   NOT NULL,

    -- Routing metadata
    capabilities            JSONB         NOT NULL DEFAULT '[]',
    labels                  JSONB         NOT NULL DEFAULT '{}',

    -- Liveness
    status                  VARCHAR(50)   NOT NULL DEFAULT 'offline'
                                          CHECK (status IN ('online', 'offline', 'degraded')),
    last_heartbeat          TIMESTAMPTZ,  -- NULL until a value is recorded
    resource_usage          JSONB,

    -- Security
    certificate_fingerprint VARCHAR(64),

    -- Bookkeeping
    created_at              TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    updated_at              TIMESTAMPTZ   NOT NULL DEFAULT NOW(),

    UNIQUE (tenant_id, name)
);
-- Lookup of agents by tenant.
CREATE INDEX idx_agents_tenant
    ON release.agents (tenant_id);

-- Filtering by status.
CREATE INDEX idx_agents_status
    ON release.agents (status);

-- GIN index over the capabilities JSONB column.
CREATE INDEX idx_agents_capabilities
    ON release.agents USING GIN (capabilities);
API Endpoints
# Agent Registration
POST /api/v1/agents/register
Headers: X-Registration-Token: {token}
Body: { name, version, capabilities, csr }
Response: { agentId, certificate, caCertificate }
# Agent Management
GET /api/v1/agents
Query: ?status={online|offline|degraded}&capability={type}
Response: Agent[]
GET /api/v1/agents/{id}
Response: Agent
PUT /api/v1/agents/{id}
Body: { labels?, capabilities? }
Response: Agent
DELETE /api/v1/agents/{id}
Response: { deleted: true }
# Agent Communication
POST /api/v1/agents/{id}/heartbeat
Body: { status, resourceUsage, capabilities }
Response: { tasks: AgentTask[] }
POST /api/v1/agents/{id}/tasks/{taskId}/complete
Body: { success, result, logs }
Response: { acknowledged: true }
# WebSocket for real-time task stream
WS /api/v1/agents/{id}/task-stream
Messages:
- { type: "task_assigned", task: AgentTask }
- { type: "task_cancelled", taskId }