# AGENTS: Deployment Agents

**Purpose**: Lightweight deployment agents for target execution.

## Agent Types

| Agent Type | Transport | Target Types |
|------------|-----------|--------------|
| `agent-docker` | gRPC | Docker hosts |
| `agent-compose` | gRPC | Docker Compose hosts |
| `agent-ssh` | SSH | Linux remote hosts |
| `agent-winrm` | WinRM | Windows remote hosts |
| `agent-ecs` | AWS API | AWS ECS services |
| `agent-nomad` | Nomad API | HashiCorp Nomad jobs |

## Modules

### Module: `agent-core`

| Aspect | Specification |
|--------|---------------|
| **Responsibility** | Shared agent runtime; task execution framework |
| **Protocol** | gRPC for communication with Stella Core |
| **Security** | mTLS authentication; short-lived JWT for tasks |

**Agent Lifecycle**:

1. Agent starts with registration token
2. Agent registers with capabilities and labels
3. Agent sends heartbeats (default: 30s interval)
4. Agent receives tasks from Stella Core
5. Agent reports task completion/failure

**Agent Task Protocol**:

```typescript
// Task assignment (Core → Agent)
interface AgentTask {
  id: UUID;
  type: TaskType;
  targetId: UUID;
  payload: TaskPayload;
  credentials: EncryptedCredentials;
  timeout: number;
  priority: TaskPriority;
  idempotencyKey: string;
  assignedAt: DateTime;
  expiresAt: DateTime;
}

type TaskType =
  | "deploy"
  | "rollback"
  | "health-check"
  | "inspect"
  | "execute-command"
  | "upload-files"
  | "write-sticker"
  | "read-sticker";

interface DeployTaskPayload {
  image: string;
  digest: string;
  config: DeployConfig;
  artifacts: ArtifactReference[];
  previousDigest?: string;
  hooks: {
    preDeploy?: HookConfig;
    postDeploy?: HookConfig;
  };
}

// Task result (Agent → Core)
interface TaskResult {
  taskId: UUID;
  success: boolean;
  startedAt: DateTime;
  completedAt: DateTime;

  // Success details
  outputs?: Record<string, unknown>;
  artifacts?: ArtifactReference[];

  // Failure details
  error?: string;
  errorType?: string;
  retriable?: boolean;

  // Logs
  logs: string;

  // Metrics
  metrics: {
    pullDurationMs?: number;
    deployDurationMs?: number;
    healthCheckDurationMs?: number;
  };
}
```

---

### Module: `agent-docker`

| Aspect | Specification |
|--------|---------------|
| **Responsibility** | Docker container deployment |
| **Dependencies** | Docker Engine API |
| **Capabilities** | `docker.deploy`, `docker.rollback`, `docker.inspect` |

**Docker Agent Implementation**:

```typescript
class DockerAgent implements TargetExecutor {
  private docker: Docker;

  /**
   * Replaces the named container with one created from `image@digest`.
   *
   * The existing container (if any) is stopped and renamed before the new
   * one is created. If creating, starting, or health-checking the new
   * container fails, `rollbackContainer` is invoked once and the original
   * error is rethrown.
   *
   * @throws DigestMismatchError if the pulled digest differs from `task.digest`.
   * @throws HealthCheckFailedError if the new container does not become healthy.
   */
  async deploy(task: DeployTaskPayload): Promise<DeployResult> {
    const { image, digest, config, previousDigest } = task;
    const containerName = config.containerName;

    // 1. Pull image and verify digest
    this.log(`Pulling image ${image}@${digest}`);
    await this.docker.pull(image, { digest });

    const pulledDigest = await this.getImageDigest(image);
    if (pulledDigest !== digest) {
      throw new DigestMismatchError(
        `Expected digest ${digest}, got ${pulledDigest}. Possible tampering detected.`
      );
    }

    // 2. Run pre-deploy hook
    if (task.hooks?.preDeploy) {
      await this.runHook(task.hooks.preDeploy, "pre-deploy");
    }

    // 3. Stop and rename existing container
    const existingContainer = await this.findContainer(containerName);
    if (existingContainer) {
      this.log(`Stopping existing container ${containerName}`);
      await existingContainer.stop({ t: 10 });
      await existingContainer.rename(`${containerName}-previous-${Date.now()}`);
    }

    // From here on the old container is down, so every failure in steps 4-6
    // (not only a failed health check) must trigger a rollback.
    let container: Awaited<ReturnType<Docker["createContainer"]>>;
    try {
      // 4. Create new container
      this.log(`Creating container ${containerName} from ${image}@${digest}`);
      container = await this.docker.createContainer({
        name: containerName,
        Image: `${image}@${digest}`, // Always use digest, not tag
        Env: this.buildEnvVars(config.environment),
        HostConfig: {
          PortBindings: this.buildPortBindings(config.ports),
          Binds: this.buildBindMounts(config.volumes),
          RestartPolicy: { Name: config.restartPolicy || "unless-stopped" },
          Memory: config.memoryLimit,
          CpuQuota: config.cpuLimit,
        },
        Labels: {
          "stella.release.id": config.releaseId,
          "stella.release.name": config.releaseName,
          "stella.digest": digest,
          "stella.deployed.at": new Date().toISOString(),
        },
      });

      // 5. Start container
      this.log(`Starting container ${containerName}`);
      await container.start();

      // 6. Wait for container to be healthy
      if (config.healthCheck) {
        this.log(`Waiting for container health check`);
        const healthy = await this.waitForHealthy(container, config.healthCheck.timeout);
        if (!healthy) {
          throw new HealthCheckFailedError(`Container ${containerName} failed health check`);
        }
      }
    } catch (error) {
      try {
        await this.rollbackContainer(containerName, existingContainer);
      } catch (rollbackError) {
        // Do not let a failed rollback hide the error that caused it.
        this.log(`Rollback of ${containerName} failed: ${String(rollbackError)}`);
      }
      throw error;
    }

    // 7. Run post-deploy hook
    if (task.hooks?.postDeploy) {
      await this.runHook(task.hooks.postDeploy, "post-deploy");
    }

    // 8. Cleanup previous container
    if (existingContainer && config.cleanupPrevious !== false) {
      this.log(`Removing previous container`);
      await existingContainer.remove({ force: true });
    }

    return {
      success: true,
      containerId: container.id,
      previousDigest,
    };
  }

  /**
   * Rolls back either to an explicit digest (by redeploying it) or to the
   * container that `deploy` renamed to `<name>-previous-<timestamp>`.
   *
   * @throws RollbackError if no previous container is found.
   */
  async rollback(task: RollbackTaskPayload): Promise<DeployResult> {
    const { containerName, targetDigest } = task;

    if (targetDigest) {
      // Deploy specific digest
      return this.deploy({ ...task, digest: targetDigest });
    }

    // Find and restore previous container
    // NOTE(review): several `-previous-<timestamp>` containers can exist when
    // `cleanupPrevious` is false; confirm `findContainer` accepts this pattern
    // and returns the most recent match.
    const previousContainer = await this.findContainer(`${containerName}-previous-*`);
    if (!previousContainer) {
      throw new RollbackError(`No previous container found for ${containerName}`);
    }

    const currentContainer = await this.findContainer(containerName);
    if (currentContainer) {
      await currentContainer.stop({ t: 10 });
      await currentContainer.rename(`${containerName}-failed-${Date.now()}`);
    }

    await previousContainer.rename(containerName);
    await previousContainer.start();

    return { success: true, containerId: previousContainer.id };
  }

  /**
   * Writes the version sticker as pretty-printed JSON, either into a Docker
   * volume (via a throwaway `alpine` container) or onto the local filesystem.
   */
  async writeSticker(sticker: VersionSticker): Promise<void> {
    const stickerPath = this.config.stickerPath || "/var/stella/version.json";
    const stickerContent = JSON.stringify(sticker, null, 2);

    if (this.config.stickerLocation === "volume") {
      // Content and path travel as environment variables instead of being
      // interpolated into the shell command: a single quote inside the JSON
      // would otherwise terminate the quoted string and allow injection.
      await this.docker.run("alpine", [
        "sh", "-c",
        `printf '%s\\n' "$STICKER_CONTENT" > "$STICKER_PATH"`
      ], {
        Env: [
          `STICKER_CONTENT=${stickerContent}`,
          `STICKER_PATH=${stickerPath}`,
        ],
        HostConfig: {
          Binds: [`${this.config.stickerVolume}:/var/stella`]
        }
      });
    } else {
      fs.writeFileSync(stickerPath, stickerContent);
    }
  }
}
```

---

### Module: `agent-compose`

| Aspect | Specification |
|--------|---------------|
| **Responsibility** | Docker Compose stack deployment |
| **Dependencies** | Docker Compose CLI |
| **Capabilities** | `compose.deploy`, `compose.rollback`, `compose.inspect` |

**Compose Agent Implementation**:

```typescript
class ComposeAgent implements TargetExecutor {
  /**
   * Deploys a Compose stack from the task's `compose_lock` artifact.
   *
   * @throws Error if the task carries no `compose_lock` artifact.
   * @throws DigestMismatchError if a service image digest is not the expected one.
   * @throws HealthCheckFailedError if the services do not become healthy.
   */
  async deploy(task: DeployTaskPayload): Promise<DeployResult> {
    const { artifacts, config } = task;
    const deployDir = config.deploymentDirectory;

    // 1. Write compose lock file
    const composeLock = artifacts.find(a => a.type === "compose_lock");
    if (!composeLock) {
      throw new Error("Deploy task does not contain a compose_lock artifact");
    }
    const composeContent = await this.fetchArtifact(composeLock);
    const composePath = path.join(deployDir, "compose.stella.lock.yml");
    await fs.writeFile(composePath, composeContent);

    // 2. Run pre-deploy hook
    if (task.hooks?.preDeploy) {
      await this.runHook(task.hooks.preDeploy, deployDir);
    }

    // 3. Verify digests
    // Verification only reads the lock file, so it runs before the pull:
    // an unpinned or unexpected image is rejected without being downloaded.
    await this.verifyDigests(composePath, config.expectedDigests);

    // 4. Pull images
    this.log("Pulling images...");
    await this.runCompose(deployDir, ["pull"]);

    // 5. Deploy
    this.log("Deploying services...");
    await this.runCompose(deployDir, ["up", "-d", "--remove-orphans", "--force-recreate"]);

    // 6. Wait for services to be healthy
    if (config.healthCheck) {
      const healthy = await this.waitForServicesHealthy(deployDir, config.healthCheck.timeout);
      if (!healthy) {
        // NOTE(review): nothing in this method creates a backup, and step 1
        // overwrites the previous lock file — confirm where the backup that
        // `rollbackToBackup` restores is produced.
        await this.rollbackToBackup(deployDir);
        throw new HealthCheckFailedError("Services failed health check");
      }
    }

    // 7. Run post-deploy hook
    if (task.hooks?.postDeploy) {
      await this.runHook(task.hooks.postDeploy, deployDir);
    }

    // 8. Write version sticker
    await this.writeSticker(config.sticker, deployDir);

    return { success: true };
  }

  /**
   * Checks that every expected service exists in the compose file and that
   * its image is pinned to exactly the expected `sha256` digest.
   *
   * @throws Error if a service is missing or its image is not digest-pinned.
   * @throws DigestMismatchError if the pinned digest differs from the expected one.
   */
  private async verifyDigests(
    composePath: string,
    expectedDigests: Record<string, string>
  ): Promise<void> {
    const composeContent = yaml.parse(await fs.readFile(composePath, "utf-8"));

    for (const [service, expectedDigest] of Object.entries(expectedDigests)) {
      // Optional chaining: an empty file or one without `services` must end
      // in the "not found" error below rather than a TypeError.
      const serviceConfig = composeContent?.services?.[service];
      if (!serviceConfig) {
        throw new Error(`Service ${service} not found in compose file`);
      }

      // A service without `image` (e.g. one using `build:`) is not pinned.
      const image = serviceConfig.image;
      if (typeof image !== "string" || !image.includes("@sha256:")) {
        throw new Error(`Service ${service} image not pinned to digest: ${image}`);
      }

      const actualDigest = image.split("@")[1];
      if (actualDigest !== expectedDigest) {
        throw new DigestMismatchError(
          `Service ${service}: expected ${expectedDigest}, got ${actualDigest}`
        );
      }
    }
  }
}
```

---

### Module: `agent-ssh`

| Aspect | Specification |
|--------|---------------|
| **Responsibility** | SSH remote execution (agentless) |
| **Dependencies** | SSH client library |
| **Capabilities** | `ssh.deploy`, `ssh.execute`, `ssh.upload` |

**SSH Remote Executor**:

```typescript
class SSHRemoteExecutor implements TargetExecutor {
  /** Opens the SSH session using the private key resolved from the secret store. */
  async connect(config: SSHConnectionConfig): Promise<void> {
    const privateKey = await this.secrets.getSecret(config.privateKeyRef);

    this.ssh = new SSHClient();
    await this.ssh.connect({
      host: config.host,
      port: config.port || 22,
      username: config.username,
      privateKey: privateKey.value,
      readyTimeout: config.connectionTimeout || 30000,
    });
  }

  /**
   * Uploads the task artifacts to the remote deployment directory, runs the
   * optional deploy script and hooks, and health-checks the result. The SSH
   * session is always closed when the method returns or throws.
   *
   * @throws DeploymentError if the deploy script exits with a non-zero code.
   * @throws HealthCheckFailedError if the health check fails (after rollback).
   */
  async deploy(task: DeployTaskPayload): Promise<DeployResult> {
    const { artifacts, config } = task;
    const deployDir = config.deploymentDirectory;

    // Remote paths are POSIX paths whatever OS this process runs on, and are
    // quoted so that spaces or shell metacharacters cannot alter the command.
    const remoteDir = this.shellQuote(deployDir);
    const backupDir = this.shellQuote(path.posix.join(deployDir, ".stella-backup"));

    try {
      // 1. Ensure deployment directory exists
      await this.exec(`mkdir -p ${remoteDir}`);
      await this.exec(`mkdir -p ${backupDir}`);

      // 2. Backup current deployment
      await this.exec(`cp -r ${remoteDir}/* ${backupDir}/ 2>/dev/null || true`);

      // 3. Upload artifacts
      for (const artifact of artifacts) {
        const content = await this.fetchArtifact(artifact);
        const remotePath = path.posix.join(deployDir, artifact.name);
        await this.uploadFile(content, remotePath);
      }

      // 4. Run pre-deploy hook
      if (task.hooks?.preDeploy) {
        await this.runRemoteHook(task.hooks.preDeploy, deployDir);
      }

      // 5. Execute deployment script
      const deployScript = artifacts.find(a => a.type === "deploy_script");
      if (deployScript) {
        const scriptPath = this.shellQuote(path.posix.join(deployDir, deployScript.name));
        await this.exec(`chmod +x ${scriptPath}`);
        const result = await this.exec(scriptPath, {
          cwd: deployDir,
          timeout: config.deploymentTimeout
        });
        if (result.exitCode !== 0) {
          throw new DeploymentError(`Deploy script failed: ${result.stderr}`);
        }
      }

      // 6. Run post-deploy hook
      if (task.hooks?.postDeploy) {
        await this.runRemoteHook(task.hooks.postDeploy, deployDir);
      }

      // 7. Health check
      if (config.healthCheck) {
        const healthy = await this.runHealthCheck(config.healthCheck);
        if (!healthy) {
          await this.rollback(task);
          throw new HealthCheckFailedError("Health check failed");
        }
      }

      // 8. Write version sticker
      await this.writeSticker(config.sticker, deployDir);

      // 9. Cleanup backup
      await this.exec(`rm -rf ${backupDir}`);

      return { success: true };
    } finally {
      // Optional call: if `connect` never ran, a TypeError here would replace
      // the error that actually explains the failure.
      this.ssh?.end();
    }
  }

  /** Wraps a value in single quotes for a POSIX shell, escaping embedded quotes. */
  private shellQuote(value: string): string {
    return `'${value.replace(/'/g, `'\\''`)}'`;
  }
}
```

---

### Module: `agent-winrm`

| Aspect | Specification |
|--------|---------------|
| **Responsibility** | WinRM remote execution (agentless) |
| **Dependencies** | WinRM client library |
| **Capabilities** | `winrm.deploy`, `winrm.execute`, `winrm.upload` |
| **Authentication** | NTLM, Kerberos, Basic |

---

### Module: `agent-ecs`

| Aspect | Specification |
|--------|---------------|
| **Responsibility** | AWS ECS service deployment |
| **Dependencies** | AWS SDK |
| **Capabilities** | `ecs.deploy`, `ecs.rollback`, `ecs.inspect` |

---

### Module: `agent-nomad`

| Aspect | Specification |
|--------|---------------|
| **Responsibility** | HashiCorp Nomad job deployment |
| **Dependencies** | Nomad API client |
| **Capabilities** | `nomad.deploy`, `nomad.rollback`, `nomad.inspect` |

---

## Agent Security Model

### Registration Flow

```
┌─────────────────────────────────────────────────────────────────────────────┐
│                           AGENT REGISTRATION FLOW                           │
│                                                                             │
│  1. Admin generates registration token (one-time use)                       │
│     POST /api/v1/admin/agent-tokens                                         │
│     → { token: "reg_xxx", expiresAt: "..." }                                │
│                                                                             │
│  2. Agent starts with registration token                                    │
│     ./stella-agent --register --token=reg_xxx                               │
│                                                                             │
│  3. Agent requests mTLS certificate                                         │
│     POST /api/v1/agents/register                                            │
│     Headers: X-Registration-Token: reg_xxx                                  │
│     Body: { name, version, capabilities, csr }                              │
│     → { agentId, certificate, caCertificate }                               │
│                                                                             │
│  4. Agent establishes mTLS connection                                       │
│     Uses issued certificate for all subsequent requests                     │
│                                                                             │
│  5. Agent requests short-lived JWT for task execution                       │
│     POST /api/v1/agents/token (over mTLS)                                   │
│     → { token, expiresIn: 3600 } // 1 hour                                  │
│                                                                             │
│  6. Agent refreshes token before expiration                                 │
│     Token refresh only over mTLS connection                                 │
│                                                                             │
└─────────────────────────────────────────────────────────────────────────────┘
```

### Communication Security

```
┌─────────────────────────────────────────────────────────────────────────────┐
│                        AGENT COMMUNICATION SECURITY                         │
│                                                                             │
│  ┌──────────────┐                          ┌──────────────┐                 │
│  │    AGENT     │                          │ STELLA CORE  │                 │
│  └──────┬───────┘                          └──────┬───────┘                 │
│         │                                         │                         │
│         │  mTLS (mutual TLS)                      │                         │
│         │  - Agent cert signed by Stella CA       │                         │
│         │  - Server cert verified by Agent        │                         │
│         │  - TLS 1.3 only                         │                         │
│         │  - Perfect forward secrecy              │                         │
│         │◄───────────────────────────────────────►│                         │
│         │                                         │                         │
│         │  Encrypted payload                      │                         │
│         │  - Task payloads encrypted with         │                         │
│         │    agent-specific key                   │                         │
│         │  - Logs encrypted in transit            │                         │
│         │◄───────────────────────────────────────►│                         │
│         │                                         │                         │
│         │  Heartbeat + capability refresh         │                         │
│         │  - Every 30 seconds                     │                         │
│         │  - Signed with agent key                │                         │
│         │────────────────────────────────────────►│                         │
│         │                                         │                         │
│         │  Task assignment                        │                         │
│         │  - Contains short-lived credentials     │                         │
│         │  - Scoped to specific target            │                         │
│         │  - Expires after task timeout           │                         │
│         │◄────────────────────────────────────────│                         │
│         │                                         │                         │
└─────────────────────────────────────────────────────────────────────────────┘
```

---

## Database Schema

```sql
-- Agents
CREATE TABLE release.agents (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
    name VARCHAR(255) NOT NULL,
    version VARCHAR(50) NOT NULL,
    capabilities JSONB NOT NULL DEFAULT '[]',
    labels JSONB NOT NULL DEFAULT '{}',
    status VARCHAR(50) NOT NULL DEFAULT 'offline' CHECK (status IN (
        'online', 'offline', 'degraded'
    )),
    last_heartbeat TIMESTAMPTZ,
    resource_usage JSONB,
    certificate_fingerprint VARCHAR(64),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (tenant_id, name)
);

CREATE INDEX idx_agents_tenant ON release.agents(tenant_id);
CREATE INDEX idx_agents_status ON release.agents(status);
CREATE INDEX idx_agents_capabilities ON release.agents USING GIN (capabilities);
```

---

## API Endpoints

```yaml
# Agent Registration
POST /api/v1/agents/register
  Headers: X-Registration-Token: {token}
  Body: { name, version, capabilities, csr }
  Response: { agentId, certificate, caCertificate }

# Agent Management
GET /api/v1/agents
  Query: ?status={online|offline|degraded}&capability={type}
  Response: Agent[]

GET /api/v1/agents/{id}
  Response: Agent

PUT /api/v1/agents/{id}
  Body: { labels?, capabilities? }
  Response: Agent

DELETE /api/v1/agents/{id}
  Response: { deleted: true }

# Agent Communication
POST /api/v1/agents/{id}/heartbeat
  Body: { status, resourceUsage, capabilities }
  Response: { tasks: AgentTask[] }

POST /api/v1/agents/{id}/tasks/{taskId}/complete
  Body: { success, result, logs }
  Response: { acknowledged: true }

# WebSocket for real-time task stream
WS /api/v1/agents/{id}/task-stream
  Messages:
    - { type: "task_assigned", task: AgentTask }
    - { type: "task_cancelled", taskId }
```

---

## References

- [Module Overview](overview.md)
- [Deploy Orchestrator](deploy-orchestrator.md)
- [Agent Security](../security/agent-security.md)
- [API Documentation](../api/agents.md)