Files
git.stella-ops.org/docs/modules/release-orchestrator/deployment/overview.md

22 KiB

Deployment Overview

Purpose

The Deployment system executes the actual deployment of releases to target environments, managing deployment jobs, tasks, artifact generation, and rollback capabilities.

Deployment Architecture

                         DEPLOYMENT ARCHITECTURE

  ┌─────────────────────────────────────────────────────────────────────────────┐
  │                        DEPLOY ORCHESTRATOR                                   │
  │                                                                             │
  │  ┌─────────────────────────────────────────────────────────────────────┐   │
  │  │                      DEPLOYMENT JOB MANAGER                          │   │
  │  │                                                                      │   │
  │  │  Promotion ───► Create Job ───► Plan Tasks ───► Execute Tasks       │   │
  │  │                                                                      │   │
  │  └─────────────────────────────────────────────────────────────────────┘   │
  │                                    │                                        │
  │                    ┌───────────────┼───────────────┐                       │
  │                    │               │               │                       │
  │                    ▼               ▼               ▼                       │
  │  ┌─────────────────────┐ ┌─────────────────┐ ┌─────────────────────┐      │
  │  │   TARGET EXECUTOR   │ │ RUNNER EXECUTOR │ │ ARTIFACT GENERATOR  │      │
  │  │                     │ │                 │ │                     │      │
  │  │  - Task dispatch    │ │ - Agent tasks   │ │ - Compose files     │      │
  │  │  - Status tracking  │ │ - SSH tasks     │ │ - Env configs       │      │
  │  │  - Log aggregation  │ │ - API tasks     │ │ - Manifests         │      │
  │  └─────────────────────┘ └─────────────────┘ └─────────────────────┘      │
  │                                    │                                        │
  └─────────────────────────────────────────────────────────────────────────────┘
                                       │
          ┌────────────────────────────┼────────────────────────────┐
          │                            │                            │
          ▼                            ▼                            ▼
   ┌─────────────┐             ┌─────────────┐             ┌─────────────┐
   │   Agent     │             │  Agentless  │             │    API      │
   │  Execution  │             │  Execution  │             │  Execution  │
   │             │             │             │             │             │
   │  Docker,    │             │  SSH,       │             │  ECS,       │
   │  Compose    │             │  WinRM      │             │  Nomad      │
   └─────────────┘             └─────────────┘             └─────────────┘

Deployment Flow

Standard Deployment Flow

                    DEPLOYMENT FLOW

  Promotion        Deployment        Task             Agent/Target
  Approved         Job               Execution
     │                │                 │                 │
     │  Create Job    │                 │                 │
     ├───────────────►│                 │                 │
     │                │                 │                 │
     │                │  Generate       │                 │
     │                │  Artifacts      │                 │
     │                ├────────────────►│                 │
     │                │                 │                 │
     │                │  Create Tasks   │                 │
     │                │  per Target     │                 │
     │                ├────────────────►│                 │
     │                │                 │                 │
     │                │                 │  Dispatch Task  │
     │                │                 ├────────────────►│
     │                │                 │                 │
     │                │                 │  Execute        │
     │                │                 │  (Pull, Deploy) │
     │                │                 │                 │
     │                │                 │  Report Status  │
     │                │                 │◄────────────────┤
     │                │                 │                 │
     │                │  Aggregate      │                 │
     │                │  Results        │                 │
     │                │◄────────────────┤                 │
     │                │                 │                 │
     │  Job Complete  │                 │                 │
     │◄───────────────┤                 │                 │
     │                │                 │                 │

Deployment Job

Job Entity

/**
 * A single deployment run of a release into one environment.
 *
 * In the deployment flow a job is created from an approved promotion, has
 * artifacts generated for it, and owns the tasks that are executed against
 * the environment's targets.
 */
interface DeploymentJob {
  id: UUID;
  promotionId: UUID;
  releaseId: UUID;
  environmentId: UUID;

  // Execution configuration
  strategy: DeploymentStrategy;
  // NOTE(review): presumably the maximum number of tasks run concurrently —
  // nothing in this document reads the value, so confirm against the executor.
  parallelism: number;

  // Status tracking
  status: JobStatus;
  startedAt?: DateTime;
  completedAt?: DateTime;

  // Artifacts
  artifacts: GeneratedArtifact[];

  // Rollback reference
  // RollbackManager sets `rollbackOf` to the id of the job being rolled back
  // and `previousJobId` to the id of the earlier completed job it restores.
  rollbackOf?: UUID;           // If this is a rollback job
  previousJobId?: UUID;        // Previous successful job

  // Tasks
  tasks: DeploymentTask[];
}

/**
 * Lifecycle states of a deployment job, mirroring the job state machine:
 * pending -> preparing (generate artifacts) -> running (execute tasks), then
 * either completing (verify health) -> completed, failed, or
 * rolling_back -> rolled_back.
 */
type JobStatus =
  | "pending"
  | "preparing"
  | "running"
  | "completing"
  | "completed"
  | "failed"
  | "rolling_back"
  | "rolled_back";

/**
 * How a job rolls its tasks out across targets.
 *
 * RollbackManager always uses "all-at-once" for rollback jobs so that they
 * complete quickly.
 */
type DeploymentStrategy =
  | "all-at-once"
  | "rolling"
  | "canary"
  | "blue-green";

Job State Machine

                      JOB STATE MACHINE

                          ┌──────────┐
                          │ PENDING  │
                          └────┬─────┘
                               │ start()
                               ▼
                          ┌──────────┐
                          │PREPARING │
                          │          │
                          │ Generate │
                          │ artifacts│
                          └────┬─────┘
                               │
                               ▼
                          ┌──────────┐
                          │ RUNNING  │◄────────────────┐
                          │          │                 │
                          │ Execute  │                 │
                          │ tasks    │                 │
                          └────┬─────┘                 │
                               │                       │
               ┌───────────────┼───────────────┐       │
               │               │               │       │
               ▼               ▼               ▼       │
          ┌──────────┐   ┌──────────┐   ┌──────────┐  │
          │COMPLETING│   │ FAILED   │   │ ROLLING  │  │
          │          │   │          │   │  BACK    │──┘
          │ Verify   │   │          │   │          │
          │ health   │   │          │   │          │
          └────┬─────┘   └────┬─────┘   └────┬─────┘
               │              │              │
               ▼              │              ▼
          ┌──────────┐        │         ┌──────────┐
          │COMPLETED │        │         │ ROLLED   │
          └──────────┘        │         │  BACK    │
                              │         └──────────┘
                              │
                              ▼
                         [Failure
                          handling]

Deployment Task

Task Entity

/**
 * One unit of deployment work: put a specific component digest onto a
 * specific target as part of a job.
 *
 * NOTE(review): AgentExecutor reads `task.timeout` (in seconds), but no
 * `timeout` field is declared on this interface — confirm where it is defined.
 */
interface DeploymentTask {
  id: UUID;
  jobId: UUID;
  targetId: UUID;

  // What to deploy
  componentId: UUID;
  digest: string;

  // Execution
  status: TaskStatus;
  // Set by TaskDispatcher once the task has been handed to an agent.
  agentId?: UUID;
  startedAt?: DateTime;
  completedAt?: DateTime;

  // Results
  logs: string;
  // Read by RollbackManager when it builds the tasks of a rollback job.
  previousDigest?: string;    // For rollback
  error?: string;

  // Retry tracking
  // RollbackManager creates its tasks with attemptNumber 0 and maxAttempts 3.
  attemptNumber: number;
  maxAttempts: number;
}

/**
 * Lifecycle states of a deployment task.
 *
 * New tasks start as "pending"; TaskDispatcher moves a task to "dispatched"
 * after the agent client has accepted its payload.
 */
type TaskStatus =
  | "pending"
  | "queued"
  | "dispatched"
  | "running"
  | "verifying"
  | "succeeded"
  | "failed"
  | "retrying";

Task Dispatch

/**
 * Routes a deployment task to the transport that matches the execution model
 * of the task's target.
 */
class TaskDispatcher {
  /**
   * Looks up the task's target and hands the task to the matching dispatcher.
   *
   * @param task - Task to dispatch; `task.targetId` selects the target.
   * @throws Error when the target's execution model has no dispatcher here.
   *   Without this, such a task would be silently left undispatched.
   */
  async dispatchTask(task: DeploymentTask): Promise<void> {
    const target = await this.targetRepository.get(task.targetId);

    switch (target.executionModel) {
      case "agent":
        await this.dispatchToAgent(task, target);
        break;

      case "ssh":
        await this.dispatchViaSsh(task, target);
        break;

      case "api":
        await this.dispatchViaApi(task, target);
        break;

      default:
        // Fail loudly instead of returning as if the task had been dispatched.
        throw new Error(
          `Unsupported execution model: ${target.executionModel}`
        );
    }
  }

  /**
   * Sends a deploy payload to an agent that can reach the target, then
   * records the agent and the "dispatched" status on the task.
   *
   * @param task - Task being dispatched; mutated with `status` and `agentId`.
   * @param target - Target the task deploys to.
   * @throws NoAgentAvailableError when no agent is found for the target.
   */
  private async dispatchToAgent(
    task: DeploymentTask,
    target: Target
  ): Promise<void> {
    // Find available agent for target
    const agent = await this.agentManager.findAgentForTarget(target);

    if (!agent) {
      throw new NoAgentAvailableError(target.id);
    }

    // Create task payload
    const payload: AgentTaskPayload = {
      taskId: task.id,
      targetId: target.id,
      action: "deploy",
      digest: task.digest,
      config: target.connection,
      credentials: await this.fetchTaskCredentials(target)
    };

    // Dispatch to agent
    await this.agentClient.dispatchTask(agent.id, payload);

    // Persist the new state only after the agent client call has succeeded,
    // so a failed dispatch never leaves the task marked as "dispatched".
    task.status = "dispatched";
    task.agentId = agent.id;
    await this.taskRepository.update(task);
  }
}

Generated Artifacts

Artifact Types

| Type | Description | Format |
|------|-------------|--------|
| compose-file | Docker Compose file | YAML |
| compose-lock | Pinned compose file | YAML |
| env-file | Environment variables | .env |
| systemd-unit | Systemd service unit | .service |
| nginx-config | Nginx configuration | .conf |
| manifest | Deployment manifest | JSON |

Compose Lock Generation

/**
 * A compose file in which every service image is pinned to a digest, plus
 * metadata describing the release and promotion it was generated for.
 */
interface ComposeLock {
  version: string;
  services: Record<string, LockedService>;
  generated: {
    releaseId: string;
    promotionId: string;
    timestamp: string;
    // ComposeArtifactGenerator fills this with the hex SHA-256 of the lock's
    // YAML serialization taken while this field is still the empty string.
    digest: string;         // Hash of this file
  };
}

/**
 * One service entry of a ComposeLock.
 */
interface LockedService {
  // ComposeArtifactGenerator builds this as `<repository>@<digest>`.
  image: string;            // Full image reference with digest
  environment?: Record<string, string>;
  labels: Record<string, string>;
}

/**
 * Builds digest-pinned compose lock files for a release.
 */
class ComposeArtifactGenerator {
  /**
   * Produces a ComposeLock in which every service of the template is pinned
   * to the digest of the matching release component.
   *
   * @param release - Release whose components supply repository and digest.
   * @param target - Target being deployed to; supplies `promotionId`.
   * @param template - Compose template; each service names its component via
   *   `componentName`.
   * @returns The lock, with `generated.digest` set to the hex SHA-256 of the
   *   lock's YAML serialization taken while `generated.digest` was empty.
   * @throws Error when a template service has no matching release component.
   */
  async generateLock(
    release: Release,
    target: Target,
    template: ComposeTemplate
  ): Promise<ComposeLock> {
    // Capture the time once so every service label and the lock metadata
    // carry the same timestamp instead of drifting between `new Date()` calls.
    const generatedAt = new Date().toISOString();
    const services: Record<string, LockedService> = {};

    for (const [serviceName, serviceConfig] of Object.entries(template.services)) {
      // Find component for this service
      const component = release.components.find(
        c => c.name === serviceConfig.componentName
      );

      if (!component) {
        throw new Error(`No component found for service ${serviceName}`);
      }

      services[serviceName] = {
        // Pin by digest so the deployed image cannot change under a tag.
        image: `${component.repository}@${component.digest}`,
        environment: {
          ...serviceConfig.environment,
          STELLA_RELEASE_ID: release.id,
          STELLA_DIGEST: component.digest
        },
        labels: {
          "stella.release.id": release.id,
          "stella.component.name": component.name,
          "stella.digest": component.digest,
          "stella.deployed.at": generatedAt
        }
      };
    }

    const lock: ComposeLock = {
      version: "3.8",
      services,
      generated: {
        releaseId: release.id,
        promotionId: target.promotionId,
        timestamp: generatedAt,
        digest: "" // Computed below
      }
    };

    // The hash covers the lock with an empty digest field; anyone verifying
    // it must blank `generated.digest` before re-hashing.
    const content = yaml.stringify(lock);
    lock.generated.digest = crypto.createHash("sha256").update(content).digest("hex");

    return lock;
  }
}

Deployment Execution

Execution Models

| Model | Description | Use Case |
|-------|-------------|----------|
| agent | Stella agent on target | Docker hosts, servers |
| ssh | SSH-based agentless | Unix servers |
| winrm | WinRM-based agentless | Windows servers |
| api | API-based | ECS, Nomad, K8s |
Agent-Based Execution

/**
 * Runs a deployment task through the agent assigned to it and waits for the
 * agent to report the outcome.
 */
class AgentExecutor {
  /**
   * Sends the task to its agent and polls until it completes or times out.
   *
   * @param task - Task to run; must already carry an `agentId`.
   * @returns The outcome reported by the agent.
   * @throws Error when the task has no agent assigned.
   * @throws TimeoutError when the agent does not report completion in time.
   */
  async execute(task: DeploymentTask): Promise<ExecutionResult> {
    // `agentId` is optional on the task; refuse early rather than looking up
    // an agent with an undefined id.
    if (!task.agentId) {
      throw new Error(`Task ${task.id} has no agent assigned`);
    }

    const agent = await this.agentManager.get(task.agentId);
    const target = await this.targetRepository.get(task.targetId);

    // Prepare task payload with secrets
    const payload: TaskPayload = {
      taskId: task.id,
      targetId: target.id,
      action: "deploy",
      digest: task.digest,
      config: target.connection,
      artifacts: await this.getArtifacts(task.jobId),
      credentials: await this.secretsManager.fetchForTask(target)
    };

    // Dispatch to agent
    const taskRef = await this.agentClient.dispatchTask(agent.id, payload);

    // NOTE(review): `timeout` is not declared on DeploymentTask in this
    // document — confirm where the per-task timeout comes from.
    return this.waitForTaskCompletion(taskRef, task.timeout);
  }

  /**
   * Polls the agent once per second until the task completes.
   *
   * @param taskRef - Reference returned by the agent client on dispatch.
   * @param timeout - Maximum time to wait, in seconds.
   * @returns The completed task's success flag, logs, deployed digest and error.
   * @throws TimeoutError when the deadline passes before completion.
   */
  private async waitForTaskCompletion(
    taskRef: TaskReference,
    timeout: number
  ): Promise<ExecutionResult> {
    const pollIntervalMs = 1000;
    const deadline = Date.now() + timeout * 1000;

    while (Date.now() < deadline) {
      const status = await this.agentClient.getTaskStatus(taskRef);

      if (status.completed) {
        return {
          success: status.success,
          logs: status.logs,
          deployedDigest: status.deployedDigest,
          error: status.error
        };
      }

      await sleep(pollIntervalMs);
    }

    throw new TimeoutError(`Task did not complete within ${timeout} seconds`);
  }
}

SSH-Based Execution

/**
 * Runs a deployment task on a target over SSH, without an agent: uploads the
 * job's artifacts and then executes a deploy command on the remote host.
 */
class SshExecutor {
  /**
   * Deploys the task to its target over SSH.
   *
   * @param task - Task to run; `task.targetId` selects the target.
   * @returns `success` is true when the remote command exits with code 0;
   *   `logs` holds stdout followed by stderr; `error` holds stderr on failure.
   * @throws Error when the target type has no deploy command. This is checked
   *   before any connection is opened or artifact uploaded.
   */
  async execute(task: DeploymentTask): Promise<ExecutionResult> {
    const target = await this.targetRepository.get(task.targetId);
    const sshConfig = target.connection as SshConnectionConfig;

    // Build the command first so an unsupported target fails before we touch
    // the remote host.
    const deployCommand = this.buildDeployCommand(task, target);

    // Get SSH credentials from vault
    const creds = await this.secretsManager.fetchSshCredentials(
      sshConfig.credentialRef
    );

    const ssh = new NodeSSH();

    try {
      // Connecting inside the try guarantees dispose() also runs when the
      // connection attempt itself fails.
      await ssh.connect({
        host: sshConfig.host,
        port: sshConfig.port || 22,
        username: creds.username,
        privateKey: creds.privateKey
      });

      // Upload artifacts
      const artifacts = await this.getArtifacts(task.jobId);
      for (const artifact of artifacts) {
        await ssh.putFile(artifact.localPath, artifact.remotePath);
      }

      // Execute deployment script
      const result = await ssh.execCommand(deployCommand, {
        cwd: sshConfig.workDir
      });

      return {
        success: result.code === 0,
        logs: `${result.stdout}\n${result.stderr}`,
        error: result.code !== 0 ? result.stderr : undefined
      };
    } finally {
      ssh.dispose();
    }
  }

  /**
   * Builds the shell command that performs the deployment for the target type.
   *
   * Every interpolated value is single-quoted so whitespace or shell
   * metacharacters in configuration cannot alter the command.
   *
   * @throws Error for target types other than "compose_host" and "docker_host".
   */
  private buildDeployCommand(task: DeploymentTask, target: Target): string {
    switch (target.targetType) {
      case "compose_host": {
        const workDir = this.shellQuote(target.connection.workDir);
        return `cd ${workDir} && docker-compose pull && docker-compose up -d`;
      }

      case "docker_host": {
        // NOTE(review): `task.digest` is used as the full image reference
        // here — confirm it includes the repository, not just the hash.
        const image = this.shellQuote(task.digest);
        const name = this.shellQuote(target.containerName);

        // Stop and remove are best-effort: on a first deployment there is no
        // container yet, and the old container must be removed or
        // `docker run --name` fails because the name is still in use.
        return [
          `docker pull ${image}`,
          `(docker stop ${name} || true)`,
          `(docker rm ${name} || true)`,
          `docker run -d --name ${name} ${image}`
        ].join(" && ");
      }

      default:
        throw new Error(`Unsupported target type: ${target.targetType}`);
    }
  }

  /** Wraps a value in single quotes for safe use as one POSIX shell word. */
  private shellQuote(value: string): string {
    return `'${value.replace(/'/g, "'\\''")}'`;
  }
}

Health Verification

/**
 * Settings for a post-deployment health check run by HealthVerifier.
 */
interface HealthCheckConfig {
  // Selects which probe HealthVerifier runs.
  type: "http" | "tcp" | "command";
  // Seconds; the HTTP probe aborts its request after this long.
  timeout: number;
  // Controls how many times verify() runs the check before giving up.
  retries: number;
  // Seconds to wait between consecutive attempts.
  interval: number;

  // HTTP-specific
  // Appended to the target's health endpoint; "/health" when not set.
  path?: string;
  // Status code that counts as healthy; 200 when not set.
  expectedStatus?: number;
  expectedBody?: string;

  // TCP-specific
  port?: number;

  // Command-specific
  command?: string;
}

/**
 * Probes a target after deployment and reports whether it is healthy,
 * repeating the probe according to the health check configuration.
 */
class HealthVerifier {
  /**
   * Runs the configured check until it reports healthy or attempts run out.
   *
   * The check is always performed at least once, even when `config.retries`
   * is 0 or missing; otherwise a target would be reported unhealthy without
   * ever being probed.
   *
   * @param target - Target to probe.
   * @param config - Check type, timing (seconds) and expectations.
   * @returns The first healthy result, or an unhealthy result carrying the
   *   last failure message and the number of attempts allowed.
   */
  async verify(
    target: Target,
    config: HealthCheckConfig
  ): Promise<HealthCheckResult> {
    const maxAttempts = config.retries >= 1 ? config.retries : 1;
    let lastError: Error | undefined;

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      try {
        const result = await this.performCheck(target, config);

        if (result.healthy) {
          return result;
        }

        lastError = new Error(result.message);
      } catch (error) {
        // Anything can be thrown; normalize so `.message` is always usable.
        lastError = error instanceof Error ? error : new Error(String(error));
      }

      // No point waiting after the final attempt.
      if (attempt < maxAttempts - 1) {
        await sleep(config.interval * 1000);
      }
    }

    return {
      healthy: false,
      message: lastError?.message || "Health check failed",
      attempts: maxAttempts
    };
  }

  /**
   * Dispatches to the probe that matches `config.type`.
   *
   * @throws Error when the type is not one of "http", "tcp" or "command".
   */
  private async performCheck(
    target: Target,
    config: HealthCheckConfig
  ): Promise<HealthCheckResult> {
    switch (config.type) {
      case "http":
        return this.httpCheck(target, config);

      case "tcp":
        return this.tcpCheck(target, config);

      case "command":
        return this.commandCheck(target, config);

      default: {
        // Unreachable for well-typed configs; guards against untyped input.
        const unsupported: never = config.type;
        throw new Error(`Unsupported health check type: ${String(unsupported)}`);
      }
    }
  }

  /**
   * Issues a GET against the target's health endpoint.
   *
   * Healthy means the status equals `expectedStatus` (default 200) and, when
   * `expectedBody` is set, the response body contains that text. Request
   * failures, including the timeout, are reported as an unhealthy result
   * rather than thrown.
   */
  private async httpCheck(
    target: Target,
    config: HealthCheckConfig
  ): Promise<HealthCheckResult> {
    const url = `${target.healthEndpoint}${config.path || "/health"}`;

    try {
      const response = await fetch(url, {
        signal: AbortSignal.timeout(config.timeout * 1000)
      });

      if (response.status !== (config.expectedStatus || 200)) {
        return {
          healthy: false,
          message: `Status ${response.status}`,
          statusCode: response.status
        };
      }

      // Substring match: never more permissive than an exact comparison.
      if (config.expectedBody !== undefined) {
        const body = await response.text();

        if (!body.includes(config.expectedBody)) {
          return {
            healthy: false,
            message: "Response body did not contain expected content",
            statusCode: response.status
          };
        }
      }

      return {
        healthy: true,
        message: "OK",
        statusCode: response.status
      };
    } catch (error) {
      return {
        healthy: false,
        message: error instanceof Error ? error.message : String(error)
      };
    }
  }
}

Rollback Management

/**
 * Creates and runs rollback jobs that return an environment to the state left
 * by its most recent completed deployment of a different release.
 */
class RollbackManager {
  /**
   * Builds a rollback job for a failed job, saves it and executes it.
   *
   * A rollback task is created for every target of the failed job that the
   * previous completed job also deployed to. Each task deploys the digest
   * the failed task recorded as previously running; when none was recorded,
   * it deploys the digest the previous completed job put on that target.
   *
   * @param jobId - Id of the job to roll back.
   * @param reason - Why the rollback was requested.
   *   NOTE(review): not stored anywhere here — confirm whether it should be.
   * @returns The rollback job after execution has been awaited.
   * @throws NoRollbackTargetError when no earlier completed job exists.
   */
  async initiateRollback(
    jobId: UUID,
    reason: string
  ): Promise<DeploymentJob> {
    const failedJob = await this.jobRepository.get(jobId);
    const previousJob = await this.findPreviousSuccessfulJob(
      failedJob.environmentId,
      failedJob.releaseId
    );

    if (!previousJob) {
      throw new NoRollbackTargetError(jobId);
    }

    // Create rollback job
    const rollbackJob: DeploymentJob = {
      id: uuidv4(),
      promotionId: failedJob.promotionId,
      releaseId: previousJob.releaseId,  // Previous release
      environmentId: failedJob.environmentId,
      strategy: "all-at-once",           // Fast rollback
      parallelism: 10,
      status: "pending",
      rollbackOf: jobId,
      previousJobId: previousJob.id,
      artifacts: [],
      tasks: []
    };

    // Create tasks to restore previous state
    for (const task of failedJob.tasks) {
      const previousTask = previousJob.tasks.find(
        t => t.targetId === task.targetId
      );

      // Targets the previous job never deployed to have nothing to restore.
      if (!previousTask) {
        continue;
      }

      rollbackJob.tasks.push({
        id: uuidv4(),
        jobId: rollbackJob.id,
        targetId: task.targetId,
        componentId: previousTask.componentId,
        // Not `previousTask.previousDigest`: that is what ran before the
        // previous job, i.e. one release too far back.
        digest: task.previousDigest ?? previousTask.digest,
        status: "pending",
        logs: "",
        attemptNumber: 0,
        maxAttempts: 3
      });
    }

    await this.jobRepository.save(rollbackJob);

    // Execute rollback
    await this.executeJob(rollbackJob);

    return rollbackJob;
  }

  /**
   * Finds the most recently completed job in the environment that deployed a
   * release other than `excludeReleaseId`.
   *
   * @returns The job, or null when there is none.
   */
  private async findPreviousSuccessfulJob(
    environmentId: UUID,
    excludeReleaseId: UUID
  ): Promise<DeploymentJob | null> {
    return this.jobRepository.findOne({
      environmentId,
      status: "completed",
      releaseId: { $ne: excludeReleaseId }
    }, {
      orderBy: { completedAt: "desc" }
    });
  }
}

References