release orchestrator pivot, architecture and planning

This commit is contained in:
2026-01-10 22:37:22 +02:00
parent c84f421e2f
commit d509c44411
130 changed files with 70292 additions and 721 deletions

View File

@@ -0,0 +1,308 @@
# Artifact Generation
## Overview
Every deployment generates immutable artifacts that enable reproducibility, audit, and rollback.
## Generated Artifacts
### 1. Compose Lock File
**File:** `compose.stella.lock.yml`
A Docker Compose file with all image references pinned to specific digests.
```yaml
# compose.stella.lock.yml
# Generated by Stella Ops - DO NOT EDIT
# Release: myapp-v2.3.1
# Generated: 2026-01-10T14:30:00Z
# Generator: stella-artifact-generator@1.5.0
version: "3.8"
services:
api:
image: registry.example.com/myapp/api@sha256:abc123...
# Original tag: v2.3.1
deploy:
replicas: 2
environment:
- DATABASE_URL=${DATABASE_URL}
- REDIS_URL=${REDIS_URL}
labels:
stella.component.id: "comp-api-uuid"
stella.release.id: "rel-uuid"
stella.digest: "sha256:abc123..."
worker:
image: registry.example.com/myapp/worker@sha256:def456...
# Original tag: v2.3.1
deploy:
replicas: 1
labels:
stella.component.id: "comp-worker-uuid"
stella.release.id: "rel-uuid"
stella.digest: "sha256:def456..."
# Stella metadata
x-stella:
release:
id: "rel-uuid"
name: "myapp-v2.3.1"
created_at: "2026-01-10T14:00:00Z"
environment:
id: "env-uuid"
name: "production"
deployment:
id: "deploy-uuid"
started_at: "2026-01-10T14:30:00Z"
checksums:
sha256: "checksum-of-this-file"
```
### 2. Version Sticker
**File:** `stella.version.json`
A metadata file placed on each deployment target that records which release is currently deployed there and how it was deployed.
```json
{
"version": "1.0",
"generatedAt": "2026-01-10T14:35:00Z",
"generator": "stella-artifact-generator@1.5.0",
"release": {
"id": "rel-uuid",
"name": "myapp-v2.3.1",
"createdAt": "2026-01-10T14:00:00Z",
"components": [
{
"name": "api",
"digest": "sha256:abc123...",
"semver": "2.3.1",
"tag": "v2.3.1"
},
{
"name": "worker",
"digest": "sha256:def456...",
"semver": "2.3.1",
"tag": "v2.3.1"
}
]
},
"deployment": {
"id": "deploy-uuid",
"promotionId": "promo-uuid",
"environmentId": "env-uuid",
"environmentName": "production",
"targetId": "target-uuid",
"targetName": "prod-web-01",
"strategy": "rolling",
"startedAt": "2026-01-10T14:30:00Z",
"completedAt": "2026-01-10T14:35:00Z"
},
"deployer": {
"userId": "user-uuid",
"userName": "john.doe",
"agentId": "agent-uuid",
"agentName": "prod-agent-01"
},
"previous": {
"releaseId": "prev-rel-uuid",
"releaseName": "myapp-v2.3.0",
"digest": "sha256:789..."
},
"signature": "base64-encoded-signature",
"signatureAlgorithm": "RS256",
"signerKeyRef": "stella/signing/prod-key-2026"
}
```
### 3. Evidence Packet
**Storage:** Stored in the database rather than as a file; exportable as JSON or PDF.
See [Evidence Schema](../appendices/evidence-schema.md) for full specification.
### 4. Deployment Script (Optional)
**File:** `deploy.stella.script.dll` or `deploy.stella.sh`
Generated only when a deployment uses C# or shell scripts with hooks. A C# script is written as source and compiled to a DLL, for example:
```csharp
// deploy.stella.csx (source, compiled to DLL)
// Example deployment script: a pre-deploy hook, the deployment itself, then a post-deploy hook.
// Script directive that references the StellaOps.Sdk NuGet package, version 1.0.0.
#r "nuget: StellaOps.Sdk, 1.0.0"
using StellaOps.Sdk;
// Pre-deploy hook
// Runs the database backup script, then a health check against /ready.
// NOTE(review): timeout: 30 is presumably seconds — confirm against the SDK.
await Context.RunPreDeployHook(async (ctx) => {
await ctx.ExecuteCommand("./scripts/backup-database.sh");
await ctx.HealthCheck("/ready", timeout: 30);
});
// Deploy
await Context.Deploy();
// Post-deploy hook
// Runs the cache warm-up script, then sends a "Deployment complete" notification to the "slack" channel.
await Context.RunPostDeployHook(async (ctx) => {
await ctx.ExecuteCommand("./scripts/warm-cache.sh");
await ctx.Notify("slack", "Deployment complete");
});
```
## Artifact Storage
### Storage Structure
```
artifacts/
├── {tenant_id}/
│ ├── {deployment_id}/
│ │ ├── compose.stella.lock.yml
│ │ ├── deploy.stella.script.dll (if applicable)
│ │ ├── deploy.stella.script.csx (source)
│ │ ├── manifest.json
│ │ └── checksums.sha256
│ └── ...
└── ...
```
### Manifest File
```json
{
"version": "1.0",
"deploymentId": "deploy-uuid",
"createdAt": "2026-01-10T14:30:00Z",
"artifacts": [
{
"name": "compose.stella.lock.yml",
"type": "compose-lock",
"size": 2048,
"sha256": "abc123..."
},
{
"name": "deploy.stella.script.dll",
"type": "script-compiled",
"size": 8192,
"sha256": "def456..."
}
],
"totalSize": 10240,
"signature": "base64-signature"
}
```
## Artifact Generation Process
```
┌─────────────────────────────────────────────────────────────────────────────┐
│ ARTIFACT GENERATION FLOW │
│ │
│ ┌─────────────────┐ │
│ │ Promotion │ │
│ │ Approved │ │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────────┐ │
│ │ ARTIFACT GENERATOR │ │
│ │ │ │
│ │ 1. Load release bundle (components, digests) │ │
│ │ 2. Load environment configuration (variables, secrets refs) │ │
│ │ 3. Load workflow template (hooks, scripts) │ │
│ │ 4. Generate compose.stella.lock.yml │ │
│ │ 5. Compile scripts (if any) │ │
│ │ 6. Generate version sticker template │ │
│ │ 7. Compute checksums │ │
│ │ 8. Sign artifacts │ │
│ │ 9. Store in artifact storage │ │
│ │ │ │
│ └────────────────────────────┬────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────────┐ │
│ │ DEPLOYMENT ORCHESTRATOR │ │
│ │ │ │
│ │ Artifacts distributed to targets via agents │ │
│ └─────────────────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
```
## Artifact Properties
### Immutability
Once generated, artifacts are never modified:
- Content-addressed storage (hash in path/metadata)
- No overwrite capability
- Append-only storage pattern
### Integrity
All artifacts are:
- Checksummed (SHA-256)
- Signed with deployment key
- Verifiable at deployment time
### Retention
| Environment | Retention Period |
|-------------|------------------|
| Development | 30 days |
| Staging | 90 days |
| Production | 7 years (compliance) |
## API Operations
```yaml
# List artifacts for deployment
GET /api/v1/deployment-jobs/{id}/artifacts
Response: Artifact[]
# Download specific artifact
GET /api/v1/deployment-jobs/{id}/artifacts/{name}
Response: binary
# Get artifact manifest
GET /api/v1/deployment-jobs/{id}/artifacts/manifest
Response: ArtifactManifest
# Verify artifact integrity
POST /api/v1/deployment-jobs/{id}/artifacts/{name}/verify
Response: { valid: boolean, checksum: string, signature: string }
```
## Drift Detection
Version stickers enable drift detection:
```typescript
/** Result of comparing the version sticker expected on a target with the one found there. */
interface DriftCheck {
targetId: UUID;
// Sticker the target is expected to carry.
expectedSticker: VersionSticker;
// Sticker found on the target; the type allows null (presumably when none exists — confirm).
actualSticker: VersionSticker | null;
driftDetected: boolean;
// Category of drift; optional.
driftType?: "missing" | "corrupted" | "mismatch";
// Optional description of the differing field and the two digests involved.
details?: {
expectedDigest: string;
actualDigest: string;
field: string;
};
}
```
## References
- [Deployment Overview](overview.md)
- [Deployment Strategies](strategies.md)
- [Evidence Schema](../appendices/evidence-schema.md)

View File

@@ -0,0 +1,671 @@
# Deployment Overview
## Purpose
The Deployment system executes the actual deployment of releases to target environments, managing deployment jobs, tasks, artifact generation, and rollback capabilities.
## Deployment Architecture
```
DEPLOYMENT ARCHITECTURE
┌─────────────────────────────────────────────────────────────────────────────┐
│ DEPLOY ORCHESTRATOR │
│ │
│ ┌─────────────────────────────────────────────────────────────────────┐ │
│ │ DEPLOYMENT JOB MANAGER │ │
│ │ │ │
│ │ Promotion ───► Create Job ───► Plan Tasks ───► Execute Tasks │ │
│ │ │ │
│ └─────────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ┌───────────────┼───────────────┐ │
│ │ │ │ │
│ ▼ ▼ ▼ │
│ ┌─────────────────────┐ ┌─────────────────┐ ┌─────────────────────┐ │
│ │ TARGET EXECUTOR │ │ RUNNER EXECUTOR │ │ ARTIFACT GENERATOR │ │
│ │ │ │ │ │ │ │
│ │ - Task dispatch │ │ - Agent tasks │ │ - Compose files │ │
│ │ - Status tracking │ │ - SSH tasks │ │ - Env configs │ │
│ │ - Log aggregation │ │ - API tasks │ │ - Manifests │ │
│ └─────────────────────┘ └─────────────────┘ └─────────────────────┘ │
│ │ │
└─────────────────────────────────────────────────────────────────────────────┘
┌────────────────────────────┼────────────────────────────┐
│ │ │
▼ ▼ ▼
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
│ Agent │ │ Agentless │ │ API │
│ Execution │ │ Execution │ │ Execution │
│ │ │ │ │ │
│ Docker, │ │ SSH, │ │ ECS, │
│ Compose │ │ WinRM │ │ Nomad │
└─────────────┘ └─────────────┘ └─────────────┘
```
## Deployment Flow
### Standard Deployment Flow
```
DEPLOYMENT FLOW
Promotion Deployment Task Agent/Target
Approved Job Execution
│ │ │ │
│ Create Job │ │ │
├───────────────►│ │ │
│ │ │ │
│ │ Generate │ │
│ │ Artifacts │ │
│ ├────────────────►│ │
│ │ │ │
│ │ Create Tasks │ │
│ │ per Target │ │
│ ├────────────────►│ │
│ │ │ │
│ │ │ Dispatch Task │
│ │ ├────────────────►│
│ │ │ │
│ │ │ Execute │
│ │ │ (Pull, Deploy) │
│ │ │ │
│ │ │ Report Status │
│ │ │◄────────────────┤
│ │ │ │
│ │ Aggregate │ │
│ │ Results │ │
│ │◄────────────────┤ │
│ │ │ │
│ Job Complete │ │ │
│◄───────────────┤ │ │
│ │ │ │
```
## Deployment Job
### Job Entity
```typescript
/**
 * One deployment of a release to an environment, created from a promotion.
 * Holds the execution settings, status, generated artifacts and the per-target tasks.
 */
interface DeploymentJob {
id: UUID;
promotionId: UUID;
releaseId: UUID;
environmentId: UUID;
// Execution configuration
strategy: DeploymentStrategy;
parallelism: number;
// Status tracking
status: JobStatus;
startedAt?: DateTime;
completedAt?: DateTime;
// Artifacts
artifacts: GeneratedArtifact[];
// Rollback reference
rollbackOf?: UUID; // If this is a rollback job
previousJobId?: UUID; // Previous successful job
// Tasks
tasks: DeploymentTask[];
}
/** Lifecycle states of a DeploymentJob; see the job state machine diagram for the transitions. */
type JobStatus =
| "pending"
| "preparing"
| "running"
| "completing"
| "completed"
| "failed"
| "rolling_back"
| "rolled_back";
/** Identifier of the rollout strategy a job uses. */
type DeploymentStrategy =
| "all-at-once"
| "rolling"
| "canary"
| "blue-green";
```
### Job State Machine
```
JOB STATE MACHINE
┌──────────┐
│ PENDING │
└────┬─────┘
│ start()
┌──────────┐
│PREPARING │
│ │
│ Generate │
│ artifacts│
└────┬─────┘
┌──────────┐
│ RUNNING │◄────────────────┐
│ │ │
│ Execute │ │
│ tasks │ │
└────┬─────┘ │
│ │
┌───────────────┼───────────────┐ │
│ │ │ │
▼ ▼ ▼ │
┌──────────┐ ┌──────────┐ ┌──────────┐ │
│COMPLETING│ │ FAILED │ │ ROLLING │ │
│ │ │ │ │ BACK │──┘
│ Verify │ │ │ │ │
│ health │ │ │ │ │
└────┬─────┘ └────┬─────┘ └────┬─────┘
│ │ │
▼ │ ▼
┌──────────┐ │ ┌──────────┐
│COMPLETED │ │ │ ROLLED │
└──────────┘ │ │ BACK │
│ └──────────┘
[Failure
handling]
```
## Deployment Task
### Task Entity
```typescript
/** A unit of work within a job: deploy one component digest to one target. */
interface DeploymentTask {
id: UUID;
jobId: UUID;
targetId: UUID;
// What to deploy
componentId: UUID;
digest: string;
// Execution
status: TaskStatus;
// Optional; TaskDispatcher.dispatchToAgent sets it when the task is handed to an agent.
agentId?: UUID;
startedAt?: DateTime;
completedAt?: DateTime;
// Results
logs: string;
previousDigest?: string; // For rollback
error?: string;
// Retry tracking
attemptNumber: number;
maxAttempts: number;
}
/** Lifecycle states of a DeploymentTask. */
type TaskStatus =
| "pending"
| "queued"
| "dispatched"
| "running"
| "verifying"
| "succeeded"
| "failed"
| "retrying";
```
### Task Dispatch
```typescript
/**
 * Routes a deployment task to the transport that matches its target's execution model.
 */
class TaskDispatcher {
  /**
   * Loads the task's target and hands the task to the matching dispatcher.
   *
   * @param task Task to dispatch; `task.targetId` selects the target.
   * @throws Error when the target's execution model has no dispatcher in this class.
   *         Previously such a task was silently dropped and stayed in its old status.
   */
  async dispatchTask(task: DeploymentTask): Promise<void> {
    const target = await this.targetRepository.get(task.targetId);
    switch (target.executionModel) {
      case "agent":
        await this.dispatchToAgent(task, target);
        break;
      case "ssh":
        await this.dispatchViaSsh(task, target);
        break;
      case "api":
        await this.dispatchViaApi(task, target);
        break;
      default:
        // Fail loudly instead of returning as if the task had been dispatched.
        throw new Error(`Unsupported execution model: ${target.executionModel}`);
    }
  }

  /**
   * Sends the task to an agent that serves the target and records the assignment.
   *
   * @throws NoAgentAvailableError when no agent is found for the target.
   */
  private async dispatchToAgent(
    task: DeploymentTask,
    target: Target
  ): Promise<void> {
    // Find available agent for target
    const agent = await this.agentManager.findAgentForTarget(target);
    if (!agent) {
      throw new NoAgentAvailableError(target.id);
    }

    // Create task payload
    const payload: AgentTaskPayload = {
      taskId: task.id,
      targetId: target.id,
      action: "deploy",
      digest: task.digest,
      config: target.connection,
      credentials: await this.fetchTaskCredentials(target)
    };

    // Dispatch to agent
    await this.agentClient.dispatchTask(agent.id, payload);

    // Persist the new status only after the agent client call has returned.
    task.status = "dispatched";
    task.agentId = agent.id;
    await this.taskRepository.update(task);
  }
}
```
## Generated Artifacts
### Artifact Types
| Type | Description | Format |
|------|-------------|--------|
| `compose-file` | Docker Compose file | YAML |
| `compose-lock` | Pinned compose file | YAML |
| `env-file` | Environment variables | .env |
| `systemd-unit` | Systemd service unit | .service |
| `nginx-config` | Nginx configuration | .conf |
| `manifest` | Deployment manifest | JSON |
### Compose Lock Generation
```typescript
/** In-memory shape of a generated Compose lock: pinned services plus generation metadata. */
interface ComposeLock {
version: string;
// Locked services keyed by service name.
services: Record<string, LockedService>;
generated: {
releaseId: string;
promotionId: string;
timestamp: string;
digest: string; // Hash of this file
};
}
/** One Compose service whose image is pinned by digest. */
interface LockedService {
image: string; // Full image reference with digest
environment?: Record<string, string>;
labels: Record<string, string>;
}
/**
 * Builds a Compose lock in which every service image is pinned to the digest
 * recorded in the release.
 */
class ComposeArtifactGenerator {
  /**
   * Generates the lock for one release from a compose template.
   *
   * @param release  Release whose components supply repository and digest per service.
   * @param target   Target the lock is generated for; supplies `promotionId`.
   * @param template Compose template; each service names its component via `componentName`.
   * @returns The lock, with `generated.digest` set to a SHA-256 hex hash of its YAML form.
   * @throws Error when a template service references a component the release does not contain.
   */
  async generateLock(
    release: Release,
    target: Target,
    template: ComposeTemplate
  ): Promise<ComposeLock> {
    // Take the clock once so every label and the lock metadata carry the same instant.
    const generatedAt = new Date().toISOString();
    const services: Record<string, LockedService> = {};

    for (const [serviceName, serviceConfig] of Object.entries(template.services)) {
      // Find component for this service
      const component = release.components.find(
        c => c.name === serviceConfig.componentName
      );
      if (!component) {
        throw new Error(`No component found for service ${serviceName}`);
      }

      services[serviceName] = {
        // Locked image reference: repository pinned by digest, never by tag.
        image: `${component.repository}@${component.digest}`,
        environment: {
          ...serviceConfig.environment,
          STELLA_RELEASE_ID: release.id,
          STELLA_DIGEST: component.digest
        },
        labels: {
          "stella.release.id": release.id,
          "stella.component.name": component.name,
          "stella.digest": component.digest,
          "stella.deployed.at": generatedAt
        }
      };
    }

    const lock: ComposeLock = {
      version: "3.8",
      services,
      generated: {
        releaseId: release.id,
        promotionId: target.promotionId,
        timestamp: generatedAt,
        digest: "" // Computed below
      }
    };

    // The hash covers the serialized lock while `digest` is still empty, so a verifier
    // must blank the field before recomputing it.
    const content = yaml.stringify(lock);
    lock.generated.digest = crypto.createHash("sha256").update(content).digest("hex");
    return lock;
  }
}
```
## Deployment Execution
### Execution Models
| Model | Description | Use Case |
|-------|-------------|----------|
| `agent` | Stella agent on target | Docker hosts, servers |
| `ssh` | SSH-based agentless | Unix servers |
| `winrm` | WinRM-based agentless | Windows servers |
| `api` | API-based | ECS, Nomad, K8s |
### Agent-Based Execution
```typescript
/**
 * Executes a deployment task through a Stella agent and polls until it finishes.
 */
class AgentExecutor {
  /**
   * Dispatches the task to its assigned agent and waits for the outcome.
   *
   * @param task Task to run; must already have `agentId` set.
   * @returns The execution result reported by the agent.
   * @throws Error when the task has no agent assigned.
   * @throws TimeoutError when the agent does not report completion in time.
   */
  async execute(task: DeploymentTask): Promise<ExecutionResult> {
    // `agentId` is optional on DeploymentTask; fail with a clear message instead of
    // passing undefined to the agent lookup.
    if (!task.agentId) {
      throw new Error(`Task ${task.id} has no agent assigned`);
    }
    const agent = await this.agentManager.get(task.agentId);
    const target = await this.targetRepository.get(task.targetId);

    // Prepare task payload with secrets
    const payload: TaskPayload = {
      taskId: task.id,
      targetId: target.id,
      action: "deploy",
      digest: task.digest,
      config: target.connection,
      artifacts: await this.getArtifacts(task.jobId),
      credentials: await this.secretsManager.fetchForTask(target)
    };

    // Dispatch to agent
    const taskRef = await this.agentClient.dispatchTask(agent.id, payload);

    // Wait for completion
    return this.waitForTaskCompletion(taskRef, task.timeout);
  }

  /**
   * Polls the agent once per second until the task completes or the deadline passes.
   *
   * @param taskRef Reference returned by the agent client on dispatch.
   * @param timeout Maximum wait in seconds.
   * @throws TimeoutError when the deadline passes first.
   */
  private async waitForTaskCompletion(
    taskRef: TaskReference,
    timeout: number
  ): Promise<ExecutionResult> {
    const deadline = Date.now() + timeout * 1000;
    while (Date.now() < deadline) {
      const status = await this.agentClient.getTaskStatus(taskRef);
      if (status.completed) {
        return {
          success: status.success,
          logs: status.logs,
          deployedDigest: status.deployedDigest,
          error: status.error
        };
      }
      await sleep(1000);
    }
    throw new TimeoutError(`Task did not complete within ${timeout} seconds`);
  }
}
```
### SSH-Based Execution
```typescript
/**
 * Executes a deployment task on a target over SSH, without an agent.
 */
class SshExecutor {
  /**
   * Uploads the job's artifacts to the target and runs the deployment command.
   *
   * @param task Task to run; `task.targetId` selects the target, `task.jobId` the artifacts.
   * @returns Success flag (exit code 0), combined stdout/stderr, and stderr as the error on failure.
   */
  async execute(task: DeploymentTask): Promise<ExecutionResult> {
    const target = await this.targetRepository.get(task.targetId);
    const sshConfig = target.connection as SshConnectionConfig;

    // Get SSH credentials from vault
    const creds = await this.secretsManager.fetchSshCredentials(
      sshConfig.credentialRef
    );

    const ssh = new NodeSSH();
    try {
      // Connect inside the try so a failed or half-open connection is still disposed.
      await ssh.connect({
        host: sshConfig.host,
        port: sshConfig.port || 22,
        username: creds.username,
        privateKey: creds.privateKey
      });

      // Upload artifacts
      const artifacts = await this.getArtifacts(task.jobId);
      for (const artifact of artifacts) {
        await ssh.putFile(artifact.localPath, artifact.remotePath);
      }

      // Execute deployment script
      const result = await ssh.execCommand(
        this.buildDeployCommand(task, target),
        { cwd: sshConfig.workDir }
      );
      return {
        success: result.code === 0,
        logs: `${result.stdout}\n${result.stderr}`,
        error: result.code !== 0 ? result.stderr : undefined
      };
    } finally {
      ssh.dispose();
    }
  }

  /**
   * Builds the shell command for the target type.
   *
   * Every interpolated value is single-quoted so paths with spaces work and shell
   * metacharacters cannot change the command.
   *
   * @throws Error for target types without an SSH deployment command.
   */
  private buildDeployCommand(task: DeploymentTask, target: Target): string {
    switch (target.targetType) {
      case "compose_host": {
        const workDir = SshExecutor.shellQuote(target.connection.workDir);
        return `cd ${workDir} && docker-compose pull && docker-compose up -d`;
      }
      case "docker_host": {
        const image = SshExecutor.shellQuote(task.digest);
        const name = SshExecutor.shellQuote(target.containerName);
        // Stop and remove are best-effort: on a first deployment there is no container yet,
        // and a stopped container must be removed or `docker run --name` fails on the
        // name conflict.
        return (
          `docker pull ${image}` +
          ` && (docker stop ${name} || true)` +
          ` && (docker rm ${name} || true)` +
          ` && docker run -d --name ${name} ${image}`
        );
      }
      default:
        throw new Error(`Unsupported target type: ${target.targetType}`);
    }
  }

  /** Wraps a value in single quotes for a POSIX shell, escaping embedded single quotes. */
  private static shellQuote(value: string): string {
    return `'${String(value).replace(/'/g, `'\\''`)}'`;
  }
}
```
## Health Verification
```typescript
/** Settings for a post-deployment health probe. */
interface HealthCheckConfig {
type: "http" | "tcp" | "command";
// Per-probe timeout; HealthVerifier multiplies it by 1000, so the unit is seconds.
timeout: number;
// Upper bound of HealthVerifier's attempt loop (total attempts, not extra retries).
retries: number;
// Pause between attempts; HealthVerifier multiplies it by 1000, so the unit is seconds.
interval: number;
// HTTP-specific
path?: string;
expectedStatus?: number;
expectedBody?: string;
// TCP-specific
port?: number;
// Command-specific
command?: string;
}
/**
 * Probes a target until it reports healthy or the configured attempts are used up.
 */
class HealthVerifier {
  /**
   * Runs the configured check up to `config.retries` times, pausing `config.interval`
   * seconds between attempts.
   *
   * At least one attempt is always made, so `retries: 0` no longer reports a target as
   * unhealthy without ever probing it.
   *
   * @returns The first healthy result, or an unhealthy result carrying the last failure
   *          message and the number of attempts made.
   */
  async verify(
    target: Target,
    config: HealthCheckConfig
  ): Promise<HealthCheckResult> {
    const totalAttempts = Math.max(1, config.retries);
    let lastError: Error | undefined;

    for (let attempt = 0; attempt < totalAttempts; attempt++) {
      try {
        const result = await this.performCheck(target, config);
        if (result.healthy) {
          return result;
        }
        lastError = new Error(result.message);
      } catch (error) {
        // A thrown value is not guaranteed to be an Error instance.
        lastError = error instanceof Error ? error : new Error(String(error));
      }

      // No pause after the final attempt.
      if (attempt < totalAttempts - 1) {
        await sleep(config.interval * 1000);
      }
    }

    return {
      healthy: false,
      message: lastError?.message || "Health check failed",
      attempts: totalAttempts
    };
  }

  /**
   * Dispatches to the probe implementation for `config.type`.
   *
   * @throws Error for a check type without an implementation, rather than resolving to undefined.
   */
  private async performCheck(
    target: Target,
    config: HealthCheckConfig
  ): Promise<HealthCheckResult> {
    switch (config.type) {
      case "http":
        return this.httpCheck(target, config);
      case "tcp":
        return this.tcpCheck(target, config);
      case "command":
        return this.commandCheck(target, config);
      default:
        throw new Error(`Unsupported health check type: ${String(config.type)}`);
    }
  }

  /**
   * Issues a GET to the target's health endpoint and compares the status code with
   * `config.expectedStatus` (default 200). Network and timeout errors become an
   * unhealthy result instead of an exception.
   *
   * NOTE(review): `config.expectedBody` is not evaluated here — confirm whether body
   * matching is intended.
   */
  private async httpCheck(
    target: Target,
    config: HealthCheckConfig
  ): Promise<HealthCheckResult> {
    const url = `${target.healthEndpoint}${config.path || "/health"}`;
    const expectedStatus = config.expectedStatus || 200;
    try {
      const response = await fetch(url, {
        signal: AbortSignal.timeout(config.timeout * 1000)
      });
      const healthy = response.status === expectedStatus;
      return {
        healthy,
        message: healthy ? "OK" : `Status ${response.status}`,
        statusCode: response.status
      };
    } catch (error) {
      return {
        healthy: false,
        message: error instanceof Error ? error.message : String(error)
      };
    }
  }
}
```
## Rollback Management
```typescript
/**
 * Creates and runs a rollback job that restores the last successfully deployed release.
 */
class RollbackManager {
  /**
   * Builds a rollback job for a failed job and executes it.
   *
   * Each failed task is paired with the previous job's task for the same target and
   * component, and the rollback redeploys the digest that previous task deployed.
   * Failed tasks with no counterpart in the previous job are skipped.
   *
   * @param jobId  Id of the failed job.
   * @param reason Reason for the rollback (not used by this method).
   * @returns The rollback job after it has been saved and executed.
   * @throws NoRollbackTargetError when the environment has no earlier completed job
   *         for a different release.
   */
  async initiateRollback(
    jobId: UUID,
    reason: string
  ): Promise<DeploymentJob> {
    const failedJob = await this.jobRepository.get(jobId);
    const previousJob = await this.findPreviousSuccessfulJob(
      failedJob.environmentId,
      failedJob.releaseId
    );
    if (!previousJob) {
      throw new NoRollbackTargetError(jobId);
    }

    // Create rollback job
    const rollbackJob: DeploymentJob = {
      id: uuidv4(),
      promotionId: failedJob.promotionId,
      releaseId: previousJob.releaseId, // Previous release
      environmentId: failedJob.environmentId,
      strategy: "all-at-once", // Fast rollback
      parallelism: 10,
      status: "pending",
      rollbackOf: jobId,
      previousJobId: previousJob.id,
      artifacts: [],
      tasks: []
    };

    // Create tasks to restore previous state
    for (const task of failedJob.tasks) {
      // Match on component as well as target: a target can host several components,
      // and matching on target alone would pair every task with the same previous task.
      const previousTask = previousJob.tasks.find(
        t => t.targetId === task.targetId && t.componentId === task.componentId
      );
      if (!previousTask) {
        continue;
      }
      rollbackJob.tasks.push({
        id: uuidv4(),
        jobId: rollbackJob.id,
        targetId: task.targetId,
        componentId: previousTask.componentId,
        // Redeploy what the previous successful job deployed. Its `previousDigest` is
        // what ran before that job, i.e. two versions back.
        digest: previousTask.digest,
        status: "pending",
        logs: "",
        attemptNumber: 0,
        maxAttempts: 3
      });
    }

    await this.jobRepository.save(rollbackJob);

    // Execute rollback
    await this.executeJob(rollbackJob);
    return rollbackJob;
  }

  /**
   * Finds the most recently completed job in the environment whose release differs
   * from `excludeReleaseId`.
   */
  private async findPreviousSuccessfulJob(
    environmentId: UUID,
    excludeReleaseId: UUID
  ): Promise<DeploymentJob | null> {
    return this.jobRepository.findOne({
      environmentId,
      status: "completed",
      releaseId: { $ne: excludeReleaseId }
    }, {
      orderBy: { completedAt: "desc" }
    });
  }
}
```
## References
- [Deployment Strategies](strategies.md)
- [Agent-Based Deployment](agent-based.md)
- [Agentless Deployment](agentless.md)
- [Generated Artifacts](artifacts.md)
- [Deploy Orchestrator Module](../modules/deploy-orchestrator.md)

View File

@@ -0,0 +1,656 @@
# Deployment Strategies
## Overview
Release Orchestrator supports multiple deployment strategies to balance deployment speed, risk, and availability requirements.
## Strategy Comparison
| Strategy | Description | Risk Level | Downtime | Rollback Speed |
|----------|-------------|------------|----------|----------------|
| All-at-once | Deploy to all targets simultaneously | High | Brief | Fast |
| Rolling | Deploy to targets in batches | Medium | None | Medium |
| Canary | Deploy to subset, then expand | Low | None | Fast |
| Blue-Green | Deploy to parallel environment | Low | None | Instant |
## All-at-Once Strategy
### Description
Deploys to all targets simultaneously. Simple and fast, but highest risk.
```
ALL-AT-ONCE DEPLOYMENT
Time 0 Time 1
┌─────────────────┐ ┌─────────────────┐
│ Target 1 [v1] │ │ Target 1 [v2] │
├─────────────────┤ ├─────────────────┤
│ Target 2 [v1] │ ───► │ Target 2 [v2] │
├─────────────────┤ ├─────────────────┤
│ Target 3 [v1] │ │ Target 3 [v2] │
└─────────────────┘ └─────────────────┘
```
### Configuration
```typescript
/** Configuration for the "all-at-once" strategy. */
interface AllAtOnceConfig {
strategy: "all-at-once";
// Concurrency limit (0 = unlimited)
maxConcurrent: number;
// Health check after deployment
healthCheck: HealthCheckConfig;
// Failure behavior
failureBehavior: "rollback" | "continue" | "pause";
}
// Example
// Unlimited concurrency, HTTP health check on /health, roll back on any task failure.
const config: AllAtOnceConfig = {
strategy: "all-at-once",
maxConcurrent: 0,
healthCheck: {
type: "http",
path: "/health",
timeout: 30,
retries: 3,
interval: 10
},
failureBehavior: "rollback"
};
```
### Execution
```typescript
/**
 * Runs every task of a job at once, optionally bounded by a concurrency limit.
 */
class AllAtOnceExecutor {
  /**
   * Executes all tasks, applies the configured failure behavior, then health-checks
   * the targets.
   *
   * @param job    Job whose tasks are executed.
   * @param config Strategy settings; `maxConcurrent: 0` means no limit.
   * @throws DeploymentFailedError when a task fails and the behavior is "rollback" or "pause".
   */
  async execute(job: DeploymentJob, config: AllAtOnceConfig): Promise<void> {
    const tasks = job.tasks;
    // pMap rejects a concurrency below 1, which is what an empty task list with
    // `maxConcurrent: 0` used to produce.
    const concurrency = Math.max(1, config.maxConcurrent || tasks.length);

    // Execute all tasks with concurrency limit; failures are collected, not thrown,
    // so one failing task does not cancel the others.
    const results = await pMap(
      tasks,
      async (task) => {
        try {
          await this.executeTask(task);
          return { taskId: task.id, success: true };
        } catch (error) {
          return { taskId: task.id, success: false, error };
        }
      },
      { concurrency }
    );

    // Check for failures
    const failures = results.filter(r => !r.success);
    if (failures.length > 0) {
      if (config.failureBehavior === "rollback") {
        await this.rollbackAll(job);
        throw new DeploymentFailedError(failures);
      } else if (config.failureBehavior === "pause") {
        job.status = "failed";
        throw new DeploymentFailedError(failures);
      }
      // "continue" - proceed despite failures
    }

    // Health check all targets
    await this.verifyAllTargets(job, config.healthCheck);
  }
}
```
### Use Cases
- Development environments
- Small deployments
- Time-critical updates
- Stateless services with fast startup
## Rolling Strategy
### Description
Deploys to targets in configurable batches, maintaining availability throughout.
```
ROLLING DEPLOYMENT (batch size: 1)
Time 0 Time 1 Time 2 Time 3
┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
│ T1 [v1] │ │ T1 [v2] ✓ │ │ T1 [v2] ✓ │ │ T1 [v2] ✓ │
├─────────────┤ ├─────────────┤ ├─────────────┤ ├─────────────┤
│ T2 [v1] │──►│ T2 [v1] │──►│ T2 [v2] ✓ │──►│ T2 [v2] ✓ │
├─────────────┤ ├─────────────┤ ├─────────────┤ ├─────────────┤
│ T3 [v1] │ │ T3 [v1] │ │ T3 [v1] │ │ T3 [v2] ✓ │
└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘
```
### Configuration
```typescript
/** Configuration for the "rolling" strategy. */
interface RollingConfig {
strategy: "rolling";
// Batch configuration
batchSize: number; // Targets per batch
// When set to a non-zero value, RollingExecutor.createBatches uses it instead of batchSize.
batchPercent?: number; // Alternative: percentage of targets
// Timing
batchDelay: number; // Seconds between batches
stabilizationTime: number; // Wait after health check passes
// Health check
healthCheck: HealthCheckConfig;
// Failure handling
maxFailedBatches: number; // Failures before stopping
failureBehavior: "rollback" | "pause" | "skip";
// Ordering
targetOrder: "default" | "shuffle" | "priority";
}
// Example
// Batches of two targets, 30 s between batches, 60 s stabilization after each batch.
const config: RollingConfig = {
strategy: "rolling",
batchSize: 2,
batchDelay: 30,
stabilizationTime: 60,
healthCheck: {
type: "http",
path: "/health",
timeout: 30,
retries: 5,
interval: 10
},
maxFailedBatches: 1,
failureBehavior: "rollback",
targetOrder: "default"
};
```
### Execution
```typescript
/**
 * Deploys a job's tasks batch by batch, verifying each batch before the next starts.
 */
class RollingExecutor {
  /**
   * Runs the rolling deployment.
   *
   * For each batch: execute its tasks in parallel, apply the failure policy, health-check
   * the batch, wait `stabilizationTime` seconds, then wait `batchDelay` seconds before
   * the next batch.
   *
   * @throws DeploymentFailedError when failed batches exceed `maxFailedBatches`, or on the
   *         first failed batch when the behavior is "pause".
   */
  async execute(job: DeploymentJob, config: RollingConfig): Promise<void> {
    const tasks = this.orderTasks(job.tasks, config.targetOrder);
    const batches = this.createBatches(tasks, config);
    let failedBatches = 0;
    const completedTasks: DeploymentTask[] = [];

    for (const [batchIndex, batch] of batches.entries()) {
      this.emitProgress(job, {
        phase: "deploying",
        currentBatch: batchIndex + 1,
        totalBatches: batches.length,
        completedTargets: completedTasks.length,
        totalTargets: tasks.length
      });

      // Execute batch
      const results = await Promise.all(
        batch.map(task => this.executeTask(task))
      );

      // Check batch results
      const failures = results.filter(r => !r.success);
      if (failures.length > 0) {
        failedBatches++;
        if (failedBatches > config.maxFailedBatches) {
          if (config.failureBehavior === "rollback") {
            // Tasks that succeeded inside the failing batch already run the new version,
            // so they are rolled back together with the earlier batches.
            const succeededInBatch = batch.filter((_, i) => results[i].success);
            await this.rollbackCompleted([...completedTasks, ...succeededInBatch]);
          }
          throw new DeploymentFailedError(failures);
        }
        if (config.failureBehavior === "pause") {
          job.status = "failed";
          throw new DeploymentFailedError(failures);
        }
        // "skip" - continue to next batch
      }

      // Health check batch targets
      // NOTE(review): after a tolerated failure this also verifies the failed targets — confirm intent.
      await this.verifyBatch(batch, config.healthCheck);

      // Wait for stabilization
      if (config.stabilizationTime > 0) {
        await sleep(config.stabilizationTime * 1000);
      }
      completedTasks.push(...batch);

      // Wait before next batch
      if (batchIndex < batches.length - 1) {
        await sleep(config.batchDelay * 1000);
      }
    }
  }

  /**
   * Splits tasks into consecutive batches.
   *
   * A non-zero `batchPercent` takes precedence over `batchSize`. The effective size is
   * clamped to a whole number of at least 1; a size of 0 previously never advanced the
   * loop index and looped forever.
   */
  private createBatches(
    tasks: DeploymentTask[],
    config: RollingConfig
  ): DeploymentTask[][] {
    const requested = config.batchPercent
      ? Math.ceil(tasks.length * config.batchPercent / 100)
      : config.batchSize;
    const batchSize = Math.max(1, Math.floor(requested) || 1);

    const batches: DeploymentTask[][] = [];
    for (let i = 0; i < tasks.length; i += batchSize) {
      batches.push(tasks.slice(i, i + batchSize));
    }
    return batches;
  }
}
```
### Use Cases
- Production deployments
- High-availability requirements
- Large target counts
- Services requiring gradual rollout
## Canary Strategy
### Description
Deploys to a small subset of targets first, validates, then expands to remaining targets.
```
CANARY DEPLOYMENT
Phase 1: Canary (10%) Phase 2: Expand (50%) Phase 3: Full (100%)
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
│ T1 [v2] ✓ │ ◄─canary │ T1 [v2] ✓ │ │ T1 [v2] ✓ │
├─────────────┤ ├─────────────┤ ├─────────────┤
│ T2 [v1] │ │ T2 [v2] ✓ │ │ T2 [v2] ✓ │
├─────────────┤ ├─────────────┤ ├─────────────┤
│ T3 [v1] │ │ T3 [v2] ✓ │ │ T3 [v2] ✓ │
├─────────────┤ ├─────────────┤ ├─────────────┤
│ T4 [v1] │ │ T4 [v2] ✓ │ │ T4 [v2] ✓ │
├─────────────┤ ├─────────────┤ ├─────────────┤
│ T5 [v1] │ │ T5 [v1] │ │ T5 [v2] ✓ │
└─────────────┘ └─────────────┘ └─────────────┘
│ │ │
▼ ▼ ▼
Health Check Health Check Health Check
Error Rate Check Error Rate Check Error Rate Check
```
### Configuration
```typescript
/** Configuration for the "canary" strategy. */
interface CanaryConfig {
strategy: "canary";
// Canary stages
stages: CanaryStage[];
// Canary selection
canarySelector: "random" | "labeled" | "first";
canaryLabel?: string; // Label for canary targets
// Automatic vs manual progression
// NOTE(review): CanaryExecutor.execute reads only the per-stage autoProgress flag — confirm how this one is used.
autoProgress: boolean;
// Health and metrics checks
healthCheck: HealthCheckConfig;
metricsCheck?: MetricsCheckConfig;
}
/** One step of a canary rollout. */
interface CanaryStage {
name: string;
percentage: number; // Target percentage
duration: number; // Minimum time at this stage (seconds)
autoProgress: boolean; // Auto-advance after duration
}
/** Metrics gate evaluated for a canary stage. */
interface MetricsCheckConfig {
integrationId: UUID; // Metrics integration
queries: MetricQuery[];
failureThreshold: number; // Percentage deviation to fail
}
/** A single metric query and the comparison applied to its result. */
interface MetricQuery {
name: string;
query: string; // PromQL or similar
operator: "lt" | "gt" | "eq";
threshold: number;
}
// Example
// Three stages (10% -> 50% -> 100%); the first stage waits for manual approval.
const config: CanaryConfig = {
strategy: "canary",
stages: [
{ name: "canary", percentage: 10, duration: 300, autoProgress: false },
{ name: "expand", percentage: 50, duration: 300, autoProgress: true },
{ name: "full", percentage: 100, duration: 0, autoProgress: true }
],
canarySelector: "labeled",
canaryLabel: "canary=true",
autoProgress: false,
healthCheck: {
type: "http",
path: "/health",
timeout: 30,
retries: 5,
interval: 10
},
metricsCheck: {
integrationId: "prometheus-uuid",
queries: [
{
name: "error_rate",
query: "rate(http_requests_total{status=~\"5..\"}[5m]) / rate(http_requests_total[5m])",
operator: "lt",
threshold: 0.01 // Less than 1% error rate
}
],
failureThreshold: 10
}
};
```
### Execution
```typescript
/**
 * Rolls a release out stage by stage, validating health and metrics at every stage.
 */
class CanaryExecutor {
  /**
   * Walks the configured stages in order. Each stage deploys to the still-pending tasks
   * within its percentage of the ordered task list, verifies health and (optionally)
   * metrics, holds for the stage duration, and waits for approval when the stage is not
   * auto-progressing and is below 100%.
   */
  async execute(job: DeploymentJob, config: CanaryConfig): Promise<void> {
    const orderedTasks = this.orderTasks(job.tasks, config);

    for (const stage of config.stages) {
      // The stage covers the first N tasks, N being its percentage rounded up.
      const cutoff = Math.ceil(orderedTasks.length * stage.percentage / 100);
      const inStage = orderedTasks.slice(0, cutoff);
      const notYetDeployed = inStage.filter(candidate => candidate.status === "pending");

      this.emitProgress(job, {
        phase: "canary",
        stage: stage.name,
        percentage: stage.percentage,
        targets: inStage.length
      });

      // Only targets added by this stage are deployed; earlier ones already run the release.
      const deployments = notYetDeployed.map(pending => this.executeTask(pending));
      await Promise.all(deployments);

      // Every target in the stage is verified, not just the new ones.
      await this.verifyTargets(inStage, config.healthCheck);

      if (config.metricsCheck) {
        await this.checkMetrics(inStage, config.metricsCheck);
      }

      // Hold at this stage for its minimum duration.
      if (stage.duration > 0) {
        await this.waitWithMonitoring(
          inStage,
          stage.duration,
          config.metricsCheck
        );
      }

      // Manual gate for non-final stages that do not auto-progress.
      const needsApproval = !stage.autoProgress && stage.percentage < 100;
      if (needsApproval) {
        await this.waitForApproval(job, stage.name);
      }
    }
  }

  /**
   * Runs each configured metric query in order and throws on the first one that does
   * not pass evaluation.
   *
   * @throws CanaryMetricsFailedError carrying the query name, its result and its threshold.
   */
  private async checkMetrics(
    targets: DeploymentTask[],
    config: MetricsCheckConfig
  ): Promise<void> {
    const client = await this.getMetricsClient(config.integrationId);

    for (const metric of config.queries) {
      const observed = await client.query(metric.query);
      if (this.evaluateMetric(observed, metric)) {
        continue;
      }
      throw new CanaryMetricsFailedError(metric.name, observed, metric.threshold);
    }
  }
}
```
### Use Cases
- Risk-sensitive deployments
- Services with real user traffic
- Deployments with metrics-based validation
- Gradual feature rollouts
## Blue-Green Strategy
### Description
Deploys to a parallel "green" environment while "blue" continues serving traffic, then switches.
```
BLUE-GREEN DEPLOYMENT
Phase 1: Deploy Green Phase 2: Switch Traffic
┌─────────────────────────┐ ┌─────────────────────────┐
│ Load Balancer │ │ Load Balancer │
│ │ │ │ │ │
│ ▼ │ │ ▼ │
│ ┌─────────────┐ │ │ ┌─────────────┐ │
│ │ Blue [v1] │◄─active│ │ │ Blue [v1] │ │
│ │ T1, T2, T3 │ │ │ │ T1, T2, T3 │ │
│ └─────────────┘ │ │ └─────────────┘ │
│ │ │ │
│ ┌─────────────┐ │ │ ┌─────────────┐ │
│ │ Green [v2] │◄─deploy│ │ │ Green [v2] │◄─active│
│ │ T4, T5, T6 │ │ │ │ T4, T5, T6 │ │
│ └─────────────┘ │ │ └─────────────┘ │
│ │ │ │
└─────────────────────────┘ └─────────────────────────┘
```
### Configuration
```typescript
/** Configuration for the "blue-green" strategy. */
interface BlueGreenConfig {
strategy: "blue-green";
// Environment labels
blueLabel: string; // Label for blue targets
greenLabel: string; // Label for green targets
// Traffic routing
routerIntegration: UUID; // Router/LB integration
routingConfig: RoutingConfig;
// Validation
healthCheck: HealthCheckConfig;
warmupTime: number; // Seconds to warm up green
validationTests?: string[]; // Test suites to run
// Switchover
switchoverMode: "instant" | "gradual";
// Read only for the "gradual" mode; BlueGreenExecutor supplies default steps when omitted.
gradualSteps?: number[]; // Percentage steps for gradual
// Rollback
keepBlueActive: number; // Seconds to keep blue ready
}
// Example
// Instant switchover after a 60 s warm-up and a smoke-test suite; blue stays ready for 30 minutes.
const config: BlueGreenConfig = {
strategy: "blue-green",
blueLabel: "deployment=blue",
greenLabel: "deployment=green",
routerIntegration: "nginx-lb-uuid",
routingConfig: {
upstreamName: "myapp",
healthEndpoint: "/health"
},
healthCheck: {
type: "http",
path: "/health",
timeout: 30,
retries: 5,
interval: 10
},
warmupTime: 60,
validationTests: ["smoke-test-suite"],
switchoverMode: "instant",
keepBlueActive: 1800 // 30 minutes
};
```
### Execution
```typescript
/**
 * Deploys to the green target group, validates it, then moves traffic from blue to green.
 */
class BlueGreenExecutor {
  /**
   * Runs the blue-green flow: deploy green, health-check, warm up, run validation tests,
   * switch traffic (instant or gradual), verify routing, and schedule blue decommission.
   */
  async execute(job: DeploymentJob, config: BlueGreenConfig): Promise<void> {
    // Identify blue and green targets
    const { blue, green } = this.categorizeTargets(job.tasks, config);

    // Phase 1: Deploy to green
    this.emitProgress(job, { phase: "deploying-green" });
    await Promise.all(green.map(task => this.executeTask(task)));

    // Health check green targets
    await this.verifyTargets(green, config.healthCheck);

    // Warmup period
    if (config.warmupTime > 0) {
      this.emitProgress(job, { phase: "warming-up" });
      await sleep(config.warmupTime * 1000);
    }

    // Run validation tests
    if (config.validationTests?.length) {
      this.emitProgress(job, { phase: "validating" });
      await this.runValidationTests(green, config.validationTests);
    }

    // Phase 2: Switch traffic
    this.emitProgress(job, { phase: "switching-traffic" });
    if (config.switchoverMode === "instant") {
      await this.instantSwitchover(config, blue, green);
    } else {
      await this.gradualSwitchover(config, blue, green);
    }

    // Verify traffic routing
    await this.verifyRouting(green, config);

    // Schedule blue decommission
    if (config.keepBlueActive > 0) {
      this.scheduleBlueDecommission(blue, config.keepBlueActive);
    }
  }

  /**
   * Points the upstream at the green servers, then removes the blue servers from it.
   */
  private async instantSwitchover(
    config: BlueGreenConfig,
    blue: DeploymentTask[],
    green: DeploymentTask[]
  ): Promise<void> {
    const router = await this.getRouter(config.routerIntegration);

    // Update upstream to green targets
    await router.updateUpstream(config.routingConfig.upstreamName, {
      servers: green.map(t => ({
        address: t.target.address,
        weight: 1
      }))
    });

    // Remove blue from rotation
    await router.removeServers(
      config.routingConfig.upstreamName,
      blue.map(t => t.target.address)
    );
  }

  /**
   * Shifts traffic to green in percentage steps, monitoring after each step.
   *
   * Missing or empty `gradualSteps` fall back to 25/50/75/100. A final 100% step is
   * appended when the configured steps do not end there; otherwise `execute` would go on
   * to verify routing and decommission blue while blue still serves traffic.
   */
  private async gradualSwitchover(
    config: BlueGreenConfig,
    blue: DeploymentTask[],
    green: DeploymentTask[]
  ): Promise<void> {
    const router = await this.getRouter(config.routerIntegration);

    const configured = config.gradualSteps ?? [];
    const steps = configured.length > 0 ? [...configured] : [25, 50, 75, 100];
    if (steps[steps.length - 1] !== 100) {
      steps.push(100);
    }

    for (const percentage of steps) {
      await router.setTrafficSplit(config.routingConfig.upstreamName, {
        blue: 100 - percentage,
        green: percentage
      });

      // Monitor for errors
      await this.monitorTraffic(30);
    }
  }
}
```
### Use Cases
- Zero-downtime deployments
- Database migration deployments
- High-stakes production updates
- Instant rollback requirements
## Strategy Selection Guide
```
STRATEGY SELECTION
START
┌────────────────────────┐
│ Zero downtime needed? │
└───────────┬────────────┘
No │ Yes
│ │ │
▼ │ ▼
┌──────────┐ │ ┌───────────────────┐
│ All-at- │ │ │ Metrics-based │
│ once │ │ │ validation needed?│
└──────────┘ │ └─────────┬─────────┘
│ │
│ No │ Yes
│ │ │ │
│ ▼ │ ▼
│ ┌──────────┐│ ┌──────────┐
│ │ Instant ││ │ Canary │
│ │ rollback? ││ │ │
│ └────┬─────┘│ └──────────┘
│ │ │
│ No │ Yes │
│ │ │ │ │
│ ▼ │ ▼ │
│┌──────┐│┌────┴─────┐
││Rolling│││Blue-Green│
│└──────┘│└──────────┘
│ │
└───────┘
```
## References
- [Deployment Overview](overview.md)
- [Progressive Delivery](../modules/progressive-delivery.md)
- [Rollback Management](overview.md#rollback-management)