release orchestrator pivot, architecture and planning
This commit is contained in:
308
docs/modules/release-orchestrator/deployment/artifacts.md
Normal file
308
docs/modules/release-orchestrator/deployment/artifacts.md
Normal file
@@ -0,0 +1,308 @@
|
||||
# Artifact Generation
|
||||
|
||||
## Overview
|
||||
|
||||
Every deployment generates immutable artifacts that enable reproducibility, audit, and rollback.
|
||||
|
||||
## Generated Artifacts
|
||||
|
||||
### 1. Compose Lock File
|
||||
|
||||
**File:** `compose.stella.lock.yml`
|
||||
|
||||
A Docker Compose file with all image references pinned to specific digests.
|
||||
|
||||
```yaml
|
||||
# compose.stella.lock.yml
|
||||
# Generated by Stella Ops - DO NOT EDIT
|
||||
# Release: myapp-v2.3.1
|
||||
# Generated: 2026-01-10T14:30:00Z
|
||||
# Generator: stella-artifact-generator@1.5.0
|
||||
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
api:
|
||||
image: registry.example.com/myapp/api@sha256:abc123...
|
||||
# Original tag: v2.3.1
|
||||
deploy:
|
||||
replicas: 2
|
||||
environment:
|
||||
- DATABASE_URL=${DATABASE_URL}
|
||||
- REDIS_URL=${REDIS_URL}
|
||||
labels:
|
||||
stella.component.id: "comp-api-uuid"
|
||||
stella.release.id: "rel-uuid"
|
||||
stella.digest: "sha256:abc123..."
|
||||
|
||||
worker:
|
||||
image: registry.example.com/myapp/worker@sha256:def456...
|
||||
# Original tag: v2.3.1
|
||||
deploy:
|
||||
replicas: 1
|
||||
labels:
|
||||
stella.component.id: "comp-worker-uuid"
|
||||
stella.release.id: "rel-uuid"
|
||||
stella.digest: "sha256:def456..."
|
||||
|
||||
# Stella metadata
|
||||
x-stella:
|
||||
release:
|
||||
id: "rel-uuid"
|
||||
name: "myapp-v2.3.1"
|
||||
created_at: "2026-01-10T14:00:00Z"
|
||||
environment:
|
||||
id: "env-uuid"
|
||||
name: "production"
|
||||
deployment:
|
||||
id: "deploy-uuid"
|
||||
started_at: "2026-01-10T14:30:00Z"
|
||||
checksums:
|
||||
sha256: "checksum-of-this-file"
|
||||
```
|
||||
|
||||
### 2. Version Sticker
|
||||
|
||||
**File:** `stella.version.json`
|
||||
|
||||
Metadata file placed on deployment targets indicating current deployment state.
|
||||
|
||||
```json
|
||||
{
|
||||
"version": "1.0",
|
||||
"generatedAt": "2026-01-10T14:35:00Z",
|
||||
"generator": "stella-artifact-generator@1.5.0",
|
||||
|
||||
"release": {
|
||||
"id": "rel-uuid",
|
||||
"name": "myapp-v2.3.1",
|
||||
"createdAt": "2026-01-10T14:00:00Z",
|
||||
"components": [
|
||||
{
|
||||
"name": "api",
|
||||
"digest": "sha256:abc123...",
|
||||
"semver": "2.3.1",
|
||||
"tag": "v2.3.1"
|
||||
},
|
||||
{
|
||||
"name": "worker",
|
||||
"digest": "sha256:def456...",
|
||||
"semver": "2.3.1",
|
||||
"tag": "v2.3.1"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"deployment": {
|
||||
"id": "deploy-uuid",
|
||||
"promotionId": "promo-uuid",
|
||||
"environmentId": "env-uuid",
|
||||
"environmentName": "production",
|
||||
"targetId": "target-uuid",
|
||||
"targetName": "prod-web-01",
|
||||
"strategy": "rolling",
|
||||
"startedAt": "2026-01-10T14:30:00Z",
|
||||
"completedAt": "2026-01-10T14:35:00Z"
|
||||
},
|
||||
|
||||
"deployer": {
|
||||
"userId": "user-uuid",
|
||||
"userName": "john.doe",
|
||||
"agentId": "agent-uuid",
|
||||
"agentName": "prod-agent-01"
|
||||
},
|
||||
|
||||
"previous": {
|
||||
"releaseId": "prev-rel-uuid",
|
||||
"releaseName": "myapp-v2.3.0",
|
||||
"digest": "sha256:789..."
|
||||
},
|
||||
|
||||
"signature": "base64-encoded-signature",
|
||||
"signatureAlgorithm": "RS256",
|
||||
"signerKeyRef": "stella/signing/prod-key-2026"
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Evidence Packet
|
||||
|
||||
**File:** Evidence stored in database (exportable as JSON/PDF)
|
||||
|
||||
See [Evidence Schema](../appendices/evidence-schema.md) for full specification.
|
||||
|
||||
### 4. Deployment Script (Optional)
|
||||
|
||||
**File:** `deploy.stella.script.dll` or `deploy.stella.sh`
|
||||
|
||||
When deployments use C# or shell scripts with hooks:
|
||||
|
||||
```csharp
|
||||
// deploy.stella.csx (source, compiled to DLL)
|
||||
#r "nuget: StellaOps.Sdk, 1.0.0"
|
||||
|
||||
using StellaOps.Sdk;
|
||||
|
||||
// Pre-deploy hook
|
||||
await Context.RunPreDeployHook(async (ctx) => {
|
||||
await ctx.ExecuteCommand("./scripts/backup-database.sh");
|
||||
await ctx.HealthCheck("/ready", timeout: 30);
|
||||
});
|
||||
|
||||
// Deploy
|
||||
await Context.Deploy();
|
||||
|
||||
// Post-deploy hook
|
||||
await Context.RunPostDeployHook(async (ctx) => {
|
||||
await ctx.ExecuteCommand("./scripts/warm-cache.sh");
|
||||
await ctx.Notify("slack", "Deployment complete");
|
||||
});
|
||||
```
|
||||
|
||||
## Artifact Storage
|
||||
|
||||
### Storage Structure
|
||||
|
||||
```
|
||||
artifacts/
|
||||
├── {tenant_id}/
|
||||
│ ├── {deployment_id}/
|
||||
│ │ ├── compose.stella.lock.yml
|
||||
│ │ ├── deploy.stella.script.dll (if applicable)
|
||||
│ │ ├── deploy.stella.script.csx (source)
|
||||
│ │ ├── manifest.json
|
||||
│ │ └── checksums.sha256
|
||||
│ └── ...
|
||||
└── ...
|
||||
```
|
||||
|
||||
### Manifest File
|
||||
|
||||
```json
|
||||
{
|
||||
"version": "1.0",
|
||||
"deploymentId": "deploy-uuid",
|
||||
"createdAt": "2026-01-10T14:30:00Z",
|
||||
"artifacts": [
|
||||
{
|
||||
"name": "compose.stella.lock.yml",
|
||||
"type": "compose-lock",
|
||||
"size": 2048,
|
||||
"sha256": "abc123..."
|
||||
},
|
||||
{
|
||||
"name": "deploy.stella.script.dll",
|
||||
"type": "script-compiled",
|
||||
"size": 8192,
|
||||
"sha256": "def456..."
|
||||
}
|
||||
],
|
||||
"totalSize": 10240,
|
||||
"signature": "base64-signature"
|
||||
}
|
||||
```
|
||||
|
||||
## Artifact Generation Process
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ ARTIFACT GENERATION FLOW │
|
||||
│ │
|
||||
│ ┌─────────────────┐ │
|
||||
│ │ Promotion │ │
|
||||
│ │ Approved │ │
|
||||
│ └────────┬────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ ARTIFACT GENERATOR │ │
|
||||
│ │ │ │
|
||||
│ │ 1. Load release bundle (components, digests) │ │
|
||||
│ │ 2. Load environment configuration (variables, secrets refs) │ │
|
||||
│ │ 3. Load workflow template (hooks, scripts) │ │
|
||||
│ │ 4. Generate compose.stella.lock.yml │ │
|
||||
│ │ 5. Compile scripts (if any) │ │
|
||||
│ │ 6. Generate version sticker template │ │
|
||||
│ │ 7. Compute checksums │ │
|
||||
│ │ 8. Sign artifacts │ │
|
||||
│ │ 9. Store in artifact storage │ │
|
||||
│ │ │ │
|
||||
│ └────────────────────────────┬────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ DEPLOYMENT ORCHESTRATOR │ │
|
||||
│ │ │ │
|
||||
│ │ Artifacts distributed to targets via agents │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Artifact Properties
|
||||
|
||||
### Immutability
|
||||
|
||||
Once generated, artifacts are never modified:
|
||||
- Content-addressed storage (hash in path/metadata)
|
||||
- No overwrite capability
|
||||
- Append-only storage pattern
|
||||
|
||||
### Integrity
|
||||
|
||||
All artifacts are:
|
||||
- Checksummed (SHA-256)
|
||||
- Signed with deployment key
|
||||
- Verifiable at deployment time
|
||||
|
||||
### Retention
|
||||
|
||||
| Environment | Retention Period |
|
||||
|-------------|------------------|
|
||||
| Development | 30 days |
|
||||
| Staging | 90 days |
|
||||
| Production | 7 years (compliance) |
|
||||
|
||||
## API Operations
|
||||
|
||||
```yaml
|
||||
# List artifacts for deployment
|
||||
GET /api/v1/deployment-jobs/{id}/artifacts
|
||||
Response: Artifact[]
|
||||
|
||||
# Download specific artifact
|
||||
GET /api/v1/deployment-jobs/{id}/artifacts/{name}
|
||||
Response: binary
|
||||
|
||||
# Get artifact manifest
|
||||
GET /api/v1/deployment-jobs/{id}/artifacts/manifest
|
||||
Response: ArtifactManifest
|
||||
|
||||
# Verify artifact integrity
|
||||
POST /api/v1/deployment-jobs/{id}/artifacts/{name}/verify
|
||||
Response: { valid: boolean, checksum: string, signature: string }
|
||||
```
|
||||
|
||||
## Drift Detection
|
||||
|
||||
Version stickers enable drift detection:
|
||||
|
||||
```typescript
|
||||
interface DriftCheck {
|
||||
targetId: UUID;
|
||||
expectedSticker: VersionSticker;
|
||||
actualSticker: VersionSticker | null;
|
||||
driftDetected: boolean;
|
||||
driftType?: "missing" | "corrupted" | "mismatch";
|
||||
details?: {
|
||||
expectedDigest: string;
|
||||
actualDigest: string;
|
||||
field: string;
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- [Deployment Overview](overview.md)
|
||||
- [Deployment Strategies](strategies.md)
|
||||
- [Evidence Schema](../appendices/evidence-schema.md)
|
||||
671
docs/modules/release-orchestrator/deployment/overview.md
Normal file
671
docs/modules/release-orchestrator/deployment/overview.md
Normal file
@@ -0,0 +1,671 @@
|
||||
# Deployment Overview
|
||||
|
||||
## Purpose
|
||||
|
||||
The Deployment system executes the actual deployment of releases to target environments, managing deployment jobs, tasks, artifact generation, and rollback capabilities.
|
||||
|
||||
## Deployment Architecture
|
||||
|
||||
```
|
||||
DEPLOYMENT ARCHITECTURE
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ DEPLOY ORCHESTRATOR │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ DEPLOYMENT JOB MANAGER │ │
|
||||
│ │ │ │
|
||||
│ │ Promotion ───► Create Job ───► Plan Tasks ───► Execute Tasks │ │
|
||||
│ │ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌───────────────┼───────────────┐ │
|
||||
│ │ │ │ │
|
||||
│ ▼ ▼ ▼ │
|
||||
│ ┌─────────────────────┐ ┌─────────────────┐ ┌─────────────────────┐ │
|
||||
│ │ TARGET EXECUTOR │ │ RUNNER EXECUTOR │ │ ARTIFACT GENERATOR │ │
|
||||
│ │ │ │ │ │ │ │
|
||||
│ │ - Task dispatch │ │ - Agent tasks │ │ - Compose files │ │
|
||||
│ │ - Status tracking │ │ - SSH tasks │ │ - Env configs │ │
|
||||
│ │ - Log aggregation │ │ - API tasks │ │ - Manifests │ │
|
||||
│ └─────────────────────┘ └─────────────────┘ └─────────────────────┘ │
|
||||
│ │ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
┌────────────────────────────┼────────────────────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ Agent │ │ Agentless │ │ API │
|
||||
│ Execution │ │ Execution │ │ Execution │
|
||||
│ │ │ │ │ │
|
||||
│ Docker, │ │ SSH, │ │ ECS, │
|
||||
│ Compose │ │ WinRM │ │ Nomad │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
```
|
||||
|
||||
## Deployment Flow
|
||||
|
||||
### Standard Deployment Flow
|
||||
|
||||
```
|
||||
DEPLOYMENT FLOW
|
||||
|
||||
Promotion Deployment Task Agent/Target
|
||||
Approved Job Execution
|
||||
│ │ │ │
|
||||
│ Create Job │ │ │
|
||||
├───────────────►│ │ │
|
||||
│ │ │ │
|
||||
│ │ Generate │ │
|
||||
│ │ Artifacts │ │
|
||||
│ ├────────────────►│ │
|
||||
│ │ │ │
|
||||
│ │ Create Tasks │ │
|
||||
│ │ per Target │ │
|
||||
│ ├────────────────►│ │
|
||||
│ │ │ │
|
||||
│ │ │ Dispatch Task │
|
||||
│ │ ├────────────────►│
|
||||
│ │ │ │
|
||||
│ │ │ Execute │
|
||||
│ │ │ (Pull, Deploy) │
|
||||
│ │ │ │
|
||||
│ │ │ Report Status │
|
||||
│ │ │◄────────────────┤
|
||||
│ │ │ │
|
||||
│ │ Aggregate │ │
|
||||
│ │ Results │ │
|
||||
│ │◄────────────────┤ │
|
||||
│ │ │ │
|
||||
│ Job Complete │ │ │
|
||||
│◄───────────────┤ │ │
|
||||
│ │ │ │
|
||||
```
|
||||
|
||||
## Deployment Job
|
||||
|
||||
### Job Entity
|
||||
|
||||
```typescript
|
||||
interface DeploymentJob {
|
||||
id: UUID;
|
||||
promotionId: UUID;
|
||||
releaseId: UUID;
|
||||
environmentId: UUID;
|
||||
|
||||
// Execution configuration
|
||||
strategy: DeploymentStrategy;
|
||||
parallelism: number;
|
||||
|
||||
// Status tracking
|
||||
status: JobStatus;
|
||||
startedAt?: DateTime;
|
||||
completedAt?: DateTime;
|
||||
|
||||
// Artifacts
|
||||
artifacts: GeneratedArtifact[];
|
||||
|
||||
// Rollback reference
|
||||
rollbackOf?: UUID; // If this is a rollback job
|
||||
previousJobId?: UUID; // Previous successful job
|
||||
|
||||
// Tasks
|
||||
tasks: DeploymentTask[];
|
||||
}
|
||||
|
||||
type JobStatus =
|
||||
| "pending"
|
||||
| "preparing"
|
||||
| "running"
|
||||
| "completing"
|
||||
| "completed"
|
||||
| "failed"
|
||||
| "rolling_back"
|
||||
| "rolled_back";
|
||||
|
||||
type DeploymentStrategy =
|
||||
| "all-at-once"
|
||||
| "rolling"
|
||||
| "canary"
|
||||
| "blue-green";
|
||||
```
|
||||
|
||||
### Job State Machine
|
||||
|
||||
```
|
||||
JOB STATE MACHINE
|
||||
|
||||
┌──────────┐
|
||||
│ PENDING │
|
||||
└────┬─────┘
|
||||
│ start()
|
||||
▼
|
||||
┌──────────┐
|
||||
│PREPARING │
|
||||
│ │
|
||||
│ Generate │
|
||||
│ artifacts│
|
||||
└────┬─────┘
|
||||
│
|
||||
▼
|
||||
┌──────────┐
|
||||
│ RUNNING │◄────────────────┐
|
||||
│ │ │
|
||||
│ Execute │ │
|
||||
│ tasks │ │
|
||||
└────┬─────┘ │
|
||||
│ │
|
||||
┌───────────────┼───────────────┐ │
|
||||
│ │ │ │
|
||||
▼ ▼ ▼ │
|
||||
┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
||||
│COMPLETING│ │ FAILED │ │ ROLLING │ │
|
||||
│ │ │ │ │ BACK │──┘
|
||||
│ Verify │ │ │ │ │
|
||||
│ health │ │ │ │ │
|
||||
└────┬─────┘ └────┬─────┘ └────┬─────┘
|
||||
│ │ │
|
||||
▼ │ ▼
|
||||
┌──────────┐ │ ┌──────────┐
|
||||
│COMPLETED │ │ │ ROLLED │
|
||||
└──────────┘ │ │ BACK │
|
||||
│ └──────────┘
|
||||
│
|
||||
▼
|
||||
[Failure
|
||||
handling]
|
||||
```
|
||||
|
||||
## Deployment Task
|
||||
|
||||
### Task Entity
|
||||
|
||||
```typescript
|
||||
interface DeploymentTask {
|
||||
id: UUID;
|
||||
jobId: UUID;
|
||||
targetId: UUID;
|
||||
|
||||
// What to deploy
|
||||
componentId: UUID;
|
||||
digest: string;
|
||||
|
||||
// Execution
|
||||
status: TaskStatus;
|
||||
agentId?: UUID;
|
||||
startedAt?: DateTime;
|
||||
completedAt?: DateTime;
|
||||
|
||||
// Results
|
||||
logs: string;
|
||||
previousDigest?: string; // For rollback
|
||||
error?: string;
|
||||
|
||||
// Retry tracking
|
||||
attemptNumber: number;
|
||||
maxAttempts: number;
timeout: number; // Seconds allowed for task execution (consumed by executors, e.g. AgentExecutor.waitForTaskCompletion)
|
||||
}
|
||||
|
||||
type TaskStatus =
|
||||
| "pending"
|
||||
| "queued"
|
||||
| "dispatched"
|
||||
| "running"
|
||||
| "verifying"
|
||||
| "succeeded"
|
||||
| "failed"
|
||||
| "retrying";
|
||||
```
|
||||
|
||||
### Task Dispatch
|
||||
|
||||
```typescript
|
||||
class TaskDispatcher {
|
||||
async dispatchTask(task: DeploymentTask): Promise<void> {
|
||||
const target = await this.targetRepository.get(task.targetId);
|
||||
|
||||
switch (target.executionModel) {
|
||||
case "agent":
|
||||
await this.dispatchToAgent(task, target);
|
||||
break;
|
||||
|
||||
case "ssh":
|
||||
await this.dispatchViaSsh(task, target);
|
||||
break;
|
||||
|
||||
case "api":
|
||||
await this.dispatchViaApi(task, target);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private async dispatchToAgent(
|
||||
task: DeploymentTask,
|
||||
target: Target
|
||||
): Promise<void> {
|
||||
// Find available agent for target
|
||||
const agent = await this.agentManager.findAgentForTarget(target);
|
||||
|
||||
if (!agent) {
|
||||
throw new NoAgentAvailableError(target.id);
|
||||
}
|
||||
|
||||
// Create task payload
|
||||
const payload: AgentTaskPayload = {
|
||||
taskId: task.id,
|
||||
targetId: target.id,
|
||||
action: "deploy",
|
||||
digest: task.digest,
|
||||
config: target.connection,
|
||||
credentials: await this.fetchTaskCredentials(target)
|
||||
};
|
||||
|
||||
// Dispatch to agent
|
||||
await this.agentClient.dispatchTask(agent.id, payload);
|
||||
|
||||
// Update task status
|
||||
task.status = "dispatched";
|
||||
task.agentId = agent.id;
|
||||
await this.taskRepository.update(task);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Generated Artifacts
|
||||
|
||||
### Artifact Types
|
||||
|
||||
| Type | Description | Format |
|
||||
|------|-------------|--------|
|
||||
| `compose-file` | Docker Compose file | YAML |
|
||||
| `compose-lock` | Pinned compose file | YAML |
|
||||
| `env-file` | Environment variables | .env |
|
||||
| `systemd-unit` | Systemd service unit | .service |
|
||||
| `nginx-config` | Nginx configuration | .conf |
|
||||
| `manifest` | Deployment manifest | JSON |
|
||||
|
||||
### Compose Lock Generation
|
||||
|
||||
```typescript
|
||||
interface ComposeLock {
|
||||
version: string;
|
||||
services: Record<string, LockedService>;
|
||||
generated: {
|
||||
releaseId: string;
|
||||
promotionId: string;
|
||||
timestamp: string;
|
||||
digest: string; // Hash of this file
|
||||
};
|
||||
}
|
||||
|
||||
interface LockedService {
|
||||
image: string; // Full image reference with digest
|
||||
environment?: Record<string, string>;
|
||||
labels: Record<string, string>;
|
||||
}
|
||||
|
||||
class ComposeArtifactGenerator {
|
||||
async generateLock(
|
||||
release: Release,
|
||||
target: Target,
|
||||
template: ComposeTemplate
|
||||
): Promise<ComposeLock> {
|
||||
const services: Record<string, LockedService> = {};
|
||||
|
||||
for (const [serviceName, serviceConfig] of Object.entries(template.services)) {
|
||||
// Find component for this service
|
||||
const componentDigest = release.components.find(
|
||||
c => c.name === serviceConfig.componentName
|
||||
);
|
||||
|
||||
if (!componentDigest) {
|
||||
throw new Error(`No component found for service ${serviceName}`);
|
||||
}
|
||||
|
||||
// Build locked image reference
|
||||
const imageRef = `${componentDigest.repository}@${componentDigest.digest}`;
|
||||
|
||||
services[serviceName] = {
|
||||
image: imageRef,
|
||||
environment: {
|
||||
...serviceConfig.environment,
|
||||
STELLA_RELEASE_ID: release.id,
|
||||
STELLA_DIGEST: componentDigest.digest
|
||||
},
|
||||
labels: {
|
||||
"stella.release.id": release.id,
|
||||
"stella.component.name": componentDigest.name,
|
||||
"stella.digest": componentDigest.digest,
|
||||
"stella.deployed.at": new Date().toISOString()
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
const lock: ComposeLock = {
|
||||
version: "3.8",
|
||||
services,
|
||||
generated: {
|
||||
releaseId: release.id,
|
||||
promotionId: target.promotionId,
|
||||
timestamp: new Date().toISOString(),
|
||||
digest: "" // Computed below
|
||||
}
|
||||
};
|
||||
|
||||
// Compute content hash
|
||||
const content = yaml.stringify(lock);
|
||||
lock.generated.digest = crypto.createHash("sha256").update(content).digest("hex");
|
||||
|
||||
return lock;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Deployment Execution
|
||||
|
||||
### Execution Models
|
||||
|
||||
| Model | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `agent` | Stella agent on target | Docker hosts, servers |
|
||||
| `ssh` | SSH-based agentless | Unix servers |
|
||||
| `winrm` | WinRM-based agentless | Windows servers |
|
||||
| `api` | API-based | ECS, Nomad, K8s |
|
||||
|
||||
### Agent-Based Execution
|
||||
|
||||
```typescript
|
||||
class AgentExecutor {
|
||||
async execute(task: DeploymentTask): Promise<ExecutionResult> {
|
||||
const agent = await this.agentManager.get(task.agentId);
|
||||
const target = await this.targetRepository.get(task.targetId);
|
||||
|
||||
// Prepare task payload with secrets
|
||||
const payload: TaskPayload = {
|
||||
taskId: task.id,
|
||||
targetId: target.id,
|
||||
action: "deploy",
|
||||
digest: task.digest,
|
||||
config: target.connection,
|
||||
artifacts: await this.getArtifacts(task.jobId),
|
||||
credentials: await this.secretsManager.fetchForTask(target)
|
||||
};
|
||||
|
||||
// Dispatch to agent
|
||||
const taskRef = await this.agentClient.dispatchTask(agent.id, payload);
|
||||
|
||||
// Wait for completion
|
||||
const result = await this.waitForTaskCompletion(taskRef, task.timeout);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private async waitForTaskCompletion(
|
||||
taskRef: TaskReference,
|
||||
timeout: number
|
||||
): Promise<ExecutionResult> {
|
||||
const deadline = Date.now() + timeout * 1000;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
const status = await this.agentClient.getTaskStatus(taskRef);
|
||||
|
||||
if (status.completed) {
|
||||
return {
|
||||
success: status.success,
|
||||
logs: status.logs,
|
||||
deployedDigest: status.deployedDigest,
|
||||
error: status.error
|
||||
};
|
||||
}
|
||||
|
||||
await sleep(1000);
|
||||
}
|
||||
|
||||
throw new TimeoutError(`Task did not complete within ${timeout} seconds`);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### SSH-Based Execution
|
||||
|
||||
```typescript
|
||||
class SshExecutor {
|
||||
async execute(task: DeploymentTask): Promise<ExecutionResult> {
|
||||
const target = await this.targetRepository.get(task.targetId);
|
||||
const sshConfig = target.connection as SshConnectionConfig;
|
||||
|
||||
// Get SSH credentials from vault
|
||||
const creds = await this.secretsManager.fetchSshCredentials(
|
||||
sshConfig.credentialRef
|
||||
);
|
||||
|
||||
// Connect via SSH
|
||||
const ssh = new NodeSSH();
|
||||
await ssh.connect({
|
||||
host: sshConfig.host,
|
||||
port: sshConfig.port || 22,
|
||||
username: creds.username,
|
||||
privateKey: creds.privateKey
|
||||
});
|
||||
|
||||
try {
|
||||
// Upload artifacts
|
||||
const artifacts = await this.getArtifacts(task.jobId);
|
||||
for (const artifact of artifacts) {
|
||||
await ssh.putFile(artifact.localPath, artifact.remotePath);
|
||||
}
|
||||
|
||||
// Execute deployment script
|
||||
const result = await ssh.execCommand(
|
||||
this.buildDeployCommand(task, target),
|
||||
{ cwd: sshConfig.workDir }
|
||||
);
|
||||
|
||||
return {
|
||||
success: result.code === 0,
|
||||
logs: `${result.stdout}\n${result.stderr}`,
|
||||
error: result.code !== 0 ? result.stderr : undefined
|
||||
};
|
||||
} finally {
|
||||
ssh.dispose();
|
||||
}
|
||||
}
|
||||
|
||||
private buildDeployCommand(task: DeploymentTask, target: Target): string {
|
||||
// Build deployment command based on target type
|
||||
switch (target.targetType) {
|
||||
case "compose_host":
|
||||
return `cd ${target.connection.workDir} && docker-compose pull && docker-compose up -d`;
|
||||
|
||||
case "docker_host":
|
||||
return `docker pull ${task.digest} && docker stop ${target.containerName} && docker rm ${target.containerName} && docker run -d --name ${target.containerName} ${task.digest}`;
|
||||
|
||||
default:
|
||||
throw new Error(`Unsupported target type: ${target.targetType}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Health Verification
|
||||
|
||||
```typescript
|
||||
interface HealthCheckConfig {
|
||||
type: "http" | "tcp" | "command";
|
||||
timeout: number;
|
||||
retries: number;
|
||||
interval: number;
|
||||
|
||||
// HTTP-specific
|
||||
path?: string;
|
||||
expectedStatus?: number;
|
||||
expectedBody?: string;
|
||||
|
||||
// TCP-specific
|
||||
port?: number;
|
||||
|
||||
// Command-specific
|
||||
command?: string;
|
||||
}
|
||||
|
||||
class HealthVerifier {
|
||||
async verify(
|
||||
target: Target,
|
||||
config: HealthCheckConfig
|
||||
): Promise<HealthCheckResult> {
|
||||
let lastError: Error | undefined;
|
||||
|
||||
for (let attempt = 0; attempt < config.retries; attempt++) {
|
||||
try {
|
||||
const result = await this.performCheck(target, config);
|
||||
|
||||
if (result.healthy) {
|
||||
return result;
|
||||
}
|
||||
|
||||
lastError = new Error(result.message);
|
||||
} catch (error) {
|
||||
lastError = error as Error;
|
||||
}
|
||||
|
||||
if (attempt < config.retries - 1) {
|
||||
await sleep(config.interval * 1000);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
healthy: false,
|
||||
message: lastError?.message || "Health check failed",
|
||||
attempts: config.retries
|
||||
};
|
||||
}
|
||||
|
||||
private async performCheck(
|
||||
target: Target,
|
||||
config: HealthCheckConfig
|
||||
): Promise<HealthCheckResult> {
|
||||
switch (config.type) {
|
||||
case "http":
|
||||
return this.httpCheck(target, config);
|
||||
|
||||
case "tcp":
|
||||
return this.tcpCheck(target, config);
|
||||
|
||||
case "command":
|
||||
return this.commandCheck(target, config);
|
||||
}
|
||||
}
|
||||
|
||||
private async httpCheck(
|
||||
target: Target,
|
||||
config: HealthCheckConfig
|
||||
): Promise<HealthCheckResult> {
|
||||
const url = `${target.healthEndpoint}${config.path || "/health"}`;
|
||||
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
signal: AbortSignal.timeout(config.timeout * 1000)
|
||||
});
|
||||
|
||||
const healthy = response.status === (config.expectedStatus || 200);
|
||||
|
||||
return {
|
||||
healthy,
|
||||
message: healthy ? "OK" : `Status ${response.status}`,
|
||||
statusCode: response.status
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
healthy: false,
|
||||
message: (error as Error).message
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Rollback Management
|
||||
|
||||
```typescript
|
||||
class RollbackManager {
|
||||
async initiateRollback(
|
||||
jobId: UUID,
|
||||
reason: string
|
||||
): Promise<DeploymentJob> {
|
||||
const failedJob = await this.jobRepository.get(jobId);
|
||||
const previousJob = await this.findPreviousSuccessfulJob(
|
||||
failedJob.environmentId,
|
||||
failedJob.releaseId
|
||||
);
|
||||
|
||||
if (!previousJob) {
|
||||
throw new NoRollbackTargetError(jobId);
|
||||
}
|
||||
|
||||
// Create rollback job
|
||||
const rollbackJob: DeploymentJob = {
|
||||
id: uuidv4(),
|
||||
promotionId: failedJob.promotionId,
|
||||
releaseId: previousJob.releaseId, // Previous release
|
||||
environmentId: failedJob.environmentId,
|
||||
strategy: "all-at-once", // Fast rollback
|
||||
parallelism: 10,
|
||||
status: "pending",
|
||||
rollbackOf: jobId,
|
||||
previousJobId: previousJob.id,
|
||||
artifacts: [],
|
||||
tasks: []
|
||||
};
|
||||
|
||||
// Create tasks to restore previous state
|
||||
for (const task of failedJob.tasks) {
|
||||
const previousTask = previousJob.tasks.find(
|
||||
t => t.targetId === task.targetId
|
||||
);
|
||||
|
||||
if (previousTask) {
|
||||
rollbackJob.tasks.push({
|
||||
id: uuidv4(),
|
||||
jobId: rollbackJob.id,
|
||||
targetId: task.targetId,
|
||||
componentId: previousTask.componentId,
|
||||
digest: previousTask.digest || task.previousDigest!, // restore the digest the previous successful job deployed
|
||||
status: "pending",
|
||||
logs: "",
|
||||
attemptNumber: 0,
|
||||
maxAttempts: 3
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
await this.jobRepository.save(rollbackJob);
|
||||
|
||||
// Execute rollback
|
||||
await this.executeJob(rollbackJob);
|
||||
|
||||
return rollbackJob;
|
||||
}
|
||||
|
||||
private async findPreviousSuccessfulJob(
|
||||
environmentId: UUID,
|
||||
excludeReleaseId: UUID
|
||||
): Promise<DeploymentJob | null> {
|
||||
return this.jobRepository.findOne({
|
||||
environmentId,
|
||||
status: "completed",
|
||||
releaseId: { $ne: excludeReleaseId }
|
||||
}, {
|
||||
orderBy: { completedAt: "desc" }
|
||||
});
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- [Deployment Strategies](strategies.md)
|
||||
- [Agent-Based Deployment](agent-based.md)
|
||||
- [Agentless Deployment](agentless.md)
|
||||
- [Generated Artifacts](artifacts.md)
|
||||
- [Deploy Orchestrator Module](../modules/deploy-orchestrator.md)
|
||||
656
docs/modules/release-orchestrator/deployment/strategies.md
Normal file
656
docs/modules/release-orchestrator/deployment/strategies.md
Normal file
@@ -0,0 +1,656 @@
|
||||
# Deployment Strategies
|
||||
|
||||
## Overview
|
||||
|
||||
Release Orchestrator supports multiple deployment strategies to balance deployment speed, risk, and availability requirements.
|
||||
|
||||
## Strategy Comparison
|
||||
|
||||
| Strategy | Description | Risk Level | Downtime | Rollback Speed |
|
||||
|----------|-------------|------------|----------|----------------|
|
||||
| All-at-once | Deploy to all targets simultaneously | High | Brief | Fast |
|
||||
| Rolling | Deploy to targets in batches | Medium | None | Medium |
|
||||
| Canary | Deploy to subset, then expand | Low | None | Fast |
|
||||
| Blue-Green | Deploy to parallel environment | Low | None | Instant |
|
||||
|
||||
## All-at-Once Strategy
|
||||
|
||||
### Description
|
||||
|
||||
Deploys to all targets simultaneously. Simple and fast, but highest risk.
|
||||
|
||||
```
|
||||
ALL-AT-ONCE DEPLOYMENT
|
||||
|
||||
Time 0 Time 1
|
||||
┌─────────────────┐ ┌─────────────────┐
|
||||
│ Target 1 [v1] │ │ Target 1 [v2] │
|
||||
├─────────────────┤ ├─────────────────┤
|
||||
│ Target 2 [v1] │ ───► │ Target 2 [v2] │
|
||||
├─────────────────┤ ├─────────────────┤
|
||||
│ Target 3 [v1] │ │ Target 3 [v2] │
|
||||
└─────────────────┘ └─────────────────┘
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
```typescript
|
||||
interface AllAtOnceConfig {
|
||||
strategy: "all-at-once";
|
||||
|
||||
// Concurrency limit (0 = unlimited)
|
||||
maxConcurrent: number;
|
||||
|
||||
// Health check after deployment
|
||||
healthCheck: HealthCheckConfig;
|
||||
|
||||
// Failure behavior
|
||||
failureBehavior: "rollback" | "continue" | "pause";
|
||||
}
|
||||
|
||||
// Example
|
||||
const config: AllAtOnceConfig = {
|
||||
strategy: "all-at-once",
|
||||
maxConcurrent: 0,
|
||||
healthCheck: {
|
||||
type: "http",
|
||||
path: "/health",
|
||||
timeout: 30,
|
||||
retries: 3,
|
||||
interval: 10
|
||||
},
|
||||
failureBehavior: "rollback"
|
||||
};
|
||||
```
|
||||
|
||||
### Execution
|
||||
|
||||
```typescript
|
||||
class AllAtOnceExecutor {
|
||||
async execute(job: DeploymentJob, config: AllAtOnceConfig): Promise<void> {
|
||||
const tasks = job.tasks;
|
||||
const concurrency = config.maxConcurrent || tasks.length;
|
||||
|
||||
// Execute all tasks with concurrency limit
|
||||
const results = await pMap(
|
||||
tasks,
|
||||
async (task) => {
|
||||
try {
|
||||
await this.executeTask(task);
|
||||
return { taskId: task.id, success: true };
|
||||
} catch (error) {
|
||||
return { taskId: task.id, success: false, error };
|
||||
}
|
||||
},
|
||||
{ concurrency }
|
||||
);
|
||||
|
||||
// Check for failures
|
||||
const failures = results.filter(r => !r.success);
|
||||
|
||||
if (failures.length > 0) {
|
||||
if (config.failureBehavior === "rollback") {
|
||||
await this.rollbackAll(job);
|
||||
throw new DeploymentFailedError(failures);
|
||||
} else if (config.failureBehavior === "pause") {
|
||||
job.status = "failed";
|
||||
throw new DeploymentFailedError(failures);
|
||||
}
|
||||
// "continue" - proceed despite failures
|
||||
}
|
||||
|
||||
// Health check all targets
|
||||
await this.verifyAllTargets(job, config.healthCheck);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
|
||||
- Development environments
|
||||
- Small deployments
|
||||
- Time-critical updates
|
||||
- Stateless services with fast startup
|
||||
|
||||
## Rolling Strategy
|
||||
|
||||
### Description
|
||||
|
||||
Deploys to targets in configurable batches, maintaining availability throughout.
|
||||
|
||||
```
|
||||
ROLLING DEPLOYMENT (batch size: 1)
|
||||
|
||||
Time 0 Time 1 Time 2 Time 3
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ T1 [v1] │ │ T1 [v2] ✓ │ │ T1 [v2] ✓ │ │ T1 [v2] ✓ │
|
||||
├─────────────┤ ├─────────────┤ ├─────────────┤ ├─────────────┤
|
||||
│ T2 [v1] │──►│ T2 [v1] │──►│ T2 [v2] ✓ │──►│ T2 [v2] ✓ │
|
||||
├─────────────┤ ├─────────────┤ ├─────────────┤ ├─────────────┤
|
||||
│ T3 [v1] │ │ T3 [v1] │ │ T3 [v1] │ │ T3 [v2] ✓ │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
```typescript
|
||||
interface RollingConfig {
  strategy: "rolling";

  // Batch configuration
  batchSize: number;          // Targets per batch (used when batchPercent is absent)
  batchPercent?: number;      // Alternative: percentage of targets; takes precedence over batchSize

  // Timing
  batchDelay: number;         // Seconds between batches (skipped after the final batch)
  stabilizationTime: number;  // Seconds to wait after a batch's health check passes

  // Health check applied to every batch before moving on
  healthCheck: HealthCheckConfig;

  // Failure handling
  maxFailedBatches: number;   // Failed batches tolerated before stopping the rollout
  failureBehavior: "rollback" | "pause" | "skip";

  // Ordering of targets before batching
  targetOrder: "default" | "shuffle" | "priority";
}
|
||||
|
||||
// Example: roll out two targets at a time, pausing 30s between batches,
// and roll everything back after the first failed batch.
const config: RollingConfig = {
  strategy: "rolling",
  batchSize: 2,           // 2 targets per batch
  batchDelay: 30,         // 30s between batches
  stabilizationTime: 60,  // 60s settle time after each batch passes health checks
  healthCheck: {
    type: "http",
    path: "/health",
    timeout: 30,
    retries: 5,
    interval: 10
  },
  maxFailedBatches: 1,
  failureBehavior: "rollback",
  targetOrder: "default"
};
|
||||
```
|
||||
|
||||
### Execution
|
||||
|
||||
```typescript
|
||||
class RollingExecutor {
|
||||
async execute(job: DeploymentJob, config: RollingConfig): Promise<void> {
|
||||
const tasks = this.orderTasks(job.tasks, config.targetOrder);
|
||||
const batches = this.createBatches(tasks, config);
|
||||
let failedBatches = 0;
|
||||
const completedTasks: DeploymentTask[] = [];
|
||||
|
||||
for (const batch of batches) {
|
||||
this.emitProgress(job, {
|
||||
phase: "deploying",
|
||||
currentBatch: batches.indexOf(batch) + 1,
|
||||
totalBatches: batches.length,
|
||||
completedTargets: completedTasks.length,
|
||||
totalTargets: tasks.length
|
||||
});
|
||||
|
||||
// Execute batch
|
||||
const results = await Promise.all(
|
||||
batch.map(task => this.executeTask(task))
|
||||
);
|
||||
|
||||
// Check batch results
|
||||
const failures = results.filter(r => !r.success);
|
||||
|
||||
if (failures.length > 0) {
|
||||
failedBatches++;
|
||||
|
||||
if (failedBatches > config.maxFailedBatches) {
|
||||
if (config.failureBehavior === "rollback") {
|
||||
await this.rollbackCompleted(completedTasks);
|
||||
}
|
||||
throw new DeploymentFailedError(failures);
|
||||
}
|
||||
|
||||
if (config.failureBehavior === "pause") {
|
||||
job.status = "failed";
|
||||
throw new DeploymentFailedError(failures);
|
||||
}
|
||||
// "skip" - continue to next batch
|
||||
}
|
||||
|
||||
// Health check batch targets
|
||||
await this.verifyBatch(batch, config.healthCheck);
|
||||
|
||||
// Wait for stabilization
|
||||
if (config.stabilizationTime > 0) {
|
||||
await sleep(config.stabilizationTime * 1000);
|
||||
}
|
||||
|
||||
completedTasks.push(...batch);
|
||||
|
||||
// Wait before next batch
|
||||
if (batches.indexOf(batch) < batches.length - 1) {
|
||||
await sleep(config.batchDelay * 1000);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private createBatches(
|
||||
tasks: DeploymentTask[],
|
||||
config: RollingConfig
|
||||
): DeploymentTask[][] {
|
||||
const batchSize = config.batchPercent
|
||||
? Math.ceil(tasks.length * config.batchPercent / 100)
|
||||
: config.batchSize;
|
||||
|
||||
const batches: DeploymentTask[][] = [];
|
||||
for (let i = 0; i < tasks.length; i += batchSize) {
|
||||
batches.push(tasks.slice(i, i + batchSize));
|
||||
}
|
||||
|
||||
return batches;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
|
||||
- Production deployments
|
||||
- High-availability requirements
|
||||
- Large target counts
|
||||
- Services requiring gradual rollout
|
||||
|
||||
## Canary Strategy
|
||||
|
||||
### Description
|
||||
|
||||
Deploys to a small subset of targets first, validates, then expands to remaining targets.
|
||||
|
||||
```
|
||||
CANARY DEPLOYMENT
|
||||
|
||||
Phase 1: Canary (10%) Phase 2: Expand (50%) Phase 3: Full (100%)
|
||||
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ T1 [v2] ✓ │ ◄─canary │ T1 [v2] ✓ │ │ T1 [v2] ✓ │
|
||||
├─────────────┤ ├─────────────┤ ├─────────────┤
|
||||
│ T2 [v1] │ │ T2 [v2] ✓ │ │ T2 [v2] ✓ │
|
||||
├─────────────┤ ├─────────────┤ ├─────────────┤
|
||||
│ T3 [v1] │ │ T3 [v2] ✓ │ │ T3 [v2] ✓ │
|
||||
├─────────────┤ ├─────────────┤ ├─────────────┤
|
||||
│ T4 [v1] │ │ T4 [v2] ✓ │ │ T4 [v2] ✓ │
|
||||
├─────────────┤ ├─────────────┤ ├─────────────┤
|
||||
│ T5 [v1] │ │ T5 [v1] │ │ T5 [v2] ✓ │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
Health Check Health Check Health Check
|
||||
Error Rate Check Error Rate Check Error Rate Check
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
```typescript
|
||||
interface CanaryConfig {
  strategy: "canary";

  // Canary stages, applied in order (e.g. 10% -> 50% -> 100%)
  stages: CanaryStage[];

  // How canary targets are selected from the fleet
  canarySelector: "random" | "labeled" | "first";
  canaryLabel?: string;     // Label selecting canary targets (used when canarySelector is "labeled")

  // Automatic vs manual progression between stages
  autoProgress: boolean;

  // Health and metrics checks applied at each stage
  healthCheck: HealthCheckConfig;
  metricsCheck?: MetricsCheckConfig;  // Optional metrics gate (e.g. error-rate query)
}
|
||||
|
||||
interface CanaryStage {
  name: string;            // Stage identifier (also used for approval prompts)
  percentage: number;      // Cumulative share of targets deployed by this stage (0-100)
  duration: number;        // Minimum time at this stage (seconds); 0 skips the wait
  autoProgress: boolean;   // Auto-advance after duration; otherwise wait for manual approval
}

interface MetricsCheckConfig {
  integrationId: UUID;     // Metrics integration used to run the queries
  queries: MetricQuery[];  // Every query must pass; the first breach fails the stage
  failureThreshold: number; // Percentage deviation to fail
}

interface MetricQuery {
  name: string;            // Identifier reported when the query breaches its threshold
  query: string;           // PromQL or similar
  operator: "lt" | "gt" | "eq";  // Comparison of the query result against threshold
  threshold: number;
}
|
||||
|
||||
// Example: a 10% canary held for manual approval, then automatic
// expansion to 50% and 100%, gated on a Prometheus error-rate query.
const config: CanaryConfig = {
  strategy: "canary",
  stages: [
    { name: "canary", percentage: 10, duration: 300, autoProgress: false },
    { name: "expand", percentage: 50, duration: 300, autoProgress: true },
    { name: "full", percentage: 100, duration: 0, autoProgress: true }
  ],
  canarySelector: "labeled",
  canaryLabel: "canary=true",
  autoProgress: false,
  healthCheck: {
    type: "http",
    path: "/health",
    timeout: 30,
    retries: 5,
    interval: 10
  },
  metricsCheck: {
    integrationId: "prometheus-uuid",
    queries: [
      {
        name: "error_rate",
        // 5xx responses as a share of all requests over the last 5 minutes
        query: "rate(http_requests_total{status=~\"5..\"}[5m]) / rate(http_requests_total[5m])",
        operator: "lt",
        threshold: 0.01 // Less than 1% error rate
      }
    ],
    failureThreshold: 10
  }
};
|
||||
```
|
||||
|
||||
### Execution
|
||||
|
||||
```typescript
|
||||
class CanaryExecutor {
|
||||
async execute(job: DeploymentJob, config: CanaryConfig): Promise<void> {
|
||||
const tasks = this.orderTasks(job.tasks, config);
|
||||
|
||||
for (const stage of config.stages) {
|
||||
const targetCount = Math.ceil(tasks.length * stage.percentage / 100);
|
||||
const stageTasks = tasks.slice(0, targetCount);
|
||||
const newTasks = stageTasks.filter(t => t.status === "pending");
|
||||
|
||||
this.emitProgress(job, {
|
||||
phase: "canary",
|
||||
stage: stage.name,
|
||||
percentage: stage.percentage,
|
||||
targets: stageTasks.length
|
||||
});
|
||||
|
||||
// Deploy to new targets in this stage
|
||||
await Promise.all(newTasks.map(task => this.executeTask(task)));
|
||||
|
||||
// Health check stage targets
|
||||
await this.verifyTargets(stageTasks, config.healthCheck);
|
||||
|
||||
// Metrics check if configured
|
||||
if (config.metricsCheck) {
|
||||
await this.checkMetrics(stageTasks, config.metricsCheck);
|
||||
}
|
||||
|
||||
// Wait for stage duration
|
||||
if (stage.duration > 0) {
|
||||
await this.waitWithMonitoring(
|
||||
stageTasks,
|
||||
stage.duration,
|
||||
config.metricsCheck
|
||||
);
|
||||
}
|
||||
|
||||
// Wait for manual approval if not auto-progress
|
||||
if (!stage.autoProgress && stage.percentage < 100) {
|
||||
await this.waitForApproval(job, stage.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async checkMetrics(
|
||||
targets: DeploymentTask[],
|
||||
config: MetricsCheckConfig
|
||||
): Promise<void> {
|
||||
const metricsClient = await this.getMetricsClient(config.integrationId);
|
||||
|
||||
for (const query of config.queries) {
|
||||
const result = await metricsClient.query(query.query);
|
||||
|
||||
const passed = this.evaluateMetric(result, query);
|
||||
|
||||
if (!passed) {
|
||||
throw new CanaryMetricsFailedError(query.name, result, query.threshold);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
|
||||
- Risk-sensitive deployments
|
||||
- Services with real user traffic
|
||||
- Deployments with metrics-based validation
|
||||
- Gradual feature rollouts
|
||||
|
||||
## Blue-Green Strategy
|
||||
|
||||
### Description
|
||||
|
||||
Deploys to a parallel "green" environment while "blue" continues serving traffic, then switches.
|
||||
|
||||
```
|
||||
BLUE-GREEN DEPLOYMENT
|
||||
|
||||
Phase 1: Deploy Green Phase 2: Switch Traffic
|
||||
|
||||
┌─────────────────────────┐ ┌─────────────────────────┐
|
||||
│ Load Balancer │ │ Load Balancer │
|
||||
│ │ │ │ │ │
|
||||
│ ▼ │ │ ▼ │
|
||||
│ ┌─────────────┐ │ │ ┌─────────────┐ │
|
||||
│ │ Blue [v1] │◄─active│ │ │ Blue [v1] │ │
|
||||
│ │ T1, T2, T3 │ │ │ │ T1, T2, T3 │ │
|
||||
│ └─────────────┘ │ │ └─────────────┘ │
|
||||
│ │ │ │
|
||||
│ ┌─────────────┐ │ │ ┌─────────────┐ │
|
||||
│ │ Green [v2] │◄─deploy│ │ │ Green [v2] │◄─active│
|
||||
│ │ T4, T5, T6 │ │ │ │ T4, T5, T6 │ │
|
||||
│ └─────────────┘ │ │ └─────────────┘ │
|
||||
│ │ │ │
|
||||
└─────────────────────────┘ └─────────────────────────┘
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
```typescript
|
||||
interface BlueGreenConfig {
  strategy: "blue-green";

  // Labels distinguishing the two fleets
  blueLabel: string;        // Label for blue (currently active) targets
  greenLabel: string;       // Label for green (newly deployed) targets

  // Traffic routing
  routerIntegration: UUID;  // Router/LB integration that performs the traffic switch
  routingConfig: RoutingConfig;

  // Validation of the green fleet before switchover
  healthCheck: HealthCheckConfig;
  warmupTime: number;       // Seconds to warm up green before validation
  validationTests?: string[]; // Test suites to run against green before switching

  // Switchover
  switchoverMode: "instant" | "gradual";
  gradualSteps?: number[];  // Percentage steps for gradual switchover (defaults to 25/50/75/100)

  // Rollback
  keepBlueActive: number;   // Seconds to keep blue ready for instant rollback after the switch
}
|
||||
|
||||
// Example: instant switchover via an nginx load balancer, validated by a
// smoke-test suite, with blue kept warm for 30 minutes for fast rollback.
const config: BlueGreenConfig = {
  strategy: "blue-green",
  blueLabel: "deployment=blue",
  greenLabel: "deployment=green",
  routerIntegration: "nginx-lb-uuid",
  routingConfig: {
    upstreamName: "myapp",
    healthEndpoint: "/health"
  },
  healthCheck: {
    type: "http",
    path: "/health",
    timeout: 30,
    retries: 5,
    interval: 10
  },
  warmupTime: 60,          // Warm green for 60s before running validation tests
  validationTests: ["smoke-test-suite"],
  switchoverMode: "instant",
  keepBlueActive: 1800 // 30 minutes
};
|
||||
```
|
||||
|
||||
### Execution
|
||||
|
||||
```typescript
|
||||
class BlueGreenExecutor {
|
||||
async execute(job: DeploymentJob, config: BlueGreenConfig): Promise<void> {
|
||||
// Identify blue and green targets
|
||||
const { blue, green } = this.categorizeTargets(job.tasks, config);
|
||||
|
||||
// Phase 1: Deploy to green
|
||||
this.emitProgress(job, { phase: "deploying-green" });
|
||||
|
||||
await Promise.all(green.map(task => this.executeTask(task)));
|
||||
|
||||
// Health check green targets
|
||||
await this.verifyTargets(green, config.healthCheck);
|
||||
|
||||
// Warmup period
|
||||
if (config.warmupTime > 0) {
|
||||
this.emitProgress(job, { phase: "warming-up" });
|
||||
await sleep(config.warmupTime * 1000);
|
||||
}
|
||||
|
||||
// Run validation tests
|
||||
if (config.validationTests?.length) {
|
||||
this.emitProgress(job, { phase: "validating" });
|
||||
await this.runValidationTests(green, config.validationTests);
|
||||
}
|
||||
|
||||
// Phase 2: Switch traffic
|
||||
this.emitProgress(job, { phase: "switching-traffic" });
|
||||
|
||||
if (config.switchoverMode === "instant") {
|
||||
await this.instantSwitchover(config, blue, green);
|
||||
} else {
|
||||
await this.gradualSwitchover(config, blue, green);
|
||||
}
|
||||
|
||||
// Verify traffic routing
|
||||
await this.verifyRouting(green, config);
|
||||
|
||||
// Schedule blue decommission
|
||||
if (config.keepBlueActive > 0) {
|
||||
this.scheduleBlueDecommission(blue, config.keepBlueActive);
|
||||
}
|
||||
}
|
||||
|
||||
private async instantSwitchover(
|
||||
config: BlueGreenConfig,
|
||||
blue: DeploymentTask[],
|
||||
green: DeploymentTask[]
|
||||
): Promise<void> {
|
||||
const router = await this.getRouter(config.routerIntegration);
|
||||
|
||||
// Update upstream to green targets
|
||||
await router.updateUpstream(config.routingConfig.upstreamName, {
|
||||
servers: green.map(t => ({
|
||||
address: t.target.address,
|
||||
weight: 1
|
||||
}))
|
||||
});
|
||||
|
||||
// Remove blue from rotation
|
||||
await router.removeServers(
|
||||
config.routingConfig.upstreamName,
|
||||
blue.map(t => t.target.address)
|
||||
);
|
||||
}
|
||||
|
||||
private async gradualSwitchover(
|
||||
config: BlueGreenConfig,
|
||||
blue: DeploymentTask[],
|
||||
green: DeploymentTask[]
|
||||
): Promise<void> {
|
||||
const router = await this.getRouter(config.routerIntegration);
|
||||
const steps = config.gradualSteps || [25, 50, 75, 100];
|
||||
|
||||
for (const percentage of steps) {
|
||||
await router.setTrafficSplit(config.routingConfig.upstreamName, {
|
||||
blue: 100 - percentage,
|
||||
green: percentage
|
||||
});
|
||||
|
||||
// Monitor for errors
|
||||
await this.monitorTraffic(30);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
|
||||
- Zero-downtime deployments
|
||||
- Database migration deployments
|
||||
- High-stakes production updates
|
||||
- Instant rollback requirements
|
||||
|
||||
## Strategy Selection Guide
|
||||
|
||||
```
|
||||
STRATEGY SELECTION
|
||||
|
||||
START
|
||||
│
|
||||
▼
|
||||
┌────────────────────────┐
|
||||
│ Zero downtime needed? │
|
||||
└───────────┬────────────┘
|
||||
│
|
||||
No │ Yes
|
||||
│ │ │
|
||||
▼ │ ▼
|
||||
┌──────────┐ │ ┌───────────────────┐
|
||||
│ All-at- │ │ │ Metrics-based │
|
||||
│ once │ │ │ validation needed?│
|
||||
└──────────┘ │ └─────────┬─────────┘
|
||||
│ │
|
||||
│ No │ Yes
|
||||
│ │ │ │
|
||||
│ ▼ │ ▼
|
||||
│ ┌──────────┐│ ┌──────────┐
|
||||
│ │ Instant ││ │ Canary │
|
||||
│ │ rollback? ││ │ │
|
||||
│ └────┬─────┘│ └──────────┘
|
||||
│ │ │
|
||||
│ No │ Yes │
|
||||
│ │ │ │ │
|
||||
│ ▼ │ ▼ │
|
||||
│┌──────┐│┌────┴─────┐
|
||||
││Rolling│││Blue-Green│
|
||||
│└──────┘│└──────────┘
|
||||
│ │
|
||||
└───────┘
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- [Deployment Overview](overview.md)
|
||||
- [Progressive Delivery](../modules/progressive-delivery.md)
|
||||
- [Rollback Management](overview.md#rollback-management)
|
||||
Reference in New Issue
Block a user