release orchestrator pivot, architecture and planning

This commit is contained in:
2026-01-10 22:37:22 +02:00
parent c84f421e2f
commit d509c44411
130 changed files with 70292 additions and 721 deletions

View File

@@ -0,0 +1,671 @@
# Deployment Overview
## Purpose
The Deployment system deploys releases to target environments. It manages deployment jobs and their tasks, generates deployment artifacts, and supports rollback.
## Deployment Architecture
```
DEPLOYMENT ARCHITECTURE
┌─────────────────────────────────────────────────────────────────────────────┐
│ DEPLOY ORCHESTRATOR │
│ │
│ ┌─────────────────────────────────────────────────────────────────────┐ │
│ │ DEPLOYMENT JOB MANAGER │ │
│ │ │ │
│ │ Promotion ───► Create Job ───► Plan Tasks ───► Execute Tasks │ │
│ │ │ │
│ └─────────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ┌───────────────┼───────────────┐ │
│ │ │ │ │
│ ▼ ▼ ▼ │
│ ┌─────────────────────┐ ┌─────────────────┐ ┌─────────────────────┐ │
│ │ TARGET EXECUTOR │ │ RUNNER EXECUTOR │ │ ARTIFACT GENERATOR │ │
│ │ │ │ │ │ │ │
│ │ - Task dispatch │ │ - Agent tasks │ │ - Compose files │ │
│ │ - Status tracking │ │ - SSH tasks │ │ - Env configs │ │
│ │ - Log aggregation │ │ - API tasks │ │ - Manifests │ │
│ └─────────────────────┘ └─────────────────┘ └─────────────────────┘ │
│ │ │
└─────────────────────────────────────────────────────────────────────────────┘
┌────────────────────────────┼────────────────────────────┐
│ │ │
▼ ▼ ▼
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
│ Agent │ │ Agentless │ │ API │
│ Execution │ │ Execution │ │ Execution │
│ │ │ │ │ │
│ Docker, │ │ SSH, │ │ ECS, │
│ Compose │ │ WinRM │ │ Nomad │
└─────────────┘ └─────────────┘ └─────────────┘
```
## Deployment Flow
### Standard Deployment Flow
```
DEPLOYMENT FLOW
Promotion Deployment Task Agent/Target
Approved Job Execution
│ │ │ │
│ Create Job │ │ │
├───────────────►│ │ │
│ │ │ │
│ │ Generate │ │
│ │ Artifacts │ │
│ ├────────────────►│ │
│ │ │ │
│ │ Create Tasks │ │
│ │ per Target │ │
│ ├────────────────►│ │
│ │ │ │
│ │ │ Dispatch Task │
│ │ ├────────────────►│
│ │ │ │
│ │ │ Execute │
│ │ │ (Pull, Deploy) │
│ │ │ │
│ │ │ Report Status │
│ │ │◄────────────────┤
│ │ │ │
│ │ Aggregate │ │
│ │ Results │ │
│ │◄────────────────┤ │
│ │ │ │
│ Job Complete │ │ │
│◄───────────────┤ │ │
│ │ │ │
```
## Deployment Job
### Job Entity
```typescript
/**
 * A deployment job: ties a promotion, a release and an environment together
 * with the tasks and generated artifacts used to carry out the deployment.
 */
interface DeploymentJob {
  id: UUID;
  promotionId: UUID;
  releaseId: UUID;
  environmentId: UUID;

  /** Execution configuration. */
  strategy: DeploymentStrategy;
  parallelism: number;

  /** Status tracking. */
  status: JobStatus;
  startedAt?: DateTime;
  completedAt?: DateTime;

  /** Artifacts generated for this job. */
  artifacts: Array<GeneratedArtifact>;

  /** Rollback reference: set if this is a rollback job. */
  rollbackOf?: UUID;
  /** Rollback reference: the previous successful job. */
  previousJobId?: UUID;

  /** Tasks belonging to this job. */
  tasks: Array<DeploymentTask>;
}
/** Lifecycle states of a deployment job (see the job state machine). */
type JobStatus =
  // Before and during execution
  "pending" | "preparing" | "running" | "completing"
  // Terminal outcomes of a forward deployment
  | "completed" | "failed"
  // Rollback states
  | "rolling_back" | "rolled_back";
/** Rollout strategies a deployment job can be configured with. */
type DeploymentStrategy = "all-at-once" | "rolling" | "canary" | "blue-green";
```
### Job State Machine
```
JOB STATE MACHINE
┌──────────┐
│ PENDING │
└────┬─────┘
│ start()
┌──────────┐
│PREPARING │
│ │
│ Generate │
│ artifacts│
└────┬─────┘
┌──────────┐
│ RUNNING │◄────────────────┐
│ │ │
│ Execute │ │
│ tasks │ │
└────┬─────┘ │
│ │
┌───────────────┼───────────────┐ │
│ │ │ │
▼ ▼ ▼ │
┌──────────┐ ┌──────────┐ ┌──────────┐ │
│COMPLETING│ │ FAILED │ │ ROLLING │ │
│ │ │ │ │ BACK │──┘
│ Verify │ │ │ │ │
│ health │ │ │ │ │
└────┬─────┘ └────┬─────┘ └────┬─────┘
│ │ │
▼ │ ▼
┌──────────┐ │ ┌──────────┐
│COMPLETED │ │ │ ROLLED │
└──────────┘ │ │ BACK │
│ └──────────┘
[Failure
handling]
```
## Deployment Task
### Task Entity
```typescript
/**
 * One unit of deployment work: a component digest to deploy to a single
 * target as part of a deployment job.
 */
interface DeploymentTask {
  id: UUID;
  jobId: UUID;
  targetId: UUID;

  // What to deploy
  componentId: UUID;
  digest: string;

  // Execution
  status: TaskStatus;
  agentId?: UUID;
  startedAt?: DateTime;
  completedAt?: DateTime;
  /**
   * Maximum time, in seconds, to wait for the task to complete.
   * Read by `AgentExecutor.execute`, which passes it to its completion wait.
   * Optional so that existing code constructing tasks without it still compiles.
   */
  timeout?: number;

  // Results
  logs: string;
  previousDigest?: string; // For rollback
  error?: string;

  // Retry tracking
  attemptNumber: number;
  maxAttempts: number;
}
/** Lifecycle states of a single deployment task. */
type TaskStatus =
  // Waiting to be handed to an executor
  "pending" | "queued" | "dispatched"
  // In progress
  | "running" | "verifying"
  // Outcomes
  | "succeeded" | "failed" | "retrying";
```
### Task Dispatch
```typescript
/**
 * Routes a deployment task to the dispatch path that matches the execution
 * model of the task's target.
 */
class TaskDispatcher {
  /**
   * Loads the task's target and hands the task to the matching dispatcher.
   *
   * @param task - The deployment task to dispatch.
   * @throws Error when the target's execution model has no dispatch path here.
   */
  async dispatchTask(task: DeploymentTask): Promise<void> {
    const target = await this.targetRepository.get(task.targetId);

    switch (target.executionModel) {
      case "agent":
        await this.dispatchToAgent(task, target);
        break;
      case "ssh":
        await this.dispatchViaSsh(task, target);
        break;
      case "api":
        await this.dispatchViaApi(task, target);
        break;
      default:
        // Fail loudly: an unhandled model would otherwise return normally
        // without dispatching anything and without reporting an error.
        throw new Error(
          `Unsupported execution model "${String(target.executionModel)}" for target ${target.id}`
        );
    }
  }

  /**
   * Dispatches the task to an agent serving the target and records the
   * assignment on the task.
   *
   * @param task - The task to dispatch; its `status` and `agentId` are updated.
   * @param target - The target the task deploys to.
   * @throws NoAgentAvailableError when no agent is found for the target.
   */
  private async dispatchToAgent(
    task: DeploymentTask,
    target: Target
  ): Promise<void> {
    // Find available agent for target
    const agent = await this.agentManager.findAgentForTarget(target);
    if (!agent) {
      throw new NoAgentAvailableError(target.id);
    }

    // Create task payload
    const payload: AgentTaskPayload = {
      taskId: task.id,
      targetId: target.id,
      action: "deploy",
      digest: task.digest,
      config: target.connection,
      credentials: await this.fetchTaskCredentials(target)
    };

    // Dispatch to agent
    await this.agentClient.dispatchTask(agent.id, payload);

    // Update task status only after the agent accepted the dispatch call
    task.status = "dispatched";
    task.agentId = agent.id;
    await this.taskRepository.update(task);
  }
}
```
## Generated Artifacts
### Artifact Types
| Type | Description | Format |
|------|-------------|--------|
| `compose-file` | Docker Compose file | YAML |
| `compose-lock` | Pinned compose file | YAML |
| `env-file` | Environment variables | .env |
| `systemd-unit` | Systemd service unit | .service |
| `nginx-config` | Nginx configuration | .conf |
| `manifest` | Deployment manifest | JSON |
### Compose Lock Generation
```typescript
/** A compose lock artifact: locked services plus generation metadata. */
interface ComposeLock {
  version: string;
  /** Locked service definitions, keyed by service name. */
  services: Record<string, LockedService>;
  /** Metadata recorded when the lock was generated. */
  generated: {
    releaseId: string;
    promotionId: string;
    timestamp: string;
    /** Hash of this file. */
    digest: string;
  };
}
/** A single service entry inside a compose lock. */
interface LockedService {
  /** Full image reference with digest. */
  image: string;
  /** Environment variables for the service, if any. */
  environment?: Record<string, string>;
  /** Labels attached to the service. */
  labels: Record<string, string>;
}
/** Generates compose lock artifacts that pin every service to an image digest. */
class ComposeArtifactGenerator {
  /**
   * Builds a compose lock for a release from a compose template.
   *
   * Every template service is resolved to the release component named by
   * `serviceConfig.componentName` and pinned as `repository@digest`.
   *
   * @param release - Release whose components supply repositories and digests.
   * @param target - Target the lock is generated for; supplies `promotionId`.
   * @param template - Compose template whose services are locked.
   * @returns The lock, including a SHA-256 hex digest of its YAML form.
   * @throws Error when a template service has no matching release component.
   */
  async generateLock(
    release: Release,
    target: Target,
    template: ComposeTemplate
  ): Promise<ComposeLock> {
    // One timestamp for the whole artifact, so the per-service
    // "stella.deployed.at" labels and `generated.timestamp` always agree.
    const generatedAt = new Date().toISOString();
    const services: Record<string, LockedService> = {};

    for (const [serviceName, serviceConfig] of Object.entries(template.services)) {
      // Find component for this service
      const component = release.components.find(
        c => c.name === serviceConfig.componentName
      );
      if (!component) {
        throw new Error(`No component found for service ${serviceName}`);
      }

      // Build locked image reference
      const imageRef = `${component.repository}@${component.digest}`;

      services[serviceName] = {
        image: imageRef,
        environment: {
          // Template values come first so the STELLA_* entries always win.
          ...serviceConfig.environment,
          STELLA_RELEASE_ID: release.id,
          STELLA_DIGEST: component.digest
        },
        labels: {
          "stella.release.id": release.id,
          "stella.component.name": component.name,
          "stella.digest": component.digest,
          "stella.deployed.at": generatedAt
        }
      };
    }

    const lock: ComposeLock = {
      version: "3.8",
      services,
      generated: {
        releaseId: release.id,
        promotionId: target.promotionId,
        timestamp: generatedAt,
        digest: "" // Computed below
      }
    };

    // Compute content hash. The hash covers the lock serialized while
    // `generated.digest` is still the empty string, so a verifier must blank
    // that field before re-hashing.
    const content = yaml.stringify(lock);
    lock.generated.digest = crypto.createHash("sha256").update(content).digest("hex");

    return lock;
  }
}
```
## Deployment Execution
### Execution Models
| Model | Description | Use Case |
|-------|-------------|----------|
| `agent` | Stella agent on target | Docker hosts, servers |
| `ssh` | SSH-based agentless | Unix servers |
| `winrm` | WinRM-based agentless | Windows servers |
| `api` | API-based | ECS, Nomad, K8s |
### Agent-Based Execution
```typescript
/** Executes a deployment task by dispatching it to an agent and polling for the result. */
class AgentExecutor {
  /**
   * Dispatches the task to its assigned agent and waits for it to complete.
   *
   * @param task - Task to execute; must already have an `agentId`.
   * @returns The execution result reported by the agent.
   * @throws Error when the task has no agent assigned.
   * @throws TimeoutError when the agent reports no completion in time.
   */
  async execute(task: DeploymentTask): Promise<ExecutionResult> {
    // `agentId` is optional on the task; refuse to continue without it rather
    // than looking up an agent with an undefined id.
    if (!task.agentId) {
      throw new Error(`Task ${task.id} has no agent assigned`);
    }

    const agent = await this.agentManager.get(task.agentId);
    const target = await this.targetRepository.get(task.targetId);

    // Prepare task payload with secrets
    const payload: TaskPayload = {
      taskId: task.id,
      targetId: target.id,
      action: "deploy",
      digest: task.digest,
      config: target.connection,
      artifacts: await this.getArtifacts(task.jobId),
      credentials: await this.secretsManager.fetchForTask(target)
    };

    // Dispatch to agent
    const taskRef = await this.agentClient.dispatchTask(agent.id, payload);

    // Wait for completion.
    // NOTE(review): confirm every task carries a numeric `timeout`; a missing
    // value makes the deadline NaN, so the wait ends after one status check.
    return this.waitForTaskCompletion(taskRef, task.timeout);
  }

  /**
   * Polls the agent for the task status until it completes or the timeout elapses.
   *
   * The status is always checked at least once, and once more when the
   * deadline is reached, so a task finishing during the last wait is not
   * reported as timed out.
   *
   * @param taskRef - Reference returned by the agent client on dispatch.
   * @param timeout - Maximum wait, in seconds.
   * @returns The completed task's result.
   * @throws TimeoutError when the task has not completed by the deadline.
   */
  private async waitForTaskCompletion(
    taskRef: TaskReference,
    timeout: number
  ): Promise<ExecutionResult> {
    const pollIntervalMs = 1000;
    const deadline = Date.now() + timeout * 1000;

    while (true) {
      const status = await this.agentClient.getTaskStatus(taskRef);
      if (status.completed) {
        return {
          success: status.success,
          logs: status.logs,
          deployedDigest: status.deployedDigest,
          error: status.error
        };
      }

      const remainingMs = deadline - Date.now();
      // Written as a negated `>` so that a NaN deadline also ends the loop.
      if (!(remainingMs > 0)) {
        break;
      }
      // Never sleep past the deadline.
      await sleep(Math.min(pollIntervalMs, remainingMs));
    }

    throw new TimeoutError(`Task did not complete within ${timeout} seconds`);
  }
}
```
### SSH-Based Execution
```typescript
/** Executes a deployment task on a target over SSH, without an agent. */
class SshExecutor {
  /**
   * Connects to the target, uploads the job's artifacts and runs the deploy command.
   *
   * @param task - The deployment task to execute.
   * @returns Success flag (exit code 0), combined stdout/stderr as logs, and
   *          stderr as the error when the command exits non-zero.
   */
  async execute(task: DeploymentTask): Promise<ExecutionResult> {
    const target = await this.targetRepository.get(task.targetId);
    const sshConfig = target.connection as SshConnectionConfig;

    // Get SSH credentials from vault
    const creds = await this.secretsManager.fetchSshCredentials(
      sshConfig.credentialRef
    );

    const ssh = new NodeSSH();
    try {
      // Connect inside the try block so that a failed or half-open connection
      // is still disposed by the finally clause.
      await ssh.connect({
        host: sshConfig.host,
        port: sshConfig.port || 22,
        username: creds.username,
        privateKey: creds.privateKey
      });

      // Upload artifacts
      const artifacts = await this.getArtifacts(task.jobId);
      for (const artifact of artifacts) {
        await ssh.putFile(artifact.localPath, artifact.remotePath);
      }

      // Execute deployment script
      const result = await ssh.execCommand(
        this.buildDeployCommand(task, target),
        { cwd: sshConfig.workDir }
      );

      return {
        success: result.code === 0,
        logs: `${result.stdout}\n${result.stderr}`,
        error: result.code !== 0 ? result.stderr : undefined
      };
    } finally {
      ssh.dispose();
    }
  }

  /**
   * Builds the shell command that performs the deployment for the target type.
   *
   * NOTE(review): values are interpolated into the shell command unquoted;
   * confirm `workDir`, `containerName` and `digest` are validated upstream.
   *
   * @param task - Supplies the digest to deploy.
   * @param target - Supplies the target type, work directory and container name.
   * @returns The command line to run on the remote host.
   * @throws Error when the target type is not supported.
   */
  private buildDeployCommand(task: DeploymentTask, target: Target): string {
    switch (target.targetType) {
      case "compose_host":
        return `cd ${target.connection.workDir} && docker-compose pull && docker-compose up -d`;
      case "docker_host": {
        // NOTE(review): `task.digest` is used as the image reference here;
        // confirm it is a pullable reference and not a bare digest.
        const image = task.digest;
        const name = target.containerName;
        // Stop AND remove the old container before `docker run`: a stopped
        // container still owns its name, so reusing `--name` would conflict.
        // Both steps tolerate a missing container so a first deploy succeeds.
        return (
          `docker pull ${image}` +
          ` && (docker stop ${name} || true)` +
          ` && (docker rm ${name} || true)` +
          ` && docker run -d --name ${name} ${image}`
        );
      }
      default:
        throw new Error(`Unsupported target type: ${target.targetType}`);
    }
  }
}
```
## Health Verification
```typescript
/** Configuration for a post-deployment health check. */
interface HealthCheckConfig {
  /** Which kind of probe to run. */
  type: "http" | "tcp" | "command";
  /** Per-check timeout; `HealthVerifier` multiplies it by 1000 for milliseconds. */
  timeout: number;
  /** Number of attempts `HealthVerifier.verify` loops over. */
  retries: number;
  /** Pause between attempts; `HealthVerifier` multiplies it by 1000 for milliseconds. */
  interval: number;

  /** HTTP-specific: request path. */
  path?: string;
  /** HTTP-specific: status code that counts as healthy. */
  expectedStatus?: number;
  /** HTTP-specific: expected response body. */
  expectedBody?: string;

  /** TCP-specific: port to probe. */
  port?: number;

  /** Command-specific: command to run. */
  command?: string;
}
/** Runs health checks against a target, retrying until healthy or out of attempts. */
class HealthVerifier {
  /**
   * Runs the configured check until it reports healthy or attempts run out.
   *
   * At least one attempt is always made, even when `config.retries` is zero,
   * negative or not a number.
   *
   * @param target - The target to check.
   * @param config - Check type, attempt count, interval and timeout (seconds).
   * @returns The first healthy result, or an unhealthy result carrying the
   *          last failure message and the number of attempts made.
   */
  async verify(
    target: Target,
    config: HealthCheckConfig
  ): Promise<HealthCheckResult> {
    // Without this floor, `retries <= 0` would report "unhealthy" without
    // probing the target at all.
    const maxAttempts = config.retries >= 1 ? config.retries : 1;
    let lastError: Error | undefined;

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      try {
        const result = await this.performCheck(target, config);
        if (result.healthy) {
          return result;
        }
        lastError = new Error(result.message);
      } catch (error: unknown) {
        // Keep the message of non-Error throwables instead of losing it.
        lastError = error instanceof Error ? error : new Error(String(error));
      }

      // No pause after the final attempt.
      if (attempt < maxAttempts - 1) {
        await sleep(config.interval * 1000);
      }
    }

    return {
      healthy: false,
      message: lastError?.message || "Health check failed",
      attempts: maxAttempts
    };
  }

  /**
   * Dispatches to the check implementation for `config.type`.
   *
   * @throws Error when the check type is not one of the supported kinds.
   */
  private async performCheck(
    target: Target,
    config: HealthCheckConfig
  ): Promise<HealthCheckResult> {
    switch (config.type) {
      case "http":
        return this.httpCheck(target, config);
      case "tcp":
        return this.tcpCheck(target, config);
      case "command":
        return this.commandCheck(target, config);
      default: {
        // Exhaustiveness guard: fails to compile when a new type is added to
        // the union, and throws at runtime for values that bypass typing.
        const unsupported: never = config.type;
        throw new Error(`Unsupported health check type: ${String(unsupported)}`);
      }
    }
  }

  /**
   * Performs an HTTP GET against the target's health endpoint.
   *
   * Healthy means the response status equals `config.expectedStatus`
   * (default 200). Request failures and timeouts yield an unhealthy result
   * rather than an exception.
   *
   * NOTE(review): `config.expectedBody` is declared on the config but is not
   * evaluated here — confirm whether body matching is intended.
   */
  private async httpCheck(
    target: Target,
    config: HealthCheckConfig
  ): Promise<HealthCheckResult> {
    const url = `${target.healthEndpoint}${config.path || "/health"}`;
    const expectedStatus = config.expectedStatus ?? 200;

    try {
      const response = await fetch(url, {
        signal: AbortSignal.timeout(config.timeout * 1000)
      });
      const healthy = response.status === expectedStatus;
      return {
        healthy,
        message: healthy ? "OK" : `Status ${response.status}`,
        statusCode: response.status
      };
    } catch (error: unknown) {
      return {
        healthy: false,
        message: error instanceof Error ? error.message : String(error)
      };
    }
  }
}
```
## Rollback Management
```typescript
/** Creates and executes rollback jobs that restore the previous successful deployment. */
class RollbackManager {
  /**
   * Creates, saves and executes a job that redeploys the digests of the most
   * recent successful job of a different release in the same environment.
   *
   * @param jobId - Id of the job to roll back.
   * @param reason - Reason for the rollback. NOTE(review): not recorded
   *                 anywhere in this method — confirm whether it should be.
   * @returns The rollback job after `executeJob` has been awaited.
   * @throws NoRollbackTargetError when no previous successful job exists.
   */
  async initiateRollback(
    jobId: UUID,
    reason: string
  ): Promise<DeploymentJob> {
    const failedJob = await this.jobRepository.get(jobId);
    const previousJob = await this.findPreviousSuccessfulJob(
      failedJob.environmentId,
      failedJob.releaseId
    );
    if (!previousJob) {
      throw new NoRollbackTargetError(jobId);
    }

    // Create rollback job
    const rollbackJob: DeploymentJob = {
      id: uuidv4(),
      promotionId: failedJob.promotionId,
      releaseId: previousJob.releaseId, // Previous release
      environmentId: failedJob.environmentId,
      strategy: "all-at-once", // Fast rollback
      parallelism: 10,
      status: "pending",
      rollbackOf: jobId,
      previousJobId: previousJob.id,
      artifacts: [],
      tasks: []
    };

    // Create tasks to restore previous state
    for (const task of failedJob.tasks) {
      // Match on target AND component: a target may run several components,
      // and matching on target alone would pick an arbitrary one.
      const previousTask = previousJob.tasks.find(
        t => t.targetId === task.targetId && t.componentId === task.componentId
      );
      if (!previousTask) {
        // Nothing from the previous job to restore for this target/component.
        continue;
      }

      rollbackJob.tasks.push({
        id: uuidv4(),
        jobId: rollbackJob.id,
        targetId: task.targetId,
        componentId: previousTask.componentId,
        // Redeploy what the previous successful job deployed. Its
        // `previousDigest` is one version older and may be undefined.
        digest: previousTask.digest,
        status: "pending",
        logs: "",
        attemptNumber: 0,
        maxAttempts: 3
      });
    }

    await this.jobRepository.save(rollbackJob);

    // Execute rollback
    await this.executeJob(rollbackJob);

    return rollbackJob;
  }

  /**
   * Finds the most recently completed job in the environment that deployed a
   * release other than `excludeReleaseId`.
   *
   * @param environmentId - Environment to search in.
   * @param excludeReleaseId - Release to exclude (the one being rolled back).
   * @returns The matching job, or `null` when there is none.
   */
  private async findPreviousSuccessfulJob(
    environmentId: UUID,
    excludeReleaseId: UUID
  ): Promise<DeploymentJob | null> {
    return this.jobRepository.findOne({
      environmentId,
      status: "completed",
      releaseId: { $ne: excludeReleaseId }
    }, {
      orderBy: { completedAt: "desc" }
    });
  }
}
```
## References
- [Deployment Strategies](strategies.md)
- [Agent-Based Deployment](agent-based.md)
- [Agentless Deployment](agentless.md)
- [Generated Artifacts](artifacts.md)
- [Deploy Orchestrator Module](../modules/deploy-orchestrator.md)