# Emergency Rollback Workflow
# Sprint: CI/CD Enhancement - Deployment Safety
#
# Purpose: Automated rollback to the previous known-good version
# Triggers: Manual dispatch only (emergency procedure)
#
# SLA Target: < 5 minutes from trigger to rollback complete

name: Emergency Rollback

# Started by hand only; the operator fills in the form defined below.
on:
  workflow_dispatch:
    inputs:
      # Where the rollback is applied.
      environment:
        description: 'Target environment'
        type: choice
        required: true
        options:
          - staging
          - production

      # A single service, or "all" for a full rollback.
      service:
        description: 'Service to rollback (or "all" for full rollback)'
        type: choice
        required: true
        options:
          - all
          - authority
          - attestor
          - concelier
          - scanner
          - policy
          - excititor
          - gateway
          - scheduler
          - cli

      # Optional explicit version; empty means "previous version".
      target_version:
        description: 'Version to rollback to (leave empty for previous version)'
        type: string
        required: false

      # Free-text justification supplied by the operator.
      reason:
        description: 'Reason for rollback'
        type: string
        required: true

      # Opt-out switch for the post-rollback health check.
      skip_health_check:
        description: 'Skip health check (use only in emergencies)'
        type: boolean
        required: false
        default: false

# Workflow-level environment, exported to the steps of every job.
env:
  # Rollback time budget in seconds (5 minutes).
  ROLLBACK_TIMEOUT: 300

# Pipeline: validate -> rollback -> notify.
#
# Security note: operator-supplied text (reason, target_version, and outputs
# derived from them) is never interpolated into a script with ${{ }}. It is
# handed to each step through `env:` and read as a quoted shell variable, so
# quotes, backticks or $(...) in the text cannot alter the script.
jobs:
  # Records the request in the run summary and resolves the version and the
  # list of services the rollback job will act on.
  validate:
    name: Validate Rollback Request
    runs-on: ubuntu-latest
    outputs:
      target_version: ${{ steps.resolve.outputs.version }}
      services: ${{ steps.resolve.outputs.services }}
      approved: ${{ steps.validate.outputs.approved }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Validate inputs
        id: validate
        env:
          TARGET_ENVIRONMENT: ${{ inputs.environment }}
          TARGET_SERVICE: ${{ inputs.service }}
          REQUESTED_VERSION: ${{ inputs.target_version }}
          ROLLBACK_REASON: ${{ inputs.reason }}
          TRIGGERED_BY: ${{ github.actor }}
        run: |
          {
            echo "## Rollback Request Validation"
            echo ""
            echo "| Parameter | Value |"
            echo "|-----------|-------|"
            echo "| Environment | ${TARGET_ENVIRONMENT} |"
            echo "| Service | ${TARGET_SERVICE} |"
            echo "| Target Version | ${REQUESTED_VERSION:-previous} |"
            echo "| Reason | ${ROLLBACK_REASON} |"
            echo "| Triggered By | ${TRIGGERED_BY} |"
            echo "| Timestamp | $(date -u +"%Y-%m-%dT%H:%M:%SZ") |"
          } >> "$GITHUB_STEP_SUMMARY"

          # Production requires additional validation
          if [[ "$TARGET_ENVIRONMENT" == "production" ]]; then
            {
              # The blank line must go to the summary too: without it the
              # heading is rendered as part of the table above.
              echo ""
              echo "### Production Rollback Warning"
              echo "This will affect production users immediately."
            } >> "$GITHUB_STEP_SUMMARY"
          fi

          echo "approved=true" >> "$GITHUB_OUTPUT"

      - name: Resolve target version
        id: resolve
        env:
          REQUESTED_VERSION: ${{ inputs.target_version }}
          TARGET_SERVICE: ${{ inputs.service }}
        run: |
          VERSION="$REQUESTED_VERSION"
          SERVICE="$TARGET_SERVICE"

          # If no version specified, get previous from manifest
          if [[ -z "$VERSION" ]]; then
            MANIFEST="devops/releases/service-versions.json"
            if [[ -f "$MANIFEST" ]]; then
              if [[ "$SERVICE" == "all" ]]; then
                # NOTE(review): this takes the string-sorted smallest *current*
                # version (not semver order, not previousVersion) and uses it
                # for every service - confirm this is the intended target.
                VERSION=$(jq -r '.services | to_entries | map(.value.version) | sort | first // "unknown"' "$MANIFEST")
              else
                VERSION=$(jq -r --arg svc "$SERVICE" '.services[$svc].previousVersion // .services[$svc].version // "unknown"' "$MANIFEST")
              fi
            else
              echo "::warning::No target version given and manifest $MANIFEST was not found"
            fi
          fi

          # Surface an unresolved version instead of passing it on silently.
          # Kept non-fatal: the rollback script is not visible from here and
          # may handle this case itself.
          if [[ -z "$VERSION" || "$VERSION" == "unknown" ]]; then
            echo "::warning::Could not resolve a rollback version for service '$SERVICE' (got '${VERSION}')"
          fi

          # Determine services to rollback ("cli" is not part of "all")
          if [[ "$SERVICE" == "all" ]]; then
            SERVICES='["authority","attestor","concelier","scanner","policy","excititor","gateway","scheduler"]'
          else
            SERVICES="[\"$SERVICE\"]"
          fi

          echo "Resolved version: $VERSION"
          echo "Services: $SERVICES"

          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "services=$SERVICES" >> "$GITHUB_OUTPUT"

  # Runs the rollback against the selected environment, then health-checks
  # the affected services and writes the result to the run summary.
  rollback:
    name: Execute Rollback
    runs-on: ubuntu-latest
    needs: [validate]
    if: needs.validate.outputs.approved == 'true'
    environment: ${{ inputs.environment }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # NOTE(review): 'latest' makes the tool version differ between runs;
      # consider pinning kubectl and Helm to versions matching the cluster.
      - name: Setup kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'latest'

      - name: Setup Helm
        uses: azure/setup-helm@v3
        with:
          version: 'latest'

      - name: Configure deployment access
        env:
          TARGET_ENVIRONMENT: ${{ inputs.environment }}
        run: |
          echo "::notice::Configure deployment access for ${TARGET_ENVIRONMENT}"
          # TODO: Configure kubectl context / kubeconfig
          # kubectl config use-context "$TARGET_ENVIRONMENT"

      - name: Execute rollback
        id: rollback
        env:
          TARGET_VERSION: ${{ needs.validate.outputs.target_version }}
          SERVICES: ${{ needs.validate.outputs.services }}
          ENVIRONMENT: ${{ inputs.environment }}
          ROLLBACK_REASON: ${{ inputs.reason }}
        run: |
          echo "Starting rollback..."
          START_TIME=$(date +%s)

          # Execute rollback script
          if [[ -f ".gitea/scripts/release/rollback.sh" ]]; then
            .gitea/scripts/release/rollback.sh \
              --environment "$ENVIRONMENT" \
              --version "$TARGET_VERSION" \
              --services "$SERVICES" \
              --reason "$ROLLBACK_REASON"
          else
            echo "::warning::Rollback script not found - using placeholder"
            echo ""
            echo "Rollback would execute:"
            echo "  Environment: $ENVIRONMENT"
            echo "  Version: $TARGET_VERSION"
            echo "  Services: $SERVICES"
            echo ""
            echo "TODO: Implement rollback.sh script"
          fi

          END_TIME=$(date +%s)
          DURATION=$((END_TIME - START_TIME))

          # ROLLBACK_TIMEOUT comes from the workflow-level env (seconds).
          # Only warn: aborting a rollback midway could leave it half applied.
          BUDGET="${ROLLBACK_TIMEOUT:-300}"
          if (( DURATION > BUDGET )); then
            echo "::warning::Rollback took ${DURATION}s, exceeding the ${BUDGET}s budget"
          fi

          echo "duration=$DURATION" >> "$GITHUB_OUTPUT"
          echo "Rollback completed in ${DURATION}s"

      - name: Health check
        id: health
        if: inputs.skip_health_check != true
        env:
          SERVICES: ${{ needs.validate.outputs.services }}
          TARGET_ENVIRONMENT: ${{ inputs.environment }}
        run: |
          echo "Running health checks..."

          # SERVICES is a JSON array of service names; iterate its elements.
          echo "$SERVICES" | jq -r '.[]' | while read -r service; do
            echo "Checking $service..."
            # TODO: Implement service-specific health checks
            # curl -sf "https://${service}.${TARGET_ENVIRONMENT}.stella-ops.org/health" || exit 1
            echo "  Status: OK (placeholder)"
          done

          echo "All health checks passed"

      - name: Rollback summary
        # Runs even when an earlier step failed so the summary is always written.
        if: always()
        env:
          ROLLBACK_OUTCOME: ${{ steps.rollback.outcome }}
          ROLLBACK_DURATION: ${{ steps.rollback.outputs.duration }}
          HEALTH_OUTCOME: ${{ steps.health.outcome }}
          TARGET_VERSION: ${{ needs.validate.outputs.target_version }}
        run: |
          {
            echo ""
            echo "## Rollback Execution"
            echo ""

            if [[ "$ROLLBACK_OUTCOME" == "success" ]]; then
              echo "### Rollback Successful"
              echo ""
              echo "- Duration: ${ROLLBACK_DURATION}s"
              echo "- Target Version: ${TARGET_VERSION}"
              # "skipped" when the operator opted out of the health check.
              echo "- Health Check: ${HEALTH_OUTCOME:-skipped}"
            else
              echo "### Rollback Failed"
              echo ""
              echo "Please investigate immediately and consider manual intervention."
            fi
          } >> "$GITHUB_STEP_SUMMARY"

  # Always runs, whatever the earlier jobs did, to report the result and
  # print an incident record.
  notify:
    name: Send Notifications
    runs-on: ubuntu-latest
    needs: [validate, rollback]
    if: always()

    steps:
      - name: Notify team
        env:
          STATUS: ${{ needs.rollback.result }}
          ENVIRONMENT: ${{ inputs.environment }}
          SERVICE: ${{ inputs.service }}
          ACTOR: ${{ github.actor }}
          REASON: ${{ inputs.reason }}
          VERSION: ${{ needs.validate.outputs.target_version }}
        run: |
          # Build notification message. Any result other than "success"
          # (including "skipped" and "cancelled") is reported as a failure.
          if [[ "$STATUS" == "success" ]]; then
            EMOJI="white_check_mark"
            TITLE="Rollback Completed Successfully"
          else
            EMOJI="x"
            TITLE="Rollback Failed - Immediate Attention Required"
          fi
          # EMOJI is not used yet; it is kept for the chat notification below.

          echo "Notification:"
          echo "  Title: $TITLE"
          echo "  Status: $STATUS"
          echo "  Environment: $ENVIRONMENT"
          echo "  Service: $SERVICE"
          echo "  Version: $VERSION"
          echo "  Actor: $ACTOR"
          echo "  Reason: $REASON"

      # TODO: Send to Slack/Teams/PagerDuty
      # NOTE: TITLE above is a shell variable of the previous step only; it
      # must be exported through $GITHUB_ENV before env.TITLE can be read here.
      # - name: Slack notification
      #   uses: slackapi/slack-github-action@v1
      #   with:
      #     payload: |
      #       {
      #         "text": "${{ env.TITLE }}",
      #         "blocks": [...]
      #       }

      - name: Create incident record
        env:
          ENVIRONMENT: ${{ inputs.environment }}
          SERVICE: ${{ inputs.service }}
          VERSION: ${{ needs.validate.outputs.target_version }}
          REASON: ${{ inputs.reason }}
          ACTOR: ${{ github.actor }}
          STATUS: ${{ needs.rollback.result }}
          RUN_ID: ${{ github.run_id }}
        run: |
          echo "Creating incident record..."

          # Log to incident tracking
          INCIDENT_LOG="devops/incidents/$(date -u +%Y-%m-%d)-rollback.json"

          # jq does the JSON escaping, so quotes or backslashes in the reason
          # cannot produce an invalid record.
          jq -n \
            --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
            --arg environment "$ENVIRONMENT" \
            --arg service "$SERVICE" \
            --arg target_version "$VERSION" \
            --arg reason "$REASON" \
            --arg actor "$ACTOR" \
            --arg status "$STATUS" \
            --arg run_id "$RUN_ID" \
            '{
              timestamp: $timestamp,
              type: "rollback",
              environment: $environment,
              service: $service,
              target_version: $target_version,
              reason: $reason,
              actor: $actor,
              status: $status,
              run_id: $run_id
            }'

          # The record is only printed; nothing is written to the repository.
          echo "::notice::Incident record would be created at $INCIDENT_LOG"