# Emergency Rollback Workflow
# Sprint: CI/CD Enhancement - Deployment Safety
#
# Purpose: Automated rollback to previous known-good version
# Triggers: Manual dispatch only (emergency procedure)
#
# SLA Target: < 5 minutes from trigger to rollback complete
#
# Security note: workflow inputs are handed to shell steps through `env:` and
# read as quoted shell variables. They are never written as ${{ ... }} inside a
# `run:` script, because expressions are substituted into the script text
# before the shell parses it (this also applies inside shell comments).

name: Emergency Rollback

on:
  workflow_dispatch:
    inputs:
      environment:
        description: 'Target environment'
        required: true
        type: choice
        options:
          - staging
          - production
      service:
        description: 'Service to rollback (or "all" for full rollback)'
        required: true
        type: choice
        options:
          - all
          - authority
          - attestor
          - concelier
          - scanner
          - policy
          - excititor
          - gateway
          - scheduler
          - cli
      target_version:
        description: 'Version to rollback to (leave empty for previous version)'
        required: false
        type: string
      reason:
        description: 'Reason for rollback'
        required: true
        type: string
      skip_health_check:
        description: 'Skip health check (use only in emergencies)'
        required: false
        type: boolean
        default: false

env:
  # NOTE(review): no step in this workflow reads ROLLBACK_TIMEOUT; presumably
  # it is consumed by rollback.sh through the environment - confirm.
  ROLLBACK_TIMEOUT: 300  # 5 minutes

jobs:
  # Records the request in the run summary and resolves which version and
  # which services the rollback job will act on.
  validate:
    name: Validate Rollback Request
    runs-on: ubuntu-latest
    outputs:
      target_version: ${{ steps.resolve.outputs.version }}
      services: ${{ steps.resolve.outputs.services }}
      approved: ${{ steps.validate.outputs.approved }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Validate inputs
        id: validate
        env:
          ENVIRONMENT: ${{ inputs.environment }}
          SERVICE: ${{ inputs.service }}
          TARGET_VERSION: ${{ inputs.target_version }}
          REASON: ${{ inputs.reason }}
          ACTOR: ${{ github.actor }}
        run: |
          {
            echo "## Rollback Request Validation"
            echo ""
            echo "| Parameter | Value |"
            echo "|-----------|-------|"
            echo "| Environment | ${ENVIRONMENT} |"
            echo "| Service | ${SERVICE} |"
            echo "| Target Version | ${TARGET_VERSION:-previous} |"
            echo "| Reason | ${REASON} |"
            echo "| Triggered By | ${ACTOR} |"
            echo "| Timestamp | $(date -u +"%Y-%m-%dT%H:%M:%SZ") |"
          } >> "$GITHUB_STEP_SUMMARY"

          # Production requires additional validation
          if [[ "$ENVIRONMENT" == "production" ]]; then
            {
              echo ""
              echo "### Production Rollback Warning"
              echo "This will affect production users immediately."
            } >> "$GITHUB_STEP_SUMMARY"
          fi

          # This step performs no gating of its own: every request that
          # reaches this line is marked approved.
          echo "approved=true" >> "$GITHUB_OUTPUT"

      - name: Resolve target version
        id: resolve
        env:
          REQUESTED_VERSION: ${{ inputs.target_version }}
          SERVICE: ${{ inputs.service }}
        run: |
          VERSION="$REQUESTED_VERSION"

          # If no version specified, get previous from manifest
          if [[ -z "$VERSION" ]]; then
            MANIFEST="devops/releases/service-versions.json"
            if [[ -f "$MANIFEST" ]]; then
              if [[ "$SERVICE" == "all" ]]; then
                # Get oldest version across all services
                # NOTE(review): jq sorts strings lexicographically, so
                # "1.10.0" orders before "1.9.0"; this also reads .version,
                # not .previousVersion - confirm both are intended.
                VERSION=$(jq -r '.services | to_entries | map(.value.version) | sort | first // "unknown"' "$MANIFEST")
              else
                VERSION=$(jq -r --arg svc "$SERVICE" '.services[$svc].previousVersion // .services[$svc].version // "unknown"' "$MANIFEST")
              fi
            fi
          fi

          # Surface an unresolved version instead of passing it on silently.
          # VERSION is still empty here when no version was given and the
          # manifest file does not exist.
          if [[ -z "$VERSION" || "$VERSION" == "unknown" ]]; then
            echo "::warning::Could not resolve a rollback target version (resolved: '${VERSION}')"
          fi

          # Determine services to rollback
          # NOTE(review): "cli" is a selectable service but is not part of
          # the "all" list - confirm this is deliberate.
          if [[ "$SERVICE" == "all" ]]; then
            SERVICES='["authority","attestor","concelier","scanner","policy","excititor","gateway","scheduler"]'
          else
            SERVICES="[\"$SERVICE\"]"
          fi

          echo "Resolved version: $VERSION"
          echo "Services: $SERVICES"

          {
            echo "version=$VERSION"
            echo "services=$SERVICES"
          } >> "$GITHUB_OUTPUT"

  # Runs the rollback script (when present), then health checks, then writes
  # the outcome to the run summary.
  rollback:
    name: Execute Rollback
    runs-on: ubuntu-latest
    needs: [validate]
    if: needs.validate.outputs.approved == 'true'
    environment: ${{ inputs.environment }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'latest'

      - name: Setup Helm
        uses: azure/setup-helm@v3
        with:
          version: 'latest'

      - name: Configure deployment access
        env:
          ENVIRONMENT: ${{ inputs.environment }}
        run: |
          echo "::notice::Configure deployment access for ${ENVIRONMENT}"
          # TODO: Configure kubectl context / kubeconfig
          # kubectl config use-context "$ENVIRONMENT"

      - name: Execute rollback
        id: rollback
        env:
          TARGET_VERSION: ${{ needs.validate.outputs.target_version }}
          SERVICES: ${{ needs.validate.outputs.services }}
          ENVIRONMENT: ${{ inputs.environment }}
          REASON: ${{ inputs.reason }}
        run: |
          echo "Starting rollback..."
          START_TIME=$(date +%s)

          # Execute rollback script
          if [[ -f ".gitea/scripts/release/rollback.sh" ]]; then
            .gitea/scripts/release/rollback.sh \
              --environment "$ENVIRONMENT" \
              --version "$TARGET_VERSION" \
              --services "$SERVICES" \
              --reason "$REASON"
          else
            echo "::warning::Rollback script not found - using placeholder"
            echo ""
            echo "Rollback would execute:"
            echo "  Environment: $ENVIRONMENT"
            echo "  Version: $TARGET_VERSION"
            echo "  Services: $SERVICES"
            echo ""
            echo "TODO: Implement rollback.sh script"
          fi

          END_TIME=$(date +%s)
          DURATION=$((END_TIME - START_TIME))

          echo "duration=$DURATION" >> "$GITHUB_OUTPUT"
          echo "Rollback completed in ${DURATION}s"

      - name: Health check
        if: inputs.skip_health_check != true
        env:
          SERVICES: ${{ needs.validate.outputs.services }}
          ENVIRONMENT: ${{ inputs.environment }}
        run: |
          echo "Running health checks..."

          # SERVICES is a JSON array of service names; iterate one per line.
          echo "$SERVICES" | jq -r '.[]' | while read -r service; do
            echo "Checking $service..."
            # TODO: Implement service-specific health checks
            # curl -sf "https://${service}.${ENVIRONMENT}.stella-ops.org/health" || exit 1
            echo "  Status: OK (placeholder)"
          done

          echo "All health checks passed"

      - name: Rollback summary
        # Runs even when an earlier step failed so the summary always
        # reports the outcome.
        if: always()
        env:
          ROLLBACK_OUTCOME: ${{ steps.rollback.outcome }}
          DURATION: ${{ steps.rollback.outputs.duration }}
          TARGET_VERSION: ${{ needs.validate.outputs.target_version }}
        run: |
          {
            echo ""
            echo "## Rollback Execution"
            echo ""

            if [[ "$ROLLBACK_OUTCOME" == "success" ]]; then
              echo "### Rollback Successful"
              echo ""
              echo "- Duration: ${DURATION}s"
              echo "- Target Version: ${TARGET_VERSION}"
            else
              echo "### Rollback Failed"
              echo ""
              echo "Please investigate immediately and consider manual intervention."
            fi
          } >> "$GITHUB_STEP_SUMMARY"

  # Reports the result. Both steps currently only print to the job log.
  notify:
    name: Send Notifications
    runs-on: ubuntu-latest
    needs: [validate, rollback]
    if: always()

    steps:
      - name: Notify team
        env:
          STATUS: ${{ needs.rollback.result }}
          ENVIRONMENT: ${{ inputs.environment }}
          SERVICE: ${{ inputs.service }}
          ACTOR: ${{ github.actor }}
          REASON: ${{ inputs.reason }}
          VERSION: ${{ needs.validate.outputs.target_version }}
        run: |
          # Build notification message
          # EMOJI is not printed below; presumably reserved for the chat
          # notification that is still a TODO.
          if [[ "$STATUS" == "success" ]]; then
            EMOJI="white_check_mark"
            TITLE="Rollback Completed Successfully"
          else
            EMOJI="x"
            TITLE="Rollback Failed - Immediate Attention Required"
          fi

          echo "Notification:"
          echo "  Title: $TITLE"
          echo "  Environment: $ENVIRONMENT"
          echo "  Service: $SERVICE"
          echo "  Version: $VERSION"
          echo "  Actor: $ACTOR"
          echo "  Reason: $REASON"

          # TODO: Send to Slack/Teams/PagerDuty

      # - name: Slack notification
      #   uses: slackapi/slack-github-action@v1
      #   with:
      #     payload: |
      #       {
      #         "text": "${{ env.TITLE }}",
      #         "blocks": [...]
      #       }

      - name: Create incident record
        env:
          ENVIRONMENT: ${{ inputs.environment }}
          SERVICE: ${{ inputs.service }}
          TARGET_VERSION: ${{ needs.validate.outputs.target_version }}
          REASON: ${{ inputs.reason }}
          ACTOR: ${{ github.actor }}
          STATUS: ${{ needs.rollback.result }}
          RUN_ID: ${{ github.run_id }}
        run: |
          echo "Creating incident record..."

          # Log to incident tracking
          INCIDENT_LOG="devops/incidents/$(date +%Y-%m-%d)-rollback.json"

          # Build the record with jq so quotes, backslashes and newlines in
          # the free-text reason are escaped and the output stays valid JSON.
          jq -n \
            --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
            --arg environment "$ENVIRONMENT" \
            --arg service "$SERVICE" \
            --arg target_version "$TARGET_VERSION" \
            --arg reason "$REASON" \
            --arg actor "$ACTOR" \
            --arg status "$STATUS" \
            --arg run_id "$RUN_ID" \
            '{
              timestamp: $timestamp,
              type: "rollback",
              environment: $environment,
              service: $service,
              target_version: $target_version,
              reason: $reason,
              actor: $actor,
              status: $status,
              run_id: $run_id
            }'

          echo "::notice::Incident record would be created at $INCIDENT_LOG"