# -----------------------------------------------------------------------------
# control-plane-chaos.yml
# Sprint: Testing Enhancement Advisory - Phase 3.3
# Description: CI workflow for control-plane outage chaos tests
# Schedule: Weekly (chaos tests are intensive)
# -----------------------------------------------------------------------------

# Display name of this workflow.
name: 'Control-Plane Chaos Tests'

# Triggers: one scheduled weekly run, plus manual dispatch with two optional
# inputs that the test steps read.
on:
  schedule:
    # Run weekly on Sundays at 3:00 AM UTC
    - cron: '0 3 * * 0'
  workflow_dispatch:
    inputs:
      # Optional extra filter expression; the empty default adds no filtering.
      test_filter:
        description: 'Test filter (e.g., FullyQualifiedName~Authority)'
        type: string
        required: false
        default: ''
      # Verbosity level forwarded to the `dotnet test` invocations.
      verbosity:
        description: 'Test verbosity level'
        type: choice
        options: [minimal, normal, detailed, diagnostic]
        required: false
        default: 'normal'

# Workflow-level environment for the .NET CLI. Values are quoted because
# environment variables are always exported as strings.
env:
  DOTNET_CLI_TELEMETRY_OPTOUT: 'true'
  DOTNET_NOLOGO: 'true'
  DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 'true'

jobs:
  # Restores, builds and runs the two chaos suites (Category=ControlPlane and
  # Category=PartialOutage). Both suites use continue-on-error so that the other
  # suite, the artifact upload and the summary still run after a failure; the
  # final "Check test results" step then fails the job if either suite did not
  # succeed.
  chaos-tests:
    name: Control-Plane Chaos Tests
    runs-on: ubuntu-latest
    timeout-minutes: 60
    env:
      # Single source of truth for the project path used by restore/build/test.
      CHAOS_TEST_PROJECT: src/__Tests/chaos/StellaOps.Chaos.ControlPlane.Tests/StellaOps.Chaos.ControlPlane.Tests.csproj

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.x'
          dotnet-quality: 'preview'

      - name: Restore dependencies
        run: |
          dotnet restore "$CHAOS_TEST_PROJECT"

      - name: Build chaos test project
        run: |
          dotnet build "$CHAOS_TEST_PROJECT" \
            --configuration Release \
            --no-restore

      - name: Run control-plane outage tests
        id: outage-tests
        continue-on-error: true
        # Dispatch inputs are passed through the environment instead of being
        # expanded into the script text, so quotes or shell metacharacters in a
        # filter cannot alter the command. Both inputs are empty on scheduled
        # runs, hence the 'normal' fallback.
        env:
          TEST_FILTER: ${{ github.event.inputs.test_filter }}
          VERBOSITY: ${{ github.event.inputs.verbosity || 'normal' }}
        run: |
          # The optional user filter is parenthesised so that an expression
          # containing "|" cannot escape the Category restriction.
          dotnet test "$CHAOS_TEST_PROJECT" \
            --configuration Release \
            --no-build \
            --verbosity "$VERBOSITY" \
            --logger "trx;LogFileName=chaos-outage-results.trx" \
            --logger "console;verbosity=$VERBOSITY" \
            --results-directory ./TestResults \
            --filter "Category=ControlPlane${TEST_FILTER:+&($TEST_FILTER)}" \
            -- \
            RunConfiguration.CollectSourceInformation=true

      - name: Run partial outage tests
        id: partial-tests
        continue-on-error: true
        # Same input handling as the outage suite above.
        env:
          TEST_FILTER: ${{ github.event.inputs.test_filter }}
          VERBOSITY: ${{ github.event.inputs.verbosity || 'normal' }}
        run: |
          dotnet test "$CHAOS_TEST_PROJECT" \
            --configuration Release \
            --no-build \
            --verbosity "$VERBOSITY" \
            --logger "trx;LogFileName=chaos-partial-results.trx" \
            --logger "console;verbosity=$VERBOSITY" \
            --results-directory ./TestResults \
            --filter "Category=PartialOutage${TEST_FILTER:+&($TEST_FILTER)}" \
            -- \
            RunConfiguration.CollectSourceInformation=true

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: chaos-test-results
          path: ./TestResults/*.trx
          retention-days: 30

      - name: Generate chaos test summary
        if: always()
        env:
          OUTAGE_OUTCOME: ${{ steps.outage-tests.outcome }}
          PARTIAL_OUTCOME: ${{ steps.partial-tests.outcome }}
        run: |
          # Maps a step outcome to a table cell. A suite that never ran (for
          # example because the build failed) is reported as not run rather
          # than as a test failure.
          status_cell() {
            case "$1" in
              success) echo ":white_check_mark: Passed" ;;
              skipped | cancelled) echo ":warning: Not run ($1)" ;;
              *) echo ":x: Failed" ;;
            esac
          }

          {
            echo "## Control-Plane Chaos Test Results"
            echo ""
            echo "### Test Execution"
            echo "| Test Suite | Status |"
            echo "|------------|--------|"
            echo "| Full Outage Tests | $(status_cell "$OUTAGE_OUTCOME") |"
            echo "| Partial Outage Tests | $(status_cell "$PARTIAL_OUTCOME") |"
            echo ""
            echo "### Test Categories Covered"
            echo "- Authority outage and cached token validation"
            echo "- Scheduler outage and job persistence"
            echo "- Full control-plane outage and data integrity"
            echo "- Partial failure rate scenarios"
            echo "- Latency injection and degraded service handling"
            echo "- Service isolation and cascading failure prevention"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Check test results
        if: always()
        env:
          OUTAGE_OUTCOME: ${{ steps.outage-tests.outcome }}
          PARTIAL_OUTCOME: ${{ steps.partial-tests.outcome }}
        run: |
          # The suites run with continue-on-error, so this step is what turns a
          # non-successful suite into a failed job.
          if [ "$OUTAGE_OUTCOME" != "success" ] || [ "$PARTIAL_OUTCOME" != "success" ]; then
            echo "::error::One or more chaos test suites failed"
            exit 1
          fi
          echo "All chaos tests passed successfully"

  # Always runs after chaos-tests and writes a report to the job summary,
  # including a call to action when the test job failed.
  chaos-report:
    name: Generate Chaos Report
    runs-on: ubuntu-latest
    needs: chaos-tests
    if: always()

    steps:
      - name: Download test results
        id: download
        uses: actions/download-artifact@v4
        # The artifact does not exist when no .trx file was produced (for
        # example after a build failure). That must not abort the report or
        # suppress the failure notification below.
        continue-on-error: true
        with:
          name: chaos-test-results
          path: ./TestResults

      - name: Parse TRX results
        env:
          DOWNLOAD_OUTCOME: ${{ steps.download.outcome }}
        run: |
          {
            echo "## Chaos Test Detailed Report"
            echo ""
            if [ "$DOWNLOAD_OUTCOME" = "success" ]; then
              echo "Test results have been uploaded as artifacts."
              echo ""
              echo "### Artifact Location"
              echo "- chaos-test-results (TRX format)"
            else
              echo "No chaos-test-results artifact could be downloaded for this run."
            fi
            echo ""

            # List TRX files
            echo "### Available Result Files"
            found=0
            for file in ./TestResults/*.trx; do
              # An unmatched glob stays literal, so confirm the file exists.
              if [ -f "$file" ]; then
                echo "- $(basename "$file")"
                found=1
              fi
            done
            if [ "$found" -eq 0 ]; then
              echo "- (none)"
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Notify on failure
        # Uses an explicit status function so the implicit success() check is
        # not applied; the notification depends only on the test job's result.
        if: ${{ !cancelled() && needs.chaos-tests.result == 'failure' }}
        run: |
          echo "::warning::Chaos tests failed. Review the test results for details."
          {
            echo ""
            echo "### :warning: Action Required"
            echo "Chaos tests have failed. Please review:"
            echo "1. Download the test artifacts for detailed results"
            echo "2. Check if failures are due to test infrastructure or actual regressions"
            echo "3. Consider running tests locally with diagnostic verbosity"
          } >> "$GITHUB_STEP_SUMMARY"