# -----------------------------------------------------------------------------
# router-chaos.yml
# Sprint: SPRINT_5100_0005_0001_router_chaos_suite
# Task: T5 - CI Chaos Workflow
# Description: CI workflow for running router chaos tests.
#
# Jobs:
#   load-tests           - builds and starts the router, runs the k6 spike test
#   chaos-unit-tests     - runs the chaos test project against a running router
#   valkey-failure-tests - runs the tests filtered by Category=Valkey
#   analyze-results      - downloads artifacts and writes the run summary
# -----------------------------------------------------------------------------

name: Router Chaos Tests

on:
  schedule:
    - cron: '0 3 * * *'  # Nightly at 3 AM UTC
  workflow_dispatch:
    inputs:
      spike_multiplier:
        description: 'Load spike multiplier (e.g., 10, 50, 100)'
        default: '10'
        type: choice
        options:
          - '10'
          - '50'
          - '100'
      run_valkey_tests:
        description: 'Run Valkey failure injection tests'
        default: true
        type: boolean

env:
  DOTNET_NOLOGO: '1'
  DOTNET_CLI_TELEMETRY_OPTOUT: '1'
  TZ: UTC
  ROUTER_URL: http://localhost:8080

jobs:
  # Builds the router, starts it in the background and drives it with k6.
  load-tests:
    runs-on: ubuntu-22.04
    timeout-minutes: 30
    # Expose the recorded k6 result so downstream jobs can report it.
    outputs:
      k6_status: ${{ steps.k6.outputs.k6_status }}

    services:
      postgres:
        image: postgres:16-alpine
        env:
          POSTGRES_USER: stellaops
          POSTGRES_PASSWORD: test
          POSTGRES_DB: stellaops_test
        ports:
          - '5432:5432'
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

      valkey:
        image: valkey/valkey:7-alpine
        ports:
          - '6379:6379'
        options: >-
          --health-cmd "valkey-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # The SDK version is pinned exactly; `include-prerelease` is not an
      # input of setup-dotnet@v4 and only produced an "unexpected input"
      # warning, so it is omitted.
      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.100'

      - name: Install k6
        run: |
          curl -sSL https://github.com/grafana/k6/releases/download/v0.54.0/k6-v0.54.0-linux-amd64.tar.gz | tar xz
          sudo mv k6-v0.54.0-linux-amd64/k6 /usr/local/bin/
          k6 version

      - name: Cache NuGet packages
        uses: actions/cache@v4
        with:
          path: ~/.nuget/packages
          key: chaos-nuget-${{ runner.os }}-${{ hashFiles('**/*.csproj') }}

      - name: Build Router
        run: |
          dotnet restore src/Router/StellaOps.Router.WebService/StellaOps.Router.WebService.csproj
          dotnet build src/Router/StellaOps.Router.WebService/StellaOps.Router.WebService.csproj -c Release --no-restore

      - name: Start Router
        run: |
          dotnet run --project src/Router/StellaOps.Router.WebService/StellaOps.Router.WebService.csproj -c Release --no-build &
          echo $! > router.pid

          # Wait for router to start (up to 30 attempts, 2 seconds apart).
          ready=false
          for i in {1..30}; do
            if curl -s "$ROUTER_URL/health" > /dev/null 2>&1; then
              echo "Router is ready"
              ready=true
              break
            fi
            echo "Waiting for router... ($i/30)"
            sleep 2
          done

          # Without this check the load test would run against a router
          # that never came up.
          if [ "$ready" != "true" ]; then
            echo "::error::Router did not become ready at $ROUTER_URL/health"
            exit 1
          fi

      - name: Run k6 spike test
        id: k6
        env:
          # Scheduled runs have no inputs, so fall back to the input default.
          SPIKE_MULTIPLIER: ${{ github.event.inputs.spike_multiplier || '10' }}
        run: |
          mkdir -p results

          # NOTE(review): SPIKE_MULTIPLIER is the assumed name the k6 script
          # reads from __ENV - confirm against tests/load/router/spike-test.js.
          k6 run tests/load/router/spike-test.js \
            -e ROUTER_URL="$ROUTER_URL" \
            -e SPIKE_MULTIPLIER="$SPIKE_MULTIPLIER" \
            --out json=results/k6-results.json \
            --summary-export results/k6-summary.json \
            2>&1 | tee results/k6-output.txt

          # Capture k6's own exit code; the pipeline's status is tee's.
          k6_exit=${PIPESTATUS[0]}

          # Record the result without failing the step, so the results are
          # still uploaded and analyzed by the later jobs.
          if [ "$k6_exit" -ne 0 ]; then
            echo "::warning::k6 exited with code $k6_exit"
            echo "k6_status=failed" >> "$GITHUB_OUTPUT"
          else
            echo "k6_status=passed" >> "$GITHUB_OUTPUT"
          fi

      - name: Upload k6 results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: k6-results-${{ github.run_id }}
          path: results/
          retention-days: 30

      - name: Stop Router
        if: always()
        run: |
          if [ -f router.pid ]; then
            kill $(cat router.pid) 2>/dev/null || true
          fi

  # Runs the chaos test project; scheduled even when load-tests fails.
  chaos-unit-tests:
    runs-on: ubuntu-22.04
    timeout-minutes: 20
    needs: load-tests
    if: always()

    services:
      postgres:
        image: postgres:16-alpine
        env:
          POSTGRES_USER: stellaops
          POSTGRES_PASSWORD: test
          POSTGRES_DB: stellaops_test
        ports:
          - '5432:5432'
        # Same health checks as the load-tests job so steps start only once
        # the service containers report healthy.
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

      valkey:
        image: valkey/valkey:7-alpine
        ports:
          - '6379:6379'
        options: >-
          --health-cmd "valkey-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.100'

      - name: Build Chaos Tests
        run: |
          dotnet restore tests/chaos/StellaOps.Chaos.Router.Tests/StellaOps.Chaos.Router.Tests.csproj
          dotnet build tests/chaos/StellaOps.Chaos.Router.Tests/StellaOps.Chaos.Router.Tests.csproj -c Release --no-restore

      - name: Start Router for Tests
        run: |
          dotnet run --project src/Router/StellaOps.Router.WebService/StellaOps.Router.WebService.csproj -c Release &

          # `dotnet run` builds the project first here, so poll the health
          # endpoint rather than sleeping for a fixed time (up to 60
          # attempts, 2 seconds apart).
          ready=false
          for i in {1..60}; do
            if curl -s "$ROUTER_URL/health" > /dev/null 2>&1; then
              echo "Router is ready"
              ready=true
              break
            fi
            echo "Waiting for router... ($i/60)"
            sleep 2
          done

          if [ "$ready" != "true" ]; then
            echo "::error::Router did not become ready at $ROUTER_URL/health"
            exit 1
          fi

      - name: Run Chaos Unit Tests
        run: |
          dotnet test tests/chaos/StellaOps.Chaos.Router.Tests/StellaOps.Chaos.Router.Tests.csproj \
            -c Release \
            --no-build \
            --logger "trx;LogFileName=chaos-results.trx" \
            --logger "console;verbosity=detailed" \
            --results-directory results \
            -- RunConfiguration.TestSessionTimeout=600000

      - name: Upload Test Results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: chaos-test-results-${{ github.run_id }}
          path: results/
          retention-days: 30

  # Runs only the tests in Category=Valkey. Scheduled runs have no inputs,
  # so the condition below is true for them.
  valkey-failure-tests:
    runs-on: ubuntu-22.04
    timeout-minutes: 20
    needs: load-tests
    if: ${{ github.event.inputs.run_valkey_tests != 'false' }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.100'

      - name: Install Docker Compose
        run: |
          sudo apt-get update
          sudo apt-get install -y docker-compose

      - name: Run Valkey Failure Tests
        run: |
          dotnet test tests/chaos/StellaOps.Chaos.Router.Tests/StellaOps.Chaos.Router.Tests.csproj \
            -c Release \
            --filter "Category=Valkey" \
            --logger "trx;LogFileName=valkey-results.trx" \
            --results-directory results \
            -- RunConfiguration.TestSessionTimeout=600000

      - name: Upload Valkey Test Results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: valkey-test-results-${{ github.run_id }}
          path: results/

  # Collects the uploaded artifacts and writes the step summary.
  analyze-results:
    runs-on: ubuntu-22.04
    needs: [load-tests, chaos-unit-tests]
    if: always()

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # An upstream job may not have produced an artifact; the steps below
      # already handle missing files, so a failed download must not abort
      # the analysis.
      - name: Download k6 Results
        continue-on-error: true
        uses: actions/download-artifact@v4
        with:
          name: k6-results-${{ github.run_id }}
          path: k6-results/

      - name: Download Chaos Test Results
        continue-on-error: true
        uses: actions/download-artifact@v4
        with:
          name: chaos-test-results-${{ github.run_id }}
          path: chaos-results/

      - name: Analyze Results
        id: analysis
        run: |
          mkdir -p analysis

          # Parse k6 summary
          if [ -f k6-results/k6-summary.json ]; then
            echo "=== k6 Test Summary ===" | tee analysis/summary.txt

            # Extract key metrics
            jq -r '.metrics | to_entries[] | "\(.key): \(.value)"' k6-results/k6-summary.json >> analysis/summary.txt 2>/dev/null || true
          fi

          # Check thresholds
          THRESHOLDS_PASSED=true
          if [ -f k6-results/k6-summary.json ]; then
            # Check if any threshold failed
            FAILED_THRESHOLDS=$(jq -r '.thresholds | to_entries[] | select(.value.ok == false) | .key' k6-results/k6-summary.json 2>/dev/null || echo "")
            if [ -n "$FAILED_THRESHOLDS" ]; then
              echo "Failed thresholds: $FAILED_THRESHOLDS"
              THRESHOLDS_PASSED=false
            fi
          fi

          echo "thresholds_passed=$THRESHOLDS_PASSED" >> "$GITHUB_OUTPUT"

      - name: Upload Analysis
        uses: actions/upload-artifact@v4
        with:
          name: chaos-analysis-${{ github.run_id }}
          path: analysis/

      - name: Create Summary
        run: |
          echo "## Router Chaos Test Results" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Load Test Results" >> "$GITHUB_STEP_SUMMARY"
          echo "- k6 Status: ${{ needs.load-tests.outputs.k6_status || 'unknown' }}" >> "$GITHUB_STEP_SUMMARY"
          if [ -f k6-results/k6-summary.json ]; then
            echo "- Total Requests: $(jq -r '.metrics.http_reqs.values.count // "N/A"' k6-results/k6-summary.json)" >> "$GITHUB_STEP_SUMMARY"
            echo "- Failed Rate: $(jq -r '.metrics.http_req_failed.values.rate // "N/A"' k6-results/k6-summary.json)" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "- No k6 results found" >> "$GITHUB_STEP_SUMMARY"
          fi
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Thresholds" >> "$GITHUB_STEP_SUMMARY"
          echo "- Status: ${{ steps.analysis.outputs.thresholds_passed == 'true' && 'PASSED' || 'FAILED' }}" >> "$GITHUB_STEP_SUMMARY"