# git.stella-ops.org/.gitea/workflows/reachability-corpus-ci.yml

# Display name of this workflow.
name: Reachability Corpus Validation

# Triggers: manual dispatch, pushes to main, and pull requests. The push and
# pull_request triggers are limited to changes under the listed paths (corpus,
# fixtures, the fixture test project, reachability scripts, and this workflow
# file itself).
on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - "tests/reachability/corpus/**"
      - "tests/reachability/fixtures/**"
      - "tests/reachability/StellaOps.Reachability.FixtureTests/**"
      - "scripts/reachability/**"
      - ".gitea/workflows/reachability-corpus-ci.yml"
  pull_request:
    paths:
      - "tests/reachability/corpus/**"
      - "tests/reachability/fixtures/**"
      - "tests/reachability/StellaOps.Reachability.FixtureTests/**"
      - "scripts/reachability/**"
      - ".gitea/workflows/reachability-corpus-ci.yml"

jobs:
  # Job: checks the corpus manifest and reachbench index, builds and runs the
  # fixture tests, verifies corpus file hashes, and uploads the TRX results.
  validate-corpus:
    runs-on: ubuntu-22.04
    env:
      # Values are quoted so each one is unambiguously a string.
      TZ: "UTC"
      DOTNET_NOLOGO: "1"
      DOTNET_CLI_TELEMETRY_OPTOUT: "1"
      DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: "1"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # 0 requests the full history rather than a shallow clone.
          fetch-depth: 0

      # Installs the .NET SDK used by the restore, build and test steps.
      # The former `include-prerelease: true` entry was removed: it is not an
      # input declared by actions/setup-dotnet@v4 (prerelease selection there
      # is done with `dotnet-quality`), so it had no effect. `dotnet-quality`
      # is not added because an exact SDK version is pinned.
      # NOTE(review): the step name still says "RC" while the pinned version
      # string has no prerelease suffix - confirm which SDK is intended.
      - name: Setup .NET 10 RC
        uses: actions/setup-dotnet@v4
        with:
          # Quoted so the version is always read as a string.
          dotnet-version: "10.0.100"

      # Checks that the corpus manifest exists and parses as JSON.
      - name: Verify corpus manifest integrity
        run: |
          echo "Verifying corpus manifest..."
          cd tests/reachability/corpus
          # Guard clause: emit an error annotation and stop when the file is absent.
          [ -f manifest.json ] || {
            echo "::error::Corpus manifest.json not found"
            exit 1
          }
          echo "Manifest exists, checking JSON validity..."
          # json.load raises on malformed JSON, so python3 exits non-zero.
          python3 -c "import json; json.load(open('manifest.json'))"
          echo "Manifest is valid JSON"

      # Checks that the reachbench INDEX.json exists and parses as JSON.
      - name: Verify reachbench index integrity
        run: |
          echo "Verifying reachbench fixtures..."
          cd tests/reachability/fixtures/reachbench-2025-expanded
          # Guard clause: emit an error annotation and stop when the file is absent.
          [ -f INDEX.json ] || {
            echo "::error::Reachbench INDEX.json not found"
            exit 1
          }
          echo "INDEX exists, checking JSON validity..."
          # json.load raises on malformed JSON, so python3 exits non-zero.
          python3 -c "import json; json.load(open('INDEX.json'))"
          echo "INDEX is valid JSON"

      # Restores packages for the fixture test project, reading NuGet settings
      # from nuget.config. The folded scalar joins the lines into one command.
      - name: Restore test project
        run: >-
          dotnet restore
          tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj
          --configfile nuget.config

      # Builds the fixture test project in Release, reusing the restore above.
      - name: Build test project
        run: >-
          dotnet build
          tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj
          -c Release
          --no-restore

      # Runs the tests whose fully qualified name contains CorpusFixtureTests
      # and writes a TRX log to ./TestResults. The folded scalar joins the
      # lines below into a single command line.
      - name: Run corpus fixture tests
        run: >-
          dotnet test
          tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj
          -c Release
          --no-build
          --logger "trx;LogFileName=corpus-results.trx"
          --results-directory ./TestResults
          --filter "FullyQualifiedName~CorpusFixtureTests"

      # Runs the tests whose fully qualified name contains
      # ReachbenchFixtureTests and writes a TRX log to ./TestResults. The
      # folded scalar joins the lines below into a single command line.
      - name: Run reachbench fixture tests
        run: >-
          dotnet test
          tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj
          -c Release
          --no-build
          --logger "trx;LogFileName=reachbench-results.trx"
          --results-directory ./TestResults
          --filter "FullyQualifiedName~ReachbenchFixtureTests"

      # Verifies corpus file hashes. Uses the repository's verification script
      # when it exists; otherwise falls back to an inline Python check that
      # compares the SHA-256 of every file listed in manifest.json.
      - name: Verify deterministic hashes
        run: |
          echo "Verifying SHA-256 hashes in corpus manifest..."
          verify_script=scripts/reachability/verify_corpus_hashes.sh
          # Best effort: a chmod failure must not abort the step.
          chmod +x "$verify_script" || true
          if [ ! -f "$verify_script" ]; then
            echo "Hash verification script not found, using inline verification..."
            cd tests/reachability/corpus
            python3 << 'EOF'
          import hashlib
          import json
          import os
          import sys

          with open('manifest.json') as manifest_file:
              manifest = json.load(manifest_file)

          # Each manifest entry supplies 'id', 'language' and a 'files' mapping of
          # file name -> expected SHA-256 hex digest; files live in <language>/<id>/.
          problems = []
          for case in manifest:
              case_id = case['id']
              case_dir = os.path.join(case['language'], case_id)
              for filename, expected in case['files'].items():
                  target = os.path.join(case_dir, filename)
                  if os.path.exists(target):
                      with open(target, 'rb') as blob:
                          actual = hashlib.sha256(blob.read()).hexdigest()
                      if actual != expected:
                          problems.append(f"{case_id}: {filename} hash mismatch (expected {expected}, got {actual})")
                  else:
                      problems.append(f"{case_id}: missing {filename}")

          # Report every problem as an error annotation before failing.
          if problems:
              print("\n".join(f"::error::{problem}" for problem in problems))
              sys.exit(1)
          print(f"All {len(manifest)} corpus entries verified")
          EOF
          else
            "$verify_script"
          fi

      # Uploads the TRX files from ./TestResults as a run-numbered artifact.
      # `always()` makes this step run even when an earlier step failed.
      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: corpus-test-results-${{ github.run_number }}
          path: ./TestResults/*.trx
          retention-days: 14

  # Job: structural validation of the ground-truth JSON files referenced by
  # the corpus manifest and by the reachbench index. Every problem found is
  # collected and printed as an error annotation before the job fails.
  validate-ground-truths:
    runs-on: ubuntu-22.04
    env:
      TZ: UTC
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Validate ground-truth schema version
        run: |
          echo "Validating ground-truth files..."
          cd tests/reachability
          python3 << 'EOF'
          import json
          import os
          import sys

          EXPECTED_SCHEMA = "reachbench.reachgraph.truth/v1"
          ALLOWED_VARIANTS = {"reachable", "unreachable"}
          errors = []


          def load_truth(path, label):
              """Return the JSON object stored at path, or None after recording an error.

              A file that cannot be decoded/parsed, or whose top-level value is not
              an object, is added to `errors` under `label` instead of aborting the
              script with a traceback, so all problems are reported in one run.
              """
              try:
                  with open(path, encoding='utf-8') as f:
                      truth = json.load(f)
              except (json.JSONDecodeError, UnicodeDecodeError) as exc:
                  errors.append(f"{label}: invalid JSON - {exc}")
                  return None
              if not isinstance(truth, dict):
                  errors.append(f"{label}: top-level value must be an object")
                  return None
              return truth


          # Validate corpus ground-truths
          # NOTE(review): a missing manifest skips this section silently - confirm
          # that relying on the validate-corpus job for that check is intended.
          corpus_manifest = 'corpus/manifest.json'
          if os.path.exists(corpus_manifest):
              with open(corpus_manifest) as f:
                  manifest = json.load(f)
              for entry in manifest:
                  case_id = entry['id']
                  lang = entry['language']
                  label = f"corpus/{case_id}"
                  truth_path = os.path.join('corpus', lang, case_id, 'ground-truth.json')
                  if not os.path.exists(truth_path):
                      errors.append(f"{label}: missing ground-truth.json")
                      continue
                  truth = load_truth(truth_path, label)
                  if truth is None:
                      continue
                  if truth.get('schema_version') != EXPECTED_SCHEMA:
                      errors.append(f"{label}: wrong schema_version")
                  if truth.get('variant') not in ALLOWED_VARIANTS:
                      errors.append(f"{label}: invalid variant '{truth.get('variant')}'")
                  if not isinstance(truth.get('paths'), list):
                      errors.append(f"{label}: paths must be an array")

          # Validate reachbench ground-truths
          # Only the presence of schema_version is checked here, not its value.
          reachbench_index = 'fixtures/reachbench-2025-expanded/INDEX.json'
          if os.path.exists(reachbench_index):
              with open(reachbench_index) as f:
                  index = json.load(f)
              for case in index.get('cases', []):
                  case_id = case['id']
                  # Cases without an explicit 'path' default to cases/<id>.
                  case_path = case.get('path', os.path.join('cases', case_id))
                  for variant in ['reachable', 'unreachable']:
                      label = f"reachbench/{case_id}/{variant}"
                      truth_path = os.path.join('fixtures/reachbench-2025-expanded', case_path, 'images', variant, 'reachgraph.truth.json')
                      if not os.path.exists(truth_path):
                          errors.append(f"{label}: missing reachgraph.truth.json")
                          continue
                      truth = load_truth(truth_path, label)
                      if truth is None:
                          continue
                      if not truth.get('schema_version'):
                          errors.append(f"{label}: missing schema_version")
                      if not isinstance(truth.get('paths'), list):
                          errors.append(f"{label}: paths must be an array")

          if errors:
              for err in errors:
                  print(f"::error::{err}")
              sys.exit(1)
          print("All ground-truth files validated successfully")
          EOF

  # Job: runs after validate-corpus and compares every corpus JSON file with
  # its canonical re-serialization (sorted keys, two-space indent). A mismatch
  # only produces a warning; a file that fails to parse fails the job.
  determinism-check:
    needs: validate-corpus
    runs-on: ubuntu-22.04
    env:
      TZ: UTC
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Verify JSON determinism (sorted keys, no trailing whitespace)
        run: |
          echo "Checking JSON determinism..."
          cd tests/reachability
          python3 << 'EOF'
          import json
          import os
          import sys


          def is_canonical(path):
              """Return True when the file text equals its sorted-key, indent=2 dump."""
              with open(path) as handle:
                  raw = handle.read()
              # NOTE(review): json.dumps escapes non-ASCII characters by default, so
              # a file containing raw non-ASCII text is reported - confirm intended.
              canonical = json.dumps(json.loads(raw), sort_keys=True, indent=2)
              # CRLF is folded to LF and surrounding whitespace is ignored on both sides.
              return raw.replace('\r\n', '\n').strip() == canonical.strip()


          # Every *.json file below corpus/, in os.walk order.
          json_files = [
              os.path.join(root, name)
              for root, _dirs, names in os.walk('corpus')
              for name in names
              if name.endswith('.json')
          ]

          errors = []
          non_deterministic = []
          for candidate in json_files:
              try:
                  canonical_ok = is_canonical(candidate)
              except json.JSONDecodeError as e:
                  errors.append(f"{candidate}: invalid JSON - {e}")
                  continue
              if not canonical_ok:
                  non_deterministic.append(candidate)

          # Non-canonical files are reported as a warning; at most ten are listed.
          if non_deterministic:
              total = len(non_deterministic)
              print(f"::warning::Found {total} non-deterministic JSON files (keys not sorted or whitespace differs)")
              for shown in non_deterministic[:10]:
                  print(f"  - {shown}")
              if total > 10:
                  print(f"  ... and {total - 10} more")

          # Unparsable files are hard errors.
          if errors:
              for err in errors:
                  print(f"::error::{err}")
              sys.exit(1)

          print(f"Checked {len(json_files)} JSON files")
          EOF