# Reachability Corpus Validation
#
# Runs whenever the reachability corpus, the reachbench fixtures, the fixture
# test project, the helper scripts, or this workflow file change.
#
# Jobs:
#   validate-corpus         - JSON sanity checks, .NET fixture tests, and
#                             SHA-256 verification of corpus files.
#   validate-ground-truths  - structural checks on ground-truth JSON files.
#   determinism-check       - warns about corpus JSON that is not serialized
#                             with sorted keys and 2-space indentation.
name: Reachability Corpus Validation

on:
  workflow_dispatch:
  push:
    branches: [main]
    paths:
      - 'tests/reachability/corpus/**'
      - 'tests/reachability/fixtures/**'
      - 'tests/reachability/StellaOps.Reachability.FixtureTests/**'
      - 'scripts/reachability/**'
      - '.gitea/workflows/reachability-corpus-ci.yml'
  pull_request:
    paths:
      - 'tests/reachability/corpus/**'
      - 'tests/reachability/fixtures/**'
      - 'tests/reachability/StellaOps.Reachability.FixtureTests/**'
      - 'scripts/reachability/**'
      - '.gitea/workflows/reachability-corpus-ci.yml'

jobs:
  validate-corpus:
    runs-on: ubuntu-22.04
    env:
      # Environment values are always consumed as strings; quote them so the
      # YAML type matches what the runner exports.
      DOTNET_NOLOGO: '1'
      DOTNET_CLI_TELEMETRY_OPTOUT: '1'
      DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: '1'
      TZ: UTC
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup .NET 10 RC
        uses: actions/setup-dotnet@v4
        with:
          # setup-dotnet v3+ has no `include-prerelease` input (it was
          # replaced by `dotnet-quality`), so only the exact SDK pin is given.
          dotnet-version: '10.0.100'

      - name: Verify corpus manifest integrity
        run: |
          echo "Verifying corpus manifest..."
          cd tests/reachability/corpus
          if [ ! -f manifest.json ]; then
            echo "::error::Corpus manifest.json not found"
            exit 1
          fi
          echo "Manifest exists, checking JSON validity..."
          python3 -c "import json; json.load(open('manifest.json'))"
          echo "Manifest is valid JSON"

      - name: Verify reachbench index integrity
        run: |
          echo "Verifying reachbench fixtures..."
          cd tests/reachability/fixtures/reachbench-2025-expanded
          if [ ! -f INDEX.json ]; then
            echo "::error::Reachbench INDEX.json not found"
            exit 1
          fi
          echo "INDEX exists, checking JSON validity..."
          python3 -c "import json; json.load(open('INDEX.json'))"
          echo "INDEX is valid JSON"

      - name: Restore test project
        run: dotnet restore tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj --configfile nuget.config

      - name: Build test project
        run: dotnet build tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj -c Release --no-restore

      - name: Run corpus fixture tests
        run: |
          dotnet test tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj \
            -c Release \
            --no-build \
            --logger "trx;LogFileName=corpus-results.trx" \
            --results-directory ./TestResults \
            --filter "FullyQualifiedName~CorpusFixtureTests"

      - name: Run reachbench fixture tests
        run: |
          dotnet test tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj \
            -c Release \
            --no-build \
            --logger "trx;LogFileName=reachbench-results.trx" \
            --results-directory ./TestResults \
            --filter "FullyQualifiedName~ReachbenchFixtureTests"

      - name: Verify deterministic hashes
        run: |
          echo "Verifying SHA-256 hashes in corpus manifest..."

          # Prefer the repository script when it exists; only then does it
          # make sense to mark it executable.
          if [ -f scripts/reachability/verify_corpus_hashes.sh ]; then
            chmod +x scripts/reachability/verify_corpus_hashes.sh
            scripts/reachability/verify_corpus_hashes.sh
            exit 0
          fi

          echo "Hash verification script not found, using inline verification..."
          cd tests/reachability/corpus
          python3 << 'EOF'
          import json
          import hashlib
          import sys
          import os

          with open('manifest.json', encoding='utf-8') as f:
              manifest = json.load(f)

          errors = []
          for entry in manifest:
              case_id = entry['id']
              lang = entry['language']
              case_dir = os.path.join(lang, case_id)
              # entry['files'] maps a file name to its expected SHA-256 hex digest.
              for filename, expected_hash in entry['files'].items():
                  filepath = os.path.join(case_dir, filename)
                  if not os.path.exists(filepath):
                      errors.append(f"{case_id}: missing {filename}")
                      continue
                  with open(filepath, 'rb') as f:
                      actual_hash = hashlib.sha256(f.read()).hexdigest()
                  if actual_hash != expected_hash:
                      errors.append(f"{case_id}: {filename} hash mismatch (expected {expected_hash}, got {actual_hash})")

          if errors:
              for err in errors:
                  print(f"::error::{err}")
              sys.exit(1)
          print(f"All {len(manifest)} corpus entries verified")
          EOF

      - name: Upload test results
        uses: actions/upload-artifact@v4
        # Publish the TRX files even when an earlier step failed.
        if: always()
        with:
          name: corpus-test-results-${{ github.run_number }}
          path: ./TestResults/*.trx
          retention-days: 14

  validate-ground-truths:
    runs-on: ubuntu-22.04
    env:
      TZ: UTC
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Validate ground-truth schema version
        run: |
          echo "Validating ground-truth files..."
          cd tests/reachability
          python3 << 'EOF'
          import json
          import os
          import sys

          EXPECTED_SCHEMA = "reachbench.reachgraph.truth/v1"
          ALLOWED_VARIANTS = {"reachable", "unreachable"}
          errors = []

          # Validate corpus ground-truths (skipped when the manifest is absent)
          corpus_manifest = 'corpus/manifest.json'
          if os.path.exists(corpus_manifest):
              with open(corpus_manifest, encoding='utf-8') as f:
                  manifest = json.load(f)
              for entry in manifest:
                  case_id = entry['id']
                  lang = entry['language']
                  truth_path = os.path.join('corpus', lang, case_id, 'ground-truth.json')
                  if not os.path.exists(truth_path):
                      errors.append(f"corpus/{case_id}: missing ground-truth.json")
                      continue
                  with open(truth_path, encoding='utf-8') as f:
                      truth = json.load(f)
                  if truth.get('schema_version') != EXPECTED_SCHEMA:
                      errors.append(f"corpus/{case_id}: wrong schema_version")
                  if truth.get('variant') not in ALLOWED_VARIANTS:
                      errors.append(f"corpus/{case_id}: invalid variant '{truth.get('variant')}'")
                  if not isinstance(truth.get('paths'), list):
                      errors.append(f"corpus/{case_id}: paths must be an array")

          # Validate reachbench ground-truths (skipped when the index is absent)
          reachbench_index = 'fixtures/reachbench-2025-expanded/INDEX.json'
          if os.path.exists(reachbench_index):
              with open(reachbench_index, encoding='utf-8') as f:
                  index = json.load(f)
              for case in index.get('cases', []):
                  case_id = case['id']
                  # Fall back to cases/<id> when the index entry has no explicit path.
                  case_path = case.get('path', os.path.join('cases', case_id))
                  for variant in ['reachable', 'unreachable']:
                      truth_path = os.path.join('fixtures/reachbench-2025-expanded', case_path, 'images', variant, 'reachgraph.truth.json')
                      if not os.path.exists(truth_path):
                          errors.append(f"reachbench/{case_id}/{variant}: missing reachgraph.truth.json")
                          continue
                      with open(truth_path, encoding='utf-8') as f:
                          truth = json.load(f)
                      if not truth.get('schema_version'):
                          errors.append(f"reachbench/{case_id}/{variant}: missing schema_version")
                      if not isinstance(truth.get('paths'), list):
                          errors.append(f"reachbench/{case_id}/{variant}: paths must be an array")

          if errors:
              for err in errors:
                  print(f"::error::{err}")
              sys.exit(1)
          print("All ground-truth files validated successfully")
          EOF

  determinism-check:
    needs: validate-corpus
    runs-on: ubuntu-22.04
    env:
      TZ: UTC
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Verify JSON determinism (sorted keys, no trailing whitespace)
        run: |
          echo "Checking JSON determinism..."
          cd tests/reachability
          python3 << 'EOF'
          import json
          import os
          import sys

          def check_json_sorted(filepath):
              """Check if JSON has sorted keys (deterministic)."""
              with open(filepath, encoding='utf-8') as f:
                  content = f.read()
              parsed = json.loads(content)
              reserialized = json.dumps(parsed, sort_keys=True, indent=2)
              # Normalize line endings
              content_normalized = content.replace('\r\n', '\n').strip()
              reserialized_normalized = reserialized.strip()
              return content_normalized == reserialized_normalized

          errors = []
          json_files = []

          # Collect JSON files from corpus
          for root, dirs, files in os.walk('corpus'):
              for f in files:
                  if f.endswith('.json'):
                      json_files.append(os.path.join(root, f))

          # Check determinism; formatting drift is only a warning, while
          # unparseable JSON fails the job.
          non_deterministic = []
          for filepath in json_files:
              try:
                  if not check_json_sorted(filepath):
                      non_deterministic.append(filepath)
              except json.JSONDecodeError as e:
                  errors.append(f"{filepath}: invalid JSON - {e}")

          if non_deterministic:
              print(f"::warning::Found {len(non_deterministic)} non-deterministic JSON files (keys not sorted or whitespace differs)")
              for f in non_deterministic[:10]:
                  print(f" - {f}")
              if len(non_deterministic) > 10:
                  print(f" ... and {len(non_deterministic) - 10} more")

          if errors:
              for err in errors:
                  print(f"::error::{err}")
              sys.exit(1)
          print(f"Checked {len(json_files)} JSON files")
          EOF