Files
git.stella-ops.org/.gitea/workflows/reachability-corpus-ci.yml
StellaOps Bot 233873f620
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
up
2025-12-14 15:50:38 +02:00

268 lines
10 KiB
YAML

# Reachability Corpus Validation workflow.
#
# Triggers:
#   - manual dispatch
#   - pushes to main
#   - pull requests
# Push and pull-request runs are limited to changes under the reachability
# corpus, fixtures, fixture-test project, helper scripts, or this workflow file.
name: Reachability Corpus Validation

on:
  workflow_dispatch:
  push:
    branches: [main]
    paths:
      - 'tests/reachability/corpus/**'
      - 'tests/reachability/fixtures/**'
      - 'tests/reachability/StellaOps.Reachability.FixtureTests/**'
      - 'scripts/reachability/**'
      - '.gitea/workflows/reachability-corpus-ci.yml'
  pull_request:
    paths:
      - 'tests/reachability/corpus/**'
      - 'tests/reachability/fixtures/**'
      - 'tests/reachability/StellaOps.Reachability.FixtureTests/**'
      - 'scripts/reachability/**'
      - '.gitea/workflows/reachability-corpus-ci.yml'
jobs:
  # Checks that the corpus manifest and reachbench index exist and parse as
  # JSON, runs the fixture test suites, and verifies the SHA-256 hashes listed
  # in the corpus manifest. Test results (.trx) are uploaded even on failure.
  validate-corpus:
    runs-on: ubuntu-22.04
    env:
      # Quoted so the values stay strings regardless of the YAML loader.
      DOTNET_NOLOGO: '1'
      DOTNET_CLI_TELEMETRY_OPTOUT: '1'
      DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: '1'
      TZ: UTC
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # `include-prerelease` was dropped here: it is not an input of
      # actions/setup-dotnet v3+ (replaced by `dotnet-quality`) and the SDK
      # version below is pinned exactly.
      - name: Setup .NET 10 RC
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '10.0.100'

      - name: Verify corpus manifest integrity
        run: |
          echo "Verifying corpus manifest..."
          cd tests/reachability/corpus
          if [ ! -f manifest.json ]; then
            echo "::error::Corpus manifest.json not found"
            exit 1
          fi
          echo "Manifest exists, checking JSON validity..."
          python3 -c "import json; json.load(open('manifest.json'))"
          echo "Manifest is valid JSON"

      - name: Verify reachbench index integrity
        run: |
          echo "Verifying reachbench fixtures..."
          cd tests/reachability/fixtures/reachbench-2025-expanded
          if [ ! -f INDEX.json ]; then
            echo "::error::Reachbench INDEX.json not found"
            exit 1
          fi
          echo "INDEX exists, checking JSON validity..."
          python3 -c "import json; json.load(open('INDEX.json'))"
          echo "INDEX is valid JSON"

      - name: Restore test project
        run: |
          dotnet restore tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj \
            --configfile nuget.config

      - name: Build test project
        run: |
          dotnet build tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj \
            -c Release \
            --no-restore

      - name: Run corpus fixture tests
        run: |
          dotnet test tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj \
            -c Release \
            --no-build \
            --logger "trx;LogFileName=corpus-results.trx" \
            --results-directory ./TestResults \
            --filter "FullyQualifiedName~CorpusFixtureTests"

      - name: Run reachbench fixture tests
        run: |
          dotnet test tests/reachability/StellaOps.Reachability.FixtureTests/StellaOps.Reachability.FixtureTests.csproj \
            -c Release \
            --no-build \
            --logger "trx;LogFileName=reachbench-results.trx" \
            --results-directory ./TestResults \
            --filter "FullyQualifiedName~ReachbenchFixtureTests"

      # Prefers the repository's verification script; falls back to an inline
      # Python check when the script is absent. The fallback is written after
      # an early exit (rather than inside an `else` branch) so the heredoc body
      # and its `EOF` terminator sit at the script's base column, which both
      # bash (`<<` terminator) and Python (top-level statements) require.
      - name: Verify deterministic hashes
        run: |
          echo "Verifying SHA-256 hashes in corpus manifest..."
          script=scripts/reachability/verify_corpus_hashes.sh
          if [ -f "$script" ]; then
            # Best-effort: the script may already be executable.
            chmod +x "$script" || true
            "$script"
            exit $?
          fi

          echo "Hash verification script not found, using inline verification..."
          cd tests/reachability/corpus
          python3 << 'EOF'
          import json
          import hashlib
          import sys
          import os

          with open('manifest.json') as f:
              manifest = json.load(f)

          errors = []
          for entry in manifest:
              case_id = entry['id']
              lang = entry['language']
              case_dir = os.path.join(lang, case_id)
              for filename, expected_hash in entry['files'].items():
                  filepath = os.path.join(case_dir, filename)
                  if not os.path.exists(filepath):
                      errors.append(f"{case_id}: missing {filename}")
                      continue
                  with open(filepath, 'rb') as f:
                      actual_hash = hashlib.sha256(f.read()).hexdigest()
                  if actual_hash != expected_hash:
                      errors.append(f"{case_id}: {filename} hash mismatch (expected {expected_hash}, got {actual_hash})")

          if errors:
              for err in errors:
                  print(f"::error::{err}")
              sys.exit(1)

          print(f"All {len(manifest)} corpus entries verified")
          EOF

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: corpus-test-results-${{ github.run_number }}
          path: ./TestResults/*.trx
          retention-days: 14

  # Validates ground-truth files: corpus entries must carry the expected
  # schema_version, an allowed variant and a `paths` array; reachbench cases
  # must have a truth file per variant with a schema_version and `paths` array.
  # Either data set is skipped when its manifest/index file is absent.
  validate-ground-truths:
    runs-on: ubuntu-22.04
    env:
      TZ: UTC
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Validate ground-truth schema version
        run: |
          echo "Validating ground-truth files..."
          cd tests/reachability
          python3 << 'EOF'
          import json
          import os
          import sys

          EXPECTED_SCHEMA = "reachbench.reachgraph.truth/v1"
          ALLOWED_VARIANTS = {"reachable", "unreachable"}
          errors = []

          # Validate corpus ground-truths
          corpus_manifest = 'corpus/manifest.json'
          if os.path.exists(corpus_manifest):
              with open(corpus_manifest) as f:
                  manifest = json.load(f)
              for entry in manifest:
                  case_id = entry['id']
                  lang = entry['language']
                  truth_path = os.path.join('corpus', lang, case_id, 'ground-truth.json')
                  if not os.path.exists(truth_path):
                      errors.append(f"corpus/{case_id}: missing ground-truth.json")
                      continue
                  with open(truth_path) as f:
                      truth = json.load(f)
                  if truth.get('schema_version') != EXPECTED_SCHEMA:
                      errors.append(f"corpus/{case_id}: wrong schema_version")
                  if truth.get('variant') not in ALLOWED_VARIANTS:
                      errors.append(f"corpus/{case_id}: invalid variant '{truth.get('variant')}'")
                  if not isinstance(truth.get('paths'), list):
                      errors.append(f"corpus/{case_id}: paths must be an array")

          # Validate reachbench ground-truths
          reachbench_index = 'fixtures/reachbench-2025-expanded/INDEX.json'
          if os.path.exists(reachbench_index):
              with open(reachbench_index) as f:
                  index = json.load(f)
              for case in index.get('cases', []):
                  case_id = case['id']
                  case_path = case.get('path', os.path.join('cases', case_id))
                  for variant in ['reachable', 'unreachable']:
                      truth_path = os.path.join('fixtures/reachbench-2025-expanded', case_path, 'images', variant, 'reachgraph.truth.json')
                      if not os.path.exists(truth_path):
                          errors.append(f"reachbench/{case_id}/{variant}: missing reachgraph.truth.json")
                          continue
                      with open(truth_path) as f:
                          truth = json.load(f)
                      if not truth.get('schema_version'):
                          errors.append(f"reachbench/{case_id}/{variant}: missing schema_version")
                      if not isinstance(truth.get('paths'), list):
                          errors.append(f"reachbench/{case_id}/{variant}: paths must be an array")

          if errors:
              for err in errors:
                  print(f"::error::{err}")
              sys.exit(1)

          print("All ground-truth files validated successfully")
          EOF

  # Runs after validate-corpus. Re-serialises every corpus JSON file with
  # sorted keys and 2-space indent and compares it with the file on disk.
  # Mismatches are reported as a warning only; unparsable JSON fails the job.
  determinism-check:
    runs-on: ubuntu-22.04
    needs: validate-corpus
    env:
      TZ: UTC
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Verify JSON determinism (sorted keys, no trailing whitespace)
        run: |
          echo "Checking JSON determinism..."
          cd tests/reachability
          python3 << 'EOF'
          import json
          import os
          import sys

          def check_json_sorted(filepath):
              """Check if JSON has sorted keys (deterministic)."""
              with open(filepath) as f:
                  content = f.read()
              parsed = json.loads(content)
              reserialized = json.dumps(parsed, sort_keys=True, indent=2)
              # Normalize line endings
              content_normalized = content.replace('\r\n', '\n').strip()
              reserialized_normalized = reserialized.strip()
              return content_normalized == reserialized_normalized

          errors = []
          json_files = []

          # Collect JSON files from corpus
          for root, dirs, files in os.walk('corpus'):
              for f in files:
                  if f.endswith('.json'):
                      json_files.append(os.path.join(root, f))

          # Check determinism
          non_deterministic = []
          for filepath in json_files:
              try:
                  if not check_json_sorted(filepath):
                      non_deterministic.append(filepath)
              except json.JSONDecodeError as e:
                  errors.append(f"{filepath}: invalid JSON - {e}")

          if non_deterministic:
              print(f"::warning::Found {len(non_deterministic)} non-deterministic JSON files (keys not sorted or whitespace differs)")
              for f in non_deterministic[:10]:
                  print(f" - {f}")
              if len(non_deterministic) > 10:
                  print(f" ... and {len(non_deterministic) - 10} more")

          if errors:
              for err in errors:
                  print(f"::error::{err}")
              sys.exit(1)

          print(f"Checked {len(json_files)} JSON files")
          EOF