From e0b585c7999bbeae270bd0187633a52034b64233 Mon Sep 17 00:00:00 2001
From: StellaOps Bot
Date: Wed, 3 Dec 2025 09:49:59 +0200
Subject: [PATCH] feat: Add JSON schema definitions for coverage and trace artifacts in reachability benchmark

---
 .../reachability-benchmark/schemas/README.md |  5 ++
 .../schemas/coverage.schema.json             | 51 +++++++++++++++++++
 .../schemas/trace.schema.json                | 42 +++++++++++++++
 3 files changed, 98 insertions(+)
 create mode 100644 bench/reachability-benchmark/schemas/coverage.schema.json
 create mode 100644 bench/reachability-benchmark/schemas/trace.schema.json

diff --git a/bench/reachability-benchmark/schemas/README.md b/bench/reachability-benchmark/schemas/README.md
index 209f19d96..5b5d4981a 100644
--- a/bench/reachability-benchmark/schemas/README.md
+++ b/bench/reachability-benchmark/schemas/README.md
@@ -3,8 +3,11 @@
 - `case.schema.yaml` — case descriptor (language, sinks, deterministic build/test, environment, optional inline ground truth summary).
 - `entrypoints.schema.yaml` — declared entrypoints grouped by type (`http`, `cli`, `scheduled`, etc.).
 - `truth.schema.json` — ground-truth labels + evidence per sink (`reachable`/`unreachable`/`unknown`).
+- `coverage.schema.json` — coverage artifact emitted by oracle tests (lines covered or function-level coverage).
+- `trace.schema.json` — dynamic trace artifact (structured path or event log) proving reachability/unreachability.
 - `submission.schema.json` — submission format (tool/run metadata, per-sink predictions, explanations).
 - `examples/` — minimal valid examples for each schema.
+- `benchmark/schemas/benchmark-manifest.schema.json` — kit manifest with hashed artifacts, resource limits, and optional DSSE signatures.
 
 ## Validate quickly
 ```bash
@@ -15,6 +18,8 @@ python -m pip install -r ../tools/requirements.txt
 python ../tools/validate.py case examples/case.sample.yaml
 python ../tools/validate.py entrypoints examples/entrypoints.sample.yaml
 python ../tools/validate.py truth examples/truth.sample.json
+python ../tools/validate.py coverage ../cases/js/unsafe-eval/outputs/coverage.json
+python ../tools/validate.py trace ../cases/js/unsafe-eval/outputs/traces/traces.json
 python ../tools/validate.py submission examples/submission.sample.json
 
 # or validate everything in one shot
diff --git a/bench/reachability-benchmark/schemas/coverage.schema.json b/bench/reachability-benchmark/schemas/coverage.schema.json
new file mode 100644
index 000000000..132dcd7d6
--- /dev/null
+++ b/bench/reachability-benchmark/schemas/coverage.schema.json
@@ -0,0 +1,51 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://stellaops.org/benchmark/reachability/coverage.schema.json",
+  "title": "Reachability Benchmark Coverage Artifact",
+  "type": "object",
+  "oneOf": [
+    {
+      "required": ["files"],
+      "properties": {
+        "files": {
+          "type": "object",
+          "additionalProperties": {
+            "type": "object",
+            "required": ["lines_covered", "lines_total"],
+            "properties": {
+              "lines_covered": {
+                "type": "array",
+                "items": { "type": "integer", "minimum": 1 }
+              },
+              "lines_total": { "type": "integer", "minimum": 1 }
+            },
+            "additionalProperties": false
+          }
+        }
+      },
+      "additionalProperties": false
+    },
+    {
+      "required": ["files"],
+      "properties": {
+        "files": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "required": ["path", "coverage"],
+            "properties": {
+              "path": { "type": "string" },
+              "functions": {
+                "type": "array",
+                "items": { "type": "string" }
+              },
+              "coverage": { "type": "number", "minimum": 0, "maximum": 1 }
+            },
+            "additionalProperties": false
+          }
+        }
+      },
+      "additionalProperties": false
+    }
+  ]
+}
diff --git a/bench/reachability-benchmark/schemas/trace.schema.json b/bench/reachability-benchmark/schemas/trace.schema.json
new file mode 100644
index 000000000..cbc89f4c4
--- /dev/null
+++ b/bench/reachability-benchmark/schemas/trace.schema.json
@@ -0,0 +1,42 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://stellaops.org/benchmark/reachability/trace.schema.json",
+  "title": "Reachability Benchmark Trace Artifact",
+  "type": "object",
+  "oneOf": [
+    {
+      "required": ["entry", "path", "sink"],
+      "properties": {
+        "entry": { "type": "string" },
+        "path": {
+          "type": "array",
+          "minItems": 1,
+          "items": { "type": "string" }
+        },
+        "sink": { "type": "string" },
+        "notes": { "type": "string" }
+      },
+      "additionalProperties": false
+    },
+    {
+      "required": ["events"],
+      "properties": {
+        "events": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "required": ["path", "type"],
+            "properties": {
+              "path": { "type": "string" },
+              "type": { "type": "string" },
+              "at": { "type": "integer", "minimum": 0 }
+            },
+            "additionalProperties": false
+          }
+        },
+        "notes": { "type": "string" }
+      },
+      "additionalProperties": false
+    }
+  ]
+}