up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
devportal-offline / build-offline (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-29 11:37:00 +02:00
parent 3488b22c0c
commit 8f54ffa203
14 changed files with 685 additions and 4 deletions

View File

@@ -27,10 +27,10 @@ Deterministic, reproducible benchmark for reachability analysis tools.
## Quick Start (once populated)
```bash
# validate schemas
npm test ./schemas # or python -m pytest schemas
# schema sanity checks (offline)
python tools/validate.py all schemas/examples
# score a submission
# score a submission (coming in task 513-008)
cd tools/scorer
./rb-score --cases ../cases --truth ../benchmark/truth --submission ../benchmark/submissions/sample.json
```

View File

@@ -0,0 +1,22 @@
# Schemas
- `case.schema.yaml` — case descriptor (language, sinks, deterministic build/test, environment, optional inline ground truth summary).
- `entrypoints.schema.yaml` — declared entrypoints grouped by type (`http`, `cli`, `scheduled`, etc.).
- `truth.schema.json` — ground-truth labels + evidence per sink (`reachable`/`unreachable`/`unknown`).
- `submission.schema.json` — submission format (tool/run metadata, per-sink predictions, explanations).
- `examples/` — minimal valid examples for each schema.
## Validate quickly
```bash
# install minimal deps (offline-friendly, pinned)
python -m pip install -r ../tools/requirements.txt
# validate individual files
python ../tools/validate.py case examples/case.sample.yaml
python ../tools/validate.py entrypoints examples/entrypoints.sample.yaml
python ../tools/validate.py truth examples/truth.sample.json
python ../tools/validate.py submission examples/submission.sample.json
# or validate everything in one shot
python ../tools/validate.py all examples
```

View File

@@ -0,0 +1,145 @@
# JSON Schema (draft 2020-12) for a reachability benchmark case descriptor.
# A case names its language/project/version, lists at least one sink, and
# pins the environment, build and test commands.
# Unknown keys are rejected wherever `additionalProperties: false` is set.
$schema: "https://json-schema.org/draft/2020-12/schema"
$id: "https://stellaops.org/benchmark/reachability/case.schema.yaml"
title: Reachability Benchmark Case Descriptor
type: object
required:
  - id
  - language
  - project
  - version
  - sinks
  - environment
  - build
  - test
properties:
  id:
    type: string
    description: Unique, stable case identifier (e.g. js-express-blog:001)
    pattern: "^[A-Za-z0-9._:-]+$"
  language:
    type: string
    enum: [js, py, java, c]
  project:
    type: string
    description: Short project name
  version:
    type: string
    description: Semantic-ish version of the case contents
    # 1 to 3 dot-separated numeric parts, optionally followed by "-suffix".
    pattern: "^\\d+(\\.\\d+){0,2}(-[A-Za-z0-9._-]+)?$"
  description:
    type: string
  repository:
    type: string
    format: uri
    description: Upstream repo (if vendored); optional for in-repo cases
  entrypoints:
    type: array
    items:
      type: string
    uniqueItems: true
  sinks:
    type: array
    minItems: 1
    items:
      type: object
      required: [id, path, kind, location]
      additionalProperties: false
      properties:
        id:
          type: string
          pattern: "^[A-Za-z0-9._:-]+$"
        path:
          type: string
          description: Fully-qualified function/method path for the sink
        kind:
          type: string
          enum: [http, file, crypto, process, deserialization, custom]
        location:
          type: object
          required: [file]
          additionalProperties: false
          properties:
            file:
              type: string
            line:
              type: integer
              minimum: 1
        # NOTE(review): `notes` is placed on the sink (not inside `location`)
        # to match the sample case; confirm against the upstream schema.
        notes:
          type: string
  environment:
    type: object
    required: [os_image]
    additionalProperties: false
    properties:
      os_image:
        type: string
        description: Base image or OS identifier (e.g. ubuntu:24.04)
      runtime:
        type: object
        description: Language/runtime versions
        # Free-form keys; every value must be a string.
        additionalProperties:
          type: string
      compiler:
        type: string
      source_date_epoch:
        type: integer
        minimum: 0
  build:
    type: object
    required: [command, source_date_epoch]
    additionalProperties: false
    properties:
      command:
        type: string
        description: Deterministic build command (invokes Dockerfile/build.sh)
      source_date_epoch:
        type: integer
        minimum: 0
      env:
        type: object
        additionalProperties: true
      outputs:
        type: object
        additionalProperties: false
        properties:
          artifact_path:
            type: string
          sbom_path:
            type: string
          coverage_path:
            type: string
          traces_dir:
            type: string
  test:
    type: object
    required: [command]
    additionalProperties: false
    properties:
      command:
        type: string
        description: Oracle test command producing coverage/traces
      expected_coverage:
        type: array
        items:
          type: string
      expected_traces:
        type: array
        items:
          type: string
      env:
        type: object
        additionalProperties: true
  ground_truth:
    type: object
    description: Optional inline truth summary (full truth lives in truth files)
    additionalProperties: false
    properties:
      summary:
        type: string
      evidence_files:
        type: array
        items:
          type: string
      notes:
        type: string
additionalProperties: false

View File

@@ -0,0 +1,41 @@
# JSON Schema (draft 2020-12) for the entrypoints declared by a case.
# `entries` maps a group name (lowercase, matching the pattern below) to a
# non-empty list of entrypoint objects; only `id` is required on each entry.
$schema: "https://json-schema.org/draft/2020-12/schema"
$id: "https://stellaops.org/benchmark/reachability/entrypoints.schema.yaml"
title: Reachability Case Entrypoints
type: object
required:
  - case_id
  - entries
properties:
  case_id:
    type: string
  entries:
    type: object
    minProperties: 1
    # Group names that do not match the pattern below are rejected.
    additionalProperties: false
    patternProperties:
      "^[a-z][a-z0-9_-]*$":
        type: array
        minItems: 1
        items:
          type: object
          required: [id]
          additionalProperties: false
          properties:
            id:
              type: string
            route:
              type: string
            method:
              type: string
            command:
              type: string
            schedule:
              type: string
            handler:
              type: string
            env:
              type: object
              additionalProperties: true
            description:
              type: string
additionalProperties: false

View File

@@ -0,0 +1,44 @@
# Example case descriptor for case.schema.yaml.
id: "js-express-blog:001"
language: js
project: express-blog
version: "1.0.0"
description: Minimal blog API with an unsafe deserializer sink.
repository: "https://example.org/express-blog"
entrypoints:
  - "POST /api/posts"
sinks:
  - id: "Deserializer::parse"
    path: "src/deserializer.js::parse"
    kind: deserialization
    location:
      file: src/deserializer.js
      line: 42
    notes: "JSON.parse on user input without guards"
environment:
  os_image: "ubuntu:24.04"
  runtime:
    node: "20.11.0"
  source_date_epoch: 1730000000
build:
  command: "./build/build.sh"
  # Same epoch as environment.source_date_epoch above.
  source_date_epoch: 1730000000
  outputs:
    artifact_path: outputs/binary.tar.gz
    sbom_path: outputs/sbom.cdx.json
    coverage_path: outputs/coverage.json
    traces_dir: outputs/traces
  env:
    NODE_ENV: production
test:
  command: "npm test"
  expected_coverage:
    - outputs/coverage.json
  expected_traces:
    - outputs/traces/traces.json
  env:
    NODE_ENV: test
ground_truth:
  summary: "Unit test test_reachable_deserialization hits the sink"
  evidence_files:
    - truth/truth.yaml
  notes: "FEATURE_JSON_ENABLED must be true for reachability"

View File

@@ -0,0 +1,17 @@
# Example entrypoints document for entrypoints.schema.yaml:
# one entry in each of the http, cli and scheduled groups.
case_id: "js-express-blog:001"
entries:
  http:
    - id: "POST /api/posts"
      route: "/api/posts"
      method: "POST"
      handler: "PostsController.create"
      description: "Create a new post (hits deserializer)"
  cli:
    - id: "generate-report"
      command: "node cli.js generate-report"
      description: "Generates a report from posts"
  scheduled:
    - id: "daily-cleanup"
      # Cron expression; quoted so the leading digit and `*` stay a string.
      schedule: "0 3 * * *"
      handler: "CleanupJob.run"
      description: "Archives soft-deleted posts nightly"

View File

@@ -0,0 +1,46 @@
{
"version": "1.0.0",
"tool": {
"name": "sample-tool",
"version": "0.1.0"
},
"run": {
"commit": "abcd1234",
"platform": "ubuntu:24.04",
"time_s": 182.4,
"peak_mb": 3072
},
"cases": [
{
"case_id": "js-express-blog:001",
"sinks": [
{
"sink_id": "Deserializer::parse",
"prediction": "reachable",
"confidence": 0.88,
"explain": {
"entry": "POST /api/posts",
"path": [
"PostsController.create",
"PostsService.createFromJson",
"Deserializer.parse"
],
"guards": [
"process.env.FEATURE_JSON_ENABLED === 'true'"
]
},
"notes": "Observed via dynamic trace"
}
]
}
],
"artifacts": {
"sbom": "sha256:deadbeef",
"attestation": "sha256:cafebabe"
},
"submitter": {
"name": "Example Corp",
"organization": "Example",
"contact": "bench@example.org"
}
}

View File

@@ -0,0 +1,37 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-express-blog:001",
"case_version": "1.0.0",
"notes": "Baseline public case",
"sinks": [
{
"sink_id": "Deserializer::parse",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reachable_deserialization.js::should_reach_sink"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/posts",
"PostsController.create",
"PostsService.createFromJson",
"Deserializer.parse"
]
},
"config_conditions": [
"process.env.FEATURE_JSON_ENABLED == 'true'"
],
"notes": "If FEATURE_JSON_ENABLED=false the path is unreachable"
}
]
}
]
}

View File

@@ -0,0 +1,104 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://stellaops.org/benchmark/reachability/submission.schema.json",
"title": "Reachability Benchmark Submission",
"type": "object",
"required": ["version", "tool", "run", "cases"],
"additionalProperties": false,
"properties": {
"version": {
"type": "string",
"enum": ["1.0.0"],
"description": "Submission schema version"
},
"tool": {
"type": "object",
"required": ["name", "version"],
"additionalProperties": false,
"properties": {
"name": {"type": "string"},
"version": {"type": "string"}
}
},
"run": {
"type": "object",
"required": ["platform"],
"additionalProperties": false,
"description": "Execution metadata for reproducibility",
"properties": {
"commit": {"type": "string"},
"platform": {"type": "string"},
"time_s": {"type": "number", "minimum": 0},
"peak_mb": {"type": "number", "minimum": 0}
}
},
"cases": {
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"required": ["case_id", "sinks"],
"additionalProperties": false,
"properties": {
"case_id": {"type": "string"},
"sinks": {
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"required": ["sink_id", "prediction"],
"additionalProperties": false,
"properties": {
"sink_id": {"type": "string"},
"prediction": {
"type": "string",
"enum": ["reachable", "unreachable"]
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"explain": {
"type": "object",
"additionalProperties": false,
"properties": {
"entry": {"type": "string"},
"path": {
"type": "array",
"items": {"type": "string"},
"minItems": 2
},
"guards": {
"type": "array",
"items": {"type": "string"},
"uniqueItems": true
}
}
},
"notes": {"type": "string"}
}
}
}
}
}
},
"artifacts": {
"type": "object",
"additionalProperties": false,
"properties": {
"sbom": {"type": "string"},
"attestation": {"type": "string"}
}
},
"submitter": {
"type": "object",
"properties": {
"name": {"type": "string"},
"organization": {"type": "string"},
"contact": {"type": "string", "format": "email"}
},
"additionalProperties": false
}
}
}

View File

@@ -0,0 +1,79 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://stellaops.org/benchmark/reachability/truth.schema.json",
"title": "Reachability Benchmark Truth Set",
"type": "object",
"required": ["version", "cases"],
"properties": {
"version": {"type": "string", "enum": ["1.0.0"]},
"cases": {
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"required": ["case_id", "sinks"],
"additionalProperties": false,
"properties": {
"case_id": {"type": "string"},
"case_version": {"type": "string"},
"notes": {"type": "string"},
"sinks": {
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"required": ["sink_id", "label"],
"additionalProperties": false,
"properties": {
"sink_id": {"type": "string"},
"label": {
"type": "string",
"enum": ["reachable", "unreachable", "unknown"]
},
"confidence": {
"type": "string",
"enum": ["high", "medium", "low"],
"default": "high"
},
"dynamic_evidence": {
"type": "object",
"additionalProperties": false,
"properties": {
"covered_by_tests": {
"type": "array",
"items": {"type": "string"},
"uniqueItems": true
},
"coverage_files": {
"type": "array",
"items": {"type": "string"},
"uniqueItems": true
}
}
},
"static_evidence": {
"type": "object",
"additionalProperties": false,
"properties": {
"call_path": {
"type": "array",
"items": {"type": "string"},
"minItems": 2
}
}
},
"config_conditions": {
"type": "array",
"items": {"type": "string"},
"uniqueItems": true
},
"notes": {"type": "string"}
}
}
}
}
}
}
},
"additionalProperties": false
}

View File

@@ -0,0 +1,2 @@
jsonschema==4.23.0
PyYAML==6.0.2

View File

@@ -0,0 +1,11 @@
# rb-score (placeholder)
Planned CLI to score reachability submissions against truth sets.
Future work (BENCH-SCORER-513-008):
- Validate submission against `schemas/submission.schema.json`.
- Validate truth against `schemas/truth.schema.json`.
- Compute precision/recall/F1, explainability score (0-3), runtime stats, determinism rate.
- Emit JSON report with stable ordering.
For now this folder is a stub; implementation will be added in task 513-008 once schemas stabilize.

View File

@@ -0,0 +1,132 @@
#!/usr/bin/env python3
"""Deterministic schema validator for reachability benchmark assets.
Usage examples:
python tools/validate.py case schemas/examples/case.sample.yaml
python tools/validate.py truth benchmark/truth/public.json
python tools/validate.py all schemas/examples
The script is offline-friendly and relies only on pinned deps from
`tools/requirements.txt`.
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Iterable, Tuple
import yaml
from jsonschema import Draft202012Validator, FormatChecker
# Benchmark root: two levels up from this script's resolved location.
ROOT = Path(__file__).resolve().parent.parent

# Schema kind -> absolute path of its schema file under <ROOT>/schemas.
SCHEMAS = {
    kind: ROOT / "schemas" / filename
    for kind, filename in (
        ("case", "case.schema.yaml"),
        ("entrypoints", "entrypoints.schema.yaml"),
        ("truth", "truth.schema.json"),
        ("submission", "submission.schema.json"),
    )
}
def load_yaml_or_json(path: Path):
    """Read ``path`` as UTF-8 text and parse it as YAML or JSON.

    The parser is picked from the file extension (case-insensitive):
    ``.yaml``/``.yml`` use ``yaml.safe_load``; every other extension is
    handed to ``json.loads``.
    """
    raw = path.read_text(encoding="utf-8")
    is_yaml = path.suffix.lower() in (".yaml", ".yml")
    parser = yaml.safe_load if is_yaml else json.loads
    return parser(raw)
def load_schema(kind: str):
    """Return the parsed schema registered for ``kind`` in ``SCHEMAS``.

    Raises ``KeyError`` when ``kind`` is not a key of ``SCHEMAS``.
    """
    return load_yaml_or_json(SCHEMAS[kind])
def validate_one(kind: str, payload_path: Path) -> Tuple[bool, Tuple[str, ...]]:
    """Validate one file against the schema registered for ``kind``.

    Args:
        kind: Key into ``SCHEMAS`` selecting the schema to apply.
        payload_path: YAML or JSON document to validate.

    Returns:
        ``(True, ())`` when the document is valid; otherwise ``(False, messages)``
        where each message has the form ``"<path>: <location>: <reason>"``.
        ``<location>`` is the ``/``-joined path to the offending element, or
        ``<root>`` for document-level problems. Messages are sorted by
        location and then by text, so output order is stable across runs.

    A payload that cannot be read or parsed is reported as a failure rather
    than raised, so one broken file does not abort validation of the rest.
    """
    schema = load_schema(kind)
    try:
        document = load_yaml_or_json(payload_path)
    except (OSError, ValueError, yaml.YAMLError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # Collapse whitespace so multi-line parser errors stay on one line.
        reason = " ".join(str(exc).split())
        return False, (f"{payload_path}: <root>: cannot load document: {reason}",)

    validator = Draft202012Validator(schema, format_checker=FormatChecker())
    errors = sorted(validator.iter_errors(document), key=lambda e: (list(e.path), e.message))

    messages = []
    for err in errors:
        # Built outside the f-string: reusing the enclosing quote character
        # inside an f-string expression is a SyntaxError before Python 3.12.
        location = "/".join(str(part) for part in err.path) or "<root>"
        messages.append(f"{payload_path}: {location}: {err.message}")
    return (not messages), tuple(messages)
def collect_all(directory: Path) -> Iterable[Tuple[str, Path]]:
    """Yield ``(kind, path)`` for every benchmark data file under ``directory``.

    The directory is walked recursively in sorted path order. A file is
    selected when it has a ``.json``/``.yaml``/``.yml`` extension
    (case-insensitive) and its lowercased stem contains one of the tokens
    below. The first matching kind, in mapping order, wins. Files that match
    no kind are skipped.

    Only data-file extensions are considered, so unrelated files whose name
    happens to contain a token (for example ``use-cases.md``) are not handed
    to the JSON/YAML parsers.
    """
    mapping = {
        "case": ("case",),
        "entrypoints": ("entrypoints", "entrypoint"),
        "truth": ("truth",),
        "submission": ("submission",),
    }
    data_suffixes = {".json", ".yaml", ".yml"}
    for path in sorted(directory.rglob("*")):
        if not path.is_file() or path.suffix.lower() not in data_suffixes:
            continue
        stem_lower = path.stem.lower()
        for kind, tokens in mapping.items():
            if any(token in stem_lower for token in tokens):
                yield kind, path
                break
def parse_args():
    """Parse the command line into a namespace with ``kind`` and ``paths``.

    ``kind`` is one of the four schema names or ``all``; ``paths`` is a list
    of one or more path strings.
    """
    cli = argparse.ArgumentParser(description="Validate reachability benchmark files against schemas.")
    allowed_kinds = ["case", "entrypoints", "truth", "submission", "all"]
    cli.add_argument(
        "kind",
        choices=allowed_kinds,
        help="Which schema to validate against or 'all' to auto-detect in a directory",
    )
    cli.add_argument(
        "paths",
        nargs="+",
        help="File(s) to validate. If kind=all, provide one or more directories to scan.",
    )
    return cli.parse_args()
def _validate_and_report(kind: str, path: Path, failures: list) -> None:
    """Validate ``path`` as ``kind``; print an OK line or append to ``failures``."""
    ok, messages = validate_one(kind, path)
    if ok:
        print(f"OK [{kind}] {path}")
    else:
        failures.extend(messages)


def main() -> int:
    """Run the validator CLI and return the process exit code.

    With ``kind == "all"`` every path argument is scanned as a directory via
    ``collect_all``; otherwise each path is validated against the single
    requested schema. ``OK`` lines are printed as files pass; all ``FAIL``
    lines are printed together at the end, in the order they were found.

    Returns:
        ``0`` when nothing failed, ``1`` otherwise. In ``all`` mode a path
        that is not a directory, or a directory with no recognisable files,
        counts as a failure so that a mistyped path cannot pass vacuously.
    """
    args = parse_args()
    failures: list[str] = []

    if args.kind == "all":
        for base in args.paths:
            base_path = Path(base)
            if not base_path.exists():
                failures.append(f"{base}: path not found")
                continue
            if not base_path.is_dir():
                failures.append(f"{base}: not a directory (kind=all scans directories)")
                continue
            matched = 0
            for kind, path in collect_all(base_path):
                matched += 1
                _validate_and_report(kind, path, failures)
            if matched == 0:
                failures.append(f"{base}: no benchmark files found to validate")
    else:
        # Single schema mode
        for path_str in args.paths:
            path = Path(path_str)
            if not path.exists():
                failures.append(f"{path}: path not found")
                continue
            _validate_and_report(args.kind, path, failures)

    for msg in failures:
        print(f"FAIL {msg}")
    return 1 if failures else 0


# Script entry point: exit with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())