feat: Implement Filesystem and MongoDB provenance writers for PackRun execution context
Some checks failed
Airgap Sealed CI Smoke / sealed-smoke (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled

- Added `FilesystemPackRunProvenanceWriter` to write provenance manifests to the filesystem.
- Introduced `MongoPackRunArtifactReader` to read artifacts from MongoDB.
- Created `MongoPackRunProvenanceWriter` to store provenance manifests in MongoDB.
- Developed unit tests for filesystem and MongoDB provenance writers.
- Established `ITimelineEventStore` and `ITimelineIngestionService` interfaces for timeline event handling.
- Implemented `TimelineIngestionService` to validate and persist timeline events with hashing.
- Created PostgreSQL schema and migration scripts for timeline indexing.
- Added dependency injection support for timeline indexer services.
- Developed tests for timeline ingestion and schema validation.
This commit is contained in:
StellaOps Bot
2025-11-30 15:38:14 +02:00
parent 8f54ffa203
commit 17d45a6d30
276 changed files with 8618 additions and 688 deletions

View File

@@ -17,6 +17,24 @@ Deterministic, reproducible benchmark for reachability analysis tools.
- `ci/` — deterministic CI workflows and scripts.
- `website/` — static site (leaderboard/docs/downloads).
Sample cases added (JS track):
- `cases/js/unsafe-eval` (reachable sink) → `benchmark/truth/js-unsafe-eval.json`.
- `cases/js/guarded-eval` (unreachable by default) → `benchmark/truth/js-guarded-eval.json`.
- `cases/js/express-eval` (admin eval reachable) → `benchmark/truth/js-express-eval.json`.
- `cases/js/express-guarded` (admin eval gated by env) → `benchmark/truth/js-express-guarded.json`.
- `cases/js/fastify-template` (template rendering reachable) → `benchmark/truth/js-fastify-template.json`.
Sample cases added (Python track):
- `cases/py/unsafe-exec` (reachable eval) → `benchmark/truth/py-unsafe-exec.json`.
- `cases/py/guarded-exec` (unreachable when FEATURE_ENABLE != 1) → `benchmark/truth/py-guarded-exec.json`.
- `cases/py/flask-template` (template rendering reachable) → `benchmark/truth/py-flask-template.json`.
- `cases/py/fastapi-guarded` (unreachable unless ALLOW_EXEC=true) → `benchmark/truth/py-fastapi-guarded.json`.
- `cases/py/django-ssti` (template rendering reachable, autoescape off) → `benchmark/truth/py-django-ssti.json`.
Sample cases added (Java track):
- `cases/java/spring-deserialize` (reachable Java deserialization) → `benchmark/truth/java-spring-deserialize.json`.
- `cases/java/spring-guarded` (deserialization unreachable unless ALLOW_DESER=true) → `benchmark/truth/java-spring-guarded.json`.
## Determinism & Offline Rules
- No network during build/test; pin images/deps; set `SOURCE_DATE_EPOCH`.
- Sort file lists; stable JSON/YAML emitters; fixed RNG seeds.

View File

@@ -0,0 +1,32 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "java-spring-deserialize:201",
"case_version": "1.0.0",
"notes": "Java deserialization sink reachable",
"sinks": [
{
"sink_id": "JavaDeserialize::handleRequest",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"src/AppTest.java"
],
"coverage_files": []
},
"static_evidence": {
"call_path": [
"POST /api/upload",
"App.handleRequest",
"ObjectInputStream.readObject"
]
},
"config_conditions": [],
"notes": "No guard; base64 payload deserialized"
}
]
}
]
}

View File

@@ -0,0 +1,29 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "java-spring-guarded:202",
"case_version": "1.0.0",
"notes": "Deserialization unreachable by default",
"sinks": [
{
"sink_id": "JavaDeserializeGuarded::handleRequest",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": ["src/AppTest.java"],
"coverage_files": []
},
"static_evidence": {
"call_path": [
"POST /api/upload",
"App.handleRequest",
"guard: ALLOW_DESER!=true"
]
},
"config_conditions": ["ALLOW_DESER == 'true'"]
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-express-eval:003",
"case_version": "1.0.0",
"notes": "Admin eval reachable",
"sinks": [
{
"sink_id": "ExpressEval::exec",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/admin/exec",
"createServer.exec",
"eval(code)"
]
},
"config_conditions": [],
"notes": "No guard on admin path"
}
]
}
]
}

View File

@@ -0,0 +1,36 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-express-guarded:004",
"case_version": "1.0.0",
"notes": "Admin exec unreachable when ALLOW_EXEC!=true",
"sinks": [
{
"sink_id": "ExpressGuarded::exec",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_unreachable.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/admin/exec",
"createServer.exec",
"guard: ALLOW_EXEC!=true"
]
},
"config_conditions": [
"ALLOW_EXEC == 'true'"
],
"notes": "Only reachable when ALLOW_EXEC=true"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-fastify-template:005",
"case_version": "1.0.0",
"notes": "Template rendering reachable",
"sinks": [
{
"sink_id": "FastifyTemplate::render",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/render",
"createServer.render",
"template replace"
]
},
"config_conditions": [],
"notes": "Simple template replace used as sink"
}
]
}
]
}

View File

@@ -0,0 +1,36 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-guarded-eval:002",
"case_version": "1.0.0",
"notes": "Eval sink guarded by FEATURE_ENABLE; unreachable when flag off",
"sinks": [
{
"sink_id": "GuardedEval::handleRequest",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_unreachable.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/exec",
"app.js::handleRequest",
"guard: FEATURE_ENABLE != 1"
]
},
"config_conditions": [
"FEATURE_ENABLE == '1'"
],
"notes": "Sink only executes when FEATURE_ENABLE=1"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-unsafe-eval:001",
"case_version": "1.0.0",
"notes": "Unsafe eval sink reachable via POST /api/exec",
"sinks": [
{
"sink_id": "UnsafeEval::handleRequest",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/exec",
"app.js::handleRequest",
"eval(code)"
]
},
"config_conditions": [],
"notes": "No guards; direct eval on user input"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-django-ssti:105",
"case_version": "1.0.0",
"notes": "Template rendering reachable (autoescape off)",
"sinks": [
{
"sink_id": "DjangoSSTI::render",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /render",
"app.handle_request",
"render"
]
},
"config_conditions": [],
"notes": "Autoescape disabled"
}
]
}
]
}

View File

@@ -0,0 +1,36 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-fastapi-guarded:104",
"case_version": "1.0.0",
"notes": "Eval unreachable unless ALLOW_EXEC=true",
"sinks": [
{
"sink_id": "FastApiGuarded::handle_request",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_unreachable.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /exec",
"app.handle_request",
"guard: ALLOW_EXEC!=true"
]
},
"config_conditions": [
"ALLOW_EXEC == 'true'"
],
"notes": "Feature flag blocks sink by default"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-flask-template:103",
"case_version": "1.0.0",
"notes": "Template rendering reachable",
"sinks": [
{
"sink_id": "FlaskTemplate::render",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /render",
"app.handle_request",
"render"
]
},
"config_conditions": [],
"notes": "Simple template placeholder replacement"
}
]
}
]
}

View File

@@ -0,0 +1,36 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-guarded-exec:102",
"case_version": "1.0.0",
"notes": "Eval unreachable unless FEATURE_ENABLE=1",
"sinks": [
{
"sink_id": "PyGuardedExec::handle_request",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_unreachable.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/exec",
"app.handle_request",
"guard: FEATURE_ENABLE != 1"
]
},
"config_conditions": [
"FEATURE_ENABLE == '1'"
],
"notes": "Feature flag required"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-unsafe-exec:101",
"case_version": "1.0.0",
"notes": "Eval reachable",
"sinks": [
{
"sink_id": "PyUnsafeExec::handle_request",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/exec",
"app.handle_request",
"eval(code)"
]
},
"config_conditions": [],
"notes": "No guards"
}
]
}
]
}

View File

@@ -0,0 +1,38 @@
id: "java-spring-deserialize:201"
language: java
project: spring-deserialize
version: "1.0.0"
description: "Java deserialization sink reachable via POST /api/upload"
entrypoints:
- "POST /api/upload"
sinks:
- id: "JavaDeserialize::handleRequest"
path: "bench.reachability.App.handleRequest"
kind: "custom"
location:
file: src/App.java
line: 9
notes: "java.io.ObjectInputStream on user-controlled payload"
environment:
os_image: "eclipse-temurin:21-jdk"
runtime:
java: "21"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./build/build.sh"
expected_coverage: []
expected_traces: []
env:
JAVA_TOOL_OPTIONS: "-ea"
ground_truth:
summary: "Deserialization reachable"
evidence_files:
- "../benchmark/truth/java-spring-deserialize.json"

View File

@@ -0,0 +1,8 @@
case_id: "java-spring-deserialize:201"
entries:
http:
- id: "POST /api/upload"
route: "/api/upload"
method: "POST"
handler: "App.handleRequest"
description: "Binary payload base64-deserialized"

View File

@@ -0,0 +1,12 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.stellaops.bench</groupId>
<artifactId>spring-deserialize</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
</properties>
</project>

View File

@@ -0,0 +1,26 @@
package bench.reachability;
import java.util.Map;
import java.util.Base64;
import java.io.*;
public class App {
// Unsafe Java deserialization sink (reachable)
public static Response handleRequest(Map<String, String> body) {
String payload = body.get("payload");
if (payload == null) {
return new Response(400, "bad request");
}
try {
byte[] data = Base64.getDecoder().decode(payload);
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data));
Object obj = ois.readObject();
ois.close();
return new Response(200, obj.toString());
} catch (Exception ex) {
return new Response(500, ex.getClass().getSimpleName());
}
}
public record Response(int status, String body) {}
}

View File

@@ -0,0 +1,30 @@
package bench.reachability;
import java.io.*;
import java.util.*;
import java.util.Base64;

// Hand-rolled harness (no external deps); relies on `java -ea` assertions.
public class AppTest {

    /** Java-serialize a value and return it base64-encoded. */
    private static String toBase64(Object value) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(buffer)) {
            out.writeObject(value);
        }
        return Base64.getEncoder().encodeToString(buffer.toByteArray());
    }

    public static void main(String[] args) throws Exception {
        var response = App.handleRequest(Map.of("payload", toBase64("hello")));
        assert response.status() == 200 : "status";
        assert response.body().equals("hello") : "body";

        // Emit a simple marker file as a trace/coverage stand-in.
        File outputs = new File("outputs");
        outputs.mkdirs();
        try (FileWriter marker = new FileWriter(new File(outputs, "SINK_REACHED"))) {
            marker.write("true");
        }
    }
}

View File

@@ -0,0 +1,38 @@
id: "java-spring-guarded:202"
language: java
project: spring-guarded
version: "1.0.0"
description: "Java deserialization guarded by ALLOW_DESER flag (unreachable by default)"
entrypoints:
- "POST /api/upload"
sinks:
- id: "JavaDeserializeGuarded::handleRequest"
path: "bench.reachability.App.handleRequest"
kind: "custom"
location:
file: src/App.java
line: 9
notes: "ObjectInputStream gated by ALLOW_DESER"
environment:
os_image: "eclipse-temurin:21-jdk"
runtime:
java: "21"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./build/build.sh"
expected_coverage: []
expected_traces: []
env:
JAVA_TOOL_OPTIONS: "-ea"
ground_truth:
summary: "Guard blocks deserialization unless ALLOW_DESER=true"
evidence_files:
- "../benchmark/truth/java-spring-guarded.json"

View File

@@ -0,0 +1,8 @@
case_id: "java-spring-guarded:202"
entries:
http:
- id: "POST /api/upload"
route: "/api/upload"
method: "POST"
handler: "App.handleRequest"
description: "Base64 payload deserialization guarded by ALLOW_DESER"

View File

@@ -0,0 +1,12 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.stellaops.bench</groupId>
<artifactId>spring-guarded</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
</properties>
</project>

View File

@@ -0,0 +1,29 @@
package bench.reachability;
import java.util.Map;
import java.util.Base64;
import java.io.*;
public class App {
// Deserialization sink guarded by feature flag
public static Response handleRequest(Map<String, String> body, Map<String, String> env) {
if (!"true".equals(env.getOrDefault("ALLOW_DESER", "false"))) {
return new Response(403, "forbidden");
}
String payload = body.get("payload");
if (payload == null) {
return new Response(400, "bad request");
}
try {
byte[] data = Base64.getDecoder().decode(payload);
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data));
Object obj = ois.readObject();
ois.close();
return new Response(200, obj.toString());
} catch (Exception ex) {
return new Response(500, ex.getClass().getSimpleName());
}
}
public record Response(int status, String body) {}
}

View File

@@ -0,0 +1,29 @@
package bench.reachability;
import java.io.*;
import java.util.*;
import java.util.Base64;

/** Asserts the guard blocks deserialization when ALLOW_DESER is "false". */
public class AppTest {

    /** Java-serialize a value and return it base64-encoded. */
    private static String toBase64(Object value) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(buffer)) {
            out.writeObject(value);
        }
        return Base64.getEncoder().encodeToString(buffer.toByteArray());
    }

    public static void main(String[] args) throws Exception {
        Map<String, String> request = Map.of("payload", toBase64("hi"));
        var response = App.handleRequest(request, Map.of("ALLOW_DESER", "false"));
        assert response.status() == 403 : "status";
        assert response.body().equals("forbidden") : "body";

        // Marker file records that the guard path was taken.
        File outputs = new File("outputs");
        outputs.mkdirs();
        try (FileWriter marker = new FileWriter(new File(outputs, "SINK_BLOCKED"))) {
            marker.write("true");
        }
    }
}

View File

@@ -0,0 +1,38 @@
id: "js-express-eval:003"
language: js
project: express-eval
version: "1.0.0"
description: "Admin exec endpoint evaluates user code"
entrypoints:
- "POST /api/admin/exec"
sinks:
- id: "ExpressEval::exec"
path: "src/app.js::createServer"
kind: "process"
location:
file: src/app.js
line: 17
notes: "eval(code) on admin path"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Admin exec endpoint reachable and executes eval"
evidence_files:
- "../benchmark/truth/js-express-eval.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-express-eval:003"
entries:
http:
- id: "POST /api/admin/exec"
route: "/api/admin/exec"
method: "POST"
handler: "createServer.exec"
description: "Admin-only exec (reachable)"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-express-eval",
"version": "1.0.0",
"description": "Reachability benchmark case: express-like admin eval endpoint",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,34 @@
'use strict';
// Minimal express-like router: routes are keyed by "METHOD path" strings.
// NOTE(review): benchmark source file — ground-truth call paths reference
// this code's structure, so only comments are changed here.
function makeApp() {
  const routes = {};
  return {
    // Register a handler for POST <path>.
    post(path, handler) {
      routes[`POST ${path}`] = handler;
    },
    // Dispatch a request; returns the handler's {status, body} or a 404.
    handle(method, path, req, res) {
      const key = `${method} ${path}`;
      if (routes[key]) {
        return routes[key](req, res);
      }
      return { status: 404, body: 'not found' };
    }
  };
}
// Build the benchmark app with the admin exec endpoint wired up.
function createServer() {
  const app = makeApp();
  app.post('/api/admin/exec', (req) => {
    if (!req || typeof req.body?.code !== 'string') {
      return { status: 400, body: 'bad request' };
    }
    // Sink: eval on admin endpoint (reachable) — intentionally unguarded.
    // eslint-disable-next-line no-eval
    const result = eval(req.body.code);
    return { status: 200, body: String(result) };
  });
  return app;
}
module.exports = { createServer };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Deterministic test runner: pins timestamp, timezone, and locale before
# invoking the Node reachability test so outputs are reproducible offline.
set -euo pipefail
cd "$(dirname "$0")"  # run relative to this script's directory
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}  # reproducible-build epoch
export TZ=UTC
export LC_ALL=C
node test_reach.js

View File

@@ -0,0 +1,54 @@
'use strict';
// Reachability test: drives the admin exec endpoint, asserts the eval sink
// fires, then emits trace, coverage, and SINK_REACHED artifacts under ../outputs.
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { createServer } = require('../src/app');

const outDir = path.resolve(__dirname, '../outputs');
const traceDir = path.join(outDir, 'traces');

// Serialize `payload` as pretty-printed JSON at `file`.
const writeJson = (file, payload) =>
  fs.writeFileSync(file, JSON.stringify(payload, null, 2));

function main() {
  fs.mkdirSync(outDir, { recursive: true });
  fs.mkdirSync(traceDir, { recursive: true });

  const app = createServer();
  const res = app.handle('POST', '/api/admin/exec', { body: { code: '21*2' } });
  assert.strictEqual(res.status, 200);
  assert.strictEqual(res.body, '42');

  writeJson(path.join(traceDir, 'traces.json'), {
    entry: 'POST /api/admin/exec',
    path: ['app.js::createServer', 'handler', 'eval(code)'],
    sink: 'ExpressEval::exec',
    notes: 'Admin exec reached'
  });
  writeJson(path.join(outDir, 'coverage.json'), {
    files: {
      'src/app.js': {
        lines_covered: [5, 6, 7, 13, 18, 19],
        lines_total: 40
      }
    }
  });
  fs.writeFileSync(path.join(outDir, 'SINK_REACHED'), 'true');
}

main();

View File

@@ -0,0 +1,38 @@
id: "js-express-guarded:004"
language: js
project: express-guarded
version: "1.0.0"
description: "Admin exec guarded by ALLOW_EXEC flag; unreachable by default"
entrypoints:
- "POST /api/admin/exec"
sinks:
- id: "ExpressGuarded::exec"
path: "src/app.js::createServer"
kind: "process"
location:
file: src/app.js
line: 16
notes: "eval(code) gated by ALLOW_EXEC"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Guard prevents sink unless ALLOW_EXEC=true"
evidence_files:
- "../benchmark/truth/js-express-guarded.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-express-guarded:004"
entries:
http:
- id: "POST /api/admin/exec"
route: "/api/admin/exec"
method: "POST"
handler: "createServer.exec"
description: "Admin exec blocked unless ALLOW_EXEC=true"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-express-guarded",
"version": "1.0.0",
"description": "Reachability benchmark case: express-like admin exec guarded by env flag",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,33 @@
'use strict';
// Minimal express-like router: routes are keyed by "METHOD path" strings.
// NOTE(review): benchmark source file — ground-truth call paths reference
// this code's structure, so only comments are changed here.
function makeApp() {
  const routes = {};
  return {
    // Register a handler for POST <path>.
    post(path, handler) {
      routes[`POST ${path}`] = handler;
    },
    // Dispatch a request; returns the handler's {status, body} or a 404.
    handle(method, path, req) {
      const key = `${method} ${path}`;
      if (routes[key]) return routes[key](req);
      return { status: 404, body: 'not found' };
    }
  };
}
// Admin exec endpoint gated by the ALLOW_EXEC flag (read from req.env).
function createServer() {
  const app = makeApp();
  app.post('/api/admin/exec', (req) => {
    // Guard: the eval sink below is unreachable unless ALLOW_EXEC === 'true'.
    if (req?.env?.ALLOW_EXEC !== 'true') {
      return { status: 403, body: 'forbidden' };
    }
    if (typeof req?.body?.code !== 'string') {
      return { status: 400, body: 'bad request' };
    }
    // Sink: eval on user-supplied code (intentional benchmark sink).
    // eslint-disable-next-line no-eval
    const result = eval(req.body.code);
    return { status: 200, body: String(result) };
  });
  return app;
}
module.exports = { createServer };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Deterministic test runner: pins timestamp, timezone, and locale before
# invoking the Node unreachability test so outputs are reproducible offline.
set -euo pipefail
cd "$(dirname "$0")"  # run relative to this script's directory
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}  # reproducible-build epoch
export TZ=UTC
export LC_ALL=C
node test_unreachable.js

View File

@@ -0,0 +1,53 @@
'use strict';
// Unreachability test: confirms the guard rejects exec when ALLOW_EXEC=false,
// then records trace/coverage artifacts (no SINK_REACHED marker is written).
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { createServer } = require('../src/app');

const outDir = path.resolve(__dirname, '../outputs');
const traceDir = path.join(outDir, 'traces');

// Serialize `payload` as pretty-printed JSON at `file`.
const writeJson = (file, payload) =>
  fs.writeFileSync(file, JSON.stringify(payload, null, 2));

function main() {
  fs.mkdirSync(outDir, { recursive: true });
  fs.mkdirSync(traceDir, { recursive: true });

  const server = createServer();
  const reply = server.handle('POST', '/api/admin/exec', {
    body: { code: '2+2' },
    env: { ALLOW_EXEC: 'false' }
  });
  assert.strictEqual(reply.status, 403);
  assert.strictEqual(reply.body, 'forbidden');

  writeJson(path.join(traceDir, 'traces.json'), {
    entry: 'POST /api/admin/exec',
    path: ['app.js::createServer', 'guard: ALLOW_EXEC!=true'],
    sink: 'ExpressGuarded::exec',
    notes: 'Guard blocked sink'
  });
  writeJson(path.join(outDir, 'coverage.json'), {
    files: {
      'src/app.js': {
        lines_covered: [5, 6, 7, 12, 13, 14, 15],
        lines_total: 50
      }
    }
  });
}

main();

View File

@@ -0,0 +1,38 @@
id: "js-fastify-template:005"
language: js
project: fastify-template
version: "1.0.0"
description: "Template rendering route replaces user placeholder"
entrypoints:
- "POST /api/render"
sinks:
- id: "FastifyTemplate::render"
path: "src/app.js::createServer"
kind: "http"
location:
file: src/app.js
line: 19
notes: "Template rendering of user input"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Template rendering reachable via POST /api/render"
evidence_files:
- "../benchmark/truth/js-fastify-template.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-fastify-template:005"
entries:
http:
- id: "POST /api/render"
route: "/api/render"
method: "POST"
handler: "createServer.render"
description: "Template rendering endpoint"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-fastify-template",
"version": "1.0.0",
"description": "Reachability benchmark case: fastify-like template rendering",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,33 @@
'use strict';
// Simulated Fastify route registration for template injection.
// NOTE(review): benchmark source file — ground-truth call paths reference
// this code's structure, so only comments are changed here.
function buildServer() {
  const routes = {};
  return {
    // Register a handler for POST <path>.
    post(path, handler) {
      routes[`POST ${path}`] = handler;
    },
    // Fastify-style inject: synchronously invoke the matching handler.
    inject(method, path, payload) {
      const key = `${method} ${path}`;
      const handler = routes[key];
      if (!handler) return { status: 404, body: 'not found' };
      return handler({ body: payload });
    }
  };
}
// Template-rendering endpoint: substitutes {{user}} in a user-supplied template.
function createServer() {
  const server = buildServer();
  server.post('/api/render', (req) => {
    const template = req?.body?.template;
    if (typeof template !== 'string') {
      return { status: 400, body: 'bad request' };
    }
    // NOTE: String.replace swaps only the first '{{user}}' occurrence.
    const compiled = template.replace('{{user}}', 'guest');
    // Sink: writes rendered content to log (simulated SSR)
    return { status: 200, body: compiled };
  });
  return server;
}
module.exports = { createServer };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Deterministic test runner: pins timestamp, timezone, and locale before
# invoking the Node reachability test so outputs are reproducible offline.
set -euo pipefail
cd "$(dirname "$0")"  # run relative to this script's directory
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}  # reproducible-build epoch
export TZ=UTC
export LC_ALL=C
node test_reach.js

View File

@@ -0,0 +1,54 @@
'use strict';
// Reachability test: renders a template through the injected route, asserts
// the substitution, and emits trace/coverage/SINK_REACHED artifacts.
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { createServer } = require('../src/app');

const outDir = path.resolve(__dirname, '../outputs');
const traceDir = path.join(outDir, 'traces');

// Serialize `payload` as pretty-printed JSON at `file`.
const writeJson = (file, payload) =>
  fs.writeFileSync(file, JSON.stringify(payload, null, 2));

function main() {
  fs.mkdirSync(outDir, { recursive: true });
  fs.mkdirSync(traceDir, { recursive: true });

  const server = createServer();
  const reply = server.inject('POST', '/api/render', { template: 'Hello {{user}}' });
  assert.strictEqual(reply.status, 200);
  assert.strictEqual(reply.body, 'Hello guest');

  writeJson(path.join(traceDir, 'traces.json'), {
    entry: 'POST /api/render',
    path: ['app.js::createServer', 'render template'],
    sink: 'FastifyTemplate::render',
    notes: 'Template rendered with user input'
  });
  writeJson(path.join(outDir, 'coverage.json'), {
    files: {
      'src/app.js': {
        lines_covered: [5, 6, 7, 13, 18, 20],
        lines_total: 45
      }
    }
  });
  fs.writeFileSync(path.join(outDir, 'SINK_REACHED'), 'true');
}

main();

View File

@@ -0,0 +1,38 @@
id: "js-guarded-eval:002"
language: js
project: guarded-eval
version: "1.0.0"
description: "Eval sink guarded by FEATURE_ENABLE flag; unreachable when flag is off"
entrypoints:
- "POST /api/exec"
sinks:
- id: "GuardedEval::handleRequest"
path: "src/app.js::handleRequest"
kind: "process"
location:
file: src/app.js
line: 13
notes: "eval on user input guarded by FEATURE_ENABLE"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Guard prevents sink when FEATURE_ENABLE != 1"
evidence_files:
- "../benchmark/truth/js-guarded-eval.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-guarded-eval:002"
entries:
http:
- id: "POST /api/exec"
route: "/api/exec"
method: "POST"
handler: "app.js::handleRequest"
description: "Feature-flagged code execution endpoint"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-guarded-eval",
"version": "1.0.0",
"description": "Reachability benchmark case: eval guarded by feature flag",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,19 @@
'use strict';
// Feature-flagged exec endpoint: handler for POST /api/exec.
// NOTE(review): benchmark source file — ground-truth call paths reference
// this code's structure, so only comments are changed here.
// `env` defaults to process.env so tests can inject a controlled environment.
function handleRequest(body, env = process.env) {
  // Guard: the eval sink below is unreachable unless FEATURE_ENABLE is exactly '1'.
  if (env.FEATURE_ENABLE !== '1') {
    return { status: 403, body: 'disabled' };
  }
  const code = body && body.code;
  if (typeof code !== 'string') {
    return { status: 400, body: 'bad request' };
  }
  // This sink is reachable only when FEATURE_ENABLE=1.
  // eslint-disable-next-line no-eval
  const result = eval(code);
  return { status: 200, body: String(result) };
}
module.exports = { handleRequest };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Deterministic test runner: pins timestamp, timezone, and locale before
# invoking the Node unreachability test so outputs are reproducible offline.
set -euo pipefail
cd "$(dirname "$0")"  # run relative to this script's directory
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}  # reproducible-build epoch
export TZ=UTC
export LC_ALL=C
node test_unreachable.js

View File

@@ -0,0 +1,54 @@
'use strict';
// Unreachability test: verifies the feature-flag guard blocks eval when
// FEATURE_ENABLE is off; no SINK_REACHED marker is written.
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { handleRequest } = require('../src/app');

const outDir = path.resolve(__dirname, '../outputs');
const traceDir = path.join(outDir, 'traces');

// Serialize `payload` as pretty-printed JSON at `file`.
const writeJson = (file, payload) =>
  fs.writeFileSync(file, JSON.stringify(payload, null, 2));

function main() {
  fs.mkdirSync(outDir, { recursive: true });
  fs.mkdirSync(traceDir, { recursive: true });

  const reply = handleRequest({ code: '1 + 2' }, { FEATURE_ENABLE: '0' });
  assert.strictEqual(reply.status, 403);
  assert.strictEqual(reply.body, 'disabled');

  // Record that the guard path was taken; no SINK_REACHED marker is written.
  writeJson(path.join(traceDir, 'traces.json'), {
    entry: 'POST /api/exec',
    path: ['app.js:handleRequest', 'guard: FEATURE_ENABLE != 1'],
    sink: 'GuardedEval::handleRequest',
    notes: 'Guard prevented sink execution'
  });
  writeJson(path.join(outDir, 'coverage.json'), {
    files: {
      'src/app.js': {
        lines_covered: [5, 6, 7, 9, 10, 11],
        lines_total: 32
      }
    }
  });
}

main();

View File

@@ -0,0 +1,38 @@
id: "js-unsafe-eval:001"
language: js
project: unsafe-eval
version: "1.0.0"
description: "Minimal handler with unsafe eval sink reachable via POST /api/exec"
entrypoints:
- "POST /api/exec"
sinks:
- id: "UnsafeEval::handleRequest"
path: "src/app.js::handleRequest"
kind: "process"
location:
file: src/app.js
line: 12
notes: "eval on user-controlled input"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Unit test triggers eval sink with payload {code: '1+2'}"
evidence_files:
- "../benchmark/truth/js-unsafe-eval.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-unsafe-eval:001"
entries:
http:
- id: "POST /api/exec"
route: "/api/exec"
method: "POST"
handler: "app.js::handleRequest"
description: "Executes user-supplied code (unsafe eval)"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-unsafe-eval",
"version": "1.0.0",
"description": "Reachability benchmark case: unsafe eval in minimal JS handler",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,17 @@
'use strict';
// Minimal HTTP-like handler exposing an unsafe eval sink for reachability.
// The handler is intentionally small to avoid external dependencies.
function handleRequest(body) {
const code = body && body.code;
if (typeof code !== 'string') {
return { status: 400, body: 'bad request' };
}
// Dangerous: executes user-controlled code. The test drives this sink.
// eslint-disable-next-line no-eval
const result = eval(code);
return { status: 200, body: String(result) };
}
module.exports = { handleRequest };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Run the reachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
node test_reach.js

View File

@@ -0,0 +1,55 @@
'use strict';
// Driver for the unsafe-eval case: proves the eval sink IS reached and
// records deterministic trace/coverage artifacts under outputs/.
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { handleRequest } = require('../src/app');

// Artifact locations under the case's outputs/ directory.
const OUT_DIR = path.resolve(__dirname, '../outputs');
const TRACE_DIR = path.join(OUT_DIR, 'traces');
const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json');
const TRACE_FILE = path.join(TRACE_DIR, 'traces.json');

// Create the output directories if they do not exist yet.
function ensureDirs() {
  fs.mkdirSync(OUT_DIR, { recursive: true });
  fs.mkdirSync(TRACE_DIR, { recursive: true });
}

// Write a single dynamic trace record (entry point + path nodes) to TRACE_FILE.
function recordTrace(entry, pathNodes) {
  fs.writeFileSync(
    TRACE_FILE,
    JSON.stringify({
      entry,
      path: pathNodes,
      sink: 'UnsafeEval::handleRequest',
      notes: 'Test-driven dynamic trace'
    }, null, 2)
  );
}

// Write a minimal coverage summary for one file; lines_total is fixed at 30.
function recordCoverage(filePath, lines) {
  fs.writeFileSync(
    COVERAGE_FILE,
    JSON.stringify({
      files: {
        [filePath]: {
          lines_covered: lines,
          lines_total: 30
        }
      }
    }, null, 2)
  );
}

// Drive the handler and assert the eval sink executed (200 / "3"), then
// record the trace, coverage, and the SINK_REACHED marker.
(function main() {
  ensureDirs();
  const payload = { code: '1 + 2' };
  const response = handleRequest(payload);
  assert.strictEqual(response.status, 200);
  assert.strictEqual(response.body, '3');
  recordTrace('POST /api/exec', ['app.js:handleRequest', 'eval(code)']);
  recordCoverage('src/app.js', [5, 6, 7, 12, 15]);
  // Marker file proves sink executed
  fs.writeFileSync(path.join(OUT_DIR, 'SINK_REACHED'), 'true');
})();

View File

@@ -0,0 +1,38 @@
id: "py-django-ssti:105"
language: py
project: django-ssti
version: "1.0.0"
description: "Django-like template rendering (autoescape off) reachable"
entrypoints:
- "POST /render"
sinks:
- id: "DjangoSSTI::render"
path: "src/app.py::handle_request"
kind: "http"
location:
file: src/app.py
line: 5
notes: "template replace without escaping"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Template rendering reachable with autoescape off"
evidence_files:
- "../benchmark/truth/py-django-ssti.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-django-ssti:105"
entries:
http:
- id: "POST /render"
route: "/render"
method: "POST"
handler: "app.handle_request"
description: "Template rendering with autoescape off"

View File

@@ -0,0 +1 @@
# stdlib only

View File

@@ -0,0 +1,12 @@
"""Django-like template rendering with autoescape off (reachable)."""
def render(template: str, context: dict) -> str:
# naive render; simulates autoescape off
return template.replace("{{user}}", context.get("user", "guest"))
def handle_request(body):
template = body.get("template") if isinstance(body, dict) else None
if not isinstance(template, str):
return {"status": 400, "body": "bad request"}
rendered = render(template, {"user": "guest"})
return {"status": 200, "body": rendered}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the reachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_reach.py

View File

@@ -0,0 +1,48 @@
"""Reachability driver for the django-ssti case.

Calls the handler, asserts the rendering sink executed, and writes
deterministic trace/coverage artifacts plus a SINK_REACHED marker
under outputs/.
"""
import json
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist a single dynamic trace (entry point + call path) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "DjangoSSTI::render",
            "notes": "Template rendered (autoescape off)"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 38.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 38
                }
            }
        }, indent=2)
    )


def test_reach():
    # Exercise the handler and prove the rendering sink executed.
    ensure_dirs()
    res = handle_request({"template": "Hello {{user}}"})
    assert res["status"] == 200
    assert res["body"] == "Hello guest"
    record_trace("POST /render", ["app.py::handle_request", "render"])
    record_coverage("src/app.py", [3,4,5,7,8,9,10])
    # Marker file proves the sink was reached.
    (OUT / "SINK_REACHED").write_text("true")


if __name__ == "__main__":
    test_reach()

View File

@@ -0,0 +1,38 @@
id: "py-fastapi-guarded:104"
language: py
project: fastapi-guarded
version: "1.0.0"
description: "FastAPI-like exec guarded by ALLOW_EXEC flag (unreachable by default)"
entrypoints:
- "POST /exec"
sinks:
- id: "FastApiGuarded::handle_request"
path: "src/app.py::handle_request"
kind: "process"
location:
file: src/app.py
line: 7
notes: "eval guarded by ALLOW_EXEC"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Guard blocks eval unless ALLOW_EXEC=true"
evidence_files:
- "../benchmark/truth/py-fastapi-guarded.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-fastapi-guarded:104"
entries:
http:
- id: "POST /exec"
route: "/exec"
method: "POST"
handler: "app.handle_request"
description: "Exec guarded by ALLOW_EXEC"

View File

@@ -0,0 +1 @@
# stdlib only

View File

@@ -0,0 +1,11 @@
"""FastAPI-like handler with feature flag guarding exec."""
def handle_request(body, env=None):
env = env or {}
if env.get("ALLOW_EXEC") != "true":
return {"status": 403, "body": "forbidden"}
code = body.get("code") if isinstance(body, dict) else None
if not isinstance(code, str):
return {"status": 400, "body": "bad request"}
result = eval(code)
return {"status": 200, "body": str(result)}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the unreachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_unreachable.py

View File

@@ -0,0 +1,47 @@
"""Driver for the fastapi-guarded case: proves the eval sink is NOT reached.

Runs the handler with ALLOW_EXEC disabled and records trace/coverage
evidence; no SINK_REACHED marker is written because the guard rejects
the request.
"""
import json
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist the guard-path trace (entry point + nodes) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "FastApiGuarded::handle_request",
            "notes": "Guard blocked eval"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 40.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 40
                }
            }
        }, indent=2)
    )


def test_unreachable():
    # With ALLOW_EXEC != "true" the handler must refuse before eval runs.
    ensure_dirs()
    res = handle_request({"code": "10/2"}, env={"ALLOW_EXEC": "false"})
    assert res["status"] == 403
    assert res["body"] == "forbidden"
    record_trace("POST /exec", ["app.py::handle_request", "guard: ALLOW_EXEC!=true"])
    record_coverage("src/app.py", [3,4,5,8,9,11])


if __name__ == "__main__":
    test_unreachable()

View File

@@ -0,0 +1,38 @@
id: "py-flask-template:103"
language: py
project: flask-template
version: "1.0.0"
description: "Template rendering reachable via POST /render"
entrypoints:
- "POST /render"
sinks:
- id: "FlaskTemplate::render"
path: "src/app.py::handle_request"
kind: "http"
location:
file: src/app.py
line: 5
notes: "template replace on user input"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Template rendering reachable"
evidence_files:
- "../benchmark/truth/py-flask-template.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-flask-template:103"
entries:
http:
- id: "POST /render"
route: "/render"
method: "POST"
handler: "app.handle_request"
description: "Template rendering"

View File

@@ -0,0 +1 @@
# stdlib only for this minimal case

View File

@@ -0,0 +1,12 @@
"""Minimal flask-like template rendering sink (reachable)."""
def render(template: str, context: dict) -> str:
return template.replace("{{name}}", context.get("name", "guest"))
def handle_request(body):
template = body.get("template") if isinstance(body, dict) else None
if not isinstance(template, str):
return {"status": 400, "body": "bad request"}
rendered = render(template, {"name": "guest"})
# Sink: returns rendered template (models potential SSTI)
return {"status": 200, "body": rendered}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the reachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_reach.py

View File

@@ -0,0 +1,48 @@
"""Reachability driver for the flask-template case.

Calls the handler, asserts the rendering sink executed, and writes
deterministic trace/coverage artifacts plus a SINK_REACHED marker
under outputs/.
"""
import json
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist a single dynamic trace (entry point + call path) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "FlaskTemplate::render",
            "notes": "Template rendered"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 40.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 40
                }
            }
        }, indent=2)
    )


def test_reach():
    # Exercise the handler and prove the rendering sink executed.
    ensure_dirs()
    res = handle_request({"template": "Hello {{name}}"})
    assert res["status"] == 200
    assert res["body"] == "Hello guest"
    record_trace("POST /render", ["app.py::handle_request", "render"])
    record_coverage("src/app.py", [4,5,6,8,9,10,11])
    # Marker file proves the sink was reached.
    (OUT / "SINK_REACHED").write_text("true")


if __name__ == "__main__":
    test_reach()

View File

@@ -0,0 +1,38 @@
id: "py-guarded-exec:102"
language: py
project: guarded-exec
version: "1.0.0"
description: "Python eval guarded by FEATURE_ENABLE flag; unreachable by default"
entrypoints:
- "POST /api/exec"
sinks:
- id: "PyGuardedExec::handle_request"
path: "src/app.py::handle_request"
kind: "process"
location:
file: src/app.py
line: 7
notes: "eval guarded by FEATURE_ENABLE"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Guard blocks eval when FEATURE_ENABLE != 1"
evidence_files:
- "../benchmark/truth/py-guarded-exec.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-guarded-exec:102"
entries:
http:
- id: "POST /api/exec"
route: "/api/exec"
method: "POST"
handler: "app.handle_request"
description: "Eval guarded by FEATURE_ENABLE"

View File

@@ -0,0 +1 @@
# Intentionally empty; stdlib only.

View File

@@ -0,0 +1,13 @@
"""Python handler with feature-flag guard for eval sink."""
def handle_request(body, env=None):
env = env or {}
if env.get("FEATURE_ENABLE") != "1":
return {"status": 403, "body": "disabled"}
code = body.get("code") if isinstance(body, dict) else None
if not isinstance(code, str):
return {"status": 400, "body": "bad request"}
result = eval(code)
return {"status": 200, "body": str(result)}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the unreachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_unreachable.py

View File

@@ -0,0 +1,48 @@
"""Driver for the guarded-exec case: proves the eval sink is NOT reached.

Runs the handler with FEATURE_ENABLE=0 and records trace/coverage evidence;
no SINK_REACHED marker is written because the guard rejects the request.
"""
import json
import os  # NOTE(review): unused in this module — candidate for removal.
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist the guard-path trace (entry point + nodes) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "PyGuardedExec::handle_request",
            "notes": "Guard blocked eval"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 34.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 34
                }
            }
        }, indent=2)
    )


def test_unreachable():
    # With the feature flag off, the handler must refuse before eval runs.
    ensure_dirs()
    res = handle_request({"code": "5*5"}, env={"FEATURE_ENABLE": "0"})
    assert res["status"] == 403
    assert res["body"] == "disabled"
    record_trace("POST /api/exec", ["app.py::handle_request", "guard: FEATURE_ENABLE != 1"])
    record_coverage("src/app.py", [3,4,5,8,9,11])


if __name__ == "__main__":
    test_unreachable()

View File

@@ -0,0 +1,38 @@
id: "py-unsafe-exec:101"
language: py
project: unsafe-exec
version: "1.0.0"
description: "Python handler with reachable eval sink"
entrypoints:
- "POST /api/exec"
sinks:
- id: "PyUnsafeExec::handle_request"
path: "src/app.py::handle_request"
kind: "process"
location:
file: src/app.py
line: 8
notes: "eval on user input"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Eval reachable via POST /api/exec"
evidence_files:
- "../benchmark/truth/py-unsafe-exec.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-unsafe-exec:101"
entries:
http:
- id: "POST /api/exec"
route: "/api/exec"
method: "POST"
handler: "app.handle_request"
description: "Executes user code via eval"

View File

@@ -0,0 +1 @@
# Intentionally empty; uses stdlib only.

View File

@@ -0,0 +1,10 @@
"""Minimal Python handler with an unsafe eval sink."""
def handle_request(body):
code = body.get("code") if isinstance(body, dict) else None
if not isinstance(code, str):
return {"status": 400, "body": "bad request"}
# Sink: eval on user input (reachable)
result = eval(code)
return {"status": 200, "body": str(result)}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the reachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_reach.py

View File

@@ -0,0 +1,54 @@
"""Reachability driver for the unsafe-exec case.

Calls the handler, asserts the eval sink executed, and writes deterministic
trace/coverage artifacts plus a SINK_REACHED marker under outputs/.
"""
import json
import os  # NOTE(review): unused in this module — candidate for removal.
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist a single dynamic trace (entry point + call path) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "PyUnsafeExec::handle_request",
            "notes": "Eval reached"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 30.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 30
                }
            }
        }, indent=2)
    )


def test_reach():
    # Exercise the handler and prove the eval sink executed.
    ensure_dirs()
    res = handle_request({"code": "3*7"})
    assert res["status"] == 200
    assert res["body"] == "21"
    record_trace("POST /api/exec", ["app.py::handle_request", "eval(code)"])
    record_coverage("src/app.py", [3, 4, 5, 8, 10])
    # Marker file proves the sink was reached.
    (OUT / "SINK_REACHED").write_text("true")


if __name__ == "__main__":
    test_reach()

View File

@@ -1,11 +1,34 @@
# rb-score (placeholder)
# rb-score
Planned CLI to score reachability submissions against truth sets.
Deterministic scorer for the reachability benchmark.
Future work (BENCH-SCORER-513-008):
- Validate submission against `schemas/submission.schema.json`.
- Validate truth against `schemas/truth.schema.json`.
- Compute precision/recall/F1, explainability score (0-3), runtime stats, determinism rate.
- Emit JSON report with stable ordering.
## What it does
- Validates submissions against `schemas/submission.schema.json` and truth against `schemas/truth.schema.json`.
- Computes precision/recall/F1 (micro, sink-level).
- Computes explainability score per prediction (0–3) and averages it.
- Checks duplicate predictions for determinism (inconsistent duplicates lower the rate).
- Surfaces runtime metadata from the submission (`run` block).
For now this folder is a stub; implementation will be added in task 513-008 once schemas stabilize.
## Install (offline-friendly)
```bash
python -m pip install -r requirements.txt
```
## Usage
```bash
./rb_score.py --truth ../../benchmark/truth/public.json --submission ../../benchmark/submissions/sample.json --format json
```
## Output
- `text` (default): short human-readable summary.
- `json`: deterministic JSON with top-level metrics and per-case breakdown.
## Tests
```bash
python -m unittest tests/test_scoring.py
```
## Notes
- Predictions for sinks not present in truth count as false positives (strict posture).
- Truth sinks with label `unknown` are ignored for FN/FP counting.
- Explainability tiering: 0=no context; 1=path>=2 nodes; 2=entry + path>=3; 3=guards present.

View File

@@ -0,0 +1,3 @@
"""Scorer package shim: re-export the ``rb_score`` module."""
from . import rb_score

__all__ = ["rb_score"]

View File

@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Thin wrapper so rb-score can be invoked from any working directory;
# forwards all arguments to the Python scorer next to this script.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
python3 "$SCRIPT_DIR/rb_score.py" "$@"

View File

@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""rb-score: deterministic scorer for reachability benchmark submissions.

Features (task BENCH-SCORER-513-008):
- Validate submission and truth against published schemas.
- Compute precision / recall / F1 at sink level (micro-averaged).
- Compute explainability score per prediction (0-3) and average.
- Surface runtime stats from submission metadata.
- Emit deterministic JSON or human-readable text.

Assumptions:
- Truth labels may include "unknown"; these are skipped for FN/FP.
- A prediction for a sink absent in truth counts as FP (strict posture).
- Duplicate predictions for the same sink must agree; disagreement reduces determinism rate.
"""
from __future__ import annotations
import argparse
import json
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
import yaml
from jsonschema import Draft202012Validator
# Benchmark root (bench/reachability-benchmark). This file lives at
# tools/scorer/rb_score.py, so the root is two levels up (parents[2]);
# parents[1] would resolve to tools/ and break the schema paths below —
# the test suite and schema examples resolve schemas/ from the benchmark root.
ROOT = Path(__file__).resolve().parents[2]
# Published JSON Schemas used to validate inputs before scoring.
SCHEMAS = {
    "truth": ROOT / "schemas" / "truth.schema.json",
    "submission": ROOT / "schemas" / "submission.schema.json",
}
@dataclass
class CaseMetrics:
    """Per-case confusion counts and derived sink-level metrics."""
    case_id: str
    tp: int  # predicted reachable and labelled reachable in truth
    fp: int  # predicted reachable but outside truth's reachable set
    fn: int  # labelled reachable in truth but not predicted reachable
    precision: float
    recall: float
    f1: float
    explain_avg: float  # mean explainability tier (0-3) over this case's predictions
@dataclass
class ScoreReport:
    """Aggregated, submission-level scoring result."""
    precision: float  # micro-averaged over pooled TP/FP/FN
    recall: float
    f1: float
    tp: int
    fp: int
    fn: int
    explain_avg: float  # macro average of per-case explainability
    determinism_rate: float  # share of sink groups with consistent duplicate predictions
    runtime: Dict[str, object]  # copied from the submission's `run` block
    cases: List[CaseMetrics]  # per-case breakdown, in submission order
def load_json_or_yaml(path: Path):
    """Parse *path* as YAML when its suffix is .yaml/.yml, otherwise as JSON."""
    raw = path.read_text(encoding="utf-8")
    suffix = path.suffix.lower()
    return yaml.safe_load(raw) if suffix in {".yaml", ".yml"} else json.loads(raw)
def validate_against(schema_path: Path, payload) -> Tuple[bool, List[str]]:
    """Validate *payload* against the JSON Schema at *schema_path*.

    Returns ``(ok, errors)`` where errors are deterministic, human-readable
    ``"<json-pointer>: <message>"`` strings sorted by error location.
    """
    schema = load_json_or_yaml(schema_path)
    validator = Draft202012Validator(schema)
    # Sort on a stringified path: `err.path` is a deque whose elements mix
    # str keys and int indices; deques are not orderable, so the previous
    # `key=lambda e: e.path` raised TypeError whenever two errors were found.
    errors = sorted(
        validator.iter_errors(payload),
        key=lambda err: tuple(str(p) for p in err.path),
    )
    if not errors:
        return True, []
    return False, [
        f"{'/'.join(str(p) for p in err.path) or '<root>'}: {err.message}"
        for err in errors
    ]
def safe_div(num: int, denom: int, default: float) -> float:
    """Return ``num / denom``, or *default* when the denominator is zero."""
    return default if denom == 0 else num / denom
def explain_score(pred: dict) -> int:
    """Tier a prediction's explanation quality on a 0-3 scale.

    3 = guards present; 2 = entry point plus a path of >= 3 nodes;
    1 = path of >= 2 nodes; 0 = no usable context.
    """
    detail = pred.get("explain") or {}
    nodes = detail.get("path") or []
    if detail.get("guards"):
        return 3
    if detail.get("entry") and len(nodes) >= 3:
        return 2
    return 1 if len(nodes) >= 2 else 0
def determinism_rate(preds: Iterable[dict]) -> float:
    """Fraction of sinks whose duplicate predictions all agree.

    Predictions are grouped by ``sink_id`` (entries without one are skipped);
    a group is consistent when every duplicate carries the same ``prediction``
    value. Returns 1.0 when there are no groups at all.
    """
    grouped: Dict[str, set] = {}
    for item in preds:
        key = item.get("sink_id")
        if key is None:
            continue
        grouped.setdefault(key, set()).add(item.get("prediction"))
    if not grouped:
        return 1.0
    consistent = sum(1 for values in grouped.values() if len(values) == 1)
    return consistent / len(grouped)
def score_case(case_id: str, truth_sinks: Dict[str, str], predicted: List[dict]) -> CaseMetrics:
    """Score one case's predictions against its truth labels.

    Only sinks labelled "reachable" in truth contribute to TP/FN; any
    "reachable" prediction outside that set counts as FP (strict posture),
    so "unknown"-labelled sinks are effectively ignored.
    """
    reachable_truth = {sid for sid, label in truth_sinks.items() if label == "reachable"}
    reachable_pred = {p["sink_id"] for p in predicted if p.get("prediction") == "reachable"}
    tp = len(reachable_pred & reachable_truth)
    fp = len(reachable_pred - reachable_truth)
    fn = len(reachable_truth - reachable_pred)
    precision = safe_div(tp, tp + fp, 1.0)
    recall = safe_div(tp, tp + fn, 1.0)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    tiers = [explain_score(p) for p in predicted]
    explain_avg = safe_div(sum(tiers), len(tiers), 0.0)
    return CaseMetrics(case_id, tp, fp, fn, precision, recall, f1, explain_avg)
def aggregate(cases: List[CaseMetrics], preds: List[dict]) -> ScoreReport:
    """Combine per-case metrics into a single ScoreReport.

    Precision/recall/F1 are micro-averaged from the pooled TP/FP/FN counts;
    explainability is macro-averaged over cases. The runtime block starts
    empty and is filled in later by score() from the submission metadata.
    """
    tp = sum(c.tp for c in cases)
    fp = sum(c.fp for c in cases)
    fn = sum(c.fn for c in cases)
    precision = safe_div(tp, tp + fp, 1.0)
    recall = safe_div(tp, tp + fn, 1.0)
    f1 = 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall)
    # Macro average of per-case explainability (0.0 when there are no cases).
    explain_avg = safe_div(sum(c.explain_avg for c in cases), len(cases), 0.0) if cases else 0.0
    det_rate = determinism_rate(preds)
    runtime = {}
    return ScoreReport(precision, recall, f1, tp, fp, fn, explain_avg, det_rate, runtime, cases)
def build_truth_index(truth_doc: dict) -> Dict[str, Dict[str, str]]:
    """Map ``case_id -> {sink_id -> label}`` from a truth document."""
    return {
        case["case_id"]: {entry["sink_id"]: entry["label"] for entry in case.get("sinks", [])}
        for case in truth_doc.get("cases", [])
    }
def score(truth_doc: dict, submission_doc: dict) -> ScoreReport:
    """Score a full submission document against a truth document.

    Each submission case is scored against the matching truth case; a
    missing truth entry yields an empty sink map, so every "reachable"
    prediction there counts as FP. Runtime metadata is copied from the
    submission's `run` block onto the aggregated report.
    """
    truth_index = build_truth_index(truth_doc)
    cases_metrics: List[CaseMetrics] = []
    all_preds: List[dict] = []
    for sub_case in submission_doc.get("cases", []):
        case_id = sub_case.get("case_id")
        predicted_sinks = sub_case.get("sinks") or []
        all_preds.extend(predicted_sinks)
        truth_sinks = truth_index.get(case_id, {})
        case_metrics = score_case(case_id, truth_sinks, predicted_sinks)
        cases_metrics.append(case_metrics)
    report = aggregate(cases_metrics, all_preds)
    report.runtime = submission_doc.get("run", {})
    return report
def report_as_dict(report: ScoreReport) -> dict:
    """Serialize a ScoreReport as a plain dict for deterministic JSON output.

    Floats are rounded to 4 decimals; cases keep their submission order.
    """
    return {
        "version": "1.0.0",
        "metrics": {
            "precision": round(report.precision, 4),
            "recall": round(report.recall, 4),
            "f1": round(report.f1, 4),
            "tp": report.tp,
            "fp": report.fp,
            "fn": report.fn,
            "determinism_rate": round(report.determinism_rate, 4),
            "explainability_avg": round(report.explain_avg, 4),
        },
        "runtime": report.runtime,
        "cases": [
            {
                "case_id": c.case_id,
                "precision": round(c.precision, 4),
                "recall": round(c.recall, 4),
                "f1": round(c.f1, 4),
                "tp": c.tp,
                "fp": c.fp,
                "fn": c.fn,
                "explainability_avg": round(c.explain_avg, 4),
            }
            for c in report.cases
        ],
    }
def format_text(report: ScoreReport) -> str:
    """Render a short human-readable summary of the report.

    Runtime key/value pairs are sorted for deterministic output.
    """
    lines = []
    lines.append("rb-score summary")
    lines.append(f"  precision {report.precision:.4f} recall {report.recall:.4f} f1 {report.f1:.4f}")
    lines.append(f"  tp {report.tp} fp {report.fp} fn {report.fn} determinism {report.determinism_rate:.4f} explain_avg {report.explain_avg:.4f}")
    if report.runtime:
        rt = report.runtime
        lines.append("  runtime: " + ", ".join(f"{k}={v}" for k, v in sorted(rt.items())))
    lines.append("  cases:")
    for c in report.cases:
        lines.append(
            f"   - {c.case_id}: P {c.precision:.4f} R {c.recall:.4f} F1 {c.f1:.4f} tp {c.tp} fp {c.fp} fn {c.fn} explain_avg {c.explain_avg:.4f}"
        )
    return "\n".join(lines)
def parse_args(argv: List[str]) -> argparse.Namespace:
    """Parse CLI arguments: truth path, submission path, and output format."""
    parser = argparse.ArgumentParser(description="Score reachability benchmark submissions")
    for flag, help_text in (
        ("--truth", "Path to truth JSON"),
        ("--submission", "Path to submission JSON"),
    ):
        parser.add_argument(flag, required=True, help=help_text)
    parser.add_argument("--format", choices=["json", "text"], default="text", help="Output format")
    return parser.parse_args(argv)
def main(argv: List[str]) -> int:
    """CLI entry point.

    Exit codes: 0 on success, 2 when an input file is missing,
    3 when schema validation of either document fails.
    """
    args = parse_args(argv)
    truth_path = Path(args.truth)
    submission_path = Path(args.submission)
    if not truth_path.exists() or not submission_path.exists():
        print("truth or submission file not found", file=sys.stderr)
        return 2
    truth_doc = load_json_or_yaml(truth_path)
    submission_doc = load_json_or_yaml(submission_path)
    # Validate both documents before scoring; report every error found.
    ok_truth, truth_errs = validate_against(SCHEMAS["truth"], truth_doc)
    ok_sub, sub_errs = validate_against(SCHEMAS["submission"], submission_doc)
    if not ok_truth or not ok_sub:
        for msg in truth_errs + sub_errs:
            print(f"validation_error: {msg}", file=sys.stderr)
        return 3
    report = score(truth_doc, submission_doc)
    # sort_keys keeps the JSON output deterministic.
    if args.format == "json":
        print(json.dumps(report_as_dict(report), sort_keys=True, indent=2))
    else:
        print(format_text(report))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

View File

@@ -0,0 +1,2 @@
jsonschema==4.23.0
PyYAML==6.0.2

View File

@@ -0,0 +1,70 @@
"""Unit tests for the rb-score scorer (loaded directly from its file path)."""
import json
import importlib.util
import unittest
from pathlib import Path

ROOT = Path(__file__).resolve().parents[3]  # bench/reachability-benchmark
SCORER_PATH = ROOT / "tools" / "scorer" / "rb_score.py"


def load_module():
    # Import rb_score.py by path so the tests do not depend on sys.path setup.
    spec = importlib.util.spec_from_file_location("rb_score", SCORER_PATH)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader
    import sys
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)  # type: ignore[attr-defined]
    return module


def load_example(name: str):
    # Read a sample document from the published schema examples directory.
    return json.loads((ROOT / "schemas" / "examples" / name).read_text())


rb_score = load_module()
class TestScoring(unittest.TestCase):
    """End-to-end scoring checks against the published schema examples."""

    def test_score_perfect_prediction(self):
        """The shipped sample submission matches the sample truth exactly."""
        truth = load_example("truth.sample.json")
        submission = load_example("submission.sample.json")
        report = rb_score.score(truth, submission)
        self.assertEqual(report.tp, 1)
        self.assertEqual(report.fp, 0)
        self.assertEqual(report.fn, 0)
        self.assertEqual(report.precision, 1.0)
        self.assertEqual(report.recall, 1.0)
        self.assertEqual(report.f1, 1.0)
        self.assertGreaterEqual(report.explain_avg, 1.0)
        self.assertEqual(report.determinism_rate, 1.0)

    def test_score_false_negative_and_fp(self):
        """A missed truth sink counts as FN and an unknown sink as FP."""
        truth = load_example("truth.sample.json")
        submission = {
            "version": "1.0.0",
            "tool": {"name": "tool", "version": "1"},
            "run": {"platform": "ubuntu"},
            "cases": [
                {
                    "case_id": "js-express-blog:001",
                    "sinks": [
                        {"sink_id": "Deserializer::parse", "prediction": "unreachable"},
                        {"sink_id": "Fake::sink", "prediction": "reachable"},
                    ],
                }
            ],
        }
        report = rb_score.score(truth, submission)
        self.assertEqual(report.tp, 0)
        self.assertEqual(report.fp, 1)
        self.assertEqual(report.fn, 1)
        self.assertEqual(report.precision, 0.0)
        self.assertEqual(report.recall, 0.0)
        self.assertEqual(report.f1, 0.0)
        self.assertEqual(report.determinism_rate, 1.0)


if __name__ == "__main__":
    unittest.main()