feat: Implement Filesystem and MongoDB provenance writers for PackRun execution context
Some checks failed
Airgap Sealed CI Smoke / sealed-smoke (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled

- Added `FilesystemPackRunProvenanceWriter` to write provenance manifests to the filesystem.
- Introduced `MongoPackRunArtifactReader` to read artifacts from MongoDB.
- Created `MongoPackRunProvenanceWriter` to store provenance manifests in MongoDB.
- Developed unit tests for filesystem and MongoDB provenance writers.
- Established `ITimelineEventStore` and `ITimelineIngestionService` interfaces for timeline event handling.
- Implemented `TimelineIngestionService` to validate and persist timeline events with hashing.
- Created PostgreSQL schema and migration scripts for timeline indexing.
- Added dependency injection support for timeline indexer services.
- Developed tests for timeline ingestion and schema validation.
This commit is contained in:
StellaOps Bot
2025-11-30 15:38:14 +02:00
parent 8f54ffa203
commit 17d45a6d30
276 changed files with 8618 additions and 688 deletions

View File

@@ -17,6 +17,24 @@ Deterministic, reproducible benchmark for reachability analysis tools.
- `ci/` — deterministic CI workflows and scripts.
- `website/` — static site (leaderboard/docs/downloads).
Sample cases added (JS track):
- `cases/js/unsafe-eval` (reachable sink) → `benchmark/truth/js-unsafe-eval.json`.
- `cases/js/guarded-eval` (unreachable by default) → `benchmark/truth/js-guarded-eval.json`.
- `cases/js/express-eval` (admin eval reachable) → `benchmark/truth/js-express-eval.json`.
- `cases/js/express-guarded` (admin eval gated by env) → `benchmark/truth/js-express-guarded.json`.
- `cases/js/fastify-template` (template rendering reachable) → `benchmark/truth/js-fastify-template.json`.
Sample cases added (Python track):
- `cases/py/unsafe-exec` (reachable eval) → `benchmark/truth/py-unsafe-exec.json`.
- `cases/py/guarded-exec` (unreachable when FEATURE_ENABLE != 1) → `benchmark/truth/py-guarded-exec.json`.
- `cases/py/flask-template` (template rendering reachable) → `benchmark/truth/py-flask-template.json`.
- `cases/py/fastapi-guarded` (unreachable unless ALLOW_EXEC=true) → `benchmark/truth/py-fastapi-guarded.json`.
- `cases/py/django-ssti` (template rendering reachable, autoescape off) → `benchmark/truth/py-django-ssti.json`.
Sample cases added (Java track):
- `cases/java/spring-deserialize` (reachable Java deserialization) → `benchmark/truth/java-spring-deserialize.json`.
- `cases/java/spring-guarded` (deserialization unreachable unless ALLOW_DESER=true) → `benchmark/truth/java-spring-guarded.json`.
## Determinism & Offline Rules
- No network during build/test; pin images/deps; set `SOURCE_DATE_EPOCH`.
- Sort file lists; stable JSON/YAML emitters; fixed RNG seeds.

View File

@@ -0,0 +1,32 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "java-spring-deserialize:201",
"case_version": "1.0.0",
"notes": "Java deserialization sink reachable",
"sinks": [
{
"sink_id": "JavaDeserialize::handleRequest",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"src/AppTest.java"
],
"coverage_files": []
},
"static_evidence": {
"call_path": [
"POST /api/upload",
"App.handleRequest",
"ObjectInputStream.readObject"
]
},
"config_conditions": [],
"notes": "No guard; base64 payload deserialized"
}
]
}
]
}

View File

@@ -0,0 +1,29 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "java-spring-guarded:202",
"case_version": "1.0.0",
"notes": "Deserialization unreachable by default",
"sinks": [
{
"sink_id": "JavaDeserializeGuarded::handleRequest",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": ["src/AppTest.java"],
"coverage_files": []
},
"static_evidence": {
"call_path": [
"POST /api/upload",
"App.handleRequest",
"guard: ALLOW_DESER!=true"
]
},
"config_conditions": ["ALLOW_DESER == 'true'"]
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-express-eval:003",
"case_version": "1.0.0",
"notes": "Admin eval reachable",
"sinks": [
{
"sink_id": "ExpressEval::exec",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/admin/exec",
"createServer.exec",
"eval(code)"
]
},
"config_conditions": [],
"notes": "No guard on admin path"
}
]
}
]
}

View File

@@ -0,0 +1,36 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-express-guarded:004",
"case_version": "1.0.0",
"notes": "Admin exec unreachable when ALLOW_EXEC!=true",
"sinks": [
{
"sink_id": "ExpressGuarded::exec",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_unreachable.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/admin/exec",
"createServer.exec",
"guard: ALLOW_EXEC!=true"
]
},
"config_conditions": [
"ALLOW_EXEC == 'true'"
],
"notes": "Only reachable when ALLOW_EXEC=true"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-fastify-template:005",
"case_version": "1.0.0",
"notes": "Template rendering reachable",
"sinks": [
{
"sink_id": "FastifyTemplate::render",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/render",
"createServer.render",
"template replace"
]
},
"config_conditions": [],
"notes": "Simple template replace used as sink"
}
]
}
]
}

View File

@@ -0,0 +1,36 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-guarded-eval:002",
"case_version": "1.0.0",
"notes": "Eval sink guarded by FEATURE_ENABLE; unreachable when flag off",
"sinks": [
{
"sink_id": "GuardedEval::handleRequest",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_unreachable.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/exec",
"app.js::handleRequest",
"guard: FEATURE_ENABLE != 1"
]
},
"config_conditions": [
"FEATURE_ENABLE == '1'"
],
"notes": "Sink only executes when FEATURE_ENABLE=1"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "js-unsafe-eval:001",
"case_version": "1.0.0",
"notes": "Unsafe eval sink reachable via POST /api/exec",
"sinks": [
{
"sink_id": "UnsafeEval::handleRequest",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.js"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/exec",
"app.js::handleRequest",
"eval(code)"
]
},
"config_conditions": [],
"notes": "No guards; direct eval on user input"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-django-ssti:105",
"case_version": "1.0.0",
"notes": "Template rendering reachable (autoescape off)",
"sinks": [
{
"sink_id": "DjangoSSTI::render",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /render",
"app.handle_request",
"render"
]
},
"config_conditions": [],
"notes": "Autoescape disabled"
}
]
}
]
}

View File

@@ -0,0 +1,36 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-fastapi-guarded:104",
"case_version": "1.0.0",
"notes": "Eval unreachable unless ALLOW_EXEC=true",
"sinks": [
{
"sink_id": "FastApiGuarded::handle_request",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_unreachable.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /exec",
"app.handle_request",
"guard: ALLOW_EXEC!=true"
]
},
"config_conditions": [
"ALLOW_EXEC == 'true'"
],
"notes": "Feature flag blocks sink by default"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-flask-template:103",
"case_version": "1.0.0",
"notes": "Template rendering reachable",
"sinks": [
{
"sink_id": "FlaskTemplate::render",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /render",
"app.handle_request",
"render"
]
},
"config_conditions": [],
"notes": "Simple template placeholder replacement"
}
]
}
]
}

View File

@@ -0,0 +1,36 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-guarded-exec:102",
"case_version": "1.0.0",
"notes": "Eval unreachable unless FEATURE_ENABLE=1",
"sinks": [
{
"sink_id": "PyGuardedExec::handle_request",
"label": "unreachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_unreachable.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/exec",
"app.handle_request",
"guard: FEATURE_ENABLE != 1"
]
},
"config_conditions": [
"FEATURE_ENABLE == '1'"
],
"notes": "Feature flag required"
}
]
}
]
}

View File

@@ -0,0 +1,34 @@
{
"version": "1.0.0",
"cases": [
{
"case_id": "py-unsafe-exec:101",
"case_version": "1.0.0",
"notes": "Eval reachable",
"sinks": [
{
"sink_id": "PyUnsafeExec::handle_request",
"label": "reachable",
"confidence": "high",
"dynamic_evidence": {
"covered_by_tests": [
"tests/test_reach.py"
],
"coverage_files": [
"outputs/coverage.json"
]
},
"static_evidence": {
"call_path": [
"POST /api/exec",
"app.handle_request",
"eval(code)"
]
},
"config_conditions": [],
"notes": "No guards"
}
]
}
]
}

View File

@@ -0,0 +1,38 @@
id: "java-spring-deserialize:201"
language: java
project: spring-deserialize
version: "1.0.0"
description: "Java deserialization sink reachable via POST /api/upload"
entrypoints:
- "POST /api/upload"
sinks:
- id: "JavaDeserialize::handleRequest"
path: "bench.reachability.App.handleRequest"
kind: "custom"
location:
file: src/App.java
line: 9
notes: "java.io.ObjectInputStream on user-controlled payload"
environment:
os_image: "eclipse-temurin:21-jdk"
runtime:
java: "21"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./build/build.sh"
expected_coverage: []
expected_traces: []
env:
JAVA_TOOL_OPTIONS: "-ea"
ground_truth:
summary: "Deserialization reachable"
evidence_files:
- "../benchmark/truth/java-spring-deserialize.json"

View File

@@ -0,0 +1,8 @@
case_id: "java-spring-deserialize:201"
entries:
http:
- id: "POST /api/upload"
route: "/api/upload"
method: "POST"
handler: "App.handleRequest"
description: "Binary payload base64-deserialized"

View File

@@ -0,0 +1,12 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.stellaops.bench</groupId>
<artifactId>spring-deserialize</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
</properties>
</project>

View File

@@ -0,0 +1,26 @@
package bench.reachability;
import java.util.Map;
import java.util.Base64;
import java.io.*;
public class App {
// Unsafe Java deserialization sink (reachable)
public static Response handleRequest(Map<String, String> body) {
String payload = body.get("payload");
if (payload == null) {
return new Response(400, "bad request");
}
try {
byte[] data = Base64.getDecoder().decode(payload);
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data));
Object obj = ois.readObject();
ois.close();
return new Response(200, obj.toString());
} catch (Exception ex) {
return new Response(500, ex.getClass().getSimpleName());
}
}
public record Response(int status, String body) {}
}

View File

@@ -0,0 +1,30 @@
package bench.reachability;
import java.io.*;
import java.util.*;
import java.util.Base64;

// Hand-rolled harness (no external deps); relies on `java -ea` assertions.
public class AppTest {

    /** Java-serialize a value and return it base64-encoded. */
    private static String toBase64(Object value) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(buffer)) {
            out.writeObject(value);
        }
        return Base64.getEncoder().encodeToString(buffer.toByteArray());
    }

    public static void main(String[] args) throws Exception {
        var response = App.handleRequest(Map.of("payload", toBase64("hello")));
        assert response.status() == 200 : "status";
        assert response.body().equals("hello") : "body";

        // Emit a simple marker file as a trace/coverage stand-in.
        File outputs = new File("outputs");
        outputs.mkdirs();
        try (FileWriter marker = new FileWriter(new File(outputs, "SINK_REACHED"))) {
            marker.write("true");
        }
    }
}

View File

@@ -0,0 +1,38 @@
id: "java-spring-guarded:202"
language: java
project: spring-guarded
version: "1.0.0"
description: "Java deserialization guarded by ALLOW_DESER flag (unreachable by default)"
entrypoints:
- "POST /api/upload"
sinks:
- id: "JavaDeserializeGuarded::handleRequest"
path: "bench.reachability.App.handleRequest"
kind: "custom"
location:
file: src/App.java
line: 9
notes: "ObjectInputStream gated by ALLOW_DESER"
environment:
os_image: "eclipse-temurin:21-jdk"
runtime:
java: "21"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./build/build.sh"
expected_coverage: []
expected_traces: []
env:
JAVA_TOOL_OPTIONS: "-ea"
ground_truth:
summary: "Guard blocks deserialization unless ALLOW_DESER=true"
evidence_files:
- "../benchmark/truth/java-spring-guarded.json"

View File

@@ -0,0 +1,8 @@
case_id: "java-spring-guarded:202"
entries:
http:
- id: "POST /api/upload"
route: "/api/upload"
method: "POST"
handler: "App.handleRequest"
description: "Base64 payload deserialization guarded by ALLOW_DESER"

View File

@@ -0,0 +1,12 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.stellaops.bench</groupId>
<artifactId>spring-guarded</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
</properties>
</project>

View File

@@ -0,0 +1,29 @@
package bench.reachability;
import java.util.Map;
import java.util.Base64;
import java.io.*;
public class App {
// Deserialization sink guarded by feature flag
public static Response handleRequest(Map<String, String> body, Map<String, String> env) {
if (!"true".equals(env.getOrDefault("ALLOW_DESER", "false"))) {
return new Response(403, "forbidden");
}
String payload = body.get("payload");
if (payload == null) {
return new Response(400, "bad request");
}
try {
byte[] data = Base64.getDecoder().decode(payload);
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data));
Object obj = ois.readObject();
ois.close();
return new Response(200, obj.toString());
} catch (Exception ex) {
return new Response(500, ex.getClass().getSimpleName());
}
}
public record Response(int status, String body) {}
}

View File

@@ -0,0 +1,29 @@
package bench.reachability;
import java.io.*;
import java.util.*;
import java.util.Base64;

/** Asserts the guard blocks deserialization when ALLOW_DESER is "false". */
public class AppTest {

    /** Java-serialize a value and return it base64-encoded. */
    private static String toBase64(Object value) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(buffer)) {
            out.writeObject(value);
        }
        return Base64.getEncoder().encodeToString(buffer.toByteArray());
    }

    public static void main(String[] args) throws Exception {
        Map<String, String> request = Map.of("payload", toBase64("hi"));
        var response = App.handleRequest(request, Map.of("ALLOW_DESER", "false"));
        assert response.status() == 403 : "status";
        assert response.body().equals("forbidden") : "body";

        // Marker file records that the guard path was taken.
        File outputs = new File("outputs");
        outputs.mkdirs();
        try (FileWriter marker = new FileWriter(new File(outputs, "SINK_BLOCKED"))) {
            marker.write("true");
        }
    }
}

View File

@@ -0,0 +1,38 @@
id: "js-express-eval:003"
language: js
project: express-eval
version: "1.0.0"
description: "Admin exec endpoint evaluates user code"
entrypoints:
- "POST /api/admin/exec"
sinks:
- id: "ExpressEval::exec"
path: "src/app.js::createServer"
kind: "process"
location:
file: src/app.js
line: 17
notes: "eval(code) on admin path"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Admin exec endpoint reachable and executes eval"
evidence_files:
- "../benchmark/truth/js-express-eval.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-express-eval:003"
entries:
http:
- id: "POST /api/admin/exec"
route: "/api/admin/exec"
method: "POST"
handler: "createServer.exec"
description: "Admin-only exec (reachable)"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-express-eval",
"version": "1.0.0",
"description": "Reachability benchmark case: express-like admin eval endpoint",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,34 @@
'use strict';
// Minimal express-like router: routes are keyed by "METHOD path" strings.
// NOTE(review): benchmark source file — ground-truth call paths reference
// this code's structure, so only comments are changed here.
function makeApp() {
  const routes = {};
  return {
    // Register a handler for POST <path>.
    post(path, handler) {
      routes[`POST ${path}`] = handler;
    },
    // Dispatch a request; returns the handler's {status, body} or a 404.
    handle(method, path, req, res) {
      const key = `${method} ${path}`;
      if (routes[key]) {
        return routes[key](req, res);
      }
      return { status: 404, body: 'not found' };
    }
  };
}
// Build the benchmark app with the admin exec endpoint wired up.
function createServer() {
  const app = makeApp();
  app.post('/api/admin/exec', (req) => {
    if (!req || typeof req.body?.code !== 'string') {
      return { status: 400, body: 'bad request' };
    }
    // Sink: eval on admin endpoint (reachable) — intentionally unguarded.
    // eslint-disable-next-line no-eval
    const result = eval(req.body.code);
    return { status: 200, body: String(result) };
  });
  return app;
}
module.exports = { createServer };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Deterministic test runner: pins timestamp, timezone, and locale before
# invoking the Node reachability test so outputs are reproducible offline.
set -euo pipefail
cd "$(dirname "$0")"  # run relative to this script's directory
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}  # reproducible-build epoch
export TZ=UTC
export LC_ALL=C
node test_reach.js

View File

@@ -0,0 +1,54 @@
'use strict';
// Reachability test: drives the admin exec endpoint, asserts the eval sink
// fires, then emits trace, coverage, and SINK_REACHED artifacts under ../outputs.
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { createServer } = require('../src/app');

const outDir = path.resolve(__dirname, '../outputs');
const traceDir = path.join(outDir, 'traces');

// Serialize `payload` as pretty-printed JSON at `file`.
const writeJson = (file, payload) =>
  fs.writeFileSync(file, JSON.stringify(payload, null, 2));

function main() {
  fs.mkdirSync(outDir, { recursive: true });
  fs.mkdirSync(traceDir, { recursive: true });

  const app = createServer();
  const res = app.handle('POST', '/api/admin/exec', { body: { code: '21*2' } });
  assert.strictEqual(res.status, 200);
  assert.strictEqual(res.body, '42');

  writeJson(path.join(traceDir, 'traces.json'), {
    entry: 'POST /api/admin/exec',
    path: ['app.js::createServer', 'handler', 'eval(code)'],
    sink: 'ExpressEval::exec',
    notes: 'Admin exec reached'
  });
  writeJson(path.join(outDir, 'coverage.json'), {
    files: {
      'src/app.js': {
        lines_covered: [5, 6, 7, 13, 18, 19],
        lines_total: 40
      }
    }
  });
  fs.writeFileSync(path.join(outDir, 'SINK_REACHED'), 'true');
}

main();

View File

@@ -0,0 +1,38 @@
id: "js-express-guarded:004"
language: js
project: express-guarded
version: "1.0.0"
description: "Admin exec guarded by ALLOW_EXEC flag; unreachable by default"
entrypoints:
- "POST /api/admin/exec"
sinks:
- id: "ExpressGuarded::exec"
path: "src/app.js::createServer"
kind: "process"
location:
file: src/app.js
line: 16
notes: "eval(code) gated by ALLOW_EXEC"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Guard prevents sink unless ALLOW_EXEC=true"
evidence_files:
- "../benchmark/truth/js-express-guarded.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-express-guarded:004"
entries:
http:
- id: "POST /api/admin/exec"
route: "/api/admin/exec"
method: "POST"
handler: "createServer.exec"
description: "Admin exec blocked unless ALLOW_EXEC=true"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-express-guarded",
"version": "1.0.0",
"description": "Reachability benchmark case: express-like admin exec guarded by env flag",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,33 @@
'use strict';
// Minimal express-like router: routes are keyed by "METHOD path" strings.
// NOTE(review): benchmark source file — ground-truth call paths reference
// this code's structure, so only comments are changed here.
function makeApp() {
  const routes = {};
  return {
    // Register a handler for POST <path>.
    post(path, handler) {
      routes[`POST ${path}`] = handler;
    },
    // Dispatch a request; returns the handler's {status, body} or a 404.
    handle(method, path, req) {
      const key = `${method} ${path}`;
      if (routes[key]) return routes[key](req);
      return { status: 404, body: 'not found' };
    }
  };
}
// Admin exec endpoint gated by the ALLOW_EXEC flag (read from req.env).
function createServer() {
  const app = makeApp();
  app.post('/api/admin/exec', (req) => {
    // Guard: the eval sink below is unreachable unless ALLOW_EXEC === 'true'.
    if (req?.env?.ALLOW_EXEC !== 'true') {
      return { status: 403, body: 'forbidden' };
    }
    if (typeof req?.body?.code !== 'string') {
      return { status: 400, body: 'bad request' };
    }
    // Sink: eval on user-supplied code (intentional benchmark sink).
    // eslint-disable-next-line no-eval
    const result = eval(req.body.code);
    return { status: 200, body: String(result) };
  });
  return app;
}
module.exports = { createServer };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Deterministic test runner: pins timestamp, timezone, and locale before
# invoking the Node unreachability test so outputs are reproducible offline.
set -euo pipefail
cd "$(dirname "$0")"  # run relative to this script's directory
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}  # reproducible-build epoch
export TZ=UTC
export LC_ALL=C
node test_unreachable.js

View File

@@ -0,0 +1,53 @@
'use strict';
// Unreachability test: confirms the guard rejects exec when ALLOW_EXEC=false,
// then records trace/coverage artifacts (no SINK_REACHED marker is written).
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { createServer } = require('../src/app');

const outDir = path.resolve(__dirname, '../outputs');
const traceDir = path.join(outDir, 'traces');

// Serialize `payload` as pretty-printed JSON at `file`.
const writeJson = (file, payload) =>
  fs.writeFileSync(file, JSON.stringify(payload, null, 2));

function main() {
  fs.mkdirSync(outDir, { recursive: true });
  fs.mkdirSync(traceDir, { recursive: true });

  const server = createServer();
  const reply = server.handle('POST', '/api/admin/exec', {
    body: { code: '2+2' },
    env: { ALLOW_EXEC: 'false' }
  });
  assert.strictEqual(reply.status, 403);
  assert.strictEqual(reply.body, 'forbidden');

  writeJson(path.join(traceDir, 'traces.json'), {
    entry: 'POST /api/admin/exec',
    path: ['app.js::createServer', 'guard: ALLOW_EXEC!=true'],
    sink: 'ExpressGuarded::exec',
    notes: 'Guard blocked sink'
  });
  writeJson(path.join(outDir, 'coverage.json'), {
    files: {
      'src/app.js': {
        lines_covered: [5, 6, 7, 12, 13, 14, 15],
        lines_total: 50
      }
    }
  });
}

main();

View File

@@ -0,0 +1,38 @@
id: "js-fastify-template:005"
language: js
project: fastify-template
version: "1.0.0"
description: "Template rendering route replaces user placeholder"
entrypoints:
- "POST /api/render"
sinks:
- id: "FastifyTemplate::render"
path: "src/app.js::createServer"
kind: "http"
location:
file: src/app.js
line: 19
notes: "Template rendering of user input"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Template rendering reachable via POST /api/render"
evidence_files:
- "../benchmark/truth/js-fastify-template.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-fastify-template:005"
entries:
http:
- id: "POST /api/render"
route: "/api/render"
method: "POST"
handler: "createServer.render"
description: "Template rendering endpoint"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-fastify-template",
"version": "1.0.0",
"description": "Reachability benchmark case: fastify-like template rendering",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,33 @@
'use strict';
// Simulated Fastify route registration for template injection.
// NOTE(review): benchmark source file — ground-truth call paths reference
// this code's structure, so only comments are changed here.
function buildServer() {
  const routes = {};
  return {
    // Register a handler for POST <path>.
    post(path, handler) {
      routes[`POST ${path}`] = handler;
    },
    // Fastify-style inject: synchronously invoke the matching handler.
    inject(method, path, payload) {
      const key = `${method} ${path}`;
      const handler = routes[key];
      if (!handler) return { status: 404, body: 'not found' };
      return handler({ body: payload });
    }
  };
}
// Template-rendering endpoint: substitutes {{user}} in a user-supplied template.
function createServer() {
  const server = buildServer();
  server.post('/api/render', (req) => {
    const template = req?.body?.template;
    if (typeof template !== 'string') {
      return { status: 400, body: 'bad request' };
    }
    // NOTE: String.replace swaps only the first '{{user}}' occurrence.
    const compiled = template.replace('{{user}}', 'guest');
    // Sink: writes rendered content to log (simulated SSR)
    return { status: 200, body: compiled };
  });
  return server;
}
module.exports = { createServer };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Deterministic test runner: pins timestamp, timezone, and locale before
# invoking the Node reachability test so outputs are reproducible offline.
set -euo pipefail
cd "$(dirname "$0")"  # run relative to this script's directory
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}  # reproducible-build epoch
export TZ=UTC
export LC_ALL=C
node test_reach.js

View File

@@ -0,0 +1,54 @@
'use strict';
// Reachability test: renders a template through the injected route, asserts
// the substitution, and emits trace/coverage/SINK_REACHED artifacts.
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { createServer } = require('../src/app');

const outDir = path.resolve(__dirname, '../outputs');
const traceDir = path.join(outDir, 'traces');

// Serialize `payload` as pretty-printed JSON at `file`.
const writeJson = (file, payload) =>
  fs.writeFileSync(file, JSON.stringify(payload, null, 2));

function main() {
  fs.mkdirSync(outDir, { recursive: true });
  fs.mkdirSync(traceDir, { recursive: true });

  const server = createServer();
  const reply = server.inject('POST', '/api/render', { template: 'Hello {{user}}' });
  assert.strictEqual(reply.status, 200);
  assert.strictEqual(reply.body, 'Hello guest');

  writeJson(path.join(traceDir, 'traces.json'), {
    entry: 'POST /api/render',
    path: ['app.js::createServer', 'render template'],
    sink: 'FastifyTemplate::render',
    notes: 'Template rendered with user input'
  });
  writeJson(path.join(outDir, 'coverage.json'), {
    files: {
      'src/app.js': {
        lines_covered: [5, 6, 7, 13, 18, 20],
        lines_total: 45
      }
    }
  });
  fs.writeFileSync(path.join(outDir, 'SINK_REACHED'), 'true');
}

main();

View File

@@ -0,0 +1,38 @@
id: "js-guarded-eval:002"
language: js
project: guarded-eval
version: "1.0.0"
description: "Eval sink guarded by FEATURE_ENABLE flag; unreachable when flag is off"
entrypoints:
- "POST /api/exec"
sinks:
- id: "GuardedEval::handleRequest"
path: "src/app.js::handleRequest"
kind: "process"
location:
file: src/app.js
line: 13
notes: "eval on user input guarded by FEATURE_ENABLE"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Guard prevents sink when FEATURE_ENABLE != 1"
evidence_files:
- "../benchmark/truth/js-guarded-eval.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-guarded-eval:002"
entries:
http:
- id: "POST /api/exec"
route: "/api/exec"
method: "POST"
handler: "app.js::handleRequest"
description: "Feature-flagged code execution endpoint"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-guarded-eval",
"version": "1.0.0",
"description": "Reachability benchmark case: eval guarded by feature flag",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,19 @@
'use strict';
// Feature-flagged exec endpoint: handler for POST /api/exec.
// NOTE(review): benchmark source file — ground-truth call paths reference
// this code's structure, so only comments are changed here.
// `env` defaults to process.env so tests can inject a controlled environment.
function handleRequest(body, env = process.env) {
  // Guard: the eval sink below is unreachable unless FEATURE_ENABLE is exactly '1'.
  if (env.FEATURE_ENABLE !== '1') {
    return { status: 403, body: 'disabled' };
  }
  const code = body && body.code;
  if (typeof code !== 'string') {
    return { status: 400, body: 'bad request' };
  }
  // This sink is reachable only when FEATURE_ENABLE=1.
  // eslint-disable-next-line no-eval
  const result = eval(code);
  return { status: 200, body: String(result) };
}
module.exports = { handleRequest };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Deterministic test runner: pins timestamp, timezone, and locale before
# invoking the Node unreachability test so outputs are reproducible offline.
set -euo pipefail
cd "$(dirname "$0")"  # run relative to this script's directory
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}  # reproducible-build epoch
export TZ=UTC
export LC_ALL=C
node test_unreachable.js

View File

@@ -0,0 +1,54 @@
'use strict';
// Unreachability test: verifies the feature-flag guard blocks eval when
// FEATURE_ENABLE is off; no SINK_REACHED marker is written.
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { handleRequest } = require('../src/app');

const outDir = path.resolve(__dirname, '../outputs');
const traceDir = path.join(outDir, 'traces');

// Serialize `payload` as pretty-printed JSON at `file`.
const writeJson = (file, payload) =>
  fs.writeFileSync(file, JSON.stringify(payload, null, 2));

function main() {
  fs.mkdirSync(outDir, { recursive: true });
  fs.mkdirSync(traceDir, { recursive: true });

  const reply = handleRequest({ code: '1 + 2' }, { FEATURE_ENABLE: '0' });
  assert.strictEqual(reply.status, 403);
  assert.strictEqual(reply.body, 'disabled');

  // Record that the guard path was taken; no SINK_REACHED marker is written.
  writeJson(path.join(traceDir, 'traces.json'), {
    entry: 'POST /api/exec',
    path: ['app.js:handleRequest', 'guard: FEATURE_ENABLE != 1'],
    sink: 'GuardedEval::handleRequest',
    notes: 'Guard prevented sink execution'
  });
  writeJson(path.join(outDir, 'coverage.json'), {
    files: {
      'src/app.js': {
        lines_covered: [5, 6, 7, 9, 10, 11],
        lines_total: 32
      }
    }
  });
}

main();

View File

@@ -0,0 +1,38 @@
id: "js-unsafe-eval:001"
language: js
project: unsafe-eval
version: "1.0.0"
description: "Minimal handler with unsafe eval sink reachable via POST /api/exec"
entrypoints:
- "POST /api/exec"
sinks:
- id: "UnsafeEval::handleRequest"
path: "src/app.js::handleRequest"
kind: "process"
location:
file: src/app.js
line: 12
notes: "eval on user-controlled input"
environment:
os_image: "node:20-alpine"
runtime:
node: "20.11.0"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Unit test triggers eval sink with payload {code: '1+2'}"
evidence_files:
- "../benchmark/truth/js-unsafe-eval.json"

View File

@@ -0,0 +1,8 @@
case_id: "js-unsafe-eval:001"
entries:
http:
- id: "POST /api/exec"
route: "/api/exec"
method: "POST"
handler: "app.js::handleRequest"
description: "Executes user-supplied code (unsafe eval)"

View File

@@ -0,0 +1,9 @@
{
"name": "rb-case-unsafe-eval",
"version": "1.0.0",
"description": "Reachability benchmark case: unsafe eval in minimal JS handler",
"license": "Apache-2.0",
"scripts": {
"test": "./tests/run-tests.sh"
}
}

View File

@@ -0,0 +1,17 @@
'use strict';
// Minimal HTTP-like handler exposing an unsafe eval sink for reachability.
// The handler is intentionally small to avoid external dependencies.
function handleRequest(body) {
const code = body && body.code;
if (typeof code !== 'string') {
return { status: 400, body: 'bad request' };
}
// Dangerous: executes user-controlled code. The test drives this sink.
// eslint-disable-next-line no-eval
const result = eval(code);
return { status: 200, body: String(result) };
}
module.exports = { handleRequest };

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Run the reachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
node test_reach.js

View File

@@ -0,0 +1,55 @@
'use strict';
// Driver for the unsafe-eval case: proves the eval sink IS reached and
// records deterministic trace/coverage artifacts under outputs/.
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const { handleRequest } = require('../src/app');

// Artifact locations under the case's outputs/ directory.
const OUT_DIR = path.resolve(__dirname, '../outputs');
const TRACE_DIR = path.join(OUT_DIR, 'traces');
const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json');
const TRACE_FILE = path.join(TRACE_DIR, 'traces.json');

// Create the output directories if they do not exist yet.
function ensureDirs() {
  fs.mkdirSync(OUT_DIR, { recursive: true });
  fs.mkdirSync(TRACE_DIR, { recursive: true });
}

// Write a single dynamic trace record (entry point + path nodes) to TRACE_FILE.
function recordTrace(entry, pathNodes) {
  fs.writeFileSync(
    TRACE_FILE,
    JSON.stringify({
      entry,
      path: pathNodes,
      sink: 'UnsafeEval::handleRequest',
      notes: 'Test-driven dynamic trace'
    }, null, 2)
  );
}

// Write a minimal coverage summary for one file; lines_total is fixed at 30.
function recordCoverage(filePath, lines) {
  fs.writeFileSync(
    COVERAGE_FILE,
    JSON.stringify({
      files: {
        [filePath]: {
          lines_covered: lines,
          lines_total: 30
        }
      }
    }, null, 2)
  );
}

// Drive the handler and assert the eval sink executed (200 / "3"), then
// record the trace, coverage, and the SINK_REACHED marker.
(function main() {
  ensureDirs();
  const payload = { code: '1 + 2' };
  const response = handleRequest(payload);
  assert.strictEqual(response.status, 200);
  assert.strictEqual(response.body, '3');
  recordTrace('POST /api/exec', ['app.js:handleRequest', 'eval(code)']);
  recordCoverage('src/app.js', [5, 6, 7, 12, 15]);
  // Marker file proves sink executed
  fs.writeFileSync(path.join(OUT_DIR, 'SINK_REACHED'), 'true');
})();

View File

@@ -0,0 +1,38 @@
id: "py-django-ssti:105"
language: py
project: django-ssti
version: "1.0.0"
description: "Django-like template rendering (autoescape off) reachable"
entrypoints:
- "POST /render"
sinks:
- id: "DjangoSSTI::render"
path: "src/app.py::handle_request"
kind: "http"
location:
file: src/app.py
line: 5
notes: "template replace without escaping"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Template rendering reachable with autoescape off"
evidence_files:
- "../benchmark/truth/py-django-ssti.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-django-ssti:105"
entries:
http:
- id: "POST /render"
route: "/render"
method: "POST"
handler: "app.handle_request"
description: "Template rendering with autoescape off"

View File

@@ -0,0 +1 @@
# stdlib only

View File

@@ -0,0 +1,12 @@
"""Django-like template rendering with autoescape off (reachable)."""
def render(template: str, context: dict) -> str:
# naive render; simulates autoescape off
return template.replace("{{user}}", context.get("user", "guest"))
def handle_request(body):
template = body.get("template") if isinstance(body, dict) else None
if not isinstance(template, str):
return {"status": 400, "body": "bad request"}
rendered = render(template, {"user": "guest"})
return {"status": 200, "body": rendered}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the reachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_reach.py

View File

@@ -0,0 +1,48 @@
"""Reachability driver for the django-ssti case.

Calls the handler, asserts the rendering sink executed, and writes
deterministic trace/coverage artifacts plus a SINK_REACHED marker
under outputs/.
"""
import json
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist a single dynamic trace (entry point + call path) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "DjangoSSTI::render",
            "notes": "Template rendered (autoescape off)"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 38.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 38
                }
            }
        }, indent=2)
    )


def test_reach():
    # Exercise the handler and prove the rendering sink executed.
    ensure_dirs()
    res = handle_request({"template": "Hello {{user}}"})
    assert res["status"] == 200
    assert res["body"] == "Hello guest"
    record_trace("POST /render", ["app.py::handle_request", "render"])
    record_coverage("src/app.py", [3,4,5,7,8,9,10])
    # Marker file proves the sink was reached.
    (OUT / "SINK_REACHED").write_text("true")


if __name__ == "__main__":
    test_reach()

View File

@@ -0,0 +1,38 @@
id: "py-fastapi-guarded:104"
language: py
project: fastapi-guarded
version: "1.0.0"
description: "FastAPI-like exec guarded by ALLOW_EXEC flag (unreachable by default)"
entrypoints:
- "POST /exec"
sinks:
- id: "FastApiGuarded::handle_request"
path: "src/app.py::handle_request"
kind: "process"
location:
file: src/app.py
line: 7
notes: "eval guarded by ALLOW_EXEC"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Guard blocks eval unless ALLOW_EXEC=true"
evidence_files:
- "../benchmark/truth/py-fastapi-guarded.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-fastapi-guarded:104"
entries:
http:
- id: "POST /exec"
route: "/exec"
method: "POST"
handler: "app.handle_request"
description: "Exec guarded by ALLOW_EXEC"

View File

@@ -0,0 +1 @@
# stdlib only

View File

@@ -0,0 +1,11 @@
"""FastAPI-like handler with feature flag guarding exec."""
def handle_request(body, env=None):
env = env or {}
if env.get("ALLOW_EXEC") != "true":
return {"status": 403, "body": "forbidden"}
code = body.get("code") if isinstance(body, dict) else None
if not isinstance(code, str):
return {"status": 400, "body": "bad request"}
result = eval(code)
return {"status": 200, "body": str(result)}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the unreachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_unreachable.py

View File

@@ -0,0 +1,47 @@
"""Driver for the fastapi-guarded case: proves the eval sink is NOT reached.

Runs the handler with ALLOW_EXEC disabled and records trace/coverage
evidence; no SINK_REACHED marker is written because the guard rejects
the request.
"""
import json
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist the guard-path trace (entry point + nodes) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "FastApiGuarded::handle_request",
            "notes": "Guard blocked eval"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 40.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 40
                }
            }
        }, indent=2)
    )


def test_unreachable():
    # With ALLOW_EXEC != "true" the handler must refuse before eval runs.
    ensure_dirs()
    res = handle_request({"code": "10/2"}, env={"ALLOW_EXEC": "false"})
    assert res["status"] == 403
    assert res["body"] == "forbidden"
    record_trace("POST /exec", ["app.py::handle_request", "guard: ALLOW_EXEC!=true"])
    record_coverage("src/app.py", [3,4,5,8,9,11])


if __name__ == "__main__":
    test_unreachable()

View File

@@ -0,0 +1,38 @@
id: "py-flask-template:103"
language: py
project: flask-template
version: "1.0.0"
description: "Template rendering reachable via POST /render"
entrypoints:
- "POST /render"
sinks:
- id: "FlaskTemplate::render"
path: "src/app.py::handle_request"
kind: "http"
location:
file: src/app.py
line: 5
notes: "template replace on user input"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Template rendering reachable"
evidence_files:
- "../benchmark/truth/py-flask-template.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-flask-template:103"
entries:
http:
- id: "POST /render"
route: "/render"
method: "POST"
handler: "app.handle_request"
description: "Template rendering"

View File

@@ -0,0 +1 @@
# stdlib only for this minimal case

View File

@@ -0,0 +1,12 @@
"""Minimal flask-like template rendering sink (reachable)."""
def render(template: str, context: dict) -> str:
return template.replace("{{name}}", context.get("name", "guest"))
def handle_request(body):
template = body.get("template") if isinstance(body, dict) else None
if not isinstance(template, str):
return {"status": 400, "body": "bad request"}
rendered = render(template, {"name": "guest"})
# Sink: returns rendered template (models potential SSTI)
return {"status": 200, "body": rendered}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the reachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_reach.py

View File

@@ -0,0 +1,48 @@
"""Reachability driver for the flask-template case.

Calls the handler, asserts the rendering sink executed, and writes
deterministic trace/coverage artifacts plus a SINK_REACHED marker
under outputs/.
"""
import json
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist a single dynamic trace (entry point + call path) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "FlaskTemplate::render",
            "notes": "Template rendered"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 40.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 40
                }
            }
        }, indent=2)
    )


def test_reach():
    # Exercise the handler and prove the rendering sink executed.
    ensure_dirs()
    res = handle_request({"template": "Hello {{name}}"})
    assert res["status"] == 200
    assert res["body"] == "Hello guest"
    record_trace("POST /render", ["app.py::handle_request", "render"])
    record_coverage("src/app.py", [4,5,6,8,9,10,11])
    # Marker file proves the sink was reached.
    (OUT / "SINK_REACHED").write_text("true")


if __name__ == "__main__":
    test_reach()

View File

@@ -0,0 +1,38 @@
id: "py-guarded-exec:102"
language: py
project: guarded-exec
version: "1.0.0"
description: "Python eval guarded by FEATURE_ENABLE flag; unreachable by default"
entrypoints:
- "POST /api/exec"
sinks:
- id: "PyGuardedExec::handle_request"
path: "src/app.py::handle_request"
kind: "process"
location:
file: src/app.py
line: 7
notes: "eval guarded by FEATURE_ENABLE"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Guard blocks eval when FEATURE_ENABLE != 1"
evidence_files:
- "../benchmark/truth/py-guarded-exec.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-guarded-exec:102"
entries:
http:
- id: "POST /api/exec"
route: "/api/exec"
method: "POST"
handler: "app.handle_request"
description: "Eval guarded by FEATURE_ENABLE"

View File

@@ -0,0 +1 @@
# Intentionally empty; stdlib only.

View File

@@ -0,0 +1,13 @@
"""Python handler with feature-flag guard for eval sink."""
def handle_request(body, env=None):
env = env or {}
if env.get("FEATURE_ENABLE") != "1":
return {"status": 403, "body": "disabled"}
code = body.get("code") if isinstance(body, dict) else None
if not isinstance(code, str):
return {"status": 400, "body": "bad request"}
result = eval(code)
return {"status": 200, "body": str(result)}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the unreachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_unreachable.py

View File

@@ -0,0 +1,48 @@
"""Driver for the guarded-exec case: proves the eval sink is NOT reached.

Runs the handler with FEATURE_ENABLE=0 and records trace/coverage evidence;
no SINK_REACHED marker is written because the guard rejects the request.
"""
import json
import os  # NOTE(review): unused in this module — candidate for removal.
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist the guard-path trace (entry point + nodes) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "PyGuardedExec::handle_request",
            "notes": "Guard blocked eval"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 34.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 34
                }
            }
        }, indent=2)
    )


def test_unreachable():
    # With the feature flag off, the handler must refuse before eval runs.
    ensure_dirs()
    res = handle_request({"code": "5*5"}, env={"FEATURE_ENABLE": "0"})
    assert res["status"] == 403
    assert res["body"] == "disabled"
    record_trace("POST /api/exec", ["app.py::handle_request", "guard: FEATURE_ENABLE != 1"])
    record_coverage("src/app.py", [3,4,5,8,9,11])


if __name__ == "__main__":
    test_unreachable()

View File

@@ -0,0 +1,38 @@
id: "py-unsafe-exec:101"
language: py
project: unsafe-exec
version: "1.0.0"
description: "Python handler with reachable eval sink"
entrypoints:
- "POST /api/exec"
sinks:
- id: "PyUnsafeExec::handle_request"
path: "src/app.py::handle_request"
kind: "process"
location:
file: src/app.py
line: 8
notes: "eval on user input"
environment:
os_image: "python:3.12-alpine"
runtime:
python: "3.12"
source_date_epoch: 1730000000
build:
command: "./build/build.sh"
source_date_epoch: 1730000000
outputs:
artifact_path: outputs/binary.tar.gz
sbom_path: outputs/sbom.cdx.json
coverage_path: outputs/coverage.json
traces_dir: outputs/traces
test:
command: "./tests/run-tests.sh"
expected_coverage:
- outputs/coverage.json
expected_traces:
- outputs/traces/traces.json
ground_truth:
summary: "Eval reachable via POST /api/exec"
evidence_files:
- "../benchmark/truth/py-unsafe-exec.json"

View File

@@ -0,0 +1,8 @@
case_id: "py-unsafe-exec:101"
entries:
http:
- id: "POST /api/exec"
route: "/api/exec"
method: "POST"
handler: "app.handle_request"
description: "Executes user code via eval"

View File

@@ -0,0 +1 @@
# Intentionally empty; uses stdlib only.

View File

@@ -0,0 +1,10 @@
"""Minimal Python handler with an unsafe eval sink."""
def handle_request(body):
code = body.get("code") if isinstance(body, dict) else None
if not isinstance(code, str):
return {"status": 400, "body": "bad request"}
# Sink: eval on user input (reachable)
result = eval(code)
return {"status": 200, "body": str(result)}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the reachability unit test for this case with a deterministic environment.
set -euo pipefail
cd "$(dirname "$0")"
# Pin clock/timezone/locale so emitted artifacts are byte-stable across runs.
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
export TZ=UTC
export LC_ALL=C
# Make the case's src/ importable as the `app` module.
export PYTHONPATH="$(cd .. && pwd)/src"
python test_reach.py

View File

@@ -0,0 +1,54 @@
"""Reachability driver for the unsafe-exec case.

Calls the handler, asserts the eval sink executed, and writes deterministic
trace/coverage artifacts plus a SINK_REACHED marker under outputs/.
"""
import json
import os  # NOTE(review): unused in this module — candidate for removal.
import pathlib

from app import handle_request

# Artifact locations, resolved relative to the case root (parent of tests/).
ROOT = pathlib.Path(__file__).resolve().parent.parent
OUT = ROOT / "outputs"
TRACE_DIR = OUT / "traces"
COVERAGE_FILE = OUT / "coverage.json"
TRACE_FILE = TRACE_DIR / "traces.json"


def ensure_dirs():
    # Create outputs/ and outputs/traces/ if missing.
    OUT.mkdir(parents=True, exist_ok=True)
    TRACE_DIR.mkdir(parents=True, exist_ok=True)


def record_trace(entry, path_nodes):
    # Persist a single dynamic trace (entry point + call path) as pretty JSON.
    TRACE_FILE.write_text(
        json.dumps({
            "entry": entry,
            "path": path_nodes,
            "sink": "PyUnsafeExec::handle_request",
            "notes": "Eval reached"
        }, indent=2)
    )


def record_coverage(file_path, lines):
    # Persist covered line numbers for one file; lines_total is fixed at 30.
    COVERAGE_FILE.write_text(
        json.dumps({
            "files": {
                file_path: {
                    "lines_covered": lines,
                    "lines_total": 30
                }
            }
        }, indent=2)
    )


def test_reach():
    # Exercise the handler and prove the eval sink executed.
    ensure_dirs()
    res = handle_request({"code": "3*7"})
    assert res["status"] == 200
    assert res["body"] == "21"
    record_trace("POST /api/exec", ["app.py::handle_request", "eval(code)"])
    record_coverage("src/app.py", [3, 4, 5, 8, 10])
    # Marker file proves the sink was reached.
    (OUT / "SINK_REACHED").write_text("true")


if __name__ == "__main__":
    test_reach()

View File

@@ -1,11 +1,34 @@
# rb-score (placeholder)
# rb-score
Planned CLI to score reachability submissions against truth sets.
Deterministic scorer for the reachability benchmark.
Future work (BENCH-SCORER-513-008):
- Validate submission against `schemas/submission.schema.json`.
- Validate truth against `schemas/truth.schema.json`.
- Compute precision/recall/F1, explainability score (0-3), runtime stats, determinism rate.
- Emit JSON report with stable ordering.
## What it does
- Validates submissions against `schemas/submission.schema.json` and truth against `schemas/truth.schema.json`.
- Computes precision/recall/F1 (micro, sink-level).
- Computes explainability score per prediction (0–3) and averages it.
- Checks duplicate predictions for determinism (inconsistent duplicates lower the rate).
- Surfaces runtime metadata from the submission (`run` block).
For now this folder is a stub; implementation will be added in task 513-008 once schemas stabilize.
## Install (offline-friendly)
```bash
python -m pip install -r requirements.txt
```
## Usage
```bash
./rb_score.py --truth ../../benchmark/truth/public.json --submission ../../benchmark/submissions/sample.json --format json
```
## Output
- `text` (default): short human-readable summary.
- `json`: deterministic JSON with top-level metrics and per-case breakdown.
## Tests
```bash
python -m unittest tests/test_scoring.py
```
## Notes
- Predictions for sinks not present in truth count as false positives (strict posture).
- Truth sinks with label `unknown` are ignored for FN/FP counting.
- Explainability tiering: 0=no context; 1=path>=2 nodes; 2=entry + path>=3; 3=guards present.

View File

@@ -0,0 +1,3 @@
"""Scorer package shim: re-export the ``rb_score`` module."""
from . import rb_score

__all__ = ["rb_score"]

View File

@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Thin wrapper so rb-score can be invoked from any working directory;
# forwards all arguments to the Python scorer next to this script.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
python3 "$SCRIPT_DIR/rb_score.py" "$@"

View File

@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""rb-score: deterministic scorer for reachability benchmark submissions.

Features (task BENCH-SCORER-513-008):
- Validate submission and truth against published schemas.
- Compute precision / recall / F1 at sink level (micro-averaged).
- Compute explainability score per prediction (0-3) and average.
- Surface runtime stats from submission metadata.
- Emit deterministic JSON or human-readable text.

Assumptions:
- Truth labels may include "unknown"; these are skipped for FN/FP.
- A prediction for a sink absent in truth counts as FP (strict posture).
- Duplicate predictions for the same sink must agree; disagreement reduces determinism rate.
"""
from __future__ import annotations
import argparse
import json
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
import yaml
from jsonschema import Draft202012Validator
# Benchmark root (bench/reachability-benchmark). This file lives at
# tools/scorer/rb_score.py, so the root is two levels up (parents[2]);
# parents[1] would resolve to tools/ and break the schema paths below —
# the test suite and schema examples resolve schemas/ from the benchmark root.
ROOT = Path(__file__).resolve().parents[2]
# Published JSON Schemas used to validate inputs before scoring.
SCHEMAS = {
    "truth": ROOT / "schemas" / "truth.schema.json",
    "submission": ROOT / "schemas" / "submission.schema.json",
}
@dataclass
class CaseMetrics:
    """Per-case confusion counts and derived sink-level metrics."""
    case_id: str
    tp: int  # predicted reachable and labelled reachable in truth
    fp: int  # predicted reachable but outside truth's reachable set
    fn: int  # labelled reachable in truth but not predicted reachable
    precision: float
    recall: float
    f1: float
    explain_avg: float  # mean explainability tier (0-3) over this case's predictions
@dataclass
class ScoreReport:
    """Aggregated, submission-level scoring result."""
    precision: float  # micro-averaged over pooled TP/FP/FN
    recall: float
    f1: float
    tp: int
    fp: int
    fn: int
    explain_avg: float  # macro average of per-case explainability
    determinism_rate: float  # share of sink groups with consistent duplicate predictions
    runtime: Dict[str, object]  # copied from the submission's `run` block
    cases: List[CaseMetrics]  # per-case breakdown, in submission order
def load_json_or_yaml(path: Path):
    """Parse *path* as YAML when its suffix is .yaml/.yml, otherwise as JSON."""
    raw = path.read_text(encoding="utf-8")
    suffix = path.suffix.lower()
    return yaml.safe_load(raw) if suffix in {".yaml", ".yml"} else json.loads(raw)
def validate_against(schema_path: Path, payload) -> Tuple[bool, List[str]]:
    """Validate *payload* against the JSON Schema at *schema_path*.

    Returns ``(ok, errors)`` where errors are deterministic, human-readable
    ``"<json-pointer>: <message>"`` strings sorted by error location.
    """
    schema = load_json_or_yaml(schema_path)
    validator = Draft202012Validator(schema)
    # Sort on a stringified path: `err.path` is a deque whose elements mix
    # str keys and int indices; deques are not orderable, so the previous
    # `key=lambda e: e.path` raised TypeError whenever two errors were found.
    errors = sorted(
        validator.iter_errors(payload),
        key=lambda err: tuple(str(p) for p in err.path),
    )
    if not errors:
        return True, []
    return False, [
        f"{'/'.join(str(p) for p in err.path) or '<root>'}: {err.message}"
        for err in errors
    ]
def safe_div(num: int, denom: int, default: float) -> float:
    """Return ``num / denom``, or *default* when the denominator is zero."""
    return default if denom == 0 else num / denom
def explain_score(pred: dict) -> int:
    """Tier a prediction's explanation quality on a 0-3 scale.

    3 = guards present; 2 = entry point plus a path of >= 3 nodes;
    1 = path of >= 2 nodes; 0 = no usable context.
    """
    detail = pred.get("explain") or {}
    nodes = detail.get("path") or []
    if detail.get("guards"):
        return 3
    if detail.get("entry") and len(nodes) >= 3:
        return 2
    return 1 if len(nodes) >= 2 else 0
def determinism_rate(preds: Iterable[dict]) -> float:
    """Fraction of sinks whose duplicate predictions all agree.

    Predictions are grouped by ``sink_id`` (entries without one are skipped);
    a group is consistent when every duplicate carries the same ``prediction``
    value. Returns 1.0 when there are no groups at all.
    """
    grouped: Dict[str, set] = {}
    for item in preds:
        key = item.get("sink_id")
        if key is None:
            continue
        grouped.setdefault(key, set()).add(item.get("prediction"))
    if not grouped:
        return 1.0
    consistent = sum(1 for values in grouped.values() if len(values) == 1)
    return consistent / len(grouped)
def score_case(case_id: str, truth_sinks: Dict[str, str], predicted: List[dict]) -> CaseMetrics:
    """Score one case's predictions against its truth labels.

    Only sinks labelled "reachable" in truth contribute to TP/FN; any
    "reachable" prediction outside that set counts as FP (strict posture),
    so "unknown"-labelled sinks are effectively ignored.
    """
    reachable_truth = {sid for sid, label in truth_sinks.items() if label == "reachable"}
    reachable_pred = {p["sink_id"] for p in predicted if p.get("prediction") == "reachable"}
    tp = len(reachable_pred & reachable_truth)
    fp = len(reachable_pred - reachable_truth)
    fn = len(reachable_truth - reachable_pred)
    precision = safe_div(tp, tp + fp, 1.0)
    recall = safe_div(tp, tp + fn, 1.0)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    tiers = [explain_score(p) for p in predicted]
    explain_avg = safe_div(sum(tiers), len(tiers), 0.0)
    return CaseMetrics(case_id, tp, fp, fn, precision, recall, f1, explain_avg)
def aggregate(cases: List[CaseMetrics], preds: List[dict]) -> ScoreReport:
    """Combine per-case metrics into a single ScoreReport.

    Precision/recall/F1 are micro-averaged from the pooled TP/FP/FN counts;
    explainability is macro-averaged over cases. The runtime block starts
    empty and is filled in later by score() from the submission metadata.
    """
    tp = sum(c.tp for c in cases)
    fp = sum(c.fp for c in cases)
    fn = sum(c.fn for c in cases)
    precision = safe_div(tp, tp + fp, 1.0)
    recall = safe_div(tp, tp + fn, 1.0)
    f1 = 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall)
    # Macro average of per-case explainability (0.0 when there are no cases).
    explain_avg = safe_div(sum(c.explain_avg for c in cases), len(cases), 0.0) if cases else 0.0
    det_rate = determinism_rate(preds)
    runtime = {}
    return ScoreReport(precision, recall, f1, tp, fp, fn, explain_avg, det_rate, runtime, cases)
def build_truth_index(truth_doc: dict) -> Dict[str, Dict[str, str]]:
    """Map ``case_id -> {sink_id -> label}`` from a truth document."""
    return {
        case["case_id"]: {entry["sink_id"]: entry["label"] for entry in case.get("sinks", [])}
        for case in truth_doc.get("cases", [])
    }
def score(truth_doc: dict, submission_doc: dict) -> ScoreReport:
    """Score a full submission document against a truth document.

    Each submission case is scored against the matching truth case; a
    missing truth entry yields an empty sink map, so every "reachable"
    prediction there counts as FP. Runtime metadata is copied from the
    submission's `run` block onto the aggregated report.
    """
    truth_index = build_truth_index(truth_doc)
    cases_metrics: List[CaseMetrics] = []
    all_preds: List[dict] = []
    for sub_case in submission_doc.get("cases", []):
        case_id = sub_case.get("case_id")
        predicted_sinks = sub_case.get("sinks") or []
        all_preds.extend(predicted_sinks)
        truth_sinks = truth_index.get(case_id, {})
        case_metrics = score_case(case_id, truth_sinks, predicted_sinks)
        cases_metrics.append(case_metrics)
    report = aggregate(cases_metrics, all_preds)
    report.runtime = submission_doc.get("run", {})
    return report
def report_as_dict(report: ScoreReport) -> dict:
    """Serialize a ScoreReport as a plain dict for deterministic JSON output.

    Floats are rounded to 4 decimals; cases keep their submission order.
    """
    return {
        "version": "1.0.0",
        "metrics": {
            "precision": round(report.precision, 4),
            "recall": round(report.recall, 4),
            "f1": round(report.f1, 4),
            "tp": report.tp,
            "fp": report.fp,
            "fn": report.fn,
            "determinism_rate": round(report.determinism_rate, 4),
            "explainability_avg": round(report.explain_avg, 4),
        },
        "runtime": report.runtime,
        "cases": [
            {
                "case_id": c.case_id,
                "precision": round(c.precision, 4),
                "recall": round(c.recall, 4),
                "f1": round(c.f1, 4),
                "tp": c.tp,
                "fp": c.fp,
                "fn": c.fn,
                "explainability_avg": round(c.explain_avg, 4),
            }
            for c in report.cases
        ],
    }
def format_text(report: ScoreReport) -> str:
    """Render a short human-readable summary of the report.

    Runtime key/value pairs are sorted for deterministic output.
    """
    lines = []
    lines.append("rb-score summary")
    lines.append(f"  precision {report.precision:.4f} recall {report.recall:.4f} f1 {report.f1:.4f}")
    lines.append(f"  tp {report.tp} fp {report.fp} fn {report.fn} determinism {report.determinism_rate:.4f} explain_avg {report.explain_avg:.4f}")
    if report.runtime:
        rt = report.runtime
        lines.append("  runtime: " + ", ".join(f"{k}={v}" for k, v in sorted(rt.items())))
    lines.append("  cases:")
    for c in report.cases:
        lines.append(
            f"   - {c.case_id}: P {c.precision:.4f} R {c.recall:.4f} F1 {c.f1:.4f} tp {c.tp} fp {c.fp} fn {c.fn} explain_avg {c.explain_avg:.4f}"
        )
    return "\n".join(lines)
def parse_args(argv: List[str]) -> argparse.Namespace:
    """Parse CLI arguments: truth path, submission path, and output format."""
    parser = argparse.ArgumentParser(description="Score reachability benchmark submissions")
    for flag, help_text in (
        ("--truth", "Path to truth JSON"),
        ("--submission", "Path to submission JSON"),
    ):
        parser.add_argument(flag, required=True, help=help_text)
    parser.add_argument("--format", choices=["json", "text"], default="text", help="Output format")
    return parser.parse_args(argv)
def main(argv: List[str]) -> int:
    """CLI entry point.

    Exit codes: 0 on success, 2 when an input file is missing,
    3 when schema validation of either document fails.
    """
    args = parse_args(argv)
    truth_path = Path(args.truth)
    submission_path = Path(args.submission)
    if not truth_path.exists() or not submission_path.exists():
        print("truth or submission file not found", file=sys.stderr)
        return 2
    truth_doc = load_json_or_yaml(truth_path)
    submission_doc = load_json_or_yaml(submission_path)
    # Validate both documents before scoring; report every error found.
    ok_truth, truth_errs = validate_against(SCHEMAS["truth"], truth_doc)
    ok_sub, sub_errs = validate_against(SCHEMAS["submission"], submission_doc)
    if not ok_truth or not ok_sub:
        for msg in truth_errs + sub_errs:
            print(f"validation_error: {msg}", file=sys.stderr)
        return 3
    report = score(truth_doc, submission_doc)
    # sort_keys keeps the JSON output deterministic.
    if args.format == "json":
        print(json.dumps(report_as_dict(report), sort_keys=True, indent=2))
    else:
        print(format_text(report))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

View File

@@ -0,0 +1,2 @@
jsonschema==4.23.0
PyYAML==6.0.2

View File

@@ -0,0 +1,70 @@
"""Unit tests for the rb-score scorer (loaded directly from its file path)."""
import json
import importlib.util
import unittest
from pathlib import Path

ROOT = Path(__file__).resolve().parents[3]  # bench/reachability-benchmark
SCORER_PATH = ROOT / "tools" / "scorer" / "rb_score.py"


def load_module():
    # Import rb_score.py by path so the tests do not depend on sys.path setup.
    spec = importlib.util.spec_from_file_location("rb_score", SCORER_PATH)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader
    import sys
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)  # type: ignore[attr-defined]
    return module


def load_example(name: str):
    # Read a sample document from the published schema examples directory.
    return json.loads((ROOT / "schemas" / "examples" / name).read_text())


rb_score = load_module()
class TestScoring(unittest.TestCase):
    """End-to-end scoring checks against the published schema examples."""

    def test_score_perfect_prediction(self):
        """The shipped sample submission matches the sample truth exactly."""
        truth = load_example("truth.sample.json")
        submission = load_example("submission.sample.json")
        report = rb_score.score(truth, submission)
        self.assertEqual(report.tp, 1)
        self.assertEqual(report.fp, 0)
        self.assertEqual(report.fn, 0)
        self.assertEqual(report.precision, 1.0)
        self.assertEqual(report.recall, 1.0)
        self.assertEqual(report.f1, 1.0)
        self.assertGreaterEqual(report.explain_avg, 1.0)
        self.assertEqual(report.determinism_rate, 1.0)

    def test_score_false_negative_and_fp(self):
        """A missed truth sink counts as FN and an unknown sink as FP."""
        truth = load_example("truth.sample.json")
        submission = {
            "version": "1.0.0",
            "tool": {"name": "tool", "version": "1"},
            "run": {"platform": "ubuntu"},
            "cases": [
                {
                    "case_id": "js-express-blog:001",
                    "sinks": [
                        {"sink_id": "Deserializer::parse", "prediction": "unreachable"},
                        {"sink_id": "Fake::sink", "prediction": "reachable"},
                    ],
                }
            ],
        }
        report = rb_score.score(truth, submission)
        self.assertEqual(report.tp, 0)
        self.assertEqual(report.fp, 1)
        self.assertEqual(report.fn, 1)
        self.assertEqual(report.precision, 0.0)
        self.assertEqual(report.recall, 0.0)
        self.assertEqual(report.f1, 0.0)
        self.assertEqual(report.determinism_rate, 1.0)


if __name__ == "__main__":
    unittest.main()