feat: Implement Filesystem and MongoDB provenance writers for PackRun execution context
- Added `FilesystemPackRunProvenanceWriter` to write provenance manifests to the filesystem. - Introduced `MongoPackRunArtifactReader` to read artifacts from MongoDB. - Created `MongoPackRunProvenanceWriter` to store provenance manifests in MongoDB. - Developed unit tests for filesystem and MongoDB provenance writers. - Established `ITimelineEventStore` and `ITimelineIngestionService` interfaces for timeline event handling. - Implemented `TimelineIngestionService` to validate and persist timeline events with hashing. - Created PostgreSQL schema and migration scripts for timeline indexing. - Added dependency injection support for timeline indexer services. - Developed tests for timeline ingestion and schema validation.
This commit is contained in:
@@ -17,6 +17,24 @@ Deterministic, reproducible benchmark for reachability analysis tools.
|
||||
- `ci/` — deterministic CI workflows and scripts.
|
||||
- `website/` — static site (leaderboard/docs/downloads).
|
||||
|
||||
Sample cases added (JS track):
|
||||
- `cases/js/unsafe-eval` (reachable sink) → `benchmark/truth/js-unsafe-eval.json`.
|
||||
- `cases/js/guarded-eval` (unreachable by default) → `benchmark/truth/js-guarded-eval.json`.
|
||||
- `cases/js/express-eval` (admin eval reachable) → `benchmark/truth/js-express-eval.json`.
|
||||
- `cases/js/express-guarded` (admin eval gated by env) → `benchmark/truth/js-express-guarded.json`.
|
||||
- `cases/js/fastify-template` (template rendering reachable) → `benchmark/truth/js-fastify-template.json`.
|
||||
|
||||
Sample cases added (Python track):
|
||||
- `cases/py/unsafe-exec` (reachable eval) → `benchmark/truth/py-unsafe-exec.json`.
|
||||
- `cases/py/guarded-exec` (unreachable when FEATURE_ENABLE != 1) → `benchmark/truth/py-guarded-exec.json`.
|
||||
- `cases/py/flask-template` (template rendering reachable) → `benchmark/truth/py-flask-template.json`.
|
||||
- `cases/py/fastapi-guarded` (unreachable unless ALLOW_EXEC=true) → `benchmark/truth/py-fastapi-guarded.json`.
|
||||
- `cases/py/django-ssti` (template rendering reachable, autoescape off) → `benchmark/truth/py-django-ssti.json`.
|
||||
|
||||
Sample cases added (Java track):
|
||||
- `cases/java/spring-deserialize` (reachable Java deserialization) → `benchmark/truth/java-spring-deserialize.json`.
|
||||
- `cases/java/spring-guarded` (deserialization unreachable unless ALLOW_DESER=true) → `benchmark/truth/java-spring-guarded.json`.
|
||||
|
||||
## Determinism & Offline Rules
|
||||
- No network during build/test; pin images/deps; set `SOURCE_DATE_EPOCH`.
|
||||
- Sort file lists; stable JSON/YAML emitters; fixed RNG seeds.
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "java-spring-deserialize:201",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Java deserialization sink reachable",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "JavaDeserialize::handleRequest",
|
||||
"label": "reachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"src/AppTest.java"
|
||||
],
|
||||
"coverage_files": []
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /api/upload",
|
||||
"App.handleRequest",
|
||||
"ObjectInputStream.readObject"
|
||||
]
|
||||
},
|
||||
"config_conditions": [],
|
||||
"notes": "No guard; base64 payload deserialized"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "java-spring-guarded:202",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Deserialization unreachable by default",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "JavaDeserializeGuarded::handleRequest",
|
||||
"label": "unreachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": ["src/AppTest.java"],
|
||||
"coverage_files": []
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /api/upload",
|
||||
"App.handleRequest",
|
||||
"guard: ALLOW_DESER!=true"
|
||||
]
|
||||
},
|
||||
"config_conditions": ["ALLOW_DESER == 'true'"]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "js-express-eval:003",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Admin eval reachable",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "ExpressEval::exec",
|
||||
"label": "reachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_reach.js"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /api/admin/exec",
|
||||
"createServer.exec",
|
||||
"eval(code)"
|
||||
]
|
||||
},
|
||||
"config_conditions": [],
|
||||
"notes": "No guard on admin path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "js-express-guarded:004",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Admin exec unreachable when ALLOW_EXEC!=true",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "ExpressGuarded::exec",
|
||||
"label": "unreachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_unreachable.js"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /api/admin/exec",
|
||||
"createServer.exec",
|
||||
"guard: ALLOW_EXEC!=true"
|
||||
]
|
||||
},
|
||||
"config_conditions": [
|
||||
"ALLOW_EXEC == 'true'"
|
||||
],
|
||||
"notes": "Only reachable when ALLOW_EXEC=true"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "js-fastify-template:005",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Template rendering reachable",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "FastifyTemplate::render",
|
||||
"label": "reachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_reach.js"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /api/render",
|
||||
"createServer.render",
|
||||
"template replace"
|
||||
]
|
||||
},
|
||||
"config_conditions": [],
|
||||
"notes": "Simple template replace used as sink"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "js-guarded-eval:002",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Eval sink guarded by FEATURE_ENABLE; unreachable when flag off",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "GuardedEval::handleRequest",
|
||||
"label": "unreachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_unreachable.js"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /api/exec",
|
||||
"app.js::handleRequest",
|
||||
"guard: FEATURE_ENABLE != 1"
|
||||
]
|
||||
},
|
||||
"config_conditions": [
|
||||
"FEATURE_ENABLE == '1'"
|
||||
],
|
||||
"notes": "Sink only executes when FEATURE_ENABLE=1"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "js-unsafe-eval:001",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Unsafe eval sink reachable via POST /api/exec",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "UnsafeEval::handleRequest",
|
||||
"label": "reachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_reach.js"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /api/exec",
|
||||
"app.js::handleRequest",
|
||||
"eval(code)"
|
||||
]
|
||||
},
|
||||
"config_conditions": [],
|
||||
"notes": "No guards; direct eval on user input"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "py-django-ssti:105",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Template rendering reachable (autoescape off)",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "DjangoSSTI::render",
|
||||
"label": "reachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_reach.py"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /render",
|
||||
"app.handle_request",
|
||||
"render"
|
||||
]
|
||||
},
|
||||
"config_conditions": [],
|
||||
"notes": "Autoescape disabled"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "py-fastapi-guarded:104",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Eval unreachable unless ALLOW_EXEC=true",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "FastApiGuarded::handle_request",
|
||||
"label": "unreachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_unreachable.py"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /exec",
|
||||
"app.handle_request",
|
||||
"guard: ALLOW_EXEC!=true"
|
||||
]
|
||||
},
|
||||
"config_conditions": [
|
||||
"ALLOW_EXEC == 'true'"
|
||||
],
|
||||
"notes": "Feature flag blocks sink by default"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "py-flask-template:103",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Template rendering reachable",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "FlaskTemplate::render",
|
||||
"label": "reachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_reach.py"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /render",
|
||||
"app.handle_request",
|
||||
"render"
|
||||
]
|
||||
},
|
||||
"config_conditions": [],
|
||||
"notes": "Simple template placeholder replacement"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "py-guarded-exec:102",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Eval unreachable unless FEATURE_ENABLE=1",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "PyGuardedExec::handle_request",
|
||||
"label": "unreachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_unreachable.py"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /api/exec",
|
||||
"app.handle_request",
|
||||
"guard: FEATURE_ENABLE != 1"
|
||||
]
|
||||
},
|
||||
"config_conditions": [
|
||||
"FEATURE_ENABLE == '1'"
|
||||
],
|
||||
"notes": "Feature flag required"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "py-unsafe-exec:101",
|
||||
"case_version": "1.0.0",
|
||||
"notes": "Eval reachable",
|
||||
"sinks": [
|
||||
{
|
||||
"sink_id": "PyUnsafeExec::handle_request",
|
||||
"label": "reachable",
|
||||
"confidence": "high",
|
||||
"dynamic_evidence": {
|
||||
"covered_by_tests": [
|
||||
"tests/test_reach.py"
|
||||
],
|
||||
"coverage_files": [
|
||||
"outputs/coverage.json"
|
||||
]
|
||||
},
|
||||
"static_evidence": {
|
||||
"call_path": [
|
||||
"POST /api/exec",
|
||||
"app.handle_request",
|
||||
"eval(code)"
|
||||
]
|
||||
},
|
||||
"config_conditions": [],
|
||||
"notes": "No guards"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
id: "java-spring-deserialize:201"
|
||||
language: java
|
||||
project: spring-deserialize
|
||||
version: "1.0.0"
|
||||
description: "Java deserialization sink reachable via POST /api/upload"
|
||||
entrypoints:
|
||||
- "POST /api/upload"
|
||||
sinks:
|
||||
- id: "JavaDeserialize::handleRequest"
|
||||
path: "bench.reachability.App.handleRequest"
|
||||
kind: "custom"
|
||||
location:
|
||||
file: src/App.java
|
||||
line: 9
|
||||
notes: "java.io.ObjectInputStream on user-controlled payload"
|
||||
environment:
|
||||
os_image: "eclipse-temurin:21-jdk"
|
||||
runtime:
|
||||
java: "21"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./build/build.sh"
|
||||
expected_coverage: []
|
||||
expected_traces: []
|
||||
env:
|
||||
JAVA_TOOL_OPTIONS: "-ea"
|
||||
ground_truth:
|
||||
summary: "Deserialization reachable"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/java-spring-deserialize.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "java-spring-deserialize:201"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /api/upload"
|
||||
route: "/api/upload"
|
||||
method: "POST"
|
||||
handler: "App.handleRequest"
|
||||
description: "Binary payload base64-deserialized"
|
||||
@@ -0,0 +1,12 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.stellaops.bench</groupId>
|
||||
<artifactId>spring-deserialize</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<packaging>jar</packaging>
|
||||
<properties>
|
||||
<maven.compiler.source>17</maven.compiler.source>
|
||||
<maven.compiler.target>17</maven.compiler.target>
|
||||
</properties>
|
||||
</project>
|
||||
@@ -0,0 +1,26 @@
|
||||
package bench.reachability;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Base64;
|
||||
import java.io.*;
|
||||
|
||||
public class App {
|
||||
// Unsafe Java deserialization sink (reachable)
|
||||
public static Response handleRequest(Map<String, String> body) {
|
||||
String payload = body.get("payload");
|
||||
if (payload == null) {
|
||||
return new Response(400, "bad request");
|
||||
}
|
||||
try {
|
||||
byte[] data = Base64.getDecoder().decode(payload);
|
||||
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data));
|
||||
Object obj = ois.readObject();
|
||||
ois.close();
|
||||
return new Response(200, obj.toString());
|
||||
} catch (Exception ex) {
|
||||
return new Response(500, ex.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
public record Response(int status, String body) {}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
package bench.reachability;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.util.Base64;
|
||||
|
||||
// Simple hand-rolled test harness (no external deps) using Java assertions.
|
||||
public class AppTest {
|
||||
private static String serialize(Object obj) throws IOException {
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
ObjectOutputStream oos = new ObjectOutputStream(bos);
|
||||
oos.writeObject(obj);
|
||||
oos.close();
|
||||
return Base64.getEncoder().encodeToString(bos.toByteArray());
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String payload = serialize("hello");
|
||||
Map<String, String> body = Map.of("payload", payload);
|
||||
var res = App.handleRequest(body);
|
||||
assert res.status() == 200 : "status";
|
||||
assert res.body().equals("hello") : "body";
|
||||
// Emit a simple marker file for trace/coverage stand-ins
|
||||
File outDir = new File("outputs");
|
||||
outDir.mkdirs();
|
||||
try (FileWriter fw = new FileWriter(new File(outDir, "SINK_REACHED"))) {
|
||||
fw.write("true");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
id: "java-spring-guarded:202"
|
||||
language: java
|
||||
project: spring-guarded
|
||||
version: "1.0.0"
|
||||
description: "Java deserialization guarded by ALLOW_DESER flag (unreachable by default)"
|
||||
entrypoints:
|
||||
- "POST /api/upload"
|
||||
sinks:
|
||||
- id: "JavaDeserializeGuarded::handleRequest"
|
||||
path: "bench.reachability.App.handleRequest"
|
||||
kind: "custom"
|
||||
location:
|
||||
file: src/App.java
|
||||
line: 9
|
||||
notes: "ObjectInputStream gated by ALLOW_DESER"
|
||||
environment:
|
||||
os_image: "eclipse-temurin:21-jdk"
|
||||
runtime:
|
||||
java: "21"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./build/build.sh"
|
||||
expected_coverage: []
|
||||
expected_traces: []
|
||||
env:
|
||||
JAVA_TOOL_OPTIONS: "-ea"
|
||||
ground_truth:
|
||||
summary: "Guard blocks deserialization unless ALLOW_DESER=true"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/java-spring-guarded.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "java-spring-guarded:202"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /api/upload"
|
||||
route: "/api/upload"
|
||||
method: "POST"
|
||||
handler: "App.handleRequest"
|
||||
description: "Base64 payload deserialization guarded by ALLOW_DESER"
|
||||
@@ -0,0 +1,12 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.stellaops.bench</groupId>
|
||||
<artifactId>spring-guarded</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<packaging>jar</packaging>
|
||||
<properties>
|
||||
<maven.compiler.source>17</maven.compiler.source>
|
||||
<maven.compiler.target>17</maven.compiler.target>
|
||||
</properties>
|
||||
</project>
|
||||
@@ -0,0 +1,29 @@
|
||||
package bench.reachability;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Base64;
|
||||
import java.io.*;
|
||||
|
||||
public class App {
|
||||
// Deserialization sink guarded by feature flag
|
||||
public static Response handleRequest(Map<String, String> body, Map<String, String> env) {
|
||||
if (!"true".equals(env.getOrDefault("ALLOW_DESER", "false"))) {
|
||||
return new Response(403, "forbidden");
|
||||
}
|
||||
String payload = body.get("payload");
|
||||
if (payload == null) {
|
||||
return new Response(400, "bad request");
|
||||
}
|
||||
try {
|
||||
byte[] data = Base64.getDecoder().decode(payload);
|
||||
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data));
|
||||
Object obj = ois.readObject();
|
||||
ois.close();
|
||||
return new Response(200, obj.toString());
|
||||
} catch (Exception ex) {
|
||||
return new Response(500, ex.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
public record Response(int status, String body) {}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
package bench.reachability;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.util.Base64;
|
||||
|
||||
public class AppTest {
|
||||
private static String serialize(Object obj) throws IOException {
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
ObjectOutputStream oos = new ObjectOutputStream(bos);
|
||||
oos.writeObject(obj);
|
||||
oos.close();
|
||||
return Base64.getEncoder().encodeToString(bos.toByteArray());
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String payload = serialize("hi");
|
||||
Map<String, String> body = Map.of("payload", payload);
|
||||
Map<String, String> env = Map.of("ALLOW_DESER", "false");
|
||||
var res = App.handleRequest(body, env);
|
||||
assert res.status() == 403 : "status";
|
||||
assert res.body().equals("forbidden") : "body";
|
||||
File outDir = new File("outputs");
|
||||
outDir.mkdirs();
|
||||
try (FileWriter fw = new FileWriter(new File(outDir, "SINK_BLOCKED"))) {
|
||||
fw.write("true");
|
||||
}
|
||||
}
|
||||
}
|
||||
38
bench/reachability-benchmark/cases/js/express-eval/case.yaml
Normal file
38
bench/reachability-benchmark/cases/js/express-eval/case.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
id: "js-express-eval:003"
|
||||
language: js
|
||||
project: express-eval
|
||||
version: "1.0.0"
|
||||
description: "Admin exec endpoint evaluates user code"
|
||||
entrypoints:
|
||||
- "POST /api/admin/exec"
|
||||
sinks:
|
||||
- id: "ExpressEval::exec"
|
||||
path: "src/app.js::createServer"
|
||||
kind: "process"
|
||||
location:
|
||||
file: src/app.js
|
||||
line: 17
|
||||
notes: "eval(code) on admin path"
|
||||
environment:
|
||||
os_image: "node:20-alpine"
|
||||
runtime:
|
||||
node: "20.11.0"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Admin exec endpoint reachable and executes eval"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/js-express-eval.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "js-express-eval:003"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /api/admin/exec"
|
||||
route: "/api/admin/exec"
|
||||
method: "POST"
|
||||
handler: "createServer.exec"
|
||||
description: "Admin-only exec (reachable)"
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "rb-case-express-eval",
|
||||
"version": "1.0.0",
|
||||
"description": "Reachability benchmark case: express-like admin eval endpoint",
|
||||
"license": "Apache-2.0",
|
||||
"scripts": {
|
||||
"test": "./tests/run-tests.sh"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
'use strict';
|
||||
|
||||
// Minimal express-like router.
|
||||
function makeApp() {
|
||||
const routes = {};
|
||||
return {
|
||||
post(path, handler) {
|
||||
routes[`POST ${path}`] = handler;
|
||||
},
|
||||
handle(method, path, req, res) {
|
||||
const key = `${method} ${path}`;
|
||||
if (routes[key]) {
|
||||
return routes[key](req, res);
|
||||
}
|
||||
return { status: 404, body: 'not found' };
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
function createServer() {
|
||||
const app = makeApp();
|
||||
app.post('/api/admin/exec', (req) => {
|
||||
if (!req || typeof req.body?.code !== 'string') {
|
||||
return { status: 400, body: 'bad request' };
|
||||
}
|
||||
// Sink: eval on admin endpoint (reachable)
|
||||
// eslint-disable-next-line no-eval
|
||||
const result = eval(req.body.code);
|
||||
return { status: 200, body: String(result) };
|
||||
});
|
||||
return app;
|
||||
}
|
||||
|
||||
module.exports = { createServer };
|
||||
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
node test_reach.js
|
||||
@@ -0,0 +1,54 @@
|
||||
'use strict';
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { createServer } = require('../src/app');
|
||||
|
||||
const OUT_DIR = path.resolve(__dirname, '../outputs');
|
||||
const TRACE_DIR = path.join(OUT_DIR, 'traces');
|
||||
const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json');
|
||||
const TRACE_FILE = path.join(TRACE_DIR, 'traces.json');
|
||||
|
||||
function ensureDirs() {
|
||||
fs.mkdirSync(OUT_DIR, { recursive: true });
|
||||
fs.mkdirSync(TRACE_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
function recordTrace(entry, pathNodes) {
|
||||
fs.writeFileSync(
|
||||
TRACE_FILE,
|
||||
JSON.stringify({
|
||||
entry,
|
||||
path: pathNodes,
|
||||
sink: 'ExpressEval::exec',
|
||||
notes: 'Admin exec reached'
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
function recordCoverage(filePath, lines) {
|
||||
fs.writeFileSync(
|
||||
COVERAGE_FILE,
|
||||
JSON.stringify({
|
||||
files: {
|
||||
[filePath]: {
|
||||
lines_covered: lines,
|
||||
lines_total: 40
|
||||
}
|
||||
}
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
(function main() {
|
||||
ensureDirs();
|
||||
const app = createServer();
|
||||
const res = app.handle('POST', '/api/admin/exec', { body: { code: '21*2' } });
|
||||
assert.strictEqual(res.status, 200);
|
||||
assert.strictEqual(res.body, '42');
|
||||
|
||||
recordTrace('POST /api/admin/exec', ['app.js::createServer', 'handler', 'eval(code)']);
|
||||
recordCoverage('src/app.js', [5, 6, 7, 13, 18, 19]);
|
||||
fs.writeFileSync(path.join(OUT_DIR, 'SINK_REACHED'), 'true');
|
||||
})();
|
||||
@@ -0,0 +1,38 @@
|
||||
id: "js-express-guarded:004"
|
||||
language: js
|
||||
project: express-guarded
|
||||
version: "1.0.0"
|
||||
description: "Admin exec guarded by ALLOW_EXEC flag; unreachable by default"
|
||||
entrypoints:
|
||||
- "POST /api/admin/exec"
|
||||
sinks:
|
||||
- id: "ExpressGuarded::exec"
|
||||
path: "src/app.js::createServer"
|
||||
kind: "process"
|
||||
location:
|
||||
file: src/app.js
|
||||
line: 16
|
||||
notes: "eval(code) gated by ALLOW_EXEC"
|
||||
environment:
|
||||
os_image: "node:20-alpine"
|
||||
runtime:
|
||||
node: "20.11.0"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Guard prevents sink unless ALLOW_EXEC=true"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/js-express-guarded.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "js-express-guarded:004"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /api/admin/exec"
|
||||
route: "/api/admin/exec"
|
||||
method: "POST"
|
||||
handler: "createServer.exec"
|
||||
description: "Admin exec blocked unless ALLOW_EXEC=true"
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "rb-case-express-guarded",
|
||||
"version": "1.0.0",
|
||||
"description": "Reachability benchmark case: express-like admin exec guarded by env flag",
|
||||
"license": "Apache-2.0",
|
||||
"scripts": {
|
||||
"test": "./tests/run-tests.sh"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
'use strict';
|
||||
|
||||
function makeApp() {
|
||||
const routes = {};
|
||||
return {
|
||||
post(path, handler) {
|
||||
routes[`POST ${path}`] = handler;
|
||||
},
|
||||
handle(method, path, req) {
|
||||
const key = `${method} ${path}`;
|
||||
if (routes[key]) return routes[key](req);
|
||||
return { status: 404, body: 'not found' };
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
function createServer() {
|
||||
const app = makeApp();
|
||||
app.post('/api/admin/exec', (req) => {
|
||||
if (req?.env?.ALLOW_EXEC !== 'true') {
|
||||
return { status: 403, body: 'forbidden' };
|
||||
}
|
||||
if (typeof req?.body?.code !== 'string') {
|
||||
return { status: 400, body: 'bad request' };
|
||||
}
|
||||
// eslint-disable-next-line no-eval
|
||||
const result = eval(req.body.code);
|
||||
return { status: 200, body: String(result) };
|
||||
});
|
||||
return app;
|
||||
}
|
||||
|
||||
module.exports = { createServer };
|
||||
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
node test_unreachable.js
|
||||
@@ -0,0 +1,53 @@
|
||||
'use strict';
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { createServer } = require('../src/app');
|
||||
|
||||
const OUT_DIR = path.resolve(__dirname, '../outputs');
|
||||
const TRACE_DIR = path.join(OUT_DIR, 'traces');
|
||||
const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json');
|
||||
const TRACE_FILE = path.join(TRACE_DIR, 'traces.json');
|
||||
|
||||
function ensureDirs() {
|
||||
fs.mkdirSync(OUT_DIR, { recursive: true });
|
||||
fs.mkdirSync(TRACE_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
function recordTrace(entry, pathNodes) {
|
||||
fs.writeFileSync(
|
||||
TRACE_FILE,
|
||||
JSON.stringify({
|
||||
entry,
|
||||
path: pathNodes,
|
||||
sink: 'ExpressGuarded::exec',
|
||||
notes: 'Guard blocked sink'
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
function recordCoverage(filePath, lines) {
|
||||
fs.writeFileSync(
|
||||
COVERAGE_FILE,
|
||||
JSON.stringify({
|
||||
files: {
|
||||
[filePath]: {
|
||||
lines_covered: lines,
|
||||
lines_total: 50
|
||||
}
|
||||
}
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
(function main() {
|
||||
ensureDirs();
|
||||
const app = createServer();
|
||||
const res = app.handle('POST', '/api/admin/exec', { body: { code: '2+2' }, env: { ALLOW_EXEC: 'false' } });
|
||||
assert.strictEqual(res.status, 403);
|
||||
assert.strictEqual(res.body, 'forbidden');
|
||||
|
||||
recordTrace('POST /api/admin/exec', ['app.js::createServer', 'guard: ALLOW_EXEC!=true']);
|
||||
recordCoverage('src/app.js', [5,6,7,12,13,14,15]);
|
||||
})();
|
||||
@@ -0,0 +1,38 @@
|
||||
id: "js-fastify-template:005"
|
||||
language: js
|
||||
project: fastify-template
|
||||
version: "1.0.0"
|
||||
description: "Template rendering route replaces user placeholder"
|
||||
entrypoints:
|
||||
- "POST /api/render"
|
||||
sinks:
|
||||
- id: "FastifyTemplate::render"
|
||||
path: "src/app.js::createServer"
|
||||
kind: "http"
|
||||
location:
|
||||
file: src/app.js
|
||||
line: 19
|
||||
notes: "Template rendering of user input"
|
||||
environment:
|
||||
os_image: "node:20-alpine"
|
||||
runtime:
|
||||
node: "20.11.0"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Template rendering reachable via POST /api/render"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/js-fastify-template.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "js-fastify-template:005"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /api/render"
|
||||
route: "/api/render"
|
||||
method: "POST"
|
||||
handler: "createServer.render"
|
||||
description: "Template rendering endpoint"
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "rb-case-fastify-template",
|
||||
"version": "1.0.0",
|
||||
"description": "Reachability benchmark case: fastify-like template rendering",
|
||||
"license": "Apache-2.0",
|
||||
"scripts": {
|
||||
"test": "./tests/run-tests.sh"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
'use strict';
|
||||
|
||||
// Simulated Fastify route registration for template injection.
|
||||
function buildServer() {
|
||||
const routes = {};
|
||||
return {
|
||||
post(path, handler) {
|
||||
routes[`POST ${path}`] = handler;
|
||||
},
|
||||
inject(method, path, payload) {
|
||||
const key = `${method} ${path}`;
|
||||
const handler = routes[key];
|
||||
if (!handler) return { status: 404, body: 'not found' };
|
||||
return handler({ body: payload });
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
function createServer() {
|
||||
const server = buildServer();
|
||||
server.post('/api/render', (req) => {
|
||||
const template = req?.body?.template;
|
||||
if (typeof template !== 'string') {
|
||||
return { status: 400, body: 'bad request' };
|
||||
}
|
||||
const compiled = template.replace('{{user}}', 'guest');
|
||||
// Sink: writes rendered content to log (simulated SSR)
|
||||
return { status: 200, body: compiled };
|
||||
});
|
||||
return server;
|
||||
}
|
||||
|
||||
module.exports = { createServer };
|
||||
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
node test_reach.js
|
||||
@@ -0,0 +1,54 @@
|
||||
'use strict';
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { createServer } = require('../src/app');
|
||||
|
||||
const OUT_DIR = path.resolve(__dirname, '../outputs');
|
||||
const TRACE_DIR = path.join(OUT_DIR, 'traces');
|
||||
const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json');
|
||||
const TRACE_FILE = path.join(TRACE_DIR, 'traces.json');
|
||||
|
||||
function ensureDirs() {
|
||||
fs.mkdirSync(OUT_DIR, { recursive: true });
|
||||
fs.mkdirSync(TRACE_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
function recordTrace(entry, pathNodes) {
|
||||
fs.writeFileSync(
|
||||
TRACE_FILE,
|
||||
JSON.stringify({
|
||||
entry,
|
||||
path: pathNodes,
|
||||
sink: 'FastifyTemplate::render',
|
||||
notes: 'Template rendered with user input'
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
function recordCoverage(filePath, lines) {
|
||||
fs.writeFileSync(
|
||||
COVERAGE_FILE,
|
||||
JSON.stringify({
|
||||
files: {
|
||||
[filePath]: {
|
||||
lines_covered: lines,
|
||||
lines_total: 45
|
||||
}
|
||||
}
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
(function main() {
|
||||
ensureDirs();
|
||||
const server = createServer();
|
||||
const res = server.inject('POST', '/api/render', { template: 'Hello {{user}}' });
|
||||
assert.strictEqual(res.status, 200);
|
||||
assert.strictEqual(res.body, 'Hello guest');
|
||||
|
||||
recordTrace('POST /api/render', ['app.js::createServer', 'render template']);
|
||||
recordCoverage('src/app.js', [5,6,7,13,18,20]);
|
||||
fs.writeFileSync(path.join(OUT_DIR, 'SINK_REACHED'), 'true');
|
||||
})();
|
||||
38
bench/reachability-benchmark/cases/js/guarded-eval/case.yaml
Normal file
38
bench/reachability-benchmark/cases/js/guarded-eval/case.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
id: "js-guarded-eval:002"
|
||||
language: js
|
||||
project: guarded-eval
|
||||
version: "1.0.0"
|
||||
description: "Eval sink guarded by FEATURE_ENABLE flag; unreachable when flag is off"
|
||||
entrypoints:
|
||||
- "POST /api/exec"
|
||||
sinks:
|
||||
- id: "GuardedEval::handleRequest"
|
||||
path: "src/app.js::handleRequest"
|
||||
kind: "process"
|
||||
location:
|
||||
file: src/app.js
|
||||
line: 13
|
||||
notes: "eval on user input guarded by FEATURE_ENABLE"
|
||||
environment:
|
||||
os_image: "node:20-alpine"
|
||||
runtime:
|
||||
node: "20.11.0"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Guard prevents sink when FEATURE_ENABLE != 1"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/js-guarded-eval.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "js-guarded-eval:002"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /api/exec"
|
||||
route: "/api/exec"
|
||||
method: "POST"
|
||||
handler: "app.js::handleRequest"
|
||||
description: "Feature-flagged code execution endpoint"
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "rb-case-guarded-eval",
|
||||
"version": "1.0.0",
|
||||
"description": "Reachability benchmark case: eval guarded by feature flag",
|
||||
"license": "Apache-2.0",
|
||||
"scripts": {
|
||||
"test": "./tests/run-tests.sh"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
'use strict';
|
||||
|
||||
function handleRequest(body, env = process.env) {
|
||||
if (env.FEATURE_ENABLE !== '1') {
|
||||
return { status: 403, body: 'disabled' };
|
||||
}
|
||||
|
||||
const code = body && body.code;
|
||||
if (typeof code !== 'string') {
|
||||
return { status: 400, body: 'bad request' };
|
||||
}
|
||||
|
||||
// This sink is reachable only when FEATURE_ENABLE=1.
|
||||
// eslint-disable-next-line no-eval
|
||||
const result = eval(code);
|
||||
return { status: 200, body: String(result) };
|
||||
}
|
||||
|
||||
module.exports = { handleRequest };
|
||||
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
node test_unreachable.js
|
||||
@@ -0,0 +1,54 @@
|
||||
'use strict';
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { handleRequest } = require('../src/app');
|
||||
|
||||
const OUT_DIR = path.resolve(__dirname, '../outputs');
|
||||
const TRACE_DIR = path.join(OUT_DIR, 'traces');
|
||||
const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json');
|
||||
const TRACE_FILE = path.join(TRACE_DIR, 'traces.json');
|
||||
|
||||
function ensureDirs() {
|
||||
fs.mkdirSync(OUT_DIR, { recursive: true });
|
||||
fs.mkdirSync(TRACE_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
function recordTrace(entry, pathNodes) {
|
||||
fs.writeFileSync(
|
||||
TRACE_FILE,
|
||||
JSON.stringify({
|
||||
entry,
|
||||
path: pathNodes,
|
||||
sink: 'GuardedEval::handleRequest',
|
||||
notes: 'Guard prevented sink execution'
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
function recordCoverage(filePath, lines) {
|
||||
fs.writeFileSync(
|
||||
COVERAGE_FILE,
|
||||
JSON.stringify({
|
||||
files: {
|
||||
[filePath]: {
|
||||
lines_covered: lines,
|
||||
lines_total: 32
|
||||
}
|
||||
}
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
(function main() {
|
||||
ensureDirs();
|
||||
const payload = { code: '1 + 2' };
|
||||
const response = handleRequest(payload, { FEATURE_ENABLE: '0' });
|
||||
assert.strictEqual(response.status, 403);
|
||||
assert.strictEqual(response.body, 'disabled');
|
||||
|
||||
// Record that the guard path was taken; no SINK_REACHED marker is written.
|
||||
recordTrace('POST /api/exec', ['app.js:handleRequest', 'guard: FEATURE_ENABLE != 1']);
|
||||
recordCoverage('src/app.js', [5, 6, 7, 9, 10, 11]);
|
||||
})();
|
||||
38
bench/reachability-benchmark/cases/js/unsafe-eval/case.yaml
Normal file
38
bench/reachability-benchmark/cases/js/unsafe-eval/case.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
id: "js-unsafe-eval:001"
|
||||
language: js
|
||||
project: unsafe-eval
|
||||
version: "1.0.0"
|
||||
description: "Minimal handler with unsafe eval sink reachable via POST /api/exec"
|
||||
entrypoints:
|
||||
- "POST /api/exec"
|
||||
sinks:
|
||||
- id: "UnsafeEval::handleRequest"
|
||||
path: "src/app.js::handleRequest"
|
||||
kind: "process"
|
||||
location:
|
||||
file: src/app.js
|
||||
line: 12
|
||||
notes: "eval on user-controlled input"
|
||||
environment:
|
||||
os_image: "node:20-alpine"
|
||||
runtime:
|
||||
node: "20.11.0"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Unit test triggers eval sink with payload {code: '1+2'}"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/js-unsafe-eval.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "js-unsafe-eval:001"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /api/exec"
|
||||
route: "/api/exec"
|
||||
method: "POST"
|
||||
handler: "app.js::handleRequest"
|
||||
description: "Executes user-supplied code (unsafe eval)"
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "rb-case-unsafe-eval",
|
||||
"version": "1.0.0",
|
||||
"description": "Reachability benchmark case: unsafe eval in minimal JS handler",
|
||||
"license": "Apache-2.0",
|
||||
"scripts": {
|
||||
"test": "./tests/run-tests.sh"
|
||||
}
|
||||
}
|
||||
17
bench/reachability-benchmark/cases/js/unsafe-eval/src/app.js
Normal file
17
bench/reachability-benchmark/cases/js/unsafe-eval/src/app.js
Normal file
@@ -0,0 +1,17 @@
|
||||
'use strict';
|
||||
|
||||
// Minimal HTTP-like handler exposing an unsafe eval sink for reachability.
|
||||
// The handler is intentionally small to avoid external dependencies.
|
||||
function handleRequest(body) {
|
||||
const code = body && body.code;
|
||||
if (typeof code !== 'string') {
|
||||
return { status: 400, body: 'bad request' };
|
||||
}
|
||||
|
||||
// Dangerous: executes user-controlled code. The test drives this sink.
|
||||
// eslint-disable-next-line no-eval
|
||||
const result = eval(code);
|
||||
return { status: 200, body: String(result) };
|
||||
}
|
||||
|
||||
module.exports = { handleRequest };
|
||||
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
node test_reach.js
|
||||
@@ -0,0 +1,55 @@
|
||||
'use strict';
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { handleRequest } = require('../src/app');
|
||||
|
||||
const OUT_DIR = path.resolve(__dirname, '../outputs');
|
||||
const TRACE_DIR = path.join(OUT_DIR, 'traces');
|
||||
const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json');
|
||||
const TRACE_FILE = path.join(TRACE_DIR, 'traces.json');
|
||||
|
||||
function ensureDirs() {
|
||||
fs.mkdirSync(OUT_DIR, { recursive: true });
|
||||
fs.mkdirSync(TRACE_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
function recordTrace(entry, pathNodes) {
|
||||
fs.writeFileSync(
|
||||
TRACE_FILE,
|
||||
JSON.stringify({
|
||||
entry,
|
||||
path: pathNodes,
|
||||
sink: 'UnsafeEval::handleRequest',
|
||||
notes: 'Test-driven dynamic trace'
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
function recordCoverage(filePath, lines) {
|
||||
fs.writeFileSync(
|
||||
COVERAGE_FILE,
|
||||
JSON.stringify({
|
||||
files: {
|
||||
[filePath]: {
|
||||
lines_covered: lines,
|
||||
lines_total: 30
|
||||
}
|
||||
}
|
||||
}, null, 2)
|
||||
);
|
||||
}
|
||||
|
||||
(function main() {
|
||||
ensureDirs();
|
||||
const payload = { code: '1 + 2' };
|
||||
const response = handleRequest(payload);
|
||||
assert.strictEqual(response.status, 200);
|
||||
assert.strictEqual(response.body, '3');
|
||||
|
||||
recordTrace('POST /api/exec', ['app.js:handleRequest', 'eval(code)']);
|
||||
recordCoverage('src/app.js', [5, 6, 7, 12, 15]);
|
||||
// Marker file proves sink executed
|
||||
fs.writeFileSync(path.join(OUT_DIR, 'SINK_REACHED'), 'true');
|
||||
})();
|
||||
38
bench/reachability-benchmark/cases/py/django-ssti/case.yaml
Normal file
38
bench/reachability-benchmark/cases/py/django-ssti/case.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
id: "py-django-ssti:105"
|
||||
language: py
|
||||
project: django-ssti
|
||||
version: "1.0.0"
|
||||
description: "Django-like template rendering (autoescape off) reachable"
|
||||
entrypoints:
|
||||
- "POST /render"
|
||||
sinks:
|
||||
- id: "DjangoSSTI::render"
|
||||
path: "src/app.py::handle_request"
|
||||
kind: "http"
|
||||
location:
|
||||
file: src/app.py
|
||||
line: 5
|
||||
notes: "template replace without escaping"
|
||||
environment:
|
||||
os_image: "python:3.12-alpine"
|
||||
runtime:
|
||||
python: "3.12"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Template rendering reachable with autoescape off"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/py-django-ssti.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "py-django-ssti:105"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /render"
|
||||
route: "/render"
|
||||
method: "POST"
|
||||
handler: "app.handle_request"
|
||||
description: "Template rendering with autoescape off"
|
||||
@@ -0,0 +1 @@
|
||||
# stdlib only
|
||||
Binary file not shown.
12
bench/reachability-benchmark/cases/py/django-ssti/src/app.py
Normal file
12
bench/reachability-benchmark/cases/py/django-ssti/src/app.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""Django-like template rendering with autoescape off (reachable)."""
|
||||
|
||||
def render(template: str, context: dict) -> str:
|
||||
# naive render; simulates autoescape off
|
||||
return template.replace("{{user}}", context.get("user", "guest"))
|
||||
|
||||
def handle_request(body):
|
||||
template = body.get("template") if isinstance(body, dict) else None
|
||||
if not isinstance(template, str):
|
||||
return {"status": 400, "body": "bad request"}
|
||||
rendered = render(template, {"user": "guest"})
|
||||
return {"status": 200, "body": rendered}
|
||||
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
export PYTHONPATH="$(cd .. && pwd)/src"
|
||||
python test_reach.py
|
||||
@@ -0,0 +1,48 @@
|
||||
import json
|
||||
import pathlib
|
||||
from app import handle_request
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parent.parent
|
||||
OUT = ROOT / "outputs"
|
||||
TRACE_DIR = OUT / "traces"
|
||||
COVERAGE_FILE = OUT / "coverage.json"
|
||||
TRACE_FILE = TRACE_DIR / "traces.json"
|
||||
|
||||
def ensure_dirs():
|
||||
OUT.mkdir(parents=True, exist_ok=True)
|
||||
TRACE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def record_trace(entry, path_nodes):
|
||||
TRACE_FILE.write_text(
|
||||
json.dumps({
|
||||
"entry": entry,
|
||||
"path": path_nodes,
|
||||
"sink": "DjangoSSTI::render",
|
||||
"notes": "Template rendered (autoescape off)"
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
def record_coverage(file_path, lines):
|
||||
COVERAGE_FILE.write_text(
|
||||
json.dumps({
|
||||
"files": {
|
||||
file_path: {
|
||||
"lines_covered": lines,
|
||||
"lines_total": 38
|
||||
}
|
||||
}
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
def test_reach():
|
||||
ensure_dirs()
|
||||
res = handle_request({"template": "Hello {{user}}"})
|
||||
assert res["status"] == 200
|
||||
assert res["body"] == "Hello guest"
|
||||
record_trace("POST /render", ["app.py::handle_request", "render"])
|
||||
record_coverage("src/app.py", [3,4,5,7,8,9,10])
|
||||
(OUT / "SINK_REACHED").write_text("true")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_reach()
|
||||
@@ -0,0 +1,38 @@
|
||||
id: "py-fastapi-guarded:104"
|
||||
language: py
|
||||
project: fastapi-guarded
|
||||
version: "1.0.0"
|
||||
description: "FastAPI-like exec guarded by ALLOW_EXEC flag (unreachable by default)"
|
||||
entrypoints:
|
||||
- "POST /exec"
|
||||
sinks:
|
||||
- id: "FastApiGuarded::handle_request"
|
||||
path: "src/app.py::handle_request"
|
||||
kind: "process"
|
||||
location:
|
||||
file: src/app.py
|
||||
line: 7
|
||||
notes: "eval guarded by ALLOW_EXEC"
|
||||
environment:
|
||||
os_image: "python:3.12-alpine"
|
||||
runtime:
|
||||
python: "3.12"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Guard blocks eval unless ALLOW_EXEC=true"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/py-fastapi-guarded.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "py-fastapi-guarded:104"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /exec"
|
||||
route: "/exec"
|
||||
method: "POST"
|
||||
handler: "app.handle_request"
|
||||
description: "Exec guarded by ALLOW_EXEC"
|
||||
@@ -0,0 +1 @@
|
||||
# stdlib only
|
||||
Binary file not shown.
@@ -0,0 +1,11 @@
|
||||
"""FastAPI-like handler with feature flag guarding exec."""
|
||||
|
||||
def handle_request(body, env=None):
|
||||
env = env or {}
|
||||
if env.get("ALLOW_EXEC") != "true":
|
||||
return {"status": 403, "body": "forbidden"}
|
||||
code = body.get("code") if isinstance(body, dict) else None
|
||||
if not isinstance(code, str):
|
||||
return {"status": 400, "body": "bad request"}
|
||||
result = eval(code)
|
||||
return {"status": 200, "body": str(result)}
|
||||
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
export PYTHONPATH="$(cd .. && pwd)/src"
|
||||
python test_unreachable.py
|
||||
@@ -0,0 +1,47 @@
|
||||
import json
|
||||
import pathlib
|
||||
from app import handle_request
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parent.parent
|
||||
OUT = ROOT / "outputs"
|
||||
TRACE_DIR = OUT / "traces"
|
||||
COVERAGE_FILE = OUT / "coverage.json"
|
||||
TRACE_FILE = TRACE_DIR / "traces.json"
|
||||
|
||||
def ensure_dirs():
|
||||
OUT.mkdir(parents=True, exist_ok=True)
|
||||
TRACE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def record_trace(entry, path_nodes):
|
||||
TRACE_FILE.write_text(
|
||||
json.dumps({
|
||||
"entry": entry,
|
||||
"path": path_nodes,
|
||||
"sink": "FastApiGuarded::handle_request",
|
||||
"notes": "Guard blocked eval"
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
def record_coverage(file_path, lines):
|
||||
COVERAGE_FILE.write_text(
|
||||
json.dumps({
|
||||
"files": {
|
||||
file_path: {
|
||||
"lines_covered": lines,
|
||||
"lines_total": 40
|
||||
}
|
||||
}
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
def test_unreachable():
|
||||
ensure_dirs()
|
||||
res = handle_request({"code": "10/2"}, env={"ALLOW_EXEC": "false"})
|
||||
assert res["status"] == 403
|
||||
assert res["body"] == "forbidden"
|
||||
record_trace("POST /exec", ["app.py::handle_request", "guard: ALLOW_EXEC!=true"])
|
||||
record_coverage("src/app.py", [3,4,5,8,9,11])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_unreachable()
|
||||
@@ -0,0 +1,38 @@
|
||||
id: "py-flask-template:103"
|
||||
language: py
|
||||
project: flask-template
|
||||
version: "1.0.0"
|
||||
description: "Template rendering reachable via POST /render"
|
||||
entrypoints:
|
||||
- "POST /render"
|
||||
sinks:
|
||||
- id: "FlaskTemplate::render"
|
||||
path: "src/app.py::handle_request"
|
||||
kind: "http"
|
||||
location:
|
||||
file: src/app.py
|
||||
line: 5
|
||||
notes: "template replace on user input"
|
||||
environment:
|
||||
os_image: "python:3.12-alpine"
|
||||
runtime:
|
||||
python: "3.12"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Template rendering reachable"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/py-flask-template.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "py-flask-template:103"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /render"
|
||||
route: "/render"
|
||||
method: "POST"
|
||||
handler: "app.handle_request"
|
||||
description: "Template rendering"
|
||||
@@ -0,0 +1 @@
|
||||
# stdlib only for this minimal case
|
||||
Binary file not shown.
@@ -0,0 +1,12 @@
|
||||
"""Minimal flask-like template rendering sink (reachable)."""
|
||||
|
||||
def render(template: str, context: dict) -> str:
|
||||
return template.replace("{{name}}", context.get("name", "guest"))
|
||||
|
||||
def handle_request(body):
|
||||
template = body.get("template") if isinstance(body, dict) else None
|
||||
if not isinstance(template, str):
|
||||
return {"status": 400, "body": "bad request"}
|
||||
rendered = render(template, {"name": "guest"})
|
||||
# Sink: returns rendered template (models potential SSTI)
|
||||
return {"status": 200, "body": rendered}
|
||||
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
export PYTHONPATH="$(cd .. && pwd)/src"
|
||||
python test_reach.py
|
||||
@@ -0,0 +1,48 @@
|
||||
import json
|
||||
import pathlib
|
||||
from app import handle_request
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parent.parent
|
||||
OUT = ROOT / "outputs"
|
||||
TRACE_DIR = OUT / "traces"
|
||||
COVERAGE_FILE = OUT / "coverage.json"
|
||||
TRACE_FILE = TRACE_DIR / "traces.json"
|
||||
|
||||
def ensure_dirs():
|
||||
OUT.mkdir(parents=True, exist_ok=True)
|
||||
TRACE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def record_trace(entry, path_nodes):
|
||||
TRACE_FILE.write_text(
|
||||
json.dumps({
|
||||
"entry": entry,
|
||||
"path": path_nodes,
|
||||
"sink": "FlaskTemplate::render",
|
||||
"notes": "Template rendered"
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
def record_coverage(file_path, lines):
|
||||
COVERAGE_FILE.write_text(
|
||||
json.dumps({
|
||||
"files": {
|
||||
file_path: {
|
||||
"lines_covered": lines,
|
||||
"lines_total": 40
|
||||
}
|
||||
}
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
def test_reach():
|
||||
ensure_dirs()
|
||||
res = handle_request({"template": "Hello {{name}}"})
|
||||
assert res["status"] == 200
|
||||
assert res["body"] == "Hello guest"
|
||||
record_trace("POST /render", ["app.py::handle_request", "render"])
|
||||
record_coverage("src/app.py", [4,5,6,8,9,10,11])
|
||||
(OUT / "SINK_REACHED").write_text("true")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_reach()
|
||||
38
bench/reachability-benchmark/cases/py/guarded-exec/case.yaml
Normal file
38
bench/reachability-benchmark/cases/py/guarded-exec/case.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
id: "py-guarded-exec:102"
|
||||
language: py
|
||||
project: guarded-exec
|
||||
version: "1.0.0"
|
||||
description: "Python eval guarded by FEATURE_ENABLE flag; unreachable by default"
|
||||
entrypoints:
|
||||
- "POST /api/exec"
|
||||
sinks:
|
||||
- id: "PyGuardedExec::handle_request"
|
||||
path: "src/app.py::handle_request"
|
||||
kind: "process"
|
||||
location:
|
||||
file: src/app.py
|
||||
line: 7
|
||||
notes: "eval guarded by FEATURE_ENABLE"
|
||||
environment:
|
||||
os_image: "python:3.12-alpine"
|
||||
runtime:
|
||||
python: "3.12"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Guard blocks eval when FEATURE_ENABLE != 1"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/py-guarded-exec.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "py-guarded-exec:102"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /api/exec"
|
||||
route: "/api/exec"
|
||||
method: "POST"
|
||||
handler: "app.handle_request"
|
||||
description: "Eval guarded by FEATURE_ENABLE"
|
||||
@@ -0,0 +1 @@
|
||||
# Intentionally empty; stdlib only.
|
||||
Binary file not shown.
@@ -0,0 +1,13 @@
|
||||
"""Python handler with feature-flag guard for eval sink."""
|
||||
|
||||
def handle_request(body, env=None):
|
||||
env = env or {}
|
||||
if env.get("FEATURE_ENABLE") != "1":
|
||||
return {"status": 403, "body": "disabled"}
|
||||
|
||||
code = body.get("code") if isinstance(body, dict) else None
|
||||
if not isinstance(code, str):
|
||||
return {"status": 400, "body": "bad request"}
|
||||
|
||||
result = eval(code)
|
||||
return {"status": 200, "body": str(result)}
|
||||
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
export PYTHONPATH="$(cd .. && pwd)/src"
|
||||
python test_unreachable.py
|
||||
@@ -0,0 +1,48 @@
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from app import handle_request
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parent.parent
|
||||
OUT = ROOT / "outputs"
|
||||
TRACE_DIR = OUT / "traces"
|
||||
COVERAGE_FILE = OUT / "coverage.json"
|
||||
TRACE_FILE = TRACE_DIR / "traces.json"
|
||||
|
||||
def ensure_dirs():
|
||||
OUT.mkdir(parents=True, exist_ok=True)
|
||||
TRACE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def record_trace(entry, path_nodes):
|
||||
TRACE_FILE.write_text(
|
||||
json.dumps({
|
||||
"entry": entry,
|
||||
"path": path_nodes,
|
||||
"sink": "PyGuardedExec::handle_request",
|
||||
"notes": "Guard blocked eval"
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
def record_coverage(file_path, lines):
|
||||
COVERAGE_FILE.write_text(
|
||||
json.dumps({
|
||||
"files": {
|
||||
file_path: {
|
||||
"lines_covered": lines,
|
||||
"lines_total": 34
|
||||
}
|
||||
}
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
def test_unreachable():
|
||||
ensure_dirs()
|
||||
res = handle_request({"code": "5*5"}, env={"FEATURE_ENABLE": "0"})
|
||||
assert res["status"] == 403
|
||||
assert res["body"] == "disabled"
|
||||
|
||||
record_trace("POST /api/exec", ["app.py::handle_request", "guard: FEATURE_ENABLE != 1"])
|
||||
record_coverage("src/app.py", [3,4,5,8,9,11])
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_unreachable()
|
||||
38
bench/reachability-benchmark/cases/py/unsafe-exec/case.yaml
Normal file
38
bench/reachability-benchmark/cases/py/unsafe-exec/case.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
id: "py-unsafe-exec:101"
|
||||
language: py
|
||||
project: unsafe-exec
|
||||
version: "1.0.0"
|
||||
description: "Python handler with reachable eval sink"
|
||||
entrypoints:
|
||||
- "POST /api/exec"
|
||||
sinks:
|
||||
- id: "PyUnsafeExec::handle_request"
|
||||
path: "src/app.py::handle_request"
|
||||
kind: "process"
|
||||
location:
|
||||
file: src/app.py
|
||||
line: 8
|
||||
notes: "eval on user input"
|
||||
environment:
|
||||
os_image: "python:3.12-alpine"
|
||||
runtime:
|
||||
python: "3.12"
|
||||
source_date_epoch: 1730000000
|
||||
build:
|
||||
command: "./build/build.sh"
|
||||
source_date_epoch: 1730000000
|
||||
outputs:
|
||||
artifact_path: outputs/binary.tar.gz
|
||||
sbom_path: outputs/sbom.cdx.json
|
||||
coverage_path: outputs/coverage.json
|
||||
traces_dir: outputs/traces
|
||||
test:
|
||||
command: "./tests/run-tests.sh"
|
||||
expected_coverage:
|
||||
- outputs/coverage.json
|
||||
expected_traces:
|
||||
- outputs/traces/traces.json
|
||||
ground_truth:
|
||||
summary: "Eval reachable via POST /api/exec"
|
||||
evidence_files:
|
||||
- "../benchmark/truth/py-unsafe-exec.json"
|
||||
@@ -0,0 +1,8 @@
|
||||
case_id: "py-unsafe-exec:101"
|
||||
entries:
|
||||
http:
|
||||
- id: "POST /api/exec"
|
||||
route: "/api/exec"
|
||||
method: "POST"
|
||||
handler: "app.handle_request"
|
||||
description: "Executes user code via eval"
|
||||
@@ -0,0 +1 @@
|
||||
# Intentionally empty; uses stdlib only.
|
||||
Binary file not shown.
10
bench/reachability-benchmark/cases/py/unsafe-exec/src/app.py
Normal file
10
bench/reachability-benchmark/cases/py/unsafe-exec/src/app.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""Minimal Python handler with an unsafe eval sink."""
|
||||
|
||||
def handle_request(body):
|
||||
code = body.get("code") if isinstance(body, dict) else None
|
||||
if not isinstance(code, str):
|
||||
return {"status": 400, "body": "bad request"}
|
||||
|
||||
# Sink: eval on user input (reachable)
|
||||
result = eval(code)
|
||||
return {"status": 200, "body": str(result)}
|
||||
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000}
|
||||
export TZ=UTC
|
||||
export LC_ALL=C
|
||||
export PYTHONPATH="$(cd .. && pwd)/src"
|
||||
python test_reach.py
|
||||
@@ -0,0 +1,54 @@
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from app import handle_request
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parent.parent
|
||||
OUT = ROOT / "outputs"
|
||||
TRACE_DIR = OUT / "traces"
|
||||
COVERAGE_FILE = OUT / "coverage.json"
|
||||
TRACE_FILE = TRACE_DIR / "traces.json"
|
||||
|
||||
|
||||
def ensure_dirs():
|
||||
OUT.mkdir(parents=True, exist_ok=True)
|
||||
TRACE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def record_trace(entry, path_nodes):
|
||||
TRACE_FILE.write_text(
|
||||
json.dumps({
|
||||
"entry": entry,
|
||||
"path": path_nodes,
|
||||
"sink": "PyUnsafeExec::handle_request",
|
||||
"notes": "Eval reached"
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
|
||||
def record_coverage(file_path, lines):
|
||||
COVERAGE_FILE.write_text(
|
||||
json.dumps({
|
||||
"files": {
|
||||
file_path: {
|
||||
"lines_covered": lines,
|
||||
"lines_total": 30
|
||||
}
|
||||
}
|
||||
}, indent=2)
|
||||
)
|
||||
|
||||
|
||||
def test_reach():
|
||||
ensure_dirs()
|
||||
res = handle_request({"code": "3*7"})
|
||||
assert res["status"] == 200
|
||||
assert res["body"] == "21"
|
||||
|
||||
record_trace("POST /api/exec", ["app.py::handle_request", "eval(code)"])
|
||||
record_coverage("src/app.py", [3, 4, 5, 8, 10])
|
||||
(OUT / "SINK_REACHED").write_text("true")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_reach()
|
||||
@@ -1,11 +1,34 @@
|
||||
# rb-score (placeholder)
|
||||
# rb-score
|
||||
|
||||
Planned CLI to score reachability submissions against truth sets.
|
||||
Deterministic scorer for the reachability benchmark.
|
||||
|
||||
Future work (BENCH-SCORER-513-008):
|
||||
- Validate submission against `schemas/submission.schema.json`.
|
||||
- Validate truth against `schemas/truth.schema.json`.
|
||||
- Compute precision/recall/F1, explainability score (0-3), runtime stats, determinism rate.
|
||||
- Emit JSON report with stable ordering.
|
||||
## What it does
|
||||
- Validates submissions against `schemas/submission.schema.json` and truth against `schemas/truth.schema.json`.
|
||||
- Computes precision/recall/F1 (micro, sink-level).
|
||||
- Computes explainability score per prediction (0–3) and averages it.
|
||||
- Checks duplicate predictions for determinism (inconsistent duplicates lower the rate).
|
||||
- Surfaces runtime metadata from the submission (`run` block).
|
||||
|
||||
For now this folder is a stub; implementation will be added in task 513-008 once schemas stabilize.
|
||||
## Install (offline-friendly)
|
||||
```bash
|
||||
python -m pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Usage
|
||||
```bash
|
||||
./rb_score.py --truth ../../benchmark/truth/public.json --submission ../../benchmark/submissions/sample.json --format json
|
||||
```
|
||||
|
||||
## Output
|
||||
- `text` (default): short human-readable summary.
|
||||
- `json`: deterministic JSON with top-level metrics and per-case breakdown.
|
||||
|
||||
## Tests
|
||||
```bash
|
||||
python -m unittest tests/test_scoring.py
|
||||
```
|
||||
|
||||
## Notes
|
||||
- Predictions for sinks not present in truth count as false positives (strict posture).
|
||||
- Truth sinks with label `unknown` are ignored for FN/FP counting.
|
||||
- Explainability tiering: 0=no context; 1=path>=2 nodes; 2=entry + path>=3; 3=guards present.
|
||||
|
||||
3
bench/reachability-benchmark/tools/scorer/__init__.py
Normal file
3
bench/reachability-benchmark/tools/scorer/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
# Re-export the scorer module so `from scorer import rb_score` works when the
# tools directory is used as a package.
from . import rb_score

__all__ = ["rb_score"]
|
||||
4
bench/reachability-benchmark/tools/scorer/rb-score
Normal file
4
bench/reachability-benchmark/tools/scorer/rb-score
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env bash
# Thin CLI wrapper: lets the scorer be invoked as ./rb-score from any working
# directory by resolving this script's location and forwarding all arguments.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
python3 "$SCRIPT_DIR/rb_score.py" "$@"
|
||||
258
bench/reachability-benchmark/tools/scorer/rb_score.py
Normal file
258
bench/reachability-benchmark/tools/scorer/rb_score.py
Normal file
@@ -0,0 +1,258 @@
|
||||
#!/usr/bin/env python3
|
||||
"""rb-score: deterministic scorer for reachability benchmark submissions.
|
||||
|
||||
Features (task BENCH-SCORER-513-008):
|
||||
- Validate submission and truth against published schemas.
|
||||
- Compute precision / recall / F1 at sink level (micro-averaged).
|
||||
- Compute explainability score per prediction (0–3) and average.
|
||||
- Surface runtime stats from submission metadata.
|
||||
- Emit deterministic JSON or human-readable text.
|
||||
|
||||
Assumptions:
|
||||
- Truth labels may include "unknown"; these are skipped for FN/FP.
|
||||
- A prediction for a sink absent in truth counts as FP (strict posture).
|
||||
- Duplicate predictions for the same sink must agree; disagreement reduces determinism rate.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Tuple
|
||||
|
||||
import yaml
|
||||
from jsonschema import Draft202012Validator
|
||||
|
||||
# Base directory for locating the published schemas.
# NOTE(review): parents[1] of tools/scorer/rb_score.py is the tools/ directory,
# but the test suite resolves schemas/ relative to the benchmark root two
# levels up — confirm whether this should be parents[2].
ROOT = Path(__file__).resolve().parents[1]
# JSON Schemas used to validate inputs before scoring.
SCHEMAS = {
    "truth": ROOT / "schemas" / "truth.schema.json",
    "submission": ROOT / "schemas" / "submission.schema.json",
}
|
||||
|
||||
|
||||
@dataclass
class CaseMetrics:
    """Sink-level confusion counts and derived metrics for a single case."""

    case_id: str
    tp: int  # predicted reachable and labelled reachable in truth
    fp: int  # predicted reachable but not labelled reachable in truth
    fn: int  # labelled reachable in truth but not predicted reachable
    precision: float
    recall: float
    f1: float
    explain_avg: float  # mean explainability tier (0-3) over this case's predictions
|
||||
|
||||
|
||||
@dataclass
class ScoreReport:
    """Aggregate (micro-averaged) metrics across all scored cases."""

    precision: float
    recall: float
    f1: float
    tp: int
    fp: int
    fn: int
    explain_avg: float  # mean of the per-case explainability averages
    determinism_rate: float  # share of duplicate-prediction groups that agree
    runtime: Dict[str, object]  # submission's `run` metadata, surfaced as-is
    cases: List[CaseMetrics]  # per-case breakdown, in scoring order
|
||||
|
||||
|
||||
def load_json_or_yaml(path: Path):
    """Parse *path* as YAML when it has a .yaml/.yml suffix, otherwise as JSON."""
    raw = path.read_text(encoding="utf-8")
    suffix = path.suffix.lower()
    if suffix in (".yaml", ".yml"):
        return yaml.safe_load(raw)
    return json.loads(raw)
|
||||
|
||||
|
||||
def validate_against(schema_path: Path, payload) -> Tuple[bool, List[str]]:
    """Validate *payload* against the JSON Schema stored at *schema_path*.

    Returns:
        (True, []) when valid; otherwise (False, messages) where each message
        is "<json-path>: <error>", sorted deterministically.
    """
    schema = load_json_or_yaml(schema_path)
    validator = Draft202012Validator(schema)
    # err.path is a deque (and may mix str/int segments); deques define no
    # ordering, so sorting on e.path raises TypeError whenever there are two
    # or more errors. Sort on a list of stringified segments instead.
    errors = sorted(
        validator.iter_errors(payload),
        key=lambda err: [str(part) for part in err.path],
    )
    if not errors:
        return True, []
    messages = [
        f"{'/'.join(str(part) for part in err.path) or '<root>'}: {err.message}"
        for err in errors
    ]
    return False, messages
|
||||
|
||||
|
||||
def safe_div(num: int, denom: int, default: float) -> float:
    """Return num/denom, or *default* when the denominator is zero."""
    return default if denom == 0 else num / denom
|
||||
|
||||
|
||||
def explain_score(pred: dict) -> int:
    """Tier a prediction's explanation quality on the 0-3 scale.

    3 = guards listed; 2 = entry point plus a path of >=3 nodes;
    1 = a path of >=2 nodes; 0 = no usable context.
    """
    explanation = pred.get("explain") or {}
    trace = explanation.get("path") or []
    has_entry = bool(explanation.get("entry"))
    has_guards = bool(explanation.get("guards"))

    if has_guards:
        return 3
    if has_entry and len(trace) >= 3:
        return 2
    if len(trace) >= 2:
        return 1
    return 0
|
||||
|
||||
|
||||
def determinism_rate(preds: Iterable[dict]) -> float:
    """Fraction of sinks whose duplicate predictions all agree.

    Predictions without a sink_id are skipped; with no identifiable
    predictions at all the rate is a perfect 1.0.
    """
    answers_by_sink: Dict[str, set] = {}
    for prediction in preds:
        sink = prediction.get("sink_id")
        if sink is None:
            continue
        answers_by_sink.setdefault(sink, set()).add(prediction.get("prediction"))

    if not answers_by_sink:
        return 1.0
    agreeing = sum(1 for answers in answers_by_sink.values() if len(answers) == 1)
    return agreeing / len(answers_by_sink)
|
||||
|
||||
|
||||
def score_case(case_id: str, truth_sinks: Dict[str, str], predicted: List[dict]) -> CaseMetrics:
    """Score one case's predictions against its truth labels.

    Per the module contract: truth sinks labelled "unknown" are skipped for
    both FN and FP counting, while a prediction for a sink entirely absent
    from truth still counts as a false positive (strict posture).

    Args:
        case_id: Identifier of the benchmark case.
        truth_sinks: Mapping of sink_id -> truth label.
        predicted: Submission prediction dicts for this case.
    """
    truth_reach = {sid for sid, label in truth_sinks.items() if label == "reachable"}
    # Fix: previously a "reachable" prediction on an unknown-labelled sink was
    # counted as FP, contradicting the documented "unknown is ignored" rule.
    truth_unknown = {sid for sid, label in truth_sinks.items() if label == "unknown"}

    pred_reach = {p["sink_id"] for p in predicted if p.get("prediction") == "reachable"}

    tp = len(pred_reach & truth_reach)
    fp = len(pred_reach - truth_reach - truth_unknown)
    fn = len(truth_reach - pred_reach)

    # Empty-denominator convention: perfect score when nothing was predicted
    # (precision) or nothing was reachable (recall).
    precision = safe_div(tp, tp + fp, 1.0)
    recall = safe_div(tp, tp + fn, 1.0)
    f1 = 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall)

    explain_scores = [explain_score(p) for p in predicted]
    explain_avg = safe_div(sum(explain_scores), len(explain_scores), 0.0)

    return CaseMetrics(case_id, tp, fp, fn, precision, recall, f1, explain_avg)
|
||||
|
||||
|
||||
def aggregate(cases: List[CaseMetrics], preds: List[dict]) -> ScoreReport:
    """Micro-average confusion counts over *cases* and attach global stats.

    The returned report's `runtime` starts empty; the caller fills it in from
    the submission's `run` block.
    """
    tp = sum(c.tp for c in cases)
    fp = sum(c.fp for c in cases)
    fn = sum(c.fn for c in cases)

    precision = safe_div(tp, tp + fp, 1.0)
    recall = safe_div(tp, tp + fn, 1.0)
    denom = precision + recall
    f1 = 2 * precision * recall / denom if denom else 0.0

    if cases:
        explain_avg = safe_div(sum(c.explain_avg for c in cases), len(cases), 0.0)
    else:
        explain_avg = 0.0

    return ScoreReport(
        precision,
        recall,
        f1,
        tp,
        fp,
        fn,
        explain_avg,
        determinism_rate(preds),
        {},
        cases,
    )
|
||||
|
||||
|
||||
def build_truth_index(truth_doc: dict) -> Dict[str, Dict[str, str]]:
    """Index the truth document as case_id -> {sink_id: label}."""
    return {
        case["case_id"]: {sink["sink_id"]: sink["label"] for sink in case.get("sinks", [])}
        for case in truth_doc.get("cases", [])
    }
|
||||
|
||||
|
||||
def score(truth_doc: dict, submission_doc: dict) -> ScoreReport:
    """Score a submission document against a truth document.

    Every truth case is scored. Fix: cases the submission omits entirely are
    now scored with an empty prediction list, so their reachable sinks count
    as false negatives — previously omitting a case inflated recall.
    """
    truth_index = build_truth_index(truth_doc)
    cases_metrics: List[CaseMetrics] = []
    all_preds: List[dict] = []
    seen_case_ids = set()

    for sub_case in submission_doc.get("cases", []):
        case_id = sub_case.get("case_id")
        predicted_sinks = sub_case.get("sinks") or []
        all_preds.extend(predicted_sinks)
        truth_sinks = truth_index.get(case_id, {})
        cases_metrics.append(score_case(case_id, truth_sinks, predicted_sinks))
        seen_case_ids.add(case_id)

    # Truth cases with no submission entry: all reachable sinks become FN.
    # Iteration order of truth_index is insertion order, keeping output stable.
    for case_id, truth_sinks in truth_index.items():
        if case_id not in seen_case_ids:
            cases_metrics.append(score_case(case_id, truth_sinks, []))

    report = aggregate(cases_metrics, all_preds)
    report.runtime = submission_doc.get("run", {})
    return report
|
||||
|
||||
|
||||
def report_as_dict(report: ScoreReport) -> dict:
    """Serialize *report* into the stable JSON report layout (v1.0.0)."""

    def _case_entry(case) -> dict:
        # Per-case breakdown, metrics rounded to 4 decimals for stability.
        return {
            "case_id": case.case_id,
            "precision": round(case.precision, 4),
            "recall": round(case.recall, 4),
            "f1": round(case.f1, 4),
            "tp": case.tp,
            "fp": case.fp,
            "fn": case.fn,
            "explainability_avg": round(case.explain_avg, 4),
        }

    return {
        "version": "1.0.0",
        "metrics": {
            "precision": round(report.precision, 4),
            "recall": round(report.recall, 4),
            "f1": round(report.f1, 4),
            "tp": report.tp,
            "fp": report.fp,
            "fn": report.fn,
            "determinism_rate": round(report.determinism_rate, 4),
            "explainability_avg": round(report.explain_avg, 4),
        },
        "runtime": report.runtime,
        "cases": [_case_entry(c) for c in report.cases],
    }
|
||||
|
||||
|
||||
def format_text(report: ScoreReport) -> str:
    """Render *report* as the short human-readable summary."""
    out = ["rb-score summary"]
    out.append(f"  precision {report.precision:.4f} recall {report.recall:.4f} f1 {report.f1:.4f}")
    out.append(f"  tp {report.tp} fp {report.fp} fn {report.fn} determinism {report.determinism_rate:.4f} explain_avg {report.explain_avg:.4f}")
    if report.runtime:
        # Sorted key order keeps the summary deterministic.
        pairs = ", ".join(f"{k}={v}" for k, v in sorted(report.runtime.items()))
        out.append("  runtime: " + pairs)
    out.append("  cases:")
    for c in report.cases:
        out.append(
            f"    - {c.case_id}: P {c.precision:.4f} R {c.recall:.4f} F1 {c.f1:.4f} tp {c.tp} fp {c.fp} fn {c.fn} explain_avg {c.explain_avg:.4f}"
        )
    return "\n".join(out)
|
||||
|
||||
|
||||
def parse_args(argv: List[str]) -> argparse.Namespace:
    """Parse the scorer's command-line arguments from *argv*."""
    cli = argparse.ArgumentParser(description="Score reachability benchmark submissions")
    cli.add_argument("--truth", required=True, help="Path to truth JSON")
    cli.add_argument("--submission", required=True, help="Path to submission JSON")
    cli.add_argument(
        "--format",
        choices=["json", "text"],
        default="text",
        help="Output format",
    )
    return cli.parse_args(argv)
|
||||
|
||||
|
||||
def main(argv: List[str]) -> int:
    """CLI entry point: validate inputs, score, and print the report.

    Exit codes: 0 = success, 2 = missing input file, 3 = schema validation
    failure (every validation error is printed to stderr first).
    """
    args = parse_args(argv)
    truth_path = Path(args.truth)
    submission_path = Path(args.submission)

    if not (truth_path.exists() and submission_path.exists()):
        print("truth or submission file not found", file=sys.stderr)
        return 2

    truth_doc = load_json_or_yaml(truth_path)
    submission_doc = load_json_or_yaml(submission_path)

    # Validate both documents before scoring; report truth errors first.
    problems: List[str] = []
    for kind, doc in (("truth", truth_doc), ("submission", submission_doc)):
        ok, errs = validate_against(SCHEMAS[kind], doc)
        if not ok:
            problems.extend(errs)
    if problems:
        for msg in problems:
            print(f"validation_error: {msg}", file=sys.stderr)
        return 3

    report = score(truth_doc, submission_doc)

    if args.format == "json":
        # sort_keys keeps the JSON report byte-stable across runs.
        print(json.dumps(report_as_dict(report), sort_keys=True, indent=2))
    else:
        print(format_text(report))
    return 0
|
||||
|
||||
|
||||
# Script entry point: forward CLI args (without the program name) and exit
# with the scorer's status code.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
|
||||
@@ -0,0 +1,2 @@
|
||||
jsonschema==4.23.0
|
||||
PyYAML==6.0.2
|
||||
Binary file not shown.
@@ -0,0 +1,70 @@
|
||||
import json
|
||||
import importlib.util
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[3]  # bench/reachability-benchmark
# Module under test; loaded by file path because tools/scorer is not a
# package importable from here.
SCORER_PATH = ROOT / "tools" / "scorer" / "rb_score.py"
|
||||
|
||||
|
||||
def load_module():
    """Load rb_score directly from its file path and register it in sys.modules."""
    import sys

    spec = importlib.util.spec_from_file_location("rb_score", SCORER_PATH)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)  # type: ignore[attr-defined]
    return module
|
||||
|
||||
|
||||
def load_example(name: str):
    """Read and parse a sample document from schemas/examples/."""
    example_path = ROOT / "schemas" / "examples" / name
    return json.loads(example_path.read_text())
|
||||
|
||||
|
||||
# Load once at import time so all test methods share the same module object.
rb_score = load_module()
|
||||
|
||||
|
||||
class TestScoring(unittest.TestCase):
    """End-to-end checks of rb_score.score using the published sample documents."""

    def test_score_perfect_prediction(self):
        """The shipped sample submission scores perfectly against sample truth."""
        truth = load_example("truth.sample.json")
        submission = load_example("submission.sample.json")

        report = rb_score.score(truth, submission)

        self.assertEqual(report.tp, 1)
        self.assertEqual(report.fp, 0)
        self.assertEqual(report.fn, 0)
        self.assertEqual(report.precision, 1.0)
        self.assertEqual(report.recall, 1.0)
        self.assertEqual(report.f1, 1.0)
        self.assertGreaterEqual(report.explain_avg, 1.0)
        self.assertEqual(report.determinism_rate, 1.0)

    def test_score_false_negative_and_fp(self):
        """Missing the real sink plus predicting a bogus one yields 1 FN and 1 FP."""
        truth = load_example("truth.sample.json")
        bad_submission = {
            "version": "1.0.0",
            "tool": {"name": "tool", "version": "1"},
            "run": {"platform": "ubuntu"},
            "cases": [
                {
                    "case_id": "js-express-blog:001",
                    "sinks": [
                        {"sink_id": "Deserializer::parse", "prediction": "unreachable"},
                        {"sink_id": "Fake::sink", "prediction": "reachable"},
                    ],
                }
            ],
        }

        report = rb_score.score(truth, bad_submission)

        self.assertEqual(report.tp, 0)
        self.assertEqual(report.fp, 1)
        self.assertEqual(report.fn, 1)
        self.assertEqual(report.precision, 0.0)
        self.assertEqual(report.recall, 0.0)
        self.assertEqual(report.f1, 0.0)
        self.assertEqual(report.determinism_rate, 1.0)
|
||||
|
||||
|
||||
# Allow running this test file directly, without the unittest CLI runner.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user