This commit is contained in:
StellaOps Bot
2025-12-13 02:22:15 +02:00
parent 564df71bfb
commit 999e26a48e
395 changed files with 25045 additions and 2224 deletions

View File

@@ -0,0 +1,87 @@
# Reachability Test Datasets
This directory contains ground truth samples for validating reachability analysis accuracy.
## Directory Structure
```
datasets/reachability/
├── README.md # This file
├── samples/ # Test samples by language
│ ├── csharp/
│ │ ├── simple-reachable/ # Positive: direct call path
│ │ └── dead-code/ # Negative: unreachable code
│ ├── java/
│ │ └── vulnerable-log4j/ # Positive: Log4Shell CVE
│ └── native/
│ └── stripped-elf/ # Positive: stripped binary
└── schema/
├── manifest.schema.json # Sample manifest schema
└── ground-truth.schema.json # Ground truth schema
```
## Sample Categories
### Positive (Reachable)
Samples where vulnerable code has a confirmed path from entry points:
- `csharp/simple-reachable` - Direct call to vulnerable API
- `java/vulnerable-log4j` - Log4Shell with runtime confirmation
- `native/stripped-elf` - Stripped ELF with heuristic analysis
### Negative (Unreachable)
Samples where vulnerable code exists but is never called:
- `csharp/dead-code` - Deprecated API replaced by safe implementation
## Schema Reference
### manifest.json
Sample metadata including:
- `sampleId` - Unique identifier
- `language` - Primary language (java, csharp, native, etc.)
- `category` - positive, negative, or contested
- `vulnerabilities` - CVEs and affected symbols
- `artifacts` - Binary/SBOM file references
### ground-truth.json
Expected outcomes including:
- `targets` - Symbols with expected lattice states
- `entryPoints` - Program entry points
- `expectedUncertainty` - Expected uncertainty tier
- `expectedGateDecisions` - Expected policy gate outcomes
## Lattice States
| Code | Name | Description |
|------|------|-------------|
| U | Unknown | No analysis performed |
| SR | StaticallyReachable | Static analysis finds path |
| SU | StaticallyUnreachable | Static analysis finds no path |
| RO | RuntimeObserved | Runtime probe observed execution |
| RU | RuntimeUnobserved | Runtime probe did not observe execution |
| CR | ConfirmedReachable | Both static and runtime confirm reachable |
| CU | ConfirmedUnreachable | Both static and runtime confirm unreachable |
| X | Contested | Static and runtime evidence conflict |
## Running Tests
```bash
# Validate schemas
npx ajv validate -s schema/ground-truth.schema.json -d "samples/**/ground-truth.json"
# Run benchmark tests
dotnet test --filter "GroundTruth" src/Scanner/__Tests/StellaOps.Scanner.Reachability.Benchmarks/
```
## Adding New Samples
1. Create directory: `samples/{language}/{sample-name}/`
2. Add `manifest.json` with sample metadata
3. Add `ground-truth.json` with expected outcomes
4. Include `reasoning` for each target explaining the expected state
5. Validate against schema before committing
## Related Documentation
- [Ground Truth Schema](../../docs/reachability/ground-truth-schema.md)
- [Lattice Model](../../docs/reachability/lattice.md)
- [Policy Gates](../../docs/reachability/policy-gate.md)

View File

@@ -0,0 +1,86 @@
{
"schema": "ground-truth-v1",
"sampleId": "sample:csharp:dead-code:001",
"generatedAt": "2025-12-13T12:00:00Z",
"generator": {
"name": "manual-annotation",
"version": "1.0.0",
"annotator": "scanner-guild"
},
"targets": [
{
"symbolId": "sym:csharp:JsonConvert.DeserializeObject",
"display": "Newtonsoft.Json.JsonConvert.DeserializeObject<T>(string, JsonSerializerSettings)",
"purl": "pkg:nuget/Newtonsoft.Json@13.0.1",
"expected": {
"latticeState": "CU",
"bucket": "unreachable",
"reachable": false,
"confidence": 0.95,
"pathLength": null,
"path": null
},
"reasoning": "DeserializeObject referenced in deprecated LegacyParser class but LegacyParser is never instantiated - new SafeParser uses System.Text.Json instead"
},
{
"symbolId": "sym:csharp:LegacyParser.ParseJson",
"display": "SampleApp.LegacyParser.ParseJson(string)",
"purl": "pkg:generic/SampleApp@1.0.0",
"expected": {
"latticeState": "SU",
"bucket": "unreachable",
"reachable": false,
"confidence": 0.90,
"pathLength": null,
"path": null
},
"reasoning": "LegacyParser.ParseJson exists but LegacyParser is never instantiated - replaced by SafeParser"
},
{
"symbolId": "sym:csharp:SafeParser.ParseJson",
"display": "SampleApp.SafeParser.ParseJson(string)",
"purl": "pkg:generic/SampleApp@1.0.0",
"expected": {
"latticeState": "SR",
"bucket": "direct",
"reachable": true,
"confidence": 0.95,
"pathLength": 2,
"path": [
"sym:csharp:Program.Main",
"sym:csharp:SafeParser.ParseJson"
]
},
"reasoning": "SafeParser.ParseJson is the active implementation called from Main"
}
],
"entryPoints": [
{
"symbolId": "sym:csharp:Program.Main",
"display": "SampleApp.Program.Main(string[])",
"phase": "runtime",
"source": "manifest"
}
],
"expectedUncertainty": {
"states": [],
"aggregateTier": "T4",
"riskScore": 0.0
},
"expectedGateDecisions": [
{
"vulnId": "CVE-2024-21907",
"targetSymbol": "sym:csharp:JsonConvert.DeserializeObject",
"requestedStatus": "not_affected",
"expectedDecision": "allow",
"expectedReason": "CU state allows not_affected - confirmed unreachable"
},
{
"vulnId": "CVE-2024-21907",
"targetSymbol": "sym:csharp:JsonConvert.DeserializeObject",
"requestedStatus": "affected",
"expectedDecision": "warn",
"expectedReason": "Marking as affected when CU suggests false positive"
}
]
}

View File

@@ -0,0 +1,27 @@
{
"sampleId": "sample:csharp:dead-code:001",
"version": "1.0.0",
"createdAt": "2025-12-13T12:00:00Z",
"language": "csharp",
"category": "negative",
"description": "C# app where vulnerable code exists but is never called - deprecated API replaced by safe implementation",
"source": {
"repository": "synthetic",
"commit": "synthetic-sample",
"buildToolchain": "dotnet:10.0"
},
"vulnerabilities": [
{
"vulnId": "CVE-2024-21907",
"purl": "pkg:nuget/Newtonsoft.Json@13.0.1",
"affectedSymbol": "Newtonsoft.Json.JsonConvert.DeserializeObject"
}
],
"artifacts": [
{
"path": "artifacts/app.dll",
"hash": "sha256:0000000000000000000000000000000000000000000000000000000000000002",
"type": "application/x-msdownload"
}
]
}

View File

@@ -0,0 +1,79 @@
{
"schema": "ground-truth-v1",
"sampleId": "sample:csharp:simple-reachable:001",
"generatedAt": "2025-12-13T12:00:00Z",
"generator": {
"name": "manual-annotation",
"version": "1.0.0",
"annotator": "scanner-guild"
},
"targets": [
{
"symbolId": "sym:csharp:JsonConvert.DeserializeObject",
"display": "Newtonsoft.Json.JsonConvert.DeserializeObject<T>(string, JsonSerializerSettings)",
"purl": "pkg:nuget/Newtonsoft.Json@13.0.1",
"expected": {
"latticeState": "SR",
"bucket": "direct",
"reachable": true,
"confidence": 0.95,
"pathLength": 2,
"path": [
"sym:csharp:Program.Main",
"sym:csharp:JsonConvert.DeserializeObject"
]
},
"reasoning": "Direct call from Main() to JsonConvert.DeserializeObject with TypeNameHandling.All settings"
},
{
"symbolId": "sym:csharp:JsonConvert.SerializeObject",
"display": "Newtonsoft.Json.JsonConvert.SerializeObject(object)",
"purl": "pkg:nuget/Newtonsoft.Json@13.0.1",
"expected": {
"latticeState": "SU",
"bucket": "unreachable",
"reachable": false,
"confidence": 0.90,
"pathLength": null,
"path": null
},
"reasoning": "SerializeObject is present in the dependency but never called from any entry point"
}
],
"entryPoints": [
{
"symbolId": "sym:csharp:Program.Main",
"display": "SampleApp.Program.Main(string[])",
"phase": "runtime",
"source": "manifest"
}
],
"expectedUncertainty": {
"states": [],
"aggregateTier": "T4",
"riskScore": 0.0
},
"expectedGateDecisions": [
{
"vulnId": "CVE-2024-21907",
"targetSymbol": "sym:csharp:JsonConvert.DeserializeObject",
"requestedStatus": "not_affected",
"expectedDecision": "block",
"expectedBlockedBy": "LatticeState",
"expectedReason": "SR state incompatible with not_affected - code path exists from entry point"
},
{
"vulnId": "CVE-2024-21907",
"targetSymbol": "sym:csharp:JsonConvert.DeserializeObject",
"requestedStatus": "affected",
"expectedDecision": "allow"
},
{
"vulnId": "CVE-2024-21907",
"targetSymbol": "sym:csharp:JsonConvert.SerializeObject",
"requestedStatus": "not_affected",
"expectedDecision": "allow",
"expectedReason": "SU state allows not_affected - unreachable code path"
}
]
}

View File

@@ -0,0 +1,27 @@
{
"sampleId": "sample:csharp:simple-reachable:001",
"version": "1.0.0",
"createdAt": "2025-12-13T12:00:00Z",
"language": "csharp",
"category": "positive",
"description": "Simple C# console app with direct call path to vulnerable Newtonsoft.Json TypeNameHandling usage",
"source": {
"repository": "synthetic",
"commit": "synthetic-sample",
"buildToolchain": "dotnet:10.0"
},
"vulnerabilities": [
{
"vulnId": "CVE-2024-21907",
"purl": "pkg:nuget/Newtonsoft.Json@13.0.1",
"affectedSymbol": "Newtonsoft.Json.JsonConvert.DeserializeObject"
}
],
"artifacts": [
{
"path": "artifacts/app.dll",
"hash": "sha256:0000000000000000000000000000000000000000000000000000000000000001",
"type": "application/x-msdownload"
}
]
}

View File

@@ -0,0 +1,108 @@
{
"schema": "ground-truth-v1",
"sampleId": "sample:java:vulnerable-log4j:001",
"generatedAt": "2025-12-13T12:00:00Z",
"generator": {
"name": "manual-annotation",
"version": "1.0.0",
"annotator": "security-team"
},
"targets": [
{
"symbolId": "sym:java:log4j.JndiLookup.lookup",
"display": "org.apache.logging.log4j.core.lookup.JndiLookup.lookup(LogEvent, String)",
"purl": "pkg:maven/org.apache.logging.log4j/log4j-core@2.14.1",
"expected": {
"latticeState": "CR",
"bucket": "runtime",
"reachable": true,
"confidence": 0.98,
"pathLength": 4,
"path": [
"sym:java:HttpRequestHandler.handle",
"sym:java:LogManager.getLogger",
"sym:java:Logger.info",
"sym:java:log4j.JndiLookup.lookup"
]
},
"reasoning": "Confirmed reachable via runtime probe - HTTP request handler logs user-controlled input which triggers JNDI lookup via message substitution"
},
{
"symbolId": "sym:java:log4j.JndiManager.lookup",
"display": "org.apache.logging.log4j.core.net.JndiManager.lookup(String)",
"purl": "pkg:maven/org.apache.logging.log4j/log4j-core@2.14.1",
"expected": {
"latticeState": "CU",
"bucket": "unreachable",
"reachable": false,
"confidence": 0.92,
"pathLength": null,
"path": null
},
"reasoning": "JndiManager.lookup is present in log4j-core but the direct JndiManager usage path is not exercised - only JndiLookup wrapper is used"
},
{
"symbolId": "sym:java:log4j.ScriptLookup.lookup",
"display": "org.apache.logging.log4j.core.lookup.ScriptLookup.lookup(LogEvent, String)",
"purl": "pkg:maven/org.apache.logging.log4j/log4j-core@2.14.1",
"expected": {
"latticeState": "SU",
"bucket": "unreachable",
"reachable": false,
"confidence": 0.85,
"pathLength": null,
"path": null
},
"reasoning": "ScriptLookup exists in log4j-core but is disabled by default and no configuration enables it"
}
],
"entryPoints": [
{
"symbolId": "sym:java:HttpRequestHandler.handle",
"display": "com.example.app.HttpRequestHandler.handle(HttpExchange)",
"phase": "runtime",
"source": "servlet"
},
{
"symbolId": "sym:java:Application.main",
"display": "com.example.app.Application.main(String[])",
"phase": "main",
"source": "manifest"
}
],
"expectedUncertainty": {
"states": [],
"aggregateTier": "T4",
"riskScore": 0.0
},
"expectedGateDecisions": [
{
"vulnId": "CVE-2021-44228",
"targetSymbol": "sym:java:log4j.JndiLookup.lookup",
"requestedStatus": "not_affected",
"expectedDecision": "block",
"expectedBlockedBy": "LatticeState",
"expectedReason": "CR state blocks not_affected - runtime evidence confirms reachability"
},
{
"vulnId": "CVE-2021-44228",
"targetSymbol": "sym:java:log4j.JndiLookup.lookup",
"requestedStatus": "affected",
"expectedDecision": "allow"
},
{
"vulnId": "CVE-2021-44228",
"targetSymbol": "sym:java:log4j.JndiManager.lookup",
"requestedStatus": "not_affected",
"expectedDecision": "allow",
"expectedReason": "CU state allows not_affected - confirmed unreachable"
},
{
"vulnId": "CVE-2021-44228",
"targetSymbol": "sym:java:log4j.ScriptLookup.lookup",
"requestedStatus": "not_affected",
"expectedDecision": "warn",
"expectedReason": "SU state allows not_affected but with warning - static analysis only, no runtime confirmation"
}
]
}

View File

@@ -0,0 +1,32 @@
{
"sampleId": "sample:java:vulnerable-log4j:001",
"version": "1.0.0",
"createdAt": "2025-12-13T12:00:00Z",
"language": "java",
"category": "positive",
"description": "Log4Shell CVE-2021-44228 reachable via JNDI lookup in logging path from HTTP request handler",
"source": {
"repository": "synthetic",
"commit": "synthetic-sample",
"buildToolchain": "maven:3.9.0,jdk:17"
},
"vulnerabilities": [
{
"vulnId": "CVE-2021-44228",
"purl": "pkg:maven/org.apache.logging.log4j/log4j-core@2.14.1",
"affectedSymbol": "org.apache.logging.log4j.core.lookup.JndiLookup.lookup"
}
],
"artifacts": [
{
"path": "artifacts/app.jar",
"hash": "sha256:0000000000000000000000000000000000000000000000000000000000000004",
"type": "application/java-archive"
},
{
"path": "artifacts/sbom.cdx.json",
"hash": "sha256:0000000000000000000000000000000000000000000000000000000000000005",
"type": "application/vnd.cyclonedx+json"
}
]
}

View File

@@ -0,0 +1,100 @@
{
"schema": "ground-truth-v1",
"sampleId": "sample:native:stripped-elf:001",
"generatedAt": "2025-12-13T12:00:00Z",
"generator": {
"name": "manual-annotation",
"version": "1.0.0",
"annotator": "scanner-guild"
},
"targets": [
{
"symbolId": "sym:binary:ossl_punycode_decode",
"display": "ossl_punycode_decode",
"purl": "pkg:deb/ubuntu/openssl@3.0.2?arch=amd64",
"expected": {
"latticeState": "SR",
"bucket": "direct",
"reachable": true,
"confidence": 0.85,
"pathLength": 4,
"path": [
"sym:binary:_start",
"sym:binary:main",
"sym:binary:SSL_connect",
"sym:binary:ossl_punycode_decode"
]
},
"reasoning": "punycode_decode is reachable via SSL certificate validation during SSL_connect - lower confidence due to stripped binary heuristics"
},
{
"symbolId": "sym:binary:sub_401000",
"display": "sub_401000 (heuristic function)",
"purl": "pkg:generic/app@1.0.0",
"expected": {
"latticeState": "U",
"bucket": "unknown",
"reachable": null,
"confidence": 0.4,
"pathLength": null,
"path": null
},
"reasoning": "Stripped symbol detected by heuristic CFG analysis - function boundaries uncertain"
}
],
"entryPoints": [
{
"symbolId": "sym:binary:_start",
"display": "_start",
"phase": "load",
"source": "e_entry"
},
{
"symbolId": "sym:binary:main",
"display": "main",
"phase": "runtime",
"source": "symbol"
},
{
"symbolId": "init:binary:0x401000",
"display": "DT_INIT_ARRAY[0]",
"phase": "init",
"source": "DT_INIT_ARRAY"
}
],
"expectedUncertainty": {
"states": [
{
"code": "U1",
"entropy": 0.35
}
],
"aggregateTier": "T2",
"riskScore": 0.25
},
"expectedGateDecisions": [
{
"vulnId": "CVE-2022-3602",
"targetSymbol": "sym:binary:ossl_punycode_decode",
"requestedStatus": "not_affected",
"expectedDecision": "block",
"expectedBlockedBy": "LatticeState",
"expectedReason": "SR state blocks not_affected - static analysis shows reachability"
},
{
"vulnId": "CVE-2022-3602",
"targetSymbol": "sym:binary:ossl_punycode_decode",
"requestedStatus": "affected",
"expectedDecision": "warn",
"expectedReason": "T2 uncertainty tier requires review for affected status"
},
{
"vulnId": "CVE-2022-3602",
"targetSymbol": "sym:binary:sub_401000",
"requestedStatus": "not_affected",
"expectedDecision": "block",
"expectedBlockedBy": "UncertaintyTier",
"expectedReason": "Unknown state with U1 uncertainty blocks not_affected without justification"
}
]
}

View File

@@ -0,0 +1,27 @@
{
"sampleId": "sample:native:stripped-elf:001",
"version": "1.0.0",
"createdAt": "2025-12-13T12:00:00Z",
"language": "native",
"category": "positive",
"description": "Stripped ELF binary linking to vulnerable OpenSSL version with reachable SSL_read path",
"source": {
"repository": "synthetic",
"commit": "synthetic-sample",
"buildToolchain": "gcc:13.0,openssl:3.0.2"
},
"vulnerabilities": [
{
"vulnId": "CVE-2022-3602",
"purl": "pkg:deb/ubuntu/openssl@3.0.2?arch=amd64",
"affectedSymbol": "ossl_punycode_decode"
}
],
"artifacts": [
{
"path": "artifacts/app",
"hash": "sha256:0000000000000000000000000000000000000000000000000000000000000003",
"type": "application/x-executable"
}
]
}

View File

@@ -0,0 +1,189 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://stellaops.io/schemas/reachability/ground-truth.schema.json",
"title": "Reachability Ground Truth",
"description": "Ground truth annotations for reachability test samples",
"type": "object",
"required": ["schema", "sampleId", "generatedAt", "generator", "targets", "entryPoints"],
"properties": {
"schema": {
"type": "string",
"const": "ground-truth-v1"
},
"sampleId": {
"type": "string",
"pattern": "^sample:[a-z]+:[a-z0-9-]+:[0-9]+$"
},
"generatedAt": {
"type": "string",
"format": "date-time"
},
"generator": {
"type": "object",
"required": ["name", "version"],
"properties": {
"name": {
"type": "string"
},
"version": {
"type": "string"
},
"annotator": {
"type": "string"
}
}
},
"targets": {
"type": "array",
"items": {
"$ref": "#/definitions/target"
}
},
"entryPoints": {
"type": "array",
"items": {
"$ref": "#/definitions/entryPoint"
}
},
"expectedUncertainty": {
"$ref": "#/definitions/uncertainty"
},
"expectedGateDecisions": {
"type": "array",
"items": {
"$ref": "#/definitions/gateDecision"
}
}
},
"definitions": {
"target": {
"type": "object",
"required": ["symbolId", "expected", "reasoning"],
"properties": {
"symbolId": {
"type": "string",
"pattern": "^sym:[a-z]+:.+"
},
"display": {
"type": "string"
},
"purl": {
"type": "string"
},
"expected": {
"type": "object",
"required": ["latticeState", "bucket", "reachable", "confidence"],
"properties": {
"latticeState": {
"type": "string",
"enum": ["U", "SR", "SU", "RO", "RU", "CR", "CU", "X"]
},
"bucket": {
"type": "string",
"enum": ["unknown", "direct", "runtime", "unreachable", "entrypoint"]
},
"reachable": {
"type": "boolean"
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"pathLength": {
"type": ["integer", "null"],
"minimum": 0
},
"path": {
"type": ["array", "null"],
"items": {
"type": "string"
}
}
}
},
"reasoning": {
"type": "string"
}
}
},
"entryPoint": {
"type": "object",
"required": ["symbolId", "phase", "source"],
"properties": {
"symbolId": {
"type": "string"
},
"display": {
"type": "string"
},
"phase": {
"type": "string",
"enum": ["load", "init", "runtime", "main", "fini"]
},
"source": {
"type": "string"
}
}
},
"uncertainty": {
"type": "object",
"required": ["aggregateTier"],
"properties": {
"states": {
"type": "array",
"items": {
"type": "object",
"required": ["code", "entropy"],
"properties": {
"code": {
"type": "string",
"enum": ["U1", "U2", "U3", "U4"]
},
"entropy": {
"type": "number",
"minimum": 0,
"maximum": 1
}
}
}
},
"aggregateTier": {
"type": "string",
"enum": ["T1", "T2", "T3", "T4"]
},
"riskScore": {
"type": "number",
"minimum": 0,
"maximum": 1
}
}
},
"gateDecision": {
"type": "object",
"required": ["vulnId", "targetSymbol", "requestedStatus", "expectedDecision"],
"properties": {
"vulnId": {
"type": "string"
},
"targetSymbol": {
"type": "string"
},
"requestedStatus": {
"type": "string",
"enum": ["affected", "not_affected", "under_investigation", "fixed"]
},
"expectedDecision": {
"type": "string",
"enum": ["allow", "block", "warn"]
},
"expectedBlockedBy": {
"type": "string"
},
"expectedReason": {
"type": "string"
}
}
}
}
}

View File

@@ -0,0 +1,94 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://stellaops.io/schemas/reachability/manifest.schema.json",
"title": "Reachability Sample Manifest",
"description": "Metadata for a reachability test sample",
"type": "object",
"required": ["sampleId", "version", "createdAt", "language", "category", "description"],
"properties": {
"sampleId": {
"type": "string",
"pattern": "^sample:[a-z]+:[a-z0-9-]+:[0-9]+$",
"description": "Unique sample identifier"
},
"version": {
"type": "string",
"pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$",
"description": "Sample version (SemVer)"
},
"createdAt": {
"type": "string",
"format": "date-time",
"description": "Creation timestamp (UTC ISO-8601)"
},
"language": {
"type": "string",
"enum": ["java", "csharp", "javascript", "php", "python", "native", "polyglot"],
"description": "Primary language of the sample"
},
"category": {
"type": "string",
"enum": ["positive", "negative", "contested"],
"description": "Ground truth category"
},
"description": {
"type": "string",
"description": "Human-readable description"
},
"source": {
"type": "object",
"properties": {
"repository": {
"type": "string",
"format": "uri"
},
"commit": {
"type": "string"
},
"buildToolchain": {
"type": "string"
}
}
},
"vulnerabilities": {
"type": "array",
"items": {
"type": "object",
"required": ["vulnId", "purl", "affectedSymbol"],
"properties": {
"vulnId": {
"type": "string",
"description": "CVE or advisory ID"
},
"purl": {
"type": "string",
"description": "Package URL of vulnerable package"
},
"affectedSymbol": {
"type": "string",
"description": "Symbol name that is vulnerable"
}
}
}
},
"artifacts": {
"type": "array",
"items": {
"type": "object",
"required": ["path", "hash", "type"],
"properties": {
"path": {
"type": "string"
},
"hash": {
"type": "string",
"pattern": "^sha256:[a-f0-9]{64}$"
},
"type": {
"type": "string"
}
}
}
}
}
}