{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://stellaops.io/schemas/corpus-sample.v1.json",
  "title": "CorpusSample",
  "description": "Schema for ground-truth corpus samples used in reachability benchmarking",
  "type": "object",
  "required": [
    "sampleId",
    "name",
    "format",
    "arch",
    "sinks"
  ],
  "properties": {
    "sampleId": {
      "type": "string",
      "pattern": "^gt-[0-9]{4}$",
      "description": "Unique identifier for the sample (e.g., gt-0001)"
    },
    "name": {
      "type": "string",
      "description": "Human-readable name for the sample"
    },
    "description": {
      "type": "string",
      "description": "Detailed description of what this sample tests"
    },
    "category": {
      "type": "string",
      "enum": [
        "basic",
        "indirect",
        "stripped",
        "obfuscated",
        "guarded",
        "callback",
        "virtual"
      ],
      "description": "Sample category for organization"
    },
    "format": {
      "type": "string",
      "enum": [
        "elf64",
        "elf32",
        "pe64",
        "pe32",
        "macho64",
        "macho32"
      ],
      "description": "Binary format"
    },
    "arch": {
      "type": "string",
      "enum": [
        "x86_64",
        "x86",
        "aarch64",
        "arm32",
        "riscv64"
      ],
      "description": "Target architecture"
    },
    "language": {
      "type": "string",
      "enum": [
        "c",
        "cpp",
        "rust",
        "go"
      ],
      "description": "Source language (for reference)"
    },
    "compiler": {
      "type": "object",
      "properties": {
        "name": {
          "type": "string"
        },
        "version": {
          "type": "string"
        },
        "flags": {
          "type": "array",
          "items": {
            "type": "string"
          }
        }
      },
      "description": "Compiler information used to build the sample"
    },
    "entryPoint": {
      "type": "string",
      "default": "main",
      "description": "Entry point function name"
    },
    "sinks": {
      "type": "array",
      "minItems": 1,
      "items": {
        "type": "object",
        "required": [
          "sinkId",
          "signature",
          "expected"
        ],
        "properties": {
          "sinkId": {
            "type": "string",
            "pattern": "^sink-[0-9]{3}$",
            "description": "Unique sink identifier within the sample"
          },
          "signature": {
            "type": "string",
            "description": "Function signature of the sink"
          },
          "sinkType": {
            "type": "string",
            "enum": [
              "memory_corruption",
              "command_injection",
              "sql_injection",
              "path_traversal",
              "format_string",
              "crypto_weakness",
              "custom"
            ],
            "description": "Type of vulnerability represented by the sink"
          },
          "expected": {
            "type": "string",
            "enum": [
              "reachable",
              "unreachable",
              "conditional"
            ],
            "description": "Expected reachability determination"
          },
          "expectedPaths": {
            "type": "array",
            "items": {
              "type": "array",
              "items": {
                "type": "string"
              }
            },
            "description": "Expected call paths from entry to sink (for reachable sinks)"
          },
          "guardConditions": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "variable": {
                  "type": "string"
                },
                "condition": {
                  "type": "string"
                },
                "value": {
                  "type": "string"
                }
              }
            },
            "description": "Guard conditions that protect the sink (for conditional sinks)"
          },
          "notes": {
            "type": "string",
            "description": "Additional notes about this sink"
          }
        }
      },
      "description": "List of sinks with expected reachability"
    },
    "metadata": {
      "type": "object",
      "properties": {
        "createdAt": {
          "type": "string",
          "format": "date-time"
        },
        "createdBy": {
          "type": "string"
        },
        "version": {
          "type": "string"
        },
        "sha256": {
          "type": "string",
          "pattern": "^[a-f0-9]{64}$"
        }
      },
      "description": "Metadata about the sample"
    }
  }
}