Files
git.stella-ops.org/docs/schemas/predicates/deltasig-v2.schema.json
2026-01-20 00:45:38 +02:00

352 lines
10 KiB
JSON

{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://stella-ops.org/schemas/predicates/deltasig/v2.json",
"title": "DeltaSig Predicate v2",
"description": "DSSE predicate for function-level binary diffs with symbol provenance and IR diff references",
"type": "object",
"required": ["schemaVersion", "subject", "functionMatches", "verdict", "computedAt", "tooling", "summary"],
"properties": {
"schemaVersion": {
"type": "string",
"const": "2.0.0",
"description": "Schema version"
},
"subject": {
"$ref": "#/$defs/subject",
"description": "Subject artifact being analyzed"
},
"functionMatches": {
"type": "array",
"items": { "$ref": "#/$defs/functionMatch" },
"description": "Function-level matches with provenance and evidence"
},
"verdict": {
"type": "string",
"enum": ["vulnerable", "patched", "unknown", "partial"],
"description": "Overall verdict"
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Overall confidence score (0.0-1.0)"
},
"cveIds": {
"type": "array",
"items": { "type": "string", "pattern": "^CVE-\\d{4}-\\d+$" },
"description": "CVE identifiers this analysis addresses (e.g., CVE-2024-12345)"
},
"computedAt": {
"type": "string",
"format": "date-time",
"description": "Timestamp when analysis was computed (RFC 3339)"
},
"tooling": {
"$ref": "#/$defs/tooling",
"description": "Tooling used to generate the predicate"
},
"summary": {
"$ref": "#/$defs/summary",
"description": "Summary statistics"
},
"advisories": {
"type": "array",
"items": { "type": "string", "format": "uri" },
"description": "Optional advisory references"
},
"metadata": {
"type": "object",
"additionalProperties": true,
"description": "Additional metadata"
}
},
"$defs": {
"subject": {
"type": "object",
"required": ["purl", "digest"],
"properties": {
"purl": {
"type": "string",
"description": "Package URL (purl) of the subject"
},
"digest": {
"type": "object",
"additionalProperties": { "type": "string" },
"description": "Digests of the artifact (algorithm -> hash)"
},
"arch": {
"type": "string",
"description": "Target architecture"
},
"filename": {
"type": "string",
"description": "Binary filename or path"
},
"size": {
"type": "integer",
"minimum": 0,
"description": "Size of the binary in bytes"
},
"debugId": {
"type": "string",
"description": "ELF Build-ID or equivalent debug identifier"
}
}
},
"functionMatch": {
"type": "object",
"required": ["name", "matchMethod", "matchState"],
"properties": {
"name": {
"type": "string",
"description": "Function name (symbol name)"
},
"beforeHash": {
"type": "string",
"description": "Hash of function in the analyzed binary"
},
"afterHash": {
"type": "string",
"description": "Hash of function in the reference binary"
},
"matchScore": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Match score (0.0-1.0)"
},
"matchMethod": {
"type": "string",
"enum": ["semantic_ksg", "byte_exact", "cfg_structural", "ir_semantic", "chunk_rolling"],
"description": "Method used for matching"
},
"matchState": {
"type": "string",
"enum": ["vulnerable", "patched", "modified", "unchanged", "unknown"],
"description": "Match state"
},
"symbolProvenance": {
"$ref": "#/$defs/symbolProvenance",
"description": "Symbol provenance from ground-truth corpus"
},
"irDiff": {
"$ref": "#/$defs/irDiffReference",
"description": "IR diff reference for detailed evidence"
},
"address": {
"type": "integer",
"description": "Virtual address of the function"
},
"size": {
"type": "integer",
"minimum": 0,
"description": "Function size in bytes"
},
"section": {
"type": "string",
"default": ".text",
"description": "Section containing the function"
},
"explanation": {
"type": "string",
"description": "Human-readable explanation of the match"
}
}
},
"symbolProvenance": {
"type": "object",
"required": ["sourceId", "observationId", "fetchedAt", "signatureState"],
"properties": {
"sourceId": {
"type": "string",
"description": "Ground-truth source ID (e.g., debuginfod-fedora)"
},
"observationId": {
"type": "string",
"pattern": "^groundtruth:[^:]+:[^:]+:[^:]+$",
"description": "Observation ID in ground-truth corpus: the literal prefix 'groundtruth' followed by three non-empty, colon-separated segments"
},
"fetchedAt": {
"type": "string",
"format": "date-time",
"description": "When the symbol was fetched from the source"
},
"signatureState": {
"type": "string",
"enum": ["verified", "unverified", "expired", "invalid"],
"description": "Signature state of the source"
},
"packageName": {
"type": "string",
"description": "Package name from the source"
},
"packageVersion": {
"type": "string",
"description": "Package version from the source"
},
"distro": {
"type": "string",
"description": "Distribution (e.g., fedora, ubuntu, debian)"
},
"distroVersion": {
"type": "string",
"description": "Distribution version"
},
"debugId": {
"type": "string",
"description": "Debug ID used for lookup"
}
}
},
"irDiffReference": {
"type": "object",
"required": ["casDigest"],
"properties": {
"casDigest": {
"type": "string",
"pattern": "^sha256:[a-f0-9]{64}$",
"description": "Content-addressed digest of the full diff in CAS, written as 'sha256:' followed by 64 lowercase hexadecimal characters"
},
"addedBlocks": {
"type": "integer",
"minimum": 0,
"description": "Number of basic blocks added"
},
"removedBlocks": {
"type": "integer",
"minimum": 0,
"description": "Number of basic blocks removed"
},
"changedInstructions": {
"type": "integer",
"minimum": 0,
"description": "Number of instructions changed"
},
"statementsAdded": {
"type": "integer",
"minimum": 0,
"description": "Number of IR statements added"
},
"statementsRemoved": {
"type": "integer",
"minimum": 0,
"description": "Number of IR statements removed"
},
"irFormat": {
"type": "string",
"description": "IR format used (e.g., b2r2-lowuir, ghidra-pcode)"
},
"casUrl": {
"type": "string",
"format": "uri",
"description": "URL to fetch the full diff from CAS"
},
"diffSize": {
"type": "integer",
"minimum": 0,
"description": "Size of the diff in bytes"
}
}
},
"tooling": {
"type": "object",
"required": ["lifter", "lifterVersion", "canonicalIr", "matchAlgorithm", "binaryIndexVersion"],
"properties": {
"lifter": {
"type": "string",
"enum": ["b2r2", "ghidra", "radare2", "ida"],
"description": "Primary lifter used"
},
"lifterVersion": {
"type": "string",
"description": "Lifter version"
},
"canonicalIr": {
"type": "string",
"enum": ["b2r2-lowuir", "ghidra-pcode", "llvm-ir"],
"description": "Canonical IR format"
},
"matchAlgorithm": {
"type": "string",
"description": "Matching algorithm"
},
"normalizationRecipe": {
"type": "string",
"description": "Normalization recipe applied"
},
"binaryIndexVersion": {
"type": "string",
"description": "StellaOps BinaryIndex version"
},
"hashAlgorithm": {
"type": "string",
"default": "sha256",
"description": "Hash algorithm used"
},
"casBackend": {
"type": "string",
"description": "CAS storage backend used for IR diffs"
}
}
},
"summary": {
"type": "object",
"properties": {
"totalFunctions": {
"type": "integer",
"minimum": 0,
"description": "Total number of functions analyzed"
},
"vulnerableFunctions": {
"type": "integer",
"minimum": 0,
"description": "Number of functions matched as vulnerable"
},
"patchedFunctions": {
"type": "integer",
"minimum": 0,
"description": "Number of functions matched as patched"
},
"unknownFunctions": {
"type": "integer",
"minimum": 0,
"description": "Number of functions with unknown state"
},
"functionsWithProvenance": {
"type": "integer",
"minimum": 0,
"description": "Number of functions with symbol provenance"
},
"functionsWithIrDiff": {
"type": "integer",
"minimum": 0,
"description": "Number of functions with IR diff evidence"
},
"avgMatchScore": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Average match score"
},
"minMatchScore": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Minimum match score"
},
"maxMatchScore": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Maximum match score"
},
"totalIrDiffSize": {
"type": "integer",
"minimum": 0,
"description": "Total size in bytes of IR diffs stored in CAS"
}
}
}
}
}