save progress

This commit is contained in:
StellaOps Bot
2026-01-03 00:47:24 +02:00
parent 3f197814c5
commit ca578801fd
319 changed files with 32478 additions and 2202 deletions

View File

@@ -0,0 +1,46 @@
# Delta Signature Library Charter
## Mission
Generate and match delta signatures for binary vulnerability detection. Provide cryptographic proof of backport status by comparing normalized function code against known patched/vulnerable signatures.
## Responsibilities
- Implement `IDeltaSignatureGenerator` for signature creation
- Implement `IDeltaSignatureMatcher` for binary matching
- Support exact full-hash matching and partial chunk matching
- Generate CFG (control flow graph) hashes for semantic similarity
- Maintain signature schema versioning (`stellaops.deltasig.v1`)
## Key Paths
- `IDeltaSignatureGenerator.cs` - Signature generation interface
- `IDeltaSignatureMatcher.cs` - Matching interface
- `DeltaSignature.cs` - Core signature model
- `SymbolSignature.cs` - Per-function signature
- `ChunkHash.cs` - Rolling 2KB window hashes for partial matching
- `Authoring/SignatureAuthoringService.cs` - Compare vuln/patched binaries
- `Cfg/CfgExtractor.cs` - Control flow graph extraction
## Signature Components
- **hash_hex**: SHA-256 of normalized function bytes
- **size_bytes**: Normalized function size
- **cfg_bb_count**: Basic block count
- **cfg_edge_hash**: CFG structure hash
- **chunk_hashes**: Rolling window hashes for LTO resilience
## Coordination
- Normalization pipeline for instruction normalization
- Disassembly service for binary loading
- Persistence for signature storage
- Scanner for vulnerability matching
- CLI for signature authoring workflow
## Required Reading
- `docs/implplan/SPRINT_20260102_001_BE_binary_delta_signatures.md`
- `docs/product-advisories/30-Dec-2025 - Binary Diff Signatures for Patch Detection.md`
## Working Agreement
1. Update task status in sprint file when starting/finishing work.
2. Signatures must be **deterministic** - same binary always produces same signature.
3. Include normalization recipe in signature for reproducibility.
4. Chunk hashes enable ~70% match threshold for LTO-modified binaries.
5. Test with known CVEs (Heartbleed, Log4Shell, POODLE) as golden tests.
6. Keep signature schema backward compatible; increment version for breaking changes.

View File

@@ -0,0 +1,502 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Immutable;
using System.Security.Cryptography;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// A basic block of a function's control flow graph: a run of consecutive
/// instructions together with its incoming and outgoing edges.
/// </summary>
/// <remarks>
/// Instances are produced by <see cref="CfgExtractor"/>. Edges are expressed as block
/// <see cref="Id"/> values rather than object references.
/// </remarks>
public sealed record BasicBlock
{
/// <summary>
/// Identifier of this block, unique within the function.
/// <see cref="CfgExtractor"/> numbers blocks in ascending order of their first instruction.
/// </summary>
public required int Id { get; init; }
/// <summary>
/// Address of the first instruction in the block.
/// </summary>
public required ulong StartAddress { get; init; }
/// <summary>
/// End address of the block (exclusive). <see cref="CfgExtractor"/> computes it as the
/// last instruction's original address plus the length of its normalized bytes.
/// </summary>
public required ulong EndAddress { get; init; }
/// <summary>
/// The instructions that make up this block, in order.
/// </summary>
public required ImmutableArray<NormalizedInstruction> Instructions { get; init; }
/// <summary>
/// IDs of the blocks control can flow to from this block.
/// </summary>
public required ImmutableArray<int> Successors { get; init; }
/// <summary>
/// IDs of the blocks that list this block as a successor.
/// </summary>
public required ImmutableArray<int> Predecessors { get; init; }
/// <summary>
/// How control leaves the block, classified from the block's last instruction.
/// </summary>
public required BlockTerminatorKind TerminatorKind { get; init; }
}
/// <summary>
/// Describes how control leaves a <see cref="BasicBlock"/>.
/// </summary>
/// <remarks>
/// The terminator kind decides which successor edges <see cref="CfgExtractor"/> records:
/// <see cref="FallThrough"/> and <see cref="Call"/> link to the next block,
/// <see cref="ConditionalBranch"/> links to the next block and to the branch target,
/// <see cref="Jump"/> links to the branch target only, and every other kind gets no successors.
/// </remarks>
public enum BlockTerminatorKind
{
/// <summary>
/// The block ends without a control-transfer instruction and continues into the next block.
/// </summary>
FallThrough,
/// <summary>
/// Unconditional jump.
/// </summary>
Jump,
/// <summary>
/// Conditional branch: control either continues into the next block or goes to the target.
/// </summary>
ConditionalBranch,
/// <summary>
/// Function call; execution is treated as continuing in the next block.
/// </summary>
Call,
/// <summary>
/// Return from function. Blocks of this kind are reported as exit blocks.
/// </summary>
Return,
/// <summary>
/// Indirect jump (jump table, etc.).
/// NOTE(review): the terminator classifier in <see cref="CfgExtractor"/> never yields this value.
/// </summary>
IndirectJump,
/// <summary>
/// Indirect call.
/// NOTE(review): the terminator classifier in <see cref="CfgExtractor"/> never yields this value.
/// </summary>
IndirectCall,
/// <summary>
/// System call. No successor edges are recorded for it.
/// </summary>
Syscall,
/// <summary>
/// Trap/interrupt. No successor edges are recorded for it.
/// </summary>
Trap
}
/// <summary>
/// Control flow graph of a single function, stored as a flat list of basic blocks
/// whose edges refer to one another by block ID.
/// </summary>
public sealed record ControlFlowGraph
{
    /// <summary>
    /// Every basic block that belongs to the graph.
    /// </summary>
    public required ImmutableArray<BasicBlock> Blocks { get; init; }

    /// <summary>
    /// ID of the block through which the function is entered.
    /// </summary>
    public required int EntryBlockId { get; init; }

    /// <summary>
    /// IDs of the blocks that terminate with a return.
    /// </summary>
    public required ImmutableArray<int> ExitBlockIds { get; init; }

    /// <summary>
    /// Number of edges in the graph, obtained by adding up the successor count of every block.
    /// </summary>
    public int EdgeCount
    {
        get
        {
            return Blocks.Sum(static block => block.Successors.Length);
        }
    }
}
/// <summary>
/// Summary figures of a control flow graph that are stored in, and compared between, signatures.
/// </summary>
public sealed record CfgMetrics
{
    /// <summary>
    /// How many basic blocks the graph contains.
    /// </summary>
    public required int BasicBlockCount { get; init; }

    /// <summary>
    /// How many edges the graph contains.
    /// </summary>
    public required int EdgeCount { get; init; }

    /// <summary>
    /// Hash over the graph's edge structure, used to compare graphs semantically.
    /// </summary>
    public required string EdgeHash { get; init; }

    /// <summary>
    /// Cyclomatic complexity, computed as edges - nodes + 2.
    /// </summary>
    public int CyclomaticComplexity
    {
        get
        {
            var edgesMinusNodes = EdgeCount - BasicBlockCount;
            return edgesMinusNodes + 2;
        }
    }
}
/// <summary>
/// Builds control flow graphs from normalized instruction streams and derives the
/// CFG metrics (block count, edge count, edge hash) used by delta signatures.
/// </summary>
/// <remarks>
/// Blocks are numbered in ascending instruction order, so a block's <see cref="BasicBlock.Id"/>
/// always equals its index in <see cref="ControlFlowGraph.Blocks"/>.
/// </remarks>
public static class CfgExtractor
{
    /// <summary>
    /// Extracts a control flow graph from normalized instructions.
    /// </summary>
    /// <param name="instructions">
    /// The normalized instructions of one function. The instruction at index <c>i + 1</c> is
    /// treated as the fall-through successor of the instruction at index <c>i</c>.
    /// </param>
    /// <param name="startAddress">
    /// Base address of the function. Retained for API compatibility: block boundaries and
    /// edges are derived solely from each instruction's <c>OriginalAddress</c>, so this value
    /// does not influence the result.
    /// </param>
    /// <returns>The extracted graph; an empty graph when <paramref name="instructions"/> is empty.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="instructions"/> is <see langword="null"/>.</exception>
    public static ControlFlowGraph Extract(
        IReadOnlyList<NormalizedInstruction> instructions,
        ulong startAddress = 0)
    {
        ArgumentNullException.ThrowIfNull(instructions);

        // Kept only so existing callers compile unchanged; see the parameter documentation.
        _ = startAddress;

        if (instructions.Count == 0)
        {
            return new ControlFlowGraph
            {
                Blocks = [],
                EntryBlockId = 0,
                ExitBlockIds = []
            };
        }

        // Step 1: find the instruction indices at which a basic block starts.
        var leaders = IdentifyLeaders(instructions);

        // Step 2: cut the instruction stream into blocks at those indices.
        var blocks = BuildBasicBlocks(instructions, leaders);

        // Step 3: fill in successor and predecessor edges.
        ConnectBlocks(blocks);

        // Step 4: the first instruction is always a leader, so block 0 is the entry;
        // every block that ends in a return is an exit.
        return new ControlFlowGraph
        {
            Blocks = [.. blocks],
            EntryBlockId = 0,
            ExitBlockIds =
            [
                .. blocks
                    .Where(b => b.TerminatorKind == BlockTerminatorKind.Return)
                    .Select(b => b.Id)
            ]
        };
    }

    /// <summary>
    /// Computes CFG metrics for signature matching.
    /// </summary>
    /// <param name="cfg">The graph to summarize.</param>
    /// <returns>Block count, edge count and edge hash of <paramref name="cfg"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="cfg"/> is <see langword="null"/>.</exception>
    public static CfgMetrics ComputeMetrics(ControlFlowGraph cfg)
    {
        ArgumentNullException.ThrowIfNull(cfg);

        return new CfgMetrics
        {
            BasicBlockCount = cfg.Blocks.Length,
            EdgeCount = cfg.EdgeCount,
            EdgeHash = ComputeEdgeHash(cfg)
        };
    }

    /// <summary>
    /// Computes CFG metrics directly from instructions by extracting the graph first.
    /// </summary>
    /// <param name="instructions">The normalized instructions of one function.</param>
    /// <param name="startAddress">Forwarded to <see cref="Extract"/>.</param>
    /// <returns>The metrics of the extracted graph.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="instructions"/> is <see langword="null"/>.</exception>
    public static CfgMetrics ComputeMetrics(
        IReadOnlyList<NormalizedInstruction> instructions,
        ulong startAddress = 0)
    {
        ArgumentNullException.ThrowIfNull(instructions);

        return ComputeMetrics(Extract(instructions, startAddress));
    }

    /// <summary>
    /// Returns, in ascending order, the indices of all instructions that start a basic block.
    /// </summary>
    private static SortedSet<int> IdentifyLeaders(IReadOnlyList<NormalizedInstruction> instructions)
    {
        // The first instruction always starts a block.
        var leaders = new SortedSet<int> { 0 };

        // Maps an instruction's original address to its index so branch targets can be resolved.
        // When two instructions report the same address, the later one wins.
        var addressToIndex = new Dictionary<ulong, int>(instructions.Count);
        for (var i = 0; i < instructions.Count; i++)
        {
            addressToIndex[instructions[i].OriginalAddress] = i;
        }

        for (var i = 0; i < instructions.Count; i++)
        {
            var instruction = instructions[i];
            var kind = instruction.Kind;

            var isBranch = kind is InstructionKind.Branch or InstructionKind.ConditionalBranch;
            var endsBlock = isBranch || kind is InstructionKind.Return or InstructionKind.Call;
            if (!endsBlock)
            {
                continue;
            }

            // Whatever follows a branch, return or call starts a new block.
            if (i + 1 < instructions.Count)
            {
                leaders.Add(i + 1);
            }

            // A branch target inside this function starts a new block as well.
            if (isBranch
                && ExtractBranchTarget(instruction) is { } targetAddress
                && addressToIndex.TryGetValue(targetAddress, out var targetIndex))
            {
                leaders.Add(targetIndex);
            }
        }

        return leaders;
    }

    /// <summary>
    /// Cuts the instruction stream into blocks at the given leader indices. Edges are left empty.
    /// </summary>
    private static List<BasicBlock> BuildBasicBlocks(
        IReadOnlyList<NormalizedInstruction> instructions,
        SortedSet<int> leaders)
    {
        // SortedSet enumerates in ascending order; every leader is a valid instruction index.
        var boundaries = leaders.ToList();
        var blocks = new List<BasicBlock>(boundaries.Count);

        for (var blockIndex = 0; blockIndex < boundaries.Count; blockIndex++)
        {
            var firstIndex = boundaries[blockIndex];
            var endIndex = blockIndex + 1 < boundaries.Count
                ? boundaries[blockIndex + 1]
                : instructions.Count;

            var builder = ImmutableArray.CreateBuilder<NormalizedInstruction>(endIndex - firstIndex);
            for (var i = firstIndex; i < endIndex; i++)
            {
                builder.Add(instructions[i]);
            }

            var blockInstructions = builder.MoveToImmutable();
            var lastInstruction = blockInstructions[^1];

            blocks.Add(new BasicBlock
            {
                // The ID doubles as the block's index in the list; ConnectBlocks relies on that.
                Id = blockIndex,
                StartAddress = blockInstructions[0].OriginalAddress,
                EndAddress = lastInstruction.OriginalAddress + (ulong)lastInstruction.NormalizedBytes.Length,
                Instructions = blockInstructions,
                Successors = [],   // filled in by ConnectBlocks
                Predecessors = [], // filled in by ConnectBlocks
                TerminatorKind = ClassifyTerminator(lastInstruction)
            });
        }

        return blocks;
    }

    /// <summary>
    /// Replaces every block in <paramref name="blocks"/> with a copy that carries its
    /// successor and predecessor IDs.
    /// </summary>
    private static void ConnectBlocks(List<BasicBlock> blocks)
    {
        if (blocks.Count == 0)
        {
            return;
        }

        var blockIdByStartAddress = new Dictionary<ulong, int>(blocks.Count);
        foreach (var block in blocks)
        {
            blockIdByStartAddress[block.StartAddress] = block.Id;
        }

        var successorsById = new List<int>[blocks.Count];
        var predecessorsById = new List<int>[blocks.Count];
        for (var i = 0; i < blocks.Count; i++)
        {
            successorsById[i] = [];
            predecessorsById[i] = [];
        }

        for (var i = 0; i < blocks.Count; i++)
        {
            var block = blocks[i];
            var successors = successorsById[i];
            var terminator = block.TerminatorKind;

            // Fall-through edge: plain fall-through, calls, and the not-taken side of a
            // conditional branch all continue into the next block.
            var continuesToNext = terminator
                is BlockTerminatorKind.FallThrough
                or BlockTerminatorKind.Call
                or BlockTerminatorKind.ConditionalBranch;
            if (continuesToNext && i + 1 < blocks.Count)
            {
                successors.Add(blocks[i + 1].Id);
            }

            // Branch edge: only added when the target resolves to the start of a known block.
            // Return, Trap, IndirectJump, IndirectCall and Syscall get no successors.
            var hasBranchTarget = terminator
                is BlockTerminatorKind.ConditionalBranch
                or BlockTerminatorKind.Jump;
            if (hasBranchTarget
                && ExtractBranchTarget(block.Instructions[^1]) is { } targetAddress
                && blockIdByStartAddress.TryGetValue(targetAddress, out var targetBlockId)
                && !successors.Contains(targetBlockId))
            {
                successors.Add(targetBlockId);
            }

            // Blocks are visited in ID order, so each predecessor list ends up ordered by ID.
            foreach (var successorId in successors)
            {
                predecessorsById[successorId].Add(block.Id);
            }
        }

        for (var i = 0; i < blocks.Count; i++)
        {
            blocks[i] = blocks[i] with
            {
                Successors = [.. successorsById[i]],
                Predecessors = [.. predecessorsById[i]]
            };
        }
    }

    /// <summary>
    /// Maps the kind of a block's last instruction to the block's terminator kind.
    /// </summary>
    private static BlockTerminatorKind ClassifyTerminator(NormalizedInstruction instruction)
    {
        return instruction.Kind switch
        {
            InstructionKind.Return => BlockTerminatorKind.Return,
            InstructionKind.Branch => BlockTerminatorKind.Jump,
            InstructionKind.ConditionalBranch => BlockTerminatorKind.ConditionalBranch,
            InstructionKind.Call => BlockTerminatorKind.Call,
            InstructionKind.Syscall => BlockTerminatorKind.Syscall,
            InstructionKind.Interrupt => BlockTerminatorKind.Trap,
            _ => BlockTerminatorKind.FallThrough
        };
    }

    /// <summary>
    /// Reads a branch target from the instruction's first operand.
    /// </summary>
    /// <returns>
    /// The operand value when the first operand is an address or immediate with a value;
    /// otherwise <see langword="null"/>.
    /// </returns>
    private static ulong? ExtractBranchTarget(NormalizedInstruction instruction)
    {
        if (instruction.Operands.Length == 0)
        {
            return null;
        }

        var firstOperand = instruction.Operands[0];
        var isTargetOperand =
            firstOperand.Type == OperandType.Address || firstOperand.Type == OperandType.Immediate;
        if (!isTargetOperand || !firstOperand.Value.HasValue)
        {
            return null;
        }

        // unchecked: presumably the operand value is signed; a negative value must not raise
        // OverflowException when the project is compiled with checked arithmetic.
        return unchecked((ulong)firstOperand.Value.Value);
    }

    /// <summary>
    /// Hashes the graph's edge list in a canonical form: edges sorted by source ID, then by
    /// target ID, rendered as <c>from-&gt;to</c>, joined with <c>;</c>, UTF-8 encoded and
    /// hashed with SHA-256. The result is lowercase hex.
    /// </summary>
    private static string ComputeEdgeHash(ControlFlowGraph cfg)
    {
        var edges = new List<(int From, int To)>(cfg.EdgeCount);
        foreach (var block in cfg.Blocks)
        {
            foreach (var successorId in block.Successors)
            {
                edges.Add((block.Id, successorId));
            }
        }

        // Sorting makes the hash independent of the order in which edges were discovered.
        edges.Sort((a, b) =>
        {
            var bySource = a.From.CompareTo(b.From);
            return bySource != 0 ? bySource : a.To.CompareTo(b.To);
        });

        var canonical = string.Join(";", edges.Select(e => $"{e.From}->{e.To}"));
        var digest = SHA256.HashData(System.Text.Encoding.UTF8.GetBytes(canonical));
        return Convert.ToHexString(digest).ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,322 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Immutable;
using System.Globalization;
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// Generates delta signatures from binaries for CVE detection.
/// </summary>
/// <remarks>
/// For every requested symbol the generator disassembles the function, normalizes it,
/// and hashes the normalized bytes. Optionally it adds fixed-size chunk hashes and CFG metrics.
/// </remarks>
public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
{
    /// <summary>Scope recorded for symbols that do not report a section.</summary>
    private const string DefaultScope = ".text";

    /// <summary>Size in bytes of the largest digest among the supported algorithms (SHA-512).</summary>
    private const int MaxHashSizeBytes = 64;

    private readonly DisassemblyService _disassemblyService;
    private readonly NormalizationService _normalizationService;
    private readonly ILogger<DeltaSignatureGenerator> _logger;

    /// <summary>
    /// Creates a generator that uses the given disassembly and normalization services.
    /// </summary>
    public DeltaSignatureGenerator(
        DisassemblyService disassemblyService,
        NormalizationService normalizationService,
        ILogger<DeltaSignatureGenerator> logger)
    {
        _disassemblyService = disassemblyService;
        _normalizationService = normalizationService;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Target symbols that are missing from the binary, or that disassemble to no
    /// instructions, are logged and skipped; they do not fail the whole request.
    /// </remarks>
    public async Task<DeltaSignature> GenerateSignaturesAsync(
        Stream binaryStream,
        DeltaSignatureRequest request,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(binaryStream);
        ArgumentNullException.ThrowIfNull(request);

        _logger.LogInformation(
            "Generating delta signatures for {Cve} ({Package}) with {SymbolCount} target symbols",
            request.Cve,
            request.Package,
            request.TargetSymbols.Count);

        var options = request.Options ?? new SignatureOptions();

        // Loading is synchronous; run it on the thread pool so the caller is not blocked.
        var (binary, plugin) = await Task.Run(
            () => _disassemblyService.LoadBinary(binaryStream),
            ct);

        _logger.LogDebug(
            "Loaded binary: format={Format}, arch={Arch}",
            binary.Format,
            binary.Architecture);

        // Index symbols by name. A binary can list the same name more than once, and a plain
        // ToDictionary would throw on the duplicate key, so the first entry per name is kept.
        var symbols = plugin.GetSymbols(binary)
            .DistinctBy(s => s.Name)
            .ToDictionary(s => s.Name);

        var symbolSignatures = new List<SymbolSignature>();

        // Normalization steps in first-seen order; the set makes the "seen" check O(1).
        var appliedSteps = new List<string>();
        var seenSteps = new HashSet<string>();

        foreach (var symbolName in request.TargetSymbols)
        {
            ct.ThrowIfCancellationRequested();

            if (!symbols.TryGetValue(symbolName, out var symbolInfo))
            {
                _logger.LogWarning("Symbol {Symbol} not found in binary", symbolName);
                continue;
            }

            var instructions = plugin.DisassembleSymbol(binary, symbolInfo).ToList();
            if (instructions.Count == 0)
            {
                _logger.LogWarning("No instructions for symbol {Symbol}", symbolName);
                continue;
            }

            var normalized = _normalizationService.Normalize(
                instructions,
                binary.Architecture);

            foreach (var step in normalized.AppliedSteps)
            {
                if (seenSteps.Add(step))
                {
                    appliedSteps.Add(step);
                }
            }

            var signature = GenerateSymbolSignature(
                normalized,
                symbolName,
                symbolInfo.Section ?? DefaultScope,
                options);
            symbolSignatures.Add(signature);

            _logger.LogDebug(
                "Generated signature for {Symbol}: {Hash} ({Size} bytes)",
                symbolName,
                signature.HashHex,
                signature.SizeBytes);
        }

        // The pipeline supplies the recipe id/version recorded alongside the applied steps.
        var pipeline = _normalizationService.GetPipeline(binary.Architecture);

        return new DeltaSignature
        {
            Cve = request.Cve,
            Package = new PackageRef(request.Package, request.Soname),
            Target = new TargetRef(request.Arch, request.Abi),
            Normalization = new NormalizationRef(
                pipeline.RecipeId,
                pipeline.RecipeVersion,
                [.. appliedSteps]),
            SignatureState = request.SignatureState,
            Symbols = [.. symbolSignatures],
            // Wall-clock timestamp; differs between otherwise identical runs.
            GeneratedAt = DateTimeOffset.UtcNow
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Without instructions no real CFG can be built, so when CFG output is requested the
    /// basic-block count is a byte-pattern estimate and the CFG edge hash is left unset.
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Chunk hashes are requested with a chunk size that is zero or negative.
    /// </exception>
    /// <exception cref="ArgumentException">The configured hash algorithm is not supported.</exception>
    public SymbolSignature GenerateSymbolSignature(
        ReadOnlySpan<byte> normalizedBytes,
        string symbolName,
        string scope,
        SignatureOptions? options = null)
    {
        options ??= new SignatureOptions();

        int? estimatedBlockCount = options.IncludeCfg
            ? EstimateBasicBlockCount(normalizedBytes)
            : null;

        return BuildSignature(
            normalizedBytes,
            symbolName,
            scope,
            options,
            estimatedBlockCount,
            cfgEdgeHash: null);
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="normalized"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Chunk hashes are requested with a chunk size that is zero or negative.
    /// </exception>
    /// <exception cref="ArgumentException">The configured hash algorithm is not supported.</exception>
    public SymbolSignature GenerateSymbolSignature(
        NormalizedFunction normalized,
        string symbolName,
        string scope,
        SignatureOptions? options = null)
    {
        ArgumentNullException.ThrowIfNull(normalized);
        options ??= new SignatureOptions();

        var normalizedBytes = GetNormalizedBytes(normalized);

        // CFG metrics come from real control-flow analysis of the instructions.
        int? blockCount = null;
        string? cfgEdgeHash = null;
        if (options.IncludeCfg && normalized.Instructions.Length > 0)
        {
            var cfgMetrics = CfgExtractor.ComputeMetrics(
                normalized.Instructions.ToList(),
                normalized.Instructions[0].OriginalAddress);
            blockCount = cfgMetrics.BasicBlockCount;
            cfgEdgeHash = cfgMetrics.EdgeHash;
        }

        return BuildSignature(normalizedBytes, symbolName, scope, options, blockCount, cfgEdgeHash);
    }

    /// <summary>
    /// Assembles a signature: full hash, optional chunk hashes, and the supplied CFG figures.
    /// </summary>
    private static SymbolSignature BuildSignature(
        ReadOnlySpan<byte> normalizedBytes,
        string symbolName,
        string scope,
        SignatureOptions options,
        int? cfgBlockCount,
        string? cfgEdgeHash)
    {
        var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);
        var chunks = ComputeOptionalChunks(normalizedBytes, options);

        return new SymbolSignature
        {
            Name = symbolName,
            Scope = scope,
            HashAlg = options.HashAlgorithm,
            HashHex = hashHex,
            SizeBytes = normalizedBytes.Length,
            CfgBbCount = cfgBlockCount,
            CfgEdgeHash = cfgEdgeHash,
            Chunks = chunks
        };
    }

    /// <summary>
    /// Returns chunk hashes when they are requested and the data spans at least one full
    /// chunk; otherwise <see langword="null"/>.
    /// </summary>
    private static ImmutableArray<ChunkHash>? ComputeOptionalChunks(
        ReadOnlySpan<byte> data,
        SignatureOptions options)
    {
        if (!options.IncludeChunks)
        {
            return null;
        }

        // A chunk size of zero would never advance the chunking loop, growing the result
        // until memory runs out; reject it (and negative sizes) up front.
        if (options.ChunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(options),
                options.ChunkSize,
                "ChunkSize must be positive when chunk hashes are requested.");
        }

        if (data.Length < options.ChunkSize)
        {
            return null;
        }

        return ComputeChunkHashes(data, options.ChunkSize, options.HashAlgorithm);
    }

    /// <summary>
    /// Concatenates the normalized bytes of every instruction, in instruction order.
    /// </summary>
    private static byte[] GetNormalizedBytes(NormalizedFunction normalized)
    {
        var totalSize = normalized.Instructions.Sum(i => i.NormalizedBytes.Length);
        var result = new byte[totalSize];

        var offset = 0;
        foreach (var instruction in normalized.Instructions)
        {
            instruction.NormalizedBytes.CopyTo(result.AsSpan(offset));
            offset += instruction.NormalizedBytes.Length;
        }

        return result;
    }

    /// <summary>
    /// Hashes <paramref name="data"/> with the named algorithm and returns lowercase hex.
    /// </summary>
    /// <param name="data">Bytes to hash.</param>
    /// <param name="algorithm">"sha256", "sha384" or "sha512", compared case-insensitively.</param>
    /// <exception cref="ArgumentException">The algorithm name is not one of the supported values.</exception>
    private static string ComputeHash(ReadOnlySpan<byte> data, string algorithm)
    {
        Span<byte> hash = stackalloc byte[MaxHashSizeBytes];

        var bytesWritten = algorithm.ToLowerInvariant() switch
        {
            "sha256" => SHA256.HashData(data, hash),
            "sha384" => SHA384.HashData(data, hash),
            "sha512" => SHA512.HashData(data, hash),
            _ => throw new ArgumentException($"Unsupported hash algorithm: {algorithm}", nameof(algorithm))
        };

        return Convert.ToHexString(hash[..bytesWritten]).ToLowerInvariant();
    }

    /// <summary>
    /// Splits <paramref name="data"/> into consecutive, non-overlapping chunks of
    /// <paramref name="chunkSize"/> bytes (the last chunk may be shorter) and hashes each one.
    /// </summary>
    /// <remarks>Callers must pass a positive <paramref name="chunkSize"/>.</remarks>
    private static ImmutableArray<ChunkHash> ComputeChunkHashes(
        ReadOnlySpan<byte> data,
        int chunkSize,
        string algorithm)
    {
        var chunks = ImmutableArray.CreateBuilder<ChunkHash>();

        var offset = 0;
        while (offset < data.Length)
        {
            var size = Math.Min(chunkSize, data.Length - offset);
            var hash = ComputeHash(data.Slice(offset, size), algorithm);
            chunks.Add(new ChunkHash(offset, size, hash));
            offset += size;
        }

        return chunks.ToImmutable();
    }

    /// <summary>
    /// Estimates the basic-block count by counting byte values that look like x64 block
    /// terminators. This is a heuristic: operand bytes with the same values are counted too.
    /// </summary>
    private static int EstimateBasicBlockCount(ReadOnlySpan<byte> data)
    {
        var count = 1; // every function has at least one block

        for (var i = 0; i < data.Length; i++)
        {
            var current = data[i];

            // Single-byte patterns: 0xC3, 0xE8, 0xE9, 0xEB and the 0x70-0x7F range.
            if (current is 0xC3 or 0xE8 or 0xE9 or 0xEB or (>= 0x70 and <= 0x7F))
            {
                count++;
            }
            // Two-byte pattern 0F 80..8F (conditional jumps).
            else if (i + 1 < data.Length && current == 0x0F && data[i + 1] is >= 0x80 and <= 0x8F)
            {
                count++;
                i++; // the second byte belongs to this opcode; do not examine it again
            }
        }

        return count;
    }

    /// <summary>
    /// Maps an architecture name to <see cref="CpuArchitecture"/>, ignoring case.
    /// Not referenced by the other members of this class.
    /// </summary>
    /// <exception cref="ArgumentException">The name is not a known architecture.</exception>
    private static CpuArchitecture ParseArch(string arch)
    {
        return arch.ToLowerInvariant() switch
        {
            "x86_64" or "amd64" or "x64" => CpuArchitecture.X86_64,
            "x86" or "i386" or "i686" => CpuArchitecture.X86,
            "aarch64" or "arm64" => CpuArchitecture.ARM64,
            "arm" or "armv7" => CpuArchitecture.ARM32,
            "mips" or "mips32" => CpuArchitecture.MIPS32,
            "mips64" => CpuArchitecture.MIPS64,
            "riscv64" => CpuArchitecture.RISCV64,
            "ppc" or "ppc32" or "powerpc" => CpuArchitecture.PPC32,
            "ppc64" or "powerpc64" => CpuArchitecture.PPC64,
            _ => throw new ArgumentException($"Unknown architecture: {arch}", nameof(arch))
        };
    }
}

View File

@@ -0,0 +1,369 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Immutable;
using System.Globalization;
using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// Matches binaries against delta signatures.
/// </summary>
/// <remarks>
/// A symbol matches exactly when the hash of its normalized bytes equals the signature's
/// hash, computed with the algorithm the signature declares. A signature's confidence is
/// the fraction of its symbols that matched.
/// </remarks>
public sealed class DeltaSignatureMatcher : IDeltaSignatureMatcher
{
    /// <summary>Algorithm assumed for signatures that do not declare one.</summary>
    private const string DefaultHashAlgorithm = "sha256";

    /// <summary>Minimum partial-match confidence for a symbol to count as matched.</summary>
    private const double PartialMatchThreshold = 0.8;

    /// <summary>Minimum overall confidence for a signature to be reported as matched.</summary>
    private const double SignatureMatchThreshold = 0.5;

    /// <summary>Overall confidence from which explanations drop the "likely" qualifier.</summary>
    private const double HighConfidenceThreshold = 0.9;

    /// <summary>Confidence granted when only the function size is comparable.</summary>
    private const double SizeOnlyConfidence = 0.3;

    /// <summary>Allowed relative size difference for the size-only heuristic.</summary>
    private const double SizeToleranceRatio = 0.1;

    /// <summary>Size in bytes of the largest digest among the supported algorithms (SHA-512).</summary>
    private const int MaxHashSizeBytes = 64;

    private readonly DisassemblyService _disassemblyService;
    private readonly NormalizationService _normalizationService;
    private readonly ILogger<DeltaSignatureMatcher> _logger;

    /// <summary>
    /// Creates a matcher that uses the given disassembly and normalization services.
    /// </summary>
    public DeltaSignatureMatcher(
        DisassemblyService disassemblyService,
        NormalizationService normalizationService,
        ILogger<DeltaSignatureMatcher> logger)
    {
        _disassemblyService = disassemblyService;
        _normalizationService = normalizationService;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Only signatures with at least one matched symbol produce a result. Symbols that cannot
    /// be found or processed are reported as non-matching instead of failing the call.
    /// </remarks>
    public async Task<IReadOnlyList<MatchResult>> MatchAsync(
        Stream binaryStream,
        IEnumerable<DeltaSignature> signatures,
        string? cveFilter = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(binaryStream);
        ArgumentNullException.ThrowIfNull(signatures);

        var signatureList = signatures.ToList();
        if (signatureList.Count == 0)
        {
            return [];
        }

        if (!string.IsNullOrEmpty(cveFilter))
        {
            signatureList = signatureList
                .Where(s => s.Cve.Equals(cveFilter, StringComparison.OrdinalIgnoreCase))
                .ToList();
            if (signatureList.Count == 0)
            {
                _logger.LogDebug("No signatures match CVE filter {Cve}", cveFilter);
                return [];
            }
        }

        _logger.LogInformation(
            "Matching binary against {Count} signature(s)",
            signatureList.Count);

        // Loading is synchronous; run it on the thread pool so the caller is not blocked.
        var (binary, plugin) = await Task.Run(
            () => _disassemblyService.LoadBinary(binaryStream),
            ct);

        _logger.LogDebug(
            "Loaded binary: format={Format}, arch={Arch}",
            binary.Format,
            binary.Architecture);

        // Index symbols by name. A binary can list the same name more than once, and a plain
        // ToDictionary would throw on the duplicate key, so the first entry per name is kept.
        var symbols = plugin.GetSymbols(binary)
            .DistinctBy(s => s.Name)
            .ToDictionary(s => s.Name);

        // Disassemble and normalize each referenced symbol once, however many signatures use it.
        var targetSymbolNames = signatureList
            .SelectMany(sig => sig.Symbols.Select(sym => sym.Name))
            .Distinct();

        var normalizedBySymbol = new Dictionary<string, byte[]>();
        foreach (var symbolName in targetSymbolNames)
        {
            ct.ThrowIfCancellationRequested();

            if (!symbols.TryGetValue(symbolName, out var symbolInfo))
            {
                _logger.LogDebug("Symbol {Symbol} not found in binary", symbolName);
                continue;
            }

            try
            {
                var instructions = plugin.DisassembleSymbol(binary, symbolInfo).ToList();
                if (instructions.Count == 0)
                {
                    continue;
                }

                var normalized = _normalizationService.Normalize(
                    instructions,
                    binary.Architecture);
                normalizedBySymbol[symbolName] = GetNormalizedBytes(normalized);
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // Best effort: one unreadable symbol must not abort matching of the others.
                _logger.LogWarning(ex, "Failed to process symbol {Symbol}", symbolName);
            }
        }

        // Hashes are computed on demand per (symbol, algorithm), because signatures may have
        // been generated with different algorithms.
        var hashCache = new Dictionary<(string Symbol, string Algorithm), string>();

        var results = new List<MatchResult>();
        foreach (var signature in signatureList)
        {
            ct.ThrowIfCancellationRequested();

            var totalCount = signature.Symbols.Length;
            var matchedCount = 0;
            var symbolMatches = new List<SymbolMatchResult>(totalCount);

            foreach (var symbolSig in signature.Symbols)
            {
                if (!normalizedBySymbol.TryGetValue(symbolSig.Name, out var normalizedBytes)
                    || !TryGetHash(hashCache, symbolSig, normalizedBytes, out var computedHash))
                {
                    symbolMatches.Add(NoMatch(symbolSig.Name));
                    continue;
                }

                if (computedHash.Equals(symbolSig.HashHex, StringComparison.OrdinalIgnoreCase))
                {
                    matchedCount++;
                    symbolMatches.Add(new SymbolMatchResult
                    {
                        SymbolName = symbolSig.Name,
                        ExactMatch = true,
                        Confidence = 1.0
                    });
                    continue;
                }

                // No exact match: fall back to the partial-match heuristic.
                var partialMatch = TryChunkMatch(normalizedBytes.Length, symbolSig);
                symbolMatches.Add(partialMatch);
                if (partialMatch.Confidence >= PartialMatchThreshold)
                {
                    matchedCount++;
                }
            }

            var overallConfidence = totalCount > 0
                ? (double)matchedCount / totalCount
                : 0.0;
            if (overallConfidence <= 0)
            {
                continue;
            }

            results.Add(new MatchResult
            {
                Matched = overallConfidence >= SignatureMatchThreshold,
                Cve = signature.Cve,
                SignatureState = signature.SignatureState,
                Confidence = overallConfidence,
                SymbolMatches = [.. symbolMatches],
                Explanation = GenerateExplanation(
                    signature.Cve,
                    signature.SignatureState,
                    matchedCount,
                    totalCount,
                    overallConfidence)
            });

            _logger.LogDebug(
                "Matched {Cve} ({State}): {Matched}/{Total} symbols, confidence={Confidence:P0}",
                signature.Cve,
                signature.SignatureState,
                matchedCount,
                totalCount,
                overallConfidence);
        }

        return results;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Produces one result per signature that contains <paramref name="symbolName"/>, whether
    /// or not the hash matches. The supplied hash is compared as-is, so the caller must have
    /// computed it with the algorithm the signature uses.
    /// </remarks>
    public IReadOnlyList<MatchResult> MatchSymbol(
        string symbolHash,
        string symbolName,
        IEnumerable<DeltaSignature> signatures)
    {
        ArgumentNullException.ThrowIfNull(symbolHash);
        ArgumentNullException.ThrowIfNull(symbolName);
        ArgumentNullException.ThrowIfNull(signatures);

        var results = new List<MatchResult>();
        foreach (var signature in signatures)
        {
            var symbolSig = signature.Symbols
                .FirstOrDefault(s => s.Name.Equals(symbolName, StringComparison.Ordinal));
            if (symbolSig is null)
            {
                continue;
            }

            var exactMatch = symbolHash.Equals(
                symbolSig.HashHex,
                StringComparison.OrdinalIgnoreCase);
            var confidence = exactMatch ? 1.0 : 0.0;

            results.Add(new MatchResult
            {
                Matched = exactMatch,
                Cve = signature.Cve,
                SignatureState = signature.SignatureState,
                Confidence = confidence,
                SymbolMatches =
                [
                    new SymbolMatchResult
                    {
                        SymbolName = symbolName,
                        ExactMatch = exactMatch,
                        Confidence = confidence
                    }
                ],
                Explanation = exactMatch
                    ? $"Symbol {symbolName} matches {signature.SignatureState} signature for {signature.Cve}"
                    : null
            });
        }

        return results;
    }

    /// <summary>
    /// Gets the hash of a symbol's normalized bytes under the algorithm declared by
    /// <paramref name="symbolSig"/>, computing and caching it on first use.
    /// </summary>
    /// <returns>
    /// <see langword="false"/> when the signature declares an unsupported algorithm; the
    /// symbol is then treated as non-matching instead of failing the whole match run.
    /// </returns>
    private bool TryGetHash(
        Dictionary<(string Symbol, string Algorithm), string> cache,
        SymbolSignature symbolSig,
        byte[] normalizedBytes,
        out string hash)
    {
        var algorithm = string.IsNullOrEmpty(symbolSig.HashAlg)
            ? DefaultHashAlgorithm
            : symbolSig.HashAlg.ToLowerInvariant();

        var key = (symbolSig.Name, algorithm);
        if (cache.TryGetValue(key, out var cached))
        {
            hash = cached;
            return true;
        }

        if (algorithm is not ("sha256" or "sha384" or "sha512"))
        {
            _logger.LogWarning(
                "Signature for symbol {Symbol} uses unsupported hash algorithm {Algorithm}",
                symbolSig.Name,
                symbolSig.HashAlg);
            hash = string.Empty;
            return false;
        }

        hash = ComputeHash(normalizedBytes, algorithm);
        cache[key] = hash;
        return true;
    }

    /// <summary>
    /// Builds the result for a symbol that could not be compared at all.
    /// </summary>
    private static SymbolMatchResult NoMatch(string symbolName)
    {
        return new SymbolMatchResult
        {
            SymbolName = symbolName,
            ExactMatch = false,
            Confidence = 0.0
        };
    }

    /// <summary>
    /// Partial-match fallback for a symbol whose full hash differs from the signature.
    /// </summary>
    /// <remarks>
    /// Chunk hashes of the binary are not recomputed here, so the only evidence is size:
    /// a function within 10% of the signature's size gets a low confidence of 0.3, which is
    /// below the threshold at which the caller counts a symbol as matched.
    /// </remarks>
    private static SymbolMatchResult TryChunkMatch(int computedSize, SymbolSignature symbolSig)
    {
        // Signatures without chunk hashes offer nothing to compare partially.
        if (symbolSig.Chunks is not { Length: > 0 } chunks)
        {
            return NoMatch(symbolSig.Name);
        }

        var sizeDifference = Math.Abs(computedSize - symbolSig.SizeBytes);
        var sizeTolerance = symbolSig.SizeBytes * SizeToleranceRatio;
        var sizeIsClose = sizeDifference <= sizeTolerance;

        return new SymbolMatchResult
        {
            SymbolName = symbolSig.Name,
            ExactMatch = false,
            ChunksMatched = 0,
            ChunksTotal = chunks.Length,
            Confidence = sizeIsClose ? SizeOnlyConfidence : 0.0
        };
    }

    /// <summary>
    /// Builds the human-readable explanation for a match result. The wording depends on
    /// whether the signature describes the patched state and on the confidence band.
    /// </summary>
    private static string GenerateExplanation(
        string cve,
        string state,
        int matched,
        int total,
        double confidence)
    {
        var isPatched = state.Equals("patched", StringComparison.OrdinalIgnoreCase);
        var tally = $"({matched}/{total} symbols match)";

        if (confidence >= HighConfidenceThreshold)
        {
            return isPatched
                ? $"Binary contains the patched version of {cve} {tally}"
                : $"Binary is VULNERABLE to {cve} {tally}";
        }

        if (confidence >= SignatureMatchThreshold)
        {
            return isPatched
                ? $"Binary likely contains the patched version of {cve} {tally}"
                : $"Binary is likely VULNERABLE to {cve} {tally}";
        }

        return isPatched
            ? $"Binary may contain partial fix for {cve} {tally}"
            : $"Binary may be vulnerable to {cve} {tally}";
    }

    /// <summary>
    /// Concatenates the normalized bytes of every instruction, in instruction order.
    /// </summary>
    private static byte[] GetNormalizedBytes(NormalizedFunction normalized)
    {
        var totalSize = normalized.Instructions.Sum(i => i.NormalizedBytes.Length);
        var result = new byte[totalSize];

        var offset = 0;
        foreach (var instruction in normalized.Instructions)
        {
            instruction.NormalizedBytes.CopyTo(result.AsSpan(offset));
            offset += instruction.NormalizedBytes.Length;
        }

        return result;
    }

    /// <summary>
    /// Hashes <paramref name="data"/> with the named algorithm and returns lowercase hex.
    /// </summary>
    /// <param name="data">Bytes to hash.</param>
    /// <param name="algorithm">"sha256", "sha384" or "sha512", compared case-insensitively.</param>
    /// <exception cref="ArgumentException">The algorithm name is not one of the supported values.</exception>
    private static string ComputeHash(ReadOnlySpan<byte> data, string algorithm)
    {
        Span<byte> hash = stackalloc byte[MaxHashSizeBytes];

        var bytesWritten = algorithm.ToLowerInvariant() switch
        {
            "sha256" => SHA256.HashData(data, hash),
            "sha384" => SHA384.HashData(data, hash),
            "sha512" => SHA512.HashData(data, hash),
            _ => throw new ArgumentException($"Unsupported hash algorithm: {algorithm}", nameof(algorithm))
        };

        return Convert.ToHexString(hash[..bytesWritten]).ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,52 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using StellaOps.BinaryIndex.Normalization;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// Generates delta signatures from binaries for CVE detection.
/// </summary>
public interface IDeltaSignatureGenerator
{
/// <summary>
/// Generates signatures for the symbols named in the request by loading, disassembling
/// and normalizing the binary.
/// </summary>
/// <param name="binaryStream">Stream containing the binary data.</param>
/// <param name="request">Signature generation request: CVE, package, target and the symbols to sign.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The generated delta signature.</returns>
Task<DeltaSignature> GenerateSignaturesAsync(
Stream binaryStream,
DeltaSignatureRequest request,
CancellationToken ct = default);
/// <summary>
/// Generates a signature for a single symbol from its already-normalized bytes.
/// </summary>
/// <remarks>
/// No instruction information is available to this overload. <see cref="DeltaSignatureGenerator"/>
/// therefore estimates the basic-block count heuristically and produces no CFG edge hash;
/// use the <see cref="NormalizedFunction"/> overload when CFG metrics are needed.
/// </remarks>
/// <param name="normalizedBytes">The normalized bytes of the symbol.</param>
/// <param name="symbolName">Name of the symbol.</param>
/// <param name="scope">Section containing the symbol.</param>
/// <param name="options">Generation options; defaults are used when <see langword="null"/>.</param>
/// <returns>The symbol signature.</returns>
SymbolSignature GenerateSymbolSignature(
ReadOnlySpan<byte> normalizedBytes,
string symbolName,
string scope,
SignatureOptions? options = null);
/// <summary>
/// Generates a signature for a single symbol with full CFG analysis.
/// </summary>
/// <param name="normalized">The normalized function with instructions.</param>
/// <param name="symbolName">Name of the symbol.</param>
/// <param name="scope">Section containing the symbol.</param>
/// <param name="options">Generation options; defaults are used when <see langword="null"/>.</param>
/// <returns>The symbol signature with CFG metrics.</returns>
SymbolSignature GenerateSymbolSignature(
NormalizedFunction normalized,
string symbolName,
string scope,
SignatureOptions? options = null);
}

View File

@@ -0,0 +1,38 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// Matches binaries against delta signatures.
/// </summary>
/// <remarks>
/// Offers an asynchronous whole-binary match over a stream and a synchronous match for a
/// single symbol whose hash has already been computed.
/// </remarks>
public interface IDeltaSignatureMatcher
{
/// <summary>
/// Matches a binary against a collection of delta signatures.
/// </summary>
/// <param name="binaryStream">Stream containing the binary data.</param>
/// <param name="signatures">Signatures to match against.</param>
/// <param name="cveFilter">
/// Optional CVE filter. <c>null</c> by default; presumably restricts matching to
/// signatures for that CVE when set (confirm against the implementation).
/// </param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Match results for each matching signature.</returns>
Task<IReadOnlyList<MatchResult>> MatchAsync(
Stream binaryStream,
IEnumerable<DeltaSignature> signatures,
string? cveFilter = null,
CancellationToken ct = default);
/// <summary>
/// Matches a single symbol's hash against signatures.
/// </summary>
/// <remarks>
/// NOTE(review): only a full hash is supplied here, so chunk-level (partial) matching is
/// presumably not possible through this method; confirm against the implementation.
/// </remarks>
/// <param name="symbolHash">
/// Hash of the normalized symbol. Presumably a hex string comparable to
/// <see cref="SymbolSignature.HashHex"/>; confirm the expected casing and algorithm.
/// </param>
/// <param name="symbolName">Name of the symbol.</param>
/// <param name="signatures">Signatures to match against.</param>
/// <returns>Match results.</returns>
IReadOnlyList<MatchResult> MatchSymbol(
string symbolHash,
string symbolName,
IEnumerable<DeltaSignature> signatures);
}

View File

@@ -0,0 +1,299 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using System.Collections.Immutable;
using StellaOps.BinaryIndex.Disassembly;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// Signature generation options.
/// </summary>
/// <remarks>
/// This record performs no validation: a non-positive <paramref name="ChunkSize"/> or an
/// unknown <paramref name="HashAlgorithm"/> name is accepted here and must be rejected,
/// if at all, by the code that consumes the options.
/// </remarks>
/// <param name="IncludeCfg">Include control flow graph metrics (default <c>true</c>).</param>
/// <param name="IncludeChunks">Include rolling chunk hashes for resilience (default <c>true</c>).</param>
/// <param name="ChunkSize">Size of rolling chunks in bytes (default 2048, i.e. 2KB).</param>
/// <param name="HashAlgorithm">Hash algorithm name to use (default <c>sha256</c>).</param>
public sealed record SignatureOptions(
bool IncludeCfg = true,
bool IncludeChunks = true,
int ChunkSize = 2048,
string HashAlgorithm = "sha256");
/// <summary>
/// Request for generating delta signatures from a binary.
/// </summary>
/// <remarks>
/// <see cref="Cve"/>, <see cref="Package"/>, <see cref="Arch"/>, <see cref="TargetSymbols"/>
/// and <see cref="SignatureState"/> are <c>required</c> and must be set in the object
/// initializer; the remaining properties have defaults. String values are not validated
/// by this record.
/// </remarks>
public sealed record DeltaSignatureRequest
{
/// <summary>
/// CVE identifier (e.g., CVE-2024-1234).
/// </summary>
public required string Cve { get; init; }
/// <summary>
/// Package name.
/// </summary>
public required string Package { get; init; }
/// <summary>
/// Shared object name (e.g., libssl.so.1.1). Optional; <c>null</c> when not supplied.
/// </summary>
public string? Soname { get; init; }
/// <summary>
/// Target architecture (e.g., x86_64, aarch64).
/// </summary>
public required string Arch { get; init; }
/// <summary>
/// ABI (e.g., gnu, musl, android). Defaults to <c>gnu</c>.
/// </summary>
public string Abi { get; init; } = "gnu";
/// <summary>
/// Symbol names to generate signatures for.
/// </summary>
public required IReadOnlyList<string> TargetSymbols { get; init; }
/// <summary>
/// State of this signature: "vulnerable" or "patched".
/// Held as a free-form string; this record does not restrict it to those two values.
/// </summary>
public required string SignatureState { get; init; }
/// <summary>
/// Signature generation options. Optional; when <c>null</c> the generator presumably
/// falls back to the <see cref="SignatureOptions"/> defaults (confirm against the implementation).
/// </summary>
public SignatureOptions? Options { get; init; }
}
/// <summary>
/// A complete delta signature for a binary.
/// </summary>
/// <remarks>
/// Record equality caveats: <see cref="GeneratedAt"/> takes part in equality, and
/// <see cref="System.Collections.Immutable.ImmutableArray{T}"/> compares by underlying
/// array instance rather than element by element. Two signatures built independently from
/// the same binary are therefore not equal under <c>==</c>/<c>Equals</c>; compare the
/// relevant fields explicitly instead.
/// </remarks>
public sealed record DeltaSignature
{
/// <summary>
/// Schema identifier for this signature format. Defaults to <c>stellaops.deltasig.v1</c>.
/// </summary>
public string Schema { get; init; } = "stellaops.deltasig.v1";
/// <summary>
/// Schema version. Defaults to <c>1.0.0</c>.
/// </summary>
public string SchemaVersion { get; init; } = "1.0.0";
/// <summary>
/// CVE this signature is for.
/// </summary>
public required string Cve { get; init; }
/// <summary>
/// Package reference.
/// </summary>
public required PackageRef Package { get; init; }
/// <summary>
/// Target platform reference.
/// </summary>
public required TargetRef Target { get; init; }
/// <summary>
/// Normalization recipe used.
/// </summary>
public required NormalizationRef Normalization { get; init; }
/// <summary>
/// Signature state: "vulnerable" or "patched".
/// Held as a free-form string; this record does not restrict it to those two values.
/// </summary>
public required string SignatureState { get; init; }
/// <summary>
/// Individual symbol signatures.
/// </summary>
public required ImmutableArray<SymbolSignature> Symbols { get; init; }
/// <summary>
/// When this signature was generated (UTC).
/// </summary>
/// <remarks>
/// NOTE(review): defaults to the wall-clock time at construction, so this field differs
/// between two otherwise identical signatures unless the caller sets it explicitly.
/// If serialized output must be reproducible, set this from an injected clock or exclude
/// it from the deterministic payload.
/// </remarks>
public DateTimeOffset GeneratedAt { get; init; } = DateTimeOffset.UtcNow;
/// <summary>
/// Additional metadata. Optional; <c>null</c> when none is attached.
/// </summary>
public IReadOnlyDictionary<string, object>? Metadata { get; init; }
}
/// <summary>
/// Package reference for a delta signature.
/// </summary>
/// <param name="Name">Package name.</param>
/// <param name="Soname">Shared object name; may be <c>null</c>.</param>
public sealed record PackageRef(string Name, string? Soname);
/// <summary>
/// Target platform reference: the architecture/ABI pair a signature applies to.
/// </summary>
/// <param name="Arch">CPU architecture (x86_64, aarch64, etc.).</param>
/// <param name="Abi">ABI (gnu, musl, android, etc.).</param>
public sealed record TargetRef(string Arch, string Abi);
/// <summary>
/// Normalization recipe reference for reproducibility.
/// </summary>
/// <remarks>
/// <paramref name="Steps"/> is an <see cref="ImmutableArray{T}"/>, whose equality compares
/// the underlying array instance; record equality therefore does not compare the step
/// lists element by element.
/// </remarks>
/// <param name="RecipeId">Recipe identifier (e.g., elf.delta.norm.x64).</param>
/// <param name="RecipeVersion">Recipe version.</param>
/// <param name="Steps">List of normalization steps applied.</param>
public sealed record NormalizationRef(
string RecipeId,
string RecipeVersion,
ImmutableArray<string> Steps);
/// <summary>
/// Signature for a single symbol (function).
/// </summary>
/// <remarks>
/// The hash fields are required; the CFG and chunk fields are optional and are <c>null</c>
/// when not populated.
/// </remarks>
public sealed record SymbolSignature
{
/// <summary>
/// Symbol name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Section containing the symbol (e.g., .text). Defaults to <c>.text</c>.
/// </summary>
public string Scope { get; init; } = ".text";
/// <summary>
/// Hash algorithm used.
/// </summary>
public required string HashAlg { get; init; }
/// <summary>
/// Hash of the normalized function as hex string.
/// </summary>
public required string HashHex { get; init; }
/// <summary>
/// Size of the normalized function in bytes.
/// </summary>
public required int SizeBytes { get; init; }
/// <summary>
/// Number of basic blocks in the control flow graph.
/// <c>null</c> when not populated; presumably when CFG metrics were not requested (confirm).
/// </summary>
public int? CfgBbCount { get; init; }
/// <summary>
/// Hash of the CFG structure (edges).
/// <c>null</c> when not populated; presumably when CFG metrics were not requested (confirm).
/// </summary>
public string? CfgEdgeHash { get; init; }
/// <summary>
/// Rolling chunk hashes for resilience against small changes.
/// <c>null</c> when not populated; presumably when chunk hashing was not requested (confirm).
/// </summary>
public ImmutableArray<ChunkHash>? Chunks { get; init; }
}
/// <summary>
/// Hash of a chunk within a function for resilience.
/// </summary>
/// <param name="Offset">Offset from function start, in bytes.</param>
/// <param name="Size">Chunk size in bytes.</param>
/// <param name="HashHex">Hash of the chunk as hex string.</param>
public sealed record ChunkHash(int Offset, int Size, string HashHex);
/// <summary>
/// Result of matching a binary against delta signatures.
/// </summary>
/// <remarks>
/// Only <see cref="Matched"/> is required. The other properties keep their defaults when
/// not set: <c>null</c> for the strings, <c>0.0</c> for <see cref="Confidence"/> and an
/// empty array for <see cref="SymbolMatches"/>.
/// </remarks>
public sealed record MatchResult
{
/// <summary>
/// Whether a match was found.
/// </summary>
public required bool Matched { get; init; }
/// <summary>
/// The CVE that matched. <c>null</c> when not set.
/// </summary>
public string? Cve { get; init; }
/// <summary>
/// The signature state that matched (vulnerable/patched). <c>null</c> when not set.
/// </summary>
public string? SignatureState { get; init; }
/// <summary>
/// Confidence score (0.0 - 1.0). The range is documented, not enforced by this record.
/// </summary>
public double Confidence { get; init; }
/// <summary>
/// Individual symbol match results. Defaults to an empty array, never an
/// uninitialized <see cref="ImmutableArray{T}"/>.
/// </summary>
public ImmutableArray<SymbolMatchResult> SymbolMatches { get; init; } = [];
/// <summary>
/// Explanation of the match result. <c>null</c> when not set.
/// </summary>
public string? Explanation { get; init; }
}
/// <summary>
/// Match result for a single symbol.
/// </summary>
/// <remarks>
/// <see cref="ChunksMatched"/>, <see cref="ChunksTotal"/> and <see cref="Confidence"/>
/// default to zero when not set.
/// </remarks>
public sealed record SymbolMatchResult
{
/// <summary>
/// Symbol name.
/// </summary>
public required string SymbolName { get; init; }
/// <summary>
/// Whether the symbol hash matched exactly.
/// </summary>
public required bool ExactMatch { get; init; }
/// <summary>
/// Number of chunk hashes that matched (partial match).
/// </summary>
public int ChunksMatched { get; init; }
/// <summary>
/// Total chunks in the signature.
/// </summary>
public int ChunksTotal { get; init; }
/// <summary>
/// Match confidence (0.0 - 1.0). The range is documented, not enforced by this record.
/// </summary>
public double Confidence { get; init; }
}
/// <summary>
/// Result of authoring signatures from vulnerable and patched binaries.
/// </summary>
/// <remarks>
/// Only <see cref="Success"/> is required. The signature properties and <see cref="Error"/>
/// are nullable and default to <c>null</c>; <see cref="DifferingSymbols"/> defaults to an
/// empty array. This record does not enforce any relationship between
/// <see cref="Success"/> and which of the other properties are populated.
/// </remarks>
public sealed record AuthoringResult
{
/// <summary>
/// Whether authoring succeeded.
/// </summary>
public required bool Success { get; init; }
/// <summary>
/// Signature for the vulnerable binary. <c>null</c> when not set.
/// </summary>
public DeltaSignature? VulnerableSignature { get; init; }
/// <summary>
/// Signature for the patched binary. <c>null</c> when not set.
/// </summary>
public DeltaSignature? PatchedSignature { get; init; }
/// <summary>
/// Symbols that differ between vulnerable and patched. Defaults to an empty array.
/// </summary>
public ImmutableArray<string> DifferingSymbols { get; init; } = [];
/// <summary>
/// Error message if authoring failed. <c>null</c> when not set.
/// </summary>
public string? Error { get; init; }
}

View File

@@ -0,0 +1,47 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using Microsoft.Extensions.DependencyInjection;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// Dependency-injection helpers that wire up the delta signature components.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the delta signature generator and matcher as singletons.
    /// The disassembly and normalization services they rely on must also be registered.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same service collection, so calls can be chained.</returns>
    public static IServiceCollection AddDeltaSignatures(this IServiceCollection services) =>
        services
            .AddSingleton<IDeltaSignatureGenerator, DeltaSignatureGenerator>()
            .AddSingleton<IDeltaSignatureMatcher, DeltaSignatureMatcher>();

    /// <summary>
    /// Registers the full binary index stack in one call: disassembly, normalization,
    /// and delta signatures.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same service collection, so calls can be chained.</returns>
    public static IServiceCollection AddBinaryIndexServices(this IServiceCollection services)
    {
        // Registration order is preserved: disassembly (with its default plugins) first,
        // then the normalization pipelines, then the delta signature services on top.
        services.AddDisassemblyServices();
        services.AddNormalizationPipelines();
        AddDeltaSignatures(services);

        return services;
    }
}

View File

@@ -0,0 +1,24 @@
<!-- Project file for the delta signature library (root namespace StellaOps.BinaryIndex.DeltaSig). -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- NOTE(review): preview language features are enabled while warnings are treated as errors; confirm this is intentional. -->
<LangVersion>preview</LangVersion>
<RootNamespace>StellaOps.BinaryIndex.DeltaSig</RootNamespace>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Description>Delta signature generation for binary patch detection. Produces deterministic signatures for CVE fix verification.</Description>
</PropertyGroup>
<!-- Sibling projects this library builds on: disassembly (abstractions and implementation) and normalization. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.Disassembly.Abstractions\StellaOps.BinaryIndex.Disassembly.Abstractions.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Disassembly\StellaOps.BinaryIndex.Disassembly.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Normalization\StellaOps.BinaryIndex.Normalization.csproj" />
</ItemGroup>
<!-- No Version attributes here: versions are presumably supplied centrally (e.g. Directory.Packages.props); confirm. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
</ItemGroup>
</Project>