Files
git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.DeltaSig/DeltaSignatureGenerator.cs
2026-02-18 12:00:10 +02:00

495 lines
17 KiB
C#

// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
using StellaOps.BinaryIndex.Semantic;
using System.Collections.Immutable;
using System.Globalization;
using System.Security.Cryptography;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// Generates delta signatures from binaries for CVE detection.
/// </summary>
public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
{
// Required collaborators: the full constructor throws ArgumentNullException when either is null.
private readonly DisassemblyService _disassemblyService;
private readonly NormalizationService _normalizationService;
// Optional semantic-analysis collaborators. They are null when the generator is created
// without semantic support; SemanticAnalysisAvailable is true only when all three are set.
private readonly IIrLiftingService? _irLiftingService;
private readonly ISemanticGraphExtractor? _graphExtractor;
private readonly ISemanticFingerprintGenerator? _fingerprintGenerator;
// Required logger: the full constructor throws ArgumentNullException when it is null.
private readonly ILogger<DeltaSignatureGenerator> _logger;
/// <summary>
/// Initializes a generator that produces byte-level signatures only; the three
/// semantic-analysis services are left unset.
/// </summary>
/// <param name="disassemblyService">Service used to load binaries.</param>
/// <param name="normalizationService">Service used to normalize disassembled instructions.</param>
/// <param name="logger">Logger for diagnostics.</param>
/// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
public DeltaSignatureGenerator(
    DisassemblyService disassemblyService,
    NormalizationService normalizationService,
    ILogger<DeltaSignatureGenerator> logger)
    : this(
        disassemblyService,
        normalizationService,
        irLiftingService: null,
        graphExtractor: null,
        fingerprintGenerator: null,
        logger: logger)
{
}
/// <summary>
/// Initializes a generator, optionally wiring in the services needed for
/// semantic fingerprinting.
/// </summary>
/// <param name="disassemblyService">Service used to load binaries. Required.</param>
/// <param name="normalizationService">Service used to normalize disassembled instructions. Required.</param>
/// <param name="irLiftingService">Optional IR lifting service; may be <see langword="null"/>.</param>
/// <param name="graphExtractor">Optional semantic graph extractor; may be <see langword="null"/>.</param>
/// <param name="fingerprintGenerator">Optional semantic fingerprint generator; may be <see langword="null"/>.</param>
/// <param name="logger">Logger for diagnostics. Required.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="disassemblyService"/>, <paramref name="normalizationService"/> or
/// <paramref name="logger"/> is <see langword="null"/>.
/// </exception>
public DeltaSignatureGenerator(
    DisassemblyService disassemblyService,
    NormalizationService normalizationService,
    IIrLiftingService? irLiftingService,
    ISemanticGraphExtractor? graphExtractor,
    ISemanticFingerprintGenerator? fingerprintGenerator,
    ILogger<DeltaSignatureGenerator> logger)
{
    // Reject missing required dependencies up front, in declaration order.
    ArgumentNullException.ThrowIfNull(disassemblyService);
    ArgumentNullException.ThrowIfNull(normalizationService);
    ArgumentNullException.ThrowIfNull(logger);

    _disassemblyService = disassemblyService;
    _normalizationService = normalizationService;
    _logger = logger;

    // The semantic services are optional and stored as given.
    _irLiftingService = irLiftingService;
    _graphExtractor = graphExtractor;
    _fingerprintGenerator = fingerprintGenerator;
}
/// <summary>
/// Gets a value indicating whether all three semantic-analysis services
/// (IR lifting, graph extraction, fingerprint generation) were supplied.
/// </summary>
public bool SemanticAnalysisAvailable =>
    !(_irLiftingService is null
      || _graphExtractor is null
      || _fingerprintGenerator is null);
/// <inheritdoc />
/// <remarks>
/// Loads the binary, indexes its symbols by name, then disassembles, normalizes and
/// signs each target symbol. An empty <c>TargetSymbols</c> list means "all symbols",
/// processed in ordinal name order. Target symbols that are missing from the binary,
/// or that disassemble to no instructions, are logged and skipped.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="binaryStream"/> or <paramref name="request"/> is <see langword="null"/>.
/// </exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
public async Task<DeltaSignature> GenerateSignaturesAsync(
    Stream binaryStream,
    DeltaSignatureRequest request,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(binaryStream);
    ArgumentNullException.ThrowIfNull(request);

    _logger.LogInformation(
        "Generating delta signatures for {Cve} ({Package}) with {SymbolCount} target symbols",
        request.Cve,
        request.Package,
        request.TargetSymbols.Count);

    var options = request.Options ?? new SignatureOptions();

    // LoadBinary is synchronous; run it on the thread pool so the caller is not blocked.
    var (binary, plugin) = await Task.Run(
        () => _disassemblyService.LoadBinary(binaryStream),
        ct).ConfigureAwait(false);

    _logger.LogDebug(
        "Loaded binary: format={Format}, arch={Arch}",
        binary.Format,
        binary.Architecture);

    // Index symbols by name. If the plugin reports the same name more than once,
    // a plain ToDictionary(s => s.Name) would throw and abort the whole run, so
    // group first and keep the first occurrence of each name.
    var symbolGroups = plugin.GetSymbols(binary)
        .GroupBy(s => s.Name, StringComparer.Ordinal)
        .ToList();
    var symbols = symbolGroups.ToDictionary(g => g.Key, g => g.First(), StringComparer.Ordinal);

    var duplicateNameCount = symbolGroups.Count(g => g.Skip(1).Any());
    if (duplicateNameCount > 0)
    {
        _logger.LogDebug(
            "{Count} symbol names occur more than once; using the first occurrence of each",
            duplicateNameCount);
    }

    // Empty target list means "all symbols"; sort for a deterministic output order.
    var targetSymbols = request.TargetSymbols.Count == 0
        ? symbols.Keys.OrderBy(v => v, StringComparer.Ordinal).ToArray()
        : request.TargetSymbols;

    var symbolSignatures = new List<SymbolSignature>();

    // Applied steps are reported in first-seen order; the set makes the
    // "already recorded?" check O(1) instead of scanning the list each time.
    var appliedSteps = new List<string>();
    var seenSteps = new HashSet<string>();

    foreach (var symbolName in targetSymbols)
    {
        ct.ThrowIfCancellationRequested();

        if (!symbols.TryGetValue(symbolName, out var symbolInfo))
        {
            _logger.LogWarning("Symbol {Symbol} not found in binary", symbolName);
            continue;
        }

        // Disassemble the symbol
        var instructions = plugin.DisassembleSymbol(binary, symbolInfo).ToList();
        if (instructions.Count == 0)
        {
            _logger.LogWarning("No instructions for symbol {Symbol}", symbolName);
            continue;
        }

        // Normalize the instructions
        var normalized = _normalizationService.Normalize(
            instructions,
            binary.Architecture);

        foreach (var step in normalized.AppliedSteps)
        {
            if (seenSteps.Add(step))
            {
                appliedSteps.Add(step);
            }
        }

        // Generate the signature; the section name falls back to ".text" when unknown.
        var signature = await GenerateSymbolSignatureAsync(
            normalized,
            symbolName,
            symbolInfo.Section ?? ".text",
            instructions,
            binary.Architecture,
            options,
            ct).ConfigureAwait(false);

        symbolSignatures.Add(signature);

        _logger.LogDebug(
            "Generated signature for {Symbol}: {Hash} ({Size} bytes)",
            symbolName,
            signature.HashHex,
            signature.SizeBytes);
    }

    // Record which normalization recipe produced the hashed bytes.
    var pipeline = _normalizationService.GetPipeline(binary.Architecture);

    return new DeltaSignature
    {
        Cve = request.Cve,
        Package = new PackageRef(request.Package, request.Soname),
        Target = new TargetRef(request.Arch, request.Abi),
        Normalization = new NormalizationRef(
            pipeline.RecipeId,
            pipeline.RecipeVersion,
            [.. appliedSteps]),
        SignatureState = request.SignatureState,
        Symbols = [.. symbolSignatures],
        GeneratedAt = DateTimeOffset.UtcNow
    };
}
/// <inheritdoc />
/// <remarks>
/// Byte-only overload. Without instruction data no control-flow graph can be built, so
/// <c>CfgEdgeHash</c> is always <see langword="null"/> and, when CFG output is requested,
/// <c>CfgBbCount</c> is a heuristic estimate derived from the raw bytes.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="symbolName"/> or <paramref name="scope"/> is <see langword="null"/>.
/// </exception>
public SymbolSignature GenerateSymbolSignature(
    ReadOnlySpan<byte> normalizedBytes,
    string symbolName,
    string scope,
    SignatureOptions? options = null)
{
    // Validate like the async overload does, so a null name fails here
    // instead of producing a signature with a null Name/Scope.
    ArgumentNullException.ThrowIfNull(symbolName);
    ArgumentNullException.ThrowIfNull(scope);

    options ??= new SignatureOptions();

    // Hash of the whole normalized function body.
    var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);

    // Per-chunk hashes are only produced when the body is at least one chunk long.
    ImmutableArray<ChunkHash>? chunks = null;
    if (options.IncludeChunks && normalizedBytes.Length >= options.ChunkSize)
    {
        chunks = ComputeChunkHashes(normalizedBytes, options.ChunkSize, options.HashAlgorithm);
    }

    // Heuristic block count only; see remarks.
    int? bbCount = options.IncludeCfg
        ? EstimateBasicBlockCount(normalizedBytes)
        : null;

    return new SymbolSignature
    {
        Name = symbolName,
        Scope = scope,
        HashAlg = options.HashAlgorithm,
        HashHex = hashHex,
        SizeBytes = normalizedBytes.Length,
        CfgBbCount = bbCount,
        CfgEdgeHash = null,
        Chunks = chunks
    };
}
/// <inheritdoc />
/// <remarks>
/// Hashes the concatenated normalized instruction bytes and, when requested, derives
/// basic-block count and edge hash from the normalized instructions via
/// <c>CfgExtractor.ComputeMetrics</c>. No semantic fingerprint is produced by this overload.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="normalized"/>, <paramref name="symbolName"/> or <paramref name="scope"/>
/// is <see langword="null"/>.
/// </exception>
public SymbolSignature GenerateSymbolSignature(
    NormalizedFunction normalized,
    string symbolName,
    string scope,
    SignatureOptions? options = null)
{
    // Validate like the async overload does; otherwise a null function surfaces
    // as a NullReferenceException from inside a private helper.
    ArgumentNullException.ThrowIfNull(normalized);
    ArgumentNullException.ThrowIfNull(symbolName);
    ArgumentNullException.ThrowIfNull(scope);

    options ??= new SignatureOptions();

    // Get normalized bytes for hashing
    var normalizedBytes = GetNormalizedBytes(normalized);

    // Hash of the whole normalized function body.
    var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);

    // Per-chunk hashes are only produced when the body is at least one chunk long.
    ImmutableArray<ChunkHash>? chunks = null;
    if (options.IncludeChunks && normalizedBytes.Length >= options.ChunkSize)
    {
        chunks = ComputeChunkHashes(normalizedBytes, options.ChunkSize, options.HashAlgorithm);
    }

    // CFG metrics need at least one instruction to establish a start address.
    int? bbCount = null;
    string? cfgEdgeHash = null;
    if (options.IncludeCfg && normalized.Instructions.Length > 0)
    {
        // Use first instruction's address as start address
        var startAddress = normalized.Instructions[0].OriginalAddress;
        var cfgMetrics = CfgExtractor.ComputeMetrics(
            normalized.Instructions.ToList(),
            startAddress);

        bbCount = cfgMetrics.BasicBlockCount;
        cfgEdgeHash = cfgMetrics.EdgeHash;
    }

    return new SymbolSignature
    {
        Name = symbolName,
        Scope = scope,
        HashAlg = options.HashAlgorithm,
        HashHex = hashHex,
        SizeBytes = normalizedBytes.Length,
        CfgBbCount = bbCount,
        CfgEdgeHash = cfgEdgeHash,
        Chunks = chunks
    };
}
/// <inheritdoc />
/// <remarks>
/// Produces the same byte hash, chunk hashes and CFG metrics as the synchronous
/// <see cref="NormalizedFunction"/> overload and, when enabled and available, adds a
/// semantic fingerprint computed from <paramref name="originalInstructions"/>.
/// Semantic fingerprinting is best-effort: failures are logged and the signature is
/// returned without semantic data. Cancellation is not treated as such a failure and
/// propagates to the caller.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="normalized"/>, <paramref name="symbolName"/>, <paramref name="scope"/>
/// or <paramref name="originalInstructions"/> is <see langword="null"/>.
/// </exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
public async Task<SymbolSignature> GenerateSymbolSignatureAsync(
    NormalizedFunction normalized,
    string symbolName,
    string scope,
    IReadOnlyList<DisassembledInstruction> originalInstructions,
    CpuArchitecture architecture,
    SignatureOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(normalized);
    ArgumentNullException.ThrowIfNull(symbolName);
    ArgumentNullException.ThrowIfNull(scope);
    ArgumentNullException.ThrowIfNull(originalInstructions);

    options ??= new SignatureOptions();

    // Get normalized bytes for hashing
    var normalizedBytes = GetNormalizedBytes(normalized);

    // Hash of the whole normalized function body.
    var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);

    // Per-chunk hashes are only produced when the body is at least one chunk long.
    ImmutableArray<ChunkHash>? chunks = null;
    if (options.IncludeChunks && normalizedBytes.Length >= options.ChunkSize)
    {
        chunks = ComputeChunkHashes(normalizedBytes, options.ChunkSize, options.HashAlgorithm);
    }

    // CFG metrics need at least one instruction to establish a start address.
    int? bbCount = null;
    string? cfgEdgeHash = null;
    if (options.IncludeCfg && normalized.Instructions.Length > 0)
    {
        // Use first instruction's address as start address
        var startAddress = normalized.Instructions[0].OriginalAddress;
        var cfgMetrics = CfgExtractor.ComputeMetrics(
            normalized.Instructions.ToList(),
            startAddress);

        bbCount = cfgMetrics.BasicBlockCount;
        cfgEdgeHash = cfgMetrics.EdgeHash;
    }

    // Compute semantic fingerprint if enabled and services available
    string? semanticHashHex = null;
    ImmutableArray<string>? semanticApiCalls = null;
    if (options.IncludeSemantic && SemanticAnalysisAvailable && originalInstructions.Count > 0)
    {
        try
        {
            var semanticFingerprint = await ComputeSemanticFingerprintAsync(
                originalInstructions,
                symbolName,
                architecture,
                ct).ConfigureAwait(false);

            if (semanticFingerprint is not null)
            {
                semanticHashHex = semanticFingerprint.GraphHashHex;
                semanticApiCalls = semanticFingerprint.ApiCalls;
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller asked to stop; the best-effort fallback below must not
            // swallow that request and carry on.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(
                ex,
                "Failed to compute semantic fingerprint for {Symbol}, continuing without semantic data",
                symbolName);
        }
    }

    return new SymbolSignature
    {
        Name = symbolName,
        Scope = scope,
        HashAlg = options.HashAlgorithm,
        HashHex = hashHex,
        SizeBytes = normalizedBytes.Length,
        CfgBbCount = bbCount,
        CfgEdgeHash = cfgEdgeHash,
        Chunks = chunks,
        SemanticHashHex = semanticHashHex,
        SemanticApiCalls = semanticApiCalls
    };
}
/// <summary>
/// Runs the semantic pipeline (lift to IR, extract graph, generate fingerprint) for one function.
/// </summary>
/// <param name="instructions">Disassembled instructions of the function.</param>
/// <param name="functionName">Name passed to the IR lifter.</param>
/// <param name="architecture">Architecture of the instructions.</param>
/// <param name="ct">Cancellation token forwarded to every pipeline stage.</param>
/// <returns>
/// The fingerprint, or <see langword="null"/> when any semantic service is missing or the
/// lifter does not support <paramref name="architecture"/>.
/// </returns>
private async Task<SemanticFingerprint?> ComputeSemanticFingerprintAsync(
    IReadOnlyList<DisassembledInstruction> instructions,
    string functionName,
    CpuArchitecture architecture,
    CancellationToken ct)
{
    // All three services are required; bail out if any one is absent.
    if (_irLiftingService is not { } lifter
        || _graphExtractor is not { } extractor
        || _fingerprintGenerator is not { } generator)
    {
        return null;
    }

    if (!lifter.SupportsArchitecture(architecture))
    {
        _logger.LogDebug(
            "Architecture {Arch} not supported for semantic analysis",
            architecture);
        return null;
    }

    // The function's entry address is the first instruction's address (0 when empty).
    var startAddress = instructions.Count > 0 ? instructions[0].Address : 0UL;

    var liftedFunction = await lifter.LiftToIrAsync(
        instructions,
        functionName,
        startAddress,
        architecture,
        ct: ct);

    var semanticGraph = await extractor.ExtractGraphAsync(liftedFunction, ct: ct);

    return await generator.GenerateAsync(
        semanticGraph,
        startAddress,
        ct: ct);
}
/// <summary>
/// Flattens a normalized function into one contiguous byte array by appending each
/// instruction's normalized bytes in instruction order.
/// </summary>
/// <param name="normalized">The function whose instructions are concatenated.</param>
/// <returns>A new array holding all normalized instruction bytes back to back.</returns>
private static byte[] GetNormalizedBytes(NormalizedFunction normalized)
{
    // Size the buffer exactly so each instruction is copied once, with no resizing.
    var buffer = new byte[normalized.Instructions.Sum(instr => instr.NormalizedBytes.Length)];

    var cursor = 0;
    foreach (var instr in normalized.Instructions)
    {
        var encoded = instr.NormalizedBytes;
        encoded.CopyTo(buffer.AsSpan(cursor));
        cursor += encoded.Length;
    }

    return buffer;
}
/// <summary>
/// Hashes <paramref name="data"/> with the named algorithm and returns the digest as
/// lowercase hexadecimal.
/// </summary>
/// <param name="data">Bytes to hash.</param>
/// <param name="algorithm">"sha256", "sha384" or "sha512", matched case-insensitively.</param>
/// <returns>Lowercase hex string of the digest.</returns>
/// <exception cref="ArgumentException"><paramref name="algorithm"/> is not a supported name.</exception>
private static string ComputeHash(ReadOnlySpan<byte> data, string algorithm)
{
    // 64 bytes holds the largest supported digest (SHA-512).
    Span<byte> digest = stackalloc byte[64];

    var written = algorithm.ToLowerInvariant() switch
    {
        "sha256" => SHA256.HashData(data, digest),
        "sha384" => SHA384.HashData(data, digest),
        "sha512" => SHA512.HashData(data, digest),
        _ => throw new ArgumentException($"Unsupported hash algorithm: {algorithm}", nameof(algorithm)),
    };

    // Only the first `written` bytes belong to the digest.
    return Convert.ToHexString(digest.Slice(0, written)).ToLowerInvariant();
}
/// <summary>
/// Splits <paramref name="data"/> into consecutive chunks of at most
/// <paramref name="chunkSize"/> bytes and hashes each chunk independently.
/// </summary>
/// <param name="data">Bytes to split and hash.</param>
/// <param name="chunkSize">Maximum chunk length in bytes; must be positive.</param>
/// <param name="algorithm">Hash algorithm name forwarded to <c>ComputeHash</c>.</param>
/// <returns>
/// One entry per chunk (offset, size, hex hash) in offset order; the last chunk may be
/// shorter than <paramref name="chunkSize"/>. Empty when <paramref name="data"/> is empty.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
private static ImmutableArray<ChunkHash> ComputeChunkHashes(
    ReadOnlySpan<byte> data,
    int chunkSize,
    string algorithm)
{
    // A zero chunk size would never advance the offset (endless loop, unbounded
    // allocation); a negative one would fail later inside Slice with an unhelpful message.
    if (chunkSize <= 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(chunkSize),
            chunkSize,
            "Chunk size must be a positive number of bytes.");
    }

    // Upper bound on the number of chunks, so the list never has to grow.
    var chunks = new List<ChunkHash>((data.Length / chunkSize) + 1);

    var offset = 0;
    while (offset < data.Length)
    {
        // The final chunk takes whatever is left.
        var size = Math.Min(chunkSize, data.Length - offset);
        var hash = ComputeHash(data.Slice(offset, size), algorithm);
        chunks.Add(new ChunkHash(offset, size, hash));
        offset += size;
    }

    return [.. chunks];
}
/// <summary>
/// Roughly estimates the number of basic blocks by scanning raw bytes for values that
/// look like x64 control-transfer opcodes. This is a byte-wise heuristic, not a CFG
/// analysis: the scan is not instruction-aligned, so operand bytes can be counted too.
/// </summary>
/// <param name="data">Function bytes to scan.</param>
/// <returns>One plus the number of opcode-like matches; always at least 1.</returns>
private static int EstimateBasicBlockCount(ReadOnlySpan<byte> data)
{
    var blocks = 1; // a function has at least one block
    var index = 0;

    while (index < data.Length)
    {
        var current = data[index];

        // Single-byte matches: C3 (ret), E8 (call), E9/EB (jmp), 70-7F (short conditional jumps).
        var isSingleByteTerminator = current switch
        {
            0xC3 or 0xE8 or 0xE9 or 0xEB => true,
            >= 0x70 and <= 0x7F => true,
            _ => false,
        };

        if (isSingleByteTerminator)
        {
            blocks++;
            index++;
            continue;
        }

        // Two-byte match: 0F 80..8F (near conditional jumps); consume both bytes.
        var hasFollower = index + 1 < data.Length;
        if (hasFollower && current == 0x0F && data[index + 1] is >= 0x80 and <= 0x8F)
        {
            blocks++;
            index += 2;
            continue;
        }

        index++;
    }

    return blocks;
}
/// <summary>
/// Maps a textual architecture name (matched case-insensitively) to a <see cref="CpuArchitecture"/>.
/// </summary>
/// <param name="arch">Architecture name such as "x86_64", "arm64" or "riscv64".</param>
/// <returns>The matching architecture value.</returns>
/// <exception cref="ArgumentException"><paramref name="arch"/> is not a recognized name.</exception>
// NOTE(review): no call to this helper appears in the visible portion of the file; confirm it is still needed.
private static CpuArchitecture ParseArch(string arch)
{
    switch (arch.ToLowerInvariant())
    {
        case "x86_64":
        case "amd64":
        case "x64":
            return CpuArchitecture.X86_64;

        case "x86":
        case "i386":
        case "i686":
            return CpuArchitecture.X86;

        case "aarch64":
        case "arm64":
            return CpuArchitecture.ARM64;

        case "arm":
        case "armv7":
            return CpuArchitecture.ARM32;

        case "mips":
        case "mips32":
            return CpuArchitecture.MIPS32;

        case "mips64":
            return CpuArchitecture.MIPS64;

        case "riscv64":
            return CpuArchitecture.RISCV64;

        case "ppc":
        case "ppc32":
        case "powerpc":
            return CpuArchitecture.PPC32;

        case "ppc64":
        case "powerpc64":
            return CpuArchitecture.PPC64;

        default:
            throw new ArgumentException($"Unknown architecture: {arch}", nameof(arch));
    }
}
}