495 lines
17 KiB
C#
495 lines
17 KiB
C#
// Copyright (c) StellaOps. All rights reserved.
|
|
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
|
|
|
|
|
using Microsoft.Extensions.Logging;
|
|
using StellaOps.BinaryIndex.Disassembly;
|
|
using StellaOps.BinaryIndex.Normalization;
|
|
using StellaOps.BinaryIndex.Semantic;
|
|
using System.Collections.Immutable;
|
|
using System.Globalization;
|
|
using System.Security.Cryptography;
|
|
|
|
namespace StellaOps.BinaryIndex.DeltaSig;
|
|
|
|
/// <summary>
|
|
/// Generates delta signatures from binaries for CVE detection.
|
|
/// </summary>
|
|
public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
|
|
{
|
|
/// <summary>Loads the binary and supplies the plugin used to enumerate and disassemble its symbols.</summary>
private readonly DisassemblyService _disassemblyService;
|
|
/// <summary>Normalizes disassembled instructions and exposes the normalization pipeline for an architecture.</summary>
private readonly NormalizationService _normalizationService;
|
|
/// <summary>Optional IR lifter; semantic fingerprinting is unavailable when this is <c>null</c>.</summary>
private readonly IIrLiftingService? _irLiftingService;
|
|
/// <summary>Optional semantic graph extractor; semantic fingerprinting is unavailable when this is <c>null</c>.</summary>
private readonly ISemanticGraphExtractor? _graphExtractor;
|
|
/// <summary>Optional semantic fingerprint generator; semantic fingerprinting is unavailable when this is <c>null</c>.</summary>
private readonly ISemanticFingerprintGenerator? _fingerprintGenerator;
|
|
/// <summary>Logger for progress, skipped symbols and semantic-analysis failures.</summary>
private readonly ILogger<DeltaSignatureGenerator> _logger;
|
|
|
|
/// <summary>
/// Creates a generator that produces byte-level signatures only.
/// </summary>
/// <remarks>
/// Delegates to the full constructor with all three semantic services set to
/// <c>null</c>, so <see cref="SemanticAnalysisAvailable"/> is <c>false</c>.
/// </remarks>
/// <param name="disassemblyService">Service used to load and disassemble binaries.</param>
/// <param name="normalizationService">Service used to normalize disassembled instructions.</param>
/// <param name="logger">Logger for diagnostics.</param>
public DeltaSignatureGenerator(
    DisassemblyService disassemblyService,
    NormalizationService normalizationService,
    ILogger<DeltaSignatureGenerator> logger)
    : this(
        disassemblyService,
        normalizationService,
        irLiftingService: null,
        graphExtractor: null,
        fingerprintGenerator: null,
        logger: logger)
{
}
|
|
|
|
/// <summary>
/// Creates a generator whose semantic analysis support depends on which of the
/// optional services are supplied.
/// </summary>
/// <param name="disassemblyService">Service used to load and disassemble binaries. Required.</param>
/// <param name="normalizationService">Service used to normalize disassembled instructions. Required.</param>
/// <param name="irLiftingService">Optional IR lifting service; may be <c>null</c>.</param>
/// <param name="graphExtractor">Optional semantic graph extractor; may be <c>null</c>.</param>
/// <param name="fingerprintGenerator">Optional semantic fingerprint generator; may be <c>null</c>.</param>
/// <param name="logger">Logger for diagnostics. Required.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="disassemblyService"/>, <paramref name="normalizationService"/> or
/// <paramref name="logger"/> is <c>null</c>.
/// </exception>
public DeltaSignatureGenerator(
    DisassemblyService disassemblyService,
    NormalizationService normalizationService,
    IIrLiftingService? irLiftingService,
    ISemanticGraphExtractor? graphExtractor,
    ISemanticFingerprintGenerator? fingerprintGenerator,
    ILogger<DeltaSignatureGenerator> logger)
{
    // Validate the required collaborators up front, in declaration order.
    ArgumentNullException.ThrowIfNull(disassemblyService);
    ArgumentNullException.ThrowIfNull(normalizationService);
    ArgumentNullException.ThrowIfNull(logger);

    _disassemblyService = disassemblyService;
    _normalizationService = normalizationService;
    _logger = logger;

    // The semantic services are stored as given; null simply disables that feature.
    _irLiftingService = irLiftingService;
    _graphExtractor = graphExtractor;
    _fingerprintGenerator = fingerprintGenerator;
}
|
|
|
|
/// <summary>
/// Gets a value indicating whether all three semantic services (IR lifter,
/// graph extractor and fingerprint generator) were supplied at construction.
/// </summary>
public bool SemanticAnalysisAvailable
{
    get
    {
        var anyServiceMissing =
            _irLiftingService is null ||
            _graphExtractor is null ||
            _fingerprintGenerator is null;

        return !anyServiceMissing;
    }
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Loads the binary, resolves each requested symbol (or every symbol, sorted by
/// ordinal name, when <c>request.TargetSymbols</c> is empty), disassembles and
/// normalizes it, and collects one <see cref="SymbolSignature"/> per symbol.
/// Symbols that are not found in the binary, or that disassemble to zero
/// instructions, are logged and skipped. When the binary reports the same
/// symbol name more than once, the first occurrence is used.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="binaryStream"/> or <paramref name="request"/> is <c>null</c>.
/// </exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
public async Task<DeltaSignature> GenerateSignaturesAsync(
    Stream binaryStream,
    DeltaSignatureRequest request,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(binaryStream);
    ArgumentNullException.ThrowIfNull(request);

    _logger.LogInformation(
        "Generating delta signatures for {Cve} ({Package}) with {SymbolCount} target symbols",
        request.Cve,
        request.Package,
        request.TargetSymbols.Count);

    var options = request.Options ?? new SignatureOptions();

    // LoadBinary is synchronous; run it on the thread pool so the caller is not blocked.
    var (binary, plugin) = await Task.Run(
        () => _disassemblyService.LoadBinary(binaryStream),
        ct);

    _logger.LogDebug(
        "Loaded binary: format={Format}, arch={Arch}",
        binary.Format,
        binary.Architecture);

    // Index symbols by name. ToDictionary throws on a repeated key, so collapse
    // repeated names first (first occurrence wins) instead of failing the whole run.
    var allSymbols = plugin.GetSymbols(binary).ToList();
    var symbols = allSymbols
        .DistinctBy(s => s.Name, StringComparer.Ordinal)
        .ToDictionary(s => s.Name, StringComparer.Ordinal);

    if (symbols.Count != allSymbols.Count)
    {
        _logger.LogDebug(
            "Ignored {DuplicateCount} symbols with duplicate names; the first occurrence of each name is used",
            allSymbols.Count - symbols.Count);
    }

    // An empty target list means "all symbols", processed in ordinal name order.
    var targetSymbols = request.TargetSymbols.Count == 0
        ? symbols.Keys.OrderBy(v => v, StringComparer.Ordinal).ToArray()
        : request.TargetSymbols;

    var symbolSignatures = new List<SymbolSignature>();

    // Applied steps are reported in first-seen order; the set makes the membership test O(1).
    var appliedSteps = new List<string>();
    var seenSteps = new HashSet<string>(StringComparer.Ordinal);

    foreach (var symbolName in targetSymbols)
    {
        ct.ThrowIfCancellationRequested();

        if (!symbols.TryGetValue(symbolName, out var symbolInfo))
        {
            _logger.LogWarning("Symbol {Symbol} not found in binary", symbolName);
            continue;
        }

        // Disassemble the symbol
        var instructions = plugin.DisassembleSymbol(binary, symbolInfo).ToList();
        if (instructions.Count == 0)
        {
            _logger.LogWarning("No instructions for symbol {Symbol}", symbolName);
            continue;
        }

        // Normalize the instructions
        var normalized = _normalizationService.Normalize(
            instructions,
            binary.Architecture);

        foreach (var step in normalized.AppliedSteps)
        {
            if (seenSteps.Add(step))
            {
                appliedSteps.Add(step);
            }
        }

        // Symbols without section information are reported under ".text".
        var signature = await GenerateSymbolSignatureAsync(
            normalized,
            symbolName,
            symbolInfo.Section ?? ".text",
            instructions,
            binary.Architecture,
            options,
            ct);

        symbolSignatures.Add(signature);

        _logger.LogDebug(
            "Generated signature for {Symbol}: {Hash} ({Size} bytes)",
            symbolName,
            signature.HashHex,
            signature.SizeBytes);
    }

    // The pipeline supplies the recipe id/version recorded alongside the signatures.
    var pipeline = _normalizationService.GetPipeline(binary.Architecture);

    return new DeltaSignature
    {
        Cve = request.Cve,
        Package = new PackageRef(request.Package, request.Soname),
        Target = new TargetRef(request.Arch, request.Abi),
        Normalization = new NormalizationRef(
            pipeline.RecipeId,
            pipeline.RecipeVersion,
            [.. appliedSteps]),
        SignatureState = request.SignatureState,
        Symbols = [.. symbolSignatures],
        GeneratedAt = DateTimeOffset.UtcNow
    };
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Byte-only overload. No decoded instructions are available here, so when
/// <c>options.IncludeCfg</c> is set the basic-block count is a byte-pattern
/// estimate and no CFG edge hash is produced.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="symbolName"/> or <paramref name="scope"/> is <c>null</c>.
/// </exception>
public SymbolSignature GenerateSymbolSignature(
    ReadOnlySpan<byte> normalizedBytes,
    string symbolName,
    string scope,
    SignatureOptions? options = null)
{
    // Same argument validation as GenerateSymbolSignatureAsync.
    ArgumentNullException.ThrowIfNull(symbolName);
    ArgumentNullException.ThrowIfNull(scope);

    options ??= new SignatureOptions();

    // Hash of the whole function body.
    var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);

    // Per-chunk hashes are emitted only when the body is at least one chunk long.
    ImmutableArray<ChunkHash>? chunks = null;
    if (options.IncludeChunks && normalizedBytes.Length >= options.ChunkSize)
    {
        chunks = ComputeChunkHashes(normalizedBytes, options.ChunkSize, options.HashAlgorithm);
    }

    // Raw bytes allow only a heuristic block count.
    int? bbCount = options.IncludeCfg
        ? EstimateBasicBlockCount(normalizedBytes)
        : null;

    return new SymbolSignature
    {
        Name = symbolName,
        Scope = scope,
        HashAlg = options.HashAlgorithm,
        HashHex = hashHex,
        SizeBytes = normalizedBytes.Length,
        CfgBbCount = bbCount,
        // An edge hash cannot be derived from raw bytes, so it is always left unset.
        CfgEdgeHash = null,
        Chunks = chunks
    };
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Hashes the concatenated normalized instruction bytes and, when
/// <c>options.IncludeCfg</c> is set and there is at least one instruction,
/// derives the basic-block count and edge hash from <c>CfgExtractor</c>.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="normalized"/>, <paramref name="symbolName"/> or
/// <paramref name="scope"/> is <c>null</c>.
/// </exception>
public SymbolSignature GenerateSymbolSignature(
    NormalizedFunction normalized,
    string symbolName,
    string scope,
    SignatureOptions? options = null)
{
    // Same argument validation as GenerateSymbolSignatureAsync; without it a null
    // function surfaces later as a NullReferenceException inside a helper.
    ArgumentNullException.ThrowIfNull(normalized);
    ArgumentNullException.ThrowIfNull(symbolName);
    ArgumentNullException.ThrowIfNull(scope);

    options ??= new SignatureOptions();

    // Flatten the per-instruction bytes into one buffer for hashing.
    var normalizedBytes = GetNormalizedBytes(normalized);

    // Hash of the whole function body.
    var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);

    // Per-chunk hashes are emitted only when the body is at least one chunk long.
    ImmutableArray<ChunkHash>? chunks = null;
    if (options.IncludeChunks && normalizedBytes.Length >= options.ChunkSize)
    {
        chunks = ComputeChunkHashes(normalizedBytes, options.ChunkSize, options.HashAlgorithm);
    }

    // CFG metrics come from the extractor when instructions are available.
    int? bbCount = null;
    string? cfgEdgeHash = null;
    if (options.IncludeCfg && normalized.Instructions.Length > 0)
    {
        // The first instruction's original address is used as the function start.
        var startAddress = normalized.Instructions[0].OriginalAddress;
        var cfgMetrics = CfgExtractor.ComputeMetrics(
            normalized.Instructions.ToList(),
            startAddress);

        bbCount = cfgMetrics.BasicBlockCount;
        cfgEdgeHash = cfgMetrics.EdgeHash;
    }

    return new SymbolSignature
    {
        Name = symbolName,
        Scope = scope,
        HashAlg = options.HashAlgorithm,
        HashHex = hashHex,
        SizeBytes = normalizedBytes.Length,
        CfgBbCount = bbCount,
        CfgEdgeHash = cfgEdgeHash,
        Chunks = chunks
    };
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Produces the same hash, chunk and CFG data as the synchronous
/// <see cref="NormalizedFunction"/> overload and additionally attempts a semantic
/// fingerprint when <c>options.IncludeSemantic</c> is set, the semantic services
/// are available and <paramref name="originalInstructions"/> is not empty.
/// Semantic analysis is best-effort: a failure is logged and the signature is
/// returned without semantic data. Cancellation requested through
/// <paramref name="ct"/> is not treated as such a failure and propagates.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <paramref name="normalized"/>, <paramref name="symbolName"/>,
/// <paramref name="scope"/> or <paramref name="originalInstructions"/> is <c>null</c>.
/// </exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled during semantic analysis.</exception>
public async Task<SymbolSignature> GenerateSymbolSignatureAsync(
    NormalizedFunction normalized,
    string symbolName,
    string scope,
    IReadOnlyList<DisassembledInstruction> originalInstructions,
    CpuArchitecture architecture,
    SignatureOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(normalized);
    ArgumentNullException.ThrowIfNull(symbolName);
    ArgumentNullException.ThrowIfNull(scope);
    ArgumentNullException.ThrowIfNull(originalInstructions);

    options ??= new SignatureOptions();

    // Flatten the per-instruction bytes into one buffer for hashing.
    var normalizedBytes = GetNormalizedBytes(normalized);

    // Hash of the whole function body.
    var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);

    // Per-chunk hashes are emitted only when the body is at least one chunk long.
    ImmutableArray<ChunkHash>? chunks = null;
    if (options.IncludeChunks && normalizedBytes.Length >= options.ChunkSize)
    {
        chunks = ComputeChunkHashes(normalizedBytes, options.ChunkSize, options.HashAlgorithm);
    }

    // CFG metrics come from the extractor when instructions are available.
    int? bbCount = null;
    string? cfgEdgeHash = null;
    if (options.IncludeCfg && normalized.Instructions.Length > 0)
    {
        // The first instruction's original address is used as the function start.
        var startAddress = normalized.Instructions[0].OriginalAddress;
        var cfgMetrics = CfgExtractor.ComputeMetrics(
            normalized.Instructions.ToList(),
            startAddress);

        bbCount = cfgMetrics.BasicBlockCount;
        cfgEdgeHash = cfgMetrics.EdgeHash;
    }

    // Semantic fingerprint: optional and best-effort.
    string? semanticHashHex = null;
    ImmutableArray<string>? semanticApiCalls = null;

    if (options.IncludeSemantic && SemanticAnalysisAvailable && originalInstructions.Count > 0)
    {
        try
        {
            var semanticFingerprint = await ComputeSemanticFingerprintAsync(
                originalInstructions,
                symbolName,
                architecture,
                ct);

            if (semanticFingerprint is not null)
            {
                semanticHashHex = semanticFingerprint.GraphHashHex;
                semanticApiCalls = semanticFingerprint.ApiCalls;
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller asked to stop; that is not an analysis failure, so do not
            // swallow it and keep working.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(
                ex,
                "Failed to compute semantic fingerprint for {Symbol}, continuing without semantic data",
                symbolName);
        }
    }

    return new SymbolSignature
    {
        Name = symbolName,
        Scope = scope,
        HashAlg = options.HashAlgorithm,
        HashHex = hashHex,
        SizeBytes = normalizedBytes.Length,
        CfgBbCount = bbCount,
        CfgEdgeHash = cfgEdgeHash,
        Chunks = chunks,
        SemanticHashHex = semanticHashHex,
        SemanticApiCalls = semanticApiCalls
    };
}
|
|
|
|
/// <summary>
/// Runs the semantic pipeline for one function: lift the instructions to IR,
/// extract a semantic graph from the IR, then fingerprint the graph.
/// </summary>
/// <param name="instructions">Disassembled instructions of the function.</param>
/// <param name="functionName">Name passed to the IR lifter.</param>
/// <param name="architecture">Architecture of the instructions.</param>
/// <param name="ct">Token forwarded to every pipeline stage.</param>
/// <returns>
/// The fingerprint, or <c>null</c> when any semantic service is missing or the
/// lifter does not support <paramref name="architecture"/>.
/// </returns>
private async Task<SemanticFingerprint?> ComputeSemanticFingerprintAsync(
    IReadOnlyList<DisassembledInstruction> instructions,
    string functionName,
    CpuArchitecture architecture,
    CancellationToken ct)
{
    var lifter = _irLiftingService;
    var extractor = _graphExtractor;
    var generator = _fingerprintGenerator;

    // All three stages are required; bail out if any is absent.
    if (lifter is null || extractor is null || generator is null)
    {
        return null;
    }

    if (!lifter.SupportsArchitecture(architecture))
    {
        _logger.LogDebug(
            "Architecture {Arch} not supported for semantic analysis",
            architecture);
        return null;
    }

    // The function's entry point is the first instruction's address (0 when empty).
    var entryAddress = instructions.Count == 0 ? 0UL : instructions[0].Address;

    var liftedFunction = await lifter.LiftToIrAsync(
        instructions,
        functionName,
        entryAddress,
        architecture,
        ct: ct);

    var semanticGraph = await extractor.ExtractGraphAsync(liftedFunction, ct: ct);

    return await generator.GenerateAsync(
        semanticGraph,
        entryAddress,
        ct: ct);
}
|
|
|
|
/// <summary>
/// Concatenates the normalized bytes of every instruction, in instruction
/// order, into a single newly allocated array.
/// </summary>
/// <param name="normalized">Function whose instruction bytes are flattened.</param>
/// <returns>A buffer whose length is the sum of all instruction byte lengths.</returns>
private static byte[] GetNormalizedBytes(NormalizedFunction normalized)
{
    // First pass: size the buffer exactly.
    var byteCount = normalized.Instructions.Sum(insn => insn.NormalizedBytes.Length);
    var buffer = new byte[byteCount];

    // Second pass: copy each instruction into the not-yet-filled tail of the buffer.
    var remaining = buffer.AsSpan();
    foreach (var insn in normalized.Instructions)
    {
        insn.NormalizedBytes.CopyTo(remaining);
        remaining = remaining[insn.NormalizedBytes.Length..];
    }

    return buffer;
}
|
|
|
|
/// <summary>
/// Hashes <paramref name="data"/> with the named algorithm and returns the
/// digest as a lowercase hexadecimal string.
/// </summary>
/// <param name="data">Bytes to hash; may be empty.</param>
/// <param name="algorithm">
/// Algorithm name, matched case-insensitively: <c>sha256</c>, <c>sha384</c> or <c>sha512</c>.
/// </param>
/// <returns>Lowercase hex encoding of the digest.</returns>
/// <exception cref="ArgumentException"><paramref name="algorithm"/> is not one of the supported names.</exception>
private static string ComputeHash(ReadOnlySpan<byte> data, string algorithm)
{
    // 64 bytes holds the largest supported digest (SHA-512).
    const int MaxDigestBytes = 64;
    Span<byte> digest = stackalloc byte[MaxDigestBytes];

    var written = algorithm.ToLowerInvariant() switch
    {
        "sha256" => SHA256.HashData(data, digest),
        "sha384" => SHA384.HashData(data, digest),
        "sha512" => SHA512.HashData(data, digest),
        _ => throw new ArgumentException($"Unsupported hash algorithm: {algorithm}", nameof(algorithm)),
    };

    // Only the first 'written' bytes of the scratch buffer belong to the digest.
    return Convert.ToHexString(digest[..written]).ToLowerInvariant();
}
|
|
|
|
/// <summary>
/// Splits <paramref name="data"/> into consecutive chunks of at most
/// <paramref name="chunkSize"/> bytes and hashes each chunk.
/// </summary>
/// <param name="data">Bytes to split.</param>
/// <param name="chunkSize">Maximum chunk length in bytes; must be positive when <paramref name="data"/> is not empty.</param>
/// <param name="algorithm">Hash algorithm name passed to <c>ComputeHash</c>.</param>
/// <returns>
/// One <see cref="ChunkHash"/> per chunk in offset order; the final chunk may be
/// shorter than <paramref name="chunkSize"/>. Empty when <paramref name="data"/> is empty.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="chunkSize"/> is zero or negative and <paramref name="data"/> is not empty.
/// </exception>
private static ImmutableArray<ChunkHash> ComputeChunkHashes(
    ReadOnlySpan<byte> data,
    int chunkSize,
    string algorithm)
{
    // Empty input yields no chunks regardless of the configured chunk size.
    if (data.IsEmpty)
    {
        return ImmutableArray<ChunkHash>.Empty;
    }

    // A chunk size of zero would never advance the offset (unbounded loop and
    // unbounded list growth); a negative one would only fail later inside Slice.
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(chunkSize);

    // Ceiling division written so it cannot overflow for large inputs.
    var chunkCount = (data.Length / chunkSize) + (data.Length % chunkSize == 0 ? 0 : 1);
    var chunks = new List<ChunkHash>(chunkCount);
    var offset = 0;

    while (offset < data.Length)
    {
        // The last chunk takes whatever is left.
        var size = Math.Min(chunkSize, data.Length - offset);
        var hash = ComputeHash(data.Slice(offset, size), algorithm);

        chunks.Add(new ChunkHash(offset, size, hash));
        offset += size;
    }

    return [.. chunks];
}
|
|
|
|
/// <summary>
/// Estimates the number of basic blocks by counting byte patterns that look
/// like x86/x64 control-transfer opcodes.
/// </summary>
/// <remarks>
/// This is a byte-wise scan, not a decoder: every byte is inspected, so operand
/// bytes that happen to match a pattern are counted as well. Proper CFG analysis
/// needs decoded instructions.
/// </remarks>
/// <param name="data">Function bytes to scan.</param>
/// <returns>One (the initial block) plus the number of matched patterns.</returns>
private static int EstimateBasicBlockCount(ReadOnlySpan<byte> data)
{
    var blocks = 1; // A function always has at least one block.
    var pos = 0;

    while (pos < data.Length)
    {
        var opcode = data[pos];

        // Single-byte patterns: C3, E8, E9, EB and the 70..7F range.
        var isSingleByteTerminator =
            opcode == 0xC3 ||
            opcode == 0xE8 ||
            opcode == 0xE9 ||
            opcode == 0xEB ||
            (opcode >= 0x70 && opcode <= 0x7F);

        if (isSingleByteTerminator)
        {
            blocks++;
            pos++;
            continue;
        }

        // Two-byte pattern: 0F followed by 80..8F (conditional jumps).
        var isTwoByteConditionalJump =
            opcode == 0x0F &&
            pos + 1 < data.Length &&
            (data[pos + 1] & 0xF0) == 0x80;

        if (isTwoByteConditionalJump)
        {
            blocks++;
            pos += 2; // Consume the second opcode byte as well.
            continue;
        }

        pos++;
    }

    return blocks;
}
|
|
|
|
/// <summary>
/// Maps an architecture name to its <see cref="CpuArchitecture"/> value.
/// </summary>
/// <param name="arch">Architecture name; matched case-insensitively against the known aliases.</param>
/// <returns>The matching <see cref="CpuArchitecture"/>.</returns>
/// <exception cref="ArgumentException"><paramref name="arch"/> is not a recognised name.</exception>
private static CpuArchitecture ParseArch(string arch)
{
    var key = arch.ToLowerInvariant();

    switch (key)
    {
        case "x86_64":
        case "amd64":
        case "x64":
            return CpuArchitecture.X86_64;

        case "x86":
        case "i386":
        case "i686":
            return CpuArchitecture.X86;

        case "aarch64":
        case "arm64":
            return CpuArchitecture.ARM64;

        case "arm":
        case "armv7":
            return CpuArchitecture.ARM32;

        case "mips":
        case "mips32":
            return CpuArchitecture.MIPS32;

        case "mips64":
            return CpuArchitecture.MIPS64;

        case "riscv64":
            return CpuArchitecture.RISCV64;

        case "ppc":
        case "ppc32":
        case "powerpc":
            return CpuArchitecture.PPC32;

        case "ppc64":
        case "powerpc64":
            return CpuArchitecture.PPC64;

        default:
            // The message reports the name exactly as the caller supplied it.
            throw new ArgumentException($"Unknown architecture: {arch}", nameof(arch));
    }
}
|
|
}
|
|
|