save progress
This commit is contained in:
@@ -0,0 +1,322 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Globalization;
|
||||
using System.Security.Cryptography;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Disassembly;
|
||||
using StellaOps.BinaryIndex.Normalization;
|
||||
|
||||
namespace StellaOps.BinaryIndex.DeltaSig;
|
||||
|
||||
/// <summary>
|
||||
/// Generates delta signatures from binaries for CVE detection.
|
||||
/// </summary>
|
||||
public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
{
    private readonly DisassemblyService _disassemblyService;
    private readonly NormalizationService _normalizationService;
    private readonly ILogger<DeltaSignatureGenerator> _logger;

    /// <summary>
    /// Creates a generator that uses the given services to load, disassemble and
    /// normalize binaries.
    /// </summary>
    /// <param name="disassemblyService">Service used to load the binary and obtain its plugin.</param>
    /// <param name="normalizationService">Service used to normalize disassembled instructions.</param>
    /// <param name="logger">Logger for progress and diagnostic messages.</param>
    public DeltaSignatureGenerator(
        DisassemblyService disassemblyService,
        NormalizationService normalizationService,
        ILogger<DeltaSignatureGenerator> logger)
    {
        _disassemblyService = disassemblyService;
        _normalizationService = normalizationService;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Target symbols that are missing from the binary, or that disassemble to zero
    /// instructions, are logged as warnings and skipped rather than failing the whole run.
    /// </remarks>
    public async Task<DeltaSignature> GenerateSignaturesAsync(
        Stream binaryStream,
        DeltaSignatureRequest request,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(binaryStream);
        ArgumentNullException.ThrowIfNull(request);

        _logger.LogInformation(
            "Generating delta signatures for {Cve} ({Package}) with {SymbolCount} target symbols",
            request.Cve,
            request.Package,
            request.TargetSymbols.Count);

        var options = request.Options ?? new SignatureOptions();

        // LoadBinary is synchronous, so it is pushed to the thread pool via Task.Run.
        // ConfigureAwait(false): nothing after this point needs the caller's context.
        var (binary, plugin) = await Task.Run(
            () => _disassemblyService.LoadBinary(binaryStream),
            ct).ConfigureAwait(false);

        _logger.LogDebug(
            "Loaded binary: format={Format}, arch={Arch}",
            binary.Format,
            binary.Architecture);

        // Index symbols by name. A plain ToDictionary throws on the first repeated name,
        // which would abort the whole generation, so the first occurrence of each name
        // is kept instead.
        // NOTE(review): assumes the plugin may report the same name more than once
        // (e.g. from several symbol tables) - confirm against the plugin contract.
        var allSymbols = plugin.GetSymbols(binary).ToList();
        var symbols = allSymbols
            .DistinctBy(s => s.Name, StringComparer.Ordinal)
            .ToDictionary(s => s.Name, StringComparer.Ordinal);

        if (symbols.Count != allSymbols.Count)
        {
            _logger.LogDebug(
                "Ignored {DuplicateCount} duplicate symbol names; the first occurrence of each name is used",
                allSymbols.Count - symbols.Count);
        }

        var symbolSignatures = new List<SymbolSignature>();

        // Applied steps are reported in first-seen order; the set gives O(1) de-duplication.
        var appliedSteps = new List<string>();
        var seenSteps = new HashSet<string>();

        foreach (var symbolName in request.TargetSymbols)
        {
            ct.ThrowIfCancellationRequested();

            if (!symbols.TryGetValue(symbolName, out var symbolInfo))
            {
                _logger.LogWarning("Symbol {Symbol} not found in binary", symbolName);
                continue;
            }

            // Disassemble the symbol
            var instructions = plugin.DisassembleSymbol(binary, symbolInfo).ToList();
            if (instructions.Count == 0)
            {
                _logger.LogWarning("No instructions for symbol {Symbol}", symbolName);
                continue;
            }

            // Normalize the instructions
            var normalized = _normalizationService.Normalize(
                instructions,
                binary.Architecture);

            foreach (var step in normalized.AppliedSteps)
            {
                if (seenSteps.Add(step))
                {
                    appliedSteps.Add(step);
                }
            }

            // Generate signature from the normalized function; the scope falls back to
            // ".text" when the symbol carries no section.
            var signature = GenerateSymbolSignature(
                normalized,
                symbolName,
                symbolInfo.Section ?? ".text",
                options);

            symbolSignatures.Add(signature);

            _logger.LogDebug(
                "Generated signature for {Symbol}: {Hash} ({Size} bytes)",
                symbolName,
                signature.HashHex,
                signature.SizeBytes);
        }

        // Get the pipeline used for normalization reference
        var pipeline = _normalizationService.GetPipeline(binary.Architecture);

        return new DeltaSignature
        {
            Cve = request.Cve,
            Package = new PackageRef(request.Package, request.Soname),
            Target = new TargetRef(request.Arch, request.Abi),
            Normalization = new NormalizationRef(
                pipeline.RecipeId,
                pipeline.RecipeVersion,
                [.. appliedSteps]),
            SignatureState = request.SignatureState,
            Symbols = [.. symbolSignatures],
            GeneratedAt = DateTimeOffset.UtcNow
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// This overload only sees raw bytes, so the basic-block count is a byte-pattern
    /// estimate and <c>CfgEdgeHash</c> is always left <see langword="null"/>.
    /// </remarks>
    public SymbolSignature GenerateSymbolSignature(
        ReadOnlySpan<byte> normalizedBytes,
        string symbolName,
        string scope,
        SignatureOptions? options = null)
    {
        options ??= new SignatureOptions();

        var (hashHex, chunks) = ComputeDigests(normalizedBytes, options);

        // For the byte-only overload accurate CFG metrics are not available;
        // a heuristic estimate of the block count is used instead.
        int? bbCount = options.IncludeCfg
            ? EstimateBasicBlockCount(normalizedBytes)
            : null;

        return new SymbolSignature
        {
            Name = symbolName,
            Scope = scope,
            HashAlg = options.HashAlgorithm,
            HashHex = hashHex,
            SizeBytes = normalizedBytes.Length,
            CfgBbCount = bbCount,
            CfgEdgeHash = null,
            Chunks = chunks
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// When CFG output is requested and the function has at least one instruction, the
    /// metrics come from <c>CfgExtractor.ComputeMetrics</c>, using the first
    /// instruction's original address as the start address.
    /// </remarks>
    public SymbolSignature GenerateSymbolSignature(
        NormalizedFunction normalized,
        string symbolName,
        string scope,
        SignatureOptions? options = null)
    {
        ArgumentNullException.ThrowIfNull(normalized);
        options ??= new SignatureOptions();

        // Concatenated normalized instruction bytes are what gets hashed.
        var normalizedBytes = GetNormalizedBytes(normalized);
        var (hashHex, chunks) = ComputeDigests(normalizedBytes, options);

        int? bbCount = null;
        string? cfgEdgeHash = null;
        if (options.IncludeCfg && normalized.Instructions.Length > 0)
        {
            var startAddress = normalized.Instructions[0].OriginalAddress;
            var cfgMetrics = CfgExtractor.ComputeMetrics(
                normalized.Instructions.ToList(),
                startAddress);

            bbCount = cfgMetrics.BasicBlockCount;
            cfgEdgeHash = cfgMetrics.EdgeHash;
        }

        return new SymbolSignature
        {
            Name = symbolName,
            Scope = scope,
            HashAlg = options.HashAlgorithm,
            HashHex = hashHex,
            SizeBytes = normalizedBytes.Length,
            CfgBbCount = bbCount,
            CfgEdgeHash = cfgEdgeHash,
            Chunks = chunks
        };
    }

    /// <summary>
    /// Computes the whole-buffer hash and, when enabled, the per-chunk hashes.
    /// </summary>
    /// <param name="data">Bytes to hash.</param>
    /// <param name="options">Supplies the hash algorithm, chunk toggle and chunk size.</param>
    /// <returns>
    /// The lowercase hex hash, plus the chunk hashes or <see langword="null"/> when chunking
    /// is disabled or <paramref name="data"/> is shorter than one chunk.
    /// </returns>
    private static (string HashHex, ImmutableArray<ChunkHash>? Chunks) ComputeDigests(
        ReadOnlySpan<byte> data,
        SignatureOptions options)
    {
        var hashHex = ComputeHash(data, options.HashAlgorithm);

        ImmutableArray<ChunkHash>? chunks = null;
        if (options.IncludeChunks && data.Length >= options.ChunkSize)
        {
            chunks = ComputeChunkHashes(data, options.ChunkSize, options.HashAlgorithm);
        }

        return (hashHex, chunks);
    }

    /// <summary>
    /// Concatenates the normalized bytes of every instruction, in instruction order.
    /// </summary>
    private static byte[] GetNormalizedBytes(NormalizedFunction normalized)
    {
        var totalSize = normalized.Instructions.Sum(i => i.NormalizedBytes.Length);
        var result = new byte[totalSize];
        var offset = 0;

        foreach (var instruction in normalized.Instructions)
        {
            instruction.NormalizedBytes.CopyTo(result.AsSpan(offset));
            offset += instruction.NormalizedBytes.Length;
        }

        return result;
    }

    /// <summary>
    /// Hashes <paramref name="data"/> and returns the digest as lowercase hex.
    /// </summary>
    /// <param name="data">Bytes to hash.</param>
    /// <param name="algorithm">"sha256", "sha384" or "sha512" (case-insensitive).</param>
    /// <exception cref="ArgumentNullException"><paramref name="algorithm"/> is null.</exception>
    /// <exception cref="ArgumentException">The algorithm name is not supported.</exception>
    private static string ComputeHash(ReadOnlySpan<byte> data, string algorithm)
    {
        ArgumentNullException.ThrowIfNull(algorithm);

        // Sized for the largest supported digest (SHA-512).
        Span<byte> hash = stackalloc byte[SHA512.HashSizeInBytes];
        int bytesWritten;

        switch (algorithm.ToLowerInvariant())
        {
            case "sha256":
                bytesWritten = SHA256.HashData(data, hash);
                break;
            case "sha384":
                bytesWritten = SHA384.HashData(data, hash);
                break;
            case "sha512":
                bytesWritten = SHA512.HashData(data, hash);
                break;
            default:
                throw new ArgumentException($"Unsupported hash algorithm: {algorithm}", nameof(algorithm));
        }

        return Convert.ToHexString(hash[..bytesWritten]).ToLowerInvariant();
    }

    /// <summary>
    /// Splits <paramref name="data"/> into consecutive chunks of at most
    /// <paramref name="chunkSize"/> bytes and hashes each one. The final chunk may be shorter.
    /// </summary>
    /// <param name="data">Bytes to split and hash.</param>
    /// <param name="chunkSize">Maximum chunk length in bytes; must be positive.</param>
    /// <param name="algorithm">Hash algorithm name passed to <see cref="ComputeHash"/>.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="chunkSize"/> is zero or negative.
    /// </exception>
    private static ImmutableArray<ChunkHash> ComputeChunkHashes(
        ReadOnlySpan<byte> data,
        int chunkSize,
        string algorithm)
    {
        // A zero chunk size would never advance the offset and loop forever.
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(chunkSize);

        var chunks = new List<ChunkHash>();
        var offset = 0;

        while (offset < data.Length)
        {
            var size = Math.Min(chunkSize, data.Length - offset);
            var hash = ComputeHash(data.Slice(offset, size), algorithm);

            chunks.Add(new ChunkHash(offset, size, hash));
            offset += size;
        }

        return [.. chunks];
    }

    /// <summary>
    /// Estimates the number of basic blocks by counting byte patterns that look like
    /// x86/x64 block terminators.
    /// </summary>
    /// <remarks>
    /// The count starts at one and is incremented for every byte equal to 0xC3, 0xE8,
    /// 0xE9, 0xEB or in 0x70-0x7F, and for every two-byte sequence 0x0F 0x80-0x8F.
    /// Bytes are scanned without decoding instruction boundaries, so operand bytes that
    /// happen to match are counted too; this is a heuristic, not a CFG analysis.
    /// </remarks>
    private static int EstimateBasicBlockCount(ReadOnlySpan<byte> data)
    {
        var count = 1; // At least one block

        for (var i = 0; i < data.Length; i++)
        {
            var b = data[i];

            // Common x64 block terminators
            if (b is 0xC3 or 0xE8 or 0xE9 or 0xEB or (>= 0x70 and <= 0x7F))
            {
                count++;
            }
            // 0F 8x = conditional jumps
            else if (i + 1 < data.Length && b == 0x0F && data[i + 1] is >= 0x80 and <= 0x8F)
            {
                count++;
                i++; // Skip the second opcode byte
            }
        }

        return count;
    }

    /// <summary>
    /// Maps an architecture name (case-insensitive) to a <see cref="CpuArchitecture"/> value.
    /// </summary>
    /// <remarks>No other member of this class currently calls this method.</remarks>
    /// <exception cref="ArgumentException">The name is not a recognized architecture.</exception>
    private static CpuArchitecture ParseArch(string arch)
    {
        return arch.ToLowerInvariant() switch
        {
            "x86_64" or "amd64" or "x64" => CpuArchitecture.X86_64,
            "x86" or "i386" or "i686" => CpuArchitecture.X86,
            "aarch64" or "arm64" => CpuArchitecture.ARM64,
            "arm" or "armv7" => CpuArchitecture.ARM32,
            "mips" or "mips32" => CpuArchitecture.MIPS32,
            "mips64" => CpuArchitecture.MIPS64,
            "riscv64" => CpuArchitecture.RISCV64,
            "ppc" or "ppc32" or "powerpc" => CpuArchitecture.PPC32,
            "ppc64" or "powerpc64" => CpuArchitecture.PPC64,
            _ => throw new ArgumentException($"Unknown architecture: {arch}", nameof(arch))
        };
    }
}
|
||||
Reference in New Issue
Block a user