save progress
This commit is contained in:
@@ -7,6 +7,7 @@ using System.Security.Cryptography;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Disassembly;
|
||||
using StellaOps.BinaryIndex.Normalization;
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
|
||||
namespace StellaOps.BinaryIndex.DeltaSig;
|
||||
|
||||
@@ -17,18 +18,49 @@ public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
|
||||
{
|
||||
private readonly DisassemblyService _disassemblyService;
|
||||
private readonly NormalizationService _normalizationService;
|
||||
private readonly IIrLiftingService? _irLiftingService;
|
||||
private readonly ISemanticGraphExtractor? _graphExtractor;
|
||||
private readonly ISemanticFingerprintGenerator? _fingerprintGenerator;
|
||||
private readonly ILogger<DeltaSignatureGenerator> _logger;
|
||||
|
||||
/// <summary>
/// Creates a new delta signature generator without semantic analysis support.
/// </summary>
/// <param name="disassemblyService">The disassembly service.</param>
/// <param name="normalizationService">The normalization service.</param>
/// <param name="logger">The logger.</param>
/// <exception cref="ArgumentNullException">
/// Thrown by the chained constructor when <paramref name="disassemblyService"/>,
/// <paramref name="normalizationService"/> or <paramref name="logger"/> is null.
/// </exception>
public DeltaSignatureGenerator(
    DisassemblyService disassemblyService,
    NormalizationService normalizationService,
    ILogger<DeltaSignatureGenerator> logger)
    : this(disassemblyService, normalizationService, null, null, null, logger)
{
    // Intentionally empty: the chained constructor validates the arguments and
    // assigns every field, so repeating the assignments here would be redundant.
}
|
||||
|
||||
/// <summary>
/// Creates a new delta signature generator with optional semantic analysis support.
/// </summary>
/// <remarks>
/// The three semantic collaborators may be null; the required services and the
/// logger may not.
/// </remarks>
public DeltaSignatureGenerator(
    DisassemblyService disassemblyService,
    NormalizationService normalizationService,
    IIrLiftingService? irLiftingService,
    ISemanticGraphExtractor? graphExtractor,
    ISemanticFingerprintGenerator? fingerprintGenerator,
    ILogger<DeltaSignatureGenerator> logger)
{
    // Validate the mandatory dependencies up front, in declaration order.
    ArgumentNullException.ThrowIfNull(disassemblyService);
    ArgumentNullException.ThrowIfNull(normalizationService);
    ArgumentNullException.ThrowIfNull(logger);

    _disassemblyService = disassemblyService;
    _normalizationService = normalizationService;
    _logger = logger;

    // Optional semantic services are stored as-is (possibly null).
    _irLiftingService = irLiftingService;
    _graphExtractor = graphExtractor;
    _fingerprintGenerator = fingerprintGenerator;
}
|
||||
|
||||
/// <summary>
/// Gets a value indicating whether semantic analysis is available, i.e. whether
/// the IR lifting service, graph extractor and fingerprint generator are all set.
/// </summary>
public bool SemanticAnalysisAvailable =>
    (_irLiftingService, _graphExtractor, _fingerprintGenerator) is (not null, not null, not null);
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DeltaSignature> GenerateSignaturesAsync(
|
||||
Stream binaryStream,
|
||||
@@ -94,11 +126,14 @@ public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
|
||||
}
|
||||
|
||||
// Generate signature from normalized bytes
|
||||
var signature = GenerateSymbolSignature(
|
||||
var signature = await GenerateSymbolSignatureAsync(
|
||||
normalized,
|
||||
symbolName,
|
||||
symbolInfo.Section ?? ".text",
|
||||
options);
|
||||
instructions,
|
||||
binary.Architecture,
|
||||
options,
|
||||
ct);
|
||||
|
||||
symbolSignatures.Add(signature);
|
||||
|
||||
@@ -218,6 +253,136 @@ public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
|
||||
};
|
||||
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// The semantic fingerprint is best-effort: a failure while computing it is logged
/// and the signature is returned without semantic data. Cancellation is the one
/// exception and is allowed to propagate to the caller.
/// </remarks>
public async Task<SymbolSignature> GenerateSymbolSignatureAsync(
    NormalizedFunction normalized,
    string symbolName,
    string scope,
    IReadOnlyList<DisassembledInstruction> originalInstructions,
    CpuArchitecture architecture,
    SignatureOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(normalized);
    ArgumentNullException.ThrowIfNull(symbolName);
    ArgumentNullException.ThrowIfNull(scope);
    ArgumentNullException.ThrowIfNull(originalInstructions);

    options ??= new SignatureOptions();

    // Get normalized bytes for hashing
    var normalizedBytes = GetNormalizedBytes(normalized);

    // Compute the main hash
    var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);

    // Compute chunk hashes for resilience; skipped when the function is
    // shorter than a single chunk.
    ImmutableArray<ChunkHash>? chunks = null;
    if (options.IncludeChunks && normalizedBytes.Length >= options.ChunkSize)
    {
        chunks = ComputeChunkHashes(normalizedBytes, options.ChunkSize, options.HashAlgorithm);
    }

    // Compute CFG metrics using proper CFG analysis
    int? bbCount = null;
    string? cfgEdgeHash = null;
    if (options.IncludeCfg && normalized.Instructions.Length > 0)
    {
        // Use first instruction's address as start address
        var startAddress = normalized.Instructions[0].OriginalAddress;
        var cfgMetrics = CfgExtractor.ComputeMetrics(
            normalized.Instructions.ToList(),
            startAddress);

        bbCount = cfgMetrics.BasicBlockCount;
        cfgEdgeHash = cfgMetrics.EdgeHash;
    }

    // Compute semantic fingerprint if enabled and services available
    string? semanticHashHex = null;
    ImmutableArray<string>? semanticApiCalls = null;

    if (options.IncludeSemantic && SemanticAnalysisAvailable && originalInstructions.Count > 0)
    {
        try
        {
            var semanticFingerprint = await ComputeSemanticFingerprintAsync(
                originalInstructions,
                symbolName,
                architecture,
                ct);

            if (semanticFingerprint is not null)
            {
                semanticHashHex = semanticFingerprint.GraphHashHex;
                semanticApiCalls = semanticFingerprint.ApiCalls;
            }
        }
        // Do not swallow cancellation: a cancelled request must surface as
        // OperationCanceledException rather than a warning plus a partial result.
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogWarning(
                ex,
                "Failed to compute semantic fingerprint for {Symbol}, continuing without semantic data",
                symbolName);
        }
    }

    return new SymbolSignature
    {
        Name = symbolName,
        Scope = scope,
        HashAlg = options.HashAlgorithm,
        HashHex = hashHex,
        SizeBytes = normalizedBytes.Length,
        CfgBbCount = bbCount,
        CfgEdgeHash = cfgEdgeHash,
        Chunks = chunks,
        SemanticHashHex = semanticHashHex,
        SemanticApiCalls = semanticApiCalls
    };
}
|
||||
|
||||
// Runs the semantic pipeline (lift to IR -> extract graph -> fingerprint) for one
// function. Returns null when any semantic service is missing or when the lifting
// service reports the architecture as unsupported.
private async Task<SemanticFingerprint?> ComputeSemanticFingerprintAsync(
    IReadOnlyList<DisassembledInstruction> instructions,
    string functionName,
    CpuArchitecture architecture,
    CancellationToken ct)
{
    // Capture the optional services into non-null locals, bailing out if any is absent.
    if (_irLiftingService is not { } lifter
        || _graphExtractor is not { } extractor
        || _fingerprintGenerator is not { } generator)
    {
        return null;
    }

    var archSupported = lifter.SupportsArchitecture(architecture);
    if (!archSupported)
    {
        _logger.LogDebug(
            "Architecture {Arch} not supported for semantic analysis",
            architecture);
        return null;
    }

    // The first instruction's address is the function start; zero when there are none.
    var startAddress = instructions.Count == 0 ? 0UL : instructions[0].Address;

    // Stage 1: lift the instructions to IR.
    var liftedFunction = await lifter.LiftToIrAsync(
        instructions,
        functionName,
        startAddress,
        architecture,
        ct: ct);

    // Stage 2: extract the semantic graph from the lifted function.
    var semanticGraph = await extractor.ExtractGraphAsync(liftedFunction, ct: ct);

    // Stage 3: generate the fingerprint from the graph.
    return await generator.GenerateAsync(
        semanticGraph,
        startAddress,
        ct: ct);
}
|
||||
|
||||
private static byte[] GetNormalizedBytes(NormalizedFunction normalized)
|
||||
{
|
||||
// Concatenate all normalized instruction bytes
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
|
||||
|
||||
using StellaOps.BinaryIndex.Disassembly;
|
||||
using StellaOps.BinaryIndex.Normalization;
|
||||
|
||||
namespace StellaOps.BinaryIndex.DeltaSig;
|
||||
@@ -49,4 +50,24 @@ public interface IDeltaSignatureGenerator
|
||||
string symbolName,
|
||||
string scope,
|
||||
SignatureOptions? options = null);
|
||||
|
||||
/// <summary>
/// Generates the signature of one symbol, optionally enriched with semantic analysis.
/// </summary>
/// <param name="normalized">Normalized function whose instructions are signed.</param>
/// <param name="symbolName">Symbol name.</param>
/// <param name="scope">Section that contains the symbol.</param>
/// <param name="originalInstructions">Disassembled instructions, as input to semantic analysis.</param>
/// <param name="architecture">CPU architecture used for IR lifting.</param>
/// <param name="options">Generation options.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The symbol signature, with CFG metrics and an optional semantic fingerprint.</returns>
Task<SymbolSignature> GenerateSymbolSignatureAsync(
    NormalizedFunction normalized,
    string symbolName,
    string scope,
    IReadOnlyList<DisassembledInstruction> originalInstructions,
    CpuArchitecture architecture,
    SignatureOptions? options = null,
    CancellationToken ct = default);
|
||||
}
|
||||
|
||||
@@ -13,11 +13,13 @@ namespace StellaOps.BinaryIndex.DeltaSig;
|
||||
/// <param name="IncludeChunks">Include rolling chunk hashes for resilience.</param>
|
||||
/// <param name="ChunkSize">Size of rolling chunks in bytes (default 2KB).</param>
|
||||
/// <param name="HashAlgorithm">Hash algorithm to use (default sha256).</param>
|
||||
/// <param name="IncludeSemantic">Include IR-level semantic fingerprints for optimization-resilient matching.</param>
|
||||
public sealed record SignatureOptions(
    bool IncludeCfg = true,
    bool IncludeChunks = true,
    int ChunkSize = 2048,
    string HashAlgorithm = "sha256",
    // Declared last and defaulted to false, so existing positional callers are unaffected.
    bool IncludeSemantic = false);
|
||||
|
||||
/// <summary>
|
||||
/// Request for generating delta signatures from a binary.
|
||||
@@ -190,6 +192,17 @@ public sealed record SymbolSignature
|
||||
/// Rolling chunk hashes for resilience against small changes.
|
||||
/// </summary>
|
||||
public ImmutableArray<ChunkHash>? Chunks { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Semantic fingerprint hash based on IR-level analysis (hex string).
|
||||
/// Provides resilience against compiler optimizations and instruction reordering.
|
||||
/// </summary>
|
||||
public string? SemanticHashHex { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// API calls extracted from semantic analysis (for semantic anchoring).
|
||||
/// </summary>
|
||||
public ImmutableArray<string>? SemanticApiCalls { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -2,8 +2,10 @@
|
||||
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
|
||||
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Disassembly;
|
||||
using StellaOps.BinaryIndex.Normalization;
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
|
||||
namespace StellaOps.BinaryIndex.DeltaSig;
|
||||
|
||||
@@ -15,17 +17,52 @@ public static class ServiceCollectionExtensions
|
||||
/// <summary>
/// Adds delta signature generation and matching services.
/// Requires disassembly and normalization services to be registered.
/// If semantic services are registered, semantic fingerprinting will be available.
/// </summary>
/// <param name="services">The service collection.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddDeltaSignatures(this IServiceCollection services)
{
    // Register the generator exactly once, through a factory, so the optional
    // semantic services can be resolved with GetService (null when absent)
    // while the mandatory ones fail fast via GetRequiredService.
    services.AddSingleton<IDeltaSignatureGenerator>(sp =>
    {
        var disassembly = sp.GetRequiredService<DisassemblyService>();
        var normalization = sp.GetRequiredService<NormalizationService>();
        var logger = sp.GetRequiredService<ILogger<DeltaSignatureGenerator>>();

        // Semantic services are optional
        var irLifting = sp.GetService<IIrLiftingService>();
        var graphExtractor = sp.GetService<ISemanticGraphExtractor>();
        var fingerprintGenerator = sp.GetService<ISemanticFingerprintGenerator>();

        return new DeltaSignatureGenerator(
            disassembly,
            normalization,
            irLifting,
            graphExtractor,
            fingerprintGenerator,
            logger);
    });

    services.AddSingleton<IDeltaSignatureMatcher, DeltaSignatureMatcher>();

    return services;
}
|
||||
|
||||
/// <summary>
/// Registers the semantic analysis services followed by the delta signature services.
/// Disassembly and normalization services must be registered separately.
/// </summary>
/// <param name="services">The service collection.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddDeltaSignaturesWithSemantic(this IServiceCollection services)
{
    // Semantic services go in first; the delta signature registration follows.
    services.AddBinaryIndexSemantic();
    services.AddDeltaSignatures();

    return services;
}
|
||||
|
||||
/// <summary>
|
||||
/// Adds all binary index services: disassembly, normalization, and delta signatures.
|
||||
/// </summary>
|
||||
@@ -44,4 +81,26 @@ public static class ServiceCollectionExtensions
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
/// <summary>
/// Registers the full binary index stack including semantic analysis:
/// disassembly, normalization, semantic services and delta signatures.
/// </summary>
/// <param name="services">The service collection.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddBinaryIndexServicesWithSemantic(this IServiceCollection services)
{
    // Disassembly (default plugins) and normalization pipelines first.
    services.AddDisassemblyServices();
    services.AddNormalizationPipelines();

    // Semantic services, then delta signatures, in that order.
    services.AddDeltaSignaturesWithSemantic();

    return services;
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Disassembly.Abstractions\StellaOps.BinaryIndex.Disassembly.Abstractions.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Disassembly\StellaOps.BinaryIndex.Disassembly.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Normalization\StellaOps.BinaryIndex.Normalization.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Semantic\StellaOps.BinaryIndex.Semantic.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
Reference in New Issue
Block a user