save progress

This commit is contained in:
StellaOps Bot
2026-01-06 09:42:02 +02:00
parent 94d68bee8b
commit 37e11918e0
443 changed files with 85863 additions and 897 deletions

View File

@@ -7,6 +7,7 @@ using System.Security.Cryptography;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.DeltaSig;
@@ -17,18 +18,49 @@ public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
{
// Required services; the six-argument constructor rejects null for these.
private readonly DisassemblyService _disassemblyService;
private readonly NormalizationService _normalizationService;
// Optional semantic-analysis services. Each may be null; all three must be
// non-null for SemanticAnalysisAvailable to report true.
private readonly IIrLiftingService? _irLiftingService;
private readonly ISemanticGraphExtractor? _graphExtractor;
private readonly ISemanticFingerprintGenerator? _fingerprintGenerator;
// Required logger; used for semantic-fingerprinting diagnostics.
private readonly ILogger<DeltaSignatureGenerator> _logger;
/// <summary>
/// Creates a new delta signature generator without semantic analysis support.
/// </summary>
/// <param name="disassemblyService">Disassembly service; must not be null.</param>
/// <param name="normalizationService">Normalization service; must not be null.</param>
/// <param name="logger">Logger; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown by the chained constructor when any argument is null.
/// </exception>
/// <remarks>
/// Delegates to the six-argument constructor, passing null for all three semantic
/// services, so <see cref="SemanticAnalysisAvailable"/> is false for instances
/// created through this overload.
/// </remarks>
public DeltaSignatureGenerator(
    DisassemblyService disassemblyService,
    NormalizationService normalizationService,
    ILogger<DeltaSignatureGenerator> logger)
    : this(disassemblyService, normalizationService, null, null, null, logger)
{
    // Intentionally empty: the chained constructor validates the arguments and
    // assigns every field, so repeating the assignments here would be redundant.
}
/// <summary>
/// Initializes the generator with its required services and whichever semantic
/// analysis services are supplied.
/// </summary>
/// <param name="disassemblyService">Disassembly service; must not be null.</param>
/// <param name="normalizationService">Normalization service; must not be null.</param>
/// <param name="irLiftingService">Optional IR lifting service; may be null.</param>
/// <param name="graphExtractor">Optional semantic graph extractor; may be null.</param>
/// <param name="fingerprintGenerator">Optional semantic fingerprint generator; may be null.</param>
/// <param name="logger">Logger; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="disassemblyService"/>, <paramref name="normalizationService"/>
/// or <paramref name="logger"/> is null.
/// </exception>
public DeltaSignatureGenerator(
    DisassemblyService disassemblyService,
    NormalizationService normalizationService,
    IIrLiftingService? irLiftingService,
    ISemanticGraphExtractor? graphExtractor,
    ISemanticFingerprintGenerator? fingerprintGenerator,
    ILogger<DeltaSignatureGenerator> logger)
{
    // Guard the mandatory dependencies up front, in declaration order.
    ArgumentNullException.ThrowIfNull(disassemblyService);
    ArgumentNullException.ThrowIfNull(normalizationService);
    ArgumentNullException.ThrowIfNull(logger);

    _disassemblyService = disassemblyService;
    _normalizationService = normalizationService;
    _logger = logger;

    // The semantic services are stored as given; null simply disables the feature.
    _irLiftingService = irLiftingService;
    _graphExtractor = graphExtractor;
    _fingerprintGenerator = fingerprintGenerator;
}
/// <summary>
/// Reports whether all three semantic services (IR lifting, graph extraction and
/// fingerprint generation) were supplied, i.e. whether semantic analysis can run.
/// </summary>
public bool SemanticAnalysisAvailable =>
    !(_irLiftingService is null
        || _graphExtractor is null
        || _fingerprintGenerator is null);
/// <inheritdoc />
public async Task<DeltaSignature> GenerateSignaturesAsync(
Stream binaryStream,
@@ -94,11 +126,14 @@ public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
}
// Generate signature from normalized bytes
var signature = GenerateSymbolSignature(
var signature = await GenerateSymbolSignatureAsync(
normalized,
symbolName,
symbolInfo.Section ?? ".text",
options);
instructions,
binary.Architecture,
options,
ct);
symbolSignatures.Add(signature);
@@ -218,6 +253,136 @@ public sealed class DeltaSignatureGenerator : IDeltaSignatureGenerator
};
}
/// <inheritdoc />
/// <remarks>
/// Semantic fingerprinting is best-effort: if it fails, a warning is logged and the
/// signature is returned without semantic data. Cancellation is not treated as such
/// a failure; an <see cref="OperationCanceledException"/> raised while fingerprinting
/// propagates to the caller.
/// </remarks>
public async Task<SymbolSignature> GenerateSymbolSignatureAsync(
    NormalizedFunction normalized,
    string symbolName,
    string scope,
    IReadOnlyList<DisassembledInstruction> originalInstructions,
    CpuArchitecture architecture,
    SignatureOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(normalized);
    ArgumentNullException.ThrowIfNull(symbolName);
    ArgumentNullException.ThrowIfNull(scope);
    ArgumentNullException.ThrowIfNull(originalInstructions);

    options ??= new SignatureOptions();

    // Get normalized bytes for hashing
    var normalizedBytes = GetNormalizedBytes(normalized);

    // Compute the main hash
    var hashHex = ComputeHash(normalizedBytes, options.HashAlgorithm);

    // Compute chunk hashes for resilience; skipped when the function is
    // shorter than a single chunk.
    ImmutableArray<ChunkHash>? chunks = null;
    if (options.IncludeChunks && normalizedBytes.Length >= options.ChunkSize)
    {
        chunks = ComputeChunkHashes(normalizedBytes, options.ChunkSize, options.HashAlgorithm);
    }

    // Compute CFG metrics using proper CFG analysis
    int? bbCount = null;
    string? cfgEdgeHash = null;
    if (options.IncludeCfg && normalized.Instructions.Length > 0)
    {
        // Use first instruction's address as start address
        var startAddress = normalized.Instructions[0].OriginalAddress;
        var cfgMetrics = CfgExtractor.ComputeMetrics(
            normalized.Instructions.ToList(),
            startAddress);

        bbCount = cfgMetrics.BasicBlockCount;
        cfgEdgeHash = cfgMetrics.EdgeHash;
    }

    // Compute semantic fingerprint if enabled and services available
    string? semanticHashHex = null;
    ImmutableArray<string>? semanticApiCalls = null;
    if (options.IncludeSemantic && SemanticAnalysisAvailable && originalInstructions.Count > 0)
    {
        try
        {
            var semanticFingerprint = await ComputeSemanticFingerprintAsync(
                originalInstructions,
                symbolName,
                architecture,
                ct);

            if (semanticFingerprint is not null)
            {
                semanticHashHex = semanticFingerprint.GraphHashHex;
                semanticApiCalls = semanticFingerprint.ApiCalls;
            }
        }
        // Cancellation must reach the caller; only genuine analysis failures are
        // downgraded to "no semantic data".
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogWarning(
                ex,
                "Failed to compute semantic fingerprint for {Symbol}, continuing without semantic data",
                symbolName);
        }
    }

    return new SymbolSignature
    {
        Name = symbolName,
        Scope = scope,
        HashAlg = options.HashAlgorithm,
        HashHex = hashHex,
        SizeBytes = normalizedBytes.Length,
        CfgBbCount = bbCount,
        CfgEdgeHash = cfgEdgeHash,
        Chunks = chunks,
        SemanticHashHex = semanticHashHex,
        SemanticApiCalls = semanticApiCalls
    };
}
/// <summary>
/// Runs the semantic pipeline over the given instructions: lift to IR, extract the
/// semantic graph, then generate the fingerprint.
/// </summary>
/// <param name="instructions">Instructions to analyze; the first one supplies the start address (0 when empty).</param>
/// <param name="functionName">Name passed to the IR lifting service.</param>
/// <param name="architecture">Architecture to lift from.</param>
/// <param name="ct">Cancellation token forwarded to every pipeline stage.</param>
/// <returns>
/// The generated fingerprint, or null when any semantic service is missing or the
/// lifting service does not support <paramref name="architecture"/>.
/// </returns>
private async Task<SemanticFingerprint?> ComputeSemanticFingerprintAsync(
    IReadOnlyList<DisassembledInstruction> instructions,
    string functionName,
    CpuArchitecture architecture,
    CancellationToken ct)
{
    // Capture the optional services as non-null locals; bail out if any is absent.
    if (_irLiftingService is not { } lifter
        || _graphExtractor is not { } extractor
        || _fingerprintGenerator is not { } generator)
    {
        return null;
    }

    if (!lifter.SupportsArchitecture(architecture))
    {
        _logger.LogDebug(
            "Architecture {Arch} not supported for semantic analysis",
            architecture);
        return null;
    }

    var entryAddress = instructions.Count > 0 ? instructions[0].Address : 0UL;

    // Stage 1: lift the machine instructions to IR.
    var liftedFunction = await lifter.LiftToIrAsync(
        instructions,
        functionName,
        entryAddress,
        architecture,
        ct: ct);

    // Stage 2: derive the semantic graph from the IR.
    var semanticGraph = await extractor.ExtractGraphAsync(liftedFunction, ct: ct);

    // Stage 3: fingerprint the graph.
    return await generator.GenerateAsync(
        semanticGraph,
        entryAddress,
        ct: ct);
}
private static byte[] GetNormalizedBytes(NormalizedFunction normalized)
{
// Concatenate all normalized instruction bytes

View File

@@ -1,6 +1,7 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
namespace StellaOps.BinaryIndex.DeltaSig;
@@ -49,4 +50,24 @@ public interface IDeltaSignatureGenerator
string symbolName,
string scope,
SignatureOptions? options = null);
/// <summary>
/// Generates a signature for a single symbol with optional semantic analysis.
/// </summary>
/// <param name="normalized">The normalized function with instructions.</param>
/// <param name="symbolName">Name of the symbol.</param>
/// <param name="scope">Section containing the symbol.</param>
/// <param name="originalInstructions">Original disassembled instructions for semantic analysis.</param>
/// <param name="architecture">CPU architecture for IR lifting.</param>
/// <param name="options">Generation options; when null, default options are used.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The symbol signature with CFG metrics and optional semantic fingerprint.</returns>
/// <remarks>
/// In the <c>DeltaSignatureGenerator</c> implementation, null values for
/// <paramref name="normalized"/>, <paramref name="symbolName"/>, <paramref name="scope"/>
/// or <paramref name="originalInstructions"/> raise <see cref="ArgumentNullException"/>,
/// and the semantic fingerprint is only attempted when the options request it, the
/// semantic services are available, and <paramref name="originalInstructions"/> is non-empty.
/// </remarks>
Task<SymbolSignature> GenerateSymbolSignatureAsync(
NormalizedFunction normalized,
string symbolName,
string scope,
IReadOnlyList<DisassembledInstruction> originalInstructions,
CpuArchitecture architecture,
SignatureOptions? options = null,
CancellationToken ct = default);
}

View File

@@ -13,11 +13,13 @@ namespace StellaOps.BinaryIndex.DeltaSig;
/// <param name="IncludeChunks">Include rolling chunk hashes for resilience.</param>
/// <param name="ChunkSize">Size of rolling chunks in bytes (default 2KB).</param>
/// <param name="HashAlgorithm">Hash algorithm to use (default sha256).</param>
/// <param name="IncludeSemantic">Include IR-level semantic fingerprints for optimization-resilient matching (default false).</param>
public sealed record SignatureOptions(
    bool IncludeCfg = true,
    bool IncludeChunks = true,
    int ChunkSize = 2048,
    string HashAlgorithm = "sha256",
    bool IncludeSemantic = false);
/// <summary>
/// Request for generating delta signatures from a binary.
@@ -190,6 +192,17 @@ public sealed record SymbolSignature
/// Rolling chunk hashes for resilience against small changes.
/// </summary>
public ImmutableArray<ChunkHash>? Chunks { get; init; }
/// <summary>
/// Semantic fingerprint hash based on IR-level analysis (hex string).
/// Provides resilience against compiler optimizations and instruction reordering.
/// Null when no semantic fingerprint was computed for the symbol.
/// </summary>
public string? SemanticHashHex { get; init; }
/// <summary>
/// API calls extracted from semantic analysis (for semantic anchoring).
/// Null when no semantic fingerprint was computed for the symbol.
/// </summary>
public ImmutableArray<string>? SemanticApiCalls { get; init; }
}
/// <summary>

View File

@@ -2,8 +2,10 @@
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.DeltaSig;
@@ -15,17 +17,52 @@ public static class ServiceCollectionExtensions
/// <summary>
/// Adds delta signature generation and matching services.
/// Requires disassembly and normalization services to be registered.
/// If semantic services are registered, semantic fingerprinting will be available.
/// </summary>
/// <param name="services">The service collection.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddDeltaSignatures(this IServiceCollection services)
{
    // Register the generator exactly once, through a factory, so the optional
    // semantic services can be resolved with GetService (null when absent)
    // instead of failing resolution.
    services.AddSingleton<IDeltaSignatureGenerator>(sp =>
    {
        var disassembly = sp.GetRequiredService<DisassemblyService>();
        var normalization = sp.GetRequiredService<NormalizationService>();
        var logger = sp.GetRequiredService<ILogger<DeltaSignatureGenerator>>();

        // Semantic services are optional
        var irLifting = sp.GetService<IIrLiftingService>();
        var graphExtractor = sp.GetService<ISemanticGraphExtractor>();
        var fingerprintGenerator = sp.GetService<ISemanticFingerprintGenerator>();

        return new DeltaSignatureGenerator(
            disassembly,
            normalization,
            irLifting,
            graphExtractor,
            fingerprintGenerator,
            logger);
    });

    services.AddSingleton<IDeltaSignatureMatcher, DeltaSignatureMatcher>();

    return services;
}
/// <summary>
/// Registers the semantic analysis services and then the delta signature services,
/// so that semantic fingerprinting is enabled.
/// Requires disassembly and normalization services to be registered.
/// </summary>
/// <param name="services">The service collection.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddDeltaSignaturesWithSemantic(this IServiceCollection services)
{
    // Semantic services go in first, delta signature services second.
    services.AddBinaryIndexSemantic();
    services.AddDeltaSignatures();

    return services;
}
/// <summary>
/// Adds all binary index services: disassembly, normalization, and delta signatures.
/// </summary>
@@ -44,4 +81,26 @@ public static class ServiceCollectionExtensions
return services;
}
/// <summary>
/// Registers the complete binary index stack including semantic analysis:
/// disassembly, normalization, semantic, and delta signature services.
/// </summary>
/// <param name="services">The service collection.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddBinaryIndexServicesWithSemantic(this IServiceCollection services)
{
    // Prerequisites: disassembly (default plugins) and normalization pipelines.
    services.AddDisassemblyServices();
    services.AddNormalizationPipelines();

    // Semantic analysis services, which the delta signature generator picks up.
    services.AddBinaryIndexSemantic();

    // Delta signature services last; AddDeltaSignatures hands back the same collection.
    return services.AddDeltaSignatures();
}
}

View File

@@ -14,6 +14,7 @@
<ProjectReference Include="..\StellaOps.BinaryIndex.Disassembly.Abstractions\StellaOps.BinaryIndex.Disassembly.Abstractions.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Disassembly\StellaOps.BinaryIndex.Disassembly.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Normalization\StellaOps.BinaryIndex.Normalization.csproj" />
<ProjectReference Include="..\StellaOps.BinaryIndex.Semantic\StellaOps.BinaryIndex.Semantic.csproj" />
</ItemGroup>
<ItemGroup>