Stabilzie modules
This commit is contained in:
@@ -263,6 +263,99 @@ public sealed record SliceEdge
|
||||
public required string To { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// HTTP implementation of IReachGraphSliceClient that calls the ReachGraph service API.
|
||||
/// </summary>
|
||||
public sealed class HttpReachGraphSliceClient : IReachGraphSliceClient
|
||||
{
|
||||
private readonly HttpClient _httpClient;
|
||||
private readonly ILogger<HttpReachGraphSliceClient> _logger;
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new HTTP-backed ReachGraph slice client.
|
||||
/// </summary>
|
||||
/// <param name="httpClient">Pre-configured HttpClient targeting ReachGraph base URL.</param>
|
||||
/// <param name="logger">Logger.</param>
|
||||
public HttpReachGraphSliceClient(
|
||||
HttpClient httpClient,
|
||||
ILogger<HttpReachGraphSliceClient> logger)
|
||||
{
|
||||
_httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<CveSliceResult?> SliceByCveAsync(
|
||||
string digest,
|
||||
string cveId,
|
||||
string tenantId,
|
||||
int maxPaths = 5,
|
||||
CancellationToken ct = default)
|
||||
{
|
||||
_logger.LogDebug("Querying ReachGraph slice-by-CVE: {CveId} for {Digest}", cveId, digest);
|
||||
|
||||
try
|
||||
{
|
||||
var url = $"api/v1/slice/cve?digest={Uri.EscapeDataString(digest)}&cveId={Uri.EscapeDataString(cveId)}&tenantId={Uri.EscapeDataString(tenantId)}&maxPaths={maxPaths}";
|
||||
|
||||
var response = await _httpClient.GetAsync(url, ct);
|
||||
|
||||
if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
|
||||
{
|
||||
_logger.LogDebug("No slice data found for CVE {CveId}", cveId);
|
||||
return null;
|
||||
}
|
||||
|
||||
response.EnsureSuccessStatusCode();
|
||||
|
||||
return await System.Text.Json.JsonSerializer.DeserializeAsync<CveSliceResult>(
|
||||
await response.Content.ReadAsStreamAsync(ct),
|
||||
new System.Text.Json.JsonSerializerOptions { PropertyNameCaseInsensitive = true },
|
||||
ct);
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to query ReachGraph for CVE {CveId}", cveId);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<SliceResult?> SliceByEntrypointAsync(
|
||||
string digest,
|
||||
string entrypointPattern,
|
||||
string tenantId,
|
||||
int maxDepth = 10,
|
||||
CancellationToken ct = default)
|
||||
{
|
||||
_logger.LogDebug("Querying ReachGraph slice-by-entrypoint: {Pattern} for {Digest}", entrypointPattern, digest);
|
||||
|
||||
try
|
||||
{
|
||||
var url = $"api/v1/slice/entrypoint?digest={Uri.EscapeDataString(digest)}&pattern={Uri.EscapeDataString(entrypointPattern)}&tenantId={Uri.EscapeDataString(tenantId)}&maxDepth={maxDepth}";
|
||||
|
||||
var response = await _httpClient.GetAsync(url, ct);
|
||||
|
||||
if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
response.EnsureSuccessStatusCode();
|
||||
|
||||
return await System.Text.Json.JsonSerializer.DeserializeAsync<SliceResult>(
|
||||
await response.Content.ReadAsStreamAsync(ct),
|
||||
new System.Text.Json.JsonSerializerOptions { PropertyNameCaseInsensitive = true },
|
||||
ct);
|
||||
}
|
||||
catch (HttpRequestException ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to query ReachGraph for entrypoint {Pattern}", entrypointPattern);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Null implementation of IReachGraphSliceClient for testing.
|
||||
/// </summary>
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Http;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Analysis;
|
||||
|
||||
@@ -104,4 +105,26 @@ public static class ServiceCollectionExtensions
|
||||
services.AddSingleton(factory);
|
||||
return services;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Registers the ReachGraph HTTP integration, providing a real
|
||||
/// <see cref="IReachGraphSliceClient"/> and <see cref="IBinaryReachabilityService"/>.
|
||||
/// </summary>
|
||||
/// <param name="services">Service collection.</param>
|
||||
/// <param name="reachGraphBaseUrl">Base URL of the ReachGraph service.</param>
|
||||
/// <returns>Service collection for chaining.</returns>
|
||||
public static IServiceCollection AddReachGraphIntegration(
|
||||
this IServiceCollection services,
|
||||
string reachGraphBaseUrl)
|
||||
{
|
||||
services.AddHttpClient<IReachGraphSliceClient, HttpReachGraphSliceClient>(client =>
|
||||
{
|
||||
client.BaseAddress = new Uri(reachGraphBaseUrl);
|
||||
client.Timeout = TimeSpan.FromSeconds(30);
|
||||
});
|
||||
|
||||
services.AddBinaryReachabilityService<ReachGraphBinaryReachabilityService>();
|
||||
|
||||
return services;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,22 +23,220 @@ public sealed partial class TaintGateExtractor : ITaintGateExtractor
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
// Security-relevant API call patterns that indicate taint gates
|
||||
private static readonly HashSet<string> SecurityApis = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
"memcpy", "memmove", "memset", "strcpy", "strncpy", "strcat", "strncat",
|
||||
"sprintf", "snprintf", "vsprintf", "vsnprintf",
|
||||
"malloc", "calloc", "realloc", "free",
|
||||
"read", "write", "recv", "send", "recvfrom", "sendto",
|
||||
"open", "fopen", "close", "fclose",
|
||||
"strlen", "strcmp", "strncmp", "memcmp",
|
||||
"atoi", "atol", "strtol", "strtoul",
|
||||
"getenv", "setenv", "system", "exec", "popen",
|
||||
"checksum", "verify", "validate", "authenticate", "authorize",
|
||||
"encrypt", "decrypt", "sign", "hash",
|
||||
};
|
||||
|
||||
private static readonly HashSet<string> BoundsCheckApis = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
"strlen", "sizeof", "strnlen", "wcslen", "memcmp", "strncmp",
|
||||
};
|
||||
|
||||
private static readonly HashSet<string> AuthApis = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
"authenticate", "authorize", "checkperm", "verify_token", "check_auth",
|
||||
"login", "check_credentials", "validate_session",
|
||||
};
|
||||
|
||||
/// <inheritdoc />
|
||||
public Task<ImmutableArray<TaintGate>> ExtractAsync(
|
||||
public async Task<ImmutableArray<TaintGate>> ExtractAsync(
|
||||
string binaryPath,
|
||||
ImmutableArray<string> path,
|
||||
CancellationToken ct = default)
|
||||
{
|
||||
// In a full implementation, this would:
|
||||
// 1. Disassemble the binary
|
||||
// 2. Trace the path through the CFG
|
||||
// 3. Identify conditional branches
|
||||
// 4. Classify conditions as taint gates
|
||||
|
||||
_logger.LogDebug("Extracting taint gates from path with {Count} nodes", path.Length);
|
||||
|
||||
// For now, return empty - full implementation requires disassembly integration
|
||||
return Task.FromResult(ImmutableArray<TaintGate>.Empty);
|
||||
if (path.IsDefaultOrEmpty || string.IsNullOrWhiteSpace(binaryPath))
|
||||
{
|
||||
return ImmutableArray<TaintGate>.Empty;
|
||||
}
|
||||
|
||||
// Extract structural taint gates by analyzing path nodes for security-relevant patterns
|
||||
var gates = new List<TaintGate>();
|
||||
var conditions = await ExtractConditionsFromPathAsync(binaryPath, path, ct);
|
||||
|
||||
if (!conditions.IsDefaultOrEmpty)
|
||||
{
|
||||
gates.AddRange(ClassifyConditions(conditions));
|
||||
}
|
||||
|
||||
// Additionally scan path nodes for security-relevant function calls
|
||||
for (int i = 0; i < path.Length; i++)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
var node = path[i];
|
||||
|
||||
// Check if the node name matches security-relevant APIs
|
||||
var stripped = StripDecoration(node);
|
||||
if (BoundsCheckApis.Contains(stripped))
|
||||
{
|
||||
gates.Add(new TaintGate
|
||||
{
|
||||
BlockId = $"path_{i}",
|
||||
Address = DeriveAddressFromName(node),
|
||||
GateType = TaintGateType.BoundsCheck,
|
||||
Condition = $"call to {stripped}",
|
||||
BlocksWhenTrue = false,
|
||||
Confidence = 0.7m
|
||||
});
|
||||
}
|
||||
else if (AuthApis.Contains(stripped))
|
||||
{
|
||||
gates.Add(new TaintGate
|
||||
{
|
||||
BlockId = $"path_{i}",
|
||||
Address = DeriveAddressFromName(node),
|
||||
GateType = TaintGateType.AuthCheck,
|
||||
Condition = $"call to {stripped}",
|
||||
BlocksWhenTrue = true,
|
||||
Confidence = 0.75m
|
||||
});
|
||||
}
|
||||
else if (SecurityApis.Contains(stripped))
|
||||
{
|
||||
gates.Add(new TaintGate
|
||||
{
|
||||
BlockId = $"path_{i}",
|
||||
Address = DeriveAddressFromName(node),
|
||||
GateType = TaintGateType.InputValidation,
|
||||
Condition = $"security-relevant call to {stripped}",
|
||||
BlocksWhenTrue = false,
|
||||
Confidence = 0.6m
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
_logger.LogDebug("Extracted {Count} taint gates from path", gates.Count);
|
||||
return gates.Distinct().ToImmutableArray();
|
||||
}
|
||||
|
||||
private async Task<ImmutableArray<(string BlockId, ulong Address, string Condition)>> ExtractConditionsFromPathAsync(
|
||||
string binaryPath,
|
||||
ImmutableArray<string> path,
|
||||
CancellationToken ct)
|
||||
{
|
||||
// Attempt to read binary and extract conditional branch patterns
|
||||
if (!File.Exists(binaryPath))
|
||||
{
|
||||
return ImmutableArray<(string, ulong, string)>.Empty;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var conditions = new List<(string BlockId, ulong Address, string Condition)>();
|
||||
var buffer = new byte[Math.Min(64 * 1024, new FileInfo(binaryPath).Length)];
|
||||
int bytesRead;
|
||||
|
||||
await using (var stream = new FileStream(binaryPath, FileMode.Open, FileAccess.Read, FileShare.Read, 81920, true))
|
||||
{
|
||||
bytesRead = await stream.ReadAsync(buffer.AsMemory(0, buffer.Length), ct);
|
||||
}
|
||||
|
||||
if (bytesRead == 0) return ImmutableArray<(string, ulong, string)>.Empty;
|
||||
|
||||
// Scan for conditional branch patterns (x86-64 Jcc instructions: 0x70-0x7F, 0x0F 0x80-0x8F)
|
||||
for (int i = 0; i < bytesRead; i++)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
string? conditionText = null;
|
||||
ulong address = (ulong)i;
|
||||
|
||||
// Short conditional jumps (0x70-0x7F)
|
||||
if (buffer[i] >= 0x70 && buffer[i] <= 0x7F)
|
||||
{
|
||||
conditionText = ClassifyJccOpcode(buffer[i]);
|
||||
}
|
||||
// Near conditional jumps (0x0F 0x80-0x8F)
|
||||
else if (buffer[i] == 0x0F && i + 1 < bytesRead && buffer[i + 1] >= 0x80 && buffer[i + 1] <= 0x8F)
|
||||
{
|
||||
conditionText = ClassifyJccOpcode((byte)(buffer[i + 1] - 0x10));
|
||||
}
|
||||
// CMP instruction followed by conditional jump
|
||||
else if (buffer[i] == 0x3D && i + 5 < bytesRead) // CMP EAX, imm32
|
||||
{
|
||||
var imm = BitConverter.ToUInt32(buffer, i + 1);
|
||||
if (imm == 0)
|
||||
conditionText = "PTR == NULL";
|
||||
else if (imm < 0x1000)
|
||||
conditionText = $"SIZE < {imm}";
|
||||
}
|
||||
// TEST instruction (often used for null checks)
|
||||
else if (buffer[i] == 0x85 && i + 1 < bytesRead)
|
||||
{
|
||||
conditionText = "PTR != NULL";
|
||||
}
|
||||
|
||||
if (conditionText != null)
|
||||
{
|
||||
conditions.Add(($"block_{i:X}", address, conditionText));
|
||||
if (conditions.Count >= 32) break; // Limit extraction
|
||||
}
|
||||
}
|
||||
|
||||
return conditions.ToImmutableArray();
|
||||
}
|
||||
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||
{
|
||||
_logger.LogDebug(ex, "Failed to extract conditions from binary {Path}", binaryPath);
|
||||
return ImmutableArray<(string, ulong, string)>.Empty;
|
||||
}
|
||||
}
|
||||
|
||||
private static string ClassifyJccOpcode(byte opcode) => (opcode & 0x0F) switch
|
||||
{
|
||||
0x0 => "OVERFLOW CHECK", // JO
|
||||
0x2 => "SIZE < LIMIT", // JB/JNAE
|
||||
0x3 => "SIZE >= LIMIT", // JNB/JAE
|
||||
0x4 => "PTR == NULL", // JE/JZ
|
||||
0x5 => "PTR != NULL", // JNE/JNZ
|
||||
0x6 => "INDEX <= MAX", // JBE/JNA
|
||||
0x7 => "INDEX > MAX", // JNBE/JA
|
||||
0xC => "LENGTH < MAX", // JL/JNGE
|
||||
0xD => "LENGTH >= MAX", // JNL/JGE
|
||||
0xE => "COUNT <= LIMIT", // JLE/JNG
|
||||
0xF => "COUNT > LIMIT", // JNLE/JG
|
||||
_ => "CONDITIONAL CHECK"
|
||||
};
|
||||
|
||||
private static string StripDecoration(string name)
|
||||
{
|
||||
// Strip common function name decorations (sub_XXXX, @PLT, @@GLIBC, etc.)
|
||||
var stripped = name;
|
||||
if (stripped.StartsWith("sub_", StringComparison.OrdinalIgnoreCase))
|
||||
return stripped; // address-based name, not a known function
|
||||
|
||||
var atIdx = stripped.IndexOf('@');
|
||||
if (atIdx > 0)
|
||||
stripped = stripped[..atIdx];
|
||||
|
||||
stripped = stripped.TrimStart('_');
|
||||
return stripped;
|
||||
}
|
||||
|
||||
private static ulong DeriveAddressFromName(string name)
|
||||
{
|
||||
// Try to parse address from "sub_XXXX" format
|
||||
if (name.StartsWith("sub_", StringComparison.OrdinalIgnoreCase) &&
|
||||
ulong.TryParse(name.AsSpan(4), System.Globalization.NumberStyles.HexNumber, null, out var addr))
|
||||
{
|
||||
return addr;
|
||||
}
|
||||
|
||||
// Derive a deterministic address from the name
|
||||
var hash = System.Security.Cryptography.SHA256.HashData(System.Text.Encoding.UTF8.GetBytes(name));
|
||||
return BitConverter.ToUInt64(hash, 0);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
|
||||
@@ -22,16 +22,19 @@ public sealed class IrDiffGenerator : IIrDiffGenerator
|
||||
{
|
||||
private readonly ILogger<IrDiffGenerator> _logger;
|
||||
private readonly ICasStore? _casStore;
|
||||
private readonly ISymbolChangeTracer? _symbolTracer;
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new IR diff generator.
|
||||
/// </summary>
|
||||
public IrDiffGenerator(
|
||||
ILogger<IrDiffGenerator> logger,
|
||||
ICasStore? casStore = null)
|
||||
ICasStore? casStore = null,
|
||||
ISymbolChangeTracer? symbolTracer = null)
|
||||
{
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_casStore = casStore;
|
||||
_symbolTracer = symbolTracer;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -100,12 +103,15 @@ public sealed class IrDiffGenerator : IIrDiffGenerator
|
||||
|
||||
var results = await Task.WhenAll(tasks);
|
||||
|
||||
var diffCount = results.Count(m => m.IrDiff != null);
|
||||
// Enrich with symbol change tracking if tracer is available
|
||||
var enrichedResults = EnrichWithSymbolChanges(results);
|
||||
|
||||
var diffCount = enrichedResults.Count(m => m.IrDiff != null);
|
||||
_logger.LogInformation(
|
||||
"Generated IR diffs for {Count}/{Total} function matches",
|
||||
diffCount, matches.Count);
|
||||
|
||||
return results.ToList();
|
||||
return enrichedResults;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -126,38 +132,31 @@ public sealed class IrDiffGenerator : IIrDiffGenerator
|
||||
|
||||
try
|
||||
{
|
||||
// In a real implementation, this would:
|
||||
// 1. Lift both functions to IR
|
||||
// 2. Compare the IR representations
|
||||
// 3. Generate diff payload
|
||||
// 4. Store in CAS if enabled
|
||||
// 5. Return reference
|
||||
// Read function byte windows from both binaries
|
||||
var oldBytes = await ReadFunctionBytesAsync(oldBinaryStream, oldFunctionAddress, cts.Token);
|
||||
var newBytes = await ReadFunctionBytesAsync(newBinaryStream, functionAddress, cts.Token);
|
||||
|
||||
// For now, create a placeholder summary
|
||||
var summary = new IrDiffSummary
|
||||
{
|
||||
OldBlockCount = 0,
|
||||
NewBlockCount = 0,
|
||||
BlocksAdded = 0,
|
||||
BlocksRemoved = 0,
|
||||
BlocksModified = 0,
|
||||
OldStatementCount = 0,
|
||||
NewStatementCount = 0,
|
||||
StatementsAdded = 0,
|
||||
StatementsRemoved = 0,
|
||||
StatementsModified = 0,
|
||||
PayloadSizeBytes = 0
|
||||
};
|
||||
// Build basic block representations from byte windows
|
||||
var oldBlocks = BuildBlocksFromBytes(oldBytes, oldFunctionAddress);
|
||||
var newBlocks = BuildBlocksFromBytes(newBytes, functionAddress);
|
||||
|
||||
// Compare blocks using hash-based matching
|
||||
var (blockDiffs, stmtDiffs, summary) = ComputeBlockDiffs(
|
||||
oldBlocks, newBlocks, oldFunctionAddress, functionAddress, options);
|
||||
|
||||
var payloadJson = JsonSerializer.Serialize(new { blockDiffs, stmtDiffs, summary });
|
||||
var payloadBytes = Encoding.UTF8.GetBytes(payloadJson);
|
||||
var payloadDigest = $"sha256:{Convert.ToHexString(SHA256.HashData(payloadBytes)).ToLowerInvariant()}";
|
||||
|
||||
var payload = new IrDiffPayload
|
||||
{
|
||||
Digest = $"sha256:{ComputePlaceholderDigest(functionAddress)}",
|
||||
Digest = payloadDigest,
|
||||
IrFormat = options.IrFormat,
|
||||
FunctionName = $"func_{functionAddress:X}",
|
||||
OldAddress = oldFunctionAddress,
|
||||
NewAddress = functionAddress,
|
||||
BlockDiffs = new List<BlockDiff>(),
|
||||
StatementDiffs = new List<StatementDiff>(),
|
||||
BlockDiffs = blockDiffs,
|
||||
StatementDiffs = stmtDiffs,
|
||||
Summary = summary,
|
||||
ComputedAt = DateTimeOffset.UtcNow
|
||||
};
|
||||
@@ -193,11 +192,249 @@ public sealed class IrDiffGenerator : IIrDiffGenerator
|
||||
}
|
||||
}
|
||||
|
||||
private static string ComputePlaceholderDigest(ulong address)
|
||||
private List<FunctionMatchV2> EnrichWithSymbolChanges(FunctionMatchV2[] results)
|
||||
{
|
||||
var bytes = BitConverter.GetBytes(address);
|
||||
var hash = SHA256.HashData(bytes);
|
||||
return Convert.ToHexString(hash).ToLowerInvariant();
|
||||
if (_symbolTracer is null)
|
||||
{
|
||||
return results.ToList();
|
||||
}
|
||||
|
||||
var enriched = new List<FunctionMatchV2>(results.Length);
|
||||
|
||||
foreach (var match in results)
|
||||
{
|
||||
// Build symbol signatures from the function match hashes
|
||||
var fromSymbol = match.BeforeHash is not null
|
||||
? new SymbolSignature
|
||||
{
|
||||
Name = match.Name,
|
||||
HashAlg = "sha256",
|
||||
HashHex = match.BeforeHash,
|
||||
SizeBytes = (int)(match.Size ?? 0)
|
||||
}
|
||||
: null;
|
||||
|
||||
var toSymbol = match.AfterHash is not null
|
||||
? new SymbolSignature
|
||||
{
|
||||
Name = match.Name,
|
||||
HashAlg = "sha256",
|
||||
HashHex = match.AfterHash,
|
||||
SizeBytes = (int)(match.Size ?? 0)
|
||||
}
|
||||
: null;
|
||||
|
||||
if (fromSymbol is null && toSymbol is null)
|
||||
{
|
||||
enriched.Add(match);
|
||||
continue;
|
||||
}
|
||||
|
||||
var changeResult = _symbolTracer.CompareSymbols(fromSymbol, toSymbol);
|
||||
|
||||
// Map symbol change type to match state
|
||||
var matchState = changeResult.ChangeType switch
|
||||
{
|
||||
SymbolChangeType.Unchanged => match.MatchState,
|
||||
SymbolChangeType.Added => "modified",
|
||||
SymbolChangeType.Removed => "modified",
|
||||
SymbolChangeType.Patched => "patched",
|
||||
SymbolChangeType.Modified => "modified",
|
||||
_ => match.MatchState
|
||||
};
|
||||
|
||||
// Build explanation combining IR diff and symbol change info
|
||||
var explanation = match.Explanation;
|
||||
if (changeResult.ChangeExplanation is not null)
|
||||
{
|
||||
explanation = explanation is not null
|
||||
? $"{explanation}; Symbol: {changeResult.ChangeExplanation}"
|
||||
: $"Symbol: {changeResult.ChangeExplanation}";
|
||||
}
|
||||
|
||||
enriched.Add(match with
|
||||
{
|
||||
MatchState = matchState,
|
||||
Explanation = explanation
|
||||
});
|
||||
}
|
||||
|
||||
return enriched;
|
||||
}
|
||||
|
||||
private static async Task<byte[]> ReadFunctionBytesAsync(
|
||||
Stream binaryStream, ulong address, CancellationToken ct)
|
||||
{
|
||||
const int WindowSize = 4096;
|
||||
|
||||
if (!binaryStream.CanSeek || !binaryStream.CanRead)
|
||||
{
|
||||
return [];
|
||||
}
|
||||
|
||||
var offset = (long)(address % (ulong)Math.Max(1, binaryStream.Length));
|
||||
var length = (int)Math.Min(WindowSize, binaryStream.Length - offset);
|
||||
if (length <= 0) return [];
|
||||
|
||||
binaryStream.Position = offset;
|
||||
var buffer = new byte[length];
|
||||
var read = await binaryStream.ReadAsync(buffer.AsMemory(0, length), ct);
|
||||
|
||||
return read < length ? buffer[..read] : buffer;
|
||||
}
|
||||
|
||||
private readonly record struct BlockInfo(string Id, ulong Start, ulong End, string Hash, int StatementCount);
|
||||
|
||||
private static List<BlockInfo> BuildBlocksFromBytes(byte[] bytes, ulong baseAddress)
|
||||
{
|
||||
if (bytes.Length == 0)
|
||||
return [];
|
||||
|
||||
var blocks = new List<BlockInfo>();
|
||||
// Split into blocks at branch-like opcodes (heuristic)
|
||||
var blockStart = 0;
|
||||
var blockIndex = 0;
|
||||
|
||||
for (int i = 0; i < bytes.Length; i++)
|
||||
{
|
||||
bool isBoundary = bytes[i] is 0xC3 or 0xC2 or 0xE9 or 0xEB
|
||||
|| (bytes[i] >= 0x70 && bytes[i] <= 0x7F);
|
||||
|
||||
if (isBoundary || i == bytes.Length - 1)
|
||||
{
|
||||
var end = Math.Min(i + 1, bytes.Length);
|
||||
var blockBytes = bytes[blockStart..end];
|
||||
var hash = Convert.ToHexString(SHA256.HashData(blockBytes)).ToLowerInvariant();
|
||||
var stmtCount = Math.Max(1, blockBytes.Length / 3); // Approximate: ~3 bytes per instruction
|
||||
|
||||
blocks.Add(new BlockInfo(
|
||||
$"bb{blockIndex}",
|
||||
baseAddress + (ulong)blockStart,
|
||||
baseAddress + (ulong)end,
|
||||
hash,
|
||||
stmtCount));
|
||||
|
||||
blockIndex++;
|
||||
blockStart = end;
|
||||
|
||||
if (blocks.Count >= 64) break; // Limit block count
|
||||
}
|
||||
}
|
||||
|
||||
if (blocks.Count == 0 && bytes.Length > 0)
|
||||
{
|
||||
var hash = Convert.ToHexString(SHA256.HashData(bytes)).ToLowerInvariant();
|
||||
blocks.Add(new BlockInfo("bb0", baseAddress, baseAddress + (ulong)bytes.Length, hash, Math.Max(1, bytes.Length / 3)));
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
private static (List<BlockDiff> blockDiffs, List<StatementDiff> stmtDiffs, IrDiffSummary summary)
|
||||
ComputeBlockDiffs(
|
||||
List<BlockInfo> oldBlocks,
|
||||
List<BlockInfo> newBlocks,
|
||||
ulong oldAddress,
|
||||
ulong newAddress,
|
||||
IrDiffOptions options)
|
||||
{
|
||||
var blockDiffs = new List<BlockDiff>();
|
||||
var stmtDiffs = new List<StatementDiff>();
|
||||
|
||||
// Build hash -> block mappings for matching
|
||||
var oldByHash = oldBlocks.ToDictionary(b => b.Hash, b => b);
|
||||
var newByHash = newBlocks.ToDictionary(b => b.Hash, b => b);
|
||||
|
||||
var matchedOld = new HashSet<string>();
|
||||
var matchedNew = new HashSet<string>();
|
||||
|
||||
// Pass 1: Exact hash matches (unchanged blocks)
|
||||
foreach (var ob in oldBlocks)
|
||||
{
|
||||
if (newByHash.ContainsKey(ob.Hash))
|
||||
{
|
||||
blockDiffs.Add(new BlockDiff
|
||||
{
|
||||
BlockId = ob.Id,
|
||||
ChangeType = "unchanged",
|
||||
OldAddress = ob.Start,
|
||||
NewAddress = newByHash[ob.Hash].Start,
|
||||
StatementsChanged = 0
|
||||
});
|
||||
matchedOld.Add(ob.Id);
|
||||
matchedNew.Add(newByHash[ob.Hash].Id);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 2: Unmatched old blocks = removed
|
||||
foreach (var ob in oldBlocks.Where(b => !matchedOld.Contains(b.Id)))
|
||||
{
|
||||
blockDiffs.Add(new BlockDiff
|
||||
{
|
||||
BlockId = ob.Id,
|
||||
ChangeType = "removed",
|
||||
OldAddress = ob.Start,
|
||||
StatementsChanged = ob.StatementCount
|
||||
});
|
||||
|
||||
if (options.IncludeInstructionDiffs)
|
||||
{
|
||||
stmtDiffs.Add(new StatementDiff
|
||||
{
|
||||
BlockId = ob.Id,
|
||||
ChangeType = "removed",
|
||||
OldStatement = $"[{ob.StatementCount} statements at 0x{ob.Start:X}]"
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 3: Unmatched new blocks = added
|
||||
foreach (var nb in newBlocks.Where(b => !matchedNew.Contains(b.Id)))
|
||||
{
|
||||
blockDiffs.Add(new BlockDiff
|
||||
{
|
||||
BlockId = nb.Id,
|
||||
ChangeType = "added",
|
||||
NewAddress = nb.Start,
|
||||
StatementsChanged = nb.StatementCount
|
||||
});
|
||||
|
||||
if (options.IncludeInstructionDiffs)
|
||||
{
|
||||
stmtDiffs.Add(new StatementDiff
|
||||
{
|
||||
BlockId = nb.Id,
|
||||
ChangeType = "added",
|
||||
NewStatement = $"[{nb.StatementCount} statements at 0x{nb.Start:X}]"
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
var blocksAdded = blockDiffs.Count(d => d.ChangeType == "added");
|
||||
var blocksRemoved = blockDiffs.Count(d => d.ChangeType == "removed");
|
||||
var blocksModified = blockDiffs.Count(d => d.ChangeType == "modified");
|
||||
var stmtsAdded = stmtDiffs.Count(d => d.ChangeType == "added");
|
||||
var stmtsRemoved = stmtDiffs.Count(d => d.ChangeType == "removed");
|
||||
var stmtsModified = stmtDiffs.Count(d => d.ChangeType == "modified");
|
||||
var oldStmtTotal = oldBlocks.Sum(b => b.StatementCount);
|
||||
var newStmtTotal = newBlocks.Sum(b => b.StatementCount);
|
||||
|
||||
var summary = new IrDiffSummary
|
||||
{
|
||||
OldBlockCount = oldBlocks.Count,
|
||||
NewBlockCount = newBlocks.Count,
|
||||
BlocksAdded = blocksAdded,
|
||||
BlocksRemoved = blocksRemoved,
|
||||
BlocksModified = blocksModified,
|
||||
OldStatementCount = oldStmtTotal,
|
||||
NewStatementCount = newStmtTotal,
|
||||
StatementsAdded = stmtsAdded,
|
||||
StatementsRemoved = stmtsRemoved,
|
||||
StatementsModified = stmtsModified,
|
||||
PayloadSizeBytes = blockDiffs.Count * 64 + stmtDiffs.Count * 128 // Approximate
|
||||
};
|
||||
|
||||
return (blockDiffs, stmtDiffs, summary);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,320 @@
|
||||
// Licensed under BUSL-1.1. Copyright (C) 2026 StellaOps Contributors.
|
||||
|
||||
using System.Buffers.Binary;
|
||||
using System.Collections.Immutable;
|
||||
using System.Security.Cryptography;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Diff;
|
||||
|
||||
/// <summary>
|
||||
/// Byte-level binary diff engine using rolling hash (Rabin fingerprint style) windows
|
||||
/// for section-level binary comparison with privacy byte-stripping.
|
||||
/// </summary>
|
||||
public sealed class ByteRangeDiffEngine
|
||||
{
|
||||
private const int DefaultWindowSize = 64;
|
||||
private const ulong RabinPrime = 0x3B9ACA07UL; // Large prime for Rabin hash
|
||||
private const ulong RabinModulus = (1UL << 31) - 1; // Mersenne prime
|
||||
|
||||
/// <summary>
|
||||
/// Compares two binary byte arrays at the section level using rolling hash windows.
|
||||
/// Privacy bytes (timestamps, build IDs) are zeroed before comparison.
|
||||
/// </summary>
|
||||
/// <param name="oldBytes">Old (vulnerable) binary section bytes.</param>
|
||||
/// <param name="newBytes">New (patched) binary section bytes.</param>
|
||||
/// <param name="options">Comparison options.</param>
|
||||
/// <returns>Byte range diff result.</returns>
|
||||
public ByteRangeDiffResult Compare(
|
||||
ReadOnlySpan<byte> oldBytes,
|
||||
ReadOnlySpan<byte> newBytes,
|
||||
ByteRangeDiffOptions? options = null)
|
||||
{
|
||||
options ??= ByteRangeDiffOptions.Default;
|
||||
|
||||
// Strip privacy bytes before comparison
|
||||
var normalizedOld = StripPrivacyBytes(oldBytes.ToArray(), options);
|
||||
var normalizedNew = StripPrivacyBytes(newBytes.ToArray(), options);
|
||||
|
||||
// Compute rolling hashes for both sections
|
||||
var oldChunks = ComputeRollingChunks(normalizedOld, options.WindowSize);
|
||||
var newChunks = ComputeRollingChunks(normalizedNew, options.WindowSize);
|
||||
|
||||
// Match chunks between old and new
|
||||
var oldChunkSet = new HashSet<ulong>(oldChunks.Select(c => c.Hash));
|
||||
var newChunkSet = new HashSet<ulong>(newChunks.Select(c => c.Hash));
|
||||
|
||||
oldChunkSet.IntersectWith(newChunkSet);
|
||||
var matchedChunks = oldChunkSet.Count;
|
||||
var totalChunks = Math.Max(1, Math.Max(oldChunks.Count, newChunks.Count));
|
||||
var similarity = (double)matchedChunks / totalChunks;
|
||||
|
||||
// Find changed ranges
|
||||
var changedRanges = FindChangedRanges(normalizedOld, normalizedNew, options.WindowSize);
|
||||
|
||||
// Compute section-level hashes
|
||||
var oldHash = Convert.ToHexStringLower(SHA256.HashData(normalizedOld));
|
||||
var newHash = Convert.ToHexStringLower(SHA256.HashData(normalizedNew));
|
||||
|
||||
return new ByteRangeDiffResult
|
||||
{
|
||||
OldSize = oldBytes.Length,
|
||||
NewSize = newBytes.Length,
|
||||
SizeDelta = newBytes.Length - oldBytes.Length,
|
||||
Similarity = similarity,
|
||||
OldHash = oldHash,
|
||||
NewHash = newHash,
|
||||
ExactMatch = oldHash == newHash,
|
||||
MatchedChunks = matchedChunks,
|
||||
TotalChunks = totalChunks,
|
||||
ChangedRanges = changedRanges,
|
||||
PrivacyBytesStripped = options.StripTimestamps || options.StripBuildIds
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compares two binary streams at the section level.
|
||||
/// </summary>
|
||||
public async Task<ByteRangeDiffResult> CompareStreamsAsync(
|
||||
Stream oldStream,
|
||||
Stream newStream,
|
||||
ByteRangeDiffOptions? options = null,
|
||||
CancellationToken ct = default)
|
||||
{
|
||||
var oldBytes = await ReadStreamAsync(oldStream, ct);
|
||||
var newBytes = await ReadStreamAsync(newStream, ct);
|
||||
return Compare(oldBytes, newBytes, options);
|
||||
}
|
||||
|
||||
private static byte[] StripPrivacyBytes(byte[] buffer, ByteRangeDiffOptions options)
|
||||
{
|
||||
var result = new byte[buffer.Length];
|
||||
Array.Copy(buffer, result, buffer.Length);
|
||||
|
||||
if (options.StripTimestamps)
|
||||
{
|
||||
StripTimestampBytes(result);
|
||||
}
|
||||
|
||||
if (options.StripBuildIds)
|
||||
{
|
||||
StripBuildIdBytes(result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void StripTimestampBytes(byte[] buffer)
|
||||
{
|
||||
// PE timestamp at offset 0x88 (IMAGE_FILE_HEADER.TimeDateStamp) if PE
|
||||
if (buffer.Length > 0x8C &&
|
||||
buffer[0] == 0x4D && buffer[1] == 0x5A) // MZ header
|
||||
{
|
||||
// Read PE header offset from 0x3C
|
||||
if (buffer.Length > 0x40)
|
||||
{
|
||||
var peOffset = BinaryPrimitives.ReadInt32LittleEndian(buffer.AsSpan(0x3C));
|
||||
if (peOffset > 0 && peOffset + 8 < buffer.Length)
|
||||
{
|
||||
// Zero the TimeDateStamp field (4 bytes at PE + 8)
|
||||
buffer.AsSpan(peOffset + 8, 4).Clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ELF: zero out e_ident padding bytes (bytes 9-15) which may contain build info
|
||||
if (buffer.Length > 16 &&
|
||||
buffer[0] == 0x7F && buffer[1] == 0x45 && buffer[2] == 0x4C && buffer[3] == 0x46) // ELF magic
|
||||
{
|
||||
buffer.AsSpan(9, 7).Clear(); // EI_PAD through end of e_ident
|
||||
}
|
||||
}
|
||||
|
||||
private static void StripBuildIdBytes(byte[] buffer)
|
||||
{
|
||||
// Search for GNU Build-ID note header (ELF)
|
||||
// Pattern: 04 00 00 00 <len> 00 00 00 03 00 00 00 "GNU\0"
|
||||
var gnuPattern = new byte[] { 0x47, 0x4E, 0x55, 0x00 }; // "GNU\0"
|
||||
|
||||
for (int i = 0; i + gnuPattern.Length + 20 < buffer.Length; i++)
|
||||
{
|
||||
if (buffer.AsSpan(i, gnuPattern.Length).SequenceEqual(gnuPattern))
|
||||
{
|
||||
// Check if preceded by note header
|
||||
if (i >= 12)
|
||||
{
|
||||
var nameSize = BinaryPrimitives.ReadInt32LittleEndian(buffer.AsSpan(i - 12));
|
||||
var descSize = BinaryPrimitives.ReadInt32LittleEndian(buffer.AsSpan(i - 8));
|
||||
|
||||
if (nameSize == 4 && descSize > 0 && descSize <= 64 && i + 4 + descSize <= buffer.Length)
|
||||
{
|
||||
// Zero out the build-ID bytes
|
||||
buffer.AsSpan(i + 4, descSize).Clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private readonly record struct RollingChunk(int Offset, int Size, ulong Hash);
|
||||
|
||||
/// <summary>
/// Splits <paramref name="data"/> into overlapping windows and records a
/// Rabin hash for each, producing the chunk list used for similarity scoring.
/// </summary>
/// <param name="data">Bytes to chunk.</param>
/// <param name="windowSize">Window size in bytes; consecutive windows overlap by roughly half.</param>
/// <returns>Chunks in offset order; empty input yields an empty list.</returns>
private static List<RollingChunk> ComputeRollingChunks(byte[] data, int windowSize)
{
    // Degenerate inputs: empty data has no chunks; data shorter than one
    // window becomes a single chunk covering everything.
    if (data.Length < windowSize)
    {
        return data.Length == 0
            ? []
            : [new RollingChunk(0, data.Length, ComputeRabinHash(data.AsSpan()))];
    }

    // The first chunk covers the initial window.
    var result = new List<RollingChunk>
    {
        new RollingChunk(0, windowSize, ComputeRabinHash(data.AsSpan(0, windowSize)))
    };

    // Each subsequent window hash is recomputed from scratch (not incrementally
    // rolled) at ~50%-overlapping offsets.
    // NOTE(review): the walk starts at offset 1 rather than windowSize / 2, so
    // the first two chunks overlap by windowSize - 1 bytes — confirm intent.
    var step = windowSize / 2;
    for (int start = 1; start + windowSize <= data.Length; start += step)
    {
        var stop = Math.Min(start + windowSize, data.Length);
        var span = data.AsSpan(start, stop - start);
        result.Add(new RollingChunk(start, stop - start, ComputeRabinHash(span)));
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Computes a polynomial (Rabin-style) hash of <paramref name="data"/>:
/// h = (h * RabinPrime + b) mod RabinModulus, accumulated left to right.
/// </summary>
private static ulong ComputeRabinHash(ReadOnlySpan<byte> data)
{
    ulong h = 0;
    for (int i = 0; i < data.Length; i++)
    {
        h = (h * RabinPrime + data[i]) % RabinModulus;
    }
    return h;
}
|
||||
|
||||
/// <summary>
/// Performs a byte-wise comparison of the overlapping prefix of the two inputs,
/// coalescing consecutive mismatches into ranges, and appends one extra range
/// for any size difference. Output is capped at 64 ranges.
/// </summary>
/// <param name="oldData">Previous binary content.</param>
/// <param name="newData">Current binary content.</param>
/// <param name="windowSize">Currently unused here; NOTE(review): kept for signature
/// compatibility with the chunking path — confirm whether it should matter.</param>
/// <returns>Changed byte ranges in ascending offset order.</returns>
private static ImmutableArray<ByteRange> FindChangedRanges(byte[] oldData, byte[] newData, int windowSize)
{
    const int MaxRanges = 64;

    var changed = new List<ByteRange>();
    var overlap = Math.Min(oldData.Length, newData.Length);
    var runStart = -1;

    // Scan the overlapping prefix, tracking the start of each mismatch run.
    for (int pos = 0; pos < overlap; pos++)
    {
        if (oldData[pos] != newData[pos])
        {
            if (runStart < 0)
            {
                runStart = pos;
            }
            continue;
        }

        if (runStart >= 0)
        {
            changed.Add(new ByteRange(runStart, pos - runStart));
            runStart = -1;

            if (changed.Count >= MaxRanges)
            {
                break; // cap output size
            }
        }
    }

    // Close a mismatch run that extends to the end of the overlap.
    if (runStart >= 0)
    {
        changed.Add(new ByteRange(runStart, overlap - runStart));
    }

    // The tail beyond the shorter input counts as one additional changed range.
    if (oldData.Length != newData.Length && changed.Count < MaxRanges)
    {
        changed.Add(new ByteRange(overlap, Math.Abs(oldData.Length - newData.Length)));
    }

    return changed.ToImmutableArray();
}
|
||||
|
||||
/// <summary>
/// Reads the content of <paramref name="stream"/> into a byte array, rewinding
/// seekable streams to position 0 first and capping the result at 16 MB.
/// </summary>
/// <param name="stream">Stream to drain.</param>
/// <param name="ct">Cancellation token observed by each read.</param>
/// <returns>Up to 16 MB of stream content; oversized input is truncated at exactly the cap.</returns>
private static async Task<byte[]> ReadStreamAsync(Stream stream, CancellationToken ct)
{
    const int MaxSize = 16 * 1024 * 1024; // 16MB limit

    if (stream.CanSeek)
    {
        stream.Position = 0;
    }

    using var ms = new MemoryStream();
    var buffer = new byte[81920];
    int read;
    int totalRead = 0;

    while ((read = await stream.ReadAsync(buffer, ct)) > 0)
    {
        // Fix: the previous version dropped the entire chunk that crossed the
        // cap before writing it, so the truncation point depended on chunk
        // boundaries. Write the partial chunk so oversized input is cut at
        // exactly MaxSize, deterministically.
        var toWrite = Math.Min(read, MaxSize - totalRead);
        ms.Write(buffer, 0, toWrite);
        totalRead += toWrite;

        if (totalRead >= MaxSize)
        {
            break;
        }
    }

    return ms.ToArray();
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Tunable settings for the byte-range diff engine.
/// </summary>
public sealed record ByteRangeDiffOptions
{
    /// <summary>Shared instance carrying the default settings.</summary>
    public static ByteRangeDiffOptions Default { get; } = new();

    /// <summary>Window size, in bytes, used for rolling-hash chunking.</summary>
    public int WindowSize { get; init; } = 64;

    /// <summary>When true, timestamp fields are zeroed before the binaries are compared.</summary>
    public bool StripTimestamps { get; init; } = true;

    /// <summary>When true, build-ID fields are zeroed before the binaries are compared.</summary>
    public bool StripBuildIds { get; init; } = true;
}
|
||||
|
||||
/// <summary>
/// Outcome of comparing two binary sections with the byte-range diff engine.
/// </summary>
public sealed record ByteRangeDiffResult
{
    /// <summary>Byte length of the old section.</summary>
    public required int OldSize { get; init; }

    /// <summary>Byte length of the new section.</summary>
    public required int NewSize { get; init; }

    /// <summary>Signed size change, computed as new minus old.</summary>
    public required int SizeDelta { get; init; }

    /// <summary>Chunk-based similarity in the range [0.0, 1.0].</summary>
    public required double Similarity { get; init; }

    /// <summary>SHA-256 of the old bytes after normalization.</summary>
    public required string OldHash { get; init; }

    /// <summary>SHA-256 of the new bytes after normalization.</summary>
    public required string NewHash { get; init; }

    /// <summary>True when both sides are byte-for-byte equal after normalization.</summary>
    public required bool ExactMatch { get; init; }

    /// <summary>Count of rolling-hash chunks that matched.</summary>
    public required int MatchedChunks { get; init; }

    /// <summary>Total count of rolling-hash chunks compared.</summary>
    public required int TotalChunks { get; init; }

    /// <summary>Byte spans where the two sections differ.</summary>
    public required ImmutableArray<ByteRange> ChangedRanges { get; init; }

    /// <summary>True when timestamp/build-ID bytes were stripped prior to comparison.</summary>
    public required bool PrivacyBytesStripped { get; init; }
}
|
||||
|
||||
/// <summary>
/// Describes a contiguous span of bytes that differs between two binaries.
/// </summary>
/// <param name="Offset">Starting byte offset of the span.</param>
/// <param name="Length">Number of bytes in the span.</param>
public sealed record ByteRange(int Offset, int Length);
|
||||
@@ -5,6 +5,7 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.BinaryIndex.Decompiler;
|
||||
using StellaOps.BinaryIndex.Diff;
|
||||
using StellaOps.BinaryIndex.ML;
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
using System.Collections.Immutable;
|
||||
@@ -12,13 +13,14 @@ using System.Collections.Immutable;
|
||||
namespace StellaOps.BinaryIndex.Ensemble;
|
||||
|
||||
/// <summary>
|
||||
/// Ensemble decision engine that combines syntactic, semantic, and ML signals.
|
||||
/// Ensemble decision engine that combines syntactic, semantic, ML, and multi-tier signals.
|
||||
/// </summary>
|
||||
public sealed class EnsembleDecisionEngine : IEnsembleDecisionEngine
|
||||
{
|
||||
private readonly IAstComparisonEngine _astEngine;
|
||||
private readonly ISemanticMatcher _semanticMatcher;
|
||||
private readonly IEmbeddingService _embeddingService;
|
||||
private readonly ICallNgramGenerator? _callNgramGenerator;
|
||||
private readonly EnsembleOptions _defaultOptions;
|
||||
private readonly ILogger<EnsembleDecisionEngine> _logger;
|
||||
|
||||
@@ -27,11 +29,13 @@ public sealed class EnsembleDecisionEngine : IEnsembleDecisionEngine
|
||||
ISemanticMatcher semanticMatcher,
|
||||
IEmbeddingService embeddingService,
|
||||
IOptions<EnsembleOptions> options,
|
||||
ILogger<EnsembleDecisionEngine> logger)
|
||||
ILogger<EnsembleDecisionEngine> logger,
|
||||
ICallNgramGenerator? callNgramGenerator = null)
|
||||
{
|
||||
_astEngine = astEngine ?? throw new ArgumentNullException(nameof(astEngine));
|
||||
_semanticMatcher = semanticMatcher ?? throw new ArgumentNullException(nameof(semanticMatcher));
|
||||
_embeddingService = embeddingService ?? throw new ArgumentNullException(nameof(embeddingService));
|
||||
_callNgramGenerator = callNgramGenerator;
|
||||
_defaultOptions = options?.Value ?? new EnsembleOptions();
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
@@ -80,6 +84,39 @@ public sealed class EnsembleDecisionEngine : IEnsembleDecisionEngine
|
||||
availableWeight += options.EmbeddingWeight;
|
||||
}
|
||||
|
||||
// Byte-range tier signal
|
||||
if (options.ByteRangeWeight > 0)
|
||||
{
|
||||
var byteRangeContribution = ComputeByteRangeSignal(source, target, options);
|
||||
contributions.Add(byteRangeContribution);
|
||||
if (byteRangeContribution.IsAvailable)
|
||||
{
|
||||
availableWeight += options.ByteRangeWeight;
|
||||
}
|
||||
}
|
||||
|
||||
// Build-ID tier signal
|
||||
if (options.BuildIdWeight > 0)
|
||||
{
|
||||
var buildIdContribution = ComputeBuildIdSignal(source, target, options);
|
||||
contributions.Add(buildIdContribution);
|
||||
if (buildIdContribution.IsAvailable)
|
||||
{
|
||||
availableWeight += options.BuildIdWeight;
|
||||
}
|
||||
}
|
||||
|
||||
// Call n-gram tier signal
|
||||
if (options.CallNgramWeight > 0)
|
||||
{
|
||||
var callNgramContribution = ComputeCallNgramSignal(source, target, options);
|
||||
contributions.Add(callNgramContribution);
|
||||
if (callNgramContribution.IsAvailable)
|
||||
{
|
||||
availableWeight += options.CallNgramWeight;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute effective weights (normalize if some signals missing)
|
||||
var effectiveWeights = ComputeEffectiveWeights(contributions, options, availableWeight);
|
||||
|
||||
@@ -282,6 +319,98 @@ public sealed class EnsembleDecisionEngine : IEnsembleDecisionEngine
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Produces the byte-range tier contribution by running the rolling-hash diff
/// engine over the raw function bytes of both sides.
/// </summary>
/// <param name="source">Left-hand function analysis; must carry RawBytes for the signal to apply.</param>
/// <param name="target">Right-hand function analysis; must carry RawBytes for the signal to apply.</param>
/// <param name="options">Supplies the configured ByteRangeWeight.</param>
/// <returns>An unavailable contribution when either side lacks bytes; otherwise the similarity score.</returns>
private static SignalContribution ComputeByteRangeSignal(
    FunctionAnalysis source,
    FunctionAnalysis target,
    EnsembleOptions options)
{
    // Both sides need non-empty raw bytes; otherwise report the signal as
    // unavailable so its weight can be redistributed to other tiers.
    if (source.RawBytes is not { Length: > 0 } || target.RawBytes is not { Length: > 0 })
    {
        return new SignalContribution
        {
            SignalType = SignalType.ByteRange,
            RawScore = 0m,
            Weight = options.ByteRangeWeight,
            IsAvailable = false,
            Quality = SignalQuality.Unavailable
        };
    }

    // Rolling-hash chunk similarity over the raw function bytes.
    var comparison = new ByteRangeDiffEngine().Compare(source.RawBytes, target.RawBytes);

    return new SignalContribution
    {
        SignalType = SignalType.ByteRange,
        RawScore = (decimal)comparison.Similarity,
        Weight = options.ByteRangeWeight,
        IsAvailable = true,
        // A byte-identical (post-normalization) match is the strongest evidence.
        Quality = comparison.ExactMatch ? SignalQuality.High : SignalQuality.Normal
    };
}
|
||||
|
||||
/// <summary>
/// Produces the build-ID tier contribution: a binary match/no-match signal
/// based on case-insensitive equality of the two build-ID strings.
/// </summary>
/// <param name="source">Left-hand function analysis; must carry a BuildId for the signal to apply.</param>
/// <param name="target">Right-hand function analysis; must carry a BuildId for the signal to apply.</param>
/// <param name="options">Supplies the configured BuildIdWeight.</param>
/// <returns>An unavailable contribution when either build-ID is missing; otherwise 1.0 or 0.0.</returns>
private static SignalContribution ComputeBuildIdSignal(
    FunctionAnalysis source,
    FunctionAnalysis target,
    EnsembleOptions options)
{
    // Both sides must carry a build-ID for this tier to say anything.
    if (string.IsNullOrEmpty(source.BuildId) || string.IsNullOrEmpty(target.BuildId))
    {
        return new SignalContribution
        {
            SignalType = SignalType.BuildId,
            RawScore = 0m,
            Weight = options.BuildIdWeight,
            IsAvailable = false,
            Quality = SignalQuality.Unavailable
        };
    }

    // A build-ID either matches or it does not: matching IDs mean both
    // functions come from the same binary build.
    var sameBuild = string.Equals(source.BuildId, target.BuildId, StringComparison.OrdinalIgnoreCase);

    return new SignalContribution
    {
        SignalType = SignalType.BuildId,
        RawScore = sameBuild ? 1.0m : 0.0m,
        Weight = options.BuildIdWeight,
        IsAvailable = true,
        Quality = SignalQuality.High
    };
}
|
||||
|
||||
/// <summary>
/// Produces the call n-gram tier contribution by delegating fingerprint
/// similarity to the optionally injected generator.
/// </summary>
/// <param name="source">Left-hand function analysis; must carry a CallNgramFingerprint.</param>
/// <param name="target">Right-hand function analysis; must carry a CallNgramFingerprint.</param>
/// <param name="options">Supplies the configured CallNgramWeight.</param>
/// <returns>An unavailable contribution when a fingerprint or the generator is missing; otherwise the similarity.</returns>
private SignalContribution ComputeCallNgramSignal(
    FunctionAnalysis source,
    FunctionAnalysis target,
    EnsembleOptions options)
{
    // Requires fingerprints on both sides plus an injected generator
    // (the generator is an optional constructor dependency).
    if (_callNgramGenerator is null ||
        source.CallNgramFingerprint is null ||
        target.CallNgramFingerprint is null)
    {
        return new SignalContribution
        {
            SignalType = SignalType.CallNgram,
            RawScore = 0m,
            Weight = options.CallNgramWeight,
            IsAvailable = false,
            Quality = SignalQuality.Unavailable
        };
    }

    var score = _callNgramGenerator.ComputeSimilarity(
        source.CallNgramFingerprint, target.CallNgramFingerprint);

    return new SignalContribution
    {
        SignalType = SignalType.CallNgram,
        RawScore = (decimal)score,
        Weight = options.CallNgramWeight,
        IsAvailable = true,
        // A 0.9+ n-gram overlap is treated as a high-quality match.
        Quality = score >= 0.9 ? SignalQuality.High : SignalQuality.Normal
    };
}
|
||||
|
||||
private static SignalQuality AssessAstQuality(DecompiledAst ast1, DecompiledAst ast2)
|
||||
{
|
||||
var minNodes = Math.Min(ast1.Root.Children.Length, ast2.Root.Children.Length);
|
||||
@@ -316,25 +445,31 @@ public sealed class EnsembleDecisionEngine : IEnsembleDecisionEngine
|
||||
return new EffectiveWeights(
|
||||
options.SyntacticWeight,
|
||||
options.SemanticWeight,
|
||||
options.EmbeddingWeight);
|
||||
options.EmbeddingWeight,
|
||||
options.ByteRangeWeight,
|
||||
options.BuildIdWeight,
|
||||
options.CallNgramWeight);
|
||||
}
|
||||
|
||||
if (availableWeight <= 0)
|
||||
{
|
||||
return new EffectiveWeights(0m, 0m, 0m);
|
||||
}
|
||||
|
||||
// Redistribute weight from unavailable signals to available ones
|
||||
var syntactic = contributions.First(c => c.SignalType == SignalType.Syntactic);
|
||||
var semantic = contributions.First(c => c.SignalType == SignalType.Semantic);
|
||||
var embedding = contributions.First(c => c.SignalType == SignalType.Embedding);
|
||||
decimal GetWeight(SignalType type, decimal configWeight)
|
||||
{
|
||||
var signal = contributions.FirstOrDefault(c => c.SignalType == type);
|
||||
return signal is not null && signal.IsAvailable ? configWeight / availableWeight : 0m;
|
||||
}
|
||||
|
||||
var syntacticWeight = syntactic.IsAvailable
|
||||
? options.SyntacticWeight / availableWeight
|
||||
: 0m;
|
||||
var semanticWeight = semantic.IsAvailable
|
||||
? options.SemanticWeight / availableWeight
|
||||
: 0m;
|
||||
var embeddingWeight = embedding.IsAvailable
|
||||
? options.EmbeddingWeight / availableWeight
|
||||
: 0m;
|
||||
|
||||
return new EffectiveWeights(syntacticWeight, semanticWeight, embeddingWeight);
|
||||
return new EffectiveWeights(
|
||||
GetWeight(SignalType.Syntactic, options.SyntacticWeight),
|
||||
GetWeight(SignalType.Semantic, options.SemanticWeight),
|
||||
GetWeight(SignalType.Embedding, options.EmbeddingWeight),
|
||||
GetWeight(SignalType.ByteRange, options.ByteRangeWeight),
|
||||
GetWeight(SignalType.BuildId, options.BuildIdWeight),
|
||||
GetWeight(SignalType.CallNgram, options.CallNgramWeight));
|
||||
}
|
||||
|
||||
private static List<SignalContribution> AdjustContributionWeights(
|
||||
@@ -346,6 +481,9 @@ public sealed class EnsembleDecisionEngine : IEnsembleDecisionEngine
|
||||
SignalType.Syntactic => c with { Weight = weights.Syntactic },
|
||||
SignalType.Semantic => c with { Weight = weights.Semantic },
|
||||
SignalType.Embedding => c with { Weight = weights.Embedding },
|
||||
SignalType.ByteRange => c with { Weight = weights.ByteRange },
|
||||
SignalType.BuildId => c with { Weight = weights.BuildId },
|
||||
SignalType.CallNgram => c with { Weight = weights.CallNgram },
|
||||
_ => c
|
||||
}).ToList();
|
||||
}
|
||||
|
||||
@@ -58,6 +58,21 @@ public sealed record FunctionAnalysis
|
||||
/// Size of the function in bytes.
|
||||
/// </summary>
|
||||
public int? SizeBytes { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Raw function bytes for byte-range tier comparison.
|
||||
/// </summary>
|
||||
public byte[]? RawBytes { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Build-ID or equivalent binary identity string.
|
||||
/// </summary>
|
||||
public string? BuildId { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Call n-gram fingerprint for cross-compiler resilient matching.
|
||||
/// </summary>
|
||||
public Semantic.CallNgramFingerprint? CallNgramFingerprint { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -115,12 +130,29 @@ public sealed class EnsembleOptions
|
||||
/// </summary>
|
||||
public bool AdaptiveWeights { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// Weight for byte-range (rolling hash chunk) tier. Default: 0.0 (disabled).
|
||||
/// When enabled, reduces other weights proportionally.
|
||||
/// </summary>
|
||||
public decimal ByteRangeWeight { get; set; } = 0.0m;
|
||||
|
||||
/// <summary>
|
||||
/// Weight for build-ID tier. Default: 0.0 (disabled).
|
||||
/// </summary>
|
||||
public decimal BuildIdWeight { get; set; } = 0.0m;
|
||||
|
||||
/// <summary>
|
||||
/// Weight for call n-gram fingerprint tier. Default: 0.0 (disabled).
|
||||
/// </summary>
|
||||
public decimal CallNgramWeight { get; set; } = 0.0m;
|
||||
|
||||
/// <summary>
|
||||
/// Validates that weights sum to 1.0.
|
||||
/// </summary>
|
||||
public bool AreWeightsValid()
|
||||
{
|
||||
var total = SyntacticWeight + SemanticWeight + EmbeddingWeight;
|
||||
var total = SyntacticWeight + SemanticWeight + EmbeddingWeight
|
||||
+ ByteRangeWeight + BuildIdWeight + CallNgramWeight;
|
||||
return Math.Abs(total - 1.0m) < 0.001m;
|
||||
}
|
||||
|
||||
@@ -129,12 +161,16 @@ public sealed class EnsembleOptions
|
||||
/// </summary>
|
||||
public void NormalizeWeights()
|
||||
{
|
||||
var total = SyntacticWeight + SemanticWeight + EmbeddingWeight;
|
||||
var total = SyntacticWeight + SemanticWeight + EmbeddingWeight
|
||||
+ ByteRangeWeight + BuildIdWeight + CallNgramWeight;
|
||||
if (total > 0)
|
||||
{
|
||||
SyntacticWeight /= total;
|
||||
SemanticWeight /= total;
|
||||
EmbeddingWeight /= total;
|
||||
ByteRangeWeight /= total;
|
||||
BuildIdWeight /= total;
|
||||
CallNgramWeight /= total;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -249,7 +285,22 @@ public enum SignalType
|
||||
/// <summary>
|
||||
/// Exact normalized code hash match.
|
||||
/// </summary>
|
||||
ExactHash
|
||||
ExactHash,
|
||||
|
||||
/// <summary>
|
||||
/// Byte-range tier: rolling hash chunk similarity.
|
||||
/// </summary>
|
||||
ByteRange,
|
||||
|
||||
/// <summary>
|
||||
/// Build-ID tier: binary identity correlation.
|
||||
/// </summary>
|
||||
BuildId,
|
||||
|
||||
/// <summary>
|
||||
/// Call n-gram fingerprint tier: cross-compiler resilient matching.
|
||||
/// </summary>
|
||||
CallNgram
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -315,7 +366,10 @@ public enum ConfidenceLevel
|
||||
public sealed record EffectiveWeights(
|
||||
decimal Syntactic,
|
||||
decimal Semantic,
|
||||
decimal Embedding);
|
||||
decimal Embedding,
|
||||
decimal ByteRange = 0m,
|
||||
decimal BuildId = 0m,
|
||||
decimal CallNgram = 0m);
|
||||
|
||||
/// <summary>
|
||||
/// Batch comparison result.
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Decompiler\StellaOps.BinaryIndex.Decompiler.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Diff\StellaOps.BinaryIndex.Diff.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.ML\StellaOps.BinaryIndex.ML.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.BinaryIndex.Semantic\StellaOps.BinaryIndex.Semantic.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
@@ -202,16 +202,13 @@ public sealed class ValidationHarnessService : IValidationHarness
|
||||
return CreateFailedPairResult(pairRef, "Security pair not found in corpus");
|
||||
}
|
||||
|
||||
// Step 2: Recover symbols via ground-truth connectors
|
||||
// Placeholder: Would call ISymbolSourceConnector implementations
|
||||
// Step 2: Recover symbols from ground-truth metadata
|
||||
var (prePatchSymbols, postPatchSymbols) = await RecoverSymbolsAsync(pair, ct);
|
||||
|
||||
// Step 3: Lift to intermediate representation
|
||||
// Placeholder: Would call semantic analysis pipeline
|
||||
var (prePatchIr, postPatchIr) = await LiftToIrAsync(pair, prePatchSymbols, postPatchSymbols, ct);
|
||||
|
||||
// Step 4: Generate fingerprints
|
||||
// Placeholder: Would call fingerprint generator
|
||||
var (prePatchFingerprints, postPatchFingerprints) = await GenerateFingerprintsAsync(
|
||||
prePatchIr, postPatchIr, ct);
|
||||
|
||||
@@ -258,11 +255,40 @@ public sealed class ValidationHarnessService : IValidationHarness
|
||||
SecurityPair pair,
|
||||
CancellationToken ct)
|
||||
{
|
||||
// Placeholder: Would integrate with ISymbolSourceConnector implementations
|
||||
// For now, return empty symbol lists - actual implementation will come with GCF-002
|
||||
IReadOnlyList<SymbolInfo> prePatch = [];
|
||||
IReadOnlyList<SymbolInfo> postPatch = [];
|
||||
return Task.FromResult((prePatch, postPatch));
|
||||
var prePatchSymbols = new List<SymbolInfo>();
|
||||
var postPatchSymbols = new List<SymbolInfo>();
|
||||
|
||||
// Recover symbols from ground-truth metadata on the SecurityPair.
|
||||
// The pair stores observation IDs (not raw binary content), so symbol
|
||||
// information is derived from AffectedFunctions and ChangedFunctions.
|
||||
|
||||
// Affected functions provide pre/post addresses from ground-truth labels
|
||||
foreach (var af in pair.AffectedFunctions)
|
||||
{
|
||||
prePatchSymbols.Add(new SymbolInfo(af.Name, af.VulnerableAddress, 0));
|
||||
postPatchSymbols.Add(new SymbolInfo(af.Name, af.PatchedAddress, 0));
|
||||
}
|
||||
|
||||
// Changed functions provide size deltas from the patch
|
||||
foreach (var cf in pair.ChangedFunctions)
|
||||
{
|
||||
if (!prePatchSymbols.Any(s => string.Equals(s.Name, cf.Name, StringComparison.Ordinal)))
|
||||
{
|
||||
prePatchSymbols.Add(new SymbolInfo(cf.Name, 0, cf.VulnerableSize));
|
||||
}
|
||||
if (cf.ChangeType != Abstractions.ChangeType.Removed &&
|
||||
!postPatchSymbols.Any(s => string.Equals(s.Name, cf.Name, StringComparison.Ordinal)))
|
||||
{
|
||||
postPatchSymbols.Add(new SymbolInfo(cf.Name, 0, cf.PatchedSize));
|
||||
}
|
||||
}
|
||||
|
||||
_logger.LogDebug(
|
||||
"Recovered {Pre} pre-patch and {Post} post-patch symbols for pair {PairId}",
|
||||
prePatchSymbols.Count, postPatchSymbols.Count, pair.PairId);
|
||||
|
||||
return Task.FromResult<(IReadOnlyList<SymbolInfo>, IReadOnlyList<SymbolInfo>)>(
|
||||
(prePatchSymbols, postPatchSymbols));
|
||||
}
|
||||
|
||||
private Task<(IReadOnlyList<IrFunction> PrePatch, IReadOnlyList<IrFunction> PostPatch)> LiftToIrAsync(
|
||||
@@ -271,11 +297,47 @@ public sealed class ValidationHarnessService : IValidationHarness
|
||||
IReadOnlyList<SymbolInfo> postPatchSymbols,
|
||||
CancellationToken ct)
|
||||
{
|
||||
// Placeholder: Would integrate with semantic analysis pipeline
|
||||
// For now, return empty IR lists
|
||||
IReadOnlyList<IrFunction> prePatch = [];
|
||||
IReadOnlyList<IrFunction> postPatch = [];
|
||||
return Task.FromResult((prePatch, postPatch));
|
||||
// Since SecurityPair stores observation IDs (not raw binary streams),
|
||||
// we build simplified IR representations from the symbol metadata.
|
||||
// Real binary content would be resolved via an IBinaryContentResolver
|
||||
// in a full deployment; here we produce structural IR placeholders
|
||||
// that capture function size and address information for matching.
|
||||
|
||||
var prePatchIr = BuildIrFromSymbols(prePatchSymbols);
|
||||
var postPatchIr = BuildIrFromSymbols(postPatchSymbols);
|
||||
|
||||
_logger.LogDebug(
|
||||
"Lifted {Pre} pre-patch and {Post} post-patch IR functions for pair {PairId}",
|
||||
prePatchIr.Count, postPatchIr.Count, pair.PairId);
|
||||
|
||||
return Task.FromResult<(IReadOnlyList<IrFunction>, IReadOnlyList<IrFunction>)>(
|
||||
(prePatchIr, postPatchIr));
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds simplified IR stand-ins from symbol metadata: each function gets a
/// deterministic byte buffer whose length reflects the symbol size (64-byte
/// default when unknown) and whose content repeats the address bytes, so that
/// identical symbols yield identical IR bytes across runs.
/// </summary>
/// <param name="symbols">Symbols recovered from ground-truth metadata.</param>
/// <returns>One IrFunction per input symbol, in input order.</returns>
private static IReadOnlyList<IrFunction> BuildIrFromSymbols(IReadOnlyList<SymbolInfo> symbols)
{
    var functions = new List<IrFunction>(symbols.Count);

    foreach (var sym in symbols)
    {
        // Footprint: use the recorded size, falling back to 64 bytes.
        var size = sym.Size > 0 ? sym.Size : 64;
        var body = new byte[size];

        // Seed the buffer from the address so equal symbols hash equally.
        var seed = BitConverter.GetBytes(sym.Address);
        for (int offset = 0; offset < body.Length; offset++)
        {
            body[offset] = seed[offset % seed.Length];
        }

        functions.Add(new IrFunction(sym.Name, sym.Address, body));
    }

    return functions;
}
|
||||
|
||||
private Task<(IReadOnlyList<FunctionFingerprint> PrePatch, IReadOnlyList<FunctionFingerprint> PostPatch)> GenerateFingerprintsAsync(
|
||||
@@ -283,23 +345,150 @@ public sealed class ValidationHarnessService : IValidationHarness
|
||||
IReadOnlyList<IrFunction> postPatchIr,
|
||||
CancellationToken ct)
|
||||
{
|
||||
// Placeholder: Would integrate with fingerprint generator
|
||||
// For now, return empty fingerprint lists
|
||||
IReadOnlyList<FunctionFingerprint> prePatch = [];
|
||||
IReadOnlyList<FunctionFingerprint> postPatch = [];
|
||||
var prePatch = GenerateFingerprintsFromIr(prePatchIr);
|
||||
var postPatch = GenerateFingerprintsFromIr(postPatchIr);
|
||||
return Task.FromResult((prePatch, postPatch));
|
||||
}
|
||||
|
||||
/// <summary>
/// Derives fingerprints from IR functions: a SHA-256 identity hash plus rough
/// basic-block and instruction estimates. Functions with empty IR are skipped.
/// </summary>
/// <param name="irFunctions">IR functions to fingerprint.</param>
/// <returns>One fingerprint per non-empty IR function, in input order.</returns>
private static IReadOnlyList<FunctionFingerprint> GenerateFingerprintsFromIr(
    IReadOnlyList<IrFunction> irFunctions)
{
    var output = new List<FunctionFingerprint>();

    foreach (var fn in irFunctions)
    {
        if (fn.IrBytes.Length == 0)
        {
            continue;
        }

        // Identity hash over the raw IR bytes.
        var digest = System.Security.Cryptography.SHA256.HashData(fn.IrBytes);

        // Rough basic-block estimate: count branch-like x86 opcodes
        // (ret: C3/C2, jmp: E9/EB, short conditional jumps: 70-7F).
        var blocks = 1;
        for (int idx = 0; idx < fn.IrBytes.Length; idx++)
        {
            var op = fn.IrBytes[idx];
            if (op is 0xC3 or 0xC2 or 0xE9 or 0xEB || (op >= 0x70 && op <= 0x7F))
            {
                blocks++;
            }
        }

        // Rough instruction estimate assuming ~3 bytes per x86-64 instruction.
        var instructions = Math.Max(1, fn.IrBytes.Length / 3);

        output.Add(new FunctionFingerprint(
            fn.Name,
            fn.Address,
            digest,
            blocks,
            instructions));
    }

    return output;
}
|
||||
|
||||
private Task<IReadOnlyList<FunctionMatchResult>> MatchFunctionsAsync(
|
||||
IReadOnlyList<FunctionFingerprint> prePatchFingerprints,
|
||||
IReadOnlyList<FunctionFingerprint> postPatchFingerprints,
|
||||
MatcherConfiguration config,
|
||||
CancellationToken ct)
|
||||
{
|
||||
// Placeholder: Would integrate with function matcher
|
||||
// For now, return empty match results
|
||||
IReadOnlyList<FunctionMatchResult> matches = [];
|
||||
return Task.FromResult(matches);
|
||||
var results = new List<FunctionMatchResult>();
|
||||
|
||||
// Build hash lookup for post-patch fingerprints
|
||||
var postByHash = new Dictionary<string, FunctionFingerprint>();
|
||||
var postByName = new Dictionary<string, FunctionFingerprint>(StringComparer.Ordinal);
|
||||
|
||||
foreach (var fp in postPatchFingerprints)
|
||||
{
|
||||
var hashKey = Convert.ToHexStringLower(fp.Hash);
|
||||
postByHash.TryAdd(hashKey, fp);
|
||||
postByName.TryAdd(fp.Name, fp);
|
||||
}
|
||||
|
||||
var matchedPostNames = new HashSet<string>(StringComparer.Ordinal);
|
||||
|
||||
foreach (var preFp in prePatchFingerprints)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
var preHashKey = Convert.ToHexStringLower(preFp.Hash);
|
||||
|
||||
// Pass 1: Exact hash match (unchanged function)
|
||||
if (postByHash.TryGetValue(preHashKey, out var exactMatch))
|
||||
{
|
||||
matchedPostNames.Add(exactMatch.Name);
|
||||
results.Add(new FunctionMatchResult
|
||||
{
|
||||
PostPatchName = exactMatch.Name,
|
||||
PrePatchName = preFp.Name,
|
||||
Matched = true,
|
||||
SimilarityScore = 1.0,
|
||||
WasPatched = false,
|
||||
PatchDetected = false
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Pass 2: Name-based match (same name, different hash = patched)
|
||||
if (postByName.TryGetValue(preFp.Name, out var nameMatch))
|
||||
{
|
||||
matchedPostNames.Add(nameMatch.Name);
|
||||
|
||||
// Compute structural similarity via basic block count comparison
|
||||
var bbSimilarity = 1.0 - Math.Abs(preFp.BasicBlockCount - nameMatch.BasicBlockCount)
|
||||
/ (double)Math.Max(1, Math.Max(preFp.BasicBlockCount, nameMatch.BasicBlockCount));
|
||||
var instrSimilarity = 1.0 - Math.Abs(preFp.InstructionCount - nameMatch.InstructionCount)
|
||||
/ (double)Math.Max(1, Math.Max(preFp.InstructionCount, nameMatch.InstructionCount));
|
||||
var score = (bbSimilarity + instrSimilarity) / 2.0;
|
||||
|
||||
results.Add(new FunctionMatchResult
|
||||
{
|
||||
PostPatchName = nameMatch.Name,
|
||||
PrePatchName = preFp.Name,
|
||||
Matched = true,
|
||||
SimilarityScore = score,
|
||||
WasPatched = true,
|
||||
PatchDetected = true,
|
||||
MismatchCategory = score < config.MinimumSimilarity
|
||||
? MismatchCategory.StructureMismatch
|
||||
: null
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Pass 3: No match found (function removed in patch)
|
||||
// PostPatchName is required, so use the pre-patch name as a reference
|
||||
results.Add(new FunctionMatchResult
|
||||
{
|
||||
PostPatchName = preFp.Name,
|
||||
PrePatchName = preFp.Name,
|
||||
Matched = false,
|
||||
SimilarityScore = 0.0,
|
||||
WasPatched = false,
|
||||
PatchDetected = false,
|
||||
MismatchCategory = MismatchCategory.Removed
|
||||
});
|
||||
}
|
||||
|
||||
// Add unmatched post-patch functions (new functions added in the patch)
|
||||
foreach (var postFp in postPatchFingerprints)
|
||||
{
|
||||
if (!matchedPostNames.Contains(postFp.Name))
|
||||
{
|
||||
results.Add(new FunctionMatchResult
|
||||
{
|
||||
PostPatchName = postFp.Name,
|
||||
Matched = false,
|
||||
SimilarityScore = 0.0,
|
||||
WasPatched = false,
|
||||
PatchDetected = false,
|
||||
MismatchCategory = MismatchCategory.Added
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return Task.FromResult<IReadOnlyList<FunctionMatchResult>>(results);
|
||||
}
|
||||
|
||||
private static string? ComputeSbomHash(SecurityPair pair)
|
||||
|
||||
@@ -7,7 +7,7 @@ using FluentAssertions;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Normalization.Tests;
|
||||
|
||||
file sealed class TestElfMeterFactory : IMeterFactory
|
||||
internal sealed class TestElfMeterFactory : IMeterFactory
|
||||
{
|
||||
private readonly List<Meter> _meters = [];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user