238 lines
7.7 KiB
C#
238 lines
7.7 KiB
C#
// Copyright (c) StellaOps. All rights reserved.
|
|
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
|
|
|
using System.Collections.Immutable;
|
|
using System.Globalization;
|
|
|
|
namespace StellaOps.BinaryIndex.DeltaSig;
|
|
|
|
/// <summary>
/// Compares symbols between two versions of a binary and classifies each one as
/// added, removed, unchanged, patched, or modified, attaching a confidence score
/// and a human-readable explanation to every result.
/// </summary>
public sealed class SymbolChangeTracer : ISymbolChangeTracer
{
    // Thresholds that drive the classification cascade in DetermineChange.
    private const double PatchSimilarityFloor = 0.85;
    private const int PatchMaxBlockDelta = 5;
    private const double PatchConfidenceCap = 0.95;
    private const double ModifiedSimilarityFloor = 0.7;
    private const double SemanticMatchConfidence = 0.80;
    private const double LowSimilarityConfidenceFloor = 0.4;

    /// <inheritdoc />
    public SymbolMatchResult CompareSymbols(
        SymbolSignature? fromSymbol,
        SymbolSignature? toSymbol) =>
        (fromSymbol, toSymbol) switch
        {
            // Present only in the new version.
            (null, { } added) => DescribeAddedSymbol(added),

            // Present only in the old version.
            ({ } removed, null) => DescribeRemovedSymbol(removed),

            // Present on both sides: run the detailed hash/chunk comparison.
            ({ } before, { } after) => CompareExistingSymbols(before, after),

            // Nothing to compare; callers are expected to supply at least one side.
            (null, null) => throw new ArgumentException("Both symbols cannot be null"),
        };

    /// <inheritdoc />
    public IReadOnlyList<SymbolMatchResult> CompareAllSymbols(
        DeltaSignature fromSignature,
        DeltaSignature toSignature)
    {
        ArgumentNullException.ThrowIfNull(fromSignature);
        ArgumentNullException.ThrowIfNull(toSignature);

        // Index both sides by symbol name (ordinal, case-sensitive).
        var oldByName = fromSignature.Symbols.ToDictionary(s => s.Name, StringComparer.Ordinal);
        var newByName = toSignature.Symbols.ToDictionary(s => s.Name, StringComparer.Ordinal);

        // A sorted set yields the distinct union of names in ordinal order,
        // which keeps the result sequence deterministic.
        var orderedNames = new SortedSet<string>(oldByName.Keys, StringComparer.Ordinal);
        orderedNames.UnionWith(newByName.Keys);

        return orderedNames
            .Select(symbolName =>
            {
                // A missing entry leaves the out variable null, which
                // CompareSymbols interprets as "added" or "removed".
                oldByName.TryGetValue(symbolName, out var before);
                newByName.TryGetValue(symbolName, out var after);
                return CompareSymbols(before, after);
            })
            .ToList();
    }

    /// <summary>
    /// Builds the result for a symbol that exists only in the new version.
    /// </summary>
    private static SymbolMatchResult DescribeAddedSymbol(SymbolSignature added) => new()
    {
        SymbolName = added.Name,
        ExactMatch = false,
        Confidence = 1.0,
        ChangeType = SymbolChangeType.Added,
        SizeDelta = added.SizeBytes,
        ToHash = added.HashHex,
        ChangeExplanation = "Symbol added in new version"
    };

    /// <summary>
    /// Builds the result for a symbol that exists only in the old version.
    /// </summary>
    private static SymbolMatchResult DescribeRemovedSymbol(SymbolSignature removed) => new()
    {
        SymbolName = removed.Name,
        ExactMatch = false,
        Confidence = 1.0,
        ChangeType = SymbolChangeType.Removed,
        SizeDelta = -removed.SizeBytes,
        FromHash = removed.HashHex,
        ChangeExplanation = "Symbol removed in new version"
    };

    /// <summary>
    /// Compares a symbol that is present in both versions. Identical hashes
    /// (ignoring case) short-circuit to an unchanged result; otherwise chunk
    /// overlap and the basic-block delta are used to classify the change.
    /// </summary>
    private static SymbolMatchResult CompareExistingSymbols(
        SymbolSignature from,
        SymbolSignature to)
    {
        if (string.Equals(from.HashHex, to.HashHex, StringComparison.OrdinalIgnoreCase))
        {
            return new SymbolMatchResult
            {
                SymbolName = from.Name,
                ExactMatch = true,
                Confidence = 1.0,
                ChangeType = SymbolChangeType.Unchanged,
                SizeDelta = 0,
                FromHash = from.HashHex,
                ToHash = to.HashHex,
                MatchMethod = "ExactHash",
                ChangeExplanation = "No change detected"
            };
        }

        // The block delta is only defined when both sides report a block count.
        int? cfgBlockDelta = null;
        if (from.CfgBbCount is { } oldBlockCount && to.CfgBbCount is { } newBlockCount)
        {
            cfgBlockDelta = newBlockCount - oldBlockCount;
        }

        ImmutableArray<ChunkHash> oldChunks = from.Chunks ?? [];
        ImmutableArray<ChunkHash> newChunks = to.Chunks ?? [];
        var overlap = CompareChunks(oldChunks, newChunks);

        // Similarity is the fraction of the OLD chunks that still appear in the new symbol.
        var similarity = oldChunks.Length == 0
            ? 0.0
            : (double)overlap.matched / oldChunks.Length;

        var verdict = DetermineChange(from, to, similarity, cfgBlockDelta);

        return new SymbolMatchResult
        {
            SymbolName = from.Name,
            ExactMatch = false,
            ChunksMatched = overlap.matched,
            ChunksTotal = Math.Max(oldChunks.Length, newChunks.Length),
            Confidence = verdict.confidence,
            ChangeType = verdict.type,
            SizeDelta = to.SizeBytes - from.SizeBytes,
            CfgBlockDelta = cfgBlockDelta,
            MatchedChunkIndices = overlap.indices,
            FromHash = from.HashHex,
            ToHash = to.HashHex,
            MatchMethod = verdict.method,
            ChangeExplanation = verdict.explanation
        };
    }

    /// <summary>
    /// Finds which chunks of <paramref name="fromChunks"/> have a hash that also
    /// occurs anywhere in <paramref name="toChunks"/> (case-insensitive).
    /// </summary>
    /// <returns>
    /// The number of matching chunks and their indices within
    /// <paramref name="fromChunks"/>, in ascending order.
    /// </returns>
    private static (int matched, ImmutableArray<int> indices) CompareChunks(
        ImmutableArray<ChunkHash> fromChunks,
        ImmutableArray<ChunkHash> toChunks)
    {
        if (fromChunks.Length == 0 || toChunks.Length == 0)
        {
            return (0, []);
        }

        var knownHashes = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var chunk in toChunks)
        {
            knownHashes.Add(chunk.HashHex);
        }

        var hits = Enumerable.Range(0, fromChunks.Length)
            .Where(index => knownHashes.Contains(fromChunks[index].HashHex))
            .ToImmutableArray();

        return (hits.Length, hits);
    }

    /// <summary>
    /// Classifies a changed symbol. Rules are tried in order: high chunk
    /// similarity with a small basic-block delta, then moderately high chunk
    /// similarity, then matching semantic hashes, and finally a low-similarity
    /// fallback.
    /// </summary>
    /// <returns>
    /// The change type, a confidence value, an explanation string, and the name
    /// of the matching method that produced the verdict.
    /// </returns>
    private static (SymbolChangeType type, double confidence, string explanation, string method)
        DetermineChange(
            SymbolSignature from,
            SymbolSignature to,
            double chunkSimilarity,
            int? cfgDelta)
    {
        // Rule 1: most chunks kept and the block count moved only slightly => patch.
        if (chunkSimilarity >= PatchSimilarityFloor
            && cfgDelta is { } blockDelta
            && Math.Abs(blockDelta) <= PatchMaxBlockDelta)
        {
            var patchedText = string.Format(
                CultureInfo.InvariantCulture,
                "Function patched: {0} basic blocks changed",
                Math.Abs(blockDelta));

            return (
                SymbolChangeType.Patched,
                Math.Min(PatchConfidenceCap, chunkSimilarity),
                patchedText,
                "CFGHash+ChunkMatch");
        }

        // Rule 2: still largely the same chunks => modification, confidence tracks similarity.
        if (chunkSimilarity >= ModifiedSimilarityFloor)
        {
            var modifiedText = string.Format(
                CultureInfo.InvariantCulture,
                "Function modified: {0:P0} of code changed",
                1 - chunkSimilarity);

            return (SymbolChangeType.Modified, chunkSimilarity, modifiedText, "ChunkMatch");
        }

        // Rule 3: both sides carry a semantic hash and the hashes agree (ignoring case).
        var semanticHashesAgree =
            !string.IsNullOrEmpty(from.SemanticHashHex)
            && !string.IsNullOrEmpty(to.SemanticHashHex)
            && string.Equals(
                from.SemanticHashHex,
                to.SemanticHashHex,
                StringComparison.OrdinalIgnoreCase);

        if (semanticHashesAgree)
        {
            return (
                SymbolChangeType.Modified,
                SemanticMatchConfidence,
                "Function semantically equivalent (compiler variation)",
                "SemanticHash");
        }

        // Fallback: low similarity; confidence never drops below the floor.
        return (
            SymbolChangeType.Modified,
            Math.Max(LowSimilarityConfidenceFloor, chunkSimilarity),
            "Function significantly modified",
            "ChunkMatch");
    }
}
|