Merge branch 'main' of https://git.stella-ops.org/stella-ops.org/git.stella-ops.org
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Builders;
|
||||
|
||||
/// <summary>
|
||||
@@ -109,6 +111,12 @@ public sealed record FunctionFingerprint
|
||||
/// Source line number if debug info available.
|
||||
/// </summary>
|
||||
public int? SourceLine { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Semantic fingerprint for enhanced similarity comparison.
|
||||
/// Uses IR-level analysis for resilience to compiler optimizations.
|
||||
/// </summary>
|
||||
public Semantic.SemanticFingerprint? SemanticFingerprint { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -192,25 +192,42 @@ public sealed record HashWeights
|
||||
/// <summary>
|
||||
/// Weight for basic block hash comparison.
|
||||
/// </summary>
|
||||
public decimal BasicBlockWeight { get; init; } = 0.5m;
|
||||
public decimal BasicBlockWeight { get; init; } = 0.4m;
|
||||
|
||||
/// <summary>
|
||||
/// Weight for CFG hash comparison.
|
||||
/// </summary>
|
||||
public decimal CfgWeight { get; init; } = 0.3m;
|
||||
public decimal CfgWeight { get; init; } = 0.25m;
|
||||
|
||||
/// <summary>
|
||||
/// Weight for string refs hash comparison.
|
||||
/// </summary>
|
||||
public decimal StringRefsWeight { get; init; } = 0.2m;
|
||||
public decimal StringRefsWeight { get; init; } = 0.15m;
|
||||
|
||||
/// <summary>
|
||||
/// Weight for semantic fingerprint comparison.
|
||||
/// Only used when both fingerprints have semantic data.
|
||||
/// </summary>
|
||||
public decimal SemanticWeight { get; init; } = 0.2m;
|
||||
|
||||
/// <summary>
|
||||
/// Default weights.
|
||||
/// </summary>
|
||||
public static HashWeights Default => new();
|
||||
|
||||
/// <summary>
|
||||
/// Weights without semantic analysis (traditional mode).
|
||||
/// </summary>
|
||||
public static HashWeights Traditional => new()
|
||||
{
|
||||
BasicBlockWeight = 0.5m,
|
||||
CfgWeight = 0.3m,
|
||||
StringRefsWeight = 0.2m,
|
||||
SemanticWeight = 0.0m
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Validates that weights sum to 1.0.
|
||||
/// </summary>
|
||||
public bool IsValid => Math.Abs(BasicBlockWeight + CfgWeight + StringRefsWeight - 1.0m) < 0.001m;
|
||||
public bool IsValid => Math.Abs(BasicBlockWeight + CfgWeight + StringRefsWeight + SemanticWeight - 1.0m) < 0.001m;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Builders;
|
||||
|
||||
@@ -202,6 +203,16 @@ public sealed class PatchDiffEngine : IPatchDiffEngine
|
||||
matchedWeight += weights.StringRefsWeight;
|
||||
}
|
||||
|
||||
// Include semantic fingerprint similarity if available
|
||||
if (weights.SemanticWeight > 0 &&
|
||||
a.SemanticFingerprint is not null &&
|
||||
b.SemanticFingerprint is not null)
|
||||
{
|
||||
totalWeight += weights.SemanticWeight;
|
||||
var semanticSimilarity = ComputeSemanticSimilarity(a.SemanticFingerprint, b.SemanticFingerprint);
|
||||
matchedWeight += weights.SemanticWeight * semanticSimilarity;
|
||||
}
|
||||
|
||||
// Size similarity bonus (if sizes are within 10%, add small bonus)
|
||||
if (a.Size > 0 && b.Size > 0)
|
||||
{
|
||||
@@ -216,6 +227,86 @@ public sealed class PatchDiffEngine : IPatchDiffEngine
|
||||
return totalWeight > 0 ? matchedWeight / totalWeight : 0m;
|
||||
}
|
||||
|
||||
/// <summary>
/// Scores how alike two semantic fingerprints are.
/// </summary>
/// <param name="a">First semantic fingerprint.</param>
/// <param name="b">Second semantic fingerprint.</param>
/// <returns>
/// <c>1.0</c> when <c>a.HashEquals(b)</c> reports a match; otherwise a weighted
/// blend of the graph-hash, operation-hash, data-flow-hash and API-call
/// similarities of the two fingerprints.
/// </returns>
private static decimal ComputeSemanticSimilarity(
    Semantic.SemanticFingerprint a,
    Semantic.SemanticFingerprint b)
{
    // Relative contribution of each fingerprint component; the four shares sum to 1.
    const decimal GraphShare = 0.40m;
    const decimal OperationShare = 0.25m;
    const decimal DataFlowShare = 0.20m;
    const decimal ApiCallShare = 0.15m;

    // Short-circuit: fingerprints that HashEquals considers equal need no component scoring.
    if (a.HashEquals(b))
    {
        return 1.0m;
    }

    decimal graphPart = ComputeHashSimilarity(a.GraphHash, b.GraphHash) * GraphShare;
    decimal operationPart = ComputeHashSimilarity(a.OperationHash, b.OperationHash) * OperationShare;
    decimal dataFlowPart = ComputeHashSimilarity(a.DataFlowHash, b.DataFlowHash) * DataFlowShare;
    decimal apiCallPart = ComputeApiCallSimilarity(a.ApiCalls, b.ApiCalls) * ApiCallShare;

    return graphPart + operationPart + dataFlowPart + apiCallPart;
}
|
||||
|
||||
/// <summary>
/// Computes a bitwise (Hamming) similarity between two hashes.
/// </summary>
/// <param name="hashA">First hash.</param>
/// <param name="hashB">Second hash.</param>
/// <returns>
/// <c>0</c> when either hash is empty, <c>1.0</c> when both are byte-for-byte
/// identical, otherwise the number of matching bits in the overlapping prefix
/// divided by the bit length of the longer hash.
/// </returns>
/// <remarks>
/// The result is symmetric: bits that exist in only one of the hashes count as
/// mismatches regardless of argument order, so a short hash that happens to be
/// a prefix of a longer one never scores as a perfect match.
/// </remarks>
private static decimal ComputeHashSimilarity(byte[] hashA, byte[] hashB)
{
    if (hashA.Length == 0 || hashB.Length == 0)
    {
        return 0m;
    }

    if (hashA.AsSpan().SequenceEqual(hashB))
    {
        return 1.0m;
    }

    // Compare only the bytes both hashes have, but normalise by the longer
    // hash so the unmatched tail is penalised no matter which side it is on.
    int sharedLength = Math.Min(hashA.Length, hashB.Length);
    int totalBits = Math.Max(hashA.Length, hashB.Length) * 8;

    int matchingBits = 0;
    for (int i = 0; i < sharedLength; i++)
    {
        // XOR leaves a 1 for every differing bit; the remaining bits of the byte match.
        matchingBits += 8 - PopCount((byte)(hashA[i] ^ hashB[i]));
    }

    return (decimal)matchingBits / totalBits;
}

/// <summary>
/// Counts the bits set to 1 in <paramref name="value"/>.
/// </summary>
/// <param name="value">Byte to inspect.</param>
/// <returns>Number of set bits, from 0 to 8.</returns>
private static int PopCount(byte value) => System.Numerics.BitOperations.PopCount((uint)value);
|
||||
|
||||
/// <summary>
/// Computes the Jaccard similarity (|A ∩ B| / |A ∪ B|) of two API call lists,
/// treating each list as a set of ordinal-compared names.
/// </summary>
/// <param name="apiCallsA">API calls of the first fingerprint.</param>
/// <param name="apiCallsB">API calls of the second fingerprint.</param>
/// <returns>
/// <c>1.0</c> when both lists are empty, <c>0.0</c> when exactly one is empty,
/// otherwise the ratio of shared distinct names to all distinct names.
/// </returns>
/// <remarks>
/// An uninitialised (<c>default</c>) array is treated the same as an empty one;
/// <c>IsEmpty</c> would throw on it.
/// </remarks>
private static decimal ComputeApiCallSimilarity(
    System.Collections.Immutable.ImmutableArray<string> apiCallsA,
    System.Collections.Immutable.ImmutableArray<string> apiCallsB)
{
    bool noCallsA = apiCallsA.IsDefaultOrEmpty;
    bool noCallsB = apiCallsB.IsDefaultOrEmpty;

    if (noCallsA && noCallsB)
    {
        return 1.0m;
    }

    if (noCallsA || noCallsB)
    {
        return 0.0m;
    }

    var shared = new HashSet<string>(apiCallsA, StringComparer.Ordinal);
    var setB = new HashSet<string>(apiCallsB, StringComparer.Ordinal);

    int distinctA = shared.Count;
    shared.IntersectWith(setB);
    int intersection = shared.Count;

    // Inclusion–exclusion: |A ∪ B| = |A| + |B| − |A ∩ B|.
    // Both sets are non-empty here, so the union is at least 1.
    int union = distinctA + setB.Count - intersection;

    return (decimal)intersection / union;
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyDictionary<string, string> FindFunctionMappings(
|
||||
IReadOnlyList<FunctionFingerprint> vulnerable,
|
||||
|
||||
@@ -20,5 +20,6 @@
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="../StellaOps.BinaryIndex.Core/StellaOps.BinaryIndex.Core.csproj" />
|
||||
<ProjectReference Include="../StellaOps.BinaryIndex.Fingerprints/StellaOps.BinaryIndex.Fingerprints.csproj" />
|
||||
<ProjectReference Include="../StellaOps.BinaryIndex.Semantic/StellaOps.BinaryIndex.Semantic.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
||||
Reference in New Issue
Block a user