save progress

This commit is contained in:
StellaOps Bot
2026-01-06 09:42:02 +02:00
parent 94d68bee8b
commit 37e11918e0
443 changed files with 85863 additions and 897 deletions

View File

@@ -1,3 +1,5 @@
using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.Builders;
/// <summary>
@@ -109,6 +111,12 @@ public sealed record FunctionFingerprint
/// Source line number if debug info available.
/// </summary>
public int? SourceLine { get; init; }
/// <summary>
/// Semantic fingerprint for enhanced similarity comparison.
/// Uses IR-level analysis for resilience to compiler optimizations.
/// </summary>
/// <remarks>
/// Optional: <c>null</c> when no semantic data is attached to this function.
/// The patch diff engine only adds the semantic term to its similarity score
/// when both fingerprints being compared carry a non-null value here.
/// </remarks>
public Semantic.SemanticFingerprint? SemanticFingerprint { get; init; }
}
/// <summary>

View File

@@ -192,25 +192,42 @@ public sealed record HashWeights
/// <summary>
/// Weight for basic block hash comparison.
/// </summary>
public decimal BasicBlockWeight { get; init; } = 0.5m;
public decimal BasicBlockWeight { get; init; } = 0.4m;
/// <summary>
/// Weight for CFG hash comparison.
/// </summary>
public decimal CfgWeight { get; init; } = 0.3m;
public decimal CfgWeight { get; init; } = 0.25m;
/// <summary>
/// Weight for string refs hash comparison.
/// </summary>
public decimal StringRefsWeight { get; init; } = 0.2m;
public decimal StringRefsWeight { get; init; } = 0.15m;
/// <summary>
/// Weight for semantic fingerprint comparison.
/// Only used when both fingerprints have semantic data; otherwise the diff engine
/// leaves this weight out of both the matched and the total weight, so the final
/// score is normalized over the remaining components.
/// </summary>
/// <remarks>
/// A value of zero disables the semantic comparison entirely.
/// </remarks>
public decimal SemanticWeight { get; init; } = 0.2m;
/// <summary>
/// Default weights: a new instance with every weight left at its property initializer.
/// </summary>
/// <remarks>
/// Each access constructs a fresh <see cref="HashWeights"/> instance.
/// </remarks>
public static HashWeights Default => new();
/// <summary>
/// Weight profile for traditional mode, i.e. without semantic analysis:
/// the semantic weight is zero and the three hash weights sum to 1.0.
/// </summary>
/// <remarks>
/// Each access constructs a fresh <see cref="HashWeights"/> instance.
/// </remarks>
public static HashWeights Traditional
{
    get
    {
        return new HashWeights
        {
            BasicBlockWeight = 0.5m,
            CfgWeight = 0.3m,
            StringRefsWeight = 0.2m,
            SemanticWeight = 0.0m,
        };
    }
}
/// <summary>
/// Validates that weights sum to 1.0.
/// </summary>
public bool IsValid => Math.Abs(BasicBlockWeight + CfgWeight + StringRefsWeight - 1.0m) < 0.001m;
public bool IsValid => Math.Abs(BasicBlockWeight + CfgWeight + StringRefsWeight + SemanticWeight - 1.0m) < 0.001m;
}

View File

@@ -1,4 +1,5 @@
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.Builders;
@@ -202,6 +203,16 @@ public sealed class PatchDiffEngine : IPatchDiffEngine
matchedWeight += weights.StringRefsWeight;
}
// Include semantic fingerprint similarity if available
if (weights.SemanticWeight > 0 &&
a.SemanticFingerprint is not null &&
b.SemanticFingerprint is not null)
{
totalWeight += weights.SemanticWeight;
var semanticSimilarity = ComputeSemanticSimilarity(a.SemanticFingerprint, b.SemanticFingerprint);
matchedWeight += weights.SemanticWeight * semanticSimilarity;
}
// Size similarity bonus (if sizes are within 10%, add small bonus)
if (a.Size > 0 && b.Size > 0)
{
@@ -216,6 +227,86 @@ public sealed class PatchDiffEngine : IPatchDiffEngine
return totalWeight > 0 ? matchedWeight / totalWeight : 0m;
}
/// <summary>
/// Scores how alike two semantic fingerprints are, in the range 0 to 1.
/// </summary>
/// <param name="a">First semantic fingerprint.</param>
/// <param name="b">Second semantic fingerprint.</param>
/// <returns>
/// 1.0 when <c>a.HashEquals(b)</c> reports equality; otherwise a weighted blend of the
/// per-component similarities (graph hash, operation hash, data-flow hash, API calls).
/// </returns>
private static decimal ComputeSemanticSimilarity(
    Semantic.SemanticFingerprint a,
    Semantic.SemanticFingerprint b)
{
    // Equal fingerprints short-circuit to a perfect score.
    if (a.HashEquals(b))
    {
        return 1.0m;
    }

    // Component weights: graph structure 40%, operation sequence 25%,
    // data flow 20%, API calls 15%.
    decimal score = ComputeHashSimilarity(a.GraphHash, b.GraphHash) * 0.40m;
    score += ComputeHashSimilarity(a.OperationHash, b.OperationHash) * 0.25m;
    score += ComputeHashSimilarity(a.DataFlowHash, b.DataFlowHash) * 0.20m;
    score += ComputeApiCallSimilarity(a.ApiCalls, b.ApiCalls) * 0.15m;
    return score;
}
/// <summary>
/// Computes the bitwise (Hamming) similarity of two hashes as a fraction in [0, 1].
/// </summary>
/// <param name="hashA">First hash.</param>
/// <param name="hashB">Second hash.</param>
/// <returns>
/// 0 when either hash is empty, 1 when both are byte-for-byte equal, otherwise the
/// number of matching bits divided by the bit length of the longer hash.
/// </returns>
/// <remarks>
/// The result is symmetric: bytes that exist in only one of the hashes count as
/// fully mismatched, regardless of which argument is the longer one.
/// </remarks>
private static decimal ComputeHashSimilarity(byte[] hashA, byte[] hashB)
{
    if (hashA.Length == 0 || hashB.Length == 0)
    {
        return 0m;
    }

    if (hashA.AsSpan().SequenceEqual(hashB))
    {
        return 1.0m;
    }

    int sharedLength = Math.Min(hashA.Length, hashB.Length);

    // Normalize by the longer hash. Using hashA.Length alone made the score depend on
    // argument order and let a shorter hashA that is a prefix of hashB score 1.0.
    int totalBits = Math.Max(hashA.Length, hashB.Length) * 8;

    // Count matching bits over the overlapping prefix.
    int matchingBits = 0;
    for (int i = 0; i < sharedLength; i++)
    {
        byte differingBits = (byte)(hashA[i] ^ hashB[i]);
        matchingBits += 8 - PopCount(differingBits);
    }

    return (decimal)matchingBits / totalBits;
}
/// <summary>
/// Returns the number of set bits in <paramref name="value"/> (0 through 8).
/// </summary>
/// <param name="value">Byte whose set bits are counted.</param>
/// <returns>The population count of <paramref name="value"/>.</returns>
private static int PopCount(byte value)
{
    // Delegate to the runtime's population-count helper instead of a hand-rolled bit loop.
    return System.Numerics.BitOperations.PopCount((uint)value);
}
/// <summary>
/// Computes the Jaccard similarity (intersection size over union size) of two API call lists,
/// comparing names ordinally and ignoring duplicates.
/// </summary>
/// <param name="apiCallsA">API call names of the first fingerprint.</param>
/// <param name="apiCallsB">API call names of the second fingerprint.</param>
/// <returns>
/// 1.0 when both lists are empty, 0.0 when exactly one is empty, otherwise the fraction of
/// distinct names shared by both lists.
/// </returns>
/// <remarks>
/// A default (uninitialized) <c>ImmutableArray</c> is treated as empty instead of throwing.
/// </remarks>
private static decimal ComputeApiCallSimilarity(
    System.Collections.Immutable.ImmutableArray<string> apiCallsA,
    System.Collections.Immutable.ImmutableArray<string> apiCallsB)
{
    // IsDefaultOrEmpty also covers default(ImmutableArray<string>), on which IsEmpty throws.
    bool aEmpty = apiCallsA.IsDefaultOrEmpty;
    bool bEmpty = apiCallsB.IsDefaultOrEmpty;

    if (aEmpty && bEmpty)
    {
        return 1.0m;
    }

    if (aEmpty || bEmpty)
    {
        return 0.0m;
    }

    var setA = new HashSet<string>(apiCallsA, StringComparer.Ordinal);
    var setB = new HashSet<string>(apiCallsB, StringComparer.Ordinal);

    // Capture |A| before shrinking setA in place to A ∩ B.
    int distinctA = setA.Count;
    setA.IntersectWith(setB);
    int intersection = setA.Count;

    // |A ∪ B| = |A| + |B| - |A ∩ B|; at least 1 here because both inputs are non-empty.
    int union = distinctA + setB.Count - intersection;

    return (decimal)intersection / union;
}
/// <inheritdoc />
public IReadOnlyDictionary<string, string> FindFunctionMappings(
IReadOnlyList<FunctionFingerprint> vulnerable,

View File

@@ -20,5 +20,6 @@
<ItemGroup>
<ProjectReference Include="../StellaOps.BinaryIndex.Core/StellaOps.BinaryIndex.Core.csproj" />
<ProjectReference Include="../StellaOps.BinaryIndex.Fingerprints/StellaOps.BinaryIndex.Fingerprints.csproj" />
<ProjectReference Include="../StellaOps.BinaryIndex.Semantic/StellaOps.BinaryIndex.Semantic.csproj" />
</ItemGroup>
</Project>