// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
|
|
using System.Collections.Immutable;
|
|
using FluentAssertions;
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Logging.Abstractions;
|
|
using StellaOps.BinaryIndex.Disassembly;
|
|
using Xunit;
|
|
|
|
namespace StellaOps.BinaryIndex.Semantic.Tests.Integration;
|
|
|
|
/// <summary>
/// End-to-end integration tests for the semantic diffing pipeline.
/// Each test feeds hand-built <see cref="DisassembledInstruction"/> lists through
/// lifting, semantic graph extraction and fingerprint generation, and then (where
/// relevant) compares the resulting fingerprints with the semantic matcher.
/// </summary>
/// <remarks>
/// A dedicated service provider is built in the constructor and released in
/// <see cref="Dispose"/>, so every test instance owns (and cleans up) its own container.
/// </remarks>
[Trait("Category", "Integration")]
public class EndToEndSemanticDiffTests : IDisposable
{
    // Owned by this test instance; kept in a field so Dispose() can release it.
    private readonly ServiceProvider _provider;
    private readonly IIrLiftingService _liftingService;
    private readonly ISemanticGraphExtractor _graphExtractor;
    private readonly ISemanticFingerprintGenerator _fingerprintGenerator;
    private readonly ISemanticMatcher _matcher;

    /// <summary>
    /// Builds a service provider with no-op logging plus the services registered by
    /// <c>AddBinaryIndexSemantic</c>, and resolves the four pipeline services used by the tests.
    /// </summary>
    public EndToEndSemanticDiffTests()
    {
        var services = new ServiceCollection();
        services.AddLogging(builder => builder.AddProvider(NullLoggerProvider.Instance));
        services.AddBinaryIndexSemantic();
        _provider = services.BuildServiceProvider();

        _liftingService = _provider.GetRequiredService<IIrLiftingService>();
        _graphExtractor = _provider.GetRequiredService<ISemanticGraphExtractor>();
        _fingerprintGenerator = _provider.GetRequiredService<ISemanticFingerprintGenerator>();
        _matcher = _provider.GetRequiredService<ISemanticMatcher>();
    }

    /// <summary>
    /// Disposes the service provider created in the constructor (and with it any
    /// disposable services the container created).
    /// </summary>
    public void Dispose()
    {
        _provider.Dispose();
        GC.SuppressFinalize(this);
    }

    [Fact]
    public async Task EndToEnd_IdenticalFunctions_ShouldProducePerfectMatch()
    {
        // Arrange - the same instruction list is used for both sides
        var instructions = CreateSimpleAddFunction();

        // Act - process both through the full pipeline, then match
        var fingerprint1 = await ProcessFullPipelineAsync(instructions, "func1");
        var fingerprint2 = await ProcessFullPipelineAsync(instructions, "func2");
        var result = await _matcher.MatchAsync(fingerprint1, fingerprint2);

        // Assert
        result.OverallSimilarity.Should().Be(1.0m);
        result.Confidence.Should().Be(MatchConfidence.VeryHigh);
    }

    [Fact]
    public async Task EndToEnd_SameStructureDifferentRegisters_ShouldProduceHighSimilarity()
    {
        // Arrange - two functions with the same structure but different register allocation
        //   mov rax, rdi   vs   mov rbx, rsi   (move argument to temp)
        //   add rax, 1     vs   add rbx, 1     (add immediate)
        //   ret            vs   ret
        var func1 = new List<DisassembledInstruction>
        {
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "add", "rax, 1", InstructionKind.Arithmetic),
            CreateInstruction(0x1007, "ret", "", InstructionKind.Return),
        };

        var func2 = new List<DisassembledInstruction>
        {
            CreateInstruction(0x2000, "mov", "rbx, rsi", InstructionKind.Move),
            CreateInstruction(0x2003, "add", "rbx, 1", InstructionKind.Arithmetic),
            CreateInstruction(0x2007, "ret", "", InstructionKind.Return),
        };

        // Act
        var fingerprint1 = await ProcessFullPipelineAsync(func1, "func1");
        var fingerprint2 = await ProcessFullPipelineAsync(func2, "func2");
        var result = await _matcher.MatchAsync(fingerprint1, fingerprint2);

        // Assert - semantic analysis should recognize these as similar
        result.OverallSimilarity.Should().BeGreaterThanOrEqualTo(0.7m,
            "Semantically equivalent functions with different registers should have high similarity");
        result.Confidence.Should().BeOneOf(MatchConfidence.High, MatchConfidence.VeryHigh);
    }

    [Fact]
    public async Task EndToEnd_DifferentFunctions_ShouldProduceLowSimilarity()
    {
        // Arrange - add vs. multiply
        var addFunc = CreateSimpleAddFunction();
        var multiplyFunc = CreateSimpleMultiplyFunction();

        // Act
        var fingerprint1 = await ProcessFullPipelineAsync(addFunc, "add_func");
        var fingerprint2 = await ProcessFullPipelineAsync(multiplyFunc, "multiply_func");
        var result = await _matcher.MatchAsync(fingerprint1, fingerprint2);

        // Assert
        result.OverallSimilarity.Should().BeLessThan(0.9m,
            "Different functions should have lower similarity");
    }

    [Fact]
    public async Task EndToEnd_FunctionWithExternalCall_ShouldCaptureApiCalls()
    {
        // Arrange - function that calls an external function
        var funcWithCall = new List<DisassembledInstruction>
        {
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "call", "malloc", InstructionKind.Call),
            CreateInstruction(0x1008, "ret", "", InstructionKind.Return),
        };

        // Act
        var fingerprint = await ProcessFullPipelineAsync(funcWithCall, "func_with_call");

        // Assert
        fingerprint.ApiCalls.Should().Contain("malloc");
    }

    [Fact]
    public async Task EndToEnd_EmptyFunction_ShouldHandleGracefully()
    {
        // Arrange - minimal function (just ret)
        var minimalFunc = new List<DisassembledInstruction>
        {
            CreateInstruction(0x1000, "ret", "", InstructionKind.Return),
        };

        // Act
        var fingerprint = await ProcessFullPipelineAsync(minimalFunc, "minimal");

        // Assert - smoke check only: the pipeline completes and yields a fingerprint
        // with a non-negative node count.
        fingerprint.Should().NotBeNull();
        fingerprint.NodeCount.Should().BeGreaterThanOrEqualTo(0);
    }

    [Fact]
    public async Task EndToEnd_ConditionalBranch_ShouldCaptureControlFlow()
    {
        // Arrange - function with a conditional branch whose two paths join at the ret:
        //   0x1000 test rdi, rdi
        //   0x1003 je   0x100a      ; zero     -> xor path
        //   0x1005 mov  rax, rdi    ; non-zero -> mov path
        //   0x1008 jmp  0x100c      ; skip the xor, go to ret
        //   0x100a xor  eax, eax
        //   0x100c ret
        // The jmp targets the ret at 0x100c so that both paths stay inside the function.
        var branchFunc = new List<DisassembledInstruction>
        {
            CreateInstruction(0x1000, "test", "rdi, rdi", InstructionKind.Logic),
            CreateInstruction(0x1003, "je", "0x100a", InstructionKind.ConditionalBranch),
            CreateInstruction(0x1005, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1008, "jmp", "0x100c", InstructionKind.Branch),
            CreateInstruction(0x100a, "xor", "eax, eax", InstructionKind.Logic),
            CreateInstruction(0x100c, "ret", "", InstructionKind.Return),
        };

        // Act
        var fingerprint = await ProcessFullPipelineAsync(branchFunc, "branch_func");

        // Assert
        fingerprint.CyclomaticComplexity.Should().BeGreaterThan(1,
            "Function with branches should have cyclomatic complexity > 1");
        fingerprint.EdgeCount.Should().BeGreaterThan(0,
            "Function with branches should have edges in the semantic graph");
    }

    [Fact]
    public async Task EndToEnd_DeterministicPipeline_ShouldProduceConsistentResults()
    {
        // Arrange
        var instructions = CreateSimpleAddFunction();

        // Act - process the same input three times
        var fingerprint1 = await ProcessFullPipelineAsync(instructions, "func");
        var fingerprint2 = await ProcessFullPipelineAsync(instructions, "func");
        var fingerprint3 = await ProcessFullPipelineAsync(instructions, "func");

        // Assert - all fingerprints should be identical
        fingerprint1.GraphHashHex.Should().Be(fingerprint2.GraphHashHex);
        fingerprint2.GraphHashHex.Should().Be(fingerprint3.GraphHashHex);
        fingerprint1.OperationHashHex.Should().Be(fingerprint2.OperationHashHex);
        fingerprint2.OperationHashHex.Should().Be(fingerprint3.OperationHashHex);
    }

    [Fact]
    public async Task EndToEnd_FindMatchesInCorpus_ShouldReturnBestMatches()
    {
        // Arrange - create a corpus of functions
        var targetFunc = CreateSimpleAddFunction();
        var targetFingerprint = await ProcessFullPipelineAsync(targetFunc, "target");

        var corpusFingerprints = new List<SemanticFingerprint>
        {
            await ProcessFullPipelineAsync(CreateSimpleAddFunction(), "add1"),
            await ProcessFullPipelineAsync(CreateSimpleMultiplyFunction(), "mul1"),
            await ProcessFullPipelineAsync(CreateSimpleAddFunction(), "add2"),
            await ProcessFullPipelineAsync(CreateSimpleSubtractFunction(), "sub1"),
        };

        // Act
        var matches = await _matcher.FindMatchesAsync(
            targetFingerprint,
            corpusFingerprints.ToAsyncEnumerable(),
            minSimilarity: 0.5m,
            maxResults: 5);

        // Assert
        matches.Should().HaveCountGreaterThan(0);
        // The identical add functions should rank highest
        matches[0].OverallSimilarity.Should().BeGreaterThanOrEqualTo(0.9m);
    }

    [Fact]
    public async Task EndToEnd_MatchWithDeltas_ShouldIdentifyDifferences()
    {
        // Arrange - two similar but not identical functions
        var func1 = CreateSimpleAddFunction();
        var func2 = CreateSimpleSubtractFunction();

        var fingerprint1 = await ProcessFullPipelineAsync(func1, "add_func");
        var fingerprint2 = await ProcessFullPipelineAsync(func2, "sub_func");

        // Act
        var result = await _matcher.MatchAsync(
            fingerprint1,
            fingerprint2,
            new MatchOptions { ComputeDeltas = true });

        // Assert
        result.Deltas.Should().NotBeEmpty(
            "Match between different functions should identify deltas");
    }

    /// <summary>
    /// Runs the three pipeline stages in order: lift to IR, extract the semantic graph,
    /// generate the fingerprint.
    /// </summary>
    /// <param name="instructions">Instructions of the function under test.</param>
    /// <param name="functionName">Name passed to the lifting service.</param>
    /// <returns>The fingerprint produced by the fingerprint generator.</returns>
    private async Task<SemanticFingerprint> ProcessFullPipelineAsync(
        IReadOnlyList<DisassembledInstruction> instructions,
        string functionName)
    {
        // The first instruction's address is used as the function start; 0 for an empty list.
        var startAddress = instructions.Count > 0 ? instructions[0].Address : 0UL;

        // Step 1: Lift to IR (all fixtures in this class are x86_64)
        var lifted = await _liftingService.LiftToIrAsync(
            instructions,
            functionName,
            startAddress,
            CpuArchitecture.X86_64);

        // Step 2: Extract semantic graph
        var graph = await _graphExtractor.ExtractGraphAsync(lifted);

        // Step 3: Generate fingerprint
        return await _fingerprintGenerator.GenerateAsync(graph, startAddress);
    }

    /// <summary>
    /// Builds a <see cref="DisassembledInstruction"/> from textual operands.
    /// </summary>
    /// <param name="address">Instruction address.</param>
    /// <param name="mnemonic">Instruction mnemonic, e.g. <c>mov</c>.</param>
    /// <param name="operandsText">Operands separated by <c>", "</c>; empty for none.</param>
    /// <param name="kind">Instruction kind; <see cref="InstructionKind.Call"/> makes
    /// non-numeric, non-memory operands parse as address operands.</param>
    /// <returns>The instruction, carrying a single placeholder byte (<c>0x90</c>).</returns>
    private static DisassembledInstruction CreateInstruction(
        ulong address,
        string mnemonic,
        string operandsText,
        InstructionKind kind)
    {
        // For call instructions, treat the operand as a call target (Address type)
        var isCallTarget = kind == InstructionKind.Call;

        ImmutableArray<Operand> operands = string.IsNullOrEmpty(operandsText)
            ? []
            : operandsText.Split(", ").Select(op => ParseOperand(op, isCallTarget)).ToImmutableArray();

        return new DisassembledInstruction(
            address,
            [0x90], // Placeholder bytes
            mnemonic,
            operandsText,
            kind,
            operands);
    }

    /// <summary>
    /// Minimal operand parser for test fixtures. Classification order: numeric literal
    /// (decimal or <c>0x</c>-prefixed hex) → immediate; text containing <c>[</c> → memory;
    /// call target → address; anything else → register.
    /// </summary>
    /// <param name="text">Single operand text.</param>
    /// <param name="isCallTarget">Whether the operand belongs to a call instruction.</param>
    private static Operand ParseOperand(string text, bool isCallTarget = false)
    {
        if (TryParseImmediate(text, out var immediate))
        {
            return new Operand(OperandType.Immediate, text, Value: immediate);
        }

        if (text.Contains('['))
        {
            return new Operand(OperandType.Memory, text);
        }

        // Function names in call instructions should be Address type
        if (isCallTarget)
        {
            return new Operand(OperandType.Address, text);
        }

        // Assume register
        return new Operand(OperandType.Register, text, Register: text);
    }

    /// <summary>
    /// Tries to read <paramref name="text"/> as a decimal integer or a <c>0x</c>-prefixed
    /// hexadecimal integer. Parsing uses the invariant culture so the fixtures are
    /// interpreted the same way regardless of the machine's regional settings.
    /// </summary>
    private static bool TryParseImmediate(string text, out long value)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        if (long.TryParse(text, System.Globalization.NumberStyles.Integer, invariant, out value))
        {
            return true;
        }

        return text.StartsWith("0x", StringComparison.OrdinalIgnoreCase)
            && long.TryParse(text.AsSpan(2), System.Globalization.NumberStyles.HexNumber, invariant, out value);
    }

    /// <summary>
    /// Fixture: <c>mov rax, rdi; add rax, rsi; ret</c>.
    /// </summary>
    private static List<DisassembledInstruction> CreateSimpleAddFunction()
    {
        return
        [
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "add", "rax, rsi", InstructionKind.Arithmetic),
            CreateInstruction(0x1006, "ret", "", InstructionKind.Return),
        ];
    }

    /// <summary>
    /// Fixture: <c>mov rax, rdi; imul rax, rsi; ret</c>.
    /// </summary>
    private static List<DisassembledInstruction> CreateSimpleMultiplyFunction()
    {
        return
        [
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "imul", "rax, rsi", InstructionKind.Arithmetic),
            CreateInstruction(0x1007, "ret", "", InstructionKind.Return),
        ];
    }

    /// <summary>
    /// Fixture: <c>mov rax, rdi; sub rax, rsi; ret</c>.
    /// </summary>
    private static List<DisassembledInstruction> CreateSimpleSubtractFunction()
    {
        return
        [
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "sub", "rax, rsi", InstructionKind.Arithmetic),
            CreateInstruction(0x1006, "ret", "", InstructionKind.Return),
        ];
    }
}
|