// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.

using System.Collections.Immutable;
using System.Globalization;
using FluentAssertions;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using StellaOps.BinaryIndex.Disassembly;
using Xunit;

namespace StellaOps.BinaryIndex.Semantic.Tests.Integration;

/// <summary>
/// End-to-end integration tests for the semantic diffing pipeline.
/// Tests the full flow from disassembled instructions to semantic match results.
/// </summary>
[Trait("Category", "Integration")]
public class EndToEndSemanticDiffTests : IDisposable
{
    // Kept so the container (and any disposable services it owns) can be released per test instance.
    private readonly ServiceProvider _serviceProvider;
    private readonly IIrLiftingService _liftingService;
    private readonly ISemanticGraphExtractor _graphExtractor;
    private readonly ISemanticFingerprintGenerator _fingerprintGenerator;
    private readonly ISemanticMatcher _matcher;

    /// <summary>
    /// Builds a service container with null logging and the semantic services,
    /// then resolves the four pipeline stages used by the tests.
    /// </summary>
    public EndToEndSemanticDiffTests()
    {
        var services = new ServiceCollection();
        services.AddLogging(builder => builder.AddProvider(NullLoggerProvider.Instance));
        services.AddBinaryIndexSemantic();

        _serviceProvider = services.BuildServiceProvider();

        _liftingService = _serviceProvider.GetRequiredService<IIrLiftingService>();
        _graphExtractor = _serviceProvider.GetRequiredService<ISemanticGraphExtractor>();
        _fingerprintGenerator = _serviceProvider.GetRequiredService<ISemanticFingerprintGenerator>();
        _matcher = _serviceProvider.GetRequiredService<ISemanticMatcher>();
    }

    /// <summary>
    /// Disposes the service provider created in the constructor.
    /// </summary>
    public void Dispose()
    {
        _serviceProvider.Dispose();
        GC.SuppressFinalize(this);
    }

    [Fact]
    public async Task EndToEnd_IdenticalFunctions_ShouldProducePerfectMatch()
    {
        // Arrange - two identical x86_64 functions
        var instructions = CreateSimpleAddFunction();

        // Act - process both through the full pipeline, then match
        var fingerprint1 = await ProcessFullPipelineAsync(instructions, "func1");
        var fingerprint2 = await ProcessFullPipelineAsync(instructions, "func2");
        var result = await _matcher.MatchAsync(fingerprint1, fingerprint2);

        // Assert
        result.OverallSimilarity.Should().Be(1.0m);
        result.Confidence.Should().Be(MatchConfidence.VeryHigh);
    }

    [Fact]
    public async Task EndToEnd_SameStructureDifferentRegisters_ShouldProduceHighSimilarity()
    {
        // Arrange - two functions with same structure but different register allocation
        //   mov rax, rdi   vs   mov rbx, rsi   (same operation: move argument to temp)
        //   add rax, 1     vs   add rbx, 1     (same operation: add immediate)
        //   ret            vs   ret
        var func1 = new List<DisassembledInstruction>
        {
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "add", "rax, 1", InstructionKind.Arithmetic),
            CreateInstruction(0x1007, "ret", "", InstructionKind.Return),
        };

        var func2 = new List<DisassembledInstruction>
        {
            CreateInstruction(0x2000, "mov", "rbx, rsi", InstructionKind.Move),
            CreateInstruction(0x2003, "add", "rbx, 1", InstructionKind.Arithmetic),
            CreateInstruction(0x2007, "ret", "", InstructionKind.Return),
        };

        // Act
        var fingerprint1 = await ProcessFullPipelineAsync(func1, "func1");
        var fingerprint2 = await ProcessFullPipelineAsync(func2, "func2");
        var result = await _matcher.MatchAsync(fingerprint1, fingerprint2);

        // Assert - semantic analysis should recognize these as similar
        result.OverallSimilarity.Should().BeGreaterThanOrEqualTo(
            0.7m,
            "Semantically equivalent functions with different registers should have high similarity");
        result.Confidence.Should().BeOneOf(MatchConfidence.High, MatchConfidence.VeryHigh);
    }

    [Fact]
    public async Task EndToEnd_DifferentFunctions_ShouldProduceLowSimilarity()
    {
        // Arrange - completely different functions
        var addFunc = CreateSimpleAddFunction();
        var multiplyFunc = CreateSimpleMultiplyFunction();

        // Act
        var fingerprint1 = await ProcessFullPipelineAsync(addFunc, "add_func");
        var fingerprint2 = await ProcessFullPipelineAsync(multiplyFunc, "multiply_func");
        var result = await _matcher.MatchAsync(fingerprint1, fingerprint2);

        // Assert
        result.OverallSimilarity.Should().BeLessThan(
            0.9m,
            "Different functions should have lower similarity");
    }

    [Fact]
    public async Task EndToEnd_FunctionWithExternalCall_ShouldCaptureApiCalls()
    {
        // Arrange - function that calls an external function
        var funcWithCall = new List<DisassembledInstruction>
        {
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "call", "malloc", InstructionKind.Call),
            CreateInstruction(0x1008, "ret", "", InstructionKind.Return),
        };

        // Act
        var fingerprint = await ProcessFullPipelineAsync(funcWithCall, "func_with_call");

        // Assert
        fingerprint.ApiCalls.Should().Contain("malloc");
    }

    [Fact]
    public async Task EndToEnd_EmptyFunction_ShouldHandleGracefully()
    {
        // Arrange - minimal function (just ret)
        var minimalFunc = new List<DisassembledInstruction>
        {
            CreateInstruction(0x1000, "ret", "", InstructionKind.Return),
        };

        // Act
        var fingerprint = await ProcessFullPipelineAsync(minimalFunc, "minimal");

        // Assert
        fingerprint.Should().NotBeNull();
        fingerprint.NodeCount.Should().BeGreaterThanOrEqualTo(0);
    }

    [Fact]
    public async Task EndToEnd_ConditionalBranch_ShouldCaptureControlFlow()
    {
        // Arrange - function with conditional branch.
        // The unconditional jump skips the xor and lands on the final ret at 0x100c,
        // so both paths converge on an instruction that exists in the listing.
        var branchFunc = new List<DisassembledInstruction>
        {
            CreateInstruction(0x1000, "test", "rdi, rdi", InstructionKind.Logic),
            CreateInstruction(0x1003, "je", "0x100a", InstructionKind.ConditionalBranch),
            CreateInstruction(0x1005, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1008, "jmp", "0x100c", InstructionKind.Branch),
            CreateInstruction(0x100a, "xor", "eax, eax", InstructionKind.Logic),
            CreateInstruction(0x100c, "ret", "", InstructionKind.Return),
        };

        // Act
        var fingerprint = await ProcessFullPipelineAsync(branchFunc, "branch_func");

        // Assert
        fingerprint.CyclomaticComplexity.Should().BeGreaterThan(
            1,
            "Function with branches should have cyclomatic complexity > 1");
        fingerprint.EdgeCount.Should().BeGreaterThan(
            0,
            "Function with branches should have edges in the semantic graph");
    }

    [Fact]
    public async Task EndToEnd_DeterministicPipeline_ShouldProduceConsistentResults()
    {
        // Arrange
        var instructions = CreateSimpleAddFunction();

        // Act - process multiple times
        var fingerprint1 = await ProcessFullPipelineAsync(instructions, "func");
        var fingerprint2 = await ProcessFullPipelineAsync(instructions, "func");
        var fingerprint3 = await ProcessFullPipelineAsync(instructions, "func");

        // Assert - all fingerprints should be identical
        fingerprint1.GraphHashHex.Should().Be(fingerprint2.GraphHashHex);
        fingerprint2.GraphHashHex.Should().Be(fingerprint3.GraphHashHex);
        fingerprint1.OperationHashHex.Should().Be(fingerprint2.OperationHashHex);
        fingerprint2.OperationHashHex.Should().Be(fingerprint3.OperationHashHex);
    }

    [Fact]
    public async Task EndToEnd_FindMatchesInCorpus_ShouldReturnBestMatches()
    {
        // Arrange - create a corpus of functions
        var targetFunc = CreateSimpleAddFunction();
        var targetFingerprint = await ProcessFullPipelineAsync(targetFunc, "target");

        var corpusFingerprints = new List<SemanticFingerprint>
        {
            await ProcessFullPipelineAsync(CreateSimpleAddFunction(), "add1"),
            await ProcessFullPipelineAsync(CreateSimpleMultiplyFunction(), "mul1"),
            await ProcessFullPipelineAsync(CreateSimpleAddFunction(), "add2"),
            await ProcessFullPipelineAsync(CreateSimpleSubtractFunction(), "sub1"),
        };

        // Act
        var matches = await _matcher.FindMatchesAsync(
            targetFingerprint,
            corpusFingerprints.ToAsyncEnumerable(),
            minSimilarity: 0.5m,
            maxResults: 5);

        // Assert
        matches.Should().HaveCountGreaterThan(0);

        // The identical add functions should rank highest
        matches[0].OverallSimilarity.Should().BeGreaterThanOrEqualTo(0.9m);
    }

    [Fact]
    public async Task EndToEnd_MatchWithDeltas_ShouldIdentifyDifferences()
    {
        // Arrange - two similar but not identical functions
        var func1 = CreateSimpleAddFunction();
        var func2 = CreateSimpleSubtractFunction();

        var fingerprint1 = await ProcessFullPipelineAsync(func1, "add_func");
        var fingerprint2 = await ProcessFullPipelineAsync(func2, "sub_func");

        // Act
        var result = await _matcher.MatchAsync(
            fingerprint1,
            fingerprint2,
            new MatchOptions { ComputeDeltas = true });

        // Assert
        result.Deltas.Should().NotBeEmpty(
            "Match between different functions should identify deltas");
    }

    /// <summary>
    /// Runs the three pipeline stages in order: lift to IR, extract the semantic graph,
    /// and generate the fingerprint.
    /// </summary>
    /// <param name="instructions">Instructions of the function under test.</param>
    /// <param name="functionName">Name passed to the lifting service.</param>
    /// <returns>The fingerprint produced by the fingerprint generator.</returns>
    // NOTE(review): the fingerprint type name was reconstructed from context; confirm it
    // matches the return type of ISemanticFingerprintGenerator.GenerateAsync.
    private async Task<SemanticFingerprint> ProcessFullPipelineAsync(
        IReadOnlyList<DisassembledInstruction> instructions,
        string functionName)
    {
        // The first instruction's address doubles as the function start; 0 when the list is empty.
        var startAddress = instructions.Count > 0 ? instructions[0].Address : 0UL;

        // Step 1: Lift to IR
        var lifted = await _liftingService.LiftToIrAsync(
            instructions,
            functionName,
            startAddress,
            CpuArchitecture.X86_64);

        // Step 2: Extract semantic graph
        var graph = await _graphExtractor.ExtractGraphAsync(lifted);

        // Step 3: Generate fingerprint
        return await _fingerprintGenerator.GenerateAsync(graph, startAddress);
    }

    /// <summary>
    /// Builds a test instruction, splitting <paramref name="operandsText"/> on ", " and
    /// parsing each piece into an operand.
    /// </summary>
    /// <param name="address">Address assigned to the instruction.</param>
    /// <param name="mnemonic">Instruction mnemonic.</param>
    /// <param name="operandsText">Comma-separated operand text; empty for no operands.</param>
    /// <param name="kind">Instruction kind; call instructions get address-typed operands.</param>
    private static DisassembledInstruction CreateInstruction(
        ulong address,
        string mnemonic,
        string operandsText,
        InstructionKind kind)
    {
        // For call instructions, treat the operand as a call target (Address type)
        var isCallTarget = kind == InstructionKind.Call;

        ImmutableArray<Operand> operands = string.IsNullOrEmpty(operandsText)
            ? []
            : operandsText
                .Split(", ")
                .Select(op => ParseOperand(op, isCallTarget))
                .ToImmutableArray();

        return new DisassembledInstruction(
            address,
            [0x90], // Placeholder bytes
            mnemonic,
            operandsText,
            kind,
            operands);
    }

    /// <summary>
    /// Simple operand parsing for tests: decimal or 0x-prefixed hex text becomes an immediate,
    /// text containing '[' becomes a memory operand, call targets become addresses, and
    /// anything else is assumed to be a register.
    /// </summary>
    /// <param name="text">Operand text to classify.</param>
    /// <param name="isCallTarget">Whether the operand belongs to a call instruction.</param>
    private static Operand ParseOperand(string text, bool isCallTarget = false)
    {
        // Parse with the invariant culture so results do not depend on the machine's locale.
        var isDecimal = long.TryParse(
            text,
            NumberStyles.Integer,
            CultureInfo.InvariantCulture,
            out var immediate);

        var isHex = !isDecimal
            && text.StartsWith("0x", StringComparison.OrdinalIgnoreCase)
            && long.TryParse(
                text.AsSpan(2),
                NumberStyles.HexNumber,
                CultureInfo.InvariantCulture,
                out immediate);

        if (isDecimal || isHex)
        {
            return new Operand(OperandType.Immediate, text, Value: immediate);
        }

        if (text.Contains('['))
        {
            return new Operand(OperandType.Memory, text);
        }

        // Function names in call instructions should be Address type
        if (isCallTarget)
        {
            return new Operand(OperandType.Address, text);
        }

        // Assume register
        return new Operand(OperandType.Register, text, Register: text);
    }

    /// <summary>
    /// Simple function: add two values and return.
    /// </summary>
    private static List<DisassembledInstruction> CreateSimpleAddFunction()
    {
        //   mov rax, rdi
        //   add rax, rsi
        //   ret
        return
        [
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "add", "rax, rsi", InstructionKind.Arithmetic),
            CreateInstruction(0x1006, "ret", "", InstructionKind.Return),
        ];
    }

    /// <summary>
    /// Simple function: multiply two values and return.
    /// </summary>
    private static List<DisassembledInstruction> CreateSimpleMultiplyFunction()
    {
        //   mov rax, rdi
        //   imul rax, rsi
        //   ret
        return
        [
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "imul", "rax, rsi", InstructionKind.Arithmetic),
            CreateInstruction(0x1007, "ret", "", InstructionKind.Return),
        ];
    }

    /// <summary>
    /// Simple function: subtract two values and return.
    /// </summary>
    private static List<DisassembledInstruction> CreateSimpleSubtractFunction()
    {
        //   mov rax, rdi
        //   sub rax, rsi
        //   ret
        return
        [
            CreateInstruction(0x1000, "mov", "rax, rdi", InstructionKind.Move),
            CreateInstruction(0x1003, "sub", "rax, rsi", InstructionKind.Arithmetic),
            CreateInstruction(0x1006, "ret", "", InstructionKind.Return),
        ];
    }
}