// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.

using System.Collections.Immutable;
using FluentAssertions;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
using Xunit;

namespace StellaOps.BinaryIndex.DeltaSig.Tests;

/// <summary>
/// Tests for the CFG extractor.
/// </summary>
/// <remarks>
/// Every test feeds a hand-built sequence of <see cref="NormalizedInstruction"/> values to
/// <c>CfgExtractor.Extract</c> or <c>CfgExtractor.ComputeMetrics</c> and asserts on the
/// resulting blocks, edges, and metrics. The fixtures keep instruction addresses contiguous
/// (address + byte count == next address) so that branch targets land on instruction starts.
/// </remarks>
[Trait("Category", "Unit")]
public sealed class CfgExtractorTests
{
    #region Helper Methods

    /// <summary>
    /// Builds a <see cref="NormalizedInstruction"/> from the given address, kind, mnemonic,
    /// encoded bytes, and operands.
    /// </summary>
    /// <param name="address">Value stored in <c>OriginalAddress</c>.</param>
    /// <param name="kind">Value stored in <c>Kind</c>.</param>
    /// <param name="mnemonic">Value stored in <c>NormalizedMnemonic</c>.</param>
    /// <param name="bytes">Bytes copied into <c>NormalizedBytes</c>.</param>
    /// <param name="operands">Operands copied into <c>Operands</c>; may be empty.</param>
    /// <returns>The populated instruction.</returns>
    private static NormalizedInstruction CreateInstruction(
        ulong address,
        InstructionKind kind,
        string mnemonic,
        byte[] bytes,
        params NormalizedOperand[] operands)
    {
        return new NormalizedInstruction
        {
            OriginalAddress = address,
            Kind = kind,
            NormalizedMnemonic = mnemonic,
            NormalizedBytes = [.. bytes],
            Operands = [.. operands]
        };
    }

    /// <summary>
    /// Builds an operand of type <see cref="OperandType.Address"/> whose text is the
    /// lowercase hexadecimal rendering of <paramref name="value"/>.
    /// </summary>
    private static NormalizedOperand CreateAddressOperand(long value)
    {
        return new NormalizedOperand
        {
            Type = OperandType.Address,
            Text = $"0x{value:x}",
            Value = value
        };
    }

    /// <summary>
    /// Builds an operand of type <see cref="OperandType.Immediate"/> whose text is the
    /// lowercase hexadecimal rendering of <paramref name="value"/>.
    /// </summary>
    private static NormalizedOperand CreateImmediateOperand(long value)
    {
        return new NormalizedOperand
        {
            Type = OperandType.Immediate,
            Text = $"0x{value:x}",
            Value = value
        };
    }

    /// <summary>
    /// Builds an operand of type <see cref="OperandType.Register"/> that uses
    /// <paramref name="reg"/> for both its text and its register name.
    /// </summary>
    private static NormalizedOperand CreateRegisterOperand(string reg)
    {
        return new NormalizedOperand
        {
            Type = OperandType.Register,
            Text = reg,
            Register = reg
        };
    }

    #endregion

    #region Empty Input Tests

    /// <summary>
    /// An empty instruction sequence yields a CFG with no blocks, no exits, and no edges.
    /// </summary>
    [Fact]
    public void Extract_EmptyInstructions_ReturnsEmptyCfg()
    {
        // Arrange
        // The element type must be spelled out: Array.Empty<T>() has no argument to infer T from.
        var instructions = Array.Empty<NormalizedInstruction>();

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        cfg.Blocks.Should().BeEmpty();
        cfg.EntryBlockId.Should().Be(0);
        cfg.ExitBlockIds.Should().BeEmpty();
        cfg.EdgeCount.Should().Be(0);
    }

    #endregion

    #region Single Block Tests

    /// <summary>
    /// A lone <c>ret</c> forms a single block that is also the only exit block.
    /// </summary>
    [Fact]
    public void Extract_SingleReturnInstruction_CreatesOneBlock()
    {
        // Arrange: ret
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        cfg.Blocks.Should().HaveCount(1);
        cfg.Blocks[0].Id.Should().Be(0);
        cfg.Blocks[0].TerminatorKind.Should().Be(BlockTerminatorKind.Return);
        cfg.Blocks[0].Successors.Should().BeEmpty();
        cfg.Blocks[0].Predecessors.Should().BeEmpty();
        cfg.ExitBlockIds.Should().ContainSingle().Which.Should().Be(0);
    }

    /// <summary>
    /// Straight-line code ending in <c>ret</c> stays in one block spanning all instructions.
    /// </summary>
    [Fact]
    public void Extract_LinearSequence_CreatesOneBlock()
    {
        // Arrange: mov rax, 0; add rax, 1; ret
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Move, "mov", [0x48, 0xC7, 0xC0, 0x00, 0x00, 0x00, 0x00],
                CreateRegisterOperand("rax"), CreateImmediateOperand(0)),
            CreateInstruction(0x1007, InstructionKind.Arithmetic, "add", [0x48, 0x83, 0xC0, 0x01],
                CreateRegisterOperand("rax"), CreateImmediateOperand(1)),
            CreateInstruction(0x100B, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        cfg.Blocks.Should().HaveCount(1);
        cfg.Blocks[0].Instructions.Should().HaveCount(3);
        cfg.Blocks[0].StartAddress.Should().Be(0x1000);
        cfg.Blocks[0].EndAddress.Should().Be(0x100C); // 0x100B + 1-byte ret
        cfg.EdgeCount.Should().Be(0);
    }

    #endregion

    #region Conditional Branch Tests

    /// <summary>
    /// A conditional branch that skips one instruction splits the code into three blocks.
    /// </summary>
    /// <remarks>
    /// The method name says "two blocks", but the assertions below expect three
    /// (branch block, fall-through block, join block). The name is kept unchanged so that
    /// existing test filters keep matching.
    /// </remarks>
    [Fact]
    public void Extract_ConditionalBranch_CreatesTwoBlocks()
    {
        // Arrange: cmp rax, 0; je 0x1007; nop; ret
        // The je jumps over the nop to the ret
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Compare, "cmp", [0x48, 0x83, 0xF8, 0x00],
                CreateRegisterOperand("rax"), CreateImmediateOperand(0)),
            CreateInstruction(0x1004, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
                CreateAddressOperand(0x1007)), // Jump to ret
            CreateInstruction(0x1006, InstructionKind.Nop, "nop", [0x90]),
            CreateInstruction(0x1007, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        cfg.Blocks.Should().HaveCount(3);

        // Block 0: cmp + je
        cfg.Blocks[0].Instructions.Should().HaveCount(2);
        cfg.Blocks[0].TerminatorKind.Should().Be(BlockTerminatorKind.ConditionalBranch);
        cfg.Blocks[0].Successors.Should().Contain(1); // Fall through to nop
        cfg.Blocks[0].Successors.Should().Contain(2); // Jump to ret

        // Block 1: nop
        cfg.Blocks[1].Instructions.Should().HaveCount(1);
        cfg.Blocks[1].TerminatorKind.Should().Be(BlockTerminatorKind.FallThrough);
        cfg.Blocks[1].Successors.Should().ContainSingle().Which.Should().Be(2);
        cfg.Blocks[1].Predecessors.Should().ContainSingle().Which.Should().Be(0);

        // Block 2: ret
        cfg.Blocks[2].Instructions.Should().HaveCount(1);
        cfg.Blocks[2].TerminatorKind.Should().Be(BlockTerminatorKind.Return);
        cfg.Blocks[2].Successors.Should().BeEmpty();
    }

    /// <summary>
    /// An if/else shape produces four blocks (condition, then, else, join) with one exit.
    /// </summary>
    [Fact]
    public void Extract_IfElsePattern_CreatesCorrectBlocks()
    {
        // Arrange: if-else pattern
        //   0x1000  cmp rax, 0
        //   0x1004  je else_label
        //   0x1006  mov rbx, 1      ; then branch
        //   0x100D  jmp end_label
        //   0x100F  else_label: mov rbx, 2
        //   0x1016  end_label: ret
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Compare, "cmp", [0x48, 0x83, 0xF8, 0x00]),
            // The target must be the start of the else mov (0x100F); rel8 = 0x100F - 0x1006 = 0x09.
            // A target of 0x100B would point into the middle of the 7-byte mov at 0x1006.
            CreateInstruction(0x1004, InstructionKind.ConditionalBranch, "je", [0x74, 0x09],
                CreateAddressOperand(0x100F)), // Jump to else
            CreateInstruction(0x1006, InstructionKind.Move, "mov", [0x48, 0xC7, 0xC3, 0x01, 0x00, 0x00, 0x00]),
            CreateInstruction(0x100D, InstructionKind.Branch, "jmp", [0xEB, 0x07],
                CreateAddressOperand(0x1016)), // Jump to ret
            CreateInstruction(0x100F, InstructionKind.Move, "mov", [0x48, 0xC7, 0xC3, 0x02, 0x00, 0x00, 0x00]),
            CreateInstruction(0x1016, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        cfg.Blocks.Should().HaveCount(4);
        cfg.ExitBlockIds.Should().HaveCount(1);

        // Block 0 (cmp + je) must reach both arms of the if/else.
        cfg.Blocks[0].Successors.Should().Contain(1); // Fall through to then branch
        cfg.Blocks[0].Successors.Should().Contain(2); // Jump to else branch
    }

    #endregion

    #region Loop Tests

    /// <summary>
    /// A conditional branch back to the start of its own block is reported as a self edge.
    /// </summary>
    [Fact]
    public void Extract_SimpleLoop_CreatesBackEdge()
    {
        // Arrange: simple loop
        //   loop_start: dec rax
        //               jnz loop_start
        //               ret
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Arithmetic, "dec", [0x48, 0xFF, 0xC8],
                CreateRegisterOperand("rax")),
            CreateInstruction(0x1003, InstructionKind.ConditionalBranch, "jnz", [0x75, 0xFB],
                CreateAddressOperand(0x1000)), // Jump back to dec
            CreateInstruction(0x1005, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        cfg.Blocks.Should().HaveCount(2);

        // Block 0: dec + jnz (loops back to itself)
        cfg.Blocks[0].TerminatorKind.Should().Be(BlockTerminatorKind.ConditionalBranch);
        cfg.Blocks[0].Successors.Should().Contain(0); // Back edge to itself
        cfg.Blocks[0].Successors.Should().Contain(1); // Fall through to ret

        // Block 1: ret
        cfg.Blocks[1].TerminatorKind.Should().Be(BlockTerminatorKind.Return);
        cfg.Blocks[1].Predecessors.Should().ContainSingle().Which.Should().Be(0);
    }

    #endregion

    #region CFG Metrics Tests

    /// <summary>
    /// Branch-free code reports one block, zero edges, and cyclomatic complexity 1.
    /// </summary>
    [Fact]
    public void ComputeMetrics_LinearCode_HasCorrectMetrics()
    {
        // Arrange: linear code with no branches
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Move, "mov", [0x48, 0x89, 0xC0]),
            CreateInstruction(0x1003, InstructionKind.Arithmetic, "add", [0x48, 0x83, 0xC0, 0x01]),
            CreateInstruction(0x1007, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var metrics = CfgExtractor.ComputeMetrics(instructions);

        // Assert
        metrics.BasicBlockCount.Should().Be(1);
        metrics.EdgeCount.Should().Be(0);
        metrics.CyclomaticComplexity.Should().Be(1); // edges - nodes + 2 = 0 - 1 + 2 = 1
        metrics.EdgeHash.Should().NotBeNullOrEmpty();
    }

    /// <summary>
    /// A single conditional skip reports three blocks, three edges, and complexity 2.
    /// </summary>
    [Fact]
    public void ComputeMetrics_IfStatement_HasCorrectComplexity()
    {
        // Arrange: simple if with two paths
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Compare, "cmp", [0x48, 0x83, 0xF8, 0x00]),
            CreateInstruction(0x1004, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
                CreateAddressOperand(0x1007)),
            CreateInstruction(0x1006, InstructionKind.Nop, "nop", [0x90]),
            CreateInstruction(0x1007, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var metrics = CfgExtractor.ComputeMetrics(instructions);

        // Assert
        metrics.BasicBlockCount.Should().Be(3);
        // Block 0 -> Block 1 (fallthrough), Block 0 -> Block 2 (branch), Block 1 -> Block 2 (fallthrough)
        metrics.EdgeCount.Should().Be(3);
        metrics.CyclomaticComplexity.Should().Be(2); // 3 - 3 + 2 = 2
    }

    /// <summary>
    /// Structurally different CFGs must not share an edge hash.
    /// </summary>
    [Fact]
    public void ComputeMetrics_DifferentCfgs_HaveDifferentEdgeHashes()
    {
        // Arrange: two different CFGs
        var linearCode = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Move, "mov", [0x48, 0x89, 0xC0]),
            CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])
        };

        var branchingCode = new[]
        {
            CreateInstruction(0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
                CreateAddressOperand(0x1003)),
            CreateInstruction(0x1002, InstructionKind.Nop, "nop", [0x90]),
            CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var linearMetrics = CfgExtractor.ComputeMetrics(linearCode);
        var branchingMetrics = CfgExtractor.ComputeMetrics(branchingCode);

        // Assert
        linearMetrics.EdgeHash.Should().NotBe(branchingMetrics.EdgeHash);
    }

    /// <summary>
    /// Two CFGs with the same shape but different base addresses and mnemonics must
    /// produce the same edge hash and the same block and edge counts.
    /// </summary>
    [Fact]
    public void ComputeMetrics_SameCfgStructure_HasSameEdgeHash()
    {
        // Arrange: two CFGs with same structure but different addresses
        var cfg1 = new[]
        {
            CreateInstruction(0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
                CreateAddressOperand(0x1003)),
            CreateInstruction(0x1002, InstructionKind.Nop, "nop", [0x90]),
            CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])
        };

        var cfg2 = new[]
        {
            CreateInstruction(0x2000, InstructionKind.ConditionalBranch, "jne", [0x75, 0x01],
                CreateAddressOperand(0x2003)),
            CreateInstruction(0x2002, InstructionKind.Nop, "nop", [0x90]),
            CreateInstruction(0x2003, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var metrics1 = CfgExtractor.ComputeMetrics(cfg1);
        var metrics2 = CfgExtractor.ComputeMetrics(cfg2);

        // Assert: same CFG structure should produce same edge hash
        metrics1.EdgeHash.Should().Be(metrics2.EdgeHash);
        metrics1.BasicBlockCount.Should().Be(metrics2.BasicBlockCount);
        metrics1.EdgeCount.Should().Be(metrics2.EdgeCount);
    }

    #endregion

    #region Call Instruction Tests

    /// <summary>
    /// A call ends its block and the only successor is the block that follows it.
    /// </summary>
    [Fact]
    public void Extract_CallInstruction_ContinuesToNextBlock()
    {
        // Arrange: call followed by more code
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Call, "call", [0xE8, 0x00, 0x10, 0x00, 0x00],
                CreateAddressOperand(0x2000)),
            CreateInstruction(0x1005, InstructionKind.Move, "mov", [0x48, 0x89, 0xC0]),
            CreateInstruction(0x1008, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        cfg.Blocks.Should().HaveCount(2);
        cfg.Blocks[0].TerminatorKind.Should().Be(BlockTerminatorKind.Call);
        cfg.Blocks[0].Successors.Should().ContainSingle().Which.Should().Be(1);
    }

    #endregion

    #region Unconditional Jump Tests

    /// <summary>
    /// An unconditional jump has exactly one successor (its target) and no fall-through edge.
    /// </summary>
    [Fact]
    public void Extract_UnconditionalJump_NoFallthrough()
    {
        // Arrange: unconditional jump
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.Branch, "jmp", [0xEB, 0x02],
                CreateAddressOperand(0x1004)),
            CreateInstruction(0x1002, InstructionKind.Nop, "nop", [0x90]), // Unreachable
            CreateInstruction(0x1003, InstructionKind.Nop, "nop", [0x90]), // Unreachable
            CreateInstruction(0x1004, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        cfg.Blocks.Should().HaveCount(3);
        cfg.Blocks[0].TerminatorKind.Should().Be(BlockTerminatorKind.Jump);
        cfg.Blocks[0].Successors.Should().ContainSingle().Which.Should().Be(2); // Jump target only
    }

    #endregion

    #region Edge Cases

    /// <summary>
    /// Every block that ends in a return is reported as an exit block.
    /// </summary>
    [Fact]
    public void Extract_MultipleExits_TracksAllExitBlocks()
    {
        // Arrange: multiple return paths
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
                CreateAddressOperand(0x1003)),
            CreateInstruction(0x1002, InstructionKind.Return, "ret", [0xC3]), // Exit 1
            CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])  // Exit 2
        };

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        cfg.ExitBlockIds.Should().HaveCount(2);
    }

    /// <summary>
    /// In a diamond-shaped CFG the join block lists both arms as predecessors.
    /// </summary>
    [Fact]
    public void Extract_PredecessorsAreCorrect()
    {
        // Arrange: diamond pattern
        //      B0 (conditional)
        //     /  \
        //   B1    B2
        //     \  /
        //      B3 (ret)
        var instructions = new[]
        {
            CreateInstruction(0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x02],
                CreateAddressOperand(0x1004)),
            CreateInstruction(0x1002, InstructionKind.Branch, "jmp", [0xEB, 0x02],
                CreateAddressOperand(0x1006)),
            CreateInstruction(0x1004, InstructionKind.Branch, "jmp", [0xEB, 0x00],
                CreateAddressOperand(0x1006)),
            CreateInstruction(0x1006, InstructionKind.Return, "ret", [0xC3])
        };

        // Act
        var cfg = CfgExtractor.Extract(instructions);

        // Assert
        // Last block should have two predecessors
        var lastBlock = cfg.Blocks.First(b => b.TerminatorKind == BlockTerminatorKind.Return);
        lastBlock.Predecessors.Should().HaveCount(2);
    }

    #endregion
}