Files
git.stella-ops.org/src/BinaryIndex/__Tests/StellaOps.BinaryIndex.DeltaSig.Tests/CfgExtractorTests.cs

454 lines
15 KiB
C#

// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
using FluentAssertions;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Normalization;
using Xunit;
namespace StellaOps.BinaryIndex.DeltaSig.Tests;
/// <summary>
/// Tests for the CFG extractor.
/// </summary>
[Trait("Category", "Unit")]
public sealed class CfgExtractorTests
{
#region Helper Methods
/// <summary>
/// Builds a <see cref="NormalizedInstruction"/> test fixture.
/// </summary>
/// <param name="address">Value assigned to <c>OriginalAddress</c>.</param>
/// <param name="kind">Value assigned to <c>Kind</c>.</param>
/// <param name="mnemonic">Value assigned to <c>NormalizedMnemonic</c>.</param>
/// <param name="bytes">Bytes copied into <c>NormalizedBytes</c>.</param>
/// <param name="operands">Zero or more operands copied into <c>Operands</c>.</param>
/// <returns>The populated instruction.</returns>
private static NormalizedInstruction CreateInstruction(
    ulong address,
    InstructionKind kind,
    string mnemonic,
    byte[] bytes,
    params NormalizedOperand[] operands) =>
    new()
    {
        OriginalAddress = address,
        Kind = kind,
        NormalizedMnemonic = mnemonic,
        NormalizedBytes = [.. bytes],
        Operands = [.. operands]
    };
/// <summary>
/// Builds an operand of type <see cref="OperandType.Address"/> whose text is the
/// lower-case hexadecimal rendering of <paramref name="value"/> prefixed with <c>0x</c>.
/// </summary>
/// <param name="value">Address stored in <c>Value</c> and rendered into <c>Text</c>.</param>
/// <returns>The populated address operand.</returns>
private static NormalizedOperand CreateAddressOperand(long value) =>
    new()
    {
        Type = OperandType.Address,
        Text = $"0x{value:x}",
        Value = value
    };
/// <summary>
/// Builds an operand of type <see cref="OperandType.Immediate"/> whose text is the
/// lower-case hexadecimal rendering of <paramref name="value"/> prefixed with <c>0x</c>.
/// </summary>
/// <param name="value">Constant stored in <c>Value</c> and rendered into <c>Text</c>.</param>
/// <returns>The populated immediate operand.</returns>
private static NormalizedOperand CreateImmediateOperand(long value) =>
    new()
    {
        Type = OperandType.Immediate,
        Text = $"0x{value:x}",
        Value = value
    };
/// <summary>
/// Builds an operand of type <see cref="OperandType.Register"/>; the register name is used
/// both as the operand text and as the <c>Register</c> value.
/// </summary>
/// <param name="reg">Register name, e.g. <c>"rax"</c>.</param>
/// <returns>The populated register operand.</returns>
private static NormalizedOperand CreateRegisterOperand(string reg) =>
    new()
    {
        Type = OperandType.Register,
        Text = reg,
        Register = reg
    };
#endregion
#region Empty Input Tests
/// <summary>
/// An empty instruction sequence must yield an empty CFG: no blocks, entry block id 0,
/// no exit blocks and an edge count of zero.
/// </summary>
[Fact]
public void Extract_EmptyInstructions_ReturnsEmptyCfg()
{
    // Arrange: nothing to analyse
    NormalizedInstruction[] noInstructions = [];

    // Act
    var graph = CfgExtractor.Extract(noInstructions);

    // Assert
    graph.Blocks.Should().BeEmpty();
    graph.EntryBlockId.Should().Be(0);
    graph.ExitBlockIds.Should().BeEmpty();
    graph.EdgeCount.Should().Be(0);
}
#endregion
#region Single Block Tests
/// <summary>
/// A lone <c>ret</c> must produce exactly one block (id 0) that is terminated by a return,
/// has neither successors nor predecessors, and is the only exit block.
/// </summary>
[Fact]
public void Extract_SingleReturnInstruction_CreatesOneBlock()
{
    // Arrange: ret
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(code);

    // Assert: count first, so a wrong block count fails as an assertion rather than an index error
    graph.Blocks.Should().HaveCount(1);

    var only = graph.Blocks[0];
    only.Id.Should().Be(0);
    only.TerminatorKind.Should().Be(BlockTerminatorKind.Return);
    only.Successors.Should().BeEmpty();
    only.Predecessors.Should().BeEmpty();
    graph.ExitBlockIds.Should().ContainSingle().Which.Should().Be(0);
}
/// <summary>
/// Straight-line code ending in <c>ret</c> must collapse into a single block that holds all
/// three instructions, spans 0x1000..0x100C and contributes no edges.
/// </summary>
[Fact]
public void Extract_LinearSequence_CreatesOneBlock()
{
    // Arrange: mov rax, 0; add rax, 1; ret
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.Move, "mov", [0x48, 0xC7, 0xC0, 0x00, 0x00, 0x00, 0x00],
            CreateRegisterOperand("rax"), CreateImmediateOperand(0)),
        CreateInstruction(0x1007, InstructionKind.Arithmetic, "add", [0x48, 0x83, 0xC0, 0x01],
            CreateRegisterOperand("rax"), CreateImmediateOperand(1)),
        CreateInstruction(0x100B, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(code);

    // Assert
    graph.Blocks.Should().HaveCount(1);

    var body = graph.Blocks[0];
    body.Instructions.Should().HaveCount(3);
    body.StartAddress.Should().Be(0x1000);
    // 0x100C is the ret address (0x100B) plus its single byte.
    body.EndAddress.Should().Be(0x100C);
    graph.EdgeCount.Should().Be(0);
}
#endregion
#region Conditional Branch Tests
/// <summary>
/// A forward conditional branch that skips one instruction must split the code into three
/// blocks: <c>cmp</c>+<c>je</c>, the skipped <c>nop</c>, and the <c>ret</c> join block.
/// Note that the method name says "two" but the assertions expect three blocks.
/// </summary>
[Fact]
public void Extract_ConditionalBranch_CreatesTwoBlocks()
{
    // Arrange: cmp rax, 0; je +1; nop; ret
    // The je (rel8 = 0x01) jumps over the one-byte nop to the ret at 0x1007.
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.Compare, "cmp", [0x48, 0x83, 0xF8, 0x00],
            CreateRegisterOperand("rax"), CreateImmediateOperand(0)),
        CreateInstruction(0x1004, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1007)), // Jump to ret
        CreateInstruction(0x1006, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x1007, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(code);

    // Assert
    graph.Blocks.Should().HaveCount(3);

    // Block 0: cmp + je
    var head = graph.Blocks[0];
    head.Instructions.Should().HaveCount(2);
    head.TerminatorKind.Should().Be(BlockTerminatorKind.ConditionalBranch);
    head.Successors.Should().Contain(1); // Fall through to nop
    head.Successors.Should().Contain(2); // Jump to ret

    // Block 1: nop
    var skipped = graph.Blocks[1];
    skipped.Instructions.Should().HaveCount(1);
    skipped.TerminatorKind.Should().Be(BlockTerminatorKind.FallThrough);
    skipped.Successors.Should().ContainSingle().Which.Should().Be(2);
    skipped.Predecessors.Should().ContainSingle().Which.Should().Be(0);

    // Block 2: ret
    var tail = graph.Blocks[2];
    tail.Instructions.Should().HaveCount(1);
    tail.TerminatorKind.Should().Be(BlockTerminatorKind.Return);
    tail.Successors.Should().BeEmpty();
}
/// <summary>
/// An if/else shape must yield four blocks with a single exit: the condition block, the
/// then arm (ending in an unconditional jump), the else arm, and the shared <c>ret</c>.
/// </summary>
/// <remarks>
/// Every branch target in the fixture is the start address of an instruction in the array.
/// The <c>je</c> at 0x1004 is two bytes long, so reaching the else arm at 0x100F needs
/// rel8 = 0x100F - 0x1006 = 0x09. The <c>jmp</c> at 0x100D reaches the <c>ret</c> at 0x1016
/// with rel8 = 0x1016 - 0x100F = 0x07.
/// </remarks>
[Fact]
public void Extract_IfElsePattern_CreatesCorrectBlocks()
{
    // Arrange: if-else pattern
    // 0x1000              cmp rax, 0
    // 0x1004              je else_label
    // 0x1006              mov rbx, 1 ; then branch
    // 0x100D              jmp end_label
    // 0x100F else_label:  mov rbx, 2
    // 0x1016 end_label:   ret
    var instructions = new[]
    {
        CreateInstruction(0x1000, InstructionKind.Compare, "cmp", [0x48, 0x83, 0xF8, 0x00]),
        CreateInstruction(0x1004, InstructionKind.ConditionalBranch, "je", [0x74, 0x09],
            CreateAddressOperand(0x100F)), // Jump to else
        CreateInstruction(0x1006, InstructionKind.Move, "mov", [0x48, 0xC7, 0xC3, 0x01, 0x00, 0x00, 0x00]),
        CreateInstruction(0x100D, InstructionKind.Branch, "jmp", [0xEB, 0x07],
            CreateAddressOperand(0x1016)), // Jump to ret
        CreateInstruction(0x100F, InstructionKind.Move, "mov", [0x48, 0xC7, 0xC3, 0x02, 0x00, 0x00, 0x00]),
        CreateInstruction(0x1016, InstructionKind.Return, "ret", [0xC3])
    };

    // Act
    var cfg = CfgExtractor.Extract(instructions);

    // Assert
    cfg.Blocks.Should().HaveCount(4);
    cfg.ExitBlockIds.Should().HaveCount(1);
    cfg.ExitBlockIds.Should().ContainSingle().Which.Should().Be(3);

    // Block 0 (cmp + je): falls through to the then arm and branches to the else arm
    cfg.Blocks[0].Successors.Should().Contain(1);
    cfg.Blocks[0].Successors.Should().Contain(2);

    // Block 1 (then arm, ends in jmp): only reaches the join block
    cfg.Blocks[1].Successors.Should().ContainSingle().Which.Should().Be(3);

    // Block 2 (else arm): falls through into the join block
    cfg.Blocks[2].Successors.Should().ContainSingle().Which.Should().Be(3);
}
#endregion
#region Loop Tests
/// <summary>
/// A conditional branch back to the first instruction must produce a self-loop on block 0
/// plus a fall-through edge into the trailing <c>ret</c> block.
/// </summary>
[Fact]
public void Extract_SimpleLoop_CreatesBackEdge()
{
    // Arrange: simple loop
    // loop_start: dec rax
    //             jnz loop_start
    //             ret
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.Arithmetic, "dec", [0x48, 0xFF, 0xC8],
            CreateRegisterOperand("rax")),
        CreateInstruction(0x1003, InstructionKind.ConditionalBranch, "jnz", [0x75, 0xFB],
            CreateAddressOperand(0x1000)), // Jump back to dec
        CreateInstruction(0x1005, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(code);

    // Assert
    graph.Blocks.Should().HaveCount(2);

    // Block 0: dec + jnz (loops back to itself)
    var loopBody = graph.Blocks[0];
    loopBody.TerminatorKind.Should().Be(BlockTerminatorKind.ConditionalBranch);
    loopBody.Successors.Should().Contain(0); // Back edge to itself
    loopBody.Successors.Should().Contain(1); // Fall through to ret

    // Block 1: ret
    var exit = graph.Blocks[1];
    exit.TerminatorKind.Should().Be(BlockTerminatorKind.Return);
    exit.Predecessors.Should().ContainSingle().Which.Should().Be(0);
}
#endregion
#region CFG Metrics Tests
/// <summary>
/// Branch-free code must report one basic block, zero edges, cyclomatic complexity 1 and a
/// non-empty edge hash.
/// </summary>
[Fact]
public void ComputeMetrics_LinearCode_HasCorrectMetrics()
{
    // Arrange: linear code with no branches
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.Move, "mov", [0x48, 0x89, 0xC0]),
        CreateInstruction(0x1003, InstructionKind.Arithmetic, "add", [0x48, 0x83, 0xC0, 0x01]),
        CreateInstruction(0x1007, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var result = CfgExtractor.ComputeMetrics(code);

    // Assert
    result.BasicBlockCount.Should().Be(1);
    result.EdgeCount.Should().Be(0);
    // edges - nodes + 2 = 0 - 1 + 2 = 1
    result.CyclomaticComplexity.Should().Be(1);
    result.EdgeHash.Should().NotBeNullOrEmpty();
}
/// <summary>
/// A single <c>if</c> (conditional branch skipping one instruction) must report three basic
/// blocks, three edges and cyclomatic complexity 2.
/// </summary>
[Fact]
public void ComputeMetrics_IfStatement_HasCorrectComplexity()
{
    // Arrange: simple if with two paths
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.Compare, "cmp", [0x48, 0x83, 0xF8, 0x00]),
        CreateInstruction(0x1004, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1007)),
        CreateInstruction(0x1006, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x1007, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var result = CfgExtractor.ComputeMetrics(code);

    // Assert
    result.BasicBlockCount.Should().Be(3);
    // Edges: Block 0 -> Block 1 (fallthrough), Block 0 -> Block 2 (branch),
    //        Block 1 -> Block 2 (fallthrough)
    result.EdgeCount.Should().Be(3);
    // edges - nodes + 2 = 3 - 3 + 2 = 2
    result.CyclomaticComplexity.Should().Be(2);
}
/// <summary>
/// A branch-free sequence and a sequence containing a conditional branch must not share
/// the same edge hash.
/// </summary>
[Fact]
public void ComputeMetrics_DifferentCfgs_HaveDifferentEdgeHashes()
{
    // Arrange: two different CFGs
    NormalizedInstruction[] straightLine =
    [
        CreateInstruction(0x1000, InstructionKind.Move, "mov", [0x48, 0x89, 0xC0]),
        CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])
    ];
    NormalizedInstruction[] withBranch =
    [
        CreateInstruction(0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1003)),
        CreateInstruction(0x1002, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var straightLineMetrics = CfgExtractor.ComputeMetrics(straightLine);
    var withBranchMetrics = CfgExtractor.ComputeMetrics(withBranch);

    // Assert
    straightLineMetrics.EdgeHash.Should().NotBe(withBranchMetrics.EdgeHash);
}
/// <summary>
/// Two sequences with the same branch shape but different base addresses (0x1000 vs 0x2000)
/// and different conditional mnemonics (<c>je</c> vs <c>jne</c>) must report the same edge
/// hash, basic block count and edge count.
/// </summary>
[Fact]
public void ComputeMetrics_SameCfgStructure_HasSameEdgeHash()
{
    // Arrange: two CFGs with same structure but different addresses
    NormalizedInstruction[] lowCopy =
    [
        CreateInstruction(0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1003)),
        CreateInstruction(0x1002, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])
    ];
    NormalizedInstruction[] highCopy =
    [
        CreateInstruction(0x2000, InstructionKind.ConditionalBranch, "jne", [0x75, 0x01],
            CreateAddressOperand(0x2003)),
        CreateInstruction(0x2002, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x2003, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var lowMetrics = CfgExtractor.ComputeMetrics(lowCopy);
    var highMetrics = CfgExtractor.ComputeMetrics(highCopy);

    // Assert: same CFG structure should produce same edge hash
    lowMetrics.EdgeHash.Should().Be(highMetrics.EdgeHash);
    lowMetrics.BasicBlockCount.Should().Be(highMetrics.BasicBlockCount);
    lowMetrics.EdgeCount.Should().Be(highMetrics.EdgeCount);
}
#endregion
#region Call Instruction Tests
/// <summary>
/// A <c>call</c> must terminate its block with a call terminator whose only successor is
/// the block holding the instructions that follow it.
/// </summary>
[Fact]
public void Extract_CallInstruction_ContinuesToNextBlock()
{
    // Arrange: call followed by more code
    // NOTE(review): the rel32 bytes (00 10 00 00) would encode target 0x2005, while the
    // operand says 0x2000 — presumably irrelevant to block splitting; confirm.
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.Call, "call", [0xE8, 0x00, 0x10, 0x00, 0x00],
            CreateAddressOperand(0x2000)),
        CreateInstruction(0x1005, InstructionKind.Move, "mov", [0x48, 0x89, 0xC0]),
        CreateInstruction(0x1008, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(code);

    // Assert
    graph.Blocks.Should().HaveCount(2);

    var callBlock = graph.Blocks[0];
    callBlock.TerminatorKind.Should().Be(BlockTerminatorKind.Call);
    callBlock.Successors.Should().ContainSingle().Which.Should().Be(1);
}
#endregion
#region Unconditional Jump Tests
/// <summary>
/// An unconditional <c>jmp</c> must end its block with a jump terminator and a single
/// successor (the jump target); the two <c>nop</c>s it skips form their own block.
/// </summary>
[Fact]
public void Extract_UnconditionalJump_NoFallthrough()
{
    // Arrange: unconditional jump
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.Branch, "jmp", [0xEB, 0x02],
            CreateAddressOperand(0x1004)),
        CreateInstruction(0x1002, InstructionKind.Nop, "nop", [0x90]), // Unreachable
        CreateInstruction(0x1003, InstructionKind.Nop, "nop", [0x90]), // Unreachable
        CreateInstruction(0x1004, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(code);

    // Assert
    graph.Blocks.Should().HaveCount(3);

    var jumpBlock = graph.Blocks[0];
    jumpBlock.TerminatorKind.Should().Be(BlockTerminatorKind.Jump);
    jumpBlock.Successors.Should().ContainSingle().Which.Should().Be(2); // Jump target only
}
#endregion
#region Edge Cases
/// <summary>
/// When both arms of a conditional branch end in their own <c>ret</c>, the CFG must list
/// two exit blocks.
/// </summary>
[Fact]
public void Extract_MultipleExits_TracksAllExitBlocks()
{
    // Arrange: multiple return paths
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1003)),
        CreateInstruction(0x1002, InstructionKind.Return, "ret", [0xC3]), // Exit 1
        CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3]) // Exit 2
    ];

    // Act
    var graph = CfgExtractor.Extract(code);

    // Assert
    graph.ExitBlockIds.Should().HaveCount(2);
}
/// <summary>
/// In a diamond-shaped CFG the join block (the one terminated by a return) must record
/// two predecessors, one per arm.
/// </summary>
[Fact]
public void Extract_PredecessorsAreCorrect()
{
    // Arrange: diamond pattern
    //      B0 (conditional)
    //     /  \
    //   B1    B2
    //     \  /
    //      B3 (ret)
    NormalizedInstruction[] code =
    [
        CreateInstruction(0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x02],
            CreateAddressOperand(0x1004)),
        CreateInstruction(0x1002, InstructionKind.Branch, "jmp", [0xEB, 0x02],
            CreateAddressOperand(0x1006)),
        CreateInstruction(0x1004, InstructionKind.Branch, "jmp", [0xEB, 0x00],
            CreateAddressOperand(0x1006)),
        CreateInstruction(0x1006, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(code);

    // Assert
    // The return-terminated block is the join point and should have two predecessors
    var joinBlock = graph.Blocks.First(block => block.TerminatorKind == BlockTerminatorKind.Return);
    joinBlock.Predecessors.Should().HaveCount(2);
}
#endregion
}