454 lines
15 KiB
C#
454 lines
15 KiB
C#
// Copyright (c) StellaOps. All rights reserved.
|
|
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
|
|
|
using System.Collections.Immutable;
|
|
using FluentAssertions;
|
|
using StellaOps.BinaryIndex.Disassembly;
|
|
using StellaOps.BinaryIndex.Normalization;
|
|
using Xunit;
|
|
|
|
namespace StellaOps.BinaryIndex.DeltaSig.Tests;
|
|
|
|
/// <summary>
|
|
/// Tests for the CFG extractor.
|
|
/// </summary>
|
|
[Trait("Category", "Unit")]
|
|
public sealed class CfgExtractorTests
|
|
{
|
|
#region Helper Methods
|
|
|
|
/// <summary>
/// Builds a <see cref="NormalizedInstruction"/> fixture from its individual parts.
/// </summary>
/// <param name="address">Value assigned to <c>OriginalAddress</c>.</param>
/// <param name="kind">Value assigned to <c>Kind</c>.</param>
/// <param name="mnemonic">Value assigned to <c>NormalizedMnemonic</c>.</param>
/// <param name="bytes">Bytes copied into <c>NormalizedBytes</c>.</param>
/// <param name="operands">Zero or more operands copied into <c>Operands</c>.</param>
/// <returns>The populated instruction.</returns>
private static NormalizedInstruction CreateInstruction(
    ulong address,
    InstructionKind kind,
    string mnemonic,
    byte[] bytes,
    params NormalizedOperand[] operands) =>
    new()
    {
        OriginalAddress = address,
        Kind = kind,
        NormalizedMnemonic = mnemonic,
        NormalizedBytes = [.. bytes],
        Operands = [.. operands]
    };
|
|
|
|
/// <summary>
/// Builds an operand of type <c>OperandType.Address</c>.
/// </summary>
/// <param name="value">Numeric value stored in <c>Value</c>; also rendered as <c>0x</c> + lowercase hex for <c>Text</c>.</param>
/// <returns>The populated operand.</returns>
private static NormalizedOperand CreateAddressOperand(long value) =>
    new()
    {
        Type = OperandType.Address,
        Text = $"0x{value:x}",
        Value = value
    };
|
|
|
|
/// <summary>
/// Builds an operand of type <c>OperandType.Immediate</c>.
/// </summary>
/// <param name="value">Numeric value stored in <c>Value</c>; also rendered as <c>0x</c> + lowercase hex for <c>Text</c>.</param>
/// <returns>The populated operand.</returns>
private static NormalizedOperand CreateImmediateOperand(long value)
{
    var immediate = new NormalizedOperand
    {
        Type = OperandType.Immediate,
        Text = $"0x{value:x}",
        Value = value
    };

    return immediate;
}
|
|
|
|
/// <summary>
/// Builds an operand of type <c>OperandType.Register</c>.
/// </summary>
/// <param name="reg">Register name, used for both <c>Text</c> and <c>Register</c>.</param>
/// <returns>The populated operand.</returns>
private static NormalizedOperand CreateRegisterOperand(string reg) =>
    new()
    {
        Type = OperandType.Register,
        Text = reg,
        Register = reg
    };
|
|
|
|
#endregion
|
|
|
|
#region Empty Input Tests
|
|
|
|
/// <summary>
/// Extracting from an empty instruction array is expected to give a CFG with no blocks,
/// no exit blocks, zero edges and an entry block id of 0.
/// </summary>
[Fact]
public void Extract_EmptyInstructions_ReturnsEmptyCfg()
{
    // Arrange: nothing to disassemble
    NormalizedInstruction[] program = [];

    // Act
    var graph = CfgExtractor.Extract(program);

    // Assert
    graph.Blocks.Should().BeEmpty();
    graph.EntryBlockId.Should().Be(0);
    graph.ExitBlockIds.Should().BeEmpty();
    graph.EdgeCount.Should().Be(0);
}
|
|
|
|
#endregion
|
|
|
|
#region Single Block Tests
|
|
|
|
/// <summary>
/// A lone <c>ret</c> is expected to form a single block with id 0 that terminates in a return,
/// has neither successors nor predecessors, and is the only exit block.
/// </summary>
[Fact]
public void Extract_SingleReturnInstruction_CreatesOneBlock()
{
    // Arrange: the whole function is just "ret"
    NormalizedInstruction[] program =
    [
        CreateInstruction(0x1000, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(program);

    // Assert
    graph.Blocks.Should().HaveCount(1);

    var onlyBlock = graph.Blocks[0];
    onlyBlock.Id.Should().Be(0);
    onlyBlock.TerminatorKind.Should().Be(BlockTerminatorKind.Return);
    onlyBlock.Successors.Should().BeEmpty();
    onlyBlock.Predecessors.Should().BeEmpty();

    graph.ExitBlockIds.Should().ContainSingle().Which.Should().Be(0);
}
|
|
|
|
/// <summary>
/// Three straight-line instructions ending in <c>ret</c> are expected to collapse into one block
/// spanning 0x1000..0x100C with no edges.
/// </summary>
[Fact]
public void Extract_LinearSequence_CreatesOneBlock()
{
    // Arrange: mov rax, 0 (7 bytes); add rax, 1 (4 bytes); ret (1 byte)
    var rax = CreateRegisterOperand("rax");
    NormalizedInstruction[] program =
    [
        CreateInstruction(
            0x1000, InstructionKind.Move, "mov", [0x48, 0xC7, 0xC0, 0x00, 0x00, 0x00, 0x00],
            rax, CreateImmediateOperand(0)),
        CreateInstruction(
            0x1007, InstructionKind.Arithmetic, "add", [0x48, 0x83, 0xC0, 0x01],
            CreateRegisterOperand("rax"), CreateImmediateOperand(1)),
        CreateInstruction(0x100B, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(program);

    // Assert
    graph.Blocks.Should().HaveCount(1);

    var body = graph.Blocks[0];
    body.Instructions.Should().HaveCount(3);
    body.StartAddress.Should().Be(0x1000);
    body.EndAddress.Should().Be(0x100C);

    graph.EdgeCount.Should().Be(0);
}
|
|
|
|
#endregion
|
|
|
|
#region Conditional Branch Tests
|
|
|
|
/// <summary>
/// A conditional branch that skips one instruction is expected to split the code into three
/// blocks: the branch block, the skipped block, and the join block.
/// </summary>
/// <remarks>
/// NOTE(review): the method name says "TwoBlocks" but the assertions expect three; the name is
/// kept unchanged so existing test filters keep working.
/// </remarks>
[Fact]
public void Extract_ConditionalBranch_CreatesTwoBlocks()
{
    // Arrange: cmp rax, 0; je 0x1007; nop; ret
    // The je skips over the nop and lands on the ret.
    NormalizedInstruction[] program =
    [
        CreateInstruction(
            0x1000, InstructionKind.Compare, "cmp", [0x48, 0x83, 0xF8, 0x00],
            CreateRegisterOperand("rax"), CreateImmediateOperand(0)),
        CreateInstruction(
            0x1004, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1007)), // target = ret
        CreateInstruction(0x1006, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x1007, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(program);

    // Assert
    graph.Blocks.Should().HaveCount(3);

    // Block 0: cmp + je
    var branchBlock = graph.Blocks[0];
    branchBlock.Instructions.Should().HaveCount(2);
    branchBlock.TerminatorKind.Should().Be(BlockTerminatorKind.ConditionalBranch);
    branchBlock.Successors.Should().Contain(1); // not taken: falls through to nop
    branchBlock.Successors.Should().Contain(2); // taken: jumps to ret

    // Block 1: nop
    var skippedBlock = graph.Blocks[1];
    skippedBlock.Instructions.Should().HaveCount(1);
    skippedBlock.TerminatorKind.Should().Be(BlockTerminatorKind.FallThrough);
    skippedBlock.Successors.Should().ContainSingle().Which.Should().Be(2);
    skippedBlock.Predecessors.Should().ContainSingle().Which.Should().Be(0);

    // Block 2: ret
    var joinBlock = graph.Blocks[2];
    joinBlock.Instructions.Should().HaveCount(1);
    joinBlock.TerminatorKind.Should().Be(BlockTerminatorKind.Return);
    joinBlock.Successors.Should().BeEmpty();
}
|
|
|
|
/// <summary>
/// An if/else shape (conditional branch, "then" body ending in a jump, "else" body, shared
/// return) is expected to produce four blocks with a single exit, where the first block reaches
/// both bodies and both bodies reach the return block.
/// </summary>
[Fact]
public void Extract_IfElsePattern_CreatesCorrectBlocks()
{
    // Arrange: if-else pattern
    //   0x1000  cmp rax, 0
    //   0x1004  je  0x100F        ; -> else_label
    //   0x1006  mov rbx, 1        ; then branch (7 bytes)
    //   0x100D  jmp 0x1016        ; -> end_label
    //   0x100F  else_label: mov rbx, 2
    //   0x1016  end_label:  ret
    var instructions = new[]
    {
        CreateInstruction(0x1000, InstructionKind.Compare, "cmp", [0x48, 0x83, 0xF8, 0x00]),
        // The branch target must be the address of an actual instruction: the else-branch mov
        // sits at 0x100F, so rel8 = 0x100F - 0x1006 = 0x09.
        CreateInstruction(0x1004, InstructionKind.ConditionalBranch, "je", [0x74, 0x09],
            CreateAddressOperand(0x100F)), // Jump to else
        CreateInstruction(0x1006, InstructionKind.Move, "mov", [0x48, 0xC7, 0xC3, 0x01, 0x00, 0x00, 0x00]),
        CreateInstruction(0x100D, InstructionKind.Branch, "jmp", [0xEB, 0x07],
            CreateAddressOperand(0x1016)), // Jump to ret
        CreateInstruction(0x100F, InstructionKind.Move, "mov", [0x48, 0xC7, 0xC3, 0x02, 0x00, 0x00, 0x00]),
        CreateInstruction(0x1016, InstructionKind.Return, "ret", [0xC3])
    };

    // Act
    var cfg = CfgExtractor.Extract(instructions);

    // Assert
    cfg.Blocks.Should().HaveCount(4);
    cfg.ExitBlockIds.Should().HaveCount(1);

    // Block 0 (cmp + je) must reach both the then block (1) and the else block (2);
    // this is what a mis-addressed je target would break.
    cfg.Blocks[0].Successors.Should().Contain(1);
    cfg.Blocks[0].Successors.Should().Contain(2);

    // Block 3 (ret) is the join point of the then and else bodies.
    cfg.Blocks[3].TerminatorKind.Should().Be(BlockTerminatorKind.Return);
    cfg.Blocks[3].Predecessors.Should().HaveCount(2);
}
|
|
|
|
#endregion
|
|
|
|
#region Loop Tests
|
|
|
|
/// <summary>
/// A conditional branch back to the start of its own block is expected to show up as a
/// self-edge on block 0, alongside the fall-through edge to the return block.
/// </summary>
[Fact]
public void Extract_SimpleLoop_CreatesBackEdge()
{
    // Arrange: simple loop
    //   0x1000  loop_start: dec rax
    //   0x1003  jnz loop_start
    //   0x1005  ret
    NormalizedInstruction[] program =
    [
        CreateInstruction(
            0x1000, InstructionKind.Arithmetic, "dec", [0x48, 0xFF, 0xC8],
            CreateRegisterOperand("rax")),
        CreateInstruction(
            0x1003, InstructionKind.ConditionalBranch, "jnz", [0x75, 0xFB],
            CreateAddressOperand(0x1000)), // back to dec
        CreateInstruction(0x1005, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(program);

    // Assert
    graph.Blocks.Should().HaveCount(2);

    // Block 0: dec + jnz, looping back onto itself
    var loopBlock = graph.Blocks[0];
    loopBlock.TerminatorKind.Should().Be(BlockTerminatorKind.ConditionalBranch);
    loopBlock.Successors.Should().Contain(0); // back edge to itself
    loopBlock.Successors.Should().Contain(1); // fall through to ret

    // Block 1: ret
    var exitBlock = graph.Blocks[1];
    exitBlock.TerminatorKind.Should().Be(BlockTerminatorKind.Return);
    exitBlock.Predecessors.Should().ContainSingle().Which.Should().Be(0);
}
|
|
|
|
#endregion
|
|
|
|
#region CFG Metrics Tests
|
|
|
|
/// <summary>
/// Branch-free code is expected to report one basic block, zero edges, a cyclomatic complexity
/// of 1 and a non-empty edge hash.
/// </summary>
[Fact]
public void ComputeMetrics_LinearCode_HasCorrectMetrics()
{
    // Arrange: straight-line code, no branches
    NormalizedInstruction[] program =
    [
        CreateInstruction(0x1000, InstructionKind.Move, "mov", [0x48, 0x89, 0xC0]),
        CreateInstruction(0x1003, InstructionKind.Arithmetic, "add", [0x48, 0x83, 0xC0, 0x01]),
        CreateInstruction(0x1007, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var result = CfgExtractor.ComputeMetrics(program);

    // Assert
    result.BasicBlockCount.Should().Be(1);
    result.EdgeCount.Should().Be(0);

    // edges - nodes + 2 = 0 - 1 + 2 = 1
    result.CyclomaticComplexity.Should().Be(1);
    result.EdgeHash.Should().NotBeNullOrEmpty();
}
|
|
|
|
/// <summary>
/// A single conditional that skips one instruction is expected to report three blocks,
/// three edges and a cyclomatic complexity of 2.
/// </summary>
[Fact]
public void ComputeMetrics_IfStatement_HasCorrectComplexity()
{
    // Arrange: simple if with two paths (je either skips the nop or falls into it)
    NormalizedInstruction[] program =
    [
        CreateInstruction(0x1000, InstructionKind.Compare, "cmp", [0x48, 0x83, 0xF8, 0x00]),
        CreateInstruction(
            0x1004, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1007)),
        CreateInstruction(0x1006, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x1007, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var result = CfgExtractor.ComputeMetrics(program);

    // Assert
    result.BasicBlockCount.Should().Be(3);

    // Edges: 0 -> 1 (fallthrough), 0 -> 2 (branch), 1 -> 2 (fallthrough)
    result.EdgeCount.Should().Be(3);

    // 3 - 3 + 2 = 2
    result.CyclomaticComplexity.Should().Be(2);
}
|
|
|
|
/// <summary>
/// A branch-free sequence and a sequence containing a conditional branch are expected to
/// produce different edge hashes.
/// </summary>
[Fact]
public void ComputeMetrics_DifferentCfgs_HaveDifferentEdgeHashes()
{
    // Arrange: two structurally different instruction streams
    NormalizedInstruction[] straightLine =
    [
        CreateInstruction(0x1000, InstructionKind.Move, "mov", [0x48, 0x89, 0xC0]),
        CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])
    ];

    NormalizedInstruction[] withBranch =
    [
        CreateInstruction(
            0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1003)),
        CreateInstruction(0x1002, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var straightLineMetrics = CfgExtractor.ComputeMetrics(straightLine);
    var withBranchMetrics = CfgExtractor.ComputeMetrics(withBranch);

    // Assert
    straightLineMetrics.EdgeHash.Should().NotBe(withBranchMetrics.EdgeHash);
}
|
|
|
|
/// <summary>
/// Two instruction streams with the same branch shape but different base addresses (and a
/// different conditional mnemonic) are expected to yield equal edge hashes, block counts and
/// edge counts.
/// </summary>
[Fact]
public void ComputeMetrics_SameCfgStructure_HasSameEdgeHash()
{
    // Arrange: same structure, based at 0x1000 and 0x2000 respectively
    NormalizedInstruction[] atLowBase =
    [
        CreateInstruction(
            0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1003)),
        CreateInstruction(0x1002, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])
    ];

    NormalizedInstruction[] atHighBase =
    [
        CreateInstruction(
            0x2000, InstructionKind.ConditionalBranch, "jne", [0x75, 0x01],
            CreateAddressOperand(0x2003)),
        CreateInstruction(0x2002, InstructionKind.Nop, "nop", [0x90]),
        CreateInstruction(0x2003, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var lowMetrics = CfgExtractor.ComputeMetrics(atLowBase);
    var highMetrics = CfgExtractor.ComputeMetrics(atHighBase);

    // Assert: same CFG structure should produce same edge hash
    lowMetrics.EdgeHash.Should().Be(highMetrics.EdgeHash);
    lowMetrics.BasicBlockCount.Should().Be(highMetrics.BasicBlockCount);
    lowMetrics.EdgeCount.Should().Be(highMetrics.EdgeCount);
}
|
|
|
|
#endregion
|
|
|
|
#region Call Instruction Tests
|
|
|
|
/// <summary>
/// A <c>call</c> followed by more code is expected to end its block with a
/// <c>BlockTerminatorKind.Call</c> terminator whose only successor is the following block.
/// </summary>
[Fact]
public void Extract_CallInstruction_ContinuesToNextBlock()
{
    // Arrange: call followed by more code
    var instructions = new[]
    {
        // E8 rel32 is relative to the next instruction (0x1005):
        // 0x2000 - 0x1005 = 0x0FFB, little-endian FB 0F 00 00.
        CreateInstruction(0x1000, InstructionKind.Call, "call", [0xE8, 0xFB, 0x0F, 0x00, 0x00],
            CreateAddressOperand(0x2000)),
        CreateInstruction(0x1005, InstructionKind.Move, "mov", [0x48, 0x89, 0xC0]),
        CreateInstruction(0x1008, InstructionKind.Return, "ret", [0xC3])
    };

    // Act
    var cfg = CfgExtractor.Extract(instructions);

    // Assert
    cfg.Blocks.Should().HaveCount(2);
    cfg.Blocks[0].TerminatorKind.Should().Be(BlockTerminatorKind.Call);
    cfg.Blocks[0].Successors.Should().ContainSingle().Which.Should().Be(1);
}
|
|
|
|
#endregion
|
|
|
|
#region Unconditional Jump Tests
|
|
|
|
/// <summary>
/// An unconditional jump is expected to end its block with a <c>BlockTerminatorKind.Jump</c>
/// terminator and exactly one successor (the jump target), with no fall-through edge.
/// </summary>
[Fact]
public void Extract_UnconditionalJump_NoFallthrough()
{
    // Arrange: jmp over two nops straight to the ret
    NormalizedInstruction[] program =
    [
        CreateInstruction(
            0x1000, InstructionKind.Branch, "jmp", [0xEB, 0x02],
            CreateAddressOperand(0x1004)),
        CreateInstruction(0x1002, InstructionKind.Nop, "nop", [0x90]), // skipped by the jmp
        CreateInstruction(0x1003, InstructionKind.Nop, "nop", [0x90]), // skipped by the jmp
        CreateInstruction(0x1004, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(program);

    // Assert
    graph.Blocks.Should().HaveCount(3);

    var jumpBlock = graph.Blocks[0];
    jumpBlock.TerminatorKind.Should().Be(BlockTerminatorKind.Jump);
    jumpBlock.Successors.Should().ContainSingle().Which.Should().Be(2); // jump target only
}
|
|
|
|
#endregion
|
|
|
|
#region Edge Cases
|
|
|
|
/// <summary>
/// When both arms of a conditional branch end in their own <c>ret</c>, the CFG is expected
/// to list two exit blocks.
/// </summary>
[Fact]
public void Extract_MultipleExits_TracksAllExitBlocks()
{
    // Arrange: je chooses between two separate returns
    NormalizedInstruction[] program =
    [
        CreateInstruction(
            0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x01],
            CreateAddressOperand(0x1003)),
        CreateInstruction(0x1002, InstructionKind.Return, "ret", [0xC3]), // exit 1 (fall-through)
        CreateInstruction(0x1003, InstructionKind.Return, "ret", [0xC3])  // exit 2 (branch target)
    ];

    // Act
    var graph = CfgExtractor.Extract(program);

    // Assert
    graph.ExitBlockIds.Should().HaveCount(2);
}
|
|
|
|
/// <summary>
/// In a diamond-shaped flow, the block that ends in a return is expected to have two
/// predecessors (one per arm of the diamond).
/// </summary>
[Fact]
public void Extract_PredecessorsAreCorrect()
{
    // Arrange: diamond pattern
    //        B0 (je)
    //       /       \
    //   B1 (jmp)   B2 (jmp)
    //       \       /
    //        B3 (ret)
    NormalizedInstruction[] program =
    [
        CreateInstruction(
            0x1000, InstructionKind.ConditionalBranch, "je", [0x74, 0x02],
            CreateAddressOperand(0x1004)),
        CreateInstruction(
            0x1002, InstructionKind.Branch, "jmp", [0xEB, 0x02],
            CreateAddressOperand(0x1006)),
        CreateInstruction(
            0x1004, InstructionKind.Branch, "jmp", [0xEB, 0x00],
            CreateAddressOperand(0x1006)),
        CreateInstruction(0x1006, InstructionKind.Return, "ret", [0xC3])
    ];

    // Act
    var graph = CfgExtractor.Extract(program);

    // Assert: the first block that terminates in a return is the join point of the diamond
    var joinBlock = graph.Blocks.First(block => block.TerminatorKind == BlockTerminatorKind.Return);
    joinBlock.Predecessors.Should().HaveCount(2);
}
|
|
|
|
#endregion
|
|
}
|