// Repository viewer header (not part of the program):
// File: git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Disassembly.B2R2/B2R2LowUirLiftingService.cs
// Timestamp: 2026-01-14 18:39:19 +02:00
//
// Size: 698 lines, 24 KiB
// Language: C#

// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
// Sprint: SPRINT_20260112_004_BINIDX_b2r2_lowuir_perf_cache (BINIDX-LIR-01)
// Task: Implement B2R2 LowUIR adapter for IIrLiftingService
using System.Collections.Immutable;
using System.Globalization;
using B2R2;
using B2R2.FrontEnd;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
/// <summary>
/// B2R2 LowUIR adapter for the IR lifting service.
/// Maps B2R2 BinIR/LowUIR statements to the StellaOps IR model
/// with deterministic ordering and invariant formatting.
/// </summary>
public sealed class B2R2LowUirLiftingService : IIrLiftingService
{
// Logger used for debug traces of lifting/SSA work and for warnings when a B2R2 lift fails.
private readonly ILogger<B2R2LowUirLiftingService> _logger;

/// <summary>
/// Version string for cache key generation.
/// </summary>
public const string AdapterVersion = "1.0.0";

// Allow-list of architectures accepted by SupportsArchitecture; each entry has a
// corresponding ISA mapping in MapToB2R2Isa.
private static readonly ImmutableHashSet<CpuArchitecture> SupportedArchitectures =
    ImmutableHashSet.Create(
        CpuArchitecture.X86,
        CpuArchitecture.X86_64,
        CpuArchitecture.ARM32,
        CpuArchitecture.ARM64,
        CpuArchitecture.MIPS32,
        CpuArchitecture.MIPS64,
        CpuArchitecture.RISCV64,
        CpuArchitecture.PPC32,
        CpuArchitecture.SPARC);
/// <summary>
/// Creates the lifting service.
/// </summary>
/// <param name="logger">Logger that receives debug and warning output from this service.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
public B2R2LowUirLiftingService(ILogger<B2R2LowUirLiftingService> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
public bool SupportsArchitecture(CpuArchitecture architecture)
{
    // Membership in the static allow-list is the only criterion.
    return SupportedArchitectures.Contains(architecture);
}
/// <inheritdoc />
/// <remarks>
/// Instructions are lifted one at a time in input order and statement ids are assigned
/// sequentially starting at 0. A basic block is closed after every instruction accepted by
/// <c>IsBlockTerminator</c>; statements left over after the last lifted instruction form a
/// final block. When <c>options.MaxInstructions</c> is positive, at most that many
/// instructions are lifted. The work is done synchronously, so argument, architecture and
/// cancellation failures are thrown from the call itself rather than via a faulted task.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="instructions"/> is null.</exception>
/// <exception cref="NotSupportedException"><paramref name="architecture"/> is not supported.</exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
public Task<LiftedFunction> LiftToIrAsync(
    IReadOnlyList<DisassembledInstruction> instructions,
    string functionName,
    ulong startAddress,
    CpuArchitecture architecture,
    LiftOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(instructions);
    ct.ThrowIfCancellationRequested();

    options ??= LiftOptions.Default;

    if (!SupportsArchitecture(architecture))
    {
        throw new NotSupportedException(
            $"Architecture {architecture} is not supported for B2R2 LowUIR lifting.");
    }

    _logger.LogDebug(
        "B2R2 LowUIR lifting {InstructionCount} instructions for function {FunctionName} ({Architecture})",
        instructions.Count,
        functionName,
        architecture);

    var isa = MapToB2R2Isa(architecture);
    var statements = new List<IrStatement>();
    var basicBlocks = new List<IrBasicBlock>();
    var currentBlockStatements = new List<int>();
    var blockStartAddress = startAddress;
    var statementId = 0;
    var blockId = 0;

    // A non-positive limit means "no limit".
    var effectiveMaxInstructions = options.MaxInstructions > 0
        ? options.MaxInstructions
        : int.MaxValue;

    // Number of instructions actually lifted; differs from instructions.Count when the
    // limit above truncates the input.
    var liftedCount = 0;

    // Seals the statements collected so far into a block ending at endAddress and
    // starts the next block at that address.
    void CloseCurrentBlock(ulong endAddress)
    {
        basicBlocks.Add(new IrBasicBlock(
            Id: blockId,
            Label: string.Format(CultureInfo.InvariantCulture, "bb_{0}", blockId),
            StartAddress: blockStartAddress,
            EndAddress: endAddress,
            StatementIds: [.. currentBlockStatements],
            Predecessors: ImmutableArray<int>.Empty,
            Successors: ImmutableArray<int>.Empty));

        blockId++;
        currentBlockStatements.Clear();
        blockStartAddress = endAddress;
    }

    foreach (var instr in instructions.Take(effectiveMaxInstructions))
    {
        ct.ThrowIfCancellationRequested();
        liftedCount++;

        // Lift instruction to B2R2 LowUIR
        var liftedStatements = LiftInstructionToLowUir(isa, instr, ref statementId);
        statements.AddRange(liftedStatements);

        foreach (var stmt in liftedStatements)
        {
            currentBlockStatements.Add(stmt.Id);
        }

        // Check for block-ending instructions
        if (IsBlockTerminator(instr))
        {
            CloseCurrentBlock(instr.Address + (ulong)instr.RawBytes.Length);
        }
    }

    // Handle trailing statements not yet in a block. The end address must come from the
    // last instruction that was actually lifted, not from the last element of the input:
    // with a MaxInstructions limit those are different instructions.
    if (currentBlockStatements.Count > 0 && liftedCount > 0)
    {
        var lastLifted = instructions[liftedCount - 1];
        CloseCurrentBlock(lastLifted.Address + (ulong)lastLifted.RawBytes.Length);
    }

    // Build CFG edges deterministically (sorted by address)
    var (blocksWithEdges, edges) = BuildCfgEdges([.. basicBlocks]);

    var cfg = new ControlFlowGraph(
        EntryBlockId: blocksWithEdges.Length > 0 ? 0 : -1,
        ExitBlockIds: FindExitBlocks(blocksWithEdges),
        Edges: edges);

    var lifted = new LiftedFunction(
        Name: functionName,
        Address: startAddress,
        Statements: [.. statements],
        BasicBlocks: blocksWithEdges,
        Cfg: cfg);

    _logger.LogDebug(
        "B2R2 LowUIR lifted {StatementCount} statements in {BlockCount} blocks for {FunctionName}",
        statements.Count,
        blocksWithEdges.Length,
        functionName);

    return Task.FromResult(lifted);
}
/// <inheritdoc />
/// <remarks>
/// Versions are assigned in linear statement order: each destination gets the next version
/// of its base name, and each source refers to the latest version defined so far (0 when the
/// name has not been defined yet). A statement's sources are versioned before its
/// destination, so a statement that reads and writes the same name uses the previous version
/// rather than the one it is defining. No phi nodes are inserted; every block's
/// <c>PhiNodes</c> is empty. Operands without a name share the base name <c>"?"</c>.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="lifted"/> is null.</exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
public Task<SsaFunction> TransformToSsaAsync(
    LiftedFunction lifted,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(lifted);
    ct.ThrowIfCancellationRequested();

    _logger.LogDebug(
        "Transforming {FunctionName} to SSA form ({StatementCount} statements)",
        lifted.Name,
        lifted.Statements.Length);

    // Build SSA form from lifted function
    var ssaStatements = new List<SsaStatement>();
    var ssaBlocks = new List<SsaBasicBlock>();
    var definitions = new Dictionary<SsaVariable, int>();
    var uses = new Dictionary<SsaVariable, HashSet<int>>();
    var versionCounters = new Dictionary<string, int>();

    foreach (var stmt in lifted.Statements)
    {
        ct.ThrowIfCancellationRequested();

        // Process sources BEFORE the destination: a use must refer to the version that is
        // live on entry to the statement, not to the version the statement itself creates.
        var sourceVars = new List<SsaVariable>();
        foreach (var src in stmt.Sources)
        {
            var sourceName = src.Name ?? "?";
            var currentVersion = versionCounters.GetValueOrDefault(sourceName, 0);

            var ssaVar = new SsaVariable(
                BaseName: sourceName,
                Version: currentVersion,
                BitSize: src.BitSize,
                Kind: MapOperandKindToSsaKind(src.Kind));
            sourceVars.Add(ssaVar);

            if (!uses.TryGetValue(ssaVar, out var useSites))
            {
                useSites = [];
                uses[ssaVar] = useSites;
            }

            useSites.Add(stmt.Id);
        }

        // Process destination: allocate the next version of its base name.
        SsaVariable? destVar = null;
        if (stmt.Destination != null)
        {
            var destName = stmt.Destination.Name ?? "?";
            var nextVersion = versionCounters.GetValueOrDefault(destName, 0) + 1;
            versionCounters[destName] = nextVersion;

            destVar = new SsaVariable(
                BaseName: destName,
                Version: nextVersion,
                BitSize: stmt.Destination.BitSize,
                Kind: MapOperandKindToSsaKind(stmt.Destination.Kind));
            definitions[destVar] = stmt.Id;
        }

        var ssaStmt = new SsaStatement(
            Id: stmt.Id,
            Address: stmt.Address,
            Kind: stmt.Kind,
            Operation: stmt.Operation,
            Destination: destVar,
            Sources: [.. sourceVars],
            PhiSources: null);
        ssaStatements.Add(ssaStmt);
    }

    // Build SSA basic blocks from lifted blocks
    foreach (var block in lifted.BasicBlocks)
    {
        // Hash the block's ids once so membership is O(1) per statement; filtering the
        // statement list (rather than iterating the ids) keeps the function-wide order.
        var memberIds = block.StatementIds.ToHashSet();
        var blockStatements = ssaStatements
            .Where(s => memberIds.Contains(s.Id))
            .ToImmutableArray();

        var ssaBlock = new SsaBasicBlock(
            Id: block.Id,
            Label: block.Label,
            PhiNodes: ImmutableArray<SsaStatement>.Empty,
            Statements: blockStatements,
            Predecessors: block.Predecessors,
            Successors: block.Successors);
        ssaBlocks.Add(ssaBlock);
    }

    var defUse = new DefUseChains(
        Definitions: definitions.ToImmutableDictionary(),
        Uses: uses.ToImmutableDictionary(
            k => k.Key,
            v => v.Value.ToImmutableHashSet()));

    var ssaFunction = new SsaFunction(
        Name: lifted.Name,
        Address: lifted.Address,
        Statements: [.. ssaStatements],
        BasicBlocks: [.. ssaBlocks],
        DefUse: defUse);

    _logger.LogDebug(
        "SSA transformation complete: {StatementCount} SSA statements, {DefCount} definitions",
        ssaStatements.Count,
        definitions.Count);

    return Task.FromResult(ssaFunction);
}
#region B2R2 LowUIR Mapping
/// <summary>
/// Lifts a single instruction through B2R2 and maps the resulting statements to the IR model.
/// </summary>
/// <param name="isa">B2R2 ISA used to construct the <c>BinHandle</c> for the instruction bytes.</param>
/// <param name="instr">Instruction whose raw bytes are lifted.</param>
/// <param name="statementId">Next free statement id; advanced by one for every statement returned.</param>
/// <returns>
/// At least one statement. When B2R2 yields nothing, yields only statement kinds that do not
/// map, or throws, the result is exactly one fallback statement derived from the disassembly.
/// </returns>
private List<IrStatement> LiftInstructionToLowUir(
    ISA isa,
    DisassembledInstruction instr,
    ref int statementId)
{
    // Remember where id allocation started so a failed lift can be rolled back cleanly.
    var firstStatementId = statementId;
    var statements = new List<IrStatement>();

    try
    {
        // Create B2R2 BinHandle and lifting unit for the ISA
        var bytes = instr.RawBytes.ToArray();
        var binHandle = new BinHandle(bytes, isa, null, true);
        var lifter = binHandle.NewLiftingUnit();

        // Lift to LowUIR using B2R2 - returns Stmt[] directly
        var liftResult = lifter.LiftInstruction(instr.Address);

        if (liftResult == null || liftResult.Length == 0)
        {
            // Fallback to simple mapping if B2R2 lift fails
            statements.Add(CreateFallbackStatement(instr, statementId++));
            return statements;
        }

        // Map each B2R2 LowUIR statement to our IR model
        foreach (var b2r2Stmt in liftResult)
        {
            var irStmt = MapB2R2Statement(b2r2Stmt, instr.Address, ref statementId);
            if (irStmt != null)
            {
                statements.Add(irStmt);
            }
        }

        // Ensure at least one statement per instruction for determinism
        if (statements.Count == 0)
        {
            statements.Add(CreateFallbackStatement(instr, statementId++));
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(
            ex,
            "B2R2 lift failed for instruction at {Address}: {Mnemonic}",
            instr.Address,
            instr.Mnemonic);

        // The failure may have happened after some statements were already mapped.
        // Drop that partial output and release its ids so the instruction is represented
        // by the fallback statement alone, not by a partial lift plus a fallback.
        statements.Clear();
        statementId = firstStatementId;
        statements.Add(CreateFallbackStatement(instr, statementId++));
    }

    return statements;
}
/// <summary>
/// Converts one B2R2 statement object into an <c>IrStatement</c>, keyed on the runtime type
/// name of the object (Put, Store, Jmp, CJmp, InterJmp, InterCJmp, LMark, SideEffect).
/// </summary>
/// <param name="b2r2Stmt">Statement object produced by the B2R2 lifter.</param>
/// <param name="baseAddress">Address recorded on the resulting statement.</param>
/// <param name="statementId">Next free statement id; advanced only when a statement is produced.</param>
/// <returns>The mapped statement, or null when the type name maps to <c>IrStatementKind.Unknown</c>.</returns>
private IrStatement? MapB2R2Statement(object b2r2Stmt, ulong baseAddress, ref int statementId)
{
    var typeName = b2r2Stmt.GetType().Name;
    var mappedKind = MapB2R2StmtTypeToKind(typeName);

    if (mappedKind != IrStatementKind.Unknown)
    {
        var operands = ExtractOperandsFromB2R2Stmt(b2r2Stmt);

        // Claim the id only after operand extraction has succeeded.
        var assignedId = statementId;
        statementId = assignedId + 1;

        // The B2R2 type name doubles as the operation name.
        return new IrStatement(
            Id: assignedId,
            Address: baseAddress,
            Kind: mappedKind,
            Operation: typeName,
            Destination: operands.Dest,
            Sources: operands.Sources,
            Metadata: null);
    }

    return null;
}
/// <summary>
/// Maps a B2R2 statement type name to the IR statement kind.
/// </summary>
/// <param name="stmtType">Runtime type name of the B2R2 statement.</param>
/// <returns>The matching kind, or <c>IrStatementKind.Unknown</c> for any other name.</returns>
private static IrStatementKind MapB2R2StmtTypeToKind(string stmtType)
{
    switch (stmtType)
    {
        case "Put":
            return IrStatementKind.Assign;

        case "Store":
            return IrStatementKind.Store;

        // Direct and inter-instruction jumps share a kind.
        case "Jmp":
        case "InterJmp":
            return IrStatementKind.Jump;

        case "CJmp":
        case "InterCJmp":
            return IrStatementKind.ConditionalJump;

        case "LMark":
            return IrStatementKind.Nop;

        case "SideEffect":
            return IrStatementKind.Syscall;

        default:
            return IrStatementKind.Unknown;
    }
}
/// <summary>
/// Reads operand-bearing properties from a B2R2 statement via reflection.
/// </summary>
/// <param name="b2r2Stmt">Statement object to inspect.</param>
/// <returns>
/// The operand built from the <c>Dest</c> property (null when absent or null), and the
/// sources in this order: the <c>Value</c> property (or <c>Src</c> when there is no
/// <c>Value</c> property), followed by the <c>Cond</c> property. Missing or null
/// properties contribute nothing.
/// </returns>
private static (IrOperand? Dest, ImmutableArray<IrOperand> Sources) ExtractOperandsFromB2R2Stmt(object b2r2Stmt)
{
    var stmtType = b2r2Stmt.GetType();

    // Destination, if the statement exposes one.
    IrOperand? destination = null;
    if (stmtType.GetProperty("Dest")?.GetValue(b2r2Stmt) is { } destExpr)
    {
        destination = CreateOperandFromB2R2Expr(destExpr);
    }

    var collected = ImmutableArray.CreateBuilder<IrOperand>();

    // "Src" is consulted only when no "Value" property exists at all.
    var valueProperty = stmtType.GetProperty("Value") ?? stmtType.GetProperty("Src");
    if (valueProperty?.GetValue(b2r2Stmt) is { } valueExpr)
    {
        collected.Add(CreateOperandFromB2R2Expr(valueExpr));
    }

    // Condition of conditional jumps is appended after the value.
    if (stmtType.GetProperty("Cond")?.GetValue(b2r2Stmt) is { } condExpr)
    {
        collected.Add(CreateOperandFromB2R2Expr(condExpr));
    }

    return (destination, collected.ToImmutable());
}
/// <summary>
/// Builds an <c>IrOperand</c> from a B2R2 expression object, keyed on its runtime type name.
/// </summary>
/// <param name="expr">Expression object to convert.</param>
/// <returns>
/// A register, temporary, immediate or memory operand for <c>Var</c>, <c>TempVar</c>,
/// <c>Num</c> and <c>Load</c>; any other type yields an Unknown operand named after the
/// type with a bit size of 64.
/// </returns>
private static IrOperand CreateOperandFromB2R2Expr(object expr)
{
    var typeName = expr.GetType().Name;

    switch (typeName)
    {
        case "Var":
            return new IrOperand(
                Kind: IrOperandKind.Register,
                Name: GetVarName(expr),
                Value: null,
                BitSize: GetVarBitWidth(expr),
                IsMemory: false);

        case "TempVar":
            return new IrOperand(
                Kind: IrOperandKind.Temporary,
                Name: GetTempVarName(expr),
                Value: null,
                BitSize: GetVarBitWidth(expr),
                IsMemory: false);

        case "Num":
            return new IrOperand(
                Kind: IrOperandKind.Immediate,
                Name: null,
                Value: GetNumValueLong(expr),
                BitSize: GetNumBitWidth(expr),
                IsMemory: false);

        case "Load":
            // The address expression is not modelled; every load uses the same placeholder name.
            return new IrOperand(
                Kind: IrOperandKind.Memory,
                Name: "[mem]",
                Value: null,
                BitSize: GetLoadBitWidth(expr),
                IsMemory: true);

        default:
            return new IrOperand(
                Kind: IrOperandKind.Unknown,
                Name: typeName,
                Value: null,
                BitSize: 64,
                IsMemory: false);
    }
}
/// <summary>
/// Reads the <c>Name</c> property of an expression object via reflection.
/// </summary>
/// <param name="varExpr">Expression object to inspect.</param>
/// <returns>The property value's string form, or <c>"?"</c> when the property is missing or null.</returns>
private static string GetVarName(object varExpr)
{
    var property = varExpr.GetType().GetProperty("Name");
    if (property is null)
    {
        return "?";
    }

    var raw = property.GetValue(varExpr);
    return raw?.ToString() ?? "?";
}
/// <summary>
/// Formats a temporary's name as <c>T</c> followed by its <c>N</c> property.
/// </summary>
/// <param name="tempVarExpr">Expression object to inspect.</param>
/// <returns><c>"T{N}"</c> formatted with the invariant culture; <c>"T0"</c> when <c>N</c> is missing or null.</returns>
private static string GetTempVarName(object tempVarExpr)
{
    object ordinal = 0;

    var property = tempVarExpr.GetType().GetProperty("N");
    if (property?.GetValue(tempVarExpr) is { } actual)
    {
        ordinal = actual;
    }

    return string.Format(CultureInfo.InvariantCulture, "T{0}", ordinal);
}
// Bit width reported when an expression exposes no usable Type.BitSize value.
private const int DefaultBitWidth = 64;

/// <summary>
/// Reads the bit width of a variable expression from its <c>Type.BitSize</c> property.
/// </summary>
/// <param name="varExpr">Expression object to inspect.</param>
/// <returns>The bit size, or 64 when it cannot be read.</returns>
private static int GetVarBitWidth(object varExpr) => ReadTypeBitSize(varExpr);

/// <summary>
/// Reads the <c>Value</c> property of a numeric expression as a signed 64-bit integer.
/// </summary>
/// <param name="numExpr">Expression object to inspect.</param>
/// <returns>
/// The converted value; 0 when the property is missing or null. An unsigned 64-bit value
/// is reinterpreted bit-for-bit, so values above <see cref="long.MaxValue"/> come back negative.
/// </returns>
/// <exception cref="InvalidCastException">The value's type does not implement <see cref="IConvertible"/>.</exception>
private static long GetNumValueLong(object numExpr)
{
    var value = numExpr.GetType().GetProperty("Value")?.GetValue(numExpr);

    return value switch
    {
        // Convert.ToInt64 throws OverflowException for ulong values above long.MaxValue;
        // an immediate with the top bit set is a valid 64-bit pattern, so keep its bits.
        ulong unsigned => unchecked((long)unsigned),
        _ => Convert.ToInt64(value, CultureInfo.InvariantCulture)
    };
}

/// <summary>
/// Reads the bit width of a numeric expression from its <c>Type.BitSize</c> property.
/// </summary>
/// <param name="numExpr">Expression object to inspect.</param>
/// <returns>The bit size, or 64 when it cannot be read.</returns>
private static int GetNumBitWidth(object numExpr) => ReadTypeBitSize(numExpr);

/// <summary>
/// Reads the bit width of a load expression from its <c>Type.BitSize</c> property.
/// </summary>
/// <param name="loadExpr">Expression object to inspect.</param>
/// <returns>The bit size, or 64 when it cannot be read.</returns>
private static int GetLoadBitWidth(object loadExpr) => ReadTypeBitSize(loadExpr);

// Shared reflection walk expr.Type.BitSize used by the three bit-width readers above.
// Any missing property or null value along the way yields DefaultBitWidth.
private static int ReadTypeBitSize(object expr)
{
    var typeValue = expr.GetType().GetProperty("Type")?.GetValue(expr);
    var bitSize = typeValue?.GetType().GetProperty("BitSize")?.GetValue(typeValue);

    return bitSize switch
    {
        int bits => bits,
        // A direct (int?) unboxing cast throws for any boxed integral type other than int,
        // so other integer widths are converted explicitly.
        byte or sbyte or short or ushort or uint or long or ulong
            => Convert.ToInt32(bitSize, CultureInfo.InvariantCulture),
        _ => DefaultBitWidth
    };
}
/// <summary>
/// Builds a statement directly from the disassembled instruction, for use when B2R2
/// produced nothing usable.
/// </summary>
/// <param name="instr">Instruction supplying address, mnemonic and operands.</param>
/// <param name="id">Statement id to assign.</param>
/// <returns>
/// A statement whose destination is the first operand (null when there are none), whose
/// sources are the remaining operands, and whose metadata carries <c>"fallback" = true</c>.
/// Every operand is given a bit size of 64.
/// </returns>
private static IrStatement CreateFallbackStatement(DisassembledInstruction instr, int id)
{
    // Convert every operand once, then split into destination and sources.
    var converted = instr.Operands
        .Select(op => new IrOperand(
            Kind: MapOperandType(op.Type),
            Name: op.Text,
            Value: op.Value,
            BitSize: 64,
            IsMemory: op.Type == OperandType.Memory))
        .ToImmutableArray();

    IrOperand? destination = null;
    if (converted.Length > 0)
    {
        destination = converted[0];
    }

    var remaining = converted.Skip(1).ToImmutableArray();
    var metadata = ImmutableDictionary<string, object>.Empty.Add("fallback", true);

    return new IrStatement(
        Id: id,
        Address: instr.Address,
        Kind: MapMnemonicToKind(instr.Mnemonic),
        Operation: instr.Mnemonic,
        Destination: destination,
        Sources: remaining,
        Metadata: metadata);
}
/// <summary>
/// Maps an IR operand kind to the SSA variable kind.
/// </summary>
/// <param name="kind">Operand kind to translate.</param>
/// <returns>The matching SSA kind; kinds without an explicit mapping become <c>Temporary</c>.</returns>
private static SsaVariableKind MapOperandKindToSsaKind(IrOperandKind kind)
{
    switch (kind)
    {
        case IrOperandKind.Register:
            return SsaVariableKind.Register;
        case IrOperandKind.Temporary:
            return SsaVariableKind.Temporary;
        case IrOperandKind.Memory:
            return SsaVariableKind.Memory;
        case IrOperandKind.Immediate:
            return SsaVariableKind.Constant;
        default:
            return SsaVariableKind.Temporary;
    }
}
/// <summary>
/// Maps a disassembler operand type to the IR operand kind.
/// </summary>
/// <param name="type">Operand type reported by the disassembler.</param>
/// <returns>The matching IR kind; address operands become <c>Label</c>, anything else <c>Unknown</c>.</returns>
private static IrOperandKind MapOperandType(OperandType type)
{
    switch (type)
    {
        case OperandType.Register:
            return IrOperandKind.Register;
        case OperandType.Immediate:
            return IrOperandKind.Immediate;
        case OperandType.Memory:
            return IrOperandKind.Memory;
        case OperandType.Address:
            return IrOperandKind.Label;
        default:
            return IrOperandKind.Unknown;
    }
}
#endregion
#region Helper Methods
/// <summary>
/// Translates a <c>CpuArchitecture</c> into the B2R2 <c>ISA</c> used for lifting.
/// </summary>
/// <param name="arch">Architecture to translate.</param>
/// <returns>A new ISA instance for the architecture.</returns>
/// <exception cref="NotSupportedException">The architecture has no mapping.</exception>
private static ISA MapToB2R2Isa(CpuArchitecture arch)
{
    switch (arch)
    {
        case CpuArchitecture.X86:
            return new ISA(Architecture.Intel, WordSize.Bit32);
        case CpuArchitecture.X86_64:
            return new ISA(Architecture.Intel, WordSize.Bit64);
        case CpuArchitecture.ARM32:
            return new ISA(Architecture.ARMv7, WordSize.Bit32);
        case CpuArchitecture.ARM64:
            return new ISA(Architecture.ARMv8, WordSize.Bit64);
        case CpuArchitecture.MIPS32:
            return new ISA(Architecture.MIPS, WordSize.Bit32);
        case CpuArchitecture.MIPS64:
            return new ISA(Architecture.MIPS, WordSize.Bit64);
        case CpuArchitecture.RISCV64:
            return new ISA(Architecture.RISCV, WordSize.Bit64);
        // PPC32 and SPARC are the only mappings that pass an explicit endianness.
        case CpuArchitecture.PPC32:
            return new ISA(Architecture.PPC, Endian.Big, WordSize.Bit32);
        case CpuArchitecture.SPARC:
            return new ISA(Architecture.SPARC, Endian.Big);
        default:
            throw new NotSupportedException($"Unsupported architecture: {arch}");
    }
}
// Mnemonics that start with 'B' but are not control transfers on any supported
// architecture (x86 bit-manipulation/byte-swap, ARM bit-clear/bit-field, ARM64 BTI hint).
// "BT" is intentionally absent: it is a bit test on x86 but a branch on PowerPC, and this
// method is not told which architecture it is looking at.
private static readonly ImmutableHashSet<string> NonBranchBMnemonics = ImmutableHashSet.Create(
    StringComparer.Ordinal,
    "BSWAP", "BSF", "BSR", "BTC", "BTR", "BTS", "BEXTR", "BLSI", "BLSMSK", "BLSR", "BZHI", "BOUND",
    "BIC", "BICS", "BFI", "BFC", "BFM", "BFXIL", "BTI");

// Control transfers that the J/B prefix test does not catch: x86 returns and the ARM
// compare/test-and-branch forms.
private static readonly ImmutableHashSet<string> ExplicitTerminators = ImmutableHashSet.Create(
    StringComparer.Ordinal,
    "RET", "RETN", "RETF", "IRET", "SYSRET",
    "CBZ", "CBNZ", "TBZ", "TBNZ");

/// <summary>
/// Decides whether an instruction ends a basic block, based on its mnemonic alone.
/// </summary>
/// <param name="instr">Instruction to classify.</param>
/// <returns>
/// True for the explicit return/branch mnemonics and for any mnemonic starting with
/// <c>J</c> or <c>B</c> (case-insensitive), except known non-branch <c>B*</c> instructions.
/// </returns>
private static bool IsBlockTerminator(DisassembledInstruction instr)
{
    var mnemonic = instr.Mnemonic.ToUpperInvariant();

    // Rule out look-alikes first so the prefix test below cannot misfire on them.
    if (NonBranchBMnemonics.Contains(mnemonic))
    {
        return false;
    }

    if (ExplicitTerminators.Contains(mnemonic))
    {
        return true;
    }

    // Covers x86 Jcc/JMP and the B-prefixed branch families (including BLR, BX, JR).
    return mnemonic.StartsWith("J", StringComparison.Ordinal) ||
           mnemonic.StartsWith("B", StringComparison.Ordinal);
}
/// <summary>
/// Classifies an instruction mnemonic into an IR statement kind for fallback statements.
/// </summary>
/// <param name="mnemonic">Instruction mnemonic; compared case-insensitively.</param>
/// <returns>
/// The kind for exact call/return/unconditional-jump/NOP mnemonics, otherwise the kind of
/// the first matching mnemonic prefix, otherwise <c>IrStatementKind.Unknown</c>.
/// </returns>
private static IrStatementKind MapMnemonicToKind(string mnemonic)
{
    var upper = mnemonic.ToUpperInvariant();

    // Exact mnemonics are resolved before any prefix test. Several of them start with
    // 'B' or 'J' and would otherwise be swallowed by the generic branch prefix below,
    // which made the call/return cases for BL, BLX and BLR unreachable.
    switch (upper)
    {
        case "CALL":
        case "BL":
        case "BLX":
            return IrStatementKind.Call;

        case "RET":
        case "RETN":
        case "BLR":
            return IrStatementKind.Return;

        // Unconditional transfers are plain jumps, not conditional ones.
        case "JMP":
        case "J":
        case "JR":
        case "B":
        case "BR":
        case "BX":
            return IrStatementKind.Jump;

        case "NOP":
            return IrStatementKind.Nop;
    }

    if (upper.StartsWith("MOV", StringComparison.Ordinal) ||
        upper.StartsWith("LEA", StringComparison.Ordinal) ||
        upper.StartsWith("LDR", StringComparison.Ordinal))
    {
        return IrStatementKind.Assign;
    }

    if (upper.StartsWith("ADD", StringComparison.Ordinal) ||
        upper.StartsWith("SUB", StringComparison.Ordinal) ||
        upper.StartsWith("MUL", StringComparison.Ordinal) ||
        upper.StartsWith("DIV", StringComparison.Ordinal))
    {
        return IrStatementKind.BinaryOp;
    }

    if (upper.StartsWith("AND", StringComparison.Ordinal) ||
        upper.StartsWith("OR", StringComparison.Ordinal) ||
        upper.StartsWith("XOR", StringComparison.Ordinal) ||
        upper.StartsWith("SH", StringComparison.Ordinal))
    {
        return IrStatementKind.BinaryOp;
    }

    if (upper.StartsWith("CMP", StringComparison.Ordinal) ||
        upper.StartsWith("TEST", StringComparison.Ordinal))
    {
        return IrStatementKind.Compare;
    }

    // Remaining J*/B* mnemonics are treated as conditional branches.
    if (upper.StartsWith("J", StringComparison.Ordinal) ||
        upper.StartsWith("B", StringComparison.Ordinal))
    {
        return IrStatementKind.ConditionalJump;
    }

    if (upper.StartsWith("PUSH", StringComparison.Ordinal) ||
        upper.StartsWith("POP", StringComparison.Ordinal) ||
        upper.StartsWith("STR", StringComparison.Ordinal))
    {
        return IrStatementKind.Store;
    }

    return IrStatementKind.Unknown;
}
/// <summary>
/// Links consecutive blocks with fall-through edges.
/// </summary>
/// <param name="blocks">Blocks in layout order.</param>
/// <returns>
/// Copies of the blocks with predecessor/successor lists filled in, plus one
/// <c>FallThrough</c> edge from every block to the next one. Links and edges are expressed
/// as positions in <paramref name="blocks"/>, not as the blocks' <c>Id</c> values. No edges
/// are derived from branch targets.
/// </returns>
private static (ImmutableArray<IrBasicBlock> Blocks, ImmutableArray<CfgEdge> Edges) BuildCfgEdges(
    ImmutableArray<IrBasicBlock> blocks)
{
    if (blocks.Length == 0)
    {
        return (blocks, ImmutableArray<CfgEdge>.Empty);
    }

    var lastIndex = blocks.Length - 1;
    var linkedBlocks = ImmutableArray.CreateBuilder<IrBasicBlock>(blocks.Length);
    var fallThroughEdges = ImmutableArray.CreateBuilder<CfgEdge>();

    for (var index = 0; index <= lastIndex; index++)
    {
        // Every block except the first is entered from the block before it.
        var incoming = index > 0
            ? ImmutableArray.Create(index - 1)
            : ImmutableArray<int>.Empty;

        // Every block except the last falls through to the block after it.
        var outgoing = ImmutableArray<int>.Empty;
        if (index < lastIndex)
        {
            outgoing = ImmutableArray.Create(index + 1);
            fallThroughEdges.Add(new CfgEdge(
                SourceBlockId: index,
                TargetBlockId: index + 1,
                Kind: CfgEdgeKind.FallThrough,
                Condition: null));
        }

        linkedBlocks.Add(blocks[index] with
        {
            Predecessors = incoming,
            Successors = outgoing
        });
    }

    return (linkedBlocks.ToImmutable(), fallThroughEdges.ToImmutable());
}
/// <summary>
/// Collects the ids of blocks that have no successors.
/// </summary>
/// <param name="blocks">Blocks whose successor lists are already populated.</param>
/// <returns>Ids of all successor-less blocks, in input order.</returns>
private static ImmutableArray<int> FindExitBlocks(ImmutableArray<IrBasicBlock> blocks)
{
    var exitIds = ImmutableArray.CreateBuilder<int>();

    foreach (var candidate in blocks)
    {
        if (candidate.Successors.Length == 0)
        {
            exitIds.Add(candidate.Id);
        }
    }

    return exitIds.ToImmutable();
}
#endregion
}