// Copyright (c) StellaOps. All rights reserved.
// Licensed under AGPL-3.0-or-later. See LICENSE in the project root.
// Sprint: SPRINT_20260112_004_BINIDX_b2r2_lowuir_perf_cache (BINIDX-LIR-01)
// Task: Implement B2R2 LowUIR adapter for IIrLiftingService

using System.Collections.Immutable;
using System.Globalization;
using B2R2;
using B2R2.FrontEnd;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Disassembly;
using StellaOps.BinaryIndex.Semantic;

namespace StellaOps.BinaryIndex.Disassembly.B2R2;

/// <summary>
/// B2R2 LowUIR adapter for the IR lifting service.
/// Maps B2R2 BinIR/LowUIR statements to the StellaOps IR model
/// with deterministic ordering and invariant formatting.
/// </summary>
public sealed class B2R2LowUirLiftingService : IIrLiftingService
{
    private readonly ILogger<B2R2LowUirLiftingService> _logger;

    /// <summary>
    /// Version string for cache key generation.
    /// </summary>
    /// <remarks>
    /// NOTE(review): if cached results are keyed on this value, it presumably needs a bump
    /// whenever the lifting output changes - confirm against the cache key builder.
    /// </remarks>
    public const string AdapterVersion = "1.0.0";

    private static readonly ImmutableHashSet<CpuArchitecture> SupportedArchitectures =
    [
        CpuArchitecture.X86,
        CpuArchitecture.X86_64,
        CpuArchitecture.ARM32,
        CpuArchitecture.ARM64,
        CpuArchitecture.MIPS32,
        CpuArchitecture.MIPS64,
        CpuArchitecture.RISCV64,
        CpuArchitecture.PPC32,
        CpuArchitecture.SPARC
    ];

    /// <summary>
    /// Creates the lifting service.
    /// </summary>
    /// <param name="logger">Logger used for debug traces and lift-failure warnings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public B2R2LowUirLiftingService(ILogger<B2R2LowUirLiftingService> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public bool SupportsArchitecture(CpuArchitecture architecture) =>
        SupportedArchitectures.Contains(architecture);

    /// <inheritdoc />
    public Task<LiftedFunction> LiftToIrAsync(
        IReadOnlyList<DisassembledInstruction> instructions,
        string functionName,
        ulong startAddress,
        CpuArchitecture architecture,
        LiftOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(instructions);
        ct.ThrowIfCancellationRequested();

        options ??= LiftOptions.Default;

        if (!SupportsArchitecture(architecture))
        {
            throw new NotSupportedException(
                $"Architecture {architecture} is not supported for B2R2 LowUIR lifting.");
        }

        _logger.LogDebug(
            "B2R2 LowUIR lifting {InstructionCount} instructions for function {FunctionName} ({Architecture})",
            instructions.Count,
            functionName,
            architecture);

        var isa = MapToB2R2Isa(architecture);

        var statements = new List<IrStatement>();
        var basicBlocks = new List<IrBasicBlock>();
        var currentBlockStatements = new List<int>();
        var blockStartAddress = startAddress;
        var lastEndAddress = startAddress;
        var statementId = 0;
        var blockId = 0;

        // A non-positive MaxInstructions means "no limit".
        var effectiveMaxInstructions = options.MaxInstructions > 0
            ? options.MaxInstructions
            : int.MaxValue;

        foreach (var instr in instructions.Take(effectiveMaxInstructions))
        {
            ct.ThrowIfCancellationRequested();

            // Lift instruction to B2R2 LowUIR
            var liftedStatements = LiftInstructionToLowUir(isa, instr, ref statementId);
            statements.AddRange(liftedStatements);

            foreach (var stmt in liftedStatements)
            {
                currentBlockStatements.Add(stmt.Id);
            }

            // Remember where the last *processed* instruction ends. The trailing block must
            // not use instructions[^1], which is a different instruction when the walk was
            // truncated by MaxInstructions.
            lastEndAddress = instr.Address + (ulong)instr.RawBytes.Length;

            // Check for block-ending instructions
            if (IsBlockTerminator(instr))
            {
                basicBlocks.Add(
                    CreateBasicBlock(blockId, blockStartAddress, lastEndAddress, currentBlockStatements));
                blockId++;
                currentBlockStatements.Clear();
                blockStartAddress = lastEndAddress;
            }
        }

        // Handle trailing statements not yet in a block
        if (currentBlockStatements.Count > 0)
        {
            basicBlocks.Add(
                CreateBasicBlock(blockId, blockStartAddress, lastEndAddress, currentBlockStatements));
        }

        // Blocks were appended in instruction order, so edge construction is deterministic.
        var (blocksWithEdges, edges) = BuildCfgEdges([.. basicBlocks]);

        var cfg = new ControlFlowGraph(
            EntryBlockId: blocksWithEdges.Length > 0 ? 0 : -1,
            ExitBlockIds: FindExitBlocks(blocksWithEdges),
            Edges: edges);

        var lifted = new LiftedFunction(
            Name: functionName,
            Address: startAddress,
            Statements: [.. statements],
            BasicBlocks: blocksWithEdges,
            Cfg: cfg);

        _logger.LogDebug(
            "B2R2 LowUIR lifted {StatementCount} statements in {BlockCount} blocks for {FunctionName}",
            statements.Count,
            blocksWithEdges.Length,
            functionName);

        return Task.FromResult(lifted);
    }

    /// <inheritdoc />
    public Task<SsaFunction> TransformToSsaAsync(
        LiftedFunction lifted,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(lifted);
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug(
            "Transforming {FunctionName} to SSA form ({StatementCount} statements)",
            lifted.Name,
            lifted.Statements.Length);

        // Versions are assigned linearly in statement order; no phi nodes are inserted.
        var ssaStatements = new List<SsaStatement>(lifted.Statements.Length);
        var ssaBlocks = new List<SsaBasicBlock>();
        var definitions = new Dictionary<SsaVariable, int>();
        var uses = new Dictionary<SsaVariable, HashSet<int>>();
        var versionCounters = new Dictionary<string, int>();

        foreach (var stmt in lifted.Statements)
        {
            ct.ThrowIfCancellationRequested();

            // Resolve sources BEFORE bumping the destination version: a statement that reads
            // and writes the same name (e.g. "mov eax, eax") must read the previous version,
            // not the one it is about to define.
            var sourceVars = new List<SsaVariable>();
            foreach (var src in stmt.Sources)
            {
                // Operands without a name (e.g. immediates) share the "?" placeholder name.
                var sourceName = src.Name ?? "?";
                var ssaVar = new SsaVariable(
                    BaseName: sourceName,
                    Version: versionCounters.GetValueOrDefault(sourceName, 0),
                    BitSize: src.BitSize,
                    Kind: MapOperandKindToSsaKind(src.Kind));
                sourceVars.Add(ssaVar);

                if (!uses.TryGetValue(ssaVar, out var useSites))
                {
                    useSites = [];
                    uses[ssaVar] = useSites;
                }

                useSites.Add(stmt.Id);
            }

            // Each definition gets a fresh version (first definition is version 1;
            // version 0 denotes a value that was never defined inside the function).
            SsaVariable? destVar = null;
            if (stmt.Destination is { } destination)
            {
                var destinationName = destination.Name ?? "?";
                var nextVersion = versionCounters.GetValueOrDefault(destinationName, 0) + 1;
                versionCounters[destinationName] = nextVersion;

                destVar = new SsaVariable(
                    BaseName: destinationName,
                    Version: nextVersion,
                    BitSize: destination.BitSize,
                    Kind: MapOperandKindToSsaKind(destination.Kind));
                definitions[destVar] = stmt.Id;
            }

            ssaStatements.Add(new SsaStatement(
                Id: stmt.Id,
                Address: stmt.Address,
                Kind: stmt.Kind,
                Operation: stmt.Operation,
                Destination: destVar,
                Sources: [.. sourceVars],
                PhiSources: null));
        }

        // Build SSA basic blocks from lifted blocks
        foreach (var block in lifted.BasicBlocks)
        {
            // Hash the ids once per block instead of scanning the id array per statement.
            var idsInBlock = new HashSet<int>(block.StatementIds);
            var blockStatements = ssaStatements
                .Where(s => idsInBlock.Contains(s.Id))
                .ToImmutableArray();

            ssaBlocks.Add(new SsaBasicBlock(
                Id: block.Id,
                Label: block.Label,
                PhiNodes: [],
                Statements: blockStatements,
                Predecessors: block.Predecessors,
                Successors: block.Successors));
        }

        var defUse = new DefUseChains(
            Definitions: definitions.ToImmutableDictionary(),
            Uses: uses.ToImmutableDictionary(
                k => k.Key,
                v => v.Value.ToImmutableHashSet()));

        var ssaFunction = new SsaFunction(
            Name: lifted.Name,
            Address: lifted.Address,
            Statements: [.. ssaStatements],
            BasicBlocks: [.. ssaBlocks],
            DefUse: defUse);

        _logger.LogDebug(
            "SSA transformation complete: {StatementCount} SSA statements, {DefCount} definitions",
            ssaStatements.Count,
            definitions.Count);

        return Task.FromResult(ssaFunction);
    }

    #region B2R2 LowUIR Mapping

    /// <summary>
    /// Lifts a single instruction through B2R2 and maps the result to IR statements.
    /// Always returns at least one statement: when B2R2 yields nothing usable or throws,
    /// a fallback statement derived from the disassembly is emitted instead.
    /// </summary>
    /// <param name="isa">B2R2 ISA to lift with.</param>
    /// <param name="instr">Instruction to lift.</param>
    /// <param name="statementId">Next free statement id; advanced for every statement created.</param>
    private List<IrStatement> LiftInstructionToLowUir(
        ISA isa,
        DisassembledInstruction instr,
        ref int statementId)
    {
        var statements = new List<IrStatement>();

        try
        {
            // Create B2R2 BinHandle and lifting unit for the ISA
            // NOTE(review): the handle wraps only this instruction's bytes with a null base
            // address, yet the lift below is requested at the instruction's absolute address.
            // Confirm B2R2 resolves that address inside the buffer; otherwise every lift
            // would end up in the fallback path.
            var bytes = instr.RawBytes.ToArray();
            var binHandle = new BinHandle(bytes, isa, null, true);
            var lifter = binHandle.NewLiftingUnit();

            // Lift to LowUIR using B2R2 - returns Stmt[] directly
            var liftResult = lifter.LiftInstruction(instr.Address);

            if (liftResult == null || liftResult.Length == 0)
            {
                // Fallback to simple mapping if B2R2 lift fails
                statements.Add(CreateFallbackStatement(instr, statementId++));
                return statements;
            }

            // Map each B2R2 LowUIR statement to our IR model
            foreach (var b2r2Stmt in liftResult)
            {
                var irStmt = MapB2R2Statement(b2r2Stmt, instr.Address, ref statementId);
                if (irStmt != null)
                {
                    statements.Add(irStmt);
                }
            }

            // Ensure at least one statement per instruction for determinism
            if (statements.Count == 0)
            {
                statements.Add(CreateFallbackStatement(instr, statementId++));
            }
        }
        catch (Exception ex)
        {
            // Deliberate best effort: a failed lift is logged and degrades to the fallback.
            _logger.LogWarning(
                ex,
                "B2R2 lift failed for instruction at {Address}: {Mnemonic}",
                instr.Address,
                instr.Mnemonic);

            statements.Add(CreateFallbackStatement(instr, statementId++));
        }

        return statements;
    }

    /// <summary>
    /// Maps one B2R2 statement object to an <see cref="IrStatement"/>, dispatching on the
    /// runtime type name. Returns null (without consuming an id) for unrecognised types.
    /// </summary>
    private IrStatement? MapB2R2Statement(object b2r2Stmt, ulong baseAddress, ref int statementId)
    {
        // B2R2 LowUIR statement types:
        // - Put: register assignment
        // - Store: memory write
        // - Jmp: unconditional jump
        // - CJmp: conditional jump
        // - InterJmp: indirect jump
        // - InterCJmp: indirect conditional jump
        // - LMark: label marker
        // - SideEffect: side effects (syscall, fence, etc.)
        var stmtType = b2r2Stmt.GetType().Name;
        var kind = MapB2R2StmtTypeToKind(stmtType);

        if (kind == IrStatementKind.Unknown)
        {
            return null;
        }

        var (dest, sources) = ExtractOperandsFromB2R2Stmt(b2r2Stmt);

        return new IrStatement(
            Id: statementId++,
            Address: baseAddress,
            Kind: kind,
            Operation: stmtType,
            Destination: dest,
            Sources: sources,
            Metadata: null);
    }

    /// <summary>Translates a B2R2 statement type name into the IR statement kind.</summary>
    private static IrStatementKind MapB2R2StmtTypeToKind(string stmtType) => stmtType switch
    {
        "Put" => IrStatementKind.Assign,
        "Store" => IrStatementKind.Store,
        "Jmp" => IrStatementKind.Jump,
        "CJmp" => IrStatementKind.ConditionalJump,
        "InterJmp" => IrStatementKind.Jump,
        "InterCJmp" => IrStatementKind.ConditionalJump,
        "LMark" => IrStatementKind.Nop,
        "SideEffect" => IrStatementKind.Syscall,
        _ => IrStatementKind.Unknown
    };

    /// <summary>
    /// Reads the destination ("Dest"), the value ("Value", else "Src") and the condition
    /// ("Cond") from a B2R2 statement via reflection. Missing or null properties are skipped.
    /// </summary>
    private static (IrOperand? Dest, ImmutableArray<IrOperand> Sources) ExtractOperandsFromB2R2Stmt(object b2r2Stmt)
    {
        var type = b2r2Stmt.GetType();
        var sources = new List<IrOperand>();

        // Try to extract destination
        var destValue = type.GetProperty("Dest")?.GetValue(b2r2Stmt);
        var dest = destValue is null ? null : CreateOperandFromB2R2Expr(destValue);

        // Try to extract source/value ("Src" is consulted only when "Value" does not exist)
        var sourceProperty = type.GetProperty("Value") ?? type.GetProperty("Src");
        if (sourceProperty?.GetValue(b2r2Stmt) is { } sourceValue)
        {
            sources.Add(CreateOperandFromB2R2Expr(sourceValue));
        }

        // Try to extract condition for conditional jumps
        if (type.GetProperty("Cond")?.GetValue(b2r2Stmt) is { } conditionValue)
        {
            sources.Add(CreateOperandFromB2R2Expr(conditionValue));
        }

        return (dest, [.. sources]);
    }

    /// <summary>
    /// Converts a B2R2 expression object to an operand, dispatching on its runtime type name.
    /// Unrecognised expression types become an Unknown operand named after the type.
    /// </summary>
    private static IrOperand CreateOperandFromB2R2Expr(object expr)
    {
        var exprType = expr.GetType().Name;

        return exprType switch
        {
            "Var" => new IrOperand(
                Kind: IrOperandKind.Register,
                Name: GetVarName(expr),
                Value: null,
                BitSize: GetExprBitWidth(expr),
                IsMemory: false),

            "TempVar" => new IrOperand(
                Kind: IrOperandKind.Temporary,
                Name: GetTempVarName(expr),
                Value: null,
                BitSize: GetExprBitWidth(expr),
                IsMemory: false),

            "Num" => new IrOperand(
                Kind: IrOperandKind.Immediate,
                Name: null,
                Value: GetNumValueLong(expr),
                BitSize: GetExprBitWidth(expr),
                IsMemory: false),

            "Load" => new IrOperand(
                Kind: IrOperandKind.Memory,
                Name: "[mem]",
                Value: null,
                BitSize: GetExprBitWidth(expr),
                IsMemory: true),

            _ => new IrOperand(
                Kind: IrOperandKind.Unknown,
                Name: exprType,
                Value: null,
                BitSize: 64,
                IsMemory: false)
        };
    }

    /// <summary>Returns the expression's "Name" property as text, or "?" when unavailable.</summary>
    private static string GetVarName(object varExpr)
    {
        var nameProp = varExpr.GetType().GetProperty("Name");
        return nameProp?.GetValue(varExpr)?.ToString() ?? "?";
    }

    /// <summary>Formats a temporary as "T" followed by its "N" property (0 when unavailable).</summary>
    private static string GetTempVarName(object tempVarExpr)
    {
        var numProp = tempVarExpr.GetType().GetProperty("N");
        var num = numProp?.GetValue(tempVarExpr) ?? 0;
        return string.Format(CultureInfo.InvariantCulture, "T{0}", num);
    }

    /// <summary>
    /// Reads <c>expr.Type.BitSize</c> via reflection and returns 64 when any step of that
    /// chain is missing, null, or not an <see cref="int"/>.
    /// </summary>
    private static int GetExprBitWidth(object expr)
    {
        const int DefaultBitWidth = 64;

        var typeValue = expr.GetType().GetProperty("Type")?.GetValue(expr);
        var bitSize = typeValue?.GetType().GetProperty("BitSize")?.GetValue(typeValue);

        return bitSize is int bits ? bits : DefaultBitWidth;
    }

    /// <summary>
    /// Converts the expression's "Value" property to a signed 64-bit integer
    /// (0 when the property is missing or null).
    /// </summary>
    private static long GetNumValueLong(object numExpr)
    {
        var valueProp = numExpr.GetType().GetProperty("Value");
        var value = valueProp?.GetValue(numExpr);
        return Convert.ToInt64(value, CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Builds a statement straight from the disassembly: the first operand becomes the
    /// destination, the remaining operands become sources, and the statement is tagged
    /// with "fallback" metadata.
    /// </summary>
    private static IrStatement CreateFallbackStatement(DisassembledInstruction instr, int id)
    {
        // Operand width is not available from the disassembly here, so 64 is assumed.
        var operands = instr.Operands
            .Select(op => new IrOperand(
                Kind: MapOperandType(op.Type),
                Name: op.Text,
                Value: op.Value,
                BitSize: 64,
                IsMemory: op.Type == OperandType.Memory))
            .ToList();

        var dest = operands.Count > 0 ? operands[0] : null;
        var sources = operands.Skip(1).ToImmutableArray();

        return new IrStatement(
            Id: id,
            Address: instr.Address,
            Kind: MapMnemonicToKind(instr.Mnemonic),
            Operation: instr.Mnemonic,
            Destination: dest,
            Sources: sources,
            Metadata: ImmutableDictionary<string, object>.Empty.Add("fallback", true));
    }

    /// <summary>Maps an IR operand kind to the SSA variable kind (Temporary for anything unlisted).</summary>
    private static SsaVariableKind MapOperandKindToSsaKind(IrOperandKind kind) => kind switch
    {
        IrOperandKind.Register => SsaVariableKind.Register,
        IrOperandKind.Temporary => SsaVariableKind.Temporary,
        IrOperandKind.Memory => SsaVariableKind.Memory,
        IrOperandKind.Immediate => SsaVariableKind.Constant,
        _ => SsaVariableKind.Temporary
    };

    /// <summary>Maps a disassembler operand type to the IR operand kind.</summary>
    private static IrOperandKind MapOperandType(OperandType type) => type switch
    {
        OperandType.Register => IrOperandKind.Register,
        OperandType.Immediate => IrOperandKind.Immediate,
        OperandType.Memory => IrOperandKind.Memory,
        OperandType.Address => IrOperandKind.Label,
        _ => IrOperandKind.Unknown
    };

    #endregion

    #region Helper Methods

    /// <summary>Translates a StellaOps architecture into the B2R2 ISA used for lifting.</summary>
    /// <exception cref="NotSupportedException">The architecture has no B2R2 mapping.</exception>
    private static ISA MapToB2R2Isa(CpuArchitecture arch) => arch switch
    {
        CpuArchitecture.X86 => new ISA(Architecture.Intel, WordSize.Bit32),
        CpuArchitecture.X86_64 => new ISA(Architecture.Intel, WordSize.Bit64),
        CpuArchitecture.ARM32 => new ISA(Architecture.ARMv7, WordSize.Bit32),
        CpuArchitecture.ARM64 => new ISA(Architecture.ARMv8, WordSize.Bit64),
        CpuArchitecture.MIPS32 => new ISA(Architecture.MIPS, WordSize.Bit32),
        CpuArchitecture.MIPS64 => new ISA(Architecture.MIPS, WordSize.Bit64),
        CpuArchitecture.RISCV64 => new ISA(Architecture.RISCV, WordSize.Bit64),
        CpuArchitecture.PPC32 => new ISA(Architecture.PPC, Endian.Big, WordSize.Bit32),
        CpuArchitecture.SPARC => new ISA(Architecture.SPARC, Endian.Big),
        _ => throw new NotSupportedException($"Unsupported architecture: {arch}")
    };

    /// <summary>Creates a basic block with the invariant "bb_{id}" label and no CFG edges yet.</summary>
    private static IrBasicBlock CreateBasicBlock(
        int id,
        ulong startAddress,
        ulong endAddress,
        IReadOnlyCollection<int> statementIds) =>
        new(
            Id: id,
            Label: string.Format(CultureInfo.InvariantCulture, "bb_{0}", id),
            StartAddress: startAddress,
            EndAddress: endAddress,
            StatementIds: [.. statementIds],
            Predecessors: ImmutableArray<int>.Empty,
            Successors: ImmutableArray<int>.Empty);

    /// <summary>
    /// Mnemonic-based heuristic deciding whether an instruction ends a basic block.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the "J"/"B" prefix rule is architecture-agnostic, so it also matches
    /// mnemonics that merely start with those letters (for instance BSWAP or BIC) -
    /// confirm whether that over-splitting is acceptable.
    /// </remarks>
    private static bool IsBlockTerminator(DisassembledInstruction instr)
    {
        var mnemonic = instr.Mnemonic.ToUpperInvariant();

        return mnemonic.StartsWith("J", StringComparison.Ordinal)
            || mnemonic.StartsWith("B", StringComparison.Ordinal)
            || mnemonic is "RET" or "RETN" or "RETF" or "IRET" or "SYSRET" or "BLR" or "BX" or "JR";
    }

    /// <summary>
    /// Classifies a mnemonic for fallback statements using exact names and prefix rules.
    /// </summary>
    private static IrStatementKind MapMnemonicToKind(string mnemonic)
    {
        var upper = mnemonic.ToUpperInvariant();

        // Exact call/return mnemonics are tested first: "BL", "BLX" and "BLR" would
        // otherwise be swallowed by the generic "B" prefix rule below and never be
        // reported as Call/Return.
        if (upper is "CALL" or "BL" or "BLX")
        {
            return IrStatementKind.Call;
        }

        if (upper is "RET" or "RETN" or "BLR")
        {
            return IrStatementKind.Return;
        }

        if (HasAnyPrefix(upper, "MOV", "LEA", "LDR"))
        {
            return IrStatementKind.Assign;
        }

        if (HasAnyPrefix(upper, "ADD", "SUB", "MUL", "DIV", "AND", "OR", "XOR", "SH"))
        {
            return IrStatementKind.BinaryOp;
        }

        if (HasAnyPrefix(upper, "CMP", "TEST"))
        {
            return IrStatementKind.Compare;
        }

        if (HasAnyPrefix(upper, "J", "B"))
        {
            return IrStatementKind.ConditionalJump;
        }

        if (HasAnyPrefix(upper, "PUSH", "POP", "STR"))
        {
            return IrStatementKind.Store;
        }

        return upper == "NOP" ? IrStatementKind.Nop : IrStatementKind.Unknown;
    }

    /// <summary>True when <paramref name="value"/> starts (ordinally) with any of the prefixes.</summary>
    private static bool HasAnyPrefix(string value, params string[] prefixes)
    {
        foreach (var prefix in prefixes)
        {
            if (value.StartsWith(prefix, StringComparison.Ordinal))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Links every block to its immediate neighbours with fall-through edges.
    /// Block ids are taken to be the positions in <paramref name="blocks"/>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): only sequential fall-through edges are produced; branch targets are
    /// not resolved, and a block ending in a return or unconditional jump still receives
    /// a fall-through successor - confirm this simplification is intended.
    /// </remarks>
    private static (ImmutableArray<IrBasicBlock> Blocks, ImmutableArray<CfgEdge> Edges) BuildCfgEdges(
        ImmutableArray<IrBasicBlock> blocks)
    {
        if (blocks.Length == 0)
        {
            return (blocks, ImmutableArray<CfgEdge>.Empty);
        }

        var result = new IrBasicBlock[blocks.Length];
        var edges = new List<CfgEdge>();

        for (var i = 0; i < blocks.Length; i++)
        {
            var predecessors = new List<int>();
            var successors = new List<int>();

            // Fall-through successor (next block in sequence)
            if (i < blocks.Length - 1)
            {
                successors.Add(i + 1);
                edges.Add(new CfgEdge(
                    SourceBlockId: i,
                    TargetBlockId: i + 1,
                    Kind: CfgEdgeKind.FallThrough,
                    Condition: null));
            }

            // Predecessor from fall-through
            if (i > 0)
            {
                predecessors.Add(i - 1);
            }

            // Each list holds at most one id, so it is already distinct and ordered.
            result[i] = blocks[i] with
            {
                Predecessors = [.. predecessors],
                Successors = [.. successors]
            };
        }

        return ([.. result], [.. edges]);
    }

    /// <summary>Returns the ids of all blocks that have no successors.</summary>
    private static ImmutableArray<int> FindExitBlocks(ImmutableArray<IrBasicBlock> blocks)
    {
        return blocks
            .Where(b => b.Successors.Length == 0)
            .Select(b => b.Id)
            .ToImmutableArray();
    }

    #endregion
}