This commit is contained in:
master
2026-02-04 19:59:20 +02:00
parent 557feefdc3
commit 5548cf83bf
1479 changed files with 53557 additions and 40339 deletions

View File

@@ -23,6 +23,7 @@
<ProjectReference Include="../__Libraries/StellaOps.BinaryIndex.VexBridge/StellaOps.BinaryIndex.VexBridge.csproj" />
<ProjectReference Include="../__Libraries/StellaOps.BinaryIndex.GoldenSet/StellaOps.BinaryIndex.GoldenSet.csproj" />
<ProjectReference Include="../__Libraries/StellaOps.BinaryIndex.Disassembly.B2R2/StellaOps.BinaryIndex.Disassembly.B2R2.csproj" />
<ProjectReference Include="../../Authority/StellaOps.Authority/StellaOps.Auth.ServerIntegration/StellaOps.Auth.ServerIntegration.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,91 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class AstComparisonEngine
{
/// <summary>
/// Walks two subtrees in parallel and appends every difference found to <paramref name="differences"/>.
/// </summary>
/// <remarks>
/// A node-type mismatch is reported once as <see cref="DifferenceType.Modified"/> and the walk does not
/// descend into that pair. Children are paired by index; unpaired trailing children of
/// <paramref name="a"/> are reported as removed and those of <paramref name="b"/> as added.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node at the same position in the second tree.</param>
/// <param name="differences">Accumulator that receives the differences in discovery order.</param>
private static void CompareNodes(AstNode a, AstNode b, List<CodeDifference> differences)
{
    void ReportModified(string description) =>
        differences.Add(new CodeDifference(DifferenceType.Modified, a, b, description));

    if (a.Type != b.Type)
    {
        ReportModified($"Node type changed: {a.Type} -> {b.Type}");
        return;
    }

    // Payload comparison for the node kinds that carry a name, value or operator.
    if (a is VariableNode leftVar && b is VariableNode rightVar)
    {
        if (leftVar.Name != rightVar.Name)
        {
            ReportModified($"Variable renamed: {leftVar.Name} -> {rightVar.Name}");
        }
    }
    else if (a is ConstantNode leftConst && b is ConstantNode rightConst)
    {
        if (leftConst.Value?.ToString() != rightConst.Value?.ToString())
        {
            ReportModified($"Constant changed: {leftConst.Value} -> {rightConst.Value}");
        }
    }
    else if (a is BinaryOpNode leftOp && b is BinaryOpNode rightOp)
    {
        if (leftOp.Operator != rightOp.Operator)
        {
            ReportModified($"Operator changed: {leftOp.Operator} -> {rightOp.Operator}");
        }
    }
    else if (a is CallNode leftCall && b is CallNode rightCall)
    {
        if (leftCall.FunctionName != rightCall.FunctionName)
        {
            ReportModified($"Function call changed: {leftCall.FunctionName} -> {rightCall.FunctionName}");
        }
    }

    var countA = a.Children.Length;
    var countB = b.Children.Length;
    var paired = Math.Min(countA, countB);

    // Children present on both sides are compared position by position.
    for (var index = 0; index < paired; index++)
    {
        CompareNodes(a.Children[index], b.Children[index], differences);
    }

    // Surplus children exist on one side only.
    for (var index = paired; index < countA; index++)
    {
        var orphan = a.Children[index];
        differences.Add(new CodeDifference(DifferenceType.Removed, orphan, null, $"Node removed: {orphan.Type}"));
    }

    for (var index = paired; index < countB; index++)
    {
        var extra = b.Children[index];
        differences.Add(new CodeDifference(DifferenceType.Added, null, extra, $"Node added: {extra.Type}"));
    }
}
}

View File

@@ -0,0 +1,37 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class AstComparisonEngine
{
/// <summary>
/// Counts the positional edit operations needed to turn subtree <paramref name="a"/> into <paramref name="b"/>.
/// </summary>
/// <remarks>
/// Only <see cref="AstNode.Type"/> is compared; names, values and operators are ignored. A type mismatch
/// counts as one modification and the pair is not descended into. Each surplus child counts as one
/// insertion or deletion regardless of the size of its subtree.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node at the same position in the second tree.</param>
/// <returns>Insertion, deletion and modification counts together with their sum.</returns>
private static EditOperations ComputeTreeEditOperations(AstNode a, AstNode b)
{
    if (a.Type != b.Type)
    {
        return new EditOperations(0, 0, 1, 1);
    }

    var paired = Math.Min(a.Children.Length, b.Children.Length);
    var added = b.Children.Length - paired;
    var removed = a.Children.Length - paired;
    var changed = 0;

    for (var index = 0; index < paired; index++)
    {
        var nested = ComputeTreeEditOperations(a.Children[index], b.Children[index]);
        added += nested.Insertions;
        removed += nested.Deletions;
        changed += nested.Modifications;
    }

    return new EditOperations(added, removed, changed, added + removed + changed);
}
/// <summary>
/// Result of <c>ComputeTreeEditOperations</c>: per-kind operation counts and their total.
/// </summary>
/// <param name="Insertions">Number of inserted nodes counted.</param>
/// <param name="Deletions">Number of deleted nodes counted.</param>
/// <param name="Modifications">Number of node-type mismatches counted.</param>
/// <param name="TotalOperations">Total supplied by the caller; the struct itself does not enforce that it equals the sum of the other three.</param>
private readonly record struct EditOperations(int Insertions, int Deletions, int Modifications, int TotalOperations);
}

View File

@@ -0,0 +1,104 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class AstComparisonEngine
{
/// <summary>
/// Determines whether two subtrees have the same shape and differ at most in identifier names.
/// </summary>
/// <remarks>
/// Two <see cref="VariableNode"/>s always match, whatever their names, and the function names of
/// <see cref="CallNode"/>s are not compared. Constant values and binary operators are not identifiers,
/// so they must agree: a differing literal or operator is a real change, not a rename. Without that
/// check <c>x + 1</c> and <c>x - 2</c> would be reported as renamed.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node from the second tree.</param>
/// <returns><c>true</c> when every paired child is either renamed or identical; otherwise <c>false</c>.</returns>
private static bool AreNodesRenamed(AstNode a, AstNode b)
{
    if (a.Type != b.Type || a.Children.Length != b.Children.Length)
    {
        return false;
    }

    if (a is VariableNode && b is VariableNode)
    {
        return true;
    }

    // Same comparison AreNodesIdentical uses for constants: string form of the value.
    if (a is ConstantNode constA && b is ConstantNode constB &&
        constA.Value?.ToString() != constB.Value?.ToString())
    {
        return false;
    }

    if (a is BinaryOpNode binA && b is BinaryOpNode binB && binA.Operator != binB.Operator)
    {
        return false;
    }

    for (var i = 0; i < a.Children.Length; i++)
    {
        if (!AreNodesRenamed(a.Children[i], b.Children[i]) &&
            !AreNodesIdentical(a.Children[i], b.Children[i]))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Heuristically recognises node pairs that may be compiler-optimised forms of one another.
/// </summary>
/// <remarks>
/// Three shapes are accepted: a <c>For</c> node against a <c>Block</c>, a <c>Call</c> node against a
/// <c>Block</c>, and a binary <c>*</c> against <c>&lt;&lt;</c> or <c>/</c> against <c>&gt;&gt;</c>.
/// Only node kinds and operator text are inspected; operands and bodies are not examined.
/// </remarks>
/// <param name="a">Node from the first tree (the non-optimised side of each pattern).</param>
/// <param name="b">Node from the second tree (the optimised side of each pattern).</param>
/// <returns><c>true</c> when one of the patterns matches.</returns>
private static bool AreOptimizationVariants(AstNode a, AstNode b)
{
    var targetIsBlock = b.Type == AstNodeType.Block;

    return (a.Type == AstNodeType.For && targetIsBlock)
        || (a is BinaryOpNode left && b is BinaryOpNode right &&
            ((left.Operator == "*" && right.Operator == "<<") ||
             (left.Operator == "/" && right.Operator == ">>")))
        || (a.Type == AstNodeType.Call && targetIsBlock);
}
/// <summary>
/// Lazily enumerates <paramref name="root"/> and all of its descendants in pre-order
/// (a node first, then its children from first to last).
/// </summary>
/// <param name="root">Root of the subtree to enumerate.</param>
/// <returns>A deferred sequence of every node in the subtree.</returns>
private static IEnumerable<AstNode> CollectNodes(AstNode root)
{
    var pending = new Stack<AstNode>();
    pending.Push(root);

    while (pending.Count > 0)
    {
        var current = pending.Pop();
        yield return current;

        // Push in reverse so the first child is popped, and therefore yielded, first.
        var children = current.Children;
        for (var index = children.Length - 1; index >= 0; index--)
        {
            pending.Push(children[index]);
        }
    }
}
/// <summary>
/// Drops equivalences that are already implied by an equivalence between enclosing nodes.
/// </summary>
/// <remarks>
/// An entry is dropped when some other entry's <c>NodeA</c> is an ancestor of its <c>NodeA</c> and that
/// entry's <c>NodeB</c> is an ancestor of its <c>NodeB</c>. Input order is preserved.
/// </remarks>
/// <param name="equivalences">All equivalences found; not modified.</param>
/// <returns>A new list holding only the entries not covered by another entry.</returns>
private static IEnumerable<SemanticEquivalence> FilterRedundantEquivalences(List<SemanticEquivalence> equivalences)
{
    bool IsCoveredByAnother(SemanticEquivalence candidate)
    {
        foreach (var other in equivalences)
        {
            if (other != candidate &&
                IsAncestor(other.NodeA, candidate.NodeA) &&
                IsAncestor(other.NodeB, candidate.NodeB))
            {
                return true;
            }
        }

        return false;
    }

    return equivalences.Where(candidate => !IsCoveredByAnother(candidate)).ToList();
}
/// <summary>
/// Determines whether <paramref name="node"/> is a strict descendant of <paramref name="potential"/>.
/// </summary>
/// <remarks>
/// Nodes are compared by reference. <see cref="AstNode"/> is a record, so <c>==</c> would compare by
/// value and treat a different but equal-valued leaf (for example another use of the same variable with
/// the same location) as the node being searched for.
/// </remarks>
/// <param name="potential">Candidate ancestor.</param>
/// <param name="node">Node to look for beneath <paramref name="potential"/>.</param>
/// <returns><c>true</c> when <paramref name="node"/> occurs strictly below <paramref name="potential"/>; <c>false</c> otherwise, including when both are the same instance.</returns>
private static bool IsAncestor(AstNode potential, AstNode node)
{
    if (ReferenceEquals(potential, node))
    {
        return false;
    }

    foreach (var child in potential.Children)
    {
        if (ReferenceEquals(child, node) || IsAncestor(child, node))
        {
            return true;
        }
    }

    return false;
}
}

View File

@@ -0,0 +1,76 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class AstComparisonEngine
{
/// <summary>
/// Classifies the relationship between two nodes of the same type.
/// </summary>
/// <remarks>
/// Checks run from strongest to weakest and the first match wins: identical (confidence 1.0),
/// renamed (0.95), optimization variant (0.85). Nodes whose <see cref="AstNode.Type"/> differs are
/// rejected before any check runs.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node from the second tree.</param>
/// <returns>The detected equivalence, or <c>null</c> when none applies.</returns>
private static SemanticEquivalence? CheckEquivalence(AstNode a, AstNode b)
{
    if (a.Type == b.Type)
    {
        if (AreNodesIdentical(a, b))
        {
            return new SemanticEquivalence(a, b, EquivalenceType.Identical, 1.0m, "Identical nodes");
        }

        if (AreNodesRenamed(a, b))
        {
            return new SemanticEquivalence(a, b, EquivalenceType.Renamed, 0.95m, "Same structure with renamed identifiers");
        }

        if (AreOptimizationVariants(a, b))
        {
            return new SemanticEquivalence(a, b, EquivalenceType.Optimized, 0.85m, "Optimization variant");
        }
    }

    return null;
}
/// <summary>
/// Determines whether two subtrees are structurally identical, including names, values and operators.
/// </summary>
/// <remarks>
/// Constants are compared by the string form of their value and variables by name; the data type of
/// either is not compared. Binary operators and call targets must match, and all children must be
/// identical in the same order. Source locations are not compared.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node from the second tree.</param>
/// <returns><c>true</c> when the subtrees match under the rules above.</returns>
private static bool AreNodesIdentical(AstNode a, AstNode b)
{
    if (a.Type != b.Type || a.Children.Length != b.Children.Length)
    {
        return false;
    }

    switch (a, b)
    {
        // Leaf payloads decide the result on their own.
        case (ConstantNode leftConst, ConstantNode rightConst):
            return leftConst.Value?.ToString() == rightConst.Value?.ToString();
        case (VariableNode leftVar, VariableNode rightVar):
            return leftVar.Name == rightVar.Name;

        // Interior payloads can only veto; the children still have to match.
        case (BinaryOpNode leftOp, BinaryOpNode rightOp) when leftOp.Operator != rightOp.Operator:
            return false;
        case (CallNode leftCall, CallNode rightCall) when leftCall.FunctionName != rightCall.FunctionName:
            return false;
    }

    var childCount = a.Children.Length;
    for (var index = 0; index < childCount; index++)
    {
        if (!AreNodesIdentical(a.Children[index], b.Children[index]))
        {
            return false;
        }
    }

    return true;
}
}

View File

@@ -1,6 +1,5 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
@@ -8,7 +7,7 @@ namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Engine for comparing AST structures using tree edit distance and semantic analysis.
/// </summary>
public sealed class AstComparisonEngine : IAstComparisonEngine
public sealed partial class AstComparisonEngine : IAstComparisonEngine
{
/// <inheritdoc />
public decimal ComputeStructuralSimilarity(DecompiledAst a, DecompiledAst b)
@@ -16,7 +15,6 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
ArgumentNullException.ThrowIfNull(a);
ArgumentNullException.ThrowIfNull(b);
// Use normalized tree edit distance
var editDistance = ComputeEditDistance(a, b);
return 1.0m - editDistance.NormalizedDistance;
}
@@ -27,9 +25,7 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
ArgumentNullException.ThrowIfNull(a);
ArgumentNullException.ThrowIfNull(b);
// Simplified Zhang-Shasha tree edit distance
var operations = ComputeTreeEditOperations(a.Root, b.Root);
var totalNodes = Math.Max(a.NodeCount, b.NodeCount);
var normalized = totalNodes > 0
? (decimal)operations.TotalOperations / totalNodes
@@ -50,8 +46,6 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
ArgumentNullException.ThrowIfNull(b);
var equivalences = new List<SemanticEquivalence>();
// Find equivalent subtrees
var nodesA = CollectNodes(a.Root).ToList();
var nodesB = CollectNodes(b.Root).ToList();
@@ -67,7 +61,6 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
}
}
// Remove redundant equivalences (child nodes when parent is equivalent)
return [.. FilterRedundantEquivalences(equivalences)];
}
@@ -78,315 +71,8 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
ArgumentNullException.ThrowIfNull(b);
var differences = new List<CodeDifference>();
// Compare root structures
CompareNodes(a.Root, b.Root, differences);
return [.. differences];
}
/// <summary>
/// Counts the positional edit operations needed to turn subtree <paramref name="a"/> into <paramref name="b"/>.
/// </summary>
/// <remarks>
/// Only <see cref="AstNode.Type"/> is compared; names, values and operators are ignored. A type mismatch
/// counts as one modification and the pair is not descended into. Each surplus child counts as one
/// insertion or deletion regardless of the size of its subtree.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node at the same position in the second tree.</param>
/// <returns>Insertion, deletion and modification counts together with their sum.</returns>
private static EditOperations ComputeTreeEditOperations(AstNode a, AstNode b)
{
    if (a.Type != b.Type)
    {
        return new EditOperations(0, 0, 1, 1);
    }

    // Children are paired by index; the unused maximum length is no longer computed.
    var paired = Math.Min(a.Children.Length, b.Children.Length);
    var added = b.Children.Length - paired;
    var removed = a.Children.Length - paired;
    var changed = 0;

    for (var index = 0; index < paired; index++)
    {
        var nested = ComputeTreeEditOperations(a.Children[index], b.Children[index]);
        added += nested.Insertions;
        removed += nested.Deletions;
        changed += nested.Modifications;
    }

    return new EditOperations(added, removed, changed, added + removed + changed);
}
/// <summary>
/// Classifies the relationship between two nodes of the same type.
/// </summary>
/// <remarks>
/// Checks run from strongest to weakest and the first match wins: identical (confidence 1.0),
/// renamed (0.95), optimization variant (0.85). Nodes whose <see cref="AstNode.Type"/> differs are
/// rejected before any check runs.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node from the second tree.</param>
/// <returns>The detected equivalence, or <c>null</c> when none applies.</returns>
private static SemanticEquivalence? CheckEquivalence(AstNode a, AstNode b)
{
    if (a.Type == b.Type)
    {
        if (AreNodesIdentical(a, b))
        {
            return new SemanticEquivalence(a, b, EquivalenceType.Identical, 1.0m, "Identical nodes");
        }

        if (AreNodesRenamed(a, b))
        {
            return new SemanticEquivalence(a, b, EquivalenceType.Renamed, 0.95m, "Same structure with renamed identifiers");
        }

        if (AreOptimizationVariants(a, b))
        {
            return new SemanticEquivalence(a, b, EquivalenceType.Optimized, 0.85m, "Optimization variant");
        }
    }

    return null;
}
/// <summary>
/// Determines whether two subtrees are structurally identical, including names, values and operators.
/// </summary>
/// <remarks>
/// Constants are compared by the string form of their value and variables by name; the data type of
/// either is not compared. Binary operators and call targets must match, and all children must be
/// identical in the same order. Source locations are not compared.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node from the second tree.</param>
/// <returns><c>true</c> when the subtrees match under the rules above.</returns>
private static bool AreNodesIdentical(AstNode a, AstNode b)
{
    if (a.Type != b.Type || a.Children.Length != b.Children.Length)
    {
        return false;
    }

    switch (a, b)
    {
        // Leaf payloads decide the result on their own.
        case (ConstantNode leftConst, ConstantNode rightConst):
            return leftConst.Value?.ToString() == rightConst.Value?.ToString();
        case (VariableNode leftVar, VariableNode rightVar):
            return leftVar.Name == rightVar.Name;

        // Interior payloads can only veto; the children still have to match.
        case (BinaryOpNode leftOp, BinaryOpNode rightOp) when leftOp.Operator != rightOp.Operator:
            return false;
        case (CallNode leftCall, CallNode rightCall) when leftCall.FunctionName != rightCall.FunctionName:
            return false;
    }

    var childCount = a.Children.Length;
    for (var index = 0; index < childCount; index++)
    {
        if (!AreNodesIdentical(a.Children[index], b.Children[index]))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Determines whether two subtrees have the same shape and differ at most in identifier names.
/// </summary>
/// <remarks>
/// Two <see cref="VariableNode"/>s always match, whatever their names, and the function names of
/// <see cref="CallNode"/>s are not compared. Constant values and binary operators are not identifiers,
/// so they must agree: a differing literal or operator is a real change, not a rename. Without that
/// check <c>x + 1</c> and <c>x - 2</c> would be reported as renamed.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node from the second tree.</param>
/// <returns><c>true</c> when every paired child is either renamed or identical; otherwise <c>false</c>.</returns>
private static bool AreNodesRenamed(AstNode a, AstNode b)
{
    if (a.Type != b.Type || a.Children.Length != b.Children.Length)
    {
        return false;
    }

    // Same position, possibly different name: counts as a rename.
    if (a is VariableNode && b is VariableNode)
    {
        return true;
    }

    // Same comparison AreNodesIdentical uses for constants: string form of the value.
    if (a is ConstantNode constA && b is ConstantNode constB &&
        constA.Value?.ToString() != constB.Value?.ToString())
    {
        return false;
    }

    if (a is BinaryOpNode binA && b is BinaryOpNode binB && binA.Operator != binB.Operator)
    {
        return false;
    }

    for (var i = 0; i < a.Children.Length; i++)
    {
        if (!AreNodesRenamed(a.Children[i], b.Children[i]) &&
            !AreNodesIdentical(a.Children[i], b.Children[i]))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Heuristically recognises node pairs that may be compiler-optimised forms of one another.
/// </summary>
/// <remarks>
/// Three shapes are accepted: a <c>For</c> node against a <c>Block</c> (possible loop unrolling), a
/// <c>Call</c> node against a <c>Block</c> (possible inlining), and a binary <c>*</c> against
/// <c>&lt;&lt;</c> or <c>/</c> against <c>&gt;&gt;</c> (strength reduction). Only node kinds and
/// operator text are inspected; operands and bodies are not examined.
/// </remarks>
/// <param name="a">Node from the first tree (the non-optimised side of each pattern).</param>
/// <param name="b">Node from the second tree (the optimised side of each pattern).</param>
/// <returns><c>true</c> when one of the patterns matches.</returns>
private static bool AreOptimizationVariants(AstNode a, AstNode b)
{
    var targetIsBlock = b.Type == AstNodeType.Block;

    return (a.Type == AstNodeType.For && targetIsBlock)
        || (a is BinaryOpNode left && b is BinaryOpNode right &&
            ((left.Operator == "*" && right.Operator == "<<") ||
             (left.Operator == "/" && right.Operator == ">>")))
        || (a.Type == AstNodeType.Call && targetIsBlock);
}
/// <summary>
/// Walks two subtrees in parallel and appends every difference found to <paramref name="differences"/>.
/// </summary>
/// <remarks>
/// A node-type mismatch is reported once as <see cref="DifferenceType.Modified"/> and the walk does not
/// descend into that pair. Children are paired by index; unpaired trailing children of
/// <paramref name="a"/> are reported as removed and those of <paramref name="b"/> as added.
/// </remarks>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node at the same position in the second tree.</param>
/// <param name="differences">Accumulator that receives the differences in discovery order.</param>
private static void CompareNodes(AstNode a, AstNode b, List<CodeDifference> differences)
{
    void ReportModified(string description) =>
        differences.Add(new CodeDifference(DifferenceType.Modified, a, b, description));

    if (a.Type != b.Type)
    {
        ReportModified($"Node type changed: {a.Type} -> {b.Type}");
        return;
    }

    // Payload comparison for the node kinds that carry a name, value or operator.
    if (a is VariableNode leftVar && b is VariableNode rightVar)
    {
        if (leftVar.Name != rightVar.Name)
        {
            ReportModified($"Variable renamed: {leftVar.Name} -> {rightVar.Name}");
        }
    }
    else if (a is ConstantNode leftConst && b is ConstantNode rightConst)
    {
        if (leftConst.Value?.ToString() != rightConst.Value?.ToString())
        {
            ReportModified($"Constant changed: {leftConst.Value} -> {rightConst.Value}");
        }
    }
    else if (a is BinaryOpNode leftOp && b is BinaryOpNode rightOp)
    {
        if (leftOp.Operator != rightOp.Operator)
        {
            ReportModified($"Operator changed: {leftOp.Operator} -> {rightOp.Operator}");
        }
    }
    else if (a is CallNode leftCall && b is CallNode rightCall)
    {
        if (leftCall.FunctionName != rightCall.FunctionName)
        {
            ReportModified($"Function call changed: {leftCall.FunctionName} -> {rightCall.FunctionName}");
        }
    }

    var countA = a.Children.Length;
    var countB = b.Children.Length;
    var paired = Math.Min(countA, countB);

    // Children present on both sides are compared position by position.
    for (var index = 0; index < paired; index++)
    {
        CompareNodes(a.Children[index], b.Children[index], differences);
    }

    // Surplus children exist on one side only.
    for (var index = paired; index < countA; index++)
    {
        var orphan = a.Children[index];
        differences.Add(new CodeDifference(DifferenceType.Removed, orphan, null, $"Node removed: {orphan.Type}"));
    }

    for (var index = paired; index < countB; index++)
    {
        var extra = b.Children[index];
        differences.Add(new CodeDifference(DifferenceType.Added, null, extra, $"Node added: {extra.Type}"));
    }
}
/// <summary>
/// Lazily enumerates <paramref name="root"/> and all of its descendants in pre-order
/// (a node first, then its children from first to last).
/// </summary>
/// <param name="root">Root of the subtree to enumerate.</param>
/// <returns>A deferred sequence of every node in the subtree.</returns>
private static IEnumerable<AstNode> CollectNodes(AstNode root)
{
    var pending = new Stack<AstNode>();
    pending.Push(root);

    while (pending.Count > 0)
    {
        var current = pending.Pop();
        yield return current;

        // Push in reverse so the first child is popped, and therefore yielded, first.
        var children = current.Children;
        for (var index = children.Length - 1; index >= 0; index--)
        {
            pending.Push(children[index]);
        }
    }
}
/// <summary>
/// Drops equivalences that are already implied by an equivalence between enclosing nodes,
/// keeping only the top-level ones.
/// </summary>
/// <remarks>
/// An entry is dropped when some other entry's <c>NodeA</c> is an ancestor of its <c>NodeA</c> and that
/// entry's <c>NodeB</c> is an ancestor of its <c>NodeB</c>. Input order is preserved.
/// </remarks>
/// <param name="equivalences">All equivalences found; not modified.</param>
/// <returns>A new list holding only the entries not covered by another entry.</returns>
private static IEnumerable<SemanticEquivalence> FilterRedundantEquivalences(
    List<SemanticEquivalence> equivalences)
{
    bool IsCoveredByAnother(SemanticEquivalence candidate)
    {
        foreach (var other in equivalences)
        {
            if (other != candidate &&
                IsAncestor(other.NodeA, candidate.NodeA) &&
                IsAncestor(other.NodeB, candidate.NodeB))
            {
                return true;
            }
        }

        return false;
    }

    return equivalences.Where(candidate => !IsCoveredByAnother(candidate)).ToList();
}
/// <summary>
/// Determines whether <paramref name="node"/> is a strict descendant of <paramref name="potential"/>.
/// </summary>
/// <remarks>
/// Nodes are compared by reference. <see cref="AstNode"/> is a record, so <c>==</c> would compare by
/// value and treat a different but equal-valued leaf (for example another use of the same variable with
/// the same location) as the node being searched for.
/// </remarks>
/// <param name="potential">Candidate ancestor.</param>
/// <param name="node">Node to look for beneath <paramref name="potential"/>.</param>
/// <returns><c>true</c> when <paramref name="node"/> occurs strictly below <paramref name="potential"/>; <c>false</c> otherwise, including when both are the same instance.</returns>
private static bool IsAncestor(AstNode potential, AstNode node)
{
    if (ReferenceEquals(potential, node))
    {
        return false;
    }

    foreach (var child in potential.Children)
    {
        if (ReferenceEquals(child, node) || IsAncestor(child, node))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Result of <c>ComputeTreeEditOperations</c>: per-kind operation counts and their total.
/// </summary>
/// <param name="Insertions">Number of inserted nodes counted.</param>
/// <param name="Deletions">Number of deleted nodes counted.</param>
/// <param name="Modifications">Number of node-type mismatches counted.</param>
/// <param name="TotalOperations">Total supplied by the caller; the struct itself does not enforce that it equals the sum of the other three.</param>
private readonly record struct EditOperations(int Insertions, int Deletions, int Modifications, int TotalOperations);
}

View File

@@ -0,0 +1,60 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Abstract syntax tree node.
/// </summary>
/// <remarks>
/// This is a record, so <c>==</c> and <c>Equals</c> compare by value rather than by reference; use
/// <c>ReferenceEquals</c> when node identity matters.
/// </remarks>
/// <param name="Type">Kind of node.</param>
/// <param name="Children">Child nodes in order; each derived record decides which of its operands appear here.</param>
/// <param name="Location">Optional source location; may be <c>null</c>.</param>
public abstract record AstNode(
AstNodeType Type,
ImmutableArray<AstNode> Children,
SourceLocation? Location);
/// <summary>
/// Types of AST nodes.
/// </summary>
/// <remarks>
/// No explicit numeric values are assigned, so each member's value follows declaration order;
/// inserting or reordering members changes those values.
/// </remarks>
public enum AstNodeType
{
// Structure
Function,
Block,
Parameter,
// Control flow
If,
While,
For,
DoWhile,
Switch,
Case,
Default,
Return,
Break,
Continue,
Goto,
Label,
// Expressions
Assignment,
BinaryOp,
UnaryOp,
TernaryOp,
Call,
Cast,
Sizeof,
// Operands
Variable,
Constant,
StringLiteral,
ArrayAccess,
FieldAccess,
PointerDeref,
AddressOf,
// Declarations
VariableDecl,
TypeDef
}

View File

@@ -0,0 +1,29 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Conditional statement node.
/// </summary>
/// <remarks>
/// Children are <c>[Condition, ThenBranch]</c>, with <c>ElseBranch</c> appended only when it is not
/// <c>null</c>, so the child count is 2 or 3.
/// </remarks>
/// <param name="Condition">Condition expression.</param>
/// <param name="ThenBranch">Node for the branch taken when the condition holds.</param>
/// <param name="ElseBranch">Optional else branch; <c>null</c> when absent.</param>
/// <param name="Location">Optional source location.</param>
public sealed record IfNode(
AstNode Condition,
AstNode ThenBranch,
AstNode? ElseBranch,
SourceLocation? Location = null)
: AstNode(AstNodeType.If, ElseBranch is null ? [Condition, ThenBranch] : [Condition, ThenBranch, ElseBranch], Location);
/// <summary>
/// While-loop node; children are <c>[Condition, Body]</c>.
/// </summary>
/// <param name="Condition">Loop condition expression.</param>
/// <param name="Body">Loop body.</param>
/// <param name="Location">Optional source location.</param>
public sealed record WhileNode(
AstNode Condition,
AstNode Body,
SourceLocation? Location = null)
: AstNode(AstNodeType.While, [Condition, Body], Location);
/// <summary>
/// For-loop node.
/// </summary>
/// <remarks>
/// Children are always four entries in the order init, condition, update, body. A <c>null</c> init,
/// condition or update is represented in the child list by <c>EmptyNode.Instance</c>, while the
/// corresponding property itself stays <c>null</c>.
/// </remarks>
/// <param name="Init">Optional initialiser.</param>
/// <param name="Condition">Optional loop condition.</param>
/// <param name="Update">Optional update step.</param>
/// <param name="Body">Loop body.</param>
/// <param name="Location">Optional source location.</param>
public sealed record ForNode(
AstNode? Init,
AstNode? Condition,
AstNode? Update,
AstNode Body,
SourceLocation? Location = null)
: AstNode(AstNodeType.For, [Init ?? EmptyNode.Instance, Condition ?? EmptyNode.Instance, Update ?? EmptyNode.Instance, Body], Location);
/// <summary>
/// Return statement node; has one child when <paramref name="Value"/> is present and none otherwise.
/// </summary>
/// <param name="Value">Optional returned expression; <c>null</c> for a bare return.</param>
/// <param name="Location">Optional source location.</param>
public sealed record ReturnNode(
AstNode? Value,
SourceLocation? Location = null)
: AstNode(AstNodeType.Return, Value is null ? [] : [Value], Location);

View File

@@ -0,0 +1,42 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Function call node; its children are exactly <paramref name="Arguments"/>.
/// </summary>
/// <param name="FunctionName">Name of the called function.</param>
/// <param name="Arguments">Argument expressions in call order.</param>
/// <param name="Location">Optional source location.</param>
public sealed record CallNode(
string FunctionName,
ImmutableArray<AstNode> Arguments,
SourceLocation? Location = null)
: AstNode(AstNodeType.Call, Arguments, Location);
/// <summary>
/// Variable reference node; a leaf with no children.
/// </summary>
/// <param name="Name">Variable name.</param>
/// <param name="DataType">Optional data type text; may be <c>null</c>.</param>
/// <param name="Location">Optional source location.</param>
public sealed record VariableNode(
string Name,
string? DataType,
SourceLocation? Location = null)
: AstNode(AstNodeType.Variable, [], Location);
/// <summary>
/// Constant value node; a leaf with no children.
/// </summary>
/// <param name="Value">The constant, boxed as <see cref="object"/>; its runtime type is not constrained by this declaration.</param>
/// <param name="DataType">Data type text for the constant.</param>
/// <param name="Location">Optional source location.</param>
public sealed record ConstantNode(
object Value,
string DataType,
SourceLocation? Location = null)
: AstNode(AstNodeType.Constant, [], Location);
/// <summary>
/// Array indexing node; children are <c>[Array, Index]</c>.
/// </summary>
/// <param name="Array">Expression being indexed.</param>
/// <param name="Index">Index expression.</param>
/// <param name="Location">Optional source location.</param>
public sealed record ArrayAccessNode(
AstNode Array,
AstNode Index,
SourceLocation? Location = null)
: AstNode(AstNodeType.ArrayAccess, [Array, Index], Location);
/// <summary>
/// Field access node; its only child is <paramref name="Object"/>.
/// </summary>
/// <param name="Object">Expression whose field is accessed.</param>
/// <param name="FieldName">Name of the accessed field.</param>
/// <param name="IsPointer">Presumably distinguishes access through a pointer (<c>-></c>) from direct access (<c>.</c>) — TODO confirm against the parser.</param>
/// <param name="Location">Optional source location.</param>
public sealed record FieldAccessNode(
AstNode Object,
string FieldName,
bool IsPointer,
SourceLocation? Location = null)
: AstNode(AstNodeType.FieldAccess, [Object], Location);
/// <summary>
/// Type cast node; its only child is <paramref name="Expression"/>.
/// </summary>
/// <param name="Expression">Expression being cast.</param>
/// <param name="TargetType">Target type text.</param>
/// <param name="Location">Optional source location.</param>
public sealed record CastNode(
AstNode Expression,
string TargetType,
SourceLocation? Location = null)
: AstNode(AstNodeType.Cast, [Expression], Location);

View File

@@ -0,0 +1,24 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Assignment node; children are <c>[Target, Value]</c>.
/// </summary>
/// <param name="Target">Expression being assigned to.</param>
/// <param name="Value">Expression being assigned.</param>
/// <param name="Operator">Assignment operator text.</param>
/// <param name="Location">Optional source location.</param>
public sealed record AssignmentNode(
AstNode Target,
AstNode Value,
string Operator,
SourceLocation? Location = null)
: AstNode(AstNodeType.Assignment, [Target, Value], Location);
/// <summary>
/// Binary operation node; children are <c>[Left, Right]</c>.
/// </summary>
/// <param name="Left">Left operand.</param>
/// <param name="Right">Right operand.</param>
/// <param name="Operator">Operator text, for example <c>*</c> or <c>&lt;&lt;</c>.</param>
/// <param name="Location">Optional source location.</param>
public sealed record BinaryOpNode(
AstNode Left,
AstNode Right,
string Operator,
SourceLocation? Location = null)
: AstNode(AstNodeType.BinaryOp, [Left, Right], Location);
/// <summary>
/// Unary operation node; its only child is <paramref name="Operand"/>.
/// </summary>
/// <param name="Operand">Operand expression.</param>
/// <param name="Operator">Operator text.</param>
/// <param name="IsPrefix">Whether the operator is written before the operand.</param>
/// <param name="Location">Optional source location.</param>
public sealed record UnaryOpNode(
AstNode Operand,
string Operator,
bool IsPrefix,
SourceLocation? Location = null)
: AstNode(AstNodeType.UnaryOp, [Operand], Location);

View File

@@ -0,0 +1,30 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Function definition node.
/// </summary>
/// <remarks>
/// Children are the body first, followed by the parameters in order: <c>[Body, .. Parameters]</c>.
/// </remarks>
/// <param name="Name">Function name.</param>
/// <param name="ReturnType">Return type text.</param>
/// <param name="Parameters">Declared parameters in order.</param>
/// <param name="Body">Function body block.</param>
/// <param name="Location">Optional source location.</param>
public sealed record FunctionNode(
string Name,
string ReturnType,
ImmutableArray<ParameterNode> Parameters,
BlockNode Body,
SourceLocation? Location = null)
: AstNode(AstNodeType.Function, [Body, .. Parameters], Location);
/// <summary>
/// Function parameter node; a leaf with no children.
/// </summary>
/// <param name="Name">Parameter name.</param>
/// <param name="DataType">Parameter type text.</param>
/// <param name="Index">Parameter index — presumably its position in the parameter list; confirm whether it is zero-based.</param>
/// <param name="Location">Optional source location.</param>
public sealed record ParameterNode(
string Name,
string DataType,
int Index,
SourceLocation? Location = null)
: AstNode(AstNodeType.Parameter, [], Location);
/// <summary>
/// Statement block node; its children are exactly <paramref name="Statements"/>.
/// </summary>
/// <param name="Statements">Statements in source order.</param>
/// <param name="Location">Optional source location.</param>
public sealed record BlockNode(
ImmutableArray<AstNode> Statements,
SourceLocation? Location = null)
: AstNode(AstNodeType.Block, Statements, Location);
/// <summary>
/// Placeholder node with no children and no location.
/// </summary>
/// <remarks>
/// It reports <see cref="AstNodeType.Block"/> as its type, so code that inspects only
/// <see cref="AstNode.Type"/> cannot tell it apart from an empty <see cref="BlockNode"/>.
/// </remarks>
public sealed record EmptyNode() : AstNode(AstNodeType.Block, [], null)
{
/// <summary>Shared instance; the public constructor still allows further instances to be created.</summary>
public static EmptyNode Instance { get; } = new();
}

View File

@@ -0,0 +1,55 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// A recognized code pattern.
/// </summary>
/// <param name="Type">Kind of pattern that was recognized.</param>
/// <param name="Node">AST node the pattern is attached to.</param>
/// <param name="Metadata">Optional details about the match; may be <c>null</c>.</param>
public sealed record AstPattern(
PatternType Type,
AstNode Node,
PatternMetadata? Metadata);
/// <summary>
/// Types of code patterns.
/// </summary>
/// <remarks>
/// No explicit numeric values are assigned, so each member's value follows declaration order;
/// inserting or reordering members changes those values.
/// </remarks>
public enum PatternType
{
// Loops
CountedLoop,
ConditionalLoop,
InfiniteLoop,
LoopUnrolled,
// Branches
IfElseChain,
SwitchTable,
ShortCircuit,
// Memory
MemoryAllocation,
MemoryDeallocation,
BufferOperation,
StackBuffer,
// Error handling
ErrorCheck,
NullCheck,
BoundsCheck,
// Idioms
StringOperation,
MathOperation,
BitwiseOperation,
TableLookup
}
/// <summary>
/// Metadata about a recognized pattern.
/// </summary>
/// <param name="Description">Human-readable description of the match.</param>
/// <param name="Confidence">Confidence score for the match; presumably in the range 0–1 like the comparison engine's scores — TODO confirm.</param>
/// <param name="Properties">Optional additional key/value details; may be <c>null</c>.</param>
public sealed record PatternMetadata(
string Description,
decimal Confidence,
ImmutableDictionary<string, string>? Properties);

View File

@@ -0,0 +1,25 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// A difference between two pieces of code.
/// </summary>
/// <param name="Type">Kind of difference.</param>
/// <param name="NodeA">Node from the first tree; <c>null</c> when the node exists only in the second tree (an addition).</param>
/// <param name="NodeB">Node from the second tree; <c>null</c> when the node exists only in the first tree (a removal).</param>
/// <param name="Description">Human-readable description of the difference.</param>
public sealed record CodeDifference(
DifferenceType Type,
AstNode? NodeA,
AstNode? NodeB,
string Description);
/// <summary>
/// Types of code differences.
/// </summary>
/// <remarks>
/// <c>AstComparisonEngine.CompareNodes</c> emits only <see cref="Added"/>, <see cref="Removed"/> and
/// <see cref="Modified"/>; a node-type mismatch is reported there as <see cref="Modified"/>, not
/// <see cref="TypeChanged"/>.
/// </remarks>
public enum DifferenceType
{
Added,
Removed,
Modified,
Reordered,
TypeChanged,
OptimizationVariant
}

View File

@@ -0,0 +1,28 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class CodeNormalizer
{
/// <summary>
/// Returns a copy of <paramref name="node"/> whose children have each been normalized.
/// </summary>
/// <remarks>
/// Nodes without children are returned unchanged. Only the <c>Children</c> array is replaced on the
/// copy; NOTE(review): named operand properties of derived records (for example
/// <c>BinaryOpNode.Left</c>) are copied as-is and keep referring to the original child instances.
/// </remarks>
/// <param name="node">Node whose children are normalized.</param>
/// <param name="options">Normalization switches passed through to each child.</param>
/// <param name="varMap">Shared table from original to canonical variable names.</param>
/// <param name="varIndex">Shared counter used for the next canonical variable name.</param>
/// <returns>The original node when it has no children; otherwise a copy with normalized children.</returns>
private static AstNode NormalizeChildren(
    AstNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var originals = node.Children;
    if (!originals.IsDefaultOrEmpty)
    {
        var rewritten = ImmutableArray.CreateBuilder<AstNode>(originals.Length);
        for (var index = 0; index < originals.Length; index++)
        {
            rewritten.Add(NormalizeNode(originals[index], options, varMap, ref varIndex));
        }

        return node with { Children = rewritten.MoveToImmutable() };
    }

    return node;
}
}

View File

@@ -0,0 +1,106 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class CodeNormalizer
{
/// <inheritdoc />
public DecompiledAst NormalizeAst(DecompiledAst ast, NormalizationOptions? options = null)
{
    ArgumentNullException.ThrowIfNull(ast);

    var effectiveOptions = options ?? NormalizationOptions.Default;

    // One rename table and one counter are shared across the whole tree, so a given source name
    // always maps to the same canonical name.
    var canonicalNames = new Dictionary<string, string>();
    var nextVariable = 0;

    // Only the root is replaced; node count, depth and patterns are carried over unchanged.
    return new DecompiledAst(
        NormalizeNode(ast.Root, effectiveOptions, canonicalNames, ref nextVariable),
        ast.NodeCount,
        ast.Depth,
        ast.Patterns);
}
/// <summary>
/// Normalizes a single node by dispatching on its kind and on the enabled options.
/// </summary>
/// <remarks>
/// Variables, calls and constants get their dedicated handling only when the matching option is
/// enabled; every other node, and those three when their option is off, only has its children
/// normalized.
/// </remarks>
/// <param name="node">Node to normalize.</param>
/// <param name="options">Normalization switches.</param>
/// <param name="varMap">Shared table from original to canonical variable names.</param>
/// <param name="varIndex">Shared counter used for the next canonical variable name.</param>
/// <returns>The normalized node.</returns>
private static AstNode NormalizeNode(
    AstNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    if (node is VariableNode variable && options.NormalizeVariables)
    {
        return NormalizeVariableNode(variable, varMap, ref varIndex);
    }

    if (node is CallNode call && options.NormalizeFunctionCalls)
    {
        return NormalizeCallNode(call, options, varMap, ref varIndex);
    }

    if (node is ConstantNode constant && options.NormalizeConstants)
    {
        return NormalizeConstantNode(constant);
    }

    return NormalizeChildren(node, options, varMap, ref varIndex);
}
/// <summary>
/// Replaces a variable's name with its canonical <c>var_N</c> name.
/// </summary>
/// <remarks>
/// Names recognised by <c>IsKeywordOrType</c> are left untouched. The first occurrence of any other
/// name is assigned the next index; later occurrences reuse the same canonical name.
/// </remarks>
/// <param name="node">Variable node to rename.</param>
/// <param name="varMap">Table from original to canonical names; extended when a new name is seen.</param>
/// <param name="varIndex">Counter for the next canonical name; incremented when a new name is seen.</param>
/// <returns>The original node for keywords and types; otherwise a renamed copy.</returns>
private static AstNode NormalizeVariableNode(
    VariableNode node,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var original = node.Name;
    if (IsKeywordOrType(original))
    {
        return node;
    }

    if (varMap.TryGetValue(original, out var known))
    {
        return node with { Name = known };
    }

    var assigned = $"var_{varIndex++}";
    varMap[original] = assigned;
    return node with { Name = assigned };
}
/// <summary>
/// Normalizes a call node: anonymizes the callee name unless it is preserved, and normalizes every argument.
/// </summary>
/// <remarks>
/// Names in <c>options.KnownFunctions</c> and names accepted by <c>IsStandardLibraryFunction</c> are kept.
/// Any other name becomes <c>func_XXXXXXXX</c>, where the suffix is a hash computed by a fixed algorithm.
/// <c>string.GetHashCode()</c> must not be used here: it is randomized per process, which would make
/// the normalized tree differ from run to run.
/// </remarks>
/// <param name="node">Call node to normalize.</param>
/// <param name="options">Normalization switches and the set of known functions.</param>
/// <param name="varMap">Shared table from original to canonical variable names.</param>
/// <param name="varIndex">Shared counter used for the next canonical variable name.</param>
/// <returns>A new <see cref="CallNode"/> with the normalized name and arguments and the original location.</returns>
private static AstNode NormalizeCallNode(
    CallNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var funcName = node.FunctionName;
    if (options.KnownFunctions?.Contains(funcName) != true &&
        !IsStandardLibraryFunction(funcName))
    {
        funcName = $"func_{StableHash(funcName):X8}";
    }

    var normalizedArgs = new List<AstNode>(node.Arguments.Length);
    foreach (var arg in node.Arguments)
    {
        normalizedArgs.Add(NormalizeNode(arg, options, varMap, ref varIndex));
    }

    return new CallNode(funcName, [.. normalizedArgs], node.Location);

    // 32-bit FNV-1a style hash over the UTF-16 code units; the same input gives the same value in every process.
    static uint StableHash(string text)
    {
        var hash = 2166136261u;
        foreach (var unit in text)
        {
            hash = unchecked((hash ^ unit) * 16777619u);
        }

        return hash;
    }
}
/// <summary>
/// Replaces a constant's value with a placeholder that records only its broad category.
/// </summary>
/// <remarks>
/// Every built-in integer type, signed or unsigned, maps to <c>CONST_INT</c>; <c>float</c>,
/// <c>double</c> and <c>decimal</c> map to <c>CONST_FLOAT</c>; strings map to <c>CONST_STR</c>. Values
/// of any other type are left unchanged. <c>DataType</c> is not modified. Because the placeholders are
/// themselves strings, normalizing an already-normalized node yields <c>CONST_STR</c>.
/// </remarks>
/// <param name="node">Constant node to normalize.</param>
/// <returns>A copy with the placeholder value, or the original node when the value type is not recognised.</returns>
private static AstNode NormalizeConstantNode(ConstantNode node)
{
    return node.Value switch
    {
        sbyte or byte or short or ushort or int or uint or long or ulong => node with { Value = "CONST_INT" },
        float or double or decimal => node with { Value = "CONST_FLOAT" },
        string => node with { Value = "CONST_STR" },
        _ => node,
    };
}
}

View File

@@ -0,0 +1,43 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class CodeNormalizer
{
/// <summary>
/// Identifiers that normalization must never rename: C keywords, common type names and literal constants.
/// </summary>
/// <remarks>
/// The set is created without an explicit comparer, so lookups use the default string equality
/// (ordinal, case-sensitive).
/// </remarks>
private static readonly ImmutableHashSet<string> _cKeywords = ImmutableHashSet.Create(
"auto", "break", "case", "char", "const", "continue", "default", "do",
"double", "else", "enum", "extern", "float", "for", "goto", "if",
"int", "long", "register", "return", "short", "signed", "sizeof", "static",
"struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while",
// Common Ghidra types
"undefined", "undefined1", "undefined2", "undefined4", "undefined8",
"byte", "word", "dword", "qword", "bool", "uchar", "ushort", "uint", "ulong",
"int8_t", "int16_t", "int32_t", "int64_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t",
"size_t", "ssize_t", "ptrdiff_t", "intptr_t", "uintptr_t",
// Literal constants to preserve (these are values, not function names)
"NULL", "true", "false"
);
/// <summary>
/// Tells whether <paramref name="name"/> is one of the preserved keywords, type names or literals.
/// </summary>
/// <param name="name">Identifier to test.</param>
/// <returns><c>true</c> when the identifier is in the preserved set.</returns>
private static bool IsKeywordOrType(string name) => _cKeywords.Contains(name);
/// <summary>
/// Tells whether <paramref name="name"/> is one of the C standard library names that normalization preserves.
/// </summary>
/// <remarks>Matching is exact and case-sensitive.</remarks>
/// <param name="name">Identifier to test.</param>
/// <returns><c>true</c> for a listed name; otherwise <c>false</c>.</returns>
private static bool IsStandardLibraryFunction(string name) =>
    name is
        // Memory
        "malloc" or "calloc" or "realloc" or "free" or "memcpy" or "memmove" or "memset" or "memcmp"
        // Strings
        or "strlen" or "strcpy" or "strncpy" or "strcat" or "strncat" or "strcmp" or "strncmp" or "strchr" or "strrchr" or "strstr"
        // Formatted I/O
        or "printf" or "fprintf" or "sprintf" or "snprintf" or "scanf" or "fscanf" or "sscanf"
        // Files
        or "fopen" or "fclose" or "fread" or "fwrite" or "fseek" or "ftell" or "fflush"
        // Character and line I/O
        or "puts" or "fputs" or "gets" or "fgets" or "putchar" or "getchar"
        // Math
        or "abs" or "labs" or "llabs" or "fabs" or "sqrt" or "pow" or "sin" or "cos" or "tan" or "log" or "exp"
        // Process control and conversion
        or "exit" or "abort" or "atexit" or "atoi" or "atol" or "atof" or "strtol" or "strtoul" or "strtod"
        // Diagnostics
        or "assert" or "errno";
}

View File

@@ -0,0 +1,44 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Text.RegularExpressions;
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class CodeNormalizer
{
/// <summary>Matches <c>//</c> and everything up to, but not including, the next newline.</summary>
/// <remarks>The pattern has no notion of string literals, so <c>//</c> inside a string also matches.</remarks>
[GeneratedRegex(@"//[^\n]*")]
private static partial Regex SingleLineCommentRegex();
/// <summary>Matches a <c>/* ... */</c> comment, lazily, across line breaks.</summary>
[GeneratedRegex(@"/\*[\s\S]*?\*/")]
private static partial Regex MultiLineCommentRegex();
/// <summary>Matches a whole-word C-style identifier; group 1 is the identifier.</summary>
[GeneratedRegex(@"\b([a-zA-Z_][a-zA-Z0-9_]*)\b")]
private static partial Regex IdentifierRegex();
/// <summary>Matches an identifier followed by optional whitespace and <c>(</c>; group 1 is the identifier.</summary>
/// <remarks>Keywords followed by a parenthesis, such as <c>if (</c>, match as well.</remarks>
[GeneratedRegex(@"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")]
private static partial Regex FunctionCallRegex();
/// <summary>Matches <c>0x</c> or <c>0X</c> followed by one or more hexadecimal digits; integer suffixes are not included.</summary>
[GeneratedRegex(@"0[xX][0-9a-fA-F]+")]
private static partial Regex HexConstantRegex();
/// <summary>Matches a standalone run of four or more decimal digits.</summary>
[GeneratedRegex(@"\b[0-9]{4,}\b")]
private static partial Regex LargeDecimalRegex();
/// <summary>Matches a double-quoted string literal, honouring backslash escapes.</summary>
[GeneratedRegex(@"""(?:[^""\\]|\\.)*""")]
private static partial Regex StringLiteralRegex();
/// <summary>Matches a run of one or more spaces and tabs.</summary>
[GeneratedRegex(@"[ \t]+")]
private static partial Regex MultipleWhitespaceRegex();
/// <summary>Matches a single operator or punctuation character with any surrounding whitespace; group 1 is the character.</summary>
/// <remarks><c>\s</c> includes line breaks, so newlines next to a matched character are part of the match.</remarks>
[GeneratedRegex(@"\s*([+\-*/%=<>!&|^~?:;,{}()\[\]])\s*")]
private static partial Regex WhitespaceAroundOperatorsRegex();
/// <summary>Matches spaces and tabs immediately before a newline, together with that newline.</summary>
[GeneratedRegex(@"[ \t]+\n")]
private static partial Regex TrailingWhitespaceRegex();
/// <summary>Matches, at the start of the input, an identifier followed by optional whitespace and <c>=</c>; group 1 is the identifier.</summary>
/// <remarks>The character after <c>=</c> is not checked, so a leading <c>x == y</c> matches too.</remarks>
[GeneratedRegex(@"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")]
private static partial Regex AssignmentTargetRegex();
/// <summary>Matches, at the start of the input, an identifier followed by optional whitespace and <c>(</c>; group 1 is the identifier.</summary>
[GeneratedRegex(@"^([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")]
private static partial Regex FunctionNameRegex();
}

View File

@@ -0,0 +1,81 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Text;
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class CodeNormalizer
{
/// <summary>
/// Reorders consecutive brace-free lines at block depth 1 into a canonical order.
/// </summary>
/// <remarks>
/// The input is split on <c>\n</c> and empty lines are discarded. Depth is tracked by counting
/// <c>{</c> and <c>}</c> per line. Lines at depth 1 that contain no brace are collected, trimmed, and
/// emitted in the order produced by <c>SortStatements</c> as soon as a line outside that category is
/// met; such other lines are emitted as they were. Lines are joined with the platform line terminator
/// and the result is trimmed.
/// </remarks>
/// <param name="code">Source text to reorder.</param>
/// <returns>The reordered text.</returns>
private static string SortIndependentStatements(string code)
{
    var output = new StringBuilder();
    var pending = new List<string>();
    var depth = 0;

    // Emits the collected run in canonical order and starts a new run.
    void FlushPending()
    {
        if (pending.Count == 0)
        {
            return;
        }

        foreach (var statement in SortStatements(pending))
        {
            output.AppendLine(statement);
        }

        pending.Clear();
    }

    foreach (var rawLine in code.Split('\n', StringSplitOptions.RemoveEmptyEntries))
    {
        var text = rawLine.Trim();

        foreach (var symbol in text)
        {
            if (symbol == '{')
            {
                depth++;
            }
            else if (symbol == '}')
            {
                depth--;
            }
        }

        var touchesBrace = text.Contains('{') || text.Contains('}');
        if (depth == 1 && !touchesBrace)
        {
            pending.Add(text);
            continue;
        }

        FlushPending();
        output.AppendLine(rawLine);
    }

    FlushPending();
    return output.ToString().Trim();
}
/// <summary>
/// Returns the statements ordered by their sort key, compared ordinally.
/// </summary>
/// <remarks>The sort is stable: statements with equal keys keep their relative order.</remarks>
/// <param name="statements">Statements to order; not modified.</param>
/// <returns>A new list in canonical order.</returns>
private static List<string> SortStatements(List<string> statements)
{
    var ordered = statements.OrderBy(statement => GetStatementSortKey(statement), StringComparer.Ordinal);
    return ordered.ToList();
}
/// <summary>
/// Builds the key used to order a statement.
/// </summary>
/// <remarks>
/// Statements starting with <c>identifier =</c> get <c>A_</c> plus the identifier; statements starting
/// with <c>identifier (</c> get <c>C_</c> plus the identifier; everything else gets <c>Z_</c> plus the
/// trimmed statement. The assignment pattern is tried first.
/// </remarks>
/// <param name="statement">Statement text.</param>
/// <returns>The sort key.</returns>
private static string GetStatementSortKey(string statement)
{
    var text = statement.Trim();

    if (AssignmentTargetRegex().Match(text) is { Success: true } assignment)
    {
        return $"A_{assignment.Groups[1].Value}";
    }

    if (FunctionNameRegex().Match(text) is { Success: true } call)
    {
        return $"C_{call.Groups[1].Value}";
    }

    return $"Z_{text}";
}
}

View File

@@ -0,0 +1,90 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
using System.Text.RegularExpressions;
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class CodeNormalizer
{
/// <summary>
/// Recognises, in one left-to-right pass, string literals, character literals, <c>//</c> comments and
/// <c>/* */</c> comments, so that comment markers inside a literal or inside another comment are not
/// mistaken for the start of a comment.
/// </summary>
private static readonly Regex _commentOrLiteralRegex = new(
    @"""(?:[^""\\\n]|\\.)*""|'(?:[^'\\\n]|\\.)*'|//[^\n]*|/\*[\s\S]*?\*/",
    RegexOptions.Compiled | RegexOptions.CultureInvariant);

/// <summary>
/// Removes <c>//</c> and <c>/* */</c> comments from <paramref name="code"/>.
/// </summary>
/// <remarks>
/// Comments are removed in a single pass that also skips over string and character literals. Removing
/// line comments first and block comments second is unsafe: in <c>/* see http://x */</c> the <c>//</c>
/// would be taken as a line comment, the closing <c>*/</c> would be deleted with it, and the block
/// comment would then extend to the next <c>*/</c> in the file, swallowing real code. A single-line
/// comment is removed up to, but not including, its newline. An unterminated <c>/*</c> is left in place.
/// </remarks>
/// <param name="code">Source text.</param>
/// <returns>The text with comments removed and literals untouched.</returns>
private static string RemoveComments(string code)
{
    // Literals begin with a quote and comments with a slash, so the first character tells them apart.
    return _commentOrLiteralRegex.Replace(
        code,
        match => match.Value[0] == '/' ? string.Empty : match.Value);
}
/// <summary>
/// Replaces every identifier in <paramref name="code"/> with a canonical <c>var_N</c> name.
/// </summary>
/// <remarks>
/// Identifiers are left as they are when they are a keyword or type, are listed in
/// <paramref name="knownFunctions"/>, or are a standard library name. Indices are assigned in order of
/// first appearance and repeated identifiers reuse their name.
/// </remarks>
/// <param name="code">Source text.</param>
/// <param name="knownFunctions">Optional set of function names to preserve.</param>
/// <returns>The text with identifiers renamed.</returns>
private static string NormalizeVariableNames(string code, ImmutableHashSet<string>? knownFunctions)
{
    var canonicalNames = new Dictionary<string, string>();
    return IdentifierRegex().Replace(code, Rename);

    string Rename(Match match)
    {
        var identifier = match.Value;

        var isPreserved = IsKeywordOrType(identifier)
            || knownFunctions?.Contains(identifier) == true
            || IsStandardLibraryFunction(identifier);
        if (isPreserved)
        {
            return identifier;
        }

        if (!canonicalNames.TryGetValue(identifier, out var replacement))
        {
            // The next index always equals the number of names assigned so far.
            replacement = $"var_{canonicalNames.Count}";
            canonicalNames[identifier] = replacement;
        }

        return replacement;
    }
}
/// <summary>
/// Replaces the callee name of every call to an unrecognised function with <c>func_XXXXXXXX</c>.
/// </summary>
/// <remarks>
/// A match is kept unchanged when the name is a keyword or type (so <c>if (</c>, <c>while (</c> and
/// <c>sizeof(</c> are not treated as calls), is listed in <paramref name="knownFunctions"/>, or is a
/// standard library name. The suffix is a hash computed by a fixed algorithm;
/// <c>string.GetHashCode()</c> must not be used here because it is randomized per process, which would
/// make the normalized text differ from run to run. Whitespace between a replaced name and its opening
/// parenthesis is dropped.
/// </remarks>
/// <param name="code">Source text.</param>
/// <param name="knownFunctions">Optional set of function names to preserve.</param>
/// <returns>The text with unrecognised callee names anonymized.</returns>
private static string NormalizeFunctionCalls(string code, ImmutableHashSet<string>? knownFunctions)
{
    return FunctionCallRegex().Replace(code, match =>
    {
        var funcName = match.Groups[1].Value;

        if (IsKeywordOrType(funcName) ||
            knownFunctions?.Contains(funcName) == true ||
            IsStandardLibraryFunction(funcName))
        {
            return match.Value;
        }

        return $"func_{StableHash(funcName):X8}(";
    });

    // 32-bit FNV-1a style hash over the UTF-16 code units; the same input gives the same value in every process.
    static uint StableHash(string text)
    {
        var hash = 2166136261u;
        foreach (var unit in text)
        {
            hash = unchecked((hash ^ unit) * 16777619u);
        }

        return hash;
    }
}
/// <summary>
/// Replaces literal constants in <paramref name="code"/> with category placeholders.
/// </summary>
/// <remarks>
/// Applied in this order: hexadecimal literals become <c>CONST_HEX</c>, standalone runs of four or
/// more decimal digits become <c>CONST_INT</c>, and double-quoted strings become <c>CONST_STR</c>.
/// Shorter decimal numbers are left as they are.
/// </remarks>
/// <param name="code">Source text.</param>
/// <returns>The text with constants replaced.</returns>
private static string NormalizeConstants(string code)
{
    var withoutHex = HexConstantRegex().Replace(code, "CONST_HEX");
    var withoutLargeInts = LargeDecimalRegex().Replace(withoutHex, "CONST_INT");
    return StringLiteralRegex().Replace(withoutLargeInts, "CONST_STR");
}
/// <summary>
/// Canonicalizes whitespace in <paramref name="code"/>.
/// </summary>
/// <remarks>
/// Steps, in order: collapse runs of spaces and tabs to one space; remove all whitespace around
/// operator and punctuation characters; convert <c>\r\n</c> and lone <c>\r</c> to <c>\n</c>; strip
/// spaces and tabs before a newline; trim the result.
/// </remarks>
/// <param name="code">Source text.</param>
/// <returns>The whitespace-normalized text.</returns>
private static string NormalizeWhitespace(string code)
{
    var collapsed = MultipleWhitespaceRegex().Replace(code, " ");
    var tightened = WhitespaceAroundOperatorsRegex().Replace(collapsed, "$1");
    var unixEndings = tightened.Replace("\r\n", "\n").Replace("\r", "\n");
    var withoutTrailing = TrailingWhitespaceRegex().Replace(unixEndings, "\n");
    return withoutTrailing.Trim();
}
}

View File

@@ -1,10 +1,7 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.BinaryIndex.Decompiler;
@@ -13,20 +10,6 @@ namespace StellaOps.BinaryIndex.Decompiler;
/// </summary>
public sealed partial class CodeNormalizer : ICodeNormalizer
{
// Names that IsKeywordOrType treats as reserved: they are never renamed when
// identifiers are normalized.
private static readonly ImmutableHashSet<string> CKeywords = ImmutableHashSet.Create(
// C keywords and built-in type names
"auto", "break", "case", "char", "const", "continue", "default", "do",
"double", "else", "enum", "extern", "float", "for", "goto", "if",
"int", "long", "register", "return", "short", "signed", "sizeof", "static",
"struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while",
// Common Ghidra types
"undefined", "undefined1", "undefined2", "undefined4", "undefined8",
"byte", "word", "dword", "qword", "bool", "uchar", "ushort", "uint", "ulong",
"int8_t", "int16_t", "int32_t", "int64_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t",
"size_t", "ssize_t", "ptrdiff_t", "intptr_t", "uintptr_t",
// Literal-like names to preserve (these are constants, not functions)
"NULL", "true", "false"
);
/// <inheritdoc />
public string Normalize(string code, NormalizationOptions? options = null)
{
@@ -36,34 +19,28 @@ public sealed partial class CodeNormalizer : ICodeNormalizer
var normalized = code;
// 1. Remove comments
normalized = RemoveComments(normalized);
// 2. Normalize variable names
if (options.NormalizeVariables)
{
normalized = NormalizeVariableNames(normalized, options.KnownFunctions);
}
// 3. Normalize function calls
if (options.NormalizeFunctionCalls)
{
normalized = NormalizeFunctionCalls(normalized, options.KnownFunctions);
}
// 4. Normalize constants
if (options.NormalizeConstants)
{
normalized = NormalizeConstants(normalized);
}
// 5. Normalize whitespace
if (options.NormalizeWhitespace)
{
normalized = NormalizeWhitespace(normalized);
}
// 6. Sort independent statements (within blocks)
if (options.SortIndependentStatements)
{
normalized = SortIndependentStatements(normalized);
@@ -77,458 +54,15 @@ public sealed partial class CodeNormalizer : ICodeNormalizer
{
ArgumentException.ThrowIfNullOrEmpty(code);
// Normalize with full normalization for hashing
var normalized = Normalize(code, new NormalizationOptions
{
NormalizeVariables = true,
NormalizeFunctionCalls = true,
NormalizeConstants = false, // Keep constants for semantic identity
NormalizeConstants = false,
NormalizeWhitespace = true,
SortIndependentStatements = true
});
return SHA256.HashData(Encoding.UTF8.GetBytes(normalized));
}
/// <inheritdoc />
public DecompiledAst NormalizeAst(DecompiledAst ast, NormalizationOptions? options = null)
{
    ArgumentNullException.ThrowIfNull(ast);

    var effectiveOptions = options ?? NormalizationOptions.Default;

    // One placeholder table and counter are shared by the whole tree so the same
    // variable name always maps to the same placeholder.
    var placeholders = new Dictionary<string, string>();
    var nextPlaceholder = 0;
    var rewrittenRoot = NormalizeNode(ast.Root, effectiveOptions, placeholders, ref nextPlaceholder);

    // Node count, depth and patterns are carried over unchanged from the input.
    return new DecompiledAst(rewrittenRoot, ast.NodeCount, ast.Depth, ast.Patterns);
}
/// <summary>
/// Dispatches a node to the normalizer for its kind when the corresponding option is
/// enabled; every other node only has its children normalized.
/// </summary>
/// <param name="node">Node to normalize.</param>
/// <param name="options">Switches selecting which normalizations run.</param>
/// <param name="varMap">Shared original-name to placeholder table.</param>
/// <param name="varIndex">Shared counter for the next placeholder number.</param>
/// <returns>The normalized node.</returns>
private static AstNode NormalizeNode(
    AstNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    if (node is VariableNode variable && options.NormalizeVariables)
    {
        return NormalizeVariableNode(variable, varMap, ref varIndex);
    }

    if (node is CallNode call && options.NormalizeFunctionCalls)
    {
        return NormalizeCallNode(call, options, varMap, ref varIndex);
    }

    if (node is ConstantNode constant && options.NormalizeConstants)
    {
        return NormalizeConstantNode(constant);
    }

    return NormalizeChildren(node, options, varMap, ref varIndex);
}
/// <summary>
/// Renames a variable node to its placeholder (<c>var_N</c>), allocating a new
/// placeholder the first time a name is seen. Keyword/type names are left untouched.
/// </summary>
/// <param name="node">Variable node to rename.</param>
/// <param name="varMap">Shared original-name to placeholder table; updated on first sight.</param>
/// <param name="varIndex">Shared counter for the next placeholder number; incremented on allocation.</param>
/// <returns>The original node for keyword/type names, otherwise a renamed copy.</returns>
private static AstNode NormalizeVariableNode(
    VariableNode node,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var originalName = node.Name;
    if (IsKeywordOrType(originalName))
    {
        return node;
    }

    if (varMap.TryGetValue(originalName, out var known))
    {
        return node with { Name = known };
    }

    var assigned = $"var_{varIndex++}";
    varMap[originalName] = assigned;
    return node with { Name = assigned };
}
/// <summary>
/// Normalizes a call node: anonymises the callee name unless it is a known or
/// standard-library function, and normalizes every argument.
/// </summary>
/// <param name="node">Call node to normalize.</param>
/// <param name="options">Normalization switches; <c>KnownFunctions</c> lists names to keep.</param>
/// <param name="varMap">Shared original-name to placeholder table.</param>
/// <param name="varIndex">Shared counter for the next placeholder number.</param>
/// <returns>A new call node with the (possibly anonymised) name and normalized arguments.</returns>
/// <remarks>
/// The anonymised name uses a 32-bit FNV-1a hash instead of <see cref="string.GetHashCode()"/>,
/// whose value is randomized per process and therefore not reproducible across runs.
/// </remarks>
private static AstNode NormalizeCallNode(
    CallNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var funcName = node.FunctionName;

    var preserved = options.KnownFunctions?.Contains(funcName) == true
        || IsStandardLibraryFunction(funcName);
    if (!preserved)
    {
        funcName = $"func_{StableHash(funcName):X8}";
    }

    var normalizedArgs = new List<AstNode>(node.Arguments.Length);
    foreach (var arg in node.Arguments)
    {
        normalizedArgs.Add(NormalizeNode(arg, options, varMap, ref varIndex));
    }

    return new CallNode(funcName, [.. normalizedArgs], node.Location);

    // 32-bit FNV-1a over the UTF-16 code units of the name; stable across processes and platforms.
    static uint StableHash(string text)
    {
        const uint OffsetBasis = 2166136261;
        const uint Prime = 16777619;

        var hash = OffsetBasis;
        foreach (var ch in text)
        {
            hash = unchecked((hash ^ ch) * Prime);
        }

        return hash;
    }
}
/// <summary>
/// Replaces a constant's value with a category token chosen by the runtime type of the
/// value: <c>CONST_INT</c> for long/int/short/byte, <c>CONST_FLOAT</c> for
/// double/float/decimal, <c>CONST_STR</c> for strings. Any other value (including
/// <c>null</c>) is returned unchanged.
/// </summary>
/// <param name="node">Constant node to normalize.</param>
/// <returns>The normalized node, or the original node when no category applies.</returns>
private static AstNode NormalizeConstantNode(ConstantNode node) => node.Value switch
{
    long or int or short or byte => node with { Value = "CONST_INT" },
    double or float or decimal => node with { Value = "CONST_FLOAT" },
    string => node with { Value = "CONST_STR" },
    _ => node
};
/// <summary>
/// Normalizes all children of <paramref name="node"/> and rebuilds the node around the
/// normalized children for the node kinds handled below.
/// </summary>
/// <param name="node">Parent node.</param>
/// <param name="options">Normalization switches.</param>
/// <param name="varMap">Shared original-name to placeholder table.</param>
/// <param name="varIndex">Shared counter for the next placeholder number.</param>
/// <returns>
/// The rebuilt node, or <paramref name="node"/> itself when it has no children or is of a
/// kind that is not rebuilt here.
/// </returns>
private static AstNode NormalizeChildren(
    AstNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var childCount = node.Children.Length;
    if (childCount == 0)
    {
        return node;
    }

    var builder = ImmutableArray.CreateBuilder<AstNode>(childCount);
    foreach (var child in node.Children)
    {
        builder.Add(NormalizeNode(child, options, varMap, ref varIndex));
    }

    var rewritten = builder.ToImmutable();

    // Explicit per-type reconstruction (no reflection).
    switch (node)
    {
        case BlockNode block:
            return block with { Statements = rewritten };
        case IfNode ifNode:
            return CreateNormalizedIf(ifNode, rewritten);
        case WhileNode whileNode:
            return CreateNormalizedWhile(whileNode, rewritten);
        case ForNode forNode:
            return CreateNormalizedFor(forNode, rewritten);
        case ReturnNode returnNode when rewritten.Length > 0:
            return returnNode with { Value = rewritten[0] };
        case AssignmentNode assignment:
            return CreateNormalizedAssignment(assignment, rewritten);
        case BinaryOpNode binOp:
            return CreateNormalizedBinaryOp(binOp, rewritten);
        case UnaryOpNode unaryOp when rewritten.Length > 0:
            return unaryOp with { Operand = rewritten[0] };
        default:
            // NOTE(review): for any other node kind the normalized children are discarded
            // and the original subtree is returned - confirm this is intended.
            return node;
    }
}
/// <summary>
/// Rebuilds an <c>if</c> node from normalized children taken positionally
/// (0 = condition, 1 = then branch, 2 = else branch); a missing position falls back to
/// the original node's member.
/// </summary>
private static IfNode CreateNormalizedIf(IfNode node, ImmutableArray<AstNode> children)
{
    var available = children.Length;
    var condition = available > 0 ? children[0] : node.Condition;
    var thenBranch = available > 1 ? children[1] : node.ThenBranch;
    var elseBranch = available > 2 ? children[2] : node.ElseBranch;
    return new IfNode(condition, thenBranch, elseBranch, node.Location);
}
/// <summary>
/// Rebuilds a <c>while</c> node from normalized children taken positionally
/// (0 = condition, 1 = body); a missing position falls back to the original member.
/// </summary>
private static WhileNode CreateNormalizedWhile(WhileNode node, ImmutableArray<AstNode> children)
{
    var available = children.Length;
    var condition = available > 0 ? children[0] : node.Condition;
    var body = available > 1 ? children[1] : node.Body;
    return new WhileNode(condition, body, node.Location);
}
/// <summary>
/// Rebuilds a <c>for</c> node from normalized children taken positionally
/// (0 = init, 1 = condition, 2 = update, 3 = body); a missing position falls back to the
/// original member.
/// </summary>
/// <remarks>
/// NOTE(review): the positional mapping assumes the node's child list always contains all
/// four parts in this order - confirm how omitted clauses are represented.
/// </remarks>
private static ForNode CreateNormalizedFor(ForNode node, ImmutableArray<AstNode> children)
{
    var available = children.Length;
    var init = available > 0 ? children[0] : node.Init;
    var condition = available > 1 ? children[1] : node.Condition;
    var update = available > 2 ? children[2] : node.Update;
    var body = available > 3 ? children[3] : node.Body;
    return new ForNode(init, condition, update, body, node.Location);
}
/// <summary>
/// Rebuilds an assignment node from normalized children taken positionally
/// (0 = target, 1 = value), keeping the original operator and location.
/// </summary>
private static AssignmentNode CreateNormalizedAssignment(
    AssignmentNode node,
    ImmutableArray<AstNode> children)
{
    var available = children.Length;
    var target = available > 0 ? children[0] : node.Target;
    var value = available > 1 ? children[1] : node.Value;
    return new AssignmentNode(target, value, node.Operator, node.Location);
}
/// <summary>
/// Rebuilds a binary-operation node from normalized children taken positionally
/// (0 = left, 1 = right), keeping the original operator and location.
/// </summary>
private static BinaryOpNode CreateNormalizedBinaryOp(
    BinaryOpNode node,
    ImmutableArray<AstNode> children)
{
    var available = children.Length;
    var left = available > 0 ? children[0] : node.Left;
    var right = available > 1 ? children[1] : node.Right;
    return new BinaryOpNode(left, right, node.Operator, node.Location);
}
/// <summary>
/// Strips <c>//</c> line comments first and <c>/* ... */</c> block comments second.
/// </summary>
/// <param name="code">Source text.</param>
/// <returns>The text with both comment forms removed.</returns>
/// <remarks>
/// The removal is purely textual, so comment-like sequences inside string literals are
/// removed as well.
/// </remarks>
private static string RemoveComments(string code)
{
    var withoutLineComments = SingleLineCommentRegex().Replace(code, "");
    return MultiLineCommentRegex().Replace(withoutLineComments, "");
}
/// <summary>
/// Replaces every identifier that is not a keyword/type, a caller-supplied known function,
/// or a preserved standard-library name with a positional placeholder
/// (<c>var_0</c>, <c>var_1</c>, ...).
/// </summary>
/// <param name="code">Source text to rewrite.</param>
/// <param name="knownFunctions">Names that must be kept verbatim; may be <c>null</c>.</param>
/// <returns>The text with all remaining identifiers replaced by placeholders.</returns>
private static string NormalizeVariableNames(string code, ImmutableHashSet<string>? knownFunctions)
{
    // Placeholders are numbered by first appearance, so the next free number
    // is always the number of names already mapped.
    var placeholders = new Dictionary<string, string>();

    return IdentifierRegex().Replace(code, match =>
    {
        var identifier = match.Value;

        // Keywords/types, known functions and standard-library names stay as written.
        var preserved = IsKeywordOrType(identifier)
            || knownFunctions?.Contains(identifier) == true
            || IsStandardLibraryFunction(identifier);
        if (preserved)
        {
            return identifier;
        }

        if (placeholders.TryGetValue(identifier, out var existing))
        {
            return existing;
        }

        var fresh = $"var_{placeholders.Count}";
        placeholders[identifier] = fresh;
        return fresh;
    });
}
/// <summary>
/// Rewrites call sites (an identifier followed by <c>(</c>) of functions that are neither
/// caller-supplied known functions nor preserved standard-library names to
/// <c>func_XXXXXXXX(</c>, where the suffix is a hash of the original name.
/// </summary>
/// <param name="code">Source text to rewrite.</param>
/// <param name="knownFunctions">Names that must be kept verbatim; may be <c>null</c>.</param>
/// <returns>The text with anonymised call sites.</returns>
/// <remarks>
/// The suffix is a 32-bit FNV-1a hash rather than <see cref="string.GetHashCode()"/>:
/// string hash codes are randomized per process on modern .NET, which would make the
/// normalized text (and any digest computed from it) differ between runs.
/// </remarks>
private static string NormalizeFunctionCalls(string code, ImmutableHashSet<string>? knownFunctions)
{
    return FunctionCallRegex().Replace(code, match =>
    {
        var funcName = match.Groups[1].Value;

        // Known and standard-library functions keep their call site exactly as written.
        if (knownFunctions?.Contains(funcName) == true || IsStandardLibraryFunction(funcName))
        {
            return match.Value;
        }

        return $"func_{StableHash(funcName):X8}(";
    });

    // 32-bit FNV-1a over the UTF-16 code units of the name; stable across processes and platforms.
    static uint StableHash(string text)
    {
        const uint OffsetBasis = 2166136261;
        const uint Prime = 16777619;

        var hash = OffsetBasis;
        foreach (var ch in text)
        {
            hash = unchecked((hash ^ ch) * Prime);
        }

        return hash;
    }
}
/// <summary>
/// Replaces hexadecimal constants, decimal constants of four or more digits, and string
/// literals with the fixed tokens <c>CONST_HEX</c>, <c>CONST_INT</c> and <c>CONST_STR</c>,
/// in that order. Shorter decimal numbers are left as written.
/// </summary>
/// <param name="code">Source text to rewrite.</param>
/// <returns>The text with constants replaced.</returns>
private static string NormalizeConstants(string code)
{
    var withoutHex = HexConstantRegex().Replace(code, "CONST_HEX");
    var withoutLargeInts = LargeDecimalRegex().Replace(withoutHex, "CONST_INT");
    return StringLiteralRegex().Replace(withoutLargeInts, "CONST_STR");
}
/// <summary>
/// Canonicalises whitespace in four steps: collapse space/tab runs to one space, strip
/// whitespace around operator and punctuation characters, convert line endings to
/// <c>\n</c>, and drop blanks that precede a newline. The result is trimmed.
/// </summary>
/// <param name="code">Source text to rewrite.</param>
/// <returns>The whitespace-normalized text.</returns>
private static string NormalizeWhitespace(string code)
{
    var collapsed = MultipleWhitespaceRegex().Replace(code, " ");
    var tightened = WhitespaceAroundOperatorsRegex().Replace(collapsed, "$1");
    var unixEndings = tightened.Replace("\r\n", "\n").Replace("\r", "\n");
    var withoutTrailingBlanks = TrailingWhitespaceRegex().Replace(unixEndings, "\n");
    return withoutTrailingBlanks.Trim();
}
/// <summary>
/// Sorts runs of simple statements that sit directly inside the outermost block
/// (brace depth 1) into a canonical order; every other line is emitted as-is.
/// </summary>
/// <param name="code">Source text, one statement per line.</param>
/// <returns>The text with each run of depth-1 statements reordered, joined with <c>\n</c>.</returns>
/// <remarks>
/// This is a simplified implementation: it does not analyse data dependencies between the
/// statements it reorders. Lines are always joined with <c>\n</c> (never
/// <see cref="Environment.NewLine"/>) so the output, and any hash computed from it, is
/// identical on every platform.
/// </remarks>
private static string SortIndependentStatements(string code)
{
    var lines = code.Split('\n', StringSplitOptions.RemoveEmptyEntries);
    var result = new StringBuilder();
    var blockDepth = 0;
    var currentBlock = new List<string>();

    foreach (var line in lines)
    {
        var trimmed = line.Trim();

        // Depth is updated before classification, so a line that opens or closes a
        // block is judged by the depth after its own braces.
        blockDepth += trimmed.Count(c => c == '{');
        blockDepth -= trimmed.Count(c => c == '}');

        if (blockDepth == 1 && !trimmed.Contains('{') && !trimmed.Contains('}'))
        {
            // Simple statement at block level 1: collect it for sorting.
            currentBlock.Add(trimmed);
            continue;
        }

        // Anything else ends the current run and is written through untouched.
        FlushPending();
        result.Append(line).Append('\n');
    }

    FlushPending();
    return result.ToString().Trim();

    // Writes the collected run in sorted order and resets it.
    void FlushPending()
    {
        if (currentBlock.Count == 0)
        {
            return;
        }

        foreach (var stmt in SortStatements(currentBlock))
        {
            result.Append(stmt).Append('\n');
        }

        currentBlock.Clear();
    }
}
/// <summary>
/// Returns a new list with the statements ordered by their sort key using ordinal
/// comparison. The sort is stable, so statements with equal keys keep their relative order.
/// </summary>
/// <param name="statements">Statements to order; not modified.</param>
/// <returns>A new, ordered list.</returns>
private static List<string> SortStatements(List<string> statements) =>
    Enumerable
        .OrderBy(statements, statement => GetStatementSortKey(statement), StringComparer.Ordinal)
        .ToList();
/// <summary>
/// Builds the key used to order a statement: <c>A_&lt;target&gt;</c> for statements that
/// start with an assignment target, <c>C_&lt;name&gt;</c> for statements that start with a
/// function call, and <c>Z_&lt;statement&gt;</c> for everything else.
/// </summary>
/// <param name="statement">Statement text; surrounding whitespace is ignored.</param>
/// <returns>The sort key.</returns>
private static string GetStatementSortKey(string statement)
{
    var text = statement.Trim();

    // Assignments are keyed by their target and sort before calls.
    if (AssignmentTargetRegex().Match(text) is { Success: true } assignment)
    {
        return $"A_{assignment.Groups[1].Value}";
    }

    // Calls are keyed by the function name.
    if (FunctionNameRegex().Match(text) is { Success: true } call)
    {
        return $"C_{call.Groups[1].Value}";
    }

    return $"Z_{text}";
}
/// <summary>
/// Returns <c>true</c> when <paramref name="name"/> is in the reserved-name set
/// (<c>CKeywords</c>) and therefore must not be renamed.
/// </summary>
private static bool IsKeywordOrType(string name) => CKeywords.Contains(name);
/// <summary>
/// Returns <c>true</c> when <paramref name="name"/> is one of the C standard-library names
/// that normalization preserves verbatim. The comparison is case-sensitive.
/// </summary>
/// <param name="name">Identifier to test.</param>
private static bool IsStandardLibraryFunction(string name) =>
    name is
        // Memory
        "malloc" or "calloc" or "realloc" or "free" or "memcpy" or "memmove" or "memset" or "memcmp"
        // String
        or "strlen" or "strcpy" or "strncpy" or "strcat" or "strncat" or "strcmp" or "strncmp"
        or "strchr" or "strrchr" or "strstr"
        // I/O
        or "printf" or "fprintf" or "sprintf" or "snprintf" or "scanf" or "fscanf" or "sscanf"
        or "fopen" or "fclose" or "fread" or "fwrite" or "fseek" or "ftell" or "fflush"
        or "puts" or "fputs" or "gets" or "fgets" or "putchar" or "getchar"
        // Math
        or "abs" or "labs" or "llabs" or "fabs" or "sqrt" or "pow" or "sin" or "cos" or "tan"
        or "log" or "exp"
        // Other
        or "exit" or "abort" or "atexit" or "atoi" or "atol" or "atof" or "strtol" or "strtoul"
        or "strtod" or "assert" or "errno";
// Regex patterns using source generators
// "//" through the end of the line; the newline itself is not part of the match.
[GeneratedRegex(@"//[^\n]*")]
private static partial Regex SingleLineCommentRegex();
// "/*" through the nearest following "*/" (lazy), spanning newlines.
[GeneratedRegex(@"/\*[\s\S]*?\*/")]
private static partial Regex MultiLineCommentRegex();
// A whole-word ASCII identifier; group 1 is the identifier.
[GeneratedRegex(@"\b([a-zA-Z_][a-zA-Z0-9_]*)\b")]
private static partial Regex IdentifierRegex();
// An identifier followed by optional whitespace and "("; group 1 is the name and the
// match includes the opening parenthesis.
[GeneratedRegex(@"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")]
private static partial Regex FunctionCallRegex();
// "0x" or "0X" followed by one or more hexadecimal digits.
[GeneratedRegex(@"0[xX][0-9a-fA-F]+")]
private static partial Regex HexConstantRegex();
// A standalone run of four or more decimal digits; shorter numbers do not match.
[GeneratedRegex(@"\b[0-9]{4,}\b")]
private static partial Regex LargeDecimalRegex();
// A double-quoted string literal, allowing backslash escapes inside.
[GeneratedRegex(@"""(?:[^""\\]|\\.)*""")]
private static partial Regex StringLiteralRegex();
// One or more spaces or tabs (a single blank matches too).
[GeneratedRegex(@"[ \t]+")]
private static partial Regex MultipleWhitespaceRegex();
// One operator/punctuation character with any surrounding whitespace; \s also matches
// newlines. Group 1 is the character itself.
[GeneratedRegex(@"\s*([+\-*/%=<>!&|^~?:;,{}()\[\]])\s*")]
private static partial Regex WhitespaceAroundOperatorsRegex();
// Spaces or tabs immediately before a newline.
[GeneratedRegex(@"[ \t]+\n")]
private static partial Regex TrailingWhitespaceRegex();
// An identifier at the start of the input followed by "="; group 1 is the identifier.
// Note that this also matches the start of an "==" comparison.
[GeneratedRegex(@"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")]
private static partial Regex AssignmentTargetRegex();
// An identifier at the start of the input followed by "("; group 1 is the identifier.
[GeneratedRegex(@"^([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")]
private static partial Regex FunctionNameRegex();
}

View File

@@ -0,0 +1,14 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Confidence level for comparison results.
/// </summary>
/// <remarks>
/// Members are declared from lowest to highest, so their implicit underlying values
/// (0 through 3) order the same way and can be compared with relational operators.
/// </remarks>
public enum ComparisonConfidence
{
Low,
Medium,
High,
VeryHigh
}

View File

@@ -0,0 +1,14 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Options for AST comparison.
/// </summary>
/// <remarks>
/// Immutable record: values are set through object initializers. The per-property notes
/// describe the intent suggested by each name; the exact effect is defined by the
/// comparison engine that consumes these options.
/// </remarks>
public sealed record ComparisonOptions
{
/// <summary>Ignore differences in variable names (inferred from the name). Defaults to <c>true</c>.</summary>
public bool IgnoreVariableNames { get; init; } = true;
/// <summary>Ignore differences in constant values (inferred from the name). Defaults to <c>false</c>.</summary>
public bool IgnoreConstants { get; init; } = false;
/// <summary>Attempt to detect optimizations (inferred from the name). Defaults to <c>true</c>.</summary>
public bool DetectOptimizations { get; init; } = true;
/// <summary>
/// Minimum similarity threshold. Defaults to <c>0.5</c>.
/// NOTE(review): presumably a fraction in the range 0..1 - confirm against the consumer.
/// </summary>
public decimal MinSimilarityThreshold { get; init; } = 0.5m;
}

View File

@@ -0,0 +1,15 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Options for decompilation.
/// </summary>
/// <remarks>
/// Immutable record: values are set through object initializers. The per-property notes
/// describe the intent suggested by each name; the exact effect is defined by the
/// decompiler that consumes these options.
/// </remarks>
public sealed record DecompileOptions
{
/// <summary>Request simplified output code (inferred from the name). Defaults to <c>true</c>.</summary>
public bool SimplifyCode { get; init; } = true;
/// <summary>Request type recovery (inferred from the name). Defaults to <c>true</c>.</summary>
public bool RecoverTypes { get; init; } = true;
/// <summary>Request struct recovery (inferred from the name). Defaults to <c>true</c>.</summary>
public bool RecoverStructs { get; init; } = true;
/// <summary>
/// Upper bound on the code length. Defaults to <c>100_000</c>.
/// NOTE(review): the unit (characters vs. bytes) is not visible here - confirm.
/// </summary>
public int MaxCodeLength { get; init; } = 100_000;
/// <summary>Time limit for the operation. Defaults to 30 seconds.</summary>
public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);
}

View File

@@ -0,0 +1,14 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// AST representation of decompiled code.
/// </summary>
/// <param name="Root">Root node of the tree.</param>
/// <param name="NodeCount">
/// Node count recorded for the tree. NOTE(review): presumably the total number of nodes
/// reachable from <paramref name="Root"/> - the record does not verify this.
/// </param>
/// <param name="Depth">
/// Depth recorded for the tree. NOTE(review): presumably the number of levels with a lone
/// root counting as 1 - the record does not verify this.
/// </param>
/// <param name="Patterns">Patterns recognised in the tree.</param>
public sealed record DecompiledAst(
AstNode Root,
int NodeCount,
int Depth,
ImmutableArray<AstPattern> Patterns);

View File

@@ -0,0 +1,50 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Counts <paramref name="node"/> together with all of its descendants.
    /// </summary>
    private static int CountNodes(AstNode node)
    {
        var total = 0;
        var pending = new Stack<AstNode>();
        pending.Push(node);

        // Explicit work list instead of recursion; visiting order does not affect the count.
        while (pending.Count > 0)
        {
            var current = pending.Pop();
            total++;
            foreach (var child in current.Children)
            {
                pending.Push(child);
            }
        }

        return total;
    }

    /// <summary>
    /// Returns the number of levels in the subtree rooted at <paramref name="node"/>;
    /// a node without children has depth 1.
    /// </summary>
    private static int ComputeDepth(AstNode node)
    {
        var deepestChild = 0;
        foreach (var child in node.Children)
        {
            deepestChild = Math.Max(deepestChild, ComputeDepth(child));
        }

        return deepestChild + 1;
    }

    /// <summary>
    /// Gathers the patterns reported for every descendant of <paramref name="node"/>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): nothing in this method ever creates a pattern - it only merges the
    /// results of its recursive calls - so as written the result is always empty.
    /// Confirm whether detection logic is missing here.
    /// </remarks>
    private static ImmutableArray<AstPattern> ExtractPatterns(AstNode node)
    {
        var collected = ImmutableArray.CreateBuilder<AstPattern>();
        foreach (var child in node.Children)
        {
            collected.AddRange(ExtractPatterns(child));
        }

        return collected.ToImmutable();
    }
}

View File

@@ -0,0 +1,14 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Text.RegularExpressions;
namespace StellaOps.BinaryIndex.Decompiler;
// Source-generated regular expressions used by DecompiledCodeParser.
// NOTE(review): RegexOptions.Compiled is reportedly ignored by the regex source
// generator - confirm and consider dropping the argument.
public sealed partial class DecompiledCodeParser
{
// Two words separated by whitespace and followed by "=" or ";". The named groups are
// "type" (first word) and "name" (second word). Because \w+ matches any word, keyword
// pairs such as "return x;" match as well.
[GeneratedRegex(@"(?<type>\w+)\s+(?<name>\w+)\s*(?:=|;)", RegexOptions.Compiled)]
private static partial Regex VariableDeclarationRegex();
// A word followed by optional whitespace and "("; the named group "name" is the word.
[GeneratedRegex(@"(?<name>\w+)\s*\(", RegexOptions.Compiled)]
private static partial Regex FunctionCallRegex();
}

View File

@@ -0,0 +1,77 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Consumes an identifier or keyword starting at <paramref name="index"/> and appends
    /// the corresponding token.
    /// </summary>
    /// <param name="code">Text being tokenized.</param>
    /// <param name="index">Current position; advanced past the identifier on success.</param>
    /// <param name="column">Current column; advanced by the identifier length on success.</param>
    /// <param name="line">Line recorded on the token.</param>
    /// <param name="startColumn">Column recorded on the token.</param>
    /// <param name="tokens">List the token is appended to.</param>
    /// <returns><c>false</c> (consuming nothing) when the current character cannot start an identifier.</returns>
    private static bool TryConsumeIdentifier(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        var first = code[index];
        if (!char.IsLetter(first) && first != '_')
        {
            return false;
        }

        var start = index;
        while (index < code.Length && (char.IsLetterOrDigit(code[index]) || code[index] == '_'))
        {
            index++;
            column++;
        }

        var value = code[start..index];
        var type = _keywords.Contains(value) ? TokenType.Keyword : TokenType.Identifier;
        tokens.Add(new Token(type, value, line, startColumn));
        return true;
    }

    /// <summary>
    /// Consumes a numeric literal starting at <paramref name="index"/> and appends a
    /// <see cref="TokenType.Number"/> token. Handles hexadecimal literals with a
    /// <c>0x</c> or <c>0X</c> prefix, decimal digit runs that may contain <c>.</c>, and
    /// trailing <c>U</c>/<c>L</c> suffix letters in either case.
    /// </summary>
    /// <param name="code">Text being tokenized.</param>
    /// <param name="index">Current position; advanced past the literal on success.</param>
    /// <param name="column">Current column; advanced by the literal length on success.</param>
    /// <param name="line">Line recorded on the token.</param>
    /// <param name="startColumn">Column recorded on the token.</param>
    /// <param name="tokens">List the token is appended to.</param>
    /// <returns><c>false</c> (consuming nothing) when the current character is not a digit.</returns>
    private static bool TryConsumeNumber(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        // A hex prefix starts with the digit '0', so a digit test covers both forms.
        if (!char.IsDigit(code[index]))
        {
            return false;
        }

        var start = index;
        var isHex = code[index] == '0'
            && index + 1 < code.Length
            && (code[index + 1] is 'x' or 'X');

        if (isHex)
        {
            // Skip the "0x"/"0X" prefix, then the hex digits.
            index += 2;
            column += 2;
            while (index < code.Length && char.IsAsciiHexDigit(code[index]))
            {
                index++;
                column++;
            }
        }
        else
        {
            while (index < code.Length && (char.IsDigit(code[index]) || code[index] == '.'))
            {
                index++;
                column++;
            }
        }

        // Integer suffixes (U, L, UL, ...) belong to the literal.
        while (index < code.Length && (code[index] is 'U' or 'L' or 'u' or 'l'))
        {
            index++;
            column++;
        }

        tokens.Add(new Token(TokenType.Number, code[start..index], line, startColumn));
        return true;
    }
}

View File

@@ -0,0 +1,78 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Consumes a double-quoted string literal starting at <paramref name="index"/> and
    /// appends a <see cref="TokenType.String"/> token whose value includes the quotes.
    /// </summary>
    /// <returns><c>false</c> (consuming nothing) when the current character is not <c>"</c>.</returns>
    private static bool TryConsumeStringLiteral(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
        => TryConsumeQuotedLiteral('"', TokenType.String, code, ref index, ref column, line, startColumn, tokens);

    /// <summary>
    /// Consumes a single-quoted character literal starting at <paramref name="index"/> and
    /// appends a <see cref="TokenType.Char"/> token whose value includes the quotes.
    /// </summary>
    /// <returns><c>false</c> (consuming nothing) when the current character is not <c>'</c>.</returns>
    private static bool TryConsumeCharLiteral(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
        => TryConsumeQuotedLiteral('\'', TokenType.Char, code, ref index, ref column, line, startColumn, tokens);

    /// <summary>
    /// Shared scanner for quoted literals. A backslash escapes the following character, so
    /// an escaped quote does not end the literal.
    /// </summary>
    /// <param name="quote">Delimiter that opens and closes the literal.</param>
    /// <param name="tokenType">Token type to emit.</param>
    /// <param name="code">Text being tokenized.</param>
    /// <param name="index">Current position; advanced past the literal on success.</param>
    /// <param name="column">Current column; advanced by the literal length on success.</param>
    /// <param name="line">Line recorded on the token.</param>
    /// <param name="startColumn">Column recorded on the token.</param>
    /// <param name="tokens">List the token is appended to.</param>
    /// <returns><c>false</c> when the current character is not <paramref name="quote"/>.</returns>
    private static bool TryConsumeQuotedLiteral(
        char quote,
        TokenType tokenType,
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        if (code[index] != quote)
        {
            return false;
        }

        var start = index;
        index++;
        column++;

        while (index < code.Length && code[index] != quote)
        {
            var step = code[index] == '\\' && index + 1 < code.Length ? 2 : 1;
            index += step;
            column += step;
        }

        // Only step over the closing quote when it exists. For an unterminated literal
        // the token runs to the end of the input instead of slicing past it.
        if (index < code.Length)
        {
            index++;
            column++;
        }

        tokens.Add(new Token(tokenType, code[start..index], line, startColumn));
        return true;
    }
}

View File

@@ -0,0 +1,41 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Consumes one operator, bracket or punctuation token at <paramref name="index"/>.
    /// A recognised two-character operator is preferred; otherwise the single current
    /// character becomes the token.
    /// </summary>
    /// <param name="code">Text being tokenized.</param>
    /// <param name="index">Current position; advanced by the token length.</param>
    /// <param name="column">Current column; advanced by the token length.</param>
    /// <param name="line">Line recorded on the token.</param>
    /// <param name="startColumn">Column recorded on the token.</param>
    /// <param name="tokens">List the token is appended to.</param>
    private static void ConsumeOperator(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        // Start from the single character and upgrade to a pair when it is a known operator.
        var op = code[index].ToString();
        if (index + 1 < code.Length)
        {
            var pair = code.Substring(index, 2);
            switch (pair)
            {
                case "==":
                case "!=":
                case "<=":
                case ">=":
                case "&&":
                case "||":
                case "++":
                case "--":
                case "->":
                case "+=":
                case "-=":
                case "*=":
                case "/=":
                case "&=":
                case "|=":
                case "^=":
                case "<<":
                case ">>":
                    op = pair;
                    break;
            }
        }

        index += op.Length;
        column += op.Length;

        TokenType tokenType;
        switch (op)
        {
            case "(":
            case ")":
            case "{":
            case "}":
            case "[":
            case "]":
                tokenType = TokenType.Bracket;
                break;
            case ";":
            case ",":
            case ".":
                tokenType = TokenType.Punctuation;
                break;
            default:
                tokenType = TokenType.Operator;
                break;
        }

        tokens.Add(new Token(tokenType, op, line, startColumn));
    }
}

View File

@@ -0,0 +1,64 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Consumes a single whitespace character, updating the position counters.
    /// </summary>
    /// <param name="code">Text being tokenized.</param>
    /// <param name="index">Current position; advanced by one on success.</param>
    /// <param name="line">Current line; incremented on a newline.</param>
    /// <param name="column">Current column; reset to 1 on a newline, otherwise incremented.</param>
    /// <returns><c>false</c> (consuming nothing) when the current character is not whitespace.</returns>
    private static bool TryConsumeWhitespace(string code, ref int index, ref int line, ref int column)
    {
        var current = code[index];
        if (!char.IsWhiteSpace(current))
        {
            return false;
        }

        if (current == '\n')
        {
            line++;
            column = 1;
        }
        else
        {
            column++;
        }

        index++;
        return true;
    }

    /// <summary>
    /// Consumes a <c>//</c> comment up to, but not including, the terminating newline.
    /// </summary>
    /// <param name="code">Text being tokenized.</param>
    /// <param name="index">Current position; left on the newline or at the end of the input.</param>
    /// <returns><c>false</c> (consuming nothing) when the input does not start with <c>//</c> here.</returns>
    private static bool TryConsumeLineComment(string code, ref int index)
    {
        if (index + 1 >= code.Length || code[index] != '/' || code[index + 1] != '/')
        {
            return false;
        }

        // The column is not tracked: the comment is followed by a newline (which resets
        // the column) or by the end of the input.
        while (index < code.Length && code[index] != '\n')
        {
            index++;
        }

        return true;
    }

    /// <summary>
    /// Consumes a <c>/* ... */</c> comment, keeping line and column in step with the
    /// characters skipped so that tokens following the comment get correct positions.
    /// </summary>
    /// <param name="code">Text being tokenized.</param>
    /// <param name="index">
    /// Current position; advanced past the closing <c>*/</c>, or to the end of the input
    /// when the comment is unterminated.
    /// </param>
    /// <param name="line">Current line; incremented for every newline inside the comment.</param>
    /// <param name="column">Current column; advanced across the comment.</param>
    /// <returns><c>false</c> (consuming nothing) when the input does not start with <c>/*</c> here.</returns>
    private static bool TryConsumeBlockComment(string code, ref int index, ref int line, ref int column)
    {
        if (index + 1 >= code.Length || code[index] != '/' || code[index + 1] != '*')
        {
            return false;
        }

        // Opening "/*".
        index += 2;
        column += 2;

        while (index + 1 < code.Length && !(code[index] == '*' && code[index + 1] == '/'))
        {
            if (code[index] == '\n')
            {
                line++;
                column = 1;
            }
            else
            {
                column++;
            }

            index++;
        }

        if (index + 1 < code.Length)
        {
            // Closing "*/".
            index += 2;
            column += 2;
        }
        else
        {
            // Unterminated comment: swallow the rest of the input without moving the
            // position beyond its end.
            index = code.Length;
        }

        return true;
    }
}

View File

@@ -0,0 +1,58 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class DecompiledCodeParser
{
private static List<Token> Tokenize(string code)
{
var tokens = new List<Token>();
var index = 0;
var line = 1;
var column = 1;
while (index < code.Length)
{
if (TryConsumeWhitespace(code, ref index, ref line, ref column))
{
continue;
}
if (TryConsumeLineComment(code, ref index))
{
continue;
}
if (TryConsumeBlockComment(code, ref index, ref line, ref column))
{
continue;
}
var startColumn = column;
if (TryConsumeIdentifier(code, ref index, ref column, line, startColumn, tokens))
{
continue;
}
if (TryConsumeNumber(code, ref index, ref column, line, startColumn, tokens))
{
continue;
}
if (TryConsumeStringLiteral(code, ref index, ref column, line, startColumn, tokens))
{
continue;
}
if (TryConsumeCharLiteral(code, ref index, ref column, line, startColumn, tokens))
{
continue;
}
ConsumeOperator(code, ref index, ref column, line, startColumn, tokens);
}
return tokens;
}
}

View File

@@ -1,7 +1,7 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
namespace StellaOps.BinaryIndex.Decompiler;
@@ -11,7 +11,7 @@ namespace StellaOps.BinaryIndex.Decompiler;
/// </summary>
public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
{
private static readonly HashSet<string> s_keywords =
private static readonly HashSet<string> _keywords =
[
"if", "else", "while", "for", "do", "switch", "case", "default",
"return", "break", "continue", "goto", "sizeof", "typedef",
@@ -19,7 +19,7 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
"float", "double", "unsigned", "signed", "const", "static", "extern"
];
private static readonly HashSet<string> s_types =
private static readonly HashSet<string> _types =
[
"void", "int", "uint", "char", "uchar", "byte", "ubyte",
"short", "ushort", "long", "ulong", "longlong", "ulonglong",
@@ -47,10 +47,7 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
public ImmutableArray<LocalVariable> ExtractVariables(string code)
{
var variables = new List<LocalVariable>();
var varIndex = 0;
// Match variable declarations: type name [= value];
// Ghidra style: int local_10; or undefined8 param_1;
var declPattern = VariableDeclarationRegex();
foreach (Match match in declPattern.Matches(code))
@@ -58,6 +55,11 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
var type = match.Groups["type"].Value;
var name = match.Groups["name"].Value;
if (_keywords.Contains(type) && !_types.Contains(type))
{
continue;
}
var isParam = name.StartsWith("param_", StringComparison.Ordinal);
int? paramIndex = null;
int stackOffset = 0;
@@ -68,13 +70,12 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
}
if (name.StartsWith("local_", StringComparison.Ordinal) &&
int.TryParse(name.AsSpan(6), System.Globalization.NumberStyles.HexNumber, null, out var offset))
int.TryParse(name.AsSpan(6), NumberStyles.HexNumber, null, out var offset))
{
stackOffset = -offset; // Negative for locals
stackOffset = -offset;
}
variables.Add(new LocalVariable(name, type, stackOffset, isParam, paramIndex));
varIndex++;
}
return [.. variables];
@@ -84,16 +85,13 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
public ImmutableArray<string> ExtractCalledFunctions(string code)
{
var functions = new HashSet<string>();
// Match function calls: name(...)
var callPattern = FunctionCallRegex();
foreach (Match match in callPattern.Matches(code))
{
var name = match.Groups["name"].Value;
// Skip keywords and types
if (!s_keywords.Contains(name) && !s_types.Contains(name))
if (!_keywords.Contains(name) && !_types.Contains(name))
{
functions.Add(name);
}
@@ -102,849 +100,4 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
return [.. functions.Order()];
}
/// <summary>
/// Splits <paramref name="code"/> into tokens: identifiers/keywords, numbers, string and
/// character literals, two-character operators, brackets, punctuation and single-character
/// operators. Whitespace and comments are skipped.
/// </summary>
/// <param name="code">Text to tokenize.</param>
/// <returns>The tokens in source order, with positions starting at line 1, column 1.</returns>
/// <remarks>
/// An unterminated string or character literal yields a token that runs to the end of the
/// input rather than throwing.
/// </remarks>
private static List<Token> Tokenize(string code)
{
    var tokens = new List<Token>();
    var i = 0;
    var line = 1;
    var column = 1;

    while (i < code.Length)
    {
        var c = code[i];

        // Skip whitespace
        if (char.IsWhiteSpace(c))
        {
            if (c == '\n')
            {
                line++;
                column = 1;
            }
            else
            {
                column++;
            }

            i++;
            continue;
        }

        var hasNext = i + 1 < code.Length;

        // Skip line comments (the newline itself is handled as whitespace)
        if (hasNext && c == '/' && code[i + 1] == '/')
        {
            while (i < code.Length && code[i] != '\n')
            {
                i++;
            }

            continue;
        }

        // Skip block comments
        if (hasNext && c == '/' && code[i + 1] == '*')
        {
            i += 2;
            while (i + 1 < code.Length && !(code[i] == '*' && code[i + 1] == '/'))
            {
                if (code[i] == '\n')
                {
                    line++;
                    column = 1;
                }

                i++;
            }

            i += 2;
            continue;
        }

        var startColumn = column;

        // Identifiers and keywords
        if (char.IsLetter(c) || c == '_')
        {
            var start = i;
            while (i < code.Length && (char.IsLetterOrDigit(code[i]) || code[i] == '_'))
            {
                i++;
                column++;
            }

            var value = code[start..i];
            var type = s_keywords.Contains(value) ? TokenType.Keyword : TokenType.Identifier;
            tokens.Add(new Token(type, value, line, startColumn));
            continue;
        }

        // Numbers (a "0x" prefix starts with a digit, so a digit test covers both forms)
        if (char.IsDigit(c))
        {
            var start = i;
            if (c == '0' && hasNext && code[i + 1] == 'x')
            {
                i += 2;
                column += 2;
                while (i < code.Length && char.IsAsciiHexDigit(code[i]))
                {
                    i++;
                    column++;
                }
            }
            else
            {
                while (i < code.Length && (char.IsDigit(code[i]) || code[i] == '.'))
                {
                    i++;
                    column++;
                }
            }

            // Handle suffixes (U, L, UL, etc.)
            while (i < code.Length && (code[i] is 'U' or 'L' or 'u' or 'l'))
            {
                i++;
                column++;
            }

            tokens.Add(new Token(TokenType.Number, code[start..i], line, startColumn));
            continue;
        }

        // String and character literals share one scanner; only the delimiter differs.
        if (c is '"' or '\'')
        {
            var start = i;
            i++;
            column++;
            while (i < code.Length && code[i] != c)
            {
                // A backslash escapes the next character.
                var step = code[i] == '\\' && i + 1 < code.Length ? 2 : 1;
                i += step;
                column += step;
            }

            // Consume the closing quote only when present, so an unterminated literal
            // does not push the slice below past the end of the input.
            if (i < code.Length)
            {
                i++;
                column++;
            }

            var literalType = c == '"' ? TokenType.String : TokenType.Char;
            tokens.Add(new Token(literalType, code[start..i], line, startColumn));
            continue;
        }

        // Multi-character operators
        if (hasNext)
        {
            var twoChar = code.Substring(i, 2);
            if (twoChar is "==" or "!=" or "<=" or ">=" or "&&" or "||" or
                "++" or "--" or "+=" or "-=" or "*=" or "/=" or
                "<<" or ">>" or "->" or "::")
            {
                tokens.Add(new Token(TokenType.Operator, twoChar, line, startColumn));
                i += 2;
                column += 2;
                continue;
            }
        }

        // Single character operators and punctuation
        var tokenType = c switch
        {
            '(' or ')' or '{' or '}' or '[' or ']' => TokenType.Bracket,
            ';' or ',' or ':' or '?' => TokenType.Punctuation,
            _ => TokenType.Operator
        };

        tokens.Add(new Token(tokenType, c.ToString(), line, startColumn));
        i++;
        column++;
    }

    return tokens;
}
/// <summary>
/// Counts <paramref name="node"/> together with all of its descendants.
/// </summary>
private static int CountNodes(AstNode node) =>
    1 + node.Children.Sum(child => CountNodes(child));
/// <summary>
/// Returns the number of levels in the subtree rooted at <paramref name="node"/>;
/// a node without children has depth 1.
/// </summary>
private static int ComputeDepth(AstNode node)
{
    var deepestChild = 0;
    foreach (var child in node.Children)
    {
        var childDepth = ComputeDepth(child);
        if (childDepth > deepestChild)
        {
            deepestChild = childDepth;
        }
    }

    return deepestChild + 1;
}
/// <summary>
/// Walks the tree and records a pattern for every loop node (for, while, do-while) and
/// for every <c>if</c> node that looks like an error check and/or a null check. A single
/// node can contribute more than one pattern.
/// </summary>
/// <param name="root">Root of the tree to scan.</param>
/// <returns>The recognised patterns in traversal order.</returns>
private static ImmutableArray<AstPattern> ExtractPatterns(AstNode root)
{
    var found = new List<AstPattern>();

    foreach (var node in TraverseNodes(root))
    {
        // Loop shapes are mutually exclusive.
        if (node.Type == AstNodeType.For)
        {
            Record(PatternType.CountedLoop, node, "For loop", 0.9m);
        }
        else if (node.Type == AstNodeType.While)
        {
            Record(PatternType.ConditionalLoop, node, "While loop", 0.9m);
        }
        else if (node.Type == AstNodeType.DoWhile)
        {
            Record(PatternType.ConditionalLoop, node, "Do-while loop", 0.9m);
        }

        // The two if-based checks are independent; one node may match both.
        if (node is IfNode candidate)
        {
            if (IsErrorCheck(candidate))
            {
                Record(PatternType.ErrorCheck, node, "Error check", 0.8m);
            }

            if (IsNullCheck(candidate))
            {
                Record(PatternType.NullCheck, node, "Null check", 0.9m);
            }
        }
    }

    return [.. found];

    void Record(PatternType type, AstNode location, string description, decimal confidence) =>
        found.Add(new AstPattern(type, location, new PatternMetadata(description, confidence, null)));
}
/// <summary>
/// Lazily enumerates the tree in pre-order: each node is yielded before its children, and
/// children are visited in their declared order.
/// </summary>
/// <param name="root">Root of the tree to enumerate.</param>
private static IEnumerable<AstNode> TraverseNodes(AstNode root)
{
    var pending = new Stack<AstNode>();
    pending.Push(root);

    while (pending.Count > 0)
    {
        var current = pending.Pop();
        yield return current;

        // Push children right-to-left so the leftmost child is popped first.
        for (var i = current.Children.Length - 1; i >= 0; i--)
        {
            pending.Push(current.Children[i]);
        }
    }
}
/// <summary>
/// Returns <c>true</c> when the condition is a binary operation whose right-hand side is a
/// constant that prints as <c>0</c>, <c>-1</c>, <c>0xffffffff</c> or <c>NULL</c>. The
/// operator itself is not inspected.
/// </summary>
private static bool IsErrorCheck(IfNode node) =>
    node.Condition is BinaryOpNode { Right: ConstantNode { Value: var value } }
    && value?.ToString() is "0" or "-1" or "0xffffffff" or "NULL";
/// <summary>
/// Returns <c>true</c> when the condition is an <c>==</c> or <c>!=</c> comparison whose
/// right-hand side is a constant that prints as <c>0</c>, <c>NULL</c> or <c>nullptr</c>.
/// </summary>
private static bool IsNullCheck(IfNode node) =>
    node.Condition is BinaryOpNode { Operator: "==" or "!=", Right: ConstantNode { Value: var value } }
    && value?.ToString() is "0" or "NULL" or "nullptr";
// Two words separated by whitespace and followed by "=" or ";". The named groups are
// "type" (first word) and "name" (second word). Because \w+ matches any word, keyword
// pairs such as "return x;" match as well.
[GeneratedRegex(@"(?<type>\w+)\s+(?<name>\w+)\s*(?:=|;)", RegexOptions.Compiled)]
private static partial Regex VariableDeclarationRegex();
// A word followed by optional whitespace and "("; the named group "name" is the word.
[GeneratedRegex(@"(?<name>\w+)\s*\(", RegexOptions.Compiled)]
private static partial Regex FunctionCallRegex();
}
/// <summary>
/// Lexical category assigned to each token by the tokenizer.
/// </summary>
internal enum TokenType
{
// A name that is not in the keyword set.
Identifier,
// A name that is in the keyword set.
Keyword,
// A numeric literal, including any hex prefix and U/L suffix letters.
Number,
// A double-quoted literal, quotes included.
String,
// A single-quoted literal, quotes included.
Char,
// Any operator that is not a bracket or punctuation.
Operator,
// One of ( ) { } [ ].
Bracket,
// Separator characters such as ';' and ','.
Punctuation
}
/// <summary>
/// A single lexical token.
/// </summary>
/// <param name="Type">Lexical category of the token.</param>
/// <param name="Value">Exact source text of the token.</param>
/// <param name="Line">Line on which the token starts; the tokenizer counts from 1.</param>
/// <param name="Column">Column at which the token starts; the tokenizer counts from 1.</param>
internal readonly record struct Token(TokenType Type, string Value, int Line, int Column);
internal sealed class RecursiveParser
{
private readonly List<Token> _tokens;
private int _pos;
/// <summary>
/// Creates a parser positioned at the first token of <paramref name="tokens"/>.
/// </summary>
/// <param name="tokens">Token stream to parse; the list is used directly, not copied.</param>
public RecursiveParser(List<Token> tokens) => (_tokens, _pos) = (tokens, 0);
/// <summary>
/// Parses a complete function definition: return type, name, parenthesised parameter
/// list and body block.
/// </summary>
/// <returns>A function node holding the name, return type, parameters and body.</returns>
public AstNode ParseFunction()
{
    // Signature: <type> <name> ( <parameters> )
    var returnType = ParseType();
    var functionName = Expect(TokenType.Identifier).Value;

    Expect(TokenType.Bracket, "(");
    var parameterList = ParseParameterList();
    Expect(TokenType.Bracket, ")");

    // Body: { ... }
    var functionBody = ParseBlock();

    return new FunctionNode(functionName, returnType, parameterList, functionBody);
}
/// <summary>
/// Parses a type: any leading modifiers (<c>const</c>, <c>unsigned</c>, <c>signed</c>,
/// <c>static</c>, <c>extern</c>), one token for the main type, and any trailing <c>*</c>.
/// </summary>
/// <returns>
/// The type text with modifiers separated by single spaces and pointer stars appended
/// directly, e.g. <c>const char**</c>.
/// </returns>
private string ParseType()
{
    var text = string.Empty;

    // Each modifier is followed by a separating space.
    while (Peek().Value is "const" or "unsigned" or "signed" or "static" or "extern")
    {
        text += Advance().Value + " ";
    }

    // The main type is taken unconditionally, whatever the next token is.
    text += Advance().Value;

    // Pointer stars are appended without a space.
    while (Peek().Value == "*")
    {
        text += Advance().Value;
    }

    return text.Trim();
}
private ImmutableArray<ParameterNode> ParseParameterList()
{
var parameters = new List<ParameterNode>();
var index = 0;
if (Peek().Value == ")")
{
return [];
}
if (Peek().Value == "void" && PeekAhead(1).Value == ")")
{
Advance(); // consume void
return [];
}
do
{
if (Peek().Value == ",")
{
Advance();
}
var type = ParseType();
var name = Peek().Type == TokenType.Identifier ? Advance().Value : $"param_{index}";
parameters.Add(new ParameterNode(name, type, index));
index++;
}
while (Peek().Value == ",");
return [.. parameters];
}
private BlockNode ParseBlock()
{
Expect(TokenType.Bracket, "{");
var statements = new List<AstNode>();
while (Peek().Value != "}")
{
var stmt = ParseStatement();
if (stmt is not null)
{
statements.Add(stmt);
}
}
Expect(TokenType.Bracket, "}");
return new BlockNode([.. statements]);
}
private AstNode? ParseStatement()
{
var token = Peek();
return token.Value switch
{
"if" => ParseIf(),
"while" => ParseWhile(),
"for" => ParseFor(),
"do" => ParseDoWhile(),
"return" => ParseReturn(),
"break" => ParseBreak(),
"continue" => ParseContinue(),
"{" => ParseBlock(),
";" => SkipSemicolon(),
_ => ParseExpressionStatement()
};
}
private IfNode ParseIf()
{
Advance(); // consume 'if'
Expect(TokenType.Bracket, "(");
var condition = ParseExpression();
Expect(TokenType.Bracket, ")");
var thenBranch = ParseStatement() ?? new BlockNode([]);
AstNode? elseBranch = null;
if (Peek().Value == "else")
{
Advance();
elseBranch = ParseStatement();
}
return new IfNode(condition, thenBranch, elseBranch);
}
private WhileNode ParseWhile()
{
Advance(); // consume 'while'
Expect(TokenType.Bracket, "(");
var condition = ParseExpression();
Expect(TokenType.Bracket, ")");
var body = ParseStatement() ?? new BlockNode([]);
return new WhileNode(condition, body);
}
private ForNode ParseFor()
{
Advance(); // consume 'for'
Expect(TokenType.Bracket, "(");
AstNode? init = null;
if (Peek().Value != ";")
{
init = ParseExpression();
}
Expect(TokenType.Punctuation, ";");
AstNode? condition = null;
if (Peek().Value != ";")
{
condition = ParseExpression();
}
Expect(TokenType.Punctuation, ";");
AstNode? update = null;
if (Peek().Value != ")")
{
update = ParseExpression();
}
Expect(TokenType.Bracket, ")");
var body = ParseStatement() ?? new BlockNode([]);
return new ForNode(init, condition, update, body);
}
private AstNode ParseDoWhile()
{
Advance(); // consume 'do'
var body = ParseStatement() ?? new BlockNode([]);
Expect(TokenType.Keyword, "while");
Expect(TokenType.Bracket, "(");
var condition = ParseExpression();
Expect(TokenType.Bracket, ")");
Expect(TokenType.Punctuation, ";");
return new WhileNode(condition, body); // Simplify do-while to while for now
}
private ReturnNode ParseReturn()
{
Advance(); // consume 'return'
AstNode? value = null;
if (Peek().Value != ";")
{
value = ParseExpression();
}
Expect(TokenType.Punctuation, ";");
return new ReturnNode(value);
}
private AstNode ParseBreak()
{
Advance();
Expect(TokenType.Punctuation, ";");
return new BlockNode([]); // Simplified
}
private AstNode ParseContinue()
{
Advance();
Expect(TokenType.Punctuation, ";");
return new BlockNode([]); // Simplified
}
private AstNode? SkipSemicolon()
{
Advance();
return null;
}
private AstNode? ParseExpressionStatement()
{
var expr = ParseExpression();
if (Peek().Value == ";")
{
Advance();
}
return expr;
}
private AstNode ParseExpression()
{
return ParseAssignment();
}
private AstNode ParseAssignment()
{
var left = ParseLogicalOr();
if (Peek().Value is "=" or "+=" or "-=" or "*=" or "/=" or "&=" or "|=" or "^=" or "<<=" or ">>=")
{
var op = Advance().Value;
var right = ParseAssignment();
return new AssignmentNode(left, right, op);
}
return left;
}
private AstNode ParseLogicalOr()
{
var left = ParseLogicalAnd();
while (Peek().Value == "||")
{
var op = Advance().Value;
var right = ParseLogicalAnd();
left = new BinaryOpNode(left, right, op);
}
return left;
}
private AstNode ParseLogicalAnd()
{
var left = ParseBitwiseOr();
while (Peek().Value == "&&")
{
var op = Advance().Value;
var right = ParseBitwiseOr();
left = new BinaryOpNode(left, right, op);
}
return left;
}
private AstNode ParseBitwiseOr()
{
var left = ParseComparison();
while (Peek().Value is "|" or "^" or "&")
{
var op = Advance().Value;
var right = ParseComparison();
left = new BinaryOpNode(left, right, op);
}
return left;
}
private AstNode ParseComparison()
{
var left = ParseShift();
while (Peek().Value is "==" or "!=" or "<" or ">" or "<=" or ">=")
{
var op = Advance().Value;
var right = ParseShift();
left = new BinaryOpNode(left, right, op);
}
return left;
}
private AstNode ParseShift()
{
var left = ParseAdditive();
while (Peek().Value is "<<" or ">>")
{
var op = Advance().Value;
var right = ParseAdditive();
left = new BinaryOpNode(left, right, op);
}
return left;
}
private AstNode ParseAdditive()
{
var left = ParseMultiplicative();
while (Peek().Value is "+" or "-")
{
var op = Advance().Value;
var right = ParseMultiplicative();
left = new BinaryOpNode(left, right, op);
}
return left;
}
private AstNode ParseMultiplicative()
{
var left = ParseUnary();
while (Peek().Value is "*" or "/" or "%")
{
var op = Advance().Value;
var right = ParseUnary();
left = new BinaryOpNode(left, right, op);
}
return left;
}
private AstNode ParseUnary()
{
if (Peek().Value is "!" or "~" or "-" or "+" or "*" or "&" or "++" or "--")
{
var op = Advance().Value;
var operand = ParseUnary();
return new UnaryOpNode(operand, op, true);
}
return ParsePostfix();
}
private AstNode ParsePostfix()
{
var expr = ParsePrimary();
while (true)
{
if (Peek().Value == "(")
{
// Function call
Advance();
var args = ParseArgumentList();
Expect(TokenType.Bracket, ")");
if (expr is VariableNode varNode)
{
expr = new CallNode(varNode.Name, args);
}
}
else if (Peek().Value == "[")
{
// Array access
Advance();
var index = ParseExpression();
Expect(TokenType.Bracket, "]");
expr = new ArrayAccessNode(expr, index);
}
else if (Peek().Value is "." or "->")
{
var isPointer = Advance().Value == "->";
var field = Expect(TokenType.Identifier).Value;
expr = new FieldAccessNode(expr, field, isPointer);
}
else if (Peek().Value is "++" or "--")
{
var op = Advance().Value;
expr = new UnaryOpNode(expr, op, false);
}
else
{
break;
}
}
return expr;
}
private ImmutableArray<AstNode> ParseArgumentList()
{
var args = new List<AstNode>();
if (Peek().Value == ")")
{
return [];
}
do
{
if (Peek().Value == ",")
{
Advance();
}
args.Add(ParseExpression());
}
while (Peek().Value == ",");
return [.. args];
}
private AstNode ParsePrimary()
{
var token = Peek();
if (token.Type == TokenType.Number)
{
Advance();
return new ConstantNode(token.Value, "int");
}
if (token.Type == TokenType.String)
{
Advance();
return new ConstantNode(token.Value, "char*");
}
if (token.Type == TokenType.Char)
{
Advance();
return new ConstantNode(token.Value, "char");
}
if (token.Type == TokenType.Identifier)
{
Advance();
return new VariableNode(token.Value, null);
}
if (token.Value == "(")
{
Advance();
// Check for cast
if (IsType(Peek().Value))
{
var targetType = ParseType();
Expect(TokenType.Bracket, ")");
var expr = ParseUnary();
return new CastNode(expr, targetType);
}
var inner = ParseExpression();
Expect(TokenType.Bracket, ")");
return inner;
}
// Handle sizeof
if (token.Value == "sizeof")
{
Advance();
Expect(TokenType.Bracket, "(");
var type = ParseType();
Expect(TokenType.Bracket, ")");
return new ConstantNode($"sizeof({type})", "size_t");
}
// Unknown token - return empty node
Advance();
return new ConstantNode(token.Value, "unknown");
}
private static bool IsType(string value)
{
return value is "int" or "char" or "void" or "long" or "short" or "float" or "double"
or "unsigned" or "signed" or "const" or "struct" or "union" or "enum"
or "undefined" or "undefined1" or "undefined2" or "undefined4" or "undefined8"
or "byte" or "word" or "dword" or "qword" or "pointer" or "code" or "uint" or "ulong";
}
private Token Peek() => _pos < _tokens.Count ? _tokens[_pos] : new Token(TokenType.Punctuation, "", 0, 0);
private Token PeekAhead(int offset) => _pos + offset < _tokens.Count
? _tokens[_pos + offset]
: new Token(TokenType.Punctuation, "", 0, 0);
private Token Advance() => _pos < _tokens.Count ? _tokens[_pos++] : new Token(TokenType.Punctuation, "", 0, 0);
private Token Expect(TokenType type, string? value = null)
{
var token = Peek();
if (token.Type != type || (value is not null && token.Value != value))
{
// Skip unexpected tokens
return Advance();
}
return Advance();
}
}

View File

@@ -0,0 +1,27 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Result of comparing two decompiled functions.
/// </summary>
/// <param name="Similarity">Overall score; <c>GhidraDecompilerAdapter</c> computes it as 40% structural, 40% semantic and 20% (1 - normalized edit distance).</param>
/// <param name="StructuralSimilarity">Structural similarity reported by the comparison engine.</param>
/// <param name="SemanticSimilarity">Number of equivalences divided by the larger of the two AST node counts.</param>
/// <param name="EditDistance">Edit-distance metrics between the two ASTs.</param>
/// <param name="Equivalences">Equivalent node pairs that were found.</param>
/// <param name="Differences">Differences that were found.</param>
/// <param name="Confidence">Confidence bucket assigned to the comparison.</param>
public sealed record DecompiledComparisonResult(
decimal Similarity,
decimal StructuralSimilarity,
decimal SemanticSimilarity,
AstEditDistance EditDistance,
ImmutableArray<SemanticEquivalence> Equivalences,
ImmutableArray<CodeDifference> Differences,
ComparisonConfidence Confidence);
/// <summary>
/// Edit distance between ASTs.
/// </summary>
/// <param name="Insertions">Number of insert operations.</param>
/// <param name="Deletions">Number of delete operations.</param>
/// <param name="Modifications">Number of modify operations.</param>
/// <param name="TotalOperations">Total operation count. NOTE(review): presumably the sum of the three counts above - confirm with the engine.</param>
/// <param name="NormalizedDistance">Distance on a normalized scale; the adapter uses <c>1.0</c> when no AST is available and turns it into a similarity via <c>1 - NormalizedDistance</c>.</param>
public sealed record AstEditDistance(
int Insertions,
int Deletions,
int Modifications,
int TotalOperations,
decimal NormalizedDistance);

View File

@@ -0,0 +1,18 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// A function decompiled to C-like pseudo-code.
/// </summary>
/// <param name="FunctionName">Name of the function.</param>
/// <param name="Signature">Signature text; the adapter falls back to <c>void name(void)</c> when Ghidra supplies none.</param>
/// <param name="Code">Decompiled pseudo-code, possibly truncated, or a placeholder comment when unavailable.</param>
/// <param name="Ast">Parsed AST; <c>null</c> when there was no code or parsing threw.</param>
/// <param name="Locals">Local variables extracted from the code.</param>
/// <param name="CalledFunctions">Names of functions called from the code.</param>
/// <param name="Address">Address of the function.</param>
/// <param name="SizeBytes">Size of the function as reported by Ghidra.</param>
public sealed record DecompiledFunction(
string FunctionName,
string Signature,
string Code,
DecompiledAst? Ast,
ImmutableArray<LocalVariable> Locals,
ImmutableArray<string> CalledFunctions,
ulong Address,
int SizeBytes);

View File

@@ -0,0 +1,13 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Options for the decompiler adapter.
/// </summary>
public sealed class DecompilerOptions
{
/// <summary>Path of the Ghidra scripts directory; defaults to <c>/scripts</c>.</summary>
public string GhidraScriptsPath { get; set; } = "/scripts";
/// <summary>Default timeout; defaults to 30 seconds. NOTE(review): the consumer is not visible here - confirm where it is applied.</summary>
public TimeSpan DefaultTimeout { get; set; } = TimeSpan.FromSeconds(30);
/// <summary>
/// Maximum code length; defaults to 100,000. NOTE(review): <c>DecompileAsync</c> truncates using
/// <c>MaxCodeLength</c> from the per-call <c>DecompileOptions</c>, not this property - confirm how the two relate.
/// </summary>
public int MaxCodeLength { get; set; } = 100_000;
}

View File

@@ -0,0 +1,85 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class GhidraDecompilerAdapter
{
    /// <inheritdoc />
    public Task<DecompiledAst> ParseToAstAsync(
        string decompiledCode,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrEmpty(decompiledCode);
        ct.ThrowIfCancellationRequested();

        // Parsing is synchronous; hand back an already-completed task.
        return Task.FromResult(_parser.Parse(decompiledCode));
    }

    /// <inheritdoc />
    public Task<DecompiledComparisonResult> CompareAsync(
        DecompiledFunction a,
        DecompiledFunction b,
        ComparisonOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(a);
        ArgumentNullException.ThrowIfNull(b);
        options ??= new ComparisonOptions();
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug(
            "Comparing functions {A} and {B}",
            a.FunctionName,
            b.FunctionName);

        // Without both ASTs there is nothing to compare: report zero similarity, maximal
        // normalized distance and low confidence.
        if (a.Ast is not { } left || b.Ast is not { } right)
        {
            _logger.LogWarning("Cannot compare functions without ASTs");
            var noComparison = new DecompiledComparisonResult(
                Similarity: 0,
                StructuralSimilarity: 0,
                SemanticSimilarity: 0,
                EditDistance: new AstEditDistance(0, 0, 0, 0, 1.0m),
                Equivalences: [],
                Differences: [],
                Confidence: ComparisonConfidence.Low);
            return Task.FromResult(noComparison);
        }

        var structural = _comparisonEngine.ComputeStructuralSimilarity(left, right);
        var distance = _comparisonEngine.ComputeEditDistance(left, right);
        var matches = _comparisonEngine.FindEquivalences(left, right);
        var diffs = _comparisonEngine.FindDifferences(left, right);

        // Semantic similarity: share of the larger AST that is covered by equivalences.
        var largerNodeCount = Math.Max(left.NodeCount, right.NodeCount);
        var semantic = largerNodeCount > 0
            ? (decimal)matches.Length / largerNodeCount
            : 0m;

        var overall = ComputeOverallSimilarity(structural, semantic, distance.NormalizedDistance);
        var confidence = DetermineConfidence(overall, left.NodeCount, right.NodeCount, matches.Length);

        var result = new DecompiledComparisonResult(
            Similarity: overall,
            StructuralSimilarity: structural,
            SemanticSimilarity: semantic,
            EditDistance: distance,
            Equivalences: matches,
            Differences: diffs,
            Confidence: confidence);
        return Task.FromResult(result);
    }
}

View File

@@ -0,0 +1,108 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Ghidra;
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class GhidraDecompilerAdapter
{
    /// <inheritdoc />
    public async Task<DecompiledFunction> DecompileAsync(
        GhidraFunction function,
        DecompileOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(function);
        options ??= new DecompileOptions();
        // Honour cancellation like ParseToAstAsync and CompareAsync do; previously the token was ignored.
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug(
            "Decompiling function {Name} at 0x{Address:X}",
            function.Name,
            function.Address);

        // The function is expected to carry the decompiled text already; without it only a stub
        // (placeholder code, no AST, no locals, no callees) can be returned.
        var code = function.DecompiledCode;
        if (string.IsNullOrEmpty(code))
        {
            _logger.LogWarning(
                "Function {Name} has no decompiled code, returning stub",
                function.Name);
            return new DecompiledFunction(
                function.Name,
                BuildSignature(function),
                "/* Decompilation unavailable */",
                null,
                [],
                [],
                function.Address,
                function.Size);
        }

        // Cap the amount of text that is parsed and stored, and mark the cut.
        if (code.Length > options.MaxCodeLength)
        {
            code = code[..options.MaxCodeLength] + "\n/* ... truncated ... */";
        }

        // Parsing and extraction are synchronous and may be slow on large inputs, so check once
        // more before starting them.
        ct.ThrowIfCancellationRequested();

        // Best effort: a parser failure is logged and leaves the AST null rather than failing the call.
        DecompiledAst? ast = null;
        try
        {
            ast = _parser.Parse(code);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to parse decompiled code for {Name}", function.Name);
        }

        var locals = _parser.ExtractVariables(code);
        var calledFunctions = _parser.ExtractCalledFunctions(code);

        return new DecompiledFunction(
            function.Name,
            BuildSignature(function),
            code,
            ast,
            locals,
            calledFunctions,
            function.Address,
            function.Size);
    }

    /// <inheritdoc />
    public async Task<DecompiledFunction> DecompileAtAddressAsync(
        string binaryPath,
        ulong address,
        DecompileOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrEmpty(binaryPath);
        options ??= new DecompileOptions();
        // Do not open the file or start an analysis for an already-cancelled request.
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug(
            "Decompiling function at 0x{Address:X} in {Binary}",
            address,
            Path.GetFileName(binaryPath));

        using var stream = File.OpenRead(binaryPath);
        var analysis = await _ghidraService.AnalyzeAsync(
            stream,
            new GhidraAnalysisOptions
            {
                IncludeDecompilation = true,
                ExtractDecompilation = true
            },
            ct).ConfigureAwait(false);

        // Select the function whose address matches exactly.
        var function = analysis.Functions.FirstOrDefault(f => f.Address == address);
        if (function is null)
        {
            throw new InvalidOperationException($"No function found at address 0x{address:X}");
        }

        return await DecompileAsync(function, options, ct).ConfigureAwait(false);
    }
}

View File

@@ -0,0 +1,57 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using StellaOps.BinaryIndex.Ghidra;
namespace StellaOps.BinaryIndex.Decompiler;
public sealed partial class GhidraDecompilerAdapter
{
    /// <summary>
    /// Returns the signature supplied by Ghidra, or <c>void name(void)</c> when it is null or empty.
    /// </summary>
    private static string BuildSignature(GhidraFunction function)
    {
        return string.IsNullOrEmpty(function.Signature)
            ? $"void {function.Name}(void)"
            : function.Signature;
    }

    /// <summary>
    /// Blends the three metrics into one score: 40% structural, 40% semantic and 20% edit
    /// similarity, where edit similarity is <c>1 - normalizedEditDistance</c>.
    /// </summary>
    private static decimal ComputeOverallSimilarity(
        decimal structural,
        decimal semantic,
        decimal normalizedEditDistance)
    {
        const decimal StructuralWeight = 0.4m;
        const decimal SemanticWeight = 0.4m;
        const decimal EditWeight = 0.2m;

        var editSimilarity = 1.0m - normalizedEditDistance;
        return structural * StructuralWeight + semantic * SemanticWeight + editSimilarity * EditWeight;
    }

    /// <summary>
    /// Maps a similarity score to a confidence bucket, taking AST size and the number of
    /// equivalences into account.
    /// </summary>
    private static ComparisonConfidence DetermineConfidence(
        decimal similarity,
        int nodeCountA,
        int nodeCountB,
        int equivalenceCount)
    {
        var minNodes = Math.Min(nodeCountA, nodeCountB);

        // Fewer than five nodes in the smaller AST always yields low confidence.
        if (minNodes < 5)
        {
            return ComparisonConfidence.Low;
        }

        return similarity switch
        {
            > 0.9m when equivalenceCount > minNodes * 0.7 => ComparisonConfidence.VeryHigh,
            > 0.7m when equivalenceCount > minNodes * 0.5 => ComparisonConfidence.High,
            > 0.5m => ComparisonConfidence.Medium,
            _ => ComparisonConfidence.Low
        };
    }
}

View File

@@ -1,18 +1,15 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.Ghidra;
using System.Text.Json;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Adapter for Ghidra's decompiler via headless analysis.
/// </summary>
public sealed class GhidraDecompilerAdapter : IDecompilerService
public sealed partial class GhidraDecompilerAdapter : IDecompilerService
{
private readonly IGhidraService _ghidraService;
private readonly IDecompiledCodeParser _parser;
@@ -33,260 +30,4 @@ public sealed class GhidraDecompilerAdapter : IDecompilerService
_options = options.Value;
_logger = logger;
}
/// <inheritdoc />
public async Task<DecompiledFunction> DecompileAsync(
    GhidraFunction function,
    DecompileOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(function);
    options ??= new DecompileOptions();
    // Honour cancellation like ParseToAstAsync and CompareAsync do; previously the token was ignored.
    ct.ThrowIfCancellationRequested();

    _logger.LogDebug(
        "Decompiling function {Name} at 0x{Address:X}",
        function.Name,
        function.Address);

    // The GhidraFunction should already have decompiled code from analysis
    var code = function.DecompiledCode;
    if (string.IsNullOrEmpty(code))
    {
        _logger.LogWarning(
            "Function {Name} has no decompiled code, returning stub",
            function.Name);
        return new DecompiledFunction(
            function.Name,
            BuildSignature(function),
            "/* Decompilation unavailable */",
            null,
            [],
            [],
            function.Address,
            function.Size);
    }

    // Truncate if too long, marking the cut in the stored text.
    if (code.Length > options.MaxCodeLength)
    {
        code = code[..options.MaxCodeLength] + "\n/* ... truncated ... */";
    }

    // Parsing and extraction are synchronous and may be slow on large inputs.
    ct.ThrowIfCancellationRequested();

    // Parse to AST; a parser failure is logged and leaves the AST null.
    DecompiledAst? ast = null;
    try
    {
        ast = _parser.Parse(code);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to parse decompiled code for {Name}", function.Name);
    }

    // Extract metadata
    var locals = _parser.ExtractVariables(code);
    var calledFunctions = _parser.ExtractCalledFunctions(code);

    return new DecompiledFunction(
        function.Name,
        BuildSignature(function),
        code,
        ast,
        locals,
        calledFunctions,
        function.Address,
        function.Size);
}
/// <inheritdoc />
public async Task<DecompiledFunction> DecompileAtAddressAsync(
    string binaryPath,
    ulong address,
    DecompileOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrEmpty(binaryPath);
    options ??= new DecompileOptions();
    // Do not open the file or start an analysis for an already-cancelled request.
    ct.ThrowIfCancellationRequested();

    _logger.LogDebug(
        "Decompiling function at 0x{Address:X} in {Binary}",
        address,
        Path.GetFileName(binaryPath));

    // Use Ghidra to analyze and get the function
    using var stream = File.OpenRead(binaryPath);
    var analysis = await _ghidraService.AnalyzeAsync(
        stream,
        new GhidraAnalysisOptions
        {
            IncludeDecompilation = true,
            ExtractDecompilation = true
        },
        ct).ConfigureAwait(false); // library code: no need to resume on the caller's context

    // Select the function whose address matches exactly.
    var function = analysis.Functions.FirstOrDefault(f => f.Address == address);
    if (function is null)
    {
        throw new InvalidOperationException($"No function found at address 0x{address:X}");
    }

    return await DecompileAsync(function, options, ct).ConfigureAwait(false);
}
/// <inheritdoc />
public Task<DecompiledAst> ParseToAstAsync(
    string decompiledCode,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrEmpty(decompiledCode);
    ct.ThrowIfCancellationRequested();

    // Parsing is synchronous; hand back an already-completed task.
    return Task.FromResult(_parser.Parse(decompiledCode));
}
/// <inheritdoc />
public Task<DecompiledComparisonResult> CompareAsync(
    DecompiledFunction a,
    DecompiledFunction b,
    ComparisonOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(a);
    ArgumentNullException.ThrowIfNull(b);
    options ??= new ComparisonOptions();
    ct.ThrowIfCancellationRequested();

    _logger.LogDebug(
        "Comparing functions {A} and {B}",
        a.FunctionName,
        b.FunctionName);

    // Both ASTs are required; otherwise report zero similarity, maximal normalized distance
    // and low confidence.
    if (a.Ast is not { } left || b.Ast is not { } right)
    {
        _logger.LogWarning("Cannot compare functions without ASTs");
        var noComparison = new DecompiledComparisonResult(
            Similarity: 0,
            StructuralSimilarity: 0,
            SemanticSimilarity: 0,
            EditDistance: new AstEditDistance(0, 0, 0, 0, 1.0m),
            Equivalences: [],
            Differences: [],
            Confidence: ComparisonConfidence.Low);
        return Task.FromResult(noComparison);
    }

    // Raw metrics from the comparison engine.
    var structural = _comparisonEngine.ComputeStructuralSimilarity(left, right);
    var distance = _comparisonEngine.ComputeEditDistance(left, right);
    var matches = _comparisonEngine.FindEquivalences(left, right);
    var diffs = _comparisonEngine.FindDifferences(left, right);

    // Semantic similarity: share of the larger AST that is covered by equivalences.
    var largerNodeCount = Math.Max(left.NodeCount, right.NodeCount);
    var semantic = largerNodeCount > 0
        ? (decimal)matches.Length / largerNodeCount
        : 0m;

    var overall = ComputeOverallSimilarity(structural, semantic, distance.NormalizedDistance);
    var confidence = DetermineConfidence(overall, left.NodeCount, right.NodeCount, matches.Length);

    var result = new DecompiledComparisonResult(
        Similarity: overall,
        StructuralSimilarity: structural,
        SemanticSimilarity: semantic,
        EditDistance: distance,
        Equivalences: matches,
        Differences: diffs,
        Confidence: confidence);
    return Task.FromResult(result);
}
/// <summary>
/// Returns the signature supplied by Ghidra, or <c>void name(void)</c> when it is null or empty.
/// </summary>
private static string BuildSignature(GhidraFunction function)
{
    return string.IsNullOrEmpty(function.Signature)
        ? $"void {function.Name}(void)"
        : function.Signature;
}
/// <summary>
/// Blends the three metrics into one score: 40% structural, 40% semantic and 20% edit
/// similarity, where edit similarity is <c>1 - normalizedEditDistance</c>.
/// </summary>
private static decimal ComputeOverallSimilarity(
    decimal structural,
    decimal semantic,
    decimal normalizedEditDistance)
{
    const decimal StructuralWeight = 0.4m;
    const decimal SemanticWeight = 0.4m;
    const decimal EditWeight = 0.2m;

    var editSimilarity = 1.0m - normalizedEditDistance;
    return structural * StructuralWeight + semantic * SemanticWeight + editSimilarity * EditWeight;
}
/// <summary>
/// Maps a similarity score to a confidence bucket, taking AST size and the number of
/// equivalences into account.
/// </summary>
private static ComparisonConfidence DetermineConfidence(
    decimal similarity,
    int nodeCountA,
    int nodeCountB,
    int equivalenceCount)
{
    var minNodes = Math.Min(nodeCountA, nodeCountB);

    // Fewer than five nodes in the smaller AST always yields low confidence.
    if (minNodes < 5)
    {
        return ComparisonConfidence.Low;
    }

    // The two top buckets need a high score and enough equivalences relative to the smaller AST.
    return similarity switch
    {
        > 0.9m when equivalenceCount > minNodes * 0.7 => ComparisonConfidence.VeryHigh,
        > 0.7m when equivalenceCount > minNodes * 0.5 => ComparisonConfidence.High,
        > 0.5m => ComparisonConfidence.Medium,
        _ => ComparisonConfidence.Low
    };
}
}
/// <summary>
/// Options for the decompiler adapter.
/// </summary>
public sealed class DecompilerOptions
{
/// <summary>Path of the Ghidra scripts directory; defaults to <c>/scripts</c>.</summary>
public string GhidraScriptsPath { get; set; } = "/scripts";
/// <summary>Default timeout; defaults to 30 seconds. NOTE(review): the consumer is not visible here - confirm where it is applied.</summary>
public TimeSpan DefaultTimeout { get; set; } = TimeSpan.FromSeconds(30);
/// <summary>
/// Maximum code length; defaults to 100,000. NOTE(review): <c>DecompileAsync</c> truncates using
/// <c>MaxCodeLength</c> from the per-call <c>DecompileOptions</c>, not this property - confirm how the two relate.
/// </summary>
public int MaxCodeLength { get; set; } = 100_000;
}

View File

@@ -0,0 +1,43 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Engine for comparing AST structures.
/// </summary>
/// <remarks>
/// <c>GhidraDecompilerAdapter.CompareAsync</c> calls all four members for a pair of ASTs and
/// combines the structural similarity, the number of equivalences and the normalized edit
/// distance into one overall score.
/// </remarks>
public interface IAstComparisonEngine
{
/// <summary>
/// Compute structural similarity between ASTs.
/// </summary>
/// <param name="a">First AST.</param>
/// <param name="b">Second AST.</param>
/// <returns>Similarity score (0.0 to 1.0).</returns>
decimal ComputeStructuralSimilarity(DecompiledAst a, DecompiledAst b);
/// <summary>
/// Compute edit distance between ASTs.
/// </summary>
/// <param name="a">First AST.</param>
/// <param name="b">Second AST.</param>
/// <returns>Edit distance metrics.</returns>
AstEditDistance ComputeEditDistance(DecompiledAst a, DecompiledAst b);
/// <summary>
/// Find semantic equivalences between ASTs.
/// </summary>
/// <param name="a">First AST.</param>
/// <param name="b">Second AST.</param>
/// <returns>List of equivalent node pairs.</returns>
ImmutableArray<SemanticEquivalence> FindEquivalences(DecompiledAst a, DecompiledAst b);
/// <summary>
/// Find differences between ASTs.
/// </summary>
/// <param name="a">First AST.</param>
/// <param name="b">Second AST.</param>
/// <returns>List of differences.</returns>
ImmutableArray<CodeDifference> FindDifferences(DecompiledAst a, DecompiledAst b);
}

View File

@@ -0,0 +1,32 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Normalizes decompiled code for comparison.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="ComputeCanonicalHash"/> is described as hashing normalized code but
/// takes no normalization options; whether it normalizes its input first, and with which
/// options, is not stated here - confirm with the implementation.
/// </remarks>
public interface ICodeNormalizer
{
/// <summary>
/// Normalize decompiled code for comparison.
/// </summary>
/// <param name="code">Raw decompiled code.</param>
/// <param name="options">Normalization options.</param>
/// <returns>Normalized code.</returns>
string Normalize(string code, NormalizationOptions? options = null);
/// <summary>
/// Compute canonical hash of normalized code.
/// </summary>
/// <param name="code">Decompiled code.</param>
/// <returns>32-byte hash.</returns>
byte[] ComputeCanonicalHash(string code);
/// <summary>
/// Normalize an AST for comparison.
/// </summary>
/// <param name="ast">AST to normalize.</param>
/// <param name="options">Normalization options.</param>
/// <returns>Normalized AST.</returns>
DecompiledAst NormalizeAst(DecompiledAst ast, NormalizationOptions? options = null);
}

View File

@@ -0,0 +1,32 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Parses decompiled C-like code into AST.
/// </summary>
/// <remarks>
/// <c>GhidraDecompilerAdapter.DecompileAsync</c> calls <see cref="Parse"/> inside a try/catch and
/// treats any exception as "no AST", then calls <see cref="ExtractVariables"/> and
/// <see cref="ExtractCalledFunctions"/> on the same text outside that try/catch.
/// </remarks>
public interface IDecompiledCodeParser
{
/// <summary>
/// Parse decompiled code into AST.
/// </summary>
/// <param name="code">C-like pseudo-code.</param>
/// <returns>Parsed AST.</returns>
DecompiledAst Parse(string code);
/// <summary>
/// Extract local variables from decompiled code.
/// </summary>
/// <param name="code">C-like pseudo-code.</param>
/// <returns>List of local variables.</returns>
ImmutableArray<LocalVariable> ExtractVariables(string code);
/// <summary>
/// Extract called functions from decompiled code.
/// </summary>
/// <param name="code">C-like pseudo-code.</param>
/// <returns>List of function names called.</returns>
ImmutableArray<string> ExtractCalledFunctions(string code);
}

View File

@@ -3,7 +3,6 @@
using StellaOps.BinaryIndex.Ghidra;
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
@@ -62,97 +61,3 @@ public interface IDecompilerService
ComparisonOptions? options = null,
CancellationToken ct = default);
}
/// <summary>
/// Engine for comparing AST structures.
/// </summary>
/// <remarks>
/// <c>GhidraDecompilerAdapter.CompareAsync</c> calls all four members for a pair of ASTs and
/// combines the structural similarity, the number of equivalences and the normalized edit
/// distance into one overall score.
/// </remarks>
public interface IAstComparisonEngine
{
/// <summary>
/// Compute structural similarity between ASTs.
/// </summary>
/// <param name="a">First AST.</param>
/// <param name="b">Second AST.</param>
/// <returns>Similarity score (0.0 to 1.0).</returns>
decimal ComputeStructuralSimilarity(DecompiledAst a, DecompiledAst b);
/// <summary>
/// Compute edit distance between ASTs.
/// </summary>
/// <param name="a">First AST.</param>
/// <param name="b">Second AST.</param>
/// <returns>Edit distance metrics.</returns>
AstEditDistance ComputeEditDistance(DecompiledAst a, DecompiledAst b);
/// <summary>
/// Find semantic equivalences between ASTs.
/// </summary>
/// <param name="a">First AST.</param>
/// <param name="b">Second AST.</param>
/// <returns>List of equivalent node pairs.</returns>
ImmutableArray<SemanticEquivalence> FindEquivalences(DecompiledAst a, DecompiledAst b);
/// <summary>
/// Find differences between ASTs.
/// </summary>
/// <param name="a">First AST.</param>
/// <param name="b">Second AST.</param>
/// <returns>List of differences.</returns>
ImmutableArray<CodeDifference> FindDifferences(DecompiledAst a, DecompiledAst b);
}
/// <summary>
/// Normalizes decompiled code for comparison.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="ComputeCanonicalHash"/> is described as hashing normalized code but
/// takes no normalization options; whether it normalizes its input first, and with which
/// options, is not stated here - confirm with the implementation.
/// </remarks>
public interface ICodeNormalizer
{
/// <summary>
/// Normalize decompiled code for comparison.
/// </summary>
/// <param name="code">Raw decompiled code.</param>
/// <param name="options">Normalization options.</param>
/// <returns>Normalized code.</returns>
string Normalize(string code, NormalizationOptions? options = null);
/// <summary>
/// Compute canonical hash of normalized code.
/// </summary>
/// <param name="code">Decompiled code.</param>
/// <returns>32-byte hash.</returns>
byte[] ComputeCanonicalHash(string code);
/// <summary>
/// Normalize an AST for comparison.
/// </summary>
/// <param name="ast">AST to normalize.</param>
/// <param name="options">Normalization options.</param>
/// <returns>Normalized AST.</returns>
DecompiledAst NormalizeAst(DecompiledAst ast, NormalizationOptions? options = null);
}
/// <summary>
/// Parses decompiled C-like code into AST.
/// </summary>
/// <remarks>
/// <c>GhidraDecompilerAdapter.DecompileAsync</c> calls <see cref="Parse"/> inside a try/catch and
/// treats any exception as "no AST", then calls <see cref="ExtractVariables"/> and
/// <see cref="ExtractCalledFunctions"/> on the same text outside that try/catch.
/// </remarks>
public interface IDecompiledCodeParser
{
/// <summary>
/// Parse decompiled code into AST.
/// </summary>
/// <param name="code">C-like pseudo-code.</param>
/// <returns>Parsed AST.</returns>
DecompiledAst Parse(string code);
/// <summary>
/// Extract local variables from decompiled code.
/// </summary>
/// <param name="code">C-like pseudo-code.</param>
/// <returns>List of local variables.</returns>
ImmutableArray<LocalVariable> ExtractVariables(string code);
/// <summary>
/// Extract called functions from decompiled code.
/// </summary>
/// <param name="code">C-like pseudo-code.</param>
/// <returns>List of function names called.</returns>
ImmutableArray<string> ExtractCalledFunctions(string code);
}

View File

@@ -0,0 +1,13 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// A local variable in decompiled code.
/// </summary>
/// <param name="Name">Variable name.</param>
/// <param name="Type">Type name as text.</param>
/// <param name="StackOffset">Stack offset of the variable. NOTE(review): unit and sign convention are not visible here - confirm with the extractor.</param>
/// <param name="IsParameter">Whether the variable is a function parameter.</param>
/// <param name="ParameterIndex">Parameter position when known; otherwise <c>null</c>. NOTE(review): presumably only set when <paramref name="IsParameter"/> is true - confirm.</param>
public sealed record LocalVariable(
string Name,
string Type,
int StackOffset,
bool IsParameter,
int? ParameterIndex);

View File

@@ -1,377 +0,0 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// A function decompiled to C-like pseudo-code.
/// </summary>
/// <param name="FunctionName">Name of the function.</param>
/// <param name="Signature">Signature text; the adapter falls back to <c>void name(void)</c> when Ghidra supplies none.</param>
/// <param name="Code">Decompiled pseudo-code, possibly truncated, or a placeholder comment when unavailable.</param>
/// <param name="Ast">Parsed AST; <c>null</c> when there was no code or parsing threw.</param>
/// <param name="Locals">Local variables extracted from the code.</param>
/// <param name="CalledFunctions">Names of functions called from the code.</param>
/// <param name="Address">Address of the function.</param>
/// <param name="SizeBytes">Size of the function as reported by Ghidra.</param>
public sealed record DecompiledFunction(
string FunctionName,
string Signature,
string Code,
DecompiledAst? Ast,
ImmutableArray<LocalVariable> Locals,
ImmutableArray<string> CalledFunctions,
ulong Address,
int SizeBytes);
/// <summary>
/// AST representation of decompiled code.
/// </summary>
/// <param name="Root">Root node of the tree.</param>
/// <param name="NodeCount">Number of nodes; the adapter uses it to normalize the equivalence count and to gate confidence.</param>
/// <param name="Depth">Depth of the tree. NOTE(review): whether a single-node tree has depth 0 or 1 is not visible here - confirm with the parser.</param>
/// <param name="Patterns">Code patterns recognized in the tree.</param>
public sealed record DecompiledAst(
AstNode Root,
int NodeCount,
int Depth,
ImmutableArray<AstPattern> Patterns);
/// <summary>
/// Abstract syntax tree node.
/// </summary>
/// <param name="Type">Kind of node; <c>AstComparisonEngine.CompareNodes</c> reports a difference as soon as two nodes disagree on it.</param>
/// <param name="Children">Child nodes.</param>
/// <param name="Location">Position in the decompiled text, or <c>null</c> when not recorded.</param>
public abstract record AstNode(
AstNodeType Type,
ImmutableArray<AstNode> Children,
SourceLocation? Location);
/// <summary>
/// Types of AST nodes.
/// </summary>
/// <remarks>
/// <c>AstComparisonEngine.CompareNodes</c> records a <c>Modified</c> difference ("Node type
/// changed") when two compared nodes carry different values of this enum.
/// NOTE(review): which members the parser actually produces is not visible here; for example
/// <c>RecursiveParser</c> builds a <c>WhileNode</c> for do-while loops - confirm the mapping.
/// </remarks>
public enum AstNodeType
{
// Structure
Function,
Block,
Parameter,
// Control flow
If,
While,
For,
DoWhile,
Switch,
Case,
Default,
Return,
Break,
Continue,
Goto,
Label,
// Expressions
Assignment,
BinaryOp,
UnaryOp,
TernaryOp,
Call,
Cast,
Sizeof,
// Operands
Variable,
Constant,
StringLiteral,
ArrayAccess,
FieldAccess,
PointerDeref,
AddressOf,
// Declarations
VariableDecl,
TypeDef
}
/// <summary>
/// Source location in decompiled code.
/// </summary>
/// <param name="Line">Line of the location. NOTE(review): 0- or 1-based is not visible here - confirm.</param>
/// <param name="Column">Column of the location. NOTE(review): base not visible here - confirm.</param>
/// <param name="Length">Length of the span. NOTE(review): presumably in characters - confirm.</param>
public sealed record SourceLocation(int Line, int Column, int Length);
/// <summary>
/// A local variable in decompiled code.
/// </summary>
/// <param name="Name">Variable name.</param>
/// <param name="Type">Type name as text.</param>
/// <param name="StackOffset">Stack offset of the variable. NOTE(review): unit and sign convention are not visible here - confirm with the extractor.</param>
/// <param name="IsParameter">Whether the variable is a function parameter.</param>
/// <param name="ParameterIndex">Parameter position when known; otherwise <c>null</c>. NOTE(review): presumably only set when <paramref name="IsParameter"/> is true - confirm.</param>
public sealed record LocalVariable(
string Name,
string Type,
int StackOffset,
bool IsParameter,
int? ParameterIndex);
/// <summary>
/// A recognized code pattern.
/// </summary>
/// <param name="Type">Kind of pattern that was recognized.</param>
/// <param name="Node">AST node the pattern was recognized on.</param>
/// <param name="Metadata">Optional details about the match; may be <c>null</c>.</param>
public sealed record AstPattern(
PatternType Type,
AstNode Node,
PatternMetadata? Metadata);
/// <summary>
/// Types of code patterns.
/// </summary>
/// <remarks>
/// NOTE(review): the parser contains <c>IsErrorCheck</c> and <c>IsNullCheck</c> helpers that
/// classify <c>if</c> conditions; presumably they feed <see cref="ErrorCheck"/> and
/// <see cref="NullCheck"/> - confirm. Detection of the other members is not visible here.
/// </remarks>
public enum PatternType
{
// Loops
CountedLoop,
ConditionalLoop,
InfiniteLoop,
LoopUnrolled,
// Branches
IfElseChain,
SwitchTable,
ShortCircuit,
// Memory
MemoryAllocation,
MemoryDeallocation,
BufferOperation,
StackBuffer,
// Error handling
ErrorCheck,
NullCheck,
BoundsCheck,
// Idioms
StringOperation,
MathOperation,
BitwiseOperation,
TableLookup
}
/// <summary>
/// Metadata about a recognized pattern.
/// </summary>
/// <param name="Description">Human-readable description of the match.</param>
/// <param name="Confidence">Confidence of the match.</param>
/// <param name="Properties">Optional string key/value details, or <c>null</c>.</param>
// NOTE(review): Confidence is presumably in the range 0..1 — confirm; no range is enforced here.
public sealed record PatternMetadata(
string Description,
decimal Confidence,
ImmutableDictionary<string, string>? Properties);
/// <summary>
/// Result of comparing two decompiled functions.
/// </summary>
/// <param name="Similarity">Overall similarity score.</param>
/// <param name="StructuralSimilarity">Structural similarity score.</param>
/// <param name="SemanticSimilarity">Semantic similarity score.</param>
/// <param name="EditDistance">Edit distance between the two ASTs.</param>
/// <param name="Equivalences">Semantic equivalences found between nodes.</param>
/// <param name="Differences">Differences found between the two functions.</param>
/// <param name="Confidence">Confidence level of the comparison.</param>
// NOTE(review): the three similarity scores are presumably in 0..1, and how Similarity
// combines the other two is not defined here — confirm against the comparison engine.
public sealed record DecompiledComparisonResult(
decimal Similarity,
decimal StructuralSimilarity,
decimal SemanticSimilarity,
AstEditDistance EditDistance,
ImmutableArray<SemanticEquivalence> Equivalences,
ImmutableArray<CodeDifference> Differences,
ComparisonConfidence Confidence);
/// <summary>
/// Edit distance between ASTs.
/// </summary>
/// <param name="Insertions">Number of insert operations.</param>
/// <param name="Deletions">Number of delete operations.</param>
/// <param name="Modifications">Number of modify operations.</param>
/// <param name="TotalOperations">Total number of edit operations.</param>
/// <param name="NormalizedDistance">Normalized form of the distance.</param>
// NOTE(review): TotalOperations is presumably Insertions + Deletions + Modifications, and
// NormalizedDistance presumably lies in 0..1 — neither is computed or enforced here; confirm.
public sealed record AstEditDistance(
int Insertions,
int Deletions,
int Modifications,
int TotalOperations,
decimal NormalizedDistance);
/// <summary>
/// A semantic equivalence between AST nodes.
/// </summary>
/// <param name="NodeA">Node from the first tree.</param>
/// <param name="NodeB">Node from the second tree.</param>
/// <param name="Type">Kind of equivalence.</param>
/// <param name="Confidence">Confidence of the equivalence.</param>
/// <param name="Explanation">Optional human-readable explanation, or <c>null</c>.</param>
// NOTE(review): Confidence is presumably in 0..1 — confirm; no range is enforced here.
public sealed record SemanticEquivalence(
AstNode NodeA,
AstNode NodeB,
EquivalenceType Type,
decimal Confidence,
string? Explanation);
/// <summary>
/// Types of semantic equivalence.
/// </summary>
// Members use implicit numbering (Identical = 0 ... Semantically = 5).
// NOTE(review): the exact criteria for each member are defined by the comparison code, not here;
// "Semantically" presumably means "equivalent in meaning but not matched by a more specific kind" — confirm.
public enum EquivalenceType
{
Identical,
Renamed,
Reordered,
Optimized,
Inlined,
Semantically
}
/// <summary>
/// A difference between two pieces of code.
/// </summary>
/// <param name="Type">Kind of difference.</param>
/// <param name="NodeA">Node from the first tree, or <c>null</c>.</param>
/// <param name="NodeB">Node from the second tree, or <c>null</c>.</param>
/// <param name="Description">Human-readable description of the difference.</param>
// NOTE(review): presumably NodeA is null for Added and NodeB is null for Removed — confirm with the producer.
public sealed record CodeDifference(
DifferenceType Type,
AstNode? NodeA,
AstNode? NodeB,
string Description);
/// <summary>
/// Types of code differences.
/// </summary>
// Members use implicit numbering (Added = 0 ... OptimizationVariant = 5).
// NOTE(review): when each member is reported is decided by the comparison code, not here — confirm there.
public enum DifferenceType
{
Added,
Removed,
Modified,
Reordered,
TypeChanged,
OptimizationVariant
}
/// <summary>
/// Confidence level for comparison results.
/// </summary>
// Members are declared in ascending order with implicit values (Low = 0 ... VeryHigh = 3),
// so relational comparisons such as ">= High" follow the confidence ordering.
public enum ComparisonConfidence
{
Low,
Medium,
High,
VeryHigh
}
/// <summary>
/// Options for decompilation.
/// </summary>
public sealed record DecompileOptions
{
// Defaults to true.
public bool SimplifyCode { get; init; } = true;
// Defaults to true.
public bool RecoverTypes { get; init; } = true;
// Defaults to true.
public bool RecoverStructs { get; init; } = true;
// Defaults to 100,000. NOTE(review): presumably measured in characters of generated code — confirm.
public int MaxCodeLength { get; init; } = 100_000;
// Defaults to 30 seconds.
public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);
}
/// <summary>
/// Options for AST comparison.
/// </summary>
public sealed record ComparisonOptions
{
// Defaults to true.
public bool IgnoreVariableNames { get; init; } = true;
// Defaults to false.
public bool IgnoreConstants { get; init; } = false;
// Defaults to true.
public bool DetectOptimizations { get; init; } = true;
// Defaults to 0.5. NOTE(review): presumably on the same scale as the similarity scores — confirm.
public decimal MinSimilarityThreshold { get; init; } = 0.5m;
}
/// <summary>
/// Options for code normalization.
/// </summary>
public sealed record NormalizationOptions
{
// Defaults to true.
public bool NormalizeVariables { get; init; } = true;
// Defaults to true.
public bool NormalizeFunctionCalls { get; init; } = true;
// Defaults to false.
public bool NormalizeConstants { get; init; } = false;
// Defaults to true.
public bool NormalizeWhitespace { get; init; } = true;
// Defaults to false.
public bool SortIndependentStatements { get; init; } = false;
// Defaults to null (no set supplied).
// NOTE(review): presumably names in this set are exempt from function-call normalization — confirm with the normalizer.
public ImmutableHashSet<string>? KnownFunctions { get; init; }
// Shared instance carrying the default values declared above.
public static NormalizationOptions Default { get; } = new();
}
#region Concrete AST Node Types
// Every record below fixes its AstNodeType and builds its Children array in the base-constructor call;
// the child order stated in each comment is the order of that array.

/// <summary>Function definition. Children: the body first, then the parameters.</summary>
public sealed record FunctionNode(
string Name,
string ReturnType,
ImmutableArray<ParameterNode> Parameters,
BlockNode Body,
SourceLocation? Location = null)
: AstNode(AstNodeType.Function, [Body, .. Parameters], Location);
/// <summary>Function parameter. Leaf node (no children).</summary>
public sealed record ParameterNode(
string Name,
string DataType,
int Index,
SourceLocation? Location = null)
: AstNode(AstNodeType.Parameter, [], Location);
/// <summary>Statement block. Children: the statements themselves.</summary>
public sealed record BlockNode(
ImmutableArray<AstNode> Statements,
SourceLocation? Location = null)
: AstNode(AstNodeType.Block, Statements, Location);
/// <summary>
/// If statement. Children: condition, then-branch, and the else-branch only when it is non-null
/// (so the child count is 2 or 3).
/// </summary>
public sealed record IfNode(
AstNode Condition,
AstNode ThenBranch,
AstNode? ElseBranch,
SourceLocation? Location = null)
: AstNode(AstNodeType.If, ElseBranch is null ? [Condition, ThenBranch] : [Condition, ThenBranch, ElseBranch], Location);
/// <summary>While loop. Children: condition, body.</summary>
public sealed record WhileNode(
AstNode Condition,
AstNode Body,
SourceLocation? Location = null)
: AstNode(AstNodeType.While, [Condition, Body], Location);
/// <summary>
/// For loop. Children are always four, in the order init, condition, update, body;
/// a null clause is represented by <see cref="EmptyNode.Instance"/> in the children array.
/// </summary>
public sealed record ForNode(
AstNode? Init,
AstNode? Condition,
AstNode? Update,
AstNode Body,
SourceLocation? Location = null)
: AstNode(AstNodeType.For, [Init ?? EmptyNode.Instance, Condition ?? EmptyNode.Instance, Update ?? EmptyNode.Instance, Body], Location);
/// <summary>Return statement. Children: the returned value when non-null, otherwise none.</summary>
public sealed record ReturnNode(
AstNode? Value,
SourceLocation? Location = null)
: AstNode(AstNodeType.Return, Value is null ? [] : [Value], Location);
/// <summary>Assignment. Children: target, value. <c>Operator</c> holds the operator text.</summary>
public sealed record AssignmentNode(
AstNode Target,
AstNode Value,
string Operator,
SourceLocation? Location = null)
: AstNode(AstNodeType.Assignment, [Target, Value], Location);
/// <summary>Binary operation. Children: left, right. <c>Operator</c> holds the operator text.</summary>
public sealed record BinaryOpNode(
AstNode Left,
AstNode Right,
string Operator,
SourceLocation? Location = null)
: AstNode(AstNodeType.BinaryOp, [Left, Right], Location);
/// <summary>
/// Unary operation. Children: the operand. <c>IsPrefix</c> distinguishes prefix from postfix use of the operator.
/// </summary>
public sealed record UnaryOpNode(
AstNode Operand,
string Operator,
bool IsPrefix,
SourceLocation? Location = null)
: AstNode(AstNodeType.UnaryOp, [Operand], Location);
/// <summary>Function call identified by callee name. Children: the arguments.</summary>
public sealed record CallNode(
string FunctionName,
ImmutableArray<AstNode> Arguments,
SourceLocation? Location = null)
: AstNode(AstNodeType.Call, Arguments, Location);
/// <summary>Variable reference. Leaf node; <c>DataType</c> may be null.</summary>
public sealed record VariableNode(
string Name,
string? DataType,
SourceLocation? Location = null)
: AstNode(AstNodeType.Variable, [], Location);
/// <summary>Constant value. Leaf node.</summary>
// Value is typed object, so record equality relies on the runtime type's Equals.
public sealed record ConstantNode(
object Value,
string DataType,
SourceLocation? Location = null)
: AstNode(AstNodeType.Constant, [], Location);
/// <summary>Array indexing. Children: array expression, index expression.</summary>
public sealed record ArrayAccessNode(
AstNode Array,
AstNode Index,
SourceLocation? Location = null)
: AstNode(AstNodeType.ArrayAccess, [Array, Index], Location);
/// <summary>
/// Field access. Children: the object expression only; the field is kept as a name.
/// </summary>
// NOTE(review): IsPointer presumably marks "->" access as opposed to "." — confirm with the parser.
public sealed record FieldAccessNode(
AstNode Object,
string FieldName,
bool IsPointer,
SourceLocation? Location = null)
: AstNode(AstNodeType.FieldAccess, [Object], Location);
/// <summary>Type cast. Children: the expression being cast. <c>TargetType</c> is the type as text.</summary>
public sealed record CastNode(
AstNode Expression,
string TargetType,
SourceLocation? Location = null)
: AstNode(AstNodeType.Cast, [Expression], Location);
/// <summary>
/// Placeholder node with no children and no location. It reports <see cref="AstNodeType.Block"/>
/// as its type, so it is not distinguishable from an empty block by <c>Type</c> alone.
/// </summary>
public sealed record EmptyNode() : AstNode(AstNodeType.Block, [], null)
{
// Shared instance used where a placeholder is needed (e.g. missing for-loop clauses).
public static EmptyNode Instance { get; } = new();
}
#endregion

View File

@@ -0,0 +1,20 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Options for code normalization.
/// </summary>
public sealed record NormalizationOptions
{
// Defaults to true.
public bool NormalizeVariables { get; init; } = true;
// Defaults to true.
public bool NormalizeFunctionCalls { get; init; } = true;
// Defaults to false.
public bool NormalizeConstants { get; init; } = false;
// Defaults to true.
public bool NormalizeWhitespace { get; init; } = true;
// Defaults to false.
public bool SortIndependentStatements { get; init; } = false;
// Defaults to null (no set supplied).
// NOTE(review): presumably names in this set are exempt from function-call normalization — confirm with the normalizer.
public ImmutableHashSet<string>? KnownFunctions { get; init; }
// Shared instance carrying the default values declared above.
public static NormalizationOptions Default { get; } = new();
}

View File

@@ -0,0 +1,24 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses <c>if (condition) statement [else statement]</c>.
    /// </summary>
    /// <returns>
    /// An <see cref="IfNode"/>. A missing then-statement is replaced by an empty block;
    /// the else-branch is <c>null</c> when there is no <c>else</c> or its statement parses to nothing.
    /// </returns>
    private IfNode ParseIf()
    {
        // Consume the current token (the "if" keyword when dispatched from ParseStatement).
        Advance();

        Expect(TokenType.Bracket, "(");
        var test = ParseExpression();
        Expect(TokenType.Bracket, ")");

        AstNode whenTrue = ParseStatement() ?? new BlockNode([]);

        if (Peek().Value != "else")
        {
            return new IfNode(test, whenTrue, null);
        }

        Advance(); // consume "else"
        return new IfNode(test, whenTrue, ParseStatement());
    }
}

View File

@@ -0,0 +1,63 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses <c>while (condition) statement</c>. A missing body becomes an empty block.
    /// </summary>
    private WhileNode ParseWhile()
    {
        Advance(); // consume the leading keyword token

        Expect(TokenType.Bracket, "(");
        var test = ParseExpression();
        Expect(TokenType.Bracket, ")");

        AstNode loopBody = ParseStatement() ?? new BlockNode([]);
        return new WhileNode(test, loopBody);
    }

    /// <summary>
    /// Parses <c>for (init; condition; update) statement</c>. Each of the three clauses is optional
    /// and is passed as <c>null</c> when empty; a missing body becomes an empty block.
    /// </summary>
    private ForNode ParseFor()
    {
        Advance(); // consume the leading keyword token
        Expect(TokenType.Bracket, "(");

        AstNode? init = Peek().Value == ";" ? null : ParseExpression();
        Expect(TokenType.Punctuation, ";");

        AstNode? condition = Peek().Value == ";" ? null : ParseExpression();
        Expect(TokenType.Punctuation, ";");

        AstNode? update = Peek().Value == ")" ? null : ParseExpression();
        Expect(TokenType.Bracket, ")");

        AstNode loopBody = ParseStatement() ?? new BlockNode([]);
        return new ForNode(init, condition, update, loopBody);
    }

    /// <summary>
    /// Parses <c>do statement while (condition);</c>.
    /// </summary>
    /// <returns>
    /// A <see cref="WhileNode"/>: the do/while form is represented with the same node as a
    /// plain while loop, so the "body runs at least once" distinction is not kept in the AST.
    /// </returns>
    private AstNode ParseDoWhile()
    {
        Advance(); // consume the leading keyword token

        AstNode loopBody = ParseStatement() ?? new BlockNode([]);

        Expect(TokenType.Keyword, "while");
        Expect(TokenType.Bracket, "(");
        var test = ParseExpression();
        Expect(TokenType.Bracket, ")");
        Expect(TokenType.Punctuation, ";");

        return new WhileNode(test, loopBody);
    }
}

View File

@@ -0,0 +1,50 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses <c>return [expression];</c>. The value is <c>null</c> for a bare <c>return;</c>.
    /// </summary>
    private ReturnNode ParseReturn()
    {
        Advance(); // consume the leading keyword token

        AstNode? result = Peek().Value == ";" ? null : ParseExpression();
        Expect(TokenType.Punctuation, ";");

        return new ReturnNode(result);
    }

    /// <summary>
    /// Parses <c>break;</c>. The statement is represented by an empty block.
    /// </summary>
    private AstNode ParseBreak() => ParseKeywordOnlyStatement();

    /// <summary>
    /// Parses <c>continue;</c>. The statement is represented by an empty block.
    /// </summary>
    private AstNode ParseContinue() => ParseKeywordOnlyStatement();

    /// <summary>
    /// Consumes the current token (nominally a stray <c>;</c>) and yields no statement.
    /// </summary>
    private AstNode? SkipSemicolon()
    {
        Advance();
        return null;
    }

    /// <summary>
    /// Parses an expression used as a statement; a trailing <c>;</c> is consumed when present
    /// but is not required.
    /// </summary>
    private AstNode? ParseExpressionStatement()
    {
        var expression = ParseExpression();
        if (Peek().Value != ";")
        {
            return expression;
        }

        Advance();
        return expression;
    }

    // Shared shape of "keyword ;" statements: consume the keyword and the terminator,
    // then produce an empty block as the stand-in node.
    private AstNode ParseKeywordOnlyStatement()
    {
        Advance();
        Expect(TokenType.Punctuation, ";");
        return new BlockNode([]);
    }
}

View File

@@ -0,0 +1,48 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses a left-associative chain of <c>&lt;&lt;</c> / <c>&gt;&gt;</c> over additive operands.
    /// </summary>
    private AstNode ParseShift()
    {
        var result = ParseAdditive();
        while (true)
        {
            if (Peek().Value is not ("<<" or ">>"))
            {
                return result;
            }

            var symbol = Advance().Value;
            var operand = ParseAdditive();
            result = new BinaryOpNode(result, operand, symbol);
        }
    }

    /// <summary>
    /// Parses a left-associative chain of <c>+</c> / <c>-</c> over multiplicative operands.
    /// </summary>
    private AstNode ParseAdditive()
    {
        var result = ParseMultiplicative();
        while (true)
        {
            if (Peek().Value is not ("+" or "-"))
            {
                return result;
            }

            var symbol = Advance().Value;
            var operand = ParseMultiplicative();
            result = new BinaryOpNode(result, operand, symbol);
        }
    }

    /// <summary>
    /// Parses a left-associative chain of <c>*</c> / <c>/</c> / <c>%</c> over unary operands.
    /// </summary>
    private AstNode ParseMultiplicative()
    {
        var result = ParseUnary();
        while (true)
        {
            if (Peek().Value is not ("*" or "/" or "%"))
            {
                return result;
            }

            var symbol = Advance().Value;
            var operand = ParseUnary();
            result = new BinaryOpNode(result, operand, symbol);
        }
    }
}

View File

@@ -0,0 +1,62 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
internal sealed partial class RecursiveParser
{
    // Binary-operator levels in this file, loosest binding first (C precedence):
    //   ||  ->  &&  ->  |  ->  ^  ->  &  ->  == !=  ->  < > <= >=  ->  shift (ParseShift)
    // Every level is left-associative and builds BinaryOpNode(left, right, operatorText).

    /// <summary>Parses a chain of <c>||</c> over logical-and operands.</summary>
    private AstNode ParseLogicalOr()
    {
        var left = ParseLogicalAnd();
        while (Peek().Value == "||")
        {
            var op = Advance().Value;
            var right = ParseLogicalAnd();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>Parses a chain of <c>&amp;&amp;</c> over bitwise-or operands.</summary>
    private AstNode ParseLogicalAnd()
    {
        var left = ParseBitwiseOr();
        while (Peek().Value == "&&")
        {
            var op = Advance().Value;
            var right = ParseBitwiseOr();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>
    /// Parses a chain of <c>|</c> over bitwise-xor operands. <c>^</c> and <c>&amp;</c> bind tighter
    /// and are handled by the levels below, so <c>a | b &amp; c</c> groups as <c>a | (b &amp; c)</c>.
    /// </summary>
    private AstNode ParseBitwiseOr()
    {
        var left = ParseBitwiseXor();
        while (Peek().Value == "|")
        {
            var op = Advance().Value;
            var right = ParseBitwiseXor();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>Parses a chain of <c>^</c> over bitwise-and operands.</summary>
    private AstNode ParseBitwiseXor()
    {
        var left = ParseBitwiseAnd();
        while (Peek().Value == "^")
        {
            var op = Advance().Value;
            var right = ParseBitwiseAnd();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>Parses a chain of <c>&amp;</c> over equality operands.</summary>
    private AstNode ParseBitwiseAnd()
    {
        var left = ParseComparison();
        while (Peek().Value == "&")
        {
            var op = Advance().Value;
            var right = ParseComparison();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>
    /// Parses a chain of <c>==</c> / <c>!=</c> over relational operands. Relational operators
    /// bind tighter, so <c>a == b &lt; c</c> groups as <c>a == (b &lt; c)</c>.
    /// </summary>
    private AstNode ParseComparison()
    {
        var left = ParseRelational();
        while (Peek().Value is "==" or "!=")
        {
            var op = Advance().Value;
            var right = ParseRelational();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>Parses a chain of <c>&lt;</c> / <c>&gt;</c> / <c>&lt;=</c> / <c>&gt;=</c> over shift operands.</summary>
    private AstNode ParseRelational()
    {
        var left = ParseShift();
        while (Peek().Value is "<" or ">" or "<=" or ">=")
        {
            var op = Advance().Value;
            var right = ParseShift();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }
}

View File

@@ -0,0 +1,88 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses a comma-separated argument list. The caller has already consumed <c>(</c>;
    /// the closing <c>)</c> is left for the caller.
    /// </summary>
    /// <returns>The parsed arguments, or an empty array when the next token is <c>)</c>.</returns>
    private ImmutableArray<AstNode> ParseArgumentList()
    {
        if (Peek().Value == ")")
        {
            return [];
        }

        var collected = ImmutableArray.CreateBuilder<AstNode>();
        do
        {
            // Skip the separator that ended the previous iteration (or a stray leading comma).
            if (Peek().Value == ",")
            {
                Advance();
            }

            collected.Add(ParseExpression());
        }
        while (Peek().Value == ",");

        return collected.ToImmutable();
    }

    /// <summary>
    /// Parses a primary expression: literal, identifier, parenthesized expression, cast,
    /// or <c>sizeof(type)</c>.
    /// </summary>
    /// <returns>
    /// Literals become <see cref="ConstantNode"/> with data type "int", "char*" or "char";
    /// identifiers become <see cref="VariableNode"/>; <c>sizeof</c> becomes a "size_t" constant whose
    /// value is the text <c>sizeof(type)</c>. Any other token is consumed and returned as a
    /// constant of data type "unknown".
    /// </returns>
    private AstNode ParsePrimary()
    {
        var current = Peek();

        // Token-type driven cases are checked before any value-driven case.
        switch (current.Type)
        {
            case TokenType.Number:
                Advance();
                return new ConstantNode(current.Value, "int");

            case TokenType.String:
                Advance();
                return new ConstantNode(current.Value, "char*");

            case TokenType.Char:
                Advance();
                return new ConstantNode(current.Value, "char");

            case TokenType.Identifier:
                Advance();
                return new VariableNode(current.Value, null);
        }

        switch (current.Value)
        {
            case "(":
            {
                Advance();

                // "(" followed by a type name is treated as a cast; otherwise it is grouping.
                if (!IsType(Peek().Value))
                {
                    var grouped = ParseExpression();
                    Expect(TokenType.Bracket, ")");
                    return grouped;
                }

                var castType = ParseType();
                Expect(TokenType.Bracket, ")");
                var castOperand = ParseUnary();
                return new CastNode(castOperand, castType);
            }

            case "sizeof":
            {
                Advance();
                Expect(TokenType.Bracket, "(");
                var measured = ParseType();
                Expect(TokenType.Bracket, ")");
                return new ConstantNode($"sizeof({measured})", "size_t");
            }

            default:
                // Unrecognized token: consume it and keep its text as an opaque constant.
                Advance();
                return new ConstantNode(current.Value, "unknown");
        }
    }
}

View File

@@ -0,0 +1,62 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses prefix unary operators (<c>! ~ - + * &amp; ++ --</c>), recursing so that stacked
    /// prefixes nest right-to-left; falls through to postfix parsing otherwise.
    /// </summary>
    private AstNode ParseUnary()
    {
        if (Peek().Value is not ("!" or "~" or "-" or "+" or "*" or "&" or "++" or "--"))
        {
            return ParsePostfix();
        }

        var symbol = Advance().Value;
        var target = ParseUnary();
        return new UnaryOpNode(target, symbol, IsPrefix: true);
    }

    /// <summary>
    /// Parses a primary expression followed by any number of postfix forms:
    /// call <c>(...)</c>, index <c>[...]</c>, field access <c>.</c>/<c>-&gt;</c>, and <c>++</c>/<c>--</c>.
    /// </summary>
    /// <remarks>
    /// A call is only turned into a <see cref="CallNode"/> when the callee is a plain
    /// <see cref="VariableNode"/>; for any other callee expression the argument list is parsed
    /// and discarded and the callee expression is kept unchanged.
    /// </remarks>
    private AstNode ParsePostfix()
    {
        var current = ParsePrimary();

        while (true)
        {
            switch (Peek().Value)
            {
                case "(":
                {
                    Advance();
                    var arguments = ParseArgumentList();
                    Expect(TokenType.Bracket, ")");
                    if (current is VariableNode callee)
                    {
                        current = new CallNode(callee.Name, arguments);
                    }

                    break;
                }

                case "[":
                {
                    Advance();
                    var subscript = ParseExpression();
                    Expect(TokenType.Bracket, "]");
                    current = new ArrayAccessNode(current, subscript);
                    break;
                }

                case "." or "->":
                {
                    var viaPointer = Advance().Value == "->";
                    var member = Expect(TokenType.Identifier).Value;
                    current = new FieldAccessNode(current, member, viaPointer);
                    break;
                }

                case "++" or "--":
                {
                    var symbol = Advance().Value;
                    current = new UnaryOpNode(current, symbol, IsPrefix: false);
                    break;
                }

                default:
                    return current;
            }
        }
    }
}

View File

@@ -0,0 +1,25 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses a full expression. Assignment is the loosest-binding level handled here.
    /// </summary>
    private AstNode ParseExpression()
    {
        return ParseAssignment();
    }

    /// <summary>
    /// Parses an assignment or compound assignment. The right-hand side recurses into this
    /// method, so <c>a = b = c</c> groups as <c>a = (b = c)</c>.
    /// </summary>
    /// <returns>
    /// An <see cref="AssignmentNode"/> when an assignment operator follows the left operand;
    /// otherwise the left operand itself.
    /// </returns>
    private AstNode ParseAssignment()
    {
        var left = ParseLogicalOr();

        // "%=" is accepted alongside the other compound operators, matching the "%" operator
        // handled by the multiplicative level.
        if (Peek().Value is "=" or "+=" or "-=" or "*=" or "/=" or "%=" or "&=" or "|=" or "^=" or "<<=" or ">>=")
        {
            var op = Advance().Value;
            var right = ParseAssignment();
            return new AssignmentNode(left, right, op);
        }

        return left;
    }
}

View File

@@ -0,0 +1,32 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Returns whether <paramref name="value"/> is one of the type keywords recognized by the parser.
    /// </summary>
    private static bool IsType(string value) => value switch
    {
        "int" or "char" or "void" or "long" or "short" or "float" or "double" => true,
        "unsigned" or "signed" or "const" or "struct" or "union" or "enum" => true,
        "undefined" or "undefined1" or "undefined2" or "undefined4" or "undefined8" => true,
        "byte" or "word" or "dword" or "qword" or "pointer" or "code" or "uint" or "ulong" => true,
        _ => false,
    };

    /// <summary>
    /// Returns the current token without consuming it, or an empty punctuation token at end of input.
    /// </summary>
    private Token Peek()
    {
        if (_pos >= _tokens.Count)
        {
            return new Token(TokenType.Punctuation, "", 0, 0);
        }

        return _tokens[_pos];
    }

    /// <summary>
    /// Returns the token <paramref name="offset"/> positions ahead without consuming anything,
    /// or an empty punctuation token when that position is past the end of input.
    /// </summary>
    private Token PeekAhead(int offset)
    {
        var index = _pos + offset;
        if (index >= _tokens.Count)
        {
            return new Token(TokenType.Punctuation, "", 0, 0);
        }

        return _tokens[index];
    }

    /// <summary>
    /// Consumes and returns the current token. At end of input nothing is consumed and an
    /// empty punctuation token is returned.
    /// </summary>
    private Token Advance()
    {
        if (_pos >= _tokens.Count)
        {
            return new Token(TokenType.Punctuation, "", 0, 0);
        }

        var consumed = _tokens[_pos];
        _pos++;
        return consumed;
    }

    /// <summary>
    /// Consumes and returns the current token, nominally one of the given
    /// <paramref name="type"/> (and <paramref name="value"/>, when supplied).
    /// </summary>
    /// <remarks>
    /// A mismatch is not reported: the unexpected token is consumed and returned exactly as
    /// a matching token would be.
    /// </remarks>
    private Token Expect(TokenType type, string? value = null)
    {
        var current = Peek();
        var matches = current.Type == type && (value is null || current.Value == value);
        if (matches)
        {
            return Advance();
        }

        // NOTE(review): mismatch path is identical to the match path; presumably lenient
        // recovery is intended for imperfect decompiler output — confirm, or surface a diagnostic.
        return Advance();
    }
}

View File

@@ -0,0 +1,124 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Recursive-descent parser that turns a token list for C-like decompiled code into an AST.
/// This part holds the parser state, the function-level entry point, and the parsing of
/// types, parameter lists, blocks and statement dispatch.
/// </summary>
internal sealed partial class RecursiveParser
{
    private readonly List<Token> _tokens;

    // Index of the next token to consume; it only moves forward and never passes _tokens.Count.
    private int _pos;

    /// <summary>Creates a parser positioned at the first token of <paramref name="tokens"/>.</summary>
    public RecursiveParser(List<Token> tokens)
    {
        _tokens = tokens;
        _pos = 0;
    }

    /// <summary>
    /// Parses <c>returnType name(parameters) { body }</c>.
    /// </summary>
    /// <returns>A <see cref="FunctionNode"/> for the parsed function.</returns>
    public AstNode ParseFunction()
    {
        var returnType = ParseType();
        var name = Expect(TokenType.Identifier).Value;
        Expect(TokenType.Bracket, "(");
        var parameters = ParseParameterList();
        Expect(TokenType.Bracket, ")");
        var body = ParseBlock();
        return new FunctionNode(name, returnType, parameters, body);
    }

    /// <summary>
    /// Parses a type: leading qualifier/tag keywords, one base-type token, then any number of <c>*</c>.
    /// </summary>
    /// <returns>
    /// The type as text: qualifiers separated by single spaces, pointer stars appended directly
    /// (for example <c>unsigned char*</c> or <c>struct foo*</c>).
    /// </returns>
    private string ParseType()
    {
        var type = new System.Text.StringBuilder();

        // "struct", "union" and "enum" are prefixes too: the tag name that follows them is the
        // base-type token. Without this, "struct foo" would stop after "struct" and leave the
        // tag to be misread as the declared name or the closing bracket of a cast.
        while (Peek().Value is "const" or "unsigned" or "signed" or "static" or "extern"
            or "struct" or "union" or "enum")
        {
            type.Append(Advance().Value);
            type.Append(' ');
        }

        // Base type name (or tag name); consumed unconditionally.
        type.Append(Advance().Value);

        while (Peek().Value == "*")
        {
            type.Append(Advance().Value);
        }

        return type.ToString().Trim();
    }

    /// <summary>
    /// Parses a comma-separated parameter list. The caller has already consumed <c>(</c>;
    /// the closing <c>)</c> is left for the caller.
    /// </summary>
    /// <returns>
    /// The parameters in order. Empty for <c>()</c> and <c>(void)</c>. A parameter without an
    /// identifier is named <c>param_N</c>, where N is its zero-based index.
    /// </returns>
    private ImmutableArray<ParameterNode> ParseParameterList()
    {
        var parameters = new List<ParameterNode>();
        var index = 0;

        if (Peek().Value == ")")
        {
            return [];
        }

        if (Peek().Value == "void" && PeekAhead(1).Value == ")")
        {
            Advance();
            return [];
        }

        do
        {
            // Skip the separator that ended the previous iteration.
            if (Peek().Value == ",")
            {
                Advance();
            }

            var type = ParseType();
            var name = Peek().Type == TokenType.Identifier ? Advance().Value : $"param_{index}";
            parameters.Add(new ParameterNode(name, type, index));
            index++;
        }
        while (Peek().Value == ",");

        return [.. parameters];
    }

    /// <summary>
    /// Parses <c>{ statement* }</c>. Statements that parse to <c>null</c> (for example a stray
    /// <c>;</c>) are dropped.
    /// </summary>
    private BlockNode ParseBlock()
    {
        Expect(TokenType.Bracket, "{");
        var statements = new List<AstNode>();

        // Stop at end of input as well as at "}". Advance() does not move past the last token,
        // so without the bounds check an unterminated block would loop forever, appending a
        // placeholder node on every pass.
        while (_pos < _tokens.Count && Peek().Value != "}")
        {
            var stmt = ParseStatement();
            if (stmt is not null)
            {
                statements.Add(stmt);
            }
        }

        Expect(TokenType.Bracket, "}");
        return new BlockNode([.. statements]);
    }

    /// <summary>
    /// Dispatches on the current token's text to the matching statement parser; anything that
    /// is not a recognized keyword, <c>{</c> or <c>;</c> is parsed as an expression statement.
    /// </summary>
    /// <returns>The parsed statement, or <c>null</c> when the statement produces no node.</returns>
    private AstNode? ParseStatement()
    {
        var token = Peek();
        return token.Value switch
        {
            "if" => ParseIf(),
            "while" => ParseWhile(),
            "for" => ParseFor(),
            "do" => ParseDoWhile(),
            "return" => ParseReturn(),
            "break" => ParseBreak(),
            "continue" => ParseContinue(),
            "{" => ParseBlock(),
            ";" => SkipSemicolon(),
            _ => ParseExpressionStatement()
        };
    }
}

View File

@@ -0,0 +1,26 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// A semantic equivalence between AST nodes.
/// </summary>
/// <param name="NodeA">Node from the first tree.</param>
/// <param name="NodeB">Node from the second tree.</param>
/// <param name="Type">Kind of equivalence.</param>
/// <param name="Confidence">Confidence of the equivalence.</param>
/// <param name="Explanation">Optional human-readable explanation, or <c>null</c>.</param>
// NOTE(review): Confidence is presumably in 0..1 — confirm; no range is enforced here.
public sealed record SemanticEquivalence(
AstNode NodeA,
AstNode NodeB,
EquivalenceType Type,
decimal Confidence,
string? Explanation);
/// <summary>
/// Types of semantic equivalence.
/// </summary>
// Members use implicit numbering (Identical = 0 ... Semantically = 5).
// NOTE(review): the exact criteria for each member are defined by the comparison code, not here;
// "Semantically" presumably means "equivalent in meaning but not matched by a more specific kind" — confirm.
public enum EquivalenceType
{
Identical,
Renamed,
Reordered,
Optimized,
Inlined,
Semantically
}

View File

@@ -0,0 +1,8 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Source location in decompiled code.
/// </summary>
/// <param name="Line">Line of the location.</param>
/// <param name="Column">Column of the location.</param>
/// <param name="Length">Length of the located span.</param>
// NOTE(review): whether Line/Column are 0- or 1-based, and the unit of Length, are not stated here — confirm with the producer.
public sealed record SourceLocation(int Line, int Column, int Length);

View File

@@ -4,5 +4,5 @@ Source of truth: `docs/implplan/SPRINT_20260130_002_Tools_csproj_remediation_sol
| Task ID | Status | Notes |
| --- | --- | --- |
| REMED-05 | TODO | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Decompiler/StellaOps.BinaryIndex.Decompiler.md. |
| REMED-05 | DONE | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Decompiler/StellaOps.BinaryIndex.Decompiler.md. |
| REMED-06 | DONE | SOLID review notes captured for SPRINT_20260130_002. |

View File

@@ -0,0 +1,5 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// A lexical token: its category, its text, and the position it carries.
/// </summary>
/// <param name="Type">Category of the token.</param>
/// <param name="Value">Text of the token.</param>
/// <param name="Line">Line associated with the token.</param>
/// <param name="Column">Column associated with the token.</param>
// NOTE(review): whether Line/Column are 0- or 1-based is not stated here — confirm with the tokenizer.
internal readonly record struct Token(TokenType Type, string Value, int Line, int Column);

View File

@@ -0,0 +1,15 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Decompiler;
/// <summary>
/// Categories of lexical tokens.
/// </summary>
// NOTE(review): the per-member notes below describe how the category is presumably used;
// the authoritative mapping lives in the tokenizer — confirm there.
internal enum TokenType
{
// Names (variables, functions, fields).
Identifier,
// Reserved words such as "while".
Keyword,
// Numeric literals.
Number,
// String literals.
String,
// Character literals.
Char,
// Operator symbols.
Operator,
// Grouping symbols such as ( ) { } [ ].
Bracket,
// Separators such as ";".
Punctuation
}

View File

@@ -71,70 +71,3 @@ public interface IDisassemblyPlugin
/// <returns>Enumerable of disassembled instructions.</returns>
IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol);
}
/// <summary>
/// Registry for disassembly plugins. Manages plugin discovery and selection.
/// </summary>
public interface IDisassemblyPluginRegistry
{
/// <summary>
/// Gets all registered plugins.
/// </summary>
IReadOnlyList<IDisassemblyPlugin> Plugins { get; }
/// <summary>
/// Finds the best plugin for the given architecture and format.
/// </summary>
/// <param name="architecture">Target CPU architecture.</param>
/// <param name="format">Target binary format.</param>
/// <returns>The best matching plugin, or null if none found.</returns>
// NOTE(review): "best" is not defined by this contract; presumably the match with the
// highest DisassemblyCapabilities.Priority — confirm against the implementation.
IDisassemblyPlugin? FindPlugin(CpuArchitecture architecture, BinaryFormat format);
/// <summary>
/// Finds all plugins that support the given architecture.
/// </summary>
/// <param name="architecture">Target CPU architecture.</param>
/// <returns>All matching plugins ordered by priority.</returns>
IEnumerable<IDisassemblyPlugin> FindPluginsForArchitecture(CpuArchitecture architecture);
/// <summary>
/// Finds all plugins that support the given format.
/// </summary>
/// <param name="format">Target binary format.</param>
/// <returns>All matching plugins ordered by priority.</returns>
IEnumerable<IDisassemblyPlugin> FindPluginsForFormat(BinaryFormat format);
/// <summary>
/// Gets a plugin by its unique identifier.
/// </summary>
/// <param name="pluginId">The plugin identifier.</param>
/// <returns>The plugin if found, null otherwise.</returns>
// NOTE(review): case sensitivity of the identifier match is not specified here — confirm.
IDisassemblyPlugin? GetPlugin(string pluginId);
}
/// <summary>
/// Facade service for disassembly operations. Automatically selects the best plugin.
/// </summary>
// NOTE(review): for both LoadBinary overloads, the behaviour when preferredPluginId matches no
// plugin, or when no plugin can load the input, is not specified by this contract — confirm
// (fallback vs. exception) against the implementation.
public interface IDisassemblyService
{
/// <summary>
/// Loads a binary and automatically selects the best plugin.
/// </summary>
/// <param name="stream">The binary stream to load.</param>
/// <param name="preferredPluginId">Optional preferred plugin ID.</param>
/// <returns>Binary information and the plugin used.</returns>
(BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(Stream stream, string? preferredPluginId = null);
/// <summary>
/// Loads a binary from bytes and automatically selects the best plugin.
/// </summary>
/// <param name="bytes">The binary data.</param>
/// <param name="preferredPluginId">Optional preferred plugin ID.</param>
/// <returns>Binary information and the plugin used.</returns>
(BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(ReadOnlySpan<byte> bytes, string? preferredPluginId = null);
/// <summary>
/// Gets the plugin registry.
/// </summary>
IDisassemblyPluginRegistry Registry { get; }
}

View File

@@ -0,0 +1,43 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Registry for disassembly plugins. Manages plugin discovery and selection.
/// </summary>
public interface IDisassemblyPluginRegistry
{
/// <summary>
/// Gets all registered plugins.
/// </summary>
IReadOnlyList<IDisassemblyPlugin> Plugins { get; }
/// <summary>
/// Finds the best plugin for the given architecture and format.
/// </summary>
/// <param name="architecture">Target CPU architecture.</param>
/// <param name="format">Target binary format.</param>
/// <returns>The best matching plugin, or null if none found.</returns>
// NOTE(review): "best" is not defined by this contract; presumably the match with the
// highest DisassemblyCapabilities.Priority — confirm against the implementation.
IDisassemblyPlugin? FindPlugin(CpuArchitecture architecture, BinaryFormat format);
/// <summary>
/// Finds all plugins that support the given architecture.
/// </summary>
/// <param name="architecture">Target CPU architecture.</param>
/// <returns>All matching plugins ordered by priority.</returns>
IEnumerable<IDisassemblyPlugin> FindPluginsForArchitecture(CpuArchitecture architecture);
/// <summary>
/// Finds all plugins that support the given format.
/// </summary>
/// <param name="format">Target binary format.</param>
/// <returns>All matching plugins ordered by priority.</returns>
IEnumerable<IDisassemblyPlugin> FindPluginsForFormat(BinaryFormat format);
/// <summary>
/// Gets a plugin by its unique identifier.
/// </summary>
/// <param name="pluginId">The plugin identifier.</param>
/// <returns>The plugin if found, null otherwise.</returns>
// NOTE(review): case sensitivity of the identifier match is not specified here — confirm.
IDisassemblyPlugin? GetPlugin(string pluginId);
}

View File

@@ -0,0 +1,30 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Facade service for disassembly operations. Automatically selects the best plugin.
/// </summary>
// NOTE(review): for both LoadBinary overloads, the behaviour when preferredPluginId matches no
// plugin, or when no plugin can load the input, is not specified by this contract — confirm
// (fallback vs. exception) against the implementation.
public interface IDisassemblyService
{
/// <summary>
/// Loads a binary and automatically selects the best plugin.
/// </summary>
/// <param name="stream">The binary stream to load.</param>
/// <param name="preferredPluginId">Optional preferred plugin ID.</param>
/// <returns>Binary information and the plugin used.</returns>
(BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(Stream stream, string? preferredPluginId = null);
/// <summary>
/// Loads a binary from bytes and automatically selects the best plugin.
/// </summary>
/// <param name="bytes">The binary data.</param>
/// <param name="preferredPluginId">Optional preferred plugin ID.</param>
/// <returns>Binary information and the plugin used.</returns>
(BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(ReadOnlySpan<byte> bytes, string? preferredPluginId = null);
/// <summary>
/// Gets the plugin registry.
/// </summary>
IDisassemblyPluginRegistry Registry { get; }
}

View File

@@ -1,348 +0,0 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// CPU architecture identifier.
/// </summary>
// Every member has an explicit numeric value, and Unknown is 0 (the default of the enum).
// NOTE(review): explicit numbering suggests the values are stored or exchanged — confirm
// before renumbering or reusing a value.
public enum CpuArchitecture
{
/// <summary>Unknown architecture.</summary>
Unknown = 0,
/// <summary>Intel/AMD 32-bit x86.</summary>
X86 = 1,
/// <summary>Intel/AMD 64-bit x86-64 (amd64).</summary>
X86_64 = 2,
/// <summary>ARM 32-bit (ARMv7).</summary>
ARM32 = 3,
/// <summary>ARM 64-bit (AArch64/ARMv8).</summary>
ARM64 = 4,
/// <summary>MIPS 32-bit.</summary>
MIPS32 = 5,
/// <summary>MIPS 64-bit.</summary>
MIPS64 = 6,
/// <summary>RISC-V 64-bit.</summary>
RISCV64 = 7,
/// <summary>PowerPC 32-bit.</summary>
PPC32 = 8,
/// <summary>PowerPC 64-bit.</summary>
PPC64 = 9,
/// <summary>SPARC.</summary>
SPARC = 10,
/// <summary>SuperH SH4.</summary>
SH4 = 11,
/// <summary>AVR microcontroller.</summary>
AVR = 12,
/// <summary>Ethereum Virtual Machine.</summary>
EVM = 13,
/// <summary>WebAssembly.</summary>
WASM = 14
}
/// <summary>
/// Binary executable format.
/// </summary>
// Every member has an explicit numeric value, and Unknown is 0 (the default of the enum).
// NOTE(review): explicit numbering suggests the values are stored or exchanged — confirm
// before renumbering or reusing a value.
public enum BinaryFormat
{
/// <summary>Unknown format.</summary>
Unknown = 0,
/// <summary>Raw binary data (no format metadata).</summary>
Raw = 1,
/// <summary>Executable and Linkable Format (Linux, BSD, etc.).</summary>
ELF = 2,
/// <summary>Portable Executable (Windows).</summary>
PE = 3,
/// <summary>Mach-O (macOS, iOS).</summary>
MachO = 4,
/// <summary>WebAssembly module.</summary>
WASM = 5
}
/// <summary>
/// Capability descriptor of a disassembly plugin: identity, supported targets,
/// optional features, and selection priority.
/// </summary>
public sealed record DisassemblyCapabilities
{
    /// <summary>Unique identifier of the plugin.</summary>
    public required string PluginId { get; init; }

    /// <summary>Display name of the disassembly engine.</summary>
    public required string Name { get; init; }

    /// <summary>Version of the underlying disassembly library.</summary>
    public required string Version { get; init; }

    /// <summary>CPU architectures the plugin supports.</summary>
    public required ImmutableHashSet<CpuArchitecture> SupportedArchitectures { get; init; }

    /// <summary>Binary formats the plugin supports.</summary>
    public required ImmutableHashSet<BinaryFormat> SupportedFormats { get; init; }

    /// <summary>Whether the plugin can lift code to an intermediate representation.</summary>
    public bool SupportsLifting { get; init; }

    /// <summary>Whether the plugin can recover control flow graphs.</summary>
    public bool SupportsCfgRecovery { get; init; }

    /// <summary>
    /// Selection priority used when several plugins support the same architecture/format.
    /// Higher values indicate higher priority. Defaults to 0.
    /// </summary>
    public int Priority { get; init; }

    /// <summary>
    /// Returns whether <paramref name="arch"/> is in <see cref="SupportedArchitectures"/>.
    /// </summary>
    public bool SupportsArchitecture(CpuArchitecture arch)
    {
        return SupportedArchitectures.Contains(arch);
    }

    /// <summary>
    /// Returns whether <paramref name="format"/> is in <see cref="SupportedFormats"/>.
    /// </summary>
    public bool SupportsFormat(BinaryFormat format)
    {
        return SupportedFormats.Contains(format);
    }

    /// <summary>
    /// Returns whether the plugin supports both the given architecture and the given format.
    /// </summary>
    public bool CanHandle(CpuArchitecture arch, BinaryFormat format)
    {
        if (!SupportsArchitecture(arch))
        {
            return false;
        }

        return SupportsFormat(format);
    }
}
/// <summary>
/// Information about a loaded binary.
/// </summary>
/// <param name="Format">Binary format: ELF, PE, MachO, etc.</param>
/// <param name="Architecture">CPU architecture.</param>
/// <param name="Bitness">32 or 64 bit.</param>
/// <param name="Endianness">Byte order.</param>
/// <param name="Abi">Application binary interface hint (gnu, musl, msvc, darwin).</param>
/// <param name="EntryPoint">Entry point address if available.</param>
/// <param name="BuildId">Build identifier if present (e.g., GNU build-id).</param>
/// <param name="Metadata">Additional metadata from the binary.</param>
/// <param name="Handle">Internal handle for the disassembly engine (engine-specific).</param>
// Metadata and Handle are compared through their runtime Equals in the record's generated
// equality; for types that do not override Equals that means reference equality, so two
// BinaryInfo values describing the same binary are not necessarily Equal.
// NOTE(review): Handle is untyped; presumably only the plugin that produced this instance
// can interpret it — confirm before passing a BinaryInfo to a different plugin.
public sealed record BinaryInfo(
BinaryFormat Format,
CpuArchitecture Architecture,
int Bitness,
Endianness Endianness,
string? Abi,
ulong? EntryPoint,
string? BuildId,
IReadOnlyDictionary<string, object> Metadata,
object Handle);
/// <summary>
/// Byte order.
/// </summary>
// Implicit numbering: Little = 0 (the default value of the enum), Big = 1.
public enum Endianness
{
/// <summary>Little-endian (LSB first).</summary>
Little,
/// <summary>Big-endian (MSB first).</summary>
Big
}
/// <summary>
/// Represents a code region (section) in a binary.
/// </summary>
/// <param name="Name">Section name: .text, .rodata, etc.</param>
/// <param name="VirtualAddress">Virtual address in memory.</param>
/// <param name="FileOffset">Offset in the binary file.</param>
/// <param name="Size">Size in bytes.</param>
/// <param name="IsExecutable">Whether the region contains executable code.</param>
/// <param name="IsReadable">Whether the region is readable.</param>
/// <param name="IsWritable">Whether the region is writable.</param>
// NOTE(review): whether VirtualAddress is absolute or relative to the image base, and whether
// Size is the in-file or in-memory size, is not stated here — confirm with the producing plugin.
public sealed record CodeRegion(
string Name,
ulong VirtualAddress,
ulong FileOffset,
ulong Size,
bool IsExecutable,
bool IsReadable,
bool IsWritable);
/// <summary>
/// Information about a symbol in the binary.
/// </summary>
/// <param name="Name">Symbol name.</param>
/// <param name="Address">Virtual address of the symbol.</param>
/// <param name="Size">Size in bytes (0 if unknown).</param>
/// <param name="Type">Symbol type.</param>
/// <param name="Binding">Symbol binding.</param>
/// <param name="Section">Section containing the symbol.</param>
// Section is nullable: a symbol may carry no section name.
// NOTE(review): whether Name is the raw (mangled) or demangled form is not stated here — confirm.
public sealed record SymbolInfo(
string Name,
ulong Address,
ulong Size,
SymbolType Type,
SymbolBinding Binding,
string? Section);
/// <summary>
/// Type of symbol.
/// </summary>
// Implicit numbering: Unknown = 0 (the default value of the enum) through Tls = 6.
public enum SymbolType
{
/// <summary>Unknown or unspecified type.</summary>
Unknown,
/// <summary>Function/procedure.</summary>
Function,
/// <summary>Data object.</summary>
Object,
/// <summary>Section symbol.</summary>
Section,
/// <summary>Source file name.</summary>
File,
/// <summary>Common block symbol.</summary>
Common,
/// <summary>Thread-local storage.</summary>
Tls
}
/// <summary>
/// Binding (visibility) of a symbol.
/// </summary>
public enum SymbolBinding
{
    /// <summary>Binding is unknown.</summary>
    Unknown = 0,

    /// <summary>Local symbol, not visible outside the object.</summary>
    Local = 1,

    /// <summary>Global symbol, visible to other objects.</summary>
    Global = 2,

    /// <summary>Weak symbol, which can be overridden.</summary>
    Weak = 3,
}
/// <summary>
/// A disassembled instruction.
/// </summary>
/// <remarks>
/// Positional record: properties are init-only and equality is value-based.
/// </remarks>
/// <param name="Address">Virtual address of the instruction.</param>
/// <param name="RawBytes">Raw encoded bytes of the instruction.</param>
/// <param name="Mnemonic">Instruction mnemonic (e.g., MOV, ADD, JMP).</param>
/// <param name="OperandsText">Text representation of all operands, as printed by the disassembler.</param>
/// <param name="Kind">Coarse classification of the instruction.</param>
/// <param name="Operands">Operands parsed into structured form.</param>
public sealed record DisassembledInstruction(
ulong Address,
ImmutableArray<byte> RawBytes,
string Mnemonic,
string OperandsText,
InstructionKind Kind,
ImmutableArray<Operand> Operands);
/// <summary>
/// Coarse classification of an instruction.
/// </summary>
public enum InstructionKind
{
    /// <summary>Unknown or unclassified instruction.</summary>
    Unknown = 0,

    /// <summary>Arithmetic operation (ADD, SUB, MUL, DIV).</summary>
    Arithmetic = 1,

    /// <summary>Logical operation (AND, OR, XOR, NOT).</summary>
    Logic = 2,

    /// <summary>Data movement (MOV, PUSH, POP).</summary>
    Move = 3,

    /// <summary>Memory load operation.</summary>
    Load = 4,

    /// <summary>Memory store operation.</summary>
    Store = 5,

    /// <summary>Unconditional branch (JMP).</summary>
    Branch = 6,

    /// <summary>Conditional branch (JE, JNE, JL, etc.).</summary>
    ConditionalBranch = 7,

    /// <summary>Function call.</summary>
    Call = 8,

    /// <summary>Function return.</summary>
    Return = 9,

    /// <summary>No operation.</summary>
    Nop = 10,

    /// <summary>System call.</summary>
    Syscall = 11,

    /// <summary>Software interrupt.</summary>
    Interrupt = 12,

    /// <summary>Compare operation.</summary>
    Compare = 13,

    /// <summary>Shift operation.</summary>
    Shift = 14,

    /// <summary>Vector/SIMD operation.</summary>
    Vector = 15,

    /// <summary>Floating point operation.</summary>
    FloatingPoint = 16,
}
/// <summary>
/// An instruction operand.
/// </summary>
/// <remarks>
/// Only <paramref name="Type"/> and <paramref name="Text"/> are mandatory; every other
/// component defaults to <c>null</c> and is filled in only when it applies to the operand.
/// </remarks>
/// <param name="Type">Operand type.</param>
/// <param name="Text">Text representation of the operand.</param>
/// <param name="Value">Immediate value if applicable; otherwise <c>null</c>.</param>
/// <param name="Register">Register name if applicable; otherwise <c>null</c>.</param>
/// <param name="MemoryBase">Base register for a memory operand.</param>
/// <param name="MemoryIndex">Index register for a memory operand.</param>
/// <param name="MemoryScale">Scale factor for an indexed memory operand.</param>
/// <param name="MemoryDisplacement">Displacement for a memory operand.</param>
public sealed record Operand(
OperandType Type,
string Text,
long? Value = null,
string? Register = null,
string? MemoryBase = null,
string? MemoryIndex = null,
int? MemoryScale = null,
long? MemoryDisplacement = null);
/// <summary>
/// Kind of an instruction operand.
/// </summary>
public enum OperandType
{
    /// <summary>Operand type is unknown.</summary>
    Unknown = 0,

    /// <summary>CPU register.</summary>
    Register = 1,

    /// <summary>Immediate value.</summary>
    Immediate = 2,

    /// <summary>Memory reference.</summary>
    Memory = 3,

    /// <summary>Address or label.</summary>
    Address = 4,
}

View File

@@ -0,0 +1,22 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Binary executable (container) format.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly, so reordering members does not change them.
/// </remarks>
public enum BinaryFormat
{
/// <summary>Unknown or unrecognized format.</summary>
Unknown = 0,
/// <summary>Raw binary data (no format metadata).</summary>
Raw = 1,
/// <summary>Executable and Linkable Format (Linux, BSD, etc.).</summary>
ELF = 2,
/// <summary>Portable Executable (Windows).</summary>
PE = 3,
/// <summary>Mach-O (macOS, iOS).</summary>
MachO = 4,
/// <summary>WebAssembly module (the container; see also <c>CpuArchitecture.WASM</c>).</summary>
WASM = 5
}

View File

@@ -0,0 +1,26 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Information about a loaded binary.
/// </summary>
/// <remarks>
/// Positional record. <paramref name="Handle"/> is opaque to callers: each disassembly
/// engine stores its own handle type there and validates it when the record is passed back.
/// </remarks>
/// <param name="Format">Binary format: ELF, PE, MachO, etc.</param>
/// <param name="Architecture">CPU architecture.</param>
/// <param name="Bitness">Word size in bits (typically 32 or 64; the B2R2 plugin can also report 8, 16, 128 or 256).</param>
/// <param name="Endianness">Byte order.</param>
/// <param name="Abi">Application binary interface hint (gnu, msvc, darwin), or <c>null</c> when none applies.</param>
/// <param name="EntryPoint">Entry point address if available.</param>
/// <param name="BuildId">Build identifier if present (e.g., GNU build-id); may be <c>null</c>.</param>
/// <param name="Metadata">Additional engine-provided metadata about the binary.</param>
/// <param name="Handle">Internal handle for the disassembly engine (engine-specific).</param>
public sealed record BinaryInfo(
BinaryFormat Format,
CpuArchitecture Architecture,
int Bitness,
Endianness Endianness,
string? Abi,
ulong? EntryPoint,
string? BuildId,
IReadOnlyDictionary<string, object> Metadata,
object Handle);

View File

@@ -0,0 +1,22 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Represents a code region (section) in a binary.
/// </summary>
/// <remarks>
/// Positional record: properties are init-only and equality is value-based.
/// </remarks>
/// <param name="Name">Section name, e.g. <c>.text</c> or <c>.rodata</c>.</param>
/// <param name="VirtualAddress">Virtual address of the first byte of the region in memory.</param>
/// <param name="FileOffset">Offset of the region within the binary file.</param>
/// <param name="Size">Size of the region in bytes.</param>
/// <param name="IsExecutable">Whether the region contains executable code.</param>
/// <param name="IsReadable">Whether the region is readable.</param>
/// <param name="IsWritable">Whether the region is writable.</param>
public sealed record CodeRegion(
string Name,
ulong VirtualAddress,
ulong FileOffset,
ulong Size,
bool IsExecutable,
bool IsReadable,
bool IsWritable);

View File

@@ -0,0 +1,40 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// CPU architecture identifier.
/// </summary>
/// <remarks>
/// Numeric values are assigned explicitly, so reordering members does not change them.
/// </remarks>
public enum CpuArchitecture
{
/// <summary>Unknown or unrecognized architecture.</summary>
Unknown = 0,
/// <summary>Intel/AMD 32-bit x86.</summary>
X86 = 1,
/// <summary>Intel/AMD 64-bit x86-64 (amd64).</summary>
X86_64 = 2,
/// <summary>ARM 32-bit (ARMv7).</summary>
ARM32 = 3,
/// <summary>ARM 64-bit (AArch64/ARMv8).</summary>
ARM64 = 4,
/// <summary>MIPS 32-bit.</summary>
MIPS32 = 5,
/// <summary>MIPS 64-bit.</summary>
MIPS64 = 6,
/// <summary>RISC-V 64-bit.</summary>
RISCV64 = 7,
/// <summary>PowerPC 32-bit.</summary>
PPC32 = 8,
/// <summary>PowerPC 64-bit.</summary>
PPC64 = 9,
/// <summary>SPARC.</summary>
SPARC = 10,
/// <summary>SuperH SH4.</summary>
SH4 = 11,
/// <summary>AVR microcontroller.</summary>
AVR = 12,
/// <summary>Ethereum Virtual Machine.</summary>
EVM = 13,
/// <summary>WebAssembly (the instruction set; see also <c>BinaryFormat.WASM</c>).</summary>
WASM = 14
}

View File

@@ -0,0 +1,22 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// A disassembled instruction.
/// </summary>
/// <remarks>
/// Positional record: properties are init-only and equality is value-based.
/// </remarks>
/// <param name="Address">Virtual address of the instruction.</param>
/// <param name="RawBytes">Raw encoded bytes of the instruction.</param>
/// <param name="Mnemonic">Instruction mnemonic (e.g., MOV, ADD, JMP).</param>
/// <param name="OperandsText">Text representation of all operands, as printed by the disassembler.</param>
/// <param name="Kind">Coarse classification of the instruction.</param>
/// <param name="Operands">Operands parsed into structured form.</param>
public sealed record DisassembledInstruction(
ulong Address,
ImmutableArray<byte> RawBytes,
string Mnemonic,
string OperandsText,
InstructionKind Kind,
ImmutableArray<Operand> Operands);

View File

@@ -0,0 +1,70 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Describes what a disassembly plugin can do: which architectures and formats it
/// accepts, which optional analyses it offers, and how it ranks against other plugins.
/// </summary>
public sealed record DisassemblyCapabilities
{
    /// <summary>Unique identifier of the plugin.</summary>
    public required string PluginId { get; init; }

    /// <summary>Display name of the disassembly engine.</summary>
    public required string Name { get; init; }

    /// <summary>Version of the underlying disassembly library.</summary>
    public required string Version { get; init; }

    /// <summary>CPU architectures the plugin supports.</summary>
    public required ImmutableHashSet<CpuArchitecture> SupportedArchitectures { get; init; }

    /// <summary>Binary formats the plugin supports.</summary>
    public required ImmutableHashSet<BinaryFormat> SupportedFormats { get; init; }

    /// <summary>Whether the plugin can lift code to an intermediate representation.</summary>
    public bool SupportsLifting { get; init; }

    /// <summary>Whether the plugin can recover control flow graphs.</summary>
    public bool SupportsCfgRecovery { get; init; }

    /// <summary>
    /// Selection priority when several plugins support the same architecture/format.
    /// Higher values win; defaults to 0.
    /// </summary>
    public int Priority { get; init; }

    /// <summary>Returns whether <paramref name="arch"/> is in <see cref="SupportedArchitectures"/>.</summary>
    public bool SupportsArchitecture(CpuArchitecture arch)
    {
        return SupportedArchitectures.Contains(arch);
    }

    /// <summary>Returns whether <paramref name="format"/> is in <see cref="SupportedFormats"/>.</summary>
    public bool SupportsFormat(BinaryFormat format)
    {
        return SupportedFormats.Contains(format);
    }

    /// <summary>Returns whether the plugin supports both the architecture and the format.</summary>
    public bool CanHandle(CpuArchitecture arch, BinaryFormat format)
    {
        if (!SupportsArchitecture(arch))
        {
            return false;
        }

        return SupportsFormat(format);
    }
}

View File

@@ -0,0 +1,14 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Byte order used for multi-byte values.
/// </summary>
public enum Endianness
{
    /// <summary>Little-endian: least significant byte first.</summary>
    Little = 0,

    /// <summary>Big-endian: most significant byte first.</summary>
    Big = 1,
}

View File

@@ -0,0 +1,44 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Coarse classification of an instruction.
/// </summary>
public enum InstructionKind
{
    /// <summary>Unknown or unclassified instruction.</summary>
    Unknown = 0,

    /// <summary>Arithmetic operation (ADD, SUB, MUL, DIV).</summary>
    Arithmetic = 1,

    /// <summary>Logical operation (AND, OR, XOR, NOT).</summary>
    Logic = 2,

    /// <summary>Data movement (MOV, PUSH, POP).</summary>
    Move = 3,

    /// <summary>Memory load operation.</summary>
    Load = 4,

    /// <summary>Memory store operation.</summary>
    Store = 5,

    /// <summary>Unconditional branch (JMP).</summary>
    Branch = 6,

    /// <summary>Conditional branch (JE, JNE, JL, etc.).</summary>
    ConditionalBranch = 7,

    /// <summary>Function call.</summary>
    Call = 8,

    /// <summary>Function return.</summary>
    Return = 9,

    /// <summary>No operation.</summary>
    Nop = 10,

    /// <summary>System call.</summary>
    Syscall = 11,

    /// <summary>Software interrupt.</summary>
    Interrupt = 12,

    /// <summary>Compare operation.</summary>
    Compare = 13,

    /// <summary>Shift operation.</summary>
    Shift = 14,

    /// <summary>Vector/SIMD operation.</summary>
    Vector = 15,

    /// <summary>Floating point operation.</summary>
    FloatingPoint = 16,
}

View File

@@ -0,0 +1,24 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// An instruction operand.
/// </summary>
/// <remarks>
/// Only <paramref name="Type"/> and <paramref name="Text"/> are mandatory; every other
/// component defaults to <c>null</c> and is filled in only when it applies to the operand.
/// </remarks>
/// <param name="Type">Operand type.</param>
/// <param name="Text">Text representation of the operand.</param>
/// <param name="Value">Immediate value if applicable; otherwise <c>null</c>.</param>
/// <param name="Register">Register name if applicable; otherwise <c>null</c>.</param>
/// <param name="MemoryBase">Base register for a memory operand.</param>
/// <param name="MemoryIndex">Index register for a memory operand.</param>
/// <param name="MemoryScale">Scale factor for an indexed memory operand.</param>
/// <param name="MemoryDisplacement">Displacement for a memory operand.</param>
public sealed record Operand(
OperandType Type,
string Text,
long? Value = null,
string? Register = null,
string? MemoryBase = null,
string? MemoryIndex = null,
int? MemoryScale = null,
long? MemoryDisplacement = null);

View File

@@ -0,0 +1,20 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Kind of an instruction operand.
/// </summary>
public enum OperandType
{
    /// <summary>Operand type is unknown.</summary>
    Unknown = 0,

    /// <summary>CPU register.</summary>
    Register = 1,

    /// <summary>Immediate value.</summary>
    Immediate = 2,

    /// <summary>Memory reference.</summary>
    Memory = 3,

    /// <summary>Address or label.</summary>
    Address = 4,
}

View File

@@ -0,0 +1,56 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly;
/// <summary>
/// Information about a symbol in the binary.
/// </summary>
/// <remarks>
/// Positional record: properties are init-only and equality is value-based.
/// </remarks>
/// <param name="Name">Symbol name.</param>
/// <param name="Address">Virtual address of the symbol.</param>
/// <param name="Size">Size in bytes; <c>0</c> means the size is unknown.</param>
/// <param name="Type">Symbol type (function, object, ...).</param>
/// <param name="Binding">Symbol binding (local, global, weak).</param>
/// <param name="Section">Name of the section containing the symbol, or <c>null</c> when not known.</param>
public sealed record SymbolInfo(
string Name,
ulong Address,
ulong Size,
SymbolType Type,
SymbolBinding Binding,
string? Section);
/// <summary>
/// Kind of entity a symbol refers to.
/// </summary>
public enum SymbolType
{
    /// <summary>Type is unknown or was not specified.</summary>
    Unknown = 0,

    /// <summary>Function or procedure.</summary>
    Function = 1,

    /// <summary>Data object.</summary>
    Object = 2,

    /// <summary>Section symbol.</summary>
    Section = 3,

    /// <summary>Source file name.</summary>
    File = 4,

    /// <summary>Common block symbol.</summary>
    Common = 5,

    /// <summary>Thread-local storage.</summary>
    Tls = 6,
}
/// <summary>
/// Binding (visibility) of a symbol.
/// </summary>
public enum SymbolBinding
{
    /// <summary>Binding is unknown.</summary>
    Unknown = 0,

    /// <summary>Local symbol, not visible outside the object.</summary>
    Local = 1,

    /// <summary>Global symbol, visible to other objects.</summary>
    Global = 2,

    /// <summary>Weak symbol, which can be overridden.</summary>
    Weak = 3,
}

View File

@@ -4,5 +4,5 @@ Source of truth: `docs/implplan/SPRINT_20260130_002_Tools_csproj_remediation_sol
| Task ID | Status | Notes |
| --- | --- | --- |
| REMED-05 | TODO | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Disassembly.Abstractions/StellaOps.BinaryIndex.Disassembly.Abstractions.md. |
| REMED-05 | DONE | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Disassembly.Abstractions/StellaOps.BinaryIndex.Disassembly.Abstractions.md (2026-02-04). |
| REMED-06 | DONE | SOLID review notes captured for SPRINT_20260130_002. |

View File

@@ -0,0 +1,11 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2.FrontEnd;
using B2R2.FrontEnd.BinFile;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
/// <summary>
/// Internal handle for B2R2 binary data. Stored in <c>BinaryInfo.Handle</c> by the
/// B2R2 plugin and unwrapped again when a <c>BinaryInfo</c> is passed back to it.
/// </summary>
/// <param name="BinHandle">The B2R2 handle created for the loaded bytes.</param>
/// <param name="Bytes">The raw bytes the handle was created from; used to slice out instruction bytes.</param>
/// <remarks>
/// Record equality on <paramref name="Bytes"/> compares array references, not contents.
/// </remarks>
internal sealed record B2R2BinaryHandle(BinHandle BinHandle, byte[] Bytes);

View File

@@ -0,0 +1,89 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2;
using B2R2.FrontEnd.BinFile;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2DisassemblyPlugin
{
/// <summary>
/// Translates a <see cref="CpuArchitecture"/> hint into the B2R2 <see cref="ISA"/> used to open a binary.
/// </summary>
/// <param name="arch">Architecture hint supplied by the caller.</param>
/// <returns>
/// The matching ISA. Values without an explicit arm (for example <c>PPC64</c>, <c>WASM</c>
/// and <c>Unknown</c>) fall back to 64-bit Intel.
/// </returns>
private static ISA MapToB2R2Isa(CpuArchitecture arch) => arch switch
{
    CpuArchitecture.X86 => new ISA(Architecture.Intel, WordSize.Bit32),
    CpuArchitecture.X86_64 => new ISA(Architecture.Intel, WordSize.Bit64),
    CpuArchitecture.ARM32 => new ISA(Architecture.ARMv7, WordSize.Bit32),
    CpuArchitecture.ARM64 => new ISA(Architecture.ARMv8, WordSize.Bit64),
    CpuArchitecture.MIPS32 => new ISA(Architecture.MIPS, WordSize.Bit32),
    CpuArchitecture.MIPS64 => new ISA(Architecture.MIPS, WordSize.Bit64),
    CpuArchitecture.RISCV64 => new ISA(Architecture.RISCV, WordSize.Bit64),
    CpuArchitecture.PPC32 => new ISA(Architecture.PPC, Endian.Big, WordSize.Bit32),
    CpuArchitecture.SPARC => new ISA(Architecture.SPARC, Endian.Big),
    CpuArchitecture.SH4 => new ISA(Architecture.SH4),
    CpuArchitecture.AVR => new ISA(Architecture.AVR),
    CpuArchitecture.EVM => new ISA(Architecture.EVM, Endian.Big),

    // Same default that LoadBinary uses when no hint is given at all.
    _ => new ISA(Architecture.Intel, WordSize.Bit64),
};
/// <summary>
/// Translates the ISA reported by B2R2 back into a <see cref="CpuArchitecture"/>.
/// </summary>
/// <param name="isa">ISA reported by B2R2 for the loaded file.</param>
/// <returns>
/// The matching architecture, or <see cref="CpuArchitecture.Unknown"/> for anything not listed.
/// PPC always maps to <c>PPC32</c> and RISC-V to <c>RISCV64</c>, whatever the word size.
/// </returns>
private static CpuArchitecture MapFromB2R2Architecture(ISA isa) => isa.Arch switch
{
    // Intel: decide by word size first, then fall back to the ISA's own x86 flag.
    Architecture.Intel when isa.WordSize == WordSize.Bit32 => CpuArchitecture.X86,
    Architecture.Intel when isa.WordSize == WordSize.Bit64 => CpuArchitecture.X86_64,
    Architecture.Intel => isa.IsX86 ? CpuArchitecture.X86 : CpuArchitecture.X86_64,

    Architecture.ARMv7 => CpuArchitecture.ARM32,
    Architecture.ARMv8 when isa.WordSize == WordSize.Bit64 => CpuArchitecture.ARM64,
    Architecture.ARMv8 => CpuArchitecture.ARM32,

    Architecture.MIPS when isa.WordSize == WordSize.Bit64 => CpuArchitecture.MIPS64,
    Architecture.MIPS => CpuArchitecture.MIPS32,

    Architecture.RISCV => CpuArchitecture.RISCV64,
    Architecture.PPC => CpuArchitecture.PPC32,
    Architecture.SPARC => CpuArchitecture.SPARC,
    Architecture.SH4 => CpuArchitecture.SH4,
    Architecture.AVR => CpuArchitecture.AVR,
    Architecture.EVM => CpuArchitecture.EVM,

    _ => CpuArchitecture.Unknown,
};
/// <summary>
/// Translates the file format detected by B2R2 into a <see cref="BinaryFormat"/>.
/// </summary>
/// <param name="format">File format reported by B2R2.</param>
/// <returns>The matching format, or <see cref="BinaryFormat.Unknown"/> for anything not listed.</returns>
private static BinaryFormat MapFromB2R2Format(FileFormat format) => format switch
{
    FileFormat.ELFBinary => BinaryFormat.ELF,
    FileFormat.PEBinary => BinaryFormat.PE,
    FileFormat.MachBinary => BinaryFormat.MachO,
    FileFormat.WasmBinary => BinaryFormat.WASM,
    FileFormat.RawBinary => BinaryFormat.Raw,
    _ => BinaryFormat.Unknown,
};
/// <summary>
/// Converts a B2R2 <see cref="WordSize"/> into a bit count.
/// </summary>
/// <param name="wordSize">Word size reported by B2R2.</param>
/// <returns>8, 16, 32, 64, 128 or 256; any other value is reported as 64.</returns>
private static int GetBitness(WordSize wordSize) => wordSize switch
{
    WordSize.Bit8 => 8,
    WordSize.Bit16 => 16,
    WordSize.Bit32 => 32,
    WordSize.Bit64 => 64,
    WordSize.Bit128 => 128,
    WordSize.Bit256 => 256,
    _ => 64,
};
/// <summary>
/// Picks an ABI hint from the container format alone; the binary contents are not inspected.
/// </summary>
/// <param name="format">Detected binary format.</param>
/// <returns><c>"gnu"</c> for ELF, <c>"msvc"</c> for PE, <c>"darwin"</c> for Mach-O, otherwise <c>null</c>.</returns>
private static string? DetectAbi(BinaryFormat format) => format switch
{
    BinaryFormat.ELF => "gnu",
    BinaryFormat.PE => "msvc",
    BinaryFormat.MachO => "darwin",
    _ => null,
};
}

View File

@@ -0,0 +1,84 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2.FrontEnd.BinLifter;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2DisassemblyPlugin
{
/// <inheritdoc />
/// <remarks>
/// Arguments and the binary handle are validated when the method is called; decoding
/// itself is lazy and happens as the returned sequence is enumerated. Addresses that
/// fail to decode are skipped one byte at a time so decoding can resynchronise.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="binary"/> or <paramref name="region"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="binary"/> does not carry a B2R2 handle.</exception>
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
{
    // Validate here rather than inside the iterator: an iterator body does not run
    // until the first MoveNext, which would delay these exceptions to the consumer.
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(region);
    var handle = GetHandle(binary);

    return Iterate();

    IEnumerable<DisassembledInstruction> Iterate()
    {
        var lifter = handle.BinHandle.NewLiftingUnit();
        var addr = region.VirtualAddress;

        // Clamp instead of letting VirtualAddress + Size wrap around, which would
        // make the end precede the start and silently yield nothing.
        var endAddr = region.Size > ulong.MaxValue - addr
            ? ulong.MaxValue
            : addr + region.Size;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} to 0x{End:X}",
            region.Name, addr, endAddr);

        while (addr < endAddr)
        {
            IInstruction? instr;
            try
            {
                instr = lifter.ParseInstruction(addr);
            }
            catch (Exception)
            {
                // Deliberate best effort: undecodable bytes are skipped, not fatal.
                instr = null;
            }

            // Advance one byte when nothing was decoded at this address.
            ulong step = 1;
            if (instr is not null && instr.Length != 0)
            {
                yield return MapInstruction(instr, handle, addr);
                step = instr.Length;
            }

            // Equivalent to "addr + step >= endAddr" but cannot overflow.
            if (step >= endAddr - addr)
            {
                yield break;
            }

            addr += step;
        }
    }
}
/// <inheritdoc />
/// <remarks>
/// Wraps the address range in a synthetic executable, read-only <see cref="CodeRegion"/>
/// named after the start address, then delegates to the region overload. The file offset
/// is set to the start address.
/// </remarks>
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length) =>
    Disassemble(
        binary,
        new CodeRegion(
            Name: $"0x{startAddress:X}",
            VirtualAddress: startAddress,
            FileOffset: startAddress,
            Size: length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false));
/// <inheritdoc />
/// <remarks>
/// Symbols with an unknown size (<c>Size == 0</c>) are disassembled over a fixed
/// 4096-byte window starting at the symbol address.
/// </remarks>
public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
{
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(symbol);

    // Window used when the symbol table does not record a size.
    const ulong fallbackSpan = 4096UL;
    var span = symbol.Size == 0 ? fallbackSpan : symbol.Size;

    var symbolRegion = new CodeRegion(
        Name: symbol.Name,
        VirtualAddress: symbol.Address,
        FileOffset: symbol.Address,
        Size: span,
        IsExecutable: true,
        IsReadable: true,
        IsWritable: false);

    return Disassemble(binary, symbolRegion);
}
}

View File

@@ -0,0 +1,76 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2.FrontEnd.BinLifter;
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2DisassemblyPlugin
{
/// <summary>
/// Unwraps the B2R2-specific handle stored in <paramref name="binary"/>.
/// </summary>
/// <param name="binary">Binary info previously produced by this plugin.</param>
/// <returns>The stored <see cref="B2R2BinaryHandle"/>.</returns>
/// <exception cref="ArgumentException">The handle is null or was created by a different engine.</exception>
private static B2R2BinaryHandle GetHandle(BinaryInfo binary) =>
    binary.Handle as B2R2BinaryHandle
    ?? throw new ArgumentException("Invalid binary handle - not a B2R2 handle", nameof(binary));
/// <summary>
/// Converts a decoded B2R2 instruction into a <see cref="DisassembledInstruction"/>.
/// </summary>
/// <param name="instr">Instruction decoded by B2R2.</param>
/// <param name="handle">Handle owning the backing byte buffer.</param>
/// <param name="address">Virtual address the instruction was decoded at.</param>
/// <returns>
/// The mapped instruction. <c>RawBytes</c> is empty when the instruction does not lie
/// entirely inside the backing buffer, measured from the file's base address.
/// </returns>
private static DisassembledInstruction MapInstruction(IInstruction instr, B2R2BinaryHandle handle, ulong address)
{
    // The disassembly text is "<mnemonic> <operands>"; split on the first space only.
    var disasm = instr.Disasm();
    var parts = disasm.Split(' ', 2, StringSplitOptions.RemoveEmptyEntries);
    var mnemonic = parts.Length > 0 ? parts[0] : "???";
    var operandsText = parts.Length > 1 ? parts[1] : "";

    // Do the bounds check in 64-bit unsigned arithmetic. Truncating the offset to
    // int first could alias a far-away address onto a valid index and return the
    // wrong bytes, and "offset + length" could overflow.
    var rawBytes = ImmutableArray<byte>.Empty;
    var baseAddress = handle.BinHandle.File.BaseAddress;
    if (address >= baseAddress)
    {
        ulong offset = address - baseAddress;
        ulong length = instr.Length;
        var available = (ulong)handle.Bytes.Length;

        if (offset <= available && length <= available - offset)
        {
            // Both values fit in int because they are bounded by the array length.
            rawBytes = ImmutableArray.Create(handle.Bytes, (int)offset, (int)length);
        }
    }

    var kind = ClassifyInstruction(instr, mnemonic);
    var operands = ParseOperands(operandsText, mnemonic);

    return new DisassembledInstruction(
        Address: address,
        RawBytes: rawBytes,
        Mnemonic: mnemonic,
        OperandsText: operandsText,
        Kind: kind,
        Operands: operands);
}
/// <summary>
/// Assigns a coarse <see cref="InstructionKind"/> to an instruction.
/// </summary>
/// <remarks>
/// The flags exposed by B2R2 take precedence; the upper-cased mnemonic is consulted
/// only when none of them is set.
/// </remarks>
/// <param name="instr">Instruction decoded by B2R2.</param>
/// <param name="mnemonic">Mnemonic extracted from the disassembly text.</param>
/// <returns>The classification, or <see cref="InstructionKind.Unknown"/> when nothing matches.</returns>
private static InstructionKind ClassifyInstruction(IInstruction instr, string mnemonic)
{
    if (instr.IsRET)
    {
        return InstructionKind.Return;
    }

    if (instr.IsCall)
    {
        return InstructionKind.Call;
    }

    if (instr.IsCondBranch)
    {
        return InstructionKind.ConditionalBranch;
    }

    if (instr.IsBranch)
    {
        return InstructionKind.Branch;
    }

    if (instr.IsNop)
    {
        return InstructionKind.Nop;
    }

    // NOTE(review): interrupts are reported as Syscall although InstructionKind.Interrupt
    // exists - confirm this is intended.
    if (instr.IsInterrupt)
    {
        return InstructionKind.Syscall;
    }

    var name = mnemonic.ToUpperInvariant();
    return name switch
    {
        "ADD" or "SUB" or "MUL" or "DIV" or "IMUL" or "IDIV" or "INC" or "DEC" or "NEG" or "ADC" or "SBB"
            => InstructionKind.Arithmetic,
        "AND" or "OR" or "XOR" or "NOT" or "TEST" or "ORR" or "EOR"
            => InstructionKind.Logic,
        "SHL" or "SHR" or "SAL" or "SAR" or "ROL" or "ROR" or "LSL" or "LSR" or "ASR"
            => InstructionKind.Shift,
        "LEA" or "PUSH" or "POP" or "XCHG"
            => InstructionKind.Move,
        "CMP" or "CMPS" or "SCAS"
            => InstructionKind.Compare,

        // Prefix families. None of the exact names above starts with these prefixes,
        // so testing them last gives the same result. "LD"/"ST" also cover "LDR"/"STR".
        _ when name.StartsWith("MOV", StringComparison.Ordinal) => InstructionKind.Move,
        _ when name.StartsWith("LD", StringComparison.Ordinal) => InstructionKind.Load,
        _ when name.StartsWith("ST", StringComparison.Ordinal) => InstructionKind.Store,

        _ => InstructionKind.Unknown,
    };
}
}

View File

@@ -0,0 +1,72 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2;
using B2R2.FrontEnd;
using B2R2.FrontEnd.BinFile;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2DisassemblyPlugin
{
/// <inheritdoc />
/// <remarks>
/// Copies the rest of <paramref name="stream"/> into memory and delegates to the
/// byte-span overload. The stream is neither rewound nor disposed here.
/// </remarks>
public BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    ArgumentNullException.ThrowIfNull(stream);

    using (var buffer = new MemoryStream())
    {
        stream.CopyTo(buffer);
        var snapshot = buffer.ToArray();
        return LoadBinary(snapshot, archHint, formatHint);
    }
}
/// <inheritdoc />
/// <remarks>
/// Copies <paramref name="bytes"/> into a private array, opens it with B2R2 and reports
/// what B2R2 detected (format, architecture, word size, endianness, entry point).
/// <paramref name="formatHint"/> is not referenced in this method.
/// </remarks>
public BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
// Own copy: the same array is kept in the returned handle for later byte slicing.
var byteArray = bytes.ToArray();
_logger.LogDebug("Loading binary with B2R2 plugin (size: {Size} bytes)", byteArray.Length);
// ISA passed to B2R2: the caller's hint when given, otherwise 64-bit Intel.
var isa = archHint.HasValue
? MapToB2R2Isa(archHint.Value)
: new ISA(Architecture.Intel, WordSize.Bit64); // Default to x64
// NOTE(review): the meaning of the last two arguments (null, true) is defined by B2R2's
// BinHandle constructor and is not visible here - confirm against the B2R2 API.
var binHandle = new BinHandle(byteArray, isa, null, true);
var binFile = binHandle.File;
// Everything below is read back from what B2R2 detected, not from the hints.
var format = MapFromB2R2Format(binFile.Format);
var architecture = MapFromB2R2Architecture(binFile.ISA);
var bitness = GetBitness(binFile.ISA.WordSize);
var endianness = binFile.ISA.Endian == Endian.Little ? Endianness.Little : Endianness.Big;
var abi = DetectAbi(format);
// EntryPoint is an F# option; convert it to a nullable ulong.
var entryPointOpt = binFile.EntryPoint;
var entryPoint = Microsoft.FSharp.Core.FSharpOption<ulong>.get_IsSome(entryPointOpt)
? entryPointOpt.Value
: (ulong?)null;
_logger.LogInformation(
"Loaded binary with B2R2: Format={Format}, Architecture={Architecture}, Endian={Endian}",
format, architecture, endianness);
// Metadata always carries "size" and "b2r2_isa"; "entry_point" only when one exists.
var metadata = new Dictionary<string, object>
{
["size"] = byteArray.Length,
["b2r2_isa"] = binFile.ISA.Arch.ToString()
};
if (entryPoint.HasValue)
{
metadata["entry_point"] = entryPoint.Value;
}
// BuildId is always null here: this method does not extract one.
return new BinaryInfo(
Format: format,
Architecture: architecture,
Bitness: bitness,
Endianness: endianness,
Abi: abi,
EntryPoint: entryPoint,
BuildId: null,
Metadata: metadata,
Handle: new B2R2BinaryHandle(binHandle, byteArray));
}
}

View File

@@ -0,0 +1,109 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Globalization;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2DisassemblyPlugin
{
/// <summary>
/// Parses a bracketed memory operand such as <c>[rbp-0x8]</c>, <c>[rax+rcx*4+16]</c>
/// or <c>[x0, #-8]</c> into base, index, scale and displacement.
/// </summary>
/// <remarks>
/// Only the text between the first <c>[</c> and the last <c>]</c> is examined.
/// Components are separated by <c>+</c>, <c>,</c> or <c>-</c>; a <c>-</c> stays attached
/// to the component it precedes as its sign. Numbers are parsed with the invariant
/// culture. If several displacements appear, the last one wins. Components that cannot
/// be interpreted are ignored.
/// </remarks>
/// <param name="text">Operand text containing the bracket pair.</param>
/// <returns>
/// An operand of type <see cref="OperandType.Memory"/>; all memory components are
/// <c>null</c> when no well-formed bracket pair is present.
/// </returns>
private static Operand ParseMemoryOperand(string text)
{
    var start = text.IndexOf('[');
    var end = text.LastIndexOf(']');

    if (start < 0 || end <= start)
    {
        return new Operand(
            Type: OperandType.Memory,
            Text: text,
            Value: null,
            Register: null,
            MemoryBase: null,
            MemoryIndex: null,
            MemoryScale: null,
            MemoryDisplacement: null);
    }

    var inner = text.Substring(start + 1, end - start - 1);

    string? memBase = null;
    string? memIndex = null;
    int? memScale = null;
    long? memDisp = null;

    // Turn "a-b" into "a+-b" so a subtraction also separates components while the
    // sign stays with the value that follows it.
    var components = inner
        .Replace("-", "+-", StringComparison.Ordinal)
        .Split(['+', ','], StringSplitOptions.RemoveEmptyEntries);

    foreach (var comp in components)
    {
        var token = comp.Trim();
        if (token.Length == 0)
        {
            // Whitespace between separators must not become an (empty) register name.
            continue;
        }

        if (token.Contains('*'))
        {
            // "index*scale"
            var scaleParts = token.Split('*');
            if (scaleParts.Length == 2)
            {
                memIndex = scaleParts[0].Trim().ToUpperInvariant();
                if (int.TryParse(scaleParts[1].Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var scale))
                {
                    memScale = scale;
                }
            }

            continue;
        }

        // '#', a sign or a digit can only start a number, never a register name.
        if (token[0] is '#' or '-' or (>= '0' and <= '9'))
        {
            if (TryParseDisplacement(token, out var disp))
            {
                memDisp = disp;
            }

            continue;
        }

        if (memBase is null)
        {
            memBase = token.ToUpperInvariant();
        }
        else if (memIndex is null)
        {
            memIndex = token.ToUpperInvariant();
        }
    }

    return new Operand(
        Type: OperandType.Memory,
        Text: text,
        Value: null,
        Register: null,
        MemoryBase: memBase,
        MemoryIndex: memIndex,
        MemoryScale: memScale,
        MemoryDisplacement: memDisp);

    // Parses "[#][-|+][0x]digits" as a signed 64-bit displacement.
    static bool TryParseDisplacement(string token, out long value)
    {
        var digits = token.AsSpan().TrimStart('#').Trim();

        var negative = false;
        if (!digits.IsEmpty && (digits[0] is '-' or '+'))
        {
            negative = digits[0] == '-';
            digits = digits[1..].TrimStart();
        }

        var parsed = digits.StartsWith("0x", StringComparison.OrdinalIgnoreCase)
            ? long.TryParse(digits[2..], NumberStyles.HexNumber, CultureInfo.InvariantCulture, out value)
            : long.TryParse(digits, NumberStyles.None, CultureInfo.InvariantCulture, out value);

        if (parsed && negative)
        {
            value = unchecked(-value);
        }

        return parsed;
    }
}
}

View File

@@ -0,0 +1,48 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Text;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2DisassemblyPlugin
{
    /// <summary>
    /// Splits an operand list on top-level commas, keeping commas nested inside
    /// <c>[]</c>, <c>()</c> or <c>{}</c> as part of the operand.
    /// </summary>
    /// <remarks>
    /// Pieces are returned untrimmed, and zero-length pieces are dropped. A closing
    /// bracket with no matching opener is kept in the text but does not lower the
    /// nesting depth below zero, so later top-level commas still split.
    /// </remarks>
    /// <param name="operandsText">Operand portion of a disassembly line.</param>
    /// <returns>The operand substrings, in order.</returns>
    private static IReadOnlyList<string> SplitOperands(string operandsText)
    {
        var result = new List<string>();
        var current = new StringBuilder();
        var depth = 0;

        foreach (var c in operandsText)
        {
            switch (c)
            {
                case '[' or '(' or '{':
                    depth++;
                    current.Append(c);
                    break;

                case ']' or ')' or '}':
                    // Never go negative: a stray closer would otherwise stop every
                    // later comma from being seen at depth zero.
                    if (depth > 0)
                    {
                        depth--;
                    }

                    current.Append(c);
                    break;

                case ',' when depth == 0:
                    Flush();
                    break;

                default:
                    current.Append(c);
                    break;
            }
        }

        Flush();
        return result;

        // Emits the accumulated operand, if any, and starts a new one.
        void Flush()
        {
            if (current.Length > 0)
            {
                result.Add(current.ToString());
                current.Clear();
            }
        }
    }
}

View File

@@ -0,0 +1,105 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
using System.Globalization;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2DisassemblyPlugin
{
/// <summary>
/// Parses the operand portion of a disassembly line into structured operands.
/// </summary>
/// <param name="operandsText">Text following the mnemonic; may be empty.</param>
/// <param name="mnemonic">Instruction mnemonic. Not used by the current implementation.</param>
/// <returns>The operands in textual order; empty when there is no operand text.</returns>
private static ImmutableArray<Operand> ParseOperands(string operandsText, string mnemonic)
{
    if (string.IsNullOrWhiteSpace(operandsText))
    {
        return ImmutableArray<Operand>.Empty;
    }

    var parsed = ImmutableArray.CreateBuilder<Operand>();
    foreach (var piece in SplitOperands(operandsText))
    {
        var candidate = piece.Trim();
        if (candidate.Length != 0)
        {
            parsed.Add(ParseSingleOperand(candidate));
        }
    }

    return parsed.ToImmutable();
}
/// <summary>
/// Classifies one operand by its leading characters and dispatches to the matching parser.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>Starts with <c>[</c> and ends with <c>]</c> or <c>]!</c>, or contains <c>],</c>: memory.</item>
/// <item>Starts with <c>#</c>, <c>-</c> or a digit (which covers <c>0x</c>): immediate.</item>
/// <item>Anything else: register.</item>
/// </list>
/// NOTE(review): an operand with text before the bracket (e.g. a size prefix such as
/// <c>dword ptr [..]</c>) does not start with <c>[</c> and is therefore treated as a
/// register - confirm against the disassembler's actual output format.
/// </remarks>
/// <param name="text">Text of one operand.</param>
/// <returns>The parsed operand.</returns>
private static Operand ParseSingleOperand(string text)
{
    var trimmed = text.Trim();

    // Ordinal comparisons throughout: this is machine-generated text, not prose.
    if (trimmed.StartsWith('[') &&
        (trimmed.EndsWith(']')
         || trimmed.EndsWith("]!", StringComparison.Ordinal)
         || trimmed.Contains("],", StringComparison.Ordinal)))
    {
        return ParseMemoryOperand(trimmed);
    }

    // A leading digit already covers the "0x"/"0X" prefix.
    if (trimmed.Length > 0 &&
        (trimmed[0] == '#' || trimmed[0] == '-' || char.IsDigit(trimmed[0])))
    {
        return ParseImmediateOperand(trimmed);
    }

    return ParseRegisterOperand(trimmed);
}
/// <summary>
/// Builds a register operand whose register name is the upper-cased operand text.
/// </summary>
/// <param name="text">Operand text; kept verbatim in <c>Text</c>.</param>
/// <returns>An operand of type <see cref="OperandType.Register"/> with no value or memory components.</returns>
private static Operand ParseRegisterOperand(string text) =>
    new Operand(
        Type: OperandType.Register,
        Text: text,
        Value: null,
        Register: text.ToUpperInvariant(),
        MemoryBase: null,
        MemoryIndex: null,
        MemoryScale: null,
        MemoryDisplacement: null);
/// <summary>
/// Parses an immediate operand, accepting an optional leading <c>#</c> followed by
/// <c>0x</c> hex, <c>-0x</c> hex, or a decimal number.
/// </summary>
/// <param name="text">Operand text; kept verbatim in <c>Text</c>.</param>
/// <returns>
/// An operand of type <see cref="OperandType.Immediate"/>. <c>Value</c> is <c>null</c>
/// when the text cannot be parsed as a number.
/// </returns>
private static Operand ParseImmediateOperand(string text)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    var literal = text.TrimStart('#');
    long? parsed = null;

    if (literal.StartsWith("0x", IgnoreCase))
    {
        parsed = long.TryParse(literal.AsSpan(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out var positive)
            ? positive
            : null;
    }
    else if (literal.StartsWith("-0x", IgnoreCase))
    {
        // Hex digits carry no sign of their own; negate after parsing the magnitude.
        parsed = long.TryParse(literal.AsSpan(3), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out var magnitude)
            ? -magnitude
            : null;
    }
    else if (long.TryParse(literal, CultureInfo.InvariantCulture, out var decimalValue))
    {
        parsed = decimalValue;
    }

    return new Operand(
        Type: OperandType.Immediate,
        Text: text,
        Value: parsed,
        Register: null,
        MemoryBase: null,
        MemoryIndex: null,
        MemoryScale: null,
        MemoryDisplacement: null);
}
}

View File

@@ -0,0 +1,57 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2DisassemblyPlugin
{
/// <inheritdoc />
/// <remarks>
/// Yields exactly one region: the text section when B2R2 reports a valid pointer to
/// it, otherwise a synthetic <c>.code</c> region covering the whole byte buffer from
/// the file's base address. Arguments are validated when the method is called; the
/// region itself is built when the sequence is enumerated.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="binary"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="binary"/> does not carry a B2R2 handle.</exception>
public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
{
    // Validate outside the iterator so failures surface at the call site rather
    // than at the first MoveNext.
    ArgumentNullException.ThrowIfNull(binary);
    var handle = GetHandle(binary);

    return Enumerate();

    IEnumerable<CodeRegion> Enumerate()
    {
        var textPtr = handle.BinHandle.File.GetTextSectionPointer();
        if (textPtr.IsValid)
        {
            yield return new CodeRegion(
                Name: ".text",
                VirtualAddress: textPtr.Addr,
                FileOffset: (ulong)textPtr.Offset,
                // MaxAddr is used as inclusive, hence the +1.
                Size: (ulong)(textPtr.MaxAddr - textPtr.Addr + 1),
                IsExecutable: true,
                IsReadable: true,
                IsWritable: false);
        }
        else
        {
            yield return new CodeRegion(
                Name: ".code",
                VirtualAddress: handle.BinHandle.File.BaseAddress,
                FileOffset: 0,
                Size: (ulong)handle.Bytes.Length,
                IsExecutable: true,
                IsReadable: true,
                IsWritable: false);
        }
    }
}
/// <inheritdoc />
/// <remarks>
/// Yields one synthetic symbol per function address reported by B2R2. Each is named
/// <c>func_&lt;hex address&gt;</c>, has size 0 (unknown), and is reported as a global
/// function in <c>.text</c>. Arguments are validated when the method is called; the
/// addresses are enumerated lazily.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="binary"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="binary"/> does not carry a B2R2 handle.</exception>
public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
{
    // Validate outside the iterator so failures surface at the call site rather
    // than at the first MoveNext.
    ArgumentNullException.ThrowIfNull(binary);
    var handle = GetHandle(binary);

    return Enumerate();

    IEnumerable<SymbolInfo> Enumerate()
    {
        foreach (var addr in handle.BinHandle.File.GetFunctionAddresses())
        {
            yield return new SymbolInfo(
                Name: $"func_{addr:X}",
                Address: addr,
                Size: 0,
                Type: SymbolType.Function,
                Binding: SymbolBinding.Global,
                Section: ".text");
        }
    }
}
}

View File

@@ -1,13 +1,6 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2;
using B2R2.FrontEnd;
using B2R2.FrontEnd.BinFile;
using B2R2.FrontEnd.BinLifter;
using Microsoft.Extensions.Logging;
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
@@ -16,7 +9,7 @@ namespace StellaOps.BinaryIndex.Disassembly.B2R2;
/// B2R2 is a pure .NET binary analysis framework supporting ELF, PE, and Mach-O
/// on x86, x86-64, ARM32, ARM64, MIPS, RISC-V, and more.
/// </summary>
public sealed class B2R2DisassemblyPlugin : IDisassemblyPlugin
public sealed partial class B2R2DisassemblyPlugin : IDisassemblyPlugin
{
/// <summary>
/// Plugin identifier.
@@ -25,7 +18,7 @@ public sealed class B2R2DisassemblyPlugin : IDisassemblyPlugin
private readonly ILogger<B2R2DisassemblyPlugin> _logger;
private static readonly DisassemblyCapabilities s_capabilities = new()
private static readonly DisassemblyCapabilities _capabilities = new()
{
PluginId = PluginId,
Name = "B2R2 Disassembler",
@@ -61,627 +54,5 @@ public sealed class B2R2DisassemblyPlugin : IDisassemblyPlugin
}
/// <inheritdoc />
public DisassemblyCapabilities Capabilities => s_capabilities;
/// <inheritdoc />
public BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    ArgumentNullException.ThrowIfNull(stream);

    // Drain the stream into memory, then hand the bytes to the span-based overload.
    byte[] contents;
    using (var buffer = new MemoryStream())
    {
        stream.CopyTo(buffer);
        contents = buffer.ToArray();
    }

    return LoadBinary(contents, archHint, formatHint);
}
/// <inheritdoc />
public BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    // The span is copied once; the same array backs both the BinHandle and the returned handle.
    // Note: formatHint is accepted for interface compatibility but is not read by this method.
    var image = bytes.ToArray();
    _logger.LogDebug("Loading binary with B2R2 plugin (size: {Size} bytes)", image.Length);

    // Use the caller's architecture hint when present; otherwise assume x86-64.
    var isa = archHint is { } hintedArch
        ? MapToB2R2Isa(hintedArch)
        : new ISA(Architecture.Intel, WordSize.Bit64);

    // The final 'true' argument enables B2R2 format detection when loading from bytes.
    var binHandle = new BinHandle(image, isa, null, true);
    var binFile = binHandle.File;

    var format = MapFromB2R2Format(binFile.Format);
    var architecture = MapFromB2R2Architecture(binFile.ISA);
    var bitness = GetBitness(binFile.ISA.WordSize);
    var endianness = binFile.ISA.Endian == Endian.Little ? Endianness.Little : Endianness.Big;
    var abi = DetectAbi(format);

    // B2R2 exposes the entry point as an F# option; convert it to a nullable.
    ulong? entryPoint = null;
    var entryPointOpt = binFile.EntryPoint;
    if (Microsoft.FSharp.Core.FSharpOption<ulong>.get_IsSome(entryPointOpt))
    {
        entryPoint = entryPointOpt.Value;
    }

    _logger.LogInformation(
        "Loaded binary with B2R2: Format={Format}, Architecture={Architecture}, Endian={Endian}",
        format, architecture, endianness);

    var metadata = new Dictionary<string, object>
    {
        ["size"] = image.Length,
        ["b2r2_isa"] = binFile.ISA.Arch.ToString()
    };
    if (entryPoint is { } entryAddress)
    {
        metadata["entry_point"] = entryAddress;
    }

    return new BinaryInfo(
        Format: format,
        Architecture: architecture,
        Bitness: bitness,
        Endianness: endianness,
        Abi: abi,
        EntryPoint: entryPoint,
        BuildId: null,
        Metadata: metadata,
        Handle: new B2R2BinaryHandle(binHandle, image));
}
/// <inheritdoc />
public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
{
    // Validate eagerly: inside an iterator body these checks would not run
    // until the caller started enumerating the result.
    ArgumentNullException.ThrowIfNull(binary);
    var handle = GetHandle(binary);
    return Enumerate(handle);

    static IEnumerable<CodeRegion> Enumerate(B2R2BinaryHandle source)
    {
        // Use the text section pointer if available.
        var textPtr = source.BinHandle.File.GetTextSectionPointer();
        if (textPtr.IsValid)
        {
            yield return new CodeRegion(
                Name: ".text",
                VirtualAddress: textPtr.Addr,
                FileOffset: (ulong)textPtr.Offset,
                Size: (ulong)(textPtr.MaxAddr - textPtr.Addr + 1),
                IsExecutable: true,
                IsReadable: true,
                IsWritable: false);
            yield break;
        }

        // Fallback: treat the entire binary as code.
        yield return new CodeRegion(
            Name: ".code",
            VirtualAddress: source.BinHandle.File.BaseAddress,
            FileOffset: 0,
            Size: (ulong)source.Bytes.Length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false);
    }
}
/// <inheritdoc />
public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
{
    // Validate eagerly: inside an iterator body these checks would not run
    // until the caller started enumerating the result.
    ArgumentNullException.ThrowIfNull(binary);
    var handle = GetHandle(binary);
    return Enumerate(handle);

    static IEnumerable<SymbolInfo> Enumerate(B2R2BinaryHandle source)
    {
        // One synthetic symbol per function address reported by B2R2.
        foreach (var addr in source.BinHandle.File.GetFunctionAddresses())
        {
            yield return new SymbolInfo(
                Name: $"func_{addr:X}",
                Address: addr,
                Size: 0, // Unknown size
                Type: SymbolType.Function,
                Binding: SymbolBinding.Global,
                Section: ".text");
        }
    }
}
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
{
    // Validate eagerly: inside an iterator body these checks would not run
    // until the caller started enumerating the result.
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(region);
    var handle = GetHandle(binary);
    return DisassembleCore(handle, region);

    IEnumerable<DisassembledInstruction> DisassembleCore(B2R2BinaryHandle source, CodeRegion target)
    {
        var lifter = source.BinHandle.NewLiftingUnit();
        var addr = target.VirtualAddress;

        // Saturate instead of wrapping when the region reaches the top of the address space.
        var endAddr = target.Size > ulong.MaxValue - addr
            ? ulong.MaxValue
            : addr + target.Size;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} to 0x{End:X}",
            target.Name, addr, endAddr);

        while (addr < endAddr)
        {
            IInstruction? instr;
            try
            {
                instr = lifter.ParseInstruction(addr);
            }
            catch (Exception)
            {
                // Deliberate best effort: an undecodable byte is skipped and decoding resumes at the next address.
                addr++;
                continue;
            }

            if (instr is null || instr.Length == 0)
            {
                addr++;
                continue;
            }

            yield return MapInstruction(instr, source, addr);

            // Stop once this instruction reaches or crosses the end of the region.
            // Comparing against the remaining span avoids wrapping addr past ulong.MaxValue.
            if (instr.Length >= endAddr - addr)
            {
                yield break;
            }

            addr += instr.Length;
        }
    }
}
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length) =>
    // Wrap the raw address range in an ad-hoc region named after its start address.
    // The start address is also used as the region's file offset.
    Disassemble(
        binary,
        new CodeRegion(
            Name: $"0x{startAddress:X}",
            VirtualAddress: startAddress,
            FileOffset: startAddress,
            Size: length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false));
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
{
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(symbol);

    // Symbols without a recorded size are disassembled over a fixed 4096-byte window.
    var windowSize = symbol.Size > 0 ? symbol.Size : 4096UL;

    return Disassemble(
        binary,
        new CodeRegion(
            Name: symbol.Name,
            VirtualAddress: symbol.Address,
            FileOffset: symbol.Address,
            Size: windowSize,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false));
}
#region Architecture Mapping
/// <summary>
/// Translates a <see cref="CpuArchitecture"/> hint into the B2R2 ISA descriptor used to open a binary.
/// Architectures without an explicit mapping fall back to 64-bit Intel.
/// </summary>
private static ISA MapToB2R2Isa(CpuArchitecture arch) => arch switch
{
    CpuArchitecture.X86 => new ISA(Architecture.Intel, WordSize.Bit32),
    CpuArchitecture.X86_64 => new ISA(Architecture.Intel, WordSize.Bit64),
    CpuArchitecture.ARM32 => new ISA(Architecture.ARMv7, WordSize.Bit32),
    CpuArchitecture.ARM64 => new ISA(Architecture.ARMv8, WordSize.Bit64),
    CpuArchitecture.MIPS32 => new ISA(Architecture.MIPS, WordSize.Bit32),
    CpuArchitecture.MIPS64 => new ISA(Architecture.MIPS, WordSize.Bit64),
    CpuArchitecture.RISCV64 => new ISA(Architecture.RISCV, WordSize.Bit64),
    CpuArchitecture.PPC32 => new ISA(Architecture.PPC, Endian.Big, WordSize.Bit32),
    CpuArchitecture.SPARC => new ISA(Architecture.SPARC, Endian.Big),
    CpuArchitecture.SH4 => new ISA(Architecture.SH4),
    CpuArchitecture.AVR => new ISA(Architecture.AVR),
    CpuArchitecture.EVM => new ISA(Architecture.EVM, Endian.Big),
    _ => new ISA(Architecture.Intel, WordSize.Bit64) // Default to x64
};
/// <summary>
/// Translates the ISA that B2R2 detected into the plugin-neutral <see cref="CpuArchitecture"/>.
/// Families that exist in several widths are disambiguated by <c>isa.WordSize</c>.
/// </summary>
private static CpuArchitecture MapFromB2R2Architecture(ISA isa) => isa.Arch switch
{
    Architecture.Intel => isa.WordSize switch
    {
        WordSize.Bit32 => CpuArchitecture.X86,
        WordSize.Bit64 => CpuArchitecture.X86_64,
        // Any other word size: defer to B2R2's own x86 flag.
        _ => isa.IsX86 ? CpuArchitecture.X86 : CpuArchitecture.X86_64
    },
    Architecture.ARMv7 => CpuArchitecture.ARM32,
    Architecture.ARMv8 => isa.WordSize == WordSize.Bit64 ? CpuArchitecture.ARM64 : CpuArchitecture.ARM32,
    Architecture.MIPS => isa.WordSize == WordSize.Bit64 ? CpuArchitecture.MIPS64 : CpuArchitecture.MIPS32,
    Architecture.RISCV => CpuArchitecture.RISCV64,
    Architecture.PPC => CpuArchitecture.PPC32,
    Architecture.SPARC => CpuArchitecture.SPARC,
    Architecture.SH4 => CpuArchitecture.SH4,
    Architecture.AVR => CpuArchitecture.AVR,
    Architecture.EVM => CpuArchitecture.EVM,
    _ => CpuArchitecture.Unknown
};
/// <summary>
/// Translates B2R2's detected file format into <see cref="BinaryFormat"/>;
/// formats without a mapping become <see cref="BinaryFormat.Unknown"/>.
/// </summary>
private static BinaryFormat MapFromB2R2Format(FileFormat format) => format switch
{
    FileFormat.ELFBinary => BinaryFormat.ELF,
    FileFormat.PEBinary => BinaryFormat.PE,
    FileFormat.MachBinary => BinaryFormat.MachO,
    FileFormat.WasmBinary => BinaryFormat.WASM,
    FileFormat.RawBinary => BinaryFormat.Raw,
    _ => BinaryFormat.Unknown
};
/// <summary>
/// Converts a B2R2 word size into a bit count. Unlisted word sizes are reported as 64.
/// </summary>
private static int GetBitness(WordSize wordSize) => wordSize switch
{
    WordSize.Bit8 => 8,
    WordSize.Bit16 => 16,
    WordSize.Bit32 => 32,
    WordSize.Bit64 => 64,
    WordSize.Bit128 => 128,
    WordSize.Bit256 => 256,
    _ => 64
};
/// <summary>
/// Picks an ABI label purely from the container format (ELF, PE or Mach-O).
/// Returns <c>null</c> for every other format.
/// </summary>
private static string? DetectAbi(BinaryFormat format) => format switch
{
    BinaryFormat.ELF => "gnu",
    BinaryFormat.PE => "msvc",
    BinaryFormat.MachO => "darwin",
    _ => null
};
#endregion
#region Instruction Mapping
/// <summary>
/// Extracts the B2R2-specific handle from a <see cref="BinaryInfo"/>.
/// </summary>
/// <exception cref="ArgumentException">The handle is missing or is not a B2R2 handle.</exception>
private static B2R2BinaryHandle GetHandle(BinaryInfo binary) =>
    binary.Handle as B2R2BinaryHandle
        ?? throw new ArgumentException("Invalid binary handle - not a B2R2 handle", nameof(binary));
/// <summary>
/// Converts a decoded B2R2 instruction into the plugin-neutral <see cref="DisassembledInstruction"/>.
/// </summary>
/// <param name="instr">Instruction decoded by B2R2.</param>
/// <param name="handle">Handle holding the image bytes and the B2R2 file.</param>
/// <param name="address">Address the instruction was decoded at.</param>
/// <remarks>
/// Raw bytes are sliced from the image at <c>address - BaseAddress</c>. When that window
/// does not lie entirely inside the image, <c>RawBytes</c> is empty.
/// NOTE(review): this assumes the image is mapped contiguously from BaseAddress - confirm
/// for formats whose sections have differing file offsets.
/// </remarks>
private static DisassembledInstruction MapInstruction(IInstruction instr, B2R2BinaryHandle handle, ulong address)
{
    // The first whitespace-separated token is the mnemonic; the remainder is the operand text.
    var disasm = instr.Disasm();
    var parts = disasm.Split(' ', 2, StringSplitOptions.RemoveEmptyEntries);
    var mnemonic = parts.Length > 0 ? parts[0] : "???";
    var operandsText = parts.Length > 1 ? parts[1] : "";

    // Range-check in 64-bit space. Casting the difference to int first could truncate a large
    // offset into a value that looks valid and would slice the wrong bytes.
    var rawBytes = ImmutableArray<byte>.Empty;
    var image = handle.Bytes;
    var baseAddress = handle.BinHandle.File.BaseAddress;
    ulong length = instr.Length;
    if (address >= baseAddress)
    {
        var offset = address - baseAddress;
        var imageLength = (ulong)image.Length;
        if (offset <= imageLength && length <= imageLength - offset)
        {
            rawBytes = ImmutableArray.Create(image, (int)offset, (int)length);
        }
    }

    var kind = ClassifyInstruction(instr, mnemonic);
    var operands = ParseOperands(operandsText, mnemonic);

    return new DisassembledInstruction(
        Address: address,
        RawBytes: rawBytes,
        Mnemonic: mnemonic,
        OperandsText: operandsText,
        Kind: kind,
        Operands: operands);
}
/// <summary>
/// Splits an operand string on top-level commas and parses each non-empty piece.
/// </summary>
/// <param name="operandsText">Operand part of the disassembly text; may be empty.</param>
/// <param name="mnemonic">Instruction mnemonic; not read by this method.</param>
/// <returns>The parsed operands in source order, or an empty array when there are none.</returns>
private static ImmutableArray<Operand> ParseOperands(string operandsText, string mnemonic)
{
    if (string.IsNullOrWhiteSpace(operandsText))
    {
        return ImmutableArray<Operand>.Empty;
    }

    var pieces = SplitOperands(operandsText);
    var parsed = ImmutableArray.CreateBuilder<Operand>();
    for (var i = 0; i < pieces.Count; i++)
    {
        var candidate = pieces[i].Trim();
        if (candidate.Length != 0)
        {
            parsed.Add(ParseSingleOperand(candidate));
        }
    }

    return parsed.ToImmutable();
}
/// <summary>
/// Splits an operand list on commas that are not nested inside <c>[]</c>, <c>()</c> or <c>{}</c>.
/// </summary>
/// <param name="operandsText">Operand text, e.g. <c>rax, [rbx+rcx*4], 0x10</c>.</param>
/// <returns>
/// The operand substrings in order. Pieces are not trimmed, and empty pieces
/// (for example between two adjacent commas) are omitted.
/// </returns>
/// <remarks>
/// An unmatched closing bracket does not push the nesting depth below zero, so commas
/// after a stray <c>]</c> still separate operands.
/// </remarks>
private static IReadOnlyList<string> SplitOperands(string operandsText)
{
    var result = new List<string>();
    var current = new System.Text.StringBuilder();
    var depth = 0;

    foreach (var c in operandsText)
    {
        switch (c)
        {
            case '[' or '(' or '{':
                depth++;
                current.Append(c);
                break;

            case ']' or ')' or '}':
                // Clamp at zero so malformed input cannot disable splitting for the rest of the string.
                if (depth > 0)
                {
                    depth--;
                }
                current.Append(c);
                break;

            case ',' when depth == 0:
                Flush();
                break;

            default:
                current.Append(c);
                break;
        }
    }

    Flush();
    return result;

    // Moves the pending piece, if any, into the result list.
    void Flush()
    {
        if (current.Length > 0)
        {
            result.Add(current.ToString());
            current.Clear();
        }
    }
}
/// <summary>
/// Classifies one operand string as memory, immediate or register and delegates to the matching parser.
/// </summary>
/// <remarks>
/// Memory: starts with '[' and either ends with ']' or "]!", or contains "],".
/// Immediate: starts with '#', "0x" (any case), a digit or '-'.
/// Everything else is treated as a register.
/// </remarks>
private static Operand ParseSingleOperand(string text)
{
    var candidate = text.Trim();

    if (LooksLikeMemory(candidate))
    {
        return ParseMemoryOperand(candidate);
    }

    return LooksLikeImmediate(candidate)
        ? ParseImmediateOperand(candidate)
        : ParseRegisterOperand(candidate);

    // Plain [...] plus the ARM64 write-back and post-index forms.
    static bool LooksLikeMemory(string s) =>
        s.StartsWith('[') && (s.EndsWith(']') || s.EndsWith("]!") || s.Contains("],"));

    static bool LooksLikeImmediate(string s) =>
        s.StartsWith('#')
        || s.StartsWith("0x", StringComparison.OrdinalIgnoreCase)
        || (s.Length > 0 && (char.IsDigit(s[0]) || s[0] == '-'));
}
/// <summary>
/// Builds a register operand. The register name is upper-cased with the invariant culture;
/// the original text is kept verbatim.
/// </summary>
private static Operand ParseRegisterOperand(string text) =>
    new Operand(
        Type: OperandType.Register,
        Text: text,
        Value: null,
        Register: text.ToUpperInvariant(),
        MemoryBase: null,
        MemoryIndex: null,
        MemoryScale: null,
        MemoryDisplacement: null);
/// <summary>
/// Builds an immediate operand from text such as <c>#8</c>, <c>0x10</c>, <c>-0x10</c> or <c>-5</c>.
/// </summary>
/// <remarks>
/// Leading '#' characters are stripped before parsing. When the remainder is not a valid
/// hexadecimal or decimal integer, the operand is still returned with a null <c>Value</c>.
/// </remarks>
private static Operand ParseImmediateOperand(string text)
{
    var literal = text.TrimStart('#');

    // Separate an optional leading minus so both hex forms share one parsing path.
    var isNegative = literal.StartsWith('-');
    var magnitude = isNegative ? literal.Substring(1) : literal;

    long? value = null;
    if (magnitude.StartsWith("0x", StringComparison.OrdinalIgnoreCase))
    {
        if (long.TryParse(magnitude.AsSpan(2), System.Globalization.NumberStyles.HexNumber,
            System.Globalization.CultureInfo.InvariantCulture, out var hexVal))
        {
            value = isNegative ? -hexVal : hexVal;
        }
    }
    else if (long.TryParse(literal, System.Globalization.CultureInfo.InvariantCulture, out var decVal))
    {
        // Decimal path: the sign, if any, is parsed as part of the literal.
        value = decVal;
    }

    return new Operand(
        Type: OperandType.Immediate,
        Text: text,
        Value: value,
        Register: null,
        MemoryBase: null,
        MemoryIndex: null,
        MemoryScale: null,
        MemoryDisplacement: null);
}
/// <summary>
/// Parses a bracketed memory operand into base register, index register, scale and displacement.
/// </summary>
/// <param name="text">Operand text containing a bracketed expression.</param>
/// <returns>
/// A memory operand. Fields that cannot be determined are left null; when no well-formed
/// bracket pair is found, all memory fields are null.
/// </returns>
/// <remarks>
/// Handled shapes:
/// x86 style <c>[rax]</c>, <c>[rax+rbx]</c>, <c>[rax+rbx*4+0x10]</c>, <c>[rbp-0x8]</c>, <c>[rax+16]</c>;
/// ARM style <c>[x0]</c>, <c>[x0, #8]</c>, <c>[x0, #-0x10]!</c>, <c>[x0, x1]</c>.
/// Only the text between the first '[' and the last ']' is examined. Numbers are parsed with
/// the invariant culture. If several displacements appear, the last one parsed wins.
/// The first two plain tokens become base and index; further ones are ignored.
/// </remarks>
private static Operand ParseMemoryOperand(string text)
{
    string? memBase = null;
    string? memIndex = null;
    int? memScale = null;
    long? memDisp = null;

    var open = text.IndexOf('[');
    var close = text.LastIndexOf(']');
    if (open >= 0 && close > open)
    {
        var inner = text.Substring(open + 1, close - open - 1);
        foreach (var component in SplitComponents(inner))
        {
            // Scaled index: reg*N
            if (component.Contains('*'))
            {
                var scaleParts = component.Split('*');
                if (scaleParts.Length == 2)
                {
                    memIndex = scaleParts[0].Trim().ToUpperInvariant();
                    if (int.TryParse(scaleParts[1].Trim(), System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture, out var scale))
                    {
                        memScale = scale;
                    }
                }
                continue;
            }

            // Displacement: ARM "#N", hex "0xNN", plain decimal, or any of these with a leading '-'.
            var first = component[0];
            if (first == '#' || first == '-' || char.IsDigit(first))
            {
                var literal = first == '#' ? component.TrimStart('#').Trim() : component;
                if (TryParseSigned(literal, out var displacement))
                {
                    memDisp = displacement;
                }
                continue;
            }

            // Anything else is taken as a register: first the base, then the index.
            if (memBase is null)
            {
                memBase = component.ToUpperInvariant();
            }
            else if (memIndex is null)
            {
                memIndex = component.ToUpperInvariant();
            }
        }
    }

    return new Operand(
        Type: OperandType.Memory,
        Text: text,
        Value: null,
        Register: null,
        MemoryBase: memBase,
        MemoryIndex: memIndex,
        MemoryScale: memScale,
        MemoryDisplacement: memDisp);

    // Splits on '+' and ','. A '-' that follows an operand starts a new, negative component,
    // so "rbp-0x8" yields "rbp" and "-0x8". Pieces are trimmed and empty pieces are dropped.
    static List<string> SplitComponents(string inner)
    {
        var pieces = new List<string>();
        var pending = new System.Text.StringBuilder();

        foreach (var c in inner)
        {
            if (c is '+' or ',')
            {
                Flush();
                continue;
            }

            if (c == '-' && FollowsOperand(pending))
            {
                Flush();
            }

            pending.Append(c);
        }

        Flush();
        return pieces;

        void Flush()
        {
            var piece = pending.ToString().Trim();
            pending.Clear();
            if (piece.Length > 0)
            {
                pieces.Add(piece);
            }
        }
    }

    // True when the pending text already holds an operand, so a '-' is a separator.
    // False when it is empty or ends in '#', where the '-' is the sign of the number that follows.
    static bool FollowsOperand(System.Text.StringBuilder pending)
    {
        for (var i = pending.Length - 1; i >= 0; i--)
        {
            if (char.IsWhiteSpace(pending[i]))
            {
                continue;
            }

            return pending[i] != '#';
        }

        return false;
    }

    // Parses an optionally negative hexadecimal (0x...) or decimal integer.
    static bool TryParseSigned(string token, out long value)
    {
        var negative = token.StartsWith('-');
        var magnitude = (negative ? token.Substring(1) : token).Trim();

        if (magnitude.StartsWith("0x", StringComparison.OrdinalIgnoreCase))
        {
            if (!long.TryParse(magnitude.AsSpan(2), System.Globalization.NumberStyles.HexNumber,
                System.Globalization.CultureInfo.InvariantCulture, out var hex))
            {
                value = 0;
                return false;
            }

            value = negative ? -hex : hex;
            return true;
        }

        return long.TryParse(
            negative ? "-" + magnitude : magnitude,
            System.Globalization.NumberStyles.AllowLeadingSign,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);
    }
}
/// <summary>
/// Assigns a coarse <see cref="InstructionKind"/> to an instruction.
/// </summary>
/// <remarks>
/// B2R2's own flags are consulted first, in the order return, call, conditional branch, branch,
/// nop, interrupt. Only when none is set is the upper-cased mnemonic matched against fixed
/// names and prefixes. Note that every mnemonic starting with "LD" counts as a load and every
/// one starting with "ST" as a store.
/// </remarks>
private static InstructionKind ClassifyInstruction(IInstruction instr, string mnemonic)
{
    if (instr.IsRET)
    {
        return InstructionKind.Return;
    }

    if (instr.IsCall)
    {
        return InstructionKind.Call;
    }

    if (instr.IsCondBranch)
    {
        return InstructionKind.ConditionalBranch;
    }

    if (instr.IsBranch)
    {
        return InstructionKind.Branch;
    }

    if (instr.IsNop)
    {
        return InstructionKind.Nop;
    }

    if (instr.IsInterrupt)
    {
        return InstructionKind.Syscall;
    }

    // Mnemonic fallback; arm order mirrors the original check order.
    var upper = mnemonic.ToUpperInvariant();
    return upper switch
    {
        "ADD" or "SUB" or "MUL" or "DIV" or "IMUL" or "IDIV" or
        "INC" or "DEC" or "NEG" or "ADC" or "SBB" => InstructionKind.Arithmetic,

        "AND" or "OR" or "XOR" or "NOT" or "TEST" or "ORR" or "EOR" => InstructionKind.Logic,

        "SHL" or "SHR" or "SAL" or "SAR" or "ROL" or "ROR" or
        "LSL" or "LSR" or "ASR" => InstructionKind.Shift,

        "LEA" or "PUSH" or "POP" or "XCHG" => InstructionKind.Move,
        _ when upper.StartsWith("MOV", StringComparison.Ordinal) => InstructionKind.Move,

        // "LD" also covers the "LDR" prefix.
        _ when upper.StartsWith("LD", StringComparison.Ordinal) => InstructionKind.Load,

        // "ST" also covers the "STR" prefix.
        _ when upper.StartsWith("ST", StringComparison.Ordinal) => InstructionKind.Store,

        "CMP" or "CMPS" or "SCAS" => InstructionKind.Compare,

        _ => InstructionKind.Unknown
    };
}
#endregion
public DisassemblyCapabilities Capabilities => _capabilities;
}
/// <summary>
/// Internal handle for B2R2 binary data.
/// </summary>
/// <param name="BinHandle">The B2R2 handle created for the loaded image.</param>
/// <param name="Bytes">The raw image bytes that <paramref name="BinHandle"/> was created from.</param>
internal sealed record B2R2BinaryHandle(BinHandle BinHandle, byte[] Bytes);

View File

@@ -0,0 +1,14 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
/// <summary>
/// Per-ISA pool statistics.
/// </summary>
/// <remarks>
/// Instances are produced by <c>B2R2LifterPool.GetStats</c>, one per ISA key.
/// </remarks>
/// <param name="PooledCount">Number of lifters in pool for this ISA.</param>
/// <param name="ActiveCount">Number of lifters currently in use for this ISA.</param>
/// <param name="MaxPoolSize">Maximum pool size for this ISA.</param>
public sealed record B2R2IsaPoolStats(
int PooledCount,
int ActiveCount,
int MaxPoolSize);

View File

@@ -0,0 +1,106 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2;
using B2R2.FrontEnd;
using B2R2.FrontEnd.BinFile;
using B2R2.FrontEnd.BinLifter;
using System.Collections.Concurrent;
using System.Globalization;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2LifterPool
{
    /// <summary>
    /// Builds the pool key for an ISA: lower-cased architecture name, a dash, then "64" or "32".
    /// Every word size other than 64-bit is keyed as "32".
    /// </summary>
    private static string GetIsaKey(ISA isa)
    {
        var arch = isa.Arch.ToString().ToLowerInvariant();
        var bits = isa.WordSize == WordSize.Bit64 ? "64" : "32";
        return string.Create(CultureInfo.InvariantCulture, $"{arch}-{bits}");
    }

    /// <summary>
    /// Parses a key of the form <c>arch-bits</c> (for example <c>intel-64</c>) back into an ISA.
    /// </summary>
    /// <returns>
    /// The ISA, or <c>null</c> when the key is blank, is not exactly two dash-separated parts,
    /// names an unsupported architecture, or has a bit width other than "32" or "64".
    /// </returns>
    private static ISA? ParseIsaKey(string key)
    {
        if (string.IsNullOrWhiteSpace(key))
        {
            return null;
        }

        var parts = key.Split('-');
        if (parts.Length != 2)
        {
            return null;
        }

        // Reject unknown widths instead of silently treating them as 32-bit.
        WordSize wordSize;
        if (parts[1] == "64")
        {
            wordSize = WordSize.Bit64;
        }
        else if (parts[1] == "32")
        {
            wordSize = WordSize.Bit32;
        }
        else
        {
            return null;
        }

        return parts[0].ToLowerInvariant() switch
        {
            "intel" => new ISA(Architecture.Intel, wordSize),
            "armv7" => new ISA(Architecture.ARMv7, wordSize),
            "armv8" => new ISA(Architecture.ARMv8, wordSize),
            "mips" => new ISA(Architecture.MIPS, wordSize),
            "riscv" => new ISA(Architecture.RISCV, wordSize),
            "ppc" => new ISA(Architecture.PPC, Endian.Big, wordSize),
            "sparc" => new ISA(Architecture.SPARC, Endian.Big),
            _ => (ISA?)null
        };
    }

    /// <summary>Returns the bag of idle lifters for an ISA key, creating it on first use.</summary>
    private ConcurrentBag<PooledLifterEntry> GetOrCreatePool(string isaKey) =>
        _pools.GetOrAdd(isaKey, static _ => new ConcurrentBag<PooledLifterEntry>());

    /// <summary>
    /// Creates a fresh lifter entry backed by a 64-byte placeholder buffer for the given ISA.
    /// </summary>
    private static PooledLifterEntry CreateLifterEntry(ISA isa)
    {
        var binHandle = new BinHandle(CreateNopSled(isa, 64), isa, null, true);
        return new PooledLifterEntry(binHandle, binHandle.NewLiftingUnit(), DateTimeOffset.UtcNow);
    }

    /// <summary>
    /// Builds a buffer of <paramref name="size"/> bytes filled with a NOP encoding for the ISA.
    /// </summary>
    /// <remarks>
    /// Intel uses the single byte 0x90. ARMv7 and ARMv8 use a 4-byte pattern chosen by word size,
    /// written only into whole 4-byte slots; trailing bytes stay zero. Other architectures
    /// receive an all-zero buffer.
    /// </remarks>
    private static byte[] CreateNopSled(ISA isa, int size)
    {
        var sled = new byte[size];

        switch (isa.Arch)
        {
            case Architecture.Intel:
                Array.Fill(sled, (byte)0x90);
                break;

            case Architecture.ARMv7:
            case Architecture.ARMv8:
                var nop = isa.WordSize == WordSize.Bit64
                    ? new byte[] { 0x1F, 0x20, 0x03, 0xD5 }
                    : new byte[] { 0x00, 0xF0, 0x20, 0xE3 };
                for (var offset = 0; offset + nop.Length <= size; offset += nop.Length)
                {
                    nop.CopyTo(sled, offset);
                }
                break;

            default:
                // A new array is already zero-filled.
                break;
        }

        return sled;
    }

    /// <summary>Records that one more lifter for the ISA key is in use.</summary>
    private void IncrementActive(string isaKey) =>
        _activeCount.AddOrUpdate(isaKey, 1, static (_, current) => current + 1);

    /// <summary>Records that a lifter for the ISA key was returned; the count never drops below zero.</summary>
    private void DecrementActive(string isaKey) =>
        _activeCount.AddOrUpdate(isaKey, 0, static (_, current) => Math.Max(0, current - 1));
}

View File

@@ -0,0 +1,79 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using Microsoft.Extensions.Logging;
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2LifterPool
{
    /// <summary>
    /// Gets the current pool statistics.
    /// </summary>
    /// <returns>
    /// Totals across all ISAs plus one <see cref="B2R2IsaPoolStats"/> per ISA key that has a pool.
    /// </returns>
    public B2R2LifterPoolStats GetStats()
    {
        var perIsa = ImmutableDictionary.CreateBuilder<string, B2R2IsaPoolStats>();
        foreach (var (isaKey, bag) in _pools)
        {
            perIsa[isaKey] = new B2R2IsaPoolStats(
                PooledCount: bag.Count,
                ActiveCount: _activeCount.GetValueOrDefault(isaKey, 0),
                MaxPoolSize: _options.MaxPoolSizePerIsa);
        }

        return new B2R2LifterPoolStats(
            TotalPooledLifters: _pools.Values.Sum(b => b.Count),
            TotalActiveLifters: _activeCount.Values.Sum(),
            IsWarm: _warmed,
            IsaStats: perIsa.ToImmutable());
    }

    /// <summary>
    /// Warms the pool by preloading lifters for common ISAs.
    /// </summary>
    /// <remarks>
    /// Does nothing when warm preload is disabled or the pool is already warm.
    /// A failure for one ISA is logged and does not stop the remaining ISAs.
    /// An unset (default) ISA list is treated as empty.
    /// </remarks>
    public void WarmPool()
    {
        if (!_options.EnableWarmPreload || _warmed)
        {
            return;
        }

        lock (_warmLock)
        {
            if (_warmed)
            {
                return;
            }

            // A default ImmutableArray throws on Length and on enumeration, so normalise it first.
            var isaKeys = _options.WarmPreloadIsas.IsDefault
                ? ImmutableArray<string>.Empty
                : _options.WarmPreloadIsas;

            _logger.LogInformation(
                "Warming B2R2 lifter pool for {IsaCount} ISAs",
                isaKeys.Length);

            foreach (var isaKey in isaKeys)
            {
                try
                {
                    var isa = ParseIsaKey(isaKey);
                    if (isa is null)
                    {
                        _logger.LogWarning("Unknown ISA key for warm preload: {IsaKey}", isaKey);
                        continue;
                    }

                    // Create one lifter and park it in the pool for this ISA.
                    var entry = CreateLifterEntry(isa);
                    GetOrCreatePool(GetIsaKey(isa)).Add(entry);
                    _logger.LogDebug("Warmed lifter for ISA: {IsaKey}", isaKey);
                }
                catch (Exception ex)
                {
                    _logger.LogWarning(ex, "Failed to warm lifter for ISA: {IsaKey}", isaKey);
                }
            }

            _warmed = true;
            _logger.LogInformation("B2R2 lifter pool warm complete");
        }
    }
}

View File

@@ -2,103 +2,21 @@
// Licensed under BUSL-1.1. See LICENSE in the project root.
// Sprint: SPRINT_20260112_004_BINIDX_b2r2_lowuir_perf_cache (BINIDX-LIFTER-02)
// Task: Bounded lifter pool with warm preload per ISA
using B2R2;
using B2R2.FrontEnd;
using B2R2.FrontEnd.BinFile;
using B2R2.FrontEnd.BinLifter;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.Globalization;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
/// <summary>
/// Configuration options for the B2R2 lifter pool.
/// </summary>
public sealed class B2R2LifterPoolOptions
{
/// <summary>
/// Configuration section name.
/// </summary>
public const string SectionName = "StellaOps:BinaryIndex:B2R2LifterPool";
/// <summary>
/// Maximum number of pooled lifters per ISA.
/// </summary>
/// <remarks>
/// The pool consults this value when a lifter is returned: it is only re-pooled while the
/// ISA's pool holds fewer entries than this limit. Warm preload does not check this limit.
/// </remarks>
public int MaxPoolSizePerIsa { get; set; } = 4;
/// <summary>
/// Whether to warm preload lifters for common ISAs at startup.
/// </summary>
/// <remarks>
/// When <c>false</c>, <c>B2R2LifterPool.WarmPool</c> returns without doing anything.
/// </remarks>
public bool EnableWarmPreload { get; set; } = true;
/// <summary>
/// ISAs to warm preload at startup.
/// </summary>
/// <remarks>
/// Each entry is an ISA key of the form <c>arch-bits</c>, for example <c>intel-64</c>.
/// Entries the pool cannot resolve to an ISA are logged as a warning and skipped.
/// </remarks>
public ImmutableArray<string> WarmPreloadIsas { get; set; } =
[
"intel-64",
"intel-32",
"armv8-64",
"armv7-32"
];
/// <summary>
/// Timeout for acquiring a lifter from the pool.
/// </summary>
/// <remarks>
/// NOTE(review): no code visible alongside this type reads this value - confirm it is honoured.
/// </remarks>
public TimeSpan AcquireTimeout { get; set; } = TimeSpan.FromSeconds(5);
}
/// <summary>
/// Pooled B2R2 BinHandle and LiftingUnit for reuse across calls.
/// </summary>
public sealed class PooledLifter : IDisposable
{
    private readonly B2R2LifterPool _pool;
    private readonly ISA _isa;
    private bool _disposed;

    /// <summary>
    /// Wraps a handle and lifting unit checked out of <paramref name="pool"/> for <paramref name="isa"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="pool"/>, <paramref name="binHandle"/> or <paramref name="liftingUnit"/> is null.
    /// </exception>
    internal PooledLifter(
        B2R2LifterPool pool,
        ISA isa,
        BinHandle binHandle,
        LiftingUnit liftingUnit)
    {
        ArgumentNullException.ThrowIfNull(pool);
        ArgumentNullException.ThrowIfNull(binHandle);
        ArgumentNullException.ThrowIfNull(liftingUnit);

        _pool = pool;
        _isa = isa;
        BinHandle = binHandle;
        LiftingUnit = liftingUnit;
    }

    /// <summary>
    /// The B2R2 BinHandle for this lifter.
    /// </summary>
    public BinHandle BinHandle { get; }

    /// <summary>
    /// The B2R2 LiftingUnit for this lifter.
    /// </summary>
    public LiftingUnit LiftingUnit { get; }

    /// <summary>
    /// Returns the lifter to the pool. Calls after the first one do nothing.
    /// </summary>
    public void Dispose()
    {
        if (!_disposed)
        {
            _disposed = true;
            _pool.Return(this, _isa);
        }
    }
}
/// <summary>
/// Bounded pool of B2R2 lifters with warm preload per ISA.
/// Thread-safe and designed for reuse in high-throughput scenarios.
/// </summary>
public sealed class B2R2LifterPool : IDisposable
public sealed partial class B2R2LifterPool : IDisposable
{
private readonly ILogger<B2R2LifterPool> _logger;
private readonly B2R2LifterPoolOptions _options;
@@ -121,77 +39,6 @@ public sealed class B2R2LifterPool : IDisposable
_options = options?.Value ?? new B2R2LifterPoolOptions();
}
/// <summary>
/// Gets the current pool statistics.
/// </summary>
public B2R2LifterPoolStats GetStats()
{
    // One entry per ISA key that currently has a pool.
    var perIsa = ImmutableDictionary.CreateBuilder<string, B2R2IsaPoolStats>();
    foreach (var (isaKey, bag) in _pools)
    {
        perIsa[isaKey] = new B2R2IsaPoolStats(
            PooledCount: bag.Count,
            ActiveCount: _activeCount.GetValueOrDefault(isaKey, 0),
            MaxPoolSize: _options.MaxPoolSizePerIsa);
    }

    return new B2R2LifterPoolStats(
        TotalPooledLifters: _pools.Values.Sum(b => b.Count),
        TotalActiveLifters: _activeCount.Values.Sum(),
        IsWarm: _warmed,
        IsaStats: perIsa.ToImmutable());
}
/// <summary>
/// Warms the pool by preloading lifters for common ISAs.
/// </summary>
public void WarmPool()
{
    // Cheap checks first; the lock below re-checks _warmed before doing any work.
    if (!_options.EnableWarmPreload || _warmed)
    {
        return;
    }

    lock (_warmLock)
    {
        if (_warmed)
        {
            return;
        }

        // A default ImmutableArray throws on Length and on enumeration, so normalise it first.
        var isaKeys = _options.WarmPreloadIsas.IsDefault
            ? ImmutableArray<string>.Empty
            : _options.WarmPreloadIsas;

        _logger.LogInformation(
            "Warming B2R2 lifter pool for {IsaCount} ISAs",
            isaKeys.Length);

        foreach (var isaKey in isaKeys)
        {
            try
            {
                var isa = ParseIsaKey(isaKey);
                if (isa is null)
                {
                    _logger.LogWarning("Unknown ISA key for warm preload: {IsaKey}", isaKey);
                    continue;
                }

                // Create and pool a lifter for this ISA.
                var entry = CreateLifterEntry(isa);
                GetOrCreatePool(GetIsaKey(isa)).Add(entry);
                _logger.LogDebug("Warmed lifter for ISA: {IsaKey}", isaKey);
            }
            catch (Exception ex)
            {
                // One failing ISA must not prevent the remaining ones from being warmed.
                _logger.LogWarning(ex, "Failed to warm lifter for ISA: {IsaKey}", isaKey);
            }
        }

        _warmed = true;
        _logger.LogInformation("B2R2 lifter pool warm complete");
    }
}
/// <summary>
/// Acquires a lifter for the specified ISA.
/// </summary>
@@ -202,7 +49,6 @@ public sealed class B2R2LifterPool : IDisposable
var isaKey = GetIsaKey(isa);
var pool = GetOrCreatePool(isaKey);
// Try to get an existing lifter from the pool
if (pool.TryTake(out var entry))
{
IncrementActive(isaKey);
@@ -210,7 +56,6 @@ public sealed class B2R2LifterPool : IDisposable
return new PooledLifter(this, isa, entry.BinHandle, entry.LiftingUnit);
}
// Create a new lifter
var newEntry = CreateLifterEntry(isa);
IncrementActive(isaKey);
_logger.LogTrace("Created new lifter for {Isa}", isaKey);
@@ -227,7 +72,6 @@ public sealed class B2R2LifterPool : IDisposable
var pool = GetOrCreatePool(isaKey);
// Only return to pool if under limit
if (pool.Count < _options.MaxPoolSizePerIsa)
{
var entry = new PooledLifterEntry(
@@ -254,132 +98,4 @@ public sealed class B2R2LifterPool : IDisposable
_logger.LogInformation("B2R2 lifter pool disposed");
}
#region Private Helpers
/// <summary>
/// Builds the pool key for an ISA: lower-cased architecture name, a dash, then "64" or "32".
/// Every word size other than 64-bit is keyed as "32".
/// </summary>
private static string GetIsaKey(ISA isa)
{
    var arch = isa.Arch.ToString().ToLowerInvariant();
    var bits = isa.WordSize == WordSize.Bit64 ? "64" : "32";
    return string.Create(CultureInfo.InvariantCulture, $"{arch}-{bits}");
}
/// <summary>
/// Parses a key of the form <c>arch-bits</c> (for example <c>intel-64</c>) back into an ISA.
/// </summary>
/// <returns>
/// The ISA, or <c>null</c> when the key is blank, is not exactly two dash-separated parts,
/// names an unsupported architecture, or has a bit width other than "32" or "64".
/// </returns>
private static ISA? ParseIsaKey(string key)
{
    if (string.IsNullOrWhiteSpace(key))
    {
        return null;
    }

    var parts = key.Split('-');
    if (parts.Length != 2)
    {
        return null;
    }

    // Reject unknown widths instead of silently treating them as 32-bit.
    WordSize wordSize;
    if (parts[1] == "64")
    {
        wordSize = WordSize.Bit64;
    }
    else if (parts[1] == "32")
    {
        wordSize = WordSize.Bit32;
    }
    else
    {
        return null;
    }

    return parts[0].ToLowerInvariant() switch
    {
        "intel" => new ISA(Architecture.Intel, wordSize),
        "armv7" => new ISA(Architecture.ARMv7, wordSize),
        "armv8" => new ISA(Architecture.ARMv8, wordSize),
        "mips" => new ISA(Architecture.MIPS, wordSize),
        "riscv" => new ISA(Architecture.RISCV, wordSize),
        "ppc" => new ISA(Architecture.PPC, Endian.Big, wordSize),
        "sparc" => new ISA(Architecture.SPARC, Endian.Big),
        _ => (ISA?)null
    };
}
/// <summary>Returns the bag of idle lifters for an ISA key, creating it on first use.</summary>
private ConcurrentBag<PooledLifterEntry> GetOrCreatePool(string isaKey)
{
    return _pools.GetOrAdd(isaKey, static _ => new ConcurrentBag<PooledLifterEntry>());
}
/// <summary>
/// Creates a fresh lifter entry for the ISA. The BinHandle is built over a 64-byte
/// placeholder buffer of NOP bytes rather than a real binary.
/// </summary>
private static PooledLifterEntry CreateLifterEntry(ISA isa)
{
    var binHandle = new BinHandle(CreateNopSled(isa, 64), isa, null, true);
    return new PooledLifterEntry(binHandle, binHandle.NewLiftingUnit(), DateTimeOffset.UtcNow);
}
/// <summary>
/// Builds a buffer of <paramref name="size"/> bytes filled with a NOP encoding for the ISA.
/// </summary>
/// <remarks>
/// Intel uses the single byte 0x90. ARMv7 and ARMv8 use a 4-byte pattern chosen by word size
/// (1F 20 03 D5 for 64-bit, 00 F0 20 E3 otherwise), written only into whole 4-byte slots;
/// trailing bytes stay zero. Other architectures receive an all-zero buffer.
/// </remarks>
private static byte[] CreateNopSled(ISA isa, int size)
{
    var sled = new byte[size];

    switch (isa.Arch)
    {
        case Architecture.Intel:
            Array.Fill(sled, (byte)0x90);
            break;

        case Architecture.ARMv7:
        case Architecture.ARMv8:
            var nop = isa.WordSize == WordSize.Bit64
                ? new byte[] { 0x1F, 0x20, 0x03, 0xD5 }
                : new byte[] { 0x00, 0xF0, 0x20, 0xE3 };
            for (var offset = 0; offset + nop.Length <= size; offset += nop.Length)
            {
                nop.CopyTo(sled, offset);
            }
            break;

        default:
            // Generic zeroes for other architectures; a new array is already zero-filled.
            break;
    }

    return sled;
}
/// <summary>Records that one more lifter for the ISA key is in use.</summary>
private void IncrementActive(string isaKey) =>
    _activeCount.AddOrUpdate(isaKey, 1, static (_, current) => current + 1);
/// <summary>Records that a lifter for the ISA key was returned; the count never drops below zero.</summary>
private void DecrementActive(string isaKey) =>
    _activeCount.AddOrUpdate(isaKey, 0, static (_, current) => Math.Max(0, current - 1));
#endregion
}
/// <summary>
/// Statistics for the B2R2 lifter pool.
/// </summary>
/// <remarks>
/// Returned by <c>B2R2LifterPool.GetStats</c>. The totals are summed across all ISA keys.
/// </remarks>
/// <param name="TotalPooledLifters">Total lifters currently in pool.</param>
/// <param name="TotalActiveLifters">Total lifters currently in use.</param>
/// <param name="IsWarm">Whether the pool has been warmed.</param>
/// <param name="IsaStats">Per-ISA pool statistics.</param>
public sealed record B2R2LifterPoolStats(
int TotalPooledLifters,
int TotalActiveLifters,
bool IsWarm,
ImmutableDictionary<string, B2R2IsaPoolStats> IsaStats);
/// <summary>
/// Per-ISA pool statistics.
/// </summary>
/// <remarks>
/// Instances are produced by <c>B2R2LifterPool.GetStats</c>, one per ISA key.
/// </remarks>
/// <param name="PooledCount">Number of lifters in pool for this ISA.</param>
/// <param name="ActiveCount">Number of lifters currently in use for this ISA.</param>
/// <param name="MaxPoolSize">Maximum pool size for this ISA.</param>
public sealed record B2R2IsaPoolStats(
int PooledCount,
int ActiveCount,
int MaxPoolSize);

View File

@@ -0,0 +1,42 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
/// <summary>
/// Configuration options for the B2R2 lifter pool.
/// </summary>
public sealed class B2R2LifterPoolOptions
{
/// <summary>
/// Configuration section name.
/// </summary>
public const string SectionName = "StellaOps:BinaryIndex:B2R2LifterPool";
/// <summary>
/// Maximum number of pooled lifters per ISA.
/// </summary>
/// <remarks>
/// The pool consults this value when a lifter is returned: it is only re-pooled while the
/// ISA's pool holds fewer entries than this limit. Warm preload does not check this limit.
/// </remarks>
public int MaxPoolSizePerIsa { get; set; } = 4;
/// <summary>
/// Whether to warm preload lifters for common ISAs at startup.
/// </summary>
/// <remarks>
/// When <c>false</c>, <c>B2R2LifterPool.WarmPool</c> returns without doing anything.
/// </remarks>
public bool EnableWarmPreload { get; set; } = true;
/// <summary>
/// ISAs to warm preload at startup.
/// </summary>
/// <remarks>
/// Each entry is an ISA key of the form <c>arch-bits</c>, for example <c>intel-64</c>.
/// Entries the pool cannot resolve to an ISA are logged as a warning and skipped.
/// </remarks>
public ImmutableArray<string> WarmPreloadIsas { get; set; } =
[
"intel-64",
"intel-32",
"armv8-64",
"armv7-32"
];
/// <summary>
/// Timeout for acquiring a lifter from the pool.
/// </summary>
/// <remarks>
/// NOTE(review): no code visible alongside this type reads this value - confirm it is honoured.
/// </remarks>
public TimeSpan AcquireTimeout { get; set; } = TimeSpan.FromSeconds(5);
}

View File

@@ -0,0 +1,18 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
/// <summary>
/// Statistics for the B2R2 lifter pool.
/// </summary>
/// <remarks>
/// Returned by <c>B2R2LifterPool.GetStats</c>. The totals are summed across all ISA keys.
/// </remarks>
/// <param name="TotalPooledLifters">Total lifters currently in pool.</param>
/// <param name="TotalActiveLifters">Total lifters currently in use.</param>
/// <param name="IsWarm">Whether the pool has been warmed.</param>
/// <param name="IsaStats">Per-ISA pool statistics.</param>
public sealed record B2R2LifterPoolStats(
int TotalPooledLifters,
int TotalActiveLifters,
bool IsWarm,
ImmutableDictionary<string, B2R2IsaPoolStats> IsaStats);

View File

@@ -0,0 +1,73 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2;
using StellaOps.BinaryIndex.Semantic;
using System.Collections.Immutable;
using System.Globalization;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Lifts at most <paramref name="maxInstructions"/> instructions and groups the resulting
    /// statements into basic blocks, closing a block after every instruction that
    /// <c>IsBlockTerminator</c> reports as a terminator.
    /// </summary>
    /// <param name="instructions">Disassembled instructions, consumed in list order.</param>
    /// <param name="isa">B2R2 ISA handed to the per-instruction lifter.</param>
    /// <param name="startAddress">Start address recorded for the first basic block.</param>
    /// <param name="maxInstructions">Upper bound on how many instructions are lifted.</param>
    /// <param name="ct">Checked before each instruction is lifted.</param>
    /// <returns>All lifted statements, and the basic blocks that reference them by statement id.</returns>
    /// <exception cref="OperationCanceledException">Cancellation was requested on <paramref name="ct"/>.</exception>
    private (List<IrStatement> Statements, List<IrBasicBlock> BasicBlocks) BuildStatementsAndBlocks(
        IReadOnlyList<DisassembledInstruction> instructions,
        ISA isa,
        ulong startAddress,
        int maxInstructions,
        CancellationToken ct)
    {
        var statements = new List<IrStatement>();
        var basicBlocks = new List<IrBasicBlock>();
        var currentBlockStatements = new List<int>();
        var blockStartAddress = startAddress;
        var statementId = 0;
        var blockId = 0;

        // End address of the most recently lifted instruction. Used to close the trailing block.
        var lastLiftedEndAddress = startAddress;

        foreach (var instr in instructions.Take(maxInstructions))
        {
            ct.ThrowIfCancellationRequested();

            var liftedStatements = LiftInstructionToLowUir(isa, instr, ref statementId);
            statements.AddRange(liftedStatements);
            foreach (var stmt in liftedStatements)
            {
                currentBlockStatements.Add(stmt.Id);
            }

            lastLiftedEndAddress = instr.Address + (ulong)instr.RawBytes.Length;

            if (IsBlockTerminator(instr))
            {
                basicBlocks.Add(CreateBasicBlock(blockId, blockStartAddress, lastLiftedEndAddress, currentBlockStatements));
                blockId++;
                currentBlockStatements.Clear();
                blockStartAddress = lastLiftedEndAddress;
            }
        }

        // Close the trailing block at the last instruction that was actually lifted.
        // instructions[^1] may lie beyond the maxInstructions cut-off, which would give the
        // block an end address covering instructions that contributed no statements.
        if (currentBlockStatements.Count > 0)
        {
            basicBlocks.Add(CreateBasicBlock(blockId, blockStartAddress, lastLiftedEndAddress, currentBlockStatements));
        }

        return (statements, basicBlocks);
    }

    /// <summary>
    /// Creates a basic block labelled <c>bb_{blockId}</c> with no predecessors or successors.
    /// </summary>
    /// <param name="blockId">Block id, also used to build the label.</param>
    /// <param name="startAddress">Address stored as the block start.</param>
    /// <param name="endAddress">Address stored as the block end.</param>
    /// <param name="statementIds">Statement ids; copied, so the caller may reuse or clear its list afterwards.</param>
    /// <returns>The new block.</returns>
    private static IrBasicBlock CreateBasicBlock(
        int blockId,
        ulong startAddress,
        ulong endAddress,
        IReadOnlyList<int> statementIds)
    {
        return new IrBasicBlock(
            Id: blockId,
            Label: string.Format(CultureInfo.InvariantCulture, "bb_{0}", blockId),
            StartAddress: startAddress,
            EndAddress: endAddress,
            StatementIds: [.. statementIds],
            Predecessors: ImmutableArray<int>.Empty,
            Successors: ImmutableArray<int>.Empty);
    }
}

View File

@@ -0,0 +1,57 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using StellaOps.BinaryIndex.Semantic;
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Chains the blocks linearly: block <c>i</c> gets <c>i + 1</c> as its only successor and
    /// <c>i - 1</c> as its only predecessor, with one fall-through edge per adjacent pair.
    /// </summary>
    /// <param name="blocks">Blocks in positional order; positions are used as block ids for the links.</param>
    /// <returns>Copies of the blocks with links filled in, and the fall-through edges.</returns>
    private static (ImmutableArray<IrBasicBlock> Blocks, ImmutableArray<CfgEdge> Edges) BuildCfgEdges(
        ImmutableArray<IrBasicBlock> blocks)
    {
        var blockCount = blocks.Length;
        if (blockCount == 0)
        {
            return (blocks, ImmutableArray<CfgEdge>.Empty);
        }

        var linkedBlocks = new IrBasicBlock[blockCount];
        var fallThroughEdges = new List<CfgEdge>();

        for (var position = 0; position < blockCount; position++)
        {
            var hasNext = position + 1 < blockCount;
            var hasPrevious = position > 0;

            if (hasNext)
            {
                fallThroughEdges.Add(new CfgEdge(
                    SourceBlockId: position,
                    TargetBlockId: position + 1,
                    Kind: CfgEdgeKind.FallThrough,
                    Condition: null));
            }

            // Each side holds at most one neighbour, so no de-duplication or sorting is required.
            linkedBlocks[position] = blocks[position] with
            {
                Predecessors = hasPrevious ? ImmutableArray.Create(position - 1) : ImmutableArray<int>.Empty,
                Successors = hasNext ? ImmutableArray.Create(position + 1) : ImmutableArray<int>.Empty
            };
        }

        return (linkedBlocks.ToImmutableArray(), fallThroughEdges.ToImmutableArray());
    }

    /// <summary>
    /// Collects the ids of all blocks that have no successors, in input order.
    /// </summary>
    /// <param name="blocks">Blocks to inspect.</param>
    /// <returns>Ids of the blocks whose successor list is empty.</returns>
    private static ImmutableArray<int> FindExitBlocks(ImmutableArray<IrBasicBlock> blocks)
    {
        var exitIds = ImmutableArray.CreateBuilder<int>();
        foreach (var candidate in blocks)
        {
            if (candidate.Successors.Length == 0)
            {
                exitIds.Add(candidate.Id);
            }
        }

        return exitIds.ToImmutable();
    }
}

View File

@@ -0,0 +1,57 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using StellaOps.BinaryIndex.Semantic;
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Builds a single statement directly from the disassembled instruction, tagged with
    /// <c>"fallback" = true</c> in its metadata.
    /// </summary>
    /// <param name="instr">Instruction to describe; its first operand becomes the destination, the rest the sources.</param>
    /// <param name="id">Statement id to assign.</param>
    /// <returns>The fallback statement.</returns>
    private static IrStatement CreateFallbackStatement(DisassembledInstruction instr, int id)
    {
        // Convert every operand once; all operands are given a fixed 64-bit size.
        var converted = instr.Operands
            .Select(op => new IrOperand(
                Kind: MapOperandType(op.Type),
                Name: op.Text,
                Value: op.Value,
                BitSize: 64,
                IsMemory: op.Type == OperandType.Memory))
            .ToImmutableArray();

        var destination = converted.Length > 0 ? converted[0] : null;
        var remaining = converted.Skip(1).ToImmutableArray();
        var metadata = ImmutableDictionary<string, object>.Empty.Add("fallback", true);

        return new IrStatement(
            Id: id,
            Address: instr.Address,
            Kind: MapMnemonicToKind(instr.Mnemonic),
            Operation: instr.Mnemonic,
            Destination: destination,
            Sources: remaining,
            Metadata: metadata);
    }

    /// <summary>
    /// Maps an IR operand kind to an SSA variable kind; kinds without a listed mapping become
    /// <see cref="SsaVariableKind.Temporary"/>.
    /// </summary>
    private static SsaVariableKind MapOperandKindToSsaKind(IrOperandKind kind)
    {
        switch (kind)
        {
            case IrOperandKind.Register:
                return SsaVariableKind.Register;
            case IrOperandKind.Memory:
                return SsaVariableKind.Memory;
            case IrOperandKind.Immediate:
                return SsaVariableKind.Constant;
            case IrOperandKind.Temporary:
            default:
                return SsaVariableKind.Temporary;
        }
    }

    /// <summary>
    /// Maps a disassembler operand type to an IR operand kind; unlisted types become
    /// <see cref="IrOperandKind.Unknown"/>.
    /// </summary>
    private static IrOperandKind MapOperandType(OperandType type)
    {
        switch (type)
        {
            case OperandType.Register:
                return IrOperandKind.Register;
            case OperandType.Immediate:
                return IrOperandKind.Immediate;
            case OperandType.Memory:
                return IrOperandKind.Memory;
            case OperandType.Address:
                return IrOperandKind.Label;
            default:
                return IrOperandKind.Unknown;
        }
    }
}

View File

@@ -0,0 +1,69 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Decides, from the mnemonic alone, whether an instruction ends the current basic block.
    /// </summary>
    /// <remarks>
    /// The mnemonic is upper-cased first. Any mnemonic starting with "J" or "B" counts as a
    /// terminator, as do the listed return-style mnemonics.
    /// NOTE(review): the "B" prefix also matches non-branch mnemonics (for example BSWAP, BT, BIC) —
    /// confirm this over-approximation is acceptable.
    /// </remarks>
    /// <param name="instr">Instruction whose mnemonic is inspected.</param>
    /// <returns><c>true</c> when the instruction is treated as a block terminator.</returns>
    private static bool IsBlockTerminator(DisassembledInstruction instr)
    {
        var mnemonic = instr.Mnemonic.ToUpperInvariant();

        if (mnemonic.StartsWith("J", StringComparison.Ordinal) ||
            mnemonic.StartsWith("B", StringComparison.Ordinal))
        {
            return true;
        }

        // BLR, BX and JR are already covered by the prefix test above; they stay listed so they
        // remain terminators if that test is ever narrowed.
        return mnemonic is "RET" or "RETN" or "RETF" or "IRET" or "SYSRET" or "BLR" or "BX" or "JR";
    }

    /// <summary>
    /// Classifies a mnemonic into a coarse statement kind using prefix and exact-name matching
    /// on the upper-cased mnemonic.
    /// </summary>
    /// <remarks>
    /// Exact call/return mnemonics are tested before the generic "J"/"B" branch prefixes.
    /// Otherwise BL, BLX and BLR would be classified as conditional jumps and the call/return
    /// cases for them could never be reached.
    /// NOTE(review): BLR is mapped to Return as originally written. On AArch64 BLR is an indirect
    /// call, while PowerPC blr is a return — confirm which ISAs reach this mapping.
    /// </remarks>
    /// <param name="mnemonic">Instruction mnemonic in any casing.</param>
    /// <returns>The matching kind, or <see cref="IrStatementKind.Unknown"/> when nothing matches.</returns>
    private static IrStatementKind MapMnemonicToKind(string mnemonic)
    {
        var upper = mnemonic.ToUpperInvariant();

        if (upper.StartsWith("MOV", StringComparison.Ordinal) ||
            upper.StartsWith("LEA", StringComparison.Ordinal) ||
            upper.StartsWith("LDR", StringComparison.Ordinal))
        {
            return IrStatementKind.Assign;
        }

        if (upper.StartsWith("ADD", StringComparison.Ordinal) ||
            upper.StartsWith("SUB", StringComparison.Ordinal) ||
            upper.StartsWith("MUL", StringComparison.Ordinal) ||
            upper.StartsWith("DIV", StringComparison.Ordinal))
        {
            return IrStatementKind.BinaryOp;
        }

        if (upper.StartsWith("AND", StringComparison.Ordinal) ||
            upper.StartsWith("OR", StringComparison.Ordinal) ||
            upper.StartsWith("XOR", StringComparison.Ordinal) ||
            upper.StartsWith("SH", StringComparison.Ordinal))
        {
            return IrStatementKind.BinaryOp;
        }

        if (upper.StartsWith("CMP", StringComparison.Ordinal) ||
            upper.StartsWith("TEST", StringComparison.Ordinal))
        {
            return IrStatementKind.Compare;
        }

        // Exact names first: the branch-prefix test below would otherwise swallow BL/BLX/BLR.
        if (upper == "CALL" || upper == "BL" || upper == "BLX")
        {
            return IrStatementKind.Call;
        }

        if (upper == "RET" || upper == "RETN" || upper == "BLR")
        {
            return IrStatementKind.Return;
        }

        if (upper.StartsWith("J", StringComparison.Ordinal) ||
            upper.StartsWith("B", StringComparison.Ordinal))
        {
            return IrStatementKind.ConditionalJump;
        }

        if (upper.StartsWith("PUSH", StringComparison.Ordinal) ||
            upper.StartsWith("POP", StringComparison.Ordinal) ||
            upper.StartsWith("STR", StringComparison.Ordinal))
        {
            return IrStatementKind.Store;
        }

        if (upper == "NOP")
        {
            return IrStatementKind.Nop;
        }

        return IrStatementKind.Unknown;
    }
}

View File

@@ -0,0 +1,22 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Translates a <see cref="CpuArchitecture"/> value into the corresponding B2R2 <see cref="ISA"/>.
    /// </summary>
    /// <param name="arch">Architecture to translate.</param>
    /// <returns>A newly constructed ISA description.</returns>
    /// <exception cref="NotSupportedException">The architecture has no mapping here.</exception>
    private static ISA MapToB2R2Isa(CpuArchitecture arch)
    {
        switch (arch)
        {
            case CpuArchitecture.X86:
                return new ISA(Architecture.Intel, WordSize.Bit32);
            case CpuArchitecture.X86_64:
                return new ISA(Architecture.Intel, WordSize.Bit64);
            case CpuArchitecture.ARM32:
                return new ISA(Architecture.ARMv7, WordSize.Bit32);
            case CpuArchitecture.ARM64:
                return new ISA(Architecture.ARMv8, WordSize.Bit64);
            case CpuArchitecture.MIPS32:
                return new ISA(Architecture.MIPS, WordSize.Bit32);
            case CpuArchitecture.MIPS64:
                return new ISA(Architecture.MIPS, WordSize.Bit64);
            case CpuArchitecture.RISCV64:
                return new ISA(Architecture.RISCV, WordSize.Bit64);
            case CpuArchitecture.PPC32:
                return new ISA(Architecture.PPC, Endian.Big, WordSize.Bit32);
            case CpuArchitecture.SPARC:
                return new ISA(Architecture.SPARC, Endian.Big);
            default:
                throw new NotSupportedException($"Unsupported architecture: {arch}");
        }
    }
}

View File

@@ -0,0 +1,77 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Semantic;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2LowUirLiftingService
{
    /// <inheritdoc />
    public Task<LiftedFunction> LiftToIrAsync(
        IReadOnlyList<DisassembledInstruction> instructions,
        string functionName,
        ulong startAddress,
        CpuArchitecture architecture,
        LiftOptions? options = null,
        CancellationToken ct = default)
    {
        // Validation runs synchronously, so failures surface as thrown exceptions rather than a faulted task.
        ArgumentNullException.ThrowIfNull(instructions);
        ct.ThrowIfCancellationRequested();

        var effectiveOptions = options ?? LiftOptions.Default;

        var isSupported = SupportsArchitecture(architecture);
        if (!isSupported)
        {
            throw new NotSupportedException(
                $"Architecture {architecture} is not supported for B2R2 LowUIR lifting.");
        }

        _logger.LogDebug(
            "B2R2 LowUIR lifting {InstructionCount} instructions for function {FunctionName} ({Architecture})",
            instructions.Count,
            functionName,
            architecture);

        var result = BuildLiftedFunction(instructions, functionName, startAddress, architecture, effectiveOptions, ct);

        _logger.LogDebug(
            "B2R2 LowUIR lifted {StatementCount} statements in {BlockCount} blocks for {FunctionName}",
            result.Statements.Length,
            result.BasicBlocks.Length,
            functionName);

        return Task.FromResult(result);
    }

    /// <summary>
    /// Lifts the instructions, links the resulting blocks and assembles the final function.
    /// </summary>
    /// <param name="instructions">Disassembled instructions of the function.</param>
    /// <param name="functionName">Name stored on the lifted function.</param>
    /// <param name="startAddress">Address stored on the lifted function and used for the first block.</param>
    /// <param name="architecture">Architecture used to select the B2R2 ISA.</param>
    /// <param name="options">Lifting options; only the instruction limit is read here.</param>
    /// <param name="ct">Cancellation token forwarded to the lifting loop.</param>
    /// <returns>The lifted function with its control-flow graph.</returns>
    private LiftedFunction BuildLiftedFunction(
        IReadOnlyList<DisassembledInstruction> instructions,
        string functionName,
        ulong startAddress,
        CpuArchitecture architecture,
        LiftOptions options,
        CancellationToken ct)
    {
        var isa = MapToB2R2Isa(architecture);
        var instructionLimit = GetEffectiveMaxInstructions(options);

        var (statements, rawBlocks) = BuildStatementsAndBlocks(instructions, isa, startAddress, instructionLimit, ct);
        var (linkedBlocks, edges) = BuildCfgEdges([.. rawBlocks]);

        // -1 marks "no entry block" when nothing was lifted.
        var entryBlockId = linkedBlocks.Length > 0 ? 0 : -1;
        var graph = new ControlFlowGraph(
            EntryBlockId: entryBlockId,
            ExitBlockIds: FindExitBlocks(linkedBlocks),
            Edges: edges);

        return new LiftedFunction(
            Name: functionName,
            Address: startAddress,
            Statements: [.. statements],
            BasicBlocks: linkedBlocks,
            Cfg: graph);
    }

    /// <summary>
    /// Returns the configured instruction limit, or <see cref="int.MaxValue"/> when the configured
    /// value is zero or negative.
    /// </summary>
    private static int GetEffectiveMaxInstructions(LiftOptions options)
    {
        var configured = options.MaxInstructions;
        if (configured > 0)
        {
            return configured;
        }

        return int.MaxValue;
    }
}

View File

@@ -0,0 +1,57 @@
// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using B2R2;
using B2R2.FrontEnd;
using Microsoft.Extensions.Logging;
using StellaOps.BinaryIndex.Semantic;
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Lifts one instruction through B2R2 and converts each resulting B2R2 statement into an
    /// <see cref="IrStatement"/>. When B2R2 returns nothing, or any step throws, a single
    /// fallback statement built from the disassembly is appended instead.
    /// </summary>
    /// <param name="isa">B2R2 ISA used to create the binary handle.</param>
    /// <param name="instr">Instruction to lift.</param>
    /// <param name="statementId">Next free statement id; advanced once per emitted statement.</param>
    /// <returns>The statements produced for this instruction (always at least one).</returns>
    private List<IrStatement> LiftInstructionToLowUir(
        ISA isa,
        DisassembledInstruction instr,
        ref int statementId)
    {
        var produced = new List<IrStatement>();

        try
        {
            // NOTE(review): the handle wraps only this instruction's bytes with a null base address,
            // yet lifting is requested at instr.Address — confirm B2R2 resolves that address inside
            // the buffer; otherwise every call ends in the catch below and falls back.
            var handle = new BinHandle(instr.RawBytes.ToArray(), isa, null, true);
            var b2r2Statements = handle.NewLiftingUnit().LiftInstruction(instr.Address);

            if (b2r2Statements == null || b2r2Statements.Length == 0)
            {
                produced.Add(CreateFallbackStatement(instr, statementId++));
                return produced;
            }

            foreach (var b2r2Stmt in b2r2Statements)
            {
                var typeName = b2r2Stmt.GetType().Name;
                var (destination, sourceOperands) = ExtractOperandsFromB2R2Stmt(b2r2Stmt);

                var converted = new IrStatement(
                    Id: statementId++,
                    Address: instr.Address,
                    Kind: MapB2R2StmtTypeToKind(typeName),
                    Operation: typeName,
                    Destination: destination,
                    Sources: sourceOperands,
                    Metadata: ImmutableDictionary<string, object>.Empty);
                produced.Add(converted);
            }
        }
        catch (Exception ex)
        {
            // Best effort: statements converted before the failure are kept, and a fallback is added after them.
            _logger.LogWarning(ex, "Failed to lift instruction at {Address:X} with B2R2 LowUIR", instr.Address);
            produced.Add(CreateFallbackStatement(instr, statementId++));
        }

        return produced;
    }
}

Some files were not shown because too many files have changed in this diff Show More