part #2
This commit is contained in:
@@ -23,6 +23,7 @@
|
||||
<ProjectReference Include="../__Libraries/StellaOps.BinaryIndex.VexBridge/StellaOps.BinaryIndex.VexBridge.csproj" />
|
||||
<ProjectReference Include="../__Libraries/StellaOps.BinaryIndex.GoldenSet/StellaOps.BinaryIndex.GoldenSet.csproj" />
|
||||
<ProjectReference Include="../__Libraries/StellaOps.BinaryIndex.Disassembly.B2R2/StellaOps.BinaryIndex.Disassembly.B2R2.csproj" />
|
||||
<ProjectReference Include="../../Authority/StellaOps.Authority/StellaOps.Auth.ServerIntegration/StellaOps.Auth.ServerIntegration.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class AstComparisonEngine
|
||||
{
|
||||
/// <summary>
/// Walks two AST subtrees in lock-step and appends every difference found to
/// <paramref name="differences"/>.
/// </summary>
/// <param name="a">Node taken from the first tree.</param>
/// <param name="b">Node at the same position in the second tree.</param>
/// <param name="differences">Accumulator that receives the differences in pre-order.</param>
/// <remarks>
/// A node-type mismatch is reported once and the subtrees below it are not compared.
/// Children are paired by index; trailing children without a partner are reported as
/// <see cref="DifferenceType.Removed"/> (only in <paramref name="a"/>) or
/// <see cref="DifferenceType.Added"/> (only in <paramref name="b"/>).
/// </remarks>
private static void CompareNodes(AstNode a, AstNode b, List<CodeDifference> differences)
{
    if (a.Type != b.Type)
    {
        differences.Add(new CodeDifference(
            DifferenceType.Modified,
            a,
            b,
            $"Node type changed: {a.Type} -> {b.Type}"));
        return;
    }

    // Node-specific payload: every node kind that carries data besides its children
    // is checked, so that e.g. "x = 1" vs "x += 1" or "(int)x" vs "(char)x" is reported.
    string? payloadChange = (a, b) switch
    {
        (VariableNode x, VariableNode y) when x.Name != y.Name
            => $"Variable renamed: {x.Name} -> {y.Name}",
        (ConstantNode x, ConstantNode y) when x.Value?.ToString() != y.Value?.ToString()
            => $"Constant changed: {x.Value} -> {y.Value}",
        (BinaryOpNode x, BinaryOpNode y) when x.Operator != y.Operator
            => $"Operator changed: {x.Operator} -> {y.Operator}",
        (CallNode x, CallNode y) when x.FunctionName != y.FunctionName
            => $"Function call changed: {x.FunctionName} -> {y.FunctionName}",
        (UnaryOpNode x, UnaryOpNode y) when x.Operator != y.Operator || x.IsPrefix != y.IsPrefix
            => $"Unary operator changed: {DescribeUnary(x)} -> {DescribeUnary(y)}",
        (AssignmentNode x, AssignmentNode y) when x.Operator != y.Operator
            => $"Assignment operator changed: {x.Operator} -> {y.Operator}",
        (FieldAccessNode x, FieldAccessNode y) when x.FieldName != y.FieldName || x.IsPointer != y.IsPointer
            => $"Field access changed: {DescribeField(x)} -> {DescribeField(y)}",
        (CastNode x, CastNode y) when x.TargetType != y.TargetType
            => $"Cast target changed: {x.TargetType} -> {y.TargetType}",
        _ => null,
    };

    if (payloadChange is not null)
    {
        differences.Add(new CodeDifference(DifferenceType.Modified, a, b, payloadChange));
    }

    var minChildren = Math.Min(a.Children.Length, b.Children.Length);

    for (var i = 0; i < minChildren; i++)
    {
        CompareNodes(a.Children[i], b.Children[i], differences);
    }

    // Children present only in the first tree.
    for (var i = minChildren; i < a.Children.Length; i++)
    {
        differences.Add(new CodeDifference(
            DifferenceType.Removed,
            a.Children[i],
            null,
            $"Node removed: {a.Children[i].Type}"));
    }

    // Children present only in the second tree.
    for (var i = minChildren; i < b.Children.Length; i++)
    {
        differences.Add(new CodeDifference(
            DifferenceType.Added,
            null,
            b.Children[i],
            $"Node added: {b.Children[i].Type}"));
    }

    static string DescribeUnary(UnaryOpNode node)
        => node.IsPrefix ? $"prefix {node.Operator}" : $"postfix {node.Operator}";

    static string DescribeField(FieldAccessNode node)
        => (node.IsPointer ? "->" : ".") + node.FieldName;
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class AstComparisonEngine
|
||||
{
|
||||
/// <summary>
/// Counts the edit operations separating two subtrees using a simplified,
/// index-aligned comparison.
/// </summary>
/// <param name="a">Root of the first subtree.</param>
/// <param name="b">Root of the second subtree.</param>
/// <returns>
/// One modification when the node types differ (the subtrees are then not descended into);
/// otherwise the surplus children of <paramref name="b"/> counted as insertions, the surplus
/// children of <paramref name="a"/> counted as deletions, plus the totals of the children
/// that are paired by index.
/// </returns>
private static EditOperations ComputeTreeEditOperations(AstNode a, AstNode b)
{
    if (a.Type != b.Type)
    {
        return new EditOperations(0, 0, 1, 1);
    }

    var left = a.Children;
    var right = b.Children;
    var paired = Math.Min(left.Length, right.Length);

    // Unpaired trailing children count as one operation each, whatever their size.
    var added = right.Length - paired;
    var removed = left.Length - paired;
    var changed = 0;

    for (var index = 0; index < paired; index++)
    {
        var nested = ComputeTreeEditOperations(left[index], right[index]);
        added += nested.Insertions;
        removed += nested.Deletions;
        changed += nested.Modifications;
    }

    return new EditOperations(added, removed, changed, added + removed + changed);
}
|
||||
|
||||
/// <summary>
/// Tally of tree edit operations; ComputeTreeEditOperations fills TotalOperations with the
/// sum of the three individual counters.
/// </summary>
private readonly record struct EditOperations(int Insertions, int Deletions, int Modifications, int TotalOperations);
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class AstComparisonEngine
|
||||
{
|
||||
/// <summary>
/// Determines whether two subtrees have the same shape and differ at most in identifier
/// names (variables, called functions, accessed fields, parameters).
/// </summary>
/// <param name="a">Root of the first subtree.</param>
/// <param name="b">Root of the second subtree.</param>
/// <returns>
/// <c>true</c> when node types, child counts, literal values and operators agree all the
/// way down; <c>false</c> otherwise.
/// </returns>
/// <remarks>
/// Literal values and operators are not identifiers, so a mismatch there is a real change
/// rather than a rename. Without this check two different constants, or <c>a * b</c> versus
/// <c>a &lt;&lt; b</c>, would be classified as "renamed".
/// </remarks>
private static bool AreNodesRenamed(AstNode a, AstNode b)
{
    if (a.Type != b.Type || a.Children.Length != b.Children.Length)
    {
        return false;
    }

    // Two variables at the same position are, by definition, a (possible) rename.
    if (a is VariableNode && b is VariableNode)
    {
        return true;
    }

    var nonIdentifierPayloadMatches = (a, b) switch
    {
        (ConstantNode x, ConstantNode y) => x.Value?.ToString() == y.Value?.ToString(),
        (BinaryOpNode x, BinaryOpNode y) => x.Operator == y.Operator,
        (UnaryOpNode x, UnaryOpNode y) => x.Operator == y.Operator && x.IsPrefix == y.IsPrefix,
        (AssignmentNode x, AssignmentNode y) => x.Operator == y.Operator,
        (CastNode x, CastNode y) => x.TargetType == y.TargetType,
        // The field name is an identifier and may differ; the access kind may not.
        (FieldAccessNode x, FieldAccessNode y) => x.IsPointer == y.IsPointer,
        _ => true,
    };

    if (!nonIdentifierPayloadMatches)
    {
        return false;
    }

    for (var i = 0; i < a.Children.Length; i++)
    {
        if (!AreNodesRenamed(a.Children[i], b.Children[i]) &&
            !AreNodesIdentical(a.Children[i], b.Children[i]))
        {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Heuristically detects node pairs that look like the same code before and after a
/// compiler optimization.
/// </summary>
/// <param name="a">Candidate for the unoptimized form.</param>
/// <param name="b">Candidate for the optimized form.</param>
/// <returns>
/// <c>true</c> for a <c>For</c> node against a <c>Block</c> node, a <c>Call</c> node against
/// a <c>Block</c> node, or a binary <c>*</c> against <c>&lt;&lt;</c> / <c>/</c> against
/// <c>&gt;&gt;</c>; <c>false</c> otherwise.
/// </returns>
/// <remarks>
/// The test is directional: <paramref name="a"/> must hold the left-hand form of each pair.
/// Operands are not inspected.
/// </remarks>
private static bool AreOptimizationVariants(AstNode a, AstNode b)
{
    // Possible loop unrolling: a loop replaced by a plain statement block.
    var maybeUnrolled = a.Type == AstNodeType.For && b.Type == AstNodeType.Block;

    // Possible inlining: a call replaced by a plain statement block.
    var maybeInlined = a.Type == AstNodeType.Call && b.Type == AstNodeType.Block;

    // Possible strength reduction: multiply/divide replaced by a shift.
    var maybeStrengthReduced = (a, b) is
        (BinaryOpNode { Operator: "*" }, BinaryOpNode { Operator: "<<" }) or
        (BinaryOpNode { Operator: "/" }, BinaryOpNode { Operator: ">>" });

    return maybeUnrolled || maybeStrengthReduced || maybeInlined;
}
|
||||
|
||||
/// <summary>
/// Lazily enumerates <paramref name="root"/> and all of its descendants in pre-order
/// (a node first, then its children from first to last).
/// </summary>
/// <param name="root">Root of the subtree to flatten.</param>
/// <returns>A deferred sequence of the nodes of the subtree.</returns>
private static IEnumerable<AstNode> CollectNodes(AstNode root)
{
    var pending = new Stack<AstNode>();
    pending.Push(root);

    while (pending.Count > 0)
    {
        var current = pending.Pop();
        yield return current;

        // Push right-to-left so that the first child is popped, and yielded, first.
        var children = current.Children;
        for (var i = children.Length - 1; i >= 0; i--)
        {
            pending.Push(children[i]);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Drops every equivalence that is already implied by another one covering an enclosing
/// subtree on both sides, keeping only the outermost matches.
/// </summary>
/// <param name="equivalences">All equivalences that were found.</param>
/// <returns>
/// The entries of <paramref name="equivalences"/>, in their original order, for which no
/// other entry has ancestors of both <c>NodeA</c> and <c>NodeB</c>.
/// </returns>
private static IEnumerable<SemanticEquivalence> FilterRedundantEquivalences(List<SemanticEquivalence> equivalences)
{
    bool IsCoveredByAnother(SemanticEquivalence candidate)
    {
        foreach (var other in equivalences)
        {
            if (other != candidate &&
                IsAncestor(other.NodeA, candidate.NodeA) &&
                IsAncestor(other.NodeB, candidate.NodeB))
            {
                return true;
            }
        }

        return false;
    }

    return equivalences.Where(candidate => !IsCoveredByAnother(candidate)).ToList();
}
|
||||
|
||||
/// <summary>
/// Determines whether <paramref name="node"/> is located strictly below
/// <paramref name="potential"/> in the tree.
/// </summary>
/// <param name="potential">The candidate ancestor.</param>
/// <param name="node">The node to look for.</param>
/// <returns>
/// <c>true</c> when <paramref name="node"/> is a direct or indirect child of
/// <paramref name="potential"/>; <c>false</c> when it is not, or when both are the same instance.
/// </returns>
/// <remarks>
/// Nodes are compared by reference. <see cref="AstNode"/> is a record, so <c>==</c> would
/// compare by value and treat any structurally equal node (for example a second occurrence
/// of the same variable elsewhere in the tree) as if it were the node being searched for.
/// </remarks>
private static bool IsAncestor(AstNode potential, AstNode node)
{
    if (ReferenceEquals(potential, node))
    {
        return false;
    }

    foreach (var child in potential.Children)
    {
        if (ReferenceEquals(child, node) || IsAncestor(child, node))
        {
            return true;
        }
    }

    return false;
}
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class AstComparisonEngine
|
||||
{
|
||||
/// <summary>
/// Classifies the relationship between two nodes of the same type.
/// </summary>
/// <param name="a">Node from the first tree.</param>
/// <param name="b">Node from the second tree.</param>
/// <returns>
/// An equivalence tagged Identical (1.0), Renamed (0.95) or Optimized (0.85) - the first
/// that applies, in that order - or <c>null</c> when the node types differ or none applies.
/// </returns>
/// <remarks>
/// NOTE(review): because differing node types are rejected up front, the For/Block and
/// Call/Block cases of AreOptimizationVariants cannot be reached from here.
/// </remarks>
private static SemanticEquivalence? CheckEquivalence(AstNode a, AstNode b)
{
    if (a.Type != b.Type)
    {
        return null;
    }

    EquivalenceType kind;
    decimal confidence;
    string reason;

    if (AreNodesIdentical(a, b))
    {
        kind = EquivalenceType.Identical;
        confidence = 1.0m;
        reason = "Identical nodes";
    }
    else if (AreNodesRenamed(a, b))
    {
        kind = EquivalenceType.Renamed;
        confidence = 0.95m;
        reason = "Same structure with renamed identifiers";
    }
    else if (AreOptimizationVariants(a, b))
    {
        kind = EquivalenceType.Optimized;
        confidence = 0.85m;
        reason = "Optimization variant";
    }
    else
    {
        return null;
    }

    return new SemanticEquivalence(a, b, kind, confidence, reason);
}
|
||||
|
||||
/// <summary>
/// Determines whether two subtrees are structurally identical, including the data carried
/// by each node (names, literal values, operators, cast targets, field accesses).
/// </summary>
/// <param name="a">Root of the first subtree.</param>
/// <param name="b">Root of the second subtree.</param>
/// <returns><c>true</c> when both subtrees match node for node; otherwise <c>false</c>.</returns>
/// <remarks>
/// Source locations are deliberately ignored. Constant values are compared through their
/// string representation.
/// </remarks>
private static bool AreNodesIdentical(AstNode a, AstNode b)
{
    if (a.Type != b.Type || a.Children.Length != b.Children.Length)
    {
        return false;
    }

    // Every node kind that carries data besides its children must agree on that data;
    // otherwise "x = 1" / "x += 1" or "-x" / "!x" would be reported as identical.
    var payloadMatches = (a, b) switch
    {
        (ConstantNode x, ConstantNode y) => x.Value?.ToString() == y.Value?.ToString(),
        (VariableNode x, VariableNode y) => x.Name == y.Name,
        (BinaryOpNode x, BinaryOpNode y) => x.Operator == y.Operator,
        (CallNode x, CallNode y) => x.FunctionName == y.FunctionName,
        (UnaryOpNode x, UnaryOpNode y) => x.Operator == y.Operator && x.IsPrefix == y.IsPrefix,
        (AssignmentNode x, AssignmentNode y) => x.Operator == y.Operator,
        (FieldAccessNode x, FieldAccessNode y) => x.FieldName == y.FieldName && x.IsPointer == y.IsPointer,
        (CastNode x, CastNode y) => x.TargetType == y.TargetType,
        _ => true,
    };

    if (!payloadMatches)
    {
        return false;
    }

    for (var i = 0; i < a.Children.Length; i++)
    {
        if (!AreNodesIdentical(a.Children[i], b.Children[i]))
        {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
@@ -8,7 +7,7 @@ namespace StellaOps.BinaryIndex.Decompiler;
|
||||
/// <summary>
|
||||
/// Engine for comparing AST structures using tree edit distance and semantic analysis.
|
||||
/// </summary>
|
||||
public sealed class AstComparisonEngine : IAstComparisonEngine
|
||||
public sealed partial class AstComparisonEngine : IAstComparisonEngine
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public decimal ComputeStructuralSimilarity(DecompiledAst a, DecompiledAst b)
|
||||
@@ -16,7 +15,6 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
|
||||
ArgumentNullException.ThrowIfNull(a);
|
||||
ArgumentNullException.ThrowIfNull(b);
|
||||
|
||||
// Use normalized tree edit distance
|
||||
var editDistance = ComputeEditDistance(a, b);
|
||||
return 1.0m - editDistance.NormalizedDistance;
|
||||
}
|
||||
@@ -27,9 +25,7 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
|
||||
ArgumentNullException.ThrowIfNull(a);
|
||||
ArgumentNullException.ThrowIfNull(b);
|
||||
|
||||
// Simplified Zhang-Shasha tree edit distance
|
||||
var operations = ComputeTreeEditOperations(a.Root, b.Root);
|
||||
|
||||
var totalNodes = Math.Max(a.NodeCount, b.NodeCount);
|
||||
var normalized = totalNodes > 0
|
||||
? (decimal)operations.TotalOperations / totalNodes
|
||||
@@ -50,8 +46,6 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
|
||||
ArgumentNullException.ThrowIfNull(b);
|
||||
|
||||
var equivalences = new List<SemanticEquivalence>();
|
||||
|
||||
// Find equivalent subtrees
|
||||
var nodesA = CollectNodes(a.Root).ToList();
|
||||
var nodesB = CollectNodes(b.Root).ToList();
|
||||
|
||||
@@ -67,7 +61,6 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
|
||||
}
|
||||
}
|
||||
|
||||
// Remove redundant equivalences (child nodes when parent is equivalent)
|
||||
return [.. FilterRedundantEquivalences(equivalences)];
|
||||
}
|
||||
|
||||
@@ -78,315 +71,8 @@ public sealed class AstComparisonEngine : IAstComparisonEngine
|
||||
ArgumentNullException.ThrowIfNull(b);
|
||||
|
||||
var differences = new List<CodeDifference>();
|
||||
|
||||
// Compare root structures
|
||||
CompareNodes(a.Root, b.Root, differences);
|
||||
|
||||
return [.. differences];
|
||||
}
|
||||
|
||||
private static EditOperations ComputeTreeEditOperations(AstNode a, AstNode b)
|
||||
{
|
||||
// Simplified tree comparison
|
||||
if (a.Type != b.Type)
|
||||
{
|
||||
return new EditOperations(0, 0, 1, 1);
|
||||
}
|
||||
|
||||
var childrenA = a.Children;
|
||||
var childrenB = b.Children;
|
||||
|
||||
var insertions = 0;
|
||||
var deletions = 0;
|
||||
var modifications = 0;
|
||||
|
||||
// Compare children using LCS-like approach
|
||||
var maxLen = Math.Max(childrenA.Length, childrenB.Length);
|
||||
var minLen = Math.Min(childrenA.Length, childrenB.Length);
|
||||
|
||||
insertions = childrenB.Length - minLen;
|
||||
deletions = childrenA.Length - minLen;
|
||||
|
||||
for (var i = 0; i < minLen; i++)
|
||||
{
|
||||
var childOps = ComputeTreeEditOperations(childrenA[i], childrenB[i]);
|
||||
insertions += childOps.Insertions;
|
||||
deletions += childOps.Deletions;
|
||||
modifications += childOps.Modifications;
|
||||
}
|
||||
|
||||
return new EditOperations(insertions, deletions, modifications, insertions + deletions + modifications);
|
||||
}
|
||||
|
||||
private static SemanticEquivalence? CheckEquivalence(AstNode a, AstNode b)
|
||||
{
|
||||
// Same type - potential equivalence
|
||||
if (a.Type != b.Type)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Check for identical
|
||||
if (AreNodesIdentical(a, b))
|
||||
{
|
||||
return new SemanticEquivalence(a, b, EquivalenceType.Identical, 1.0m, "Identical nodes");
|
||||
}
|
||||
|
||||
// Check for renamed (same structure, different names)
|
||||
if (AreNodesRenamed(a, b))
|
||||
{
|
||||
return new SemanticEquivalence(a, b, EquivalenceType.Renamed, 0.95m, "Same structure with renamed identifiers");
|
||||
}
|
||||
|
||||
// Check for optimization variants
|
||||
if (AreOptimizationVariants(a, b))
|
||||
{
|
||||
return new SemanticEquivalence(a, b, EquivalenceType.Optimized, 0.85m, "Optimization variant");
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static bool AreNodesIdentical(AstNode a, AstNode b)
|
||||
{
|
||||
if (a.Type != b.Type || a.Children.Length != b.Children.Length)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check node-specific equality
|
||||
if (a is ConstantNode constA && b is ConstantNode constB)
|
||||
{
|
||||
return constA.Value?.ToString() == constB.Value?.ToString();
|
||||
}
|
||||
|
||||
if (a is VariableNode varA && b is VariableNode varB)
|
||||
{
|
||||
return varA.Name == varB.Name;
|
||||
}
|
||||
|
||||
if (a is BinaryOpNode binA && b is BinaryOpNode binB)
|
||||
{
|
||||
if (binA.Operator != binB.Operator)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (a is CallNode callA && b is CallNode callB)
|
||||
{
|
||||
if (callA.FunctionName != callB.FunctionName)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check children recursively
|
||||
for (var i = 0; i < a.Children.Length; i++)
|
||||
{
|
||||
if (!AreNodesIdentical(a.Children[i], b.Children[i]))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static bool AreNodesRenamed(AstNode a, AstNode b)
|
||||
{
|
||||
if (a.Type != b.Type || a.Children.Length != b.Children.Length)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Same structure but variable/parameter names differ
|
||||
if (a is VariableNode && b is VariableNode)
|
||||
{
|
||||
return true; // Different name but same position = renamed
|
||||
}
|
||||
|
||||
// Check children have same structure
|
||||
for (var i = 0; i < a.Children.Length; i++)
|
||||
{
|
||||
if (!AreNodesRenamed(a.Children[i], b.Children[i]) &&
|
||||
!AreNodesIdentical(a.Children[i], b.Children[i]))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static bool AreOptimizationVariants(AstNode a, AstNode b)
|
||||
{
|
||||
// Detect common optimization patterns
|
||||
|
||||
// Loop unrolling: for loop vs repeated statements
|
||||
if (a.Type == AstNodeType.For && b.Type == AstNodeType.Block)
|
||||
{
|
||||
return true; // Might be unrolled
|
||||
}
|
||||
|
||||
// Strength reduction: multiplication vs addition
|
||||
if (a is BinaryOpNode binA && b is BinaryOpNode binB)
|
||||
{
|
||||
if ((binA.Operator == "*" && binB.Operator == "<<") ||
|
||||
(binA.Operator == "/" && binB.Operator == ">>"))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Inline expansion
|
||||
if (a.Type == AstNodeType.Call && b.Type == AstNodeType.Block)
|
||||
{
|
||||
return true; // Might be inlined
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private static void CompareNodes(AstNode a, AstNode b, List<CodeDifference> differences)
|
||||
{
|
||||
if (a.Type != b.Type)
|
||||
{
|
||||
differences.Add(new CodeDifference(
|
||||
DifferenceType.Modified,
|
||||
a,
|
||||
b,
|
||||
$"Node type changed: {a.Type} -> {b.Type}"));
|
||||
return;
|
||||
}
|
||||
|
||||
// Compare specific node types
|
||||
switch (a)
|
||||
{
|
||||
case VariableNode varA when b is VariableNode varB:
|
||||
if (varA.Name != varB.Name)
|
||||
{
|
||||
differences.Add(new CodeDifference(
|
||||
DifferenceType.Modified,
|
||||
a,
|
||||
b,
|
||||
$"Variable renamed: {varA.Name} -> {varB.Name}"));
|
||||
}
|
||||
break;
|
||||
|
||||
case ConstantNode constA when b is ConstantNode constB:
|
||||
if (constA.Value?.ToString() != constB.Value?.ToString())
|
||||
{
|
||||
differences.Add(new CodeDifference(
|
||||
DifferenceType.Modified,
|
||||
a,
|
||||
b,
|
||||
$"Constant changed: {constA.Value} -> {constB.Value}"));
|
||||
}
|
||||
break;
|
||||
|
||||
case BinaryOpNode binA when b is BinaryOpNode binB:
|
||||
if (binA.Operator != binB.Operator)
|
||||
{
|
||||
differences.Add(new CodeDifference(
|
||||
DifferenceType.Modified,
|
||||
a,
|
||||
b,
|
||||
$"Operator changed: {binA.Operator} -> {binB.Operator}"));
|
||||
}
|
||||
break;
|
||||
|
||||
case CallNode callA when b is CallNode callB:
|
||||
if (callA.FunctionName != callB.FunctionName)
|
||||
{
|
||||
differences.Add(new CodeDifference(
|
||||
DifferenceType.Modified,
|
||||
a,
|
||||
b,
|
||||
$"Function call changed: {callA.FunctionName} -> {callB.FunctionName}"));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Compare children
|
||||
var minChildren = Math.Min(a.Children.Length, b.Children.Length);
|
||||
|
||||
for (var i = 0; i < minChildren; i++)
|
||||
{
|
||||
CompareNodes(a.Children[i], b.Children[i], differences);
|
||||
}
|
||||
|
||||
// Handle added/removed children
|
||||
for (var i = minChildren; i < a.Children.Length; i++)
|
||||
{
|
||||
differences.Add(new CodeDifference(
|
||||
DifferenceType.Removed,
|
||||
a.Children[i],
|
||||
null,
|
||||
$"Node removed: {a.Children[i].Type}"));
|
||||
}
|
||||
|
||||
for (var i = minChildren; i < b.Children.Length; i++)
|
||||
{
|
||||
differences.Add(new CodeDifference(
|
||||
DifferenceType.Added,
|
||||
null,
|
||||
b.Children[i],
|
||||
$"Node added: {b.Children[i].Type}"));
|
||||
}
|
||||
}
|
||||
|
||||
private static IEnumerable<AstNode> CollectNodes(AstNode root)
|
||||
{
|
||||
yield return root;
|
||||
foreach (var child in root.Children)
|
||||
{
|
||||
foreach (var node in CollectNodes(child))
|
||||
{
|
||||
yield return node;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static IEnumerable<SemanticEquivalence> FilterRedundantEquivalences(
|
||||
List<SemanticEquivalence> equivalences)
|
||||
{
|
||||
// Keep only top-level equivalences
|
||||
var result = new List<SemanticEquivalence>();
|
||||
|
||||
foreach (var eq in equivalences)
|
||||
{
|
||||
var isRedundant = equivalences.Any(other =>
|
||||
other != eq &&
|
||||
IsAncestor(other.NodeA, eq.NodeA) &&
|
||||
IsAncestor(other.NodeB, eq.NodeB));
|
||||
|
||||
if (!isRedundant)
|
||||
{
|
||||
result.Add(eq);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static bool IsAncestor(AstNode potential, AstNode node)
|
||||
{
|
||||
if (potential == node)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
foreach (var child in potential.Children)
|
||||
{
|
||||
if (child == node || IsAncestor(child, node))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private readonly record struct EditOperations(int Insertions, int Deletions, int Modifications, int TotalOperations);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Base type of every node in a decompiled abstract syntax tree.
/// </summary>
/// <param name="Type">Kind of the node.</param>
/// <param name="Children">Child nodes, in order.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public abstract record AstNode(
    AstNodeType Type,
    ImmutableArray<AstNode> Children,
    SourceLocation? Location);
|
||||
|
||||
/// <summary>
/// Kinds of AST nodes, grouped by role. Member order defines the underlying values.
/// </summary>
public enum AstNodeType
{
    // Structure
    Function,
    Block,
    Parameter,

    // Control flow
    If,
    While,
    For,
    DoWhile,
    Switch,
    Case,
    Default,
    Return,
    Break,
    Continue,
    Goto,
    Label,

    // Expressions
    Assignment,
    BinaryOp,
    UnaryOp,
    TernaryOp,
    Call,
    Cast,
    Sizeof,

    // Operands
    Variable,
    Constant,
    StringLiteral,
    ArrayAccess,
    FieldAccess,
    PointerDeref,
    AddressOf,

    // Declarations
    VariableDecl,
    TypeDef
}
|
||||
@@ -0,0 +1,29 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Conditional statement. Children are the condition, the then-branch and, only when
/// present, the else-branch.
/// </summary>
/// <param name="Condition">Expression that selects the branch.</param>
/// <param name="ThenBranch">Branch taken when the condition holds.</param>
/// <param name="ElseBranch">Optional branch taken otherwise.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record IfNode(
    AstNode Condition,
    AstNode ThenBranch,
    AstNode? ElseBranch,
    SourceLocation? Location = null)
    : AstNode(
        AstNodeType.If,
        ElseBranch is not null ? [Condition, ThenBranch, ElseBranch] : [Condition, ThenBranch],
        Location);
|
||||
|
||||
/// <summary>
/// While loop. Children are the condition followed by the body.
/// </summary>
/// <param name="Condition">Loop condition.</param>
/// <param name="Body">Loop body.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record WhileNode(
    AstNode Condition,
    AstNode Body,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.While, [Condition, Body], Location);
|
||||
|
||||
/// <summary>
/// For loop. It always has four children - init, condition, update, body - with
/// <see cref="EmptyNode.Instance"/> standing in for any omitted clause.
/// </summary>
/// <param name="Init">Optional initialisation clause.</param>
/// <param name="Condition">Optional loop condition.</param>
/// <param name="Update">Optional update clause.</param>
/// <param name="Body">Loop body.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record ForNode(
    AstNode? Init,
    AstNode? Condition,
    AstNode? Update,
    AstNode Body,
    SourceLocation? Location = null)
    : AstNode(
        AstNodeType.For,
        [
            Init ?? EmptyNode.Instance,
            Condition ?? EmptyNode.Instance,
            Update ?? EmptyNode.Instance,
            Body,
        ],
        Location);
|
||||
|
||||
/// <summary>
/// Return statement. It has one child when a value is returned and none otherwise.
/// </summary>
/// <param name="Value">Optional returned expression.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record ReturnNode(
    AstNode? Value,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.Return, Value is not null ? [Value] : [], Location);
|
||||
@@ -0,0 +1,42 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Function call. The arguments are the node's children.
/// </summary>
/// <param name="FunctionName">Name of the called function.</param>
/// <param name="Arguments">Argument expressions, in call order.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record CallNode(
    string FunctionName,
    ImmutableArray<AstNode> Arguments,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.Call, Arguments, Location);
|
||||
|
||||
/// <summary>
/// Reference to a variable. Leaf node (no children).
/// </summary>
/// <param name="Name">Variable name.</param>
/// <param name="DataType">Declared type of the variable, when known.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record VariableNode(
    string Name,
    string? DataType,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.Variable, [], Location);
|
||||
|
||||
/// <summary>
/// Literal constant. Leaf node (no children).
/// </summary>
/// <param name="Value">The constant's value, boxed.</param>
/// <param name="DataType">Type name of the constant.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record ConstantNode(
    object Value,
    string DataType,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.Constant, [], Location);
|
||||
|
||||
/// <summary>
/// Array element access. Children are the array expression followed by the index expression.
/// </summary>
/// <param name="Array">Expression producing the array.</param>
/// <param name="Index">Expression producing the index.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record ArrayAccessNode(
    AstNode Array,
    AstNode Index,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.ArrayAccess, [Array, Index], Location);
|
||||
|
||||
/// <summary>
/// Field access on an object expression, which is the node's only child.
/// </summary>
/// <param name="Object">Expression whose field is accessed.</param>
/// <param name="FieldName">Name of the accessed field.</param>
/// <param name="IsPointer">Whether the access goes through a pointer.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record FieldAccessNode(
    AstNode Object,
    string FieldName,
    bool IsPointer,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.FieldAccess, [Object], Location);
|
||||
|
||||
/// <summary>
/// Type cast. The cast expression is the node's only child.
/// </summary>
/// <param name="Expression">Expression being cast.</param>
/// <param name="TargetType">Name of the type cast to.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record CastNode(
    AstNode Expression,
    string TargetType,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.Cast, [Expression], Location);
|
||||
@@ -0,0 +1,24 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Assignment. Children are the target followed by the assigned value.
/// </summary>
/// <param name="Target">Expression being assigned to.</param>
/// <param name="Value">Expression being assigned.</param>
/// <param name="Operator">Assignment operator text.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record AssignmentNode(
    AstNode Target,
    AstNode Value,
    string Operator,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.Assignment, [Target, Value], Location);
|
||||
|
||||
/// <summary>
/// Binary operation. Children are the left operand followed by the right operand.
/// </summary>
/// <param name="Left">Left operand.</param>
/// <param name="Right">Right operand.</param>
/// <param name="Operator">Operator text, e.g. <c>*</c> or <c>&lt;&lt;</c>.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record BinaryOpNode(
    AstNode Left,
    AstNode Right,
    string Operator,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.BinaryOp, [Left, Right], Location);
|
||||
|
||||
/// <summary>
/// Unary operation. The operand is the node's only child.
/// </summary>
/// <param name="Operand">Expression the operator applies to.</param>
/// <param name="Operator">Operator text.</param>
/// <param name="IsPrefix">Whether the operator is written before the operand.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record UnaryOpNode(
    AstNode Operand,
    string Operator,
    bool IsPrefix,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.UnaryOp, [Operand], Location);
|
||||
@@ -0,0 +1,30 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Function definition. The body is the first child, followed by the parameters in order.
/// </summary>
/// <param name="Name">Function name.</param>
/// <param name="ReturnType">Name of the return type.</param>
/// <param name="Parameters">Declared parameters.</param>
/// <param name="Body">Function body.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record FunctionNode(
    string Name,
    string ReturnType,
    ImmutableArray<ParameterNode> Parameters,
    BlockNode Body,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.Function, [Body, .. Parameters], Location);
|
||||
|
||||
/// <summary>
/// Function parameter. Leaf node (no children).
/// </summary>
/// <param name="Name">Parameter name.</param>
/// <param name="DataType">Name of the parameter's type.</param>
/// <param name="Index">Index of the parameter.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record ParameterNode(
    string Name,
    string DataType,
    int Index,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.Parameter, [], Location);
|
||||
|
||||
/// <summary>
/// Statement block. The statements are the node's children.
/// </summary>
/// <param name="Statements">Statements of the block, in order.</param>
/// <param name="Location">Position in the decompiled source, when known.</param>
public sealed record BlockNode(
    ImmutableArray<AstNode> Statements,
    SourceLocation? Location = null)
    : AstNode(AstNodeType.Block, Statements, Location);
|
||||
|
||||
/// <summary>
/// Placeholder for an absent node. It reports <see cref="AstNodeType.Block"/> with no
/// children and no location.
/// </summary>
public sealed record EmptyNode() : AstNode(AstNodeType.Block, [], null)
{
    /// <summary>Shared placeholder instance.</summary>
    public static EmptyNode Instance { get; } = new();
}
|
||||
@@ -0,0 +1,55 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// A code pattern recognized in an AST.
/// </summary>
/// <param name="Type">Kind of pattern.</param>
/// <param name="Node">Node the pattern is attached to.</param>
/// <param name="Metadata">Optional details about the match.</param>
public sealed record AstPattern(
    PatternType Type,
    AstNode Node,
    PatternMetadata? Metadata);
|
||||
|
||||
/// <summary>
/// Kinds of recognizable code patterns, grouped by category. Member order defines the
/// underlying values.
/// </summary>
public enum PatternType
{
    // Loops
    CountedLoop,
    ConditionalLoop,
    InfiniteLoop,
    LoopUnrolled,

    // Branches
    IfElseChain,
    SwitchTable,
    ShortCircuit,

    // Memory
    MemoryAllocation,
    MemoryDeallocation,
    BufferOperation,
    StackBuffer,

    // Error handling
    ErrorCheck,
    NullCheck,
    BoundsCheck,

    // Idioms
    StringOperation,
    MathOperation,
    BitwiseOperation,
    TableLookup
}
|
||||
|
||||
/// <summary>
/// Details attached to a recognized pattern.
/// </summary>
/// <param name="Description">Human-readable description of the match.</param>
/// <param name="Confidence">Confidence assigned to the match.</param>
/// <param name="Properties">Optional additional key/value details.</param>
public sealed record PatternMetadata(
    string Description,
    decimal Confidence,
    ImmutableDictionary<string, string>? Properties);
|
||||
@@ -0,0 +1,25 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// One difference found between two pieces of code.
/// </summary>
/// <param name="Type">Kind of difference.</param>
/// <param name="NodeA">Node on the first side; <c>null</c> when it exists only on the second.</param>
/// <param name="NodeB">Node on the second side; <c>null</c> when it exists only on the first.</param>
/// <param name="Description">Human-readable description of the difference.</param>
public sealed record CodeDifference(
    DifferenceType Type,
    AstNode? NodeA,
    AstNode? NodeB,
    string Description);
|
||||
|
||||
/// <summary>
/// Kinds of differences between two pieces of code. Member order defines the underlying
/// values.
/// </summary>
public enum DifferenceType
{
    Added,
    Removed,
    Modified,
    Reordered,
    TypeChanged,
    OptimizationVariant
}
|
||||
@@ -0,0 +1,28 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class CodeNormalizer
|
||||
{
|
||||
/// <summary>
/// Returns a copy of <paramref name="node"/> whose children have each been normalized.
/// </summary>
/// <param name="node">Node whose children are processed.</param>
/// <param name="options">Normalization switches.</param>
/// <param name="varMap">Mapping from original to canonical variable names, shared across the whole tree.</param>
/// <param name="varIndex">Next free canonical variable index; advanced as new variables are met.</param>
/// <returns>
/// <paramref name="node"/> itself when it has no children; otherwise a copy with the
/// normalized child list.
/// </returns>
/// <remarks>
/// NOTE(review): only <c>Children</c> is replaced. Named positional properties of concrete
/// node records (e.g. the operands of a binary operation) keep pointing at the original,
/// un-normalized child instances.
/// </remarks>
private static AstNode NormalizeChildren(
    AstNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var originalChildren = node.Children;
    if (originalChildren.IsDefaultOrEmpty)
    {
        return node;
    }

    var rewritten = new AstNode[originalChildren.Length];
    for (var i = 0; i < rewritten.Length; i++)
    {
        rewritten[i] = NormalizeNode(originalChildren[i], options, varMap, ref varIndex);
    }

    return node with { Children = ImmutableArray.Create(rewritten) };
}
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class CodeNormalizer
|
||||
{
|
||||
/// <inheritdoc />
public DecompiledAst NormalizeAst(DecompiledAst ast, NormalizationOptions? options = null)
{
    ArgumentNullException.ThrowIfNull(ast);

    var effectiveOptions = options ?? NormalizationOptions.Default;

    // One name map and one counter per call, so canonical names are assigned in
    // first-encounter order across the whole tree.
    var canonicalNames = new Dictionary<string, string>();
    var nextVariable = 0;

    var root = NormalizeNode(ast.Root, effectiveOptions, canonicalNames, ref nextVariable);

    // Node count, depth and patterns are carried over unchanged from the input.
    return new DecompiledAst(root, ast.NodeCount, ast.Depth, ast.Patterns);
}
|
||||
|
||||
/// <summary>
/// Normalizes a single node, dispatching on its kind and on the enabled options.
/// </summary>
/// <param name="node">Node to normalize.</param>
/// <param name="options">Normalization switches.</param>
/// <param name="varMap">Mapping from original to canonical variable names.</param>
/// <param name="varIndex">Next free canonical variable index.</param>
/// <returns>
/// The normalized node. Variables, calls and constants get dedicated handling when the
/// corresponding option is enabled; every other case only has its children normalized.
/// </returns>
private static AstNode NormalizeNode(
    AstNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    if (node is VariableNode variable && options.NormalizeVariables)
    {
        return NormalizeVariableNode(variable, varMap, ref varIndex);
    }

    if (node is CallNode call && options.NormalizeFunctionCalls)
    {
        return NormalizeCallNode(call, options, varMap, ref varIndex);
    }

    if (node is ConstantNode constant && options.NormalizeConstants)
    {
        return NormalizeConstantNode(constant);
    }

    return NormalizeChildren(node, options, varMap, ref varIndex);
}
|
||||
|
||||
/// <summary>
/// Replaces a variable's name by its canonical <c>var_N</c> name.
/// </summary>
/// <param name="node">Variable node to rename.</param>
/// <param name="varMap">Mapping from original to canonical names; extended for names not seen before.</param>
/// <param name="varIndex">Next free canonical index; incremented when a new name is assigned.</param>
/// <returns>
/// <paramref name="node"/> unchanged when its name is a keyword or type name; otherwise a
/// copy carrying the canonical name.
/// </returns>
private static AstNode NormalizeVariableNode(
    VariableNode node,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var originalName = node.Name;
    if (IsKeywordOrType(originalName))
    {
        return node;
    }

    if (varMap.TryGetValue(originalName, out var known))
    {
        return node with { Name = known };
    }

    // First occurrence: allocate the next canonical name and remember it.
    var assigned = $"var_{varIndex++}";
    varMap[originalName] = assigned;
    return node with { Name = assigned };
}
|
||||
|
||||
/// <summary>
/// Normalizes a call: unknown function names are replaced by a stable
/// <c>func_XXXXXXXX</c> placeholder and every argument is normalized.
/// </summary>
/// <param name="node">Call node to normalize.</param>
/// <param name="options">Normalization switches; <c>KnownFunctions</c> lists names to keep.</param>
/// <param name="varMap">Mapping from original to canonical variable names.</param>
/// <param name="varIndex">Next free canonical variable index.</param>
/// <returns>A new call node with the normalized name and arguments and the original location.</returns>
/// <remarks>
/// The placeholder is derived from a deterministic FNV-1a hash of the name.
/// <c>string.GetHashCode()</c> must not be used here because it is randomized per process,
/// which would make normalized output differ from run to run.
/// </remarks>
private static AstNode NormalizeCallNode(
    CallNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var funcName = node.FunctionName;

    if (options.KnownFunctions?.Contains(funcName) != true &&
        !IsStandardLibraryFunction(funcName))
    {
        funcName = $"func_{StableHash(funcName):X8}";
    }

    var normalizedArgs = new List<AstNode>(node.Arguments.Length);
    foreach (var arg in node.Arguments)
    {
        normalizedArgs.Add(NormalizeNode(arg, options, varMap, ref varIndex));
    }

    return new CallNode(funcName, [.. normalizedArgs], node.Location);

    // 32-bit FNV-1a over the UTF-16 code units (low byte, then high byte).
    static uint StableHash(string text)
    {
        const uint OffsetBasis = 2166136261;
        const uint Prime = 16777619;

        var hash = OffsetBasis;
        foreach (var ch in text)
        {
            unchecked
            {
                hash = (hash ^ (byte)ch) * Prime;
                hash = (hash ^ (byte)(ch >> 8)) * Prime;
            }
        }

        return hash;
    }
}
|
||||
|
||||
/// <summary>
/// Replaces a constant's value by a placeholder naming its category, so that constants
/// of the same category compare equal after normalization.
/// </summary>
/// <param name="node">Constant node to normalize.</param>
/// <returns>
/// A copy whose value is <c>CONST_INT</c> (any built-in signed or unsigned integer type),
/// <c>CONST_FLOAT</c> (<c>float</c>, <c>double</c>, <c>decimal</c>) or <c>CONST_STR</c>
/// (<c>string</c>); the node itself for any other value type.
/// </returns>
private static AstNode NormalizeConstantNode(ConstantNode node)
{
    // Unsigned and sbyte values are integers too and must get the same placeholder
    // as their signed counterparts.
    var placeholder = node.Value switch
    {
        sbyte or byte or short or ushort or int or uint or long or ulong => "CONST_INT",
        float or double or decimal => "CONST_FLOAT",
        string => "CONST_STR",
        _ => null,
    };

    if (placeholder is null)
    {
        return node;
    }

    return node with { Value = placeholder };
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class CodeNormalizer
{
    /// <summary>
    /// Names that identifier normalization leaves untouched: C keywords,
    /// common Ghidra / fixed-width type names, and NULL/true/false.
    /// </summary>
    private static readonly ImmutableHashSet<string> _cKeywords = ImmutableHashSet.Create(
        // C keywords
        "auto", "break", "case", "char",
        "const", "continue", "default", "do",
        "double", "else", "enum", "extern",
        "float", "for", "goto", "if",
        "int", "long", "register", "return",
        "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union",
        "unsigned", "void", "volatile", "while",
        // Common Ghidra types
        "undefined", "undefined1", "undefined2", "undefined4", "undefined8",
        "byte", "word", "dword", "qword",
        "bool", "uchar", "ushort", "uint", "ulong",
        "int8_t", "int16_t", "int32_t", "int64_t",
        "uint8_t", "uint16_t", "uint32_t", "uint64_t",
        "size_t", "ssize_t", "ptrdiff_t", "intptr_t", "uintptr_t",
        // Literal-like names that are also preserved
        "NULL", "true", "false");

    /// <summary>Returns whether <paramref name="name"/> is in the preserved keyword/type set.</summary>
    private static bool IsKeywordOrType(string name) => _cKeywords.Contains(name);

    /// <summary>
    /// Returns whether <paramref name="name"/> is one of the hard-coded C standard
    /// library names that normalization preserves.
    /// </summary>
    private static bool IsStandardLibraryFunction(string name)
    {
        switch (name)
        {
            // Memory
            case "malloc": case "calloc": case "realloc": case "free":
            case "memcpy": case "memmove": case "memset": case "memcmp":
            // Strings
            case "strlen": case "strcpy": case "strncpy": case "strcat": case "strncat":
            case "strcmp": case "strncmp": case "strchr": case "strrchr": case "strstr":
            // Formatted and stream I/O
            case "printf": case "fprintf": case "sprintf": case "snprintf":
            case "scanf": case "fscanf": case "sscanf":
            case "fopen": case "fclose": case "fread": case "fwrite":
            case "fseek": case "ftell": case "fflush":
            case "puts": case "fputs": case "gets": case "fgets":
            case "putchar": case "getchar":
            // Math
            case "abs": case "labs": case "llabs": case "fabs": case "sqrt": case "pow":
            case "sin": case "cos": case "tan": case "log": case "exp":
            // Process control and conversions
            case "exit": case "abort": case "atexit":
            case "atoi": case "atol": case "atof":
            case "strtol": case "strtoul": case "strtod":
            case "assert": case "errno":
                return true;

            default:
                return false;
        }
    }
}
|
||||
@@ -0,0 +1,44 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class CodeNormalizer
{
    // ---- Comments -------------------------------------------------------

    /// <summary>Matches <c>//</c> up to, but not including, the next line feed.</summary>
    [GeneratedRegex(@"//[^\n]*")]
    private static partial Regex SingleLineCommentRegex();

    /// <summary>Matches a <c>/* ... */</c> comment, lazily, across line breaks.</summary>
    [GeneratedRegex(@"/\*[\s\S]*?\*/")]
    private static partial Regex MultiLineCommentRegex();

    // ---- Identifiers and calls ------------------------------------------

    /// <summary>Matches a whole C-style identifier; group 1 is the identifier.</summary>
    [GeneratedRegex(@"\b([a-zA-Z_][a-zA-Z0-9_]*)\b")]
    private static partial Regex IdentifierRegex();

    /// <summary>Matches an identifier followed by optional whitespace and <c>(</c>; group 1 is the identifier.</summary>
    [GeneratedRegex(@"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")]
    private static partial Regex FunctionCallRegex();

    /// <summary>Matches an identifier at the start of the input followed by <c>=</c>; group 1 is the identifier.</summary>
    [GeneratedRegex(@"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")]
    private static partial Regex AssignmentTargetRegex();

    /// <summary>Matches an identifier at the start of the input followed by <c>(</c>; group 1 is the identifier.</summary>
    [GeneratedRegex(@"^([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")]
    private static partial Regex FunctionNameRegex();

    // ---- Constants ------------------------------------------------------

    /// <summary>Matches a <c>0x</c>/<c>0X</c>-prefixed hexadecimal literal.</summary>
    [GeneratedRegex(@"0[xX][0-9a-fA-F]+")]
    private static partial Regex HexConstantRegex();

    /// <summary>Matches a standalone run of four or more decimal digits.</summary>
    [GeneratedRegex(@"\b[0-9]{4,}\b")]
    private static partial Regex LargeDecimalRegex();

    /// <summary>Matches a double-quoted string literal, honouring backslash escapes.</summary>
    [GeneratedRegex(@"""(?:[^""\\]|\\.)*""")]
    private static partial Regex StringLiteralRegex();

    // ---- Whitespace -----------------------------------------------------

    /// <summary>Matches a run of spaces and/or tabs.</summary>
    [GeneratedRegex(@"[ \t]+")]
    private static partial Regex MultipleWhitespaceRegex();

    /// <summary>
    /// Matches one operator or punctuation character with any surrounding whitespace
    /// (<c>\s</c>, so line breaks are included); group 1 is the character itself.
    /// </summary>
    [GeneratedRegex(@"\s*([+\-*/%=<>!&|^~?:;,{}()\[\]])\s*")]
    private static partial Regex WhitespaceAroundOperatorsRegex();

    /// <summary>Matches spaces/tabs immediately before a line feed.</summary>
    [GeneratedRegex(@"[ \t]+\n")]
    private static partial Regex TrailingWhitespaceRegex();
}
|
||||
@@ -0,0 +1,81 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Text;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class CodeNormalizer
{
    /// <summary>
    /// Reorders runs of simple statements found at brace depth 1 into a canonical order.
    /// </summary>
    /// <remarks>
    /// The input is split on <c>'\n'</c>. A line is sortable when, after counting its braces,
    /// the depth is exactly 1 and the line itself contains no brace. Consecutive sortable
    /// lines are emitted (trimmed) in <see cref="SortStatements"/> order; every other line is
    /// emitted unchanged. No data-dependency analysis is performed.
    /// Lines are always joined with <c>'\n'</c>: <c>StringBuilder.AppendLine</c> would emit
    /// the platform newline and make the result differ between operating systems.
    /// </remarks>
    /// <param name="code">Source text to reorder.</param>
    /// <returns>The reordered text, trimmed of leading/trailing whitespace.</returns>
    private static string SortIndependentStatements(string code)
    {
        var output = new StringBuilder();
        var sortable = new List<string>();
        var blockDepth = 0;

        foreach (var line in code.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            var trimmed = line.Trim();

            blockDepth += trimmed.Count(c => c == '{');
            blockDepth -= trimmed.Count(c => c == '}');

            if (blockDepth == 1 && !trimmed.Contains('{') && !trimmed.Contains('}'))
            {
                sortable.Add(trimmed);
                continue;
            }

            // A structural line ends the current run: emit the run sorted, then the line as-is.
            FlushSorted();
            output.Append(line).Append('\n');
        }

        FlushSorted();

        return output.ToString().Trim();

        void FlushSorted()
        {
            if (sortable.Count == 0)
            {
                return;
            }

            foreach (var stmt in SortStatements(sortable))
            {
                output.Append(stmt).Append('\n');
            }

            sortable.Clear();
        }
    }

    /// <summary>
    /// Returns the statements ordered (stably) by <see cref="GetStatementSortKey"/>
    /// using ordinal comparison.
    /// </summary>
    private static List<string> SortStatements(List<string> statements)
    {
        return statements
            .OrderBy(s => GetStatementSortKey(s), StringComparer.Ordinal)
            .ToList();
    }

    /// <summary>
    /// Builds the sort key of a statement: <c>A_&lt;target&gt;</c> for an assignment,
    /// <c>C_&lt;name&gt;</c> for a call, otherwise <c>Z_</c> plus the trimmed statement.
    /// </summary>
    private static string GetStatementSortKey(string statement)
    {
        var trimmed = statement.Trim();

        var assignMatch = AssignmentTargetRegex().Match(trimmed);
        if (assignMatch.Success)
        {
            return $"A_{assignMatch.Groups[1].Value}";
        }

        var callMatch = FunctionNameRegex().Match(trimmed);
        if (callMatch.Success)
        {
            return $"C_{callMatch.Groups[1].Value}";
        }

        return $"Z_{trimmed}";
    }
}
|
||||
@@ -0,0 +1,90 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class CodeNormalizer
{
    /// <summary>Removes <c>//</c> comments, then <c>/* */</c> comments, from the text.</summary>
    private static string RemoveComments(string code)
    {
        code = SingleLineCommentRegex().Replace(code, string.Empty);
        code = MultiLineCommentRegex().Replace(code, string.Empty);

        return code;
    }

    /// <summary>
    /// Renames every identifier to a canonical <c>var_N</c> alias, numbered in order of
    /// first appearance. Keywords/types, <paramref name="knownFunctions"/> and
    /// standard-library names are left unchanged.
    /// </summary>
    private static string NormalizeVariableNames(string code, ImmutableHashSet<string>? knownFunctions)
    {
        var varIndex = 0;
        var varMap = new Dictionary<string, string>();

        return IdentifierRegex().Replace(code, match =>
        {
            var name = match.Value;

            if (IsKeywordOrType(name)
                || knownFunctions?.Contains(name) == true
                || IsStandardLibraryFunction(name))
            {
                return name;
            }

            if (!varMap.TryGetValue(name, out var canonical))
            {
                canonical = $"var_{varIndex++}";
                varMap[name] = canonical;
            }

            return canonical;
        });
    }

    /// <summary>
    /// Rewrites each call site <c>name(</c> as <c>func_XXXXXXXX(</c>, where the suffix is a
    /// deterministic hash of the name. Known functions, standard-library functions and
    /// keywords (for example <c>if (</c>, <c>while (</c>, <c>sizeof(</c>) are left unchanged.
    /// </summary>
    /// <param name="code">Source text to rewrite.</param>
    /// <param name="knownFunctions">Optional set of function names to preserve.</param>
    private static string NormalizeFunctionCalls(string code, ImmutableHashSet<string>? knownFunctions)
    {
        return FunctionCallRegex().Replace(code, match =>
        {
            var funcName = match.Groups[1].Value;

            // "if (", "for (", "return (" ... match the call pattern but are not calls.
            if (IsKeywordOrType(funcName))
            {
                return match.Value;
            }

            if (knownFunctions?.Contains(funcName) == true || IsStandardLibraryFunction(funcName))
            {
                return match.Value;
            }

            // string.GetHashCode() is randomized per process; use a stable hash so the
            // normalized text is identical from run to run.
            return $"func_{StableNameHash(funcName):X8}(";
        });

        // 32-bit FNV-1a over the UTF-16 code units of the name.
        static uint StableNameHash(string text)
        {
            var hash = 2166136261u;
            foreach (var ch in text)
            {
                hash = unchecked((hash ^ ch) * 16777619u);
            }

            return hash;
        }
    }

    /// <summary>
    /// Replaces hex literals with <c>CONST_HEX</c>, decimal runs of four or more digits
    /// with <c>CONST_INT</c>, and string literals with <c>CONST_STR</c>, in that order.
    /// </summary>
    private static string NormalizeConstants(string code)
    {
        code = HexConstantRegex().Replace(code, "CONST_HEX");
        code = LargeDecimalRegex().Replace(code, "CONST_INT");
        code = StringLiteralRegex().Replace(code, "CONST_STR");

        return code;
    }

    /// <summary>
    /// Collapses space/tab runs, strips whitespace around operators and punctuation,
    /// converts line endings to <c>\n</c>, removes trailing blanks and trims the result.
    /// </summary>
    private static string NormalizeWhitespace(string code)
    {
        code = MultipleWhitespaceRegex().Replace(code, " ");
        code = WhitespaceAroundOperatorsRegex().Replace(code, "$1");
        code = code.Replace("\r\n", "\n").Replace("\r", "\n");
        code = TrailingWhitespaceRegex().Replace(code, "\n");

        return code.Trim();
    }
}
|
||||
@@ -1,10 +1,7 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
@@ -13,20 +10,6 @@ namespace StellaOps.BinaryIndex.Decompiler;
|
||||
/// </summary>
|
||||
public sealed partial class CodeNormalizer : ICodeNormalizer
|
||||
{
|
||||
private static readonly ImmutableHashSet<string> CKeywords = ImmutableHashSet.Create(
|
||||
"auto", "break", "case", "char", "const", "continue", "default", "do",
|
||||
"double", "else", "enum", "extern", "float", "for", "goto", "if",
|
||||
"int", "long", "register", "return", "short", "signed", "sizeof", "static",
|
||||
"struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while",
|
||||
// Common Ghidra types
|
||||
"undefined", "undefined1", "undefined2", "undefined4", "undefined8",
|
||||
"byte", "word", "dword", "qword", "bool", "uchar", "ushort", "uint", "ulong",
|
||||
"int8_t", "int16_t", "int32_t", "int64_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t",
|
||||
"size_t", "ssize_t", "ptrdiff_t", "intptr_t", "uintptr_t",
|
||||
// Common function names to preserve
|
||||
"NULL", "true", "false"
|
||||
);
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Normalize(string code, NormalizationOptions? options = null)
|
||||
{
|
||||
@@ -36,34 +19,28 @@ public sealed partial class CodeNormalizer : ICodeNormalizer
|
||||
|
||||
var normalized = code;
|
||||
|
||||
// 1. Remove comments
|
||||
normalized = RemoveComments(normalized);
|
||||
|
||||
// 2. Normalize variable names
|
||||
if (options.NormalizeVariables)
|
||||
{
|
||||
normalized = NormalizeVariableNames(normalized, options.KnownFunctions);
|
||||
}
|
||||
|
||||
// 3. Normalize function calls
|
||||
if (options.NormalizeFunctionCalls)
|
||||
{
|
||||
normalized = NormalizeFunctionCalls(normalized, options.KnownFunctions);
|
||||
}
|
||||
|
||||
// 4. Normalize constants
|
||||
if (options.NormalizeConstants)
|
||||
{
|
||||
normalized = NormalizeConstants(normalized);
|
||||
}
|
||||
|
||||
// 5. Normalize whitespace
|
||||
if (options.NormalizeWhitespace)
|
||||
{
|
||||
normalized = NormalizeWhitespace(normalized);
|
||||
}
|
||||
|
||||
// 6. Sort independent statements (within blocks)
|
||||
if (options.SortIndependentStatements)
|
||||
{
|
||||
normalized = SortIndependentStatements(normalized);
|
||||
@@ -77,458 +54,15 @@ public sealed partial class CodeNormalizer : ICodeNormalizer
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrEmpty(code);
|
||||
|
||||
// Normalize with full normalization for hashing
|
||||
var normalized = Normalize(code, new NormalizationOptions
|
||||
{
|
||||
NormalizeVariables = true,
|
||||
NormalizeFunctionCalls = true,
|
||||
NormalizeConstants = false, // Keep constants for semantic identity
|
||||
NormalizeConstants = false,
|
||||
NormalizeWhitespace = true,
|
||||
SortIndependentStatements = true
|
||||
});
|
||||
|
||||
return SHA256.HashData(Encoding.UTF8.GetBytes(normalized));
|
||||
}
|
||||
|
||||
/// <inheritdoc />
public DecompiledAst NormalizeAst(DecompiledAst ast, NormalizationOptions? options = null)
{
    ArgumentNullException.ThrowIfNull(ast);

    var effectiveOptions = options ?? NormalizationOptions.Default;

    // Alias state shared by the whole traversal so a name maps to one alias throughout the tree.
    var aliases = new Dictionary<string, string>();
    var nextAlias = 0;

    var root = NormalizeNode(ast.Root, effectiveOptions, aliases, ref nextAlias);

    // Node count, depth and patterns are carried over unchanged.
    return new DecompiledAst(root, ast.NodeCount, ast.Depth, ast.Patterns);
}
|
||||
|
||||
/// <summary>
/// Dispatches a node to the normalizer for its kind, honouring the enabled options.
/// Any node not handled by a specific normalizer goes through <see cref="NormalizeChildren"/>.
/// </summary>
private static AstNode NormalizeNode(
    AstNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    if (node is VariableNode variable && options.NormalizeVariables)
    {
        return NormalizeVariableNode(variable, varMap, ref varIndex);
    }

    if (node is CallNode call && options.NormalizeFunctionCalls)
    {
        return NormalizeCallNode(call, options, varMap, ref varIndex);
    }

    if (node is ConstantNode constant && options.NormalizeConstants)
    {
        return NormalizeConstantNode(constant);
    }

    return NormalizeChildren(node, options, varMap, ref varIndex);
}
|
||||
|
||||
/// <summary>
/// Replaces a variable's name with its canonical <c>var_N</c> alias.
/// </summary>
/// <param name="node">Variable node to rename.</param>
/// <param name="varMap">Original-name to alias map shared across the traversal; extended on first sight of a name.</param>
/// <param name="varIndex">Next free alias number; incremented when a new alias is allocated.</param>
/// <returns>The same node for keyword/type names, otherwise a copy carrying the alias.</returns>
private static AstNode NormalizeVariableNode(
    VariableNode node,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var originalName = node.Name;

    // Keywords and type names are never renamed.
    if (IsKeywordOrType(originalName))
    {
        return node;
    }

    if (varMap.TryGetValue(originalName, out var alias))
    {
        return node with { Name = alias };
    }

    // First occurrence: allocate the next alias and remember it.
    alias = $"var_{varIndex}";
    varIndex++;
    varMap[originalName] = alias;

    return node with { Name = alias };
}
|
||||
|
||||
/// <summary>
/// Normalizes a call node: anonymizes the callee name unless it is a known or
/// standard-library function, and normalizes every argument.
/// </summary>
/// <param name="node">Call node to normalize.</param>
/// <param name="options">Normalization options; <c>KnownFunctions</c> lists names to preserve.</param>
/// <param name="varMap">Variable alias map shared across the traversal.</param>
/// <param name="varIndex">Next free variable alias number.</param>
/// <returns>A new <see cref="CallNode"/> with the normalized name and arguments.</returns>
private static AstNode NormalizeCallNode(
    CallNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var funcName = node.FunctionName;

    // Preserve known functions
    var isPreserved = options.KnownFunctions?.Contains(funcName) == true
        || IsStandardLibraryFunction(funcName);
    if (!isPreserved)
    {
        // string.GetHashCode() is randomized per process, which made the
        // generated name differ between runs. Use a deterministic hash instead.
        funcName = $"func_{StableNameHash(funcName):X8}";
    }

    var normalizedArgs = new List<AstNode>(node.Arguments.Length);
    foreach (var arg in node.Arguments)
    {
        normalizedArgs.Add(NormalizeNode(arg, options, varMap, ref varIndex));
    }

    return new CallNode(funcName, [.. normalizedArgs], node.Location);

    // 32-bit FNV-1a over the UTF-16 code units of the name.
    static uint StableNameHash(string text)
    {
        var hash = 2166136261u;
        foreach (var ch in text)
        {
            hash = unchecked((hash ^ ch) * 16777619u);
        }

        return hash;
    }
}
|
||||
|
||||
/// <summary>
/// Replaces a constant's value with a placeholder naming its category.
/// </summary>
/// <param name="node">Constant node to normalize.</param>
/// <returns>
/// A copy whose value is <c>CONST_INT</c>, <c>CONST_FLOAT</c> or <c>CONST_STR</c>,
/// or the node itself when the value is of any other type.
/// </returns>
private static AstNode NormalizeConstantNode(ConstantNode node)
{
    var placeholder = node.Value switch
    {
        long or int or short or byte => "CONST_INT",
        double or float or decimal => "CONST_FLOAT",
        string => "CONST_STR",
        _ => null,
    };

    if (placeholder is null)
    {
        return node;
    }

    return node with { Value = placeholder };
}
|
||||
|
||||
/// <summary>
/// Normalizes every child of <paramref name="node"/> and rebuilds the node around the
/// normalized children. Leaf nodes and node kinds without a rebuild rule are returned as-is.
/// </summary>
private static AstNode NormalizeChildren(
    AstNode node,
    NormalizationOptions options,
    Dictionary<string, string> varMap,
    ref int varIndex)
{
    var childCount = node.Children.Length;
    if (childCount == 0)
    {
        return node;
    }

    var rebuilt = ImmutableArray.CreateBuilder<AstNode>(childCount);
    foreach (var child in node.Children)
    {
        rebuilt.Add(NormalizeNode(child, options, varMap, ref varIndex));
    }

    var children = rebuilt.ToImmutable();

    // Explicit per-type reconstruction (no reflection).
    switch (node)
    {
        case BlockNode block:
            return block with { Statements = children };
        case IfNode ifNode:
            return CreateNormalizedIf(ifNode, children);
        case WhileNode whileNode:
            return CreateNormalizedWhile(whileNode, children);
        case ForNode forNode:
            return CreateNormalizedFor(forNode, children);
        case ReturnNode returnNode when children.Length > 0:
            return returnNode with { Value = children[0] };
        case AssignmentNode assignment:
            return CreateNormalizedAssignment(assignment, children);
        case BinaryOpNode binOp:
            return CreateNormalizedBinaryOp(binOp, children);
        case UnaryOpNode unaryOp when children.Length > 0:
            return unaryOp with { Operand = children[0] };
        default:
            // Other node kinds keep their original (un-normalized) children.
            return node;
    }
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="IfNode"/> from normalized children, taken positionally as
/// condition, then-branch, else-branch; missing positions fall back to the original node.
/// </summary>
private static IfNode CreateNormalizedIf(IfNode node, ImmutableArray<AstNode> children)
{
    var available = children.Length;

    var condition = available > 0 ? children[0] : node.Condition;
    var thenBranch = available > 1 ? children[1] : node.ThenBranch;
    var elseBranch = available > 2 ? children[2] : node.ElseBranch;

    return new IfNode(condition, thenBranch, elseBranch, node.Location);
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="WhileNode"/> from normalized children, taken positionally as
/// condition, body; missing positions fall back to the original node.
/// </summary>
private static WhileNode CreateNormalizedWhile(WhileNode node, ImmutableArray<AstNode> children)
{
    var available = children.Length;

    var condition = available > 0 ? children[0] : node.Condition;
    var body = available > 1 ? children[1] : node.Body;

    return new WhileNode(condition, body, node.Location);
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="ForNode"/> from normalized children, taken positionally as
/// init, condition, update, body; missing positions fall back to the original node.
/// </summary>
private static ForNode CreateNormalizedFor(ForNode node, ImmutableArray<AstNode> children)
{
    var available = children.Length;

    var init = available > 0 ? children[0] : node.Init;
    var condition = available > 1 ? children[1] : node.Condition;
    var update = available > 2 ? children[2] : node.Update;
    var body = available > 3 ? children[3] : node.Body;

    return new ForNode(init, condition, update, body, node.Location);
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="AssignmentNode"/> from normalized children, taken positionally as
/// target, value; the operator and location are copied from the original node.
/// </summary>
private static AssignmentNode CreateNormalizedAssignment(
    AssignmentNode node,
    ImmutableArray<AstNode> children)
{
    var available = children.Length;

    var target = available > 0 ? children[0] : node.Target;
    var value = available > 1 ? children[1] : node.Value;

    return new AssignmentNode(target, value, node.Operator, node.Location);
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="BinaryOpNode"/> from normalized children, taken positionally as
/// left, right; the operator and location are copied from the original node.
/// </summary>
private static BinaryOpNode CreateNormalizedBinaryOp(
    BinaryOpNode node,
    ImmutableArray<AstNode> children)
{
    var available = children.Length;

    var left = available > 0 ? children[0] : node.Left;
    var right = available > 1 ? children[1] : node.Right;

    return new BinaryOpNode(left, right, node.Operator, node.Location);
}
|
||||
|
||||
/// <summary>
/// Strips comments from the text: first <c>//</c> line comments, then <c>/* */</c> block comments.
/// </summary>
private static string RemoveComments(string code)
{
    var withoutLineComments = SingleLineCommentRegex().Replace(code, "");
    return MultiLineCommentRegex().Replace(withoutLineComments, "");
}
|
||||
|
||||
/// <summary>
/// Renames every identifier to a canonical <c>var_N</c> alias, numbered in order of
/// first appearance. Keywords/types, <paramref name="knownFunctions"/> and
/// standard-library names are left unchanged.
/// </summary>
private static string NormalizeVariableNames(string code, ImmutableHashSet<string>? knownFunctions)
{
    var aliases = new Dictionary<string, string>();
    var nextAlias = 0;

    return IdentifierRegex().Replace(code, Rename);

    string Rename(Match match)
    {
        var identifier = match.Value;

        var preserved = IsKeywordOrType(identifier)
            || knownFunctions?.Contains(identifier) == true
            || IsStandardLibraryFunction(identifier);
        if (preserved)
        {
            return identifier;
        }

        if (aliases.TryGetValue(identifier, out var alias))
        {
            return alias;
        }

        alias = $"var_{nextAlias}";
        nextAlias++;
        aliases[identifier] = alias;
        return alias;
    }
}
|
||||
|
||||
/// <summary>
/// Rewrites each call site <c>name(</c> as <c>func_XXXXXXXX(</c>, where the suffix is a
/// deterministic hash of the name. Known functions, standard-library functions and
/// keywords (for example <c>if (</c>, <c>while (</c>, <c>sizeof(</c>) are left unchanged.
/// </summary>
/// <param name="code">Source text to rewrite.</param>
/// <param name="knownFunctions">Optional set of function names to preserve.</param>
private static string NormalizeFunctionCalls(string code, ImmutableHashSet<string>? knownFunctions)
{
    // Match function calls: identifier followed by (
    return FunctionCallRegex().Replace(code, match =>
    {
        var funcName = match.Groups[1].Value;

        // "if (", "for (", "return (" ... match the call pattern but are not calls.
        if (IsKeywordOrType(funcName))
        {
            return match.Value;
        }

        if (knownFunctions?.Contains(funcName) == true || IsStandardLibraryFunction(funcName))
        {
            return match.Value;
        }

        // string.GetHashCode() is randomized per process; use a stable hash so the
        // normalized text is identical from run to run.
        return $"func_{StableNameHash(funcName):X8}(";
    });

    // 32-bit FNV-1a over the UTF-16 code units of the name.
    static uint StableNameHash(string text)
    {
        var hash = 2166136261u;
        foreach (var ch in text)
        {
            hash = unchecked((hash ^ ch) * 16777619u);
        }

        return hash;
    }
}
|
||||
|
||||
/// <summary>
/// Replaces literal constants with placeholders, in this order: hex literals become
/// <c>CONST_HEX</c>, decimal runs matched by <c>LargeDecimalRegex</c> become
/// <c>CONST_INT</c>, and string literals become <c>CONST_STR</c>.
/// </summary>
private static string NormalizeConstants(string code)
{
    var withoutHex = HexConstantRegex().Replace(code, "CONST_HEX");
    var withoutDecimals = LargeDecimalRegex().Replace(withoutHex, "CONST_INT");
    return StringLiteralRegex().Replace(withoutDecimals, "CONST_STR");
}
|
||||
|
||||
/// <summary>
/// Canonicalizes whitespace: collapses space/tab runs to one space, strips whitespace
/// around operators and punctuation, converts line endings to <c>\n</c>, removes
/// trailing blanks before line feeds, and trims the result.
/// </summary>
private static string NormalizeWhitespace(string code)
{
    var collapsed = MultipleWhitespaceRegex().Replace(code, " ");
    var tightened = WhitespaceAroundOperatorsRegex().Replace(collapsed, "$1");
    var unixEndings = tightened.Replace("\r\n", "\n").Replace("\r", "\n");
    var withoutTrailing = TrailingWhitespaceRegex().Replace(unixEndings, "\n");

    return withoutTrailing.Trim();
}
|
||||
|
||||
/// <summary>
/// Reorders runs of simple statements found at brace depth 1 into a canonical order.
/// </summary>
/// <remarks>
/// This is a simplified implementation: it performs no data-dependency analysis.
/// The input is split on <c>'\n'</c>. A line is sortable when, after counting its braces,
/// the depth is exactly 1 and the line itself contains no brace. Consecutive sortable
/// lines are emitted (trimmed) in <see cref="SortStatements"/> order; every other line is
/// emitted unchanged.
/// Lines are always joined with <c>'\n'</c>: <c>StringBuilder.AppendLine</c> would emit
/// the platform newline and make the result differ between operating systems.
/// </remarks>
/// <param name="code">Source text to reorder.</param>
/// <returns>The reordered text, trimmed of leading/trailing whitespace.</returns>
private static string SortIndependentStatements(string code)
{
    var output = new StringBuilder();
    var sortable = new List<string>();
    var blockDepth = 0;

    foreach (var line in code.Split('\n', StringSplitOptions.RemoveEmptyEntries))
    {
        var trimmed = line.Trim();

        // Track block depth
        blockDepth += trimmed.Count(c => c == '{');
        blockDepth -= trimmed.Count(c => c == '}');

        if (blockDepth == 1 && !trimmed.Contains('{') && !trimmed.Contains('}'))
        {
            sortable.Add(trimmed);
            continue;
        }

        // A structural line ends the current run: emit the run sorted, then the line as-is.
        FlushSorted();
        output.Append(line).Append('\n');
    }

    FlushSorted();

    return output.ToString().Trim();

    void FlushSorted()
    {
        if (sortable.Count == 0)
        {
            return;
        }

        foreach (var stmt in SortStatements(sortable))
        {
            output.Append(stmt).Append('\n');
        }

        sortable.Clear();
    }
}
|
||||
|
||||
/// <summary>
/// Returns a new list with the statements ordered (stably) by their
/// <see cref="GetStatementSortKey"/> key, compared ordinally.
/// </summary>
private static List<string> SortStatements(List<string> statements)
{
    var ordered = new List<string>(statements.Count);
    ordered.AddRange(statements.OrderBy(GetStatementSortKey, StringComparer.Ordinal));
    return ordered;
}
|
||||
|
||||
/// <summary>
/// Builds the sort key of a statement: <c>A_&lt;target&gt;</c> when it starts with an
/// assignment target, <c>C_&lt;name&gt;</c> when it starts with a call, otherwise
/// <c>Z_</c> followed by the trimmed statement.
/// </summary>
private static string GetStatementSortKey(string statement)
{
    var text = statement.Trim();

    if (AssignmentTargetRegex().Match(text) is { Success: true } assignment)
    {
        return $"A_{assignment.Groups[1].Value}";
    }

    if (FunctionNameRegex().Match(text) is { Success: true } call)
    {
        return $"C_{call.Groups[1].Value}";
    }

    return $"Z_{text}";
}
|
||||
|
||||
/// <summary>Returns whether <paramref name="name"/> is in the preserved keyword/type set.</summary>
private static bool IsKeywordOrType(string name) => CKeywords.Contains(name);
|
||||
|
||||
/// <summary>
/// Returns whether <paramref name="name"/> is one of the hard-coded C standard
/// library names that normalization preserves.
/// </summary>
private static bool IsStandardLibraryFunction(string name)
{
    switch (name)
    {
        // Memory
        case "malloc": case "calloc": case "realloc": case "free":
        case "memcpy": case "memmove": case "memset": case "memcmp":
        // String
        case "strlen": case "strcpy": case "strncpy": case "strcat": case "strncat":
        case "strcmp": case "strncmp": case "strchr": case "strrchr": case "strstr":
        // I/O
        case "printf": case "fprintf": case "sprintf": case "snprintf":
        case "scanf": case "fscanf": case "sscanf":
        case "fopen": case "fclose": case "fread": case "fwrite":
        case "fseek": case "ftell": case "fflush":
        case "puts": case "fputs": case "gets": case "fgets":
        case "putchar": case "getchar":
        // Math
        case "abs": case "labs": case "llabs": case "fabs": case "sqrt": case "pow":
        case "sin": case "cos": case "tan": case "log": case "exp":
        // Other
        case "exit": case "abort": case "atexit":
        case "atoi": case "atol": case "atof":
        case "strtol": case "strtoul": case "strtod":
        case "assert": case "errno":
            return true;

        default:
            return false;
    }
}
|
||||
|
||||
// Source-generated regular expressions used by the text normalizer.

// ---- Comments -----------------------------------------------------------

/// <summary>Matches <c>//</c> up to, but not including, the next line feed.</summary>
[GeneratedRegex(@"//[^\n]*")]
private static partial Regex SingleLineCommentRegex();

/// <summary>Matches a <c>/* ... */</c> comment, lazily, across line breaks.</summary>
[GeneratedRegex(@"/\*[\s\S]*?\*/")]
private static partial Regex MultiLineCommentRegex();

// ---- Identifiers and calls ----------------------------------------------

/// <summary>Matches a whole C-style identifier; group 1 is the identifier.</summary>
[GeneratedRegex(@"\b([a-zA-Z_][a-zA-Z0-9_]*)\b")]
private static partial Regex IdentifierRegex();

/// <summary>Matches an identifier followed by optional whitespace and <c>(</c>; group 1 is the identifier.</summary>
[GeneratedRegex(@"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")]
private static partial Regex FunctionCallRegex();

/// <summary>Matches an identifier at the start of the input followed by <c>=</c>; group 1 is the identifier.</summary>
[GeneratedRegex(@"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")]
private static partial Regex AssignmentTargetRegex();

/// <summary>Matches an identifier at the start of the input followed by <c>(</c>; group 1 is the identifier.</summary>
[GeneratedRegex(@"^([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")]
private static partial Regex FunctionNameRegex();

// ---- Constants ----------------------------------------------------------

/// <summary>Matches a <c>0x</c>/<c>0X</c>-prefixed hexadecimal literal.</summary>
[GeneratedRegex(@"0[xX][0-9a-fA-F]+")]
private static partial Regex HexConstantRegex();

/// <summary>Matches a standalone run of four or more decimal digits.</summary>
[GeneratedRegex(@"\b[0-9]{4,}\b")]
private static partial Regex LargeDecimalRegex();

/// <summary>Matches a double-quoted string literal, honouring backslash escapes.</summary>
[GeneratedRegex(@"""(?:[^""\\]|\\.)*""")]
private static partial Regex StringLiteralRegex();

// ---- Whitespace ---------------------------------------------------------

/// <summary>Matches a run of spaces and/or tabs.</summary>
[GeneratedRegex(@"[ \t]+")]
private static partial Regex MultipleWhitespaceRegex();

/// <summary>
/// Matches one operator or punctuation character with any surrounding whitespace
/// (<c>\s</c>, so line breaks are included); group 1 is the character itself.
/// </summary>
[GeneratedRegex(@"\s*([+\-*/%=<>!&|^~?:;,{}()\[\]])\s*")]
private static partial Regex WhitespaceAroundOperatorsRegex();

/// <summary>Matches spaces/tabs immediately before a line feed.</summary>
[GeneratedRegex(@"[ \t]+\n")]
private static partial Regex TrailingWhitespaceRegex();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Confidence level reported for a comparison result.
/// Members are declared in ascending numeric order, starting at zero.
/// </summary>
public enum ComparisonConfidence
{
    Low = 0,
    Medium = 1,
    High = 2,
    VeryHigh = 3,
}
|
||||
@@ -0,0 +1,14 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Immutable settings for an AST comparison.
/// </summary>
public sealed record ComparisonOptions
{
    /// <summary>Defaults to <c>true</c>.</summary>
    public bool IgnoreVariableNames { get; init; } = true;

    /// <summary>Defaults to <c>false</c>.</summary>
    public bool IgnoreConstants { get; init; }

    /// <summary>Defaults to <c>true</c>.</summary>
    public bool DetectOptimizations { get; init; } = true;

    /// <summary>Defaults to <c>0.5</c>.</summary>
    public decimal MinSimilarityThreshold { get; init; } = 0.5m;
}
|
||||
@@ -0,0 +1,15 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Immutable settings for a decompilation request.
/// </summary>
public sealed record DecompileOptions
{
    /// <summary>Defaults to <c>true</c>.</summary>
    public bool SimplifyCode { get; init; } = true;

    /// <summary>Defaults to <c>true</c>.</summary>
    public bool RecoverTypes { get; init; } = true;

    /// <summary>Defaults to <c>true</c>.</summary>
    public bool RecoverStructs { get; init; } = true;

    /// <summary>Defaults to 100,000.</summary>
    public int MaxCodeLength { get; init; } = 100000;

    /// <summary>Defaults to 30 seconds.</summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);
}
|
||||
@@ -0,0 +1,14 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
|
||||
/// AST representation of decompiled code.
|
||||
/// </summary>
/// <param name="Root">Root node of the tree.</param>
/// <param name="NodeCount">Node count recorded for the tree (presumably the total number of nodes under <paramref name="Root"/> - confirm against the producer).</param>
/// <param name="Depth">Depth recorded for the tree (presumably its maximum nesting depth - confirm against the producer).</param>
/// <param name="Patterns">Patterns associated with the tree.</param>
|
||||
public sealed record DecompiledAst(
|
||||
AstNode Root,
|
||||
int NodeCount,
|
||||
int Depth,
|
||||
ImmutableArray<AstPattern> Patterns);
|
||||
@@ -0,0 +1,50 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class DecompiledCodeParser
{
    /// <summary>Counts <paramref name="node"/> and all of its descendants.</summary>
    private static int CountNodes(AstNode node)
    {
        var total = 1;
        foreach (var child in node.Children)
        {
            total += CountNodes(child);
        }

        return total;
    }

    /// <summary>
    /// Returns the height of the tree rooted at <paramref name="node"/>; a leaf has depth 1.
    /// </summary>
    private static int ComputeDepth(AstNode node)
    {
        var deepestChild = 0;
        foreach (var child in node.Children)
        {
            deepestChild = Math.Max(deepestChild, ComputeDepth(child));
        }

        // With no children this is 0 + 1, i.e. the leaf case.
        return deepestChild + 1;
    }

    /// <summary>
    /// Collects the patterns reported for the descendants of <paramref name="node"/>.
    /// As written, no pattern is ever created here, so the result only aggregates
    /// what the recursive calls return.
    /// </summary>
    private static ImmutableArray<AstPattern> ExtractPatterns(AstNode node)
    {
        var collected = ImmutableArray.CreateBuilder<AstPattern>();
        foreach (var child in node.Children)
        {
            collected.AddRange(ExtractPatterns(child));
        }

        return collected.ToImmutable();
    }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class DecompiledCodeParser
{
    // RegexOptions.Compiled was dropped: the regex source generator ignores that option,
    // so specifying it on [GeneratedRegex] has no effect.

    /// <summary>
    /// Matches <c>type name</c> followed by <c>=</c> or <c>;</c>.
    /// Named groups: <c>type</c> and <c>name</c>.
    /// </summary>
    [GeneratedRegex(@"(?<type>\w+)\s+(?<name>\w+)\s*(?:=|;)")]
    private static partial Regex VariableDeclarationRegex();

    /// <summary>
    /// Matches a word followed by optional whitespace and <c>(</c>.
    /// Named group: <c>name</c>.
    /// </summary>
    [GeneratedRegex(@"(?<name>\w+)\s*\(")]
    private static partial Regex FunctionCallRegex();
}
|
||||
@@ -0,0 +1,77 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Tries to read an identifier or keyword starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="code">Text being tokenized.</param>
    /// <param name="index">Current position; advanced past the identifier on success.</param>
    /// <param name="column">Current column; advanced by the number of characters consumed.</param>
    /// <param name="line">Line number recorded on the token.</param>
    /// <param name="startColumn">Column recorded on the token.</param>
    /// <param name="tokens">List that receives the token.</param>
    /// <returns><c>false</c> (nothing consumed) when the current character cannot start an identifier.</returns>
    private static bool TryConsumeIdentifier(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        var c = code[index];
        if (!char.IsLetter(c) && c != '_')
        {
            return false;
        }

        var start = index;
        while (index < code.Length && (char.IsLetterOrDigit(code[index]) || code[index] == '_'))
        {
            index++;
            column++;
        }

        var value = code[start..index];
        var type = _keywords.Contains(value) ? TokenType.Keyword : TokenType.Identifier;
        tokens.Add(new Token(type, value, line, startColumn));
        return true;
    }

    /// <summary>
    /// Tries to read a numeric literal starting at <paramref name="index"/>: either a
    /// hexadecimal literal prefixed with <c>0x</c> or <c>0X</c>, or a run of digits and
    /// dots, followed by any number of <c>U</c>/<c>L</c> suffix characters (either case).
    /// </summary>
    /// <param name="code">Text being tokenized.</param>
    /// <param name="index">Current position; advanced past the literal on success.</param>
    /// <param name="column">Current column; advanced by the number of characters consumed.</param>
    /// <param name="line">Line number recorded on the token.</param>
    /// <param name="startColumn">Column recorded on the token.</param>
    /// <param name="tokens">List that receives the token.</param>
    /// <returns><c>false</c> (nothing consumed) when the current character is not a digit.</returns>
    private static bool TryConsumeNumber(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        // A hex literal also starts with the digit '0', so one digit check covers both forms.
        if (!char.IsDigit(code[index]))
        {
            return false;
        }

        var start = index;

        // Accept the upper-case prefix too; previously "0X1F" was split into "0" and "X1F".
        var isHex = code[index] == '0'
            && index + 1 < code.Length
            && code[index + 1] is 'x' or 'X';

        if (isHex)
        {
            index += 2;
            column += 2;
            while (index < code.Length && char.IsAsciiHexDigit(code[index]))
            {
                index++;
                column++;
            }
        }
        else
        {
            while (index < code.Length && (char.IsDigit(code[index]) || code[index] == '.'))
            {
                index++;
                column++;
            }
        }

        // Integer suffixes such as U, L, UL, ull.
        while (index < code.Length && code[index] is 'U' or 'L' or 'u' or 'l')
        {
            index++;
            column++;
        }

        tokens.Add(new Token(TokenType.Number, code[start..index], line, startColumn));
        return true;
    }
}
|
||||
@@ -0,0 +1,78 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Attempts to read a double-quoted string literal starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="code">Source text being tokenized.</param>
    /// <param name="index">Read position; moved past the literal on success.</param>
    /// <param name="column">Current column; advanced by the number of characters consumed.</param>
    /// <param name="line">Line number stored on the emitted token.</param>
    /// <param name="startColumn">Column stored on the emitted token.</param>
    /// <param name="tokens">List that receives the emitted token.</param>
    /// <returns><c>true</c> when a string token was emitted; <c>false</c> when the current character is not <c>"</c>.</returns>
    private static bool TryConsumeStringLiteral(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        return TryConsumeQuotedLiteral('"', TokenType.String, code, ref index, ref column, line, startColumn, tokens);
    }

    /// <summary>
    /// Attempts to read a single-quoted character literal starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="code">Source text being tokenized.</param>
    /// <param name="index">Read position; moved past the literal on success.</param>
    /// <param name="column">Current column; advanced by the number of characters consumed.</param>
    /// <param name="line">Line number stored on the emitted token.</param>
    /// <param name="startColumn">Column stored on the emitted token.</param>
    /// <param name="tokens">List that receives the emitted token.</param>
    /// <returns><c>true</c> when a char token was emitted; <c>false</c> when the current character is not <c>'</c>.</returns>
    private static bool TryConsumeCharLiteral(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        return TryConsumeQuotedLiteral('\'', TokenType.Char, code, ref index, ref column, line, startColumn, tokens);
    }

    /// <summary>
    /// Shared scanner for quoted literals. The emitted token text includes the opening quote
    /// and, when present, the closing quote.
    /// </summary>
    /// <remarks>
    /// An unterminated literal produces a token that runs to the end of the input. The closing
    /// quote is only consumed when it exists, so <paramref name="index"/> never moves past
    /// <c>code.Length</c> and the slice below cannot go out of range.
    /// </remarks>
    private static bool TryConsumeQuotedLiteral(
        char quote,
        TokenType tokenType,
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        if (code[index] != quote)
        {
            return false;
        }

        var start = index;

        // Opening quote.
        index++;
        column++;

        while (index < code.Length && code[index] != quote)
        {
            // A backslash escapes the next character, so an escaped quote does not end the literal.
            var step = code[index] == '\\' && index + 1 < code.Length ? 2 : 1;
            index += step;
            column += step;
        }

        // Closing quote, if the literal is terminated.
        if (index < code.Length)
        {
            index++;
            column++;
        }

        tokens.Add(new Token(tokenType, code[start..index], line, startColumn));
        return true;
    }
}
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Reads one operator, bracket or punctuation token starting at <paramref name="index"/>.
    /// Always consumes at least one character.
    /// </summary>
    /// <remarks>
    /// The longest match wins: the three-character shift-assignments <c>&lt;&lt;=</c> and
    /// <c>&gt;&gt;=</c> are tried first, then the known two-character operators, and finally the
    /// single current character.
    /// </remarks>
    /// <param name="code">Source text being tokenized.</param>
    /// <param name="index">Read position; advanced by the length of the token.</param>
    /// <param name="column">Current column; advanced by the length of the token.</param>
    /// <param name="line">Line number stored on the emitted token.</param>
    /// <param name="startColumn">Column stored on the emitted token.</param>
    /// <param name="tokens">List that receives the emitted token.</param>
    private static void ConsumeOperator(
        string code,
        ref int index,
        ref int column,
        int line,
        int startColumn,
        List<Token> tokens)
    {
        var threeChar = index + 2 < code.Length ? code[index..(index + 3)] : string.Empty;
        var twoChar = index + 1 < code.Length ? code[index..(index + 2)] : string.Empty;

        string op;
        if (threeChar is "<<=" or ">>=")
        {
            // Must be checked before the two-character table, otherwise "<<=" splits into "<<" and "=".
            op = threeChar;
        }
        else if (twoChar is "==" or "!=" or "<=" or ">=" or "&&" or "||" or "++" or "--"
            or "->" or "+=" or "-=" or "*=" or "/=" or "&=" or "|=" or "^=" or "<<"
            or ">>")
        {
            op = twoChar;
        }
        else
        {
            op = code[index].ToString();
        }

        index += op.Length;
        column += op.Length;

        var tokenType = op switch
        {
            "(" or ")" or "{" or "}" or "[" or "]" => TokenType.Bracket,
            ";" or "," or "." => TokenType.Punctuation,
            _ => TokenType.Operator,
        };

        tokens.Add(new Token(tokenType, op, line, startColumn));
    }
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Consumes a single whitespace character at <paramref name="index"/>, if there is one.
    /// </summary>
    /// <param name="code">Source text being tokenized.</param>
    /// <param name="index">Read position; advanced by one on success.</param>
    /// <param name="line">Current line; incremented when the character is a line feed.</param>
    /// <param name="column">Current column; reset to 1 after a line feed, otherwise incremented.</param>
    /// <returns><c>true</c> when a whitespace character was consumed; otherwise <c>false</c>.</returns>
    private static bool TryConsumeWhitespace(string code, ref int index, ref int line, ref int column)
    {
        var current = code[index];
        if (!char.IsWhiteSpace(current))
        {
            return false;
        }

        if (current == '\n')
        {
            line++;
            column = 1;
        }
        else
        {
            column++;
        }

        index++;
        return true;
    }

    /// <summary>
    /// Skips a <c>//</c> comment starting at <paramref name="index"/>.
    /// </summary>
    /// <param name="code">Source text being tokenized.</param>
    /// <param name="index">
    /// Read position; on success left on the terminating line feed (which is not consumed) or at
    /// the end of the input.
    /// </param>
    /// <returns><c>true</c> when a line comment was skipped; otherwise <c>false</c>.</returns>
    private static bool TryConsumeLineComment(string code, ref int index)
    {
        if (index + 1 >= code.Length || code[index] != '/' || code[index + 1] != '/')
        {
            return false;
        }

        // The column is not tracked here: the comment ends at a line feed (which resets the
        // column when consumed as whitespace) or at the end of the input.
        var newline = code.IndexOf('\n', index);
        index = newline < 0 ? code.Length : newline;
        return true;
    }

    /// <summary>
    /// Skips a <c>/* ... */</c> comment starting at <paramref name="index"/>, keeping the line
    /// and column counters in step with the characters skipped.
    /// </summary>
    /// <param name="code">Source text being tokenized.</param>
    /// <param name="index">Read position; moved past the closing <c>*/</c>, or to the end of the input when the comment is unterminated.</param>
    /// <param name="line">Current line; incremented for every line feed inside the comment.</param>
    /// <param name="column">Current column; advanced for every character skipped and reset to 1 after a line feed.</param>
    /// <returns><c>true</c> when a block comment was skipped; otherwise <c>false</c>.</returns>
    private static bool TryConsumeBlockComment(string code, ref int index, ref int line, ref int column)
    {
        if (index + 1 >= code.Length || code[index] != '/' || code[index + 1] != '*')
        {
            return false;
        }

        // Opening "/*".
        index += 2;
        column += 2;

        while (index + 1 < code.Length && !(code[index] == '*' && code[index + 1] == '/'))
        {
            if (code[index] == '\n')
            {
                line++;
                column = 1;
            }
            else
            {
                // Without this, a token after a same-line comment would report a stale column.
                column++;
            }

            index++;
        }

        if (index + 1 < code.Length)
        {
            // Closing "*/".
            index += 2;
            column += 2;
        }
        else
        {
            // Unterminated comment: stop exactly at the end of the input.
            index = code.Length;
        }

        return true;
    }
}
|
||||
@@ -0,0 +1,58 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class DecompiledCodeParser
{
    /// <summary>
    /// Converts source text into a flat token list by repeatedly trying each token scanner at
    /// the current position.
    /// </summary>
    /// <param name="code">Source text to tokenize.</param>
    /// <returns>The tokens in source order; whitespace and comments produce no tokens.</returns>
    private static List<Token> Tokenize(string code)
    {
        var tokens = new List<Token>();
        var index = 0;
        var line = 1;
        var column = 1;

        while (index < code.Length)
        {
            // Trivia first. The order matters and short-circuiting stops at the first scanner
            // that consumes something.
            var skippedTrivia =
                TryConsumeWhitespace(code, ref index, ref line, ref column)
                || TryConsumeLineComment(code, ref index)
                || TryConsumeBlockComment(code, ref index, ref line, ref column);

            if (skippedTrivia)
            {
                continue;
            }

            // Every token records the column at which it started.
            var startColumn = column;

            var emittedToken =
                TryConsumeIdentifier(code, ref index, ref column, line, startColumn, tokens)
                || TryConsumeNumber(code, ref index, ref column, line, startColumn, tokens)
                || TryConsumeStringLiteral(code, ref index, ref column, line, startColumn, tokens)
                || TryConsumeCharLiteral(code, ref index, ref column, line, startColumn, tokens);

            if (!emittedToken)
            {
                // Fallback: anything else is read as an operator, bracket or punctuation.
                ConsumeOperator(code, ref index, ref column, line, startColumn, tokens);
            }
        }

        return tokens;
    }
}
|
||||
@@ -1,7 +1,7 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Globalization;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
@@ -11,7 +11,7 @@ namespace StellaOps.BinaryIndex.Decompiler;
|
||||
/// </summary>
|
||||
public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
|
||||
{
|
||||
private static readonly HashSet<string> s_keywords =
|
||||
private static readonly HashSet<string> _keywords =
|
||||
[
|
||||
"if", "else", "while", "for", "do", "switch", "case", "default",
|
||||
"return", "break", "continue", "goto", "sizeof", "typedef",
|
||||
@@ -19,7 +19,7 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
|
||||
"float", "double", "unsigned", "signed", "const", "static", "extern"
|
||||
];
|
||||
|
||||
private static readonly HashSet<string> s_types =
|
||||
private static readonly HashSet<string> _types =
|
||||
[
|
||||
"void", "int", "uint", "char", "uchar", "byte", "ubyte",
|
||||
"short", "ushort", "long", "ulong", "longlong", "ulonglong",
|
||||
@@ -47,10 +47,7 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
|
||||
public ImmutableArray<LocalVariable> ExtractVariables(string code)
|
||||
{
|
||||
var variables = new List<LocalVariable>();
|
||||
var varIndex = 0;
|
||||
|
||||
// Match variable declarations: type name [= value];
|
||||
// Ghidra style: int local_10; or undefined8 param_1;
|
||||
var declPattern = VariableDeclarationRegex();
|
||||
|
||||
foreach (Match match in declPattern.Matches(code))
|
||||
@@ -58,6 +55,11 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
|
||||
var type = match.Groups["type"].Value;
|
||||
var name = match.Groups["name"].Value;
|
||||
|
||||
if (_keywords.Contains(type) && !_types.Contains(type))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var isParam = name.StartsWith("param_", StringComparison.Ordinal);
|
||||
int? paramIndex = null;
|
||||
int stackOffset = 0;
|
||||
@@ -68,13 +70,12 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
|
||||
}
|
||||
|
||||
if (name.StartsWith("local_", StringComparison.Ordinal) &&
|
||||
int.TryParse(name.AsSpan(6), System.Globalization.NumberStyles.HexNumber, null, out var offset))
|
||||
int.TryParse(name.AsSpan(6), NumberStyles.HexNumber, null, out var offset))
|
||||
{
|
||||
stackOffset = -offset; // Negative for locals
|
||||
stackOffset = -offset;
|
||||
}
|
||||
|
||||
variables.Add(new LocalVariable(name, type, stackOffset, isParam, paramIndex));
|
||||
varIndex++;
|
||||
}
|
||||
|
||||
return [.. variables];
|
||||
@@ -84,16 +85,13 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
|
||||
public ImmutableArray<string> ExtractCalledFunctions(string code)
|
||||
{
|
||||
var functions = new HashSet<string>();
|
||||
|
||||
// Match function calls: name(...)
|
||||
var callPattern = FunctionCallRegex();
|
||||
|
||||
foreach (Match match in callPattern.Matches(code))
|
||||
{
|
||||
var name = match.Groups["name"].Value;
|
||||
|
||||
// Skip keywords and types
|
||||
if (!s_keywords.Contains(name) && !s_types.Contains(name))
|
||||
if (!_keywords.Contains(name) && !_types.Contains(name))
|
||||
{
|
||||
functions.Add(name);
|
||||
}
|
||||
@@ -102,849 +100,4 @@ public sealed partial class DecompiledCodeParser : IDecompiledCodeParser
|
||||
return [.. functions.Order()];
|
||||
}
|
||||
|
||||
/// <summary>
/// Splits C-like source text into tokens, skipping whitespace and comments.
/// </summary>
/// <remarks>
/// Unterminated string, character and block-comment constructs run to the end of the input
/// instead of moving the cursor past it. Compound assignment operators, including the
/// three-character <c>&lt;&lt;=</c> and <c>&gt;&gt;=</c>, are emitted as single tokens.
/// </remarks>
/// <param name="code">Source text to tokenize.</param>
/// <returns>Tokens in source order, each carrying the line and column at which it starts.</returns>
private static List<Token> Tokenize(string code)
{
    var tokens = new List<Token>();
    var i = 0;
    var line = 1;
    var column = 1;

    // Scans a quoted literal whose opening quote is at the cursor. The closing quote is only
    // consumed when present, so the cursor never leaves the input and the slice stays in range.
    void ScanQuoted(char quote, TokenType type)
    {
        var start = i;
        var startColumn = column;

        i++;
        column++;
        while (i < code.Length && code[i] != quote)
        {
            // A backslash escapes the following character.
            var step = code[i] == '\\' && i + 1 < code.Length ? 2 : 1;
            i += step;
            column += step;
        }

        if (i < code.Length)
        {
            i++;
            column++;
        }

        tokens.Add(new Token(type, code[start..i], line, startColumn));
    }

    while (i < code.Length)
    {
        var c = code[i];

        // Whitespace: a line feed starts a new line at column 1.
        if (char.IsWhiteSpace(c))
        {
            if (c == '\n')
            {
                line++;
                column = 1;
            }
            else
            {
                column++;
            }

            i++;
            continue;
        }

        // Line comment: skip up to (not including) the line feed.
        if (i + 1 < code.Length && c == '/' && code[i + 1] == '/')
        {
            while (i < code.Length && code[i] != '\n')
            {
                i++;
            }

            continue;
        }

        // Block comment: keep line and column in step with the skipped characters.
        if (i + 1 < code.Length && c == '/' && code[i + 1] == '*')
        {
            i += 2;
            column += 2;
            while (i + 1 < code.Length && !(code[i] == '*' && code[i + 1] == '/'))
            {
                if (code[i] == '\n')
                {
                    line++;
                    column = 1;
                }
                else
                {
                    column++;
                }

                i++;
            }

            if (i + 1 < code.Length)
            {
                i += 2;
                column += 2;
            }
            else
            {
                i = code.Length;
            }

            continue;
        }

        var startColumn = column;

        // Identifiers and keywords.
        if (char.IsLetter(c) || c == '_')
        {
            var start = i;
            while (i < code.Length && (char.IsLetterOrDigit(code[i]) || code[i] == '_'))
            {
                i++;
                column++;
            }

            var value = code[start..i];
            var type = s_keywords.Contains(value) ? TokenType.Keyword : TokenType.Identifier;
            tokens.Add(new Token(type, value, line, startColumn));
            continue;
        }

        // Numbers: "0x" hex, or digits and dots, followed by optional U/L suffix characters.
        if (char.IsDigit(c))
        {
            var start = i;
            if (c == '0' && i + 1 < code.Length && code[i + 1] == 'x')
            {
                i += 2;
                column += 2;
                while (i < code.Length && char.IsAsciiHexDigit(code[i]))
                {
                    i++;
                    column++;
                }
            }
            else
            {
                while (i < code.Length && (char.IsDigit(code[i]) || code[i] == '.'))
                {
                    i++;
                    column++;
                }
            }

            while (i < code.Length && (code[i] is 'U' or 'L' or 'u' or 'l'))
            {
                i++;
                column++;
            }

            tokens.Add(new Token(TokenType.Number, code[start..i], line, startColumn));
            continue;
        }

        // String literals.
        if (c == '"')
        {
            ScanQuoted('"', TokenType.String);
            continue;
        }

        // Character literals.
        if (c == '\'')
        {
            ScanQuoted('\'', TokenType.Char);
            continue;
        }

        // Multi-character operators, longest match first.
        string? multiChar = null;
        if (i + 2 < code.Length)
        {
            var threeChar = code.Substring(i, 3);
            if (threeChar is "<<=" or ">>=")
            {
                multiChar = threeChar;
            }
        }

        if (multiChar is null && i + 1 < code.Length)
        {
            var twoChar = code.Substring(i, 2);
            if (twoChar is "==" or "!=" or "<=" or ">=" or "&&" or "||" or
                "++" or "--" or "+=" or "-=" or "*=" or "/=" or
                "&=" or "|=" or "^=" or
                "<<" or ">>" or "->" or "::")
            {
                multiChar = twoChar;
            }
        }

        if (multiChar is not null)
        {
            tokens.Add(new Token(TokenType.Operator, multiChar, line, startColumn));
            i += multiChar.Length;
            column += multiChar.Length;
            continue;
        }

        // Single-character operators, brackets and punctuation.
        var tokenType = c switch
        {
            '(' or ')' or '{' or '}' or '[' or ']' => TokenType.Bracket,
            ';' or ',' or ':' or '?' => TokenType.Punctuation,
            _ => TokenType.Operator
        };
        tokens.Add(new Token(tokenType, c.ToString(), line, startColumn));
        i++;
        column++;
    }

    return tokens;
}
|
||||
|
||||
/// <summary>
/// Counts <paramref name="node"/> together with all of its descendants.
/// </summary>
/// <param name="node">Root of the subtree to count.</param>
/// <returns>The number of nodes in the subtree; at least 1.</returns>
private static int CountNodes(AstNode node)
{
    // Start at 1 for the node itself, then add each child's subtree.
    var total = 1;
    for (var i = 0; i < node.Children.Length; i++)
    {
        total += CountNodes(node.Children[i]);
    }

    return total;
}
|
||||
|
||||
/// <summary>
/// Computes the height of the subtree rooted at <paramref name="node"/>.
/// </summary>
/// <param name="node">Root of the subtree to measure.</param>
/// <returns>1 for a node without children; otherwise 1 plus the depth of its deepest child.</returns>
private static int ComputeDepth(AstNode node)
{
    // Stays 0 when there are no children, so a leaf yields 1.
    var deepestChild = 0;
    foreach (var child in node.Children)
    {
        var childDepth = ComputeDepth(child);
        if (childDepth > deepestChild)
        {
            deepestChild = childDepth;
        }
    }

    return 1 + deepestChild;
}
|
||||
|
||||
/// <summary>
/// Walks the tree under <paramref name="root"/> and records the patterns found on each node:
/// loops (by node type), error checks and null checks (on <c>if</c> nodes).
/// </summary>
/// <param name="root">Root of the AST to inspect.</param>
/// <returns>Detected patterns in traversal order; one node may contribute several patterns.</returns>
private static ImmutableArray<AstPattern> ExtractPatterns(AstNode root)
{
    var found = new List<AstPattern>();

    void Record(PatternType kind, AstNode at, string description, decimal confidence) =>
        found.Add(new AstPattern(kind, at, new PatternMetadata(description, confidence, null)));

    foreach (var node in TraverseNodes(root))
    {
        // Loop constructs are identified purely by node type.
        switch (node.Type)
        {
            case AstNodeType.For:
                Record(PatternType.CountedLoop, node, "For loop", 0.9m);
                break;
            case AstNodeType.While:
                Record(PatternType.ConditionalLoop, node, "While loop", 0.9m);
                break;
            case AstNodeType.DoWhile:
                Record(PatternType.ConditionalLoop, node, "Do-while loop", 0.9m);
                break;
        }

        // An if-node can be both an error check and a null check; both are recorded.
        if (node is IfNode conditional)
        {
            if (IsErrorCheck(conditional))
            {
                Record(PatternType.ErrorCheck, node, "Error check", 0.8m);
            }

            if (IsNullCheck(conditional))
            {
                Record(PatternType.NullCheck, node, "Null check", 0.9m);
            }
        }
    }

    return [.. found];
}
|
||||
|
||||
/// <summary>
/// Enumerates <paramref name="root"/> and all of its descendants in pre-order: each node is
/// yielded before its children, and children are visited in their stored order.
/// </summary>
/// <param name="root">Node at which the traversal starts.</param>
/// <returns>A lazily evaluated sequence of nodes.</returns>
private static IEnumerable<AstNode> TraverseNodes(AstNode root)
{
    var pending = new Stack<AstNode>();
    pending.Push(root);

    while (pending.Count > 0)
    {
        var current = pending.Pop();
        yield return current;

        // Push children right-to-left so the leftmost child is popped (and yielded) first.
        for (var i = current.Children.Length - 1; i >= 0; i--)
        {
            pending.Push(current.Children[i]);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reports whether the condition of <paramref name="node"/> is a binary operation whose
/// right-hand side is a constant with the text <c>0</c>, <c>-1</c>, <c>0xffffffff</c> or
/// <c>NULL</c>. The operator itself is not inspected.
/// </summary>
/// <param name="node">The <c>if</c> node to examine.</param>
/// <returns><c>true</c> when the condition matches; otherwise <c>false</c>.</returns>
private static bool IsErrorCheck(IfNode node)
{
    return node.Condition is BinaryOpNode { Right: ConstantNode sentinel }
        && sentinel.Value?.ToString() is "0" or "-1" or "0xffffffff" or "NULL";
}
|
||||
|
||||
/// <summary>
/// Reports whether the condition of <paramref name="node"/> is an <c>==</c> or <c>!=</c>
/// comparison whose right-hand side is a constant with the text <c>0</c>, <c>NULL</c> or
/// <c>nullptr</c>.
/// </summary>
/// <param name="node">The <c>if</c> node to examine.</param>
/// <returns><c>true</c> when the condition matches; otherwise <c>false</c>.</returns>
private static bool IsNullCheck(IfNode node)
{
    return node.Condition is BinaryOpNode { Operator: "==" or "!=", Right: ConstantNode literal }
        && literal.Value?.ToString() is "0" or "NULL" or "nullptr";
}
|
||||
|
||||
/// <summary>
/// Matches two whitespace-separated words followed by optional whitespace and either
/// <c>=</c> or <c>;</c>. The words are captured as the <c>type</c> and <c>name</c> groups.
/// </summary>
// RegexOptions.Compiled is not passed: the regex source generator ignores that flag.
[GeneratedRegex(@"(?<type>\w+)\s+(?<name>\w+)\s*(?:=|;)")]
private static partial Regex VariableDeclarationRegex();
|
||||
|
||||
/// <summary>
/// Matches a word followed by optional whitespace and an opening parenthesis. The word is
/// captured as the <c>name</c> group.
/// </summary>
// RegexOptions.Compiled is not passed: the regex source generator ignores that flag.
[GeneratedRegex(@"(?<name>\w+)\s*\(")]
private static partial Regex FunctionCallRegex();
|
||||
}
|
||||
|
||||
/// <summary>
/// Categories assigned to tokens by the tokenizer.
/// </summary>
internal enum TokenType
{
    /// <summary>A word made of letters, digits and underscores that is not a keyword.</summary>
    Identifier,

    /// <summary>A word found in the parser's keyword set.</summary>
    Keyword,

    /// <summary>A numeric literal, including any hex prefix and U/L suffix characters.</summary>
    Number,

    /// <summary>A double-quoted literal; the token text includes the quotes.</summary>
    String,

    /// <summary>A single-quoted literal; the token text includes the quotes.</summary>
    Char,

    /// <summary>An operator of one or more characters.</summary>
    Operator,

    /// <summary>One of the round, curly or square bracket characters.</summary>
    Bracket,

    /// <summary>A separator such as <c>;</c> or <c>,</c>.</summary>
    Punctuation
}
|
||||
|
||||
/// <summary>
/// A single lexical token together with the position at which it starts.
/// </summary>
/// <param name="Type">Category of the token.</param>
/// <param name="Value">Exact source text of the token.</param>
/// <param name="Line">Line number recorded by the tokenizer (counting starts at 1).</param>
/// <param name="Column">Column recorded by the tokenizer (counting starts at 1).</param>
internal readonly record struct Token(
    TokenType Type,
    string Value,
    int Line,
    int Column);
|
||||
|
||||
/// <summary>
/// Recursive-descent parser that builds an AST for a single C-like function from a token list.
/// </summary>
/// <remarks>
/// The parser is lenient: it never throws on malformed input. Unexpected tokens are consumed
/// and, once the token list is exhausted, <see cref="Peek"/> and <see cref="Advance"/> return
/// an empty sentinel token.
/// </remarks>
internal sealed class RecursiveParser
{
    // Sentinel handed out when reading at or beyond the end of the token list.
    private static readonly Token _endOfInput = new(TokenType.Punctuation, "", 0, 0);

    private readonly List<Token> _tokens;
    private int _pos;

    /// <summary>Creates a parser positioned at the first token of <paramref name="tokens"/>.</summary>
    /// <param name="tokens">Tokens to parse, in source order.</param>
    public RecursiveParser(List<Token> tokens)
    {
        _tokens = tokens;
        _pos = 0;
    }

    // True once every token has been consumed.
    private bool IsAtEnd => _pos >= _tokens.Count;

    /// <summary>
    /// Parses <c>return-type name(parameters) { body }</c>.
    /// </summary>
    /// <returns>A <see cref="FunctionNode"/> describing the parsed function.</returns>
    public AstNode ParseFunction()
    {
        var returnType = ParseType();
        var name = Expect(TokenType.Identifier).Value;

        Expect(TokenType.Bracket, "(");
        var parameters = ParseParameterList();
        Expect(TokenType.Bracket, ")");

        var body = ParseBlock();

        return new FunctionNode(name, returnType, parameters, body);
    }

    // Reads leading modifiers, one base-type token and any trailing '*' tokens into a type string.
    private string ParseType()
    {
        var type = new System.Text.StringBuilder();

        while (Peek().Value is "const" or "unsigned" or "signed" or "static" or "extern")
        {
            type.Append(Advance().Value);
            type.Append(' ');
        }

        // Base type: whatever token comes next is taken as-is.
        type.Append(Advance().Value);

        while (Peek().Value == "*")
        {
            type.Append(Advance().Value);
        }

        return type.ToString().Trim();
    }

    // Parses a comma-separated parameter list; the caller consumes the surrounding parentheses.
    private ImmutableArray<ParameterNode> ParseParameterList()
    {
        var parameters = new List<ParameterNode>();
        var index = 0;

        if (Peek().Value == ")")
        {
            return [];
        }

        // "(void)" declares no parameters.
        if (Peek().Value == "void" && PeekAhead(1).Value == ")")
        {
            Advance();
            return [];
        }

        do
        {
            if (Peek().Value == ",")
            {
                Advance();
            }

            var type = ParseType();

            // Unnamed parameters get a synthetic positional name.
            var name = Peek().Type == TokenType.Identifier ? Advance().Value : $"param_{index}";

            parameters.Add(new ParameterNode(name, type, index));
            index++;
        }
        while (Peek().Value == ",");

        return [.. parameters];
    }

    // Parses "{ statement* }".
    private BlockNode ParseBlock()
    {
        Expect(TokenType.Bracket, "{");

        var statements = new List<AstNode>();

        // The end-of-input check is essential: at the end of the token list Peek() returns the
        // empty sentinel and Advance() makes no progress, so a block without a closing brace
        // would otherwise loop forever, appending placeholder nodes.
        while (!IsAtEnd && Peek().Value != "}")
        {
            var stmt = ParseStatement();
            if (stmt is not null)
            {
                statements.Add(stmt);
            }
        }

        Expect(TokenType.Bracket, "}");

        return new BlockNode([.. statements]);
    }

    // Dispatches on the leading token; returns null for an empty statement (";").
    private AstNode? ParseStatement()
    {
        var token = Peek();

        return token.Value switch
        {
            "if" => ParseIf(),
            "while" => ParseWhile(),
            "for" => ParseFor(),
            "do" => ParseDoWhile(),
            "return" => ParseReturn(),
            "break" => ParseBreak(),
            "continue" => ParseContinue(),
            "{" => ParseBlock(),
            ";" => SkipSemicolon(),
            _ => ParseExpressionStatement()
        };
    }

    private IfNode ParseIf()
    {
        Advance(); // 'if'
        Expect(TokenType.Bracket, "(");
        var condition = ParseExpression();
        Expect(TokenType.Bracket, ")");

        // An empty statement as the branch is represented by an empty block.
        var thenBranch = ParseStatement() ?? new BlockNode([]);

        AstNode? elseBranch = null;
        if (Peek().Value == "else")
        {
            Advance();
            elseBranch = ParseStatement();
        }

        return new IfNode(condition, thenBranch, elseBranch);
    }

    private WhileNode ParseWhile()
    {
        Advance(); // 'while'
        Expect(TokenType.Bracket, "(");
        var condition = ParseExpression();
        Expect(TokenType.Bracket, ")");

        var body = ParseStatement() ?? new BlockNode([]);

        return new WhileNode(condition, body);
    }

    private ForNode ParseFor()
    {
        Advance(); // 'for'
        Expect(TokenType.Bracket, "(");

        // Each of the three clauses is optional.
        AstNode? init = null;
        if (Peek().Value != ";")
        {
            init = ParseExpression();
        }

        Expect(TokenType.Punctuation, ";");

        AstNode? condition = null;
        if (Peek().Value != ";")
        {
            condition = ParseExpression();
        }

        Expect(TokenType.Punctuation, ";");

        AstNode? update = null;
        if (Peek().Value != ")")
        {
            update = ParseExpression();
        }

        Expect(TokenType.Bracket, ")");

        var body = ParseStatement() ?? new BlockNode([]);

        return new ForNode(init, condition, update, body);
    }

    private AstNode ParseDoWhile()
    {
        Advance(); // 'do'
        var body = ParseStatement() ?? new BlockNode([]);

        Expect(TokenType.Keyword, "while");
        Expect(TokenType.Bracket, "(");
        var condition = ParseExpression();
        Expect(TokenType.Bracket, ")");
        Expect(TokenType.Punctuation, ";");

        // Simplification: a do-while is currently represented as a plain while node.
        return new WhileNode(condition, body);
    }

    private ReturnNode ParseReturn()
    {
        Advance(); // 'return'

        AstNode? value = null;
        if (Peek().Value != ";")
        {
            value = ParseExpression();
        }

        Expect(TokenType.Punctuation, ";");

        return new ReturnNode(value);
    }

    private AstNode ParseBreak()
    {
        Advance();
        Expect(TokenType.Punctuation, ";");

        // Simplification: 'break' is represented by an empty block.
        return new BlockNode([]);
    }

    private AstNode ParseContinue()
    {
        Advance();
        Expect(TokenType.Punctuation, ";");

        // Simplification: 'continue' is represented by an empty block.
        return new BlockNode([]);
    }

    private AstNode? SkipSemicolon()
    {
        Advance();
        return null;
    }

    // Parses an expression and consumes the terminating ';' when one follows.
    private AstNode? ParseExpressionStatement()
    {
        var expr = ParseExpression();
        if (Peek().Value == ";")
        {
            Advance();
        }

        return expr;
    }

    private AstNode ParseExpression()
    {
        return ParseAssignment();
    }

    // Assignment is right-associative, hence the recursion on the right-hand side.
    private AstNode ParseAssignment()
    {
        var left = ParseLogicalOr();

        if (Peek().Value is "=" or "+=" or "-=" or "*=" or "/=" or "&=" or "|=" or "^=" or "<<=" or ">>=")
        {
            var op = Advance().Value;
            var right = ParseAssignment();
            return new AssignmentNode(left, right, op);
        }

        return left;
    }

    // Shared shape of every left-associative binary level:
    // operand (operator operand)*, folded into a left-leaning tree.
    private AstNode ParseLeftAssociative(Func<AstNode> parseOperand, Func<string, bool> isOperator)
    {
        var left = parseOperand();

        while (isOperator(Peek().Value))
        {
            var op = Advance().Value;
            var right = parseOperand();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    // Binary levels from loosest to tightest binding.
    private AstNode ParseLogicalOr() =>
        ParseLeftAssociative(ParseLogicalAnd, static op => op is "||");

    private AstNode ParseLogicalAnd() =>
        ParseLeftAssociative(ParseBitwiseOr, static op => op is "&&");

    private AstNode ParseBitwiseOr() =>
        ParseLeftAssociative(ParseComparison, static op => op is "|" or "^" or "&");

    private AstNode ParseComparison() =>
        ParseLeftAssociative(ParseShift, static op => op is "==" or "!=" or "<" or ">" or "<=" or ">=");

    private AstNode ParseShift() =>
        ParseLeftAssociative(ParseAdditive, static op => op is "<<" or ">>");

    private AstNode ParseAdditive() =>
        ParseLeftAssociative(ParseMultiplicative, static op => op is "+" or "-");

    private AstNode ParseMultiplicative() =>
        ParseLeftAssociative(ParseUnary, static op => op is "*" or "/" or "%");

    // Prefix operators nest, e.g. "!*p".
    private AstNode ParseUnary()
    {
        if (Peek().Value is "!" or "~" or "-" or "+" or "*" or "&" or "++" or "--")
        {
            var op = Advance().Value;
            var operand = ParseUnary();
            return new UnaryOpNode(operand, op, true);
        }

        return ParsePostfix();
    }

    // Parses a primary expression followed by any number of call, index, member or ++/-- suffixes.
    private AstNode ParsePostfix()
    {
        var expr = ParsePrimary();

        while (true)
        {
            if (Peek().Value == "(")
            {
                Advance();
                var args = ParseArgumentList();
                Expect(TokenType.Bracket, ")");

                // Only calls on a plain name become call nodes; for any other callee the
                // arguments are parsed and dropped, and the callee expression is kept.
                if (expr is VariableNode varNode)
                {
                    expr = new CallNode(varNode.Name, args);
                }
            }
            else if (Peek().Value == "[")
            {
                Advance();
                var index = ParseExpression();
                Expect(TokenType.Bracket, "]");
                expr = new ArrayAccessNode(expr, index);
            }
            else if (Peek().Value is "." or "->")
            {
                var isPointer = Advance().Value == "->";
                var field = Expect(TokenType.Identifier).Value;
                expr = new FieldAccessNode(expr, field, isPointer);
            }
            else if (Peek().Value is "++" or "--")
            {
                var op = Advance().Value;
                expr = new UnaryOpNode(expr, op, false);
            }
            else
            {
                break;
            }
        }

        return expr;
    }

    // Parses comma-separated call arguments; the caller consumes the closing ')'.
    private ImmutableArray<AstNode> ParseArgumentList()
    {
        var args = new List<AstNode>();

        if (Peek().Value == ")")
        {
            return [];
        }

        do
        {
            if (Peek().Value == ",")
            {
                Advance();
            }

            args.Add(ParseExpression());
        }
        while (Peek().Value == ",");

        return [.. args];
    }

    // Parses literals, names, parenthesised expressions, casts and sizeof.
    private AstNode ParsePrimary()
    {
        var token = Peek();

        switch (token.Type)
        {
            case TokenType.Number:
                Advance();
                return new ConstantNode(token.Value, "int");

            case TokenType.String:
                Advance();
                return new ConstantNode(token.Value, "char*");

            case TokenType.Char:
                Advance();
                return new ConstantNode(token.Value, "char");

            case TokenType.Identifier:
                Advance();
                return new VariableNode(token.Value, null);
        }

        if (token.Value == "(")
        {
            Advance();

            // "(type) expr" is a cast when the token after '(' is a known type word.
            if (IsType(Peek().Value))
            {
                var targetType = ParseType();
                Expect(TokenType.Bracket, ")");
                var expr = ParseUnary();
                return new CastNode(expr, targetType);
            }

            var inner = ParseExpression();
            Expect(TokenType.Bracket, ")");
            return inner;
        }

        if (token.Value == "sizeof")
        {
            Advance();
            Expect(TokenType.Bracket, "(");
            var type = ParseType();
            Expect(TokenType.Bracket, ")");
            return new ConstantNode($"sizeof({type})", "size_t");
        }

        // Unknown token: consume it so the parser keeps moving and return a placeholder constant.
        Advance();
        return new ConstantNode(token.Value, "unknown");
    }

    // Words that mark the start of a type name when deciding whether "(" opens a cast.
    private static bool IsType(string value)
    {
        return value is "int" or "char" or "void" or "long" or "short" or "float" or "double"
            or "unsigned" or "signed" or "const" or "struct" or "union" or "enum"
            or "undefined" or "undefined1" or "undefined2" or "undefined4" or "undefined8"
            or "byte" or "word" or "dword" or "qword" or "pointer" or "code" or "uint" or "ulong";
    }

    // Returns the current token without consuming it.
    private Token Peek() => _pos < _tokens.Count ? _tokens[_pos] : _endOfInput;

    // Returns the token 'offset' positions ahead without consuming anything.
    private Token PeekAhead(int offset) => _pos + offset < _tokens.Count
        ? _tokens[_pos + offset]
        : _endOfInput;

    // Consumes and returns the current token; a no-op at the end of input.
    private Token Advance() => _pos < _tokens.Count ? _tokens[_pos++] : _endOfInput;

    // Consumes the current token whether or not it matches. The expected type and value only
    // document the caller's intent; a mismatching token is skipped rather than reported.
    private Token Expect(TokenType type, string? value = null)
    {
        return Advance();
    }
}
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Outcome of comparing two decompiled functions: similarity scores, the AST edit distance,
/// and the equivalences and differences that were found.
/// </summary>
public sealed record DecompiledComparisonResult(
    decimal Similarity,
    decimal StructuralSimilarity,
    decimal SemanticSimilarity,
    AstEditDistance EditDistance,
    ImmutableArray<SemanticEquivalence> Equivalences,
    ImmutableArray<CodeDifference> Differences,
    ComparisonConfidence Confidence);
|
||||
|
||||
/// <summary>
/// Edit distance between two ASTs, broken down by operation kind.
/// </summary>
/// <param name="Insertions">Number of insert operations.</param>
/// <param name="Deletions">Number of delete operations.</param>
/// <param name="Modifications">Number of modify operations.</param>
/// <param name="TotalOperations">Total operation count as supplied by the caller.</param>
/// <param name="NormalizedDistance">Normalized form of the distance as supplied by the caller.</param>
public sealed record AstEditDistance(
    int Insertions,
    int Deletions,
    int Modifications,
    int TotalOperations,
    decimal NormalizedDistance);
|
||||
@@ -0,0 +1,18 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// A single function in decompiled, C-like pseudo-code form, together with the data derived
/// from it. <paramref name="Ast"/> is optional and may be <c>null</c>.
/// </summary>
public sealed record DecompiledFunction(
    string FunctionName,
    string Signature,
    string Code,
    DecompiledAst? Ast,
    ImmutableArray<LocalVariable> Locals,
    ImmutableArray<string> CalledFunctions,
    ulong Address,
    int SizeBytes);
|
||||
@@ -0,0 +1,13 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Configuration options for the decompiler adapter.
/// </summary>
public sealed class DecompilerOptions
{
    /// <summary>Path of the Ghidra scripts location. Defaults to <c>/scripts</c>.</summary>
    public string GhidraScriptsPath { get; set; } = "/scripts";

    /// <summary>Default timeout. Defaults to 30 seconds.</summary>
    public TimeSpan DefaultTimeout { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>Maximum code length. Defaults to 100,000.</summary>
    public int MaxCodeLength { get; set; } = 100_000;
}
|
||||
@@ -0,0 +1,85 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class GhidraDecompilerAdapter
{
    /// <inheritdoc />
    public Task<DecompiledAst> ParseToAstAsync(
        string decompiledCode,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrEmpty(decompiledCode);
        ct.ThrowIfCancellationRequested();

        // Parsing is synchronous; the result is handed back as an already-completed task.
        return Task.FromResult(_parser.Parse(decompiledCode));
    }

    /// <inheritdoc />
    public Task<DecompiledComparisonResult> CompareAsync(
        DecompiledFunction a,
        DecompiledFunction b,
        ComparisonOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(a);
        ArgumentNullException.ThrowIfNull(b);

        // The options are defaulted here but not consulted further in this method.
        options ??= new ComparisonOptions();
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug(
            "Comparing functions {A} and {B}",
            a.FunctionName,
            b.FunctionName);

        // Without both ASTs there is nothing to compare: report zero similarity,
        // a normalized distance of 1.0 and low confidence.
        if (a.Ast is not { } astA || b.Ast is not { } astB)
        {
            _logger.LogWarning("Cannot compare functions without ASTs");

            var notComparable = new DecompiledComparisonResult(
                Similarity: 0,
                StructuralSimilarity: 0,
                SemanticSimilarity: 0,
                EditDistance: new AstEditDistance(0, 0, 0, 0, 1.0m),
                Equivalences: [],
                Differences: [],
                Confidence: ComparisonConfidence.Low);

            return Task.FromResult(notComparable);
        }

        var structural = _comparisonEngine.ComputeStructuralSimilarity(astA, astB);
        var distance = _comparisonEngine.ComputeEditDistance(astA, astB);
        var matchedPairs = _comparisonEngine.FindEquivalences(astA, astB);
        var diffs = _comparisonEngine.FindDifferences(astA, astB);

        // Semantic similarity = equivalences found relative to the larger AST's node count.
        var largerNodeCount = Math.Max(astA.NodeCount, astB.NodeCount);
        var semantic = 0m;
        if (largerNodeCount > 0)
        {
            semantic = (decimal)matchedPairs.Length / largerNodeCount;
        }

        var overall = ComputeOverallSimilarity(
            structural,
            semantic,
            distance.NormalizedDistance);

        var confidence = DetermineConfidence(
            overall,
            astA.NodeCount,
            astB.NodeCount,
            matchedPairs.Length);

        var result = new DecompiledComparisonResult(
            Similarity: overall,
            StructuralSimilarity: structural,
            SemanticSimilarity: semantic,
            EditDistance: distance,
            Equivalences: matchedPairs,
            Differences: diffs,
            Confidence: confidence);

        return Task.FromResult(result);
    }
}
|
||||
@@ -0,0 +1,108 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Ghidra;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class GhidraDecompilerAdapter
{
    /// <inheritdoc />
    /// <remarks>
    /// Works from the decompiled code already attached to <paramref name="function"/>.
    /// When that code is missing, a stub result without AST, locals or called functions
    /// is returned. Code longer than <c>options.MaxCodeLength</c> is truncated before parsing.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="function"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was already cancelled.</exception>
    public async Task<DecompiledFunction> DecompileAsync(
        GhidraFunction function,
        DecompileOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(function);

        // The token used to be accepted but never observed; stop early when
        // cancellation has already been requested.
        ct.ThrowIfCancellationRequested();

        options ??= new DecompileOptions();

        _logger.LogDebug(
            "Decompiling function {Name} at 0x{Address:X}",
            function.Name,
            function.Address);

        var code = function.DecompiledCode;

        if (string.IsNullOrEmpty(code))
        {
            _logger.LogWarning(
                "Function {Name} has no decompiled code, returning stub",
                function.Name);

            return new DecompiledFunction(
                function.Name,
                BuildSignature(function),
                "/* Decompilation unavailable */",
                null,
                [],
                [],
                function.Address,
                function.Size);
        }

        if (code.Length > options.MaxCodeLength)
        {
            code = code[..options.MaxCodeLength] + "\n/* ... truncated ... */";
        }

        // A parse failure is not fatal: it is logged and the result carries a null AST.
        DecompiledAst? ast = null;
        try
        {
            ast = _parser.Parse(code);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to parse decompiled code for {Name}", function.Name);
        }

        var locals = _parser.ExtractVariables(code);
        var calledFunctions = _parser.ExtractCalledFunctions(code);

        return new DecompiledFunction(
            function.Name,
            BuildSignature(function),
            code,
            ast,
            locals,
            calledFunctions,
            function.Address,
            function.Size);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Analyzes the binary at <paramref name="binaryPath"/> with decompilation enabled,
    /// then decompiles the function whose address equals <paramref name="address"/>.
    /// </remarks>
    /// <exception cref="ArgumentException"><paramref name="binaryPath"/> is null or empty.</exception>
    /// <exception cref="InvalidOperationException">The analysis contains no function at <paramref name="address"/>.</exception>
    public async Task<DecompiledFunction> DecompileAtAddressAsync(
        string binaryPath,
        ulong address,
        DecompileOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrEmpty(binaryPath);

        options ??= new DecompileOptions();

        _logger.LogDebug(
            "Decompiling function at 0x{Address:X} in {Binary}",
            address,
            Path.GetFileName(binaryPath));

        using var stream = File.OpenRead(binaryPath);

        var analysisOptions = new GhidraAnalysisOptions
        {
            IncludeDecompilation = true,
            ExtractDecompilation = true
        };

        var analysis = await _ghidraService
            .AnalyzeAsync(stream, analysisOptions, ct)
            .ConfigureAwait(false);

        var function = analysis.Functions.FirstOrDefault(f => f.Address == address);

        if (function is null)
        {
            throw new InvalidOperationException($"No function found at address 0x{address:X}");
        }

        return await DecompileAsync(function, options, ct).ConfigureAwait(false);
    }
}
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using StellaOps.BinaryIndex.Ghidra;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
public sealed partial class GhidraDecompilerAdapter
{
    /// <summary>
    /// Returns the signature carried by <paramref name="function"/>, or a synthetic
    /// <c>void name(void)</c> signature when it has none.
    /// </summary>
    private static string BuildSignature(GhidraFunction function)
        => string.IsNullOrEmpty(function.Signature)
            ? $"void {function.Name}(void)"
            : function.Signature;

    /// <summary>
    /// Blends the individual scores into one value: 40% structural similarity,
    /// 40% semantic similarity and 20% inverted normalized edit distance.
    /// </summary>
    private static decimal ComputeOverallSimilarity(
        decimal structural,
        decimal semantic,
        decimal normalizedEditDistance)
    {
        const decimal StructuralWeight = 0.4m;
        const decimal SemanticWeight = 0.4m;
        const decimal EditWeight = 0.2m;

        // Distance is turned into a similarity so that all three terms point the same way.
        var editSimilarity = 1.0m - normalizedEditDistance;

        return structural * StructuralWeight + semantic * SemanticWeight + editSimilarity * EditWeight;
    }

    /// <summary>
    /// Maps a similarity score to a confidence level, taking into account the size of
    /// the smaller AST and the number of equivalences found.
    /// </summary>
    private static ComparisonConfidence DetermineConfidence(
        decimal similarity,
        int nodeCountA,
        int nodeCountB,
        int equivalenceCount)
    {
        var smallerAst = Math.Min(nodeCountA, nodeCountB);

        // Fewer than five nodes on either side always yields low confidence.
        if (smallerAst < 5)
        {
            return ComparisonConfidence.Low;
        }

        return similarity switch
        {
            > 0.9m when equivalenceCount > smallerAst * 0.7 => ComparisonConfidence.VeryHigh,
            > 0.7m when equivalenceCount > smallerAst * 0.5 => ComparisonConfidence.High,
            > 0.5m => ComparisonConfidence.Medium,
            _ => ComparisonConfidence.Low,
        };
    }
}
|
||||
@@ -1,18 +1,15 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
|
||||
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.BinaryIndex.Ghidra;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
|
||||
/// Adapter for Ghidra's decompiler via headless analysis.
|
||||
/// </summary>
|
||||
public sealed class GhidraDecompilerAdapter : IDecompilerService
|
||||
public sealed partial class GhidraDecompilerAdapter : IDecompilerService
|
||||
{
|
||||
private readonly IGhidraService _ghidraService;
|
||||
private readonly IDecompiledCodeParser _parser;
|
||||
@@ -33,260 +30,4 @@ public sealed class GhidraDecompilerAdapter : IDecompilerService
|
||||
_options = options.Value;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Works from the decompiled code already attached to <paramref name="function"/>.
/// When that code is missing, a stub result without AST, locals or called functions
/// is returned. Code longer than <c>options.MaxCodeLength</c> is truncated before parsing.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="function"/> is <c>null</c>.</exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was already cancelled.</exception>
public async Task<DecompiledFunction> DecompileAsync(
    GhidraFunction function,
    DecompileOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(function);

    // The token used to be accepted but never observed; stop early when
    // cancellation has already been requested.
    ct.ThrowIfCancellationRequested();

    options ??= new DecompileOptions();

    _logger.LogDebug(
        "Decompiling function {Name} at 0x{Address:X}",
        function.Name,
        function.Address);

    // The decompiled code is read straight from the function object.
    var code = function.DecompiledCode;

    if (string.IsNullOrEmpty(code))
    {
        _logger.LogWarning(
            "Function {Name} has no decompiled code, returning stub",
            function.Name);

        return new DecompiledFunction(
            function.Name,
            BuildSignature(function),
            "/* Decompilation unavailable */",
            null,
            [],
            [],
            function.Address,
            function.Size);
    }

    // Over-long code is cut at the limit and marked as truncated.
    if (code.Length > options.MaxCodeLength)
    {
        code = code[..options.MaxCodeLength] + "\n/* ... truncated ... */";
    }

    // A parse failure is not fatal: it is logged and the result carries a null AST.
    DecompiledAst? ast = null;
    try
    {
        ast = _parser.Parse(code);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to parse decompiled code for {Name}", function.Name);
    }

    var locals = _parser.ExtractVariables(code);
    var calledFunctions = _parser.ExtractCalledFunctions(code);

    return new DecompiledFunction(
        function.Name,
        BuildSignature(function),
        code,
        ast,
        locals,
        calledFunctions,
        function.Address,
        function.Size);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Analyzes the binary at <paramref name="binaryPath"/> with decompilation enabled,
/// then decompiles the function whose address equals <paramref name="address"/>.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="binaryPath"/> is null or empty.</exception>
/// <exception cref="InvalidOperationException">The analysis contains no function at <paramref name="address"/>.</exception>
public async Task<DecompiledFunction> DecompileAtAddressAsync(
    string binaryPath,
    ulong address,
    DecompileOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrEmpty(binaryPath);

    options ??= new DecompileOptions();

    _logger.LogDebug(
        "Decompiling function at 0x{Address:X} in {Binary}",
        address,
        Path.GetFileName(binaryPath));

    using var stream = File.OpenRead(binaryPath);

    // Library code: the continuation does not need the caller's synchronization
    // context, so both awaits use ConfigureAwait(false).
    var analysis = await _ghidraService.AnalyzeAsync(
        stream,
        new GhidraAnalysisOptions
        {
            IncludeDecompilation = true,
            ExtractDecompilation = true
        },
        ct).ConfigureAwait(false);

    var function = analysis.Functions.FirstOrDefault(f => f.Address == address);

    if (function is null)
    {
        throw new InvalidOperationException($"No function found at address 0x{address:X}");
    }

    return await DecompileAsync(function, options, ct).ConfigureAwait(false);
}
|
||||
|
||||
/// <inheritdoc />
public Task<DecompiledAst> ParseToAstAsync(
    string decompiledCode,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrEmpty(decompiledCode);
    ct.ThrowIfCancellationRequested();

    // Parsing is synchronous; the result is handed back as an already-completed task.
    return Task.FromResult(_parser.Parse(decompiledCode));
}
|
||||
|
||||
/// <inheritdoc />
public Task<DecompiledComparisonResult> CompareAsync(
    DecompiledFunction a,
    DecompiledFunction b,
    ComparisonOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(a);
    ArgumentNullException.ThrowIfNull(b);

    // The options are defaulted here but not consulted further in this method.
    options ??= new ComparisonOptions();
    ct.ThrowIfCancellationRequested();

    _logger.LogDebug(
        "Comparing functions {A} and {B}",
        a.FunctionName,
        b.FunctionName);

    // Without both ASTs there is nothing to compare: report zero similarity,
    // a normalized distance of 1.0 and low confidence.
    if (a.Ast is not { } astA || b.Ast is not { } astB)
    {
        _logger.LogWarning("Cannot compare functions without ASTs");

        var notComparable = new DecompiledComparisonResult(
            Similarity: 0,
            StructuralSimilarity: 0,
            SemanticSimilarity: 0,
            EditDistance: new AstEditDistance(0, 0, 0, 0, 1.0m),
            Equivalences: [],
            Differences: [],
            Confidence: ComparisonConfidence.Low);

        return Task.FromResult(notComparable);
    }

    var structural = _comparisonEngine.ComputeStructuralSimilarity(astA, astB);
    var distance = _comparisonEngine.ComputeEditDistance(astA, astB);
    var matchedPairs = _comparisonEngine.FindEquivalences(astA, astB);
    var diffs = _comparisonEngine.FindDifferences(astA, astB);

    // Semantic similarity = equivalences found relative to the larger AST's node count.
    var largerNodeCount = Math.Max(astA.NodeCount, astB.NodeCount);
    var semantic = 0m;
    if (largerNodeCount > 0)
    {
        semantic = (decimal)matchedPairs.Length / largerNodeCount;
    }

    var overall = ComputeOverallSimilarity(
        structural,
        semantic,
        distance.NormalizedDistance);

    var confidence = DetermineConfidence(
        overall,
        astA.NodeCount,
        astB.NodeCount,
        matchedPairs.Length);

    var result = new DecompiledComparisonResult(
        Similarity: overall,
        StructuralSimilarity: structural,
        SemanticSimilarity: semantic,
        EditDistance: distance,
        Equivalences: matchedPairs,
        Differences: diffs,
        Confidence: confidence);

    return Task.FromResult(result);
}
|
||||
|
||||
/// <summary>
/// Returns the signature carried by <paramref name="function"/>, or a synthetic
/// <c>void name(void)</c> signature when it has none.
/// </summary>
private static string BuildSignature(GhidraFunction function)
    => string.IsNullOrEmpty(function.Signature)
        ? $"void {function.Name}(void)"
        : function.Signature;
|
||||
|
||||
/// <summary>
/// Blends the individual scores into one value: 40% structural similarity,
/// 40% semantic similarity and 20% inverted normalized edit distance.
/// </summary>
private static decimal ComputeOverallSimilarity(
    decimal structural,
    decimal semantic,
    decimal normalizedEditDistance)
{
    const decimal StructuralWeight = 0.4m;
    const decimal SemanticWeight = 0.4m;
    const decimal EditWeight = 0.2m;

    // Distance is turned into a similarity so that all three terms point the same way.
    var editSimilarity = 1.0m - normalizedEditDistance;

    return structural * StructuralWeight + semantic * SemanticWeight + editSimilarity * EditWeight;
}
|
||||
|
||||
/// <summary>
/// Maps a similarity score to a confidence level, taking into account the size of
/// the smaller AST and the number of equivalences found.
/// </summary>
private static ComparisonConfidence DetermineConfidence(
    decimal similarity,
    int nodeCountA,
    int nodeCountB,
    int equivalenceCount)
{
    var smallerAst = Math.Min(nodeCountA, nodeCountB);

    // Fewer than five nodes on either side always yields low confidence.
    if (smallerAst < 5)
    {
        return ComparisonConfidence.Low;
    }

    return similarity switch
    {
        > 0.9m when equivalenceCount > smallerAst * 0.7 => ComparisonConfidence.VeryHigh,
        > 0.7m when equivalenceCount > smallerAst * 0.5 => ComparisonConfidence.High,
        > 0.5m => ComparisonConfidence.Medium,
        _ => ComparisonConfidence.Low,
    };
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Configuration options for the decompiler adapter.
/// </summary>
public sealed class DecompilerOptions
{
    /// <summary>Path of the Ghidra scripts location. Defaults to <c>/scripts</c>.</summary>
    public string GhidraScriptsPath { get; set; } = "/scripts";

    /// <summary>Default timeout. Defaults to 30 seconds.</summary>
    public TimeSpan DefaultTimeout { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>Maximum code length. Defaults to 100,000.</summary>
    public int MaxCodeLength { get; set; } = 100_000;
}
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Compares the AST structures of decompiled code.
/// </summary>
public interface IAstComparisonEngine
{
    /// <summary>
    /// Computes how structurally similar two ASTs are.
    /// </summary>
    /// <param name="a">The first AST.</param>
    /// <param name="b">The second AST.</param>
    /// <returns>A similarity score from 0.0 to 1.0.</returns>
    decimal ComputeStructuralSimilarity(DecompiledAst a, DecompiledAst b);

    /// <summary>
    /// Computes the edit distance between two ASTs.
    /// </summary>
    /// <param name="a">The first AST.</param>
    /// <param name="b">The second AST.</param>
    /// <returns>The edit-distance metrics.</returns>
    AstEditDistance ComputeEditDistance(DecompiledAst a, DecompiledAst b);

    /// <summary>
    /// Finds the semantic equivalences between two ASTs.
    /// </summary>
    /// <param name="a">The first AST.</param>
    /// <param name="b">The second AST.</param>
    /// <returns>The equivalent node pairs.</returns>
    ImmutableArray<SemanticEquivalence> FindEquivalences(DecompiledAst a, DecompiledAst b);

    /// <summary>
    /// Finds the differences between two ASTs.
    /// </summary>
    /// <param name="a">The first AST.</param>
    /// <param name="b">The second AST.</param>
    /// <returns>The differences found.</returns>
    ImmutableArray<CodeDifference> FindDifferences(DecompiledAst a, DecompiledAst b);
}
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Brings decompiled code into a normalized form so that it can be compared.
/// </summary>
public interface ICodeNormalizer
{
    /// <summary>
    /// Normalizes decompiled code for comparison.
    /// </summary>
    /// <param name="code">The raw decompiled code.</param>
    /// <param name="options">Normalization options; may be omitted.</param>
    /// <returns>The normalized code.</returns>
    string Normalize(string code, NormalizationOptions? options = null);

    /// <summary>
    /// Computes the canonical hash of the normalized code.
    /// </summary>
    /// <param name="code">The decompiled code.</param>
    /// <returns>A 32-byte hash.</returns>
    byte[] ComputeCanonicalHash(string code);

    /// <summary>
    /// Normalizes an AST for comparison.
    /// </summary>
    /// <param name="ast">The AST to normalize.</param>
    /// <param name="options">Normalization options; may be omitted.</param>
    /// <returns>The normalized AST.</returns>
    DecompiledAst NormalizeAst(DecompiledAst ast, NormalizationOptions? options = null);
}
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Turns decompiled C-like code into an AST and extracts metadata from it.
/// </summary>
public interface IDecompiledCodeParser
{
    /// <summary>
    /// Parses decompiled code into an AST.
    /// </summary>
    /// <param name="code">The C-like pseudo-code.</param>
    /// <returns>The parsed AST.</returns>
    DecompiledAst Parse(string code);

    /// <summary>
    /// Extracts the local variables from decompiled code.
    /// </summary>
    /// <param name="code">The C-like pseudo-code.</param>
    /// <returns>The local variables found.</returns>
    ImmutableArray<LocalVariable> ExtractVariables(string code);

    /// <summary>
    /// Extracts the functions called from decompiled code.
    /// </summary>
    /// <param name="code">The C-like pseudo-code.</param>
    /// <returns>The names of the called functions.</returns>
    ImmutableArray<string> ExtractCalledFunctions(string code);
}
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
|
||||
using StellaOps.BinaryIndex.Ghidra;
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
@@ -62,97 +61,3 @@ public interface IDecompilerService
|
||||
ComparisonOptions? options = null,
|
||||
CancellationToken ct = default);
|
||||
}
|
||||
|
||||
/// <summary>
/// Compares the AST structures of decompiled code.
/// </summary>
public interface IAstComparisonEngine
{
    /// <summary>
    /// Computes how structurally similar two ASTs are.
    /// </summary>
    /// <param name="a">The first AST.</param>
    /// <param name="b">The second AST.</param>
    /// <returns>A similarity score from 0.0 to 1.0.</returns>
    decimal ComputeStructuralSimilarity(DecompiledAst a, DecompiledAst b);

    /// <summary>
    /// Computes the edit distance between two ASTs.
    /// </summary>
    /// <param name="a">The first AST.</param>
    /// <param name="b">The second AST.</param>
    /// <returns>The edit-distance metrics.</returns>
    AstEditDistance ComputeEditDistance(DecompiledAst a, DecompiledAst b);

    /// <summary>
    /// Finds the semantic equivalences between two ASTs.
    /// </summary>
    /// <param name="a">The first AST.</param>
    /// <param name="b">The second AST.</param>
    /// <returns>The equivalent node pairs.</returns>
    ImmutableArray<SemanticEquivalence> FindEquivalences(DecompiledAst a, DecompiledAst b);

    /// <summary>
    /// Finds the differences between two ASTs.
    /// </summary>
    /// <param name="a">The first AST.</param>
    /// <param name="b">The second AST.</param>
    /// <returns>The differences found.</returns>
    ImmutableArray<CodeDifference> FindDifferences(DecompiledAst a, DecompiledAst b);
}
|
||||
|
||||
/// <summary>
/// Brings decompiled code into a normalized form so that it can be compared.
/// </summary>
public interface ICodeNormalizer
{
    /// <summary>
    /// Normalizes decompiled code for comparison.
    /// </summary>
    /// <param name="code">The raw decompiled code.</param>
    /// <param name="options">Normalization options; may be omitted.</param>
    /// <returns>The normalized code.</returns>
    string Normalize(string code, NormalizationOptions? options = null);

    /// <summary>
    /// Computes the canonical hash of the normalized code.
    /// </summary>
    /// <param name="code">The decompiled code.</param>
    /// <returns>A 32-byte hash.</returns>
    byte[] ComputeCanonicalHash(string code);

    /// <summary>
    /// Normalizes an AST for comparison.
    /// </summary>
    /// <param name="ast">The AST to normalize.</param>
    /// <param name="options">Normalization options; may be omitted.</param>
    /// <returns>The normalized AST.</returns>
    DecompiledAst NormalizeAst(DecompiledAst ast, NormalizationOptions? options = null);
}
|
||||
|
||||
/// <summary>
/// Turns decompiled C-like code into an AST and extracts metadata from it.
/// </summary>
public interface IDecompiledCodeParser
{
    /// <summary>
    /// Parses decompiled code into an AST.
    /// </summary>
    /// <param name="code">The C-like pseudo-code.</param>
    /// <returns>The parsed AST.</returns>
    DecompiledAst Parse(string code);

    /// <summary>
    /// Extracts the local variables from decompiled code.
    /// </summary>
    /// <param name="code">The C-like pseudo-code.</param>
    /// <returns>The local variables found.</returns>
    ImmutableArray<LocalVariable> ExtractVariables(string code);

    /// <summary>
    /// Extracts the functions called from decompiled code.
    /// </summary>
    /// <param name="code">The C-like pseudo-code.</param>
    /// <returns>The names of the called functions.</returns>
    ImmutableArray<string> ExtractCalledFunctions(string code);
}
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// A local variable found in decompiled code.
/// </summary>
/// <param name="Name">Variable name.</param>
/// <param name="Type">Variable type, as text.</param>
/// <param name="StackOffset">Stack offset of the variable.</param>
/// <param name="IsParameter">Whether the variable is a parameter.</param>
/// <param name="ParameterIndex">Parameter index, or <c>null</c> when not set.</param>
public sealed record LocalVariable(
    string Name,
    string Type,
    int StackOffset,
    bool IsParameter,
    int? ParameterIndex);
|
||||
@@ -1,377 +0,0 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// A function that has been decompiled into C-like pseudo-code.
/// </summary>
/// <param name="FunctionName">Name of the function.</param>
/// <param name="Signature">Function signature text.</param>
/// <param name="Code">The decompiled pseudo-code.</param>
/// <param name="Ast">Parsed AST of <paramref name="Code"/>, or <c>null</c> when none is available.</param>
/// <param name="Locals">Local variables extracted from the code.</param>
/// <param name="CalledFunctions">Names of the functions called from the code.</param>
/// <param name="Address">Address of the function.</param>
/// <param name="SizeBytes">Size of the function in bytes.</param>
public sealed record DecompiledFunction(
    string FunctionName,
    string Signature,
    string Code,
    DecompiledAst? Ast,
    ImmutableArray<LocalVariable> Locals,
    ImmutableArray<string> CalledFunctions,
    ulong Address,
    int SizeBytes);
|
||||
|
||||
/// <summary>
/// The AST form of a piece of decompiled code.
/// </summary>
/// <param name="Root">Root node of the tree.</param>
/// <param name="NodeCount">Number of nodes in the tree.</param>
/// <param name="Depth">Depth of the tree.</param>
/// <param name="Patterns">Code patterns recognized in the tree.</param>
public sealed record DecompiledAst(
    AstNode Root,
    int NodeCount,
    int Depth,
    ImmutableArray<AstPattern> Patterns);
|
||||
|
||||
/// <summary>
/// Base type of every abstract syntax tree node.
/// </summary>
/// <param name="Type">Kind of the node.</param>
/// <param name="Children">Child nodes.</param>
/// <param name="Location">Position in the decompiled code, or <c>null</c> when not recorded.</param>
public abstract record AstNode(
    AstNodeType Type,
    ImmutableArray<AstNode> Children,
    SourceLocation? Location);
|
||||
|
||||
/// <summary>
/// The kinds of AST node. Members keep their implicit, declaration-order values.
/// </summary>
public enum AstNodeType
{
    // --- Structural nodes ---
    Function,
    Block,
    Parameter,

    // --- Control-flow nodes ---
    If,
    While,
    For,
    DoWhile,
    Switch,
    Case,
    Default,
    Return,
    Break,
    Continue,
    Goto,
    Label,

    // --- Expression nodes ---
    Assignment,
    BinaryOp,
    UnaryOp,
    TernaryOp,
    Call,
    Cast,
    Sizeof,

    // --- Operand nodes ---
    Variable,
    Constant,
    StringLiteral,
    ArrayAccess,
    FieldAccess,
    PointerDeref,
    AddressOf,

    // --- Declaration nodes ---
    VariableDecl,
    TypeDef
}
|
||||
|
||||
/// <summary>
/// A position within decompiled code.
/// </summary>
/// <param name="Line">Line of the location.</param>
/// <param name="Column">Column of the location.</param>
/// <param name="Length">Length of the located span.</param>
public sealed record SourceLocation(
    int Line,
    int Column,
    int Length);
|
||||
|
||||
/// <summary>
/// A local variable found in decompiled code.
/// </summary>
/// <param name="Name">Variable name.</param>
/// <param name="Type">Variable type, as text.</param>
/// <param name="StackOffset">Stack offset of the variable.</param>
/// <param name="IsParameter">Whether the variable is a parameter.</param>
/// <param name="ParameterIndex">Parameter index, or <c>null</c> when not set.</param>
public sealed record LocalVariable(
    string Name,
    string Type,
    int StackOffset,
    bool IsParameter,
    int? ParameterIndex);
|
||||
|
||||
/// <summary>
/// A code pattern recognized in an AST.
/// </summary>
/// <param name="Type">Kind of pattern.</param>
/// <param name="Node">AST node the pattern is attached to.</param>
/// <param name="Metadata">Additional information about the pattern, or <c>null</c>.</param>
public sealed record AstPattern(
    PatternType Type,
    AstNode Node,
    PatternMetadata? Metadata);
|
||||
|
||||
/// <summary>
/// The kinds of code pattern. Members keep their implicit, declaration-order values.
/// </summary>
public enum PatternType
{
    // --- Loop patterns ---
    CountedLoop,
    ConditionalLoop,
    InfiniteLoop,
    LoopUnrolled,

    // --- Branch patterns ---
    IfElseChain,
    SwitchTable,
    ShortCircuit,

    // --- Memory patterns ---
    MemoryAllocation,
    MemoryDeallocation,
    BufferOperation,
    StackBuffer,

    // --- Error-handling patterns ---
    ErrorCheck,
    NullCheck,
    BoundsCheck,

    // --- Idiom patterns ---
    StringOperation,
    MathOperation,
    BitwiseOperation,
    TableLookup
}
|
||||
|
||||
/// <summary>
/// Additional information about a recognized pattern.
/// </summary>
/// <param name="Description">Description of the pattern.</param>
/// <param name="Confidence">Confidence value for the recognition.</param>
/// <param name="Properties">Optional string key/value properties, or <c>null</c>.</param>
public sealed record PatternMetadata(
    string Description,
    decimal Confidence,
    ImmutableDictionary<string, string>? Properties);
|
||||
|
||||
/// <summary>
/// Outcome of comparing two decompiled functions.
/// </summary>
/// <param name="Similarity">Overall similarity score.</param>
/// <param name="StructuralSimilarity">Similarity of the two AST structures.</param>
/// <param name="SemanticSimilarity">Similarity derived from the semantic equivalences found.</param>
/// <param name="EditDistance">Edit-distance metrics between the two ASTs.</param>
/// <param name="Equivalences">Semantically equivalent node pairs.</param>
/// <param name="Differences">Differences found between the two functions.</param>
/// <param name="Confidence">Confidence level assigned to the comparison.</param>
public sealed record DecompiledComparisonResult(
    decimal Similarity,
    decimal StructuralSimilarity,
    decimal SemanticSimilarity,
    AstEditDistance EditDistance,
    ImmutableArray<SemanticEquivalence> Equivalences,
    ImmutableArray<CodeDifference> Differences,
    ComparisonConfidence Confidence);
|
||||
|
||||
/// <summary>
/// Edit-distance metrics between two ASTs.
/// </summary>
/// <param name="Insertions">Number of insertions.</param>
/// <param name="Deletions">Number of deletions.</param>
/// <param name="Modifications">Number of modifications.</param>
/// <param name="TotalOperations">Total number of edit operations.</param>
/// <param name="NormalizedDistance">
/// Normalized distance; presumably within 0.0 to 1.0 (TODO confirm against the producer).
/// </param>
public sealed record AstEditDistance(
    int Insertions,
    int Deletions,
    int Modifications,
    int TotalOperations,
    decimal NormalizedDistance);
|
||||
|
||||
/// <summary>
/// A pair of AST nodes considered semantically equivalent.
/// </summary>
/// <param name="NodeA">Node from the first AST.</param>
/// <param name="NodeB">Node from the second AST.</param>
/// <param name="Type">Kind of equivalence.</param>
/// <param name="Confidence">Confidence value for the equivalence.</param>
/// <param name="Explanation">Optional explanatory text, or <c>null</c>.</param>
public sealed record SemanticEquivalence(
    AstNode NodeA,
    AstNode NodeB,
    EquivalenceType Type,
    decimal Confidence,
    string? Explanation);
|
||||
|
||||
/// <summary>
/// The kinds of semantic equivalence. Members keep their implicit, declaration-order values.
/// </summary>
public enum EquivalenceType
{
    Identical,
    Renamed,
    Reordered,
    Optimized,
    Inlined,
    Semantically
}
|
||||
|
||||
/// <summary>
/// One difference between two pieces of code.
/// </summary>
/// <param name="Type">Kind of difference.</param>
/// <param name="NodeA">Node on the first side, or <c>null</c>.</param>
/// <param name="NodeB">Node on the second side, or <c>null</c>.</param>
/// <param name="Description">Human-readable description of the difference.</param>
public sealed record CodeDifference(
    DifferenceType Type,
    AstNode? NodeA,
    AstNode? NodeB,
    string Description);
|
||||
|
||||
/// <summary>
/// The kinds of code difference. Members keep their implicit, declaration-order values.
/// </summary>
public enum DifferenceType
{
    Added,
    Removed,
    Modified,
    Reordered,
    TypeChanged,
    OptimizationVariant
}
|
||||
|
||||
/// <summary>
/// How much confidence is placed in a comparison result, from lowest to highest.
/// </summary>
public enum ComparisonConfidence
{
    Low,
    Medium,
    High,
    VeryHigh
}
|
||||
|
||||
/// <summary>
/// Settings that control a decompilation request.
/// </summary>
public sealed record DecompileOptions
{
    /// <summary>Whether code simplification is requested. Defaults to <c>true</c>.</summary>
    public bool SimplifyCode { get; init; } = true;

    /// <summary>Whether type recovery is requested. Defaults to <c>true</c>.</summary>
    public bool RecoverTypes { get; init; } = true;

    /// <summary>Whether struct recovery is requested. Defaults to <c>true</c>.</summary>
    public bool RecoverStructs { get; init; } = true;

    /// <summary>Maximum length of the decompiled code. Defaults to 100,000.</summary>
    public int MaxCodeLength { get; init; } = 100_000;

    /// <summary>Timeout for the operation. Defaults to 30 seconds.</summary>
    public TimeSpan Timeout { get; init; } = TimeSpan.FromSeconds(30);
}
|
||||
|
||||
/// <summary>
/// Settings that control an AST comparison.
/// </summary>
public sealed record ComparisonOptions
{
    /// <summary>Whether variable names are ignored. Defaults to <c>true</c>.</summary>
    public bool IgnoreVariableNames { get; init; } = true;

    /// <summary>Whether constants are ignored. Defaults to <c>false</c>.</summary>
    public bool IgnoreConstants { get; init; } = false;

    /// <summary>Whether optimizations are detected. Defaults to <c>true</c>.</summary>
    public bool DetectOptimizations { get; init; } = true;

    /// <summary>Minimum similarity threshold. Defaults to 0.5.</summary>
    public decimal MinSimilarityThreshold { get; init; } = 0.5m;
}
|
||||
|
||||
/// <summary>
/// Settings that control code normalization.
/// </summary>
public sealed record NormalizationOptions
{
    /// <summary>Whether variables are normalized. Defaults to <c>true</c>.</summary>
    public bool NormalizeVariables { get; init; } = true;

    /// <summary>Whether function calls are normalized. Defaults to <c>true</c>.</summary>
    public bool NormalizeFunctionCalls { get; init; } = true;

    /// <summary>Whether constants are normalized. Defaults to <c>false</c>.</summary>
    public bool NormalizeConstants { get; init; } = false;

    /// <summary>Whether whitespace is normalized. Defaults to <c>true</c>.</summary>
    public bool NormalizeWhitespace { get; init; } = true;

    /// <summary>Whether independent statements are sorted. Defaults to <c>false</c>.</summary>
    public bool SortIndependentStatements { get; init; } = false;

    /// <summary>Optional set of known function names; <c>null</c> unless set.</summary>
    public ImmutableHashSet<string>? KnownFunctions { get; init; }

    /// <summary>A shared instance in which every option has its default value.</summary>
    public static NormalizationOptions Default { get; } = new();
}
|
||||
|
||||
#region Concrete AST Node Types

/// <summary>Function definition. Children are the body followed by the parameters.</summary>
public sealed record FunctionNode(string Name, string ReturnType, ImmutableArray<ParameterNode> Parameters, BlockNode Body, SourceLocation? Location = null)
    : AstNode(AstNodeType.Function, [Body, .. Parameters], Location);

/// <summary>Function parameter with its declared type text and position. Has no children.</summary>
public sealed record ParameterNode(string Name, string DataType, int Index, SourceLocation? Location = null)
    : AstNode(AstNodeType.Parameter, [], Location);

/// <summary>Sequence of statements. The statements are the node's children.</summary>
public sealed record BlockNode(ImmutableArray<AstNode> Statements, SourceLocation? Location = null)
    : AstNode(AstNodeType.Block, Statements, Location);

/// <summary>
/// Conditional. Children are the condition, the then-branch, and the else-branch
/// only when one is present.
/// </summary>
public sealed record IfNode(AstNode Condition, AstNode ThenBranch, AstNode? ElseBranch, SourceLocation? Location = null)
    : AstNode(
        AstNodeType.If,
        ElseBranch is not null ? [Condition, ThenBranch, ElseBranch] : [Condition, ThenBranch],
        Location);

/// <summary>Loop with a condition and a body, in that child order.</summary>
public sealed record WhileNode(AstNode Condition, AstNode Body, SourceLocation? Location = null)
    : AstNode(AstNodeType.While, [Condition, Body], Location);

/// <summary>
/// <c>for</c> loop. Always has four children (init, condition, update, body); a missing
/// clause is represented by <see cref="EmptyNode.Instance"/> in the child list.
/// </summary>
public sealed record ForNode(AstNode? Init, AstNode? Condition, AstNode? Update, AstNode Body, SourceLocation? Location = null)
    : AstNode(
        AstNodeType.For,
        [Init ?? EmptyNode.Instance, Condition ?? EmptyNode.Instance, Update ?? EmptyNode.Instance, Body],
        Location);

/// <summary>Return statement. The returned value, when present, is the only child.</summary>
public sealed record ReturnNode(AstNode? Value, SourceLocation? Location = null)
    : AstNode(AstNodeType.Return, Value is not null ? [Value] : [], Location);

/// <summary>Assignment with its operator text. Children are the target then the value.</summary>
public sealed record AssignmentNode(AstNode Target, AstNode Value, string Operator, SourceLocation? Location = null)
    : AstNode(AstNodeType.Assignment, [Target, Value], Location);

/// <summary>Binary operation with its operator text. Children are the left then the right operand.</summary>
public sealed record BinaryOpNode(AstNode Left, AstNode Right, string Operator, SourceLocation? Location = null)
    : AstNode(AstNodeType.BinaryOp, [Left, Right], Location);

/// <summary>Unary operation; <paramref name="IsPrefix"/> separates prefix from postfix use.</summary>
public sealed record UnaryOpNode(AstNode Operand, string Operator, bool IsPrefix, SourceLocation? Location = null)
    : AstNode(AstNodeType.UnaryOp, [Operand], Location);

/// <summary>Call identified by function name. The arguments are the node's children.</summary>
public sealed record CallNode(string FunctionName, ImmutableArray<AstNode> Arguments, SourceLocation? Location = null)
    : AstNode(AstNodeType.Call, Arguments, Location);

/// <summary>Variable reference with an optional data type. Has no children.</summary>
public sealed record VariableNode(string Name, string? DataType, SourceLocation? Location = null)
    : AstNode(AstNodeType.Variable, [], Location);

/// <summary>Constant value with its data type text. Has no children.</summary>
public sealed record ConstantNode(object Value, string DataType, SourceLocation? Location = null)
    : AstNode(AstNodeType.Constant, [], Location);

/// <summary>Indexing expression. Children are the indexed expression then the index.</summary>
public sealed record ArrayAccessNode(AstNode Array, AstNode Index, SourceLocation? Location = null)
    : AstNode(AstNodeType.ArrayAccess, [Array, Index], Location);

/// <summary>Member access on an object expression; <paramref name="IsPointer"/> flags pointer-style access.</summary>
public sealed record FieldAccessNode(AstNode Object, string FieldName, bool IsPointer, SourceLocation? Location = null)
    : AstNode(AstNodeType.FieldAccess, [Object], Location);

/// <summary>Cast of an expression to the type named by <paramref name="TargetType"/>.</summary>
public sealed record CastNode(AstNode Expression, string TargetType, SourceLocation? Location = null)
    : AstNode(AstNodeType.Cast, [Expression], Location);

/// <summary>
/// Placeholder node: reports <see cref="AstNodeType.Block"/>, has no children and no location.
/// </summary>
public sealed record EmptyNode() : AstNode(AstNodeType.Block, [], null)
{
    /// <summary>Shared placeholder instance.</summary>
    public static EmptyNode Instance { get; } = new EmptyNode();
}

#endregion
|
||||
@@ -0,0 +1,20 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Settings for code normalization. Every instance property is init-only.
/// </summary>
public sealed record NormalizationOptions
{
    /// <summary>Defaults to <c>true</c>.</summary>
    public bool NormalizeVariables { get; init; } = true;

    /// <summary>Defaults to <c>true</c>.</summary>
    public bool NormalizeFunctionCalls { get; init; } = true;

    /// <summary>Defaults to <c>false</c>.</summary>
    public bool NormalizeConstants { get; init; }

    /// <summary>Defaults to <c>true</c>.</summary>
    public bool NormalizeWhitespace { get; init; } = true;

    /// <summary>Defaults to <c>false</c>.</summary>
    public bool SortIndependentStatements { get; init; }

    /// <summary>Optional set of function names; <see langword="null"/> unless supplied.</summary>
    public ImmutableHashSet<string>? KnownFunctions { get; init; }

    /// <summary>Shared instance in which every property has its default value.</summary>
    public static NormalizationOptions Default { get; } = new NormalizationOptions();
}
|
||||
@@ -0,0 +1,24 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses <c>if (condition) statement [else statement]</c>. The current token is
    /// consumed unconditionally as the keyword.
    /// </summary>
    /// <returns>
    /// An <see cref="IfNode"/>. A then-branch that parses to <see langword="null"/> is
    /// replaced by an empty block; the else-branch is <see langword="null"/> when there is
    /// no <c>else</c> or when its statement parses to <see langword="null"/>.
    /// </returns>
    private IfNode ParseIf()
    {
        Advance();
        Expect(TokenType.Bracket, "(");
        var test = ParseExpression();
        Expect(TokenType.Bracket, ")");

        AstNode whenTrue = ParseStatement() ?? new BlockNode([]);

        if (Peek().Value != "else")
        {
            return new IfNode(test, whenTrue, null);
        }

        Advance();
        return new IfNode(test, whenTrue, ParseStatement());
    }
}
|
||||
@@ -0,0 +1,63 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses <c>while (condition) statement</c>. The current token is consumed as the keyword;
    /// a body that parses to <see langword="null"/> becomes an empty block.
    /// </summary>
    private WhileNode ParseWhile()
    {
        Advance();
        Expect(TokenType.Bracket, "(");
        var loopCondition = ParseExpression();
        Expect(TokenType.Bracket, ")");

        return new WhileNode(loopCondition, ParseStatement() ?? new BlockNode([]));
    }

    /// <summary>
    /// Parses <c>for (init; condition; update) statement</c>. Each header clause is optional
    /// and is <see langword="null"/> when the parser is already at that clause's terminator.
    /// </summary>
    private ForNode ParseFor()
    {
        Advance();
        Expect(TokenType.Bracket, "(");

        var initializer = ClauseUnless(";");
        Expect(TokenType.Punctuation, ";");

        var loopCondition = ClauseUnless(";");
        Expect(TokenType.Punctuation, ";");

        var step = ClauseUnless(")");
        Expect(TokenType.Bracket, ")");

        AstNode loopBody = ParseStatement() ?? new BlockNode([]);
        return new ForNode(initializer, loopCondition, step, loopBody);

        // Parses one header clause unless the next token is already its terminator.
        AstNode? ClauseUnless(string terminator) =>
            Peek().Value == terminator ? null : ParseExpression();
    }

    /// <summary>
    /// Parses <c>do statement while (condition);</c>. The result is a
    /// <see cref="WhileNode"/>, so the AST does not distinguish it from a plain
    /// <c>while</c> loop.
    /// </summary>
    private AstNode ParseDoWhile()
    {
        Advance();
        AstNode loopBody = ParseStatement() ?? new BlockNode([]);

        Expect(TokenType.Keyword, "while");
        Expect(TokenType.Bracket, "(");
        var loopCondition = ParseExpression();
        Expect(TokenType.Bracket, ")");
        Expect(TokenType.Punctuation, ";");

        return new WhileNode(loopCondition, loopBody);
    }
}
|
||||
@@ -0,0 +1,50 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses <c>return [expression];</c>. The value is <see langword="null"/> when the
    /// token after the keyword is <c>;</c>.
    /// </summary>
    private ReturnNode ParseReturn()
    {
        Advance();

        AstNode? result = Peek().Value == ";" ? null : ParseExpression();
        Expect(TokenType.Punctuation, ";");

        return new ReturnNode(result);
    }

    /// <summary>
    /// Consumes the current token and the following <c>;</c>, returning an empty block
    /// (no dedicated node is produced for the statement).
    /// </summary>
    private AstNode ParseBreak()
    {
        Advance();
        Expect(TokenType.Punctuation, ";");
        return new BlockNode([]);
    }

    /// <summary>
    /// Consumes the current token and the following <c>;</c>, returning an empty block
    /// (no dedicated node is produced for the statement).
    /// </summary>
    private AstNode ParseContinue()
    {
        Advance();
        Expect(TokenType.Punctuation, ";");
        return new BlockNode([]);
    }

    /// <summary>Consumes one token and yields no statement.</summary>
    private AstNode? SkipSemicolon()
    {
        Advance();
        return null;
    }

    /// <summary>
    /// Parses an expression and, when a <c>;</c> follows directly, consumes it. A missing
    /// terminator is tolerated.
    /// </summary>
    private AstNode? ParseExpressionStatement()
    {
        var expression = ParseExpression();

        if (Peek().Value != ";")
        {
            return expression;
        }

        Advance();
        return expression;
    }
}
|
||||
@@ -0,0 +1,48 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses a left-associative chain of <c>&lt;&lt;</c> / <c>&gt;&gt;</c> over additive operands.
    /// </summary>
    private AstNode ParseShift()
    {
        var result = ParseAdditive();

        for (var next = Peek(); next.Value is "<<" or ">>"; next = Peek())
        {
            Advance();
            result = new BinaryOpNode(result, ParseAdditive(), next.Value);
        }

        return result;
    }

    /// <summary>
    /// Parses a left-associative chain of <c>+</c> / <c>-</c> over multiplicative operands.
    /// </summary>
    private AstNode ParseAdditive()
    {
        var result = ParseMultiplicative();

        for (var next = Peek(); next.Value is "+" or "-"; next = Peek())
        {
            Advance();
            result = new BinaryOpNode(result, ParseMultiplicative(), next.Value);
        }

        return result;
    }

    /// <summary>
    /// Parses a left-associative chain of <c>*</c> / <c>/</c> / <c>%</c> over unary operands.
    /// </summary>
    private AstNode ParseMultiplicative()
    {
        var result = ParseUnary();

        for (var next = Peek(); next.Value is "*" or "/" or "%"; next = Peek())
        {
            Advance();
            result = new BinaryOpNode(result, ParseUnary(), next.Value);
        }

        return result;
    }
}
|
||||
@@ -0,0 +1,62 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    // Precedence ladder implemented here, loosest to tightest, following C:
    //   ||  ->  &&  ->  |  ->  ^  ->  &  ->  == !=  ->  < > <= >=  ->  shift
    // Every level is left-associative.

    /// <summary>Parses a left-associative chain of <c>||</c>.</summary>
    private AstNode ParseLogicalOr()
    {
        var left = ParseLogicalAnd();

        while (Peek().Value == "||")
        {
            var op = Advance().Value;
            var right = ParseLogicalAnd();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>Parses a left-associative chain of <c>&amp;&amp;</c>.</summary>
    private AstNode ParseLogicalAnd()
    {
        var left = ParseBitwiseOr();

        while (Peek().Value == "&&")
        {
            var op = Advance().Value;
            var right = ParseBitwiseOr();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>
    /// Parses a left-associative chain of bitwise <c>|</c>. Entry point for all bitwise
    /// operators; <c>^</c> and <c>&amp;</c> bind tighter and are handled by lower levels.
    /// </summary>
    private AstNode ParseBitwiseOr()
    {
        var left = ParseBitwiseXor();

        while (Peek().Value == "|")
        {
            var op = Advance().Value;
            var right = ParseBitwiseXor();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>Parses a left-associative chain of bitwise <c>^</c>.</summary>
    private AstNode ParseBitwiseXor()
    {
        var left = ParseBitwiseAnd();

        while (Peek().Value == "^")
        {
            var op = Advance().Value;
            var right = ParseBitwiseAnd();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>Parses a left-associative chain of bitwise <c>&amp;</c>.</summary>
    private AstNode ParseBitwiseAnd()
    {
        var left = ParseComparison();

        while (Peek().Value == "&")
        {
            var op = Advance().Value;
            var right = ParseComparison();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>
    /// Parses a left-associative chain of equality operators (<c>==</c>, <c>!=</c>).
    /// Relational operators bind tighter and are parsed by <see cref="ParseRelational"/>.
    /// </summary>
    private AstNode ParseComparison()
    {
        var left = ParseRelational();

        while (Peek().Value is "==" or "!=")
        {
            var op = Advance().Value;
            var right = ParseRelational();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }

    /// <summary>
    /// Parses a left-associative chain of relational operators
    /// (<c>&lt;</c>, <c>&gt;</c>, <c>&lt;=</c>, <c>&gt;=</c>) over shift expressions.
    /// </summary>
    private AstNode ParseRelational()
    {
        var left = ParseShift();

        while (Peek().Value is "<" or ">" or "<=" or ">=")
        {
            var op = Advance().Value;
            var right = ParseShift();
            left = new BinaryOpNode(left, right, op);
        }

        return left;
    }
}
|
||||
@@ -0,0 +1,88 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses comma-separated expressions up to, but not including, the closing <c>)</c>.
    /// </summary>
    /// <returns>The parsed arguments; empty when the next token is already <c>)</c>.</returns>
    private ImmutableArray<AstNode> ParseArgumentList()
    {
        if (Peek().Value == ")")
        {
            return [];
        }

        var items = ImmutableArray.CreateBuilder<AstNode>();

        while (true)
        {
            // Skip the separator that precedes this argument, if any.
            if (Peek().Value == ",")
            {
                Advance();
            }

            items.Add(ParseExpression());

            if (Peek().Value != ",")
            {
                break;
            }
        }

        return items.ToImmutable();
    }

    /// <summary>
    /// Parses a primary expression: a literal, an identifier, a parenthesised expression,
    /// a cast, or <c>sizeof(type)</c>.
    /// </summary>
    /// <remarks>
    /// Any other token is consumed and wrapped in a constant typed <c>"unknown"</c>, so this
    /// method never fails on unexpected input.
    /// </remarks>
    private AstNode ParsePrimary()
    {
        var token = Peek();

        switch (token.Type)
        {
            case TokenType.Number:
                Advance();
                return new ConstantNode(token.Value, "int");

            case TokenType.String:
                Advance();
                return new ConstantNode(token.Value, "char*");

            case TokenType.Char:
                Advance();
                return new ConstantNode(token.Value, "char");

            case TokenType.Identifier:
                Advance();
                return new VariableNode(token.Value, null);
        }

        if (token.Value == "sizeof")
        {
            Advance();
            Expect(TokenType.Bracket, "(");
            var type = ParseType();
            Expect(TokenType.Bracket, ")");
            return new ConstantNode($"sizeof({type})", "size_t");
        }

        if (token.Value == "(")
        {
            Advance();

            // "(" followed by something other than a type name is a grouped expression.
            if (!IsType(Peek().Value))
            {
                var inner = ParseExpression();
                Expect(TokenType.Bracket, ")");
                return inner;
            }

            // "(" followed by a type name is a cast applied to the following unary expression.
            var targetType = ParseType();
            Expect(TokenType.Bracket, ")");
            return new CastNode(ParseUnary(), targetType);
        }

        Advance();
        return new ConstantNode(token.Value, "unknown");
    }
}
|
||||
@@ -0,0 +1,62 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Parses prefix operators (<c>! ~ - + * &amp; ++ --</c>), recursing for each one, and
    /// falls through to postfix parsing when none is present.
    /// </summary>
    private AstNode ParseUnary()
    {
        if (Peek().Value is not ("!" or "~" or "-" or "+" or "*" or "&" or "++" or "--"))
        {
            return ParsePostfix();
        }

        var symbol = Advance().Value;
        return new UnaryOpNode(ParseUnary(), symbol, true);
    }

    /// <summary>
    /// Parses a primary expression followed by any number of postfix forms: call, index,
    /// member access (<c>.</c> / <c>-&gt;</c>) and postfix <c>++</c> / <c>--</c>.
    /// </summary>
    /// <remarks>
    /// A call is turned into a <see cref="CallNode"/> only when the callee is a plain
    /// <see cref="VariableNode"/>; for any other callee the argument list is parsed and
    /// discarded and the callee expression is kept unchanged.
    /// </remarks>
    private AstNode ParsePostfix()
    {
        var current = ParsePrimary();

        while (true)
        {
            switch (Peek().Value)
            {
                case "(":
                    Advance();
                    var arguments = ParseArgumentList();
                    Expect(TokenType.Bracket, ")");
                    if (current is VariableNode callee)
                    {
                        current = new CallNode(callee.Name, arguments);
                    }
                    continue;

                case "[":
                    Advance();
                    var subscript = ParseExpression();
                    Expect(TokenType.Bracket, "]");
                    current = new ArrayAccessNode(current, subscript);
                    continue;

                case "." or "->":
                    var viaPointer = Advance().Value == "->";
                    var member = Expect(TokenType.Identifier).Value;
                    current = new FieldAccessNode(current, member, viaPointer);
                    continue;

                case "++" or "--":
                    current = new UnaryOpNode(current, Advance().Value, false);
                    continue;

                default:
                    return current;
            }
        }
    }
}
|
||||
@@ -0,0 +1,25 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    /// <summary>Parses a full expression; the loosest-binding form handled is assignment.</summary>
    private AstNode ParseExpression()
    {
        return ParseAssignment();
    }

    /// <summary>
    /// Parses a simple or compound assignment, which is right-associative, or returns the
    /// underlying logical-or expression when no assignment operator follows.
    /// </summary>
    /// <returns>
    /// An <see cref="AssignmentNode"/> carrying the operator text, or the left-hand
    /// expression unchanged.
    /// </returns>
    private AstNode ParseAssignment()
    {
        var left = ParseLogicalOr();

        // "%=" belongs to C's compound-assignment set alongside the others; without it
        // "a %= b" ends the expression at "a" and leaves "%=" as a stray token.
        if (Peek().Value is "=" or "+=" or "-=" or "*=" or "/=" or "%=" or "&=" or "|=" or "^=" or "<<=" or ">>=")
        {
            var op = Advance().Value;

            // Recurse on the right so that "a = b = c" groups as "a = (b = c)".
            var right = ParseAssignment();
            return new AssignmentNode(left, right, op);
        }

        return left;
    }
}
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    /// <summary>
    /// Reports whether <paramref name="value"/> is one of the hard-coded type keywords or
    /// type names this parser treats as starting a type.
    /// </summary>
    private static bool IsType(string value) => value switch
    {
        "int" or "char" or "void" or "long" or "short" or "float" or "double" => true,
        "unsigned" or "signed" or "const" or "struct" or "union" or "enum" => true,
        "undefined" or "undefined1" or "undefined2" or "undefined4" or "undefined8" => true,
        "byte" or "word" or "dword" or "qword" or "pointer" or "code" or "uint" or "ulong" => true,
        _ => false,
    };

    /// <summary>
    /// Returns the current token without consuming it. Past the end of input it returns a
    /// punctuation token with an empty value.
    /// </summary>
    private Token Peek()
    {
        if (_pos < _tokens.Count)
        {
            return _tokens[_pos];
        }

        return new Token(TokenType.Punctuation, "", 0, 0);
    }

    /// <summary>
    /// Returns the token <paramref name="offset"/> positions ahead without consuming anything.
    /// Past the end of input it returns a punctuation token with an empty value.
    /// </summary>
    private Token PeekAhead(int offset)
    {
        var index = _pos + offset;
        if (index < _tokens.Count)
        {
            return _tokens[index];
        }

        return new Token(TokenType.Punctuation, "", 0, 0);
    }

    /// <summary>
    /// Returns the current token and moves past it. Past the end of input the position is
    /// left unchanged and a punctuation token with an empty value is returned.
    /// </summary>
    private Token Advance()
    {
        if (_pos >= _tokens.Count)
        {
            return new Token(TokenType.Punctuation, "", 0, 0);
        }

        var current = _tokens[_pos];
        _pos++;
        return current;
    }

    /// <summary>
    /// Consumes and returns the current token, checking it against the expected type and,
    /// when given, the expected text.
    /// </summary>
    /// <remarks>
    /// The check is lenient: a token that does not match is consumed and returned exactly
    /// like a matching one, and no error is raised.
    /// </remarks>
    private Token Expect(TokenType type, string? value = null)
    {
        var current = Peek();
        var isExpected = current.Type == type && (value is null || current.Value == value);

        if (isExpected)
        {
            return Advance();
        }

        // Mismatch: the unexpected token is consumed anyway.
        return Advance();
    }
}
|
||||
@@ -0,0 +1,124 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
internal sealed partial class RecursiveParser
{
    private readonly List<Token> _tokens;
    private int _pos;

    /// <summary>Creates a parser positioned at the first token of <paramref name="tokens"/>.</summary>
    /// <param name="tokens">Token list to parse. It is read through this reference, not copied.</param>
    public RecursiveParser(List<Token> tokens)
    {
        _tokens = tokens;
        _pos = 0;
    }

    /// <summary>
    /// Parses one function definition: return type, name, parenthesised parameter list
    /// and a braced body.
    /// </summary>
    /// <returns>A <see cref="FunctionNode"/> for the parsed function.</returns>
    public AstNode ParseFunction()
    {
        var returnType = ParseType();
        var name = Expect(TokenType.Identifier).Value;

        Expect(TokenType.Bracket, "(");
        var parameters = ParseParameterList();
        Expect(TokenType.Bracket, ")");

        var body = ParseBlock();

        return new FunctionNode(name, returnType, parameters, body);
    }

    /// <summary>
    /// Parses a type: leading qualifiers (<c>const</c>, <c>unsigned</c>, <c>signed</c>,
    /// <c>static</c>, <c>extern</c>), one base-type token, and any trailing <c>*</c>.
    /// </summary>
    /// <remarks>
    /// When the base token is <c>struct</c>, <c>union</c> or <c>enum</c> and an identifier
    /// follows, that identifier is taken as the tag and becomes part of the type
    /// (for example <c>struct foo*</c>). Otherwise the tag would be left in the stream and
    /// misread as the declarator name.
    /// </remarks>
    /// <returns>The type text: qualifiers separated by spaces, pointer stars appended directly.</returns>
    private string ParseType()
    {
        var type = new System.Text.StringBuilder();

        while (Peek().Value is "const" or "unsigned" or "signed" or "static" or "extern")
        {
            type.Append(Advance().Value);
            type.Append(' ');
        }

        var baseType = Advance().Value;
        type.Append(baseType);

        // Tagged types are two tokens: the keyword and the tag name.
        if ((baseType is "struct" or "union" or "enum") && Peek().Type == TokenType.Identifier)
        {
            type.Append(' ');
            type.Append(Advance().Value);
        }

        while (Peek().Value == "*")
        {
            type.Append(Advance().Value);
        }

        return type.ToString().Trim();
    }

    /// <summary>
    /// Parses the parameters between the parentheses of a function header, leaving the
    /// closing <c>)</c> for the caller.
    /// </summary>
    /// <returns>
    /// The parameters in declaration order; empty for <c>()</c> and <c>(void)</c>.
    /// A parameter with no identifier is named <c>param_N</c>, where N is its index.
    /// </returns>
    private ImmutableArray<ParameterNode> ParseParameterList()
    {
        var parameters = new List<ParameterNode>();
        var index = 0;

        if (Peek().Value == ")")
        {
            return [];
        }

        if (Peek().Value == "void" && PeekAhead(1).Value == ")")
        {
            Advance();
            return [];
        }

        do
        {
            // Skip the separator that precedes this parameter, if any.
            if (Peek().Value == ",")
            {
                Advance();
            }

            var type = ParseType();
            var name = Peek().Type == TokenType.Identifier ? Advance().Value : $"param_{index}";

            parameters.Add(new ParameterNode(name, type, index));
            index++;
        }
        while (Peek().Value == ",");

        return [.. parameters];
    }

    /// <summary>
    /// Parses a braced block, collecting every statement that is not <see langword="null"/>.
    /// </summary>
    /// <remarks>
    /// The loop also stops at end of input. Past the last token the lookahead never equals
    /// <c>}</c> and no token can be consumed, so an unterminated block would otherwise
    /// append placeholder statements forever.
    /// </remarks>
    private BlockNode ParseBlock()
    {
        Expect(TokenType.Bracket, "{");

        var statements = new List<AstNode>();

        while (_pos < _tokens.Count && Peek().Value != "}")
        {
            var stmt = ParseStatement();
            if (stmt is not null)
            {
                statements.Add(stmt);
            }
        }

        Expect(TokenType.Bracket, "}");

        return new BlockNode([.. statements]);
    }

    /// <summary>
    /// Dispatches on the text of the current token to the matching statement parser;
    /// anything unrecognised is parsed as an expression statement.
    /// </summary>
    /// <returns>The parsed statement, or <see langword="null"/> for a lone <c>;</c>.</returns>
    private AstNode? ParseStatement()
    {
        var token = Peek();

        return token.Value switch
        {
            "if" => ParseIf(),
            "while" => ParseWhile(),
            "for" => ParseFor(),
            "do" => ParseDoWhile(),
            "return" => ParseReturn(),
            "break" => ParseBreak(),
            "continue" => ParseContinue(),
            "{" => ParseBlock(),
            ";" => SkipSemicolon(),
            _ => ParseExpressionStatement()
        };
    }
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Records that two AST nodes are considered semantically equivalent.
/// </summary>
/// <param name="NodeA">First node of the pair.</param>
/// <param name="NodeB">Second node of the pair.</param>
/// <param name="Type">Kind of equivalence.</param>
/// <param name="Confidence">Confidence value attached to the match.</param>
/// <param name="Explanation">Optional explanatory text; may be <see langword="null"/>.</param>
public sealed record SemanticEquivalence(AstNode NodeA, AstNode NodeB, EquivalenceType Type, decimal Confidence, string? Explanation);
|
||||
|
||||
/// <summary>
/// Classifies the kind of semantic equivalence recorded between two AST nodes.
/// </summary>
/// <remarks>
/// The ordinals are spelled out explicitly; they equal the values the compiler
/// assigns implicitly for this declaration order.
/// </remarks>
public enum EquivalenceType
{
    Identical = 0,
    Renamed = 1,
    Reordered = 2,
    Optimized = 3,
    Inlined = 4,
    Semantically = 5
}
|
||||
@@ -0,0 +1,8 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Position of a span within decompiled source text.
/// </summary>
/// <param name="Line">Line of the span.</param>
/// <param name="Column">Column of the span.</param>
/// <param name="Length">Length of the span.</param>
public sealed record SourceLocation(
    int Line,
    int Column,
    int Length);
|
||||
@@ -4,5 +4,5 @@ Source of truth: `docs/implplan/SPRINT_20260130_002_Tools_csproj_remediation_sol
|
||||
|
||||
| Task ID | Status | Notes |
|
||||
| --- | --- | --- |
|
||||
| REMED-05 | TODO | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Decompiler/StellaOps.BinaryIndex.Decompiler.md. |
|
||||
| REMED-05 | DONE | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Decompiler/StellaOps.BinaryIndex.Decompiler.md. |
|
||||
| REMED-06 | DONE | SOLID review notes captured for SPRINT_20260130_002. |
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// A lexical token: its category, its text, and the line/column it carries.
/// </summary>
/// <param name="Type">Token category.</param>
/// <param name="Value">Token text.</param>
/// <param name="Line">Line recorded for the token.</param>
/// <param name="Column">Column recorded for the token.</param>
internal readonly record struct Token(
    TokenType Type,
    string Value,
    int Line,
    int Column);
|
||||
@@ -0,0 +1,15 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Decompiler;
|
||||
|
||||
/// <summary>
/// Token categories used by the parser's token stream.
/// </summary>
/// <remarks>
/// The ordinals are spelled out explicitly; they equal the values the compiler
/// assigns implicitly for this declaration order.
/// </remarks>
internal enum TokenType
{
    Identifier = 0,
    Keyword = 1,
    Number = 2,
    String = 3,
    Char = 4,
    Operator = 5,
    Bracket = 6,
    Punctuation = 7
}
|
||||
@@ -71,70 +71,3 @@ public interface IDisassemblyPlugin
|
||||
/// <returns>Enumerable of disassembled instructions.</returns>
|
||||
IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol);
|
||||
}
|
||||
|
||||
/// <summary>
/// Catalog of disassembly plugins, responsible for plugin discovery and selection.
/// </summary>
public interface IDisassemblyPluginRegistry
{
    /// <summary>All plugins registered with this registry.</summary>
    IReadOnlyList<IDisassemblyPlugin> Plugins { get; }

    /// <summary>Selects the best plugin for an architecture/format pair.</summary>
    /// <param name="architecture">CPU architecture to match.</param>
    /// <param name="format">Binary format to match.</param>
    /// <returns>The best match, or <see langword="null"/> when no plugin matches.</returns>
    IDisassemblyPlugin? FindPlugin(CpuArchitecture architecture, BinaryFormat format);

    /// <summary>Lists the plugins that support an architecture.</summary>
    /// <param name="architecture">CPU architecture to match.</param>
    /// <returns>Every matching plugin, ordered by priority.</returns>
    IEnumerable<IDisassemblyPlugin> FindPluginsForArchitecture(CpuArchitecture architecture);

    /// <summary>Lists the plugins that support a binary format.</summary>
    /// <param name="format">Binary format to match.</param>
    /// <returns>Every matching plugin, ordered by priority.</returns>
    IEnumerable<IDisassemblyPlugin> FindPluginsForFormat(BinaryFormat format);

    /// <summary>Looks a plugin up by its unique identifier.</summary>
    /// <param name="pluginId">Identifier of the plugin.</param>
    /// <returns>The plugin, or <see langword="null"/> when the identifier is not registered.</returns>
    IDisassemblyPlugin? GetPlugin(string pluginId);
}
|
||||
|
||||
/// <summary>
/// Facade over disassembly operations that picks the best plugin automatically.
/// </summary>
public interface IDisassemblyService
{
    /// <summary>Loads a binary from a stream, selecting the best plugin automatically.</summary>
    /// <param name="stream">Stream containing the binary.</param>
    /// <param name="preferredPluginId">Optional identifier of a preferred plugin.</param>
    /// <returns>The loaded binary's information together with the plugin that was used.</returns>
    (BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(Stream stream, string? preferredPluginId = null);

    /// <summary>Loads a binary from in-memory bytes, selecting the best plugin automatically.</summary>
    /// <param name="bytes">The binary data.</param>
    /// <param name="preferredPluginId">Optional identifier of a preferred plugin.</param>
    /// <returns>The loaded binary's information together with the plugin that was used.</returns>
    (BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(ReadOnlySpan<byte> bytes, string? preferredPluginId = null);

    /// <summary>The plugin registry exposed by this service.</summary>
    IDisassemblyPluginRegistry Registry { get; }
}
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Catalog of disassembly plugins, responsible for plugin discovery and selection.
/// </summary>
public interface IDisassemblyPluginRegistry
{
    /// <summary>All plugins registered with this registry.</summary>
    IReadOnlyList<IDisassemblyPlugin> Plugins { get; }

    /// <summary>Selects the best plugin for an architecture/format pair.</summary>
    /// <param name="architecture">CPU architecture to match.</param>
    /// <param name="format">Binary format to match.</param>
    /// <returns>The best match, or <see langword="null"/> when no plugin matches.</returns>
    IDisassemblyPlugin? FindPlugin(CpuArchitecture architecture, BinaryFormat format);

    /// <summary>Lists the plugins that support an architecture.</summary>
    /// <param name="architecture">CPU architecture to match.</param>
    /// <returns>Every matching plugin, ordered by priority.</returns>
    IEnumerable<IDisassemblyPlugin> FindPluginsForArchitecture(CpuArchitecture architecture);

    /// <summary>Lists the plugins that support a binary format.</summary>
    /// <param name="format">Binary format to match.</param>
    /// <returns>Every matching plugin, ordered by priority.</returns>
    IEnumerable<IDisassemblyPlugin> FindPluginsForFormat(BinaryFormat format);

    /// <summary>Looks a plugin up by its unique identifier.</summary>
    /// <param name="pluginId">Identifier of the plugin.</param>
    /// <returns>The plugin, or <see langword="null"/> when the identifier is not registered.</returns>
    IDisassemblyPlugin? GetPlugin(string pluginId);
}
|
||||
@@ -0,0 +1,30 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Facade over disassembly operations that picks the best plugin automatically.
/// </summary>
public interface IDisassemblyService
{
    /// <summary>Loads a binary from a stream, selecting the best plugin automatically.</summary>
    /// <param name="stream">Stream containing the binary.</param>
    /// <param name="preferredPluginId">Optional identifier of a preferred plugin.</param>
    /// <returns>The loaded binary's information together with the plugin that was used.</returns>
    (BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(Stream stream, string? preferredPluginId = null);

    /// <summary>Loads a binary from in-memory bytes, selecting the best plugin automatically.</summary>
    /// <param name="bytes">The binary data.</param>
    /// <param name="preferredPluginId">Optional identifier of a preferred plugin.</param>
    /// <returns>The loaded binary's information together with the plugin that was used.</returns>
    (BinaryInfo Binary, IDisassemblyPlugin Plugin) LoadBinary(ReadOnlySpan<byte> bytes, string? preferredPluginId = null);

    /// <summary>The plugin registry exposed by this service.</summary>
    IDisassemblyPluginRegistry Registry { get; }
}
|
||||
@@ -1,348 +0,0 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Identifies a CPU architecture.
/// </summary>
public enum CpuArchitecture
{
    /// <summary>Architecture is not known.</summary>
    Unknown = 0,

    /// <summary>32-bit x86 (Intel/AMD).</summary>
    X86 = 1,

    /// <summary>64-bit x86-64, also called amd64 (Intel/AMD).</summary>
    X86_64 = 2,

    /// <summary>32-bit ARM (ARMv7).</summary>
    ARM32 = 3,

    /// <summary>64-bit ARM (AArch64/ARMv8).</summary>
    ARM64 = 4,

    /// <summary>32-bit MIPS.</summary>
    MIPS32 = 5,

    /// <summary>64-bit MIPS.</summary>
    MIPS64 = 6,

    /// <summary>64-bit RISC-V.</summary>
    RISCV64 = 7,

    /// <summary>32-bit PowerPC.</summary>
    PPC32 = 8,

    /// <summary>64-bit PowerPC.</summary>
    PPC64 = 9,

    /// <summary>SPARC.</summary>
    SPARC = 10,

    /// <summary>SuperH SH4.</summary>
    SH4 = 11,

    /// <summary>AVR microcontroller.</summary>
    AVR = 12,

    /// <summary>Ethereum Virtual Machine.</summary>
    EVM = 13,

    /// <summary>WebAssembly.</summary>
    WASM = 14
}
|
||||
|
||||
/// <summary>
/// Identifies a binary executable format.
/// </summary>
public enum BinaryFormat
{
    /// <summary>Format is not known.</summary>
    Unknown = 0,

    /// <summary>Raw binary data with no format metadata.</summary>
    Raw = 1,

    /// <summary>Executable and Linkable Format (Linux, BSD, etc.).</summary>
    ELF = 2,

    /// <summary>Portable Executable (Windows).</summary>
    PE = 3,

    /// <summary>Mach-O (macOS, iOS).</summary>
    MachO = 4,

    /// <summary>WebAssembly module.</summary>
    WASM = 5
}
|
||||
|
||||
/// <summary>
/// Capability descriptor for a disassembly plugin: its identity, the architectures and
/// formats it supports, optional features, and its selection priority.
/// </summary>
public sealed record DisassemblyCapabilities
{
    /// <summary>Unique identifier of the plugin.</summary>
    public required string PluginId { get; init; }

    /// <summary>Display name of the disassembly engine.</summary>
    public required string Name { get; init; }

    /// <summary>Version of the underlying disassembly library.</summary>
    public required string Version { get; init; }

    /// <summary>CPU architectures the plugin supports.</summary>
    public required ImmutableHashSet<CpuArchitecture> SupportedArchitectures { get; init; }

    /// <summary>Binary formats the plugin supports.</summary>
    public required ImmutableHashSet<BinaryFormat> SupportedFormats { get; init; }

    /// <summary>Whether the plugin can lift code to an intermediate representation.</summary>
    public bool SupportsLifting { get; init; }

    /// <summary>Whether the plugin can recover control flow graphs.</summary>
    public bool SupportsCfgRecovery { get; init; }

    /// <summary>
    /// Selection priority when several plugins support the same architecture/format.
    /// Higher values indicate higher priority; the default is 0.
    /// </summary>
    public int Priority { get; init; }

    /// <summary>
    /// Tells whether <paramref name="arch"/> is in <see cref="SupportedArchitectures"/>.
    /// </summary>
    /// <param name="arch">Architecture to look up.</param>
    /// <returns><c>true</c> when the architecture is supported.</returns>
    public bool SupportsArchitecture(CpuArchitecture arch)
    {
        return SupportedArchitectures.Contains(arch);
    }

    /// <summary>
    /// Tells whether <paramref name="format"/> is in <see cref="SupportedFormats"/>.
    /// </summary>
    /// <param name="format">Binary format to look up.</param>
    /// <returns><c>true</c> when the format is supported.</returns>
    public bool SupportsFormat(BinaryFormat format)
    {
        return SupportedFormats.Contains(format);
    }

    /// <summary>
    /// Tells whether the plugin supports both the given architecture and the given format.
    /// </summary>
    /// <param name="arch">Architecture to look up.</param>
    /// <param name="format">Binary format to look up.</param>
    /// <returns><c>true</c> only when both are supported.</returns>
    public bool CanHandle(CpuArchitecture arch, BinaryFormat format)
    {
        if (!SupportsArchitecture(arch))
        {
            return false;
        }

        return SupportsFormat(format);
    }
}
|
||||
|
||||
/// <summary>
/// Describes a binary after it has been loaded by a disassembly engine.
/// </summary>
/// <param name="Format">Container format of the binary (ELF, PE, MachO, etc.).</param>
/// <param name="Architecture">CPU architecture of the binary.</param>
/// <param name="Bitness">Word size in bits (e.g. 32 or 64).</param>
/// <param name="Endianness">Byte order of the binary.</param>
/// <param name="Abi">Application binary interface hint (gnu, musl, msvc, darwin), or <c>null</c>.</param>
/// <param name="EntryPoint">Entry point address, or <c>null</c> when not available.</param>
/// <param name="BuildId">Build identifier (e.g., GNU build-id), or <c>null</c> when not present.</param>
/// <param name="Metadata">Additional metadata taken from the binary.</param>
/// <param name="Handle">Engine-specific internal handle used by the disassembly engine.</param>
public sealed record BinaryInfo(
    BinaryFormat Format,
    CpuArchitecture Architecture,
    int Bitness,
    Endianness Endianness,
    string? Abi,
    ulong? EntryPoint,
    string? BuildId,
    IReadOnlyDictionary<string, object> Metadata,
    object Handle);
|
||||
|
||||
/// <summary>
/// Byte order of multi-byte values.
/// </summary>
public enum Endianness
{
    /// <summary>Little-endian: least significant byte first.</summary>
    Little = 0,

    /// <summary>Big-endian: most significant byte first.</summary>
    Big = 1
}
|
||||
|
||||
/// <summary>
/// A region (section) of a binary, described by its location, size and access flags.
/// </summary>
/// <param name="Name">Section name, such as .text or .rodata.</param>
/// <param name="VirtualAddress">Address of the region in virtual memory.</param>
/// <param name="FileOffset">Offset of the region within the binary file.</param>
/// <param name="Size">Size of the region in bytes.</param>
/// <param name="IsExecutable">Whether the region contains executable code.</param>
/// <param name="IsReadable">Whether the region is readable.</param>
/// <param name="IsWritable">Whether the region is writable.</param>
public sealed record CodeRegion(
    string Name,
    ulong VirtualAddress,
    ulong FileOffset,
    ulong Size,
    bool IsExecutable,
    bool IsReadable,
    bool IsWritable);
|
||||
|
||||
/// <summary>
/// Describes one symbol found in a binary.
/// </summary>
/// <param name="Name">Name of the symbol.</param>
/// <param name="Address">Virtual address of the symbol.</param>
/// <param name="Size">Size in bytes; 0 when the size is unknown.</param>
/// <param name="Type">Kind of symbol.</param>
/// <param name="Binding">Binding of the symbol.</param>
/// <param name="Section">Section that contains the symbol, or <c>null</c>.</param>
public sealed record SymbolInfo(
    string Name,
    ulong Address,
    ulong Size,
    SymbolType Type,
    SymbolBinding Binding,
    string? Section);
|
||||
|
||||
/// <summary>
/// Kind of entity a symbol refers to.
/// </summary>
public enum SymbolType
{
    /// <summary>Type is unknown or was not specified.</summary>
    Unknown = 0,

    /// <summary>A function or procedure.</summary>
    Function = 1,

    /// <summary>A data object.</summary>
    Object = 2,

    /// <summary>A section symbol.</summary>
    Section = 3,

    /// <summary>A source file name.</summary>
    File = 4,

    /// <summary>A common block symbol.</summary>
    Common = 5,

    /// <summary>Thread-local storage.</summary>
    Tls = 6
}
|
||||
|
||||
/// <summary>
/// Binding (visibility) of a symbol.
/// </summary>
public enum SymbolBinding
{
    /// <summary>Binding is unknown.</summary>
    Unknown = 0,

    /// <summary>Local: not visible outside the object.</summary>
    Local = 1,

    /// <summary>Global: visible to other objects.</summary>
    Global = 2,

    /// <summary>Weak: can be overridden.</summary>
    Weak = 3
}
|
||||
|
||||
/// <summary>
/// One instruction produced by disassembly.
/// </summary>
/// <param name="Address">Virtual address at which the instruction is located.</param>
/// <param name="RawBytes">Raw bytes that encode the instruction.</param>
/// <param name="Mnemonic">Mnemonic of the instruction (e.g., MOV, ADD, JMP).</param>
/// <param name="OperandsText">Operands rendered as text.</param>
/// <param name="Kind">Classification of the instruction.</param>
/// <param name="Operands">Operands in parsed form.</param>
public sealed record DisassembledInstruction(
    ulong Address,
    ImmutableArray<byte> RawBytes,
    string Mnemonic,
    string OperandsText,
    InstructionKind Kind,
    ImmutableArray<Operand> Operands);
|
||||
|
||||
/// <summary>
/// Coarse classification of an instruction.
/// </summary>
public enum InstructionKind
{
    /// <summary>Instruction is unknown or has not been classified.</summary>
    Unknown = 0,

    /// <summary>Arithmetic (ADD, SUB, MUL, DIV).</summary>
    Arithmetic = 1,

    /// <summary>Logical operation (AND, OR, XOR, NOT).</summary>
    Logic = 2,

    /// <summary>Data movement (MOV, PUSH, POP).</summary>
    Move = 3,

    /// <summary>Load from memory.</summary>
    Load = 4,

    /// <summary>Store to memory.</summary>
    Store = 5,

    /// <summary>Unconditional branch (JMP).</summary>
    Branch = 6,

    /// <summary>Conditional branch (JE, JNE, JL, etc.).</summary>
    ConditionalBranch = 7,

    /// <summary>Function call.</summary>
    Call = 8,

    /// <summary>Return from a function.</summary>
    Return = 9,

    /// <summary>No operation.</summary>
    Nop = 10,

    /// <summary>System call.</summary>
    Syscall = 11,

    /// <summary>Software interrupt.</summary>
    Interrupt = 12,

    /// <summary>Comparison.</summary>
    Compare = 13,

    /// <summary>Shift.</summary>
    Shift = 14,

    /// <summary>Vector/SIMD operation.</summary>
    Vector = 15,

    /// <summary>Floating point operation.</summary>
    FloatingPoint = 16
}
|
||||
|
||||
/// <summary>
/// One operand of an instruction.
/// </summary>
/// <param name="Type">Kind of operand.</param>
/// <param name="Text">Operand rendered as text.</param>
/// <param name="Value">Immediate value, when applicable; otherwise <c>null</c>.</param>
/// <param name="Register">Register name, when applicable; otherwise <c>null</c>.</param>
/// <param name="MemoryBase">Base register of a memory operand.</param>
/// <param name="MemoryIndex">Index register of a memory operand.</param>
/// <param name="MemoryScale">Scale factor of an indexed memory operand.</param>
/// <param name="MemoryDisplacement">Displacement of a memory operand.</param>
public sealed record Operand(
    OperandType Type,
    string Text,
    long? Value = null,
    string? Register = null,
    string? MemoryBase = null,
    string? MemoryIndex = null,
    int? MemoryScale = null,
    long? MemoryDisplacement = null);
|
||||
|
||||
/// <summary>
/// Kind of instruction operand.
/// </summary>
public enum OperandType
{
    /// <summary>Operand kind is unknown.</summary>
    Unknown = 0,

    /// <summary>A CPU register.</summary>
    Register = 1,

    /// <summary>An immediate value.</summary>
    Immediate = 2,

    /// <summary>A memory reference.</summary>
    Memory = 3,

    /// <summary>An address or label.</summary>
    Address = 4
}
|
||||
@@ -0,0 +1,22 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Container format of an executable binary.
/// </summary>
public enum BinaryFormat
{
    /// <summary>Format could not be determined.</summary>
    Unknown = 0,

    /// <summary>Raw bytes with no format metadata.</summary>
    Raw = 1,

    /// <summary>ELF: Executable and Linkable Format (Linux, BSD, etc.).</summary>
    ELF = 2,

    /// <summary>PE: Portable Executable (Windows).</summary>
    PE = 3,

    /// <summary>Mach-O (macOS, iOS).</summary>
    MachO = 4,

    /// <summary>WebAssembly module.</summary>
    WASM = 5
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Describes a binary after it has been loaded by a disassembly engine.
/// </summary>
/// <param name="Format">Container format of the binary (ELF, PE, MachO, etc.).</param>
/// <param name="Architecture">CPU architecture of the binary.</param>
/// <param name="Bitness">Word size in bits (e.g. 32 or 64).</param>
/// <param name="Endianness">Byte order of the binary.</param>
/// <param name="Abi">Application binary interface hint (gnu, musl, msvc, darwin), or <c>null</c>.</param>
/// <param name="EntryPoint">Entry point address, or <c>null</c> when not available.</param>
/// <param name="BuildId">Build identifier (e.g., GNU build-id), or <c>null</c> when not present.</param>
/// <param name="Metadata">Additional metadata taken from the binary.</param>
/// <param name="Handle">Engine-specific internal handle used by the disassembly engine.</param>
public sealed record BinaryInfo(
    BinaryFormat Format,
    CpuArchitecture Architecture,
    int Bitness,
    Endianness Endianness,
    string? Abi,
    ulong? EntryPoint,
    string? BuildId,
    IReadOnlyDictionary<string, object> Metadata,
    object Handle);
|
||||
@@ -0,0 +1,22 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// A region (section) of a binary, described by its location, size and access flags.
/// </summary>
/// <param name="Name">Section name, such as .text or .rodata.</param>
/// <param name="VirtualAddress">Address of the region in virtual memory.</param>
/// <param name="FileOffset">Offset of the region within the binary file.</param>
/// <param name="Size">Size of the region in bytes.</param>
/// <param name="IsExecutable">Whether the region contains executable code.</param>
/// <param name="IsReadable">Whether the region is readable.</param>
/// <param name="IsWritable">Whether the region is writable.</param>
public sealed record CodeRegion(
    string Name,
    ulong VirtualAddress,
    ulong FileOffset,
    ulong Size,
    bool IsExecutable,
    bool IsReadable,
    bool IsWritable);
|
||||
@@ -0,0 +1,40 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Identifies the CPU architecture (or virtual machine) a binary targets.
/// </summary>
public enum CpuArchitecture
{
    /// <summary>Architecture could not be determined.</summary>
    Unknown = 0,

    /// <summary>32-bit x86 (Intel/AMD).</summary>
    X86 = 1,

    /// <summary>64-bit x86-64, also known as amd64 (Intel/AMD).</summary>
    X86_64 = 2,

    /// <summary>32-bit ARM (ARMv7).</summary>
    ARM32 = 3,

    /// <summary>64-bit ARM (AArch64/ARMv8).</summary>
    ARM64 = 4,

    /// <summary>32-bit MIPS.</summary>
    MIPS32 = 5,

    /// <summary>64-bit MIPS.</summary>
    MIPS64 = 6,

    /// <summary>64-bit RISC-V.</summary>
    RISCV64 = 7,

    /// <summary>32-bit PowerPC.</summary>
    PPC32 = 8,

    /// <summary>64-bit PowerPC.</summary>
    PPC64 = 9,

    /// <summary>SPARC.</summary>
    SPARC = 10,

    /// <summary>SuperH SH4.</summary>
    SH4 = 11,

    /// <summary>AVR microcontroller.</summary>
    AVR = 12,

    /// <summary>Ethereum Virtual Machine bytecode.</summary>
    EVM = 13,

    /// <summary>WebAssembly.</summary>
    WASM = 14
}
|
||||
@@ -0,0 +1,22 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// One instruction produced by disassembly.
/// </summary>
/// <param name="Address">Virtual address at which the instruction is located.</param>
/// <param name="RawBytes">Raw bytes that encode the instruction.</param>
/// <param name="Mnemonic">Mnemonic of the instruction (e.g., MOV, ADD, JMP).</param>
/// <param name="OperandsText">Operands rendered as text.</param>
/// <param name="Kind">Classification of the instruction.</param>
/// <param name="Operands">Operands in parsed form.</param>
public sealed record DisassembledInstruction(
    ulong Address,
    ImmutableArray<byte> RawBytes,
    string Mnemonic,
    string OperandsText,
    InstructionKind Kind,
    ImmutableArray<Operand> Operands);
|
||||
@@ -0,0 +1,70 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Capability descriptor for a disassembly plugin: its identity, the architectures and
/// formats it supports, optional features, and its selection priority.
/// </summary>
public sealed record DisassemblyCapabilities
{
    /// <summary>Unique identifier of the plugin.</summary>
    public required string PluginId { get; init; }

    /// <summary>Display name of the disassembly engine.</summary>
    public required string Name { get; init; }

    /// <summary>Version of the underlying disassembly library.</summary>
    public required string Version { get; init; }

    /// <summary>CPU architectures the plugin supports.</summary>
    public required ImmutableHashSet<CpuArchitecture> SupportedArchitectures { get; init; }

    /// <summary>Binary formats the plugin supports.</summary>
    public required ImmutableHashSet<BinaryFormat> SupportedFormats { get; init; }

    /// <summary>Whether the plugin can lift code to an intermediate representation.</summary>
    public bool SupportsLifting { get; init; }

    /// <summary>Whether the plugin can recover control flow graphs.</summary>
    public bool SupportsCfgRecovery { get; init; }

    /// <summary>
    /// Selection priority when several plugins support the same architecture/format.
    /// Higher values indicate higher priority; the default is 0.
    /// </summary>
    public int Priority { get; init; }

    /// <summary>
    /// Tells whether <paramref name="arch"/> is in <see cref="SupportedArchitectures"/>.
    /// </summary>
    /// <param name="arch">Architecture to look up.</param>
    /// <returns><c>true</c> when the architecture is supported.</returns>
    public bool SupportsArchitecture(CpuArchitecture arch)
    {
        return SupportedArchitectures.Contains(arch);
    }

    /// <summary>
    /// Tells whether <paramref name="format"/> is in <see cref="SupportedFormats"/>.
    /// </summary>
    /// <param name="format">Binary format to look up.</param>
    /// <returns><c>true</c> when the format is supported.</returns>
    public bool SupportsFormat(BinaryFormat format)
    {
        return SupportedFormats.Contains(format);
    }

    /// <summary>
    /// Tells whether the plugin supports both the given architecture and the given format.
    /// </summary>
    /// <param name="arch">Architecture to look up.</param>
    /// <param name="format">Binary format to look up.</param>
    /// <returns><c>true</c> only when both are supported.</returns>
    public bool CanHandle(CpuArchitecture arch, BinaryFormat format)
    {
        if (!SupportsArchitecture(arch))
        {
            return false;
        }

        return SupportsFormat(format);
    }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Byte order of multi-byte values.
/// </summary>
public enum Endianness
{
    /// <summary>Little-endian: least significant byte first.</summary>
    Little = 0,

    /// <summary>Big-endian: most significant byte first.</summary>
    Big = 1
}
|
||||
@@ -0,0 +1,44 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Coarse classification of an instruction.
/// </summary>
public enum InstructionKind
{
    /// <summary>Instruction is unknown or has not been classified.</summary>
    Unknown = 0,

    /// <summary>Arithmetic (ADD, SUB, MUL, DIV).</summary>
    Arithmetic = 1,

    /// <summary>Logical operation (AND, OR, XOR, NOT).</summary>
    Logic = 2,

    /// <summary>Data movement (MOV, PUSH, POP).</summary>
    Move = 3,

    /// <summary>Load from memory.</summary>
    Load = 4,

    /// <summary>Store to memory.</summary>
    Store = 5,

    /// <summary>Unconditional branch (JMP).</summary>
    Branch = 6,

    /// <summary>Conditional branch (JE, JNE, JL, etc.).</summary>
    ConditionalBranch = 7,

    /// <summary>Function call.</summary>
    Call = 8,

    /// <summary>Return from a function.</summary>
    Return = 9,

    /// <summary>No operation.</summary>
    Nop = 10,

    /// <summary>System call.</summary>
    Syscall = 11,

    /// <summary>Software interrupt.</summary>
    Interrupt = 12,

    /// <summary>Comparison.</summary>
    Compare = 13,

    /// <summary>Shift.</summary>
    Shift = 14,

    /// <summary>Vector/SIMD operation.</summary>
    Vector = 15,

    /// <summary>Floating point operation.</summary>
    FloatingPoint = 16
}
|
||||
@@ -0,0 +1,24 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// One operand of an instruction.
/// </summary>
/// <param name="Type">Kind of operand.</param>
/// <param name="Text">Operand rendered as text.</param>
/// <param name="Value">Immediate value, when applicable; otherwise <c>null</c>.</param>
/// <param name="Register">Register name, when applicable; otherwise <c>null</c>.</param>
/// <param name="MemoryBase">Base register of a memory operand.</param>
/// <param name="MemoryIndex">Index register of a memory operand.</param>
/// <param name="MemoryScale">Scale factor of an indexed memory operand.</param>
/// <param name="MemoryDisplacement">Displacement of a memory operand.</param>
public sealed record Operand(
    OperandType Type,
    string Text,
    long? Value = null,
    string? Register = null,
    string? MemoryBase = null,
    string? MemoryIndex = null,
    int? MemoryScale = null,
    long? MemoryDisplacement = null);
|
||||
@@ -0,0 +1,20 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Kind of instruction operand.
/// </summary>
public enum OperandType
{
    /// <summary>Operand kind is unknown.</summary>
    Unknown = 0,

    /// <summary>A CPU register.</summary>
    Register = 1,

    /// <summary>An immediate value.</summary>
    Immediate = 2,

    /// <summary>A memory reference.</summary>
    Memory = 3,

    /// <summary>An address or label.</summary>
    Address = 4
}
|
||||
@@ -0,0 +1,56 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly;
|
||||
|
||||
/// <summary>
/// Describes one symbol found in a binary.
/// </summary>
/// <param name="Name">Name of the symbol.</param>
/// <param name="Address">Virtual address of the symbol.</param>
/// <param name="Size">Size in bytes; 0 when the size is unknown.</param>
/// <param name="Type">Kind of symbol.</param>
/// <param name="Binding">Binding of the symbol.</param>
/// <param name="Section">Section that contains the symbol, or <c>null</c>.</param>
public sealed record SymbolInfo(
    string Name,
    ulong Address,
    ulong Size,
    SymbolType Type,
    SymbolBinding Binding,
    string? Section);
|
||||
|
||||
/// <summary>
/// Kind of entity a symbol refers to.
/// </summary>
public enum SymbolType
{
    /// <summary>Type is unknown or was not specified.</summary>
    Unknown = 0,

    /// <summary>A function or procedure.</summary>
    Function = 1,

    /// <summary>A data object.</summary>
    Object = 2,

    /// <summary>A section symbol.</summary>
    Section = 3,

    /// <summary>A source file name.</summary>
    File = 4,

    /// <summary>A common block symbol.</summary>
    Common = 5,

    /// <summary>Thread-local storage.</summary>
    Tls = 6
}
|
||||
|
||||
/// <summary>
/// Binding (visibility) of a symbol.
/// </summary>
public enum SymbolBinding
{
    /// <summary>Binding is unknown.</summary>
    Unknown = 0,

    /// <summary>Local: not visible outside the object.</summary>
    Local = 1,

    /// <summary>Global: visible to other objects.</summary>
    Global = 2,

    /// <summary>Weak: can be overridden.</summary>
    Weak = 3
}
|
||||
@@ -4,5 +4,5 @@ Source of truth: `docs/implplan/SPRINT_20260130_002_Tools_csproj_remediation_sol
|
||||
|
||||
| Task ID | Status | Notes |
|
||||
| --- | --- | --- |
|
||||
| REMED-05 | TODO | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Disassembly.Abstractions/StellaOps.BinaryIndex.Disassembly.Abstractions.md. |
|
||||
| REMED-05 | DONE | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.Disassembly.Abstractions/StellaOps.BinaryIndex.Disassembly.Abstractions.md (2026-02-04). |
|
||||
| REMED-06 | DONE | SOLID review notes captured for SPRINT_20260130_002. |
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using B2R2.FrontEnd;
|
||||
using B2R2.FrontEnd.BinFile;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
/// <summary>
/// Engine-specific handle that pairs a B2R2 <see cref="BinHandle"/> with a byte buffer.
/// </summary>
/// <param name="BinHandle">B2R2 handle for the binary.</param>
/// <param name="Bytes">
/// Byte buffer kept alongside the handle (presumably the raw contents of the binary;
/// confirm against the code that constructs the handle).
/// </param>
internal sealed record B2R2BinaryHandle(
    BinHandle BinHandle,
    byte[] Bytes);
|
||||
@@ -0,0 +1,89 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using B2R2;
|
||||
using B2R2.FrontEnd.BinFile;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2DisassemblyPlugin
{
    /// <summary>
    /// Translates a <see cref="CpuArchitecture"/> into the B2R2 <see cref="ISA"/> used to decode it.
    /// </summary>
    /// <param name="arch">Architecture to translate.</param>
    /// <returns>
    /// The matching ISA. Any value without an explicit arm (this includes
    /// <see cref="CpuArchitecture.PPC64"/>, <see cref="CpuArchitecture.WASM"/> and
    /// <see cref="CpuArchitecture.Unknown"/>) falls back to 64-bit Intel.
    /// </returns>
    private static ISA MapToB2R2Isa(CpuArchitecture arch) => arch switch
    {
        CpuArchitecture.X86 => new ISA(Architecture.Intel, WordSize.Bit32),
        CpuArchitecture.X86_64 => new ISA(Architecture.Intel, WordSize.Bit64),
        CpuArchitecture.ARM32 => new ISA(Architecture.ARMv7, WordSize.Bit32),
        CpuArchitecture.ARM64 => new ISA(Architecture.ARMv8, WordSize.Bit64),
        CpuArchitecture.MIPS32 => new ISA(Architecture.MIPS, WordSize.Bit32),
        CpuArchitecture.MIPS64 => new ISA(Architecture.MIPS, WordSize.Bit64),
        CpuArchitecture.RISCV64 => new ISA(Architecture.RISCV, WordSize.Bit64),
        CpuArchitecture.PPC32 => new ISA(Architecture.PPC, Endian.Big, WordSize.Bit32),
        CpuArchitecture.SPARC => new ISA(Architecture.SPARC, Endian.Big),
        CpuArchitecture.SH4 => new ISA(Architecture.SH4),
        CpuArchitecture.AVR => new ISA(Architecture.AVR),
        CpuArchitecture.EVM => new ISA(Architecture.EVM, Endian.Big),
        _ => new ISA(Architecture.Intel, WordSize.Bit64)
    };

    /// <summary>
    /// Translates a B2R2 <see cref="ISA"/> back into a <see cref="CpuArchitecture"/>.
    /// </summary>
    /// <param name="isa">ISA reported by B2R2.</param>
    /// <returns>
    /// The matching architecture, or <see cref="CpuArchitecture.Unknown"/> when no arm applies.
    /// Arms are evaluated top to bottom, so the word-size guards take precedence over the
    /// unguarded arm for the same B2R2 architecture. PPC always maps to
    /// <see cref="CpuArchitecture.PPC32"/> regardless of word size.
    /// </returns>
    private static CpuArchitecture MapFromB2R2Architecture(ISA isa) => isa.Arch switch
    {
        Architecture.Intel when isa.WordSize == WordSize.Bit32 => CpuArchitecture.X86,
        Architecture.Intel when isa.WordSize == WordSize.Bit64 => CpuArchitecture.X86_64,
        Architecture.Intel => isa.IsX86 ? CpuArchitecture.X86 : CpuArchitecture.X86_64,
        Architecture.ARMv7 => CpuArchitecture.ARM32,
        Architecture.ARMv8 when isa.WordSize == WordSize.Bit64 => CpuArchitecture.ARM64,
        Architecture.ARMv8 => CpuArchitecture.ARM32,
        Architecture.MIPS when isa.WordSize == WordSize.Bit64 => CpuArchitecture.MIPS64,
        Architecture.MIPS => CpuArchitecture.MIPS32,
        Architecture.RISCV => CpuArchitecture.RISCV64,
        Architecture.PPC => CpuArchitecture.PPC32,
        Architecture.SPARC => CpuArchitecture.SPARC,
        Architecture.SH4 => CpuArchitecture.SH4,
        Architecture.AVR => CpuArchitecture.AVR,
        Architecture.EVM => CpuArchitecture.EVM,
        _ => CpuArchitecture.Unknown
    };

    /// <summary>
    /// Translates a B2R2 <see cref="FileFormat"/> into a <see cref="BinaryFormat"/>.
    /// </summary>
    /// <param name="format">File format reported by B2R2.</param>
    /// <returns>The matching format, or <see cref="BinaryFormat.Unknown"/> for any other value.</returns>
    private static BinaryFormat MapFromB2R2Format(FileFormat format) => format switch
    {
        FileFormat.ELFBinary => BinaryFormat.ELF,
        FileFormat.PEBinary => BinaryFormat.PE,
        FileFormat.MachBinary => BinaryFormat.MachO,
        FileFormat.WasmBinary => BinaryFormat.WASM,
        FileFormat.RawBinary => BinaryFormat.Raw,
        _ => BinaryFormat.Unknown
    };

    /// <summary>
    /// Converts a B2R2 <see cref="WordSize"/> into a bit count.
    /// </summary>
    /// <param name="wordSize">Word size reported by B2R2.</param>
    /// <returns>The number of bits; 64 for any value without an explicit arm.</returns>
    private static int GetBitness(WordSize wordSize) => wordSize switch
    {
        WordSize.Bit8 => 8,
        WordSize.Bit16 => 16,
        WordSize.Bit32 => 32,
        WordSize.Bit64 => 64,
        WordSize.Bit128 => 128,
        WordSize.Bit256 => 256,
        _ => 64
    };

    /// <summary>
    /// Produces an ABI hint from the binary format alone.
    /// </summary>
    /// <param name="format">Format of the binary.</param>
    /// <returns>
    /// "gnu" for ELF, "msvc" for PE, "darwin" for Mach-O, and <c>null</c> otherwise.
    /// The binary's contents are not inspected, so this is only a format-based guess.
    /// </returns>
    private static string? DetectAbi(BinaryFormat format) => format switch
    {
        BinaryFormat.ELF => "gnu",
        BinaryFormat.PE => "msvc",
        BinaryFormat.MachO => "darwin",
        _ => null
    };
}
|
||||
@@ -0,0 +1,84 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using B2R2.FrontEnd.BinLifter;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2DisassemblyPlugin
{
    /// <inheritdoc />
    /// <remarks>
    /// Arguments and the engine handle are validated when this method is called.
    /// Decoding is deferred: instructions are produced as the returned sequence is enumerated.
    /// </remarks>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="binary"/> or <paramref name="region"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException">The binary's handle is not a B2R2 handle.</exception>
    public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
    {
        // Validation lives in this non-iterator wrapper: inside an iterator body the
        // checks would not run until the caller starts enumerating.
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(region);

        return DisassembleRegionLazily(GetHandle(binary), region);
    }

    /// <summary>
    /// Lazily decodes the instructions in <paramref name="region"/>, starting at its virtual
    /// address and stopping once the current address reaches the end of the region.
    /// </summary>
    /// <param name="handle">Validated B2R2 handle of the binary.</param>
    /// <param name="region">Region to decode; only its name, virtual address and size are read.</param>
    /// <returns>The decoded instructions in address order.</returns>
    private IEnumerable<DisassembledInstruction> DisassembleRegionLazily(B2R2BinaryHandle handle, CodeRegion region)
    {
        var lifter = handle.BinHandle.NewLiftingUnit();

        var addr = region.VirtualAddress;
        var endAddr = region.VirtualAddress + region.Size;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} to 0x{End:X}",
            region.Name, addr, endAddr);

        while (addr < endAddr)
        {
            IInstruction? instr;
            try
            {
                instr = lifter.ParseInstruction(addr);
            }
            catch (Exception ex)
            {
                // Best effort: a byte that cannot be decoded is skipped so that decoding
                // can resume at the next address. The failure is recorded at trace level.
                _logger.LogTrace(ex, "Skipping undecodable byte at 0x{Address:X}", addr);
                addr++;
                continue;
            }

            // A null or zero-length result would never advance the address; step one byte.
            if (instr is null || instr.Length == 0)
            {
                addr++;
                continue;
            }

            yield return MapInstruction(instr, handle, addr);
            addr += instr.Length;
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Wraps the address range in a synthetic <see cref="CodeRegion"/> named after the start
    /// address. The region's file offset is filled with the virtual address; the decoding loop
    /// in this file does not read it.
    /// </remarks>
    public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length)
    {
        var region = new CodeRegion(
            Name: $"0x{startAddress:X}",
            VirtualAddress: startAddress,
            FileOffset: startAddress,
            Size: length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false);

        return Disassemble(binary, region);
    }

    /// <inheritdoc />
    /// <remarks>
    /// When the symbol's size is 0, a fixed window of 4096 bytes starting at the symbol's
    /// address is decoded instead.
    /// </remarks>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="binary"/> or <paramref name="symbol"/> is <c>null</c>.
    /// </exception>
    public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
    {
        ArgumentNullException.ThrowIfNull(binary);
        ArgumentNullException.ThrowIfNull(symbol);

        // Number of bytes decoded when the symbol carries no size.
        const ulong fallbackSize = 4096UL;
        var size = symbol.Size > 0 ? symbol.Size : fallbackSize;

        var region = new CodeRegion(
            Name: symbol.Name,
            VirtualAddress: symbol.Address,
            FileOffset: symbol.Address,
            Size: size,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false);

        return Disassemble(binary, region);
    }
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using B2R2.FrontEnd.BinLifter;
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2DisassemblyPlugin
|
||||
{
|
||||
private static B2R2BinaryHandle GetHandle(BinaryInfo binary)
|
||||
{
|
||||
if (binary.Handle is not B2R2BinaryHandle handle)
|
||||
throw new ArgumentException("Invalid binary handle - not a B2R2 handle", nameof(binary));
|
||||
return handle;
|
||||
}
|
||||
|
||||
private static DisassembledInstruction MapInstruction(IInstruction instr, B2R2BinaryHandle handle, ulong address)
|
||||
{
|
||||
var disasm = instr.Disasm();
|
||||
|
||||
var parts = disasm.Split(' ', 2, StringSplitOptions.RemoveEmptyEntries);
|
||||
var mnemonic = parts.Length > 0 ? parts[0] : "???";
|
||||
var operandsText = parts.Length > 1 ? parts[1] : "";
|
||||
|
||||
var offset = (int)(address - handle.BinHandle.File.BaseAddress);
|
||||
var length = (int)instr.Length;
|
||||
var rawBytes = offset >= 0 && offset + length <= handle.Bytes.Length
|
||||
? handle.Bytes.AsSpan(offset, length).ToArray().ToImmutableArray()
|
||||
: ImmutableArray<byte>.Empty;
|
||||
|
||||
var kind = ClassifyInstruction(instr, mnemonic);
|
||||
var operands = ParseOperands(operandsText, mnemonic);
|
||||
|
||||
return new DisassembledInstruction(
|
||||
Address: address,
|
||||
RawBytes: rawBytes,
|
||||
Mnemonic: mnemonic,
|
||||
OperandsText: operandsText,
|
||||
Kind: kind,
|
||||
Operands: operands);
|
||||
}
|
||||
|
||||
/// <summary>
/// Assigns a coarse <c>InstructionKind</c> to an instruction.
/// </summary>
/// <param name="instr">Decoded instruction; its control-flow flags are consulted first.</param>
/// <param name="mnemonic">Mnemonic text, matched case-insensitively as a fallback.</param>
/// <returns>The matching kind, or <c>InstructionKind.Unknown</c> when nothing matches.</returns>
private static InstructionKind ClassifyInstruction(IInstruction instr, string mnemonic)
{
    // Flags reported by B2R2 take precedence over mnemonic matching.
    if (instr.IsRET)
    {
        return InstructionKind.Return;
    }

    if (instr.IsCall)
    {
        return InstructionKind.Call;
    }

    if (instr.IsCondBranch)
    {
        return InstructionKind.ConditionalBranch;
    }

    if (instr.IsBranch)
    {
        return InstructionKind.Branch;
    }

    if (instr.IsNop)
    {
        return InstructionKind.Nop;
    }

    if (instr.IsInterrupt)
    {
        return InstructionKind.Syscall;
    }

    var name = mnemonic.ToUpperInvariant();

    // Exact names first, prefix families afterwards. None of the exact names begins with
    // MOV/LD/ST, so the arm order cannot change the outcome. The "LD"/"ST" prefixes also
    // cover "LDR"/"STR".
    return name switch
    {
        "ADD" or "SUB" or "MUL" or "DIV" or "IMUL" or "IDIV" or "INC" or "DEC" or "NEG" or "ADC" or "SBB"
            => InstructionKind.Arithmetic,
        "AND" or "OR" or "XOR" or "NOT" or "TEST" or "ORR" or "EOR"
            => InstructionKind.Logic,
        "SHL" or "SHR" or "SAL" or "SAR" or "ROL" or "ROR" or "LSL" or "LSR" or "ASR"
            => InstructionKind.Shift,
        "LEA" or "PUSH" or "POP" or "XCHG"
            => InstructionKind.Move,
        "CMP" or "CMPS" or "SCAS"
            => InstructionKind.Compare,
        _ when name.StartsWith("MOV", StringComparison.Ordinal) => InstructionKind.Move,
        _ when name.StartsWith("LD", StringComparison.Ordinal) => InstructionKind.Load,
        _ when name.StartsWith("ST", StringComparison.Ordinal) => InstructionKind.Store,
        _ => InstructionKind.Unknown
    };
}
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using B2R2;
|
||||
using B2R2.FrontEnd;
|
||||
using B2R2.FrontEnd.BinFile;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2DisassemblyPlugin
|
||||
{
|
||||
/// <inheritdoc />
public BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    ArgumentNullException.ThrowIfNull(stream);

    using var buffered = new MemoryStream();
    stream.CopyTo(buffered);

    // The span overload takes its own copy of the data (bytes.ToArray()), so pass a view
    // over the stream's internal buffer rather than allocating a second full-size array
    // with MemoryStream.ToArray().
    var view = new ReadOnlySpan<byte>(buffered.GetBuffer(), 0, checked((int)buffered.Length));
    return LoadBinary(view, archHint, formatHint);
}
|
||||
|
||||
/// <inheritdoc />
public BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    // Private copy: the same array is handed to BinHandle and stored in the returned handle.
    var image = bytes.ToArray();

    _logger.LogDebug("Loading binary with B2R2 plugin (size: {Size} bytes)", image.Length);

    // Without an architecture hint, x86-64 is used as the starting ISA.
    ISA isaHint;
    if (archHint.HasValue)
    {
        isaHint = MapToB2R2Isa(archHint.Value);
    }
    else
    {
        isaHint = new ISA(Architecture.Intel, WordSize.Bit64); // Default to x64
    }

    // NOTE(review): the trailing 'true' presumably turns on format auto-detection -
    // confirm against the BinHandle constructor of the referenced B2R2 version.
    var b2r2Handle = new BinHandle(image, isaHint, null, true);
    var file = b2r2Handle.File;

    var format = MapFromB2R2Format(file.Format);
    var architecture = MapFromB2R2Architecture(file.ISA);
    var bitness = GetBitness(file.ISA.WordSize);
    var endianness = file.ISA.Endian == Endian.Little ? Endianness.Little : Endianness.Big;
    var abi = DetectAbi(format);

    // EntryPoint is an F# option; convert it to a nullable ulong.
    ulong? entryPoint = null;
    var entryPointOpt = file.EntryPoint;
    if (Microsoft.FSharp.Core.FSharpOption<ulong>.get_IsSome(entryPointOpt))
    {
        entryPoint = entryPointOpt.Value;
    }

    _logger.LogInformation(
        "Loaded binary with B2R2: Format={Format}, Architecture={Architecture}, Endian={Endian}",
        format, architecture, endianness);

    var metadata = new Dictionary<string, object>();
    metadata["size"] = image.Length;
    metadata["b2r2_isa"] = file.ISA.Arch.ToString();
    if (entryPoint.HasValue)
    {
        metadata["entry_point"] = entryPoint.Value;
    }

    var handle = new B2R2BinaryHandle(b2r2Handle, image);

    return new BinaryInfo(
        Format: format,
        Architecture: architecture,
        Bitness: bitness,
        Endianness: endianness,
        Abi: abi,
        EntryPoint: entryPoint,
        BuildId: null,
        Metadata: metadata,
        Handle: handle);
}
|
||||
}
|
||||
@@ -0,0 +1,109 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Globalization;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2DisassemblyPlugin
|
||||
{
|
||||
/// <summary>
/// Parses a bracketed memory operand into base, index, scale and displacement.
/// </summary>
/// <remarks>
/// Handles both x86 style (<c>[rax+rbx*4+0x10]</c>, <c>[rbp-0x10]</c>) and ARM style
/// (<c>[x0, #8]</c>, <c>[x0, x1]</c>). Text before the first '[' and after the last ']'
/// is ignored. Components that cannot be interpreted are skipped, not reported.
/// </remarks>
/// <param name="text">Operand text containing a bracketed expression.</param>
/// <returns>
/// A <c>Memory</c> operand. All memory fields are null when no well-formed bracket pair is
/// found. Register names are upper-cased.
/// </returns>
private static Operand ParseMemoryOperand(string text)
{
    string? memBase = null;
    string? memIndex = null;
    int? memScale = null;
    long? memDisp = null;

    var open = text.IndexOf('[');
    var close = text.LastIndexOf(']');

    if (open >= 0 && close > open)
    {
        var inner = text.Substring(open + 1, close - open - 1);

        foreach (var piece in inner.Split(['+', ','], StringSplitOptions.RemoveEmptyEntries))
        {
            var token = piece.Trim();
            if (token.Length == 0)
            {
                continue;
            }

            // x86 writes negative displacements inline ("rbp-0x10", "rbx*4-8"): peel the
            // signed tail off so the register part and the displacement are seen separately.
            // ARM immediates ("#-8") keep their sign and are left intact.
            var minus = token[0] == '#' ? -1 : token.IndexOf('-', 1);
            if (minus > 0)
            {
                Consume(token[..minus].TrimEnd());
                Consume(token[minus..]);
            }
            else
            {
                Consume(token);
            }
        }
    }

    return new Operand(
        Type: OperandType.Memory,
        Text: text,
        Value: null,
        Register: null,
        MemoryBase: memBase,
        MemoryIndex: memIndex,
        MemoryScale: memScale,
        MemoryDisplacement: memDisp);

    // Classifies one component as scaled index, displacement, or register.
    void Consume(string token)
    {
        if (token.Length == 0)
        {
            return;
        }

        // Scaled index: reg*N
        if (token.Contains('*'))
        {
            var scaleParts = token.Split('*');
            if (scaleParts.Length == 2)
            {
                memIndex = scaleParts[0].Trim().ToUpperInvariant();
                if (int.TryParse(scaleParts[1].Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var scale))
                {
                    memScale = scale;
                }
            }

            return;
        }

        // Displacement: "#imm" (ARM), or anything starting with a digit or a minus sign.
        var numeric = token.TrimStart('#').Trim();
        var looksNumeric = numeric.Length > 0 && (char.IsDigit(numeric[0]) || numeric[0] == '-');
        if (token[0] == '#' || looksNumeric)
        {
            if (TryParseDisplacement(numeric, out var displacement))
            {
                memDisp = displacement;
            }

            return;
        }

        // Otherwise a register: the first one is the base, the second the index.
        if (memBase is null)
        {
            memBase = token.ToUpperInvariant();
        }
        else if (memIndex is null)
        {
            memIndex = token.ToUpperInvariant();
        }
    }

    // Parses "[-]0xHEX" or a signed decimal using the invariant culture.
    static bool TryParseDisplacement(string s, out long value)
    {
        var negative = s.StartsWith('-');
        var magnitude = negative ? s[1..].TrimStart() : s;

        if (magnitude.StartsWith("0x", StringComparison.OrdinalIgnoreCase))
        {
            if (long.TryParse(magnitude.AsSpan(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out var hex))
            {
                value = negative ? -hex : hex;
                return true;
            }

            value = 0;
            return false;
        }

        return long.TryParse(
            negative ? "-" + magnitude : magnitude,
            NumberStyles.Integer,
            CultureInfo.InvariantCulture,
            out value);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Text;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2DisassemblyPlugin
|
||||
{
|
||||
/// <summary>
/// Splits an operand list on top-level commas, leaving commas inside
/// <c>[]</c>, <c>()</c> or <c>{}</c> untouched.
/// </summary>
/// <param name="operandsText">Operand portion of a disassembly line.</param>
/// <returns>
/// The operand strings in source order. Whitespace is preserved; empty segments are dropped.
/// </returns>
private static IReadOnlyList<string> SplitOperands(string operandsText)
{
    var operands = new List<string>();
    var current = new StringBuilder();
    var depth = 0;

    foreach (var ch in operandsText)
    {
        switch (ch)
        {
            case '[' or '(' or '{':
                depth++;
                break;

            case ']' or ')' or '}':
                // Never drop below zero: a stray closing bracket must not stop
                // every later top-level comma from splitting.
                if (depth > 0)
                {
                    depth--;
                }

                break;

            case ',' when depth == 0:
                if (current.Length > 0)
                {
                    operands.Add(current.ToString());
                    current.Clear();
                }

                continue; // the separator itself is not part of any operand
        }

        current.Append(ch);
    }

    if (current.Length > 0)
    {
        operands.Add(current.ToString());
    }

    return operands;
}
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
using System.Globalization;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2DisassemblyPlugin
|
||||
{
|
||||
/// <summary>
/// Parses the operand portion of a disassembly line into structured operands.
/// </summary>
/// <param name="operandsText">Text following the mnemonic; may be empty or whitespace.</param>
/// <param name="mnemonic">Instruction mnemonic (not used by the current implementation).</param>
/// <returns>The parsed operands in source order; empty when there is nothing to parse.</returns>
private static ImmutableArray<Operand> ParseOperands(string operandsText, string mnemonic)
{
    if (string.IsNullOrWhiteSpace(operandsText))
    {
        return ImmutableArray<Operand>.Empty;
    }

    var pieces = SplitOperands(operandsText);
    var parsed = ImmutableArray.CreateBuilder<Operand>();

    for (var i = 0; i < pieces.Count; i++)
    {
        var candidate = pieces[i].Trim();
        if (candidate.Length != 0)
        {
            parsed.Add(ParseSingleOperand(candidate));
        }
    }

    return parsed.ToImmutable();
}
|
||||
|
||||
/// <summary>
/// Classifies one operand as memory, immediate or register and delegates to the matching parser.
/// </summary>
/// <remarks>
/// An operand is treated as memory when it starts with a bracketed expression, or when a
/// bracketed expression follows whitespace or ':' (x86 size/segment prefixes such as
/// <c>dword ptr [rax]</c> or <c>fs:[0x28]</c>). A bracket attached directly to an
/// identifier (for example a lane index like <c>v0.s[1]</c>) stays a register.
/// </remarks>
/// <param name="text">Text of a single operand.</param>
/// <returns>The parsed operand.</returns>
private static Operand ParseSingleOperand(string text)
{
    var trimmed = text.Trim();

    if (LooksLikeMemoryOperand(trimmed))
    {
        return ParseMemoryOperand(trimmed);
    }

    // '#imm', '-imm', decimal and 0x-prefixed literals ("0x" starts with a digit).
    if (trimmed.Length > 0 && (trimmed[0] == '#' || trimmed[0] == '-' || char.IsDigit(trimmed[0])))
    {
        return ParseImmediateOperand(trimmed);
    }

    return ParseRegisterOperand(trimmed);

    static bool LooksLikeMemoryOperand(string s)
    {
        var open = s.IndexOf('[');
        if (open < 0 || s.IndexOf(']', open + 1) < 0)
        {
            return false;
        }

        return open == 0 || char.IsWhiteSpace(s[open - 1]) || s[open - 1] == ':';
    }
}
|
||||
|
||||
/// <summary>
/// Builds a register operand; the register name is the upper-cased operand text.
/// </summary>
/// <param name="text">Operand text, kept verbatim in <c>Text</c>.</param>
/// <returns>A <c>Register</c> operand with every memory field left null.</returns>
private static Operand ParseRegisterOperand(string text) =>
    new Operand(
        Type: OperandType.Register,
        Text: text,
        Value: null,
        Register: text.ToUpperInvariant(),
        MemoryBase: null,
        MemoryIndex: null,
        MemoryScale: null,
        MemoryDisplacement: null);
|
||||
|
||||
/// <summary>
/// Parses an immediate operand: optional leading '#', then <c>0x</c>/<c>-0x</c> hex or a decimal literal.
/// </summary>
/// <param name="text">Operand text, kept verbatim in <c>Text</c>.</param>
/// <returns>An <c>Immediate</c> operand; <c>Value</c> is null when the literal cannot be parsed.</returns>
private static Operand ParseImmediateOperand(string text)
{
    var literal = text.TrimStart('#');
    long? parsed = null;

    var isNegativeHex = literal.StartsWith("-0x", StringComparison.OrdinalIgnoreCase);
    var isHex = isNegativeHex || literal.StartsWith("0x", StringComparison.OrdinalIgnoreCase);

    if (isHex)
    {
        // Skip "0x" (2 characters) or "-0x" (3 characters) before parsing the digits.
        var digits = literal.AsSpan(isNegativeHex ? 3 : 2);
        if (long.TryParse(digits, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out var magnitude))
        {
            parsed = isNegativeHex ? -magnitude : magnitude;
        }
    }
    else if (long.TryParse(literal, CultureInfo.InvariantCulture, out var decimalValue))
    {
        parsed = decimalValue;
    }

    return new Operand(
        Type: OperandType.Immediate,
        Text: text,
        Value: parsed,
        Register: null,
        MemoryBase: null,
        MemoryIndex: null,
        MemoryScale: null,
        MemoryDisplacement: null);
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2DisassemblyPlugin
|
||||
{
|
||||
/// <inheritdoc />
public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
{
    // Validate eagerly: inside an iterator these checks would not run until the
    // caller starts enumerating.
    ArgumentNullException.ThrowIfNull(binary);
    return EnumerateCodeRegions(GetHandle(binary));

    static IEnumerable<CodeRegion> EnumerateCodeRegions(B2R2BinaryHandle handle)
    {
        var textPtr = handle.BinHandle.File.GetTextSectionPointer();
        if (textPtr.IsValid)
        {
            yield return new CodeRegion(
                Name: ".text",
                VirtualAddress: textPtr.Addr,
                FileOffset: (ulong)textPtr.Offset,
                Size: (ulong)(textPtr.MaxAddr - textPtr.Addr + 1), // MaxAddr is treated as inclusive
                IsExecutable: true,
                IsReadable: true,
                IsWritable: false);
        }
        else
        {
            // No usable text section: expose the whole image as one code region.
            yield return new CodeRegion(
                Name: ".code",
                VirtualAddress: handle.BinHandle.File.BaseAddress,
                FileOffset: 0,
                Size: (ulong)handle.Bytes.Length,
                IsExecutable: true,
                IsReadable: true,
                IsWritable: false);
        }
    }
}
|
||||
|
||||
/// <inheritdoc />
public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
{
    // Validate eagerly: inside an iterator these checks would not run until the
    // caller starts enumerating.
    ArgumentNullException.ThrowIfNull(binary);
    return EnumerateSymbols(GetHandle(binary));

    static IEnumerable<SymbolInfo> EnumerateSymbols(B2R2BinaryHandle handle)
    {
        foreach (var addr in handle.BinHandle.File.GetFunctionAddresses())
        {
            // Only addresses are available here, so the name is synthesised and
            // the size is reported as 0 (unknown).
            yield return new SymbolInfo(
                Name: $"func_{addr:X}",
                Address: addr,
                Size: 0,
                Type: SymbolType.Function,
                Binding: SymbolBinding.Global,
                Section: ".text");
        }
    }
}
|
||||
}
|
||||
@@ -1,13 +1,6 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
|
||||
|
||||
using B2R2;
|
||||
using B2R2.FrontEnd;
|
||||
using B2R2.FrontEnd.BinFile;
|
||||
using B2R2.FrontEnd.BinLifter;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
@@ -16,7 +9,7 @@ namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
/// B2R2 is a pure .NET binary analysis framework supporting ELF, PE, and Mach-O
|
||||
/// on x86, x86-64, ARM32, ARM64, MIPS, RISC-V, and more.
|
||||
/// </summary>
|
||||
public sealed class B2R2DisassemblyPlugin : IDisassemblyPlugin
|
||||
public sealed partial class B2R2DisassemblyPlugin : IDisassemblyPlugin
|
||||
{
|
||||
/// <summary>
|
||||
/// Plugin identifier.
|
||||
@@ -25,7 +18,7 @@ public sealed class B2R2DisassemblyPlugin : IDisassemblyPlugin
|
||||
|
||||
private readonly ILogger<B2R2DisassemblyPlugin> _logger;
|
||||
|
||||
private static readonly DisassemblyCapabilities s_capabilities = new()
|
||||
private static readonly DisassemblyCapabilities _capabilities = new()
|
||||
{
|
||||
PluginId = PluginId,
|
||||
Name = "B2R2 Disassembler",
|
||||
@@ -61,627 +54,5 @@ public sealed class B2R2DisassemblyPlugin : IDisassemblyPlugin
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public DisassemblyCapabilities Capabilities => s_capabilities;
|
||||
|
||||
/// <inheritdoc />
public BinaryInfo LoadBinary(Stream stream, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    ArgumentNullException.ThrowIfNull(stream);

    using var buffered = new MemoryStream();
    stream.CopyTo(buffered);

    // The span overload takes its own copy of the data (bytes.ToArray()), so pass a view
    // over the stream's internal buffer rather than allocating a second full-size array
    // with MemoryStream.ToArray().
    var view = new ReadOnlySpan<byte>(buffered.GetBuffer(), 0, checked((int)buffered.Length));
    return LoadBinary(view, archHint, formatHint);
}
|
||||
|
||||
/// <inheritdoc />
public BinaryInfo LoadBinary(ReadOnlySpan<byte> bytes, CpuArchitecture? archHint = null, BinaryFormat? formatHint = null)
{
    // Private copy: the same array is handed to BinHandle and stored in the returned handle.
    var image = bytes.ToArray();

    _logger.LogDebug("Loading binary with B2R2 plugin (size: {Size} bytes)", image.Length);

    // Use the caller's architecture hint when present, otherwise start from x86-64.
    ISA isaHint;
    if (archHint.HasValue)
    {
        isaHint = MapToB2R2Isa(archHint.Value);
    }
    else
    {
        isaHint = new ISA(Architecture.Intel, WordSize.Bit64); // Default to x64
    }

    // BinHandle is B2R2's main interface; the final 'true' enables format detection
    // when loading from bytes.
    var b2r2Handle = new BinHandle(image, isaHint, null, true);
    var file = b2r2Handle.File;

    // Translate what B2R2 detected into the plugin-neutral model.
    var format = MapFromB2R2Format(file.Format);
    var architecture = MapFromB2R2Architecture(file.ISA);
    var bitness = GetBitness(file.ISA.WordSize);
    var endianness = file.ISA.Endian == Endian.Little ? Endianness.Little : Endianness.Big;
    var abi = DetectAbi(format);

    // EntryPoint is an FSharpOption<ulong>; convert it to a nullable ulong.
    ulong? entryPoint = null;
    var entryPointOpt = file.EntryPoint;
    if (Microsoft.FSharp.Core.FSharpOption<ulong>.get_IsSome(entryPointOpt))
    {
        entryPoint = entryPointOpt.Value;
    }

    _logger.LogInformation(
        "Loaded binary with B2R2: Format={Format}, Architecture={Architecture}, Endian={Endian}",
        format, architecture, endianness);

    var metadata = new Dictionary<string, object>();
    metadata["size"] = image.Length;
    metadata["b2r2_isa"] = file.ISA.Arch.ToString();
    if (entryPoint.HasValue)
    {
        metadata["entry_point"] = entryPoint.Value;
    }

    var handle = new B2R2BinaryHandle(b2r2Handle, image);

    return new BinaryInfo(
        Format: format,
        Architecture: architecture,
        Bitness: bitness,
        Endianness: endianness,
        Abi: abi,
        EntryPoint: entryPoint,
        BuildId: null,
        Metadata: metadata,
        Handle: handle);
}
|
||||
|
||||
/// <inheritdoc />
public IEnumerable<CodeRegion> GetCodeRegions(BinaryInfo binary)
{
    // Validate eagerly: inside an iterator these checks would not run until the
    // caller starts enumerating.
    ArgumentNullException.ThrowIfNull(binary);
    return EnumerateCodeRegions(GetHandle(binary));

    static IEnumerable<CodeRegion> EnumerateCodeRegions(B2R2BinaryHandle handle)
    {
        // Use the text section pointer if available.
        var textPtr = handle.BinHandle.File.GetTextSectionPointer();
        if (textPtr.IsValid)
        {
            yield return new CodeRegion(
                Name: ".text",
                VirtualAddress: textPtr.Addr,
                FileOffset: (ulong)textPtr.Offset,
                Size: (ulong)(textPtr.MaxAddr - textPtr.Addr + 1), // MaxAddr is treated as inclusive
                IsExecutable: true,
                IsReadable: true,
                IsWritable: false);
        }
        else
        {
            // Fallback: treat the entire binary as code.
            yield return new CodeRegion(
                Name: ".code",
                VirtualAddress: handle.BinHandle.File.BaseAddress,
                FileOffset: 0,
                Size: (ulong)handle.Bytes.Length,
                IsExecutable: true,
                IsReadable: true,
                IsWritable: false);
        }
    }
}
|
||||
|
||||
/// <inheritdoc />
public IEnumerable<SymbolInfo> GetSymbols(BinaryInfo binary)
{
    // Validate eagerly: inside an iterator these checks would not run until the
    // caller starts enumerating.
    ArgumentNullException.ThrowIfNull(binary);
    return EnumerateSymbols(GetHandle(binary));

    static IEnumerable<SymbolInfo> EnumerateSymbols(B2R2BinaryHandle handle)
    {
        // Function addresses come from B2R2; names are synthesised from the address.
        foreach (var addr in handle.BinHandle.File.GetFunctionAddresses())
        {
            yield return new SymbolInfo(
                Name: $"func_{addr:X}",
                Address: addr,
                Size: 0, // Unknown size
                Type: SymbolType.Function,
                Binding: SymbolBinding.Global,
                Section: ".text");
        }
    }
}
|
||||
|
||||
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, CodeRegion region)
{
    // Validate eagerly: inside an iterator these checks would not run until the
    // caller starts enumerating.
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(region);
    return DisassembleCore(GetHandle(binary), region);

    IEnumerable<DisassembledInstruction> DisassembleCore(B2R2BinaryHandle b2r2Handle, CodeRegion target)
    {
        var lifter = b2r2Handle.BinHandle.NewLiftingUnit();

        var addr = target.VirtualAddress;
        var endAddr = target.VirtualAddress + target.Size;

        _logger.LogDebug(
            "Disassembling region {Name} from 0x{Start:X} to 0x{End:X}",
            target.Name, addr, endAddr);

        while (addr < endAddr)
        {
            IInstruction? instr;
            try
            {
                instr = lifter.ParseInstruction(addr);
            }
            catch
            {
                // Best effort: skip one byte past anything that does not decode and retry.
                addr++;
                continue;
            }

            // A zero-length result would never advance; step one byte instead.
            if (instr is null || instr.Length == 0)
            {
                addr++;
                continue;
            }

            yield return MapInstruction(instr, b2r2Handle, addr);
            addr += instr.Length;
        }
    }
}
|
||||
|
||||
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> Disassemble(BinaryInfo binary, ulong startAddress, ulong length) =>
    // Wrap the raw range in a synthetic region named after its start address.
    // The start address is used for both VirtualAddress and FileOffset.
    Disassemble(
        binary,
        new CodeRegion(
            Name: $"0x{startAddress:X}",
            VirtualAddress: startAddress,
            FileOffset: startAddress,
            Size: length,
            IsExecutable: true,
            IsReadable: true,
            IsWritable: false));
|
||||
|
||||
/// <inheritdoc />
public IEnumerable<DisassembledInstruction> DisassembleSymbol(BinaryInfo binary, SymbolInfo symbol)
{
    ArgumentNullException.ThrowIfNull(binary);
    ArgumentNullException.ThrowIfNull(symbol);

    // A symbol with Size == 0 gets a 4096-byte window.
    var window = symbol.Size == 0 ? 4096UL : symbol.Size;

    // The symbol address is used for both VirtualAddress and FileOffset.
    var symbolRegion = new CodeRegion(
        Name: symbol.Name,
        VirtualAddress: symbol.Address,
        FileOffset: symbol.Address,
        Size: window,
        IsExecutable: true,
        IsReadable: true,
        IsWritable: false);

    return Disassemble(binary, symbolRegion);
}
|
||||
|
||||
#region Architecture Mapping
|
||||
|
||||
/// <summary>
/// Translates the plugin-neutral architecture into a B2R2 <c>ISA</c>.
/// </summary>
/// <param name="arch">Architecture hint supplied by the caller.</param>
/// <returns>The matching ISA; architectures without a mapping fall back to x86-64.</returns>
private static ISA MapToB2R2Isa(CpuArchitecture arch) => arch switch
{
    CpuArchitecture.X86 => new ISA(Architecture.Intel, WordSize.Bit32),
    CpuArchitecture.X86_64 => new ISA(Architecture.Intel, WordSize.Bit64),
    CpuArchitecture.ARM32 => new ISA(Architecture.ARMv7, WordSize.Bit32),
    CpuArchitecture.ARM64 => new ISA(Architecture.ARMv8, WordSize.Bit64),
    CpuArchitecture.MIPS32 => new ISA(Architecture.MIPS, WordSize.Bit32),
    CpuArchitecture.MIPS64 => new ISA(Architecture.MIPS, WordSize.Bit64),
    CpuArchitecture.RISCV64 => new ISA(Architecture.RISCV, WordSize.Bit64),
    CpuArchitecture.PPC32 => new ISA(Architecture.PPC, Endian.Big, WordSize.Bit32),
    CpuArchitecture.SPARC => new ISA(Architecture.SPARC, Endian.Big),
    CpuArchitecture.SH4 => new ISA(Architecture.SH4),
    CpuArchitecture.AVR => new ISA(Architecture.AVR),
    CpuArchitecture.EVM => new ISA(Architecture.EVM, Endian.Big),
    _ => new ISA(Architecture.Intel, WordSize.Bit64) // Default to x64
};
|
||||
|
||||
/// <summary>
/// Translates a B2R2 <c>ISA</c> into the plugin-neutral <c>CpuArchitecture</c>.
/// </summary>
/// <param name="isa">ISA reported by B2R2; architecture and word size are both consulted.</param>
/// <returns>The matching architecture, or <c>CpuArchitecture.Unknown</c> when unmapped.</returns>
private static CpuArchitecture MapFromB2R2Architecture(ISA isa) => isa.Arch switch
{
    // Intel: prefer the explicit word size, then fall back to the IsX86 flag.
    Architecture.Intel when isa.WordSize == WordSize.Bit32 => CpuArchitecture.X86,
    Architecture.Intel when isa.WordSize == WordSize.Bit64 => CpuArchitecture.X86_64,
    Architecture.Intel => isa.IsX86 ? CpuArchitecture.X86 : CpuArchitecture.X86_64,
    Architecture.ARMv7 => CpuArchitecture.ARM32,
    // ARMv8 is reported as ARM64 only with a 64-bit word size.
    Architecture.ARMv8 when isa.WordSize == WordSize.Bit64 => CpuArchitecture.ARM64,
    Architecture.ARMv8 => CpuArchitecture.ARM32,
    Architecture.MIPS when isa.WordSize == WordSize.Bit64 => CpuArchitecture.MIPS64,
    Architecture.MIPS => CpuArchitecture.MIPS32,
    Architecture.RISCV => CpuArchitecture.RISCV64,
    Architecture.PPC => CpuArchitecture.PPC32,
    Architecture.SPARC => CpuArchitecture.SPARC,
    Architecture.SH4 => CpuArchitecture.SH4,
    Architecture.AVR => CpuArchitecture.AVR,
    Architecture.EVM => CpuArchitecture.EVM,
    _ => CpuArchitecture.Unknown
};
|
||||
|
||||
/// <summary>
/// Translates B2R2's <c>FileFormat</c> into the plugin-neutral <c>BinaryFormat</c>.
/// </summary>
/// <param name="format">File format detected by B2R2.</param>
/// <returns>The matching format, or <c>BinaryFormat.Unknown</c> when unmapped.</returns>
private static BinaryFormat MapFromB2R2Format(FileFormat format) => format switch
{
    FileFormat.ELFBinary => BinaryFormat.ELF,
    FileFormat.PEBinary => BinaryFormat.PE,
    FileFormat.MachBinary => BinaryFormat.MachO,
    FileFormat.WasmBinary => BinaryFormat.WASM,
    FileFormat.RawBinary => BinaryFormat.Raw,
    _ => BinaryFormat.Unknown
};
|
||||
|
||||
/// <summary>
/// Converts a B2R2 <c>WordSize</c> into its width in bits.
/// </summary>
/// <param name="wordSize">Word size reported by B2R2.</param>
/// <returns>The bit width; values without a mapping yield 64.</returns>
private static int GetBitness(WordSize wordSize) => wordSize switch
{
    WordSize.Bit8 => 8,
    WordSize.Bit16 => 16,
    WordSize.Bit32 => 32,
    WordSize.Bit64 => 64,
    WordSize.Bit128 => 128,
    WordSize.Bit256 => 256,
    _ => 64
};
|
||||
|
||||
/// <summary>
/// Derives an ABI label from the container format alone.
/// </summary>
/// <param name="format">Detected binary format.</param>
/// <returns>"gnu" for ELF, "msvc" for PE, "darwin" for Mach-O; otherwise null.</returns>
private static string? DetectAbi(BinaryFormat format) => format switch
{
    BinaryFormat.ELF => "gnu",
    BinaryFormat.PE => "msvc",
    BinaryFormat.MachO => "darwin",
    _ => null
};
|
||||
|
||||
#endregion
|
||||
|
||||
#region Instruction Mapping
|
||||
|
||||
/// <summary>
/// Retrieves the B2R2-specific handle carried by <paramref name="binary"/>.
/// </summary>
/// <param name="binary">Binary descriptor whose <c>Handle</c> should be a <c>B2R2BinaryHandle</c>.</param>
/// <returns>The handle, typed as <c>B2R2BinaryHandle</c>.</returns>
/// <exception cref="ArgumentException">The stored handle is null or of a different type.</exception>
private static B2R2BinaryHandle GetHandle(BinaryInfo binary)
{
    return binary.Handle is B2R2BinaryHandle b2r2Handle
        ? b2r2Handle
        : throw new ArgumentException("Invalid binary handle - not a B2R2 handle", nameof(binary));
}
|
||||
|
||||
/// <summary>
/// Converts a B2R2 instruction into the plugin-neutral <c>DisassembledInstruction</c>.
/// </summary>
/// <param name="instr">Instruction decoded by B2R2.</param>
/// <param name="handle">Handle that owns the raw image bytes.</param>
/// <param name="address">Virtual address the instruction was decoded at.</param>
/// <returns>
/// The mapped instruction. <c>RawBytes</c> is empty when the instruction does not lie
/// completely inside the byte buffer.
/// </returns>
private static DisassembledInstruction MapInstruction(IInstruction instr, B2R2BinaryHandle handle, ulong address)
{
    // Get the disassembly string and split it once into mnemonic and operand text.
    var disasm = instr.Disasm();
    var parts = disasm.Split(' ', 2, StringSplitOptions.RemoveEmptyEntries);
    var mnemonic = parts.Length > 0 ? parts[0] : "???";
    var operandsText = parts.Length > 1 ? parts[1] : "";

    // Get raw bytes from the binary data.
    // NOTE(review): the byte offset is derived as (address - BaseAddress), i.e. a linearly
    // mapped image is assumed - confirm for formats whose sections are not mapped 1:1.
    // The range check is done in ulong so that an address below the base, or an offset
    // beyond int.MaxValue, can never wrap into a seemingly valid index.
    var rawBytes = ImmutableArray<byte>.Empty;
    var baseAddress = handle.BinHandle.File.BaseAddress;
    var length = (int)instr.Length;
    if (address >= baseAddress && length >= 0)
    {
        var offset = address - baseAddress;
        var available = (ulong)handle.Bytes.Length;
        if (offset <= available && (ulong)length <= available - offset)
        {
            rawBytes = handle.Bytes.AsSpan((int)offset, length).ToArray().ToImmutableArray();
        }
    }

    var kind = ClassifyInstruction(instr, mnemonic);
    var operands = ParseOperands(operandsText, mnemonic);

    return new DisassembledInstruction(
        Address: address,
        RawBytes: rawBytes,
        Mnemonic: mnemonic,
        OperandsText: operandsText,
        Kind: kind,
        Operands: operands);
}
|
||||
|
||||
/// <summary>
/// Parses the operand portion of a disassembly line into structured operands.
/// </summary>
/// <param name="operandsText">Text following the mnemonic; may be empty or whitespace.</param>
/// <param name="mnemonic">Instruction mnemonic (not used by the current implementation).</param>
/// <returns>The parsed operands in source order; empty when there is nothing to parse.</returns>
private static ImmutableArray<Operand> ParseOperands(string operandsText, string mnemonic)
{
    if (string.IsNullOrWhiteSpace(operandsText))
    {
        return ImmutableArray<Operand>.Empty;
    }

    // Split operands by comma, respecting brackets.
    var pieces = SplitOperands(operandsText);
    var parsed = ImmutableArray.CreateBuilder<Operand>();

    for (var i = 0; i < pieces.Count; i++)
    {
        var candidate = pieces[i].Trim();
        if (candidate.Length != 0)
        {
            parsed.Add(ParseSingleOperand(candidate));
        }
    }

    return parsed.ToImmutable();
}
|
||||
|
||||
/// <summary>
/// Splits an operand list on top-level commas, leaving commas inside
/// <c>[]</c>, <c>()</c> or <c>{}</c> untouched.
/// </summary>
/// <param name="operandsText">Operand portion of a disassembly line.</param>
/// <returns>
/// The operand strings in source order. Whitespace is preserved; empty segments are dropped.
/// </returns>
private static IReadOnlyList<string> SplitOperands(string operandsText)
{
    var operands = new List<string>();
    var current = new System.Text.StringBuilder();
    var depth = 0;

    foreach (var ch in operandsText)
    {
        switch (ch)
        {
            case '[' or '(' or '{':
                depth++;
                break;

            case ']' or ')' or '}':
                // Never drop below zero: a stray closing bracket must not stop
                // every later top-level comma from splitting.
                if (depth > 0)
                {
                    depth--;
                }

                break;

            case ',' when depth == 0:
                if (current.Length > 0)
                {
                    operands.Add(current.ToString());
                    current.Clear();
                }

                continue; // the separator itself is not part of any operand
        }

        current.Append(ch);
    }

    if (current.Length > 0)
    {
        operands.Add(current.ToString());
    }

    return operands;
}
|
||||
|
||||
/// <summary>
/// Classifies one operand as memory, immediate or register and delegates to the matching parser.
/// </summary>
/// <remarks>
/// An operand is treated as memory when it starts with a bracketed expression, or when a
/// bracketed expression follows whitespace or ':' (x86 size/segment prefixes such as
/// <c>dword ptr [rax]</c> or <c>fs:[0x28]</c>). A bracket attached directly to an
/// identifier (for example a lane index like <c>v0.s[1]</c>) stays a register.
/// </remarks>
/// <param name="text">Text of a single operand.</param>
/// <returns>The parsed operand.</returns>
private static Operand ParseSingleOperand(string text)
{
    var trimmed = text.Trim();

    // Memory operand: [...] (optionally with a prefix, or ARM64 write-back "[...]!").
    if (LooksLikeMemoryOperand(trimmed))
    {
        return ParseMemoryOperand(trimmed);
    }

    // Immediate: '#imm', '-imm', decimal and 0x-prefixed literals ("0x" starts with a digit).
    if (trimmed.Length > 0 && (trimmed[0] == '#' || trimmed[0] == '-' || char.IsDigit(trimmed[0])))
    {
        return ParseImmediateOperand(trimmed);
    }

    // Anything else is assumed to be a register.
    return ParseRegisterOperand(trimmed);

    static bool LooksLikeMemoryOperand(string s)
    {
        var open = s.IndexOf('[');
        if (open < 0 || s.IndexOf(']', open + 1) < 0)
        {
            return false;
        }

        return open == 0 || char.IsWhiteSpace(s[open - 1]) || s[open - 1] == ':';
    }
}
|
||||
|
||||
/// <summary>
/// Builds a register operand; the register name is the upper-cased operand text.
/// </summary>
/// <param name="text">Operand text, kept verbatim in <c>Text</c>.</param>
/// <returns>A <c>Register</c> operand with every memory field left null.</returns>
private static Operand ParseRegisterOperand(string text) =>
    new Operand(
        Type: OperandType.Register,
        Text: text,
        Value: null,
        Register: text.ToUpperInvariant(),
        MemoryBase: null,
        MemoryIndex: null,
        MemoryScale: null,
        MemoryDisplacement: null);
|
||||
|
||||
/// <summary>
/// Parses an immediate operand: optional leading '#', then <c>0x</c>/<c>-0x</c> hex or a decimal literal.
/// </summary>
/// <param name="text">Operand text, kept verbatim in <c>Text</c>.</param>
/// <returns>An <c>Immediate</c> operand; <c>Value</c> is null when the literal cannot be parsed.</returns>
private static Operand ParseImmediateOperand(string text)
{
    var literal = text.TrimStart('#');
    long? parsed = null;

    var isNegativeHex = literal.StartsWith("-0x", StringComparison.OrdinalIgnoreCase);
    var isHex = isNegativeHex || literal.StartsWith("0x", StringComparison.OrdinalIgnoreCase);

    if (isHex)
    {
        // Skip "0x" (2 characters) or "-0x" (3 characters) before parsing the digits.
        var digits = literal.AsSpan(isNegativeHex ? 3 : 2);
        if (long.TryParse(
                digits,
                System.Globalization.NumberStyles.HexNumber,
                System.Globalization.CultureInfo.InvariantCulture,
                out var magnitude))
        {
            parsed = isNegativeHex ? -magnitude : magnitude;
        }
    }
    else if (long.TryParse(literal, System.Globalization.CultureInfo.InvariantCulture, out var decimalValue))
    {
        parsed = decimalValue;
    }

    return new Operand(
        Type: OperandType.Immediate,
        Text: text,
        Value: parsed,
        Register: null,
        MemoryBase: null,
        MemoryIndex: null,
        MemoryScale: null,
        MemoryDisplacement: null);
}
|
||||
|
||||
/// <summary>
/// Parses a bracketed memory operand into base, index, scale and displacement.
/// </summary>
/// <remarks>
/// Common patterns:
/// x86: <c>[rax]</c>, <c>[rax+rbx]</c>, <c>[rax+rbx*4]</c>, <c>[rax+0x10]</c>, <c>[rbp-0x10]</c>;
/// ARM: <c>[x0]</c>, <c>[x0, #8]</c>, <c>[x0, x1]</c>, <c>[x0, x1, lsl #2]</c>.
/// Text before the first '[' and after the last ']' is ignored. Components that cannot be
/// interpreted are skipped, not reported.
/// </remarks>
/// <param name="text">Operand text containing a bracketed expression.</param>
/// <returns>
/// A <c>Memory</c> operand. All memory fields are null when no well-formed bracket pair is
/// found. Register names are upper-cased.
/// </returns>
private static Operand ParseMemoryOperand(string text)
{
    string? memBase = null;
    string? memIndex = null;
    int? memScale = null;
    long? memDisp = null;

    // Extract the content between the brackets.
    var open = text.IndexOf('[');
    var close = text.LastIndexOf(']');

    if (open >= 0 && close > open)
    {
        var inner = text.Substring(open + 1, close - open - 1);

        // Split by + or , depending on the architecture's style.
        foreach (var piece in inner.Split(['+', ','], StringSplitOptions.RemoveEmptyEntries))
        {
            var token = piece.Trim();
            if (token.Length == 0)
            {
                continue;
            }

            // x86 writes negative displacements inline ("rbp-0x10", "rbx*4-8"): peel the
            // signed tail off so the register part and the displacement are seen separately.
            // ARM immediates ("#-8") keep their sign and are left intact.
            var minus = token[0] == '#' ? -1 : token.IndexOf('-', 1);
            if (minus > 0)
            {
                Consume(token[..minus].TrimEnd());
                Consume(token[minus..]);
            }
            else
            {
                Consume(token);
            }
        }
    }

    return new Operand(
        Type: OperandType.Memory,
        Text: text,
        Value: null,
        Register: null,
        MemoryBase: memBase,
        MemoryIndex: memIndex,
        MemoryScale: memScale,
        MemoryDisplacement: memDisp);

    // Classifies one component as scaled index, displacement, or register.
    void Consume(string token)
    {
        if (token.Length == 0)
        {
            return;
        }

        // Scaled index: reg*N
        if (token.Contains('*'))
        {
            var scaleParts = token.Split('*');
            if (scaleParts.Length == 2)
            {
                memIndex = scaleParts[0].Trim().ToUpperInvariant();
                if (int.TryParse(
                        scaleParts[1].Trim(),
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out var scale))
                {
                    memScale = scale;
                }
            }

            return;
        }

        // Displacement: "#imm" (ARM), or anything starting with a digit or a minus sign.
        var numeric = token.TrimStart('#').Trim();
        var looksNumeric = numeric.Length > 0 && (char.IsDigit(numeric[0]) || numeric[0] == '-');
        if (token[0] == '#' || looksNumeric)
        {
            if (TryParseDisplacement(numeric, out var displacement))
            {
                memDisp = displacement;
            }

            return;
        }

        // Otherwise a register: the first one is the base, the second the index.
        if (memBase is null)
        {
            memBase = token.ToUpperInvariant();
        }
        else if (memIndex is null)
        {
            memIndex = token.ToUpperInvariant();
        }
    }

    // Parses "[-]0xHEX" or a signed decimal using the invariant culture.
    static bool TryParseDisplacement(string s, out long value)
    {
        var negative = s.StartsWith('-');
        var magnitude = negative ? s[1..].TrimStart() : s;

        if (magnitude.StartsWith("0x", StringComparison.OrdinalIgnoreCase))
        {
            if (long.TryParse(
                    magnitude.AsSpan(2),
                    System.Globalization.NumberStyles.HexNumber,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var hex))
            {
                value = negative ? -hex : hex;
                return true;
            }

            value = 0;
            return false;
        }

        return long.TryParse(
            negative ? "-" + magnitude : magnitude,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);
    }
}
|
||||
|
||||
/// <summary>
/// Maps a decoded instruction to a coarse <see cref="InstructionKind"/>.
/// </summary>
/// <remarks>
/// The classification flags exposed by <paramref name="instr"/> are consulted first, in a
/// fixed priority order. Only when none of them is set does the mnemonic text decide.
/// </remarks>
/// <param name="instr">Instruction whose classification flags are inspected.</param>
/// <param name="mnemonic">Mnemonic text; upper-cased (invariant) before matching.</param>
/// <returns>
/// The first matching kind, or <see cref="InstructionKind.Unknown"/> when nothing matches.
/// </returns>
private static InstructionKind ClassifyInstruction(IInstruction instr, string mnemonic)
{
    // Flag-based classification; the order below is the priority order.
    if (instr.IsRET)
    {
        return InstructionKind.Return;
    }

    if (instr.IsCall)
    {
        return InstructionKind.Call;
    }

    if (instr.IsCondBranch)
    {
        return InstructionKind.ConditionalBranch;
    }

    if (instr.IsBranch)
    {
        return InstructionKind.Branch;
    }

    if (instr.IsNop)
    {
        return InstructionKind.Nop;
    }

    if (instr.IsInterrupt)
    {
        return InstructionKind.Syscall;
    }

    // No flag matched: decide from the mnemonic text.
    var name = mnemonic.ToUpperInvariant();

    return name switch
    {
        "ADD" or "SUB" or "MUL" or "DIV" or "IMUL" or "IDIV" or
        "INC" or "DEC" or "NEG" or "ADC" or "SBB" => InstructionKind.Arithmetic,

        "AND" or "OR" or "XOR" or "NOT" or "TEST" or "ORR" or "EOR" => InstructionKind.Logic,

        "SHL" or "SHR" or "SAL" or "SAR" or "ROL" or "ROR" or
        "LSL" or "LSR" or "ASR" => InstructionKind.Shift,

        "LEA" or "PUSH" or "POP" or "XCHG" => InstructionKind.Move,
        _ when name.StartsWith("MOV", StringComparison.Ordinal) => InstructionKind.Move,

        // Any "LD" prefix counts as a load; this also covers "LDR".
        _ when name.StartsWith("LD", StringComparison.Ordinal) => InstructionKind.Load,

        // Any "ST" prefix counts as a store; this also covers "STR".
        _ when name.StartsWith("ST", StringComparison.Ordinal) => InstructionKind.Store,

        "CMP" or "CMPS" or "SCAS" => InstructionKind.Compare,

        _ => InstructionKind.Unknown,
    };
}
|
||||
|
||||
#endregion
|
||||
/// <summary>
/// Gets the disassembly capabilities held in the <c>_capabilities</c> field.
/// </summary>
public DisassemblyCapabilities Capabilities => _capabilities;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Internal handle for B2R2 binary data.
|
||||
/// </summary>
/// <param name="BinHandle">The B2R2 <c>BinHandle</c> carried by this record.</param>
/// <param name="Bytes">
/// Byte array carried alongside the handle. NOTE(review): presumably the raw bytes the
/// handle was created from; confirm at the construction site.
/// </param>
|
||||
internal sealed record B2R2BinaryHandle(BinHandle BinHandle, byte[] Bytes);
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
/// <summary>
|
||||
/// Per-ISA pool statistics.
|
||||
/// </summary>
/// <remarks>
/// <c>B2R2LifterPool.GetStats()</c> creates one instance per ISA key, taking
/// <c>MaxPoolSize</c> from the pool option <c>MaxPoolSizePerIsa</c>.
/// </remarks>
|
||||
/// <param name="PooledCount">Number of lifters in pool for this ISA.</param>
|
||||
/// <param name="ActiveCount">Number of lifters currently in use for this ISA.</param>
|
||||
/// <param name="MaxPoolSize">Maximum pool size for this ISA.</param>
|
||||
public sealed record B2R2IsaPoolStats(
|
||||
int PooledCount,
|
||||
int ActiveCount,
|
||||
int MaxPoolSize);
|
||||
@@ -0,0 +1,106 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using B2R2;
|
||||
using B2R2.FrontEnd;
|
||||
using B2R2.FrontEnd.BinFile;
|
||||
using B2R2.FrontEnd.BinLifter;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Globalization;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2LifterPool
{
    /// <summary>
    /// Builds the pool key for an ISA in the form <c>{arch}-{bits}</c>, for example <c>intel-64</c>.
    /// </summary>
    /// <param name="isa">ISA to derive the key from.</param>
    /// <returns>
    /// The lower-cased architecture name, a dash, then <c>64</c> for a 64-bit word size and
    /// <c>32</c> for every other word size.
    /// </returns>
    private static string GetIsaKey(ISA isa) =>
        string.Format(
            CultureInfo.InvariantCulture,
            "{0}-{1}",
            isa.Arch.ToString().ToLowerInvariant(),
            isa.WordSize == WordSize.Bit64 ? "64" : "32");

    /// <summary>
    /// Parses a key of the form <c>{arch}-{bits}</c> back into an ISA.
    /// </summary>
    /// <param name="key">Key such as <c>intel-64</c> or <c>armv7-32</c>.</param>
    /// <returns>
    /// The parsed ISA, or <c>null</c> when the key is null/blank, does not consist of exactly
    /// two dash-separated parts, names an unknown architecture, or carries a word size other
    /// than <c>32</c> or <c>64</c>.
    /// </returns>
    private static ISA? ParseIsaKey(string key)
    {
        if (string.IsNullOrWhiteSpace(key))
        {
            return null;
        }

        var parts = key.Split('-');
        if (parts.Length != 2)
        {
            return null;
        }

        var archStr = parts[0].ToLowerInvariant();

        // Reject unknown word sizes instead of silently treating them as 32-bit, so a typo
        // such as "intel-46" is reported by the caller rather than warming the wrong ISA.
        WordSize wordSize;
        switch (parts[1])
        {
            case "64":
                wordSize = WordSize.Bit64;
                break;
            case "32":
                wordSize = WordSize.Bit32;
                break;
            default:
                return null;
        }

        return archStr switch
        {
            "intel" => new ISA(Architecture.Intel, wordSize),
            "armv7" => new ISA(Architecture.ARMv7, wordSize),
            "armv8" => new ISA(Architecture.ARMv8, wordSize),
            "mips" => new ISA(Architecture.MIPS, wordSize),
            "riscv" => new ISA(Architecture.RISCV, wordSize),
            "ppc" => new ISA(Architecture.PPC, Endian.Big, wordSize),
            // The SPARC ISA is built without the parsed word size.
            "sparc" => new ISA(Architecture.SPARC, Endian.Big),
            _ => (ISA?)null
        };
    }

    /// <summary>
    /// Returns the bag of pooled entries for <paramref name="isaKey"/>, creating it on first use.
    /// </summary>
    private ConcurrentBag<PooledLifterEntry> GetOrCreatePool(string isaKey) =>
        _pools.GetOrAdd(isaKey, _ => new ConcurrentBag<PooledLifterEntry>());

    /// <summary>
    /// Creates a fresh pool entry for <paramref name="isa"/>: a <c>BinHandle</c> over a
    /// 64-byte placeholder NOP sled plus a lifting unit obtained from that handle.
    /// </summary>
    private static PooledLifterEntry CreateLifterEntry(ISA isa)
    {
        var nopBytes = CreateNopSled(isa, 64);
        var binHandle = new BinHandle(nopBytes, isa, null, true);
        var liftingUnit = binHandle.NewLiftingUnit();
        return new PooledLifterEntry(binHandle, liftingUnit, DateTimeOffset.UtcNow);
    }

    /// <summary>
    /// Builds a buffer of <paramref name="size"/> bytes filled with NOP encodings for the
    /// architecture of <paramref name="isa"/>.
    /// </summary>
    /// <param name="isa">ISA that selects the NOP encoding.</param>
    /// <param name="size">Buffer length in bytes.</param>
    /// <returns>
    /// The filled buffer. For ARM only whole 4-byte words are written, so up to three trailing
    /// bytes stay zero. Architectures without a dedicated case get an all-zero buffer.
    /// </returns>
    private static byte[] CreateNopSled(ISA isa, int size)
    {
        var bytes = new byte[size];

        switch (isa.Arch)
        {
            case Architecture.Intel:
                // x86/x64 NOP = 0x90
                Array.Fill(bytes, (byte)0x90);
                break;

            case Architecture.ARMv7:
            case Architecture.ARMv8:
                if (isa.WordSize == WordSize.Bit64)
                {
                    // ARM64 NOP bytes: 1F 20 03 D5
                    for (var i = 0; i + 3 < size; i += 4)
                    {
                        bytes[i] = 0x1F;
                        bytes[i + 1] = 0x20;
                        bytes[i + 2] = 0x03;
                        bytes[i + 3] = 0xD5;
                    }
                }
                else
                {
                    // ARM32 NOP 0xE320F000, little-endian bytes: 00 F0 20 E3
                    for (var i = 0; i + 3 < size; i += 4)
                    {
                        bytes[i] = 0x00;
                        bytes[i + 1] = 0xF0;
                        bytes[i + 2] = 0x20;
                        bytes[i + 3] = 0xE3;
                    }
                }
                break;

            default:
                // Other architectures use zero bytes; a new array is already zero-filled.
                break;
        }

        return bytes;
    }

    /// <summary>
    /// Adds one to the active-lifter counter for <paramref name="isaKey"/>, starting at 1.
    /// </summary>
    private void IncrementActive(string isaKey)
    {
        _activeCount.AddOrUpdate(isaKey, 1, (_, v) => v + 1);
    }

    /// <summary>
    /// Subtracts one from the active-lifter counter for <paramref name="isaKey"/>; the counter
    /// never drops below zero and is created as 0 when missing.
    /// </summary>
    private void DecrementActive(string isaKey)
    {
        _activeCount.AddOrUpdate(isaKey, 0, (_, v) => Math.Max(0, v - 1));
    }
}
|
||||
@@ -0,0 +1,79 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2LifterPool
{
    /// <summary>
    /// Gets the current pool statistics.
    /// </summary>
    /// <returns>
    /// Per-ISA pooled/active counts plus totals. The pooled total is summed from the same
    /// per-ISA snapshot, so it always matches the per-ISA numbers in the result.
    /// </returns>
    public B2R2LifterPoolStats GetStats()
    {
        var isaStats = new Dictionary<string, B2R2IsaPoolStats>();

        foreach (var kvp in _pools)
        {
            var isaKey = kvp.Key;
            var poolSize = kvp.Value.Count;
            var activeCount = _activeCount.GetValueOrDefault(isaKey, 0);

            isaStats[isaKey] = new B2R2IsaPoolStats(
                PooledCount: poolSize,
                ActiveCount: activeCount,
                MaxPoolSize: _options.MaxPoolSizePerIsa);
        }

        return new B2R2LifterPoolStats(
            TotalPooledLifters: isaStats.Values.Sum(s => s.PooledCount),
            TotalActiveLifters: _activeCount.Values.Sum(),
            IsWarm: _warmed,
            IsaStats: isaStats.ToImmutableDictionary());
    }

    /// <summary>
    /// Warms the pool by preloading lifters for the configured ISAs.
    /// </summary>
    /// <remarks>
    /// Does nothing when warm preload is disabled or the pool is already marked warm.
    /// A failure for one ISA is logged and does not stop the remaining ISAs. An ISA whose
    /// pool already holds <c>MaxPoolSizePerIsa</c> entries is skipped, matching the limit
    /// applied when lifters are returned to the pool.
    /// </remarks>
    public void WarmPool()
    {
        if (!_options.EnableWarmPreload) return;
        if (_warmed) return;

        lock (_warmLock)
        {
            if (_warmed) return;

            // A default (uninitialized) ImmutableArray throws on Length and enumeration,
            // so treat it as an empty list.
            var preloadIsas = _options.WarmPreloadIsas.IsDefault
                ? ImmutableArray<string>.Empty
                : _options.WarmPreloadIsas;

            _logger.LogInformation(
                "Warming B2R2 lifter pool for {IsaCount} ISAs",
                preloadIsas.Length);

            foreach (var isaKey in preloadIsas)
            {
                try
                {
                    var isa = ParseIsaKey(isaKey);
                    if (isa is null)
                    {
                        _logger.LogWarning("Unknown ISA key for warm preload: {IsaKey}", isaKey);
                        continue;
                    }

                    var pool = GetOrCreatePool(GetIsaKey(isa));

                    // Duplicate keys or a small limit must not push the pool over its bound.
                    if (pool.Count >= _options.MaxPoolSizePerIsa)
                    {
                        _logger.LogDebug("Skipped warm preload, pool already full for ISA: {IsaKey}", isaKey);
                        continue;
                    }

                    pool.Add(CreateLifterEntry(isa));

                    _logger.LogDebug("Warmed lifter for ISA: {IsaKey}", isaKey);
                }
                catch (Exception ex)
                {
                    _logger.LogWarning(ex, "Failed to warm lifter for ISA: {IsaKey}", isaKey);
                }
            }

            _warmed = true;
            _logger.LogInformation("B2R2 lifter pool warm complete");
        }
    }
}
|
||||
@@ -2,103 +2,21 @@
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
// Sprint: SPRINT_20260112_004_BINIDX_b2r2_lowuir_perf_cache (BINIDX-LIFTER-02)
|
||||
// Task: Bounded lifter pool with warm preload per ISA
|
||||
|
||||
|
||||
using B2R2;
|
||||
using B2R2.FrontEnd;
|
||||
using B2R2.FrontEnd.BinFile;
|
||||
using B2R2.FrontEnd.BinLifter;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Immutable;
|
||||
using System.Globalization;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
/// <summary>
|
||||
/// Configuration options for the B2R2 lifter pool.
|
||||
/// </summary>
|
||||
public sealed class B2R2LifterPoolOptions
|
||||
{
|
||||
/// <summary>
|
||||
/// Configuration section name.
|
||||
/// </summary>
|
||||
public const string SectionName = "StellaOps:BinaryIndex:B2R2LifterPool";
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of pooled lifters per ISA.
|
||||
/// </summary>
|
||||
public int MaxPoolSizePerIsa { get; set; } = 4;
|
||||
|
||||
/// <summary>
|
||||
/// Whether to warm preload lifters for common ISAs at startup.
|
||||
/// </summary>
|
||||
public bool EnableWarmPreload { get; set; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// ISAs to warm preload at startup.
|
||||
/// </summary>
|
||||
public ImmutableArray<string> WarmPreloadIsas { get; set; } =
|
||||
[
|
||||
"intel-64",
|
||||
"intel-32",
|
||||
"armv8-64",
|
||||
"armv7-32"
|
||||
];
|
||||
|
||||
/// <summary>
|
||||
/// Timeout for acquiring a lifter from the pool.
|
||||
/// </summary>
|
||||
public TimeSpan AcquireTimeout { get; set; } = TimeSpan.FromSeconds(5);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Pooled B2R2 BinHandle and LiftingUnit for reuse across calls.
|
||||
/// </summary>
|
||||
public sealed class PooledLifter : IDisposable
|
||||
{
|
||||
private readonly B2R2LifterPool _pool;
|
||||
private readonly ISA _isa;
|
||||
private bool _disposed;
|
||||
|
||||
internal PooledLifter(
|
||||
B2R2LifterPool pool,
|
||||
ISA isa,
|
||||
BinHandle binHandle,
|
||||
LiftingUnit liftingUnit)
|
||||
{
|
||||
_pool = pool ?? throw new ArgumentNullException(nameof(pool));
|
||||
_isa = isa;
|
||||
BinHandle = binHandle ?? throw new ArgumentNullException(nameof(binHandle));
|
||||
LiftingUnit = liftingUnit ?? throw new ArgumentNullException(nameof(liftingUnit));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// The B2R2 BinHandle for this lifter.
|
||||
/// </summary>
|
||||
public BinHandle BinHandle { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The B2R2 LiftingUnit for this lifter.
|
||||
/// </summary>
|
||||
public LiftingUnit LiftingUnit { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Returns the lifter to the pool.
|
||||
/// </summary>
|
||||
public void Dispose()
|
||||
{
|
||||
if (_disposed) return;
|
||||
_disposed = true;
|
||||
_pool.Return(this, _isa);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Bounded pool of B2R2 lifters with warm preload per ISA.
|
||||
/// Thread-safe and designed for reuse in high-throughput scenarios.
|
||||
/// </summary>
|
||||
public sealed class B2R2LifterPool : IDisposable
|
||||
public sealed partial class B2R2LifterPool : IDisposable
|
||||
{
|
||||
private readonly ILogger<B2R2LifterPool> _logger;
|
||||
private readonly B2R2LifterPoolOptions _options;
|
||||
@@ -121,77 +39,6 @@ public sealed class B2R2LifterPool : IDisposable
|
||||
_options = options?.Value ?? new B2R2LifterPoolOptions();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the current pool statistics.
|
||||
/// </summary>
|
||||
public B2R2LifterPoolStats GetStats()
|
||||
{
|
||||
var isaStats = new Dictionary<string, B2R2IsaPoolStats>();
|
||||
|
||||
foreach (var kvp in _pools)
|
||||
{
|
||||
var isaKey = kvp.Key;
|
||||
var poolSize = kvp.Value.Count;
|
||||
var activeCount = _activeCount.GetValueOrDefault(isaKey, 0);
|
||||
|
||||
isaStats[isaKey] = new B2R2IsaPoolStats(
|
||||
PooledCount: poolSize,
|
||||
ActiveCount: activeCount,
|
||||
MaxPoolSize: _options.MaxPoolSizePerIsa);
|
||||
}
|
||||
|
||||
return new B2R2LifterPoolStats(
|
||||
TotalPooledLifters: _pools.Values.Sum(b => b.Count),
|
||||
TotalActiveLifters: _activeCount.Values.Sum(),
|
||||
IsWarm: _warmed,
|
||||
IsaStats: isaStats.ToImmutableDictionary());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Warms the pool by preloading lifters for common ISAs.
|
||||
/// </summary>
|
||||
public void WarmPool()
|
||||
{
|
||||
if (!_options.EnableWarmPreload) return;
|
||||
if (_warmed) return;
|
||||
|
||||
lock (_warmLock)
|
||||
{
|
||||
if (_warmed) return;
|
||||
|
||||
_logger.LogInformation(
|
||||
"Warming B2R2 lifter pool for {IsaCount} ISAs",
|
||||
_options.WarmPreloadIsas.Length);
|
||||
|
||||
foreach (var isaKey in _options.WarmPreloadIsas)
|
||||
{
|
||||
try
|
||||
{
|
||||
var isa = ParseIsaKey(isaKey);
|
||||
if (isa is null)
|
||||
{
|
||||
_logger.LogWarning("Unknown ISA key for warm preload: {IsaKey}", isaKey);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Create and pool a lifter for this ISA
|
||||
var entry = CreateLifterEntry(isa);
|
||||
var pool = GetOrCreatePool(GetIsaKey(isa));
|
||||
pool.Add(entry);
|
||||
|
||||
_logger.LogDebug("Warmed lifter for ISA: {IsaKey}", isaKey);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to warm lifter for ISA: {IsaKey}", isaKey);
|
||||
}
|
||||
}
|
||||
|
||||
_warmed = true;
|
||||
_logger.LogInformation("B2R2 lifter pool warm complete");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Acquires a lifter for the specified ISA.
|
||||
/// </summary>
|
||||
@@ -202,7 +49,6 @@ public sealed class B2R2LifterPool : IDisposable
|
||||
var isaKey = GetIsaKey(isa);
|
||||
var pool = GetOrCreatePool(isaKey);
|
||||
|
||||
// Try to get an existing lifter from the pool
|
||||
if (pool.TryTake(out var entry))
|
||||
{
|
||||
IncrementActive(isaKey);
|
||||
@@ -210,7 +56,6 @@ public sealed class B2R2LifterPool : IDisposable
|
||||
return new PooledLifter(this, isa, entry.BinHandle, entry.LiftingUnit);
|
||||
}
|
||||
|
||||
// Create a new lifter
|
||||
var newEntry = CreateLifterEntry(isa);
|
||||
IncrementActive(isaKey);
|
||||
_logger.LogTrace("Created new lifter for {Isa}", isaKey);
|
||||
@@ -227,7 +72,6 @@ public sealed class B2R2LifterPool : IDisposable
|
||||
|
||||
var pool = GetOrCreatePool(isaKey);
|
||||
|
||||
// Only return to pool if under limit
|
||||
if (pool.Count < _options.MaxPoolSizePerIsa)
|
||||
{
|
||||
var entry = new PooledLifterEntry(
|
||||
@@ -254,132 +98,4 @@ public sealed class B2R2LifterPool : IDisposable
|
||||
|
||||
_logger.LogInformation("B2R2 lifter pool disposed");
|
||||
}
|
||||
|
||||
#region Private Helpers
|
||||
|
||||
private static string GetIsaKey(ISA isa) =>
|
||||
string.Format(
|
||||
CultureInfo.InvariantCulture,
|
||||
"{0}-{1}",
|
||||
isa.Arch.ToString().ToLowerInvariant(),
|
||||
isa.WordSize == WordSize.Bit64 ? "64" : "32");
|
||||
|
||||
private static ISA? ParseIsaKey(string key)
|
||||
{
|
||||
var parts = key.Split('-');
|
||||
if (parts.Length != 2) return null;
|
||||
|
||||
var archStr = parts[0].ToLowerInvariant();
|
||||
var bits = parts[1];
|
||||
|
||||
var wordSize = bits == "64" ? WordSize.Bit64 : WordSize.Bit32;
|
||||
|
||||
return archStr switch
|
||||
{
|
||||
"intel" => new ISA(Architecture.Intel, wordSize),
|
||||
"armv7" => new ISA(Architecture.ARMv7, wordSize),
|
||||
"armv8" => new ISA(Architecture.ARMv8, wordSize),
|
||||
"mips" => new ISA(Architecture.MIPS, wordSize),
|
||||
"riscv" => new ISA(Architecture.RISCV, wordSize),
|
||||
"ppc" => new ISA(Architecture.PPC, Endian.Big, wordSize),
|
||||
"sparc" => new ISA(Architecture.SPARC, Endian.Big),
|
||||
_ => (ISA?)null
|
||||
};
|
||||
}
|
||||
|
||||
private ConcurrentBag<PooledLifterEntry> GetOrCreatePool(string isaKey) =>
|
||||
_pools.GetOrAdd(isaKey, _ => new ConcurrentBag<PooledLifterEntry>());
|
||||
|
||||
private static PooledLifterEntry CreateLifterEntry(ISA isa)
|
||||
{
|
||||
// Create a minimal BinHandle for the ISA
|
||||
// Use a small NOP sled as placeholder code
|
||||
var nopBytes = CreateNopSled(isa, 64);
|
||||
var binHandle = new BinHandle(nopBytes, isa, null, true);
|
||||
var liftingUnit = binHandle.NewLiftingUnit();
|
||||
return new PooledLifterEntry(binHandle, liftingUnit, DateTimeOffset.UtcNow);
|
||||
}
|
||||
|
||||
private static byte[] CreateNopSled(ISA isa, int size)
|
||||
{
|
||||
var bytes = new byte[size];
|
||||
|
||||
// Fill with architecture-appropriate NOP bytes
|
||||
switch (isa.Arch)
|
||||
{
|
||||
case Architecture.Intel:
|
||||
// x86/x64 NOP = 0x90
|
||||
Array.Fill(bytes, (byte)0x90);
|
||||
break;
|
||||
|
||||
case Architecture.ARMv7:
|
||||
case Architecture.ARMv8:
|
||||
// ARM NOP = 0x00000000 or 0x1F 20 03 D5 (ARM64)
|
||||
if (isa.WordSize == WordSize.Bit64)
|
||||
{
|
||||
for (var i = 0; i + 3 < size; i += 4)
|
||||
{
|
||||
bytes[i] = 0x1F;
|
||||
bytes[i + 1] = 0x20;
|
||||
bytes[i + 2] = 0x03;
|
||||
bytes[i + 3] = 0xD5;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// ARM32 NOP = 0xE320F000 (big endian) or 0x00 F0 20 E3 (little)
|
||||
for (var i = 0; i + 3 < size; i += 4)
|
||||
{
|
||||
bytes[i] = 0x00;
|
||||
bytes[i + 1] = 0xF0;
|
||||
bytes[i + 2] = 0x20;
|
||||
bytes[i + 3] = 0xE3;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
// Generic zeroes for other architectures
|
||||
Array.Fill(bytes, (byte)0x00);
|
||||
break;
|
||||
}
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
private void IncrementActive(string isaKey)
|
||||
{
|
||||
_activeCount.AddOrUpdate(isaKey, 1, (_, v) => v + 1);
|
||||
}
|
||||
|
||||
private void DecrementActive(string isaKey)
|
||||
{
|
||||
_activeCount.AddOrUpdate(isaKey, 0, (_, v) => Math.Max(0, v - 1));
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Statistics for the B2R2 lifter pool.
|
||||
/// </summary>
|
||||
/// <param name="TotalPooledLifters">Total lifters currently in pool.</param>
|
||||
/// <param name="TotalActiveLifters">Total lifters currently in use.</param>
|
||||
/// <param name="IsWarm">Whether the pool has been warmed.</param>
|
||||
/// <param name="IsaStats">Per-ISA pool statistics.</param>
|
||||
public sealed record B2R2LifterPoolStats(
|
||||
int TotalPooledLifters,
|
||||
int TotalActiveLifters,
|
||||
bool IsWarm,
|
||||
ImmutableDictionary<string, B2R2IsaPoolStats> IsaStats);
|
||||
|
||||
/// <summary>
|
||||
/// Per-ISA pool statistics.
|
||||
/// </summary>
|
||||
/// <param name="PooledCount">Number of lifters in pool for this ISA.</param>
|
||||
/// <param name="ActiveCount">Number of lifters currently in use for this ISA.</param>
|
||||
/// <param name="MaxPoolSize">Maximum pool size for this ISA.</param>
|
||||
public sealed record B2R2IsaPoolStats(
|
||||
int PooledCount,
|
||||
int ActiveCount,
|
||||
int MaxPoolSize);
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
/// <summary>
/// Settings for the B2R2 lifter pool.
/// </summary>
public sealed class B2R2LifterPoolOptions
{
    /// <summary>
    /// Name of the configuration section for these options.
    /// </summary>
    public const string SectionName = "StellaOps:BinaryIndex:B2R2LifterPool";

    /// <summary>
    /// Upper bound on the number of lifters kept in the pool for a single ISA. Defaults to 4.
    /// </summary>
    public int MaxPoolSizePerIsa { get; set; } = 4;

    /// <summary>
    /// When <c>true</c> (the default), lifters for the ISAs in <see cref="WarmPreloadIsas"/>
    /// are preloaded at startup.
    /// </summary>
    public bool EnableWarmPreload { get; set; } = true;

    /// <summary>
    /// ISA keys to preload at startup. Defaults to intel-64, intel-32, armv8-64 and armv7-32.
    /// </summary>
    public ImmutableArray<string> WarmPreloadIsas { get; set; } =
        ImmutableArray.Create("intel-64", "intel-32", "armv8-64", "armv7-32");

    /// <summary>
    /// Timeout for acquiring a lifter from the pool. Defaults to five seconds.
    /// </summary>
    public TimeSpan AcquireTimeout { get; set; } = TimeSpan.FromSeconds(5);
}
|
||||
@@ -0,0 +1,18 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
/// <summary>
|
||||
/// Statistics for the B2R2 lifter pool.
|
||||
/// </summary>
/// <remarks>
/// Returned by <c>B2R2LifterPool.GetStats()</c>. <c>IsaStats</c> is keyed by the pool's
/// ISA key string, which has the form <c>{arch}-{bits}</c> (for example <c>intel-64</c>).
/// </remarks>
|
||||
/// <param name="TotalPooledLifters">Total lifters currently in pool.</param>
|
||||
/// <param name="TotalActiveLifters">Total lifters currently in use.</param>
|
||||
/// <param name="IsWarm">Whether the pool has been warmed.</param>
|
||||
/// <param name="IsaStats">Per-ISA pool statistics.</param>
|
||||
public sealed record B2R2LifterPoolStats(
|
||||
int TotalPooledLifters,
|
||||
int TotalActiveLifters,
|
||||
bool IsWarm,
|
||||
ImmutableDictionary<string, B2R2IsaPoolStats> IsaStats);
|
||||
@@ -0,0 +1,73 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using B2R2;
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
using System.Collections.Immutable;
|
||||
using System.Globalization;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Lifts up to <paramref name="maxInstructions"/> instructions into IR statements and
    /// groups the statements into basic blocks.
    /// </summary>
    /// <remarks>
    /// A block is closed after every instruction for which <c>IsBlockTerminator</c> returns
    /// true; the next block starts at that instruction's end address. Statements left over
    /// after the last terminator form a final block. Blocks are created without
    /// predecessors or successors.
    /// </remarks>
    /// <param name="instructions">Instructions in address order.</param>
    /// <param name="isa">ISA passed through to the per-instruction lifter.</param>
    /// <param name="startAddress">Start address of the first block.</param>
    /// <param name="maxInstructions">Maximum number of instructions to lift.</param>
    /// <param name="ct">Checked before each instruction is lifted.</param>
    /// <returns>All lifted statements and the basic blocks that reference them by id.</returns>
    /// <exception cref="OperationCanceledException">When <paramref name="ct"/> is cancelled.</exception>
    private (List<IrStatement> Statements, List<IrBasicBlock> BasicBlocks) BuildStatementsAndBlocks(
        IReadOnlyList<DisassembledInstruction> instructions,
        ISA isa,
        ulong startAddress,
        int maxInstructions,
        CancellationToken ct)
    {
        var statements = new List<IrStatement>();
        var basicBlocks = new List<IrBasicBlock>();
        var currentBlockStatements = new List<int>();
        var blockStartAddress = startAddress;
        var statementId = 0;
        var blockId = 0;

        // Number of instructions actually lifted; can be smaller than instructions.Count
        // when maxInstructions truncates the input.
        var processedCount = 0;

        foreach (var instr in instructions.Take(maxInstructions))
        {
            ct.ThrowIfCancellationRequested();
            processedCount++;

            var liftedStatements = LiftInstructionToLowUir(isa, instr, ref statementId);
            statements.AddRange(liftedStatements);

            foreach (var stmt in liftedStatements)
            {
                currentBlockStatements.Add(stmt.Id);
            }

            if (IsBlockTerminator(instr))
            {
                var endAddress = instr.Address + (ulong)instr.RawBytes.Length;
                basicBlocks.Add(CreateBasicBlock(blockId, blockStartAddress, endAddress, currentBlockStatements));
                blockId++;
                currentBlockStatements.Clear();
                blockStartAddress = endAddress;
            }
        }

        if (currentBlockStatements.Count > 0 && processedCount > 0)
        {
            // End the trailing block at the last instruction that was lifted, not at the
            // last instruction of the input, which may lie beyond the maxInstructions cut-off.
            var lastInstr = instructions[processedCount - 1];
            var endAddress = lastInstr.Address + (ulong)lastInstr.RawBytes.Length;
            basicBlocks.Add(CreateBasicBlock(blockId, blockStartAddress, endAddress, currentBlockStatements));
        }

        return (statements, basicBlocks);
    }

    /// <summary>
    /// Creates a basic block labelled <c>bb_{blockId}</c> with no predecessors or successors.
    /// </summary>
    /// <param name="blockId">Block id, also used in the label.</param>
    /// <param name="startAddress">Address of the first instruction in the block.</param>
    /// <param name="endAddress">Address just past the last instruction in the block.</param>
    /// <param name="statementIds">
    /// Statement ids belonging to the block; copied, so the caller may reuse the list.
    /// </param>
    private static IrBasicBlock CreateBasicBlock(
        int blockId,
        ulong startAddress,
        ulong endAddress,
        IReadOnlyList<int> statementIds)
    {
        return new IrBasicBlock(
            Id: blockId,
            Label: string.Format(CultureInfo.InvariantCulture, "bb_{0}", blockId),
            StartAddress: startAddress,
            EndAddress: endAddress,
            StatementIds: [.. statementIds],
            Predecessors: ImmutableArray<int>.Empty,
            Successors: ImmutableArray<int>.Empty);
    }
}
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Links consecutive basic blocks with fall-through edges.
    /// </summary>
    /// <remarks>
    /// Every block except the last gets the next index as its only successor, and every
    /// block except the first gets the previous index as its only predecessor. Edge and
    /// neighbour ids are array indices, not the blocks' own <c>Id</c> values.
    /// </remarks>
    /// <param name="blocks">Blocks in layout order.</param>
    /// <returns>Copies of the blocks with neighbours filled in, plus the edges.</returns>
    private static (ImmutableArray<IrBasicBlock> Blocks, ImmutableArray<CfgEdge> Edges) BuildCfgEdges(
        ImmutableArray<IrBasicBlock> blocks)
    {
        if (blocks.Length == 0)
        {
            return (blocks, ImmutableArray<CfgEdge>.Empty);
        }

        var lastIndex = blocks.Length - 1;
        var linkedBlocks = ImmutableArray.CreateBuilder<IrBasicBlock>(blocks.Length);
        var fallThroughEdges = ImmutableArray.CreateBuilder<CfgEdge>();

        for (var index = 0; index <= lastIndex; index++)
        {
            var hasNext = index < lastIndex;
            var hasPrevious = index > 0;

            if (hasNext)
            {
                fallThroughEdges.Add(new CfgEdge(
                    SourceBlockId: index,
                    TargetBlockId: index + 1,
                    Kind: CfgEdgeKind.FallThrough,
                    Condition: null));
            }

            var predecessorIds = hasPrevious
                ? ImmutableArray.Create(index - 1)
                : ImmutableArray<int>.Empty;
            var successorIds = hasNext
                ? ImmutableArray.Create(index + 1)
                : ImmutableArray<int>.Empty;

            linkedBlocks.Add(blocks[index] with
            {
                Predecessors = predecessorIds,
                Successors = successorIds
            });
        }

        return (linkedBlocks.ToImmutable(), fallThroughEdges.ToImmutable());
    }

    /// <summary>
    /// Collects the ids of all blocks that have no successors.
    /// </summary>
    /// <param name="blocks">Blocks to scan.</param>
    /// <returns>Ids of the successor-less blocks, in input order.</returns>
    private static ImmutableArray<int> FindExitBlocks(ImmutableArray<IrBasicBlock> blocks)
    {
        var exitIds = ImmutableArray.CreateBuilder<int>();

        foreach (var block in blocks)
        {
            if (block.Successors.Length == 0)
            {
                exitIds.Add(block.Id);
            }
        }

        return exitIds.ToImmutable();
    }
}
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Builds an IR statement for an instruction directly from its disassembled operands.
    /// </summary>
    /// <remarks>
    /// The first operand becomes the destination (or <c>null</c> when there are no
    /// operands) and the rest become sources. Every operand is given a bit size of 64, and
    /// the statement metadata carries <c>"fallback" = true</c>.
    /// </remarks>
    /// <param name="instr">Instruction to convert.</param>
    /// <param name="id">Statement id to assign.</param>
    private static IrStatement CreateFallbackStatement(DisassembledInstruction instr, int id)
    {
        // Convert every operand once, then split into destination and sources.
        var irOperands = instr.Operands
            .Select(op => new IrOperand(
                Kind: MapOperandType(op.Type),
                Name: op.Text,
                Value: op.Value,
                BitSize: 64,
                IsMemory: op.Type == OperandType.Memory))
            .ToList();

        var dest = irOperands.Count > 0 ? irOperands[0] : null;
        var sources = irOperands.Skip(1).ToImmutableArray();
        var metadata = ImmutableDictionary<string, object>.Empty.Add("fallback", true);

        return new IrStatement(
            Id: id,
            Address: instr.Address,
            Kind: MapMnemonicToKind(instr.Mnemonic),
            Operation: instr.Mnemonic,
            Destination: dest,
            Sources: sources,
            Metadata: metadata);
    }

    /// <summary>
    /// Translates an IR operand kind into the SSA variable kind; immediates become constants
    /// and any kind without a direct match becomes a temporary.
    /// </summary>
    private static SsaVariableKind MapOperandKindToSsaKind(IrOperandKind kind)
    {
        switch (kind)
        {
            case IrOperandKind.Register:
                return SsaVariableKind.Register;
            case IrOperandKind.Temporary:
                return SsaVariableKind.Temporary;
            case IrOperandKind.Memory:
                return SsaVariableKind.Memory;
            case IrOperandKind.Immediate:
                return SsaVariableKind.Constant;
            default:
                return SsaVariableKind.Temporary;
        }
    }

    /// <summary>
    /// Translates a disassembler operand type into the IR operand kind; address operands
    /// become labels and unmatched types become <see cref="IrOperandKind.Unknown"/>.
    /// </summary>
    private static IrOperandKind MapOperandType(OperandType type)
    {
        switch (type)
        {
            case OperandType.Register:
                return IrOperandKind.Register;
            case OperandType.Immediate:
                return IrOperandKind.Immediate;
            case OperandType.Memory:
                return IrOperandKind.Memory;
            case OperandType.Address:
                return IrOperandKind.Label;
            default:
                return IrOperandKind.Unknown;
        }
    }
}
|
||||
@@ -0,0 +1,69 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Decides from the mnemonic alone whether an instruction ends a basic block.
    /// </summary>
    /// <remarks>
    /// Any mnemonic starting with <c>J</c> or <c>B</c> is treated as a terminator (this
    /// already includes <c>JR</c>, <c>BX</c> and <c>BLR</c>), as are the return forms
    /// <c>RET</c>, <c>RETN</c>, <c>RETF</c>, <c>IRET</c> and <c>SYSRET</c>.
    /// NOTE(review): the prefix test also matches non-branch mnemonics that begin with
    /// <c>B</c>; confirm whether that over-approximation is acceptable.
    /// </remarks>
    /// <param name="instr">Instruction whose mnemonic is inspected (case-insensitive).</param>
    private static bool IsBlockTerminator(DisassembledInstruction instr)
    {
        var mnemonic = instr.Mnemonic.ToUpperInvariant();

        return mnemonic.StartsWith("J", StringComparison.Ordinal) ||
               mnemonic.StartsWith("B", StringComparison.Ordinal) ||
               mnemonic is "RET" or "RETN" or "RETF" or "IRET" or "SYSRET";
    }

    /// <summary>
    /// Maps a mnemonic to an IR statement kind using exact names and name prefixes.
    /// </summary>
    /// <remarks>
    /// Checks run top to bottom and the first match wins. The exact call and return names
    /// are tested before the generic <c>J</c>/<c>B</c> branch prefixes; otherwise
    /// <c>BL</c>, <c>BLX</c> and <c>BLR</c> would be swallowed by the <c>B</c> prefix and
    /// reported as conditional jumps.
    /// </remarks>
    /// <param name="mnemonic">Mnemonic text; compared case-insensitively.</param>
    /// <returns>The matching kind, or <see cref="IrStatementKind.Unknown"/>.</returns>
    private static IrStatementKind MapMnemonicToKind(string mnemonic)
    {
        var upper = mnemonic.ToUpperInvariant();

        if (upper.StartsWith("MOV", StringComparison.Ordinal) ||
            upper.StartsWith("LEA", StringComparison.Ordinal) ||
            upper.StartsWith("LDR", StringComparison.Ordinal))
        {
            return IrStatementKind.Assign;
        }

        // Arithmetic, logic and shift families all map to BinaryOp.
        if (upper.StartsWith("ADD", StringComparison.Ordinal) ||
            upper.StartsWith("SUB", StringComparison.Ordinal) ||
            upper.StartsWith("MUL", StringComparison.Ordinal) ||
            upper.StartsWith("DIV", StringComparison.Ordinal) ||
            upper.StartsWith("AND", StringComparison.Ordinal) ||
            upper.StartsWith("OR", StringComparison.Ordinal) ||
            upper.StartsWith("XOR", StringComparison.Ordinal) ||
            upper.StartsWith("SH", StringComparison.Ordinal))
        {
            return IrStatementKind.BinaryOp;
        }

        if (upper.StartsWith("CMP", StringComparison.Ordinal) ||
            upper.StartsWith("TEST", StringComparison.Ordinal))
        {
            return IrStatementKind.Compare;
        }

        // Exact call/return names must be checked before the "J"/"B" prefixes below.
        if (upper is "CALL" or "BL" or "BLX")
        {
            return IrStatementKind.Call;
        }

        // NOTE(review): BLR is kept as Return as originally written; its meaning differs
        // between architectures, so confirm against the ISAs this service lifts.
        if (upper is "RET" or "RETN" or "BLR")
        {
            return IrStatementKind.Return;
        }

        if (upper.StartsWith("J", StringComparison.Ordinal) ||
            upper.StartsWith("B", StringComparison.Ordinal))
        {
            return IrStatementKind.ConditionalJump;
        }

        if (upper.StartsWith("PUSH", StringComparison.Ordinal) ||
            upper.StartsWith("POP", StringComparison.Ordinal) ||
            upper.StartsWith("STR", StringComparison.Ordinal))
        {
            return IrStatementKind.Store;
        }

        if (upper == "NOP")
        {
            return IrStatementKind.Nop;
        }

        return IrStatementKind.Unknown;
    }
}
|
||||
@@ -0,0 +1,22 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using B2R2;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Converts a <see cref="CpuArchitecture"/> value into the corresponding B2R2 ISA.
    /// </summary>
    /// <param name="arch">Architecture to convert.</param>
    /// <returns>The B2R2 ISA with the matching architecture and word size.</returns>
    /// <exception cref="NotSupportedException">
    /// When <paramref name="arch"/> has no mapping.
    /// </exception>
    private static ISA MapToB2R2Isa(CpuArchitecture arch)
    {
        switch (arch)
        {
            case CpuArchitecture.X86:
                return new ISA(Architecture.Intel, WordSize.Bit32);
            case CpuArchitecture.X86_64:
                return new ISA(Architecture.Intel, WordSize.Bit64);
            case CpuArchitecture.ARM32:
                return new ISA(Architecture.ARMv7, WordSize.Bit32);
            case CpuArchitecture.ARM64:
                return new ISA(Architecture.ARMv8, WordSize.Bit64);
            case CpuArchitecture.MIPS32:
                return new ISA(Architecture.MIPS, WordSize.Bit32);
            case CpuArchitecture.MIPS64:
                return new ISA(Architecture.MIPS, WordSize.Bit64);
            case CpuArchitecture.RISCV64:
                return new ISA(Architecture.RISCV, WordSize.Bit64);
            case CpuArchitecture.PPC32:
                return new ISA(Architecture.PPC, Endian.Big, WordSize.Bit32);
            case CpuArchitecture.SPARC:
                return new ISA(Architecture.SPARC, Endian.Big);
            default:
                throw new NotSupportedException($"Unsupported architecture: {arch}");
        }
    }
}
|
||||
@@ -0,0 +1,77 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2LowUirLiftingService
{
    /// <inheritdoc />
    public Task<LiftedFunction> LiftToIrAsync(
        IReadOnlyList<DisassembledInstruction> instructions,
        string functionName,
        ulong startAddress,
        CpuArchitecture architecture,
        LiftOptions? options = null,
        CancellationToken ct = default)
    {
        // Validation and cancellation failures are thrown directly from this call;
        // the method is not async, so they are not wrapped in the returned task.
        ArgumentNullException.ThrowIfNull(instructions);
        ct.ThrowIfCancellationRequested();

        var effectiveOptions = options ?? LiftOptions.Default;

        var isSupported = SupportsArchitecture(architecture);
        if (!isSupported)
        {
            throw new NotSupportedException(
                $"Architecture {architecture} is not supported for B2R2 LowUIR lifting.");
        }

        _logger.LogDebug(
            "B2R2 LowUIR lifting {InstructionCount} instructions for function {FunctionName} ({Architecture})",
            instructions.Count,
            functionName,
            architecture);

        var result = BuildLiftedFunction(
            instructions,
            functionName,
            startAddress,
            architecture,
            effectiveOptions,
            ct);

        _logger.LogDebug(
            "B2R2 LowUIR lifted {StatementCount} statements in {BlockCount} blocks for {FunctionName}",
            result.Statements.Length,
            result.BasicBlocks.Length,
            functionName);

        // The work above ran synchronously, so hand back an already-completed task.
        return Task.FromResult(result);
    }

    /// <summary>
    /// Lifts the instructions into statements and basic blocks, derives the CFG edges,
    /// and packages everything into a <see cref="LiftedFunction"/>.
    /// </summary>
    /// <param name="instructions">Disassembled instructions of the function.</param>
    /// <param name="functionName">Name stored on the resulting function.</param>
    /// <param name="startAddress">Address stored on the resulting function.</param>
    /// <param name="architecture">Architecture used to select the B2R2 ISA.</param>
    /// <param name="options">Lifting options; supplies the instruction cap.</param>
    /// <param name="ct">Token forwarded to the statement/block builder.</param>
    /// <returns>The assembled <see cref="LiftedFunction"/>.</returns>
    private LiftedFunction BuildLiftedFunction(
        IReadOnlyList<DisassembledInstruction> instructions,
        string functionName,
        ulong startAddress,
        CpuArchitecture architecture,
        LiftOptions options,
        CancellationToken ct)
    {
        var targetIsa = MapToB2R2Isa(architecture);
        var instructionCap = GetEffectiveMaxInstructions(options);

        var (liftedStatements, rawBlocks) = BuildStatementsAndBlocks(
            instructions,
            targetIsa,
            startAddress,
            instructionCap,
            ct);

        var (linkedBlocks, cfgEdges) = BuildCfgEdges([.. rawBlocks]);

        // Block 0 is used as the entry whenever any block exists; -1 marks "no entry block".
        var entryBlockId = linkedBlocks.Length > 0 ? 0 : -1;
        var exitBlockIds = FindExitBlocks(linkedBlocks);

        var controlFlow = new ControlFlowGraph(
            EntryBlockId: entryBlockId,
            ExitBlockIds: exitBlockIds,
            Edges: cfgEdges);

        return new LiftedFunction(
            Name: functionName,
            Address: startAddress,
            Statements: [.. liftedStatements],
            BasicBlocks: linkedBlocks,
            Cfg: controlFlow);
    }

    /// <summary>
    /// Resolves the instruction cap: a positive <c>MaxInstructions</c> is used as-is,
    /// anything else becomes <see cref="int.MaxValue"/>.
    /// </summary>
    /// <param name="options">Options carrying the configured cap.</param>
    /// <returns>The cap to apply while lifting.</returns>
    private static int GetEffectiveMaxInstructions(LiftOptions options) =>
        options.MaxInstructions > 0
            ? options.MaxInstructions
            : int.MaxValue;
}
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright (c) StellaOps. All rights reserved.
|
||||
// Licensed under BUSL-1.1. See LICENSE in the project root.
|
||||
using B2R2;
|
||||
using B2R2.FrontEnd;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.BinaryIndex.Semantic;
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.BinaryIndex.Disassembly.B2R2;
|
||||
|
||||
public sealed partial class B2R2LowUirLiftingService
{
    /// <summary>
    /// Lifts a single disassembled instruction to LowUIR statements using B2R2.
    /// </summary>
    /// <remarks>
    /// The result is all-or-nothing per instruction: either every B2R2 statement is converted,
    /// or the list contains exactly one fallback statement. If lifting fails partway through,
    /// statements converted so far are discarded and <paramref name="statementId"/> is rewound
    /// before the fallback is emitted, so callers never see a truncated statement sequence
    /// mixed with a fallback.
    /// </remarks>
    /// <param name="isa">B2R2 ISA used to construct the <see cref="BinHandle"/>.</param>
    /// <param name="instr">Instruction whose raw bytes and address are lifted.</param>
    /// <param name="statementId">
    /// Next free statement ID. Advanced by one for every statement in the returned list.
    /// </param>
    /// <returns>The lifted statements, or a single fallback statement when lifting yields nothing or throws.</returns>
    private List<IrStatement> LiftInstructionToLowUir(
        ISA isa,
        DisassembledInstruction instr,
        ref int statementId)
    {
        // Remembered so a mid-instruction failure can hand back every ID it consumed.
        var firstStatementId = statementId;

        try
        {
            var bytes = instr.RawBytes.ToArray();
            var binHandle = new BinHandle(bytes, isa, null, true);
            var lifter = binHandle.NewLiftingUnit();

            var liftResult = lifter.LiftInstruction(instr.Address);

            if (liftResult == null || liftResult.Length == 0)
            {
                return [CreateFallbackStatement(instr, statementId++)];
            }

            var statements = new List<IrStatement>();

            foreach (var b2r2Stmt in liftResult)
            {
                // The runtime type name of the B2R2 statement is used both as the
                // operation label and as the key for mapping to an IR statement kind.
                var stmtType = b2r2Stmt.GetType().Name;
                var (dest, sources) = ExtractOperandsFromB2R2Stmt(b2r2Stmt);

                statements.Add(new IrStatement(
                    Id: statementId++,
                    Address: instr.Address,
                    Kind: MapB2R2StmtTypeToKind(stmtType),
                    Operation: stmtType,
                    Destination: dest,
                    Sources: sources,
                    Metadata: ImmutableDictionary<string, object>.Empty));
            }

            return statements;
        }
        catch (Exception ex)
        {
            // Best-effort lifting: any failure degrades to a single fallback statement.
            _logger.LogWarning(ex, "Failed to lift instruction at {Address:X} with B2R2 LowUIR", instr.Address);

            // Drop partially converted statements and reuse their IDs for the fallback.
            statementId = firstStatementId;
            return [CreateFallbackStatement(instr, statementId++)];
        }
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user