- Implemented CanonJson class for deterministic JSON serialization and hashing. - Added unit tests for CanonJson functionality, covering various scenarios including key sorting, handling of nested objects, arrays, and special characters. - Created project files for the Canonical JSON library and its tests, including necessary package references. - Added README.md for library usage and API reference. - Introduced RabbitMqIntegrationFactAttribute for conditional RabbitMQ integration tests.
509 lines
16 KiB
C#
509 lines
16 KiB
C#
// -----------------------------------------------------------------------------
|
|
// JavaBytecodeFingerprinter.cs
|
|
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-010)
|
|
// Description: Java method fingerprinting using bytecode parsing.
|
|
// Parses .class files from JAR archives for method extraction.
|
|
// -----------------------------------------------------------------------------
|
|
|
|
using System;
|
|
using System.Buffers.Binary;
|
|
using System.Collections.Generic;
|
|
using System.Diagnostics;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Security.Cryptography;
|
|
using System.Text;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
using Microsoft.Extensions.Logging;
|
|
|
|
namespace StellaOps.Scanner.VulnSurfaces.Fingerprint;
|
|
|
|
/// <summary>
|
|
/// Computes method fingerprints for Java packages using bytecode hashing.
|
|
/// Parses .class files from extracted JAR archives.
|
|
/// </summary>
|
|
public sealed class JavaBytecodeFingerprinter : IMethodFingerprinter
|
|
{
|
|
private readonly ILogger<JavaBytecodeFingerprinter> _logger;
|
|
|
|
// Java class file magic number
|
|
private const uint ClassFileMagic = 0xCAFEBABE;
|
|
|
|
/// <summary>
/// Creates a fingerprinter that writes its diagnostics to the supplied logger.
/// </summary>
/// <param name="logger">Logger used for per-file and per-package messages.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
public JavaBytecodeFingerprinter(ILogger<JavaBytecodeFingerprinter> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
|
|
|
|
/// <inheritdoc />
|
|
public string Ecosystem
{
    // This fingerprinter always reports the "maven" ecosystem identifier.
    get { return "maven"; }
}
|
|
|
|
/// <inheritdoc />
|
|
public async Task<FingerprintResult> FingerprintAsync(
    FingerprintRequest request,
    CancellationToken cancellationToken = default)
{
    // Walks every .class file under request.PackagePath, fingerprints its methods and
    // returns an Ok result with the collected methods, elapsed time and file count.
    // Any non-cancellation failure of the overall walk is reported as a Fail result;
    // cancellation requested through cancellationToken propagates to the caller.
    ArgumentNullException.ThrowIfNull(request);

    var sw = Stopwatch.StartNew();
    var methods = new Dictionary<string, MethodFingerprint>(StringComparer.Ordinal);

    try
    {
        var classFiles = GetClassFiles(request.PackagePath);
        var filesProcessed = 0;

        foreach (var classPath in classFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                await ProcessClassFileAsync(classPath, request.PackagePath, methods, request, cancellationToken);
                filesProcessed++;
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // The caller asked us to stop: do not treat this as a bad class file.
                throw;
            }
            catch (Exception ex)
            {
                // Best effort: one unreadable class file must not abort the whole package.
                _logger.LogDebug(ex, "Failed to process class file {Path}", classPath);
            }
        }

        sw.Stop();
        _logger.LogDebug(
            "Fingerprinted {MethodCount} methods from {FileCount} class files in {Duration}ms",
            methods.Count, filesProcessed, sw.ElapsedMilliseconds);

        return FingerprintResult.Ok(methods, sw.Elapsed, filesProcessed);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Cancellation is not a fingerprinting failure; let it surface as cancellation
        // instead of being converted into a Fail result.
        throw;
    }
    catch (Exception ex)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to fingerprint Java package at {Path}", request.PackagePath);
        return FingerprintResult.Fail(ex.Message, sw.Elapsed);
    }
}
|
|
|
|
/// <summary>
/// Lists the .class files below <paramref name="packagePath"/>, excluding everything
/// inside a top-level <c>META-INF</c> directory.
/// </summary>
/// <param name="packagePath">Root directory of the extracted archive.</param>
/// <returns>
/// Matching file paths in ordinal order, or an empty array when the directory does not exist.
/// </returns>
private static string[] GetClassFiles(string packagePath)
{
    if (!Directory.Exists(packagePath))
    {
        return [];
    }

    const string MetaInf = "META-INF";

    // True only when the first path segment is exactly META-INF (case-insensitive),
    // so sibling names such as "META-INFO" are not excluded by accident.
    static bool IsUnderMetaInf(string relativePath)
    {
        if (!relativePath.StartsWith(MetaInf, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        if (relativePath.Length == MetaInf.Length)
        {
            return true;
        }

        var next = relativePath[MetaInf.Length];
        return next == Path.DirectorySeparatorChar || next == Path.AltDirectorySeparatorChar;
    }

    return Directory.GetFiles(packagePath, "*.class", SearchOption.AllDirectories)
        // Path.GetRelativePath strips only the root prefix; a plain string Replace would
        // also remove later occurrences of the root text inside the path.
        .Where(file => !IsUnderMetaInf(Path.GetRelativePath(packagePath, file)))
        // Directory enumeration order is unspecified; sort so that callers which keep the
        // last fingerprint for a duplicate method key get a repeatable result.
        .OrderBy(file => file, StringComparer.Ordinal)
        .ToArray();
}
|
|
|
|
/// <summary>
/// Reads a single .class file and stores one fingerprint per eligible method in
/// <paramref name="methods"/>, keyed by <c>ClassName::nameDescriptor</c>.
/// </summary>
/// <remarks>
/// Files shorter than 10 bytes or without the class-file magic are ignored. Parsing errors
/// are logged at debug level and do not propagate. An existing entry with the same key is
/// overwritten.
/// </remarks>
/// <param name="classPath">Path of the class file to read.</param>
/// <param name="packagePath">Package root used to compute the relative source path.</param>
/// <param name="methods">Accumulator that receives the fingerprints.</param>
/// <param name="request">Request whose <c>IncludePrivateMethods</c> flag controls visibility filtering.</param>
/// <param name="cancellationToken">Token passed to the file read.</param>
private async Task ProcessClassFileAsync(
    string classPath,
    string packagePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request,
    CancellationToken cancellationToken)
{
    var content = await File.ReadAllBytesAsync(classPath, cancellationToken);

    // Anything this short is not worth handing to the parser.
    if (content.Length < 10)
    {
        return;
    }

    if (BinaryPrimitives.ReadUInt32BigEndian(content) != ClassFileMagic)
    {
        _logger.LogDebug("Invalid class file magic in {Path}", classPath);
        return;
    }

    try
    {
        var parsed = ParseClassFile(content);
        var sourceFile = Path.GetRelativePath(packagePath, classPath);

        foreach (var candidate in parsed.Methods)
        {
            // Unless explicitly requested, keep only public and protected methods.
            if (!(request.IncludePrivateMethods || candidate.IsPublic || candidate.IsProtected))
            {
                continue;
            }

            // Methods flagged synthetic or bridge are never fingerprinted.
            if (candidate.IsSynthetic || candidate.IsBridge)
            {
                continue;
            }

            var key = $"{parsed.ClassName}::{candidate.Name}{candidate.Descriptor}";
            var fingerprint = new MethodFingerprint
            {
                MethodKey = key,
                DeclaringType = parsed.ClassName,
                Name = candidate.Name,
                Signature = ParseDescriptor(candidate.Descriptor),
                BodyHash = candidate.BodyHash,
                SignatureHash = ComputeHash(candidate.Descriptor),
                IsPublic = candidate.IsPublic,
                BodySize = candidate.CodeLength,
                SourceFile = sourceFile
            };

            methods[key] = fingerprint;
        }
    }
    catch (Exception ex)
    {
        _logger.LogDebug(ex, "Error parsing class file {Path}", classPath);
    }
}
|
|
|
|
/// <summary>
/// Parses the class-file structure in <paramref name="bytes"/> up to and including the
/// method table and returns the class name, class access flags and parsed methods.
/// </summary>
/// <param name="bytes">Raw class-file content; the caller has already checked the magic number.</param>
/// <returns>Class-level information together with one entry per declared method.</returns>
private JavaClassInfo ParseClassFile(byte[] bytes)
{
    var input = new JavaClassReader(bytes);

    input.Skip(4);        // magic, verified by the caller
    _ = input.ReadU2();   // minor_version
    _ = input.ReadU2();   // major_version

    var pool = ParseConstantPool(input);

    var classFlags = input.ReadU2();
    var declaredName = ResolveClassName(pool, input.ReadU2()); // this_class
    _ = input.ReadU2();                                        // super_class

    // Each interface entry is a 2-byte constant pool index.
    input.Skip(input.ReadU2() * 2);

    // Fields are not fingerprinted; step over them to reach the method table.
    for (int fieldsLeft = input.ReadU2(); fieldsLeft > 0; fieldsLeft--)
    {
        SkipFieldOrMethod(input);
    }

    int declaredMethods = input.ReadU2();
    var parsedMethods = new List<JavaMethodInfo>();
    while (parsedMethods.Count < declaredMethods)
    {
        parsedMethods.Add(ParseMethod(input, pool));
    }

    return new JavaClassInfo
    {
        ClassName = declaredName,
        AccessFlags = classFlags,
        Methods = parsedMethods
    };
}
|
|
|
|
/// <summary>
/// Reads the constant pool at the reader's current position.
/// </summary>
/// <remarks>
/// The returned list is indexed exactly like the class file: index 0 is an unused
/// placeholder, and each Long/Double constant is followed by one placeholder because it
/// occupies two indices. Only UTF-8 text and the index fields of class, string, member
/// reference and name-and-type entries are retained; other payloads are skipped.
/// </remarks>
/// <param name="reader">Reader positioned at <c>constant_pool_count</c>.</param>
/// <returns>The parsed pool.</returns>
/// <exception cref="InvalidDataException">An entry has a tag this parser does not know.</exception>
private static List<ConstantPoolEntry> ParseConstantPool(JavaClassReader reader)
{
    var count = reader.ReadU2();
    var pool = new List<ConstantPoolEntry>(count) { new() }; // Index 0 is unused

    for (var i = 1; i < count; i++)
    {
        var tag = reader.ReadU1();
        var entry = new ConstantPoolEntry { Tag = tag };
        var occupiesTwoSlots = false;

        switch (tag)
        {
            case 1: // CONSTANT_Utf8
                var length = reader.ReadU2();
                // NOTE(review): class files use "modified UTF-8"; standard UTF-8 decoding
                // differs for embedded NUL and supplementary characters — confirm whether
                // such names matter for fingerprinting.
                entry.StringValue = Encoding.UTF8.GetString(reader.ReadBytes(length));
                break;
            case 3: // CONSTANT_Integer
            case 4: // CONSTANT_Float
                reader.Skip(4);
                break;
            case 5: // CONSTANT_Long
            case 6: // CONSTANT_Double
                reader.Skip(8);
                occupiesTwoSlots = true;
                break;
            case 7: // CONSTANT_Class
            case 8: // CONSTANT_String
                entry.NameIndex = reader.ReadU2();
                break;
            case 9: // CONSTANT_Fieldref
            case 10: // CONSTANT_Methodref
            case 11: // CONSTANT_InterfaceMethodref
                entry.ClassIndex = reader.ReadU2();
                entry.NameAndTypeIndex = reader.ReadU2();
                break;
            case 12: // CONSTANT_NameAndType
                entry.NameIndex = reader.ReadU2();
                entry.DescriptorIndex = reader.ReadU2();
                break;
            case 15: // CONSTANT_MethodHandle
                reader.Skip(3);
                break;
            case 16: // CONSTANT_MethodType
                reader.Skip(2);
                break;
            case 17: // CONSTANT_Dynamic
            case 18: // CONSTANT_InvokeDynamic
                reader.Skip(4);
                break;
            case 19: // CONSTANT_Module
            case 20: // CONSTANT_Package
                reader.Skip(2);
                break;
            default:
                // The payload size of an unknown tag is unknown, so every later entry
                // would be read from the wrong offset. Fail instead of parsing garbage.
                throw new InvalidDataException($"Unknown constant pool tag {tag} at index {i}.");
        }

        // The real entry lives at index i; for 8-byte constants index i + 1 is the
        // unusable slot, so the placeholder must come after the entry, not before it.
        pool.Add(entry);
        if (occupiesTwoSlots)
        {
            pool.Add(new ConstantPoolEntry());
            i++;
        }
    }

    return pool;
}
|
|
|
|
/// <summary>
/// Parses one <c>method_info</c> structure at the reader's position and hashes the
/// bytecode of its <c>Code</c> attribute.
/// </summary>
/// <param name="reader">Reader positioned at the method's access flags.</param>
/// <param name="constantPool">Pool used to resolve the method name, descriptor and attribute names.</param>
/// <returns>
/// The method's name, descriptor, flags, bytecode length and body hash. A method without a
/// <c>Code</c> attribute gets length 0 and the hash of an empty byte array.
/// </returns>
private static JavaMethodInfo ParseMethod(JavaClassReader reader, List<ConstantPoolEntry> constantPool)
{
    var flags = reader.ReadU2();
    var methodName = GetUtf8(constantPool, reader.ReadU2());
    var methodDescriptor = GetUtf8(constantPool, reader.ReadU2());

    byte[] body = [];
    var bodyLength = 0;

    for (int attributesLeft = reader.ReadU2(); attributesLeft > 0; attributesLeft--)
    {
        var attributeName = GetUtf8(constantPool, reader.ReadU2());
        var attributeLength = reader.ReadU4();

        if (attributeName != "Code")
        {
            reader.Skip((int)attributeLength);
            continue;
        }

        // Code attribute layout: max_stack (2), max_locals (2), code_length (4), code bytes.
        reader.Skip(4);
        bodyLength = (int)reader.ReadU4();
        body = reader.ReadBytes(bodyLength);

        // Whatever is left of the attribute (exception table, nested attributes) is skipped.
        var trailing = attributeLength - 8 - bodyLength;
        reader.Skip((int)trailing);
    }

    return new JavaMethodInfo
    {
        Name = methodName,
        Descriptor = methodDescriptor,
        AccessFlags = flags,
        CodeLength = bodyLength,
        BodyHash = ComputeHash(body)
    };
}
|
|
|
|
/// <summary>
/// Advances the reader past one field or method entry, including all of its attributes.
/// </summary>
/// <param name="reader">Reader positioned at the entry's access flags.</param>
private static void SkipFieldOrMethod(JavaClassReader reader)
{
    // access_flags, name_index and descriptor_index are 2 bytes each.
    reader.Skip(6);

    for (int attributesLeft = reader.ReadU2(); attributesLeft > 0; attributesLeft--)
    {
        reader.Skip(2);                         // attribute_name_index
        var payloadLength = reader.ReadU4();    // attribute_length
        reader.Skip((int)payloadLength);
    }
}
|
|
|
|
/// <summary>
/// Resolves a constant pool class reference to a dotted class name.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="classIndex">Index of the class entry to resolve.</param>
/// <returns>
/// The referenced name with <c>/</c> replaced by <c>.</c>, or <c>"Unknown"</c> when the
/// index is out of range or the entry's tag is not 7.
/// </returns>
private static string ResolveClassName(List<ConstantPoolEntry> pool, int classIndex)
{
    const byte ClassTag = 7;

    var isValidIndex = classIndex > 0 && classIndex < pool.Count;
    if (!isValidIndex || pool[classIndex].Tag != ClassTag)
    {
        return "Unknown";
    }

    var internalName = GetUtf8(pool, pool[classIndex].NameIndex);
    return internalName.Replace('/', '.');
}
|
|
|
|
/// <summary>
/// Returns the string stored at <paramref name="index"/> in the constant pool.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="index">Pool index to read.</param>
/// <returns>
/// The entry's string value, or an empty string when the index is not in
/// <c>1..pool.Count - 1</c> or the entry carries no string.
/// </returns>
private static string GetUtf8(List<ConstantPoolEntry> pool, int index)
    => index > 0 && index < pool.Count
        ? (pool[index].StringValue ?? string.Empty)
        : string.Empty;
|
|
|
|
/// <summary>
/// Renders a JVM method descriptor as a readable signature,
/// e.g. <c>(Ljava/lang/String;I)V</c> becomes <c>(String, int) -> void</c>.
/// </summary>
/// <param name="descriptor">Descriptor text taken from the constant pool.</param>
/// <returns>
/// The parameter list in parentheses (when the descriptor starts with <c>(</c>) followed by
/// <c> -> </c> and the return type when any text remains; an empty string for an empty descriptor.
/// </returns>
private static string ParseDescriptor(string descriptor)
{
    var rendered = new StringBuilder();
    var cursor = 0;

    if (descriptor.StartsWith('('))
    {
        var parameterTypes = new List<string>();
        cursor = 1;

        while (cursor < descriptor.Length && descriptor[cursor] != ')')
        {
            var (parameterType, next) = ParseType(descriptor, cursor);
            parameterTypes.Add(parameterType);
            cursor = next;
        }

        rendered.Append('(').Append(string.Join(", ", parameterTypes)).Append(')');
        cursor++; // step over ')'
    }

    if (cursor >= descriptor.Length)
    {
        return rendered.ToString();
    }

    var (returnType, _) = ParseType(descriptor, cursor);
    return rendered.Append(" -> ").Append(returnType).ToString();
}
|
|
|
|
/// <summary>
/// Decodes the single type that starts at <paramref name="index"/> in a descriptor.
/// </summary>
/// <param name="descriptor">Descriptor text.</param>
/// <param name="index">Position of the type's first character.</param>
/// <returns>
/// The readable type name and the index just past the type. An index at or beyond the end
/// yields <c>("void", index)</c>; an unrecognised character yields <c>"?"</c>.
/// </returns>
private static (string typeName, int newIndex) ParseType(string descriptor, int index)
{
    if (index >= descriptor.Length)
    {
        return ("void", index);
    }

    var code = descriptor[index];

    // Arrays and object types span more than one character and have their own parsers.
    if (code == '[')
    {
        return ParseArrayType(descriptor, index);
    }

    if (code == 'L')
    {
        return ParseObjectType(descriptor, index);
    }

    string primitive;
    switch (code)
    {
        case 'B': primitive = "byte"; break;
        case 'C': primitive = "char"; break;
        case 'D': primitive = "double"; break;
        case 'F': primitive = "float"; break;
        case 'I': primitive = "int"; break;
        case 'J': primitive = "long"; break;
        case 'S': primitive = "short"; break;
        case 'Z': primitive = "boolean"; break;
        case 'V': primitive = "void"; break;
        default: primitive = "?"; break;
    }

    return (primitive, index + 1);
}
|
|
|
|
/// <summary>
/// Decodes an array type whose <c>[</c> marker is at <paramref name="index"/>.
/// </summary>
/// <param name="descriptor">Descriptor text.</param>
/// <param name="index">Position of the <c>[</c> character.</param>
/// <returns>The element type name suffixed with <c>[]</c> and the index past the element type.</returns>
private static (string typeName, int newIndex) ParseArrayType(string descriptor, int index)
{
    var element = ParseType(descriptor, index + 1);
    return (element.typeName + "[]", element.newIndex);
}
|
|
|
|
/// <summary>
/// Decodes an object type of the form <c>Lpkg/Name;</c> that starts at <paramref name="index"/>.
/// </summary>
/// <param name="descriptor">Descriptor text.</param>
/// <param name="index">Position of the leading <c>L</c>.</param>
/// <returns>
/// The part of the name after the last <c>/</c> and the index just past the <c>;</c>.
/// When no <c>;</c> follows, <c>("Object", index + 1)</c>.
/// </returns>
private static (string typeName, int newIndex) ParseObjectType(string descriptor, int index)
{
    var terminator = descriptor.IndexOf(';', index);
    if (terminator < 0)
    {
        return ("Object", index + 1);
    }

    var internalName = descriptor.Substring(index + 1, terminator - index - 1);
    var lastSlash = internalName.LastIndexOf('/');
    var simpleName = lastSlash < 0 ? internalName : internalName.Substring(lastSlash + 1);

    return (simpleName, terminator + 1);
}
|
|
|
|
/// <summary>
/// Produces a short fingerprint of <paramref name="data"/>: the first 16 bytes of its
/// SHA-256 digest as lowercase hex.
/// </summary>
/// <param name="data">Bytes to hash.</param>
/// <returns>A 32-character hex string, or the literal <c>"empty"</c> for a zero-length array.</returns>
private static string ComputeHash(byte[] data)
{
    const int FingerprintBytes = 16;

    if (data.Length == 0)
    {
        return "empty";
    }

    var digest = SHA256.HashData(data);
    return Convert.ToHexStringLower(digest, 0, FingerprintBytes);
}
|
|
|
|
/// <summary>
/// Fingerprints the UTF-8 encoding of <paramref name="data"/> with the byte-array overload.
/// </summary>
/// <param name="data">Text to hash.</param>
/// <returns>The fingerprint, or the literal <c>"empty"</c> for a null or empty string.</returns>
private static string ComputeHash(string data)
    => string.IsNullOrEmpty(data)
        ? "empty"
        : ComputeHash(Encoding.UTF8.GetBytes(data));
|
|
|
|
/// <summary>
/// Forward-only cursor over class-file bytes that reads big-endian values.
/// </summary>
/// <remarks>
/// Every operation checks that the requested number of bytes is non-negative and still
/// available. Callers cast unsigned 32-bit lengths to <c>int</c>; without the check a
/// length above <c>int.MaxValue</c> would become negative and move the cursor backwards.
/// </remarks>
private sealed class JavaClassReader(byte[] data)
{
    private int _position;

    /// <summary>Reads one unsigned byte and advances by 1.</summary>
    /// <exception cref="EndOfStreamException">No byte is left.</exception>
    public byte ReadU1()
    {
        EnsureAvailable(1);
        return data[_position++];
    }

    /// <summary>Reads a big-endian unsigned 16-bit value and advances by 2.</summary>
    /// <exception cref="EndOfStreamException">Fewer than 2 bytes are left.</exception>
    public ushort ReadU2()
    {
        EnsureAvailable(2);
        var value = BinaryPrimitives.ReadUInt16BigEndian(data.AsSpan(_position, 2));
        _position += 2;
        return value;
    }

    /// <summary>Reads a big-endian unsigned 32-bit value and advances by 4.</summary>
    /// <exception cref="EndOfStreamException">Fewer than 4 bytes are left.</exception>
    public uint ReadU4()
    {
        EnsureAvailable(4);
        var value = BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(_position, 4));
        _position += 4;
        return value;
    }

    /// <summary>Copies the next <paramref name="count"/> bytes into a new array and advances past them.</summary>
    /// <exception cref="InvalidDataException"><paramref name="count"/> is negative.</exception>
    /// <exception cref="EndOfStreamException">Fewer than <paramref name="count"/> bytes are left.</exception>
    public byte[] ReadBytes(int count)
    {
        EnsureAvailable(count);
        var result = data[_position..(_position + count)];
        _position += count;
        return result;
    }

    /// <summary>Advances the cursor by <paramref name="count"/> bytes without reading them.</summary>
    /// <exception cref="InvalidDataException"><paramref name="count"/> is negative.</exception>
    /// <exception cref="EndOfStreamException">Fewer than <paramref name="count"/> bytes are left.</exception>
    public void Skip(int count)
    {
        EnsureAvailable(count);
        _position += count;
    }

    // Written as a subtraction so that a huge count cannot overflow _position + count.
    private void EnsureAvailable(int count)
    {
        if (count < 0)
        {
            throw new InvalidDataException($"Negative length {count} at offset {_position}.");
        }

        if (count > data.Length - _position)
        {
            throw new EndOfStreamException(
                $"Need {count} byte(s) at offset {_position}, but only {data.Length - _position} remain.");
        }
    }
}
|
|
|
|
/// <summary>
/// One slot of a parsed constant pool. Only the members relevant to the entry's tag are
/// populated; the others keep their default values.
/// </summary>
private sealed class ConstantPoolEntry
{
    /// <summary>Tag byte read from the class file (0 for placeholder slots).</summary>
    public byte Tag { get; init; }

    /// <summary>Class index of a field, method or interface-method reference.</summary>
    public int ClassIndex { get; set; }

    /// <summary>Name-and-type index of a field, method or interface-method reference.</summary>
    public int NameAndTypeIndex { get; set; }

    /// <summary>Index read for class, string and name-and-type entries.</summary>
    public int NameIndex { get; set; }

    /// <summary>Descriptor index of a name-and-type entry.</summary>
    public int DescriptorIndex { get; set; }

    /// <summary>Decoded text of a UTF-8 entry; <c>null</c> for every other kind.</summary>
    public string? StringValue { get; set; }
}
|
|
|
|
/// <summary>
/// Information extracted from one class file: its name, access flags and methods.
/// </summary>
private sealed record JavaClassInfo
{
    /// <summary>Class name as resolved from the constant pool.</summary>
    public required string ClassName
    {
        get;
        init;
    }

    /// <summary>Raw class-level access flags.</summary>
    public ushort AccessFlags
    {
        get;
        init;
    }

    /// <summary>Methods parsed from the class file's method table.</summary>
    public required List<JavaMethodInfo> Methods
    {
        get;
        init;
    }
}
|
|
|
|
/// <summary>
/// Information extracted from one method entry, with helpers that decode its access flags.
/// </summary>
private sealed record JavaMethodInfo
{
    // Bit masks tested against AccessFlags.
    private const ushort PublicMask = 0x0001;
    private const ushort ProtectedMask = 0x0004;
    private const ushort BridgeMask = 0x0040;
    private const ushort SyntheticMask = 0x1000;

    /// <summary>Method name from the constant pool.</summary>
    public required string Name { get; init; }

    /// <summary>Raw method descriptor from the constant pool.</summary>
    public required string Descriptor { get; init; }

    /// <summary>Raw method access flags.</summary>
    public ushort AccessFlags { get; init; }

    /// <summary>Length in bytes of the method's bytecode (0 when it has none).</summary>
    public int CodeLength { get; init; }

    /// <summary>Fingerprint of the method's bytecode.</summary>
    public required string BodyHash { get; init; }

    /// <summary>True when the public bit (0x0001) is set.</summary>
    public bool IsPublic => IsSet(PublicMask);

    /// <summary>True when the protected bit (0x0004) is set.</summary>
    public bool IsProtected => IsSet(ProtectedMask);

    /// <summary>True when the synthetic bit (0x1000) is set.</summary>
    public bool IsSynthetic => IsSet(SyntheticMask);

    /// <summary>True when the bridge bit (0x0040) is set.</summary>
    public bool IsBridge => IsSet(BridgeMask);

    private bool IsSet(ushort mask) => (AccessFlags & mask) != 0;
}
|
|
}
|