Add Canonical JSON serialization library with tests and documentation

- Implemented CanonJson class for deterministic JSON serialization and hashing.
- Added unit tests for CanonJson functionality, covering various scenarios including key sorting, handling of nested objects, arrays, and special characters.
- Created project files for the Canonical JSON library and its tests, including necessary package references.
- Added README.md for library usage and API reference.
- Introduced RabbitMqIntegrationFactAttribute for conditional RabbitMQ integration tests.
This commit is contained in:
master
2025-12-19 15:35:00 +02:00
parent 43882078a4
commit 951a38d561
192 changed files with 27550 additions and 2611 deletions

View File

@@ -0,0 +1,508 @@
// -----------------------------------------------------------------------------
// JavaBytecodeFingerprinter.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-010)
// Description: Java method fingerprinting using bytecode parsing.
// Parses .class files from JAR archives for method extraction.
// -----------------------------------------------------------------------------
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace StellaOps.Scanner.VulnSurfaces.Fingerprint;
/// <summary>
/// Computes method fingerprints for Java packages using bytecode hashing.
/// Parses .class files from extracted JAR archives.
/// </summary>
public sealed class JavaBytecodeFingerprinter : IMethodFingerprinter
{
/// <summary>First four bytes expected at the start of a Java class file.</summary>
private const uint ClassFileMagic = 0xCAFEBABE;

/// <summary>Sink for per-file diagnostics and the per-package summary.</summary>
private readonly ILogger<JavaBytecodeFingerprinter> _logger;

/// <summary>
/// Creates a fingerprinter that reports through <paramref name="logger"/>.
/// </summary>
/// <param name="logger">Logger used for debug and warning output.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <see langword="null"/>.</exception>
public JavaBytecodeFingerprinter(ILogger<JavaBytecodeFingerprinter> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
public string Ecosystem
{
    // Constant identifier; this fingerprinter always reports "maven".
    get { return "maven"; }
}
/// <inheritdoc />
/// <remarks>
/// Every <c>.class</c> file returned by <c>GetClassFiles</c> is processed in turn. A failure in a single
/// file is logged at debug level and does not stop the run; a failure outside the per-file step is
/// logged as a warning and reported through <c>FingerprintResult.Fail</c>.
/// Cancellation requested through <paramref name="cancellationToken"/> is never converted into a
/// per-file failure or a failed result: the <see cref="OperationCanceledException"/> propagates.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <see langword="null"/>.</exception>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
public async Task<FingerprintResult> FingerprintAsync(
    FingerprintRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);

    var sw = Stopwatch.StartNew();
    var methods = new Dictionary<string, MethodFingerprint>(StringComparer.Ordinal);

    try
    {
        var classFiles = GetClassFiles(request.PackagePath);
        var filesProcessed = 0;

        foreach (var classPath in classFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                await ProcessClassFileAsync(classPath, request.PackagePath, methods, request, cancellationToken);
                filesProcessed++;
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // A cancelled read is not a broken class file; stop immediately.
                throw;
            }
            catch (Exception ex)
            {
                // Best effort: one unreadable file must not fail the whole package.
                _logger.LogDebug(ex, "Failed to process class file {Path}", classPath);
            }
        }

        sw.Stop();
        _logger.LogDebug(
            "Fingerprinted {MethodCount} methods from {FileCount} class files in {Duration}ms",
            methods.Count, filesProcessed, sw.ElapsedMilliseconds);

        return FingerprintResult.Ok(methods, sw.Elapsed, filesProcessed);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Let the caller observe cancellation instead of a misleading "failed" result.
        sw.Stop();
        throw;
    }
    catch (Exception ex)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to fingerprint Java package at {Path}", request.PackagePath);
        return FingerprintResult.Fail(ex.Message, sw.Elapsed);
    }
}
/// <summary>
/// Lists the <c>.class</c> files under <paramref name="packagePath"/>, excluding everything inside a
/// top-level <c>META-INF</c> directory.
/// </summary>
/// <param name="packagePath">Root directory of the extracted archive.</param>
/// <returns>
/// Matching file paths in ordinal order, or an empty array when the directory does not exist.
/// </returns>
private static string[] GetClassFiles(string packagePath)
{
    if (!Directory.Exists(packagePath))
        return [];

    char[] separators = [Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar];

    var classFiles = Directory.GetFiles(packagePath, "*.class", SearchOption.AllDirectories)
        .Where(file =>
        {
            // Path.GetRelativePath handles trailing separators and repeated path fragments,
            // which a textual Replace of the root does not.
            var relativePath = Path.GetRelativePath(packagePath, file);
            var firstSeparator = relativePath.IndexOfAny(separators);

            // A file directly in the root has no directory segment and is always kept.
            if (firstSeparator < 0)
                return true;

            // Compare the whole first segment so siblings such as "META-INFO" are not dropped.
            var topLevelDirectory = relativePath[..firstSeparator];
            return !topLevelDirectory.Equals("META-INF", StringComparison.OrdinalIgnoreCase);
        })
        .ToArray();

    // Enumeration order is filesystem-dependent; sort so the processing order is stable between runs.
    Array.Sort(classFiles, StringComparer.Ordinal);
    return classFiles;
}
/// <summary>
/// Reads one <c>.class</c> file and stores a fingerprint for each eligible method in
/// <paramref name="methods"/>.
/// </summary>
/// <remarks>
/// Files shorter than 10 bytes are ignored, and files without the class-file magic number are logged
/// and ignored. Parse errors are logged at debug level and swallowed. An existing entry with the same
/// method key is overwritten.
/// </remarks>
/// <param name="classPath">Path of the class file to read.</param>
/// <param name="packagePath">Package root; used to compute the relative source file path.</param>
/// <param name="methods">Accumulator keyed by <c>Class::nameDescriptor</c>.</param>
/// <param name="request">Supplies the <c>IncludePrivateMethods</c> switch.</param>
/// <param name="cancellationToken">Token passed to the file read.</param>
private async Task ProcessClassFileAsync(
    string classPath,
    string packagePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request,
    CancellationToken cancellationToken)
{
    var content = await File.ReadAllBytesAsync(classPath, cancellationToken);

    if (content.Length < 10)
    {
        return;
    }

    if (BinaryPrimitives.ReadUInt32BigEndian(content) != ClassFileMagic)
    {
        _logger.LogDebug("Invalid class file magic in {Path}", classPath);
        return;
    }

    try
    {
        var parsed = ParseClassFile(content);
        var sourceFile = Path.GetRelativePath(packagePath, classPath);

        foreach (var candidate in parsed.Methods)
        {
            // Without IncludePrivateMethods only public and protected methods are kept.
            var publicOrProtected = candidate.IsPublic || candidate.IsProtected;
            var wanted = request.IncludePrivateMethods || publicOrProtected;

            // Methods flagged synthetic or bridge are never fingerprinted.
            var syntheticOrBridge = candidate.IsSynthetic || candidate.IsBridge;

            if (!wanted || syntheticOrBridge)
            {
                continue;
            }

            var key = $"{parsed.ClassName}::{candidate.Name}{candidate.Descriptor}";

            methods[key] = new MethodFingerprint
            {
                MethodKey = key,
                DeclaringType = parsed.ClassName,
                Name = candidate.Name,
                Signature = ParseDescriptor(candidate.Descriptor),
                BodyHash = candidate.BodyHash,
                SignatureHash = ComputeHash(candidate.Descriptor),
                IsPublic = candidate.IsPublic,
                BodySize = candidate.CodeLength,
                SourceFile = sourceFile
            };
        }
    }
    catch (Exception failure)
    {
        _logger.LogDebug(failure, "Error parsing class file {Path}", classPath);
    }
}
/// <summary>
/// Walks a class file from the first byte and returns its class name, access flags and methods.
/// </summary>
/// <param name="bytes">Raw class file content; the caller has already checked the magic number.</param>
/// <returns>The parsed class description.</returns>
private JavaClassInfo ParseClassFile(byte[] bytes)
{
    var cursor = new JavaClassReader(bytes);

    cursor.Skip(4);      // magic, verified by the caller
    cursor.ReadU2();     // minor version, unused
    cursor.ReadU2();     // major version, unused

    var pool = ParseConstantPool(cursor);

    var classFlags = cursor.ReadU2();
    var resolvedName = ResolveClassName(pool, cursor.ReadU2()); // this_class index
    cursor.ReadU2();     // super class index, unused

    // Interface table: a count followed by one two-byte index per interface.
    cursor.Skip(cursor.ReadU2() * 2);

    // Fields are not fingerprinted; step over each one.
    for (int fieldsLeft = cursor.ReadU2(); fieldsLeft > 0; fieldsLeft--)
    {
        SkipFieldOrMethod(cursor);
    }

    int methodTotal = cursor.ReadU2();
    var parsedMethods = new List<JavaMethodInfo>();
    while (parsedMethods.Count < methodTotal)
    {
        parsedMethods.Add(ParseMethod(cursor, pool));
    }

    return new JavaClassInfo
    {
        ClassName = resolvedName,
        AccessFlags = classFlags,
        Methods = parsedMethods
    };
}
/// <summary>
/// Reads the constant pool at the reader's current position.
/// </summary>
/// <param name="reader">Reader positioned on the <c>constant_pool_count</c> field.</param>
/// <returns>
/// A list indexed by constant pool index. Index 0 and the second slot of each Long/Double entry hold
/// empty placeholder entries.
/// </returns>
/// <exception cref="InvalidDataException">An entry carries a tag this parser does not know.</exception>
private static List<ConstantPoolEntry> ParseConstantPool(JavaClassReader reader)
{
    var count = reader.ReadU2();
    var pool = new List<ConstantPoolEntry>(count) { new() }; // Index 0 is unused

    for (var i = 1; i < count; i++)
    {
        var tag = reader.ReadU1();
        var entry = new ConstantPoolEntry { Tag = tag };
        var occupiesTwoSlots = false;

        switch (tag)
        {
            case 1: // CONSTANT_Utf8
                var length = reader.ReadU2();
                // NOTE(review): the class file format stores "modified UTF-8"; Encoding.UTF8 decodes
                // it correctly except for embedded NULs and supplementary characters - confirm
                // whether those need to be supported.
                entry.StringValue = Encoding.UTF8.GetString(reader.ReadBytes(length));
                break;
            case 3: // CONSTANT_Integer
            case 4: // CONSTANT_Float
                reader.Skip(4);
                break;
            case 5: // CONSTANT_Long
            case 6: // CONSTANT_Double
                reader.Skip(8);
                occupiesTwoSlots = true;
                break;
            case 7: // CONSTANT_Class
            case 8: // CONSTANT_String
                entry.NameIndex = reader.ReadU2();
                break;
            case 9: // CONSTANT_Fieldref
            case 10: // CONSTANT_Methodref
            case 11: // CONSTANT_InterfaceMethodref
                entry.ClassIndex = reader.ReadU2();
                entry.NameAndTypeIndex = reader.ReadU2();
                break;
            case 12: // CONSTANT_NameAndType
                entry.NameIndex = reader.ReadU2();
                entry.DescriptorIndex = reader.ReadU2();
                break;
            case 15: // CONSTANT_MethodHandle
                reader.Skip(3);
                break;
            case 16: // CONSTANT_MethodType
                reader.Skip(2);
                break;
            case 17: // CONSTANT_Dynamic
            case 18: // CONSTANT_InvokeDynamic
                reader.Skip(4);
                break;
            case 19: // CONSTANT_Module
            case 20: // CONSTANT_Package
                reader.Skip(2);
                break;
            default:
                // The payload size of an unknown tag is unknown, so every later read would be
                // misaligned. Fail loudly instead of producing garbage names.
                throw new InvalidDataException(
                    $"Unsupported constant pool tag {tag} at index {i}.");
        }

        pool.Add(entry);

        if (occupiesTwoSlots)
        {
            // The real entry lives at index i; index i + 1 is unusable and gets a placeholder.
            pool.Add(new ConstantPoolEntry());
            i++;
        }
    }

    return pool;
}
/// <summary>
/// Reads one <c>method_info</c> structure and hashes the bytecode of its <c>Code</c> attribute.
/// </summary>
/// <param name="reader">Reader positioned on the method's access flags.</param>
/// <param name="constantPool">Constant pool used to resolve the name, descriptor and attribute names.</param>
/// <returns>
/// The method description. Methods without a <c>Code</c> attribute get a code length of 0 and the
/// hash of an empty body.
/// </returns>
/// <exception cref="InvalidDataException">
/// An attribute length is too large to address, or the code length does not fit in its attribute.
/// </exception>
private static JavaMethodInfo ParseMethod(JavaClassReader reader, List<ConstantPoolEntry> constantPool)
{
    var accessFlags = reader.ReadU2();
    var nameIndex = reader.ReadU2();
    var descriptorIndex = reader.ReadU2();

    var name = GetUtf8(constantPool, nameIndex);
    var descriptor = GetUtf8(constantPool, descriptorIndex);

    // Attributes
    var attributeCount = reader.ReadU2();
    var codeBytes = Array.Empty<byte>();
    var codeLength = 0;

    // max_stack (2) + max_locals (2) + code_length (4)
    const uint codeHeaderSize = 8;

    for (var i = 0; i < attributeCount; i++)
    {
        var attrNameIndex = reader.ReadU2();
        var attrLength = reader.ReadU4();

        // Lengths are unsigned in the file; anything above int.MaxValue would turn into a
        // negative offset after the cast below and move the reader backwards.
        if (attrLength > int.MaxValue)
        {
            throw new InvalidDataException(
                $"Attribute length {attrLength} in method '{name}' exceeds the supported size.");
        }

        if (GetUtf8(constantPool, attrNameIndex) != "Code")
        {
            reader.Skip((int)attrLength);
            continue;
        }

        if (attrLength < codeHeaderSize)
        {
            throw new InvalidDataException(
                $"Code attribute of method '{name}' is too short ({attrLength} bytes).");
        }

        reader.Skip(4); // max_stack + max_locals
        var declaredCodeLength = reader.ReadU4();

        if (declaredCodeLength > attrLength - codeHeaderSize)
        {
            throw new InvalidDataException(
                $"Code length {declaredCodeLength} of method '{name}' does not fit in its attribute ({attrLength} bytes).");
        }

        codeLength = (int)declaredCodeLength;
        codeBytes = reader.ReadBytes(codeLength);

        // Skip exception table and code attributes
        reader.Skip((int)(attrLength - codeHeaderSize - declaredCodeLength));
    }

    return new JavaMethodInfo
    {
        Name = name,
        Descriptor = descriptor,
        AccessFlags = accessFlags,
        CodeLength = codeLength,
        BodyHash = ComputeHash(codeBytes)
    };
}
/// <summary>
/// Advances the reader past one field or method structure without interpreting it.
/// </summary>
/// <param name="reader">Reader positioned on the member's access flags.</param>
/// <exception cref="InvalidDataException">An attribute length is too large to address.</exception>
private static void SkipFieldOrMethod(JavaClassReader reader)
{
    reader.Skip(6); // access_flags + name_index + descriptor_index

    var attributeCount = reader.ReadU2();
    for (var i = 0; i < attributeCount; i++)
    {
        reader.Skip(2); // attribute_name_index
        var length = reader.ReadU4();

        // The length is unsigned in the file; a value above int.MaxValue would become a negative
        // offset after the cast and move the reader backwards.
        if (length > int.MaxValue)
        {
            throw new InvalidDataException(
                $"Attribute length {length} exceeds the supported size.");
        }

        reader.Skip((int)length);
    }
}
/// <summary>
/// Resolves a class entry of the constant pool to a dotted class name.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="classIndex">Index of the class entry.</param>
/// <returns>
/// The name with <c>/</c> replaced by <c>.</c>, or <c>"Unknown"</c> when the index is out of range
/// or the entry's tag is not 7.
/// </returns>
private static string ResolveClassName(List<ConstantPoolEntry> pool, int classIndex)
{
    const string fallback = "Unknown";

    var indexInRange = classIndex > 0 && classIndex < pool.Count;
    if (!indexInRange)
    {
        return fallback;
    }

    var candidate = pool[classIndex];
    return candidate.Tag == 7
        ? GetUtf8(pool, candidate.NameIndex).Replace('/', '.')
        : fallback;
}
/// <summary>
/// Returns the string stored at <paramref name="index"/> in the constant pool.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="index">Index of the entry to read.</param>
/// <returns>
/// The entry's string value, or an empty string when the index is out of range or the entry has no
/// string value.
/// </returns>
private static string GetUtf8(List<ConstantPoolEntry> pool, int index)
{
    var indexInRange = index > 0 && index < pool.Count;
    return indexInRange
        ? (pool[index].StringValue ?? string.Empty)
        : string.Empty;
}
/// <summary>
/// Renders a Java method descriptor as a readable signature,
/// e.g. <c>(Ljava/lang/String;I)V</c> becomes <c>(String, int) -> void</c>.
/// </summary>
/// <param name="descriptor">Method descriptor as stored in the class file.</param>
/// <returns>
/// The parameter list in parentheses (only when the descriptor starts with <c>(</c>), followed by
/// <c> -> </c> and the return type when any text remains after the parameter list.
/// </returns>
private static string ParseDescriptor(string descriptor)
{
    var cursor = 0;
    var rendered = string.Empty;

    if (descriptor.StartsWith('('))
    {
        var parameterTypes = new List<string>();
        cursor = 1;

        while (cursor < descriptor.Length && descriptor[cursor] != ')')
        {
            var (parameterType, next) = ParseType(descriptor, cursor);
            parameterTypes.Add(parameterType);
            cursor = next;
        }

        rendered = $"({string.Join(", ", parameterTypes)})";
        cursor++; // step over ')'
    }

    if (cursor >= descriptor.Length)
    {
        return rendered;
    }

    var (returnType, _) = ParseType(descriptor, cursor);
    return $"{rendered} -> {returnType}";
}
/// <summary>
/// Decodes the single type that starts at <paramref name="index"/> in a descriptor.
/// </summary>
/// <param name="descriptor">Descriptor text.</param>
/// <param name="index">Position of the type's first character.</param>
/// <returns>
/// The readable type name and the index just past the type. An index at or beyond the end yields
/// <c>"void"</c> with the index unchanged; an unrecognised character yields <c>"?"</c>.
/// </returns>
private static (string typeName, int newIndex) ParseType(string descriptor, int index)
{
    if (index >= descriptor.Length)
    {
        return ("void", index);
    }

    var tag = descriptor[index];

    // Arrays and object types span more than one character and have dedicated parsers.
    if (tag == '[')
    {
        return ParseArrayType(descriptor, index);
    }

    if (tag == 'L')
    {
        return ParseObjectType(descriptor, index);
    }

    // Everything else is a single-character code.
    var keyword = tag switch
    {
        'B' => "byte",
        'C' => "char",
        'D' => "double",
        'F' => "float",
        'I' => "int",
        'J' => "long",
        'S' => "short",
        'Z' => "boolean",
        'V' => "void",
        _ => "?"
    };

    return (keyword, index + 1);
}
/// <summary>
/// Decodes an array type: the <c>[</c> at <paramref name="index"/> followed by its element type.
/// </summary>
/// <param name="descriptor">Descriptor text.</param>
/// <param name="index">Position of the <c>[</c> character.</param>
/// <returns>The element type name suffixed with <c>[]</c>, and the index just past the element type.</returns>
private static (string typeName, int newIndex) ParseArrayType(string descriptor, int index)
{
    var element = ParseType(descriptor, index + 1);
    return (element.typeName + "[]", element.newIndex);
}
/// <summary>
/// Decodes an object type of the form <c>Lpkg/Name;</c> starting at <paramref name="index"/>.
/// </summary>
/// <param name="descriptor">Descriptor text.</param>
/// <param name="index">Position of the leading <c>L</c>.</param>
/// <returns>
/// The simple class name (text after the last <c>/</c>) and the index just past the <c>;</c>.
/// When no <c>;</c> follows, returns <c>"Object"</c> and advances by one character.
/// </returns>
private static (string typeName, int newIndex) ParseObjectType(string descriptor, int index)
{
    var terminator = descriptor.IndexOf(';', index);
    if (terminator < 0)
    {
        return ("Object", index + 1);
    }

    var qualifiedName = descriptor[(index + 1)..terminator];

    // LastIndexOf returns -1 when there is no package part, so +1 keeps the whole name.
    var simpleNameStart = qualifiedName.LastIndexOf('/') + 1;
    return (qualifiedName[simpleNameStart..], terminator + 1);
}
/// <summary>
/// Hashes <paramref name="data"/> with SHA-256 and returns the first 16 bytes as lowercase hex.
/// </summary>
/// <param name="data">Bytes to hash.</param>
/// <returns>32 hex characters, or the literal <c>"empty"</c> for a zero-length array.</returns>
private static string ComputeHash(byte[] data)
{
    if (data.Length > 0)
    {
        var digest = SHA256.HashData(data);
        return Convert.ToHexStringLower(digest[..16]);
    }

    return "empty";
}

/// <summary>
/// Hashes the UTF-8 encoding of <paramref name="data"/>.
/// </summary>
/// <param name="data">Text to hash.</param>
/// <returns>32 hex characters, or the literal <c>"empty"</c> for a null or empty string.</returns>
private static string ComputeHash(string data) =>
    string.IsNullOrEmpty(data)
        ? "empty"
        : ComputeHash(Encoding.UTF8.GetBytes(data));
/// <summary>
/// Forward-only, bounds-checked cursor over a class file's bytes. Multi-byte values are read
/// big-endian.
/// </summary>
/// <remarks>
/// Every operation validates the requested move before changing the position, so a negative or
/// oversized length taken from a malformed file raises <see cref="InvalidDataException"/> instead of
/// moving the cursor backwards or past the end of the buffer.
/// </remarks>
private sealed class JavaClassReader(byte[] data)
{
    private int _position;

    /// <summary>Reads one unsigned byte.</summary>
    public byte ReadU1() => data[Advance(1)];

    /// <summary>Reads a big-endian unsigned 16-bit value.</summary>
    public ushort ReadU2() => BinaryPrimitives.ReadUInt16BigEndian(data.AsSpan(Advance(2), 2));

    /// <summary>Reads a big-endian unsigned 32-bit value.</summary>
    public uint ReadU4() => BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(Advance(4), 4));

    /// <summary>Returns a copy of the next <paramref name="count"/> bytes.</summary>
    public byte[] ReadBytes(int count)
    {
        var start = Advance(count);
        return data[start..(start + count)];
    }

    /// <summary>Moves the cursor forward by <paramref name="count"/> bytes.</summary>
    public void Skip(int count) => Advance(count);

    /// <summary>
    /// Validates and applies a forward move, returning the position before the move.
    /// </summary>
    /// <exception cref="InvalidDataException">
    /// <paramref name="count"/> is negative or reaches beyond the end of the buffer.
    /// </exception>
    private int Advance(int count)
    {
        // Written as a subtraction so a huge count cannot overflow the comparison.
        if (count < 0 || count > data.Length - _position)
        {
            throw new InvalidDataException(
                $"Class file is truncated or malformed: cannot move {count} byte(s) from offset {_position} (length {data.Length}).");
        }

        var start = _position;
        _position += count;
        return start;
    }
}
/// <summary>
/// One slot of the parsed constant pool. Only the fields relevant to the entry's tag are populated;
/// the rest keep their defaults.
/// </summary>
private sealed class ConstantPoolEntry
{
    /// <summary>Constant kind as stored in the class file; 0 for placeholder slots.</summary>
    public byte Tag { get; init; }

    /// <summary>Decoded text of a Utf8 entry (tag 1); otherwise <see langword="null"/>.</summary>
    public string? StringValue { get; set; }

    /// <summary>Index read for Class, String and NameAndType entries (tags 7, 8, 12).</summary>
    public int NameIndex { get; set; }

    /// <summary>Descriptor index of a NameAndType entry (tag 12).</summary>
    public int DescriptorIndex { get; set; }

    /// <summary>Class index of a Fieldref, Methodref or InterfaceMethodref entry (tags 9-11).</summary>
    public int ClassIndex { get; set; }

    /// <summary>NameAndType index of a Fieldref, Methodref or InterfaceMethodref entry (tags 9-11).</summary>
    public int NameAndTypeIndex { get; set; }
}
/// <summary>
/// Result of parsing one class file: its name, class-level access flags and methods.
/// </summary>
private sealed record JavaClassInfo
{
    /// <summary>Dotted class name, or <c>"Unknown"</c> when it could not be resolved.</summary>
    public required string ClassName { get; init; }

    /// <summary>Raw class access flags as read from the file.</summary>
    public ushort AccessFlags { get; init; }

    /// <summary>Methods parsed from the class file.</summary>
    public required List<JavaMethodInfo> Methods { get; init; }
}
/// <summary>
/// One method parsed from a class file, with flag helpers derived from <see cref="AccessFlags"/>.
/// </summary>
private sealed record JavaMethodInfo
{
    // Bit masks tested against AccessFlags.
    private const ushort PublicMask = 0x0001;
    private const ushort ProtectedMask = 0x0004;
    private const ushort BridgeMask = 0x0040;
    private const ushort SyntheticMask = 0x1000;

    /// <summary>Method name from the constant pool.</summary>
    public required string Name { get; init; }

    /// <summary>Raw method descriptor from the constant pool.</summary>
    public required string Descriptor { get; init; }

    /// <summary>Raw method access flags as read from the file.</summary>
    public ushort AccessFlags { get; init; }

    /// <summary>Length in bytes of the method's bytecode; 0 when there is none.</summary>
    public int CodeLength { get; init; }

    /// <summary>Hash of the method's bytecode.</summary>
    public required string BodyHash { get; init; }

    /// <summary>True when the public bit (0x0001) is set.</summary>
    public bool IsPublic => HasFlag(PublicMask);

    /// <summary>True when the protected bit (0x0004) is set.</summary>
    public bool IsProtected => HasFlag(ProtectedMask);

    /// <summary>True when the synthetic bit (0x1000) is set.</summary>
    public bool IsSynthetic => HasFlag(SyntheticMask);

    /// <summary>True when the bridge bit (0x0040) is set.</summary>
    public bool IsBridge => HasFlag(BridgeMask);

    private bool HasFlag(ushort mask) => (AccessFlags & mask) != 0;
}
}