// -----------------------------------------------------------------------------
// JavaBytecodeFingerprinter.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-010)
// Description: Java method fingerprinting using bytecode parsing.
//              Parses .class files from JAR archives for method extraction.
// -----------------------------------------------------------------------------

using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;

namespace StellaOps.Scanner.VulnSurfaces.Fingerprint;

/// <summary>
/// Computes method fingerprints for Java packages using bytecode hashing.
/// Parses .class files from extracted JAR archives.
/// </summary>
public sealed class JavaBytecodeFingerprinter : IMethodFingerprinter
{
    private readonly ILogger<JavaBytecodeFingerprinter> _logger;

    // Java class file magic number
    private const uint ClassFileMagic = 0xCAFEBABE;

    // Constant pool tag of a CONSTANT_Class entry.
    private const byte ConstantClassTag = 7;

    // Fixed-size prefix of a Code attribute: max_stack (2) + max_locals (2) + code_length (4).
    private const int CodeHeaderSize = 8;

    public JavaBytecodeFingerprinter(ILogger<JavaBytecodeFingerprinter> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public string Ecosystem => "maven";

    /// <inheritdoc />
    public async Task<FingerprintResult> FingerprintAsync(
        FingerprintRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();
        var methods = new Dictionary<string, MethodFingerprint>(StringComparer.Ordinal);

        try
        {
            var classFiles = GetClassFiles(request.PackagePath);
            var filesProcessed = 0;

            foreach (var classPath in classFiles)
            {
                cancellationToken.ThrowIfCancellationRequested();

                try
                {
                    await ProcessClassFileAsync(classPath, request.PackagePath, methods, request, cancellationToken)
                        .ConfigureAwait(false);
                    filesProcessed++;
                }
                catch (Exception ex) when (ex is not OperationCanceledException)
                {
                    // A single unreadable file must not abort the whole package.
                    // Cancellation is deliberately excluded: it is not a per-file
                    // failure and must stop the loop (handled by the outer catch).
                    _logger.LogDebug(ex, "Failed to process class file {Path}", classPath);
                }
            }

            sw.Stop();
            _logger.LogDebug(
                "Fingerprinted {MethodCount} methods from {FileCount} class files in {Duration}ms",
                methods.Count, filesProcessed, sw.ElapsedMilliseconds);

            return FingerprintResult.Ok(methods, sw.Elapsed, filesProcessed);
        }
        catch (Exception ex)
        {
            sw.Stop();
            _logger.LogWarning(ex, "Failed to fingerprint Java package at {Path}", request.PackagePath);
            return FingerprintResult.Fail(ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Enumerates all <c>*.class</c> files below <paramref name="packagePath"/>,
    /// excluding anything under a top-level <c>META-INF</c> directory.
    /// Returns an empty array when the directory does not exist.
    /// </summary>
    private static string[] GetClassFiles(string packagePath)
    {
        if (!Directory.Exists(packagePath))
        {
            return [];
        }

        var files = Directory.GetFiles(packagePath, "*.class", SearchOption.AllDirectories)
            .Where(file => !IsUnderMetaInf(packagePath, file))
            .ToArray();

        // File system enumeration order is unspecified; sort so that, when two class
        // files yield the same method key, the one that wins is the same on every run.
        Array.Sort(files, StringComparer.Ordinal);
        return files;
    }

    /// <summary>
    /// Returns true when the first segment of <paramref name="filePath"/>, taken
    /// relative to <paramref name="packagePath"/>, is exactly <c>META-INF</c> (case-insensitive).
    /// </summary>
    private static bool IsUnderMetaInf(string packagePath, string filePath)
    {
        var relativePath = Path.GetRelativePath(packagePath, filePath);
        var separatorIndex = relativePath.AsSpan()
            .IndexOfAny(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
        var firstSegment = separatorIndex < 0 ? relativePath : relativePath[..separatorIndex];

        return firstSegment.Equals("META-INF", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Reads one class file, parses it and adds a fingerprint for every eligible
    /// method to <paramref name="methods"/> (later entries overwrite earlier ones
    /// with the same key). Parse errors are logged at debug level and swallowed.
    /// </summary>
    private async Task ProcessClassFileAsync(
        string classPath,
        string packagePath,
        Dictionary<string, MethodFingerprint> methods,
        FingerprintRequest request,
        CancellationToken cancellationToken)
    {
        var bytes = await File.ReadAllBytesAsync(classPath, cancellationToken).ConfigureAwait(false);

        if (bytes.Length < 10)
        {
            return;
        }

        // Verify magic number
        var magic = BinaryPrimitives.ReadUInt32BigEndian(bytes);
        if (magic != ClassFileMagic)
        {
            _logger.LogDebug("Invalid class file magic in {Path}", classPath);
            return;
        }

        try
        {
            var classInfo = ParseClassFile(bytes);
            var relativePath = Path.GetRelativePath(packagePath, classPath);

            foreach (var method in classInfo.Methods)
            {
                // Skip non-public, non-protected methods unless requested
                if (!request.IncludePrivateMethods && !method.IsPublic && !method.IsProtected)
                {
                    continue;
                }

                // Skip synthetic and bridge methods
                if (method.IsSynthetic || method.IsBridge)
                {
                    continue;
                }

                var methodKey = $"{classInfo.ClassName}::{method.Name}{method.Descriptor}";

                methods[methodKey] = new MethodFingerprint
                {
                    MethodKey = methodKey,
                    DeclaringType = classInfo.ClassName,
                    Name = method.Name,
                    Signature = ParseDescriptor(method.Descriptor),
                    BodyHash = method.BodyHash,
                    SignatureHash = ComputeHash(method.Descriptor),
                    IsPublic = method.IsPublic,
                    BodySize = method.CodeLength,
                    SourceFile = relativePath
                };
            }
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Error parsing class file {Path}", classPath);
        }
    }

    /// <summary>
    /// Parses the class file structure up to and including the method table.
    /// </summary>
    /// <exception cref="InvalidDataException">The data is truncated or structurally invalid.</exception>
    private static JavaClassInfo ParseClassFile(byte[] bytes)
    {
        var reader = new JavaClassReader(bytes);

        // Skip magic (already verified)
        reader.Skip(4);

        // Version info
        _ = reader.ReadU2(); // minor version
        _ = reader.ReadU2(); // major version

        // Constant pool
        var constantPool = ParseConstantPool(reader);

        // Access flags
        var accessFlags = reader.ReadU2();

        // This class
        var thisClassIndex = reader.ReadU2();
        var className = ResolveClassName(constantPool, thisClassIndex);

        // Super class
        _ = reader.ReadU2(); // super class index

        // Interfaces (each entry is a u2 constant pool index)
        var interfaceCount = reader.ReadU2();
        reader.Skip(interfaceCount * 2);

        // Fields
        var fieldCount = reader.ReadU2();
        for (var i = 0; i < fieldCount; i++)
        {
            SkipFieldOrMethod(reader);
        }

        // Methods
        var methodCount = reader.ReadU2();
        var methods = new List<JavaMethodInfo>(methodCount);
        for (var i = 0; i < methodCount; i++)
        {
            methods.Add(ParseMethod(reader, constantPool));
        }

        return new JavaClassInfo
        {
            ClassName = className,
            AccessFlags = accessFlags,
            Methods = methods
        };
    }

    /// <summary>
    /// Reads the constant pool. The returned list is indexed exactly like the class
    /// file's pool: slot 0 is an unused placeholder, and each Long/Double entry is
    /// followed by a placeholder for the second slot it occupies.
    /// </summary>
    /// <exception cref="InvalidDataException">An unknown tag was encountered or the data is truncated.</exception>
    private static List<ConstantPoolEntry> ParseConstantPool(JavaClassReader reader)
    {
        var count = reader.ReadU2();
        var pool = new List<ConstantPoolEntry>(count) { new() }; // Index 0 is unused

        for (var i = 1; i < count; i++)
        {
            var tag = reader.ReadU1();
            var entry = new ConstantPoolEntry { Tag = tag };
            var occupiesTwoSlots = false;

            switch (tag)
            {
                case 1: // CONSTANT_Utf8
                    // NOTE: class files use "modified UTF-8". Standard UTF-8 decoding is
                    // identical except for embedded NUL and supplementary characters,
                    // which will decode to replacement characters here.
                    var length = reader.ReadU2();
                    entry.StringValue = Encoding.UTF8.GetString(reader.ReadBytes(length));
                    break;
                case 3: // CONSTANT_Integer
                case 4: // CONSTANT_Float
                    reader.Skip(4);
                    break;
                case 5: // CONSTANT_Long
                case 6: // CONSTANT_Double
                    reader.Skip(8);
                    occupiesTwoSlots = true;
                    break;
                case 7: // CONSTANT_Class
                case 8: // CONSTANT_String
                    entry.NameIndex = reader.ReadU2();
                    break;
                case 9: // CONSTANT_Fieldref
                case 10: // CONSTANT_Methodref
                case 11: // CONSTANT_InterfaceMethodref
                    entry.ClassIndex = reader.ReadU2();
                    entry.NameAndTypeIndex = reader.ReadU2();
                    break;
                case 12: // CONSTANT_NameAndType
                    entry.NameIndex = reader.ReadU2();
                    entry.DescriptorIndex = reader.ReadU2();
                    break;
                case 15: // CONSTANT_MethodHandle
                    reader.Skip(3);
                    break;
                case 16: // CONSTANT_MethodType
                    reader.Skip(2);
                    break;
                case 17: // CONSTANT_Dynamic
                case 18: // CONSTANT_InvokeDynamic
                    reader.Skip(4);
                    break;
                case 19: // CONSTANT_Module
                case 20: // CONSTANT_Package
                    reader.Skip(2);
                    break;
                default:
                    // The entry size is unknown, so every later read would be misaligned.
                    throw new InvalidDataException($"Unknown constant pool tag {tag} at index {i}.");
            }

            // The real entry goes at index i; the placeholder (if any) at i + 1.
            pool.Add(entry);
            if (occupiesTwoSlots)
            {
                pool.Add(new ConstantPoolEntry());
                i++;
            }
        }

        return pool;
    }

    /// <summary>
    /// Reads one method_info structure and hashes the bytecode of its Code attribute.
    /// Methods without a Code attribute get a code length of 0 and the hash of an empty body.
    /// </summary>
    /// <exception cref="InvalidDataException">An attribute length is inconsistent or the data is truncated.</exception>
    private static JavaMethodInfo ParseMethod(JavaClassReader reader, List<ConstantPoolEntry> constantPool)
    {
        var accessFlags = reader.ReadU2();
        var nameIndex = reader.ReadU2();
        var descriptorIndex = reader.ReadU2();

        var name = GetUtf8(constantPool, nameIndex);
        var descriptor = GetUtf8(constantPool, descriptorIndex);

        // Attributes
        var attributeCount = reader.ReadU2();
        var codeBytes = Array.Empty<byte>();
        var codeLength = 0;

        for (var i = 0; i < attributeCount; i++)
        {
            var attrNameIndex = reader.ReadU2();
            var attrLength = reader.ReadU4();
            var attrName = GetUtf8(constantPool, attrNameIndex);

            if (attrName == "Code")
            {
                reader.Skip(4); // max_stack (2) + max_locals (2)
                var declaredCodeLength = reader.ReadU4();

                // Whatever follows the bytecode (exception table, nested attributes).
                // Computed in 64-bit so hostile lengths cannot wrap around.
                var trailingLength = (long)attrLength - CodeHeaderSize - declaredCodeLength;
                if (trailingLength < 0)
                {
                    throw new InvalidDataException(
                        $"Code attribute length {attrLength} is too small for code_length {declaredCodeLength}.");
                }

                codeBytes = reader.ReadBytes(declaredCodeLength);
                codeLength = codeBytes.Length;
                reader.Skip(trailingLength);
            }
            else
            {
                reader.Skip(attrLength);
            }
        }

        return new JavaMethodInfo
        {
            Name = name,
            Descriptor = descriptor,
            AccessFlags = accessFlags,
            CodeLength = codeLength,
            BodyHash = ComputeHash(codeBytes)
        };
    }

    /// <summary>
    /// Advances the reader past one field_info/method_info structure without decoding it.
    /// </summary>
    private static void SkipFieldOrMethod(JavaClassReader reader)
    {
        reader.Skip(6); // access_flags + name_index + descriptor_index

        var attributeCount = reader.ReadU2();
        for (var i = 0; i < attributeCount; i++)
        {
            reader.Skip(2); // attribute_name_index
            var length = reader.ReadU4();
            reader.Skip(length);
        }
    }

    /// <summary>
    /// Resolves a CONSTANT_Class index to a dotted class name, or "Unknown" when the
    /// index is out of range or does not refer to a class entry.
    /// </summary>
    private static string ResolveClassName(List<ConstantPoolEntry> pool, int classIndex)
    {
        if (classIndex <= 0 || classIndex >= pool.Count)
        {
            return "Unknown";
        }

        var classEntry = pool[classIndex];
        if (classEntry.Tag != ConstantClassTag)
        {
            return "Unknown";
        }

        return GetUtf8(pool, classEntry.NameIndex).Replace('/', '.');
    }

    /// <summary>
    /// Returns the string stored at <paramref name="index"/>, or an empty string when
    /// the index is out of range or the entry carries no string value.
    /// </summary>
    private static string GetUtf8(List<ConstantPoolEntry> pool, int index)
    {
        if (index <= 0 || index >= pool.Count)
        {
            return string.Empty;
        }

        return pool[index].StringValue ?? string.Empty;
    }

    /// <summary>
    /// Converts a Java method descriptor to a readable signature,
    /// e.g. <c>(Ljava/lang/String;I)V</c> becomes <c>(String, int) -> void</c>.
    /// </summary>
    private static string ParseDescriptor(string descriptor)
    {
        var sb = new StringBuilder();
        var i = 0;

        if (descriptor.StartsWith('('))
        {
            sb.Append('(');
            i = 1;
            var first = true;

            while (i < descriptor.Length && descriptor[i] != ')')
            {
                if (!first)
                {
                    sb.Append(", ");
                }

                first = false;

                var (typeName, newIndex) = ParseType(descriptor, i);
                sb.Append(typeName);
                i = newIndex;
            }

            sb.Append(')');
            i++; // Skip ')'
        }

        if (i < descriptor.Length)
        {
            var (returnType, _) = ParseType(descriptor, i);
            sb.Append(" -> ");
            sb.Append(returnType);
        }

        return sb.ToString();
    }

    /// <summary>
    /// Decodes the single field type starting at <paramref name="index"/> and returns
    /// its display name together with the index of the character following it.
    /// </summary>
    private static (string typeName, int newIndex) ParseType(string descriptor, int index)
    {
        if (index >= descriptor.Length)
        {
            return ("void", index);
        }

        var c = descriptor[index];

        return c switch
        {
            'B' => ("byte", index + 1),
            'C' => ("char", index + 1),
            'D' => ("double", index + 1),
            'F' => ("float", index + 1),
            'I' => ("int", index + 1),
            'J' => ("long", index + 1),
            'S' => ("short", index + 1),
            'Z' => ("boolean", index + 1),
            'V' => ("void", index + 1),
            '[' => ParseArrayType(descriptor, index),
            'L' => ParseObjectType(descriptor, index),
            _ => ("?", index + 1)
        };
    }

    /// <summary>
    /// Decodes an array type starting at the '[' at <paramref name="index"/>.
    /// Dimensions are counted iteratively rather than recursively so that a
    /// descriptor consisting of tens of thousands of '[' characters cannot
    /// exhaust the stack.
    /// </summary>
    private static (string typeName, int newIndex) ParseArrayType(string descriptor, int index)
    {
        var elementStart = index;
        while (elementStart < descriptor.Length && descriptor[elementStart] == '[')
        {
            elementStart++;
        }

        var dimensions = elementStart - index;

        // The character at elementStart is not '[', so this call cannot recurse back here.
        var (elementType, newIndex) = ParseType(descriptor, elementStart);

        var sb = new StringBuilder(elementType, elementType.Length + (2 * dimensions));
        sb.Insert(sb.Length, "[]", dimensions);

        return (sb.ToString(), newIndex);
    }

    /// <summary>
    /// Decodes an object type (<c>Lpkg/Name;</c>) to its simple class name. When the
    /// terminating ';' is missing, yields "Object" and advances by one character.
    /// </summary>
    private static (string typeName, int newIndex) ParseObjectType(string descriptor, int index)
    {
        var semicolonIndex = descriptor.IndexOf(';', index);
        if (semicolonIndex < 0)
        {
            return ("Object", index + 1);
        }

        var className = descriptor[(index + 1)..semicolonIndex];
        var simpleName = className.Split('/')[^1];

        return (simpleName, semicolonIndex + 1);
    }

    /// <summary>
    /// Returns the first 16 bytes of the SHA-256 digest of <paramref name="data"/> as
    /// lowercase hex, or the literal "empty" for zero-length input.
    /// </summary>
    private static string ComputeHash(byte[] data)
    {
        if (data.Length == 0)
        {
            return "empty";
        }

        var hashBytes = SHA256.HashData(data);
        return Convert.ToHexStringLower(hashBytes.AsSpan(0, 16));
    }

    /// <summary>
    /// Hashes the UTF-8 encoding of <paramref name="data"/>; null or empty input yields "empty".
    /// </summary>
    private static string ComputeHash(string data)
    {
        if (string.IsNullOrEmpty(data))
        {
            return "empty";
        }

        return ComputeHash(Encoding.UTF8.GetBytes(data));
    }

    /// <summary>
    /// Forward-only big-endian cursor over a class file. Every operation is
    /// bounds-checked and throws <see cref="InvalidDataException"/> instead of
    /// moving outside the buffer, because the input is not trusted.
    /// </summary>
    private sealed class JavaClassReader(byte[] data)
    {
        private int _position;

        public byte ReadU1()
        {
            EnsureAvailable(1);
            return data[_position++];
        }

        public ushort ReadU2()
        {
            EnsureAvailable(2);
            var value = BinaryPrimitives.ReadUInt16BigEndian(data.AsSpan(_position, 2));
            _position += 2;
            return value;
        }

        public uint ReadU4()
        {
            EnsureAvailable(4);
            var value = BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(_position, 4));
            _position += 4;
            return value;
        }

        public byte[] ReadBytes(long count)
        {
            EnsureAvailable(count);
            var length = (int)count; // safe: bounded by the remaining buffer size
            var result = data.AsSpan(_position, length).ToArray();
            _position += length;
            return result;
        }

        public void Skip(long count)
        {
            EnsureAvailable(count);
            _position += (int)count; // safe: bounded by the remaining buffer size
        }

        private void EnsureAvailable(long count)
        {
            var remaining = data.Length - _position;
            if (count < 0 || count > remaining)
            {
                throw new InvalidDataException(
                    $"Malformed class file: {count} byte(s) requested at offset {_position}, {remaining} available.");
            }
        }
    }

    private sealed class ConstantPoolEntry
    {
        public byte Tag { get; init; }
        public string? StringValue { get; set; }
        public int NameIndex { get; set; }
        public int DescriptorIndex { get; set; }
        public int ClassIndex { get; set; }
        public int NameAndTypeIndex { get; set; }
    }

    private sealed record JavaClassInfo
    {
        public required string ClassName { get; init; }
        public ushort AccessFlags { get; init; }
        public required List<JavaMethodInfo> Methods { get; init; }
    }

    private sealed record JavaMethodInfo
    {
        public required string Name { get; init; }
        public required string Descriptor { get; init; }
        public ushort AccessFlags { get; init; }
        public int CodeLength { get; init; }
        public required string BodyHash { get; init; }

        public bool IsPublic => (AccessFlags & 0x0001) != 0;    // ACC_PUBLIC
        public bool IsProtected => (AccessFlags & 0x0004) != 0; // ACC_PROTECTED
        public bool IsSynthetic => (AccessFlags & 0x1000) != 0; // ACC_SYNTHETIC
        public bool IsBridge => (AccessFlags & 0x0040) != 0;    // ACC_BRIDGE
    }
}