Add Canonical JSON serialization library with tests and documentation
- Implemented CanonJson class for deterministic JSON serialization and hashing. - Added unit tests for CanonJson functionality, covering various scenarios including key sorting, handling of nested objects, arrays, and special characters. - Created project files for the Canonical JSON library and its tests, including necessary package references. - Added README.md for library usage and API reference. - Introduced RabbitMqIntegrationFactAttribute for conditional RabbitMQ integration tests.
This commit is contained in:
@@ -0,0 +1,531 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// JavaInternalGraphBuilder.cs
|
||||
// Sprint: SPRINT_3700_0003_0001_trigger_extraction (TRIG-004)
|
||||
// Description: Java internal call graph builder using bytecode analysis.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Buffers.Binary;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scanner.VulnSurfaces.Models;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.CallGraph;
|
||||
|
||||
/// <summary>
|
||||
/// Internal call graph builder for Java packages using bytecode analysis.
|
||||
/// Parses .class files from JAR archives.
|
||||
/// </summary>
|
||||
public sealed class JavaInternalGraphBuilder : IInternalCallGraphBuilder
|
||||
{
|
||||
private readonly ILogger<JavaInternalGraphBuilder> _logger;
|
||||
private const uint ClassFileMagic = 0xCAFEBABE;
|
||||
|
||||
/// <summary>
/// Creates a builder that writes its diagnostics to <paramref name="logger"/>.
/// </summary>
/// <param name="logger">Destination for debug and warning messages.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
public JavaInternalGraphBuilder(ILogger<JavaInternalGraphBuilder> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc />
public string Ecosystem
{
    // Constant ecosystem identifier reported by this builder.
    get { return "maven"; }
}
|
||||
|
||||
/// <inheritdoc />
public bool CanHandle(string packagePath)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    if (string.IsNullOrEmpty(packagePath))
    {
        return false;
    }

    // A .jar path is accepted on its extension alone; the archive is not opened here.
    if (packagePath.EndsWith(".jar", IgnoreCase))
    {
        return true;
    }

    // A directory qualifies when at least one .class file exists anywhere below it;
    // anything else qualifies only by its .class extension.
    return Directory.Exists(packagePath)
        ? Directory.EnumerateFiles(packagePath, "*.class", SearchOption.AllDirectories).Any()
        : packagePath.EndsWith(".class", IgnoreCase);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Makes two passes over the files returned by <c>GetClassFiles</c>. The first pass records every class
/// name and method key. The second pass re-reads each file, registers its visible methods as graph nodes,
/// and adds an edge for every call whose target key was recorded in the first pass.
/// A file that cannot be read or parsed is logged at debug level and skipped. Any other exception,
/// including cancellation, is logged as a warning and returned as a failed result instead of being rethrown.
/// </remarks>
public async Task<InternalCallGraphBuildResult> BuildAsync(
    InternalCallGraphBuildRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);

    var sw = Stopwatch.StartNew();
    var graph = new InternalCallGraph
    {
        PackageId = request.PackageId,
        Version = request.Version
    };

    try
    {
        var classFiles = GetClassFiles(request.PackagePath);
        var filesProcessed = 0;

        // First pass: collect all classes and methods so that calls can be resolved
        // regardless of the order in which files are visited.
        var packageClasses = new HashSet<string>(StringComparer.Ordinal);
        var allMethods = new Dictionary<string, MethodInfo>(StringComparer.Ordinal);

        foreach (var classPath in classFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                var bytes = await File.ReadAllBytesAsync(classPath, cancellationToken).ConfigureAwait(false);
                var classInfo = ParseClassFile(bytes);
                if (classInfo is not null)
                {
                    packageClasses.Add(classInfo.ClassName);
                    foreach (var method in classInfo.Methods)
                    {
                        var key = $"{classInfo.ClassName}::{method.Name}{method.Descriptor}";
                        allMethods[key] = method with { DeclaringClass = classInfo.ClassName };
                    }
                }
            }
            // Cancellation is not a parse failure: let it reach the outer handler
            // instead of logging it and moving on to the next file.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to parse class file {Path}", classPath);
            }
        }

        // Second pass: analyze method bodies for internal calls
        foreach (var classPath in classFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                var bytes = await File.ReadAllBytesAsync(classPath, cancellationToken).ConfigureAwait(false);
                var classInfo = ParseClassFileWithCalls(bytes, packageClasses);
                if (classInfo is not null)
                {
                    foreach (var method in classInfo.Methods)
                    {
                        var callerKey = $"{classInfo.ClassName}::{method.Name}{method.Descriptor}";

                        // Unless private methods were requested, only public and protected methods become nodes.
                        if (!request.IncludePrivateMethods && !method.IsPublic && !method.IsProtected)
                        {
                            continue;
                        }

                        graph.AddMethod(new InternalMethodRef
                        {
                            MethodKey = callerKey,
                            Name = method.Name,
                            DeclaringType = classInfo.ClassName,
                            IsPublic = method.IsPublic
                        });

                        // Add edges only for calls whose target was seen in the first pass.
                        foreach (var call in method.InternalCalls)
                        {
                            var calleeKey = $"{call.TargetClass}::{call.MethodName}{call.Descriptor}";
                            if (allMethods.ContainsKey(calleeKey))
                            {
                                graph.AddEdge(new InternalCallEdge { Caller = callerKey, Callee = calleeKey });
                            }
                        }
                    }

                    filesProcessed++;
                }
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to analyze calls in {Path}", classPath);
            }
        }

        sw.Stop();
        _logger.LogDebug(
            "Built internal call graph for Maven {PackageId} v{Version}: {Methods} methods, {Edges} edges in {Duration}ms",
            request.PackageId, request.Version, graph.MethodCount, graph.EdgeCount, sw.ElapsedMilliseconds);

        return InternalCallGraphBuildResult.Ok(graph, sw.Elapsed, filesProcessed);
    }
    catch (Exception ex)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to build internal call graph for Maven {PackageId}", request.PackageId);
        return InternalCallGraphBuildResult.Fail(ex.Message, sw.Elapsed);
    }
}
|
||||
|
||||
/// <summary>
/// Lists the .class files to analyze for <paramref name="packagePath"/>.
/// </summary>
/// <param name="packagePath">An existing .class file, or a directory that is searched recursively.</param>
/// <returns>
/// The file itself for a single .class file; for a directory, every .class file below it that is not
/// inside a <c>META-INF</c> directory; otherwise an empty array.
/// </returns>
private static string[] GetClassFiles(string packagePath)
{
    if (File.Exists(packagePath) && packagePath.EndsWith(".class", StringComparison.OrdinalIgnoreCase))
    {
        return [packagePath];
    }

    if (!Directory.Exists(packagePath))
    {
        return [];
    }

    // The filter looks at the path relative to the package root. Testing the absolute path
    // would drop every file whenever the package itself is located under a META-INF directory.
    return Directory.GetFiles(packagePath, "*.class", SearchOption.AllDirectories)
        .Where(file => !IsUnderMetaInf(Path.GetRelativePath(packagePath, file)))
        .ToArray();

    static bool IsUnderMetaInf(string relativePath) =>
        relativePath
            .Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)
            .Contains("META-INF", StringComparer.Ordinal);
}
|
||||
|
||||
/// <summary>
/// Reads the class name, class access flags and method signatures from a class file image.
/// Method bodies are skipped, so every returned method has an empty call list.
/// </summary>
/// <param name="bytes">Complete contents of a .class file.</param>
/// <returns>The parsed class, or null when the input is shorter than 10 bytes or lacks the class-file magic.</returns>
private ClassInfo? ParseClassFile(byte[] bytes)
{
    var looksLikeClassFile = bytes.Length >= 10
        && BinaryPrimitives.ReadUInt32BigEndian(bytes) == ClassFileMagic;
    if (!looksLikeClassFile)
    {
        return null;
    }

    var input = new ByteReader(bytes);
    input.Skip(8); // magic (4 bytes) + version (4 bytes)

    var pool = ParseConstantPool(input);
    var classFlags = input.ReadU2();
    var name = ResolveClassName(pool, input.ReadU2());

    input.Skip(2); // super class
    int interfaces = input.ReadU2();
    input.Skip(interfaces * 2);

    // Fields are not needed; step over them to reach the method table.
    int fieldsLeft = input.ReadU2();
    while (fieldsLeft-- > 0)
    {
        SkipFieldOrMethod(input);
    }

    int methodsLeft = input.ReadU2();
    var parsedMethods = new List<MethodInfo>();
    while (methodsLeft-- > 0)
    {
        if (ParseMethod(input, pool) is { } parsed)
        {
            parsedMethods.Add(parsed);
        }
    }

    return new ClassInfo { ClassName = name, AccessFlags = classFlags, Methods = parsedMethods };
}
|
||||
|
||||
/// <summary>
/// Parses a class file image like <c>ParseClassFile</c>, but also scans each method's bytecode
/// for invocations whose target class is contained in <paramref name="packageClasses"/>.
/// </summary>
/// <param name="bytes">Complete contents of a .class file.</param>
/// <param name="packageClasses">Class names that count as internal to the package.</param>
/// <returns>The parsed class, or null when the input is shorter than 10 bytes or lacks the class-file magic.</returns>
private ClassInfo? ParseClassFileWithCalls(byte[] bytes, HashSet<string> packageClasses)
{
    if (bytes.Length < 10)
    {
        return null;
    }

    if (BinaryPrimitives.ReadUInt32BigEndian(bytes) != ClassFileMagic)
    {
        return null;
    }

    var input = new ByteReader(bytes);
    input.Skip(4 + 4); // magic, then version

    var pool = ParseConstantPool(input);
    var classFlags = input.ReadU2();
    var thisClass = input.ReadU2();
    var name = ResolveClassName(pool, thisClass);

    input.Skip(2); // super class
    input.Skip(input.ReadU2() * 2); // interface table: one u2 per entry

    // Skip fields
    for (int remaining = input.ReadU2(); remaining > 0; remaining--)
    {
        SkipFieldOrMethod(input);
    }

    // Parse methods with call analysis
    var analyzed = new List<MethodInfo>();
    for (int remaining = input.ReadU2(); remaining > 0; remaining--)
    {
        var parsed = ParseMethodWithCalls(input, pool, packageClasses);
        if (parsed is null)
        {
            continue;
        }

        analyzed.Add(parsed);
    }

    return new ClassInfo
    {
        ClassName = name,
        AccessFlags = classFlags,
        Methods = analyzed
    };
}
|
||||
|
||||
/// <summary>
/// Reads the constant pool from <paramref name="reader"/>, which must be positioned at the
/// <c>constant_pool_count</c> field.
/// </summary>
/// <param name="reader">Reader over the class file bytes; advanced past the whole pool.</param>
/// <returns>
/// A list whose indices equal constant pool indices. Index 0 and the second slot of every
/// 8-byte constant (tags 5 and 6) hold empty placeholder entries.
/// </returns>
/// <exception cref="InvalidDataException">An entry has a tag this parser does not recognise.</exception>
private static List<ConstantPoolEntry> ParseConstantPool(ByteReader reader)
{
    var count = reader.ReadU2();

    // Pool indices start at 1; a placeholder at 0 keeps list and pool indices aligned.
    var pool = new List<ConstantPoolEntry>(count) { new() };

    for (var i = 1; i < count; i++)
    {
        var tag = reader.ReadU1();
        var entry = new ConstantPoolEntry { Tag = tag };
        var occupiesTwoSlots = false;

        switch (tag)
        {
            case 1: // CONSTANT_Utf8
                var length = reader.ReadU2();
                entry.StringValue = Encoding.UTF8.GetString(reader.ReadBytes(length));
                break;
            case 3:
            case 4:
                reader.Skip(4);
                break;
            case 5:
            case 6:
                reader.Skip(8);
                occupiesTwoSlots = true;
                break;
            case 7:
            case 8:
                entry.NameIndex = reader.ReadU2();
                break;
            case 9:
            case 10:
            case 11:
                entry.ClassIndex = reader.ReadU2();
                entry.NameAndTypeIndex = reader.ReadU2();
                break;
            case 12:
                entry.NameIndex = reader.ReadU2();
                entry.DescriptorIndex = reader.ReadU2();
                break;
            case 15:
                reader.Skip(3);
                break;
            case 16:
                reader.Skip(2);
                break;
            case 17:
            case 18:
                reader.Skip(4);
                break;
            case 19:
            case 20:
                reader.Skip(2);
                break;
            default:
                // The payload size of an unknown tag is unknown, so every later read would be
                // misaligned. Fail here rather than return a pool full of garbage.
                throw new InvalidDataException($"Unsupported constant pool tag {tag} at index {i}.");
        }

        pool.Add(entry);

        if (occupiesTwoSlots)
        {
            // The real entry sits at index i; the following index is unusable.
            pool.Add(new());
            i++;
        }
    }

    return pool;
}
|
||||
|
||||
/// <summary>
/// Reads one <c>method_info</c> structure, keeping its flags, name and descriptor and skipping
/// all of its attributes.
/// </summary>
/// <param name="reader">Reader positioned at the method's access flags; advanced past the method.</param>
/// <param name="pool">Constant pool used to resolve the name and descriptor strings.</param>
/// <returns>The method signature with an empty call list.</returns>
private static MethodInfo? ParseMethod(ByteReader reader, List<ConstantPoolEntry> pool)
{
    var flags = reader.ReadU2();
    var methodName = GetUtf8(pool, reader.ReadU2());
    var methodDescriptor = GetUtf8(pool, reader.ReadU2());

    int attributesLeft = reader.ReadU2();
    while (attributesLeft-- > 0)
    {
        reader.Skip(2);                    // attribute name index
        reader.Skip((int)reader.ReadU4()); // attribute payload
    }

    return new MethodInfo
    {
        Name = methodName,
        Descriptor = methodDescriptor,
        AccessFlags = flags,
        InternalCalls = []
    };
}
|
||||
|
||||
/// <summary>
/// Reads one <c>method_info</c> structure and scans the bytecode of each <c>Code</c> attribute
/// for calls into <paramref name="packageClasses"/>.
/// </summary>
/// <param name="reader">Reader positioned at the method's access flags; advanced past the method.</param>
/// <param name="pool">Constant pool used to resolve names and method references.</param>
/// <param name="packageClasses">Class names that count as internal call targets.</param>
/// <returns>The method signature together with the internal calls that were found.</returns>
private static MethodInfo? ParseMethodWithCalls(
    ByteReader reader,
    List<ConstantPoolEntry> pool,
    HashSet<string> packageClasses)
{
    var flags = reader.ReadU2();
    var methodName = GetUtf8(pool, reader.ReadU2());
    var methodDescriptor = GetUtf8(pool, reader.ReadU2());
    var found = new List<CallInfo>();

    int attributesLeft = reader.ReadU2();
    while (attributesLeft-- > 0)
    {
        var attributeName = GetUtf8(pool, reader.ReadU2());
        var attributeLength = reader.ReadU4();

        if (attributeName != "Code")
        {
            reader.Skip((int)attributeLength);
            continue;
        }

        reader.Skip(4); // max_stack, max_locals
        var bytecode = reader.ReadBytes((int)reader.ReadU4());

        // Analyze bytecode for method calls
        AnalyzeBytecode(bytecode, pool, packageClasses, found);

        // Exception table: 8 bytes per entry.
        reader.Skip(reader.ReadU2() * 8);

        // Attributes nested inside Code are skipped wholesale.
        int nestedLeft = reader.ReadU2();
        while (nestedLeft-- > 0)
        {
            reader.Skip(2);
            reader.Skip((int)reader.ReadU4());
        }
    }

    return new MethodInfo
    {
        Name = methodName,
        Descriptor = methodDescriptor,
        AccessFlags = flags,
        InternalCalls = found
    };
}
|
||||
|
||||
/// <summary>
/// Walks a method's bytecode and appends to <paramref name="calls"/> every invoke instruction
/// (<c>invokevirtual</c>, <c>invokespecial</c>, <c>invokestatic</c>, <c>invokeinterface</c>) whose
/// target class is contained in <paramref name="packageClasses"/>.
/// </summary>
/// <param name="code">The raw code array of a single method.</param>
/// <param name="pool">Constant pool used to resolve method references.</param>
/// <param name="packageClasses">Class names that count as internal call targets.</param>
/// <param name="calls">Receives the resolved internal calls.</param>
/// <remarks>
/// <c>tableswitch</c>, <c>lookupswitch</c> and <c>wide</c> have variable lengths and are measured
/// explicitly so that decoding stays aligned after them. If such an instruction is truncated or
/// carries impossible counts, the scan stops and keeps the calls found so far.
/// </remarks>
private static void AnalyzeBytecode(
    byte[] code,
    List<ConstantPoolEntry> pool,
    HashSet<string> packageClasses,
    List<CallInfo> calls)
{
    var offset = 0;
    while (offset < code.Length)
    {
        var opcode = code[offset];

        switch (opcode)
        {
            // invokevirtual, invokespecial, invokestatic, invokeinterface
            case 0xB6 or 0xB7 or 0xB8 or 0xB9:
            {
                if (offset + 2 < code.Length)
                {
                    var methodRefIndex = (code[offset + 1] << 8) | code[offset + 2];
                    var callInfo = ResolveMethodRef(pool, methodRefIndex);
                    if (callInfo is not null && packageClasses.Contains(callInfo.TargetClass))
                    {
                        calls.Add(callInfo);
                    }
                }

                offset += opcode == 0xB9 ? 5 : 3; // invokeinterface has 5 bytes
                break;
            }

            // tableswitch, lookupswitch, wide
            case 0xAA or 0xAB or 0xC4:
            {
                var length = MeasureVariableLengthInstruction(code, offset);
                if (length <= 0)
                {
                    return; // truncated or corrupt; the rest cannot be decoded reliably
                }

                offset += length;
                break;
            }

            default:
                offset += GetOpcodeLength(opcode);
                break;
        }
    }

    // Returns the full byte length of the tableswitch/lookupswitch/wide instruction at
    // position `at`, or 0 when it does not fit inside `bytes`.
    static int MeasureVariableLengthInstruction(byte[] bytes, int at)
    {
        if (bytes[at] == 0xC4)
        {
            if (at + 1 >= bytes.Length)
            {
                return 0;
            }

            // wide iinc carries two 16-bit operands; every other widened opcode carries one.
            return bytes[at + 1] == 0x84 ? 6 : 4;
        }

        // Switch operands begin at the next offset that is a multiple of four,
        // counted from the start of the code array.
        var operands = (at + 4) & ~3;
        long end;

        if (bytes[at] == 0xAA)
        {
            // tableswitch: default, low, high, then (high - low + 1) 4-byte offsets
            if (operands + 12 > bytes.Length)
            {
                return 0;
            }

            long low = BinaryPrimitives.ReadInt32BigEndian(bytes.AsSpan(operands + 4));
            long high = BinaryPrimitives.ReadInt32BigEndian(bytes.AsSpan(operands + 8));
            if (high < low)
            {
                return 0;
            }

            end = operands + 12 + ((high - low + 1) * 4);
        }
        else
        {
            // lookupswitch: default, npairs, then npairs 8-byte match/offset pairs
            if (operands + 8 > bytes.Length)
            {
                return 0;
            }

            long pairs = BinaryPrimitives.ReadInt32BigEndian(bytes.AsSpan(operands + 4));
            if (pairs < 0)
            {
                return 0;
            }

            end = operands + 8 + (pairs * 8);
        }

        return end <= bytes.Length ? (int)(end - at) : 0;
    }
}
|
||||
|
||||
/// <summary>
/// Resolves the constant pool entry at <paramref name="index"/> into a call target.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="index">Pool index taken from an invoke instruction.</param>
/// <returns>
/// The target class (with '/' replaced by '.'), method name and descriptor; or null when the index is
/// out of range, the entry is not tag 10 or 11, or its class / name-and-type entries are not tags 7 / 12.
/// </returns>
private static CallInfo? ResolveMethodRef(List<ConstantPoolEntry> pool, int index)
{
    var inRange = index > 0 && index < pool.Count;
    if (!inRange)
    {
        return null;
    }

    var reference = pool[index];

    // Methodref or InterfaceMethodref
    var isMethodReference = reference.Tag == 10 || reference.Tag == 11;
    if (!isMethodReference)
    {
        return null;
    }

    var owner = pool.ElementAtOrDefault(reference.ClassIndex);
    var signature = pool.ElementAtOrDefault(reference.NameAndTypeIndex);
    if (owner is not { Tag: 7 } || signature is not { Tag: 12 })
    {
        return null;
    }

    return new CallInfo
    {
        TargetClass = GetUtf8(pool, owner.NameIndex).Replace('/', '.'),
        MethodName = GetUtf8(pool, signature.NameIndex),
        Descriptor = GetUtf8(pool, signature.DescriptorIndex)
    };
}
|
||||
|
||||
/// <summary>
/// Advances <paramref name="reader"/> past one field or method structure, attributes included.
/// </summary>
/// <param name="reader">Reader positioned at the start of the structure.</param>
private static void SkipFieldOrMethod(ByteReader reader)
{
    reader.Skip(6); // three u2 values precede the attribute count

    int attributesLeft = reader.ReadU2();
    while (attributesLeft-- > 0)
    {
        reader.Skip(2);                    // attribute name index
        reader.Skip((int)reader.ReadU4()); // attribute payload
    }
}
|
||||
|
||||
/// <summary>
/// Returns the dotted class name stored at <paramref name="classIndex"/>.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="classIndex">Pool index expected to hold a tag-7 entry.</param>
/// <returns>The name with '/' replaced by '.', or <c>"Unknown"</c> when the index is out of range or the entry is not tag 7.</returns>
private static string ResolveClassName(List<ConstantPoolEntry> pool, int classIndex)
{
    var isClassEntry = classIndex > 0
        && classIndex < pool.Count
        && pool[classIndex].Tag == 7;

    if (isClassEntry)
    {
        var internalName = GetUtf8(pool, pool[classIndex].NameIndex);
        return internalName.Replace('/', '.');
    }

    return "Unknown";
}
|
||||
|
||||
/// <summary>
/// Returns the string stored in the pool entry at <paramref name="index"/>.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="index">Pool index to read.</param>
/// <returns>The entry's string value, or an empty string when the index is out of range or the entry has no string.</returns>
private static string GetUtf8(List<ConstantPoolEntry> pool, int index)
{
    var inRange = index > 0 && index < pool.Count;
    return inRange
        ? pool[index].StringValue ?? string.Empty
        : string.Empty;
}
|
||||
|
||||
/// <summary>
/// Returns the number of bytes (opcode plus operands) this parser assumes for an instruction.
/// </summary>
/// <param name="opcode">The instruction's opcode byte.</param>
/// <returns>
/// 2, 3, 4 or 5 for the opcodes listed below, otherwise 1. <c>wide</c>, <c>tableswitch</c> and
/// <c>lookupswitch</c> are variable-length and are reported as a simplified fixed 4.
/// </returns>
private static int GetOpcodeLength(byte opcode)
{
    switch (opcode)
    {
        // One operand byte
        case 0x10:
        case 0x12:
        case >= 0x15 and <= 0x19:
        case >= 0x36 and <= 0x3A:
        case 0xA9:
        case 0xBC:
            return 2;

        // Two operand bytes
        case 0x11:
        case 0x13:
        case 0x14:
        case 0x84:
        case >= 0x99 and <= 0xA8:
        case >= 0xB2 and <= 0xB8:
        case 0xBB:
        case 0xBD:
        case 0xC0:
        case 0xC1:
        case 0xC6:
        case 0xC7:
            return 3;

        case 0xC4: // wide (simplified)
        case 0xAA: // tableswitch (simplified)
        case 0xAB: // lookupswitch (simplified)
        case 0xC5: // multianewarray
            return 4;

        case 0xB9: // invokeinterface
        case 0xBA: // invokedynamic
        case 0xC8: // goto_w
        case 0xC9: // jsr_w
            return 5;

        default:
            return 1;
    }
}
|
||||
|
||||
/// <summary>
/// Forward-only cursor over a byte array that reads big-endian unsigned values.
/// Bounds are not checked up front; reading outside the array surfaces as the exception of the underlying access.
/// </summary>
private sealed class ByteReader(byte[] data)
{
    // Offset of the next unread byte.
    private int _pos;

    /// <summary>Reads one byte and advances by 1.</summary>
    public byte ReadU1()
    {
        return data[_pos++];
    }

    /// <summary>Reads a big-endian 16-bit unsigned value and advances by 2.</summary>
    public ushort ReadU2()
    {
        var value = BinaryPrimitives.ReadUInt16BigEndian(data.AsSpan(_pos));
        _pos += sizeof(ushort);
        return value;
    }

    /// <summary>Reads a big-endian 32-bit unsigned value and advances by 4.</summary>
    public uint ReadU4()
    {
        var value = BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(_pos));
        _pos += sizeof(uint);
        return value;
    }

    /// <summary>Copies the next <paramref name="n"/> bytes into a new array and advances by <paramref name="n"/>.</summary>
    public byte[] ReadBytes(int n)
    {
        var copy = data.AsSpan(_pos, n).ToArray();
        _pos += n;
        return copy;
    }

    /// <summary>Moves the cursor by <paramref name="n"/> bytes without reading.</summary>
    public void Skip(int n)
    {
        _pos += n;
    }
}
|
||||
|
||||
/// <summary>
/// One slot of a parsed constant pool. Only the members relevant to the entry's tag are populated;
/// the others keep their default values.
/// </summary>
private sealed class ConstantPoolEntry
{
    /// <summary>Tag byte read from the class file; 0 for placeholder slots.</summary>
    public byte Tag { get; init; }

    /// <summary>Decoded text of a tag-1 (Utf8) entry; null for every other tag.</summary>
    public string? StringValue { get; set; }

    /// <summary>First index operand of tag 7, 8 and 12 entries.</summary>
    public int NameIndex { get; set; }

    /// <summary>Second index operand of tag-12 entries.</summary>
    public int DescriptorIndex { get; set; }

    /// <summary>First index operand of tag 9, 10 and 11 entries.</summary>
    public int ClassIndex { get; set; }

    /// <summary>Second index operand of tag 9, 10 and 11 entries.</summary>
    public int NameAndTypeIndex { get; set; }
}
|
||||
|
||||
/// <summary>
/// Result of parsing one class file: the class name, its access flags and its methods.
/// </summary>
private sealed record ClassInfo
{
    /// <summary>Class name as produced by <c>ResolveClassName</c>.</summary>
    public required string ClassName { get; init; }

    /// <summary>Raw class-level access flags read from the class file.</summary>
    public ushort AccessFlags { get; init; }

    /// <summary>Methods parsed from the class, in the order they were read.</summary>
    public required List<MethodInfo> Methods { get; init; }
}
|
||||
|
||||
/// <summary>
/// A method parsed from a class file, together with the internal calls found in its bytecode.
/// </summary>
private sealed record MethodInfo
{
    // Access-flag bits tested by IsPublic and IsProtected.
    private const ushort PublicBit = 0x0001;
    private const ushort ProtectedBit = 0x0004;

    /// <summary>Name of the owning class; empty until the caller fills it in.</summary>
    public string DeclaringClass { get; init; } = string.Empty;

    /// <summary>Method name resolved from the constant pool.</summary>
    public required string Name { get; init; }

    /// <summary>Method descriptor resolved from the constant pool.</summary>
    public required string Descriptor { get; init; }

    /// <summary>Raw method-level access flags read from the class file.</summary>
    public ushort AccessFlags { get; init; }

    /// <summary>Calls into the package's own classes found in the method body.</summary>
    public required List<CallInfo> InternalCalls { get; init; }

    /// <summary>True when bit 0x0001 of <see cref="AccessFlags"/> is set.</summary>
    public bool IsPublic => (AccessFlags & PublicBit) != 0;

    /// <summary>True when bit 0x0004 of <see cref="AccessFlags"/> is set.</summary>
    public bool IsProtected => (AccessFlags & ProtectedBit) != 0;
}
|
||||
|
||||
/// <summary>
/// Target of an invoke instruction as resolved from the constant pool.
/// </summary>
private sealed record CallInfo
{
    /// <summary>Dotted name of the class that owns the called method.</summary>
    public required string TargetClass { get; init; }

    /// <summary>Name of the called method.</summary>
    public required string MethodName { get; init; }

    /// <summary>Descriptor of the called method.</summary>
    public required string Descriptor { get; init; }
}
|
||||
}
|
||||
@@ -0,0 +1,420 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// JavaScriptInternalGraphBuilder.cs
|
||||
// Sprint: SPRINT_3700_0003_0001_trigger_extraction (TRIG-003)
|
||||
// Description: JavaScript/Node.js internal call graph builder using regex-based source scanning.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scanner.VulnSurfaces.Models;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.CallGraph;
|
||||
|
||||
/// <summary>
|
||||
/// Internal call graph builder for JavaScript/Node.js packages using regex-based source scanning.
|
||||
/// </summary>
|
||||
public sealed partial class JavaScriptInternalGraphBuilder : IInternalCallGraphBuilder
|
||||
{
|
||||
private readonly ILogger<JavaScriptInternalGraphBuilder> _logger;
|
||||
|
||||
// Regex patterns for JavaScript analysis
|
||||
// RegexOptions.Compiled is omitted on purpose: the regex source generator ignores that flag.

/// <summary>
/// Matches <c>function NAME(</c>, optionally preceded by <c>export</c> and/or <c>async</c>.
/// Group 1 is the <c>export</c> keyword (if present), group 3 the function name.
/// </summary>
[GeneratedRegex(@"(export\s+)?(async\s+)?function\s+(\w+)\s*\(")]
private static partial Regex FunctionDeclarationRegex();

/// <summary>
/// Matches <c>const|let|var NAME = (</c> with an optional <c>async</c> before the parenthesis.
/// Group 2 is the variable name. The pattern does not check that <c>=&gt;</c> follows.
/// </summary>
[GeneratedRegex(@"(const|let|var)\s+(\w+)\s*=\s*(async\s+)?\(")]
private static partial Regex ArrowFunctionRegex();

/// <summary>Matches <c>class NAME</c>. Group 1 is the class name.</summary>
[GeneratedRegex(@"class\s+(\w+)")]
private static partial Regex ClassDeclarationRegex();

/// <summary>
/// Matches <c>NAME(...) {</c> with an optional leading <c>async</c>. Group 2 is the name.
/// Any identifier followed by a parenthesised list and an opening brace matches, including
/// control statements such as <c>if (...) {</c>.
/// </summary>
[GeneratedRegex(@"(async\s+)?(\w+)\s*\([^)]*\)\s*\{")]
private static partial Regex MethodDeclarationRegex();

/// <summary>
/// Matches <c>NAME(</c>, optionally prefixed by <c>this.</c>. Group 1 is the called name.
/// </summary>
[GeneratedRegex(@"(?:this\.)?(\w+)\s*\(")]
private static partial Regex FunctionCallRegex();

/// <summary>
/// Matches <c>module.exports =</c> with an optional opening brace.
/// Group 1 is the text that follows, up to the next closing brace.
/// </summary>
[GeneratedRegex(@"module\.exports\s*=\s*\{?([^}]+)")]
private static partial Regex ModuleExportsRegex();

/// <summary>Matches <c>exports.NAME</c>. Group 1 is the exported name.</summary>
[GeneratedRegex(@"exports\.(\w+)")]
private static partial Regex NamedExportRegex();
|
||||
|
||||
/// <summary>
/// Creates a builder that writes its diagnostics to <paramref name="logger"/>.
/// </summary>
/// <param name="logger">Destination for debug and warning messages.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
public JavaScriptInternalGraphBuilder(ILogger<JavaScriptInternalGraphBuilder> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc />
public string Ecosystem
{
    // Constant ecosystem identifier reported by this builder.
    get { return "npm"; }
}
|
||||
|
||||
/// <inheritdoc />
public bool CanHandle(string packagePath)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    if (string.IsNullOrEmpty(packagePath))
    {
        return false;
    }

    // A .tgz path is accepted on its extension alone; the archive is not opened here.
    if (packagePath.EndsWith(".tgz", IgnoreCase))
    {
        return true;
    }

    if (!Directory.Exists(packagePath))
    {
        return packagePath.EndsWith(".js", IgnoreCase);
    }

    // A directory qualifies through a top-level package.json or any .js file below it.
    var manifest = Path.Combine(packagePath, "package.json");
    return File.Exists(manifest)
        || Directory.EnumerateFiles(packagePath, "*.js", SearchOption.AllDirectories).Any();
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Makes two passes over the files returned by <c>GetJavaScriptFiles</c>. The first pass collects
/// function declarations from every file; the second re-reads each file and records nodes and call edges.
/// A file that cannot be read or analyzed is logged at debug level and skipped. Any other exception,
/// including cancellation, is logged as a warning and returned as a failed result instead of being rethrown.
/// </remarks>
public async Task<InternalCallGraphBuildResult> BuildAsync(
    InternalCallGraphBuildRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);

    var sw = Stopwatch.StartNew();
    var graph = new InternalCallGraph
    {
        PackageId = request.PackageId,
        Version = request.Version
    };

    try
    {
        var jsFiles = GetJavaScriptFiles(request.PackagePath);
        var filesProcessed = 0;
        var allFunctions = new Dictionary<string, FunctionInfo>(StringComparer.OrdinalIgnoreCase);

        // First pass: collect all function declarations so that calls can be resolved
        // regardless of the order in which files are visited.
        foreach (var jsPath in jsFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                var content = await File.ReadAllTextAsync(jsPath, cancellationToken).ConfigureAwait(false);
                var moduleName = GetModuleName(jsPath, request.PackagePath);
                CollectFunctions(content, moduleName, allFunctions, request.IncludePrivateMethods);
            }
            // Cancellation is not an analysis failure: let it reach the outer handler
            // instead of logging it and moving on to the next file.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to collect functions from {Path}", jsPath);
            }
        }

        // Second pass: analyze call relationships
        foreach (var jsPath in jsFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                var content = await File.ReadAllTextAsync(jsPath, cancellationToken).ConfigureAwait(false);
                var moduleName = GetModuleName(jsPath, request.PackagePath);
                AnalyzeCalls(content, moduleName, allFunctions, graph);
                filesProcessed++;
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to analyze calls in {Path}", jsPath);
            }
        }

        sw.Stop();
        _logger.LogDebug(
            "Built internal call graph for npm {PackageId} v{Version}: {Methods} methods, {Edges} edges in {Duration}ms",
            request.PackageId, request.Version, graph.MethodCount, graph.EdgeCount, sw.ElapsedMilliseconds);

        return InternalCallGraphBuildResult.Ok(graph, sw.Elapsed, filesProcessed);
    }
    catch (Exception ex)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to build internal call graph for npm {PackageId}", request.PackageId);
        return InternalCallGraphBuildResult.Fail(ex.Message, sw.Elapsed);
    }
}
|
||||
|
||||
/// <summary>
/// Lists the .js files to analyze for <paramref name="packagePath"/>.
/// </summary>
/// <param name="packagePath">An existing .js file, or a directory that is searched recursively.</param>
/// <returns>
/// The file itself for a single .js file. For a directory, every .js file below it except minified
/// files (<c>.min.</c> in the name), <c>.spec.js</c> / <c>.test.js</c> files, and files inside a
/// <c>node_modules</c> or <c>__tests__</c> directory. Otherwise an empty array.
/// </returns>
private static string[] GetJavaScriptFiles(string packagePath)
{
    if (File.Exists(packagePath) && packagePath.EndsWith(".js", StringComparison.OrdinalIgnoreCase))
    {
        return [packagePath];
    }

    if (!Directory.Exists(packagePath))
    {
        return [];
    }

    // Directory exclusions look at the path relative to the package root. Testing the absolute
    // path would drop every file of a package that is itself installed under node_modules.
    return Directory.GetFiles(packagePath, "*.js", SearchOption.AllDirectories)
        .Where(file => IsAnalyzable(Path.GetRelativePath(packagePath, file)))
        .ToArray();

    static bool IsAnalyzable(string relativePath)
    {
        var segments = relativePath.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
        var fileName = segments[^1];

        if (fileName.Contains(".min.", StringComparison.Ordinal)
            || fileName.EndsWith(".spec.js", StringComparison.Ordinal)
            || fileName.EndsWith(".test.js", StringComparison.Ordinal))
        {
            return false;
        }

        return !segments.Any(segment => segment is "node_modules" or "__tests__");
    }
}
|
||||
|
||||
/// <summary>
/// Derives a dotted module name from a file's path relative to the package root.
/// </summary>
/// <param name="jsPath">Path of the JavaScript file.</param>
/// <param name="basePath">Package root, or the file itself when the package is a single file.</param>
/// <returns>The relative path without its extension, with directory separators replaced by '.'.</returns>
private static string GetModuleName(string jsPath, string basePath)
{
    var relativePath = Path.GetRelativePath(basePath, jsPath);

    // When the package is a single file, both paths are equal and GetRelativePath returns ".",
    // which would collapse to an empty module name. Use the file name instead.
    if (relativePath == ".")
    {
        relativePath = Path.GetFileName(jsPath);
    }

    var withoutExt = Path.ChangeExtension(relativePath, null);
    return withoutExt
        .Replace(Path.DirectorySeparatorChar, '.')
        .Replace(Path.AltDirectorySeparatorChar, '.');
}
|
||||
|
||||
/// <summary>
/// Finds function declarations, arrow-function assignments and class methods in one file and
/// adds them to <paramref name="functions"/>.
/// </summary>
/// <param name="content">Source text of the file.</param>
/// <param name="moduleName">Module name used as the key prefix for this file.</param>
/// <param name="functions">Shared dictionary that receives the entries, keyed <c>module::name</c>.</param>
/// <param name="includePrivate">When false, only functions found to be exported (and class methods) are kept.</param>
/// <remarks>
/// Entries are gathered in a per-file dictionary first. This keeps <c>module.exports</c> marking from
/// touching other modules' functions, and lets the visibility filter run after export marking, so a
/// function exported only through <c>module.exports</c> or <c>exports.NAME</c> is not discarded early.
/// </remarks>
private void CollectFunctions(
    string content,
    string moduleName,
    Dictionary<string, FunctionInfo> functions,
    bool includePrivate)
{
    var discovered = new Dictionary<string, FunctionInfo>(functions.Comparer);

    // Collect function declarations
    foreach (Match match in FunctionDeclarationRegex().Matches(content))
    {
        var functionName = match.Groups[3].Value;
        discovered[$"{moduleName}::{functionName}"] = new FunctionInfo
        {
            Name = functionName,
            Module = moduleName,
            IsPublic = !string.IsNullOrEmpty(match.Groups[1].Value),
            StartIndex = match.Index,
            EndIndex = FindFunctionEnd(content, match.Index)
        };
    }

    // Collect arrow functions; a function declaration with the same name wins.
    foreach (Match match in ArrowFunctionRegex().Matches(content))
    {
        var functionName = match.Groups[2].Value;
        var key = $"{moduleName}::{functionName}";
        if (discovered.ContainsKey(key))
        {
            continue;
        }

        // The match starts at const/let/var, so an export keyword can only sit earlier on the same line.
        var lineStart = content.LastIndexOf('\n', match.Index) + 1;
        var isExported = content[lineStart..match.Index].Contains("export", StringComparison.Ordinal);

        discovered[key] = new FunctionInfo
        {
            Name = functionName,
            Module = moduleName,
            IsPublic = isExported,
            StartIndex = match.Index,
            EndIndex = FindArrowFunctionEnd(content, match.Index)
        };
    }

    // Collect class methods
    foreach (Match classMatch in ClassDeclarationRegex().Matches(content))
    {
        var className = classMatch.Groups[1].Value;
        var classBodyStart = content.IndexOf('{', classMatch.Index);
        if (classBodyStart < 0)
        {
            continue;
        }

        var classBody = ExtractBracedBlock(content, classBodyStart);
        if (string.IsNullOrEmpty(classBody))
        {
            continue;
        }

        // classBody starts right after the opening brace, so positions inside it
        // translate to positions in content by adding this offset.
        var bodyOffset = classBodyStart + 1;

        foreach (Match methodMatch in MethodDeclarationRegex().Matches(classBody))
        {
            var methodName = methodMatch.Groups[2].Value;
            if (methodName == "constructor" || IsControlKeyword(methodName))
            {
                continue;
            }

            discovered[$"{moduleName}.{className}::{methodName}"] = new FunctionInfo
            {
                Name = methodName,
                Module = $"{moduleName}.{className}",
                IsPublic = true, // Class methods are typically public
                StartIndex = bodyOffset + methodMatch.Index,
                EndIndex = bodyOffset + FindFunctionEnd(classBody, methodMatch.Index)
            };
        }
    }

    // Mark functions named in module.exports, comparing whole identifiers rather than substrings.
    var exportsMatch = ModuleExportsRegex().Match(content);
    if (exportsMatch.Success)
    {
        var exportedNames = Regex.Matches(exportsMatch.Groups[1].Value, @"\w+")
            .Select(token => token.Value)
            .ToHashSet(StringComparer.OrdinalIgnoreCase);

        foreach (var info in discovered.Values)
        {
            if (exportedNames.Contains(info.Name))
            {
                info.IsPublic = true;
            }
        }
    }

    // Mark functions exported as exports.NAME
    foreach (Match exportMatch in NamedExportRegex().Matches(content))
    {
        if (discovered.TryGetValue($"{moduleName}::{exportMatch.Groups[1].Value}", out var exported))
        {
            exported.IsPublic = true;
        }
    }

    // Apply the visibility filter last, once every export form has been considered.
    foreach (var (key, info) in discovered)
    {
        if (includePrivate || info.IsPublic)
        {
            functions[key] = info;
        }
    }

    // MethodDeclarationRegex also matches statements shaped like "keyword (...) {".
    static bool IsControlKeyword(string name) =>
        name is "if" or "for" or "while" or "switch" or "catch" or "with" or "function";
}
|
||||
|
||||
/// <summary>
/// Adds a graph node for every collected function belonging to <paramref name="moduleName"/> and an
/// edge for every call in its body that resolves to another collected function.
/// </summary>
/// <param name="content">Source text of the file that <paramref name="moduleName"/> was derived from.</param>
/// <param name="moduleName">Module whose functions are analyzed.</param>
/// <param name="allFunctions">Every function collected across the package.</param>
/// <param name="graph">Graph that receives nodes and edges.</param>
/// <remarks>
/// A function is taken to belong to the module when its <c>Module</c> equals the module name or starts
/// with the module name followed by a dot (class methods). Calls that resolve to the caller itself are
/// not recorded.
/// NOTE(review): the prefix rule presumably also selects functions of a sibling file module such as
/// <c>lib.util</c> while analyzing <c>lib</c>; their offsets would then be applied to the wrong file. Confirm.
/// </remarks>
private void AnalyzeCalls(
    string content,
    string moduleName,
    Dictionary<string, FunctionInfo> allFunctions,
    InternalCallGraph graph)
{
    var classPrefix = $"{moduleName}.";
    var moduleFunctions = allFunctions
        .Where(kvp => kvp.Value.Module == moduleName
            || kvp.Value.Module.StartsWith(classPrefix, StringComparison.Ordinal))
        .ToList();

    foreach (var (callerKey, callerInfo) in moduleFunctions)
    {
        // Add node
        graph.AddMethod(new InternalMethodRef
        {
            MethodKey = callerKey,
            Name = callerInfo.Name,
            DeclaringType = callerInfo.Module,
            IsPublic = callerInfo.IsPublic
        });

        // Extract function body; skip spans that are empty or run past the end of the file.
        var bodyStart = callerInfo.StartIndex;
        var bodyEnd = callerInfo.EndIndex;
        if (bodyEnd <= bodyStart || bodyEnd > content.Length)
        {
            continue;
        }

        var body = content[bodyStart..bodyEnd];

        // Find calls in body
        foreach (Match callMatch in FunctionCallRegex().Matches(body))
        {
            var calledName = callMatch.Groups[1].Value;

            // Skip common built-ins and keywords
            if (IsBuiltIn(calledName))
            {
                continue;
            }

            // Try to resolve callee
            var calleeKey = ResolveFunctionKey(calledName, moduleName, allFunctions);
            if (calleeKey is not null && calleeKey != callerKey)
            {
                graph.AddEdge(new InternalCallEdge { Caller = callerKey, Callee = calleeKey });
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Maps a called name to the key of a collected function.
/// </summary>
/// <param name="calledName">Identifier found at the call site.</param>
/// <param name="callerModule">Module of the calling function; tried first.</param>
/// <param name="allFunctions">Every function collected across the package.</param>
/// <returns>
/// <c>callerModule::calledName</c> when the dictionary contains it; otherwise the first key that ends
/// with <c>::calledName</c> (ignoring case); otherwise null.
/// </returns>
private static string? ResolveFunctionKey(
    string calledName,
    string callerModule,
    Dictionary<string, FunctionInfo> allFunctions)
{
    // Try same module first
    var localKey = $"{callerModule}::{calledName}";
    if (allFunctions.ContainsKey(localKey))
    {
        return localKey;
    }

    // Try any module with that function
    var suffix = $"::{calledName}";
    foreach (var key in allFunctions.Keys)
    {
        if (key.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
        {
            return key;
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="name"/> is one of the keywords or globals that are never treated as a call target.
/// </summary>
/// <param name="name">Identifier found before an opening parenthesis.</param>
/// <returns>True for the listed names (case-sensitive), false for anything else.</returns>
private static bool IsBuiltIn(string name)
{
    switch (name)
    {
        // Keywords and module syntax
        case "import":
        case "export":
        case "if":
        case "for":
        case "while":
        case "switch":
        case "return":
        case "throw":
        case "catch":
        case "try":
        case "new":
        case "typeof":
        case "instanceof":
        case "delete":
        case "void":
        case "await":
        // Global objects and constructors
        case "console":
        case "require":
        case "Promise":
        case "Array":
        case "Object":
        case "String":
        case "Number":
        case "Boolean":
        case "Date":
        case "Math":
        case "JSON":
        case "Error":
        case "RegExp":
        case "Map":
        case "Set":
        // Timers, I/O and environment globals
        case "setTimeout":
        case "setInterval":
        case "clearTimeout":
        case "clearInterval":
        case "fetch":
        case "process":
        case "Buffer":
        case "__dirname":
        case "__filename":
            return true;

        default:
            return false;
    }
}
|
||||
|
||||
/// <summary>
/// Estimates where the function that begins at <paramref name="start"/> ends.
/// </summary>
/// <param name="content">Text to search.</param>
/// <param name="start">Index at which the declaration begins.</param>
/// <returns>
/// The index just past the brace block that opens at the first '{' at or after <paramref name="start"/>,
/// or <c>start + 100</c> when no '{' follows.
/// </returns>
private static int FindFunctionEnd(string content, int start)
{
    var openBrace = content.IndexOf('{', start);
    return openBrace >= 0
        ? openBrace + FindMatchingBrace(content, openBrace)
        : start + 100;
}
|
||||
|
||||
/// <summary>
/// Estimates where the arrow function that begins at <paramref name="start"/> ends.
/// </summary>
/// <param name="content">Text to search.</param>
/// <param name="start">Index at which the declaration begins.</param>
/// <returns>
/// For a braced body, the index just past its brace block. For an expression body, the index of the
/// next ';', newline or ','. When no <c>=&gt;</c> or no terminator is found, a fallback 100 characters
/// past the last known position.
/// </returns>
private static int FindArrowFunctionEnd(string content, int start)
{
    // Ordinal search: the arrow token is program syntax, not culture-sensitive text.
    var arrowIndex = content.IndexOf("=>", start, StringComparison.Ordinal);
    if (arrowIndex < 0)
    {
        return start + 100;
    }

    var bodyStart = arrowIndex + 2;
    while (bodyStart < content.Length && char.IsWhiteSpace(content[bodyStart]))
    {
        bodyStart++;
    }

    if (bodyStart < content.Length && content[bodyStart] == '{')
    {
        return bodyStart + FindMatchingBrace(content, bodyStart);
    }

    // Expression body
    var terminator = content.IndexOfAny([';', '\n', ','], bodyStart);
    return terminator >= 0 ? terminator : bodyStart + 100;
}
|
||||
|
||||
/// <summary>
/// Measures the brace block that starts at <paramref name="braceStart"/> by counting '{' and '}'.
/// Braces inside string literals and comments are counted like any other.
/// </summary>
/// <param name="content">Text to scan.</param>
/// <param name="braceStart">Index of the opening brace.</param>
/// <returns>
/// The number of characters from the opening brace through its matching closing brace, or the number
/// of characters remaining from <paramref name="braceStart"/> when the block is never closed.
/// </returns>
private static int FindMatchingBrace(string content, int braceStart)
{
    var open = 0;
    var index = braceStart;

    while (index < content.Length)
    {
        switch (content[index])
        {
            case '{':
                open++;
                break;
            case '}':
                open--;
                if (open == 0)
                {
                    return index - braceStart + 1;
                }

                break;
        }

        index++;
    }

    return content.Length - braceStart;
}
|
||||
|
||||
/// <summary>
/// Returns the text between the '{' at <paramref name="braceStart"/> and the brace that
/// <c>FindMatchingBrace</c> pairs with it, excluding both braces.
/// </summary>
/// <param name="content">Source text being scanned.</param>
/// <param name="braceStart">Index of the opening '{'.</param>
/// <returns>
/// The inner text of the block; an empty string when <paramref name="braceStart"/> is out of range
/// or does not point at '{'. When the block is never closed, everything after the opening brace
/// is returned.
/// </returns>
private static string ExtractBracedBlock(string content, int braceStart)
{
    if (braceStart < 0 || braceStart >= content.Length || content[braceStart] != '{')
        return string.Empty;

    var length = FindMatchingBrace(content, braceStart);
    var endIndex = Math.Min(braceStart + length, content.Length);

    // FindMatchingBrace reports "rest of the text" for an unterminated block. Only strip a
    // trailing character when it really is a closing brace; otherwise a lone "{" would produce
    // an inverted range and an unterminated block would silently lose its last character.
    var endsWithClosingBrace = endIndex - braceStart >= 2 && content[endIndex - 1] == '}';
    var innerEnd = endsWithClosingBrace ? endIndex - 1 : endIndex;

    return content[(braceStart + 1)..innerEnd];
}
|
||||
|
||||
/// <summary>
/// Bookkeeping record for one function discovered in a source file.
/// </summary>
private sealed class FunctionInfo
{
    /// <summary>Identifier of the function.</summary>
    public required string Name { get; init; }

    /// <summary>Name of the module the function was found in.</summary>
    public required string Module { get; init; }

    /// <summary>Visibility flag; the only member that can be changed after construction.</summary>
    public bool IsPublic { get; set; }

    // NOTE(review): StartIndex/EndIndex are presumably character offsets into the file text —
    // confirm against the code that populates them.

    /// <summary>Start position of the function.</summary>
    public int StartIndex { get; init; }

    /// <summary>End position of the function.</summary>
    public int EndIndex { get; init; }
}
|
||||
}
|
||||
@@ -0,0 +1,449 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// PythonInternalGraphBuilder.cs
|
||||
// Sprint: SPRINT_3700_0003_0001_trigger_extraction (TRIG-005)
|
||||
// Description: Python internal call graph builder using regex-based source parsing.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scanner.VulnSurfaces.Models;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.CallGraph;
|
||||
|
||||
/// <summary>
/// Internal call graph builder for Python packages.
/// Works directly on source text using regular expressions and indentation heuristics;
/// no Python AST is built.
/// </summary>
public sealed partial class PythonInternalGraphBuilder : IInternalCallGraphBuilder
{
    private const string InitModuleSuffix = ".__init__";

    private readonly ILogger<PythonInternalGraphBuilder> _logger;

    // Regex patterns for Python analysis

    /// <summary>Column-0 <c>def</c> / <c>async def</c>. Group 2 is the function name.</summary>
    [GeneratedRegex(@"^(async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?:", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex FunctionDefRegex();

    /// <summary>Column-0 <c>class</c> statement. Group 1 is the class name.</summary>
    [GeneratedRegex(@"^class\s+(\w+)(?:\s*\([^)]*\))?\s*:", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex ClassDefRegex();

    /// <summary>
    /// Indented <c>def</c> / <c>async def</c>. Group 1 is the indentation, group 3 the name.
    /// The indentation is matched with <c>[ \t]+</c> rather than <c>\s+</c>: <c>\s</c> also matches
    /// newlines, which made a match start on a preceding blank line (wrong line number and indent)
    /// and let column-0 functions match as "indented" ones.
    /// </summary>
    [GeneratedRegex(@"^([ \t]+)(async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?:", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex MethodDefRegex();

    /// <summary>A call site: an identifier (optionally prefixed by <c>self.</c>) followed by '('.</summary>
    [GeneratedRegex(@"(?:self\.)?(\w+)\s*\(", RegexOptions.Compiled)]
    private static partial Regex FunctionCallRegex();

    /// <summary><c>from X import Y</c>. Group 1 is the module, group 2 the imported list.</summary>
    [GeneratedRegex(@"^from\s+(\S+)\s+import\s+(.+)$", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex FromImportRegex();

    /// <summary><c>import X</c>. Group 1 is the module.</summary>
    [GeneratedRegex(@"^import\s+(\S+)", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex ImportRegex();

    /// <summary><c>__all__ = [...]</c>. Group 1 is the raw list content.</summary>
    [GeneratedRegex(@"^__all__\s*=\s*\[([^\]]+)\]", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex AllExportRegex();

    /// <summary>Creates the builder.</summary>
    /// <param name="logger">Logger used for diagnostics.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public PythonInternalGraphBuilder(ILogger<PythonInternalGraphBuilder> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public string Ecosystem => "pypi";

    /// <inheritdoc />
    public bool CanHandle(string packagePath)
    {
        if (string.IsNullOrEmpty(packagePath))
            return false;

        if (packagePath.EndsWith(".whl", StringComparison.OrdinalIgnoreCase) ||
            packagePath.EndsWith(".tar.gz", StringComparison.OrdinalIgnoreCase))
            return true;

        if (Directory.Exists(packagePath))
        {
            return File.Exists(Path.Combine(packagePath, "setup.py")) ||
                   File.Exists(Path.Combine(packagePath, "pyproject.toml")) ||
                   Directory.EnumerateFiles(packagePath, "*.py", SearchOption.AllDirectories).Any();
        }

        return packagePath.EndsWith(".py", StringComparison.OrdinalIgnoreCase);
    }

    /// <inheritdoc />
    public async Task<InternalCallGraphBuildResult> BuildAsync(
        InternalCallGraphBuildRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();
        var graph = new InternalCallGraph
        {
            PackageId = request.PackageId,
            Version = request.Version
        };

        try
        {
            var pyFiles = GetPythonFiles(request.PackagePath);
            var filesProcessed = 0;
            var allFunctions = new Dictionary<string, FunctionInfo>(StringComparer.Ordinal);

            // First pass: collect all function declarations
            foreach (var pyPath in pyFiles)
            {
                cancellationToken.ThrowIfCancellationRequested();

                try
                {
                    var content = await File.ReadAllTextAsync(pyPath, cancellationToken);
                    var moduleName = GetModuleName(pyPath, request.PackagePath);
                    CollectFunctions(content, moduleName, allFunctions, request.IncludePrivateMethods);
                }
                // A single unreadable file must not abort the build, but cancellation must not be
                // mistaken for a per-file failure and silently skipped.
                catch (Exception ex) when (ex is not OperationCanceledException)
                {
                    _logger.LogDebug(ex, "Failed to collect functions from {Path}", pyPath);
                }
            }

            // Second pass: analyze call relationships
            foreach (var pyPath in pyFiles)
            {
                cancellationToken.ThrowIfCancellationRequested();

                try
                {
                    var content = await File.ReadAllTextAsync(pyPath, cancellationToken);
                    var moduleName = GetModuleName(pyPath, request.PackagePath);
                    AnalyzeCalls(content, moduleName, allFunctions, graph);
                    filesProcessed++;
                }
                catch (Exception ex) when (ex is not OperationCanceledException)
                {
                    _logger.LogDebug(ex, "Failed to analyze calls in {Path}", pyPath);
                }
            }

            sw.Stop();
            _logger.LogDebug(
                "Built internal call graph for PyPI {PackageId} v{Version}: {Methods} methods, {Edges} edges in {Duration}ms",
                request.PackageId, request.Version, graph.MethodCount, graph.EdgeCount, sw.ElapsedMilliseconds);

            return InternalCallGraphBuildResult.Ok(graph, sw.Elapsed, filesProcessed);
        }
        catch (Exception ex)
        {
            sw.Stop();
            _logger.LogWarning(ex, "Failed to build internal call graph for PyPI {PackageId}", request.PackageId);
            return InternalCallGraphBuildResult.Fail(ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Lists the Python source files to analyze: the file itself when <paramref name="packagePath"/>
    /// is a <c>.py</c> file, otherwise every non-test <c>.py</c> file below the directory.
    /// </summary>
    private static string[] GetPythonFiles(string packagePath)
    {
        if (File.Exists(packagePath) && packagePath.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
        {
            return [packagePath];
        }

        if (!Directory.Exists(packagePath))
        {
            return [];
        }

        return Directory.GetFiles(packagePath, "*.py", SearchOption.AllDirectories)
            .Where(f => !IsExcludedPythonFile(packagePath, f))
            .ToArray();
    }

    /// <summary>
    /// Decides whether a file is test/cache/metadata content that should not be analyzed.
    /// Only path segments below <paramref name="packagePath"/> are inspected, and they are compared
    /// as whole directory names, so a package extracted under e.g. <c>/tmp/test/</c> or containing a
    /// <c>latest/</c> folder is not discarded, and Windows separators are handled.
    /// </summary>
    private static bool IsExcludedPythonFile(string packagePath, string filePath)
    {
        var fileName = Path.GetFileName(filePath);
        if (fileName.StartsWith("test_", StringComparison.Ordinal) ||
            fileName.EndsWith("_test.py", StringComparison.Ordinal))
        {
            return true;
        }

        var relativeDir = Path.GetDirectoryName(Path.GetRelativePath(packagePath, filePath)) ?? string.Empty;
        var segments = relativeDir.Split(
            new[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar },
            StringSplitOptions.RemoveEmptyEntries);

        foreach (var segment in segments)
        {
            if (segment is "__pycache__" or "tests" or "test" ||
                segment.EndsWith(".egg-info", StringComparison.Ordinal))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Converts a file path into a dotted module name relative to <paramref name="basePath"/>,
    /// dropping the extension and a trailing <c>.__init__</c>.
    /// </summary>
    private static string GetModuleName(string pyPath, string basePath)
    {
        var relativePath = Path.GetRelativePath(basePath, pyPath);

        // When the package path is the .py file itself the relative path is ".", which would
        // collapse to an empty module name; use the file name instead.
        if (relativePath == ".")
            relativePath = Path.GetFileName(pyPath);

        var withoutExt = Path.ChangeExtension(relativePath, null);
        var moduleName = withoutExt
            .Replace(Path.DirectorySeparatorChar, '.')
            .Replace(Path.AltDirectorySeparatorChar, '.');

        // Remove __init__ from module name
        if (moduleName.EndsWith(InitModuleSuffix, StringComparison.Ordinal))
            moduleName = moduleName[..^InitModuleSuffix.Length];

        return moduleName;
    }

    /// <summary>
    /// Records every module-level function and every method of a column-0 class found in
    /// <paramref name="content"/> into <paramref name="functions"/>.
    /// Keys are <c>module::function</c> and <c>module.Class::method</c>.
    /// </summary>
    /// <param name="content">Full text of one Python file.</param>
    /// <param name="moduleName">Dotted module name of that file.</param>
    /// <param name="functions">Accumulator shared across files.</param>
    /// <param name="includePrivate">Whether single-underscore names are recorded.</param>
    private void CollectFunctions(
        string content,
        string moduleName,
        Dictionary<string, FunctionInfo> functions,
        bool includePrivate)
    {
        var lines = content.Split('\n');

        // Check for __all__ exports
        var exportedNames = new HashSet<string>(StringComparer.Ordinal);
        var allMatch = AllExportRegex().Match(content);
        if (allMatch.Success)
        {
            var exports = allMatch.Groups[1].Value;
            foreach (var name in exports.Split(',').Select(s => s.Trim().Trim('\'', '"')))
            {
                if (!string.IsNullOrEmpty(name))
                    exportedNames.Add(name);
            }
        }

        // Collect module-level functions. The pattern is anchored with ^ so every match already
        // starts in column 0; indented definitions are handled in the class pass below.
        foreach (Match match in FunctionDefRegex().Matches(content))
        {
            var functionName = match.Groups[2].Value;

            // Skip private functions unless requested
            var isPrivate = functionName.StartsWith('_') && !functionName.StartsWith("__", StringComparison.Ordinal);
            if (!includePrivate && isPrivate)
                continue;

            var isPublic = !isPrivate && (exportedNames.Count == 0 || exportedNames.Contains(functionName));
            var lineNumber = GetLineNumber(content, match.Index);

            // Start the body scan after the line holding the signature's closing ':' so that a
            // signature wrapped over several lines is not mistaken for the end of the function.
            var signatureEndLine = GetLineNumber(content, match.Index + match.Length - 1);

            var key = $"{moduleName}::{functionName}";
            functions[key] = new FunctionInfo
            {
                Name = functionName,
                Module = moduleName,
                SourceModule = moduleName,
                IsPublic = isPublic,
                StartLine = lineNumber,
                EndLine = FindFunctionEndLine(lines, signatureEndLine - 1, 0)
            };
        }

        // Collect class methods
        var methodMatches = MethodDefRegex().Matches(content);
        foreach (Match classMatch in ClassDefRegex().Matches(content))
        {
            var className = classMatch.Groups[1].Value;
            var classLine = GetLineNumber(content, classMatch.Index);
            var classIndent = GetIndentation(lines[classLine - 1]);

            // 1-based number of the last line that still belongs to the class body.
            var classHeaderEndLine = GetLineNumber(content, classMatch.Index + classMatch.Length - 1);
            var classEndLine = FindFunctionEndLine(lines, classHeaderEndLine - 1, classIndent);

            foreach (Match methodMatch in methodMatches)
            {
                var methodLine = GetLineNumber(content, methodMatch.Index);
                if (methodLine <= classLine)
                    continue;

                // Matches come in source order, so the first one past the class body ends the scan.
                if (methodLine > classEndLine)
                    break;

                // Measure indentation the same way FindFunctionEndLine does (tab = 4 columns).
                var methodIndent = GetIndentation(methodMatch.Groups[1].Value);
                if (methodIndent <= classIndent)
                    continue;

                var methodName = methodMatch.Groups[3].Value;

                // Skip private methods unless requested
                var isPrivate = methodName.StartsWith('_') && !methodName.StartsWith("__", StringComparison.Ordinal);
                if (!includePrivate && isPrivate)
                    continue;

                // Names starting with a double underscore (including dunder methods) are never
                // classified as private above, so they are considered public.
                var isPublic = !isPrivate;

                var signatureEndLine = GetLineNumber(content, methodMatch.Index + methodMatch.Length - 1);

                var key = $"{moduleName}.{className}::{methodName}";
                functions[key] = new FunctionInfo
                {
                    Name = methodName,
                    Module = $"{moduleName}.{className}",
                    SourceModule = moduleName,
                    IsPublic = isPublic,
                    StartLine = methodLine,
                    EndLine = FindFunctionEndLine(lines, signatureEndLine - 1, methodIndent)
                };
            }
        }
    }

    /// <summary>
    /// Adds every function declared in this file to <paramref name="graph"/> and, for each call
    /// site found in a function body, an edge to the resolved callee.
    /// </summary>
    /// <param name="content">Full text of one Python file.</param>
    /// <param name="moduleName">Dotted module name of that file.</param>
    /// <param name="allFunctions">All functions collected in the first pass.</param>
    /// <param name="graph">Graph receiving methods and edges.</param>
    private void AnalyzeCalls(
        string content,
        string moduleName,
        Dictionary<string, FunctionInfo> allFunctions,
        InternalCallGraph graph)
    {
        var lines = content.Split('\n');

        // Select by the file the function came from. Matching on a "module." prefix would also
        // pull in functions of submodules, whose line numbers refer to a different file.
        var moduleFunctions = allFunctions
            .Where(kvp => kvp.Value.SourceModule == moduleName)
            .ToList();

        // Collect imports for resolution
        var imports = new Dictionary<string, string>(StringComparer.Ordinal);
        foreach (Match match in FromImportRegex().Matches(content))
        {
            var fromModule = match.Groups[1].Value;
            var imported = match.Groups[2].Value;
            foreach (var item in imported.Split(',').Select(s => s.Trim().Trim('(', ')').Trim()))
            {
                var parts = item.Split(" as ");
                var name = parts[0].Trim();
                if (name.Length == 0)
                    continue;

                var alias = parts.Length > 1 ? parts[1].Trim() : name;
                imports[alias] = $"{fromModule}.{name}";
            }
        }

        foreach (var (callerKey, callerInfo) in moduleFunctions)
        {
            graph.AddMethod(new InternalMethodRef
            {
                MethodKey = callerKey,
                Name = callerInfo.Name,
                DeclaringType = callerInfo.Module,
                IsPublic = callerInfo.IsPublic
            });

            // Extract function body
            if (callerInfo.StartLine <= 0 || callerInfo.EndLine <= callerInfo.StartLine)
                continue;

            // StartLine is the 1-based "def" line, so skipping that many lines lands on the first
            // line after it; EndLine is the 0-based index of the first line past the body.
            var bodyLines = lines
                .Skip(callerInfo.StartLine)
                .Take(callerInfo.EndLine - callerInfo.StartLine)
                .ToArray();
            var body = string.Join("\n", bodyLines);

            // Find calls in body
            foreach (Match callMatch in FunctionCallRegex().Matches(body))
            {
                var calledName = callMatch.Groups[1].Value;

                // Skip built-ins and keywords
                if (IsBuiltIn(calledName))
                    continue;

                // Try to resolve callee
                var calleeKey = ResolveFunctionKey(calledName, moduleName, imports, allFunctions);
                if (calleeKey is not null && calleeKey != callerKey)
                {
                    graph.AddEdge(new InternalCallEdge { Caller = callerKey, Callee = calleeKey });
                }
            }
        }
    }

    /// <summary>
    /// Maps a called identifier to a key in <paramref name="allFunctions"/>, trying in order:
    /// same module, a class in the same module, the <c>from ... import</c> table, then any
    /// function with that name. All comparisons are case-sensitive, as Python identifiers are.
    /// </summary>
    /// <returns>The resolved key, or <c>null</c> when nothing matches.</returns>
    private static string? ResolveFunctionKey(
        string calledName,
        string callerModule,
        Dictionary<string, string> imports,
        Dictionary<string, FunctionInfo> allFunctions)
    {
        // Try same module first
        var sameModuleKey = $"{callerModule}::{calledName}";
        if (allFunctions.ContainsKey(sameModuleKey))
            return sameModuleKey;

        // Try class method in same module
        var nameSuffix = $"::{calledName}";
        var classPrefix = $"{callerModule}.";
        var classMethodKey = allFunctions.Keys
            .FirstOrDefault(k => k.StartsWith(classPrefix, StringComparison.Ordinal) &&
                                 k.EndsWith(nameSuffix, StringComparison.Ordinal));
        if (classMethodKey is not null)
            return classMethodKey;

        // Try imported name
        if (imports.TryGetValue(calledName, out var importedPath))
        {
            var importedKey = ResolveImportedKey(importedPath, allFunctions);
            if (importedKey is not null)
                return importedKey;
        }

        // Try any module with that function
        return allFunctions.Keys
            .FirstOrDefault(k => k.EndsWith(nameSuffix, StringComparison.Ordinal));
    }

    /// <summary>
    /// Resolves an entry of the import table (<c>module.name</c>, possibly with leading dots for a
    /// relative import) to a function key. The real imported name is used, not the local alias.
    /// An imported class resolves to its <c>__init__</c> when one was collected.
    /// </summary>
    private static string? ResolveImportedKey(
        string importedPath,
        Dictionary<string, FunctionInfo> allFunctions)
    {
        var lastDot = importedPath.LastIndexOf('.');
        if (lastDot < 0)
            return null;

        var importedModule = importedPath[..lastDot].Trim('.');
        var importedName = importedPath[(lastDot + 1)..];
        if (importedName.Length == 0 || importedModule.Length == 0)
            return null;

        // Imported function: module matches and the function name is the imported name.
        foreach (var (key, info) in allFunctions)
        {
            if (info.Name == importedName && IsSameOrDottedSuffix(info.Module, importedModule))
                return key;
        }

        // Imported class being instantiated: link to its constructor.
        var classPath = $"{importedModule}.{importedName}";
        foreach (var (key, info) in allFunctions)
        {
            if (info.Name == "__init__" && IsSameOrDottedSuffix(info.Module, classPath))
                return key;
        }

        return null;
    }

    /// <summary>
    /// True when two dotted paths are equal or one is a dot-delimited suffix of the other.
    /// Collected module names are relative to the package path while imports may be absolute or
    /// relative, so neither side is guaranteed to carry the full prefix.
    /// </summary>
    private static bool IsSameOrDottedSuffix(string left, string right)
    {
        if (left.Length == 0 || right.Length == 0)
            return false;

        return left == right ||
               left.EndsWith($".{right}", StringComparison.Ordinal) ||
               right.EndsWith($".{left}", StringComparison.Ordinal);
    }

    /// <summary>
    /// Reports whether <paramref name="name"/> is one of the Python keywords, built-in functions,
    /// built-in exception types or conventional receiver names listed here (case-sensitive).
    /// </summary>
    private static bool IsBuiltIn(string name)
    {
        return name is "print" or "len" or "range" or "str" or "int" or "float" or "bool" or "list"
            or "dict" or "set" or "tuple" or "type" or "isinstance" or "issubclass" or "hasattr"
            or "getattr" or "setattr" or "delattr" or "callable" or "super" or "property"
            or "staticmethod" or "classmethod" or "open" or "input" or "format" or "repr"
            or "id" or "hash" or "abs" or "round" or "min" or "max" or "sum" or "sorted"
            or "reversed" or "enumerate" or "zip" or "map" or "filter" or "any" or "all"
            or "iter" or "next" or "slice" or "object" or "Exception" or "ValueError"
            or "TypeError" or "KeyError" or "IndexError" or "AttributeError" or "RuntimeError"
            or "if" or "for" or "while" or "return" or "yield" or "raise" or "try"
            or "except" or "finally" or "with" or "as" or "import" or "from" or "class" or "def"
            or "async" or "await" or "lambda" or "pass" or "break" or "continue" or "assert"
            or "True" or "False" or "None" or "self" or "cls";
    }

    /// <summary>Returns the 1-based line number of the character at <paramref name="index"/>.</summary>
    private static int GetLineNumber(string content, int index)
    {
        var lineNumber = 1;
        for (var i = 0; i < index && i < content.Length; i++)
        {
            if (content[i] == '\n')
                lineNumber++;
        }
        return lineNumber;
    }

    /// <summary>
    /// Measures leading indentation in columns: a space counts as 1 and a tab as 4.
    /// </summary>
    private static int GetIndentation(string line)
    {
        var indent = 0;
        foreach (var c in line)
        {
            if (c == ' ') indent++;
            else if (c == '\t') indent += 4;
            else break;
        }
        return indent;
    }

    /// <summary>
    /// Finds where an indented block ends.
    /// </summary>
    /// <param name="lines">File content split into lines.</param>
    /// <param name="defLineIndex">0-based index of the line that ends the block header.</param>
    /// <param name="baseIndent">Indentation of the block header.</param>
    /// <returns>
    /// The 0-based index of the first non-blank line indented no deeper than
    /// <paramref name="baseIndent"/> after the body has started; <c>defLineIndex + 1</c> when the
    /// header has no indented body; <c>lines.Length</c> when the block runs to the end of the file.
    /// </returns>
    private static int FindFunctionEndLine(string[] lines, int defLineIndex, int baseIndent)
    {
        var bodyIndent = -1;

        for (var i = defLineIndex + 1; i < lines.Length; i++)
        {
            var line = lines[i];
            if (string.IsNullOrWhiteSpace(line))
                continue;

            var currentIndent = GetIndentation(line);

            if (bodyIndent < 0)
            {
                // First non-blank line after the header decides whether there is a body at all.
                if (currentIndent <= baseIndent)
                    return defLineIndex + 1;
                bodyIndent = currentIndent;
            }
            else if (currentIndent <= baseIndent)
            {
                return i;
            }
        }

        return lines.Length;
    }

    /// <summary>One function or method discovered during the first pass.</summary>
    private sealed class FunctionInfo
    {
        /// <summary>Function or method name.</summary>
        public required string Name { get; init; }

        /// <summary>Declaring scope: the module name, or <c>module.Class</c> for a method.</summary>
        public required string Module { get; init; }

        /// <summary>Module name of the file the definition was read from.</summary>
        public required string SourceModule { get; init; }

        /// <summary>Whether the function is treated as part of the public surface.</summary>
        public bool IsPublic { get; set; }

        /// <summary>1-based line number of the <c>def</c> line.</summary>
        public int StartLine { get; init; }

        /// <summary>Value returned by <c>FindFunctionEndLine</c> for this definition.</summary>
        public int EndLine { get; init; }
    }
}
|
||||
@@ -0,0 +1,198 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// MavenPackageDownloader.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-005)
|
||||
// Description: Downloads Maven packages (JARs) from Maven Central or custom
|
||||
// repositories for vulnerability surface analysis.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Net.Http;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.Download;
|
||||
|
||||
/// <summary>
/// Downloads Maven packages (JARs) from Maven Central or custom repositories.
/// Maven coordinates: groupId:artifactId:version
/// </summary>
public sealed class MavenPackageDownloader : IPackageDownloader
{
    private const string DefaultRepositoryUrl = "https://repo1.maven.org/maven2";

    private readonly HttpClient _httpClient;
    private readonly ILogger<MavenPackageDownloader> _logger;
    private readonly MavenDownloaderOptions _options;

    /// <summary>Creates the downloader.</summary>
    /// <param name="httpClient">HTTP client used for all repository requests.</param>
    /// <param name="logger">Logger used for diagnostics.</param>
    /// <param name="options">Downloader options; defaults are used when null.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="httpClient"/> or <paramref name="logger"/> is null.
    /// </exception>
    public MavenPackageDownloader(
        HttpClient httpClient,
        ILogger<MavenPackageDownloader> logger,
        IOptions<MavenDownloaderOptions> options)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? new MavenDownloaderOptions();
    }

    /// <inheritdoc />
    public string Ecosystem => "maven";

    /// <inheritdoc />
    public async Task<PackageDownloadResult> DownloadAsync(
        PackageDownloadRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();

        try
        {
            // Parse Maven coordinates (groupId:artifactId or just artifactId for simple cases)
            var (groupId, artifactId) = ParseCoordinates(request.PackageName);
            var version = request.Version;

            // Coordinates and version end up in local paths, so strip anything that could act as
            // a path separator before combining them with the output directory.
            var baseName = $"{GetSafeDirectoryName(groupId, artifactId)}-{SanitizePathSegment(version)}";
            var extractedDir = Path.Combine(request.OutputDirectory, baseName);
            var archivePath = Path.Combine(request.OutputDirectory, $"{baseName}.jar");

            // Check cache first
            if (request.UseCache && Directory.Exists(extractedDir))
            {
                sw.Stop();
                _logger.LogDebug("Using cached Maven package {GroupId}:{ArtifactId} v{Version}",
                    groupId, artifactId, version);
                return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed, fromCache: true);
            }

            // Build download URL
            // Maven Central path: /<groupId with / instead of .>/<artifactId>/<version>/<artifactId>-<version>.jar
            // A configured URL may or may not end with '/'; normalize to avoid "//" in the path.
            var repositoryUrl = (request.RegistryUrl ?? _options.RepositoryUrl ?? DefaultRepositoryUrl).TrimEnd('/');
            var groupPath = groupId.Replace('.', '/');
            var artifactBaseUrl = $"{repositoryUrl}/{groupPath}/{artifactId}/{version}/{artifactId}-{version}";
            var jarUrl = $"{artifactBaseUrl}.jar";

            _logger.LogDebug("Downloading Maven JAR from {Url}", jarUrl);

            // Download JAR
            Directory.CreateDirectory(request.OutputDirectory);

            // ResponseHeadersRead lets the body stream straight to disk instead of being
            // buffered in memory first.
            using var response = await _httpClient.GetAsync(
                jarUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken);

            if (response.IsSuccessStatusCode)
            {
                // Save primary JAR
                await SaveContentAsync(response, archivePath, cancellationToken);
            }
            else
            {
                // Try sources JAR as fallback for source analysis
                var sourcesUrl = $"{artifactBaseUrl}-sources.jar";
                _logger.LogDebug("Primary JAR not found, trying sources JAR from {Url}", sourcesUrl);

                using var sourcesResponse = await _httpClient.GetAsync(
                    sourcesUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken);

                if (!sourcesResponse.IsSuccessStatusCode)
                {
                    sw.Stop();
                    var error = $"Failed to download: HTTP {(int)response.StatusCode} {response.ReasonPhrase}";
                    _logger.LogWarning("Maven download failed for {GroupId}:{ArtifactId} v{Version}: {Error}",
                        groupId, artifactId, version, error);
                    return PackageDownloadResult.Fail(error, sw.Elapsed);
                }

                // Save sources JAR
                await SaveContentAsync(sourcesResponse, archivePath, cancellationToken);
            }

            // Extract JAR (it's just a ZIP file)
            if (Directory.Exists(extractedDir))
            {
                Directory.Delete(extractedDir, recursive: true);
            }

            try
            {
                ZipFile.ExtractToDirectory(archivePath, extractedDir);
            }
            catch
            {
                // The cache check above only tests for the directory's existence, so a partially
                // extracted directory must not survive a failed extraction.
                TryDeleteDirectory(extractedDir);
                throw;
            }

            sw.Stop();
            _logger.LogDebug("Downloaded and extracted Maven {GroupId}:{ArtifactId} v{Version} in {Duration}ms",
                groupId, artifactId, version, sw.ElapsedMilliseconds);

            return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed);
        }
        catch (Exception ex)
        {
            sw.Stop();
            _logger.LogWarning(ex, "Failed to download Maven package {Package} v{Version}",
                request.PackageName, request.Version);
            return PackageDownloadResult.Fail(ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Writes the response body to <paramref name="path"/>; the file is closed before returning.
    /// </summary>
    private static async Task SaveContentAsync(
        HttpResponseMessage response,
        string path,
        CancellationToken cancellationToken)
    {
        await using var fs = File.Create(path);
        await response.Content.CopyToAsync(fs, cancellationToken);
    }

    /// <summary>
    /// Best-effort removal of a directory; failures are logged and otherwise ignored so that the
    /// original error is the one reported to the caller.
    /// </summary>
    private void TryDeleteDirectory(string path)
    {
        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Failed to clean up directory {Path}", path);
        }
    }

    /// <summary>
    /// Parses Maven coordinates from package name.
    /// Formats: "groupId:artifactId" or just "artifactId".
    /// </summary>
    /// <returns>
    /// The first two ':'-separated parts as (groupId, artifactId). When there is no ':', the
    /// whole name is used for both values.
    /// </returns>
    private static (string groupId, string artifactId) ParseCoordinates(string packageName)
    {
        var parts = packageName.Split(':');
        if (parts.Length >= 2)
        {
            return (parts[0], parts[1]);
        }

        // No groupId provided: reuse the package name as both groupId and artifactId.
        return (packageName, packageName);
    }

    /// <summary>
    /// Creates a safe directory name from Maven coordinates.
    /// </summary>
    private static string GetSafeDirectoryName(string groupId, string artifactId)
    {
        // Use artifactId primarily, prefixed with last segment of groupId if different
        var groupLastPart = groupId.Split('.')[^1];
        if (groupLastPart.Equals(artifactId, StringComparison.OrdinalIgnoreCase))
        {
            return SanitizePathSegment(artifactId);
        }

        return SanitizePathSegment($"{groupLastPart}.{artifactId}");
    }

    /// <summary>
    /// Replaces path separators and characters that are invalid in file names with '-', so the
    /// value can be used as a single path segment.
    /// </summary>
    private static string SanitizePathSegment(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        var invalid = Path.GetInvalidFileNameChars();
        var chars = value.ToCharArray();
        for (var i = 0; i < chars.Length; i++)
        {
            if (chars[i] is '/' or '\\' || Array.IndexOf(invalid, chars[i]) >= 0)
            {
                chars[i] = '-';
            }
        }

        return new string(chars);
    }
}
|
||||
|
||||
/// <summary>
/// Configuration values for the Maven package downloader.
/// </summary>
public sealed class MavenDownloaderOptions
{
    /// <summary>Repository base URL to use instead of Maven Central; null selects Maven Central.</summary>
    public string? RepositoryUrl { get; set; }

    /// <summary>Directory intended for caching downloaded packages.</summary>
    public string? CacheDirectory { get; set; }

    /// <summary>Intended upper bound for a package, in bytes; 0 means no limit.</summary>
    public long MaxPackageSize { get; set; }

    /// <summary>Whether sources JARs should be preferred for analysis.</summary>
    public bool PreferSourcesJar { get; set; }
}
|
||||
@@ -0,0 +1,238 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// NpmPackageDownloader.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-004)
|
||||
// Description: Downloads npm packages from registry.npmjs.org for vulnerability
|
||||
// surface analysis.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using SharpCompress.Archives;
|
||||
using SharpCompress.Archives.Tar;
|
||||
using SharpCompress.Common;
|
||||
using SharpCompress.Readers;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.Download;
|
||||
|
||||
/// <summary>
|
||||
/// Downloads npm packages from registry.npmjs.org or custom registries.
|
||||
/// npm packages are distributed as .tgz (gzipped tarball) files.
|
||||
/// </summary>
|
||||
public sealed class NpmPackageDownloader : IPackageDownloader
|
||||
{
|
||||
private const string DefaultRegistryUrl = "https://registry.npmjs.org";
|
||||
|
||||
private readonly HttpClient _httpClient;
|
||||
private readonly ILogger<NpmPackageDownloader> _logger;
|
||||
private readonly NpmDownloaderOptions _options;
|
||||
|
||||
public NpmPackageDownloader(
|
||||
HttpClient httpClient,
|
||||
ILogger<NpmPackageDownloader> logger,
|
||||
IOptions<NpmDownloaderOptions> options)
|
||||
{
|
||||
_httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_options = options?.Value ?? new NpmDownloaderOptions();
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Ecosystem => "npm";
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<PackageDownloadResult> DownloadAsync(
|
||||
PackageDownloadRequest request,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
|
||||
var sw = Stopwatch.StartNew();
|
||||
|
||||
try
|
||||
{
|
||||
// Normalize package name (npm uses lowercase, scoped packages have @scope/name)
|
||||
var packageName = request.PackageName;
|
||||
var safePackageName = GetSafeDirectoryName(packageName);
|
||||
var extractedDir = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}");
|
||||
var archivePath = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}.tgz");
|
||||
|
||||
// Check cache first
|
||||
if (request.UseCache && Directory.Exists(extractedDir))
|
||||
{
|
||||
sw.Stop();
|
||||
_logger.LogDebug("Using cached npm package {Package} v{Version}", packageName, request.Version);
|
||||
return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed, fromCache: true);
|
||||
}
|
||||
|
||||
// Get package metadata to find tarball URL
|
||||
var registryUrl = request.RegistryUrl ?? _options.RegistryUrl ?? DefaultRegistryUrl;
|
||||
var tarballUrl = await GetTarballUrlAsync(registryUrl, packageName, request.Version, cancellationToken);
|
||||
|
||||
if (tarballUrl is null)
|
||||
{
|
||||
sw.Stop();
|
||||
var error = $"Version {request.Version} not found for package {packageName}";
|
||||
_logger.LogWarning("npm package not found: {Error}", error);
|
||||
return PackageDownloadResult.Fail(error, sw.Elapsed);
|
||||
}
|
||||
|
||||
_logger.LogDebug("Downloading npm package from {Url}", tarballUrl);
|
||||
|
||||
// Download tarball
|
||||
Directory.CreateDirectory(request.OutputDirectory);
|
||||
|
||||
using var response = await _httpClient.GetAsync(tarballUrl, cancellationToken);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
sw.Stop();
|
||||
var error = $"Failed to download: HTTP {(int)response.StatusCode} {response.ReasonPhrase}";
|
||||
_logger.LogWarning("npm download failed for {Package} v{Version}: {Error}",
|
||||
packageName, request.Version, error);
|
||||
return PackageDownloadResult.Fail(error, sw.Elapsed);
|
||||
}
|
||||
|
||||
// Save archive
|
||||
await using (var fs = File.Create(archivePath))
|
||||
{
|
||||
await response.Content.CopyToAsync(fs, cancellationToken);
|
||||
}
|
||||
|
||||
// Extract .tgz (gzipped tarball)
|
||||
if (Directory.Exists(extractedDir))
|
||||
{
|
||||
Directory.Delete(extractedDir, recursive: true);
|
||||
}
|
||||
|
||||
Directory.CreateDirectory(extractedDir);
|
||||
ExtractTgz(archivePath, extractedDir);
|
||||
|
||||
sw.Stop();
|
||||
_logger.LogDebug("Downloaded and extracted npm {Package} v{Version} in {Duration}ms",
|
||||
packageName, request.Version, sw.ElapsedMilliseconds);
|
||||
|
||||
return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
sw.Stop();
|
||||
_logger.LogWarning(ex, "Failed to download npm package {Package} v{Version}",
|
||||
request.PackageName, request.Version);
|
||||
return PackageDownloadResult.Fail(ex.Message, sw.Elapsed);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the tarball URL from the npm registry metadata.
|
||||
/// </summary>
|
||||
private async Task<string?> GetTarballUrlAsync(
|
||||
string registryUrl,
|
||||
string packageName,
|
||||
string version,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
// Encode scoped packages (@scope/name → @scope%2fname)
|
||||
var encodedName = Uri.EscapeDataString(packageName).Replace("%40", "@");
|
||||
var metadataUrl = $"{registryUrl}/{encodedName}";
|
||||
|
||||
using var response = await _httpClient.GetAsync(metadataUrl, cancellationToken);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
_logger.LogDebug("Failed to fetch npm metadata for {Package}: HTTP {StatusCode}",
|
||||
packageName, (int)response.StatusCode);
|
||||
return null;
|
||||
}
|
||||
|
||||
await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
|
||||
using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken);
|
||||
|
||||
// Look for versions.<version>.dist.tarball
|
||||
if (doc.RootElement.TryGetProperty("versions", out var versions) &&
|
||||
versions.TryGetProperty(version, out var versionObj) &&
|
||||
versionObj.TryGetProperty("dist", out var dist) &&
|
||||
dist.TryGetProperty("tarball", out var tarball))
|
||||
{
|
||||
return tarball.GetString();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Extracts a .tgz file (gzipped tarball) to the specified directory.
/// </summary>
/// <param name="tgzPath">Path of the archive to open.</param>
/// <param name="destinationDir">Directory that receives the extracted files.</param>
/// <exception cref="InvalidDataException">
/// An entry's path resolves to a location outside <paramref name="destinationDir"/>.
/// </exception>
private static void ExtractTgz(string tgzPath, string destinationDir)
{
    // Resolve the destination once; every entry is validated against this prefix so that
    // names such as "package/../../x" or rooted paths cannot escape the extraction directory.
    var rootPath = Path.GetFullPath(destinationDir);
    var rootPrefix = rootPath.EndsWith(Path.DirectorySeparatorChar)
        ? rootPath
        : rootPath + Path.DirectorySeparatorChar;

    using var archive = ArchiveFactory.Open(tgzPath);

    foreach (var entry in archive.Entries)
    {
        if (entry.IsDirectory)
        {
            continue;
        }

        // npm packages have a "package/" prefix in the tarball
        var entryPath = entry.Key ?? string.Empty;
        if (entryPath.StartsWith("package/", StringComparison.OrdinalIgnoreCase))
        {
            entryPath = entryPath["package/".Length..];
        }

        if (entryPath.Length == 0)
        {
            // Nothing left to name a file with; writing would target the directory itself.
            continue;
        }

        var destPath = Path.GetFullPath(Path.Combine(rootPath, entryPath));
        if (!destPath.StartsWith(rootPrefix, StringComparison.Ordinal))
        {
            throw new InvalidDataException(
                $"Archive entry '{entry.Key}' resolves outside the extraction directory.");
        }

        var destDir = Path.GetDirectoryName(destPath);
        if (!string.IsNullOrEmpty(destDir))
        {
            Directory.CreateDirectory(destDir);
        }

        entry.WriteToFile(destPath, new ExtractionOptions
        {
            ExtractFullPath = false,
            Overwrite = true
        });
    }
}
|
||||
|
||||
/// <summary>
/// Builds a directory-friendly name from an npm package name by dropping every <c>@</c>
/// and turning both <c>/</c> and <c>\</c> into <c>-</c> (for example <c>@scope/name</c>
/// becomes <c>scope-name</c>).
/// </summary>
/// <param name="packageName">Package name, optionally scoped.</param>
/// <returns>The transformed name.</returns>
private static string GetSafeDirectoryName(string packageName)
{
    var withoutScopeMarker = packageName.Replace("@", string.Empty);
    var withoutForwardSlashes = withoutScopeMarker.Replace("/", "-");
    return withoutForwardSlashes.Replace("\\", "-");
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Configuration values for the npm package downloader.
/// </summary>
public sealed class NpmDownloaderOptions
{
    /// <summary>
    /// Registry base URL to use instead of the default; <c>null</c> selects registry.npmjs.org.
    /// </summary>
    public string? RegistryUrl { get; set; }

    /// <summary>
    /// Directory in which downloaded packages are cached.
    /// </summary>
    public string? CacheDirectory { get; set; }

    /// <summary>
    /// Upper bound on package size, in bytes; <c>0</c> means no limit.
    /// </summary>
    public long MaxPackageSize { get; set; }
}
|
||||
@@ -0,0 +1,295 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// PyPIPackageDownloader.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-006)
|
||||
// Description: Downloads Python packages from PyPI for vulnerability surface
|
||||
// analysis. Supports both wheel (.whl) and source distributions.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using SharpCompress.Archives;
|
||||
using SharpCompress.Common;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.Download;
|
||||
|
||||
/// <summary>
|
||||
/// Downloads Python packages from PyPI (Python Package Index).
|
||||
/// Supports wheel (.whl) and source distribution (.tar.gz) formats.
|
||||
/// </summary>
|
||||
public sealed class PyPIPackageDownloader : IPackageDownloader
|
||||
{
|
||||
private const string DefaultRegistryUrl = "https://pypi.org/pypi";
|
||||
|
||||
private readonly HttpClient _httpClient;
|
||||
private readonly ILogger<PyPIPackageDownloader> _logger;
|
||||
private readonly PyPIDownloaderOptions _options;
|
||||
|
||||
/// <summary>
/// Creates the downloader.
/// </summary>
/// <param name="httpClient">HTTP client used for metadata and archive requests.</param>
/// <param name="logger">Logger for diagnostics.</param>
/// <param name="options">Downloader options; defaults are used when absent or when its value is null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="httpClient"/> or <paramref name="logger"/> is <c>null</c>.
/// </exception>
public PyPIPackageDownloader(
    HttpClient httpClient,
    ILogger<PyPIPackageDownloader> logger,
    IOptions<PyPIDownloaderOptions> options)
{
    ArgumentNullException.ThrowIfNull(httpClient);
    ArgumentNullException.ThrowIfNull(logger);

    _httpClient = httpClient;
    _logger = logger;

    var configured = options?.Value;
    if (configured is null)
    {
        configured = new PyPIDownloaderOptions();
    }

    _options = configured;
}
|
||||
|
||||
/// <inheritdoc />
public string Ecosystem
{
    get { return "pypi"; }
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Steps: reuse the extracted directory when caching is requested and it already exists;
/// otherwise resolve the download URL from registry metadata, stream the archive into the
/// request's output directory, and extract it (wheels as ZIP, everything else through
/// <c>ExtractTarGz</c>). When <c>MaxPackageSize</c> is greater than zero, archives larger than
/// that limit are rejected. Any exception is logged and converted into a failed result.
/// </remarks>
public async Task<PackageDownloadResult> DownloadAsync(
    PackageDownloadRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);

    var sw = Stopwatch.StartNew();

    try
    {
        // Normalize package name (PyPI uses lowercase with hyphens)
        var normalizedName = NormalizePackageName(request.PackageName);
        var safePackageName = GetSafeDirectoryName(normalizedName);
        var extractedDir = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}");

        // Check cache first
        if (request.UseCache && Directory.Exists(extractedDir))
        {
            sw.Stop();
            _logger.LogDebug("Using cached PyPI package {Package} v{Version}",
                request.PackageName, request.Version);
            return PackageDownloadResult.Ok(extractedDir, string.Empty, sw.Elapsed, fromCache: true);
        }

        // Get package metadata to find download URL
        var registryUrl = request.RegistryUrl ?? _options.RegistryUrl ?? DefaultRegistryUrl;
        var downloadInfo = await GetDownloadUrlAsync(registryUrl, normalizedName, request.Version, cancellationToken);

        if (downloadInfo is null)
        {
            sw.Stop();
            var error = $"Version {request.Version} not found for package {request.PackageName}";
            _logger.LogWarning("PyPI package not found: {Error}", error);
            return PackageDownloadResult.Fail(error, sw.Elapsed);
        }

        _logger.LogDebug("Downloading PyPI package from {Url} (type: {Type})",
            downloadInfo.Url, downloadInfo.PackageType);

        // Download package
        Directory.CreateDirectory(request.OutputDirectory);

        // ResponseHeadersRead lets the body be streamed to disk instead of being buffered in
        // memory first, and allows the declared length to be checked before reading the body.
        using var response = await _httpClient.GetAsync(
            downloadInfo.Url,
            HttpCompletionOption.ResponseHeadersRead,
            cancellationToken);

        if (!response.IsSuccessStatusCode)
        {
            sw.Stop();
            var error = $"Failed to download: HTTP {(int)response.StatusCode} {response.ReasonPhrase}";
            _logger.LogWarning("PyPI download failed for {Package} v{Version}: {Error}",
                request.PackageName, request.Version, error);
            return PackageDownloadResult.Fail(error, sw.Elapsed);
        }

        // 0 (or less) means "unlimited", per the option's documentation.
        var maxPackageSize = _options.MaxPackageSize;

        if (maxPackageSize > 0 &&
            response.Content.Headers.ContentLength is long declaredLength &&
            declaredLength > maxPackageSize)
        {
            sw.Stop();
            var error = $"Package size {declaredLength} bytes exceeds the configured limit of {maxPackageSize} bytes";
            _logger.LogWarning("PyPI download rejected for {Package} v{Version}: {Error}",
                request.PackageName, request.Version, error);
            return PackageDownloadResult.Fail(error, sw.Elapsed);
        }

        // Determine archive extension and path
        var extension = downloadInfo.PackageType == "bdist_wheel" ? ".whl" : ".tar.gz";
        var archivePath = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}{extension}");

        // Save archive
        await using (var fs = File.Create(archivePath))
        {
            await response.Content.CopyToAsync(fs, cancellationToken);
        }

        // The server may omit Content-Length, so re-check the size actually written.
        if (maxPackageSize > 0)
        {
            var actualLength = new FileInfo(archivePath).Length;
            if (actualLength > maxPackageSize)
            {
                File.Delete(archivePath);
                sw.Stop();
                var error = $"Package size {actualLength} bytes exceeds the configured limit of {maxPackageSize} bytes";
                _logger.LogWarning("PyPI download rejected for {Package} v{Version}: {Error}",
                    request.PackageName, request.Version, error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }
        }

        // Extract into a fresh directory
        if (Directory.Exists(extractedDir))
        {
            Directory.Delete(extractedDir, recursive: true);
        }

        Directory.CreateDirectory(extractedDir);

        if (downloadInfo.PackageType == "bdist_wheel")
        {
            // Wheel files are ZIP archives
            ZipFile.ExtractToDirectory(archivePath, extractedDir);
        }
        else
        {
            // Source distributions are .tar.gz
            ExtractTarGz(archivePath, extractedDir);
        }

        sw.Stop();
        _logger.LogDebug("Downloaded and extracted PyPI {Package} v{Version} in {Duration}ms",
            request.PackageName, request.Version, sw.ElapsedMilliseconds);

        return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed);
    }
    catch (Exception ex)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to download PyPI package {Package} v{Version}",
            request.PackageName, request.Version);
        return PackageDownloadResult.Fail(ex.Message, sw.Elapsed);
    }
}
|
||||
|
||||
/// <summary>
/// Looks up the download URL for a package version through the PyPI JSON API.
/// </summary>
/// <param name="registryUrl">Base URL of the JSON API; a trailing slash is tolerated.</param>
/// <param name="packageName">Package name placed in the URL path (escaped).</param>
/// <param name="version">Version placed in the URL path (escaped).</param>
/// <param name="cancellationToken">Cancels the HTTP request and the JSON parsing.</param>
/// <returns>
/// The chosen entry from the <c>urls</c> array: the source distribution when
/// <c>PreferSourceDistribution</c> is set (falling back to a wheel), otherwise the wheel
/// (falling back to the source distribution). <c>null</c> when the request is unsuccessful,
/// <c>urls</c> is missing or not an array, or no usable entry exists.
/// </returns>
private async Task<PyPIDownloadInfo?> GetDownloadUrlAsync(
    string registryUrl,
    string packageName,
    string version,
    CancellationToken cancellationToken)
{
    // Escape both path segments so characters such as '/', '?', '#' or '+' cannot alter the route.
    var metadataUrl =
        $"{registryUrl.TrimEnd('/')}/{Uri.EscapeDataString(packageName)}/{Uri.EscapeDataString(version)}/json";

    using var response = await _httpClient.GetAsync(metadataUrl, cancellationToken);

    if (!response.IsSuccessStatusCode)
    {
        _logger.LogDebug("Failed to fetch PyPI metadata for {Package} v{Version}: HTTP {StatusCode}",
            packageName, version, (int)response.StatusCode);
        return null;
    }

    await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
    using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken);

    // TryGetProperty / EnumerateArray throw on the wrong element kind, so check before using them.
    if (doc.RootElement.ValueKind != JsonValueKind.Object ||
        !doc.RootElement.TryGetProperty("urls", out var urls) ||
        urls.ValueKind != JsonValueKind.Array)
    {
        return null;
    }

    PyPIDownloadInfo? sourceDistribution = null;
    PyPIDownloadInfo? wheel = null;

    foreach (var urlEntry in urls.EnumerateArray())
    {
        if (urlEntry.ValueKind != JsonValueKind.Object)
        {
            continue;
        }

        var packageType = ReadString(urlEntry, "packagetype");
        var url = ReadString(urlEntry, "url");

        if (url is null)
        {
            continue;
        }

        if (packageType == "sdist")
        {
            // The last sdist entry wins (unchanged from the previous behavior).
            sourceDistribution = new PyPIDownloadInfo(url, "sdist");
        }
        else if (packageType == "bdist_wheel" && wheel is null)
        {
            // Only the first wheel entry is kept.
            wheel = new PyPIDownloadInfo(url, "bdist_wheel");
        }
    }

    return _options.PreferSourceDistribution
        ? (sourceDistribution ?? wheel)
        : (wheel ?? sourceDistribution);

    // Returns the property's value only when it exists and is a JSON string.
    static string? ReadString(JsonElement element, string propertyName) =>
        element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
}
|
||||
|
||||
/// <summary>
/// Extracts a .tar.gz file to the specified directory, dropping the first path segment of
/// every entry that has more than one segment.
/// </summary>
/// <param name="tarGzPath">Path of the archive to open.</param>
/// <param name="destinationDir">Directory that receives the extracted files.</param>
/// <exception cref="InvalidDataException">
/// An entry's path resolves to a location outside <paramref name="destinationDir"/>.
/// </exception>
private static void ExtractTarGz(string tarGzPath, string destinationDir)
{
    // Resolve the destination once; every entry is validated against this prefix so that
    // names containing ".." or rooted paths cannot escape the extraction directory.
    var rootPath = Path.GetFullPath(destinationDir);
    var rootPrefix = rootPath.EndsWith(Path.DirectorySeparatorChar)
        ? rootPath
        : rootPath + Path.DirectorySeparatorChar;

    using var archive = ArchiveFactory.Open(tarGzPath);

    foreach (var entry in archive.Entries)
    {
        if (entry.IsDirectory)
        {
            continue;
        }

        var entryPath = entry.Key ?? string.Empty;

        // Source distributions typically have a top-level directory like "package-1.0.0/".
        // Remove it to flatten the structure.
        var pathParts = entryPath.Split('/');
        if (pathParts.Length > 1)
        {
            entryPath = string.Join('/', pathParts.Skip(1));
        }

        if (string.IsNullOrEmpty(entryPath))
        {
            continue;
        }

        var destPath = Path.GetFullPath(Path.Combine(rootPath, entryPath));
        if (!destPath.StartsWith(rootPrefix, StringComparison.Ordinal))
        {
            throw new InvalidDataException(
                $"Archive entry '{entry.Key}' resolves outside the extraction directory.");
        }

        var destDir = Path.GetDirectoryName(destPath);
        if (!string.IsNullOrEmpty(destDir))
        {
            Directory.CreateDirectory(destDir);
        }

        entry.WriteToFile(destPath, new ExtractionOptions
        {
            ExtractFullPath = false,
            Overwrite = true
        });
    }
}
|
||||
|
||||
/// <summary>
/// Normalizes a PyPI package name following PEP 503: the name is lower-cased and every run
/// of <c>-</c>, <c>_</c> and <c>.</c> is replaced by a single <c>-</c>.
/// </summary>
/// <param name="packageName">Package name as supplied by the caller.</param>
/// <returns>
/// The normalized name. Separators at the very start or end of the input are dropped.
/// </returns>
private static string NormalizePackageName(string packageName)
{
    // Splitting on all three separators and discarding empty pieces collapses runs such as
    // "__" or "._" into one boundary, which Join then renders as a single hyphen.
    var segments = packageName
        .ToLowerInvariant()
        .Split(new[] { '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries);

    return string.Join('-', segments);
}
|
||||
|
||||
/// <summary>
/// Derives the directory-name form of a package name by replacing each <c>-</c> with <c>_</c>.
/// </summary>
/// <param name="packageName">Package name to convert.</param>
/// <returns>The name with hyphens turned into underscores.</returns>
private static string GetSafeDirectoryName(string packageName) =>
    packageName.Replace('-', '_');
|
||||
|
||||
/// <summary>
/// A download candidate taken from the PyPI <c>urls</c> array: the file URL and its
/// <c>packagetype</c> value (<c>sdist</c> or <c>bdist_wheel</c> as created in this class).
/// </summary>
private sealed record PyPIDownloadInfo(string Url, string PackageType);
|
||||
}
|
||||
|
||||
/// <summary>
/// Configuration values for the PyPI package downloader.
/// </summary>
public sealed class PyPIDownloaderOptions
{
    /// <summary>
    /// Registry base URL to use instead of the default; <c>null</c> selects pypi.org.
    /// </summary>
    public string? RegistryUrl { get; set; }

    /// <summary>
    /// Directory in which downloaded packages are cached.
    /// </summary>
    public string? CacheDirectory { get; set; }

    /// <summary>
    /// Upper bound on package size, in bytes; <c>0</c> means no limit.
    /// </summary>
    public long MaxPackageSize { get; set; }

    /// <summary>
    /// When <c>true</c> (the default), source distributions are chosen ahead of wheels,
    /// which is better suited to AST analysis.
    /// </summary>
    public bool PreferSourceDistribution { get; set; } = true;
}
|
||||
@@ -0,0 +1,508 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// JavaBytecodeFingerprinter.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-010)
|
||||
// Description: Java method fingerprinting using bytecode parsing.
|
||||
// Parses .class files from JAR archives for method extraction.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Buffers.Binary;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.Fingerprint;
|
||||
|
||||
/// <summary>
|
||||
/// Computes method fingerprints for Java packages using bytecode hashing.
|
||||
/// Parses .class files from extracted JAR archives.
|
||||
/// </summary>
|
||||
public sealed class JavaBytecodeFingerprinter : IMethodFingerprinter
|
||||
{
|
||||
private readonly ILogger<JavaBytecodeFingerprinter> _logger;
|
||||
|
||||
// Java class file magic number
|
||||
private const uint ClassFileMagic = 0xCAFEBABE;
|
||||
|
||||
/// <summary>
/// Creates the fingerprinter.
/// </summary>
/// <param name="logger">Logger for diagnostics.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
public JavaBytecodeFingerprinter(ILogger<JavaBytecodeFingerprinter> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc />
public string Ecosystem
{
    get { return "maven"; }
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Every <c>.class</c> file returned by <c>GetClassFiles</c> is processed in turn. A failure in
/// one file is logged at debug level and does not stop the run; any other exception (including
/// a cancellation observed between files) is logged and turned into a failed result.
/// </remarks>
public async Task<FingerprintResult> FingerprintAsync(
    FingerprintRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);

    var stopwatch = Stopwatch.StartNew();
    var fingerprints = new Dictionary<string, MethodFingerprint>(StringComparer.Ordinal);

    try
    {
        var processedCount = 0;

        foreach (var classFile in GetClassFiles(request.PackagePath))
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                await ProcessClassFileAsync(classFile, request.PackagePath, fingerprints, request, cancellationToken);
                processedCount++;
            }
            catch (Exception fileError)
            {
                // Best effort: one unreadable class must not abort the whole package.
                _logger.LogDebug(fileError, "Failed to process class file {Path}", classFile);
            }
        }

        stopwatch.Stop();
        _logger.LogDebug(
            "Fingerprinted {MethodCount} methods from {FileCount} class files in {Duration}ms",
            fingerprints.Count, processedCount, stopwatch.ElapsedMilliseconds);

        return FingerprintResult.Ok(fingerprints, stopwatch.Elapsed, processedCount);
    }
    catch (Exception packageError)
    {
        stopwatch.Stop();
        _logger.LogWarning(packageError, "Failed to fingerprint Java package at {Path}", request.PackagePath);
        return FingerprintResult.Fail(packageError.Message, stopwatch.Elapsed);
    }
}
|
||||
|
||||
/// <summary>
/// Lists every <c>.class</c> file under <paramref name="packagePath"/>, excluding files that
/// live inside the top-level <c>META-INF</c> directory.
/// </summary>
/// <param name="packagePath">Root directory of the extracted package.</param>
/// <returns>The matching file paths, or an empty array when the directory does not exist.</returns>
private static string[] GetClassFiles(string packagePath)
{
    if (!Directory.Exists(packagePath))
    {
        return [];
    }

    return Directory.GetFiles(packagePath, "*.class", SearchOption.AllDirectories)
        .Where(file => !IsUnderMetaInf(Path.GetRelativePath(packagePath, file)))
        .ToArray();

    // Compares the first path segment exactly, so siblings such as "META-INFO/" or a file
    // named "META-INF.class" are not excluded by a mere prefix match.
    static bool IsUnderMetaInf(string relativePath)
    {
        var firstSeparator = relativePath.IndexOfAny(
            new[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar });
        var firstSegment = firstSeparator < 0 ? relativePath : relativePath[..firstSeparator];
        return firstSegment.Equals("META-INF", StringComparison.OrdinalIgnoreCase);
    }
}
|
||||
|
||||
/// <summary>
/// Reads one <c>.class</c> file, parses it and records a fingerprint for each eligible method.
/// </summary>
/// <param name="classPath">Path of the class file to read.</param>
/// <param name="packagePath">Package root, used to compute the relative source path.</param>
/// <param name="methods">Dictionary that receives fingerprints keyed by method key.</param>
/// <param name="request">Supplies <c>IncludePrivateMethods</c>.</param>
/// <param name="cancellationToken">Cancels the file read.</param>
/// <remarks>
/// Files shorter than 10 bytes or without the class-file magic number are ignored. Methods that
/// are neither public nor protected are skipped unless the request asks for them; synthetic and
/// bridge methods are always skipped. Parsing errors are logged at debug level and swallowed.
/// </remarks>
private async Task ProcessClassFileAsync(
    string classPath,
    string packagePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request,
    CancellationToken cancellationToken)
{
    var classBytes = await File.ReadAllBytesAsync(classPath, cancellationToken);

    if (classBytes.Length < 10)
    {
        return;
    }

    // Verify magic number
    if (BinaryPrimitives.ReadUInt32BigEndian(classBytes) != ClassFileMagic)
    {
        _logger.LogDebug("Invalid class file magic in {Path}", classPath);
        return;
    }

    try
    {
        var parsedClass = ParseClassFile(classBytes);
        var sourceFile = Path.GetRelativePath(packagePath, classPath);

        var eligibleMethods = parsedClass.Methods.Where(candidate =>
            (request.IncludePrivateMethods || candidate.IsPublic || candidate.IsProtected) &&
            !candidate.IsSynthetic &&
            !candidate.IsBridge);

        foreach (var candidate in eligibleMethods)
        {
            var key = $"{parsedClass.ClassName}::{candidate.Name}{candidate.Descriptor}";

            methods[key] = new MethodFingerprint
            {
                MethodKey = key,
                DeclaringType = parsedClass.ClassName,
                Name = candidate.Name,
                Signature = ParseDescriptor(candidate.Descriptor),
                BodyHash = candidate.BodyHash,
                SignatureHash = ComputeHash(candidate.Descriptor),
                IsPublic = candidate.IsPublic,
                BodySize = candidate.CodeLength,
                SourceFile = sourceFile
            };
        }
    }
    catch (Exception parseError)
    {
        _logger.LogDebug(parseError, "Error parsing class file {Path}", classPath);
    }
}
|
||||
|
||||
/// <summary>
/// Walks a class file from the start and returns its name, access flags and methods.
/// </summary>
/// <param name="bytes">Complete class-file contents; the magic number is assumed to be verified.</param>
/// <returns>The class name resolved through the constant pool, the class access flags and all parsed methods.</returns>
private JavaClassInfo ParseClassFile(byte[] bytes)
{
    var cursor = new JavaClassReader(bytes);

    cursor.Skip(4);   // magic (already verified by the caller)
    cursor.ReadU2();  // minor version, unused
    cursor.ReadU2();  // major version, unused

    var pool = ParseConstantPool(cursor);

    var classAccessFlags = cursor.ReadU2();
    var declaredName = ResolveClassName(pool, cursor.ReadU2());

    cursor.ReadU2();  // super class index, unused

    // Each interface entry is a two-byte constant pool index.
    int interfaceTotal = cursor.ReadU2();
    cursor.Skip(interfaceTotal * 2);

    // Fields are not fingerprinted; step over them.
    int fieldsLeft = cursor.ReadU2();
    while (fieldsLeft-- > 0)
    {
        SkipFieldOrMethod(cursor);
    }

    int methodTotal = cursor.ReadU2();
    var parsedMethods = new List<JavaMethodInfo>();
    for (var parsed = 0; parsed < methodTotal; parsed++)
    {
        parsedMethods.Add(ParseMethod(cursor, pool));
    }

    return new JavaClassInfo
    {
        ClassName = declaredName,
        AccessFlags = classAccessFlags,
        Methods = parsedMethods
    };
}
|
||||
|
||||
/// <summary>
/// Reads the constant pool from the reader's current position.
/// </summary>
/// <param name="reader">Reader positioned at the <c>constant_pool_count</c> field.</param>
/// <returns>
/// A list indexed like the class file's constant pool: slot 0 is an unused placeholder, and
/// each Long/Double entry is followed by one placeholder because it occupies two slots.
/// </returns>
/// <exception cref="InvalidDataException">
/// A tag is not one of the kinds handled here; continuing would misread the rest of the file.
/// </exception>
private static List<ConstantPoolEntry> ParseConstantPool(JavaClassReader reader)
{
    var count = reader.ReadU2();
    var pool = new List<ConstantPoolEntry>(count) { new() }; // Index 0 is unused

    for (var i = 1; i < count; i++)
    {
        var tag = reader.ReadU1();
        var entry = new ConstantPoolEntry { Tag = tag };
        var occupiesTwoSlots = false;

        switch (tag)
        {
            case 1: // CONSTANT_Utf8
                var length = reader.ReadU2();
                entry.StringValue = Encoding.UTF8.GetString(reader.ReadBytes(length));
                break;
            case 3: // CONSTANT_Integer
            case 4: // CONSTANT_Float
                reader.Skip(4);
                break;
            case 5: // CONSTANT_Long
            case 6: // CONSTANT_Double
                reader.Skip(8);
                occupiesTwoSlots = true;
                break;
            case 7: // CONSTANT_Class
            case 8: // CONSTANT_String
                entry.NameIndex = reader.ReadU2();
                break;
            case 9: // CONSTANT_Fieldref
            case 10: // CONSTANT_Methodref
            case 11: // CONSTANT_InterfaceMethodref
                entry.ClassIndex = reader.ReadU2();
                entry.NameAndTypeIndex = reader.ReadU2();
                break;
            case 12: // CONSTANT_NameAndType
                entry.NameIndex = reader.ReadU2();
                entry.DescriptorIndex = reader.ReadU2();
                break;
            case 15: // CONSTANT_MethodHandle
                reader.Skip(3);
                break;
            case 16: // CONSTANT_MethodType
                reader.Skip(2);
                break;
            case 17: // CONSTANT_Dynamic
            case 18: // CONSTANT_InvokeDynamic
                reader.Skip(4);
                break;
            case 19: // CONSTANT_Module
            case 20: // CONSTANT_Package
                reader.Skip(2);
                break;
            default:
                // The payload size of an unknown tag is unknown, so every later read would be
                // misaligned. Fail here instead of producing garbage names and hashes.
                throw new InvalidDataException($"Unsupported constant pool tag {tag} at index {i}.");
        }

        pool.Add(entry);

        if (occupiesTwoSlots)
        {
            // The real entry keeps its own index; the following slot is the unusable one.
            pool.Add(new ConstantPoolEntry());
            i++;
        }
    }

    return pool;
}
|
||||
|
||||
/// <summary>
/// Reads one <c>method_info</c> structure and hashes the bytecode of its <c>Code</c> attribute.
/// </summary>
/// <param name="reader">Reader positioned at the method's access flags.</param>
/// <param name="constantPool">Constant pool used to resolve names and descriptors.</param>
/// <returns>
/// Name, descriptor, access flags, bytecode length and bytecode hash. Methods without a
/// <c>Code</c> attribute report a length of 0 and the hash of an empty array.
/// </returns>
private static JavaMethodInfo ParseMethod(JavaClassReader reader, List<ConstantPoolEntry> constantPool)
{
    var flags = reader.ReadU2();
    var methodName = GetUtf8(constantPool, reader.ReadU2());
    var methodDescriptor = GetUtf8(constantPool, reader.ReadU2());

    int attributesLeft = reader.ReadU2();
    var bytecode = Array.Empty<byte>();
    var bytecodeLength = 0;

    while (attributesLeft-- > 0)
    {
        var attributeName = GetUtf8(constantPool, reader.ReadU2());
        var attributeLength = reader.ReadU4();

        if (attributeName != "Code")
        {
            reader.Skip((int)attributeLength);
            continue;
        }

        // Layout: max_stack (2) + max_locals (2) + code_length (4) + code bytes + trailer
        reader.Skip(4);
        bytecodeLength = (int)reader.ReadU4();
        bytecode = reader.ReadBytes(bytecodeLength);

        // The trailer (exception table and nested attributes) is whatever remains of the
        // attribute after the 8 header bytes and the code itself.
        var trailerLength = attributeLength - 8 - bytecodeLength;
        reader.Skip((int)trailerLength);
    }

    return new JavaMethodInfo
    {
        Name = methodName,
        Descriptor = methodDescriptor,
        AccessFlags = flags,
        CodeLength = bytecodeLength,
        BodyHash = ComputeHash(bytecode)
    };
}
|
||||
|
||||
/// <summary>
/// Advances the reader past one field or method structure without interpreting it.
/// </summary>
/// <param name="reader">Reader positioned at the structure's access flags.</param>
private static void SkipFieldOrMethod(JavaClassReader reader)
{
    // access_flags + name_index + descriptor_index, two bytes each
    reader.Skip(6);

    int attributesLeft = reader.ReadU2();
    while (attributesLeft-- > 0)
    {
        reader.Skip(2); // attribute_name_index
        var payloadLength = reader.ReadU4();
        reader.Skip((int)payloadLength);
    }
}
|
||||
|
||||
/// <summary>
/// Resolves a class name from a constant pool index, converting <c>/</c> to <c>.</c>.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="classIndex">Index expected to refer to an entry with tag 7 (CONSTANT_Class).</param>
/// <returns>
/// The dotted class name, or <c>"Unknown"</c> when the index is out of range or the entry
/// does not have tag 7.
/// </returns>
private static string ResolveClassName(List<ConstantPoolEntry> pool, int classIndex)
{
    var indexInRange = classIndex > 0 && classIndex < pool.Count;
    if (!indexInRange || pool[classIndex].Tag != 7)
    {
        return "Unknown";
    }

    var internalName = GetUtf8(pool, pool[classIndex].NameIndex);
    return internalName.Replace('/', '.');
}
|
||||
|
||||
/// <summary>
/// Returns the string stored at a constant pool index.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="index">Index to read.</param>
/// <returns>
/// The entry's string value, or an empty string when the index is out of range or the
/// entry carries no string.
/// </returns>
private static string GetUtf8(List<ConstantPoolEntry> pool, int index) =>
    index > 0 && index < pool.Count
        ? (pool[index].StringValue ?? string.Empty)
        : string.Empty;
|
||||
|
||||
/// <summary>
/// Renders a Java method descriptor as a readable signature, for example
/// <c>(Ljava/lang/String;I)V</c> becomes <c>(String, int) -> void</c>.
/// </summary>
/// <param name="descriptor">Descriptor text taken from the class file.</param>
/// <returns>
/// The parameter list in parentheses (when the descriptor starts with <c>(</c>) followed by
/// <c> -> </c> and the return type (when any text remains after the parameter list).
/// </returns>
private static string ParseDescriptor(string descriptor)
{
    var signature = new StringBuilder();
    var cursor = 0;

    if (descriptor.StartsWith('('))
    {
        cursor = 1;
        var parameterTypes = new List<string>();

        while (cursor < descriptor.Length && descriptor[cursor] != ')')
        {
            var (parameterType, next) = ParseType(descriptor, cursor);
            parameterTypes.Add(parameterType);
            cursor = next;
        }

        signature.Append('(').AppendJoin(", ", parameterTypes).Append(')');
        cursor++; // step over ')'
    }

    if (cursor < descriptor.Length)
    {
        signature.Append(" -> ").Append(ParseType(descriptor, cursor).typeName);
    }

    return signature.ToString();
}
|
||||
|
||||
/// <summary>
/// Decodes the single type that starts at <paramref name="index"/> in a descriptor.
/// </summary>
/// <param name="descriptor">Descriptor text.</param>
/// <param name="index">Position of the type's first character.</param>
/// <returns>
/// The readable type name and the index just past the type. An index at or beyond the end
/// yields <c>("void", index)</c>; an unrecognized character yields <c>"?"</c> and advances by one.
/// </returns>
private static (string typeName, int newIndex) ParseType(string descriptor, int index)
{
    if (index >= descriptor.Length)
    {
        return ("void", index);
    }

    var next = index + 1;

    switch (descriptor[index])
    {
        case 'B': return ("byte", next);
        case 'C': return ("char", next);
        case 'D': return ("double", next);
        case 'F': return ("float", next);
        case 'I': return ("int", next);
        case 'J': return ("long", next);
        case 'S': return ("short", next);
        case 'Z': return ("boolean", next);
        case 'V': return ("void", next);
        case '[': return ParseArrayType(descriptor, index);
        case 'L': return ParseObjectType(descriptor, index);
        default: return ("?", next);
    }
}
|
||||
|
||||
/// <summary>
/// Decodes an array type: the element type after the <c>[</c> at <paramref name="index"/>,
/// suffixed with <c>[]</c>.
/// </summary>
/// <param name="descriptor">Descriptor text.</param>
/// <param name="index">Position of the <c>[</c> character.</param>
/// <returns>The array type name and the index just past the element type.</returns>
private static (string typeName, int newIndex) ParseArrayType(string descriptor, int index)
{
    var element = ParseType(descriptor, index + 1);
    return (element.typeName + "[]", element.newIndex);
}
|
||||
|
||||
/// <summary>
/// Decodes an object type of the form <c>Lpkg/Name;</c> into its simple class name.
/// </summary>
/// <param name="descriptor">Descriptor text.</param>
/// <param name="index">Position of the leading <c>L</c>.</param>
/// <returns>
/// The text after the last <c>/</c> of the class name and the index just past the <c>;</c>.
/// When no <c>;</c> follows, <c>("Object", index + 1)</c>.
/// </returns>
private static (string typeName, int newIndex) ParseObjectType(string descriptor, int index)
{
    var terminator = descriptor.IndexOf(';', index);
    if (terminator < 0)
    {
        return ("Object", index + 1);
    }

    var qualifiedName = descriptor.Substring(index + 1, terminator - index - 1);
    var lastSlash = qualifiedName.LastIndexOf('/');
    var simpleName = lastSlash < 0 ? qualifiedName : qualifiedName[(lastSlash + 1)..];

    return (simpleName, terminator + 1);
}
|
||||
|
||||
/// <summary>
/// Hashes a byte array with SHA-256 and returns the first 16 digest bytes as lowercase hex.
/// </summary>
/// <param name="data">Bytes to hash.</param>
/// <returns>A 32-character hex string, or <c>"empty"</c> for a zero-length array.</returns>
private static string ComputeHash(byte[] data)
{
    if (data.Length == 0)
    {
        return "empty";
    }

    var digest = SHA256.HashData(data);
    return Convert.ToHexStringLower(digest.AsSpan(0, 16));
}
|
||||
|
||||
/// <summary>
/// Hashes the UTF-8 bytes of a string using the byte-array overload.
/// </summary>
/// <param name="data">Text to hash.</param>
/// <returns>The truncated hex digest, or <c>"empty"</c> for a null or empty string.</returns>
private static string ComputeHash(string data) =>
    string.IsNullOrEmpty(data)
        ? "empty"
        : ComputeHash(Encoding.UTF8.GetBytes(data));
|
||||
|
||||
/// <summary>
/// Forward cursor over a class file's bytes; multi-byte values are read big-endian.
/// </summary>
private sealed class JavaClassReader(byte[] data)
{
    // Offset of the next unread byte.
    private int _position;

    /// <summary>Reads one byte and advances by one.</summary>
    public byte ReadU1()
    {
        var offset = _position++;
        return data[offset];
    }

    /// <summary>Reads a big-endian 16-bit unsigned value and advances by two.</summary>
    public ushort ReadU2()
    {
        ReadOnlySpan<byte> unread = data.AsSpan(_position);
        var result = BinaryPrimitives.ReadUInt16BigEndian(unread);
        _position += sizeof(ushort);
        return result;
    }

    /// <summary>Reads a big-endian 32-bit unsigned value and advances by four.</summary>
    public uint ReadU4()
    {
        ReadOnlySpan<byte> unread = data.AsSpan(_position);
        var result = BinaryPrimitives.ReadUInt32BigEndian(unread);
        _position += sizeof(uint);
        return result;
    }

    /// <summary>Copies <paramref name="count"/> bytes into a new array and advances past them.</summary>
    public byte[] ReadBytes(int count)
    {
        var copy = data.AsSpan(_position, count).ToArray();
        _position += count;
        return copy;
    }

    /// <summary>Moves the cursor by <paramref name="count"/> bytes without reading.</summary>
    public void Skip(int count)
    {
        _position += count;
    }
}
|
||||
|
||||
/// <summary>
/// One constant pool slot. Only the fields relevant to the entry's tag are populated by
/// the parser; the rest keep their defaults.
/// </summary>
private sealed class ConstantPoolEntry
{
    /// <summary>Tag byte read from the class file (0 for placeholder slots).</summary>
    public byte Tag { get; init; }

    /// <summary>Decoded text of a Utf8 entry.</summary>
    public string? StringValue { get; set; }

    /// <summary>Index stored by Class, String and NameAndType entries.</summary>
    public int NameIndex { get; set; }

    /// <summary>Descriptor index stored by NameAndType entries.</summary>
    public int DescriptorIndex { get; set; }

    /// <summary>Class index stored by Fieldref, Methodref and InterfaceMethodref entries.</summary>
    public int ClassIndex { get; set; }

    /// <summary>NameAndType index stored by Fieldref, Methodref and InterfaceMethodref entries.</summary>
    public int NameAndTypeIndex { get; set; }
}
|
||||
|
||||
/// <summary>
/// Result of parsing one class file.
/// </summary>
private sealed record JavaClassInfo
{
    /// <summary>Dotted class name, or <c>"Unknown"</c> when it could not be resolved.</summary>
    public required string ClassName { get; init; }

    /// <summary>Raw class-level access flags.</summary>
    public ushort AccessFlags { get; init; }

    /// <summary>All methods parsed from the class, in file order.</summary>
    public required List<JavaMethodInfo> Methods { get; init; }
}
|
||||
|
||||
/// <summary>
/// Data extracted from one method of a class file.
/// </summary>
private sealed record JavaMethodInfo
{
    // Bit masks tested against AccessFlags.
    private const ushort PublicMask = 0x0001;
    private const ushort ProtectedMask = 0x0004;
    private const ushort BridgeMask = 0x0040;
    private const ushort SyntheticMask = 0x1000;

    /// <summary>Method name from the constant pool.</summary>
    public required string Name { get; init; }

    /// <summary>Raw method descriptor from the constant pool.</summary>
    public required string Descriptor { get; init; }

    /// <summary>Raw method-level access flags.</summary>
    public ushort AccessFlags { get; init; }

    /// <summary>Length in bytes of the method's bytecode (0 when there is no Code attribute).</summary>
    public int CodeLength { get; init; }

    /// <summary>Truncated hash of the method's bytecode.</summary>
    public required string BodyHash { get; init; }

    /// <summary>True when bit 0x0001 is set.</summary>
    public bool IsPublic => HasFlag(PublicMask);

    /// <summary>True when bit 0x0004 is set.</summary>
    public bool IsProtected => HasFlag(ProtectedMask);

    /// <summary>True when bit 0x1000 is set.</summary>
    public bool IsSynthetic => HasFlag(SyntheticMask);

    /// <summary>True when bit 0x0040 is set.</summary>
    public bool IsBridge => HasFlag(BridgeMask);

    private bool HasFlag(ushort mask) => (AccessFlags & mask) != 0;
}
|
||||
}
|
||||
@@ -0,0 +1,492 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// JavaScriptMethodFingerprinter.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-009)
|
||||
// Description: JavaScript/Node.js method fingerprinting using AST hashing.
|
||||
// Uses Acornima for JavaScript parsing in .NET.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.Fingerprint;
|
||||
|
||||
/// <summary>
|
||||
/// Computes method fingerprints for JavaScript/Node.js packages using AST-based hashing.
|
||||
/// Parses .js/.mjs/.cjs files and extracts function declarations, methods, and arrow functions.
|
||||
/// </summary>
|
||||
public sealed partial class JavaScriptMethodFingerprinter : IMethodFingerprinter
|
||||
{
|
||||
private readonly ILogger<JavaScriptMethodFingerprinter> _logger;
|
||||
|
||||
// Source-generated regular expressions used to locate JavaScript constructs.

/// <summary>
/// Matches <c>function name(params) {</c>, optionally preceded by <c>export</c> and/or <c>async</c>.
/// Groups: 1 = export keyword, 2 = async keyword, 3 = function name, 4 = parameter text.
/// </summary>
[GeneratedRegex(@"(export\s+)?(async\s+)?function\s+(\w+)\s*\(([^)]*)\)\s*\{", RegexOptions.Compiled)]
private static partial Regex FunctionDeclarationRegex();

/// <summary>
/// Matches <c>name: function(params) {</c>, with an optional <c>async</c> before <c>function</c>.
/// Groups: 1 = property name, 2 = async keyword, 3 = parameter text.
/// </summary>
[GeneratedRegex(@"(\w+)\s*:\s*(async\s+)?function\s*\(([^)]*)\)\s*\{", RegexOptions.Compiled)]
private static partial Regex ObjectMethodRegex();

/// <summary>
/// Matches <c>name(params) {</c>, with an optional leading <c>async</c>.
/// Groups: 1 = async keyword, 2 = name, 3 = parameter text.
/// </summary>
[GeneratedRegex(@"(async\s+)?(\w+)\s*\(([^)]*)\)\s*\{", RegexOptions.Compiled)]
private static partial Regex ClassMethodRegex();

/// <summary>
/// Matches <c>const|let|var name = (params) =></c>, with an optional <c>async</c> before the parameters.
/// Groups: 1 = declaration keyword, 2 = variable name, 3 = async keyword, 4 = parameter text.
/// </summary>
[GeneratedRegex(@"(const|let|var)\s+(\w+)\s*=\s*(async\s+)?\(([^)]*)\)\s*=>", RegexOptions.Compiled)]
private static partial Regex ArrowFunctionRegex();

/// <summary>
/// Matches <c>class Name {</c> with an optional <c>extends Base</c>.
/// Groups: 1 = class name, 2 = base class name.
/// </summary>
[GeneratedRegex(@"class\s+(\w+)(?:\s+extends\s+(\w+))?\s*\{", RegexOptions.Compiled)]
private static partial Regex ClassDeclarationRegex();

/// <summary>
/// Matches <c>module.exports = Name</c>, allowing an optional <c>class</c> keyword before the name.
/// Groups: 1 = exported identifier.
/// </summary>
[GeneratedRegex(@"module\.exports\s*=\s*(?:class\s+)?(\w+)", RegexOptions.Compiled)]
private static partial Regex ModuleExportsRegex();
|
||||
|
||||
/// <summary>
/// Creates the fingerprinter.
/// </summary>
/// <param name="logger">Logger for diagnostics.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
public JavaScriptMethodFingerprinter(ILogger<JavaScriptMethodFingerprinter> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
|
||||
|
||||
/// <inheritdoc />
public string Ecosystem
{
    get { return "npm"; }
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Every file returned by <c>GetJavaScriptFiles</c> is processed in turn. A failure in one file
/// is logged at debug level and does not stop the run; any other exception (including a
/// cancellation observed between files) is logged and turned into a failed result.
/// </remarks>
public async Task<FingerprintResult> FingerprintAsync(
    FingerprintRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);

    var stopwatch = Stopwatch.StartNew();
    var fingerprints = new Dictionary<string, MethodFingerprint>(StringComparer.Ordinal);

    try
    {
        var processedCount = 0;

        foreach (var scriptFile in GetJavaScriptFiles(request.PackagePath))
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                await ProcessJavaScriptFileAsync(scriptFile, request.PackagePath, fingerprints, request, cancellationToken);
                processedCount++;
            }
            catch (Exception fileError)
            {
                // Best effort: one unreadable file must not abort the whole package.
                _logger.LogDebug(fileError, "Failed to process JavaScript file {Path}", scriptFile);
            }
        }

        stopwatch.Stop();
        _logger.LogDebug(
            "Fingerprinted {MethodCount} functions from {FileCount} files in {Duration}ms",
            fingerprints.Count, processedCount, stopwatch.ElapsedMilliseconds);

        return FingerprintResult.Ok(fingerprints, stopwatch.Elapsed, processedCount);
    }
    catch (Exception packageError)
    {
        stopwatch.Stop();
        _logger.LogWarning(packageError, "Failed to fingerprint JavaScript package at {Path}", request.PackagePath);
        return FingerprintResult.Fail(packageError.Message, stopwatch.Elapsed);
    }
}
|
||||
|
||||
/// <summary>
/// Lists the JavaScript source files (<c>.js</c>, <c>.mjs</c>, <c>.cjs</c>, <c>.jsx</c>) under
/// <paramref name="packagePath"/>, excluding the top-level <c>node_modules</c> and <c>dist</c>
/// directories and any path containing <c>.min.</c>.
/// </summary>
/// <param name="packagePath">Root directory of the extracted package.</param>
/// <returns>The matching file paths, or an empty array when the directory does not exist.</returns>
private static string[] GetJavaScriptFiles(string packagePath)
{
    if (!Directory.Exists(packagePath))
    {
        return [];
    }

    return Directory.GetFiles(packagePath, "*", SearchOption.AllDirectories)
        .Where(HasJavaScriptExtension)
        .Where(file => IsSourceCandidate(Path.GetRelativePath(packagePath, file)))
        .ToArray();

    static bool HasJavaScriptExtension(string file) =>
        Path.GetExtension(file).ToLowerInvariant() is ".js" or ".mjs" or ".cjs" or ".jsx";

    // Compares the first path segment exactly, so "distribution/" or a root file such as
    // "dist.js" is not excluded merely because its name starts with an excluded name.
    static bool IsSourceCandidate(string relativePath)
    {
        var firstSeparator = relativePath.IndexOfAny(
            new[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar });
        var topLevel = firstSeparator < 0 ? relativePath : relativePath[..firstSeparator];

        return !topLevel.Equals("node_modules", StringComparison.OrdinalIgnoreCase) &&
               !topLevel.Equals("dist", StringComparison.OrdinalIgnoreCase) &&
               !relativePath.Contains(".min.", StringComparison.OrdinalIgnoreCase);
    }
}
|
||||
|
||||
/// <summary>
/// Reads one JavaScript file and runs every extractor over its text, adding the
/// discovered fingerprints to <paramref name="methods"/>.
/// </summary>
/// <param name="jsPath">Full path of the file to read.</param>
/// <param name="packagePath">Package root used to derive the relative path and module name.</param>
/// <param name="methods">Accumulator keyed by method key; later entries overwrite earlier ones.</param>
/// <param name="request">Fingerprint options forwarded to the extractors.</param>
/// <param name="cancellationToken">Token observed while reading the file.</param>
private async Task ProcessJavaScriptFileAsync(
    string jsPath,
    string packagePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request,
    CancellationToken cancellationToken)
{
    var source = await File.ReadAllTextAsync(jsPath, cancellationToken);
    var pathInPackage = Path.GetRelativePath(packagePath, jsPath);
    var module = GetModuleName(pathInPackage);

    // Order matters: extractors that produce the same key overwrite earlier results.
    var extractors = new Action<string, string, string, Dictionary<string, MethodFingerprint>, FingerprintRequest>[]
    {
        ExtractFunctionDeclarations, // function declarations
        ExtractClassMethods,         // class methods
        ExtractArrowFunctions,       // arrow functions
        ExtractObjectMethods,        // object methods
    };

    foreach (var extract in extractors)
    {
        extract(source, module, pathInPackage, methods, request);
    }
}
|
||||
|
||||
/// <summary>
/// Records a fingerprint for every <c>function</c> declaration matched by
/// <c>FunctionDeclarationRegex</c> in <paramref name="content"/>.
/// </summary>
/// <param name="content">Full text of the JavaScript file.</param>
/// <param name="moduleName">Module name used as the declaring type and key prefix.</param>
/// <param name="filePath">Source file path stored on each fingerprint.</param>
/// <param name="methods">Accumulator keyed by method key.</param>
/// <param name="request">Options; non-exported functions are kept only when private methods are included.</param>
private void ExtractFunctionDeclarations(
    string content,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    for (var hit = FunctionDeclarationRegex().Match(content); hit.Success; hit = hit.NextMatch())
    {
        // Groups: 1 = export marker, 2 = async marker, 3 = name, 4 = parameter list.
        var exported = hit.Groups[1].Value.Length > 0;
        var asyncPrefix = hit.Groups[2].Value.Length > 0 ? "async " : "";
        var name = hit.Groups[3].Value;
        var parameterList = hit.Groups[4].Value.Trim();

        if (request.IncludePrivateMethods || exported)
        {
            var hash = ComputeFunctionBodyHash(content, hit.Index);
            var key = moduleName + "::" + name + "(" + NormalizeParams(parameterList) + ")";

            methods[key] = new MethodFingerprint
            {
                MethodKey = key,
                DeclaringType = moduleName,
                Name = name,
                Signature = asyncPrefix + "function " + name + "(" + parameterList + ")",
                BodyHash = hash,
                IsPublic = exported,
                SourceFile = filePath,
                LineNumber = GetLineNumber(content, hit.Index)
            };
        }
    }
}
|
||||
|
||||
/// <summary>
/// Records a fingerprint for every method found inside each class declaration in
/// <paramref name="content"/>.
/// </summary>
/// <remarks>
/// The class body is located with simple brace matching and then scanned with
/// <c>ClassMethodRegex</c>. Constructors and <c>#private</c> methods are kept only when
/// <see cref="FingerprintRequest.IncludePrivateMethods"/> is set.
/// </remarks>
/// <param name="content">Full text of the JavaScript file.</param>
/// <param name="moduleName">Module name used as the key prefix.</param>
/// <param name="filePath">Source file path stored on each fingerprint.</param>
/// <param name="methods">Accumulator keyed by method key.</param>
/// <param name="request">Fingerprint options.</param>
private void ExtractClassMethods(
    string content,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    var classMatches = ClassDeclarationRegex().Matches(content);

    foreach (Match classMatch in classMatches)
    {
        var className = classMatch.Groups[1].Value;
        var classBodyStart = content.IndexOf('{', classMatch.Index);
        if (classBodyStart < 0)
            continue;

        // Find class body (simple brace matching)
        var classBody = ExtractBracedBlock(content, classBodyStart);
        if (string.IsNullOrEmpty(classBody))
            continue;

        // classBody excludes the opening brace, so an index inside it maps to
        // (classBodyStart + 1 + index) in the file - not to classMatch.Index + index.
        var classBodyOffset = classBodyStart + 1;

        var methodMatches = ClassMethodRegex().Matches(classBody);

        foreach (Match methodMatch in methodMatches)
        {
            var isAsync = !string.IsNullOrEmpty(methodMatch.Groups[1].Value);
            var methodName = methodMatch.Groups[2].Value;
            var parameters = methodMatch.Groups[3].Value.Trim();

            // Skip constructor unless specifically requested
            if (methodName == "constructor" && !request.IncludePrivateMethods)
                continue;

            // Skip private methods (prefixed with #)
            var isPrivate = methodName.StartsWith('#');
            if (isPrivate && !request.IncludePrivateMethods)
                continue;

            var bodyHash = ComputeFunctionBodyHash(classBody, methodMatch.Index);
            var methodKey = $"{moduleName}.{className}::{methodName}({NormalizeParams(parameters)})";

            methods[methodKey] = new MethodFingerprint
            {
                MethodKey = methodKey,
                DeclaringType = $"{moduleName}.{className}",
                Name = methodName,
                Signature = $"{(isAsync ? "async " : "")}{methodName}({parameters})",
                BodyHash = bodyHash,
                IsPublic = !isPrivate,
                SourceFile = filePath,
                LineNumber = GetLineNumber(content, classBodyOffset + methodMatch.Index)
            };
        }
    }
}
|
||||
|
||||
/// <summary>
/// Records a fingerprint for every named arrow function (<c>const/let/var name = (...) =&gt;</c>)
/// matched by <c>ArrowFunctionRegex</c> in <paramref name="content"/>.
/// </summary>
/// <remarks>
/// A function counts as exported when the text between the start of its line and the start
/// of the match contains "export". Non-exported functions are kept only when
/// <see cref="FingerprintRequest.IncludePrivateMethods"/> is set.
/// </remarks>
/// <param name="content">Full text of the JavaScript file.</param>
/// <param name="moduleName">Module name used as the declaring type and key prefix.</param>
/// <param name="filePath">Source file path stored on each fingerprint.</param>
/// <param name="methods">Accumulator keyed by method key.</param>
/// <param name="request">Fingerprint options.</param>
private void ExtractArrowFunctions(
    string content,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    var matches = ArrowFunctionRegex().Matches(content);

    foreach (Match match in matches)
    {
        // Groups: 1 = const/let/var (not needed here), 2 = name, 3 = async marker, 4 = parameters.
        var functionName = match.Groups[2].Value;
        var isAsync = !string.IsNullOrEmpty(match.Groups[3].Value);
        var parameters = match.Groups[4].Value.Trim();

        // Check if it's exported. The search starts one character BEFORE the match so that a
        // match beginning on a line break cannot yield lineStart > match.Index (invalid range).
        var lineStart = match.Index > 0
            ? content.LastIndexOf('\n', match.Index - 1) + 1
            : 0;
        var line = content[lineStart..match.Index];
        var isExported = line.Contains("export", StringComparison.Ordinal);

        if (!request.IncludePrivateMethods && !isExported)
            continue;

        var bodyHash = ComputeArrowFunctionBodyHash(content, match.Index);
        var methodKey = $"{moduleName}::{functionName}({NormalizeParams(parameters)})";

        methods[methodKey] = new MethodFingerprint
        {
            MethodKey = methodKey,
            DeclaringType = moduleName,
            Name = functionName,
            Signature = $"{(isAsync ? "async " : "")}({parameters}) =>",
            BodyHash = bodyHash,
            IsPublic = isExported,
            SourceFile = filePath,
            LineNumber = GetLineNumber(content, match.Index)
        };
    }
}
|
||||
|
||||
/// <summary>
/// Records a fingerprint for every object-literal method matched by
/// <c>ObjectMethodRegex</c> in <paramref name="content"/>. All of them are marked public.
/// </summary>
/// <param name="content">Full text of the JavaScript file.</param>
/// <param name="moduleName">Module name used as the declaring type and key prefix.</param>
/// <param name="filePath">Source file path stored on each fingerprint.</param>
/// <param name="methods">Accumulator keyed by method key.</param>
/// <param name="request">Fingerprint options (not consulted by this extractor).</param>
private void ExtractObjectMethods(
    string content,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    for (var hit = ObjectMethodRegex().Match(content); hit.Success; hit = hit.NextMatch())
    {
        // Groups: 1 = method name, 2 = async marker, 3 = parameter list.
        var name = hit.Groups[1].Value;
        var asyncPrefix = hit.Groups[2].Value.Length > 0 ? "async " : "";
        var parameterList = hit.Groups[3].Value.Trim();

        var hash = ComputeFunctionBodyHash(content, hit.Index);
        var key = moduleName + "::obj." + name + "(" + NormalizeParams(parameterList) + ")";

        // Object methods are typically exported if they're in module.exports
        methods[key] = new MethodFingerprint
        {
            MethodKey = key,
            DeclaringType = moduleName,
            Name = name,
            Signature = asyncPrefix + name + "(" + parameterList + ")",
            BodyHash = hash,
            IsPublic = true,
            SourceFile = filePath,
            LineNumber = GetLineNumber(content, hit.Index)
        };
    }
}
|
||||
|
||||
/// <summary>
/// Turns a package-relative file path into a dotted module name,
/// e.g. <c>src/utils/helper.js</c> becomes <c>src.utils.helper</c>.
/// </summary>
/// <param name="relativePath">File path relative to the package root.</param>
/// <returns>The path without its extension, with directory separators replaced by dots.</returns>
private static string GetModuleName(string relativePath)
{
    var stem = Path.ChangeExtension(relativePath, null);
    var segments = stem.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
    return string.Join('.', segments);
}
|
||||
|
||||
/// <summary>
/// Reduces a raw parameter list to a comma-separated list of parameter names by cutting
/// each comma-separated entry at its first <c>=</c> (default value) and trimming it.
/// </summary>
/// <param name="parameters">Text between the parentheses of a function signature.</param>
/// <returns>Names joined with "," (no spaces); an empty string for a blank list.</returns>
private static string NormalizeParams(string parameters)
{
    if (string.IsNullOrWhiteSpace(parameters))
    {
        return "";
    }

    var names = new List<string>();
    foreach (var entry in parameters.Split(','))
    {
        var equalsAt = entry.IndexOf('=');
        var name = (equalsAt < 0 ? entry : entry[..equalsAt]).Trim();
        if (name.Length > 0)
        {
            names.Add(name);
        }
    }

    return string.Join(",", names);
}
|
||||
|
||||
/// <summary>
/// Hashes the normalized brace-delimited block that starts at the first <c>{</c> found at or
/// after <paramref name="startIndex"/>.
/// </summary>
/// <param name="content">Text to search.</param>
/// <param name="startIndex">Index at which the search for the opening brace begins.</param>
/// <returns>The body hash, or "empty" when no opening brace is found.</returns>
private static string ComputeFunctionBodyHash(string content, int startIndex)
{
    var openingBrace = content.IndexOf('{', startIndex);
    return openingBrace < 0
        ? "empty"
        : ComputeHash(NormalizeBody(ExtractBracedBlock(content, openingBrace)));
}
|
||||
|
||||
/// <summary>
/// Hashes the normalized body of the arrow function whose <c>=&gt;</c> token is the first one
/// at or after <paramref name="startIndex"/>.
/// </summary>
/// <remarks>
/// A block body (<c>=&gt; { ... }</c>) is extracted with brace matching. An expression body is
/// taken up to the first semicolon or line break, so multi-line expression bodies are only
/// hashed up to the end of their first line.
/// </remarks>
/// <param name="content">Text to search.</param>
/// <param name="startIndex">Index at which the search for <c>=&gt;</c> begins.</param>
/// <returns>The body hash, or "empty" when no arrow or no body is found.</returns>
private static string ComputeArrowFunctionBodyHash(string content, int startIndex)
{
    // Ordinal search: "=>" is a token, not linguistic text, so culture rules must not apply.
    var arrowIndex = content.IndexOf("=>", startIndex, StringComparison.Ordinal);
    if (arrowIndex < 0)
        return "empty";

    var bodyStart = arrowIndex + 2;
    while (bodyStart < content.Length && char.IsWhiteSpace(content[bodyStart]))
        bodyStart++;

    if (bodyStart >= content.Length)
        return "empty";

    // Block body
    if (content[bodyStart] == '{')
        return ComputeHash(NormalizeBody(ExtractBracedBlock(content, bodyStart)));

    // Expression body - find end by semicolon or newline
    var endIndex = content.IndexOfAny([';', '\n'], bodyStart);
    if (endIndex < 0)
        endIndex = content.Length;

    return ComputeHash(NormalizeBody(content[bodyStart..endIndex]));
}
|
||||
|
||||
/// <summary>
/// Returns the text between the brace at <paramref name="braceStart"/> and its matching
/// closing brace, found by counting <c>{</c> and <c>}</c> characters.
/// </summary>
/// <param name="content">Text containing the block.</param>
/// <param name="braceStart">Index of the opening brace.</param>
/// <returns>
/// The block contents without the outer braces; an empty string when
/// <paramref name="braceStart"/> is past the end, does not point at <c>{</c>, or the block is
/// never closed.
/// </returns>
private static string ExtractBracedBlock(string content, int braceStart)
{
    if (braceStart >= content.Length || content[braceStart] != '{')
    {
        return string.Empty;
    }

    var open = 0;
    for (var pos = braceStart; pos < content.Length; pos++)
    {
        switch (content[pos])
        {
            case '{':
                open++;
                break;

            case '}':
                open--;
                if (open == 0)
                {
                    return content.Substring(braceStart + 1, pos - braceStart - 1);
                }
                break;
        }
    }

    return string.Empty;
}
|
||||
|
||||
/// <summary>
/// Produces the canonical text of a function body that is fed into the body hash:
/// <c>//</c> and <c>/* */</c> comments are removed, runs of whitespace outside string
/// literals collapse to a single space, and string literals are copied verbatim.
/// </summary>
/// <remarks>
/// Escapes inside string literals are tracked with an explicit flag, so a literal ending in
/// an escaped backslash (<c>"a\\"</c>) is closed correctly. The line break that ends a
/// <c>//</c> comment is treated as ordinary whitespace, so adding or removing a trailing
/// comment does not change the result.
/// </remarks>
/// <param name="body">Raw body text.</param>
/// <returns>The normalized text, or "empty" when <paramref name="body"/> is null or blank.</returns>
private static string NormalizeBody(string body)
{
    if (string.IsNullOrWhiteSpace(body))
        return "empty";

    var sb = new StringBuilder(body.Length);
    var inLineComment = false;
    var inBlockComment = false;
    var inString = false;
    var escaped = false; // previous character inside the string was an unescaped backslash
    var stringChar = '\0';

    for (var i = 0; i < body.Length; i++)
    {
        var c = body[i];
        var next = i + 1 < body.Length ? body[i + 1] : '\0';

        if (inLineComment)
        {
            if (c != '\n')
                continue;

            // The comment ends here; fall through so the line break itself is
            // normalized like any other whitespace.
            inLineComment = false;
        }

        if (inBlockComment)
        {
            if (c == '*' && next == '/')
            {
                inBlockComment = false;
                i++;
            }
            continue;
        }

        if (inString)
        {
            sb.Append(c);
            if (escaped)
                escaped = false;          // this character is escaped, whatever it is
            else if (c == '\\')
                escaped = true;
            else if (c == stringChar)
                inString = false;
            continue;
        }

        if (c == '/' && next == '/')
        {
            inLineComment = true;
            i++;
            continue;
        }

        if (c == '/' && next == '*')
        {
            inBlockComment = true;
            i++;
            continue;
        }

        if (c is '"' or '\'' or '`')
        {
            inString = true;
            escaped = false;
            stringChar = c;
            sb.Append(c);
            continue;
        }

        // Normalize whitespace
        if (char.IsWhiteSpace(c))
        {
            if (sb.Length > 0 && !char.IsWhiteSpace(sb[^1]))
                sb.Append(' ');
        }
        else
        {
            sb.Append(c);
        }
    }

    return sb.ToString().Trim();
}
|
||||
|
||||
/// <summary>
/// Computes the fingerprint hash of <paramref name="content"/>: the first 16 bytes of the
/// SHA-256 digest of its UTF-8 encoding, as 32 lowercase hexadecimal characters.
/// </summary>
/// <param name="content">Normalized text to hash.</param>
/// <returns>The truncated digest, or "empty" when <paramref name="content"/> is null or empty.</returns>
private static string ComputeHash(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return "empty";
    }

    var utf8 = Encoding.UTF8.GetBytes(content);
    var digest = SHA256.HashData(utf8);

    // 16 bytes -> 32 hex characters.
    return Convert.ToHexStringLower(digest, 0, 16);
}
|
||||
|
||||
/// <summary>
/// Returns the 1-based line number of the character at <paramref name="index"/>, i.e. one
/// plus the number of <c>\n</c> characters that precede it.
/// </summary>
/// <param name="content">Text to inspect.</param>
/// <param name="index">Character offset; offsets past the end count every line break in the text.</param>
/// <returns>The 1-based line number.</returns>
private static int GetLineNumber(string content, int index)
{
    var limit = Math.Min(index, content.Length);
    var line = 1;
    var pos = 0;

    while (pos < limit)
    {
        var lineBreak = content.IndexOf('\n', pos, limit - pos);
        if (lineBreak < 0)
        {
            break;
        }

        line++;
        pos = lineBreak + 1;
    }

    return line;
}
|
||||
}
|
||||
@@ -0,0 +1,433 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// PythonAstFingerprinter.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-011)
|
||||
// Description: Python method fingerprinting using AST-based hashing.
|
||||
// Parses .py files and extracts function and method definitions.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.Fingerprint;
|
||||
|
||||
/// <summary>
|
||||
/// Computes method fingerprints for Python packages using AST-based hashing.
|
||||
/// Parses .py files and extracts function definitions and class methods.
|
||||
/// </summary>
|
||||
public sealed partial class PythonAstFingerprinter : IMethodFingerprinter
{
    private readonly ILogger<PythonAstFingerprinter> _logger;

    // Regex patterns for Python function extraction.

    // Unindented "def" / "async def". Groups: 1 = async marker, 2 = name, 3 = parameters.
    [GeneratedRegex(@"^(async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?:", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex FunctionDefRegex();

    // Unindented "class". Group 1 = class name.
    [GeneratedRegex(@"^class\s+(\w+)(?:\s*\([^)]*\))?\s*:", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex ClassDefRegex();

    // Indented "def" / "async def". Groups: 1 = indentation, 2 = async marker, 3 = name, 4 = parameters.
    // The indentation group is limited to spaces and tabs: "\s+" also matches line breaks, which
    // made a match start on a preceding blank line (wrong line number, wrong indent, empty body
    // hash) and let unindented functions that follow a blank line match as "methods".
    [GeneratedRegex(@"^([ \t]+)(async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?:", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex MethodDefRegex();

    [GeneratedRegex(@"^(\s*)@\w+(?:\([^)]*\))?$", RegexOptions.Multiline | RegexOptions.Compiled)]
    private static partial Regex DecoratorRegex();

    /// <summary>
    /// Creates a fingerprinter that writes its diagnostics to <paramref name="logger"/>.
    /// </summary>
    /// <param name="logger">Logger used for per-file and per-package messages.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
    public PythonAstFingerprinter(ILogger<PythonAstFingerprinter> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public string Ecosystem => "pypi";

    /// <inheritdoc />
    /// <remarks>
    /// A file that cannot be processed is logged and skipped; any other failure is reported via
    /// <c>FingerprintResult.Fail</c>. Cancellation requested through
    /// <paramref name="cancellationToken"/> propagates as <see cref="OperationCanceledException"/>
    /// instead of being reported as a failed package.
    /// </remarks>
    public async Task<FingerprintResult> FingerprintAsync(
        FingerprintRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();
        var methods = new Dictionary<string, MethodFingerprint>(StringComparer.Ordinal);

        try
        {
            var pyFiles = GetPythonFiles(request.PackagePath);
            var filesProcessed = 0;

            foreach (var pyPath in pyFiles)
            {
                cancellationToken.ThrowIfCancellationRequested();

                try
                {
                    await ProcessPythonFileAsync(pyPath, request.PackagePath, methods, request, cancellationToken);
                    filesProcessed++;
                }
                catch (Exception ex) when (!IsCancellation(ex))
                {
                    // Deliberate best effort: one unreadable file must not abort the package.
                    _logger.LogDebug(ex, "Failed to process Python file {Path}", pyPath);
                }
            }

            sw.Stop();
            _logger.LogDebug(
                "Fingerprinted {MethodCount} functions from {FileCount} files in {Duration}ms",
                methods.Count, filesProcessed, sw.ElapsedMilliseconds);

            return FingerprintResult.Ok(methods, sw.Elapsed, filesProcessed);
        }
        catch (Exception ex) when (!IsCancellation(ex))
        {
            sw.Stop();
            _logger.LogWarning(ex, "Failed to fingerprint Python package at {Path}", request.PackagePath);
            return FingerprintResult.Fail(ex.Message, sw.Elapsed);
        }

        // True only for cancellation that was requested through the caller's token.
        bool IsCancellation(Exception ex) =>
            ex is OperationCanceledException && cancellationToken.IsCancellationRequested;
    }

    /// <summary>
    /// Lists the <c>.py</c> files below <paramref name="packagePath"/>, skipping files whose
    /// package-relative path starts with "test" or contains "__pycache__" or ".egg-info".
    /// </summary>
    /// <remarks>
    /// The result is sorted ordinally so that processing order does not depend on file-system
    /// enumeration order.
    /// </remarks>
    /// <param name="packagePath">Root directory of the extracted package.</param>
    /// <returns>Full paths of the matching files; empty when the directory does not exist.</returns>
    private static string[] GetPythonFiles(string packagePath)
    {
        if (!Directory.Exists(packagePath))
            return [];

        return Directory.EnumerateFiles(packagePath, "*.py", SearchOption.AllDirectories)
            .Where(f =>
            {
                // GetRelativePath is robust against trailing separators and against the root
                // path text re-appearing deeper in the file path (unlike string.Replace).
                var relativePath = Path.GetRelativePath(packagePath, f);
                return !relativePath.StartsWith("test", StringComparison.OrdinalIgnoreCase) &&
                       !relativePath.Contains("__pycache__", StringComparison.OrdinalIgnoreCase) &&
                       !relativePath.Contains(".egg-info", StringComparison.OrdinalIgnoreCase);
            })
            .OrderBy(f => f, StringComparer.Ordinal)
            .ToArray();
    }

    /// <summary>
    /// Reads one Python file and extracts fingerprints for its module-level functions and
    /// its class methods into <paramref name="methods"/>.
    /// </summary>
    private async Task ProcessPythonFileAsync(
        string pyPath,
        string packagePath,
        Dictionary<string, MethodFingerprint> methods,
        FingerprintRequest request,
        CancellationToken cancellationToken)
    {
        var content = await File.ReadAllTextAsync(pyPath, cancellationToken);
        var lines = content.Split('\n');
        var relativePath = Path.GetRelativePath(packagePath, pyPath);
        var moduleName = GetModuleName(relativePath);

        // Extract module-level functions
        ExtractFunctions(content, lines, moduleName, relativePath, methods, request);

        // Extract class methods
        ExtractClassMethods(content, lines, moduleName, relativePath, methods, request);
    }

    /// <summary>
    /// Records a fingerprint for every unindented <c>def</c> / <c>async def</c> in
    /// <paramref name="content"/>.
    /// </summary>
    /// <param name="content">Full text of the Python file.</param>
    /// <param name="lines"><paramref name="content"/> split on <c>\n</c>.</param>
    /// <param name="moduleName">Module name used as the declaring type and key prefix.</param>
    /// <param name="filePath">Source file path stored on each fingerprint.</param>
    /// <param name="methods">Accumulator keyed by method key.</param>
    /// <param name="request">Options; single-underscore names are kept only when private methods are included.</param>
    private void ExtractFunctions(
        string content,
        string[] lines,
        string moduleName,
        string filePath,
        Dictionary<string, MethodFingerprint> methods,
        FingerprintRequest request)
    {
        // FunctionDefRegex is anchored at the start of a line, so every match is an
        // unindented definition; no additional indentation filtering is needed.
        foreach (Match match in FunctionDefRegex().Matches(content))
        {
            var isAsync = !string.IsNullOrEmpty(match.Groups[1].Value);
            var functionName = match.Groups[2].Value;
            var parameters = match.Groups[3].Value.Trim();

            // Skip private functions unless requested
            if (!request.IncludePrivateMethods &&
                functionName.StartsWith('_') &&
                !functionName.StartsWith("__", StringComparison.Ordinal))
                continue;

            var lineNumber = GetLineNumber(content, match.Index);

            // A signature may span several lines; the body starts after the line holding its final ':'.
            var signatureEndLine = GetLineNumber(content, match.Index + match.Length - 1);
            var bodyHash = ComputeFunctionBodyHash(lines, signatureEndLine - 1, 0);
            var methodKey = $"{moduleName}::{functionName}({NormalizeParams(parameters)})";

            // Names without a leading underscore are treated as exported.
            var isExported = !functionName.StartsWith('_');

            methods[methodKey] = new MethodFingerprint
            {
                MethodKey = methodKey,
                DeclaringType = moduleName,
                Name = functionName,
                Signature = $"{(isAsync ? "async " : "")}def {functionName}({parameters})",
                BodyHash = bodyHash,
                IsPublic = isExported,
                SourceFile = filePath,
                LineNumber = lineNumber
            };
        }
    }

    /// <summary>
    /// Records a fingerprint for every indented <c>def</c> that follows an unindented
    /// <c>class</c> statement, attributing it to that class.
    /// </summary>
    /// <remarks>
    /// A class is considered to end at the next unindented <c>class</c> or <c>def</c>, so
    /// functions nested inside a later module-level function are not attributed to the class.
    /// Defs nested inside methods or inside nested classes are still attributed to the
    /// enclosing top-level class.
    /// </remarks>
    /// <param name="content">Full text of the Python file.</param>
    /// <param name="lines"><paramref name="content"/> split on <c>\n</c>.</param>
    /// <param name="moduleName">Module name used as the key prefix.</param>
    /// <param name="filePath">Source file path stored on each fingerprint.</param>
    /// <param name="methods">Accumulator keyed by method key.</param>
    /// <param name="request">Options; single-underscore names are kept only when private methods are included.</param>
    private void ExtractClassMethods(
        string content,
        string[] lines,
        string moduleName,
        string filePath,
        Dictionary<string, MethodFingerprint> methods,
        FingerprintRequest request)
    {
        var classMatches = ClassDefRegex().Matches(content);
        if (classMatches.Count == 0)
            return;

        var methodMatches = MethodDefRegex().Matches(content);
        if (methodMatches.Count == 0)
            return;

        // Line numbers are computed once instead of inside the nested loops.
        var classLines = classMatches.Cast<Match>().Select(m => GetLineNumber(content, m.Index)).ToArray();
        var methodLines = methodMatches.Cast<Match>().Select(m => GetLineNumber(content, m.Index)).ToArray();

        // Lines on which an unindented class or function starts; the first one after a class
        // header marks the end of that class.
        var boundaryLines = FunctionDefRegex().Matches(content)
            .Cast<Match>()
            .Select(m => GetLineNumber(content, m.Index))
            .Concat(classLines)
            .OrderBy(n => n)
            .ToArray();

        for (var c = 0; c < classMatches.Count; c++)
        {
            var className = classMatches[c].Groups[1].Value;
            var classLineNumber = classLines[c];
            var classIndent = GetIndentation(lines[classLineNumber - 1]);
            var nextBoundaryLine = boundaryLines.FirstOrDefault(b => b > classLineNumber, int.MaxValue);

            for (var m = 0; m < methodMatches.Count; m++)
            {
                var methodLineNumber = methodLines[m];

                // Check if this method belongs to this class
                if (methodLineNumber <= classLineNumber)
                    continue;

                // Matches are in document order: everything from here on is outside the class.
                if (methodLineNumber > nextBoundaryLine)
                    break;

                var methodMatch = methodMatches[m];

                // Measured the same way as classIndent and the body lines (tab = 4 columns).
                var methodIndent = GetIndentation(lines[methodLineNumber - 1]);

                // Method should be indented one level from class
                if (methodIndent <= classIndent)
                    break; // We've left the class

                var isAsync = !string.IsNullOrEmpty(methodMatch.Groups[2].Value);
                var methodName = methodMatch.Groups[3].Value;
                var parameters = methodMatch.Groups[4].Value.Trim();

                var isDunderPrefixed = methodName.StartsWith("__", StringComparison.Ordinal);

                // Skip private methods unless requested
                if (!request.IncludePrivateMethods && methodName.StartsWith('_') && !isDunderPrefixed)
                    continue;

                // A signature may span several lines; the body starts after the line holding its final ':'.
                var signatureEndLine = GetLineNumber(content, methodMatch.Index + methodMatch.Length - 1);
                var bodyHash = ComputeFunctionBodyHash(lines, signatureEndLine - 1, methodIndent);
                var methodKey = $"{moduleName}.{className}::{methodName}({NormalizeParams(parameters)})";

                // Determine visibility: public unless underscore-prefixed; __dunder__ names are public.
                var isPublic = !methodName.StartsWith('_') ||
                               (isDunderPrefixed && methodName.EndsWith("__", StringComparison.Ordinal));

                methods[methodKey] = new MethodFingerprint
                {
                    MethodKey = methodKey,
                    DeclaringType = $"{moduleName}.{className}",
                    Name = methodName,
                    Signature = $"{(isAsync ? "async " : "")}def {methodName}({parameters})",
                    BodyHash = bodyHash,
                    IsPublic = isPublic,
                    SourceFile = filePath,
                    LineNumber = methodLineNumber
                };
            }
        }
    }

    /// <summary>
    /// Turns a package-relative file path into a dotted module name,
    /// e.g. <c>src/utils/helper.py</c> becomes <c>src.utils.helper</c>; a trailing
    /// <c>.__init__</c> is dropped so a package is named after its directory.
    /// </summary>
    private static string GetModuleName(string relativePath)
    {
        var withoutExt = Path.ChangeExtension(relativePath, null);
        var moduleName = withoutExt
            .Replace(Path.DirectorySeparatorChar, '.')
            .Replace(Path.AltDirectorySeparatorChar, '.');

        // Remove __init__ from module name
        const string initSuffix = ".__init__";
        if (moduleName.EndsWith(initSuffix, StringComparison.Ordinal))
        {
            moduleName = moduleName[..^initSuffix.Length];
        }

        return moduleName;
    }

    /// <summary>
    /// Reduces a raw parameter list to a comma-separated list of parameter names by removing
    /// type hints (<c>name: Type</c>) and default values (<c>name=value</c>).
    /// </summary>
    /// <remarks>
    /// The list is split only on commas that are not nested inside brackets, so a hint such as
    /// <c>Dict[str, int]</c> does not produce a bogus extra parameter.
    /// </remarks>
    /// <param name="parameters">Text between the parentheses of a <c>def</c> signature.</param>
    /// <returns>Names joined with "," (no spaces); an empty string for a blank list.</returns>
    private static string NormalizeParams(string parameters)
    {
        if (string.IsNullOrWhiteSpace(parameters))
            return "";

        var names = new List<string>();
        foreach (var rawParameter in SplitTopLevelCommas(parameters))
        {
            var p = rawParameter;

            // Remove type hints (param: Type)
            var colonIndex = p.IndexOf(':');
            if (colonIndex > 0)
                p = p[..colonIndex];

            // Remove default values (param=value)
            var equalsIndex = p.IndexOf('=');
            if (equalsIndex > 0)
                p = p[..equalsIndex];

            p = p.Trim();
            if (p.Length > 0)
                names.Add(p);
        }

        return string.Join(",", names);
    }

    /// <summary>
    /// Splits <paramref name="text"/> on commas that are outside any (), [] or {} pair.
    /// </summary>
    private static IEnumerable<string> SplitTopLevelCommas(string text)
    {
        var depth = 0;
        var start = 0;

        for (var i = 0; i < text.Length; i++)
        {
            switch (text[i])
            {
                case '(':
                case '[':
                case '{':
                    depth++;
                    break;

                case ')':
                case ']':
                case '}':
                    if (depth > 0)
                        depth--;
                    break;

                case ',' when depth == 0:
                    yield return text[start..i];
                    start = i + 1;
                    break;
            }
        }

        yield return text[start..];
    }

    /// <summary>
    /// Hashes the body of the function whose signature ends on line
    /// <paramref name="defLineIndex"/>.
    /// </summary>
    /// <remarks>
    /// The body is every following line indented deeper than <paramref name="baseIndent"/>.
    /// Blank lines, comment lines and lines belonging to a triple-quoted string that starts a
    /// line (docstrings) are ignored; remaining lines are normalized and joined with <c>\n</c>,
    /// so the hash is independent of blank-line spacing, of the host platform's newline
    /// convention, and of CRLF versus LF line endings in the source file.
    /// </remarks>
    /// <param name="lines">File content split on <c>\n</c>.</param>
    /// <param name="defLineIndex">Zero-based index of the last line of the signature.</param>
    /// <param name="baseIndent">Indentation of the <c>def</c> line, as measured by <see cref="GetIndentation"/>.</param>
    /// <returns>The body hash, or "empty" when no body lines are found.</returns>
    private static string ComputeFunctionBodyHash(string[] lines, int defLineIndex, int baseIndent)
    {
        var sb = new StringBuilder();
        var inDocstring = false;
        var docstringQuotes = "";

        for (var i = defLineIndex + 1; i < lines.Length; i++)
        {
            var line = lines[i];

            // Trim both ends: lines keep a trailing '\r' for CRLF files, which would otherwise
            // defeat the closing-quote detection below.
            var trimmedLine = line.Trim();

            if (inDocstring)
            {
                // Docstring text may be dedented arbitrarily, so indentation is not checked here.
                if (trimmedLine.Contains(docstringQuotes, StringComparison.Ordinal))
                    inDocstring = false;
                continue;
            }

            // Blank lines and comment lines never contribute to the hash and never end the body.
            if (trimmedLine.Length == 0 || trimmedLine.StartsWith('#'))
                continue;

            // A code line at or left of the def's indentation ends the body
            // (or means there was no body on the following lines at all).
            if (GetIndentation(line) <= baseIndent)
                break;

            // Handle docstrings
            if (trimmedLine.StartsWith("\"\"\"", StringComparison.Ordinal) ||
                trimmedLine.StartsWith("'''", StringComparison.Ordinal))
            {
                docstringQuotes = trimmedLine[..3];

                // Still open unless the same delimiter occurs again after the opening one.
                inDocstring = trimmedLine.IndexOf(docstringQuotes, 3, StringComparison.Ordinal) < 0;
                continue;
            }

            // Add normalized line to hash input
            sb.Append(NormalizeLine(trimmedLine)).Append('\n');
        }

        return ComputeHash(sb.ToString());
    }

    /// <summary>
    /// Removes a trailing <c>#</c> comment (one that is not inside a quoted string) and trims
    /// the line.
    /// </summary>
    /// <remarks>
    /// Backslash escapes inside strings are tracked with an explicit flag, so a string ending
    /// in an escaped backslash (<c>"a\\"</c>) is closed correctly.
    /// </remarks>
    private static string NormalizeLine(string line)
    {
        var inString = false;
        var escaped = false;
        var stringChar = '\0';

        for (var i = 0; i < line.Length; i++)
        {
            var c = line[i];

            if (inString)
            {
                if (escaped)
                    escaped = false;      // this character is escaped, whatever it is
                else if (c == '\\')
                    escaped = true;
                else if (c == stringChar)
                    inString = false;
                continue;
            }

            if (c is '"' or '\'')
            {
                inString = true;
                escaped = false;
                stringChar = c;
                continue;
            }

            if (c == '#')
                return line[..i].Trim();
        }

        // Normalize whitespace
        return line.Trim();
    }

    /// <summary>
    /// Measures the leading indentation of <paramref name="line"/> in columns
    /// (a space counts 1, a tab counts 4).
    /// </summary>
    private static int GetIndentation(string line)
    {
        var indent = 0;
        foreach (var c in line)
        {
            if (c == ' ') indent++;
            else if (c == '\t') indent += 4;
            else break;
        }
        return indent;
    }

    /// <summary>
    /// Returns the 1-based line number of the character at <paramref name="index"/>, i.e. one
    /// plus the number of <c>\n</c> characters that precede it.
    /// </summary>
    private static int GetLineNumber(string content, int index)
    {
        var lineNumber = 1;
        for (var i = 0; i < index && i < content.Length; i++)
        {
            if (content[i] == '\n')
                lineNumber++;
        }
        return lineNumber;
    }

    /// <summary>
    /// Returns the first 16 bytes of the SHA-256 digest of the UTF-8 encoding of
    /// <paramref name="content"/> as 32 lowercase hex characters, or "empty" for blank input.
    /// </summary>
    private static string ComputeHash(string content)
    {
        if (string.IsNullOrWhiteSpace(content))
            return "empty";

        var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(content));
        return Convert.ToHexStringLower(bytes[..16]);
    }
}
|
||||
@@ -0,0 +1,161 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// DotNetMethodKeyBuilder.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
|
||||
// Description: Method key builder for .NET/NuGet packages.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
|
||||
|
||||
/// <summary>
|
||||
/// Builds normalized method keys for .NET assemblies.
|
||||
/// Format: Namespace.TypeName::MethodName(ParamType1,ParamType2)
|
||||
/// </summary>
|
||||
public sealed partial class DotNetMethodKeyBuilder : IMethodKeyBuilder
{
    // Pattern: Namespace.Type::Method(params)
    // Groups: 1 = namespace (optional), 2 = type name, 3 = method name, 4 = parameter list.
    [GeneratedRegex(@"^(?:(.+)\.)?([^:.]+)::([^(]+)\(([^)]*)\)$", RegexOptions.Compiled)]
    private static partial Regex MethodKeyPattern();

    // Generic arity marker: a backtick followed by the arity, e.g. "`1" or "`12".
    [GeneratedRegex(@"`[0-9]+")]
    private static partial Regex GenericArityPattern();

    /// <inheritdoc />
    public string Ecosystem => "nuget";

    /// <inheritdoc />
    /// <remarks>
    /// Produces <c>Namespace.TypeName::MethodName(ParamType1,ParamType2)</c>. The namespace and
    /// type name are optional; the separating dot is emitted only when both are present.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    public string BuildKey(MethodKeyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sb = new StringBuilder();
        var hasTypeName = !string.IsNullOrEmpty(request.TypeName);

        // Namespace.TypeName
        if (!string.IsNullOrEmpty(request.Namespace))
        {
            sb.Append(NormalizeNamespace(request.Namespace));
            if (hasTypeName)
            {
                sb.Append('.');
            }
        }

        if (hasTypeName)
        {
            sb.Append(NormalizeTypeName(request.TypeName!));
        }

        // ::MethodName
        sb.Append("::");
        sb.Append(NormalizeMethodName(request.MethodName));

        // (ParamTypes)
        sb.Append('(');
        if (request.ParameterTypes is { Count: > 0 })
        {
            sb.Append(string.Join(",", request.ParameterTypes.Select(NormalizeTypeName)));
        }
        sb.Append(')');

        return sb.ToString();
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns <c>null</c> for a null or empty key and for any key that does not have the shape
    /// <c>[Namespace.]TypeName::MethodName(params)</c>. Parameter types are split on commas and
    /// trimmed.
    /// </remarks>
    public MethodKeyComponents? ParseKey(string methodKey)
    {
        if (string.IsNullOrEmpty(methodKey))
            return null;

        var match = MethodKeyPattern().Match(methodKey);
        if (!match.Success)
            return null;

        var namespacePart = match.Groups[1].Value;
        var typeName = match.Groups[2].Value;
        var methodName = match.Groups[3].Value;
        var parameters = match.Groups[4].Value;

        var paramTypes = string.IsNullOrEmpty(parameters)
            ? []
            : parameters.Split(',').Select(p => p.Trim()).ToList();

        return new MethodKeyComponents
        {
            Namespace = string.IsNullOrEmpty(namespacePart) ? null : namespacePart,
            TypeName = typeName,
            MethodName = methodName,
            ParameterTypes = paramTypes
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Parses the key and rebuilds it through <see cref="BuildKey"/>; a key that cannot be
    /// parsed is returned unchanged.
    /// </remarks>
    public string NormalizeKey(string methodKey)
    {
        var components = ParseKey(methodKey);
        if (components is null)
            return methodKey;

        return BuildKey(new MethodKeyRequest
        {
            Namespace = components.Namespace,
            TypeName = components.TypeName,
            MethodName = components.MethodName,
            ParameterTypes = components.ParameterTypes?.ToList()
        });
    }

    /// <summary>
    /// Removes generic arity markers (a backtick followed by digits) from a namespace string.
    /// </summary>
    /// <remarks>
    /// Every arity is handled, and a marker is removed as a whole: "`10" is dropped entirely
    /// rather than being reduced to "0" by a substring replace of "`1".
    /// </remarks>
    private static string NormalizeNamespace(string ns)
    {
        return GenericArityPattern().Replace(ns, string.Empty);
    }

    /// <summary>
    /// Maps a type name to its canonical short form: C# keywords and <c>System.*</c> primitives
    /// become the BCL simple name (<c>int</c> / <c>System.Int32</c> to <c>Int32</c>), everything
    /// from a generic arity marker onwards is dropped, and a leading <c>System.</c> is removed
    /// from types that live directly in the <c>System</c> namespace.
    /// </summary>
    private static string NormalizeTypeName(string typeName)
    {
        // Normalize common type aliases. Every C# keyword alias must be listed here because the
        // "System." stripping below only covers the fully qualified spelling.
        var normalized = typeName switch
        {
            "System.String" or "string" => "String",
            "System.Int32" or "int" => "Int32",
            "System.Int64" or "long" => "Int64",
            "System.Boolean" or "bool" => "Boolean",
            "System.Double" or "double" => "Double",
            "System.Single" or "float" => "Single",
            "System.Void" or "void" => "Void",
            "System.Object" or "object" => "Object",
            "System.Byte" or "byte" => "Byte",
            "System.Char" or "char" => "Char",
            "System.Decimal" or "decimal" => "Decimal",
            "System.Int16" or "short" => "Int16",
            "System.UInt16" or "ushort" => "UInt16",
            "System.UInt32" or "uint" => "UInt32",
            "System.UInt64" or "ulong" => "UInt64",
            "System.SByte" or "sbyte" => "SByte",
            "System.IntPtr" or "nint" => "IntPtr",
            "System.UIntPtr" or "nuint" => "UIntPtr",
            _ => typeName
        };

        // Remove generic arity and simplify
        var arityIndex = normalized.IndexOf('`');
        if (arityIndex > 0)
        {
            normalized = normalized[..arityIndex];
        }

        // Use simple name for common BCL types (e.g., System.String -> String)
        const string systemPrefix = "System.";
        if (normalized.StartsWith(systemPrefix, StringComparison.Ordinal))
        {
            var afterSystem = normalized[systemPrefix.Length..];
            if (!afterSystem.Contains('.'))
            {
                normalized = afterSystem;
            }
        }

        return normalized;
    }

    /// <summary>
    /// Returns the canonical method name. Method names, including the metadata names
    /// <c>.ctor</c> and <c>.cctor</c>, are currently used as-is.
    /// </summary>
    private static string NormalizeMethodName(string methodName)
    {
        return methodName;
    }
}
|
||||
@@ -0,0 +1,111 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// IMethodKeyBuilder.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
|
||||
// Description: Interface for building normalized method keys per ecosystem.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
|
||||
|
||||
/// <summary>
/// Contract for producing and reading normalized method keys for one package ecosystem.
/// A method key is a stable, canonical string identifying a method so that the
/// same method can be matched when diffing two versions of a package.
/// </summary>
public interface IMethodKeyBuilder
{
    /// <summary>
    /// Identifier of the ecosystem this builder is responsible for.
    /// </summary>
    string Ecosystem { get; }

    /// <summary>
    /// Composes a normalized method key out of its individual parts.
    /// </summary>
    /// <param name="request">The namespace, type, method and parameter information to encode.</param>
    /// <returns>The normalized method key.</returns>
    string BuildKey(MethodKeyRequest request);

    /// <summary>
    /// Splits a method key into its individual parts.
    /// </summary>
    /// <param name="methodKey">The method key to take apart.</param>
    /// <returns>The parsed parts, or <c>null</c> when the key is not valid.</returns>
    MethodKeyComponents? ParseKey(string methodKey);

    /// <summary>
    /// Rewrites a method key into its canonical form.
    /// </summary>
    /// <param name="methodKey">The method key to canonicalize.</param>
    /// <returns>The normalized method key.</returns>
    string NormalizeKey(string methodKey);
}
|
||||
|
||||
/// <summary>
/// Input for building a method key: the individual parts that identify a method.
/// Only <see cref="MethodName"/> is mandatory.
/// </summary>
public sealed record MethodKeyRequest
{
    /// <summary>
    /// Namespace or package path that contains the method, if any.
    /// </summary>
    public string? Namespace { get; init; }

    /// <summary>
    /// Name of the declaring type or class, if any.
    /// </summary>
    public string? TypeName { get; init; }

    /// <summary>
    /// Name of the method or function.
    /// </summary>
    public required string MethodName { get; init; }

    /// <summary>
    /// Parameter types, given as type names only.
    /// </summary>
    public IReadOnlyList<string>? ParameterTypes { get; init; }

    /// <summary>
    /// Return type of the method, if known.
    /// </summary>
    public string? ReturnType { get; init; }

    /// <summary>
    /// Whether the return type should become part of the key (for overload resolution).
    /// </summary>
    public bool IncludeReturnType { get; init; }
}
|
||||
|
||||
/// <summary>
/// The individual parts obtained by parsing a method key.
/// </summary>
public sealed record MethodKeyComponents
{
    /// <summary>
    /// Complete namespace path, if the key carried one.
    /// </summary>
    public string? Namespace { get; init; }

    /// <summary>
    /// Name of the type or class, if the key carried one.
    /// </summary>
    public string? TypeName { get; init; }

    /// <summary>
    /// Name of the method or function.
    /// </summary>
    public required string MethodName { get; init; }

    /// <summary>
    /// Names of the parameter types.
    /// </summary>
    public IReadOnlyList<string>? ParameterTypes { get; init; }

    /// <summary>
    /// Qualified name in the shape <c>namespace.type::method</c>. Parts that are
    /// null or empty are left out together with their separator, so a bare method
    /// yields just its name.
    /// </summary>
    public string FullQualifiedName
    {
        get
        {
            var hasNamespace = !string.IsNullOrEmpty(Namespace);
            var hasType = !string.IsNullOrEmpty(TypeName);

            return (hasNamespace, hasType) switch
            {
                (true, true) => $"{Namespace}.{TypeName}::{MethodName}",
                (true, false) => $"{Namespace}::{MethodName}",
                (false, true) => $"{TypeName}::{MethodName}",
                _ => MethodName,
            };
        }
    }
}
|
||||
@@ -0,0 +1,212 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// JavaMethodKeyBuilder.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
|
||||
// Description: Method key builder for Java/Maven packages.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
|
||||
|
||||
/// <summary>
/// Builds normalized method keys for Java classes.
/// Format: com.package.ClassName::methodName(ParamType1,ParamType2)
/// </summary>
/// <remarks>
/// <see cref="BuildKey"/> always writes the parameter list as comma-separated simple
/// type names. <see cref="ParseKey"/> accepts that form as well as a JVM method
/// descriptor such as <c>(Ljava/lang/String;I)V</c>, so a key produced by this
/// builder can be parsed and normalized again without changing.
/// A comma-free list made up solely of valid descriptor characters (for example
/// <c>(I)</c>) is ambiguous and is read as a JVM descriptor.
/// </remarks>
public sealed partial class JavaMethodKeyBuilder : IMethodKeyBuilder
{
    // Pattern: package.ClassName::methodName(parameters)[returnDescriptor]
    [GeneratedRegex(@"^([^:]+)::([^(]+)(\([^)]*\).*)$", RegexOptions.Compiled)]
    private static partial Regex MethodKeyPattern();

    /// <inheritdoc />
    public string Ecosystem => "maven";

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public string BuildKey(MethodKeyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sb = new StringBuilder();
        var hasTypeName = !string.IsNullOrEmpty(request.TypeName);

        // Package.ClassName - the separating dot is written only when a class name
        // follows, so a package without a type does not produce "pkg.::method".
        if (!string.IsNullOrEmpty(request.Namespace))
        {
            sb.Append(NormalizePackage(request.Namespace));
            if (hasTypeName)
            {
                sb.Append('.');
            }
        }

        if (hasTypeName)
        {
            sb.Append(request.TypeName);
        }

        // ::methodName
        sb.Append("::");
        sb.Append(NormalizeMethodName(request.MethodName));

        // (ParamTypes) - comma-separated simple type names
        sb.Append('(');
        if (request.ParameterTypes is { Count: > 0 })
        {
            sb.Append(string.Join(",", request.ParameterTypes.Select(NormalizeTypeName)));
        }

        sb.Append(')');

        return sb.ToString();
    }

    /// <inheritdoc />
    public MethodKeyComponents? ParseKey(string methodKey)
    {
        if (string.IsNullOrEmpty(methodKey))
        {
            return null;
        }

        var match = MethodKeyPattern().Match(methodKey);
        if (!match.Success)
        {
            return null;
        }

        var fullClassName = match.Groups[1].Value;
        var methodName = match.Groups[2].Value;
        var descriptor = match.Groups[3].Value;

        // Split package from class name at the last dot; without a dot the whole
        // text is treated as the class name.
        string? packageName = null;
        var typeName = fullClassName;

        var lastDot = fullClassName.LastIndexOf('.');
        if (lastDot > 0)
        {
            packageName = fullClassName[..lastDot];
            typeName = fullClassName[(lastDot + 1)..];
        }

        return new MethodKeyComponents
        {
            Namespace = packageName,
            TypeName = typeName,
            MethodName = methodName,
            ParameterTypes = ParseParameterList(descriptor)
        };
    }

    /// <inheritdoc />
    /// <remarks>Keys that cannot be parsed are returned unchanged.</remarks>
    public string NormalizeKey(string methodKey)
    {
        var components = ParseKey(methodKey);
        if (components is null)
        {
            return methodKey;
        }

        return BuildKey(new MethodKeyRequest
        {
            Namespace = components.Namespace,
            TypeName = components.TypeName,
            MethodName = components.MethodName,
            ParameterTypes = components.ParameterTypes?.ToList()
        });
    }

    // Java packages are lowercase.
    private static string NormalizePackage(string package) => package.ToLowerInvariant();

    // "<init>" and "<clinit>" are kept verbatim, as is every other method name.
    private static string NormalizeMethodName(string methodName) => methodName;

    // Reduces a dotted type name to its last segment (java.lang.String -> String);
    // names without a dot are returned unchanged.
    private static string NormalizeTypeName(string typeName)
    {
        var lastDot = typeName.LastIndexOf('.');
        return lastDot < 0 ? typeName : typeName[(lastDot + 1)..];
    }

    // Extracts the parameter type names from the "(...)" part of a key. The text
    // between the parentheses is read as a JVM descriptor when it is one in its
    // entirety, and as a comma-separated list of type names otherwise. Anything
    // after the closing parenthesis (a return descriptor) is ignored.
    private static List<string> ParseParameterList(string descriptor)
    {
        if (string.IsNullOrEmpty(descriptor) || !descriptor.StartsWith('('))
        {
            return [];
        }

        var closeParen = descriptor.IndexOf(')');
        var parameters = closeParen < 0 ? descriptor[1..] : descriptor[1..closeParen];
        if (parameters.Length == 0)
        {
            return [];
        }

        if (TryParseJvmParameters(parameters, out var descriptorTypes))
        {
            return descriptorTypes;
        }

        // Not a descriptor: this is the "(String,int)" form written by BuildKey.
        return parameters
            .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .ToList();
    }

    // Reads a sequence of JVM field descriptors. Succeeds only when every
    // character of the input belongs to a well-formed descriptor.
    private static bool TryParseJvmParameters(string parameters, out List<string> types)
    {
        types = [];

        var index = 0;
        while (index < parameters.Length)
        {
            if (!TryParseFieldDescriptor(parameters, ref index, out var typeName))
            {
                types = [];
                return false;
            }

            types.Add(typeName);
        }

        return true;
    }

    // Reads one field descriptor starting at index and advances index past it.
    // Produces the Java source-level name: primitives by keyword, object types by
    // simple class name, arrays with one "[]" per dimension.
    private static bool TryParseFieldDescriptor(string parameters, ref int index, out string typeName)
    {
        typeName = string.Empty;

        var dimensions = 0;
        while (index < parameters.Length && parameters[index] == '[')
        {
            dimensions++;
            index++;
        }

        if (index >= parameters.Length)
        {
            return false; // array marker without an element type
        }

        string? elementType;
        if (parameters[index] == 'L')
        {
            var semicolon = parameters.IndexOf(';', index);
            if (semicolon <= index + 1)
            {
                return false; // missing terminator or empty class name
            }

            var internalName = parameters[(index + 1)..semicolon];
            elementType = internalName[(internalName.LastIndexOf('/') + 1)..];
            index = semicolon + 1;
        }
        else
        {
            elementType = parameters[index] switch
            {
                'B' => "byte",
                'C' => "char",
                'D' => "double",
                'F' => "float",
                'I' => "int",
                'J' => "long",
                'S' => "short",
                'Z' => "boolean",
                _ => null
            };
            index++;
        }

        if (string.IsNullOrEmpty(elementType))
        {
            return false;
        }

        if (dimensions == 0)
        {
            typeName = elementType;
            return true;
        }

        var arrayName = new StringBuilder(elementType);
        for (var d = 0; d < dimensions; d++)
        {
            arrayName.Append("[]");
        }

        typeName = arrayName.ToString();
        return true;
    }
}
|
||||
@@ -0,0 +1,149 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// NodeMethodKeyBuilder.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
|
||||
// Description: Method key builder for Node.js/npm packages.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
|
||||
|
||||
/// <summary>
/// Builds normalized method keys for JavaScript/Node.js modules.
/// Format: module.path::functionName(param1,param2) or module.path.ClassName::methodName(params)
/// </summary>
public sealed partial class NodeMethodKeyBuilder : IMethodKeyBuilder
{
    // Pattern: module.path[.ClassName]::methodName(params)
    [GeneratedRegex(@"^([^:]+)::([^(]+)\(([^)]*)\)$", RegexOptions.Compiled)]
    private static partial Regex MethodKeyPattern();

    /// <inheritdoc />
    public string Ecosystem => "npm";

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public string BuildKey(MethodKeyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sb = new StringBuilder();

        // Module path
        if (!string.IsNullOrEmpty(request.Namespace))
        {
            sb.Append(NormalizeModulePath(request.Namespace));
        }

        // Class name (if any), separated from the module path by a dot
        if (!string.IsNullOrEmpty(request.TypeName))
        {
            if (sb.Length > 0)
            {
                sb.Append('.');
            }

            sb.Append(request.TypeName);
        }

        // ::functionName
        sb.Append("::");
        sb.Append(request.MethodName);

        // (params) - written exactly as supplied
        sb.Append('(');
        if (request.ParameterTypes is { Count: > 0 })
        {
            sb.Append(string.Join(",", request.ParameterTypes));
        }

        sb.Append(')');

        return sb.ToString();
    }

    /// <inheritdoc />
    public MethodKeyComponents? ParseKey(string methodKey)
    {
        if (string.IsNullOrEmpty(methodKey))
        {
            return null;
        }

        var match = MethodKeyPattern().Match(methodKey);
        if (!match.Success)
        {
            return null;
        }

        var modulePath = match.Groups[1].Value;
        var methodName = match.Groups[2].Value;
        var parameters = match.Groups[3].Value;

        // Try to extract a class name from the module path: the last dotted segment
        // is taken as a class when it starts with an uppercase letter.
        string? typeName = null;
        var lastDot = modulePath.LastIndexOf('.');
        if (lastDot > 0)
        {
            var lastPart = modulePath[(lastDot + 1)..];

            // The length check matters: a path ending in '.' leaves an empty segment,
            // and indexing it would throw.
            if (lastPart.Length > 0 && char.IsUpper(lastPart[0]))
            {
                typeName = lastPart;
                modulePath = modulePath[..lastDot];
            }
        }

        var paramTypes = string.IsNullOrEmpty(parameters)
            ? []
            : parameters.Split(',').Select(p => p.Trim()).ToList();

        return new MethodKeyComponents
        {
            Namespace = modulePath,
            TypeName = typeName,
            MethodName = methodName,
            ParameterTypes = paramTypes
        };
    }

    /// <inheritdoc />
    /// <remarks>Keys that cannot be parsed are returned unchanged.</remarks>
    public string NormalizeKey(string methodKey)
    {
        var components = ParseKey(methodKey);
        if (components is null)
        {
            return methodKey;
        }

        return BuildKey(new MethodKeyRequest
        {
            Namespace = components.Namespace,
            TypeName = components.TypeName,
            MethodName = components.MethodName,
            ParameterTypes = components.ParameterTypes?.ToList()
        });
    }

    // Turns a file-style or dotted module path into dotted form.
    private static string NormalizeModulePath(string path)
    {
        // Treat '/', '\' and '.' alike as separators and drop empty segments. This
        // removes leading/trailing separators and collapses runs of any length
        // (e.g. "a/../b" becomes "a.b").
        var segments = path.Split(new[] { '/', '\\', '.' }, StringSplitOptions.RemoveEmptyEntries);
        var normalized = string.Join('.', segments);

        // Remove 'index' from module paths
        const string IndexSuffix = ".index";
        if (normalized.EndsWith(IndexSuffix, StringComparison.OrdinalIgnoreCase))
        {
            normalized = normalized[..^IndexSuffix.Length];
        }

        // Remove one common leading directory such as 'src.' or 'lib.'; only the
        // first matching prefix is stripped.
        foreach (var prefix in new[] { "src.", "lib.", "dist." })
        {
            if (normalized.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
            {
                normalized = normalized[prefix.Length..];
                break;
            }
        }

        return normalized;
    }
}
|
||||
@@ -0,0 +1,165 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// PythonMethodKeyBuilder.cs
|
||||
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
|
||||
// Description: Method key builder for Python/PyPI packages.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
|
||||
|
||||
/// <summary>
/// Builds normalized method keys for Python modules.
/// Format: package.module.ClassName::method_name(param1,param2) or package.module::function_name(params)
/// </summary>
public sealed partial class PythonMethodKeyBuilder : IMethodKeyBuilder
{
    // Pattern: module.path[.ClassName]::function_name(params)
    [GeneratedRegex(@"^([^:]+)::([^(]+)\(([^)]*)\)$", RegexOptions.Compiled)]
    private static partial Regex MethodKeyPattern();

    /// <inheritdoc />
    public string Ecosystem => "pypi";

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public string BuildKey(MethodKeyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sb = new StringBuilder();

        // Module path
        if (!string.IsNullOrEmpty(request.Namespace))
        {
            sb.Append(NormalizeModulePath(request.Namespace));
        }

        // Class name (if any), separated from the module path by a dot
        if (!string.IsNullOrEmpty(request.TypeName))
        {
            if (sb.Length > 0)
            {
                sb.Append('.');
            }

            sb.Append(request.TypeName);
        }

        // ::function_name
        sb.Append("::");
        sb.Append(NormalizeFunctionName(request.MethodName));

        // (params) - just param names for Python, written exactly as supplied
        sb.Append('(');
        if (request.ParameterTypes is { Count: > 0 })
        {
            sb.Append(string.Join(",", request.ParameterTypes));
        }

        sb.Append(')');

        return sb.ToString();
    }

    /// <inheritdoc />
    public MethodKeyComponents? ParseKey(string methodKey)
    {
        if (string.IsNullOrEmpty(methodKey))
        {
            return null;
        }

        var match = MethodKeyPattern().Match(methodKey);
        if (!match.Success)
        {
            return null;
        }

        var modulePath = match.Groups[1].Value;
        var functionName = match.Groups[2].Value;
        var parameters = match.Groups[3].Value;

        // Try to extract a class name from the module path: the last dotted segment
        // is taken as a class when it starts with an uppercase letter.
        string? typeName = null;
        var lastDot = modulePath.LastIndexOf('.');
        if (lastDot > 0)
        {
            var lastPart = modulePath[(lastDot + 1)..];
            if (lastPart.Length > 0 && char.IsUpper(lastPart[0]))
            {
                typeName = lastPart;
                modulePath = modulePath[..lastDot];
            }
        }

        var paramNames = string.IsNullOrEmpty(parameters)
            ? []
            : parameters.Split(',').Select(p => p.Trim()).ToList();

        return new MethodKeyComponents
        {
            Namespace = modulePath,
            TypeName = typeName,
            MethodName = functionName,
            ParameterTypes = paramNames
        };
    }

    /// <inheritdoc />
    /// <remarks>Keys that cannot be parsed are returned unchanged.</remarks>
    public string NormalizeKey(string methodKey)
    {
        var components = ParseKey(methodKey);
        if (components is null)
        {
            return methodKey;
        }

        return BuildKey(new MethodKeyRequest
        {
            Namespace = components.Namespace,
            TypeName = components.TypeName,
            MethodName = components.MethodName,
            ParameterTypes = components.ParameterTypes?.ToList()
        });
    }

    // Turns a file-style or dotted module path into dotted form. Underscores are
    // kept as they are.
    private static string NormalizeModulePath(string path)
    {
        // Treat '/', '\' and '.' alike as separators and drop empty segments. This
        // removes leading/trailing separators and collapses runs of any length
        // (e.g. "pkg/../mod" becomes "pkg.mod").
        var segments = path.Split(new[] { '/', '\\', '.' }, StringSplitOptions.RemoveEmptyEntries);
        var normalized = string.Join('.', segments);

        // Remove __init__ from module paths
        const string InitSuffix = ".__init__";
        if (normalized.EndsWith(InitSuffix, StringComparison.OrdinalIgnoreCase))
        {
            normalized = normalized[..^InitSuffix.Length];
        }

        return normalized;
    }

    // Special method names such as "__init__" or "__call__" are kept verbatim, as
    // is every other function name.
    private static string NormalizeFunctionName(string name) => name;
}
|
||||
@@ -15,6 +15,7 @@
|
||||
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.0" />
|
||||
<PackageReference Include="Mono.Cecil" Version="0.11.6" />
|
||||
<PackageReference Include="Npgsql" Version="9.0.3" />
|
||||
<PackageReference Include="SharpCompress" Version="0.41.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
Reference in New Issue
Block a user