Add Canonical JSON serialization library with tests and documentation

- Implemented CanonJson class for deterministic JSON serialization and hashing.
- Added unit tests for CanonJson functionality, covering various scenarios including key sorting, handling of nested objects, arrays, and special characters.
- Created project files for the Canonical JSON library and its tests, including necessary package references.
- Added README.md for library usage and API reference.
- Introduced RabbitMqIntegrationFactAttribute for conditional RabbitMQ integration tests.
This commit is contained in:
master
2025-12-19 15:35:00 +02:00
parent 43882078a4
commit 951a38d561
192 changed files with 27550 additions and 2611 deletions

View File

@@ -0,0 +1,531 @@
// -----------------------------------------------------------------------------
// JavaInternalGraphBuilder.cs
// Sprint: SPRINT_3700_0003_0001_trigger_extraction (TRIG-004)
// Description: Java internal call graph builder using bytecode analysis.
// -----------------------------------------------------------------------------
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.VulnSurfaces.Models;
namespace StellaOps.Scanner.VulnSurfaces.CallGraph;
/// <summary>
/// Internal call graph builder for Java packages using bytecode analysis.
/// Parses loose .class files (a single file or a directory tree); JAR archives are not opened by this class.
/// </summary>
public sealed class JavaInternalGraphBuilder : IInternalCallGraphBuilder
{
private readonly ILogger<JavaInternalGraphBuilder> _logger;
private const uint ClassFileMagic = 0xCAFEBABE;
/// <summary>
/// Creates a builder that writes its diagnostics to <paramref name="logger"/>.
/// </summary>
/// <param name="logger">Logger used for per-file debug messages and build warnings.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
public JavaInternalGraphBuilder(ILogger<JavaInternalGraphBuilder> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
public string Ecosystem
{
    // Fixed ecosystem identifier reported by this builder.
    get { return "maven"; }
}
/// <inheritdoc />
/// <remarks>
/// Accepts any path ending in <c>.jar</c> or <c>.class</c> (case-insensitive) and any existing
/// directory that contains at least one <c>*.class</c> file somewhere below it.
/// NOTE(review): <c>.jar</c> paths are accepted here, but <c>GetClassFiles</c> only enumerates loose
/// <c>.class</c> files, so a JAR presumably has to be extracted by the caller first; confirm.
/// </remarks>
public bool CanHandle(string packagePath)
{
    if (string.IsNullOrEmpty(packagePath))
    {
        return false;
    }

    if (packagePath.EndsWith(".jar", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    // A directory qualifies only when it actually holds class files; anything else is judged by extension.
    return Directory.Exists(packagePath)
        ? Directory.EnumerateFiles(packagePath, "*.class", SearchOption.AllDirectories).Any()
        : packagePath.EndsWith(".class", StringComparison.OrdinalIgnoreCase);
}
/// <inheritdoc />
/// <remarks>
/// Runs two passes over the files returned by <c>GetClassFiles</c>. The first pass records every class
/// name and every method key (<c>Class::name+descriptor</c>). The second pass re-parses each file with
/// bytecode analysis, adds a node per visible method and an edge for every call whose target key was
/// recorded in the first pass. A file that fails to parse is logged at debug level and skipped.
/// Cancellation is never treated as a per-file parse failure: it reaches the outer handler, which logs
/// a warning and returns a failed result, like any other build-level error.
/// </remarks>
/// <param name="request">Package identity, path and visibility options for the build.</param>
/// <param name="cancellationToken">Checked before each file and passed to the file reads.</param>
/// <returns>A successful result carrying the graph, or a failed result carrying the error message.</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
public async Task<InternalCallGraphBuildResult> BuildAsync(
    InternalCallGraphBuildRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);
    var sw = Stopwatch.StartNew();
    var graph = new InternalCallGraph
    {
        PackageId = request.PackageId,
        Version = request.Version
    };
    try
    {
        var classFiles = GetClassFiles(request.PackagePath);
        var filesProcessed = 0;

        // First pass: collect all classes and methods
        var packageClasses = new HashSet<string>(StringComparer.Ordinal);
        var allMethods = new Dictionary<string, MethodInfo>(StringComparer.Ordinal);
        foreach (var classPath in classFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                var bytes = await File.ReadAllBytesAsync(classPath, cancellationToken);
                var classInfo = ParseClassFile(bytes);
                if (classInfo is not null)
                {
                    packageClasses.Add(classInfo.ClassName);
                    foreach (var method in classInfo.Methods)
                    {
                        var key = $"{classInfo.ClassName}::{method.Name}{method.Descriptor}";
                        allMethods[key] = method with { DeclaringClass = classInfo.ClassName };
                    }
                }
            }
            // Let cancellation escape the per-file handler; only genuine parse/IO errors are skipped.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to parse class file {Path}", classPath);
            }
        }

        // Second pass: analyze method bodies for internal calls
        foreach (var classPath in classFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                var bytes = await File.ReadAllBytesAsync(classPath, cancellationToken);
                var classInfo = ParseClassFileWithCalls(bytes, packageClasses);
                if (classInfo is not null)
                {
                    foreach (var method in classInfo.Methods)
                    {
                        var callerKey = $"{classInfo.ClassName}::{method.Name}{method.Descriptor}";

                        // Skip private methods unless requested
                        if (!request.IncludePrivateMethods && !method.IsPublic && !method.IsProtected)
                            continue;

                        graph.AddMethod(new InternalMethodRef
                        {
                            MethodKey = callerKey,
                            Name = method.Name,
                            DeclaringType = classInfo.ClassName,
                            IsPublic = method.IsPublic
                        });

                        // Add edges for internal calls
                        foreach (var call in method.InternalCalls)
                        {
                            var calleeKey = $"{call.TargetClass}::{call.MethodName}{call.Descriptor}";
                            if (allMethods.ContainsKey(calleeKey))
                            {
                                graph.AddEdge(new InternalCallEdge { Caller = callerKey, Callee = calleeKey });
                            }
                        }
                    }
                    filesProcessed++;
                }
            }
            // Same rule as the first pass: cancellation must not be logged and dropped as a parse error.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to analyze calls in {Path}", classPath);
            }
        }

        sw.Stop();
        _logger.LogDebug(
            "Built internal call graph for Maven {PackageId} v{Version}: {Methods} methods, {Edges} edges in {Duration}ms",
            request.PackageId, request.Version, graph.MethodCount, graph.EdgeCount, sw.ElapsedMilliseconds);
        return InternalCallGraphBuildResult.Ok(graph, sw.Elapsed, filesProcessed);
    }
    catch (Exception ex)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to build internal call graph for Maven {PackageId}", request.PackageId);
        return InternalCallGraphBuildResult.Fail(ex.Message, sw.Elapsed);
    }
}
/// <summary>
/// Resolves <paramref name="packagePath"/> to the list of class files to analyse.
/// </summary>
/// <param name="packagePath">An existing <c>.class</c> file or a directory to search recursively.</param>
/// <returns>
/// The file itself, every <c>*.class</c> file under the directory whose path does not contain
/// <c>META-INF</c>, or an empty array when the path is neither.
/// </returns>
private static string[] GetClassFiles(string packagePath)
{
    var isSingleClassFile =
        File.Exists(packagePath) &&
        packagePath.EndsWith(".class", StringComparison.OrdinalIgnoreCase);
    if (isSingleClassFile)
    {
        return new[] { packagePath };
    }

    if (!Directory.Exists(packagePath))
    {
        return Array.Empty<string>();
    }

    var selected = new List<string>();
    foreach (var candidate in Directory.GetFiles(packagePath, "*.class", SearchOption.AllDirectories))
    {
        // The whole path is tested, so anything under a META-INF directory is left out.
        if (!candidate.Contains("META-INF"))
        {
            selected.Add(candidate);
        }
    }

    return selected.ToArray();
}
/// <summary>
/// Parses the class header and method table of a class file without analysing method bodies.
/// </summary>
/// <param name="bytes">Raw contents of a <c>.class</c> file.</param>
/// <returns>
/// The class name, access flags and declared methods, or <c>null</c> when the buffer is shorter than
/// ten bytes or does not start with the class-file magic number.
/// </returns>
private ClassInfo? ParseClassFile(byte[] bytes)
{
    var looksLikeClassFile =
        bytes.Length >= 10 &&
        BinaryPrimitives.ReadUInt32BigEndian(bytes) == ClassFileMagic;
    if (!looksLikeClassFile)
    {
        return null;
    }

    var cursor = new ByteReader(bytes);
    cursor.Skip(8); // magic (4 bytes) + version (4 bytes)

    var pool = ParseConstantPool(cursor);
    var classFlags = cursor.ReadU2();
    var resolvedName = ResolveClassName(pool, cursor.ReadU2());

    cursor.Skip(2); // super class
    cursor.Skip(cursor.ReadU2() * 2); // interface table: one u2 per entry

    // Fields are not needed; step over them
    for (int remainingFields = cursor.ReadU2(); remainingFields > 0; remainingFields--)
    {
        SkipFieldOrMethod(cursor);
    }

    // Method table
    var declared = new List<MethodInfo>();
    for (int remainingMethods = cursor.ReadU2(); remainingMethods > 0; remainingMethods--)
    {
        var parsed = ParseMethod(cursor, pool);
        if (parsed is not null)
        {
            declared.Add(parsed);
        }
    }

    return new ClassInfo
    {
        ClassName = resolvedName,
        AccessFlags = classFlags,
        Methods = declared
    };
}
/// <summary>
/// Parses a class file like <c>ParseClassFile</c>, but also scans each method's bytecode for calls
/// into <paramref name="packageClasses"/>.
/// </summary>
/// <param name="bytes">Raw contents of a <c>.class</c> file.</param>
/// <param name="packageClasses">Dotted names of the classes that count as internal call targets.</param>
/// <returns>
/// The class with its methods and their internal calls, or <c>null</c> when the buffer is shorter than
/// ten bytes or does not start with the class-file magic number.
/// </returns>
private ClassInfo? ParseClassFileWithCalls(byte[] bytes, HashSet<string> packageClasses)
{
    var looksLikeClassFile =
        bytes.Length >= 10 &&
        BinaryPrimitives.ReadUInt32BigEndian(bytes) == ClassFileMagic;
    if (!looksLikeClassFile)
    {
        return null;
    }

    var cursor = new ByteReader(bytes);
    cursor.Skip(8); // magic (4 bytes) + version (4 bytes)

    var pool = ParseConstantPool(cursor);
    var classFlags = cursor.ReadU2();
    var resolvedName = ResolveClassName(pool, cursor.ReadU2());

    cursor.Skip(2); // super class
    cursor.Skip(cursor.ReadU2() * 2); // interface table: one u2 per entry

    // Fields are not needed; step over them
    for (int remainingFields = cursor.ReadU2(); remainingFields > 0; remainingFields--)
    {
        SkipFieldOrMethod(cursor);
    }

    // Method table, including bytecode call analysis
    var declared = new List<MethodInfo>();
    for (int remainingMethods = cursor.ReadU2(); remainingMethods > 0; remainingMethods--)
    {
        var parsed = ParseMethodWithCalls(cursor, pool, packageClasses);
        if (parsed is not null)
        {
            declared.Add(parsed);
        }
    }

    return new ClassInfo
    {
        ClassName = resolvedName,
        AccessFlags = classFlags,
        Methods = declared
    };
}
/// <summary>
/// Reads the constant pool that follows the class-file version fields.
/// </summary>
/// <param name="reader">Reader positioned at the <c>constant_pool_count</c> field.</param>
/// <returns>
/// A list whose indices equal constant-pool indices. Index 0 and the second slot of every 8-byte
/// constant hold empty placeholder entries.
/// </returns>
/// <exception cref="InvalidDataException">
/// An entry has a tag this parser does not know. Its size is unknown, so continuing would read every
/// following entry from the wrong offset.
/// </exception>
private static List<ConstantPoolEntry> ParseConstantPool(ByteReader reader)
{
    var count = reader.ReadU2();
    // Pool indices start at 1; the placeholder keeps list index == pool index.
    var pool = new List<ConstantPoolEntry>(count) { new() };
    for (var i = 1; i < count; i++)
    {
        var tag = reader.ReadU1();
        var entry = new ConstantPoolEntry { Tag = tag };
        var occupiesTwoSlots = false;
        switch (tag)
        {
            case 1: // CONSTANT_Utf8
            {
                var length = reader.ReadU2();
                entry.StringValue = Encoding.UTF8.GetString(reader.ReadBytes(length));
                break;
            }
            case 3: case 4: // Integer, Float
                reader.Skip(4);
                break;
            case 5: case 6: // Long, Double: 8 bytes of data, two pool slots
                reader.Skip(8);
                occupiesTwoSlots = true;
                break;
            case 7: case 8: // Class, String
                entry.NameIndex = reader.ReadU2();
                break;
            case 9: case 10: case 11: // Fieldref, Methodref, InterfaceMethodref
                entry.ClassIndex = reader.ReadU2();
                entry.NameAndTypeIndex = reader.ReadU2();
                break;
            case 12: // NameAndType
                entry.NameIndex = reader.ReadU2();
                entry.DescriptorIndex = reader.ReadU2();
                break;
            case 15: // MethodHandle
                reader.Skip(3);
                break;
            case 16: // MethodType
                reader.Skip(2);
                break;
            case 17: case 18: // Dynamic, InvokeDynamic
                reader.Skip(4);
                break;
            case 19: case 20: // Module, Package
                reader.Skip(2);
                break;
            default:
                // Fail loudly instead of silently misreading the rest of the file.
                throw new InvalidDataException($"Unsupported constant pool tag {tag} at index {i}.");
        }

        pool.Add(entry);
        if (occupiesTwoSlots)
        {
            // The real entry sits at index i; the slot after it is unusable.
            pool.Add(new());
            i++;
        }
    }
    return pool;
}
/// <summary>
/// Reads one <c>method_info</c> structure, keeping its name, descriptor and access flags and
/// skipping all of its attributes.
/// </summary>
/// <param name="reader">Reader positioned at the start of the method entry.</param>
/// <param name="pool">Constant pool used to resolve the name and descriptor strings.</param>
/// <returns>The method with an empty internal-call list.</returns>
private static MethodInfo? ParseMethod(ByteReader reader, List<ConstantPoolEntry> pool)
{
    var flags = reader.ReadU2();
    var methodName = GetUtf8(pool, reader.ReadU2());
    var methodDescriptor = GetUtf8(pool, reader.ReadU2());

    for (int remaining = reader.ReadU2(); remaining > 0; remaining--)
    {
        reader.Skip(2); // attribute name index
        reader.Skip((int)reader.ReadU4()); // attribute payload
    }

    return new MethodInfo
    {
        Name = methodName,
        Descriptor = methodDescriptor,
        AccessFlags = flags,
        InternalCalls = new List<CallInfo>()
    };
}
/// <summary>
/// Reads one <c>method_info</c> structure and analyses the bytecode of its <c>Code</c> attribute.
/// </summary>
/// <param name="reader">Reader positioned at the start of the method entry.</param>
/// <param name="pool">Constant pool used to resolve names and method references.</param>
/// <param name="packageClasses">Dotted names of the classes that count as internal call targets.</param>
/// <returns>The method together with the internal calls found in its body.</returns>
private static MethodInfo? ParseMethodWithCalls(
    ByteReader reader,
    List<ConstantPoolEntry> pool,
    HashSet<string> packageClasses)
{
    var flags = reader.ReadU2();
    var methodName = GetUtf8(pool, reader.ReadU2());
    var methodDescriptor = GetUtf8(pool, reader.ReadU2());
    var internalCalls = new List<CallInfo>();

    for (int remaining = reader.ReadU2(); remaining > 0; remaining--)
    {
        var attributeName = GetUtf8(pool, reader.ReadU2());
        var attributeLength = reader.ReadU4();

        if (attributeName != "Code")
        {
            reader.Skip((int)attributeLength);
            continue;
        }

        // The Code attribute is walked field by field rather than skipped by its declared length.
        reader.Skip(4); // max_stack, max_locals
        var bytecode = reader.ReadBytes((int)reader.ReadU4());
        AnalyzeBytecode(bytecode, pool, packageClasses, internalCalls);

        // Exception table: eight bytes per entry
        reader.Skip(reader.ReadU2() * 8);

        // Attributes nested inside Code are skipped wholesale
        for (int nested = reader.ReadU2(); nested > 0; nested--)
        {
            reader.Skip(2);
            reader.Skip((int)reader.ReadU4());
        }
    }

    return new MethodInfo
    {
        Name = methodName,
        Descriptor = methodDescriptor,
        AccessFlags = flags,
        InternalCalls = internalCalls
    };
}
/// <summary>
/// Walks a method's bytecode and records every invoke instruction whose target class is listed in
/// <paramref name="packageClasses"/>.
/// </summary>
/// <remarks>
/// The variable-length instructions <c>tableswitch</c>, <c>lookupswitch</c> and <c>wide</c> are decoded
/// with their real lengths. Treating them as fixed-size would leave the cursor inside their operand
/// bytes, and those bytes could then be misread as invoke opcodes. If a switch instruction is truncated
/// or declares an impossible size, scanning stops and the calls found so far are kept.
/// </remarks>
/// <param name="code">The <c>code</c> array of a Code attribute.</param>
/// <param name="pool">Constant pool used to resolve method references.</param>
/// <param name="packageClasses">Dotted names of the classes that count as internal call targets.</param>
/// <param name="calls">Receives one entry per internal call found.</param>
private static void AnalyzeBytecode(
    byte[] code,
    List<ConstantPoolEntry> pool,
    HashSet<string> packageClasses,
    List<CallInfo> calls)
{
    var i = 0;
    while (i < code.Length)
    {
        var opcode = code[i];
        switch (opcode)
        {
            // invokevirtual, invokespecial, invokestatic, invokeinterface
            case 0xB6:
            case 0xB7:
            case 0xB8:
            case 0xB9:
                if (i + 2 < code.Length)
                {
                    var methodRefIndex = (code[i + 1] << 8) | code[i + 2];
                    var callInfo = ResolveMethodRef(pool, methodRefIndex);
                    if (callInfo is not null && packageClasses.Contains(callInfo.TargetClass))
                    {
                        calls.Add(callInfo);
                    }
                }
                i += opcode == 0xB9 ? 5 : 3; // invokeinterface has 5 bytes
                break;

            case 0xAA: // tableswitch
            case 0xAB: // lookupswitch
            {
                // Operands start at the next offset that is a multiple of four from the start of the code.
                var operands = (i + 4) & ~3;
                // tableswitch header: default, low, high; lookupswitch header: default, npairs.
                var headerLength = opcode == 0xAA ? 12 : 8;
                if (operands + headerLength > code.Length)
                {
                    return;
                }

                long tableBytes;
                if (opcode == 0xAA)
                {
                    long low = BinaryPrimitives.ReadInt32BigEndian(code.AsSpan(operands + 4, 4));
                    long high = BinaryPrimitives.ReadInt32BigEndian(code.AsSpan(operands + 8, 4));
                    tableBytes = (high - low + 1) * 4;
                }
                else
                {
                    long pairCount = BinaryPrimitives.ReadInt32BigEndian(code.AsSpan(operands + 4, 4));
                    tableBytes = pairCount * 8;
                }

                var next = operands + headerLength + tableBytes;
                if (tableBytes < 0 || next > code.Length)
                {
                    return;
                }

                i = (int)next;
                break;
            }

            case 0xC4: // wide: 6 bytes when it modifies iinc, otherwise 4
                i += (i + 1 < code.Length && code[i + 1] == 0x84) ? 6 : 4;
                break;

            default:
                i += GetOpcodeLength(opcode);
                break;
        }
    }
}
/// <summary>
/// Resolves a Methodref or InterfaceMethodref constant into its class, method name and descriptor.
/// </summary>
/// <param name="pool">Constant pool of the class being analysed.</param>
/// <param name="index">Constant-pool index taken from an invoke instruction.</param>
/// <returns>
/// The call target with a dotted class name, or <c>null</c> when the index is out of range or the
/// referenced entries do not have the expected tags.
/// </returns>
private static CallInfo? ResolveMethodRef(List<ConstantPoolEntry> pool, int index)
{
    if (index <= 0 || index >= pool.Count)
    {
        return null;
    }

    var reference = pool[index];
    var isMethodReference = reference.Tag == 10 || reference.Tag == 11; // Methodref or InterfaceMethodref
    if (!isMethodReference)
    {
        return null;
    }

    var owner = EntryOrNull(reference.ClassIndex);
    var signature = EntryOrNull(reference.NameAndTypeIndex);
    if (owner is null || owner.Tag != 7 || signature is null || signature.Tag != 12)
    {
        return null;
    }

    return new CallInfo
    {
        TargetClass = GetUtf8(pool, owner.NameIndex).Replace('/', '.'),
        MethodName = GetUtf8(pool, signature.NameIndex),
        Descriptor = GetUtf8(pool, signature.DescriptorIndex)
    };

    // Bounds-checked lookup: out-of-range indices yield null instead of throwing.
    ConstantPoolEntry? EntryOrNull(int slot) =>
        slot >= 0 && slot < pool.Count ? pool[slot] : null;
}
/// <summary>
/// Advances the reader past one <c>field_info</c> or <c>method_info</c> structure and all of its
/// attributes.
/// </summary>
/// <param name="reader">Reader positioned at the start of the entry.</param>
private static void SkipFieldOrMethod(ByteReader reader)
{
    reader.Skip(6); // access flags, name index, descriptor index (u2 each)

    for (int remaining = reader.ReadU2(); remaining > 0; remaining--)
    {
        reader.Skip(2); // attribute name index
        reader.Skip((int)reader.ReadU4()); // attribute payload
    }
}
/// <summary>
/// Resolves a Class constant to a dotted class name.
/// </summary>
/// <param name="pool">Constant pool of the class being analysed.</param>
/// <param name="classIndex">Index of the Class entry.</param>
/// <returns>
/// The name with <c>/</c> replaced by <c>.</c>, or <c>"Unknown"</c> when the index is out of range or
/// the entry is not a Class constant (tag 7).
/// </returns>
private static string ResolveClassName(List<ConstantPoolEntry> pool, int classIndex)
{
    var inRange = classIndex > 0 && classIndex < pool.Count;
    if (!inRange || pool[classIndex].Tag != 7)
    {
        return "Unknown";
    }

    var internalName = GetUtf8(pool, pool[classIndex].NameIndex);
    return internalName.Replace('/', '.');
}
/// <summary>
/// Returns the string stored in the constant-pool entry at <paramref name="index"/>.
/// </summary>
/// <param name="pool">Constant pool of the class being analysed.</param>
/// <param name="index">Index of the entry to read.</param>
/// <returns>
/// The entry's string value, or an empty string when the index is out of range or the entry has no
/// string value.
/// </returns>
private static string GetUtf8(List<ConstantPoolEntry> pool, int index)
{
    var isValid = index > 0 && index < pool.Count;
    return isValid ? (pool[index].StringValue ?? string.Empty) : string.Empty;
}
/// <summary>
/// Returns the number of bytes an instruction occupies, including its opcode byte.
/// </summary>
/// <remarks>
/// <c>wide</c> (0xC4), <c>tableswitch</c> (0xAA) and <c>lookupswitch</c> (0xAB) are variable-length
/// instructions; this table reports a fixed length of 4 for them as a simplification.
/// </remarks>
/// <param name="opcode">The opcode byte.</param>
/// <returns>The instruction length in bytes; 1 for every opcode not listed.</returns>
private static int GetOpcodeLength(byte opcode) => opcode switch
{
    // One operand byte
    0x10 or 0x12 or (>= 0x15 and <= 0x19) or (>= 0x36 and <= 0x3A) or 0xA9 or 0xBC => 2,

    // Two operand bytes
    0x11 or 0x13 or 0x14 or 0x84
        or (>= 0x99 and <= 0xA8)
        or (>= 0xB2 and <= 0xB8)
        or 0xBB or 0xBD or 0xC0 or 0xC1 or 0xC6 or 0xC7 => 3,

    // tableswitch, lookupswitch, wide (all simplified) and multianewarray
    0xAA or 0xAB or 0xC4 or 0xC5 => 4,

    // invokeinterface, invokedynamic, goto_w, jsr_w
    0xB9 or 0xBA or 0xC8 or 0xC9 => 5,

    _ => 1
};
/// <summary>
/// Forward-only big-endian reader over a byte array. <c>Skip</c> performs no bounds check; an
/// out-of-range position only surfaces on the next read.
/// </summary>
private sealed class ByteReader(byte[] data)
{
    private int _pos;

    /// <summary>Reads one byte and advances by one.</summary>
    public byte ReadU1()
    {
        var offset = _pos++;
        return data[offset];
    }

    /// <summary>Reads a big-endian 16-bit unsigned value and advances by two.</summary>
    public ushort ReadU2()
    {
        var value = BinaryPrimitives.ReadUInt16BigEndian(data.AsSpan(_pos));
        _pos += 2;
        return value;
    }

    /// <summary>Reads a big-endian 32-bit unsigned value and advances by four.</summary>
    public uint ReadU4()
    {
        var value = BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(_pos));
        _pos += 4;
        return value;
    }

    /// <summary>Copies the next <paramref name="n"/> bytes into a new array and advances past them.</summary>
    public byte[] ReadBytes(int n)
    {
        var start = _pos;
        var copy = data[start..(start + n)];
        _pos = start + n;
        return copy;
    }

    /// <summary>Moves the position by <paramref name="n"/> bytes without reading.</summary>
    public void Skip(int n)
    {
        _pos += n;
    }
}
/// <summary>
/// One constant-pool slot. ParseConstantPool fills only the fields that apply to the entry's tag;
/// placeholder slots are default-constructed, so their Tag is 0.
/// </summary>
private sealed class ConstantPoolEntry
{
/// <summary>Tag byte read from the class file.</summary>
public byte Tag { get; init; }
/// <summary>Decoded text of a Utf8 entry (tag 1); null for every other tag.</summary>
public string? StringValue { get; set; }
/// <summary>The u2 index stored by tag 7 and tag 8 entries, and the name index of a tag 12 entry.</summary>
public int NameIndex { get; set; }
/// <summary>Descriptor index of a tag 12 (NameAndType) entry.</summary>
public int DescriptorIndex { get; set; }
/// <summary>Class index of a tag 9, 10 or 11 reference entry.</summary>
public int ClassIndex { get; set; }
/// <summary>NameAndType index of a tag 9, 10 or 11 reference entry.</summary>
public int NameAndTypeIndex { get; set; }
}
/// <summary>Result of parsing one class file: its name, access flags and declared methods.</summary>
private sealed record ClassInfo
{
/// <summary>Dotted class name, or "Unknown" when the this_class entry could not be resolved.</summary>
public required string ClassName { get; init; }
/// <summary>Raw class access_flags value read from the class file.</summary>
public ushort AccessFlags { get; init; }
/// <summary>Methods in the order they appear in the method table.</summary>
public required List<MethodInfo> Methods { get; init; }
}
/// <summary>
/// A method read from a class file, together with the internal calls found in its body.
/// </summary>
private sealed record MethodInfo
{
    // Bits of the method access_flags field tested by the visibility properties below.
    private const int PublicFlag = 0x0001;
    private const int ProtectedFlag = 0x0004;

    /// <summary>Dotted name of the declaring class; empty until a caller sets it.</summary>
    public string DeclaringClass { get; init; } = string.Empty;

    /// <summary>Method name from the constant pool.</summary>
    public required string Name { get; init; }

    /// <summary>Method descriptor from the constant pool.</summary>
    public required string Descriptor { get; init; }

    /// <summary>Raw method access_flags value.</summary>
    public ushort AccessFlags { get; init; }

    /// <summary>Calls into package classes found in the method's bytecode.</summary>
    public required List<CallInfo> InternalCalls { get; init; }

    /// <summary>True when bit 0x0001 of the access flags is set.</summary>
    public bool IsPublic => (AccessFlags & PublicFlag) == PublicFlag;

    /// <summary>True when bit 0x0004 of the access flags is set.</summary>
    public bool IsProtected => (AccessFlags & ProtectedFlag) == ProtectedFlag;
}
/// <summary>Target of an invoke instruction, as resolved by ResolveMethodRef.</summary>
private sealed record CallInfo
{
/// <summary>Dotted name of the class that owns the called method.</summary>
public required string TargetClass { get; init; }
/// <summary>Name of the called method.</summary>
public required string MethodName { get; init; }
/// <summary>Descriptor of the called method.</summary>
public required string Descriptor { get; init; }
}
}

View File

@@ -0,0 +1,420 @@
// -----------------------------------------------------------------------------
// JavaScriptInternalGraphBuilder.cs
// Sprint: SPRINT_3700_0003_0001_trigger_extraction (TRIG-003)
// Description: JavaScript/Node.js internal call graph builder using regex-based source scanning.
// -----------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.VulnSurfaces.Models;
namespace StellaOps.Scanner.VulnSurfaces.CallGraph;
/// <summary>
/// Internal call graph builder for JavaScript/Node.js packages using regex-based source scanning (no AST is built).
/// </summary>
public sealed partial class JavaScriptInternalGraphBuilder : IInternalCallGraphBuilder
{
private readonly ILogger<JavaScriptInternalGraphBuilder> _logger;
// Regex patterns for JavaScript analysis
// "function name(" declarations. Group 1 = optional "export", group 2 = optional "async", group 3 = function name.
[GeneratedRegex(@"(export\s+)?(async\s+)?function\s+(\w+)\s*\(", RegexOptions.Compiled)]
private static partial Regex FunctionDeclarationRegex();
// "const|let|var name = (" with an optional "async". Group 2 = variable name. The pattern does not check for "=>".
[GeneratedRegex(@"(const|let|var)\s+(\w+)\s*=\s*(async\s+)?\(", RegexOptions.Compiled)]
private static partial Regex ArrowFunctionRegex();
// "class Name". Group 1 = class name.
[GeneratedRegex(@"class\s+(\w+)", RegexOptions.Compiled)]
private static partial Regex ClassDeclarationRegex();
// "name(args) {" with an optional "async". Group 2 = name. Any identifier followed by "(...) {" matches, e.g. "if (x) {".
[GeneratedRegex(@"(async\s+)?(\w+)\s*\([^)]*\)\s*\{", RegexOptions.Compiled)]
private static partial Regex MethodDeclarationRegex();
// An identifier followed by "(", optionally prefixed with "this.". Group 1 = identifier.
[GeneratedRegex(@"(?:this\.)?(\w+)\s*\(", RegexOptions.Compiled)]
private static partial Regex FunctionCallRegex();
// "module.exports = " followed by an optional "{". Group 1 = the text up to the next "}" or the end of input.
[GeneratedRegex(@"module\.exports\s*=\s*\{?([^}]+)", RegexOptions.Compiled)]
private static partial Regex ModuleExportsRegex();
// "exports.name". Group 1 = exported name.
[GeneratedRegex(@"exports\.(\w+)", RegexOptions.Compiled)]
private static partial Regex NamedExportRegex();
/// <summary>
/// Creates a builder that writes its diagnostics to <paramref name="logger"/>.
/// </summary>
/// <param name="logger">Logger used for per-file debug messages and build warnings.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
public JavaScriptInternalGraphBuilder(ILogger<JavaScriptInternalGraphBuilder> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
public string Ecosystem
{
    // Fixed ecosystem identifier reported by this builder.
    get { return "npm"; }
}
/// <inheritdoc />
/// <remarks>
/// Accepts any path ending in <c>.tgz</c> or <c>.js</c> (case-insensitive) and any existing directory
/// that has a <c>package.json</c> at its root or at least one <c>*.js</c> file below it.
/// NOTE(review): <c>.tgz</c> paths are accepted here, but <c>GetJavaScriptFiles</c> only enumerates
/// loose <c>.js</c> files, so a tarball presumably has to be extracted by the caller first; confirm.
/// </remarks>
public bool CanHandle(string packagePath)
{
    if (string.IsNullOrEmpty(packagePath))
    {
        return false;
    }

    if (packagePath.EndsWith(".tgz", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    if (!Directory.Exists(packagePath))
    {
        return packagePath.EndsWith(".js", StringComparison.OrdinalIgnoreCase);
    }

    // Check for package.json or .js files
    var manifest = Path.Combine(packagePath, "package.json");
    return File.Exists(manifest)
        || Directory.EnumerateFiles(packagePath, "*.js", SearchOption.AllDirectories).Any();
}
/// <inheritdoc />
/// <remarks>
/// Runs two passes over the files returned by <c>GetJavaScriptFiles</c>. The first pass collects
/// function declarations from every file. The second pass re-reads each file and adds nodes and call
/// edges to the graph. A file that fails in either pass is logged at debug level and skipped.
/// Cancellation is never treated as a per-file failure: it reaches the outer handler, which logs a
/// warning and returns a failed result, like any other build-level error.
/// </remarks>
/// <param name="request">Package identity, path and visibility options for the build.</param>
/// <param name="cancellationToken">Checked before each file and passed to the file reads.</param>
/// <returns>A successful result carrying the graph, or a failed result carrying the error message.</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
public async Task<InternalCallGraphBuildResult> BuildAsync(
    InternalCallGraphBuildRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);
    var sw = Stopwatch.StartNew();
    var graph = new InternalCallGraph
    {
        PackageId = request.PackageId,
        Version = request.Version
    };
    try
    {
        var jsFiles = GetJavaScriptFiles(request.PackagePath);
        var filesProcessed = 0;
        var allFunctions = new Dictionary<string, FunctionInfo>(StringComparer.OrdinalIgnoreCase);

        // First pass: collect all function declarations
        foreach (var jsPath in jsFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                var content = await File.ReadAllTextAsync(jsPath, cancellationToken);
                var moduleName = GetModuleName(jsPath, request.PackagePath);
                CollectFunctions(content, moduleName, allFunctions, request.IncludePrivateMethods);
            }
            // Let cancellation escape the per-file handler; only genuine read/parse errors are skipped.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to collect functions from {Path}", jsPath);
            }
        }

        // Second pass: analyze call relationships
        foreach (var jsPath in jsFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                var content = await File.ReadAllTextAsync(jsPath, cancellationToken);
                var moduleName = GetModuleName(jsPath, request.PackagePath);
                AnalyzeCalls(content, moduleName, allFunctions, graph);
                filesProcessed++;
            }
            // Same rule as the first pass: cancellation must not be logged and dropped as a file error.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to analyze calls in {Path}", jsPath);
            }
        }

        sw.Stop();
        _logger.LogDebug(
            "Built internal call graph for npm {PackageId} v{Version}: {Methods} methods, {Edges} edges in {Duration}ms",
            request.PackageId, request.Version, graph.MethodCount, graph.EdgeCount, sw.ElapsedMilliseconds);
        return InternalCallGraphBuildResult.Ok(graph, sw.Elapsed, filesProcessed);
    }
    catch (Exception ex)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to build internal call graph for npm {PackageId}", request.PackageId);
        return InternalCallGraphBuildResult.Fail(ex.Message, sw.Elapsed);
    }
}
/// <summary>
/// Resolves <paramref name="packagePath"/> to the list of JavaScript source files to analyse.
/// </summary>
/// <remarks>
/// Directory exclusions (<c>node_modules</c>, <c>__tests__</c>) are evaluated against the path
/// relative to <paramref name="packagePath"/>. Testing the absolute path would discard every file of
/// a package that is itself installed under some <c>node_modules</c> directory. All comparisons are
/// ordinal.
/// </remarks>
/// <param name="packagePath">An existing <c>.js</c> file or a directory to search recursively.</param>
/// <returns>
/// The file itself, the non-minified, non-test <c>*.js</c> files under the directory, or an empty
/// array when the path is neither.
/// </returns>
private static string[] GetJavaScriptFiles(string packagePath)
{
    if (File.Exists(packagePath) && packagePath.EndsWith(".js", StringComparison.OrdinalIgnoreCase))
    {
        return [packagePath];
    }
    if (Directory.Exists(packagePath))
    {
        return Directory.GetFiles(packagePath, "*.js", SearchOption.AllDirectories)
            .Where(f =>
            {
                var name = Path.GetFileName(f);
                // Only the part below the package root may disqualify a file.
                var relative = Path.GetRelativePath(packagePath, f);
                return !name.Contains(".min.", StringComparison.Ordinal) &&
                       !name.EndsWith(".spec.js", StringComparison.Ordinal) &&
                       !name.EndsWith(".test.js", StringComparison.Ordinal) &&
                       !relative.Contains("node_modules", StringComparison.Ordinal) &&
                       !relative.Contains("__tests__", StringComparison.Ordinal);
            })
            .ToArray();
    }
    return [];
}
/// <summary>
/// Derives a dotted module name from a file path relative to the package root.
/// </summary>
/// <param name="jsPath">Path of the JavaScript file.</param>
/// <param name="basePath">Package root the path is made relative to.</param>
/// <returns>The relative path without its extension, with directory separators replaced by dots.</returns>
private static string GetModuleName(string jsPath, string basePath)
{
    var stem = Path.ChangeExtension(Path.GetRelativePath(basePath, jsPath), null);

    // Splitting on both separator characters and re-joining with '.' replaces each separator by a dot.
    var segments = stem.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
    return string.Join('.', segments);
}
/// <summary>
/// Scans one file's source text and registers the functions, arrow functions and class methods it
/// declares, then marks CommonJS exports as public.
/// </summary>
/// <remarks>
/// Keys are <c>module::function</c> for free functions and <c>module.Class::method</c> for class
/// methods. Start and end indices are offsets into <paramref name="content"/>. Class-method offsets
/// are found inside the extracted class body and translated back by the position of the first
/// character after the class's opening brace.
/// </remarks>
/// <param name="content">Full source text of the file.</param>
/// <param name="moduleName">Dotted module name of the file.</param>
/// <param name="functions">Shared registry, across all files, that receives the entries.</param>
/// <param name="includePrivate">When false, non-exported functions and arrow functions are skipped.</param>
private void CollectFunctions(
    string content,
    string moduleName,
    Dictionary<string, FunctionInfo> functions,
    bool includePrivate)
{
    // Collect function declarations
    foreach (Match match in FunctionDeclarationRegex().Matches(content))
    {
        var isExported = !string.IsNullOrEmpty(match.Groups[1].Value);
        var functionName = match.Groups[3].Value;
        if (!includePrivate && !isExported)
            continue;
        var key = $"{moduleName}::{functionName}";
        functions[key] = new FunctionInfo
        {
            Name = functionName,
            Module = moduleName,
            IsPublic = isExported,
            StartIndex = match.Index,
            EndIndex = FindFunctionEnd(content, match.Index)
        };
    }

    // Collect arrow functions
    foreach (Match match in ArrowFunctionRegex().Matches(content))
    {
        var functionName = match.Groups[2].Value;
        // "export" is not part of the pattern, so look for it earlier on the same line.
        var lineStart = content.LastIndexOf('\n', match.Index) + 1;
        var prefix = content[lineStart..match.Index];
        var isExported = prefix.Contains("export");
        if (!includePrivate && !isExported)
            continue;
        var key = $"{moduleName}::{functionName}";
        if (!functions.ContainsKey(key))
        {
            functions[key] = new FunctionInfo
            {
                Name = functionName,
                Module = moduleName,
                IsPublic = isExported,
                StartIndex = match.Index,
                EndIndex = FindArrowFunctionEnd(content, match.Index)
            };
        }
    }

    // Collect class methods
    foreach (Match classMatch in ClassDeclarationRegex().Matches(content))
    {
        var className = classMatch.Groups[1].Value;
        var classBodyStart = content.IndexOf('{', classMatch.Index);
        if (classBodyStart < 0) continue;
        var classBody = ExtractBracedBlock(content, classBodyStart);
        if (string.IsNullOrEmpty(classBody)) continue;

        // classBody starts right after the opening brace, so indices found in it are relative to
        // classBodyStart + 1, not to the position of the "class" keyword.
        var bodyOffset = classBodyStart + 1;
        foreach (Match methodMatch in MethodDeclarationRegex().Matches(classBody))
        {
            var methodName = methodMatch.Groups[2].Value;
            // The method pattern also matches statements such as "if (x) {" and anonymous
            // "function (x) {" expressions; those are not methods.
            if (methodName is "constructor" or "if" or "for" or "while" or "switch" or "catch" or "function")
                continue;
            var key = $"{moduleName}.{className}::{methodName}";
            functions[key] = new FunctionInfo
            {
                Name = methodName,
                Module = $"{moduleName}.{className}",
                IsPublic = true, // Class methods are typically public
                StartIndex = bodyOffset + methodMatch.Index,
                EndIndex = bodyOffset + FindFunctionEnd(classBody, methodMatch.Index)
            };
        }
    }

    // Mark exported functions from module.exports
    var exportsMatch = ModuleExportsRegex().Match(content);
    if (exportsMatch.Success)
    {
        var exports = exportsMatch.Groups[1].Value;
        var nestedPrefix = $"{moduleName}.";
        foreach (var func in functions.Values)
        {
            // The registry is shared across files; this file's exports must not change the
            // visibility of functions that belong to unrelated modules.
            var belongsToModule = func.Module == moduleName ||
                                  func.Module.StartsWith(nestedPrefix, StringComparison.Ordinal);
            if (belongsToModule && exports.Contains(func.Name, StringComparison.OrdinalIgnoreCase))
            {
                func.IsPublic = true;
            }
        }
    }

    // Mark "exports.name" style exports
    foreach (Match exportMatch in NamedExportRegex().Matches(content))
    {
        var exportedName = exportMatch.Groups[1].Value;
        var key = $"{moduleName}::{exportedName}";
        if (functions.TryGetValue(key, out var func))
        {
            func.IsPublic = true;
        }
    }
}
/// <summary>
/// Adds a graph node for every registered function of this module and an edge for every call inside
/// its body that resolves to another registered function.
/// </summary>
/// <remarks>
/// NOTE(review): functions are selected by module-name prefix, so a module whose name extends this
/// one with a dot (for example another file in a same-named sub-directory) is also selected, and its
/// indices are then applied to this file's content; confirm whether that is intended.
/// </remarks>
/// <param name="content">Full source text of the file.</param>
/// <param name="moduleName">Dotted module name of the file.</param>
/// <param name="allFunctions">Registry of functions collected from all files.</param>
/// <param name="graph">Graph that receives the nodes and edges.</param>
private void AnalyzeCalls(
    string content,
    string moduleName,
    Dictionary<string, FunctionInfo> allFunctions,
    InternalCallGraph graph)
{
    foreach (var (callerKey, callerInfo) in allFunctions)
    {
        var belongsHere = callerInfo.Module == moduleName || callerInfo.Module.StartsWith($"{moduleName}.");
        if (!belongsHere)
        {
            continue;
        }

        // Add node
        graph.AddMethod(new InternalMethodRef
        {
            MethodKey = callerKey,
            Name = callerInfo.Name,
            DeclaringType = callerInfo.Module,
            IsPublic = callerInfo.IsPublic
        });

        // Extract function body; entries whose span does not fit this content get a node but no edges
        var from = callerInfo.StartIndex;
        var to = callerInfo.EndIndex;
        if (to <= from || to > content.Length)
        {
            continue;
        }

        var body = content.Substring(from, to - from);

        // Find calls in body
        foreach (Match call in FunctionCallRegex().Matches(body))
        {
            var calledName = call.Groups[1].Value;

            // Skip common built-ins and keywords
            if (IsBuiltIn(calledName))
            {
                continue;
            }

            // Try to resolve callee; self-calls are not recorded
            var calleeKey = ResolveFunctionKey(calledName, moduleName, allFunctions);
            if (calleeKey is null || calleeKey == callerKey)
            {
                continue;
            }

            graph.AddEdge(new InternalCallEdge { Caller = callerKey, Callee = calleeKey });
        }
    }
}
/// <summary>
/// Finds the registry key a called name refers to.
/// </summary>
/// <param name="calledName">Identifier that appears before the call's opening parenthesis.</param>
/// <param name="callerModule">Module of the calling function; tried first.</param>
/// <param name="allFunctions">Registry of functions collected from all files.</param>
/// <returns>
/// The key in the caller's module when present, otherwise the first key in enumeration order that ends
/// with <c>::calledName</c> (case-insensitive), or <c>null</c> when nothing matches.
/// </returns>
private static string? ResolveFunctionKey(
    string calledName,
    string callerModule,
    Dictionary<string, FunctionInfo> allFunctions)
{
    // Try same module first
    var localKey = $"{callerModule}::{calledName}";
    if (allFunctions.ContainsKey(localKey))
    {
        return localKey;
    }

    // Try any module with that function
    var suffix = $"::{calledName}";
    foreach (var candidate in allFunctions.Keys)
    {
        if (candidate.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Tells whether a called name is a JavaScript keyword or well-known global that should not be
/// treated as a package function.
/// </summary>
/// <param name="name">Identifier that appears before a call's opening parenthesis.</param>
/// <returns><c>true</c> when the name is on the fixed exclusion list (case-sensitive).</returns>
private static bool IsBuiltIn(string name)
{
    switch (name)
    {
        // Keywords and statements that can be followed by "("
        case "import":
        case "export":
        case "if":
        case "for":
        case "while":
        case "switch":
        case "return":
        case "throw":
        case "catch":
        case "try":
        case "new":
        case "typeof":
        case "instanceof":
        case "delete":
        case "void":
        case "await":
        // Global objects and constructors
        case "console":
        case "require":
        case "Promise":
        case "Array":
        case "Object":
        case "String":
        case "Number":
        case "Boolean":
        case "Date":
        case "Math":
        case "JSON":
        case "Error":
        case "RegExp":
        case "Map":
        case "Set":
        // Timers, fetch and Node.js globals
        case "setTimeout":
        case "setInterval":
        case "clearTimeout":
        case "clearInterval":
        case "fetch":
        case "process":
        case "Buffer":
        case "__dirname":
        case "__filename":
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Finds where the braced body of a function that starts at <paramref name="start"/> ends.
/// </summary>
/// <param name="content">Text to search.</param>
/// <param name="start">Index at which the declaration begins.</param>
/// <returns>
/// The index just past the brace that closes the first <c>{</c> at or after <paramref name="start"/>,
/// or <c>start + 100</c> when there is no opening brace.
/// </returns>
private static int FindFunctionEnd(string content, int start)
{
    var openBrace = content.IndexOf('{', start);
    return openBrace >= 0
        ? openBrace + FindMatchingBrace(content, openBrace)
        : start + 100;
}
/// <summary>
/// Finds where the body of an arrow function that starts at <paramref name="start"/> ends.
/// </summary>
/// <remarks>
/// The <c>=&gt;</c> token is located with an ordinal search, so the result does not depend on the
/// current culture.
/// </remarks>
/// <param name="content">Text to search.</param>
/// <param name="start">Index at which the declaration begins.</param>
/// <returns>
/// For a braced body, the index just past its closing brace. For an expression body, the index of the
/// next <c>;</c>, line feed or <c>,</c>. When no arrow or no terminator is found, a fallback of
/// 100 characters past the search position.
/// </returns>
private static int FindArrowFunctionEnd(string content, int start)
{
    var arrowIndex = content.IndexOf("=>", start, StringComparison.Ordinal);
    if (arrowIndex < 0) return start + 100;

    var afterArrow = arrowIndex + 2;
    while (afterArrow < content.Length && char.IsWhiteSpace(content[afterArrow]))
        afterArrow++;

    if (afterArrow < content.Length && content[afterArrow] == '{')
    {
        return afterArrow + FindMatchingBrace(content, afterArrow);
    }

    // Expression body: ends at the first statement or argument terminator
    var endIndex = content.IndexOfAny([';', '\n', ','], afterArrow);
    // IndexOfAny returns -1 when nothing is found
    return endIndex >= 0 ? endIndex : afterArrow + 100;
}
/// <summary>
/// Measures the span from <paramref name="braceStart"/> to the brace that balances it.
/// </summary>
/// <remarks>
/// Braces are counted character by character; braces inside string literals or comments are counted
/// too.
/// </remarks>
/// <param name="content">Text to scan.</param>
/// <param name="braceStart">Index at which scanning begins.</param>
/// <returns>
/// The number of characters from <paramref name="braceStart"/> through the closing brace that brings
/// the depth back to zero, or the number of characters remaining when no such brace exists.
/// </returns>
private static int FindMatchingBrace(string content, int braceStart)
{
    var open = 0;
    var cursor = braceStart;
    while (cursor < content.Length)
    {
        switch (content[cursor])
        {
            case '{':
                open++;
                break;
            case '}':
                open--;
                if (open == 0)
                {
                    return cursor - braceStart + 1;
                }
                break;
        }

        cursor++;
    }

    // Unbalanced: report everything up to the end of the text
    return content.Length - braceStart;
}
/// <summary>
/// Returns the text between the brace at <paramref name="braceStart"/> and its matching closing brace.
/// </summary>
/// <param name="content">Text that contains the block.</param>
/// <param name="braceStart">Index of the opening <c>{</c>.</param>
/// <returns>
/// The block's inner text without the surrounding braces. An empty string is returned when
/// <paramref name="braceStart"/> is out of range, does not point at <c>{</c>, or the block has no
/// inner text. The last case includes an unmatched <c>{</c> that is the final character of
/// <paramref name="content"/>, which previously produced an invalid range and threw.
/// </returns>
private static string ExtractBracedBlock(string content, int braceStart)
{
    if (braceStart < 0 || braceStart >= content.Length || content[braceStart] != '{')
        return string.Empty;

    var length = FindMatchingBrace(content, braceStart);
    var endIndex = Math.Min(braceStart + length, content.Length);

    var innerStart = braceStart + 1;
    var innerEnd = endIndex - 1;
    // innerEnd < innerStart happens for a lone trailing "{"; slicing it would throw.
    return innerEnd > innerStart ? content[innerStart..innerEnd] : string.Empty;
}
/// <summary>A function, arrow function or class method registered by CollectFunctions.</summary>
private sealed class FunctionInfo
{
/// <summary>Function or method name as written in the source.</summary>
public required string Name { get; init; }
/// <summary>Dotted module name; for class methods it is the module name followed by the class name.</summary>
public required string Module { get; init; }
/// <summary>Whether the function counts as exported; may be set to true after creation by the export scans.</summary>
public bool IsPublic { get; set; }
/// <summary>Character offset where the declaration match begins.</summary>
public int StartIndex { get; init; }
/// <summary>Character offset taken as the end of the function body.</summary>
public int EndIndex { get; init; }
}
}

View File

@@ -0,0 +1,449 @@
// -----------------------------------------------------------------------------
// PythonInternalGraphBuilder.cs
// Sprint: SPRINT_3700_0003_0001_trigger_extraction (TRIG-005)
// Description: Python internal call graph builder using regex-based source scanning.
// -----------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.VulnSurfaces.Models;
namespace StellaOps.Scanner.VulnSurfaces.CallGraph;
/// <summary>
/// Internal call graph builder for Python packages using regex-based source scanning (no AST is built).
/// </summary>
public sealed partial class PythonInternalGraphBuilder : IInternalCallGraphBuilder
{
private readonly ILogger<PythonInternalGraphBuilder> _logger;
// Regex patterns for Python analysis
// "def name(params):" starting at the beginning of a line, with an optional "async" and an optional "-> type".
// Group 1 = optional "async", group 2 = function name, group 3 = parameter text.
[GeneratedRegex(@"^(async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?:", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex FunctionDefRegex();
// "class Name:" or "class Name(bases):" starting at the beginning of a line. Group 1 = class name.
[GeneratedRegex(@"^class\s+(\w+)(?:\s*\([^)]*\))?\s*:", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex ClassDefRegex();
// Like FunctionDefRegex but requires leading whitespace.
// Group 1 = the leading whitespace, group 2 = optional "async", group 3 = method name, group 4 = parameter text.
[GeneratedRegex(@"^(\s+)(async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?:", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex MethodDefRegex();
// An identifier followed by "(", optionally prefixed with "self.". Group 1 = identifier.
[GeneratedRegex(@"(?:self\.)?(\w+)\s*\(", RegexOptions.Compiled)]
private static partial Regex FunctionCallRegex();
// "from module import names" on one line. Group 1 = module, group 2 = the text after "import".
[GeneratedRegex(@"^from\s+(\S+)\s+import\s+(.+)$", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex FromImportRegex();
// "import module" at the beginning of a line. Group 1 = module.
[GeneratedRegex(@"^import\s+(\S+)", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex ImportRegex();
// "__all__ = [ ... ]" at the beginning of a line. Group 1 = the text between the brackets.
[GeneratedRegex(@"^__all__\s*=\s*\[([^\]]+)\]", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex AllExportRegex();
/// <summary>
/// Creates a builder that writes its diagnostics to <paramref name="logger"/>.
/// </summary>
/// <param name="logger">Logger used for per-file debug messages and build warnings.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
public PythonInternalGraphBuilder(ILogger<PythonInternalGraphBuilder> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
public string Ecosystem
{
    // Fixed ecosystem identifier reported by this builder.
    get { return "pypi"; }
}
/// <inheritdoc />
/// <remarks>
/// Accepts any path ending in <c>.whl</c>, <c>.tar.gz</c> or <c>.py</c> (case-insensitive) and any
/// existing directory that has a <c>setup.py</c> or <c>pyproject.toml</c> at its root or at least one
/// <c>*.py</c> file below it.
/// NOTE(review): archive paths are accepted here, but <c>GetPythonFiles</c> only enumerates loose
/// <c>.py</c> files, so archives presumably have to be extracted by the caller first; confirm.
/// </remarks>
public bool CanHandle(string packagePath)
{
    if (string.IsNullOrEmpty(packagePath))
    {
        return false;
    }

    var isArchive =
        packagePath.EndsWith(".whl", StringComparison.OrdinalIgnoreCase) ||
        packagePath.EndsWith(".tar.gz", StringComparison.OrdinalIgnoreCase);
    if (isArchive)
    {
        return true;
    }

    if (!Directory.Exists(packagePath))
    {
        return packagePath.EndsWith(".py", StringComparison.OrdinalIgnoreCase);
    }

    // A project marker at the root is enough; otherwise any Python file below the directory will do.
    return File.Exists(Path.Combine(packagePath, "setup.py"))
        || File.Exists(Path.Combine(packagePath, "pyproject.toml"))
        || Directory.EnumerateFiles(packagePath, "*.py", SearchOption.AllDirectories).Any();
}
/// <inheritdoc />
/// <remarks>
/// Runs two passes over the files returned by <c>GetPythonFiles</c>. The first pass collects function
/// declarations from every file. The second pass re-reads each file and adds nodes and call edges to
/// the graph. A file that fails in either pass is logged at debug level and skipped.
/// Cancellation is never treated as a per-file failure: it reaches the outer handler, which logs a
/// warning and returns a failed result, like any other build-level error.
/// </remarks>
/// <param name="request">Package identity, path and visibility options for the build.</param>
/// <param name="cancellationToken">Checked before each file and passed to the file reads.</param>
/// <returns>A successful result carrying the graph, or a failed result carrying the error message.</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
public async Task<InternalCallGraphBuildResult> BuildAsync(
    InternalCallGraphBuildRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);
    var sw = Stopwatch.StartNew();
    var graph = new InternalCallGraph
    {
        PackageId = request.PackageId,
        Version = request.Version
    };
    try
    {
        var pyFiles = GetPythonFiles(request.PackagePath);
        var filesProcessed = 0;
        var allFunctions = new Dictionary<string, FunctionInfo>(StringComparer.Ordinal);

        // First pass: collect all function declarations
        foreach (var pyPath in pyFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                var content = await File.ReadAllTextAsync(pyPath, cancellationToken);
                var moduleName = GetModuleName(pyPath, request.PackagePath);
                CollectFunctions(content, moduleName, allFunctions, request.IncludePrivateMethods);
            }
            // Let cancellation escape the per-file handler; only genuine read/parse errors are skipped.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to collect functions from {Path}", pyPath);
            }
        }

        // Second pass: analyze call relationships
        foreach (var pyPath in pyFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                var content = await File.ReadAllTextAsync(pyPath, cancellationToken);
                var moduleName = GetModuleName(pyPath, request.PackagePath);
                AnalyzeCalls(content, moduleName, allFunctions, graph);
                filesProcessed++;
            }
            // Same rule as the first pass: cancellation must not be logged and dropped as a file error.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to analyze calls in {Path}", pyPath);
            }
        }

        sw.Stop();
        _logger.LogDebug(
            "Built internal call graph for PyPI {PackageId} v{Version}: {Methods} methods, {Edges} edges in {Duration}ms",
            request.PackageId, request.Version, graph.MethodCount, graph.EdgeCount, sw.ElapsedMilliseconds);
        return InternalCallGraphBuildResult.Ok(graph, sw.Elapsed, filesProcessed);
    }
    catch (Exception ex)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to build internal call graph for PyPI {PackageId}", request.PackageId);
        return InternalCallGraphBuildResult.Fail(ex.Message, sw.Elapsed);
    }
}
/// <summary>
/// Resolves <paramref name="packagePath"/> to the list of Python source files to analyse.
/// </summary>
/// <remarks>
/// Exclusions are evaluated against the path relative to <paramref name="packagePath"/>, and test
/// directories are recognised as whole path segments named <c>tests</c> or <c>test</c>. A substring
/// test on the absolute path would also match directories such as <c>latest/</c> in the base path and
/// would never match on platforms that use <c>\</c> as the separator. All comparisons are ordinal.
/// </remarks>
/// <param name="packagePath">An existing <c>.py</c> file or a directory to search recursively.</param>
/// <returns>
/// The file itself, the non-test <c>*.py</c> files under the directory, or an empty array when the
/// path is neither.
/// </returns>
private static string[] GetPythonFiles(string packagePath)
{
    if (File.Exists(packagePath) && packagePath.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
    {
        return [packagePath];
    }
    if (Directory.Exists(packagePath))
    {
        var separators = new[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar };
        return Directory.GetFiles(packagePath, "*.py", SearchOption.AllDirectories)
            .Where(f =>
            {
                var name = Path.GetFileName(f);
                if (name.StartsWith("test_", StringComparison.Ordinal) ||
                    name.EndsWith("_test.py", StringComparison.Ordinal))
                {
                    return false;
                }

                // Only the part below the package root may disqualify a file.
                var relative = Path.GetRelativePath(packagePath, f);
                if (relative.Contains("__pycache__", StringComparison.Ordinal) ||
                    relative.Contains(".egg-info", StringComparison.Ordinal))
                {
                    return false;
                }

                // Compare whole directory names so the check is separator-independent.
                var directory = Path.GetDirectoryName(relative) ?? string.Empty;
                return !directory
                    .Split(separators, StringSplitOptions.RemoveEmptyEntries)
                    .Any(segment => segment is "tests" or "test");
            })
            .ToArray();
    }
    return [];
}
/// <summary>
/// Derives a dotted module name from a file path relative to the package root.
/// </summary>
/// <param name="pyPath">Path of the Python file.</param>
/// <param name="basePath">Package root the path is made relative to.</param>
/// <returns>
/// The relative path without its extension, with directory separators replaced by dots and a trailing
/// <c>.__init__</c> removed.
/// </returns>
private static string GetModuleName(string pyPath, string basePath)
{
    const string InitSuffix = ".__init__";

    var stem = Path.ChangeExtension(Path.GetRelativePath(basePath, pyPath), null);

    // Splitting on both separator characters and re-joining with '.' replaces each separator by a dot.
    var dotted = string.Join('.', stem.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar));

    // Remove __init__ from module name
    if (dotted.EndsWith(InitSuffix))
    {
        dotted = dotted.Remove(dotted.Length - InitSuffix.Length);
    }

    return dotted;
}
/// <summary>
/// First analysis pass: records every module-level function and class method found in one Python
/// source file into <paramref name="functions"/>.
/// </summary>
/// <param name="content">Full text of the Python file.</param>
/// <param name="moduleName">Dotted module name used as key prefix.</param>
/// <param name="functions">
/// Accumulator keyed by <c>module::function</c> or <c>module.Class::method</c>; existing keys are overwritten.
/// </param>
/// <param name="includePrivate">
/// When <c>false</c>, names with a single leading underscore are skipped.
/// </param>
private void CollectFunctions(
    string content,
    string moduleName,
    Dictionary<string, FunctionInfo> functions,
    bool includePrivate)
{
    var sourceLines = content.Split('\n');

    // Names listed in __all__; an empty set means "no explicit export list".
    var exported = new HashSet<string>(StringComparer.Ordinal);
    var allDeclaration = AllExportRegex().Match(content);
    if (allDeclaration.Success)
    {
        var entries = allDeclaration.Groups[1].Value
            .Split(',')
            .Select(entry => entry.Trim().Trim('\'', '"'))
            .Where(entry => !string.IsNullOrEmpty(entry));
        exported.UnionWith(entries);
    }

    // Pass A: module-level functions.
    foreach (Match definition in FunctionDefRegex().Matches(content))
    {
        // A definition whose line starts with whitespace is indented, i.e. a method or nested function.
        var startOfLine = content.LastIndexOf('\n', Math.Max(0, definition.Index - 1)) + 1;
        var indented = startOfLine < definition.Index && char.IsWhiteSpace(content[startOfLine]);
        if (indented)
        {
            continue;
        }

        var name = definition.Groups[2].Value;
        var singleUnderscore = name.StartsWith('_') && !name.StartsWith("__");
        if (singleUnderscore && !includePrivate)
        {
            continue;
        }

        var defLine = GetLineNumber(content, definition.Index);
        var info = new FunctionInfo
        {
            Name = name,
            Module = moduleName,
            // With an __all__ list only the listed names count as public.
            IsPublic = !singleUnderscore && (exported.Count == 0 || exported.Contains(name)),
            StartLine = defLine,
            EndLine = FindFunctionEndLine(sourceLines, defLine - 1, 0)
        };
        functions[$"{moduleName}::{name}"] = info;
    }

    // Pass B: methods, attributed to the class whose header precedes them.
    foreach (Match classDefinition in ClassDefRegex().Matches(content))
    {
        var owner = classDefinition.Groups[1].Value;
        var ownerLine = GetLineNumber(content, classDefinition.Index);
        var ownerIndent = GetIndentation(sourceLines[ownerLine - 1]);
        var ownerModule = $"{moduleName}.{owner}";

        foreach (Match methodDefinition in MethodDefRegex().Matches(content))
        {
            var defLine = GetLineNumber(content, methodDefinition.Index);
            if (defLine <= ownerLine)
            {
                // Definition precedes (or is on) the class header line.
                continue;
            }

            var indent = methodDefinition.Groups[1].Value.Length;
            if (indent <= ownerIndent)
            {
                // First definition that is not nested deeper than the class ends the class body.
                break;
            }

            var name = methodDefinition.Groups[3].Value;
            var singleUnderscore = name.StartsWith('_') && !name.StartsWith("__");
            if (singleUnderscore && !includePrivate)
            {
                continue;
            }

            // Dunder methods are considered public.
            var dunder = name.StartsWith("__") && name.EndsWith("__");
            functions[$"{ownerModule}::{name}"] = new FunctionInfo
            {
                Name = name,
                Module = ownerModule,
                IsPublic = !singleUnderscore || dunder,
                StartLine = defLine,
                EndLine = FindFunctionEndLine(sourceLines, defLine - 1, indent)
            };
        }
    }
}
/// <summary>
/// Second analysis pass: registers the functions of one module as graph nodes and adds an edge for
/// every call in their bodies that can be resolved to a known function.
/// </summary>
/// <param name="content">Full text of the Python file.</param>
/// <param name="moduleName">Dotted module name of the file.</param>
/// <param name="allFunctions">Functions collected from every file in the first pass.</param>
/// <param name="graph">Graph that receives the methods and edges.</param>
private void AnalyzeCalls(
    string content,
    string moduleName,
    Dictionary<string, FunctionInfo> allFunctions,
    InternalCallGraph graph)
{
    var sourceLines = content.Split('\n');

    // Map of local alias -> "<module>.<name>" for every "from X import a, b as c" statement.
    var imports = new Dictionary<string, string>(StringComparer.Ordinal);
    foreach (Match importStatement in FromImportRegex().Matches(content))
    {
        var sourceModule = importStatement.Groups[1].Value;
        var importedItems = importStatement.Groups[2].Value.Split(',');
        foreach (var rawItem in importedItems)
        {
            var pieces = rawItem.Trim().Split(" as ");
            var importedName = pieces[0].Trim();
            var localAlias = pieces.Length > 1 ? pieces[1].Trim() : importedName;
            imports[localAlias] = $"{sourceModule}.{importedName}";
        }
    }

    // Functions whose recorded module is this module or starts with "<module>.".
    var callers = allFunctions
        .Where(entry => entry.Value.Module == moduleName || entry.Value.Module.StartsWith($"{moduleName}."))
        .ToList();

    foreach (var caller in callers)
    {
        var callerKey = caller.Key;
        var info = caller.Value;

        graph.AddMethod(new InternalMethodRef
        {
            MethodKey = callerKey,
            Name = info.Name,
            DeclaringType = info.Module,
            IsPublic = info.IsPublic
        });

        // No usable body range recorded for this function.
        var hasBody = info.StartLine > 0 && info.EndLine > info.StartLine;
        if (!hasBody)
        {
            continue;
        }

        // StartLine is the 1-based "def" line, so skipping that many lines lands on the first body line.
        var body = string.Join(
            "\n",
            sourceLines.Skip(info.StartLine).Take(info.EndLine - info.StartLine));

        foreach (Match call in FunctionCallRegex().Matches(body))
        {
            var calledName = call.Groups[1].Value;
            if (IsBuiltIn(calledName))
            {
                continue;
            }

            var calleeKey = ResolveFunctionKey(calledName, moduleName, imports, allFunctions);
            if (calleeKey is null || calleeKey == callerKey)
            {
                // Unresolved call or direct self-recursion: no edge.
                continue;
            }

            graph.AddEdge(new InternalCallEdge { Caller = callerKey, Callee = calleeKey });
        }
    }
}
/// <summary>
/// Resolves a called name to the key of a known function.
/// </summary>
/// <remarks>
/// Resolution order: (1) function in the calling module, (2) class method in the calling module,
/// (3) target of a <c>from ... import ...</c> statement, (4) any known function with that name.
/// Keys have the shape <c>module::function</c> or <c>module.Class::method</c>; all comparisons are
/// ordinal because Python identifiers are case-sensitive.
/// </remarks>
/// <param name="calledName">Name as written at the call site (may be an import alias).</param>
/// <param name="callerModule">Dotted module name of the calling function.</param>
/// <param name="imports">Map of local alias to <c>module.name</c> for the calling file.</param>
/// <param name="allFunctions">All functions collected for the package.</param>
/// <returns>The key of the resolved callee, or <c>null</c> when nothing matches.</returns>
private static string? ResolveFunctionKey(
    string calledName,
    string callerModule,
    Dictionary<string, string> imports,
    Dictionary<string, FunctionInfo> allFunctions)
{
    var nameSuffix = $"::{calledName}";

    // 1. Same module.
    var sameModuleKey = $"{callerModule}{nameSuffix}";
    if (allFunctions.ContainsKey(sameModuleKey))
    {
        return sameModuleKey;
    }

    // 2. Class method in the same module.
    var classPrefix = $"{callerModule}.";
    var classMethodKey = allFunctions.Keys.FirstOrDefault(k =>
        k.StartsWith(classPrefix, StringComparison.Ordinal) &&
        k.EndsWith(nameSuffix, StringComparison.Ordinal));
    if (classMethodKey is not null)
    {
        return classMethodKey;
    }

    // 3. Imported name (resolved through the real name, not the alias).
    if (imports.TryGetValue(calledName, out var importedPath))
    {
        var importedKey = ResolveImportedKey(importedPath, allFunctions);
        if (importedKey is not null)
        {
            return importedKey;
        }
    }

    // 4. Any module with that function.
    return allFunctions.Keys.FirstOrDefault(k => k.EndsWith(nameSuffix, StringComparison.Ordinal));

    // Translates an import path ("pkg.mod.name") into the key format used by the function table.
    static string? ResolveImportedKey(string importedPath, Dictionary<string, FunctionInfo> known)
    {
        // Relative imports ("from .utils import x") carry leading dots that never appear in keys.
        var path = importedPath.TrimStart('.');
        if (path.Length == 0)
        {
            return null;
        }

        var split = path.LastIndexOf('.');
        var realName = split >= 0 ? path[(split + 1)..] : path;

        if (split > 0 && realName.Length > 0)
        {
            // Imported function: "<module>.<name>" corresponds to key "<module>::<name>".
            var importedModule = path[..split];
            var functionKey = $"{importedModule}::{realName}";
            if (known.ContainsKey(functionKey))
            {
                return functionKey;
            }

            // The recorded module may carry additional leading package segments.
            var dottedFunctionKey = $".{functionKey}";
            var nested = known.Keys.FirstOrDefault(k => k.EndsWith(dottedFunctionKey, StringComparison.Ordinal));
            if (nested is not null)
            {
                return nested;
            }
        }

        // Imported class: a call instantiates it, so prefer its initializer, then any member.
        var initKey = $"{path}::__init__";
        var dottedInitKey = $".{initKey}";
        var initializer = known.Keys.FirstOrDefault(k =>
            k == initKey || k.EndsWith(dottedInitKey, StringComparison.Ordinal));
        if (initializer is not null)
        {
            return initializer;
        }

        var ownerPrefix = $"{path}::";
        var dottedOwnerPrefix = $".{ownerPrefix}";
        var member = known.Keys.FirstOrDefault(k =>
            k.StartsWith(ownerPrefix, StringComparison.Ordinal) ||
            k.Contains(dottedOwnerPrefix, StringComparison.Ordinal));
        if (member is not null)
        {
            return member;
        }

        // Last resort for imports: any function carrying the imported (un-aliased) name.
        if (realName.Length == 0)
        {
            return null;
        }

        var realSuffix = $"::{realName}";
        return known.Keys.FirstOrDefault(k => k.EndsWith(realSuffix, StringComparison.Ordinal));
    }
}
/// <summary>
/// Tells whether a called name is a Python built-in, a common exception type, a keyword, a literal
/// or a conventional receiver name, none of which should become a call-graph edge.
/// </summary>
/// <param name="name">Name found in front of an opening parenthesis.</param>
/// <returns><c>true</c> when the name is on the ignore list; otherwise <c>false</c>.</returns>
private static bool IsBuiltIn(string name) => name switch
{
    // Built-in functions and types.
    "print" or "len" or "range" or "str" or "int" or "float" or "bool" or "list"
        or "dict" or "set" or "tuple" or "type" or "isinstance" or "issubclass" or "hasattr"
        or "getattr" or "setattr" or "delattr" or "callable" or "super" or "property"
        or "staticmethod" or "classmethod" or "open" or "input" or "format" or "repr"
        or "id" or "hash" or "abs" or "round" or "min" or "max" or "sum" or "sorted"
        or "reversed" or "enumerate" or "zip" or "map" or "filter" or "any" or "all"
        or "iter" or "next" or "slice" or "object" => true,

    // Common exception types.
    "Exception" or "ValueError" or "TypeError" or "KeyError" or "IndexError"
        or "AttributeError" or "RuntimeError" => true,

    // Keywords that can be directly followed by "(".
    "if" or "for" or "while" or "return" or "yield" or "raise" or "try"
        or "except" or "finally" or "with" or "as" or "import" or "from" or "class" or "def"
        or "async" or "await" or "lambda" or "pass" or "break" or "continue" or "assert" => true,

    // Literals and conventional receiver names.
    "True" or "False" or "None" or "self" or "cls" => true,

    _ => false,
};
/// <summary>
/// Converts a character offset into a 1-based line number by counting the <c>'\n'</c> characters
/// that precede it.
/// </summary>
/// <param name="content">Text the offset refers to.</param>
/// <param name="index">Character offset; values past the end are clamped to the text length.</param>
/// <returns>1 for offsets on the first line (including non-positive offsets).</returns>
private static int GetLineNumber(string content, int index)
{
    if (index <= 0)
    {
        return 1;
    }

    var limit = Math.Min(index, content.Length);
    var line = 1;
    var position = 0;
    while (position < limit)
    {
        var newline = content.IndexOf('\n', position, limit - position);
        if (newline < 0)
        {
            break;
        }

        line++;
        position = newline + 1;
    }

    return line;
}
/// <summary>
/// Measures the leading indentation of a line, counting a space as 1 column and a tab as 4.
/// </summary>
/// <param name="line">Source line to measure.</param>
/// <returns>Indentation width up to the first character that is neither a space nor a tab.</returns>
private static int GetIndentation(string line)
{
    var width = 0;
    for (var i = 0; i < line.Length; i++)
    {
        switch (line[i])
        {
            case ' ':
                width += 1;
                break;
            case '\t':
                width += 4;
                break;
            default:
                return width;
        }
    }

    return width;
}
/// <summary>
/// Finds where the body of a Python function ends, based on indentation.
/// </summary>
/// <param name="lines">All lines of the file.</param>
/// <param name="defLineIndex">0-based index of the line holding the <c>def</c>.</param>
/// <param name="baseIndent">Indentation of the <c>def</c> line.</param>
/// <returns>
/// The 0-based index of the first non-blank line after the body whose indentation is not deeper than
/// <paramref name="baseIndent"/>; <c>defLineIndex + 1</c> when the very first non-blank line after
/// the <c>def</c> is already not indented deeper; <c>lines.Length</c> when the body runs to the end
/// of the file.
/// </returns>
private static int FindFunctionEndLine(string[] lines, int defLineIndex, int baseIndent)
{
    var insideBody = false;
    var cursor = defLineIndex;

    while (++cursor < lines.Length)
    {
        var text = lines[cursor];
        if (string.IsNullOrWhiteSpace(text))
        {
            // Blank lines never terminate a body.
            continue;
        }

        var dedented = GetIndentation(text) <= baseIndent;
        if (!insideBody)
        {
            if (dedented)
            {
                // Nothing indented follows the def: the function has no multi-line body.
                return defLineIndex + 1;
            }

            insideBody = true;
            continue;
        }

        if (dedented)
        {
            return cursor;
        }
    }

    return lines.Length;
}
/// <summary>
/// Function or method discovered in the first analysis pass.
/// </summary>
private sealed class FunctionInfo
{
    /// <summary>Unqualified function or method name.</summary>
    public required string Name { get; init; }

    /// <summary>
    /// Owner used for key building: the module name for functions, <c>module.Class</c> for methods.
    /// </summary>
    public required string Module { get; init; }

    /// <summary>Whether the function counts as part of the public surface.</summary>
    public bool IsPublic { get; set; }

    /// <summary>1-based line number of the <c>def</c> line.</summary>
    public int StartLine { get; init; }

    /// <summary>End of the body as returned by <c>FindFunctionEndLine</c>.</summary>
    public int EndLine { get; init; }
}
}

View File

@@ -0,0 +1,198 @@
// -----------------------------------------------------------------------------
// MavenPackageDownloader.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-005)
// Description: Downloads Maven packages (JARs) from Maven Central or custom
// repositories for vulnerability surface analysis.
// -----------------------------------------------------------------------------
using System;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.Scanner.VulnSurfaces.Download;
/// <summary>
/// Downloads Maven packages (JARs) from Maven Central or custom repositories.
/// Maven coordinates: groupId:artifactId:version
/// </summary>
public sealed class MavenPackageDownloader : IPackageDownloader
{
    private const string DefaultRepositoryUrl = "https://repo1.maven.org/maven2";

    private readonly HttpClient _httpClient;
    private readonly ILogger<MavenPackageDownloader> _logger;
    private readonly MavenDownloaderOptions _options;

    /// <summary>
    /// Creates the downloader.
    /// </summary>
    /// <param name="httpClient">Client used for all repository requests.</param>
    /// <param name="logger">Diagnostic logger.</param>
    /// <param name="options">Downloader options; default options are used when no value is supplied.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="httpClient"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public MavenPackageDownloader(
        HttpClient httpClient,
        ILogger<MavenPackageDownloader> logger,
        IOptions<MavenDownloaderOptions> options)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? new MavenDownloaderOptions();
    }

    /// <inheritdoc />
    public string Ecosystem => "maven";

    /// <inheritdoc />
    /// <remarks>
    /// Downloads <c>artifactId-version.jar</c> (falling back to the <c>-sources.jar</c>) and extracts
    /// it next to the archive. Failures are reported through the returned result, not thrown.
    /// </remarks>
    public async Task<PackageDownloadResult> DownloadAsync(
        PackageDownloadRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();
        try
        {
            // Parse Maven coordinates (groupId:artifactId or just artifactId for simple cases)
            var (groupId, artifactId) = ParseCoordinates(request.PackageName);
            var version = request.Version;
            var safeArtifactId = GetSafeDirectoryName(groupId, artifactId);
            var extractedDir = Path.Combine(request.OutputDirectory, $"{safeArtifactId}-{version}");
            var archivePath = Path.Combine(request.OutputDirectory, $"{safeArtifactId}-{version}.jar");

            // Check cache first
            if (request.UseCache && Directory.Exists(extractedDir))
            {
                sw.Stop();
                _logger.LogDebug("Using cached Maven package {GroupId}:{ArtifactId} v{Version}",
                    groupId, artifactId, version);
                return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed, fromCache: true);
            }

            // Build download URL
            // Maven Central path: /<groupId with / instead of .>/<artifactId>/<version>/<artifactId>-<version>.jar
            // A configured URL may end with '/'; trim it so the path never contains "//".
            var repositoryUrl = (request.RegistryUrl ?? _options.RepositoryUrl ?? DefaultRepositoryUrl).TrimEnd('/');
            var groupPath = groupId.Replace('.', '/');
            var jarUrl = $"{repositoryUrl}/{groupPath}/{artifactId}/{version}/{artifactId}-{version}.jar";
            _logger.LogDebug("Downloading Maven JAR from {Url}", jarUrl);

            // Download JAR
            Directory.CreateDirectory(request.OutputDirectory);
            using var response = await _httpClient.GetAsync(jarUrl, cancellationToken);
            if (!response.IsSuccessStatusCode)
            {
                // Try sources JAR as fallback for source analysis
                var sourcesUrl = $"{repositoryUrl}/{groupPath}/{artifactId}/{version}/{artifactId}-{version}-sources.jar";
                _logger.LogDebug("Primary JAR not found, trying sources JAR from {Url}", sourcesUrl);
                using var sourcesResponse = await _httpClient.GetAsync(sourcesUrl, cancellationToken);
                if (!sourcesResponse.IsSuccessStatusCode)
                {
                    sw.Stop();
                    // The status of the primary request is reported.
                    var error = $"Failed to download: HTTP {(int)response.StatusCode} {response.ReasonPhrase}";
                    _logger.LogWarning("Maven download failed for {GroupId}:{ArtifactId} v{Version}: {Error}",
                        groupId, artifactId, version, error);
                    return PackageDownloadResult.Fail(error, sw.Elapsed);
                }

                // Save sources JAR
                await using (var fs = File.Create(archivePath))
                {
                    await sourcesResponse.Content.CopyToAsync(fs, cancellationToken);
                }
            }
            else
            {
                // Save primary JAR
                await using (var fs = File.Create(archivePath))
                {
                    await response.Content.CopyToAsync(fs, cancellationToken);
                }
            }

            // Extract JAR (it's just a ZIP file)
            if (Directory.Exists(extractedDir))
            {
                Directory.Delete(extractedDir, recursive: true);
            }

            try
            {
                ZipFile.ExtractToDirectory(archivePath, extractedDir);
            }
            catch
            {
                // The cache check above only tests for the directory's existence, so a partially
                // extracted directory must not survive a failed extraction.
                TryDeleteDirectory(extractedDir);
                throw;
            }

            sw.Stop();
            _logger.LogDebug("Downloaded and extracted Maven {GroupId}:{ArtifactId} v{Version} in {Duration}ms",
                groupId, artifactId, version, sw.ElapsedMilliseconds);
            return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed);
        }
        catch (Exception ex)
        {
            sw.Stop();
            _logger.LogWarning(ex, "Failed to download Maven package {Package} v{Version}",
                request.PackageName, request.Version);
            return PackageDownloadResult.Fail(ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Parses Maven coordinates from package name.
    /// Formats: "groupId:artifactId" or just "artifactId".
    /// </summary>
    /// <returns>
    /// The first two colon-separated parts; when there is no colon, the package name is used for
    /// both the group id and the artifact id.
    /// </returns>
    private static (string groupId, string artifactId) ParseCoordinates(string packageName)
    {
        var parts = packageName.Split(':');
        if (parts.Length >= 2)
        {
            return (parts[0], parts[1]);
        }

        // No groupId provided: the name doubles as groupId and artifactId.
        return (packageName, packageName);
    }

    /// <summary>
    /// Creates a safe directory name from Maven coordinates.
    /// </summary>
    private static string GetSafeDirectoryName(string groupId, string artifactId)
    {
        // Use artifactId primarily, prefixed with last segment of groupId if different
        var groupLastPart = groupId.Split('.')[^1];
        if (groupLastPart.Equals(artifactId, StringComparison.OrdinalIgnoreCase))
        {
            return artifactId;
        }

        return $"{groupLastPart}.{artifactId}";
    }

    /// <summary>
    /// Best-effort removal of a directory; used only on failure paths where the original error
    /// is the one that must be reported.
    /// </summary>
    private static void TryDeleteDirectory(string path)
    {
        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
        }
        catch (IOException)
        {
            // Deliberately ignored: cleanup must not mask the extraction failure.
        }
        catch (UnauthorizedAccessException)
        {
            // Deliberately ignored: cleanup must not mask the extraction failure.
        }
    }
}
/// <summary>
/// Configuration values for the Maven package downloader.
/// </summary>
public sealed class MavenDownloaderOptions
{
    /// <summary>
    /// Base URL of a custom repository; leave <c>null</c> to use Maven Central.
    /// </summary>
    public string? RepositoryUrl { get; set; }

    /// <summary>
    /// Directory in which downloaded packages are cached.
    /// </summary>
    public string? CacheDirectory { get; set; }

    /// <summary>
    /// Upper bound for a package's size in bytes; 0 means no limit.
    /// </summary>
    public long MaxPackageSize { get; set; }

    /// <summary>
    /// Indicates that sources JARs should be preferred for analysis.
    /// </summary>
    public bool PreferSourcesJar { get; set; }
}

View File

@@ -0,0 +1,238 @@
// -----------------------------------------------------------------------------
// NpmPackageDownloader.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-004)
// Description: Downloads npm packages from registry.npmjs.org for vulnerability
// surface analysis.
// -----------------------------------------------------------------------------
using System;
using System.Diagnostics;
using System.IO;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SharpCompress.Archives;
using SharpCompress.Archives.Tar;
using SharpCompress.Common;
using SharpCompress.Readers;
namespace StellaOps.Scanner.VulnSurfaces.Download;
/// <summary>
/// Downloads npm packages from registry.npmjs.org or custom registries.
/// npm packages are distributed as .tgz (gzipped tarball) files.
/// </summary>
public sealed class NpmPackageDownloader : IPackageDownloader
{
    private const string DefaultRegistryUrl = "https://registry.npmjs.org";

    private readonly HttpClient _httpClient;
    private readonly ILogger<NpmPackageDownloader> _logger;
    private readonly NpmDownloaderOptions _options;

    /// <summary>
    /// Creates the downloader.
    /// </summary>
    /// <param name="httpClient">Client used for all registry requests.</param>
    /// <param name="logger">Diagnostic logger.</param>
    /// <param name="options">Downloader options; default options are used when no value is supplied.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="httpClient"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public NpmPackageDownloader(
        HttpClient httpClient,
        ILogger<NpmPackageDownloader> logger,
        IOptions<NpmDownloaderOptions> options)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? new NpmDownloaderOptions();
    }

    /// <inheritdoc />
    public string Ecosystem => "npm";

    /// <inheritdoc />
    /// <remarks>
    /// Looks up the tarball URL in the registry metadata, downloads the <c>.tgz</c> and extracts it
    /// next to the archive. Failures are reported through the returned result, not thrown.
    /// </remarks>
    public async Task<PackageDownloadResult> DownloadAsync(
        PackageDownloadRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();
        try
        {
            // Scoped packages have the form @scope/name; derive a file-system friendly name.
            var packageName = request.PackageName;
            var safePackageName = GetSafeDirectoryName(packageName);
            var extractedDir = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}");
            var archivePath = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}.tgz");

            // Check cache first
            if (request.UseCache && Directory.Exists(extractedDir))
            {
                sw.Stop();
                _logger.LogDebug("Using cached npm package {Package} v{Version}", packageName, request.Version);
                return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed, fromCache: true);
            }

            // Get package metadata to find tarball URL.
            // A configured URL may end with '/'; trim it so the path never contains "//".
            var registryUrl = (request.RegistryUrl ?? _options.RegistryUrl ?? DefaultRegistryUrl).TrimEnd('/');
            var tarballUrl = await GetTarballUrlAsync(registryUrl, packageName, request.Version, cancellationToken);
            if (tarballUrl is null)
            {
                sw.Stop();
                var error = $"Version {request.Version} not found for package {packageName}";
                _logger.LogWarning("npm package not found: {Error}", error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            _logger.LogDebug("Downloading npm package from {Url}", tarballUrl);

            // Download tarball
            Directory.CreateDirectory(request.OutputDirectory);
            using var response = await _httpClient.GetAsync(tarballUrl, cancellationToken);
            if (!response.IsSuccessStatusCode)
            {
                sw.Stop();
                var error = $"Failed to download: HTTP {(int)response.StatusCode} {response.ReasonPhrase}";
                _logger.LogWarning("npm download failed for {Package} v{Version}: {Error}",
                    packageName, request.Version, error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            // Save archive
            await using (var fs = File.Create(archivePath))
            {
                await response.Content.CopyToAsync(fs, cancellationToken);
            }

            // Extract .tgz (gzipped tarball)
            if (Directory.Exists(extractedDir))
            {
                Directory.Delete(extractedDir, recursive: true);
            }

            Directory.CreateDirectory(extractedDir);
            try
            {
                ExtractTgz(archivePath, extractedDir);
            }
            catch
            {
                // The cache check above only tests for the directory's existence, so a partially
                // extracted directory must not survive a failed extraction.
                TryDeleteDirectory(extractedDir);
                throw;
            }

            sw.Stop();
            _logger.LogDebug("Downloaded and extracted npm {Package} v{Version} in {Duration}ms",
                packageName, request.Version, sw.ElapsedMilliseconds);
            return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed);
        }
        catch (Exception ex)
        {
            sw.Stop();
            _logger.LogWarning(ex, "Failed to download npm package {Package} v{Version}",
                request.PackageName, request.Version);
            return PackageDownloadResult.Fail(ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Gets the tarball URL from the npm registry metadata.
    /// </summary>
    /// <returns>
    /// The value of <c>versions.&lt;version&gt;.dist.tarball</c>, or <c>null</c> when the metadata
    /// request fails or the document does not contain that property.
    /// </returns>
    private async Task<string?> GetTarballUrlAsync(
        string registryUrl,
        string packageName,
        string version,
        CancellationToken cancellationToken)
    {
        // Encode scoped packages (@scope/name → @scope%2Fname)
        var encodedName = Uri.EscapeDataString(packageName).Replace("%40", "@");
        var metadataUrl = $"{registryUrl}/{encodedName}";

        using var response = await _httpClient.GetAsync(metadataUrl, cancellationToken);
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogDebug("Failed to fetch npm metadata for {Package}: HTTP {StatusCode}",
                packageName, (int)response.StatusCode);
            return null;
        }

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken);

        // Look for versions.<version>.dist.tarball
        if (doc.RootElement.TryGetProperty("versions", out var versions) &&
            versions.TryGetProperty(version, out var versionObj) &&
            versionObj.TryGetProperty("dist", out var dist) &&
            dist.TryGetProperty("tarball", out var tarball))
        {
            return tarball.GetString();
        }

        return null;
    }

    /// <summary>
    /// Extracts a .tgz file (gzipped tarball) to the specified directory.
    /// </summary>
    /// <exception cref="IOException">
    /// An entry would be written outside of <paramref name="destinationDir"/>.
    /// </exception>
    private static void ExtractTgz(string tgzPath, string destinationDir)
    {
        // Archive contents are untrusted: entry names such as "../../x" or absolute paths must
        // never escape the destination directory.
        var root = Path.GetFullPath(destinationDir);
        var rootPrefix = Path.EndsInDirectorySeparator(root) ? root : root + Path.DirectorySeparatorChar;

        using var archive = ArchiveFactory.Open(tgzPath);
        foreach (var entry in archive.Entries)
        {
            if (entry.IsDirectory)
            {
                continue;
            }

            // npm packages have a "package/" prefix in the tarball
            var entryPath = entry.Key ?? string.Empty;
            if (entryPath.StartsWith("package/", StringComparison.OrdinalIgnoreCase))
            {
                entryPath = entryPath["package/".Length..];
            }

            if (string.IsNullOrEmpty(entryPath))
            {
                continue;
            }

            var destPath = Path.GetFullPath(Path.Combine(root, entryPath));
            if (!destPath.StartsWith(rootPrefix, StringComparison.Ordinal))
            {
                throw new IOException(
                    $"Archive entry '{entry.Key}' would be extracted outside of the destination directory.");
            }

            var destDir = Path.GetDirectoryName(destPath);
            if (!string.IsNullOrEmpty(destDir))
            {
                Directory.CreateDirectory(destDir);
            }

            entry.WriteToFile(destPath, new ExtractionOptions
            {
                ExtractFullPath = false,
                Overwrite = true
            });
        }
    }

    /// <summary>
    /// Converts a package name to a safe directory name.
    /// Handles scoped packages like @scope/name → scope-name
    /// </summary>
    private static string GetSafeDirectoryName(string packageName)
    {
        return packageName
            .Replace("@", string.Empty)
            .Replace("/", "-")
            .Replace("\\", "-");
    }

    /// <summary>
    /// Best-effort removal of a directory; used only on failure paths where the original error
    /// is the one that must be reported.
    /// </summary>
    private static void TryDeleteDirectory(string path)
    {
        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
        }
        catch (IOException)
        {
            // Deliberately ignored: cleanup must not mask the extraction failure.
        }
        catch (UnauthorizedAccessException)
        {
            // Deliberately ignored: cleanup must not mask the extraction failure.
        }
    }
}
/// <summary>
/// Configuration values for the npm package downloader.
/// </summary>
public sealed class NpmDownloaderOptions
{
    /// <summary>
    /// Base URL of a custom registry; leave <c>null</c> to use registry.npmjs.org.
    /// </summary>
    public string? RegistryUrl { get; set; }

    /// <summary>
    /// Directory in which downloaded packages are cached.
    /// </summary>
    public string? CacheDirectory { get; set; }

    /// <summary>
    /// Upper bound for a package's size in bytes; 0 means no limit.
    /// </summary>
    public long MaxPackageSize { get; set; }
}

View File

@@ -0,0 +1,295 @@
// -----------------------------------------------------------------------------
// PyPIPackageDownloader.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-006)
// Description: Downloads Python packages from PyPI for vulnerability surface
// analysis. Supports both wheel (.whl) and source distributions.
// -----------------------------------------------------------------------------
using System;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SharpCompress.Archives;
using SharpCompress.Common;
namespace StellaOps.Scanner.VulnSurfaces.Download;
/// <summary>
/// Downloads Python packages from PyPI (Python Package Index).
/// Supports wheel (.whl) and source distribution (.tar.gz) formats.
/// </summary>
public sealed class PyPIPackageDownloader : IPackageDownloader
{
    private const string DefaultRegistryUrl = "https://pypi.org/pypi";

    private readonly HttpClient _httpClient;
    private readonly ILogger<PyPIPackageDownloader> _logger;
    private readonly PyPIDownloaderOptions _options;

    /// <summary>
    /// Creates the downloader.
    /// </summary>
    /// <param name="httpClient">Client used for all registry requests.</param>
    /// <param name="logger">Diagnostic logger.</param>
    /// <param name="options">Downloader options; default options are used when no value is supplied.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="httpClient"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public PyPIPackageDownloader(
        HttpClient httpClient,
        ILogger<PyPIPackageDownloader> logger,
        IOptions<PyPIDownloaderOptions> options)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? new PyPIDownloaderOptions();
    }

    /// <inheritdoc />
    public string Ecosystem => "pypi";

    /// <inheritdoc />
    /// <remarks>
    /// Looks up the download URL in the registry metadata, downloads the wheel or source
    /// distribution and extracts it next to the archive. Failures are reported through the returned
    /// result, not thrown. On a cache hit the archive path of the result is empty.
    /// </remarks>
    public async Task<PackageDownloadResult> DownloadAsync(
        PackageDownloadRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();
        try
        {
            // Normalize package name (PyPI uses lowercase with hyphens)
            var normalizedName = NormalizePackageName(request.PackageName);
            var safePackageName = GetSafeDirectoryName(normalizedName);
            var extractedDir = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}");

            // Check cache first
            if (request.UseCache && Directory.Exists(extractedDir))
            {
                sw.Stop();
                _logger.LogDebug("Using cached PyPI package {Package} v{Version}",
                    request.PackageName, request.Version);
                return PackageDownloadResult.Ok(extractedDir, string.Empty, sw.Elapsed, fromCache: true);
            }

            // Get package metadata to find download URL.
            // A configured URL may end with '/'; trim it so the path never contains "//".
            var registryUrl = (request.RegistryUrl ?? _options.RegistryUrl ?? DefaultRegistryUrl).TrimEnd('/');
            var downloadInfo = await GetDownloadUrlAsync(registryUrl, normalizedName, request.Version, cancellationToken);
            if (downloadInfo is null)
            {
                sw.Stop();
                var error = $"Version {request.Version} not found for package {request.PackageName}";
                _logger.LogWarning("PyPI package not found: {Error}", error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            _logger.LogDebug("Downloading PyPI package from {Url} (type: {Type})",
                downloadInfo.Url, downloadInfo.PackageType);

            // Download package
            Directory.CreateDirectory(request.OutputDirectory);
            using var response = await _httpClient.GetAsync(downloadInfo.Url, cancellationToken);
            if (!response.IsSuccessStatusCode)
            {
                sw.Stop();
                var error = $"Failed to download: HTTP {(int)response.StatusCode} {response.ReasonPhrase}";
                _logger.LogWarning("PyPI download failed for {Package} v{Version}: {Error}",
                    request.PackageName, request.Version, error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            // Determine archive extension and path
            var isWheel = downloadInfo.PackageType == "bdist_wheel";
            var extension = isWheel ? ".whl" : ".tar.gz";
            var archivePath = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}{extension}");

            // Save archive
            await using (var fs = File.Create(archivePath))
            {
                await response.Content.CopyToAsync(fs, cancellationToken);
            }

            // Extract
            if (Directory.Exists(extractedDir))
            {
                Directory.Delete(extractedDir, recursive: true);
            }

            Directory.CreateDirectory(extractedDir);
            try
            {
                if (isWheel)
                {
                    // Wheel files are ZIP archives
                    ZipFile.ExtractToDirectory(archivePath, extractedDir);
                }
                else
                {
                    // Source distributions are .tar.gz
                    ExtractTarGz(archivePath, extractedDir);
                }
            }
            catch
            {
                // The cache check above only tests for the directory's existence, so a partially
                // extracted directory must not survive a failed extraction.
                TryDeleteDirectory(extractedDir);
                throw;
            }

            sw.Stop();
            _logger.LogDebug("Downloaded and extracted PyPI {Package} v{Version} in {Duration}ms",
                request.PackageName, request.Version, sw.ElapsedMilliseconds);
            return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed);
        }
        catch (Exception ex)
        {
            sw.Stop();
            _logger.LogWarning(ex, "Failed to download PyPI package {Package} v{Version}",
                request.PackageName, request.Version);
            return PackageDownloadResult.Fail(ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Gets the download URL from PyPI JSON API.
    /// </summary>
    /// <returns>
    /// The source distribution or wheel entry of the <c>urls</c> array, chosen according to
    /// <see cref="PyPIDownloaderOptions.PreferSourceDistribution"/>; <c>null</c> when the metadata
    /// request fails or no usable entry exists.
    /// </returns>
    private async Task<PyPIDownloadInfo?> GetDownloadUrlAsync(
        string registryUrl,
        string packageName,
        string version,
        CancellationToken cancellationToken)
    {
        var metadataUrl = $"{registryUrl}/{packageName}/{version}/json";

        using var response = await _httpClient.GetAsync(metadataUrl, cancellationToken);
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogDebug("Failed to fetch PyPI metadata for {Package} v{Version}: HTTP {StatusCode}",
                packageName, version, (int)response.StatusCode);
            return null;
        }

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken);

        if (!doc.RootElement.TryGetProperty("urls", out var urls))
        {
            return null;
        }

        // Remember one candidate per distribution type: the last sdist and the first wheel listed.
        PyPIDownloadInfo? sourceDistribution = null;
        PyPIDownloadInfo? wheel = null;
        foreach (var urlEntry in urls.EnumerateArray())
        {
            var packageType = urlEntry.TryGetProperty("packagetype", out var pt) ? pt.GetString() : null;
            var url = urlEntry.TryGetProperty("url", out var u) ? u.GetString() : null;
            if (url is null)
            {
                continue;
            }

            if (packageType == "sdist")
            {
                sourceDistribution = new PyPIDownloadInfo(url, "sdist");
            }
            else if (packageType == "bdist_wheel" && wheel is null)
            {
                wheel = new PyPIDownloadInfo(url, "bdist_wheel");
            }
        }

        // Prefer source distribution for better Python AST analysis
        return _options.PreferSourceDistribution
            ? (sourceDistribution ?? wheel)
            : (wheel ?? sourceDistribution);
    }

    /// <summary>
    /// Extracts a .tar.gz file to the specified directory.
    /// </summary>
    /// <exception cref="IOException">
    /// An entry would be written outside of <paramref name="destinationDir"/>.
    /// </exception>
    private static void ExtractTarGz(string tarGzPath, string destinationDir)
    {
        // Archive contents are untrusted: entry names such as "pkg-1.0/../../x" or absolute paths
        // must never escape the destination directory.
        var root = Path.GetFullPath(destinationDir);
        var rootPrefix = Path.EndsInDirectorySeparator(root) ? root : root + Path.DirectorySeparatorChar;

        using var archive = ArchiveFactory.Open(tarGzPath);
        foreach (var entry in archive.Entries)
        {
            if (entry.IsDirectory)
            {
                continue;
            }

            var entryPath = entry.Key ?? string.Empty;

            // Source distributions typically have a top-level directory like "package-1.0.0/"
            // Remove it to flatten the structure
            var pathParts = entryPath.Split('/');
            if (pathParts.Length > 1)
            {
                entryPath = string.Join('/', pathParts.Skip(1));
            }

            if (string.IsNullOrEmpty(entryPath))
            {
                continue;
            }

            var destPath = Path.GetFullPath(Path.Combine(root, entryPath));
            if (!destPath.StartsWith(rootPrefix, StringComparison.Ordinal))
            {
                throw new IOException(
                    $"Archive entry '{entry.Key}' would be extracted outside of the destination directory.");
            }

            var destDir = Path.GetDirectoryName(destPath);
            if (!string.IsNullOrEmpty(destDir))
            {
                Directory.CreateDirectory(destDir);
            }

            entry.WriteToFile(destPath, new ExtractionOptions
            {
                ExtractFullPath = false,
                Overwrite = true
            });
        }
    }

    /// <summary>
    /// Normalizes a PyPI package name (lowercase, hyphens).
    /// </summary>
    private static string NormalizePackageName(string packageName)
    {
        return packageName.ToLowerInvariant().Replace('_', '-');
    }

    /// <summary>
    /// Creates a safe directory name from package name.
    /// </summary>
    private static string GetSafeDirectoryName(string packageName)
    {
        return packageName.Replace('-', '_');
    }

    /// <summary>
    /// Best-effort removal of a directory; used only on failure paths where the original error
    /// is the one that must be reported.
    /// </summary>
    private static void TryDeleteDirectory(string path)
    {
        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
        }
        catch (IOException)
        {
            // Deliberately ignored: cleanup must not mask the extraction failure.
        }
        catch (UnauthorizedAccessException)
        {
            // Deliberately ignored: cleanup must not mask the extraction failure.
        }
    }

    /// <summary>Download candidate taken from the registry metadata.</summary>
    private sealed record PyPIDownloadInfo(string Url, string PackageType);
}
/// <summary>
/// Configuration values for the PyPI package downloader.
/// </summary>
public sealed class PyPIDownloaderOptions
{
    /// <summary>
    /// Base URL of a custom registry; leave <c>null</c> to use pypi.org.
    /// </summary>
    public string? RegistryUrl { get; set; }

    /// <summary>
    /// Directory in which downloaded packages are cached.
    /// </summary>
    public string? CacheDirectory { get; set; }

    /// <summary>
    /// Upper bound for a package's size in bytes; 0 means no limit.
    /// </summary>
    public long MaxPackageSize { get; set; }

    /// <summary>
    /// Selects source distributions over wheels when both are available.
    /// Defaults to <c>true</c> for better AST analysis.
    /// </summary>
    public bool PreferSourceDistribution { get; set; } = true;
}

View File

@@ -0,0 +1,508 @@
// -----------------------------------------------------------------------------
// JavaBytecodeFingerprinter.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-010)
// Description: Java method fingerprinting using bytecode parsing.
// Parses .class files from JAR archives for method extraction.
// -----------------------------------------------------------------------------
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace StellaOps.Scanner.VulnSurfaces.Fingerprint;
/// <summary>
/// Computes method fingerprints for Java packages using bytecode hashing.
/// Parses .class files from extracted JAR archives.
/// </summary>
public sealed class JavaBytecodeFingerprinter : IMethodFingerprinter
{
private readonly ILogger<JavaBytecodeFingerprinter> _logger;
// Java class file magic number
private const uint ClassFileMagic = 0xCAFEBABE;
/// <summary>
/// Creates the fingerprinter.
/// </summary>
/// <param name="logger">Diagnostic logger.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
public JavaBytecodeFingerprinter(ILogger<JavaBytecodeFingerprinter> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
public string Ecosystem
{
    get { return "maven"; }
}
/// <inheritdoc />
/// <remarks>
/// Class files that cannot be processed are logged and skipped; any other failure is reported
/// through the returned result.
/// </remarks>
public async Task<FingerprintResult> FingerprintAsync(
    FingerprintRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);

    var stopwatch = Stopwatch.StartNew();
    var fingerprints = new Dictionary<string, MethodFingerprint>(StringComparer.Ordinal);

    try
    {
        var processedFiles = 0;
        foreach (var classFile in GetClassFiles(request.PackagePath))
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                await ProcessClassFileAsync(classFile, request.PackagePath, fingerprints, request, cancellationToken);
                processedFiles++;
            }
            catch (Exception ex)
            {
                // One unreadable class file must not abort the whole package.
                _logger.LogDebug(ex, "Failed to process class file {Path}", classFile);
            }
        }

        stopwatch.Stop();
        _logger.LogDebug(
            "Fingerprinted {MethodCount} methods from {FileCount} class files in {Duration}ms",
            fingerprints.Count, processedFiles, stopwatch.ElapsedMilliseconds);

        return FingerprintResult.Ok(fingerprints, stopwatch.Elapsed, processedFiles);
    }
    catch (Exception ex)
    {
        stopwatch.Stop();
        _logger.LogWarning(ex, "Failed to fingerprint Java package at {Path}", request.PackagePath);
        return FingerprintResult.Fail(ex.Message, stopwatch.Elapsed);
    }
}
/// <summary>
/// Lists the <c>.class</c> files of an extracted package.
/// </summary>
/// <param name="packagePath">Root directory of the extracted archive.</param>
/// <returns>
/// Every <c>*.class</c> file below <paramref name="packagePath"/> whose path relative to the root
/// does not start with <c>META-INF</c>; an empty array when the directory does not exist.
/// </returns>
private static string[] GetClassFiles(string packagePath)
{
    if (!Directory.Exists(packagePath))
    {
        return [];
    }

    // Path.GetRelativePath is used instead of string replacement: it strips the root exactly once
    // and copes with trailing or alternate directory separators in packagePath.
    return Directory.GetFiles(packagePath, "*.class", SearchOption.AllDirectories)
        .Where(file => !Path.GetRelativePath(packagePath, file)
            .StartsWith("META-INF", StringComparison.OrdinalIgnoreCase))
        .ToArray();
}
/// <summary>
/// Reads one class file, parses it and records a fingerprint for each eligible method.
/// </summary>
/// <param name="classPath">Path of the <c>.class</c> file to read.</param>
/// <param name="packagePath">Package root, used to compute the recorded relative source path.</param>
/// <param name="methods">Destination map; an existing entry with the same key is overwritten.</param>
/// <param name="request">Supplies <c>IncludePrivateMethods</c>.</param>
/// <param name="cancellationToken">Token passed to the file read.</param>
/// <remarks>
/// Files shorter than 10 bytes or without the class-file magic are ignored. Parse errors are
/// logged at debug level and swallowed; read errors propagate to the caller.
/// </remarks>
private async Task ProcessClassFileAsync(
    string classPath,
    string packagePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request,
    CancellationToken cancellationToken)
{
    var classBytes = await File.ReadAllBytesAsync(classPath, cancellationToken);
    if (classBytes.Length < 10)
    {
        return;
    }

    // Every class file starts with the big-endian magic number.
    if (BinaryPrimitives.ReadUInt32BigEndian(classBytes) != ClassFileMagic)
    {
        _logger.LogDebug("Invalid class file magic in {Path}", classPath);
        return;
    }

    try
    {
        var parsed = ParseClassFile(classBytes);
        var sourceFile = Path.GetRelativePath(packagePath, classPath);

        foreach (var candidate in parsed.Methods)
        {
            // Non-public, non-protected methods are only kept on request.
            var visible = request.IncludePrivateMethods || candidate.IsPublic || candidate.IsProtected;
            // Synthetic and bridge methods are never fingerprinted.
            var flaggedOut = candidate.IsSynthetic || candidate.IsBridge;
            if (!visible || flaggedOut)
            {
                continue;
            }

            var key = $"{parsed.ClassName}::{candidate.Name}{candidate.Descriptor}";
            methods[key] = new MethodFingerprint
            {
                MethodKey = key,
                DeclaringType = parsed.ClassName,
                Name = candidate.Name,
                Signature = ParseDescriptor(candidate.Descriptor),
                BodyHash = candidate.BodyHash,
                SignatureHash = ComputeHash(candidate.Descriptor),
                IsPublic = candidate.IsPublic,
                BodySize = candidate.CodeLength,
                SourceFile = sourceFile
            };
        }
    }
    catch (Exception ex)
    {
        _logger.LogDebug(ex, "Error parsing class file {Path}", classPath);
    }
}
/// <summary>
/// Walks a class file from the start and returns its class name, access flags and methods.
/// </summary>
/// <param name="bytes">Complete class-file contents; the magic is assumed to be verified already.</param>
/// <returns>The parsed class description. Fields and interfaces are skipped, not returned.</returns>
private JavaClassInfo ParseClassFile(byte[] bytes)
{
    var reader = new JavaClassReader(bytes);

    // Header: magic (4 bytes), then minor and major version (2 bytes each, unused here).
    reader.Skip(4);
    reader.ReadU2();
    reader.ReadU2();

    var constantPool = ParseConstantPool(reader);

    var classAccessFlags = reader.ReadU2();
    var resolvedName = ResolveClassName(constantPool, reader.ReadU2());

    // super_class index is read to advance the cursor but not used.
    reader.ReadU2();

    // Each interface entry is a 2-byte constant pool index.
    var interfaces = reader.ReadU2();
    reader.Skip(interfaces * 2);

    // Fields are skipped entirely; only their attribute lengths are honoured.
    int fieldsLeft = reader.ReadU2();
    while (fieldsLeft-- > 0)
    {
        SkipFieldOrMethod(reader);
    }

    var declaredMethods = reader.ReadU2();
    var parsedMethods = new List<JavaMethodInfo>();
    while (parsedMethods.Count < declaredMethods)
    {
        parsedMethods.Add(ParseMethod(reader, constantPool));
    }

    return new JavaClassInfo
    {
        ClassName = resolvedName,
        AccessFlags = classAccessFlags,
        Methods = parsedMethods
    };
}
/// <summary>
/// Reads the constant pool so that list index <c>n</c> corresponds to constant pool index <c>n</c>.
/// </summary>
/// <param name="reader">Reader positioned at <c>constant_pool_count</c>.</param>
/// <returns>
/// The pool entries. Index 0 is an unused placeholder, and the slot following a Long/Double
/// entry is a placeholder as well, because those constants occupy two indices.
/// </returns>
/// <exception cref="System.IO.InvalidDataException">
/// An unknown tag was encountered. Its payload size is unknown, so continuing would read
/// every following entry from the wrong offset.
/// </exception>
/// <remarks>
/// Utf8 entries are decoded with <see cref="Encoding.UTF8"/>.
/// NOTE(review): the class-file format specifies a modified UTF-8; confirm whether names
/// containing NUL or supplementary characters matter for this use.
/// </remarks>
private static List<ConstantPoolEntry> ParseConstantPool(JavaClassReader reader)
{
    var count = reader.ReadU2();
    var pool = new List<ConstantPoolEntry>(count) { new() }; // Index 0 is unused
    for (var i = 1; i < count; i++)
    {
        var tag = reader.ReadU1();
        var entry = new ConstantPoolEntry { Tag = tag };
        var occupiesTwoSlots = false;
        switch (tag)
        {
            case 1: // CONSTANT_Utf8
                var length = reader.ReadU2();
                entry.StringValue = Encoding.UTF8.GetString(reader.ReadBytes(length));
                break;
            case 3: // CONSTANT_Integer
            case 4: // CONSTANT_Float
                reader.Skip(4);
                break;
            case 5: // CONSTANT_Long
            case 6: // CONSTANT_Double
                reader.Skip(8);
                occupiesTwoSlots = true;
                break;
            case 7: // CONSTANT_Class
            case 8: // CONSTANT_String
                entry.NameIndex = reader.ReadU2();
                break;
            case 9: // CONSTANT_Fieldref
            case 10: // CONSTANT_Methodref
            case 11: // CONSTANT_InterfaceMethodref
                entry.ClassIndex = reader.ReadU2();
                entry.NameAndTypeIndex = reader.ReadU2();
                break;
            case 12: // CONSTANT_NameAndType
                entry.NameIndex = reader.ReadU2();
                entry.DescriptorIndex = reader.ReadU2();
                break;
            case 15: // CONSTANT_MethodHandle
                reader.Skip(3);
                break;
            case 16: // CONSTANT_MethodType
                reader.Skip(2);
                break;
            case 17: // CONSTANT_Dynamic
            case 18: // CONSTANT_InvokeDynamic
                reader.Skip(4);
                break;
            case 19: // CONSTANT_Module
            case 20: // CONSTANT_Package
                reader.Skip(2);
                break;
            default:
                throw new System.IO.InvalidDataException(
                    $"Unknown constant pool tag {tag} at index {i}.");
        }

        // The real entry goes at index i; for Long/Double the placeholder follows it at i + 1.
        pool.Add(entry);
        if (occupiesTwoSlots)
        {
            pool.Add(new ConstantPoolEntry());
            i++;
        }
    }

    return pool;
}
/// <summary>
/// Reads one <c>method_info</c> structure and hashes the bytecode of its <c>Code</c> attribute.
/// </summary>
/// <param name="reader">Reader positioned at the method's access flags.</param>
/// <param name="constantPool">Pool used to resolve the method name, descriptor and attribute names.</param>
/// <returns>
/// Method metadata. When the method has no <c>Code</c> attribute the code length is 0 and the
/// body hash is that of an empty byte array.
/// </returns>
private static JavaMethodInfo ParseMethod(JavaClassReader reader, List<ConstantPoolEntry> constantPool)
{
    var flags = reader.ReadU2();
    var methodName = GetUtf8(constantPool, reader.ReadU2());
    var methodDescriptor = GetUtf8(constantPool, reader.ReadU2());

    byte[] bytecode = [];
    var bytecodeLength = 0;

    int attributesLeft = reader.ReadU2();
    while (attributesLeft-- > 0)
    {
        var attributeName = GetUtf8(constantPool, reader.ReadU2());
        var attributeLength = reader.ReadU4();

        if (attributeName != "Code")
        {
            reader.Skip((int)attributeLength);
            continue;
        }

        // Code attribute layout: max_stack (2), max_locals (2), code_length (4), code bytes, ...
        reader.Skip(4);
        bytecodeLength = (int)reader.ReadU4();
        bytecode = reader.ReadBytes(bytecodeLength);

        // Whatever follows the bytecode (exception table, nested attributes) is skipped:
        // attribute length minus the 8 header bytes and the bytecode already consumed.
        var trailing = attributeLength - 8 - bytecodeLength;
        reader.Skip((int)trailing);
    }

    return new JavaMethodInfo
    {
        Name = methodName,
        Descriptor = methodDescriptor,
        AccessFlags = flags,
        CodeLength = bytecodeLength,
        BodyHash = ComputeHash(bytecode)
    };
}
/// <summary>
/// Advances <paramref name="reader"/> past one field or method structure without decoding it.
/// </summary>
/// <param name="reader">Reader positioned at the structure's access flags.</param>
private static void SkipFieldOrMethod(JavaClassReader reader)
{
    // access_flags (2) + name_index (2) + descriptor_index (2)
    reader.Skip(6);

    for (int attributesLeft = reader.ReadU2(); attributesLeft > 0; attributesLeft--)
    {
        // attribute_name_index (2), then a 4-byte length followed by that many payload bytes
        reader.Skip(2);
        var payloadLength = reader.ReadU4();
        reader.Skip((int)payloadLength);
    }
}
/// <summary>
/// Resolves a CONSTANT_Class entry (tag 7) to a dotted class name.
/// </summary>
/// <param name="pool">Parsed constant pool.</param>
/// <param name="classIndex">Constant pool index of the class entry.</param>
/// <returns>
/// The class name with <c>/</c> replaced by <c>.</c>, or <c>"Unknown"</c> when the index is out of
/// range or does not refer to a class entry.
/// </returns>
private static string ResolveClassName(List<ConstantPoolEntry> pool, int classIndex)
{
    var inRange = classIndex > 0 && classIndex < pool.Count;
    if (inRange && pool[classIndex].Tag == 7)
    {
        var internalName = GetUtf8(pool, pool[classIndex].NameIndex);
        return internalName.Replace('/', '.');
    }

    return "Unknown";
}
/// <summary>
/// Returns the string stored at <paramref name="index"/> in the constant pool.
/// </summary>
/// <returns>
/// The entry's string value, or an empty string when the index is out of range (including 0)
/// or the entry carries no string.
/// </returns>
private static string GetUtf8(List<ConstantPoolEntry> pool, int index) =>
    index > 0 && index < pool.Count
        ? pool[index].StringValue ?? string.Empty
        : string.Empty;
/// <summary>
/// Renders a JVM method descriptor as a readable signature.
/// </summary>
/// <param name="descriptor">Descriptor such as <c>(Ljava/lang/String;I)V</c>.</param>
/// <returns>
/// Parameter types in parentheses followed by <c> -> </c> and the return type, e.g.
/// <c>(String, int) -> void</c>. A descriptor without a leading parenthesis yields only
/// <c> -> type</c>.
/// </returns>
private static string ParseDescriptor(string descriptor)
{
    var text = new StringBuilder();
    var pos = 0;

    if (descriptor.StartsWith('('))
    {
        var parameterTypes = new List<string>();
        pos = 1;
        while (pos < descriptor.Length && descriptor[pos] != ')')
        {
            var (typeName, next) = ParseType(descriptor, pos);
            parameterTypes.Add(typeName);
            pos = next;
        }

        text.Append('(').Append(string.Join(", ", parameterTypes)).Append(')');
        pos++; // step over ')'
    }

    if (pos < descriptor.Length)
    {
        var (returnType, _) = ParseType(descriptor, pos);
        text.Append(" -> ").Append(returnType);
    }

    return text.ToString();
}
/// <summary>
/// Decodes the single field type that starts at <paramref name="index"/> in a descriptor.
/// </summary>
/// <returns>
/// The readable type name and the index just past it. Unrecognised characters decode to
/// <c>"?"</c>; an index at or past the end decodes to <c>"void"</c> without advancing.
/// </returns>
private static (string typeName, int newIndex) ParseType(string descriptor, int index)
{
    if (index >= descriptor.Length)
    {
        return ("void", index);
    }

    var code = descriptor[index];

    // Composite types consume more than one character and are delegated.
    switch (code)
    {
        case '[':
            return ParseArrayType(descriptor, index);
        case 'L':
            return ParseObjectType(descriptor, index);
    }

    // Everything else is a one-character primitive code.
    var primitive = code switch
    {
        'B' => "byte",
        'C' => "char",
        'D' => "double",
        'F' => "float",
        'I' => "int",
        'J' => "long",
        'S' => "short",
        'Z' => "boolean",
        'V' => "void",
        _ => "?"
    };
    return (primitive, index + 1);
}
/// <summary>
/// Decodes an array type: the <c>[</c> at <paramref name="index"/> followed by its element type.
/// </summary>
/// <returns>The element type name suffixed with <c>[]</c> and the index past the element type.</returns>
private static (string typeName, int newIndex) ParseArrayType(string descriptor, int index)
{
    var element = ParseType(descriptor, index + 1);
    return (element.typeName + "[]", element.newIndex);
}
/// <summary>
/// Decodes an object type of the form <c>Lpkg/Name;</c> starting at <paramref name="index"/>.
/// </summary>
/// <returns>
/// The simple class name (text after the last <c>/</c>) and the index past the semicolon.
/// When no semicolon follows, returns <c>("Object", index + 1)</c>.
/// </returns>
private static (string typeName, int newIndex) ParseObjectType(string descriptor, int index)
{
    var terminator = descriptor.IndexOf(';', index);
    if (terminator < 0)
    {
        return ("Object", index + 1);
    }

    var qualifiedName = descriptor.Substring(index + 1, terminator - index - 1);
    var lastSlash = qualifiedName.LastIndexOf('/');
    return (qualifiedName[(lastSlash + 1)..], terminator + 1);
}
/// <summary>
/// Produces a short content hash for <paramref name="data"/>.
/// </summary>
/// <returns>
/// The first 16 bytes of the SHA-256 digest as 32 lowercase hex characters, or the literal
/// <c>"empty"</c> for a zero-length array.
/// </returns>
private static string ComputeHash(byte[] data)
{
    if (data.Length == 0)
    {
        return "empty";
    }

    var digest = SHA256.HashData(data);
    return Convert.ToHexStringLower(digest, 0, 16);
}
/// <summary>
/// Hashes the UTF-8 encoding of <paramref name="data"/> with the byte-array overload.
/// </summary>
/// <returns>The truncated hash, or the literal <c>"empty"</c> for a null or empty string.</returns>
private static string ComputeHash(string data) =>
    string.IsNullOrEmpty(data)
        ? "empty"
        : ComputeHash(Encoding.UTF8.GetBytes(data));
/// <summary>
/// Forward-only big-endian cursor over the bytes of a class file.
/// </summary>
/// <remarks>
/// Every read or skip is validated against the remaining length. A negative count, or one that
/// would run past the end of the buffer, throws <see cref="System.IO.InvalidDataException"/>
/// immediately, so a corrupt length field cannot move the cursor backwards or leave it out of
/// range for a later read to trip over.
/// </remarks>
private sealed class JavaClassReader(byte[] data)
{
    private int _position;

    /// <summary>Reads one unsigned byte.</summary>
    public byte ReadU1() => data[Advance(1)];

    /// <summary>Reads a big-endian unsigned 16-bit value.</summary>
    public ushort ReadU2() => BinaryPrimitives.ReadUInt16BigEndian(data.AsSpan(Advance(2), 2));

    /// <summary>Reads a big-endian unsigned 32-bit value.</summary>
    public uint ReadU4() => BinaryPrimitives.ReadUInt32BigEndian(data.AsSpan(Advance(4), 4));

    /// <summary>Returns a copy of the next <paramref name="count"/> bytes.</summary>
    public byte[] ReadBytes(int count)
    {
        var start = Advance(count);
        return data[start..(start + count)];
    }

    /// <summary>Moves the cursor forward by <paramref name="count"/> bytes.</summary>
    public void Skip(int count) => Advance(count);

    // Validates the move, advances the cursor and returns the offset it started from.
    private int Advance(int count)
    {
        if (count < 0 || count > data.Length - _position)
        {
            throw new System.IO.InvalidDataException(
                $"Class file is truncated or corrupt: cannot consume {count} byte(s) at offset {_position} of {data.Length}.");
        }

        var start = _position;
        _position += count;
        return start;
    }
}
/// <summary>
/// One slot of the parsed constant pool. Only the properties relevant to the entry's tag are
/// populated by ParseConstantPool; the rest keep their default values.
/// </summary>
private sealed class ConstantPoolEntry
{
// Tag byte read from the class file; 0 for placeholder slots.
public byte Tag { get; init; }
// Decoded text; set only for tag 1 (Utf8) entries.
public string? StringValue { get; set; }
// Index read for tag 7 (Class), tag 8 (String) and tag 12 (NameAndType) entries.
public int NameIndex { get; set; }
// Descriptor index; set only for tag 12 (NameAndType) entries.
public int DescriptorIndex { get; set; }
// Class index; set for tags 9-11 (Fieldref, Methodref, InterfaceMethodref).
public int ClassIndex { get; set; }
// NameAndType index; set for tags 9-11 (Fieldref, Methodref, InterfaceMethodref).
public int NameAndTypeIndex { get; set; }
}
/// <summary>
/// Result of parsing one class file: the class name, its access flags and its methods.
/// </summary>
private sealed record JavaClassInfo
{
// Dotted class name, or "Unknown" when it could not be resolved from the constant pool.
public required string ClassName { get; init; }
// Raw class-level access_flags value as read from the file.
public ushort AccessFlags { get; init; }
// Methods in the order they appear in the class file.
public required List<JavaMethodInfo> Methods { get; init; }
}
/// <summary>
/// Metadata for one method parsed from a class file, with convenience accessors that decode
/// individual bits of the raw access flags.
/// </summary>
private sealed record JavaMethodInfo
{
// Method name as stored in the constant pool (empty when unresolved).
public required string Name { get; init; }
// Raw JVM method descriptor, e.g. "(I)V".
public required string Descriptor { get; init; }
// Raw method access_flags value as read from the file.
public ushort AccessFlags { get; init; }
// Length in bytes of the method's bytecode; 0 when there is no Code attribute.
public int CodeLength { get; init; }
// Hash of the method's bytecode bytes.
public required string BodyHash { get; init; }
// Bit 0x0001 of the access flags.
public bool IsPublic => (AccessFlags & 0x0001) != 0;
// Bit 0x0004 of the access flags.
public bool IsProtected => (AccessFlags & 0x0004) != 0;
// Bit 0x1000 of the access flags.
public bool IsSynthetic => (AccessFlags & 0x1000) != 0;
// Bit 0x0040 of the access flags.
public bool IsBridge => (AccessFlags & 0x0040) != 0;
}
}

View File

@@ -0,0 +1,492 @@
// -----------------------------------------------------------------------------
// JavaScriptMethodFingerprinter.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-009)
// Description: JavaScript/Node.js method fingerprinting. Functions are located with
// source-generated regular expressions and their normalized bodies are hashed.
// -----------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace StellaOps.Scanner.VulnSurfaces.Fingerprint;
/// <summary>
/// Computes method fingerprints for JavaScript/Node.js packages using AST-based hashing.
/// Parses .js/.mjs/.cjs files and extracts function declarations, methods, and arrow functions.
/// </summary>
public sealed partial class JavaScriptMethodFingerprinter : IMethodFingerprinter
{
private readonly ILogger<JavaScriptMethodFingerprinter> _logger;
// Regex patterns for JavaScript function extraction
[GeneratedRegex(@"(export\s+)?(async\s+)?function\s+(\w+)\s*\(([^)]*)\)\s*\{", RegexOptions.Compiled)]
private static partial Regex FunctionDeclarationRegex();
[GeneratedRegex(@"(\w+)\s*:\s*(async\s+)?function\s*\(([^)]*)\)\s*\{", RegexOptions.Compiled)]
private static partial Regex ObjectMethodRegex();
[GeneratedRegex(@"(async\s+)?(\w+)\s*\(([^)]*)\)\s*\{", RegexOptions.Compiled)]
private static partial Regex ClassMethodRegex();
[GeneratedRegex(@"(const|let|var)\s+(\w+)\s*=\s*(async\s+)?\(([^)]*)\)\s*=>", RegexOptions.Compiled)]
private static partial Regex ArrowFunctionRegex();
[GeneratedRegex(@"class\s+(\w+)(?:\s+extends\s+(\w+))?\s*\{", RegexOptions.Compiled)]
private static partial Regex ClassDeclarationRegex();
[GeneratedRegex(@"module\.exports\s*=\s*(?:class\s+)?(\w+)", RegexOptions.Compiled)]
private static partial Regex ModuleExportsRegex();
/// <summary>
/// Creates a fingerprinter that reports its diagnostics through <paramref name="logger"/>.
/// </summary>
/// <param name="logger">Logger used for debug and warning output.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
public JavaScriptMethodFingerprinter(ILogger<JavaScriptMethodFingerprinter> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
public string Ecosystem => "npm";
/// <inheritdoc />
/// <remarks>
/// Every file returned by <c>GetJavaScriptFiles</c> is processed in turn. A file that cannot be
/// read or processed is logged at debug level and skipped; it does not fail the run. Any other
/// failure is converted into a failed <see cref="FingerprintResult"/>.
/// Cancellation is never converted into a result: an <see cref="OperationCanceledException"/>
/// raised by the token is propagated to the caller.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
public async Task<FingerprintResult> FingerprintAsync(
    FingerprintRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);
    var sw = Stopwatch.StartNew();
    var methods = new Dictionary<string, MethodFingerprint>(StringComparer.Ordinal);
    try
    {
        var jsFiles = GetJavaScriptFiles(request.PackagePath);
        var filesProcessed = 0;
        foreach (var jsPath in jsFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                await ProcessJavaScriptFileAsync(jsPath, request.PackagePath, methods, request, cancellationToken);
                filesProcessed++;
            }
            // Best effort per file, but cancellation must not be mistaken for a broken file.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to process JavaScript file {Path}", jsPath);
            }
        }

        sw.Stop();
        _logger.LogDebug(
            "Fingerprinted {MethodCount} functions from {FileCount} files in {Duration}ms",
            methods.Count, filesProcessed, sw.ElapsedMilliseconds);
        return FingerprintResult.Ok(methods, sw.Elapsed, filesProcessed);
    }
    // Let cancellation surface as an exception instead of a "failed" fingerprint result.
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to fingerprint JavaScript package at {Path}", request.PackagePath);
        return FingerprintResult.Fail(ex.Message, sw.Elapsed);
    }
}
/// <summary>
/// Lists the JavaScript source files (<c>.js</c>, <c>.mjs</c>, <c>.cjs</c>, <c>.jsx</c>) under
/// <paramref name="packagePath"/>, leaving out bundled dependencies and build output.
/// </summary>
/// <param name="packagePath">Root directory of the extracted package; may be relative.</param>
/// <returns>
/// The matching file paths, or an empty array when the directory does not exist. Files are
/// excluded when their relative path contains <c>.min.</c> or when they live under a top-level
/// <c>node_modules</c> or <c>dist</c> directory.
/// </returns>
private static string[] GetJavaScriptFiles(string packagePath)
{
    if (!Directory.Exists(packagePath))
        return [];

    return Directory.GetFiles(packagePath, "*", SearchOption.AllDirectories)
        .Where(HasJavaScriptExtension)
        .Where(file => !IsExcluded(Path.GetRelativePath(packagePath, file)))
        .ToArray();

    static bool HasJavaScriptExtension(string file) =>
        Path.GetExtension(file).ToLowerInvariant() is ".js" or ".mjs" or ".cjs" or ".jsx";

    static bool IsExcluded(string relativePath)
    {
        if (relativePath.Contains(".min.", StringComparison.OrdinalIgnoreCase))
            return true;

        // Compare the whole top-level directory name. A plain prefix test also dropped
        // unrelated sources such as "distance.js" or "distribution/index.js".
        var separator = relativePath.IndexOfAny([Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar]);
        if (separator < 0)
            return false; // file sits directly in the package root

        var topLevelDirectory = relativePath[..separator];
        return topLevelDirectory.Equals("node_modules", StringComparison.OrdinalIgnoreCase)
            || topLevelDirectory.Equals("dist", StringComparison.OrdinalIgnoreCase);
    }
}
/// <summary>
/// Reads one JavaScript file and runs every extraction pass over its text.
/// </summary>
/// <param name="jsPath">Path of the file to read.</param>
/// <param name="packagePath">Package root, used to derive the relative path and module name.</param>
/// <param name="methods">Destination map shared by all passes.</param>
/// <param name="request">Fingerprinting options forwarded to each pass.</param>
/// <param name="cancellationToken">Token passed to the file read.</param>
private async Task ProcessJavaScriptFileAsync(
    string jsPath,
    string packagePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request,
    CancellationToken cancellationToken)
{
    var source = await File.ReadAllTextAsync(jsPath, cancellationToken);
    var sourceFile = Path.GetRelativePath(packagePath, jsPath);
    var module = GetModuleName(sourceFile);

    // The passes run in a fixed order: each one assigns by key, so a later pass
    // replaces an earlier entry that produced the same method key.
    ExtractFunctionDeclarations(source, module, sourceFile, methods, request);
    ExtractClassMethods(source, module, sourceFile, methods, request);
    ExtractArrowFunctions(source, module, sourceFile, methods, request);
    ExtractObjectMethods(source, module, sourceFile, methods, request);
}
/// <summary>
/// Records a fingerprint for each <c>function name(...) {</c> declaration found in
/// <paramref name="content"/>.
/// </summary>
/// <remarks>
/// A declaration counts as exported only when the <c>export</c> keyword immediately precedes
/// it; non-exported declarations are kept only when the request includes private methods.
/// </remarks>
private void ExtractFunctionDeclarations(
    string content,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    foreach (Match declaration in FunctionDeclarationRegex().Matches(content))
    {
        // Groups: 1 = "export ", 2 = "async ", 3 = name, 4 = raw parameter list.
        var exported = declaration.Groups[1].Length > 0;
        if (!exported && !request.IncludePrivateMethods)
        {
            continue;
        }

        var name = declaration.Groups[3].Value;
        var rawParameters = declaration.Groups[4].Value.Trim();
        var asyncPrefix = declaration.Groups[2].Length > 0 ? "async " : "";
        var key = $"{moduleName}::{name}({NormalizeParams(rawParameters)})";

        methods[key] = new MethodFingerprint
        {
            MethodKey = key,
            DeclaringType = moduleName,
            Name = name,
            Signature = $"{asyncPrefix}function {name}({rawParameters})",
            BodyHash = ComputeFunctionBodyHash(content, declaration.Index),
            IsPublic = exported,
            SourceFile = filePath,
            LineNumber = GetLineNumber(content, declaration.Index)
        };
    }
}
/// <summary>
/// Records a fingerprint for each method found inside the body of every <c>class</c> declaration
/// in <paramref name="content"/>.
/// </summary>
/// <remarks>
/// <para>
/// The method pattern is a loose <c>name(args) {</c> match, so statements such as
/// <c>if (x) {</c> match it too; names that are control-flow keywords are therefore discarded.
/// </para>
/// <para>
/// The pattern captures only word characters, so a private method <c>#name</c> is captured as
/// <c>name</c>. The preceding character is inspected to restore the <c>#</c> and to treat the
/// method as private. Constructors and private methods are kept only when the request includes
/// private methods.
/// </para>
/// </remarks>
private void ExtractClassMethods(
    string content,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    foreach (Match classMatch in ClassDeclarationRegex().Matches(content))
    {
        var className = classMatch.Groups[1].Value;
        var classBodyStart = content.IndexOf('{', classMatch.Index);
        if (classBodyStart < 0) continue;

        // Find class body (simple brace matching)
        var classBody = ExtractBracedBlock(content, classBodyStart);
        if (string.IsNullOrEmpty(classBody)) continue;

        // classBody begins right after the opening brace, so this converts body offsets
        // back into offsets within the whole file.
        var bodyOffset = classBodyStart + 1;

        foreach (Match methodMatch in ClassMethodRegex().Matches(classBody))
        {
            var isAsync = !string.IsNullOrEmpty(methodMatch.Groups[1].Value);
            var nameGroup = methodMatch.Groups[2];
            var methodName = nameGroup.Value;
            var parameters = methodMatch.Groups[3].Value.Trim();

            // "if (...) {", "for (...) {" and friends are statements, not methods.
            if (methodName is "if" or "for" or "while" or "switch" or "catch" or "with" or "function")
                continue;

            var isPrivate = nameGroup.Index > 0 && classBody[nameGroup.Index - 1] == '#';
            if (isPrivate)
                methodName = "#" + methodName;

            // Skip constructor unless specifically requested
            if (methodName == "constructor" && !request.IncludePrivateMethods)
                continue;

            // Skip private methods (prefixed with #)
            if (isPrivate && !request.IncludePrivateMethods)
                continue;

            var bodyHash = ComputeFunctionBodyHash(classBody, methodMatch.Index);
            var methodKey = $"{moduleName}.{className}::{methodName}({NormalizeParams(parameters)})";
            methods[methodKey] = new MethodFingerprint
            {
                MethodKey = methodKey,
                DeclaringType = $"{moduleName}.{className}",
                Name = methodName,
                Signature = $"{(isAsync ? "async " : "")}{methodName}({parameters})",
                BodyHash = bodyHash,
                IsPublic = !isPrivate,
                SourceFile = filePath,
                LineNumber = GetLineNumber(content, bodyOffset + methodMatch.Index)
            };
        }
    }
}
/// <summary>
/// Records a fingerprint for each <c>const|let|var name = (...) =></c> arrow function found in
/// <paramref name="content"/>.
/// </summary>
/// <remarks>
/// The function counts as exported when the text between the start of its line and the
/// declaration contains <c>export</c>; non-exported functions are kept only when the request
/// includes private methods.
/// </remarks>
private void ExtractArrowFunctions(
    string content,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    foreach (Match arrow in ArrowFunctionRegex().Matches(content))
    {
        // Groups: 1 = const/let/var, 2 = name, 3 = "async ", 4 = raw parameter list.
        var lineStart = content.LastIndexOf('\n', arrow.Index) + 1;
        var exported = content[lineStart..arrow.Index].Contains("export", StringComparison.Ordinal);
        if (!exported && !request.IncludePrivateMethods)
        {
            continue;
        }

        var name = arrow.Groups[2].Value;
        var rawParameters = arrow.Groups[4].Value.Trim();
        var asyncPrefix = arrow.Groups[3].Length > 0 ? "async " : "";
        var key = $"{moduleName}::{name}({NormalizeParams(rawParameters)})";

        methods[key] = new MethodFingerprint
        {
            MethodKey = key,
            DeclaringType = moduleName,
            Name = name,
            Signature = $"{asyncPrefix}({rawParameters}) =>",
            BodyHash = ComputeArrowFunctionBodyHash(content, arrow.Index),
            IsPublic = exported,
            SourceFile = filePath,
            LineNumber = GetLineNumber(content, arrow.Index)
        };
    }
}
/// <summary>
/// Records a fingerprint for each <c>name: function(...) {</c> object-literal method found in
/// <paramref name="content"/>.
/// </summary>
/// <remarks>
/// Every match is recorded and marked public; <paramref name="request"/> is not consulted.
/// Keys carry an <c>obj.</c> prefix before the method name.
/// </remarks>
private void ExtractObjectMethods(
    string content,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    foreach (Match member in ObjectMethodRegex().Matches(content))
    {
        // Groups: 1 = property name, 2 = "async ", 3 = raw parameter list.
        var name = member.Groups[1].Value;
        var rawParameters = member.Groups[3].Value.Trim();
        var asyncPrefix = member.Groups[2].Length > 0 ? "async " : "";
        var key = $"{moduleName}::obj.{name}({NormalizeParams(rawParameters)})";

        methods[key] = new MethodFingerprint
        {
            MethodKey = key,
            DeclaringType = moduleName,
            Name = name,
            Signature = $"{asyncPrefix}{name}({rawParameters})",
            BodyHash = ComputeFunctionBodyHash(content, member.Index),
            IsPublic = true,
            SourceFile = filePath,
            LineNumber = GetLineNumber(content, member.Index)
        };
    }
}
/// <summary>
/// Converts a package-relative file path into a dotted module name.
/// </summary>
/// <param name="relativePath">Path such as <c>src/utils/helper.js</c>.</param>
/// <returns>The path without its extension and with separators replaced by dots, e.g. <c>src.utils.helper</c>.</returns>
private static string GetModuleName(string relativePath)
{
    var stem = Path.ChangeExtension(relativePath, null);
    var segments = stem.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
    return string.Join('.', segments);
}
/// <summary>
/// Reduces a raw parameter list to comma-joined parameter names.
/// </summary>
/// <param name="parameters">Text between the parentheses of a function signature.</param>
/// <returns>
/// The names joined with <c>,</c> and no spaces. Anything from the first <c>=</c> of a parameter
/// onward (its default value) is dropped, as are empty segments. Blank input yields an empty string.
/// </returns>
private static string NormalizeParams(string parameters)
{
    if (string.IsNullOrWhiteSpace(parameters))
    {
        return "";
    }

    var names = new List<string>();
    foreach (var segment in parameters.Split(','))
    {
        var equalsAt = segment.IndexOf('=');
        var name = (equalsAt < 0 ? segment : segment[..equalsAt]).Trim();
        if (name.Length > 0)
        {
            names.Add(name);
        }
    }

    return string.Join(",", names);
}
/// <summary>
/// Hashes the normalized body of the function whose declaration starts at
/// <paramref name="startIndex"/>.
/// </summary>
/// <param name="content">Text containing the function.</param>
/// <param name="startIndex">Offset of the start of the declaration (before its parameter list).</param>
/// <returns>The body hash, or <c>"empty"</c> when no opening brace is found.</returns>
/// <remarks>
/// The body's brace is the first <c>{</c> after the parameter list. A destructured parameter
/// such as <c>function f({ a, b }) { ... }</c> puts a brace inside the parentheses; that brace
/// is skipped so the parameter pattern is not hashed in place of the body.
/// </remarks>
private static string ComputeFunctionBodyHash(string content, int startIndex)
{
    var braceStart = content.IndexOf('{', startIndex);
    if (braceStart < 0) return "empty";

    var parenOpen = content.IndexOf('(', startIndex);
    if (parenOpen >= 0 && parenOpen < braceStart)
    {
        // The callers' patterns do not allow ')' inside the parameter list, so the first
        // ')' closes it. A brace found before that point belongs to the parameters.
        var parenClose = content.IndexOf(')', parenOpen + 1);
        if (parenClose > braceStart)
        {
            braceStart = content.IndexOf('{', parenClose + 1);
            if (braceStart < 0) return "empty";
        }
    }

    var body = ExtractBracedBlock(content, braceStart);
    return ComputeHash(NormalizeBody(body));
}
/// <summary>
/// Hashes the normalized body of the arrow function whose declaration starts at
/// <paramref name="startIndex"/>.
/// </summary>
/// <returns>
/// The body hash, or <c>"empty"</c> when no <c>=></c> follows or nothing but whitespace comes
/// after it. A block body is taken up to its matching brace; an expression body runs to the
/// next <c>;</c> or newline (or the end of the text).
/// </returns>
private static string ComputeArrowFunctionBodyHash(string content, int startIndex)
{
    var arrowAt = content.IndexOf("=>", startIndex);
    if (arrowAt < 0)
    {
        return "empty";
    }

    // Step over the arrow and any whitespace that follows it.
    var cursor = arrowAt + 2;
    for (; cursor < content.Length; cursor++)
    {
        if (!char.IsWhiteSpace(content[cursor]))
        {
            break;
        }
    }

    if (cursor >= content.Length)
    {
        return "empty";
    }

    string bodyText;
    if (content[cursor] == '{')
    {
        bodyText = ExtractBracedBlock(content, cursor);
    }
    else
    {
        var terminator = content.IndexOfAny([';', '\n'], cursor);
        bodyText = terminator < 0 ? content[cursor..] : content[cursor..terminator];
    }

    return ComputeHash(NormalizeBody(bodyText));
}
/// <summary>
/// Returns the text between the brace at <paramref name="braceStart"/> and its matching
/// closing brace.
/// </summary>
/// <param name="content">Text to scan.</param>
/// <param name="braceStart">Offset of the opening <c>{</c>.</param>
/// <returns>
/// The enclosed text without the outer braces, or an empty string when
/// <paramref name="braceStart"/> does not point at <c>{</c> or no matching brace exists.
/// </returns>
/// <remarks>
/// Braces inside string literals, template literals, line comments and block comments are not
/// counted. Regular-expression literals are not recognised, so a brace or quote inside one can
/// still mislead the scan.
/// </remarks>
private static string ExtractBracedBlock(string content, int braceStart)
{
    if (braceStart < 0 || braceStart >= content.Length || content[braceStart] != '{')
        return string.Empty;

    var depth = 0;
    var i = braceStart;
    while (i < content.Length)
    {
        var c = content[i];
        var next = i + 1 < content.Length ? content[i + 1] : '\0';

        if (c == '/' && next == '/')
        {
            // Line comment: nothing up to the end of the line is code.
            var endOfLine = content.IndexOf('\n', i + 2);
            if (endOfLine < 0) break;
            i = endOfLine + 1;
            continue;
        }

        if (c == '/' && next == '*')
        {
            var commentEnd = content.IndexOf("*/", i + 2, StringComparison.Ordinal);
            if (commentEnd < 0) break;
            i = commentEnd + 2;
            continue;
        }

        if (c is '"' or '\'' or '`')
        {
            i = SkipStringLiteral(content, i);
            continue;
        }

        if (c == '{')
        {
            depth++;
        }
        else if (c == '}')
        {
            depth--;
            if (depth == 0)
                return content[(braceStart + 1)..i];
        }

        i++;
    }

    return string.Empty;

    // Returns the offset just past the literal that opens at openQuote.
    static int SkipStringLiteral(string text, int openQuote)
    {
        var quote = text[openQuote];
        var j = openQuote + 1;
        while (j < text.Length)
        {
            var ch = text[j];
            if (ch == '\\')
            {
                j += 2; // an escaped character never terminates the literal
                continue;
            }

            if (ch == quote)
                return j + 1;

            // Only template literals may span lines. Stopping here limits the damage when
            // a stray quote (for example inside a regex literal) opened a bogus "string".
            if (ch == '\n' && quote != '`')
                return j;

            j++;
        }

        return text.Length;
    }
}
/// <summary>
/// Normalizes a function body for hashing by removing comments and collapsing whitespace.
/// </summary>
/// <param name="body">Raw body text.</param>
/// <returns>
/// The body with <c>//</c> and <c>/* */</c> comments removed, every run of whitespace outside
/// string literals collapsed to one space, and the result trimmed. String and template
/// literals are copied verbatim. A blank body yields the literal string <c>"empty"</c>.
/// </returns>
/// <remarks>
/// Escape sequences inside literals are consumed as a pair. Looking only at the preceding
/// character mistook the closing quote of <c>"\\"</c> for an escaped quote and treated the
/// rest of the body as string content.
/// </remarks>
private static string NormalizeBody(string body)
{
    if (string.IsNullOrWhiteSpace(body))
        return "empty";

    var sb = new StringBuilder(body.Length);
    var i = 0;
    while (i < body.Length)
    {
        var c = body[i];
        var next = i + 1 < body.Length ? body[i + 1] : '\0';

        if (c == '/' && next == '/')
        {
            // Line comment: dropped together with its terminating newline.
            var endOfLine = body.IndexOf('\n', i + 2);
            i = endOfLine < 0 ? body.Length : endOfLine + 1;
            continue;
        }

        if (c == '/' && next == '*')
        {
            var commentEnd = body.IndexOf("*/", i + 2, StringComparison.Ordinal);
            i = commentEnd < 0 ? body.Length : commentEnd + 2;
            continue;
        }

        if (c is '"' or '\'' or '`')
        {
            // Copy the whole literal untouched, honouring backslash escapes.
            var end = i + 1;
            while (end < body.Length)
            {
                if (body[end] == '\\')
                {
                    end += 2;
                    continue;
                }

                if (body[end] == c)
                {
                    end++;
                    break;
                }

                end++;
            }

            end = Math.Min(end, body.Length);
            sb.Append(body, i, end - i);
            i = end;
            continue;
        }

        if (char.IsWhiteSpace(c))
        {
            // Collapse runs of whitespace; leading whitespace is never emitted.
            if (sb.Length > 0 && !char.IsWhiteSpace(sb[^1]))
                sb.Append(' ');
        }
        else
        {
            sb.Append(c);
        }

        i++;
    }

    return sb.ToString().Trim();
}
/// <summary>
/// Produces a short content hash of the UTF-8 encoding of <paramref name="content"/>.
/// </summary>
/// <returns>
/// The first 16 bytes of the SHA-256 digest as 32 lowercase hex characters, or the literal
/// <c>"empty"</c> for a null or empty string.
/// </returns>
private static string ComputeHash(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return "empty";
    }

    var digest = SHA256.HashData(Encoding.UTF8.GetBytes(content));
    return Convert.ToHexStringLower(digest, 0, 16);
}
/// <summary>
/// Returns the 1-based line number of the character at <paramref name="index"/>.
/// </summary>
/// <returns>One plus the number of <c>\n</c> characters before <paramref name="index"/>.</returns>
private static int GetLineNumber(string content, int index)
{
    var limit = Math.Min(index, content.Length);
    var line = 1;
    var cursor = 0;

    // Jump from newline to newline instead of visiting every character.
    while (cursor < limit)
    {
        var newlineAt = content.IndexOf('\n', cursor, limit - cursor);
        if (newlineAt < 0)
        {
            break;
        }

        line++;
        cursor = newlineAt + 1;
    }

    return line;
}
}

View File

@@ -0,0 +1,433 @@
// -----------------------------------------------------------------------------
// PythonAstFingerprinter.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-011)
// Description: Python method fingerprinting using AST-based hashing.
// Parses .py files and extracts function and method definitions.
// -----------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace StellaOps.Scanner.VulnSurfaces.Fingerprint;
/// <summary>
/// Computes method fingerprints for Python packages using AST-based hashing.
/// Parses .py files and extracts function definitions and class methods.
/// </summary>
public sealed partial class PythonAstFingerprinter : IMethodFingerprinter
{
private readonly ILogger<PythonAstFingerprinter> _logger;
// Regex patterns for Python function extraction
[GeneratedRegex(@"^(async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?:", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex FunctionDefRegex();
[GeneratedRegex(@"^class\s+(\w+)(?:\s*\([^)]*\))?\s*:", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex ClassDefRegex();
[GeneratedRegex(@"^(\s+)(async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?:", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex MethodDefRegex();
[GeneratedRegex(@"^(\s*)@\w+(?:\([^)]*\))?$", RegexOptions.Multiline | RegexOptions.Compiled)]
private static partial Regex DecoratorRegex();
/// <summary>
/// Creates a fingerprinter that reports its diagnostics through <paramref name="logger"/>.
/// </summary>
/// <param name="logger">Logger used for debug and warning output.</param>
/// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
public PythonAstFingerprinter(ILogger<PythonAstFingerprinter> logger)
{
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
public string Ecosystem => "pypi";
/// <inheritdoc />
/// <remarks>
/// Every file returned by <c>GetPythonFiles</c> is processed in turn. A file that cannot be
/// read or processed is logged at debug level and skipped; it does not fail the run. Any other
/// failure is converted into a failed <see cref="FingerprintResult"/>.
/// Cancellation is never converted into a result: an <see cref="OperationCanceledException"/>
/// raised by the token is propagated to the caller.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
public async Task<FingerprintResult> FingerprintAsync(
    FingerprintRequest request,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(request);
    var sw = Stopwatch.StartNew();
    var methods = new Dictionary<string, MethodFingerprint>(StringComparer.Ordinal);
    try
    {
        var pyFiles = GetPythonFiles(request.PackagePath);
        var filesProcessed = 0;
        foreach (var pyPath in pyFiles)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                await ProcessPythonFileAsync(pyPath, request.PackagePath, methods, request, cancellationToken);
                filesProcessed++;
            }
            // Best effort per file, but cancellation must not be mistaken for a broken file.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                _logger.LogDebug(ex, "Failed to process Python file {Path}", pyPath);
            }
        }

        sw.Stop();
        _logger.LogDebug(
            "Fingerprinted {MethodCount} functions from {FileCount} files in {Duration}ms",
            methods.Count, filesProcessed, sw.ElapsedMilliseconds);
        return FingerprintResult.Ok(methods, sw.Elapsed, filesProcessed);
    }
    // Let cancellation surface as an exception instead of a "failed" fingerprint result.
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        sw.Stop();
        _logger.LogWarning(ex, "Failed to fingerprint Python package at {Path}", request.PackagePath);
        return FingerprintResult.Fail(ex.Message, sw.Elapsed);
    }
}
/// <summary>
/// Lists every <c>.py</c> file under <paramref name="packagePath"/> (recursively), leaving out
/// tests and packaging artefacts.
/// </summary>
/// <param name="packagePath">Root directory of the extracted package; may be relative.</param>
/// <returns>
/// The matching file paths, or an empty array when the directory does not exist. Files are
/// excluded when their package-relative path starts with <c>test</c> or contains
/// <c>__pycache__</c> or <c>.egg-info</c> (all case-insensitive).
/// </returns>
private static string[] GetPythonFiles(string packagePath)
{
    if (!Directory.Exists(packagePath))
        return [];

    return Directory.GetFiles(packagePath, "*.py", SearchOption.AllDirectories)
        .Where(file =>
        {
            // Path.GetRelativePath strips only the root prefix. The previous string.Replace
            // removed every occurrence of the root text, which corrupted the path for
            // relative roots (e.g. root "s" turned "s/tests/x.py" into "tet/x.py").
            var relativePath = Path.GetRelativePath(packagePath, file);
            return !relativePath.StartsWith("test", StringComparison.OrdinalIgnoreCase) &&
                   !relativePath.Contains("__pycache__", StringComparison.OrdinalIgnoreCase) &&
                   !relativePath.Contains(".egg-info", StringComparison.OrdinalIgnoreCase);
        })
        .ToArray();
}
/// <summary>
/// Reads one Python file and extracts module-level functions followed by class methods.
/// </summary>
/// <param name="pyPath">Path of the file to read.</param>
/// <param name="packagePath">Package root, used to derive the relative path and module name.</param>
/// <param name="methods">Destination map shared by both passes.</param>
/// <param name="request">Fingerprinting options forwarded to each pass.</param>
/// <param name="cancellationToken">Token passed to the file read.</param>
private async Task ProcessPythonFileAsync(
    string pyPath,
    string packagePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request,
    CancellationToken cancellationToken)
{
    var source = await File.ReadAllTextAsync(pyPath, cancellationToken);

    // The passes need both the raw text (for regex offsets) and the text split on '\n'.
    var sourceLines = source.Split('\n');
    var sourceFile = Path.GetRelativePath(packagePath, pyPath);
    var module = GetModuleName(sourceFile);

    ExtractFunctions(source, sourceLines, module, sourceFile, methods, request);
    ExtractClassMethods(source, sourceLines, module, sourceFile, methods, request);
}
/// <summary>
/// Records a fingerprint for each module-level <c>def</c> / <c>async def</c> in
/// <paramref name="content"/>.
/// </summary>
/// <remarks>
/// <para>
/// The pattern is anchored at the start of a line with no leading whitespace, so only
/// unindented definitions match; no separate indentation check is needed.
/// </para>
/// <para>
/// A signature may span several lines. The body is therefore hashed from the line after the
/// one holding the signature's terminating colon, not from the line after <c>def</c>;
/// otherwise the parameter lines would be hashed in place of the body.
/// </para>
/// <para>
/// Names with a single leading underscore are skipped unless the request includes private
/// methods. Any name starting with an underscore is reported as non-public.
/// </para>
/// </remarks>
private void ExtractFunctions(
    string content,
    string[] lines,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    foreach (Match match in FunctionDefRegex().Matches(content))
    {
        var isAsync = !string.IsNullOrEmpty(match.Groups[1].Value);
        var functionName = match.Groups[2].Value;
        var parameters = match.Groups[3].Value.Trim();

        // Skip private functions unless requested
        if (!request.IncludePrivateMethods &&
            functionName.StartsWith('_') &&
            !functionName.StartsWith("__", StringComparison.Ordinal))
            continue;

        var lineNumber = GetLineNumber(content, match.Index);

        // The match ends on the signature's colon; the body begins on the following line.
        var signatureEndLine = GetLineNumber(content, match.Index + match.Length - 1);
        var bodyHash = ComputeFunctionBodyHash(lines, signatureEndLine - 1, 0);
        var methodKey = $"{moduleName}::{functionName}({NormalizeParams(parameters)})";

        methods[methodKey] = new MethodFingerprint
        {
            MethodKey = methodKey,
            DeclaringType = moduleName,
            Name = functionName,
            Signature = $"{(isAsync ? "async " : "")}def {functionName}({parameters})",
            BodyHash = bodyHash,
            IsPublic = !functionName.StartsWith('_'),
            SourceFile = filePath,
            LineNumber = lineNumber
        };
    }
}
/// <summary>
/// Records a fingerprint for each indented <c>def</c> / <c>async def</c> that follows a
/// module-level <c>class</c> statement, attributing it to that class.
/// </summary>
/// <remarks>
/// <para>
/// The method pattern's leading <c>\s+</c> can also consume blank lines that precede the
/// definition. Line number and indentation are therefore taken from the line that actually
/// holds the <c>def</c> keyword rather than from the start of the match. Using the match start
/// gave methods preceded by a blank line the wrong line number and an "empty" body hash, and
/// attributed unindented functions to the preceding class.
/// </para>
/// <para>
/// A class extends until the next class statement or the first matched definition that is not
/// indented deeper than the class statement. Definitions are resolved once up front instead of
/// being re-scanned for every class.
/// </para>
/// <para>
/// Names with a single leading underscore are skipped unless the request includes private
/// methods. A method is public when its name does not start with an underscore or is a
/// <c>__dunder__</c> name.
/// </para>
/// </remarks>
private void ExtractClassMethods(
    string content,
    string[] lines,
    string moduleName,
    string filePath,
    Dictionary<string, MethodFingerprint> methods,
    FingerprintRequest request)
{
    var classMatches = ClassDefRegex().Matches(content);
    if (classMatches.Count == 0)
        return;

    var classLines = new int[classMatches.Count];
    for (var c = 0; c < classMatches.Count; c++)
        classLines[c] = GetLineNumber(content, classMatches[c].Index);

    // Resolve every candidate definition once, in document order.
    var candidates = new List<(Match Match, int LineNumber, int Indent, int SignatureEndLine)>();
    foreach (Match methodMatch in MethodDefRegex().Matches(content))
    {
        // Group 1 is the whitespace before the keyword; it may include earlier blank lines.
        var leading = methodMatch.Groups[1];
        var keywordLine = GetLineNumber(content, leading.Index + leading.Length);
        var signatureEndLine = GetLineNumber(content, methodMatch.Index + methodMatch.Length - 1);
        candidates.Add((methodMatch, keywordLine, GetIndentation(lines[keywordLine - 1]), signatureEndLine));
    }

    for (var c = 0; c < classMatches.Count; c++)
    {
        var className = classMatches[c].Groups[1].Value;
        var classLineNumber = classLines[c];
        var classIndent = GetIndentation(lines[classLineNumber - 1]);

        // Matches are ordered, so the following class statement bounds this class.
        var nextClassLine = c + 1 < classLines.Length ? classLines[c + 1] : int.MaxValue;

        foreach (var (methodMatch, methodLineNumber, methodIndent, signatureEndLine) in candidates)
        {
            // Check if this method belongs to this class
            if (methodLineNumber <= classLineNumber)
                continue;

            // Past the next class statement, or back at the class's own indentation:
            // the class body has ended.
            if (methodLineNumber > nextClassLine || methodIndent <= classIndent)
                break;

            var isAsync = !string.IsNullOrEmpty(methodMatch.Groups[2].Value);
            var methodName = methodMatch.Groups[3].Value;
            var parameters = methodMatch.Groups[4].Value.Trim();

            // Skip private methods unless requested
            if (!request.IncludePrivateMethods &&
                methodName.StartsWith('_') &&
                !methodName.StartsWith("__", StringComparison.Ordinal))
                continue;

            // The body begins after the line holding the signature's terminating colon.
            var bodyHash = ComputeFunctionBodyHash(lines, signatureEndLine - 1, methodIndent);
            var methodKey = $"{moduleName}.{className}::{methodName}({NormalizeParams(parameters)})";

            var isDunder = methodName.StartsWith("__", StringComparison.Ordinal) &&
                           methodName.EndsWith("__", StringComparison.Ordinal);
            var isPublic = !methodName.StartsWith('_') || isDunder;

            methods[methodKey] = new MethodFingerprint
            {
                MethodKey = methodKey,
                DeclaringType = $"{moduleName}.{className}",
                Name = methodName,
                Signature = $"{(isAsync ? "async " : "")}def {methodName}({parameters})",
                BodyHash = bodyHash,
                IsPublic = isPublic,
                SourceFile = filePath,
                LineNumber = methodLineNumber
            };
        }
    }
}
/// <summary>
/// Converts a package-relative file path into a dotted Python module name.
/// </summary>
/// <param name="relativePath">Path such as <c>src/utils/helper.py</c>.</param>
/// <returns>
/// The path without its extension and with separators replaced by dots, e.g.
/// <c>src.utils.helper</c>. A trailing <c>.__init__</c> is removed, so
/// <c>pkg/__init__.py</c> becomes <c>pkg</c>.
/// </returns>
private static string GetModuleName(string relativePath)
{
    const string InitSuffix = ".__init__";

    var withoutExt = Path.ChangeExtension(relativePath, null);
    var moduleName = withoutExt
        .Replace(Path.DirectorySeparatorChar, '.')
        .Replace(Path.AltDirectorySeparatorChar, '.');

    // Ordinal comparison: module names are identifiers, not culture-sensitive text.
    if (moduleName.EndsWith(InitSuffix, StringComparison.Ordinal))
    {
        moduleName = moduleName[..^InitSuffix.Length];
    }

    return moduleName;
}
/// <summary>
/// Reduces a raw Python parameter list to comma-joined parameter names.
/// </summary>
/// <param name="parameters">Text between the parentheses of a <c>def</c> signature.</param>
/// <returns>
/// The names joined with <c>,</c> and no spaces, with type hints and default values removed.
/// Markers such as <c>*args</c> are kept as written. Blank input yields an empty string.
/// </returns>
/// <remarks>
/// Parameters are split only on commas at nesting depth zero and outside quotes. A plain
/// <c>Split(',')</c> broke annotations like <c>Dict[str, int]</c> into bogus parameters.
/// </remarks>
private static string NormalizeParams(string parameters)
{
    if (string.IsNullOrWhiteSpace(parameters))
        return "";

    var names = new List<string>();
    var depth = 0;
    var quote = '\0';
    var segmentStart = 0;

    for (var i = 0; i <= parameters.Length; i++)
    {
        if (i < parameters.Length)
        {
            var c = parameters[i];

            if (quote != '\0')
            {
                if (c == '\\') i++;              // skip the escaped character
                else if (c == quote) quote = '\0';
                continue;
            }

            if (c is '"' or '\'')
            {
                quote = c;
                continue;
            }

            if (c is '[' or '(' or '{') depth++;
            else if (c is ']' or ')' or '}') depth = Math.Max(0, depth - 1);

            if (c != ',' || depth > 0)
                continue;
        }

        // Reached a top-level comma or the end of the list: close the current segment.
        var end = Math.Min(i, parameters.Length);
        var name = StripAnnotationAndDefault(parameters[segmentStart..end]);
        if (name.Length > 0)
            names.Add(name);
        segmentStart = end + 1;
    }

    return string.Join(",", names);

    static string StripAnnotationAndDefault(string parameter)
    {
        // Remove type hints (param: Type)
        var colonIndex = parameter.IndexOf(':');
        if (colonIndex > 0)
            parameter = parameter[..colonIndex];

        // Remove default values (param=value)
        var equalsIndex = parameter.IndexOf('=');
        if (equalsIndex > 0)
            parameter = parameter[..equalsIndex];

        return parameter.Trim();
    }
}
/// <summary>
/// Hashes the normalized body of the function whose signature ends on line
/// <paramref name="defLineIndex"/>.
/// </summary>
/// <param name="lines">File content split on <c>\n</c>.</param>
/// <param name="defLineIndex">Zero-based index of the line after which the body starts.</param>
/// <param name="baseIndent">Indentation width of the <c>def</c> line; the body is everything indented deeper.</param>
/// <returns>The hash of the normalized body lines, or <c>"empty"</c> when there are none.</returns>
/// <remarks>
/// <para>
/// Blank lines, comment-only lines and docstring lines contribute nothing. Every other line is
/// normalized and joined with <c>\n</c>. A fixed separator keeps the hash identical across
/// platforms, and ignoring blank lines stops spacing inside or after the function from
/// changing it.
/// </para>
/// <para>
/// A docstring is recognised when a line starts with triple quotes. It ends on the same line
/// if the quotes occur again after the opener, otherwise on the next line containing the same
/// quotes. Lines inside a docstring never end the body, whatever their indentation.
/// </para>
/// </remarks>
private static string ComputeFunctionBodyHash(string[] lines, int defLineIndex, int baseIndent)
{
    var sb = new StringBuilder();
    var inDocstring = false;
    var docstringQuotes = "";

    for (var i = defLineIndex + 1; i < lines.Length; i++)
    {
        var line = lines[i];
        if (string.IsNullOrWhiteSpace(line))
            continue;

        var trimmedLine = line.TrimStart();

        if (inDocstring)
        {
            if (trimmedLine.Contains(docstringQuotes, StringComparison.Ordinal))
                inDocstring = false;
            continue;
        }

        // The first line that is not indented deeper than the def ends the body
        // (or shows that there is no body at all).
        if (GetIndentation(line) <= baseIndent)
            break;

        if (trimmedLine.StartsWith("\"\"\"", StringComparison.Ordinal) ||
            trimmedLine.StartsWith("'''", StringComparison.Ordinal))
        {
            docstringQuotes = trimmedLine[..3];
            // Search after the opener rather than testing the line's end, so trailing
            // whitespace or a '\r' from CRLF files cannot hide the closing quotes.
            inDocstring = trimmedLine.IndexOf(docstringQuotes, 3, StringComparison.Ordinal) < 0;
            continue;
        }

        // Skip comments
        if (trimmedLine.StartsWith('#'))
            continue;

        sb.Append(NormalizeLine(trimmedLine)).Append('\n');
    }

    return ComputeHash(sb.ToString());
}
/// <summary>
/// Strips a trailing <c>#</c> comment from a line of Python source and trims
/// surrounding whitespace. A <c>#</c> inside a quoted string is left alone.
/// </summary>
/// <param name="line">A single source line.</param>
/// <returns>The line without its comment and without leading/trailing whitespace.</returns>
private static string NormalizeLine(string line)
{
    var commentIndex = -1;
    var stringChar = '\0'; // active string delimiter, '\0' when outside a literal

    for (var i = 0; i < line.Length; i++)
    {
        var c = line[i];

        if (stringChar != '\0')
        {
            if (c == '\\')
            {
                // Skip the escaped character. Looking back one character instead
                // would misread "a\\" (escaped backslash, then the closing quote).
                i++;
            }
            else if (c == stringChar)
            {
                stringChar = '\0';
            }

            continue;
        }

        if (c is '"' or '\'')
        {
            stringChar = c;
            continue;
        }

        if (c == '#')
        {
            commentIndex = i;
            break;
        }
    }

    // >= 0 so that a line consisting only of a comment normalizes to empty text.
    if (commentIndex >= 0)
    {
        line = line[..commentIndex];
    }

    // Normalize whitespace
    return line.Trim();
}
/// <summary>
/// Measures the leading indentation of a line: each space counts as one
/// column and each tab as four. Counting stops at the first other character.
/// </summary>
/// <param name="line">The line to measure.</param>
/// <returns>The indentation width in columns.</returns>
private static int GetIndentation(string line)
{
    var width = 0;

    for (var pos = 0; pos < line.Length; pos++)
    {
        switch (line[pos])
        {
            case ' ':
                width += 1;
                break;
            case '\t':
                width += 4;
                break;
            default:
                return width;
        }
    }

    return width;
}
/// <summary>
/// Returns the 1-based line number of a character offset by counting the
/// <c>'\n'</c> characters that precede it.
/// </summary>
/// <param name="content">The text to scan.</param>
/// <param name="index">Character offset into <paramref name="content"/>; offsets past the end are clamped to the length.</param>
/// <returns>One plus the number of newlines before <paramref name="index"/>.</returns>
private static int GetLineNumber(string content, int index)
{
    // Nothing precedes a non-positive offset, so it is always on line 1.
    if (index <= 0)
    {
        return 1;
    }

    var end = Math.Min(index, content.Length);
    var newlines = 0;

    foreach (var ch in content.AsSpan(0, end))
    {
        if (ch == '\n')
        {
            newlines++;
        }
    }

    return newlines + 1;
}
/// <summary>
/// Produces a short fingerprint of the given text: the first 16 bytes of the
/// SHA-256 digest of its UTF-8 encoding, as lowercase hexadecimal.
/// </summary>
/// <param name="content">Text to hash.</param>
/// <returns>
/// A 32-character lowercase hex string, or the literal <c>empty</c> when
/// <paramref name="content"/> is null, empty or whitespace.
/// </returns>
private static string ComputeHash(string content)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return "empty";
    }

    Span<byte> digest = stackalloc byte[SHA256.HashSizeInBytes];
    SHA256.HashData(Encoding.UTF8.GetBytes(content), digest);

    // Only the leading half of the digest is kept.
    return Convert.ToHexStringLower(digest[..16]);
}
}

View File

@@ -0,0 +1,161 @@
// -----------------------------------------------------------------------------
// DotNetMethodKeyBuilder.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
// Description: Method key builder for .NET/NuGet packages.
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
/// <summary>
/// Builds normalized method keys for .NET assemblies.
/// Format: Namespace.TypeName::MethodName(ParamType1,ParamType2)
/// </summary>
public sealed partial class DotNetMethodKeyBuilder : IMethodKeyBuilder
{
    // Pattern: Namespace.Type::Method(params)
    [GeneratedRegex(@"^(?:(.+)\.)?([^:.]+)::([^(]+)\(([^)]*)\)$", RegexOptions.Compiled)]
    private static partial Regex MethodKeyPattern();

    // Generic arity marker as it appears in metadata names, e.g. the "`2" in "Dictionary`2".
    [GeneratedRegex(@"`\d+")]
    private static partial Regex GenericArityPattern();

    /// <inheritdoc />
    public string Ecosystem => "nuget";

    /// <inheritdoc />
    public string BuildKey(MethodKeyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sb = new StringBuilder();

        // Namespace.TypeName
        if (!string.IsNullOrEmpty(request.Namespace))
        {
            sb.Append(NormalizeNamespace(request.Namespace));
            if (!string.IsNullOrEmpty(request.TypeName))
            {
                sb.Append('.');
            }
        }

        if (!string.IsNullOrEmpty(request.TypeName))
        {
            sb.Append(NormalizeTypeName(request.TypeName));
        }

        // ::MethodName (special names such as .ctor / .cctor are kept verbatim)
        sb.Append("::");
        sb.Append(request.MethodName);

        // (ParamTypes)
        sb.Append('(');
        if (request.ParameterTypes is { Count: > 0 })
        {
            sb.Append(string.Join(",", request.ParameterTypes.Select(NormalizeTypeName)));
        }

        sb.Append(')');
        return sb.ToString();
    }

    /// <inheritdoc />
    public MethodKeyComponents? ParseKey(string methodKey)
    {
        if (string.IsNullOrEmpty(methodKey))
        {
            return null;
        }

        var match = MethodKeyPattern().Match(methodKey);
        if (!match.Success)
        {
            return null;
        }

        var namespacePart = match.Groups[1].Value;
        var typeName = match.Groups[2].Value;
        var methodName = match.Groups[3].Value;
        var parameters = match.Groups[4].Value;

        var paramTypes = string.IsNullOrEmpty(parameters)
            ? []
            : parameters.Split(',').Select(p => p.Trim()).ToList();

        return new MethodKeyComponents
        {
            Namespace = string.IsNullOrEmpty(namespacePart) ? null : namespacePart,
            TypeName = typeName,
            MethodName = methodName,
            ParameterTypes = paramTypes
        };
    }

    /// <inheritdoc />
    public string NormalizeKey(string methodKey)
    {
        var components = ParseKey(methodKey);
        if (components is null)
        {
            // Not in the expected shape: hand the input back untouched.
            return methodKey;
        }

        return BuildKey(new MethodKeyRequest
        {
            Namespace = components.Namespace,
            TypeName = components.TypeName,
            MethodName = components.MethodName,
            ParameterTypes = components.ParameterTypes?.ToList()
        });
    }

    /// <summary>
    /// Removes every generic arity marker (a backtick followed by digits) from a
    /// namespace path, whatever the arity.
    /// </summary>
    private static string NormalizeNamespace(string ns) => GenericArityPattern().Replace(ns, "");

    /// <summary>
    /// Maps a type name to its canonical short form: C# keywords become the CLR
    /// simple name, a generic arity marker and everything after it is dropped,
    /// and a single-segment <c>System.</c> prefix is removed. Trailing array
    /// (<c>[]</c>), by-ref (<c>&amp;</c>) and pointer (<c>*</c>) decorations are
    /// preserved and the element type is normalized on its own, so
    /// <c>string[]</c> and <c>System.String[]</c> yield the same key.
    /// </summary>
    private static string NormalizeTypeName(string typeName)
    {
        // Peel decorations off the end so the alias lookup sees the bare type.
        var coreLength = typeName.Length;
        while (coreLength > 0)
        {
            if (coreLength >= 2 && typeName[coreLength - 2] == '[' && typeName[coreLength - 1] == ']')
            {
                coreLength -= 2;
            }
            else if (typeName[coreLength - 1] is '&' or '*')
            {
                coreLength -= 1;
            }
            else
            {
                break;
            }
        }

        var suffix = typeName[coreLength..];

        // C# keyword aliases -> CLR simple names. The "System.X" spellings need no
        // entry here: the prefix stripping below already reduces them to "X".
        var normalized = typeName[..coreLength] switch
        {
            "string" => "String",
            "object" => "Object",
            "void" => "Void",
            "bool" => "Boolean",
            "char" => "Char",
            "sbyte" => "SByte",
            "byte" => "Byte",
            "short" => "Int16",
            "ushort" => "UInt16",
            "int" => "Int32",
            "uint" => "UInt32",
            "long" => "Int64",
            "ulong" => "UInt64",
            "nint" => "IntPtr",
            "nuint" => "UIntPtr",
            "float" => "Single",
            "double" => "Double",
            "decimal" => "Decimal",
            var other => other
        };

        // Remove generic arity and simplify
        var arityIndex = normalized.IndexOf('`');
        if (arityIndex > 0)
        {
            normalized = normalized[..arityIndex];
        }

        // Use simple name for common BCL types (e.g., System.String -> String)
        if (normalized.StartsWith("System.", StringComparison.Ordinal))
        {
            var afterSystem = normalized[7..];
            if (!afterSystem.Contains('.'))
            {
                normalized = afterSystem;
            }
        }

        return normalized + suffix;
    }
}

View File

@@ -0,0 +1,111 @@
// -----------------------------------------------------------------------------
// IMethodKeyBuilder.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
// Description: Interface for building normalized method keys per ecosystem.
// -----------------------------------------------------------------------------
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
/// <summary>
/// Builds normalized method keys for cross-ecosystem comparison.
/// Method keys provide a stable, canonical identifier for methods
/// that can be used for diffing between package versions.
/// </summary>
public interface IMethodKeyBuilder
{
/// <summary>
/// Ecosystem this builder handles.
/// </summary>
/// <remarks>
/// The implementations in this namespace return the package-manager
/// identifiers "nuget", "maven", "npm" and "pypi".
/// </remarks>
string Ecosystem { get; }
/// <summary>
/// Builds a normalized method key from components.
/// </summary>
/// <remarks>
/// The implementations in this namespace produce keys shaped like
/// <c>Namespace.Type::Method(Param1,Param2)</c>.
/// </remarks>
/// <param name="request">Method key request with components.</param>
/// <returns>Normalized method key.</returns>
string BuildKey(MethodKeyRequest request);
/// <summary>
/// Parses a method key back into components.
/// </summary>
/// <param name="methodKey">The method key to parse.</param>
/// <returns>Parsed components or null if invalid.</returns>
MethodKeyComponents? ParseKey(string methodKey);
/// <summary>
/// Normalizes a method key to canonical form.
/// </summary>
/// <remarks>
/// The implementations in this namespace parse the key with
/// <see cref="ParseKey"/> and rebuild it with <see cref="BuildKey"/>; a key
/// that cannot be parsed is returned unchanged.
/// </remarks>
/// <param name="methodKey">The method key to normalize.</param>
/// <returns>Normalized method key.</returns>
string NormalizeKey(string methodKey);
}
/// <summary>
/// Request to build a method key.
/// </summary>
public sealed record MethodKeyRequest
{
// Only MethodName is required; every other component may be left null.
/// <summary>
/// Namespace or package path.
/// </summary>
public string? Namespace { get; init; }
/// <summary>
/// Type or class name.
/// </summary>
public string? TypeName { get; init; }
/// <summary>
/// Method or function name.
/// </summary>
public required string MethodName { get; init; }
/// <summary>
/// Parameter types (type names only).
/// </summary>
public IReadOnlyList<string>? ParameterTypes { get; init; }
/// <summary>
/// Return type.
/// </summary>
// NOTE(review): none of the builders defined alongside this record
// (nuget, maven, npm, pypi) read ReturnType - confirm whether that is intended.
public string? ReturnType { get; init; }
/// <summary>
/// Whether to include return type in key (for overload resolution).
/// </summary>
// NOTE(review): likewise not read by the builders defined alongside this
// record, so setting it currently has no visible effect on their keys - confirm.
public bool IncludeReturnType { get; init; }
}
/// <summary>
/// Parsed components of a method key.
/// </summary>
public sealed record MethodKeyComponents
{
    /// <summary>
    /// Namespace (or package / module path) part of the key, if any.
    /// </summary>
    public string? Namespace { get; init; }

    /// <summary>
    /// Declaring type or class, if any.
    /// </summary>
    public string? TypeName { get; init; }

    /// <summary>
    /// Name of the method or function.
    /// </summary>
    public required string MethodName { get; init; }

    /// <summary>
    /// Names of the parameter types, in declaration order.
    /// </summary>
    public IReadOnlyList<string>? ParameterTypes { get; init; }

    /// <summary>
    /// The key without its parameter list: <c>namespace.type::method</c>, with
    /// whichever of namespace and type are missing left out.
    /// </summary>
    public string FullQualifiedName
    {
        get
        {
            var hasNamespace = !string.IsNullOrEmpty(Namespace);
            var hasType = !string.IsNullOrEmpty(TypeName);

            return (hasNamespace, hasType) switch
            {
                (true, true) => $"{Namespace}.{TypeName}::{MethodName}",
                (true, false) => $"{Namespace}::{MethodName}",
                (false, true) => $"{TypeName}::{MethodName}",
                _ => MethodName
            };
        }
    }
}

View File

@@ -0,0 +1,212 @@
// -----------------------------------------------------------------------------
// JavaMethodKeyBuilder.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
// Description: Method key builder for Java/Maven packages.
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
/// <summary>
/// Builds normalized method keys for Java classes.
/// Format: com.package.ClassName::methodName(ParamType1,ParamType2)
/// </summary>
public sealed partial class JavaMethodKeyBuilder : IMethodKeyBuilder
{
    // Pattern: package.ClassName::methodName(params) with optional trailing text.
    // The parenthesised part is either a JVM method descriptor, e.g.
    // "(Ljava/lang/String;I)V", or the comma-separated list BuildKey emits,
    // e.g. "(String,int)".
    [GeneratedRegex(@"^([^:]+)::([^(]+)(\([^)]*\).*)$", RegexOptions.Compiled)]
    private static partial Regex MethodKeyPattern();

    /// <inheritdoc />
    public string Ecosystem => "maven";

    /// <inheritdoc />
    public string BuildKey(MethodKeyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sb = new StringBuilder();

        // Package.ClassName
        if (!string.IsNullOrEmpty(request.Namespace))
        {
            sb.Append(NormalizePackage(request.Namespace));

            // Only join with a dot when a class name follows; otherwise the key
            // would contain a dangling "pkg.::method".
            if (!string.IsNullOrEmpty(request.TypeName))
            {
                sb.Append('.');
            }
        }

        if (!string.IsNullOrEmpty(request.TypeName))
        {
            sb.Append(request.TypeName);
        }

        // ::methodName (<init> and <clinit> are kept verbatim)
        sb.Append("::");
        sb.Append(request.MethodName);

        // (ParamTypes) - comma-separated simple type names
        sb.Append('(');
        if (request.ParameterTypes is { Count: > 0 })
        {
            sb.Append(string.Join(",", request.ParameterTypes.Select(NormalizeTypeName)));
        }

        sb.Append(')');
        return sb.ToString();
    }

    /// <inheritdoc />
    public MethodKeyComponents? ParseKey(string methodKey)
    {
        if (string.IsNullOrEmpty(methodKey))
        {
            return null;
        }

        var match = MethodKeyPattern().Match(methodKey);
        if (!match.Success)
        {
            return null;
        }

        var fullClassName = match.Groups[1].Value;
        var methodName = match.Groups[2].Value;
        var signature = match.Groups[3].Value;

        // Split package from class name
        string? packageName = null;
        var typeName = fullClassName;
        var lastDot = fullClassName.LastIndexOf('.');
        if (lastDot > 0)
        {
            packageName = fullClassName[..lastDot];
            typeName = fullClassName[(lastDot + 1)..];
        }

        return new MethodKeyComponents
        {
            Namespace = packageName,
            TypeName = typeName,
            MethodName = methodName,
            ParameterTypes = ParseParameters(signature)
        };
    }

    /// <inheritdoc />
    public string NormalizeKey(string methodKey)
    {
        var components = ParseKey(methodKey);
        if (components is null)
        {
            // Not in the expected shape: hand the input back untouched.
            return methodKey;
        }

        return BuildKey(new MethodKeyRequest
        {
            Namespace = components.Namespace,
            TypeName = components.TypeName,
            MethodName = components.MethodName,
            ParameterTypes = components.ParameterTypes?.ToList()
        });
    }

    private static string NormalizePackage(string package)
    {
        // Java packages are lowercase
        return package.ToLowerInvariant();
    }

    /// <summary>
    /// Reduces a (possibly fully qualified) type name to its last dot-separated segment.
    /// </summary>
    private static string NormalizeTypeName(string typeName)
        => typeName[(typeName.LastIndexOf('.') + 1)..];

    /// <summary>
    /// Extracts parameter type names from the parenthesised part of a key.
    /// </summary>
    /// <remarks>
    /// Text that forms a well-formed JVM parameter descriptor is decoded as one;
    /// anything else is treated as the comma-separated list that
    /// <see cref="BuildKey"/> writes. This keeps <see cref="NormalizeKey"/>
    /// idempotent: an already normalized key such as <c>a.B::m(String,int)</c>
    /// parses back to the same parameter list. A list made solely of descriptor
    /// letters with no comma (for instance a single class named <c>DB</c>) is
    /// ambiguous and is still read as a descriptor.
    /// </remarks>
    private static List<string> ParseParameters(string signature)
    {
        // The regex guarantees a leading '(' and that the first ')' closes it.
        var close = signature.IndexOf(')');
        var inner = signature[1..close];

        if (IsParameterDescriptor(inner))
        {
            return ParseDescriptor(signature);
        }

        return inner
            .Split(',')
            .Select(p => p.Trim())
            .Where(p => p.Length > 0)
            .ToList();
    }

    /// <summary>
    /// Returns true when <paramref name="text"/> consists solely of JVM field
    /// descriptors: primitive codes, <c>L...;</c> class references, and
    /// <c>[</c> array prefixes. Empty text counts as a valid, empty list.
    /// </summary>
    private static bool IsParameterDescriptor(string text)
    {
        var i = 0;
        while (i < text.Length)
        {
            // Any number of array dimensions may precede the element type.
            while (i < text.Length && text[i] == '[')
            {
                i++;
            }

            if (i >= text.Length)
            {
                return false; // '[' with no element type
            }

            switch (text[i])
            {
                case 'B' or 'C' or 'D' or 'F' or 'I' or 'J' or 'S' or 'Z':
                    i++;
                    break;

                case 'L':
                    var end = text.IndexOf(';', i);
                    if (end <= i + 1)
                    {
                        return false; // unterminated or empty class name
                    }

                    i = end + 1;
                    break;

                default:
                    return false;
            }
        }

        return true;
    }

    private static List<string> ParseDescriptor(string descriptor)
    {
        var result = new List<string>();

        if (string.IsNullOrEmpty(descriptor) || !descriptor.StartsWith('('))
        {
            return result;
        }

        var i = 1; // Skip opening paren
        while (i < descriptor.Length && descriptor[i] != ')')
        {
            var (typeName, newIndex) = ParseTypeDescriptor(descriptor, i);
            if (!string.IsNullOrEmpty(typeName))
            {
                result.Add(typeName);
            }

            i = newIndex;
        }

        return result;
    }

    private static (string typeName, int newIndex) ParseTypeDescriptor(string descriptor, int index)
    {
        if (index >= descriptor.Length)
        {
            return (string.Empty, index);
        }

        var c = descriptor[index];
        return c switch
        {
            'B' => ("byte", index + 1),
            'C' => ("char", index + 1),
            'D' => ("double", index + 1),
            'F' => ("float", index + 1),
            'I' => ("int", index + 1),
            'J' => ("long", index + 1),
            'S' => ("short", index + 1),
            'Z' => ("boolean", index + 1),
            'V' => ("void", index + 1),
            '[' => ParseArrayDescriptor(descriptor, index),
            'L' => ParseObjectDescriptor(descriptor, index),
            _ => (string.Empty, index + 1) // unknown code: skip one character
        };
    }

    private static (string typeName, int newIndex) ParseArrayDescriptor(string descriptor, int index)
    {
        // '[' prefixes the element type; recurse so nested arrays stack up "[]".
        var (elementType, newIndex) = ParseTypeDescriptor(descriptor, index + 1);
        return ($"{elementType}[]", newIndex);
    }

    private static (string typeName, int newIndex) ParseObjectDescriptor(string descriptor, int index)
    {
        var semicolonIndex = descriptor.IndexOf(';', index);
        if (semicolonIndex < 0)
        {
            // Unterminated class reference: fall back to Object and move on.
            return ("Object", index + 1);
        }

        // "Ljava/lang/String;" -> "String"
        var className = descriptor[(index + 1)..semicolonIndex];
        var simpleName = className.Split('/')[^1];
        return (simpleName, semicolonIndex + 1);
    }
}

View File

@@ -0,0 +1,149 @@
// -----------------------------------------------------------------------------
// NodeMethodKeyBuilder.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
// Description: Method key builder for Node.js/npm packages.
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
/// <summary>
/// Builds normalized method keys for JavaScript/Node.js modules.
/// Format: module.path::functionName(param1,param2) or module.path.ClassName::methodName(params)
/// </summary>
public sealed partial class NodeMethodKeyBuilder : IMethodKeyBuilder
{
    // Characters treated as segment separators in a module path.
    private static readonly char[] PathSeparators = ['/', '\\', '.'];

    // Pattern: module.path[.ClassName]::methodName(params)
    [GeneratedRegex(@"^([^:]+)::([^(]+)\(([^)]*)\)$", RegexOptions.Compiled)]
    private static partial Regex MethodKeyPattern();

    /// <inheritdoc />
    public string Ecosystem => "npm";

    /// <inheritdoc />
    public string BuildKey(MethodKeyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sb = new StringBuilder();

        // Module path
        if (!string.IsNullOrEmpty(request.Namespace))
        {
            sb.Append(NormalizeModulePath(request.Namespace));
        }

        // Class name (if any)
        if (!string.IsNullOrEmpty(request.TypeName))
        {
            if (sb.Length > 0)
            {
                sb.Append('.');
            }

            sb.Append(request.TypeName);
        }

        // ::functionName
        sb.Append("::");
        sb.Append(request.MethodName);

        // (params)
        sb.Append('(');
        if (request.ParameterTypes is { Count: > 0 })
        {
            sb.Append(string.Join(",", request.ParameterTypes));
        }

        sb.Append(')');
        return sb.ToString();
    }

    /// <inheritdoc />
    public MethodKeyComponents? ParseKey(string methodKey)
    {
        if (string.IsNullOrEmpty(methodKey))
        {
            return null;
        }

        var match = MethodKeyPattern().Match(methodKey);
        if (!match.Success)
        {
            return null;
        }

        var modulePath = match.Groups[1].Value;
        var methodName = match.Groups[2].Value;
        var parameters = match.Groups[3].Value;

        // Try to extract class name from module path
        string? typeName = null;
        var lastDot = modulePath.LastIndexOf('.');
        if (lastDot > 0)
        {
            var lastPart = modulePath[(lastDot + 1)..];

            // Heuristic: a final segment starting with an uppercase letter is a class.
            // The length check covers a path ending in '.', where the segment is empty.
            if (lastPart.Length > 0 && char.IsUpper(lastPart[0]))
            {
                typeName = lastPart;
                modulePath = modulePath[..lastDot];
            }
        }

        var paramTypes = string.IsNullOrEmpty(parameters)
            ? []
            : parameters.Split(',').Select(p => p.Trim()).ToList();

        return new MethodKeyComponents
        {
            Namespace = modulePath,
            TypeName = typeName,
            MethodName = methodName,
            ParameterTypes = paramTypes
        };
    }

    /// <inheritdoc />
    public string NormalizeKey(string methodKey)
    {
        var components = ParseKey(methodKey);
        if (components is null)
        {
            // Not in the expected shape: hand the input back untouched.
            return methodKey;
        }

        return BuildKey(new MethodKeyRequest
        {
            Namespace = components.Namespace,
            TypeName = components.TypeName,
            MethodName = components.MethodName,
            ParameterTypes = components.ParameterTypes?.ToList()
        });
    }

    /// <summary>
    /// Converts a file-style module path into dotted form, then drops a trailing
    /// <c>.index</c> segment and one leading <c>src.</c>, <c>lib.</c> or
    /// <c>dist.</c> prefix.
    /// </summary>
    private static string NormalizeModulePath(string path)
    {
        // Splitting on every separator and discarding empty segments collapses any
        // run of separators ("a/../b", "./x", "a...b") and removes leading and
        // trailing dots in a single step.
        var normalized = string.Join(
            '.',
            path.Split(PathSeparators, StringSplitOptions.RemoveEmptyEntries));

        // Remove 'index' from module paths
        if (normalized.EndsWith(".index", StringComparison.OrdinalIgnoreCase))
        {
            normalized = normalized[..^6];
        }

        // Remove common prefixes like 'src.' or 'lib.'
        foreach (var prefix in new[] { "src.", "lib.", "dist." })
        {
            if (normalized.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
            {
                normalized = normalized[prefix.Length..];
                break;
            }
        }

        return normalized;
    }
}

View File

@@ -0,0 +1,165 @@
// -----------------------------------------------------------------------------
// PythonMethodKeyBuilder.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-012)
// Description: Method key builder for Python/PyPI packages.
// -----------------------------------------------------------------------------
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.VulnSurfaces.MethodKeys;
/// <summary>
/// Builds normalized method keys for Python modules.
/// Format: package.module.ClassName::method_name(param1,param2) or package.module::function_name(params)
/// </summary>
public sealed partial class PythonMethodKeyBuilder : IMethodKeyBuilder
{
    // Characters treated as segment separators in a module path.
    private static readonly char[] PathSeparators = ['/', '\\', '.'];

    // Pattern: module.path[.ClassName]::function_name(params)
    [GeneratedRegex(@"^([^:]+)::([^(]+)\(([^)]*)\)$", RegexOptions.Compiled)]
    private static partial Regex MethodKeyPattern();

    /// <inheritdoc />
    public string Ecosystem => "pypi";

    /// <inheritdoc />
    public string BuildKey(MethodKeyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sb = new StringBuilder();

        // Module path
        if (!string.IsNullOrEmpty(request.Namespace))
        {
            sb.Append(NormalizeModulePath(request.Namespace));
        }

        // Class name (if any)
        if (!string.IsNullOrEmpty(request.TypeName))
        {
            if (sb.Length > 0)
            {
                sb.Append('.');
            }

            sb.Append(request.TypeName);
        }

        // ::function_name (dunder names such as __init__ are kept verbatim)
        sb.Append("::");
        sb.Append(request.MethodName);

        // (params) - just param names for Python
        sb.Append('(');
        if (request.ParameterTypes is { Count: > 0 })
        {
            sb.Append(string.Join(",", request.ParameterTypes));
        }

        sb.Append(')');
        return sb.ToString();
    }

    /// <inheritdoc />
    public MethodKeyComponents? ParseKey(string methodKey)
    {
        if (string.IsNullOrEmpty(methodKey))
        {
            return null;
        }

        var match = MethodKeyPattern().Match(methodKey);
        if (!match.Success)
        {
            return null;
        }

        var modulePath = match.Groups[1].Value;
        var functionName = match.Groups[2].Value;
        var parameters = match.Groups[3].Value;

        // Try to extract class name from module path
        string? typeName = null;
        var lastDot = modulePath.LastIndexOf('.');
        if (lastDot > 0)
        {
            var lastPart = modulePath[(lastDot + 1)..];

            // Heuristic: a final segment starting with an uppercase letter is a class.
            if (lastPart.Length > 0 && char.IsUpper(lastPart[0]))
            {
                typeName = lastPart;
                modulePath = modulePath[..lastDot];
            }
        }

        var paramNames = string.IsNullOrEmpty(parameters)
            ? []
            : parameters.Split(',').Select(p => p.Trim()).ToList();

        return new MethodKeyComponents
        {
            Namespace = modulePath,
            TypeName = typeName,
            MethodName = functionName,
            ParameterTypes = paramNames
        };
    }

    /// <inheritdoc />
    public string NormalizeKey(string methodKey)
    {
        var components = ParseKey(methodKey);
        if (components is null)
        {
            // Not in the expected shape: hand the input back untouched.
            return methodKey;
        }

        return BuildKey(new MethodKeyRequest
        {
            Namespace = components.Namespace,
            TypeName = components.TypeName,
            MethodName = components.MethodName,
            ParameterTypes = components.ParameterTypes?.ToList()
        });
    }

    /// <summary>
    /// Converts a file-style module path into dotted form and drops a trailing
    /// <c>.__init__</c> segment. Underscores are left as they are.
    /// </summary>
    private static string NormalizeModulePath(string path)
    {
        // Splitting on every separator and discarding empty segments collapses any
        // run of separators ("a/../b", "..pkg", "a...b") and removes leading and
        // trailing dots in a single step.
        var normalized = string.Join(
            '.',
            path.Split(PathSeparators, StringSplitOptions.RemoveEmptyEntries));

        // Remove __init__ from module paths
        if (normalized.EndsWith(".__init__", StringComparison.OrdinalIgnoreCase))
        {
            normalized = normalized[..^9];
        }

        return normalized;
    }
}

View File

@@ -15,6 +15,7 @@
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.0" />
<PackageReference Include="Mono.Cecil" Version="0.11.6" />
<PackageReference Include="Npgsql" Version="9.0.3" />
<PackageReference Include="SharpCompress" Version="0.41.0" />
</ItemGroup>
<ItemGroup>