sprints work
This commit is contained in:
@@ -0,0 +1,350 @@
|
||||
// <copyright file="FunctionBoundaryDetector.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Reachability.Core.Symbols;
|
||||
|
||||
namespace StellaOps.Reachability.Core.CveMapping;
|
||||
|
||||
/// <summary>
|
||||
/// Detects function boundaries in source code from diff context.
|
||||
/// Sprint: SPRINT_20260109_009_003 Task: Implement FunctionBoundaryDetector
|
||||
/// </summary>
|
||||
public sealed partial class FunctionBoundaryDetector
|
||||
{
|
||||
// C#, Java, Kotlin, Swift and Scala declarations (GetFunctionRegex routes all five here):
// optional modifiers, one return-type word (or `fun`/`func`/`def`), the captured name and a
// parameter list that closes on the same line.
// The two negative lookaheads keep statements such as `return Foo(x);`, `await Foo(x);`,
// `new Foo(x)`, `case Foo(x)` or `else if (x)` from being read as declarations.
[GeneratedRegex(@"^\s*(?:public|private|protected|internal|static|async|override|virtual|sealed|abstract|\s)*\s*(?!(?:return|await|throw|new|else|case)\b)(?:\w+(?:<[^>]+>)?)\s+(?!(?:if|for|foreach|while|switch|catch|when)\b)(\w+)\s*\([^)]*\)\s*(?::\s*\w+)?\s*{?")]
private static partial Regex CSharpMethodRegex();

// Python: `def name(...):` with optional `async` and return annotation.
[GeneratedRegex(@"^\s*(?:async\s+)?def\s+(\w+)\s*\([^)]*\)\s*(?:->.*)?:")]
private static partial Regex PythonFunctionRegex();

// Go: `func name(...)` with an optional receiver before the name.
[GeneratedRegex(@"^\s*func\s+(?:\([^)]+\)\s+)?(\w+)\s*\([^)]*\)")]
private static partial Regex GoFunctionRegex();

// Rust: `fn name(...)` with optional `pub`, `async` and generic parameters.
[GeneratedRegex(@"^\s*(?:pub\s+)?(?:async\s+)?fn\s+(\w+)\s*(?:<[^>]+>)?\s*\([^)]*\)")]
private static partial Regex RustFunctionRegex();

// JavaScript/TypeScript: function declarations, methods, `name: (...) =>` properties and
// functions assigned to a variable (`const name = (...) =>`, `name = function (...) {`),
// optionally preceded by `export` / `export default`.
// The lookahead rejects control-flow headers (`if (x) {`, `for (...) {`, ...) and the
// anonymous `function (...) {` form, none of which declare a named function.
[GeneratedRegex(@"^\s*(?:export\s+)?(?:default\s+)?(?:(?:const|let|var)\s+)?(?:async\s+)?(?:function\s+)?(?!(?:if|for|while|switch|catch|with|return|function)\b)(\w+)\s*(?:=\s*(?:async\s+)?(?:function\s*)?)?(?:\([^)]*\)\s*(?:=>|{)|:\s*\([^)]*\)\s*(?:=>|{))")]
private static partial Regex JsFunctionRegex();

// Ruby: `def name`, including `?` / `!` suffixes. A leading `self.` (singleton method) is
// skipped so the captured name is the method name rather than `self`.
[GeneratedRegex(@"^\s*def\s+(?:self\.)?(\w+(?:\?|!)?)")]
private static partial Regex RubyMethodRegex();

// PHP: `function name(` with optional visibility / static modifiers.
[GeneratedRegex(@"^\s*(?:public|private|protected|static|\s)*function\s+(\w+)\s*\(")]
private static partial Regex PhpFunctionRegex();

// C/C++: one or more type words followed by the captured name and a single-line parameter list.
// Pointer/reference markers may sit next to the type or next to the name (`char* f(` and
// `char *f(` both match). The lookaheads reject statements such as `return f(x);` and
// `else if (x)`.
[GeneratedRegex(@"^\s*(?!(?:return|else|case|goto|throw|new|delete|typedef)\b)(?:\w+(?:\s*[*&]+\s*|\s+))+(?!(?:if|for|while|switch|catch)\b)(\w+)\s*\([^)]*\)\s*(?:const)?\s*{?")]
private static partial Regex CFunctionRegex();

// Type declarations, used to qualify function names with their enclosing type.
[GeneratedRegex(@"^\s*(?:public|private|protected|internal|sealed|abstract|static|\s)*(?:class|struct|interface|enum)\s+(\w+)")]
private static partial Regex ClassDeclarationRegex();

// `namespace X.Y` / `package x.y` declarations, used as the outermost name qualifier.
[GeneratedRegex(@"^\s*(?:namespace|package)\s+([\w.]+)")]
private static partial Regex NamespaceRegex();
|
||||
|
||||
/// <summary>
/// Detects the innermost function whose estimated span contains a specific line.
/// </summary>
/// <remarks>
/// Every declaration that starts at or before <paramref name="targetLine"/> is a candidate. Its end is
/// estimated from the declaration line itself, and the last candidate whose span still covers the
/// target wins, so a nested function takes precedence over the function enclosing it. The namespace and
/// type used for qualification are the ones most recently declared above the function, which is the
/// same rule <see cref="DetectAllFunctions"/> applies.
/// NOTE(review): <paramref name="targetLine"/> is compared against positions in
/// <paramref name="contextLines"/> (element 0 is line 1); callers holding file-absolute line numbers
/// presumably need to translate them first - confirm against the call sites.
/// </remarks>
/// <param name="contextLines">Source lines to scan; element 0 is treated as line 1.</param>
/// <param name="targetLine">1-based line number to find the enclosing function for.</param>
/// <param name="language">Programming language, used to select the declaration pattern.</param>
/// <returns>
/// The enclosing function's boundary, or <c>null</c> when the input is empty, the language has no
/// declaration pattern, the target lies outside <paramref name="contextLines"/>, or no detected
/// function covers the target.
/// </returns>
public FunctionBoundary? DetectFunction(
    ImmutableArray<string> contextLines,
    int targetLine,
    ProgrammingLanguage language)
{
    if (contextLines.IsDefaultOrEmpty || targetLine < 1 || targetLine > contextLines.Length)
    {
        return null;
    }

    var functionRegex = GetFunctionRegex(language);
    if (functionRegex is null)
    {
        return null;
    }

    var namespaceOrPackage = string.Empty;
    var className = string.Empty;
    FunctionBoundary? innermost = null;

    // A function containing the target must be declared on or before it, so later lines are never scanned.
    for (var i = 0; i < targetLine; i++)
    {
        var line = contextLines[i];

        var nsMatch = NamespaceRegex().Match(line);
        if (nsMatch.Success)
        {
            namespaceOrPackage = nsMatch.Groups[1].Value;
        }

        var classMatch = ClassDeclarationRegex().Match(line);
        if (classMatch.Success)
        {
            className = classMatch.Groups[1].Value;
        }

        var funcMatch = functionRegex.Match(line);
        if (!funcMatch.Success)
        {
            continue;
        }

        // Measure the span from the declaration line so brace depth / indentation is relative to the
        // function itself, not to whatever nesting level the target line happens to sit at.
        var endLine = EstimateFunctionEnd(contextLines, i, language);
        if (endLine >= targetLine)
        {
            // Later matches overwrite earlier ones: the innermost enclosing declaration is kept.
            innermost = new FunctionBoundary(
                BuildFullyQualifiedName(namespaceOrPackage, className, funcMatch.Groups[1].Value),
                i + 1,
                endLine);
        }
    }

    return innermost;
}
|
||||
|
||||
/// <summary>
/// Lists every function declaration found in the given lines, in source order.
/// </summary>
/// <param name="contextLines">Source lines to scan; element 0 is treated as line 1.</param>
/// <param name="language">Programming language, used to select the declaration pattern.</param>
/// <returns>
/// One boundary per line matching the language's declaration pattern, qualified with the namespace
/// and type most recently seen above it; empty when there are no lines or no pattern for the language.
/// </returns>
public ImmutableArray<FunctionBoundary> DetectAllFunctions(
    ImmutableArray<string> contextLines,
    ProgrammingLanguage language)
{
    if (contextLines.IsDefaultOrEmpty || GetFunctionRegex(language) is not { } declarationPattern)
    {
        return [];
    }

    var found = ImmutableArray.CreateBuilder<FunctionBoundary>();
    var currentNamespace = string.Empty;
    var currentType = string.Empty;
    var index = 0;

    foreach (var text in contextLines)
    {
        // Qualifiers are updated before the function check so a declaration on the same line counts.
        if (NamespaceRegex().Match(text) is { Success: true } namespaceHit)
        {
            currentNamespace = namespaceHit.Groups[1].Value;
        }

        if (ClassDeclarationRegex().Match(text) is { Success: true } typeHit)
        {
            currentType = typeHit.Groups[1].Value;
        }

        if (declarationPattern.Match(text) is { Success: true } functionHit)
        {
            var qualifiedName = BuildFullyQualifiedName(currentNamespace, currentType, functionHit.Groups[1].Value);
            found.Add(new FunctionBoundary(qualifiedName, index + 1, EstimateFunctionEnd(contextLines, index, language)));
        }

        index++;
    }

    return found.ToImmutable();
}
|
||||
|
||||
/// <summary>
/// Selects the declaration pattern for a language.
/// </summary>
/// <param name="language">Language to look up.</param>
/// <returns>The pattern to use, or <c>null</c> when the language has none.</returns>
private static Regex? GetFunctionRegex(ProgrammingLanguage language)
{
    switch (language)
    {
        // Java, Kotlin, Swift and Scala reuse the C# pattern because their declaration syntax is similar.
        case ProgrammingLanguage.CSharp:
        case ProgrammingLanguage.Java:
        case ProgrammingLanguage.Kotlin:
        case ProgrammingLanguage.Swift:
        case ProgrammingLanguage.Scala:
            return CSharpMethodRegex();

        case ProgrammingLanguage.Python:
            return PythonFunctionRegex();

        case ProgrammingLanguage.Go:
            return GoFunctionRegex();

        case ProgrammingLanguage.Rust:
            return RustFunctionRegex();

        case ProgrammingLanguage.JavaScript:
        case ProgrammingLanguage.TypeScript:
            return JsFunctionRegex();

        case ProgrammingLanguage.Ruby:
            return RubyMethodRegex();

        case ProgrammingLanguage.Php:
            return PhpFunctionRegex();

        case ProgrammingLanguage.C:
        case ProgrammingLanguage.Cpp:
            return CFunctionRegex();

        default:
            return null;
    }
}
|
||||
|
||||
/// <summary>
/// Tells whether function extent is estimated from indentation (Python, Ruby) instead of braces.
/// </summary>
private static bool IsBracelessLanguage(ProgrammingLanguage language)
    => language == ProgrammingLanguage.Python || language == ProgrammingLanguage.Ruby;
|
||||
|
||||
/// <summary>
/// Computes the net brace depth change of a single line: <c>+1</c> per <c>{</c>, <c>-1</c> per <c>}</c>.
/// </summary>
/// <remarks>
/// Braces inside single- or double-quoted literals and after a <c>//</c> line comment are ignored.
/// String state does not carry over between lines, and every <c>'</c> is treated as a quote, so an
/// apostrophe that is not a delimiter (for example a Rust lifetime) still hides the rest of the line.
/// </remarks>
/// <param name="line">Source line to inspect.</param>
/// <returns>Opening braces minus closing braces found outside literals and comments.</returns>
private static int CountBraces(string line)
{
    var count = 0;
    var inString = false;
    var stringChar = '\0';

    for (var i = 0; i < line.Length; i++)
    {
        var c = line[i];

        if (inString)
        {
            if (c == '\\')
            {
                // Skip the escaped character as a unit so that "\\" closes correctly and \" does not.
                i++;
            }
            else if (c == stringChar)
            {
                inString = false;
            }

            continue;
        }

        switch (c)
        {
            case '"' or '\'':
                inString = true;
                stringChar = c;
                break;

            case '/' when i + 1 < line.Length && line[i + 1] == '/':
                // Everything after a line comment marker is commentary, not structure.
                return count;

            case '{':
                count++;
                break;

            case '}':
                count--;
                break;
        }
    }

    return count;
}
|
||||
|
||||
/// <summary>
/// Estimates the 1-based line on which the function declared at <paramref name="startIndex"/> ends.
/// </summary>
/// <remarks>
/// Python and Ruby: the function runs up to the line before the next non-blank line indented no deeper
/// than the declaration. Other languages: brace depth is accumulated from the declaration line and the
/// function ends on the first later line that brings it back to zero or below. A declaration whose body
/// opens and closes on its own line, or that ends in <c>;</c> without opening a body, ends on the
/// declaration line. When no end is found the remaining context is assumed to belong to the function.
/// </remarks>
/// <param name="contextLines">Source lines; element 0 is line 1.</param>
/// <param name="startIndex">0-based index of the declaration line.</param>
/// <param name="language">Programming language of the source.</param>
/// <returns>The estimated 1-based end line.</returns>
private static int EstimateFunctionEnd(
    ImmutableArray<string> contextLines,
    int startIndex,
    ProgrammingLanguage language)
{
    if (IsBracelessLanguage(language))
    {
        var startIndent = GetIndentation(contextLines[startIndex]);
        for (var i = startIndex + 1; i < contextLines.Length; i++)
        {
            if (string.IsNullOrWhiteSpace(contextLines[i]))
            {
                continue;
            }

            if (GetIndentation(contextLines[i]) <= startIndent)
            {
                // i is the 0-based index of the first line outside the function, which is also the
                // 1-based number of the line just before it.
                return i;
            }
        }

        return contextLines.Length;
    }

    var declaration = contextLines[startIndex];
    var braceDepth = CountBraces(declaration);
    var opensBody = declaration.Contains('{');

    // `void Foo() { }`: the body is already closed, so later lines belong to something else.
    if (opensBody && braceDepth <= 0)
    {
        return startIndex + 1;
    }

    // Prototypes, abstract/interface members and `=> expr;` members have no brace-delimited body.
    if (!opensBody && declaration.TrimEnd().EndsWith(';'))
    {
        return startIndex + 1;
    }

    for (var i = startIndex + 1; i < contextLines.Length; i++)
    {
        braceDepth += CountBraces(contextLines[i]);
        if (braceDepth <= 0)
        {
            return i + 1; // 1-based number of the closing-brace line
        }
    }

    // No closing line inside the available context.
    return contextLines.Length;
}
|
||||
|
||||
/// <summary>
/// Measures the leading indentation of a line in columns, counting a space as 1 and a tab as 4.
/// </summary>
/// <param name="line">Line to measure.</param>
/// <returns>Indentation width up to the first character that is neither a space nor a tab.</returns>
private static int GetIndentation(string line)
{
    var width = 0;

    for (var position = 0; position < line.Length; position++)
    {
        switch (line[position])
        {
            case ' ':
                width += 1;
                break;

            case '\t':
                width += 4;
                break;

            default:
                return width;
        }
    }

    return width;
}
|
||||
|
||||
/// <summary>
/// Joins namespace, type and function name with dots, leaving out empty qualifiers.
/// </summary>
/// <param name="namespaceOrPackage">Namespace or package; skipped when null or empty.</param>
/// <param name="className">Enclosing type; skipped when null or empty.</param>
/// <param name="functionName">Function name; always the last segment.</param>
/// <returns>The dotted name, e.g. <c>Ns.Type.Method</c>.</returns>
private static string BuildFullyQualifiedName(
    string namespaceOrPackage,
    string className,
    string functionName)
{
    var qualifier = string.IsNullOrEmpty(namespaceOrPackage)
        ? string.Empty
        : namespaceOrPackage + ".";

    if (!string.IsNullOrEmpty(className))
    {
        qualifier += className + ".";
    }

    return qualifier + functionName;
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Span of a single function within a source listing.
/// </summary>
/// <param name="FullyQualifiedName">Dotted name of the function, including any namespace and type qualifiers.</param>
/// <param name="StartLine">1-based line of the declaration.</param>
/// <param name="EndLine">1-based last line of the function (inclusive).</param>
public readonly record struct FunctionBoundary(string FullyQualifiedName, int StartLine, int EndLine);
|
||||
@@ -0,0 +1,310 @@
|
||||
// <copyright file="GitDiffExtractor.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Diagnostics;
|
||||
using StellaOps.Reachability.Core.Symbols;
|
||||
|
||||
namespace StellaOps.Reachability.Core.CveMapping;
|
||||
|
||||
/// <summary>
|
||||
/// Extracts vulnerable symbols from git diffs.
|
||||
/// Sprint: SPRINT_20260109_009_003 Task: Implement GitDiffExtractor
|
||||
/// </summary>
|
||||
public sealed class GitDiffExtractor : IPatchSymbolExtractor
|
||||
{
|
||||
private readonly HttpClient _httpClient;
private readonly UnifiedDiffParser _diffParser;
private readonly FunctionBoundaryDetector _boundaryDetector;

/// <summary>
/// Creates an extractor from its collaborators.
/// </summary>
/// <param name="httpClient">Client used to download diffs from commit URLs.</param>
/// <param name="diffParser">Parser for unified diff text.</param>
/// <param name="boundaryDetector">Detector that maps changed lines to their enclosing functions.</param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public GitDiffExtractor(
    HttpClient httpClient,
    UnifiedDiffParser diffParser,
    FunctionBoundaryDetector boundaryDetector)
{
    ArgumentNullException.ThrowIfNull(httpClient);
    ArgumentNullException.ThrowIfNull(diffParser);
    ArgumentNullException.ThrowIfNull(boundaryDetector);

    _httpClient = httpClient;
    _diffParser = diffParser;
    _boundaryDetector = boundaryDetector;
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Downloads the diff behind the commit URL, analyses it and stamps the result with the commit SHA and
/// repository URL taken from the URL. Every failure after argument validation is reported as a failed
/// result rather than an exception.
/// </remarks>
public async Task<PatchAnalysisResult> ExtractFromCommitUrlAsync(
    string commitUrl,
    CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrEmpty(commitUrl);

    try
    {
        // Only commit pages that map to a raw diff endpoint can be processed.
        if (ConvertToDiffUrl(commitUrl) is not { } diffUrl)
        {
            return PatchAnalysisResult.Failed($"Unsupported commit URL format: {commitUrl}");
        }

        var diffContent = await _httpClient.GetStringAsync(diffUrl, ct).ConfigureAwait(false);

        var commitSha = ExtractCommitSha(commitUrl);
        var repositoryUrl = ExtractRepositoryUrl(commitUrl);

        var analysis = await ExtractFromDiffAsync(diffContent, ct).ConfigureAwait(false);

        return analysis with
        {
            CommitSha = commitSha,
            RepositoryUrl = repositoryUrl
        };
    }
    catch (Exception ex)
    {
        // Transport failures get their own message; everything else is reported generically.
        var reason = ex is HttpRequestException
            ? $"Failed to fetch diff from URL: {ex.Message}"
            : $"Error extracting from commit URL: {ex.Message}";

        return PatchAnalysisResult.Failed(reason);
    }
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Runs synchronously and returns a completed task. Parsing or extraction errors are reported as a
/// failed result; only the argument check throws.
/// </remarks>
public Task<PatchAnalysisResult> ExtractFromDiffAsync(
    string diffContent,
    CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrEmpty(diffContent);

    PatchAnalysisResult outcome;

    try
    {
        var parsedDiff = _diffParser.Parse(diffContent);

        var touchedFiles = new List<string>();
        var vulnerableSymbols = new List<VulnerableSymbol>();
        var addedTotal = 0;
        var removedTotal = 0;

        foreach (var file in parsedDiff.Files)
        {
            // Prefer the post-change path; deleted files only have the old one.
            touchedFiles.Add(file.NewPath ?? file.OldPath ?? "unknown");

            addedTotal += file.Hunks.Sum(h => h.AddedLines.Length);
            removedTotal += file.Hunks.Sum(h => h.RemovedLines.Length);

            vulnerableSymbols.AddRange(ExtractSymbolsFromFile(file));
        }

        outcome = PatchAnalysisResult.Successful(
            vulnerableSymbols,
            touchedFiles,
            addedTotal,
            removedTotal);
    }
    catch (Exception ex)
    {
        outcome = PatchAnalysisResult.Failed($"Error parsing diff: {ex.Message}");
    }

    return Task.FromResult(outcome);
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Runs <c>git show --format= -p &lt;commit&gt; --</c> in <paramref name="repositoryPath"/> and analyses
/// its output. Every failure after argument validation, including a non-zero git exit code, is
/// reported as a failed result rather than an exception.
/// </remarks>
public async Task<PatchAnalysisResult> ExtractFromLocalCommitAsync(
    string repositoryPath,
    string commitSha,
    CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrEmpty(repositoryPath);
    ArgumentException.ThrowIfNullOrEmpty(commitSha);

    // A reference starting with '-' would be parsed by git as a command-line option.
    if (commitSha.StartsWith('-'))
    {
        return PatchAnalysisResult.Failed($"Invalid commit reference: {commitSha}");
    }

    Process? process = null;

    try
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = "git",
            WorkingDirectory = repositoryPath,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        // ArgumentList hands each value to git as exactly one argument, so whitespace or quotes in
        // the commit reference cannot introduce extra arguments.
        startInfo.ArgumentList.Add("show");
        startInfo.ArgumentList.Add("--format=");
        startInfo.ArgumentList.Add("-p");
        startInfo.ArgumentList.Add(commitSha);
        startInfo.ArgumentList.Add("--"); // the reference is a revision, never a path

        process = Process.Start(startInfo);
        if (process is null)
        {
            return PatchAnalysisResult.Failed("Failed to start git process");
        }

        // Drain both pipes concurrently: reading them one after the other can deadlock once git
        // fills the buffer of the pipe that is not being read yet.
        var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
        var stderrTask = process.StandardError.ReadToEndAsync(ct);
        var outputs = await Task.WhenAll(stdoutTask, stderrTask).ConfigureAwait(false);

        await process.WaitForExitAsync(ct).ConfigureAwait(false);

        if (process.ExitCode != 0)
        {
            return PatchAnalysisResult.Failed($"Git show failed: {outputs[1]}");
        }

        var result = await ExtractFromDiffAsync(outputs[0], ct).ConfigureAwait(false);

        return result with
        {
            CommitSha = commitSha,
            RepositoryUrl = repositoryPath
        };
    }
    catch (Exception ex)
    {
        return PatchAnalysisResult.Failed($"Error extracting from local commit: {ex.Message}");
    }
    finally
    {
        if (process is not null)
        {
            StopIfRunning(process);
            process.Dispose();
        }
    }

    // Ends a git process that is still alive after a failure or cancellation.
    static void StopIfRunning(Process git)
    {
        try
        {
            if (!git.HasExited)
            {
                git.Kill(entireProcessTree: true);
            }
        }
        catch (Exception e) when (e is InvalidOperationException
            or NotSupportedException
            or System.ComponentModel.Win32Exception)
        {
            // Best effort: the process exited in the meantime or cannot be terminated.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Maps the removed lines of one file diff to the functions that contain them.
/// </summary>
/// <param name="fileDiff">Parsed diff of a single file.</param>
/// <returns>
/// One symbol per distinct function display name (first occurrence wins); empty when the file's
/// language cannot be determined from its path.
/// </returns>
private IEnumerable<VulnerableSymbol> ExtractSymbolsFromFile(FileDiff fileDiff)
{
    // Without a known language there is no declaration pattern to apply.
    if (DetectLanguage(fileDiff.NewPath ?? fileDiff.OldPath) is not { } language)
    {
        return new List<VulnerableSymbol>();
    }

    var collected = new List<VulnerableSymbol>();

    foreach (var hunk in fileDiff.Hunks)
    {
        // Removed lines are the code the fix replaced, i.e. the vulnerable side of the patch.
        foreach (var removed in hunk.RemovedLines)
        {
            if (_boundaryDetector.DetectFunction(hunk.Context, removed.LineNumber, language) is not { } boundary)
            {
                continue;
            }

            // Split "Namespace.Type.Method" from the right: method, then type, the rest is namespace.
            var segments = boundary.FullyQualifiedName.Split('.');
            var methodName = segments.Length == 0 ? "unknown" : segments[^1];
            var typeName = segments.Length <= 1 ? "_" : segments[^2];
            var namespaceName = segments.Length <= 2
                ? string.Empty
                : string.Join(".", segments[..^2]);

            collected.Add(new VulnerableSymbol
            {
                Symbol = CanonicalSymbol.Create(
                    @namespace: namespaceName,
                    type: typeName,
                    method: methodName,
                    signature: "()",
                    source: SymbolSource.PatchAnalysis,
                    originalSymbol: boundary.FullyQualifiedName),
                Type = VulnerabilityType.Sink, // Conservative default
                Confidence = 0.7, // Patch-based confidence
                Evidence = $"Modified in fix: line {removed.LineNumber}",
                SourceFile = fileDiff.OldPath,
                LineRange = new LineRange(boundary.StartLine, boundary.EndLine)
            });
        }
    }

    // Several removed lines usually fall into the same function; keep the first hit for each.
    return collected.DistinctBy(symbol => symbol.Symbol.DisplayName);
}
|
||||
|
||||
/// <summary>
/// Infers the programming language from a file path's extension (case-insensitive).
/// </summary>
/// <param name="filePath">Path of the changed file; may be <c>null</c> or empty.</param>
/// <returns>The language, or <c>null</c> when the path is empty or the extension is not recognised.</returns>
private static ProgrammingLanguage? DetectLanguage(string? filePath)
{
    if (string.IsNullOrEmpty(filePath))
    {
        return null;
    }

    var extension = Path.GetExtension(filePath).ToLowerInvariant();

    return extension switch
    {
        ".cs" => ProgrammingLanguage.CSharp,
        ".java" => ProgrammingLanguage.Java,
        ".kt" or ".kts" => ProgrammingLanguage.Kotlin,
        ".py" => ProgrammingLanguage.Python,
        // JSX and the explicit ES-module / CommonJS extensions are JavaScript sources too.
        ".js" or ".jsx" or ".mjs" or ".cjs" => ProgrammingLanguage.JavaScript,
        // Likewise TSX and the module-flavoured TypeScript extensions.
        ".ts" or ".tsx" or ".mts" or ".cts" => ProgrammingLanguage.TypeScript,
        ".go" => ProgrammingLanguage.Go,
        ".rs" => ProgrammingLanguage.Rust,
        ".c" or ".h" => ProgrammingLanguage.C,
        ".cpp" or ".cc" or ".cxx" or ".hpp" or ".hh" or ".hxx" => ProgrammingLanguage.Cpp,
        ".rb" => ProgrammingLanguage.Ruby,
        ".php" => ProgrammingLanguage.Php,
        ".swift" => ProgrammingLanguage.Swift,
        ".scala" => ProgrammingLanguage.Scala,
        _ => null
    };
}
|
||||
|
||||
/// <summary>
/// Converts a GitHub or GitLab commit page URL into the URL of its raw diff.
/// </summary>
/// <remarks>
/// Both hosts serve the diff at the commit URL with <c>.diff</c> appended:
/// <c>https://github.com/owner/repo/commit/sha</c> and <c>https://gitlab.com/owner/repo/-/commit/sha</c>.
/// The host is taken from the parsed URL, so a URL that merely mentions one of the host names in its
/// path or as part of another domain is rejected. Query string and fragment are dropped, and an
/// existing <c>.diff</c> / <c>.patch</c> suffix is not doubled. Bitbucket uses a different format and
/// is not supported.
/// </remarks>
/// <param name="commitUrl">Absolute http(s) URL of a commit page.</param>
/// <returns>The diff URL, or <c>null</c> when the URL is not a supported commit URL.</returns>
private static string? ConvertToDiffUrl(string commitUrl)
{
    if (!Uri.TryCreate(commitUrl, UriKind.Absolute, out var uri) ||
        (uri.Scheme != Uri.UriSchemeHttps && uri.Scheme != Uri.UriSchemeHttp))
    {
        return null;
    }

    if (!IsHostOrSubdomain(uri.Host, "github.com") && !IsHostOrSubdomain(uri.Host, "gitlab.com"))
    {
        return null;
    }

    if (!uri.AbsolutePath.Contains("/commit/", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    // Scheme, authority and path only: a query or fragment must not end up in front of ".diff".
    var commitPage = uri.GetLeftPart(UriPartial.Path).TrimEnd('/');

    if (commitPage.EndsWith(".diff", StringComparison.OrdinalIgnoreCase))
    {
        return commitPage;
    }

    if (commitPage.EndsWith(".patch", StringComparison.OrdinalIgnoreCase))
    {
        commitPage = commitPage[..^".patch".Length];
    }

    return commitPage + ".diff";

    // True for the domain itself and for its subdomains (e.g. "www.github.com").
    static bool IsHostOrSubdomain(string host, string domain)
        => host.Equals(domain, StringComparison.OrdinalIgnoreCase)
           || host.EndsWith("." + domain, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Extracts the commit identifier that follows the last <c>/commit/</c> segment of a URL.
/// </summary>
/// <param name="commitUrl">Commit URL, e.g. <c>https://host/owner/repo/commit/abc1234</c>.</param>
/// <returns>
/// The identifier without any trailing path segment, extension, query string or fragment, or
/// <c>null</c> when there is no <c>/commit/</c> segment or the identifier is shorter than 7 characters.
/// </returns>
private static string? ExtractCommitSha(string commitUrl)
{
    const string marker = "/commit/";

    var commitIndex = commitUrl.LastIndexOf(marker, StringComparison.OrdinalIgnoreCase);
    if (commitIndex < 0)
    {
        return null;
    }

    var sha = commitUrl[(commitIndex + marker.Length)..];

    // Cut at the first character that cannot belong to the identifier: ".diff"/".patch", a trailing
    // slash or further path segment, a query string or a fragment.
    var endIndex = sha.IndexOfAny(['.', '?', '#', '/']);
    if (endIndex >= 0)
    {
        sha = sha[..endIndex];
    }

    // 7 characters is the shortest abbreviation accepted here.
    return sha.Length >= 7 ? sha : null;
}
|
||||
|
||||
/// <summary>
/// Returns the part of a commit URL that precedes its last <c>/commit/</c> segment.
/// </summary>
/// <param name="commitUrl">Commit URL.</param>
/// <returns>The repository URL, or <c>null</c> when the URL has no <c>/commit/</c> segment.</returns>
private static string? ExtractRepositoryUrl(string commitUrl)
{
    var markerAt = commitUrl.LastIndexOf("/commit/", StringComparison.OrdinalIgnoreCase);
    return markerAt >= 0 ? commitUrl[..markerAt] : null;
}
|
||||
}
|
||||
@@ -0,0 +1,527 @@
|
||||
// <copyright file="OsvEnricher.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Net.Http.Json;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using StellaOps.Reachability.Core.Symbols;
|
||||
|
||||
namespace StellaOps.Reachability.Core.CveMapping;
|
||||
|
||||
/// <summary>
|
||||
/// Enriches CVE mappings with data from the OSV database.
|
||||
/// Sprint: SPRINT_20260109_009_003 Task: Implement OsvEnricher
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Uses the OSV.dev API (https://api.osv.dev/) to retrieve vulnerability data.
|
||||
/// Supports querying by vulnerability ID or by package.
|
||||
/// </remarks>
|
||||
public sealed class OsvEnricher : IOsvEnricher
|
||||
{
|
||||
private const string OsvApiBaseUrl = "https://api.osv.dev/v1";
private static readonly JsonSerializerOptions JsonOptions = CreateJsonOptions();

private readonly HttpClient _httpClient;

/// <summary>
/// Creates an enricher that talks to the OSV API through the given client.
/// </summary>
/// <param name="httpClient">Client used for all OSV requests.</param>
/// <exception cref="ArgumentNullException"><paramref name="httpClient"/> is <c>null</c>.</exception>
public OsvEnricher(HttpClient httpClient)
{
    ArgumentNullException.ThrowIfNull(httpClient);
    _httpClient = httpClient;
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Looks the identifier up via <see cref="GetVulnerabilityAsync"/> and, when a record is returned,
/// derives affected package URLs, vulnerable symbols and version ranges from it.
/// </remarks>
public async Task<OsvEnrichmentResult> EnrichAsync(string cveId, CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrEmpty(cveId);

    var vulnerability = await GetVulnerabilityAsync(cveId, ct).ConfigureAwait(false);

    return vulnerability is null
        ? OsvEnrichmentResult.NotFound(cveId)
        : new OsvEnrichmentResult
        {
            CveId = cveId,
            Found = true,
            OsvId = vulnerability.Id,
            AffectedPurls = ExtractPurls(vulnerability),
            Symbols = ExtractSymbols(vulnerability),
            AffectedVersions = ExtractAffectedVersions(vulnerability)
        };
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Requests <c>{base}/vulns/{id}</c>. A 404 response, any other HTTP failure and an unreadable JSON
/// body all yield <c>null</c>.
/// </remarks>
public async Task<OsvVulnerability?> GetVulnerabilityAsync(string vulnId, CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrEmpty(vulnId);

    try
    {
        var url = $"{OsvApiBaseUrl}/vulns/{Uri.EscapeDataString(vulnId)}";

        // The response is disposed on every exit path so its content stream and connection are released.
        using var response = await _httpClient.GetAsync(url, ct).ConfigureAwait(false);

        if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            return null;
        }

        // Any other non-success status throws HttpRequestException, handled below.
        response.EnsureSuccessStatusCode();

        var apiResponse = await response.Content
            .ReadFromJsonAsync<OsvApiVulnerability>(JsonOptions, ct)
            .ConfigureAwait(false);

        return apiResponse is null ? null : MapToOsvVulnerability(apiResponse);
    }
    catch (HttpRequestException)
    {
        return null;
    }
    catch (JsonException)
    {
        return null;
    }
}
|
||||
|
||||
/// <inheritdoc/>
/// <remarks>
/// Posts a package query to <c>{base}/query</c>. HTTP failures and unreadable JSON bodies yield an
/// empty list.
/// </remarks>
public async Task<IReadOnlyList<OsvVulnerability>> QueryByPackageAsync(
    string ecosystem,
    string packageName,
    string? version,
    CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrEmpty(ecosystem);
    ArgumentException.ThrowIfNullOrEmpty(packageName);

    try
    {
        var url = $"{OsvApiBaseUrl}/query";
        var request = new OsvQueryRequest
        {
            Package = new OsvQueryPackage
            {
                Ecosystem = MapEcosystem(ecosystem),
                Name = packageName
            },
            Version = version
        };

        // The response is disposed on every exit path so its content stream and connection are released.
        using var response = await _httpClient.PostAsJsonAsync(url, request, JsonOptions, ct)
            .ConfigureAwait(false);

        response.EnsureSuccessStatusCode();

        var queryResponse = await response.Content
            .ReadFromJsonAsync<OsvQueryResponse>(JsonOptions, ct)
            .ConfigureAwait(false);

        if (queryResponse?.Vulns is null || queryResponse.Vulns.Length == 0)
        {
            return [];
        }

        return queryResponse.Vulns
            .Select(MapToOsvVulnerability)
            .ToImmutableArray();
    }
    catch (HttpRequestException)
    {
        return [];
    }
    catch (JsonException)
    {
        return [];
    }
}
|
||||
|
||||
/// <summary>
/// Collects the package URLs of all affected packages, without case-insensitive duplicates.
/// </summary>
/// <param name="vulnerability">Vulnerability record to read.</param>
/// <returns>
/// The advisory's own PURL where present, otherwise <c>pkg:{type}/{name}</c> built from the package's
/// ecosystem and name; entries without a package are skipped.
/// </returns>
private static ImmutableArray<string> ExtractPurls(OsvVulnerability vulnerability)
{
    var unique = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var affected in vulnerability.Affected)
    {
        if (affected.Package is not { } package)
        {
            continue;
        }

        unique.Add(package.Purl ?? $"pkg:{MapEcosystemToPurlType(package.Ecosystem)}/{package.Name}");
    }

    return [.. unique];
}
|
||||
|
||||
/// <summary>
/// Turns function names listed in the advisory's ecosystem-specific data into vulnerable symbols.
/// </summary>
/// <param name="vulnerability">Vulnerability record to read.</param>
/// <returns>
/// One sink symbol per listed function name, with confidence 0.9 and the advisory id as evidence;
/// entries without ecosystem-specific data contribute nothing.
/// </returns>
private static ImmutableArray<VulnerableSymbol> ExtractSymbols(OsvVulnerability vulnerability)
{
    var result = ImmutableArray.CreateBuilder<VulnerableSymbol>();

    foreach (var affected in vulnerability.Affected)
    {
        if (affected.EcosystemSpecific is not { } ecosystemData)
        {
            continue;
        }

        // The key that carries function names differs between ecosystems; the helper tries each.
        foreach (var functionName in ExtractFunctionNames(ecosystemData))
        {
            result.Add(new VulnerableSymbol
            {
                Symbol = CanonicalSymbol.Create(
                    @namespace: string.Empty,
                    type: "_",
                    method: functionName,
                    signature: "()",
                    source: SymbolSource.OsvAdvisory,
                    originalSymbol: functionName),
                Type = VulnerabilityType.Sink,
                Confidence = 0.9, // High confidence from OSV
                Evidence = $"OSV advisory: {vulnerability.Id}"
            });
        }
    }

    return result.ToImmutable();
}
|
||||
|
||||
/// <summary>
/// Reads function names from ecosystem-specific advisory data.
/// </summary>
/// <param name="ecosystemSpecific">Raw key/value data; only <see cref="JsonElement"/> values are inspected.</param>
/// <returns>
/// Non-empty strings found under the keys <c>functions</c>, <c>vulnerable_functions</c>,
/// <c>symbols</c> and <c>affected_functions</c> (in that order), where each value is either a single
/// string or an array whose string items are taken.
/// </returns>
private static IReadOnlyList<string> ExtractFunctionNames(
    ImmutableDictionary<string, object> ecosystemSpecific)
{
    string[] candidateKeys = ["functions", "vulnerable_functions", "symbols", "affected_functions"];
    var names = new List<string>();

    foreach (var key in candidateKeys)
    {
        if (!ecosystemSpecific.TryGetValue(key, out var raw) || raw is not JsonElement json)
        {
            continue;
        }

        switch (json.ValueKind)
        {
            case JsonValueKind.String:
                Collect(json);
                break;

            case JsonValueKind.Array:
                foreach (var entry in json.EnumerateArray())
                {
                    if (entry.ValueKind == JsonValueKind.String)
                    {
                        Collect(entry);
                    }
                }

                break;
        }
    }

    return names;

    // Adds the element's string value unless it is null or empty.
    void Collect(JsonElement stringElement)
    {
        var text = stringElement.GetString();
        if (!string.IsNullOrEmpty(text))
        {
            names.Add(text);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Flattens the advisory's version ranges into per-package affected intervals.
/// </summary>
/// <remarks>
/// An OSV range may list several introduced/fixed (or introduced/last-affected) pairs. Each time an
/// <c>introduced</c> event follows an interval that already has a start and an end, that interval is
/// emitted and a new one begins, so no pair is lost. A range with at most one pair produces exactly
/// one interval, and a range without events produces one interval with no bounds.
/// </remarks>
/// <param name="vulnerability">Vulnerability record to read.</param>
/// <returns>The intervals in advisory order; entries without a package are skipped.</returns>
private static ImmutableArray<AffectedVersionRange> ExtractAffectedVersions(OsvVulnerability vulnerability)
{
    var ranges = new List<AffectedVersionRange>();

    foreach (var affected in vulnerability.Affected)
    {
        if (affected.Package is null)
        {
            continue;
        }

        var purl = affected.Package.Purl
            ?? $"pkg:{MapEcosystemToPurlType(affected.Package.Ecosystem)}/{affected.Package.Name}";

        foreach (var range in affected.Ranges)
        {
            string? introduced = null;
            string? fixedVersion = null;
            string? lastAffected = null;

            foreach (var evt in range.Events)
            {
                if (evt.Introduced is not null)
                {
                    // A new start after a completed interval opens the next interval; without this
                    // the earlier pair would be overwritten.
                    if (introduced is not null && (fixedVersion is not null || lastAffected is not null))
                    {
                        ranges.Add(CreateRange(purl, introduced, fixedVersion, lastAffected));
                        fixedVersion = null;
                        lastAffected = null;
                    }

                    introduced = evt.Introduced;
                }

                if (evt.Fixed is not null)
                {
                    fixedVersion = evt.Fixed;
                }

                if (evt.LastAffected is not null)
                {
                    lastAffected = evt.LastAffected;
                }
            }

            // Emit the interval still being built (possibly open-ended or without bounds).
            ranges.Add(CreateRange(purl, introduced, fixedVersion, lastAffected));
        }
    }

    return [.. ranges];

    // Builds one interval for the given package.
    static AffectedVersionRange CreateRange(string purl, string? introduced, string? fixedVersion, string? lastAffected)
        => new AffectedVersionRange
        {
            Purl = purl,
            IntroducedVersion = introduced,
            FixedVersion = fixedVersion,
            LastAffectedVersion = lastAffected
        };
}
|
||||
|
||||
/// <summary>
/// Converts the API payload of a vulnerability into the domain model, substituting
/// <c>"unknown"</c> for a missing id and empty collections for missing lists.
/// </summary>
private static OsvVulnerability MapToOsvVulnerability(OsvApiVulnerability api) => new()
{
    Id = api.Id ?? "unknown",
    Summary = api.Summary,
    Details = api.Details,
    Aliases = api.Aliases?.ToImmutableArray() ?? [],
    Affected = api.Affected?.Select(MapToOsvAffected).ToImmutableArray() ?? [],
    Severity = api.Severity?.Select(MapToOsvSeverity).ToImmutableArray() ?? [],
    References = api.References?.Select(MapToOsvReference).ToImmutableArray() ?? []
};
|
||||
|
||||
/// <summary>
/// Converts one "affected" entry of the API payload into the domain model. A missing package stays
/// <c>null</c>; missing ecosystem or name become <c>"unknown"</c>; missing lists become empty.
/// </summary>
private static OsvAffected MapToOsvAffected(OsvApiAffected api) => new()
{
    Package = api.Package is { } source
        ? new OsvPackage
        {
            Ecosystem = source.Ecosystem ?? "unknown",
            Name = source.Name ?? "unknown",
            Purl = source.Purl
        }
        : null,
    Ranges = api.Ranges?.Select(MapToOsvRange).ToImmutableArray() ?? [],
    Versions = api.Versions?.ToImmutableArray() ?? [],
    EcosystemSpecific = api.EcosystemSpecific?.ToImmutableDictionary()
};
|
||||
|
||||
/// <summary>
/// Converts a version range of an OSV API "affected" entry into an <see cref="OsvRange"/>.
/// </summary>
/// <param name="api">Range read from the API response.</param>
/// <returns>
/// The mapped range. A missing type becomes <c>"SEMVER"</c>; missing events become an empty array.
/// </returns>
private static OsvRange MapToOsvRange(OsvApiRange api)
{
    // Events are copied field by field; no value is defaulted.
    static OsvEvent ToEvent(OsvApiEvent source) => new OsvEvent
    {
        Introduced = source.Introduced,
        Fixed = source.Fixed,
        LastAffected = source.LastAffected
    };

    return new OsvRange
    {
        Type = api.Type ?? "SEMVER",
        Events = api.Events?.Select(ToEvent).ToImmutableArray() ?? []
    };
}
|
||||
|
||||
/// <summary>
/// Converts a severity entry of an OSV API vulnerability into an <see cref="OsvSeverity"/>.
/// </summary>
/// <param name="api">Severity entry read from the API response.</param>
/// <returns>
/// The mapped entry; a missing type becomes <c>"CVSS_V3"</c> and a missing score becomes <c>"0.0"</c>.
/// </returns>
private static OsvSeverity MapToOsvSeverity(OsvApiSeverity api) => new OsvSeverity
{
    Type = api.Type ?? "CVSS_V3",
    Score = api.Score ?? "0.0"
};
|
||||
|
||||
/// <summary>
/// Converts a reference entry of an OSV API vulnerability into an <see cref="OsvReference"/>.
/// </summary>
/// <param name="api">Reference entry read from the API response.</param>
/// <returns>
/// The mapped entry; a missing type becomes <c>"WEB"</c> and a missing URL becomes the empty string.
/// </returns>
private static OsvReference MapToOsvReference(OsvApiReference api) => new OsvReference
{
    Type = api.Type ?? "WEB",
    Url = api.Url ?? string.Empty
};
|
||||
|
||||
/// <summary>
/// Maps a common ecosystem name to the spelling used by OSV.
/// </summary>
/// <param name="ecosystem">Ecosystem name; compared case-insensitively.</param>
/// <returns>
/// The OSV spelling for a known ecosystem (for example <c>"PyPI"</c> or <c>"crates.io"</c>),
/// or <paramref name="ecosystem"/> unchanged when it is not in the table.
/// </returns>
private static string MapEcosystem(string ecosystem)
{
    var key = ecosystem.ToUpperInvariant();

    return key switch
    {
        "NPM" => "npm",
        "PYPI" => "PyPI",
        "MAVEN" => "Maven",
        "NUGET" => "NuGet",
        "GO" => "Go",
        "CRATES.IO" or "CARGO" => "crates.io",
        "RUBYGEMS" => "RubyGems",
        "PACKAGIST" => "Packagist",
        "HEX" => "Hex",
        "PUB" => "Pub",
        _ => ecosystem // unknown names pass through with their original casing
    };
}
|
||||
|
||||
/// <summary>
/// Maps an ecosystem name to the type segment used when building a <c>pkg:</c> URL.
/// </summary>
/// <param name="ecosystem">Ecosystem name; compared case-insensitively.</param>
/// <returns>
/// The purl type for the ecosystem. Ecosystems whose purl type differs from their lower-cased
/// name are translated explicitly; every other value is returned lower-cased.
/// </returns>
private static string MapEcosystemToPurlType(string ecosystem)
{
    var lowered = ecosystem.ToLowerInvariant();

    // npm, pypi, maven, nuget, hex and pub already equal their purl type once lower-cased,
    // so they are covered by the default arm.
    return lowered switch
    {
        "go" => "golang",
        "crates.io" => "cargo",
        "rubygems" => "gem",
        "packagist" => "composer",
        _ => lowered
    };
}
|
||||
|
||||
/// <summary>
/// Builds the <see cref="JsonSerializerOptions"/> instance returned to the caller.
/// </summary>
/// <returns>
/// Options using lower snake_case property names, case-insensitive property matching when
/// reading, and omission of <c>null</c> values when writing.
/// </returns>
private static JsonSerializerOptions CreateJsonOptions()
{
    var options = new JsonSerializerOptions();
    options.PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower;
    options.PropertyNameCaseInsensitive = true;
    options.DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull;
    return options;
}
|
||||
|
||||
// API request/response models
|
||||
/// <summary>
/// JSON model with a <c>package</c> object and a <c>version</c> string; both are optional.
/// </summary>
private sealed class OsvQueryRequest
{
    /// <summary>Value of the <c>package</c> property.</summary>
    [JsonPropertyName("package")]
    public OsvQueryPackage? Package { get; set; }

    /// <summary>Value of the <c>version</c> property.</summary>
    [JsonPropertyName("version")]
    public string? Version { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON model with <c>ecosystem</c> and <c>name</c> string properties; both are optional.
/// </summary>
private sealed class OsvQueryPackage
{
    /// <summary>Value of the <c>ecosystem</c> property.</summary>
    [JsonPropertyName("ecosystem")]
    public string? Ecosystem { get; set; }

    /// <summary>Value of the <c>name</c> property.</summary>
    [JsonPropertyName("name")]
    public string? Name { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON model whose only property is the optional <c>vulns</c> array.
/// </summary>
private sealed class OsvQueryResponse
{
    /// <summary>Value of the <c>vulns</c> property; <c>null</c> when absent.</summary>
    [JsonPropertyName("vulns")]
    public OsvApiVulnerability[]? Vulns { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON model of a vulnerability object. Every property is optional and is <c>null</c> when absent.
/// </summary>
private sealed class OsvApiVulnerability
{
    /// <summary>Value of the <c>id</c> property.</summary>
    [JsonPropertyName("id")]
    public string? Id { get; set; }

    /// <summary>Value of the <c>summary</c> property.</summary>
    [JsonPropertyName("summary")]
    public string? Summary { get; set; }

    /// <summary>Value of the <c>details</c> property.</summary>
    [JsonPropertyName("details")]
    public string? Details { get; set; }

    /// <summary>Value of the <c>aliases</c> property.</summary>
    [JsonPropertyName("aliases")]
    public string[]? Aliases { get; set; }

    /// <summary>Value of the <c>affected</c> property.</summary>
    [JsonPropertyName("affected")]
    public OsvApiAffected[]? Affected { get; set; }

    /// <summary>Value of the <c>severity</c> property.</summary>
    [JsonPropertyName("severity")]
    public OsvApiSeverity[]? Severity { get; set; }

    /// <summary>Value of the <c>references</c> property.</summary>
    [JsonPropertyName("references")]
    public OsvApiReference[]? References { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON model of one entry of a vulnerability's <c>affected</c> array. Every property is optional.
/// </summary>
private sealed class OsvApiAffected
{
    /// <summary>Value of the <c>package</c> property.</summary>
    [JsonPropertyName("package")]
    public OsvApiPackage? Package { get; set; }

    /// <summary>Value of the <c>ranges</c> property.</summary>
    [JsonPropertyName("ranges")]
    public OsvApiRange[]? Ranges { get; set; }

    /// <summary>Value of the <c>versions</c> property.</summary>
    [JsonPropertyName("versions")]
    public string[]? Versions { get; set; }

    /// <summary>Value of the <c>ecosystem_specific</c> property, kept as an untyped dictionary.</summary>
    [JsonPropertyName("ecosystem_specific")]
    public Dictionary<string, object>? EcosystemSpecific { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON model of a package object with optional <c>ecosystem</c>, <c>name</c> and <c>purl</c> strings.
/// </summary>
private sealed class OsvApiPackage
{
    /// <summary>Value of the <c>ecosystem</c> property.</summary>
    [JsonPropertyName("ecosystem")]
    public string? Ecosystem { get; set; }

    /// <summary>Value of the <c>name</c> property.</summary>
    [JsonPropertyName("name")]
    public string? Name { get; set; }

    /// <summary>Value of the <c>purl</c> property.</summary>
    [JsonPropertyName("purl")]
    public string? Purl { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON model of a version range with an optional <c>type</c> string and <c>events</c> array.
/// </summary>
private sealed class OsvApiRange
{
    /// <summary>Value of the <c>type</c> property.</summary>
    [JsonPropertyName("type")]
    public string? Type { get; set; }

    /// <summary>Value of the <c>events</c> property.</summary>
    [JsonPropertyName("events")]
    public OsvApiEvent[]? Events { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON model of a range event with optional <c>introduced</c>, <c>fixed</c> and
/// <c>last_affected</c> version strings.
/// </summary>
private sealed class OsvApiEvent
{
    /// <summary>Value of the <c>introduced</c> property.</summary>
    [JsonPropertyName("introduced")]
    public string? Introduced { get; set; }

    /// <summary>Value of the <c>fixed</c> property.</summary>
    [JsonPropertyName("fixed")]
    public string? Fixed { get; set; }

    /// <summary>Value of the <c>last_affected</c> property.</summary>
    [JsonPropertyName("last_affected")]
    public string? LastAffected { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON model of a severity entry with optional <c>type</c> and <c>score</c> strings.
/// </summary>
private sealed class OsvApiSeverity
{
    /// <summary>Value of the <c>type</c> property.</summary>
    [JsonPropertyName("type")]
    public string? Type { get; set; }

    /// <summary>Value of the <c>score</c> property.</summary>
    [JsonPropertyName("score")]
    public string? Score { get; set; }
}
|
||||
|
||||
/// <summary>
/// JSON model of a reference entry with optional <c>type</c> and <c>url</c> strings.
/// </summary>
private sealed class OsvApiReference
{
    /// <summary>Value of the <c>type</c> property.</summary>
    [JsonPropertyName("type")]
    public string? Type { get; set; }

    /// <summary>Value of the <c>url</c> property.</summary>
    [JsonPropertyName("url")]
    public string? Url { get; set; }
}
|
||||
}
|
||||
@@ -0,0 +1,300 @@
|
||||
// <copyright file="UnifiedDiffParser.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Reachability.Core.CveMapping;
|
||||
|
||||
/// <summary>
/// Parses unified diff format (git diff, patch files).
/// Sprint: SPRINT_20260109_009_003 Task: Implement UnifiedDiffParser
/// </summary>
/// <remarks>
/// A file section starts at a <c>diff --git a/… b/…</c> line; <c>---</c>, <c>+++</c> and <c>@@</c>
/// lines seen before the first such header are ignored. While a hunk has not yet received the
/// number of old/new lines announced in its <c>@@</c> header, lines are always treated as hunk
/// content, so content that merely looks like a header is not misread.
/// </remarks>
public sealed partial class UnifiedDiffParser
{
    // Regex patterns for parsing
    [GeneratedRegex(@"^diff --git a/(.+) b/(.+)$")]
    private static partial Regex DiffHeaderRegex();

    [GeneratedRegex(@"^--- (?:a/)?(.+)$")]
    private static partial Regex OldFileRegex();

    [GeneratedRegex(@"^\+\+\+ (?:b/)?(.+)$")]
    private static partial Regex NewFileRegex();

    [GeneratedRegex(@"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)$")]
    private static partial Regex HunkHeaderRegex();

    /// <summary>
    /// Parses unified diff content.
    /// </summary>
    /// <param name="diffContent">Raw diff content. Lines may end in LF or CRLF.</param>
    /// <returns>Parsed diff structure; it has no files when no <c>diff --git</c> header is found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="diffContent"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="diffContent"/> is empty.</exception>
    public ParsedDiff Parse(string diffContent)
    {
        ArgumentException.ThrowIfNullOrEmpty(diffContent);

        var files = new List<FileDiff>();
        FileDiff? currentFile = null;
        DiffHunk? currentHunk = null;
        var contextLines = new List<string>();
        var addedLines = new List<DiffLine>();
        var removedLines = new List<DiffLine>();
        var currentOldLine = 0;
        var currentNewLine = 0;

        // Lines the current hunk header still promises for the old and the new side.
        var remainingOld = 0;
        var remainingNew = 0;

        foreach (var rawLine in diffContent.Split('\n'))
        {
            var line = rawLine.TrimEnd('\r');

            // Inside an incomplete hunk the content wins over header detection. Otherwise a removed
            // line such as "-- comment" (rendered "--- comment") would be mistaken for a file header.
            var hunkIncomplete = currentHunk is not null && (remainingOld > 0 || remainingNew > 0);
            if (hunkIncomplete && TryConsumeBodyLine(line, withinDeclaredLength: true))
            {
                continue;
            }

            // Check for new file diff
            var diffMatch = DiffHeaderRegex().Match(line);
            if (diffMatch.Success)
            {
                // Save previous file and hunk
                FinalizeHunk(ref currentHunk, ref currentFile, contextLines, addedLines, removedLines);
                FinalizeFile(ref currentFile, files);

                currentFile = new FileDiff
                {
                    OldPath = diffMatch.Groups[1].Value,
                    NewPath = diffMatch.Groups[2].Value,
                    Hunks = []
                };
                remainingOld = 0;
                remainingNew = 0;
                continue;
            }

            // Check for old file path ("/dev/null" marks a file that did not exist before)
            var oldMatch = OldFileRegex().Match(line);
            if (oldMatch.Success && currentFile is not null)
            {
                var path = oldMatch.Groups[1].Value;
                currentFile = currentFile with { OldPath = path == "/dev/null" ? null : path };
                continue;
            }

            // Check for new file path ("/dev/null" marks a deleted file)
            var newMatch = NewFileRegex().Match(line);
            if (newMatch.Success && currentFile is not null)
            {
                var path = newMatch.Groups[1].Value;
                currentFile = currentFile with { NewPath = path == "/dev/null" ? null : path };
                continue;
            }

            // Check for hunk header
            var hunkMatch = HunkHeaderRegex().Match(line);
            if (hunkMatch.Success && currentFile is not null)
            {
                // Save previous hunk
                FinalizeHunk(ref currentHunk, ref currentFile, contextLines, addedLines, removedLines);

                var culture = System.Globalization.CultureInfo.InvariantCulture;
                currentOldLine = int.Parse(hunkMatch.Groups[1].Value, culture);
                currentNewLine = int.Parse(hunkMatch.Groups[3].Value, culture);
                var funcContext = hunkMatch.Groups[5].Value.Trim();

                // An omitted length means exactly one line.
                remainingOld = hunkMatch.Groups[2].Success ? int.Parse(hunkMatch.Groups[2].Value, culture) : 1;
                remainingNew = hunkMatch.Groups[4].Success ? int.Parse(hunkMatch.Groups[4].Value, culture) : 1;

                currentHunk = new DiffHunk
                {
                    OldStart = currentOldLine,
                    OldLength = remainingOld,
                    NewStart = currentNewLine,
                    NewLength = remainingNew,
                    FunctionContext = string.IsNullOrEmpty(funcContext) ? null : funcContext,
                    Context = [],
                    AddedLines = [],
                    RemovedLines = []
                };

                contextLines.Clear();
                addedLines.Clear();
                removedLines.Clear();
                continue;
            }

            // The hunk already has its declared number of lines. Extra '+', '-' and ' ' lines are
            // still accepted so over-long hunks keep parsing, but an empty line is not: it is
            // usually the string left after the final newline, not a context line.
            // Other lines (like "\ No newline at end of file") are ignored.
            if (currentHunk is not null)
            {
                TryConsumeBodyLine(line, withinDeclaredLength: false);
            }
        }

        // Finalize last hunk and file
        FinalizeHunk(ref currentHunk, ref currentFile, contextLines, addedLines, removedLines);
        FinalizeFile(ref currentFile, files);

        return new ParsedDiff { Files = [.. files] };

        // Records one hunk content line and advances the line counters. Returns false when the
        // line is not hunk content. With withinDeclaredLength set, a '+' or '-' line is only
        // taken while that side still expects lines, and an empty line counts as blank context.
        bool TryConsumeBodyLine(string text, bool withinDeclaredLength)
        {
            if (text.StartsWith('+'))
            {
                if (withinDeclaredLength && remainingNew == 0)
                {
                    return false;
                }

                addedLines.Add(new DiffLine(currentNewLine, text[1..]));
                currentNewLine++;
                remainingNew = Math.Max(0, remainingNew - 1);
                return true;
            }

            if (text.StartsWith('-'))
            {
                if (withinDeclaredLength && remainingOld == 0)
                {
                    return false;
                }

                removedLines.Add(new DiffLine(currentOldLine, text[1..]));
                currentOldLine++;
                remainingOld = Math.Max(0, remainingOld - 1);
                return true;
            }

            if (text.StartsWith(' ') || (withinDeclaredLength && text.Length == 0))
            {
                contextLines.Add(text.Length > 0 ? text[1..] : string.Empty);
                currentOldLine++;
                currentNewLine++;
                remainingOld = Math.Max(0, remainingOld - 1);
                remainingNew = Math.Max(0, remainingNew - 1);
                return true;
            }

            return false;
        }
    }

    /// <summary>
    /// Stores the collected lines in the current hunk, appends it to the current file and clears
    /// <paramref name="currentHunk"/>. Does nothing when there is no current hunk or file.
    /// </summary>
    private static void FinalizeHunk(
        ref DiffHunk? currentHunk,
        ref FileDiff? currentFile,
        List<string> contextLines,
        List<DiffLine> addedLines,
        List<DiffLine> removedLines)
    {
        if (currentHunk is null || currentFile is null)
        {
            return;
        }

        currentHunk = currentHunk with
        {
            Context = [.. contextLines],
            AddedLines = [.. addedLines],
            RemovedLines = [.. removedLines]
        };

        currentFile = currentFile with
        {
            Hunks = currentFile.Hunks.Add(currentHunk)
        };

        currentHunk = null;
    }

    /// <summary>
    /// Appends the current file (if any) to <paramref name="files"/> and clears it.
    /// </summary>
    private static void FinalizeFile(ref FileDiff? currentFile, List<FileDiff> files)
    {
        if (currentFile is not null)
        {
            files.Add(currentFile);
            currentFile = null;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Result of parsing a unified diff: the list of per-file diffs it contains.
/// </summary>
public sealed record ParsedDiff
{
    /// <summary>
    /// One entry per file section found in the diff.
    /// </summary>
    public required ImmutableArray<FileDiff> Files { get; init; }
}
|
||||
|
||||
/// <summary>
/// The changes a diff makes to one file.
/// </summary>
public sealed record FileDiff
{
    /// <summary>
    /// Path of the file before the change; <c>null</c> when there was no old file.
    /// </summary>
    public string? OldPath { get; init; }

    /// <summary>
    /// Path of the file after the change; <c>null</c> when there is no new file.
    /// </summary>
    public string? NewPath { get; init; }

    /// <summary>
    /// Change sections (hunks) belonging to this file.
    /// </summary>
    public required ImmutableArray<DiffHunk> Hunks { get; init; }

    /// <summary>
    /// <c>true</c> when the old path is missing or is <c>/dev/null</c>.
    /// </summary>
    public bool IsNewFile => OldPath is null or "/dev/null";

    /// <summary>
    /// <c>true</c> when the new path is missing or is <c>/dev/null</c>.
    /// </summary>
    public bool IsDeleted => NewPath is null or "/dev/null";

    /// <summary>
    /// <c>true</c> when the file is neither new nor deleted and its two paths differ.
    /// </summary>
    public bool IsRenamed => !(IsNewFile || IsDeleted) && OldPath != NewPath;
}
|
||||
|
||||
/// <summary>
/// One change section of a file diff, described by its <c>@@</c> header and its lines.
/// </summary>
public sealed record DiffHunk
{
    /// <summary>
    /// First line of the hunk in the old file.
    /// </summary>
    public required int OldStart { get; init; }

    /// <summary>
    /// Number of old-file lines the hunk spans.
    /// </summary>
    public required int OldLength { get; init; }

    /// <summary>
    /// First line of the hunk in the new file.
    /// </summary>
    public required int NewStart { get; init; }

    /// <summary>
    /// Number of new-file lines the hunk spans.
    /// </summary>
    public required int NewLength { get; init; }

    /// <summary>
    /// Text following the closing <c>@@</c> of the hunk header, or <c>null</c> when there is none.
    /// </summary>
    public string? FunctionContext { get; init; }

    /// <summary>
    /// Unchanged lines of the hunk, without the leading space.
    /// </summary>
    public required ImmutableArray<string> Context { get; init; }

    /// <summary>
    /// Lines the hunk adds.
    /// </summary>
    public required ImmutableArray<DiffLine> AddedLines { get; init; }

    /// <summary>
    /// Lines the hunk removes.
    /// </summary>
    public required ImmutableArray<DiffLine> RemovedLines { get; init; }
}
|
||||
|
||||
/// <summary>
/// A single added or removed diff line together with its line number.
/// </summary>
/// <param name="LineNumber">Line number of the line in its file.</param>
/// <param name="Content">Text of the line with the leading <c>+</c>/<c>-</c> marker removed.</param>
public readonly record struct DiffLine(int LineNumber, string Content);
|
||||
@@ -0,0 +1,550 @@
|
||||
// <copyright file="NativeSymbolNormalizer.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Reachability.Core.Symbols;
|
||||
|
||||
/// <summary>
/// Normalizes native C/C++/Rust symbols from ELF, PE, DWARF, PDB, and eBPF.
/// Sprint: SPRINT_20260109_009_002 Task: Implement native normalizer
/// </summary>
/// <remarks>
/// Handles mangled names from:
/// - Itanium C++ ABI (_Z prefix) - GCC, Clang
/// - MSVC C++ mangling (? prefix)
/// - Rust mangling (v0 with _R prefix, or legacy _ZN prefix with hash suffix)
/// - Plain C symbols (no mangling)
/// The built-in demanglers are intentionally shallow: they recover the qualified name only.
/// Parameter types are not decoded, so demangled symbols carry the placeholder signature "()".
/// </remarks>
public sealed partial class NativeSymbolNormalizer : ISymbolNormalizer
{
    private static readonly HashSet<SymbolSource> Sources =
    [
        SymbolSource.ElfSymtab,
        SymbolSource.PeExport,
        SymbolSource.Dwarf,
        SymbolSource.Pdb,
        SymbolSource.EbpfUprobe
    ];

    // Common C library prefixes and the namespace assigned to them. Prefixes are matched
    // case-insensitively and the first match wins, so each prefix is listed once.
    private static readonly (string Prefix, string Namespace)[] CNamespacePrefixes =
    [
        ("ssl_", "openssl.ssl"),
        ("EVP_", "openssl.evp"),
        ("OPENSSL_", "openssl"),
        ("BIO_", "openssl.bio"),
        ("X509_", "openssl.x509"),
        ("RSA_", "openssl.rsa"),
        ("EC_", "openssl.ec"),
        ("curl_", "curl"),
        ("sqlite3_", "sqlite3"),
        ("png_", "libpng"),
        ("jpeg_", "libjpeg"),
        ("z_", "zlib"),
        ("inflate", "zlib"),
        ("deflate", "zlib"),
        ("xml", "libxml2"),
        ("pthread_", "pthread"),
        ("sem_", "posix"),
        ("shm_", "posix"),
        ("mq_", "posix")
    ];

    /// <inheritdoc/>
    public IReadOnlySet<SymbolSource> SupportedSources => Sources;

    /// <inheritdoc/>
    public bool CanNormalize(SymbolSource source) => Sources.Contains(source);

    /// <inheritdoc/>
    public CanonicalSymbol? Normalize(RawSymbol raw)
    {
        TryNormalize(raw, out var canonical, out _);
        return canonical;
    }

    /// <inheritdoc/>
    public bool TryNormalize(RawSymbol raw, out CanonicalSymbol? canonical, out string? error)
    {
        canonical = null;
        error = null;

        if (string.IsNullOrWhiteSpace(raw.Value))
        {
            error = "Symbol value is empty";
            return false;
        }

        // Try different native symbol formats. Rust goes first: legacy Rust names also start
        // with _ZN, and the Itanium parser would otherwise keep the trailing "h<hash>" component
        // and report it as the method name.
        if (TryParseRustMangled(raw, out canonical)
            || TryParseItaniumMangled(raw, out canonical)
            || TryParseMsvcMangled(raw, out canonical)
            || TryParsePlainCSymbol(raw, out canonical)
            || TryParseDwarfSymbol(raw, out canonical))
        {
            return true;
        }

        error = $"Cannot parse native symbol: {raw.Value}";
        return false;
    }

    /// <summary>
    /// Parses Itanium C++ ABI mangled names (_Z prefix).
    /// Example: _ZN4llvm12DenseMapBaseINS_8DenseMapIPKNS_5ValueE...
    /// </summary>
    private static bool TryParseItaniumMangled(RawSymbol raw, out CanonicalSymbol? canonical)
    {
        canonical = null;

        if (!raw.Value.StartsWith("_Z", StringComparison.Ordinal))
        {
            return false;
        }

        var demangled = DemangleItanium(raw.Value);
        return demangled is not null && TryParseDemangled(demangled, raw, out canonical);
    }

    /// <summary>
    /// Parses MSVC C++ mangled names (? prefix).
    /// Example: ?lookup@JndiLookup@@QEAA?AVString@@PEAV1@@Z
    /// </summary>
    private static bool TryParseMsvcMangled(RawSymbol raw, out CanonicalSymbol? canonical)
    {
        canonical = null;

        if (!raw.Value.StartsWith('?'))
        {
            return false;
        }

        var demangled = DemangleMsvc(raw.Value);
        return demangled is not null && TryParseDemangled(demangled, raw, out canonical);
    }

    /// <summary>
    /// Parses Rust mangled names (v0 or legacy).
    /// Example: _ZN4core3ptr85drop_in_place$LT$std..rt..lang_start...
    /// </summary>
    private static bool TryParseRustMangled(RawSymbol raw, out CanonicalSymbol? canonical)
    {
        canonical = null;

        // Rust v0 mangling starts with _R
        // Legacy Rust mangling starts with _ZN and has hash suffix
        var isV0 = raw.Value.StartsWith("_R", StringComparison.Ordinal);
        var isLegacy = raw.Value.StartsWith("_ZN", StringComparison.Ordinal)
            && RustHashSuffixRegex().IsMatch(raw.Value);
        if (!isV0 && !isLegacy)
        {
            return false;
        }

        var demangled = DemangleRust(raw.Value);
        return demangled is not null && TryParseDemangled(demangled, raw, out canonical);
    }

    /// <summary>
    /// Parses plain C symbols (function names without mangling).
    /// Example: ssl_do_handshake, EVP_EncryptInit_ex
    /// </summary>
    private static bool TryParsePlainCSymbol(RawSymbol raw, out CanonicalSymbol? canonical)
    {
        canonical = null;

        // Plain C symbols are alphanumeric with underscores, no special prefixes
        if (!PlainCSymbolRegex().IsMatch(raw.Value))
        {
            return false;
        }

        // Check it's not a mangled symbol
        if (raw.Value.StartsWith("_Z", StringComparison.Ordinal) ||
            raw.Value.StartsWith("_R", StringComparison.Ordinal) ||
            raw.Value.StartsWith('?'))
        {
            return false;
        }

        // Extract namespace from prefixes (e.g., ssl_, EVP_, OPENSSL_)
        var (ns, method) = ExtractCNamespace(raw.Value);

        // C has no classes and the signature is unknown.
        canonical = CreateSymbol(raw, ns, "_", method, "()");
        return true;
    }

    /// <summary>
    /// Parses DWARF debug info format.
    /// Example: namespace::class::method(params) or file.c:function
    /// </summary>
    private static bool TryParseDwarfSymbol(RawSymbol raw, out CanonicalSymbol? canonical)
    {
        canonical = null;

        // Pattern: namespace::class::method(params)
        var match = DwarfCppRegex().Match(raw.Value);
        if (match.Success
            && TrySplitQualifiedName(match.Groups["qualified"].Value, out var ns, out var type, out var method))
        {
            canonical = CreateSymbol(raw, ns, type, method, NormalizeNativeParams(match.Groups["params"].Value));
            return true;
        }

        // Pattern: file.c:function (GDB style)
        var fileMatch = DwarfFileRegex().Match(raw.Value);
        if (fileMatch.Success)
        {
            // Use filename (without extension) as namespace
            var fileNamespace = Path.GetFileNameWithoutExtension(fileMatch.Groups["file"].Value).ToLowerInvariant();

            canonical = CreateSymbol(raw, fileNamespace, "_", fileMatch.Groups["function"].Value, "()");
            return true;
        }

        return false;
    }

    /// <summary>
    /// Parses a demangled C++/Rust symbol.
    /// </summary>
    private static bool TryParseDemangled(string demangled, RawSymbol raw, out CanonicalSymbol? canonical)
    {
        canonical = null;

        // Pattern: namespace::class::method(params)
        var match = DemangledCppRegex().Match(demangled);
        if (!match.Success)
        {
            // Try simpler pattern without params
            match = DemangledSimpleRegex().Match(demangled);
            if (!match.Success)
            {
                return false;
            }
        }

        var @params = match.Groups.ContainsKey("params") ? match.Groups["params"].Value : "";

        if (!TrySplitQualifiedName(match.Groups["qualified"].Value, out var ns, out var type, out var method))
        {
            return false;
        }

        // Handle template specializations - remove angle brackets content
        method = TemplateRegex().Replace(method, "");
        type = TemplateRegex().Replace(type, "");

        canonical = CreateSymbol(raw, ns, type, method, NormalizeNativeParams(@params));
        return true;
    }

    /// <summary>
    /// Splits a "::"-qualified name into namespace, type and method. Empty segments (for example
    /// from a leading "::") are dropped. The last segment is the method, the one before it the
    /// type, and the rest is joined with "." as the namespace; missing parts become "_".
    /// </summary>
    /// <returns><c>false</c> when the name contains no non-empty segment.</returns>
    private static bool TrySplitQualifiedName(string qualifiedName, out string ns, out string type, out string method)
    {
        var parts = qualifiedName.Split("::", StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length == 0)
        {
            ns = type = method = "_";
            return false;
        }

        method = parts[^1];
        type = parts.Length > 1 ? parts[^2] : "_";
        ns = parts.Length > 2 ? string.Join(".", parts[..^2]) : "_";
        return true;
    }

    /// <summary>
    /// Creates the canonical symbol, copying source, purl and original text from <paramref name="raw"/>.
    /// </summary>
    private static CanonicalSymbol CreateSymbol(RawSymbol raw, string ns, string type, string method, string signature) =>
        CanonicalSymbol.Create(
            @namespace: ns,
            type: type,
            method: method,
            signature: signature,
            source: raw.Source,
            purl: raw.Purl,
            originalSymbol: raw.Value);

    /// <summary>
    /// Demangling for Itanium ABI (basic implementation).
    /// Full demangling would require external library or comprehensive parser.
    /// </summary>
    /// <returns>
    /// The length-prefixed name components joined with "::" and followed by "()", or <c>null</c>
    /// when no component could be read. Parameter encodings are not decoded.
    /// </returns>
    private static string? DemangleItanium(string mangled)
    {
        if (!mangled.StartsWith("_Z", StringComparison.Ordinal))
        {
            return null;
        }

        var pos = 2; // Skip _Z
        var parts = new List<string>();

        // Handle nested names (_ZN...E)
        if (pos < mangled.Length && mangled[pos] == 'N')
        {
            pos++; // Skip N

            while (pos < mangled.Length && mangled[pos] != 'E')
            {
                if (!IsAsciiDigit(mangled[pos]))
                {
                    // Skip qualifiers (K=const, V=volatile, etc.)
                    if ("KVrO".Contains(mangled[pos]))
                    {
                        pos++;
                        continue;
                    }

                    // Anything else (substitutions, templates, ...) is not understood: stop here.
                    break;
                }

                if (!TryReadLengthPrefixed(mangled, ref pos, out var component))
                {
                    break;
                }

                parts.Add(component);
            }
        }
        else if (TryReadLengthPrefixed(mangled, ref pos, out var name))
        {
            // Simple name without nesting
            parts.Add(name);
        }

        // Parameters are not decoded - just mark the symbol as callable.
        var qualified = string.Join("::", parts);
        return qualified.Length > 0 ? qualified + "()" : null;
    }

    /// <summary>
    /// Reads a decimal length followed by that many characters, starting at <paramref name="pos"/>.
    /// </summary>
    /// <returns>
    /// <c>false</c> when there are no digits, the number does not fit an <see cref="int"/>, or the
    /// component would run past the end of the string. Symbol names come from untrusted binaries,
    /// so malformed lengths must not throw.
    /// </returns>
    private static bool TryReadLengthPrefixed(string mangled, ref int pos, out string component)
    {
        component = string.Empty;

        var digitsStart = pos;
        while (pos < mangled.Length && IsAsciiDigit(mangled[pos]))
        {
            pos++;
        }

        if (pos == digitsStart)
        {
            return false;
        }

        var parsed = int.TryParse(
            mangled.AsSpan(digitsStart, pos - digitsStart),
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out var length);

        // Compare against the remaining length rather than computing pos + length, which could overflow.
        if (!parsed || length > mangled.Length - pos)
        {
            return false;
        }

        component = mangled.Substring(pos, length);
        pos += length;
        return true;
    }

    private static bool IsAsciiDigit(char c) => c is >= '0' and <= '9';

    /// <summary>
    /// Demangling for MSVC (basic implementation).
    /// </summary>
    /// <returns>
    /// "scope::…::name()" with scopes in outermost-first order, "name()" for a symbol without
    /// scopes, or <c>null</c> when the symbol does not have the ?name@scope@@ shape.
    /// </returns>
    private static string? DemangleMsvc(string mangled)
    {
        if (!mangled.StartsWith('?'))
        {
            return null;
        }

        // Pattern: ?name@scope1@scope2@@...
        var match = MsvcMangledRegex().Match(mangled);
        if (!match.Success)
        {
            return null;
        }

        var name = match.Groups["name"].Value;

        // Reverse scope order (MSVC stores innermost first)
        var scopeParts = match.Groups["scopes"].Value.Split('@', StringSplitOptions.RemoveEmptyEntries);
        Array.Reverse(scopeParts);

        return scopeParts.Length > 0
            ? string.Join("::", scopeParts) + "::" + name + "()"
            : name + "()";
    }

    /// <summary>
    /// Demangling for Rust (basic implementation).
    /// </summary>
    private static string? DemangleRust(string mangled)
    {
        // Rust v0 mangling starts with _R
        if (mangled.StartsWith("_R", StringComparison.Ordinal))
        {
            // v0 mangling is complex - basic extraction
            return ExtractRustV0Symbol(mangled);
        }

        // Legacy Rust mangling - similar to Itanium but with hash suffix
        if (mangled.StartsWith("_ZN", StringComparison.Ordinal))
        {
            // Replace the hash suffix (17h followed by 16 hex chars) with the closing E
            return DemangleItanium(RustHashSuffixRegex().Replace(mangled, "E"));
        }

        return null;
    }

    /// <summary>
    /// Very basic v0 extraction: collects every length-prefixed run of 1 to 99 characters and joins
    /// them with "::". Disambiguators, back references and generic arguments are not decoded, so
    /// the result is a best-effort approximation of the path.
    /// </summary>
    /// <returns>The joined identifiers followed by "()", or <c>null</c> when none were found.</returns>
    private static string? ExtractRustV0Symbol(string mangled)
    {
        var readable = new StringBuilder();
        var pos = 2; // Skip _R

        while (pos < mangled.Length)
        {
            if (!IsAsciiDigit(mangled[pos]))
            {
                pos++;
                continue;
            }

            var digitsStart = pos;
            while (pos < mangled.Length && IsAsciiDigit(mangled[pos]))
            {
                pos++;
            }

            var parsed = int.TryParse(
                mangled.AsSpan(digitsStart, pos - digitsStart),
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out var length);

            // The identifier bytes start right after the digits. In v0 the punycode marker 'u'
            // comes before the length, so a 'u' here is the first character of the identifier.
            if (parsed && length > 0 && length < 100 && pos < mangled.Length && length <= mangled.Length - pos)
            {
                if (readable.Length > 0)
                {
                    readable.Append("::");
                }

                readable.Append(mangled.AsSpan(pos, length));
                pos += length;
            }
        }

        return readable.Length > 0 ? readable + "()" : null;
    }

    /// <summary>
    /// Extracts namespace from C function naming conventions.
    /// </summary>
    /// <returns>
    /// The namespace of the first matching known prefix and the symbol with that prefix removed
    /// (the whole symbol when nothing would remain), or ("native", symbol) when no prefix matches.
    /// </returns>
    private static (string Namespace, string Method) ExtractCNamespace(string symbol)
    {
        foreach (var (prefix, ns) in CNamespacePrefixes)
        {
            if (symbol.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
            {
                var method = symbol[prefix.Length..];
                return (ns, method.Length > 0 ? method : symbol);
            }
        }

        // No known prefix - use generic namespace
        return ("native", symbol);
    }

    /// <summary>
    /// Normalizes native parameter list to simplified form.
    /// </summary>
    /// <returns>
    /// "()" for an empty list; otherwise the lower-cased last word of each comma-separated
    /// parameter, with const/volatile, '*' and '&amp;' removed and any "::" qualification dropped.
    /// </returns>
    private static string NormalizeNativeParams(string @params)
    {
        if (string.IsNullOrWhiteSpace(@params))
        {
            return "()";
        }

        // Remove const, volatile, pointer/reference decorations. Repeated spaces need no
        // collapsing because each parameter is split on spaces with empty entries removed.
        var simplified = @params
            .Replace("const ", "")
            .Replace("volatile ", "")
            .Replace(" const", "")
            .Replace("*", "")
            .Replace("&", "")
            .Trim();

        var types = simplified.Split(',', StringSplitOptions.TrimEntries)
            .Select(LastWordLowered)
            .Where(t => !string.IsNullOrEmpty(t));

        return $"({string.Join(", ", types)})";

        // Takes the last whitespace-separated word of a parameter and strips its "::" qualification.
        static string LastWordLowered(string parameter)
        {
            var words = parameter.Split(' ', StringSplitOptions.RemoveEmptyEntries);
            if (words.Length == 0)
            {
                return "";
            }

            var typeName = words[^1];
            if (typeName.Contains("::"))
            {
                typeName = typeName.Split("::")[^1];
            }

            return typeName.ToLowerInvariant();
        }
    }

    // Regex patterns
    [GeneratedRegex(@"^[a-zA-Z_][a-zA-Z0-9_]*$")]
    private static partial Regex PlainCSymbolRegex();

    [GeneratedRegex(@"(?<qualified>[\w:]+)\s*\((?<params>[^)]*)\)")]
    private static partial Regex DwarfCppRegex();

    // Accepts .c/.cc/.cp/.cpp/.cxx and .h/.hh/.hp/.hpp/.hxx file names.
    [GeneratedRegex(@"(?<file>[\w./]+\.(?:c(?:c|p|pp|xx)?|h(?:h|p|pp|xx)?)):(?<function>\w+)")]
    private static partial Regex DwarfFileRegex();

    [GeneratedRegex(@"(?<qualified>[\w:]+)\s*\((?<params>[^)]*)\)")]
    private static partial Regex DemangledCppRegex();

    [GeneratedRegex(@"^(?<qualified>[\w:]+)$")]
    private static partial Regex DemangledSimpleRegex();

    [GeneratedRegex(@"<[^>]*>")]
    private static partial Regex TemplateRegex();

    // The scope list ends at the first "@@"; what follows is the type encoding, which may itself
    // contain "@@". The scope list is optional so that global functions (?name@@...) match too.
    [GeneratedRegex(@"^\?(?<name>\w+)(?:@(?<scopes>\w+(?:@\w+)*))?@@")]
    private static partial Regex MsvcMangledRegex();

    [GeneratedRegex(@"17h[0-9a-f]{16}E?$")]
    private static partial Regex RustHashSuffixRegex();
}
|
||||
@@ -0,0 +1,66 @@
|
||||
// <copyright file="ProgrammingLanguage.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
namespace StellaOps.Reachability.Core.Symbols;
|
||||
|
||||
/// <summary>
|
||||
/// Supported programming languages for symbol canonicalization.
|
||||
/// Sprint: SPRINT_20260109_009_003 Task: Create ProgrammingLanguage enum
|
||||
/// </summary>
/// <remarks>
/// Every member carries an explicit numeric value (0 through 17).
/// <see cref="Unknown"/> is 0, so it is also the value of an uninitialized
/// field or default expression of this type.
/// NOTE(review): the explicit numbering suggests the values are meant to stay
/// stable - confirm before renumbering or reordering members.
/// </remarks>
|
||||
public enum ProgrammingLanguage
|
||||
{
|
||||
/// <summary>Unknown or unsupported language.</summary>
|
||||
Unknown = 0,
|
||||
|
||||
/// <summary>C# (.cs files).</summary>
|
||||
CSharp = 1,
|
||||
|
||||
/// <summary>Java (.java files).</summary>
|
||||
Java = 2,
|
||||
|
||||
/// <summary>Kotlin (.kt, .kts files).</summary>
|
||||
Kotlin = 3,
|
||||
|
||||
/// <summary>Python (.py files).</summary>
|
||||
Python = 4,
|
||||
|
||||
/// <summary>JavaScript (.js files).</summary>
|
||||
JavaScript = 5,
|
||||
|
||||
/// <summary>TypeScript (.ts files).</summary>
|
||||
TypeScript = 6,
|
||||
|
||||
/// <summary>Go (.go files).</summary>
|
||||
Go = 7,
|
||||
|
||||
/// <summary>Rust (.rs files).</summary>
|
||||
Rust = 8,
|
||||
|
||||
/// <summary>C (.c, .h files).</summary>
|
||||
C = 9,
|
||||
|
||||
/// <summary>C++ (.cpp, .cc, .cxx, .hpp files).</summary>
|
||||
Cpp = 10,
|
||||
|
||||
/// <summary>Ruby (.rb files).</summary>
|
||||
Ruby = 11,
|
||||
|
||||
/// <summary>PHP (.php files).</summary>
|
||||
Php = 12,
|
||||
|
||||
/// <summary>Swift (.swift files).</summary>
|
||||
Swift = 13,
|
||||
|
||||
/// <summary>Scala (.scala files).</summary>
|
||||
Scala = 14,
|
||||
|
||||
/// <summary>Objective-C (.m, .mm files).</summary>
|
||||
ObjectiveC = 15,
|
||||
|
||||
/// <summary>Elixir (.ex, .exs files).</summary>
|
||||
Elixir = 16,
|
||||
|
||||
/// <summary>Erlang (.erl files).</summary>
|
||||
Erlang = 17
|
||||
|
||||
}
|
||||
@@ -0,0 +1,453 @@
|
||||
// <copyright file="ScriptSymbolNormalizer.cs" company="StellaOps">
|
||||
// Copyright (c) StellaOps. Licensed under the AGPL-3.0-or-later.
|
||||
// </copyright>
|
||||
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Reachability.Core.Symbols;
|
||||
|
||||
/// <summary>
|
||||
/// Normalizes script language symbols from V8 (JS), Python, and PHP.
|
||||
/// Sprint: SPRINT_20260109_009_002 Task: Implement script normalizer
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Handles symbols from:
|
||||
/// - V8 profiler (Node.js) - stack frames
|
||||
/// - Python sys.settrace - function/method traces
|
||||
/// - PHP Xdebug - profiler output
|
||||
/// </remarks>
|
||||
public sealed partial class ScriptSymbolNormalizer : ISymbolNormalizer
|
||||
{
|
||||
// The symbol sources this normalizer declares support for; backs both
// SupportedSources and CanNormalize.
private static readonly HashSet<SymbolSource> Sources = new HashSet<SymbolSource>
{
    SymbolSource.V8Profiler,
    SymbolSource.PythonTrace,
    SymbolSource.PhpXdebug,
};
|
||||
|
||||
/// <inheritdoc/>
public IReadOnlySet<SymbolSource> SupportedSources
{
    get { return Sources; }
}
|
||||
|
||||
/// <inheritdoc/>
public bool CanNormalize(SymbolSource source)
{
    return Sources.Contains(source);
}
|
||||
|
||||
/// <inheritdoc/>
public CanonicalSymbol? Normalize(RawSymbol raw)
{
    // The success flag and error text are deliberately ignored; whatever
    // TryNormalize left in its out parameter is returned as-is.
    _ = TryNormalize(raw, out var normalized, out _);
    return normalized;
}
|
||||
|
||||
/// <inheritdoc/>
public bool TryNormalize(RawSymbol raw, out CanonicalSymbol? canonical, out string? error)
{
    canonical = null;

    // Nothing to parse: report it without consulting any parser.
    if (string.IsNullOrWhiteSpace(raw.Value))
    {
        error = "Symbol value is empty";
        return false;
    }

    // Dispatch on the declared source; anything else goes through the
    // generic fallback.
    bool parsed;
    switch (raw.Source)
    {
        case SymbolSource.V8Profiler:
            parsed = TryParseV8Symbol(raw, out canonical);
            break;
        case SymbolSource.PythonTrace:
            parsed = TryParsePythonSymbol(raw, out canonical);
            break;
        case SymbolSource.PhpXdebug:
            parsed = TryParsePhpSymbol(raw, out canonical);
            break;
        default:
            parsed = TryParseGenericScript(raw, out canonical);
            break;
    }

    error = parsed ? null : $"Cannot parse script symbol: {raw.Value}";
    return parsed;
}
|
||||
|
||||
/// <summary>
/// Parses a V8 profiler stack frame, or a bare JavaScript function name.
/// </summary>
/// <remarks>
/// Two input shapes are tried, in this order:
/// - "function (location)", e.g. "lodash.template (lodash.js:1234:56)" or
///   "Foo.bar [as baz] (foo.js:1:1)". Acceptance is decided by V8StackFrameRegex,
///   including whether a location that contains ':' inside its path (such as a
///   URL scheme) is recognised.
/// - A bare name accepted by JsIdentifierRegex; no file is available in that case.
/// The function name is split into namespace / type / method by
/// ParseJsFunctionName. The signature is always "()".
/// </remarks>
/// <param name="raw">Raw symbol; Value, Source and Purl are read.</param>
/// <param name="canonical">The canonical symbol on success; null otherwise.</param>
/// <returns>True when either shape matched.</returns>
private static bool TryParseV8Symbol(RawSymbol raw, out CanonicalSymbol? canonical)
{
    string functionName;
    string? file;

    var frame = V8StackFrameRegex().Match(raw.Value);
    if (frame.Success)
    {
        functionName = frame.Groups["function"].Value.Trim();
        file = frame.Groups["file"].Value;
    }
    else if (JsIdentifierRegex().IsMatch(raw.Value))
    {
        // Name only, no location.
        functionName = raw.Value;
        file = null;
    }
    else
    {
        canonical = null;
        return false;
    }

    var (ns, type, method) = ParseJsFunctionName(functionName, file);

    canonical = CanonicalSymbol.Create(
        @namespace: ns,
        type: type,
        method: method,
        signature: "()",
        source: raw.Source,
        purl: raw.Purl,
        originalSymbol: raw.Value);

    return true;
}
|
||||
|
||||
/// <summary>
/// Parses a Python trace symbol into a canonical symbol.
/// </summary>
/// <remarks>
/// Accepted shapes, tried in this order:
/// - "module.submodule:ClassName.method" or "package.module:function": the text
///   before the colon is the namespace, the text after it is split by
///   ParsePythonQualifiedName. The literal module "&lt;module&gt;" maps to "_".
/// - "module.Class.method" (dots only): the last segment is the method; the
///   segment before it is the type when there are more than two segments and it
///   starts with an uppercase letter, otherwise the type is "_".
/// - "function": a bare identifier, with namespace and type "_".
/// Dotted names containing an empty segment (leading, trailing, or doubled
/// dots, e.g. "pkg..func") are rejected. Previously such input could throw
/// IndexOutOfRangeException or yield an empty method name.
/// The signature is always "()".
/// </remarks>
/// <param name="raw">Raw symbol; Value, Source and Purl are read.</param>
/// <param name="canonical">The canonical symbol on success; null otherwise.</param>
/// <returns>True when one of the shapes matched.</returns>
private static bool TryParsePythonSymbol(RawSymbol raw, out CanonicalSymbol? canonical)
{
    canonical = null;

    // Pattern with module:qualified_name
    var colonMatch = PythonColonFormatRegex().Match(raw.Value);
    if (colonMatch.Success)
    {
        var module = colonMatch.Groups["module"].Value;
        var qualifiedName = colonMatch.Groups["qualified"].Value;

        // The regex allows any mix of word characters and dots, so validate the
        // segments here rather than relying on the helper to cope with "".
        if (HasEmptySegment(qualifiedName.Split('.')))
        {
            return false;
        }

        var (type, method) = ParsePythonQualifiedName(qualifiedName);

        canonical = CanonicalSymbol.Create(
            @namespace: module == "<module>" ? "_" : module,
            type: type,
            method: method,
            signature: "()",
            source: raw.Source,
            purl: raw.Purl,
            originalSymbol: raw.Value);

        return true;
    }

    // Dot-separated pattern: module.Class.method
    if (PythonDotFormatRegex().IsMatch(raw.Value))
    {
        var parts = raw.Value.Split('.');
        if (parts.Length >= 2)
        {
            // A value containing a dot can never match the bare-identifier
            // pattern below, so rejecting here is final.
            if (HasEmptySegment(parts))
            {
                return false;
            }

            var method = parts[^1];
            var type = parts.Length > 2 && char.IsUpper(parts[^2][0]) ? parts[^2] : "_";
            var ns = type == "_"
                ? string.Join(".", parts[..^1])
                : string.Join(".", parts[..^2]);

            canonical = CanonicalSymbol.Create(
                @namespace: ns.Length > 0 ? ns : "_",
                type: type,
                method: method,
                signature: "()",
                source: raw.Source,
                purl: raw.Purl,
                originalSymbol: raw.Value);

            return true;
        }
    }

    // Simple function name
    if (PythonIdentifierRegex().IsMatch(raw.Value))
    {
        canonical = CanonicalSymbol.Create(
            @namespace: "_",
            type: "_",
            method: raw.Value,
            signature: "()",
            source: raw.Source,
            purl: raw.Purl,
            originalSymbol: raw.Value);

        return true;
    }

    return false;

    // True when splitting on '.' produced at least one zero-length piece.
    static bool HasEmptySegment(string[] segments) =>
        Array.Exists(segments, static segment => segment.Length == 0);
}
|
||||
|
||||
/// <summary>
/// Parses a PHP Xdebug profiler symbol into a canonical symbol.
/// </summary>
/// <remarks>
/// Shapes are tried in this order:
/// - "Namespace\Class->method" (instance call)
/// - "Namespace\Class::staticMethod" (static call)
/// - "{closure:/path/file.php:123-456}": the namespace is the lower-cased file
///   name without extension ("_" if that is empty) and the method is "{closure}"
/// - "function_name": a plain function, with namespace and type "_"
/// For the two class-member shapes the class part is split by ParsePhpClassName.
/// The signature is always "()".
/// </remarks>
/// <param name="raw">Raw symbol; Value, Source and Purl are read.</param>
/// <param name="canonical">The canonical symbol on success; null otherwise.</param>
/// <returns>True when one of the shapes matched.</returns>
private static bool TryParsePhpSymbol(RawSymbol raw, out CanonicalSymbol? canonical)
{
    // Instance and static members differ only in the separator, so both
    // patterns share one code path; the instance pattern is tried first.
    foreach (var memberPattern in new[] { PhpInstanceMethodRegex(), PhpStaticMethodRegex() })
    {
        var member = memberPattern.Match(raw.Value);
        if (!member.Success)
        {
            continue;
        }

        var (classNamespace, className) = ParsePhpClassName(member.Groups["class"].Value);

        canonical = CanonicalSymbol.Create(
            @namespace: classNamespace,
            type: className,
            method: member.Groups["method"].Value,
            signature: "()",
            source: raw.Source,
            purl: raw.Purl,
            originalSymbol: raw.Value);

        return true;
    }

    var closure = PhpClosureRegex().Match(raw.Value);
    if (closure.Success)
    {
        var fileStem = Path.GetFileNameWithoutExtension(closure.Groups["file"].Value).ToLowerInvariant();

        canonical = CanonicalSymbol.Create(
            @namespace: fileStem.Length > 0 ? fileStem : "_",
            type: "_",
            method: "{closure}",
            signature: "()",
            source: raw.Source,
            purl: raw.Purl,
            originalSymbol: raw.Value);

        return true;
    }

    if (PhpFunctionRegex().IsMatch(raw.Value))
    {
        canonical = CanonicalSymbol.Create(
            @namespace: "_",
            type: "_",
            method: raw.Value,
            signature: "()",
            source: raw.Source,
            purl: raw.Purl,
            originalSymbol: raw.Value);

        return true;
    }

    canonical = null;
    return false;
}
|
||||
|
||||
/// <summary>
/// Fallback used when the symbol source has no dedicated parser: tries the V8,
/// Python and PHP parsers in that order and stops at the first one that accepts
/// the input.
/// </summary>
/// <param name="raw">Raw symbol to parse.</param>
/// <param name="canonical">Result of the first parser that succeeded; otherwise
/// whatever the last parser tried left in it.</param>
/// <returns>True when any of the three parsers succeeded.</returns>
private static bool TryParseGenericScript(RawSymbol raw, out CanonicalSymbol? canonical)
{
    // Short-circuit evaluation keeps the original first-match-wins order.
    return TryParseV8Symbol(raw, out canonical)
        || TryParsePythonSymbol(raw, out canonical)
        || TryParsePhpSymbol(raw, out canonical);
}
|
||||
|
||||
/// <summary>
/// Splits a JavaScript function name into namespace, type and method.
/// </summary>
/// <remarks>
/// Steps, in order:
/// - A trailing " [as alias]" part is cut off.
/// - "anonymous", "&lt;anonymous&gt;" and "(anonymous)" become method "{anonymous}"
///   with the namespace derived from <paramref name="file"/>.
/// - "A.b.C.method": when the segment before the method starts with an uppercase
///   letter it is the type, and the namespace is the remaining leading segments
///   (or the file-derived namespace when there are none). Otherwise the type is
///   "_" and every segment before the method forms the namespace.
/// - A single name gets the file-derived namespace and type "_".
/// Empty segments from leading, trailing or doubled dots are ignored. Previously
/// they could throw IndexOutOfRangeException or yield an empty method name.
/// </remarks>
/// <param name="functionName">Function name as reported by the profiler.</param>
/// <param name="file">Source location of the frame, or null when unknown.</param>
/// <returns>The namespace, type and method parts.</returns>
private static (string Namespace, string Type, string Method) ParseJsFunctionName(string functionName, string? file)
{
    // Remove "as alias" suffix
    var asIndex = functionName.IndexOf(" [as ", StringComparison.Ordinal);
    if (asIndex > 0)
    {
        functionName = functionName[..asIndex];
    }

    // Handle anonymous functions
    if (functionName is "anonymous" or "<anonymous>" or "(anonymous)")
    {
        return (ExtractJsNamespaceFromFile(file), "_", "{anonymous}");
    }

    // Dropping empty entries guarantees that every segment indexed below has
    // at least one character.
    var parts = functionName.Split('.', StringSplitOptions.RemoveEmptyEntries);
    if (parts.Length >= 2)
    {
        var method = parts[^1];
        var owner = parts[^2];

        // If the owner starts with uppercase, treat it as a class.
        if (char.IsUpper(owner[0]))
        {
            var classNamespace = parts.Length > 2
                ? string.Join(".", parts[..^2])
                : ExtractJsNamespaceFromFile(file);
            return (classNamespace, owner, method);
        }

        // Object notation - everything before the method is the namespace.
        return (string.Join(".", parts[..^1]), "_", method);
    }

    // Simple function name. With no usable segment at all (empty or dots-only
    // input) the text is passed through unchanged.
    var simpleName = parts.Length == 1 ? parts[0] : functionName;
    return (ExtractJsNamespaceFromFile(file), "_", simpleName);
}
|
||||
|
||||
/// <summary>
/// Derives a namespace from a JavaScript source location.
/// </summary>
/// <remarks>
/// - Null or empty input yields "_".
/// - "webpack:///" and "file://" prefixes are removed and backslashes are
///   treated as path separators.
/// - For a path inside "node_modules/", the package directory that follows the
///   LAST "node_modules/" is returned (the innermost package when dependencies
///   are nested). Scoped packages are returned as "@scope/package".
/// - Otherwise the lower-cased file name without extension is returned, or "_"
///   when that is empty.
/// </remarks>
/// <param name="file">File path or URL of the frame; may be null.</param>
/// <returns>The package name, the file stem, or "_".</returns>
private static string ExtractJsNamespaceFromFile(string? file)
{
    if (string.IsNullOrEmpty(file))
    {
        return "_";
    }

    // Remove webpack:/// and similar prefixes; normalise Windows separators so
    // the node_modules detection and file-name extraction work for both styles.
    var path = file.Replace("webpack:///", "")
        .Replace("file://", "")
        .Replace('\\', '/');

    const string Marker = "node_modules/";
    var markerIndex = path.LastIndexOf(Marker, StringComparison.Ordinal);
    if (markerIndex >= 0)
    {
        var segments = path[(markerIndex + Marker.Length)..]
            .Split('/', StringSplitOptions.RemoveEmptyEntries);

        // Nothing after the marker: fall through to the file-name rule instead
        // of returning an empty namespace.
        if (segments.Length > 0)
        {
            // Handle scoped packages (@scope/package)
            if (segments[0].StartsWith('@'))
            {
                return segments.Length > 1 ? $"{segments[0]}/{segments[1]}" : segments[0];
            }

            return segments[0];
        }
    }

    var name = Path.GetFileNameWithoutExtension(path);
    return name.Length > 0 ? name.ToLowerInvariant() : "_";
}
|
||||
|
||||
/// <summary>
/// Splits a Python qualified name ("Class.method" or "function") into type and
/// method.
/// </summary>
/// <remarks>
/// When the name has at least two dot-separated segments and the second-to-last
/// one starts with an uppercase letter, that segment is the type and the last
/// one is the method; earlier segments are not part of the result. In every
/// other case the type is "_" and the whole input is returned as the method.
/// An empty second-to-last segment (e.g. ".render") is treated as "not a class"
/// instead of throwing IndexOutOfRangeException.
/// </remarks>
/// <param name="qualified">Qualified name to split.</param>
/// <returns>The type ("_" when none was recognised) and the method.</returns>
private static (string Type, string Method) ParsePythonQualifiedName(string qualified)
{
    var parts = qualified.Split('.');
    if (parts.Length >= 2)
    {
        var method = parts[^1];
        var type = parts[^2];

        // Check if it's a class (starts with uppercase); the length guard keeps
        // empty segments away from the indexer.
        if (type.Length > 0 && char.IsUpper(type[0]))
        {
            return (type, method);
        }
    }

    return ("_", qualified);
}
|
||||
|
||||
/// <summary>
/// Splits a PHP class name with optional namespace into a dot-separated
/// namespace and the bare type name.
/// </summary>
/// <remarks>
/// Backslashes are namespace separators and become dots in the result. Empty
/// segments, such as the one produced by the leading backslash of a fully
/// qualified name ("\App\Foo"), are ignored, so the namespace never starts with
/// a dot and is never empty. A name without a namespace gets "_"; input with
/// no usable segment at all yields ("_", "_").
/// </remarks>
/// <param name="fullClass">Class name, e.g. "App\Http\Controller".</param>
/// <returns>The namespace ("_" when absent) and the type name.</returns>
private static (string Namespace, string Type) ParsePhpClassName(string fullClass)
{
    // The original implementation replaced '\' with '.' and then split on '.',
    // so both characters act as separators here as well.
    var parts = fullClass.Split(new[] { '\\', '.' }, StringSplitOptions.RemoveEmptyEntries);

    return parts.Length switch
    {
        0 => ("_", "_"),
        1 => ("_", parts[0]),
        _ => (string.Join(".", parts[..^1]), parts[^1]),
    };
}
|
||||
|
||||
// Regex patterns
|
||||
// Matches a whole V8 stack frame of the form "function (location)".
// Groups: "function" = everything before the opening parenthesis (may carry
// trailing whitespace), "file" = the location without its trailing
// ":line" / ":line:column" suffix.
// The file group is lazy and may contain ':' so that URL-style locations such
// as "webpack:///src/app.js:12:3" or "file:///srv/app.js:1:1" are accepted.
// The previous pattern "[^:)]+" stopped at the first colon and rejected them.
// Parentheses stay excluded from the file, so nested "eval at ..." frames are
// still not matched.
[GeneratedRegex(@"^(?<function>[^(]+)\s*\((?<file>[^()]+?)(?::\d+(?::\d+)?)?\)$")]
private static partial Regex V8StackFrameRegex();
|
||||
|
||||
// Matches input made up entirely of word characters, '$' and '.', where the
// first character is a word character or '$'. Digits count as word characters,
// and consecutive or trailing dots are allowed.
[GeneratedRegex(@"^[\w$][\w$\.]*$")]
|
||||
private static partial Regex JsIdentifierRegex();
|
||||
|
||||
// Matches "module:qualified" over the whole input. Neither side can contain a
// colon, so exactly one colon is allowed.
// Groups: "module" = any characters except ':', "qualified" = word characters
// and dots.
[GeneratedRegex(@"^(?<module>[^:]+):(?<qualified>[\w.]+)$")]
|
||||
private static partial Regex PythonColonFormatRegex();
|
||||
|
||||
// Matches input consisting only of word characters and dots. A name without
// any dot also matches, as do names with leading, trailing or doubled dots.
[GeneratedRegex(@"^[\w.]+$")]
|
||||
private static partial Regex PythonDotFormatRegex();
|
||||
|
||||
// Matches input that is a single ASCII identifier: a letter or underscore
// followed by letters, digits or underscores.
[GeneratedRegex(@"^[a-zA-Z_][a-zA-Z0-9_]*$")]
|
||||
private static partial Regex PythonIdentifierRegex();
|
||||
|
||||
// Matches "Class->method" over the whole input.
// Groups: "class" = word characters and backslashes, "method" = word characters.
[GeneratedRegex(@"^(?<class>[\w\\]+)->(?<method>\w+)$")]
|
||||
private static partial Regex PhpInstanceMethodRegex();
|
||||
|
||||
// Matches "Class::method" over the whole input.
// Groups: "class" = word characters and backslashes, "method" = word characters.
[GeneratedRegex(@"^(?<class>[\w\\]+)::(?<method>\w+)$")]
|
||||
private static partial Regex PhpStaticMethodRegex();
|
||||
|
||||
// Matches "{closure:FILE}" or "{closure:FILE:START-END}" over the whole input,
// where START and END are digit runs.
// Group: "file" = characters other than ':' and '}', so a path that itself
// contains a colon does not match.
[GeneratedRegex(@"^\{closure:(?<file>[^:}]+)(?::\d+-\d+)?\}$")]
|
||||
private static partial Regex PhpClosureRegex();
|
||||
|
||||
// Matches input that is a single identifier built from ASCII letters,
// underscore and characters in the range U+0080-U+00FF, with ASCII digits
// allowed after the first character. Backslashes are not allowed.
[GeneratedRegex(@"^[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*$")]
|
||||
private static partial Regex PhpFunctionRegex();
|
||||
}
|
||||
@@ -67,5 +67,11 @@ public enum SymbolSource
|
||||
PatchAnalysis = 50,
|
||||
|
||||
/// <summary>Manual curation.</summary>
|
||||
ManualCuration = 51
|
||||
ManualCuration = 51,
|
||||
|
||||
/// <summary>OSV advisory database.</summary>
|
||||
OsvAdvisory = 52,
|
||||
|
||||
/// <summary>NVD advisory database.</summary>
|
||||
NvdAdvisory = 53
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user