save progress

This commit is contained in:
StellaOps Bot
2026-01-02 21:06:27 +02:00
parent f46bde5575
commit 3f197814c5
441 changed files with 21545 additions and 4306 deletions

View File

@@ -0,0 +1,27 @@
# Feedser BinaryAnalysis Agent Charter
## Mission
Provide deterministic binary fingerprint extraction and matching for Feedser ingestion pipelines.
## Responsibilities
- Maintain fingerprinters and factory composition with stable ordering.
- Keep outputs deterministic (timestamps, IDs, and match selection).
- Ensure metadata detection is portable and explicit about endianness.
- Document fingerprint formats and matching thresholds.
## Required Reading
- docs/modules/feedser/architecture.md
- docs/modules/platform/architecture-overview.md
- docs/07_HIGH_LEVEL_ARCHITECTURE.md
## Definition of Done
- Fingerprints are stable for identical inputs.
- Time and IDs come from injected providers or explicit inputs.
- Tests cover fingerprinters, factory ordering, and match selection.
## Working Agreement
- 1. Update task status to DOING/DONE in the sprint file and local TASKS.md.
- 2. Review this charter and required docs before coding.
- 3. Prefer TimeProvider and deterministic ID generation.
- 4. Keep outputs stable (ordering, timestamps, hashes) and offline-friendly.
- 5. Revert to TODO if paused; capture context in PR notes.

View File

@@ -0,0 +1,10 @@
# Feedser BinaryAnalysis Task Board
This board mirrors active sprint tasks for this module.
Source of truth: `docs/implplan/SPRINT_20251229_049_BE_csproj_audit_maint_tests.md`.
| Task ID | Status | Notes |
| --- | --- | --- |
| AUDIT-0339-M | DONE | Maintainability audit for Feedser.BinaryAnalysis. |
| AUDIT-0339-T | DONE | Test coverage audit for Feedser.BinaryAnalysis. |
| AUDIT-0339-A | TODO | Pending approval (non-test project). |

View File

@@ -0,0 +1,26 @@
# Feedser Core Agent Charter
## Mission
Provide deterministic patch signature extraction and function signature matching for Feedser evidence collection.
## Responsibilities
- Maintain HunkSig parsing/normalization and function signature extraction.
- Ensure outputs are deterministic (stable hashes, ordering, timestamps).
- Keep regex patterns and scoring rules documented and testable.
## Required Reading
- docs/modules/feedser/architecture.md
- docs/modules/platform/architecture-overview.md
- docs/07_HIGH_LEVEL_ARCHITECTURE.md
## Definition of Done
- Patch signatures are stable for identical diffs.
- Time and IDs come from injected providers or explicit inputs.
- Tests cover parsing, normalization, and function matching logic.
## Working Agreement
- 1. Update task status to DOING/DONE in the sprint file and local TASKS.md.
- 2. Review this charter and required docs before coding.
- 3. Prefer TimeProvider and deterministic ID generation.
- 4. Keep outputs stable (ordering, timestamps, hashes) and offline-friendly.
- 5. Revert to TODO if paused; capture context in PR notes.

View File

@@ -0,0 +1,760 @@
// -----------------------------------------------------------------------------
// FunctionSignatureExtractor.cs
// Sprint: SPRINT_20251230_001_BE_backport_resolver (BP-501 through BP-506)
// Task: Create function signature regex patterns for C, Go, Python, Rust
// Description: Extracts function signatures from patch context for Tier 3/4 matching
// -----------------------------------------------------------------------------
using System.Collections.Immutable;
using System.Text.RegularExpressions;
namespace StellaOps.Feedser.Core;
/// <summary>
/// Extracts function signatures from source code context in patches.
/// Used for Tier 3/4 evidence matching (source patch files, upstream commit mapping).
/// </summary>
public static partial class FunctionSignatureExtractor
{
/// <summary>
/// Extracts function signatures from the lines of a patch.
/// The language is picked from the extension of <paramref name="filePath"/>; context, added and
/// removed lines are then scanned together, in that order, by the extractor for that language.
/// </summary>
/// <param name="contextLines">Unchanged lines surrounding the diff.</param>
/// <param name="addedLines">Lines added by the patch.</param>
/// <param name="removedLines">Lines removed by the patch.</param>
/// <param name="filePath">File path; only its extension is used, to select the language.</param>
/// <returns>
/// The extracted signatures, or an empty array when the detected language has no extractor.
/// </returns>
public static ImmutableArray<ExtractedFunction> ExtractFunctionsFromContext(
    IEnumerable<string> contextLines,
    IEnumerable<string> addedLines,
    IEnumerable<string> removedLines,
    string filePath)
{
    var detected = DetectLanguage(filePath);

    // Materialize once so every extractor walks the same ordered snapshot.
    var candidateLines = contextLines
        .Concat(addedLines)
        .Concat(removedLines)
        .ToList();

    switch (detected)
    {
        case ProgrammingLanguage.C:
        case ProgrammingLanguage.Cpp:
            return ExtractCFunctions(candidateLines, filePath);

        case ProgrammingLanguage.Go:
            return ExtractGoFunctions(candidateLines, filePath);

        case ProgrammingLanguage.Python:
            return ExtractPythonFunctions(candidateLines, filePath);

        case ProgrammingLanguage.Rust:
            return ExtractRustFunctions(candidateLines, filePath);

        case ProgrammingLanguage.Java:
            return ExtractJavaFunctions(candidateLines, filePath);

        case ProgrammingLanguage.JavaScript:
        case ProgrammingLanguage.TypeScript:
            return ExtractJavaScriptFunctions(candidateLines, filePath);

        default:
            // Detected-but-unsupported languages (and Unknown) yield no signatures.
            return ImmutableArray<ExtractedFunction>.Empty;
    }
}
/// <summary>
/// Maps a file path to a <see cref="ProgrammingLanguage"/> using only its extension.
/// The extension is lower-cased with the invariant culture first, so matching ignores case.
/// </summary>
/// <param name="filePath">Path or file name whose extension identifies the language.</param>
/// <returns>
/// The matching language, or <see cref="ProgrammingLanguage.Unknown"/> for any other extension.
/// </returns>
public static ProgrammingLanguage DetectLanguage(string filePath)
{
    switch (Path.GetExtension(filePath).ToLowerInvariant())
    {
        case ".c":
        case ".h":
            return ProgrammingLanguage.C;

        case ".cpp":
        case ".cc":
        case ".cxx":
        case ".hpp":
        case ".hxx":
        case ".hh":
            return ProgrammingLanguage.Cpp;

        case ".go":
            return ProgrammingLanguage.Go;

        case ".py":
        case ".pyw":
            return ProgrammingLanguage.Python;

        case ".rs":
            return ProgrammingLanguage.Rust;

        case ".java":
            return ProgrammingLanguage.Java;

        case ".js":
        case ".mjs":
        case ".cjs":
            return ProgrammingLanguage.JavaScript;

        case ".ts":
        case ".tsx":
            return ProgrammingLanguage.TypeScript;

        case ".cs":
            return ProgrammingLanguage.CSharp;

        case ".rb":
            return ProgrammingLanguage.Ruby;

        case ".php":
            return ProgrammingLanguage.Php;

        case ".swift":
            return ProgrammingLanguage.Swift;

        case ".kt":
        case ".kts":
            return ProgrammingLanguage.Kotlin;

        default:
            return ProgrammingLanguage.Unknown;
    }
}
#region C/C++ Function Extraction (BP-503)
/// <summary>
/// Extract C/C++ function signatures, testing each line independently.
/// Patterns: "return_type function_name(params)" or "return_type* function_name(params)"
/// </summary>
/// <remarks>
/// The regex also accepts call statements such as <c>return helper(x);</c>, where the statement
/// keyword lands in the "return" group. Such matches are rejected here so that a callee is not
/// reported as a definition. Results are de-duplicated by signature (ordinal comparison) and are
/// always tagged <see cref="ProgrammingLanguage.C"/>, including when the file is C++.
/// </remarks>
/// <param name="lines">Patch lines to scan.</param>
/// <param name="filePath">Path recorded on every extracted function.</param>
/// <returns>Extracted functions in first-seen order.</returns>
private static ImmutableArray<ExtractedFunction> ExtractCFunctions(List<string> lines, string filePath)
{
    var functions = ImmutableArray.CreateBuilder<ExtractedFunction>();
    var seenSignatures = new HashSet<string>(StringComparer.Ordinal);

    foreach (var line in lines)
    {
        // Pattern: type [modifiers] name(params) { or type [modifiers] name(params);
        var match = CFunctionRegex().Match(line);
        if (!match.Success)
        {
            continue;
        }

        var returnType = match.Groups["return"].Value.Trim();
        var funcName = match.Groups["name"].Value.Trim();
        var params_ = match.Groups["params"].Value.Trim();

        // Skip keywords captured as the name ("else if (...)") and statements whose
        // leading keyword was captured as the return type ("return helper(x);").
        if (IsCFalsePositive(funcName) || ContainsCStatementKeyword(returnType))
        {
            continue;
        }

        var signature = $"{returnType} {funcName}({params_})";
        if (seenSignatures.Add(signature))
        {
            functions.Add(new ExtractedFunction
            {
                Name = funcName,
                Signature = signature,
                Language = ProgrammingLanguage.C,
                FilePath = filePath,
                Confidence = 0.85
            });
        }
    }

    return functions.ToImmutable();
}

/// <summary>
/// Returns true when the text captured as a C/C++ return type contains a statement keyword,
/// which means the line was a statement (for example a call) rather than a definition.
/// </summary>
private static bool ContainsCStatementKeyword(string returnType)
{
    // Pointer/reference markers may be glued to the token ("return*"), so split on them too.
    var tokens = returnType.Split(
        new[] { ' ', '\t', '*', '&' },
        StringSplitOptions.RemoveEmptyEntries);

    foreach (var token in tokens)
    {
        if (token is "return" or "else" or "do" or "throw" or "new" or "delete"
            or "co_return" or "co_await" or "co_yield")
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Tells whether a captured "function name" is really a C/C++ keyword or operator-like
/// construct that the definition regex can mistake for a function name.
/// </summary>
/// <param name="name">Identifier captured as the function name.</param>
/// <returns>True when the name must not be reported as a function.</returns>
private static bool IsCFalsePositive(string name)
{
    switch (name)
    {
        // Control-flow keywords.
        case "if":
        case "else":
        case "for":
        case "while":
        case "switch":
        case "return":
        // Operator-like keywords and macros that take a parenthesized argument.
        case "sizeof":
        case "typeof":
        case "alignof":
        case "offsetof":
        // Preprocessor-related words.
        case "defined":
        case "pragma":
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Regex for C/C++ function definitions.
/// Matches: return_type [*] function_name(params)
/// </summary>
/// <remarks>
/// Named groups:
/// "return" - zero or more of static/inline/extern/const/unsigned/signed/struct/enum, followed by
/// one type token (word characters, '*', '&amp;', ':', '[' and ']') and optional trailing '*'s;
/// "name" - the identifier immediately before the opening parenthesis;
/// "params" - everything up to the first ')'.
/// After the closing parenthesis only whitespace followed by '{', ';' or end of line is accepted.
/// The pattern is purely lexical, so a statement such as "return helper(x);" also matches, with
/// "return" captured in the "return" group.
/// </remarks>
[GeneratedRegex(@"^\s*(?<return>(?:static\s+|inline\s+|extern\s+|const\s+|unsigned\s+|signed\s+|struct\s+|enum\s+)*[\w*&:\[\]]+(?:\s*\*+)?)\s+(?<name>\w+)\s*\((?<params>[^)]*)\)\s*(?:\{|;|\s*$)", RegexOptions.Compiled)]
private static partial Regex CFunctionRegex();
#endregion
#region Go Function Extraction (BP-504)
/// <summary>
/// Extract Go function and method signatures, testing each line independently.
/// Patterns: "func name(params) return" or "func (receiver) name(params) return"
/// </summary>
/// <param name="lines">Patch lines to scan.</param>
/// <param name="filePath">Path recorded on every extracted function.</param>
/// <returns>Extracted functions in first-seen order, de-duplicated by signature.</returns>
private static ImmutableArray<ExtractedFunction> ExtractGoFunctions(List<string> lines, string filePath)
{
    var results = ImmutableArray.CreateBuilder<ExtractedFunction>();
    var emitted = new HashSet<string>(StringComparer.Ordinal);

    foreach (var candidate in lines)
    {
        var hit = GoFunctionRegex().Match(candidate);
        if (!hit.Success)
        {
            continue;
        }

        var receiverText = hit.Groups["receiver"].Value.Trim();
        var name = hit.Groups["name"].Value.Trim();
        var parameters = hit.Groups["params"].Value.Trim();
        var returnText = hit.Groups["returns"].Value.Trim();

        var hasReceiver = !string.IsNullOrEmpty(receiverText);

        // Methods keep their receiver in the signature; plain functions do not.
        var head = hasReceiver
            ? $"func ({receiverText}) {name}({parameters})"
            : $"func {name}({parameters})";
        var signature = string.IsNullOrEmpty(returnText) ? head : head + $" {returnText}";

        if (!emitted.Add(signature))
        {
            continue;
        }

        results.Add(new ExtractedFunction
        {
            Name = name,
            Signature = signature,
            Language = ProgrammingLanguage.Go,
            FilePath = filePath,
            Receiver = hasReceiver ? receiverText : null,
            Confidence = 0.90
        });
    }

    return results.ToImmutable();
}
/// <summary>
/// Regex for Go function definitions.
/// Matches: func [(*receiver Type)] name(params) [returns]
/// </summary>
/// <remarks>
/// Named groups: "receiver" (text inside the receiver parentheses, optional), "name",
/// "params" (everything up to the first ')') and "returns" (optional).
/// The "returns" class includes '.' so package-qualified result types such as
/// "(*http.Response, error)" are captured whole instead of being cut at the first dot.
/// The pattern is not anchored at the end; a trailing '{' is optional.
/// </remarks>
[GeneratedRegex(@"^\s*func\s+(?:\((?<receiver>[^)]+)\)\s+)?(?<name>\w+)\s*\((?<params>[^)]*)\)(?:\s*(?<returns>[\w.*\[\]\(\),\s]+))?\s*\{?", RegexOptions.Compiled)]
private static partial Regex GoFunctionRegex();
#endregion
#region Python Function Extraction (BP-505)
/// <summary>
/// Extract Python function signatures, testing each line independently.
/// Patterns: "def name(params):" or "async def name(params):"
/// </summary>
/// <param name="lines">Patch lines to scan.</param>
/// <param name="filePath">Path recorded on every extracted function.</param>
/// <returns>Extracted functions in first-seen order, de-duplicated by signature.</returns>
private static ImmutableArray<ExtractedFunction> ExtractPythonFunctions(List<string> lines, string filePath)
{
    var results = ImmutableArray.CreateBuilder<ExtractedFunction>();
    var emitted = new HashSet<string>(StringComparer.Ordinal);

    foreach (var candidate in lines)
    {
        var hit = PythonFunctionRegex().Match(candidate);
        if (!hit.Success)
        {
            continue;
        }

        var coroutine = hit.Groups["async"].Success;
        var name = hit.Groups["name"].Value.Trim();
        var parameters = hit.Groups["params"].Value.Trim();
        var annotation = hit.Groups["return"].Value.Trim();

        var prefix = coroutine ? "async def" : "def";
        var signature = $"{prefix} {name}({parameters})";
        if (!string.IsNullOrEmpty(annotation))
        {
            // Keep the return annotation as part of the identity of the signature.
            signature = $"{signature} -> {annotation}";
        }

        if (!emitted.Add(signature))
        {
            continue;
        }

        results.Add(new ExtractedFunction
        {
            Name = name,
            Signature = signature,
            Language = ProgrammingLanguage.Python,
            FilePath = filePath,
            IsAsync = coroutine,
            Confidence = 0.85
        });
    }

    return results.ToImmutable();
}
/// <summary>
/// Regex for Python function definitions.
/// Matches: [async] def name(params) [-> return_type]:
/// </summary>
/// <remarks>
/// Named groups: "async" (present only for coroutines), "name", "params" (everything up to the
/// first ')') and "return" (optional annotation).
/// The "return" class accepts '.' and quote characters so that dotted annotations
/// ("np.ndarray", "typing.Optional[str]") and string forward references still match; because
/// the trailing ':' is mandatory, an annotation outside the class rejects the whole line.
/// </remarks>
[GeneratedRegex(@"^\s*(?<async>async\s+)?def\s+(?<name>\w+)\s*\((?<params>[^)]*)\)(?:\s*->\s*(?<return>[\w\[\],\s|.'""]+))?\s*:", RegexOptions.Compiled)]
private static partial Regex PythonFunctionRegex();
#endregion
#region Rust Function Extraction (BP-506)
/// <summary>
/// Extract Rust function signatures, testing each line independently.
/// Patterns: "fn name(params) -> return" or "pub fn name(params)"
/// </summary>
/// <remarks>
/// The rebuilt signature has the form
/// "[visibility ][async ]fn name[generics](params)[ -> returns]"; an "unsafe" qualifier is
/// matched by the regex but is not part of the rebuilt signature.
/// </remarks>
/// <param name="lines">Patch lines to scan.</param>
/// <param name="filePath">Path recorded on every extracted function.</param>
/// <returns>Extracted functions in first-seen order, de-duplicated by signature.</returns>
private static ImmutableArray<ExtractedFunction> ExtractRustFunctions(List<string> lines, string filePath)
{
    var results = ImmutableArray.CreateBuilder<ExtractedFunction>();
    var emitted = new HashSet<string>(StringComparer.Ordinal);

    foreach (var candidate in lines)
    {
        var hit = RustFunctionRegex().Match(candidate);
        if (!hit.Success)
        {
            continue;
        }

        var visibility = hit.Groups["vis"].Value.Trim();
        var coroutine = hit.Groups["async"].Success;
        var name = hit.Groups["name"].Value.Trim();
        var generics = hit.Groups["generics"].Value.Trim();
        var parameters = hit.Groups["params"].Value.Trim();
        var returnText = hit.Groups["returns"].Value.Trim();

        // Each optional piece collapses to an empty string when absent.
        var visibilityPart = string.IsNullOrEmpty(visibility) ? "" : visibility + " ";
        var asyncPart = coroutine ? "async " : "";
        var returnPart = string.IsNullOrEmpty(returnText) ? "" : $" -> {returnText}";

        var signature = $"{visibilityPart}{asyncPart}fn {name}{generics}({parameters}){returnPart}";

        if (!emitted.Add(signature))
        {
            continue;
        }

        results.Add(new ExtractedFunction
        {
            Name = name,
            Signature = signature,
            Language = ProgrammingLanguage.Rust,
            FilePath = filePath,
            IsAsync = coroutine,
            Confidence = 0.90
        });
    }

    return results.ToImmutable();
}
/// <summary>
/// Regex for Rust function definitions.
/// Matches: [pub|pub(crate)] [async] [unsafe] fn name[&lt;generics&gt;](params) [-> return_type]
/// </summary>
/// <remarks>
/// Named groups: "vis" (optional visibility), "async" (present only for async fns), "name",
/// "generics" (optional, angle brackets included, no nesting), "params" (everything up to the
/// first ')') and "returns" (optional).
/// The "returns" class includes ':' and parentheses so that path-qualified types such as
/// "Result&lt;T, T::Err&gt;" and tuple returns such as "(u8, u8)" are captured whole.
/// The pattern is not anchored at the end; a trailing "where" or '{' is optional.
/// </remarks>
[GeneratedRegex(@"^\s*(?<vis>pub(?:\s*\([^)]+\))?\s+)?(?<async>async\s+)?(?:unsafe\s+)?fn\s+(?<name>\w+)(?<generics><[^>]+>)?\s*\((?<params>[^)]*)\)(?:\s*->\s*(?<returns>[\w<>&*'\[\]\(\),:\s]+))?\s*(?:where|{)?", RegexOptions.Compiled)]
private static partial Regex RustFunctionRegex();
#endregion
#region Java Function Extraction
/// <summary>
/// Extract Java method signatures, testing each line independently.
/// </summary>
/// <remarks>
/// The regex requires a non-empty "return" group, so a constructor such as
/// <c>public Foo(int x)</c> matches with the modifier captured as the return type, and a
/// statement such as <c>return foo(x);</c> matches with the keyword captured as the return type.
/// Both cases are filtered out here so that only method declarations are reported.
/// </remarks>
/// <param name="lines">Patch lines to scan.</param>
/// <param name="filePath">Path recorded on every extracted function.</param>
/// <returns>Extracted methods in first-seen order, de-duplicated by signature.</returns>
private static ImmutableArray<ExtractedFunction> ExtractJavaFunctions(List<string> lines, string filePath)
{
    var functions = ImmutableArray.CreateBuilder<ExtractedFunction>();
    var seenSignatures = new HashSet<string>(StringComparer.Ordinal);

    foreach (var line in lines)
    {
        var match = JavaMethodRegex().Match(line);
        if (!match.Success)
        {
            continue;
        }

        var modifiers = match.Groups["mods"].Value.Trim();
        var returnType = match.Groups["return"].Value.Trim();
        var methodName = match.Groups["name"].Value.Trim();
        var params_ = match.Groups["params"].Value.Trim();

        // Skip constructors and statements: neither has a real return type, and the text
        // captured in its place is a modifier or a statement keyword.
        if (string.IsNullOrEmpty(returnType) || !IsPlausibleJavaReturnType(returnType))
        {
            continue;
        }

        var signature = $"{modifiers} {returnType} {methodName}({params_})".Trim();
        if (seenSignatures.Add(signature))
        {
            functions.Add(new ExtractedFunction
            {
                Name = methodName,
                Signature = signature,
                Language = ProgrammingLanguage.Java,
                FilePath = filePath,
                Confidence = 0.85
            });
        }
    }

    return functions.ToImmutable();
}

/// <summary>
/// Returns false when the text captured as a Java return type contains a modifier or a
/// statement keyword, which means the line was a constructor or a statement.
/// </summary>
private static bool IsPlausibleJavaReturnType(string returnType)
{
    var tokens = returnType.Split(
        new[] { ' ', '\t' },
        StringSplitOptions.RemoveEmptyEntries);

    foreach (var token in tokens)
    {
        if (token is "public" or "private" or "protected" or "static" or "final" or "abstract"
            or "synchronized" or "native" or "strictfp"
            or "return" or "new" or "throw" or "else" or "do" or "assert")
        {
            return false;
        }
    }

    return true;
}
// Regex for Java method declarations.
// Named groups: "mods" (optional run of modifiers), "return", "name" and "params"
// (everything up to the first ')'). An optional "throws" clause and '{' may follow.
// Note: mods captures all modifiers as a single group (not repeated)
// Note: the "return" group is mandatory, so for a constructor such as "public Foo(int x)"
// the engine backtracks out of "mods" and captures "public" as the return type.
[GeneratedRegex(@"^\s*(?<mods>(?:(?:public|private|protected|static|final|abstract|synchronized|native|strictfp)\s+)+)?(?<return>[\w<>\[\],\s]+)\s+(?<name>\w+)\s*\((?<params>[^)]*)\)\s*(?:throws\s+[\w,\s]+)?\s*\{?", RegexOptions.Compiled)]
private static partial Regex JavaMethodRegex();
#endregion
#region JavaScript/TypeScript Function Extraction
/// <summary>
/// Extract JavaScript/TypeScript function signatures, testing each line against the
/// "function" declaration pattern and then the arrow-function pattern.
/// </summary>
/// <remarks>
/// Every result is tagged <see cref="ProgrammingLanguage.JavaScript"/>, including for
/// TypeScript files. Arrow functions are normalized to "const name = (params) =>" and are
/// never flagged as async.
/// </remarks>
/// <param name="lines">Patch lines to scan.</param>
/// <param name="filePath">Path recorded on every extracted function.</param>
/// <returns>Extracted functions in first-seen order, de-duplicated by signature.</returns>
private static ImmutableArray<ExtractedFunction> ExtractJavaScriptFunctions(List<string> lines, string filePath)
{
    var results = ImmutableArray.CreateBuilder<ExtractedFunction>();
    var emitted = new HashSet<string>(StringComparer.Ordinal);

    // Records a function once per distinct signature.
    void Record(string name, string signature, bool isAsync, double confidence)
    {
        if (!emitted.Add(signature))
        {
            return;
        }

        results.Add(new ExtractedFunction
        {
            Name = name,
            Signature = signature,
            Language = ProgrammingLanguage.JavaScript,
            FilePath = filePath,
            IsAsync = isAsync,
            Confidence = confidence
        });
    }

    foreach (var candidate in lines)
    {
        // Regular function
        var declaration = JsFunctionRegex().Match(candidate);
        if (declaration.Success)
        {
            var coroutine = declaration.Groups["async"].Success;
            var name = declaration.Groups["name"].Value.Trim();
            var parameters = declaration.Groups["params"].Value.Trim();
            var keyword = coroutine ? "async function" : "function";

            Record(name, $"{keyword} {name}({parameters})", coroutine, 0.80);
        }

        // Arrow function or method
        var arrow = JsArrowFunctionRegex().Match(candidate);
        if (arrow.Success)
        {
            var name = arrow.Groups["name"].Value.Trim();
            var parameters = arrow.Groups["params"].Value.Trim();

            Record(name, $"const {name} = ({parameters}) =>", false, 0.75);
        }
    }

    return results.ToImmutable();
}
// Regex for JavaScript/TypeScript "function" declarations.
// Matches: [export] [async] function name(params)
// Named groups: "async" (present only for async functions), "name" and "params"
// (everything up to the first ')'). The pattern is not anchored at the end.
[GeneratedRegex(@"^\s*(?:export\s+)?(?<async>async\s+)?function\s+(?<name>\w+)\s*\((?<params>[^)]*)\)", RegexOptions.Compiled)]
private static partial Regex JsFunctionRegex();
// Regex for arrow functions assigned to a variable.
// Matches: [export] const|let|var name = [async] [(]params[)] =>
// Named groups: "name" and "params" (no ')' or '=' allowed inside).
// "async" is consumed by a non-capturing group, so callers cannot tell whether it was present.
// Nothing but whitespace may sit between the parameter list and "=>", so a TypeScript
// return-type annotation ("(a: number): string =>") prevents a match.
[GeneratedRegex(@"^\s*(?:export\s+)?(?:const|let|var)\s+(?<name>\w+)\s*=\s*(?:async\s+)?\(?(?<params>[^)=]*)\)?\s*=>", RegexOptions.Compiled)]
private static partial Regex JsArrowFunctionRegex();
#endregion
}
/// <summary>
/// Represents an extracted function signature from source code.
/// Instances are immutable records produced by <see cref="FunctionSignatureExtractor"/>.
/// </summary>
public sealed record ExtractedFunction
{
/// <summary>
/// The function/method name.
/// This is the "name" group captured by the language-specific regex.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// The full signature as extracted.
/// Extractors rebuild it from the captured groups, so it is a normalized form rather than
/// the raw source line; it is also the key used to de-duplicate results.
/// </summary>
public required string Signature { get; init; }
/// <summary>
/// The programming language.
/// </summary>
public required ProgrammingLanguage Language { get; init; }
/// <summary>
/// Source file path.
/// Copied verbatim from the path passed to the extractor.
/// </summary>
public required string FilePath { get; init; }
/// <summary>
/// Confidence in the extraction (0.0 to 1.0).
/// Each extractor assigns a fixed constant (between 0.75 and 0.90 in the extractors of this file).
/// </summary>
public required double Confidence { get; init; }
/// <summary>
/// For methods, the receiver/type (e.g., Go receiver, C++ class).
/// Null when not set; in this file only the Go extractor populates it.
/// </summary>
public string? Receiver { get; init; }
/// <summary>
/// Whether the function is async.
/// Defaults to false when an extractor does not set it.
/// </summary>
public bool IsAsync { get; init; }
}
/// <summary>
/// Outcome of comparing one source function against one target function.
/// </summary>
public sealed record FunctionMatchResult
{
    /// <summary>
    /// Lowest score that still counts as a strong match.
    /// </summary>
    private const double StrongMatchThreshold = 0.70;

    /// <summary>
    /// Lowest score that still counts as a weak match.
    /// </summary>
    private const double WeakMatchThreshold = 0.40;

    /// <summary>
    /// The function the comparison started from.
    /// </summary>
    public required ExtractedFunction Source { get; init; }

    /// <summary>
    /// The candidate function that <see cref="Source"/> was compared with.
    /// </summary>
    public required ExtractedFunction Target { get; init; }

    /// <summary>
    /// Combined match score, from 0.0 to 1.0.
    /// </summary>
    public required double Score { get; init; }

    /// <summary>
    /// The per-component scores behind <see cref="Score"/>, kept for debugging/audit.
    /// </summary>
    public required FunctionMatchScores ComponentScores { get; init; }

    /// <summary>
    /// True when <see cref="Score"/> is at least 0.70.
    /// </summary>
    public bool IsStrongMatch
    {
        get { return Score >= StrongMatchThreshold; }
    }

    /// <summary>
    /// True when <see cref="Score"/> is at least 0.40; every strong match is also a weak match.
    /// </summary>
    public bool IsWeakMatch
    {
        get { return Score >= WeakMatchThreshold; }
    }
}
/// <summary>
/// Component scores for function matching.
/// These are the unweighted inputs; the weights are applied when the overall score is computed.
/// </summary>
public sealed record FunctionMatchScores
{
/// <summary>
/// Name similarity score (0.0 to 1.0).
/// </summary>
public required double NameScore { get; init; }
/// <summary>
/// Signature similarity score (0.0 to 1.0).
/// </summary>
public required double SignatureScore { get; init; }
/// <summary>
/// Language match bonus (0.0 or 1.0).
/// This is an indicator value, not the weighted contribution to the overall score.
/// </summary>
public required double LanguageBonus { get; init; }
}
/// <summary>
/// Supported programming languages for function extraction.
/// </summary>
/// <remarks>
/// All members except <see cref="Unknown"/> can be returned by
/// FunctionSignatureExtractor.DetectLanguage, but only C, Cpp, Go, Python, Rust, Java,
/// JavaScript and TypeScript have an extractor; the others currently yield no functions.
/// </remarks>
public enum ProgrammingLanguage
{
// Extension not recognized.
Unknown,
C,
Cpp,
Go,
Python,
Rust,
Java,
JavaScript,
TypeScript,
// Detected by extension only; no extractor in this file.
CSharp,
Ruby,
Php,
Swift,
Kotlin
}
/// <summary>
/// Extension methods for fuzzy function matching (BP-508).
/// </summary>
public static partial class FunctionMatchingExtensions
{
/// <summary>
/// Weight for function name similarity.
/// Together with <see cref="SignatureWeight"/> and <see cref="LanguageBonus"/> the weights sum to 1.0.
/// </summary>
private const double NameWeight = 0.50;
/// <summary>
/// Weight for signature similarity.
/// </summary>
private const double SignatureWeight = 0.35;
/// <summary>
/// Bonus for matching language.
/// Applied as a weight to the 0.0/1.0 language-compatibility indicator.
/// </summary>
private const double LanguageBonus = 0.15;
/// <summary>
/// Scores every candidate against <paramref name="source"/> and returns the highest-scoring one.
/// When several candidates share the highest score, the first one encountered wins.
/// </summary>
/// <param name="source">Source function to match.</param>
/// <param name="candidates">Candidate functions to match against.</param>
/// <param name="minScore">Minimum score a candidate must reach (default 0.40).</param>
/// <returns>The best match, or null when no candidate reaches <paramref name="minScore"/>.</returns>
public static FunctionMatchResult? FindBestMatch(
    this ExtractedFunction source,
    IEnumerable<ExtractedFunction> candidates,
    double minScore = 0.40)
{
    FunctionMatchResult? winner = null;

    foreach (var candidate in candidates)
    {
        var contender = ComputeMatch(source, candidate);

        // Below the threshold: never eligible.
        if (!(contender.Score >= minScore))
        {
            continue;
        }

        // Strictly greater, so an earlier candidate keeps its place on a tie.
        if (winner is null || contender.Score > winner.Score)
        {
            winner = contender;
        }
    }

    return winner;
}
/// <summary>
/// Scores every candidate against <paramref name="source"/> and keeps those at or above the threshold.
/// </summary>
/// <param name="source">Source function to match.</param>
/// <param name="candidates">Candidate functions to match against.</param>
/// <param name="minScore">Minimum score a candidate must reach (default 0.40).</param>
/// <returns>
/// The qualifying matches ordered by score, highest first; equal scores keep candidate order.
/// </returns>
public static ImmutableArray<FunctionMatchResult> FindAllMatches(
    this ExtractedFunction source,
    IEnumerable<ExtractedFunction> candidates,
    double minScore = 0.40)
{
    var ranked =
        from candidate in candidates
        let result = ComputeMatch(source, candidate)
        where result.Score >= minScore
        orderby result.Score descending
        select result;

    return ranked.ToImmutableArray();
}
/// <summary>
/// Scores how well <paramref name="target"/> matches <paramref name="source"/>.
/// The overall score is the weighted sum of name similarity, normalized-signature similarity
/// and a language-compatibility indicator, capped at 1.0.
/// </summary>
/// <param name="source">Function being matched.</param>
/// <param name="target">Function it is compared with.</param>
/// <returns>The overall score together with its unweighted components.</returns>
public static FunctionMatchResult ComputeMatch(ExtractedFunction source, ExtractedFunction target)
{
    var components = new FunctionMatchScores
    {
        // Name similarity (Levenshtein-based)
        NameScore = ComputeStringSimilarity(source.Name, target.Name),

        // Signature similarity, after stripping whitespace noise and decorators
        SignatureScore = ComputeStringSimilarity(
            NormalizeSignature(source.Signature),
            NormalizeSignature(target.Signature)),

        // 1.0 when the languages are compatible, otherwise 0.0
        LanguageBonus = AreLanguagesCompatible(source.Language, target.Language) ? 1.0 : 0.0
    };

    var weighted = (components.NameScore * NameWeight) +
                   (components.SignatureScore * SignatureWeight) +
                   (components.LanguageBonus * LanguageBonus);

    return new FunctionMatchResult
    {
        Source = source,
        Target = target,
        Score = Math.Min(1.0, weighted), // Cap at 1.0
        ComponentScores = components
    };
}
/// <summary>
/// Similarity of two strings based on normalized Levenshtein distance.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>
/// 1.0 for identical strings (or two empty/null strings), 0.0 when exactly one is empty/null,
/// 0.95 when they differ only by case, otherwise 1 - distance / length of the longer string.
/// </returns>
private static double ComputeStringSimilarity(string a, string b)
{
    var aMissing = string.IsNullOrEmpty(a);
    var bMissing = string.IsNullOrEmpty(b);
    if (aMissing || bMissing)
    {
        // Two empty strings are identical; one empty string shares nothing with the other.
        return aMissing && bMissing ? 1.0 : 0.0;
    }

    if (string.Equals(a, b, StringComparison.Ordinal))
    {
        return 1.0;
    }

    // Case-insensitive comparison gives partial credit
    if (string.Equals(a, b, StringComparison.OrdinalIgnoreCase))
    {
        return 0.95;
    }

    var edits = LevenshteinDistance(a, b);
    var longest = Math.Max(a.Length, b.Length);
    return 1.0 - ((double)edits / longest);
}
/// <summary>
/// Levenshtein edit distance between two strings, counted in UTF-16 code units.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>Minimum number of single-character insertions, deletions and substitutions.</returns>
private static int LevenshteinDistance(string a, string b)
{
    if (a.Length == 0)
    {
        return b.Length;
    }

    if (b.Length == 0)
    {
        return a.Length;
    }

    // Only two rows of the DP table are alive at any time.
    var previousRow = new int[b.Length + 1];
    var currentRow = new int[b.Length + 1];

    for (var col = 0; col < previousRow.Length; col++)
    {
        previousRow[col] = col;
    }

    for (var row = 0; row < a.Length; row++)
    {
        currentRow[0] = row + 1;

        for (var col = 0; col < b.Length; col++)
        {
            var deletion = previousRow[col + 1] + 1;
            var insertion = currentRow[col] + 1;
            var substitution = previousRow[col] + (a[row] == b[col] ? 0 : 1);

            currentRow[col + 1] = Math.Min(Math.Min(deletion, insertion), substitution);
        }

        // The row just filled becomes the "previous" row of the next iteration.
        (previousRow, currentRow) = (currentRow, previousRow);
    }

    return previousRow[b.Length];
}
/// <summary>
/// Normalize signature for comparison (remove whitespace variations, etc).
/// </summary>
/// <remarks>
/// Runs of whitespace collapse to a single space, then the decorators const, static, inline,
/// extern, virtual, override and final are removed when they appear as whole words followed by
/// a space. The word boundary matters: a plain substring replace would also eat the tail of
/// identifiers such as "allow_override " or "is_final ".
/// </remarks>
/// <param name="signature">Signature text to normalize.</param>
/// <returns>The normalized signature.</returns>
private static string NormalizeSignature(string signature)
{
    // Remove excessive whitespace
    var normalized = WhitespaceRegex().Replace(signature.Trim(), " ");

    // Remove common decorators that don't affect identity
    return Regex.Replace(
        normalized,
        @"\b(?:const|static|inline|extern|virtual|override|final) ",
        string.Empty,
        RegexOptions.CultureInvariant);
}
// Matches one or more consecutive whitespace characters; used to collapse them to one space.
[GeneratedRegex(@"\s+", RegexOptions.Compiled)]
private static partial Regex WhitespaceRegex();
/// <summary>
/// Decides whether functions written in two languages may be matched against each other.
/// Identical languages are always compatible; in addition C pairs with C++ and JavaScript
/// pairs with TypeScript.
/// </summary>
/// <param name="a">First language.</param>
/// <param name="b">Second language.</param>
/// <returns>True when the two languages are compatible.</returns>
private static bool AreLanguagesCompatible(ProgrammingLanguage a, ProgrammingLanguage b)
{
    // 0 means "no family": such a language is only compatible with itself.
    static int FamilyOf(ProgrammingLanguage language) => language switch
    {
        ProgrammingLanguage.C or ProgrammingLanguage.Cpp => 1,
        ProgrammingLanguage.JavaScript or ProgrammingLanguage.TypeScript => 2,
        _ => 0
    };

    if (a == b)
    {
        return true;
    }

    var family = FamilyOf(a);
    return family != 0 && family == FamilyOf(b);
}
}

View File

@@ -0,0 +1,10 @@
# Feedser Core Task Board
This board mirrors active sprint tasks for this module.
Source of truth: `docs/implplan/SPRINT_20251229_049_BE_csproj_audit_maint_tests.md`.
| Task ID | Status | Notes |
| --- | --- | --- |
| AUDIT-0340-M | DONE | Maintainability audit for Feedser.Core. |
| AUDIT-0340-T | DONE | Test coverage audit for Feedser.Core. |
| AUDIT-0340-A | TODO | Pending approval (non-test project). |

View File

@@ -0,0 +1,26 @@
# Feedser Core Tests Agent Charter
## Mission
Validate Feedser patch signature and function extraction behavior with deterministic unit tests.
## Responsibilities
- Maintain test coverage for HunkSig extraction and function signature matching.
- Ensure tests are deterministic and avoid time-sensitive assertions.
- Keep fixtures minimal and offline-safe.
## Required Reading
- docs/modules/feedser/architecture.md
- docs/modules/platform/architecture-overview.md
- docs/07_HIGH_LEVEL_ARCHITECTURE.md
## Definition of Done
- Tests cover parsing, normalization, and matching behavior.
- Tests remain deterministic across runs and environments.
- Failures provide actionable diagnostics.
## Working Agreement
- 1. Update task status to DOING/DONE in the sprint file and local TASKS.md.
- 2. Review this charter and required docs before coding.
- 3. Avoid DateTimeOffset.UtcNow/Guid.NewGuid in tests unless explicitly controlled.
- 4. Keep outputs stable (ordering, timestamps, hashes).
- 5. Revert to TODO if paused; capture context in PR notes.

View File

@@ -0,0 +1,780 @@
// -----------------------------------------------------------------------------
// FunctionSignatureExtractorTests.cs
// Sprint: SPRINT_20251230_001_BE_backport_resolver (BP-507)
// Task: Unit tests for function extraction patterns
// Description: Tests for C, Go, Python, Rust function signature extraction
// -----------------------------------------------------------------------------
using FluentAssertions;
using StellaOps.Feedser.Core;
using StellaOps.TestKit;
using Xunit;
namespace StellaOps.Feedser.Core.Tests;
/// <summary>
/// Unit tests for function signature extraction from patch context.
/// </summary>
[Trait("Category", TestCategories.Unit)]
public sealed class FunctionSignatureExtractorTests
{
#region C Function Tests (BP-503)
[Theory]
[InlineData("void foo(int x)", "foo", "void foo(int x)")]
[InlineData("int main(int argc, char** argv)", "main", "int main(int argc, char** argv)")]
[InlineData("static void* helper(void)", "helper", "static void* helper(void)")]
[InlineData("unsigned int count_items(const char* s)", "count_items", "unsigned int count_items(const char* s)")]
[InlineData("struct Node* create_node(int value)", "create_node", "struct Node* create_node(int value)")]
public void ExtractCFunctions_ValidDefinitions_ExtractsCorrectly(
    string line,
    string expectedName,
    string expectedSignature)
{
    // Arrange: the definition arrives as the only added line of a C file
    List<string> addedLines = [line];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines: [],
        addedLines: addedLines,
        removedLines: [],
        filePath: "test.c");

    // Assert: exactly one function with the expected identity
    extracted.Should().ContainSingle();
    var function = extracted[0];
    function.Name.Should().Be(expectedName);
    function.Signature.Should().Contain(expectedSignature);
    function.Language.Should().Be(ProgrammingLanguage.C);
}
[Theory]
[InlineData("int result = calculate(x, y);", "calculate")]
[InlineData("if (condition) {", "if")]
[InlineData("for (int i = 0; i < n; i++) {", "for")]
[InlineData("while (running) {", "while")]
public void ExtractCFunctions_FalsePositives_NotExtracted(string line, string notExpected)
{
    // Arrange: a statement that merely looks like a definition
    List<string> addedLines = [line];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines: [],
        addedLines: addedLines,
        removedLines: [],
        filePath: "test.c");

    // Assert: control flow and function calls must not surface as definitions
    extracted.Should().NotContain(function => function.Name == notExpected);
}
[Fact]
public void ExtractCFunctions_SecurityPatch_ExtractsVulnerableFunction()
{
    // Arrange: a realistic security patch; the function header lives in the context lines
    List<string> contextLines =
    [
        "/*",
        " * Process user input buffer",
        " */",
        "static int process_buffer(const char* input, size_t len) {"
    ];
    List<string> addedLines =
    [
        " if (len > MAX_BUFFER_SIZE) {",
        " return -EINVAL;",
        " }"
    ];
    List<string> removedLines =
    [
        " // No bounds checking - vulnerable!"
    ];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines: contextLines,
        addedLines: addedLines,
        removedLines: removedLines,
        filePath: "src/buffer.c");

    // Assert: only the enclosing function is reported, with the path it came from
    extracted.Should().ContainSingle();
    var function = extracted[0];
    function.Name.Should().Be("process_buffer");
    function.FilePath.Should().Be("src/buffer.c");
}
#endregion
#region Go Function Tests (BP-504)
[Theory]
[InlineData("func main() {", "main", "func main()")]
[InlineData("func (s *Server) handleRequest(w http.ResponseWriter, r *http.Request) {", "handleRequest", "func (s *Server) handleRequest(w http.ResponseWriter, r *http.Request)")]
[InlineData("func Process(data []byte) error {", "Process", "func Process(data []byte)")]
[InlineData("func NewClient(addr string) (*Client, error) {", "NewClient", "func NewClient(addr string)")]
public void ExtractGoFunctions_ValidDefinitions_ExtractsCorrectly(
    string line,
    string expectedName,
    string expectedSignatureContains)
{
    // Arrange: the definition arrives as the only added line of a Go file
    List<string> addedLines = [line];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines: [],
        addedLines: addedLines,
        removedLines: [],
        filePath: "main.go");

    // Assert: exactly one function with the expected identity
    extracted.Should().ContainSingle();
    var function = extracted[0];
    function.Name.Should().Be(expectedName);
    function.Signature.Should().Contain(expectedSignatureContains);
    function.Language.Should().Be(ProgrammingLanguage.Go);
}
[Fact]
public void ExtractGoFunctions_WithReceiver_ExtractsReceiver()
{
    // Arrange: a method declared on a pointer receiver
    List<string> addedLines =
    [
        "func (c *Client) Connect(ctx context.Context) error {"
    ];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines: [],
        addedLines: addedLines,
        removedLines: [],
        filePath: "client.go");

    // Assert: the receiver text is surfaced separately from the name
    extracted.Should().ContainSingle();
    var method = extracted[0];
    method.Name.Should().Be("Connect");
    method.Receiver.Should().Be("c *Client");
}
#endregion
#region Python Function Tests (BP-505)
[Theory]
[InlineData("def foo():", "foo", "def foo()")]
[InlineData("def process_data(data: bytes) -> str:", "process_data", "def process_data(data: bytes)")]
[InlineData("async def fetch_url(url: str) -> Response:", "fetch_url", "async def fetch_url(url: str)")]
[InlineData(" def __init__(self, name: str):", "__init__", "def __init__(self, name: str)")]
public void ExtractPythonFunctions_ValidDefinitions_ExtractsCorrectly(
    string line,
    string expectedName,
    string expectedSignatureContains)
{
    // Arrange: the definition arrives as the only added line of a Python file
    List<string> addedLines = [line];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines: [],
        addedLines: addedLines,
        removedLines: [],
        filePath: "test.py");

    // Assert: exactly one function with the expected identity
    extracted.Should().ContainSingle();
    var function = extracted[0];
    function.Name.Should().Be(expectedName);
    function.Signature.Should().Contain(expectedSignatureContains);
    function.Language.Should().Be(ProgrammingLanguage.Python);
}
[Fact]
public void ExtractPythonFunctions_AsyncFunction_MarkedAsAsync()
{
    // Arrange: a coroutine definition
    List<string> addedLines = ["async def download(url: str) -> bytes:"];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines: [],
        addedLines: addedLines,
        removedLines: [],
        filePath: "downloader.py");

    // Assert: the async flag is carried onto the result
    extracted.Should().ContainSingle();
    var function = extracted[0];
    function.IsAsync.Should().BeTrue();
}
#endregion
#region Rust Function Tests (BP-506)
[Theory]
[InlineData("fn main() {", "main", "fn main()")]
[InlineData("pub fn new(size: usize) -> Self {", "new", "pub fn new(size: usize)")]
[InlineData("async fn fetch(url: &str) -> Result<Response, Error> {", "fetch", "async fn fetch(url: &str)")]
[InlineData("pub(crate) unsafe fn raw_ptr(data: *const u8) -> *mut u8 {", "raw_ptr", "fn raw_ptr(data: *const u8)")]
public void ExtractRustFunctions_ValidDefinitions_ExtractsCorrectly(
    string line,
    string expectedName,
    string expectedSignatureContains)
{
    // Arrange: the definition arrives as the only added line of a Rust file
    List<string> addedLines = [line];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines: [],
        addedLines: addedLines,
        removedLines: [],
        filePath: "lib.rs");

    // Assert: exactly one function with the expected identity
    extracted.Should().ContainSingle();
    var function = extracted[0];
    function.Name.Should().Be(expectedName);
    function.Signature.Should().Contain(expectedSignatureContains);
    function.Language.Should().Be(ProgrammingLanguage.Rust);
}
[Fact]
public void ExtractRustFunctions_GenericFunction_ExtractsCorrectly()
{
    // Arrange: a generic function with a trait bound
    List<string> addedLines =
    [
        "pub fn parse<T: FromStr>(input: &str) -> Result<T, T::Err> {"
    ];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines: [],
        addedLines: addedLines,
        removedLines: [],
        filePath: "parser.rs");

    // Assert: the generic parameter list survives in the signature
    extracted.Should().ContainSingle();
    var function = extracted[0];
    function.Name.Should().Be("parse");
    function.Signature.Should().Contain("<T: FromStr>");
}
#endregion
#region Language Detection Tests
/// <summary>
/// <c>DetectLanguage</c> must map each file path to the expected
/// <c>ProgrammingLanguage</c>, including <c>Unknown</c> for an unrecognized extension.
/// </summary>
/// <param name="filePath">File path whose extension drives detection.</param>
/// <param name="expectedLanguage">Language the detector should return.</param>
[Theory]
[InlineData("test.c", ProgrammingLanguage.C)]
[InlineData("test.h", ProgrammingLanguage.C)]
[InlineData("test.cpp", ProgrammingLanguage.Cpp)]
[InlineData("test.hpp", ProgrammingLanguage.Cpp)]
[InlineData("main.go", ProgrammingLanguage.Go)]
[InlineData("script.py", ProgrammingLanguage.Python)]
[InlineData("lib.rs", ProgrammingLanguage.Rust)]
[InlineData("App.java", ProgrammingLanguage.Java)]
[InlineData("index.js", ProgrammingLanguage.JavaScript)]
[InlineData("component.ts", ProgrammingLanguage.TypeScript)]
[InlineData("unknown.xyz", ProgrammingLanguage.Unknown)]
public void DetectLanguage_KnownExtensions_ReturnsCorrectLanguage(
    string filePath,
    ProgrammingLanguage expectedLanguage)
{
    // Act + Assert
    FunctionSignatureExtractor.DetectLanguage(filePath)
        .Should().Be(expectedLanguage);
}
#endregion
#region Edge Cases
/// <summary>
/// Extraction over three empty line collections for a <c>.c</c> path must produce no functions.
/// </summary>
[Fact]
public void ExtractFunctions_EmptyLines_ReturnsEmpty()
{
    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext([], [], [], "test.c");

    // Assert
    extracted.Should().BeEmpty();
}
/// <summary>
/// When the file extension is not recognized, extraction must return nothing
/// even though the supplied lines look like a function definition.
/// </summary>
[Fact]
public void ExtractFunctions_UnknownLanguage_ReturnsEmpty()
{
    // Arrange
    List<string> contextLines = ["FUNCTION foo()", "END FUNCTION"];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines,
        [],
        [],
        "test.fortran"); // Unknown extension

    // Assert
    extracted.Should().BeEmpty();
}
/// <summary>
/// Three Python definitions (two plain, one async) in the added lines must all be
/// extracted, and every one of the expected names must be present.
/// </summary>
[Fact]
public void ExtractFunctions_MultipleFunctions_ExtractsAll()
{
    // Arrange
    var lines = new List<string>
    {
        "def foo():",
        " pass",
        "",
        "def bar(x: int) -> int:",
        " return x * 2",
        "",
        "async def baz():",
        " await something()"
    };

    // Act
    var functions = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        [],
        lines,
        [],
        "module.py");

    // Assert
    functions.Should().HaveCount(3);

    // The expected names are passed as ONE collection on purpose:
    // Contain("foo", "bar", "baz") would bind to Contain(expected, because, becauseArgs)
    // and silently verify only "foo".
    functions.Select(f => f.Name).Should().Contain(new[] { "foo", "bar", "baz" });
}
/// <summary>
/// The same Python definition supplied in both the context lines and the added lines
/// must be reported only once.
/// </summary>
[Fact]
public void ExtractFunctions_DuplicateFunctions_DeduplicatesBySignature()
{
    // Arrange - same function in both context and added lines
    List<string> contextLines = ["def process():"];
    List<string> addedLines = ["def process():"];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        contextLines, addedLines, [], "test.py");

    // Assert - should only extract once
    extracted.Should().ContainSingle();
}
#endregion
#region Java Function Tests
/// <summary>
/// For each Java method declaration line, exactly one function must be extracted with
/// the expected name, a signature containing the expected fragment, and the Java language tag.
/// </summary>
/// <param name="line">Java source line holding the method declaration.</param>
/// <param name="expectedName">Method name the extractor should report.</param>
/// <param name="expectedSignatureContains">Fragment that must appear in the extracted signature.</param>
[Theory]
[InlineData("public void process(String data) {", "process", "public void process(String data)")]
[InlineData("private static int calculate(int x, int y) {", "calculate", "private static int calculate(int x, int y)")]
[InlineData("protected List<String> getItems() {", "getItems", "protected List<String> getItems()")]
public void ExtractJavaFunctions_ValidDefinitions_ExtractsCorrectly(
    string line,
    string expectedName,
    string expectedSignatureContains)
{
    // Arrange
    List<string> addedLines = [line];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        [], addedLines, [], "Service.java");

    // Assert
    extracted.Should().ContainSingle();
    var javaMethod = extracted[0];
    javaMethod.Name.Should().Be(expectedName);
    javaMethod.Signature.Should().Contain(expectedSignatureContains);
    javaMethod.Language.Should().Be(ProgrammingLanguage.Java);
}
#endregion
#region JavaScript Function Tests
/// <summary>
/// For each JavaScript <c>function</c> declaration line, exactly one function must be
/// extracted with the expected name, a signature containing the expected fragment,
/// and the JavaScript language tag.
/// </summary>
/// <param name="line">JavaScript source line holding the function declaration.</param>
/// <param name="expectedName">Function name the extractor should report.</param>
/// <param name="expectedSignatureContains">Fragment that must appear in the extracted signature.</param>
[Theory]
[InlineData("function handleClick(event) {", "handleClick", "function handleClick(event)")]
[InlineData("async function fetchData(url) {", "fetchData", "async function fetchData(url)")]
[InlineData("export function validate(input) {", "validate", "function validate(input)")]
public void ExtractJavaScriptFunctions_ValidDefinitions_ExtractsCorrectly(
    string line,
    string expectedName,
    string expectedSignatureContains)
{
    // Arrange
    List<string> addedLines = [line];

    // Act
    var extracted = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        [], addedLines, [], "app.js");

    // Assert
    extracted.Should().ContainSingle();
    var jsFunction = extracted[0];
    jsFunction.Name.Should().Be(expectedName);
    jsFunction.Signature.Should().Contain(expectedSignatureContains);
    jsFunction.Language.Should().Be(ProgrammingLanguage.JavaScript);
}
/// <summary>
/// Arrow functions assigned to <c>const</c> bindings (with and without parenthesized
/// parameters) must both be extracted under the names of their bindings.
/// </summary>
[Fact]
public void ExtractJavaScriptFunctions_ArrowFunction_ExtractsCorrectly()
{
    // Arrange
    var lines = new List<string>
    {
        "const processData = (input) => {",
        "const square = x => x * x;"
    };

    // Act
    var functions = FunctionSignatureExtractor.ExtractFunctionsFromContext(
        [],
        lines,
        [],
        "utils.js");

    // Assert
    functions.Should().HaveCount(2);

    // Pass the names as ONE collection: Contain("processData", "square") would treat
    // "square" as the failure-reason text and only verify "processData".
    functions.Select(f => f.Name).Should().Contain(new[] { "processData", "square" });
}
#endregion
#region Fuzzy Function Matching Tests (BP-508)
/// <summary>
/// Two C functions with identical name and signature (differing only in file path)
/// must produce a full overall score, a strong match, and full component scores.
/// </summary>
[Fact]
public void ComputeMatch_IdenticalFunctions_ReturnsScore100()
{
    // Tolerance for the overall score: exact equality on a computed double is brittle.
    const double scoreTolerance = 1e-9;

    // Arrange
    var source = new ExtractedFunction
    {
        Name = "process_data",
        Signature = "int process_data(char* input)",
        Language = ProgrammingLanguage.C,
        FilePath = "test.c",
        Confidence = 0.90
    };
    var target = new ExtractedFunction
    {
        Name = "process_data",
        Signature = "int process_data(char* input)",
        Language = ProgrammingLanguage.C,
        FilePath = "other.c",
        Confidence = 0.90
    };

    // Act
    var result = FunctionMatchingExtensions.ComputeMatch(source, target);

    // Assert
    result.Score.Should().BeApproximately(1.0, scoreTolerance);
    result.IsStrongMatch.Should().BeTrue();
    result.ComponentScores.NameScore.Should().Be(1.0);
    result.ComponentScores.SignatureScore.Should().Be(1.0);
    result.ComponentScores.LanguageBonus.Should().Be(1.0);
}
/// <summary>
/// A snake_case C function compared with its camelCase Java counterpart must score
/// above 0.40 overall while receiving no language bonus.
/// </summary>
[Fact]
public void ComputeMatch_SimilarNames_ReturnsHighScore()
{
    // Arrange
    ExtractedFunction source = new()
    {
        Name = "process_data",
        Signature = "int process_data(char* input)",
        Language = ProgrammingLanguage.C,
        FilePath = "test.c",
        Confidence = 0.90
    };
    ExtractedFunction target = new()
    {
        Name = "processData", // CamelCase variant
        Signature = "int processData(String input)",
        Language = ProgrammingLanguage.Java,
        FilePath = "Test.java",
        Confidence = 0.85
    };

    // Act
    var match = FunctionMatchingExtensions.ComputeMatch(source, target);

    // Assert - names are similar but not identical, different languages
    match.Score.Should().BeGreaterThan(0.40); // Weak match threshold
    match.ComponentScores.LanguageBonus.Should().Be(0.0); // Different languages
}
/// <summary>
/// Two unrelated C functions must score below 0.40 and be flagged as neither a
/// strong nor a weak match.
/// </summary>
[Fact]
public void ComputeMatch_CompletelyDifferent_ReturnsLowScore()
{
    // Arrange
    ExtractedFunction source = new()
    {
        Name = "calculate_hash",
        Signature = "uint64_t calculate_hash(const char* data, size_t len)",
        Language = ProgrammingLanguage.C,
        FilePath = "hash.c",
        Confidence = 0.90
    };
    ExtractedFunction target = new()
    {
        Name = "render_widget",
        Signature = "void render_widget(Widget* w)",
        Language = ProgrammingLanguage.C,
        FilePath = "ui.c",
        Confidence = 0.90
    };

    // Act
    var match = FunctionMatchingExtensions.ComputeMatch(source, target);

    // Assert - very different functions
    match.Score.Should().BeLessThan(0.40);
    match.IsStrongMatch.Should().BeFalse();
    match.IsWeakMatch.Should().BeFalse();
}
/// <summary>
/// An identical function declared once as C and once as C++ must still receive the
/// full language bonus and a full overall score.
/// </summary>
[Fact]
public void ComputeMatch_CAndCppCompatible_GetsLanguageBonus()
{
    // Tolerance for the overall score: exact equality on a computed double is brittle.
    const double scoreTolerance = 1e-9;

    // Arrange
    var source = new ExtractedFunction
    {
        Name = "init_buffer",
        Signature = "void init_buffer(Buffer* buf)",
        Language = ProgrammingLanguage.C,
        FilePath = "buffer.c",
        Confidence = 0.90
    };
    var target = new ExtractedFunction
    {
        Name = "init_buffer",
        Signature = "void init_buffer(Buffer* buf)",
        Language = ProgrammingLanguage.Cpp,
        FilePath = "buffer.cpp",
        Confidence = 0.90
    };

    // Act
    var result = FunctionMatchingExtensions.ComputeMatch(source, target);

    // Assert - C and C++ should be compatible
    result.Score.Should().BeApproximately(1.0, scoreTolerance);
    result.ComponentScores.LanguageBonus.Should().Be(1.0);
}
/// <summary>
/// A JavaScript function and its TypeScript counterpart (same name, typed parameter)
/// must be a strong match and receive the full language bonus.
/// </summary>
[Fact]
public void ComputeMatch_JsAndTsCompatible_GetsLanguageBonus()
{
    // Arrange
    ExtractedFunction source = new()
    {
        Name = "handleSubmit",
        Signature = "function handleSubmit(event)",
        Language = ProgrammingLanguage.JavaScript,
        FilePath = "form.js",
        Confidence = 0.80
    };
    ExtractedFunction target = new()
    {
        Name = "handleSubmit",
        Signature = "function handleSubmit(event: Event)",
        Language = ProgrammingLanguage.TypeScript,
        FilePath = "form.ts",
        Confidence = 0.80
    };

    // Act
    var match = FunctionMatchingExtensions.ComputeMatch(source, target);

    // Assert - JS and TS should be compatible
    match.IsStrongMatch.Should().BeTrue();
    match.ComponentScores.LanguageBonus.Should().Be(1.0);
}
/// <summary>
/// Among three C candidates, <c>FindBestMatch</c> must pick the one whose name and
/// signature are identical to the source and report a full overall score for it.
/// </summary>
[Fact]
public void FindBestMatch_MultipleCandidates_ReturnsBestMatch()
{
    // Tolerance for the overall score: exact equality on a computed double is brittle.
    const double scoreTolerance = 1e-9;

    // Arrange
    var source = new ExtractedFunction
    {
        Name = "parse_config",
        Signature = "Config* parse_config(const char* path)",
        Language = ProgrammingLanguage.C,
        FilePath = "config.c",
        Confidence = 0.90
    };
    var candidates = new[]
    {
        new ExtractedFunction
        {
            Name = "render_ui",
            Signature = "void render_ui()",
            Language = ProgrammingLanguage.C,
            FilePath = "ui.c",
            Confidence = 0.90
        },
        new ExtractedFunction
        {
            Name = "parse_config", // Exact match
            Signature = "Config* parse_config(const char* path)",
            Language = ProgrammingLanguage.C,
            FilePath = "config_new.c",
            Confidence = 0.90
        },
        new ExtractedFunction
        {
            Name = "parse_json",
            Signature = "Json* parse_json(const char* str)",
            Language = ProgrammingLanguage.C,
            FilePath = "json.c",
            Confidence = 0.90
        }
    };

    // Act
    var result = source.FindBestMatch(candidates);

    // Assert
    result.Should().NotBeNull();
    result!.Target.Name.Should().Be("parse_config");
    result.Score.Should().BeApproximately(1.0, scoreTolerance);
}
/// <summary>
/// With a minimum score of 0.70 and only dissimilar C candidates,
/// <c>FindBestMatch</c> must return <c>null</c>.
/// </summary>
[Fact]
public void FindBestMatch_NoMatchAboveThreshold_ReturnsNull()
{
    // Builds a C function with the confidence shared by every fixture in this test.
    static ExtractedFunction CFunction(string name, string signature, string filePath) => new()
    {
        Name = name,
        Signature = signature,
        Language = ProgrammingLanguage.C,
        FilePath = filePath,
        Confidence = 0.90
    };

    // Arrange
    var source = CFunction(
        "very_unique_function_name",
        "void very_unique_function_name()",
        "unique.c");
    ExtractedFunction[] candidates =
    [
        CFunction("completely_different", "int completely_different(int x, int y)", "other.c"),
        CFunction("another_function", "void another_function(char* s)", "another.c")
    ];

    // Act
    var bestMatch = source.FindBestMatch(candidates, minScore: 0.70);

    // Assert
    bestMatch.Should().BeNull();
}
/// <summary>
/// <c>FindAllMatches</c> with a minimum score of 0.30 must return at least two matches,
/// put the exact-name candidate first with a strictly higher score than the second,
/// and keep the whole result list in descending score order.
/// </summary>
[Fact]
public void FindAllMatches_ReturnsMatchesOrderedByScore()
{
    // Arrange
    var source = new ExtractedFunction
    {
        Name = "process",
        Signature = "void process(Data* d)",
        Language = ProgrammingLanguage.C,
        FilePath = "proc.c",
        Confidence = 0.90
    };
    var candidates = new[]
    {
        new ExtractedFunction
        {
            Name = "process", // Exact
            Signature = "void process(Data* d)",
            Language = ProgrammingLanguage.C,
            FilePath = "exact.c",
            Confidence = 0.90
        },
        new ExtractedFunction
        {
            Name = "processData", // Similar
            Signature = "void processData(Data* d)",
            Language = ProgrammingLanguage.C,
            FilePath = "similar.c",
            Confidence = 0.90
        },
        new ExtractedFunction
        {
            Name = "unrelated",
            Signature = "int unrelated()",
            Language = ProgrammingLanguage.C,
            FilePath = "unrelated.c",
            Confidence = 0.90
        }
    };

    // Act
    var results = source.FindAllMatches(candidates, minScore: 0.30);

    // Assert
    results.Should().HaveCountGreaterThanOrEqualTo(2);
    results[0].Target.Name.Should().Be("process"); // Highest score first
    results[0].Score.Should().BeGreaterThan(results[1].Score);

    // Check the ordering of every returned match, not just the first pair.
    results.Should().BeInDescendingOrder(match => match.Score);
}
/// <summary>
/// The camelCase and snake_case spellings of the same name, declared in different
/// languages (JavaScript vs. C), must still reach the given minimum overall score.
/// </summary>
/// <param name="name1">Name used for the JavaScript source function.</param>
/// <param name="name2">Name used for the C target function.</param>
/// <param name="minExpectedScore">Inclusive lower bound for the overall match score.</param>
[Theory]
[InlineData("processData", "process_data", 0.60)] // Underscore vs CamelCase
[InlineData("getData", "get_data", 0.60)]
[InlineData("handleClick", "handle_click", 0.60)]
public void ComputeMatch_NamingConventionVariants_ReturnsReasonableScore(
    string name1,
    string name2,
    double minExpectedScore)
{
    // Arrange
    ExtractedFunction source = new()
    {
        Name = name1,
        Signature = $"void {name1}()",
        Language = ProgrammingLanguage.JavaScript,
        FilePath = "test.js",
        Confidence = 0.80
    };
    ExtractedFunction target = new()
    {
        Name = name2,
        Signature = $"void {name2}()",
        Language = ProgrammingLanguage.C,
        FilePath = "test.c",
        Confidence = 0.90
    };

    // Act
    var match = FunctionMatchingExtensions.ComputeMatch(source, target);

    // Assert - should recognize naming convention variants
    match.Score.Should().BeGreaterThanOrEqualTo(minExpectedScore);
}
#endregion
}

View File

@@ -0,0 +1,10 @@
# Feedser Core Tests Task Board
This board mirrors active sprint tasks for this module.
Source of truth: `docs/implplan/SPRINT_20251229_049_BE_csproj_audit_maint_tests.md`.
| Task ID | Status | Notes |
| --- | --- | --- |
| AUDIT-0341-M | DONE | Maintainability audit for Feedser.Core.Tests. |
| AUDIT-0341-T | DONE | Test coverage audit for Feedser.Core.Tests. |
| AUDIT-0341-A | DONE | Waived (test project). |