Complete batch 012 (golden set diff) and 013 (advisory chat), fix build errors

Sprints completed:
- SPRINT_20260110_012_* (golden set diff layer - 10 sprints)
- SPRINT_20260110_013_* (advisory chat - 4 sprints)

Build fixes applied:
- Fix namespace conflicts with Microsoft.Extensions.Options.Options.Create
- Fix VexDecisionReachabilityIntegrationTests API drift (major rewrite)
- Fix VexSchemaValidationTests FluentAssertions method name
- Fix FixChainGateIntegrationTests ambiguous type references
- Fix AdvisoryAI test files required properties and namespace aliases
- Add stub types for CveMappingController (ICveSymbolMappingService)
- Fix VerdictBuilderService static context issue

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
master
2026-01-11 10:09:07 +02:00
parent a3b2f30a11
commit 7f7eb8b228
232 changed files with 58979 additions and 91 deletions

View File

@@ -0,0 +1,174 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Frozen;
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
/// <summary>
/// Maps CWE IDs to likely sink functions and categories.
/// </summary>
/// <remarks>
/// Lookups are served from a fixed table that is built once when the type is initialized.
/// IDs are accepted as "CWE-120", "cwe-120" or bare "120"; whitespace around the number and
/// leading zeros are ignored. Blank or unmapped IDs yield an empty array (or <c>null</c> for
/// the category lookup) rather than an exception.
/// </remarks>
public static class CweToSinkMapper
{
    private const string CwePrefix = "CWE-";

    private static readonly FrozenDictionary<string, SinkMapping> Mappings = BuildMappings();

    /// <summary>
    /// Gets sink functions associated with a CWE ID.
    /// </summary>
    /// <param name="cweId">The CWE ID (e.g., "CWE-120", "cwe-120" or "120").</param>
    /// <returns>Sink function names in table order; empty when the ID is blank or unmapped.</returns>
    public static ImmutableArray<string> GetSinksForCwe(string cweId)
    {
        if (FindMapping(cweId) is { } mapping)
        {
            return mapping.Sinks;
        }

        return [];
    }

    /// <summary>
    /// Gets the sink category for a CWE ID.
    /// </summary>
    /// <param name="cweId">The CWE ID.</param>
    /// <returns>Sink category, or <c>null</c> when the ID is blank or unmapped.</returns>
    public static string? GetCategoryForCwe(string cweId)
        => FindMapping(cweId)?.Category;

    /// <summary>
    /// Gets all sink functions for multiple CWE IDs.
    /// </summary>
    /// <param name="cweIds">The CWE IDs to look up; blank or unmapped entries are ignored.</param>
    /// <returns>Distinct sink function names in ordinal order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="cweIds"/> is <c>null</c>.</exception>
    public static ImmutableArray<string> GetSinksForCwes(IEnumerable<string> cweIds)
    {
        ArgumentNullException.ThrowIfNull(cweIds);

        var sinks = new HashSet<string>(StringComparer.Ordinal);
        foreach (var cweId in cweIds)
        {
            sinks.UnionWith(GetSinksForCwe(cweId));
        }

        return [.. sinks.OrderBy(s => s, StringComparer.Ordinal)];
    }

    /// <summary>
    /// Gets all categories for multiple CWE IDs.
    /// </summary>
    /// <param name="cweIds">The CWE IDs to look up; blank or unmapped entries are ignored.</param>
    /// <returns>Distinct categories in ordinal order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="cweIds"/> is <c>null</c>.</exception>
    public static ImmutableArray<string> GetCategoriesForCwes(IEnumerable<string> cweIds)
    {
        ArgumentNullException.ThrowIfNull(cweIds);

        var categories = new HashSet<string>(StringComparer.Ordinal);
        foreach (var cweId in cweIds)
        {
            if (GetCategoryForCwe(cweId) is { } category)
            {
                categories.Add(category);
            }
        }

        return [.. categories.OrderBy(c => c, StringComparer.Ordinal)];
    }

    /// <summary>
    /// Resolves the table entry for a raw CWE ID, or <c>null</c> when the ID is blank or unmapped.
    /// </summary>
    private static SinkMapping? FindMapping(string? cweId)
    {
        if (string.IsNullOrWhiteSpace(cweId))
        {
            return null;
        }

        return Mappings.TryGetValue(NormalizeCweId(cweId), out var mapping) ? mapping : null;
    }

    /// <summary>
    /// Converts a raw CWE ID into the canonical "CWE-&lt;number&gt;" key used by the table.
    /// </summary>
    /// <param name="cweId">A non-blank ID such as "CWE-120", "cwe-0120" or "120".</param>
    /// <returns>
    /// The canonical key when the numeric part consists of digits only; otherwise the input
    /// with an upper-cased prefix (prefixed form) or upper-cased as a whole (anything else).
    /// </returns>
    private static string NormalizeCweId(string cweId)
    {
        var id = cweId.Trim();
        var hasPrefix = id.StartsWith(CwePrefix, StringComparison.OrdinalIgnoreCase);
        var number = hasPrefix ? id[CwePrefix.Length..].Trim() : id;

        // NumberStyles.None accepts digits only: no sign, no culture-specific symbols. This keeps
        // inputs such as "-120" from being turned into the bogus key "CWE--120" and makes the
        // result independent of the current culture. Round-tripping through int drops leading zeros.
        if (int.TryParse(
                number,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out var numericId))
        {
            return CwePrefix + numericId.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }

        return hasPrefix ? CwePrefix + number : id.ToUpperInvariant();
    }

    /// <summary>
    /// Builds the immutable CWE-to-sink lookup table.
    /// </summary>
    private static FrozenDictionary<string, SinkMapping> BuildMappings()
    {
        var mappings = new Dictionary<string, SinkMapping>(StringComparer.Ordinal)
        {
            // Buffer overflows
            ["CWE-120"] = new(SinkCategory.Memory, ["memcpy", "strcpy", "strcat", "sprintf", "gets", "scanf", "strncpy", "strncat"]),
            ["CWE-121"] = new(SinkCategory.Memory, ["memcpy", "strcpy", "sprintf", "alloca"]), // Stack-based overflow
            ["CWE-122"] = new(SinkCategory.Memory, ["memcpy", "realloc", "malloc", "calloc"]), // Heap-based overflow
            ["CWE-787"] = new(SinkCategory.Memory, ["memcpy", "memmove", "memset", "memchr"]), // Out-of-bounds write
            ["CWE-788"] = new(SinkCategory.Memory, ["memcpy", "memmove"]), // Access of memory beyond end of buffer

            // Use after free / double free
            ["CWE-416"] = new(SinkCategory.Memory, ["free", "delete", "realloc"]), // Use after free
            ["CWE-415"] = new(SinkCategory.Memory, ["free", "delete"]), // Double free
            ["CWE-401"] = new(SinkCategory.Memory, ["malloc", "calloc", "realloc", "new"]), // Memory leak

            // Command injection
            ["CWE-78"] = new(SinkCategory.CommandInjection, ["system", "exec", "execl", "execle", "execlp", "execv", "execve", "execvp", "popen", "ShellExecute", "CreateProcess"]),
            ["CWE-77"] = new(SinkCategory.CommandInjection, ["system", "exec", "popen", "eval"]),

            // Code injection
            ["CWE-94"] = new(SinkCategory.CodeInjection, ["eval", "exec", "compile", "dlopen", "LoadLibrary", "GetProcAddress"]),
            ["CWE-95"] = new(SinkCategory.CodeInjection, ["eval"]), // Eval injection

            // SQL injection
            ["CWE-89"] = new(SinkCategory.SqlInjection, ["sqlite3_exec", "mysql_query", "mysql_real_query", "PQexec", "PQexecParams", "execute", "executeQuery"]),

            // Path traversal
            ["CWE-22"] = new(SinkCategory.PathTraversal, ["fopen", "open", "access", "stat", "lstat", "readlink", "realpath", "chdir", "mkdir", "rmdir", "unlink"]),
            ["CWE-23"] = new(SinkCategory.PathTraversal, ["fopen", "open"]), // Relative path traversal
            ["CWE-36"] = new(SinkCategory.PathTraversal, ["fopen", "open", "stat"]), // Absolute path traversal

            // Integer issues
            ["CWE-190"] = new(SinkCategory.Memory, ["malloc", "calloc", "realloc", "memcpy"]), // Integer overflow
            ["CWE-191"] = new(SinkCategory.Memory, ["malloc", "memcpy"]), // Integer underflow
            ["CWE-681"] = new(SinkCategory.Memory, ["malloc", "realloc"]), // Incorrect conversion

            // Format string
            ["CWE-134"] = new(SinkCategory.Memory, ["printf", "fprintf", "sprintf", "snprintf", "vprintf", "vsprintf", "syslog"]),

            // Network
            ["CWE-319"] = new(SinkCategory.Network, ["send", "sendto", "write", "connect"]), // Cleartext transmission
            ["CWE-295"] = new(SinkCategory.Network, ["SSL_connect", "SSL_accept", "SSL_read", "SSL_write"]), // Improper cert validation

            // Crypto
            ["CWE-326"] = new(SinkCategory.Crypto, ["EVP_EncryptInit", "EVP_DecryptInit", "DES_set_key"]), // Inadequate encryption strength
            ["CWE-327"] = new(SinkCategory.Crypto, ["MD5", "SHA1", "DES", "RC4", "rand"]), // Broken or risky crypto algorithm
            ["CWE-328"] = new(SinkCategory.Crypto, ["MD5", "SHA1"]), // Reversible one-way hash

            // NULL pointer
            ["CWE-476"] = new(SinkCategory.Memory, ["memcpy", "strcpy", "strcmp", "strlen"]), // NULL pointer dereference

            // Race conditions
            ["CWE-362"] = new(SinkCategory.Memory, ["open", "fopen", "access", "stat"]), // Race condition

            // Information exposure
            ["CWE-200"] = new(SinkCategory.Network, ["printf", "fprintf", "send", "write", "syslog"]), // Exposure of sensitive info
        };

        return mappings.ToFrozenDictionary(StringComparer.Ordinal);
    }

    /// <summary>
    /// One table entry: the sink category and the sink function names for a CWE.
    /// </summary>
    private sealed record SinkMapping(string Category, ImmutableArray<string> Sinks);
}

View File

@@ -0,0 +1,181 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
/// <summary>
/// Extracts function hints from vulnerability descriptions.
/// </summary>
/// <remarks>
/// Extraction is purely regex-based. Each pattern carries a fixed confidence; when several
/// patterns capture the same name (compared case-insensitively) the highest confidence wins
/// and the first-seen spelling of the name is kept.
/// </remarks>
public static partial class FunctionHintExtractor
{
    // Common words that the patterns capture but that are not function names.
    // Built once; the original code rebuilt this set on every IsFalsePositive call.
    private static readonly HashSet<string> FalsePositives = new(StringComparer.OrdinalIgnoreCase)
    {
        "a", "an", "the", "is", "it", "in", "on", "to", "of",
        "remote", "local", "attacker", "user", "server", "client",
        "buffer", "overflow", "memory", "heap", "stack", "null",
        "pointer", "integer", "string", "array", "data", "input",
        "output", "file", "path", "url", "request", "response",
        "allows", "could", "may", "might", "can", "will", "would",
        "execute", "code", "arbitrary", "denial", "service", "dos",
        "via", "through", "using", "with", "from", "into",
        "CVE", "CWE", "CVSS", "NVD", "GHSA", "OSV"
    };

    /// <summary>
    /// Extracts function hints from an advisory description.
    /// </summary>
    /// <param name="description">The advisory description text.</param>
    /// <param name="source">Source identifier for the hints.</param>
    /// <returns>
    /// Function hints ordered by descending confidence, then by name (ordinal);
    /// empty when <paramref name="description"/> is null or blank.
    /// </returns>
    public static ImmutableArray<FunctionHint> ExtractFromDescription(string description, string source)
    {
        if (string.IsNullOrWhiteSpace(description))
        {
            return [];
        }

        var hints = new Dictionary<string, decimal>(StringComparer.OrdinalIgnoreCase);

        // High confidence patterns
        ExtractWithPattern(description, InTheFunctionPattern(), hints, 0.9m);
        ExtractWithPattern(description, FunctionParenPattern(), hints, 0.85m);
        ExtractWithPattern(description, VulnerabilityInPattern(), hints, 0.8m);

        // Medium confidence patterns
        ExtractWithPattern(description, AllowsViaPattern(), hints, 0.7m);
        ExtractWithPattern(description, ViaThePattern(), hints, 0.65m);
        ExtractWithPattern(description, CallingPattern(), hints, 0.6m);

        // Lower confidence - bare snake_case identifiers
        ExtractWithPattern(description, PossibleFunctionPattern(), hints, 0.4m);

        return ToRankedHints(hints, source);
    }

    /// <summary>
    /// Extracts function hints from a commit message.
    /// </summary>
    /// <param name="message">The commit message.</param>
    /// <param name="source">Source identifier.</param>
    /// <returns>
    /// Function hints ordered by descending confidence, then by name (ordinal);
    /// empty when <paramref name="message"/> is null or blank.
    /// </returns>
    public static ImmutableArray<FunctionHint> ExtractFromCommitMessage(string message, string source)
    {
        if (string.IsNullOrWhiteSpace(message))
        {
            return [];
        }

        var hints = new Dictionary<string, decimal>(StringComparer.OrdinalIgnoreCase);

        // Fix patterns in commit messages
        ExtractWithPattern(message, FixInPattern(), hints, 0.85m);
        ExtractWithPattern(message, PatchPattern(), hints, 0.8m);
        ExtractWithPattern(message, FunctionParenPattern(), hints, 0.75m);

        return ToRankedHints(hints, source);
    }

    /// <summary>
    /// Drops false positives and invalid identifiers, then orders the remaining hints.
    /// </summary>
    private static ImmutableArray<FunctionHint> ToRankedHints(
        Dictionary<string, decimal> hints,
        string source)
    {
        return hints
            .Where(kv => !IsFalsePositive(kv.Key))
            .Where(kv => IsValidFunctionName(kv.Key))
            .Select(kv => new FunctionHint
            {
                Name = kv.Key,
                Confidence = kv.Value,
                Source = source
            })
            .OrderByDescending(h => h.Confidence)
            .ThenBy(h => h.Name, StringComparer.Ordinal)
            .ToImmutableArray();
    }

    /// <summary>
    /// Records every "func" capture of <paramref name="pattern"/> in <paramref name="hints"/>,
    /// keeping the highest confidence seen for each name.
    /// </summary>
    private static void ExtractWithPattern(
        string text,
        Regex pattern,
        Dictionary<string, decimal> hints,
        decimal confidence)
    {
        foreach (Match match in pattern.Matches(text))
        {
            var functionName = match.Groups["func"].Value.Trim();
            if (string.IsNullOrEmpty(functionName))
            {
                continue;
            }

            // Keep the highest confidence for each function
            if (!hints.TryGetValue(functionName, out var existing) || existing < confidence)
            {
                hints[functionName] = confidence;
            }
        }
    }

    /// <summary>
    /// Returns true when <paramref name="name"/> is a known non-function word (case-insensitive).
    /// </summary>
    private static bool IsFalsePositive(string name) => FalsePositives.Contains(name);

    /// <summary>
    /// Returns true when <paramref name="name"/> is 2-64 characters long, starts with a letter
    /// or underscore, and contains only letters, digits and underscores.
    /// </summary>
    private static bool IsValidFunctionName(string name)
    {
        // Must be 2-64 characters
        if (name.Length < 2 || name.Length > 64)
        {
            return false;
        }

        // Must start with letter or underscore
        if (!char.IsLetter(name[0]) && name[0] != '_')
        {
            return false;
        }

        // Must contain only valid identifier characters
        return name.All(c => char.IsLetterOrDigit(c) || c == '_');
    }

    // Source-generated regex patterns. RegexOptions.Compiled is not specified because the
    // regex source generator ignores it.

    /// <summary>Pattern: "in the X function"</summary>
    [GeneratedRegex(@"in\s+the\s+(?<func>\w+)\s+function", RegexOptions.IgnoreCase)]
    private static partial Regex InTheFunctionPattern();

    /// <summary>Pattern: "X()" (optionally with whitespace before or inside the parentheses)</summary>
    [GeneratedRegex(@"(?<func>\w+)\s*\(\s*\)", RegexOptions.IgnoreCase)]
    private static partial Regex FunctionParenPattern();

    /// <summary>Pattern: "vulnerability in X"</summary>
    [GeneratedRegex(@"vulnerability\s+in\s+(?<func>\w+)", RegexOptions.IgnoreCase)]
    private static partial Regex VulnerabilityInPattern();

    /// <summary>Pattern: "allows Y via X" (X is captured)</summary>
    [GeneratedRegex(@"allows\s+\w+\s+via\s+(?<func>\w+)", RegexOptions.IgnoreCase)]
    private static partial Regex AllowsViaPattern();

    /// <summary>Pattern: "via the X"</summary>
    [GeneratedRegex(@"via\s+the\s+(?<func>\w+)", RegexOptions.IgnoreCase)]
    private static partial Regex ViaThePattern();

    /// <summary>Pattern: "calling X"</summary>
    [GeneratedRegex(@"calling\s+(?<func>\w+)", RegexOptions.IgnoreCase)]
    private static partial Regex CallingPattern();

    /// <summary>
    /// Pattern: lower-case snake_case identifier containing at least one underscore
    /// (case-sensitive; camelCase names are not matched).
    /// </summary>
    [GeneratedRegex(@"\b(?<func>[a-z][a-z0-9]*(?:_[a-z0-9]+)+)\b")]
    private static partial Regex PossibleFunctionPattern();

    /// <summary>Pattern: "fix X", "fixed X", "fix in X" or "fixed in X"</summary>
    [GeneratedRegex(@"fix(?:ed)?\s+(?:in\s+)?(?<func>\w+)", RegexOptions.IgnoreCase)]
    private static partial Regex FixInPattern();

    /// <summary>Pattern: "patch X"</summary>
    [GeneratedRegex(@"patch\s+(?<func>\w+)", RegexOptions.IgnoreCase)]
    private static partial Regex PatchPattern();
}

View File

@@ -0,0 +1,197 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
/// <summary>
/// Interface for source-specific golden set extractors (NVD, OSV, GHSA).
/// </summary>
/// <remarks>
/// Implementations cover a single advisory source each. A caller is expected to check
/// <see cref="Supports"/> before invoking <see cref="ExtractAsync"/>.
/// </remarks>
public interface IGoldenSetSourceExtractor
{
/// <summary>
/// The source type this extractor handles.
/// </summary>
/// <remarks>
/// An identifier string for the source; it can be used to select or filter extractors.
/// </remarks>
string SourceType { get; }
/// <summary>
/// Extracts golden set data from this source.
/// </summary>
/// <param name="vulnerabilityId">The vulnerability ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// Source extraction result. <see cref="SourceExtractionResult.Found"/> tells whether the
/// source had data; <see cref="SourceExtractionResult.Warnings"/> may be populated either way.
/// </returns>
Task<SourceExtractionResult> ExtractAsync(
string vulnerabilityId,
CancellationToken ct);
/// <summary>
/// Checks if this extractor supports the given vulnerability ID format.
/// </summary>
/// <param name="vulnerabilityId">The vulnerability ID to check.</param>
/// <returns>True if supported; otherwise, false.</returns>
bool Supports(string vulnerabilityId);
}
/// <summary>
/// Result from a single source extractor.
/// </summary>
/// <remarks>
/// Only <see cref="Found"/> and <see cref="Source"/> are required; every collection
/// defaults to empty and every optional scalar to <c>null</c>.
/// </remarks>
public sealed record SourceExtractionResult
{
    /// <summary>
    /// Whether extraction found data.
    /// </summary>
    public required bool Found { get; init; }

    /// <summary>
    /// Source information.
    /// </summary>
    public required ExtractionSource Source { get; init; }

    /// <summary>
    /// Extracted component name.
    /// </summary>
    public string? Component { get; init; }

    /// <summary>
    /// Version range(s) affected.
    /// </summary>
    public ImmutableArray<VersionRange> AffectedVersions { get; init; } = [];

    /// <summary>
    /// Function hints extracted from the description.
    /// </summary>
    public ImmutableArray<FunctionHint> FunctionHints { get; init; } = [];

    /// <summary>
    /// Sink categories based on CWE mapping.
    /// </summary>
    public ImmutableArray<string> SinkCategories { get; init; } = [];

    /// <summary>
    /// Commit references to fix commits.
    /// </summary>
    public ImmutableArray<CommitReference> CommitReferences { get; init; } = [];

    /// <summary>
    /// CWE IDs associated with the vulnerability.
    /// </summary>
    public ImmutableArray<string> CweIds { get; init; } = [];

    /// <summary>
    /// Severity level (critical, high, medium, low).
    /// </summary>
    public string? Severity { get; init; }

    /// <summary>
    /// CVSS v3 score (if available).
    /// </summary>
    public decimal? CvssScore { get; init; }

    /// <summary>
    /// Advisory description text.
    /// </summary>
    public string? Description { get; init; }

    /// <summary>
    /// Related CVEs (if any).
    /// </summary>
    public ImmutableArray<string> RelatedCves { get; init; } = [];

    /// <summary>
    /// Warnings encountered during extraction.
    /// </summary>
    public ImmutableArray<string> Warnings { get; init; } = [];

    /// <summary>
    /// Creates a not-found result.
    /// </summary>
    /// <param name="vulnerabilityId">The vulnerability ID; stored as the source reference.</param>
    /// <param name="sourceType">The source type; stored as the source type.</param>
    /// <param name="timeProvider">Clock used to stamp the fetch time (UTC).</param>
    /// <returns>A result with <see cref="Found"/> set to <c>false</c> and all other data at defaults.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="timeProvider"/> is <c>null</c>.</exception>
    public static SourceExtractionResult NotFound(string vulnerabilityId, string sourceType, TimeProvider timeProvider)
    {
        // Fail with a descriptive argument error instead of a NullReferenceException below.
        ArgumentNullException.ThrowIfNull(timeProvider);

        return new SourceExtractionResult
        {
            Found = false,
            Source = new ExtractionSource
            {
                Type = sourceType,
                Reference = vulnerabilityId,
                FetchedAt = timeProvider.GetUtcNow()
            }
        };
    }
}
/// <summary>
/// A version range for affected versions.
/// </summary>
/// <remarks>
/// All members are optional free-form strings; this type performs no parsing or validation.
/// </remarks>
public sealed record VersionRange
{
/// <summary>
/// Minimum affected version (inclusive, null = unbounded).
/// </summary>
public string? MinVersion { get; init; }
/// <summary>
/// Maximum affected version (exclusive, null = unbounded).
/// </summary>
public string? MaxVersion { get; init; }
/// <summary>
/// Fixed version (if known).
/// </summary>
public string? FixedVersion { get; init; }
/// <summary>
/// Ecosystem (e.g., npm, pypi, golang, cargo).
/// </summary>
public string? Ecosystem { get; init; }
}
/// <summary>
/// A hint about a potentially vulnerable function.
/// </summary>
/// <remarks>
/// Name, confidence and source are required; the type itself does not validate their values.
/// </remarks>
public sealed record FunctionHint
{
/// <summary>
/// Function name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Confidence in this hint (0.0 - 1.0).
/// </summary>
/// <remarks>
/// The range is a documented convention only; it is not enforced here.
/// </remarks>
public required decimal Confidence { get; init; }
/// <summary>
/// How this hint was extracted.
/// </summary>
public required string Source { get; init; }
/// <summary>
/// Optional source file path.
/// </summary>
public string? SourceFile { get; init; }
}
/// <summary>
/// A reference to a fix commit.
/// </summary>
/// <remarks>
/// Only the URL is required; hash and host are filled in when they can be derived from it.
/// </remarks>
public sealed record CommitReference
{
/// <summary>
/// URL to the commit.
/// </summary>
public required string Url { get; init; }
/// <summary>
/// Commit hash (if extractable).
/// </summary>
public string? Hash { get; init; }
/// <summary>
/// Repository host (github, gitlab, etc.).
/// </summary>
public string? Host { get; init; }
/// <summary>
/// Whether this is confirmed to be a fix commit.
/// </summary>
/// <remarks>
/// Defaults to <c>false</c>. NOTE(review): producers may set this from a heuristic
/// (e.g. keywords in the URL) rather than a verified check - confirm before relying on it.
/// </remarks>
public bool IsConfirmedFix { get; init; }
}

View File

@@ -0,0 +1,149 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
/// <summary>
/// Extracts golden set data from NVD (National Vulnerability Database).
/// </summary>
/// <remarks>
/// The NVD API call is not implemented yet: <see cref="ExtractAsync"/> always reports
/// "not found" together with a warning. The static helpers for hints, sinks and commit
/// references are already functional.
/// </remarks>
public sealed partial class NvdGoldenSetExtractor : IGoldenSetSourceExtractor
{
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<NvdGoldenSetExtractor> _logger;

    /// <summary>
    /// Creates the extractor.
    /// </summary>
    /// <param name="timeProvider">Clock used to stamp the fetch time of results.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    public NvdGoldenSetExtractor(
        TimeProvider timeProvider,
        ILogger<NvdGoldenSetExtractor> logger)
    {
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public string SourceType => ExtractionSourceTypes.Nvd;

    /// <inheritdoc />
    /// <remarks>
    /// NVD supports CVE IDs ("CVE-YYYY-NNNN", four or more digits in the last part,
    /// case-insensitive). Null or blank input is reported as unsupported instead of throwing.
    /// </remarks>
    public bool Supports(string vulnerabilityId)
    {
        return !string.IsNullOrWhiteSpace(vulnerabilityId)
            && CveIdPattern().IsMatch(vulnerabilityId);
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentException"><paramref name="vulnerabilityId"/> is null or blank.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> is already cancelled.</exception>
    public async Task<SourceExtractionResult> ExtractAsync(
        string vulnerabilityId,
        CancellationToken ct)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(vulnerabilityId);
        ct.ThrowIfCancellationRequested();

        _logger.LogDebug("Extracting from NVD for {VulnerabilityId}", vulnerabilityId);

        // TODO: Implement actual NVD API call
        // For now, return a stub result indicating the API needs implementation.
        // The no-op await keeps the method asynchronous until the real call exists.
        await Task.CompletedTask;

        var source = new ExtractionSource
        {
            Type = SourceType,
            Reference = $"https://nvd.nist.gov/vuln/detail/{vulnerabilityId}",
            FetchedAt = _timeProvider.GetUtcNow()
        };

        // Return not found for now - real implementation would fetch from NVD
        return new SourceExtractionResult
        {
            Found = false,
            Source = source,
            Warnings = ["NVD API integration not yet implemented. Please use manual extraction."]
        };
    }

    /// <summary>
    /// Extracts function hints from a CVE description.
    /// </summary>
    /// <param name="description">The CVE description text.</param>
    /// <param name="source">Source identifier recorded on each hint.</param>
    /// <returns>The hints produced by <see cref="FunctionHintExtractor.ExtractFromDescription"/>.</returns>
    internal static ImmutableArray<FunctionHint> ExtractFunctionHintsFromDescription(
        string description,
        string source)
    {
        return FunctionHintExtractor.ExtractFromDescription(description, source);
    }

    /// <summary>
    /// Maps CWE IDs to sink functions.
    /// </summary>
    /// <param name="cweIds">The CWE IDs to map.</param>
    /// <returns>The sinks produced by <see cref="CweToSinkMapper.GetSinksForCwes"/>.</returns>
    internal static ImmutableArray<string> MapCweToSinks(ImmutableArray<string> cweIds)
    {
        return CweToSinkMapper.GetSinksForCwes(cweIds);
    }

    /// <summary>
    /// Extracts commit references from NVD references.
    /// </summary>
    /// <param name="referenceUrls">Reference URLs; blank entries and non-commit URLs are skipped.</param>
    /// <returns>
    /// One entry per GitHub or GitLab commit URL, in input order. <c>IsConfirmedFix</c> is a
    /// heuristic: it is set when the URL contains "fix" or "patch" (case-insensitive).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="referenceUrls"/> is <c>null</c>.</exception>
    internal static ImmutableArray<CommitReference> ExtractCommitReferences(IEnumerable<string> referenceUrls)
    {
        ArgumentNullException.ThrowIfNull(referenceUrls);

        var commits = new List<CommitReference>();
        foreach (var url in referenceUrls)
        {
            if (!IsCommitUrl(url, out var host, out var hash))
            {
                continue;
            }

            commits.Add(new CommitReference
            {
                Url = url,
                Hash = hash,
                Host = host,
                IsConfirmedFix = url.Contains("fix", StringComparison.OrdinalIgnoreCase) ||
                                 url.Contains("patch", StringComparison.OrdinalIgnoreCase)
            });
        }

        return [.. commits];
    }

    /// <summary>
    /// Recognizes GitHub and GitLab commit URLs.
    /// </summary>
    /// <param name="url">Candidate URL; null or blank yields <c>false</c>.</param>
    /// <param name="host">"github" or "gitlab" on success; otherwise <c>null</c>.</param>
    /// <param name="hash">The 7-40 character hex commit hash on success; otherwise <c>null</c>.</param>
    /// <returns>True when <paramref name="url"/> matches one of the commit URL patterns.</returns>
    private static bool IsCommitUrl(string url, out string? host, out string? hash)
    {
        host = null;
        hash = null;

        if (string.IsNullOrWhiteSpace(url))
        {
            return false;
        }

        // GitHub commit URL pattern
        var githubMatch = GitHubCommitPattern().Match(url);
        if (githubMatch.Success)
        {
            host = "github";
            hash = githubMatch.Groups["hash"].Value;
            return true;
        }

        // GitLab commit URL pattern
        var gitlabMatch = GitLabCommitPattern().Match(url);
        if (gitlabMatch.Success)
        {
            host = "gitlab";
            hash = gitlabMatch.Groups["hash"].Value;
            return true;
        }

        return false;
    }

    // RegexOptions.Compiled is not specified: the regex source generator ignores it.

    /// <summary>Pattern: a whole-string CVE ID such as "CVE-2024-12345".</summary>
    [GeneratedRegex(@"^CVE-\d{4}-\d{4,}$", RegexOptions.IgnoreCase)]
    private static partial Regex CveIdPattern();

    /// <summary>Pattern: "github.com/{owner}/{repo}/commit/{hash}".</summary>
    [GeneratedRegex(@"github\.com/[^/]+/[^/]+/commit/(?<hash>[a-f0-9]{7,40})", RegexOptions.IgnoreCase)]
    private static partial Regex GitHubCommitPattern();

    /// <summary>Pattern: "gitlab.com/{group}/{project}/-/commit/{hash}".</summary>
    [GeneratedRegex(@"gitlab\.com/[^/]+/[^/]+/-/commit/(?<hash>[a-f0-9]{7,40})", RegexOptions.IgnoreCase)]
    private static partial Regex GitLabCommitPattern();
}

View File

@@ -0,0 +1,281 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Default implementation of <see cref="IGoldenSetEnrichmentService"/>.
/// Integrates with AdvisoryAI for AI-powered enrichment.
/// </summary>
/// <remarks>
/// The AdvisoryAI call is currently a placeholder. Enrichment is heuristic: functions and
/// constants come from fix-commit analysis, sinks from the CWE mapping, and the AI step
/// only contributes a warning.
/// </remarks>
public sealed class GoldenSetEnrichmentService : IGoldenSetEnrichmentService
{
    // Function name carried by a placeholder target that stands in for "not identified yet".
    private const string PlaceholderFunctionName = "<unknown>";

    // Upper bound on constants copied into each target from one commit analysis (limits noise).
    private const int MaxConstantsPerTarget = 5;

    private readonly IUpstreamCommitAnalyzer _commitAnalyzer;
    private readonly GoldenSetOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<GoldenSetEnrichmentService> _logger;

    /// <summary>
    /// Creates the enrichment service.
    /// </summary>
    /// <param name="commitAnalyzer">Upstream commit analyzer (stored; not used by the current heuristics).</param>
    /// <param name="options">Golden set options; the value is read once at construction.</param>
    /// <param name="timeProvider">Clock abstraction (stored; not used by the current heuristics).</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    public GoldenSetEnrichmentService(
        IUpstreamCommitAnalyzer commitAnalyzer,
        IOptions<GoldenSetOptions> options,
        TimeProvider timeProvider,
        ILogger<GoldenSetEnrichmentService> logger)
    {
        _commitAnalyzer = commitAnalyzer;
        _options = options.Value;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public bool IsAvailable => _options.Authoring.EnableAiEnrichment;

    /// <inheritdoc />
    /// <remarks>
    /// Steps run in order: commit analysis, CWE mapping, then the AI placeholder (only when
    /// fix commits are present). When enrichment is disabled the draft is returned unchanged.
    /// </remarks>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="draft"/> or <paramref name="context"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> is already cancelled.</exception>
    public async Task<GoldenSetEnrichmentResult> EnrichAsync(
        GoldenSetDefinition draft,
        GoldenSetEnrichmentContext context,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(draft);
        ArgumentNullException.ThrowIfNull(context);
        ct.ThrowIfCancellationRequested();

        if (!IsAvailable)
        {
            _logger.LogDebug("AI enrichment is disabled");
            return GoldenSetEnrichmentResult.NoChanges(draft, "AI enrichment is disabled");
        }

        _logger.LogInformation("Starting AI enrichment for {VulnerabilityId}", draft.Id);

        var actions = new List<EnrichmentAction>();
        var warnings = new List<string>();
        var enrichedDraft = draft;

        // Step 1: Enrich from commit analysis
        if (context.CommitAnalysis is not null)
        {
            var (commitEnriched, commitActions) = ApplyCommitAnalysis(enrichedDraft, context.CommitAnalysis);
            enrichedDraft = commitEnriched;
            actions.AddRange(commitActions);
        }

        // Step 2: Enrich from CWE mappings
        if (!context.CweIds.IsEmpty)
        {
            var (cweEnriched, cweActions) = ApplyCweEnrichment(enrichedDraft, context.CweIds);
            enrichedDraft = cweEnriched;
            actions.AddRange(cweActions);
        }

        // Step 3: AI-powered enrichment.
        // Note: This is where we would call AdvisoryAI service; for now it is a placeholder.
        // The enable flag needs no re-check here: the IsAvailable guard above already returned
        // when AI enrichment is disabled.
        if (context.FixCommits.Length > 0)
        {
            var (aiEnriched, aiActions, aiWarnings) = await ApplyAiEnrichmentAsync(
                enrichedDraft, context, ct);
            enrichedDraft = aiEnriched;
            actions.AddRange(aiActions);
            warnings.AddRange(aiWarnings);
        }

        // Calculate overall confidence
        var overallConfidence = CalculateOverallConfidence(actions);

        _logger.LogInformation(
            "Enrichment complete for {VulnerabilityId}: {ActionCount} actions, {Confidence:P0} confidence",
            draft.Id, actions.Count, overallConfidence);

        return new GoldenSetEnrichmentResult
        {
            EnrichedDraft = enrichedDraft,
            ActionsApplied = [.. actions],
            OverallConfidence = overallConfidence,
            Warnings = [.. warnings]
        };
    }

    /// <summary>
    /// Adds targets for functions modified by the fix commit and copies newly added constants
    /// into every target.
    /// </summary>
    /// <param name="draft">The draft to enrich (not mutated).</param>
    /// <param name="analysis">Result of analyzing the fix commit(s).</param>
    /// <returns>The enriched draft and the actions describing each change.</returns>
    private static (GoldenSetDefinition, ImmutableArray<EnrichmentAction>) ApplyCommitAnalysis(
        GoldenSetDefinition draft,
        CommitAnalysisResult analysis)
    {
        var actions = new List<EnrichmentAction>();

        // Add functions from commit analysis
        var knownFunctions = draft.Targets
            .Select(t => t.FunctionName)
            .ToHashSet(StringComparer.OrdinalIgnoreCase);
        var targets = new List<VulnerableTarget>(draft.Targets);

        foreach (var func in analysis.ModifiedFunctions)
        {
            if (func == PlaceholderFunctionName || !knownFunctions.Add(func))
            {
                continue;
            }

            targets.Add(new VulnerableTarget
            {
                FunctionName = func,
                // New targets inherit the sinks of the draft's first target, if any.
                Sinks = draft.Targets.FirstOrDefault()?.Sinks ?? []
            });

            actions.Add(new EnrichmentAction
            {
                Type = EnrichmentActionTypes.FunctionAdded,
                Target = "targets",
                Value = func,
                Confidence = 0.7m,
                Rationale = "Function modified in fix commit"
            });
        }

        // Remove placeholder targets once at least one real target exists. This must happen
        // BEFORE the constants pass: that pass records positional paths ("targets[i].constants"),
        // which would go stale if entries were removed afterwards, and it would otherwise emit
        // actions for a target that is about to be discarded. Placeholders are kept when they
        // are all there is, so the draft never ends up without targets.
        if (targets.Any(t => t.FunctionName != PlaceholderFunctionName))
        {
            targets.RemoveAll(t => t.FunctionName == PlaceholderFunctionName);
        }

        // Add constants from commit analysis
        for (var i = 0; i < targets.Count; i++)
        {
            var target = targets[i];
            var existingConstants = target.Constants.ToHashSet(StringComparer.Ordinal);
            var additionalConstants = analysis.AddedConstants
                .Where(c => !existingConstants.Contains(c))
                .Take(MaxConstantsPerTarget)
                .ToImmutableArray();

            if (additionalConstants.IsEmpty)
            {
                continue;
            }

            targets[i] = target with
            {
                Constants = target.Constants.AddRange(additionalConstants)
            };

            foreach (var constant in additionalConstants)
            {
                actions.Add(new EnrichmentAction
                {
                    Type = EnrichmentActionTypes.ConstantExtracted,
                    Target = string.Format(CultureInfo.InvariantCulture, "targets[{0}].constants", i),
                    Value = constant,
                    Confidence = 0.6m,
                    Rationale = "Constant found in fix commit"
                });
            }
        }

        var enrichedDraft = draft with
        {
            Targets = [.. targets]
        };

        return (enrichedDraft, [.. actions]);
    }

    /// <summary>
    /// Appends sinks mapped from the given CWE IDs to every target that does not list them yet.
    /// </summary>
    /// <param name="draft">The draft to enrich (not mutated).</param>
    /// <param name="cweIds">CWE IDs to map to sinks.</param>
    /// <returns>The enriched draft and one action per sink added; the draft is returned as-is when no sinks map.</returns>
    private static (GoldenSetDefinition, ImmutableArray<EnrichmentAction>) ApplyCweEnrichment(
        GoldenSetDefinition draft,
        ImmutableArray<string> cweIds)
    {
        // Get sinks from CWE mappings
        var mappedSinks = Extractors.CweToSinkMapper.GetSinksForCwes(cweIds);
        if (mappedSinks.IsEmpty)
        {
            return (draft, []);
        }

        var actions = new List<EnrichmentAction>();
        var enrichedTargets = new List<VulnerableTarget>(draft.Targets.Length);

        for (var index = 0; index < draft.Targets.Length; index++)
        {
            var target = draft.Targets[index];
            var existingSinks = target.Sinks.ToHashSet(StringComparer.Ordinal);
            var newSinks = mappedSinks
                .Where(s => !existingSinks.Contains(s))
                .ToImmutableArray();

            if (newSinks.IsEmpty)
            {
                enrichedTargets.Add(target);
                continue;
            }

            foreach (var sink in newSinks)
            {
                actions.Add(new EnrichmentAction
                {
                    Type = EnrichmentActionTypes.SinkAdded,
                    Target = string.Format(CultureInfo.InvariantCulture, "targets[{0}].sinks", index),
                    Value = sink,
                    Confidence = 0.65m,
                    Rationale = "Mapped from CWE classification"
                });
            }

            enrichedTargets.Add(target with
            {
                Sinks = target.Sinks.AddRange(newSinks)
            });
        }

        var enrichedDraft = draft with
        {
            Targets = [.. enrichedTargets]
        };

        return (enrichedDraft, [.. actions]);
    }

    /// <summary>
    /// Placeholder for the AdvisoryAI integration.
    /// </summary>
    /// <param name="draft">The draft to enrich.</param>
    /// <param name="context">Enrichment context; only the fix-commit count is logged.</param>
    /// <param name="ct">Cancellation token (unused until the real call exists).</param>
    /// <returns>The draft unchanged, no actions, and a single "not yet integrated" warning.</returns>
    private async Task<(GoldenSetDefinition, ImmutableArray<EnrichmentAction>, ImmutableArray<string>)> ApplyAiEnrichmentAsync(
        GoldenSetDefinition draft,
        GoldenSetEnrichmentContext context,
        CancellationToken ct)
    {
        // Note: This is a placeholder for actual AI integration
        // In production, this would call the AdvisoryAI service
        // For now, return the draft unchanged
        _logger.LogDebug(
            "AI enrichment placeholder - would call AdvisoryAI with {CommitCount} commits",
            context.FixCommits.Length);

        await Task.CompletedTask;

        return (draft, [], ["AI enrichment not yet integrated with AdvisoryAI service"]);
    }

    /// <summary>
    /// Computes the weighted average confidence of the applied actions, rounded to two decimals.
    /// </summary>
    /// <param name="actions">The actions applied during enrichment.</param>
    /// <returns>0 when there are no actions; otherwise the weighted mean of their confidences.</returns>
    private static decimal CalculateOverallConfidence(List<EnrichmentAction> actions)
    {
        if (actions.Count == 0)
        {
            return 0;
        }

        // Weight function-related actions higher
        var weightedSum = 0m;
        var totalWeight = 0m;

        foreach (var action in actions)
        {
            var weight = action.Type switch
            {
                EnrichmentActionTypes.FunctionAdded => 2.0m,
                EnrichmentActionTypes.FunctionRefined => 2.0m,
                EnrichmentActionTypes.SinkAdded => 1.5m,
                EnrichmentActionTypes.EdgeSuggested => 1.5m,
                EnrichmentActionTypes.ConstantExtracted => 1.0m,
                _ => 1.0m
            };

            weightedSum += action.Confidence * weight;
            totalWeight += weight;
        }

        return totalWeight > 0 ? Math.Round(weightedSum / totalWeight, 2) : 0;
    }
}

View File

@@ -0,0 +1,421 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Orchestrates golden set extraction from multiple sources.
/// </summary>
public sealed class GoldenSetExtractor : IGoldenSetExtractor
{
private readonly IEnumerable<IGoldenSetSourceExtractor> _sourceExtractors;
private readonly ISinkRegistry _sinkRegistry;
private readonly IOptions<GoldenSetOptions> _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<GoldenSetExtractor> _logger;

/// <summary>
/// Creates the extraction orchestrator; every dependency is stored as supplied.
/// </summary>
/// <param name="sourceExtractors">The per-source extractors to consult.</param>
/// <param name="sinkRegistry">Registry of known sinks.</param>
/// <param name="options">Golden set options accessor.</param>
/// <param name="timeProvider">Clock abstraction.</param>
/// <param name="logger">Logger for diagnostic output.</param>
public GoldenSetExtractor(
    IEnumerable<IGoldenSetSourceExtractor> sourceExtractors,
    ISinkRegistry sinkRegistry,
    IOptions<GoldenSetOptions> options,
    TimeProvider timeProvider,
    ILogger<GoldenSetExtractor> logger)
{
    (_sourceExtractors, _sinkRegistry, _options, _timeProvider, _logger) =
        (sourceExtractors, sinkRegistry, options, timeProvider, logger);
}
/// <inheritdoc />
/// <remarks>
/// Every extractor that supports the ID (and is selected by the options, when a source
/// filter is given) is queried in turn. A failing extractor is logged and turned into a
/// warning; cancellation is allowed to propagate. If no extractor reports data, an empty
/// result is returned.
/// </remarks>
public async Task<GoldenSetExtractionResult> ExtractAsync(
    string vulnerabilityId,
    string? component = null,
    ExtractionOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(vulnerabilityId);
    var settings = options ?? new ExtractionOptions();

    _logger.LogInformation(
        "Starting golden set extraction for {VulnerabilityId}",
        vulnerabilityId);

    var collectedWarnings = new List<string>();
    var foundResults = new List<SourceExtractionResult>();
    var foundSources = new List<ExtractionSource>();

    // Lazily filtered on purpose: each extractor is tested right before it is queried.
    var candidates = _sourceExtractors.Where(e =>
        e.Supports(vulnerabilityId)
        && (settings.Sources.Length == 0
            || settings.Sources.Contains(e.SourceType, StringComparer.OrdinalIgnoreCase)));

    foreach (var candidate in candidates)
    {
        try
        {
            _logger.LogDebug(
                "Extracting from source {SourceType} for {VulnerabilityId}",
                candidate.SourceType,
                vulnerabilityId);

            var outcome = await candidate.ExtractAsync(vulnerabilityId, ct);
            if (outcome.Found)
            {
                foundResults.Add(outcome);
                foundSources.Add(outcome.Source);
            }

            // Warnings are kept even when the source had no data.
            collectedWarnings.AddRange(outcome.Warnings);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogWarning(
                ex,
                "Failed to extract from {SourceType} for {VulnerabilityId}",
                candidate.SourceType,
                vulnerabilityId);

            var failure = string.Format(
                CultureInfo.InvariantCulture,
                "Failed to extract from {0}: {1}",
                candidate.SourceType,
                ex.Message);
            collectedWarnings.Add(failure);
        }
    }

    if (foundResults.Count == 0)
    {
        _logger.LogWarning(
            "No data found for {VulnerabilityId} from any source",
            vulnerabilityId);
        return CreateEmptyResult(vulnerabilityId, component ?? "unknown", collectedWarnings);
    }

    // Merge the per-source results into one draft, then score it.
    var mergedDraft = CreateDraftFromResults(vulnerabilityId, component, foundResults);
    var draftConfidence = CalculateConfidence(mergedDraft, foundResults);
    var draftSuggestions = GenerateSuggestions(mergedDraft, foundResults);

    _logger.LogInformation(
        "Extraction complete for {VulnerabilityId}: {TargetCount} targets, {Confidence:P0} confidence",
        vulnerabilityId,
        mergedDraft.Targets.Length,
        draftConfidence.Overall);

    return new GoldenSetExtractionResult
    {
        Draft = mergedDraft,
        Confidence = draftConfidence,
        Sources = [.. foundSources],
        Suggestions = draftSuggestions,
        Warnings = [.. collectedWarnings]
    };
}
/// <inheritdoc />
/// <remarks>
/// Performs only local, synchronous enrichment: targets that have no sinks receive sinks
/// guessed from their function name. No sources are consulted, so the result carries no
/// sources, suggestions or warnings. AI enrichment lives in a separate service.
/// <paramref name="options"/> and <paramref name="ct"/> are currently not consulted.
/// </remarks>
public Task<GoldenSetExtractionResult> EnrichAsync(
    GoldenSetDefinition draft,
    EnrichmentOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(draft);

    _logger.LogInformation(
        "Enriching golden set {VulnerabilityId}",
        draft.Id);

    // Add any missing sinks based on existing function hints
    var enrichedDraft = draft with
    {
        Targets = draft.Targets
            .Select(t => EnrichTarget(t))
            .ToImmutableArray()
    };

    // No source results exist at this stage, so no multi-source bonus applies.
    var confidence = CalculateConfidence(enrichedDraft, []);

    // Nothing here awaits, so return a completed task rather than an async state machine (CS1998).
    return Task.FromResult(new GoldenSetExtractionResult
    {
        Draft = enrichedDraft,
        Confidence = confidence,
        Sources = [],
        Suggestions = [],
        Warnings = []
    });
}
/// <summary>
/// Returns the target with heuristically guessed sinks when it has none.
/// </summary>
/// <param name="target">Target to inspect.</param>
/// <returns>
/// The same target if it already has sinks or no guess matched; otherwise a copy with the guessed sinks.
/// </returns>
private VulnerableTarget EnrichTarget(VulnerableTarget target)
{
    // Existing sinks are never overwritten.
    if (target.Sinks.Length != 0)
    {
        return target;
    }

    var guessedSinks = GuessSinksFromFunction(target.FunctionName);
    return guessedSinks.Length > 0
        ? target with { Sinks = guessedSinks }
        : target;
}
/// <summary>
/// Ordered name-fragment heuristics: a function name containing the fragment suggests the sinks.
/// Kept as an array because the first match wins and the order must be deterministic.
/// </summary>
private static readonly (string Fragment, ImmutableArray<string> Sinks)[] SinkHeuristics =
[
    ("parse", ImmutableArray.Create("memcpy", "strcpy")),
    ("copy", ImmutableArray.Create("memcpy", "strcpy")),
    ("decode", ImmutableArray.Create("memcpy")),
    ("read", ImmutableArray.Create("memcpy", "fread")),
    ("write", ImmutableArray.Create("memcpy", "fwrite")),
    ("alloc", ImmutableArray.Create("malloc", "realloc")),
    ("free", ImmutableArray.Create("free")),
    ("exec", ImmutableArray.Create("system", "exec")),
    ("sql", ImmutableArray.Create("sqlite3_exec", "mysql_query")),
    ("query", ImmutableArray.Create("sqlite3_exec", "mysql_query")),
    ("open", ImmutableArray.Create("fopen", "open")),
];

/// <summary>
/// Guesses likely sink functions from fragments of a function name.
/// </summary>
/// <param name="functionName">Function name to inspect; matching is case-insensitive.</param>
/// <returns>
/// The sinks of the first heuristic whose fragment occurs in <paramref name="functionName"/>,
/// or an empty array when the name is null/empty or nothing matches.
/// </returns>
private static ImmutableArray<string> GuessSinksFromFunction(string functionName)
{
    if (string.IsNullOrEmpty(functionName))
    {
        return [];
    }

    foreach (var (fragment, sinks) in SinkHeuristics)
    {
        if (functionName.Contains(fragment, StringComparison.OrdinalIgnoreCase))
        {
            return sinks;
        }
    }

    return [];
}
/// <summary>
/// Merges per-source extraction results into a single draft golden set.
/// </summary>
/// <param name="vulnerabilityId">ID assigned to the draft.</param>
/// <param name="component">Explicit component; when null the first non-empty component from the results is used.</param>
/// <param name="results">Source results to merge.</param>
/// <returns>A draft stamped with the current time and the "extraction-service" author.</returns>
private GoldenSetDefinition CreateDraftFromResults(
    string vulnerabilityId,
    string? component,
    List<SourceExtractionResult> results)
{
    // Only the highest-confidence function hints become targets.
    const int maxTargets = 10;

    // An explicit component wins; otherwise take the first one any source reported.
    var resolvedComponent = component
        ?? results.Select(r => r.Component).FirstOrDefault(c => !string.IsNullOrEmpty(c))
        ?? "unknown";

    // De-duplicate hints by name (case-insensitive), keeping the most confident one per name.
    var rankedHints = results
        .SelectMany(r => r.FunctionHints)
        .GroupBy(h => h.Name, StringComparer.OrdinalIgnoreCase)
        .Select(group => group.OrderByDescending(h => h.Confidence).First())
        .OrderByDescending(h => h.Confidence)
        .ToList();

    // Every target shares the sinks derived from the combined CWE set.
    var cweIds = results.SelectMany(r => r.CweIds).Distinct().ToList();
    var cweSinks = CweToSinkMapper.GetSinksForCwes(cweIds);

    var targets = rankedHints
        .Take(maxTargets)
        .Select(hint => new VulnerableTarget
        {
            FunctionName = hint.Name,
            Sinks = cweSinks,
            SourceFile = hint.SourceFile
        })
        .ToImmutableArray();

    if (targets.Length == 0)
    {
        // Without function hints, emit a placeholder so the draft still has a target.
        targets = [new VulnerableTarget
        {
            FunctionName = "<unknown>",
            Sinks = cweSinks
        }];
    }

    // Tags: the first reported severity (lower-cased) followed by the CWE categories.
    var tags = new List<string>();
    var severity = results
        .Select(r => r.Severity)
        .FirstOrDefault(s => !string.IsNullOrEmpty(s));
    if (!string.IsNullOrEmpty(severity))
    {
        tags.Add(severity.ToLowerInvariant());
    }

    tags.AddRange(CweToSinkMapper.GetCategoriesForCwes(cweIds));

    var metadata = new GoldenSetMetadata
    {
        AuthorId = "extraction-service",
        CreatedAt = _timeProvider.GetUtcNow(),
        SourceRef = string.Join(", ", results.Select(r => r.Source.Reference)),
        Tags = [.. tags.Distinct().OrderBy(t => t, StringComparer.Ordinal)]
    };

    return new GoldenSetDefinition
    {
        Id = vulnerabilityId,
        Component = resolvedComponent,
        Targets = targets,
        Metadata = metadata
    };
}
/// <summary>
/// Scores a draft on function identification, edge extraction and sink mapping.
/// </summary>
/// <remarks>
/// Non-matching targets are filtered out before averaging, so each score is all-or-nothing:
/// the fixed value when at least one target matches, otherwise 0. More than one source result
/// adds 0.1 to the function score, capped at 1.0.
/// </remarks>
/// <param name="draft">Draft whose targets are scored.</param>
/// <param name="results">Source results that produced the draft (only the count is used).</param>
/// <returns>Confidence built via <c>ExtractionConfidence.FromComponents</c>.</returns>
private static ExtractionConfidence CalculateConfidence(
    GoldenSetDefinition draft,
    List<SourceExtractionResult> results)
{
    // Average of a fixed per-target score over the targets matching the predicate; 0 when none match.
    decimal ScoreWhere(Func<VulnerableTarget, bool> matches, decimal perTargetScore) =>
        draft.Targets
            .Where(matches)
            .Select(_ => perTargetScore)
            .DefaultIfEmpty(0m)
            .Average();

    var functionScore = ScoreWhere(t => t.FunctionName != "<unknown>", 1.0m);
    var edgeScore = ScoreWhere(t => t.Edges.Length > 0, 0.8m);
    var sinkScore = ScoreWhere(t => t.Sinks.Length > 0, 0.7m);

    // Boost confidence if we have multiple sources
    var multiSourceBonus = results.Count > 1 ? 0.1m : 0m;

    return ExtractionConfidence.FromComponents(
        Math.Min(1.0m, functionScore + multiSourceBonus),
        edgeScore,
        sinkScore);
}
/// <summary>
/// Builds reviewer-facing suggestions for gaps in an extracted draft.
/// </summary>
/// <param name="draft">Draft to inspect.</param>
/// <param name="results">Source results; only their commit references are counted.</param>
/// <returns>
/// Suggestions in a fixed order: missing edges, unknown function, missing witness, available fix commits.
/// </returns>
private static ImmutableArray<ExtractionSuggestion> GenerateSuggestions(
    GoldenSetDefinition draft,
    List<SourceExtractionResult> results)
{
    var builder = ImmutableArray.CreateBuilder<ExtractionSuggestion>();

    // No target has any edge yet.
    var anyTargetHasEdges = draft.Targets.Any(t => t.Edges.Length != 0);
    if (!anyTargetHasEdges)
    {
        builder.Add(new ExtractionSuggestion
        {
            Field = "targets[*].edges",
            CurrentValue = null,
            SuggestedValue = "Add basic block edges from CFG analysis",
            Confidence = 0.9m,
            Rationale = "No edges defined. Consider adding control flow edges from binary analysis."
        });
    }

    // A placeholder target means the function could not be identified.
    var hasPlaceholderTarget = draft.Targets.Any(t => t.FunctionName == "<unknown>");
    if (hasPlaceholderTarget)
    {
        builder.Add(new ExtractionSuggestion
        {
            Field = "targets[*].function_name",
            CurrentValue = "<unknown>",
            SuggestedValue = "Identify specific vulnerable function",
            Confidence = 0.95m,
            Rationale = "Could not identify vulnerable function from advisory. Manual review required."
        });
    }

    if (draft.Witness is null)
    {
        builder.Add(new ExtractionSuggestion
        {
            Field = "witness",
            CurrentValue = null,
            SuggestedValue = "Add witness input for reproducibility",
            Confidence = 0.7m,
            Rationale = "No witness input defined. Adding reproduction steps improves golden set quality."
        });
    }

    // Fix commits, when present, can be analyzed for more precise targets.
    var commitCount = results.SelectMany(r => r.CommitReferences).Count();
    if (commitCount > 0)
    {
        var commitHint = string.Format(
            CultureInfo.InvariantCulture,
            "Analyze {0} fix commit(s) for more precise targets",
            commitCount);

        builder.Add(new ExtractionSuggestion
        {
            Field = "targets",
            CurrentValue = null,
            SuggestedValue = commitHint,
            Confidence = 0.8m,
            Rationale = "Fix commits are available. AI analysis can extract precise function names and edge patterns.",
            Source = "upstream_commit"
        });
    }

    return builder.ToImmutable();
}
/// <summary>
/// Builds the zero-confidence result returned when no source had data for the vulnerability.
/// </summary>
/// <param name="vulnerabilityId">ID assigned to the placeholder draft.</param>
/// <param name="component">Component assigned to the placeholder draft.</param>
/// <param name="warnings">
/// Warnings collected so far. A "no data found" entry is appended to this list (the caller's
/// list is mutated) before it is copied into the result.
/// </param>
/// <returns>A result with one "&lt;unknown&gt;" target and a "manual entry required" suggestion.</returns>
private GoldenSetExtractionResult CreateEmptyResult(
    string vulnerabilityId,
    string component,
    List<string> warnings)
{
    warnings.Add(string.Format(
        CultureInfo.InvariantCulture,
        "No data found for {0}. Manual authoring required.",
        vulnerabilityId));

    var placeholderDraft = new GoldenSetDefinition
    {
        Id = vulnerabilityId,
        Component = component,
        Targets = [new VulnerableTarget { FunctionName = "<unknown>" }],
        Metadata = new GoldenSetMetadata
        {
            AuthorId = "extraction-service",
            CreatedAt = _timeProvider.GetUtcNow(),
            SourceRef = "none"
        }
    };

    var manualEntry = new ExtractionSuggestion
    {
        Field = "targets",
        CurrentValue = null,
        SuggestedValue = "Manual entry required",
        Confidence = 0.0m,
        Rationale = "No automated extraction was possible. Please manually define the vulnerable targets."
    };

    return new GoldenSetExtractionResult
    {
        Draft = placeholderDraft,
        Confidence = ExtractionConfidence.Zero,
        Sources = [],
        Suggestions = [manualEntry],
        Warnings = [.. warnings]
    };
}
}

View File

@@ -0,0 +1,322 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Frozen;
using System.Collections.Immutable;
using System.Globalization;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Implementation of the golden set review workflow.
/// </summary>
/// <remarks>
/// Status changes are limited to the transitions in <c>ValidTransitions</c>:
/// Draft → InReview; InReview → Draft or Approved; Approved → Deprecated;
/// Deprecated → Archived; Archived is terminal.
/// </remarks>
public sealed class GoldenSetReviewService : IGoldenSetReviewService
{
    private readonly IGoldenSetStore _store;
    private readonly IGoldenSetValidator _validator;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<GoldenSetReviewService> _logger;

    // Valid state transitions
    private static readonly FrozenDictionary<GoldenSetStatus, FrozenSet<GoldenSetStatus>> ValidTransitions =
        new Dictionary<GoldenSetStatus, FrozenSet<GoldenSetStatus>>
        {
            [GoldenSetStatus.Draft] = new HashSet<GoldenSetStatus>
            {
                GoldenSetStatus.InReview // Submit for review
            }.ToFrozenSet(),
            [GoldenSetStatus.InReview] = new HashSet<GoldenSetStatus>
            {
                GoldenSetStatus.Draft, // Request changes
                GoldenSetStatus.Approved // Approve
            }.ToFrozenSet(),
            [GoldenSetStatus.Approved] = new HashSet<GoldenSetStatus>
            {
                GoldenSetStatus.Deprecated // Deprecate
            }.ToFrozenSet(),
            [GoldenSetStatus.Deprecated] = new HashSet<GoldenSetStatus>
            {
                GoldenSetStatus.Archived // Archive
            }.ToFrozenSet(),
            [GoldenSetStatus.Archived] = new HashSet<GoldenSetStatus>().ToFrozenSet() // Terminal state
        }.ToFrozenDictionary();

    /// <summary>
    /// Creates the review service.
    /// </summary>
    /// <param name="store">Store used to read golden sets and persist definitions and status changes.</param>
    /// <param name="validator">Validator run before a golden set may be submitted for review.</param>
    /// <param name="timeProvider">Clock used to stamp the review time on approval.</param>
    /// <param name="logger">Logger for workflow events.</param>
    public GoldenSetReviewService(
        IGoldenSetStore store,
        IGoldenSetValidator validator,
        TimeProvider timeProvider,
        ILogger<GoldenSetReviewService> logger)
    {
        _store = store;
        _validator = validator;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<ReviewSubmissionResult> SubmitForReviewAsync(
        string goldenSetId,
        string submitterId,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);
        ArgumentException.ThrowIfNullOrWhiteSpace(submitterId);

        _logger.LogInformation(
            "Submitting golden set {GoldenSetId} for review by {SubmitterId}",
            goldenSetId,
            submitterId);

        // Get current golden set
        var goldenSet = await _store.GetAsync(goldenSetId, ct);
        if (goldenSet is null)
        {
            return ReviewSubmissionResult.Failed(NotFoundMessage(goldenSetId));
        }

        // Check current status allows submission
        if (!IsValidTransition(goldenSet.Status, GoldenSetStatus.InReview))
        {
            return ReviewSubmissionResult.Failed(
                string.Format(
                    CultureInfo.InvariantCulture,
                    "Cannot submit for review from status {0}. Must be in Draft status.",
                    goldenSet.Status));
        }

        // Validate the golden set before submission
        var validationResult = await _validator.ValidateAsync(goldenSet.Definition, ct: ct);
        if (!validationResult.IsValid)
        {
            return ReviewSubmissionResult.Failed(
                "Validation failed. Please fix errors before submitting.",
                [.. validationResult.Errors.Select(e => e.Message)]);
        }

        // Update status
        var updateResult = await _store.UpdateStatusAsync(
            goldenSetId,
            GoldenSetStatus.InReview,
            submitterId,
            "Submitted for review",
            ct);
        if (!updateResult.Success)
        {
            return ReviewSubmissionResult.Failed(updateResult.Error ?? "Failed to update status");
        }

        _logger.LogInformation(
            "Golden set {GoldenSetId} submitted for review",
            goldenSetId);

        return ReviewSubmissionResult.Successful(GoldenSetStatus.InReview);
    }

    /// <inheritdoc />
    public async Task<ReviewDecisionResult> ApproveAsync(
        string goldenSetId,
        string reviewerId,
        string? comments = null,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);
        ArgumentException.ThrowIfNullOrWhiteSpace(reviewerId);

        _logger.LogInformation(
            "Approving golden set {GoldenSetId} by {ReviewerId}",
            goldenSetId,
            reviewerId);

        // Get current golden set
        var goldenSet = await _store.GetAsync(goldenSetId, ct);
        if (goldenSet is null)
        {
            return ReviewDecisionResult.Failed(NotFoundMessage(goldenSetId));
        }

        // Check current status allows approval
        if (!IsValidTransition(goldenSet.Status, GoldenSetStatus.Approved))
        {
            return ReviewDecisionResult.Failed(
                string.Format(
                    CultureInfo.InvariantCulture,
                    "Cannot approve from status {0}. Must be in InReview status.",
                    goldenSet.Status));
        }

        // Update the definition with reviewer info
        var reviewedDefinition = goldenSet.Definition with
        {
            Metadata = goldenSet.Definition.Metadata with
            {
                ReviewedBy = reviewerId,
                ReviewedAt = _timeProvider.GetUtcNow()
            }
        };

        // Store the updated definition under its current status. This and the status change
        // below are two separate store calls, so if the status update fails the reviewer
        // metadata has already been persisted.
        var storeResult = await _store.StoreAsync(reviewedDefinition, goldenSet.Status, ct);
        if (!storeResult.Success)
        {
            return ReviewDecisionResult.Failed(storeResult.Error ?? "Failed to update definition");
        }

        // Update status
        var updateResult = await _store.UpdateStatusAsync(
            goldenSetId,
            GoldenSetStatus.Approved,
            reviewerId,
            comments ?? "Approved",
            ct);
        if (!updateResult.Success)
        {
            return ReviewDecisionResult.Failed(updateResult.Error ?? "Failed to update status");
        }

        _logger.LogInformation(
            "Golden set {GoldenSetId} approved by {ReviewerId}",
            goldenSetId,
            reviewerId);

        return ReviewDecisionResult.Successful(GoldenSetStatus.Approved);
    }

    /// <inheritdoc />
    public async Task<ReviewDecisionResult> RequestChangesAsync(
        string goldenSetId,
        string reviewerId,
        string comments,
        ImmutableArray<ChangeRequest> changes,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);
        ArgumentException.ThrowIfNullOrWhiteSpace(reviewerId);
        ArgumentException.ThrowIfNullOrWhiteSpace(comments);

        _logger.LogInformation(
            "Requesting changes for golden set {GoldenSetId} by {ReviewerId}",
            goldenSetId,
            reviewerId);

        // Get current golden set
        var goldenSet = await _store.GetAsync(goldenSetId, ct);
        if (goldenSet is null)
        {
            return ReviewDecisionResult.Failed(NotFoundMessage(goldenSetId));
        }

        // Check current status allows requesting changes
        if (!IsValidTransition(goldenSet.Status, GoldenSetStatus.Draft))
        {
            return ReviewDecisionResult.Failed(
                string.Format(
                    CultureInfo.InvariantCulture,
                    "Cannot request changes from status {0}. Must be in InReview status.",
                    goldenSet.Status));
        }

        // Format comment with change requests
        var fullComment = FormatChangesComment(comments, changes);

        // Update status back to draft
        var updateResult = await _store.UpdateStatusAsync(
            goldenSetId,
            GoldenSetStatus.Draft,
            reviewerId,
            fullComment,
            ct);
        if (!updateResult.Success)
        {
            return ReviewDecisionResult.Failed(updateResult.Error ?? "Failed to update status");
        }

        // A default (uninitialized) ImmutableArray throws on Length; count it as zero changes.
        var changeCount = changes.IsDefault ? 0 : changes.Length;
        _logger.LogInformation(
            "Changes requested for golden set {GoldenSetId}. {ChangeCount} specific changes.",
            goldenSetId,
            changeCount);

        return ReviewDecisionResult.Successful(GoldenSetStatus.Draft);
    }

    /// <inheritdoc />
    public async Task<ImmutableArray<ReviewHistoryEntry>> GetHistoryAsync(
        string goldenSetId,
        CancellationToken ct = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);

        // Get audit log from store
        var auditLog = await _store.GetAuditLogAsync(goldenSetId, ct);

        // Convert audit log entries to review history entries
        return auditLog
            .Select(entry => new ReviewHistoryEntry
            {
                Action = MapOperationToAction(entry.Operation),
                ActorId = entry.ActorId,
                Timestamp = entry.Timestamp,
                OldStatus = entry.OldStatus,
                NewStatus = entry.NewStatus,
                Comments = entry.Comment
            })
            .ToImmutableArray();
    }

    /// <inheritdoc />
    public bool IsValidTransition(GoldenSetStatus currentStatus, GoldenSetStatus targetStatus)
        => ValidTransitions.TryGetValue(currentStatus, out var validTargets)
            && validTargets.Contains(targetStatus);

    /// <summary>
    /// Builds the "not found" error message shared by all workflow operations.
    /// </summary>
    private static string NotFoundMessage(string goldenSetId)
        => string.Format(CultureInfo.InvariantCulture, "Golden set {0} not found", goldenSetId);

    /// <summary>
    /// Appends a bulleted list of the specific change requests to the reviewer's comment.
    /// </summary>
    /// <param name="comments">Reviewer's free-text comment.</param>
    /// <param name="changes">Specific change requests; may be empty or default.</param>
    /// <returns>The comment alone when there are no change requests; otherwise the comment plus the list.</returns>
    private static string FormatChangesComment(string comments, ImmutableArray<ChangeRequest> changes)
    {
        // IsDefaultOrEmpty also covers default(ImmutableArray<T>), which would throw on Length.
        if (changes.IsDefaultOrEmpty)
        {
            return comments;
        }

        var changeList = string.Join(
            Environment.NewLine,
            changes.Select(c => string.Format(
                CultureInfo.InvariantCulture,
                "- [{0}]: {1}",
                c.Field,
                c.Comment)));

        return string.Format(
            CultureInfo.InvariantCulture,
            "{0}{1}{1}Requested changes:{1}{2}",
            comments,
            Environment.NewLine,
            changeList);
    }

    /// <summary>
    /// Maps an audit-log operation name to a review action name.
    /// </summary>
    /// <param name="operation">Operation name from the audit log; compared case-insensitively.</param>
    /// <returns>
    /// <c>ReviewActions.Created</c> or <c>ReviewActions.Updated</c> for known operations
    /// (status changes map to Updated); otherwise the lower-cased operation name.
    /// </returns>
    private static string MapOperationToAction(string operation)
    {
        var normalized = operation.ToLowerInvariant();
        return normalized switch
        {
            "created" or "create" => ReviewActions.Created,
            "updated" or "update" => ReviewActions.Updated,
            "status_change" => ReviewActions.Updated,
            _ => normalized
        };
    }
}

View File

@@ -0,0 +1,235 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Service for AI-assisted enrichment of golden sets.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="IsAvailable"/> is presumably meant to be checked before calling
/// <see cref="EnrichAsync"/>; the behavior of <see cref="EnrichAsync"/> when enrichment is
/// unavailable is not defined by this contract — confirm against the implementation.
/// </remarks>
public interface IGoldenSetEnrichmentService
{
/// <summary>
/// Enriches a draft golden set using AI analysis.
/// </summary>
/// <param name="draft">The draft golden set to enrich.</param>
/// <param name="context">Context for enrichment (commits, advisory text, etc.).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// Enrichment result carrying the enriched draft, the actions applied, an overall confidence,
/// and any rationale or warnings.
/// </returns>
Task<GoldenSetEnrichmentResult> EnrichAsync(
GoldenSetDefinition draft,
GoldenSetEnrichmentContext context,
CancellationToken ct = default);
/// <summary>
/// Gets a value indicating whether AI enrichment is available.
/// </summary>
bool IsAvailable { get; }
}
/// <summary>
/// Context provided to the AI for enrichment.
/// </summary>
/// <remarks>
/// Every member is optional: collections default to empty and the remaining members default to null.
/// </remarks>
public sealed record GoldenSetEnrichmentContext
{
/// <summary>
/// Fix commits to analyze. Defaults to empty.
/// </summary>
public ImmutableArray<AnalyzedCommit> FixCommits { get; init; } = [];
/// <summary>
/// Related CVEs. Defaults to empty.
/// </summary>
public ImmutableArray<string> RelatedCves { get; init; } = [];
/// <summary>
/// Advisory description text, or null when not supplied.
/// </summary>
public string? AdvisoryText { get; init; }
/// <summary>
/// Upstream source code snippets (if available); null otherwise.
/// </summary>
public string? UpstreamSourceCode { get; init; }
/// <summary>
/// CWE IDs associated with the vulnerability. Defaults to empty.
/// </summary>
public ImmutableArray<string> CweIds { get; init; } = [];
/// <summary>
/// Commit analysis result, or null when no analysis was performed.
/// </summary>
public CommitAnalysisResult? CommitAnalysis { get; init; }
}
/// <summary>
/// Outcome of an AI enrichment pass over a draft golden set.
/// </summary>
public sealed record GoldenSetEnrichmentResult
{
    /// <summary>
    /// The draft after enrichment.
    /// </summary>
    public required GoldenSetDefinition EnrichedDraft { get; init; }

    /// <summary>
    /// Actions that were applied while enriching. Defaults to empty.
    /// </summary>
    public ImmutableArray<EnrichmentAction> ActionsApplied { get; init; } = [];

    /// <summary>
    /// Overall confidence in the enrichment.
    /// </summary>
    public decimal OverallConfidence { get; init; }

    /// <summary>
    /// The AI's rationale for the enrichments, if any.
    /// </summary>
    public string? AiRationale { get; init; }

    /// <summary>
    /// Warnings raised during enrichment. Defaults to empty.
    /// </summary>
    public ImmutableArray<string> Warnings { get; init; } = [];

    /// <summary>
    /// Creates a result that returns the draft unchanged.
    /// </summary>
    /// <param name="draft">Draft to hand back as-is.</param>
    /// <param name="reason">Why nothing changed; stored in <see cref="AiRationale"/>.</param>
    /// <returns>A result with zero confidence, no actions and no warnings.</returns>
    public static GoldenSetEnrichmentResult NoChanges(GoldenSetDefinition draft, string reason)
    {
        var unchanged = new GoldenSetEnrichmentResult
        {
            EnrichedDraft = draft,
            OverallConfidence = 0,
            AiRationale = reason
        };
        return unchanged;
    }
}
/// <summary>
/// An action taken during enrichment.
/// </summary>
public sealed record EnrichmentAction
{
/// <summary>
/// Type of action (function_added, edge_suggested, sink_refined, constant_extracted).
/// <see cref="EnrichmentActionTypes"/> declares constants for the known type strings.
/// </summary>
public required string Type { get; init; }
/// <summary>
/// Target of the action (field path or element).
/// </summary>
public required string Target { get; init; }
/// <summary>
/// Value set or suggested.
/// </summary>
public required string Value { get; init; }
/// <summary>
/// Confidence in this action (0.0 - 1.0).
/// NOTE(review): the range is documented only; nothing in this record enforces it.
/// </summary>
public required decimal Confidence { get; init; }
/// <summary>
/// Rationale for the action, or null when none was given.
/// </summary>
public string? Rationale { get; init; }
}
/// <summary>
/// Known enrichment action types.
/// </summary>
/// <remarks>
/// String constants intended as values for <see cref="EnrichmentAction.Type"/>.
/// </remarks>
public static class EnrichmentActionTypes
{
/// <summary>Action type string <c>function_added</c>.</summary>
public const string FunctionAdded = "function_added";
/// <summary>Action type string <c>function_refined</c>.</summary>
public const string FunctionRefined = "function_refined";
/// <summary>Action type string <c>edge_suggested</c>.</summary>
public const string EdgeSuggested = "edge_suggested";
/// <summary>Action type string <c>sink_added</c>.</summary>
public const string SinkAdded = "sink_added";
/// <summary>Action type string <c>sink_refined</c>.</summary>
public const string SinkRefined = "sink_refined";
/// <summary>Action type string <c>constant_extracted</c>.</summary>
public const string ConstantExtracted = "constant_extracted";
/// <summary>Action type string <c>witness_hint_added</c>.</summary>
public const string WitnessHintAdded = "witness_hint_added";
/// <summary>Action type string <c>taint_invariant_set</c>.</summary>
public const string TaintInvariantSet = "taint_invariant_set";
}
/// <summary>
/// AI enrichment prompt templates.
/// </summary>
public static class EnrichmentPrompts
{
/// <summary>
/// System prompt for golden set enrichment.
/// </summary>
/// <remarks>
/// Fixed text with no placeholders.
/// </remarks>
public const string SystemPrompt = """
You are a security vulnerability analyst specializing in binary analysis and golden set creation for vulnerability detection.
Your task is to analyze vulnerability information and identify specific code-level targets that can be used to detect the vulnerability in compiled binaries.
Focus on:
1. Identifying vulnerable functions from fix commits
2. Extracting specific constants, magic values, or buffer sizes from vulnerable code
3. Suggesting basic block edge patterns when fixes add bounds checks or branches
4. Identifying sink functions that enable exploitation
Be precise and conservative - only suggest targets with high confidence.
""";
/// <summary>
/// User prompt template for enrichment.
/// </summary>
/// <remarks>
/// Named placeholders in the text: <c>{cve_id}</c>, <c>{component}</c>, <c>{advisory_text}</c>,
/// <c>{cwe_ids}</c>, <c>{modified_functions}</c>, <c>{added_conditions}</c>,
/// <c>{added_constants}</c> and <c>{current_draft_yaml}</c>.
/// The template also contains literal JSON braces in its example response, so it is not a
/// valid composite format string for <c>string.Format</c>.
/// NOTE(review): placeholders are presumably substituted by name (e.g. string replacement) —
/// confirm against the consumer of this template.
/// </remarks>
public const string UserPromptTemplate = """
Analyze vulnerability {cve_id} in {component} to identify specific code-level targets.
## Advisory Information
{advisory_text}
## CWE Classifications
{cwe_ids}
## Fix Commits Analysis
Modified functions: {modified_functions}
Added conditions: {added_conditions}
Added constants: {added_constants}
## Current Draft Golden Set
{current_draft_yaml}
## Task
1. Identify the vulnerable function(s) from the fix commits
2. Extract specific constants/magic values that appear in the vulnerable code
3. Suggest basic block edge patterns if the fix adds bounds checks or branches
4. Identify the sink function(s) that enable exploitation
Respond with a JSON object:
```json
{
"functions": [
{
"name": "function_name",
"confidence": 0.95,
"rationale": "Modified in fix commit abc123"
}
],
"constants": [
{
"value": "0x400",
"confidence": 0.8,
"rationale": "Buffer size constant in bounds check"
}
],
"edge_suggestions": [
{
"pattern": "bounds_check_before_memcpy",
"confidence": 0.7,
"rationale": "Fix adds size validation before memory copy"
}
],
"sinks": [
{
"name": "memcpy",
"confidence": 0.9,
"rationale": "Called without size validation in vulnerable version"
}
]
}
```
""";
}

View File

@@ -0,0 +1,266 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Extracts golden set drafts from vulnerability advisories and upstream sources.
/// </summary>
public interface IGoldenSetExtractor
{
/// <summary>
/// Extracts a draft golden set from a CVE/advisory.
/// </summary>
/// <param name="vulnerabilityId">The vulnerability ID (CVE-*, GHSA-*, etc.).</param>
/// <param name="component">The component name (optional - can be auto-detected).</param>
/// <param name="options">Extraction options; null selects the defaults of <see cref="ExtractionOptions"/>.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// Extraction result with draft and metadata. Check
/// <see cref="GoldenSetExtractionResult.IsSuccess"/> to see whether any data was found.
/// </returns>
Task<GoldenSetExtractionResult> ExtractAsync(
string vulnerabilityId,
string? component = null,
ExtractionOptions? options = null,
CancellationToken ct = default);
/// <summary>
/// Enriches an existing draft with additional sources.
/// </summary>
/// <param name="draft">The existing draft to enrich.</param>
/// <param name="options">Enrichment options; may be null.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Enriched extraction result.</returns>
Task<GoldenSetExtractionResult> EnrichAsync(
GoldenSetDefinition draft,
EnrichmentOptions? options = null,
CancellationToken ct = default);
}
/// <summary>
/// Result of a golden set extraction operation.
/// </summary>
public sealed record GoldenSetExtractionResult
{
/// <summary>
/// The draft golden set definition.
/// </summary>
public required GoldenSetDefinition Draft { get; init; }
/// <summary>
/// Confidence scores for different aspects of the extraction.
/// </summary>
public required ExtractionConfidence Confidence { get; init; }
/// <summary>
/// Sources used during extraction. Defaults to empty.
/// </summary>
public ImmutableArray<ExtractionSource> Sources { get; init; } = [];
/// <summary>
/// Suggestions for improving the golden set. Defaults to empty.
/// </summary>
public ImmutableArray<ExtractionSuggestion> Suggestions { get; init; } = [];
/// <summary>
/// Warnings encountered during extraction. Defaults to empty.
/// </summary>
public ImmutableArray<string> Warnings { get; init; } = [];
/// <summary>
/// Whether extraction was successful (at least partial data found).
/// Computed as <c>Confidence.Overall &gt; 0</c>, so a zero-confidence result is unsuccessful.
/// </summary>
public bool IsSuccess => Confidence.Overall > 0;
}
/// <summary>
/// Confidence scores describing the quality of an extraction.
/// </summary>
public sealed record ExtractionConfidence
{
    // Weights for the overall score: functions matter most, then sinks, then edges.
    private const decimal FunctionWeight = 0.5m;
    private const decimal SinkWeight = 0.3m;
    private const decimal EdgeWeight = 0.2m;

    /// <summary>
    /// Overall confidence score (0.0 - 1.0).
    /// </summary>
    public required decimal Overall { get; init; }

    /// <summary>
    /// Confidence in function identification.
    /// </summary>
    public required decimal FunctionIdentification { get; init; }

    /// <summary>
    /// Confidence in edge extraction.
    /// </summary>
    public required decimal EdgeExtraction { get; init; }

    /// <summary>
    /// Confidence in sink mapping.
    /// </summary>
    public required decimal SinkMapping { get; init; }

    /// <summary>
    /// Gets a new confidence value with every score set to zero.
    /// </summary>
    public static ExtractionConfidence Zero
    {
        get
        {
            return new ExtractionConfidence
            {
                Overall = 0,
                FunctionIdentification = 0,
                EdgeExtraction = 0,
                SinkMapping = 0
            };
        }
    }

    /// <summary>
    /// Builds a confidence value from its component scores.
    /// </summary>
    /// <param name="functionId">Function identification score.</param>
    /// <param name="edgeExtraction">Edge extraction score.</param>
    /// <param name="sinkMapping">Sink mapping score.</param>
    /// <returns>
    /// The components unchanged, plus an overall score that is their weighted sum
    /// (0.5 function, 0.3 sink, 0.2 edge) rounded to two decimals.
    /// </returns>
    public static ExtractionConfidence FromComponents(
        decimal functionId,
        decimal edgeExtraction,
        decimal sinkMapping)
    {
        var weightedSum =
            (functionId * FunctionWeight)
            + (sinkMapping * SinkWeight)
            + (edgeExtraction * EdgeWeight);

        return new ExtractionConfidence
        {
            Overall = decimal.Round(weightedSum, 2),
            FunctionIdentification = functionId,
            EdgeExtraction = edgeExtraction,
            SinkMapping = sinkMapping
        };
    }
}
/// <summary>
/// Information about a data source used during extraction.
/// </summary>
public sealed record ExtractionSource
{
/// <summary>
/// Source type (nvd, osv, ghsa, upstream_commit).
/// <see cref="ExtractionSourceTypes"/> declares constants for the known type strings.
/// </summary>
public required string Type { get; init; }
/// <summary>
/// Reference URL or identifier.
/// </summary>
public required string Reference { get; init; }
/// <summary>
/// When the source was fetched.
/// </summary>
public required DateTimeOffset FetchedAt { get; init; }
/// <summary>
/// Optional version/etag of the source data; null when not available.
/// </summary>
public string? Version { get; init; }
}
/// <summary>
/// A suggestion for improving a golden set.
/// </summary>
public sealed record ExtractionSuggestion
{
/// <summary>
/// Field path being suggested (e.g., "targets[0].sinks").
/// </summary>
public required string Field { get; init; }
/// <summary>
/// Current value (if any); null when the field has no value yet.
/// </summary>
public string? CurrentValue { get; init; }
/// <summary>
/// Suggested value.
/// </summary>
public required string SuggestedValue { get; init; }
/// <summary>
/// Confidence in this suggestion (0.0 - 1.0).
/// NOTE(review): the range is documented only; nothing in this record enforces it.
/// </summary>
public required decimal Confidence { get; init; }
/// <summary>
/// Human-readable rationale for the suggestion.
/// </summary>
public required string Rationale { get; init; }
/// <summary>
/// Source of the suggestion (ai, nvd, osv, etc.); null when not attributed.
/// </summary>
public string? Source { get; init; }
}
/// <summary>
/// Options for golden set extraction.
/// </summary>
public sealed record ExtractionOptions
{
/// <summary>
/// Include analysis of upstream fix commits. Defaults to true.
/// </summary>
public bool IncludeUpstreamCommits { get; init; } = true;
/// <summary>
/// Include related CVEs in the analysis. Defaults to true.
/// </summary>
public bool IncludeRelatedCves { get; init; } = true;
/// <summary>
/// Use AI for enrichment. Defaults to true.
/// </summary>
public bool UseAiEnrichment { get; init; } = true;
/// <summary>
/// Maximum number of upstream commits to analyze. Defaults to 5.
/// </summary>
public int MaxUpstreamCommits { get; init; } = 5;
/// <summary>
/// Sources to use for extraction (empty = all available). Defaults to empty.
/// GoldenSetExtractor.ExtractAsync compares these names case-insensitively with each
/// extractor's source type.
/// </summary>
public ImmutableArray<string> Sources { get; init; } = [];
/// <summary>
/// Offline mode - skip remote fetches, use cached data only. Defaults to false.
/// </summary>
public bool OfflineMode { get; init; }
}
/// <summary>
/// Options for enriching an existing draft.
/// </summary>
/// <remarks>
/// All flags default to true.
/// NOTE(review): GoldenSetExtractor.EnrichAsync does not consult any of these flags;
/// presumably they are intended for a later enrichment stage — confirm.
/// </remarks>
public sealed record EnrichmentOptions
{
/// <summary>
/// Analyze commit diffs to extract function changes.
/// </summary>
public bool AnalyzeCommitDiffs { get; init; } = true;
/// <summary>
/// Extract witness hints from test cases.
/// </summary>
public bool ExtractTestCases { get; init; } = true;
/// <summary>
/// Suggest edge patterns from control flow changes.
/// </summary>
public bool SuggestEdgePatterns { get; init; } = true;
/// <summary>
/// Extract constants from vulnerable code.
/// </summary>
public bool ExtractConstants { get; init; } = true;
}
/// <summary>
/// Known source types for extraction.
/// </summary>
/// <remarks>
/// String constants intended as values for <see cref="ExtractionSource.Type"/>.
/// </remarks>
public static class ExtractionSourceTypes
{
/// <summary>Source type string <c>nvd</c>.</summary>
public const string Nvd = "nvd";
/// <summary>Source type string <c>osv</c>.</summary>
public const string Osv = "osv";
/// <summary>Source type string <c>ghsa</c>.</summary>
public const string Ghsa = "ghsa";
/// <summary>Source type string <c>upstream_commit</c>.</summary>
public const string UpstreamCommit = "upstream_commit";
/// <summary>Source type string <c>ai</c>.</summary>
public const string Ai = "ai";
/// <summary>Source type string <c>manual</c>.</summary>
public const string Manual = "manual";
}

View File

@@ -0,0 +1,224 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Service for managing the golden set review workflow.
/// </summary>
/// <remarks>
/// In the GoldenSetReviewService implementation the allowed status transitions are:
/// Draft → InReview; InReview → Draft or Approved; Approved → Deprecated;
/// Deprecated → Archived; Archived is terminal.
/// </remarks>
public interface IGoldenSetReviewService
{
/// <summary>
/// Submits a golden set for review.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="submitterId">The submitter's ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Submission result; on failure it carries an error and any validation errors.</returns>
Task<ReviewSubmissionResult> SubmitForReviewAsync(
string goldenSetId,
string submitterId,
CancellationToken ct = default);
/// <summary>
/// Approves a golden set.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="reviewerId">The reviewer's ID.</param>
/// <param name="comments">Optional approval comments.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Decision result.</returns>
Task<ReviewDecisionResult> ApproveAsync(
string goldenSetId,
string reviewerId,
string? comments = null,
CancellationToken ct = default);
/// <summary>
/// Requests changes to a golden set.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="reviewerId">The reviewer's ID.</param>
/// <param name="comments">Required comments explaining changes needed.</param>
/// <param name="changes">Specific change requests.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Decision result.</returns>
Task<ReviewDecisionResult> RequestChangesAsync(
string goldenSetId,
string reviewerId,
string comments,
ImmutableArray<ChangeRequest> changes,
CancellationToken ct = default);
/// <summary>
/// Gets the review history for a golden set.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Array of history entries.</returns>
Task<ImmutableArray<ReviewHistoryEntry>> GetHistoryAsync(
string goldenSetId,
CancellationToken ct = default);
/// <summary>
/// Checks if a transition is valid from the current state.
/// </summary>
/// <param name="currentStatus">The current status.</param>
/// <param name="targetStatus">The target status.</param>
/// <returns>True if transition is valid; otherwise, false.</returns>
bool IsValidTransition(GoldenSetStatus currentStatus, GoldenSetStatus targetStatus);
}
/// <summary>
/// Result of submitting a golden set for review.
/// </summary>
public sealed record ReviewSubmissionResult
{
    /// <summary>
    /// Whether submission succeeded.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// The new status after submission; null when submission failed.
    /// </summary>
    public GoldenSetStatus? NewStatus { get; init; }

    /// <summary>
    /// Error message if submission failed.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Validation errors that prevented submission. Defaults to empty.
    /// </summary>
    public ImmutableArray<string> ValidationErrors { get; init; } = [];

    /// <summary>
    /// Creates a successful result.
    /// </summary>
    /// <param name="newStatus">Status the golden set moved to.</param>
    public static ReviewSubmissionResult Successful(GoldenSetStatus newStatus)
        => new() { Success = true, NewStatus = newStatus };

    /// <summary>
    /// Creates a failed result.
    /// </summary>
    /// <param name="error">Error message describing the failure.</param>
    /// <param name="validationErrors">
    /// Validation errors, if any. When omitted (or passed as <c>default</c>) the result
    /// carries an empty array rather than an uninitialized one.
    /// </param>
    public static ReviewSubmissionResult Failed(string error, ImmutableArray<string> validationErrors = default)
        => new()
        {
            Success = false,
            Error = error,
            // default(ImmutableArray<T>) is uninitialized and throws when enumerated or measured;
            // normalize it so consumers can always read ValidationErrors safely.
            ValidationErrors = validationErrors.IsDefault ? [] : validationErrors
        };
}
/// <summary>
/// Outcome of a reviewer decision on a golden set (approval or a request for changes).
/// </summary>
public sealed record ReviewDecisionResult
{
    /// <summary>
    /// <c>true</c> when the decision was applied; <c>false</c> otherwise.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Status of the golden set after the decision; not set by <see cref="Failed"/>.
    /// </summary>
    public GoldenSetStatus? NewStatus { get; init; }

    /// <summary>
    /// Failure description; not set by <see cref="Successful"/>.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Builds a result describing an applied decision.
    /// </summary>
    /// <param name="newStatus">Status the golden set now has.</param>
    /// <returns>A result whose <see cref="Success"/> is <c>true</c>.</returns>
    public static ReviewDecisionResult Successful(GoldenSetStatus newStatus)
    {
        return new ReviewDecisionResult
        {
            Success = true,
            NewStatus = newStatus
        };
    }

    /// <summary>
    /// Builds a result describing a decision that could not be applied.
    /// </summary>
    /// <param name="error">Failure description.</param>
    /// <returns>A result whose <see cref="Success"/> is <c>false</c>.</returns>
    public static ReviewDecisionResult Failed(string error)
    {
        return new ReviewDecisionResult
        {
            Success = false,
            Error = error
        };
    }
}
/// <summary>
/// A specific change request from a reviewer.
/// </summary>
/// <remarks>
/// <see cref="Field"/> and <see cref="Comment"/> are <c>required</c> and must be supplied in the object
/// initializer; <see cref="CurrentValue"/> and <see cref="SuggestedValue"/> are optional and default to
/// <c>null</c>. Being a record, two requests with the same property values compare equal.
/// </remarks>
public sealed record ChangeRequest
{
/// <summary>
/// Field path that needs changes.
/// </summary>
/// <remarks>
/// NOTE(review): the path syntax (dotted, JSON-pointer, ...) is not defined in this file — confirm with
/// the code that produces and consumes change requests.
/// </remarks>
public required string Field { get; init; }
/// <summary>
/// Current value of the field, as text. <c>null</c> when not provided.
/// </summary>
public string? CurrentValue { get; init; }
/// <summary>
/// Suggested new value, as text. <c>null</c> when the reviewer gave no concrete suggestion.
/// </summary>
public string? SuggestedValue { get; init; }
/// <summary>
/// Comment explaining the requested change.
/// </summary>
public required string Comment { get; init; }
}
/// <summary>
/// An entry in the review history.
/// </summary>
/// <remarks>
/// <see cref="Action"/>, <see cref="ActorId"/> and <see cref="Timestamp"/> are <c>required</c>; the
/// status pair, comments and change requests are optional.
/// </remarks>
public sealed record ReviewHistoryEntry
{
/// <summary>
/// Action taken (submitted, approved, changes_requested, etc.).
/// </summary>
/// <remarks>
/// The example values match the string constants declared on <see cref="ReviewActions"/>.
/// NOTE(review): the type is a plain string, so nothing here enforces that only those constants are used.
/// </remarks>
public required string Action { get; init; }
/// <summary>
/// Who performed the action.
/// </summary>
public required string ActorId { get; init; }
/// <summary>
/// When the action occurred.
/// </summary>
public required DateTimeOffset Timestamp { get; init; }
/// <summary>
/// Status before the action. <c>null</c> when not recorded.
/// </summary>
public GoldenSetStatus? OldStatus { get; init; }
/// <summary>
/// Status after the action. <c>null</c> when not recorded.
/// </summary>
public GoldenSetStatus? NewStatus { get; init; }
/// <summary>
/// Comments associated with the action.
/// </summary>
public string? Comments { get; init; }
/// <summary>
/// Change requests (if action was changes_requested). Defaults to an empty array.
/// </summary>
public ImmutableArray<ChangeRequest> ChangeRequests { get; init; } = [];
}
/// <summary>
/// Known review actions.
/// </summary>
/// <remarks>
/// Lower-case, snake_case string constants. Several of them ("submitted", "approved",
/// "changes_requested") are the example values documented on <see cref="ReviewHistoryEntry.Action"/>.
/// Because these are <c>const</c>, their values are inlined into referencing assemblies at compile time.
/// </remarks>
public static class ReviewActions
{
/// <summary>Action name "created".</summary>
public const string Created = "created";
/// <summary>Action name "updated".</summary>
public const string Updated = "updated";
/// <summary>Action name "submitted".</summary>
public const string Submitted = "submitted";
/// <summary>Action name "approved".</summary>
public const string Approved = "approved";
/// <summary>Action name "changes_requested".</summary>
public const string ChangesRequested = "changes_requested";
/// <summary>Action name "published".</summary>
public const string Published = "published";
/// <summary>Action name "deprecated".</summary>
public const string Deprecated = "deprecated";
/// <summary>Action name "archived".</summary>
public const string Archived = "archived";
}

View File

@@ -0,0 +1,519 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Analyzes upstream fix commits to extract vulnerability information.
/// </summary>
/// <remarks>
/// The default implementation in this file is <see cref="UpstreamCommitAnalyzer"/>, which recognizes
/// GitHub, GitLab and Bitbucket commit URLs.
/// </remarks>
public interface IUpstreamCommitAnalyzer
{
/// <summary>
/// Fetches and analyzes fix commits from upstream repositories.
/// </summary>
/// <remarks>
/// In the default implementation, URLs that cannot be parsed and commits that cannot be fetched do not
/// fail the call; they are reported through <see cref="CommitAnalysisResult.Warnings"/> instead.
/// </remarks>
/// <param name="commitUrls">URLs to fix commits.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Analysis result with extracted information.</returns>
Task<CommitAnalysisResult> AnalyzeAsync(
ImmutableArray<string> commitUrls,
CancellationToken ct = default);
/// <summary>
/// Parses a commit URL to extract repository and commit information.
/// </summary>
/// <param name="url">The commit URL.</param>
/// <returns>Parsed commit info or null if not recognized.</returns>
ParsedCommitUrl? ParseCommitUrl(string url);
}
/// <summary>
/// Result of analyzing upstream fix commits.
/// </summary>
/// <remarks>
/// All collections default to empty arrays, so a freshly constructed instance is a valid
/// "nothing found" result. The record is immutable (init-only properties).
/// </remarks>
public sealed record CommitAnalysisResult
{
    /// <summary>
    /// Analyzed commits.
    /// </summary>
    public ImmutableArray<AnalyzedCommit> Commits { get; init; } = [];

    /// <summary>
    /// Functions modified across all commits.
    /// </summary>
    public ImmutableArray<string> ModifiedFunctions { get; init; } = [];

    /// <summary>
    /// Constants added in the fixes.
    /// </summary>
    public ImmutableArray<string> AddedConstants { get; init; } = [];

    /// <summary>
    /// Conditions added (if statements, bounds checks).
    /// </summary>
    public ImmutableArray<string> AddedConditions { get; init; } = [];

    /// <summary>
    /// Warnings encountered during analysis.
    /// </summary>
    public ImmutableArray<string> Warnings { get; init; } = [];

    /// <summary>
    /// Gets the shared empty result.
    /// </summary>
    /// <remarks>
    /// A single cached instance is returned instead of allocating on every access; this is safe
    /// because the record cannot be mutated after construction.
    /// </remarks>
    public static CommitAnalysisResult Empty { get; } = new();
}
/// <summary>
/// Information about an analyzed commit.
/// </summary>
/// <remarks>
/// <see cref="Url"/> and <see cref="Hash"/> are <c>required</c>. When produced by
/// <see cref="UpstreamCommitAnalyzer"/>, a commit whose diff could not be downloaded is still listed,
/// with <see cref="WasFetched"/> set to <c>false</c> and no files.
/// </remarks>
public sealed record AnalyzedCommit
{
/// <summary>
/// URL to the commit.
/// </summary>
public required string Url { get; init; }
/// <summary>
/// Commit hash.
/// </summary>
public required string Hash { get; init; }
/// <summary>
/// Commit message.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="UpstreamCommitAnalyzer"/> in this file never assigns this property, so it
/// stays <c>null</c> for results coming from that implementation.
/// </remarks>
public string? Message { get; init; }
/// <summary>
/// Files changed in the commit. Defaults to an empty array.
/// </summary>
public ImmutableArray<FileDiff> Files { get; init; } = [];
/// <summary>
/// Whether this commit was successfully fetched. Defaults to <c>false</c>.
/// </summary>
public bool WasFetched { get; init; }
}
/// <summary>
/// Diff information for a single file.
/// </summary>
/// <remarks>
/// Only <see cref="Path"/> is <c>required</c>; the three collections default to empty arrays.
/// </remarks>
public sealed record FileDiff
{
/// <summary>
/// File path.
/// </summary>
/// <remarks>
/// When produced by <see cref="UpstreamCommitAnalyzer"/>, this is the path captured after <c>a/</c> in
/// the <c>diff --git</c> header line.
/// </remarks>
public required string Path { get; init; }
/// <summary>
/// Functions modified in this file.
/// </summary>
/// <remarks>
/// When produced by <see cref="UpstreamCommitAnalyzer"/>, names are taken from the context text that
/// follows the second <c>@@</c> of a hunk header, so they are a heuristic rather than a parse result.
/// </remarks>
public ImmutableArray<string> FunctionsModified { get; init; } = [];
/// <summary>
/// Lines added.
/// </summary>
/// <remarks>
/// When produced by <see cref="UpstreamCommitAnalyzer"/>, the leading <c>+</c> diff marker is removed.
/// </remarks>
public ImmutableArray<string> LinesAdded { get; init; } = [];
/// <summary>
/// Lines removed.
/// </summary>
/// <remarks>
/// When produced by <see cref="UpstreamCommitAnalyzer"/>, the leading <c>-</c> diff marker is removed.
/// </remarks>
public ImmutableArray<string> LinesRemoved { get; init; } = [];
}
/// <summary>
/// Parsed commit URL information.
/// </summary>
/// <remarks>
/// <see cref="Host"/> is a short lower-case host key ("github", "gitlab", "bitbucket"), not a DNS name.
/// <see cref="GetApiUrl"/> and <see cref="GetDiffUrl"/> build well-known endpoints for those three
/// hosts and fall back to <see cref="OriginalUrl"/> for any other value.
/// </remarks>
public sealed record ParsedCommitUrl
{
    /// <summary>
    /// Host type (github, gitlab, etc.).
    /// </summary>
    public required string Host { get; init; }

    /// <summary>
    /// Repository owner.
    /// </summary>
    public required string Owner { get; init; }

    /// <summary>
    /// Repository name.
    /// </summary>
    public required string Repo { get; init; }

    /// <summary>
    /// Commit hash.
    /// </summary>
    public required string Hash { get; init; }

    /// <summary>
    /// Original URL.
    /// </summary>
    public required string OriginalUrl { get; init; }

    /// <summary>
    /// Gets the API URL for fetching commit details.
    /// </summary>
    /// <returns>
    /// The REST endpoint for the commit on GitHub, GitLab or Bitbucket Cloud; <see cref="OriginalUrl"/>
    /// for an unrecognized <see cref="Host"/>.
    /// </returns>
    public string GetApiUrl() => Host switch
    {
        "github" => BuildUrl("https://api.github.com/repos/{0}/{1}/commits/{2}"),
        // GitLab addresses a project by its URL-encoded "owner/repo" path, hence the literal %2F.
        "gitlab" => BuildUrl("https://gitlab.com/api/v4/projects/{0}%2F{1}/repository/commits/{2}"),
        "bitbucket" => BuildUrl("https://api.bitbucket.org/2.0/repositories/{0}/{1}/commit/{2}"),
        _ => OriginalUrl
    };

    /// <summary>
    /// Gets the diff URL for fetching patch content.
    /// </summary>
    /// <returns>
    /// A URL that serves the commit as a plain-text unified diff on GitHub, GitLab or Bitbucket Cloud;
    /// <see cref="OriginalUrl"/> for an unrecognized <see cref="Host"/>.
    /// </returns>
    public string GetDiffUrl() => Host switch
    {
        "github" => BuildUrl("https://github.com/{0}/{1}/commit/{2}.diff"),
        "gitlab" => BuildUrl("https://gitlab.com/{0}/{1}/-/commit/{2}.diff"),
        // Bitbucket has no ".diff" suffix on commit pages; the REST diff endpoint returns the raw diff.
        // Without this arm the HTML commit page would be downloaded and parsed as if it were a diff.
        "bitbucket" => BuildUrl("https://api.bitbucket.org/2.0/repositories/{0}/{1}/diff/{2}"),
        _ => OriginalUrl
    };

    // Fills {0}=Owner, {1}=Repo, {2}=Hash into a URL template, culture-invariantly.
    private string BuildUrl(string template)
        => string.Format(CultureInfo.InvariantCulture, template, Owner, Repo, Hash);
}
/// <summary>
/// Default implementation of <see cref="IUpstreamCommitAnalyzer"/>.
/// </summary>
/// <remarks>
/// For each commit URL the analyzer downloads the commit's unified diff over HTTP (using the named
/// client "upstream-commits"), splits it per file, and applies regular-expression heuristics to the
/// added lines to collect constants and the kinds of checks that were introduced. Fetch problems are
/// reported as warnings rather than failing the whole analysis.
/// </remarks>
public sealed partial class UpstreamCommitAnalyzer : IUpstreamCommitAnalyzer
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<UpstreamCommitAnalyzer> _logger;

    /// <summary>
    /// Creates the analyzer.
    /// </summary>
    /// <param name="httpClientFactory">Factory used to obtain the "upstream-commits" HTTP client.</param>
    /// <param name="timeProvider">Time source (stored; not read by the code in this class at present).</param>
    /// <param name="logger">Logger for fetch diagnostics.</param>
    public UpstreamCommitAnalyzer(
        IHttpClientFactory httpClientFactory,
        TimeProvider timeProvider,
        ILogger<UpstreamCommitAnalyzer> logger)
    {
        _httpClientFactory = httpClientFactory;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// URLs are processed sequentially in the order given. An unparseable URL adds a warning and is
    /// skipped. A commit whose download fails (HTTP error or HTTP client timeout) adds a warning and is
    /// listed with <c>WasFetched = false</c>. Cancellation requested through <paramref name="ct"/> is
    /// not swallowed and propagates to the caller. The aggregated function, constant and condition
    /// lists are de-duplicated and sorted ordinally.
    /// </remarks>
    public async Task<CommitAnalysisResult> AnalyzeAsync(
        ImmutableArray<string> commitUrls,
        CancellationToken ct = default)
    {
        // IsDefaultOrEmpty (not IsEmpty): IsEmpty throws on default(ImmutableArray<T>).
        if (commitUrls.IsDefaultOrEmpty)
        {
            return CommitAnalysisResult.Empty;
        }

        var commits = new List<AnalyzedCommit>();
        var warnings = new List<string>();
        var allModifiedFunctions = new HashSet<string>(StringComparer.Ordinal);
        var allAddedConstants = new HashSet<string>(StringComparer.Ordinal);
        var allAddedConditions = new HashSet<string>(StringComparer.Ordinal);

        foreach (var url in commitUrls)
        {
            var parsed = ParseCommitUrl(url);
            if (parsed is null)
            {
                warnings.Add(string.Format(
                    CultureInfo.InvariantCulture,
                    "Could not parse commit URL: {0}",
                    url));
                continue;
            }

            try
            {
                var commit = await FetchAndAnalyzeCommitAsync(parsed, ct);
                commits.Add(commit);

                foreach (var file in commit.Files)
                {
                    foreach (var func in file.FunctionsModified)
                    {
                        allModifiedFunctions.Add(func);
                    }

                    foreach (var line in file.LinesAdded)
                    {
                        ExtractConstantsFromLine(line, allAddedConstants);
                        ExtractConditionsFromLine(line, allAddedConditions);
                    }
                }
            }
            // HttpClient reports its own timeout as TaskCanceledException. Treat that like any other
            // fetch failure so one slow host does not abort the batch, but let a cancellation that the
            // caller actually requested propagate.
            catch (Exception ex) when (
                ex is HttpRequestException
                || (ex is TaskCanceledException && !ct.IsCancellationRequested))
            {
                _logger.LogWarning(ex, "Failed to fetch commit {Url}", url);
                warnings.Add(string.Format(
                    CultureInfo.InvariantCulture,
                    "Failed to fetch commit {0}: {1}",
                    url, ex.Message));
                commits.Add(new AnalyzedCommit
                {
                    Url = url,
                    Hash = parsed.Hash,
                    WasFetched = false
                });
            }
        }

        return new CommitAnalysisResult
        {
            Commits = [.. commits],
            ModifiedFunctions = [.. allModifiedFunctions.OrderBy(f => f, StringComparer.Ordinal)],
            AddedConstants = [.. allAddedConstants.OrderBy(c => c, StringComparer.Ordinal)],
            AddedConditions = [.. allAddedConditions.OrderBy(c => c, StringComparer.Ordinal)],
            Warnings = [.. warnings]
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// Patterns are tried in the order GitHub, GitLab, Bitbucket and are matched anywhere in the
    /// string (they are not anchored), case-insensitively. The hash must be 7 to 40 hex digits.
    /// </remarks>
    public ParsedCommitUrl? ParseCommitUrl(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return null;
        }

        // GitHub: https://github.com/owner/repo/commit/hash
        var githubMatch = GitHubCommitPattern().Match(url);
        if (githubMatch.Success)
        {
            return ToParsedUrl("github", githubMatch, url);
        }

        // GitLab: https://gitlab.com/owner/repo/-/commit/hash
        var gitlabMatch = GitLabCommitPattern().Match(url);
        if (gitlabMatch.Success)
        {
            return ToParsedUrl("gitlab", gitlabMatch, url);
        }

        // Bitbucket: https://bitbucket.org/owner/repo/commits/hash
        var bitbucketMatch = BitbucketCommitPattern().Match(url);
        if (bitbucketMatch.Success)
        {
            return ToParsedUrl("bitbucket", bitbucketMatch, url);
        }

        return null;
    }

    // Builds a ParsedCommitUrl from the named groups (owner, repo, hash) shared by all host patterns.
    private static ParsedCommitUrl ToParsedUrl(string host, Match match, string originalUrl)
        => new()
        {
            Host = host,
            Owner = match.Groups["owner"].Value,
            Repo = match.Groups["repo"].Value,
            Hash = match.Groups["hash"].Value,
            OriginalUrl = originalUrl
        };

    // Downloads the commit's diff text and parses it into per-file information.
    // Throws HttpRequestException on a non-success status code (via EnsureSuccessStatusCode).
    private async Task<AnalyzedCommit> FetchAndAnalyzeCommitAsync(
        ParsedCommitUrl parsed,
        CancellationToken ct)
    {
        var client = _httpClientFactory.CreateClient("upstream-commits");

        // Fetch the diff
        var diffUrl = parsed.GetDiffUrl();
        _logger.LogDebug("Fetching diff from {Url}", diffUrl);

        using var request = new HttpRequestMessage(HttpMethod.Get, diffUrl);
        request.Headers.Add("Accept", "text/plain");
        request.Headers.Add("User-Agent", "StellaOps-GoldenSet/1.0");

        using var response = await client.SendAsync(request, ct);
        response.EnsureSuccessStatusCode();

        var diffContent = await response.Content.ReadAsStringAsync(ct);
        var files = ParseDiff(diffContent);

        return new AnalyzedCommit
        {
            Url = parsed.OriginalUrl,
            Hash = parsed.Hash,
            Files = files,
            WasFetched = true
        };
    }

    // Splits a git unified diff into one FileDiff per "diff --git" section.
    //
    // Added/removed lines are only collected while inside a hunk (after an "@@" header). The file
    // header lines "--- a/x" / "+++ b/x" always precede the first hunk, so they are excluded without
    // having to pattern-match them - which would otherwise also drop genuine content lines such as
    // an added "++i;" (rendered "+++i;") or a removed "-- comment" (rendered "--- comment").
    private static ImmutableArray<FileDiff> ParseDiff(string diffContent)
    {
        var files = new List<FileDiff>();
        FileDiff? currentFile = null;
        var insideHunk = false;
        var addedLines = new List<string>();
        var removedLines = new List<string>();
        var functions = new HashSet<string>(StringComparer.Ordinal);

        // Emits the file collected so far (if any) and resets all per-file state.
        void FlushCurrentFile()
        {
            if (currentFile is not null)
            {
                files.Add(currentFile with
                {
                    LinesAdded = [.. addedLines],
                    LinesRemoved = [.. removedLines],
                    FunctionsModified = [.. functions]
                });
            }

            currentFile = null;
            insideHunk = false;
            addedLines.Clear();
            removedLines.Clear();
            functions.Clear();
        }

        foreach (var line in diffContent.Split('\n'))
        {
            // New file header: diff --git a/path b/path
            if (line.StartsWith("diff --git ", StringComparison.Ordinal))
            {
                // Always close the previous section, even if this header turns out to be
                // unparseable (e.g. a quoted path); otherwise its lines would be merged into the
                // previous file and that file would be emitted twice.
                FlushCurrentFile();

                var pathMatch = DiffFilePathPattern().Match(line);
                if (pathMatch.Success)
                {
                    currentFile = new FileDiff { Path = pathMatch.Groups["path"].Value };
                }

                continue;
            }

            // Anything outside a recognized file section is ignored.
            if (currentFile is null)
            {
                continue;
            }

            // Hunk header: @@ -start,count +start,count @@ function_context
            if (line.StartsWith("@@ ", StringComparison.Ordinal))
            {
                insideHunk = true;

                var hunkMatch = HunkHeaderPattern().Match(line);
                if (hunkMatch.Success && hunkMatch.Groups["func"].Success)
                {
                    var context = hunkMatch.Groups["func"].Value.Trim();
                    if (!string.IsNullOrEmpty(context))
                    {
                        // Extract function name from context
                        var funcNameMatch = FunctionNamePattern().Match(context);
                        if (funcNameMatch.Success)
                        {
                            functions.Add(funcNameMatch.Groups["name"].Value);
                        }
                    }
                }

                continue;
            }

            if (!insideHunk)
            {
                // Extended/file headers (index, mode, ---, +++) between "diff --git" and the first hunk.
                continue;
            }

            if (line.StartsWith('+'))
            {
                addedLines.Add(line[1..]);
            }
            else if (line.StartsWith('-'))
            {
                removedLines.Add(line[1..]);
            }
        }

        // Save last file
        FlushCurrentFile();

        return [.. files];
    }

    // Collects literal constants that appear on an added line into the given set.
    private static void ExtractConstantsFromLine(string line, HashSet<string> constants)
    {
        // Hex constants: 0x1234, 0XABCD
        foreach (Match match in HexConstantPattern().Matches(line))
        {
            constants.Add(match.Value);
        }

        // Numeric constants (two or more digits) following a comparison/assignment operator:
        // > 1024, < 4096, == 256
        foreach (Match match in NumericComparisonPattern().Matches(line))
        {
            constants.Add(match.Groups["num"].Value);
        }

        // Size constants: sizeof(type)
        foreach (Match match in SizeofPattern().Matches(line))
        {
            constants.Add(match.Value);
        }
    }

    // Adds a category tag ("bounds_check", "null_check", "length_check") for each kind of check the
    // added line appears to contain. These are heuristics over the text, not a parse of the code.
    private static void ExtractConditionsFromLine(string line, HashSet<string> conditions)
    {
        // Simple bounds checks
        if (BoundsCheckPattern().IsMatch(line))
        {
            conditions.Add("bounds_check");
        }

        // NULL checks
        if (NullCheckPattern().IsMatch(line))
        {
            conditions.Add("null_check");
        }

        // Length/size validation
        if (LengthCheckPattern().IsMatch(line))
        {
            conditions.Add("length_check");
        }
    }

    // Regex patterns.
    // RegexOptions.Compiled is intentionally not specified: the regex source generator ignores it.

    [GeneratedRegex(@"github\.com/(?<owner>[^/]+)/(?<repo>[^/]+)/commit/(?<hash>[a-fA-F0-9]{7,40})", RegexOptions.IgnoreCase)]
    private static partial Regex GitHubCommitPattern();

    [GeneratedRegex(@"gitlab\.com/(?<owner>[^/]+)/(?<repo>[^/]+)/-/commit/(?<hash>[a-fA-F0-9]{7,40})", RegexOptions.IgnoreCase)]
    private static partial Regex GitLabCommitPattern();

    [GeneratedRegex(@"bitbucket\.org/(?<owner>[^/]+)/(?<repo>[^/]+)/commits/(?<hash>[a-fA-F0-9]{7,40})", RegexOptions.IgnoreCase)]
    private static partial Regex BitbucketCommitPattern();

    [GeneratedRegex(@"diff --git a/(?<path>.+?) b/")]
    private static partial Regex DiffFilePathPattern();

    [GeneratedRegex(@"^@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@\s*(?<func>.*)$")]
    private static partial Regex HunkHeaderPattern();

    [GeneratedRegex(@"(?:^|\s)(?<name>\w+)\s*\(")]
    private static partial Regex FunctionNamePattern();

    [GeneratedRegex(@"0[xX][0-9a-fA-F]+")]
    private static partial Regex HexConstantPattern();

    [GeneratedRegex(@"[<>=!]=?\s*(?<num>\d{2,})")]
    private static partial Regex NumericComparisonPattern();

    [GeneratedRegex(@"sizeof\s*\([^)]+\)")]
    private static partial Regex SizeofPattern();

    [GeneratedRegex(@"\b(len|size|count|length)\s*[<>=]", RegexOptions.IgnoreCase)]
    private static partial Regex BoundsCheckPattern();

    [GeneratedRegex(@"[!=]=\s*NULL\b|\bNULL\s*[!=]=|[!=]=\s*nullptr\b|\bnullptr\s*[!=]=")]
    private static partial Regex NullCheckPattern();

    [GeneratedRegex(@"\b(strlen|wcslen|sizeof)\s*\(")]
    private static partial Regex LengthCheckPattern();
}