Complete batch 012 (golden set diff) and 013 (advisory chat), fix build errors

Sprints completed:
- SPRINT_20260110_012_* (golden set diff layer - 10 sprints)
- SPRINT_20260110_013_* (advisory chat - 4 sprints)

Build fixes applied:
- Fix namespace conflicts with Microsoft.Extensions.Options.Options.Create
- Fix VexDecisionReachabilityIntegrationTests API drift (major rewrite)
- Fix VexSchemaValidationTests FluentAssertions method name
- Fix FixChainGateIntegrationTests ambiguous type references
- Fix AdvisoryAI test files required properties and namespace aliases
- Add stub types for CveMappingController (ICveSymbolMappingService)
- Fix VerdictBuilderService static context issue

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
master
2026-01-11 10:09:07 +02:00
parent a3b2f30a11
commit 7f7eb8b228
232 changed files with 58979 additions and 91 deletions

View File

@@ -0,0 +1,33 @@
# GoldenSet Library Charter
## Mission
Provide foundational data models, storage, and validation for Golden Set definitions: ground-truth facts about how a vulnerability manifests at the code level.
## Responsibilities
- **Domain Models**: GoldenSetDefinition, VulnerableTarget, BasicBlockEdge, WitnessInput, GoldenSetMetadata
- **Validation**: Schema validation, CVE existence check, edge format validation, sink registry lookup
- **Storage**: PostgreSQL persistence with content-addressed retrieval
- **Serialization**: YAML round-trip serialization with snake_case convention
- **Sink Registry**: Lookup service for known sinks mapped to CWE categories
## Key Principles
1. **Immutability**: All models are immutable records with ImmutableArray collections
2. **Content-Addressing**: All golden sets have SHA256-based content digests for deduplication
3. **Determinism**: Serialization and hashing produce deterministic outputs
4. **Air-Gap Ready**: Validation supports offline mode without external lookups
5. **Human-Readable**: YAML as primary format for git-friendliness
## Dependencies
- `BinaryIndex.Contracts` - Shared contracts and DTOs
- `Npgsql` - PostgreSQL driver
- `YamlDotNet` - YAML serialization
- `Microsoft.Extensions.*` - DI, Options, Logging, Caching
## Required Reading
- `docs/modules/binary-index/golden-set-schema.md`
- `docs/implplan/SPRINT_20260110_012_001_BINDEX_golden_set_foundation.md`
## Test Strategy
- Unit tests in `StellaOps.BinaryIndex.GoldenSet.Tests`
- Integration tests with Testcontainers PostgreSQL
- Property-based tests for serialization round-trip

View File

@@ -0,0 +1,174 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Frozen;
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
/// <summary>
/// Maps CWE IDs to likely sink functions and categories.
/// </summary>
public static class CweToSinkMapper
{
private static readonly FrozenDictionary<string, SinkMapping> Mappings = BuildMappings();
/// <summary>
/// Looks up the sink functions registered for a single CWE ID.
/// </summary>
/// <param name="cweId">The CWE ID (e.g., "CWE-120"); it is normalized before lookup.</param>
/// <returns>
/// The sink function names mapped to the CWE, or an empty array when the ID is
/// null, blank, or has no mapping.
/// </returns>
public static ImmutableArray<string> GetSinksForCwe(string cweId)
{
    // A blank ID short-circuits before normalization; an unmapped ID falls through.
    if (!string.IsNullOrWhiteSpace(cweId)
        && Mappings.TryGetValue(NormalizeCweId(cweId), out var entry))
    {
        return entry.Sinks;
    }

    return [];
}
/// <summary>
/// Looks up the sink category registered for a single CWE ID.
/// </summary>
/// <param name="cweId">The CWE ID; it is normalized before lookup.</param>
/// <returns>The sink category, or null when the ID is null, blank, or has no mapping.</returns>
public static string? GetCategoryForCwe(string cweId)
{
    if (string.IsNullOrWhiteSpace(cweId))
    {
        return null;
    }

    return Mappings.TryGetValue(NormalizeCweId(cweId), out var entry)
        ? entry.Category
        : null;
}
/// <summary>
/// Collects the sink functions for several CWE IDs into one de-duplicated list.
/// </summary>
/// <param name="cweIds">The CWE IDs to look up.</param>
/// <returns>Distinct sink function names in ordinal order.</returns>
public static ImmutableArray<string> GetSinksForCwes(IEnumerable<string> cweIds)
{
    // An ordinal sorted set de-duplicates and orders in one structure.
    var ordered = new SortedSet<string>(StringComparer.Ordinal);
    foreach (var id in cweIds)
    {
        ordered.UnionWith(GetSinksForCwe(id));
    }

    return [.. ordered];
}
/// <summary>
/// Collects the sink categories for several CWE IDs into one de-duplicated list.
/// </summary>
/// <param name="cweIds">The CWE IDs to look up.</param>
/// <returns>Distinct categories in ordinal order; unmapped IDs contribute nothing.</returns>
public static ImmutableArray<string> GetCategoriesForCwes(IEnumerable<string> cweIds)
{
    var ordered = new SortedSet<string>(StringComparer.Ordinal);
    foreach (var id in cweIds)
    {
        if (GetCategoryForCwe(id) is { } category)
        {
            ordered.Add(category);
        }
    }

    return [.. ordered];
}
/// <summary>
/// Converts a CWE identifier to the canonical "CWE-&lt;number&gt;" form used as the mapping key.
/// </summary>
/// <remarks>
/// Accepts "CWE-120", "cwe-120", "120" and zero-padded variants such as "CWE-079".
/// Only plain ASCII digits count as numeric: signs, thousands separators and
/// culture-specific formats are rejected so that "-120" can never become "CWE--120".
/// </remarks>
/// <param name="cweId">A non-null CWE identifier in any of the accepted spellings.</param>
/// <returns>
/// The canonical ID when the numeric part can be parsed; otherwise the prefixed
/// input with an upper-cased prefix, or the upper-cased input when it has no prefix.
/// </returns>
private static string NormalizeCweId(string cweId)
{
    const string Prefix = "CWE-";

    var id = cweId.Trim();
    var hasPrefix = id.StartsWith(Prefix, StringComparison.OrdinalIgnoreCase);
    var suffix = hasPrefix ? id.Substring(Prefix.Length).Trim() : id;

    // Re-formatting the parsed number strips leading zeros, so "CWE-079" and "79"
    // both resolve to the "CWE-79" key.
    if (int.TryParse(
            suffix,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out var numericId))
    {
        return Prefix + numericId.ToString(System.Globalization.CultureInfo.InvariantCulture);
    }

    return hasPrefix ? Prefix + suffix : id.ToUpperInvariant();
}
/// <summary>
/// Builds the static CWE-to-sink lookup table.
/// </summary>
/// <remarks>
/// Keys are written in canonical "CWE-&lt;number&gt;" form and compared ordinally, so callers
/// must normalize an ID (see <c>NormalizeCweId</c>) before looking it up.
/// Each entry pairs one sink category with the function names listed for that weakness.
/// </remarks>
/// <returns>A frozen (read-only) dictionary keyed by canonical CWE ID.</returns>
private static FrozenDictionary<string, SinkMapping> BuildMappings()
{
var mappings = new Dictionary<string, SinkMapping>(StringComparer.Ordinal)
{
// Buffer overflows
["CWE-120"] = new(SinkCategory.Memory, ["memcpy", "strcpy", "strcat", "sprintf", "gets", "scanf", "strncpy", "strncat"]),
["CWE-121"] = new(SinkCategory.Memory, ["memcpy", "strcpy", "sprintf", "alloca"]), // Stack-based overflow
["CWE-122"] = new(SinkCategory.Memory, ["memcpy", "realloc", "malloc", "calloc"]), // Heap-based overflow
["CWE-787"] = new(SinkCategory.Memory, ["memcpy", "memmove", "memset", "memchr"]), // Out-of-bounds write
["CWE-788"] = new(SinkCategory.Memory, ["memcpy", "memmove"]), // Access of memory beyond end of buffer
// Use after free / double free
["CWE-416"] = new(SinkCategory.Memory, ["free", "delete", "realloc"]), // Use after free
["CWE-415"] = new(SinkCategory.Memory, ["free", "delete"]), // Double free
["CWE-401"] = new(SinkCategory.Memory, ["malloc", "calloc", "realloc", "new"]), // Memory leak
// Command injection
["CWE-78"] = new(SinkCategory.CommandInjection, ["system", "exec", "execl", "execle", "execlp", "execv", "execve", "execvp", "popen", "ShellExecute", "CreateProcess"]),
["CWE-77"] = new(SinkCategory.CommandInjection, ["system", "exec", "popen", "eval"]),
// Code injection
["CWE-94"] = new(SinkCategory.CodeInjection, ["eval", "exec", "compile", "dlopen", "LoadLibrary", "GetProcAddress"]),
["CWE-95"] = new(SinkCategory.CodeInjection, ["eval"]), // Eval injection
// SQL injection
["CWE-89"] = new(SinkCategory.SqlInjection, ["sqlite3_exec", "mysql_query", "mysql_real_query", "PQexec", "PQexecParams", "execute", "executeQuery"]),
// Path traversal
["CWE-22"] = new(SinkCategory.PathTraversal, ["fopen", "open", "access", "stat", "lstat", "readlink", "realpath", "chdir", "mkdir", "rmdir", "unlink"]),
["CWE-23"] = new(SinkCategory.PathTraversal, ["fopen", "open"]), // Relative path traversal
["CWE-36"] = new(SinkCategory.PathTraversal, ["fopen", "open", "stat"]), // Absolute path traversal
// Integer issues (categorized as Memory: the listed sinks are allocation/copy calls)
["CWE-190"] = new(SinkCategory.Memory, ["malloc", "calloc", "realloc", "memcpy"]), // Integer overflow
["CWE-191"] = new(SinkCategory.Memory, ["malloc", "memcpy"]), // Integer underflow
["CWE-681"] = new(SinkCategory.Memory, ["malloc", "realloc"]), // Incorrect conversion
// Format string
["CWE-134"] = new(SinkCategory.Memory, ["printf", "fprintf", "sprintf", "snprintf", "vprintf", "vsprintf", "syslog"]),
// Network
["CWE-319"] = new(SinkCategory.Network, ["send", "sendto", "write", "connect"]), // Cleartext transmission
["CWE-295"] = new(SinkCategory.Network, ["SSL_connect", "SSL_accept", "SSL_read", "SSL_write"]), // Improper cert validation
// Crypto
["CWE-326"] = new(SinkCategory.Crypto, ["EVP_EncryptInit", "EVP_DecryptInit", "DES_set_key"]), // Inadequate encryption strength
["CWE-327"] = new(SinkCategory.Crypto, ["MD5", "SHA1", "DES", "RC4", "rand"]), // Broken or risky crypto algorithm
["CWE-328"] = new(SinkCategory.Crypto, ["MD5", "SHA1"]), // Reversible one-way hash
// NULL pointer
["CWE-476"] = new(SinkCategory.Memory, ["memcpy", "strcpy", "strcmp", "strlen"]), // NULL pointer dereference
// Race conditions
["CWE-362"] = new(SinkCategory.Memory, ["open", "fopen", "access", "stat"]), // Race condition
// Information exposure
["CWE-200"] = new(SinkCategory.Network, ["printf", "fprintf", "send", "write", "syslog"]), // Exposure of sensitive info
};
// No comparer is passed here, so the frozen copy uses the default string comparer.
return mappings.ToFrozenDictionary();
}
/// <summary>
/// One row of the CWE lookup table: the sink category and the sink function names for a CWE.
/// </summary>
/// <param name="Category">Sink category assigned to the CWE.</param>
/// <param name="Sinks">Sink function names associated with the CWE.</param>
private sealed record SinkMapping(string Category, ImmutableArray<string> Sinks);
}

View File

@@ -0,0 +1,181 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
/// <summary>
/// Extracts function hints from vulnerability descriptions.
/// </summary>
public static partial class FunctionHintExtractor
{
/// <summary>
/// Extracts function hints from an advisory description.
/// </summary>
/// <remarks>
/// Patterns are applied from most to least specific. When several patterns capture
/// the same name (case-insensitively), the highest confidence wins.
/// </remarks>
/// <param name="description">The advisory description text.</param>
/// <param name="source">Source identifier copied onto every returned hint.</param>
/// <returns>
/// Function hints ordered by descending confidence, then by name (ordinal);
/// empty when <paramref name="description"/> is null or blank.
/// </returns>
public static ImmutableArray<FunctionHint> ExtractFromDescription(string description, string source)
{
    if (string.IsNullOrWhiteSpace(description))
        return [];

    var hints = new Dictionary<string, decimal>(StringComparer.OrdinalIgnoreCase);

    // High confidence patterns
    ExtractWithPattern(description, InTheFunctionPattern(), hints, 0.9m);
    ExtractWithPattern(description, FunctionParenPattern(), hints, 0.85m);
    ExtractWithPattern(description, VulnerabilityInPattern(), hints, 0.8m);

    // Medium confidence patterns
    ExtractWithPattern(description, AllowsViaPattern(), hints, 0.7m);
    ExtractWithPattern(description, ViaThePattern(), hints, 0.65m);
    ExtractWithPattern(description, CallingPattern(), hints, 0.6m);

    // Lower confidence - simple function name mentions
    ExtractWithPattern(description, PossibleFunctionPattern(), hints, 0.4m);

    return BuildRankedHints(hints, source);
}

/// <summary>
/// Extracts function hints from a commit message.
/// </summary>
/// <param name="message">The commit message.</param>
/// <param name="source">Source identifier copied onto every returned hint.</param>
/// <returns>
/// Function hints ordered by descending confidence, then by name (ordinal);
/// empty when <paramref name="message"/> is null or blank.
/// </returns>
public static ImmutableArray<FunctionHint> ExtractFromCommitMessage(string message, string source)
{
    if (string.IsNullOrWhiteSpace(message))
        return [];

    var hints = new Dictionary<string, decimal>(StringComparer.OrdinalIgnoreCase);

    // Fix patterns in commit messages
    ExtractWithPattern(message, FixInPattern(), hints, 0.85m);
    ExtractWithPattern(message, PatchPattern(), hints, 0.8m);
    ExtractWithPattern(message, FunctionParenPattern(), hints, 0.75m);

    return BuildRankedHints(hints, source);
}

/// <summary>
/// Shared post-processing for both extractors: drops false positives and names that are
/// not valid identifiers, then ranks the remainder.
/// </summary>
/// <param name="hints">Candidate names with the best confidence seen for each.</param>
/// <param name="source">Source identifier copied onto every hint.</param>
/// <returns>Hints ordered by descending confidence, then by name (ordinal).</returns>
private static ImmutableArray<FunctionHint> BuildRankedHints(
    Dictionary<string, decimal> hints,
    string source)
{
    return hints
        .Where(kv => !IsFalsePositive(kv.Key))
        .Where(kv => IsValidFunctionName(kv.Key))
        .Select(kv => new FunctionHint
        {
            Name = kv.Key,
            Confidence = kv.Value,
            Source = source
        })
        .OrderByDescending(h => h.Confidence)
        .ThenBy(h => h.Name, StringComparer.Ordinal)
        .ToImmutableArray();
}
/// <summary>
/// Runs one pattern over the text and records every captured "func" group in
/// <paramref name="hints"/>, keeping the highest confidence seen per name.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="pattern">Regex exposing a named group "func".</param>
/// <param name="hints">Accumulator of candidate names and their best confidence.</param>
/// <param name="confidence">Confidence assigned to matches of this pattern.</param>
private static void ExtractWithPattern(
    string text,
    Regex pattern,
    Dictionary<string, decimal> hints,
    decimal confidence)
{
    foreach (Match hit in pattern.Matches(text))
    {
        var candidate = hit.Groups["func"].Value.Trim();
        if (string.IsNullOrEmpty(candidate))
        {
            continue;
        }

        // An existing entry is only overwritten by a strictly higher confidence.
        var alreadyAtLeastAsConfident =
            hints.TryGetValue(candidate, out var recorded) && recorded >= confidence;
        if (alreadyAtLeastAsConfident)
        {
            continue;
        }

        hints[candidate] = confidence;
    }
}
/// <summary>
/// Common words and acronyms that the extraction patterns capture but that are not
/// function names. Built once; lookups ignore case.
/// </summary>
private static readonly HashSet<string> FalsePositives = new(StringComparer.OrdinalIgnoreCase)
{
    "a", "an", "the", "is", "it", "in", "on", "to", "of",
    "remote", "local", "attacker", "user", "server", "client",
    "buffer", "overflow", "memory", "heap", "stack", "null",
    "pointer", "integer", "string", "array", "data", "input",
    "output", "file", "path", "url", "request", "response",
    "allows", "could", "may", "might", "can", "will", "would",
    "execute", "code", "arbitrary", "denial", "service", "dos",
    "via", "through", "using", "with", "from", "into",
    "CVE", "CWE", "CVSS", "NVD", "GHSA", "OSV"
};

/// <summary>
/// Tells whether a captured name is a known non-function word.
/// </summary>
/// <param name="name">Candidate function name.</param>
/// <returns>True when the name is in the stop list (case-insensitive).</returns>
private static bool IsFalsePositive(string name)
{
    // The set is a static field so it is not rebuilt for every candidate.
    return FalsePositives.Contains(name);
}
/// <summary>
/// Checks that a candidate looks like an identifier: 2-64 characters, starting with a
/// letter or underscore, and containing only letters, digits and underscores.
/// </summary>
/// <param name="name">Candidate function name.</param>
/// <returns>True when the candidate satisfies all three rules.</returns>
private static bool IsValidFunctionName(string name)
{
    if (name.Length is < 2 or > 64)
    {
        return false;
    }

    var first = name[0];
    if (first != '_' && !char.IsLetter(first))
    {
        return false;
    }

    foreach (var ch in name)
    {
        if (ch != '_' && !char.IsLetterOrDigit(ch))
        {
            return false;
        }
    }

    return true;
}
// Source-generated regex patterns. Each one exposes the candidate name in the named group "func".
/// <summary>Pattern: "in the X function" (case-insensitive); captures X.</summary>
[GeneratedRegex(@"in\s+the\s+(?<func>\w+)\s+function", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex InTheFunctionPattern();
/// <summary>Pattern: "X()" with empty parentheses, optionally separated by whitespace; captures X.</summary>
[GeneratedRegex(@"(?<func>\w+)\s*\(\s*\)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex FunctionParenPattern();
/// <summary>Pattern: "vulnerability in X" (case-insensitive); captures X.</summary>
[GeneratedRegex(@"vulnerability\s+in\s+(?<func>\w+)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex VulnerabilityInPattern();
/// <summary>Pattern: "allows &lt;word&gt; via X" (case-insensitive); captures X, the word after "via".</summary>
[GeneratedRegex(@"allows\s+\w+\s+via\s+(?<func>\w+)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex AllowsViaPattern();
/// <summary>Pattern: "via the X" (case-insensitive); captures X.</summary>
[GeneratedRegex(@"via\s+the\s+(?<func>\w+)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex ViaThePattern();
/// <summary>Pattern: "calling X" (case-insensitive); captures X.</summary>
[GeneratedRegex(@"calling\s+(?<func>\w+)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex CallingPattern();
/// <summary>
/// Pattern: a bare lower-case snake_case word with at least one underscore (e.g. "png_read_row").
/// Case-sensitive; camelCase and single-word names are not matched.
/// </summary>
[GeneratedRegex(@"\b(?<func>[a-z][a-z0-9]*(?:_[a-z0-9]+)+)\b", RegexOptions.Compiled)]
private static partial Regex PossibleFunctionPattern();
/// <summary>Pattern: "fix X", "fixed X", "fix in X" or "fixed in X" (case-insensitive); captures X.</summary>
[GeneratedRegex(@"fix(?:ed)?\s+(?:in\s+)?(?<func>\w+)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex FixInPattern();
/// <summary>Pattern: "patch X" (case-insensitive); captures X.</summary>
[GeneratedRegex(@"patch\s+(?<func>\w+)", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex PatchPattern();
}

View File

@@ -0,0 +1,197 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
/// <summary>
/// Interface for source-specific golden set extractors (NVD, OSV, GHSA).
/// Each implementation handles one advisory source, identified by <see cref="SourceType"/>.
/// </summary>
public interface IGoldenSetSourceExtractor
{
/// <summary>
/// The source type this extractor handles.
/// </summary>
string SourceType { get; }
/// <summary>
/// Extracts golden set data from this source.
/// </summary>
/// <param name="vulnerabilityId">The vulnerability ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// Source extraction result; <see cref="SourceExtractionResult.Found"/> tells whether the
/// source had data for the ID.
/// </returns>
Task<SourceExtractionResult> ExtractAsync(
string vulnerabilityId,
CancellationToken ct);
/// <summary>
/// Checks if this extractor supports the given vulnerability ID format.
/// </summary>
/// <param name="vulnerabilityId">The vulnerability ID to check.</param>
/// <returns>True if supported; otherwise, false.</returns>
bool Supports(string vulnerabilityId);
}
/// <summary>
/// Result from a single source extractor.
/// Collection properties default to empty arrays, so only <see cref="Found"/> and
/// <see cref="Source"/> must be set by the producer.
/// </summary>
public sealed record SourceExtractionResult
{
/// <summary>
/// Whether extraction found data.
/// </summary>
public required bool Found { get; init; }
/// <summary>
/// Source information.
/// </summary>
public required ExtractionSource Source { get; init; }
/// <summary>
/// Extracted component name.
/// </summary>
public string? Component { get; init; }
/// <summary>
/// Version range(s) affected.
/// </summary>
public ImmutableArray<VersionRange> AffectedVersions { get; init; } = [];
/// <summary>
/// Function hints extracted from the description.
/// </summary>
public ImmutableArray<FunctionHint> FunctionHints { get; init; } = [];
/// <summary>
/// Sink categories based on CWE mapping.
/// </summary>
public ImmutableArray<string> SinkCategories { get; init; } = [];
/// <summary>
/// Commit references to fix commits.
/// </summary>
public ImmutableArray<CommitReference> CommitReferences { get; init; } = [];
/// <summary>
/// CWE IDs associated with the vulnerability.
/// </summary>
public ImmutableArray<string> CweIds { get; init; } = [];
/// <summary>
/// Severity level (critical, high, medium, low).
/// </summary>
public string? Severity { get; init; }
/// <summary>
/// CVSS v3 score (if available).
/// </summary>
public decimal? CvssScore { get; init; }
/// <summary>
/// Advisory description text.
/// </summary>
public string? Description { get; init; }
/// <summary>
/// Related CVEs (if any).
/// </summary>
public ImmutableArray<string> RelatedCves { get; init; } = [];
/// <summary>
/// Warnings encountered during extraction.
/// </summary>
public ImmutableArray<string> Warnings { get; init; } = [];
/// <summary>
/// Creates a not-found result.
/// </summary>
/// <param name="vulnerabilityId">The vulnerability ID that was looked up; stored as the source reference.</param>
/// <param name="sourceType">The source type that was queried.</param>
/// <param name="timeProvider">Clock used to stamp the source's fetch time.</param>
/// <returns>A result with <see cref="Found"/> set to false and all collections empty.</returns>
public static SourceExtractionResult NotFound(string vulnerabilityId, string sourceType, TimeProvider timeProvider)
=> new()
{
Found = false,
Source = new ExtractionSource
{
Type = sourceType,
Reference = vulnerabilityId,
FetchedAt = timeProvider.GetUtcNow()
}
};
}
/// <summary>
/// A version range for affected versions.
/// Versions are stored as plain strings; every member is optional.
/// </summary>
public sealed record VersionRange
{
/// <summary>
/// Minimum affected version (inclusive, null = unbounded).
/// </summary>
public string? MinVersion { get; init; }
/// <summary>
/// Maximum affected version (exclusive, null = unbounded).
/// </summary>
public string? MaxVersion { get; init; }
/// <summary>
/// Fixed version (if known).
/// </summary>
public string? FixedVersion { get; init; }
/// <summary>
/// Ecosystem (e.g., npm, pypi, golang, cargo).
/// </summary>
public string? Ecosystem { get; init; }
}
/// <summary>
/// A hint about a potentially vulnerable function.
/// </summary>
public sealed record FunctionHint
{
/// <summary>
/// Function name.
/// </summary>
public required string Name { get; init; }
/// <summary>
/// Confidence in this hint (0.0 - 1.0).
/// </summary>
public required decimal Confidence { get; init; }
/// <summary>
/// Identifier of the source this hint was extracted from, as supplied by the caller
/// of the extractor.
/// </summary>
public required string Source { get; init; }
/// <summary>
/// Optional source file path.
/// </summary>
public string? SourceFile { get; init; }
}
/// <summary>
/// A reference to a fix commit.
/// </summary>
public sealed record CommitReference
{
/// <summary>
/// URL to the commit.
/// </summary>
public required string Url { get; init; }
/// <summary>
/// Commit hash (if extractable).
/// </summary>
public string? Hash { get; init; }
/// <summary>
/// Repository host (github, gitlab, etc.).
/// </summary>
public string? Host { get; init; }
/// <summary>
/// Whether this is confirmed to be a fix commit. Defaults to false.
/// </summary>
public bool IsConfirmedFix { get; init; }
}

View File

@@ -0,0 +1,149 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
/// <summary>
/// Extracts golden set data from NVD (National Vulnerability Database).
/// </summary>
public sealed partial class NvdGoldenSetExtractor : IGoldenSetSourceExtractor
{
private readonly TimeProvider _timeProvider;
private readonly ILogger<NvdGoldenSetExtractor> _logger;
/// <summary>
/// Creates the extractor with its clock and logger.
/// </summary>
/// <param name="timeProvider">Clock used to stamp extraction sources.</param>
/// <param name="logger">Logger for extraction diagnostics.</param>
public NvdGoldenSetExtractor(
    TimeProvider timeProvider,
    ILogger<NvdGoldenSetExtractor> logger)
{
    (_timeProvider, _logger) = (timeProvider, logger);
}
/// <inheritdoc />
public string SourceType
{
    get { return ExtractionSourceTypes.Nvd; }
}
/// <inheritdoc />
/// <remarks>
/// NVD is keyed by CVE ID, so only IDs of the form "CVE-YYYY-NNNN" (case-insensitive)
/// are supported. A null or blank ID is reported as unsupported rather than throwing.
/// </remarks>
public bool Supports(string vulnerabilityId)
{
    // Regex.IsMatch throws on null input; a capability probe should just answer "no".
    return !string.IsNullOrWhiteSpace(vulnerabilityId)
        && CveIdPattern().IsMatch(vulnerabilityId);
}
/// <inheritdoc />
/// <remarks>
/// Stub: no NVD request is made. The result always has <c>Found = false</c> and carries a
/// warning saying the integration is not implemented. <paramref name="ct"/> is currently unused.
/// </remarks>
public async Task<SourceExtractionResult> ExtractAsync(
    string vulnerabilityId,
    CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(vulnerabilityId);

    _logger.LogDebug("Extracting from NVD for {VulnerabilityId}", vulnerabilityId);

    // TODO: Implement actual NVD API call
    // Until then the method only awaits a completed task to keep its async shape.
    await Task.CompletedTask;

    var detailUrl = string.Format(
        CultureInfo.InvariantCulture,
        "https://nvd.nist.gov/vuln/detail/{0}",
        vulnerabilityId);

    // Return not found for now - real implementation would fetch from NVD
    return new SourceExtractionResult
    {
        Found = false,
        Source = new ExtractionSource
        {
            Type = SourceType,
            Reference = detailUrl,
            FetchedAt = _timeProvider.GetUtcNow()
        },
        Warnings = ["NVD API integration not yet implemented. Please use manual extraction."]
    };
}
/// <summary>
/// Extracts function hints from a CVE description by delegating to
/// <see cref="FunctionHintExtractor.ExtractFromDescription"/>.
/// </summary>
/// <param name="description">The CVE description text.</param>
/// <param name="source">Source identifier copied onto the hints.</param>
/// <returns>The hints produced by the shared extractor.</returns>
internal static ImmutableArray<FunctionHint> ExtractFunctionHintsFromDescription(
    string description,
    string source)
    => FunctionHintExtractor.ExtractFromDescription(description, source);
/// <summary>
/// Maps CWE IDs to sink functions by delegating to <see cref="CweToSinkMapper.GetSinksForCwes"/>.
/// </summary>
/// <param name="cweIds">The CWE IDs to map.</param>
/// <returns>The distinct sink function names for the given CWEs.</returns>
internal static ImmutableArray<string> MapCweToSinks(ImmutableArray<string> cweIds)
    => CweToSinkMapper.GetSinksForCwes(cweIds);
/// <summary>
/// Picks the commit links out of a list of NVD reference URLs.
/// </summary>
/// <param name="referenceUrls">Reference URLs attached to the advisory.</param>
/// <returns>
/// One <see cref="CommitReference"/> per URL recognized as a GitHub or GitLab commit,
/// in input order. <c>IsConfirmedFix</c> is set when the URL text contains "fix" or "patch".
/// </returns>
internal static ImmutableArray<CommitReference> ExtractCommitReferences(IEnumerable<string> referenceUrls)
{
    var found = ImmutableArray.CreateBuilder<CommitReference>();

    foreach (var candidate in referenceUrls)
    {
        if (!IsCommitUrl(candidate, out var host, out var hash))
        {
            continue;
        }

        // Heuristic only: the flag is derived from the URL text, not from the commit contents.
        var mentionsFix =
            candidate.Contains("fix", StringComparison.OrdinalIgnoreCase) ||
            candidate.Contains("patch", StringComparison.OrdinalIgnoreCase);

        found.Add(new CommitReference
        {
            Url = candidate,
            Hash = hash,
            Host = host,
            IsConfirmedFix = mentionsFix
        });
    }

    return found.ToImmutable();
}
/// <summary>
/// Recognizes GitHub and GitLab commit URLs.
/// </summary>
/// <param name="url">URL to inspect; null or blank yields false.</param>
/// <param name="host">"github" or "gitlab" on success; otherwise null.</param>
/// <param name="hash">The commit hash captured from the URL on success; otherwise null.</param>
/// <returns>True when the URL matches one of the commit patterns.</returns>
private static bool IsCommitUrl(string url, out string? host, out string? hash)
{
    (host, hash) = (null, null);

    if (string.IsNullOrWhiteSpace(url))
    {
        return false;
    }

    // GitHub is tried first; GitLab is only consulted when GitHub does not match.
    if (GitHubCommitPattern().Match(url) is { Success: true } onGitHub)
    {
        (host, hash) = ("github", onGitHub.Groups["hash"].Value);
        return true;
    }

    if (GitLabCommitPattern().Match(url) is { Success: true } onGitLab)
    {
        (host, hash) = ("gitlab", onGitLab.Groups["hash"].Value);
        return true;
    }

    return false;
}
/// <summary>Matches a whole CVE identifier such as "CVE-2024-12345" (case-insensitive).</summary>
[GeneratedRegex(@"^CVE-\d{4}-\d{4,}$", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex CveIdPattern();
/// <summary>
/// Matches a GitHub commit URL ("github.com/owner/repo/commit/&lt;hash&gt;") and captures
/// 7-40 hex characters in the group "hash".
/// </summary>
[GeneratedRegex(@"github\.com/[^/]+/[^/]+/commit/(?<hash>[a-f0-9]{7,40})", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex GitHubCommitPattern();
/// <summary>
/// Matches a GitLab commit URL ("gitlab.com/group[/subgroup...]/project/-/commit/&lt;hash&gt;")
/// and captures 7-40 hex characters in the group "hash".
/// </summary>
/// <remarks>
/// GitLab projects may live in nested subgroups, so one or more group segments are
/// accepted before the project segment.
/// </remarks>
[GeneratedRegex(@"gitlab\.com/(?:[^/]+/)+[^/]+/-/commit/(?<hash>[a-f0-9]{7,40})", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
private static partial Regex GitLabCommitPattern();
}

View File

@@ -0,0 +1,281 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Default implementation of <see cref="IGoldenSetEnrichmentService"/>.
/// Integrates with AdvisoryAI for AI-powered enrichment.
/// </summary>
public sealed class GoldenSetEnrichmentService : IGoldenSetEnrichmentService
{
private readonly IUpstreamCommitAnalyzer _commitAnalyzer;
private readonly GoldenSetOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<GoldenSetEnrichmentService> _logger;
/// <summary>
/// Creates the enrichment service.
/// </summary>
/// <param name="commitAnalyzer">Analyzer for upstream fix commits.</param>
/// <param name="options">Golden set options; the value is read once and stored.</param>
/// <param name="timeProvider">Clock abstraction.</param>
/// <param name="logger">Logger for enrichment diagnostics.</param>
public GoldenSetEnrichmentService(
    IUpstreamCommitAnalyzer commitAnalyzer,
    IOptions<GoldenSetOptions> options,
    TimeProvider timeProvider,
    ILogger<GoldenSetEnrichmentService> logger)
{
    // Unwrap the options first so a missing options object fails before any field is set.
    var resolvedOptions = options.Value;

    _logger = logger;
    _timeProvider = timeProvider;
    _options = resolvedOptions;
    _commitAnalyzer = commitAnalyzer;
}
/// <inheritdoc />
public bool IsAvailable
{
    get { return _options.Authoring.EnableAiEnrichment; }
}
/// <inheritdoc />
/// <remarks>
/// Runs up to three passes over the draft, each feeding the next: commit analysis,
/// CWE-to-sink mapping, and the AI placeholder (only when fix commits are present).
/// When enrichment is disabled the draft is returned unchanged.
/// </remarks>
public async Task<GoldenSetEnrichmentResult> EnrichAsync(
    GoldenSetDefinition draft,
    GoldenSetEnrichmentContext context,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(draft);
    ArgumentNullException.ThrowIfNull(context);

    if (!IsAvailable)
    {
        _logger.LogDebug("AI enrichment is disabled");
        return GoldenSetEnrichmentResult.NoChanges(draft, "AI enrichment is disabled");
    }

    _logger.LogInformation("Starting AI enrichment for {VulnerabilityId}", draft.Id);

    var current = draft;
    var appliedActions = new List<EnrichmentAction>();
    var collectedWarnings = new List<string>();

    // Step 1: Enrich from commit analysis
    if (context.CommitAnalysis is not null)
    {
        var fromCommits = ApplyCommitAnalysis(current, context.CommitAnalysis);
        current = fromCommits.Item1;
        appliedActions.AddRange(fromCommits.Item2);
    }

    // Step 2: Enrich from CWE mappings
    if (!context.CweIds.IsEmpty)
    {
        var fromCwes = ApplyCweEnrichment(current, context.CweIds);
        current = fromCwes.Item1;
        appliedActions.AddRange(fromCwes.Item2);
    }

    // Step 3: AI-powered enrichment (if available)
    // Note: This is where we would call AdvisoryAI service
    // For now, we use heuristic-based enrichment only
    if (_options.Authoring.EnableAiEnrichment && context.FixCommits.Length > 0)
    {
        var fromAi = await ApplyAiEnrichmentAsync(current, context, ct);
        current = fromAi.Item1;
        appliedActions.AddRange(fromAi.Item2);
        collectedWarnings.AddRange(fromAi.Item3);
    }

    // Calculate overall confidence
    var confidence = CalculateOverallConfidence(appliedActions);

    _logger.LogInformation(
        "Enrichment complete for {VulnerabilityId}: {ActionCount} actions, {Confidence:P0} confidence",
        draft.Id, appliedActions.Count, confidence);

    return new GoldenSetEnrichmentResult
    {
        EnrichedDraft = current,
        ActionsApplied = [.. appliedActions],
        OverallConfidence = confidence,
        Warnings = [.. collectedWarnings]
    };
}
/// <summary>
/// Enriches a draft with the functions and constants found by fix-commit analysis.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Every modified function not already targeted (case-insensitive) becomes a new target
///    that inherits the sinks of the draft's first target.
/// 2. Placeholder "&lt;unknown&gt;" targets are dropped once at least one real target exists.
/// 3. Up to five new constants from the commit are added to each remaining target.
/// Placeholders are removed before constants are processed so that the
/// "targets[i].constants" paths recorded in the actions match the indices of the returned
/// draft, and no action is emitted for a target that is then discarded.
/// </remarks>
/// <param name="draft">The draft to enrich; it is not modified.</param>
/// <param name="analysis">Result of analyzing the fix commits.</param>
/// <returns>The enriched draft and the list of actions that were applied.</returns>
private static (GoldenSetDefinition, ImmutableArray<EnrichmentAction>) ApplyCommitAnalysis(
    GoldenSetDefinition draft,
    CommitAnalysisResult analysis)
{
    const string Placeholder = "<unknown>";

    var actions = new List<EnrichmentAction>();

    // Add functions from commit analysis
    var existingFunctions = draft.Targets
        .Select(t => t.FunctionName)
        .ToHashSet(StringComparer.OrdinalIgnoreCase);
    var newTargets = new List<VulnerableTarget>(draft.Targets);

    foreach (var func in analysis.ModifiedFunctions)
    {
        // HashSet.Add returns false for a name that is already targeted.
        if (func == Placeholder || !existingFunctions.Add(func))
            continue;

        newTargets.Add(new VulnerableTarget
        {
            FunctionName = func,
            Sinks = draft.Targets.FirstOrDefault()?.Sinks ?? []
        });

        actions.Add(new EnrichmentAction
        {
            Type = EnrichmentActionTypes.FunctionAdded,
            Target = "targets",
            Value = func,
            Confidence = 0.7m,
            Rationale = "Function modified in fix commit"
        });
    }

    // Remove placeholder targets if we have real ones. A draft that contains only
    // placeholders keeps them, so it never ends up with an empty target list.
    if (newTargets.Any(t => t.FunctionName != Placeholder))
    {
        newTargets.RemoveAll(t => t.FunctionName == Placeholder);
    }

    // Add constants from commit analysis
    for (var i = 0; i < newTargets.Count; i++)
    {
        var target = newTargets[i];
        var existingConstants = target.Constants.ToHashSet(StringComparer.Ordinal);
        var additionalConstants = analysis.AddedConstants
            .Where(c => !existingConstants.Contains(c))
            .Take(5) // Limit to avoid noise
            .ToImmutableArray();

        if (additionalConstants.IsEmpty)
            continue;

        newTargets[i] = target with
        {
            Constants = target.Constants.AddRange(additionalConstants)
        };

        var path = string.Format(CultureInfo.InvariantCulture, "targets[{0}].constants", i);
        foreach (var constant in additionalConstants)
        {
            actions.Add(new EnrichmentAction
            {
                Type = EnrichmentActionTypes.ConstantExtracted,
                Target = path,
                Value = constant,
                Confidence = 0.6m,
                Rationale = "Constant found in fix commit"
            });
        }
    }

    var enrichedDraft = draft with
    {
        Targets = [.. newTargets]
    };

    return (enrichedDraft, [.. actions]);
}
/// <summary>
/// Adds the sinks mapped from the given CWE IDs to every target that does not list them yet.
/// </summary>
/// <param name="draft">The draft to enrich; it is not modified.</param>
/// <param name="cweIds">CWE IDs associated with the vulnerability.</param>
/// <returns>
/// The enriched draft plus one <c>SinkAdded</c> action per sink added to a target.
/// When the CWEs map to no sinks, the original draft and no actions.
/// </returns>
private static (GoldenSetDefinition, ImmutableArray<EnrichmentAction>) ApplyCweEnrichment(
    GoldenSetDefinition draft,
    ImmutableArray<string> cweIds)
{
    var actions = new List<EnrichmentAction>();

    // Get sinks from CWE mappings
    var mappedSinks = Extractors.CweToSinkMapper.GetSinksForCwes(cweIds);
    if (mappedSinks.IsEmpty)
    {
        return (draft, []);
    }

    var rebuilt = ImmutableArray.CreateBuilder<VulnerableTarget>();

    foreach (var (target, index) in draft.Targets.Select((t, i) => (t, i)))
    {
        var known = target.Sinks.ToHashSet(StringComparer.Ordinal);
        var missing = mappedSinks
            .Where(s => !known.Contains(s))
            .ToImmutableArray();

        if (missing.IsEmpty)
        {
            // Nothing to add: keep the target as it is.
            rebuilt.Add(target);
            continue;
        }

        var path = string.Format(CultureInfo.InvariantCulture, "targets[{0}].sinks", index);
        foreach (var sink in missing)
        {
            actions.Add(new EnrichmentAction
            {
                Type = EnrichmentActionTypes.SinkAdded,
                Target = path,
                Value = sink,
                Confidence = 0.65m,
                Rationale = "Mapped from CWE classification"
            });
        }

        rebuilt.Add(target with
        {
            Sinks = target.Sinks.AddRange(missing)
        });
    }

    return (draft with { Targets = rebuilt.ToImmutable() }, [.. actions]);
}
/// <summary>
/// Placeholder for the AdvisoryAI integration.
/// </summary>
/// <remarks>
/// No service is called: the draft is returned unchanged with no actions and a single
/// warning saying the integration is missing. <paramref name="ct"/> is currently unused.
/// </remarks>
/// <param name="draft">The draft that would be sent for enrichment.</param>
/// <param name="context">Enrichment context; only the fix-commit count is read, for logging.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The unchanged draft, an empty action list, and one warning.</returns>
private async Task<(GoldenSetDefinition, ImmutableArray<EnrichmentAction>, ImmutableArray<string>)> ApplyAiEnrichmentAsync(
    GoldenSetDefinition draft,
    GoldenSetEnrichmentContext context,
    CancellationToken ct)
{
    // Note: This is a placeholder for actual AI integration
    // In production, this would call the AdvisoryAI service
    _logger.LogDebug(
        "AI enrichment placeholder - would call AdvisoryAI with {CommitCount} commits",
        context.FixCommits.Length);

    await Task.CompletedTask;

    ImmutableArray<EnrichmentAction> noActions = [];
    ImmutableArray<string> notes = ["AI enrichment not yet integrated with AdvisoryAI service"];
    return (draft, noActions, notes);
}
/// <summary>
/// Computes the weighted average confidence of the applied actions, rounded to two decimals.
/// </summary>
/// <remarks>
/// Function actions weigh 2.0, sink and edge actions 1.5, everything else 1.0.
/// </remarks>
/// <param name="actions">The actions applied during enrichment.</param>
/// <returns>The weighted average, or 0 when there are no actions.</returns>
private static decimal CalculateOverallConfidence(List<EnrichmentAction> actions)
{
    if (actions.Count == 0)
    {
        return 0;
    }

    var numerator = 0m;
    var denominator = 0m;

    foreach (var action in actions)
    {
        // Weight function-related actions higher
        var weight = action.Type switch
        {
            EnrichmentActionTypes.FunctionAdded or EnrichmentActionTypes.FunctionRefined => 2.0m,
            EnrichmentActionTypes.SinkAdded or EnrichmentActionTypes.EdgeSuggested => 1.5m,
            _ => 1.0m
        };

        numerator += action.Confidence * weight;
        denominator += weight;
    }

    if (denominator > 0)
    {
        return Math.Round(numerator / denominator, 2);
    }

    return 0;
}
}

View File

@@ -0,0 +1,421 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Orchestrates golden set extraction from multiple sources.
/// </summary>
public sealed class GoldenSetExtractor : IGoldenSetExtractor
{
private readonly IEnumerable<IGoldenSetSourceExtractor> _sourceExtractors;
private readonly ISinkRegistry _sinkRegistry;
private readonly IOptions<GoldenSetOptions> _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<GoldenSetExtractor> _logger;
/// <summary>
/// Creates the orchestrating extractor.
/// </summary>
/// <param name="sourceExtractors">The registered per-source extractors.</param>
/// <param name="sinkRegistry">Registry of known sinks.</param>
/// <param name="options">Golden set options (stored as the options wrapper).</param>
/// <param name="timeProvider">Clock abstraction.</param>
/// <param name="logger">Logger for extraction diagnostics.</param>
public GoldenSetExtractor(
    IEnumerable<IGoldenSetSourceExtractor> sourceExtractors,
    ISinkRegistry sinkRegistry,
    IOptions<GoldenSetOptions> options,
    TimeProvider timeProvider,
    ILogger<GoldenSetExtractor> logger)
{
    (_sourceExtractors, _sinkRegistry) = (sourceExtractors, sinkRegistry);
    (_options, _timeProvider, _logger) = (options, timeProvider, logger);
}
/// <inheritdoc />
public async Task<GoldenSetExtractionResult> ExtractAsync(
    string vulnerabilityId,
    string? component = null,
    ExtractionOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(vulnerabilityId);
    options ??= new ExtractionOptions();

    _logger.LogInformation(
        "Starting golden set extraction for {VulnerabilityId}",
        vulnerabilityId);

    var usedSources = new List<ExtractionSource>();
    var hits = new List<SourceExtractionResult>();
    var warnings = new List<string>();

    foreach (var extractor in _sourceExtractors)
    {
        // An extractor participates only if it supports the ID and, when the caller
        // restricted the source list, its source type is on that list (case-insensitive).
        if (!extractor.Supports(vulnerabilityId))
        {
            continue;
        }

        if (options.Sources.Length != 0
            && !options.Sources.Contains(extractor.SourceType, StringComparer.OrdinalIgnoreCase))
        {
            continue;
        }

        try
        {
            _logger.LogDebug(
                "Extracting from source {SourceType} for {VulnerabilityId}",
                extractor.SourceType,
                vulnerabilityId);

            var outcome = await extractor.ExtractAsync(vulnerabilityId, ct);
            if (outcome.Found)
            {
                hits.Add(outcome);
                usedSources.Add(outcome.Source);
            }

            // Warnings are collected even when the source found nothing.
            warnings.AddRange(outcome.Warnings);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // A failing source is downgraded to a warning; cancellation still propagates.
            _logger.LogWarning(
                ex,
                "Failed to extract from {SourceType} for {VulnerabilityId}",
                extractor.SourceType,
                vulnerabilityId);
            warnings.Add($"Failed to extract from {extractor.SourceType}: {ex.Message}");
        }
    }

    if (hits.Count == 0)
    {
        _logger.LogWarning(
            "No data found for {VulnerabilityId} from any source",
            vulnerabilityId);
        return CreateEmptyResult(vulnerabilityId, component ?? "unknown", warnings);
    }

    // Merge the per-source hits into one draft, then score it and derive follow-up suggestions.
    var mergedDraft = CreateDraftFromResults(vulnerabilityId, component, hits);
    var score = CalculateConfidence(mergedDraft, hits);
    var followUps = GenerateSuggestions(mergedDraft, hits);

    _logger.LogInformation(
        "Extraction complete for {VulnerabilityId}: {TargetCount} targets, {Confidence:P0} confidence",
        vulnerabilityId,
        mergedDraft.Targets.Length,
        score.Overall);

    return new GoldenSetExtractionResult
    {
        Draft = mergedDraft,
        Confidence = score,
        Sources = [.. usedSources],
        Suggestions = followUps,
        Warnings = [.. warnings]
    };
}
/// <inheritdoc />
public async Task<GoldenSetExtractionResult> EnrichAsync(
    GoldenSetDefinition draft,
    EnrichmentOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(draft);

    // Only heuristic sink back-filling happens here; AI enrichment lives in a separate service.
    // NOTE(review): this method contains no await and does not read `options` or `ct`.
    options ??= new EnrichmentOptions();

    _logger.LogInformation(
        "Enriching golden set {VulnerabilityId}",
        draft.Id);

    // Give every target without sinks a chance to pick some up from its function name.
    var rebuiltTargets = ImmutableArray.CreateBuilder<VulnerableTarget>(draft.Targets.Length);
    foreach (var target in draft.Targets)
    {
        rebuiltTargets.Add(EnrichTarget(target));
    }

    var enrichedDraft = draft with { Targets = rebuiltTargets.ToImmutable() };

    return new GoldenSetExtractionResult
    {
        Draft = enrichedDraft,
        Confidence = CalculateConfidence(enrichedDraft, []),
        Sources = [],
        Suggestions = [],
        Warnings = []
    };
}
/// <summary>
/// Back-fills sinks for a target that has none, using function-name heuristics.
/// </summary>
/// <param name="target">The target to inspect.</param>
/// <returns>
/// A copy with guessed sinks when the target had no sinks and a guess was available;
/// otherwise the same <paramref name="target"/>.
/// </returns>
private VulnerableTarget EnrichTarget(VulnerableTarget target)
{
    // Targets that already list sinks are left untouched.
    if (target.Sinks.Length != 0)
    {
        return target;
    }

    var guessed = GuessSinksFromFunction(target.FunctionName);
    return guessed.Length > 0
        ? target with { Sinks = guessed }
        : target;
}
/// <summary>
/// Keyword-to-sinks heuristics, in priority order. The first keyword contained in a function
/// name wins, so the order of this table is part of the behavior. An array is used instead of
/// a dictionary because dictionary enumeration order is not guaranteed.
/// </summary>
private static readonly (string Keyword, ImmutableArray<string> Sinks)[] SinkHintsByKeyword =
[
    ("parse", ImmutableArray.Create("memcpy", "strcpy")),
    ("copy", ImmutableArray.Create("memcpy", "strcpy")),
    ("decode", ImmutableArray.Create("memcpy")),
    ("read", ImmutableArray.Create("memcpy", "fread")),
    ("write", ImmutableArray.Create("memcpy", "fwrite")),
    ("alloc", ImmutableArray.Create("malloc", "realloc")),
    ("free", ImmutableArray.Create("free")),
    ("exec", ImmutableArray.Create("system", "exec")),
    ("sql", ImmutableArray.Create("sqlite3_exec", "mysql_query")),
    ("query", ImmutableArray.Create("sqlite3_exec", "mysql_query")),
    ("open", ImmutableArray.Create("fopen", "open")),
];

/// <summary>
/// Suggests likely sink functions from keywords found in a function name.
/// </summary>
/// <param name="functionName">Name of the vulnerable function.</param>
/// <returns>
/// The sinks of the first table entry whose keyword occurs in <paramref name="functionName"/>
/// (case-insensitive), or an empty array when nothing matches or the name is null/empty.
/// </returns>
private static ImmutableArray<string> GuessSinksFromFunction(string functionName)
{
    if (string.IsNullOrEmpty(functionName))
    {
        return [];
    }

    foreach (var (keyword, sinks) in SinkHintsByKeyword)
    {
        if (functionName.Contains(keyword, StringComparison.OrdinalIgnoreCase))
        {
            return sinks;
        }
    }

    return [];
}
/// <summary>
/// Merges the per-source extraction results into a single draft golden set.
/// </summary>
/// <param name="vulnerabilityId">ID used as the draft's <c>Id</c>.</param>
/// <param name="component">Caller-supplied component; when null, the first non-empty component reported by a source is used.</param>
/// <param name="results">Results from the sources that found data.</param>
/// <returns>A draft with up to ten targets, or a single <c>&lt;unknown&gt;</c> placeholder target when no function hints exist.</returns>
private GoldenSetDefinition CreateDraftFromResults(
    string vulnerabilityId,
    string? component,
    List<SourceExtractionResult> results)
{
    const int maxTargets = 10;

    var resolvedComponent = component
        ?? results.Select(r => r.Component).FirstOrDefault(c => !string.IsNullOrEmpty(c))
        ?? "unknown";

    // One hint per function name (case-insensitive), keeping the most confident one,
    // then the overall top hints by confidence.
    var topHints = results
        .SelectMany(r => r.FunctionHints)
        .GroupBy(h => h.Name, StringComparer.OrdinalIgnoreCase)
        .Select(sameName => sameName.OrderByDescending(h => h.Confidence).First())
        .OrderByDescending(h => h.Confidence)
        .Take(maxTargets)
        .ToList();

    // Sinks come from the CWE classification and are shared by every target.
    var cweIds = results.SelectMany(r => r.CweIds).Distinct().ToList();
    var sinksForCwes = CweToSinkMapper.GetSinksForCwes(cweIds);

    var targets = topHints
        .Select(h => new VulnerableTarget
        {
            FunctionName = h.Name,
            Sinks = sinksForCwes,
            SourceFile = h.SourceFile
        })
        .ToImmutableArray();

    if (targets.IsEmpty)
    {
        // No function could be identified: emit a placeholder so the draft is still reviewable.
        targets = ImmutableArray.Create(new VulnerableTarget
        {
            FunctionName = "<unknown>",
            Sinks = sinksForCwes
        });
    }

    // Tags: lower-cased severity (if any) plus CWE categories, de-duplicated and ordinally sorted.
    var tagSet = new SortedSet<string>(StringComparer.Ordinal);
    var severity = results
        .Select(r => r.Severity)
        .FirstOrDefault(s => !string.IsNullOrEmpty(s));
    if (!string.IsNullOrEmpty(severity))
    {
        tagSet.Add(severity.ToLowerInvariant());
    }

    tagSet.UnionWith(CweToSinkMapper.GetCategoriesForCwes(cweIds));

    var metadata = new GoldenSetMetadata
    {
        AuthorId = "extraction-service",
        CreatedAt = _timeProvider.GetUtcNow(),
        SourceRef = string.Join(", ", results.Select(r => r.Source.Reference)),
        Tags = [.. tagSet]
    };

    return new GoldenSetDefinition
    {
        Id = vulnerabilityId,
        Component = resolvedComponent,
        Targets = targets,
        Metadata = metadata
    };
}
/// <summary>
/// Scores a draft on function identification, edge extraction and sink mapping.
/// </summary>
/// <param name="draft">The draft to score.</param>
/// <param name="results">Source results behind the draft; only their count is used.</param>
/// <returns>Confidence built via <c>ExtractionConfidence.FromComponents</c>.</returns>
private static ExtractionConfidence CalculateConfidence(
    GoldenSetDefinition draft,
    List<SourceExtractionResult> results)
{
    // Averages a constant over the matching targets, so the outcome is `score` when at
    // least one target matches and 0 when none does.
    static decimal ScoreIfAny(IEnumerable<VulnerableTarget> matching, decimal score)
        => matching.Select(_ => score).DefaultIfEmpty(0m).Average();

    var functionScore = ScoreIfAny(draft.Targets.Where(t => t.FunctionName != "<unknown>"), 1.0m);
    var edgeScore = ScoreIfAny(draft.Targets.Where(t => t.Edges.Length > 0), 0.8m);
    var sinkScore = ScoreIfAny(draft.Targets.Where(t => t.Sinks.Length > 0), 0.7m);

    // More than one source adds a small bonus to the function score, capped at 1.0.
    var multiSourceBonus = results.Count > 1 ? 0.1m : 0m;
    var boostedFunctionScore = Math.Min(1.0m, functionScore + multiSourceBonus);

    return ExtractionConfidence.FromComponents(boostedFunctionScore, edgeScore, sinkScore);
}
/// <summary>
/// Derives follow-up suggestions from what the draft still lacks.
/// </summary>
/// <param name="draft">The draft to inspect.</param>
/// <param name="results">Source results; only their commit references are counted.</param>
/// <returns>Suggestions in a fixed order: edges, unknown functions, witness, fix commits.</returns>
private static ImmutableArray<ExtractionSuggestion> GenerateSuggestions(
    GoldenSetDefinition draft,
    List<SourceExtractionResult> results)
{
    var output = ImmutableArray.CreateBuilder<ExtractionSuggestion>();

    // No target carries edges yet.
    var anyEdges = draft.Targets.Any(t => t.Edges.Length > 0);
    if (!anyEdges)
    {
        output.Add(new ExtractionSuggestion
        {
            Field = "targets[*].edges",
            CurrentValue = null,
            SuggestedValue = "Add basic block edges from CFG analysis",
            Confidence = 0.9m,
            Rationale = "No edges defined. Consider adding control flow edges from binary analysis."
        });
    }

    // At least one placeholder target is present.
    if (draft.Targets.Any(t => t.FunctionName == "<unknown>"))
    {
        output.Add(new ExtractionSuggestion
        {
            Field = "targets[*].function_name",
            CurrentValue = "<unknown>",
            SuggestedValue = "Identify specific vulnerable function",
            Confidence = 0.95m,
            Rationale = "Could not identify vulnerable function from advisory. Manual review required."
        });
    }

    if (draft.Witness is null)
    {
        output.Add(new ExtractionSuggestion
        {
            Field = "witness",
            CurrentValue = null,
            SuggestedValue = "Add witness input for reproducibility",
            Confidence = 0.7m,
            Rationale = "No witness input defined. Adding reproduction steps improves golden set quality."
        });
    }

    // Fix commits were referenced by at least one source.
    var commitCount = results.SelectMany(r => r.CommitReferences).Count();
    if (commitCount > 0)
    {
        output.Add(new ExtractionSuggestion
        {
            Field = "targets",
            CurrentValue = null,
            SuggestedValue = string.Create(
                CultureInfo.InvariantCulture,
                $"Analyze {commitCount} fix commit(s) for more precise targets"),
            Confidence = 0.8m,
            Rationale = "Fix commits are available. AI analysis can extract precise function names and edge patterns.",
            Source = "upstream_commit"
        });
    }

    return output.ToImmutable();
}
/// <summary>
/// Builds the result returned when no source produced any data.
/// </summary>
/// <param name="vulnerabilityId">ID used as the draft's <c>Id</c>.</param>
/// <param name="component">Component name for the placeholder draft.</param>
/// <param name="warnings">Warnings gathered so far; a "no data found" entry is appended to this list.</param>
/// <returns>A zero-confidence result with one placeholder target and a manual-entry suggestion.</returns>
private GoldenSetExtractionResult CreateEmptyResult(
    string vulnerabilityId,
    string component,
    List<string> warnings)
{
    var placeholderMetadata = new GoldenSetMetadata
    {
        AuthorId = "extraction-service",
        CreatedAt = _timeProvider.GetUtcNow(),
        SourceRef = "none"
    };

    var placeholderDraft = new GoldenSetDefinition
    {
        Id = vulnerabilityId,
        Component = component,
        Targets = [new VulnerableTarget { FunctionName = "<unknown>" }],
        Metadata = placeholderMetadata
    };

    // The caller's list is extended in place before being snapshotted into the result.
    warnings.Add($"No data found for {vulnerabilityId}. Manual authoring required.");

    var manualEntry = new ExtractionSuggestion
    {
        Field = "targets",
        CurrentValue = null,
        SuggestedValue = "Manual entry required",
        Confidence = 0.0m,
        Rationale = "No automated extraction was possible. Please manually define the vulnerable targets."
    };

    return new GoldenSetExtractionResult
    {
        Draft = placeholderDraft,
        Confidence = ExtractionConfidence.Zero,
        Sources = [],
        Suggestions = [manualEntry],
        Warnings = [.. warnings]
    };
}
}

View File

@@ -0,0 +1,322 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Frozen;
using System.Collections.Immutable;
using System.Globalization;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Implementation of the golden set review workflow.
/// </summary>
public sealed class GoldenSetReviewService : IGoldenSetReviewService
{
private readonly IGoldenSetStore _store;
private readonly IGoldenSetValidator _validator;
private readonly TimeProvider _timeProvider;
private readonly ILogger<GoldenSetReviewService> _logger;

/// <summary>
/// Allowed workflow transitions, keyed by current status:
/// Draft -> InReview; InReview -> Draft (changes requested) or Approved;
/// Approved -> Deprecated; Deprecated -> Archived; Archived is terminal.
/// </summary>
private static readonly FrozenDictionary<GoldenSetStatus, FrozenSet<GoldenSetStatus>> ValidTransitions =
    new KeyValuePair<GoldenSetStatus, FrozenSet<GoldenSetStatus>>[]
    {
        new(GoldenSetStatus.Draft, new[] { GoldenSetStatus.InReview }.ToFrozenSet()),
        new(GoldenSetStatus.InReview, new[] { GoldenSetStatus.Draft, GoldenSetStatus.Approved }.ToFrozenSet()),
        new(GoldenSetStatus.Approved, new[] { GoldenSetStatus.Deprecated }.ToFrozenSet()),
        new(GoldenSetStatus.Deprecated, new[] { GoldenSetStatus.Archived }.ToFrozenSet()),
        new(GoldenSetStatus.Archived, FrozenSet<GoldenSetStatus>.Empty),
    }.ToFrozenDictionary();

/// <summary>
/// Creates the review service from its injected collaborators.
/// </summary>
/// <param name="store">Store used to load golden sets, persist definitions and change status.</param>
/// <param name="validator">Validator run before a golden set is submitted for review.</param>
/// <param name="timeProvider">Clock used to stamp the review time on approval.</param>
/// <param name="logger">Logger for workflow events.</param>
public GoldenSetReviewService(
    IGoldenSetStore store,
    IGoldenSetValidator validator,
    TimeProvider timeProvider,
    ILogger<GoldenSetReviewService> logger)
{
    (_store, _validator, _timeProvider, _logger) = (store, validator, timeProvider, logger);
}
/// <inheritdoc />
public async Task<ReviewSubmissionResult> SubmitForReviewAsync(
    string goldenSetId,
    string submitterId,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);
    ArgumentException.ThrowIfNullOrWhiteSpace(submitterId);

    _logger.LogInformation(
        "Submitting golden set {GoldenSetId} for review by {SubmitterId}",
        goldenSetId,
        submitterId);

    var stored = await _store.GetAsync(goldenSetId, ct);
    if (stored is null)
    {
        return ReviewSubmissionResult.Failed($"Golden set {goldenSetId} not found");
    }

    // Only statuses that may move to InReview can be submitted.
    var canSubmit = IsValidTransition(stored.Status, GoldenSetStatus.InReview);
    if (!canSubmit)
    {
        return ReviewSubmissionResult.Failed(
            string.Create(
                CultureInfo.InvariantCulture,
                $"Cannot submit for review from status {stored.Status}. Must be in Draft status."));
    }

    // An invalid definition never reaches review; its validation messages are returned instead.
    var validation = await _validator.ValidateAsync(stored.Definition, ct: ct);
    if (!validation.IsValid)
    {
        return ReviewSubmissionResult.Failed(
            "Validation failed. Please fix errors before submitting.",
            [.. validation.Errors.Select(e => e.Message)]);
    }

    var statusChange = await _store.UpdateStatusAsync(
        goldenSetId,
        GoldenSetStatus.InReview,
        submitterId,
        "Submitted for review",
        ct);
    if (!statusChange.Success)
    {
        return ReviewSubmissionResult.Failed(statusChange.Error ?? "Failed to update status");
    }

    _logger.LogInformation(
        "Golden set {GoldenSetId} submitted for review",
        goldenSetId);

    return ReviewSubmissionResult.Successful(GoldenSetStatus.InReview);
}
/// <inheritdoc />
public async Task<ReviewDecisionResult> ApproveAsync(
    string goldenSetId,
    string reviewerId,
    string? comments = null,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);
    ArgumentException.ThrowIfNullOrWhiteSpace(reviewerId);

    _logger.LogInformation(
        "Approving golden set {GoldenSetId} by {ReviewerId}",
        goldenSetId,
        reviewerId);

    var stored = await _store.GetAsync(goldenSetId, ct);
    if (stored is null)
    {
        return ReviewDecisionResult.Failed($"Golden set {goldenSetId} not found");
    }

    if (!IsValidTransition(stored.Status, GoldenSetStatus.Approved))
    {
        return ReviewDecisionResult.Failed(
            string.Create(
                CultureInfo.InvariantCulture,
                $"Cannot approve from status {stored.Status}. Must be in InReview status."));
    }

    // Stamp reviewer identity and review time onto the definition's metadata.
    var stampedMetadata = stored.Definition.Metadata with
    {
        ReviewedBy = reviewerId,
        ReviewedAt = _timeProvider.GetUtcNow()
    };
    var reviewedDefinition = stored.Definition with { Metadata = stampedMetadata };

    // The stamped definition is stored under the current status first, then the status is moved.
    // NOTE(review): these are two separate store calls; if the status update fails, the
    // stamped definition has already been stored.
    var persisted = await _store.StoreAsync(reviewedDefinition, stored.Status, ct);
    if (!persisted.Success)
    {
        return ReviewDecisionResult.Failed(persisted.Error ?? "Failed to update definition");
    }

    var statusChange = await _store.UpdateStatusAsync(
        goldenSetId,
        GoldenSetStatus.Approved,
        reviewerId,
        comments ?? "Approved",
        ct);
    if (!statusChange.Success)
    {
        return ReviewDecisionResult.Failed(statusChange.Error ?? "Failed to update status");
    }

    _logger.LogInformation(
        "Golden set {GoldenSetId} approved by {ReviewerId}",
        goldenSetId,
        reviewerId);

    return ReviewDecisionResult.Successful(GoldenSetStatus.Approved);
}
/// <inheritdoc />
/// <remarks>
/// An uninitialized (<c>default</c>) <paramref name="changes"/> array is treated as empty
/// rather than failing with a <see cref="NullReferenceException"/>.
/// </remarks>
public async Task<ReviewDecisionResult> RequestChangesAsync(
    string goldenSetId,
    string reviewerId,
    string comments,
    ImmutableArray<ChangeRequest> changes,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);
    ArgumentException.ThrowIfNullOrWhiteSpace(reviewerId);
    ArgumentException.ThrowIfNullOrWhiteSpace(comments);

    // default(ImmutableArray<T>) throws on .Length; normalize it once up front.
    if (changes.IsDefault)
    {
        changes = ImmutableArray<ChangeRequest>.Empty;
    }

    _logger.LogInformation(
        "Requesting changes for golden set {GoldenSetId} by {ReviewerId}",
        goldenSetId,
        reviewerId);

    // Get current golden set
    var goldenSet = await _store.GetAsync(goldenSetId, ct);
    if (goldenSet is null)
    {
        return ReviewDecisionResult.Failed(
            string.Format(CultureInfo.InvariantCulture, "Golden set {0} not found", goldenSetId));
    }

    // Requesting changes sends the set back to Draft, which the transition table
    // only allows from InReview.
    if (!IsValidTransition(goldenSet.Status, GoldenSetStatus.Draft))
    {
        return ReviewDecisionResult.Failed(
            string.Format(
                CultureInfo.InvariantCulture,
                "Cannot request changes from status {0}. Must be in InReview status.",
                goldenSet.Status));
    }

    // The reviewer's comment and the itemized change requests are recorded together
    // as the status-change comment.
    var fullComment = FormatChangesComment(comments, changes);

    var updateResult = await _store.UpdateStatusAsync(
        goldenSetId,
        GoldenSetStatus.Draft,
        reviewerId,
        fullComment,
        ct);
    if (!updateResult.Success)
    {
        return ReviewDecisionResult.Failed(updateResult.Error ?? "Failed to update status");
    }

    _logger.LogInformation(
        "Changes requested for golden set {GoldenSetId}. {ChangeCount} specific changes.",
        goldenSetId,
        changes.Length);

    return ReviewDecisionResult.Successful(GoldenSetStatus.Draft);
}
/// <inheritdoc />
public async Task<ImmutableArray<ReviewHistoryEntry>> GetHistoryAsync(
    string goldenSetId,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);

    // Review history is a projection of the store's audit log for this golden set.
    var auditEntries = await _store.GetAuditLogAsync(goldenSetId, ct);

    return auditEntries
        .Select(audit => new ReviewHistoryEntry
        {
            Action = MapOperationToAction(audit.Operation),
            ActorId = audit.ActorId,
            Timestamp = audit.Timestamp,
            OldStatus = audit.OldStatus,
            NewStatus = audit.NewStatus,
            Comments = audit.Comment
        })
        .ToImmutableArray();
}
/// <inheritdoc />
public bool IsValidTransition(GoldenSetStatus currentStatus, GoldenSetStatus targetStatus)
    // A status missing from the transition table allows no transitions at all.
    => ValidTransitions.TryGetValue(currentStatus, out var allowedTargets)
        && allowedTargets.Contains(targetStatus);
/// <summary>
/// Combines the reviewer's free-text comment with an itemized list of change requests.
/// </summary>
/// <param name="comments">The reviewer's comment.</param>
/// <param name="changes">Requested changes; may be empty or uninitialized (<c>default</c>).</param>
/// <returns>
/// <paramref name="comments"/> unchanged when there are no change requests; otherwise the
/// comment, a blank line, a "Requested changes:" heading and one "- [field]: comment" line
/// per request.
/// </returns>
private static string FormatChangesComment(string comments, ImmutableArray<ChangeRequest> changes)
{
    // IsDefaultOrEmpty also covers default(ImmutableArray<T>), whose Length would throw.
    if (changes.IsDefaultOrEmpty)
    {
        return comments;
    }

    var newLine = Environment.NewLine;
    var bulletLines = changes.Select(c => string.Create(
        CultureInfo.InvariantCulture,
        $"- [{c.Field}]: {c.Comment}"));

    return $"{comments}{newLine}{newLine}Requested changes:{newLine}{string.Join(newLine, bulletLines)}";
}
/// <summary>
/// Maps an audit-log operation name to a review action name.
/// </summary>
/// <param name="operation">Operation name from the audit log; matched case-insensitively.</param>
/// <returns>
/// <c>ReviewActions.Created</c> or <c>ReviewActions.Updated</c> for known operations;
/// otherwise the lower-cased operation name.
/// </returns>
private static string MapOperationToAction(string operation)
{
    var normalized = operation.ToLowerInvariant();
    return normalized switch
    {
        "created" or "create" => ReviewActions.Created,
        // Status changes are reported as plain updates.
        "updated" or "update" or "status_change" => ReviewActions.Updated,
        _ => normalized
    };
}
}

View File

@@ -0,0 +1,235 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Service for AI-assisted enrichment of golden sets.
/// </summary>
public interface IGoldenSetEnrichmentService
{
/// <summary>
/// Enriches a draft golden set using AI analysis.
/// </summary>
/// <param name="draft">The draft golden set to enrich.</param>
/// <param name="context">Context for enrichment (fix commits, related CVEs, advisory text, upstream source, CWE IDs, commit analysis).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// A <see cref="GoldenSetEnrichmentResult"/> carrying the enriched draft together with the
/// actions applied, an overall confidence, an optional rationale and any warnings.
/// </returns>
Task<GoldenSetEnrichmentResult> EnrichAsync(
GoldenSetDefinition draft,
GoldenSetEnrichmentContext context,
CancellationToken ct = default);
/// <summary>
/// Gets a value indicating whether AI enrichment is available.
/// </summary>
// NOTE(review): presumably callers should check this before calling EnrichAsync - confirm against implementations.
bool IsAvailable { get; }
}
/// <summary>
/// Context provided to the AI for enrichment.
/// </summary>
/// <remarks>
/// Every member is optional: collections default to empty and reference members to <c>null</c>.
/// </remarks>
public sealed record GoldenSetEnrichmentContext
{
/// <summary>
/// Fix commits to analyze. Defaults to empty.
/// </summary>
public ImmutableArray<AnalyzedCommit> FixCommits { get; init; } = [];
/// <summary>
/// Related CVEs. Defaults to empty.
/// </summary>
public ImmutableArray<string> RelatedCves { get; init; } = [];
/// <summary>
/// Advisory description text, or <c>null</c> when none is available.
/// </summary>
public string? AdvisoryText { get; init; }
/// <summary>
/// Upstream source code snippets, or <c>null</c> when none are available.
/// </summary>
public string? UpstreamSourceCode { get; init; }
/// <summary>
/// CWE IDs associated with the vulnerability. Defaults to empty.
/// </summary>
public ImmutableArray<string> CweIds { get; init; } = [];
/// <summary>
/// Commit analysis result, or <c>null</c> when no analysis was performed.
/// </summary>
public CommitAnalysisResult? CommitAnalysis { get; init; }
}
/// <summary>
/// Outcome of an AI enrichment pass over a draft golden set.
/// </summary>
public sealed record GoldenSetEnrichmentResult
{
    /// <summary>
    /// The draft after enrichment (the input draft when nothing changed).
    /// </summary>
    public required GoldenSetDefinition EnrichedDraft { get; init; }

    /// <summary>
    /// Actions applied during enrichment. Defaults to empty.
    /// </summary>
    public ImmutableArray<EnrichmentAction> ActionsApplied { get; init; } = [];

    /// <summary>
    /// Overall confidence in the enrichment.
    /// </summary>
    public decimal OverallConfidence { get; init; }

    /// <summary>
    /// The AI's rationale for the enrichments, if any.
    /// </summary>
    public string? AiRationale { get; init; }

    /// <summary>
    /// Warnings raised while enriching. Defaults to empty.
    /// </summary>
    public ImmutableArray<string> Warnings { get; init; } = [];

    /// <summary>
    /// Creates a result that leaves the draft untouched.
    /// </summary>
    /// <param name="draft">The draft to return unchanged.</param>
    /// <param name="reason">Why nothing was changed; stored in <see cref="AiRationale"/>.</param>
    /// <returns>A result with zero confidence, no actions and no warnings.</returns>
    public static GoldenSetEnrichmentResult NoChanges(GoldenSetDefinition draft, string reason)
    {
        return new GoldenSetEnrichmentResult
        {
            EnrichedDraft = draft,
            OverallConfidence = 0,
            AiRationale = reason
        };
    }
}
/// <summary>
/// An action taken during enrichment.
/// </summary>
public sealed record EnrichmentAction
{
/// <summary>
/// Type of action. Known values are the constants on <see cref="EnrichmentActionTypes"/>
/// (for example <c>function_added</c>, <c>edge_suggested</c>, <c>sink_refined</c>, <c>constant_extracted</c>).
/// </summary>
public required string Type { get; init; }
/// <summary>
/// Target of the action (field path or element).
/// </summary>
public required string Target { get; init; }
/// <summary>
/// Value set or suggested.
/// </summary>
public required string Value { get; init; }
/// <summary>
/// Confidence in this action (0.0 - 1.0).
/// </summary>
public required decimal Confidence { get; init; }
/// <summary>
/// Rationale for the action, if one was given.
/// </summary>
public string? Rationale { get; init; }
}
/// <summary>
/// Known enrichment action types.
/// </summary>
/// <remarks>
/// Snake_case string identifiers intended for <see cref="EnrichmentAction.Type"/>.
/// They are compile-time constants, so they can be used as <c>switch</c> case labels.
/// </remarks>
public static class EnrichmentActionTypes
{
public const string FunctionAdded = "function_added";
public const string FunctionRefined = "function_refined";
public const string EdgeSuggested = "edge_suggested";
public const string SinkAdded = "sink_added";
public const string SinkRefined = "sink_refined";
public const string ConstantExtracted = "constant_extracted";
public const string WitnessHintAdded = "witness_hint_added";
public const string TaintInvariantSet = "taint_invariant_set";
}
/// <summary>
/// AI enrichment prompt templates.
/// </summary>
/// <remarks>
/// Both prompts are non-interpolated raw string literals, so braces in them are literal text.
/// </remarks>
public static class EnrichmentPrompts
{
/// <summary>
/// System prompt for golden set enrichment.
/// </summary>
public const string SystemPrompt = """
You are a security vulnerability analyst specializing in binary analysis and golden set creation for vulnerability detection.
Your task is to analyze vulnerability information and identify specific code-level targets that can be used to detect the vulnerability in compiled binaries.
Focus on:
1. Identifying vulnerable functions from fix commits
2. Extracting specific constants, magic values, or buffer sizes from vulnerable code
3. Suggesting basic block edge patterns when fixes add bounds checks or branches
4. Identifying sink functions that enable exploitation
Be precise and conservative - only suggest targets with high confidence.
""";
/// <summary>
/// User prompt template for enrichment.
/// </summary>
/// <remarks>
/// Contains the named placeholders <c>{cve_id}</c>, <c>{component}</c>, <c>{advisory_text}</c>,
/// <c>{cwe_ids}</c>, <c>{modified_functions}</c>, <c>{added_conditions}</c>,
/// <c>{added_constants}</c> and <c>{current_draft_yaml}</c>, followed by a JSON example of the
/// expected response with <c>functions</c>, <c>constants</c>, <c>edge_suggestions</c> and
/// <c>sinks</c> arrays. The template cannot be passed to <c>string.Format</c> as-is, because
/// it uses named placeholders and literal JSON braces.
/// </remarks>
// NOTE(review): presumably placeholders are filled by textual replacement - confirm against the consumer.
public const string UserPromptTemplate = """
Analyze vulnerability {cve_id} in {component} to identify specific code-level targets.
## Advisory Information
{advisory_text}
## CWE Classifications
{cwe_ids}
## Fix Commits Analysis
Modified functions: {modified_functions}
Added conditions: {added_conditions}
Added constants: {added_constants}
## Current Draft Golden Set
{current_draft_yaml}
## Task
1. Identify the vulnerable function(s) from the fix commits
2. Extract specific constants/magic values that appear in the vulnerable code
3. Suggest basic block edge patterns if the fix adds bounds checks or branches
4. Identify the sink function(s) that enable exploitation
Respond with a JSON object:
```json
{
"functions": [
{
"name": "function_name",
"confidence": 0.95,
"rationale": "Modified in fix commit abc123"
}
],
"constants": [
{
"value": "0x400",
"confidence": 0.8,
"rationale": "Buffer size constant in bounds check"
}
],
"edge_suggestions": [
{
"pattern": "bounds_check_before_memcpy",
"confidence": 0.7,
"rationale": "Fix adds size validation before memory copy"
}
],
"sinks": [
{
"name": "memcpy",
"confidence": 0.9,
"rationale": "Called without size validation in vulnerable version"
}
]
}
```
""";
}

View File

@@ -0,0 +1,266 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Extracts golden set drafts from vulnerability advisories and upstream sources.
/// </summary>
public interface IGoldenSetExtractor
{
/// <summary>
/// Extracts a draft golden set from a CVE/advisory.
/// </summary>
/// <param name="vulnerabilityId">The vulnerability ID (CVE-*, GHSA-*, etc.).</param>
/// <param name="component">The component name (optional - can be auto-detected).</param>
/// <param name="options">Extraction options; <c>null</c> is allowed.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// Extraction result containing the draft, confidence scores, the sources used,
/// improvement suggestions and warnings.
/// </returns>
Task<GoldenSetExtractionResult> ExtractAsync(
string vulnerabilityId,
string? component = null,
ExtractionOptions? options = null,
CancellationToken ct = default);
/// <summary>
/// Enriches an existing draft with additional sources.
/// </summary>
/// <param name="draft">The existing draft to enrich.</param>
/// <param name="options">Enrichment options; <c>null</c> is allowed.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Extraction result whose draft is the enriched version of <paramref name="draft"/>.</returns>
Task<GoldenSetExtractionResult> EnrichAsync(
GoldenSetDefinition draft,
EnrichmentOptions? options = null,
CancellationToken ct = default);
}
/// <summary>
/// Result of a golden set extraction operation.
/// </summary>
public sealed record GoldenSetExtractionResult
{
/// <summary>
/// The draft golden set definition.
/// </summary>
public required GoldenSetDefinition Draft { get; init; }
/// <summary>
/// Confidence scores for different aspects of the extraction.
/// </summary>
public required ExtractionConfidence Confidence { get; init; }
/// <summary>
/// Sources used during extraction. Defaults to empty.
/// </summary>
public ImmutableArray<ExtractionSource> Sources { get; init; } = [];
/// <summary>
/// Suggestions for improving the golden set. Defaults to empty.
/// </summary>
public ImmutableArray<ExtractionSuggestion> Suggestions { get; init; } = [];
/// <summary>
/// Warnings encountered during extraction. Defaults to empty.
/// </summary>
public ImmutableArray<string> Warnings { get; init; } = [];
/// <summary>
/// Whether extraction was successful, defined as an overall confidence greater than zero.
/// </summary>
/// <remarks>
/// Derived solely from <see cref="Confidence"/>; <see cref="Sources"/> is not consulted,
/// so a result with sources but zero overall confidence is not a success.
/// </remarks>
public bool IsSuccess => Confidence.Overall > 0;
}
/// <summary>
/// Per-aspect and overall confidence scores describing extraction quality.
/// </summary>
public sealed record ExtractionConfidence
{
    /// <summary>
    /// Overall confidence score (0.0 - 1.0).
    /// </summary>
    public required decimal Overall { get; init; }

    /// <summary>
    /// Confidence that the vulnerable functions were identified.
    /// </summary>
    public required decimal FunctionIdentification { get; init; }

    /// <summary>
    /// Confidence in the extracted edges.
    /// </summary>
    public required decimal EdgeExtraction { get; init; }

    /// <summary>
    /// Confidence in the sink mapping.
    /// </summary>
    public required decimal SinkMapping { get; init; }

    /// <summary>
    /// Gets a new instance with every score set to zero.
    /// </summary>
    public static ExtractionConfidence Zero
    {
        get
        {
            return new ExtractionConfidence
            {
                Overall = 0,
                FunctionIdentification = 0,
                EdgeExtraction = 0,
                SinkMapping = 0
            };
        }
    }

    /// <summary>
    /// Builds a confidence value from its three component scores.
    /// </summary>
    /// <param name="functionId">Function identification score (weight 0.5).</param>
    /// <param name="edgeExtraction">Edge extraction score (weight 0.2).</param>
    /// <param name="sinkMapping">Sink mapping score (weight 0.3).</param>
    /// <returns>
    /// A value whose <see cref="Overall"/> is the weighted sum of the components rounded to
    /// two decimals; the component scores are stored unrounded.
    /// </returns>
    public static ExtractionConfidence FromComponents(
        decimal functionId,
        decimal edgeExtraction,
        decimal sinkMapping)
    {
        // Functions matter most, then sinks, then edges.
        var functionPart = functionId * 0.5m;
        var sinkPart = sinkMapping * 0.3m;
        var edgePart = edgeExtraction * 0.2m;
        var weightedTotal = functionPart + sinkPart + edgePart;

        return new ExtractionConfidence
        {
            Overall = Math.Round(weightedTotal, 2),
            FunctionIdentification = functionId,
            EdgeExtraction = edgeExtraction,
            SinkMapping = sinkMapping
        };
    }
}
/// <summary>
/// Information about a data source used during extraction.
/// </summary>
public sealed record ExtractionSource
{
/// <summary>
/// Source type (nvd, osv, ghsa, upstream_commit); see <see cref="ExtractionSourceTypes"/> for the known values.
/// </summary>
public required string Type { get; init; }
/// <summary>
/// Reference URL or identifier.
/// </summary>
public required string Reference { get; init; }
/// <summary>
/// When the source was fetched.
/// </summary>
public required DateTimeOffset FetchedAt { get; init; }
/// <summary>
/// Optional version/etag of the source data; <c>null</c> when not provided.
/// </summary>
public string? Version { get; init; }
}
/// <summary>
/// A suggestion for improving a golden set.
/// </summary>
public sealed record ExtractionSuggestion
{
/// <summary>
/// Field path being suggested (e.g., "targets[0].sinks").
/// </summary>
public required string Field { get; init; }
/// <summary>
/// Current value (if any); <c>null</c> when the field has no value yet.
/// </summary>
public string? CurrentValue { get; init; }
/// <summary>
/// Suggested value.
/// </summary>
public required string SuggestedValue { get; init; }
/// <summary>
/// Confidence in this suggestion (0.0 - 1.0).
/// </summary>
public required decimal Confidence { get; init; }
/// <summary>
/// Human-readable rationale for the suggestion.
/// </summary>
public required string Rationale { get; init; }
/// <summary>
/// Source of the suggestion (ai, nvd, osv, etc.); <c>null</c> when not attributed to a source.
/// </summary>
public string? Source { get; init; }
}
/// <summary>
/// Options for golden set extraction.
/// </summary>
// NOTE(review): GoldenSetExtractor.ExtractAsync reads only Sources from this record; whether the
// remaining flags are honoured elsewhere cannot be confirmed from that class.
public sealed record ExtractionOptions
{
/// <summary>
/// Include analysis of upstream fix commits. Defaults to <c>true</c>.
/// </summary>
public bool IncludeUpstreamCommits { get; init; } = true;
/// <summary>
/// Include related CVEs in the analysis. Defaults to <c>true</c>.
/// </summary>
public bool IncludeRelatedCves { get; init; } = true;
/// <summary>
/// Use AI for enrichment. Defaults to <c>true</c>.
/// </summary>
public bool UseAiEnrichment { get; init; } = true;
/// <summary>
/// Maximum number of upstream commits to analyze. Defaults to 5.
/// </summary>
public int MaxUpstreamCommits { get; init; } = 5;
/// <summary>
/// Sources to use for extraction (empty = all available). Defaults to empty.
/// </summary>
public ImmutableArray<string> Sources { get; init; } = [];
/// <summary>
/// Offline mode - skip remote fetches, use cached data only. Defaults to <c>false</c>.
/// </summary>
public bool OfflineMode { get; init; }
}
/// <summary>
/// Options for enriching an existing draft.
/// </summary>
// NOTE(review): GoldenSetExtractor.EnrichAsync creates a default instance when none is supplied
// but does not read any of these flags.
public sealed record EnrichmentOptions
{
/// <summary>
/// Analyze commit diffs to extract function changes. Defaults to <c>true</c>.
/// </summary>
public bool AnalyzeCommitDiffs { get; init; } = true;
/// <summary>
/// Extract witness hints from test cases. Defaults to <c>true</c>.
/// </summary>
public bool ExtractTestCases { get; init; } = true;
/// <summary>
/// Suggest edge patterns from control flow changes. Defaults to <c>true</c>.
/// </summary>
public bool SuggestEdgePatterns { get; init; } = true;
/// <summary>
/// Extract constants from vulnerable code. Defaults to <c>true</c>.
/// </summary>
public bool ExtractConstants { get; init; } = true;
}
/// <summary>
/// Known source types for extraction.
/// </summary>
/// <remarks>
/// Lower-case string identifiers for <see cref="ExtractionSource.Type"/>.
/// GoldenSetExtractor matches entries of <see cref="ExtractionOptions.Sources"/> against an
/// extractor's source type case-insensitively.
/// </remarks>
public static class ExtractionSourceTypes
{
public const string Nvd = "nvd";
public const string Osv = "osv";
public const string Ghsa = "ghsa";
public const string UpstreamCommit = "upstream_commit";
public const string Ai = "ai";
public const string Manual = "manual";
}

View File

@@ -0,0 +1,224 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Service for managing the golden set review workflow.
/// </summary>
/// <remarks>
/// The GoldenSetReviewService implementation allows these transitions:
/// Draft -> InReview, InReview -> Draft or Approved, Approved -> Deprecated,
/// Deprecated -> Archived. Archived has no outgoing transitions.
/// </remarks>
public interface IGoldenSetReviewService
{
/// <summary>
/// Submits a golden set for review.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="submitterId">The submitter's ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Submission result; on failure it carries an error message and any validation errors.</returns>
Task<ReviewSubmissionResult> SubmitForReviewAsync(
string goldenSetId,
string submitterId,
CancellationToken ct = default);
/// <summary>
/// Approves a golden set.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="reviewerId">The reviewer's ID.</param>
/// <param name="comments">Optional approval comments.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Decision result.</returns>
Task<ReviewDecisionResult> ApproveAsync(
string goldenSetId,
string reviewerId,
string? comments = null,
CancellationToken ct = default);
/// <summary>
/// Requests changes to a golden set, returning it to the draft state.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="reviewerId">The reviewer's ID.</param>
/// <param name="comments">Required comments explaining changes needed.</param>
/// <param name="changes">Specific change requests.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Decision result.</returns>
Task<ReviewDecisionResult> RequestChangesAsync(
string goldenSetId,
string reviewerId,
string comments,
ImmutableArray<ChangeRequest> changes,
CancellationToken ct = default);
/// <summary>
/// Gets the review history for a golden set.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Array of history entries.</returns>
Task<ImmutableArray<ReviewHistoryEntry>> GetHistoryAsync(
string goldenSetId,
CancellationToken ct = default);
/// <summary>
/// Checks if a transition is valid from the current state.
/// </summary>
/// <param name="currentStatus">The current status.</param>
/// <param name="targetStatus">The target status.</param>
/// <returns>True if transition is valid; otherwise, false.</returns>
bool IsValidTransition(GoldenSetStatus currentStatus, GoldenSetStatus targetStatus);
}
/// <summary>
/// Result of submitting a golden set for review.
/// </summary>
/// <remarks>
/// Use <see cref="Successful"/> and <see cref="Failed"/> to build instances; both leave
/// <see cref="ValidationErrors"/> in an initialized (enumerable) state.
/// </remarks>
public sealed record ReviewSubmissionResult
{
    /// <summary>
    /// Whether submission succeeded.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// The new status after submission.
    /// </summary>
    public GoldenSetStatus? NewStatus { get; init; }

    /// <summary>
    /// Error message if submission failed.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Validation errors that prevented submission. Empty when there are none.
    /// </summary>
    public ImmutableArray<string> ValidationErrors { get; init; } = [];

    /// <summary>
    /// Creates a successful result.
    /// </summary>
    /// <param name="newStatus">Status the golden set moved to.</param>
    /// <returns>A result with <see cref="Success"/> set to <c>true</c>.</returns>
    public static ReviewSubmissionResult Successful(GoldenSetStatus newStatus)
        => new() { Success = true, NewStatus = newStatus };

    /// <summary>
    /// Creates a failed result.
    /// </summary>
    /// <param name="error">Error message describing the failure.</param>
    /// <param name="validationErrors">
    /// Optional validation errors. When omitted (or passed as <c>default</c>) the result carries an
    /// empty array.
    /// </param>
    /// <returns>A result with <see cref="Success"/> set to <c>false</c>.</returns>
    public static ReviewSubmissionResult Failed(string error, ImmutableArray<string> validationErrors = default)
        => new()
        {
            Success = false,
            Error = error,
            // default(ImmutableArray<T>) wraps a null array and throws when enumerated or counted,
            // so normalize the omitted-argument case to an empty array.
            ValidationErrors = validationErrors.IsDefault ? ImmutableArray<string>.Empty : validationErrors
        };
}
/// <summary>
/// Outcome of a reviewer decision on a golden set (approval or a request for changes).
/// </summary>
public sealed record ReviewDecisionResult
{
    /// <summary>
    /// <c>true</c> when the decision was applied.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Status the golden set has after the decision; unset on failure results built by <see cref="Failed"/>.
    /// </summary>
    public GoldenSetStatus? NewStatus { get; init; }

    /// <summary>
    /// Failure description; unset on results built by <see cref="Successful"/>.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Builds a success result carrying the status reached.
    /// </summary>
    /// <param name="newStatus">Status after the decision.</param>
    /// <returns>A result with <see cref="Success"/> set to <c>true</c>.</returns>
    public static ReviewDecisionResult Successful(GoldenSetStatus newStatus)
    {
        return new ReviewDecisionResult
        {
            Success = true,
            NewStatus = newStatus,
        };
    }

    /// <summary>
    /// Builds a failure result carrying an error message.
    /// </summary>
    /// <param name="error">Description of why the decision was not applied.</param>
    /// <returns>A result with <see cref="Success"/> set to <c>false</c>.</returns>
    public static ReviewDecisionResult Failed(string error)
    {
        return new ReviewDecisionResult
        {
            Success = false,
            Error = error,
        };
    }
}
/// <summary>
/// A specific change request from a reviewer.
/// </summary>
/// <remarks>
/// <see cref="Field"/> and <see cref="Comment"/> must be supplied at construction;
/// <see cref="CurrentValue"/> and <see cref="SuggestedValue"/> are optional and may be <c>null</c>.
/// </remarks>
public sealed record ChangeRequest
{
/// <summary>
/// Field path that needs changes.
/// </summary>
public required string Field { get; init; }
/// <summary>
/// Current value of the field, as text; <c>null</c> when not provided.
/// </summary>
public string? CurrentValue { get; init; }
/// <summary>
/// Suggested new value, as text; <c>null</c> when the reviewer gives no concrete suggestion.
/// </summary>
public string? SuggestedValue { get; init; }
/// <summary>
/// Comment explaining the requested change.
/// </summary>
public required string Comment { get; init; }
}
/// <summary>
/// An entry in the review history.
/// </summary>
/// <remarks>
/// NOTE(review): the members appear to mirror the <c>golden_sets.audit_log</c> columns
/// (<c>action</c>, <c>actor_id</c>, <c>timestamp</c>, <c>old_status</c>, <c>new_status</c>);
/// confirm the mapping in the store implementation.
/// </remarks>
public sealed record ReviewHistoryEntry
{
/// <summary>
/// Action taken (submitted, approved, changes_requested, etc.).
/// Presumably one of the <see cref="ReviewActions"/> constants; confirm against writers.
/// </summary>
public required string Action { get; init; }
/// <summary>
/// Who performed the action.
/// </summary>
public required string ActorId { get; init; }
/// <summary>
/// When the action occurred.
/// </summary>
public required DateTimeOffset Timestamp { get; init; }
/// <summary>
/// Status before the action; <c>null</c> when not recorded.
/// </summary>
public GoldenSetStatus? OldStatus { get; init; }
/// <summary>
/// Status after the action; <c>null</c> when not recorded.
/// </summary>
public GoldenSetStatus? NewStatus { get; init; }
/// <summary>
/// Comments associated with the action.
/// </summary>
public string? Comments { get; init; }
/// <summary>
/// Change requests (if action was changes_requested). Empty by default.
/// </summary>
public ImmutableArray<ChangeRequest> ChangeRequests { get; init; } = [];
}
/// <summary>
/// Known review actions.
/// </summary>
/// <remarks>
/// The values are lowercase, snake_case strings exposed as compile-time constants.
/// NOTE(review): presumably these are the values written to <see cref="ReviewHistoryEntry.Action"/>;
/// confirm against the review service implementation.
/// </remarks>
public static class ReviewActions
{
/// <summary>The <c>"created"</c> action.</summary>
public const string Created = "created";
/// <summary>The <c>"updated"</c> action.</summary>
public const string Updated = "updated";
/// <summary>The <c>"submitted"</c> action.</summary>
public const string Submitted = "submitted";
/// <summary>The <c>"approved"</c> action.</summary>
public const string Approved = "approved";
/// <summary>The <c>"changes_requested"</c> action.</summary>
public const string ChangesRequested = "changes_requested";
/// <summary>The <c>"published"</c> action.</summary>
public const string Published = "published";
/// <summary>The <c>"deprecated"</c> action.</summary>
public const string Deprecated = "deprecated";
/// <summary>The <c>"archived"</c> action.</summary>
public const string Archived = "archived";
}

View File

@@ -0,0 +1,519 @@
// Licensed under AGPL-3.0-or-later. Copyright (C) 2026 StellaOps Contributors.
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GoldenSet.Authoring;
/// <summary>
/// Analyzes upstream fix commits to extract vulnerability information.
/// </summary>
/// <remarks>
/// The default implementation, <see cref="UpstreamCommitAnalyzer"/>, recognizes GitHub, GitLab and
/// Bitbucket commit URLs.
/// </remarks>
public interface IUpstreamCommitAnalyzer
{
/// <summary>
/// Fetches and analyzes fix commits from upstream repositories.
/// </summary>
/// <param name="commitUrls">URLs to fix commits.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// Analysis result with extracted information; problems with individual URLs are reported through
/// <see cref="CommitAnalysisResult.Warnings"/>.
/// </returns>
Task<CommitAnalysisResult> AnalyzeAsync(
ImmutableArray<string> commitUrls,
CancellationToken ct = default);
/// <summary>
/// Parses a commit URL to extract repository and commit information.
/// </summary>
/// <param name="url">The commit URL.</param>
/// <returns>Parsed commit info or null if not recognized.</returns>
ParsedCommitUrl? ParseCommitUrl(string url);
}
/// <summary>
/// Aggregated outcome of analyzing one or more upstream fix commits.
/// </summary>
public sealed record CommitAnalysisResult
{
    /// <summary>
    /// The commits that were analyzed.
    /// </summary>
    public ImmutableArray<AnalyzedCommit> Commits { get; init; } = [];

    /// <summary>
    /// Names of functions modified across all commits.
    /// </summary>
    public ImmutableArray<string> ModifiedFunctions { get; init; } = [];

    /// <summary>
    /// Constants that the fixes added.
    /// </summary>
    public ImmutableArray<string> AddedConstants { get; init; } = [];

    /// <summary>
    /// Conditions that the fixes added (if statements, bounds checks).
    /// </summary>
    public ImmutableArray<string> AddedConditions { get; init; } = [];

    /// <summary>
    /// Warnings collected while analyzing.
    /// </summary>
    public ImmutableArray<string> Warnings { get; init; } = [];

    /// <summary>
    /// Gets a fresh result in which every collection is empty.
    /// </summary>
    public static CommitAnalysisResult Empty
    {
        get
        {
            return new CommitAnalysisResult();
        }
    }
}
/// <summary>
/// Information about an analyzed commit.
/// </summary>
public sealed record AnalyzedCommit
{
/// <summary>
/// URL to the commit.
/// </summary>
public required string Url { get; init; }
/// <summary>
/// Commit hash.
/// </summary>
public required string Hash { get; init; }
/// <summary>
/// Commit message.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="UpstreamCommitAnalyzer"/> only downloads the diff and never sets this
/// property, so it stays <c>null</c> for commits it produces.
/// </remarks>
public string? Message { get; init; }
/// <summary>
/// Files changed in the commit. Empty by default, including for commits that could not be fetched.
/// </summary>
public ImmutableArray<FileDiff> Files { get; init; } = [];
/// <summary>
/// Whether this commit was successfully fetched.
/// <see cref="UpstreamCommitAnalyzer"/> sets it to <c>true</c> after downloading and parsing the
/// diff, and to <c>false</c> on the placeholder entry it records for a failed fetch.
/// </summary>
public bool WasFetched { get; init; }
}
/// <summary>
/// Diff information for a single file.
/// </summary>
public sealed record FileDiff
{
/// <summary>
/// File path. <see cref="UpstreamCommitAnalyzer"/> takes it from the <c>a/</c> side of the
/// <c>diff --git</c> header.
/// </summary>
public required string Path { get; init; }
/// <summary>
/// Functions modified in this file. <see cref="UpstreamCommitAnalyzer"/> derives these from the
/// context text that follows the <c>@@</c> markers of hunk headers.
/// </summary>
public ImmutableArray<string> FunctionsModified { get; init; } = [];
/// <summary>
/// Lines added. <see cref="UpstreamCommitAnalyzer"/> stores them without the leading <c>+</c> marker.
/// </summary>
public ImmutableArray<string> LinesAdded { get; init; } = [];
/// <summary>
/// Lines removed. <see cref="UpstreamCommitAnalyzer"/> stores them without the leading <c>-</c> marker.
/// </summary>
public ImmutableArray<string> LinesRemoved { get; init; } = [];
}
/// <summary>
/// Parsed commit URL information.
/// </summary>
/// <remarks>
/// <see cref="Host"/> selects the URL templates used by <see cref="GetApiUrl"/> and
/// <see cref="GetDiffUrl"/>. The recognized values are <c>"github"</c>, <c>"gitlab"</c> and
/// <c>"bitbucket"</c>; any other value makes both methods return <see cref="OriginalUrl"/>.
/// </remarks>
public sealed record ParsedCommitUrl
{
    /// <summary>
    /// Host type (github, gitlab, etc.).
    /// </summary>
    public required string Host { get; init; }

    /// <summary>
    /// Repository owner.
    /// </summary>
    public required string Owner { get; init; }

    /// <summary>
    /// Repository name.
    /// </summary>
    public required string Repo { get; init; }

    /// <summary>
    /// Commit hash.
    /// </summary>
    public required string Hash { get; init; }

    /// <summary>
    /// Original URL.
    /// </summary>
    public required string OriginalUrl { get; init; }

    /// <summary>
    /// Gets the API URL for fetching commit details.
    /// </summary>
    /// <returns>
    /// The host-specific REST endpoint for the commit, or <see cref="OriginalUrl"/> when
    /// <see cref="Host"/> is not a recognized value.
    /// </returns>
    public string GetApiUrl() => Host switch
    {
        "github" => string.Format(
            CultureInfo.InvariantCulture,
            "https://api.github.com/repos/{0}/{1}/commits/{2}",
            Owner, Repo, Hash),
        // GitLab addresses a project by its URL-encoded "owner/repo" path, hence the %2F.
        "gitlab" => string.Format(
            CultureInfo.InvariantCulture,
            "https://gitlab.com/api/v4/projects/{0}%2F{1}/repository/commits/{2}",
            Owner, Repo, Hash),
        "bitbucket" => string.Format(
            CultureInfo.InvariantCulture,
            "https://api.bitbucket.org/2.0/repositories/{0}/{1}/commit/{2}",
            Owner, Repo, Hash),
        _ => OriginalUrl
    };

    /// <summary>
    /// Gets the diff URL for fetching patch content.
    /// </summary>
    /// <returns>
    /// A URL that serves the commit as a textual diff, or <see cref="OriginalUrl"/> when
    /// <see cref="Host"/> is not a recognized value.
    /// </returns>
    public string GetDiffUrl() => Host switch
    {
        "github" => string.Format(
            CultureInfo.InvariantCulture,
            "https://github.com/{0}/{1}/commit/{2}.diff",
            Owner, Repo, Hash),
        "gitlab" => string.Format(
            CultureInfo.InvariantCulture,
            "https://gitlab.com/{0}/{1}/-/commit/{2}.diff",
            Owner, Repo, Hash),
        // The Bitbucket commit page itself is HTML; the REST diff endpoint returns the raw diff.
        "bitbucket" => string.Format(
            CultureInfo.InvariantCulture,
            "https://api.bitbucket.org/2.0/repositories/{0}/{1}/diff/{2}",
            Owner, Repo, Hash),
        _ => OriginalUrl
    };
}
/// <summary>
/// Default implementation of <see cref="IUpstreamCommitAnalyzer"/>.
/// </summary>
/// <remarks>
/// For every recognized commit URL the analyzer downloads the textual diff through the named
/// <c>"upstream-commits"</c> HTTP client, splits it per file, and derives modified function names,
/// constants and coarse condition tags from the added lines using regular-expression heuristics.
/// URLs that cannot be parsed or fetched are reported as warnings instead of failing the batch.
/// </remarks>
public sealed partial class UpstreamCommitAnalyzer : IUpstreamCommitAnalyzer
{
    private readonly IHttpClientFactory _httpClientFactory;

    // Not read by any member of this class; retained so the constructor signature stays unchanged.
    private readonly TimeProvider _timeProvider;

    private readonly ILogger<UpstreamCommitAnalyzer> _logger;

    /// <summary>
    /// Creates the analyzer.
    /// </summary>
    /// <param name="httpClientFactory">Factory used to obtain the <c>"upstream-commits"</c> client.</param>
    /// <param name="timeProvider">Time source (currently unused by this class).</param>
    /// <param name="logger">Logger for fetch diagnostics.</param>
    public UpstreamCommitAnalyzer(
        IHttpClientFactory httpClientFactory,
        TimeProvider timeProvider,
        ILogger<UpstreamCommitAnalyzer> logger)
    {
        _httpClientFactory = httpClientFactory;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<CommitAnalysisResult> AnalyzeAsync(
        ImmutableArray<string> commitUrls,
        CancellationToken ct = default)
    {
        // IsDefaultOrEmpty also covers default(ImmutableArray<string>), on which IsEmpty throws.
        if (commitUrls.IsDefaultOrEmpty)
        {
            return CommitAnalysisResult.Empty;
        }

        var commits = new List<AnalyzedCommit>();
        var warnings = new List<string>();
        var allModifiedFunctions = new HashSet<string>(StringComparer.Ordinal);
        var allAddedConstants = new HashSet<string>(StringComparer.Ordinal);
        var allAddedConditions = new HashSet<string>(StringComparer.Ordinal);

        foreach (var url in commitUrls)
        {
            var parsed = ParseCommitUrl(url);
            if (parsed is null)
            {
                warnings.Add(string.Format(
                    CultureInfo.InvariantCulture,
                    "Could not parse commit URL: {0}",
                    url));
                continue;
            }

            try
            {
                var commit = await FetchAndAnalyzeCommitAsync(parsed, ct);
                commits.Add(commit);

                foreach (var file in commit.Files)
                {
                    foreach (var func in file.FunctionsModified)
                    {
                        allModifiedFunctions.Add(func);
                    }

                    foreach (var line in file.LinesAdded)
                    {
                        ExtractConstantsFromLine(line, allAddedConstants);
                        ExtractConditionsFromLine(line, allAddedConditions);
                    }
                }
            }
            catch (Exception ex) when (IsFetchFailure(ex, ct))
            {
                _logger.LogWarning(ex, "Failed to fetch commit {Url}", url);
                warnings.Add(string.Format(
                    CultureInfo.InvariantCulture,
                    "Failed to fetch commit {0}: {1}",
                    url, ex.Message));

                // Keep a placeholder so callers can see which commits were requested but not analyzed.
                commits.Add(new AnalyzedCommit
                {
                    Url = url,
                    Hash = parsed.Hash,
                    WasFetched = false
                });
            }
        }

        // Ordinal ordering keeps the aggregated output deterministic across runs.
        return new CommitAnalysisResult
        {
            Commits = [.. commits],
            ModifiedFunctions = [.. allModifiedFunctions.OrderBy(f => f, StringComparer.Ordinal)],
            AddedConstants = [.. allAddedConstants.OrderBy(c => c, StringComparer.Ordinal)],
            AddedConditions = [.. allAddedConditions.OrderBy(c => c, StringComparer.Ordinal)],
            Warnings = [.. warnings]
        };
    }

    /// <inheritdoc />
    public ParsedCommitUrl? ParseCommitUrl(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return null;
        }

        // GitHub:    https://github.com/owner/repo/commit/hash
        // GitLab:    https://gitlab.com/owner/repo/-/commit/hash
        // Bitbucket: https://bitbucket.org/owner/repo/commits/hash
        return TryParseWith(GitHubCommitPattern(), "github", url)
            ?? TryParseWith(GitLabCommitPattern(), "gitlab", url)
            ?? TryParseWith(BitbucketCommitPattern(), "bitbucket", url);
    }

    /// <summary>
    /// Matches <paramref name="url"/> against one host pattern and builds the parsed result.
    /// </summary>
    /// <returns>The parsed URL, or <c>null</c> when the pattern does not match.</returns>
    private static ParsedCommitUrl? TryParseWith(Regex pattern, string host, string url)
    {
        var match = pattern.Match(url);
        if (!match.Success)
        {
            return null;
        }

        return new ParsedCommitUrl
        {
            Host = host,
            Owner = match.Groups["owner"].Value,
            Repo = match.Groups["repo"].Value,
            Hash = match.Groups["hash"].Value,
            OriginalUrl = url
        };
    }

    /// <summary>
    /// Decides whether an exception means "this one commit could not be fetched".
    /// </summary>
    /// <remarks>
    /// HTTP errors always qualify. A cancellation qualifies only when the caller's token was not
    /// cancelled, which is how an <see cref="HttpClient"/> timeout surfaces; caller-requested
    /// cancellation must keep propagating.
    /// </remarks>
    private static bool IsFetchFailure(Exception ex, CancellationToken ct)
        => ex is HttpRequestException
            || (ex is OperationCanceledException && !ct.IsCancellationRequested);

    /// <summary>
    /// Downloads the diff for one commit and parses it into per-file changes.
    /// </summary>
    /// <exception cref="HttpRequestException">The request failed or returned a non-success status.</exception>
    private async Task<AnalyzedCommit> FetchAndAnalyzeCommitAsync(
        ParsedCommitUrl parsed,
        CancellationToken ct)
    {
        var client = _httpClientFactory.CreateClient("upstream-commits");

        // Fetch the diff
        var diffUrl = parsed.GetDiffUrl();
        _logger.LogDebug("Fetching diff from {Url}", diffUrl);

        using var request = new HttpRequestMessage(HttpMethod.Get, diffUrl);
        request.Headers.Add("Accept", "text/plain");
        request.Headers.Add("User-Agent", "StellaOps-GoldenSet/1.0");

        using var response = await client.SendAsync(request, ct);
        response.EnsureSuccessStatusCode();

        var diffContent = await response.Content.ReadAsStringAsync(ct);
        var files = ParseDiff(diffContent);

        return new AnalyzedCommit
        {
            Url = parsed.OriginalUrl,
            Hash = parsed.Hash,
            Files = files,
            WasFetched = true
        };
    }

    /// <summary>
    /// Splits a git-style unified diff into per-file added lines, removed lines and touched functions.
    /// </summary>
    /// <remarks>
    /// Content lines are only collected inside a hunk (after an <c>@@</c> header). That keeps the
    /// <c>---</c>/<c>+++</c> file headers out of the result without discarding real changes whose
    /// text happens to start with <c>++</c> or <c>--</c>.
    /// </remarks>
    private static ImmutableArray<FileDiff> ParseDiff(string diffContent)
    {
        var files = new List<FileDiff>();
        FileDiff? currentFile = null;
        var inHunk = false;
        var currentAddedLines = new List<string>();
        var currentRemovedLines = new List<string>();
        var currentFunctions = new HashSet<string>(StringComparer.Ordinal);

        // Emits the file being accumulated (if any) and resets all per-file state.
        void FlushCurrentFile()
        {
            if (currentFile is not null)
            {
                files.Add(currentFile with
                {
                    LinesAdded = [.. currentAddedLines],
                    LinesRemoved = [.. currentRemovedLines],
                    FunctionsModified = [.. currentFunctions]
                });
            }

            currentFile = null;
            inHunk = false;
            currentAddedLines.Clear();
            currentRemovedLines.Clear();
            currentFunctions.Clear();
        }

        foreach (var rawLine in diffContent.Split('\n'))
        {
            // Tolerate CRLF line endings so collected lines carry no stray carriage return.
            var line = rawLine.TrimEnd('\r');

            // New file header: diff --git a/path b/path
            if (line.StartsWith("diff --git ", StringComparison.Ordinal))
            {
                FlushCurrentFile();

                // When the path cannot be parsed the section is skipped entirely rather than
                // being attributed to the previous file.
                var pathMatch = DiffFilePathPattern().Match(line);
                if (pathMatch.Success)
                {
                    currentFile = new FileDiff { Path = pathMatch.Groups["path"].Value };
                }

                continue;
            }

            if (currentFile is null)
            {
                continue;
            }

            // Hunk header: @@ -start,count +start,count @@ function_context
            if (line.StartsWith("@@ ", StringComparison.Ordinal))
            {
                inHunk = true;

                var hunkMatch = HunkHeaderPattern().Match(line);
                if (hunkMatch.Success && hunkMatch.Groups["func"].Success)
                {
                    var context = hunkMatch.Groups["func"].Value.Trim();
                    if (!string.IsNullOrEmpty(context))
                    {
                        // Extract function name from context
                        var funcNameMatch = FunctionNamePattern().Match(context);
                        if (funcNameMatch.Success)
                        {
                            currentFunctions.Add(funcNameMatch.Groups["name"].Value);
                        }
                    }
                }

                continue;
            }

            if (!inHunk)
            {
                // Per-file metadata between the header and the first hunk (index, ---, +++, modes).
                continue;
            }

            if (line.StartsWith('+'))
            {
                currentAddedLines.Add(line[1..]);
            }
            else if (line.StartsWith('-'))
            {
                currentRemovedLines.Add(line[1..]);
            }
        }

        // Save last file
        FlushCurrentFile();

        return [.. files];
    }

    /// <summary>
    /// Adds hex literals, multi-digit numbers following a comparison/assignment operator, and
    /// <c>sizeof(...)</c> expressions found in <paramref name="line"/> to <paramref name="constants"/>.
    /// </summary>
    private static void ExtractConstantsFromLine(string line, HashSet<string> constants)
    {
        // Hex constants: 0x1234, 0XABCD
        foreach (Match match in HexConstantPattern().Matches(line))
        {
            constants.Add(match.Value);
        }

        // Numeric constants in comparisons: > 1024, < 4096, == 256
        foreach (Match match in NumericComparisonPattern().Matches(line))
        {
            constants.Add(match.Groups["num"].Value);
        }

        // Size constants: sizeof(type)
        foreach (Match match in SizeofPattern().Matches(line))
        {
            constants.Add(match.Value);
        }
    }

    /// <summary>
    /// Adds the tags <c>bounds_check</c>, <c>null_check</c> and/or <c>length_check</c> to
    /// <paramref name="conditions"/> when <paramref name="line"/> matches the corresponding heuristic.
    /// </summary>
    private static void ExtractConditionsFromLine(string line, HashSet<string> conditions)
    {
        // Simple bounds checks
        if (BoundsCheckPattern().IsMatch(line))
        {
            conditions.Add("bounds_check");
        }

        // NULL checks
        if (NullCheckPattern().IsMatch(line))
        {
            conditions.Add("null_check");
        }

        // Length/size validation
        if (LengthCheckPattern().IsMatch(line))
        {
            conditions.Add("length_check");
        }
    }

    // Regex patterns. RegexOptions.Compiled is omitted because the regex source generator ignores it.
    [GeneratedRegex(@"github\.com/(?<owner>[^/]+)/(?<repo>[^/]+)/commit/(?<hash>[a-fA-F0-9]{7,40})", RegexOptions.IgnoreCase)]
    private static partial Regex GitHubCommitPattern();

    [GeneratedRegex(@"gitlab\.com/(?<owner>[^/]+)/(?<repo>[^/]+)/-/commit/(?<hash>[a-fA-F0-9]{7,40})", RegexOptions.IgnoreCase)]
    private static partial Regex GitLabCommitPattern();

    [GeneratedRegex(@"bitbucket\.org/(?<owner>[^/]+)/(?<repo>[^/]+)/commits/(?<hash>[a-fA-F0-9]{7,40})", RegexOptions.IgnoreCase)]
    private static partial Regex BitbucketCommitPattern();

    [GeneratedRegex(@"diff --git a/(?<path>.+?) b/")]
    private static partial Regex DiffFilePathPattern();

    [GeneratedRegex(@"^@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@\s*(?<func>.*)$")]
    private static partial Regex HunkHeaderPattern();

    [GeneratedRegex(@"(?:^|\s)(?<name>\w+)\s*\(")]
    private static partial Regex FunctionNamePattern();

    [GeneratedRegex(@"0[xX][0-9a-fA-F]+")]
    private static partial Regex HexConstantPattern();

    [GeneratedRegex(@"[<>=!]=?\s*(?<num>\d{2,})")]
    private static partial Regex NumericComparisonPattern();

    [GeneratedRegex(@"sizeof\s*\([^)]+\)")]
    private static partial Regex SizeofPattern();

    [GeneratedRegex(@"\b(len|size|count|length)\s*[<>=]", RegexOptions.IgnoreCase)]
    private static partial Regex BoundsCheckPattern();

    [GeneratedRegex(@"[!=]=\s*NULL\b|\bNULL\s*[!=]=|[!=]=\s*nullptr\b|\bnullptr\s*[!=]=")]
    private static partial Regex NullCheckPattern();

    [GeneratedRegex(@"\b(strlen|wcslen|sizeof)\s*\(")]
    private static partial Regex LengthCheckPattern();
}

View File

@@ -0,0 +1,124 @@
using System.ComponentModel.DataAnnotations;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Configuration options for the GoldenSet module.
/// </summary>
/// <remarks>
/// <c>GoldenSetServiceCollectionExtensions.AddGoldenSetServices</c> binds this type to the
/// <see cref="SectionName"/> configuration section and enables data-annotation validation at startup.
/// </remarks>
public sealed class GoldenSetOptions
{
/// <summary>
/// Configuration section name.
/// </summary>
public const string SectionName = "BinaryIndex:GoldenSet";
/// <summary>
/// Current schema version for golden set definitions.
/// Marked <see cref="RequiredAttribute"/>; defaults to <c>GoldenSetConstants.CurrentSchemaVersion</c>.
/// </summary>
[Required]
public string SchemaVersion { get; set; } = GoldenSetConstants.CurrentSchemaVersion;
/// <summary>
/// Validation options.
/// </summary>
public GoldenSetValidationOptions Validation { get; set; } = new();
/// <summary>
/// Storage options.
/// </summary>
public GoldenSetStorageOptions Storage { get; set; } = new();
/// <summary>
/// Caching options.
/// </summary>
public GoldenSetCachingOptions Caching { get; set; } = new();
/// <summary>
/// Authoring options.
/// </summary>
public GoldenSetAuthoringOptions Authoring { get; set; } = new();
}
/// <summary>
/// Authoring options for golden sets.
/// </summary>
public sealed class GoldenSetAuthoringOptions
{
/// <summary>
/// Enable AI-assisted enrichment. Defaults to <c>true</c>.
/// </summary>
public bool EnableAiEnrichment { get; set; } = true;
/// <summary>
/// Enable upstream commit analysis. Defaults to <c>true</c>.
/// </summary>
public bool EnableCommitAnalysis { get; set; } = true;
/// <summary>
/// Maximum number of commits to analyze per vulnerability. Defaults to 5.
/// </summary>
/// <remarks>
/// NOTE(review): <c>UpstreamCommitAnalyzer.AnalyzeAsync</c> processes every URL it is given and
/// does not read this value; presumably the caller applies the limit. Confirm.
/// </remarks>
public int MaxCommitsToAnalyze { get; set; } = 5;
/// <summary>
/// Minimum confidence threshold for auto-accepting AI suggestions. Defaults to 0.8.
/// </summary>
/// <remarks>
/// NOTE(review): presumably expressed on a 0..1 scale; no range validation is declared here.
/// </remarks>
public decimal AutoAcceptConfidenceThreshold { get; set; } = 0.8m;
}
/// <summary>
/// Validation options for golden sets.
/// </summary>
public sealed class GoldenSetValidationOptions
{
/// <summary>
/// Validate that the CVE exists in NVD/OSV (requires network). Defaults to <c>true</c>.
/// </summary>
public bool ValidateCveExists { get; set; } = true;
/// <summary>
/// Validate that sinks are in the registry. Defaults to <c>true</c>.
/// </summary>
public bool ValidateSinks { get; set; } = true;
/// <summary>
/// Validate edge format strictly (must match bbN->bbM). Defaults to <c>true</c>.
/// </summary>
public bool StrictEdgeFormat { get; set; } = true;
/// <summary>
/// Skip network calls (air-gap mode). Defaults to <c>false</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably takes precedence over <see cref="ValidateCveExists"/>, which needs
/// network access; confirm in the validator implementation.
/// </remarks>
public bool OfflineMode { get; set; } = false;
}
/// <summary>
/// Storage options for golden sets.
/// </summary>
public sealed class GoldenSetStorageOptions
{
/// <summary>
/// PostgreSQL schema name for golden sets. Defaults to <c>golden_sets</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the initial migration script creates its objects in a hard-coded
/// <c>golden_sets</c> schema, so a different value here presumably requires a matching change to
/// the migration. Confirm.
/// </remarks>
public string PostgresSchema { get; set; } = "golden_sets";
/// <summary>
/// Connection string name (from configuration). Defaults to <c>BinaryIndex</c>.
/// </summary>
public string ConnectionStringName { get; set; } = "BinaryIndex";
}
/// <summary>
/// Caching options for golden sets. Both durations are whole minutes.
/// </summary>
public sealed class GoldenSetCachingOptions
{
/// <summary>
/// Cache duration for sink registry lookups (minutes). Defaults to 60.
/// </summary>
public int SinkRegistryCacheMinutes { get; set; } = 60;
/// <summary>
/// Cache duration for golden set definitions (minutes). Defaults to 15.
/// </summary>
public int DefinitionCacheMinutes { get; set; } = 15;
}

View File

@@ -0,0 +1,100 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using StellaOps.BinaryIndex.GoldenSet.Authoring;
using StellaOps.BinaryIndex.GoldenSet.Authoring.Extractors;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Extension methods for registering GoldenSet services.
/// </summary>
/// <remarks>
/// Service registrations use the <c>TryAdd*</c> helpers, so a registration made earlier by the
/// host is never replaced.
/// </remarks>
public static class GoldenSetServiceCollectionExtensions
{
    /// <summary>
    /// Adds GoldenSet services to the dependency injection container.
    /// </summary>
    /// <remarks>
    /// Binds <see cref="GoldenSetOptions"/> to the <see cref="GoldenSetOptions.SectionName"/>
    /// section with data-annotation validation at startup, and registers the sink registry, the
    /// validator and the memory cache.
    /// </remarks>
    /// <param name="services">The service collection.</param>
    /// <param name="configuration">The configuration.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configuration"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddGoldenSetServices(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        // Configuration
        services.AddOptions<GoldenSetOptions>()
            .Bind(configuration.GetSection(GoldenSetOptions.SectionName))
            .ValidateDataAnnotations()
            .ValidateOnStart();

        // Core services
        services.TryAddSingleton<ISinkRegistry, SinkRegistry>();
        services.TryAddSingleton<IGoldenSetValidator, GoldenSetValidator>();

        // Memory cache (if not already registered)
        services.AddMemoryCache();

        return services;
    }

    /// <summary>
    /// Adds GoldenSet authoring services to the dependency injection container.
    /// </summary>
    /// <remarks>
    /// <see cref="UpstreamCommitAnalyzer"/> also needs an <c>IHttpClientFactory</c>, which this
    /// method does not register; the host is expected to provide it.
    /// </remarks>
    /// <param name="services">The service collection.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public static IServiceCollection AddGoldenSetAuthoring(this IServiceCollection services)
    {
        ArgumentNullException.ThrowIfNull(services);

        // UpstreamCommitAnalyzer takes a TimeProvider in its constructor. Register the system
        // clock as a fallback so resolving it does not fail when the host has not supplied one.
        services.TryAddSingleton(TimeProvider.System);

        // Source extractors
        services.TryAddEnumerable(ServiceDescriptor.Singleton<IGoldenSetSourceExtractor, NvdGoldenSetExtractor>());

        // Composite extractor
        services.TryAddSingleton<IGoldenSetExtractor, GoldenSetExtractor>();

        // Upstream commit analyzer
        services.TryAddSingleton<IUpstreamCommitAnalyzer, UpstreamCommitAnalyzer>();

        // Enrichment service
        services.TryAddScoped<IGoldenSetEnrichmentService, GoldenSetEnrichmentService>();

        // Review workflow
        services.TryAddScoped<IGoldenSetReviewService, GoldenSetReviewService>();

        return services;
    }

    /// <summary>
    /// Adds PostgreSQL-based GoldenSet storage to the dependency injection container.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public static IServiceCollection AddGoldenSetPostgresStorage(this IServiceCollection services)
    {
        ArgumentNullException.ThrowIfNull(services);

        services.TryAddScoped<IGoldenSetStore, PostgresGoldenSetStore>();

        return services;
    }

    /// <summary>
    /// Adds a CVE validator implementation to the dependency injection container.
    /// </summary>
    /// <remarks>
    /// Because the registration uses <c>TryAddSingleton</c>, only the first validator registered
    /// for <see cref="ICveValidator"/> takes effect.
    /// </remarks>
    /// <typeparam name="TValidator">The CVE validator implementation type.</typeparam>
    /// <param name="services">The service collection.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public static IServiceCollection AddGoldenSetCveValidator<TValidator>(this IServiceCollection services)
        where TValidator : class, ICveValidator
    {
        ArgumentNullException.ThrowIfNull(services);

        services.TryAddSingleton<ICveValidator, TValidator>();

        return services;
    }
}

View File

@@ -0,0 +1,154 @@
-- Golden Set Storage Schema Migration
-- Version: 1.0.0
-- Date: 2026-01-10
-- Description: Initial schema for golden set definitions storage
-- Create schema
CREATE SCHEMA IF NOT EXISTS golden_sets;

-- Main golden set table: one row per golden set definition, keyed by its ID.
-- The definition is stored both as YAML text and as JSONB.
CREATE TABLE IF NOT EXISTS golden_sets.definitions (
    id TEXT PRIMARY KEY,
    component TEXT NOT NULL,
    content_digest TEXT NOT NULL UNIQUE,
    status TEXT NOT NULL DEFAULT 'draft',
    definition_yaml TEXT NOT NULL,
    definition_json JSONB NOT NULL,
    target_count INTEGER NOT NULL,
    author_id TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    reviewed_by TEXT,
    reviewed_at TIMESTAMPTZ,
    source_ref TEXT NOT NULL,
    tags TEXT[] NOT NULL DEFAULT '{}',
    schema_version TEXT NOT NULL DEFAULT '1.0.0',
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Indexes for definitions table.
-- content_digest has no explicit index: its UNIQUE constraint is already backed by a unique
-- index, so a second one would only add write overhead.
CREATE INDEX IF NOT EXISTS idx_goldensets_component ON golden_sets.definitions(component);
CREATE INDEX IF NOT EXISTS idx_goldensets_status ON golden_sets.definitions(status);
CREATE INDEX IF NOT EXISTS idx_goldensets_tags ON golden_sets.definitions USING gin(tags);
CREATE INDEX IF NOT EXISTS idx_goldensets_created ON golden_sets.definitions(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_goldensets_component_status ON golden_sets.definitions(component, status);
-- Target extraction table (for efficient function lookup)
-- One row per target of a definition; rows are removed automatically when the parent
-- definition is deleted (ON DELETE CASCADE).
-- NOTE(review): gen_random_uuid() is built in from PostgreSQL 13; older servers need the
-- pgcrypto extension - confirm the minimum supported server version.
CREATE TABLE IF NOT EXISTS golden_sets.targets (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
golden_set_id TEXT NOT NULL REFERENCES golden_sets.definitions(id) ON DELETE CASCADE,
function_name TEXT NOT NULL,
edges JSONB NOT NULL DEFAULT '[]',
sinks TEXT[] NOT NULL DEFAULT '{}',
constants TEXT[] NOT NULL DEFAULT '{}',
taint_invariant TEXT,
source_file TEXT,
source_line INTEGER
);
-- Indexes for targets table (GIN on sinks supports array containment/overlap lookups)
CREATE INDEX IF NOT EXISTS idx_targets_golden_set ON golden_sets.targets(golden_set_id);
CREATE INDEX IF NOT EXISTS idx_targets_function ON golden_sets.targets(function_name);
CREATE INDEX IF NOT EXISTS idx_targets_sinks ON golden_sets.targets USING gin(sinks);
-- Audit log table
-- One row per recorded action on a golden set, with optional before/after status and a
-- free-form JSONB details payload.
-- NOTE(review): ON DELETE CASCADE means deleting a definition also deletes its audit rows;
-- confirm that losing the audit trail on delete is intended.
CREATE TABLE IF NOT EXISTS golden_sets.audit_log (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
golden_set_id TEXT NOT NULL REFERENCES golden_sets.definitions(id) ON DELETE CASCADE,
action TEXT NOT NULL,
actor_id TEXT NOT NULL,
old_status TEXT,
new_status TEXT,
details JSONB,
timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Indexes for audit log (by golden set, newest-first by time, and by actor)
CREATE INDEX IF NOT EXISTS idx_audit_golden_set ON golden_sets.audit_log(golden_set_id);
CREATE INDEX IF NOT EXISTS idx_audit_timestamp ON golden_sets.audit_log(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_audit_actor ON golden_sets.audit_log(actor_id);
-- Sink registry reference table
-- Keyed by sink (function) name; each sink has a category, a list of CWE identifiers and a
-- severity that defaults to 'medium'.
CREATE TABLE IF NOT EXISTS golden_sets.sink_registry (
sink_name TEXT PRIMARY KEY,
category TEXT NOT NULL,
description TEXT,
cwe_ids TEXT[] NOT NULL DEFAULT '{}',
severity TEXT NOT NULL DEFAULT 'medium'
);
-- Seed common sinks
-- The statement is safe to re-run: on a sink_name conflict the existing row's category,
-- description, cwe_ids and severity are overwritten with the values listed here.
INSERT INTO golden_sets.sink_registry (sink_name, category, cwe_ids, severity, description) VALUES
-- Memory corruption sinks
('memcpy', 'memory', ARRAY['CWE-120', 'CWE-787'], 'high', 'Buffer copy without bounds checking'),
('strcpy', 'memory', ARRAY['CWE-120', 'CWE-787'], 'high', 'String copy without bounds checking'),
('strncpy', 'memory', ARRAY['CWE-120'], 'medium', 'String copy with size - may not null-terminate'),
('sprintf', 'memory', ARRAY['CWE-120', 'CWE-134'], 'high', 'Format string to buffer without bounds'),
('gets', 'memory', ARRAY['CWE-120'], 'critical', 'Read input without bounds - NEVER USE'),
('strcat', 'memory', ARRAY['CWE-120', 'CWE-787'], 'high', 'String concatenation without bounds'),
-- Memory management
('free', 'memory', ARRAY['CWE-415', 'CWE-416'], 'high', 'Memory deallocation - double-free/use-after-free risk'),
('realloc', 'memory', ARRAY['CWE-416'], 'medium', 'Memory reallocation - use-after-free risk'),
('malloc', 'memory', ARRAY['CWE-401'], 'low', 'Memory allocation - leak risk'),
-- OpenSSL memory
('OPENSSL_malloc', 'memory', ARRAY['CWE-401'], 'low', 'OpenSSL memory allocation'),
('OPENSSL_free', 'memory', ARRAY['CWE-415', 'CWE-416'], 'medium', 'OpenSSL memory deallocation'),
-- Command injection
('system', 'command_injection', ARRAY['CWE-78'], 'critical', 'Execute shell command'),
('exec', 'command_injection', ARRAY['CWE-78'], 'critical', 'Execute command'),
('popen', 'command_injection', ARRAY['CWE-78'], 'high', 'Open pipe to command'),
-- Code injection
('dlopen', 'code_injection', ARRAY['CWE-427'], 'high', 'Dynamic library loading'),
('LoadLibrary', 'code_injection', ARRAY['CWE-427'], 'high', 'Windows DLL loading'),
-- Path traversal
('fopen', 'path_traversal', ARRAY['CWE-22'], 'medium', 'File open'),
('open', 'path_traversal', ARRAY['CWE-22'], 'medium', 'POSIX file open'),
-- Network
('connect', 'network', ARRAY['CWE-918'], 'medium', 'Network connection'),
('send', 'network', ARRAY['CWE-319'], 'medium', 'Send data over network'),
('recv', 'network', ARRAY['CWE-319'], 'medium', 'Receive data from network'),
-- SQL injection
('sqlite3_exec', 'sql_injection', ARRAY['CWE-89'], 'high', 'SQLite execute'),
('mysql_query', 'sql_injection', ARRAY['CWE-89'], 'high', 'MySQL query'),
('PQexec', 'sql_injection', ARRAY['CWE-89'], 'high', 'PostgreSQL execute'),
-- Cryptographic
('EVP_DecryptUpdate', 'crypto', ARRAY['CWE-327'], 'medium', 'OpenSSL decrypt update'),
('EVP_EncryptUpdate', 'crypto', ARRAY['CWE-327'], 'medium', 'OpenSSL encrypt update'),
('d2i_ASN1_OCTET_STRING', 'crypto', ARRAY['CWE-295'], 'medium', 'DER to ASN1 octet string'),
('PKCS12_parse', 'crypto', ARRAY['CWE-295'], 'medium', 'Parse PKCS12 structure'),
('PKCS12_unpack_p7data', 'crypto', ARRAY['CWE-295'], 'medium', 'Unpack PKCS7 data')
-- Upsert: keep the seed authoritative for rows it defines
ON CONFLICT (sink_name) DO UPDATE SET
category = EXCLUDED.category,
description = EXCLUDED.description,
cwe_ids = EXCLUDED.cwe_ids,
severity = EXCLUDED.severity;
-- Create function for automatic updated_at timestamp.
-- Trigger function: overwrites updated_at on the row being written with NOW() and returns the
-- modified row so the write proceeds with the new value.
CREATE OR REPLACE FUNCTION golden_sets.update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create trigger for updated_at
-- Dropped first so the script can be re-run; fires before every row update on definitions and
-- delegates to golden_sets.update_updated_at_column().
DROP TRIGGER IF EXISTS update_definitions_updated_at ON golden_sets.definitions;
CREATE TRIGGER update_definitions_updated_at
BEFORE UPDATE ON golden_sets.definitions
FOR EACH ROW
EXECUTE FUNCTION golden_sets.update_updated_at_column();
-- Comments
-- Table descriptions stored in the database catalog (one per table created above).
COMMENT ON TABLE golden_sets.definitions IS 'Ground-truth vulnerability code-level manifestation facts';
COMMENT ON TABLE golden_sets.targets IS 'Individual vulnerable code targets extracted from definitions';
COMMENT ON TABLE golden_sets.audit_log IS 'Audit trail for golden set changes';
COMMENT ON TABLE golden_sets.sink_registry IS 'Reference data for known vulnerability sinks';

View File

@@ -0,0 +1,261 @@
using System.Collections.Immutable;
using System.Globalization;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Represents ground-truth facts about a vulnerability's code-level manifestation.
/// Hand-curated, reviewed like unit tests, tiny by design.
/// </summary>
/// <remarks>
/// Immutable record: every member is init-only. <see cref="Id"/>, <see cref="Component"/>,
/// <see cref="Targets"/> and <see cref="Metadata"/> must be supplied at construction.
/// </remarks>
public sealed record GoldenSetDefinition
{
/// <summary>
/// Unique identifier (typically CVE ID, e.g., "CVE-2024-0727").
/// </summary>
public required string Id { get; init; }
/// <summary>
/// Affected component name (e.g., "openssl", "glibc").
/// </summary>
public required string Component { get; init; }
/// <summary>
/// Vulnerable code targets (functions, edges, sinks).
/// </summary>
public required ImmutableArray<VulnerableTarget> Targets { get; init; }
/// <summary>
/// Optional witness input for reproducing the vulnerability; <c>null</c> when none is recorded.
/// </summary>
public WitnessInput? Witness { get; init; }
/// <summary>
/// Metadata about the golden set.
/// </summary>
public required GoldenSetMetadata Metadata { get; init; }
/// <summary>
/// Content-addressed digest of the canonical form (computed, not user-provided).
/// <c>null</c> until a digest has been assigned; the library charter describes these digests as
/// SHA256-based.
/// </summary>
public string? ContentDigest { get; init; }
}
/// <summary>
/// A specific vulnerable code target within a component.
/// </summary>
/// <remarks>
/// Only <see cref="FunctionName"/> is required. The collection properties default to empty
/// arrays and the remaining properties default to <c>null</c>.
/// </remarks>
public sealed record VulnerableTarget
{
/// <summary>
/// Function name (symbol or demangled name).
/// </summary>
public required string FunctionName { get; init; }
/// <summary>
/// Basic block edges that constitute the vulnerable path.
/// </summary>
/// <value>Empty by default.</value>
public ImmutableArray<BasicBlockEdge> Edges { get; init; } = [];
/// <summary>
/// Sink functions that are reached (e.g., "memcpy", "strcpy").
/// </summary>
/// <value>Empty by default. Names are stored as given; no registry lookup happens here.</value>
public ImmutableArray<string> Sinks { get; init; } = [];
/// <summary>
/// Constants/magic values that identify the vulnerable code.
/// </summary>
/// <value>Empty by default. Values are kept as strings exactly as supplied.</value>
public ImmutableArray<string> Constants { get; init; } = [];
/// <summary>
/// Human-readable invariant that must hold for exploitation.
/// </summary>
/// <value><c>null</c> when not specified.</value>
public string? TaintInvariant { get; init; }
/// <summary>
/// Optional source file hint.
/// </summary>
/// <value><c>null</c> when not specified.</value>
public string? SourceFile { get; init; }
/// <summary>
/// Optional source line hint.
/// </summary>
/// <value>
/// <c>null</c> when not specified. NOTE(review): whether the line number is 1-based is not
/// stated anywhere in this file - confirm with producers of golden sets.
/// </value>
public int? SourceLine { get; init; }
}
/// <summary>
/// A basic block edge in the CFG.
/// Format: "bbN->bbM" where N and M are block identifiers.
/// </summary>
/// <remarks>
/// Parsing is deliberately lenient: it only requires exactly two non-blank identifiers
/// separated by a single "->", and trims whitespace around each identifier. It does not
/// enforce the "bb" prefix or numeric suffix; stricter checks (for example against
/// <c>GoldenSetConstants.EdgePattern</c>) are left to callers.
/// </remarks>
public sealed record BasicBlockEdge
{
    /// <summary>
    /// Source basic block identifier (e.g., "bb3").
    /// </summary>
    public required string From { get; init; }

    /// <summary>
    /// Target basic block identifier (e.g., "bb7").
    /// </summary>
    public required string To { get; init; }

    /// <summary>
    /// Parses an edge from string format "bbN->bbM".
    /// </summary>
    /// <param name="edge">The edge string to parse.</param>
    /// <returns>A new BasicBlockEdge instance.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="edge"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when <paramref name="edge"/> is empty or whitespace.</exception>
    /// <exception cref="FormatException">Thrown when the edge format is invalid.</exception>
    public static BasicBlockEdge Parse(string edge)
    {
        // Null/blank input is an argument error rather than a format error, so it is
        // rejected before the shared parsing logic runs.
        ArgumentException.ThrowIfNullOrWhiteSpace(edge);

        // Single source of truth for the format rules: Parse is TryParse plus an exception.
        if (!TryParse(edge, out var parsed))
        {
            throw new FormatException(
                string.Format(CultureInfo.InvariantCulture, "Invalid edge format: {0}. Expected 'bbN->bbM'.", edge));
        }

        return parsed;
    }

    /// <summary>
    /// Tries to parse an edge from string format "bbN->bbM".
    /// </summary>
    /// <param name="edge">The edge string to parse.</param>
    /// <param name="result">The parsed edge, or null if parsing failed.</param>
    /// <returns>True if parsing succeeded; otherwise, false.</returns>
    public static bool TryParse(
        string? edge,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out BasicBlockEdge? result)
    {
        result = null;
        if (string.IsNullOrWhiteSpace(edge))
        {
            return false;
        }

        // TrimEntries tolerates "bb1 -> bb2". A second "->" yields three parts and is rejected.
        var parts = edge.Split("->", StringSplitOptions.TrimEntries);
        if (parts.Length != 2 || string.IsNullOrWhiteSpace(parts[0]) || string.IsNullOrWhiteSpace(parts[1]))
        {
            return false;
        }

        result = new BasicBlockEdge { From = parts[0], To = parts[1] };
        return true;
    }

    /// <summary>
    /// Returns the edge in its canonical "From->To" text form (no surrounding whitespace).
    /// </summary>
    /// <returns>The concatenation of <see cref="From"/>, "->" and <see cref="To"/>.</returns>
    public override string ToString() => string.Concat(From, "->", To);
}
/// <summary>
/// Witness input for reproducing the vulnerability.
/// </summary>
/// <remarks>
/// All members are optional: <see cref="Arguments"/> defaults to an empty array and the
/// string properties default to <c>null</c>.
/// </remarks>
public sealed record WitnessInput
{
/// <summary>
/// Command-line arguments to trigger the vulnerability.
/// </summary>
/// <value>Empty by default; element order is preserved as supplied.</value>
public ImmutableArray<string> Arguments { get; init; } = [];
/// <summary>
/// Human-readable invariant/precondition.
/// </summary>
/// <value><c>null</c> when not specified.</value>
public string? Invariant { get; init; }
/// <summary>
/// Reference to PoC file (content-addressed, format: "sha256:...").
/// </summary>
/// <value>
/// <c>null</c> when not specified. The value is stored as given; this type does not check
/// that it matches the digest format.
/// </value>
public string? PocFileRef { get; init; }
}
/// <summary>
/// Metadata about the golden set.
/// </summary>
/// <remarks>
/// <see cref="AuthorId"/>, <see cref="CreatedAt"/> and <see cref="SourceRef"/> are required.
/// Review information is optional and stays <c>null</c> until a review is recorded.
/// </remarks>
public sealed record GoldenSetMetadata
{
/// <summary>
/// Author ID (who created the golden set).
/// </summary>
public required string AuthorId { get; init; }
/// <summary>
/// Creation timestamp (UTC).
/// </summary>
/// <remarks>
/// The type carries an offset, so UTC is a convention here rather than something this
/// record enforces.
/// </remarks>
public required DateTimeOffset CreatedAt { get; init; }
/// <summary>
/// Source reference (advisory URL, commit hash, etc.).
/// </summary>
public required string SourceRef { get; init; }
/// <summary>
/// Reviewer ID (if reviewed).
/// </summary>
/// <value><c>null</c> when the golden set has not been reviewed.</value>
public string? ReviewedBy { get; init; }
/// <summary>
/// Review timestamp (UTC).
/// </summary>
/// <value><c>null</c> when the golden set has not been reviewed.</value>
public DateTimeOffset? ReviewedAt { get; init; }
/// <summary>
/// Classification tags (e.g., "memory-corruption", "heap-overflow").
/// </summary>
/// <value>Empty by default.</value>
public ImmutableArray<string> Tags { get; init; } = [];
/// <summary>
/// Schema version for forward compatibility.
/// </summary>
/// <value>Defaults to <see cref="GoldenSetConstants.CurrentSchemaVersion"/> when not set explicitly.</value>
public string SchemaVersion { get; init; } = GoldenSetConstants.CurrentSchemaVersion;
}
/// <summary>
/// Status of a golden set in the corpus.
/// </summary>
/// <remarks>
/// Members have no explicit values, so their numeric values follow declaration order
/// (Draft = 0 ... Archived = 4). Inserting or reordering members changes those numbers;
/// NOTE(review): check how the status is persisted before doing so.
/// </remarks>
public enum GoldenSetStatus
{
/// <summary>Draft, not yet reviewed.</summary>
Draft,
/// <summary>Under review.</summary>
InReview,
/// <summary>Approved and active.</summary>
Approved,
/// <summary>Deprecated (CVE retracted or superseded).</summary>
Deprecated,
/// <summary>Archived (historical reference only).</summary>
Archived
}
/// <summary>
/// Constants used throughout the Golden Set module.
/// </summary>
/// <remarks>
/// The pattern members are plain regex source strings, not compiled <c>Regex</c> instances;
/// callers construct their own matcher. All patterns are anchored with <c>^</c> and <c>$</c>.
/// Being <c>const</c>, the values are baked into referencing assemblies at compile time.
/// </remarks>
public static class GoldenSetConstants
{
/// <summary>
/// Current schema version for golden set definitions.
/// </summary>
public const string CurrentSchemaVersion = "1.0.0";
/// <summary>
/// Regex pattern for CVE IDs.
/// </summary>
/// <remarks>Matches "CVE-", a four-digit year, "-", then four or more digits (upper-case prefix only).</remarks>
public const string CveIdPattern = @"^CVE-\d{4}-\d{4,}$";
/// <summary>
/// Regex pattern for GHSA IDs.
/// </summary>
/// <remarks>Matches "GHSA-" followed by three dash-separated groups of four lower-case alphanumerics.</remarks>
public const string GhsaIdPattern = @"^GHSA-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}$";
/// <summary>
/// Regex pattern for basic block edge format.
/// </summary>
/// <remarks>
/// Matches "bb" + digits, "->", "bb" + digits, with no whitespace. This is stricter than
/// <see cref="BasicBlockEdge.TryParse"/>, which accepts any two non-blank identifiers.
/// </remarks>
public const string EdgePattern = @"^bb\d+->bb\d+$";
/// <summary>
/// Regex pattern for content-addressed digest.
/// </summary>
/// <remarks>Matches "sha256:" followed by exactly 64 lower-case hexadecimal characters.</remarks>
public const string DigestPattern = @"^sha256:[a-f0-9]{64}$";
}

View File

@@ -0,0 +1,227 @@
using System.Collections.Immutable;
using System.Globalization;
using YamlDotNet.Serialization;
using YamlDotNet.Serialization.NamingConventions;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// YAML serialization for golden set definitions.
/// Uses snake_case naming convention for human-readability.
/// </summary>
/// <remarks>
/// <para>
/// Conversion goes through mutable, all-nullable DTO classes so that missing fields can be
/// reported with a precise message instead of failing inside the YAML library.
/// </para>
/// <para>
/// <see cref="GoldenSetDefinition.ContentDigest"/> is neither written nor read: it is not part
/// of the DTO, so a deserialized definition always has a <c>null</c> digest.
/// </para>
/// <para>
/// Unknown YAML keys are ignored on input. Null values and empty collections are omitted on output.
/// </para>
/// </remarks>
public static class GoldenSetYamlSerializer
{
    private static readonly IDeserializer Deserializer = new DeserializerBuilder()
        .WithNamingConvention(UnderscoredNamingConvention.Instance)
        .IgnoreUnmatchedProperties()
        .Build();

    private static readonly ISerializer Serializer = new SerializerBuilder()
        .WithNamingConvention(UnderscoredNamingConvention.Instance)
        .ConfigureDefaultValuesHandling(DefaultValuesHandling.OmitNull | DefaultValuesHandling.OmitEmptyCollections)
        .Build();

    /// <summary>
    /// Deserializes a golden set from YAML content.
    /// </summary>
    /// <param name="yaml">YAML content to parse.</param>
    /// <returns>Parsed golden set definition.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="yaml"/> is null, empty or whitespace.</exception>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the document is empty, a required field is missing, a target entry is null,
    /// or a timestamp cannot be parsed.
    /// </exception>
    /// <exception cref="FormatException">Thrown when an edge string is not in "from->to" form.</exception>
    /// <remarks>
    /// Exceptions raised by the YAML library itself for malformed input are not caught here
    /// and propagate to the caller unchanged.
    /// </remarks>
    public static GoldenSetDefinition Deserialize(string yaml)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(yaml);

        var dto = Deserializer.Deserialize<GoldenSetYamlDto>(yaml)
            ?? throw new InvalidOperationException("Failed to deserialize YAML: result was null");

        return MapToDefinition(dto);
    }

    /// <summary>
    /// Serializes a golden set to YAML content.
    /// </summary>
    /// <param name="definition">Definition to serialize.</param>
    /// <returns>YAML string representation.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="definition"/> is null.</exception>
    public static string Serialize(GoldenSetDefinition definition)
    {
        ArgumentNullException.ThrowIfNull(definition);

        var dto = MapToDto(definition);
        return Serializer.Serialize(dto);
    }

    // Builds the immutable definition, rejecting documents that lack a required top-level field.
    private static GoldenSetDefinition MapToDefinition(GoldenSetYamlDto dto)
    {
        return new GoldenSetDefinition
        {
            Id = dto.Id ?? throw new InvalidOperationException("Missing required field: id"),
            Component = dto.Component ?? throw new InvalidOperationException("Missing required field: component"),
            Targets = dto.Targets?.Select(MapTargetToDefinition).ToImmutableArray()
                ?? throw new InvalidOperationException("Missing required field: targets"),
            Witness = dto.Witness is null ? null : MapWitnessToDefinition(dto.Witness),
            Metadata = dto.Metadata is null
                ? throw new InvalidOperationException("Missing required field: metadata")
                : MapMetadataToDefinition(dto.Metadata)
        };
    }

    // Converts one target entry; edge strings are parsed into BasicBlockEdge values.
    private static VulnerableTarget MapTargetToDefinition(VulnerableTargetYamlDto dto)
    {
        // A blank list item ("- ") binds to null. Report it like the other shape errors
        // instead of letting it surface as a NullReferenceException.
        if (dto is null)
        {
            throw new InvalidOperationException("Invalid entry in field: targets (entry was null)");
        }

        return new VulnerableTarget
        {
            FunctionName = dto.Function ?? throw new InvalidOperationException("Missing required field: function"),
            Edges = dto.Edges?.Select(e => BasicBlockEdge.Parse(e)).ToImmutableArray() ?? [],
            Sinks = dto.Sinks?.ToImmutableArray() ?? [],
            Constants = dto.Constants?.ToImmutableArray() ?? [],
            TaintInvariant = dto.TaintInvariant,
            SourceFile = dto.SourceFile,
            SourceLine = dto.SourceLine
        };
    }

    // Converts the optional witness section; absent lists become empty arrays.
    private static WitnessInput MapWitnessToDefinition(WitnessYamlDto dto)
    {
        return new WitnessInput
        {
            Arguments = dto.Arguments?.ToImmutableArray() ?? [],
            Invariant = dto.Invariant,
            PocFileRef = dto.PocFileRef
        };
    }

    // Converts the metadata section; a missing schema_version falls back to the current version.
    private static GoldenSetMetadata MapMetadataToDefinition(GoldenSetMetadataYamlDto dto)
    {
        return new GoldenSetMetadata
        {
            AuthorId = dto.AuthorId ?? throw new InvalidOperationException("Missing required field: metadata.author_id"),
            CreatedAt = ParseDateTimeOffset(dto.CreatedAt, "metadata.created_at"),
            SourceRef = dto.SourceRef ?? throw new InvalidOperationException("Missing required field: metadata.source_ref"),
            ReviewedBy = dto.ReviewedBy,
            ReviewedAt = string.IsNullOrWhiteSpace(dto.ReviewedAt) ? null : ParseDateTimeOffset(dto.ReviewedAt, "metadata.reviewed_at"),
            Tags = dto.Tags?.ToImmutableArray() ?? [],
            SchemaVersion = dto.SchemaVersion ?? GoldenSetConstants.CurrentSchemaVersion
        };
    }

    // Parses a timestamp with the invariant culture. fieldName is used only for error messages.
    private static DateTimeOffset ParseDateTimeOffset(string? value, string fieldName)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new InvalidOperationException(
                string.Format(CultureInfo.InvariantCulture, "Missing required field: {0}", fieldName));
        }

        // AssumeUniversal: a timestamp written without an offset is taken as UTC, matching the
        // documented meaning of these fields. Without it such a value would be interpreted in
        // the local time zone of whichever machine parses the file, so the same YAML would
        // produce different instants on different hosts. Values that carry "Z" or an explicit
        // offset are unaffected.
        if (!DateTimeOffset.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out var result))
        {
            throw new InvalidOperationException(
                string.Format(CultureInfo.InvariantCulture, "Invalid date format in {0}: {1}", fieldName, value));
        }

        return result;
    }

    // Mirror of MapToDefinition for the output direction.
    private static GoldenSetYamlDto MapToDto(GoldenSetDefinition definition)
    {
        return new GoldenSetYamlDto
        {
            Id = definition.Id,
            Component = definition.Component,
            Targets = definition.Targets.Select(MapTargetToDto).ToList(),
            Witness = definition.Witness is null ? null : MapWitnessToDto(definition.Witness),
            Metadata = MapMetadataToDto(definition.Metadata)
        };
    }

    // Empty/default arrays are mapped to null so the corresponding YAML key is left out.
    private static VulnerableTargetYamlDto MapTargetToDto(VulnerableTarget target)
    {
        return new VulnerableTargetYamlDto
        {
            Function = target.FunctionName,
            Edges = target.Edges.IsDefaultOrEmpty ? null : target.Edges.Select(e => e.ToString()).ToList(),
            Sinks = target.Sinks.IsDefaultOrEmpty ? null : target.Sinks.ToList(),
            Constants = target.Constants.IsDefaultOrEmpty ? null : target.Constants.ToList(),
            TaintInvariant = target.TaintInvariant,
            SourceFile = target.SourceFile,
            SourceLine = target.SourceLine
        };
    }

    private static WitnessYamlDto MapWitnessToDto(WitnessInput witness)
    {
        return new WitnessYamlDto
        {
            Arguments = witness.Arguments.IsDefaultOrEmpty ? null : witness.Arguments.ToList(),
            Invariant = witness.Invariant,
            PocFileRef = witness.PocFileRef
        };
    }

    // Timestamps are written in round-trip ("O") format, which always includes the offset.
    private static GoldenSetMetadataYamlDto MapMetadataToDto(GoldenSetMetadata metadata)
    {
        return new GoldenSetMetadataYamlDto
        {
            AuthorId = metadata.AuthorId,
            CreatedAt = metadata.CreatedAt.ToString("O", CultureInfo.InvariantCulture),
            SourceRef = metadata.SourceRef,
            ReviewedBy = metadata.ReviewedBy,
            ReviewedAt = metadata.ReviewedAt?.ToString("O", CultureInfo.InvariantCulture),
            Tags = metadata.Tags.IsDefaultOrEmpty ? null : metadata.Tags.ToList(),
            SchemaVersion = metadata.SchemaVersion
        };
    }
}
#region YAML DTOs
/// <summary>
/// YAML DTO for golden set definition.
/// </summary>
/// <remarks>
/// Mutable, all-nullable shape used as the binding target for YAML input and the source for
/// YAML output. Every property is nullable so that a missing key shows up as <c>null</c> and
/// can be reported by the mapping code. With the underscored naming convention the keys are
/// <c>id</c>, <c>component</c>, <c>targets</c>, <c>witness</c> and <c>metadata</c>.
/// </remarks>
internal sealed class GoldenSetYamlDto
{
public string? Id { get; set; }
public string? Component { get; set; }
public List<VulnerableTargetYamlDto>? Targets { get; set; }
public WitnessYamlDto? Witness { get; set; }
public GoldenSetMetadataYamlDto? Metadata { get; set; }
}
/// <summary>
/// YAML DTO for vulnerable target.
/// </summary>
/// <remarks>
/// <see cref="Function"/> corresponds to <c>VulnerableTarget.FunctionName</c>. <see cref="Edges"/>
/// holds each edge in its text form ("from->to"), which the serializer parses into
/// <c>BasicBlockEdge</c> values. Multi-word properties map to snake_case keys such as
/// <c>taint_invariant</c>, <c>source_file</c> and <c>source_line</c>.
/// </remarks>
internal sealed class VulnerableTargetYamlDto
{
public string? Function { get; set; }
public List<string>? Edges { get; set; }
public List<string>? Sinks { get; set; }
public List<string>? Constants { get; set; }
public string? TaintInvariant { get; set; }
public string? SourceFile { get; set; }
public int? SourceLine { get; set; }
}
/// <summary>
/// YAML DTO for witness input.
/// </summary>
/// <remarks>
/// All members are optional. Under the underscored naming convention the keys are
/// <c>arguments</c>, <c>invariant</c> and <c>poc_file_ref</c>.
/// </remarks>
internal sealed class WitnessYamlDto
{
public List<string>? Arguments { get; set; }
public string? Invariant { get; set; }
public string? PocFileRef { get; set; }
}
/// <summary>
/// YAML DTO for metadata.
/// </summary>
/// <remarks>
/// Timestamps (<see cref="CreatedAt"/>, <see cref="ReviewedAt"/>) are carried as strings here;
/// the serializer converts them to and from <c>DateTimeOffset</c> so that a malformed value
/// can be reported together with its field name. Keys are snake_case, e.g. <c>author_id</c>,
/// <c>created_at</c>, <c>source_ref</c>, <c>schema_version</c>.
/// </remarks>
internal sealed class GoldenSetMetadataYamlDto
{
public string? AuthorId { get; set; }
public string? CreatedAt { get; set; }
public string? SourceRef { get; set; }
public string? ReviewedBy { get; set; }
public string? ReviewedAt { get; set; }
public List<string>? Tags { get; set; }
public string? SchemaVersion { get; set; }
}
#endregion

View File

@@ -0,0 +1,82 @@
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Service for looking up known sinks and their metadata.
/// </summary>
/// <remarks>
/// <see cref="IsKnownSink"/> is synchronous; the other lookups return tasks. How names,
/// categories and CWE IDs are compared (for example case sensitivity) is not fixed by this
/// contract and is decided by the implementation.
/// </remarks>
public interface ISinkRegistry
{
/// <summary>
/// Checks if a sink is known in the registry.
/// </summary>
/// <param name="sinkName">The sink function name.</param>
/// <returns>True if the sink is known; otherwise, false.</returns>
bool IsKnownSink(string sinkName);
/// <summary>
/// Gets detailed information about a sink.
/// </summary>
/// <param name="sinkName">The sink function name.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Sink information or null if not found.</returns>
Task<SinkInfo?> GetSinkInfoAsync(string sinkName, CancellationToken ct = default);
/// <summary>
/// Gets all sinks in a category.
/// </summary>
/// <param name="category">The category to filter by.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of sinks in the category.</returns>
/// <remarks>Well-known category values are listed in <see cref="SinkCategory"/>.</remarks>
Task<ImmutableArray<SinkInfo>> GetSinksByCategoryAsync(string category, CancellationToken ct = default);
/// <summary>
/// Gets all sinks associated with a CWE ID.
/// </summary>
/// <param name="cweId">The CWE ID to filter by.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of sinks associated with the CWE.</returns>
Task<ImmutableArray<SinkInfo>> GetSinksByCweAsync(string cweId, CancellationToken ct = default);
}
/// <summary>
/// Information about a known sink function.
/// </summary>
/// <remarks>
/// Positional record. <c>Category</c> and <c>Severity</c> are free-form strings rather than
/// enums, so nothing in this type restricts them to the documented values.
/// </remarks>
/// <param name="Name">Sink function name.</param>
/// <param name="Category">Category (e.g., "memory", "command_injection").</param>
/// <param name="Description">Human-readable description.</param>
/// <param name="CweIds">Associated CWE IDs.</param>
/// <param name="Severity">Severity level (low, medium, high, critical).</param>
public sealed record SinkInfo(
string Name,
string Category,
string? Description,
ImmutableArray<string> CweIds,
string Severity);
/// <summary>
/// Well-known sink categories.
/// </summary>
/// <remarks>
/// String constants (snake_case) intended for use as <see cref="SinkInfo.Category"/> values and
/// as the argument to <see cref="ISinkRegistry.GetSinksByCategoryAsync"/>.
/// NOTE(review): the same strings appear as category values in the seeded sink_registry rows
/// of the database migration - keep both in sync when adding a category.
/// </remarks>
public static class SinkCategory
{
/// <summary>Memory corruption sinks (memcpy, strcpy, etc.).</summary>
public const string Memory = "memory";
/// <summary>Command injection sinks (system, exec, etc.).</summary>
public const string CommandInjection = "command_injection";
/// <summary>Code injection sinks (dlopen, LoadLibrary, etc.).</summary>
public const string CodeInjection = "code_injection";
/// <summary>Path traversal sinks (fopen, open, etc.).</summary>
public const string PathTraversal = "path_traversal";
/// <summary>Network-related sinks (connect, send, etc.).</summary>
public const string Network = "network";
/// <summary>SQL injection sinks.</summary>
public const string SqlInjection = "sql_injection";
/// <summary>Cryptographic sinks.</summary>
public const string Crypto = "crypto";
}

View File

@@ -0,0 +1,214 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Caching.Memory;
using Microsoft.Extensions.Logging;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// In-memory sink registry with built-in common sinks.
/// Can be extended with database or external sources.
/// </summary>
/// <remarks>
/// <para>
/// The built-in table is created once per instance and never changes. Sink names are matched
/// with ordinal (case-sensitive) comparison; category and CWE filters ignore case.
/// </para>
/// <para>
/// Category and CWE query results are ordered by sink name (ordinal) and cached in the supplied
/// <see cref="IMemoryCache"/>. The ordering matters because the backing dictionary enumerates
/// in hash order, which for string keys can differ from one process to the next.
/// </para>
/// <para>
/// The cancellation tokens are accepted for interface compatibility only: every lookup
/// completes synchronously against the in-memory table.
/// </para>
/// </remarks>
public sealed class SinkRegistry : ISinkRegistry
{
    // How long a computed category/CWE result stays in the cache.
    private static readonly TimeSpan CacheLifetime = TimeSpan.FromMinutes(60);

    private readonly IMemoryCache _cache;
    private readonly ILogger<SinkRegistry> _logger;
    private readonly ImmutableDictionary<string, SinkInfo> _builtInSinks;

    /// <summary>
    /// Initializes a new instance of <see cref="SinkRegistry"/>.
    /// </summary>
    /// <param name="cache">Cache used for category and CWE query results.</param>
    /// <param name="logger">Logger; receives one debug entry with the number of built-in sinks.</param>
    /// <exception cref="ArgumentNullException">Thrown when either argument is null.</exception>
    public SinkRegistry(IMemoryCache cache, ILogger<SinkRegistry> logger)
    {
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _builtInSinks = BuildCommonSinks();
        _logger.LogDebug("SinkRegistry initialized with {Count} built-in sinks", _builtInSinks.Count);
    }

    /// <inheritdoc />
    /// <remarks>Returns false for null, empty or whitespace names. Matching is case-sensitive.</remarks>
    public bool IsKnownSink(string sinkName)
    {
        if (string.IsNullOrWhiteSpace(sinkName))
        {
            return false;
        }

        return _builtInSinks.ContainsKey(sinkName);
    }

    /// <inheritdoc />
    /// <remarks>Returns null for null, empty or whitespace names. Matching is case-sensitive.</remarks>
    public Task<SinkInfo?> GetSinkInfoAsync(string sinkName, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(sinkName))
        {
            return Task.FromResult<SinkInfo?>(null);
        }

        _builtInSinks.TryGetValue(sinkName, out var info);
        return Task.FromResult(info);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns an empty array for a null, empty or whitespace category. The category is
    /// compared case-insensitively and results are ordered by sink name.
    /// </remarks>
    public Task<ImmutableArray<SinkInfo>> GetSinksByCategoryAsync(string category, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(category))
        {
            return Task.FromResult(ImmutableArray<SinkInfo>.Empty);
        }

        // The key is case-normalized because the filter ignores case: "Memory" and "memory"
        // are the same query and should share one cache entry.
        var matches = GetOrAddCached(
            $"sinks_by_category_{category.ToUpperInvariant()}",
            sink => string.Equals(sink.Category, category, StringComparison.OrdinalIgnoreCase));

        return Task.FromResult(matches);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns an empty array for a null, empty or whitespace CWE ID. The ID is compared
    /// case-insensitively and results are ordered by sink name.
    /// </remarks>
    public Task<ImmutableArray<SinkInfo>> GetSinksByCweAsync(string cweId, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(cweId))
        {
            return Task.FromResult(ImmutableArray<SinkInfo>.Empty);
        }

        // Same key normalization as the category query, for the same reason.
        var matches = GetOrAddCached(
            $"sinks_by_cwe_{cweId.ToUpperInvariant()}",
            sink => sink.CweIds.Contains(cweId, StringComparer.OrdinalIgnoreCase));

        return Task.FromResult(matches);
    }

    // Returns the cached result for cacheKey, or filters the built-in table with predicate,
    // orders the matches by name so the output is stable, caches and returns them.
    private ImmutableArray<SinkInfo> GetOrAddCached(string cacheKey, Func<SinkInfo, bool> predicate)
    {
        if (_cache.TryGetValue<ImmutableArray<SinkInfo>>(cacheKey, out var cached))
        {
            return cached;
        }

        var matches = _builtInSinks.Values
            .Where(predicate)
            .OrderBy(sink => sink.Name, StringComparer.Ordinal)
            .ToImmutableArray();

        _cache.Set(cacheKey, matches, CacheLifetime);
        return matches;
    }

    // Builds the fixed table of well-known sinks, keyed by exact (case-sensitive) function name.
    private static ImmutableDictionary<string, SinkInfo> BuildCommonSinks()
    {
        var builder = ImmutableDictionary.CreateBuilder<string, SinkInfo>(StringComparer.Ordinal);

        // Memory corruption sinks
        AddSink(builder, "memcpy", SinkCategory.Memory, "Buffer copy without bounds checking", ["CWE-120", "CWE-787"], "high");
        AddSink(builder, "strcpy", SinkCategory.Memory, "String copy without bounds checking", ["CWE-120", "CWE-787"], "high");
        AddSink(builder, "strncpy", SinkCategory.Memory, "String copy with size - may not null-terminate", ["CWE-120"], "medium");
        AddSink(builder, "sprintf", SinkCategory.Memory, "Format string to buffer without bounds", ["CWE-120", "CWE-134"], "high");
        AddSink(builder, "vsprintf", SinkCategory.Memory, "Variable format string without bounds", ["CWE-120", "CWE-134"], "high");
        AddSink(builder, "gets", SinkCategory.Memory, "Read input without bounds - NEVER USE", ["CWE-120"], "critical");
        AddSink(builder, "scanf", SinkCategory.Memory, "Format input - may overflow buffers", ["CWE-120"], "high");
        AddSink(builder, "strcat", SinkCategory.Memory, "String concatenation without bounds", ["CWE-120", "CWE-787"], "high");
        AddSink(builder, "strncat", SinkCategory.Memory, "String concatenation with size limit", ["CWE-120"], "medium");
        AddSink(builder, "memmove", SinkCategory.Memory, "Memory move - can overlap", ["CWE-120"], "medium");
        AddSink(builder, "bcopy", SinkCategory.Memory, "Legacy memory copy", ["CWE-120"], "medium");

        // Memory management sinks
        AddSink(builder, "free", SinkCategory.Memory, "Memory deallocation - double-free/use-after-free risk", ["CWE-415", "CWE-416"], "high");
        AddSink(builder, "realloc", SinkCategory.Memory, "Memory reallocation - use-after-free risk", ["CWE-416"], "medium");
        AddSink(builder, "malloc", SinkCategory.Memory, "Memory allocation - leak risk", ["CWE-401"], "low");
        AddSink(builder, "calloc", SinkCategory.Memory, "Zeroed memory allocation", ["CWE-401"], "low");
        AddSink(builder, "alloca", SinkCategory.Memory, "Stack allocation - stack overflow risk", ["CWE-121"], "medium");

        // OpenSSL memory functions
        AddSink(builder, "OPENSSL_malloc", SinkCategory.Memory, "OpenSSL memory allocation", ["CWE-401"], "low");
        AddSink(builder, "OPENSSL_free", SinkCategory.Memory, "OpenSSL memory deallocation", ["CWE-415", "CWE-416"], "medium");
        AddSink(builder, "OPENSSL_realloc", SinkCategory.Memory, "OpenSSL memory reallocation", ["CWE-416"], "medium");

        // Command injection sinks
        AddSink(builder, "system", SinkCategory.CommandInjection, "Execute shell command", ["CWE-78"], "critical");
        AddSink(builder, "exec", SinkCategory.CommandInjection, "Execute command", ["CWE-78"], "critical");
        AddSink(builder, "execl", SinkCategory.CommandInjection, "Execute command with args", ["CWE-78"], "critical");
        AddSink(builder, "execle", SinkCategory.CommandInjection, "Execute command with environment", ["CWE-78"], "critical");
        AddSink(builder, "execlp", SinkCategory.CommandInjection, "Execute command from PATH", ["CWE-78"], "critical");
        AddSink(builder, "execv", SinkCategory.CommandInjection, "Execute command with arg vector", ["CWE-78"], "critical");
        AddSink(builder, "execve", SinkCategory.CommandInjection, "Execute command with env vector", ["CWE-78"], "critical");
        AddSink(builder, "execvp", SinkCategory.CommandInjection, "Execute command from PATH with vector", ["CWE-78"], "critical");
        AddSink(builder, "popen", SinkCategory.CommandInjection, "Open pipe to command", ["CWE-78"], "high");
        AddSink(builder, "ShellExecute", SinkCategory.CommandInjection, "Windows shell execution", ["CWE-78"], "critical");
        AddSink(builder, "ShellExecuteEx", SinkCategory.CommandInjection, "Windows shell execution extended", ["CWE-78"], "critical");
        AddSink(builder, "CreateProcess", SinkCategory.CommandInjection, "Windows process creation", ["CWE-78"], "high");
        AddSink(builder, "WinExec", SinkCategory.CommandInjection, "Windows command execution", ["CWE-78"], "critical");

        // Code injection sinks
        AddSink(builder, "dlopen", SinkCategory.CodeInjection, "Dynamic library loading", ["CWE-427"], "high");
        AddSink(builder, "dlsym", SinkCategory.CodeInjection, "Dynamic symbol lookup", ["CWE-427"], "medium");
        AddSink(builder, "LoadLibrary", SinkCategory.CodeInjection, "Windows DLL loading", ["CWE-427"], "high");
        AddSink(builder, "LoadLibraryEx", SinkCategory.CodeInjection, "Windows DLL loading extended", ["CWE-427"], "high");
        AddSink(builder, "GetProcAddress", SinkCategory.CodeInjection, "Windows function pointer lookup", ["CWE-427"], "medium");

        // Path traversal sinks
        AddSink(builder, "fopen", SinkCategory.PathTraversal, "File open", ["CWE-22"], "medium");
        AddSink(builder, "open", SinkCategory.PathTraversal, "POSIX file open", ["CWE-22"], "medium");
        AddSink(builder, "openat", SinkCategory.PathTraversal, "POSIX file open relative", ["CWE-22"], "medium");
        AddSink(builder, "freopen", SinkCategory.PathTraversal, "Reopen file stream", ["CWE-22"], "medium");
        AddSink(builder, "creat", SinkCategory.PathTraversal, "Create file", ["CWE-22"], "medium");
        AddSink(builder, "mkdir", SinkCategory.PathTraversal, "Create directory", ["CWE-22"], "low");
        AddSink(builder, "rmdir", SinkCategory.PathTraversal, "Remove directory", ["CWE-22"], "low");
        AddSink(builder, "unlink", SinkCategory.PathTraversal, "Remove file", ["CWE-22"], "medium");
        AddSink(builder, "rename", SinkCategory.PathTraversal, "Rename file", ["CWE-22"], "medium");
        AddSink(builder, "symlink", SinkCategory.PathTraversal, "Create symbolic link", ["CWE-59"], "medium");
        AddSink(builder, "readlink", SinkCategory.PathTraversal, "Read symbolic link", ["CWE-59"], "low");
        AddSink(builder, "realpath", SinkCategory.PathTraversal, "Resolve path", ["CWE-22"], "low");
        AddSink(builder, "CreateFile", SinkCategory.PathTraversal, "Windows file creation", ["CWE-22"], "medium");
        AddSink(builder, "DeleteFile", SinkCategory.PathTraversal, "Windows file deletion", ["CWE-22"], "medium");

        // Network sinks
        AddSink(builder, "connect", SinkCategory.Network, "Network connection", ["CWE-918"], "medium");
        AddSink(builder, "send", SinkCategory.Network, "Send data over network", ["CWE-319"], "medium");
        AddSink(builder, "sendto", SinkCategory.Network, "Send data to address", ["CWE-319"], "medium");
        AddSink(builder, "recv", SinkCategory.Network, "Receive data from network", ["CWE-319"], "medium");
        AddSink(builder, "recvfrom", SinkCategory.Network, "Receive data with address", ["CWE-319"], "medium");
        AddSink(builder, "write", SinkCategory.Network, "Write to file descriptor", ["CWE-319"], "low");
        AddSink(builder, "read", SinkCategory.Network, "Read from file descriptor", ["CWE-319"], "low");
        AddSink(builder, "socket", SinkCategory.Network, "Create socket", ["CWE-918"], "low");
        AddSink(builder, "bind", SinkCategory.Network, "Bind socket to address", ["CWE-918"], "low");
        AddSink(builder, "listen", SinkCategory.Network, "Listen on socket", ["CWE-918"], "low");
        AddSink(builder, "accept", SinkCategory.Network, "Accept connection", ["CWE-918"], "low");

        // SQL injection sinks
        AddSink(builder, "sqlite3_exec", SinkCategory.SqlInjection, "SQLite execute", ["CWE-89"], "high");
        AddSink(builder, "mysql_query", SinkCategory.SqlInjection, "MySQL query", ["CWE-89"], "high");
        AddSink(builder, "mysql_real_query", SinkCategory.SqlInjection, "MySQL real query", ["CWE-89"], "high");
        AddSink(builder, "PQexec", SinkCategory.SqlInjection, "PostgreSQL execute", ["CWE-89"], "high");
        AddSink(builder, "PQexecParams", SinkCategory.SqlInjection, "PostgreSQL parameterized", ["CWE-89"], "medium");

        // Cryptographic sinks
        AddSink(builder, "EVP_DecryptUpdate", SinkCategory.Crypto, "OpenSSL decrypt update", ["CWE-327"], "medium");
        AddSink(builder, "EVP_EncryptUpdate", SinkCategory.Crypto, "OpenSSL encrypt update", ["CWE-327"], "medium");
        AddSink(builder, "EVP_DigestUpdate", SinkCategory.Crypto, "OpenSSL digest update", ["CWE-327"], "low");
        AddSink(builder, "EVP_SignFinal", SinkCategory.Crypto, "OpenSSL sign final", ["CWE-327"], "medium");
        AddSink(builder, "EVP_VerifyFinal", SinkCategory.Crypto, "OpenSSL verify final", ["CWE-327"], "medium");
        AddSink(builder, "RSA_private_decrypt", SinkCategory.Crypto, "RSA private key decrypt", ["CWE-327"], "high");
        AddSink(builder, "RSA_public_encrypt", SinkCategory.Crypto, "RSA public key encrypt", ["CWE-327"], "medium");
        AddSink(builder, "DES_ecb_encrypt", SinkCategory.Crypto, "DES ECB encrypt - weak", ["CWE-327", "CWE-328"], "high");
        AddSink(builder, "MD5_Update", SinkCategory.Crypto, "MD5 digest - weak", ["CWE-327", "CWE-328"], "medium");
        AddSink(builder, "SHA1_Update", SinkCategory.Crypto, "SHA1 digest - weak for signatures", ["CWE-327", "CWE-328"], "low");

        // ASN.1/X.509 parsing sinks (common in OpenSSL vulnerabilities)
        AddSink(builder, "d2i_X509", SinkCategory.Crypto, "DER to X509 certificate", ["CWE-295"], "medium");
        AddSink(builder, "d2i_ASN1_OCTET_STRING", SinkCategory.Crypto, "DER to ASN1 octet string", ["CWE-295"], "medium");
        AddSink(builder, "d2i_PKCS12", SinkCategory.Crypto, "DER to PKCS12", ["CWE-295"], "medium");
        AddSink(builder, "PKCS12_parse", SinkCategory.Crypto, "Parse PKCS12 structure", ["CWE-295"], "medium");
        AddSink(builder, "PKCS12_unpack_p7data", SinkCategory.Crypto, "Unpack PKCS7 data", ["CWE-295"], "medium");

        return builder.ToImmutable();
    }

    // Adds or overwrites one entry. The indexer is used so a repeated name replaces the earlier
    // entry instead of throwing.
    private static void AddSink(
        ImmutableDictionary<string, SinkInfo>.Builder builder,
        string name,
        string category,
        string description,
        string[] cweIds,
        string severity)
    {
        builder[name] = new SinkInfo(name, category, description, [.. cweIds], severity);
    }
}

View File

@@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings. Warnings are treated as errors and an XML documentation file is generated,
     so compiler warnings (including documentation warnings) break the build. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<Description>Golden Set definitions for ground-truth vulnerability code-level manifestation facts.</Description>
</PropertyGroup>
<!-- NuGet dependencies. No Version attributes are given here; versions are presumably
     supplied centrally (for example by a Directory.Packages.props file). TODO confirm. -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Caching.Memory" />
<PackageReference Include="Microsoft.Extensions.Http" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" />
<PackageReference Include="Microsoft.Extensions.Options.DataAnnotations" />
<PackageReference Include="Npgsql" />
<PackageReference Include="YamlDotNet" />
</ItemGroup>
<!-- Sibling project reference, resolved relative to this project's directory. -->
<ItemGroup>
<ProjectReference Include="..\StellaOps.BinaryIndex.Contracts\StellaOps.BinaryIndex.Contracts.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,346 @@
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Storage contract for golden set definitions: persistence, lookup by ID or content digest,
/// listing, status transitions, audit history and soft deletion.
/// </summary>
public interface IGoldenSetStore
{
/// <summary>
/// Stores a golden set definition, replacing any existing record with the same ID.
/// </summary>
/// <remarks>
/// The PostgreSQL implementation validates the definition before writing and returns a failed
/// result (without writing) when validation reports errors.
/// </remarks>
/// <param name="definition">The definition to store.</param>
/// <param name="status">Initial status (default: Draft).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Store result with content digest; <c>WasUpdated</c> tells insert from replace.</returns>
Task<GoldenSetStoreResult> StoreAsync(
GoldenSetDefinition definition,
GoldenSetStatus status = GoldenSetStatus.Draft,
CancellationToken ct = default);
/// <summary>
/// Retrieves a golden set by ID.
/// </summary>
/// <param name="goldenSetId">The golden set ID (CVE/GHSA ID).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The definition or null if not found.</returns>
Task<GoldenSetDefinition?> GetByIdAsync(
string goldenSetId,
CancellationToken ct = default);
/// <summary>
/// Retrieves a golden set by content digest.
/// </summary>
/// <param name="contentDigest">The content-addressed digest.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The definition or null if not found.</returns>
Task<GoldenSetDefinition?> GetByDigestAsync(
string contentDigest,
CancellationToken ct = default);
/// <summary>
/// Lists golden sets matching criteria.
/// </summary>
/// <param name="query">Query parameters (filters, paging and ordering).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of matching golden set summaries; empty when nothing matches.</returns>
Task<ImmutableArray<GoldenSetSummary>> ListAsync(
GoldenSetListQuery query,
CancellationToken ct = default);
/// <summary>
/// Updates the status of a golden set.
/// </summary>
/// <remarks>
/// The PostgreSQL implementation throws <see cref="InvalidOperationException"/> when the golden
/// set does not exist, records the reviewer and review time when the new status is InReview or
/// Approved, and attributes the audit entry to "system" when no reviewer is given.
/// </remarks>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="status">New status.</param>
/// <param name="reviewedBy">Reviewer ID (recorded for transitions to InReview or Approved).</param>
/// <param name="ct">Cancellation token.</param>
Task UpdateStatusAsync(
string goldenSetId,
GoldenSetStatus status,
string? reviewedBy = null,
CancellationToken ct = default);
/// <summary>
/// Updates the status of a golden set with a comment.
/// </summary>
/// <remarks>
/// Unlike the overload without a comment, the PostgreSQL implementation reports a missing
/// golden set through a failed result rather than an exception.
/// </remarks>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="status">New status.</param>
/// <param name="actorId">Who made the change.</param>
/// <param name="comment">Comment explaining the change; stored with the audit entry.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Update result.</returns>
Task<GoldenSetStoreResult> UpdateStatusAsync(
string goldenSetId,
GoldenSetStatus status,
string actorId,
string comment,
CancellationToken ct = default);
/// <summary>
/// Retrieves a golden set with its current status.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The stored golden set or null if not found.</returns>
Task<StoredGoldenSet?> GetAsync(
string goldenSetId,
CancellationToken ct = default);
/// <summary>
/// Gets the audit log for a golden set.
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Audit log entries ordered by timestamp descending.</returns>
Task<ImmutableArray<GoldenSetAuditEntry>> GetAuditLogAsync(
string goldenSetId,
CancellationToken ct = default);
/// <summary>
/// Gets all golden sets applicable to a component.
/// </summary>
/// <param name="component">Component name.</param>
/// <param name="statusFilter">Optional status filter (default: Approved); pass null for any status.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>List of applicable golden sets.</returns>
Task<ImmutableArray<GoldenSetDefinition>> GetByComponentAsync(
string component,
GoldenSetStatus? statusFilter = GoldenSetStatus.Approved,
CancellationToken ct = default);
/// <summary>
/// Deletes a golden set (soft delete - moves to Archived).
/// </summary>
/// <param name="goldenSetId">The golden set ID.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// True if deleted; false if not found. The PostgreSQL implementation also returns false when
/// the golden set is already archived.
/// </returns>
Task<bool> DeleteAsync(
string goldenSetId,
CancellationToken ct = default);
}
/// <summary>
/// Outcome of a write operation against the golden set store.
/// </summary>
/// <remarks>
/// Build instances with <see cref="Succeeded"/> or <see cref="Failed"/> so that the
/// success flag, digest and error message stay mutually consistent.
/// </remarks>
public sealed record GoldenSetStoreResult
{
    /// <summary>
    /// <c>true</c> when the operation completed, <c>false</c> when it failed.
    /// </summary>
    public required bool Success { get; init; }

    /// <summary>
    /// Content digest of the stored definition; the empty string on results made by <see cref="Failed"/>.
    /// </summary>
    public required string ContentDigest { get; init; }

    /// <summary>
    /// <c>true</c> when an already existing record was modified rather than a new one created.
    /// </summary>
    public bool WasUpdated { get; init; }

    /// <summary>
    /// Failure description; <c>null</c> on results made by <see cref="Succeeded"/>.
    /// </summary>
    public string? Error { get; init; }

    /// <summary>
    /// Builds a successful result.
    /// </summary>
    /// <param name="contentDigest">Digest of the stored definition.</param>
    /// <param name="wasUpdated">Whether an existing record was updated.</param>
    /// <returns>A result with <see cref="Success"/> set and no error.</returns>
    public static GoldenSetStoreResult Succeeded(string contentDigest, bool wasUpdated = false)
    {
        return new GoldenSetStoreResult
        {
            Success = true,
            ContentDigest = contentDigest,
            WasUpdated = wasUpdated,
        };
    }

    /// <summary>
    /// Builds a failed result.
    /// </summary>
    /// <param name="error">Human-readable failure description.</param>
    /// <returns>A result with <see cref="Success"/> cleared and an empty digest.</returns>
    public static GoldenSetStoreResult Failed(string error)
    {
        return new GoldenSetStoreResult
        {
            Success = false,
            ContentDigest = string.Empty,
            Error = error,
        };
    }
}
/// <summary>
/// Lightweight projection of a stored golden set, returned by list queries.
/// </summary>
public sealed record GoldenSetSummary
{
    /// <summary>Identifier of the golden set (a CVE or GHSA ID).</summary>
    public required string Id { get; init; }

    /// <summary>Name of the component the golden set describes.</summary>
    public required string Component { get; init; }

    /// <summary>Status of the golden set at the time it was read.</summary>
    public required GoldenSetStatus Status { get; init; }

    /// <summary>How many vulnerable targets the definition contains.</summary>
    public required int TargetCount { get; init; }

    /// <summary>When the golden set was created.</summary>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>When the golden set was reviewed; <c>null</c> if it has not been reviewed.</summary>
    public DateTimeOffset? ReviewedAt { get; init; }

    /// <summary>Content-addressed digest of the definition.</summary>
    public required string ContentDigest { get; init; }

    /// <summary>Tags usable for filtering; empty (never default) unless set explicitly.</summary>
    public ImmutableArray<string> Tags { get; init; } = ImmutableArray<string>.Empty;
}
/// <summary>
/// Filter, paging and ordering options for listing golden sets.
/// Every filter is optional; filters that are set are combined with AND.
/// </summary>
public sealed record GoldenSetListQuery
{
    /// <summary>Restrict results to this component name; <c>null</c> disables the filter.</summary>
    public string? ComponentFilter { get; init; }

    /// <summary>Restrict results to this status; <c>null</c> disables the filter.</summary>
    public GoldenSetStatus? StatusFilter { get; init; }

    /// <summary>Restrict results to golden sets carrying at least one of these tags.</summary>
    public ImmutableArray<string>? TagsFilter { get; init; }

    /// <summary>Lower bound on the creation date.</summary>
    public DateTimeOffset? CreatedAfter { get; init; }

    /// <summary>Upper bound on the creation date.</summary>
    public DateTimeOffset? CreatedBefore { get; init; }

    /// <summary>Maximum number of results to return (defaults to 100).</summary>
    public int Limit { get; init; } = 100;

    /// <summary>Number of results to skip for pagination (defaults to 0).</summary>
    public int Offset { get; init; }

    /// <summary>Sort order of the results (defaults to newest first).</summary>
    public GoldenSetOrderBy OrderBy { get; init; } = GoldenSetOrderBy.CreatedAtDesc;
}
/// <summary>
/// Sort orders available when listing golden sets.
/// </summary>
/// <remarks>
/// The numeric values are spelled out so that reordering members cannot silently change them.
/// </remarks>
public enum GoldenSetOrderBy
{
    /// <summary>ID, ascending.</summary>
    IdAsc = 0,

    /// <summary>ID, descending.</summary>
    IdDesc = 1,

    /// <summary>Creation date, oldest first.</summary>
    CreatedAtAsc = 2,

    /// <summary>Creation date, newest first.</summary>
    CreatedAtDesc = 3,

    /// <summary>Component name, ascending.</summary>
    ComponentAsc = 4,

    /// <summary>Component name, descending.</summary>
    ComponentDesc = 5,
}
/// <summary>
/// A golden set definition together with its storage state.
/// </summary>
public sealed record StoredGoldenSet
{
    /// <summary>The deserialized definition.</summary>
    public required GoldenSetDefinition Definition { get; init; }

    /// <summary>Status of the golden set at the time it was read.</summary>
    public required GoldenSetStatus Status { get; init; }

    /// <summary>Creation time of the record.</summary>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// Time of the last update. The PostgreSQL store in this file fills it with the review
    /// time and falls back to the creation time when the record was never reviewed.
    /// </summary>
    public required DateTimeOffset UpdatedAt { get; init; }
}
/// <summary>
/// One row of a golden set's audit history.
/// </summary>
public sealed record GoldenSetAuditEntry
{
    /// <summary>Name of the operation that was performed.</summary>
    public required string Operation { get; init; }

    /// <summary>Identifier of whoever performed the operation.</summary>
    public required string ActorId { get; init; }

    /// <summary>Point in time at which the operation was recorded.</summary>
    public required DateTimeOffset Timestamp { get; init; }

    /// <summary>Status before the operation; <c>null</c> when none was recorded.</summary>
    public GoldenSetStatus? OldStatus { get; init; }

    /// <summary>Status after the operation; <c>null</c> when none was recorded.</summary>
    public GoldenSetStatus? NewStatus { get; init; }

    /// <summary>Free-text comment attached to the operation, if any.</summary>
    public string? Comment { get; init; }
}

View File

@@ -0,0 +1,665 @@
using System.Collections.Immutable;
using System.Globalization;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// PostgreSQL implementation of <see cref="IGoldenSetStore"/>.
/// </summary>
internal sealed class PostgresGoldenSetStore : IGoldenSetStore
{
// Connection source; every public operation opens its own connection from it.
private readonly NpgsqlDataSource _dataSource;
// Validates definitions and supplies the content digest before StoreAsync writes anything.
private readonly IGoldenSetValidator _validator;
// Clock used for reviewed_at values and audit-log timestamps.
private readonly TimeProvider _timeProvider;
// NOTE(review): assigned in the constructor but not read by any member of this class - confirm it is still needed.
private readonly GoldenSetOptions _options;
private readonly ILogger<PostgresGoldenSetStore> _logger;
// Serializer settings for the definition_json column: snake_case property names, compact output.
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
WriteIndented = false
};
/// <summary>
/// Creates the store and captures its dependencies.
/// </summary>
/// <param name="dataSource">Source of PostgreSQL connections.</param>
/// <param name="validator">Validator run before a definition is stored.</param>
/// <param name="timeProvider">Clock used for review and audit timestamps.</param>
/// <param name="options">Golden set options; its <c>Value</c> must not be null.</param>
/// <param name="logger">Logger for store operations.</param>
/// <exception cref="ArgumentNullException">A dependency, or <c>options.Value</c>, is null.</exception>
public PostgresGoldenSetStore(
    NpgsqlDataSource dataSource,
    IGoldenSetValidator validator,
    TimeProvider timeProvider,
    IOptions<GoldenSetOptions> options,
    ILogger<PostgresGoldenSetStore> logger)
{
    // Checks run in parameter order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(dataSource);
    _dataSource = dataSource;

    ArgumentNullException.ThrowIfNull(validator);
    _validator = validator;

    ArgumentNullException.ThrowIfNull(timeProvider);
    _timeProvider = timeProvider;

    ArgumentNullException.ThrowIfNull(options);
    _options = options.Value ?? throw new ArgumentNullException(nameof(options));

    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}
/// <inheritdoc />
/// <remarks>
/// The definition is validated first; when validation fails nothing is written and a failed
/// result carrying the joined validation messages is returned. Otherwise the definition row,
/// its target rows and one audit entry are written inside a single transaction.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="definition"/> is null.</exception>
/// <exception cref="InvalidOperationException">The validator reported success without a content digest.</exception>
public async Task<GoldenSetStoreResult> StoreAsync(
    GoldenSetDefinition definition,
    GoldenSetStatus status = GoldenSetStatus.Draft,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(definition);

    // Validate first
    var validation = await _validator.ValidateAsync(definition, ct: ct);
    if (!validation.IsValid)
    {
        var errorMessage = string.Join("; ", validation.Errors.Select(e => e.Message));
        _logger.LogWarning("Validation failed for golden set {Id}: {Errors}", definition.Id, errorMessage);
        return GoldenSetStoreResult.Failed(errorMessage);
    }

    // Fail with a clear message instead of sending a null digest to the database.
    var digest = validation.ContentDigest
        ?? throw new InvalidOperationException("Validator reported success without a content digest.");
    var yaml = GoldenSetYamlSerializer.Serialize(definition);
    var json = JsonSerializer.Serialize(definition, JsonOptions);

    // The audit log gets the same lower-case status text that is written to the definitions
    // table, so old_status/new_status values use one spelling across all writers.
    var auditStatus = status.ToString().ToLowerInvariant();

    await using var conn = await _dataSource.OpenConnectionAsync(ct);
    await using var tx = await conn.BeginTransactionAsync(ct);
    try
    {
        var wasUpdated = await UpsertDefinitionAsync(conn, definition, status, yaml, json, digest, ct);

        // Targets are replaced wholesale: drop the old rows, then insert the current ones.
        await DeleteTargetsAsync(conn, definition.Id, ct);
        await InsertTargetsAsync(conn, definition, ct);

        await InsertAuditLogAsync(conn, definition.Id, wasUpdated ? "updated" : "created",
            definition.Metadata.AuthorId, null, auditStatus, null, ct);

        await tx.CommitAsync(ct);

        _logger.LogInformation("Stored golden set {Id} with digest {Digest} (updated={Updated})",
            definition.Id, digest, wasUpdated);
        return GoldenSetStoreResult.Succeeded(digest, wasUpdated);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to store golden set {Id}", definition.Id);

        // Roll back without the caller's token: if that token is already cancelled the rollback
        // itself would throw and replace the exception that actually caused the failure.
        try
        {
            await tx.RollbackAsync(CancellationToken.None);
        }
        catch (Exception rollbackEx)
        {
            _logger.LogWarning(rollbackEx, "Rollback failed for golden set {Id}", definition.Id);
        }

        throw;
    }
}
/// <inheritdoc />
/// <exception cref="ArgumentException"><paramref name="goldenSetId"/> is null, empty or whitespace.</exception>
public async Task<GoldenSetDefinition?> GetByIdAsync(string goldenSetId, CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);

    await using var connection = await _dataSource.OpenConnectionAsync(ct);
    await using var command = new NpgsqlCommand(
        """
        SELECT definition_yaml
        FROM golden_sets.definitions
        WHERE id = @id
        """,
        connection);
    command.Parameters.AddWithValue("@id", goldenSetId);

    await using var rows = await command.ExecuteReaderAsync(ct);

    // The stored YAML is the source of truth; the definition is rebuilt from it on every read.
    var found = await rows.ReadAsync(ct);
    return found ? GoldenSetYamlSerializer.Deserialize(rows.GetString(0)) : null;
}
/// <inheritdoc />
/// <remarks>Only the first row returned for the digest is used.</remarks>
/// <exception cref="ArgumentException"><paramref name="contentDigest"/> is null, empty or whitespace.</exception>
public async Task<GoldenSetDefinition?> GetByDigestAsync(string contentDigest, CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(contentDigest);

    const string lookupSql = """
        SELECT definition_yaml
        FROM golden_sets.definitions
        WHERE content_digest = @digest
        """;

    await using var connection = await _dataSource.OpenConnectionAsync(ct);
    await using var command = new NpgsqlCommand(lookupSql, connection);
    command.Parameters.AddWithValue("@digest", contentDigest);

    await using var rows = await command.ExecuteReaderAsync(ct);
    if (await rows.ReadAsync(ct))
    {
        var storedYaml = rows.GetString(0);
        return GoldenSetYamlSerializer.Deserialize(storedYaml);
    }

    return null;
}
/// <inheritdoc />
/// <remarks>
/// Filters that are set are combined with AND. The tag filter matches rows whose tags overlap
/// the requested tags, and both creation-date bounds are inclusive. Orderings on a column that
/// may repeat (creation date, component) are tie-broken by ID so that LIMIT/OFFSET paging
/// returns every row exactly once.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The query's limit or offset is negative.</exception>
public async Task<ImmutableArray<GoldenSetSummary>> ListAsync(GoldenSetListQuery query, CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(query);

    // PostgreSQL rejects negative LIMIT/OFFSET; fail before opening a connection.
    ArgumentOutOfRangeException.ThrowIfNegative(query.Limit);
    ArgumentOutOfRangeException.ThrowIfNegative(query.Offset);

    var conditions = new List<string>();
    var parameters = new Dictionary<string, object>();

    if (!string.IsNullOrWhiteSpace(query.ComponentFilter))
    {
        conditions.Add("component = @component");
        parameters["@component"] = query.ComponentFilter;
    }

    if (query.StatusFilter.HasValue)
    {
        conditions.Add("status = @status");
        parameters["@status"] = query.StatusFilter.Value.ToString().ToLowerInvariant();
    }

    if (query.TagsFilter.HasValue && !query.TagsFilter.Value.IsEmpty)
    {
        // "&&" is the array-overlap operator: any shared tag matches.
        conditions.Add("tags && @tags");
        parameters["@tags"] = query.TagsFilter.Value.ToArray();
    }

    if (query.CreatedAfter.HasValue)
    {
        conditions.Add("created_at >= @created_after");
        parameters["@created_after"] = query.CreatedAfter.Value;
    }

    if (query.CreatedBefore.HasValue)
    {
        conditions.Add("created_at <= @created_before");
        parameters["@created_before"] = query.CreatedBefore.Value;
    }

    var whereClause = conditions.Count > 0 ? "WHERE " + string.Join(" AND ", conditions) : "";

    // id is the conflict target of the upsert and therefore unique, which makes it a safe
    // secondary key: without it rows sharing a created_at/component value have no defined
    // order and can be skipped or repeated between pages.
    var orderClause = query.OrderBy switch
    {
        GoldenSetOrderBy.IdAsc => "ORDER BY id ASC",
        GoldenSetOrderBy.IdDesc => "ORDER BY id DESC",
        GoldenSetOrderBy.CreatedAtAsc => "ORDER BY created_at ASC, id ASC",
        GoldenSetOrderBy.CreatedAtDesc => "ORDER BY created_at DESC, id ASC",
        GoldenSetOrderBy.ComponentAsc => "ORDER BY component ASC, id ASC",
        GoldenSetOrderBy.ComponentDesc => "ORDER BY component DESC, id ASC",
        _ => "ORDER BY created_at DESC, id ASC"
    };

    // Only fixed SQL fragments chosen above are spliced in; every caller-supplied value is a parameter.
    var sql = $"""
        SELECT id, component, status, target_count, created_at, reviewed_at, content_digest, tags
        FROM golden_sets.definitions
        {whereClause}
        {orderClause}
        LIMIT @limit OFFSET @offset
        """;

    await using var conn = await _dataSource.OpenConnectionAsync(ct);
    await using var cmd = new NpgsqlCommand(sql, conn);
    foreach (var (key, value) in parameters)
    {
        cmd.Parameters.AddWithValue(key, value);
    }

    cmd.Parameters.AddWithValue("@limit", query.Limit);
    cmd.Parameters.AddWithValue("@offset", query.Offset);

    var results = ImmutableArray.CreateBuilder<GoldenSetSummary>();
    await using var reader = await cmd.ExecuteReaderAsync(ct);
    while (await reader.ReadAsync(ct))
    {
        results.Add(new GoldenSetSummary
        {
            Id = reader.GetString(0),
            Component = reader.GetString(1),
            Status = Enum.Parse<GoldenSetStatus>(reader.GetString(2), ignoreCase: true),
            TargetCount = reader.GetInt32(3),
            CreatedAt = reader.GetFieldValue<DateTimeOffset>(4),
            ReviewedAt = reader.IsDBNull(5) ? null : reader.GetFieldValue<DateTimeOffset>(5),
            ContentDigest = reader.GetString(6),
            Tags = reader.IsDBNull(7) ? [] : reader.GetFieldValue<string[]>(7).ToImmutableArray()
        });
    }

    return results.ToImmutable();
}
/// <inheritdoc />
/// <remarks>
/// The status change and its audit entry are written in one transaction. The reviewer and the
/// review time are recorded only when the new status is InReview or Approved. The audit entry
/// is attributed to <paramref name="reviewedBy"/>, or to "system" when none is given.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="goldenSetId"/> is null, empty or whitespace.</exception>
/// <exception cref="InvalidOperationException">No golden set with the given ID exists.</exception>
public async Task UpdateStatusAsync(
    string goldenSetId,
    GoldenSetStatus status,
    string? reviewedBy = null,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);

    // One lower-case spelling is used for both the definitions table and the audit log, so
    // old_status (read back from the table) and new_status are comparable as plain text.
    var newStatus = status.ToString().ToLowerInvariant();
    var recordsReview = status is GoldenSetStatus.Approved or GoldenSetStatus.InReview;

    await using var conn = await _dataSource.OpenConnectionAsync(ct);
    await using var tx = await conn.BeginTransactionAsync(ct);
    try
    {
        // Get current status
        var currentStatus = await GetCurrentStatusAsync(conn, goldenSetId, ct);
        if (currentStatus is null)
        {
            throw new InvalidOperationException($"Golden set {goldenSetId} not found");
        }

        // Update status
        var sql = recordsReview
            ? """
              UPDATE golden_sets.definitions
              SET status = @status, reviewed_by = @reviewed_by, reviewed_at = @reviewed_at
              WHERE id = @id
              """
            : """
              UPDATE golden_sets.definitions
              SET status = @status
              WHERE id = @id
              """;

        await using var cmd = new NpgsqlCommand(sql, conn);
        cmd.Parameters.AddWithValue("@id", goldenSetId);
        cmd.Parameters.AddWithValue("@status", newStatus);
        if (recordsReview)
        {
            cmd.Parameters.AddWithValue("@reviewed_by", (object?)reviewedBy ?? DBNull.Value);
            cmd.Parameters.AddWithValue("@reviewed_at", _timeProvider.GetUtcNow());
        }

        await cmd.ExecuteNonQueryAsync(ct);

        // Audit log
        await InsertAuditLogAsync(conn, goldenSetId, "status_changed",
            reviewedBy ?? "system", currentStatus, newStatus, null, ct);

        await tx.CommitAsync(ct);

        _logger.LogInformation("Updated golden set {Id} status from {OldStatus} to {NewStatus}",
            goldenSetId, currentStatus, status);
    }
    catch
    {
        // Roll back without the caller's token: with an already-cancelled token the rollback
        // would throw and hide the exception that caused the failure.
        try
        {
            await tx.RollbackAsync(CancellationToken.None);
        }
        catch (Exception rollbackEx)
        {
            _logger.LogWarning(rollbackEx, "Rollback failed for golden set {Id}", goldenSetId);
        }

        throw;
    }
}
/// <inheritdoc />
/// <remarks>Results are ordered newest first; a null status filter returns every status.</remarks>
/// <exception cref="ArgumentException"><paramref name="component"/> is null, empty or whitespace.</exception>
public async Task<ImmutableArray<GoldenSetDefinition>> GetByComponentAsync(
    string component,
    GoldenSetStatus? statusFilter = GoldenSetStatus.Approved,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(component);

    const string filteredSql = """
        SELECT definition_yaml
        FROM golden_sets.definitions
        WHERE component = @component AND status = @status
        ORDER BY created_at DESC
        """;
    const string unfilteredSql = """
        SELECT definition_yaml
        FROM golden_sets.definitions
        WHERE component = @component
        ORDER BY created_at DESC
        """;

    await using var connection = await _dataSource.OpenConnectionAsync(ct);
    await using var command = new NpgsqlCommand(statusFilter is null ? unfilteredSql : filteredSql, connection);
    command.Parameters.AddWithValue("@component", component);
    if (statusFilter is { } wanted)
    {
        // Statuses are stored as the lower-cased enum name.
        command.Parameters.AddWithValue("@status", wanted.ToString().ToLowerInvariant());
    }

    var definitions = ImmutableArray.CreateBuilder<GoldenSetDefinition>();
    await using var rows = await command.ExecuteReaderAsync(ct);
    while (await rows.ReadAsync(ct))
    {
        definitions.Add(GoldenSetYamlSerializer.Deserialize(rows.GetString(0)));
    }

    return definitions.ToImmutable();
}
/// <inheritdoc />
/// <remarks>
/// Soft delete: the row is kept and its status becomes 'archived'. The transition is written to
/// the audit log in the same transaction, like every other status change made by this store.
/// Returns false when the golden set does not exist or is already archived.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="goldenSetId"/> is null, empty or whitespace.</exception>
public async Task<bool> DeleteAsync(string goldenSetId, CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);

    // Soft delete - move to archived status
    const string sql = """
        UPDATE golden_sets.definitions
        SET status = 'archived'
        WHERE id = @id AND status != 'archived'
        """;

    await using var conn = await _dataSource.OpenConnectionAsync(ct);
    await using var tx = await conn.BeginTransactionAsync(ct);

    // Captured before the update so the audit entry can record the previous status.
    var previousStatus = await GetCurrentStatusAsync(conn, goldenSetId, ct);

    int affected;
    await using (var cmd = new NpgsqlCommand(sql, conn))
    {
        cmd.Parameters.AddWithValue("@id", goldenSetId);
        affected = await cmd.ExecuteNonQueryAsync(ct);
    }

    if (affected == 0)
    {
        // Nothing changed (unknown ID or already archived); disposing the transaction discards it.
        return false;
    }

    // NOTE(review): this method has no actor parameter, so the entry is attributed to "system",
    // the same fallback UpdateStatusAsync uses when no reviewer is supplied.
    await InsertAuditLogAsync(conn, goldenSetId, "status_changed",
        "system", previousStatus, "archived", null, ct);

    await tx.CommitAsync(ct);
    return true;
}
/// <summary>
/// Inserts the definition row, or overwrites the existing row that has the same ID.
/// </summary>
/// <remarks>
/// On conflict the author and creation time of the existing row are left untouched; every
/// other column written here, including the status, is replaced.
/// </remarks>
/// <param name="conn">Open connection; the caller owns the surrounding transaction.</param>
/// <param name="definition">Definition being stored.</param>
/// <param name="status">Status to store, persisted as the lower-cased enum name.</param>
/// <param name="yaml">YAML form of the definition.</param>
/// <param name="json">JSON form of the definition.</param>
/// <param name="digest">Content digest of the definition.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns><c>true</c> when an existing row was updated, <c>false</c> when a new row was inserted.</returns>
private async Task<bool> UpsertDefinitionAsync(
    NpgsqlConnection conn,
    GoldenSetDefinition definition,
    GoldenSetStatus status,
    string yaml,
    string json,
    string digest,
    CancellationToken ct)
{
    const string upsertSql = """
        INSERT INTO golden_sets.definitions
        (id, component, content_digest, status, definition_yaml, definition_json,
        target_count, author_id, created_at, source_ref, tags, schema_version)
        VALUES
        (@id, @component, @digest, @status, @yaml, @json::jsonb,
        @target_count, @author_id, @created_at, @source_ref, @tags, @schema_version)
        ON CONFLICT (id) DO UPDATE SET
        component = EXCLUDED.component,
        content_digest = EXCLUDED.content_digest,
        status = EXCLUDED.status,
        definition_yaml = EXCLUDED.definition_yaml,
        definition_json = EXCLUDED.definition_json,
        target_count = EXCLUDED.target_count,
        source_ref = EXCLUDED.source_ref,
        tags = EXCLUDED.tags,
        schema_version = EXCLUDED.schema_version
        RETURNING (xmax = 0) AS was_inserted
        """;

    var metadata = definition.Metadata;

    await using var command = new NpgsqlCommand(upsertSql, conn);
    var args = command.Parameters;
    args.AddWithValue("@id", definition.Id);
    args.AddWithValue("@component", definition.Component);
    args.AddWithValue("@digest", digest);
    args.AddWithValue("@status", status.ToString().ToLowerInvariant());
    args.AddWithValue("@yaml", yaml);
    args.AddWithValue("@json", json);
    args.AddWithValue("@target_count", definition.Targets.Length);
    args.AddWithValue("@author_id", metadata.AuthorId);
    args.AddWithValue("@created_at", metadata.CreatedAt);
    args.AddWithValue("@source_ref", metadata.SourceRef);
    args.AddWithValue("@tags", metadata.Tags.ToArray());
    args.AddWithValue("@schema_version", metadata.SchemaVersion);

    // The statement reports whether the row was freshly inserted; a missing scalar counts as "not inserted".
    var scalar = await command.ExecuteScalarAsync(ct);
    var inserted = scalar is null ? false : (bool)scalar;
    return !inserted;
}
/// <summary>
/// Removes every target row that belongs to the given golden set.
/// </summary>
/// <param name="conn">Open connection; the caller owns the surrounding transaction.</param>
/// <param name="goldenSetId">ID of the golden set whose targets are removed.</param>
/// <param name="ct">Cancellation token.</param>
private static async Task DeleteTargetsAsync(NpgsqlConnection conn, string goldenSetId, CancellationToken ct)
{
    await using var command = new NpgsqlCommand(
        "DELETE FROM golden_sets.targets WHERE golden_set_id = @id",
        conn);
    command.Parameters.AddWithValue("@id", goldenSetId);

    await command.ExecuteNonQueryAsync(ct);
}
/// <summary>
/// Writes one row per vulnerable target of the definition.
/// </summary>
/// <remarks>
/// Edges are stored as a JSON array of their string forms; the optional taint invariant,
/// source file and source line are stored as NULL when absent.
/// </remarks>
/// <param name="conn">Open connection; the caller owns the surrounding transaction.</param>
/// <param name="definition">Definition whose targets are inserted.</param>
/// <param name="ct">Cancellation token.</param>
private static async Task InsertTargetsAsync(
    NpgsqlConnection conn,
    GoldenSetDefinition definition,
    CancellationToken ct)
{
    const string insertSql = """
        INSERT INTO golden_sets.targets
        (golden_set_id, function_name, edges, sinks, constants, taint_invariant, source_file, source_line)
        VALUES
        (@golden_set_id, @function_name, @edges::jsonb, @sinks, @constants, @taint_invariant, @source_file, @source_line)
        """;

    foreach (var item in definition.Targets)
    {
        var edgeTexts = item.Edges.Select(edge => edge.ToString()).ToArray();
        var edgesJson = JsonSerializer.Serialize(edgeTexts);

        await using var command = new NpgsqlCommand(insertSql, conn);
        var args = command.Parameters;
        args.AddWithValue("@golden_set_id", definition.Id);
        args.AddWithValue("@function_name", item.FunctionName);
        args.AddWithValue("@edges", edgesJson);
        args.AddWithValue("@sinks", item.Sinks.ToArray());
        args.AddWithValue("@constants", item.Constants.ToArray());
        args.AddWithValue("@taint_invariant", (object?)item.TaintInvariant ?? DBNull.Value);
        args.AddWithValue("@source_file", (object?)item.SourceFile ?? DBNull.Value);
        args.AddWithValue("@source_line", (object?)item.SourceLine ?? DBNull.Value);

        await command.ExecuteNonQueryAsync(ct);
    }
}
/// <summary>
/// Reads the stored status text of a golden set.
/// </summary>
/// <param name="conn">Open connection to query on.</param>
/// <param name="goldenSetId">ID of the golden set.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The status as stored, or <c>null</c> when the query yields no string value.</returns>
private static async Task<string?> GetCurrentStatusAsync(
    NpgsqlConnection conn,
    string goldenSetId,
    CancellationToken ct)
{
    await using var command = new NpgsqlCommand(
        "SELECT status FROM golden_sets.definitions WHERE id = @id",
        conn);
    command.Parameters.AddWithValue("@id", goldenSetId);

    return await command.ExecuteScalarAsync(ct) as string;
}
/// <summary>
/// Appends one entry to the audit log, timestamped with the injected clock.
/// </summary>
/// <param name="conn">Open connection; the caller owns the surrounding transaction.</param>
/// <param name="goldenSetId">ID of the golden set the entry belongs to.</param>
/// <param name="action">Name of the action being recorded.</param>
/// <param name="actorId">Who performed the action.</param>
/// <param name="oldStatus">Status before the action; stored as NULL when null.</param>
/// <param name="newStatus">Status after the action; stored as NULL when null.</param>
/// <param name="details">Optional payload serialized to JSON; stored as NULL when null.</param>
/// <param name="ct">Cancellation token.</param>
private async Task InsertAuditLogAsync(
    NpgsqlConnection conn,
    string goldenSetId,
    string action,
    string actorId,
    string? oldStatus,
    string? newStatus,
    object? details,
    CancellationToken ct)
{
    const string insertSql = """
        INSERT INTO golden_sets.audit_log
        (golden_set_id, action, actor_id, old_status, new_status, details, timestamp)
        VALUES
        (@golden_set_id, @action, @actor_id, @old_status, @new_status, @details::jsonb, @timestamp)
        """;

    // Absent values are sent as database NULLs.
    object previous = oldStatus is null ? DBNull.Value : oldStatus;
    object next = newStatus is null ? DBNull.Value : newStatus;
    object payload = details is null ? DBNull.Value : JsonSerializer.Serialize(details);

    await using var command = new NpgsqlCommand(insertSql, conn);
    var args = command.Parameters;
    args.AddWithValue("@golden_set_id", goldenSetId);
    args.AddWithValue("@action", action);
    args.AddWithValue("@actor_id", actorId);
    args.AddWithValue("@old_status", previous);
    args.AddWithValue("@new_status", next);
    args.AddWithValue("@details", payload);
    args.AddWithValue("@timestamp", _timeProvider.GetUtcNow());

    await command.ExecuteNonQueryAsync(ct);
}
/// <inheritdoc />
/// <remarks>
/// The status change and its audit entry (carrying the comment) are written in one
/// transaction. A missing golden set and database errors are reported through a failed result.
/// Cancellation requested by the caller is not converted into a result; it propagates as an
/// <see cref="OperationCanceledException"/>.
/// </remarks>
/// <exception cref="ArgumentException">
/// <paramref name="goldenSetId"/> or <paramref name="actorId"/> is null, empty or whitespace.
/// </exception>
public async Task<GoldenSetStoreResult> UpdateStatusAsync(
    string goldenSetId,
    GoldenSetStatus status,
    string actorId,
    string comment,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);
    ArgumentException.ThrowIfNullOrWhiteSpace(actorId);

    var newStatus = status.ToString().ToLowerInvariant();
    var recordsReview = status is GoldenSetStatus.Approved or GoldenSetStatus.InReview;

    await using var conn = await _dataSource.OpenConnectionAsync(ct);
    await using var tx = await conn.BeginTransactionAsync(ct);
    try
    {
        // Get current status
        var currentStatus = await GetCurrentStatusAsync(conn, goldenSetId, ct);
        if (currentStatus is null)
        {
            return GoldenSetStoreResult.Failed($"Golden set {goldenSetId} not found");
        }

        // Update status
        var sql = recordsReview
            ? """
              UPDATE golden_sets.definitions
              SET status = @status, reviewed_by = @reviewed_by, reviewed_at = @reviewed_at
              WHERE id = @id
              """
            : """
              UPDATE golden_sets.definitions
              SET status = @status
              WHERE id = @id
              """;

        await using var cmd = new NpgsqlCommand(sql, conn);
        cmd.Parameters.AddWithValue("@id", goldenSetId);
        cmd.Parameters.AddWithValue("@status", newStatus);
        if (recordsReview)
        {
            cmd.Parameters.AddWithValue("@reviewed_by", actorId);
            cmd.Parameters.AddWithValue("@reviewed_at", _timeProvider.GetUtcNow());
        }

        await cmd.ExecuteNonQueryAsync(ct);

        // Audit log with comment
        await InsertAuditLogWithCommentAsync(conn, goldenSetId, "status_change",
            actorId, currentStatus, newStatus, comment, ct);

        // Read the digest BEFORE committing. A status change never alters the digest, and
        // reading it after the commit meant that a failure of this read tried to roll back an
        // already-committed transaction and reported failure for a change that had persisted.
        var digest = await GetContentDigestAsync(conn, goldenSetId, ct);

        await tx.CommitAsync(ct);

        _logger.LogInformation("Updated golden set {Id} status from {OldStatus} to {NewStatus} by {Actor}",
            goldenSetId, currentStatus, status, actorId);

        return GoldenSetStoreResult.Succeeded(digest ?? string.Empty, wasUpdated: true);
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        _logger.LogError(ex, "Failed to update status for golden set {Id}", goldenSetId);

        // Best-effort rollback that cannot itself be cancelled or hide the original failure.
        // (On cancellation the filter above skips this handler; disposing the uncommitted
        // transaction discards the work.)
        try
        {
            await tx.RollbackAsync(CancellationToken.None);
        }
        catch (Exception rollbackEx)
        {
            _logger.LogWarning(rollbackEx, "Rollback failed for golden set {Id}", goldenSetId);
        }

        return GoldenSetStoreResult.Failed(ex.Message);
    }
}
/// <inheritdoc />
/// <remarks>
/// <c>UpdatedAt</c> is taken from the review time and falls back to the creation time when the
/// row has no review time.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="goldenSetId"/> is null, empty or whitespace.</exception>
public async Task<StoredGoldenSet?> GetAsync(string goldenSetId, CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);

    await using var connection = await _dataSource.OpenConnectionAsync(ct);
    await using var command = new NpgsqlCommand(
        """
        SELECT definition_yaml, status, created_at, COALESCE(reviewed_at, created_at) as updated_at
        FROM golden_sets.definitions
        WHERE id = @id
        """,
        connection);
    command.Parameters.AddWithValue("@id", goldenSetId);

    await using var rows = await command.ExecuteReaderAsync(ct);
    var found = await rows.ReadAsync(ct);
    if (!found)
    {
        return null;
    }

    // Columns are read in select-list order before the YAML is parsed.
    var storedYaml = rows.GetString(0);
    var storedStatus = Enum.Parse<GoldenSetStatus>(rows.GetString(1), ignoreCase: true);
    var created = rows.GetFieldValue<DateTimeOffset>(2);
    var updated = rows.GetFieldValue<DateTimeOffset>(3);

    var definition = GoldenSetYamlSerializer.Deserialize(storedYaml);
    return new StoredGoldenSet
    {
        Definition = definition,
        Status = storedStatus,
        CreatedAt = created,
        UpdatedAt = updated,
    };
}
/// <inheritdoc />
/// <remarks>
/// Entries come back newest first. The comment is read from the <c>comment</c> key of the
/// JSON details column; the query substitutes an empty string when that key is absent.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="goldenSetId"/> is null, empty or whitespace.</exception>
public async Task<ImmutableArray<GoldenSetAuditEntry>> GetAuditLogAsync(
    string goldenSetId,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(goldenSetId);

    // A NULL or empty status column means "no status recorded" for that side of the entry.
    static GoldenSetStatus? ToStatus(string? text) =>
        string.IsNullOrEmpty(text) ? null : Enum.Parse<GoldenSetStatus>(text, ignoreCase: true);

    const string historySql = """
        SELECT action, actor_id, timestamp, old_status, new_status,
        COALESCE(details->>'comment', '') as comment
        FROM golden_sets.audit_log
        WHERE golden_set_id = @id
        ORDER BY timestamp DESC
        """;

    await using var connection = await _dataSource.OpenConnectionAsync(ct);
    await using var command = new NpgsqlCommand(historySql, connection);
    command.Parameters.AddWithValue("@id", goldenSetId);

    var entries = ImmutableArray.CreateBuilder<GoldenSetAuditEntry>();
    await using var rows = await command.ExecuteReaderAsync(ct);
    while (await rows.ReadAsync(ct))
    {
        var before = rows.IsDBNull(3) ? null : rows.GetString(3);
        var after = rows.IsDBNull(4) ? null : rows.GetString(4);

        var entry = new GoldenSetAuditEntry
        {
            Operation = rows.GetString(0),
            ActorId = rows.GetString(1),
            Timestamp = rows.GetFieldValue<DateTimeOffset>(2),
            OldStatus = ToStatus(before),
            NewStatus = ToStatus(after),
            Comment = rows.IsDBNull(5) ? null : rows.GetString(5),
        };
        entries.Add(entry);
    }

    return entries.ToImmutable();
}
/// <summary>
/// Reads the stored content digest of a golden set.
/// </summary>
/// <param name="conn">Open connection to query on.</param>
/// <param name="goldenSetId">ID of the golden set.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The digest, or <c>null</c> when the query yields no string value.</returns>
private static async Task<string?> GetContentDigestAsync(
    NpgsqlConnection conn,
    string goldenSetId,
    CancellationToken ct)
{
    await using var command = new NpgsqlCommand(
        "SELECT content_digest FROM golden_sets.definitions WHERE id = @id",
        conn);
    command.Parameters.AddWithValue("@id", goldenSetId);

    return await command.ExecuteScalarAsync(ct) as string;
}
/// <summary>
/// Appends an audit entry whose details payload is <c>{"comment": ...}</c>.
/// </summary>
/// <remarks>
/// Delegates to <see cref="InsertAuditLogAsync"/>, which runs the same INSERT statement; this
/// method only shapes the details payload. A null comment produces NULL details.
/// </remarks>
/// <param name="conn">Open connection; the caller owns the surrounding transaction.</param>
/// <param name="goldenSetId">ID of the golden set the entry belongs to.</param>
/// <param name="action">Name of the action being recorded.</param>
/// <param name="actorId">Who performed the action.</param>
/// <param name="oldStatus">Status before the action; stored as NULL when null.</param>
/// <param name="newStatus">Status after the action; stored as NULL when null.</param>
/// <param name="comment">Comment to store under the <c>comment</c> key.</param>
/// <param name="ct">Cancellation token.</param>
private Task InsertAuditLogWithCommentAsync(
    NpgsqlConnection conn,
    string goldenSetId,
    string action,
    string actorId,
    string? oldStatus,
    string? newStatus,
    string? comment,
    CancellationToken ct)
{
    // The anonymous object is serialized by its runtime type, giving a single "comment" property.
    object? payload = comment is null ? null : new { comment };

    return InsertAuditLogAsync(conn, goldenSetId, action, actorId, oldStatus, newStatus, payload, ct);
}
}

View File

@@ -0,0 +1,406 @@
using System.Collections.Immutable;
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Implementation of <see cref="IGoldenSetValidator"/>.
/// </summary>
public sealed partial class GoldenSetValidator : IGoldenSetValidator
{
private readonly ISinkRegistry _sinkRegistry;
private readonly ICveValidator? _cveValidator;
private readonly GoldenSetOptions _options;
private readonly ILogger<GoldenSetValidator> _logger;
/// <summary>
/// Creates the validator and captures its dependencies.
/// </summary>
/// <param name="sinkRegistry">Registry of known sinks.</param>
/// <param name="options">Golden set options; its <c>Value</c> must not be null.</param>
/// <param name="logger">Logger for validation diagnostics.</param>
/// <param name="cveValidator">Optional CVE existence checker; when null the existence check is skipped.</param>
/// <exception cref="ArgumentNullException">A required dependency, or <c>options.Value</c>, is null.</exception>
public GoldenSetValidator(
    ISinkRegistry sinkRegistry,
    IOptions<GoldenSetOptions> options,
    ILogger<GoldenSetValidator> logger,
    ICveValidator? cveValidator = null)
{
    // Checks run in parameter order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(sinkRegistry);
    _sinkRegistry = sinkRegistry;

    ArgumentNullException.ThrowIfNull(options);
    _options = options.Value ?? throw new ArgumentNullException(nameof(options));

    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;

    _cveValidator = cveValidator;
}
/// <inheritdoc />
/// <remarks>
/// Runs the checks in a fixed order (required fields, ID format, optional CVE existence,
/// targets, metadata) and collects every error instead of stopping at the first one. The
/// content digest is computed only when no errors were found. When
/// <paramref name="options"/> is null, the configured validation options are used.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="definition"/> is null.</exception>
public async Task<GoldenSetValidationResult> ValidateAsync(
    GoldenSetDefinition definition,
    ValidationOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(definition);

    var configured = _options.Validation;
    var effective = options ?? new ValidationOptions
    {
        ValidateCveExists = configured.ValidateCveExists,
        ValidateSinks = configured.ValidateSinks,
        StrictEdgeFormat = configured.StrictEdgeFormat,
        OfflineMode = configured.OfflineMode
    };

    var errors = new List<ValidationError>();
    var warnings = new List<ValidationWarning>();

    // 1. Required fields
    ValidateRequiredFields(definition, errors);

    // 2. ID format
    ValidateIdFormat(definition.Id, errors);

    // 3. CVE existence: only when enabled, online, and a CVE validator was supplied
    var checkCveExists = effective.ValidateCveExists && !effective.OfflineMode && _cveValidator is not null;
    if (checkCveExists)
    {
        await ValidateCveExistsAsync(definition.Id, errors, ct);
    }

    // 4. Targets
    ValidateTargets(definition.Targets, effective, errors, warnings);

    // 5. Metadata
    ValidateMetadata(definition.Metadata, errors, warnings);

    if (errors.Count == 0)
    {
        var digest = ComputeContentDigest(definition);
        _logger.LogDebug("Golden set {Id} validated successfully with digest {Digest}", definition.Id, digest);
        return GoldenSetValidationResult.Success(
            definition,
            digest,
            warnings.ToImmutableArray());
    }

    _logger.LogDebug("Golden set {Id} validation failed with {ErrorCount} errors", definition.Id, errors.Count);
    return GoldenSetValidationResult.Failure(
        errors.ToImmutableArray(),
        warnings.ToImmutableArray());
}
/// <inheritdoc />
/// <remarks>
/// A YAML exception or an <see cref="InvalidOperationException"/> raised while parsing or
/// validating is turned into a failed result carrying a single YAML parse error.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="yamlContent"/> is null, empty or whitespace.</exception>
public async Task<GoldenSetValidationResult> ValidateYamlAsync(
    string yamlContent,
    ValidationOptions? options = null,
    CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(yamlContent);

    // Shared handling for the two exception types that are reported as a parse failure.
    GoldenSetValidationResult ParseFailure(Exception cause)
    {
        _logger.LogDebug(cause, "YAML parsing failed");
        return GoldenSetValidationResult.Failure(
            [new ValidationError(ValidationErrorCodes.YamlParseError, cause.Message)]);
    }

    try
    {
        var parsed = GoldenSetYamlSerializer.Deserialize(yamlContent);
        return await ValidateAsync(parsed, options, ct);
    }
    catch (YamlDotNet.Core.YamlException yamlError)
    {
        return ParseFailure(yamlError);
    }
    catch (InvalidOperationException invalidOperation)
    {
        return ParseFailure(invalidOperation);
    }
}
/// <summary>
/// Reports an error for each mandatory top-level field (id, component, targets, metadata)
/// that is absent or blank.
/// </summary>
/// <param name="definition">Definition being validated.</param>
/// <param name="errors">Collector that receives one error per missing field.</param>
private static void ValidateRequiredFields(GoldenSetDefinition definition, List<ValidationError> errors)
{
    void ReportMissing(string message, string path) =>
        errors.Add(new ValidationError(ValidationErrorCodes.RequiredFieldMissing, message, path));

    if (string.IsNullOrWhiteSpace(definition.Id))
    {
        ReportMissing("Id is required", "id");
    }

    if (string.IsNullOrWhiteSpace(definition.Component))
    {
        ReportMissing("Component is required", "component");
    }

    // An uninitialized array and an empty array are both treated as "no targets",
    // which has its own dedicated error code.
    if (definition.Targets.IsDefaultOrEmpty)
    {
        errors.Add(new ValidationError(ValidationErrorCodes.NoTargets, "At least one target is required", "targets"));
    }

    if (definition.Metadata is null)
    {
        ReportMissing("Metadata is required", "metadata");
    }
}
/// <summary>
/// Checks that a non-blank ID matches either the CVE or the GHSA identifier pattern.
/// </summary>
/// <param name="id">Identifier to check; blank values are ignored here.</param>
/// <param name="errors">Collector that receives an error when the format is not recognized.</param>
private static void ValidateIdFormat(string? id, List<ValidationError> errors)
{
    // Blank IDs are skipped: they are handled as a missing required field instead.
    if (string.IsNullOrWhiteSpace(id))
    {
        return;
    }

    var matchesKnownScheme = CveIdRegex().IsMatch(id) || GhsaIdRegex().IsMatch(id);
    if (matchesKnownScheme)
    {
        return;
    }

    var message = string.Format(
        CultureInfo.InvariantCulture,
        "Invalid ID format: {0}. Expected CVE-YYYY-NNNN or GHSA-xxxx-xxxx-xxxx.",
        id);
    errors.Add(new ValidationError(ValidationErrorCodes.InvalidIdFormat, message, "id"));
}
/// <summary>
/// Asks the configured CVE validator whether <paramref name="id"/> exists and records a
/// <c>CVE_NOT_FOUND</c> error when it does not.
/// </summary>
/// <param name="id">Vulnerability identifier to look up.</param>
/// <param name="errors">Collector that receives the not-found error.</param>
/// <param name="ct">Cancellation token forwarded to the lookup.</param>
/// <exception cref="OperationCanceledException">
/// Propagated when <paramref name="ct"/> has been cancelled by the caller.
/// </exception>
private async Task ValidateCveExistsAsync(string id, List<ValidationError> errors, CancellationToken ct)
{
    // Nothing to do without a validator. A blank ID is reported by the required-field
    // check, so a lookup would only add a redundant "not found" error.
    if (_cveValidator is null || string.IsNullOrWhiteSpace(id))
    {
        return;
    }

    try
    {
        var exists = await _cveValidator.ExistsAsync(id, ct);
        if (!exists)
        {
            errors.Add(new ValidationError(
                ValidationErrorCodes.CveNotFound,
                string.Format(CultureInfo.InvariantCulture, "CVE {0} not found in NVD/OSV", id),
                "id"));
        }
    }
    catch (Exception ex) when (!(ex is OperationCanceledException && ct.IsCancellationRequested))
    {
        // Lookup failures (including timeouts not triggered by the caller's token) are
        // best-effort: log them, but do not block validation. Caller-requested
        // cancellation is excluded by the filter above so that it still propagates.
        _logger.LogWarning(ex, "Failed to validate CVE existence for {Id}", id);
    }
}
/// <summary>
/// Validates every vulnerable target: function name, edge format, sink registry membership
/// and constant shape, plus advisory warnings for targets without edges or sinks.
/// </summary>
/// <param name="targets">Targets to inspect; an uninitialized array is ignored.</param>
/// <param name="options">Switches for the edge-format and sink checks.</param>
/// <param name="errors">Collector for blocking problems.</param>
/// <param name="warnings">Collector for advisory findings.</param>
private void ValidateTargets(
    ImmutableArray<VulnerableTarget> targets,
    ValidationOptions options,
    List<ValidationError> errors,
    List<ValidationWarning> warnings)
{
    if (targets.IsDefault)
    {
        return;
    }

    var position = 0;
    foreach (var current in targets)
    {
        var basePath = string.Format(CultureInfo.InvariantCulture, "targets[{0}]", position);
        position++;

        // A target without a function name is an error.
        if (string.IsNullOrWhiteSpace(current.FunctionName))
        {
            errors.Add(new ValidationError(
                ValidationErrorCodes.EmptyFunctionName,
                "Function name is required",
                basePath + ".function"));
        }

        // Edge format problems are errors, but only in strict mode.
        if (options.StrictEdgeFormat && !current.Edges.IsDefault)
        {
            var edgesPath = basePath + ".edges";
            errors.AddRange(current.Edges
                .Where(edge => !IsValidEdgeFormat(edge))
                .Select(edge => new ValidationError(
                    ValidationErrorCodes.InvalidEdgeFormat,
                    string.Format(CultureInfo.InvariantCulture, "Invalid edge format: {0}. Expected 'bbN->bbM'.", edge),
                    edgesPath)));
        }

        // Sinks missing from the registry only produce warnings.
        if (options.ValidateSinks && !current.Sinks.IsDefault)
        {
            var sinksPath = basePath + ".sinks";
            warnings.AddRange(current.Sinks
                .Where(sink => !_sinkRegistry.IsKnownSink(sink))
                .Select(sink => new ValidationWarning(
                    ValidationWarningCodes.UnknownSink,
                    string.Format(CultureInfo.InvariantCulture, "Sink '{0}' not in registry", sink),
                    sinksPath)));
        }

        // Suspicious constants only produce warnings.
        if (!current.Constants.IsDefault)
        {
            var constantsPath = basePath + ".constants";
            warnings.AddRange(current.Constants
                .Where(constant => !IsValidConstant(constant))
                .Select(constant => new ValidationWarning(
                    ValidationWarningCodes.MalformedConstant,
                    string.Format(CultureInfo.InvariantCulture, "Constant '{0}' may be malformed", constant),
                    constantsPath)));
        }

        // Targets with no edges or no sinks are allowed but flagged.
        if (current.Edges.IsDefaultOrEmpty)
        {
            warnings.Add(new ValidationWarning(
                ValidationWarningCodes.NoEdges,
                "No edges defined for target",
                basePath));
        }

        if (current.Sinks.IsDefaultOrEmpty)
        {
            warnings.Add(new ValidationWarning(
                ValidationWarningCodes.NoSinks,
                "No sinks defined for target",
                basePath));
        }
    }
}
/// <summary>
/// Validates golden set metadata: author, source reference, creation timestamp and
/// schema version format. A source reference that is neither an absolute URI nor a
/// <c>sha256:</c> value produces a warning.
/// </summary>
/// <param name="metadata">Metadata to inspect; null is ignored here.</param>
/// <param name="errors">Collector for blocking problems.</param>
/// <param name="warnings">Collector for advisory findings.</param>
private static void ValidateMetadata(
    GoldenSetMetadata? metadata,
    List<ValidationError> errors,
    List<ValidationWarning> warnings)
{
    // Missing metadata is handled by the required-field check.
    if (metadata is null)
    {
        return;
    }

    void Fail(string code, string message, string path) =>
        errors.Add(new ValidationError(code, message, path));

    if (string.IsNullOrWhiteSpace(metadata.AuthorId))
    {
        Fail(ValidationErrorCodes.RequiredFieldMissing, "Author ID is required", "metadata.author_id");
    }

    if (string.IsNullOrWhiteSpace(metadata.SourceRef))
    {
        Fail(ValidationErrorCodes.RequiredFieldMissing, "Source reference is required", "metadata.source_ref");
    }

    // An unset timestamp shows up as the default / minimum value.
    var createdAtUnset = metadata.CreatedAt == default || metadata.CreatedAt == DateTimeOffset.MinValue;
    if (createdAtUnset)
    {
        Fail(ValidationErrorCodes.InvalidTimestamp, "Created timestamp is required and must be valid", "metadata.created_at");
    }

    // The schema version is optional, but when present it must match the version pattern.
    var schemaVersion = metadata.SchemaVersion;
    if (!string.IsNullOrEmpty(schemaVersion) && !SchemaVersionRegex().IsMatch(schemaVersion))
    {
        Fail(
            ValidationErrorCodes.InvalidSchemaVersion,
            string.Format(CultureInfo.InvariantCulture, "Invalid schema version format: {0}", schemaVersion),
            "metadata.schema_version");
    }

    // A present source reference should be an absolute URI or a sha256-prefixed value.
    if (!string.IsNullOrWhiteSpace(metadata.SourceRef) &&
        !(Uri.TryCreate(metadata.SourceRef, UriKind.Absolute, out _) ||
          metadata.SourceRef.StartsWith("sha256:", StringComparison.OrdinalIgnoreCase)))
    {
        warnings.Add(new ValidationWarning(
            ValidationWarningCodes.InvalidSourceRef,
            "Source reference may be invalid (not a URL or hash)",
            "metadata.source_ref"));
    }
}
/// <summary>
/// Returns true when both endpoints of the edge start with the <c>bb</c> block prefix.
/// Only the prefix is checked; whatever follows it is not inspected.
/// </summary>
/// <param name="edge">Edge whose From/To block identifiers are checked.</param>
private static bool IsValidEdgeFormat(BasicBlockEdge edge)
{
    const string BlockPrefix = "bb";

    if (!edge.From.StartsWith(BlockPrefix, StringComparison.Ordinal))
    {
        return false;
    }

    return edge.To.StartsWith(BlockPrefix, StringComparison.Ordinal);
}
/// <summary>
/// Decides whether a constant looks well-formed. Blank values are rejected, a value with
/// a <c>0x</c>/<c>0X</c> prefix must be followed by at least one character and only ASCII
/// hex digits, and any other non-blank value is accepted.
/// </summary>
/// <param name="constant">Constant text to check.</param>
/// <returns>True when the constant is acceptable; otherwise false.</returns>
private static bool IsValidConstant(string constant)
{
    if (string.IsNullOrWhiteSpace(constant))
    {
        return false;
    }

    var hasHexPrefix = constant.StartsWith("0x", StringComparison.OrdinalIgnoreCase);
    if (!hasHexPrefix)
    {
        // Decimal numbers, quoted strings and any other non-blank text pass as-is.
        return true;
    }

    // Hex literal: need at least one digit after the prefix, and nothing but hex digits.
    var digits = constant.AsSpan(2);
    if (digits.IsEmpty)
    {
        return false;
    }

    foreach (var ch in digits)
    {
        if (!char.IsAsciiHexDigit(ch))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Computes the content digest of a definition as <c>sha256:</c> followed by the lowercase
/// hex SHA-256 of a canonical JSON projection.
/// </summary>
/// <remarks>
/// The projection omits the definition's own ContentDigest. Targets are ordered by function
/// name and each target's edges, sinks and constants, as well as the metadata tags, are
/// ordered ordinally, so the digest does not depend on their input order. Witness arguments
/// keep their original order.
/// </remarks>
/// <param name="definition">Definition to hash; expected to have passed validation (non-null metadata).</param>
/// <returns>The digest string, e.g. <c>sha256:ab12...</c>.</returns>
private static string ComputeContentDigest(GoldenSetDefinition definition)
{
    // Validation accepts targets whose edges/sinks/constants arrays are uninitialized
    // (it only warns), but LINQ over a default ImmutableArray throws. Treat those as empty.
    static ImmutableArray<T> OrEmpty<T>(ImmutableArray<T> values) =>
        values.IsDefault ? ImmutableArray<T>.Empty : values;

    // Create a canonical representation for hashing.
    // ContentDigest itself is excluded from the hash computation.
    var canonical = new
    {
        id = definition.Id,
        component = definition.Component,
        targets = definition.Targets.Select(t => new
        {
            function = t.FunctionName,
            edges = OrEmpty(t.Edges).Select(e => e.ToString()).OrderBy(e => e, StringComparer.Ordinal).ToArray(),
            sinks = OrEmpty(t.Sinks).OrderBy(s => s, StringComparer.Ordinal).ToArray(),
            constants = OrEmpty(t.Constants).OrderBy(c => c, StringComparer.Ordinal).ToArray(),
            taint_invariant = t.TaintInvariant,
            source_file = t.SourceFile,
            source_line = t.SourceLine
        }).OrderBy(t => t.function, StringComparer.Ordinal).ToArray(),
        witness = definition.Witness is null ? null : new
        {
            arguments = definition.Witness.Arguments.ToArray(),
            invariant = definition.Witness.Invariant,
            poc_file_ref = definition.Witness.PocFileRef
        },
        metadata = new
        {
            author_id = definition.Metadata.AuthorId,
            // Timestamps are normalized to UTC round-trip format so the offset used by
            // the author does not influence the digest.
            created_at = definition.Metadata.CreatedAt.ToUniversalTime().ToString("O", CultureInfo.InvariantCulture),
            source_ref = definition.Metadata.SourceRef,
            reviewed_by = definition.Metadata.ReviewedBy,
            reviewed_at = definition.Metadata.ReviewedAt?.ToUniversalTime().ToString("O", CultureInfo.InvariantCulture),
            tags = definition.Metadata.Tags.OrderBy(t => t, StringComparer.Ordinal).ToArray(),
            schema_version = definition.Metadata.SchemaVersion
        }
    };

    var json = JsonSerializer.Serialize(canonical, CanonicalJsonOptions);
    var hash = SHA256.HashData(Encoding.UTF8.GetBytes(json));
    return string.Concat("sha256:", Convert.ToHexStringLower(hash));
}
/// <summary>
/// Serializer settings for the canonical digest payload: compact output, snake_case
/// property names, and null-valued properties left out.
/// </summary>
private static readonly JsonSerializerOptions CanonicalJsonOptions = new JsonSerializerOptions
{
    DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
    WriteIndented = false,
    PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
};
/// <summary>Source-generated matcher built from <c>GoldenSetConstants.CveIdPattern</c>.</summary>
[GeneratedRegex(GoldenSetConstants.CveIdPattern)]
private static partial Regex CveIdRegex();
/// <summary>Source-generated matcher built from <c>GoldenSetConstants.GhsaIdPattern</c>.</summary>
[GeneratedRegex(GoldenSetConstants.GhsaIdPattern)]
private static partial Regex GhsaIdRegex();
/// <summary>Source-generated matcher for schema versions: exactly three dot-separated digit groups (e.g. <c>1.0.0</c>).</summary>
[GeneratedRegex(@"^\d+\.\d+\.\d+$")]
private static partial Regex SchemaVersionRegex();
}

View File

@@ -0,0 +1,64 @@
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Service for validating CVE existence in external databases.
/// </summary>
/// <remarks>
/// The golden set validator passes the definition ID (CVE or GHSA form) to
/// <see cref="ExistsAsync"/> and treats lookup failures as non-blocking: they are logged
/// rather than reported as validation errors.
/// </remarks>
public interface ICveValidator
{
/// <summary>
/// Checks if a vulnerability ID exists in NVD/OSV/GHSA.
/// </summary>
/// <param name="vulnerabilityId">The vulnerability ID to check.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>True if the vulnerability exists; otherwise, false.</returns>
Task<bool> ExistsAsync(string vulnerabilityId, CancellationToken ct = default);
/// <summary>
/// Gets vulnerability details if available.
/// </summary>
/// <param name="vulnerabilityId">The vulnerability ID to look up.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Vulnerability details or null if not found.</returns>
Task<CveDetails?> GetDetailsAsync(string vulnerabilityId, CancellationToken ct = default);
}
/// <summary>
/// Basic CVE details from external sources.
/// Only <see cref="Id"/> and <see cref="Source"/> are required; every other field is optional.
/// </summary>
public sealed record CveDetails
{
/// <summary>
/// Vulnerability ID (required).
/// </summary>
public required string Id { get; init; }
/// <summary>
/// Description of the vulnerability, or null when none is available.
/// </summary>
public string? Description { get; init; }
/// <summary>
/// Published date, or null when unknown.
/// </summary>
public DateTimeOffset? PublishedDate { get; init; }
/// <summary>
/// Last modified date, or null when unknown.
/// </summary>
public DateTimeOffset? ModifiedDate { get; init; }
/// <summary>
/// Associated CWE IDs. Defaults to an empty list.
/// </summary>
public IReadOnlyList<string> CweIds { get; init; } = [];
/// <summary>
/// CVSS score if available; null otherwise.
/// </summary>
public double? CvssScore { get; init; }
/// <summary>
/// Source of the data (nvd, osv, ghsa). Required.
/// </summary>
public required string Source { get; init; }
}

View File

@@ -0,0 +1,198 @@
using System.Collections.Immutable;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Service for validating golden set definitions.
/// </summary>
public interface IGoldenSetValidator
{
/// <summary>
/// Validates a golden set definition.
/// </summary>
/// <param name="definition">The definition to validate.</param>
/// <param name="options">Validation options; may be null, in which case the implementation chooses the options to apply.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// Validation result with errors and warnings. A successful result also carries the
/// definition and its content digest (see <see cref="GoldenSetValidationResult.Success"/>).
/// </returns>
Task<GoldenSetValidationResult> ValidateAsync(
GoldenSetDefinition definition,
ValidationOptions? options = null,
CancellationToken ct = default);
/// <summary>
/// Validates a golden set from YAML content.
/// </summary>
/// <param name="yamlContent">YAML string to parse and validate.</param>
/// <param name="options">Validation options; may be null, in which case the implementation chooses the options to apply.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Validation result with errors and warnings.</returns>
Task<GoldenSetValidationResult> ValidateYamlAsync(
string yamlContent,
ValidationOptions? options = null,
CancellationToken ct = default);
}
/// <summary>
/// Result of golden set validation. Create instances through <see cref="Success"/> or
/// <see cref="Failure"/> so that the error and warning collections are always initialized.
/// </summary>
public sealed record GoldenSetValidationResult
{
    /// <summary>
    /// Whether the definition is valid (no errors).
    /// </summary>
    public required bool IsValid { get; init; }

    /// <summary>
    /// Validation errors (must be empty for IsValid to be true).
    /// </summary>
    public ImmutableArray<ValidationError> Errors { get; init; } = [];

    /// <summary>
    /// Validation warnings (do not affect IsValid).
    /// </summary>
    public ImmutableArray<ValidationWarning> Warnings { get; init; } = [];

    /// <summary>
    /// Parsed definition with computed content digest (null if errors).
    /// </summary>
    public GoldenSetDefinition? ParsedDefinition { get; init; }

    /// <summary>
    /// Content digest of the validated definition (null if errors).
    /// </summary>
    public string? ContentDigest { get; init; }

    /// <summary>
    /// Creates a successful validation result.
    /// </summary>
    /// <param name="definition">The validated definition; a copy with <paramref name="contentDigest"/> applied is stored.</param>
    /// <param name="contentDigest">Digest computed for the definition.</param>
    /// <param name="warnings">Warnings gathered during validation; an uninitialized array is stored as empty.</param>
    /// <returns>A result with <see cref="IsValid"/> set to true and no errors.</returns>
    public static GoldenSetValidationResult Success(
        GoldenSetDefinition definition,
        string contentDigest,
        ImmutableArray<ValidationWarning> warnings = default) => new()
        {
            IsValid = true,
            ParsedDefinition = definition with { ContentDigest = contentDigest },
            ContentDigest = contentDigest,
            Warnings = warnings.IsDefault ? [] : warnings
        };

    /// <summary>
    /// Creates a failed validation result.
    /// </summary>
    /// <param name="errors">Errors that caused the failure; an uninitialized array is stored as empty.</param>
    /// <param name="warnings">Warnings gathered during validation; an uninitialized array is stored as empty.</param>
    /// <returns>A result with <see cref="IsValid"/> set to false and no parsed definition or digest.</returns>
    public static GoldenSetValidationResult Failure(
        ImmutableArray<ValidationError> errors,
        ImmutableArray<ValidationWarning> warnings = default) => new()
        {
            IsValid = false,
            // Normalize like Warnings: a default ImmutableArray would make consumers that
            // read Errors.Length or enumerate Errors throw.
            Errors = errors.IsDefault ? [] : errors,
            Warnings = warnings.IsDefault ? [] : warnings
        };
}
/// <summary>
/// A validation error (blocks acceptance).
/// </summary>
/// <param name="Code">Error code for programmatic handling; see <see cref="ValidationErrorCodes"/>.</param>
/// <param name="Message">Human-readable error message.</param>
/// <param name="Path">Path to the problematic field (e.g. <c>targets[0].edges</c> or <c>metadata.author_id</c>); null when the error is not tied to a field.</param>
public sealed record ValidationError(
string Code,
string Message,
string? Path = null);
/// <summary>
/// A validation warning (informational, does not block).
/// </summary>
/// <param name="Code">Warning code for programmatic handling; see <see cref="ValidationWarningCodes"/>.</param>
/// <param name="Message">Human-readable warning message.</param>
/// <param name="Path">Path to the problematic field (e.g. <c>targets[0].sinks</c> or <c>metadata.source_ref</c>); null when the warning is not tied to a field.</param>
public sealed record ValidationWarning(
string Code,
string Message,
string? Path = null);
/// <summary>
/// Options controlling validation behavior.
/// </summary>
public sealed record ValidationOptions
{
/// <summary>
/// Validate that the CVE exists in NVD/OSV (requires network). Defaults to true.
/// The validator skips this lookup when <see cref="OfflineMode"/> is set.
/// </summary>
public bool ValidateCveExists { get; init; } = true;
/// <summary>
/// Validate that sinks are in the registry. Defaults to true.
/// Unknown sinks are reported as warnings, not errors.
/// </summary>
public bool ValidateSinks { get; init; } = true;
/// <summary>
/// Validate edge format strictly (must match bbN->bbM). Defaults to true.
/// NOTE(review): the validator's IsValidEdgeFormat check only verifies that both endpoints
/// start with "bb" - confirm whether the stricter bbN form is intended.
/// </summary>
public bool StrictEdgeFormat { get; init; } = true;
/// <summary>
/// Skip network calls (air-gap mode). Defaults to false.
/// </summary>
public bool OfflineMode { get; init; } = false;
}
/// <summary>
/// Well-known validation error codes, used as <see cref="ValidationError.Code"/> values.
/// </summary>
public static class ValidationErrorCodes
{
/// <summary>Required field is missing.</summary>
public const string RequiredFieldMissing = "REQUIRED_FIELD_MISSING";
/// <summary>CVE not found in external databases.</summary>
public const string CveNotFound = "CVE_NOT_FOUND";
/// <summary>Invalid vulnerability ID format.</summary>
public const string InvalidIdFormat = "INVALID_ID_FORMAT";
/// <summary>Empty or whitespace function name.</summary>
public const string EmptyFunctionName = "EMPTY_FUNCTION_NAME";
/// <summary>Invalid basic block edge format.</summary>
public const string InvalidEdgeFormat = "INVALID_EDGE_FORMAT";
/// <summary>No targets defined.</summary>
public const string NoTargets = "NO_TARGETS";
/// <summary>Invalid constant format.</summary>
// NOTE(review): the validator code reviewed with this file reports malformed constants as a
// MALFORMED_CONSTANT warning and does not appear to emit this error code - confirm usage.
public const string InvalidConstant = "INVALID_CONSTANT";
/// <summary>Invalid timestamp format.</summary>
public const string InvalidTimestamp = "INVALID_TIMESTAMP";
/// <summary>Invalid schema version.</summary>
public const string InvalidSchemaVersion = "INVALID_SCHEMA_VERSION";
/// <summary>YAML parsing failed.</summary>
public const string YamlParseError = "YAML_PARSE_ERROR";
}
/// <summary>
/// Well-known validation warning codes, used as <see cref="ValidationWarning.Code"/> values.
/// </summary>
public static class ValidationWarningCodes
{
/// <summary>Sink not found in registry.</summary>
public const string UnknownSink = "UNKNOWN_SINK";
/// <summary>Edge format may be non-standard.</summary>
// NOTE(review): the validator code reviewed with this file reports edge format problems as
// INVALID_EDGE_FORMAT errors and does not appear to emit this warning - confirm usage.
public const string NonStandardEdge = "NON_STANDARD_EDGE";
/// <summary>Constant may be malformed.</summary>
public const string MalformedConstant = "MALFORMED_CONSTANT";
/// <summary>Source reference may be invalid.</summary>
public const string InvalidSourceRef = "INVALID_SOURCE_REF";
/// <summary>No sinks defined for target.</summary>
public const string NoSinks = "NO_SINKS";
/// <summary>No edges defined for target.</summary>
public const string NoEdges = "NO_EDGES";
}