// File: git.stella-ops.org/src/BinaryIndex/__Libraries/StellaOps.BinaryIndex.GoldenSet/Models/GoldenSetDefinition.cs
// Last modified: 2026-02-01 21:37:40 +02:00
// Size: 267 lines, 7.7 KiB (C#)

using System.Collections.Immutable;
using System.Globalization;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Curated ground truth describing how a single vulnerability shows up at the code level.
/// Definitions are written by hand, kept intentionally small, and reviewed like unit tests.
/// </summary>
/// <remarks>
/// NOTE(review): equality is the compiler-generated record equality, and
/// <see cref="ImmutableArray{T}"/> compares the underlying array instance rather than its
/// elements. Two definitions whose <see cref="Targets"/> hold the same items in separately
/// built arrays will therefore not compare equal - confirm no caller relies on that.
/// </remarks>
public sealed record GoldenSetDefinition
{
    /// <summary>Unique identifier of the golden set; typically a CVE ID such as "CVE-2024-0727".</summary>
    public required string Id { get; init; }

    /// <summary>Name of the affected component, for example "openssl" or "glibc".</summary>
    public required string Component { get; init; }

    /// <summary>The vulnerable code targets (functions, edges, sinks) covered by this definition.</summary>
    public required ImmutableArray<VulnerableTarget> Targets { get; init; }

    /// <summary>Witness input that reproduces the vulnerability; <c>null</c> when none is recorded.</summary>
    public WitnessInput? Witness { get; init; }

    /// <summary>Authoring and review metadata for this golden set.</summary>
    public required GoldenSetMetadata Metadata { get; init; }

    /// <summary>
    /// Content-addressed digest of the canonical form.
    /// This value is computed rather than supplied by the author.
    /// </summary>
    public string? ContentDigest { get; init; }
}
/// <summary>
/// One vulnerable code location inside a component.
/// </summary>
/// <remarks>
/// NOTE(review): the <see cref="ImmutableArray{T}"/> members take part in record equality by
/// underlying array instance, not by element - confirm callers do not expect structural equality.
/// </remarks>
public sealed record VulnerableTarget
{
    /// <summary>Name of the function, given either as a symbol or as a demangled name.</summary>
    public required string FunctionName { get; init; }

    /// <summary>Basic block edges that make up the vulnerable path. Empty unless set.</summary>
    public ImmutableArray<BasicBlockEdge> Edges { get; init; } = ImmutableArray<BasicBlockEdge>.Empty;

    /// <summary>Sink functions reached on the path, e.g. "memcpy" or "strcpy". Empty unless set.</summary>
    public ImmutableArray<string> Sinks { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>Constants or magic values that identify the vulnerable code. Empty unless set.</summary>
    public ImmutableArray<string> Constants { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>Human-readable invariant that has to hold for the vulnerability to be exploitable.</summary>
    public string? TaintInvariant { get; init; }

    /// <summary>Source file hint, if known.</summary>
    public string? SourceFile { get; init; }

    /// <summary>Source line hint, if known.</summary>
    public int? SourceLine { get; init; }
}
/// <summary>
/// A directed edge between two basic blocks of a control-flow graph.
/// The textual form is "bbN->bbM", where N and M are block identifiers.
/// </summary>
public sealed record BasicBlockEdge
{
    /// <summary>Token that separates the source block from the target block in the textual form.</summary>
    private const string Separator = "->";

    /// <summary>
    /// Source basic block identifier (e.g., "bb3").
    /// </summary>
    public required string From { get; init; }

    /// <summary>
    /// Target basic block identifier (e.g., "bb7").
    /// </summary>
    public required string To { get; init; }

    /// <summary>
    /// Parses an edge from the string format "bbN->bbM".
    /// </summary>
    /// <remarks>
    /// Shares its validation with <see cref="TryParse"/>: the input must contain exactly one
    /// "->" separator with non-blank text on both sides. Surrounding whitespace on either side
    /// is trimmed. The identifiers themselves are not checked for a "bb" prefix or digits.
    /// </remarks>
    /// <param name="edge">The edge string to parse.</param>
    /// <returns>A new <see cref="BasicBlockEdge"/> instance.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="edge"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">Thrown when <paramref name="edge"/> is empty or whitespace.</exception>
    /// <exception cref="FormatException">Thrown when the edge format is invalid.</exception>
    public static BasicBlockEdge Parse(string edge)
    {
        // Null/blank input is reported as an argument error, before any format check.
        ArgumentException.ThrowIfNullOrWhiteSpace(edge);

        // Delegate to TryParse so both entry points always accept exactly the same inputs.
        if (!TryParse(edge, out var parsed))
        {
            throw new FormatException(
                string.Format(CultureInfo.InvariantCulture, "Invalid edge format: {0}. Expected 'bbN->bbM'.", edge));
        }

        return parsed;
    }

    /// <summary>
    /// Tries to parse an edge from the string format "bbN->bbM" without throwing.
    /// </summary>
    /// <param name="edge">The edge string to parse; <c>null</c> or blank input yields <c>false</c>.</param>
    /// <param name="result">The parsed edge when the method returns <c>true</c>; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> if parsing succeeded; otherwise, <c>false</c>.</returns>
    public static bool TryParse(
        string? edge,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out BasicBlockEdge? result)
    {
        result = null;

        if (string.IsNullOrWhiteSpace(edge))
        {
            return false;
        }

        // TrimEntries strips whitespace around each side, so " bb1 -> bb2 " is accepted.
        var parts = edge.Split(Separator, StringSplitOptions.TrimEntries);

        // Exactly two non-blank sides are required; "a->b->c", "->b" and "a->" are all rejected.
        if (parts.Length != 2 || string.IsNullOrWhiteSpace(parts[0]) || string.IsNullOrWhiteSpace(parts[1]))
        {
            return false;
        }

        result = new BasicBlockEdge { From = parts[0], To = parts[1] };
        return true;
    }

    /// <summary>Formats the edge as "From->To", the same shape that <see cref="Parse"/> accepts.</summary>
    public override string ToString() => string.Concat(From, Separator, To);
}
/// <summary>
/// Input that can be used to reproduce the vulnerability.
/// </summary>
public sealed record WitnessInput
{
    /// <summary>Command-line arguments that trigger the vulnerability. Empty unless set.</summary>
    public ImmutableArray<string> Arguments { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>Human-readable invariant or precondition for the witness.</summary>
    public string? Invariant { get; init; }

    /// <summary>Content-addressed reference to a proof-of-concept file, formatted as "sha256:...".</summary>
    public string? PocFileRef { get; init; }
}
/// <summary>
/// Authoring and review information attached to a golden set.
/// </summary>
public sealed record GoldenSetMetadata
{
    /// <summary>ID of the author who created the golden set.</summary>
    public required string AuthorId { get; init; }

    /// <summary>When the golden set was created (UTC).</summary>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>Where the facts come from: an advisory URL, a commit hash, or similar.</summary>
    public required string SourceRef { get; init; }

    /// <summary>ID of the reviewer; <c>null</c> if the golden set has not been reviewed.</summary>
    public string? ReviewedBy { get; init; }

    /// <summary>When the review took place (UTC); <c>null</c> if not reviewed.</summary>
    public DateTimeOffset? ReviewedAt { get; init; }

    /// <summary>Classification tags such as "memory-corruption" or "heap-overflow". Empty unless set.</summary>
    public ImmutableArray<string> Tags { get; init; } = ImmutableArray<string>.Empty;

    /// <summary>
    /// Schema version, kept for forward compatibility.
    /// Defaults to <see cref="GoldenSetConstants.CurrentSchemaVersion"/>.
    /// </summary>
    public string SchemaVersion { get; init; } = GoldenSetConstants.CurrentSchemaVersion;
}
/// <summary>
/// Lifecycle state of a golden set within the corpus.
/// </summary>
public enum GoldenSetStatus
{
    /// <summary>Still a draft; no review has happened yet.</summary>
    Draft = 0,

    /// <summary>Currently under review.</summary>
    InReview = 1,

    /// <summary>Approved and active.</summary>
    Approved = 2,

    /// <summary>Deprecated because the CVE was retracted or superseded.</summary>
    Deprecated = 3,

    /// <summary>Archived; kept for historical reference only.</summary>
    Archived = 4,
}
/// <summary>
/// Shared constants for the Golden Set module: the schema version and validation patterns.
/// </summary>
/// <remarks>
/// NOTE(review): if these patterns are evaluated with .NET's <c>Regex</c> under default options,
/// <c>\d</c> also matches non-ASCII Unicode decimal digits and <c>$</c> also matches just before
/// a trailing newline. Confirm whether the validators need stricter options or anchors.
/// </remarks>
public static class GoldenSetConstants
{
    /// <summary>Schema version currently used for golden set definitions.</summary>
    public const string CurrentSchemaVersion = "1.0.0";

    /// <summary>Regex pattern for CVE IDs: "CVE-", a four-digit group, then four or more digits.</summary>
    public const string CveIdPattern = @"^CVE-\d{4}-\d{4,}$";

    /// <summary>Regex pattern for GHSA IDs: "GHSA-" followed by three groups of four lowercase alphanumerics.</summary>
    public const string GhsaIdPattern = @"^GHSA-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}$";

    /// <summary>Regex pattern for IDs of synthetic test fixtures ("SYNTH-NNNN-name").</summary>
    public const string SyntheticIdPattern = @"^SYNTH-\d{4}-[a-z][a-z0-9\-]*$";

    /// <summary>Regex pattern for the basic block edge format "bbN->bbM".</summary>
    public const string EdgePattern = @"^bb\d+->bb\d+$";

    /// <summary>Regex pattern for a content-addressed digest: "sha256:" plus 64 lowercase hex characters.</summary>
    public const string DigestPattern = @"^sha256:[a-f0-9]{64}$";
}