// File listing metadata: 267 lines, 7.7 KiB, C#.
using System.Collections.Immutable;
|
|
using System.Globalization;
|
|
|
|
namespace StellaOps.BinaryIndex.GoldenSet;
|
|
|
|
/// <summary>
/// Represents ground-truth facts about a vulnerability's code-level manifestation.
/// Hand-curated, reviewed like unit tests, tiny by design.
/// </summary>
/// <remarks>
/// All properties are init-only, so an instance cannot be modified after construction.
/// The compiler-generated record equality compares <see cref="Targets"/> through
/// <see cref="ImmutableArray{T}"/> equality, which checks whether both values wrap the same
/// underlying array rather than comparing elements one by one.
/// </remarks>
public sealed record GoldenSetDefinition
{
    /// <summary>
    /// Unique identifier (typically a CVE ID, e.g., "CVE-2024-0727").
    /// </summary>
    public required string Id { get; init; }

    /// <summary>
    /// Name of the affected component (e.g., "openssl", "glibc").
    /// </summary>
    public required string Component { get; init; }

    /// <summary>
    /// Vulnerable code targets (functions, edges, sinks).
    /// </summary>
    public required ImmutableArray<VulnerableTarget> Targets { get; init; }

    /// <summary>
    /// Optional witness input for reproducing the vulnerability; <c>null</c> when none is supplied.
    /// </summary>
    public WitnessInput? Witness { get; init; }

    /// <summary>
    /// Metadata about the golden set (author, timestamps, source reference, tags).
    /// </summary>
    public required GoldenSetMetadata Metadata { get; init; }

    /// <summary>
    /// Content-addressed digest of the canonical form (computed, not user-provided).
    /// <c>null</c> when no digest has been assigned.
    /// </summary>
    public string? ContentDigest { get; init; }
}
|
|
|
|
/// <summary>
/// A specific vulnerable code target within a component.
/// </summary>
/// <remarks>
/// Only <see cref="FunctionName"/> is required. The collection properties default to empty
/// arrays and the hint properties default to <c>null</c>.
/// The compiler-generated record equality compares the <see cref="ImmutableArray{T}"/>
/// properties by underlying array instance, not element by element.
/// </remarks>
public sealed record VulnerableTarget
{
    /// <summary>
    /// Function name (symbol or demangled name).
    /// </summary>
    public required string FunctionName { get; init; }

    /// <summary>
    /// Basic block edges that constitute the vulnerable path. Empty by default.
    /// </summary>
    public ImmutableArray<BasicBlockEdge> Edges { get; init; } = [];

    /// <summary>
    /// Sink functions that are reached (e.g., "memcpy", "strcpy"). Empty by default.
    /// </summary>
    public ImmutableArray<string> Sinks { get; init; } = [];

    /// <summary>
    /// Constants/magic values that identify the vulnerable code. Empty by default.
    /// </summary>
    public ImmutableArray<string> Constants { get; init; } = [];

    /// <summary>
    /// Human-readable invariant that must hold for exploitation.
    /// </summary>
    public string? TaintInvariant { get; init; }

    /// <summary>
    /// Optional source file hint.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Optional source line hint.
    /// </summary>
    public int? SourceLine { get; init; }
}
|
|
|
|
/// <summary>
/// A basic block edge in the CFG.
/// Format: "bbN->bbM" where N and M are block identifiers.
/// </summary>
/// <remarks>
/// Parsing requires exactly one "->" separator with non-blank text on each side. It does not
/// itself verify that either side starts with "bb" or ends in digits.
/// </remarks>
public sealed record BasicBlockEdge
{
    /// <summary>Token that separates the source block from the target block.</summary>
    private const string Separator = "->";

    /// <summary>
    /// Source basic block identifier (e.g., "bb3").
    /// </summary>
    public required string From { get; init; }

    /// <summary>
    /// Target basic block identifier (e.g., "bb7").
    /// </summary>
    public required string To { get; init; }

    /// <summary>
    /// Parses an edge from string format "bbN->bbM".
    /// Whitespace around each identifier is trimmed.
    /// </summary>
    /// <param name="edge">The edge string to parse.</param>
    /// <returns>A new BasicBlockEdge instance.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when <paramref name="edge"/> is null, empty, or consists only of whitespace.
    /// </exception>
    /// <exception cref="FormatException">Thrown when the edge format is invalid.</exception>
    public static BasicBlockEdge Parse(string edge)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(edge);

        // Single source of truth for the format rules: delegate to TryParse.
        if (TryParse(edge, out var parsed))
        {
            return parsed;
        }

        throw new FormatException(
            string.Format(CultureInfo.InvariantCulture, "Invalid edge format: {0}. Expected 'bbN->bbM'.", edge));
    }

    /// <summary>
    /// Tries to parse an edge from string format "bbN->bbM".
    /// Whitespace around each identifier is trimmed.
    /// </summary>
    /// <param name="edge">The edge string to parse.</param>
    /// <param name="result">The parsed edge, or null if parsing failed.</param>
    /// <returns>True if parsing succeeded; otherwise, false.</returns>
    public static bool TryParse(
        string? edge,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out BasicBlockEdge? result)
    {
        result = null;

        if (string.IsNullOrWhiteSpace(edge))
        {
            return false;
        }

        // TrimEntries strips whitespace around each side, so " bb1 -> bb2 " is accepted.
        // More than one separator yields more than two parts and is rejected.
        var parts = edge.Split(Separator, StringSplitOptions.TrimEntries);
        if (parts.Length != 2 || string.IsNullOrWhiteSpace(parts[0]) || string.IsNullOrWhiteSpace(parts[1]))
        {
            return false;
        }

        result = new BasicBlockEdge { From = parts[0], To = parts[1] };
        return true;
    }

    /// <inheritdoc />
    public override string ToString() => string.Concat(From, Separator, To);
}
|
|
|
|
/// <summary>
/// Witness input for reproducing the vulnerability.
/// </summary>
/// <remarks>
/// Every property is optional: <see cref="Arguments"/> defaults to an empty array and the
/// string properties default to <c>null</c>.
/// The compiler-generated record equality compares <see cref="Arguments"/> by underlying
/// array instance, not element by element.
/// </remarks>
public sealed record WitnessInput
{
    /// <summary>
    /// Command-line arguments to trigger the vulnerability. Empty by default.
    /// </summary>
    public ImmutableArray<string> Arguments { get; init; } = [];

    /// <summary>
    /// Human-readable invariant/precondition.
    /// </summary>
    public string? Invariant { get; init; }

    /// <summary>
    /// Reference to PoC file (content-addressed, format: "sha256:...").
    /// </summary>
    public string? PocFileRef { get; init; }
}
|
|
|
|
/// <summary>
/// Metadata about the golden set.
/// </summary>
/// <remarks>
/// <see cref="AuthorId"/>, <see cref="CreatedAt"/> and <see cref="SourceRef"/> are required;
/// the review fields are optional. <see cref="SchemaVersion"/> defaults to
/// <see cref="GoldenSetConstants.CurrentSchemaVersion"/>.
/// The compiler-generated record equality compares <see cref="Tags"/> by underlying array
/// instance, not element by element.
/// </remarks>
public sealed record GoldenSetMetadata
{
    /// <summary>
    /// Author ID (who created the golden set).
    /// </summary>
    public required string AuthorId { get; init; }

    /// <summary>
    /// Creation timestamp (UTC).
    /// </summary>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// Source reference (advisory URL, commit hash, etc.).
    /// </summary>
    public required string SourceRef { get; init; }

    /// <summary>
    /// Reviewer ID (if reviewed).
    /// </summary>
    public string? ReviewedBy { get; init; }

    /// <summary>
    /// Review timestamp (UTC).
    /// </summary>
    public DateTimeOffset? ReviewedAt { get; init; }

    /// <summary>
    /// Classification tags (e.g., "memory-corruption", "heap-overflow"). Empty by default.
    /// </summary>
    public ImmutableArray<string> Tags { get; init; } = [];

    /// <summary>
    /// Schema version for forward compatibility.
    /// </summary>
    public string SchemaVersion { get; init; } = GoldenSetConstants.CurrentSchemaVersion;
}
|
|
|
|
/// <summary>
/// Status of a golden set in the corpus.
/// </summary>
/// <remarks>
/// The numeric values are written out explicitly and match the values the compiler assigned
/// implicitly before, so adding or reordering members cannot silently change them.
/// </remarks>
public enum GoldenSetStatus
{
    /// <summary>Draft, not yet reviewed.</summary>
    Draft = 0,

    /// <summary>Under review.</summary>
    InReview = 1,

    /// <summary>Approved and active.</summary>
    Approved = 2,

    /// <summary>Deprecated (CVE retracted or superseded).</summary>
    Deprecated = 3,

    /// <summary>Archived (historical reference only).</summary>
    Archived = 4,
}
|
|
|
|
/// <summary>
/// Constants used throughout the Golden Set module.
/// </summary>
/// <remarks>
/// All patterns are anchored with <c>^</c> and <c>$</c>. When evaluated with the .NET regex
/// engine without <c>RegexOptions.Multiline</c>, <c>$</c> also matches immediately before a
/// single trailing newline, so callers that must reject such input should trim or check it
/// separately.
/// </remarks>
public static class GoldenSetConstants
{
    /// <summary>
    /// Current schema version for golden set definitions.
    /// </summary>
    public const string CurrentSchemaVersion = "1.0.0";

    /// <summary>
    /// Regex pattern for CVE IDs: "CVE-", a four-digit year, "-", then four or more digits.
    /// </summary>
    public const string CveIdPattern = @"^CVE-\d{4}-\d{4,}$";

    /// <summary>
    /// Regex pattern for GHSA IDs: "GHSA-" followed by three dash-separated groups of four
    /// lowercase letters or digits.
    /// </summary>
    public const string GhsaIdPattern = @"^GHSA-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}$";

    /// <summary>
    /// Regex pattern for synthetic test fixture IDs: "SYNTH-", four digits, "-", then a name
    /// that starts with a lowercase letter and continues with lowercase letters, digits or dashes.
    /// </summary>
    public const string SyntheticIdPattern = @"^SYNTH-\d{4}-[a-z][a-z0-9\-]*$";

    /// <summary>
    /// Regex pattern for basic block edge format ("bbN->bbM" with no surrounding whitespace).
    /// </summary>
    public const string EdgePattern = @"^bb\d+->bb\d+$";

    /// <summary>
    /// Regex pattern for content-addressed digest: "sha256:" followed by 64 lowercase hex characters.
    /// </summary>
    public const string DigestPattern = @"^sha256:[a-f0-9]{64}$";
}
|