using System.Collections.Immutable;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;

namespace StellaOps.BinaryIndex.GoldenSet;

/// <summary>
/// Represents ground-truth facts about a vulnerability's code-level manifestation.
/// Hand-curated, reviewed like unit tests, tiny by design.
/// </summary>
public sealed record GoldenSetDefinition
{
    /// <summary>
    /// Unique identifier (typically CVE ID, e.g., "CVE-2024-0727").
    /// </summary>
    public required string Id { get; init; }

    /// <summary>
    /// Affected component name (e.g., "openssl", "glibc").
    /// </summary>
    public required string Component { get; init; }

    /// <summary>
    /// Vulnerable code targets (functions, edges, sinks).
    /// </summary>
    public required ImmutableArray<VulnerableTarget> Targets { get; init; }

    /// <summary>
    /// Optional witness input for reproducing the vulnerability.
    /// </summary>
    public WitnessInput? Witness { get; init; }

    /// <summary>
    /// Metadata about the golden set.
    /// </summary>
    public required GoldenSetMetadata Metadata { get; init; }

    /// <summary>
    /// Content-addressed digest of the canonical form (computed, not user-provided).
    /// </summary>
    public string? ContentDigest { get; init; }
}

/// <summary>
/// A specific vulnerable code target within a component.
/// </summary>
public sealed record VulnerableTarget
{
    /// <summary>
    /// Function name (symbol or demangled name).
    /// </summary>
    public required string FunctionName { get; init; }

    /// <summary>
    /// Basic block edges that constitute the vulnerable path.
    /// </summary>
    public ImmutableArray<BasicBlockEdge> Edges { get; init; } = [];

    /// <summary>
    /// Sink functions that are reached (e.g., "memcpy", "strcpy").
    /// </summary>
    public ImmutableArray<string> Sinks { get; init; } = [];

    /// <summary>
    /// Constants/magic values that identify the vulnerable code.
    /// </summary>
    public ImmutableArray<string> Constants { get; init; } = [];

    /// <summary>
    /// Human-readable invariant that must hold for exploitation.
    /// </summary>
    public string? TaintInvariant { get; init; }

    /// <summary>
    /// Optional source file hint.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Optional source line hint.
    /// </summary>
    public int? SourceLine { get; init; }
}

/// <summary>
/// A basic block edge in the CFG.
/// Format: "bbN->bbM" where N and M are block identifiers.
/// </summary>
public sealed record BasicBlockEdge
{
    /// <summary>
    /// Source basic block identifier (e.g., "bb3").
    /// </summary>
    public required string From { get; init; }

    /// <summary>
    /// Target basic block identifier (e.g., "bb7").
    /// </summary>
    public required string To { get; init; }

    /// <summary>
    /// Parses an edge from string format "bbN->bbM".
    /// </summary>
    /// <remarks>
    /// Validation is limited to what <see cref="TryParse"/> checks: exactly one
    /// "->" separator with a non-blank identifier on each side. The identifiers
    /// are trimmed but not matched against <see cref="GoldenSetConstants.EdgePattern"/>.
    /// </remarks>
    /// <param name="edge">The edge string to parse.</param>
    /// <returns>A new BasicBlockEdge instance.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when <paramref name="edge"/> is null, empty, or whitespace.
    /// </exception>
    /// <exception cref="FormatException">Thrown when the edge format is invalid.</exception>
    public static BasicBlockEdge Parse(string edge)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(edge);

        // Single source of truth for the format rules lives in TryParse.
        if (!TryParse(edge, out var result))
        {
            throw new FormatException(
                string.Format(
                    CultureInfo.InvariantCulture,
                    "Invalid edge format: {0}. Expected 'bbN->bbM'.",
                    edge));
        }

        return result;
    }

    /// <summary>
    /// Tries to parse an edge from string format "bbN->bbM".
    /// </summary>
    /// <param name="edge">The edge string to parse.</param>
    /// <param name="result">The parsed edge, or null if parsing failed.</param>
    /// <returns>True if parsing succeeded; otherwise, false.</returns>
    public static bool TryParse(string? edge, [NotNullWhen(true)] out BasicBlockEdge? result)
    {
        result = null;

        if (string.IsNullOrWhiteSpace(edge))
        {
            return false;
        }

        // TrimEntries strips whitespace around each identifier, so " bb1 -> bb2 " is accepted.
        var parts = edge.Split("->", StringSplitOptions.TrimEntries);

        // Anything other than exactly two non-blank parts (e.g. "bb1", "bb1->", "a->b->c") is rejected.
        if (parts is not [var from, var to]
            || string.IsNullOrWhiteSpace(from)
            || string.IsNullOrWhiteSpace(to))
        {
            return false;
        }

        result = new BasicBlockEdge { From = from, To = to };
        return true;
    }

    /// <inheritdoc />
    public override string ToString() => string.Concat(From, "->", To);
}

/// <summary>
/// Witness input for reproducing the vulnerability.
/// </summary>
public sealed record WitnessInput
{
    /// <summary>
    /// Command-line arguments to trigger the vulnerability.
    /// </summary>
    public ImmutableArray<string> Arguments { get; init; } = [];

    /// <summary>
    /// Human-readable invariant/precondition.
    /// </summary>
    public string? Invariant { get; init; }

    /// <summary>
    /// Reference to PoC file (content-addressed, format: "sha256:...").
    /// </summary>
    public string? PocFileRef { get; init; }
}

/// <summary>
/// Metadata about the golden set.
/// </summary>
public sealed record GoldenSetMetadata
{
    /// <summary>
    /// Author ID (who created the golden set).
    /// </summary>
    public required string AuthorId { get; init; }

    /// <summary>
    /// Creation timestamp (UTC).
    /// </summary>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// Source reference (advisory URL, commit hash, etc.).
    /// </summary>
    public required string SourceRef { get; init; }

    /// <summary>
    /// Reviewer ID (if reviewed).
    /// </summary>
    public string? ReviewedBy { get; init; }

    /// <summary>
    /// Review timestamp (UTC).
    /// </summary>
    public DateTimeOffset? ReviewedAt { get; init; }

    /// <summary>
    /// Classification tags (e.g., "memory-corruption", "heap-overflow").
    /// </summary>
    public ImmutableArray<string> Tags { get; init; } = [];

    /// <summary>
    /// Schema version for forward compatibility.
    /// Defaults to <see cref="GoldenSetConstants.CurrentSchemaVersion"/>.
    /// </summary>
    public string SchemaVersion { get; init; } = GoldenSetConstants.CurrentSchemaVersion;
}

/// <summary>
/// Status of a golden set in the corpus.
/// </summary>
public enum GoldenSetStatus
{
    /// <summary>Draft, not yet reviewed.</summary>
    Draft,

    /// <summary>Under review.</summary>
    InReview,

    /// <summary>Approved and active.</summary>
    Approved,

    /// <summary>Deprecated (CVE retracted or superseded).</summary>
    Deprecated,

    /// <summary>Archived (historical reference only).</summary>
    Archived
}

/// <summary>
/// Constants used throughout the Golden Set module.
/// </summary>
public static class GoldenSetConstants
{
    /// <summary>
    /// Current schema version for golden set definitions.
    /// </summary>
    public const string CurrentSchemaVersion = "1.0.0";

    /// <summary>
    /// Regex pattern for CVE IDs.
    /// </summary>
    public const string CveIdPattern = @"^CVE-\d{4}-\d{4,}$";

    /// <summary>
    /// Regex pattern for GHSA IDs.
    /// </summary>
    public const string GhsaIdPattern = @"^GHSA-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}$";

    /// <summary>
    /// Regex pattern for synthetic test fixture IDs.
    /// </summary>
    public const string SyntheticIdPattern = @"^SYNTH-\d{4}-[a-z][a-z0-9\-]*$";

    /// <summary>
    /// Regex pattern for basic block edge format.
    /// </summary>
    public const string EdgePattern = @"^bb\d+->bb\d+$";

    /// <summary>
    /// Regex pattern for content-addressed digest.
    /// </summary>
    public const string DigestPattern = @"^sha256:[a-f0-9]{64}$";
}