using System.Collections.Immutable;
using System.Globalization;
namespace StellaOps.BinaryIndex.GoldenSet;
/// <summary>
/// Represents ground-truth facts about a vulnerability's code-level manifestation.
/// Hand-curated, reviewed like unit tests, tiny by design.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="ImmutableArray{T}"/> compares by underlying array reference,
/// so the record's generated equality is not structural over <see cref="Targets"/>.
/// Confirm no caller relies on value equality of whole definitions.
/// </remarks>
public sealed record GoldenSetDefinition
{
    /// <summary>
    /// Unique identifier (typically CVE ID, e.g., "CVE-2024-0727").
    /// </summary>
    public required string Id { get; init; }

    /// <summary>
    /// Affected component name (e.g., "openssl", "glibc").
    /// </summary>
    public required string Component { get; init; }

    /// <summary>
    /// Vulnerable code targets (functions, edges, sinks).
    /// </summary>
    /// <remarks>
    /// <c>required</c> only forces an assignment; a <c>default</c> array is still possible,
    /// so consumers should check <see cref="ImmutableArray{T}.IsDefaultOrEmpty"/> before enumerating.
    /// </remarks>
    public required ImmutableArray<VulnerableTarget> Targets { get; init; }

    /// <summary>
    /// Optional witness input for reproducing the vulnerability.
    /// </summary>
    public WitnessInput? Witness { get; init; }

    /// <summary>
    /// Metadata about the golden set.
    /// </summary>
    public required GoldenSetMetadata Metadata { get; init; }

    /// <summary>
    /// Content-addressed digest of the canonical form (computed, not user-provided).
    /// </summary>
    public string? ContentDigest { get; init; }
}
/// <summary>
/// A specific vulnerable code target within a component.
/// </summary>
public sealed record VulnerableTarget
{
    /// <summary>
    /// Function name (symbol or demangled name).
    /// </summary>
    public required string FunctionName { get; init; }

    /// <summary>
    /// Basic block edges that constitute the vulnerable path. Defaults to an empty array.
    /// </summary>
    public ImmutableArray<BasicBlockEdge> Edges { get; init; } = [];

    /// <summary>
    /// Sink functions that are reached (e.g., "memcpy", "strcpy"). Defaults to an empty array.
    /// </summary>
    public ImmutableArray<string> Sinks { get; init; } = [];

    /// <summary>
    /// Constants/magic values that identify the vulnerable code. Defaults to an empty array.
    /// </summary>
    public ImmutableArray<string> Constants { get; init; } = [];

    /// <summary>
    /// Human-readable invariant that must hold for exploitation.
    /// </summary>
    public string? TaintInvariant { get; init; }

    /// <summary>
    /// Optional source file hint.
    /// </summary>
    public string? SourceFile { get; init; }

    /// <summary>
    /// Optional source line hint.
    /// </summary>
    public int? SourceLine { get; init; }
}
/// <summary>
/// A basic block edge in the CFG.
/// Format: "bbN->bbM" where N and M are block identifiers.
/// </summary>
public sealed record BasicBlockEdge
{
    // Single source of truth for the separator used by parsing and formatting.
    private const string Separator = "->";

    /// <summary>
    /// Source basic block identifier (e.g., "bb3").
    /// </summary>
    public required string From { get; init; }

    /// <summary>
    /// Target basic block identifier (e.g., "bb7").
    /// </summary>
    public required string To { get; init; }

    /// <summary>
    /// Parses an edge from string format "bbN->bbM".
    /// </summary>
    /// <param name="edge">The edge string to parse.</param>
    /// <returns>A new BasicBlockEdge instance.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="edge"/> is null, empty or whitespace.</exception>
    /// <exception cref="FormatException">Thrown when the edge format is invalid.</exception>
    public static BasicBlockEdge Parse(string edge)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(edge);

        // Delegate to TryParse so both entry points share one validation path.
        if (!TryParse(edge, out var parsed))
        {
            throw new FormatException(
                string.Format(CultureInfo.InvariantCulture, "Invalid edge format: {0}. Expected 'bbN->bbM'.", edge));
        }

        return parsed;
    }

    /// <summary>
    /// Tries to parse an edge from string format "bbN->bbM".
    /// </summary>
    /// <remarks>
    /// Whitespace around each side of the separator is trimmed. The input must contain
    /// exactly one separator with a non-blank identifier on both sides; the "bb" prefix
    /// itself is not checked here.
    /// </remarks>
    /// <param name="edge">The edge string to parse.</param>
    /// <param name="result">The parsed edge, or null if parsing failed.</param>
    /// <returns>True if parsing succeeded; otherwise, false.</returns>
    public static bool TryParse(
        string? edge,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out BasicBlockEdge? result)
    {
        result = null;

        if (string.IsNullOrWhiteSpace(edge))
        {
            return false;
        }

        var parts = edge.Split(Separator, StringSplitOptions.TrimEntries);
        if (parts.Length != 2)
        {
            // Zero separators or more than one (e.g. "bb1->bb2->bb3").
            return false;
        }

        if (string.IsNullOrWhiteSpace(parts[0]) || string.IsNullOrWhiteSpace(parts[1]))
        {
            return false;
        }

        result = new BasicBlockEdge { From = parts[0], To = parts[1] };
        return true;
    }

    /// <inheritdoc />
    public override string ToString() => string.Concat(From, Separator, To);
}
/// <summary>
/// Witness input for reproducing the vulnerability.
/// </summary>
public sealed record WitnessInput
{
    /// <summary>
    /// Command-line arguments to trigger the vulnerability. Defaults to an empty array.
    /// </summary>
    public ImmutableArray<string> Arguments { get; init; } = [];

    /// <summary>
    /// Human-readable invariant/precondition.
    /// </summary>
    public string? Invariant { get; init; }

    /// <summary>
    /// Reference to PoC file (content-addressed, format: "sha256:...").
    /// </summary>
    public string? PocFileRef { get; init; }
}
/// <summary>
/// Metadata about the golden set.
/// </summary>
public sealed record GoldenSetMetadata
{
    /// <summary>
    /// Author ID (who created the golden set).
    /// </summary>
    public required string AuthorId { get; init; }

    /// <summary>
    /// Creation timestamp (UTC).
    /// </summary>
    /// <remarks>
    /// NOTE(review): the UTC expectation is not enforced by this type; confirm producers normalise the offset.
    /// </remarks>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>
    /// Source reference (advisory URL, commit hash, etc.).
    /// </summary>
    public required string SourceRef { get; init; }

    /// <summary>
    /// Reviewer ID (if reviewed).
    /// </summary>
    public string? ReviewedBy { get; init; }

    /// <summary>
    /// Review timestamp (UTC).
    /// </summary>
    public DateTimeOffset? ReviewedAt { get; init; }

    /// <summary>
    /// Classification tags (e.g., "memory-corruption", "heap-overflow"). Defaults to an empty array.
    /// </summary>
    public ImmutableArray<string> Tags { get; init; } = [];

    /// <summary>
    /// Schema version for forward compatibility.
    /// Defaults to <see cref="GoldenSetConstants.CurrentSchemaVersion"/>.
    /// </summary>
    public string SchemaVersion { get; init; } = GoldenSetConstants.CurrentSchemaVersion;
}
/// <summary>
/// Lifecycle state of a golden set within the corpus.
/// </summary>
public enum GoldenSetStatus
{
    /// <summary>Draft, not yet reviewed.</summary>
    Draft = 0,

    /// <summary>Under review.</summary>
    InReview = 1,

    /// <summary>Approved and active.</summary>
    Approved = 2,

    /// <summary>Deprecated (CVE retracted or superseded).</summary>
    Deprecated = 3,

    /// <summary>Archived (historical reference only).</summary>
    Archived = 4,
}
/// <summary>
/// Constants used throughout the Golden Set module.
/// </summary>
/// <remarks>
/// NOTE(review): if these patterns are evaluated with System.Text.RegularExpressions defaults,
/// <c>$</c> also matches just before a trailing newline and <c>\d</c> matches any Unicode decimal
/// digit (not only 0-9) unless RegexOptions.ECMAScript is used. Confirm that is acceptable for
/// identifier validation before relying on these as strict whitelists.
/// </remarks>
public static class GoldenSetConstants
{
/// <summary>
/// Current schema version for golden set definitions.
/// </summary>
public const string CurrentSchemaVersion = "1.0.0";
/// <summary>
/// Regex pattern for CVE IDs: "CVE-", four digits, "-", then four or more digits.
/// </summary>
public const string CveIdPattern = @"^CVE-\d{4}-\d{4,}$";
/// <summary>
/// Regex pattern for GHSA IDs: "GHSA-" followed by three hyphen-separated groups of
/// four lowercase alphanumeric characters.
/// </summary>
public const string GhsaIdPattern = @"^GHSA-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}$";
/// <summary>
/// Regex pattern for synthetic test fixture IDs: "SYNTH-", four digits, "-", then a lowercase
/// letter followed by any number of lowercase letters, digits or hyphens.
/// </summary>
public const string SyntheticIdPattern = @"^SYNTH-\d{4}-[a-z][a-z0-9\-]*$";
/// <summary>
/// Regex pattern for basic block edge format ("bb" + digits on both sides of "->").
/// </summary>
/// <remarks>
/// This is stricter than <see cref="BasicBlockEdge.Parse"/>, which accepts any non-blank
/// identifiers and tolerates whitespace around the separator.
/// </remarks>
public const string EdgePattern = @"^bb\d+->bb\d+$";
/// <summary>
/// Regex pattern for content-addressed digest: "sha256:" followed by exactly 64 lowercase
/// hexadecimal characters.
/// </summary>
public const string DigestPattern = @"^sha256:[a-f0-9]{64}$";
}