// -----------------------------------------------------------------------------
// FuncProofBuilder.cs
// Sprint: SPRINT_20251226_009_SCANNER_funcproof
// Tasks: FUNC-05, FUNC-07, FUNC-10, FUNC-11 — Symbol/function hashing and trace serialization
// Description: Builds FuncProof documents from binary analysis results.
// -----------------------------------------------------------------------------

using System.Collections.Immutable;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using StellaOps.Cryptography;
using StellaOps.Scanner.Evidence.Models;

namespace StellaOps.Scanner.Evidence;

/// <summary>
/// Builds FuncProof documents from binary analysis results.
/// </summary>
/// <remarks>
/// Section and trace hashes are computed eagerly when they are added, while function
/// digests are computed in <see cref="Build"/>. Call <see cref="WithCryptoHash"/> and
/// <see cref="WithOptions"/> before adding sections or traces so that every hash in the
/// document is produced with the same configuration.
/// </remarks>
public sealed class FuncProofBuilder
{
    /// <summary>
    /// Prefix attached to section, function and edge-list hashes in the emitted document.
    /// </summary>
    internal const string Blake3Prefix = "blake3:";

    private static readonly JsonSerializerOptions CanonicalJsonOptions = new()
    {
        WriteIndented = false,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
    };

    private ICryptoHash? _cryptoHash;
    private FuncProofGenerationOptions _options = new();
    private string? _buildId;
    private string? _buildIdType;
    private string? _fileSha256;
    private string? _binaryFormat;
    private string? _architecture;
    private bool _isStripped;
    private readonly Dictionary<string, FuncProofSection> _sections = new();
    private readonly List<FuncProofFunctionBuilder> _functions = [];
    private readonly List<FuncProofTrace> _traces = [];
    private FuncProofMetadata? _metadata;
    private string _generatorVersion = "1.0.0";

    /// <summary>
    /// Sets the cryptographic hash provider for regional compliance.
    /// If not set, defaults to SHA-256 for backward compatibility.
    /// </summary>
    /// <param name="cryptoHash">Provider used for every hash this builder computes.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="cryptoHash"/> is null.</exception>
    public FuncProofBuilder WithCryptoHash(ICryptoHash cryptoHash)
    {
        _cryptoHash = cryptoHash ?? throw new ArgumentNullException(nameof(cryptoHash));
        return this;
    }

    /// <summary>
    /// Sets the generation options for configurable parameters.
    /// </summary>
    /// <param name="options">Options to use; <c>MaxTraceHops</c> is read by <see cref="AddTrace"/>.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    public FuncProofBuilder WithOptions(FuncProofGenerationOptions options)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
        return this;
    }

    /// <summary>
    /// Sets the binary identity information.
    /// </summary>
    /// <remarks>
    /// The string values are not validated here; <see cref="Build"/> rejects any that are
    /// null, empty or whitespace.
    /// </remarks>
    /// <returns>This builder, for chaining.</returns>
    public FuncProofBuilder WithBinaryIdentity(
        string buildId,
        string buildIdType,
        string fileSha256,
        string binaryFormat,
        string architecture,
        bool isStripped)
    {
        _buildId = buildId;
        _buildIdType = buildIdType;
        _fileSha256 = fileSha256;
        _binaryFormat = binaryFormat;
        _architecture = architecture;
        _isStripped = isStripped;
        return this;
    }

    /// <summary>
    /// Adds a section with hash.
    /// </summary>
    /// <param name="name">Section name; a later call with the same name replaces the earlier entry.</param>
    /// <param name="content">Raw section bytes. Hashed immediately; the recorded size is the array length.</param>
    /// <param name="offset">Offset recorded for the section.</param>
    /// <param name="virtualAddress">Optional virtual address recorded for the section.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> or <paramref name="content"/> is null.</exception>
    public FuncProofBuilder AddSection(string name, byte[] content, long offset, long? virtualAddress = null)
    {
        ArgumentNullException.ThrowIfNull(name);
        ArgumentNullException.ThrowIfNull(content);

        var hash = ComputeBlake3Hash(content);
        _sections[name] = new FuncProofSection
        {
            Hash = $"{Blake3Prefix}{hash}",
            Offset = offset,
            Size = content.Length,
            VirtualAddress = virtualAddress
        };
        return this;
    }

    /// <summary>
    /// Adds a section with pre-computed hash.
    /// </summary>
    /// <param name="name">Section name; a later call with the same name replaces the earlier entry.</param>
    /// <param name="hash">Pre-computed hash, with or without the <c>blake3:</c> prefix.</param>
    /// <param name="offset">Offset recorded for the section.</param>
    /// <param name="size">Size recorded for the section.</param>
    /// <param name="virtualAddress">Optional virtual address recorded for the section.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> or <paramref name="hash"/> is null.</exception>
    public FuncProofBuilder AddSection(string name, string hash, long offset, long size, long? virtualAddress = null)
    {
        ArgumentNullException.ThrowIfNull(name);
        ArgumentNullException.ThrowIfNull(hash);

        _sections[name] = new FuncProofSection
        {
            Hash = EnsureBlake3Prefix(hash),
            Offset = offset,
            Size = size,
            VirtualAddress = virtualAddress
        };
        return this;
    }

    /// <summary>
    /// Adds a function definition.
    /// </summary>
    /// <param name="symbol">Function symbol name.</param>
    /// <param name="startAddress">Start address of the function.</param>
    /// <param name="endAddress">End address of the function.</param>
    /// <returns>
    /// A builder for the new function entry; call <see cref="FuncProofFunctionBuilder.Done"/>
    /// to return to this builder.
    /// </returns>
    public FuncProofFunctionBuilder AddFunction(string symbol, long startAddress, long endAddress)
    {
        var builder = new FuncProofFunctionBuilder(this, symbol, startAddress, endAddress);
        _functions.Add(builder);
        return builder;
    }

    /// <summary>
    /// Adds an entry→sink trace.
    /// </summary>
    /// <remarks>
    /// The edge-list hash always covers the full edge list, even when the recorded path and
    /// hop count are capped by <c>MaxTraceHops</c>. Digests passed in here should be computed
    /// with the same hash provider as the builder so they match the function entries.
    /// </remarks>
    /// <param name="entrySymbolDigest">Symbol digest of the entry function.</param>
    /// <param name="sinkSymbolDigest">Symbol digest of the sink function.</param>
    /// <param name="edges">Caller/callee digest pairs that make up the trace.</param>
    /// <param name="path">
    /// Optional explicit path. When omitted, the path is the entry digest followed by the
    /// callee digest of each edge, in the order given.
    /// </param>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="edges"/> is null.</exception>
    public FuncProofBuilder AddTrace(
        string entrySymbolDigest,
        string sinkSymbolDigest,
        IReadOnlyList<(string callerDigest, string calleeDigest)> edges,
        IReadOnlyList<string>? path = null)
    {
        ArgumentNullException.ThrowIfNull(edges);

        // Honor the configured provider so trace hashes agree with section hashes and the proof ID.
        var edgeListHash = ComputeEdgeListHash(edges, _cryptoHash);
        var hopCount = edges.Count;
        var maxHops = _options.MaxTraceHops;
        var truncated = hopCount > maxHops;

        var effectivePath = path ?? edges.Select(e => e.calleeDigest).Prepend(entrySymbolDigest).ToList();

        // A path of N hops has N + 1 nodes, hence the "+ 1" on the cap.
        if (effectivePath.Count > maxHops + 1)
        {
            effectivePath = effectivePath.Take(maxHops + 1).ToList();
            truncated = true;
        }

        var trace = new FuncProofTrace
        {
            TraceId = $"trace-{_traces.Count + 1}",
            EdgeListHash = $"{Blake3Prefix}{edgeListHash}",
            HopCount = Math.Min(hopCount, maxHops),
            EntrySymbolDigest = entrySymbolDigest,
            SinkSymbolDigest = sinkSymbolDigest,
            Path = effectivePath.ToImmutableArray(),
            Truncated = truncated
        };

        _traces.Add(trace);
        return this;
    }

    /// <summary>
    /// Sets build metadata.
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public FuncProofBuilder WithMetadata(FuncProofMetadata metadata)
    {
        _metadata = metadata;
        return this;
    }

    /// <summary>
    /// Sets the generator version.
    /// </summary>
    /// <returns>This builder, for chaining.</returns>
    public FuncProofBuilder WithGeneratorVersion(string version)
    {
        _generatorVersion = version;
        return this;
    }

    /// <summary>
    /// Builds the FuncProof document.
    /// </summary>
    /// <returns>The assembled proof with its content-derived <c>ProofId</c> filled in.</returns>
    /// <exception cref="ArgumentException">
    /// A binary identity value is empty or whitespace (<see cref="ArgumentNullException"/> when
    /// it is null), i.e. <see cref="WithBinaryIdentity"/> was not called with usable values.
    /// </exception>
    public FuncProof Build()
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(_buildId);
        ArgumentException.ThrowIfNullOrWhiteSpace(_buildIdType);
        ArgumentException.ThrowIfNullOrWhiteSpace(_fileSha256);
        ArgumentException.ThrowIfNullOrWhiteSpace(_binaryFormat);
        ArgumentException.ThrowIfNullOrWhiteSpace(_architecture);

        // Start is a "0x..." hex string, so this ordering is lexical (ordinal), not numeric.
        var functions = _functions
            .Select(f => f.Build(_cryptoHash))
            .OrderBy(f => f.Start, StringComparer.Ordinal)
            .ToImmutableArray();

        // NOTE(review): ImmutableDictionary does not document an enumeration order, so the
        // sort below may not carry through to serialization — confirm against the declared
        // type of FuncProof.Sections before relying on it for canonical output.
        var sections = _sections
            .OrderBy(kvp => kvp.Key, StringComparer.Ordinal)
            .ToImmutableDictionary();

        // Ordinal comparison on "trace-N" ids: "trace-10" sorts before "trace-2".
        var traces = _traces
            .OrderBy(t => t.TraceId, StringComparer.Ordinal)
            .ToImmutableArray();

        // Build initial proof without proofId
        var proof = new FuncProof
        {
            ProofId = string.Empty, // Placeholder
            BuildId = _buildId,
            BuildIdType = _buildIdType,
            FileSha256 = _fileSha256,
            BinaryFormat = _binaryFormat,
            Architecture = _architecture,
            IsStripped = _isStripped,
            Sections = sections,
            Functions = functions,
            Traces = traces,
            Meta = _metadata,
            // NOTE(review): unless the model excludes it from serialization, this timestamp is
            // part of the hashed payload, so identical inputs built at different times would
            // get different proof ids — confirm this is intended.
            GeneratedAt = DateTimeOffset.UtcNow,
            GeneratorVersion = _generatorVersion
        };

        // Compute content-addressable ID
        var proofId = ComputeProofId(proof, _cryptoHash);

        return proof with { ProofId = proofId };
    }

    /// <summary>
    /// Computes the content-addressable proof ID.
    /// Uses ICryptoHash for regional compliance (defaults to BLAKE3 in "world" profile);
    /// falls back to SHA-256 when no provider is supplied.
    /// </summary>
    /// <param name="proof">Proof to hash. Its current <c>ProofId</c> is blanked before hashing.</param>
    /// <param name="cryptoHash">Optional hash provider.</param>
    /// <returns>
    /// <c>graph:&lt;hex&gt;</c> when a provider is supplied, otherwise <c>sha256:&lt;hex&gt;</c>.
    /// </returns>
    public static string ComputeProofId(FuncProof proof, ICryptoHash? cryptoHash = null)
    {
        // Create a version without proofId for hashing
        var forHashing = proof with { ProofId = string.Empty };
        var json = JsonSerializer.Serialize(forHashing, CanonicalJsonOptions);
        var bytes = Encoding.UTF8.GetBytes(json);

        var hash = ComputeHashForGraph(bytes, cryptoHash);

        // Prefix indicates algorithm used (determined by compliance profile)
        var algorithmPrefix = cryptoHash is not null ? "graph" : "sha256";
        return $"{algorithmPrefix}:{hash}";
    }

    /// <summary>
    /// Computes symbol digest: hash of <c>symbol + "|" + start + "|" + end</c>, with both
    /// addresses rendered as lowercase hex without a prefix.
    /// Uses ICryptoHash for regional compliance (defaults to BLAKE3 in "world" profile);
    /// falls back to SHA-256 when no provider is supplied.
    /// </summary>
    /// <returns>The digest as a hex string, without an algorithm prefix.</returns>
    public static string ComputeSymbolDigest(string symbol, long start, long end, ICryptoHash? cryptoHash = null)
    {
        var input = $"{symbol}|{start:x}|{end:x}";
        var bytes = Encoding.UTF8.GetBytes(input);
        return ComputeHashForGraph(bytes, cryptoHash);
    }

    /// <summary>
    /// Computes function range hash over the function bytes.
    /// Uses ICryptoHash for regional compliance (defaults to BLAKE3 in "world" profile);
    /// falls back to SHA-256 when no provider is supplied.
    /// </summary>
    /// <returns>The hash as a hex string, without an algorithm prefix.</returns>
    public static string ComputeFunctionHash(byte[] functionBytes, ICryptoHash? cryptoHash = null)
    {
        return ComputeHashForGraph(functionBytes, cryptoHash);
    }

    /// <summary>
    /// Returns <paramref name="hash"/> unchanged when it already starts with the
    /// <c>blake3:</c> prefix, otherwise returns it with the prefix prepended.
    /// </summary>
    internal static string EnsureBlake3Prefix(string hash)
    {
        // Ordinal: the prefix is a machine token, not linguistic text.
        return hash.StartsWith(Blake3Prefix, StringComparison.Ordinal)
            ? hash
            : $"{Blake3Prefix}{hash}";
    }

    /// <summary>
    /// Computes edge list hash: hash of sorted edge pairs.
    /// Uses ICryptoHash for regional compliance (defaults to BLAKE3 in "world" profile);
    /// falls back to SHA-256 when no provider is supplied.
    /// </summary>
    /// <remarks>
    /// Each edge is rendered as <c>caller→callee</c>; the rendered edges are sorted ordinally
    /// and joined with a newline, so the result does not depend on the order of
    /// <paramref name="edges"/>.
    /// </remarks>
    private static string ComputeEdgeListHash(
        IReadOnlyList<(string callerDigest, string calleeDigest)> edges,
        ICryptoHash? cryptoHash = null)
    {
        var sortedEdges = edges
            .Select(e => $"{e.callerDigest}→{e.calleeDigest}")
            .OrderBy(e => e, StringComparer.Ordinal)
            .ToList();

        var edgeList = string.Join("\n", sortedEdges);
        var bytes = Encoding.UTF8.GetBytes(edgeList);
        return ComputeHashForGraph(bytes, cryptoHash);
    }

    /// <summary>
    /// Computes hash using the Graph purpose from ICryptoHash.
    /// Falls back to SHA-256 if no crypto hash provider is available.
    /// </summary>
    /// <remarks>
    /// Default algorithm by compliance profile:
    /// - world: BLAKE3-256
    /// - fips/kcmvp/eidas: SHA-256
    /// - gost: GOST3411-2012-256
    /// - sm: SM3
    /// </remarks>
    private static string ComputeHashForGraph(byte[] data, ICryptoHash? cryptoHash)
    {
        if (cryptoHash is not null)
        {
            // Use purpose-based hashing for compliance-aware algorithm selection
            return cryptoHash.ComputeHashHexForPurpose(data, HashPurpose.Graph);
        }

        // Fallback: use SHA-256 when no ICryptoHash provider is available
        // This maintains backward compatibility for tests and standalone usage
        var hash = SHA256.HashData(data);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }

    /// <summary>
    /// Computes a BLAKE3-style hash for section content.
    /// Uses ICryptoHash for regional compliance if set, otherwise uses SHA-256 as fallback.
    /// </summary>
    private string ComputeBlake3Hash(byte[] content)
    {
        return ComputeHashForGraph(content, _cryptoHash);
    }
}

/// <summary>
/// Builder for individual function entries.
/// </summary>
public sealed class FuncProofFunctionBuilder
{
    private readonly FuncProofBuilder _parent;
    private readonly string _symbol;
    private readonly long _startAddress;
    private readonly long _endAddress;
    private string? _mangledName;
    private byte[]? _functionBytes;
    private string? _precomputedHash;
    private double _confidence = 1.0;
    private string? _sourceFile;
    private int? _sourceLine;
    private bool _isEntrypoint;
    private string? _entrypointType;
    private bool _isSink;
    private string? _sinkVulnId;

    internal FuncProofFunctionBuilder(FuncProofBuilder parent, string symbol, long startAddress, long endAddress)
    {
        _parent = parent;
        _symbol = symbol;
        _startAddress = startAddress;
        _endAddress = endAddress;
    }

    /// <summary>
    /// Sets the mangled name if different from symbol.
    /// </summary>
    public FuncProofFunctionBuilder WithMangledName(string mangledName)
    {
        _mangledName = mangledName;
        return this;
    }

    /// <summary>
    /// Sets the function bytes for hash computation.
    /// Ignored when a pre-computed hash has been supplied via <see cref="WithHash"/>.
    /// </summary>
    public FuncProofFunctionBuilder WithBytes(byte[] bytes)
    {
        _functionBytes = bytes;
        return this;
    }

    /// <summary>
    /// Sets a pre-computed hash, with or without the <c>blake3:</c> prefix.
    /// Takes precedence over bytes supplied via <see cref="WithBytes"/>.
    /// </summary>
    public FuncProofFunctionBuilder WithHash(string hash)
    {
        _precomputedHash = hash;
        return this;
    }

    /// <summary>
    /// Sets the confidence level for boundary detection. Defaults to 1.0 when not called.
    /// </summary>
    public FuncProofFunctionBuilder WithConfidence(double confidence)
    {
        _confidence = confidence;
        return this;
    }

    /// <summary>
    /// Sets source location from DWARF info.
    /// </summary>
    public FuncProofFunctionBuilder WithSourceLocation(string file, int line)
    {
        _sourceFile = file;
        _sourceLine = line;
        return this;
    }

    /// <summary>
    /// Marks this function as an entrypoint.
    /// </summary>
    public FuncProofFunctionBuilder AsEntrypoint(string? type = null)
    {
        _isEntrypoint = true;
        _entrypointType = type;
        return this;
    }

    /// <summary>
    /// Marks this function as a vulnerable sink.
    /// </summary>
    public FuncProofFunctionBuilder AsSink(string? vulnId = null)
    {
        _isSink = true;
        _sinkVulnId = vulnId;
        return this;
    }

    /// <summary>
    /// Returns to the parent builder.
    /// </summary>
    public FuncProofBuilder Done() => _parent;

    /// <summary>
    /// Builds the function entry.
    /// </summary>
    /// <remarks>
    /// The function hash is chosen in this order: the pre-computed hash, then a hash of the
    /// supplied bytes, then the symbol digest as a last resort.
    /// </remarks>
    /// <param name="cryptoHash">
    /// Hash provider used for the symbol digest and the function hash. When null, the
    /// SHA-256 fallback of <see cref="FuncProofBuilder"/> applies.
    /// </param>
    internal FuncProofFunction Build(ICryptoHash? cryptoHash = null)
    {
        var symbolDigest = FuncProofBuilder.ComputeSymbolDigest(_symbol, _startAddress, _endAddress, cryptoHash);

        string hash;
        if (_precomputedHash is not null)
        {
            hash = FuncProofBuilder.EnsureBlake3Prefix(_precomputedHash);
        }
        else if (_functionBytes is not null)
        {
            hash = $"{FuncProofBuilder.Blake3Prefix}{FuncProofBuilder.ComputeFunctionHash(_functionBytes, cryptoHash)}";
        }
        else
        {
            // Use symbol digest as fallback hash
            hash = $"{FuncProofBuilder.Blake3Prefix}{symbolDigest}";
        }

        return new FuncProofFunction
        {
            Symbol = _symbol,
            MangledName = _mangledName,
            SymbolDigest = symbolDigest,
            Start = $"0x{_startAddress:x}",
            End = $"0x{_endAddress:x}",
            Size = _endAddress - _startAddress,
            Hash = hash,
            Confidence = _confidence,
            SourceFile = _sourceFile,
            SourceLine = _sourceLine,
            IsEntrypoint = _isEntrypoint,
            EntrypointType = _entrypointType,
            IsSink = _isSink,
            SinkVulnId = _sinkVulnId
        };
    }
}