// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.

using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using StellaOps.Symbols.Core.Models;

namespace StellaOps.BinaryIndex.DeltaSig;

// NOTE(review): the generic arguments of the IReadOnlyList parameters below were
// missing from the reviewed text (angle-bracketed spans had been stripped).
// `SourceFileDiff` and `FunctionDelta` are inferred names - confirm them against
// the model declarations before merging.

/// <summary>
/// Builder for deterministic hybrid diff artifacts.
/// </summary>
public interface IHybridDiffComposer
{
    /// <summary>
    /// Generates semantic edits from source file pairs.
    /// </summary>
    /// <param name="sourceDiffs">Before/after source pairs; may be <c>null</c>.</param>
    /// <returns>The semantic edit script for the supplied pairs.</returns>
    SemanticEditScript GenerateSemanticEditScript(IReadOnlyList<SourceFileDiff>? sourceDiffs);

    /// <summary>
    /// Builds a canonical symbol map from a symbol manifest.
    /// </summary>
    /// <param name="manifest">Symbol manifest to convert.</param>
    /// <param name="binaryDigest">Optional digest of the binary the manifest describes.</param>
    /// <returns>The symbol map built from <paramref name="manifest"/>.</returns>
    SymbolMap BuildSymbolMap(SymbolManifest manifest, string? binaryDigest = null);

    /// <summary>
    /// Builds a deterministic fallback map from signature symbols when debug data is unavailable.
    /// </summary>
    /// <param name="signature">Signature whose symbols populate the map.</param>
    /// <param name="binary">Reference to the binary the signature was taken from.</param>
    /// <param name="role">Role label of the binary (for example <c>"old"</c>).</param>
    /// <returns>A symbol map with synthetic addresses.</returns>
    SymbolMap BuildFallbackSymbolMap(DeltaSignature signature, BinaryReference binary, string role);

    /// <summary>
    /// Builds symbol patch plan by linking edits and symbol-level deltas.
    /// </summary>
    /// <param name="editScript">Semantic edits to link against.</param>
    /// <param name="oldSymbolMap">Symbol map of the binary before the change.</param>
    /// <param name="newSymbolMap">Symbol map of the binary after the change.</param>
    /// <param name="deltas">Function-level deltas between the two binaries.</param>
    /// <returns>The symbol patch plan.</returns>
    SymbolPatchPlan BuildSymbolPatchPlan(
        SemanticEditScript editScript,
        SymbolMap oldSymbolMap,
        SymbolMap newSymbolMap,
        IReadOnlyList<FunctionDelta> deltas);

    /// <summary>
    /// Builds normalized patch manifest from function deltas.
    /// </summary>
    /// <param name="buildId">Build identifier recorded in the manifest.</param>
    /// <param name="normalizationRecipeId">Identifier of the normalization recipe recorded in the manifest.</param>
    /// <param name="deltas">Function-level deltas to describe.</param>
    /// <returns>The patch manifest.</returns>
    PatchManifest BuildPatchManifest(
        string buildId,
        string normalizationRecipeId,
        IReadOnlyList<FunctionDelta> deltas);

    /// <summary>
    /// Composes all hybrid diff artifacts into one evidence object.
    /// </summary>
    /// <param name="sourceDiffs">Before/after source pairs; may be <c>null</c>.</param>
    /// <param name="oldSymbolMap">Symbol map of the binary before the change.</param>
    /// <param name="newSymbolMap">Symbol map of the binary after the change.</param>
    /// <param name="deltas">Function-level deltas between the two binaries.</param>
    /// <param name="normalizationRecipeId">Identifier of the normalization recipe recorded in the manifest.</param>
    /// <returns>The combined evidence object.</returns>
    HybridDiffEvidence Compose(
        IReadOnlyList<SourceFileDiff>? sourceDiffs,
        SymbolMap oldSymbolMap,
        SymbolMap newSymbolMap,
        IReadOnlyList<FunctionDelta> deltas,
        string normalizationRecipeId);

    /// <summary>
    /// Computes deterministic digest of a serializable value.
    /// </summary>
    /// <typeparam name="T">Type of the value to digest.</typeparam>
    /// <param name="value">Value to digest.</param>
    /// <returns>The digest as a string.</returns>
    string ComputeDigest<T>(T value);
}

/// <summary>
/// Deterministic implementation of hybrid diff composition.
/// </summary>
public sealed class HybridDiffComposer : IHybridDiffComposer
{
    // NOTE(review): generic type arguments were missing from the reviewed text
    // (angle-bracketed spans had been stripped). Arguments that follow from usage
    // (byte, string, SemanticEdit, SymbolMapEntry, SymbolBlock) are restored as-is;
    // `SourceFileDiff` and `FunctionDelta` are inferred from the members accessed
    // below - confirm them against the model declarations.

    // Serializer settings used for every digest of a non-string value.
    private static readonly JsonSerializerOptions DigestJsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = false
    };

    // Identifiers followed by '(' that must not be treated as symbol anchors.
    private static readonly HashSet<string> ControlKeywords =
    [
        "if",
        "for",
        "while",
        "switch",
        "catch",
        "return",
        "sizeof"
    ];

    // First identifier (optionally qualified with ':' or '.') that is followed by '('.
    private static readonly Regex FunctionAnchorRegex = new(
        @"(?<name>[A-Za-z_][A-Za-z0-9_:\.]*)\s*\(",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    // SHA-256 of zero bytes; equals ComputeDigest(string.Empty).
    private static readonly string EmptyContentDigest = Sha256Hex(ReadOnlySpan<byte>.Empty);

    /// <inheritdoc />
    public SemanticEditScript GenerateSemanticEditScript(IReadOnlyList<SourceFileDiff>? sourceDiffs)
    {
        // Normalize each path once, then process in ordinal path order so the
        // tree digest does not depend on the caller's ordering.
        var diffs = (sourceDiffs ?? [])
            .Select(d => (Path: NormalizePath(d.Path), Diff: d))
            .OrderBy(d => d.Path, StringComparer.Ordinal)
            .ToList();

        var edits = new List<SemanticEdit>();
        var treeMaterial = new StringBuilder();

        foreach (var (normalizedPath, diff) in diffs)
        {
            var before = diff.BeforeContent ?? string.Empty;
            var after = diff.AfterContent ?? string.Empty;
            var beforeDigest = ComputeDigest(before);
            var afterDigest = ComputeDigest(after);

            // Every file contributes to the tree digest, changed or not.
            treeMaterial
                .Append(normalizedPath)
                .Append('|')
                .Append(beforeDigest)
                .Append('|')
                .Append(afterDigest)
                .Append('\n');

            if (string.Equals(beforeDigest, afterDigest, StringComparison.Ordinal))
            {
                continue;
            }

            var beforeSymbols = ExtractSymbolBlocks(before);
            var afterSymbols = ExtractSymbolBlocks(after);

            // No recognizable symbols on either side: record a single file-level edit.
            if (beforeSymbols.Count == 0 && afterSymbols.Count == 0)
            {
                edits.Add(CreateFileEdit(normalizedPath, beforeDigest, afterDigest));
                continue;
            }

            // Symbols present only before the change.
            foreach (var symbol in beforeSymbols.Keys
                         .Except(afterSymbols.Keys, StringComparer.Ordinal)
                         .OrderBy(v => v, StringComparer.Ordinal))
            {
                var pre = beforeSymbols[symbol];
                edits.Add(CreateSymbolEdit(
                    normalizedPath,
                    symbol,
                    "remove",
                    pre.Hash,
                    null,
                    new SourceSpan { StartLine = pre.StartLine, EndLine = pre.EndLine },
                    null));
            }

            // Symbols present only after the change.
            foreach (var symbol in afterSymbols.Keys
                         .Except(beforeSymbols.Keys, StringComparer.Ordinal)
                         .OrderBy(v => v, StringComparer.Ordinal))
            {
                var post = afterSymbols[symbol];
                edits.Add(CreateSymbolEdit(
                    normalizedPath,
                    symbol,
                    "add",
                    null,
                    post.Hash,
                    null,
                    new SourceSpan { StartLine = post.StartLine, EndLine = post.EndLine }));
            }

            // Symbols on both sides: only a changed block hash produces an edit.
            foreach (var symbol in beforeSymbols.Keys
                         .Intersect(afterSymbols.Keys, StringComparer.Ordinal)
                         .OrderBy(v => v, StringComparer.Ordinal))
            {
                var pre = beforeSymbols[symbol];
                var post = afterSymbols[symbol];
                if (!string.Equals(pre.Hash, post.Hash, StringComparison.Ordinal))
                {
                    edits.Add(CreateSymbolEdit(
                        normalizedPath,
                        symbol,
                        "update",
                        pre.Hash,
                        post.Hash,
                        new SourceSpan { StartLine = pre.StartLine, EndLine = pre.EndLine },
                        new SourceSpan { StartLine = post.StartLine, EndLine = post.EndLine }));
                }
            }
        }

        var orderedEdits = edits
            .OrderBy(e => e.NodePath, StringComparer.Ordinal)
            .ThenBy(e => e.EditType, StringComparer.Ordinal)
            .ToList();

        return new SemanticEditScript
        {
            SourceTreeDigest = ComputeDigest(treeMaterial.ToString()),
            Edits = orderedEdits
        };
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="manifest"/> is <c>null</c>.</exception>
    public SymbolMap BuildSymbolMap(SymbolManifest manifest, string? binaryDigest = null)
    {
        ArgumentNullException.ThrowIfNull(manifest);

        // Compiled path -> source path; the first mapping wins when a compiled path repeats.
        var sourcePathByCompiled = manifest.SourceMappings?
            .GroupBy(m => m.CompiledPath, StringComparer.Ordinal)
            .ToDictionary(g => g.Key, g => g.First().SourcePath, StringComparer.Ordinal)
            ?? new Dictionary<string, string>(StringComparer.Ordinal);

        var symbols = manifest.Symbols
            .OrderBy(s => s.Address)
            .ThenBy(s => s.MangledName, StringComparer.Ordinal)
            .Select(s =>
            {
                // A zero-sized symbol still occupies one address so the range stays valid.
                var size = s.Size == 0 ? 1UL : s.Size;
                var mappedPath = ResolveSourcePath(s.SourceFile, sourcePathByCompiled);
                var ranges = mappedPath is null || s.SourceLine is null
                    ? null
                    : new[]
                    {
                        new SourceRange
                        {
                            File = NormalizePath(mappedPath),
                            LineStart = s.SourceLine.Value,
                            LineEnd = s.SourceLine.Value
                        }
                    };

                return new SymbolMapEntry
                {
                    Name = string.IsNullOrWhiteSpace(s.DemangledName) ? s.MangledName : s.DemangledName,
                    Kind = MapSymbolKind(s.Type),
                    AddressStart = s.Address,
                    AddressEnd = s.Address + size - 1UL,
                    Section = ".text",
                    SourceRanges = ranges
                };
            })
            .ToList();

        return new SymbolMap
        {
            BuildId = manifest.DebugId,
            BinaryDigest = binaryDigest,
            AddressSource = "manifest",
            Symbols = symbols
        };
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException">
    /// <paramref name="signature"/> or <paramref name="binary"/> is <c>null</c>.
    /// </exception>
    public SymbolMap BuildFallbackSymbolMap(DeltaSignature signature, BinaryReference binary, string role)
    {
        ArgumentNullException.ThrowIfNull(signature);
        ArgumentNullException.ThrowIfNull(binary);

        var (algorithm, digestValue) = SelectDigest(binary.Digest);
        var hasDigest = !string.IsNullOrWhiteSpace(digestValue);
        var buildId = hasDigest
            ? $"{role}:{digestValue[..Math.Min(16, digestValue.Length)]}"
            : $"{role}-fallback";

        // Synthetic base address: one base for the "old" role, another for everything else.
        ulong nextAddress = string.Equals(role, "old", StringComparison.OrdinalIgnoreCase)
            ? 0x100000UL
            : 0x200000UL;

        var symbols = new List<SymbolMapEntry>();
        foreach (var symbol in signature.Symbols.OrderBy(s => s.Name, StringComparer.Ordinal))
        {
            var size = symbol.SizeBytes <= 0 ? 1UL : (ulong)symbol.SizeBytes;
            var start = nextAddress;
            var end = start + size - 1UL;

            symbols.Add(new SymbolMapEntry
            {
                Name = symbol.Name,
                Kind = "function",
                AddressStart = start,
                AddressEnd = end,
                Section = symbol.Scope,
                SourceRanges = null
            });

            // Advance by the size rounded up to a multiple of 16.
            var aligned = ((size + 15UL) / 16UL) * 16UL;
            nextAddress += aligned;
        }

        return new SymbolMap
        {
            BuildId = buildId,
            // Label the value with the algorithm it was actually taken from.
            BinaryDigest = hasDigest ? $"{algorithm}:{digestValue}" : null,
            AddressSource = "synthetic-signature",
            Symbols = symbols
        };
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public SymbolPatchPlan BuildSymbolPatchPlan(
        SemanticEditScript editScript,
        SymbolMap oldSymbolMap,
        SymbolMap newSymbolMap,
        IReadOnlyList<FunctionDelta> deltas)
    {
        ArgumentNullException.ThrowIfNull(editScript);
        ArgumentNullException.ThrowIfNull(oldSymbolMap);
        ArgumentNullException.ThrowIfNull(newSymbolMap);
        ArgumentNullException.ThrowIfNull(deltas);

        var editsDigest = ComputeDigest(editScript);
        var oldMapDigest = ComputeDigest(oldSymbolMap);
        var newMapDigest = ComputeDigest(newSymbolMap);

        var changes = deltas
            .OrderBy(d => d.FunctionId, StringComparer.Ordinal)
            .Select(delta =>
            {
                var anchors = editScript.Edits
                    .Where(e => IsAnchorMatch(e.Anchor, delta.FunctionId))
                    .Select(e => e.Anchor)
                    .Distinct(StringComparer.Ordinal)
                    .OrderBy(v => v, StringComparer.Ordinal)
                    .ToList();

                // No source edit links to this delta: anchor it on its own function id.
                if (anchors.Count == 0)
                {
                    anchors.Add(delta.FunctionId);
                }

                return new SymbolPatchChange
                {
                    Symbol = delta.FunctionId,
                    ChangeType = delta.ChangeType,
                    AstAnchors = anchors,
                    PreHash = delta.OldHash,
                    PostHash = delta.NewHash,
                    DeltaRef = "sha256:" + ComputeDigest($"{delta.FunctionId}|{delta.OldHash}|{delta.NewHash}|{delta.OldSize}|{delta.NewSize}")
                };
            })
            .ToList();

        return new SymbolPatchPlan
        {
            BuildIdBefore = oldSymbolMap.BuildId,
            BuildIdAfter = newSymbolMap.BuildId,
            EditsDigest = editsDigest,
            SymbolMapDigestBefore = oldMapDigest,
            SymbolMapDigestAfter = newMapDigest,
            Changes = changes
        };
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentException">
    /// <paramref name="buildId"/> or <paramref name="normalizationRecipeId"/> is null, empty or whitespace.
    /// </exception>
    /// <exception cref="ArgumentNullException"><paramref name="deltas"/> is <c>null</c>.</exception>
    public PatchManifest BuildPatchManifest(
        string buildId,
        string normalizationRecipeId,
        IReadOnlyList<FunctionDelta> deltas)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(buildId);
        ArgumentException.ThrowIfNullOrWhiteSpace(normalizationRecipeId);
        ArgumentNullException.ThrowIfNull(deltas);

        var patches = deltas
            .OrderBy(d => d.FunctionId, StringComparer.Ordinal)
            .Select(delta =>
            {
                // Negative addresses are clamped to zero.
                var start = delta.Address < 0 ? 0UL : (ulong)delta.Address;

                // Prefer the new size; fall back to the old size (e.g. when the new size is zero).
                var rangeSize = delta.NewSize > 0 ? delta.NewSize : delta.OldSize;
                var end = rangeSize > 0 ? start + (ulong)rangeSize - 1UL : start;

                return new SymbolPatchArtifact
                {
                    Symbol = delta.FunctionId,
                    AddressRange = $"0x{start:x}-0x{end:x}",
                    DeltaDigest = "sha256:" + ComputeDigest($"{delta.FunctionId}|{delta.OldHash}|{delta.NewHash}|{delta.OldSize}|{delta.NewSize}|{delta.DiffLen}"),
                    Pre = new PatchSizeHash
                    {
                        Size = delta.OldSize,
                        Hash = string.IsNullOrWhiteSpace(delta.OldHash) ? "sha256:0" : delta.OldHash!
                    },
                    Post = new PatchSizeHash
                    {
                        Size = delta.NewSize,
                        Hash = string.IsNullOrWhiteSpace(delta.NewHash) ? "sha256:0" : delta.NewHash!
                    }
                };
            })
            .ToList();

        return new PatchManifest
        {
            BuildId = buildId,
            NormalizationRecipeId = normalizationRecipeId,
            Patches = patches
        };
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException">
    /// <paramref name="oldSymbolMap"/>, <paramref name="newSymbolMap"/> or <paramref name="deltas"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="normalizationRecipeId"/> or the new map's build id is null, empty or whitespace.
    /// </exception>
    public HybridDiffEvidence Compose(
        IReadOnlyList<SourceFileDiff>? sourceDiffs,
        SymbolMap oldSymbolMap,
        SymbolMap newSymbolMap,
        IReadOnlyList<FunctionDelta> deltas,
        string normalizationRecipeId)
    {
        // Validate up front so bad arguments fail before any source diffing is done.
        ArgumentNullException.ThrowIfNull(oldSymbolMap);
        ArgumentNullException.ThrowIfNull(newSymbolMap);
        ArgumentNullException.ThrowIfNull(deltas);
        ArgumentException.ThrowIfNullOrWhiteSpace(normalizationRecipeId);

        var script = GenerateSemanticEditScript(sourceDiffs);
        var patchPlan = BuildSymbolPatchPlan(script, oldSymbolMap, newSymbolMap, deltas);
        var patchManifest = BuildPatchManifest(newSymbolMap.BuildId, normalizationRecipeId, deltas);

        return new HybridDiffEvidence
        {
            SemanticEditScript = script,
            OldSymbolMap = oldSymbolMap,
            NewSymbolMap = newSymbolMap,
            SymbolPatchPlan = patchPlan,
            PatchManifest = patchManifest,
            SemanticEditScriptDigest = ComputeDigest(script),
            OldSymbolMapDigest = ComputeDigest(oldSymbolMap),
            NewSymbolMapDigest = ComputeDigest(newSymbolMap),
            SymbolPatchPlanDigest = ComputeDigest(patchPlan),
            PatchManifestDigest = ComputeDigest(patchManifest)
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// A <see cref="string"/> is hashed as-is; any other value is first serialized to JSON
    /// with camel-case property names and no indentation. The result is the lowercase
    /// hexadecimal SHA-256 of the UTF-8 bytes.
    /// </remarks>
    public string ComputeDigest<T>(T value)
    {
        var text = value is string s
            ? s
            : JsonSerializer.Serialize(value, DigestJsonOptions);

        return Sha256Hex(Encoding.UTF8.GetBytes(text));
    }

    /// <summary>
    /// Returns the lowercase hexadecimal SHA-256 of <paramref name="data"/>.
    /// </summary>
    private static string Sha256Hex(ReadOnlySpan<byte> data)
    {
        Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
        SHA256.HashData(data, hash);
        return Convert.ToHexString(hash).ToLowerInvariant();
    }

    /// <summary>
    /// Maps a symbol's compiled source file to its mapped source path.
    /// Returns <c>null</c> for a blank file name and the file name itself when no mapping exists.
    /// </summary>
    private static string? ResolveSourcePath(
        string? sourceFile,
        IReadOnlyDictionary<string, string> sourcePathByCompiled)
    {
        if (string.IsNullOrWhiteSpace(sourceFile))
        {
            return null;
        }

        return sourcePathByCompiled.TryGetValue(sourceFile, out var mapped)
            ? mapped
            : sourceFile;
    }

    /// <summary>
    /// Maps a manifest symbol type to the kind string stored in the symbol map;
    /// unlisted types are reported as <c>"function"</c>.
    /// </summary>
    private static string MapSymbolKind(SymbolType type)
    {
        return type switch
        {
            SymbolType.Function => "function",
            SymbolType.Object or SymbolType.Variable or SymbolType.TlsData => "object",
            SymbolType.Section => "section",
            _ => "function"
        };
    }

    /// <summary>
    /// Picks the digest used to identify a binary: the <c>"sha256"</c> entry when present,
    /// otherwise the entry with the ordinally smallest key. Enumeration order of the
    /// dictionary is never relied upon, so the choice is the same on every run.
    /// </summary>
    /// <returns>
    /// The algorithm key and its value; the value is empty when the dictionary has no entries.
    /// </returns>
    private static (string Algorithm, string Value) SelectDigest(IReadOnlyDictionary<string, string> digest)
    {
        if (digest.TryGetValue("sha256", out var sha))
        {
            return ("sha256", sha ?? string.Empty);
        }

        // FirstOrDefault on an empty sequence yields a pair whose key and value are both null.
        var fallback = digest
            .OrderBy(kv => kv.Key, StringComparer.Ordinal)
            .FirstOrDefault();

        return (fallback.Key ?? "sha256", fallback.Value ?? string.Empty);
    }

    /// <summary>
    /// Converts backslashes to forward slashes and trims surrounding whitespace.
    /// </summary>
    private static string NormalizePath(string path)
    {
        return path.Replace('\\', '/').Trim();
    }

    /// <summary>
    /// Creates a file-level edit. The edit is an "add" when the before side is blank or
    /// empty content, a "remove" when the after side is, and an "update" otherwise.
    /// </summary>
    private static SemanticEdit CreateFileEdit(string path, string beforeDigest, string afterDigest)
    {
        var type = IsEmptyContent(beforeDigest)
            ? "add"
            : IsEmptyContent(afterDigest)
                ? "remove"
                : "update";

        var nodePath = $"{path}::file";
        var stableId = ComputeStableId(path, nodePath, type, beforeDigest, afterDigest);

        return new SemanticEdit
        {
            StableId = stableId,
            EditType = type,
            NodeKind = "file",
            NodePath = nodePath,
            Anchor = path,
            PreDigest = beforeDigest,
            PostDigest = afterDigest
        };

        static bool IsEmptyContent(string digest) =>
            string.IsNullOrWhiteSpace(digest)
            || string.Equals(digest, EmptyContentDigest, StringComparison.Ordinal);
    }

    /// <summary>
    /// Creates a symbol-level edit anchored on <paramref name="symbol"/> within <paramref name="path"/>.
    /// </summary>
    private static SemanticEdit CreateSymbolEdit(
        string path,
        string symbol,
        string type,
        string? preDigest,
        string? postDigest,
        SourceSpan? preSpan,
        SourceSpan? postSpan)
    {
        var nodePath = $"{path}::{symbol}";
        var stableId = ComputeStableId(path, nodePath, type, preDigest, postDigest);

        return new SemanticEdit
        {
            StableId = stableId,
            EditType = type,
            NodeKind = "method",
            NodePath = nodePath,
            Anchor = symbol,
            PreSpan = preSpan,
            PostSpan = postSpan,
            PreDigest = preDigest,
            PostDigest = postDigest
        };
    }

    /// <summary>
    /// Derives an edit identifier from its path, node path, type and digests.
    /// </summary>
    private static string ComputeStableId(
        string path,
        string nodePath,
        string type,
        string? preDigest,
        string? postDigest)
    {
        var material = $"{path}|{nodePath}|{type}|{preDigest}|{postDigest}";
        return Sha256Hex(Encoding.UTF8.GetBytes(material));
    }

    /// <summary>
    /// Line-based heuristic that finds function-like blocks in source text.
    /// </summary>
    /// <remarks>
    /// A line starts a block when it contains an identifier followed by <c>(</c> that is not a
    /// control keyword. Only when that same line contains <c>{</c> is the block extended down to
    /// the line where the brace depth returns to zero (or the last line); otherwise the block is
    /// that single line. Lines are split on <c>'\n'</c> only. When a name occurs more than once,
    /// the last occurrence replaces earlier ones.
    /// </remarks>
    /// <returns>Blocks keyed by the matched name, with 1-based inclusive line numbers.</returns>
    private static Dictionary<string, SymbolBlock> ExtractSymbolBlocks(string content)
    {
        var lines = content.Split('\n');
        var blocks = new Dictionary<string, SymbolBlock>(StringComparer.Ordinal);

        for (var i = 0; i < lines.Length; i++)
        {
            var line = lines[i];
            var match = FunctionAnchorRegex.Match(line);
            if (!match.Success)
            {
                continue;
            }

            var name = match.Groups["name"].Value;
            if (ControlKeywords.Contains(name))
            {
                continue;
            }

            var startLine = i + 1;
            var endLine = startLine;
            var depth = CountChar(line, '{') - CountChar(line, '}');
            var foundOpening = line.Contains('{', StringComparison.Ordinal);

            // Walk forward until the braces opened on the anchor line are balanced again.
            var j = i;
            while (foundOpening && depth > 0 && j + 1 < lines.Length)
            {
                j++;
                var candidate = lines[j];
                depth += CountChar(candidate, '{');
                depth -= CountChar(candidate, '}');
            }

            if (foundOpening)
            {
                endLine = j + 1;

                // Resume scanning after the block so its body is not searched for anchors.
                i = j;
            }

            var sliceStart = startLine - 1;
            var sliceLength = endLine - startLine + 1;
            var blockContent = string.Join("\n", lines, sliceStart, sliceLength);

            blocks[name] = new SymbolBlock(name, ComputeBlockHash(blockContent), startLine, endLine);
        }

        return blocks;
    }

    /// <summary>
    /// Counts the occurrences of <paramref name="token"/> in <paramref name="value"/>.
    /// </summary>
    private static int CountChar(string value, char token)
    {
        var count = 0;
        foreach (var c in value)
        {
            if (c == token)
            {
                count++;
            }
        }

        return count;
    }

    /// <summary>
    /// Hashes the text of one extracted block.
    /// </summary>
    private static string ComputeBlockHash(string content)
    {
        return Sha256Hex(Encoding.UTF8.GetBytes(content));
    }

    /// <summary>
    /// Decides whether a source edit anchor refers to the function <paramref name="functionId"/>.
    /// </summary>
    /// <remarks>
    /// The anchor matches when it contains the function id as an ordinal substring. This also
    /// covers an exact match and the qualified forms <c>X.id</c> and <c>X::id</c>. A null or
    /// empty operand never matches; an empty id would otherwise match every anchor.
    /// </remarks>
    private static bool IsAnchorMatch(string anchor, string functionId)
    {
        if (string.IsNullOrEmpty(anchor) || string.IsNullOrEmpty(functionId))
        {
            return false;
        }

        return anchor.Contains(functionId, StringComparison.Ordinal);
    }

    /// <summary>
    /// Returns the digest of empty content.
    /// </summary>
    private static string ComputeEmptyDigest()
    {
        return EmptyContentDigest;
    }

    // One extracted block: matched name, content hash and 1-based inclusive line span.
    private sealed record SymbolBlock(string Name, string Hash, int StartLine, int EndLine);
}