// Copyright (c) StellaOps. All rights reserved.
// Licensed under BUSL-1.1. See LICENSE in the project root.
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using StellaOps.Symbols.Core.Models;
namespace StellaOps.BinaryIndex.DeltaSig;
/// <summary>
/// Builder for deterministic hybrid diff artifacts.
/// </summary>
public interface IHybridDiffComposer
{
/// <summary>
/// Generates semantic edits from source file pairs.
/// </summary>
/// <param name="sourceDiffs">Source file pairs to compare; may be <c>null</c>.</param>
/// <returns>The semantic edit script describing the differences.</returns>
SemanticEditScript GenerateSemanticEditScript(IReadOnlyList? sourceDiffs);
/// <summary>
/// Builds a canonical symbol map from a symbol manifest.
/// </summary>
/// <param name="manifest">Symbol manifest to convert.</param>
/// <param name="binaryDigest">Optional binary digest; defaults to <c>null</c>.</param>
/// <returns>The symbol map built from <paramref name="manifest"/>.</returns>
SymbolMap BuildSymbolMap(SymbolManifest manifest, string? binaryDigest = null);
/// <summary>
/// Builds a deterministic fallback map from signature symbols when debug data is unavailable.
/// </summary>
/// <param name="signature">Signature whose symbols populate the map.</param>
/// <param name="binary">Reference to the binary the signature belongs to.</param>
/// <param name="role">Role label of the binary within the comparison.</param>
/// <returns>The fallback symbol map.</returns>
SymbolMap BuildFallbackSymbolMap(DeltaSignature signature, BinaryReference binary, string role);
/// <summary>
/// Builds symbol patch plan by linking edits and symbol-level deltas.
/// </summary>
/// <param name="editScript">Semantic edit script to link against.</param>
/// <param name="oldSymbolMap">Symbol map of the build before the change.</param>
/// <param name="newSymbolMap">Symbol map of the build after the change.</param>
/// <param name="deltas">Symbol-level deltas to include in the plan.</param>
/// <returns>The symbol patch plan.</returns>
SymbolPatchPlan BuildSymbolPatchPlan(
SemanticEditScript editScript,
SymbolMap oldSymbolMap,
SymbolMap newSymbolMap,
IReadOnlyList deltas);
/// <summary>
/// Builds normalized patch manifest from function deltas.
/// </summary>
/// <param name="buildId">Build identifier recorded on the manifest.</param>
/// <param name="normalizationRecipeId">Normalization recipe identifier recorded on the manifest.</param>
/// <param name="deltas">Function deltas to turn into patch entries.</param>
/// <returns>The patch manifest.</returns>
PatchManifest BuildPatchManifest(
string buildId,
string normalizationRecipeId,
IReadOnlyList deltas);
/// <summary>
/// Composes all hybrid diff artifacts into one evidence object.
/// </summary>
/// <param name="sourceDiffs">Source file pairs to compare; may be <c>null</c>.</param>
/// <param name="oldSymbolMap">Symbol map of the build before the change.</param>
/// <param name="newSymbolMap">Symbol map of the build after the change.</param>
/// <param name="deltas">Function deltas between the two builds.</param>
/// <param name="normalizationRecipeId">Normalization recipe identifier for the patch manifest.</param>
/// <returns>The composed evidence object.</returns>
HybridDiffEvidence Compose(
IReadOnlyList? sourceDiffs,
SymbolMap oldSymbolMap,
SymbolMap newSymbolMap,
IReadOnlyList deltas,
string normalizationRecipeId);
/// <summary>
/// Computes deterministic digest of a serializable value.
/// </summary>
/// <param name="value">Value to digest.</param>
/// <returns>The digest as a string.</returns>
string ComputeDigest(T value);
}
/// <summary>
/// Deterministic implementation of hybrid diff composition.
/// </summary>
public sealed class HybridDiffComposer : IHybridDiffComposer
{
// Serializer settings used when a non-string value is turned into JSON for hashing:
// camelCase property names and compact (non-indented) output.
private static readonly JsonSerializerOptions DigestJsonOptions = new JsonSerializerOptions
{
    WriteIndented = false,
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};
// Identifiers that can directly precede "(" without naming a symbol. ExtractSymbolBlocks
// skips any line whose first regex match captures one of these names.
private static readonly HashSet<string> ControlKeywords =
[
    "if",
    "for",
    "while",
    "switch",
    "catch",
    "return",
    "sizeof"
];
// Matches an identifier (letters, digits, '_', optionally qualified with ':' or '.')
// followed by optional whitespace and "(". The identifier is captured in the "name"
// group, which ExtractSymbolBlocks reads via match.Groups["name"].
private static readonly Regex FunctionAnchorRegex = new(
    @"(?<name>[A-Za-z_][A-Za-z0-9_:\.]*)\s*\(",
    RegexOptions.Compiled | RegexOptions.CultureInvariant);
/// <inheritdoc />
/// <remarks>
/// Files are processed in ordinal order of their normalized path. Every file contributes a
/// "path|beforeDigest|afterDigest" line to the text that is hashed into
/// <c>SourceTreeDigest</c>; only files whose before/after digests differ can produce edits.
/// </remarks>
public SemanticEditScript GenerateSemanticEditScript(IReadOnlyList? sourceDiffs)
{
// A null input is treated as an empty list of diffs.
var diffs = (sourceDiffs ?? Array.Empty())
.OrderBy(d => NormalizePath(d.Path), StringComparer.Ordinal)
.ToList();
var edits = new List();
var treeMaterial = new StringBuilder();
foreach (var diff in diffs)
{
var normalizedPath = NormalizePath(diff.Path);
// Missing content on either side is hashed as the empty string.
var before = diff.BeforeContent ?? string.Empty;
var after = diff.AfterContent ?? string.Empty;
var beforeDigest = ComputeDigest(before);
var afterDigest = ComputeDigest(after);
// Recorded for every file, including unchanged ones.
treeMaterial
.Append(normalizedPath)
.Append('|')
.Append(beforeDigest)
.Append('|')
.Append(afterDigest)
.Append('\n');
// Identical digests: nothing to report for this file.
if (string.Equals(beforeDigest, afterDigest, StringComparison.Ordinal))
{
continue;
}
var beforeSymbols = ExtractSymbolBlocks(before);
var afterSymbols = ExtractSymbolBlocks(after);
// No symbol blocks detected on either side: fall back to one file-level edit.
if (beforeSymbols.Count == 0 && afterSymbols.Count == 0)
{
edits.Add(CreateFileEdit(normalizedPath, beforeDigest, afterDigest));
continue;
}
// NOTE(review): from here on only symbol-level edits are emitted. If the file changed
// but every detected symbol block hashes the same on both sides, no edit is produced
// for this file - confirm that is intended.
// Symbols present only before the change are reported as "remove".
foreach (var symbol in beforeSymbols.Keys.Except(afterSymbols.Keys, StringComparer.Ordinal).OrderBy(v => v, StringComparer.Ordinal))
{
var pre = beforeSymbols[symbol];
edits.Add(CreateSymbolEdit(
normalizedPath,
symbol,
"remove",
pre.Hash,
null,
new SourceSpan { StartLine = pre.StartLine, EndLine = pre.EndLine },
null));
}
// Symbols present only after the change are reported as "add".
foreach (var symbol in afterSymbols.Keys.Except(beforeSymbols.Keys, StringComparer.Ordinal).OrderBy(v => v, StringComparer.Ordinal))
{
var post = afterSymbols[symbol];
edits.Add(CreateSymbolEdit(
normalizedPath,
symbol,
"add",
null,
post.Hash,
null,
new SourceSpan { StartLine = post.StartLine, EndLine = post.EndLine }));
}
// Symbols present on both sides are reported as "update" only when their block hash differs.
foreach (var symbol in beforeSymbols.Keys.Intersect(afterSymbols.Keys, StringComparer.Ordinal).OrderBy(v => v, StringComparer.Ordinal))
{
var pre = beforeSymbols[symbol];
var post = afterSymbols[symbol];
if (!string.Equals(pre.Hash, post.Hash, StringComparison.Ordinal))
{
edits.Add(CreateSymbolEdit(
normalizedPath,
symbol,
"update",
pre.Hash,
post.Hash,
new SourceSpan { StartLine = pre.StartLine, EndLine = pre.EndLine },
new SourceSpan { StartLine = post.StartLine, EndLine = post.EndLine }));
}
}
}
// Final ordering is by node path, then edit type, both ordinal. OrderBy is a stable
// sort, so edits that tie on both keys keep the order in which they were added above.
var orderedEdits = edits
.OrderBy(e => e.NodePath, StringComparer.Ordinal)
.ThenBy(e => e.EditType, StringComparer.Ordinal)
.ToList();
return new SemanticEditScript
{
SourceTreeDigest = ComputeDigest(treeMaterial.ToString()),
Edits = orderedEdits
};
}
/// <inheritdoc />
/// <remarks>
/// Symbols are emitted in order of address, then mangled name (ordinal). A symbol with
/// size zero is given a one-byte range. Every entry is placed in section ".text".
/// </remarks>
public SymbolMap BuildSymbolMap(SymbolManifest manifest, string? binaryDigest = null)
{
    ArgumentNullException.ThrowIfNull(manifest);

    // Compiled path -> source path. When a compiled path is mapped more than once,
    // the first mapping in manifest order is used.
    var compiledToSource = (manifest.SourceMappings ?? Array.Empty())
        .GroupBy(mapping => mapping.CompiledPath, StringComparer.Ordinal)
        .ToDictionary(group => group.Key, group => group.First().SourcePath, StringComparer.Ordinal);

    var orderedSymbols = manifest.Symbols
        .OrderBy(symbol => symbol.Address)
        .ThenBy(symbol => symbol.MangledName, StringComparer.Ordinal);

    var entries = new List<SymbolMapEntry>();
    foreach (var symbol in orderedSymbols)
    {
        var byteLength = symbol.Size == 0 ? 1UL : symbol.Size;

        // A source range is attached only when both a file and a line are known.
        SourceRange[]? sourceRanges = null;
        var resolvedFile = ResolveSourcePath(symbol.SourceFile, compiledToSource);
        if (resolvedFile is not null && symbol.SourceLine is not null)
        {
            sourceRanges = new[]
            {
                new SourceRange
                {
                    File = NormalizePath(resolvedFile),
                    LineStart = symbol.SourceLine.Value,
                    LineEnd = symbol.SourceLine.Value
                }
            };
        }

        // Prefer the demangled name; fall back to the mangled one when it is blank.
        var displayName = string.IsNullOrWhiteSpace(symbol.DemangledName)
            ? symbol.MangledName
            : symbol.DemangledName;

        entries.Add(new SymbolMapEntry
        {
            Name = displayName,
            Kind = MapSymbolKind(symbol.Type),
            AddressStart = symbol.Address,
            AddressEnd = symbol.Address + byteLength - 1UL,
            Section = ".text",
            SourceRanges = sourceRanges
        });
    }

    return new SymbolMap
    {
        BuildId = manifest.DebugId,
        BinaryDigest = binaryDigest,
        AddressSource = "manifest",
        Symbols = entries
    };
}
/// <inheritdoc />
/// <remarks>
/// Addresses are synthetic. Symbols are laid out in ordinal name order, starting at
/// 0x100000 when <paramref name="role"/> equals "old" (case-insensitive) and at 0x200000
/// otherwise. Each symbol occupies its size in bytes (at least one), and the next symbol
/// starts after that size rounded up to a multiple of 16.
/// </remarks>
public SymbolMap BuildFallbackSymbolMap(DeltaSignature signature, BinaryReference binary, string role)
{
    ArgumentNullException.ThrowIfNull(signature);
    ArgumentNullException.ThrowIfNull(binary);

    var sha = GetDigestString(binary.Digest);
    var hasDigest = !string.IsNullOrWhiteSpace(sha);

    // The build id is the role plus at most the first 16 characters of the digest.
    var buildId = hasDigest
        ? $"{role}:{sha[..Math.Min(16, sha.Length)]}"
        : $"{role}-fallback";

    ulong nextAddress = string.Equals(role, "old", StringComparison.OrdinalIgnoreCase)
        ? 0x100000UL
        : 0x200000UL;

    var symbols = new List<SymbolMapEntry>();
    foreach (var symbol in signature.Symbols.OrderBy(s => s.Name, StringComparer.Ordinal))
    {
        // Non-positive sizes are treated as a single byte so every entry has a valid range.
        var size = symbol.SizeBytes <= 0 ? 1UL : (ulong)symbol.SizeBytes;
        var start = nextAddress;
        var end = start + size - 1UL;

        symbols.Add(new SymbolMapEntry
        {
            Name = symbol.Name,
            Kind = "function",
            AddressStart = start,
            AddressEnd = end,
            Section = symbol.Scope,
            SourceRanges = null
        });

        // Advance by the size rounded up to the next 16-byte boundary.
        var aligned = ((size + 15UL) / 16UL) * 16UL;
        nextAddress += aligned;
    }

    return new SymbolMap
    {
        BuildId = buildId,
        // NOTE(review): GetDigestString falls back to the first digest of any algorithm
        // when no "sha256" entry exists, yet the value is always labelled "sha256:" here.
        // Confirm callers only supply SHA-256 digests.
        BinaryDigest = hasDigest ? $"sha256:{sha}" : null,
        AddressSource = "synthetic-signature",
        Symbols = symbols
    };
}
/// <inheritdoc />
/// <remarks>
/// Deltas are processed in ordinal order of their function id. Each delta is linked to the
/// distinct anchors of all edits that match its function id; when none match, the function
/// id itself is used as the only anchor.
/// </remarks>
public SymbolPatchPlan BuildSymbolPatchPlan(
    SemanticEditScript editScript,
    SymbolMap oldSymbolMap,
    SymbolMap newSymbolMap,
    IReadOnlyList deltas)
{
    ArgumentNullException.ThrowIfNull(editScript);
    ArgumentNullException.ThrowIfNull(oldSymbolMap);
    ArgumentNullException.ThrowIfNull(newSymbolMap);
    ArgumentNullException.ThrowIfNull(deltas);

    var changes = new List<SymbolPatchChange>();
    foreach (var delta in deltas.OrderBy(d => d.FunctionId, StringComparer.Ordinal))
    {
        var functionId = delta.FunctionId;

        var anchors = editScript.Edits
            .Select(edit => edit.Anchor)
            .Where(anchor => IsAnchorMatch(anchor, functionId))
            .Distinct(StringComparer.Ordinal)
            .OrderBy(anchor => anchor, StringComparer.Ordinal)
            .ToList();

        if (anchors.Count == 0)
        {
            anchors.Add(functionId);
        }

        // The delta reference is the digest of the function id, both hashes and both sizes.
        var deltaKey = $"{delta.FunctionId}|{delta.OldHash}|{delta.NewHash}|{delta.OldSize}|{delta.NewSize}";

        changes.Add(new SymbolPatchChange
        {
            Symbol = functionId,
            ChangeType = delta.ChangeType,
            AstAnchors = anchors,
            PreHash = delta.OldHash,
            PostHash = delta.NewHash,
            DeltaRef = "sha256:" + ComputeDigest(deltaKey)
        });
    }

    return new SymbolPatchPlan
    {
        BuildIdBefore = oldSymbolMap.BuildId,
        BuildIdAfter = newSymbolMap.BuildId,
        EditsDigest = ComputeDigest(editScript),
        SymbolMapDigestBefore = ComputeDigest(oldSymbolMap),
        SymbolMapDigestAfter = ComputeDigest(newSymbolMap),
        Changes = changes
    };
}
/// <inheritdoc />
/// <remarks>
/// Patches are emitted in ordinal order of their function id. The address range starts at
/// the delta's address (negative addresses are clamped to zero) and spans the new size,
/// or the old size when the new size is not positive. A blank hash is replaced by the
/// placeholder "sha256:0".
/// </remarks>
public PatchManifest BuildPatchManifest(
    string buildId,
    string normalizationRecipeId,
    IReadOnlyList deltas)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(buildId);
    ArgumentException.ThrowIfNullOrWhiteSpace(normalizationRecipeId);
    ArgumentNullException.ThrowIfNull(deltas);

    static string HashOrPlaceholder(string? hash)
    {
        return string.IsNullOrWhiteSpace(hash) ? "sha256:0" : hash;
    }

    var patches = new List<SymbolPatchArtifact>();
    foreach (var delta in deltas.OrderBy(d => d.FunctionId, StringComparer.Ordinal))
    {
        var start = delta.Address < 0 ? 0UL : (ulong)delta.Address;
        var rangeSize = delta.NewSize > 0 ? delta.NewSize : delta.OldSize;

        // With no positive size on either side the range collapses to the start address.
        var end = start;
        if (rangeSize > 0)
        {
            end = start + (ulong)rangeSize - 1UL;
        }

        var digestKey = $"{delta.FunctionId}|{delta.OldHash}|{delta.NewHash}|{delta.OldSize}|{delta.NewSize}|{delta.DiffLen}";

        patches.Add(new SymbolPatchArtifact
        {
            Symbol = delta.FunctionId,
            AddressRange = $"0x{start:x}-0x{end:x}",
            DeltaDigest = "sha256:" + ComputeDigest(digestKey),
            Pre = new PatchSizeHash
            {
                Size = delta.OldSize,
                Hash = HashOrPlaceholder(delta.OldHash)
            },
            Post = new PatchSizeHash
            {
                Size = delta.NewSize,
                Hash = HashOrPlaceholder(delta.NewHash)
            }
        });
    }

    return new PatchManifest
    {
        BuildId = buildId,
        NormalizationRecipeId = normalizationRecipeId,
        Patches = patches
    };
}
/// <inheritdoc />
/// <remarks>
/// The patch manifest is built with the build id of <paramref name="newSymbolMap"/>. Each
/// artifact is stored on the result together with its digest.
/// </remarks>
public HybridDiffEvidence Compose(
    IReadOnlyList? sourceDiffs,
    SymbolMap oldSymbolMap,
    SymbolMap newSymbolMap,
    IReadOnlyList deltas,
    string normalizationRecipeId)
{
    // Build the derived artifacts first; the plan step null-checks both maps and the deltas.
    var editScript = GenerateSemanticEditScript(sourceDiffs);
    var plan = BuildSymbolPatchPlan(editScript, oldSymbolMap, newSymbolMap, deltas);
    var manifest = BuildPatchManifest(newSymbolMap.BuildId, normalizationRecipeId, deltas);

    return new HybridDiffEvidence
    {
        SemanticEditScript = editScript,
        SemanticEditScriptDigest = ComputeDigest(editScript),
        OldSymbolMap = oldSymbolMap,
        OldSymbolMapDigest = ComputeDigest(oldSymbolMap),
        NewSymbolMap = newSymbolMap,
        NewSymbolMapDigest = ComputeDigest(newSymbolMap),
        SymbolPatchPlan = plan,
        SymbolPatchPlanDigest = ComputeDigest(plan),
        PatchManifest = manifest,
        PatchManifestDigest = ComputeDigest(manifest)
    };
}
/// <inheritdoc />
/// <remarks>
/// A string is hashed as-is; any other value is first serialized to JSON with
/// <c>DigestJsonOptions</c>. The result is the SHA-256 of the UTF-8 bytes, written as
/// lowercase hexadecimal with no prefix.
/// </remarks>
public string ComputeDigest<T>(T value)
{
    var json = value is string s
        ? s
        : JsonSerializer.Serialize(value, DigestJsonOptions);

    var bytes = Encoding.UTF8.GetBytes(json);
    Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
    SHA256.HashData(bytes, hash);
    return Convert.ToHexString(hash).ToLowerInvariant();
}
/// <summary>
/// Translates a symbol's source file through the compiled-path to source-path mapping.
/// </summary>
/// <param name="sourceFile">File recorded on the symbol; may be null or blank.</param>
/// <param name="sourcePathByCompiled">Lookup from compiled path to source path.</param>
/// <returns>
/// <c>null</c> when <paramref name="sourceFile"/> is null or whitespace; the mapped path
/// when the lookup contains it; otherwise <paramref name="sourceFile"/> unchanged.
/// </returns>
private static string? ResolveSourcePath(string? sourceFile, IReadOnlyDictionary<string, string> sourcePathByCompiled)
{
    if (string.IsNullOrWhiteSpace(sourceFile))
    {
        return null;
    }

    return sourcePathByCompiled.TryGetValue(sourceFile, out var mapped)
        ? mapped
        : sourceFile;
}
/// <summary>
/// Maps a manifest symbol type to the kind string stored on a symbol map entry.
/// </summary>
/// <param name="type">Symbol type from the manifest.</param>
/// <returns>"object", "section" or "function".</returns>
private static string MapSymbolKind(SymbolType type) => type switch
{
    SymbolType.Section => "section",
    SymbolType.Object or SymbolType.Variable or SymbolType.TlsData => "object",
    // SymbolType.Function and every remaining value are reported as "function".
    _ => "function"
};
/// <summary>
/// Picks the digest value to use for a binary.
/// </summary>
/// <param name="digest">Digest values keyed by algorithm name.</param>
/// <returns>
/// The value stored under "sha256" when present; otherwise the first value the dictionary
/// enumerates; otherwise the empty string.
/// </returns>
private static string GetDigestString(IReadOnlyDictionary<string, string> digest)
{
    if (digest.TryGetValue("sha256", out var sha))
    {
        return sha;
    }

    // NOTE(review): the fallback depends on the dictionary's enumeration order and ignores
    // which algorithm the value belongs to - confirm this is acceptable for callers.
    return digest.Values.FirstOrDefault() ?? string.Empty;
}
/// <summary>
/// Returns <paramref name="path"/> with surrounding whitespace removed and every
/// backslash replaced by a forward slash.
/// </summary>
private static string NormalizePath(string path) => path.Trim().Replace('\\', '/');
/// <summary>
/// Creates a file-level edit for <paramref name="path"/>.
/// </summary>
/// <remarks>
/// A side counts as absent when its digest is blank or equals the digest of empty input.
/// An absent "before" side yields "add"; otherwise an absent "after" side yields "remove";
/// otherwise the edit is an "update".
/// </remarks>
private static SemanticEdit CreateFileEdit(string path, string beforeDigest, string afterDigest)
{
    var emptyDigest = ComputeEmptyDigest();

    bool IsAbsent(string digest)
    {
        return string.IsNullOrWhiteSpace(digest) || digest == emptyDigest;
    }

    string editType;
    if (IsAbsent(beforeDigest))
    {
        editType = "add";
    }
    else if (IsAbsent(afterDigest))
    {
        editType = "remove";
    }
    else
    {
        editType = "update";
    }

    var fileNodePath = $"{path}::file";

    return new SemanticEdit
    {
        StableId = ComputeStableId(path, fileNodePath, editType, beforeDigest, afterDigest),
        EditType = editType,
        NodeKind = "file",
        NodePath = fileNodePath,
        Anchor = path,
        PreDigest = beforeDigest,
        PostDigest = afterDigest
    };
}
/// <summary>
/// Creates a symbol-level edit of kind "method" whose node path is "path::symbol" and
/// whose anchor is the symbol name.
/// </summary>
/// <param name="path">Normalized path of the file containing the symbol.</param>
/// <param name="symbol">Symbol name.</param>
/// <param name="type">Edit type stored on the edit.</param>
/// <param name="preDigest">Hash of the symbol before the change, if any.</param>
/// <param name="postDigest">Hash of the symbol after the change, if any.</param>
/// <param name="preSpan">Line span before the change, if any.</param>
/// <param name="postSpan">Line span after the change, if any.</param>
private static SemanticEdit CreateSymbolEdit(
    string path,
    string symbol,
    string type,
    string? preDigest,
    string? postDigest,
    SourceSpan? preSpan,
    SourceSpan? postSpan)
{
    var symbolNodePath = $"{path}::{symbol}";

    return new SemanticEdit
    {
        StableId = ComputeStableId(path, symbolNodePath, type, preDigest, postDigest),
        EditType = type,
        NodeKind = "method",
        NodePath = symbolNodePath,
        Anchor = symbol,
        PreDigest = preDigest,
        PostDigest = postDigest,
        PreSpan = preSpan,
        PostSpan = postSpan
    };
}
/// <summary>
/// Derives an edit identifier from the edit's path, node path, type and digests.
/// </summary>
/// <returns>
/// Lowercase hexadecimal SHA-256 of the UTF-8 bytes of
/// "path|nodePath|type|preDigest|postDigest"; a null digest contributes an empty segment.
/// </returns>
private static string ComputeStableId(string path, string nodePath, string type, string? preDigest, string? postDigest)
{
    var material = $"{path}|{nodePath}|{type}|{preDigest}|{postDigest}";
    var bytes = Encoding.UTF8.GetBytes(material);
    Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
    SHA256.HashData(bytes, hash);
    return Convert.ToHexString(hash).ToLowerInvariant();
}
/// <summary>
/// Scans source text line by line and returns the detected symbol blocks keyed by name.
/// </summary>
/// <remarks>
/// A line starts a block when <c>FunctionAnchorRegex</c> matches it and the captured name
/// is not a control keyword. If that line contains '{', the block extends until the brace
/// depth returns to zero or the text ends, and the lines it covers are not scanned again.
/// If it does not, the block is that single line. Line numbers are 1-based and inclusive.
/// When a name occurs more than once, the last occurrence replaces the earlier ones.
/// </remarks>
/// <param name="content">Source text, split on '\n'.</param>
/// <returns>Blocks keyed by symbol name (ordinal comparison).</returns>
private static Dictionary<string, SymbolBlock> ExtractSymbolBlocks(string content)
{
    var lines = content.Split('\n');
    var blocks = new Dictionary<string, SymbolBlock>(StringComparer.Ordinal);

    for (var i = 0; i < lines.Length; i++)
    {
        var line = lines[i];
        var match = FunctionAnchorRegex.Match(line);
        if (!match.Success)
        {
            continue;
        }

        var name = match.Groups["name"].Value;
        if (ControlKeywords.Contains(name))
        {
            continue;
        }

        var startLine = i + 1;
        var endLine = startLine;
        var depth = CountChar(line, '{') - CountChar(line, '}');
        var foundOpening = line.Contains('{', StringComparison.Ordinal);

        // Consume following lines while braces opened on the anchor line remain unclosed.
        var j = i;
        while (foundOpening && depth > 0 && j + 1 < lines.Length)
        {
            j++;
            var candidate = lines[j];
            depth += CountChar(candidate, '{');
            depth -= CountChar(candidate, '}');
        }

        if (foundOpening)
        {
            endLine = j + 1;
            // Resume scanning after the block so its body lines are not treated as anchors.
            i = j;
        }

        // Join only the block's own lines. The ranged overload avoids re-walking the
        // array from the start for every block.
        var sliceStart = startLine - 1;
        var sliceLength = endLine - startLine + 1;
        var blockContent = string.Join("\n", lines, sliceStart, sliceLength);

        blocks[name] = new SymbolBlock(name, ComputeBlockHash(blockContent), startLine, endLine);
    }

    return blocks;
}
/// <summary>
/// Counts how many times <paramref name="token"/> occurs in <paramref name="value"/>.
/// </summary>
private static int CountChar(string value, char token)
{
    var occurrences = 0;
    for (var index = 0; index < value.Length; index++)
    {
        occurrences += value[index] == token ? 1 : 0;
    }

    return occurrences;
}
/// <summary>
/// Hashes the text of a symbol block.
/// </summary>
/// <returns>Lowercase hexadecimal SHA-256 of the UTF-8 bytes of <paramref name="content"/>.</returns>
private static string ComputeBlockHash(string content)
{
    var bytes = Encoding.UTF8.GetBytes(content);
    Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
    SHA256.HashData(bytes, hash);
    return Convert.ToHexString(hash).ToLowerInvariant();
}
/// <summary>
/// Decides whether an edit anchor refers to the given function id.
/// </summary>
/// <remarks>
/// Returns <c>false</c> when either argument is null or empty, so an empty function id no
/// longer matches every anchor and a null anchor no longer throws. Otherwise the anchor
/// matches when it contains the function id (ordinal). That single test also covers an
/// exact match and a ".id" or "::id" qualified suffix.
/// </remarks>
/// <param name="anchor">Anchor recorded on a semantic edit.</param>
/// <param name="functionId">Function id of a delta.</param>
private static bool IsAnchorMatch(string anchor, string functionId)
{
    if (string.IsNullOrEmpty(anchor) || string.IsNullOrEmpty(functionId))
    {
        return false;
    }

    // NOTE(review): substring containment also links "parse" to "parse_header". It is
    // kept because callers may rely on it; confirm whether a boundary-aware match is wanted.
    return anchor.Contains(functionId, StringComparison.Ordinal);
}
/// <summary>
/// Returns the lowercase hexadecimal SHA-256 of zero bytes of input.
/// </summary>
private static string ComputeEmptyDigest()
{
    Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
    SHA256.HashData(ReadOnlySpan<byte>.Empty, hash);
    return Convert.ToHexString(hash).ToLowerInvariant();
}
/// <summary>
/// A symbol block found by ExtractSymbolBlocks: its name, the hash of its text, and its
/// 1-based inclusive line range.
/// </summary>
private sealed record SymbolBlock(string Name, string Hash, int StartLine, int EndLine);
}