// -----------------------------------------------------------------------------
// SymbolTableDiffAnalyzer.cs
// Sprint: SPRINT_20260106_001_003_BINDEX_symbol_table_diff
// Tasks: SYM-007 to SYM-015 - Implement symbol table diff analyzer
// Description: Symbol table diff analyzer implementation
// -----------------------------------------------------------------------------

using Microsoft.Extensions.Logging;
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;

namespace StellaOps.BinaryIndex.Builders.SymbolDiff;

/// <summary>
/// Analyzes symbol table differences between two binaries.
/// </summary>
public sealed class SymbolTableDiffAnalyzer : ISymbolTableDiffAnalyzer
{
    // NOTE(review): generic type arguments in this file were reconstructed from how the values
    // are used. ILogger<SymbolTableDiffAnalyzer>, GotEntry and PltEntry are the ones that cannot
    // be derived from this file alone -- confirm them against the model/interface declarations.

    // Cached because JsonSerializerOptions is expensive to construct; the settings are the ones
    // the diff ID has always been computed with, so produced IDs do not change.
    private static readonly JsonSerializerOptions DiffIdJsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        WriteIndented = false
    };

    private readonly ISymbolExtractor _symbolExtractor;

    // Stored for callers/DI; not referenced by any method in this class.
    private readonly INameDemangler _nameDemangler;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<SymbolTableDiffAnalyzer> _logger;

    /// <summary>
    /// Creates an analyzer that reads symbol tables through <paramref name="symbolExtractor"/>
    /// and timestamps results with <paramref name="timeProvider"/>.
    /// </summary>
    public SymbolTableDiffAnalyzer(
        ISymbolExtractor symbolExtractor,
        INameDemangler nameDemangler,
        TimeProvider timeProvider,
        ILogger<SymbolTableDiffAnalyzer> logger)
    {
        _symbolExtractor = symbolExtractor;
        _nameDemangler = nameDemangler;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<SymbolTableDiff> ComputeDiffAsync(
        string basePath,
        string targetPath,
        SymbolDiffOptions? options = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(basePath);
        ArgumentNullException.ThrowIfNull(targetPath);

        options ??= new SymbolDiffOptions();

        // Timestamp is taken before extraction starts.
        var now = _timeProvider.GetUtcNow();

        _logger.LogDebug("Computing symbol diff between {Base} and {Target}", basePath, targetPath);

        // Extract symbol tables (base first, then target).
        var baseTable = await ExtractSymbolTableAsync(basePath, ct).ConfigureAwait(false);
        var targetTable = await ExtractSymbolTableAsync(targetPath, ct).ConfigureAwait(false);

        // Compute symbol changes
        var exports = ComputeSymbolChanges(baseTable.Exports, targetTable.Exports, options);
        var imports = ComputeSymbolChanges(baseTable.Imports, targetTable.Imports, options);

        // Compute version diff
        var versions = ComputeVersionDiff(baseTable, targetTable);

        // Compute dynamic linking diff (opt-in via options)
        DynamicLinkingDiff? dynamic = null;
        if (options.IncludeDynamicLinking)
        {
            dynamic = ComputeDynamicLinkingDiff(baseTable, targetTable);
        }

        // Create the diff with a placeholder ID and a neutral ABI assessment; both are
        // replaced below once they can be derived from the populated diff.
        var diffWithoutId = new SymbolTableDiff
        {
            DiffId = string.Empty, // Placeholder
            Base = baseTable.Binary,
            Target = targetTable.Binary,
            Exports = exports,
            Imports = imports,
            Versions = versions,
            Dynamic = dynamic,
            AbiCompatibility = new AbiCompatibility
            {
                Level = AbiCompatibilityLevel.FullyCompatible,
                Score = 1.0,
                IsBackwardCompatible = true,
                IsForwardCompatible = true,
                BreakingChanges = [],
                Warnings = [],
                Summary = new AbiSummary()
            },
            ComputedAt = now
        };

        // Assess ABI compatibility
        var abiCompatibility = AssessAbiCompatibility(diffWithoutId);

        // Compute content-addressed ID
        var diffId = ComputeDiffId(baseTable.Binary, targetTable.Binary, exports, imports);

        var diff = diffWithoutId with
        {
            DiffId = diffId,
            AbiCompatibility = abiCompatibility
        };

        _logger.LogInformation(
            "Symbol diff complete: {ExportsAdded} exports added, {ExportsRemoved} removed, {Level}",
            exports.Counts.Added,
            exports.Counts.Removed,
            abiCompatibility.Level);

        return diff;
    }

    /// <inheritdoc />
    public async Task<SymbolTable> ExtractSymbolTableAsync(
        string binaryPath,
        CancellationToken ct = default)
    {
        return await _symbolExtractor.ExtractAsync(binaryPath, ct).ConfigureAwait(false);
    }

    /// <inheritdoc />
    public AbiCompatibility AssessAbiCompatibility(SymbolTableDiff diff)
    {
        ArgumentNullException.ThrowIfNull(diff);

        var breakingChanges = new List<AbiBreakingChange>();
        var warnings = new List<AbiWarning>();

        // Removed exports are breaking; weak symbols are rated Low, everything else High.
        foreach (var removed in diff.Exports.Removed)
        {
            breakingChanges.Add(new AbiBreakingChange
            {
                Type = AbiBreakType.SymbolRemoved,
                Severity = removed.Binding == SymbolBinding.Weak
                    ? ChangeSeverity.Low
                    : ChangeSeverity.High,
                Symbol = removed.Name,
                Description = $"Exported symbol '{removed.DemangledName ?? removed.Name}' was removed",
                Impact = "Code linking against this symbol will fail at runtime",
                Mitigation = "Provide symbol alias or versioned symbol for backward compatibility"
            });
        }

        // Modified exports: only attribute changes rated High or above are reported as breaks.
        foreach (var modified in diff.Exports.Modified)
        {
            if (!modified.IsAbiBreaking)
            {
                continue;
            }

            foreach (var change in modified.Changes.Where(c => c.Severity >= ChangeSeverity.High))
            {
                breakingChanges.Add(new AbiBreakingChange
                {
                    Type = DetermineBreakType(change),
                    Severity = change.Severity,
                    Symbol = modified.Name,
                    Description = $"Symbol '{modified.DemangledName ?? modified.Name}' {change.Attribute} changed from {change.BaseValue} to {change.TargetValue}",
                    Details = $"Attribute: {change.Attribute}"
                });
            }
        }

        // Version removals (definitions flagged IsBase are skipped)
        foreach (var removed in diff.Versions.DefinitionsRemoved)
        {
            if (!removed.IsBase)
            {
                breakingChanges.Add(new AbiBreakingChange
                {
                    Type = AbiBreakType.VersionRemoved,
                    Severity = ChangeSeverity.High,
                    Symbol = removed.Name,
                    Description = $"Version definition '{removed.Name}' was removed"
                });
            }
        }

        // Added exports are warnings
        foreach (var added in diff.Exports.Added)
        {
            warnings.Add(new AbiWarning
            {
                Type = AbiWarningType.SymbolAdded,
                Symbol = added.Name,
                Message = $"New exported symbol: {added.DemangledName ?? added.Name}"
            });
        }

        // Renames are warnings
        foreach (var rename in diff.Exports.Renamed)
        {
            warnings.Add(new AbiWarning
            {
                Type = AbiWarningType.SymbolRenamed,
                Symbol = rename.BaseName,
                Message = $"Symbol renamed from '{rename.BaseDemangled ?? rename.BaseName}' to '{rename.TargetDemangled ?? rename.TargetName}'"
            });
        }

        // Calculate compatibility level and score
        var level = DetermineCompatibilityLevel(breakingChanges);
        var score = CalculateCompatibilityScore(diff, breakingChanges);

        return new AbiCompatibility
        {
            Level = level,
            Score = score,
            IsBackwardCompatible = breakingChanges.Count == 0,
            IsForwardCompatible = diff.Exports.Added.Count == 0,
            BreakingChanges = breakingChanges,
            Warnings = warnings,
            Summary = new AbiSummary
            {
                TotalExportsBase = diff.Exports.Counts.TotalBase,
                TotalExportsTarget = diff.Exports.Counts.TotalTarget,
                ExportsAdded = diff.Exports.Counts.Added,
                ExportsRemoved = diff.Exports.Counts.Removed,
                ExportsModified = diff.Exports.Counts.Modified,
                BreakingChangesCount = breakingChanges.Count,
                WarningsCount = warnings.Count,
                CompatibilityPercentage = score * 100
            }
        };
    }

    // SYM-009, SYM-010: Compute symbol changes
    /// <summary>
    /// Classifies symbols as added, removed, modified, renamed or unchanged by comparing the
    /// two lists by <c>Name</c>. If a name occurs more than once in the target list, the first
    /// occurrence is the one base symbols are compared against.
    /// </summary>
    private SymbolChangeSummary ComputeSymbolChanges(
        IReadOnlyList<ExtractedSymbol> baseSymbols,
        IReadOnlyList<ExtractedSymbol> targetSymbols,
        SymbolDiffOptions options)
    {
        // Base side only needs membership tests; target side needs the symbol itself.
        var baseNames = new HashSet<string>(baseSymbols.Select(s => s.Name), StringComparer.Ordinal);
        var targetByName = IndexFirstByName(targetSymbols, s => s.Name);

        var added = new List<SymbolChange>();
        var removed = new List<SymbolChange>();
        var modified = new List<SymbolModification>();
        var renamed = new List<SymbolRename>();
        var unchanged = 0;

        // Find added symbols
        foreach (var target in targetSymbols)
        {
            if (!baseNames.Contains(target.Name))
            {
                added.Add(MapToSymbolChange(target));
            }
        }

        // Find removed and modified symbols
        foreach (var baseSymbol in baseSymbols)
        {
            if (!targetByName.TryGetValue(baseSymbol.Name, out var targetSymbol))
            {
                removed.Add(MapToSymbolChange(baseSymbol));
                continue;
            }

            var modification = DetectModification(baseSymbol, targetSymbol);
            if (modification is not null)
            {
                modified.Add(modification);
            }
            else
            {
                unchanged++;
            }
        }

        // Detect renames (removed symbols that match added symbols via fingerprint)
        if (options.DetectRenames)
        {
            var detectedRenames = DetectRenames(removed, added, options.RenameSimilarityThreshold);
            renamed.AddRange(detectedRenames);

            // A rename is reported once, so drop both of its halves from added/removed.
            var renamedBaseNames = new HashSet<string>(detectedRenames.Select(r => r.BaseName));
            var renamedTargetNames = new HashSet<string>(detectedRenames.Select(r => r.TargetName));

            removed.RemoveAll(r => renamedBaseNames.Contains(r.Name));
            added.RemoveAll(a => renamedTargetNames.Contains(a.Name));
        }

        return new SymbolChangeSummary
        {
            Added = added,
            Removed = removed,
            Modified = modified,
            Renamed = renamed,
            Counts = new SymbolChangeCounts
            {
                Added = added.Count,
                Removed = removed.Count,
                Modified = modified.Count,
                Renamed = renamed.Count,
                Unchanged = unchanged,
                TotalBase = baseSymbols.Count,
                TotalTarget = targetSymbols.Count
            }
        };
    }

    // SYM-011: Compute version diff
    /// <summary>
    /// Compares version definitions (by name), version requirements (by <c>Library@Version</c>)
    /// and the version assigned to each versioned export present in both tables.
    /// </summary>
    private VersionMapDiff ComputeVersionDiff(SymbolTable baseTable, SymbolTable targetTable)
    {
        static string RequirementKey(string library, string version) => $"{library}@{version}";

        var baseDefNames = new HashSet<string>(
            baseTable.VersionDefinitions.Select(v => v.Name), StringComparer.Ordinal);
        var targetDefNames = new HashSet<string>(
            targetTable.VersionDefinitions.Select(v => v.Name), StringComparer.Ordinal);

        var defsAdded = targetTable.VersionDefinitions
            .Where(v => !baseDefNames.Contains(v.Name))
            .ToList();
        var defsRemoved = baseTable.VersionDefinitions
            .Where(v => !targetDefNames.Contains(v.Name))
            .ToList();

        var baseReqKeys = new HashSet<string>(
            baseTable.VersionRequirements.Select(r => $"{r.Library}@{r.Version}"), StringComparer.Ordinal);
        var targetReqKeys = new HashSet<string>(
            targetTable.VersionRequirements.Select(r => $"{r.Library}@{r.Version}"), StringComparer.Ordinal);

        var reqsAdded = targetTable.VersionRequirements
            .Where(r => !baseReqKeys.Contains($"{r.Library}@{r.Version}"))
            .ToList();
        var reqsRemoved = baseTable.VersionRequirements
            .Where(r => !targetReqKeys.Contains($"{r.Library}@{r.Version}"))
            .ToList();

        // Detect version assignment changes. Several versioned exports may share a name;
        // the first base export with a given name is the one compared against.
        var assignmentChanges = new List<VersionAssignmentChange>();
        var baseExports = IndexFirstByName(
            baseTable.Exports.Where(e => e.Version is not null),
            e => e.Name);

        foreach (var target in targetTable.Exports.Where(e => e.Version is not null))
        {
            if (baseExports.TryGetValue(target.Name, out var baseExport)
                && baseExport.Version != target.Version)
            {
                assignmentChanges.Add(new VersionAssignmentChange
                {
                    SymbolName = target.Name,
                    BaseVersion = baseExport.Version,
                    TargetVersion = target.Version,
                    IsAbiBreaking = true // Version changes can be breaking
                });
            }
        }

        return new VersionMapDiff
        {
            DefinitionsAdded = defsAdded,
            DefinitionsRemoved = defsRemoved,
            RequirementsAdded = reqsAdded,
            RequirementsRemoved = reqsRemoved,
            AssignmentsChanged = assignmentChanges,
            Counts = new VersionChangeCounts
            {
                DefinitionsAdded = defsAdded.Count,
                DefinitionsRemoved = defsRemoved.Count,
                RequirementsAdded = reqsAdded.Count,
                RequirementsRemoved = reqsRemoved.Count,
                AssignmentsChanged = assignmentChanges.Count
            }
        };
    }

    // SYM-012: Compute dynamic linking diff
    /// <summary>
    /// Builds the GOT, PLT, rpath/runpath and needed-library diffs. Missing GOT/PLT entry
    /// lists are treated as empty.
    /// </summary>
    private DynamicLinkingDiff ComputeDynamicLinkingDiff(SymbolTable baseTable, SymbolTable targetTable)
    {
        return new DynamicLinkingDiff
        {
            Got = ComputeGotDiff(baseTable.GotEntries ?? [], targetTable.GotEntries ?? []),
            Plt = ComputePltDiff(baseTable.PltEntries ?? [], targetTable.PltEntries ?? []),
            Rpath = ComputeRpathDiff(baseTable, targetTable),
            Needed = ComputeNeededDiff(baseTable.NeededLibraries, targetTable.NeededLibraries)
        };
    }

    /// <summary>
    /// Compares GOT entries by symbol name; an entry present on both sides is reported as
    /// modified when its type or address differs.
    /// </summary>
    private GotDiff ComputeGotDiff(IReadOnlyList<GotEntry> baseEntries, IReadOnlyList<GotEntry> targetEntries)
    {
        var baseNames = new HashSet<string>(baseEntries.Select(e => e.SymbolName), StringComparer.Ordinal);
        var targetBySymbol = IndexFirstByName(targetEntries, e => e.SymbolName);

        var added = targetEntries.Where(e => !baseNames.Contains(e.SymbolName)).ToList();
        var removed = baseEntries.Where(e => !targetBySymbol.ContainsKey(e.SymbolName)).ToList();

        var modified = new List<GotEntryModification>();
        foreach (var baseEntry in baseEntries)
        {
            if (targetBySymbol.TryGetValue(baseEntry.SymbolName, out var targetEntry)
                && (baseEntry.Type != targetEntry.Type || baseEntry.Address != targetEntry.Address))
            {
                modified.Add(new GotEntryModification
                {
                    SymbolName = baseEntry.SymbolName,
                    BaseAddress = baseEntry.Address,
                    TargetAddress = targetEntry.Address,
                    BaseType = baseEntry.Type,
                    TargetType = targetEntry.Type
                });
            }
        }

        return new GotDiff
        {
            EntriesAdded = added,
            EntriesRemoved = removed,
            EntriesModified = modified,
            BaseCount = baseEntries.Count,
            TargetCount = targetEntries.Count
        };
    }

    /// <summary>
    /// Compares PLT entries by symbol name; an entry present on both sides is reported as
    /// reordered when its index differs.
    /// </summary>
    private PltDiff ComputePltDiff(IReadOnlyList<PltEntry> baseEntries, IReadOnlyList<PltEntry> targetEntries)
    {
        var baseNames = new HashSet<string>(baseEntries.Select(e => e.SymbolName), StringComparer.Ordinal);
        var targetBySymbol = IndexFirstByName(targetEntries, e => e.SymbolName);

        var added = targetEntries.Where(e => !baseNames.Contains(e.SymbolName)).ToList();
        var removed = baseEntries.Where(e => !targetBySymbol.ContainsKey(e.SymbolName)).ToList();

        var reordered = new List<PltReorder>();
        foreach (var baseEntry in baseEntries)
        {
            if (targetBySymbol.TryGetValue(baseEntry.SymbolName, out var targetEntry)
                && baseEntry.Index != targetEntry.Index)
            {
                reordered.Add(new PltReorder
                {
                    SymbolName = baseEntry.SymbolName,
                    BaseIndex = baseEntry.Index,
                    TargetIndex = targetEntry.Index
                });
            }
        }

        return new PltDiff
        {
            EntriesAdded = added,
            EntriesRemoved = removed,
            EntriesReordered = reordered,
            BaseCount = baseEntries.Count,
            TargetCount = targetEntries.Count
        };
    }

    /// <summary>
    /// Compares the union of rpath and runpath entries of each table as unordered sets.
    /// </summary>
    private RpathDiff ComputeRpathDiff(SymbolTable baseTable, SymbolTable targetTable)
    {
        var basePaths = new HashSet<string>(
            (baseTable.Rpath ?? []).Concat(baseTable.Runpath ?? []));
        var targetPaths = new HashSet<string>(
            (targetTable.Rpath ?? []).Concat(targetTable.Runpath ?? []));

        return new RpathDiff
        {
            RpathBase = baseTable.Rpath,
            RpathTarget = targetTable.Rpath,
            RunpathBase = baseTable.Runpath,
            RunpathTarget = targetTable.Runpath,
            PathsAdded = targetPaths.Except(basePaths).ToList(),
            PathsRemoved = basePaths.Except(targetPaths).ToList(),
            HasChanges = !basePaths.SetEquals(targetPaths)
        };
    }

    /// <summary>
    /// Compares the needed-library lists as unordered sets and echoes both original lists.
    /// </summary>
    private NeededDiff ComputeNeededDiff(IReadOnlyList<string> baseLibs, IReadOnlyList<string> targetLibs)
    {
        var baseSet = new HashSet<string>(baseLibs);
        var targetSet = new HashSet<string>(targetLibs);

        return new NeededDiff
        {
            LibrariesAdded = targetSet.Except(baseSet).ToList(),
            LibrariesRemoved = baseSet.Except(targetSet).ToList(),
            BaseLibraries = baseLibs,
            TargetLibraries = targetLibs
        };
    }

    // SYM-013: Detect renames via fingerprint matching
    /// <summary>
    /// Pairs each removed symbol that has a fingerprint with the most similar still-unmatched
    /// added symbol whose similarity is at least <paramref name="threshold"/>. Matching is
    /// greedy in the order of <paramref name="removed"/>; neither input list is modified.
    /// </summary>
    private IReadOnlyList<SymbolRename> DetectRenames(
        List<SymbolChange> removed,
        List<SymbolChange> added,
        double threshold)
    {
        var renames = new List<SymbolRename>();

        // Only consider symbols with fingerprints
        var removedWithFp = removed.Where(r => r.Fingerprint is not null).ToList();
        var addedWithFp = added.Where(a => a.Fingerprint is not null).ToList();

        foreach (var removedSymbol in removedWithFp)
        {
            // Find best match in added; on equal similarity the earlier candidate is kept.
            SymbolChange? bestMatch = null;
            double bestSimilarity = 0;

            foreach (var addedSymbol in addedWithFp)
            {
                var similarity = ComputeFingerprintSimilarity(
                    removedSymbol.Fingerprint!,
                    addedSymbol.Fingerprint!);

                if (similarity >= threshold && similarity > bestSimilarity)
                {
                    bestMatch = addedSymbol;
                    bestSimilarity = similarity;
                }
            }

            if (bestMatch is not null)
            {
                renames.Add(new SymbolRename
                {
                    BaseName = removedSymbol.Name,
                    TargetName = bestMatch.Name,
                    BaseDemangled = removedSymbol.DemangledName,
                    TargetDemangled = bestMatch.DemangledName,
                    Fingerprint = removedSymbol.Fingerprint!,
                    Similarity = bestSimilarity,
                    Confidence = DetermineRenameConfidence(bestSimilarity)
                });

                // Remove matched from consideration
                addedWithFp.Remove(bestMatch);
            }
        }

        return renames;
    }

    // SYM-015: Compute content-addressed diff ID
    /// <summary>
    /// Returns <c>sha256:</c> followed by the lowercase hex SHA-256 of a compact JSON document
    /// holding both binary hashes and the ordinally sorted names of added/removed exports and
    /// imports.
    /// </summary>
    private string ComputeDiffId(
        BinaryRef baseRef,
        BinaryRef targetRef,
        SymbolChangeSummary exports,
        SymbolChangeSummary imports)
    {
        // Names are sorted so the ID does not depend on extraction order.
        var canonical = new
        {
            base_sha256 = baseRef.Sha256,
            target_sha256 = targetRef.Sha256,
            exports_added = exports.Added.Select(e => e.Name).OrderBy(n => n, StringComparer.Ordinal),
            exports_removed = exports.Removed.Select(e => e.Name).OrderBy(n => n, StringComparer.Ordinal),
            imports_added = imports.Added.Select(i => i.Name).OrderBy(n => n, StringComparer.Ordinal),
            imports_removed = imports.Removed.Select(i => i.Name).OrderBy(n => n, StringComparer.Ordinal)
        };

        var json = JsonSerializer.Serialize(canonical, DiffIdJsonOptions);

        var hash = SHA256.HashData(Encoding.UTF8.GetBytes(json));
        return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}";
    }

    // Helper methods

    /// <summary>
    /// Builds a name-to-item lookup that keeps the first item for each key instead of throwing
    /// when a key repeats (which <c>ToDictionary</c> would do).
    /// </summary>
    private static Dictionary<string, T> IndexFirstByName<T>(IEnumerable<T> items, Func<T, string> keySelector)
    {
        var index = new Dictionary<string, T>(StringComparer.Ordinal);
        foreach (var item in items)
        {
            index.TryAdd(keySelector(item), item);
        }

        return index;
    }

    /// <summary>
    /// Copies the attributes of an extracted symbol into a <see cref="SymbolChange"/>.
    /// </summary>
    private static SymbolChange MapToSymbolChange(ExtractedSymbol symbol)
    {
        return new SymbolChange
        {
            Name = symbol.Name,
            DemangledName = symbol.DemangledName,
            Type = symbol.Type,
            Binding = symbol.Binding,
            Visibility = symbol.Visibility,
            Section = symbol.Section,
            Address = symbol.Address,
            Size = symbol.Size,
            Version = symbol.Version,
            Fingerprint = symbol.Fingerprint
        };
    }

    /// <summary>
    /// Compares type, size, visibility and binding of two same-named symbols.
    /// Returns <c>null</c> when none of the four differ.
    /// </summary>
    private static SymbolModification? DetectModification(ExtractedSymbol baseSymbol, ExtractedSymbol targetSymbol)
    {
        var changes = new List<AttributeChange>();

        if (baseSymbol.Type != targetSymbol.Type)
        {
            changes.Add(new AttributeChange
            {
                Attribute = "type",
                BaseValue = baseSymbol.Type.ToString(),
                TargetValue = targetSymbol.Type.ToString(),
                Severity = ChangeSeverity.High
            });
        }

        if (baseSymbol.Size != targetSymbol.Size)
        {
            changes.Add(new AttributeChange
            {
                Attribute = "size",
                BaseValue = baseSymbol.Size.ToString(CultureInfo.InvariantCulture),
                TargetValue = targetSymbol.Size.ToString(CultureInfo.InvariantCulture),
                // A shrinking symbol is rated High, a growing one Low.
                Severity = targetSymbol.Size < baseSymbol.Size
                    ? ChangeSeverity.High
                    : ChangeSeverity.Low
            });
        }

        if (baseSymbol.Visibility != targetSymbol.Visibility)
        {
            // Becoming Hidden from Default or Protected is rated High; any other change Medium.
            var severityFromVisibility = (baseSymbol.Visibility, targetSymbol.Visibility) switch
            {
                (SymbolVisibility.Default, SymbolVisibility.Hidden) => ChangeSeverity.High,
                (SymbolVisibility.Protected, SymbolVisibility.Hidden) => ChangeSeverity.High,
                _ => ChangeSeverity.Medium
            };

            changes.Add(new AttributeChange
            {
                Attribute = "visibility",
                BaseValue = baseSymbol.Visibility.ToString(),
                TargetValue = targetSymbol.Visibility.ToString(),
                Severity = severityFromVisibility
            });
        }

        if (baseSymbol.Binding != targetSymbol.Binding)
        {
            changes.Add(new AttributeChange
            {
                Attribute = "binding",
                BaseValue = baseSymbol.Binding.ToString(),
                TargetValue = targetSymbol.Binding.ToString(),
                Severity = ChangeSeverity.Medium
            });
        }

        if (changes.Count == 0)
        {
            return null;
        }

        return new SymbolModification
        {
            Name = baseSymbol.Name,
            DemangledName = baseSymbol.DemangledName ?? targetSymbol.DemangledName,
            Base = new SymbolAttributes
            {
                Type = baseSymbol.Type,
                Binding = baseSymbol.Binding,
                Visibility = baseSymbol.Visibility,
                Section = baseSymbol.Section,
                Address = baseSymbol.Address,
                Size = baseSymbol.Size,
                Version = baseSymbol.Version,
                Fingerprint = baseSymbol.Fingerprint
            },
            Target = new SymbolAttributes
            {
                Type = targetSymbol.Type,
                Binding = targetSymbol.Binding,
                Visibility = targetSymbol.Visibility,
                Section = targetSymbol.Section,
                Address = targetSymbol.Address,
                Size = targetSymbol.Size,
                Version = targetSymbol.Version,
                Fingerprint = targetSymbol.Fingerprint
            },
            Changes = changes,
            IsAbiBreaking = changes.Any(c => c.Severity >= ChangeSeverity.High)
        };
    }

    /// <summary>
    /// Returns 1.0 for identical strings, otherwise the Jaccard index of the two strings'
    /// sets of distinct characters (0 when both sets are empty).
    /// </summary>
    private static double ComputeFingerprintSimilarity(string fp1, string fp2)
    {
        if (fp1 == fp2)
        {
            return 1.0;
        }

        // Simple Jaccard similarity on hex characters.
        // NOTE(review): only the distinct characters are compared, so order and repetition
        // are ignored -- confirm this is the intended measure for the fingerprint format in use.
        var set1 = new HashSet<char>(fp1);
        var set2 = new HashSet<char>(fp2);

        var intersection = set1.Intersect(set2).Count();
        var union = set1.Union(set2).Count();

        return union == 0 ? 0 : (double)intersection / union;
    }

    /// <summary>
    /// Maps a similarity value to a confidence bucket using fixed thresholds.
    /// </summary>
    private static RenameConfidence DetermineRenameConfidence(double similarity)
    {
        return similarity switch
        {
            >= 0.95 => RenameConfidence.VeryHigh,
            >= 0.85 => RenameConfidence.High,
            >= 0.75 => RenameConfidence.Medium,
            >= 0.65 => RenameConfidence.Low,
            _ => RenameConfidence.VeryLow
        };
    }

    /// <summary>
    /// Maps an attribute name produced by <see cref="DetectModification"/> to a break type;
    /// unrecognized names fall back to <see cref="AbiBreakType.SymbolTypeChanged"/>.
    /// </summary>
    private static AbiBreakType DetermineBreakType(AttributeChange change)
    {
        return change.Attribute switch
        {
            "type" => AbiBreakType.SymbolTypeChanged,
            "size" => AbiBreakType.SymbolSizeChanged,
            "visibility" => AbiBreakType.VisibilityReduced,
            "binding" => AbiBreakType.BindingChanged,
            _ => AbiBreakType.SymbolTypeChanged
        };
    }

    /// <summary>
    /// Derives the overall level from the number of Critical and High severity breaks.
    /// Breaks of lower severity alone yield <see cref="AbiCompatibilityLevel.CompatibleWithWarnings"/>.
    /// </summary>
    private static AbiCompatibilityLevel DetermineCompatibilityLevel(List<AbiBreakingChange> breaks)
    {
        if (breaks.Count == 0)
        {
            return AbiCompatibilityLevel.FullyCompatible;
        }

        var criticalCount = breaks.Count(b => b.Severity == ChangeSeverity.Critical);
        var highCount = breaks.Count(b => b.Severity == ChangeSeverity.High);

        if (criticalCount > 0 || highCount >= 10)
        {
            return AbiCompatibilityLevel.Incompatible;
        }

        if (highCount >= 3)
        {
            return AbiCompatibilityLevel.MajorIncompatibility;
        }

        if (highCount >= 1)
        {
            return AbiCompatibilityLevel.MinorIncompatibility;
        }

        return AbiCompatibilityLevel.CompatibleWithWarnings;
    }

    /// <summary>
    /// Returns a score in [0, 1]: 1 minus the summed penalties divided by the number of base
    /// exports, floored at 0. Returns 1.0 when the base has no exports.
    /// </summary>
    private static double CalculateCompatibilityScore(SymbolTableDiff diff, List<AbiBreakingChange> breaks)
    {
        if (diff.Exports.Counts.TotalBase == 0)
        {
            return 1.0;
        }

        // Each removed export costs 0.5 here in addition to whatever severity weight it
        // carries as an entry in `breaks`.
        var removedWeight = diff.Exports.Counts.Removed * 0.5;
        var breakingWeight = breaks.Sum(b => b.Severity switch
        {
            ChangeSeverity.Critical => 1.0,
            ChangeSeverity.High => 0.5,
            ChangeSeverity.Medium => 0.2,
            _ => 0.1
        });

        var penalty = (removedWeight + breakingWeight) / diff.Exports.Counts.TotalBase;
        return Math.Max(0, 1.0 - penalty);
    }
}

/// <summary>
/// Interface for extracting symbols from binaries.
/// </summary>
public interface ISymbolExtractor
{
    /// <summary>
    /// Extracts symbol table from a binary.
    /// </summary>
    Task<SymbolTable> ExtractAsync(string binaryPath, CancellationToken ct = default);
}

/// <summary>
/// Interface for demangling C++/Rust names.
/// </summary>
public interface INameDemangler
{
    /// <summary>
    /// Demangles a symbol name.
    /// </summary>
    string? Demangle(string mangledName);

    /// <summary>
    /// Detects the mangling scheme.
    /// </summary>
    ManglingScheme DetectScheme(string name);
}

/// <summary>
/// Name mangling scheme.
/// </summary>
public enum ManglingScheme
{
    None,
    ItaniumCxx,
    MicrosoftCxx,
    Rust,
    Swift,
    Unknown
}