using System;
using System.Globalization;
using System.Security.Cryptography;
using System.Text;

namespace StellaOps.Scanner.Reachability;

/// <summary>
/// Builds canonical SymbolIDs per the reachability union schema (v0.1).
/// SymbolIDs are stable, path-independent identifiers that enable CAS lookups
/// to remain reproducible and cacheable across hosts.
/// </summary>
/// <remarks>
/// <para>
/// Format: <c>sym:{lang}:{stable-fragment}</c>, where <c>stable-fragment</c> is the
/// base64url (no padding) encoding of the SHA-256 digest of the canonical,
/// NUL-separated tuple for the language.
/// </para>
/// <para>
/// INTEROP NOTE: This static class always uses SHA-256 for maximum external tool
/// compatibility. For compliance-profile-aware symbol IDs that respect GOST/SM3/FIPS
/// profiles, use the profile-aware builder with an injected hash implementation.
/// NOTE(review): the concrete type references were missing from the original comment;
/// restore the cref targets once confirmed.
/// </para>
/// </remarks>
public static class SymbolId
{
    /// <summary>
    /// Supported languages for symbol IDs.
    /// </summary>
    public static class Lang
    {
        public const string Java = "java";
        public const string DotNet = "dotnet";
        public const string Go = "go";
        public const string Node = "node";
        public const string Deno = "deno";
        public const string Rust = "rust";
        public const string Swift = "swift";
        public const string Shell = "shell";
        public const string Binary = "binary";
        public const string Python = "python";
        public const string Ruby = "ruby";
        public const string Php = "php";
    }

    /// <summary>
    /// Creates a Java symbol ID from method signature components.
    /// Every component is trimmed and lower-cased (invariant culture) before hashing.
    /// </summary>
    /// <param name="package">Package name (e.g., "com.example").</param>
    /// <param name="className">Class name (e.g., "MyClass").</param>
    /// <param name="method">Method name (e.g., "doSomething").</param>
    /// <param name="descriptor">JVM method descriptor (e.g., "(Ljava/lang/String;)V").</param>
    /// <returns>A symbol ID of the form <c>sym:java:{fragment}</c>.</returns>
    public static string ForJava(string package, string className, string method, string descriptor)
    {
        // NOTE(review): lower-casing merges identifiers that differ only by case;
        // presumably mandated by the schema - confirm before changing, since it alters IDs.
        var tuple = $"{Lower(package)}\0{Lower(className)}\0{Lower(method)}\0{Lower(descriptor)}";
        return Build(Lang.Java, tuple);
    }

    /// <summary>
    /// Creates a .NET symbol ID from member signature components.
    /// </summary>
    /// <param name="assemblyName">Assembly name (without version/key).</param>
    /// <param name="ns">Namespace.</param>
    /// <param name="typeName">Type name.</param>
    /// <param name="memberSignature">Member signature using ECMA-335 format.</param>
    /// <returns>A symbol ID of the form <c>sym:dotnet:{fragment}</c>.</returns>
    public static string ForDotNet(string assemblyName, string ns, string typeName, string memberSignature)
    {
        var tuple = $"{Norm(assemblyName)}\0{Norm(ns)}\0{Norm(typeName)}\0{Norm(memberSignature)}";
        return Build(Lang.DotNet, tuple);
    }

    /// <summary>
    /// Creates a Node symbol ID from module export components.
    /// </summary>
    /// <param name="pkgNameOrPath">npm package name or normalized absolute path (drive stripped).</param>
    /// <param name="exportPath">ESM/CJS export path (slash-joined).</param>
    /// <param name="kind">Export kind (e.g., "function", "class", "default").</param>
    /// <returns>A symbol ID of the form <c>sym:node:{fragment}</c>.</returns>
    public static string ForNode(string pkgNameOrPath, string exportPath, string kind)
    {
        var tuple = $"{Norm(pkgNameOrPath)}\0{Norm(exportPath)}\0{Norm(kind)}";
        return Build(Lang.Node, tuple);
    }

    /// <summary>
    /// Creates a Deno symbol ID from module export components.
    /// Uses the same tuple layout as <see cref="ForNode"/> with the "deno" language tag.
    /// </summary>
    /// <param name="pkgNameOrPath">Package name or normalized path.</param>
    /// <param name="exportPath">Export path.</param>
    /// <param name="kind">Export kind.</param>
    /// <returns>A symbol ID of the form <c>sym:deno:{fragment}</c>.</returns>
    public static string ForDeno(string pkgNameOrPath, string exportPath, string kind)
    {
        var tuple = $"{Norm(pkgNameOrPath)}\0{Norm(exportPath)}\0{Norm(kind)}";
        return Build(Lang.Deno, tuple);
    }

    /// <summary>
    /// Creates a Go symbol ID from function/method components.
    /// </summary>
    /// <param name="modulePath">Module path (e.g., "github.com/example/repo").</param>
    /// <param name="packagePath">Package path within module.</param>
    /// <param name="receiver">Receiver type (empty for functions).</param>
    /// <param name="func">Function name.</param>
    /// <returns>A symbol ID of the form <c>sym:go:{fragment}</c>.</returns>
    public static string ForGo(string modulePath, string packagePath, string receiver, string func)
    {
        var tuple = $"{Norm(modulePath)}\0{Norm(packagePath)}\0{Norm(receiver)}\0{Norm(func)}";
        return Build(Lang.Go, tuple);
    }

    /// <summary>
    /// Creates a Rust symbol ID from item components.
    /// </summary>
    /// <param name="crateName">Crate name.</param>
    /// <param name="modulePath">Module path within crate (e.g., "foo::bar").</param>
    /// <param name="itemName">Item name (function, struct, trait, etc.).</param>
    /// <param name="mangled">Optional Rust-mangled name; null/blank contributes an empty component.</param>
    /// <returns>A symbol ID of the form <c>sym:rust:{fragment}</c>.</returns>
    public static string ForRust(string crateName, string modulePath, string itemName, string? mangled = null)
    {
        var tuple = $"{Norm(crateName)}\0{Norm(modulePath)}\0{Norm(itemName)}\0{Norm(mangled)}";
        return Build(Lang.Rust, tuple);
    }

    /// <summary>
    /// Creates a Swift symbol ID from member components.
    /// </summary>
    /// <param name="module">Swift module name.</param>
    /// <param name="typeName">Type name (class, struct, enum, protocol).</param>
    /// <param name="member">Member name.</param>
    /// <param name="mangled">Optional Swift-mangled name; null/blank contributes an empty component.</param>
    /// <returns>A symbol ID of the form <c>sym:swift:{fragment}</c>.</returns>
    public static string ForSwift(string module, string typeName, string member, string? mangled = null)
    {
        var tuple = $"{Norm(module)}\0{Norm(typeName)}\0{Norm(member)}\0{Norm(mangled)}";
        return Build(Lang.Swift, tuple);
    }

    /// <summary>
    /// Creates a shell symbol ID from script/function components.
    /// </summary>
    /// <param name="scriptRelPath">Relative path to script file.</param>
    /// <param name="functionOrCmd">Function name or command identifier.</param>
    /// <returns>A symbol ID of the form <c>sym:shell:{fragment}</c>.</returns>
    public static string ForShell(string scriptRelPath, string functionOrCmd)
    {
        var tuple = $"{Norm(scriptRelPath)}\0{Norm(functionOrCmd)}";
        return Build(Lang.Shell, tuple);
    }

    /// <summary>
    /// Creates a binary symbol ID from ELF/PE/Mach-O components (legacy overload).
    /// Delegates to <see cref="ForBinaryAddressed"/> with an empty address (normalized to "0x0"),
    /// "static" linkage and no code-block hash.
    /// </summary>
    /// <param name="buildId">Binary build-id (GNU build-id, PE GUID, Mach-O UUID).</param>
    /// <param name="section">Section name (e.g., ".text", ".dynsym").</param>
    /// <param name="symbolName">Symbol name from symbol table.</param>
    /// <returns>A symbol ID of the form <c>sym:binary:{fragment}</c>.</returns>
    public static string ForBinary(string buildId, string section, string symbolName)
        => ForBinaryAddressed(buildId, section, string.Empty, symbolName, "static", null);

    /// <summary>
    /// Creates a binary symbol ID that includes file hash, section, address, and linkage.
    /// Aligns with {file:hash, section, addr, name, linkage} tuple used by richgraph-v1.
    /// </summary>
    /// <param name="fileHash">Hash (or build-id) identifying the binary file.</param>
    /// <param name="section">Section name.</param>
    /// <param name="address">Symbol address, decimal or "0x"-prefixed hex; normalized to lower-case hex.</param>
    /// <param name="symbolName">Symbol name.</param>
    /// <param name="linkage">Linkage kind (the legacy overload passes "static").</param>
    /// <param name="codeBlockHash">Optional code-block hash; null/blank contributes an empty component.</param>
    /// <returns>A symbol ID of the form <c>sym:binary:{fragment}</c>.</returns>
    public static string ForBinaryAddressed(string fileHash, string section, string address, string symbolName, string linkage, string? codeBlockHash = null)
    {
        var tuple = $"{Norm(fileHash)}\0{Norm(section)}\0{NormalizeAddress(address)}\0{Norm(symbolName)}\0{Norm(linkage)}\0{Norm(codeBlockHash)}";
        return Build(Lang.Binary, tuple);
    }

    /// <summary>
    /// Creates a Python symbol ID from module/function components.
    /// </summary>
    /// <param name="packageOrPath">Package name or module file path.</param>
    /// <param name="modulePath">Module path within package (dot-separated).</param>
    /// <param name="qualifiedName">Qualified name (class.method or function).</param>
    /// <returns>A symbol ID of the form <c>sym:python:{fragment}</c>.</returns>
    public static string ForPython(string packageOrPath, string modulePath, string qualifiedName)
    {
        var tuple = $"{Norm(packageOrPath)}\0{Norm(modulePath)}\0{Norm(qualifiedName)}";
        return Build(Lang.Python, tuple);
    }

    /// <summary>
    /// Creates a Ruby symbol ID from module/method components.
    /// </summary>
    /// <param name="gemOrPath">Gem name or file path.</param>
    /// <param name="modulePath">Module/class path (e.g., "Foo::Bar").</param>
    /// <param name="methodName">Method name (with prefix # for instance, . for class).</param>
    /// <returns>A symbol ID of the form <c>sym:ruby:{fragment}</c>.</returns>
    public static string ForRuby(string gemOrPath, string modulePath, string methodName)
    {
        var tuple = $"{Norm(gemOrPath)}\0{Norm(modulePath)}\0{Norm(methodName)}";
        return Build(Lang.Ruby, tuple);
    }

    /// <summary>
    /// Creates a PHP symbol ID from namespace/function components.
    /// </summary>
    /// <param name="composerPackage">Composer package name or file path.</param>
    /// <param name="ns">Namespace (e.g., "App\\Services").</param>
    /// <param name="qualifiedName">Fully qualified class::method or function name.</param>
    /// <returns>A symbol ID of the form <c>sym:php:{fragment}</c>.</returns>
    public static string ForPhp(string composerPackage, string ns, string qualifiedName)
    {
        var tuple = $"{Norm(composerPackage)}\0{Norm(ns)}\0{Norm(qualifiedName)}";
        return Build(Lang.Php, tuple);
    }

    /// <summary>
    /// Creates a symbol ID from a pre-computed canonical tuple and language.
    /// The tuple is hashed as-is; no trimming or case folding is applied.
    /// </summary>
    /// <param name="lang">Language identifier (use <see cref="Lang"/> constants).</param>
    /// <param name="canonicalTuple">Pre-formatted canonical tuple (NUL-separated components).</param>
    /// <returns>A symbol ID of the form <c>sym:{lang}:{fragment}</c>.</returns>
    /// <exception cref="ArgumentException"><paramref name="lang"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="canonicalTuple"/> is null.</exception>
    public static string FromTuple(string lang, string canonicalTuple)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(lang);
        // Fail here with the caller's parameter name instead of deep inside the UTF-8 encoder.
        ArgumentNullException.ThrowIfNull(canonicalTuple);
        return Build(lang, canonicalTuple);
    }

    /// <summary>
    /// Parses a symbol ID into its language and fragment components.
    /// </summary>
    /// <param name="symbolId">Candidate symbol ID; null and blank values are treated as invalid.</param>
    /// <returns>
    /// Tuple of (language, fragment), or null when the input is blank, does not start with
    /// "sym:", or has no non-empty language segment followed by a colon.
    /// </returns>
    public static (string Lang, string Fragment)? Parse(string? symbolId)
    {
        const string Prefix = "sym:";

        if (string.IsNullOrWhiteSpace(symbolId) || !symbolId.StartsWith(Prefix, StringComparison.Ordinal))
        {
            return null;
        }

        var rest = symbolId.AsSpan(Prefix.Length);
        var colonIndex = rest.IndexOf(':');

        // Index 0 means an empty language; -1 means there is no separator at all.
        if (colonIndex < 1)
        {
            return null;
        }

        var lang = rest[..colonIndex].ToString();
        var fragment = rest[(colonIndex + 1)..].ToString();
        return (lang, fragment);
    }

    /// <summary>
    /// Assembles the final <c>sym:{lang}:{fragment}</c> string for a canonical tuple.
    /// </summary>
    private static string Build(string lang, string tuple)
    {
        var hash = ComputeFragment(tuple);
        return $"sym:{lang}:{hash}";
    }

    /// <summary>
    /// Normalizes an address to lower-case, "0x"-prefixed hex without leading zeros.
    /// Blank input and negative decimal input normalize to "0x0".
    /// </summary>
    /// <remarks>
    /// Parsing is unsigned so that addresses with the top bit set (for example
    /// 0xffffffff81000000) keep their value; a signed hex parse would read them as
    /// negative numbers and every such address would collapse to "0x0".
    /// </remarks>
    private static string NormalizeAddress(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return "0x0";
        }

        var addrText = value.Trim();
        var isHex = addrText.StartsWith("0x", StringComparison.OrdinalIgnoreCase);
        if (isHex)
        {
            addrText = addrText[2..];
        }

        var style = isHex ? NumberStyles.HexNumber : NumberStyles.Integer;
        if (ulong.TryParse(addrText, style, CultureInfo.InvariantCulture, out var addrValue))
        {
            return $"0x{addrValue:x}";
        }

        // A decimal that fits in long but not ulong is negative: clamp to zero.
        if (!isHex && long.TryParse(addrText, NumberStyles.Integer, CultureInfo.InvariantCulture, out _))
        {
            return "0x0";
        }

        // Fallback to normalized string representation (e.g. values wider than 64 bits).
        addrText = addrText.TrimStart('0');
        if (addrText.Length == 0)
        {
            addrText = "0";
        }

        return $"0x{addrText.ToLowerInvariant()}";
    }

    /// <summary>
    /// Hashes the UTF-8 bytes of the tuple with SHA-256 and encodes the digest as
    /// base64url without padding.
    /// </summary>
    private static string ComputeFragment(string tuple)
    {
        var bytes = Encoding.UTF8.GetBytes(tuple);
        var hash = SHA256.HashData(bytes);

        // Base64url without padding per spec
        return Convert.ToBase64String(hash)
            .Replace('+', '-')
            .Replace('/', '_')
            .TrimEnd('=');
    }

    /// <summary>
    /// Trims and lower-cases (invariant) a component; null/blank becomes the empty string.
    /// </summary>
    private static string Lower(string? value)
        => string.IsNullOrWhiteSpace(value) ? string.Empty : value.Trim().ToLowerInvariant();

    /// <summary>
    /// Trims a component; null/blank becomes the empty string.
    /// </summary>
    private static string Norm(string? value)
        => string.IsNullOrWhiteSpace(value) ? string.Empty : value.Trim();
}