Add property-based tests for SBOM/VEX document ordering and Unicode normalization determinism

- Implement `SbomVexOrderingDeterminismProperties` for testing component list and vulnerability metadata hash consistency.
- Create `UnicodeNormalizationDeterminismProperties` to validate NFC normalization and Unicode string handling.
- Add project file for `StellaOps.Testing.Determinism.Properties` with necessary dependencies.
- Introduce CI/CD template validation tests including YAML syntax checks and documentation content verification.
- Create validation script for CI/CD templates ensuring all required files and structures are present.
This commit is contained in:
StellaOps Bot
2025-12-26 15:17:15 +02:00
parent 7792749bb4
commit 907783f625
354 changed files with 79727 additions and 1346 deletions

View File

@@ -1,13 +1,31 @@
// -----------------------------------------------------------------------------
// CallGraphServiceCollectionExtensions.cs
// Sprint: SPRINT_20251226_005_SCANNER_reachability_extractors (REACH-REG-01)
// Description: DI registration for all call graph extractors.
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Scanner.CallGraph.Caching;
using StellaOps.Scanner.CallGraph.DotNet;
using StellaOps.Scanner.CallGraph.Go;
using StellaOps.Scanner.CallGraph.Java;
using StellaOps.Scanner.CallGraph.Node;
using StellaOps.Scanner.CallGraph.Python;
namespace StellaOps.Scanner.CallGraph.DependencyInjection;
/// <summary>
/// Extension methods for registering call graph services in dependency injection.
/// </summary>
public static class CallGraphServiceCollectionExtensions
{
/// <summary>
/// Adds all call graph extraction and analysis services to the service collection.
/// </summary>
/// <param name="services">The service collection.</param>
/// <param name="configuration">The configuration instance.</param>
/// <returns>The service collection for chaining.</returns>
public static IServiceCollection AddCallGraphServices(this IServiceCollection services, IConfiguration configuration)
{
ArgumentNullException.ThrowIfNull(services);
@@ -15,9 +33,18 @@ public static class CallGraphServiceCollectionExtensions
services.Configure<CallGraphCacheConfig>(configuration.GetSection("CallGraph:Cache"));
services.AddSingleton<ICallGraphExtractor, DotNetCallGraphExtractor>();
services.AddSingleton<ICallGraphExtractor, NodeCallGraphExtractor>();
// Register all language-specific call graph extractors
// Each extractor implements ICallGraphExtractor and is keyed by Language property
services.AddSingleton<ICallGraphExtractor, DotNetCallGraphExtractor>(); // .NET/C# via Roslyn
services.AddSingleton<ICallGraphExtractor, JavaCallGraphExtractor>(); // Java via ASM bytecode parsing
services.AddSingleton<ICallGraphExtractor, NodeCallGraphExtractor>(); // Node.js/JavaScript via Babel
services.AddSingleton<ICallGraphExtractor, PythonCallGraphExtractor>(); // Python via AST analysis
services.AddSingleton<ICallGraphExtractor, GoCallGraphExtractor>(); // Go via SSA analysis
// Register the extractor registry for language-based lookup
services.AddSingleton<ICallGraphExtractorRegistry, CallGraphExtractorRegistry>();
// Core analysis services
services.AddSingleton<ReachabilityAnalyzer>();
services.AddSingleton<ICallGraphCacheService, ValkeyCallGraphCacheService>();

View File

@@ -0,0 +1,520 @@
// -----------------------------------------------------------------------------
// FunctionBoundaryDetector.cs
// Sprint: SPRINT_20251226_009_SCANNER_funcproof
// Tasks: FUNC-03, FUNC-04 — Function boundary detection using DWARF/symbol table and heuristics
// Description: Detects function boundaries from binary analysis.
// -----------------------------------------------------------------------------
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Scanner.Evidence;
namespace StellaOps.Scanner.CallGraph.Binary;
/// <summary>
/// Detects function boundaries in native binaries using multiple strategies:
/// 1. DWARF debug info (highest confidence)
/// 2. Symbol table entries (high confidence)
/// 3. Prolog/epilog heuristics for stripped binaries (lower confidence)
/// </summary>
public sealed class FunctionBoundaryDetector
{
private readonly ILogger<FunctionBoundaryDetector> _logger;
private readonly DwarfDebugReader _dwarfReader;
private readonly FuncProofGenerationOptions _options;
// Common function prologs by architecture
// These byte signatures are consumed only by DetectByPrologEpilog, which tests the
// entries of a table in array order at each candidate offset and stops at the first match.
// The x86-64 tables are also used for BinaryArchitecture.X86 and for any architecture
// that is neither x86 nor ARM; the ARM64 tables are also used for BinaryArchitecture.Arm.
// NOTE(review): the single-byte entries (push rbx / push rbp) will match very often in
// arbitrary code bytes — confirm the resulting false-positive rate is acceptable.
private static readonly byte[][] X86_64Prologs =
[
[0x55, 0x48, 0x89, 0xe5], // push rbp; mov rbp, rsp
[0x55, 0x48, 0x8b, 0xec], // push rbp; mov rbp, rsp (alternate)
[0x41, 0x57], // push r15
[0x41, 0x56], // push r14
[0x41, 0x55], // push r13
[0x41, 0x54], // push r12
[0x53], // push rbx
[0x55], // push rbp
];
// ARM64 prolog signatures, written in the byte order they appear in the scanned data.
private static readonly byte[][] Arm64Prologs =
[
[0xfd, 0x7b, 0xbf, 0xa9], // stp x29, x30, [sp, #-16]!
[0xfd, 0x7b, 0xbe, 0xa9], // stp x29, x30, [sp, #-32]!
[0xfd, 0x03, 0x00, 0x91], // mov x29, sp
];
// Common function epilogs
// DetectByPrologEpilog places the function end immediately after the matched bytes.
// NOTE(review): the `ret imm16` entry is only the opcode byte, so the computed end
// presumably stops before the 2-byte immediate — confirm whether that is intended.
private static readonly byte[][] X86_64Epilogs =
[
[0xc3], // ret
[0xc2], // ret imm16
[0x5d, 0xc3], // pop rbp; ret
[0xc9, 0xc3], // leave; ret
];
// ARM64 epilog signatures, written in the byte order they appear in the scanned data.
private static readonly byte[][] Arm64Epilogs =
[
[0xc0, 0x03, 0x5f, 0xd6], // ret
[0xfd, 0x7b, 0xc1, 0xa8], // ldp x29, x30, [sp], #16
];
/// <summary>
/// Creates a detector.
/// </summary>
/// <param name="logger">Logger used for diagnostic messages; must not be null.</param>
/// <param name="dwarfReader">Reader used for the DWARF strategy; must not be null.</param>
/// <param name="options">
/// Optional confidence settings. When omitted, or when its <c>Value</c> is null,
/// a default <see cref="FuncProofGenerationOptions"/> instance is used.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="logger"/> or <paramref name="dwarfReader"/> is null.
/// </exception>
public FunctionBoundaryDetector(
    ILogger<FunctionBoundaryDetector> logger,
    DwarfDebugReader dwarfReader,
    IOptions<FuncProofGenerationOptions>? options = null)
{
    ArgumentNullException.ThrowIfNull(logger);
    ArgumentNullException.ThrowIfNull(dwarfReader);

    _logger = logger;
    _dwarfReader = dwarfReader;

    var configured = options?.Value;
    _options = configured is null ? new FuncProofGenerationOptions() : configured;
}
/// <summary>
/// Detects function boundaries using all available strategies.
/// </summary>
/// <remarks>
/// Strategies are tried in order and the first one that yields any result wins;
/// results from different strategies are never merged:
/// <list type="number">
/// <item><description>DWARF debug info (attempted only for <see cref="BinaryFormat.Elf"/>).</description></item>
/// <item><description>Symbol table entries, followed by size inference for zero-sized symbols.</description></item>
/// <item><description>Prolog/epilog pattern scanning of the text section.</description></item>
/// </list>
/// A failure while reading DWARF data is logged at debug level and treated as
/// "no DWARF data"; cancellation is not treated as such a failure and propagates.
/// </remarks>
/// <param name="binaryPath">Path of the binary to analyse.</param>
/// <param name="format">Container format of the binary.</param>
/// <param name="architecture">Architecture used to pick the heuristic pattern tables.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The detected functions; empty when no strategy produced a result.</returns>
/// <exception cref="ArgumentNullException"><paramref name="binaryPath"/> is null.</exception>
/// <exception cref="OperationCanceledException">
/// Propagated when one of the underlying readers observes <paramref name="ct"/>.
/// </exception>
public async Task<IReadOnlyList<DetectedFunction>> DetectAsync(
    string binaryPath,
    BinaryFormat format,
    BinaryArchitecture architecture,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(binaryPath);

    var functions = new List<DetectedFunction>();

    // Strategy 1: Try DWARF debug info first (highest confidence)
    if (format == BinaryFormat.Elf)
    {
        try
        {
            var dwarfInfo = await _dwarfReader.ReadAsync(binaryPath, ct);
            if (dwarfInfo.Functions.Count > 0)
            {
                _logger.LogDebug("Found {Count} functions via DWARF", dwarfInfo.Functions.Count);
                foreach (var func in dwarfInfo.Functions)
                {
                    functions.Add(new DetectedFunction
                    {
                        Symbol = func.Name,
                        MangledName = func.LinkageName,
                        StartAddress = func.LowPc,
                        EndAddress = func.HighPc,
                        Confidence = _options.DwarfConfidence,
                        DetectionMethod = FunctionDetectionMethod.Dwarf,
                        SourceFile = func.DeclFile,
                        SourceLine = func.DeclLine
                    });
                }

                return functions;
            }
        }
        // Cancellation must reach the caller; only genuine parse failures fall back.
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Discard anything added before the failure so strategies are never mixed.
            functions.Clear();
            _logger.LogDebug(ex, "DWARF parsing failed, falling back to symbol table");
        }
    }

    // Strategy 2: Symbol table (high confidence)
    var symbols = await ExtractSymbolTableAsync(binaryPath, format, ct);
    if (symbols.Count > 0)
    {
        _logger.LogDebug("Found {Count} functions via symbol table", symbols.Count);
        functions.AddRange(symbols.Select(s => new DetectedFunction
        {
            Symbol = s.Name,
            MangledName = s.MangledName,
            StartAddress = s.Address,
            EndAddress = s.Address + s.Size,
            Confidence = _options.SymbolConfidence,
            DetectionMethod = FunctionDetectionMethod.SymbolTable
        }));

        // If we have symbols but no sizes, try to infer from gaps
        InferFunctionSizes(functions);
        return functions;
    }

    // Strategy 3: Heuristic prolog/epilog detection (lower confidence)
    _logger.LogDebug("Using heuristic function detection for stripped binary");
    var textSection = await BinaryTextSectionReader.TryReadAsync(binaryPath, format, ct);
    if (textSection is not null)
    {
        functions.AddRange(DetectByPrologEpilog(textSection, architecture));
    }

    return functions;
}
/// <summary>
/// Opens the binary and returns its function symbols, ordered by ascending address.
/// </summary>
/// <param name="binaryPath">Path of the binary to open for reading.</param>
/// <param name="format">Selects which format-specific extractor is used.</param>
/// <param name="ct">Cancellation token forwarded to the ELF extractor.</param>
/// <returns>
/// Only entries of type <see cref="SymbolType.Function"/> with a non-zero address;
/// an empty list for formats without a matching extractor.
/// </returns>
private async Task<List<SymbolEntry>> ExtractSymbolTableAsync(
    string binaryPath,
    BinaryFormat format,
    CancellationToken ct)
{
    await using var stream = File.OpenRead(binaryPath);
    using var reader = new BinaryReader(stream);

    List<SymbolEntry> rawSymbols = format switch
    {
        BinaryFormat.Elf => await ExtractElfSymbolsAsync(reader, ct),
        BinaryFormat.Pe => ExtractPeSymbols(reader),
        BinaryFormat.MachO => ExtractMachOSymbols(reader),
        _ => new List<SymbolEntry>()
    };

    // Keep function symbols only; the ordering is stable for equal addresses.
    var functionSymbols =
        from entry in rawSymbols
        where entry.Type == SymbolType.Function && entry.Address != 0
        orderby entry.Address
        select entry;

    return functionSymbols.ToList();
}
/// <summary>
/// Reads the function symbols (STT_FUNC) from every SHT_SYMTAB and SHT_DYNSYM
/// section of an ELF image.
/// </summary>
/// <remarks>
/// <para>
/// <see cref="BinaryReader"/> always decodes little-endian values, so big-endian
/// images are rejected up front instead of being decoded into garbage offsets.
/// </para>
/// <para>
/// The 32-bit and 64-bit symbol records use different field orders:
/// Elf32_Sym is name, value, size, info, other, shndx, while
/// Elf64_Sym is name, info, other, shndx, value, size.
/// </para>
/// <para>
/// A symbol present in both tables is returned once per table; no de-duplication
/// is performed here.
/// </para>
/// </remarks>
/// <param name="reader">Reader positioned anywhere on a seekable stream holding the image.</param>
/// <param name="ct">Checked once per section and once per symbol.</param>
/// <returns>
/// The function symbols with a non-zero value and a non-empty name; an empty list
/// when the stream is not a little-endian ELF image.
/// </returns>
/// <exception cref="EndOfStreamException">A header or table runs past the end of the stream.</exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
private async Task<List<SymbolEntry>> ExtractElfSymbolsAsync(BinaryReader reader, CancellationToken ct)
{
    const uint SectionTypeSymTab = 2;   // SHT_SYMTAB
    const uint SectionTypeDynSym = 11;  // SHT_DYNSYM
    const int SymbolTypeFunction = 2;   // STT_FUNC
    const byte DataBigEndian = 2;       // ELFDATA2MSB

    var symbols = new List<SymbolEntry>();
    var stream = reader.BaseStream;

    stream.Seek(0, SeekOrigin.Begin);
    var ident = reader.ReadBytes(16);

    // ReadBytes returns a short array at end of stream, so check the length before indexing.
    if (ident.Length < 16 || ident[0] != 0x7F || ident[1] != 'E' || ident[2] != 'L' || ident[3] != 'F')
    {
        return symbols;
    }

    if (ident[5] == DataBigEndian)
    {
        return symbols;
    }

    var is64Bit = ident[4] == 2;

    // e_shoff, then e_shentsize / e_shnum (e_shstrndx is not needed: names come via sh_link).
    stream.Seek(is64Bit ? 40 : 32, SeekOrigin.Begin);
    long sectionHeaderOffset = is64Bit ? reader.ReadInt64() : reader.ReadUInt32();

    stream.Seek(is64Bit ? 58 : 46, SeekOrigin.Begin);
    var sectionHeaderSize = reader.ReadUInt16();
    var sectionCount = reader.ReadUInt16();

    var symbolEntrySize = is64Bit ? 24 : 16;

    for (var i = 0; i < sectionCount; i++)
    {
        ct.ThrowIfCancellationRequested();

        var headerStart = sectionHeaderOffset + (long)i * sectionHeaderSize;

        // sh_type sits right after the 4-byte sh_name.
        stream.Seek(headerStart + 4, SeekOrigin.Begin);
        var type = reader.ReadUInt32();
        if (type != SectionTypeSymTab && type != SectionTypeDynSym)
        {
            continue;
        }

        // sh_offset / sh_size of the symbol table itself.
        stream.Seek(headerStart + (is64Bit ? 24 : 16), SeekOrigin.Begin);
        long offset = is64Bit ? reader.ReadInt64() : reader.ReadUInt32();
        long size = is64Bit ? reader.ReadInt64() : reader.ReadUInt32();

        // sh_link names the section holding this table's string table.
        stream.Seek(headerStart + (is64Bit ? 40 : 24), SeekOrigin.Begin);
        var link = reader.ReadUInt32();
        if (link >= sectionCount)
        {
            continue;
        }

        stream.Seek(sectionHeaderOffset + (long)link * sectionHeaderSize + (is64Bit ? 24 : 16), SeekOrigin.Begin);
        long strOffset = is64Bit ? reader.ReadInt64() : reader.ReadUInt32();
        long strSize = is64Bit ? reader.ReadInt64() : reader.ReadUInt32();

        // Without a readable string table every name would be empty and every symbol skipped.
        var strAvailable = stream.Length - strOffset;
        if (strOffset < 0 || strSize <= 0 || strAvailable <= 0)
        {
            continue;
        }

        // Clamp to what the file really contains so a corrupt size cannot force a huge allocation.
        stream.Seek(strOffset, SeekOrigin.Begin);
        var strTab = reader.ReadBytes((int)Math.Min(Math.Min(strSize, strAvailable), int.MaxValue));

        var count = size / symbolEntrySize;
        for (long j = 0; j < count; j++)
        {
            ct.ThrowIfCancellationRequested();

            stream.Seek(offset + j * symbolEntrySize, SeekOrigin.Begin);
            var stName = reader.ReadUInt32();

            byte stInfo;
            long stValue;
            long stSize;
            if (is64Bit)
            {
                stInfo = reader.ReadByte();
                stream.Seek(3, SeekOrigin.Current); // skip st_other and st_shndx
                stValue = reader.ReadInt64();
                stSize = reader.ReadInt64();
            }
            else
            {
                // In Elf32_Sym the value and size come BEFORE st_info.
                stValue = reader.ReadUInt32();
                stSize = reader.ReadUInt32();
                stInfo = reader.ReadByte();
            }

            // The low nibble of st_info is the symbol type.
            if ((stInfo & 0x0f) != SymbolTypeFunction || stValue == 0)
            {
                continue;
            }

            var name = ReadNullTerminatedString(strTab, (int)stName);
            if (string.IsNullOrEmpty(name))
            {
                continue;
            }

            symbols.Add(new SymbolEntry
            {
                Name = DemangleSymbol(name),
                MangledName = name,
                Address = stValue,
                Size = stSize,
                Type = SymbolType.Function
            });
        }
    }

    return symbols;
}
/// <summary>
/// Placeholder for PE symbol extraction; currently always yields no symbols.
/// </summary>
/// <param name="reader">Reader over the binary; not consulted by this placeholder.</param>
/// <returns>A new, empty list.</returns>
// A full implementation would parse the COFF symbol table or a PDB.
private List<SymbolEntry> ExtractPeSymbols(BinaryReader reader) => new List<SymbolEntry>();
/// <summary>
/// Placeholder for Mach-O symbol extraction; currently always yields no symbols.
/// </summary>
/// <param name="reader">Reader over the binary; not consulted by this placeholder.</param>
/// <returns>A new, empty list.</returns>
// A full implementation would parse the LC_SYMTAB load command.
private List<SymbolEntry> ExtractMachOSymbols(BinaryReader reader) => new List<SymbolEntry>();
/// <summary>
/// Heuristically carves functions out of a text section by pairing each prolog
/// signature with the first epilog signature that follows it.
/// </summary>
/// <param name="textSection">Section bytes plus the virtual address of the first byte.</param>
/// <param name="architecture">
/// Selects the signature tables: the ARM64 tables for Arm/Arm64, the x86-64 tables for
/// everything else.
/// </param>
/// <returns>
/// One entry per prolog hit, named <c>sub_&lt;hex start address&gt;</c>. When no epilog is
/// found the function extends to the next prolog, or to the end of the section for the last one.
/// </returns>
private List<DetectedFunction> DetectByPrologEpilog(
    BinaryTextSection textSection,
    BinaryArchitecture architecture)
{
    // Length of the first table entry matching at `position`, or 0 when none matches.
    static int MatchLength(byte[] bytes, long position, byte[][] patterns)
    {
        foreach (var pattern in patterns)
        {
            if (position + pattern.Length <= bytes.Length && MatchesPattern(bytes, (int)position, pattern))
            {
                return pattern.Length;
            }
        }

        return 0;
    }

    var useArmTables = architecture is BinaryArchitecture.Arm64 or BinaryArchitecture.Arm;
    var prologs = useArmTables ? Arm64Prologs : X86_64Prologs;
    var epilogs = useArmTables ? Arm64Epilogs : X86_64Epilogs;

    var data = textSection.Data;
    var baseAddr = textSection.VirtualAddress;

    // Pass 1: collect every offset at which some prolog signature starts.
    var prologOffsets = new List<long>();
    for (var position = 0; position < data.Length - 4; position++)
    {
        if (MatchLength(data, position, prologs) > 0)
        {
            prologOffsets.Add(position);
        }
    }

    // Pass 2: bound each candidate by the first epilog before the next prolog.
    var functions = new List<DetectedFunction>();
    for (var index = 0; index < prologOffsets.Count; index++)
    {
        var start = prologOffsets[index];
        long limit = index + 1 < prologOffsets.Count
            ? prologOffsets[index + 1]
            : data.Length;

        var end = limit;
        for (var cursor = start + 4; cursor < limit - 1; cursor++)
        {
            var matched = MatchLength(data, cursor, epilogs);
            if (matched > 0)
            {
                end = cursor + matched;
                break;
            }
        }

        functions.Add(new DetectedFunction
        {
            Symbol = $"sub_{baseAddr + start:x}",
            StartAddress = baseAddr + start,
            EndAddress = baseAddr + end,
            Confidence = _options.HeuristicConfidence,
            DetectionMethod = FunctionDetectionMethod.Heuristic
        });
    }

    return functions;
}
/// <summary>
/// Tests whether <paramref name="pattern"/> occurs in <paramref name="data"/> starting at
/// <paramref name="offset"/>.
/// </summary>
/// <param name="data">Buffer to inspect.</param>
/// <param name="offset">Index in <paramref name="data"/> of the first byte to compare.</param>
/// <param name="pattern">Bytes expected at that position; an empty pattern always matches.</param>
/// <returns>True when every pattern byte equals the corresponding data byte.</returns>
/// <remarks>
/// No bounds check is made here; callers must ensure the pattern fits inside the buffer.
/// </remarks>
private static bool MatchesPattern(byte[] data, int offset, byte[] pattern)
{
    var cursor = offset;
    foreach (var expected in pattern)
    {
        if (data[cursor] != expected)
        {
            return false;
        }

        cursor++;
    }

    return true;
}
/// <summary>
/// Infers function sizes from gaps between symbols.
/// </summary>
/// <remarks>
/// An entry whose end address equals its start address (a zero-sized symbol) is given the
/// start of the next function at a strictly higher address as its end, and its confidence
/// is multiplied by the configured inferred-size penalty. Entries are replaced in place in
/// <paramref name="functions"/>, whose ordering is left untouched. The zero-sized entry
/// with the highest address has no successor and is therefore left unchanged.
/// </remarks>
/// <param name="functions">The list to update in place.</param>
private void InferFunctionSizes(List<DetectedFunction> functions)
{
    if (functions.Count < 2)
    {
        return;
    }

    // Visit the entries in address order without reordering the caller's list.
    var byAddress = Enumerable.Range(0, functions.Count)
        .OrderBy(index => functions[index].StartAddress)
        .ToArray();

    for (var rank = 0; rank < byAddress.Length - 1; rank++)
    {
        var index = byAddress[rank];
        var current = functions[index];
        if (current.EndAddress != current.StartAddress)
        {
            continue;
        }

        // Skip entries sharing this start address (e.g. aliases); they would yield a zero gap.
        var next = rank + 1;
        while (next < byAddress.Length && functions[byAddress[next]].StartAddress <= current.StartAddress)
        {
            next++;
        }

        if (next == byAddress.Length)
        {
            continue;
        }

        // Records are immutable: the updated copy has to be stored back into the list.
        functions[index] = current with
        {
            EndAddress = functions[byAddress[next]].StartAddress,
            Confidence = current.Confidence * _options.InferredSizePenalty // Reduce confidence for inferred size
        };
    }
}
/// <summary>
/// Decodes the UTF-8 string that starts at <paramref name="offset"/> and runs up to, but not
/// including, the next zero byte.
/// </summary>
/// <param name="data">Buffer containing the string table.</param>
/// <param name="offset">Index of the first byte of the string.</param>
/// <returns>
/// The decoded text; the rest of the buffer when no terminator follows; an empty string when
/// <paramref name="offset"/> lies outside the buffer.
/// </returns>
private static string ReadNullTerminatedString(byte[] data, int offset)
{
    if (offset < 0 || offset >= data.Length)
    {
        return string.Empty;
    }

    var terminator = Array.IndexOf(data, (byte)0, offset);
    var length = (terminator < 0 ? data.Length : terminator) - offset;
    return System.Text.Encoding.UTF8.GetString(data, offset, length);
}
/// <summary>
/// Returns the display name for a raw symbol name.
/// </summary>
/// <remarks>
/// Demangling is not implemented yet: names carrying the <c>_Z</c> prefix are recognised
/// as mangled C++ names but are currently returned unchanged, like every other name.
/// </remarks>
/// <param name="name">Raw symbol name as read from the string table.</param>
/// <returns>The input name, unchanged.</returns>
private static string DemangleSymbol(string name)
{
    // Symbol names are identifiers, not linguistic text, so compare ordinally.
    if (name.StartsWith("_Z", StringComparison.Ordinal))
    {
        // This is a mangled C++ name.
        // A full implementation would use c++filt or a proper demangler.
        return name;
    }

    return name;
}
}
/// <summary>
/// Detected function boundary.
/// </summary>
/// <remarks>
/// Immutable result item produced by <see cref="FunctionBoundaryDetector"/>.
/// </remarks>
public sealed record DetectedFunction
{
/// <summary>
/// Display name: the DWARF name, the symbol-table name, or a synthetic
/// <c>sub_&lt;hex start address&gt;</c> label for heuristic detections.
/// </summary>
public required string Symbol { get; init; }
/// <summary>
/// DWARF linkage name or raw symbol-table name; not set for heuristic detections.
/// </summary>
public string? MangledName { get; init; }
/// <summary>Address of the first byte of the function.</summary>
public required long StartAddress { get; init; }
/// <summary>
/// End of the function as reported by the detection strategy: the DWARF high PC,
/// the symbol address plus its size, or the address just past the matched epilog.
/// </summary>
public required long EndAddress { get; init; }
/// <summary>
/// Confidence value taken from the detector options for the strategy that produced this entry.
/// </summary>
public required double Confidence { get; init; }
/// <summary>Strategy that produced this entry.</summary>
public required FunctionDetectionMethod DetectionMethod { get; init; }
/// <summary>Declaring source file; populated only by the DWARF strategy.</summary>
public string? SourceFile { get; init; }
/// <summary>Declaring source line; populated only by the DWARF strategy.</summary>
public int? SourceLine { get; init; }
}
/// <summary>
/// Method used to detect function boundaries.
/// </summary>
public enum FunctionDetectionMethod
{
/// <summary>Boundaries came from DWARF debug information.</summary>
Dwarf,
/// <summary>Boundaries came from the binary's symbol table.</summary>
SymbolTable,
/// <summary>Boundaries came from prolog/epilog byte-pattern scanning.</summary>
Heuristic
}
/// <summary>
/// Symbol table entry.
/// </summary>
internal record SymbolEntry
{
/// <summary>Display name of the symbol (the result of the demangling step).</summary>
public required string Name { get; init; }
/// <summary>Name exactly as stored in the binary's string table.</summary>
public string? MangledName { get; init; }
/// <summary>Symbol value; used as the function start address.</summary>
public required long Address { get; init; }
/// <summary>Symbol size; added to <see cref="Address"/> to obtain the function end. May be zero.</summary>
public required long Size { get; init; }
/// <summary>Kind of symbol.</summary>
public required SymbolType Type { get; init; }
}
/// <summary>
/// Symbol type.
/// </summary>
internal enum SymbolType
{
/// <summary>Function symbol; the only kind kept by the symbol-table strategy.</summary>
Function,
/// <summary>Data object symbol.</summary>
Object,
/// <summary>Any other kind of symbol.</summary>
Other
}
/// <summary>
/// Binary architecture.
/// </summary>
/// <remarks>
/// The heuristic detector has byte-pattern tables for x86-64 and ARM64 only:
/// <see cref="X86"/> shares the x86-64 tables, <see cref="Arm"/> shares the ARM64 tables,
/// and <see cref="Unknown"/> and <see cref="Riscv64"/> fall back to the x86-64 tables.
/// </remarks>
public enum BinaryArchitecture
{
/// <summary>Architecture could not be determined.</summary>
Unknown,
/// <summary>32-bit x86.</summary>
X86,
/// <summary>64-bit x86.</summary>
X86_64,
/// <summary>32-bit ARM.</summary>
Arm,
/// <summary>64-bit ARM.</summary>
Arm64,
/// <summary>64-bit RISC-V.</summary>
Riscv64
}
/// <summary>
/// Binary format.
/// </summary>
public enum BinaryFormat
{
/// <summary>ELF image; the only format for which DWARF and symbol-table detection are attempted with real parsing.</summary>
Elf,
/// <summary>PE image; symbol-table extraction is currently a placeholder that yields no symbols.</summary>
Pe,
/// <summary>Mach-O image; symbol-table extraction is currently a placeholder that yields no symbols.</summary>
MachO
}
/// <summary>
/// Binary .text section data.
/// </summary>
public sealed record BinaryTextSection
{
/// <summary>Raw bytes of the section; scanned for prolog/epilog patterns.</summary>
public required byte[] Data { get; init; }
/// <summary>
/// Address of the first byte of <see cref="Data"/>; added to byte offsets to form
/// function start and end addresses.
/// </summary>
public required long VirtualAddress { get; init; }
/// <summary>
/// Architecture of the section's code.
/// NOTE(review): the detector in this file selects its pattern tables from the
/// architecture passed to DetectAsync, not from this property — confirm which is authoritative.
/// </summary>
public required BinaryArchitecture Architecture { get; init; }
}
/// <summary>
/// Reader for binary .text sections.
/// </summary>
public static class BinaryTextSectionReader
{
    /// <summary>
    /// Placeholder: always completes immediately with <c>null</c>.
    /// </summary>
    /// <param name="path">Path of the binary; not consulted by this placeholder.</param>
    /// <param name="format">Container format; not consulted by this placeholder.</param>
    /// <param name="ct">Cancellation token; not consulted by this placeholder.</param>
    /// <returns>A completed task whose result is <c>null</c> ("no text section available").</returns>
    // A full implementation would parse the ELF/PE/Mach-O headers to locate .text.
    public static Task<BinaryTextSection?> TryReadAsync(
        string path,
        BinaryFormat format,
        CancellationToken ct)
        => Task.FromResult<BinaryTextSection?>(null);
}

View File

@@ -0,0 +1,104 @@
// -----------------------------------------------------------------------------
// CallGraphExtractorRegistry.cs
// Sprint: SPRINT_20251226_005_SCANNER_reachability_extractors (REACH-REG-01)
// Description: Registry implementation for language-specific call graph extractors.
// -----------------------------------------------------------------------------
using System.Collections.Frozen;
using Microsoft.Extensions.Logging;
namespace StellaOps.Scanner.CallGraph;
/// <summary>
/// Registry implementation for language-specific call graph extractors.
/// Provides deterministic ordering and language-based lookup.
/// </summary>
/// <remarks>
/// <para>
/// Language identifiers are matched case-insensitively (ordinal). When several extractors
/// report the same language, the first one supplied is kept and the others are discarded
/// with a warning, so <see cref="Extractors"/>, <see cref="SupportedLanguages"/> and
/// <see cref="GetExtractor"/> always describe the same set.
/// </para>
/// Supported languages (alphabetical order for determinism):
/// - dotnet: .NET/C# via Roslyn semantic analysis
/// - go: Go via SSA-based analysis (external tool or static fallback)
/// - java: Java via ASM bytecode parsing
/// - node: Node.js/JavaScript via Babel AST
/// - python: Python via AST analysis
/// </remarks>
public sealed class CallGraphExtractorRegistry : ICallGraphExtractorRegistry
{
    private readonly FrozenDictionary<string, ICallGraphExtractor> _extractorsByLanguage;
    private readonly IReadOnlyList<ICallGraphExtractor> _extractors;
    private readonly IReadOnlyList<string> _supportedLanguages;
    private readonly ILogger<CallGraphExtractorRegistry>? _logger;

    /// <summary>
    /// Creates a new registry from the provided extractors.
    /// </summary>
    /// <param name="extractors">The extractors to register; enumerated exactly once.</param>
    /// <param name="logger">Optional logger for diagnostics.</param>
    /// <exception cref="ArgumentNullException"><paramref name="extractors"/> is null.</exception>
    public CallGraphExtractorRegistry(
        IEnumerable<ICallGraphExtractor> extractors,
        ILogger<CallGraphExtractorRegistry>? logger = null)
    {
        ArgumentNullException.ThrowIfNull(extractors);
        _logger = logger;

        // Build lookup dictionary (case-insensitive language matching); first registration wins.
        var byLanguage = new Dictionary<string, ICallGraphExtractor>(StringComparer.OrdinalIgnoreCase);
        foreach (var extractor in extractors)
        {
            if (!byLanguage.TryAdd(extractor.Language, extractor))
            {
                _logger?.LogWarning(
                    "Duplicate extractor registration for language '{Language}'; keeping first registration",
                    extractor.Language);
            }
        }

        _extractorsByLanguage = byLanguage.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);

        // Expose only the extractors that are reachable through lookup, so a discarded
        // duplicate can never run via enumeration. Keys are unique under the comparer,
        // which makes this ordering total and therefore deterministic.
        _extractors = byLanguage.Values
            .OrderBy(e => e.Language, StringComparer.OrdinalIgnoreCase)
            .ToList()
            .AsReadOnly();

        _supportedLanguages = _extractorsByLanguage.Keys
            .OrderBy(k => k, StringComparer.OrdinalIgnoreCase)
            .ToList()
            .AsReadOnly();

        _logger?.LogInformation(
            "CallGraphExtractorRegistry initialized with {Count} extractors: [{Languages}]",
            _supportedLanguages.Count,
            string.Join(", ", _supportedLanguages));
    }

    /// <inheritdoc />
    public IReadOnlyList<ICallGraphExtractor> Extractors => _extractors;

    /// <inheritdoc />
    public IReadOnlyList<string> SupportedLanguages => _supportedLanguages;

    /// <inheritdoc />
    public ICallGraphExtractor? GetExtractor(string language)
    {
        if (string.IsNullOrWhiteSpace(language))
        {
            return null;
        }

        return _extractorsByLanguage.TryGetValue(language, out var extractor) ? extractor : null;
    }

    /// <inheritdoc />
    public bool IsLanguageSupported(string language)
    {
        if (string.IsNullOrWhiteSpace(language))
        {
            return false;
        }

        return _extractorsByLanguage.ContainsKey(language);
    }
}

View File

@@ -0,0 +1,38 @@
// -----------------------------------------------------------------------------
// ICallGraphExtractorRegistry.cs
// Sprint: SPRINT_20251226_005_SCANNER_reachability_extractors (REACH-REG-01)
// Description: Registry interface for language-specific call graph extractors.
// -----------------------------------------------------------------------------
namespace StellaOps.Scanner.CallGraph;
/// <summary>
/// Registry for language-specific call graph extractors.
/// Provides lookup by language identifier and enumeration of supported languages.
/// </summary>
/// <remarks>
/// The <c>CallGraphExtractorRegistry</c> implementation matches language identifiers
/// case-insensitively and treats null, empty, or whitespace identifiers as unsupported.
/// </remarks>
public interface ICallGraphExtractorRegistry
{
/// <summary>
/// Gets all registered extractors.
/// </summary>
IReadOnlyList<ICallGraphExtractor> Extractors { get; }
/// <summary>
/// Gets the supported language identifiers.
/// </summary>
IReadOnlyList<string> SupportedLanguages { get; }
/// <summary>
/// Gets an extractor for the specified language.
/// </summary>
/// <param name="language">The language identifier (e.g., "java", "node", "python", "go", "dotnet").</param>
/// <returns>The extractor for the language, or null if not supported.</returns>
ICallGraphExtractor? GetExtractor(string language);
/// <summary>
/// Checks if the specified language is supported.
/// </summary>
/// <param name="language">The language identifier.</param>
/// <returns>True if the language has a registered extractor.</returns>
bool IsLanguageSupported(string language);
}

View File

@@ -26,6 +26,7 @@
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\\StellaOps.Scanner.Evidence\\StellaOps.Scanner.Evidence.csproj" />
<ProjectReference Include="..\\StellaOps.Scanner.Reachability\\StellaOps.Scanner.Reachability.csproj" />
</ItemGroup>
</Project>