sprints and audit work

This commit is contained in:
StellaOps Bot
2026-01-07 09:36:16 +02:00
parent 05833e0af2
commit ab364c6032
377 changed files with 64534 additions and 1627 deletions

View File

@@ -0,0 +1,61 @@
namespace StellaOps.Unknowns.Core.Hints;
/// <summary>
/// Builds provenance hints from various evidence sources.
/// </summary>
/// <remarks>
/// Every <c>BuildFrom*</c> member turns one kind of evidence into a single
/// <c>ProvenanceHint</c>; <c>CombineHints</c> reduces a list of hints to one
/// hypothesis/confidence pair.
/// </remarks>
public interface IProvenanceHintBuilder
{
/// <summary>Build hint from Build-ID match.</summary>
/// <param name="buildId">Build-ID string the hint is about.</param>
/// <param name="buildIdType">Kind of Build-ID, as a free-form string.</param>
/// <param name="match">Catalog lookup result, or <c>null</c> when the Build-ID was not found in a catalog.</param>
/// <returns>The hint describing the (un)matched Build-ID.</returns>
ProvenanceHint BuildFromBuildId(
string buildId,
string buildIdType,
BuildIdMatchResult? match);
/// <summary>Build hint from import table fingerprint.</summary>
/// <param name="fingerprint">Fingerprint string of the import table.</param>
/// <param name="importedLibraries">Libraries listed by the import table.</param>
/// <param name="matches">Candidate fingerprint matches, or <c>null</c> when none are available.</param>
/// <returns>The hint describing the fingerprint and its best match, if any.</returns>
ProvenanceHint BuildFromImportFingerprint(
string fingerprint,
IReadOnlyList<string> importedLibraries,
IReadOnlyList<FingerprintMatch>? matches);
/// <summary>Build hint from section layout.</summary>
/// <param name="sections">Sections making up the layout.</param>
/// <param name="matches">Candidate layout matches, or <c>null</c> when none are available.</param>
/// <returns>The hint describing the layout and its best match, if any.</returns>
ProvenanceHint BuildFromSectionLayout(
IReadOnlyList<SectionInfo> sections,
IReadOnlyList<LayoutMatch>? matches);
/// <summary>Build hint from distro pattern.</summary>
/// <param name="distro">Distro name the pattern points to.</param>
/// <param name="release">Distro release, or <c>null</c> when it is not known.</param>
/// <param name="patternType">Kind of pattern that matched, as a free-form string.</param>
/// <param name="matchedPattern">The pattern that matched.</param>
/// <returns>The hint describing the distro attribution.</returns>
ProvenanceHint BuildFromDistroPattern(
string distro,
string? release,
string patternType,
string matchedPattern);
/// <summary>Build hint from version strings.</summary>
/// <param name="versionStrings">Version strings extracted from the artifact; may be empty.</param>
/// <returns>The hint describing the extracted version strings.</returns>
ProvenanceHint BuildFromVersionStrings(
IReadOnlyList<ExtractedVersionString> versionStrings);
/// <summary>Build hint from corpus match.</summary>
/// <param name="corpusName">Name of the corpus the match came from.</param>
/// <param name="matchedEntry">Corpus entry that matched.</param>
/// <param name="matchType">Kind of match, as a free-form string.</param>
/// <param name="similarity">Similarity score of the match (the JSON schema bounds it to 0.0 - 1.0).</param>
/// <param name="metadata">Optional extra key/value data about the match.</param>
/// <returns>The hint describing the corpus match.</returns>
ProvenanceHint BuildFromCorpusMatch(
string corpusName,
string matchedEntry,
string matchType,
double similarity,
IReadOnlyDictionary<string, string>? metadata);
/// <summary>
/// Combine multiple hints to produce best hypothesis and confidence.
/// </summary>
/// <param name="hints">Hints to combine; may be empty.</param>
/// <returns>The chosen hypothesis text and its confidence score.</returns>
(string Hypothesis, double Confidence) CombineHints(
IReadOnlyList<ProvenanceHint> hints);
}
/// <summary>
/// Build-ID match result from catalog lookup.
/// </summary>
/// <remarks>
/// Passed to <c>IProvenanceHintBuilder.BuildFromBuildId</c>; a <c>null</c> instance
/// there means "no catalog match".
/// </remarks>
public sealed record BuildIdMatchResult
{
/// <summary>Name of the matched package.</summary>
public required string Package { get; init; }
/// <summary>Version of the matched package.</summary>
public required string Version { get; init; }
/// <summary>Distro the matched package belongs to.</summary>
public required string Distro { get; init; }
/// <summary>Optional identifier of the catalog that produced the match.</summary>
public string? CatalogSource { get; init; }
/// <summary>Optional advisory link; the default builder uses it as the link of its "verify_build_id" action.</summary>
public string? AdvisoryLink { get; init; }
}

View File

@@ -0,0 +1,391 @@
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using StellaOps.Unknowns.Core.Models;
namespace StellaOps.Unknowns.Core.Hints;
/// <summary>
/// Default implementation of provenance hint builder.
/// Uses content-addressed IDs and confidence-based classification.
/// </summary>
public sealed partial class ProvenanceHintBuilder : IProvenanceHintBuilder
{
// Clock used to stamp GeneratedAt on every hint this builder creates.
private readonly TimeProvider _timeProvider;

/// <summary>
/// Creates a builder that reads the current time from the supplied provider.
/// </summary>
/// <param name="timeProvider">Clock queried (via <c>GetUtcNow</c>) each time a hint is built.</param>
public ProvenanceHintBuilder(TimeProvider timeProvider) => _timeProvider = timeProvider;
/// <summary>
/// Creates a <c>BuildIdMatch</c> hint for a Build-ID, with or without a catalog match.
/// </summary>
/// <param name="buildId">Build-ID value; also the seed of the content-addressed hint ID.</param>
/// <param name="buildIdType">Build-ID kind, copied into the evidence unchanged.</param>
/// <param name="match">Catalog lookup result, or <c>null</c> when nothing matched.</param>
/// <returns>A hint with confidence 0.95 when matched and 0.2 otherwise.</returns>
public ProvenanceHint BuildFromBuildId(
    string buildId,
    string buildIdType,
    BuildIdMatchResult? match)
{
    // Verification is always proposed; it carries the advisory link when one is known.
    var actions = new List<SuggestedAction>
    {
        new SuggestedAction
        {
            Action = "verify_build_id",
            Priority = 1,
            Effort = "low",
            Description = "Verify Build-ID against distro package repositories",
            Link = match?.AdvisoryLink
        }
    };

    double score;
    string summaryText;
    string hypothesisText;

    if (match is null)
    {
        score = 0.2;
        summaryText = "Build-ID not matched";
        hypothesisText = $"Build-ID {buildId} found but no catalog match";

        // Without a match the catalog itself is the likely gap, so suggest growing it.
        actions.Add(new SuggestedAction
        {
            Action = "expand_catalog",
            Priority = 2,
            Effort = "medium",
            Description = "Add missing distros/packages to Build-ID catalog",
            Link = null
        });
    }
    else
    {
        score = 0.95;
        summaryText = $"Matched {match.Package}";
        hypothesisText = $"Binary matches {match.Package} {match.Version} from {match.Distro}";
    }

    var evidence = new ProvenanceEvidence
    {
        BuildId = new BuildIdEvidence
        {
            BuildId = buildId,
            BuildIdType = buildIdType,
            MatchedPackage = match?.Package,
            MatchedVersion = match?.Version,
            MatchedDistro = match?.Distro,
            CatalogSource = match?.CatalogSource
        }
    };

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.BuildIdMatch, buildId),
        Type = ProvenanceHintType.BuildIdMatch,
        Confidence = score,
        ConfidenceLevel = MapConfidenceLevel(score),
        Summary = summaryText,
        Hypothesis = hypothesisText,
        Evidence = evidence,
        SuggestedActions = actions,
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "BuildIdAnalyzer"
    };
}
/// <summary>
/// Creates an <c>ImportTableFingerprint</c> hint from an import-table fingerprint
/// and any candidate matches.
/// </summary>
/// <param name="fingerprint">Fingerprint string; also the seed of the content-addressed hint ID.</param>
/// <param name="importedLibraries">Imported libraries; stored as evidence and counted.</param>
/// <param name="matches">Candidate matches, or <c>null</c>. The one with the highest similarity is used.</param>
/// <returns>
/// A hint whose confidence is the best match's similarity, or 0.3 when there is no match.
/// </returns>
public ProvenanceHint BuildFromImportFingerprint(
    string fingerprint,
    IReadOnlyList<string> importedLibraries,
    IReadOnlyList<FingerprintMatch>? matches)
{
    var bestMatch = matches?.OrderByDescending(m => m.Similarity).FirstOrDefault();
    var confidence = bestMatch?.Similarity ?? 0.3;

    string hypothesis;
    if (bestMatch is not null)
    {
        hypothesis = $"Import table matches {bestMatch.Package} {bestMatch.Version} ({bestMatch.Similarity:P0} similar)";
    }
    else
    {
        // Abbreviate only when there is something to cut: slicing [..12] on a
        // fingerprint shorter than 12 characters throws ArgumentOutOfRangeException.
        var preview = fingerprint.Length > 12 ? $"{fingerprint[..12]}..." : fingerprint;
        hypothesis = $"Import fingerprint {preview} ({importedLibraries.Count} imports)";
    }

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.ImportTableFingerprint, fingerprint),
        Type = ProvenanceHintType.ImportTableFingerprint,
        Confidence = confidence,
        ConfidenceLevel = MapConfidenceLevel(confidence),
        Summary = bestMatch is not null ? $"Matched {bestMatch.Package}" : "No fingerprint match",
        Hypothesis = hypothesis,
        Evidence = new ProvenanceEvidence
        {
            ImportFingerprint = new ImportFingerprintEvidence
            {
                Fingerprint = fingerprint,
                ImportedLibraries = importedLibraries,
                ImportCount = importedLibraries.Count,
                MatchedFingerprints = matches
            }
        },
        SuggestedActions =
        [
            new SuggestedAction
            {
                Action = "analyze_imports",
                Priority = 1,
                Effort = "low",
                Description = "Cross-reference imported libraries with package databases",
                Link = null
            }
        ],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "ImportFingerprintAnalyzer"
    };
}
/// <summary>
/// Creates a <c>SectionLayout</c> hint from a binary's sections and any candidate layout matches.
/// </summary>
/// <param name="sections">Sections to hash and record as evidence.</param>
/// <param name="matches">Candidate matches, or <c>null</c>. The one with the highest similarity is used.</param>
/// <returns>
/// A hint whose confidence is the best match's similarity, or 0.25 when there is no match.
/// </returns>
public ProvenanceHint BuildFromSectionLayout(
    IReadOnlyList<SectionInfo> sections,
    IReadOnlyList<LayoutMatch>? matches)
{
    var hashOfLayout = ComputeLayoutHash(sections);
    var top = matches?.OrderByDescending(m => m.Similarity).FirstOrDefault();

    double score;
    string summaryText;
    string hypothesisText;

    if (top is null)
    {
        score = 0.25;
        summaryText = "No layout match";
        hypothesisText = $"Section layout: {sections.Count} sections, hash {hashOfLayout}";
    }
    else
    {
        score = top.Similarity;
        summaryText = $"Matched {top.Package}";
        hypothesisText = $"Section layout matches {top.Package} ({top.Similarity:P0} similar)";
    }

    var compareAction = new SuggestedAction
    {
        Action = "compare_section_layout",
        Priority = 2,
        Effort = "medium",
        Description = "Compare section layout with known binaries",
        Link = null
    };

    return new ProvenanceHint
    {
        // The ID is derived from the layout hash, not from the raw section list.
        HintId = ComputeHintId(ProvenanceHintType.SectionLayout, hashOfLayout),
        Type = ProvenanceHintType.SectionLayout,
        Confidence = score,
        ConfidenceLevel = MapConfidenceLevel(score),
        Summary = summaryText,
        Hypothesis = hypothesisText,
        Evidence = new ProvenanceEvidence
        {
            SectionLayout = new SectionLayoutEvidence
            {
                Sections = sections,
                LayoutHash = hashOfLayout,
                MatchedLayouts = matches
            }
        },
        SuggestedActions = [compareAction],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "SectionLayoutAnalyzer"
    };
}
/// <summary>
/// Creates a <c>DistroPattern</c> hint attributing the binary to a distro.
/// </summary>
/// <param name="distro">Distro name; used in the texts, the hint ID and the package-search link lookup.</param>
/// <param name="release">Distro release, or <c>null</c>; appended to the hypothesis when present.</param>
/// <param name="patternType">Kind of pattern, copied into the evidence unchanged.</param>
/// <param name="matchedPattern">Pattern that matched; part of the hint ID seed.</param>
/// <returns>A hint with a fixed confidence of 0.7.</returns>
public ProvenanceHint BuildFromDistroPattern(
    string distro,
    string? release,
    string patternType,
    string matchedPattern)
{
    const double score = 0.7;

    string hypothesisText;
    if (release is null)
    {
        hypothesisText = $"Binary appears to be from {distro}";
    }
    else
    {
        hypothesisText = $"Binary appears to be from {distro} {release}";
    }

    var idSeed = $"{distro}:{matchedPattern}";

    var evidence = new ProvenanceEvidence
    {
        DistroPattern = new DistroPatternEvidence
        {
            Distro = distro,
            Release = release,
            PatternType = patternType,
            MatchedPattern = matchedPattern
        }
    };

    var lookupAction = new SuggestedAction
    {
        Action = "distro_package_lookup",
        Priority = 1,
        Effort = "low",
        Description = $"Search {distro} package repositories",
        // Null for distros without a known search page.
        Link = GetDistroPackageSearchUrl(distro)
    };

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.DistroPattern, idSeed),
        Type = ProvenanceHintType.DistroPattern,
        Confidence = score,
        ConfidenceLevel = MapConfidenceLevel(score),
        Summary = $"Distro pattern: {distro}",
        Hypothesis = hypothesisText,
        Evidence = evidence,
        SuggestedActions = [lookupAction],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "DistroPatternAnalyzer"
    };
}
/// <summary>
/// Creates a <c>VersionString</c> hint from version strings extracted from the artifact.
/// </summary>
/// <param name="versionStrings">Extracted version strings; may be empty.</param>
/// <returns>
/// A hint whose confidence is that of the highest-confidence string, or 0.3 when the list is empty.
/// </returns>
public ProvenanceHint BuildFromVersionStrings(
    IReadOnlyList<ExtractedVersionString> versionStrings)
{
    var top = versionStrings
        .OrderByDescending(v => v.Confidence)
        .FirstOrDefault();

    double score;
    string hypothesisText;
    if (top is null)
    {
        score = 0.3;
        hypothesisText = "No clear version string found";
    }
    else
    {
        score = top.Confidence;
        hypothesisText = $"Version appears to be {top.Value}";
    }

    // The ID seed is every extracted value, comma-joined in input order.
    var idSeed = string.Join(",", versionStrings.Select(v => v.Value));

    var verifyAction = new SuggestedAction
    {
        Action = "version_verification",
        Priority = 1,
        Effort = "low",
        Description = "Verify extracted version against known releases",
        Link = null
    };

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.VersionString, idSeed),
        Type = ProvenanceHintType.VersionString,
        Confidence = score,
        ConfidenceLevel = MapConfidenceLevel(score),
        Summary = $"Found {versionStrings.Count} version string(s)",
        Hypothesis = hypothesisText,
        Evidence = new ProvenanceEvidence
        {
            VersionString = new VersionStringEvidence
            {
                VersionStrings = versionStrings,
                BestGuess = top?.Value
            }
        },
        SuggestedActions = [verifyAction],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "VersionStringExtractor"
    };
}
/// <summary>
/// Creates a <c>CorpusMatch</c> hint for an entry matched in a named corpus.
/// </summary>
/// <param name="corpusName">Corpus name; used in the hint ID, the action text and the <c>Source</c> value.</param>
/// <param name="matchedEntry">Matched corpus entry; used in the hint ID and the texts.</param>
/// <param name="matchType">Kind of match, copied into the evidence unchanged.</param>
/// <param name="similarity">Similarity score; used directly as the hint's confidence.</param>
/// <param name="metadata">Optional extra data, copied into the evidence unchanged.</param>
/// <returns>A hint whose confidence equals <paramref name="similarity"/>.</returns>
public ProvenanceHint BuildFromCorpusMatch(
    string corpusName,
    string matchedEntry,
    string matchType,
    double similarity,
    IReadOnlyDictionary<string, string>? metadata)
{
    // From 0.9 upwards the wording is assertive; below that the similarity is spelled out.
    string hypothesisText;
    if (similarity >= 0.9)
    {
        hypothesisText = $"High confidence match: {matchedEntry}";
    }
    else
    {
        hypothesisText = $"Possible match: {matchedEntry} ({similarity:P0} similar)";
    }

    var idSeed = $"{corpusName}:{matchedEntry}";

    var evidence = new ProvenanceEvidence
    {
        CorpusMatch = new CorpusMatchEvidence
        {
            CorpusName = corpusName,
            MatchedEntry = matchedEntry,
            MatchType = matchType,
            Similarity = similarity,
            Metadata = metadata
        }
    };

    var verifyAction = new SuggestedAction
    {
        Action = "verify_corpus_match",
        Priority = 1,
        Effort = "low",
        Description = $"Verify match against {corpusName}",
        Link = null
    };

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.CorpusMatch, idSeed),
        Type = ProvenanceHintType.CorpusMatch,
        Confidence = similarity,
        ConfidenceLevel = MapConfidenceLevel(similarity),
        Summary = $"Corpus match: {matchedEntry}",
        Hypothesis = hypothesisText,
        Evidence = evidence,
        SuggestedActions = [verifyAction],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = $"{corpusName}Matcher"
    };
}
/// <summary>
/// Reduces a set of hints to a single hypothesis and confidence.
/// </summary>
/// <remarks>
/// Hints with confidence of at least 0.5 are grouped by the package name extracted
/// from their hypothesis text. When the largest group holds two or more hints, its
/// key becomes the hypothesis and the confidence is the group's maximum plus 0.1
/// per additional hint, capped at 0.99. Otherwise the single highest-confidence
/// hint is returned as is. Confidence is rounded to four decimals in both cases.
/// </remarks>
/// <param name="hints">Hints to combine; may be empty.</param>
/// <returns>
/// The chosen hypothesis and confidence, or a fixed "no hints" message with 0.0 for an empty list.
/// </returns>
public (string Hypothesis, double Confidence) CombineHints(
    IReadOnlyList<ProvenanceHint> hints)
{
    if (hints.Count == 0)
    {
        return ("No provenance hints available", 0.0);
    }

    // Highest confidence first; the stable sort keeps input order among equal scores.
    var ranked = hints.OrderByDescending(h => h.Confidence).ToList();

    var largestGroup = ranked
        .Where(h => h.Confidence >= 0.5)
        .GroupBy(h => ExtractPackageFromHypothesis(h.Hypothesis))
        .OrderByDescending(g => g.Count())
        .FirstOrDefault();

    if (largestGroup is null || largestGroup.Count() < 2)
    {
        // No agreement between sources: fall back to the strongest single hint.
        var strongest = ranked[0];
        return (strongest.Hypothesis, Math.Round(strongest.Confidence, 4));
    }

    var supporters = largestGroup.Count();
    var boosted = largestGroup.Max(h => h.Confidence) + (supporters - 1) * 0.1;
    var capped = Math.Min(0.99, boosted);

    return (
        $"{largestGroup.Key} (confirmed by {supporters} evidence sources)",
        Math.Round(capped, 4)
    );
}
/// <summary>
/// Derives the content-addressed hint ID for a hint type and its evidence seed.
/// </summary>
/// <param name="type">Hint type; its name is the first part of the hashed input.</param>
/// <param name="evidence">Evidence seed; the second part of the hashed input.</param>
/// <returns>
/// <c>hint:sha256:</c> followed by the first 24 lowercase hex characters of the
/// SHA-256 of the UTF-8 bytes of <c>"{type}:{evidence}"</c>.
/// </returns>
private static string ComputeHintId(ProvenanceHintType type, string evidence)
{
    var payload = Encoding.UTF8.GetBytes(string.Concat(type.ToString(), ":", evidence));
    var digest = SHA256.HashData(payload);

    // 12 bytes render as exactly 24 hex characters.
    var prefix = Convert.ToHexString(digest, 0, 12).ToLowerInvariant();
    return "hint:sha256:" + prefix;
}
/// <summary>
/// Buckets a numeric confidence score into a <c>HintConfidence</c> level.
/// </summary>
/// <param name="confidence">Score to classify.</param>
/// <returns>
/// <c>VeryHigh</c> from 0.9, <c>High</c> from 0.7, <c>Medium</c> from 0.5,
/// <c>Low</c> from 0.3, and <c>VeryLow</c> for everything else.
/// </returns>
private static HintConfidence MapConfidenceLevel(double confidence)
{
    // Thresholds are checked from the top down; each lower bound is inclusive.
    if (confidence >= 0.9)
    {
        return HintConfidence.VeryHigh;
    }

    if (confidence >= 0.7)
    {
        return HintConfidence.High;
    }

    if (confidence >= 0.5)
    {
        return HintConfidence.Medium;
    }

    if (confidence >= 0.3)
    {
        return HintConfidence.Low;
    }

    return HintConfidence.VeryLow;
}
/// <summary>
/// Computes a short hash of a section layout that does not depend on input order.
/// </summary>
/// <param name="sections">Sections to hash; only name, type and size contribute.</param>
/// <returns>
/// The first 16 lowercase hex characters of the SHA-256 of the
/// <c>name:type:size</c> entries, sorted by name and joined with <c>|</c>.
/// </returns>
private static string ComputeLayoutHash(IReadOnlyList<SectionInfo> sections)
{
    // Sort with an ordinal comparer: the default string comparer is culture-sensitive,
    // so the same sections could otherwise hash differently on differently configured hosts.
    var normalized = string.Join("|",
        sections
            .OrderBy(s => s.Name, StringComparer.Ordinal)
            .Select(s => $"{s.Name}:{s.Type}:{s.Size.ToString(CultureInfo.InvariantCulture)}"));
    var hash = SHA256.HashData(Encoding.UTF8.GetBytes(normalized));
    return Convert.ToHexString(hash).ToLowerInvariant()[..16];
}
/// <summary>
/// Looks up the package-search page for a distro name, ignoring case.
/// </summary>
/// <param name="distro">Distro name.</param>
/// <returns>The search URL for debian, ubuntu, rhel/centos or alpine; <c>null</c> for any other name.</returns>
private static string? GetDistroPackageSearchUrl(string distro)
{
    var key = distro.ToLowerInvariant();
    switch (key)
    {
        case "debian":
            return "https://packages.debian.org/search";
        case "ubuntu":
            return "https://packages.ubuntu.com/";
        case "rhel":
        case "centos":
            // Both names resolve to the same Red Hat page.
            return "https://access.redhat.com/downloads";
        case "alpine":
            return "https://pkgs.alpinelinux.org/packages";
        default:
            return null;
    }
}
/// <summary>
/// Extracts a grouping key from a hypothesis text.
/// </summary>
/// <param name="hypothesis">Hypothesis text produced by one of the builders.</param>
/// <returns>
/// The first whitespace-delimited token following "matches" or "from", or the
/// whole text when neither keyword is found.
/// </returns>
private static string ExtractPackageFromHypothesis(string hypothesis)
{
    var hit = PackageExtractionRegex().Match(hypothesis);
    if (!hit.Success)
    {
        // Nothing recognisable: the full text serves as its own key.
        return hypothesis;
    }

    return hit.Groups[1].Value;
}

// Keyword, whitespace, then the captured run of non-whitespace characters.
[GeneratedRegex(@"(?:matches?|from)\s+(\S+)")]
private static partial Regex PackageExtractionRegex();
}

View File

@@ -0,0 +1,205 @@
using System.Text.Json.Serialization;
namespace StellaOps.Unknowns.Core.Models;
/// <summary>Build-ID match evidence.</summary>
/// <remarks>
/// The <c>Matched*</c> and <c>CatalogSource</c> members are <c>null</c> when the
/// Build-ID had no catalog match.
/// </remarks>
public sealed record BuildIdEvidence
{
/// <summary>The Build-ID value.</summary>
[JsonPropertyName("build_id")]
public required string BuildId { get; init; }
/// <summary>Kind of Build-ID, as a free-form string.</summary>
[JsonPropertyName("build_id_type")]
public required string BuildIdType { get; init; }
/// <summary>Package the Build-ID matched, if any.</summary>
[JsonPropertyName("matched_package")]
public string? MatchedPackage { get; init; }
/// <summary>Version of the matched package, if any.</summary>
[JsonPropertyName("matched_version")]
public string? MatchedVersion { get; init; }
/// <summary>Distro of the matched package, if any.</summary>
[JsonPropertyName("matched_distro")]
public string? MatchedDistro { get; init; }
/// <summary>Catalog that produced the match, if known.</summary>
[JsonPropertyName("catalog_source")]
public string? CatalogSource { get; init; }
}
/// <summary>Debug link evidence.</summary>
/// <remarks>
/// NOTE(review): no producer of this record is visible alongside it; the member
/// descriptions below are inferred from names and types - confirm against the
/// analyzer that fills them.
/// </remarks>
public sealed record DebugLinkEvidence
{
/// <summary>The debug link value.</summary>
[JsonPropertyName("debug_link")]
public required string DebugLink { get; init; }
/// <summary>Optional CRC32 associated with the debug link.</summary>
[JsonPropertyName("crc32")]
public uint? Crc32 { get; init; }
/// <summary>Whether debug info was found.</summary>
[JsonPropertyName("debug_info_found")]
public bool DebugInfoFound { get; init; }
/// <summary>Optional path of the debug info.</summary>
[JsonPropertyName("debug_info_path")]
public string? DebugInfoPath { get; init; }
}
/// <summary>Import table fingerprint evidence.</summary>
public sealed record ImportFingerprintEvidence
{
/// <summary>Fingerprint string of the import table.</summary>
[JsonPropertyName("fingerprint")]
public required string Fingerprint { get; init; }
/// <summary>Libraries listed by the import table.</summary>
[JsonPropertyName("imported_libraries")]
public required IReadOnlyList<string> ImportedLibraries { get; init; }
/// <summary>Number of imports; the default builder sets it to the size of <see cref="ImportedLibraries"/>.</summary>
[JsonPropertyName("import_count")]
public int ImportCount { get; init; }
/// <summary>All candidate matches that were considered, or <c>null</c> when none were supplied.</summary>
[JsonPropertyName("matched_fingerprints")]
public IReadOnlyList<FingerprintMatch>? MatchedFingerprints { get; init; }
}
/// <summary>Export table fingerprint evidence.</summary>
/// <remarks>
/// NOTE(review): no producer of this record is visible alongside it; the member
/// descriptions below are inferred from names and types - confirm against the
/// analyzer that fills them.
/// </remarks>
public sealed record ExportFingerprintEvidence
{
/// <summary>Fingerprint string of the export table.</summary>
[JsonPropertyName("fingerprint")]
public required string Fingerprint { get; init; }
/// <summary>Number of exports.</summary>
[JsonPropertyName("export_count")]
public int ExportCount { get; init; }
/// <summary>Optional list of exports singled out as notable.</summary>
[JsonPropertyName("notable_exports")]
public IReadOnlyList<string>? NotableExports { get; init; }
/// <summary>Optional candidate fingerprint matches.</summary>
[JsonPropertyName("matched_fingerprints")]
public IReadOnlyList<FingerprintMatch>? MatchedFingerprints { get; init; }
}
/// <summary>Fingerprint match from corpus.</summary>
public sealed record FingerprintMatch
{
/// <summary>Name of the matched package.</summary>
[JsonPropertyName("package")]
public required string Package { get; init; }
/// <summary>Version of the matched package.</summary>
[JsonPropertyName("version")]
public required string Version { get; init; }
/// <summary>
/// Similarity score (the JSON schema bounds it to 0 - 1). The default builder ranks
/// matches by this value and uses the best one as the hint's confidence.
/// </summary>
[JsonPropertyName("similarity")]
public required double Similarity { get; init; }
/// <summary>Where the match came from, as a free-form string.</summary>
[JsonPropertyName("source")]
public required string Source { get; init; }
}
/// <summary>Section layout evidence.</summary>
public sealed record SectionLayoutEvidence
{
/// <summary>Sections that make up the layout.</summary>
[JsonPropertyName("sections")]
public required IReadOnlyList<SectionInfo> Sections { get; init; }
/// <summary>
/// Hash identifying the layout. The default builder uses 16 lowercase hex characters
/// of a SHA-256 over each section's name, type and size.
/// </summary>
[JsonPropertyName("layout_hash")]
public required string LayoutHash { get; init; }
/// <summary>All candidate matches that were considered, or <c>null</c> when none were supplied.</summary>
[JsonPropertyName("matched_layouts")]
public IReadOnlyList<LayoutMatch>? MatchedLayouts { get; init; }
}
/// <summary>Section information for layout analysis.</summary>
/// <remarks>
/// The default builder's layout hash covers <see cref="Name"/>, <see cref="Type"/>
/// and <see cref="Size"/>; <see cref="Flags"/> is not part of it.
/// </remarks>
public sealed record SectionInfo
{
/// <summary>Section name.</summary>
[JsonPropertyName("name")]
public required string Name { get; init; }
/// <summary>Section type, as a free-form string.</summary>
[JsonPropertyName("type")]
public required string Type { get; init; }
/// <summary>Section size (unit not stated here - presumably bytes; TODO confirm).</summary>
[JsonPropertyName("size")]
public ulong Size { get; init; }
/// <summary>Optional section flags, as a free-form string.</summary>
[JsonPropertyName("flags")]
public string? Flags { get; init; }
}
/// <summary>Layout match result.</summary>
public sealed record LayoutMatch
{
/// <summary>Name of the package whose layout matched.</summary>
[JsonPropertyName("package")]
public required string Package { get; init; }
/// <summary>
/// Similarity score (the JSON schema bounds it to 0 - 1). The default builder ranks
/// matches by this value and uses the best one as the hint's confidence.
/// </summary>
[JsonPropertyName("similarity")]
public required double Similarity { get; init; }
}
/// <summary>Compiler signature evidence.</summary>
/// <remarks>
/// NOTE(review): no producer of this record is visible alongside it; the member
/// descriptions below are inferred from names and types - confirm against the
/// analyzer that fills them.
/// </remarks>
public sealed record CompilerEvidence
{
/// <summary>Identified compiler.</summary>
[JsonPropertyName("compiler")]
public required string Compiler { get; init; }
/// <summary>Optional compiler version.</summary>
[JsonPropertyName("version")]
public string? Version { get; init; }
/// <summary>Optional list of flags.</summary>
[JsonPropertyName("flags")]
public IReadOnlyList<string>? Flags { get; init; }
/// <summary>How the compiler was detected, as a free-form string.</summary>
[JsonPropertyName("detection_method")]
public required string DetectionMethod { get; init; }
}
/// <summary>Distro pattern match evidence.</summary>
public sealed record DistroPatternEvidence
{
/// <summary>Distro the pattern points to.</summary>
[JsonPropertyName("distro")]
public required string Distro { get; init; }
/// <summary>Distro release, or <c>null</c> when it is not known.</summary>
[JsonPropertyName("release")]
public string? Release { get; init; }
/// <summary>Kind of pattern that matched, as a free-form string.</summary>
[JsonPropertyName("pattern_type")]
public required string PatternType { get; init; }
/// <summary>The pattern that matched.</summary>
[JsonPropertyName("matched_pattern")]
public required string MatchedPattern { get; init; }
/// <summary>Optional examples; the default builder leaves this unset.</summary>
[JsonPropertyName("examples")]
public IReadOnlyList<string>? Examples { get; init; }
}
/// <summary>Version string extraction evidence.</summary>
public sealed record VersionStringEvidence
{
/// <summary>All extracted version strings.</summary>
[JsonPropertyName("version_strings")]
public required IReadOnlyList<ExtractedVersionString> VersionStrings { get; init; }
/// <summary>
/// Most likely version. The default builder uses the value of the highest-confidence
/// entry, or <c>null</c> when nothing was extracted.
/// </summary>
[JsonPropertyName("best_guess")]
public string? BestGuess { get; init; }
}
/// <summary>Extracted version string with location and confidence.</summary>
public sealed record ExtractedVersionString
{
/// <summary>The version string text.</summary>
[JsonPropertyName("value")]
public required string Value { get; init; }
/// <summary>Where the string was found, as a free-form string.</summary>
[JsonPropertyName("location")]
public required string Location { get; init; }
/// <summary>
/// Confidence score (the JSON schema bounds it to 0 - 1). The default builder picks
/// the entry with the highest value as its best guess.
/// </summary>
[JsonPropertyName("confidence")]
public double Confidence { get; init; }
}
/// <summary>Corpus match evidence.</summary>
public sealed record CorpusMatchEvidence
{
/// <summary>Name of the corpus the match came from.</summary>
[JsonPropertyName("corpus_name")]
public required string CorpusName { get; init; }
/// <summary>Corpus entry that matched.</summary>
[JsonPropertyName("matched_entry")]
public required string MatchedEntry { get; init; }
/// <summary>Kind of match, as a free-form string.</summary>
[JsonPropertyName("match_type")]
public required string MatchType { get; init; }
/// <summary>Similarity score (the JSON schema bounds it to 0 - 1); the default builder also uses it as the hint's confidence.</summary>
[JsonPropertyName("similarity")]
public required double Similarity { get; init; }
/// <summary>Optional extra key/value data about the match.</summary>
[JsonPropertyName("metadata")]
public IReadOnlyDictionary<string, string>? Metadata { get; init; }
}

View File

@@ -0,0 +1,124 @@
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.Unknowns.Core.Models;
/// <summary>
/// A provenance hint providing evidence about an unknown's identity.
/// Immutable record with content-addressed ID.
/// </summary>
/// <remarks>
/// Property names map one-to-one onto the snake_case JSON names given by the
/// <c>JsonPropertyName</c> attributes. Every member is <c>required</c>, so an
/// object initializer must set all of them.
/// </remarks>
public sealed record ProvenanceHint
{
/// <summary>Unique hint ID (content-addressed, format: hint:sha256:hex24).</summary>
[JsonPropertyName("hint_id")]
public required string HintId { get; init; }
/// <summary>Type of provenance hint.</summary>
[JsonPropertyName("type")]
public required ProvenanceHintType Type { get; init; }
/// <summary>Confidence score (0.0 - 1.0).</summary>
[JsonPropertyName("confidence")]
public required double Confidence { get; init; }
/// <summary>Confidence level classification (the bucket <see cref="Confidence"/> falls into).</summary>
[JsonPropertyName("confidence_level")]
public required HintConfidence ConfidenceLevel { get; init; }
/// <summary>Human-readable summary of the hint.</summary>
[JsonPropertyName("summary")]
public required string Summary { get; init; }
/// <summary>Hypothesis about the unknown's identity.</summary>
[JsonPropertyName("hypothesis")]
public required string Hypothesis { get; init; }
/// <summary>Type-specific evidence details.</summary>
[JsonPropertyName("evidence")]
public required ProvenanceEvidence Evidence { get; init; }
/// <summary>Suggested resolution actions (ordered by priority).</summary>
[JsonPropertyName("suggested_actions")]
public required IReadOnlyList<SuggestedAction> SuggestedActions { get; init; }
/// <summary>When this hint was generated (UTC).</summary>
[JsonPropertyName("generated_at")]
public required DateTimeOffset GeneratedAt { get; init; }
/// <summary>Source of the hint (analyzer, corpus, etc.).</summary>
[JsonPropertyName("source")]
public required string Source { get; init; }
}
/// <summary>
/// Suggested action for resolving the unknown.
/// </summary>
/// <remarks>
/// <see cref="Effort"/> is a plain string here; nothing in this record restricts
/// it to the values listed in its summary.
/// </remarks>
public sealed record SuggestedAction
{
/// <summary>Action identifier (e.g., "distro_package_lookup").</summary>
[JsonPropertyName("action")]
public required string Action { get; init; }
/// <summary>Priority (1 = highest).</summary>
[JsonPropertyName("priority")]
public required int Priority { get; init; }
/// <summary>Estimated effort (low/medium/high).</summary>
[JsonPropertyName("effort")]
public required string Effort { get; init; }
/// <summary>Human-readable description.</summary>
[JsonPropertyName("description")]
public required string Description { get; init; }
/// <summary>Optional link to documentation or tool; <c>null</c> when there is none.</summary>
[JsonPropertyName("link")]
public string? Link { get; init; }
}
/// <summary>
/// Type-specific evidence for a provenance hint.
/// Only one evidence type should be populated per hint.
/// </summary>
/// <remarks>
/// The "only one" rule is a convention: every member is an independent nullable
/// property and nothing in this record enforces exclusivity.
/// </remarks>
public sealed record ProvenanceEvidence
{
/// <summary>Build-ID match details.</summary>
[JsonPropertyName("build_id")]
public BuildIdEvidence? BuildId { get; init; }
/// <summary>Debug link details.</summary>
[JsonPropertyName("debug_link")]
public DebugLinkEvidence? DebugLink { get; init; }
/// <summary>Import table fingerprint details.</summary>
[JsonPropertyName("import_fingerprint")]
public ImportFingerprintEvidence? ImportFingerprint { get; init; }
/// <summary>Export table fingerprint details.</summary>
[JsonPropertyName("export_fingerprint")]
public ExportFingerprintEvidence? ExportFingerprint { get; init; }
/// <summary>Section layout details.</summary>
[JsonPropertyName("section_layout")]
public SectionLayoutEvidence? SectionLayout { get; init; }
/// <summary>Compiler signature details.</summary>
[JsonPropertyName("compiler")]
public CompilerEvidence? Compiler { get; init; }
/// <summary>Distro pattern match details.</summary>
[JsonPropertyName("distro_pattern")]
public DistroPatternEvidence? DistroPattern { get; init; }
/// <summary>Version string extraction details.</summary>
[JsonPropertyName("version_string")]
public VersionStringEvidence? VersionString { get; init; }
/// <summary>Corpus match details.</summary>
[JsonPropertyName("corpus_match")]
public CorpusMatchEvidence? CorpusMatch { get; init; }
/// <summary>Raw evidence as JSON (for extensibility).</summary>
// NOTE(review): JsonDocument is IDisposable and compares by reference, so a record
// holding one is neither value-equal nor self-cleaning - confirm who owns disposal.
[JsonPropertyName("raw")]
public JsonDocument? Raw { get; init; }
}

View File

@@ -0,0 +1,74 @@
namespace StellaOps.Unknowns.Core.Models;
/// <summary>
/// Classification of provenance hint types that explain why something is unknown
/// and provide evidence for resolution.
/// </summary>
/// <remarks>
/// The member name is part of the hashed input of the content-addressed hint ID
/// produced by the default builder, so renaming a member changes the IDs generated for it.
/// </remarks>
public enum ProvenanceHintType
{
/// <summary>ELF/PE Build-ID match against known catalog.</summary>
BuildIdMatch,
/// <summary>Debug link (.gnu_debuglink) reference.</summary>
DebugLink,
/// <summary>Import table fingerprint comparison.</summary>
ImportTableFingerprint,
/// <summary>Export table fingerprint comparison.</summary>
ExportTableFingerprint,
/// <summary>Section layout similarity.</summary>
SectionLayout,
/// <summary>String table signature match.</summary>
StringTableSignature,
/// <summary>Compiler/linker identification.</summary>
CompilerSignature,
/// <summary>Package manager metadata (RPATH, NEEDED, etc.).</summary>
PackageMetadata,
/// <summary>Distro/vendor pattern match.</summary>
DistroPattern,
/// <summary>Version string extraction.</summary>
VersionString,
/// <summary>Symbol name pattern match.</summary>
SymbolPattern,
/// <summary>File path pattern match.</summary>
PathPattern,
/// <summary>Hash match against known corpus.</summary>
CorpusMatch,
/// <summary>SBOM cross-reference.</summary>
SbomCrossReference,
/// <summary>Advisory cross-reference.</summary>
AdvisoryCrossReference
}
/// <summary>
/// Confidence level for a provenance hint.
/// </summary>
/// <remarks>
/// Lower bounds are inclusive and upper bounds exclusive, matching the
/// thresholds the default builder applies when classifying a score.
/// </remarks>
public enum HintConfidence
{
/// <summary>Very high confidence (score >= 0.9).</summary>
VeryHigh,
/// <summary>High confidence (0.7 &lt;= score &lt; 0.9).</summary>
High,
/// <summary>Medium confidence (0.5 &lt;= score &lt; 0.7).</summary>
Medium,
/// <summary>Low confidence (0.3 &lt;= score &lt; 0.5).</summary>
Low,
/// <summary>Very low confidence (score &lt; 0.3).</summary>
VeryLow
}

View File

@@ -143,6 +143,20 @@ public sealed record Unknown
/// <summary>When this record was last updated.</summary>
public DateTimeOffset UpdatedAt { get; init; }
// Provenance Hints
/// <summary>Structured provenance hints about this unknown's identity.</summary>
public IReadOnlyList<ProvenanceHint> ProvenanceHints { get; init; } = [];
/// <summary>Best hypothesis based on hints (highest confidence).</summary>
public string? BestHypothesis { get; init; }
/// <summary>Combined confidence from all hints.</summary>
public double? CombinedConfidence { get; init; }
/// <summary>Primary suggested action (highest priority).</summary>
public string? PrimarySuggestedAction { get; init; }
// Computed properties
/// <summary>Whether this unknown is currently open (valid and not superseded).</summary>

View File

@@ -190,6 +190,27 @@ public interface IUnknownRepository
Task<IReadOnlyList<TriageSummary>> GetTriageSummaryAsync(
string tenantId,
CancellationToken cancellationToken);
/// <summary>
/// Attaches provenance hints to an unknown.
/// </summary>
Task<Unknown> AttachProvenanceHintsAsync(
string tenantId,
Guid id,
IReadOnlyList<ProvenanceHint> hints,
string? bestHypothesis,
double? combinedConfidence,
string? primarySuggestedAction,
CancellationToken cancellationToken);
/// <summary>
/// Gets unknowns with provenance hints above a confidence threshold.
/// </summary>
Task<IReadOnlyList<Unknown>> GetWithHighConfidenceHintsAsync(
string tenantId,
double minConfidence = 0.7,
int? limit = null,
CancellationToken cancellationToken = default);
}
/// <summary>

View File

@@ -0,0 +1,316 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://stellaops.org/schemas/provenance-hint.schema.json",
"title": "ProvenanceHint",
"description": "A provenance hint providing evidence about an unknown's identity",
"type": "object",
"required": [
"hint_id",
"type",
"confidence",
"confidence_level",
"summary",
"hypothesis",
"evidence",
"suggested_actions",
"generated_at",
"source"
],
"properties": {
"hint_id": {
"type": "string",
"pattern": "^hint:sha256:[0-9a-f]{24}$",
"description": "Content-addressed unique identifier"
},
"type": {
"type": "string",
"enum": [
"BuildIdMatch",
"DebugLink",
"ImportTableFingerprint",
"ExportTableFingerprint",
"SectionLayout",
"StringTableSignature",
"CompilerSignature",
"PackageMetadata",
"DistroPattern",
"VersionString",
"SymbolPattern",
"PathPattern",
"CorpusMatch",
"SbomCrossReference",
"AdvisoryCrossReference"
],
"description": "Type of provenance hint"
},
"confidence": {
"type": "number",
"minimum": 0.0,
"maximum": 1.0,
"description": "Confidence score (0.0 - 1.0)"
},
"confidence_level": {
"type": "string",
"enum": ["VeryHigh", "High", "Medium", "Low", "VeryLow"],
"description": "Categorical confidence level"
},
"summary": {
"type": "string",
"minLength": 1,
"description": "Human-readable summary of the hint"
},
"hypothesis": {
"type": "string",
"minLength": 1,
"description": "Hypothesis about the unknown's identity"
},
"evidence": {
"$ref": "#/definitions/ProvenanceEvidence"
},
"suggested_actions": {
"type": "array",
"items": {
"$ref": "#/definitions/SuggestedAction"
},
"minItems": 1,
"description": "Suggested resolution actions ordered by priority"
},
"generated_at": {
"type": "string",
"format": "date-time",
"description": "When this hint was generated (UTC)"
},
"source": {
"type": "string",
"minLength": 1,
"description": "Source of the hint (analyzer, corpus, etc.)"
}
},
"additionalProperties": false,
"definitions": {
"ProvenanceEvidence": {
"type": "object",
"description": "Type-specific evidence (only one field should be populated)",
"properties": {
"build_id": { "$ref": "#/definitions/BuildIdEvidence" },
"debug_link": { "$ref": "#/definitions/DebugLinkEvidence" },
"import_fingerprint": { "$ref": "#/definitions/ImportFingerprintEvidence" },
"export_fingerprint": { "$ref": "#/definitions/ExportFingerprintEvidence" },
"section_layout": { "$ref": "#/definitions/SectionLayoutEvidence" },
"compiler": { "$ref": "#/definitions/CompilerEvidence" },
"distro_pattern": { "$ref": "#/definitions/DistroPatternEvidence" },
"version_string": { "$ref": "#/definitions/VersionStringEvidence" },
"corpus_match": { "$ref": "#/definitions/CorpusMatchEvidence" },
"raw": {
"type": "object",
"description": "Raw evidence as JSON (for extensibility)"
}
},
"additionalProperties": false
},
"BuildIdEvidence": {
"type": "object",
"required": ["build_id", "build_id_type"],
"properties": {
"build_id": { "type": "string" },
"build_id_type": { "type": "string" },
"matched_package": { "type": "string" },
"matched_version": { "type": "string" },
"matched_distro": { "type": "string" },
"catalog_source": { "type": "string" }
}
},
"DebugLinkEvidence": {
"type": "object",
"required": ["debug_link", "debug_info_found"],
"properties": {
"debug_link": { "type": "string" },
"crc32": { "type": "integer", "minimum": 0 },
"debug_info_found": { "type": "boolean" },
"debug_info_path": { "type": "string" }
}
},
"ImportFingerprintEvidence": {
"type": "object",
"required": ["fingerprint", "imported_libraries", "import_count"],
"properties": {
"fingerprint": { "type": "string" },
"imported_libraries": {
"type": "array",
"items": { "type": "string" }
},
"import_count": { "type": "integer", "minimum": 0 },
"matched_fingerprints": {
"type": "array",
"items": { "$ref": "#/definitions/FingerprintMatch" }
}
}
},
"ExportFingerprintEvidence": {
"type": "object",
"required": ["fingerprint", "export_count"],
"properties": {
"fingerprint": { "type": "string" },
"export_count": { "type": "integer", "minimum": 0 },
"notable_exports": {
"type": "array",
"items": { "type": "string" }
},
"matched_fingerprints": {
"type": "array",
"items": { "$ref": "#/definitions/FingerprintMatch" }
}
}
},
"FingerprintMatch": {
"type": "object",
"required": ["package", "version", "similarity", "source"],
"properties": {
"package": { "type": "string" },
"version": { "type": "string" },
"similarity": { "type": "number", "minimum": 0, "maximum": 1 },
"source": { "type": "string" }
}
},
"SectionLayoutEvidence": {
"type": "object",
"required": ["sections", "layout_hash"],
"properties": {
"sections": {
"type": "array",
"items": { "$ref": "#/definitions/SectionInfo" }
},
"layout_hash": { "type": "string" },
"matched_layouts": {
"type": "array",
"items": { "$ref": "#/definitions/LayoutMatch" }
}
}
},
"SectionInfo": {
"type": "object",
"required": ["name", "type", "size"],
"properties": {
"name": { "type": "string" },
"type": { "type": "string" },
"size": { "type": "integer", "minimum": 0 },
"flags": { "type": "string" }
}
},
"LayoutMatch": {
"type": "object",
"required": ["package", "similarity"],
"properties": {
"package": { "type": "string" },
"similarity": { "type": "number", "minimum": 0, "maximum": 1 }
}
},
"CompilerEvidence": {
"type": "object",
"required": ["compiler", "detection_method"],
"properties": {
"compiler": { "type": "string" },
"version": { "type": "string" },
"flags": {
"type": "array",
"items": { "type": "string" }
},
"detection_method": { "type": "string" }
}
},
"DistroPatternEvidence": {
"type": "object",
"required": ["distro", "pattern_type", "matched_pattern"],
"properties": {
"distro": { "type": "string" },
"release": { "type": "string" },
"pattern_type": { "type": "string" },
"matched_pattern": { "type": "string" },
"examples": {
"type": "array",
"items": { "type": "string" }
}
}
},
"VersionStringEvidence": {
"type": "object",
"required": ["version_strings"],
"properties": {
"version_strings": {
"type": "array",
"items": { "$ref": "#/definitions/ExtractedVersionString" }
},
"best_guess": { "type": "string" }
}
},
"ExtractedVersionString": {
"type": "object",
"required": ["value", "location", "confidence"],
"properties": {
"value": { "type": "string" },
"location": { "type": "string" },
"confidence": { "type": "number", "minimum": 0, "maximum": 1 }
}
},
"CorpusMatchEvidence": {
"type": "object",
"required": ["corpus_name", "matched_entry", "match_type", "similarity"],
"properties": {
"corpus_name": { "type": "string" },
"matched_entry": { "type": "string" },
"match_type": { "type": "string" },
"similarity": { "type": "number", "minimum": 0, "maximum": 1 },
"metadata": {
"type": "object",
"additionalProperties": { "type": "string" }
}
}
},
"SuggestedAction": {
"type": "object",
"required": ["action", "priority", "effort", "description"],
"properties": {
"action": {
"type": "string",
"minLength": 1,
"description": "Action identifier"
},
"priority": {
"type": "integer",
"minimum": 1,
"description": "Priority (1 = highest)"
},
"effort": {
"type": "string",
"enum": ["low", "medium", "high"],
"description": "Estimated effort"
},
"description": {
"type": "string",
"minLength": 1,
"description": "Human-readable description"
},
"link": {
"type": "string",
"format": "uri",
"description": "Optional link to documentation or tool"
}
}
}
}
}

View File

@@ -0,0 +1,23 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using StellaOps.Unknowns.Core.Hints;
namespace StellaOps.Unknowns.Core;
/// <summary>
/// Dependency injection extensions for the Unknowns.Core library.
/// </summary>
public static class UnknownsServiceExtensions
{
    /// <summary>
    /// Registers provenance hint builder services.
    /// </summary>
    /// <remarks>
    /// Registers <see cref="ProvenanceHintBuilder"/> as the singleton
    /// <see cref="IProvenanceHintBuilder"/> and <see cref="TimeProvider.System"/> as the
    /// singleton <see cref="TimeProvider"/>. Both registrations go through
    /// <c>TryAddSingleton</c>, so a registration the host made earlier for either service
    /// type is kept as-is.
    /// </remarks>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, to allow call chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public static IServiceCollection AddProvenanceHintBuilder(
        this IServiceCollection services)
    {
        // Fail fast so the exception names this method's parameter rather than
        // the parameter of the first helper that happens to touch the collection.
        ArgumentNullException.ThrowIfNull(services);

        services.TryAddSingleton<IProvenanceHintBuilder, ProvenanceHintBuilder>();
        services.TryAddSingleton(TimeProvider.System);
        return services;
    }
}

View File

@@ -0,0 +1,101 @@
-- Unknowns Schema Migration 002: Provenance Hints
-- Category: A (safe, can run at startup)
--
-- Purpose: Add support for structured provenance hints that explain why
-- something is unknown and provide hypotheses for resolution.
--
-- Implements SPRINT_20260106_001_005_UNKNOWNS requirements:
-- - Store provenance hints as JSONB array
-- - Track best hypothesis and combined confidence
-- - Enable efficient querying by confidence threshold
BEGIN;
-- ============================================================================
-- Step 1: Add provenance hint columns to unknowns table
-- ============================================================================
ALTER TABLE IF EXISTS unknowns.unknowns
    -- Defaults to an empty array so existing rows satisfy NOT NULL.
    ADD COLUMN IF NOT EXISTS provenance_hints JSONB DEFAULT '[]'::jsonb NOT NULL,
    ADD COLUMN IF NOT EXISTS best_hypothesis TEXT,
    -- NUMERIC(5,4) = one integer digit + four decimals, so every value the CHECK
    -- permits (0.0000 .. 1.0000) is storable. NUMERIC(4,4) has no integer digit
    -- and raises "numeric field overflow" when 1.0 is written.
    ADD COLUMN IF NOT EXISTS combined_confidence NUMERIC(5,4) CHECK (combined_confidence IS NULL OR (combined_confidence >= 0 AND combined_confidence <= 1)),
    ADD COLUMN IF NOT EXISTS primary_suggested_action TEXT;

COMMENT ON COLUMN unknowns.unknowns.provenance_hints IS
    'Array of structured provenance hints (ProvenanceHint records)';
COMMENT ON COLUMN unknowns.unknowns.best_hypothesis IS
    'Best hypothesis from all hints (highest confidence)';
COMMENT ON COLUMN unknowns.unknowns.combined_confidence IS
    'Combined confidence score from all hints (0.0 - 1.0)';
COMMENT ON COLUMN unknowns.unknowns.primary_suggested_action IS
    'Primary suggested action (highest priority)';
-- ============================================================================
-- Step 2: Create GIN index for efficient hint querying
-- ============================================================================
-- GIN index over the whole provenance_hints JSONB document.
-- NOTE(review): unlike the ALTER TABLE statements in this migration, CREATE INDEX
-- has no "IF EXISTS" guard on the table itself, so this step presumably fails when
-- unknowns.unknowns is absent - confirm that is the intended behavior.
CREATE INDEX IF NOT EXISTS idx_unknowns_provenance_hints_gin
ON unknowns.unknowns USING GIN (provenance_hints);
COMMENT ON INDEX unknowns.idx_unknowns_provenance_hints_gin IS
'GIN index for efficient JSONB queries on provenance hints';
-- ============================================================================
-- Step 3: Create index for high-confidence hint queries
-- ============================================================================
-- Partial index: only rows with combined_confidence >= 0.7 are indexed, keyed by
-- tenant and ordered by descending confidence within each tenant.
-- NOTE(review): 0.7 is hard-coded here; queries using a lower threshold are not
-- covered by this index - verify against the repository's query thresholds.
CREATE INDEX IF NOT EXISTS idx_unknowns_combined_confidence
ON unknowns.unknowns (tenant_id, combined_confidence DESC)
WHERE combined_confidence IS NOT NULL AND combined_confidence >= 0.7;
COMMENT ON INDEX unknowns.idx_unknowns_combined_confidence IS
'Partial index for high-confidence provenance hint queries';
-- ============================================================================
-- Step 4: JSON schema validation function (optional)
-- ============================================================================
-- Structural validator for the provenance_hints column.
-- Returns FALSE when the value is not a JSON array, or when any element is not a
-- JSON object containing all of: hint_id, type, confidence, hypothesis, evidence.
-- Field values are not inspected; only key presence is checked.
CREATE OR REPLACE FUNCTION unknowns.validate_provenance_hints(hints JSONB)
RETURNS BOOLEAN
LANGUAGE plpgsql IMMUTABLE
AS $$
BEGIN
    -- Basic validation: must be an array
    IF jsonb_typeof(hints) != 'array' THEN
        RETURN FALSE;
    END IF;

    -- Each element must be an object that has every required field.
    -- The explicit object check is needed because the jsonb key-existence
    -- operators also match string scalars and string members of arrays, so an
    -- element such as ["hint_id","type","confidence","hypothesis","evidence"]
    -- would otherwise be accepted as a valid hint.
    IF EXISTS (
        SELECT 1
        FROM jsonb_array_elements(hints) AS hint
        WHERE jsonb_typeof(hint) <> 'object'
           OR NOT (
               hint ? 'hint_id' AND
               hint ? 'type' AND
               hint ? 'confidence' AND
               hint ? 'hypothesis' AND
               hint ? 'evidence'
           )
    ) THEN
        RETURN FALSE;
    END IF;

    RETURN TRUE;
END;
$$;
COMMENT ON FUNCTION unknowns.validate_provenance_hints IS
'Validates that provenance_hints JSONB conforms to expected schema';
-- ============================================================================
-- Step 5: Add validation constraint
-- ============================================================================
-- PostgreSQL has no "ADD CONSTRAINT IF NOT EXISTS", so a bare ALTER TABLE makes
-- the whole migration fail on a second run even though every other step is
-- guarded. Check the catalog first so re-running is a no-op.
DO $$
DECLARE
    target_table regclass := to_regclass('unknowns.unknowns');
BEGIN
    -- to_regclass yields NULL when the table is missing, which preserves the
    -- original "ALTER TABLE IF EXISTS" behavior of silently skipping.
    IF target_table IS NOT NULL THEN
        IF NOT EXISTS (
            SELECT 1
            FROM pg_constraint
            WHERE conrelid = target_table
              AND conname = 'chk_provenance_hints_valid'
        ) THEN
            ALTER TABLE unknowns.unknowns
                ADD CONSTRAINT chk_provenance_hints_valid
                CHECK (unknowns.validate_provenance_hints(provenance_hints));
        END IF;
    END IF;
END;
$$;
COMMIT;

View File

@@ -0,0 +1,215 @@
using StellaOps.Unknowns.Core.Hints;
using StellaOps.Unknowns.Core.Models;
using Xunit;
using FluentAssertions;
namespace StellaOps.Unknowns.Core.Tests.Hints;
/// <summary>
/// Tests for hint combination logic and confidence aggregation.
/// </summary>
/// <remarks>
/// All hints are produced through the real <see cref="ProvenanceHintBuilder"/>.
/// <c>BuildFromBuildId</c> takes no confidence or similarity input, so the test cannot
/// choose a build-id hint's confidence; <see cref="CombineHints_SingleHighConfidenceHint_ReturnsHypothesisAndConfidence"/>
/// pins the value for a catalog match at 0.95. "Boost" assertions therefore compare the
/// combined result against the strongest confidence actually present in the input,
/// instead of against a number the arrangement only appeared to request.
/// </remarks>
public sealed class HintCombinationTests
{
    private readonly ProvenanceHintBuilder _builder = new(TimeProvider.System);

    /// <summary>An empty hint list yields the fixed "no hints" hypothesis and zero confidence.</summary>
    [Fact]
    public void CombineHints_EmptyList_ReturnsZeroConfidence()
    {
        // Act
        var (hypothesis, confidence) = _builder.CombineHints([]);

        // Assert
        hypothesis.Should().Be("No provenance hints available");
        confidence.Should().Be(0.0);
    }

    /// <summary>A single build-id match is returned unchanged as the combined result.</summary>
    [Fact]
    public void CombineHints_SingleHighConfidenceHint_ReturnsHypothesisAndConfidence()
    {
        // Arrange
        var hints = new[]
        {
            CreateBuildIdHint("openssl")
        };

        // Act
        var (hypothesis, confidence) = _builder.CombineHints(hints);

        // Assert
        hypothesis.Should().Contain("openssl");
        confidence.Should().Be(0.95);
    }

    /// <summary>Three hints for the same package must raise confidence above the best single hint.</summary>
    [Fact]
    public void CombineHints_MultipleAgreeingHints_BoostsConfidence()
    {
        // Arrange - all hints point to same package
        var hints = new[]
        {
            CreateBuildIdHint("openssl"),
            CreateImportHint("openssl", 0.80),
            CreateVersionHint("openssl", 0.70)
        };

        // Act
        var (hypothesis, confidence) = _builder.CombineHints(hints);

        // Assert
        // Compare against the real strongest input; a fixed lower number would let
        // the test pass even when no boost is applied at all.
        confidence.Should().BeGreaterThan(StrongestInputConfidence(hints));
        hypothesis.Should().Contain("confirmed by");
        hypothesis.Should().Contain("3 evidence sources");
    }

    /// <summary>Hints for different packages fall back to the best single hint, without a boost.</summary>
    [Fact]
    public void CombineHints_MultipleDisagreeingHints_UsesBestSingleHint()
    {
        // Arrange - hints point to different packages
        var hints = new[]
        {
            CreateBuildIdHint("openssl"),
            CreateImportHint("curl", 0.80),
            CreateVersionHint("wget", 0.70)
        };

        // Act
        var (hypothesis, confidence) = _builder.CombineHints(hints);

        // Assert
        confidence.Should().Be(0.95); // Highest single hint
        hypothesis.Should().Contain("openssl"); // Best match
        hypothesis.Should().NotContain("confirmed by"); // No agreement
    }

    /// <summary>Two agreeing hints are boosted above the best single hint but stay below 1.0.</summary>
    [Fact]
    public void CombineHints_TwoAgreeingHighConfidence_CombinesConfidence()
    {
        // Arrange
        var hints = new[]
        {
            CreateBuildIdHint("curl"),
            CreateVersionHint("curl", 0.75)
        };

        // Act
        var (hypothesis, confidence) = _builder.CombineHints(hints);

        // Assert
        confidence.Should().BeGreaterThan(StrongestInputConfidence(hints));
        confidence.Should().BeLessThan(1.0); // Capped at 0.99
        hypothesis.Should().Contain("confirmed by");
        hypothesis.Should().Contain("2 evidence sources");
    }

    /// <summary>A hint below the confidence threshold does not count as confirmation.</summary>
    [Fact]
    public void CombineHints_OneLowConfidenceOneHigh_UsesHighConfidenceOnly()
    {
        // Arrange
        var hints = new[]
        {
            CreateBuildIdHint("openssl"),
            CreateVersionHint("openssl", 0.25) // Below 0.5 threshold
        };

        // Act
        var (hypothesis, confidence) = _builder.CombineHints(hints);

        // Assert
        confidence.Should().Be(0.95); // Only high-confidence hint used
        hypothesis.Should().NotContain("confirmed by"); // Low confidence ignored
    }

    /// <summary>
    /// Many agreeing high-confidence hints never push the combined confidence past 0.99.
    /// Note: the arrangement uses four hints although the method name says "Three".
    /// </summary>
    [Fact]
    public void CombineHints_ThreeAgreeingHints_DoesNotExceed099()
    {
        // Arrange - many agreeing high-confidence hints
        var hints = new[]
        {
            CreateBuildIdHint("nginx"),
            CreateImportHint("nginx", 0.92),
            CreateVersionHint("nginx", 0.88),
            CreateCorpusHint("nginx", 0.85)
        };

        // Act
        var (hypothesis, confidence) = _builder.CombineHints(hints);

        // Assert
        confidence.Should().BeLessThanOrEqualTo(0.99);
        hypothesis.Should().Contain("confirmed by");
        hypothesis.Should().Contain("4 evidence sources");
    }

    /// <summary>Only hints at or above the threshold are counted as evidence sources.</summary>
    [Fact]
    public void CombineHints_MixedConfidencesSamePackage_CountsOnlyHighConfidence()
    {
        // Arrange
        var hints = new[]
        {
            CreateBuildIdHint("bash"),          // High (assigned by the builder)
            CreateImportHint("bash", 0.60),     // Medium
            CreateVersionHint("bash", 0.30)     // Low (excluded)
        };

        // Act
        var (hypothesis, confidence) = _builder.CombineHints(hints);

        // Assert
        hypothesis.Should().Contain("confirmed by");
        hypothesis.Should().Contain("2 evidence sources"); // Only high+medium
    }

    // Helper methods to create test hints

    /// <summary>Returns the largest <c>Confidence</c> found among <paramref name="hints"/>.</summary>
    private static double StrongestInputConfidence(ProvenanceHint[] hints)
    {
        var strongest = 0.0;
        foreach (var hint in hints)
        {
            strongest = Math.Max(strongest, hint.Confidence);
        }

        return strongest;
    }

    /// <summary>
    /// Builds a build-id hint with a catalog match for <paramref name="package"/>.
    /// There is deliberately no confidence parameter: <c>BuildFromBuildId</c> does not accept one.
    /// </summary>
    private ProvenanceHint CreateBuildIdHint(string package)
    {
        var match = new BuildIdMatchResult
        {
            Package = package,
            Version = "1.0.0",
            Distro = "debian"
        };

        return _builder.BuildFromBuildId("test-build-id", "sha1", match);
    }

    /// <summary>Builds an import-fingerprint hint with one match of the given similarity.</summary>
    private ProvenanceHint CreateImportHint(string package, double similarity)
    {
        var matches = new[]
        {
            new FingerprintMatch
            {
                Package = package,
                Version = "1.0.0",
                Similarity = similarity,
                Source = "test-corpus"
            }
        };

        return _builder.BuildFromImportFingerprint("fp-test", new[] { "lib1.so" }, matches);
    }

    /// <summary>Builds a version-string hint from a single "&lt;package&gt; 1.0.0" string.</summary>
    private ProvenanceHint CreateVersionHint(string package, double confidence)
    {
        var versionStrings = new[]
        {
            new ExtractedVersionString
            {
                Value = $"{package} 1.0.0",
                Location = ".rodata",
                Confidence = confidence
            }
        };

        return _builder.BuildFromVersionStrings(versionStrings);
    }

    /// <summary>Builds a corpus-match hint for entry "&lt;package&gt;/1.0.0" with no metadata.</summary>
    private ProvenanceHint CreateCorpusHint(string package, double similarity)
    {
        return _builder.BuildFromCorpusMatch(
            "test-corpus",
            $"{package}/1.0.0",
            "hash",
            similarity,
            null);
    }
}

View File

@@ -0,0 +1,281 @@
using StellaOps.Unknowns.Core.Hints;
using StellaOps.Unknowns.Core.Models;
using Xunit;
using FluentAssertions;
namespace StellaOps.Unknowns.Core.Tests.Hints;
/// <summary>
/// Tests for ProvenanceHintBuilder - all hint building scenarios.
/// </summary>
/// <remarks>
/// Every null-forgiving dereference (<c>!</c>) and every <c>[0]</c> index is preceded by an
/// explicit assertion, so a regression is reported as an assertion failure with a
/// readable message rather than as a <see cref="NullReferenceException"/> or
/// <see cref="ArgumentOutOfRangeException"/>.
/// </remarks>
public sealed class ProvenanceHintBuilderTests
{
    private readonly ProvenanceHintBuilder _builder = new(TimeProvider.System);

    /// <summary>A catalog match yields a 0.95 / VeryHigh hint carrying the match details.</summary>
    [Fact]
    public void BuildFromBuildId_WithMatch_CreatesVeryHighConfidenceHint()
    {
        // Arrange
        var match = new BuildIdMatchResult
        {
            Package = "openssl",
            Version = "1.1.1k",
            Distro = "debian",
            CatalogSource = "debian-security"
        };

        // Act
        var hint = _builder.BuildFromBuildId("abc123", "sha1", match);

        // Assert
        hint.Type.Should().Be(ProvenanceHintType.BuildIdMatch);
        hint.Confidence.Should().Be(0.95);
        hint.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
        hint.Hypothesis.Should().Contain("openssl");
        hint.Hypothesis.Should().Contain("1.1.1k");
        hint.Hypothesis.Should().Contain("debian");
        hint.Evidence.BuildId.Should().NotBeNull();
        hint.Evidence.BuildId!.BuildId.Should().Be("abc123");
        hint.Evidence.BuildId.MatchedPackage.Should().Be("openssl");
        hint.SuggestedActions.Should().HaveCountGreaterOrEqualTo(1);
        hint.SuggestedActions[0].Action.Should().Be("verify_build_id");
        hint.HintId.Should().StartWith("hint:sha256:");
    }

    /// <summary>
    /// No catalog match yields a 0.2 hint. The asserted level is <c>VeryLow</c>,
    /// although the method name says "Low".
    /// </summary>
    [Fact]
    public void BuildFromBuildId_WithoutMatch_CreatesLowConfidenceHint()
    {
        // Act
        var hint = _builder.BuildFromBuildId("unknown123", "sha1", null);

        // Assert
        hint.Confidence.Should().Be(0.2);
        hint.ConfidenceLevel.Should().Be(HintConfidence.VeryLow);
        hint.Hypothesis.Should().Contain("no catalog match");
        hint.Evidence.BuildId.Should().NotBeNull();
        hint.Evidence.BuildId!.MatchedPackage.Should().BeNull();
        hint.SuggestedActions.Should().Contain(a => a.Action == "expand_catalog");
    }

    /// <summary>A fingerprint match propagates its similarity and package into the hint.</summary>
    [Fact]
    public void BuildFromImportFingerprint_WithMatch_IncludesMatchedPackage()
    {
        // Arrange
        var matches = new[]
        {
            new FingerprintMatch
            {
                Package = "libc6",
                Version = "2.31",
                Similarity = 0.92,
                Source = "debian-corpus"
            }
        };
        var imports = new[] { "libc.so.6", "libpthread.so.0" };

        // Act
        var hint = _builder.BuildFromImportFingerprint("fp-abc", imports, matches);

        // Assert
        hint.Type.Should().Be(ProvenanceHintType.ImportTableFingerprint);
        hint.Confidence.Should().Be(0.92);
        hint.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
        hint.Hypothesis.Should().Contain("libc6");
        hint.Hypothesis.Should().Contain("2.31");
        hint.Evidence.ImportFingerprint.Should().NotBeNull();
        hint.Evidence.ImportFingerprint!.ImportedLibraries.Should().HaveCount(2);
        hint.Evidence.ImportFingerprint.MatchedFingerprints.Should().HaveCount(1);
    }

    /// <summary>
    /// No fingerprint match yields a 0.3 hint. The asserted level is <c>Low</c>,
    /// although the method name says "Medium".
    /// </summary>
    [Fact]
    public void BuildFromImportFingerprint_WithoutMatch_CreatesMediumConfidenceHint()
    {
        // Arrange
        var imports = new[] { "unknown.so.1" };

        // Act
        var hint = _builder.BuildFromImportFingerprint("fp-xyz", imports, null);

        // Assert
        hint.Confidence.Should().Be(0.3);
        hint.ConfidenceLevel.Should().Be(HintConfidence.Low);
        hint.Hypothesis.Should().Contain("fp-xyz");
        hint.Evidence.ImportFingerprint.Should().NotBeNull();
        hint.Evidence.ImportFingerprint!.MatchedFingerprints.Should().BeNull();
    }

    /// <summary>A layout match propagates its similarity; a layout hash is always produced.</summary>
    [Fact]
    public void BuildFromSectionLayout_WithMatch_IncludesSimilarity()
    {
        // Arrange
        var sections = new[]
        {
            new SectionInfo { Name = ".text", Type = "PROGBITS", Size = 0x1000 },
            new SectionInfo { Name = ".data", Type = "PROGBITS", Size = 0x200 }
        };
        var matches = new[]
        {
            new LayoutMatch { Package = "bash", Similarity = 0.88 }
        };

        // Act
        var hint = _builder.BuildFromSectionLayout(sections, matches);

        // Assert
        hint.Type.Should().Be(ProvenanceHintType.SectionLayout);
        hint.Confidence.Should().Be(0.88);
        hint.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
        hint.Hypothesis.Should().Contain("bash");
        hint.Evidence.SectionLayout.Should().NotBeNull();
        hint.Evidence.SectionLayout!.Sections.Should().HaveCount(2);
        hint.Evidence.SectionLayout.LayoutHash.Should().NotBeNullOrEmpty();
    }

    /// <summary>A distro pattern yields a 0.7 / High hint naming distro and release, with a linked action.</summary>
    [Fact]
    public void BuildFromDistroPattern_IncludesDistroAndRelease()
    {
        // Act
        var hint = _builder.BuildFromDistroPattern("debian", "bullseye", "rpath", "/usr/lib/x86_64-linux-gnu");

        // Assert
        hint.Type.Should().Be(ProvenanceHintType.DistroPattern);
        hint.Confidence.Should().Be(0.7);
        hint.ConfidenceLevel.Should().Be(HintConfidence.High);
        hint.Hypothesis.Should().Contain("debian");
        hint.Hypothesis.Should().Contain("bullseye");
        hint.Evidence.DistroPattern.Should().NotBeNull();
        hint.Evidence.DistroPattern!.Distro.Should().Be("debian");
        hint.Evidence.DistroPattern.Release.Should().Be("bullseye");
        hint.SuggestedActions.Should().HaveCountGreaterOrEqualTo(1);
        hint.SuggestedActions[0].Link.Should().NotBeNull();
    }

    /// <summary>The highest-confidence version string becomes the best guess and the hint confidence.</summary>
    [Fact]
    public void BuildFromVersionStrings_WithMultipleStrings_SelectsBestGuess()
    {
        // Arrange
        var versionStrings = new[]
        {
            new ExtractedVersionString { Value = "1.2.3", Location = ".rodata", Confidence = 0.8 },
            new ExtractedVersionString { Value = "1.2", Location = ".comment", Confidence = 0.5 }
        };

        // Act
        var hint = _builder.BuildFromVersionStrings(versionStrings);

        // Assert
        hint.Type.Should().Be(ProvenanceHintType.VersionString);
        hint.Confidence.Should().Be(0.8);
        hint.ConfidenceLevel.Should().Be(HintConfidence.High);
        hint.Hypothesis.Should().Contain("1.2.3");
        hint.Evidence.VersionString.Should().NotBeNull();
        hint.Evidence.VersionString!.BestGuess.Should().Be("1.2.3");
        hint.Evidence.VersionString.VersionStrings.Should().HaveCount(2);
    }

    /// <summary>A 0.95-similarity corpus match yields a VeryHigh hint that keeps the supplied metadata.</summary>
    [Fact]
    public void BuildFromCorpusMatch_HighSimilarity_CreatesVeryHighConfidence()
    {
        // Act
        var hint = _builder.BuildFromCorpusMatch(
            "debian-packages",
            "curl/7.68.0",
            "hash",
            0.95,
            new Dictionary<string, string> { ["arch"] = "amd64" });

        // Assert
        hint.Type.Should().Be(ProvenanceHintType.CorpusMatch);
        hint.Confidence.Should().Be(0.95);
        hint.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
        hint.Hypothesis.Should().Contain("High confidence match");
        hint.Hypothesis.Should().Contain("curl/7.68.0");
        hint.Evidence.CorpusMatch.Should().NotBeNull();
        hint.Evidence.CorpusMatch!.CorpusName.Should().Be("debian-packages");
        hint.Evidence.CorpusMatch.Metadata.Should().ContainKey("arch");
    }

    /// <summary>Combining an empty list yields the "no hints" hypothesis and zero confidence.</summary>
    [Fact]
    public void CombineHints_NoHints_ReturnsZeroConfidence()
    {
        // Act
        var (hypothesis, confidence) = _builder.CombineHints([]);

        // Assert
        hypothesis.Should().Contain("No provenance hints");
        confidence.Should().Be(0.0);
    }

    /// <summary>Combining a single hint returns that hint's hypothesis and confidence.</summary>
    [Fact]
    public void CombineHints_SingleHint_ReturnsBestHypothesis()
    {
        // Arrange
        var hints = new[]
        {
            _builder.BuildFromBuildId("abc123", "sha1", new BuildIdMatchResult
            {
                Package = "openssl",
                Version = "1.1.1k",
                Distro = "debian"
            })
        };

        // Act
        var (hypothesis, confidence) = _builder.CombineHints(hints);

        // Assert
        hypothesis.Should().Contain("openssl");
        confidence.Should().Be(0.95);
    }

    /// <summary>Build-id, distro and version hints together exceed the 0.95 of the build-id hint alone.</summary>
    [Fact]
    public void CombineHints_MultipleAgreeingHints_BoostsConfidence()
    {
        // Arrange
        var buildIdMatch = new BuildIdMatchResult
        {
            Package = "openssl",
            Version = "1.1.1k",
            Distro = "debian"
        };
        var hints = new[]
        {
            _builder.BuildFromBuildId("abc123", "sha1", buildIdMatch),
            _builder.BuildFromDistroPattern("debian", "bullseye", "rpath", "/usr/lib"),
            _builder.BuildFromVersionStrings(new[]
            {
                new ExtractedVersionString { Value = "1.1.1k", Location = ".rodata", Confidence = 0.7 }
            })
        };

        // Act
        var (hypothesis, confidence) = _builder.CombineHints(hints);

        // Assert
        confidence.Should().BeGreaterThan(0.95); // Boosted from multiple agreeing hints
        hypothesis.Should().Contain("confirmed by");
        hypothesis.Should().Contain("evidence sources");
    }

    /// <summary>
    /// Two hints built from identical input share a hint id, even though the builder
    /// reads the real system clock between the two calls.
    /// </summary>
    [Fact]
    public void HintId_IsContentAddressed_DeterministicForSameInput()
    {
        // Arrange & Act
        var hint1 = _builder.BuildFromBuildId("abc123", "sha1", null);
        var hint2 = _builder.BuildFromBuildId("abc123", "sha1", null);

        // Assert
        hint1.HintId.Should().Be(hint2.HintId);
    }

    /// <summary>Hints built from different build ids get different hint ids.</summary>
    [Fact]
    public void HintId_IsDifferent_ForDifferentInput()
    {
        // Arrange & Act
        var hint1 = _builder.BuildFromBuildId("abc123", "sha1", null);
        var hint2 = _builder.BuildFromBuildId("xyz789", "sha1", null);

        // Assert
        hint1.HintId.Should().NotBe(hint2.HintId);
    }
}

View File

@@ -0,0 +1,299 @@
using System.Text.Json;
using StellaOps.Unknowns.Core.Hints;
using StellaOps.Unknowns.Core.Models;
using Xunit;
using FluentAssertions;
using System.Text.Json.Serialization;
namespace StellaOps.Unknowns.Core.Tests.Hints;
/// <summary>
/// Golden fixture tests for provenance hint serialization.
/// Ensures stable JSON output for cross-service compatibility.
/// </summary>
/// <remarks>
/// All tests serialize with snake_case property names and omit null values, and build
/// hints with a frozen clock so the <c>generated_at</c> field is deterministic.
/// </remarks>
public sealed class ProvenanceHintSerializationTests
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        WriteIndented = false,
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    private readonly ProvenanceHintBuilder _builder = new(new FrozenTimeProvider());

    /// <summary>A build-id hint round-trips and exposes every top-level key in snake_case.</summary>
    [Fact]
    public void BuildIdHint_Serialization_ProducesExpectedJson()
    {
        // Arrange
        var match = new BuildIdMatchResult
        {
            Package = "openssl",
            Version = "1.1.1k",
            Distro = "debian",
            CatalogSource = "debian-security"
        };
        var hint = _builder.BuildFromBuildId("abc123def456", "sha1", match);

        // Act
        var json = JsonSerializer.Serialize(hint, JsonOptions);
        var deserialized = JsonSerializer.Deserialize<ProvenanceHint>(json, JsonOptions);

        // Assert - round-trip
        deserialized.Should().NotBeNull();
        deserialized!.Type.Should().Be(ProvenanceHintType.BuildIdMatch);
        deserialized.Confidence.Should().Be(0.95);
        deserialized.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
        deserialized.Evidence.BuildId.Should().NotBeNull();
        deserialized.Evidence.BuildId!.BuildId.Should().Be("abc123def456");
        deserialized.Evidence.BuildId.MatchedPackage.Should().Be("openssl");

        // Assert - stable keys
        json.Should().Contain("\"hint_id\":");
        json.Should().Contain("\"type\":");
        json.Should().Contain("\"confidence\":");
        json.Should().Contain("\"confidence_level\":");
        json.Should().Contain("\"hypothesis\":");
        json.Should().Contain("\"evidence\":");
        json.Should().Contain("\"suggested_actions\":");
        json.Should().Contain("\"generated_at\":");
        json.Should().Contain("\"source\":");
    }

    /// <summary>Import-fingerprint evidence keeps its libraries and matches across a round-trip.</summary>
    [Fact]
    public void ImportFingerprintHint_Serialization_RoundTripsCorrectly()
    {
        // Arrange
        var matches = new[]
        {
            new FingerprintMatch
            {
                Package = "libc6",
                Version = "2.31-13",
                Similarity = 0.92,
                Source = "debian-corpus"
            }
        };
        var imports = new[] { "libc.so.6", "libpthread.so.0", "libdl.so.2" };
        var hint = _builder.BuildFromImportFingerprint("fp-abc123", imports, matches);

        // Act
        var json = JsonSerializer.Serialize(hint, JsonOptions);
        var deserialized = JsonSerializer.Deserialize<ProvenanceHint>(json, JsonOptions);

        // Assert
        deserialized.Should().NotBeNull();
        deserialized!.Evidence.ImportFingerprint.Should().NotBeNull();
        deserialized.Evidence.ImportFingerprint!.Fingerprint.Should().Be("fp-abc123");
        deserialized.Evidence.ImportFingerprint.ImportedLibraries.Should().HaveCount(3);
        deserialized.Evidence.ImportFingerprint.MatchedFingerprints.Should().HaveCount(1);
        deserialized.Evidence.ImportFingerprint.MatchedFingerprints![0].Package.Should().Be("libc6");
        deserialized.Evidence.ImportFingerprint.MatchedFingerprints[0].Similarity.Should().Be(0.92);
    }

    /// <summary>Section-layout evidence keeps every section, the layout hash and the matches.</summary>
    [Fact]
    public void SectionLayoutHint_Serialization_PreservesAllSections()
    {
        // Arrange
        var sections = new[]
        {
            new SectionInfo { Name = ".text", Type = "PROGBITS", Size = 0x1000, Flags = "AX" },
            new SectionInfo { Name = ".data", Type = "PROGBITS", Size = 0x200, Flags = "WA" },
            new SectionInfo { Name = ".bss", Type = "NOBITS", Size = 0x100, Flags = "WA" }
        };
        var matches = new[]
        {
            new LayoutMatch { Package = "bash", Similarity = 0.88 }
        };
        var hint = _builder.BuildFromSectionLayout(sections, matches);

        // Act
        var json = JsonSerializer.Serialize(hint, JsonOptions);
        var deserialized = JsonSerializer.Deserialize<ProvenanceHint>(json, JsonOptions);

        // Assert
        deserialized.Should().NotBeNull();
        deserialized!.Evidence.SectionLayout.Should().NotBeNull();
        deserialized.Evidence.SectionLayout!.Sections.Should().HaveCount(3);
        deserialized.Evidence.SectionLayout.Sections[0].Name.Should().Be(".text");
        deserialized.Evidence.SectionLayout.Sections[0].Size.Should().Be(0x1000);
        deserialized.Evidence.SectionLayout.LayoutHash.Should().NotBeNullOrEmpty();
        deserialized.Evidence.SectionLayout.MatchedLayouts.Should().HaveCount(1);
    }

    /// <summary>Distro-pattern evidence keeps distro, release, pattern type and matched pattern.</summary>
    [Fact]
    public void DistroPatternHint_Serialization_IncludesAllFields()
    {
        // Arrange
        var hint = _builder.BuildFromDistroPattern(
            "debian",
            "bullseye",
            "rpath",
            "/usr/lib/x86_64-linux-gnu");

        // Act
        var json = JsonSerializer.Serialize(hint, JsonOptions);
        var deserialized = JsonSerializer.Deserialize<ProvenanceHint>(json, JsonOptions);

        // Assert
        deserialized.Should().NotBeNull();
        deserialized!.Evidence.DistroPattern.Should().NotBeNull();
        deserialized.Evidence.DistroPattern!.Distro.Should().Be("debian");
        deserialized.Evidence.DistroPattern.Release.Should().Be("bullseye");
        deserialized.Evidence.DistroPattern.PatternType.Should().Be("rpath");
        deserialized.Evidence.DistroPattern.MatchedPattern.Should().Be("/usr/lib/x86_64-linux-gnu");
    }

    /// <summary>Version-string evidence keeps all strings and the highest-confidence best guess.</summary>
    [Fact]
    public void VersionStringHint_Serialization_PreservesAllVersionStrings()
    {
        // Arrange
        var versionStrings = new[]
        {
            new ExtractedVersionString { Value = "1.2.3", Location = ".rodata", Confidence = 0.8 },
            new ExtractedVersionString { Value = "1.2", Location = ".comment", Confidence = 0.5 },
            new ExtractedVersionString { Value = "v1.2.3-stable", Location = ".data", Confidence = 0.7 }
        };
        var hint = _builder.BuildFromVersionStrings(versionStrings);

        // Act
        var json = JsonSerializer.Serialize(hint, JsonOptions);
        var deserialized = JsonSerializer.Deserialize<ProvenanceHint>(json, JsonOptions);

        // Assert
        deserialized.Should().NotBeNull();
        deserialized!.Evidence.VersionString.Should().NotBeNull();
        deserialized.Evidence.VersionString!.VersionStrings.Should().HaveCount(3);
        deserialized.Evidence.VersionString.BestGuess.Should().Be("1.2.3"); // Highest confidence
    }

    /// <summary>Corpus-match evidence keeps its name, entry, similarity and metadata map.</summary>
    [Fact]
    public void CorpusMatchHint_Serialization_IncludesMetadata()
    {
        // Arrange
        var metadata = new Dictionary<string, string>
        {
            ["arch"] = "amd64",
            ["build_date"] = "2024-01-15",
            ["compiler"] = "gcc-11.2.0"
        };
        var hint = _builder.BuildFromCorpusMatch(
            "debian-packages",
            "curl/7.68.0",
            "hash",
            0.95,
            metadata);

        // Act
        var json = JsonSerializer.Serialize(hint, JsonOptions);
        var deserialized = JsonSerializer.Deserialize<ProvenanceHint>(json, JsonOptions);

        // Assert
        deserialized.Should().NotBeNull();
        deserialized!.Evidence.CorpusMatch.Should().NotBeNull();
        deserialized.Evidence.CorpusMatch!.CorpusName.Should().Be("debian-packages");
        deserialized.Evidence.CorpusMatch.MatchedEntry.Should().Be("curl/7.68.0");
        deserialized.Evidence.CorpusMatch.Similarity.Should().Be(0.95);
        deserialized.Evidence.CorpusMatch.Metadata.Should().NotBeNull();
        deserialized.Evidence.CorpusMatch.Metadata!["arch"].Should().Be("amd64");
    }

    /// <summary>
    /// The first suggested action survives a round-trip with all of its fields populated.
    /// Only the first element is inspected; relative ordering of several actions is not compared.
    /// </summary>
    [Fact]
    public void SuggestedActions_Serialization_PreservesOrder()
    {
        // Arrange
        var match = new BuildIdMatchResult
        {
            Package = "test",
            Version = "1.0",
            Distro = "debian"
        };
        var hint = _builder.BuildFromBuildId("test-id", "sha1", match);

        // Act
        var json = JsonSerializer.Serialize(hint, JsonOptions);
        var deserialized = JsonSerializer.Deserialize<ProvenanceHint>(json, JsonOptions);

        // Assert
        deserialized.Should().NotBeNull();
        deserialized!.SuggestedActions.Should().HaveCountGreaterOrEqualTo(1);
        deserialized.SuggestedActions[0].Action.Should().NotBeNullOrEmpty();
        deserialized.SuggestedActions[0].Priority.Should().BeGreaterThan(0);
        deserialized.SuggestedActions[0].Effort.Should().NotBeNullOrEmpty();
        deserialized.SuggestedActions[0].Description.Should().NotBeNullOrEmpty();
    }

    /// <summary>Identical input produces identical hint ids, and each id appears in its JSON.</summary>
    [Fact]
    public void HintId_IsDeterministic_ForSameInput()
    {
        // Arrange & Act
        var hint1 = _builder.BuildFromBuildId("same-id", "sha1", null);
        var hint2 = _builder.BuildFromBuildId("same-id", "sha1", null);
        var json1 = JsonSerializer.Serialize(hint1, JsonOptions);
        var json2 = JsonSerializer.Serialize(hint2, JsonOptions);

        // Assert
        hint1.HintId.Should().Be(hint2.HintId);
        json1.Should().Contain(hint1.HintId);
        json2.Should().Contain(hint2.HintId);
    }

    /// <summary>The frozen clock's instant is used for <c>GeneratedAt</c> and serialized with a +00:00 offset.</summary>
    [Fact]
    public void GeneratedAt_UsesFixedTimestamp_InTests()
    {
        // Arrange
        var hint = _builder.BuildFromBuildId("test", "sha1", null);

        // Act
        var json = JsonSerializer.Serialize(hint, JsonOptions);

        // Assert
        hint.GeneratedAt.Should().Be(new DateTimeOffset(2025, 1, 1, 0, 0, 0, TimeSpan.Zero));
        json.Should().Contain("\"generated_at\":\"2025-01-01T00:00:00+00:00\"");
    }

    /// <summary>A fully populated hint serializes to parseable JSON with the expected nested keys.</summary>
    [Fact]
    public void CompleteHint_JsonOutput_IsValid()
    {
        // Arrange
        var match = new BuildIdMatchResult
        {
            Package = "nginx",
            Version = "1.18.0-6",
            Distro = "debian",
            CatalogSource = "debian-security",
            AdvisoryLink = "https://security.debian.org/nginx"
        };
        var hint = _builder.BuildFromBuildId("deadbeef0123456789abcdef", "sha256", match);

        // Act
        var json = JsonSerializer.Serialize(hint, JsonOptions);

        // Assert - JSON is parseable
        // JsonDocument is IDisposable and rents pooled buffers; dispose it so the
        // memory is returned to the pool when the test finishes.
        using var parsed = JsonDocument.Parse(json);
        parsed.RootElement.GetProperty("hint_id").GetString().Should().StartWith("hint:sha256:");
        parsed.RootElement.GetProperty("type").GetString().Should().NotBeNullOrEmpty();
        parsed.RootElement.GetProperty("confidence").GetDouble().Should().BeInRange(0, 1);
        parsed.RootElement.GetProperty("evidence").GetProperty("build_id").GetProperty("catalog_source")
            .GetString().Should().Be("debian-security");
    }

    /// <summary>
    /// Frozen time provider for deterministic test timestamps.
    /// Always reports 2025-01-01T00:00:00Z from <see cref="GetUtcNow"/>.
    /// </summary>
    private sealed class FrozenTimeProvider : TimeProvider
    {
        private static readonly DateTimeOffset FrozenTime = new(2025, 1, 1, 0, 0, 0, TimeSpan.Zero);

        public override DateTimeOffset GetUtcNow() => FrozenTime;
    }
}