sprints and audit work
This commit is contained in:
@@ -0,0 +1,61 @@
|
||||
namespace StellaOps.Unknowns.Core.Hints;
|
||||
|
||||
/// <summary>
/// Creates <see cref="ProvenanceHint"/> instances, one method per kind of evidence,
/// and folds a set of hints into a single hypothesis with a confidence value.
/// </summary>
public interface IProvenanceHintBuilder
{
    /// <summary>Creates a hint from a Build-ID and an optional catalog match.</summary>
    /// <param name="buildId">The Build-ID value.</param>
    /// <param name="buildIdType">The kind of Build-ID.</param>
    /// <param name="match">Catalog match, or <c>null</c> when none was found.</param>
    /// <returns>The resulting hint.</returns>
    ProvenanceHint BuildFromBuildId(
        string buildId,
        string buildIdType,
        BuildIdMatchResult? match);

    /// <summary>Creates a hint from an import table fingerprint.</summary>
    /// <param name="fingerprint">The import table fingerprint.</param>
    /// <param name="importedLibraries">Libraries the binary imports.</param>
    /// <param name="matches">Candidate fingerprint matches, or <c>null</c>.</param>
    /// <returns>The resulting hint.</returns>
    ProvenanceHint BuildFromImportFingerprint(
        string fingerprint,
        IReadOnlyList<string> importedLibraries,
        IReadOnlyList<FingerprintMatch>? matches);

    /// <summary>Creates a hint from a binary's section layout.</summary>
    /// <param name="sections">Sections of the binary.</param>
    /// <param name="matches">Candidate layout matches, or <c>null</c>.</param>
    /// <returns>The resulting hint.</returns>
    ProvenanceHint BuildFromSectionLayout(
        IReadOnlyList<SectionInfo> sections,
        IReadOnlyList<LayoutMatch>? matches);

    /// <summary>Creates a hint from a distro pattern match.</summary>
    /// <param name="distro">Distro name.</param>
    /// <param name="release">Distro release, or <c>null</c> when unknown.</param>
    /// <param name="patternType">Kind of pattern that matched.</param>
    /// <param name="matchedPattern">The pattern that matched.</param>
    /// <returns>The resulting hint.</returns>
    ProvenanceHint BuildFromDistroPattern(
        string distro,
        string? release,
        string patternType,
        string matchedPattern);

    /// <summary>Creates a hint from extracted version strings.</summary>
    /// <param name="versionStrings">Version strings found in the binary.</param>
    /// <returns>The resulting hint.</returns>
    ProvenanceHint BuildFromVersionStrings(
        IReadOnlyList<ExtractedVersionString> versionStrings);

    /// <summary>Creates a hint from a corpus match.</summary>
    /// <param name="corpusName">Name of the corpus that produced the match.</param>
    /// <param name="matchedEntry">The corpus entry that matched.</param>
    /// <param name="matchType">Kind of match.</param>
    /// <param name="similarity">Similarity score of the match.</param>
    /// <param name="metadata">Optional metadata attached to the match.</param>
    /// <returns>The resulting hint.</returns>
    ProvenanceHint BuildFromCorpusMatch(
        string corpusName,
        string matchedEntry,
        string matchType,
        double similarity,
        IReadOnlyDictionary<string, string>? metadata);

    /// <summary>
    /// Reduces several hints to the best hypothesis and a combined confidence.
    /// </summary>
    /// <param name="hints">Hints to combine.</param>
    /// <returns>The chosen hypothesis text and its confidence.</returns>
    (string Hypothesis, double Confidence) CombineHints(
        IReadOnlyList<ProvenanceHint> hints);
}
|
||||
|
||||
/// <summary>
/// Outcome of looking a Build-ID up in a catalog: the package, version and distro
/// it resolved to, plus optional source and advisory information.
/// </summary>
public sealed record BuildIdMatchResult
{
    /// <summary>Matched package name.</summary>
    public required string Package { get; init; }

    /// <summary>Matched package version.</summary>
    public required string Version { get; init; }

    /// <summary>Distro the matched package belongs to.</summary>
    public required string Distro { get; init; }

    /// <summary>Catalog that produced the match, when known.</summary>
    public string? CatalogSource { get; init; }

    /// <summary>Optional advisory link associated with the match.</summary>
    public string? AdvisoryLink { get; init; }
}
|
||||
@@ -0,0 +1,391 @@
|
||||
using System.Globalization;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Unknowns.Core.Models;
|
||||
|
||||
namespace StellaOps.Unknowns.Core.Hints;
|
||||
|
||||
/// <summary>
|
||||
/// Default implementation of provenance hint builder.
|
||||
/// Uses content-addressed IDs and confidence-based classification.
|
||||
/// </summary>
|
||||
public sealed partial class ProvenanceHintBuilder : IProvenanceHintBuilder
|
||||
{
|
||||
/// <summary>Clock queried for <c>GeneratedAt</c> whenever a hint is built.</summary>
private readonly TimeProvider _timeProvider;

/// <summary>
/// Creates a builder that timestamps hints using <paramref name="timeProvider"/>.
/// </summary>
/// <param name="timeProvider">Clock whose <c>GetUtcNow()</c> stamps each hint.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="timeProvider"/> is <c>null</c>.
/// </exception>
public ProvenanceHintBuilder(TimeProvider timeProvider)
{
    // Fail at construction rather than with a NullReferenceException on first use.
    ArgumentNullException.ThrowIfNull(timeProvider);
    _timeProvider = timeProvider;
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="ProvenanceHintType.BuildIdMatch"/> hint for a Build-ID.
/// </summary>
/// <remarks>
/// A catalog match gives confidence 0.95; no match gives 0.2 and adds an
/// "expand_catalog" action. The hint ID is derived from <paramref name="buildId"/> only.
/// </remarks>
/// <param name="buildId">The Build-ID value.</param>
/// <param name="buildIdType">The kind of Build-ID; copied into the evidence.</param>
/// <param name="match">Catalog match, or <c>null</c> when none was found.</param>
/// <returns>The populated hint.</returns>
public ProvenanceHint BuildFromBuildId(
    string buildId,
    string buildIdType,
    BuildIdMatchResult? match)
{
    double confidence;
    string summary;
    string hypothesis;

    if (match is null)
    {
        confidence = 0.2;
        summary = "Build-ID not matched";
        hypothesis = $"Build-ID {buildId} found but no catalog match";
    }
    else
    {
        confidence = 0.95;
        summary = $"Matched {match.Package}";
        hypothesis = $"Binary matches {match.Package} {match.Version} from {match.Distro}";
    }

    // Verification is always suggested; its link exists only when a match supplies one.
    List<SuggestedAction> actions =
    [
        new SuggestedAction
        {
            Action = "verify_build_id",
            Priority = 1,
            Effort = "low",
            Description = "Verify Build-ID against distro package repositories",
            Link = match?.AdvisoryLink
        }
    ];

    if (match is null)
    {
        actions.Add(new SuggestedAction
        {
            Action = "expand_catalog",
            Priority = 2,
            Effort = "medium",
            Description = "Add missing distros/packages to Build-ID catalog",
            Link = null
        });
    }

    var evidence = new ProvenanceEvidence
    {
        BuildId = new BuildIdEvidence
        {
            BuildId = buildId,
            BuildIdType = buildIdType,
            MatchedPackage = match?.Package,
            MatchedVersion = match?.Version,
            MatchedDistro = match?.Distro,
            CatalogSource = match?.CatalogSource
        }
    };

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.BuildIdMatch, buildId),
        Type = ProvenanceHintType.BuildIdMatch,
        Confidence = confidence,
        ConfidenceLevel = MapConfidenceLevel(confidence),
        Summary = summary,
        Hypothesis = hypothesis,
        Evidence = evidence,
        SuggestedActions = actions,
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "BuildIdAnalyzer"
    };
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="ProvenanceHintType.ImportTableFingerprint"/> hint.
/// </summary>
/// <remarks>
/// The match with the highest similarity (first one on ties) drives the confidence,
/// summary and hypothesis. Without any match the confidence is 0.3 and the hypothesis
/// shows a shortened fingerprint.
/// </remarks>
/// <param name="fingerprint">The import table fingerprint; may be any length.</param>
/// <param name="importedLibraries">Libraries the binary imports.</param>
/// <param name="matches">Candidate matches, or <c>null</c>/empty when there are none.</param>
/// <returns>The populated hint.</returns>
public ProvenanceHint BuildFromImportFingerprint(
    string fingerprint,
    IReadOnlyList<string> importedLibraries,
    IReadOnlyList<FingerprintMatch>? matches)
{
    // MaxBy returns null for an empty list and keeps the first element on ties,
    // matching the previous sort-then-first behavior without sorting.
    var bestMatch = matches?.MaxBy(m => m.Similarity);
    var confidence = bestMatch?.Similarity ?? 0.3;

    string hypothesis;
    if (bestMatch is not null)
    {
        hypothesis = $"Import table matches {bestMatch.Package} {bestMatch.Version} ({bestMatch.Similarity:P0} similar)";
    }
    else
    {
        // Show at most the first 12 characters. Slicing unconditionally used to throw
        // ArgumentOutOfRangeException for fingerprints shorter than that.
        var preview = fingerprint.Length >= 12 ? $"{fingerprint[..12]}..." : fingerprint;
        hypothesis = $"Import fingerprint {preview} ({importedLibraries.Count} imports)";
    }

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.ImportTableFingerprint, fingerprint),
        Type = ProvenanceHintType.ImportTableFingerprint,
        Confidence = confidence,
        ConfidenceLevel = MapConfidenceLevel(confidence),
        Summary = bestMatch is not null ? $"Matched {bestMatch.Package}" : "No fingerprint match",
        Hypothesis = hypothesis,
        Evidence = new ProvenanceEvidence
        {
            ImportFingerprint = new ImportFingerprintEvidence
            {
                Fingerprint = fingerprint,
                ImportedLibraries = importedLibraries,
                ImportCount = importedLibraries.Count,
                MatchedFingerprints = matches
            }
        },
        SuggestedActions =
        [
            new SuggestedAction
            {
                Action = "analyze_imports",
                Priority = 1,
                Effort = "low",
                Description = "Cross-reference imported libraries with package databases",
                Link = null
            }
        ],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "ImportFingerprintAnalyzer"
    };
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="ProvenanceHintType.SectionLayout"/> hint from a section list.
/// </summary>
/// <remarks>
/// The hint ID is derived from the layout hash of <paramref name="sections"/>.
/// The most similar match sets the confidence; with no match it is 0.25.
/// </remarks>
/// <param name="sections">Sections of the binary.</param>
/// <param name="matches">Candidate layout matches, or <c>null</c>/empty when there are none.</param>
/// <returns>The populated hint.</returns>
public ProvenanceHint BuildFromSectionLayout(
    IReadOnlyList<SectionInfo> sections,
    IReadOnlyList<LayoutMatch>? matches)
{
    string layoutHash = ComputeLayoutHash(sections);
    LayoutMatch? top = matches?.OrderByDescending(m => m.Similarity).FirstOrDefault();

    double confidence;
    string summary;
    string hypothesis;

    if (top is null)
    {
        confidence = 0.25;
        summary = "No layout match";
        hypothesis = $"Section layout: {sections.Count} sections, hash {layoutHash}";
    }
    else
    {
        confidence = top.Similarity;
        summary = $"Matched {top.Package}";
        hypothesis = $"Section layout matches {top.Package} ({top.Similarity:P0} similar)";
    }

    var evidence = new ProvenanceEvidence
    {
        SectionLayout = new SectionLayoutEvidence
        {
            Sections = sections,
            LayoutHash = layoutHash,
            MatchedLayouts = matches
        }
    };

    var compareAction = new SuggestedAction
    {
        Action = "compare_section_layout",
        Priority = 2,
        Effort = "medium",
        Description = "Compare section layout with known binaries",
        Link = null
    };

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.SectionLayout, layoutHash),
        Type = ProvenanceHintType.SectionLayout,
        Confidence = confidence,
        ConfidenceLevel = MapConfidenceLevel(confidence),
        Summary = summary,
        Hypothesis = hypothesis,
        Evidence = evidence,
        SuggestedActions = [compareAction],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "SectionLayoutAnalyzer"
    };
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="ProvenanceHintType.DistroPattern"/> hint with a fixed confidence of 0.7.
/// </summary>
/// <remarks>
/// The hint ID is derived from <paramref name="distro"/> and <paramref name="matchedPattern"/>;
/// <paramref name="release"/> and <paramref name="patternType"/> do not contribute to it.
/// </remarks>
/// <param name="distro">Distro name.</param>
/// <param name="release">Distro release, or <c>null</c> when unknown.</param>
/// <param name="patternType">Kind of pattern that matched.</param>
/// <param name="matchedPattern">The pattern that matched.</param>
/// <returns>The populated hint.</returns>
public ProvenanceHint BuildFromDistroPattern(
    string distro,
    string? release,
    string patternType,
    string matchedPattern)
{
    const double confidence = 0.7;

    string hypothesis;
    if (release is null)
    {
        hypothesis = $"Binary appears to be from {distro}";
    }
    else
    {
        hypothesis = $"Binary appears to be from {distro} {release}";
    }

    var evidence = new ProvenanceEvidence
    {
        DistroPattern = new DistroPatternEvidence
        {
            Distro = distro,
            Release = release,
            PatternType = patternType,
            MatchedPattern = matchedPattern
        }
    };

    var lookupAction = new SuggestedAction
    {
        Action = "distro_package_lookup",
        Priority = 1,
        Effort = "low",
        Description = $"Search {distro} package repositories",
        Link = GetDistroPackageSearchUrl(distro)
    };

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.DistroPattern, $"{distro}:{matchedPattern}"),
        Type = ProvenanceHintType.DistroPattern,
        Confidence = confidence,
        ConfidenceLevel = MapConfidenceLevel(confidence),
        Summary = $"Distro pattern: {distro}",
        Hypothesis = hypothesis,
        Evidence = evidence,
        SuggestedActions = [lookupAction],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "DistroPatternAnalyzer"
    };
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="ProvenanceHintType.VersionString"/> hint from extracted version strings.
/// </summary>
/// <remarks>
/// The entry with the highest confidence becomes the best guess; an empty list yields
/// confidence 0.3 and no best guess. The hint ID is derived from all values joined by commas.
/// </remarks>
/// <param name="versionStrings">Version strings found in the binary.</param>
/// <returns>The populated hint.</returns>
public ProvenanceHint BuildFromVersionStrings(
    IReadOnlyList<ExtractedVersionString> versionStrings)
{
    ExtractedVersionString? top = versionStrings
        .OrderByDescending(v => v.Confidence)
        .FirstOrDefault();

    double confidence;
    string hypothesis;

    if (top is null)
    {
        confidence = 0.3;
        hypothesis = "No clear version string found";
    }
    else
    {
        confidence = top.Confidence;
        hypothesis = $"Version appears to be {top.Value}";
    }

    string joinedValues = string.Join(",", versionStrings.Select(v => v.Value));

    var evidence = new ProvenanceEvidence
    {
        VersionString = new VersionStringEvidence
        {
            VersionStrings = versionStrings,
            BestGuess = top?.Value
        }
    };

    var verifyAction = new SuggestedAction
    {
        Action = "version_verification",
        Priority = 1,
        Effort = "low",
        Description = "Verify extracted version against known releases",
        Link = null
    };

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.VersionString, joinedValues),
        Type = ProvenanceHintType.VersionString,
        Confidence = confidence,
        ConfidenceLevel = MapConfidenceLevel(confidence),
        Summary = $"Found {versionStrings.Count} version string(s)",
        Hypothesis = hypothesis,
        Evidence = evidence,
        SuggestedActions = [verifyAction],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = "VersionStringExtractor"
    };
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="ProvenanceHintType.CorpusMatch"/> hint; the similarity is used
/// directly as the hint's confidence.
/// </summary>
/// <remarks>
/// A similarity of 0.9 or more is worded as a high-confidence match, anything lower as a
/// possible match. The hint ID is derived from the corpus name and the matched entry.
/// </remarks>
/// <param name="corpusName">Name of the corpus; also used to form the hint's source.</param>
/// <param name="matchedEntry">The corpus entry that matched.</param>
/// <param name="matchType">Kind of match; copied into the evidence.</param>
/// <param name="similarity">Similarity score of the match.</param>
/// <param name="metadata">Optional metadata copied into the evidence.</param>
/// <returns>The populated hint.</returns>
public ProvenanceHint BuildFromCorpusMatch(
    string corpusName,
    string matchedEntry,
    string matchType,
    double similarity,
    IReadOnlyDictionary<string, string>? metadata)
{
    string hypothesis;
    if (similarity >= 0.9)
    {
        hypothesis = $"High confidence match: {matchedEntry}";
    }
    else
    {
        hypothesis = $"Possible match: {matchedEntry} ({similarity:P0} similar)";
    }

    var evidence = new ProvenanceEvidence
    {
        CorpusMatch = new CorpusMatchEvidence
        {
            CorpusName = corpusName,
            MatchedEntry = matchedEntry,
            MatchType = matchType,
            Similarity = similarity,
            Metadata = metadata
        }
    };

    var verifyAction = new SuggestedAction
    {
        Action = "verify_corpus_match",
        Priority = 1,
        Effort = "low",
        Description = $"Verify match against {corpusName}",
        Link = null
    };

    return new ProvenanceHint
    {
        HintId = ComputeHintId(ProvenanceHintType.CorpusMatch, $"{corpusName}:{matchedEntry}"),
        Type = ProvenanceHintType.CorpusMatch,
        Confidence = similarity,
        ConfidenceLevel = MapConfidenceLevel(similarity),
        Summary = $"Corpus match: {matchedEntry}",
        Hypothesis = hypothesis,
        Evidence = evidence,
        SuggestedActions = [verifyAction],
        GeneratedAt = _timeProvider.GetUtcNow(),
        Source = $"{corpusName}Matcher"
    };
}
|
||||
|
||||
/// <summary>
/// Reduces a set of hints to one hypothesis and a confidence rounded to four decimals.
/// </summary>
/// <remarks>
/// <para>
/// Hints with confidence of at least 0.5 are grouped by the package name extracted from
/// their hypothesis. When the largest group has two or more members, its confidence is the
/// group's maximum plus 0.1 per additional member, capped at 0.99 but never below the
/// group's own maximum.
/// </para>
/// <para>
/// The group result is used only when it is at least as confident as the strongest single
/// hint. Otherwise that hint's hypothesis and confidence are returned, so agreement among
/// weaker hints cannot displace stronger evidence.
/// </para>
/// </remarks>
/// <param name="hints">Hints to combine; may be empty.</param>
/// <returns>
/// The chosen hypothesis and confidence, or a placeholder text with confidence 0.0 when
/// <paramref name="hints"/> is empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="hints"/> is <c>null</c>.</exception>
public (string Hypothesis, double Confidence) CombineHints(
    IReadOnlyList<ProvenanceHint> hints)
{
    ArgumentNullException.ThrowIfNull(hints);

    if (hints.Count == 0)
    {
        return ("No provenance hints available", 0.0);
    }

    // Stable sort: on equal confidence the earlier hint stays first, and groups below
    // are produced in order of their most confident member.
    var ranked = hints.OrderByDescending(h => h.Confidence).ToList();
    var bestHint = ranked[0];

    // Summarize each group once instead of re-enumerating the grouping repeatedly.
    var agreeing = ranked
        .Where(h => h.Confidence >= 0.5)
        .GroupBy(h => ExtractPackageFromHypothesis(h.Hypothesis))
        .Select(g => (g.Key, Count: g.Count(), Max: g.Max(h => h.Confidence)))
        .OrderByDescending(g => g.Count)
        .FirstOrDefault();

    // With no qualifying hints FirstOrDefault yields a default tuple whose Count is 0.
    if (agreeing.Count >= 2)
    {
        var boosted = Math.Min(0.99, agreeing.Max + (agreeing.Count - 1) * 0.1);

        // The 0.99 cap limits the boost only; it must not pull a group down when one of
        // its members is already more confident than that.
        var combinedConfidence = Math.Max(agreeing.Max, boosted);

        // Prefer the agreeing group only if it does not rank below the strongest hint.
        if (combinedConfidence >= bestHint.Confidence)
        {
            return (
                $"{agreeing.Key} (confirmed by {agreeing.Count} evidence sources)",
                Math.Round(combinedConfidence, 4)
            );
        }
    }

    return (bestHint.Hypothesis, Math.Round(bestHint.Confidence, 4));
}
|
||||
|
||||
/// <summary>
/// Derives a content-addressed hint ID of the form <c>hint:sha256:</c> followed by
/// 24 lowercase hex characters.
/// </summary>
/// <param name="type">Hint type; its name is part of the hashed input.</param>
/// <param name="evidence">Evidence text that identifies the hint.</param>
/// <returns>The hint ID.</returns>
private static string ComputeHintId(ProvenanceHintType type, string evidence)
{
    byte[] payload = Encoding.UTF8.GetBytes($"{type}:{evidence}");
    byte[] digest = SHA256.HashData(payload);

    // The first 12 digest bytes render as the 24 hex characters kept in the ID.
    string prefix = Convert.ToHexString(digest, 0, 12).ToLowerInvariant();
    return "hint:sha256:" + prefix;
}
|
||||
|
||||
/// <summary>
/// Classifies a numeric confidence into a <see cref="HintConfidence"/> bucket.
/// </summary>
/// <param name="confidence">Confidence score.</param>
/// <returns>
/// <c>VeryHigh</c> from 0.9, <c>High</c> from 0.7, <c>Medium</c> from 0.5, <c>Low</c> from 0.3,
/// and <c>VeryLow</c> for anything else (including NaN, which fails every comparison).
/// </returns>
private static HintConfidence MapConfidenceLevel(double confidence)
{
    if (confidence >= 0.9)
    {
        return HintConfidence.VeryHigh;
    }

    if (confidence >= 0.7)
    {
        return HintConfidence.High;
    }

    if (confidence >= 0.5)
    {
        return HintConfidence.Medium;
    }

    if (confidence >= 0.3)
    {
        return HintConfidence.Low;
    }

    return HintConfidence.VeryLow;
}
|
||||
|
||||
/// <summary>
/// Computes a 16-hex-character hash of a section layout that does not depend on the
/// order in which sections are supplied or on the current culture.
/// </summary>
/// <remarks>
/// Sections are sorted by name, rendered as <c>name:type:size</c> and joined with <c>|</c>
/// before hashing with SHA-256. Sections that share a name keep their input order.
/// </remarks>
/// <param name="sections">Sections to hash.</param>
/// <returns>The first 16 lowercase hex characters of the SHA-256 digest.</returns>
private static string ComputeLayoutHash(IReadOnlyList<SectionInfo> sections)
{
    // Ordinal ordering: the default string comparer is culture-sensitive, which would let
    // the same layout hash differently depending on the thread's current culture.
    var normalized = string.Join("|",
        sections
            .OrderBy(s => s.Name, StringComparer.Ordinal)
            .Select(s => $"{s.Name}:{s.Type}:{s.Size.ToString(CultureInfo.InvariantCulture)}"));
    var hash = SHA256.HashData(Encoding.UTF8.GetBytes(normalized));
    return Convert.ToHexString(hash).ToLowerInvariant()[..16];
}
|
||||
|
||||
/// <summary>
/// Returns the package search URL for a known distro, compared case-insensitively.
/// </summary>
/// <param name="distro">Distro name.</param>
/// <returns>The URL, or <c>null</c> when the distro is not one of the known names.</returns>
private static string? GetDistroPackageSearchUrl(string distro)
{
    switch (distro.ToLowerInvariant())
    {
        case "debian":
            return "https://packages.debian.org/search";
        case "ubuntu":
            return "https://packages.ubuntu.com/";
        case "rhel":
        case "centos":
            return "https://access.redhat.com/downloads";
        case "alpine":
            return "https://pkgs.alpinelinux.org/packages";
        default:
            return null;
    }
}
|
||||
|
||||
/// <summary>
/// Extracts the token that follows the first "match", "matches" or "from" in a hypothesis.
/// </summary>
/// <param name="hypothesis">Hypothesis text.</param>
/// <returns>
/// The first whitespace-delimited token after the keyword, or the whole
/// <paramref name="hypothesis"/> when the pattern does not occur.
/// </returns>
private static string ExtractPackageFromHypothesis(string hypothesis)
{
    var found = PackageExtractionRegex().Match(hypothesis);
    if (!found.Success)
    {
        return hypothesis;
    }

    return found.Groups[1].Value;
}

/// <summary>
/// Source-generated regex: "match", "matches" or "from", then whitespace, then a
/// captured run of non-whitespace characters.
/// </summary>
[GeneratedRegex(@"(?:matches?|from)\s+(\S+)")]
private static partial Regex PackageExtractionRegex();
|
||||
}
|
||||
@@ -0,0 +1,205 @@
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Unknowns.Core.Models;
|
||||
|
||||
/// <summary>
/// Evidence for a Build-ID hint: the Build-ID itself plus the details of the catalog
/// match, which are <c>null</c> when nothing matched.
/// </summary>
public sealed record BuildIdEvidence
{
    /// <summary>The Build-ID value.</summary>
    [JsonPropertyName("build_id")]
    public required string BuildId { get; init; }

    /// <summary>The kind of Build-ID.</summary>
    [JsonPropertyName("build_id_type")]
    public required string BuildIdType { get; init; }

    /// <summary>Package from the catalog match, if any.</summary>
    [JsonPropertyName("matched_package")]
    public string? MatchedPackage { get; init; }

    /// <summary>Version from the catalog match, if any.</summary>
    [JsonPropertyName("matched_version")]
    public string? MatchedVersion { get; init; }

    /// <summary>Distro from the catalog match, if any.</summary>
    [JsonPropertyName("matched_distro")]
    public string? MatchedDistro { get; init; }

    /// <summary>Catalog that produced the match, if known.</summary>
    [JsonPropertyName("catalog_source")]
    public string? CatalogSource { get; init; }
}
|
||||
|
||||
/// <summary>Evidence for a debug link hint.</summary>
public sealed record DebugLinkEvidence
{
    /// <summary>The debug link value.</summary>
    [JsonPropertyName("debug_link")]
    public required string DebugLink { get; init; }

    /// <summary>CRC32 value associated with the debug link, when available.</summary>
    [JsonPropertyName("crc32")]
    public uint? Crc32 { get; init; }

    /// <summary>Whether debug info was found.</summary>
    [JsonPropertyName("debug_info_found")]
    public bool DebugInfoFound { get; init; }

    /// <summary>Path of the debug info, when available.</summary>
    [JsonPropertyName("debug_info_path")]
    public string? DebugInfoPath { get; init; }
}
|
||||
|
||||
/// <summary>Evidence for an import table fingerprint hint.</summary>
public sealed record ImportFingerprintEvidence
{
    /// <summary>The import table fingerprint.</summary>
    [JsonPropertyName("fingerprint")]
    public required string Fingerprint { get; init; }

    /// <summary>Libraries the binary imports.</summary>
    [JsonPropertyName("imported_libraries")]
    public required IReadOnlyList<string> ImportedLibraries { get; init; }

    /// <summary>Number of imports.</summary>
    [JsonPropertyName("import_count")]
    public int ImportCount { get; init; }

    /// <summary>Candidate fingerprint matches, if any were supplied.</summary>
    [JsonPropertyName("matched_fingerprints")]
    public IReadOnlyList<FingerprintMatch>? MatchedFingerprints { get; init; }
}
|
||||
|
||||
/// <summary>Evidence for an export table fingerprint hint.</summary>
public sealed record ExportFingerprintEvidence
{
    /// <summary>The export table fingerprint.</summary>
    [JsonPropertyName("fingerprint")]
    public required string Fingerprint { get; init; }

    /// <summary>Number of exports.</summary>
    [JsonPropertyName("export_count")]
    public int ExportCount { get; init; }

    /// <summary>Optional list of notable exports.</summary>
    [JsonPropertyName("notable_exports")]
    public IReadOnlyList<string>? NotableExports { get; init; }

    /// <summary>Candidate fingerprint matches, if any were supplied.</summary>
    [JsonPropertyName("matched_fingerprints")]
    public IReadOnlyList<FingerprintMatch>? MatchedFingerprints { get; init; }
}
|
||||
|
||||
/// <summary>A single candidate match for a fingerprint.</summary>
public sealed record FingerprintMatch
{
    /// <summary>Matched package name.</summary>
    [JsonPropertyName("package")]
    public required string Package { get; init; }

    /// <summary>Matched package version.</summary>
    [JsonPropertyName("version")]
    public required string Version { get; init; }

    /// <summary>Similarity score of the match.</summary>
    [JsonPropertyName("similarity")]
    public required double Similarity { get; init; }

    /// <summary>Where the match came from.</summary>
    [JsonPropertyName("source")]
    public required string Source { get; init; }
}
|
||||
|
||||
/// <summary>Evidence for a section layout hint.</summary>
public sealed record SectionLayoutEvidence
{
    /// <summary>Sections of the binary.</summary>
    [JsonPropertyName("sections")]
    public required IReadOnlyList<SectionInfo> Sections { get; init; }

    /// <summary>Hash computed over the section layout.</summary>
    [JsonPropertyName("layout_hash")]
    public required string LayoutHash { get; init; }

    /// <summary>Candidate layout matches, if any were supplied.</summary>
    [JsonPropertyName("matched_layouts")]
    public IReadOnlyList<LayoutMatch>? MatchedLayouts { get; init; }
}
|
||||
|
||||
/// <summary>Describes one section of a binary for layout analysis.</summary>
public sealed record SectionInfo
{
    /// <summary>Section name.</summary>
    [JsonPropertyName("name")]
    public required string Name { get; init; }

    /// <summary>Section type.</summary>
    [JsonPropertyName("type")]
    public required string Type { get; init; }

    /// <summary>Section size.</summary>
    [JsonPropertyName("size")]
    public ulong Size { get; init; }

    /// <summary>Optional section flags.</summary>
    [JsonPropertyName("flags")]
    public string? Flags { get; init; }
}
|
||||
|
||||
/// <summary>A single candidate match for a section layout.</summary>
public sealed record LayoutMatch
{
    /// <summary>Matched package name.</summary>
    [JsonPropertyName("package")]
    public required string Package { get; init; }

    /// <summary>Similarity score of the match.</summary>
    [JsonPropertyName("similarity")]
    public required double Similarity { get; init; }
}
|
||||
|
||||
/// <summary>Evidence for a compiler signature hint.</summary>
public sealed record CompilerEvidence
{
    /// <summary>Compiler name.</summary>
    [JsonPropertyName("compiler")]
    public required string Compiler { get; init; }

    /// <summary>Compiler version, when known.</summary>
    [JsonPropertyName("version")]
    public string? Version { get; init; }

    /// <summary>Optional list of flags.</summary>
    [JsonPropertyName("flags")]
    public IReadOnlyList<string>? Flags { get; init; }

    /// <summary>How the compiler was detected.</summary>
    [JsonPropertyName("detection_method")]
    public required string DetectionMethod { get; init; }
}
|
||||
|
||||
/// <summary>Evidence for a distro pattern hint.</summary>
public sealed record DistroPatternEvidence
{
    /// <summary>Distro name.</summary>
    [JsonPropertyName("distro")]
    public required string Distro { get; init; }

    /// <summary>Distro release, when known.</summary>
    [JsonPropertyName("release")]
    public string? Release { get; init; }

    /// <summary>Kind of pattern that matched.</summary>
    [JsonPropertyName("pattern_type")]
    public required string PatternType { get; init; }

    /// <summary>The pattern that matched.</summary>
    [JsonPropertyName("matched_pattern")]
    public required string MatchedPattern { get; init; }

    /// <summary>Optional examples for the match.</summary>
    [JsonPropertyName("examples")]
    public IReadOnlyList<string>? Examples { get; init; }
}
|
||||
|
||||
/// <summary>Evidence for a version string hint.</summary>
public sealed record VersionStringEvidence
{
    /// <summary>All extracted version strings.</summary>
    [JsonPropertyName("version_strings")]
    public required IReadOnlyList<ExtractedVersionString> VersionStrings { get; init; }

    /// <summary>Value of the preferred version string, or <c>null</c> when there is none.</summary>
    [JsonPropertyName("best_guess")]
    public string? BestGuess { get; init; }
}
|
||||
|
||||
/// <summary>A version string found in a binary, with where it was found and how much it is trusted.</summary>
public sealed record ExtractedVersionString
{
    /// <summary>The version string text.</summary>
    [JsonPropertyName("value")]
    public required string Value { get; init; }

    /// <summary>Where the string was found.</summary>
    [JsonPropertyName("location")]
    public required string Location { get; init; }

    /// <summary>Confidence assigned to this string.</summary>
    [JsonPropertyName("confidence")]
    public double Confidence { get; init; }
}
|
||||
|
||||
/// <summary>Evidence for a corpus match hint.</summary>
public sealed record CorpusMatchEvidence
{
    /// <summary>Name of the corpus that produced the match.</summary>
    [JsonPropertyName("corpus_name")]
    public required string CorpusName { get; init; }

    /// <summary>The corpus entry that matched.</summary>
    [JsonPropertyName("matched_entry")]
    public required string MatchedEntry { get; init; }

    /// <summary>Kind of match.</summary>
    [JsonPropertyName("match_type")]
    public required string MatchType { get; init; }

    /// <summary>Similarity score of the match.</summary>
    [JsonPropertyName("similarity")]
    public required double Similarity { get; init; }

    /// <summary>Optional metadata attached to the match.</summary>
    [JsonPropertyName("metadata")]
    public IReadOnlyDictionary<string, string>? Metadata { get; init; }
}
|
||||
@@ -0,0 +1,124 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Unknowns.Core.Models;
|
||||
|
||||
/// <summary>
/// One piece of evidence about the identity of an unknown, together with the hypothesis
/// it supports, a confidence, and suggested follow-up actions. Immutable; the ID is
/// content-addressed.
/// </summary>
/// <remarks>
/// NOTE(review): the JSON schema lists <c>type</c> and <c>confidence_level</c> as strings;
/// confirm the serializer is configured to write enums by name.
/// </remarks>
public sealed record ProvenanceHint
{
    /// <summary>Content-addressed ID in the form <c>hint:sha256:</c> plus 24 hex characters.</summary>
    [JsonPropertyName("hint_id")]
    public required string HintId { get; init; }

    /// <summary>Kind of evidence this hint is based on.</summary>
    [JsonPropertyName("type")]
    public required ProvenanceHintType Type { get; init; }

    /// <summary>Confidence score, intended to lie between 0.0 and 1.0.</summary>
    [JsonPropertyName("confidence")]
    public required double Confidence { get; init; }

    /// <summary>Categorical classification of <see cref="Confidence"/>.</summary>
    [JsonPropertyName("confidence_level")]
    public required HintConfidence ConfidenceLevel { get; init; }

    /// <summary>Short human-readable summary.</summary>
    [JsonPropertyName("summary")]
    public required string Summary { get; init; }

    /// <summary>What this hint suggests the unknown is.</summary>
    [JsonPropertyName("hypothesis")]
    public required string Hypothesis { get; init; }

    /// <summary>Evidence details specific to <see cref="Type"/>.</summary>
    [JsonPropertyName("evidence")]
    public required ProvenanceEvidence Evidence { get; init; }

    /// <summary>Suggested actions for resolving the unknown, ordered by priority.</summary>
    [JsonPropertyName("suggested_actions")]
    public required IReadOnlyList<SuggestedAction> SuggestedActions { get; init; }

    /// <summary>UTC time at which the hint was generated.</summary>
    [JsonPropertyName("generated_at")]
    public required DateTimeOffset GeneratedAt { get; init; }

    /// <summary>Producer of the hint (analyzer, corpus, etc.).</summary>
    [JsonPropertyName("source")]
    public required string Source { get; init; }
}
|
||||
|
||||
/// <summary>
/// A follow-up step proposed for resolving an unknown.
/// </summary>
public sealed record SuggestedAction
{
    /// <summary>Machine-readable action identifier, e.g. "distro_package_lookup".</summary>
    [JsonPropertyName("action")]
    public required string Action { get; init; }

    /// <summary>Priority of the action; 1 is the highest.</summary>
    [JsonPropertyName("priority")]
    public required int Priority { get; init; }

    /// <summary>Estimated effort: low, medium or high.</summary>
    [JsonPropertyName("effort")]
    public required string Effort { get; init; }

    /// <summary>Human-readable description of the action.</summary>
    [JsonPropertyName("description")]
    public required string Description { get; init; }

    /// <summary>Optional link to documentation or a tool.</summary>
    [JsonPropertyName("link")]
    public string? Link { get; init; }
}
|
||||
|
||||
/// <summary>
/// Container for the evidence behind a provenance hint. Each property holds the details
/// for one kind of evidence; only one of them should be populated per hint.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="Raw"/> is a <see cref="JsonDocument"/>, which is disposable and
/// compared by reference in record equality; confirm who owns and disposes it.
/// </remarks>
public sealed record ProvenanceEvidence
{
    /// <summary>Details of a Build-ID match.</summary>
    [JsonPropertyName("build_id")]
    public BuildIdEvidence? BuildId { get; init; }

    /// <summary>Details of a debug link.</summary>
    [JsonPropertyName("debug_link")]
    public DebugLinkEvidence? DebugLink { get; init; }

    /// <summary>Details of an import table fingerprint.</summary>
    [JsonPropertyName("import_fingerprint")]
    public ImportFingerprintEvidence? ImportFingerprint { get; init; }

    /// <summary>Details of an export table fingerprint.</summary>
    [JsonPropertyName("export_fingerprint")]
    public ExportFingerprintEvidence? ExportFingerprint { get; init; }

    /// <summary>Details of a section layout comparison.</summary>
    [JsonPropertyName("section_layout")]
    public SectionLayoutEvidence? SectionLayout { get; init; }

    /// <summary>Details of a compiler signature.</summary>
    [JsonPropertyName("compiler")]
    public CompilerEvidence? Compiler { get; init; }

    /// <summary>Details of a distro pattern match.</summary>
    [JsonPropertyName("distro_pattern")]
    public DistroPatternEvidence? DistroPattern { get; init; }

    /// <summary>Details of version string extraction.</summary>
    [JsonPropertyName("version_string")]
    public VersionStringEvidence? VersionString { get; init; }

    /// <summary>Details of a corpus match.</summary>
    [JsonPropertyName("corpus_match")]
    public CorpusMatchEvidence? CorpusMatch { get; init; }

    /// <summary>Raw evidence as JSON, for kinds without a dedicated property.</summary>
    [JsonPropertyName("raw")]
    public JsonDocument? Raw { get; init; }
}
|
||||
@@ -0,0 +1,74 @@
|
||||
namespace StellaOps.Unknowns.Core.Models;
|
||||
|
||||
/// <summary>
/// Kinds of provenance hint. Each kind names the sort of evidence that explains why
/// something is unknown and helps resolve it.
/// </summary>
/// <remarks>Numeric values are written out and match the implicit declaration order.</remarks>
public enum ProvenanceHintType
{
    /// <summary>ELF/PE Build-ID matched against a known catalog.</summary>
    BuildIdMatch = 0,

    /// <summary>Debug link (.gnu_debuglink) reference.</summary>
    DebugLink = 1,

    /// <summary>Comparison of import table fingerprints.</summary>
    ImportTableFingerprint = 2,

    /// <summary>Comparison of export table fingerprints.</summary>
    ExportTableFingerprint = 3,

    /// <summary>Similarity of section layouts.</summary>
    SectionLayout = 4,

    /// <summary>Match on a string table signature.</summary>
    StringTableSignature = 5,

    /// <summary>Identification of the compiler or linker.</summary>
    CompilerSignature = 6,

    /// <summary>Package manager metadata (RPATH, NEEDED, etc.).</summary>
    PackageMetadata = 7,

    /// <summary>Match on a distro or vendor pattern.</summary>
    DistroPattern = 8,

    /// <summary>Extraction of version strings.</summary>
    VersionString = 9,

    /// <summary>Match on a symbol name pattern.</summary>
    SymbolPattern = 10,

    /// <summary>Match on a file path pattern.</summary>
    PathPattern = 11,

    /// <summary>Hash match against a known corpus.</summary>
    CorpusMatch = 12,

    /// <summary>Cross-reference with an SBOM.</summary>
    SbomCrossReference = 13,

    /// <summary>Cross-reference with an advisory.</summary>
    AdvisoryCrossReference = 14
}
|
||||
|
||||
/// <summary>
/// Categorical confidence of a provenance hint, from most to least confident.
/// </summary>
/// <remarks>Numeric values are written out and match the implicit declaration order.</remarks>
public enum HintConfidence
{
    /// <summary>Very high confidence: 0.9 and above.</summary>
    VeryHigh = 0,

    /// <summary>High confidence: from 0.7 up to 0.9.</summary>
    High = 1,

    /// <summary>Medium confidence: from 0.5 up to 0.7.</summary>
    Medium = 2,

    /// <summary>Low confidence: from 0.3 up to 0.5.</summary>
    Low = 3,

    /// <summary>Very low confidence: below 0.3.</summary>
    VeryLow = 4
}
|
||||
@@ -143,6 +143,20 @@ public sealed record Unknown
|
||||
/// <summary>When this record was last updated.</summary>
|
||||
public DateTimeOffset UpdatedAt { get; init; }
|
||||
|
||||
// Provenance Hints
|
||||
|
||||
/// <summary>Structured provenance hints about this unknown's identity.</summary>
|
||||
public IReadOnlyList<ProvenanceHint> ProvenanceHints { get; init; } = [];
|
||||
|
||||
/// <summary>Best hypothesis based on hints (highest confidence).</summary>
|
||||
public string? BestHypothesis { get; init; }
|
||||
|
||||
/// <summary>Combined confidence from all hints.</summary>
|
||||
public double? CombinedConfidence { get; init; }
|
||||
|
||||
/// <summary>Primary suggested action (highest priority).</summary>
|
||||
public string? PrimarySuggestedAction { get; init; }
|
||||
|
||||
// Computed properties
|
||||
|
||||
/// <summary>Whether this unknown is currently open (valid and not superseded).</summary>
|
||||
|
||||
@@ -190,6 +190,27 @@ public interface IUnknownRepository
|
||||
Task<IReadOnlyList<TriageSummary>> GetTriageSummaryAsync(
|
||||
string tenantId,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Attaches provenance hints to an unknown.
|
||||
/// </summary>
|
||||
Task<Unknown> AttachProvenanceHintsAsync(
|
||||
string tenantId,
|
||||
Guid id,
|
||||
IReadOnlyList<ProvenanceHint> hints,
|
||||
string? bestHypothesis,
|
||||
double? combinedConfidence,
|
||||
string? primarySuggestedAction,
|
||||
CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Gets unknowns with provenance hints above a confidence threshold.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<Unknown>> GetWithHighConfidenceHintsAsync(
|
||||
string tenantId,
|
||||
double minConfidence = 0.7,
|
||||
int? limit = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -0,0 +1,316 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"$id": "https://stellaops.org/schemas/provenance-hint.schema.json",
|
||||
"title": "ProvenanceHint",
|
||||
"description": "A provenance hint providing evidence about an unknown's identity",
|
||||
"type": "object",
|
||||
"required": [
|
||||
"hint_id",
|
||||
"type",
|
||||
"confidence",
|
||||
"confidence_level",
|
||||
"summary",
|
||||
"hypothesis",
|
||||
"evidence",
|
||||
"suggested_actions",
|
||||
"generated_at",
|
||||
"source"
|
||||
],
|
||||
"properties": {
|
||||
"hint_id": {
|
||||
"type": "string",
|
||||
"pattern": "^hint:sha256:[0-9a-f]{24}$",
|
||||
"description": "Content-addressed unique identifier"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"BuildIdMatch",
|
||||
"DebugLink",
|
||||
"ImportTableFingerprint",
|
||||
"ExportTableFingerprint",
|
||||
"SectionLayout",
|
||||
"StringTableSignature",
|
||||
"CompilerSignature",
|
||||
"PackageMetadata",
|
||||
"DistroPattern",
|
||||
"VersionString",
|
||||
"SymbolPattern",
|
||||
"PathPattern",
|
||||
"CorpusMatch",
|
||||
"SbomCrossReference",
|
||||
"AdvisoryCrossReference"
|
||||
],
|
||||
"description": "Type of provenance hint"
|
||||
},
|
||||
"confidence": {
|
||||
"type": "number",
|
||||
"minimum": 0.0,
|
||||
"maximum": 1.0,
|
||||
"description": "Confidence score (0.0 - 1.0)"
|
||||
},
|
||||
"confidence_level": {
|
||||
"type": "string",
|
||||
"enum": ["VeryHigh", "High", "Medium", "Low", "VeryLow"],
|
||||
"description": "Categorical confidence level"
|
||||
},
|
||||
"summary": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"description": "Human-readable summary of the hint"
|
||||
},
|
||||
"hypothesis": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"description": "Hypothesis about the unknown's identity"
|
||||
},
|
||||
"evidence": {
|
||||
"$ref": "#/definitions/ProvenanceEvidence"
|
||||
},
|
||||
"suggested_actions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/SuggestedAction"
|
||||
},
|
||||
"minItems": 1,
|
||||
"description": "Suggested resolution actions ordered by priority"
|
||||
},
|
||||
"generated_at": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "When this hint was generated (UTC)"
|
||||
},
|
||||
"source": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"description": "Source of the hint (analyzer, corpus, etc.)"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
||||
"definitions": {
|
||||
"ProvenanceEvidence": {
|
||||
"type": "object",
|
||||
"description": "Type-specific evidence (only one field should be populated)",
|
||||
"properties": {
|
||||
"build_id": { "$ref": "#/definitions/BuildIdEvidence" },
|
||||
"debug_link": { "$ref": "#/definitions/DebugLinkEvidence" },
|
||||
"import_fingerprint": { "$ref": "#/definitions/ImportFingerprintEvidence" },
|
||||
"export_fingerprint": { "$ref": "#/definitions/ExportFingerprintEvidence" },
|
||||
"section_layout": { "$ref": "#/definitions/SectionLayoutEvidence" },
|
||||
"compiler": { "$ref": "#/definitions/CompilerEvidence" },
|
||||
"distro_pattern": { "$ref": "#/definitions/DistroPatternEvidence" },
|
||||
"version_string": { "$ref": "#/definitions/VersionStringEvidence" },
|
||||
"corpus_match": { "$ref": "#/definitions/CorpusMatchEvidence" },
|
||||
"raw": {
|
||||
"type": "object",
|
||||
"description": "Raw evidence as JSON (for extensibility)"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
|
||||
"BuildIdEvidence": {
|
||||
"type": "object",
|
||||
"required": ["build_id", "build_id_type"],
|
||||
"properties": {
|
||||
"build_id": { "type": "string" },
|
||||
"build_id_type": { "type": "string" },
|
||||
"matched_package": { "type": "string" },
|
||||
"matched_version": { "type": "string" },
|
||||
"matched_distro": { "type": "string" },
|
||||
"catalog_source": { "type": "string" }
|
||||
}
|
||||
},
|
||||
|
||||
"DebugLinkEvidence": {
|
||||
"type": "object",
|
||||
"required": ["debug_link", "debug_info_found"],
|
||||
"properties": {
|
||||
"debug_link": { "type": "string" },
|
||||
"crc32": { "type": "integer", "minimum": 0 },
|
||||
"debug_info_found": { "type": "boolean" },
|
||||
"debug_info_path": { "type": "string" }
|
||||
}
|
||||
},
|
||||
|
||||
"ImportFingerprintEvidence": {
|
||||
"type": "object",
|
||||
"required": ["fingerprint", "imported_libraries", "import_count"],
|
||||
"properties": {
|
||||
"fingerprint": { "type": "string" },
|
||||
"imported_libraries": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
},
|
||||
"import_count": { "type": "integer", "minimum": 0 },
|
||||
"matched_fingerprints": {
|
||||
"type": "array",
|
||||
"items": { "$ref": "#/definitions/FingerprintMatch" }
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"ExportFingerprintEvidence": {
|
||||
"type": "object",
|
||||
"required": ["fingerprint", "export_count"],
|
||||
"properties": {
|
||||
"fingerprint": { "type": "string" },
|
||||
"export_count": { "type": "integer", "minimum": 0 },
|
||||
"notable_exports": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
},
|
||||
"matched_fingerprints": {
|
||||
"type": "array",
|
||||
"items": { "$ref": "#/definitions/FingerprintMatch" }
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"FingerprintMatch": {
|
||||
"type": "object",
|
||||
"required": ["package", "version", "similarity", "source"],
|
||||
"properties": {
|
||||
"package": { "type": "string" },
|
||||
"version": { "type": "string" },
|
||||
"similarity": { "type": "number", "minimum": 0, "maximum": 1 },
|
||||
"source": { "type": "string" }
|
||||
}
|
||||
},
|
||||
|
||||
"SectionLayoutEvidence": {
|
||||
"type": "object",
|
||||
"required": ["sections", "layout_hash"],
|
||||
"properties": {
|
||||
"sections": {
|
||||
"type": "array",
|
||||
"items": { "$ref": "#/definitions/SectionInfo" }
|
||||
},
|
||||
"layout_hash": { "type": "string" },
|
||||
"matched_layouts": {
|
||||
"type": "array",
|
||||
"items": { "$ref": "#/definitions/LayoutMatch" }
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"SectionInfo": {
|
||||
"type": "object",
|
||||
"required": ["name", "type", "size"],
|
||||
"properties": {
|
||||
"name": { "type": "string" },
|
||||
"type": { "type": "string" },
|
||||
"size": { "type": "integer", "minimum": 0 },
|
||||
"flags": { "type": "string" }
|
||||
}
|
||||
},
|
||||
|
||||
"LayoutMatch": {
|
||||
"type": "object",
|
||||
"required": ["package", "similarity"],
|
||||
"properties": {
|
||||
"package": { "type": "string" },
|
||||
"similarity": { "type": "number", "minimum": 0, "maximum": 1 }
|
||||
}
|
||||
},
|
||||
|
||||
"CompilerEvidence": {
|
||||
"type": "object",
|
||||
"required": ["compiler", "detection_method"],
|
||||
"properties": {
|
||||
"compiler": { "type": "string" },
|
||||
"version": { "type": "string" },
|
||||
"flags": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
},
|
||||
"detection_method": { "type": "string" }
|
||||
}
|
||||
},
|
||||
|
||||
"DistroPatternEvidence": {
|
||||
"type": "object",
|
||||
"required": ["distro", "pattern_type", "matched_pattern"],
|
||||
"properties": {
|
||||
"distro": { "type": "string" },
|
||||
"release": { "type": "string" },
|
||||
"pattern_type": { "type": "string" },
|
||||
"matched_pattern": { "type": "string" },
|
||||
"examples": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"VersionStringEvidence": {
|
||||
"type": "object",
|
||||
"required": ["version_strings"],
|
||||
"properties": {
|
||||
"version_strings": {
|
||||
"type": "array",
|
||||
"items": { "$ref": "#/definitions/ExtractedVersionString" }
|
||||
},
|
||||
"best_guess": { "type": "string" }
|
||||
}
|
||||
},
|
||||
|
||||
"ExtractedVersionString": {
|
||||
"type": "object",
|
||||
"required": ["value", "location", "confidence"],
|
||||
"properties": {
|
||||
"value": { "type": "string" },
|
||||
"location": { "type": "string" },
|
||||
"confidence": { "type": "number", "minimum": 0, "maximum": 1 }
|
||||
}
|
||||
},
|
||||
|
||||
"CorpusMatchEvidence": {
|
||||
"type": "object",
|
||||
"required": ["corpus_name", "matched_entry", "match_type", "similarity"],
|
||||
"properties": {
|
||||
"corpus_name": { "type": "string" },
|
||||
"matched_entry": { "type": "string" },
|
||||
"match_type": { "type": "string" },
|
||||
"similarity": { "type": "number", "minimum": 0, "maximum": 1 },
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": { "type": "string" }
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"SuggestedAction": {
|
||||
"type": "object",
|
||||
"required": ["action", "priority", "effort", "description"],
|
||||
"properties": {
|
||||
"action": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"description": "Action identifier"
|
||||
},
|
||||
"priority": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"description": "Priority (1 = highest)"
|
||||
},
|
||||
"effort": {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"],
|
||||
"description": "Estimated effort"
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"description": "Human-readable description"
|
||||
},
|
||||
"link": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"description": "Optional link to documentation or tool"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using StellaOps.Unknowns.Core.Hints;
|
||||
|
||||
namespace StellaOps.Unknowns.Core;
|
||||
|
||||
/// <summary>
/// Service-collection registration helpers for the Unknowns.Core library.
/// </summary>
public static class UnknownsServiceExtensions
{
    /// <summary>
    /// Adds the provenance hint builder, plus the system clock, to the container.
    /// </summary>
    /// <remarks>
    /// Both registrations use try-add semantics, so a registration that is already
    /// present for either service type is left untouched.
    /// </remarks>
    /// <param name="services">Collection to register into.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddProvenanceHintBuilder(
        this IServiceCollection services)
    {
        var builderDescriptor = ServiceDescriptor.Singleton<IProvenanceHintBuilder, ProvenanceHintBuilder>();
        services.TryAdd(builderDescriptor);

        var clockDescriptor = ServiceDescriptor.Singleton<TimeProvider>(TimeProvider.System);
        services.TryAdd(clockDescriptor);

        return services;
    }
}
|
||||
@@ -0,0 +1,101 @@
|
||||
-- Unknowns Schema Migration 002: Provenance Hints
|
||||
-- Category: A (safe, can run at startup)
|
||||
--
|
||||
-- Purpose: Add support for structured provenance hints that explain why
|
||||
-- something is unknown and provide hypotheses for resolution.
|
||||
--
|
||||
-- Implements SPRINT_20260106_001_005_UNKNOWNS requirements:
|
||||
-- - Store provenance hints as JSONB array
|
||||
-- - Track best hypothesis and combined confidence
|
||||
-- - Enable efficient querying by confidence threshold
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- ============================================================================
|
||||
-- Step 1: Add provenance hint columns to unknowns table
|
||||
-- ============================================================================
|
||||
|
||||
-- Adds the four provenance-hint columns. Every clause uses IF NOT EXISTS, so the
-- statement can be re-run safely.
--
-- combined_confidence is NUMERIC(5,4): one integer digit plus four decimals.
-- NUMERIC(4,4) leaves no integer digit and can only hold values below 1, so the
-- value 1.0 -- which the CHECK below explicitly allows -- would be rejected with
-- a numeric overflow.
-- NOTE(review): ADD COLUMN IF NOT EXISTS does not change an existing column; a
-- database that already has the column as NUMERIC(4,4) needs a separate
-- ALTER COLUMN ... TYPE NUMERIC(5,4).
ALTER TABLE IF EXISTS unknowns.unknowns
    ADD COLUMN IF NOT EXISTS provenance_hints JSONB DEFAULT '[]'::jsonb NOT NULL,
    ADD COLUMN IF NOT EXISTS best_hypothesis TEXT,
    ADD COLUMN IF NOT EXISTS combined_confidence NUMERIC(5,4) CHECK (combined_confidence IS NULL OR (combined_confidence >= 0 AND combined_confidence <= 1)),
    ADD COLUMN IF NOT EXISTS primary_suggested_action TEXT;

COMMENT ON COLUMN unknowns.unknowns.provenance_hints IS
    'Array of structured provenance hints (ProvenanceHint records)';

COMMENT ON COLUMN unknowns.unknowns.best_hypothesis IS
    'Best hypothesis from all hints (highest confidence)';

COMMENT ON COLUMN unknowns.unknowns.combined_confidence IS
    'Combined confidence score from all hints (0.0 - 1.0)';

COMMENT ON COLUMN unknowns.unknowns.primary_suggested_action IS
    'Primary suggested action (highest priority)';
|
||||
|
||||
-- ============================================================================
|
||||
-- Step 2: Create GIN index for efficient hint querying
|
||||
-- ============================================================================
|
||||
|
||||
-- GIN index over the whole provenance_hints document. No operator class is
-- named, so PostgreSQL's default jsonb GIN operator class applies.
CREATE INDEX IF NOT EXISTS idx_unknowns_provenance_hints_gin
|
||||
ON unknowns.unknowns USING GIN (provenance_hints);
|
||||
|
||||
COMMENT ON INDEX unknowns.idx_unknowns_provenance_hints_gin IS
|
||||
'GIN index for efficient JSONB queries on provenance hints';
|
||||
|
||||
-- ============================================================================
|
||||
-- Step 3: Create index for high-confidence hint queries
|
||||
-- ============================================================================
|
||||
|
||||
-- Partial B-tree index: only rows with combined_confidence >= 0.7 are indexed,
-- ordered per tenant with the highest confidence first. The 0.7 cut-off equals
-- the default minConfidence of IUnknownRepository.GetWithHighConfidenceHintsAsync.
-- NOTE(review): a query whose threshold is below 0.7 (or is not provably >= 0.7
-- at plan time) cannot use this partial index -- confirm that is acceptable.
CREATE INDEX IF NOT EXISTS idx_unknowns_combined_confidence
|
||||
ON unknowns.unknowns (tenant_id, combined_confidence DESC)
|
||||
WHERE combined_confidence IS NOT NULL AND combined_confidence >= 0.7;
|
||||
|
||||
COMMENT ON INDEX unknowns.idx_unknowns_combined_confidence IS
|
||||
'Partial index for high-confidence provenance hint queries';
|
||||
|
||||
-- ============================================================================
|
||||
-- Step 4: JSON schema validation function (optional)
|
||||
-- ============================================================================
|
||||
|
||||
-- Structural check for the provenance_hints column.
-- Returns FALSE when the value is not a JSON array, or when any array element
-- lacks one of the keys hint_id, type, confidence, hypothesis, evidence.
-- Returns TRUE otherwise (including for an empty array). Only key presence is
-- checked; value types and the remaining schema fields are not validated here.
CREATE OR REPLACE FUNCTION unknowns.validate_provenance_hints(hints JSONB)
RETURNS BOOLEAN
LANGUAGE plpgsql IMMUTABLE
AS $$
BEGIN
    -- Anything other than an array is rejected outright.
    IF jsonb_typeof(hints) <> 'array' THEN
        RETURN FALSE;
    END IF;

    -- Valid when no element is missing any of the mandatory keys.
    RETURN NOT EXISTS (
        SELECT 1
        FROM jsonb_array_elements(hints) AS element
        WHERE NOT (
            element ?& ARRAY['hint_id', 'type', 'confidence', 'hypothesis', 'evidence']
        )
    );
END;
$$;

COMMENT ON FUNCTION unknowns.validate_provenance_hints IS
    'Validates that provenance_hints JSONB conforms to expected schema';
|
||||
|
||||
-- ============================================================================
|
||||
-- Step 5: Add validation constraint
|
||||
-- ============================================================================
|
||||
|
||||
-- Adds the validation CHECK only when it is not already present.
-- PostgreSQL has no "ADD CONSTRAINT IF NOT EXISTS", so a plain ALTER TABLE would
-- fail with "constraint already exists" on a second run, unlike every other
-- statement in this startup-safe migration. to_regclass() returns NULL instead
-- of raising when the table is missing, which preserves the original
-- ALTER TABLE IF EXISTS behaviour of silently skipping.
DO $$
DECLARE
    target_table regclass := to_regclass('unknowns.unknowns');
BEGIN
    IF target_table IS NULL THEN
        RETURN;
    END IF;

    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conrelid = target_table
          AND conname = 'chk_provenance_hints_valid'
    ) THEN
        ALTER TABLE unknowns.unknowns
            ADD CONSTRAINT chk_provenance_hints_valid
            CHECK (unknowns.validate_provenance_hints(provenance_hints));
    END IF;
END;
$$;
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,215 @@
|
||||
using StellaOps.Unknowns.Core.Hints;
|
||||
using StellaOps.Unknowns.Core.Models;
|
||||
using Xunit;
|
||||
using FluentAssertions;
|
||||
|
||||
namespace StellaOps.Unknowns.Core.Tests.Hints;
|
||||
|
||||
/// <summary>
/// Verifies how <c>CombineHints</c> merges individual hints into a single
/// hypothesis and confidence score.
/// </summary>
public sealed class HintCombinationTests
{
    private readonly ProvenanceHintBuilder _builder = new ProvenanceHintBuilder(TimeProvider.System);

    /// <summary>No hints: placeholder hypothesis and a zero score.</summary>
    [Fact]
    public void CombineHints_EmptyList_ReturnsZeroConfidence()
    {
        // Act
        var (bestGuess, score) = _builder.CombineHints([]);

        // Assert
        bestGuess.Should().Be("No provenance hints available");
        score.Should().Be(0.0);
    }

    /// <summary>A single hint is passed through unchanged.</summary>
    [Fact]
    public void CombineHints_SingleHighConfidenceHint_ReturnsHypothesisAndConfidence()
    {
        // Arrange
        ProvenanceHint[] evidence =
        [
            CreateBuildIdHint("openssl", 0.95),
        ];

        // Act
        var (bestGuess, score) = _builder.CombineHints(evidence);

        // Assert
        bestGuess.Should().Contain("openssl");
        score.Should().Be(0.95);
    }

    /// <summary>Three hints naming the same package raise the score and are counted in the hypothesis.</summary>
    [Fact]
    public void CombineHints_MultipleAgreeingHints_BoostsConfidence()
    {
        // Arrange - every hint names the same package
        ProvenanceHint[] evidence =
        [
            CreateBuildIdHint("openssl", 0.85),
            CreateImportHint("openssl", 0.80),
            CreateVersionHint("openssl", 0.70),
        ];

        // Act
        var (bestGuess, score) = _builder.CombineHints(evidence);

        // Assert
        score.Should().BeGreaterThan(0.85); // Boosted from multiple agreeing hints
        bestGuess.Should().Contain("confirmed by");
        bestGuess.Should().Contain("3 evidence sources");
    }

    /// <summary>Hints naming different packages fall back to the strongest single hint.</summary>
    [Fact]
    public void CombineHints_MultipleDisagreeingHints_UsesBestSingleHint()
    {
        // Arrange - each hint names a different package
        ProvenanceHint[] evidence =
        [
            CreateBuildIdHint("openssl", 0.95),
            CreateImportHint("curl", 0.80),
            CreateVersionHint("wget", 0.70),
        ];

        // Act
        var (bestGuess, score) = _builder.CombineHints(evidence);

        // Assert
        score.Should().Be(0.95); // Highest single hint
        bestGuess.Should().Contain("openssl"); // Best match
        bestGuess.Should().NotContain("confirmed by"); // No agreement
    }

    /// <summary>Two agreeing hints produce a boosted score that stays below 1.0.</summary>
    [Fact]
    public void CombineHints_TwoAgreeingHighConfidence_CombinesConfidence()
    {
        // Arrange
        ProvenanceHint[] evidence =
        [
            CreateBuildIdHint("curl", 0.90),
            CreateVersionHint("curl", 0.75),
        ];

        // Act
        var (bestGuess, score) = _builder.CombineHints(evidence);

        // Assert
        score.Should().BeGreaterThan(0.90);
        score.Should().BeLessThan(1.0); // Capped at 0.99
        bestGuess.Should().Contain("confirmed by");
        bestGuess.Should().Contain("2 evidence sources");
    }

    /// <summary>A 0.25-confidence hint does not count as confirmation of a strong hint.</summary>
    [Fact]
    public void CombineHints_OneLowConfidenceOneHigh_UsesHighConfidenceOnly()
    {
        // Arrange
        ProvenanceHint[] evidence =
        [
            CreateBuildIdHint("openssl", 0.95),
            CreateVersionHint("openssl", 0.25), // Below 0.5 threshold
        ];

        // Act
        var (bestGuess, score) = _builder.CombineHints(evidence);

        // Assert
        score.Should().Be(0.95); // Only high-confidence hint used
        bestGuess.Should().NotContain("confirmed by"); // Low confidence ignored
    }

    /// <summary>
    /// Many agreeing strong hints never push the score past 0.99.
    /// NOTE(review): the method name says "Three" but four hints are supplied and
    /// "4 evidence sources" is asserted.
    /// </summary>
    [Fact]
    public void CombineHints_ThreeAgreeingHints_DoesNotExceed099()
    {
        // Arrange - several agreeing high-confidence hints
        ProvenanceHint[] evidence =
        [
            CreateBuildIdHint("nginx", 0.95),
            CreateImportHint("nginx", 0.92),
            CreateVersionHint("nginx", 0.88),
            CreateCorpusHint("nginx", 0.85),
        ];

        // Act
        var (bestGuess, score) = _builder.CombineHints(evidence);

        // Assert
        score.Should().BeLessThanOrEqualTo(0.99);
        bestGuess.Should().Contain("confirmed by");
        bestGuess.Should().Contain("4 evidence sources");
    }

    /// <summary>Of three same-package hints, only the two stronger ones are counted as sources.</summary>
    [Fact]
    public void CombineHints_MixedConfidencesSamePackage_CountsOnlyHighConfidence()
    {
        // Arrange
        ProvenanceHint[] evidence =
        [
            CreateBuildIdHint("bash", 0.90), // High
            CreateImportHint("bash", 0.60), // Medium
            CreateVersionHint("bash", 0.30), // Low (excluded)
        ];

        // Act
        var (bestGuess, _) = _builder.CombineHints(evidence);

        // Assert
        bestGuess.Should().Contain("confirmed by");
        bestGuess.Should().Contain("2 evidence sources"); // Only high+medium
    }

    // Hint factories used by the tests above

    /// <summary>
    /// Builds a Build-ID hint whose catalog match names <paramref name="package"/>.
    /// NOTE(review): <paramref name="confidence"/> is not forwarded to the builder;
    /// the builder tests expect a matched Build-ID hint to carry 0.95 regardless,
    /// so the values callers pass here act only as labels. Confirm this is intended.
    /// </summary>
    private ProvenanceHint CreateBuildIdHint(string package, double confidence) =>
        _builder.BuildFromBuildId(
            "test-build-id",
            "sha1",
            new BuildIdMatchResult
            {
                Package = package,
                Version = "1.0.0",
                Distro = "debian"
            });

    /// <summary>Builds an import-fingerprint hint with one match at the given similarity.</summary>
    private ProvenanceHint CreateImportHint(string package, double similarity)
    {
        FingerprintMatch[] candidates =
        [
            new FingerprintMatch
            {
                Package = package,
                Version = "1.0.0",
                Similarity = similarity,
                Source = "test-corpus"
            },
        ];
        string[] libraries = ["lib1.so"];

        return _builder.BuildFromImportFingerprint("fp-test", libraries, candidates);
    }

    /// <summary>Builds a version-string hint from one extracted string with the given confidence.</summary>
    private ProvenanceHint CreateVersionHint(string package, double confidence)
    {
        ExtractedVersionString[] extracted =
        [
            new ExtractedVersionString
            {
                Value = $"{package} 1.0.0",
                Location = ".rodata",
                Confidence = confidence
            },
        ];

        return _builder.BuildFromVersionStrings(extracted);
    }

    /// <summary>Builds a corpus-match hint for "<paramref name="package"/>/1.0.0" with no metadata.</summary>
    private ProvenanceHint CreateCorpusHint(string package, double similarity) =>
        _builder.BuildFromCorpusMatch("test-corpus", $"{package}/1.0.0", "hash", similarity, null);
}
|
||||
@@ -0,0 +1,281 @@
|
||||
using StellaOps.Unknowns.Core.Hints;
|
||||
using StellaOps.Unknowns.Core.Models;
|
||||
using Xunit;
|
||||
using FluentAssertions;
|
||||
|
||||
namespace StellaOps.Unknowns.Core.Tests.Hints;
|
||||
|
||||
/// <summary>
/// Covers each <c>ProvenanceHintBuilder</c> factory method, plus hint combination
/// and hint-id determinism.
/// </summary>
public sealed class ProvenanceHintBuilderTests
{
    private readonly ProvenanceHintBuilder _builder = new ProvenanceHintBuilder(TimeProvider.System);

    /// <summary>A catalog match yields a 0.95 / VeryHigh Build-ID hint naming package, version and distro.</summary>
    [Fact]
    public void BuildFromBuildId_WithMatch_CreatesVeryHighConfidenceHint()
    {
        // Arrange
        var catalogMatch = new BuildIdMatchResult
        {
            Package = "openssl",
            Version = "1.1.1k",
            Distro = "debian",
            CatalogSource = "debian-security"
        };

        // Act
        var result = _builder.BuildFromBuildId("abc123", "sha1", catalogMatch);

        // Assert
        result.Type.Should().Be(ProvenanceHintType.BuildIdMatch);
        result.Confidence.Should().Be(0.95);
        result.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
        result.Hypothesis.Should().Contain("openssl");
        result.Hypothesis.Should().Contain("1.1.1k");
        result.Hypothesis.Should().Contain("debian");

        var buildIdEvidence = result.Evidence.BuildId;
        buildIdEvidence.Should().NotBeNull();
        buildIdEvidence!.BuildId.Should().Be("abc123");
        buildIdEvidence.MatchedPackage.Should().Be("openssl");

        result.SuggestedActions.Should().HaveCountGreaterOrEqualTo(1);
        result.SuggestedActions[0].Action.Should().Be("verify_build_id");
        result.HintId.Should().StartWith("hint:sha256:");
    }

    /// <summary>Without a catalog match the hint is 0.2 / VeryLow and suggests expanding the catalog.</summary>
    [Fact]
    public void BuildFromBuildId_WithoutMatch_CreatesLowConfidenceHint()
    {
        // Act
        var result = _builder.BuildFromBuildId("unknown123", "sha1", null);

        // Assert
        result.Confidence.Should().Be(0.2);
        result.ConfidenceLevel.Should().Be(HintConfidence.VeryLow);
        result.Hypothesis.Should().Contain("no catalog match");
        result.Evidence.BuildId!.MatchedPackage.Should().BeNull();
        result.SuggestedActions.Should().Contain(action => action.Action == "expand_catalog");
    }

    /// <summary>A fingerprint match carries its similarity as confidence and names the matched package.</summary>
    [Fact]
    public void BuildFromImportFingerprint_WithMatch_IncludesMatchedPackage()
    {
        // Arrange
        FingerprintMatch[] candidates =
        [
            new FingerprintMatch
            {
                Package = "libc6",
                Version = "2.31",
                Similarity = 0.92,
                Source = "debian-corpus"
            },
        ];
        string[] libraries = ["libc.so.6", "libpthread.so.0"];

        // Act
        var result = _builder.BuildFromImportFingerprint("fp-abc", libraries, candidates);

        // Assert
        result.Type.Should().Be(ProvenanceHintType.ImportTableFingerprint);
        result.Confidence.Should().Be(0.92);
        result.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
        result.Hypothesis.Should().Contain("libc6");
        result.Hypothesis.Should().Contain("2.31");

        var fingerprintEvidence = result.Evidence.ImportFingerprint;
        fingerprintEvidence.Should().NotBeNull();
        fingerprintEvidence!.ImportedLibraries.Should().HaveCount(2);
        fingerprintEvidence.MatchedFingerprints.Should().HaveCount(1);
    }

    /// <summary>
    /// With no fingerprint matches the hint is 0.3 / Low and echoes the fingerprint.
    /// NOTE(review): the method name says "Medium" but the assertion expects
    /// <c>HintConfidence.Low</c>.
    /// </summary>
    [Fact]
    public void BuildFromImportFingerprint_WithoutMatch_CreatesMediumConfidenceHint()
    {
        // Arrange
        string[] libraries = ["unknown.so.1"];

        // Act
        var result = _builder.BuildFromImportFingerprint("fp-xyz", libraries, null);

        // Assert
        result.Confidence.Should().Be(0.3);
        result.ConfidenceLevel.Should().Be(HintConfidence.Low);
        result.Hypothesis.Should().Contain("fp-xyz");
        result.Evidence.ImportFingerprint!.MatchedFingerprints.Should().BeNull();
    }

    /// <summary>A layout match carries its similarity as confidence and records sections and a layout hash.</summary>
    [Fact]
    public void BuildFromSectionLayout_WithMatch_IncludesSimilarity()
    {
        // Arrange
        SectionInfo[] layout =
        [
            new SectionInfo { Name = ".text", Type = "PROGBITS", Size = 0x1000 },
            new SectionInfo { Name = ".data", Type = "PROGBITS", Size = 0x200 },
        ];
        LayoutMatch[] candidates =
        [
            new LayoutMatch { Package = "bash", Similarity = 0.88 },
        ];

        // Act
        var result = _builder.BuildFromSectionLayout(layout, candidates);

        // Assert
        result.Type.Should().Be(ProvenanceHintType.SectionLayout);
        result.Confidence.Should().Be(0.88);
        result.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
        result.Hypothesis.Should().Contain("bash");

        var layoutEvidence = result.Evidence.SectionLayout;
        layoutEvidence.Should().NotBeNull();
        layoutEvidence!.Sections.Should().HaveCount(2);
        layoutEvidence.LayoutHash.Should().NotBeNullOrEmpty();
    }

    /// <summary>A distro pattern yields a 0.7 / High hint naming distro and release, with a linked first action.</summary>
    [Fact]
    public void BuildFromDistroPattern_IncludesDistroAndRelease()
    {
        // Act
        var result = _builder.BuildFromDistroPattern(
            "debian",
            "bullseye",
            "rpath",
            "/usr/lib/x86_64-linux-gnu");

        // Assert
        result.Type.Should().Be(ProvenanceHintType.DistroPattern);
        result.Confidence.Should().Be(0.7);
        result.ConfidenceLevel.Should().Be(HintConfidence.High);
        result.Hypothesis.Should().Contain("debian");
        result.Hypothesis.Should().Contain("bullseye");

        var distroEvidence = result.Evidence.DistroPattern;
        distroEvidence.Should().NotBeNull();
        distroEvidence!.Distro.Should().Be("debian");
        distroEvidence.Release.Should().Be("bullseye");

        result.SuggestedActions[0].Link.Should().NotBeNull();
    }

    /// <summary>With several version strings, the highest-confidence one becomes the best guess.</summary>
    [Fact]
    public void BuildFromVersionStrings_WithMultipleStrings_SelectsBestGuess()
    {
        // Arrange
        ExtractedVersionString[] extracted =
        [
            new ExtractedVersionString { Value = "1.2.3", Location = ".rodata", Confidence = 0.8 },
            new ExtractedVersionString { Value = "1.2", Location = ".comment", Confidence = 0.5 },
        ];

        // Act
        var result = _builder.BuildFromVersionStrings(extracted);

        // Assert
        result.Type.Should().Be(ProvenanceHintType.VersionString);
        result.Confidence.Should().Be(0.8);
        result.ConfidenceLevel.Should().Be(HintConfidence.High);
        result.Hypothesis.Should().Contain("1.2.3");

        var versionEvidence = result.Evidence.VersionString;
        versionEvidence.Should().NotBeNull();
        versionEvidence!.BestGuess.Should().Be("1.2.3");
        versionEvidence.VersionStrings.Should().HaveCount(2);
    }

    /// <summary>A 0.95-similarity corpus match yields a VeryHigh hint that keeps the supplied metadata.</summary>
    [Fact]
    public void BuildFromCorpusMatch_HighSimilarity_CreatesVeryHighConfidence()
    {
        // Arrange
        var metadata = new Dictionary<string, string> { ["arch"] = "amd64" };

        // Act
        var result = _builder.BuildFromCorpusMatch(
            "debian-packages",
            "curl/7.68.0",
            "hash",
            0.95,
            metadata);

        // Assert
        result.Type.Should().Be(ProvenanceHintType.CorpusMatch);
        result.Confidence.Should().Be(0.95);
        result.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
        result.Hypothesis.Should().Contain("High confidence match");
        result.Hypothesis.Should().Contain("curl/7.68.0");

        var corpusEvidence = result.Evidence.CorpusMatch;
        corpusEvidence.Should().NotBeNull();
        corpusEvidence!.CorpusName.Should().Be("debian-packages");
        corpusEvidence.Metadata.Should().ContainKey("arch");
    }

    /// <summary>Combining an empty list reports that no hints exist, with zero confidence.</summary>
    [Fact]
    public void CombineHints_NoHints_ReturnsZeroConfidence()
    {
        // Act
        var (bestGuess, score) = _builder.CombineHints([]);

        // Assert
        bestGuess.Should().Contain("No provenance hints");
        score.Should().Be(0.0);
    }

    /// <summary>Combining one matched Build-ID hint returns its hypothesis and its 0.95 confidence.</summary>
    [Fact]
    public void CombineHints_SingleHint_ReturnsBestHypothesis()
    {
        // Arrange
        var catalogMatch = new BuildIdMatchResult
        {
            Package = "openssl",
            Version = "1.1.1k",
            Distro = "debian"
        };
        ProvenanceHint[] evidence =
        [
            _builder.BuildFromBuildId("abc123", "sha1", catalogMatch),
        ];

        // Act
        var (bestGuess, score) = _builder.CombineHints(evidence);

        // Assert
        bestGuess.Should().Contain("openssl");
        score.Should().Be(0.95);
    }

    /// <summary>Build-ID, distro-pattern and version-string hints together exceed the best single score.</summary>
    [Fact]
    public void CombineHints_MultipleAgreeingHints_BoostsConfidence()
    {
        // Arrange
        var catalogMatch = new BuildIdMatchResult
        {
            Package = "openssl",
            Version = "1.1.1k",
            Distro = "debian"
        };
        ExtractedVersionString[] extracted =
        [
            new ExtractedVersionString { Value = "1.1.1k", Location = ".rodata", Confidence = 0.7 },
        ];
        ProvenanceHint[] evidence =
        [
            _builder.BuildFromBuildId("abc123", "sha1", catalogMatch),
            _builder.BuildFromDistroPattern("debian", "bullseye", "rpath", "/usr/lib"),
            _builder.BuildFromVersionStrings(extracted),
        ];

        // Act
        var (bestGuess, score) = _builder.CombineHints(evidence);

        // Assert
        score.Should().BeGreaterThan(0.95); // Boosted from multiple agreeing hints
        bestGuess.Should().Contain("confirmed by");
        bestGuess.Should().Contain("evidence sources");
    }

    /// <summary>Identical inputs produce identical hint ids.</summary>
    [Fact]
    public void HintId_IsContentAddressed_DeterministicForSameInput()
    {
        // Arrange & Act
        var first = _builder.BuildFromBuildId("abc123", "sha1", null);
        var second = _builder.BuildFromBuildId("abc123", "sha1", null);

        // Assert
        first.HintId.Should().Be(second.HintId);
    }

    /// <summary>Different Build-IDs produce different hint ids.</summary>
    [Fact]
    public void HintId_IsDifferent_ForDifferentInput()
    {
        // Arrange & Act
        var first = _builder.BuildFromBuildId("abc123", "sha1", null);
        var second = _builder.BuildFromBuildId("xyz789", "sha1", null);

        // Assert
        first.HintId.Should().NotBe(second.HintId);
    }
}
|
||||
@@ -0,0 +1,299 @@
|
||||
using System.Text.Json;
|
||||
using StellaOps.Unknowns.Core.Hints;
|
||||
using StellaOps.Unknowns.Core.Models;
|
||||
using Xunit;
|
||||
using FluentAssertions;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Unknowns.Core.Tests.Hints;
|
||||
|
||||
/// <summary>
|
||||
/// Golden fixture tests for provenance hint serialization.
|
||||
/// Ensures stable JSON output for cross-service compatibility.
|
||||
/// </summary>
|
||||
public sealed class ProvenanceHintSerializationTests
|
||||
{
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
{
|
||||
WriteIndented = false,
|
||||
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
|
||||
};
|
||||
|
||||
private readonly ProvenanceHintBuilder _builder = new(new FrozenTimeProvider());
|
||||
|
||||
/// <summary>
/// Serializes a matched Build-ID hint with the shared snake_case options, checks
/// that it deserializes back with the same key values, and checks that the
/// top-level property names appear in the JSON text.
/// </summary>
[Fact]
public void BuildIdHint_Serialization_ProducesExpectedJson()
{
    // Arrange
    var catalogMatch = new BuildIdMatchResult
    {
        Package = "openssl",
        Version = "1.1.1k",
        Distro = "debian",
        CatalogSource = "debian-security"
    };
    var original = _builder.BuildFromBuildId("abc123def456", "sha1", catalogMatch);

    // Act
    var payload = JsonSerializer.Serialize(original, JsonOptions);
    var restored = JsonSerializer.Deserialize<ProvenanceHint>(payload, JsonOptions);

    // Assert - round-trip
    restored.Should().NotBeNull();
    restored!.Type.Should().Be(ProvenanceHintType.BuildIdMatch);
    restored.Confidence.Should().Be(0.95);
    restored.ConfidenceLevel.Should().Be(HintConfidence.VeryHigh);
    restored.Evidence.BuildId.Should().NotBeNull();
    restored.Evidence.BuildId!.BuildId.Should().Be("abc123def456");
    restored.Evidence.BuildId.MatchedPackage.Should().Be("openssl");

    // Assert - stable keys, checked in the same order as before
    string[] expectedKeys =
    [
        "\"hint_id\":",
        "\"type\":",
        "\"confidence\":",
        "\"confidence_level\":",
        "\"hypothesis\":",
        "\"evidence\":",
        "\"suggested_actions\":",
        "\"generated_at\":",
        "\"source\":",
    ];
    foreach (var expectedKey in expectedKeys)
    {
        payload.Should().Contain(expectedKey);
    }
}
|
||||
|
||||
/// <summary>
/// Round-trips an import-fingerprint hint and checks the fingerprint, the
/// imported library list and the single matched fingerprint survive intact.
/// </summary>
[Fact]
public void ImportFingerprintHint_Serialization_RoundTripsCorrectly()
{
    // Arrange
    FingerprintMatch[] candidates =
    {
        new FingerprintMatch
        {
            Package = "libc6",
            Version = "2.31-13",
            Similarity = 0.92,
            Source = "debian-corpus"
        }
    };
    string[] libraries = { "libc.so.6", "libpthread.so.0", "libdl.so.2" };

    var original = _builder.BuildFromImportFingerprint("fp-abc123", libraries, candidates);

    // Act
    var payload = JsonSerializer.Serialize(original, JsonOptions);
    var restored = JsonSerializer.Deserialize<ProvenanceHint>(payload, JsonOptions);

    // Assert
    restored.Should().NotBeNull();

    var evidence = restored!.Evidence.ImportFingerprint;
    evidence.Should().NotBeNull();
    evidence!.Fingerprint.Should().Be("fp-abc123");
    evidence.ImportedLibraries.Should().HaveCount(3);
    evidence.MatchedFingerprints.Should().HaveCount(1);

    var topMatch = evidence.MatchedFingerprints![0];
    topMatch.Package.Should().Be("libc6");
    topMatch.Similarity.Should().Be(0.92);
}
|
||||
|
||||
/// <summary>
/// Round-trips a section-layout hint and checks that all three sections,
/// a non-empty layout hash and the single layout match are present afterwards.
/// </summary>
[Fact]
public void SectionLayoutHint_Serialization_PreservesAllSections()
{
    // Arrange
    SectionInfo[] layout =
    {
        new SectionInfo { Name = ".text", Type = "PROGBITS", Size = 0x1000, Flags = "AX" },
        new SectionInfo { Name = ".data", Type = "PROGBITS", Size = 0x200, Flags = "WA" },
        new SectionInfo { Name = ".bss", Type = "NOBITS", Size = 0x100, Flags = "WA" }
    };
    LayoutMatch[] candidates =
    {
        new LayoutMatch { Package = "bash", Similarity = 0.88 }
    };

    var original = _builder.BuildFromSectionLayout(layout, candidates);

    // Act
    var payload = JsonSerializer.Serialize(original, JsonOptions);
    var restored = JsonSerializer.Deserialize<ProvenanceHint>(payload, JsonOptions);

    // Assert
    restored.Should().NotBeNull();

    var evidence = restored!.Evidence.SectionLayout;
    evidence.Should().NotBeNull();
    evidence!.Sections.Should().HaveCount(3);

    // The first section must still be .text with its original size.
    evidence.Sections[0].Name.Should().Be(".text");
    evidence.Sections[0].Size.Should().Be(0x1000);

    evidence.LayoutHash.Should().NotBeNullOrEmpty();
    evidence.MatchedLayouts.Should().HaveCount(1);
}
|
||||
|
||||
/// <summary>
/// Round-trips a distro-pattern hint and checks that distro, release,
/// pattern type and matched pattern all come back unchanged.
/// </summary>
[Fact]
public void DistroPatternHint_Serialization_IncludesAllFields()
{
    // Arrange
    const string distro = "debian";
    const string release = "bullseye";
    const string patternType = "rpath";
    const string matchedPattern = "/usr/lib/x86_64-linux-gnu";

    var original = _builder.BuildFromDistroPattern(distro, release, patternType, matchedPattern);

    // Act
    var payload = JsonSerializer.Serialize(original, JsonOptions);
    var restored = JsonSerializer.Deserialize<ProvenanceHint>(payload, JsonOptions);

    // Assert
    restored.Should().NotBeNull();

    var evidence = restored!.Evidence.DistroPattern;
    evidence.Should().NotBeNull();
    evidence!.Distro.Should().Be(distro);
    evidence.Release.Should().Be(release);
    evidence.PatternType.Should().Be(patternType);
    evidence.MatchedPattern.Should().Be(matchedPattern);
}
|
||||
|
||||
/// <summary>
/// Round-trips a version-string hint built from three extracted strings and
/// checks that all three survive and that the best guess is "1.2.3".
/// </summary>
[Fact]
public void VersionStringHint_Serialization_PreservesAllVersionStrings()
{
    // Arrange
    ExtractedVersionString[] extracted =
    {
        new ExtractedVersionString { Value = "1.2.3", Location = ".rodata", Confidence = 0.8 },
        new ExtractedVersionString { Value = "1.2", Location = ".comment", Confidence = 0.5 },
        new ExtractedVersionString { Value = "v1.2.3-stable", Location = ".data", Confidence = 0.7 }
    };

    var original = _builder.BuildFromVersionStrings(extracted);

    // Act
    var payload = JsonSerializer.Serialize(original, JsonOptions);
    var restored = JsonSerializer.Deserialize<ProvenanceHint>(payload, JsonOptions);

    // Assert
    restored.Should().NotBeNull();

    var evidence = restored!.Evidence.VersionString;
    evidence.Should().NotBeNull();
    evidence!.VersionStrings.Should().HaveCount(3);

    // "1.2.3" is the entry supplied with the highest confidence (0.8).
    evidence.BestGuess.Should().Be("1.2.3");
}
|
||||
|
||||
/// <summary>
/// Round-trips a corpus-match hint and checks that corpus name, matched entry,
/// similarity and the metadata dictionary are preserved.
/// </summary>
[Fact]
public void CorpusMatchHint_Serialization_IncludesMetadata()
{
    // Arrange
    var extraInfo = new Dictionary<string, string>
    {
        { "arch", "amd64" },
        { "build_date", "2024-01-15" },
        { "compiler", "gcc-11.2.0" }
    };

    var original = _builder.BuildFromCorpusMatch("debian-packages", "curl/7.68.0", "hash", 0.95, extraInfo);

    // Act
    var payload = JsonSerializer.Serialize(original, JsonOptions);
    var restored = JsonSerializer.Deserialize<ProvenanceHint>(payload, JsonOptions);

    // Assert
    restored.Should().NotBeNull();

    var evidence = restored!.Evidence.CorpusMatch;
    evidence.Should().NotBeNull();
    evidence!.CorpusName.Should().Be("debian-packages");
    evidence.MatchedEntry.Should().Be("curl/7.68.0");
    evidence.Similarity.Should().Be(0.95);

    // Metadata keys are user data; spot-check one entry after the round trip.
    evidence.Metadata.Should().NotBeNull();
    evidence.Metadata!["arch"].Should().Be("amd64");
}
|
||||
|
||||
/// <summary>
/// Round-trips a Build-ID hint and verifies that its suggested actions come back
/// populated and in the same order the builder produced them.
/// </summary>
/// <remarks>
/// Previously only the first action was inspected, so a reordering during
/// serialization would not have been detected despite the test's name.
/// </remarks>
[Fact]
public void SuggestedActions_Serialization_PreservesOrder()
{
    // Arrange
    var match = new BuildIdMatchResult
    {
        Package = "test",
        Version = "1.0",
        Distro = "debian"
    };

    var hint = _builder.BuildFromBuildId("test-id", "sha1", match);

    // Act
    var json = JsonSerializer.Serialize(hint, JsonOptions);
    var deserialized = JsonSerializer.Deserialize<ProvenanceHint>(json, JsonOptions);

    // Assert - at least one action exists and its fields are populated
    deserialized.Should().NotBeNull();
    deserialized!.SuggestedActions.Should().HaveCountGreaterOrEqualTo(1);
    deserialized.SuggestedActions[0].Action.Should().NotBeNullOrEmpty();
    deserialized.SuggestedActions[0].Priority.Should().BeGreaterThan(0);
    deserialized.SuggestedActions[0].Effort.Should().NotBeNullOrEmpty();
    deserialized.SuggestedActions[0].Description.Should().NotBeNullOrEmpty();

    // Assert - order: every position holds the same action as before serialization
    deserialized.SuggestedActions.Should().HaveSameCount(hint.SuggestedActions);

    var position = 0;
    foreach (var expected in hint.SuggestedActions)
    {
        deserialized.SuggestedActions[position].Action.Should().Be(expected.Action);
        position++;
    }
}
|
||||
|
||||
/// <summary>
/// Two hints built from the same input share a HintId, and each serialized
/// payload contains that identifier verbatim.
/// </summary>
[Fact]
public void HintId_IsDeterministic_ForSameInput()
{
    // Arrange & Act
    var first = _builder.BuildFromBuildId("same-id", "sha1", null);
    var second = _builder.BuildFromBuildId("same-id", "sha1", null);

    var firstPayload = JsonSerializer.Serialize(first, JsonOptions);
    var secondPayload = JsonSerializer.Serialize(second, JsonOptions);

    // Assert
    first.HintId.Should().Be(second.HintId);
    firstPayload.Should().Contain(first.HintId);
    secondPayload.Should().Contain(second.HintId);
}
|
||||
|
||||
/// <summary>
/// With the frozen clock injected, GeneratedAt is 2025-01-01T00:00:00Z both on
/// the hint object and in its serialized form.
/// </summary>
[Fact]
public void GeneratedAt_UsesFixedTimestamp_InTests()
{
    // Arrange
    var expectedInstant = new DateTimeOffset(2025, 1, 1, 0, 0, 0, TimeSpan.Zero);
    var hint = _builder.BuildFromBuildId("test", "sha1", null);

    // Act
    var payload = JsonSerializer.Serialize(hint, JsonOptions);

    // Assert
    hint.GeneratedAt.Should().Be(expectedInstant);
    payload.Should().Contain("\"generated_at\":\"2025-01-01T00:00:00+00:00\"");
}
|
||||
|
||||
/// <summary>
/// Serializes a fully populated Build-ID hint and inspects the raw JSON tree:
/// the hint id prefix, a non-empty type, a confidence within [0, 1], and the
/// nested <c>evidence.build_id.catalog_source</c> value.
/// </summary>
[Fact]
public void CompleteHint_JsonOutput_IsValid()
{
    // Arrange
    var match = new BuildIdMatchResult
    {
        Package = "nginx",
        Version = "1.18.0-6",
        Distro = "debian",
        CatalogSource = "debian-security",
        AdvisoryLink = "https://security.debian.org/nginx"
    };

    var hint = _builder.BuildFromBuildId("deadbeef0123456789abcdef", "sha256", match);

    // Act
    var json = JsonSerializer.Serialize(hint, JsonOptions);

    // Assert - JSON is parseable.
    // JsonDocument rents pooled memory, so it is disposed when the test exits.
    using var parsed = JsonDocument.Parse(json);
    var root = parsed.RootElement;

    root.GetProperty("hint_id").GetString().Should().StartWith("hint:sha256:");
    root.GetProperty("type").GetString().Should().NotBeNullOrEmpty();
    root.GetProperty("confidence").GetDouble().Should().BeInRange(0, 1);
    root.GetProperty("evidence").GetProperty("build_id").GetProperty("catalog_source")
        .GetString().Should().Be("debian-security");
}
|
||||
|
||||
/// <summary>
/// Time provider whose UTC clock always reads 2025-01-01T00:00:00Z, so
/// timestamps produced during tests are deterministic.
/// </summary>
private sealed class FrozenTimeProvider : TimeProvider
{
    // The single instant this provider ever reports.
    private static readonly DateTimeOffset s_fixedInstant =
        new DateTimeOffset(2025, 1, 1, 0, 0, 0, TimeSpan.Zero);

    /// <summary>Returns the fixed instant regardless of the real clock.</summary>
    public override DateTimeOffset GetUtcNow()
    {
        return s_fixedInstant;
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user