Partially implemented and unimplemented features are now implemented

This commit is contained in:
master
2026-02-09 08:53:51 +02:00
parent 1bf6bbf395
commit 4bdc298ec1
674 changed files with 90194 additions and 2271 deletions

View File

@@ -0,0 +1,416 @@
using FluentAssertions;
using StellaOps.Scanner.Reachability;
using Xunit;
namespace StellaOps.Scanner.Reachability.Tests.Benchmarks;
/// <summary>
/// Verifies the on-disk toy-service reachability corpus and the per-tier
/// precision/recall harness that is computed from it.
/// </summary>
public sealed class ReachabilityTierCorpusTests
{
    /// <summary>
    /// The corpus must list the six toy services in order, each with at least one CVE,
    /// schema version "v1", an existing service directory and an existing entrypoint file.
    /// </summary>
    [Fact]
    public void Corpus_ShouldContainExpectedToyServices_WithValidLabels()
    {
        var corpus = ReachabilityTierCorpus.Load();
        var labels = corpus.Services;

        var serviceNames = labels.Select(service => service.Service);
        serviceNames.Should().Equal(
            "svc-01-log4shell-java",
            "svc-02-prototype-pollution-node",
            "svc-03-pickle-deserialization-python",
            "svc-04-text-template-go",
            "svc-05-xmlserializer-dotnet",
            "svc-06-erb-injection-ruby");

        labels.Should().OnlyContain(service => service.Cves.Count > 0);
        labels.Should().OnlyContain(service => service.SchemaVersion == "v1");

        foreach (var label in labels)
        {
            var directory = Path.Combine(corpus.RootPath, label.Service);
            var directoryExists = Directory.Exists(directory);
            directoryExists.Should().BeTrue($"toy service directory '{label.Service}' should exist");

            // The entrypoint is resolved relative to the service directory.
            var entrypointExists = File.Exists(Path.Combine(directory, label.Entrypoint));
            entrypointExists.Should().BeTrue($"entrypoint '{label.Entrypoint}' should exist for '{label.Service}'");
        }
    }

    /// <summary>
    /// Across all services, the labelled CVEs must use every tier from R0 to R4.
    /// </summary>
    [Fact]
    public void Corpus_ShouldCover_AllR0ToR4Tiers()
    {
        var corpus = ReachabilityTierCorpus.Load();

        var observedTiers =
            (from label in corpus.Services
             from cve in label.Cves
             select cve.Tier)
            .Distinct()
            .OrderBy(tier => tier)
            .ToArray();

        observedTiers.Should().Equal(
            ReachabilityTier.R0,
            ReachabilityTier.R1,
            ReachabilityTier.R2,
            ReachabilityTier.R3,
            ReachabilityTier.R4);
    }

    /// <summary>
    /// Pins the mapping from corpus tier labels to <see cref="ReachabilityConfidenceTier"/>.
    /// </summary>
    [Fact]
    public void Corpus_ShouldMapTierLabels_ToReachabilityConfidenceTier()
    {
        var expectedMappings = new (ReachabilityTier Tier, ReachabilityConfidenceTier Confidence)[]
        {
            (ReachabilityTier.R0, ReachabilityConfidenceTier.Unreachable),
            (ReachabilityTier.R1, ReachabilityConfidenceTier.Present),
            (ReachabilityTier.R2, ReachabilityConfidenceTier.Present),
            (ReachabilityTier.R3, ReachabilityConfidenceTier.Likely),
            (ReachabilityTier.R4, ReachabilityConfidenceTier.Confirmed),
        };

        foreach (var (tier, confidence) in expectedMappings)
        {
            tier.ToConfidenceTier().Should().Be(confidence);
        }
    }

    /// <summary>
    /// Feeding the ground truth back in as the prediction must yield precision, recall
    /// and F1 of 1.0 for every tier.
    /// </summary>
    [Fact]
    public void PrecisionRecallHarness_ShouldReportPerfectScores_WhenPredictionsMatchGroundTruth()
    {
        var groundTruth = ReachabilityTierCorpus.Load().ToExpectedTierMap();

        // An independent copy of the ground truth acts as the "perfect" prediction.
        var prediction = groundTruth.ToDictionary(
            pair => pair.Key,
            pair => pair.Value,
            StringComparer.Ordinal);

        var metrics = ReachabilityTierMetricHarness.Compute(groundTruth, prediction);

        metrics.Values.Should().OnlyContain(metric =>
            metric.TruePositives >= 0 &&
            metric.FalsePositives >= 0 &&
            metric.FalseNegatives >= 0 &&
            metric.Precision == 1.0 &&
            metric.Recall == 1.0 &&
            metric.F1 == 1.0);
    }

    /// <summary>
    /// A fixed, partially wrong prediction must give identical results on repeated runs
    /// and the expected precision/recall/F1 for tiers R4, R2 and R1.
    /// </summary>
    [Fact]
    public void PrecisionRecallHarness_ShouldComputePerTierMetrics_Deterministically()
    {
        var groundTruth = ReachabilityTierCorpus.Load().ToExpectedTierMap();
        var prediction = new Dictionary<string, ReachabilityTier>(StringComparer.Ordinal)
        {
            { "CVE-2021-44228", ReachabilityTier.R4 },
            { "CVE-2022-24999", ReachabilityTier.R1 },
            { "CVE-2011-2526", ReachabilityTier.R3 },
            { "CVE-2023-24538", ReachabilityTier.R1 },
            { "CVE-2021-26701", ReachabilityTier.R0 },
            { "CVE-2021-41819", ReachabilityTier.R2 },
        };

        var firstRun = ReachabilityTierMetricHarness.Compute(groundTruth, prediction);
        var secondRun = ReachabilityTierMetricHarness.Compute(groundTruth, prediction);

        secondRun.Should().Equal(firstRun);

        var r4 = firstRun[ReachabilityTier.R4];
        r4.Precision.Should().Be(1.0);
        r4.Recall.Should().Be(0.5);
        r4.F1.Should().BeApproximately(0.6667, 0.0001);

        var r2 = firstRun[ReachabilityTier.R2];
        r2.Precision.Should().Be(0.0);
        r2.Recall.Should().Be(0.0);
        r2.F1.Should().Be(0.0);

        var r1 = firstRun[ReachabilityTier.R1];
        r1.Precision.Should().Be(0.5);
        r1.Recall.Should().Be(1.0);
        r1.F1.Should().BeApproximately(0.6667, 0.0001);
    }
}
/// <summary>
/// The toy-service reachability corpus: the directory it was loaded from and the
/// parsed <c>labels.yaml</c> of every <c>svc-*</c> subdirectory.
/// </summary>
/// <param name="RootPath">Directory that contains the <c>svc-*</c> service directories.</param>
/// <param name="Services">Parsed labels, ordered by service name (ordinal).</param>
internal sealed record ReachabilityTierCorpus(string RootPath, IReadOnlyList<ToyServiceLabel> Services)
{
    /// <summary>
    /// Locates the dataset directory and parses <c>labels.yaml</c> from each direct
    /// <c>svc-*</c> subdirectory.
    /// </summary>
    /// <returns>The corpus with services sorted by name using ordinal comparison.</returns>
    /// <exception cref="DirectoryNotFoundException">The dataset directory could not be located.</exception>
    public static ReachabilityTierCorpus Load()
    {
        var corpusRoot = ResolveCorpusRoot();

        // Directories are visited in ordinal path order so parsing happens in a stable sequence.
        var labelFiles = Directory
            .EnumerateDirectories(corpusRoot, "svc-*", SearchOption.TopDirectoryOnly)
            .OrderBy(directory => directory, StringComparer.Ordinal)
            .Select(directory => Path.Combine(directory, "labels.yaml"))
            .ToArray();

        var parsed = new List<ToyServiceLabel>(labelFiles.Length);
        foreach (var labelFile in labelFiles)
        {
            parsed.Add(ToyServiceLabelParser.Parse(labelFile));
        }

        var ordered = parsed
            .OrderBy(label => label.Service, StringComparer.Ordinal)
            .ToArray();

        return new ReachabilityTierCorpus(corpusRoot, ordered);
    }

    /// <summary>
    /// Flattens all CVE labels into a CVE-id to tier map, sorted by id (ordinal).
    /// </summary>
    /// <remarks>
    /// When the same CVE id appears more than once, the entry encountered last
    /// (in service order, then label order) overwrites the earlier ones.
    /// </remarks>
    public IReadOnlyDictionary<string, ReachabilityTier> ToExpectedTierMap()
    {
        var tiersById = new SortedDictionary<string, ReachabilityTier>(StringComparer.Ordinal);

        foreach (var label in Services)
        {
            foreach (var cve in label.Cves)
            {
                tiersById[cve.Id] = cve.Tier;
            }
        }

        return tiersById;
    }

    /// <summary>
    /// Finds the dataset directory: first <c>Datasets/toys</c> under the application base
    /// directory, then <c>src/Scanner/__Tests/__Datasets/toys</c> under the base directory
    /// or any of its ancestors.
    /// </summary>
    private static string ResolveCorpusRoot()
    {
        var copiedDataset = Path.Combine(AppContext.BaseDirectory, "Datasets", "toys");
        if (Directory.Exists(copiedDataset))
        {
            return copiedDataset;
        }

        // Walk upwards from the base directory looking for the in-repository dataset.
        for (var directory = new DirectoryInfo(AppContext.BaseDirectory);
             directory is not null;
             directory = directory.Parent)
        {
            var candidate = Path.Combine(directory.FullName, "src", "Scanner", "__Tests", "__Datasets", "toys");
            if (Directory.Exists(candidate))
            {
                return candidate;
            }
        }

        throw new DirectoryNotFoundException("Could not locate the toy reachability dataset directory.");
    }
}
/// <summary>
/// Reachability tier labels (<c>R0</c>–<c>R4</c>) used by the toy-service corpus.
/// </summary>
/// <remarks>
/// The explicit numeric values define the sort order used when tiers are ordered
/// or used as keys of a sorted dictionary.
/// </remarks>
internal enum ReachabilityTier
{
R0 = 0,
R1 = 1,
R2 = 2,
R3 = 3,
R4 = 4
}
/// <summary>
/// Conversions from corpus tier labels to <see cref="ReachabilityConfidenceTier"/>.
/// </summary>
internal static class ReachabilityTierExtensions
{
    /// <summary>
    /// Maps a corpus tier to its confidence tier. <c>R1</c> and <c>R2</c> both map to
    /// <c>Present</c>; any value outside <c>R0</c>–<c>R4</c> maps to <c>Unknown</c>.
    /// </summary>
    /// <param name="tier">The corpus tier label.</param>
    /// <returns>The corresponding confidence tier.</returns>
    public static ReachabilityConfidenceTier ToConfidenceTier(this ReachabilityTier tier)
    {
        switch (tier)
        {
            case ReachabilityTier.R0:
                return ReachabilityConfidenceTier.Unreachable;

            case ReachabilityTier.R1:
            case ReachabilityTier.R2:
                return ReachabilityConfidenceTier.Present;

            case ReachabilityTier.R3:
                return ReachabilityConfidenceTier.Likely;

            case ReachabilityTier.R4:
                return ReachabilityConfidenceTier.Confirmed;

            default:
                return ReachabilityConfidenceTier.Unknown;
        }
    }
}
/// <summary>
/// Parsed contents of one toy service's <c>labels.yaml</c>.
/// </summary>
/// <param name="SchemaVersion">Value of the <c>schema_version</c> key.</param>
/// <param name="Service">Value of the <c>service</c> key; the tests also use it as the service directory name.</param>
/// <param name="Language">Value of the <c>language</c> key.</param>
/// <param name="Entrypoint">Value of the <c>entrypoint</c> key; the tests resolve it relative to the service directory.</param>
/// <param name="Cves">The CVE label entries, in file order.</param>
internal sealed record ToyServiceLabel(
string SchemaVersion,
string Service,
string Language,
string Entrypoint,
IReadOnlyList<ToyCveLabel> Cves);
/// <summary>
/// One labelled CVE entry of a toy service.
/// </summary>
/// <param name="Id">Value of the entry's <c>id</c> key (the CVE identifier).</param>
/// <param name="Package">Value of the entry's <c>package</c> key.</param>
/// <param name="Tier">Tier parsed from the entry's <c>tier</c> key.</param>
/// <param name="Rationale">Value of the entry's <c>rationale</c> key.</param>
internal sealed record ToyCveLabel(
string Id,
string Package,
ReachabilityTier Tier,
string Rationale);
/// <summary>
/// Minimal line-oriented reader for a toy service's <c>labels.yaml</c>.
/// </summary>
/// <remarks>
/// This is not a general YAML parser. Every line is trimmed and matched by key prefix,
/// so indentation carries no meaning: the top-level keys (<c>schema_version</c>,
/// <c>service</c>, <c>language</c>, <c>entrypoint</c>) are recognised wherever they appear,
/// while <c>package</c>, <c>tier</c> and <c>rationale</c> are only honoured after a
/// <c>- id:</c> line has started a CVE entry. Values may be plain or wrapped in one pair of
/// matching single or double quotes; escape sequences, inline comments and multi-line
/// scalars are not interpreted.
/// </remarks>
internal static class ToyServiceLabelParser
{
    /// <summary>
    /// Reads and validates the label file at <paramref name="labelsPath"/>.
    /// </summary>
    /// <param name="labelsPath">Path of the <c>labels.yaml</c> file to read.</param>
    /// <returns>The parsed service label with its CVE entries in file order.</returns>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    /// <exception cref="InvalidDataException">
    /// A required top-level field is missing or blank, no CVE entry is present, a CVE entry
    /// lacks a required field, or a tier value is not one of <c>R0</c>–<c>R4</c>.
    /// </exception>
    public static ToyServiceLabel Parse(string labelsPath)
    {
        if (!File.Exists(labelsPath))
        {
            throw new FileNotFoundException("labels.yaml is required for every toy service.", labelsPath);
        }

        string? schemaVersion = null;
        string? service = null;
        string? language = null;
        string? entrypoint = null;
        var cves = new List<ToyCveLabel>();
        CveBuilder? current = null;

        foreach (var rawLine in File.ReadLines(labelsPath))
        {
            var line = rawLine.Trim();

            // Blank lines and full-line comments carry no data.
            if (line.Length == 0 || line.StartsWith('#'))
            {
                continue;
            }

            // A new list item finishes the previous CVE entry (if any) and starts the next one.
            if (line.StartsWith("- id:", StringComparison.Ordinal))
            {
                if (current is not null)
                {
                    cves.Add(current.Build(labelsPath));
                }

                current = new CveBuilder { Id = ValueAfterColon(line) };
                continue;
            }

            if (line.StartsWith("schema_version:", StringComparison.Ordinal))
            {
                schemaVersion = ValueAfterColon(line);
                continue;
            }

            if (line.StartsWith("service:", StringComparison.Ordinal))
            {
                service = ValueAfterColon(line);
                continue;
            }

            if (line.StartsWith("language:", StringComparison.Ordinal))
            {
                language = ValueAfterColon(line);
                continue;
            }

            if (line.StartsWith("entrypoint:", StringComparison.Ordinal))
            {
                entrypoint = ValueAfterColon(line);
                continue;
            }

            // The remaining keys only make sense inside a CVE entry.
            if (current is null)
            {
                continue;
            }

            if (line.StartsWith("package:", StringComparison.Ordinal))
            {
                current.Package = ValueAfterColon(line);
                continue;
            }

            if (line.StartsWith("tier:", StringComparison.Ordinal))
            {
                current.Tier = ParseTier(ValueAfterColon(line), labelsPath);
                continue;
            }

            if (line.StartsWith("rationale:", StringComparison.Ordinal))
            {
                current.Rationale = ValueAfterColon(line);
            }
        }

        // The last CVE entry has no following "- id:" line to flush it.
        if (current is not null)
        {
            cves.Add(current.Build(labelsPath));
        }

        if (string.IsNullOrWhiteSpace(schemaVersion) ||
            string.IsNullOrWhiteSpace(service) ||
            string.IsNullOrWhiteSpace(language) ||
            string.IsNullOrWhiteSpace(entrypoint))
        {
            throw new InvalidDataException($"labels.yaml is missing required top-level fields: {labelsPath}");
        }

        if (cves.Count == 0)
        {
            throw new InvalidDataException($"labels.yaml must include at least one CVE label: {labelsPath}");
        }

        return new ToyServiceLabel(schemaVersion, service, language, entrypoint, cves);
    }

    /// <summary>
    /// Converts a tier label (case-sensitive <c>R0</c>–<c>R4</c>) to its enum value.
    /// </summary>
    /// <exception cref="InvalidDataException">The label is not a supported tier.</exception>
    private static ReachabilityTier ParseTier(string value, string labelsPath) =>
        value switch
        {
            "R0" => ReachabilityTier.R0,
            "R1" => ReachabilityTier.R1,
            "R2" => ReachabilityTier.R2,
            "R3" => ReachabilityTier.R3,
            "R4" => ReachabilityTier.R4,
            _ => throw new InvalidDataException($"Unsupported tier '{value}' in {labelsPath}.")
        };

    /// <summary>
    /// Returns the scalar after the first <c>:</c> of <paramref name="line"/>, trimmed and
    /// with one pair of matching surrounding quotes removed; empty when there is no colon
    /// or nothing follows it.
    /// </summary>
    private static string ValueAfterColon(string line)
    {
        var separator = line.IndexOf(':', StringComparison.Ordinal);
        if (separator < 0 || separator == line.Length - 1)
        {
            return string.Empty;
        }

        return Unquote(line[(separator + 1)..].Trim());
    }

    /// <summary>
    /// Strips one pair of matching single or double quotes around <paramref name="value"/>,
    /// so that quoted scalars such as <c>"v1"</c> or <c>'R4'</c> compare equal to their
    /// plain form. Values that are not fully quoted are returned unchanged.
    /// </summary>
    private static string Unquote(string value)
    {
        if (value.Length >= 2)
        {
            var opening = value[0];
            if ((opening == '"' || opening == '\'') && value[^1] == opening)
            {
                return value[1..^1];
            }
        }

        return value;
    }

    /// <summary>
    /// Accumulates the fields of one CVE entry while its lines are being read.
    /// </summary>
    private sealed class CveBuilder
    {
        public string? Id { get; init; }

        public string? Package { get; set; }

        public ReachabilityTier? Tier { get; set; }

        public string? Rationale { get; set; }

        /// <summary>
        /// Creates the immutable label once all four fields are present and non-blank.
        /// </summary>
        /// <exception cref="InvalidDataException">A required field is missing or blank.</exception>
        public ToyCveLabel Build(string labelsPath)
        {
            if (string.IsNullOrWhiteSpace(Id) ||
                string.IsNullOrWhiteSpace(Package) ||
                !Tier.HasValue ||
                string.IsNullOrWhiteSpace(Rationale))
            {
                throw new InvalidDataException($"CVE label entry is missing required fields in {labelsPath}.");
            }

            return new ToyCveLabel(Id, Package, Tier.Value, Rationale);
        }
    }
}
/// <summary>
/// Computes per-tier precision, recall and F1 from expected and predicted tier maps.
/// </summary>
internal static class ReachabilityTierMetricHarness
{
    /// <summary>
    /// Scores <paramref name="predicted"/> against <paramref name="expected"/> for every
    /// <see cref="ReachabilityTier"/> value, treating each tier as a one-vs-rest class over
    /// the union of CVE ids in both maps.
    /// </summary>
    /// <param name="expected">Ground-truth tier per CVE id.</param>
    /// <param name="predicted">Predicted tier per CVE id.</param>
    /// <returns>
    /// Metrics keyed by tier in ascending tier order. Precision is 1.0 when nothing was
    /// predicted for the tier, recall is 1.0 when nothing was expected for it, and F1 is 0.0
    /// when precision and recall are both 0. All three are rounded to four decimals.
    /// </returns>
    public static IReadOnlyDictionary<ReachabilityTier, TierMetrics> Compute(
        IReadOnlyDictionary<string, ReachabilityTier> expected,
        IReadOnlyDictionary<string, ReachabilityTier> predicted)
    {
        // Every id mentioned by either side, deduplicated and ordered ordinally.
        var allIds = new SortedSet<string>(expected.Keys, StringComparer.Ordinal);
        allIds.UnionWith(predicted.Keys);

        // Resolve both sides once per id; a missing id is represented by null.
        var observations = allIds
            .Select(id => (Expected: TierOrNull(expected, id), Predicted: TierOrNull(predicted, id)))
            .ToArray();

        var perTier = new SortedDictionary<ReachabilityTier, TierMetrics>();

        foreach (var tier in Enum.GetValues<ReachabilityTier>())
        {
            var hits = observations.Count(o => o.Expected == tier && o.Predicted == tier);
            var spurious = observations.Count(o => o.Expected != tier && o.Predicted == tier);
            var missed = observations.Count(o => o.Expected == tier && o.Predicted != tier);

            var precision = RatioOrOne(hits, hits + spurious);
            var recall = RatioOrOne(hits, hits + missed);
            var f1 = precision + recall == 0
                ? 0.0
                : 2 * precision * recall / (precision + recall);

            perTier[tier] = new TierMetrics(
                hits,
                spurious,
                missed,
                Math.Round(precision, 4),
                Math.Round(recall, 4),
                Math.Round(f1, 4));
        }

        return perTier;
    }

    /// <summary>Looks up <paramref name="id"/>, returning null when it is absent.</summary>
    private static ReachabilityTier? TierOrNull(IReadOnlyDictionary<string, ReachabilityTier> tiers, string id) =>
        tiers.TryGetValue(id, out var tier) ? tier : (ReachabilityTier?)null;

    /// <summary>Divides as doubles, yielding 1.0 when the denominator is zero.</summary>
    private static double RatioOrOne(int numerator, int denominator) =>
        denominator == 0 ? 1.0 : (double)numerator / denominator;
}
/// <summary>
/// Classification counts and derived scores for a single reachability tier.
/// </summary>
/// <param name="TruePositives">CVEs both expected and predicted to be in the tier.</param>
/// <param name="FalsePositives">CVEs predicted to be in the tier but not expected there.</param>
/// <param name="FalseNegatives">CVEs expected in the tier but not predicted there.</param>
/// <param name="Precision">Precision for the tier; the harness rounds it to four decimals.</param>
/// <param name="Recall">Recall for the tier; the harness rounds it to four decimals.</param>
/// <param name="F1">F1 score for the tier; the harness rounds it to four decimals.</param>
internal sealed record TierMetrics(
int TruePositives,
int FalsePositives,
int FalseNegatives,
double Precision,
double Recall,
double F1);

View File

@@ -24,4 +24,10 @@
<ProjectReference Include="../../../BinaryIndex/__Libraries/StellaOps.BinaryIndex.Decompiler/StellaOps.BinaryIndex.Decompiler.csproj" />
<ProjectReference Include="../../../BinaryIndex/__Libraries/StellaOps.BinaryIndex.Ghidra/StellaOps.BinaryIndex.Ghidra.csproj" />
</ItemGroup>
<ItemGroup>
<!-- Includes every file under ..\__Datasets\toys, linked as Datasets\toys\<relative path>,
     and copies it to the output directory when newer (PreserveNewest).
     The corpus tests look for Datasets/toys under AppContext.BaseDirectory first. -->
<None Include="..\__Datasets\toys\**\*"
Link="Datasets\toys\%(RecursiveDir)%(Filename)%(Extension)"
CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
</Project>

View File

@@ -6,3 +6,4 @@ Source of truth: `docs/implplan/SPRINT_20260130_002_Tools_csproj_remediation_sol
| --- | --- | --- |
| REMED-05 | TODO | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/Scanner/__Tests/StellaOps.Scanner.Reachability.Tests/StellaOps.Scanner.Reachability.Tests.md. |
| REMED-06 | DONE | SOLID review notes captured for SPRINT_20260130_002. |
| SPRINT-20260208-059-REACHCORPUS-001 | DONE | Built deterministic toy-service reachability corpus (`labels.yaml`) and per-tier precision/recall harness for sprint 059 (2026-02-08). |