devops folders consolidate

This commit is contained in:
master
2026-01-25 23:27:41 +02:00
parent 6e687b523a
commit a743bb9a1d
613 changed files with 8611 additions and 41846 deletions

View File

@@ -0,0 +1,379 @@
// -----------------------------------------------------------------------------
// PackageIdfServiceTests.cs
// Sprint: SPRINT_20260125_001_Concelier_linkset_correlation_v2
// Task: CORR-V2-007
// Description: Unit tests for package IDF keys, options, and conceptual IDF computations
// -----------------------------------------------------------------------------
using FluentAssertions;
using Xunit;
using StellaOps.TestKit;
namespace StellaOps.Concelier.Cache.Valkey.Tests;
/// <summary>
/// Checks the exact cache-key strings returned by the IDF helpers on <c>AdvisoryCacheKeys</c>.
/// Service-level tests that require Valkey are in the Integration folder.
/// </summary>
public class PackageIdfKeyTests
{
    #region IDF Key Generation Tests

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfPackage_GeneratesCorrectKey()
    {
        // Already-lowercase PURL with the default key prefix.
        const string purl = "pkg:npm/lodash@4.17.21";

        var cacheKey = AdvisoryCacheKeys.IdfPackage(purl);

        cacheKey.Should().Be("concelier:idf:pkg:pkg:npm/lodash@4.17.21");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfPackage_NormalizesToLowercase()
    {
        // Mixed-case input must yield the same key as the lowercase form.
        const string mixedCasePurl = "pkg:NPM/Lodash@4.17.21";

        var cacheKey = AdvisoryCacheKeys.IdfPackage(mixedCasePurl);

        cacheKey.Should().Be("concelier:idf:pkg:pkg:npm/lodash@4.17.21");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfPackage_WithCustomPrefix_GeneratesCorrectKey()
    {
        // The caller-supplied prefix replaces the default "concelier:" one.
        const string purl = "pkg:npm/express@4.18.2";
        const string customPrefix = "prod:";

        var cacheKey = AdvisoryCacheKeys.IdfPackage(purl, customPrefix);

        cacheKey.Should().Be("prod:idf:pkg:pkg:npm/express@4.18.2");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfCorpusSize_GeneratesCorrectKey()
        => AdvisoryCacheKeys.IdfCorpusSize().Should().Be("concelier:idf:stats:corpus_size");

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfLastRefresh_GeneratesCorrectKey()
        => AdvisoryCacheKeys.IdfLastRefresh().Should().Be("concelier:idf:stats:last_refresh");

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfRefreshLock_GeneratesCorrectKey()
        => AdvisoryCacheKeys.IdfRefreshLock().Should().Be("concelier:idf:lock:refresh");

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfDocumentFrequency_GeneratesCorrectKey()
    {
        const string purl = "pkg:cargo/serde@1.0.0";

        var cacheKey = AdvisoryCacheKeys.IdfDocumentFrequency(purl);

        cacheKey.Should().Be("concelier:idf:df:pkg:cargo/serde@1.0.0");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfPackagePattern_GeneratesCorrectPattern()
        => AdvisoryCacheKeys.IdfPackagePattern().Should().Be("concelier:idf:pkg:*");

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfStatsHits_GeneratesCorrectKey()
        => AdvisoryCacheKeys.IdfStatsHits().Should().Be("concelier:idf:stats:hits");

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfStatsMisses_GeneratesCorrectKey()
        => AdvisoryCacheKeys.IdfStatsMisses().Should().Be("concelier:idf:stats:misses");

    #endregion
}
/// <summary>
/// Verifies the default values, configuration section name, and settable
/// properties of <c>PackageIdfOptions</c>.
/// </summary>
public class PackageIdfOptionsTests
{
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfOptions_DefaultValues_AreCorrect()
    {
        // A freshly constructed instance exposes the built-in defaults.
        var defaults = new PackageIdfOptions();

        defaults.Enabled.Should().BeTrue();
        defaults.IdfTtl.Should().Be(TimeSpan.FromHours(1));
        defaults.CorpusStatsTtl.Should().Be(TimeSpan.FromHours(4));
        defaults.MinIdfThreshold.Should().Be(0.01);
        defaults.DefaultIdfWeight.Should().Be(1.0);
        defaults.MaxCacheEntries.Should().Be(100_000);
        defaults.NormalizeScores.Should().BeTrue();
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfOptions_SectionName_IsCorrect()
        => PackageIdfOptions.SectionName.Should().Be("Concelier:PackageIdf");

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfOptions_CanBeCustomized()
    {
        // Every property is overridden with a non-default value...
        var customized = new PackageIdfOptions
        {
            Enabled = false,
            IdfTtl = TimeSpan.FromMinutes(30),
            CorpusStatsTtl = TimeSpan.FromHours(2),
            MinIdfThreshold = 0.05,
            DefaultIdfWeight = 0.5,
            MaxCacheEntries = 50_000,
            NormalizeScores = false
        };

        // ...and must read back exactly as assigned.
        customized.Enabled.Should().BeFalse();
        customized.IdfTtl.Should().Be(TimeSpan.FromMinutes(30));
        customized.CorpusStatsTtl.Should().Be(TimeSpan.FromHours(2));
        customized.MinIdfThreshold.Should().Be(0.05);
        customized.DefaultIdfWeight.Should().Be(0.5);
        customized.MaxCacheEntries.Should().Be(50_000);
        customized.NormalizeScores.Should().BeFalse();
    }
}
/// <summary>
/// Conceptual checks for the IDF formula <c>log(N / (1 + df))</c> and its
/// max-normalisation. The formula is recomputed locally with <see cref="Math.Log(double)"/>;
/// no production service is invoked.
/// </summary>
public class IdfFormulaTests
{
    /// <summary>
    /// Computes the raw (un-normalised) IDF as the natural logarithm of
    /// <paramref name="corpusSize"/> divided by one plus <paramref name="docFrequency"/>.
    /// </summary>
    /// <param name="corpusSize">Total number of documents (N).</param>
    /// <param name="docFrequency">Number of documents containing the package (df).</param>
    /// <returns>The raw IDF value; slightly negative when df equals N.</returns>
    private static double ComputeRawIdf(long corpusSize, long docFrequency)
        => Math.Log((double)corpusSize / (1 + docFrequency));

    [Trait("Category", TestCategories.Unit)]
    [Theory]
    [InlineData(10000, 1, 8.52)]     // Rare package: log(10000/2) ≈ 8.52
    [InlineData(10000, 5000, 0.69)]  // Common package: log(10000/5001) ≈ 0.69
    [InlineData(10000, 10000, 0.0)]  // Ubiquitous: log(10000/10001) ≈ 0
    public void IdfFormula_ComputesCorrectly(long corpusSize, long docFrequency, double expectedRawIdf)
    {
        // Per the original note, this mirrors the formula used in UpdateCorpusStatsAsync
        // (that method is not visible from this file): IDF = log(N / (1 + df)).

        // Act
        var rawIdf = ComputeRawIdf(corpusSize, docFrequency);

        // Assert - the expected values are rounded, hence the 0.1 tolerance
        rawIdf.Should().BeApproximately(expectedRawIdf, 0.1);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfFormula_RarePackageHasHighWeight()
    {
        // Arrange
        const long corpusSize = 100_000;
        const long rareDocFrequency = 5;
        const long commonDocFrequency = 50_000;

        // Act
        var rareIdf = ComputeRawIdf(corpusSize, rareDocFrequency);
        var commonIdf = ComputeRawIdf(corpusSize, commonDocFrequency);

        // Assert - rare package should have much higher IDF
        rareIdf.Should().BeGreaterThan(commonIdf * 5);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfNormalization_ScalesToUnitInterval()
    {
        // Arrange - simulate corpus with various document frequencies
        var corpusSize = 100_000L;
        var documentFrequencies = new Dictionary<string, long>
        {
            ["pkg:npm/lodash"] = 80_000,   // Very common
            ["pkg:npm/express"] = 40_000,  // Common
            ["pkg:cargo/serde"] = 10_000,  // Moderate
            ["pkg:npm/obscure"] = 100,     // Rare
            ["pkg:cargo/unique"] = 1       // Very rare
        };

        // Act - compute raw IDFs, then divide by the largest to normalise to 0-1
        var rawIdfs = documentFrequencies.ToDictionary(
            kv => kv.Key,
            kv => ComputeRawIdf(corpusSize, kv.Value));
        var maxIdf = rawIdfs.Values.Max();
        var normalizedIdfs = rawIdfs.ToDictionary(
            kv => kv.Key,
            kv => kv.Value / maxIdf);

        // Assert - all values should be in [0, 1]
        foreach (var (pkg, idf) in normalizedIdfs)
        {
            idf.Should().BeGreaterThanOrEqualTo(0.0, because: $"{pkg} should have non-negative IDF");
            idf.Should().BeLessThanOrEqualTo(1.0, because: $"{pkg} should have IDF ≤ 1.0");
        }

        // The rarest package defines the maximum, so it normalises to ~1.0
        normalizedIdfs["pkg:cargo/unique"].Should().BeApproximately(1.0, 0.01);

        // The most common package should have low IDF
        normalizedIdfs["pkg:npm/lodash"].Should().BeLessThan(0.3);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfWeight_DiscriminatesBetweenPackages()
    {
        // Validates that IDF gives meaningful discrimination for linkset correlation.

        // Arrange
        var corpusSize = 50_000L;
        var commonPkgDf = 25_000L; // appears in many advisories (low discrimination)
        var rarePkgDf = 50L;       // appears in few advisories (high discrimination)

        // Act
        var commonIdf = ComputeRawIdf(corpusSize, commonPkgDf);
        var rareIdf = ComputeRawIdf(corpusSize, rarePkgDf);

        // Normalize against the larger of the two
        var maxIdf = Math.Max(commonIdf, rareIdf);
        var commonNorm = commonIdf / maxIdf;
        var rareNorm = rareIdf / maxIdf;

        // Assert
        // When two advisories share a rare package, it should be a stronger
        // correlation signal than when they share a common package
        rareNorm.Should().BeGreaterThan(commonNorm * 3,
            because: "sharing a rare package should be 3x more discriminative than sharing a common package");
    }
}
/// <summary>
/// Smoke tests for <c>PackageIdfMetrics</c>: published source/meter names,
/// construction and disposal, and that the recording methods can be called safely.
/// </summary>
public class PackageIdfMetricsTests
{
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_ActivitySourceName_IsCorrect()
    {
        // Assert
        PackageIdfMetrics.ActivitySourceName.Should().Be("StellaOps.Concelier.PackageIdf");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_MeterName_IsCorrect()
    {
        // Assert
        PackageIdfMetrics.MeterName.Should().Be("StellaOps.Concelier.PackageIdf");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_CanBeCreatedAndDisposed()
    {
        // Arrange & Act - disposal happens when the using scope ends
        using var metrics = new PackageIdfMetrics();

        // Assert - no exception thrown
        metrics.Should().NotBeNull();
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_RecordsOperations_WithoutException()
    {
        // Arrange
        using var metrics = new PackageIdfMetrics();

        // Act & Assert - none of these should throw
        metrics.RecordHit();
        metrics.RecordHits(5);
        metrics.RecordMiss();
        metrics.RecordMisses(3);
        metrics.RecordRefresh(100);
        metrics.RecordLatency(15.5, "get");
        metrics.RecordIdfWeight(0.75);
        metrics.UpdateCorpusSize(50_000);
        metrics.UpdateCachedEntries(10_000);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_StartActivity_ReturnsNullWhenNoListeners()
    {
        // Act - with no listeners registered the result is expected to be null, but
        // that is not asserted: a listener registered elsewhere in the test process
        // would make such an assertion flaky. Any activity that is returned is
        // disposed so it does not stay open.
        // NOTE(review): assumes StartActivity returns a disposable Activity? - confirm.
        Action startAndDispose = () => PackageIdfMetrics.StartActivity("test-operation")?.Dispose();

        // Assert - starting (and disposing) an activity must never throw
        startAndDispose.Should().NotThrow();
    }
}

View File

@@ -0,0 +1,636 @@
// -----------------------------------------------------------------------------
// LinksetCorrelationV2Tests.cs
// Sprint: SPRINT_20260125_001_Concelier_linkset_correlation_v2
// Task: CORR-V2-001 through CORR-V2-008
// Description: Comprehensive tests for V2 correlation algorithm
// -----------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using FluentAssertions;
using StellaOps.Concelier.Core.Linksets;
using Xunit;
namespace StellaOps.Concelier.Core.Tests.Linksets;
/// <summary>
/// Behavioural tests for <c>LinksetCorrelationV2.Compute</c>, covering alias-graph
/// connectivity, pairwise package coverage, reference overlap, typed conflict
/// severities, patch lineage, version compatibility, integrated confidence,
/// and determinism of the output.
/// </summary>
public sealed class LinksetCorrelationV2Tests
{
    #region CORR-V2-001: Alias Connectivity (Graph-based)

    [Fact]
    public void AliasConnectivity_TransitiveBridging_CorrectlyLinksThreeSources()
    {
        // obs-a (CVE-X) and obs-c (GHSA-Y) share nothing directly; obs-b carries both
        // identifiers and bridges them. V1 would score the empty intersection as 0,
        // V2 is expected to score high through the transitive link.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234", "GHSA-aaaa-bbbb-cccc" }),
            CreateInput("obs-c", "osv", aliases: new[] { "GHSA-aaaa-bbbb-cccc" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        // Alias signals only: 0.30*1.0 + 0.10*1.0 + neutrals = 0.50
        outcome.Confidence.Should().BeGreaterThanOrEqualTo(0.5, "transitive bridging should yield positive confidence");
        outcome.SignalScores["aliasConnectivity"].Should().Be(1.0, "all observations connected via alias graph");
        outcome.Conflicts.Should().NotContain(c => c.Reason == "alias-inconsistency",
            "no inconsistency when transitively connected");
    }

    [Fact]
    public void AliasConnectivity_DisjointAliases_ProducesLowScoreAndConflict()
    {
        // Two sources whose alias sets never meet, with nothing to bridge them.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1111" }),
            CreateInput("obs-b", "vendor", aliases: new[] { "VENDOR-ADV-999" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["aliasConnectivity"].Should().Be(0.5, "50% in LCC (each disconnected)");
        outcome.Conflicts.Should().Contain(c => c.Reason == "alias-inconsistency");
    }

    [Fact]
    public void AliasConnectivity_DistinctCVEs_ProducesHardConflict()
    {
        // Two different CVE identifiers inside one cluster must be a hard conflict.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1111" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-2222" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.Conflicts.Should().Contain(c =>
            c.Reason == "distinct-cves" && c.Severity == ConflictSeverity.Hard);
        outcome.Confidence.Should().BeLessThan(0.5, "hard conflict should significantly reduce confidence");
    }

    [Fact]
    public void AliasConnectivity_SingleObservation_ReturnsFullScoreWithAliases()
    {
        // A lone observation that carries an alias.
        var observations = new[] { CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }) };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["aliasConnectivity"].Should().Be(1.0);
        outcome.Conflicts.Should().BeEmpty();
    }

    [Fact]
    public void AliasConnectivity_NoAliases_ReturnsZeroScore()
    {
        // Neither observation has any alias at all.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", aliases: Array.Empty<string>()),
            CreateInput("obs-b", "vendor", aliases: Array.Empty<string>())
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["aliasConnectivity"].Should().Be(0.0);
    }

    #endregion

    #region CORR-V2-002: Package Coverage (Pairwise + IDF)

    [Fact]
    public void PackageCoverage_ThinSource_DoesNotCollapseScore()
    {
        // obs-a and obs-b share a package; obs-c lists none. V1's intersection across
        // all sources would give 0, V2's pairwise approach should stay positive.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@4.17.20" }),
            CreateInput("obs-c", "vendor", purls: Array.Empty<string>())
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["packageCoverage"].Should().BeGreaterThan(0,
            "thin source should not collapse pairwise coverage");
    }

    [Fact]
    public void PackageCoverage_ExactPurlMatch_BoostsScore()
    {
        // Identical PURLs, version included.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@4.17.21" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["packageCoverage"].Should().BeGreaterThanOrEqualTo(0.8,
            "exact PURL match should boost score");
    }

    [Fact]
    public void PackageCoverage_NoOverlap_ReturnsZero()
    {
        // Unrelated packages from different ecosystems.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:pypi/requests@2.28.0" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["packageCoverage"].Should().Be(0);
    }

    [Fact]
    public void PackageCoverage_WithIdfProvider_WeightsRarePackagesHigher()
    {
        // Both observations name the same package; a custom IDF provider is supplied.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:cargo/obscure-lib@1.0.0" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:cargo/obscure-lib@1.0.0" })
        };

        // The provider gives the "obscure" package a high weight and everything else 1.0.
        double IdfProvider(string packageKey) => packageKey.Contains("obscure") ? 5.0 : 1.0;

        var outcome = LinksetCorrelationV2.Compute(observations, packageIdfProvider: IdfProvider);

        outcome.SignalScores["packageCoverage"].Should().BeGreaterThan(0.5);
    }

    #endregion

    #region CORR-V2-003: Reference Score (Positive-Only)

    [Fact]
    public void ReferenceScore_ZeroOverlap_ReturnsNeutral_NoConflict()
    {
        // Each source cites only its own URL. V1 would emit reference-clash here;
        // V2 should stay neutral (0.5) and raise no conflict.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", references: new[] { "https://nvd.nist.gov/vuln/detail/CVE-2025-1234" }),
            CreateInput("obs-b", "ghsa", references: new[] { "https://github.com/advisories/GHSA-xxxx" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["referenceOverlap"].Should().Be(0.5, "zero overlap = neutral, not negative");
        outcome.Conflicts.Should().NotContain(c => c.Reason == "reference-clash",
            "no conflict for simple disjoint references");
    }

    [Fact]
    public void ReferenceScore_PartialOverlap_ProducesPositiveScore()
    {
        // One URL is shared, one is unique to each side.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", references: new[]
            {
                "https://example.com/advisory",
                "https://nvd.nist.gov/vuln/detail/CVE-2025-1234"
            }),
            CreateInput("obs-b", "ghsa", references: new[]
            {
                "https://example.com/advisory",
                "https://github.com/advisories/GHSA-xxxx"
            })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["referenceOverlap"].Should().BeGreaterThan(0.5);
    }

    [Fact]
    public void ReferenceScore_NormalizesUrls()
    {
        // The same page written with a different scheme, host casing and a tracking parameter.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", references: new[] { "http://Example.COM/advisory?utm_source=test" }),
            CreateInput("obs-b", "ghsa", references: new[] { "https://example.com/advisory" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        // The two URLs are expected to compare equal once normalised.
        outcome.SignalScores["referenceOverlap"].Should().BeGreaterThan(0.5);
    }

    #endregion

    #region CORR-V2-004: Typed Conflict Severities

    [Fact]
    public void ConflictPenalty_HardConflict_AppliesLargePenalty()
    {
        // Distinct CVE identifiers produce a hard conflict.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1111" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-2222" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        var firstHard = outcome.Conflicts.FirstOrDefault(c => c.Severity == ConflictSeverity.Hard);
        firstHard.Should().NotBeNull();
        outcome.Confidence.Should().BeLessThan(0.5);
    }

    [Fact]
    public void ConflictPenalty_SoftConflict_AppliesSmallPenalty()
    {
        // Same CVE; the version sets overlap (4.17.20 is common) but are not identical.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1234" },
                purls: new[] { "pkg:npm/lodash@4.17.20", "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa",
                aliases: new[] { "CVE-2025-1234" },
                purls: new[] { "pkg:npm/lodash@4.17.20", "pkg:npm/lodash@4.17.19" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        // Overlapping-but-different ranges should surface as a soft divergence.
        var softDivergence = outcome.Conflicts.FirstOrDefault(c =>
            c.Severity == ConflictSeverity.Soft && c.Reason == "affected-range-divergence");
        softDivergence.Should().NotBeNull("overlapping but non-equivalent ranges should produce soft conflict");
        outcome.Confidence.Should().BeGreaterThan(0.5, "soft conflicts should not severely impact confidence");
    }

    [Fact]
    public void ConflictPenalty_Saturates_AtMaximum()
    {
        // Several hard conflicts at once: distinct CVEs plus unrelated versions.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1111" },
                purls: new[] { "pkg:npm/lodash@1.0.0" }),
            CreateInput("obs-b", "ghsa",
                aliases: new[] { "CVE-2025-2222" },
                purls: new[] { "pkg:npm/lodash@9.0.0" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        // Penalties must not drive confidence under the 0.1 floor.
        outcome.Confidence.Should().BeGreaterThanOrEqualTo(0.1);
    }

    #endregion

    #region CORR-V2-005: Patch Lineage

    [Fact]
    public void PatchLineage_ExactCommitShaMatch_ProducesHighScore()
    {
        // Both observations reference the very same commit URL.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1234" },
                patchReferences: new[] { "https://github.com/org/repo/commit/abc123def456789012345678901234567890abcd" }),
            CreateInput("obs-b", "ghsa",
                aliases: new[] { "CVE-2025-1234" },
                patchReferences: new[] { "https://github.com/org/repo/commit/abc123def456789012345678901234567890abcd" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["patchLineage"].Should().Be(1.0, "exact commit SHA match is very strong signal");
    }

    [Fact]
    public void PatchLineage_DifferentCommits_ProducesZeroScore()
    {
        // The commit SHAs differ between the two sources.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd",
                patchReferences: new[] { "https://github.com/org/repo/commit/1111111111111111111111111111111111111111" }),
            CreateInput("obs-b", "ghsa",
                patchReferences: new[] { "https://github.com/org/repo/commit/2222222222222222222222222222222222222222" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["patchLineage"].Should().Be(0);
    }

    [Fact]
    public void PatchLineage_NoPatchData_ReturnsZero()
    {
        // Neither observation supplies patch references.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["patchLineage"].Should().Be(0);
    }

    #endregion

    #region CORR-V2-006: Version Compatibility

    [Fact]
    public void VersionCompatibility_EquivalentRanges_ProducesHighScore()
    {
        // Same package, same version on both sides.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@4.17.21" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["versionCompatibility"].Should().BeGreaterThanOrEqualTo(0.8);
        outcome.Conflicts.Should().NotContain(c =>
            c.Reason == "affected-range-divergence" || c.Reason == "disjoint-version-ranges");
    }

    [Fact]
    public void VersionCompatibility_OverlappingRanges_ProducesMediumScoreWithSoftConflict()
    {
        // The version sets intersect on 4.17.20 but are not equal.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21", "pkg:npm/lodash@4.17.20" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@4.17.20", "pkg:npm/lodash@4.17.19" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.SignalScores["versionCompatibility"].Should().BeInRange(0.4, 0.8);
        outcome.Conflicts.Should().Contain(c =>
            c.Reason == "affected-range-divergence" && c.Severity == ConflictSeverity.Soft);
    }

    [Fact]
    public void VersionCompatibility_DisjointRanges_ProducesLowScoreWithHardConflict()
    {
        // Same package, but the versions have nothing in common.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@1.0.0" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@9.0.0" })
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.Conflicts.Should().Contain(c =>
            c.Reason == "disjoint-version-ranges" && c.Severity == ConflictSeverity.Hard);
    }

    #endregion

    #region CORR-V2-008: Integrated Scoring

    [Fact]
    public void IntegratedScoring_HighConfidenceScenario()
    {
        // Every dimension agrees: alias, package, CPE, reference, patch, and fetch time.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1234" },
                purls: new[] { "pkg:npm/vulnerable-lib@2.0.0" },
                cpes: new[] { "cpe:2.3:a:vendor:vulnerable-lib:2.0.0:*:*:*:*:*:*:*" },
                references: new[] { "https://example.com/advisory" },
                patchReferences: new[] { "https://github.com/org/repo/commit/abc123def456789012345678901234567890abcd" },
                fetchedAt: DateTimeOffset.Parse("2025-01-25T10:00:00Z", CultureInfo.InvariantCulture)),
            CreateInput("obs-b", "ghsa",
                aliases: new[] { "CVE-2025-1234", "GHSA-xxxx-yyyy-zzzz" },
                purls: new[] { "pkg:npm/vulnerable-lib@2.0.0" },
                cpes: new[] { "cpe:2.3:a:vendor:vulnerable-lib:2.0.0:*:*:*:*:*:*:*" },
                references: new[] { "https://example.com/advisory", "https://github.com/advisories/GHSA-xxxx" },
                patchReferences: new[] { "https://github.com/org/repo/commit/abc123def456789012345678901234567890abcd" },
                fetchedAt: DateTimeOffset.Parse("2025-01-25T11:00:00Z", CultureInfo.InvariantCulture))
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.Confidence.Should().BeGreaterThanOrEqualTo(0.85, "all signals strong = high confidence");
        outcome.Conflicts.Should().BeEmpty();

        // Per-signal expectations
        outcome.SignalScores["aliasConnectivity"].Should().Be(1.0);
        outcome.SignalScores["aliasAuthority"].Should().Be(1.0); // a CVE is present
        outcome.SignalScores["packageCoverage"].Should().BeGreaterThanOrEqualTo(0.8);
        outcome.SignalScores["patchLineage"].Should().Be(1.0);
        outcome.SignalScores["freshness"].Should().Be(1.0); // fetched within 48h of each other
    }

    [Fact]
    public void IntegratedScoring_MixedSignalsScenario()
    {
        // A blend of strong and weak evidence. The aliases are disconnected, so an
        // alias-inconsistency conflict is expected.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1234" },
                purls: new[] { "pkg:npm/lodash@4.17.21" },
                fetchedAt: DateTimeOffset.Parse("2025-01-10T00:00:00Z", CultureInfo.InvariantCulture)),
            CreateInput("obs-b", "vendor",
                aliases: new[] { "VENDOR-2025-001" },       // vendor ID only, no CVE
                purls: new[] { "pkg:npm/lodash@4.17.20" },  // a different version
                fetchedAt: DateTimeOffset.Parse("2025-01-25T00:00:00Z", CultureInfo.InvariantCulture)) // 15 days later
        };

        var outcome = LinksetCorrelationV2.Compute(observations);

        // Disconnected aliases plus version divergence lower the confidence; 0.1 is the
        // floor when conflicts exist alongside some evidence.
        outcome.Confidence.Should().BeInRange(0.1, 0.4, "mixed signals with conflicts = low-moderate confidence");
        outcome.SignalScores["aliasConnectivity"].Should().BeLessThan(1.0); // disconnected
        outcome.SignalScores["freshness"].Should().BeLessThan(0.5); // 15-day spread
    }

    [Fact]
    public void IntegratedScoring_EmptyInputs_ReturnsFullConfidence()
    {
        // No observations at all.
        var observations = Array.Empty<LinksetCorrelationV2.InputV2>();

        var outcome = LinksetCorrelationV2.Compute(observations);

        outcome.Confidence.Should().Be(1.0);
        outcome.Conflicts.Should().BeEmpty();
    }

    #endregion

    #region Determinism Tests

    [Fact]
    public void Determinism_SameInputs_ProduceSameOutput()
    {
        // The same array is scored twice.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234", "GHSA-xxxx" })
        };

        var firstRun = LinksetCorrelationV2.Compute(observations);
        var secondRun = LinksetCorrelationV2.Compute(observations);

        firstRun.Confidence.Should().Be(secondRun.Confidence);
        firstRun.Conflicts.Should().BeEquivalentTo(secondRun.Conflicts);
        firstRun.SignalScores.Should().BeEquivalentTo(secondRun.SignalScores);
    }

    [Fact]
    public void Determinism_InputOrdering_DoesNotAffectResult()
    {
        // Identical observations, supplied in opposite order.
        var forwardOrder = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234" })
        };
        var reverseOrder = new[]
        {
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234" }),
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" })
        };

        var forwardOutcome = LinksetCorrelationV2.Compute(forwardOrder);
        var reverseOutcome = LinksetCorrelationV2.Compute(reverseOrder);

        forwardOutcome.Confidence.Should().Be(reverseOutcome.Confidence);
    }

    [Fact]
    public void Conflicts_AreDeduplicated()
    {
        // The observations share a CVE, so they generate no alias-inconsistency of
        // their own; the only conflicts come from the caller-supplied list below.
        var observations = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234" }) // same CVE = connected
        };
        var extraConflicts = new List<AdvisoryLinksetConflict>
        {
            new("custom-field", "custom-reason", new[] { "a", "b" }),
            new("custom-field", "custom-reason", new[] { "a", "b" }) // exact duplicate
        };

        var outcome = LinksetCorrelationV2.Compute(observations, extraConflicts);

        // The duplicate must be collapsed into a single entry.
        outcome.Conflicts.Count(c => c.Reason == "custom-reason").Should().Be(1);
    }

    #endregion

    #region Helper Methods

    /// <summary>
    /// Builds an <c>InputV2</c> for a test. Omitted alias, PURL, CPE and reference
    /// lists become empty arrays; <paramref name="patchReferences"/> is forwarded
    /// unchanged, including when it is null.
    /// </summary>
    private static LinksetCorrelationV2.InputV2 CreateInput(
        string observationId,
        string? vendor = null,
        string[]? aliases = null,
        string[]? purls = null,
        string[]? cpes = null,
        string[]? references = null,
        string[]? patchReferences = null,
        DateTimeOffset? fetchedAt = null)
        => new LinksetCorrelationV2.InputV2(
            ObservationId: observationId,
            Vendor: vendor,
            FetchedAt: fetchedAt,
            Aliases: aliases ?? Array.Empty<string>(),
            Purls: purls ?? Array.Empty<string>(),
            Cpes: cpes ?? Array.Empty<string>(),
            References: references ?? Array.Empty<string>(),
            PatchReferences: patchReferences);

    #endregion
}

View File

@@ -0,0 +1,561 @@
// -----------------------------------------------------------------------------
// TextSimilarityScorerTests.cs
// Sprint: SPRINT_20260125_001_Concelier_linkset_correlation_v2
// Task: CORR-V2-010
// Description: Unit tests and performance benchmarks for TextSimilarityScorer
// -----------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using FluentAssertions;
using StellaOps.Concelier.Core.Linksets;
using StellaOps.TestKit;
using Xunit;
namespace StellaOps.Concelier.Core.Tests.Linksets;
/// <summary>
/// Unit tests for <see cref="TextSimilarityScorer"/>.
/// </summary>
public class TextSimilarityScorerTests
{
private readonly TextSimilarityScorer _scorer = new();
#region Tokenization Tests
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Tokenize_EmptyString_ReturnsEmpty()
{
    // An empty input string must yield no tokens.
    var result = _scorer.Tokenize("");

    result.Should().BeEmpty();
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Tokenize_NullString_ReturnsEmpty()
{
    // Null is passed on purpose (hence the null-forgiving operator): it must yield
    // an empty result rather than throw.
    var result = _scorer.Tokenize(null!);

    result.Should().BeEmpty();
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Tokenize_NormalizesToLowercase()
{
    // Input mixes upper-case and title-case words.
    var mixedCase = "BUFFER OVERFLOW Memory Corruption";

    var result = _scorer.Tokenize(mixedCase);

    // Each token must already equal its own lower-invariant form.
    result.Should().AllSatisfy(token => token.Should().Be(token.ToLowerInvariant()));
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Tokenize_RemovesStopWords()
{
    // The sentence contains the stop words "the", "an" and "to".
    var sentence = "The vulnerability allows an attacker to execute code";

    var result = _scorer.Tokenize(sentence);

    // None of those stop words may appear in the output.
    result.Should().NotContain("the");
    result.Should().NotContain("an");
    result.Should().NotContain("to");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Tokenize_RemovesShortTokens()
{
    // "ID" and "in" are the two-letter words in this input.
    var sentence = "CVE ID in XSS bug";

    var result = _scorer.Tokenize(sentence);

    // Tokens under three characters are expected to be dropped.
    result.Should().NotContain("id");
    result.Should().NotContain("in");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Tokenize_RemovesNumericTokens()
{
    // Two of the four words consist of digits only.
    var sentence = "version 123 release 2024";

    var result = _scorer.Tokenize(sentence);

    // Purely numeric tokens are expected to be dropped.
    result.Should().NotContain("123");
    result.Should().NotContain("2024");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Tokenize_KeepsAlphanumericTokens()
{
    // Every word mixes letters with digits.
    var sentence = "CVE2024 log4j2 spring4shell";

    var result = _scorer.Tokenize(sentence);

    // Mixed letter/digit tokens must survive, in lower case.
    result.Should().Contain("cve2024");
    result.Should().Contain("log4j2");
    result.Should().Contain("spring4shell");
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Tokenize_IsDeterministic()
{
    // The same sentence is tokenized twice.
    var sentence = "Memory corruption in JSON parser leads to arbitrary code execution";

    var firstPass = _scorer.Tokenize(sentence);
    var secondPass = _scorer.Tokenize(sentence);

    // Both passes must contain the same tokens in the same order.
    firstPass.Should().BeEquivalentTo(secondPass, cfg => cfg.WithStrictOrdering());
}
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Tokenize_SortsTokensForDeterminism()
{
    // The words are deliberately given out of alphabetical order.
    var unsorted = "zebra alpha memory parser";

    var result = _scorer.Tokenize(unsorted);

    // The output is expected to be in ascending order.
    result.Should().BeInAscendingOrder();
}
#endregion
#region Pairwise Similarity Tests
/// <summary>
/// Compares a description with itself and checks that the similarity is
/// within 0.01 of 1.0.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputePairwiseSimilarity_IdenticalTexts_ReturnsOne()
{
    // Arrange
    const string description = "A heap-based buffer overflow in libpng allows remote attackers to execute arbitrary code";

    // Act
    var score = _scorer.ComputePairwiseSimilarity(description, description);

    // Assert
    score.Should().BeApproximately(1.0, 0.01);
}
/// <summary>
/// Compares two descriptions of different vulnerability classes and checks
/// that the similarity stays below 0.3.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputePairwiseSimilarity_CompletelyDifferent_ReturnsLowScore()
{
    // Arrange
    const string sqlInjection = "SQL injection in database query handler";
    const string memoryCorruption = "Memory corruption in graphics renderer";

    // Act
    var score = _scorer.ComputePairwiseSimilarity(sqlInjection, memoryCorruption);

    // Assert
    score.Should().BeLessThan(0.3);
}
/// <summary>
/// Compares two differently worded descriptions of the same vulnerability
/// and checks that the similarity exceeds 0.2.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputePairwiseSimilarity_SimilarDescriptions_ReturnsPositiveScore()
{
    // Arrange - one vulnerability, two phrasings
    const string firstWording = "A heap-based buffer overflow in the PNG image parser allows remote code execution";
    const string secondWording = "Remote code execution via heap buffer overflow in PNG image processing library";

    // Act
    var score = _scorer.ComputePairwiseSimilarity(firstWording, secondWording);

    // Assert - short texts with stop words stripped typically land in a
    // moderate band (roughly 0.2-0.5), so only the lower bound is enforced
    score.Should().BeGreaterThan(0.2);
}
/// <summary>
/// Checks that an empty first argument yields a similarity of exactly 0.0.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputePairwiseSimilarity_EmptyFirst_ReturnsZero()
{
    // Arrange
    const string empty = "";
    const string populated = "some text here";

    // Act
    var score = _scorer.ComputePairwiseSimilarity(empty, populated);

    // Assert
    score.Should().Be(0.0);
}
/// <summary>
/// Checks that an empty second argument yields a similarity of exactly 0.0.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputePairwiseSimilarity_EmptySecond_ReturnsZero()
{
    // Arrange
    const string populated = "some text here";
    const string empty = "";

    // Act
    var score = _scorer.ComputePairwiseSimilarity(populated, empty);

    // Assert
    score.Should().Be(0.0);
}
/// <summary>
/// Compares two identical texts made up solely of "the and or but" and
/// checks that the similarity is exactly 0.0.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputePairwiseSimilarity_OnlyStopWords_ReturnsZero()
{
    // Arrange - both sides carry the same stop-word-only content
    const string stopWordsOnly = "the and or but";

    // Act
    var score = _scorer.ComputePairwiseSimilarity(stopWordsOnly, stopWordsOnly);

    // Assert - nothing is expected to remain once stop words are removed
    score.Should().Be(0.0);
}
#endregion
#region Average Similarity Tests
/// <summary>
/// Checks that a collection holding one description averages to exactly 0.0.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeAverageSimilarity_SingleDescription_ReturnsZero()
{
    // Arrange
    string[] loneDescription = { "Only one description here" };

    // Act
    var average = _scorer.ComputeAverageSimilarity(loneDescription);

    // Assert
    average.Should().Be(0.0);
}
/// <summary>
/// Checks that an empty collection averages to exactly 0.0.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeAverageSimilarity_EmptyCollection_ReturnsZero()
{
    // Arrange
    var noDescriptions = Array.Empty<string>();

    // Act
    var average = _scorer.ComputeAverageSimilarity(noDescriptions);

    // Assert
    average.Should().Be(0.0);
}
/// <summary>
/// Checks that three copies of the same description average to within
/// 0.01 of 1.0.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeAverageSimilarity_IdenticalDescriptions_ReturnsOne()
{
    // Arrange
    const string repeated = "A critical buffer overflow vulnerability in the image processing library";
    string[] copies = { repeated, repeated, repeated };

    // Act
    var average = _scorer.ComputeAverageSimilarity(copies);

    // Assert
    average.Should().BeApproximately(1.0, 0.01);
}
/// <summary>
/// Averages three related libpng descriptions and checks that the result
/// is above 0.1 and no greater than 1.0.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeAverageSimilarity_MixedSimilarity_ReturnsReasonableAverage()
{
    // Arrange - the same CVE as reported by three different sources
    string[] relatedReports =
    {
        "A heap-based buffer overflow in libpng before 1.6.37 allows remote attackers to cause denial of service",
        "Buffer overflow vulnerability in PNG library (libpng) can be exploited by remote attackers for DoS",
        "libpng contains a heap overflow that may lead to denial of service when processing malformed PNG files"
    };

    // Act
    var average = _scorer.ComputeAverageSimilarity(relatedReports);

    // Assert - related security texts usually score moderately (about
    // 0.1-0.4) once stop words are removed
    average.Should().BeGreaterThan(0.1)
        .And.BeLessThanOrEqualTo(1.0);
}
/// <summary>
/// Passes two identical descriptions separated by empty, null and
/// whitespace-only entries and checks that the average is within 0.01 of 1.0.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeAverageSimilarity_SkipsEmptyDescriptions()
{
    // Arrange
    const string meaningful = "A critical vulnerability in the parser";
    string[] withBlanks =
    {
        meaningful,
        "",
        null!,
        "   ",
        meaningful
    };

    // Act
    var average = _scorer.ComputeAverageSimilarity(withBlanks);

    // Assert - only the two non-blank entries are expected to count
    average.Should().BeApproximately(1.0, 0.01);
}
#endregion
#region Options Tests
/// <summary>
/// Checks the property values of a freshly constructed
/// <see cref="TextSimilarityOptions"/>.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void TextSimilarityOptions_DefaultValues_AreCorrect()
{
    // Arrange & Act
    TextSimilarityOptions defaults = new();

    // Assert - flags first, then numeric settings, then the stop-word list
    defaults.Enabled.Should().BeFalse();
    defaults.EnableStemming.Should().BeFalse();
    defaults.Weight.Should().Be(0.05);
    defaults.MinTokenLength.Should().Be(3);
    defaults.CustomStopWords.Should().BeNull();
}
/// <summary>
/// Checks the value of <see cref="TextSimilarityOptions.SectionName"/>.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void TextSimilarityOptions_SectionName_IsCorrect()
{
    // Arrange
    const string expectedSection = "Concelier:Correlation:TextSimilarity";

    // Assert
    TextSimilarityOptions.SectionName.Should().Be(expectedSection);
}
/// <summary>
/// Builds a scorer with a custom stop-word list and checks that those
/// words are absent from the tokens while "remain" and "here" are present.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Scorer_WithCustomStopWords_UsesCustomList()
{
    // Arrange
    var customScorer = new TextSimilarityScorer(new TextSimilarityOptions
    {
        CustomStopWords = new[] { "custom", "stop", "words" }
    });

    // Act
    var result = customScorer.Tokenize("custom stop words remain here");

    // Assert - configured stop words are dropped
    result.Should().NotContain("custom")
        .And.NotContain("stop")
        .And.NotContain("words");

    // Assert - everything else is kept
    result.Should().Contain("remain")
        .And.Contain("here");
}
#endregion
#region Real-World Description Fixtures
/// <summary>
/// Checks that the average similarity of each fixture supplied by
/// <see cref="RealWorldDescriptionFixtures"/> lies within its expected bounds.
/// </summary>
/// <param name="descriptions">Descriptions to average.</param>
/// <param name="minExpected">Inclusive lower bound for the average.</param>
/// <param name="maxExpected">Inclusive upper bound for the average.</param>
/// <param name="scenario">Label included in the failure message.</param>
[Trait("Category", TestCategories.Unit)]
[Theory]
[MemberData(nameof(RealWorldDescriptionFixtures))]
public void ComputeAverageSimilarity_RealWorldFixtures_ReturnsExpectedRange(
    string[] descriptions,
    double minExpected,
    double maxExpected,
    string scenario)
{
    // Arrange - failure explanations for each bound
    var lowerBoundReason = $"scenario '{scenario}' should have similarity >= {minExpected}";
    var upperBoundReason = $"scenario '{scenario}' should have similarity <= {maxExpected}";

    // Act
    var average = _scorer.ComputeAverageSimilarity(descriptions);

    // Assert
    average.Should().BeGreaterThanOrEqualTo(minExpected, because: lowerBoundReason);
    average.Should().BeLessThanOrEqualTo(maxExpected, because: upperBoundReason);
}
/// <summary>
/// Supplies theory rows of the form
/// (descriptions, minimum expected, maximum expected, scenario label).
/// </summary>
/// <returns>Three rows: Log4Shell, unrelated vulnerabilities, and OpenSSL.</returns>
public static IEnumerable<object[]> RealWorldDescriptionFixtures()
{
    // CVE-2021-44228 (Log4Shell) - one vulnerability as worded by different sources
    string[] log4Shell =
    {
        "Apache Log4j2 2.0-beta9 through 2.15.0 (excluding security releases 2.12.2, 2.12.3, and 2.3.1) JNDI features used in configuration, log messages, and parameters do not protect against attacker controlled LDAP and other JNDI related endpoints.",
        "A flaw was found in the Java logging library Apache Log4j in version 2.x. When configured to use a JNDI URL with a LDAP scheme, an attacker can execute arbitrary code.",
        "Remote code execution vulnerability in Apache Log4j2 allows attackers to execute arbitrary code via JNDI lookup in log messages."
    };

    // Three vulnerabilities with nothing in common - low similarity expected
    string[] unrelated =
    {
        "SQL injection vulnerability in the login form allows authentication bypass",
        "Cross-site scripting (XSS) in the comments section enables script injection",
        "Buffer overflow in image processing library causes denial of service"
    };

    // One library, three distinct CVEs - moderate similarity expected
    string[] openSsl =
    {
        "OpenSSL before 3.0.7 allows remote attackers to cause a denial of service via a crafted X.509 certificate",
        "OpenSSL 3.0.x before 3.0.5 contains a heap-based buffer overflow in the SM2 implementation",
        "A timing-based side channel in OpenSSL allows recovery of private key material"
    };

    // Bounds are intentionally wide; related security texts typically
    // score somewhere around 0.1-0.5
    return new[]
    {
        new object[] { log4Shell, 0.05, 0.9, "Log4Shell - same CVE, different sources" },
        new object[] { unrelated, 0.0, 0.4, "Unrelated vulnerabilities" },
        new object[] { openSsl, 0.05, 0.6, "Same library (OpenSSL), different CVEs" }
    };
}
#endregion
#region Determinism Tests
/// <summary>
/// Averages the same descriptions three times and checks that all three
/// results are exactly equal.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputeAverageSimilarity_IsDeterministic()
{
    // Arrange
    string[] reports =
    {
        "A heap-based buffer overflow in libpng",
        "Buffer overflow in PNG library",
        "libpng heap overflow vulnerability"
    };

    // Act
    var firstRun = _scorer.ComputeAverageSimilarity(reports);
    var secondRun = _scorer.ComputeAverageSimilarity(reports);
    var thirdRun = _scorer.ComputeAverageSimilarity(reports);

    // Assert - exact equality is intentional: repeated runs must not drift
    firstRun.Should().Be(secondRun);
    secondRun.Should().Be(thirdRun);
}
/// <summary>
/// Scores the same pair of texts twice and checks that both results are
/// exactly equal.
/// </summary>
[Trait("Category", TestCategories.Unit)]
[Fact]
public void ComputePairwiseSimilarity_IsDeterministic()
{
    // Arrange
    const string left = "Memory corruption in JSON parser";
    const string right = "JSON parser memory corruption vulnerability";

    // Act
    var firstRun = _scorer.ComputePairwiseSimilarity(left, right);
    var secondRun = _scorer.ComputePairwiseSimilarity(left, right);

    // Assert - exact equality is intentional: repeated runs must not drift
    firstRun.Should().Be(secondRun);
}
#endregion
}
/// <summary>
/// Performance benchmarks for <see cref="TextSimilarityScorer"/>.
/// Target: at most 5 ms per pair.
/// </summary>
/// <remarks>
/// NOTE(review): these tests assert on wall-clock time, so results may be
/// sensitive to machine load - confirm the threshold suits the CI hardware.
/// </remarks>
public class TextSimilarityScorerBenchmarks
{
    /// <summary>Number of untimed calls made before measuring.</summary>
    private const int WarmupIterations = 10;

    /// <summary>Number of timed calls the average is computed over.</summary>
    private const int MeasuredIterations = 100;

    /// <summary>Maximum allowed time per compared pair, in milliseconds.</summary>
    private const double TargetMsPerPair = 5.0;

    private readonly TextSimilarityScorer _scorer = new();

    /// <summary>
    /// Checks that scoring one pair of realistic descriptions averages at
    /// most <see cref="TargetMsPerPair"/> milliseconds.
    /// </summary>
    [Trait("Category", TestCategories.Performance)]
    [Fact]
    public void ComputePairwiseSimilarity_MeetsPerformanceTarget()
    {
        // Arrange - realistic vulnerability descriptions
        var text1 = "A heap-based buffer overflow vulnerability has been discovered in the image processing library libpng version 1.6.37. Remote attackers can exploit this flaw by providing specially crafted PNG files, potentially leading to arbitrary code execution or denial of service conditions.";
        var text2 = "The PNG image handling library (libpng) contains a buffer overflow vulnerability in the row processing function. Exploitation of this issue allows attackers to execute arbitrary code in the context of the application using the affected library.";

        // Act - warm up, then time the measured iterations
        var averageMs = MeasureAverageMilliseconds(
            () => _scorer.ComputePairwiseSimilarity(text1, text2));

        // Assert
        averageMs.Should().BeLessThanOrEqualTo(TargetMsPerPair,
            because: $"text similarity computation should complete within {TargetMsPerPair}ms per pair (actual: {averageMs:F3} ms)");
    }

    /// <summary>
    /// Checks that averaging five descriptions costs at most
    /// <see cref="TargetMsPerPair"/> milliseconds per pair.
    /// </summary>
    [Trait("Category", TestCategories.Performance)]
    [Fact]
    public void ComputeAverageSimilarity_FiveDescriptions_MeetsPerformanceTarget()
    {
        // Arrange - 5 descriptions = 10 pairs
        var descriptions = new[]
        {
            "Apache Log4j2 JNDI features do not protect against attacker controlled LDAP endpoints",
            "A flaw in Log4j in version 2.x allows attackers to execute arbitrary code via JNDI lookup",
            "Remote code execution in Apache Log4j2 via malicious JNDI lookup patterns",
            "Log4j2 vulnerability allows remote attackers to execute code through JNDI injection",
            "Critical RCE vulnerability in Apache Log4j2 logging library through JNDI features"
        };
        const int pairsPerCall = 10; // C(5,2) = 10 pairs

        // Act - warm up, then time the measured iterations
        var averageMs = MeasureAverageMilliseconds(
            () => _scorer.ComputeAverageSimilarity(descriptions));
        var msPerPair = averageMs / pairsPerCall;

        // Assert
        msPerPair.Should().BeLessThanOrEqualTo(TargetMsPerPair,
            because: $"text similarity computation should complete within {TargetMsPerPair}ms per pair (actual: {msPerPair:F3} ms)");
    }

    /// <summary>
    /// Runs <paramref name="operation"/> <see cref="WarmupIterations"/> times
    /// untimed, then <see cref="MeasuredIterations"/> times under a stopwatch.
    /// </summary>
    /// <param name="operation">The call to benchmark.</param>
    /// <returns>Average elapsed milliseconds per timed call.</returns>
    private static double MeasureAverageMilliseconds(Action operation)
    {
        // Warmup - keeps first-call costs out of the timed section
        for (var i = 0; i < WarmupIterations; i++)
        {
            operation();
        }

        var stopwatch = Stopwatch.StartNew();
        for (var i = 0; i < MeasuredIterations; i++)
        {
            operation();
        }
        stopwatch.Stop();

        return stopwatch.Elapsed.TotalMilliseconds / MeasuredIterations;
    }
}