devops folders consolidate
This commit is contained in:
@@ -0,0 +1,379 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// PackageIdfServiceTests.cs
|
||||
// Sprint: SPRINT_20260125_001_Concelier_linkset_correlation_v2
|
||||
// Task: CORR-V2-007
|
||||
// Description: Unit tests for package IDF keys, options, and conceptual IDF computations
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using FluentAssertions;
|
||||
using Xunit;
|
||||
|
||||
using StellaOps.TestKit;
|
||||
namespace StellaOps.Concelier.Cache.Valkey.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// Unit tests for package IDF caching key generation, options, and IDF formulas.
|
||||
/// Note: Service-level tests requiring Valkey are in the Integration folder.
|
||||
/// </summary>
|
||||
/// <summary>
/// Unit tests for package IDF cache key generation.
/// Note: service-level tests that require a live Valkey instance live in the
/// Integration folder.
/// </summary>
public class PackageIdfKeyTests
{
    #region IDF Key Generation Tests

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfPackage_GeneratesCorrectKey()
    {
        // Arrange & Act: per-package IDF key using the default prefix.
        var generated = AdvisoryCacheKeys.IdfPackage("pkg:npm/lodash@4.17.21");

        // Assert
        generated.Should().Be("concelier:idf:pkg:pkg:npm/lodash@4.17.21");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfPackage_NormalizesToLowercase()
    {
        // Arrange & Act: mixed-case input must produce a lowercase key so
        // lookups are case-insensitive.
        var generated = AdvisoryCacheKeys.IdfPackage("pkg:NPM/Lodash@4.17.21");

        // Assert
        generated.Should().Be("concelier:idf:pkg:pkg:npm/lodash@4.17.21");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfPackage_WithCustomPrefix_GeneratesCorrectKey()
    {
        // Arrange
        const string purl = "pkg:npm/express@4.18.2";
        const string customPrefix = "prod:";

        // Act
        var generated = AdvisoryCacheKeys.IdfPackage(purl, customPrefix);

        // Assert: custom prefix replaces the default "concelier:" prefix.
        generated.Should().Be("prod:idf:pkg:pkg:npm/express@4.18.2");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfCorpusSize_GeneratesCorrectKey()
    {
        // Corpus-size stat key is fixed (no parameters).
        AdvisoryCacheKeys.IdfCorpusSize().Should().Be("concelier:idf:stats:corpus_size");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfLastRefresh_GeneratesCorrectKey()
    {
        // Last-refresh stat key is fixed (no parameters).
        AdvisoryCacheKeys.IdfLastRefresh().Should().Be("concelier:idf:stats:last_refresh");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfRefreshLock_GeneratesCorrectKey()
    {
        // Lock key used to serialize corpus refreshes.
        AdvisoryCacheKeys.IdfRefreshLock().Should().Be("concelier:idf:lock:refresh");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfDocumentFrequency_GeneratesCorrectKey()
    {
        // Arrange & Act: document-frequency key embeds the package purl.
        var generated = AdvisoryCacheKeys.IdfDocumentFrequency("pkg:cargo/serde@1.0.0");

        // Assert
        generated.Should().Be("concelier:idf:df:pkg:cargo/serde@1.0.0");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfPackagePattern_GeneratesCorrectPattern()
    {
        // Wildcard pattern used for scan/invalidate operations.
        AdvisoryCacheKeys.IdfPackagePattern().Should().Be("concelier:idf:pkg:*");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfStatsHits_GeneratesCorrectKey()
    {
        // Hit-counter stat key is fixed (no parameters).
        AdvisoryCacheKeys.IdfStatsHits().Should().Be("concelier:idf:stats:hits");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfStatsMisses_GeneratesCorrectKey()
    {
        // Miss-counter stat key is fixed (no parameters).
        AdvisoryCacheKeys.IdfStatsMisses().Should().Be("concelier:idf:stats:misses");
    }

    #endregion
}
|
||||
|
||||
/// <summary>
|
||||
/// Tests for PackageIdfOptions defaults and configuration.
|
||||
/// </summary>
|
||||
/// <summary>
/// Tests for <see cref="PackageIdfOptions"/> defaults and configuration.
/// </summary>
public class PackageIdfOptionsTests
{
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfOptions_DefaultValues_AreCorrect()
    {
        // Arrange & Act: a freshly constructed options object carries the
        // documented defaults.
        var sut = new PackageIdfOptions();

        // Assert
        sut.Enabled.Should().BeTrue();
        sut.IdfTtl.Should().Be(TimeSpan.FromHours(1));
        sut.CorpusStatsTtl.Should().Be(TimeSpan.FromHours(4));
        sut.MinIdfThreshold.Should().Be(0.01);
        sut.DefaultIdfWeight.Should().Be(1.0);
        sut.MaxCacheEntries.Should().Be(100_000);
        sut.NormalizeScores.Should().BeTrue();
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfOptions_SectionName_IsCorrect()
    {
        // The configuration section the options type binds from.
        PackageIdfOptions.SectionName.Should().Be("Concelier:PackageIdf");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfOptions_CanBeCustomized()
    {
        // Arrange & Act: override every property away from its default value.
        var sut = new PackageIdfOptions
        {
            Enabled = false,
            IdfTtl = TimeSpan.FromMinutes(30),
            CorpusStatsTtl = TimeSpan.FromHours(2),
            MinIdfThreshold = 0.05,
            DefaultIdfWeight = 0.5,
            MaxCacheEntries = 50_000,
            NormalizeScores = false
        };

        // Assert: every override is observable via its property.
        sut.Enabled.Should().BeFalse();
        sut.IdfTtl.Should().Be(TimeSpan.FromMinutes(30));
        sut.CorpusStatsTtl.Should().Be(TimeSpan.FromHours(2));
        sut.MinIdfThreshold.Should().Be(0.05);
        sut.DefaultIdfWeight.Should().Be(0.5);
        sut.MaxCacheEntries.Should().Be(50_000);
        sut.NormalizeScores.Should().BeFalse();
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Tests for IDF formula computation (conceptual validation).
|
||||
/// </summary>
|
||||
/// <summary>
/// Tests for IDF formula computation (conceptual validation).
/// The formula under test is IDF = log(N / (1 + df)), where N is the corpus
/// size and df the document frequency; the +1 smoothing keeps the denominator
/// non-zero and drives ubiquitous packages toward an IDF of ~0.
/// </summary>
public class IdfFormulaTests
{
    [Trait("Category", TestCategories.Unit)]
    [Theory]
    // BUG FIX: the rare-package expectation was 9.21 (= log(10000/1)), which
    // ignores the +1 smoothing; log(10000/(1+1)) = log(5000) ≈ 8.52, so the
    // original case failed the 0.1 tolerance. The trailing comment already
    // stated the correct value.
    [InlineData(10000, 1, 8.52)] // Rare package: log(10000/2) ≈ 8.52
    [InlineData(10000, 5000, 0.69)] // Common package: log(10000/5001) ≈ 0.69
    [InlineData(10000, 10000, 0.0)] // Ubiquitous: log(10000/10001) ≈ 0
    public void IdfFormula_ComputesCorrectly(long corpusSize, long docFrequency, double expectedRawIdf)
    {
        // This test validates the IDF formula used in UpdateCorpusStatsAsync
        // IDF = log(N / (1 + df))

        // Act
        var rawIdf = Math.Log((double)corpusSize / (1 + docFrequency));

        // Assert
        rawIdf.Should().BeApproximately(expectedRawIdf, 0.1);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfFormula_RarePackageHasHighWeight()
    {
        // Arrange: a package seen in 5 advisories vs. one seen in half the corpus.
        const long corpusSize = 100_000;
        const long rareDocFrequency = 5;
        const long commonDocFrequency = 50_000;

        // Act
        var rareIdf = Math.Log((double)corpusSize / (1 + rareDocFrequency));
        var commonIdf = Math.Log((double)corpusSize / (1 + commonDocFrequency));

        // Assert - rare package should have much higher IDF
        // (≈9.7 vs ≈0.69, comfortably more than a 5x gap).
        rareIdf.Should().BeGreaterThan(commonIdf * 5);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfNormalization_ScalesToUnitInterval()
    {
        // Arrange - simulate corpus with various document frequencies
        var corpusSize = 100_000L;
        var documentFrequencies = new Dictionary<string, long>
        {
            ["pkg:npm/lodash"] = 80_000, // Very common
            ["pkg:npm/express"] = 40_000, // Common
            ["pkg:cargo/serde"] = 10_000, // Moderate
            ["pkg:npm/obscure"] = 100, // Rare
            ["pkg:cargo/unique"] = 1 // Very rare
        };

        // Act - compute raw IDFs
        var rawIdfs = documentFrequencies.ToDictionary(
            kv => kv.Key,
            kv => Math.Log((double)corpusSize / (1 + kv.Value)));

        var maxIdf = rawIdfs.Values.Max();

        // Normalize to 0-1 by dividing through by the maximum raw IDF.
        var normalizedIdfs = rawIdfs.ToDictionary(
            kv => kv.Key,
            kv => kv.Value / maxIdf);

        // Assert - all values should be in [0, 1]
        foreach (var (pkg, idf) in normalizedIdfs)
        {
            idf.Should().BeGreaterThanOrEqualTo(0.0, because: $"{pkg} should have non-negative IDF");
            idf.Should().BeLessThanOrEqualTo(1.0, because: $"{pkg} should have IDF ≤ 1.0");
        }

        // The rarest package should have IDF close to 1.0
        normalizedIdfs["pkg:cargo/unique"].Should().BeApproximately(1.0, 0.01);

        // The most common package should have low IDF
        normalizedIdfs["pkg:npm/lodash"].Should().BeLessThan(0.3);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void IdfWeight_DiscriminatesBetweenPackages()
    {
        // This test validates that IDF provides meaningful discrimination
        // for linkset correlation

        // Arrange
        var corpusSize = 50_000L;

        // Package that appears in many advisories (low discrimination)
        var commonPkgDf = 25_000L;
        // Package that appears in few advisories (high discrimination)
        var rarePkgDf = 50L;

        // Act
        var commonIdf = Math.Log((double)corpusSize / (1 + commonPkgDf));
        var rareIdf = Math.Log((double)corpusSize / (1 + rarePkgDf));

        // Normalize against the larger of the two raw IDFs.
        var maxIdf = Math.Max(commonIdf, rareIdf);
        var commonNorm = commonIdf / maxIdf;
        var rareNorm = rareIdf / maxIdf;

        // Assert
        // When two advisories share a rare package, it should be a stronger
        // correlation signal than when they share a common package
        rareNorm.Should().BeGreaterThan(commonNorm * 3,
            because: "sharing a rare package should be 3x more discriminative than sharing a common package");
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Tests for PackageIdfMetrics instrumentation.
|
||||
/// </summary>
|
||||
/// <summary>
/// Tests for PackageIdfMetrics instrumentation: source/meter naming,
/// lifecycle, and that recording operations never throw.
/// </summary>
public class PackageIdfMetricsTests
{
    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_ActivitySourceName_IsCorrect()
    {
        // Assert - tracing source name is part of the observability contract.
        PackageIdfMetrics.ActivitySourceName.Should().Be("StellaOps.Concelier.PackageIdf");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_MeterName_IsCorrect()
    {
        // Assert - metrics meter name is part of the observability contract.
        PackageIdfMetrics.MeterName.Should().Be("StellaOps.Concelier.PackageIdf");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_CanBeCreatedAndDisposed()
    {
        // Arrange & Act
        using var metrics = new PackageIdfMetrics();

        // Assert - no exception thrown
        metrics.Should().NotBeNull();
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_RecordsOperations_WithoutException()
    {
        // Arrange
        using var metrics = new PackageIdfMetrics();

        // Act & Assert - none of these should throw
        metrics.RecordHit();
        metrics.RecordHits(5);
        metrics.RecordMiss();
        metrics.RecordMisses(3);
        metrics.RecordRefresh(100);
        metrics.RecordLatency(15.5, "get");
        metrics.RecordIdfWeight(0.75);
        metrics.UpdateCorpusSize(50_000);
        metrics.UpdateCachedEntries(10_000);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void PackageIdfMetrics_StartActivity_ReturnsNullWhenNoListeners()
    {
        // BUG FIX: the original assigned the activity to an unused local,
        // never disposed it, and asserted nothing despite the test name.
        // Activity is IDisposable, so dispose it if one is ever returned,
        // and assert the documented no-listener behavior.

        // Act
        using var activity = PackageIdfMetrics.StartActivity("test-operation");

        // Assert - with no ActivityListener registered, ActivitySource
        // returns null (expected OpenTelemetry behavior when no exporters
        // are configured).
        activity.Should().BeNull("no activity listeners are registered in unit tests");
    }
}
|
||||
@@ -0,0 +1,636 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// LinksetCorrelationV2Tests.cs
|
||||
// Sprint: SPRINT_20260125_001_Concelier_linkset_correlation_v2
|
||||
// Task: CORR-V2-001 through CORR-V2-008
|
||||
// Description: Comprehensive tests for V2 correlation algorithm
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using FluentAssertions;
|
||||
using StellaOps.Concelier.Core.Linksets;
|
||||
using Xunit;
|
||||
|
||||
namespace StellaOps.Concelier.Core.Tests.Linksets;
|
||||
|
||||
/// <summary>
|
||||
/// Tests for the V2 linkset correlation algorithm.
|
||||
/// Validates graph-based alias connectivity, pairwise package coverage,
|
||||
/// version compatibility, patch lineage, and typed conflict severities.
|
||||
/// </summary>
|
||||
public sealed class LinksetCorrelationV2Tests
|
||||
{
|
||||
#region CORR-V2-001: Alias Connectivity (Graph-based)
|
||||
|
||||
    [Fact]
    public void AliasConnectivity_TransitiveBridging_CorrectlyLinksThreeSources()
    {
        // Arrange: A has CVE-X, B has CVE-X + GHSA-Y, C has GHSA-Y.
        // V1 would produce score=0 (empty intersection across all sources);
        // V2 should produce a high score via transitive bridging (B bridges
        // A and C through its two aliases).
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234", "GHSA-aaaa-bbbb-cccc" }),
            CreateInput("obs-c", "osv", aliases: new[] { "GHSA-aaaa-bbbb-cccc" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        // With only alias signals: 0.30*1.0 + 0.10*1.0 + neutrals = 0.50
        // (weights per the V2 scoring model — confirm against LinksetCorrelationV2).
        result.Confidence.Should().BeGreaterThanOrEqualTo(0.5, "transitive bridging should yield positive confidence");
        result.SignalScores["aliasConnectivity"].Should().Be(1.0, "all observations connected via alias graph");
        result.Conflicts.Should().NotContain(c => c.Reason == "alias-inconsistency",
            "no inconsistency when transitively connected");
    }

    [Fact]
    public void AliasConnectivity_DisjointAliases_ProducesLowScoreAndConflict()
    {
        // Arrange: Two sources with completely disjoint aliases (no bridging),
        // so the alias graph has two disconnected components.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1111" }),
            CreateInput("obs-b", "vendor", aliases: new[] { "VENDOR-ADV-999" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert: score is the fraction of observations in the largest
        // connected component (1 of 2 here), plus a conflict flag.
        result.SignalScores["aliasConnectivity"].Should().Be(0.5, "50% in LCC (each disconnected)");
        result.Conflicts.Should().Contain(c => c.Reason == "alias-inconsistency");
    }

    [Fact]
    public void AliasConnectivity_DistinctCVEs_ProducesHardConflict()
    {
        // Arrange: Two different CVE identifiers in the cluster = hard conflict,
        // since a single linkset should describe a single vulnerability.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1111" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-2222" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.Conflicts.Should().Contain(c =>
            c.Reason == "distinct-cves" && c.Severity == ConflictSeverity.Hard);
        result.Confidence.Should().BeLessThan(0.5, "hard conflict should significantly reduce confidence");
    }

    [Fact]
    public void AliasConnectivity_SingleObservation_ReturnsFullScoreWithAliases()
    {
        // Arrange: a single observation is trivially self-connected.
        var inputs = new[] { CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }) };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.SignalScores["aliasConnectivity"].Should().Be(1.0);
        result.Conflicts.Should().BeEmpty();
    }

    [Fact]
    public void AliasConnectivity_NoAliases_ReturnsZeroScore()
    {
        // Arrange: no alias data at all — the signal has nothing to connect on.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", aliases: Array.Empty<string>()),
            CreateInput("obs-b", "vendor", aliases: Array.Empty<string>())
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.SignalScores["aliasConnectivity"].Should().Be(0.0);
    }
|
||||
|
||||
#endregion
|
||||
|
||||
#region CORR-V2-002: Package Coverage (Pairwise + IDF)
|
||||
|
||||
    [Fact]
    public void PackageCoverage_ThinSource_DoesNotCollapseScore()
    {
        // Arrange: Source A and B share a package, Source C has no packages.
        // V1 intersection-across-all would produce 0; V2's pairwise coverage
        // should still produce a positive score (C is skipped, not penalized).
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@4.17.20" }),
            CreateInput("obs-c", "vendor", purls: Array.Empty<string>())
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.SignalScores["packageCoverage"].Should().BeGreaterThan(0,
            "thin source should not collapse pairwise coverage");
    }

    [Fact]
    public void PackageCoverage_ExactPurlMatch_BoostsScore()
    {
        // Arrange: Same exact PURL (including version) in both observations.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@4.17.21" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.SignalScores["packageCoverage"].Should().BeGreaterThanOrEqualTo(0.8,
            "exact PURL match should boost score");
    }

    [Fact]
    public void PackageCoverage_NoOverlap_ReturnsZero()
    {
        // Arrange: Completely different packages (different ecosystems, too).
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:pypi/requests@2.28.0" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.SignalScores["packageCoverage"].Should().Be(0);
    }

    [Fact]
    public void PackageCoverage_WithIdfProvider_WeightsRarePackagesHigher()
    {
        // Arrange: both observations share a rare package; a custom IDF
        // provider supplies its weight.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:cargo/obscure-lib@1.0.0" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:cargo/obscure-lib@1.0.0" })
        };

        // IDF provider: rare package gets high weight (5.0 vs baseline 1.0).
        double IdfProvider(string pkg) => pkg.Contains("obscure") ? 5.0 : 1.0;

        // Act
        var result = LinksetCorrelationV2.Compute(inputs, packageIdfProvider: IdfProvider);

        // Assert
        result.SignalScores["packageCoverage"].Should().BeGreaterThan(0.5);
    }
|
||||
|
||||
#endregion
|
||||
|
||||
#region CORR-V2-003: Reference Score (Positive-Only)
|
||||
|
||||
    [Fact]
    public void ReferenceScore_ZeroOverlap_ReturnsNeutral_NoConflict()
    {
        // Arrange: Different references from different sources.
        // V1 would emit a "reference-clash" conflict; V2's positive-only
        // reference signal should return neutral (0.5) with no conflict,
        // since disjoint references are expected across sources.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", references: new[] { "https://nvd.nist.gov/vuln/detail/CVE-2025-1234" }),
            CreateInput("obs-b", "ghsa", references: new[] { "https://github.com/advisories/GHSA-xxxx" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.SignalScores["referenceOverlap"].Should().Be(0.5, "zero overlap = neutral, not negative");
        result.Conflicts.Should().NotContain(c => c.Reason == "reference-clash",
            "no conflict for simple disjoint references");
    }

    [Fact]
    public void ReferenceScore_PartialOverlap_ProducesPositiveScore()
    {
        // Arrange: one shared reference plus one unique reference per source.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", references: new[]
            {
                "https://example.com/advisory",
                "https://nvd.nist.gov/vuln/detail/CVE-2025-1234"
            }),
            CreateInput("obs-b", "ghsa", references: new[]
            {
                "https://example.com/advisory",
                "https://github.com/advisories/GHSA-xxxx"
            })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert: any overlap lifts the signal above the 0.5 neutral point.
        result.SignalScores["referenceOverlap"].Should().BeGreaterThan(0.5);
    }

    [Fact]
    public void ReferenceScore_NormalizesUrls()
    {
        // Arrange: Same URL with different casing, protocol, and a tracking
        // query parameter — normalization should make them compare equal.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", references: new[] { "http://Example.COM/advisory?utm_source=test" }),
            CreateInput("obs-b", "ghsa", references: new[] { "https://example.com/advisory" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert: Should match after normalization
        result.SignalScores["referenceOverlap"].Should().BeGreaterThan(0.5);
    }
|
||||
|
||||
#endregion
|
||||
|
||||
#region CORR-V2-004: Typed Conflict Severities
|
||||
|
||||
    [Fact]
    public void ConflictPenalty_HardConflict_AppliesLargePenalty()
    {
        // Arrange: Distinct CVEs = hard conflict (same scenario as the
        // alias-connectivity test, asserted here from the penalty side).
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1111" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-2222" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        var hardConflict = result.Conflicts.FirstOrDefault(c => c.Severity == ConflictSeverity.Hard);
        hardConflict.Should().NotBeNull();
        result.Confidence.Should().BeLessThan(0.5);
    }

    [Fact]
    public void ConflictPenalty_SoftConflict_AppliesSmallPenalty()
    {
        // Arrange: Same CVE but overlapping (not identical) version ranges —
        // the sources share at least one affected version.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1234" },
                purls: new[] { "pkg:npm/lodash@4.17.20", "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa",
                aliases: new[] { "CVE-2025-1234" },
                purls: new[] { "pkg:npm/lodash@4.17.20", "pkg:npm/lodash@4.17.19" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert: Should have a soft divergence conflict (overlapping but not
        // equivalent), which must not pull confidence below neutral.
        var softConflict = result.Conflicts.FirstOrDefault(c =>
            c.Severity == ConflictSeverity.Soft && c.Reason == "affected-range-divergence");
        softConflict.Should().NotBeNull("overlapping but non-equivalent ranges should produce soft conflict");
        result.Confidence.Should().BeGreaterThan(0.5, "soft conflicts should not severely impact confidence");
    }

    [Fact]
    public void ConflictPenalty_Saturates_AtMaximum()
    {
        // Arrange: Multiple hard conflicts at once (distinct CVEs AND
        // disjoint version ranges) — penalties must not stack unboundedly.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1111" },
                purls: new[] { "pkg:npm/lodash@1.0.0" }),
            CreateInput("obs-b", "ghsa",
                aliases: new[] { "CVE-2025-2222" },
                purls: new[] { "pkg:npm/lodash@9.0.0" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert: Confidence should not go below the 0.1 floor.
        result.Confidence.Should().BeGreaterThanOrEqualTo(0.1);
    }
|
||||
|
||||
#endregion
|
||||
|
||||
#region CORR-V2-005: Patch Lineage
|
||||
|
||||
    [Fact]
    public void PatchLineage_ExactCommitShaMatch_ProducesHighScore()
    {
        // Arrange: Same fix-commit SHA referenced by both sources — the
        // strongest possible patch-lineage evidence.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1234" },
                patchReferences: new[] { "https://github.com/org/repo/commit/abc123def456789012345678901234567890abcd" }),
            CreateInput("obs-b", "ghsa",
                aliases: new[] { "CVE-2025-1234" },
                patchReferences: new[] { "https://github.com/org/repo/commit/abc123def456789012345678901234567890abcd" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.SignalScores["patchLineage"].Should().Be(1.0, "exact commit SHA match is very strong signal");
    }

    [Fact]
    public void PatchLineage_DifferentCommits_ProducesZeroScore()
    {
        // Arrange: Different commit SHAs in the same repository — no shared
        // lineage, so the signal contributes nothing.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd",
                patchReferences: new[] { "https://github.com/org/repo/commit/1111111111111111111111111111111111111111" }),
            CreateInput("obs-b", "ghsa",
                patchReferences: new[] { "https://github.com/org/repo/commit/2222222222222222222222222222222222222222" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.SignalScores["patchLineage"].Should().Be(0);
    }

    [Fact]
    public void PatchLineage_NoPatchData_ReturnsZero()
    {
        // Arrange: No patch references at all — absence of data scores 0,
        // it is not treated as a conflict.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
            CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.SignalScores["patchLineage"].Should().Be(0);
    }
|
||||
|
||||
#endregion
|
||||
|
||||
#region CORR-V2-006: Version Compatibility
|
||||
|
||||
    [Fact]
    public void VersionCompatibility_EquivalentRanges_ProducesHighScore()
    {
        // Arrange: Identical affected versions for the same package.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@4.17.21" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert: high score and neither the soft nor the hard range conflict.
        result.SignalScores["versionCompatibility"].Should().BeGreaterThanOrEqualTo(0.8);
        result.Conflicts.Should().NotContain(c =>
            c.Reason == "affected-range-divergence" || c.Reason == "disjoint-version-ranges");
    }

    [Fact]
    public void VersionCompatibility_OverlappingRanges_ProducesMediumScoreWithSoftConflict()
    {
        // Arrange: Overlapping but not identical version sets
        // (4.17.20 is shared; 4.17.21 and 4.17.19 are not).
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@4.17.21", "pkg:npm/lodash@4.17.20" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@4.17.20", "pkg:npm/lodash@4.17.19" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert: middling score plus a soft divergence conflict.
        result.SignalScores["versionCompatibility"].Should().BeInRange(0.4, 0.8);
        result.Conflicts.Should().Contain(c =>
            c.Reason == "affected-range-divergence" && c.Severity == ConflictSeverity.Soft);
    }

    [Fact]
    public void VersionCompatibility_DisjointRanges_ProducesLowScoreWithHardConflict()
    {
        // Arrange: Completely different versions for the same package
        // (no shared version at all) — a hard conflict.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd", purls: new[] { "pkg:npm/lodash@1.0.0" }),
            CreateInput("obs-b", "ghsa", purls: new[] { "pkg:npm/lodash@9.0.0" })
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.Conflicts.Should().Contain(c =>
            c.Reason == "disjoint-version-ranges" && c.Severity == ConflictSeverity.Hard);
    }
|
||||
|
||||
#endregion
|
||||
|
||||
#region CORR-V2-008: Integrated Scoring
|
||||
|
||||
    [Fact]
    public void IntegratedScoring_HighConfidenceScenario()
    {
        // Arrange: Strong signals across all dimensions — shared CVE, same
        // purl/CPE, shared reference, same fix commit, fetched 1h apart.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1234" },
                purls: new[] { "pkg:npm/vulnerable-lib@2.0.0" },
                cpes: new[] { "cpe:2.3:a:vendor:vulnerable-lib:2.0.0:*:*:*:*:*:*:*" },
                references: new[] { "https://example.com/advisory" },
                patchReferences: new[] { "https://github.com/org/repo/commit/abc123def456789012345678901234567890abcd" },
                fetchedAt: DateTimeOffset.Parse("2025-01-25T10:00:00Z", CultureInfo.InvariantCulture)),
            CreateInput("obs-b", "ghsa",
                aliases: new[] { "CVE-2025-1234", "GHSA-xxxx-yyyy-zzzz" },
                purls: new[] { "pkg:npm/vulnerable-lib@2.0.0" },
                cpes: new[] { "cpe:2.3:a:vendor:vulnerable-lib:2.0.0:*:*:*:*:*:*:*" },
                references: new[] { "https://example.com/advisory", "https://github.com/advisories/GHSA-xxxx" },
                patchReferences: new[] { "https://github.com/org/repo/commit/abc123def456789012345678901234567890abcd" },
                fetchedAt: DateTimeOffset.Parse("2025-01-25T11:00:00Z", CultureInfo.InvariantCulture))
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.Confidence.Should().BeGreaterThanOrEqualTo(0.85, "all signals strong = high confidence");
        result.Conflicts.Should().BeEmpty();

        // Verify individual signals
        result.SignalScores["aliasConnectivity"].Should().Be(1.0);
        result.SignalScores["aliasAuthority"].Should().Be(1.0); // CVE present
        result.SignalScores["packageCoverage"].Should().BeGreaterThanOrEqualTo(0.8);
        result.SignalScores["patchLineage"].Should().Be(1.0);
        result.SignalScores["freshness"].Should().Be(1.0); // Within 48h
    }

    [Fact]
    public void IntegratedScoring_MixedSignalsScenario()
    {
        // Arrange: Some strong signals, some weak.
        // Note: Disconnected aliases will produce an alias-inconsistency
        // conflict, and the differing versions add range divergence.
        var inputs = new[]
        {
            CreateInput("obs-a", "nvd",
                aliases: new[] { "CVE-2025-1234" },
                purls: new[] { "pkg:npm/lodash@4.17.21" },
                fetchedAt: DateTimeOffset.Parse("2025-01-10T00:00:00Z", CultureInfo.InvariantCulture)),
            CreateInput("obs-b", "vendor",
                aliases: new[] { "VENDOR-2025-001" }, // No CVE, only vendor ID
                purls: new[] { "pkg:npm/lodash@4.17.20" }, // Different version
                fetchedAt: DateTimeOffset.Parse("2025-01-25T00:00:00Z", CultureInfo.InvariantCulture)) // 15 days apart
        };

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        // Disconnected aliases + version divergence = conflicts reducing confidence
        // Minimum confidence is 0.1 when there are conflicts but some evidence
        result.Confidence.Should().BeInRange(0.1, 0.4, "mixed signals with conflicts = low-moderate confidence");
        result.SignalScores["aliasConnectivity"].Should().BeLessThan(1.0); // Disconnected
        result.SignalScores["freshness"].Should().BeLessThan(0.5); // 15 days spread
    }

    [Fact]
    public void IntegratedScoring_EmptyInputs_ReturnsFullConfidence()
    {
        // Arrange: no observations — vacuously consistent, so the algorithm
        // reports full confidence with no conflicts.
        var inputs = Array.Empty<LinksetCorrelationV2.InputV2>();

        // Act
        var result = LinksetCorrelationV2.Compute(inputs);

        // Assert
        result.Confidence.Should().Be(1.0);
        result.Conflicts.Should().BeEmpty();
    }
|
||||
|
||||
#endregion
|
||||
|
||||
#region Determinism Tests
|
||||
|
||||
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Determinism_SameInputs_ProduceSameOutput()
{
    // Arrange: two observations connected by a shared CVE alias.
    var inputs = new[]
    {
        CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
        CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234", "GHSA-xxxx" })
    };

    // Act - compute twice from the identical input array.
    var result1 = LinksetCorrelationV2.Compute(inputs);
    var result2 = LinksetCorrelationV2.Compute(inputs);

    // Assert - every externally observable part of the result must be stable.
    result1.Confidence.Should().Be(result2.Confidence);
    result1.Conflicts.Should().BeEquivalentTo(result2.Conflicts);
    result1.SignalScores.Should().BeEquivalentTo(result2.SignalScores);
}
|
||||
|
||||
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Determinism_InputOrdering_DoesNotAffectResult()
{
    // Arrange: the same two observations in both possible orders.
    var inputsA = new[]
    {
        CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
        CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234" })
    };

    var inputsB = new[]
    {
        CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234" }),
        CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" })
    };

    // Act
    var resultA = LinksetCorrelationV2.Compute(inputsA);
    var resultB = LinksetCorrelationV2.Compute(inputsB);

    // Assert - ordering of observations must not change the confidence score.
    resultA.Confidence.Should().Be(resultB.Confidence);
}
|
||||
|
||||
[Trait("Category", TestCategories.Unit)]
[Fact]
public void Conflicts_AreDeduplicated()
{
    // Arrange: add duplicate conflicts via additionalConflicts.
    // Use inputs that won't generate their own alias-inconsistency, so the
    // only conflicts present are the injected ones.
    var inputs = new[]
    {
        CreateInput("obs-a", "nvd", aliases: new[] { "CVE-2025-1234" }),
        CreateInput("obs-b", "ghsa", aliases: new[] { "CVE-2025-1234" }) // Same CVE = connected
    };

    var additionalConflicts = new List<AdvisoryLinksetConflict>
    {
        new("custom-field", "custom-reason", new[] { "a", "b" }),
        new("custom-field", "custom-reason", new[] { "a", "b" }) // Duplicate
    };

    // Act
    var result = LinksetCorrelationV2.Compute(inputs, additionalConflicts);

    // Assert: Should deduplicate the additional conflicts down to one entry.
    result.Conflicts.Count(c => c.Reason == "custom-reason").Should().Be(1);
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Helper Methods
|
||||
|
||||
/// <summary>
/// Builds a <see cref="LinksetCorrelationV2.InputV2"/> with sensible defaults so
/// each test only specifies the fields relevant to its scenario. Null collection
/// arguments are replaced with empty arrays, except <paramref name="patchReferences"/>,
/// which is forwarded as-is (null is passed through unchanged).
/// </summary>
private static LinksetCorrelationV2.InputV2 CreateInput(
    string observationId,
    string? vendor = null,
    string[]? aliases = null,
    string[]? purls = null,
    string[]? cpes = null,
    string[]? references = null,
    string[]? patchReferences = null,
    DateTimeOffset? fetchedAt = null) =>
    new(
        ObservationId: observationId,
        Vendor: vendor,
        FetchedAt: fetchedAt,
        Aliases: aliases ?? Array.Empty<string>(),
        Purls: purls ?? Array.Empty<string>(),
        Cpes: cpes ?? Array.Empty<string>(),
        References: references ?? Array.Empty<string>(),
        PatchReferences: patchReferences);
|
||||
|
||||
#endregion
|
||||
}
|
||||
@@ -0,0 +1,561 @@
|
||||
// -----------------------------------------------------------------------------
|
||||
// TextSimilarityScorerTests.cs
|
||||
// Sprint: SPRINT_20260125_001_Concelier_linkset_correlation_v2
|
||||
// Task: CORR-V2-010
|
||||
// Description: Unit tests and performance benchmarks for TextSimilarityScorer
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using FluentAssertions;
|
||||
using StellaOps.Concelier.Core.Linksets;
|
||||
using StellaOps.TestKit;
|
||||
using Xunit;
|
||||
|
||||
namespace StellaOps.Concelier.Core.Tests.Linksets;
|
||||
|
||||
/// <summary>
/// Unit tests for <see cref="TextSimilarityScorer"/>: tokenization rules
/// (lowercasing, stop-word/short-token/numeric-token removal, deterministic
/// ordering), pairwise and average TF-IDF similarity, options defaults, and
/// determinism. Performance benchmarks live in <c>TextSimilarityScorerBenchmarks</c>.
/// </summary>
public class TextSimilarityScorerTests
{
    // Scorer under test, constructed with default options for all tests that
    // don't explicitly need custom options.
    private readonly TextSimilarityScorer _scorer = new();

    #region Tokenization Tests

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Tokenize_EmptyString_ReturnsEmpty()
    {
        // Act
        var tokens = _scorer.Tokenize("");

        // Assert
        tokens.Should().BeEmpty();
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Tokenize_NullString_ReturnsEmpty()
    {
        // Act - null input must be tolerated, not throw.
        var tokens = _scorer.Tokenize(null!);

        // Assert
        tokens.Should().BeEmpty();
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Tokenize_NormalizesToLowercase()
    {
        // Arrange
        var text = "BUFFER OVERFLOW Memory Corruption";

        // Act
        var tokens = _scorer.Tokenize(text);

        // Assert - each token must already equal its lowercase form.
        tokens.Should().AllSatisfy(t => t.Should().Be(t.ToLowerInvariant()));
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Tokenize_RemovesStopWords()
    {
        // Arrange
        var text = "The vulnerability allows an attacker to execute code";

        // Act
        var tokens = _scorer.Tokenize(text);

        // Assert - common stop words should be removed
        tokens.Should().NotContain("the");
        tokens.Should().NotContain("an");
        tokens.Should().NotContain("to");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Tokenize_RemovesShortTokens()
    {
        // Arrange
        var text = "CVE ID in XSS bug";

        // Act
        var tokens = _scorer.Tokenize(text);

        // Assert - tokens shorter than 3 chars should be removed
        // (3 matches TextSimilarityOptions.MinTokenLength's default).
        tokens.Should().NotContain("id");
        tokens.Should().NotContain("in");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Tokenize_RemovesNumericTokens()
    {
        // Arrange
        var text = "version 123 release 2024";

        // Act
        var tokens = _scorer.Tokenize(text);

        // Assert - pure numeric tokens should be removed
        tokens.Should().NotContain("123");
        tokens.Should().NotContain("2024");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Tokenize_KeepsAlphanumericTokens()
    {
        // Arrange - mixed letter/digit tokens are meaningful identifiers
        // (product names, CVE-like strings) and must survive filtering.
        var text = "CVE2024 log4j2 spring4shell";

        // Act
        var tokens = _scorer.Tokenize(text);

        // Assert - alphanumeric tokens should be kept
        tokens.Should().Contain("cve2024");
        tokens.Should().Contain("log4j2");
        tokens.Should().Contain("spring4shell");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Tokenize_IsDeterministic()
    {
        // Arrange
        var text = "Memory corruption in JSON parser leads to arbitrary code execution";

        // Act
        var tokens1 = _scorer.Tokenize(text);
        var tokens2 = _scorer.Tokenize(text);

        // Assert - same input, same tokens, same order.
        tokens1.Should().BeEquivalentTo(tokens2, options => options.WithStrictOrdering());
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Tokenize_SortsTokensForDeterminism()
    {
        // Arrange
        var text = "zebra alpha memory parser";

        // Act
        var tokens = _scorer.Tokenize(text);

        // Assert - tokens should be sorted alphabetically
        tokens.Should().BeInAscendingOrder();
    }

    #endregion

    #region Pairwise Similarity Tests

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputePairwiseSimilarity_IdenticalTexts_ReturnsOne()
    {
        // Arrange
        var text = "A heap-based buffer overflow in libpng allows remote attackers to execute arbitrary code";

        // Act - a text compared with itself is the upper bound of the metric.
        var similarity = _scorer.ComputePairwiseSimilarity(text, text);

        // Assert
        similarity.Should().BeApproximately(1.0, 0.01);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputePairwiseSimilarity_CompletelyDifferent_ReturnsLowScore()
    {
        // Arrange - no overlapping vulnerability vocabulary.
        var text1 = "SQL injection in database query handler";
        var text2 = "Memory corruption in graphics renderer";

        // Act
        var similarity = _scorer.ComputePairwiseSimilarity(text1, text2);

        // Assert
        similarity.Should().BeLessThan(0.3);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputePairwiseSimilarity_SimilarDescriptions_ReturnsPositiveScore()
    {
        // Arrange - same vulnerability described differently
        var text1 = "A heap-based buffer overflow in the PNG image parser allows remote code execution";
        var text2 = "Remote code execution via heap buffer overflow in PNG image processing library";

        // Act
        var similarity = _scorer.ComputePairwiseSimilarity(text1, text2);

        // Assert - TF-IDF similarity for short texts with stop words removed
        // is typically moderate (0.2-0.5 range)
        similarity.Should().BeGreaterThan(0.2);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputePairwiseSimilarity_EmptyFirst_ReturnsZero()
    {
        // Act
        var similarity = _scorer.ComputePairwiseSimilarity("", "some text here");

        // Assert
        similarity.Should().Be(0.0);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputePairwiseSimilarity_EmptySecond_ReturnsZero()
    {
        // Act
        var similarity = _scorer.ComputePairwiseSimilarity("some text here", "");

        // Assert
        similarity.Should().Be(0.0);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputePairwiseSimilarity_OnlyStopWords_ReturnsZero()
    {
        // Arrange - text with only stop words
        var text1 = "the and or but";
        var text2 = "the and or but";

        // Act
        var similarity = _scorer.ComputePairwiseSimilarity(text1, text2);

        // Assert - no tokens after stop word removal, so even identical
        // texts yield zero rather than one.
        similarity.Should().Be(0.0);
    }

    #endregion

    #region Average Similarity Tests

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputeAverageSimilarity_SingleDescription_ReturnsZero()
    {
        // Arrange - one description means zero pairs to compare.
        var descriptions = new[] { "Only one description here" };

        // Act
        var similarity = _scorer.ComputeAverageSimilarity(descriptions);

        // Assert
        similarity.Should().Be(0.0);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputeAverageSimilarity_EmptyCollection_ReturnsZero()
    {
        // Act
        var similarity = _scorer.ComputeAverageSimilarity(Array.Empty<string>());

        // Assert
        similarity.Should().Be(0.0);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputeAverageSimilarity_IdenticalDescriptions_ReturnsOne()
    {
        // Arrange - three copies of the same text; every pair is identical.
        var description = "A critical buffer overflow vulnerability in the image processing library";
        var descriptions = new[] { description, description, description };

        // Act
        var similarity = _scorer.ComputeAverageSimilarity(descriptions);

        // Assert
        similarity.Should().BeApproximately(1.0, 0.01);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputeAverageSimilarity_MixedSimilarity_ReturnsReasonableAverage()
    {
        // Arrange - three descriptions about the same CVE from different sources
        var descriptions = new[]
        {
            "A heap-based buffer overflow in libpng before 1.6.37 allows remote attackers to cause denial of service",
            "Buffer overflow vulnerability in PNG library (libpng) can be exploited by remote attackers for DoS",
            "libpng contains a heap overflow that may lead to denial of service when processing malformed PNG files"
        };

        // Act
        var similarity = _scorer.ComputeAverageSimilarity(descriptions);

        // Assert - TF-IDF similarity for related security texts typically
        // produces moderate scores (0.1-0.4 range) after stop word removal
        similarity.Should().BeGreaterThan(0.1);
        similarity.Should().BeLessThanOrEqualTo(1.0);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputeAverageSimilarity_SkipsEmptyDescriptions()
    {
        // Arrange - empty, null, and whitespace-only entries mixed with two
        // identical non-empty descriptions.
        var descriptions = new[]
        {
            "A critical vulnerability in the parser",
            "",
            null!,
            "   ",
            "A critical vulnerability in the parser"
        };

        // Act
        var similarity = _scorer.ComputeAverageSimilarity(descriptions);

        // Assert - should only consider non-empty descriptions, so the two
        // identical survivors produce a similarity of ~1.0.
        similarity.Should().BeApproximately(1.0, 0.01);
    }

    #endregion

    #region Options Tests

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void TextSimilarityOptions_DefaultValues_AreCorrect()
    {
        // Arrange & Act
        var options = new TextSimilarityOptions();

        // Assert - pins the documented defaults so accidental changes to the
        // options type are caught here.
        options.Enabled.Should().BeFalse();
        options.Weight.Should().Be(0.05);
        options.MinTokenLength.Should().Be(3);
        options.CustomStopWords.Should().BeNull();
        options.EnableStemming.Should().BeFalse();
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void TextSimilarityOptions_SectionName_IsCorrect()
    {
        // Assert - configuration binding depends on this exact section path.
        TextSimilarityOptions.SectionName.Should().Be("Concelier:Correlation:TextSimilarity");
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void Scorer_WithCustomStopWords_UsesCustomList()
    {
        // Arrange
        var options = new TextSimilarityOptions
        {
            CustomStopWords = new[] { "custom", "stop", "words" }
        };
        var scorer = new TextSimilarityScorer(options);

        // Act
        var tokens = scorer.Tokenize("custom stop words remain here");

        // Assert - custom stop words should be removed
        tokens.Should().NotContain("custom");
        tokens.Should().NotContain("stop");
        tokens.Should().NotContain("words");
        tokens.Should().Contain("remain");
        tokens.Should().Contain("here");
    }

    #endregion

    #region Real-World Description Fixtures

    [Trait("Category", TestCategories.Unit)]
    [Theory]
    [MemberData(nameof(RealWorldDescriptionFixtures))]
    public void ComputeAverageSimilarity_RealWorldFixtures_ReturnsExpectedRange(
        string[] descriptions,
        double minExpected,
        double maxExpected,
        string scenario)
    {
        // Act
        var similarity = _scorer.ComputeAverageSimilarity(descriptions);

        // Assert - ranges are deliberately wide; these fixtures guard against
        // gross regressions, not exact scores.
        similarity.Should().BeGreaterThanOrEqualTo(minExpected,
            because: $"scenario '{scenario}' should have similarity >= {minExpected}");
        similarity.Should().BeLessThanOrEqualTo(maxExpected,
            because: $"scenario '{scenario}' should have similarity <= {maxExpected}");
    }

    // Fixture rows: (descriptions, minExpected, maxExpected, scenario).
    public static IEnumerable<object[]> RealWorldDescriptionFixtures()
    {
        // CVE-2021-44228 (Log4Shell) - same vulnerability, different sources
        // TF-IDF similarity for related security texts is typically 0.1-0.5
        yield return new object[]
        {
            new[]
            {
                "Apache Log4j2 2.0-beta9 through 2.15.0 (excluding security releases 2.12.2, 2.12.3, and 2.3.1) JNDI features used in configuration, log messages, and parameters do not protect against attacker controlled LDAP and other JNDI related endpoints.",
                "A flaw was found in the Java logging library Apache Log4j in version 2.x. When configured to use a JNDI URL with a LDAP scheme, an attacker can execute arbitrary code.",
                "Remote code execution vulnerability in Apache Log4j2 allows attackers to execute arbitrary code via JNDI lookup in log messages."
            },
            0.05, 0.9, "Log4Shell - same CVE, different sources"
        };

        // Unrelated vulnerabilities - should have low similarity
        yield return new object[]
        {
            new[]
            {
                "SQL injection vulnerability in the login form allows authentication bypass",
                "Cross-site scripting (XSS) in the comments section enables script injection",
                "Buffer overflow in image processing library causes denial of service"
            },
            0.0, 0.4, "Unrelated vulnerabilities"
        };

        // Same library, different CVEs - moderate similarity
        yield return new object[]
        {
            new[]
            {
                "OpenSSL before 3.0.7 allows remote attackers to cause a denial of service via a crafted X.509 certificate",
                "OpenSSL 3.0.x before 3.0.5 contains a heap-based buffer overflow in the SM2 implementation",
                "A timing-based side channel in OpenSSL allows recovery of private key material"
            },
            0.05, 0.6, "Same library (OpenSSL), different CVEs"
        };
    }

    #endregion

    #region Determinism Tests

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputeAverageSimilarity_IsDeterministic()
    {
        // Arrange
        var descriptions = new[]
        {
            "A heap-based buffer overflow in libpng",
            "Buffer overflow in PNG library",
            "libpng heap overflow vulnerability"
        };

        // Act - three runs over the same inputs.
        var similarity1 = _scorer.ComputeAverageSimilarity(descriptions);
        var similarity2 = _scorer.ComputeAverageSimilarity(descriptions);
        var similarity3 = _scorer.ComputeAverageSimilarity(descriptions);

        // Assert - exact equality, not approximate: the computation must be
        // bit-for-bit reproducible.
        similarity1.Should().Be(similarity2);
        similarity2.Should().Be(similarity3);
    }

    [Trait("Category", TestCategories.Unit)]
    [Fact]
    public void ComputePairwiseSimilarity_IsDeterministic()
    {
        // Arrange
        var text1 = "Memory corruption in JSON parser";
        var text2 = "JSON parser memory corruption vulnerability";

        // Act
        var similarity1 = _scorer.ComputePairwiseSimilarity(text1, text2);
        var similarity2 = _scorer.ComputePairwiseSimilarity(text1, text2);

        // Assert
        similarity1.Should().Be(similarity2);
    }

    #endregion
}
|
||||
|
||||
/// <summary>
/// Performance benchmarks for <see cref="TextSimilarityScorer"/>.
/// Target: &lt;= 5ms per pair. Wall-clock measurements can be noisy on loaded
/// CI hosts, so each benchmark warms up first and averages over 100 iterations.
/// </summary>
public class TextSimilarityScorerBenchmarks
{
    // Scorer under test with default options.
    private readonly TextSimilarityScorer _scorer = new();

    [Trait("Category", TestCategories.Performance)]
    [Fact]
    public void ComputePairwiseSimilarity_MeetsPerformanceTarget()
    {
        // Arrange - realistic vulnerability descriptions
        var text1 = "A heap-based buffer overflow vulnerability has been discovered in the image processing library libpng version 1.6.37. Remote attackers can exploit this flaw by providing specially crafted PNG files, potentially leading to arbitrary code execution or denial of service conditions.";
        var text2 = "The PNG image handling library (libpng) contains a buffer overflow vulnerability in the row processing function. Exploitation of this issue allows attackers to execute arbitrary code in the context of the application using the affected library.";

        // Warmup - lets the JIT compile the hot path before timing starts.
        for (var i = 0; i < 10; i++)
        {
            _scorer.ComputePairwiseSimilarity(text1, text2);
        }

        // Act - measure 100 iterations
        var sw = Stopwatch.StartNew();
        const int iterations = 100;

        for (var i = 0; i < iterations; i++)
        {
            _scorer.ComputePairwiseSimilarity(text1, text2);
        }

        sw.Stop();
        var averageMs = sw.Elapsed.TotalMilliseconds / iterations;

        // Assert - target: <= 5ms per pair
        averageMs.Should().BeLessThanOrEqualTo(5.0,
            because: $"text similarity computation should complete within 5ms per pair (actual: {averageMs:F3} ms)");
    }

    [Trait("Category", TestCategories.Performance)]
    [Fact]
    public void ComputeAverageSimilarity_FiveDescriptions_MeetsPerformanceTarget()
    {
        // Arrange - 5 descriptions = C(5,2) = 10 pairwise comparisons per call.
        var descriptions = new[]
        {
            "Apache Log4j2 JNDI features do not protect against attacker controlled LDAP endpoints",
            "A flaw in Log4j in version 2.x allows attackers to execute arbitrary code via JNDI lookup",
            "Remote code execution in Apache Log4j2 via malicious JNDI lookup patterns",
            "Log4j2 vulnerability allows remote attackers to execute code through JNDI injection",
            "Critical RCE vulnerability in Apache Log4j2 logging library through JNDI features"
        };

        // Warmup - lets the JIT compile the hot path before timing starts.
        for (var i = 0; i < 10; i++)
        {
            _scorer.ComputeAverageSimilarity(descriptions);
        }

        // Act
        var sw = Stopwatch.StartNew();
        const int iterations = 100;

        for (var i = 0; i < iterations; i++)
        {
            _scorer.ComputeAverageSimilarity(descriptions);
        }

        sw.Stop();
        var averageMs = sw.Elapsed.TotalMilliseconds / iterations;

        // Derive the pair count from the fixture instead of hard-coding 10, so
        // editing the description list cannot silently skew the per-pair figure.
        var pairsPerCall = descriptions.Length * (descriptions.Length - 1) / 2; // C(n,2)
        var msPerPair = averageMs / pairsPerCall;

        // Assert - target: <= 5ms per pair
        msPerPair.Should().BeLessThanOrEqualTo(5.0,
            because: $"text similarity computation should complete within 5ms per pair (actual: {msPerPair:F3} ms)");
    }
}
|
||||
Reference in New Issue
Block a user