// SPDX-License-Identifier: AGPL-3.0-or-later
// Copyright © 2025 StellaOps
// Sprint: SPRINT_8200_0012_0003_policy_engine_integration
// Task: PINT-8200-043 - Attestation reproducibility test: verify EWS proofs validate

using FluentAssertions;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Policy.Engine.Scoring.EvidenceWeightedScore;
using StellaOps.Signals.EvidenceWeightedScore;
using StellaOps.Signals.EvidenceWeightedScore.Normalizers;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using Xunit;

namespace StellaOps.Policy.Engine.Tests.Integration;

/// <summary>
/// Attestation reproducibility tests verifying that EWS proofs validate correctly.
/// Tests that scoring decisions can be reproduced and verified for audit purposes.
/// </summary>
[Trait("Category", "Attestation")]
[Trait("Category", "Integration")]
[Trait("Sprint", "8200.0012.0003")]
[Trait("Task", "PINT-8200-043")]
public sealed class EwsAttestationReproducibilityTests
{
    private static ServiceCollection CreateServicesWithConfiguration()
    {
        var services = new ServiceCollection();
        var configuration = new ConfigurationBuilder()
            .AddInMemoryCollection()
            .Build();
        services.AddSingleton<IConfiguration>(configuration);
        return services;
    }

    #region Policy Digest Reproducibility Tests

    [Fact(DisplayName = "Policy digest is reproducible for same policy")]
    public void PolicyDigest_IsReproducible_ForSamePolicy()
    {
        // Arrange
        var policy1 = EvidenceWeightPolicy.DefaultProduction;
        var policy2 = EvidenceWeightPolicy.DefaultProduction;

        // Act
        var digest1 = policy1.ComputeDigest();
        var digest2 = policy2.ComputeDigest();

        // Assert
        digest1.Should().Be(digest2, "same policy should produce same digest");
        digest1.Should().HaveLength(64, "SHA256 hex digest should be 64 chars");
    }

    [Fact(DisplayName = "Policy digest changes when weights change")]
    public void PolicyDigest_Changes_WhenWeightsChange()
    {
        // Arrange
        var policy1 = EvidenceWeightPolicy.DefaultProduction;
        var policy2 = new EvidenceWeightPolicy
        {
            Version = "ews.v1",
            Profile = "modified",
            Weights = new EvidenceWeights
            {
                Rch = 0.35, // Changed
                Rts = 0.25,
                Bkp = 0.15,
                Xpl = 0.15,
                Src = 0.10,
                Mit = 0.10
            }
        };

        // Act
        var digest1 = policy1.ComputeDigest();
        var digest2 = policy2.ComputeDigest();

        // Assert
        digest1.Should().NotBe(digest2, "different policies should produce different digests");
    }

    [Fact(DisplayName = "Policy canonical JSON is deterministic")]
    public void PolicyCanonicalJson_IsDeterministic()
    {
        // Arrange
        var policy = EvidenceWeightPolicy.DefaultProduction;

        // Act - Get canonical JSON multiple times
        var json1 = policy.GetCanonicalJson();
        var json2 = policy.GetCanonicalJson();
        var json3 = policy.GetCanonicalJson();

        // Assert
        json1.Should().Be(json2, "canonical JSON should be deterministic");
        json2.Should().Be(json3, "canonical JSON should be deterministic");
    }

    #endregion

    #region Score Calculation Reproducibility Tests

    [Fact(DisplayName = "Score calculation is reproducible with same inputs and policy")]
    public void ScoreCalculation_IsReproducible_WithSameInputsAndPolicy()
    {
        // Arrange
        var calculator = new EvidenceWeightedScoreCalculator();
        var input = CreateTestInput("reproducible-score-test");
        var policy = EvidenceWeightPolicy.DefaultProduction;

        // Act
        var result1 = calculator.Calculate(input, policy);
        var result2 = calculator.Calculate(input, policy);

        // Assert - Everything should match exactly
        result1.Score.Should().Be(result2.Score);
        result1.Bucket.Should().Be(result2.Bucket);
        result1.PolicyDigest.Should().Be(result2.PolicyDigest);
        result1.Flags.Should().BeEquivalentTo(result2.Flags);
    }

    [Fact(DisplayName = "Score result contains valid policy digest")]
    public void ScoreResult_ContainsValidPolicyDigest()
    {
        // Arrange
        var calculator = new EvidenceWeightedScoreCalculator();
        var input = CreateTestInput("policy-digest-in-result");
        var policy = EvidenceWeightPolicy.DefaultProduction;

        // Act
        var result = calculator.Calculate(input, policy);

        // Assert
        result.PolicyDigest.Should().NotBeNullOrEmpty("result should contain policy digest");
        result.PolicyDigest.Should().Be(policy.ComputeDigest(), "result's policy digest should match the policy used");
    }

    [Fact(DisplayName = "Score can be verified by recalculating with same inputs")]
    public void Score_CanBeVerified_ByRecalculating()
    {
        // Arrange - Original calculation
        var calculator = new EvidenceWeightedScoreCalculator();
        var input = CreateTestInput("verification-test");
        var policy = EvidenceWeightPolicy.DefaultProduction;
        var original = calculator.Calculate(input, policy);

        // Create a "proof" structure that could be stored/transmitted
        var proof = new
        {
            FindingId = original.FindingId,
            Score = original.Score,
            Bucket = original.Bucket,
            PolicyDigest = original.PolicyDigest,
            Inputs = original.Inputs
        };

        // Act - Verification: recalculate with same inputs and verify
        var recreatedInput = new EvidenceWeightedScoreInput
        {
            FindingId = proof.FindingId,
            Rch = proof.Inputs.Rch,
            Rts = proof.Inputs.Rts,
            Bkp = proof.Inputs.Bkp,
            Xpl = proof.Inputs.Xpl,
            Src = proof.Inputs.Src,
            Mit = proof.Inputs.Mit
        };
        var verification = calculator.Calculate(recreatedInput, policy);

        // Assert - Verification should produce identical results
        verification.Score.Should().Be(proof.Score, "verified score should match original");
        verification.Bucket.Should().Be(proof.Bucket, "verified bucket should match original");
        verification.PolicyDigest.Should().Be(proof.PolicyDigest, "policy digest should match");
    }

    #endregion

    #region Enrichment Chain Reproducibility Tests

    [Fact(DisplayName = "Enrichment result contains reproducibility information")]
    public void EnrichmentResult_ContainsReproducibilityInfo()
    {
        // Arrange
        var services = CreateServicesWithConfiguration();
        services.AddEvidenceWeightedScoring();
        services.AddEvidenceNormalizers();
        services.AddEvidenceWeightedScore(opts => opts.Enabled = true);
        var provider = services.BuildServiceProvider();
        var enricher = provider.GetRequiredService<IEvidenceWeightedScoreEnricher>(); // enricher service type assumed
        var evidence = CreateTestEvidence("reproducibility-info-test");

        // Act
        var result = enricher.Enrich(evidence);

        // Assert
        result.IsSuccess.Should().BeTrue();
        result.Score.Should().NotBeNull();
        result.Score!.PolicyDigest.Should().NotBeNullOrEmpty("score should include policy digest for verification");
        result.Score!.Inputs.Should().NotBeNull("score should include inputs for reproducibility");
    }

    [Fact(DisplayName = "Enrichment is reproducible for same evidence")]
    public void Enrichment_IsReproducible_ForSameEvidence()
    {
        // Arrange
        var services = CreateServicesWithConfiguration();
        services.AddEvidenceWeightedScoring();
        services.AddEvidenceNormalizers();
        services.AddEvidenceWeightedScore(opts =>
        {
            opts.Enabled = true;
            opts.EnableCaching = false; // Disable caching to test actual reproducibility
        });
        var provider = services.BuildServiceProvider();
        var enricher = provider.GetRequiredService<IEvidenceWeightedScoreEnricher>();
        var evidence = CreateTestEvidence("enrichment-reproducibility");

        // Act - Multiple enrichments
        var results = Enumerable.Range(0, 10)
            .Select(_ => enricher.Enrich(evidence))
            .ToList();

        // Assert - All should be identical
        var first = results[0];
        results.Should().AllSatisfy(r =>
        {
            r.Score!.Score.Should().Be(first.Score!.Score);
            r.Score!.PolicyDigest.Should().Be(first.Score!.PolicyDigest);
            r.Score!.Bucket.Should().Be(first.Score!.Bucket);
        });
    }

    #endregion

    #region Attestation Proof Structure Tests

    [Fact(DisplayName = "Score proof contains all required verification fields")]
    public void ScoreProof_ContainsAllRequiredFields()
    {
        // Arrange
        var calculator = new EvidenceWeightedScoreCalculator();
        var input = CreateTestInput("proof-fields-test");
        var policy = EvidenceWeightPolicy.DefaultProduction;

        // Act
        var result = calculator.Calculate(input, policy);

        // Assert - All fields needed for verification are present
        result.FindingId.Should().NotBeNullOrEmpty("finding ID required for correlation");
        result.Score.Should().BeInRange(0, 100, "score required for verdict");
        result.Bucket.Should().BeDefined("bucket required for triage");
        result.PolicyDigest.Should().NotBeNullOrEmpty("policy digest required for version tracking");
        result.Inputs.Should().NotBeNull("inputs required for reproducibility");
        result.Weights.Should().NotBeNull("weights required for audit");
        result.CalculatedAt.Should().NotBe(default, "timestamp required for audit trail");
    }

    [Fact(DisplayName = "Score proof is JSON serializable for attestation")]
    public void ScoreProof_IsJsonSerializable()
    {
        // Arrange
        var calculator = new EvidenceWeightedScoreCalculator();
        var input = CreateTestInput("json-serialization-test");
        var result = calculator.Calculate(input, EvidenceWeightPolicy.DefaultProduction);

        // Act
        var json = JsonSerializer.Serialize(result);
        var deserialized = JsonSerializer.Deserialize<EvidenceWeightedScoreResult>(json);

        // Assert
        json.Should().NotBeNullOrEmpty();
        deserialized.Should().NotBeNull();
        deserialized!.Score.Should().Be(result.Score);
        deserialized.PolicyDigest.Should().Be(result.PolicyDigest);
        deserialized.FindingId.Should().Be(result.FindingId);
    }

    [Fact(DisplayName = "Score proof hash is reproducible")]
    public void ScoreProofHash_IsReproducible()
    {
        // Arrange
        var calculator = new EvidenceWeightedScoreCalculator();
        var input = CreateTestInput("proof-hash-test");
        var policy = EvidenceWeightPolicy.DefaultProduction;

        // Act - Calculate twice and compute hash of each
        var result1 = calculator.Calculate(input, policy);
        var result2 = calculator.Calculate(input, policy);
        var hash1 = ComputeProofHash(result1);
        var hash2 = ComputeProofHash(result2);

        // Assert
        hash1.Should().Be(hash2, "proof hash should be reproducible");
    }

    #endregion

    #region Cross-Instance Reproducibility Tests

    [Fact(DisplayName = "Different calculator instances produce same results")]
    public void DifferentCalculatorInstances_ProduceSameResults()
    {
        // Arrange
        var calculator1 = new EvidenceWeightedScoreCalculator();
        var calculator2 = new EvidenceWeightedScoreCalculator();
        var input = CreateTestInput("cross-instance-test");
        var policy = EvidenceWeightPolicy.DefaultProduction;

        // Act
        var result1 = calculator1.Calculate(input, policy);
        var result2 = calculator2.Calculate(input, policy);

        // Assert
        result1.Score.Should().Be(result2.Score);
        result1.Bucket.Should().Be(result2.Bucket);
        result1.PolicyDigest.Should().Be(result2.PolicyDigest);
    }

    [Fact(DisplayName = "Different service provider instances produce same results")]
    public void DifferentServiceProviderInstances_ProduceSameResults()
    {
        // Arrange - Two independent service providers
        var services1 = CreateServicesWithConfiguration();
        services1.AddEvidenceWeightedScoring();
        services1.AddEvidenceNormalizers();
        services1.AddEvidenceWeightedScore(opts => opts.Enabled = true);
        var provider1 = services1.BuildServiceProvider();
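        // Build the second container from scratch so no singletons or caches are shared between the two pipelines.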
        var services2 = CreateServicesWithConfiguration();
        services2.AddEvidenceWeightedScoring();
        services2.AddEvidenceNormalizers();
        services2.AddEvidenceWeightedScore(opts => opts.Enabled = true);
        var provider2 = services2.BuildServiceProvider();

        var enricher1 = provider1.GetRequiredService<IEvidenceWeightedScoreEnricher>(); // enricher service type assumed
        var enricher2 = provider2.GetRequiredService<IEvidenceWeightedScoreEnricher>();
        var evidence = CreateTestEvidence("cross-provider-test");

        // Act
        var result1 = enricher1.Enrich(evidence);
        var result2 = enricher2.Enrich(evidence);

        // Assert
        result1.Score!.Score.Should().Be(result2.Score!.Score);
        result1.Score!.PolicyDigest.Should().Be(result2.Score!.PolicyDigest);
    }

    #endregion

    #region Timestamp and Audit Trail Tests

    [Fact(DisplayName = "Calculation timestamp is captured")]
    public void CalculationTimestamp_IsCaptured()
    {
        // Arrange
        var calculator = new EvidenceWeightedScoreCalculator();
        var input = CreateTestInput("timestamp-test");
        var before = DateTimeOffset.UtcNow;

        // Act
        var result = calculator.Calculate(input, EvidenceWeightPolicy.DefaultProduction);
        var after = DateTimeOffset.UtcNow;

        // Assert
        result.CalculatedAt.Should().BeOnOrAfter(before);
        result.CalculatedAt.Should().BeOnOrBefore(after);
    }

    [Fact(DisplayName = "Breakdown provides audit trail for score components")]
    public void Breakdown_ProvidesAuditTrail()
    {
        // Arrange
        var calculator = new EvidenceWeightedScoreCalculator();
        var input = CreateTestInput("breakdown-audit");

        // Act
        var result = calculator.Calculate(input, EvidenceWeightPolicy.DefaultProduction);

        // Assert
        result.Breakdown.Should().NotBeEmpty("breakdown should explain score composition");

        // Each dimension should be accounted for
        result.Breakdown.Should().HaveCountGreaterThanOrEqualTo(1);
    }

    [Fact(DisplayName = "Explanations provide human-readable audit information")]
    public void Explanations_ProvideHumanReadableAudit()
    {
        // Arrange
        var calculator = new EvidenceWeightedScoreCalculator();
        var input = new EvidenceWeightedScoreInput
        {
            FindingId = "explanation-test",
            Rch = 0.9, // High reachability
            Rts = 0.8, // High runtime
            Bkp = 0.2, // Low backport
            Xpl = 0.7, // High exploit
            Src = 0.6,
            Mit = 0.1  // Low mitigation
        };

        // Act
        var result = calculator.Calculate(input, EvidenceWeightPolicy.DefaultProduction);

        // Assert - Should have explanations for high-risk findings
        result.Explanations.Should().NotBeEmpty("high-risk input should generate explanations");
    }

    #endregion

    #region Test Helpers

    // Deterministic scoring input with fixed dimension values for the given finding.
    private static EvidenceWeightedScoreInput CreateTestInput(string findingId)
    {
        return new EvidenceWeightedScoreInput
        {
            FindingId = findingId,
            Rch = 0.70,
            Rts = 0.55,
            Bkp = 0.35,
            Xpl = 0.50,
            Src = 0.60,
            Mit = 0.20
        };
    }

    // Deterministic raw evidence (reachability, runtime, exploit) for the given finding.
    private static FindingEvidence CreateTestEvidence(string findingId)
    {
        return new FindingEvidence
        {
            FindingId = findingId,
            Reachability = new ReachabilityInput
            {
                State = StellaOps.Signals.EvidenceWeightedScore.ReachabilityState.DynamicReachable,
                Confidence = 0.80
            },
            Runtime = new RuntimeInput
            {
                Posture = StellaOps.Signals.EvidenceWeightedScore.RuntimePosture.ActiveTracing,
                ObservationCount = 3,
                RecencyFactor = 0.70
            },
            Exploit = new ExploitInput
            {
                EpssScore = 0.40,
                EpssPercentile = 70,
                KevStatus = KevStatus.NotInKev,
                PublicExploitAvailable = false
            }
        };
    }

    private static string ComputeProofHash(EvidenceWeightedScoreResult result)
    {
        // Hash the critical reproducibility fields
        var proofData = $"{result.FindingId}:{result.Score}:{result.Bucket}:{result.PolicyDigest}:" +
                        $"{result.Inputs.Rch}:{result.Inputs.Rts}:{result.Inputs.Bkp}:" +
                        $"{result.Inputs.Xpl}:{result.Inputs.Src}:{result.Inputs.Mit}";
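        // The proof string uses a fixed field order, so the SHA-256 digest below stays comparable across runs and instances.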
        var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(proofData));
        return Convert.ToHexStringLower(bytes);
    }

    #endregion
}