save progress

This commit is contained in:
StellaOps Bot
2026-01-06 09:42:02 +02:00
parent 94d68bee8b
commit 37e11918e0
443 changed files with 85863 additions and 897 deletions

View File

@@ -0,0 +1,146 @@
// <copyright file="Models.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
// Sprint: SPRINT_20260105_002_004_TEST_policy_explainability
// Task: PEXP-009, PEXP-010
using System.Collections.Immutable;
using System.Text.RegularExpressions;
namespace StellaOps.Testing.Policy;
/// <summary>
/// Represents a versioned policy configuration.
/// </summary>
/// <remarks>
/// <see cref="PolicyDiffEngine"/> hands instances to <see cref="IPolicyEvaluator"/> unchanged and
/// uses <see cref="VersionId"/> in its log messages and summary text.
/// <see cref="MockPolicyEvaluator"/> uses <see cref="VersionId"/> as one half of its result lookup key.
/// </remarks>
/// <param name="VersionId">Unique version identifier (e.g., commit hash or version tag).</param>
/// <param name="PolicyType">Type of policy (e.g., "K4Lattice", "VexPrecedence", "RiskScoring").</param>
/// <param name="Parameters">Policy parameters as string key/value pairs.</param>
/// <param name="CreatedAt">When this version was created.</param>
public sealed record PolicyVersion(
string VersionId,
string PolicyType,
ImmutableDictionary<string, string> Parameters,
DateTimeOffset CreatedAt);
/// <summary>
/// A test input for policy evaluation.
/// </summary>
/// <remarks>
/// <see cref="PolicyDiffEngine"/> passes only <see cref="Input"/> to the evaluator, not the whole record.
/// <see cref="InputId"/> and <see cref="Description"/> are copied into the resulting
/// <see cref="PolicyInputDiff"/> when the two policy versions disagree.
/// <see cref="ExpectedOutcome"/> is not read by the diff engine.
/// </remarks>
/// <param name="InputId">Unique identifier for this test input.</param>
/// <param name="Description">Human-readable description.</param>
/// <param name="Input">The actual input data handed to the evaluator.</param>
/// <param name="ExpectedOutcome">Optional expected outcome for assertion; defaults to <c>null</c>.</param>
public sealed record PolicyTestInput(
string InputId,
string Description,
object Input,
string? ExpectedOutcome = null);
/// <summary>
/// Result of evaluating a policy.
/// </summary>
/// <remarks>
/// <see cref="PolicyDiffEngine"/> treats two results as behaviorally equal when both
/// <see cref="Outcome"/> and <see cref="Score"/> match; <see cref="ContributingFactors"/> and
/// <see cref="EvaluatedAt"/> do not take part in that comparison.
/// NOTE(review): <see cref="Score"/> is non-nullable, so "not applicable" has no dedicated
/// representation; <see cref="MockPolicyEvaluator"/> uses <c>0</c> for its default result.
/// </remarks>
/// <param name="Outcome">The outcome value.</param>
/// <param name="Score">Numeric score if applicable.</param>
/// <param name="ContributingFactors">Factors that contributed to the outcome.</param>
/// <param name="EvaluatedAt">When the evaluation occurred.</param>
public sealed record PolicyEvaluationResult(
string Outcome,
decimal Score,
ImmutableArray<string> ContributingFactors,
DateTimeOffset EvaluatedAt);
/// <summary>
/// Result of computing behavioral diff between policies.
/// </summary>
/// <remarks>
/// As produced by <see cref="PolicyDiffEngine"/>, <see cref="Diffs"/> contains one entry per input
/// whose outcome or score differed, and <see cref="InputsWithChangedBehavior"/> equals the number
/// of entries in <see cref="Diffs"/>. Inputs that evaluated identically are counted in
/// <see cref="TotalInputsTested"/> but are not listed.
/// </remarks>
/// <param name="BaselinePolicy">The baseline policy version.</param>
/// <param name="NewPolicy">The new policy version.</param>
/// <param name="TotalInputsTested">Total number of inputs tested.</param>
/// <param name="InputsWithChangedBehavior">Number of inputs with changed behavior.</param>
/// <param name="Diffs">Individual input differences (changed inputs only).</param>
/// <param name="Summary">Human-readable summary.</param>
public sealed record PolicyDiffResult(
PolicyVersion BaselinePolicy,
PolicyVersion NewPolicy,
int TotalInputsTested,
int InputsWithChangedBehavior,
ImmutableArray<PolicyInputDiff> Diffs,
string Summary);
/// <summary>
/// Difference in behavior for a single input.
/// </summary>
/// <remarks>
/// <see cref="PolicyDiffEngine"/> creates an instance only for inputs whose outcome or score
/// differs between the two policy versions. <see cref="InputId"/> and
/// <see cref="InputDescription"/> are copied from the originating <see cref="PolicyTestInput"/>.
/// </remarks>
/// <param name="InputId">The input that changed.</param>
/// <param name="InputDescription">Description of the input.</param>
/// <param name="BaselineOutcome">Full evaluation result with the baseline policy.</param>
/// <param name="NewOutcome">Full evaluation result with the new policy.</param>
/// <param name="Delta">Details of the change.</param>
public sealed record PolicyInputDiff(
string InputId,
string InputDescription,
PolicyEvaluationResult BaselineOutcome,
PolicyEvaluationResult NewOutcome,
PolicyDelta Delta);
/// <summary>
/// Details of a behavioral change between policies.
/// </summary>
/// <remarks>
/// As populated by <see cref="PolicyDiffEngine"/>: <see cref="ScoreDelta"/> is the new score minus
/// the baseline score; <see cref="AddedFactors"/> and <see cref="RemovedFactors"/> are set
/// differences of the two results' contributing factors; <see cref="ChangedFactors"/> is
/// always empty because the engine does not yet compare factor values.
/// </remarks>
/// <param name="OutcomeChanged">Whether the outcome value changed.</param>
/// <param name="BaselineOutcome">Previous outcome.</param>
/// <param name="NewOutcome">New outcome.</param>
/// <param name="ScoreDelta">Change in score (new minus baseline).</param>
/// <param name="AddedFactors">Factors added in new policy.</param>
/// <param name="RemovedFactors">Factors removed from baseline.</param>
/// <param name="ChangedFactors">Factors with changed values.</param>
public sealed record PolicyDelta(
bool OutcomeChanged,
string BaselineOutcome,
string NewOutcome,
decimal ScoreDelta,
ImmutableArray<string> AddedFactors,
ImmutableArray<string> RemovedFactors,
ImmutableArray<FactorChange> ChangedFactors);
/// <summary>
/// A change in a contributing factor.
/// </summary>
/// <remarks>
/// Carried by <see cref="PolicyDelta"/>. <see cref="PolicyDiffEngine"/> does not currently
/// produce any instances; it always reports an empty list of changed factors.
/// </remarks>
/// <param name="FactorId">Factor identifier.</param>
/// <param name="ChangeType">Type of change (e.g., "WeightChanged", "ThresholdChanged").</param>
/// <param name="OldValue">Previous value, as a string.</param>
/// <param name="NewValue">New value, as a string.</param>
public sealed record FactorChange(
string FactorId,
string ChangeType,
string OldValue,
string NewValue);
/// <summary>
/// Expected policy diff for regression testing.
/// </summary>
/// <remarks>
/// <see cref="PolicyRegressionTestBase"/> compares the number of entries in
/// <see cref="ExpectedDiffs"/> with the number of changed inputs and then checks each entry.
/// It does not compare <see cref="BaselineVersion"/> or <see cref="NewVersion"/> with the
/// versions actually loaded; those two values are informational there.
/// </remarks>
/// <param name="BaselineVersion">Baseline policy version.</param>
/// <param name="NewVersion">New policy version.</param>
/// <param name="ExpectedDiffs">Expected behavioral changes.</param>
public sealed record ExpectedPolicyDiff(
string BaselineVersion,
string NewVersion,
ImmutableArray<ExpectedInputChange> ExpectedDiffs);
/// <summary>
/// Expected change for a specific input.
/// </summary>
/// <remarks>
/// <see cref="PolicyRegressionTestBase"/> looks up the actual diff whose input identifier equals
/// <see cref="InputId"/> and asserts that its new outcome equals <see cref="ExpectedOutcome"/>.
/// <see cref="Justification"/> is not read by that check.
/// </remarks>
/// <param name="InputId">The input identifier.</param>
/// <param name="ExpectedOutcome">Expected new outcome.</param>
/// <param name="Justification">Why this change is expected.</param>
public sealed record ExpectedInputChange(
string InputId,
string ExpectedOutcome,
string Justification);
/// <summary>
/// Allowed policy change for regression testing.
/// </summary>
/// <remarks>
/// <see cref="PolicyRegressionTestBase"/> accepts a change when <see cref="InputPattern"/> matches
/// the changed input's identifier and the new outcome is permitted by <see cref="AllowedOutcomes"/>.
/// The pattern is applied with <see cref="Regex.IsMatch(string)"/>, so anchor it (for example
/// <c>^...$</c>) when the whole identifier must match.
/// A default (uninitialized) <see cref="AllowedOutcomes"/> is treated the same as an empty one.
/// </remarks>
/// <param name="InputPattern">Regex pattern matching allowed input IDs.</param>
/// <param name="AllowedOutcomes">Allowed outcome values (empty means any).</param>
/// <param name="Justification">Why this change is allowed.</param>
public sealed record AllowedPolicyChange(
Regex InputPattern,
ImmutableArray<string> AllowedOutcomes,
string Justification);

View File

@@ -0,0 +1,213 @@
// <copyright file="PolicyDiffEngine.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
// Sprint: SPRINT_20260105_002_004_TEST_policy_explainability
// Task: PEXP-010
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
namespace StellaOps.Testing.Policy;
/// <summary>
/// Computes the behavioral diff between two policy versions by evaluating the same test
/// inputs under each version and recording the inputs whose outcome or score differs.
/// </summary>
public sealed class PolicyDiffEngine
{
    private readonly IPolicyEvaluator _evaluator;
    private readonly ILogger<PolicyDiffEngine> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="PolicyDiffEngine"/> class.
    /// </summary>
    /// <param name="evaluator">Policy evaluator.</param>
    /// <param name="logger">Logger instance.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="evaluator"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public PolicyDiffEngine(IPolicyEvaluator evaluator, ILogger<PolicyDiffEngine> logger)
    {
        // Fail at construction time rather than with a NullReferenceException on first use.
        ArgumentNullException.ThrowIfNull(evaluator);
        ArgumentNullException.ThrowIfNull(logger);

        _evaluator = evaluator;
        _logger = logger;
    }

    /// <summary>
    /// Compute behavioral diff for a set of test inputs.
    /// </summary>
    /// <remarks>
    /// Each input is evaluated sequentially, first with <paramref name="baselinePolicy"/> and then
    /// with <paramref name="newPolicy"/>. Only <see cref="PolicyTestInput.Input"/> is handed to the
    /// evaluator. <paramref name="testInputs"/> is enumerated exactly once.
    /// </remarks>
    /// <param name="baselinePolicy">Baseline policy version.</param>
    /// <param name="newPolicy">New policy version.</param>
    /// <param name="testInputs">Test inputs to evaluate.</param>
    /// <param name="ct">Cancellation token, checked before each input and passed to the evaluator.</param>
    /// <returns>Policy diff result listing only the inputs whose behavior changed.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="baselinePolicy"/>, <paramref name="newPolicy"/> or
    /// <paramref name="testInputs"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<PolicyDiffResult> ComputeDiffAsync(
        PolicyVersion baselinePolicy,
        PolicyVersion newPolicy,
        IEnumerable<PolicyTestInput> testInputs,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(baselinePolicy);
        ArgumentNullException.ThrowIfNull(newPolicy);
        ArgumentNullException.ThrowIfNull(testInputs);

        // Materialize once so the count is stable and the source is not enumerated twice.
        var inputList = testInputs.ToList();
        var diffs = new List<PolicyInputDiff>();

        _logger.LogInformation(
            "Computing policy diff: {BaselineVersion} -> {NewVersion}, {InputCount} inputs",
            baselinePolicy.VersionId, newPolicy.VersionId, inputList.Count);

        foreach (var input in inputList)
        {
            ct.ThrowIfCancellationRequested();

            // Evaluate with baseline policy
            var baselineResult = await _evaluator.EvaluateAsync(
                input.Input, baselinePolicy, ct).ConfigureAwait(false);

            // Evaluate with new policy
            var newResult = await _evaluator.EvaluateAsync(
                input.Input, newPolicy, ct).ConfigureAwait(false);

            if (ResultsEqual(baselineResult, newResult))
            {
                continue;
            }

            diffs.Add(new PolicyInputDiff(
                InputId: input.InputId,
                InputDescription: input.Description,
                BaselineOutcome: baselineResult,
                NewOutcome: newResult,
                Delta: ComputeDelta(baselineResult, newResult)));

            _logger.LogDebug(
                "Input '{InputId}' changed: {Baseline} -> {New}",
                input.InputId, baselineResult.Outcome, newResult.Outcome);
        }

        var summary = GenerateSummary(baselinePolicy, newPolicy, diffs);

        _logger.LogInformation(
            "Policy diff complete: {ChangedCount}/{TotalCount} inputs changed",
            diffs.Count, inputList.Count);

        return new PolicyDiffResult(
            BaselinePolicy: baselinePolicy,
            NewPolicy: newPolicy,
            TotalInputsTested: inputList.Count,
            InputsWithChangedBehavior: diffs.Count,
            Diffs: [.. diffs],
            Summary: summary);
    }

    /// <summary>
    /// Two results are behaviorally equal when outcome and score both match.
    /// Contributing factors and the evaluation timestamp are intentionally not compared.
    /// </summary>
    private static bool ResultsEqual(PolicyEvaluationResult a, PolicyEvaluationResult b)
        => a.Outcome == b.Outcome && a.Score == b.Score;

    /// <summary>
    /// Builds the delta between a baseline and a new evaluation result.
    /// </summary>
    private static PolicyDelta ComputeDelta(
        PolicyEvaluationResult baseline,
        PolicyEvaluationResult newResult)
    {
        // A positional record can be built with a default ImmutableArray, and enumerating
        // that throws; treat it as "no factors" instead.
        var baselineFactors = FactorsOrEmpty(baseline.ContributingFactors);
        var newFactors = FactorsOrEmpty(newResult.ContributingFactors);

        var addedFactors = newFactors.Except(baselineFactors).ToImmutableArray();
        var removedFactors = baselineFactors.Except(newFactors).ToImmutableArray();

        return new PolicyDelta(
            OutcomeChanged: baseline.Outcome != newResult.Outcome,
            BaselineOutcome: baseline.Outcome,
            NewOutcome: newResult.Outcome,
            ScoreDelta: newResult.Score - baseline.Score,
            AddedFactors: addedFactors,
            RemovedFactors: removedFactors,
            ChangedFactors: []); // Factor changes require more detailed comparison
    }

    /// <summary>
    /// Returns <paramref name="factors"/>, or an empty array when it is the default (uninitialized) value.
    /// </summary>
    private static ImmutableArray<string> FactorsOrEmpty(ImmutableArray<string> factors)
        => factors.IsDefault ? ImmutableArray<string>.Empty : factors;

    /// <summary>
    /// Produces the one-line human-readable summary stored on the diff result.
    /// </summary>
    private static string GenerateSummary(
        PolicyVersion baseline,
        PolicyVersion newPolicy,
        List<PolicyInputDiff> diffs)
    {
        if (diffs.Count == 0)
        {
            return $"No behavioral changes between {baseline.VersionId} and {newPolicy.VersionId}.";
        }

        var outcomeChanges = diffs.Count(d => d.Delta.OutcomeChanged);

        // Every diff has a changed outcome or a changed score, so the remainder are score-only.
        var scoreOnlyChanges = diffs.Count - outcomeChanges;

        var parts = new List<string>
        {
            $"{diffs.Count} input(s) changed behavior"
        };

        if (outcomeChanges > 0)
        {
            parts.Add($"{outcomeChanges} outcome change(s)");
        }

        if (scoreOnlyChanges > 0)
        {
            parts.Add($"{scoreOnlyChanges} score-only change(s)");
        }

        return string.Join(", ", parts) + ".";
    }
}
/// <summary>
/// Interface for policy evaluation.
/// </summary>
/// <remarks>
/// <see cref="PolicyDiffEngine"/> calls <see cref="EvaluateAsync"/> twice per test input, first
/// with the baseline policy and then with the new policy, awaiting each call before the next.
/// </remarks>
public interface IPolicyEvaluator
{
/// <summary>
/// Evaluate an input with a specific policy version.
/// </summary>
/// <param name="input">
/// The input to evaluate. When called by <see cref="PolicyDiffEngine"/> this is the
/// <see cref="PolicyTestInput.Input"/> value, not the enclosing <see cref="PolicyTestInput"/>.
/// </param>
/// <param name="policy">The policy version to use.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Evaluation result.</returns>
Task<PolicyEvaluationResult> EvaluateAsync(
object input,
PolicyVersion policy,
CancellationToken ct = default);
}
/// <summary>
/// Mock policy evaluator for testing. Returns results preconfigured per
/// (input identifier, policy version) pair, or a fixed "unknown" result when none is configured.
/// </summary>
/// <remarks>
/// <see cref="PolicyDiffEngine"/> passes <see cref="PolicyTestInput.Input"/> to the evaluator, not
/// the enclosing <see cref="PolicyTestInput"/>. When this mock is driven by the engine, the lookup
/// identifier is therefore derived from that value: the string itself, or its
/// <see cref="object.ToString"/> result. Configure results under that identifier.
/// </remarks>
public sealed class MockPolicyEvaluator : IPolicyEvaluator
{
    private readonly Dictionary<(string inputId, string policyVersion), PolicyEvaluationResult> _results = new();

    /// <summary>
    /// Configure a specific result for an input/policy combination.
    /// A later call with the same pair replaces the earlier result.
    /// </summary>
    /// <param name="inputId">Input identifier.</param>
    /// <param name="policyVersion">Policy version.</param>
    /// <param name="result">The result to return.</param>
    /// <exception cref="ArgumentNullException"><paramref name="result"/> is <c>null</c>.</exception>
    public void SetResult(string inputId, string policyVersion, PolicyEvaluationResult result)
    {
        // A stored null would later be returned from EvaluateAsync despite its non-nullable contract.
        ArgumentNullException.ThrowIfNull(result);

        _results[(inputId, policyVersion)] = result;
    }

    /// <inheritdoc/>
    /// <exception cref="ArgumentNullException"><paramref name="policy"/> is <c>null</c>.</exception>
    public Task<PolicyEvaluationResult> EvaluateAsync(
        object input,
        PolicyVersion policy,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(policy);

        // Behave like a real evaluator: surface cancellation as a cancelled task.
        if (ct.IsCancellationRequested)
        {
            return Task.FromCanceled<PolicyEvaluationResult>(ct);
        }

        // Resolve the lookup identifier from whatever shape the caller passed in.
        var inputId = input switch
        {
            PolicyTestInput pti => pti.InputId,
            string s => s,
            _ => input?.ToString() ?? "unknown",
        };

        if (_results.TryGetValue((inputId, policy.VersionId), out var result))
        {
            return Task.FromResult(result);
        }

        // Default result if not configured
        return Task.FromResult(new PolicyEvaluationResult(
            Outcome: "unknown",
            Score: 0m,
            ContributingFactors: [],
            EvaluatedAt: DateTimeOffset.UtcNow));
    }
}

View File

@@ -0,0 +1,190 @@
// <copyright file="PolicyRegressionTestBase.cs" company="StellaOps">
// Copyright (c) StellaOps. Licensed under AGPL-3.0-or-later.
// </copyright>
// Sprint: SPRINT_20260105_002_004_TEST_policy_explainability
// Task: PEXP-011
using System.Collections.Immutable;
using FluentAssertions;
using Microsoft.Extensions.Logging.Abstractions;
namespace StellaOps.Testing.Policy;
/// <summary>
/// Base class for policy regression tests. Subclasses supply the policies, the test inputs
/// and the evaluator; the assert helpers run the diff engine and check the result.
/// </summary>
public abstract class PolicyRegressionTestBase
{
    /// <summary>
    /// Gets the policy diff engine. Set by <see cref="Initialize"/>.
    /// </summary>
    protected PolicyDiffEngine DiffEngine { get; private set; } = null!;

    /// <summary>
    /// Gets the policy evaluator. Set by <see cref="Initialize"/>.
    /// </summary>
    protected IPolicyEvaluator Evaluator { get; private set; } = null!;

    /// <summary>
    /// Initializes the test infrastructure by creating the evaluator and the diff engine.
    /// The assert helpers call this automatically if it has not run yet.
    /// </summary>
    protected virtual void Initialize()
    {
        Evaluator = CreateEvaluator();
        DiffEngine = new PolicyDiffEngine(
            Evaluator,
            NullLogger<PolicyDiffEngine>.Instance);
    }

    /// <summary>
    /// Load a policy version by identifier.
    /// </summary>
    /// <param name="version">Version identifier (e.g., "v1", "previous", "current").</param>
    /// <returns>Policy version.</returns>
    protected abstract PolicyVersion LoadPolicy(string version);

    /// <summary>
    /// Get the standard test inputs for this policy type.
    /// </summary>
    /// <returns>Enumerable of test inputs.</returns>
    protected abstract IEnumerable<PolicyTestInput> GetStandardTestInputs();

    /// <summary>
    /// Create the policy evaluator to use.
    /// </summary>
    /// <returns>Policy evaluator instance.</returns>
    protected abstract IPolicyEvaluator CreateEvaluator();

    /// <summary>
    /// Load expected diff between two versions.
    /// </summary>
    /// <param name="diffId">Diff identifier (e.g., "v1-to-v2").</param>
    /// <returns>Expected policy diff, or <c>null</c> when none is defined.</returns>
    protected virtual ExpectedPolicyDiff? LoadExpectedDiff(string diffId)
    {
        // Default implementation returns null - subclasses can override
        return null;
    }

    /// <summary>
    /// Load allowed changes for regression testing.
    /// </summary>
    /// <returns>Collection of allowed changes.</returns>
    protected virtual IEnumerable<AllowedPolicyChange> LoadAllowedChanges()
    {
        // Default: no changes allowed
        return [];
    }

    /// <summary>
    /// Assert that policy change produces only expected diffs.
    /// </summary>
    /// <param name="previousVersion">Previous policy version identifier.</param>
    /// <param name="currentVersion">Current policy version identifier.</param>
    /// <param name="expectedDiff">Expected diff (null to fail on any change).</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>A task that completes when the assertions have run.</returns>
    protected async Task AssertPolicyChangeProducesExpectedDiffAsync(
        string previousVersion,
        string currentVersion,
        ExpectedPolicyDiff? expectedDiff,
        CancellationToken ct = default)
    {
        var engine = EnsureDiffEngine();
        var previousPolicy = LoadPolicy(previousVersion);
        var currentPolicy = LoadPolicy(currentVersion);

        var actualDiff = await engine.ComputeDiffAsync(
            previousPolicy,
            currentPolicy,
            GetStandardTestInputs(),
            ct);

        if (expectedDiff is null)
        {
            actualDiff.InputsWithChangedBehavior.Should().Be(0,
                "No behavioral changes expected");
            return;
        }

        // A default ImmutableArray throws on Length/enumeration; treat it as "no expected changes".
        var expectedChanges = expectedDiff.ExpectedDiffs.IsDefault
            ? ImmutableArray<ExpectedInputChange>.Empty
            : expectedDiff.ExpectedDiffs;

        actualDiff.InputsWithChangedBehavior.Should().Be(
            expectedChanges.Length,
            "Number of changed inputs should match expected");

        foreach (var expected in expectedChanges)
        {
            var actual = actualDiff.Diffs
                .FirstOrDefault(d => d.InputId == expected.InputId);

            actual.Should().NotBeNull(
                $"Expected change for input '{expected.InputId}' not found");

            // NotBeNull throws on failure, so actual is non-null here.
            actual!.Delta.NewOutcome.Should().Be(expected.ExpectedOutcome,
                $"Outcome mismatch for input '{expected.InputId}'");
        }
    }

    /// <summary>
    /// Assert that policy change has no unexpected regressions, i.e. every changed input is
    /// covered by an entry from <see cref="LoadAllowedChanges"/>.
    /// </summary>
    /// <param name="previousVersion">Previous policy version identifier.</param>
    /// <param name="currentVersion">Current policy version identifier.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>A task that completes when the assertions have run.</returns>
    protected async Task AssertNoUnexpectedRegressionsAsync(
        string previousVersion,
        string currentVersion,
        CancellationToken ct = default)
    {
        var engine = EnsureDiffEngine();
        var previousPolicy = LoadPolicy(previousVersion);
        var currentPolicy = LoadPolicy(currentVersion);

        // Materialize once; the list is consulted for every changed input.
        var allowedChanges = LoadAllowedChanges().ToList();

        var diff = await engine.ComputeDiffAsync(
            previousPolicy,
            currentPolicy,
            GetStandardTestInputs(),
            ct);

        var unexpectedChanges = diff.Diffs
            .Where(d => !IsChangeAllowed(d, allowedChanges))
            .ToList();

        unexpectedChanges.Should().BeEmpty(
            $"Found unexpected policy regressions: {FormatChanges(unexpectedChanges)}");
    }

    /// <summary>
    /// Returns the diff engine, running <see cref="Initialize"/> first when it has not been
    /// called yet, so the assert helpers do not fail with a <see cref="NullReferenceException"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// <see cref="DiffEngine"/> is still unset after <see cref="Initialize"/> ran, which happens
    /// when an override does not call the base implementation.
    /// </exception>
    private PolicyDiffEngine EnsureDiffEngine()
    {
        if (DiffEngine is null)
        {
            Initialize();
        }

        return DiffEngine ?? throw new InvalidOperationException(
            $"{nameof(DiffEngine)} is not set. An override of {nameof(Initialize)} must call the base implementation.");
    }

    /// <summary>
    /// Check if a change is in the allowed list: the input ID must match the entry's pattern,
    /// and the new outcome must be listed unless the entry lists no outcomes at all.
    /// </summary>
    private static bool IsChangeAllowed(
        PolicyInputDiff diff,
        IEnumerable<AllowedPolicyChange> allowedChanges)
    {
        return allowedChanges.Any(a =>
            a.InputPattern.IsMatch(diff.InputId) &&
            (a.AllowedOutcomes.IsDefaultOrEmpty ||
             a.AllowedOutcomes.Contains(diff.Delta.NewOutcome)));
    }

    /// <summary>
    /// Format unexpected changes for error message. Lists at most five changes and
    /// appends a count of the remainder.
    /// </summary>
    private static string FormatChanges(List<PolicyInputDiff> changes)
    {
        if (changes.Count == 0)
        {
            return "none";
        }

        var descriptions = changes
            .Take(5)
            .Select(c => $"'{c.InputId}': {c.Delta.BaselineOutcome} -> {c.Delta.NewOutcome}");

        var result = string.Join(", ", descriptions);

        if (changes.Count > 5)
        {
            result += $" ... and {changes.Count - 5} more";
        }

        return result;
    }
}

View File

@@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the StellaOps.Testing.Policy library (see Description below). -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<!-- NOTE(review): OutputType=Exe together with IsPackable=true is unusual for a shared library;
     presumably required by the xunit.v3.core reference below - confirm. -->
<OutputType>Exe</OutputType>
<UseAppHost>true</UseAppHost>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<!-- Every compiler warning fails the build. -->
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<IsPackable>true</IsPackable>
<Description>Policy-as-code testing framework with diff-based regression detection</Description>
</PropertyGroup>
<ItemGroup>
<!-- Exposes internal members to the companion test assembly. -->
<InternalsVisibleTo Include="StellaOps.Testing.Policy.Tests" />
</ItemGroup>
<ItemGroup>
<!-- No Version attributes here: presumably versions are supplied centrally
     (e.g. Directory.Packages.props) - confirm. -->
<PackageReference Include="FluentAssertions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<!-- PrivateAssets="all" keeps the xunit packages from flowing to consumers of this package. -->
<PackageReference Include="xunit.v3.assert" PrivateAssets="all" />
<PackageReference Include="xunit.v3.core" PrivateAssets="all" />
</ItemGroup>
</Project>