up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-01 21:16:22 +02:00
parent c11d87d252
commit 909d9b6220
208 changed files with 860954 additions and 832 deletions

View File

@@ -0,0 +1,239 @@
using System.Collections.Immutable;
using System.Linq;
using StellaOps.Policy;
using StellaOps.Policy.Engine.Caching;
using StellaOps.Policy.Engine.Evaluation;
using StellaOps.Policy.Engine.Services;
using StellaOps.PolicyDsl;
namespace StellaOps.Policy.Engine.BatchEvaluation;
/// <summary>
/// Request body for the batch policy evaluation endpoint.
/// </summary>
/// <param name="TenantId">Tenant identifier copied onto every runtime evaluation request built from <paramref name="Items"/>.</param>
/// <param name="Items">Evaluation tuples to process; the validator rejects a null or empty list.</param>
/// <param name="PageSize">Optional page size; the validator accepts 1 to 500 and the endpoint falls back to 100 when omitted.</param>
/// <param name="PageToken">Optional paging cursor; the endpoint parses it as a non-negative integer offset into <paramref name="Items"/>.</param>
/// <param name="BudgetMs">Optional time budget in milliseconds; the endpoint stops starting new evaluations once the elapsed time reaches it.</param>
internal sealed record BatchEvaluationRequestDto(
string TenantId,
IReadOnlyList<BatchEvaluationItemDto> Items,
int? PageSize = null,
string? PageToken = null,
int? BudgetMs = null);
/// <summary>
/// One evaluation tuple of a batch request: a policy pack version plus the subject, advisory and evidence to evaluate it against.
/// </summary>
/// <param name="PackId">Policy pack identifier, forwarded unchanged to the runtime request.</param>
/// <param name="Version">Policy pack version, forwarded unchanged to the runtime request.</param>
/// <param name="SubjectPurl">Subject package URL, forwarded unchanged to the runtime request.</param>
/// <param name="AdvisoryId">Advisory identifier, forwarded unchanged to the runtime request.</param>
/// <param name="Severity">Severity input for the evaluation.</param>
/// <param name="Advisory">Advisory source and metadata.</param>
/// <param name="Vex">VEX statements supplied as evidence.</param>
/// <param name="Sbom">SBOM tags and optional components.</param>
/// <param name="Exceptions">Exception effects and instances to consider.</param>
/// <param name="Reachability">Reachability evidence for the subject.</param>
/// <param name="EvaluationTimestamp">Evaluation time; nullable in the contract but required by the validator to keep evaluation deterministic.</param>
/// <param name="BypassCache">Forwarded to the runtime request as its cache-bypass flag; defaults to <c>false</c>.</param>
internal sealed record BatchEvaluationItemDto(
string PackId,
int Version,
string SubjectPurl,
string AdvisoryId,
EvaluationSeverityDto Severity,
AdvisoryDto Advisory,
VexEvidenceDto Vex,
SbomDto Sbom,
ExceptionsDto Exceptions,
ReachabilityDto Reachability,
DateTimeOffset? EvaluationTimestamp,
bool BypassCache = false);
/// <summary>Severity input for an item: a normalized severity label and an optional numeric score.</summary>
internal sealed record EvaluationSeverityDto(string Normalized, decimal? Score = null);
/// <summary>
/// Advisory input for an item: metadata key/value pairs and the advisory source (defaults to "unknown").
/// The mapper copies the metadata into a dictionary with case-insensitive keys.
/// </summary>
internal sealed record AdvisoryDto(IDictionary<string, string> Metadata, string Source = "unknown");
/// <summary>VEX evidence for an item, expressed as a list of statements.</summary>
internal sealed record VexEvidenceDto(IReadOnlyList<VexStatementDto> Statements);
/// <summary>A single VEX statement: status, justification, statement identifier and an optional timestamp.</summary>
internal sealed record VexStatementDto(string Status, string Justification, string StatementId, DateTimeOffset? Timestamp = null);
/// <summary>
/// SBOM input for an item. The mapper turns <paramref name="Tags"/> into a case-insensitive set and treats a null
/// <paramref name="Components"/> list as empty.
/// </summary>
internal sealed record SbomDto(IReadOnlyList<string> Tags, IReadOnlyList<ComponentDto>? Components = null);
/// <summary>
/// An SBOM component: name, version, type, optional package URL and optional metadata.
/// The mapper treats null <paramref name="Metadata"/> as an empty dictionary and compares its keys case-insensitively.
/// </summary>
internal sealed record ComponentDto(
string Name,
string Version,
string Type,
string? Purl = null,
IDictionary<string, string>? Metadata = null);
/// <summary>
/// Policy exception input for an item. Both members are optional; the mapper substitutes empty collections for null.
/// </summary>
/// <param name="Effects">Exception effects by string key, compared case-insensitively after mapping (presumably keyed by effect id - TODO confirm).</param>
/// <param name="Instances">Concrete exception instances to consider during evaluation.</param>
internal sealed record ExceptionsDto(
IDictionary<string, PolicyExceptionEffect>? Effects = null,
IReadOnlyList<ExceptionInstanceDto>? Instances = null);
/// <summary>
/// A single exception instance: its identifier, the effect it refers to, its scope, creation time and optional metadata.
/// The mapper treats null <paramref name="Metadata"/> as an empty dictionary.
/// </summary>
internal sealed record ExceptionInstanceDto(
string Id,
string EffectId,
ExceptionScopeDto Scope,
DateTimeOffset CreatedAt,
IDictionary<string, string>? Metadata = null);
/// <summary>
/// Scope filters of an exception instance. Every list is optional; during mapping each list has blank entries dropped,
/// remaining values trimmed, and is collected into a case-insensitive set (null becomes an empty set).
/// </summary>
internal sealed record ExceptionScopeDto(
IReadOnlyList<string>? RuleNames = null,
IReadOnlyList<string>? Severities = null,
IReadOnlyList<string>? Sources = null,
IReadOnlyList<string>? Tags = null);
/// <summary>
/// Reachability evidence for an item. All members are passed through unchanged to the runtime reachability model;
/// only <paramref name="State"/> is mandatory, the rest default to zero, <c>false</c> or null.
/// </summary>
internal sealed record ReachabilityDto(
string State,
decimal Confidence = 0m,
decimal Score = 0m,
bool HasRuntimeEvidence = false,
string? Source = null,
string? Method = null,
string? EvidenceRef = null);
/// <summary>
/// Outcome of evaluating one batch item. The endpoint fills every member by copying the same-named member of the
/// runtime evaluation response.
/// </summary>
internal sealed record BatchEvaluationResultDto(
string PackId,
int Version,
string PolicyDigest,
string Status,
string? Severity,
string? RuleName,
int? Priority,
IReadOnlyDictionary<string, string> Annotations,
IReadOnlyList<string> Warnings,
PolicyExceptionApplication? AppliedException,
string CorrelationId,
bool Cached,
CacheSource CacheSource,
long EvaluationDurationMs);
/// <summary>
/// Response body of the batch evaluation endpoint for one page of items.
/// </summary>
/// <param name="Results">Results for the items evaluated in this call, in request order.</param>
/// <param name="NextPageToken">Offset of the first item not yet evaluated, as a string; null when no items remain.</param>
/// <param name="Total">Number of items in the request.</param>
/// <param name="Returned">Number of items evaluated in this call.</param>
/// <param name="CacheHits">How many evaluated items reported a cached response.</param>
/// <param name="CacheMisses">How many evaluated items reported a non-cached response.</param>
/// <param name="DurationMs">Elapsed time of the call in milliseconds, as measured by the endpoint.</param>
/// <param name="BudgetRemainingMs">Budget minus elapsed time, floored at zero; null when the request set no budget.</param>
internal sealed record BatchEvaluationResponseDto(
IReadOnlyList<BatchEvaluationResultDto> Results,
string? NextPageToken,
int Total,
int Returned,
int CacheHits,
int CacheMisses,
long DurationMs,
long? BudgetRemainingMs);
/// <summary>
/// Structural validation for <see cref="BatchEvaluationRequestDto"/>, run before the request is mapped to runtime
/// evaluation requests so that malformed payloads produce a validation error instead of a null-dereference later.
/// </summary>
internal static class BatchEvaluationValidator
{
    /// <summary>
    /// Validates the request envelope and every item in it.
    /// </summary>
    /// <param name="request">Deserialized request body; may be null when the body was missing.</param>
    /// <param name="error">Human-readable reason when validation fails; empty on success.</param>
    /// <returns><c>true</c> when the request can be mapped and evaluated; otherwise <c>false</c>.</returns>
    public static bool TryValidate(BatchEvaluationRequestDto request, out string error)
    {
        if (request is null)
        {
            error = "Request body is required.";
            return false;
        }

        if (request.Items is null || request.Items.Count == 0)
        {
            error = "At least one item is required.";
            return false;
        }

        if (request.PageSize is int size && (size <= 0 || size > 500))
        {
            error = "PageSize must be between 1 and 500.";
            return false;
        }

        // A JSON `null` inside the array deserializes to a null element; reject it before any member access.
        if (request.Items.Any(static item => item is null))
        {
            error = "Items must not contain null entries.";
            return false;
        }

        if (request.Items.Any(static item => item.EvaluationTimestamp is null))
        {
            error = "Each item must provide evaluationTimestamp to keep evaluation deterministic.";
            return false;
        }

        if (string.IsNullOrWhiteSpace(request.TenantId))
        {
            error = "TenantId is required.";
            return false;
        }

        // A zero or negative budget can never be satisfied and would only yield empty pages.
        if (request.BudgetMs is int budget && budget <= 0)
        {
            error = "BudgetMs must be greater than zero when provided.";
            return false;
        }

        for (var index = 0; index < request.Items.Count; index++)
        {
            var missing = FindMissingField(request.Items[index]);
            if (missing is not null)
            {
                var position = index.ToString(System.Globalization.CultureInfo.InvariantCulture);
                error = $"Item at index {position} is missing required field '{missing}'.";
                return false;
            }
        }

        error = string.Empty;
        return true;
    }

    /// <summary>
    /// Returns the name of the first required member of <paramref name="item"/> that is null, or null when every
    /// member the mapper dereferences is present.
    /// </summary>
    private static string? FindMissingField(BatchEvaluationItemDto item)
    {
        if (item.Severity is null)
        {
            return "severity";
        }

        if (item.Advisory is null)
        {
            return "advisory";
        }

        if (item.Advisory.Metadata is null)
        {
            return "advisory.metadata";
        }

        if (item.Vex is null)
        {
            return "vex";
        }

        if (item.Vex.Statements is null || item.Vex.Statements.Any(static statement => statement is null))
        {
            return "vex.statements";
        }

        if (item.Sbom is null)
        {
            return "sbom";
        }

        if (item.Sbom.Tags is null)
        {
            return "sbom.tags";
        }

        if (item.Sbom.Components is not null && item.Sbom.Components.Any(static component => component is null))
        {
            return "sbom.components";
        }

        if (item.Exceptions is null)
        {
            return "exceptions";
        }

        if (item.Exceptions.Instances is not null
            && item.Exceptions.Instances.Any(static instance => instance is null || instance.Scope is null))
        {
            return "exceptions.instances.scope";
        }

        if (item.Reachability is null)
        {
            return "reachability";
        }

        return null;
    }
}
/// <summary>
/// Converts batch evaluation DTOs into the runtime evaluation request model.
/// </summary>
internal static class BatchEvaluationMapper
{
    /// <summary>
    /// Maps every item to a runtime request for <paramref name="tenantId"/>, preserving the input order.
    /// </summary>
    public static IReadOnlyList<RuntimeEvaluationRequest> ToRuntimeRequests(string tenantId, IEnumerable<BatchEvaluationItemDto> items)
    {
        var mapped =
            from item in items
            select ToRuntimeRequest(tenantId, item);

        return mapped.ToList();
    }

    /// <summary>Builds the runtime request for a single item, section by section.</summary>
    private static RuntimeEvaluationRequest ToRuntimeRequest(string tenantId, BatchEvaluationItemDto item)
    {
        return new RuntimeEvaluationRequest(
            PackId: item.PackId,
            Version: item.Version,
            TenantId: tenantId,
            SubjectPurl: item.SubjectPurl,
            AdvisoryId: item.AdvisoryId,
            Severity: new PolicyEvaluationSeverity(item.Severity.Normalized, item.Severity.Score),
            Advisory: new PolicyEvaluationAdvisory(
                item.Advisory.Source,
                item.Advisory.Metadata.ToImmutableDictionary(StringComparer.OrdinalIgnoreCase)),
            Vex: MapVex(item.Vex),
            Sbom: MapSbom(item.Sbom),
            Exceptions: MapExceptions(item.Exceptions),
            Reachability: MapReachability(item.Reachability),
            EvaluationTimestamp: item.EvaluationTimestamp,
            BypassCache: item.BypassCache);
    }

    /// <summary>Copies the VEX statements into an immutable array of runtime statements.</summary>
    private static PolicyEvaluationVexEvidence MapVex(VexEvidenceDto vex)
    {
        var statements =
            from statement in vex.Statements
            select new PolicyEvaluationVexStatement(
                statement.Status,
                statement.Justification,
                statement.StatementId,
                statement.Timestamp);

        return new PolicyEvaluationVexEvidence(statements.ToImmutableArray());
    }

    /// <summary>Maps SBOM tags to a case-insensitive set and components (null treated as none) to an immutable array.</summary>
    private static PolicyEvaluationSbom MapSbom(SbomDto sbom)
    {
        var tags = sbom.Tags.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);

        var components =
            from component in (sbom.Components ?? Array.Empty<ComponentDto>())
            select new PolicyEvaluationComponent(
                component.Name,
                component.Version,
                component.Type,
                component.Purl,
                CaseInsensitive(component.Metadata));

        return new PolicyEvaluationSbom(tags, components.ToImmutableArray());
    }

    /// <summary>Maps exception effects and instances; null collections become empty ones.</summary>
    private static PolicyEvaluationExceptions MapExceptions(ExceptionsDto exceptions)
    {
        var effects = CaseInsensitive(exceptions.Effects);

        var instances =
            from instance in (exceptions.Instances ?? Array.Empty<ExceptionInstanceDto>())
            select new PolicyEvaluationExceptionInstance(
                instance.Id,
                instance.EffectId,
                new PolicyEvaluationExceptionScope(
                    Normalize(instance.Scope.RuleNames),
                    Normalize(instance.Scope.Severities),
                    Normalize(instance.Scope.Sources),
                    Normalize(instance.Scope.Tags)),
                instance.CreatedAt,
                CaseInsensitive(instance.Metadata));

        return new PolicyEvaluationExceptions(effects, instances.ToImmutableArray());
    }

    /// <summary>Passes the reachability members through unchanged.</summary>
    private static PolicyEvaluationReachability MapReachability(ReachabilityDto reachability)
    {
        return new PolicyEvaluationReachability(
            reachability.State,
            reachability.Confidence,
            reachability.Score,
            reachability.HasRuntimeEvidence,
            reachability.Source,
            reachability.Method,
            reachability.EvidenceRef);
    }

    /// <summary>Copies an optional dictionary into an immutable one with case-insensitive keys; null yields empty.</summary>
    private static ImmutableDictionary<string, TValue> CaseInsensitive<TValue>(IDictionary<string, TValue>? source)
    {
        return (source ?? new Dictionary<string, TValue>())
            .ToImmutableDictionary(StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>Drops blank entries, trims the rest and collects them into a case-insensitive set.</summary>
    private static ImmutableHashSet<string> Normalize(IReadOnlyList<string>? values)
    {
        var cleaned =
            from value in (values ?? Array.Empty<string>())
            where !string.IsNullOrWhiteSpace(value)
            select value.Trim();

        return cleaned.ToImmutableHashSet(StringComparer.OrdinalIgnoreCase);
    }
}
/// <summary>
/// Abstraction over single-request runtime evaluation, consumed by the batch evaluation endpoint.
/// </summary>
internal interface IRuntimeEvaluationExecutor
{
/// <summary>Evaluates one runtime request and returns its response.</summary>
/// <param name="request">The runtime evaluation request to execute.</param>
/// <param name="cancellationToken">Token used to cancel the evaluation.</param>
Task<RuntimeEvaluationResponse> EvaluateAsync(RuntimeEvaluationRequest request, CancellationToken cancellationToken);
}
/// <summary>
/// <see cref="IRuntimeEvaluationExecutor"/> implementation that forwards each request to
/// <see cref="PolicyRuntimeEvaluationService"/>.
/// </summary>
internal sealed class RuntimeEvaluationExecutor : IRuntimeEvaluationExecutor
{
    private readonly PolicyRuntimeEvaluationService _service;

    /// <summary>Creates the executor.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="service"/> is null.</exception>
    public RuntimeEvaluationExecutor(PolicyRuntimeEvaluationService service)
    {
        ArgumentNullException.ThrowIfNull(service);
        _service = service;
    }

    /// <inheritdoc />
    public Task<RuntimeEvaluationResponse> EvaluateAsync(RuntimeEvaluationRequest request, CancellationToken cancellationToken)
    {
        return _service.EvaluateAsync(request, cancellationToken);
    }
}

View File

@@ -0,0 +1,147 @@
using System.Diagnostics;
using System.Linq;
using Microsoft.AspNetCore.Http.HttpResults;
using Microsoft.AspNetCore.Mvc;
using StellaOps.Auth.Abstractions;
using StellaOps.Policy.Engine.BatchEvaluation;
using StellaOps.Policy.Engine.Services;
namespace StellaOps.Policy.Engine.Endpoints;
/// <summary>
/// Minimal-API endpoint that evaluates one page of a batch request, item by item and in request order.
/// </summary>
internal static class BatchEvaluationEndpoint
{
    /// <summary>Page size used when the request does not specify one.</summary>
    private const int DefaultPageSize = 100;

    /// <summary>Largest page size served in a single call.</summary>
    private const int MaxPageSize = 500;

    /// <summary>
    /// Registers <c>POST /policy/eval/batch</c> on <paramref name="routes"/>.
    /// </summary>
    /// <returns>The same <paramref name="routes"/> instance, for chaining.</returns>
    public static IEndpointRouteBuilder MapBatchEvaluation(this IEndpointRouteBuilder routes)
    {
        var group = routes.MapGroup("/policy/eval")
            .RequireAuthorization()
            .WithTags("Policy Evaluation");

        group.MapPost("/batch", EvaluateBatchAsync)
            .WithName("PolicyEngine.BatchEvaluate")
            .WithSummary("Batch-evaluate policy packs against advisory/VEX/SBOM tuples with deterministic ordering and cache-aware responses.")
            .Produces<BatchEvaluationResponseDto>(StatusCodes.Status200OK)
            // The 400 body is a ProblemDetails payload; ProblemHttpResult is a result wrapper, not a response schema.
            .Produces<ProblemDetails>(StatusCodes.Status400BadRequest);

        return routes;
    }

    /// <summary>
    /// Validates the request, evaluates up to one page of items starting at the page-token offset, and reports
    /// paging, cache and timing information.
    /// </summary>
    /// <remarks>
    /// When a budget is set, evaluation stops once the elapsed time reaches it, but at least one item is always
    /// evaluated per call so the returned page token advances and callers cannot loop on an unchanged cursor.
    /// </remarks>
    private static async Task<IResult> EvaluateBatchAsync(
        HttpContext httpContext,
        [FromBody] BatchEvaluationRequestDto request,
        IRuntimeEvaluationExecutor evaluator,
        TimeProvider timeProvider,
        CancellationToken cancellationToken)
    {
        var scopeResult = ScopeAuthorization.RequireScope(httpContext, StellaOpsScopes.PolicyRead);
        if (scopeResult is not null)
        {
            return scopeResult;
        }

        if (!BatchEvaluationValidator.TryValidate(request, out var error))
        {
            return BadRequest("Invalid request", error);
        }

        if (!TryParseOffset(request.PageToken, out var offset))
        {
            return BadRequest("Invalid pageToken", "pageToken must be a non-negative integer offset.");
        }

        var pageSize = Math.Clamp(request.PageSize ?? DefaultPageSize, 1, MaxPageSize);
        var budgetMs = request.BudgetMs;

        // Measure with the injected TimeProvider so elapsed time can be controlled in tests.
        var startTimestamp = timeProvider.GetTimestamp();
        long ElapsedMs() => (long)timeProvider.GetElapsedTime(startTimestamp).TotalMilliseconds;

        var pageItems = request.Items
            .Skip(offset)
            .Take(pageSize)
            .ToList();

        // NOTE(review): the tenant comes from the request body; confirm it is checked against the caller's
        // authenticated tenant somewhere upstream.
        var runtimeRequests = BatchEvaluationMapper.ToRuntimeRequests(request.TenantId, pageItems);
        var results = new List<BatchEvaluationResultDto>(runtimeRequests.Count);
        var cacheHits = 0;
        var cacheMisses = 0;

        foreach (var runtimeRequest in runtimeRequests)
        {
            // Only enforce the budget after the first item so every call makes progress.
            if (results.Count > 0 && budgetMs is int budget && ElapsedMs() >= budget)
            {
                break;
            }

            var response = await evaluator.EvaluateAsync(runtimeRequest, cancellationToken).ConfigureAwait(false);

            if (response.Cached)
            {
                cacheHits++;
            }
            else
            {
                cacheMisses++;
            }

            results.Add(new BatchEvaluationResultDto(
                response.PackId,
                response.Version,
                response.PolicyDigest,
                response.Status,
                response.Severity,
                response.RuleName,
                response.Priority,
                response.Annotations,
                response.Warnings,
                response.AppliedException,
                response.CorrelationId,
                response.Cached,
                response.CacheSource,
                response.EvaluationDurationMs));
        }

        var processed = results.Count;
        var nextOffset = offset + processed;

        // The token is machine-readable, so format it culture-invariantly.
        var nextPageToken = nextOffset < request.Items.Count
            ? nextOffset.ToString(System.Globalization.CultureInfo.InvariantCulture)
            : null;

        // Read the clock once so DurationMs and BudgetRemainingMs agree with each other.
        var elapsedMs = ElapsedMs();
        var budgetRemaining = budgetMs is int budgetValue
            ? Math.Max(0L, budgetValue - elapsedMs)
            : (long?)null;

        var responsePayload = new BatchEvaluationResponseDto(
            Results: results,
            NextPageToken: nextPageToken,
            Total: request.Items.Count,
            Returned: processed,
            CacheHits: cacheHits,
            CacheMisses: cacheMisses,
            DurationMs: elapsedMs,
            BudgetRemainingMs: budgetRemaining);

        return Results.Ok(responsePayload);
    }

    /// <summary>Builds the 400 response used for every validation failure of this endpoint.</summary>
    private static IResult BadRequest(string title, string detail)
    {
        return Results.BadRequest(new ProblemDetails
        {
            Title = title,
            Detail = detail,
            Status = StatusCodes.Status400BadRequest
        });
    }

    /// <summary>
    /// Parses the page token as a non-negative integer offset. A null or blank token means offset zero.
    /// </summary>
    /// <returns><c>false</c> when the token is not an integer or is negative.</returns>
    private static bool TryParseOffset(string? token, out int offset)
    {
        if (string.IsNullOrWhiteSpace(token))
        {
            offset = 0;
            return true;
        }

        return int.TryParse(
                token,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out offset)
            && offset >= 0;
    }
}

View File

@@ -0,0 +1,42 @@
using Microsoft.Extensions.Logging;
using StellaOps.Policy.Engine.ExceptionCache;
namespace StellaOps.Policy.Engine.Events;
/// <summary>
/// Publishes exception lifecycle events and keeps local caches warm.
/// </summary>
public interface IExceptionEventPublisher
{
/// <summary>Publishes a single exception lifecycle event.</summary>
/// <param name="exceptionEvent">The event to publish.</param>
/// <param name="cancellationToken">Token used to cancel the publish operation.</param>
Task PublishAsync(ExceptionEvent exceptionEvent, CancellationToken cancellationToken = default);
}
/// <summary>
/// <see cref="IExceptionEventPublisher"/> that hands each event to the optional effective-exception cache and then
/// writes an informational log entry.
/// </summary>
internal sealed class LoggingExceptionEventPublisher : IExceptionEventPublisher
{
    private readonly ILogger<LoggingExceptionEventPublisher> _logger;
    private readonly IExceptionEffectiveCache? _cache;

    /// <summary>Creates the publisher.</summary>
    /// <param name="cache">Cache to notify about events; may be null, in which case events are only logged.</param>
    /// <param name="logger">Logger receiving the publish entries.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public LoggingExceptionEventPublisher(
        IExceptionEffectiveCache? cache,
        ILogger<LoggingExceptionEventPublisher> logger)
    {
        ArgumentNullException.ThrowIfNull(logger);

        _logger = logger;
        _cache = cache;
    }

    /// <inheritdoc />
    public async Task PublishAsync(ExceptionEvent exceptionEvent, CancellationToken cancellationToken = default)
    {
        if (exceptionEvent is null)
        {
            throw new ArgumentNullException(nameof(exceptionEvent));
        }

        // The cache is notified first; the log entry is written only after that call completes.
        if (_cache is { } cache)
        {
            await cache.HandleExceptionEventAsync(exceptionEvent, cancellationToken).ConfigureAwait(false);
        }

        _logger.LogInformation(
            "Published exception event {EventType} for exception {ExceptionId} tenant {TenantId}",
            exceptionEvent.EventType,
            exceptionEvent.ExceptionId,
            exceptionEvent.TenantId);
    }
}

View File

@@ -0,0 +1,45 @@
namespace StellaOps.Policy.Engine.Options;
/// <summary>
/// Settings for the worker that activates and expires policy exceptions.
/// </summary>
public sealed class PolicyEngineExceptionLifecycleOptions
{
    /// <summary>Seconds between lifecycle checks.</summary>
    public int PollIntervalSeconds { get; set; } = 60;

    /// <summary>Minutes to look back when picking up overdue activations.</summary>
    public int ActivationLookbackMinutes { get; set; } = 5;

    /// <summary>Minutes to look back when expiring exceptions.</summary>
    public int ExpiryLookbackMinutes { get; set; } = 5;

    /// <summary>Minutes to look ahead for upcoming expirations.</summary>
    public int ExpiryHorizonMinutes { get; set; } = 5;

    /// <summary>Upper bound on exceptions processed per cycle.</summary>
    public int MaxBatchSize { get; set; } = 500;

    /// <summary><see cref="PollIntervalSeconds"/> as a <see cref="TimeSpan"/>.</summary>
    public TimeSpan PollInterval
    {
        get { return TimeSpan.FromSeconds(PollIntervalSeconds); }
    }

    /// <summary><see cref="ActivationLookbackMinutes"/> as a <see cref="TimeSpan"/>.</summary>
    public TimeSpan ActivationLookback
    {
        get { return TimeSpan.FromMinutes(ActivationLookbackMinutes); }
    }

    /// <summary><see cref="ExpiryLookbackMinutes"/> as a <see cref="TimeSpan"/>.</summary>
    public TimeSpan ExpiryLookback
    {
        get { return TimeSpan.FromMinutes(ExpiryLookbackMinutes); }
    }

    /// <summary><see cref="ExpiryHorizonMinutes"/> as a <see cref="TimeSpan"/>.</summary>
    public TimeSpan ExpiryHorizon
    {
        get { return TimeSpan.FromMinutes(ExpiryHorizonMinutes); }
    }

    /// <summary>
    /// Verifies the configured values: the poll interval and batch size must be positive and no window may be negative.
    /// </summary>
    /// <exception cref="InvalidOperationException">A setting is out of range; the first failing rule is reported.</exception>
    public void Validate()
    {
        static void Require(bool satisfied, string failureMessage)
        {
            if (!satisfied)
            {
                throw new InvalidOperationException(failureMessage);
            }
        }

        Require(
            PollIntervalSeconds > 0,
            "Exception lifecycle poll interval must be greater than zero.");

        Require(
            ActivationLookbackMinutes >= 0 && ExpiryLookbackMinutes >= 0 && ExpiryHorizonMinutes >= 0,
            "Exception lifecycle windows cannot be negative.");

        Require(
            MaxBatchSize > 0,
            "Exception lifecycle batch size must be greater than zero.");
    }
}

View File

@@ -33,11 +33,13 @@ public sealed class PolicyEngineOptions
public ReachabilityFactsCacheOptions ReachabilityCache { get; } = new();
public PolicyEvaluationCacheOptions EvaluationCache { get; } = new();
public EffectiveDecisionMapOptions EffectiveDecisionMap { get; } = new();
public ExceptionCacheOptions ExceptionCache { get; } = new();
public PolicyEvaluationCacheOptions EvaluationCache { get; } = new();
public EffectiveDecisionMapOptions EffectiveDecisionMap { get; } = new();
public ExceptionCacheOptions ExceptionCache { get; } = new();
public PolicyEngineExceptionLifecycleOptions ExceptionLifecycle { get; } = new();
public void Validate()
{
@@ -45,12 +47,13 @@ public sealed class PolicyEngineOptions
Storage.Validate();
Workers.Validate();
ResourceServer.Validate();
Compilation.Validate();
Activation.Validate();
Telemetry.Validate();
RiskProfile.Validate();
}
}
Compilation.Validate();
Activation.Validate();
Telemetry.Validate();
RiskProfile.Validate();
ExceptionLifecycle.Validate();
}
}
public sealed class PolicyEngineAuthorityOptions
{

View File

@@ -5,18 +5,22 @@ using StellaOps.Auth.Abstractions;
using StellaOps.Auth.Client;
using StellaOps.Auth.ServerIntegration;
using StellaOps.Configuration;
using StellaOps.Policy.Engine.Hosting;
using StellaOps.Policy.Engine.Options;
using StellaOps.Policy.Engine.Compilation;
using StellaOps.Policy.Engine.Endpoints;
using StellaOps.PolicyDsl;
using StellaOps.Policy.Engine.Services;
using StellaOps.Policy.Engine.Workers;
using StellaOps.Policy.Engine.Streaming;
using StellaOps.Policy.Engine.Telemetry;
using StellaOps.AirGap.Policy;
using StellaOps.Policy.Engine.Orchestration;
using StellaOps.Policy.Engine.ReachabilityFacts;
using StellaOps.Policy.Engine.Hosting;
using StellaOps.Policy.Engine.Options;
using StellaOps.Policy.Engine.Compilation;
using StellaOps.Policy.Engine.Endpoints;
using StellaOps.Policy.Engine.BatchEvaluation;
using StellaOps.Policy.Engine.DependencyInjection;
using StellaOps.PolicyDsl;
using StellaOps.Policy.Engine.Services;
using StellaOps.Policy.Engine.Workers;
using StellaOps.Policy.Engine.Streaming;
using StellaOps.Policy.Engine.Telemetry;
using StellaOps.AirGap.Policy;
using StellaOps.Policy.Engine.Orchestration;
using StellaOps.Policy.Engine.ReachabilityFacts;
using StellaOps.Policy.Engine.Storage.InMemory;
using StellaOps.Policy.Engine.Storage.Mongo.Repositories;
var builder = WebApplication.CreateBuilder(args);
@@ -108,9 +112,10 @@ builder.Services.AddOptions<PolicyEngineOptions>()
})
.ValidateOnStart();
builder.Services.AddSingleton(sp => sp.GetRequiredService<IOptions<PolicyEngineOptions>>().Value);
builder.Services.AddSingleton(TimeProvider.System);
builder.Services.AddSingleton<PolicyEngineStartupDiagnostics>();
builder.Services.AddSingleton(sp => sp.GetRequiredService<IOptions<PolicyEngineOptions>>().Value);
builder.Services.AddSingleton(sp => sp.GetRequiredService<PolicyEngineOptions>().ExceptionLifecycle);
builder.Services.AddSingleton(TimeProvider.System);
builder.Services.AddSingleton<PolicyEngineStartupDiagnostics>();
builder.Services.AddSingleton<PolicyTimelineEvents>();
builder.Services.AddSingleton<EvidenceBundleService>();
builder.Services.AddSingleton<PolicyEvaluationAttestationService>();
@@ -123,41 +128,50 @@ builder.Services.AddSingleton<StellaOps.Policy.Engine.Scoring.IRiskScoringJobSto
builder.Services.AddSingleton<StellaOps.Policy.Engine.Scoring.RiskScoringTriggerService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Simulation.RiskSimulationService>();
builder.Services.AddSingleton<StellaOps.Policy.RiskProfile.Export.ProfileExportService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Events.ProfileEventPublisher>();
builder.Services.AddHostedService<IncidentModeExpirationWorker>();
builder.Services.AddHostedService<PolicyEngineBootstrapWorker>();
builder.Services.AddSingleton<StellaOps.PolicyDsl.PolicyCompiler>();
builder.Services.AddSingleton<PolicyCompilationService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Services.PathScopeMetrics>();
builder.Services.AddSingleton<PolicyEvaluationService>();
builder.Services.AddSingleton<PathScopeSimulationService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Overlay.OverlayProjectionService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Overlay.IOverlayEventSink, StellaOps.Policy.Engine.Overlay.LoggingOverlayEventSink>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Overlay.OverlayChangeEventPublisher>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Overlay.PathScopeSimulationBridgeService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Events.ProfileEventPublisher>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Events.IExceptionEventPublisher>(sp =>
new StellaOps.Policy.Engine.Events.LoggingExceptionEventPublisher(
sp.GetService<StellaOps.Policy.Engine.ExceptionCache.IExceptionEffectiveCache>(),
sp.GetRequiredService<ILogger<StellaOps.Policy.Engine.Events.LoggingExceptionEventPublisher>>()));
builder.Services.AddSingleton<ExceptionLifecycleService>();
builder.Services.AddHostedService<ExceptionLifecycleWorker>();
builder.Services.AddHostedService<IncidentModeExpirationWorker>();
builder.Services.AddHostedService<PolicyEngineBootstrapWorker>();
builder.Services.AddSingleton<StellaOps.PolicyDsl.PolicyCompiler>();
builder.Services.AddSingleton<PolicyCompilationService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Services.PathScopeMetrics>();
builder.Services.AddSingleton<PolicyEvaluationService>();
builder.Services.AddPolicyEngineCore();
builder.Services.AddSingleton<PathScopeSimulationService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Overlay.OverlayProjectionService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Overlay.IOverlayEventSink, StellaOps.Policy.Engine.Overlay.LoggingOverlayEventSink>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Overlay.OverlayChangeEventPublisher>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Overlay.PathScopeSimulationBridgeService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.TrustWeighting.TrustWeightingService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.AdvisoryAI.AdvisoryAiKnobsService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.BatchContext.BatchContextService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Services.EvidenceSummaryService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Services.PolicyBundleService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Services.PolicyRuntimeEvaluator>();
builder.Services.AddSingleton<IPolicyPackRepository, InMemoryPolicyPackRepository>();
builder.Services.AddSingleton<IOrchestratorJobStore, InMemoryOrchestratorJobStore>();
builder.Services.AddSingleton<OrchestratorJobService>();
builder.Services.AddSingleton<IWorkerResultStore, InMemoryWorkerResultStore>();
builder.Services.AddSingleton<PolicyWorkerService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Ledger.ILedgerExportStore, StellaOps.Policy.Engine.Ledger.InMemoryLedgerExportStore>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Ledger.LedgerExportService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Snapshots.ISnapshotStore, StellaOps.Policy.Engine.Snapshots.InMemorySnapshotStore>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Snapshots.SnapshotService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Violations.IViolationEventStore, StellaOps.Policy.Engine.Violations.InMemoryViolationEventStore>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Violations.ViolationEventService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Violations.SeverityFusionService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Violations.ConflictHandlingService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Services.PolicyDecisionService>();
builder.Services.AddSingleton<IReachabilityFactsStore, InMemoryReachabilityFactsStore>();
builder.Services.AddSingleton<IReachabilityFactsOverlayCache, InMemoryReachabilityFactsOverlayCache>();
builder.Services.AddSingleton<ReachabilityFactsJoiningService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Services.PolicyRuntimeEvaluator>();
builder.Services.AddSingleton<IPolicyPackRepository, InMemoryPolicyPackRepository>();
builder.Services.AddSingleton<IOrchestratorJobStore, InMemoryOrchestratorJobStore>();
builder.Services.AddSingleton<OrchestratorJobService>();
builder.Services.AddSingleton<IWorkerResultStore, InMemoryWorkerResultStore>();
builder.Services.AddSingleton<PolicyWorkerService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Ledger.ILedgerExportStore, StellaOps.Policy.Engine.Ledger.InMemoryLedgerExportStore>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Ledger.LedgerExportService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Snapshots.ISnapshotStore, StellaOps.Policy.Engine.Snapshots.InMemorySnapshotStore>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Snapshots.SnapshotService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Violations.IViolationEventStore, StellaOps.Policy.Engine.Violations.InMemoryViolationEventStore>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Violations.ViolationEventService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Violations.SeverityFusionService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Violations.ConflictHandlingService>();
builder.Services.AddSingleton<StellaOps.Policy.Engine.Services.PolicyDecisionService>();
builder.Services.AddSingleton<IExceptionRepository, InMemoryExceptionRepository>();
builder.Services.AddSingleton<IReachabilityFactsStore, InMemoryReachabilityFactsStore>();
builder.Services.AddSingleton<IReachabilityFactsOverlayCache, InMemoryReachabilityFactsOverlayCache>();
builder.Services.AddSingleton<ReachabilityFactsJoiningService>();
builder.Services.AddSingleton<IRuntimeEvaluationExecutor, RuntimeEvaluationExecutor>();
builder.Services.AddHttpContextAccessor();
builder.Services.AddRouting(options => options.LowercaseUrls = true);
@@ -202,13 +216,14 @@ app.MapGet("/readyz", (PolicyEngineStartupDiagnostics diagnostics) =>
app.MapGet("/", () => Results.Redirect("/healthz"));
app.MapPolicyCompilation();
app.MapPolicyPacks();
app.MapPathScopeSimulation();
app.MapOverlaySimulation();
app.MapEvidenceSummaries();
app.MapTrustWeighting();
app.MapAdvisoryAiKnobs();
app.MapPolicyCompilation();
app.MapPolicyPacks();
app.MapPathScopeSimulation();
app.MapOverlaySimulation();
app.MapEvidenceSummaries();
app.MapBatchEvaluation();
app.MapTrustWeighting();
app.MapAdvisoryAiKnobs();
app.MapBatchContext();
app.MapOrchestratorJobs();
app.MapPolicyWorker();

View File

@@ -4,11 +4,11 @@ This service hosts the Policy Engine APIs and background workers introduced in *
## Compliance Checklist
- [x] Configuration loads from `policy-engine.yaml`/environment variables and validates on startup.
- [x] Authority client scaffolding enforces `policy:*` + `effective:write` scopes and respects back-channel timeouts.
- [x] Resource server authentication requires Policy Engine scopes with tenant-aware policies.
- [x] Health and readiness endpoints exist for platform probes.
- [ ] Deterministic policy evaluation pipeline implemented (POLICY-ENGINE-20-002).
- [ ] Mongo materialisation writers implemented (POLICY-ENGINE-20-004).
- [ ] Observability (metrics/traces/logs) completed (POLICY-ENGINE-20-007).
- [ ] Comprehensive test suites and perf baselines established (POLICY-ENGINE-20-008).
- [x] Configuration loads from `policy-engine.yaml`/environment variables and validates on startup.
- [x] Authority client scaffolding enforces `policy:*` + `effective:write` scopes and respects back-channel timeouts.
- [x] Resource server authentication requires Policy Engine scopes with tenant-aware policies.
- [x] Health and readiness endpoints exist for platform probes.
- [x] Deterministic policy evaluation pipeline implemented (POLICY-ENGINE-20-002).
- [x] Mongo materialisation writers implemented (POLICY-ENGINE-20-004).
- [x] Observability (metrics/traces/logs) completed (POLICY-ENGINE-20-007).
- [x] Comprehensive test suites and perf baselines established (POLICY-ENGINE-20-008).

View File

@@ -1,3 +1,4 @@
using System.Buffers.Binary;
using System.Security.Cryptography;
using System.Text;
using StellaOps.Policy.Engine.Domain;
@@ -62,7 +63,7 @@ internal sealed class EvidenceSummaryService
private DateTimeOffset DeriveIngestedAt(byte[] hashBytes)
{
// Use a deterministic timestamp within the last 30 days to avoid non-determinism in tests.
var seconds = BitConverter.ToUInt32(hashBytes, 0) % (30u * 24u * 60u * 60u);
var seconds = BinaryPrimitives.ReadUInt32BigEndian(hashBytes) % (30u * 24u * 60u * 60u);
var baseline = _timeProvider.GetUtcNow().UtcDateTime.Date; // midnight UTC today
var dt = baseline.AddSeconds(seconds);
return new DateTimeOffset(dt, TimeSpan.Zero);

View File

@@ -58,6 +58,7 @@ internal sealed class PolicyRuntimeEvaluationService
private readonly IPolicyPackRepository _repository;
private readonly IPolicyEvaluationCache _cache;
private readonly PolicyEvaluator _evaluator;
private readonly ReachabilityFacts.ReachabilityFactsJoiningService? _reachabilityFacts;
private readonly TimeProvider _timeProvider;
private readonly ILogger<PolicyRuntimeEvaluationService> _logger;
@@ -71,12 +72,14 @@ internal sealed class PolicyRuntimeEvaluationService
IPolicyPackRepository repository,
IPolicyEvaluationCache cache,
PolicyEvaluator evaluator,
ReachabilityFacts.ReachabilityFactsJoiningService? reachabilityFacts,
TimeProvider timeProvider,
ILogger<PolicyRuntimeEvaluationService> logger)
{
_repository = repository ?? throw new ArgumentNullException(nameof(repository));
_cache = cache ?? throw new ArgumentNullException(nameof(cache));
_evaluator = evaluator ?? throw new ArgumentNullException(nameof(evaluator));
_reachabilityFacts = reachabilityFacts;
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
@@ -90,35 +93,38 @@ internal sealed class PolicyRuntimeEvaluationService
{
ArgumentNullException.ThrowIfNull(request);
using var activity = PolicyEngineTelemetry.StartEvaluateActivity(
request.TenantId, request.PackId, runId: null);
activity?.SetTag("policy.version", request.Version);
activity?.SetTag("subject.purl", request.SubjectPurl);
activity?.SetTag("advisory.id", request.AdvisoryId);
var startTimestamp = _timeProvider.GetTimestamp();
var evaluationTimestamp = request.EvaluationTimestamp ?? _timeProvider.GetUtcNow();
var effectiveRequest = _reachabilityFacts is null
? request
: await EnrichReachabilityAsync(request, cancellationToken).ConfigureAwait(false);
using var activity = PolicyEngineTelemetry.StartEvaluateActivity(
effectiveRequest.TenantId, effectiveRequest.PackId, runId: null);
activity?.SetTag("policy.version", effectiveRequest.Version);
activity?.SetTag("subject.purl", effectiveRequest.SubjectPurl);
activity?.SetTag("advisory.id", effectiveRequest.AdvisoryId);
// Load the compiled policy bundle
var bundle = await _repository.GetBundleAsync(request.PackId, request.Version, cancellationToken)
var bundle = await _repository.GetBundleAsync(effectiveRequest.PackId, effectiveRequest.Version, cancellationToken)
.ConfigureAwait(false);
if (bundle is null)
{
PolicyEngineTelemetry.RecordError("evaluation", request.TenantId);
PolicyEngineTelemetry.RecordEvaluationFailure(request.TenantId, request.PackId, "bundle_not_found");
PolicyEngineTelemetry.RecordError("evaluation", effectiveRequest.TenantId);
PolicyEngineTelemetry.RecordEvaluationFailure(effectiveRequest.TenantId, effectiveRequest.PackId, "bundle_not_found");
activity?.SetStatus(ActivityStatusCode.Error, "Bundle not found");
throw new InvalidOperationException(
$"Policy bundle not found for pack '{request.PackId}' version {request.Version}.");
$"Policy bundle not found for pack '{effectiveRequest.PackId}' version {effectiveRequest.Version}.");
}
// Compute deterministic cache key
var subjectDigest = ComputeSubjectDigest(request.TenantId, request.SubjectPurl, request.AdvisoryId);
var contextDigest = ComputeContextDigest(request);
var subjectDigest = ComputeSubjectDigest(effectiveRequest.TenantId, effectiveRequest.SubjectPurl, effectiveRequest.AdvisoryId);
var contextDigest = ComputeContextDigest(effectiveRequest);
var cacheKey = PolicyEvaluationCacheKey.Create(bundle.Digest, subjectDigest, contextDigest);
// Try cache lookup unless bypassed
if (!request.BypassCache)
if (!effectiveRequest.BypassCache)
{
var cacheResult = await _cache.GetAsync(cacheKey, cancellationToken).ConfigureAwait(false);
if (cacheResult.CacheHit && cacheResult.Entry is not null)
@@ -132,10 +138,10 @@ internal sealed class PolicyRuntimeEvaluationService
activity?.SetStatus(ActivityStatusCode.Ok);
_logger.LogDebug(
"Cache hit for evaluation {PackId}@{Version} subject {Subject} from {Source}",
request.PackId, request.Version, request.SubjectPurl, cacheResult.Source);
effectiveRequest.PackId, effectiveRequest.Version, effectiveRequest.SubjectPurl, cacheResult.Source);
return CreateResponseFromCache(
request, bundle.Digest, cacheResult.Entry, cacheResult.Source, duration);
effectiveRequest, bundle.Digest, cacheResult.Entry, cacheResult.Source, duration);
}
}
@@ -153,13 +159,13 @@ internal sealed class PolicyRuntimeEvaluationService
}
var context = new PolicyEvaluationContext(
request.Severity,
effectiveRequest.Severity,
new PolicyEvaluationEnvironment(ImmutableDictionary<string, string>.Empty),
request.Advisory,
request.Vex,
request.Sbom,
request.Exceptions,
request.Reachability,
effectiveRequest.Advisory,
effectiveRequest.Vex,
effectiveRequest.Sbom,
effectiveRequest.Exceptions,
effectiveRequest.Reachability,
evaluationTimestamp);
var evalRequest = new Evaluation.PolicyEvaluationRequest(document, context);
@@ -187,11 +193,25 @@ internal sealed class PolicyRuntimeEvaluationService
var evalDurationSeconds = evalDuration / 1000.0;
// Record metrics
PolicyEngineTelemetry.RecordEvaluationLatency(evalDurationSeconds, request.TenantId, request.PackId);
PolicyEngineTelemetry.RecordEvaluation(request.TenantId, request.PackId, "full");
PolicyEngineTelemetry.RecordEvaluationLatency(evalDurationSeconds, effectiveRequest.TenantId, effectiveRequest.PackId);
PolicyEngineTelemetry.RecordEvaluation(effectiveRequest.TenantId, effectiveRequest.PackId, "full");
if (!string.IsNullOrEmpty(result.RuleName))
{
PolicyEngineTelemetry.RecordRuleFired(request.PackId, result.RuleName);
PolicyEngineTelemetry.RecordRuleFired(effectiveRequest.PackId, result.RuleName);
}
if (result.AppliedException is not null)
{
PolicyEngineTelemetry.RecordExceptionApplication(effectiveRequest.TenantId, result.AppliedException.EffectType.ToString());
PolicyEngineTelemetry.RecordExceptionApplicationLatency(evalDurationSeconds, effectiveRequest.TenantId, result.AppliedException.EffectType.ToString());
_logger.LogInformation(
"Applied exception {ExceptionId} (effect {EffectType}) for tenant {TenantId} pack {PackId}@{Version} aoc {CompilationId}",
result.AppliedException.ExceptionId,
result.AppliedException.EffectType,
effectiveRequest.TenantId,
effectiveRequest.PackId,
effectiveRequest.Version,
bundle.AocMetadata?.CompilationId ?? "none");
}
activity?.SetTag("evaluation.status", result.Status);
@@ -201,11 +221,11 @@ internal sealed class PolicyRuntimeEvaluationService
_logger.LogDebug(
"Evaluated {PackId}@{Version} subject {Subject} in {Duration}ms - {Status}",
request.PackId, request.Version, request.SubjectPurl, evalDuration, result.Status);
effectiveRequest.PackId, effectiveRequest.Version, effectiveRequest.SubjectPurl, evalDuration, result.Status);
return new RuntimeEvaluationResponse(
request.PackId,
request.Version,
effectiveRequest.PackId,
effectiveRequest.Version,
bundle.Digest,
result.Status,
result.Severity,
@@ -240,8 +260,12 @@ internal sealed class PolicyRuntimeEvaluationService
var cacheHits = 0;
var cacheMisses = 0;
var hydratedRequests = _reachabilityFacts is null
? requests
: await EnrichReachabilityBatchAsync(requests, cancellationToken).ConfigureAwait(false);
// Group by pack/version for bundle loading efficiency
var groups = requests.GroupBy(r => (r.PackId, r.Version));
var groups = hydratedRequests.GroupBy(r => (r.PackId, r.Version));
foreach (var group in groups)
{
@@ -351,6 +375,20 @@ internal sealed class PolicyRuntimeEvaluationService
PolicyEngineTelemetry.RecordRuleFired(packId, result.RuleName);
}
if (result.AppliedException is not null)
{
PolicyEngineTelemetry.RecordExceptionApplication(request.TenantId, result.AppliedException.EffectType.ToString());
PolicyEngineTelemetry.RecordExceptionApplicationLatency(duration / 1000.0, request.TenantId, result.AppliedException.EffectType.ToString());
_logger.LogInformation(
"Applied exception {ExceptionId} (effect {EffectType}) for tenant {TenantId} pack {PackId}@{Version} aoc {CompilationId}",
result.AppliedException.ExceptionId,
result.AppliedException.EffectType,
request.TenantId,
request.PackId,
request.Version,
bundle.AocMetadata?.CompilationId ?? "none");
}
results.Add(new RuntimeEvaluationResponse(
request.PackId,
request.Version,
@@ -448,7 +486,15 @@ internal sealed class PolicyRuntimeEvaluationService
vexStatements = request.Vex.Statements.Select(s => $"{s.Status}:{s.Justification}").OrderBy(s => s).ToArray(),
sbomTags = request.Sbom.Tags.OrderBy(t => t).ToArray(),
exceptionCount = request.Exceptions.Instances.Length,
reachability = request.Reachability.State,
reachability = new
{
state = request.Reachability.State,
confidence = request.Reachability.Confidence,
score = request.Reachability.Score,
hasRuntimeEvidence = request.Reachability.HasRuntimeEvidence,
source = request.Reachability.Source,
method = request.Reachability.Method
},
};
var json = JsonSerializer.Serialize(contextData, ContextSerializerOptions);
@@ -470,5 +516,98 @@ internal sealed class PolicyRuntimeEvaluationService
var elapsed = _timeProvider.GetElapsedTime(startTimestamp);
return (long)elapsed.TotalMilliseconds;
}
/// <summary>
/// Fills in reachability for a single request from the reachability facts store.
/// </summary>
/// <param name="request">The evaluation request to hydrate.</param>
/// <param name="cancellationToken">Token forwarded to the facts lookup.</param>
/// <returns>
/// The same request when no facts store is configured, when the request's reachability is not
/// unknown, or when no fact is found; otherwise a copy carrying the stored fact.
/// </returns>
private async Task<RuntimeEvaluationRequest> EnrichReachabilityAsync(
    RuntimeEvaluationRequest request,
    CancellationToken cancellationToken)
{
    var factStore = _reachabilityFacts;
    if (factStore is null)
    {
        return request;
    }

    // Reachability supplied by the caller is kept as-is; only unknown values are looked up.
    if (!request.Reachability.IsUnknown)
    {
        return request;
    }

    var stored = await factStore
        .GetFactAsync(request.TenantId, request.SubjectPurl, request.AdvisoryId, cancellationToken)
        .ConfigureAwait(false);
    if (stored is null)
    {
        return request;
    }

    var hydrated = new PolicyEvaluationReachability(
        State: stored.State.ToString().ToLowerInvariant(),
        Confidence: stored.Confidence,
        Score: stored.Score,
        HasRuntimeEvidence: stored.HasRuntimeEvidence,
        Source: stored.Source,
        Method: stored.Method.ToString().ToLowerInvariant(),
        EvidenceRef: stored.EvidenceRef ?? stored.EvidenceHash);

    ReachabilityFacts.ReachabilityFactsTelemetry.RecordFactApplied(hydrated.State);
    return request with { Reachability = hydrated };
}
/// <summary>
/// Fills in reachability for a batch of requests, issuing one facts lookup per tenant.
/// </summary>
/// <param name="requests">Requests to hydrate; entries whose reachability is not unknown pass through untouched.</param>
/// <param name="cancellationToken">Token forwarded to each batch lookup.</param>
/// <returns>
/// A list with the same length and the same ordering as <paramref name="requests"/>, where each
/// request with unknown reachability and a stored fact is replaced by a copy carrying that fact.
/// Returns <paramref name="requests"/> itself when no facts store is configured.
/// </returns>
private async Task<IReadOnlyList<RuntimeEvaluationRequest>> EnrichReachabilityBatchAsync(
    IReadOnlyList<RuntimeEvaluationRequest> requests,
    CancellationToken cancellationToken)
{
    if (_reachabilityFacts is null)
    {
        return requests;
    }

    // Results are written back by original index so the output order matches the input order
    // even when requests from different tenants are interleaved.
    var enriched = new RuntimeEvaluationRequest[requests.Count];
    var positioned = requests.Select((request, index) => (Request: request, Index: index));

    foreach (var tenantGroup in positioned.GroupBy(p => p.Request.TenantId, StringComparer.Ordinal))
    {
        var pending = tenantGroup
            .Where(p => p.Request.Reachability.IsUnknown)
            .Select(p => new ReachabilityFacts.ReachabilityFactsRequest(p.Request.SubjectPurl, p.Request.AdvisoryId))
            .Distinct()
            .ToList();

        if (pending.Count == 0)
        {
            // Nothing to look up for this tenant: every request passes through unchanged.
            foreach (var (request, index) in tenantGroup)
            {
                enriched[index] = request;
            }

            continue;
        }

        var batch = await _reachabilityFacts
            .GetFactsBatchAsync(tenantGroup.Key, pending, cancellationToken)
            .ConfigureAwait(false);
        var lookup = batch?.Found;

        foreach (var (request, index) in tenantGroup)
        {
            if (!request.Reachability.IsUnknown || lookup is null)
            {
                enriched[index] = request;
                continue;
            }

            var key = new ReachabilityFacts.ReachabilityFactKey(request.TenantId, request.SubjectPurl, request.AdvisoryId);
            if (!lookup.TryGetValue(key, out var fact))
            {
                enriched[index] = request;
                continue;
            }

            var reachability = new PolicyEvaluationReachability(
                State: fact.State.ToString().ToLowerInvariant(),
                Confidence: fact.Confidence,
                Score: fact.Score,
                HasRuntimeEvidence: fact.HasRuntimeEvidence,
                Source: fact.Source,
                Method: fact.Method.ToString().ToLowerInvariant(),
                EvidenceRef: fact.EvidenceRef ?? fact.EvidenceHash);

            ReachabilityFacts.ReachabilityFactsTelemetry.RecordFactApplied(reachability.State);
            enriched[index] = request with { Reachability = reachability };
        }
    }

    return enriched;
}
}

View File

@@ -0,0 +1,351 @@
using System.Collections.Immutable;
using System.Collections.Concurrent;
using System.Linq;
using StellaOps.Policy.Engine.Storage.Mongo.Documents;
using StellaOps.Policy.Engine.Storage.Mongo.Repositories;
namespace StellaOps.Policy.Engine.Storage.InMemory;
/// <summary>
/// In-memory implementation of IExceptionRepository for offline/test runs.
/// Provides minimal semantics needed for lifecycle processing.
/// </summary>
/// <remarks>
/// Exceptions and bindings are held in process-local dictionaries keyed by (lower-cased tenant, id).
/// Documents are copied through <c>Clone</c> when stored and when returned; the copy covers only the
/// properties listed in <c>Clone</c> and shares the <c>Scope</c>, <c>RiskAssessment</c> and <c>Tags</c>
/// references with the source. Review operations are not persisted, and list operations apply only
/// the status and type filters of the query options.
/// </remarks>
public sealed class InMemoryExceptionRepository : IExceptionRepository
{
    private readonly ConcurrentDictionary<(string Tenant, string Id), PolicyExceptionDocument> _exceptions = new();
    private readonly ConcurrentDictionary<(string Tenant, string Id), ExceptionBindingDocument> _bindings = new();

    /// <summary>Stores a copy of the exception (overwriting any entry with the same key) and returns the input.</summary>
    public Task<PolicyExceptionDocument> CreateExceptionAsync(PolicyExceptionDocument exception, CancellationToken cancellationToken)
    {
        _exceptions[(exception.TenantId.ToLowerInvariant(), exception.Id)] = Clone(exception);
        return Task.FromResult(exception);
    }

    /// <summary>Returns a copy of the stored exception, or <c>null</c> when none matches.</summary>
    public Task<PolicyExceptionDocument?> GetExceptionAsync(string tenantId, string exceptionId, CancellationToken cancellationToken)
    {
        _exceptions.TryGetValue((tenantId.ToLowerInvariant(), exceptionId), out var value);
        return Task.FromResult<PolicyExceptionDocument?>(value is null ? null : Clone(value));
    }

    /// <summary>Stores a copy of the exception whether or not an entry already exists, and returns the input.</summary>
    public Task<PolicyExceptionDocument?> UpdateExceptionAsync(PolicyExceptionDocument exception, CancellationToken cancellationToken)
    {
        _exceptions[(exception.TenantId.ToLowerInvariant(), exception.Id)] = Clone(exception);
        return Task.FromResult<PolicyExceptionDocument?>(exception);
    }

    /// <summary>Lists exceptions across all tenants, filtered by the status and type sets in <paramref name="options"/>.</summary>
    public Task<ImmutableArray<PolicyExceptionDocument>> ListExceptionsAsync(ExceptionQueryOptions options, CancellationToken cancellationToken)
    {
        var matches = ApplyFilters(_exceptions.Values, options);
        return Task.FromResult(matches.Select(Clone).ToImmutableArray());
    }

    /// <summary>Lists one tenant's exceptions, filtered by the status and type sets in <paramref name="options"/>.</summary>
    public Task<ImmutableArray<PolicyExceptionDocument>> ListExceptionsAsync(string tenantId, ExceptionQueryOptions options, CancellationToken cancellationToken)
    {
        var matches = ApplyFilters(ExceptionsForTenant(tenantId), options);
        return Task.FromResult(matches.Select(Clone).ToImmutableArray());
    }

    /// <summary>Returns every exception of the tenant whose status is "active"; <paramref name="options"/> is not consulted.</summary>
    public Task<ImmutableArray<PolicyExceptionDocument>> FindApplicableExceptionsAsync(string tenantId, ExceptionQueryOptions options, CancellationToken cancellationToken)
    {
        var results = ExceptionsForTenant(tenantId)
            .Where(e => e.Status.Equals("active", StringComparison.OrdinalIgnoreCase))
            .Select(Clone)
            .ToImmutableArray();
        return Task.FromResult(results);
    }

    /// <summary>
    /// Sets a new status on a stored exception and stamps <c>UpdatedAt</c>.
    /// </summary>
    /// <returns><c>true</c> when the exception exists and was updated; otherwise <c>false</c>.</returns>
    public Task<bool> UpdateExceptionStatusAsync(string tenantId, string exceptionId, string newStatus, DateTimeOffset timestamp, CancellationToken cancellationToken)
    {
        var updated = TryMutateException(tenantId, exceptionId, document =>
        {
            document.Status = newStatus;
            document.UpdatedAt = timestamp;
            if (newStatus == "active")
            {
                document.ActivatedAt = timestamp;
            }

            // NOTE(review): expiry is recorded in RevokedAt, as in the original implementation;
            // confirm this matches the persistent repository's behaviour.
            if (newStatus == "expired")
            {
                document.RevokedAt = timestamp;
            }
        });
        return Task.FromResult(updated);
    }

    /// <summary>
    /// Marks a stored exception as "revoked" and records when, by whom and why.
    /// </summary>
    /// <returns><c>true</c> when the exception exists and was updated; otherwise <c>false</c>.</returns>
    public Task<bool> RevokeExceptionAsync(string tenantId, string exceptionId, string revokedBy, string? reason, DateTimeOffset timestamp, CancellationToken cancellationToken)
    {
        var updated = TryMutateException(tenantId, exceptionId, document =>
        {
            document.Status = "revoked";
            document.UpdatedAt = timestamp;
            document.RevokedAt = timestamp;
            document.RevokedBy = revokedBy;
            document.RevocationReason = reason;
        });
        return Task.FromResult(updated);
    }

    /// <summary>Returns the tenant's active exceptions whose <c>ExpiresAt</c> falls within [<paramref name="from"/>, <paramref name="to"/>].</summary>
    public Task<ImmutableArray<PolicyExceptionDocument>> GetExpiringExceptionsAsync(string tenantId, DateTimeOffset from, DateTimeOffset to, CancellationToken cancellationToken)
    {
        var results = ExceptionsForTenant(tenantId)
            .Where(e => e.Status.Equals("active", StringComparison.OrdinalIgnoreCase))
            .Where(e => e.ExpiresAt is not null && e.ExpiresAt >= from && e.ExpiresAt <= to)
            .Select(Clone)
            .ToImmutableArray();
        return Task.FromResult(results);
    }

    /// <summary>Returns the tenant's approved exceptions with no <c>EffectiveFrom</c> or one at or before <paramref name="asOf"/>.</summary>
    public Task<ImmutableArray<PolicyExceptionDocument>> GetPendingActivationsAsync(string tenantId, DateTimeOffset asOf, CancellationToken cancellationToken)
    {
        var results = ExceptionsForTenant(tenantId)
            .Where(e => e.Status.Equals("approved", StringComparison.OrdinalIgnoreCase))
            .Where(e => e.EffectiveFrom is null || e.EffectiveFrom <= asOf)
            .Select(Clone)
            .ToImmutableArray();
        return Task.FromResult(results);
    }

    /// <summary>Returns the review unchanged; reviews are not stored by this implementation.</summary>
    public Task<ExceptionReviewDocument> CreateReviewAsync(ExceptionReviewDocument review, CancellationToken cancellationToken)
    {
        return Task.FromResult(review);
    }

    /// <summary>Always returns <c>null</c>; reviews are not stored by this implementation.</summary>
    public Task<ExceptionReviewDocument?> GetReviewAsync(string tenantId, string reviewId, CancellationToken cancellationToken)
    {
        return Task.FromResult<ExceptionReviewDocument?>(null);
    }

    /// <summary>Always returns <c>null</c>; reviews are not stored by this implementation.</summary>
    public Task<ExceptionReviewDocument?> AddReviewDecisionAsync(string tenantId, string reviewId, ReviewDecisionDocument decision, CancellationToken cancellationToken)
    {
        return Task.FromResult<ExceptionReviewDocument?>(null);
    }

    /// <summary>Always returns <c>null</c>; reviews are not stored by this implementation.</summary>
    public Task<ExceptionReviewDocument?> CompleteReviewAsync(string tenantId, string reviewId, string finalStatus, DateTimeOffset completedAt, CancellationToken cancellationToken)
    {
        return Task.FromResult<ExceptionReviewDocument?>(null);
    }

    /// <summary>Always returns an empty array; reviews are not stored by this implementation.</summary>
    public Task<ImmutableArray<ExceptionReviewDocument>> GetReviewsForExceptionAsync(string tenantId, string exceptionId, CancellationToken cancellationToken)
    {
        return Task.FromResult(ImmutableArray<ExceptionReviewDocument>.Empty);
    }

    /// <summary>Always returns an empty array; reviews are not stored by this implementation.</summary>
    public Task<ImmutableArray<ExceptionReviewDocument>> GetPendingReviewsAsync(string tenantId, string? reviewerId, CancellationToken cancellationToken)
    {
        return Task.FromResult(ImmutableArray<ExceptionReviewDocument>.Empty);
    }

    /// <summary>Stores a copy of the binding (insert or overwrite) and returns the input.</summary>
    public Task<ExceptionBindingDocument> UpsertBindingAsync(ExceptionBindingDocument binding, CancellationToken cancellationToken)
    {
        _bindings[(binding.TenantId.ToLowerInvariant(), binding.Id)] = Clone(binding);
        return Task.FromResult(binding);
    }

    /// <summary>Returns copies of the tenant's bindings that reference <paramref name="exceptionId"/>.</summary>
    public Task<ImmutableArray<ExceptionBindingDocument>> GetBindingsForExceptionAsync(string tenantId, string exceptionId, CancellationToken cancellationToken)
    {
        var results = BindingsForTenant(tenantId)
            .Where(b => b.ExceptionId == exceptionId)
            .Select(Clone)
            .ToImmutableArray();
        return Task.FromResult(results);
    }

    /// <summary>Returns the tenant's active bindings for an asset that are in effect at <paramref name="asOf"/>.</summary>
    public Task<ImmutableArray<ExceptionBindingDocument>> GetActiveBindingsForAssetAsync(string tenantId, string assetId, DateTimeOffset asOf, CancellationToken cancellationToken)
    {
        var results = BindingsForTenant(tenantId)
            .Where(b => b.AssetId == assetId)
            .Where(b => b.Status == "active")
            .Where(b => b.EffectiveFrom <= asOf && (b.ExpiresAt is null || b.ExpiresAt > asOf))
            .Select(Clone)
            .ToImmutableArray();
        return Task.FromResult(results);
    }

    /// <summary>Removes the tenant's bindings that reference <paramref name="exceptionId"/>.</summary>
    /// <returns>The number of bindings that matched when the store was scanned.</returns>
    public Task<long> DeleteBindingsForExceptionAsync(string tenantId, string exceptionId, CancellationToken cancellationToken)
    {
        var tenant = tenantId.ToLowerInvariant();
        var removed = _bindings.Where(kvp => kvp.Key.Tenant == tenant && kvp.Value.ExceptionId == exceptionId).ToList();
        foreach (var kvp in removed)
        {
            _bindings.TryRemove(kvp.Key, out _);
        }

        return Task.FromResult((long)removed.Count);
    }

    /// <summary>Returns the tenant's active bindings whose <c>ExpiresAt</c> is before <paramref name="asOf"/>.</summary>
    public Task<ImmutableArray<ExceptionBindingDocument>> GetExpiredBindingsAsync(string tenantId, DateTimeOffset asOf, CancellationToken cancellationToken)
    {
        var results = ExpiredBindings(tenantId, asOf)
            .Select(Clone)
            .ToImmutableArray();
        return Task.FromResult(results);
    }

    /// <summary>Counts the tenant's exceptions per status, treating statuses that differ only in case as one.</summary>
    public Task<IReadOnlyDictionary<string, int>> GetExceptionCountsByStatusAsync(string tenantId, CancellationToken cancellationToken)
    {
        // Group with the same comparer the dictionary uses; grouping case-sensitively would make
        // ToDictionary throw on statuses such as "Active" and "active".
        var counts = ExceptionsForTenant(tenantId)
            .GroupBy(e => e.Status, StringComparer.OrdinalIgnoreCase)
            .ToDictionary(g => g.Key, g => g.Count(), StringComparer.OrdinalIgnoreCase);
        return Task.FromResult<IReadOnlyDictionary<string, int>>(counts);
    }

    /// <summary>Same as the unbounded overload, but returns at most <paramref name="limit"/> bindings.</summary>
    public Task<ImmutableArray<ExceptionBindingDocument>> GetExpiredBindingsAsync(string tenantId, DateTimeOffset asOf, int limit, CancellationToken cancellationToken)
    {
        var results = ExpiredBindings(tenantId, asOf)
            .Take(limit)
            .Select(Clone)
            .ToImmutableArray();
        return Task.FromResult(results);
    }

    /// <summary>Sets a new status on a stored binding.</summary>
    /// <returns><c>true</c> when the binding exists and was updated; otherwise <c>false</c>.</returns>
    public Task<bool> UpdateBindingStatusAsync(string tenantId, string bindingId, string newStatus, CancellationToken cancellationToken)
    {
        // Bindings are stored under the lower-cased tenant, so a direct keyed lookup replaces a key scan.
        var key = (tenantId.ToLowerInvariant(), bindingId);
        if (!_bindings.TryGetValue(key, out var binding))
        {
            return Task.FromResult(false);
        }

        var updated = Clone(binding);
        updated.Status = newStatus;
        _bindings[key] = updated;
        return Task.FromResult(true);
    }

    /// <summary>
    /// Finds the tenant's active, in-effect exceptions that apply to an asset (and optionally an advisory),
    /// either through an active binding or through the exception's own scope.
    /// </summary>
    public Task<ImmutableArray<PolicyExceptionDocument>> FindApplicableExceptionsAsync(string tenantId, string assetId, string? advisoryId, DateTimeOffset asOf, CancellationToken cancellationToken)
    {
        var activeExceptions = ExceptionsForTenant(tenantId)
            .Where(e => e.Status.Equals("active", StringComparison.OrdinalIgnoreCase))
            .Where(e => (e.EffectiveFrom is null || e.EffectiveFrom <= asOf) && (e.ExpiresAt is null || e.ExpiresAt > asOf))
            .ToDictionary(e => e.Id, Clone);

        if (activeExceptions.Count == 0)
        {
            return Task.FromResult(ImmutableArray<PolicyExceptionDocument>.Empty);
        }

        // Exception ids reachable through an active binding on this asset.
        var matchingIds = BindingsForTenant(tenantId)
            .Where(b => b.Status == "active")
            .Where(b => b.EffectiveFrom <= asOf && (b.ExpiresAt is null || b.ExpiresAt > asOf))
            .Where(b => b.AssetId == assetId)
            .Where(b => advisoryId is null || string.IsNullOrEmpty(b.AdvisoryId) || b.AdvisoryId == advisoryId)
            .Select(b => b.ExceptionId)
            .ToHashSet(StringComparer.OrdinalIgnoreCase);

        // Exception ids that match through their own scope, independent of bindings.
        foreach (var ex in activeExceptions.Values)
        {
            var scopeMatches =
                ex.Scope.ApplyToAll
                || ex.Scope.AssetIds.Contains(assetId, StringComparer.OrdinalIgnoreCase)
                || (advisoryId is not null && ex.Scope.AdvisoryIds.Contains(advisoryId, StringComparer.OrdinalIgnoreCase));
            if (scopeMatches)
            {
                matchingIds.Add(ex.Id);
            }
        }

        // Bindings may reference exceptions that are not active; those ids are dropped here.
        var result = ImmutableArray.CreateBuilder<PolicyExceptionDocument>();
        foreach (var id in matchingIds)
        {
            if (activeExceptions.TryGetValue(id, out var match))
            {
                result.Add(match);
            }
        }

        return Task.FromResult(result.ToImmutable());
    }

    /// <summary>Snapshot of the stored exceptions whose tenant matches <paramref name="tenantId"/>, ignoring case.</summary>
    private IEnumerable<PolicyExceptionDocument> ExceptionsForTenant(string tenantId)
    {
        var tenant = tenantId.ToLowerInvariant();
        return _exceptions.Values.Where(e => string.Equals(e.TenantId, tenant, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>Snapshot of the stored bindings whose tenant matches <paramref name="tenantId"/>, ignoring case.</summary>
    private IEnumerable<ExceptionBindingDocument> BindingsForTenant(string tenantId)
    {
        var tenant = tenantId.ToLowerInvariant();
        return _bindings.Values.Where(b => string.Equals(b.TenantId, tenant, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>The tenant's active bindings whose <c>ExpiresAt</c> is before <paramref name="asOf"/>.</summary>
    private IEnumerable<ExceptionBindingDocument> ExpiredBindings(string tenantId, DateTimeOffset asOf)
    {
        return BindingsForTenant(tenantId)
            .Where(b => b.Status == "active")
            .Where(b => b.ExpiresAt is not null && b.ExpiresAt < asOf);
    }

    /// <summary>Applies the status and type filters of <paramref name="options"/>; an empty set means "no filter".</summary>
    private static IEnumerable<PolicyExceptionDocument> ApplyFilters(IEnumerable<PolicyExceptionDocument> source, ExceptionQueryOptions options)
    {
        var query = source;
        if (options.Statuses.Any())
        {
            query = query.Where(e => options.Statuses.Contains(e.Status, StringComparer.OrdinalIgnoreCase));
        }

        if (options.Types.Any())
        {
            query = query.Where(e => options.Types.Contains(e.ExceptionType, StringComparer.OrdinalIgnoreCase));
        }

        return query;
    }

    /// <summary>
    /// Replaces a stored exception with a mutated copy.
    /// </summary>
    /// <returns><c>false</c> when no exception is stored under the given tenant and id.</returns>
    private bool TryMutateException(string tenantId, string exceptionId, Action<PolicyExceptionDocument> mutate)
    {
        var key = (tenantId.ToLowerInvariant(), exceptionId);
        if (!_exceptions.TryGetValue(key, out var existing))
        {
            return false;
        }

        // Mutate a copy so previously returned instances are never changed behind the caller's back.
        var updated = Clone(existing);
        mutate(updated);
        _exceptions[key] = updated;
        return true;
    }

    /// <summary>Copies the listed properties into a new document; reference-typed members are shared, not deep-copied.</summary>
    private static PolicyExceptionDocument Clone(PolicyExceptionDocument source)
    {
        return new PolicyExceptionDocument
        {
            Id = source.Id,
            TenantId = source.TenantId,
            Name = source.Name,
            ExceptionType = source.ExceptionType,
            Status = source.Status,
            EffectiveFrom = source.EffectiveFrom,
            ExpiresAt = source.ExpiresAt,
            CreatedAt = source.CreatedAt,
            UpdatedAt = source.UpdatedAt,
            ActivatedAt = source.ActivatedAt,
            RevokedAt = source.RevokedAt,
            RevokedBy = source.RevokedBy,
            RevocationReason = source.RevocationReason,
            Scope = source.Scope,
            RiskAssessment = source.RiskAssessment,
            Tags = source.Tags,
        };
    }

    /// <summary>Copies the listed properties into a new binding document.</summary>
    private static ExceptionBindingDocument Clone(ExceptionBindingDocument source)
    {
        return new ExceptionBindingDocument
        {
            Id = source.Id,
            TenantId = source.TenantId,
            ExceptionId = source.ExceptionId,
            AssetId = source.AssetId,
            AdvisoryId = source.AdvisoryId,
            Status = source.Status,
            EffectiveFrom = source.EffectiveFrom,
            ExpiresAt = source.ExpiresAt,
        };
    }
}

View File

@@ -33,13 +33,20 @@ internal interface IExceptionRepository
CancellationToken cancellationToken);
/// <summary>
/// Lists exceptions with filtering and pagination.
/// Lists exceptions for a tenant with filtering and pagination.
/// </summary>
Task<ImmutableArray<PolicyExceptionDocument>> ListExceptionsAsync(
string tenantId,
ExceptionQueryOptions options,
CancellationToken cancellationToken);
/// <summary>
/// Lists exceptions across all tenants with filtering and pagination.
/// </summary>
Task<ImmutableArray<PolicyExceptionDocument>> ListExceptionsAsync(
ExceptionQueryOptions options,
CancellationToken cancellationToken);
/// <summary>
/// Finds active exceptions that apply to a specific asset/advisory.
/// </summary>

View File

@@ -100,12 +100,50 @@ internal sealed class MongoExceptionRepository : IExceptionRepository
string tenantId,
ExceptionQueryOptions options,
CancellationToken cancellationToken)
{
var filter = BuildFilter(options, tenantId.ToLowerInvariant());
var sort = BuildSort(options);
var results = await Exceptions
.Find(filter)
.Sort(sort)
.Skip(options.Skip)
.Limit(options.Limit)
.ToListAsync(cancellationToken)
.ConfigureAwait(false);
return results.ToImmutableArray();
}
public async Task<ImmutableArray<PolicyExceptionDocument>> ListExceptionsAsync(
    ExceptionQueryOptions options,
    CancellationToken cancellationToken)
{
    // Passing a null tenant means the filter carries no tenant constraint,
    // so the query spans every tenant's exceptions.
    var query = Exceptions
        .Find(BuildFilter(options, tenantId: null))
        .Sort(BuildSort(options))
        .Skip(options.Skip)
        .Limit(options.Limit);

    var documents = await query
        .ToListAsync(cancellationToken)
        .ConfigureAwait(false);

    return documents.ToImmutableArray();
}
private static FilterDefinition<PolicyExceptionDocument> BuildFilter(
ExceptionQueryOptions options,
string? tenantId)
{
var filterBuilder = Builders<PolicyExceptionDocument>.Filter;
var filters = new List<FilterDefinition<PolicyExceptionDocument>>
var filters = new List<FilterDefinition<PolicyExceptionDocument>>();
if (!string.IsNullOrWhiteSpace(tenantId))
{
filterBuilder.Eq(e => e.TenantId, tenantId.ToLowerInvariant())
};
filters.Add(filterBuilder.Eq(e => e.TenantId, tenantId));
}
if (options.Statuses.Length > 0)
{
@@ -135,21 +173,19 @@ internal sealed class MongoExceptionRepository : IExceptionRepository
filterBuilder.Gt(e => e.ExpiresAt, now)));
}
var filter = filterBuilder.And(filters);
if (filters.Count == 0)
{
return FilterDefinition<PolicyExceptionDocument>.Empty;
}
var sort = options.SortDirection.Equals("asc", StringComparison.OrdinalIgnoreCase)
return filterBuilder.And(filters);
}
/// <summary>
/// Builds the sort definition for an exception query.
/// </summary>
/// <param name="options">Query options supplying the sort field and direction.</param>
/// <returns>
/// An ascending sort on <c>options.SortBy</c> when <c>options.SortDirection</c> is "asc"
/// (case-insensitive); otherwise a descending sort on the same field.
/// </returns>
private static SortDefinition<PolicyExceptionDocument> BuildSort(ExceptionQueryOptions options)
{
    var sortBuilder = Builders<PolicyExceptionDocument>.Sort;
    return options.SortDirection.Equals("asc", StringComparison.OrdinalIgnoreCase)
        ? sortBuilder.Ascending(options.SortBy)
        : sortBuilder.Descending(options.SortBy);
}
public async Task<ImmutableArray<PolicyExceptionDocument>> FindApplicableExceptionsAsync(

View File

@@ -363,12 +363,33 @@ public static class PolicyEngineTelemetry
/// </summary>
public static Counter<long> ExceptionOperations => ExceptionOperationsCounter;
// Counter: policy_exception_cache_operations_total{tenant,operation}
// Declared exactly once; a second declaration with the same name does not compile.
private static readonly Counter<long> ExceptionCacheOperationsCounter =
    Meter.CreateCounter<long>(
        "policy_exception_cache_operations_total",
        unit: "operations",
        description: "Total exception cache operations (hit, miss, set, warm, invalidate).");

// Counter: policy_exception_applications_total{tenant,effect}
private static readonly Counter<long> ExceptionApplicationsCounter =
    Meter.CreateCounter<long>(
        "policy_exception_applications_total",
        unit: "applications",
        description: "Total applied exceptions during evaluation by effect type.");

// Histogram: policy_exception_application_latency_seconds{tenant,effect}
private static readonly Histogram<double> ExceptionApplicationLatencyHistogram =
    Meter.CreateHistogram<double>(
        "policy_exception_application_latency_seconds",
        unit: "s",
        description: "Latency impact of exception application during evaluation.");

// Counter: policy_exception_lifecycle_total{tenant,event}
private static readonly Counter<long> ExceptionLifecycleCounter =
    Meter.CreateCounter<long>(
        "policy_exception_lifecycle_total",
        unit: "events",
        description: "Lifecycle events for exceptions (activated, expired, revoked).");
/// <summary>
/// Counter for exception cache operations.
@@ -611,16 +632,58 @@ public static class PolicyEngineTelemetry
/// </summary>
/// <param name="tenant">Tenant identifier.</param>
/// <param name="operation">Operation type (hit, miss, set, warm, invalidate_*, event_*).</param>
public static void RecordExceptionCacheOperation(string tenant, string operation)
{
    // Both tag values are passed through the shared normalizers before being attached.
    var tags = new TagList
    {
        { "tenant", NormalizeTenant(tenant) },
        { "operation", NormalizeTag(operation) },
    };

    // One increment per call; the operation kind is carried in the "operation" tag.
    ExceptionCacheOperationsCounter.Add(1, tags);
}
/// <summary>
/// Increments the applied-exceptions counter by one for the given tenant and effect.
/// </summary>
/// <param name="tenant">Tenant identifier; normalized before being used as the "tenant" tag.</param>
/// <param name="effectType">Effect type of the applied exception; normalized before being used as the "effect" tag.</param>
public static void RecordExceptionApplication(string tenant, string effectType)
{
    var dimensions = new TagList();
    dimensions.Add("tenant", NormalizeTenant(tenant));
    dimensions.Add("effect", NormalizeTag(effectType));

    ExceptionApplicationsCounter.Add(1, dimensions);
}
/// <summary>
/// Records one latency sample on the exception-application histogram for the given tenant and effect.
/// </summary>
/// <param name="seconds">Latency value to record, in seconds.</param>
/// <param name="tenant">Tenant identifier; normalized before being used as the "tenant" tag.</param>
/// <param name="effectType">Effect type of the applied exception; normalized before being used as the "effect" tag.</param>
public static void RecordExceptionApplicationLatency(double seconds, string tenant, string effectType)
{
    var dimensions = new TagList();
    dimensions.Add("tenant", NormalizeTenant(tenant));
    dimensions.Add("effect", NormalizeTag(effectType));

    ExceptionApplicationLatencyHistogram.Record(seconds, dimensions);
}
/// <summary>
/// Increments the exception lifecycle counter by one for the given tenant and event
/// (activated, expired, revoked).
/// </summary>
/// <param name="tenant">Tenant identifier; normalized before being used as the "tenant" tag.</param>
/// <param name="eventType">Lifecycle event name; normalized before being used as the "event" tag.</param>
public static void RecordExceptionLifecycle(string tenant, string eventType)
{
    var dimensions = new TagList();
    dimensions.Add("tenant", NormalizeTenant(tenant));
    dimensions.Add("event", NormalizeTag(eventType));

    ExceptionLifecycleCounter.Add(1, dimensions);
}
#region Golden Signals - Recording Methods

View File

@@ -0,0 +1,127 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Collections.Immutable;
using System.Linq;
using StellaOps.Policy.Engine.ExceptionCache;
using StellaOps.Policy.Engine.Events;
using StellaOps.Policy.Engine.Options;
using StellaOps.Policy.Engine.Storage.Mongo.Repositories;
using StellaOps.Policy.Engine.Telemetry;
namespace StellaOps.Policy.Engine.Workers;
/// <summary>
/// Executes activation/expiry flows for exceptions and emits lifecycle events.
/// Split from the hosted worker for testability.
/// </summary>
internal sealed class ExceptionLifecycleService
{
    private readonly IExceptionRepository _repository;
    private readonly IExceptionEventPublisher _publisher;
    private readonly IOptions<PolicyEngineOptions> _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<ExceptionLifecycleService> _logger;

    public ExceptionLifecycleService(
        IExceptionRepository repository,
        IExceptionEventPublisher publisher,
        IOptions<PolicyEngineOptions> options,
        TimeProvider timeProvider,
        ILogger<ExceptionLifecycleService> logger)
    {
        _repository = repository ?? throw new ArgumentNullException(nameof(repository));
        _publisher = publisher ?? throw new ArgumentNullException(nameof(publisher));
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Runs one lifecycle pass: first activates approved exceptions that are due, then expires
    /// active exceptions whose expiry falls inside the configured window. Both steps use the
    /// same timestamp, taken once at the start of the pass.
    /// </summary>
    /// <param name="cancellationToken">Token forwarded to repository and publisher calls.</param>
    public async Task ProcessOnceAsync(CancellationToken cancellationToken)
    {
        var now = _timeProvider.GetUtcNow();
        var lifecycle = _options.Value.ExceptionLifecycle;

        await ActivateApprovedAsync(now, lifecycle.MaxBatchSize, cancellationToken).ConfigureAwait(false);

        await ExpireActiveAsync(
            now,
            now - lifecycle.ExpiryLookback,
            now + lifecycle.ExpiryHorizon,
            lifecycle.MaxBatchSize,
            cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Moves up to <paramref name="maxBatchSize"/> approved exceptions, across all tenants, whose
    /// <c>EffectiveFrom</c> is unset or not after <paramref name="now"/> to "active".
    /// </summary>
    private async Task ActivateApprovedAsync(DateTimeOffset now, int maxBatchSize, CancellationToken cancellationToken)
    {
        var approved = await _repository
            .ListExceptionsAsync(new ExceptionQueryOptions
            {
                Statuses = ImmutableArray.Create("approved"),
            }, cancellationToken)
            .ConfigureAwait(false);

        var due = approved
            .Where(ex => ex.EffectiveFrom is null || ex.EffectiveFrom <= now)
            .Take(maxBatchSize)
            .ToImmutableArray();

        foreach (var ex in due)
        {
            var activated = await _repository.UpdateExceptionStatusAsync(
                ex.TenantId, ex.Id, "active", now, cancellationToken).ConfigureAwait(false);

            // The repository reports false when it did not update the document; skip metric, event and log.
            if (!activated)
            {
                continue;
            }

            PolicyEngineTelemetry.RecordExceptionLifecycle(ex.TenantId, "activated");

            await _publisher.PublishAsync(new ExceptionEvent
            {
                EventType = "activated",
                TenantId = ex.TenantId,
                ExceptionId = ex.Id,
                ExceptionName = ex.Name,
                ExceptionType = ex.ExceptionType,
                OccurredAt = now,
            }, cancellationToken).ConfigureAwait(false);

            _logger.LogInformation(
                "Activated exception {ExceptionId} for tenant {TenantId} (effective from {EffectiveFrom:o})",
                ex.Id,
                ex.TenantId,
                ex.EffectiveFrom);
        }
    }

    /// <summary>
    /// Moves up to <paramref name="maxBatchSize"/> active exceptions, across all tenants, whose
    /// <c>ExpiresAt</c> lies within [<paramref name="windowStart"/>, <paramref name="windowEnd"/>] to "expired".
    /// </summary>
    private async Task ExpireActiveAsync(
        DateTimeOffset now,
        DateTimeOffset windowStart,
        DateTimeOffset windowEnd,
        int maxBatchSize,
        CancellationToken cancellationToken)
    {
        var active = await _repository
            .ListExceptionsAsync(new ExceptionQueryOptions
            {
                Statuses = ImmutableArray.Create("active"),
            }, cancellationToken)
            .ConfigureAwait(false);

        var due = active
            .Where(ex => ex.ExpiresAt is not null && ex.ExpiresAt >= windowStart && ex.ExpiresAt <= windowEnd)
            .Take(maxBatchSize)
            .ToImmutableArray();

        foreach (var ex in due)
        {
            var expired = await _repository.UpdateExceptionStatusAsync(
                ex.TenantId, ex.Id, "expired", now, cancellationToken).ConfigureAwait(false);

            // The repository reports false when it did not update the document; skip metric, event and log.
            if (!expired)
            {
                continue;
            }

            PolicyEngineTelemetry.RecordExceptionLifecycle(ex.TenantId, "expired");

            await _publisher.PublishAsync(new ExceptionEvent
            {
                EventType = "expired",
                TenantId = ex.TenantId,
                ExceptionId = ex.Id,
                ExceptionName = ex.Name,
                ExceptionType = ex.ExceptionType,
                OccurredAt = now,
            }, cancellationToken).ConfigureAwait(false);

            _logger.LogInformation(
                "Expired exception {ExceptionId} for tenant {TenantId} at {ExpiresAt:o}",
                ex.Id,
                ex.TenantId,
                ex.ExpiresAt);
        }
    }
}

View File

@@ -0,0 +1,51 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using StellaOps.Policy.Engine.Options;
namespace StellaOps.Policy.Engine.Workers;
/// <summary>
/// Hosted service that periodically runs exception activation/expiry checks.
/// </summary>
internal sealed class ExceptionLifecycleWorker : BackgroundService
{
    private readonly ExceptionLifecycleService _service;
    private readonly PolicyEngineExceptionLifecycleOptions _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<ExceptionLifecycleWorker> _logger;

    public ExceptionLifecycleWorker(
        ExceptionLifecycleService service,
        PolicyEngineExceptionLifecycleOptions options,
        TimeProvider timeProvider,
        ILogger<ExceptionLifecycleWorker> logger)
    {
        _service = service ?? throw new ArgumentNullException(nameof(service));
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Runs one lifecycle pass, waits for the configured poll interval, and repeats until
    /// <paramref name="stoppingToken"/> is cancelled. A failing pass is logged and does not
    /// stop the loop; cancellation during a pass or during the wait ends the loop normally.
    /// </summary>
    /// <param name="stoppingToken">Signals that the host is shutting the worker down.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation("Starting exception lifecycle worker (interval {Interval}s)", _options.PollIntervalSeconds);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                await _service.ProcessOnceAsync(stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                break;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Exception lifecycle worker iteration failed");
            }

            try
            {
                // Delay against the injected TimeProvider so tests can advance a fake clock
                // instead of waiting in real time.
                await Task.Delay(_options.PollInterval, _timeProvider, stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Shutdown requested while idle: leave the loop instead of surfacing the cancellation.
                break;
            }
        }
    }
}