up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-28 09:40:40 +02:00
parent 1c6730a1d2
commit 05da719048
206 changed files with 34741 additions and 1751 deletions

View File

@@ -79,6 +79,7 @@ internal sealed class PolicyBundleService
Size: payload.Length,
CreatedAt: createdAt,
Payload: payload.ToImmutableArray(),
CompiledDocument: compileResult.Document,
AocMetadata: aocMetadata);
await _repository.StoreBundleAsync(packId, version, record, cancellationToken).ConfigureAwait(false);

View File

@@ -1,9 +1,12 @@
using System;
using System.Collections.Immutable;
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Policy;
using StellaOps.Policy.Engine.Compilation;
using StellaOps.Policy.Engine.Options;
using StellaOps.Policy.Engine.Telemetry;
using StellaOps.PolicyDsl;
using DslCompiler = StellaOps.PolicyDsl.PolicyCompiler;
using DslCompilationResult = StellaOps.PolicyDsl.PolicyCompilationResult;
@@ -27,19 +30,25 @@ internal sealed class PolicyCompilationService
{
private readonly DslCompiler compiler;
private readonly PolicyComplexityAnalyzer complexityAnalyzer;
private readonly PolicyMetadataExtractor metadataExtractor;
private readonly IOptionsMonitor<PolicyEngineOptions> optionsMonitor;
private readonly TimeProvider timeProvider;
private readonly ILogger<PolicyCompilationService> _logger;
public PolicyCompilationService(
DslCompiler compiler,
PolicyComplexityAnalyzer complexityAnalyzer,
PolicyMetadataExtractor metadataExtractor,
IOptionsMonitor<PolicyEngineOptions> optionsMonitor,
TimeProvider timeProvider)
TimeProvider timeProvider,
ILogger<PolicyCompilationService>? logger = null)
{
this.compiler = compiler ?? throw new ArgumentNullException(nameof(compiler));
this.complexityAnalyzer = complexityAnalyzer ?? throw new ArgumentNullException(nameof(complexityAnalyzer));
this.metadataExtractor = metadataExtractor ?? throw new ArgumentNullException(nameof(metadataExtractor));
this.optionsMonitor = optionsMonitor ?? throw new ArgumentNullException(nameof(optionsMonitor));
this.timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? Microsoft.Extensions.Logging.Abstractions.NullLogger<PolicyCompilationService>.Instance;
}
public PolicyCompilationResultDto Compile(PolicyCompileRequest request)
@@ -56,6 +65,9 @@ internal sealed class PolicyCompilationService
if (!string.Equals(request.Dsl.Syntax, "stella-dsl@1", StringComparison.Ordinal))
{
PolicyEngineTelemetry.RecordCompilation("unsupported_syntax", 0);
PolicyEngineTelemetry.RecordError("compilation");
_logger.LogWarning("Compilation rejected: unsupported syntax {Syntax}", request.Dsl.Syntax ?? "null");
return PolicyCompilationResultDto.FromFailure(
ImmutableArray.Create(PolicyIssue.Error(
DiagnosticCodes.UnsupportedSyntaxVersion,
@@ -65,13 +77,23 @@ internal sealed class PolicyCompilationService
durationMilliseconds: 0);
}
using var activity = PolicyEngineTelemetry.StartCompileActivity(policyId: null, version: request.Dsl.Syntax);
var start = timeProvider.GetTimestamp();
var result = compiler.Compile(request.Dsl.Source);
var elapsed = timeProvider.GetElapsedTime(start, timeProvider.GetTimestamp());
var durationMilliseconds = (long)Math.Ceiling(elapsed.TotalMilliseconds);
var durationSeconds = elapsed.TotalSeconds;
if (!result.Success || result.Document is null)
{
PolicyEngineTelemetry.RecordCompilation("failure", durationSeconds);
PolicyEngineTelemetry.RecordError("compilation");
activity?.SetStatus(ActivityStatusCode.Error, "Compilation failed");
_logger.LogWarning(
"Policy compilation failed in {DurationMs}ms with {DiagnosticCount} diagnostics",
durationMilliseconds,
result.Diagnostics.IsDefault ? 0 : result.Diagnostics.Length);
return PolicyCompilationResultDto.FromFailure(result.Diagnostics, null, durationMilliseconds);
}
@@ -79,6 +101,9 @@ internal sealed class PolicyCompilationService
var diagnostics = result.Diagnostics.IsDefault ? ImmutableArray<PolicyIssue>.Empty : result.Diagnostics;
var limits = optionsMonitor.CurrentValue?.Compilation ?? new PolicyEngineCompilationOptions();
activity?.SetTag("policy.rule_count", result.Document.Rules.Length);
activity?.SetTag("policy.complexity_score", complexity.Score);
if (limits.EnforceComplexity && complexity.Score > limits.MaxComplexityScore)
{
var diagnostic = PolicyIssue.Error(
@@ -86,6 +111,12 @@ internal sealed class PolicyCompilationService
$"Policy complexity score {complexity.Score:F2} exceeds configured maximum {limits.MaxComplexityScore:F2}. Reduce rule count or expression depth.",
"$.rules");
diagnostics = AppendDiagnostic(diagnostics, diagnostic);
PolicyEngineTelemetry.RecordCompilation("complexity_exceeded", durationSeconds);
PolicyEngineTelemetry.RecordError("compilation");
activity?.SetStatus(ActivityStatusCode.Error, "Complexity exceeded");
_logger.LogWarning(
"Policy compilation rejected: complexity {Score:F2} exceeds limit {MaxScore:F2}",
complexity.Score, limits.MaxComplexityScore);
return PolicyCompilationResultDto.FromFailure(diagnostics, complexity, durationMilliseconds);
}
@@ -96,10 +127,27 @@ internal sealed class PolicyCompilationService
$"Policy compilation time {durationMilliseconds} ms exceeded limit {limits.MaxDurationMilliseconds} ms.",
"$.dsl");
diagnostics = AppendDiagnostic(diagnostics, diagnostic);
PolicyEngineTelemetry.RecordCompilation("duration_exceeded", durationSeconds);
PolicyEngineTelemetry.RecordError("compilation");
activity?.SetStatus(ActivityStatusCode.Error, "Duration exceeded");
_logger.LogWarning(
"Policy compilation rejected: duration {DurationMs}ms exceeds limit {MaxDurationMs}ms",
durationMilliseconds, limits.MaxDurationMilliseconds);
return PolicyCompilationResultDto.FromFailure(diagnostics, complexity, durationMilliseconds);
}
return PolicyCompilationResultDto.FromSuccess(result, complexity, durationMilliseconds);
// Extract extended metadata (symbol table, rule index, documentation, coverage, hashes)
var metadata = metadataExtractor.Extract(result.Document, result.CanonicalRepresentation);
PolicyEngineTelemetry.RecordCompilation("success", durationSeconds);
activity?.SetStatus(ActivityStatusCode.Ok);
activity?.SetTag("policy.symbol_count", metadata.SymbolTable.Symbols.Length);
activity?.SetTag("policy.coverage_paths", metadata.CoverageMetadata.CoveragePaths.Length);
_logger.LogDebug(
"Policy compiled successfully in {DurationMs}ms: {RuleCount} rules, complexity {Score:F2}, {SymbolCount} symbols",
durationMilliseconds, result.Document.Rules.Length, complexity.Score, metadata.SymbolTable.Symbols.Length);
return PolicyCompilationResultDto.FromSuccess(result, complexity, metadata, durationMilliseconds);
}
private static ImmutableArray<PolicyIssue> AppendDiagnostic(ImmutableArray<PolicyIssue> diagnostics, PolicyIssue diagnostic)
@@ -119,17 +167,20 @@ internal sealed record PolicyCompilationResultDto(
ImmutableArray<byte> CanonicalRepresentation,
ImmutableArray<PolicyIssue> Diagnostics,
PolicyComplexityReport? Complexity,
long DurationMilliseconds)
long DurationMilliseconds,
IrDocument? Document = null,
PolicyCompileMetadata? Metadata = null)
{
public static PolicyCompilationResultDto FromFailure(
ImmutableArray<PolicyIssue> diagnostics,
PolicyComplexityReport? complexity,
long durationMilliseconds) =>
new(false, null, null, ImmutableArray<byte>.Empty, diagnostics, complexity, durationMilliseconds);
new(false, null, null, ImmutableArray<byte>.Empty, diagnostics, complexity, durationMilliseconds, null, null);
public static PolicyCompilationResultDto FromSuccess(
DslCompilationResult compilationResult,
PolicyComplexityReport complexity,
PolicyCompileMetadata metadata,
long durationMilliseconds)
{
if (compilationResult.Document is null)
@@ -145,7 +196,9 @@ internal sealed record PolicyCompilationResultDto(
compilationResult.CanonicalRepresentation,
compilationResult.Diagnostics,
complexity,
durationMilliseconds);
durationMilliseconds,
compilationResult.Document,
metadata);
}
}

View File

@@ -0,0 +1,497 @@
using System.Collections.Immutable;
using Microsoft.Extensions.Logging;
using StellaOps.Policy.Engine.Domain;
using StellaOps.Policy.Engine.Telemetry;
namespace StellaOps.Policy.Engine.Services;
/// <summary>
/// Filter and paging options handed to <see cref="IExplainTraceRepository.QueryAsync"/>
/// when searching stored explain traces.
/// </summary>
/// <remarks>
/// All filter properties are optional and default to <c>null</c>.
/// NOTE(review): how the filters are combined and whether the time bounds are
/// inclusive is decided by the repository implementation, which is not visible
/// here - confirm before relying on it.
/// </remarks>
public sealed record ExplainQueryOptions
{
/// <summary>
/// Filter by policy ID. <c>null</c> when unset.
/// </summary>
public string? PolicyId { get; init; }
/// <summary>
/// Filter by policy version. <c>null</c> when unset.
/// </summary>
public int? PolicyVersion { get; init; }
/// <summary>
/// Filter by run ID. <c>null</c> when unset.
/// </summary>
public string? RunId { get; init; }
/// <summary>
/// Filter by component PURL. <c>null</c> when unset.
/// </summary>
public string? ComponentPurl { get; init; }
/// <summary>
/// Filter by vulnerability ID. <c>null</c> when unset.
/// </summary>
public string? VulnerabilityId { get; init; }
/// <summary>
/// Filter by final outcome. <c>null</c> when unset.
/// </summary>
public string? FinalOutcome { get; init; }
/// <summary>
/// Start of the evaluation time range to filter on. <c>null</c> when unset.
/// </summary>
public DateTimeOffset? FromTime { get; init; }
/// <summary>
/// End of the evaluation time range to filter on. <c>null</c> when unset.
/// </summary>
public DateTimeOffset? ToTime { get; init; }
/// <summary>
/// Maximum number of results to return. Defaults to 100.
/// No range validation is performed by this record.
/// </summary>
public int Limit { get; init; } = 100;
/// <summary>
/// Number of results to skip for pagination. Defaults to 0.
/// No range validation is performed by this record.
/// </summary>
public int Skip { get; init; } = 0;
/// <summary>
/// Include rule steps in results (can be large). Defaults to <c>true</c>.
/// </summary>
public bool IncludeRuleSteps { get; init; } = true;
/// <summary>
/// Include VEX evidence in results. Defaults to <c>true</c>.
/// </summary>
public bool IncludeVexEvidence { get; init; } = true;
}
/// <summary>
/// An explain trace as returned by <see cref="IExplainTraceRepository"/>, together
/// with the optional AOC chain it was linked to and the time it was stored.
/// </summary>
public sealed record StoredExplainTrace
{
/// <summary>
/// Unique identifier of the stored trace. This is the value passed as <c>id</c>
/// to <see cref="IExplainTraceRepository.GetByIdAsync"/>.
/// </summary>
public required string Id { get; init; }
/// <summary>
/// The explain trace data.
/// </summary>
public required ExplainTrace Trace { get; init; }
/// <summary>
/// Reference to the AOC chain for this decision, or <c>null</c> when no chain
/// was linked at store time.
/// </summary>
public ExplainAocChain? AocChain { get; init; }
/// <summary>
/// When this trace was stored.
/// </summary>
public required DateTimeOffset StoredAt { get; init; }
}
/// <summary>
/// AOC chain linking a decision to its attestation chain. The four identifying
/// values are required; the attestation reference and provenance are optional.
/// </summary>
public sealed record ExplainAocChain
{
/// <summary>
/// Compilation ID that produced the policy bundle.
/// </summary>
public required string CompilationId { get; init; }
/// <summary>
/// Compiler version used.
/// </summary>
public required string CompilerVersion { get; init; }
/// <summary>
/// Source digest of the policy document.
/// </summary>
public required string SourceDigest { get; init; }
/// <summary>
/// Artifact digest of the compiled bundle.
/// </summary>
public required string ArtifactDigest { get; init; }
/// <summary>
/// Reference to the signed attestation, or <c>null</c> when none is recorded.
/// </summary>
public ExplainAttestationRef? AttestationRef { get; init; }
/// <summary>
/// Provenance information, or <c>null</c> when none is recorded.
/// </summary>
public ExplainProvenance? Provenance { get; init; }
}
/// <summary>
/// Attestation reference for AOC chain.
/// </summary>
/// <param name="AttestationId">Identifier of the attestation.</param>
/// <param name="EnvelopeDigest">Digest of the attestation envelope (format not shown here - TODO confirm).</param>
/// <param name="Uri">Optional location of the attestation; may be <c>null</c>.</param>
/// <param name="SigningKeyId">Optional identifier of the signing key; may be <c>null</c>.</param>
public sealed record ExplainAttestationRef(
string AttestationId,
string EnvelopeDigest,
string? Uri,
string? SigningKeyId);
/// <summary>
/// Provenance for AOC chain.
/// </summary>
/// <param name="SourceType">Kind of source the policy came from (allowed values are not defined here - TODO confirm).</param>
/// <param name="SourceUrl">Optional URL of the source; may be <c>null</c>.</param>
/// <param name="Submitter">Optional submitter identity; may be <c>null</c>.</param>
/// <param name="CommitSha">Optional commit SHA; may be <c>null</c>.</param>
/// <param name="Branch">Optional branch name; may be <c>null</c>.</param>
public sealed record ExplainProvenance(
string SourceType,
string? SourceUrl,
string? Submitter,
string? CommitSha,
string? Branch);
/// <summary>
/// Repository interface for explain trace persistence. Every operation takes the
/// tenant identifier as its first argument.
/// </summary>
public interface IExplainTraceRepository
{
/// <summary>
/// Stores an explain trace.
/// </summary>
/// <param name="tenantId">Tenant the trace is stored for.</param>
/// <param name="trace">The explain trace to persist.</param>
/// <param name="aocChain">AOC chain to link to the trace, or <c>null</c> when none is available.</param>
/// <param name="retention">
/// Retention period for the trace. NOTE(review): the meaning of <c>null</c> is left
/// to the implementation - confirm.
/// </param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The stored trace record.</returns>
Task<StoredExplainTrace> StoreAsync(
string tenantId,
ExplainTrace trace,
ExplainAocChain? aocChain,
TimeSpan? retention,
CancellationToken cancellationToken);
/// <summary>
/// Retrieves an explain trace by ID.
/// </summary>
/// <param name="tenantId">Tenant the trace belongs to.</param>
/// <param name="id">Identifier of the stored trace.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The stored trace, or <c>null</c> when none is returned for the given ID.</returns>
Task<StoredExplainTrace?> GetByIdAsync(
string tenantId,
string id,
CancellationToken cancellationToken);
/// <summary>
/// Retrieves an explain trace by run ID and subject hash.
/// </summary>
/// <param name="tenantId">Tenant the trace belongs to.</param>
/// <param name="runId">Policy run identifier.</param>
/// <param name="subjectHash">Hash identifying the evaluated subject (hash scheme not shown here - TODO confirm).</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The stored trace, or <c>null</c> when none is returned.</returns>
Task<StoredExplainTrace?> GetByRunAndSubjectAsync(
string tenantId,
string runId,
string subjectHash,
CancellationToken cancellationToken);
/// <summary>
/// Queries explain traces with filtering and pagination.
/// </summary>
/// <param name="tenantId">Tenant whose traces are queried.</param>
/// <param name="options">Filter and paging options.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The matching stored traces.</returns>
Task<IReadOnlyList<StoredExplainTrace>> QueryAsync(
string tenantId,
ExplainQueryOptions options,
CancellationToken cancellationToken);
/// <summary>
/// Gets all explain traces for a policy run.
/// </summary>
/// <param name="tenantId">Tenant whose traces are queried.</param>
/// <param name="runId">Policy run identifier.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The stored traces for the run.</returns>
Task<IReadOnlyList<StoredExplainTrace>> GetByRunIdAsync(
string tenantId,
string runId,
CancellationToken cancellationToken);
/// <summary>
/// Deletes expired explain traces for the given tenant.
/// </summary>
/// <remarks>
/// This method takes no retention argument. NOTE(review): expiry is presumably
/// derived from the retention supplied to <see cref="StoreAsync"/> - confirm
/// against the implementation.
/// </remarks>
/// <param name="tenantId">Tenant whose expired traces are pruned.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>An integer count; the calling service logs it as the number of pruned traces.</returns>
Task<int> PruneExpiredAsync(
string tenantId,
CancellationToken cancellationToken);
}
/// <summary>
/// Service for persisting and retrieving policy explain traces with AOC chain linkage.
/// </summary>
/// <remarks>
/// Storage is delegated to <see cref="IExplainTraceRepository"/>. The AOC chain attached
/// to a trace is copied from the AOC metadata of the policy revision's bundle, looked up
/// through <see cref="IPolicyPackRepository"/>.
/// </remarks>
internal sealed class PolicyExplainerService
{
    private readonly IExplainTraceRepository _repository;
    private readonly IPolicyPackRepository _policyRepository;
    private readonly ILogger<PolicyExplainerService> _logger;
    private readonly TimeProvider _timeProvider;
    private readonly TimeSpan _defaultRetention;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="repository">Explain trace store.</param>
    /// <param name="policyRepository">Policy pack store used to resolve revisions and their bundles.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    /// <param name="timeProvider">Time source (validated and retained by the service).</param>
    /// <param name="defaultRetention">
    /// Retention applied when a store call does not specify one; 30 days when <c>null</c>.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// Any of <paramref name="repository"/>, <paramref name="policyRepository"/>,
    /// <paramref name="logger"/> or <paramref name="timeProvider"/> is <c>null</c>.
    /// </exception>
    public PolicyExplainerService(
        IExplainTraceRepository repository,
        IPolicyPackRepository policyRepository,
        ILogger<PolicyExplainerService> logger,
        TimeProvider timeProvider,
        TimeSpan? defaultRetention = null)
    {
        _repository = repository ?? throw new ArgumentNullException(nameof(repository));
        _policyRepository = policyRepository ?? throw new ArgumentNullException(nameof(policyRepository));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        _defaultRetention = defaultRetention ?? TimeSpan.FromDays(30);
    }

    /// <summary>
    /// Stores an explain trace and links it to the AOC chain from the policy bundle.
    /// </summary>
    /// <remarks>
    /// The AOC chain is only resolved when the trace carries a policy version and the
    /// matching revision's bundle has AOC metadata; otherwise the trace is stored
    /// without a chain.
    /// </remarks>
    /// <param name="tenantId">Tenant the trace is stored for.</param>
    /// <param name="trace">The explain trace to persist.</param>
    /// <param name="retention">Retention for this trace; the service default is used when <c>null</c>.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The stored trace as returned by the repository.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tenantId"/> or <paramref name="trace"/> is <c>null</c>.
    /// </exception>
    public async Task<StoredExplainTrace> StoreExplainTraceAsync(
        string tenantId,
        ExplainTrace trace,
        TimeSpan? retention = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(tenantId);
        ArgumentNullException.ThrowIfNull(trace);

        _logger.LogDebug(
            "Storing explain trace for run {RunId}, policy {PolicyId}:{Version}, tenant {TenantId}",
            trace.RunId, trace.PolicyId, trace.PolicyVersion, tenantId);

        // Try to get AOC chain from the policy bundle
        ExplainAocChain? aocChain = null;
        if (trace.PolicyVersion.HasValue)
        {
            var revision = await _policyRepository.GetRevisionAsync(
                trace.PolicyId,
                trace.PolicyVersion.Value,
                cancellationToken).ConfigureAwait(false);

            if (revision?.Bundle?.AocMetadata is not null)
            {
                var aoc = revision.Bundle.AocMetadata;

                // Copy the bundle's AOC metadata into the explain-specific records so the
                // stored trace keeps its own snapshot of the chain.
                aocChain = new ExplainAocChain
                {
                    CompilationId = aoc.CompilationId,
                    CompilerVersion = aoc.CompilerVersion,
                    SourceDigest = aoc.SourceDigest,
                    ArtifactDigest = aoc.ArtifactDigest,
                    AttestationRef = aoc.AttestationRef is not null
                        ? new ExplainAttestationRef(
                            aoc.AttestationRef.AttestationId,
                            aoc.AttestationRef.EnvelopeDigest,
                            aoc.AttestationRef.Uri,
                            aoc.AttestationRef.SigningKeyId)
                        : null,
                    Provenance = aoc.Provenance is not null
                        ? new ExplainProvenance(
                            aoc.Provenance.SourceType,
                            aoc.Provenance.SourceUrl,
                            aoc.Provenance.Submitter,
                            aoc.Provenance.CommitSha,
                            aoc.Provenance.Branch)
                        : null
                };

                _logger.LogDebug(
                    "Linked explain trace to AOC chain: compilation {CompilationId}, attestation {AttestationId}",
                    aocChain.CompilationId,
                    aocChain.AttestationRef?.AttestationId ?? "(none)");
            }
        }

        var stored = await _repository.StoreAsync(
            tenantId,
            trace,
            aocChain,
            retention ?? _defaultRetention,
            cancellationToken).ConfigureAwait(false);

        PolicyEngineTelemetry.ExplainTracesStored.Add(1,
            new KeyValuePair<string, object?>("tenant_id", tenantId),
            new KeyValuePair<string, object?>("policy_id", trace.PolicyId));

        return stored;
    }

    /// <summary>
    /// Retrieves an explain trace by its ID.
    /// </summary>
    /// <param name="tenantId">Tenant the trace belongs to.</param>
    /// <param name="traceId">Identifier of the stored trace.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The stored trace, or <c>null</c> when the repository returns none.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tenantId"/> or <paramref name="traceId"/> is <c>null</c>.
    /// </exception>
    public Task<StoredExplainTrace?> GetExplainTraceAsync(
        string tenantId,
        string traceId,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(tenantId);
        ArgumentNullException.ThrowIfNull(traceId);

        return _repository.GetByIdAsync(tenantId, traceId, cancellationToken);
    }

    /// <summary>
    /// Retrieves an explain trace for a specific decision.
    /// </summary>
    /// <param name="tenantId">Tenant the trace belongs to.</param>
    /// <param name="runId">Policy run identifier.</param>
    /// <param name="subjectHash">Hash identifying the evaluated subject.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The stored trace, or <c>null</c> when the repository returns none.</returns>
    /// <exception cref="ArgumentNullException">Any of the string arguments is <c>null</c>.</exception>
    public Task<StoredExplainTrace?> GetExplainTraceForDecisionAsync(
        string tenantId,
        string runId,
        string subjectHash,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(tenantId);
        ArgumentNullException.ThrowIfNull(runId);
        ArgumentNullException.ThrowIfNull(subjectHash);

        return _repository.GetByRunAndSubjectAsync(tenantId, runId, subjectHash, cancellationToken);
    }

    /// <summary>
    /// Gets all explain traces for a policy run.
    /// </summary>
    /// <param name="tenantId">Tenant whose traces are queried.</param>
    /// <param name="runId">Policy run identifier.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The stored traces for the run.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tenantId"/> or <paramref name="runId"/> is <c>null</c>.
    /// </exception>
    public Task<IReadOnlyList<StoredExplainTrace>> GetExplainTracesForRunAsync(
        string tenantId,
        string runId,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(tenantId);
        ArgumentNullException.ThrowIfNull(runId);

        return _repository.GetByRunIdAsync(tenantId, runId, cancellationToken);
    }

    /// <summary>
    /// Queries explain traces with filtering and pagination.
    /// </summary>
    /// <param name="tenantId">Tenant whose traces are queried.</param>
    /// <param name="options">
    /// Filter and paging options. A <c>null</c> value is tolerated and replaced by a
    /// default <see cref="ExplainQueryOptions"/> instance.
    /// </param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The matching stored traces.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tenantId"/> is <c>null</c>.</exception>
    public Task<IReadOnlyList<StoredExplainTrace>> QueryExplainTracesAsync(
        string tenantId,
        ExplainQueryOptions options,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(tenantId);
        options ??= new ExplainQueryOptions();

        return _repository.QueryAsync(tenantId, options, cancellationToken);
    }

    /// <summary>
    /// Gets the AOC chain for a stored explain trace.
    /// </summary>
    /// <param name="tenantId">Tenant the trace belongs to.</param>
    /// <param name="traceId">Identifier of the stored trace.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>
    /// The linked AOC chain, or <c>null</c> when the trace does not exist or has no chain.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tenantId"/> or <paramref name="traceId"/> is <c>null</c>.
    /// </exception>
    public async Task<ExplainAocChain?> GetAocChainForTraceAsync(
        string tenantId,
        string traceId,
        CancellationToken cancellationToken = default)
    {
        // Guard here as well so the contract is visible at the entry point, matching
        // the other public methods of this service.
        ArgumentNullException.ThrowIfNull(tenantId);
        ArgumentNullException.ThrowIfNull(traceId);

        var stored = await GetExplainTraceAsync(tenantId, traceId, cancellationToken).ConfigureAwait(false);
        return stored?.AocChain;
    }

    /// <summary>
    /// Validates that an explain trace's AOC chain is intact.
    /// </summary>
    /// <remarks>
    /// Checks run in order and the first failure is reported: the trace exists, it has
    /// an AOC chain, it has a policy version, the policy revision still exists, the
    /// revision has a bundle, the bundle digest equals the chain's artifact digest, and
    /// the bundle's AOC compilation ID and source digest equal the chain's.
    /// <see cref="AocChainValidationResult.PolicyFound"/> is only <c>true</c> once the
    /// policy revision has actually been retrieved.
    /// </remarks>
    /// <param name="tenantId">Tenant the trace belongs to.</param>
    /// <param name="traceId">Identifier of the stored trace.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The validation outcome with a human-readable message.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tenantId"/> or <paramref name="traceId"/> is <c>null</c>.
    /// </exception>
    public async Task<AocChainValidationResult> ValidateAocChainAsync(
        string tenantId,
        string traceId,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(tenantId);
        ArgumentNullException.ThrowIfNull(traceId);

        var stored = await GetExplainTraceAsync(tenantId, traceId, cancellationToken).ConfigureAwait(false);
        if (stored is null)
        {
            return new AocChainValidationResult(
                IsValid: false,
                ValidationMessage: "Explain trace not found",
                PolicyFound: false,
                BundleIntact: false,
                AttestationAvailable: false);
        }

        var chain = stored.AocChain;
        if (chain is null)
        {
            // The policy repository has not been consulted at this point, so the policy
            // must not be reported as found.
            return new AocChainValidationResult(
                IsValid: false,
                ValidationMessage: "No AOC chain linked to this trace",
                PolicyFound: false,
                BundleIntact: false,
                AttestationAvailable: false);
        }

        // Verify the policy revision still exists
        if (!stored.Trace.PolicyVersion.HasValue)
        {
            return new AocChainValidationResult(
                IsValid: false,
                ValidationMessage: "Trace has no policy version",
                PolicyFound: false,
                BundleIntact: false,
                AttestationAvailable: false);
        }

        var revision = await _policyRepository.GetRevisionAsync(
            stored.Trace.PolicyId,
            stored.Trace.PolicyVersion.Value,
            cancellationToken).ConfigureAwait(false);

        if (revision is null)
        {
            return new AocChainValidationResult(
                IsValid: false,
                ValidationMessage: $"Policy revision {stored.Trace.PolicyId}:{stored.Trace.PolicyVersion} no longer exists",
                PolicyFound: false,
                BundleIntact: false,
                AttestationAvailable: false);
        }

        var attestationAvailable = chain.AttestationRef is not null;

        // A revision without a bundle is a different failure from a modified bundle;
        // report it as such instead of as a digest mismatch.
        var bundle = revision.Bundle;
        if (bundle is null)
        {
            return new AocChainValidationResult(
                IsValid: false,
                ValidationMessage: $"Policy revision {stored.Trace.PolicyId}:{stored.Trace.PolicyVersion} has no bundle to verify against",
                PolicyFound: true,
                BundleIntact: false,
                AttestationAvailable: attestationAvailable);
        }

        // Verify bundle digest matches
        if (bundle.Digest != chain.ArtifactDigest)
        {
            return new AocChainValidationResult(
                IsValid: false,
                ValidationMessage: "Bundle digest mismatch - policy bundle has been modified",
                PolicyFound: true,
                BundleIntact: false,
                AttestationAvailable: attestationAvailable);
        }

        // Verify AOC metadata matches
        var currentAoc = bundle.AocMetadata;
        var aocMatches = currentAoc?.CompilationId == chain.CompilationId &&
                         currentAoc?.SourceDigest == chain.SourceDigest;

        if (!aocMatches)
        {
            return new AocChainValidationResult(
                IsValid: false,
                ValidationMessage: "AOC metadata mismatch - compilation chain has been modified",
                PolicyFound: true,
                BundleIntact: true,
                AttestationAvailable: attestationAvailable);
        }

        return new AocChainValidationResult(
            IsValid: true,
            ValidationMessage: "AOC chain is intact and verifiable",
            PolicyFound: true,
            BundleIntact: true,
            AttestationAvailable: attestationAvailable);
    }

    /// <summary>
    /// Prunes expired explain traces for a tenant.
    /// </summary>
    /// <param name="tenantId">Tenant whose expired traces are pruned.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The count returned by the repository; logged when greater than zero.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tenantId"/> is <c>null</c>.</exception>
    public async Task<int> PruneExpiredTracesAsync(
        string tenantId,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(tenantId);

        var pruned = await _repository.PruneExpiredAsync(tenantId, cancellationToken).ConfigureAwait(false);

        if (pruned > 0)
        {
            _logger.LogInformation(
                "Pruned {Count} expired explain traces for tenant {TenantId}",
                pruned, tenantId);
        }

        return pruned;
    }
}
/// <summary>
/// Result of AOC chain validation.
/// </summary>
/// <param name="IsValid">Overall outcome: <c>true</c> only when every validation check passed.</param>
/// <param name="ValidationMessage">Human-readable description of the outcome or of the first failed check.</param>
/// <param name="PolicyFound">
/// Whether the policy is reported as found; the exact conditions are set by the
/// validating service.
/// </param>
/// <param name="BundleIntact">Whether the bundle digest check passed.</param>
/// <param name="AttestationAvailable">Whether the validated chain is reported to carry an attestation reference.</param>
public sealed record AocChainValidationResult(
bool IsValid,
string ValidationMessage,
bool PolicyFound,
bool BundleIntact,
bool AttestationAvailable);

View File

@@ -1,4 +1,5 @@
using System.Collections.Immutable;
using System.Diagnostics;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
@@ -6,6 +7,7 @@ using Microsoft.Extensions.Logging;
using StellaOps.Policy.Engine.Caching;
using StellaOps.Policy.Engine.Domain;
using StellaOps.Policy.Engine.Evaluation;
using StellaOps.Policy.Engine.Telemetry;
using StellaOps.PolicyDsl;
namespace StellaOps.Policy.Engine.Services;
@@ -88,6 +90,12 @@ internal sealed class PolicyRuntimeEvaluationService
{
ArgumentNullException.ThrowIfNull(request);
using var activity = PolicyEngineTelemetry.StartEvaluateActivity(
request.TenantId, request.PackId, runId: null);
activity?.SetTag("policy.version", request.Version);
activity?.SetTag("subject.purl", request.SubjectPurl);
activity?.SetTag("advisory.id", request.AdvisoryId);
var startTimestamp = _timeProvider.GetTimestamp();
var evaluationTimestamp = request.EvaluationTimestamp ?? _timeProvider.GetUtcNow();
@@ -97,6 +105,9 @@ internal sealed class PolicyRuntimeEvaluationService
if (bundle is null)
{
PolicyEngineTelemetry.RecordError("evaluation", request.TenantId);
PolicyEngineTelemetry.RecordEvaluationFailure(request.TenantId, request.PackId, "bundle_not_found");
activity?.SetStatus(ActivityStatusCode.Error, "Bundle not found");
throw new InvalidOperationException(
$"Policy bundle not found for pack '{request.PackId}' version {request.Version}.");
}
@@ -113,6 +124,12 @@ internal sealed class PolicyRuntimeEvaluationService
if (cacheResult.CacheHit && cacheResult.Entry is not null)
{
var duration = GetElapsedMilliseconds(startTimestamp);
var durationSeconds = duration / 1000.0;
PolicyEngineTelemetry.RecordEvaluationLatency(durationSeconds, request.TenantId, request.PackId);
PolicyEngineTelemetry.RecordEvaluation(request.TenantId, request.PackId, "cached");
activity?.SetTag("cache.hit", true);
activity?.SetTag("cache.source", cacheResult.Source.ToString());
activity?.SetStatus(ActivityStatusCode.Ok);
_logger.LogDebug(
"Cache hit for evaluation {PackId}@{Version} subject {Subject} from {Source}",
request.PackId, request.Version, request.SubjectPurl, cacheResult.Source);
@@ -122,12 +139,17 @@ internal sealed class PolicyRuntimeEvaluationService
}
}
activity?.SetTag("cache.hit", false);
// Cache miss - perform evaluation
var document = DeserializeCompiledPolicy(bundle.Payload);
var document = bundle.CompiledDocument;
if (document is null)
{
PolicyEngineTelemetry.RecordError("evaluation", request.TenantId);
PolicyEngineTelemetry.RecordEvaluationFailure(request.TenantId, request.PackId, "document_not_found");
activity?.SetStatus(ActivityStatusCode.Error, "Document not found");
throw new InvalidOperationException(
$"Failed to deserialize compiled policy for pack '{request.PackId}' version {request.Version}.");
$"Compiled policy document not found for pack '{request.PackId}' version {request.Version}.");
}
var context = new PolicyEvaluationContext(
@@ -162,6 +184,21 @@ internal sealed class PolicyRuntimeEvaluationService
await _cache.SetAsync(cacheKey, cacheEntry, cancellationToken).ConfigureAwait(false);
var evalDuration = GetElapsedMilliseconds(startTimestamp);
var evalDurationSeconds = evalDuration / 1000.0;
// Record metrics
PolicyEngineTelemetry.RecordEvaluationLatency(evalDurationSeconds, request.TenantId, request.PackId);
PolicyEngineTelemetry.RecordEvaluation(request.TenantId, request.PackId, "full");
if (!string.IsNullOrEmpty(result.RuleName))
{
PolicyEngineTelemetry.RecordRuleFired(request.PackId, result.RuleName);
}
activity?.SetTag("evaluation.status", result.Status);
activity?.SetTag("evaluation.rule", result.RuleName ?? "none");
activity?.SetTag("evaluation.duration_ms", evalDuration);
activity?.SetStatus(ActivityStatusCode.Ok);
_logger.LogDebug(
"Evaluated {PackId}@{Version} subject {Subject} in {Duration}ms - {Status}",
request.PackId, request.Version, request.SubjectPurl, evalDuration, result.Status);
@@ -195,7 +232,13 @@ internal sealed class PolicyRuntimeEvaluationService
return Array.Empty<RuntimeEvaluationResponse>();
}
using var activity = PolicyEngineTelemetry.ActivitySource.StartActivity("policy.evaluate_batch", ActivityKind.Internal);
activity?.SetTag("batch.size", requests.Count);
var batchStartTimestamp = _timeProvider.GetTimestamp();
var results = new List<RuntimeEvaluationResponse>(requests.Count);
var cacheHits = 0;
var cacheMisses = 0;
// Group by pack/version for bundle loading efficiency
var groups = requests.GroupBy(r => (r.PackId, r.Version));
@@ -210,6 +253,7 @@ internal sealed class PolicyRuntimeEvaluationService
{
foreach (var request in group)
{
PolicyEngineTelemetry.RecordEvaluationFailure(request.TenantId, packId, "bundle_not_found");
_logger.LogWarning(
"Policy bundle not found for pack '{PackId}' version {Version}, skipping evaluation",
packId, version);
@@ -217,11 +261,12 @@ internal sealed class PolicyRuntimeEvaluationService
continue;
}
var document = DeserializeCompiledPolicy(bundle.Payload);
var document = bundle.CompiledDocument;
if (document is null)
{
PolicyEngineTelemetry.RecordEvaluationFailure("default", packId, "document_not_found");
_logger.LogWarning(
"Failed to deserialize policy bundle for pack '{PackId}' version {Version}",
"Compiled policy document not found for pack '{PackId}' version {Version}",
packId, version);
continue;
}
@@ -249,6 +294,8 @@ internal sealed class PolicyRuntimeEvaluationService
{
var response = CreateResponseFromCache(request, bundle.Digest, entry, CacheSource.InMemory, 0);
results.Add(response);
cacheHits++;
PolicyEngineTelemetry.RecordEvaluation(request.TenantId, packId, "cached");
}
else
{
@@ -294,6 +341,15 @@ internal sealed class PolicyRuntimeEvaluationService
expiresAt);
entriesToCache[key] = cacheEntry;
cacheMisses++;
// Record metrics for each evaluation
PolicyEngineTelemetry.RecordEvaluationLatency(duration / 1000.0, request.TenantId, packId);
PolicyEngineTelemetry.RecordEvaluation(request.TenantId, packId, "full");
if (!string.IsNullOrEmpty(result.RuleName))
{
PolicyEngineTelemetry.RecordRuleFired(packId, result.RuleName);
}
results.Add(new RuntimeEvaluationResponse(
request.PackId,
@@ -319,6 +375,17 @@ internal sealed class PolicyRuntimeEvaluationService
}
}
// Record batch-level metrics
var batchDuration = GetElapsedMilliseconds(batchStartTimestamp);
activity?.SetTag("batch.cache_hits", cacheHits);
activity?.SetTag("batch.cache_misses", cacheMisses);
activity?.SetTag("batch.duration_ms", batchDuration);
activity?.SetStatus(ActivityStatusCode.Ok);
_logger.LogDebug(
"Batch evaluation completed: {Total} subjects, {CacheHits} cache hits, {CacheMisses} evaluated in {Duration}ms",
requests.Count, cacheHits, cacheMisses, batchDuration);
return results;
}
@@ -398,24 +465,6 @@ internal sealed class PolicyRuntimeEvaluationService
return Convert.ToHexString(hash);
}
private static PolicyIrDocument? DeserializeCompiledPolicy(ImmutableArray<byte> payload)
{
if (payload.IsDefaultOrEmpty)
{
return null;
}
try
{
var json = Encoding.UTF8.GetString(payload.AsSpan());
return JsonSerializer.Deserialize<PolicyIrDocument>(json);
}
catch
{
return null;
}
}
private long GetElapsedMilliseconds(long startTimestamp)
{
var elapsed = _timeProvider.GetElapsedTime(startTimestamp);