using System;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using System.Globalization;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Zastava.Core.Contracts;
using StellaOps.Zastava.Observer.Backend;
using StellaOps.Zastava.Observer.Configuration;
using StellaOps.Zastava.Observer.ContainerRuntime.Cri;
using StellaOps.Zastava.Observer.Surface;

namespace StellaOps.Zastava.Observer.Posture;

/// <summary>
/// Resolves the runtime posture of a container image: first from the posture cache,
/// otherwise from the runtime policy client, and records evidence entries describing
/// where the answer came from. Backend results are additionally enriched with
/// Surface manifest evidence when a manifest digest is available.
/// </summary>
internal sealed class RuntimePostureEvaluator : IRuntimePostureEvaluator
{
    /// <summary>Lower bound (seconds) applied to the configured fallback TTL and stale-warning threshold.</summary>
    private const int MinCacheWindowSeconds = 30;

    /// <summary>Upper bound (seconds) applied to the configured fallback TTL and stale-warning threshold.</summary>
    private const int MaxCacheWindowSeconds = 86400;

    /// <summary>Maximum number of metadata entries emitted as evidence per manifest artifact.</summary>
    private const int MaxArtifactMetadataEntries = 5;

    private static readonly Meter Meter = new("StellaOps.Zastava.Observer", "1.0.0");

    private static readonly Counter<long> ManifestFailuresCounter = Meter.CreateCounter<long>(
        "zastava_surface_manifest_failures_total",
        description: "Count of Surface manifest fetch failures");

    private readonly IRuntimePolicyClient policyClient;
    private readonly IRuntimePostureCache cache;
    private readonly IRuntimeSurfaceFsClient surfaceFsClient;

    // NOTE(review): the options type argument is not visible in the reviewed text and was
    // reconstructed; it must expose Posture.FallbackTtlSeconds and
    // Posture.StaleWarningThresholdSeconds. Confirm the type name against the
    // Configuration namespace.
    private readonly IOptionsMonitor<ZastavaObserverOptions> optionsMonitor;

    private readonly TimeProvider timeProvider;

    // NOTE(review): logger category type reconstructed as the enclosing class - confirm.
    private readonly ILogger<RuntimePostureEvaluator> logger;

    /// <summary>
    /// Creates the evaluator.
    /// </summary>
    /// <param name="policyClient">Client used to evaluate runtime policy for an image.</param>
    /// <param name="cache">Cache of previously evaluated postures keyed by image.</param>
    /// <param name="surfaceFsClient">Client used to look up Surface manifests by digest.</param>
    /// <param name="optionsMonitor">Source of the current posture cache settings.</param>
    /// <param name="timeProvider">Clock; falls back to <see cref="TimeProvider.System"/> when null.</param>
    /// <param name="logger">Logger for evaluation and manifest failures.</param>
    /// <exception cref="ArgumentNullException">
    /// Any argument other than <paramref name="timeProvider"/> is null.
    /// </exception>
    public RuntimePostureEvaluator(
        IRuntimePolicyClient policyClient,
        IRuntimePostureCache cache,
        IRuntimeSurfaceFsClient surfaceFsClient,
        IOptionsMonitor<ZastavaObserverOptions> optionsMonitor,
        TimeProvider timeProvider,
        ILogger<RuntimePostureEvaluator> logger)
    {
        this.policyClient = policyClient ?? throw new ArgumentNullException(nameof(policyClient));
        this.cache = cache ?? throw new ArgumentNullException(nameof(cache));
        this.surfaceFsClient = surfaceFsClient ?? throw new ArgumentNullException(nameof(surfaceFsClient));
        this.optionsMonitor = optionsMonitor ?? throw new ArgumentNullException(nameof(optionsMonitor));
        this.timeProvider = timeProvider ?? TimeProvider.System;
        this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Evaluates the posture for the image of <paramref name="container"/>.
    /// </summary>
    /// <param name="container">Container whose image reference is evaluated.</param>
    /// <param name="cancellationToken">Token forwarded to the policy and manifest clients.</param>
    /// <returns>
    /// The posture (null when no image key could be resolved, the backend returned no entry
    /// for the image, or the backend failed with nothing cached) together with the evidence
    /// collected along the way.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="container"/> is null.</exception>
    public async Task<RuntimePostureEvaluationResult> EvaluateAsync(CriContainerInfo container, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(container);

        var evidence = new List<RuntimeEvidence>();
        var now = timeProvider.GetUtcNow();
        var cacheOptions = optionsMonitor.CurrentValue.Posture;
        var fallbackTtl = TimeSpan.FromSeconds(
            Math.Clamp(cacheOptions.FallbackTtlSeconds, MinCacheWindowSeconds, MaxCacheWindowSeconds));
        var staleThreshold = TimeSpan.FromSeconds(
            Math.Clamp(cacheOptions.StaleWarningThresholdSeconds, MinCacheWindowSeconds, MaxCacheWindowSeconds));

        var imageKey = ResolveImageKey(container);
        if (string.IsNullOrWhiteSpace(imageKey))
        {
            evidence.Add(new RuntimeEvidence { Signal = "runtime.posture.skipped", Value = "no-image-ref" });
            return new RuntimePostureEvaluationResult(null, evidence);
        }

        // A cached entry that has not expired short-circuits the backend call entirely.
        var cached = cache.Get(imageKey);
        if (cached is not null && !cached.IsExpired(now))
        {
            evidence.Add(new RuntimeEvidence { Signal = "runtime.posture.cache", Value = "hit" });
            return new RuntimePostureEvaluationResult(cached.Posture, evidence);
        }

        try
        {
            var request = BuildRequest(container, imageKey);
            var response = await policyClient.EvaluateAsync(request, cancellationToken).ConfigureAwait(false);

            if (!response.Results.TryGetValue(imageKey, out var imageResult))
            {
                evidence.Add(new RuntimeEvidence { Signal = "runtime.posture.missing", Value = "policy-empty" });
                return new RuntimePostureEvaluationResult(null, evidence);
            }

            var posture = MapPosture(imageResult);

            // Expiry preference: explicit timestamp from the response, then the response TTL,
            // then the clamped fallback TTL from configuration.
            var expiresAt = response.ExpiresAtUtc != default
                ? response.ExpiresAtUtc
                : now.AddSeconds(response.TtlSeconds > 0 ? response.TtlSeconds : fallbackTtl.TotalSeconds);

            cache.Set(imageKey, posture, expiresAt, now);

            evidence.Add(new RuntimeEvidence { Signal = "runtime.posture.source", Value = "backend" });
            evidence.Add(new RuntimeEvidence
            {
                Signal = "runtime.posture.ttl",
                Value = expiresAt.ToString("O", CultureInfo.InvariantCulture)
            });

            await EnrichWithManifestAsync(imageResult.ManifestDigest, evidence, cancellationToken).ConfigureAwait(false);

            return new RuntimePostureEvaluationResult(posture, evidence);
        }
        // When the caller's token is cancelled the filter is false and the exception propagates.
        catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
        {
            logger.LogWarning(ex, "Runtime posture evaluation failed for image {ImageRef}.", imageKey);

            if (cached is not null)
            {
                // Fall back to the cached posture and label how old it is.
                var cacheSignal = !cached.IsExpired(now)
                    ? "hit"
                    : cached.IsStale(now, staleThreshold) ? "stale-warning" : "stale";

                evidence.Add(new RuntimeEvidence { Signal = "runtime.posture.cache", Value = cacheSignal });
                evidence.Add(new RuntimeEvidence { Signal = "runtime.posture.error", Value = ex.GetType().Name });
                return new RuntimePostureEvaluationResult(cached.Posture, evidence);
            }

            evidence.Add(new RuntimeEvidence { Signal = "runtime.posture.error", Value = ex.GetType().Name });
            return new RuntimePostureEvaluationResult(null, evidence);
        }
    }

    /// <summary>
    /// Picks the cache/policy key for a container: <c>ImageRef</c> when it is non-blank,
    /// otherwise <c>Image</c>, otherwise null.
    /// </summary>
    private static string? ResolveImageKey(CriContainerInfo container)
    {
        if (!string.IsNullOrWhiteSpace(container.ImageRef))
        {
            return container.ImageRef;
        }

        return string.IsNullOrWhiteSpace(container.Image) ? null : container.Image;
    }

    /// <summary>
    /// Builds the policy request for a single image. Labels are copied (null when the
    /// container has none) with the pod UID label removed; the namespace is read from the
    /// pod namespace label when present.
    /// </summary>
    private static RuntimePolicyRequest BuildRequest(CriContainerInfo container, string imageKey)
    {
        var labels = container.Labels.Count == 0
            ? null
            : new Dictionary<string, string>(container.Labels, StringComparer.Ordinal);

        labels?.Remove(CriLabelKeys.PodUid);

        return new RuntimePolicyRequest
        {
            Namespace = container.Labels.TryGetValue(CriLabelKeys.PodNamespace, out var ns) ? ns : null,
            Labels = labels,
            Images = new[] { imageKey }
        };
    }

    /// <summary>
    /// Converts a policy image result into a <see cref="RuntimePosture"/>, attaching
    /// attestation details only when the result carries a Rekor entry.
    /// </summary>
    private static RuntimePosture MapPosture(RuntimePolicyImageResult result)
    {
        var posture = new RuntimePosture
        {
            ImageSigned = result.Signed,
            SbomReferrer = result.HasSbomReferrers ? "present" : "missing"
        };

        if (result.Rekor is not null)
        {
            posture = posture with
            {
                Attestation = new RuntimeAttestation
                {
                    Uuid = result.Rekor.Uuid,
                    Verified = result.Rekor.Verified
                }
            };
        }

        return posture;
    }

    /// <summary>
    /// Appends Surface manifest evidence for <paramref name="manifestDigest"/>. Does nothing
    /// for a blank digest. A missing manifest or a failed lookup is recorded as evidence and
    /// counted in the manifest failure counter instead of being thrown (unless the caller's
    /// token has been cancelled, in which case the exception propagates).
    /// </summary>
    /// <param name="manifestDigest">Digest of the manifest to look up; may be null or blank.</param>
    /// <param name="evidence">Evidence list that is appended to in place.</param>
    /// <param name="cancellationToken">Token forwarded to the Surface client.</param>
    private async Task EnrichWithManifestAsync(string? manifestDigest, List<RuntimeEvidence> evidence, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(manifestDigest))
        {
            return;
        }

        try
        {
            var manifest = await surfaceFsClient.TryGetManifestAsync(manifestDigest, cancellationToken).ConfigureAwait(false);
            if (manifest is null)
            {
                ManifestFailuresCounter.Add(1, new KeyValuePair<string, object?>("reason", "not_found"));
                evidence.Add(new RuntimeEvidence { Signal = "runtime.surface.manifest", Value = "not_found" });
                logger.LogDebug("Surface manifest {ManifestDigest} not found in local cache.", manifestDigest);
                return;
            }

            evidence.Add(new RuntimeEvidence { Signal = "runtime.surface.manifest", Value = "resolved" });
            evidence.Add(new RuntimeEvidence { Signal = "runtime.surface.manifestDigest", Value = manifestDigest });

            if (!string.IsNullOrWhiteSpace(manifest.ImageDigest))
            {
                evidence.Add(new RuntimeEvidence { Signal = "runtime.surface.imageDigest", Value = manifest.ImageDigest });
            }

            foreach (var artifact in manifest.Artifacts)
            {
                var artifactKind = artifact.Kind;
                if (string.IsNullOrWhiteSpace(artifactKind))
                {
                    // Artifacts without a kind cannot be given a signal name; skip them.
                    continue;
                }

                evidence.Add(new RuntimeEvidence
                {
                    Signal = $"runtime.surface.artifact.{artifactKind}",
                    Value = artifact.Digest
                });

                if (artifact.Metadata is not null && artifact.Metadata.Count > 0)
                {
                    // Only the first few entries, in the collection's own enumeration order,
                    // are emitted to keep the evidence list bounded.
                    foreach (var kvp in artifact.Metadata.Take(MaxArtifactMetadataEntries))
                    {
                        evidence.Add(new RuntimeEvidence
                        {
                            Signal = $"runtime.surface.artifact.{artifactKind}.{kvp.Key}",
                            Value = kvp.Value
                        });
                    }
                }
            }
        }
        catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
        {
            ManifestFailuresCounter.Add(1, new KeyValuePair<string, object?>("reason", "fetch_error"));
            evidence.Add(new RuntimeEvidence { Signal = "runtime.surface.manifest", Value = "fetch_error" });
            logger.LogWarning(ex, "Failed to fetch Surface manifest {ManifestDigest}.", manifestDigest);
        }
    }
}