This commit is contained in:
@@ -0,0 +1,9 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Backend;
|
||||
|
||||
internal interface IRuntimePolicyClient
|
||||
{
|
||||
Task<RuntimePolicyResponse> EvaluateAsync(RuntimePolicyRequest request, CancellationToken cancellationToken = default);
|
||||
}
|
||||
237
src/StellaOps.Zastava.Observer/Backend/RuntimeEventsClient.cs
Normal file
237
src/StellaOps.Zastava.Observer/Backend/RuntimeEventsClient.cs
Normal file
@@ -0,0 +1,237 @@
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Core.Configuration;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
using StellaOps.Zastava.Core.Diagnostics;
|
||||
using StellaOps.Zastava.Core.Security;
|
||||
using StellaOps.Zastava.Core.Serialization;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Backend;
|
||||
|
||||
internal interface IRuntimeEventsClient
|
||||
{
|
||||
Task<RuntimeEventPublishResult> PublishAsync(RuntimeEventsIngestRequest request, CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
internal sealed class RuntimeEventsClient : IRuntimeEventsClient
|
||||
{
|
||||
private static readonly JsonSerializerOptions SerializerOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
|
||||
};
|
||||
|
||||
static RuntimeEventsClient()
|
||||
{
|
||||
SerializerOptions.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.CamelCase, allowIntegerValues: false));
|
||||
}
|
||||
|
||||
private readonly HttpClient httpClient;
|
||||
private readonly IZastavaAuthorityTokenProvider authorityTokenProvider;
|
||||
private readonly IOptionsMonitor<ZastavaRuntimeOptions> runtimeOptions;
|
||||
private readonly IOptionsMonitor<ZastavaObserverOptions> observerOptions;
|
||||
private readonly IZastavaRuntimeMetrics runtimeMetrics;
|
||||
private readonly ILogger<RuntimeEventsClient> logger;
|
||||
|
||||
public RuntimeEventsClient(
|
||||
HttpClient httpClient,
|
||||
IZastavaAuthorityTokenProvider authorityTokenProvider,
|
||||
IOptionsMonitor<ZastavaRuntimeOptions> runtimeOptions,
|
||||
IOptionsMonitor<ZastavaObserverOptions> observerOptions,
|
||||
IZastavaRuntimeMetrics runtimeMetrics,
|
||||
ILogger<RuntimeEventsClient> logger)
|
||||
{
|
||||
this.httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
|
||||
this.authorityTokenProvider = authorityTokenProvider ?? throw new ArgumentNullException(nameof(authorityTokenProvider));
|
||||
this.runtimeOptions = runtimeOptions ?? throw new ArgumentNullException(nameof(runtimeOptions));
|
||||
this.observerOptions = observerOptions ?? throw new ArgumentNullException(nameof(observerOptions));
|
||||
this.runtimeMetrics = runtimeMetrics ?? throw new ArgumentNullException(nameof(runtimeMetrics));
|
||||
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task<RuntimeEventPublishResult> PublishAsync(RuntimeEventsIngestRequest request, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
|
||||
if (request.Events.Count == 0)
|
||||
{
|
||||
return RuntimeEventPublishResult.Empty;
|
||||
}
|
||||
|
||||
var runtime = runtimeOptions.CurrentValue;
|
||||
var authority = runtime.Authority;
|
||||
var audience = authority.Audience.FirstOrDefault() ?? "scanner";
|
||||
var scopes = authority.Scopes ?? Array.Empty<string>();
|
||||
var token = await authorityTokenProvider.GetAsync(audience, scopes, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var backend = observerOptions.CurrentValue.Backend;
|
||||
var requestPath = backend.EventsPath;
|
||||
|
||||
using var httpRequest = new HttpRequestMessage(HttpMethod.Post, requestPath);
|
||||
var payload = ZastavaCanonicalJsonSerializer.SerializeToUtf8Bytes(request);
|
||||
httpRequest.Content = new ByteArrayContent(payload);
|
||||
httpRequest.Content.Headers.ContentType = new MediaTypeHeaderValue("application/json");
|
||||
httpRequest.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
|
||||
httpRequest.Headers.Authorization = CreateAuthorizationHeader(token);
|
||||
|
||||
var stopwatch = System.Diagnostics.Stopwatch.StartNew();
|
||||
try
|
||||
{
|
||||
using var response = await httpClient.SendAsync(httpRequest, cancellationToken).ConfigureAwait(false);
|
||||
stopwatch.Stop();
|
||||
RecordLatency(stopwatch.Elapsed.TotalMilliseconds, success: response.IsSuccessStatusCode);
|
||||
|
||||
if (response.IsSuccessStatusCode)
|
||||
{
|
||||
var body = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
|
||||
RuntimeEventsIngestResponse? parsed = null;
|
||||
if (!string.IsNullOrWhiteSpace(body))
|
||||
{
|
||||
parsed = JsonSerializer.Deserialize<RuntimeEventsIngestResponse>(body, SerializerOptions);
|
||||
}
|
||||
|
||||
var accepted = parsed?.Accepted ?? request.Events.Count;
|
||||
var duplicates = parsed?.Duplicates ?? 0;
|
||||
|
||||
logger.LogDebug("Published runtime events batch (batchId={BatchId}, accepted={Accepted}, duplicates={Duplicates}).",
|
||||
request.BatchId,
|
||||
accepted,
|
||||
duplicates);
|
||||
|
||||
return RuntimeEventPublishResult.Successful(accepted, duplicates);
|
||||
}
|
||||
|
||||
if (response.StatusCode == HttpStatusCode.TooManyRequests)
|
||||
{
|
||||
var retryAfter = ParseRetryAfter(response.Headers.RetryAfter) ?? TimeSpan.FromSeconds(5);
|
||||
logger.LogWarning("Runtime events publish rate limited (batchId={BatchId}, retryAfter={RetryAfter}).", request.BatchId, retryAfter);
|
||||
return RuntimeEventPublishResult.FromRateLimit(retryAfter);
|
||||
}
|
||||
|
||||
var errorBody = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
|
||||
logger.LogWarning("Runtime events publish failed with status {Status} (batchId={BatchId}): {Payload}",
|
||||
(int)response.StatusCode,
|
||||
request.BatchId,
|
||||
Truncate(errorBody));
|
||||
|
||||
throw new RuntimeEventsException($"Runtime events publish failed with status {(int)response.StatusCode}", response.StatusCode);
|
||||
}
|
||||
catch (RuntimeEventsException)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
stopwatch.Stop();
|
||||
RecordLatency(stopwatch.Elapsed.TotalMilliseconds, success: false);
|
||||
logger.LogWarning(ex, "Runtime events publish encountered an exception (batchId={BatchId}).", request.BatchId);
|
||||
throw new RuntimeEventsException("Runtime events publish failed due to network error.", HttpStatusCode.ServiceUnavailable, ex);
|
||||
}
|
||||
}
|
||||
|
||||
private AuthenticationHeaderValue CreateAuthorizationHeader(ZastavaOperationalToken token)
|
||||
{
|
||||
var scheme = string.Equals(token.TokenType, "dpop", StringComparison.OrdinalIgnoreCase)
|
||||
? "DPoP"
|
||||
: token.TokenType;
|
||||
return new AuthenticationHeaderValue(scheme, token.AccessToken);
|
||||
}
|
||||
|
||||
private void RecordLatency(double elapsedMs, bool success)
|
||||
{
|
||||
var tags = runtimeMetrics.DefaultTags
|
||||
.Concat(new[]
|
||||
{
|
||||
new KeyValuePair<string, object?>("endpoint", "runtime-events"),
|
||||
new KeyValuePair<string, object?>("success", success ? "true" : "false")
|
||||
})
|
||||
.ToArray();
|
||||
runtimeMetrics.BackendLatencyMs.Record(elapsedMs, tags);
|
||||
}
|
||||
|
||||
private static TimeSpan? ParseRetryAfter(RetryConditionHeaderValue? retryAfter)
|
||||
{
|
||||
if (retryAfter is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
if (retryAfter.Delta.HasValue)
|
||||
{
|
||||
return retryAfter.Delta.Value;
|
||||
}
|
||||
|
||||
if (retryAfter.Date.HasValue)
|
||||
{
|
||||
var delta = retryAfter.Date.Value.UtcDateTime - DateTime.UtcNow;
|
||||
return delta > TimeSpan.Zero ? delta : TimeSpan.Zero;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static string Truncate(string? value, int maxLength = 512)
|
||||
{
|
||||
if (string.IsNullOrEmpty(value))
|
||||
{
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
return value.Length <= maxLength ? value : value[..maxLength] + "…";
|
||||
}
|
||||
}
|
||||
|
||||
internal sealed record RuntimeEventsIngestRequest
|
||||
{
|
||||
[JsonPropertyName("batchId")]
|
||||
public string? BatchId { get; init; }
|
||||
|
||||
[JsonPropertyName("events")]
|
||||
public IReadOnlyList<RuntimeEventEnvelope> Events { get; init; } = Array.Empty<RuntimeEventEnvelope>();
|
||||
}
|
||||
|
||||
internal sealed record RuntimeEventsIngestResponse
|
||||
{
|
||||
[JsonPropertyName("accepted")]
|
||||
public int Accepted { get; init; }
|
||||
|
||||
[JsonPropertyName("duplicates")]
|
||||
public int Duplicates { get; init; }
|
||||
}
|
||||
|
||||
internal readonly record struct RuntimeEventPublishResult(
|
||||
bool Success,
|
||||
bool RateLimited,
|
||||
TimeSpan RetryAfter,
|
||||
int Accepted,
|
||||
int Duplicates)
|
||||
{
|
||||
public static RuntimeEventPublishResult Empty => new(true, false, TimeSpan.Zero, 0, 0);
|
||||
|
||||
public static RuntimeEventPublishResult Successful(int accepted, int duplicates)
|
||||
=> new(true, false, TimeSpan.Zero, accepted, duplicates);
|
||||
|
||||
public static RuntimeEventPublishResult FromRateLimit(TimeSpan retryAfter)
|
||||
=> new(false, true, retryAfter, 0, 0);
|
||||
}
|
||||
|
||||
internal sealed class RuntimeEventsException : Exception
|
||||
{
|
||||
public RuntimeEventsException(string message, HttpStatusCode statusCode, Exception? innerException = null)
|
||||
: base(message, innerException)
|
||||
{
|
||||
StatusCode = statusCode;
|
||||
}
|
||||
|
||||
public HttpStatusCode StatusCode { get; }
|
||||
}
|
||||
128
src/StellaOps.Zastava.Observer/Backend/RuntimePolicyClient.cs
Normal file
128
src/StellaOps.Zastava.Observer/Backend/RuntimePolicyClient.cs
Normal file
@@ -0,0 +1,128 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Core.Configuration;
|
||||
using StellaOps.Zastava.Core.Diagnostics;
|
||||
using StellaOps.Zastava.Core.Security;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Backend;
|
||||
|
||||
internal sealed class RuntimePolicyClient : IRuntimePolicyClient
|
||||
{
|
||||
private static readonly JsonSerializerOptions SerializerOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
|
||||
};
|
||||
|
||||
static RuntimePolicyClient()
|
||||
{
|
||||
SerializerOptions.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.CamelCase, allowIntegerValues: false));
|
||||
}
|
||||
|
||||
private readonly HttpClient httpClient;
|
||||
private readonly IZastavaAuthorityTokenProvider authorityTokenProvider;
|
||||
private readonly IOptionsMonitor<ZastavaRuntimeOptions> runtimeOptions;
|
||||
private readonly IOptionsMonitor<ZastavaObserverOptions> observerOptions;
|
||||
private readonly IZastavaRuntimeMetrics runtimeMetrics;
|
||||
private readonly ILogger<RuntimePolicyClient> logger;
|
||||
|
||||
public RuntimePolicyClient(
|
||||
HttpClient httpClient,
|
||||
IZastavaAuthorityTokenProvider authorityTokenProvider,
|
||||
IOptionsMonitor<ZastavaRuntimeOptions> runtimeOptions,
|
||||
IOptionsMonitor<ZastavaObserverOptions> observerOptions,
|
||||
IZastavaRuntimeMetrics runtimeMetrics,
|
||||
ILogger<RuntimePolicyClient> logger)
|
||||
{
|
||||
this.httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
|
||||
this.authorityTokenProvider = authorityTokenProvider ?? throw new ArgumentNullException(nameof(authorityTokenProvider));
|
||||
this.runtimeOptions = runtimeOptions ?? throw new ArgumentNullException(nameof(runtimeOptions));
|
||||
this.observerOptions = observerOptions ?? throw new ArgumentNullException(nameof(observerOptions));
|
||||
this.runtimeMetrics = runtimeMetrics ?? throw new ArgumentNullException(nameof(runtimeMetrics));
|
||||
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task<RuntimePolicyResponse> EvaluateAsync(RuntimePolicyRequest request, CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
|
||||
var runtime = runtimeOptions.CurrentValue;
|
||||
var authority = runtime.Authority;
|
||||
var audience = authority.Audience.FirstOrDefault() ?? "scanner";
|
||||
|
||||
var token = await authorityTokenProvider
|
||||
.GetAsync(audience, authority.Scopes ?? Array.Empty<string>(), cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
var backend = observerOptions.CurrentValue.Backend;
|
||||
EnsureBackendGuardrails(backend);
|
||||
|
||||
using var httpRequest = new HttpRequestMessage(HttpMethod.Post, backend.PolicyPath)
|
||||
{
|
||||
Content = new StringContent(JsonSerializer.Serialize(request, SerializerOptions), Encoding.UTF8, "application/json")
|
||||
};
|
||||
|
||||
httpRequest.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
|
||||
httpRequest.Headers.Authorization = CreateAuthorizationHeader(token);
|
||||
|
||||
var stopwatch = Stopwatch.StartNew();
|
||||
try
|
||||
{
|
||||
using var response = await httpClient.SendAsync(httpRequest, cancellationToken).ConfigureAwait(false);
|
||||
var payload = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
logger.LogWarning("Runtime policy call returned {StatusCode}: {Payload}", (int)response.StatusCode, payload);
|
||||
throw new RuntimePolicyException($"Runtime policy call failed with status {(int)response.StatusCode}", response.StatusCode);
|
||||
}
|
||||
|
||||
var result = JsonSerializer.Deserialize<RuntimePolicyResponse>(payload, SerializerOptions);
|
||||
if (result is null)
|
||||
{
|
||||
throw new RuntimePolicyException("Runtime policy response payload was empty or invalid.", response.StatusCode);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
finally
|
||||
{
|
||||
stopwatch.Stop();
|
||||
RecordLatency(stopwatch.Elapsed.TotalMilliseconds);
|
||||
}
|
||||
}
|
||||
|
||||
private AuthenticationHeaderValue CreateAuthorizationHeader(ZastavaOperationalToken token)
|
||||
{
|
||||
var scheme = string.Equals(token.TokenType, "dpop", StringComparison.OrdinalIgnoreCase) ? "DPoP" : token.TokenType;
|
||||
return new AuthenticationHeaderValue(scheme, token.AccessToken);
|
||||
}
|
||||
|
||||
private void RecordLatency(double elapsedMs)
|
||||
{
|
||||
var tags = runtimeMetrics.DefaultTags
|
||||
.Concat(new[] { new KeyValuePair<string, object?>("endpoint", "policy") })
|
||||
.ToArray();
|
||||
runtimeMetrics.BackendLatencyMs.Record(elapsedMs, tags);
|
||||
}
|
||||
|
||||
private static void EnsureBackendGuardrails(ZastavaObserverBackendOptions backend)
|
||||
{
|
||||
if (!backend.AllowInsecureHttp && !string.Equals(backend.BaseAddress.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
throw new InvalidOperationException("Observer backend baseAddress must use HTTPS unless allowInsecureHttp is true.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json.Serialization;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Backend;
|
||||
|
||||
internal sealed record RuntimePolicyRequest
|
||||
{
|
||||
[JsonPropertyName("namespace")]
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public string? Namespace { get; init; }
|
||||
|
||||
[JsonPropertyName("labels")]
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public IReadOnlyDictionary<string, string>? Labels { get; init; }
|
||||
|
||||
[JsonPropertyName("images")]
|
||||
public required IReadOnlyList<string> Images { get; init; }
|
||||
}
|
||||
|
||||
internal sealed record RuntimePolicyResponse
|
||||
{
|
||||
[JsonPropertyName("ttlSeconds")]
|
||||
public int TtlSeconds { get; init; }
|
||||
|
||||
[JsonPropertyName("expiresAtUtc")]
|
||||
public DateTimeOffset ExpiresAtUtc { get; init; }
|
||||
|
||||
[JsonPropertyName("policyRevision")]
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public string? PolicyRevision { get; init; }
|
||||
|
||||
[JsonPropertyName("results")]
|
||||
public IReadOnlyDictionary<string, RuntimePolicyImageResult> Results { get; init; } = new Dictionary<string, RuntimePolicyImageResult>(StringComparer.Ordinal);
|
||||
}
|
||||
|
||||
internal sealed record RuntimePolicyImageResult
|
||||
{
|
||||
[JsonPropertyName("policyVerdict")]
|
||||
public PolicyVerdict PolicyVerdict { get; init; } = PolicyVerdict.Error;
|
||||
|
||||
[JsonPropertyName("signed")]
|
||||
public bool Signed { get; init; }
|
||||
|
||||
[JsonPropertyName("hasSbomReferrers")]
|
||||
public bool HasSbomReferrers { get; init; }
|
||||
|
||||
[JsonPropertyName("hasSbom")]
|
||||
public bool HasSbomLegacy { get; init; }
|
||||
|
||||
[JsonPropertyName("reasons")]
|
||||
public IReadOnlyList<string> Reasons { get; init; } = Array.Empty<string>();
|
||||
|
||||
[JsonPropertyName("rekor")]
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public RuntimePolicyRekorResult? Rekor { get; init; }
|
||||
}
|
||||
|
||||
internal sealed record RuntimePolicyRekorResult
|
||||
{
|
||||
[JsonPropertyName("uuid")]
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public string? Uuid { get; init; }
|
||||
|
||||
[JsonPropertyName("url")]
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public string? Url { get; init; }
|
||||
|
||||
[JsonPropertyName("verified")]
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public bool? Verified { get; init; }
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
using System;
|
||||
using System.Net;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Backend;
|
||||
|
||||
internal sealed class RuntimePolicyException : Exception
|
||||
{
|
||||
public RuntimePolicyException(string message, HttpStatusCode statusCode)
|
||||
: base(message)
|
||||
{
|
||||
StatusCode = statusCode;
|
||||
}
|
||||
|
||||
public RuntimePolicyException(string message, HttpStatusCode statusCode, Exception innerException)
|
||||
: base(message, innerException)
|
||||
{
|
||||
StatusCode = statusCode;
|
||||
}
|
||||
|
||||
public HttpStatusCode StatusCode { get; }
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
using System.IO;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Configuration;
|
||||
|
||||
@@ -38,6 +39,24 @@ public sealed class ZastavaObserverOptions
|
||||
[Range(1, 512)]
|
||||
public int PublishBatchSize { get; set; } = 32;
|
||||
|
||||
/// <summary>
|
||||
/// Maximum interval (seconds) that events may remain buffered before forcing a publish.
|
||||
/// </summary>
|
||||
[Range(typeof(double), "0.1", "30")]
|
||||
public double PublishFlushIntervalSeconds { get; set; } = 2;
|
||||
|
||||
/// <summary>
|
||||
/// Directory used for disk-backed runtime event buffering.
|
||||
/// </summary>
|
||||
[Required(AllowEmptyStrings = false)]
|
||||
public string EventBufferPath { get; set; } = Path.Combine(Path.GetTempPath(), "zastava-observer", "runtime-events");
|
||||
|
||||
/// <summary>
|
||||
/// Maximum on-disk bytes retained for buffered runtime events.
|
||||
/// </summary>
|
||||
[Range(typeof(long), "1048576", "1073741824")]
|
||||
public long MaxDiskBufferBytes { get; set; } = 64 * 1024 * 1024; // 64 MiB
|
||||
|
||||
/// <summary>
|
||||
/// Connectivity/backoff settings applied when CRI endpoints fail temporarily.
|
||||
/// </summary>
|
||||
@@ -58,6 +77,101 @@ public sealed class ZastavaObserverOptions
|
||||
Enabled = true
|
||||
}
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Scanner backend configuration for posture checks and event ingestion.
|
||||
/// </summary>
|
||||
[Required]
|
||||
public ZastavaObserverBackendOptions Backend { get; set; } = new();
|
||||
|
||||
/// <summary>
|
||||
/// Posture-specific configuration values.
|
||||
/// </summary>
|
||||
[Required]
|
||||
public ZastavaObserverPostureOptions Posture { get; set; } = new();
|
||||
|
||||
/// <summary>
|
||||
/// Root path for accessing host process information (defaults to /host/proc).
|
||||
/// </summary>
|
||||
[Required(AllowEmptyStrings = false)]
|
||||
public string ProcRootPath { get; set; } = "/host/proc";
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of loaded libraries captured per process.
|
||||
/// </summary>
|
||||
[Range(8, 4096)]
|
||||
public int MaxTrackedLibraries { get; set; } = 256;
|
||||
|
||||
/// <summary>
|
||||
/// Maximum size (in bytes) of a library file to hash when collecting loaded libraries.
|
||||
/// </summary>
|
||||
[Range(typeof(long), "1024", "1073741824")]
|
||||
public long MaxLibraryBytes { get; set; } = 33554432; // 32 MiB
|
||||
|
||||
/// <summary>
|
||||
/// Maximum cumulative bytes hashed across libraries for a single process capture.
|
||||
/// </summary>
|
||||
[Range(typeof(long), "1024", "2147483647")]
|
||||
public long MaxLibraryHashBytes { get; set; } = 64_000_000; // ~61 MiB budget
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of entrypoint arguments captured for reporting.
|
||||
/// </summary>
|
||||
[Range(1, 128)]
|
||||
public int MaxEntrypointArguments { get; set; } = 32;
|
||||
}
|
||||
|
||||
public sealed class ZastavaObserverBackendOptions
|
||||
{
|
||||
/// <summary>
|
||||
/// Base address for Scanner WebService runtime APIs.
|
||||
/// </summary>
|
||||
[Required]
|
||||
public Uri BaseAddress { get; init; } = new("https://scanner.internal");
|
||||
|
||||
/// <summary>
|
||||
/// Runtime policy endpoint path.
|
||||
/// </summary>
|
||||
[Required(AllowEmptyStrings = false)]
|
||||
public string PolicyPath { get; init; } = "/api/v1/scanner/policy/runtime";
|
||||
|
||||
/// <summary>
|
||||
/// Runtime events ingestion endpoint path.
|
||||
/// </summary>
|
||||
[Required(AllowEmptyStrings = false)]
|
||||
public string EventsPath { get; init; } = "/api/v1/runtime/events";
|
||||
|
||||
/// <summary>
|
||||
/// Request timeout for backend calls in seconds.
|
||||
/// </summary>
|
||||
[Range(typeof(double), "1", "120")]
|
||||
public double RequestTimeoutSeconds { get; init; } = 5;
|
||||
|
||||
/// <summary>
|
||||
/// Allows plain HTTP endpoints when true (default false for safety).
|
||||
/// </summary>
|
||||
public bool AllowInsecureHttp { get; init; }
|
||||
}
|
||||
|
||||
public sealed class ZastavaObserverPostureOptions
|
||||
{
|
||||
/// <summary>
|
||||
/// Path where posture cache entries are persisted across restarts.
|
||||
/// </summary>
|
||||
[Required(AllowEmptyStrings = false)]
|
||||
public string CachePath { get; init; } = Path.Combine(Path.GetTempPath(), "zastava-observer", "posture-cache.json");
|
||||
|
||||
/// <summary>
|
||||
/// Fallback TTL (seconds) applied when backend omits an explicit expiry.
|
||||
/// </summary>
|
||||
[Range(30, 86400)]
|
||||
public int FallbackTtlSeconds { get; init; } = 300;
|
||||
|
||||
/// <summary>
|
||||
/// Threshold (seconds) after expiration where stale cache usage triggers warnings.
|
||||
/// </summary>
|
||||
[Range(30, 86400)]
|
||||
public int StaleWarningThresholdSeconds { get; init; } = 900;
|
||||
}
|
||||
|
||||
public sealed class ObserverBackoffOptions
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace StellaOps.Zastava.Observer.ContainerRuntime;
|
||||
|
||||
internal sealed class ContainerStateTrackerFactory
|
||||
{
|
||||
public ContainerStateTracker Create()
|
||||
=> new();
|
||||
}
|
||||
@@ -24,7 +24,8 @@ internal static class CriConversions
|
||||
FinishedAt: null,
|
||||
ExitCode: null,
|
||||
Reason: null,
|
||||
Message: null);
|
||||
Message: null,
|
||||
Pid: null);
|
||||
}
|
||||
|
||||
public static CriContainerInfo MergeStatus(CriContainerInfo baseline, ContainerStatus? status)
|
||||
@@ -47,8 +48,9 @@ internal static class CriConversions
|
||||
ExitCode = status.ExitCode != 0 ? status.ExitCode : baseline.ExitCode,
|
||||
Reason = string.IsNullOrWhiteSpace(status.Reason) ? baseline.Reason : status.Reason,
|
||||
Message = string.IsNullOrWhiteSpace(status.Message) ? baseline.Message : status.Message,
|
||||
Image: status.Image?.Image ?? baseline.Image,
|
||||
ImageRef: string.IsNullOrWhiteSpace(status.ImageRef) ? baseline.ImageRef : status.ImageRef,
|
||||
Pid = baseline.Pid,
|
||||
Image = status.Image?.Image ?? baseline.Image,
|
||||
ImageRef = string.IsNullOrWhiteSpace(status.ImageRef) ? baseline.ImageRef : status.ImageRef,
|
||||
Labels = labels,
|
||||
Annotations = annotations
|
||||
};
|
||||
|
||||
@@ -21,7 +21,8 @@ internal sealed record CriContainerInfo(
|
||||
DateTimeOffset? FinishedAt,
|
||||
int? ExitCode,
|
||||
string? Reason,
|
||||
string? Message);
|
||||
string? Message,
|
||||
int? Pid);
|
||||
|
||||
internal static class CriLabelKeys
|
||||
{
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
using System.IO;
|
||||
using System.Net.Sockets;
|
||||
using System.Linq;
|
||||
using System.Net.Sockets;
|
||||
using System.Text.Json;
|
||||
using Grpc.Core;
|
||||
using Grpc.Net.Client;
|
||||
using Microsoft.Extensions.Logging;
|
||||
@@ -92,7 +93,7 @@ internal sealed class CriRuntimeClient : ICriRuntimeClient
|
||||
var response = await client.ContainerStatusAsync(new ContainerStatusRequest
|
||||
{
|
||||
ContainerId = containerId,
|
||||
Verbose = false
|
||||
Verbose = true
|
||||
}, cancellationToken: cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (response.Status is null)
|
||||
@@ -112,7 +113,14 @@ internal sealed class CriRuntimeClient : ICriRuntimeClient
|
||||
CreatedAt = response.Status.CreatedAt
|
||||
});
|
||||
|
||||
return CriConversions.MergeStatus(baseline, response.Status);
|
||||
var merged = CriConversions.MergeStatus(baseline, response.Status);
|
||||
|
||||
if (response.Info is { Count: > 0 } && TryExtractPid(response.Info, out var pid))
|
||||
{
|
||||
merged = merged with { Pid = pid };
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
catch (RpcException ex) when (ex.StatusCode is StatusCode.NotFound or StatusCode.DeadlineExceeded)
|
||||
{
|
||||
@@ -121,16 +129,49 @@ internal sealed class CriRuntimeClient : ICriRuntimeClient
|
||||
}
|
||||
}
|
||||
|
||||
public async ValueTask DisposeAsync()
|
||||
private static bool TryExtractPid(IDictionary<string, string> info, out int pid)
|
||||
{
|
||||
if (info.TryGetValue("pid", out var value) && int.TryParse(value, out pid))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
foreach (var entry in info.Values)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(entry))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
using var document = JsonDocument.Parse(entry);
|
||||
if (document.RootElement.TryGetProperty("pid", out var pidElement) && pidElement.TryGetInt32(out pid))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
catch (JsonException)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
pid = default;
|
||||
return false;
|
||||
}
|
||||
|
||||
public ValueTask DisposeAsync()
|
||||
{
|
||||
try
|
||||
{
|
||||
await channel.DisposeAsync().ConfigureAwait(false);
|
||||
channel.Dispose();
|
||||
}
|
||||
catch (InvalidOperationException)
|
||||
{
|
||||
// Channel already disposed.
|
||||
}
|
||||
|
||||
return ValueTask.CompletedTask;
|
||||
}
|
||||
|
||||
private static void EnsureHttp2Switch()
|
||||
@@ -161,7 +202,7 @@ internal sealed class CriRuntimeClient : ICriRuntimeClient
|
||||
EnableMultipleHttp2Connections = true
|
||||
};
|
||||
|
||||
if (endpoint.ConnectTimeout is { } timeout and > TimeSpan.Zero)
|
||||
if (endpoint.ConnectTimeout is { } timeout && timeout > TimeSpan.Zero)
|
||||
{
|
||||
handler.ConnectTimeout = timeout;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
using System;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Core.Configuration;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime;
|
||||
using StellaOps.Zastava.Observer.Posture;
|
||||
using StellaOps.Zastava.Observer.Runtime;
|
||||
using StellaOps.Zastava.Observer.Worker;
|
||||
using StellaOps.Zastava.Observer.Backend;
|
||||
|
||||
namespace Microsoft.Extensions.DependencyInjection;
|
||||
|
||||
public static class ObserverServiceCollectionExtensions
|
||||
{
|
||||
public static IServiceCollection AddZastavaObserver(this IServiceCollection services, IConfiguration configuration)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(services);
|
||||
ArgumentNullException.ThrowIfNull(configuration);
|
||||
|
||||
services.AddZastavaRuntimeCore(configuration, componentName: "observer");
|
||||
|
||||
services.AddOptions<ZastavaObserverOptions>()
|
||||
.Bind(configuration.GetSection(ZastavaObserverOptions.SectionName))
|
||||
.ValidateDataAnnotations()
|
||||
.PostConfigure(options =>
|
||||
{
|
||||
if (options.Backoff.Initial <= TimeSpan.Zero)
|
||||
{
|
||||
options.Backoff.Initial = TimeSpan.FromSeconds(1);
|
||||
}
|
||||
|
||||
if (options.Backoff.Max < options.Backoff.Initial)
|
||||
{
|
||||
options.Backoff.Max = options.Backoff.Initial;
|
||||
}
|
||||
|
||||
if (!options.Backend.AllowInsecureHttp && !string.Equals(options.Backend.BaseAddress.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
throw new InvalidOperationException("Observer backend baseAddress must use HTTPS unless allowInsecureHttp is explicitly enabled.");
|
||||
}
|
||||
|
||||
if (!options.Backend.PolicyPath.StartsWith("/", StringComparison.Ordinal))
|
||||
{
|
||||
throw new InvalidOperationException("Observer backend policyPath must be absolute (start with '/').");
|
||||
}
|
||||
|
||||
if (!options.Backend.EventsPath.StartsWith("/", StringComparison.Ordinal))
|
||||
{
|
||||
throw new InvalidOperationException("Observer backend eventsPath must be absolute (start with '/').");
|
||||
}
|
||||
})
|
||||
.ValidateOnStart();
|
||||
|
||||
services.TryAddSingleton(TimeProvider.System);
|
||||
services.TryAddSingleton<ICriRuntimeClientFactory, CriRuntimeClientFactory>();
|
||||
services.TryAddSingleton<IRuntimeEventBuffer, RuntimeEventBuffer>();
|
||||
services.TryAddSingleton<IRuntimeProcessCollector, RuntimeProcessCollector>();
|
||||
services.TryAddSingleton<IRuntimePostureCache, RuntimePostureCache>();
|
||||
services.TryAddSingleton<IRuntimePostureEvaluator, RuntimePostureEvaluator>();
|
||||
services.TryAddSingleton<ContainerStateTrackerFactory>();
|
||||
services.TryAddSingleton<ContainerRuntimePoller>();
|
||||
|
||||
services.AddHttpClient<IRuntimePolicyClient, RuntimePolicyClient>()
|
||||
.ConfigureHttpClient((provider, client) =>
|
||||
{
|
||||
var optionsMonitor = provider.GetRequiredService<IOptionsMonitor<ZastavaObserverOptions>>();
|
||||
var backend = optionsMonitor.CurrentValue.Backend;
|
||||
client.BaseAddress = backend.BaseAddress;
|
||||
client.Timeout = TimeSpan.FromSeconds(Math.Clamp(backend.RequestTimeoutSeconds, 1, 120));
|
||||
});
|
||||
|
||||
services.AddHttpClient<IRuntimeEventsClient, RuntimeEventsClient>()
|
||||
.ConfigureHttpClient((provider, client) =>
|
||||
{
|
||||
var optionsMonitor = provider.GetRequiredService<IOptionsMonitor<ZastavaObserverOptions>>();
|
||||
var backend = optionsMonitor.CurrentValue.Backend;
|
||||
client.BaseAddress = backend.BaseAddress;
|
||||
client.Timeout = TimeSpan.FromSeconds(Math.Clamp(backend.RequestTimeoutSeconds, 1, 120));
|
||||
});
|
||||
|
||||
services.TryAddEnumerable(ServiceDescriptor.Singleton<IPostConfigureOptions<ZastavaRuntimeOptions>, ObserverRuntimeOptionsPostConfigure>());
|
||||
|
||||
services.AddHostedService<ObserverBootstrapService>();
|
||||
services.AddHostedService<ContainerLifecycleHostedService>();
|
||||
services.AddHostedService<RuntimeEventDispatchService>();
|
||||
|
||||
return services;
|
||||
}
|
||||
}
|
||||
|
||||
internal sealed class ObserverRuntimeOptionsPostConfigure : IPostConfigureOptions<ZastavaRuntimeOptions>
|
||||
{
|
||||
public void PostConfigure(string? name, ZastavaRuntimeOptions options)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(options.Component))
|
||||
{
|
||||
options.Component = "observer";
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
using System;
|
||||
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Posture;
|
||||
|
||||
internal interface IRuntimePostureCache
|
||||
{
|
||||
RuntimePostureCacheEntry? Get(string key);
|
||||
|
||||
void Set(string key, RuntimePosture posture, DateTimeOffset expiresAtUtc, DateTimeOffset storedAtUtc);
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Posture;
|
||||
|
||||
internal interface IRuntimePostureEvaluator
|
||||
{
|
||||
Task<RuntimePostureEvaluationResult> EvaluateAsync(CriContainerInfo container, CancellationToken cancellationToken);
|
||||
}
|
||||
180
src/StellaOps.Zastava.Observer/Posture/RuntimePostureCache.cs
Normal file
180
src/StellaOps.Zastava.Observer/Posture/RuntimePostureCache.cs
Normal file
@@ -0,0 +1,180 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Posture;
|
||||
|
||||
internal sealed class RuntimePostureCache : IRuntimePostureCache
|
||||
{
|
||||
private const int CurrentVersion = 1;
|
||||
|
||||
private static readonly JsonSerializerOptions SerializerOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
|
||||
};
|
||||
|
||||
private readonly IOptionsMonitor<ZastavaObserverOptions> optionsMonitor;
|
||||
private readonly ILogger<RuntimePostureCache> logger;
|
||||
private readonly object entriesLock = new();
|
||||
private readonly object fileLock = new();
|
||||
private readonly Dictionary<string, RuntimePostureCacheEntry> entries = new(StringComparer.Ordinal);
|
||||
|
||||
public RuntimePostureCache(
|
||||
IOptionsMonitor<ZastavaObserverOptions> optionsMonitor,
|
||||
ILogger<RuntimePostureCache> logger)
|
||||
{
|
||||
this.optionsMonitor = optionsMonitor ?? throw new ArgumentNullException(nameof(optionsMonitor));
|
||||
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
|
||||
Load();
|
||||
}
|
||||
|
||||
public RuntimePostureCacheEntry? Get(string key)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(key))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
lock (entriesLock)
|
||||
{
|
||||
return entries.TryGetValue(key, out var entry) ? entry : null;
|
||||
}
|
||||
}
|
||||
|
||||
public void Set(string key, RuntimePosture posture, DateTimeOffset expiresAtUtc, DateTimeOffset storedAtUtc)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(key))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
ArgumentNullException.ThrowIfNull(posture);
|
||||
var normalizedKey = key.Trim();
|
||||
var entry = new RuntimePostureCacheEntry(posture, expiresAtUtc, storedAtUtc);
|
||||
|
||||
lock (entriesLock)
|
||||
{
|
||||
entries[normalizedKey] = entry;
|
||||
}
|
||||
|
||||
Persist();
|
||||
}
|
||||
|
||||
private void Load()
|
||||
{
|
||||
var path = GetCachePath();
|
||||
if (!File.Exists(path))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var json = File.ReadAllText(path);
|
||||
var snapshot = JsonSerializer.Deserialize<CacheFileModel>(json, SerializerOptions);
|
||||
if (snapshot?.Entries is null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
lock (entriesLock)
|
||||
{
|
||||
entries.Clear();
|
||||
foreach (var entry in snapshot.Entries)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(entry.Key) || entry.Posture is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
entries[entry.Key] = new RuntimePostureCacheEntry(
|
||||
entry.Posture,
|
||||
entry.ExpiresAtUtc,
|
||||
entry.StoredAtUtc);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning(ex, "Failed to load runtime posture cache from {CachePath}; starting empty.", path);
|
||||
}
|
||||
}
|
||||
|
||||
private void Persist()
|
||||
{
|
||||
var path = GetCachePath();
|
||||
var directory = Path.GetDirectoryName(path);
|
||||
if (!string.IsNullOrEmpty(directory))
|
||||
{
|
||||
Directory.CreateDirectory(directory);
|
||||
}
|
||||
|
||||
CacheFileModel snapshot;
|
||||
lock (entriesLock)
|
||||
{
|
||||
var ordered = entries
|
||||
.OrderBy(pair => pair.Key, StringComparer.Ordinal)
|
||||
.Select(pair => new CacheFileEntry
|
||||
{
|
||||
Key = pair.Key,
|
||||
ExpiresAtUtc = pair.Value.ExpiresAtUtc,
|
||||
StoredAtUtc = pair.Value.StoredAtUtc,
|
||||
Posture = pair.Value.Posture
|
||||
})
|
||||
.ToList();
|
||||
|
||||
snapshot = new CacheFileModel
|
||||
{
|
||||
Version = CurrentVersion,
|
||||
Entries = ordered
|
||||
};
|
||||
}
|
||||
|
||||
var json = JsonSerializer.Serialize(snapshot, SerializerOptions);
|
||||
|
||||
lock (fileLock)
|
||||
{
|
||||
var tempPath = path + ".tmp";
|
||||
File.WriteAllText(tempPath, json);
|
||||
File.Move(tempPath, path, overwrite: true);
|
||||
}
|
||||
}
|
||||
|
||||
private string GetCachePath()
|
||||
{
|
||||
return optionsMonitor.CurrentValue.Posture.CachePath;
|
||||
}
|
||||
|
||||
private sealed record CacheFileModel
|
||||
{
|
||||
[JsonPropertyName("version")]
|
||||
public int Version { get; init; }
|
||||
|
||||
[JsonPropertyName("entries")]
|
||||
public List<CacheFileEntry> Entries { get; init; } = new();
|
||||
}
|
||||
|
||||
private sealed record CacheFileEntry
|
||||
{
|
||||
[JsonPropertyName("key")]
|
||||
public string Key { get; init; } = string.Empty;
|
||||
|
||||
[JsonPropertyName("expiresAtUtc")]
|
||||
public DateTimeOffset ExpiresAtUtc { get; init; }
|
||||
|
||||
[JsonPropertyName("storedAtUtc")]
|
||||
public DateTimeOffset StoredAtUtc { get; init; }
|
||||
|
||||
[JsonPropertyName("posture")]
|
||||
public RuntimePosture? Posture { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
using System;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Posture;
|
||||
|
||||
internal sealed record RuntimePostureCacheEntry(RuntimePosture Posture, DateTimeOffset ExpiresAtUtc, DateTimeOffset StoredAtUtc)
|
||||
{
|
||||
public bool IsExpired(DateTimeOffset now) => now >= ExpiresAtUtc;
|
||||
|
||||
public bool IsStale(DateTimeOffset now, TimeSpan staleThreshold)
|
||||
=> now - ExpiresAtUtc >= staleThreshold;
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
using System.Collections.Generic;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Posture;
|
||||
|
||||
internal sealed record RuntimePostureEvaluationResult(RuntimePosture? Posture, IReadOnlyList<RuntimeEvidence> Evidence);
|
||||
@@ -0,0 +1,188 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
using StellaOps.Zastava.Observer.Backend;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Posture;
|
||||
|
||||
internal sealed class RuntimePostureEvaluator : IRuntimePostureEvaluator
|
||||
{
|
||||
private readonly IRuntimePolicyClient policyClient;
|
||||
private readonly IRuntimePostureCache cache;
|
||||
private readonly IOptionsMonitor<ZastavaObserverOptions> optionsMonitor;
|
||||
private readonly TimeProvider timeProvider;
|
||||
private readonly ILogger<RuntimePostureEvaluator> logger;
|
||||
|
||||
public RuntimePostureEvaluator(
|
||||
IRuntimePolicyClient policyClient,
|
||||
IRuntimePostureCache cache,
|
||||
IOptionsMonitor<ZastavaObserverOptions> optionsMonitor,
|
||||
TimeProvider timeProvider,
|
||||
ILogger<RuntimePostureEvaluator> logger)
|
||||
{
|
||||
this.policyClient = policyClient ?? throw new ArgumentNullException(nameof(policyClient));
|
||||
this.cache = cache ?? throw new ArgumentNullException(nameof(cache));
|
||||
this.optionsMonitor = optionsMonitor ?? throw new ArgumentNullException(nameof(optionsMonitor));
|
||||
this.timeProvider = timeProvider ?? TimeProvider.System;
|
||||
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task<RuntimePostureEvaluationResult> EvaluateAsync(CriContainerInfo container, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(container);
|
||||
|
||||
var evidence = new List<RuntimeEvidence>();
|
||||
var now = timeProvider.GetUtcNow();
|
||||
var cacheOptions = optionsMonitor.CurrentValue.Posture;
|
||||
var fallbackTtl = TimeSpan.FromSeconds(Math.Clamp(cacheOptions.FallbackTtlSeconds, 30, 86400));
|
||||
var staleThreshold = TimeSpan.FromSeconds(Math.Clamp(cacheOptions.StaleWarningThresholdSeconds, 30, 86400));
|
||||
|
||||
var imageKey = ResolveImageKey(container);
|
||||
if (string.IsNullOrWhiteSpace(imageKey))
|
||||
{
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "runtime.posture.skipped",
|
||||
Value = "no-image-ref"
|
||||
});
|
||||
return new RuntimePostureEvaluationResult(null, evidence);
|
||||
}
|
||||
|
||||
var cached = cache.Get(imageKey);
|
||||
if (cached is not null && !cached.IsExpired(now))
|
||||
{
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "runtime.posture.cache",
|
||||
Value = "hit"
|
||||
});
|
||||
return new RuntimePostureEvaluationResult(cached.Posture, evidence);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var request = BuildRequest(container, imageKey);
|
||||
var response = await policyClient.EvaluateAsync(request, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (!response.Results.TryGetValue(imageKey, out var imageResult))
|
||||
{
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "runtime.posture.missing",
|
||||
Value = "policy-empty"
|
||||
});
|
||||
return new RuntimePostureEvaluationResult(null, evidence);
|
||||
}
|
||||
|
||||
var posture = MapPosture(imageResult);
|
||||
var expiresAt = response.ExpiresAtUtc != default
|
||||
? response.ExpiresAtUtc
|
||||
: now.AddSeconds(response.TtlSeconds > 0 ? response.TtlSeconds : fallbackTtl.TotalSeconds);
|
||||
|
||||
cache.Set(imageKey, posture, expiresAt, now);
|
||||
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "runtime.posture.source",
|
||||
Value = "backend"
|
||||
});
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "runtime.posture.ttl",
|
||||
Value = expiresAt.ToString("O", CultureInfo.InvariantCulture)
|
||||
});
|
||||
|
||||
return new RuntimePostureEvaluationResult(posture, evidence);
|
||||
}
|
||||
catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
logger.LogWarning(ex, "Runtime posture evaluation failed for image {ImageRef}.", imageKey);
|
||||
|
||||
if (cached is not null)
|
||||
{
|
||||
var cacheSignal = cached.IsExpired(now)
|
||||
? cached.IsStale(now, staleThreshold) ? "stale-warning" : "stale"
|
||||
: "hit";
|
||||
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "runtime.posture.cache",
|
||||
Value = cacheSignal
|
||||
});
|
||||
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "runtime.posture.error",
|
||||
Value = ex.GetType().Name
|
||||
});
|
||||
|
||||
return new RuntimePostureEvaluationResult(cached.Posture, evidence);
|
||||
}
|
||||
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "runtime.posture.error",
|
||||
Value = ex.GetType().Name
|
||||
});
|
||||
|
||||
return new RuntimePostureEvaluationResult(null, evidence);
|
||||
}
|
||||
}
|
||||
|
||||
private static string? ResolveImageKey(CriContainerInfo container)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(container.ImageRef))
|
||||
{
|
||||
return container.ImageRef;
|
||||
}
|
||||
|
||||
return string.IsNullOrWhiteSpace(container.Image) ? null : container.Image;
|
||||
}
|
||||
|
||||
private static RuntimePolicyRequest BuildRequest(CriContainerInfo container, string imageKey)
|
||||
{
|
||||
var labels = container.Labels.Count == 0
|
||||
? null
|
||||
: new Dictionary<string, string>(container.Labels, StringComparer.Ordinal);
|
||||
|
||||
labels?.Remove(CriLabelKeys.PodUid);
|
||||
|
||||
return new RuntimePolicyRequest
|
||||
{
|
||||
Namespace = container.Labels.TryGetValue(CriLabelKeys.PodNamespace, out var ns) ? ns : null,
|
||||
Labels = labels,
|
||||
Images = new[] { imageKey }
|
||||
};
|
||||
}
|
||||
|
||||
private static RuntimePosture MapPosture(RuntimePolicyImageResult result)
|
||||
{
|
||||
var posture = new RuntimePosture
|
||||
{
|
||||
ImageSigned = result.Signed,
|
||||
SbomReferrer = result.HasSbomReferrers ? "present" : "missing"
|
||||
};
|
||||
|
||||
if (result.Rekor is not null)
|
||||
{
|
||||
posture = posture with
|
||||
{
|
||||
Attestation = new RuntimeAttestation
|
||||
{
|
||||
Uuid = result.Rekor.Uuid,
|
||||
Verified = result.Rekor.Verified
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return posture;
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,6 @@ using StellaOps.Zastava.Observer.Worker;
|
||||
|
||||
var builder = Host.CreateApplicationBuilder(args);
|
||||
|
||||
builder.Services.AddZastavaRuntimeCore(builder.Configuration, componentName: "observer");
|
||||
builder.Services.AddHostedService<ObserverBootstrapService>();
|
||||
builder.Services.AddZastavaObserver(builder.Configuration);
|
||||
|
||||
await builder.Build().RunAsync();
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("StellaOps.Zastava.Observer.Tests")]
|
||||
287
src/StellaOps.Zastava.Observer/Runtime/ElfBuildIdReader.cs
Normal file
287
src/StellaOps.Zastava.Observer/Runtime/ElfBuildIdReader.cs
Normal file
@@ -0,0 +1,287 @@
|
||||
using System.Buffers.Binary;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Runtime;
|
||||
|
||||
internal static class ElfBuildIdReader
|
||||
{
|
||||
private const int ElfIdentificationSize = 16;
|
||||
private const byte ElfClass32 = 1;
|
||||
private const byte ElfClass64 = 2;
|
||||
private const byte ElfDataLittleEndian = 1;
|
||||
private const byte ElfDataBigEndian = 2;
|
||||
private const uint ProgramHeaderTypeNote = 4;
|
||||
private const uint NoteTypeGnuBuildId = 3;
|
||||
private const int Alignment = 4;
|
||||
private const int MaxNoteSegmentBytes = 1 << 20; // 1 MiB
|
||||
|
||||
public static async Task<string?> TryReadBuildIdAsync(string path, CancellationToken cancellationToken)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(path))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete);
|
||||
var header = await ReadHeaderAsync(stream, cancellationToken).ConfigureAwait(false);
|
||||
if (header is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return await ReadBuildIdFromNotesAsync(stream, header.Value, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex) when (ex is IOException or UnauthorizedAccessException or NotSupportedException)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<string?> ReadBuildIdFromNotesAsync(Stream stream, ElfHeader header, CancellationToken cancellationToken)
|
||||
{
|
||||
if (header.ProgramHeaderEntrySize is 0 || header.ProgramHeaderCount is 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var entryBuffer = new byte[header.ProgramHeaderEntrySize];
|
||||
for (var index = 0; index < header.ProgramHeaderCount; index++)
|
||||
{
|
||||
var entryOffset = header.ProgramHeaderOffset + (ulong)header.ProgramHeaderEntrySize * (ulong)index;
|
||||
if (entryOffset > (ulong)stream.Length)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
stream.Seek((long)entryOffset, SeekOrigin.Begin);
|
||||
if (!await ReadExactlyAsync(stream, entryBuffer.AsMemory(0, header.ProgramHeaderEntrySize), cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
var entry = entryBuffer.AsSpan(0, header.ProgramHeaderEntrySize);
|
||||
var type = ReadUInt32(entry, 0, header.IsLittleEndian);
|
||||
if (type != ProgramHeaderTypeNote)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
ulong segmentOffset;
|
||||
ulong segmentSize;
|
||||
if (header.Class == ElfClass64)
|
||||
{
|
||||
segmentOffset = ReadUInt64(entry, 8, header.IsLittleEndian);
|
||||
segmentSize = ReadUInt64(entry, 32, header.IsLittleEndian);
|
||||
}
|
||||
else
|
||||
{
|
||||
segmentOffset = ReadUInt32(entry, 4, header.IsLittleEndian);
|
||||
segmentSize = ReadUInt32(entry, 16, header.IsLittleEndian);
|
||||
}
|
||||
|
||||
if (segmentSize == 0 || segmentOffset > (ulong)stream.Length)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var boundedSize = (int)Math.Min(segmentSize, (ulong)MaxNoteSegmentBytes);
|
||||
if (boundedSize <= 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
stream.Seek((long)segmentOffset, SeekOrigin.Begin);
|
||||
var segmentBuffer = new byte[boundedSize];
|
||||
if (!await ReadExactlyAsync(stream, segmentBuffer.AsMemory(0, boundedSize), cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var buildId = ParseNoteSegment(segmentBuffer.AsSpan(0, boundedSize), header.IsLittleEndian);
|
||||
if (buildId is not null)
|
||||
{
|
||||
return buildId;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static string? ParseNoteSegment(ReadOnlySpan<byte> segment, bool isLittleEndian)
|
||||
{
|
||||
var offset = 0;
|
||||
while (offset + 12 <= segment.Length)
|
||||
{
|
||||
var nameSize = ReadUInt32(segment, offset, isLittleEndian);
|
||||
var descSize = ReadUInt32(segment, offset + 4, isLittleEndian);
|
||||
var type = ReadUInt32(segment, offset + 8, isLittleEndian);
|
||||
offset += 12;
|
||||
|
||||
if (nameSize > int.MaxValue || descSize > int.MaxValue)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var alignedNameSize = Align((int)nameSize);
|
||||
var alignedDescSize = Align((int)descSize);
|
||||
|
||||
if (offset + alignedNameSize + alignedDescSize > segment.Length)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var nameBytes = segment.Slice(offset, (int)nameSize);
|
||||
offset += alignedNameSize;
|
||||
|
||||
var descriptorBytes = segment.Slice(offset, (int)descSize);
|
||||
offset += alignedDescSize;
|
||||
|
||||
if (type == NoteTypeGnuBuildId && IsGnuName(nameBytes))
|
||||
{
|
||||
return Convert.ToHexString(descriptorBytes).ToLowerInvariant();
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static bool IsGnuName(ReadOnlySpan<byte> name)
|
||||
{
|
||||
var length = name.IndexOf((byte)0);
|
||||
if (length < 0)
|
||||
{
|
||||
length = name.Length;
|
||||
}
|
||||
|
||||
if (length != 3)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return name[0] == (byte)'G'
|
||||
&& name[1] == (byte)'N'
|
||||
&& name[2] == (byte)'U';
|
||||
}
|
||||
|
||||
private static async Task<ElfHeader?> ReadHeaderAsync(Stream stream, CancellationToken cancellationToken)
|
||||
{
|
||||
stream.Seek(0, SeekOrigin.Begin);
|
||||
var identBuffer = new byte[ElfIdentificationSize];
|
||||
if (!await ReadExactlyAsync(stream, identBuffer.AsMemory(0, ElfIdentificationSize), cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var ident = identBuffer.AsSpan();
|
||||
if (ident[0] != 0x7F || ident[1] != (byte)'E' || ident[2] != (byte)'L' || ident[3] != (byte)'F')
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var elfClass = ident[4];
|
||||
if (elfClass != ElfClass32 && elfClass != ElfClass64)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var dataEncoding = ident[5];
|
||||
var isLittleEndian = dataEncoding is ElfDataLittleEndian or 0;
|
||||
if (dataEncoding == 0)
|
||||
{
|
||||
isLittleEndian = true;
|
||||
}
|
||||
else if (dataEncoding != ElfDataLittleEndian && dataEncoding != ElfDataBigEndian)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var remainingHeaderSize = elfClass == ElfClass64 ? 64 - ElfIdentificationSize : 52 - ElfIdentificationSize;
|
||||
var buffer = new byte[remainingHeaderSize];
|
||||
if (!await ReadExactlyAsync(stream, buffer.AsMemory(0, remainingHeaderSize), cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var span = buffer.AsSpan(0, remainingHeaderSize);
|
||||
ulong programHeaderOffset;
|
||||
ushort programHeaderEntrySize;
|
||||
ushort programHeaderCount;
|
||||
|
||||
if (elfClass == ElfClass64)
|
||||
{
|
||||
programHeaderOffset = ReadUInt64(span, 16, isLittleEndian);
|
||||
programHeaderEntrySize = ReadUInt16(span, 38, isLittleEndian);
|
||||
programHeaderCount = ReadUInt16(span, 40, isLittleEndian);
|
||||
}
|
||||
else
|
||||
{
|
||||
programHeaderOffset = ReadUInt32(span, 12, isLittleEndian);
|
||||
programHeaderEntrySize = ReadUInt16(span, 26, isLittleEndian);
|
||||
programHeaderCount = ReadUInt16(span, 28, isLittleEndian);
|
||||
}
|
||||
|
||||
return new ElfHeader(elfClass, isLittleEndian, programHeaderOffset, programHeaderEntrySize, programHeaderCount);
|
||||
}
|
||||
|
||||
private static uint ReadUInt32(ReadOnlySpan<byte> buffer, int offset, bool isLittleEndian)
|
||||
{
|
||||
var slice = buffer.Slice(offset, sizeof(uint));
|
||||
return isLittleEndian
|
||||
? BinaryPrimitives.ReadUInt32LittleEndian(slice)
|
||||
: BinaryPrimitives.ReadUInt32BigEndian(slice);
|
||||
}
|
||||
|
||||
private static ulong ReadUInt64(ReadOnlySpan<byte> buffer, int offset, bool isLittleEndian)
|
||||
{
|
||||
var slice = buffer.Slice(offset, sizeof(ulong));
|
||||
return isLittleEndian
|
||||
? BinaryPrimitives.ReadUInt64LittleEndian(slice)
|
||||
: BinaryPrimitives.ReadUInt64BigEndian(slice);
|
||||
}
|
||||
|
||||
private static ushort ReadUInt16(ReadOnlySpan<byte> buffer, int offset, bool isLittleEndian)
|
||||
{
|
||||
var slice = buffer.Slice(offset, sizeof(ushort));
|
||||
return isLittleEndian
|
||||
? BinaryPrimitives.ReadUInt16LittleEndian(slice)
|
||||
: BinaryPrimitives.ReadUInt16BigEndian(slice);
|
||||
}
|
||||
|
||||
private static int Align(int value)
|
||||
=> (value + (Alignment - 1)) & ~(Alignment - 1);
|
||||
|
||||
private static async Task<bool> ReadExactlyAsync(Stream stream, Memory<byte> buffer, CancellationToken cancellationToken)
|
||||
{
|
||||
var total = 0;
|
||||
while (total < buffer.Length)
|
||||
{
|
||||
var read = await stream.ReadAsync(buffer.Slice(total), cancellationToken).ConfigureAwait(false);
|
||||
if (read == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
total += read;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private readonly record struct ElfHeader(byte Class, bool IsLittleEndian, ulong ProgramHeaderOffset, ushort ProgramHeaderEntrySize, ushort ProgramHeaderCount)
|
||||
{
|
||||
public byte Class { get; } = Class;
|
||||
public bool IsLittleEndian { get; } = IsLittleEndian;
|
||||
public ulong ProgramHeaderOffset { get; } = ProgramHeaderOffset;
|
||||
public ushort ProgramHeaderEntrySize { get; } = ProgramHeaderEntrySize;
|
||||
public ushort ProgramHeaderCount { get; } = ProgramHeaderCount;
|
||||
}
|
||||
}
|
||||
297
src/StellaOps.Zastava.Observer/Runtime/RuntimeEventBuffer.cs
Normal file
297
src/StellaOps.Zastava.Observer/Runtime/RuntimeEventBuffer.cs
Normal file
@@ -0,0 +1,297 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading.Channels;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
using StellaOps.Zastava.Core.Serialization;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Runtime;
|
||||
|
||||
internal interface IRuntimeEventBuffer
|
||||
{
|
||||
ValueTask WriteBatchAsync(IReadOnlyList<RuntimeEventEnvelope> envelopes, CancellationToken cancellationToken);
|
||||
|
||||
IAsyncEnumerable<RuntimeEventBufferItem> ReadAllAsync(CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
internal sealed record RuntimeEventBufferItem(
|
||||
RuntimeEventEnvelope Envelope,
|
||||
Func<ValueTask> CompleteAsync,
|
||||
Func<CancellationToken, ValueTask> RequeueAsync);
|
||||
|
||||
internal sealed class RuntimeEventBuffer : IRuntimeEventBuffer
|
||||
{
|
||||
private static readonly string FileExtension = ".json";
|
||||
|
||||
private readonly Channel<string> channel;
|
||||
private readonly ConcurrentDictionary<string, byte> inFlight = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly object capacityLock = new();
|
||||
private readonly string spoolPath;
|
||||
private readonly ILogger<RuntimeEventBuffer> logger;
|
||||
private readonly TimeProvider timeProvider;
|
||||
private readonly long maxDiskBytes;
|
||||
|
||||
private long currentBytes;
|
||||
private readonly int capacity;
|
||||
|
||||
public RuntimeEventBuffer(
|
||||
IOptions<ZastavaObserverOptions> observerOptions,
|
||||
TimeProvider timeProvider,
|
||||
ILogger<RuntimeEventBuffer> logger)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(observerOptions);
|
||||
this.timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
|
||||
var options = observerOptions.Value ?? throw new ArgumentNullException(nameof(observerOptions));
|
||||
|
||||
capacity = Math.Clamp(options.MaxInMemoryBuffer, 16, 65536);
|
||||
spoolPath = EnsureSpoolDirectory(options.EventBufferPath);
|
||||
maxDiskBytes = Math.Clamp(options.MaxDiskBufferBytes, 1_048_576L, 1_073_741_824L); // 1 MiB – 1 GiB
|
||||
|
||||
var channelOptions = new BoundedChannelOptions(capacity)
|
||||
{
|
||||
AllowSynchronousContinuations = false,
|
||||
FullMode = BoundedChannelFullMode.Wait,
|
||||
SingleReader = false,
|
||||
SingleWriter = false
|
||||
};
|
||||
|
||||
channel = Channel.CreateBounded<string>(channelOptions);
|
||||
|
||||
var existingFiles = Directory.EnumerateFiles(spoolPath, $"*{FileExtension}", SearchOption.TopDirectoryOnly)
|
||||
.OrderBy(static path => path, StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
|
||||
foreach (var path in existingFiles)
|
||||
{
|
||||
var size = TryGetLength(path);
|
||||
if (size > 0)
|
||||
{
|
||||
Interlocked.Add(ref currentBytes, size);
|
||||
}
|
||||
|
||||
// enqueue existing events for replay
|
||||
if (!channel.Writer.TryWrite(path))
|
||||
{
|
||||
_ = channel.Writer.WriteAsync(path);
|
||||
}
|
||||
}
|
||||
|
||||
if (existingFiles.Length > 0)
|
||||
{
|
||||
logger.LogInformation("Runtime event buffer restored {Count} pending events ({Bytes} bytes) from disk spool.",
|
||||
existingFiles.Length,
|
||||
Interlocked.Read(ref currentBytes));
|
||||
}
|
||||
}
|
||||
|
||||
public async ValueTask WriteBatchAsync(IReadOnlyList<RuntimeEventEnvelope> envelopes, CancellationToken cancellationToken)
|
||||
{
|
||||
if (envelopes is null || envelopes.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
foreach (var envelope in envelopes)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var payload = ZastavaCanonicalJsonSerializer.SerializeToUtf8Bytes(envelope);
|
||||
var filePath = await PersistAsync(payload, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await channel.Writer.WriteAsync(filePath, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
if (envelopes.Count > capacity / 2)
|
||||
{
|
||||
logger.LogDebug("Buffered {Count} runtime events; channel capacity {Capacity}.", envelopes.Count, capacity);
|
||||
}
|
||||
}
|
||||
|
||||
public async IAsyncEnumerable<RuntimeEventBufferItem> ReadAllAsync([EnumeratorCancellation] CancellationToken cancellationToken)
|
||||
{
|
||||
while (await channel.Reader.WaitToReadAsync(cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
while (channel.Reader.TryRead(out var filePath))
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
if (!File.Exists(filePath))
|
||||
{
|
||||
RemoveMetricsForMissingFile(filePath);
|
||||
continue;
|
||||
}
|
||||
|
||||
RuntimeEventEnvelope? envelope = null;
|
||||
try
|
||||
{
|
||||
var json = await File.ReadAllTextAsync(filePath, cancellationToken).ConfigureAwait(false);
|
||||
envelope = ZastavaCanonicalJsonSerializer.Deserialize<RuntimeEventEnvelope>(json);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning(ex, "Failed to read runtime event payload from {Path}; dropping.", filePath);
|
||||
await DeleteFileSilentlyAsync(filePath).ConfigureAwait(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
var currentPath = filePath;
|
||||
inFlight[currentPath] = 0;
|
||||
|
||||
yield return new RuntimeEventBufferItem(
|
||||
envelope,
|
||||
CompleteAsync(currentPath),
|
||||
RequeueAsync(currentPath));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Func<ValueTask> CompleteAsync(string filePath)
|
||||
=> async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
await DeleteFileSilentlyAsync(filePath).ConfigureAwait(false);
|
||||
}
|
||||
finally
|
||||
{
|
||||
inFlight.TryRemove(filePath, out _);
|
||||
}
|
||||
};
|
||||
|
||||
private Func<CancellationToken, ValueTask> RequeueAsync(string filePath)
|
||||
=> async cancellationToken =>
|
||||
{
|
||||
inFlight.TryRemove(filePath, out _);
|
||||
if (!File.Exists(filePath))
|
||||
{
|
||||
RemoveMetricsForMissingFile(filePath);
|
||||
return;
|
||||
}
|
||||
|
||||
await channel.Writer.WriteAsync(filePath, cancellationToken).ConfigureAwait(false);
|
||||
};
|
||||
|
||||
private async Task<string> PersistAsync(byte[] payload, CancellationToken cancellationToken)
|
||||
{
|
||||
var timestamp = timeProvider.GetUtcNow().UtcTicks;
|
||||
var fileName = $"{timestamp:D20}-{Guid.NewGuid():N}{FileExtension}";
|
||||
var filePath = Path.Combine(spoolPath, fileName);
|
||||
|
||||
Directory.CreateDirectory(spoolPath);
|
||||
await File.WriteAllBytesAsync(filePath, payload, cancellationToken).ConfigureAwait(false);
|
||||
Interlocked.Add(ref currentBytes, payload.Length);
|
||||
|
||||
EnforceCapacity();
|
||||
return filePath;
|
||||
}
|
||||
|
||||
private void EnforceCapacity()
|
||||
{
|
||||
if (Volatile.Read(ref currentBytes) <= maxDiskBytes)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
lock (capacityLock)
|
||||
{
|
||||
if (currentBytes <= maxDiskBytes)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var candidates = Directory.EnumerateFiles(spoolPath, $"*{FileExtension}", SearchOption.TopDirectoryOnly)
|
||||
.OrderBy(static path => path, StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
|
||||
foreach (var file in candidates)
|
||||
{
|
||||
if (currentBytes <= maxDiskBytes)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (inFlight.ContainsKey(file))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var length = TryGetLength(file);
|
||||
try
|
||||
{
|
||||
File.Delete(file);
|
||||
if (length > 0)
|
||||
{
|
||||
Interlocked.Add(ref currentBytes, -length);
|
||||
}
|
||||
|
||||
logger.LogWarning("Dropped runtime event {FileName} to enforce disk buffer capacity (limit {MaxBytes} bytes).",
|
||||
Path.GetFileName(file),
|
||||
maxDiskBytes);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning(ex, "Failed to purge runtime event buffer file {FileName}.", Path.GetFileName(file));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Task DeleteFileSilentlyAsync(string filePath)
|
||||
{
|
||||
if (!File.Exists(filePath))
|
||||
{
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
var length = TryGetLength(filePath);
|
||||
try
|
||||
{
|
||||
File.Delete(filePath);
|
||||
if (length > 0)
|
||||
{
|
||||
Interlocked.Add(ref currentBytes, -length);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning(ex, "Failed to delete runtime event buffer file {FileName}.", Path.GetFileName(filePath));
|
||||
}
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
private void RemoveMetricsForMissingFile(string filePath)
|
||||
{
|
||||
var length = TryGetLength(filePath);
|
||||
if (length > 0)
|
||||
{
|
||||
Interlocked.Add(ref currentBytes, -length);
|
||||
}
|
||||
}
|
||||
|
||||
private static string EnsureSpoolDirectory(string? value)
|
||||
{
|
||||
var path = string.IsNullOrWhiteSpace(value)
|
||||
? Path.Combine(Path.GetTempPath(), "zastava-observer", "runtime-events")
|
||||
: value!;
|
||||
|
||||
Directory.CreateDirectory(path);
|
||||
return path;
|
||||
}
|
||||
|
||||
private static long TryGetLength(string path)
|
||||
{
|
||||
try
|
||||
{
|
||||
var info = new FileInfo(path);
|
||||
return info.Exists ? info.Length : 0;
|
||||
}
|
||||
catch
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,525 @@
|
||||
using System.Buffers;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Runtime.CompilerServices;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Runtime;
|
||||
|
||||
internal interface IRuntimeProcessCollector
|
||||
{
|
||||
Task<RuntimeProcessCapture?> CollectAsync(CriContainerInfo container, CancellationToken cancellationToken);
|
||||
}
|
||||
|
||||
internal sealed class RuntimeProcessCollector : IRuntimeProcessCollector
|
||||
{
|
||||
private static readonly Regex ShellRegex = new(@"(^|/)(ba)?sh$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
|
||||
private static readonly Regex PythonRegex = new(@"(^|/)(python)(\d+(\.\d+)*)?$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
|
||||
private static readonly Regex NodeRegex = new(@"(^|/)(node|npm|npx)$", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
|
||||
private const string SyntheticArgvFile = "<argv>";
|
||||
private const int MaxInterpreterTargetLength = 512;
|
||||
|
||||
private readonly ZastavaObserverOptions options;
|
||||
private readonly ILogger<RuntimeProcessCollector> logger;
|
||||
|
||||
public RuntimeProcessCollector(IOptions<ZastavaObserverOptions> options, ILogger<RuntimeProcessCollector> logger)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
this.options = options.Value;
|
||||
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task<RuntimeProcessCapture?> CollectAsync(CriContainerInfo container, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(container);
|
||||
if (container.Pid is null or <= 0)
|
||||
{
|
||||
logger.LogDebug("Container {ContainerId} lacks PID information; skipping process capture.", container.Id);
|
||||
return null;
|
||||
}
|
||||
|
||||
var pid = container.Pid.Value;
|
||||
var procRoot = options.ProcRootPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
|
||||
var pidDirectory = Path.Combine(procRoot, pid.ToString(CultureInfo.InvariantCulture));
|
||||
|
||||
try
|
||||
{
|
||||
var process = await ReadProcessAsync(pidDirectory, pid, cancellationToken).ConfigureAwait(false);
|
||||
if (process is null)
|
||||
{
|
||||
logger.LogDebug("No cmdline information available for PID {Pid}; skipping process capture.", pid);
|
||||
return null;
|
||||
}
|
||||
|
||||
var buildId = await ElfBuildIdReader.TryReadBuildIdAsync(Path.Combine(pidDirectory, "exe"), cancellationToken).ConfigureAwait(false);
|
||||
if (!string.IsNullOrWhiteSpace(buildId))
|
||||
{
|
||||
process = process with { BuildId = buildId };
|
||||
}
|
||||
|
||||
var (libraries, evidence) = await ReadLibrariesAsync(pidDirectory, cancellationToken).ConfigureAwait(false);
|
||||
evidence.Insert(0, new RuntimeEvidence
|
||||
{
|
||||
Signal = "procfs.cmdline",
|
||||
Value = $"{pid}:{string.Join(' ', process.Entrypoint)}"
|
||||
});
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(buildId))
|
||||
{
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "procfs.buildId",
|
||||
Value = buildId
|
||||
});
|
||||
}
|
||||
|
||||
return new RuntimeProcessCapture(process, libraries, evidence);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning(ex, "Failed to capture process information for container {ContainerId} (PID {Pid}).", container.Id, pid);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<RuntimeProcess?> ReadProcessAsync(string pidDirectory, int pid, CancellationToken cancellationToken)
|
||||
{
|
||||
var cmdlinePath = Path.Combine(pidDirectory, "cmdline");
|
||||
if (!File.Exists(cmdlinePath))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var content = await File.ReadAllBytesAsync(cmdlinePath, cancellationToken).ConfigureAwait(false);
|
||||
if (content.Length == 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var arguments = ParseCmdline(content, options.MaxEntrypointArguments);
|
||||
if (arguments.Count == 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var entryTrace = BuildEntryTrace(arguments);
|
||||
|
||||
return new RuntimeProcess
|
||||
{
|
||||
Pid = pid,
|
||||
Entrypoint = arguments,
|
||||
EntryTrace = entryTrace
|
||||
};
|
||||
}
|
||||
|
||||
private async Task<(IReadOnlyList<RuntimeLoadedLibrary> Libraries, List<RuntimeEvidence> Evidence)> ReadLibrariesAsync(
|
||||
string pidDirectory,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var mapsPath = Path.Combine(pidDirectory, "maps");
|
||||
var libraries = new List<RuntimeLoadedLibrary>();
|
||||
var evidence = new List<RuntimeEvidence>();
|
||||
|
||||
if (!File.Exists(mapsPath))
|
||||
{
|
||||
return (libraries, evidence);
|
||||
}
|
||||
|
||||
var seen = new HashSet<string>(StringComparer.Ordinal);
|
||||
var limit = Math.Max(1, options.MaxTrackedLibraries);
|
||||
var perFileLimit = Math.Max(1024L, options.MaxLibraryBytes);
|
||||
var hashBudget = options.MaxLibraryHashBytes <= 0
|
||||
? long.MaxValue
|
||||
: Math.Max(perFileLimit, options.MaxLibraryHashBytes);
|
||||
long hashedBytes = 0;
|
||||
var budgetSignaled = false;
|
||||
|
||||
await foreach (var line in ReadLinesAsync(mapsPath, cancellationToken))
|
||||
{
|
||||
if (!TryParseMapsEntry(line, out var path, out var baseAddress))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!seen.Add(path))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (libraries.Count >= limit)
|
||||
{
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "procfs.maps.truncated",
|
||||
Value = $"limit={limit}"
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
long length;
|
||||
long? inode;
|
||||
try
|
||||
{
|
||||
var fileInfo = new FileInfo(path);
|
||||
length = fileInfo.Length;
|
||||
inode = TryGetInode(fileInfo);
|
||||
}
|
||||
catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
|
||||
{
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "procfs.maps.error",
|
||||
Value = $"{path}:{ex.GetType().Name}"
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
var sizeExceeded = length > perFileLimit;
|
||||
string? hash = null;
|
||||
|
||||
if (!sizeExceeded && length > 0)
|
||||
{
|
||||
var remainingBudget = hashBudget - hashedBytes;
|
||||
if (remainingBudget <= 0 || length > remainingBudget)
|
||||
{
|
||||
if (!budgetSignaled && hashBudget != long.MaxValue)
|
||||
{
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "procfs.maps.hashBudget",
|
||||
Value = $"limit={hashBudget}"
|
||||
});
|
||||
budgetSignaled = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete);
|
||||
hash = await ComputeSha256Async(stream, cancellationToken).ConfigureAwait(false);
|
||||
hashedBytes += length;
|
||||
}
|
||||
catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
|
||||
{
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "procfs.maps.error",
|
||||
Value = $"{path}:{ex.GetType().Name}"
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sizeExceeded)
|
||||
{
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "procfs.maps.skipped",
|
||||
Value = $"{path}:size>{perFileLimit}"
|
||||
});
|
||||
}
|
||||
|
||||
var library = new RuntimeLoadedLibrary
|
||||
{
|
||||
Path = path,
|
||||
Inode = inode,
|
||||
Sha256 = hash
|
||||
};
|
||||
libraries.Add(library);
|
||||
|
||||
var value = baseAddress is null ? path : $"{path}@{baseAddress}";
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "procfs.maps",
|
||||
Value = value
|
||||
});
|
||||
}
|
||||
|
||||
evidence.Add(new RuntimeEvidence
|
||||
{
|
||||
Signal = "procfs.maps.count",
|
||||
Value = libraries.Count.ToString(CultureInfo.InvariantCulture)
|
||||
});
|
||||
|
||||
return (libraries, evidence);
|
||||
}
|
||||
|
||||
private static async IAsyncEnumerable<string> ReadLinesAsync(string path, [EnumeratorCancellation] CancellationToken cancellationToken)
|
||||
{
|
||||
using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete);
|
||||
using var reader = new StreamReader(stream, Encoding.UTF8);
|
||||
while (true)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
var line = await reader.ReadLineAsync().ConfigureAwait(false);
|
||||
if (line is null)
|
||||
{
|
||||
yield break;
|
||||
}
|
||||
|
||||
yield return line;
|
||||
}
|
||||
}
|
||||
|
||||
private static bool TryParseMapsEntry(string line, out string path, out string? baseAddress)
|
||||
{
|
||||
path = string.Empty;
|
||||
baseAddress = null;
|
||||
|
||||
if (string.IsNullOrWhiteSpace(line))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
var span = line.AsSpan().Trim();
|
||||
var lastSpace = span.LastIndexOf(' ');
|
||||
if (lastSpace < 0 || lastSpace >= span.Length - 1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
var candidate = span[(lastSpace + 1)..].Trim();
|
||||
if (candidate.IsEmpty || candidate[0] == '[')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
path = candidate.ToString();
|
||||
|
||||
var firstSpace = span.IndexOf(' ');
|
||||
if (firstSpace > 0)
|
||||
{
|
||||
var rangeSpan = span[..firstSpace];
|
||||
var dashIndex = rangeSpan.IndexOf('-');
|
||||
if (dashIndex > 0)
|
||||
{
|
||||
var startSpan = rangeSpan[..dashIndex];
|
||||
if (!startSpan.IsEmpty)
|
||||
{
|
||||
baseAddress = startSpan.StartsWith("0x", StringComparison.OrdinalIgnoreCase)
|
||||
? startSpan.ToString()
|
||||
: $"0x{startSpan.ToString()}";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static async Task<string> ComputeSha256Async(Stream stream, CancellationToken cancellationToken)
|
||||
{
|
||||
using var sha = SHA256.Create();
|
||||
var buffer = ArrayPool<byte>.Shared.Rent(8192);
|
||||
try
|
||||
{
|
||||
int read;
|
||||
while ((read = await stream.ReadAsync(buffer.AsMemory(0, buffer.Length), cancellationToken).ConfigureAwait(false)) > 0)
|
||||
{
|
||||
sha.TransformBlock(buffer, 0, read, null, 0);
|
||||
}
|
||||
sha.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
|
||||
return Convert.ToHexString(sha.Hash!).ToLowerInvariant();
|
||||
}
|
||||
finally
|
||||
{
|
||||
ArrayPool<byte>.Shared.Return(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
private static long? TryGetInode(FileInfo fileInfo) => null;
|
||||
|
||||
private static List<string> ParseCmdline(byte[] content, int maxArguments)
|
||||
{
|
||||
var segments = Encoding.UTF8.GetString(content).Split('\0', StringSplitOptions.RemoveEmptyEntries);
|
||||
var list = segments.Take(maxArguments).ToList();
|
||||
return list;
|
||||
}
|
||||
|
||||
private static IReadOnlyList<RuntimeEntryTrace> BuildEntryTrace(IReadOnlyList<string> arguments)
|
||||
{
|
||||
var traces = new List<RuntimeEntryTrace>();
|
||||
if (arguments.Count == 0)
|
||||
{
|
||||
return traces;
|
||||
}
|
||||
|
||||
var first = arguments[0];
|
||||
traces.Add(new RuntimeEntryTrace
|
||||
{
|
||||
File = first,
|
||||
Op = "exec",
|
||||
Target = first
|
||||
});
|
||||
|
||||
if (arguments.Count >= 3 && ShellRegex.IsMatch(first) && string.Equals(arguments[1], "-c", StringComparison.Ordinal))
|
||||
{
|
||||
var script = arguments[2];
|
||||
var tokens = TokenizeCommand(script);
|
||||
if (tokens.Count > 0)
|
||||
{
|
||||
traces.Add(new RuntimeEntryTrace
|
||||
{
|
||||
File = tokens[0],
|
||||
Op = "shell",
|
||||
Target = script
|
||||
});
|
||||
|
||||
TryAddInterpreterTrace(traces, tokens);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
TryAddInterpreterTrace(traces, arguments);
|
||||
}
|
||||
|
||||
return traces;
|
||||
}
|
||||
|
||||
private static void TryAddInterpreterTrace(List<RuntimeEntryTrace> traces, IReadOnlyList<string> tokens)
|
||||
{
|
||||
if (tokens.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var interpreter = tokens[0];
|
||||
if (PythonRegex.IsMatch(interpreter))
|
||||
{
|
||||
var target = ResolveInterpreterTarget(tokens, 1);
|
||||
if (!string.IsNullOrEmpty(target))
|
||||
{
|
||||
traces.Add(new RuntimeEntryTrace
|
||||
{
|
||||
File = SyntheticArgvFile,
|
||||
Op = "python",
|
||||
Target = TrimTarget(target!)
|
||||
});
|
||||
}
|
||||
}
|
||||
else if (NodeRegex.IsMatch(interpreter))
|
||||
{
|
||||
var target = ResolveInterpreterTarget(tokens, 1);
|
||||
if (!string.IsNullOrEmpty(target))
|
||||
{
|
||||
traces.Add(new RuntimeEntryTrace
|
||||
{
|
||||
File = SyntheticArgvFile,
|
||||
Op = "node",
|
||||
Target = TrimTarget(target!)
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static string? ResolveInterpreterTarget(IReadOnlyList<string> tokens, int startIndex)
|
||||
{
|
||||
for (var i = startIndex; i < tokens.Count; i++)
|
||||
{
|
||||
var candidate = tokens[i];
|
||||
if (string.IsNullOrWhiteSpace(candidate))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (candidate.StartsWith("-", StringComparison.Ordinal))
|
||||
{
|
||||
if ((string.Equals(candidate, "-m", StringComparison.Ordinal)
|
||||
|| string.Equals(candidate, "-c", StringComparison.Ordinal)
|
||||
|| string.Equals(candidate, "-e", StringComparison.Ordinal))
|
||||
&& i + 1 < tokens.Count)
|
||||
{
|
||||
return tokens[i + 1];
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
return candidate;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static string TrimTarget(string value)
|
||||
{
|
||||
if (value.Length <= MaxInterpreterTargetLength)
|
||||
{
|
||||
return value;
|
||||
}
|
||||
|
||||
return value[..MaxInterpreterTargetLength];
|
||||
}
|
||||
|
||||
private static List<string> TokenizeCommand(string command)
|
||||
{
|
||||
var tokens = new List<string>();
|
||||
if (string.IsNullOrWhiteSpace(command))
|
||||
{
|
||||
return tokens;
|
||||
}
|
||||
|
||||
var current = new StringBuilder();
|
||||
bool inQuotes = false;
|
||||
char quoteChar = '"';
|
||||
|
||||
foreach (var ch in command)
|
||||
{
|
||||
if (inQuotes)
|
||||
{
|
||||
if (ch == quoteChar)
|
||||
{
|
||||
inQuotes = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
current.Append(ch);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ch == '"' || ch == '\'')
|
||||
{
|
||||
inQuotes = true;
|
||||
quoteChar = ch;
|
||||
}
|
||||
else if (char.IsWhiteSpace(ch))
|
||||
{
|
||||
if (current.Length > 0)
|
||||
{
|
||||
tokens.Add(current.ToString());
|
||||
current.Clear();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
current.Append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (current.Length > 0)
|
||||
{
|
||||
tokens.Add(current.ToString());
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
}
|
||||
|
||||
internal sealed record RuntimeProcessCapture(
|
||||
RuntimeProcess Process,
|
||||
IReadOnlyList<RuntimeLoadedLibrary> Libraries,
|
||||
IReadOnlyList<RuntimeEvidence> Evidence);
|
||||
@@ -2,8 +2,10 @@
|
||||
|
||||
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|
||||
|----|--------|----------|------------|-------------|---------------|
|
||||
| ZASTAVA-OBS-12-001 | DOING | Zastava Observer Guild | ZASTAVA-CORE-12-201 | Build container lifecycle watcher that tails CRI (containerd/cri-o/docker) events and emits deterministic runtime records with buffering + backoff. | Fixture cluster produces start/stop events with stable ordering, jitter/backoff tested, metrics/logging wired. |
|
||||
| ZASTAVA-OBS-12-002 | TODO | Zastava Observer Guild | ZASTAVA-OBS-12-001 | Capture entrypoint traces and loaded libraries, hashing binaries and correlating to SBOM baseline per architecture sections 2.1 and 10. | EntryTrace parser covers shell/python/node launchers, loaded library hashes recorded, fixtures assert linkage to SBOM usage view. |
|
||||
| ZASTAVA-OBS-12-003 | TODO | Zastava Observer Guild | ZASTAVA-OBS-12-002 | Implement runtime posture checks (signature/SBOM/attestation presence) with offline caching and warning surfaces. | Observer marks posture status, caches refresh across restarts, integration tests prove offline tolerance. |
|
||||
| ZASTAVA-OBS-12-004 | TODO | Zastava Observer Guild | ZASTAVA-OBS-12-002 | Batch `/runtime/events` submissions with disk-backed buffer, rate limits, and deterministic envelopes. | Buffered submissions survive restart, rate-limits enforced in tests, JSON envelopes match schema in docs/events. |
|
||||
| ZASTAVA-OBS-17-005 | TODO | Zastava Observer Guild | ZASTAVA-OBS-12-002 | Collect GNU build-id for ELF processes and attach it to emitted runtime events to enable symbol lookup + debug-store correlation. | Observer reads build-id via `/proc/<pid>/exe`/notes without pausing workloads, runtime events include `buildId` field, fixtures cover glibc/musl images, docs updated with retrieval notes. |
|
||||
| ZASTAVA-OBS-12-001 | DONE (2025-10-24) | Zastava Observer Guild | ZASTAVA-CORE-12-201 | Build container lifecycle watcher that tails CRI (containerd/cri-o/docker) events and emits deterministic runtime records with buffering + backoff. | Fixture cluster produces start/stop events with stable ordering, jitter/backoff tested, metrics/logging wired. |
|
||||
| ZASTAVA-OBS-12-002 | DONE (2025-10-24) | Zastava Observer Guild | ZASTAVA-OBS-12-001 | Capture entrypoint traces and loaded libraries, hashing binaries and correlating to SBOM baseline per architecture sections 2.1 and 10. | EntryTrace parser covers shell/python/node launchers, loaded library hashes recorded, fixtures assert linkage to SBOM usage view. |
|
||||
| ZASTAVA-OBS-12-003 | DONE (2025-10-24) | Zastava Observer Guild | ZASTAVA-OBS-12-002 | Implement runtime posture checks (signature/SBOM/attestation presence) with offline caching and warning surfaces. | Observer marks posture status, caches refresh across restarts, integration tests prove offline tolerance. |
|
||||
| ZASTAVA-OBS-12-004 | DONE (2025-10-24) | Zastava Observer Guild | ZASTAVA-OBS-12-002 | Batch `/runtime/events` submissions with disk-backed buffer, rate limits, and deterministic envelopes. | Buffered submissions survive restart, rate-limits enforced in tests, JSON envelopes match schema in docs/events. |
|
||||
| ZASTAVA-OBS-17-005 | DOING (2025-10-24) | Zastava Observer Guild | ZASTAVA-OBS-12-002 | Collect GNU build-id for ELF processes and attach it to emitted runtime events to enable symbol lookup + debug-store correlation. | Observer reads build-id via `/proc/<pid>/exe`/notes without pausing workloads, runtime events include `buildId` field, fixtures cover glibc/musl images, docs updated with retrieval notes. |
|
||||
|
||||
> 2025-10-24: Observer unit tests pending; `dotnet restore` requires offline copies of `Google.Protobuf`, `Grpc.Net.Client`, `Grpc.Tools` in `local-nuget` before execution can be verified.
|
||||
|
||||
26
src/StellaOps.Zastava.Observer/Worker/BackoffCalculator.cs
Normal file
26
src/StellaOps.Zastava.Observer/Worker/BackoffCalculator.cs
Normal file
@@ -0,0 +1,26 @@
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Worker;
|
||||
|
||||
internal static class BackoffCalculator
|
||||
{
|
||||
public static TimeSpan ComputeDelay(ObserverBackoffOptions options, int attempt, Random random)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
ArgumentNullException.ThrowIfNull(random);
|
||||
|
||||
var cappedAttempt = Math.Max(1, attempt);
|
||||
var baseDelayMs = options.Initial.TotalMilliseconds * Math.Pow(2, cappedAttempt - 1);
|
||||
baseDelayMs = Math.Min(baseDelayMs, options.Max.TotalMilliseconds);
|
||||
|
||||
if (options.JitterRatio <= 0)
|
||||
{
|
||||
return TimeSpan.FromMilliseconds(baseDelayMs);
|
||||
}
|
||||
|
||||
var jitterWindow = baseDelayMs * options.JitterRatio;
|
||||
var jitter = (random.NextDouble() * 2 - 1) * jitterWindow;
|
||||
var jittered = Math.Clamp(baseDelayMs + jitter, options.Initial.TotalMilliseconds, options.Max.TotalMilliseconds);
|
||||
return TimeSpan.FromMilliseconds(jittered);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,197 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
using StellaOps.Zastava.Core.Configuration;
|
||||
using StellaOps.Zastava.Core.Diagnostics;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
using StellaOps.Zastava.Observer.Runtime;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Worker;
|
||||
|
||||
internal sealed class ContainerLifecycleHostedService : BackgroundService
|
||||
{
|
||||
private readonly ICriRuntimeClientFactory clientFactory;
|
||||
private readonly IOptionsMonitor<ZastavaObserverOptions> observerOptions;
|
||||
private readonly IOptionsMonitor<ZastavaRuntimeOptions> runtimeOptions;
|
||||
private readonly IZastavaLogScopeBuilder logScopeBuilder;
|
||||
private readonly IZastavaRuntimeMetrics runtimeMetrics;
|
||||
private readonly IRuntimeEventBuffer eventBuffer;
|
||||
private readonly ContainerStateTrackerFactory trackerFactory;
|
||||
private readonly ContainerRuntimePoller poller;
|
||||
private readonly IRuntimeProcessCollector processCollector;
|
||||
private readonly TimeProvider timeProvider;
|
||||
private readonly ILogger<ContainerLifecycleHostedService> logger;
|
||||
private readonly Random jitterRandom = new();
|
||||
|
||||
public ContainerLifecycleHostedService(
|
||||
ICriRuntimeClientFactory clientFactory,
|
||||
IOptionsMonitor<ZastavaObserverOptions> observerOptions,
|
||||
IOptionsMonitor<ZastavaRuntimeOptions> runtimeOptions,
|
||||
IZastavaLogScopeBuilder logScopeBuilder,
|
||||
IZastavaRuntimeMetrics runtimeMetrics,
|
||||
IRuntimeEventBuffer eventBuffer,
|
||||
ContainerStateTrackerFactory trackerFactory,
|
||||
ContainerRuntimePoller poller,
|
||||
IRuntimeProcessCollector processCollector,
|
||||
TimeProvider timeProvider,
|
||||
ILogger<ContainerLifecycleHostedService> logger)
|
||||
{
|
||||
this.clientFactory = clientFactory ?? throw new ArgumentNullException(nameof(clientFactory));
|
||||
this.observerOptions = observerOptions ?? throw new ArgumentNullException(nameof(observerOptions));
|
||||
this.runtimeOptions = runtimeOptions ?? throw new ArgumentNullException(nameof(runtimeOptions));
|
||||
this.logScopeBuilder = logScopeBuilder ?? throw new ArgumentNullException(nameof(logScopeBuilder));
|
||||
this.runtimeMetrics = runtimeMetrics ?? throw new ArgumentNullException(nameof(runtimeMetrics));
|
||||
this.eventBuffer = eventBuffer ?? throw new ArgumentNullException(nameof(eventBuffer));
|
||||
this.trackerFactory = trackerFactory ?? throw new ArgumentNullException(nameof(trackerFactory));
|
||||
this.poller = poller ?? throw new ArgumentNullException(nameof(poller));
|
||||
this.processCollector = processCollector ?? throw new ArgumentNullException(nameof(processCollector));
|
||||
this.timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
protected override Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
var options = observerOptions.CurrentValue;
|
||||
var activeEndpoints = options.Runtimes
|
||||
.Where(static runtime => runtime.Enabled)
|
||||
.ToArray();
|
||||
|
||||
if (activeEndpoints.Length == 0)
|
||||
{
|
||||
logger.LogWarning("No container runtime endpoints configured; lifecycle watcher idle.");
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
var tasks = activeEndpoints
|
||||
.Select(endpoint => MonitorRuntimeAsync(endpoint, stoppingToken))
|
||||
.ToArray();
|
||||
|
||||
return Task.WhenAll(tasks);
|
||||
}
|
||||
|
||||
private async Task MonitorRuntimeAsync(ContainerRuntimeEndpointOptions endpoint, CancellationToken cancellationToken)
|
||||
{
|
||||
var runtime = runtimeOptions.CurrentValue;
|
||||
var tenant = runtime.Tenant;
|
||||
var nodeName = observerOptions.CurrentValue.NodeName;
|
||||
var pollInterval = endpoint.PollInterval ?? observerOptions.CurrentValue.PollInterval;
|
||||
var backoffOptions = observerOptions.CurrentValue.Backoff;
|
||||
|
||||
while (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
await using var client = clientFactory.Create(endpoint);
|
||||
CriRuntimeIdentity identity;
|
||||
try
|
||||
{
|
||||
identity = await client.GetIdentityAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
await HandleFailureAsync(endpoint, 1, backoffOptions, ex, cancellationToken).ConfigureAwait(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
var tracker = trackerFactory.Create();
|
||||
var failureCount = 0;
|
||||
|
||||
while (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
try
|
||||
{
|
||||
var envelopes = await poller.PollAsync(
|
||||
tracker,
|
||||
client,
|
||||
endpoint,
|
||||
identity,
|
||||
tenant,
|
||||
nodeName,
|
||||
timeProvider,
|
||||
processCollector,
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (envelopes.Count > 0)
|
||||
{
|
||||
await PublishAsync(endpoint, envelopes, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
failureCount = 0;
|
||||
await Task.Delay(pollInterval, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
failureCount++;
|
||||
await HandleFailureAsync(endpoint, failureCount, backoffOptions, ex, cancellationToken).ConfigureAwait(false);
|
||||
break; // recreate client
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async Task PublishAsync(ContainerRuntimeEndpointOptions endpoint, IReadOnlyList<RuntimeEventEnvelope> envelopes, CancellationToken cancellationToken)
|
||||
{
|
||||
var endpointName = endpoint.ResolveName();
|
||||
foreach (var envelope in envelopes)
|
||||
{
|
||||
var tags = runtimeMetrics.DefaultTags
|
||||
.Concat(new[]
|
||||
{
|
||||
new KeyValuePair<string, object?>("runtime_endpoint", endpointName),
|
||||
new KeyValuePair<string, object?>("event_kind", envelope.Event.Kind.ToString().ToLowerInvariant())
|
||||
})
|
||||
.ToArray();
|
||||
runtimeMetrics.RuntimeEvents.Add(1, tags);
|
||||
|
||||
var scope = logScopeBuilder.BuildScope(
|
||||
correlationId: envelope.Event.EventId,
|
||||
node: envelope.Event.Node,
|
||||
workload: envelope.Event.Workload.ContainerId,
|
||||
eventId: envelope.Event.EventId,
|
||||
additional: new Dictionary<string, string>
|
||||
{
|
||||
["runtimeEndpoint"] = endpointName,
|
||||
["kind"] = envelope.Event.Kind.ToString()
|
||||
});
|
||||
|
||||
using (logger.BeginScope(scope))
|
||||
{
|
||||
logger.LogInformation("Observed container {ContainerId} ({Kind}) for node {Node}.",
|
||||
envelope.Event.Workload.ContainerId,
|
||||
envelope.Event.Kind,
|
||||
envelope.Event.Node);
|
||||
}
|
||||
}
|
||||
|
||||
await eventBuffer.WriteBatchAsync(envelopes, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private async Task HandleFailureAsync(
|
||||
ContainerRuntimeEndpointOptions endpoint,
|
||||
int failureCount,
|
||||
ObserverBackoffOptions backoffOptions,
|
||||
Exception exception,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var delay = BackoffCalculator.ComputeDelay(backoffOptions, failureCount, jitterRandom);
|
||||
logger.LogWarning(exception, "Runtime watcher for {Endpoint} encountered error (attempt {Attempt}); retrying after {Delay}.",
|
||||
endpoint.ResolveName(),
|
||||
failureCount,
|
||||
delay);
|
||||
|
||||
try
|
||||
{
|
||||
await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
124
src/StellaOps.Zastava.Observer/Worker/ContainerRuntimePoller.cs
Normal file
124
src/StellaOps.Zastava.Observer/Worker/ContainerRuntimePoller.cs
Normal file
@@ -0,0 +1,124 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
using StellaOps.Zastava.Observer.Cri;
|
||||
using StellaOps.Zastava.Observer.Posture;
|
||||
using StellaOps.Zastava.Observer.Runtime;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Worker;
|
||||
|
||||
internal sealed class ContainerRuntimePoller
|
||||
{
|
||||
private readonly ILogger<ContainerRuntimePoller> logger;
|
||||
private readonly IRuntimePostureEvaluator? postureEvaluator;
|
||||
|
||||
public ContainerRuntimePoller(ILogger<ContainerRuntimePoller> logger, IRuntimePostureEvaluator? postureEvaluator = null)
|
||||
{
|
||||
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
this.postureEvaluator = postureEvaluator;
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<RuntimeEventEnvelope>> PollAsync(
|
||||
ContainerStateTracker tracker,
|
||||
ICriRuntimeClient client,
|
||||
ContainerRuntimeEndpointOptions endpoint,
|
||||
CriRuntimeIdentity identity,
|
||||
string tenant,
|
||||
string nodeName,
|
||||
TimeProvider timeProvider,
|
||||
IRuntimeProcessCollector? processCollector,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(tracker);
|
||||
ArgumentNullException.ThrowIfNull(client);
|
||||
ArgumentNullException.ThrowIfNull(endpoint);
|
||||
ArgumentNullException.ThrowIfNull(identity);
|
||||
ArgumentNullException.ThrowIfNull(timeProvider);
|
||||
|
||||
var pollTimestamp = timeProvider.GetUtcNow();
|
||||
tracker.BeginCycle();
|
||||
|
||||
var runningContainers = await client.ListContainersAsync(ContainerState.ContainerRunning, cancellationToken).ConfigureAwait(false);
|
||||
var generated = new List<RuntimeEventEnvelope>();
|
||||
|
||||
if (runningContainers.Count > 0)
|
||||
{
|
||||
foreach (var container in runningContainers)
|
||||
{
|
||||
var enriched = container;
|
||||
var status = await client.GetContainerStatusAsync(container.Id, cancellationToken).ConfigureAwait(false);
|
||||
if (status is not null)
|
||||
{
|
||||
enriched = status;
|
||||
}
|
||||
|
||||
var lifecycleEvent = tracker.MarkRunning(enriched, pollTimestamp);
|
||||
if (lifecycleEvent is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
RuntimeProcessCapture? capture = null;
|
||||
if (processCollector is not null && lifecycleEvent.Kind == ContainerLifecycleEventKind.Start)
|
||||
{
|
||||
capture = await processCollector.CollectAsync(enriched, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
RuntimePostureEvaluationResult? posture = null;
|
||||
if (this.postureEvaluator is not null)
|
||||
{
|
||||
posture = await this.postureEvaluator.EvaluateAsync(enriched, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
generated.Add(RuntimeEventFactory.Create(
|
||||
lifecycleEvent,
|
||||
endpoint,
|
||||
identity,
|
||||
tenant,
|
||||
nodeName,
|
||||
capture,
|
||||
posture?.Posture,
|
||||
posture?.Evidence));
|
||||
}
|
||||
}
|
||||
|
||||
var stopEvents = await tracker.CompleteCycleAsync(
|
||||
id => client.GetContainerStatusAsync(id, cancellationToken),
|
||||
pollTimestamp,
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
|
||||
foreach (var lifecycleEvent in stopEvents)
|
||||
{
|
||||
RuntimePostureEvaluationResult? posture = null;
|
||||
if (this.postureEvaluator is not null)
|
||||
{
|
||||
posture = await this.postureEvaluator.EvaluateAsync(lifecycleEvent.Snapshot, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
generated.Add(RuntimeEventFactory.Create(
|
||||
lifecycleEvent,
|
||||
endpoint,
|
||||
identity,
|
||||
tenant,
|
||||
nodeName,
|
||||
null,
|
||||
posture?.Posture,
|
||||
posture?.Evidence));
|
||||
}
|
||||
|
||||
if (generated.Count == 0)
|
||||
{
|
||||
return Array.Empty<RuntimeEventEnvelope>();
|
||||
}
|
||||
|
||||
var ordered = generated
|
||||
.OrderBy(static envelope => envelope.Event.When)
|
||||
.ThenBy(static envelope => envelope.Event.Workload.ContainerId, StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
|
||||
logger.LogDebug("Generated {Count} runtime events for endpoint {EndpointName}.", ordered.Length, endpoint.ResolveName());
|
||||
return ordered;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,225 @@
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Observer.Backend;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
using StellaOps.Zastava.Observer.Runtime;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Worker;
|
||||
|
||||
internal sealed class RuntimeEventDispatchService : BackgroundService
|
||||
{
|
||||
private readonly IRuntimeEventBuffer buffer;
|
||||
private readonly IRuntimeEventsClient eventsClient;
|
||||
private readonly IOptionsMonitor<ZastavaObserverOptions> observerOptions;
|
||||
private readonly TimeProvider timeProvider;
|
||||
private readonly ILogger<RuntimeEventDispatchService> logger;
|
||||
|
||||
public RuntimeEventDispatchService(
|
||||
IRuntimeEventBuffer buffer,
|
||||
IRuntimeEventsClient eventsClient,
|
||||
IOptionsMonitor<ZastavaObserverOptions> observerOptions,
|
||||
TimeProvider timeProvider,
|
||||
ILogger<RuntimeEventDispatchService> logger)
|
||||
{
|
||||
this.buffer = buffer ?? throw new ArgumentNullException(nameof(buffer));
|
||||
this.eventsClient = eventsClient ?? throw new ArgumentNullException(nameof(eventsClient));
|
||||
this.observerOptions = observerOptions ?? throw new ArgumentNullException(nameof(observerOptions));
|
||||
this.timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
|
||||
this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
var batch = new List<RuntimeEventBufferItem>();
|
||||
var enumerator = buffer.ReadAllAsync(stoppingToken).GetAsyncEnumerator(stoppingToken);
|
||||
Task<bool>? moveNextTask = null;
|
||||
Task? flushDelayTask = null;
|
||||
CancellationTokenSource? flushDelayCts = null;
|
||||
|
||||
try
|
||||
{
|
||||
while (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
moveNextTask ??= enumerator.MoveNextAsync().AsTask();
|
||||
|
||||
if (batch.Count > 0 && flushDelayTask is null)
|
||||
{
|
||||
StartFlushTimer(ref flushDelayTask, ref flushDelayCts, stoppingToken);
|
||||
}
|
||||
|
||||
Task completedTask;
|
||||
if (flushDelayTask is null)
|
||||
{
|
||||
completedTask = await Task.WhenAny(moveNextTask).ConfigureAwait(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
completedTask = await Task.WhenAny(moveNextTask, flushDelayTask).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
if (completedTask == moveNextTask)
|
||||
{
|
||||
if (!await moveNextTask.ConfigureAwait(false))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
var item = enumerator.Current;
|
||||
batch.Add(item);
|
||||
moveNextTask = null;
|
||||
|
||||
var options = observerOptions.CurrentValue;
|
||||
var batchSize = Math.Clamp(options.PublishBatchSize, 1, 512);
|
||||
if (batch.Count >= batchSize)
|
||||
{
|
||||
ResetFlushTimer(ref flushDelayTask, ref flushDelayCts);
|
||||
await FlushAsync(batch, stoppingToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// flush timer triggered
|
||||
ResetFlushTimer(ref flushDelayTask, ref flushDelayCts);
|
||||
if (batch.Count > 0)
|
||||
{
|
||||
await FlushAsync(batch, stoppingToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
ResetFlushTimer(ref flushDelayTask, ref flushDelayCts);
|
||||
|
||||
if (batch.Count > 0 && !stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
await FlushAsync(batch, stoppingToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
if (moveNextTask is not null)
|
||||
{
|
||||
try { await moveNextTask.ConfigureAwait(false); }
|
||||
catch { /* ignored */ }
|
||||
}
|
||||
|
||||
await enumerator.DisposeAsync().ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task FlushAsync(List<RuntimeEventBufferItem> batch, CancellationToken cancellationToken)
|
||||
{
|
||||
if (batch.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var request = new RuntimeEventsIngestRequest
|
||||
{
|
||||
BatchId = $"obs-{timeProvider.GetUtcNow():yyyyMMddTHHmmssfff}-{Guid.NewGuid():N}",
|
||||
Events = batch.Select(item => item.Envelope).ToArray()
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
var result = await eventsClient.PublishAsync(request, cancellationToken).ConfigureAwait(false);
|
||||
if (result.Success)
|
||||
{
|
||||
foreach (var item in batch)
|
||||
{
|
||||
await item.CompleteAsync().ConfigureAwait(false);
|
||||
}
|
||||
|
||||
logger.LogInformation("Runtime events batch published (batchId={BatchId}, accepted={Accepted}, duplicates={Duplicates}).",
|
||||
request.BatchId,
|
||||
result.Accepted,
|
||||
result.Duplicates);
|
||||
}
|
||||
else if (result.RateLimited)
|
||||
{
|
||||
await RequeueBatchAsync(batch, cancellationToken).ConfigureAwait(false);
|
||||
await DelayAsync(result.RetryAfter, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
catch (RuntimeEventsException ex) when (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
logger.LogWarning(ex, "Runtime events publish failed (status={StatusCode}); batch will be retried.", (int)ex.StatusCode);
|
||||
await RequeueBatchAsync(batch, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
var backoff = ex.StatusCode == HttpStatusCode.ServiceUnavailable
|
||||
? TimeSpan.FromSeconds(5)
|
||||
: TimeSpan.FromSeconds(2);
|
||||
|
||||
await DelayAsync(backoff, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
logger.LogWarning(ex, "Runtime events publish encountered an unexpected error; batch will be retried.");
|
||||
await RequeueBatchAsync(batch, cancellationToken).ConfigureAwait(false);
|
||||
await DelayAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
finally
|
||||
{
|
||||
batch.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
private async Task RequeueBatchAsync(IEnumerable<RuntimeEventBufferItem> batch, CancellationToken cancellationToken)
|
||||
{
|
||||
foreach (var item in batch)
|
||||
{
|
||||
try
|
||||
{
|
||||
await item.RequeueAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning(ex, "Failed to requeue runtime event {EventId}; dropping.", item.Envelope.Event.EventId);
|
||||
await item.CompleteAsync().ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async Task DelayAsync(TimeSpan delay, CancellationToken cancellationToken)
|
||||
{
|
||||
if (delay <= TimeSpan.Zero)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
private void StartFlushTimer(ref Task? flushTask, ref CancellationTokenSource? cts, CancellationToken stoppingToken)
|
||||
{
|
||||
var options = observerOptions.CurrentValue;
|
||||
var flushIntervalSeconds = Math.Clamp(options.PublishFlushIntervalSeconds, 0.1, 30);
|
||||
var flushInterval = TimeSpan.FromSeconds(flushIntervalSeconds);
|
||||
|
||||
cts = CancellationTokenSource.CreateLinkedTokenSource(stoppingToken);
|
||||
flushTask = Task.Delay(flushInterval, cts.Token);
|
||||
}
|
||||
|
||||
private void ResetFlushTimer(ref Task? flushTask, ref CancellationTokenSource? cts)
|
||||
{
|
||||
if (cts is not null)
|
||||
{
|
||||
try { cts.Cancel(); } catch { /* ignore */ }
|
||||
cts.Dispose();
|
||||
cts = null;
|
||||
}
|
||||
flushTask = null;
|
||||
}
|
||||
}
|
||||
148
src/StellaOps.Zastava.Observer/Worker/RuntimeEventFactory.cs
Normal file
148
src/StellaOps.Zastava.Observer/Worker/RuntimeEventFactory.cs
Normal file
@@ -0,0 +1,148 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using StellaOps.Zastava.Core.Contracts;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime;
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
using StellaOps.Zastava.Observer.Runtime;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Worker;
|
||||
|
||||
internal static class RuntimeEventFactory
|
||||
{
|
||||
public static RuntimeEventEnvelope Create(
|
||||
ContainerLifecycleEvent lifecycleEvent,
|
||||
ContainerRuntimeEndpointOptions endpoint,
|
||||
CriRuntimeIdentity identity,
|
||||
string tenant,
|
||||
string nodeName,
|
||||
RuntimeProcessCapture? capture = null,
|
||||
RuntimePosture? posture = null,
|
||||
IReadOnlyList<RuntimeEvidence>? additionalEvidence = null)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(lifecycleEvent);
|
||||
ArgumentNullException.ThrowIfNull(endpoint);
|
||||
ArgumentNullException.ThrowIfNull(identity);
|
||||
ArgumentNullException.ThrowIfNull(tenant);
|
||||
ArgumentNullException.ThrowIfNull(nodeName);
|
||||
|
||||
var snapshot = lifecycleEvent.Snapshot;
|
||||
var workloadLabels = snapshot.Labels ?? new Dictionary<string, string>(StringComparer.Ordinal);
|
||||
var annotations = snapshot.Annotations is null
|
||||
? new Dictionary<string, string>(StringComparer.Ordinal)
|
||||
: new Dictionary<string, string>(snapshot.Annotations, StringComparer.Ordinal);
|
||||
|
||||
var platform = ResolvePlatform(workloadLabels, endpoint);
|
||||
var runtimeEvent = new RuntimeEvent
|
||||
{
|
||||
EventId = ComputeEventId(nodeName, lifecycleEvent),
|
||||
When = lifecycleEvent.Timestamp,
|
||||
Kind = lifecycleEvent.Kind == ContainerLifecycleEventKind.Start
|
||||
? RuntimeEventKind.ContainerStart
|
||||
: RuntimeEventKind.ContainerStop,
|
||||
Tenant = tenant,
|
||||
Node = nodeName,
|
||||
Runtime = new RuntimeEngine
|
||||
{
|
||||
Engine = endpoint.Engine.ToEngineString(),
|
||||
Version = identity.RuntimeVersion
|
||||
},
|
||||
Workload = new RuntimeWorkload
|
||||
{
|
||||
Platform = platform,
|
||||
Namespace = TryGet(workloadLabels, CriLabelKeys.PodNamespace),
|
||||
Pod = TryGet(workloadLabels, CriLabelKeys.PodName),
|
||||
Container = TryGet(workloadLabels, CriLabelKeys.ContainerName) ?? snapshot.Name,
|
||||
ContainerId = $"{endpoint.Engine.ToEngineString()}://{snapshot.Id}",
|
||||
ImageRef = ResolveImageRef(snapshot),
|
||||
Owner = null
|
||||
},
|
||||
Process = capture?.Process,
|
||||
LoadedLibraries = capture?.Libraries ?? Array.Empty<RuntimeLoadedLibrary>(),
|
||||
Posture = posture,
|
||||
Evidence = MergeEvidence(capture?.Evidence, additionalEvidence),
|
||||
Annotations = annotations.Count == 0 ? null : new SortedDictionary<string, string>(annotations, StringComparer.Ordinal)
|
||||
};
|
||||
|
||||
return RuntimeEventEnvelope.Create(runtimeEvent, ZastavaContractVersions.RuntimeEvent);
|
||||
}
|
||||
|
||||
private static string ResolvePlatform(IReadOnlyDictionary<string, string> labels, ContainerRuntimeEndpointOptions endpoint)
|
||||
{
|
||||
if (labels.ContainsKey(CriLabelKeys.PodName))
|
||||
{
|
||||
return "kubernetes";
|
||||
}
|
||||
|
||||
return endpoint.Engine.ToEngineString();
|
||||
}
|
||||
|
||||
private static IReadOnlyList<RuntimeEvidence> MergeEvidence(
|
||||
IReadOnlyList<RuntimeEvidence>? primary,
|
||||
IReadOnlyList<RuntimeEvidence>? secondary)
|
||||
{
|
||||
if ((primary is null || primary.Count == 0) && (secondary is null || secondary.Count == 0))
|
||||
{
|
||||
return Array.Empty<RuntimeEvidence>();
|
||||
}
|
||||
|
||||
if (secondary is null || secondary.Count == 0)
|
||||
{
|
||||
return primary ?? Array.Empty<RuntimeEvidence>();
|
||||
}
|
||||
|
||||
if (primary is null || primary.Count == 0)
|
||||
{
|
||||
return secondary;
|
||||
}
|
||||
|
||||
var merged = new List<RuntimeEvidence>(primary.Count + secondary.Count);
|
||||
merged.AddRange(primary);
|
||||
merged.AddRange(secondary);
|
||||
return merged;
|
||||
}
|
||||
|
||||
private static string? ResolveImageRef(CriContainerInfo snapshot)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(snapshot.ImageRef))
|
||||
{
|
||||
return snapshot.ImageRef;
|
||||
}
|
||||
|
||||
return snapshot.Image;
|
||||
}
|
||||
|
||||
private static string? TryGet(IReadOnlyDictionary<string, string> dictionary, string key)
|
||||
{
|
||||
if (dictionary.TryGetValue(key, out var value) && !string.IsNullOrWhiteSpace(value))
|
||||
{
|
||||
return value;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static string ComputeEventId(string nodeName, ContainerLifecycleEvent lifecycleEvent)
|
||||
{
|
||||
var builder = new StringBuilder()
|
||||
.Append(nodeName)
|
||||
.Append('|')
|
||||
.Append(lifecycleEvent.Snapshot.Id)
|
||||
.Append('|')
|
||||
.Append(lifecycleEvent.Timestamp.ToUniversalTime().Ticks)
|
||||
.Append('|')
|
||||
.Append((int)lifecycleEvent.Kind);
|
||||
|
||||
var bytes = Encoding.UTF8.GetBytes(builder.ToString());
|
||||
Span<byte> hash = stackalloc byte[16];
|
||||
if (!MD5.TryHashData(bytes, hash, out _))
|
||||
{
|
||||
using var md5 = MD5.Create();
|
||||
hash = md5.ComputeHash(bytes).AsSpan(0, 16);
|
||||
}
|
||||
|
||||
var guid = new Guid(hash);
|
||||
return guid.ToString("N");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user