This commit is contained in:
@@ -0,0 +1,128 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Configuration;
|
||||
|
||||
/// <summary>
/// Observer-specific configuration applied on top of the shared runtime options.
/// </summary>
public sealed class ZastavaObserverOptions
{
    /// <summary>
    /// Configuration section key associated with these options.
    /// </summary>
    public const string SectionName = "zastava:observer";

    private const string DefaultContainerdSocket = "unix:///run/containerd/containerd.sock";

    /// <summary>
    /// Logical node identifier emitted with runtime events (defaults to environment hostname).
    /// </summary>
    /// <remarks>
    /// The default is the first non-blank value of <c>ZASTAVA_NODE_NAME</c>,
    /// <c>KUBERNETES_NODE_NAME</c>, then <see cref="Environment.MachineName"/>.
    /// </remarks>
    [Required(AllowEmptyStrings = false)]
    public string NodeName { get; set; } = ResolveDefaultNodeName();

    /// <summary>
    /// Baseline polling interval when watching CRI runtimes.
    /// </summary>
    [Range(typeof(TimeSpan), "00:00:01", "00:10:00")]
    public TimeSpan PollInterval { get; set; } = TimeSpan.FromSeconds(2);

    /// <summary>
    /// Maximum number of runtime events held in the in-memory buffer.
    /// </summary>
    [Range(16, 65536)]
    public int MaxInMemoryBuffer { get; set; } = 2048;

    /// <summary>
    /// Number of runtime events drained in one batch by downstream publishers.
    /// </summary>
    [Range(1, 512)]
    public int PublishBatchSize { get; set; } = 32;

    /// <summary>
    /// Connectivity/backoff settings applied when CRI endpoints fail temporarily.
    /// </summary>
    [Required]
    public ObserverBackoffOptions Backoff { get; set; } = new();

    /// <summary>
    /// CRI runtime endpoints to monitor. Defaults to a single enabled containerd endpoint.
    /// </summary>
    // NOTE(review): if this instance is populated by a configuration binder that appends to
    // existing collections, the default entry below would remain alongside configured ones —
    // confirm against the registration code, which is not visible here.
    [Required]
    public IList<ContainerRuntimeEndpointOptions> Runtimes { get; set; } = new List<ContainerRuntimeEndpointOptions>
    {
        new()
        {
            Name = "containerd",
            Engine = ContainerRuntimeEngine.Containerd,
            Endpoint = DefaultContainerdSocket,
            Enabled = true
        }
    };

    /// <summary>
    /// Picks the default node name from the environment, skipping variables that are unset or blank.
    /// </summary>
    /// <returns>The first usable environment value, otherwise the machine name.</returns>
    private static string ResolveDefaultNodeName()
    {
        foreach (var variable in new[] { "ZASTAVA_NODE_NAME", "KUBERNETES_NODE_NAME" })
        {
            var candidate = Environment.GetEnvironmentVariable(variable);

            // A variable that is set but empty must not win: it would leave NodeName blank
            // and fail the [Required] validation above instead of falling back.
            if (!string.IsNullOrWhiteSpace(candidate))
            {
                return candidate;
            }
        }

        return Environment.MachineName;
    }
}
|
||||
|
||||
/// <summary>
/// Retry/backoff tuning: an initial delay, an upper bound, and a jitter ratio.
/// </summary>
public sealed class ObserverBackoffOptions
{
    /// <summary>
    /// Initial backoff delay applied after the first failure (default 1 second).
    /// </summary>
    [Range(typeof(TimeSpan), "00:00:01", "00:05:00")]
    public TimeSpan Initial { get; set; } = new TimeSpan(hours: 0, minutes: 0, seconds: 1);

    /// <summary>
    /// Maximum backoff delay after repeated failures (default 30 seconds).
    /// </summary>
    [Range(typeof(TimeSpan), "00:00:01", "00:10:00")]
    public TimeSpan Max { get; set; } = new TimeSpan(hours: 0, minutes: 0, seconds: 30);

    /// <summary>
    /// Jitter ratio applied to the computed delay (0 disables jitter; default 0.2).
    /// </summary>
    [Range(0.0, 0.5)]
    public double JitterRatio { get; set; } = 0.2;
}
|
||||
|
||||
/// <summary>
/// Settings describing a single container runtime endpoint.
/// </summary>
public sealed class ContainerRuntimeEndpointOptions
{
    /// <summary>
    /// Friendly name used for logging/metrics (defaults to engine identifier).
    /// </summary>
    public string? Name { get; set; }

    /// <summary>
    /// Runtime engine backing the endpoint.
    /// </summary>
    public ContainerRuntimeEngine Engine { get; set; } = ContainerRuntimeEngine.Containerd;

    /// <summary>
    /// Endpoint URI (unix:///run/containerd/containerd.sock, npipe://./pipe/dockershim, https://127.0.0.1:1234, ...).
    /// </summary>
    [Required(AllowEmptyStrings = false)]
    public string Endpoint { get; set; } = "unix:///run/containerd/containerd.sock";

    /// <summary>
    /// Optional explicit polling interval for this endpoint (falls back to global PollInterval).
    /// </summary>
    [Range(typeof(TimeSpan), "00:00:01", "00:10:00")]
    public TimeSpan? PollInterval { get; set; }

    /// <summary>
    /// Optional connection timeout override.
    /// </summary>
    [Range(typeof(TimeSpan), "00:00:01", "00:01:00")]
    public TimeSpan? ConnectTimeout { get; set; }

    /// <summary>
    /// Flag to allow disabling endpoints without removing configuration entries.
    /// </summary>
    public bool Enabled { get; set; } = true;

    /// <summary>
    /// Returns <see cref="Name"/> when it is not blank; otherwise the lower-cased enum member
    /// name of <see cref="Engine"/>.
    /// </summary>
    /// <returns>A non-blank display name for this endpoint.</returns>
    // NOTE(review): for CriO this yields "crio", whereas ToEngineString() yields "cri-o".
    public string ResolveName()
    {
        if (!string.IsNullOrWhiteSpace(Name))
        {
            return Name;
        }

        return Engine.ToString().ToLowerInvariant();
    }
}
|
||||
|
||||
/// <summary>
/// Container runtime engines that an endpoint can be declared as.
/// </summary>
public enum ContainerRuntimeEngine
{
    /// <summary>containerd (the default engine for endpoint options).</summary>
    Containerd = 0,

    /// <summary>CRI-O.</summary>
    CriO = 1,

    /// <summary>Docker.</summary>
    Docker = 2
}
|
||||
@@ -0,0 +1,134 @@
|
||||
using StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.ContainerRuntime;
|
||||
|
||||
/// <summary>
/// Tracks containers observed across polling cycles and derives start/stop lifecycle events.
/// </summary>
/// <remarks>
/// Expected call order per cycle: <see cref="BeginCycle"/>, then <see cref="MarkRunning"/> for each
/// running container, then <see cref="CompleteCycleAsync"/>. State is kept in a plain
/// <see cref="Dictionary{TKey,TValue}"/>, so calls must not overlap.
/// </remarks>
internal sealed class ContainerStateTracker
{
    private readonly Dictionary<string, ContainerStateEntry> entries = new(StringComparer.Ordinal);

    /// <summary>
    /// Clears the "seen" flag on every tracked container ahead of a new cycle.
    /// </summary>
    public void BeginCycle()
    {
        foreach (var entry in entries.Values)
        {
            entry.SeenInCycle = false;
        }
    }

    /// <summary>
    /// Records that a container is running in the current cycle.
    /// </summary>
    /// <param name="snapshot">Latest information about the running container.</param>
    /// <param name="fallbackTimestamp">
    /// Timestamp used when the snapshot has neither a usable start time nor a usable creation time.
    /// </param>
    /// <returns>
    /// A <see cref="ContainerLifecycleEventKind.Start"/> event when the container is new to the tracker
    /// or its start timestamp is later than the last recorded one; otherwise <c>null</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="snapshot"/> is <c>null</c>.</exception>
    public ContainerLifecycleEvent? MarkRunning(CriContainerInfo snapshot, DateTimeOffset fallbackTimestamp)
    {
        ArgumentNullException.ThrowIfNull(snapshot);

        var timestamp = snapshot.StartedAt ?? snapshot.CreatedAt;
        if (timestamp <= DateTimeOffset.MinValue)
        {
            // DateTimeOffset.MinValue is the "unknown" sentinel produced by the CRI conversions.
            timestamp = fallbackTimestamp;
        }

        var isNew = false;
        if (!entries.TryGetValue(snapshot.Id, out var entry))
        {
            entry = new ContainerStateEntry(snapshot);
            entries[snapshot.Id] = entry;
            isNew = true;
        }

        entry.SeenInCycle = true;
        entry.State = ContainerLifecycleState.Running;
        entry.LastSnapshot = snapshot;

        // A known container only produces another Start when its start time moved forward.
        if (!isNew && timestamp <= entry.LastStart)
        {
            return null;
        }

        entry.LastStart = timestamp;
        return new ContainerLifecycleEvent(ContainerLifecycleEventKind.Start, timestamp, snapshot);
    }

    /// <summary>
    /// Ends the cycle: every tracked container not marked running since <see cref="BeginCycle"/>
    /// is reported as stopped and removed from the tracker.
    /// </summary>
    /// <param name="statusProvider">
    /// Looks up the latest status for a container id; may return <c>null</c>, in which case the
    /// last known snapshot is used.
    /// </param>
    /// <param name="fallbackTimestamp">Stop timestamp used when no finish time is known.</param>
    /// <param name="cancellationToken">Checked before each status lookup.</param>
    /// <returns>Stop events ordered by timestamp, then by container id (ordinal).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="statusProvider"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested before all lookups finished.</exception>
    public async Task<IReadOnlyList<ContainerLifecycleEvent>> CompleteCycleAsync(
        Func<string, Task<CriContainerInfo?>> statusProvider,
        DateTimeOffset fallbackTimestamp,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(statusProvider);

        // Phase 1: resolve statuses without mutating tracker state. If a lookup throws or the
        // token is cancelled, nothing has been removed yet, so no stop event is lost and the
        // next cycle can retry.
        var missing = new List<(string ContainerId, ContainerStateEntry Entry, CriContainerInfo? Status)>();
        foreach (var (containerId, entry) in entries.ToArray())
        {
            if (entry.SeenInCycle)
            {
                continue;
            }

            cancellationToken.ThrowIfCancellationRequested();

            CriContainerInfo? status;
            if (entry.LastSnapshot is { FinishedAt: not null } finished)
            {
                // The last snapshot already carries a finish time; no lookup needed.
                status = finished;
            }
            else
            {
                status = await statusProvider(containerId).ConfigureAwait(false) ?? entry.LastSnapshot;
            }

            missing.Add((containerId, entry, status));
        }

        // Phase 2: commit removals and build the stop events.
        var events = new List<ContainerLifecycleEvent>(missing.Count);
        foreach (var (containerId, entry, status) in missing)
        {
            var stopTimestamp = status?.FinishedAt ?? fallbackTimestamp;
            if (stopTimestamp <= DateTimeOffset.MinValue)
            {
                stopTimestamp = fallbackTimestamp;
            }

            entries.Remove(containerId);

            if (entry.LastStop is not null && stopTimestamp <= entry.LastStop)
            {
                // A stop at or after this timestamp was already recorded; do not emit it again.
                continue;
            }

            var snapshot = status ?? entry.LastSnapshot ?? entry.MetadataFallback;
            events.Add(new ContainerLifecycleEvent(ContainerLifecycleEventKind.Stop, stopTimestamp, snapshot));

            entry.LastStop = stopTimestamp;
            entry.State = ContainerLifecycleState.Stopped;
        }

        return events
            .OrderBy(static e => e.Timestamp)
            .ThenBy(static e => e.Snapshot.Id, StringComparer.Ordinal)
            .ToArray();
    }

    /// <summary>
    /// Mutable per-container bookkeeping held by the tracker.
    /// </summary>
    private sealed class ContainerStateEntry
    {
        public ContainerStateEntry(CriContainerInfo seed)
        {
            MetadataFallback = seed;
            LastSnapshot = seed;
        }

        public ContainerLifecycleState State { get; set; } = ContainerLifecycleState.Unknown;

        // True once MarkRunning has been called for this container since the last BeginCycle.
        public bool SeenInCycle { get; set; }

        public DateTimeOffset LastStart { get; set; } = DateTimeOffset.MinValue;

        public DateTimeOffset? LastStop { get; set; }

        // First snapshot ever seen; last resort when building a stop event.
        public CriContainerInfo MetadataFallback { get; }

        public CriContainerInfo? LastSnapshot { get; set; }
    }
}
|
||||
|
||||
/// <summary>
/// Lifecycle state recorded for a tracked container.
/// </summary>
internal enum ContainerLifecycleState
{
    /// <summary>No state recorded yet (the default value).</summary>
    Unknown = 0,

    /// <summary>The container was reported as running.</summary>
    Running = 1,

    /// <summary>The container was reported as stopped.</summary>
    Stopped = 2
}
|
||||
|
||||
/// <summary>
/// A start or stop transition derived for a container.
/// </summary>
/// <param name="Kind">Whether the container started or stopped.</param>
/// <param name="Timestamp">When the transition is considered to have happened.</param>
/// <param name="Snapshot">Container information associated with the transition.</param>
internal sealed record ContainerLifecycleEvent(
    ContainerLifecycleEventKind Kind,
    DateTimeOffset Timestamp,
    CriContainerInfo Snapshot);
|
||||
|
||||
/// <summary>
/// Kind of transition carried by a lifecycle event.
/// </summary>
internal enum ContainerLifecycleEventKind
{
    /// <summary>The container started.</summary>
    Start = 0,

    /// <summary>The container stopped.</summary>
    Stop = 1
}
|
||||
@@ -0,0 +1,76 @@
|
||||
using StellaOps.Zastava.Observer.Cri;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
|
||||
/// <summary>
/// Converts CRI protobuf messages into <see cref="CriContainerInfo"/> values.
/// </summary>
internal static class CriConversions
{
    // One .NET tick is 100 nanoseconds.
    private const long NanosecondsPerTick = 100;

    /// <summary>
    /// Builds a <see cref="CriContainerInfo"/> from a CRI <c>Container</c> list entry.
    /// </summary>
    /// <param name="container">The CRI container message.</param>
    /// <returns>
    /// Container info with start/finish time, exit code, reason and message left <c>null</c>;
    /// only the fields copied from <paramref name="container"/> are populated.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="container"/> is <c>null</c>.</exception>
    public static CriContainerInfo ToContainerInfo(Container container)
    {
        ArgumentNullException.ThrowIfNull(container);

        return new CriContainerInfo(
            Id: container.Id ?? string.Empty,
            PodSandboxId: container.PodSandboxId ?? string.Empty,
            Name: container.Metadata?.Name ?? string.Empty,
            Attempt: container.Metadata?.Attempt ?? 0,
            Image: container.Image?.Image,
            ImageRef: container.ImageRef,
            Labels: CopyMap(container.Labels),
            Annotations: CopyMap(container.Annotations),
            CreatedAt: FromUnixNanoseconds(container.CreatedAt),
            StartedAt: null,
            FinishedAt: null,
            ExitCode: null,
            Reason: null,
            Message: null);
    }

    /// <summary>
    /// Overlays the fields of a CRI <c>ContainerStatus</c> onto an existing snapshot.
    /// </summary>
    /// <param name="baseline">Snapshot to start from.</param>
    /// <param name="status">Status message, or <c>null</c> to return <paramref name="baseline"/> unchanged.</param>
    /// <returns>
    /// A copy of <paramref name="baseline"/> in which every field that <paramref name="status"/>
    /// populates replaces the baseline value; unpopulated fields keep the baseline value.
    /// </returns>
    public static CriContainerInfo MergeStatus(CriContainerInfo baseline, ContainerStatus? status)
    {
        if (status is null)
        {
            return baseline;
        }

        // An absent or empty map keeps the baseline, matching the "populated value wins"
        // rule used for the scalar fields below.
        var labels = status.Labels is { Count: > 0 } statusLabels
            ? CopyMap(statusLabels)
            : baseline.Labels;
        var annotations = status.Annotations is { Count: > 0 } statusAnnotations
            ? CopyMap(statusAnnotations)
            : baseline.Annotations;

        // Exit code 0 is a real result once the container has finished, so it must not be
        // mistaken for "unset" in that case.
        var hasExitCode = status.FinishedAt > 0 || status.ExitCode != 0;

        return baseline with
        {
            CreatedAt = status.CreatedAt > 0 ? FromUnixNanoseconds(status.CreatedAt) : baseline.CreatedAt,
            StartedAt = status.StartedAt > 0 ? FromUnixNanoseconds(status.StartedAt) : baseline.StartedAt,
            FinishedAt = status.FinishedAt > 0 ? FromUnixNanoseconds(status.FinishedAt) : baseline.FinishedAt,
            ExitCode = hasExitCode ? status.ExitCode : baseline.ExitCode,
            Reason = string.IsNullOrWhiteSpace(status.Reason) ? baseline.Reason : status.Reason,
            Message = string.IsNullOrWhiteSpace(status.Message) ? baseline.Message : status.Message,
            Image = status.Image?.Image ?? baseline.Image,
            ImageRef = string.IsNullOrWhiteSpace(status.ImageRef) ? baseline.ImageRef : status.ImageRef,
            Labels = labels,
            Annotations = annotations
        };
    }

    /// <summary>
    /// Converts nanoseconds since the Unix epoch to a <see cref="DateTimeOffset"/> (UTC).
    /// </summary>
    /// <param name="nanoseconds">Nanoseconds since 1970-01-01T00:00:00Z.</param>
    /// <returns>
    /// The corresponding instant truncated to 100 ns precision, or
    /// <see cref="DateTimeOffset.MinValue"/> when <paramref name="nanoseconds"/> is zero or negative.
    /// </returns>
    public static DateTimeOffset FromUnixNanoseconds(long nanoseconds)
    {
        if (nanoseconds <= 0)
        {
            return DateTimeOffset.MinValue;
        }

        // long.MaxValue nanoseconds is roughly year 2262, far below DateTimeOffset.MaxValue,
        // so this addition cannot overflow for any positive input.
        return DateTimeOffset.UnixEpoch.AddTicks(nanoseconds / NanosecondsPerTick);
    }

    /// <summary>
    /// Copies key/value pairs into a new ordinal dictionary; a <c>null</c> source yields an empty one.
    /// </summary>
    private static Dictionary<string, string> CopyMap(IEnumerable<KeyValuePair<string, string>>? source)
        => source is null
            ? new Dictionary<string, string>(StringComparer.Ordinal)
            : new Dictionary<string, string>(source, StringComparer.Ordinal);
}
|
||||
@@ -0,0 +1,44 @@
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
|
||||
/// <summary>
/// Identity details reported by a container runtime.
/// </summary>
/// <param name="RuntimeName">Name of the runtime.</param>
/// <param name="RuntimeVersion">Version of the runtime.</param>
/// <param name="RuntimeApiVersion">API version exposed by the runtime.</param>
internal sealed record CriRuntimeIdentity(string RuntimeName, string RuntimeVersion, string RuntimeApiVersion);
|
||||
|
||||
/// <summary>
/// Immutable snapshot of a container as reported by a CRI runtime.
/// </summary>
/// <param name="Id">Container identifier.</param>
/// <param name="PodSandboxId">Identifier of the pod sandbox the container belongs to.</param>
/// <param name="Name">Container name from its metadata.</param>
/// <param name="Attempt">Attempt counter from its metadata.</param>
/// <param name="Image">Image name, when known.</param>
/// <param name="ImageRef">Image reference, when known.</param>
/// <param name="Labels">Container labels.</param>
/// <param name="Annotations">Container annotations.</param>
/// <param name="CreatedAt">Creation time.</param>
/// <param name="StartedAt">Start time, when known.</param>
/// <param name="FinishedAt">Finish time, when known.</param>
/// <param name="ExitCode">Exit code, when known.</param>
/// <param name="Reason">Reason reported by the runtime, when present.</param>
/// <param name="Message">Message reported by the runtime, when present.</param>
internal sealed record CriContainerInfo(
    string Id,
    string PodSandboxId,
    string Name,
    uint Attempt,
    string? Image,
    string? ImageRef,
    IReadOnlyDictionary<string, string> Labels,
    IReadOnlyDictionary<string, string> Annotations,
    DateTimeOffset CreatedAt,
    DateTimeOffset? StartedAt,
    DateTimeOffset? FinishedAt,
    int? ExitCode,
    string? Reason,
    string? Message);
|
||||
|
||||
/// <summary>
/// Label keys in the <c>io.kubernetes.*</c> namespace used to look up pod and container details.
/// </summary>
internal static class CriLabelKeys
{
    /// <summary>Label key holding the pod name.</summary>
    public const string PodName = "io.kubernetes.pod.name";

    /// <summary>Label key holding the pod namespace.</summary>
    public const string PodNamespace = "io.kubernetes.pod.namespace";

    /// <summary>Label key holding the pod UID.</summary>
    public const string PodUid = "io.kubernetes.pod.uid";

    /// <summary>Label key holding the container name.</summary>
    public const string ContainerName = "io.kubernetes.container.name";
}
|
||||
|
||||
/// <summary>
/// Extension helpers for <see cref="ContainerRuntimeEngine"/>.
/// </summary>
internal static class ContainerRuntimeEngineExtensions
{
    /// <summary>
    /// Maps an engine to its lower-case string identifier.
    /// </summary>
    /// <param name="engine">Engine to describe.</param>
    /// <returns>
    /// <c>"containerd"</c>, <c>"cri-o"</c> or <c>"docker"</c>; <c>"unknown"</c> for any other value.
    /// </returns>
    public static string ToEngineString(this ContainerRuntimeEngine engine)
    {
        switch (engine)
        {
            case ContainerRuntimeEngine.Containerd:
                return "containerd";
            case ContainerRuntimeEngine.CriO:
                return "cri-o";
            case ContainerRuntimeEngine.Docker:
                return "docker";
            default:
                return "unknown";
        }
    }
}
|
||||
@@ -0,0 +1,213 @@
|
||||
using System.IO;
|
||||
using System.Net.Sockets;
|
||||
using System.Linq;
|
||||
using Grpc.Core;
|
||||
using Grpc.Net.Client;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
using StellaOps.Zastava.Observer.Cri;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
|
||||
/// <summary>
/// Client for a single CRI runtime endpoint.
/// </summary>
internal interface ICriRuntimeClient : IAsyncDisposable
{
    /// <summary>Endpoint options this client was created for.</summary>
    ContainerRuntimeEndpointOptions Endpoint { get; }

    /// <summary>Queries the runtime's name and version information.</summary>
    /// <param name="cancellationToken">Token used to cancel the call.</param>
    Task<CriRuntimeIdentity> GetIdentityAsync(CancellationToken cancellationToken);

    /// <summary>Lists containers that are in the given state.</summary>
    /// <param name="state">State to filter by.</param>
    /// <param name="cancellationToken">Token used to cancel the call.</param>
    Task<IReadOnlyList<CriContainerInfo>> ListContainersAsync(ContainerState state, CancellationToken cancellationToken);

    /// <summary>Fetches the status of one container.</summary>
    /// <param name="containerId">Identifier of the container to query.</param>
    /// <param name="cancellationToken">Token used to cancel the call.</param>
    /// <returns>The container's status, or <c>null</c> when none is available.</returns>
    Task<CriContainerInfo?> GetContainerStatusAsync(string containerId, CancellationToken cancellationToken);
}
|
||||
|
||||
/// <summary>
/// gRPC-based <see cref="ICriRuntimeClient"/> that talks to a CRI runtime over a Unix domain
/// socket (<c>unix://</c> endpoints) or over whatever address <see cref="GrpcChannel.ForAddress(string, GrpcChannelOptions)"/> accepts.
/// </summary>
internal sealed class CriRuntimeClient : ICriRuntimeClient
{
    private const string UnixScheme = "unix://";
    private const string UnknownValue = "unknown";

    private static readonly object SwitchLock = new();
    private static bool http2SwitchApplied;

    private readonly GrpcChannel channel;
    private readonly RuntimeService.RuntimeServiceClient client;
    private readonly ILogger<CriRuntimeClient> logger;

    /// <summary>
    /// Creates a client and its underlying channel for the given endpoint.
    /// </summary>
    /// <param name="endpoint">Endpoint to connect to.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public CriRuntimeClient(ContainerRuntimeEndpointOptions endpoint, ILogger<CriRuntimeClient> logger)
    {
        ArgumentNullException.ThrowIfNull(endpoint);
        this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
        Endpoint = endpoint;

        EnsureHttp2Switch();
        channel = CreateChannel(endpoint);
        client = new RuntimeService.RuntimeServiceClient(channel);
    }

    /// <inheritdoc />
    public ContainerRuntimeEndpointOptions Endpoint { get; }

    /// <inheritdoc />
    public async Task<CriRuntimeIdentity> GetIdentityAsync(CancellationToken cancellationToken)
    {
        var response = await client.VersionAsync(new VersionRequest(), cancellationToken: cancellationToken).ConfigureAwait(false);

        // Protobuf string fields are empty rather than null when unset, so the fallbacks
        // must test for blank values instead of relying on '??'.
        return new CriRuntimeIdentity(
            RuntimeName: FirstNonBlank(response.RuntimeName, Endpoint.Engine.ToEngineString()),
            RuntimeVersion: FirstNonBlank(response.RuntimeVersion),
            RuntimeApiVersion: FirstNonBlank(response.RuntimeApiVersion, response.Version));
    }

    /// <inheritdoc />
    /// <exception cref="RpcException">
    /// The call failed; an <see cref="StatusCode.Unimplemented"/> failure is logged as a warning before being rethrown.
    /// </exception>
    public async Task<IReadOnlyList<CriContainerInfo>> ListContainersAsync(ContainerState state, CancellationToken cancellationToken)
    {
        var request = new ListContainersRequest
        {
            Filter = new ContainerFilter
            {
                State = new ContainerStateValue
                {
                    State = state
                }
            }
        };

        try
        {
            var response = await client.ListContainersAsync(request, cancellationToken: cancellationToken).ConfigureAwait(false);
            if (response.Containers is null || response.Containers.Count == 0)
            {
                return Array.Empty<CriContainerInfo>();
            }

            return response.Containers
                .Select(CriConversions.ToContainerInfo)
                .ToArray();
        }
        catch (RpcException ex) when (ex.StatusCode == StatusCode.Unimplemented)
        {
            logger.LogWarning(ex, "Runtime endpoint {Endpoint} does not support ListContainers for state {State}.", Endpoint.Endpoint, state);
            throw;
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns <c>null</c> for a blank id, for a response without a status, and when the call
    /// fails with <see cref="StatusCode.NotFound"/> or <see cref="StatusCode.DeadlineExceeded"/>.
    /// The returned info has an empty <c>PodSandboxId</c>, because no sandbox id is read from
    /// the status response.
    /// </remarks>
    public async Task<CriContainerInfo?> GetContainerStatusAsync(string containerId, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(containerId))
        {
            return null;
        }

        try
        {
            var response = await client.ContainerStatusAsync(new ContainerStatusRequest
            {
                ContainerId = containerId,
                Verbose = false
            }, cancellationToken: cancellationToken).ConfigureAwait(false);

            var status = response.Status;
            if (status is null)
            {
                return null;
            }

            // PodSandboxId is deliberately left unset: Metadata.Name is the container's
            // name, and reporting it as a sandbox id would mislabel the container.
            var baseline = CriConversions.ToContainerInfo(new Container
            {
                Id = status.Id,
                Metadata = status.Metadata,
                Image = status.Image,
                ImageRef = status.ImageRef,
                Labels = { status.Labels },
                Annotations = { status.Annotations },
                CreatedAt = status.CreatedAt
            });

            return CriConversions.MergeStatus(baseline, status);
        }
        catch (RpcException ex) when (ex.StatusCode is StatusCode.NotFound or StatusCode.DeadlineExceeded)
        {
            // NOTE(review): DeadlineExceeded is a timeout, not proof the container is gone;
            // callers treating null as "stopped" may want to distinguish the two.
            logger.LogDebug(ex, "Container {ContainerId} no longer available when querying status.", containerId);
            return null;
        }
    }

    /// <summary>
    /// Disposes the underlying channel (and, via <c>DisposeHttpClient</c>, its HTTP client).
    /// </summary>
    public ValueTask DisposeAsync()
    {
        try
        {
            channel.Dispose();
        }
        catch (InvalidOperationException)
        {
            // Channel already disposed.
        }

        return ValueTask.CompletedTask;
    }

    /// <summary>
    /// Returns the first candidate that is not null/blank, or <c>"unknown"</c> when none qualifies.
    /// </summary>
    private static string FirstNonBlank(params string?[] candidates)
    {
        foreach (var candidate in candidates)
        {
            if (!string.IsNullOrWhiteSpace(candidate))
            {
                return candidate;
            }
        }

        return UnknownValue;
    }

    /// <summary>
    /// Sets the <c>Http2UnencryptedSupport</c> AppContext switch once per process.
    /// </summary>
    private static void EnsureHttp2Switch()
    {
        if (http2SwitchApplied)
        {
            return;
        }

        lock (SwitchLock)
        {
            if (!http2SwitchApplied)
            {
                AppContext.SetSwitch("System.Net.Http.SocketsHttpHandler.Http2UnencryptedSupport", true);
                http2SwitchApplied = true;
            }
        }
    }

    /// <summary>
    /// Builds the gRPC channel for an endpoint. <c>unix://</c> endpoints get a handler whose
    /// connect callback dials the socket path; any other endpoint string is passed to
    /// <see cref="GrpcChannel.ForAddress(string, GrpcChannelOptions)"/> as-is.
    /// </summary>
    private static GrpcChannel CreateChannel(ContainerRuntimeEndpointOptions endpoint)
    {
        if (!IsUnixEndpoint(endpoint.Endpoint, out var socketPath))
        {
            // NOTE(review): ConnectTimeout is not applied on this path.
            return GrpcChannel.ForAddress(endpoint.Endpoint, new GrpcChannelOptions
            {
                DisposeHttpClient = true
            });
        }

        var handler = new SocketsHttpHandler
        {
            ConnectCallback = (_, token) => ConnectUnixDomainSocketAsync(socketPath, token),
            EnableMultipleHttp2Connections = true
        };

        // TimeSpan.Zero is not a compile-time constant, so it cannot be used in a relational pattern.
        if (endpoint.ConnectTimeout is { } timeout && timeout > TimeSpan.Zero)
        {
            handler.ConnectTimeout = timeout;
        }

        // The address is a placeholder; the connect callback decides where bytes actually go.
        return GrpcChannel.ForAddress("http://unix.local", new GrpcChannelOptions
        {
            HttpHandler = handler,
            DisposeHttpClient = true
        });
    }

    /// <summary>
    /// Detects a <c>unix://</c> endpoint (case-insensitive) and extracts the part after the scheme.
    /// </summary>
    /// <param name="endpoint">Endpoint string to inspect.</param>
    /// <param name="path">Socket path when the method returns <c>true</c>; otherwise empty.</param>
    private static bool IsUnixEndpoint(string endpoint, out string path)
    {
        if (endpoint.StartsWith(UnixScheme, StringComparison.OrdinalIgnoreCase))
        {
            path = endpoint[UnixScheme.Length..];
            return true;
        }

        path = string.Empty;
        return false;
    }

    /// <summary>
    /// Opens a stream over a Unix domain socket at <paramref name="unixPath"/>.
    /// The socket is disposed if connecting fails; otherwise the returned stream owns it.
    /// </summary>
    private static async ValueTask<Stream> ConnectUnixDomainSocketAsync(string unixPath, CancellationToken cancellationToken)
    {
        // No NoDelay here: it is a TCP-level option and does not apply to AF_UNIX sockets.
        var socket = new Socket(AddressFamily.Unix, SocketType.Stream, ProtocolType.Unspecified);

        try
        {
            var endpoint = new UnixDomainSocketEndPoint(unixPath);
            await socket.ConnectAsync(endpoint, cancellationToken).ConfigureAwait(false);
            return new NetworkStream(socket, ownsSocket: true);
        }
        catch
        {
            socket.Dispose();
            throw;
        }
    }
}
|
||||
@@ -0,0 +1,26 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Zastava.Observer.Configuration;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.ContainerRuntime.Cri;
|
||||
|
||||
/// <summary>
/// Creates <see cref="ICriRuntimeClient"/> instances for runtime endpoints.
/// </summary>
internal interface ICriRuntimeClientFactory
{
    /// <summary>Creates a client for the given endpoint.</summary>
    /// <param name="endpoint">Endpoint options the client should use.</param>
    /// <returns>A new client instance.</returns>
    ICriRuntimeClient Create(ContainerRuntimeEndpointOptions endpoint);
}
|
||||
|
||||
/// <summary>
/// Default <see cref="ICriRuntimeClientFactory"/>; resolves the client's logger from the
/// service provider on each call.
/// </summary>
internal sealed class CriRuntimeClientFactory : ICriRuntimeClientFactory
{
    private readonly IServiceProvider serviceProvider;

    /// <summary>Creates the factory.</summary>
    /// <param name="serviceProvider">Provider used to resolve <see cref="ILogger{TCategoryName}"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="serviceProvider"/> is <c>null</c>.</exception>
    public CriRuntimeClientFactory(IServiceProvider serviceProvider)
    {
        if (serviceProvider is null)
        {
            throw new ArgumentNullException(nameof(serviceProvider));
        }

        this.serviceProvider = serviceProvider;
    }

    /// <inheritdoc />
    public ICriRuntimeClient Create(ContainerRuntimeEndpointOptions endpoint)
        => new CriRuntimeClient(endpoint, serviceProvider.GetRequiredService<ILogger<CriRuntimeClient>>());
}
|
||||
10
src/StellaOps.Zastava.Observer/Program.cs
Normal file
10
src/StellaOps.Zastava.Observer/Program.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using StellaOps.Zastava.Observer.Worker;
|
||||
|
||||
// Entry point: build a generic host, register the runtime core services and the
// bootstrap worker, then run until shutdown.
var hostBuilder = Host.CreateApplicationBuilder(args);
var services = hostBuilder.Services;

services.AddZastavaRuntimeCore(hostBuilder.Configuration, componentName: "observer");
services.AddHostedService<ObserverBootstrapService>();

var host = hostBuilder.Build();
await host.RunAsync();
|
||||
1855
src/StellaOps.Zastava.Observer/Protos/runtime/v1/runtime.proto
Normal file
1855
src/StellaOps.Zastava.Observer/Protos/runtime/v1/runtime.proto
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,24 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Google.Protobuf" Version="3.27.2" />
|
||||
<PackageReference Include="Grpc.Net.Client" Version="2.65.0" />
|
||||
<PackageReference Include="Grpc.Tools" Version="2.65.0">
|
||||
<PrivateAssets>All</PrivateAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Zastava.Core\StellaOps.Zastava.Core.csproj" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Protobuf Include="Protos/runtime/v1/runtime.proto" GrpcServices="Client" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|
||||
|----|--------|----------|------------|-------------|---------------|
|
||||
| ZASTAVA-OBS-12-001 | TODO | Zastava Observer Guild | ZASTAVA-CORE-12-201 | Build container lifecycle watcher that tails CRI (containerd/cri-o/docker) events and emits deterministic runtime records with buffering + backoff. | Fixture cluster produces start/stop events with stable ordering, jitter/backoff tested, metrics/logging wired. |
|
||||
| ZASTAVA-OBS-12-001 | DOING | Zastava Observer Guild | ZASTAVA-CORE-12-201 | Build container lifecycle watcher that tails CRI (containerd/cri-o/docker) events and emits deterministic runtime records with buffering + backoff. | Fixture cluster produces start/stop events with stable ordering, jitter/backoff tested, metrics/logging wired. |
|
||||
| ZASTAVA-OBS-12-002 | TODO | Zastava Observer Guild | ZASTAVA-OBS-12-001 | Capture entrypoint traces and loaded libraries, hashing binaries and correlating to SBOM baseline per architecture sections 2.1 and 10. | EntryTrace parser covers shell/python/node launchers, loaded library hashes recorded, fixtures assert linkage to SBOM usage view. |
|
||||
| ZASTAVA-OBS-12-003 | TODO | Zastava Observer Guild | ZASTAVA-OBS-12-002 | Implement runtime posture checks (signature/SBOM/attestation presence) with offline caching and warning surfaces. | Observer marks posture status, caches refresh across restarts, integration tests prove offline tolerance. |
|
||||
| ZASTAVA-OBS-12-004 | TODO | Zastava Observer Guild | ZASTAVA-OBS-12-002 | Batch `/runtime/events` submissions with disk-backed buffer, rate limits, and deterministic envelopes. | Buffered submissions survive restart, rate-limits enforced in tests, JSON envelopes match schema in docs/events. |
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Zastava.Core.Configuration;
|
||||
using StellaOps.Zastava.Core.Diagnostics;
|
||||
using StellaOps.Zastava.Core.Security;
|
||||
|
||||
namespace StellaOps.Zastava.Observer.Worker;
|
||||
|
||||
/// <summary>
/// Minimal bootstrap worker ensuring runtime core wiring is exercised.
/// </summary>
/// <remarks>
/// On start it logs the tenant/component and metrics meter details inside a log scope,
/// registers a callback that logs once the application has started, and then completes.
/// </remarks>
internal sealed class ObserverBootstrapService : BackgroundService
{
    private readonly IZastavaLogScopeBuilder logScopeBuilder;
    private readonly IZastavaRuntimeMetrics runtimeMetrics;
    private readonly IZastavaAuthorityTokenProvider authorityTokenProvider;
    private readonly IHostApplicationLifetime applicationLifetime;
    private readonly ILogger<ObserverBootstrapService> logger;
    private readonly ZastavaRuntimeOptions runtimeOptions;

    /// <summary>Creates the worker from its injected dependencies.</summary>
    public ObserverBootstrapService(
        IZastavaLogScopeBuilder logScopeBuilder,
        IZastavaRuntimeMetrics runtimeMetrics,
        IZastavaAuthorityTokenProvider authorityTokenProvider,
        IOptions<ZastavaRuntimeOptions> runtimeOptions,
        IHostApplicationLifetime applicationLifetime,
        ILogger<ObserverBootstrapService> logger)
    {
        this.logScopeBuilder = logScopeBuilder;
        this.runtimeMetrics = runtimeMetrics;
        this.authorityTokenProvider = authorityTokenProvider;
        this.applicationLifetime = applicationLifetime;
        this.logger = logger;
        this.runtimeOptions = runtimeOptions.Value;
    }

    /// <summary>
    /// Emits the bootstrap log entries and returns a completed task; no long-running work is started.
    /// </summary>
    /// <param name="stoppingToken">Host shutdown token (not used by this worker).</param>
    protected override Task ExecuteAsync(CancellationToken stoppingToken)
    {
        LogBootstrapDetails();

        // Observer implementation will hook into the authority token provider when connectors arrive.
        applicationLifetime.ApplicationStarted.Register(() => logger.LogInformation("Observer bootstrap complete."));
        return Task.CompletedTask;
    }

    // Writes the two start-up messages inside the "observer.bootstrap" log scope; the scope
    // is closed before this method returns.
    private void LogBootstrapDetails()
    {
        var bootstrapScope = logScopeBuilder.BuildScope(eventId: "observer.bootstrap");
        using (logger.BeginScope(bootstrapScope))
        {
            logger.LogInformation("Zastava observer runtime core initialised for tenant {Tenant}, component {Component}.", runtimeOptions.Tenant, runtimeOptions.Component);
            logger.LogDebug("Observer metrics meter {MeterName} registered with {TagCount} default tags.", runtimeMetrics.Meter.Name, runtimeMetrics.DefaultTags.Count);
        }
    }
}
|
||||
Reference in New Issue
Block a user