Refactor code structure for improved readability and maintainability; optimize performance in key functions.
This commit is contained in:
35
src/Scanner/__Libraries/StellaOps.Scanner.Runtime/AGENTS.md
Normal file
35
src/Scanner/__Libraries/StellaOps.Scanner.Runtime/AGENTS.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# AGENTS - Scanner Runtime Library
|
||||
|
||||
## Mission
|
||||
Capture and normalize runtime trace evidence (eBPF/ETW) and merge it with static reachability graphs to produce observed-path evidence.
|
||||
|
||||
## Roles
|
||||
- Backend engineer (.NET 10, C# preview).
|
||||
- QA engineer (deterministic tests; offline fixtures).
|
||||
|
||||
## Required Reading
|
||||
- `docs/README.md`
|
||||
- `docs/07_HIGH_LEVEL_ARCHITECTURE.md`
|
||||
- `docs/modules/platform/architecture-overview.md`
|
||||
- `docs/modules/scanner/architecture.md`
|
||||
- `docs/modules/zastava/architecture.md`
|
||||
- `docs/reachability/runtime-facts.md`
|
||||
- `docs/reachability/runtime-static-union-schema.md`
|
||||
|
||||
## Working Directory & Boundaries
|
||||
- Primary scope: `src/Scanner/__Libraries/StellaOps.Scanner.Runtime/`
|
||||
- Tests: `src/Scanner/__Tests/StellaOps.Scanner.Runtime.Tests/`
|
||||
- Avoid cross-module edits unless explicitly noted in the sprint.
|
||||
|
||||
## Determinism & Offline Rules
|
||||
- Normalize timestamps to UTC; keep the ordering of events and edges stable.
|
||||
- Offline-first; no network access in collectors or ingestion.
|
||||
- Prefer configuration-driven retention policies with deterministic pruning.
|
||||
|
||||
## Testing Expectations
|
||||
- Unit tests for ingestion, merge, and retention logic.
|
||||
- Use deterministic fixtures (fixed timestamps and IDs).
|
||||
|
||||
## Workflow
|
||||
- Update sprint status on task transitions.
|
||||
- Log design/decision changes in the sprint Execution Log.
|
||||
@@ -0,0 +1,150 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace StellaOps.Scanner.Runtime.Ebpf;
|
||||
|
||||
/// <summary>
/// eBPF-based trace collector for Linux using uprobe tracing.
/// </summary>
/// <remarks>
/// Program loading, uprobe attachment and ring-buffer reads are still TODO. At present the
/// collector only tracks its running state and session statistics and never yields events.
/// </remarks>
public sealed class EbpfTraceCollector : ITraceCollector
{
    private readonly ILogger<EbpfTraceCollector> _logger;
    private readonly ISymbolResolver _symbolResolver;
    private readonly TimeProvider _timeProvider;
    private bool _isRunning;
    private TraceCollectorStats _stats;

    /// <summary>
    /// Creates the collector.
    /// </summary>
    /// <param name="logger">Logger for lifecycle messages; must not be null.</param>
    /// <param name="symbolResolver">Resolver for event addresses; must not be null.</param>
    /// <param name="timeProvider">Clock used for all statistics timestamps; defaults to <see cref="TimeProvider.System"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> or <paramref name="symbolResolver"/> is null.</exception>
    public EbpfTraceCollector(
        ILogger<EbpfTraceCollector> logger,
        ISymbolResolver symbolResolver,
        TimeProvider? timeProvider = null)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _symbolResolver = symbolResolver ?? throw new ArgumentNullException(nameof(symbolResolver));
        _timeProvider = timeProvider ?? TimeProvider.System;

        // Seed the statistics from the injected clock (not DateTimeOffset.UtcNow) so that
        // tests running with a fixed TimeProvider observe deterministic values.
        _stats = new TraceCollectorStats
        {
            EventsCollected = 0,
            EventsDropped = 0,
            BytesProcessed = 0,
            StartedAt = _timeProvider.GetUtcNow()
        };
    }

    /// <summary>
    /// Marks the collector as running and records the session start time.
    /// </summary>
    /// <param name="config">Trace configuration; only the target PID/container are read (for logging).</param>
    /// <param name="cancellationToken">Currently unused.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="PlatformNotSupportedException">The current OS is not Linux.</exception>
    /// <exception cref="InvalidOperationException">The collector is already running.</exception>
    public Task StartAsync(TraceCollectorConfig config, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(config);

        if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
        {
            throw new PlatformNotSupportedException("eBPF tracing is only supported on Linux");
        }

        if (_isRunning)
        {
            throw new InvalidOperationException("Collector is already running");
        }

        _logger.LogInformation(
            "Starting eBPF trace collector for PID {Pid}, container {Container}",
            config.TargetPid,
            config.TargetContainerId ?? "all");

        // TODO: Actual eBPF program loading and uprobe attachment
        // This would use libbpf or bpf2go to:
        // 1. Load BPF program into kernel
        // 2. Attach uprobes to target functions
        // 3. Set up ringbuffer for event streaming
        // 4. Handle ASLR via /proc/pid/maps

        _isRunning = true;

        // Clear Duration as well: after a stop/start cycle the previous session's duration
        // would otherwise be reported next to the new StartedAt.
        _stats = _stats with { StartedAt = _timeProvider.GetUtcNow(), Duration = null };

        _logger.LogInformation("eBPF trace collector started successfully");

        return Task.CompletedTask;
    }

    /// <summary>
    /// Marks the collector as stopped and records the session duration. No-op when not running.
    /// </summary>
    /// <param name="cancellationToken">Currently unused.</param>
    public Task StopAsync(CancellationToken cancellationToken = default)
    {
        if (!_isRunning)
        {
            return Task.CompletedTask;
        }

        _logger.LogInformation("Stopping eBPF trace collector");

        // TODO: Detach uprobes and cleanup BPF resources

        _isRunning = false;
        _stats = _stats with { Duration = _timeProvider.GetUtcNow() - _stats.StartedAt };

        _logger.LogInformation(
            "eBPF trace collector stopped. Events: {Events}, Dropped: {Dropped}",
            _stats.EventsCollected,
            _stats.EventsDropped);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Placeholder event stream: completes immediately when the collector is not running,
    /// otherwise waits 100 ms (observing <paramref name="cancellationToken"/>) and completes
    /// without yielding any event.
    /// </summary>
    public async IAsyncEnumerable<RuntimeCallEvent> GetEventsAsync(
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        if (!_isRunning)
        {
            yield break;
        }

        // TODO: Read events from eBPF ringbuffer
        // This is a placeholder - actual implementation would:
        // 1. Poll ringbuffer for events
        // 2. Resolve symbols using /proc/kallsyms and binary debug info
        // 3. Handle container namespace awareness
        // 4. Apply rate limiting

        await Task.Delay(100, cancellationToken).ConfigureAwait(false);
        yield break;
    }

    /// <summary>
    /// Returns the current statistics snapshot.
    /// </summary>
    public TraceCollectorStats GetStatistics() => _stats;

    /// <summary>
    /// Stops the collector if it is still running.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        await StopAsync().ConfigureAwait(false);
    }
}
|
||||
|
||||
/// <summary>
/// Translates raw code addresses captured by the eBPF collector into symbol names.
/// </summary>
public interface ISymbolResolver
{
    /// <summary>
    /// Resolves the symbol name for an address inside a process.
    /// </summary>
    /// <param name="pid">Identifier of the process the address belongs to.</param>
    /// <param name="address">Address to resolve.</param>
    /// <param name="cancellationToken">Token used to cancel the lookup.</param>
    /// <returns>A task producing the symbol name.</returns>
    Task<string> ResolveSymbolAsync(
        uint pid,
        ulong address,
        CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// Symbol resolver implementation using /proc and binary debug info.
/// </summary>
/// <remarks>
/// Real resolution is still TODO; the current implementation returns a synthetic name derived
/// from the address and ignores the process id.
/// </remarks>
public sealed class LinuxSymbolResolver : ISymbolResolver
{
    private readonly ILogger<LinuxSymbolResolver> _logger;

    /// <summary>
    /// Creates the resolver.
    /// </summary>
    /// <param name="logger">Logger instance; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public LinuxSymbolResolver(ILogger<LinuxSymbolResolver> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Returns the placeholder symbol <c>func_0x{address in lowercase hex}</c>.
    /// </summary>
    /// <param name="pid">Process id (not used by the placeholder).</param>
    /// <param name="address">Address to name.</param>
    /// <param name="cancellationToken">When already cancelled, the returned task is cancelled.</param>
    /// <returns>A completed task carrying the placeholder name.</returns>
    public Task<string> ResolveSymbolAsync(
        uint pid,
        ulong address,
        CancellationToken cancellationToken = default)
    {
        // TODO: Actual symbol resolution:
        // 1. Read /proc/{pid}/maps to find binary containing address
        // 2. Adjust for ASLR offset
        // 3. Use libdwarf or addr2line to resolve symbol
        // 4. Cache results for performance

        // The placeholder does no I/O, so it completes synchronously instead of paying for a
        // timer-based delay on every lookup. Cancellation is still surfaced through the task.
        if (cancellationToken.IsCancellationRequested)
        {
            return Task.FromCanceled<string>(cancellationToken);
        }

        return Task.FromResult($"func_0x{address:x}");
    }
}
|
||||
@@ -0,0 +1,112 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace StellaOps.Scanner.Runtime.Etw;
|
||||
|
||||
/// <summary>
/// ETW-based trace collector for Windows.
/// </summary>
/// <remarks>
/// ETW session setup and event processing are still TODO. At present the collector only tracks
/// its running state and session statistics and never yields events.
/// </remarks>
public sealed class EtwTraceCollector : ITraceCollector
{
    private readonly ILogger<EtwTraceCollector> _logger;
    private readonly TimeProvider _timeProvider;
    private bool _isRunning;
    private TraceCollectorStats _stats;

    /// <summary>
    /// Creates the collector.
    /// </summary>
    /// <param name="logger">Logger for lifecycle messages; must not be null.</param>
    /// <param name="timeProvider">Clock used for all statistics timestamps; defaults to <see cref="TimeProvider.System"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public EtwTraceCollector(
        ILogger<EtwTraceCollector> logger,
        TimeProvider? timeProvider = null)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? TimeProvider.System;

        // Seed the statistics from the injected clock (not DateTimeOffset.UtcNow) so that
        // tests running with a fixed TimeProvider observe deterministic values.
        _stats = new TraceCollectorStats
        {
            EventsCollected = 0,
            EventsDropped = 0,
            BytesProcessed = 0,
            StartedAt = _timeProvider.GetUtcNow()
        };
    }

    /// <summary>
    /// Marks the collector as running and records the session start time.
    /// </summary>
    /// <param name="config">Trace configuration; only the target PID is read (for logging).</param>
    /// <param name="cancellationToken">Currently unused.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="PlatformNotSupportedException">The current OS is not Windows.</exception>
    /// <exception cref="InvalidOperationException">The collector is already running.</exception>
    public Task StartAsync(TraceCollectorConfig config, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(config);

        if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            throw new PlatformNotSupportedException("ETW tracing is only supported on Windows");
        }

        if (_isRunning)
        {
            throw new InvalidOperationException("Collector is already running");
        }

        _logger.LogInformation(
            "Starting ETW trace collector for PID {Pid}",
            config.TargetPid);

        // TODO: Actual ETW session setup
        // This would use TraceEvent or Microsoft.Diagnostics.Tracing.TraceEvent to:
        // 1. Create ETW session
        // 2. Subscribe to Microsoft-Windows-DotNETRuntime provider
        // 3. Subscribe to native call events
        // 4. Enable stack walking
        // 5. Filter by process ID

        _isRunning = true;

        // Clear Duration as well: after a stop/start cycle the previous session's duration
        // would otherwise be reported next to the new StartedAt.
        _stats = _stats with { StartedAt = _timeProvider.GetUtcNow(), Duration = null };

        _logger.LogInformation("ETW trace collector started successfully");

        return Task.CompletedTask;
    }

    /// <summary>
    /// Marks the collector as stopped and records the session duration. No-op when not running.
    /// </summary>
    /// <param name="cancellationToken">Currently unused.</param>
    public Task StopAsync(CancellationToken cancellationToken = default)
    {
        if (!_isRunning)
        {
            return Task.CompletedTask;
        }

        _logger.LogInformation("Stopping ETW trace collector");

        // TODO: Stop ETW session and cleanup

        _isRunning = false;
        _stats = _stats with { Duration = _timeProvider.GetUtcNow() - _stats.StartedAt };

        _logger.LogInformation(
            "ETW trace collector stopped. Events: {Events}, Dropped: {Dropped}",
            _stats.EventsCollected,
            _stats.EventsDropped);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Placeholder event stream: completes immediately when the collector is not running,
    /// otherwise waits 100 ms (observing <paramref name="cancellationToken"/>) and completes
    /// without yielding any event.
    /// </summary>
    public async IAsyncEnumerable<RuntimeCallEvent> GetEventsAsync(
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        if (!_isRunning)
        {
            yield break;
        }

        // TODO: Process ETW events
        // This is a placeholder - actual implementation would:
        // 1. Subscribe to ETW event stream
        // 2. Process CLR and native method events
        // 3. Resolve symbols using DbgHelp
        // 4. Correlate stack traces
        // 5. Apply rate limiting

        await Task.Delay(100, cancellationToken).ConfigureAwait(false);
        yield break;
    }

    /// <summary>
    /// Returns the current statistics snapshot.
    /// </summary>
    public TraceCollectorStats GetStatistics() => _stats;

    /// <summary>
    /// Stops the collector if it is still running.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        await StopAsync().ConfigureAwait(false);
    }
}
|
||||
@@ -0,0 +1,136 @@
|
||||
namespace StellaOps.Scanner.Runtime;
|
||||
|
||||
/// <summary>
/// A single caller-to-callee call observed at runtime by a trace collector.
/// </summary>
public sealed record RuntimeCallEvent
{
    /// <summary>
    /// Event time: nanoseconds since boot (Linux) or UTC timestamp (Windows).
    /// </summary>
    /// <remarks>
    /// NOTE(review): TraceIngestionService divides this value by 1,000,000 and treats the
    /// result as Unix milliseconds - confirm the intended time base for both platforms.
    /// </remarks>
    public required ulong Timestamp { get; init; }

    /// <summary>Identifier of the process that made the call.</summary>
    public required uint Pid { get; init; }

    /// <summary>Identifier of the thread that made the call.</summary>
    public required uint Tid { get; init; }

    /// <summary>Address of the calling function.</summary>
    public required ulong CallerAddress { get; init; }

    /// <summary>Address of the called function.</summary>
    public required ulong CalleeAddress { get; init; }

    /// <summary>Symbol name resolved for the caller.</summary>
    public required string CallerSymbol { get; init; }

    /// <summary>Symbol name resolved for the callee.</summary>
    public required string CalleeSymbol { get; init; }

    /// <summary>Path of the binary that contains the symbols.</summary>
    public required string BinaryPath { get; init; }

    /// <summary>Container identifier when the process runs inside a container; otherwise null.</summary>
    public string? ContainerId { get; init; }

    /// <summary>Captured stack trace when available; otherwise null.</summary>
    public IReadOnlyList<ulong>? StackTrace { get; init; }
}
|
||||
|
||||
/// <summary>
/// Settings passed to a trace collector when it is started.
/// </summary>
public sealed record TraceCollectorConfig
{
    /// <summary>Process ID to trace; 0 means all processes.</summary>
    public uint TargetPid { get; init; }

    /// <summary>Container ID to trace, or null when not restricted to a container.</summary>
    public string? TargetContainerId { get; init; }

    /// <summary>Glob patterns selecting the symbols to trace.</summary>
    public IReadOnlyList<string>? SymbolPatterns { get; init; }

    /// <summary>Paths of the binaries to trace.</summary>
    public IReadOnlyList<string>? BinaryPaths { get; init; }

    /// <summary>Rate limit expressed as maximum events per second. Default: 10,000.</summary>
    public int MaxEventsPerSecond { get; init; } = 10_000;

    /// <summary>Size of the event buffer. Default: 8192.</summary>
    public int BufferSize { get; init; } = 8192;

    /// <summary>Whether stack traces are captured with each event. Default: false.</summary>
    public bool CaptureStackTraces { get; init; }
}
|
||||
|
||||
/// <summary>
/// Common contract implemented by the platform-specific runtime trace collectors.
/// </summary>
public interface ITraceCollector : IAsyncDisposable
{
    /// <summary>
    /// Begins collecting runtime traces using the supplied configuration.
    /// </summary>
    /// <param name="config">Collector settings.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task StartAsync(
        TraceCollectorConfig config,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Ends trace collection.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task StopAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Exposes the captured runtime call events as an asynchronous stream.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel enumeration.</param>
    IAsyncEnumerable<RuntimeCallEvent> GetEventsAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns the collector's statistics.
    /// </summary>
    TraceCollectorStats GetStatistics();
}
|
||||
|
||||
/// <summary>
/// Counters and timing information reported by a trace collector.
/// </summary>
public sealed record TraceCollectorStats
{
    /// <summary>Number of events collected.</summary>
    public required long EventsCollected { get; init; }

    /// <summary>Number of events dropped.</summary>
    public required long EventsDropped { get; init; }

    /// <summary>Number of bytes processed.</summary>
    public required long BytesProcessed { get; init; }

    /// <summary>Time at which collection started.</summary>
    public required DateTimeOffset StartedAt { get; init; }

    /// <summary>Length of the collection session; null until a value is assigned.</summary>
    public TimeSpan? Duration { get; init; }
}
|
||||
@@ -0,0 +1,74 @@
|
||||
namespace StellaOps.Scanner.Runtime.Ingestion;
|
||||
|
||||
/// <summary>
/// Runtime trace in its normalized, storable form: aggregated call edges plus metadata.
/// </summary>
public sealed record NormalizedTrace
{
    /// <summary>Identifier of this trace.</summary>
    public required string TraceId { get; init; }

    /// <summary>Identifier of the scan the trace belongs to.</summary>
    public required string ScanId { get; init; }

    /// <summary>Time at which the trace was collected.</summary>
    public required DateTimeOffset CollectedAt { get; init; }

    /// <summary>Call edges observed in the trace.</summary>
    public required IReadOnlyList<RuntimeCallEdge> Edges { get; init; }

    /// <summary>Descriptive metadata about the traced process and session.</summary>
    public required TraceMetadata Metadata { get; init; }
}
|
||||
|
||||
/// <summary>
/// Aggregated caller-to-callee edge observed at runtime.
/// </summary>
public sealed record RuntimeCallEdge
{
    /// <summary>Caller side of the edge.</summary>
    public required string From { get; init; }

    /// <summary>Callee side of the edge.</summary>
    public required string To { get; init; }

    /// <summary>How many times the edge was observed.</summary>
    public required ulong ObservationCount { get; init; }

    /// <summary>Time of the first observation.</summary>
    public required DateTimeOffset FirstObserved { get; init; }

    /// <summary>Time of the last observation.</summary>
    public required DateTimeOffset LastObserved { get; init; }

    /// <summary>Optional stack trace data associated with the edge.</summary>
    public IReadOnlyList<ulong>? StackTraces { get; init; }
}
|
||||
|
||||
/// <summary>
/// Descriptive information attached to a normalized trace.
/// </summary>
public sealed record TraceMetadata
{
    /// <summary>Identifier of the traced process.</summary>
    public required uint ProcessId { get; init; }

    /// <summary>Path of the traced binary.</summary>
    public required string BinaryPath { get; init; }

    /// <summary>Time span covered by the trace.</summary>
    public required TimeSpan Duration { get; init; }

    /// <summary>Number of raw events the trace was built from.</summary>
    public required long EventCount { get; init; }

    /// <summary>Container identifier, when known.</summary>
    public string? ContainerId { get; init; }

    /// <summary>Version of the collector that produced the trace, when known.</summary>
    public string? CollectorVersion { get; init; }
}
|
||||
|
||||
/// <summary>
/// Contract for turning runtime call events into normalized traces and persisting them.
/// </summary>
public interface ITraceIngestionService
{
    /// <summary>
    /// Consumes a stream of runtime call events and produces a normalized trace for storage.
    /// </summary>
    /// <param name="events">Events to ingest.</param>
    /// <param name="scanId">Identifier of the scan the events belong to.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<NormalizedTrace> IngestAsync(
        IAsyncEnumerable<RuntimeCallEvent> events,
        string scanId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Persists a normalized trace.
    /// </summary>
    /// <param name="trace">Trace to store.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<string> StoreAsync(
        NormalizedTrace trace,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Looks up a trace by its identifier.
    /// </summary>
    /// <param name="traceId">Identifier of the trace.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The trace, or null.</returns>
    Task<NormalizedTrace?> GetTraceAsync(
        string traceId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns every trace recorded for a scan.
    /// </summary>
    /// <param name="scanId">Identifier of the scan.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<IReadOnlyList<NormalizedTrace>> GetTracesForScanAsync(
        string scanId,
        CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,187 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scanner.Cache.Abstractions;
|
||||
using System.Security.Cryptography;
|
||||
|
||||
namespace StellaOps.Scanner.Runtime.Ingestion;
|
||||
|
||||
/// <summary>
/// Service for ingesting runtime traces: aggregates call events into edges and stores the
/// resulting trace as JSON in the content-addressable store.
/// </summary>
public sealed class TraceIngestionService : ITraceIngestionService
{
    private readonly IFileContentAddressableStore _cas;
    private readonly ILogger<TraceIngestionService> _logger;
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="cas">Store used to persist and load traces; must not be null.</param>
    /// <param name="logger">Logger instance; must not be null.</param>
    /// <param name="timeProvider">Clock used for collection timestamps and trace ids; defaults to <see cref="TimeProvider.System"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="cas"/> or <paramref name="logger"/> is null.</exception>
    public TraceIngestionService(
        IFileContentAddressableStore cas,
        ILogger<TraceIngestionService> logger,
        TimeProvider? timeProvider = null)
    {
        _cas = cas ?? throw new ArgumentNullException(nameof(cas));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? TimeProvider.System;
    }

    /// <summary>
    /// Aggregates the event stream into one edge per (caller symbol, callee symbol) pair.
    /// </summary>
    /// <remarks>
    /// Each event timestamp is divided by 1,000,000 and interpreted as Unix milliseconds.
    /// Process id and binary path are taken from the first event. Edges are returned sorted
    /// ordinally by <c>From</c>, then <c>To</c>.
    /// </remarks>
    /// <param name="events">Events to ingest; must not be null.</param>
    /// <param name="scanId">Scan identifier; must not be null or whitespace.</param>
    /// <param name="cancellationToken">Token observed while enumerating <paramref name="events"/>.</param>
    /// <returns>The normalized trace (not yet stored).</returns>
    public async Task<NormalizedTrace> IngestAsync(
        IAsyncEnumerable<RuntimeCallEvent> events,
        string scanId,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(events);
        ArgumentException.ThrowIfNullOrWhiteSpace(scanId);

        var edgeMap = new Dictionary<(string, string), RuntimeCallEdgeBuilder>();
        var eventCount = 0L;
        DateTimeOffset? firstEvent = null;
        DateTimeOffset? lastEvent = null;
        uint? pid = null;
        string? binaryPath = null;

        await foreach (var evt in events.WithCancellation(cancellationToken).ConfigureAwait(false))
        {
            eventCount++;

            var timestamp = DateTimeOffset.FromUnixTimeMilliseconds((long)(evt.Timestamp / 1_000_000));

            // Track min/max instead of first/last seen: the stream is not guaranteed to be
            // time-ordered, and arrival order would otherwise yield a negative duration.
            if (firstEvent is null || timestamp < firstEvent.Value)
            {
                firstEvent = timestamp;
            }

            if (lastEvent is null || timestamp > lastEvent.Value)
            {
                lastEvent = timestamp;
            }

            pid ??= evt.Pid;
            binaryPath ??= evt.BinaryPath;

            var key = (evt.CallerSymbol, evt.CalleeSymbol);

            if (edgeMap.TryGetValue(key, out var builder))
            {
                if (timestamp < builder.FirstObserved)
                {
                    builder.FirstObserved = timestamp;
                }

                if (timestamp > builder.LastObserved)
                {
                    builder.LastObserved = timestamp;
                }

                builder.ObservationCount++;
            }
            else
            {
                edgeMap[key] = new RuntimeCallEdgeBuilder
                {
                    From = evt.CallerSymbol,
                    To = evt.CalleeSymbol,
                    FirstObserved = timestamp,
                    LastObserved = timestamp,
                    ObservationCount = 1
                };
            }
        }

        // Ordinal comparison keeps the edge order identical regardless of the host culture.
        var edges = edgeMap.Values
            .Select(b => new RuntimeCallEdge
            {
                From = b.From,
                To = b.To,
                ObservationCount = b.ObservationCount,
                FirstObserved = b.FirstObserved,
                LastObserved = b.LastObserved
            })
            .OrderBy(e => e.From, StringComparer.Ordinal)
            .ThenBy(e => e.To, StringComparer.Ordinal)
            .ToList();

        // An empty stream has zero duration; subtracting two separate clock reads could
        // produce a small non-zero (even negative) value.
        var duration = firstEvent is { } first && lastEvent is { } last
            ? last - first
            : TimeSpan.Zero;

        // Read the clock once so CollectedAt and the trace id agree and follow the injected
        // TimeProvider.
        var collectedAt = _timeProvider.GetUtcNow();

        var trace = new NormalizedTrace
        {
            TraceId = GenerateTraceId(scanId, eventCount, collectedAt),
            ScanId = scanId,
            CollectedAt = collectedAt,
            Edges = edges,
            Metadata = new TraceMetadata
            {
                ProcessId = pid ?? 0,
                BinaryPath = binaryPath ?? "unknown",
                Duration = duration,
                EventCount = eventCount
            }
        };

        _logger.LogInformation(
            "Ingested trace {TraceId} for scan {ScanId}: {EdgeCount} edges from {EventCount} events",
            trace.TraceId,
            scanId,
            edges.Count,
            eventCount);

        return trace;
    }

    /// <summary>
    /// Serializes the trace to UTF-8 JSON and writes it to the store under
    /// <c>trace_{TraceId}</c>.
    /// </summary>
    /// <param name="trace">Trace to store; must not be null.</param>
    /// <param name="cancellationToken">Token passed to the store.</param>
    /// <returns>The trace's <see cref="NormalizedTrace.TraceId"/>.</returns>
    public async Task<string> StoreAsync(
        NormalizedTrace trace,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(trace);

        var json = System.Text.Json.JsonSerializer.Serialize(trace);
        var bytes = System.Text.Encoding.UTF8.GetBytes(json);

        await using var stream = new MemoryStream(bytes, writable: false);
        var casKey = BuildCasKey(trace.TraceId);

        await _cas.PutAsync(new FileCasPutRequest(casKey, stream, leaveOpen: false), cancellationToken)
            .ConfigureAwait(false);

        _logger.LogInformation("Stored trace {TraceId} in CAS with key {CasKey}", trace.TraceId, casKey);

        return trace.TraceId;
    }

    /// <summary>
    /// Loads and deserializes the trace stored under <c>trace_{traceId}</c>.
    /// </summary>
    /// <param name="traceId">Trace identifier; must not be null or whitespace.</param>
    /// <param name="cancellationToken">Token passed to the store.</param>
    /// <returns>
    /// The trace, or null when the store returns null or when loading/deserializing fails
    /// (the failure is logged). Cancellation is propagated rather than reported as "not found".
    /// </returns>
    public async Task<NormalizedTrace?> GetTraceAsync(
        string traceId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(traceId);

        var casKey = BuildCasKey(traceId);

        try
        {
            var bytes = await _cas.GetAsync(new FileCasGetRequest(casKey), cancellationToken)
                .ConfigureAwait(false);

            if (bytes is null)
            {
                return null;
            }

            return System.Text.Json.JsonSerializer.Deserialize<NormalizedTrace>(bytes);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogError(ex, "Error retrieving trace {TraceId}", traceId);
            return null;
        }
    }

    /// <summary>
    /// Returns the traces of a scan. The scan-to-trace index is not implemented yet, so the
    /// result is always empty.
    /// </summary>
    /// <param name="scanId">Scan identifier; must not be null or whitespace.</param>
    /// <param name="cancellationToken">When already cancelled, the returned task is cancelled.</param>
    public Task<IReadOnlyList<NormalizedTrace>> GetTracesForScanAsync(
        string scanId,
        CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(scanId);

        // TODO: Implement scan-to-trace index
        // For now, return empty list (synchronously - no artificial delay is needed).
        if (cancellationToken.IsCancellationRequested)
        {
            return Task.FromCanceled<IReadOnlyList<NormalizedTrace>>(cancellationToken);
        }

        return Task.FromResult<IReadOnlyList<NormalizedTrace>>(Array.Empty<NormalizedTrace>());
    }

    // Single definition of the storage key so StoreAsync and GetTraceAsync cannot drift apart.
    private static string BuildCasKey(string traceId) => $"trace_{traceId}";

    // Derives "trace_" + the first 16 lowercase hex characters of
    // SHA-256("{scanId}|{eventCount}|{collectedAt UTC ticks}").
    private static string GenerateTraceId(string scanId, long eventCount, DateTimeOffset collectedAt)
    {
        var input = FormattableString.Invariant($"{scanId}|{eventCount}|{collectedAt.UtcTicks}");
        var hash = SHA256.HashData(System.Text.Encoding.UTF8.GetBytes(input));
        return $"trace_{Convert.ToHexString(hash)[..16].ToLowerInvariant()}";
    }

    // Mutable accumulator for one (caller, callee) pair while events are being folded.
    private sealed class RuntimeCallEdgeBuilder
    {
        public required string From { get; init; }
        public required string To { get; init; }
        public required DateTimeOffset FirstObserved { get; set; }
        public required DateTimeOffset LastObserved { get; set; }
        public required ulong ObservationCount { get; set; }
    }
}
|
||||
@@ -0,0 +1,62 @@
|
||||
using StellaOps.Scanner.Reachability;
|
||||
using StellaOps.Scanner.Runtime.Ingestion;
|
||||
|
||||
namespace StellaOps.Scanner.Runtime.Merge;
|
||||
|
||||
/// <summary>
/// Result of combining a static analysis graph with a runtime trace.
/// </summary>
public sealed record MergedGraph
{
    /// <summary>The static graph that was merged.</summary>
    public required RichGraph StaticGraph { get; init; }

    /// <summary>The runtime trace that was merged.</summary>
    public required NormalizedTrace RuntimeTrace { get; init; }

    /// <summary>Graph containing the union of static and runtime edges.</summary>
    public required RichGraph UnionGraph { get; init; }

    /// <summary>Summary counts describing the merge.</summary>
    public required MergeStatistics Statistics { get; init; }
}
|
||||
|
||||
/// <summary>
/// Summary counts produced by a static + runtime merge.
/// </summary>
public sealed record MergeStatistics
{
    /// <summary>Number of edges in the static graph.</summary>
    public required int StaticEdges { get; init; }

    /// <summary>Number of edges in the runtime trace.</summary>
    public required int RuntimeEdges { get; init; }

    /// <summary>Number of static edges that were also observed at runtime.</summary>
    public required int ConfirmedEdges { get; init; }

    /// <summary>Number of edges seen only at runtime.</summary>
    public required int NewEdges { get; init; }

    /// <summary>Number of static edges not observed at runtime.</summary>
    public required int UnobservedEdges { get; init; }

    /// <summary>Percentage of static edges confirmed by runtime observation.</summary>
    public required double CoveragePercent { get; init; }
}
|
||||
|
||||
/// <summary>
/// Runtime-derived annotations attached to a static edge.
/// </summary>
public sealed record EdgeEnrichment
{
    /// <summary>Whether the edge was observed at runtime.</summary>
    public required bool Observed { get; init; }

    /// <summary>Time of the first runtime observation, or null.</summary>
    public required DateTimeOffset? FirstObserved { get; init; }

    /// <summary>Time of the last runtime observation, or null.</summary>
    public required DateTimeOffset? LastObserved { get; init; }

    /// <summary>Number of runtime observations.</summary>
    public required ulong ObservationCount { get; init; }

    /// <summary>Amount by which the edge's confidence is raised.</summary>
    public required double ConfidenceBoost { get; init; }
}
|
||||
|
||||
/// <summary>
/// Contract for combining static analysis graphs with runtime trace data.
/// </summary>
public interface IStaticRuntimeMerger
{
    /// <summary>
    /// Merges a static graph with a runtime trace.
    /// </summary>
    /// <param name="staticGraph">Graph produced by static analysis.</param>
    /// <param name="runtimeTrace">Normalized runtime trace.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task<MergedGraph> MergeAsync(
        RichGraph staticGraph,
        NormalizedTrace runtimeTrace,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Computes runtime enrichment for the edges of a static graph.
    /// </summary>
    /// <param name="staticGraph">Graph produced by static analysis.</param>
    /// <param name="runtimeTrace">Normalized runtime trace.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>Enrichment entries indexed by a string key.</returns>
    Task<IReadOnlyDictionary<string, EdgeEnrichment>> EnrichEdgesAsync(
        RichGraph staticGraph,
        NormalizedTrace runtimeTrace,
        CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,186 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scanner.Reachability;
|
||||
using StellaOps.Scanner.Runtime.Ingestion;
|
||||
|
||||
namespace StellaOps.Scanner.Runtime.Merge;
|
||||
|
||||
/// <summary>
|
||||
/// Merges static analysis with runtime observations.
|
||||
/// </summary>
|
||||
public sealed class StaticRuntimeMerger : IStaticRuntimeMerger
|
||||
{
|
||||
private readonly ILogger<StaticRuntimeMerger> _logger;
|
||||
private const double RuntimeObservationConfidenceBoost = 0.3;
|
||||
|
||||
public StaticRuntimeMerger(ILogger<StaticRuntimeMerger> logger)
|
||||
{
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task<MergedGraph> MergeAsync(
|
||||
RichGraph staticGraph,
|
||||
NormalizedTrace runtimeTrace,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(staticGraph);
|
||||
ArgumentNullException.ThrowIfNull(runtimeTrace);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Merging static graph ({StaticEdges} edges) with runtime trace ({RuntimeEdges} edges)",
|
||||
staticGraph.Edges.Count,
|
||||
runtimeTrace.Edges.Count);
|
||||
|
||||
var enrichment = await EnrichEdgesAsync(staticGraph, runtimeTrace, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
var unionEdges = BuildUnionEdges(staticGraph, runtimeTrace, enrichment);
|
||||
var unionGraph = staticGraph with { Edges = unionEdges };
|
||||
|
||||
var stats = ComputeStatistics(staticGraph, runtimeTrace, enrichment);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Merge complete: {Confirmed} confirmed, {New} new, {Unobserved} unobserved, {Coverage:F1}% coverage",
|
||||
stats.ConfirmedEdges,
|
||||
stats.NewEdges,
|
||||
stats.UnobservedEdges,
|
||||
stats.CoveragePercent);
|
||||
|
||||
return new MergedGraph
|
||||
{
|
||||
StaticGraph = staticGraph,
|
||||
RuntimeTrace = runtimeTrace,
|
||||
UnionGraph = unionGraph,
|
||||
Statistics = stats
|
||||
};
|
||||
}
|
||||
|
||||
public Task<IReadOnlyDictionary<string, EdgeEnrichment>> EnrichEdgesAsync(
|
||||
RichGraph staticGraph,
|
||||
NormalizedTrace runtimeTrace,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(staticGraph);
|
||||
ArgumentNullException.ThrowIfNull(runtimeTrace);
|
||||
|
||||
var runtimeEdgeMap = runtimeTrace.Edges
|
||||
.ToDictionary(e => EdgeKey(e.From, e.To), e => e);
|
||||
|
||||
var enrichment = new Dictionary<string, EdgeEnrichment>();
|
||||
|
||||
foreach (var staticEdge in staticGraph.Edges)
|
||||
{
|
||||
var key = EdgeKey(staticEdge.From, staticEdge.To);
|
||||
|
||||
if (runtimeEdgeMap.TryGetValue(key, out var runtimeEdge))
|
||||
{
|
||||
// Edge confirmed by runtime observation
|
||||
enrichment[key] = new EdgeEnrichment
|
||||
{
|
||||
Observed = true,
|
||||
FirstObserved = runtimeEdge.FirstObserved,
|
||||
LastObserved = runtimeEdge.LastObserved,
|
||||
ObservationCount = runtimeEdge.ObservationCount,
|
||||
ConfidenceBoost = RuntimeObservationConfidenceBoost
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
// Edge not observed at runtime
|
||||
enrichment[key] = new EdgeEnrichment
|
||||
{
|
||||
Observed = false,
|
||||
FirstObserved = null,
|
||||
LastObserved = null,
|
||||
ObservationCount = 0,
|
||||
ConfidenceBoost = 0.0
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return Task.FromResult<IReadOnlyDictionary<string, EdgeEnrichment>>(enrichment);
|
||||
}
|
||||
|
||||
private IReadOnlyList<RichGraphEdge> BuildUnionEdges(
|
||||
RichGraph staticGraph,
|
||||
NormalizedTrace runtimeTrace,
|
||||
IReadOnlyDictionary<string, EdgeEnrichment> enrichment)
|
||||
{
|
||||
var unionEdges = new List<RichGraphEdge>();
|
||||
var staticEdgeKeys = new HashSet<string>();
|
||||
|
||||
// Add enriched static edges
|
||||
foreach (var staticEdge in staticGraph.Edges)
|
||||
{
|
||||
var key = EdgeKey(staticEdge.From, staticEdge.To);
|
||||
staticEdgeKeys.Add(key);
|
||||
|
||||
if (enrichment.TryGetValue(key, out var enrich) && enrich.Observed)
|
||||
{
|
||||
var boostedConfidence = Math.Min(1.0, staticEdge.Confidence + enrich.ConfidenceBoost);
|
||||
unionEdges.Add(staticEdge with { Confidence = boostedConfidence });
|
||||
}
|
||||
else
|
||||
{
|
||||
unionEdges.Add(staticEdge);
|
||||
}
|
||||
}
|
||||
|
||||
// Add runtime-only edges (new discoveries)
|
||||
foreach (var runtimeEdge in runtimeTrace.Edges)
|
||||
{
|
||||
var key = EdgeKey(runtimeEdge.From, runtimeEdge.To);
|
||||
|
||||
if (!staticEdgeKeys.Contains(key))
|
||||
{
|
||||
// New edge discovered at runtime
|
||||
unionEdges.Add(new RichGraphEdge(
|
||||
From: runtimeEdge.From,
|
||||
To: runtimeEdge.To,
|
||||
Kind: "runtime_observed",
|
||||
Purl: null,
|
||||
SymbolDigest: null,
|
||||
Evidence: new[] { "runtime_observation" },
|
||||
Confidence: 0.95,
|
||||
Candidates: null,
|
||||
Gates: null,
|
||||
GateMultiplierBps: 10000));
|
||||
}
|
||||
}
|
||||
|
||||
return unionEdges.OrderBy(e => e.From).ThenBy(e => e.To).ToList();
|
||||
}
|
||||
|
||||
private static MergeStatistics ComputeStatistics(
|
||||
RichGraph staticGraph,
|
||||
NormalizedTrace runtimeTrace,
|
||||
IReadOnlyDictionary<string, EdgeEnrichment> enrichment)
|
||||
{
|
||||
var staticEdges = staticGraph.Edges.Count;
|
||||
var runtimeEdges = runtimeTrace.Edges.Count;
|
||||
var confirmedEdges = enrichment.Count(e => e.Value.Observed);
|
||||
var unobservedEdges = staticEdges - confirmedEdges;
|
||||
|
||||
var runtimeEdgeKeys = runtimeTrace.Edges
|
||||
.Select(e => EdgeKey(e.From, e.To))
|
||||
.ToHashSet();
|
||||
|
||||
var staticEdgeKeys = staticGraph.Edges
|
||||
.Select(e => EdgeKey(e.From, e.To))
|
||||
.ToHashSet();
|
||||
|
||||
var newEdges = runtimeEdgeKeys.Except(staticEdgeKeys).Count();
|
||||
var coverage = staticEdges > 0 ? (double)confirmedEdges / staticEdges * 100.0 : 0.0;
|
||||
|
||||
return new MergeStatistics
|
||||
{
|
||||
StaticEdges = staticEdges,
|
||||
RuntimeEdges = runtimeEdges,
|
||||
ConfirmedEdges = confirmedEdges,
|
||||
NewEdges = newEdges,
|
||||
UnobservedEdges = unobservedEdges,
|
||||
CoveragePercent = coverage
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the lookup key for a directed edge by joining both endpoint
/// identifiers with a right-arrow character (U+2192).
/// </summary>
/// <param name="from">Identifier of the edge's source endpoint.</param>
/// <param name="to">Identifier of the edge's target endpoint.</param>
/// <returns>The string <c>from→to</c>; a null endpoint contributes an empty string.</returns>
private static string EdgeKey(string from, string to)
{
    return string.Concat(from, "→", to);
}
|
||||
}
|
||||
@@ -0,0 +1,419 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Scanner.Runtime.Retention;
|
||||
|
||||
/// <summary>
/// Settings that control how long runtime traces are kept and how pruning runs.
/// Every value has a default, so <c>new TraceRetentionOptions()</c> is a usable policy.
/// </summary>
public sealed record TraceRetentionOptions
{
    /// <summary>
    /// How long an ordinary trace is kept before it becomes eligible for deletion.
    /// Defaults to 30 days.
    /// </summary>
    public TimeSpan DefaultRetentionPeriod { get; init; } = TimeSpan.FromDays(30);

    /// <summary>
    /// Longer retention window applied to traces that active slices still reference.
    /// Defaults to 90 days.
    /// </summary>
    public TimeSpan ActiveSliceRetentionPeriod { get; init; } = TimeSpan.FromDays(90);

    /// <summary>
    /// Upper bound on trace storage, in bytes. Defaults to 10 GiB (10 * 2^30).
    /// </summary>
    public long MaxStorageQuotaBytes { get; init; } = 10L << 30;

    /// <summary>
    /// When true, traces are folded into summaries before they are deleted.
    /// Defaults to true.
    /// </summary>
    public bool EnableAggregation { get; init; } = true;

    /// <summary>
    /// Age at which a trace becomes a candidate for aggregation. Defaults to 7 days.
    /// </summary>
    public TimeSpan AggregationThreshold { get; init; } = TimeSpan.FromDays(7);

    /// <summary>
    /// Maximum number of traces requested from storage per pruning batch.
    /// Defaults to 1000.
    /// </summary>
    public int PruningBatchSize { get; init; } = 1000;

    /// <summary>
    /// Time between automatic pruning runs. Defaults to 1 hour.
    /// </summary>
    public TimeSpan PruningInterval { get; init; } = TimeSpan.FromHours(1);
}
|
||||
|
||||
/// <summary>
/// Outcome of one pruning cycle: what was removed, what was kept, and how long it took.
/// </summary>
public sealed record PruningResult
{
    /// <summary>Instant at which the cycle finished.</summary>
    public required DateTimeOffset CompletedAt { get; init; }

    /// <summary>Number of traces deleted during the cycle.</summary>
    public required int TracesDeleted { get; init; }

    /// <summary>Number of traces folded into summaries during the cycle.</summary>
    public required int TracesAggregated { get; init; }

    /// <summary>Total size, in bytes, of the deleted traces.</summary>
    public required long BytesFreed { get; init; }

    /// <summary>Number of traces still stored after the cycle.</summary>
    public required int TracesRetained { get; init; }

    /// <summary>Elapsed time of the cycle.</summary>
    public required TimeSpan Duration { get; init; }

    /// <summary>Failure message when the cycle hit an error; null otherwise.</summary>
    public string? Error { get; init; }
}
|
||||
|
||||
/// <summary>
/// Compact roll-up that replaces the raw traces of one scan once they are pruned.
/// </summary>
public sealed record TraceSummary
{
    /// <summary>Scan whose traces were aggregated.</summary>
    public required string ScanId { get; init; }

    /// <summary>Start of the period covered by the aggregated traces.</summary>
    public required DateTimeOffset PeriodStart { get; init; }

    /// <summary>End of the period covered by the aggregated traces.</summary>
    public required DateTimeOffset PeriodEnd { get; init; }

    /// <summary>Total number of events across the aggregated traces.</summary>
    public required int TotalEvents { get; init; }

    /// <summary>Number of distinct edges seen across the aggregated traces.</summary>
    public required int UniqueEdges { get; init; }

    /// <summary>Event count per edge key.</summary>
    public required Dictionary<string, int> EdgeCounts { get; init; }

    /// <summary>Instant at which the summary was produced.</summary>
    public required DateTimeOffset AggregatedAt { get; init; }
}
|
||||
|
||||
/// <summary>
/// Storage operations the retention manager relies on to list, read, summarize and delete traces.
/// </summary>
public interface ITraceStorageProvider
{
    /// <summary>
    /// Lists metadata for stored traces older than <paramref name="threshold"/>,
    /// returning at most <paramref name="limit"/> entries.
    /// </summary>
    Task<IReadOnlyList<TraceMetadata>> GetTracesOlderThanAsync(
        DateTimeOffset threshold,
        int limit,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Lists metadata for the traces that slices currently reference.
    /// </summary>
    Task<IReadOnlyList<TraceMetadata>> GetTracesReferencedBySlicesAsync(
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Reports the total storage consumed by traces; callers in this file treat the value as bytes.
    /// </summary>
    Task<long> GetTotalStorageUsedAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Deletes the traces identified by <paramref name="traceIds"/>.
    /// </summary>
    Task DeleteTracesAsync(
        IEnumerable<string> traceIds,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Persists an aggregated summary.
    /// </summary>
    Task StoreSummaryAsync(
        TraceSummary summary,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Loads the call events recorded for the trace identified by <paramref name="traceId"/>.
    /// </summary>
    Task<IReadOnlyList<RuntimeCallEvent>> GetTraceEventsAsync(
        string traceId,
        CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// Descriptive record for one stored trace, used to decide what to prune.
/// </summary>
public sealed record TraceMetadata
{
    /// <summary>Identifier of the stored trace.</summary>
    public required string TraceId { get; init; }

    /// <summary>Identifier of the scan the trace belongs to.</summary>
    public required string ScanId { get; init; }

    /// <summary>Creation time of the trace; retention age is measured from it.</summary>
    public required DateTimeOffset CreatedAt { get; init; }

    /// <summary>Storage footprint of the trace, in bytes.</summary>
    public required long SizeBytes { get; init; }

    /// <summary>Number of events recorded in the trace.</summary>
    public required int EventCount { get; init; }

    /// <summary>True when a slice references the trace. Defaults to false.</summary>
    public bool IsReferencedBySlice { get; init; }
}
|
||||
|
||||
/// <summary>
/// A single caller-to-callee call observed at runtime (shared with RuntimeStaticMerger).
/// </summary>
public sealed record RuntimeCallEvent
{
    /// <summary>Raw event timestamp. NOTE(review): unit and epoch are not defined here; confirm against the collector.</summary>
    public required ulong Timestamp { get; init; }

    /// <summary>Process identifier associated with the event.</summary>
    public required uint Pid { get; init; }

    /// <summary>Thread identifier associated with the event.</summary>
    public required uint Tid { get; init; }

    /// <summary>Symbol of the calling function.</summary>
    public required string CallerSymbol { get; init; }

    /// <summary>Symbol of the called function.</summary>
    public required string CalleeSymbol { get; init; }

    /// <summary>Path of the binary associated with the event.</summary>
    public required string BinaryPath { get; init; }

    /// <summary>Optional digest of the trace the event came from; null when not set.</summary>
    public string? TraceDigest { get; init; }
}
|
||||
|
||||
/// <summary>
/// Manages trace retention and pruning policies.
/// </summary>
/// <remarks>
/// A pruning cycle enforces the storage quota first, then deletes unprotected traces past the
/// default retention period (optionally aggregating them into summaries beforehand), and finally
/// deletes slice-referenced traces that have outlived the extended retention period.
/// </remarks>
public sealed class TraceRetentionManager
{
    private readonly ITraceStorageProvider _storage;
    private readonly TraceRetentionOptions _options;
    private readonly ILogger<TraceRetentionManager> _logger;
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Creates a retention manager.
    /// </summary>
    /// <param name="storage">Trace storage backend; required.</param>
    /// <param name="options">Retention policy; defaults to <c>new TraceRetentionOptions()</c>.</param>
    /// <param name="logger">Logger; defaults to a no-op logger.</param>
    /// <param name="timeProvider">Clock; defaults to <see cref="TimeProvider.System"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="storage"/> is null.</exception>
    public TraceRetentionManager(
        ITraceStorageProvider storage,
        TraceRetentionOptions? options = null,
        ILogger<TraceRetentionManager>? logger = null,
        TimeProvider? timeProvider = null)
    {
        _storage = storage ?? throw new ArgumentNullException(nameof(storage));
        _options = options ?? new TraceRetentionOptions();
        _logger = logger ?? Microsoft.Extensions.Logging.Abstractions.NullLogger<TraceRetentionManager>.Instance;
        _timeProvider = timeProvider ?? TimeProvider.System;
    }

    /// <summary>
    /// Run a pruning cycle to enforce retention policies.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the cycle.</param>
    /// <returns>
    /// Counters for the cycle. Storage failures are logged and reported through
    /// <see cref="PruningResult.Error"/> instead of being thrown.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// The caller requested cancellation through <paramref name="cancellationToken"/>.
    /// </exception>
    public async Task<PruningResult> PruneAsync(CancellationToken cancellationToken = default)
    {
        var startTime = _timeProvider.GetUtcNow();
        var tracesDeleted = 0;
        var tracesAggregated = 0;
        long bytesFreed = 0;
        string? error = null;

        try
        {
            _logger.LogInformation("Starting trace pruning cycle");

            // Get traces referenced by active slices (protected from deletion)
            var protectedTraces = await _storage.GetTracesReferencedBySlicesAsync(cancellationToken)
                .ConfigureAwait(false);
            var protectedIds = protectedTraces.Select(t => t.TraceId).ToHashSet(StringComparer.Ordinal);

            _logger.LogDebug("{Count} traces protected by slice references", protectedIds.Count);

            // Check quota - if exceeded, delete oldest first regardless of age
            var currentUsage = await _storage.GetTotalStorageUsedAsync(cancellationToken).ConfigureAwait(false);
            if (currentUsage > _options.MaxStorageQuotaBytes)
            {
                var quotaResult = await EnforceQuotaAsync(protectedIds, currentUsage, cancellationToken)
                    .ConfigureAwait(false);
                tracesDeleted += quotaResult.Deleted;
                bytesFreed += quotaResult.BytesFreed;
            }

            // Delete traces older than retention period
            var retentionThreshold = startTime - _options.DefaultRetentionPeriod;
            var oldTraces = await _storage.GetTracesOlderThanAsync(
                retentionThreshold,
                _options.PruningBatchSize,
                cancellationToken).ConfigureAwait(false);

            var tracesToDelete = oldTraces
                .Where(t => !protectedIds.Contains(t.TraceId))
                .ToList();

            // Aggregate before deletion if enabled
            if (_options.EnableAggregation && tracesToDelete.Count > 0)
            {
                tracesAggregated = await AggregateTracesAsync(tracesToDelete, cancellationToken)
                    .ConfigureAwait(false);
            }

            // Delete old traces
            if (tracesToDelete.Count > 0)
            {
                await _storage.DeleteTracesAsync(
                    tracesToDelete.Select(t => t.TraceId),
                    cancellationToken).ConfigureAwait(false);

                // Log this step's bytes only; the running total may already include quota deletions.
                var expiredBytes = tracesToDelete.Sum(t => t.SizeBytes);
                bytesFreed += expiredBytes;
                tracesDeleted += tracesToDelete.Count;

                _logger.LogInformation(
                    "Deleted {Count} traces older than {Threshold:O}, freed {Bytes:N0} bytes",
                    tracesToDelete.Count,
                    retentionThreshold,
                    expiredBytes);
            }

            // Delete protected traces if they exceed extended retention
            var extendedThreshold = startTime - _options.ActiveSliceRetentionPeriod;
            var expiredProtected = protectedTraces
                .Where(t => t.CreatedAt < extendedThreshold)
                .ToList();

            if (expiredProtected.Count > 0)
            {
                await _storage.DeleteTracesAsync(
                    expiredProtected.Select(t => t.TraceId),
                    cancellationToken).ConfigureAwait(false);

                bytesFreed += expiredProtected.Sum(t => t.SizeBytes);
                tracesDeleted += expiredProtected.Count;

                _logger.LogInformation(
                    "Deleted {Count} protected traces exceeding extended retention ({Days} days)",
                    expiredProtected.Count,
                    _options.ActiveSliceRetentionPeriod.TotalDays);
            }
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Caller-requested cancellation is not a pruning failure; let it propagate.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error during trace pruning");
            error = ex.Message;
        }

        var duration = _timeProvider.GetUtcNow() - startTime;

        // Counting retained traces also touches storage, so it gets the same
        // error-reporting treatment as the pruning steps instead of throwing.
        var tracesRetained = 0;
        try
        {
            tracesRetained = await GetRetainedCountAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error counting retained traces after pruning");
            error ??= ex.Message;
        }

        _logger.LogInformation(
            "Pruning cycle completed in {Duration:N1}ms: {Deleted} deleted, {Aggregated} aggregated, {Retained} retained, {BytesFreed:N0} bytes freed",
            duration.TotalMilliseconds,
            tracesDeleted,
            tracesAggregated,
            tracesRetained,
            bytesFreed);

        return new PruningResult
        {
            CompletedAt = _timeProvider.GetUtcNow(),
            TracesDeleted = tracesDeleted,
            TracesAggregated = tracesAggregated,
            BytesFreed = bytesFreed,
            TracesRetained = tracesRetained,
            Duration = duration,
            Error = error
        };
    }

    /// <summary>
    /// Get current storage statistics.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the storage queries.</param>
    /// <returns>
    /// Usage, quota and protected-trace totals. The usage ratio is a plain floating-point
    /// division, so it is not finite when the configured quota is zero.
    /// </returns>
    public async Task<StorageStatistics> GetStatisticsAsync(CancellationToken cancellationToken = default)
    {
        var usage = await _storage.GetTotalStorageUsedAsync(cancellationToken).ConfigureAwait(false);
        var protectedTraces = await _storage.GetTracesReferencedBySlicesAsync(cancellationToken)
            .ConfigureAwait(false);

        return new StorageStatistics
        {
            TotalBytesUsed = usage,
            QuotaBytesLimit = _options.MaxStorageQuotaBytes,
            QuotaUsageRatio = (double)usage / _options.MaxStorageQuotaBytes,
            ProtectedTraceCount = protectedTraces.Count,
            ProtectedBytesUsed = protectedTraces.Sum(t => t.SizeBytes)
        };
    }

    /// <summary>
    /// Deletes unprotected traces, oldest first within each batch, until usage drops to 90% of the quota.
    /// </summary>
    /// <param name="protectedIds">Trace ids that must not be deleted.</param>
    /// <param name="currentUsage">Storage currently in use, in bytes.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>Number of traces deleted and the bytes they occupied.</returns>
    private async Task<(int Deleted, long BytesFreed)> EnforceQuotaAsync(
        HashSet<string> protectedIds,
        long currentUsage,
        CancellationToken cancellationToken)
    {
        var targetUsage = (long)(_options.MaxStorageQuotaBytes * 0.9); // Target 90% of quota
        var bytesToFree = currentUsage - targetUsage;

        if (bytesToFree <= 0)
        {
            return (0, 0);
        }

        _logger.LogWarning(
            "Storage quota exceeded ({Usage:N0}/{Quota:N0} bytes), freeing {ToFree:N0} bytes",
            currentUsage,
            _options.MaxStorageQuotaBytes,
            bytesToFree);

        var deleted = 0;
        long freed = 0;

        // Keep the listing cutoff at "now" for every batch. Deleted traces drop out of later
        // listings on their own; moving the cutoff back to the oldest deleted trace would make
        // the next listing empty when the store lists oldest-first, ending enforcement early.
        var listingCutoff = _timeProvider.GetUtcNow();

        // Ids already deleted here, so a store that lists a trace again cannot be double counted.
        var alreadyDeleted = new HashSet<string>(StringComparer.Ordinal);

        while (freed < bytesToFree)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var batch = await _storage.GetTracesOlderThanAsync(
                listingCutoff,
                _options.PruningBatchSize,
                cancellationToken).ConfigureAwait(false);

            if (batch.Count == 0)
            {
                break;
            }

            var candidates = batch
                .Where(t => !protectedIds.Contains(t.TraceId) && !alreadyDeleted.Contains(t.TraceId))
                .OrderBy(t => t.CreatedAt);

            var toDelete = new List<TraceMetadata>();
            long batchBytes = 0;
            foreach (var candidate in candidates)
            {
                if (freed + batchBytes >= bytesToFree)
                {
                    break;
                }

                toDelete.Add(candidate);
                batchBytes += candidate.SizeBytes;
            }

            if (toDelete.Count == 0)
            {
                break;
            }

            var idsToDelete = toDelete.ConvertAll(t => t.TraceId);
            await _storage.DeleteTracesAsync(idsToDelete, cancellationToken).ConfigureAwait(false);

            // Update the totals only after the delete call succeeded.
            deleted += toDelete.Count;
            freed += batchBytes;
            alreadyDeleted.UnionWith(idsToDelete);
        }

        if (freed < bytesToFree)
        {
            _logger.LogWarning(
                "Quota enforcement stopped after freeing {Freed:N0} of {ToFree:N0} bytes; no further deletable traces were returned",
                freed,
                bytesToFree);
        }

        return (deleted, freed);
    }

    /// <summary>
    /// Builds and stores one summary per scan for the given traces.
    /// </summary>
    /// <param name="traces">Traces about to be deleted.</param>
    /// <param name="cancellationToken">Token used to cancel the storage calls.</param>
    /// <returns>
    /// Number of traces covered by a stored summary; scans whose traces contain no events
    /// produce no summary and are not counted.
    /// </returns>
    private async Task<int> AggregateTracesAsync(
        IReadOnlyList<TraceMetadata> traces,
        CancellationToken cancellationToken)
    {
        var aggregated = 0;

        foreach (var group in traces.GroupBy(t => t.ScanId))
        {
            var edgeCounts = new Dictionary<string, int>(StringComparer.Ordinal);
            var totalEvents = 0;
            var tracesInGroup = 0;

            DateTimeOffset? periodStart = null;
            DateTimeOffset? periodEnd = null;

            foreach (var trace in group)
            {
                tracesInGroup++;

                // Track the earliest and latest creation time within the scan.
                if (periodStart is null || trace.CreatedAt < periodStart.Value)
                {
                    periodStart = trace.CreatedAt;
                }

                if (periodEnd is null || trace.CreatedAt > periodEnd.Value)
                {
                    periodEnd = trace.CreatedAt;
                }

                var events = await _storage.GetTraceEventsAsync(trace.TraceId, cancellationToken)
                    .ConfigureAwait(false);

                foreach (var evt in events)
                {
                    totalEvents++;
                    var edgeKey = $"{evt.CallerSymbol}->{evt.CalleeSymbol}";
                    edgeCounts.TryGetValue(edgeKey, out var count);
                    edgeCounts[edgeKey] = count + 1;
                }
            }

            if (periodStart.HasValue && periodEnd.HasValue && totalEvents > 0)
            {
                var summary = new TraceSummary
                {
                    ScanId = group.Key,
                    PeriodStart = periodStart.Value,
                    PeriodEnd = periodEnd.Value,
                    TotalEvents = totalEvents,
                    UniqueEdges = edgeCounts.Count,
                    EdgeCounts = edgeCounts,
                    AggregatedAt = _timeProvider.GetUtcNow()
                };

                await _storage.StoreSummaryAsync(summary, cancellationToken).ConfigureAwait(false);
                aggregated += tracesInGroup;
            }
        }

        return aggregated;
    }

    /// <summary>
    /// Counts the traces currently stored by listing everything older than a far-future
    /// instant with no practical limit.
    /// </summary>
    private async Task<int> GetRetainedCountAsync(CancellationToken cancellationToken)
    {
        var traces = await _storage.GetTracesOlderThanAsync(
            _timeProvider.GetUtcNow().AddYears(100), // Far future to get all
            int.MaxValue,
            cancellationToken).ConfigureAwait(false);
        return traces.Count;
    }
}
|
||||
|
||||
/// <summary>
/// Point-in-time view of trace storage consumption relative to the configured quota.
/// </summary>
public sealed record StorageStatistics
{
    /// <summary>Bytes currently used by stored traces.</summary>
    public long TotalBytesUsed { get; init; }

    /// <summary>Configured storage quota, in bytes.</summary>
    public long QuotaBytesLimit { get; init; }

    /// <summary>Used bytes divided by the quota (1.0 means exactly at quota).</summary>
    public double QuotaUsageRatio { get; init; }

    /// <summary>Number of traces referenced by slices.</summary>
    public int ProtectedTraceCount { get; init; }

    /// <summary>Bytes used by the slice-referenced traces.</summary>
    public long ProtectedBytesUsed { get; init; }
}
|
||||
@@ -0,0 +1,95 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using StellaOps.Scanner.Reachability.Slices;
|
||||
using StellaOps.Scanner.Runtime.Merge;
|
||||
|
||||
namespace StellaOps.Scanner.Runtime.Slices;
|
||||
|
||||
/// <summary>
/// Generates reachability slices with runtime observation evidence.
/// </summary>
public sealed class ObservedSliceGenerator
{
    private readonly SliceExtractor _sliceExtractor;
    private readonly ILogger<ObservedSliceGenerator> _logger;
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Creates a generator.
    /// </summary>
    /// <param name="sliceExtractor">Extractor that turns a graph plus query into a slice; required.</param>
    /// <param name="logger">Logger; required.</param>
    /// <param name="timeProvider">
    /// Clock used only as a fallback when an observed edge carries no timestamps;
    /// defaults to <see cref="TimeProvider.System"/>. Inject a fixed clock for deterministic output.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sliceExtractor"/> or <paramref name="logger"/> is null.
    /// </exception>
    public ObservedSliceGenerator(
        SliceExtractor sliceExtractor,
        ILogger<ObservedSliceGenerator> logger,
        TimeProvider? timeProvider = null)
    {
        _sliceExtractor = sliceExtractor ?? throw new ArgumentNullException(nameof(sliceExtractor));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? TimeProvider.System;
    }

    /// <summary>
    /// Generate slice from merged static+runtime graph.
    /// </summary>
    /// <param name="mergedGraph">Merge result; its union graph is the extraction input.</param>
    /// <param name="query">Slice query passed to the extractor.</param>
    /// <param name="inputs">Slice inputs passed to the extractor.</param>
    /// <param name="manifest">Scan manifest passed to the extractor.</param>
    /// <returns>The slice produced by the extractor.</returns>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public ReachabilitySlice GenerateObservedSlice(
        MergedGraph mergedGraph,
        SliceQuery query,
        SliceInputs inputs,
        StellaOps.Scanner.Core.ScanManifest manifest)
    {
        ArgumentNullException.ThrowIfNull(mergedGraph);
        ArgumentNullException.ThrowIfNull(query);
        ArgumentNullException.ThrowIfNull(inputs);
        ArgumentNullException.ThrowIfNull(manifest);

        _logger.LogInformation(
            "Generating observed slice with {Coverage:F1}% runtime coverage",
            mergedGraph.Statistics.CoveragePercent);

        var extractionRequest = new SliceExtractionRequest(
            mergedGraph.UnionGraph,
            inputs,
            query,
            manifest);

        var slice = _sliceExtractor.Extract(extractionRequest);

        _logger.LogInformation(
            "Generated observed slice: {Nodes} nodes, {Edges} edges, verdict={Verdict}",
            slice.Subgraph.Nodes.Length,
            slice.Subgraph.Edges.Length,
            slice.Verdict.Status);

        return slice;
    }

    /// <summary>
    /// Annotate slice edges with runtime observation metadata.
    /// </summary>
    /// <param name="slice">Slice whose edges are annotated; the input instance is not modified.</param>
    /// <param name="enrichment">
    /// Enrichment keyed by <c>from→to</c>; only entries flagged as observed produce an annotation.
    /// </param>
    /// <returns>A copy of <paramref name="slice"/> whose observed edges carry observation metadata.</returns>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public ReachabilitySlice AnnotateWithRuntimeEvidence(
        ReachabilitySlice slice,
        IReadOnlyDictionary<string, EdgeEnrichment> enrichment)
    {
        ArgumentNullException.ThrowIfNull(slice);
        ArgumentNullException.ThrowIfNull(enrichment);

        // Read the clock once so every edge lacking recorded timestamps gets the same
        // fallback instant, and so a fixed TimeProvider yields reproducible output.
        var fallbackTimestamp = _timeProvider.GetUtcNow();

        var annotatedEdges = slice.Subgraph.Edges
            .Select(edge =>
            {
                var key = $"{edge.From}→{edge.To}";

                if (enrichment.TryGetValue(key, out var enrich) && enrich.Observed)
                {
                    return edge with
                    {
                        Observed = new ObservedEdgeMetadata
                        {
                            FirstObserved = enrich.FirstObserved ?? fallbackTimestamp,
                            LastObserved = enrich.LastObserved ?? fallbackTimestamp,
                            // Clamp instead of letting an unchecked cast wrap for very large counts.
                            ObservationCount = (int)Math.Min(enrich.ObservationCount, int.MaxValue),
                            TraceDigest = null
                        }
                    };
                }

                return edge;
            });

        // Fully qualified so the call does not depend on a using for System.Collections.Immutable.
        var annotatedSubgraph = slice.Subgraph with
        {
            Edges = System.Collections.Immutable.ImmutableArray.ToImmutableArray(annotatedEdges)
        };

        return slice with { Subgraph = annotatedSubgraph };
    }
}
|
||||
Reference in New Issue
Block a user