Build fixes and code-structure improvements; new but essential UI functionality; CI improvements; documentation improvements; AI module improvements.

This commit is contained in:
StellaOps Bot
2025-12-26 21:54:17 +02:00
parent 335ff7da16
commit c2b9cd8d1f
3717 changed files with 264714 additions and 48202 deletions

View File

@@ -0,0 +1,135 @@
// Licensed to StellaOps under the AGPL-3.0-or-later license.
using StellaOps.ReachGraph.Schema;
namespace StellaOps.ReachGraph.Persistence;
/// <summary>
/// Repository for persisting and retrieving reachability graphs.
/// </summary>
public interface IReachGraphRepository
{
/// <summary>
/// Store a reachability graph. Idempotent by digest.
/// </summary>
/// <param name="graph">The graph to store.</param>
/// <param name="tenantId">The tenant identifier.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>Result indicating if newly created or already existed.</returns>
Task<StoreResult> StoreAsync(
ReachGraphMinimal graph,
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Retrieve a graph by its digest.
/// </summary>
/// <param name="digest">The BLAKE3 digest.</param>
/// <param name="tenantId">The tenant identifier.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The graph if found, null otherwise.</returns>
Task<ReachGraphMinimal?> GetByDigestAsync(
string digest,
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// List graphs for an artifact.
/// </summary>
/// <param name="artifactDigest">The artifact digest.</param>
/// <param name="tenantId">The tenant identifier.</param>
/// <param name="limit">Maximum number of results.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>List of graph summaries.</returns>
Task<IReadOnlyList<ReachGraphSummary>> ListByArtifactAsync(
string artifactDigest,
string tenantId,
int limit = 50,
CancellationToken cancellationToken = default);
/// <summary>
/// Find graphs containing a specific CVE.
/// </summary>
/// <param name="cveId">The CVE identifier.</param>
/// <param name="tenantId">The tenant identifier.</param>
/// <param name="limit">Maximum number of results.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>List of graph summaries.</returns>
Task<IReadOnlyList<ReachGraphSummary>> FindByCveAsync(
string cveId,
string tenantId,
int limit = 50,
CancellationToken cancellationToken = default);
/// <summary>
/// Delete a graph by digest (soft delete for audit trail).
/// </summary>
/// <param name="digest">The BLAKE3 digest.</param>
/// <param name="tenantId">The tenant identifier.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>True if deleted, false if not found.</returns>
Task<bool> DeleteAsync(
string digest,
string tenantId,
CancellationToken cancellationToken = default);
/// <summary>
/// Record a replay verification result.
/// </summary>
/// <param name="entry">The replay log entry.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task RecordReplayAsync(
ReplayLogEntry entry,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of storing a graph.
/// </summary>
public sealed record StoreResult
{
public required string Digest { get; init; }
public required bool Created { get; init; }
public required string ArtifactDigest { get; init; }
public required int NodeCount { get; init; }
public required int EdgeCount { get; init; }
public required DateTimeOffset StoredAt { get; init; }
}
/// <summary>
/// Summary of a stored graph (without full blob).
/// </summary>
public sealed record ReachGraphSummary
{
public required string Digest { get; init; }
public required string ArtifactDigest { get; init; }
public required int NodeCount { get; init; }
public required int EdgeCount { get; init; }
public required int BlobSizeBytes { get; init; }
public required DateTimeOffset CreatedAt { get; init; }
public required ReachGraphScope Scope { get; init; }
}
/// <summary>
/// Entry in the replay verification log.
/// </summary>
public sealed record ReplayLogEntry
{
public required string SubgraphDigest { get; init; }
public required ReachGraphInputs InputDigests { get; init; }
public required string ComputedDigest { get; init; }
public required bool Matches { get; init; }
public required string TenantId { get; init; }
public required int DurationMs { get; init; }
public ReplayDivergence? Divergence { get; init; }
}
/// <summary>
/// Describes divergence when replay doesn't match.
/// </summary>
public sealed record ReplayDivergence
{
public required int NodesAdded { get; init; }
public required int NodesRemoved { get; init; }
public required int EdgesChanged { get; init; }
}

View File

@@ -0,0 +1,141 @@
-- ReachGraph Store Schema
-- Content-addressed storage for reachability subgraphs
-- Version: 1.0.0
-- Created: 2025-12-27
BEGIN;
-- Create schema if not exists
CREATE SCHEMA IF NOT EXISTS reachgraph;
-- Main subgraph storage
-- Stores compressed reachgraph.min.json with content-addressed digest
CREATE TABLE reachgraph.subgraphs (
digest TEXT PRIMARY KEY, -- BLAKE3 of canonical JSON
artifact_digest TEXT NOT NULL, -- Image/artifact this applies to
tenant_id TEXT NOT NULL, -- Tenant isolation
scope JSONB NOT NULL, -- {entrypoints, selectors, cves}
node_count INTEGER NOT NULL,
edge_count INTEGER NOT NULL,
blob BYTEA NOT NULL, -- Compressed reachgraph.min.json (gzip)
blob_size_bytes INTEGER NOT NULL,
provenance JSONB NOT NULL, -- {intoto, inputs, computedAt, analyzer}
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT chk_digest_format CHECK (digest ~ '^blake3:[a-f0-9]{64}$'),
CONSTRAINT chk_artifact_digest_format CHECK (artifact_digest ~ '^(sha256|sha512):[a-f0-9]+$'),
CONSTRAINT chk_node_count_positive CHECK (node_count >= 0),
CONSTRAINT chk_edge_count_positive CHECK (edge_count >= 0),
CONSTRAINT chk_blob_size_positive CHECK (blob_size_bytes > 0)
);
-- Composite index for tenant + artifact lookup (most common query pattern)
CREATE INDEX idx_subgraphs_tenant_artifact
ON reachgraph.subgraphs (tenant_id, artifact_digest, created_at DESC);
-- Index for fast artifact lookup across tenants (admin queries)
CREATE INDEX idx_subgraphs_artifact
ON reachgraph.subgraphs (artifact_digest, created_at DESC);
-- Index for CVE-based queries using GIN on scope->'cves'
CREATE INDEX idx_subgraphs_cves
ON reachgraph.subgraphs USING GIN ((scope->'cves') jsonb_path_ops);
-- Index for entrypoint-based queries
CREATE INDEX idx_subgraphs_entrypoints
ON reachgraph.subgraphs USING GIN ((scope->'entrypoints') jsonb_path_ops);
-- Index for provenance input lookup (find graphs by source SBOM/VEX/callgraph)
CREATE INDEX idx_subgraphs_provenance_inputs
ON reachgraph.subgraphs USING GIN ((provenance->'inputs') jsonb_path_ops);
-- Slice cache (precomputed slices for hot queries)
CREATE TABLE reachgraph.slice_cache (
cache_key TEXT PRIMARY KEY, -- {digest}:{queryType}:{queryHash}
subgraph_digest TEXT NOT NULL REFERENCES reachgraph.subgraphs(digest) ON DELETE CASCADE,
slice_blob BYTEA NOT NULL, -- Compressed slice JSON
query_type TEXT NOT NULL, -- 'package', 'cve', 'entrypoint', 'file'
query_params JSONB NOT NULL, -- Original query parameters
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ NOT NULL, -- TTL for cache expiration
hit_count INTEGER NOT NULL DEFAULT 0,
CONSTRAINT chk_query_type CHECK (query_type IN ('package', 'cve', 'entrypoint', 'file'))
);
-- Index for cache expiry cleanup
CREATE INDEX idx_slice_cache_expiry
ON reachgraph.slice_cache (expires_at);
-- Index for cache lookup by subgraph
CREATE INDEX idx_slice_cache_subgraph
ON reachgraph.slice_cache (subgraph_digest, created_at DESC);
-- Audit log for replay verification
CREATE TABLE reachgraph.replay_log (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
subgraph_digest TEXT NOT NULL,
input_digests JSONB NOT NULL, -- {sbom, vex, callgraph, runtimeFacts}
computed_digest TEXT NOT NULL, -- Result of replay
matches BOOLEAN NOT NULL, -- Did it match expected digest?
divergence JSONB, -- {nodesAdded, nodesRemoved, edgesChanged} if mismatch
tenant_id TEXT NOT NULL,
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
duration_ms INTEGER NOT NULL,
CONSTRAINT chk_duration_positive CHECK (duration_ms >= 0)
);
-- Index for replay log lookups
CREATE INDEX idx_replay_log_digest
ON reachgraph.replay_log (subgraph_digest, computed_at DESC);
-- Index for replay log by tenant
CREATE INDEX idx_replay_log_tenant
ON reachgraph.replay_log (tenant_id, computed_at DESC);
-- Index for finding replay failures
CREATE INDEX idx_replay_log_failures
ON reachgraph.replay_log (matches, computed_at DESC)
WHERE matches = false;
-- Enable Row Level Security
ALTER TABLE reachgraph.subgraphs ENABLE ROW LEVEL SECURITY;
ALTER TABLE reachgraph.slice_cache ENABLE ROW LEVEL SECURITY;
ALTER TABLE reachgraph.replay_log ENABLE ROW LEVEL SECURITY;
-- RLS policies (tenant isolation)
-- Note: current_setting('app.tenant_id', true) returns NULL if not set, which blocks all access
CREATE POLICY tenant_isolation_subgraphs ON reachgraph.subgraphs
FOR ALL
USING (tenant_id = current_setting('app.tenant_id', true))
WITH CHECK (tenant_id = current_setting('app.tenant_id', true));
-- Slice cache inherits tenant from parent subgraph (via foreign key)
CREATE POLICY tenant_isolation_slice_cache ON reachgraph.slice_cache
FOR ALL
USING (
subgraph_digest IN (
SELECT digest FROM reachgraph.subgraphs
WHERE tenant_id = current_setting('app.tenant_id', true)
)
);
CREATE POLICY tenant_isolation_replay_log ON reachgraph.replay_log
FOR ALL
USING (tenant_id = current_setting('app.tenant_id', true))
WITH CHECK (tenant_id = current_setting('app.tenant_id', true));
-- Comments for documentation
COMMENT ON SCHEMA reachgraph IS 'ReachGraph store schema for content-addressed reachability subgraphs';
COMMENT ON TABLE reachgraph.subgraphs IS 'Content-addressed storage for reachability subgraphs with DSSE signing support';
COMMENT ON TABLE reachgraph.slice_cache IS 'Cache for precomputed subgraph slices (package/CVE/entrypoint/file queries)';
COMMENT ON TABLE reachgraph.replay_log IS 'Audit log for deterministic replay verification';
COMMENT ON COLUMN reachgraph.subgraphs.digest IS 'BLAKE3-256 hash of canonical JSON (format: blake3:{hex})';
COMMENT ON COLUMN reachgraph.subgraphs.artifact_digest IS 'Container image or artifact digest this graph applies to';
COMMENT ON COLUMN reachgraph.subgraphs.blob IS 'Gzip-compressed reachgraph.min.json';
COMMENT ON COLUMN reachgraph.subgraphs.scope IS 'Analysis scope: entrypoints, selectors, and optional CVE filters';
COMMENT ON COLUMN reachgraph.subgraphs.provenance IS 'Provenance info: intoto links, input digests, analyzer metadata';
COMMIT;

View File

@@ -0,0 +1,345 @@
// Licensed to StellaOps under the AGPL-3.0-or-later license.
using System.Collections.Immutable;
using System.IO.Compression;
using System.Text.Json;
using Dapper;
using Microsoft.Extensions.Logging;
using Npgsql;
using StellaOps.ReachGraph.Hashing;
using StellaOps.ReachGraph.Schema;
using StellaOps.ReachGraph.Serialization;
namespace StellaOps.ReachGraph.Persistence;
/// <summary>
/// PostgreSQL implementation of the ReachGraph repository.
/// </summary>
public sealed class PostgresReachGraphRepository : IReachGraphRepository
{
private readonly NpgsqlDataSource _dataSource;
private readonly CanonicalReachGraphSerializer _serializer;
private readonly ReachGraphDigestComputer _digestComputer;
private readonly ILogger<PostgresReachGraphRepository> _logger;
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};
public PostgresReachGraphRepository(
NpgsqlDataSource dataSource,
CanonicalReachGraphSerializer serializer,
ReachGraphDigestComputer digestComputer,
ILogger<PostgresReachGraphRepository> logger)
{
_dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
_serializer = serializer ?? throw new ArgumentNullException(nameof(serializer));
_digestComputer = digestComputer ?? throw new ArgumentNullException(nameof(digestComputer));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
/// <inheritdoc />
public async Task<StoreResult> StoreAsync(
ReachGraphMinimal graph,
string tenantId,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(graph);
ArgumentException.ThrowIfNullOrEmpty(tenantId);
var digest = _digestComputer.ComputeDigest(graph);
var canonicalBytes = _serializer.SerializeMinimal(graph);
var compressedBlob = CompressGzip(canonicalBytes);
var scopeJson = JsonSerializer.Serialize(new
{
entrypoints = graph.Scope.Entrypoints,
selectors = graph.Scope.Selectors,
cves = graph.Scope.Cves
}, JsonOptions);
var provenanceJson = JsonSerializer.Serialize(new
{
intoto = graph.Provenance.Intoto,
inputs = graph.Provenance.Inputs,
computedAt = graph.Provenance.ComputedAt,
analyzer = graph.Provenance.Analyzer
}, JsonOptions);
await using var connection = await _dataSource.OpenConnectionAsync(cancellationToken);
await SetTenantContext(connection, tenantId, cancellationToken);
const string sql = """
INSERT INTO reachgraph.subgraphs (
digest, artifact_digest, tenant_id, scope, node_count, edge_count,
blob, blob_size_bytes, provenance
)
VALUES (
@Digest, @ArtifactDigest, @TenantId, @Scope::jsonb, @NodeCount, @EdgeCount,
@Blob, @BlobSizeBytes, @Provenance::jsonb
)
ON CONFLICT (digest) DO NOTHING
RETURNING created_at
""";
var result = await connection.QuerySingleOrDefaultAsync<DateTimeOffset?>(sql, new
{
Digest = digest,
ArtifactDigest = graph.Artifact.Digest,
TenantId = tenantId,
Scope = scopeJson,
NodeCount = graph.Nodes.Length,
EdgeCount = graph.Edges.Length,
Blob = compressedBlob,
BlobSizeBytes = compressedBlob.Length,
Provenance = provenanceJson
});
var created = result.HasValue;
var storedAt = result ?? DateTimeOffset.UtcNow;
_logger.LogInformation(
"{Action} reachability graph {Digest} for artifact {Artifact}",
created ? "Stored" : "Found existing",
digest,
graph.Artifact.Digest);
return new StoreResult
{
Digest = digest,
Created = created,
ArtifactDigest = graph.Artifact.Digest,
NodeCount = graph.Nodes.Length,
EdgeCount = graph.Edges.Length,
StoredAt = storedAt
};
}
/// <inheritdoc />
public async Task<ReachGraphMinimal?> GetByDigestAsync(
string digest,
string tenantId,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrEmpty(digest);
ArgumentException.ThrowIfNullOrEmpty(tenantId);
await using var connection = await _dataSource.OpenConnectionAsync(cancellationToken);
await SetTenantContext(connection, tenantId, cancellationToken);
const string sql = """
SELECT blob
FROM reachgraph.subgraphs
WHERE digest = @Digest
""";
var blob = await connection.QuerySingleOrDefaultAsync<byte[]>(sql, new { Digest = digest });
if (blob is null)
{
return null;
}
var decompressed = DecompressGzip(blob);
return _serializer.Deserialize(decompressed);
}
/// <inheritdoc />
public async Task<IReadOnlyList<ReachGraphSummary>> ListByArtifactAsync(
string artifactDigest,
string tenantId,
int limit = 50,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrEmpty(artifactDigest);
ArgumentException.ThrowIfNullOrEmpty(tenantId);
await using var connection = await _dataSource.OpenConnectionAsync(cancellationToken);
await SetTenantContext(connection, tenantId, cancellationToken);
const string sql = """
SELECT digest, artifact_digest, node_count, edge_count, blob_size_bytes, created_at, scope
FROM reachgraph.subgraphs
WHERE artifact_digest = @ArtifactDigest
ORDER BY created_at DESC
LIMIT @Limit
""";
var rows = await connection.QueryAsync<dynamic>(sql, new { ArtifactDigest = artifactDigest, Limit = limit });
return rows.Select(row => new ReachGraphSummary
{
Digest = row.digest,
ArtifactDigest = row.artifact_digest,
NodeCount = row.node_count,
EdgeCount = row.edge_count,
BlobSizeBytes = row.blob_size_bytes,
CreatedAt = row.created_at,
Scope = ParseScope((string)row.scope)
}).ToList();
}
/// <inheritdoc />
public async Task<IReadOnlyList<ReachGraphSummary>> FindByCveAsync(
string cveId,
string tenantId,
int limit = 50,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrEmpty(cveId);
ArgumentException.ThrowIfNullOrEmpty(tenantId);
await using var connection = await _dataSource.OpenConnectionAsync(cancellationToken);
await SetTenantContext(connection, tenantId, cancellationToken);
const string sql = """
SELECT digest, artifact_digest, node_count, edge_count, blob_size_bytes, created_at, scope
FROM reachgraph.subgraphs
WHERE scope->'cves' @> @CveJson::jsonb
ORDER BY created_at DESC
LIMIT @Limit
""";
var cveJson = JsonSerializer.Serialize(new[] { cveId });
var rows = await connection.QueryAsync<dynamic>(sql, new { CveJson = cveJson, Limit = limit });
return rows.Select(row => new ReachGraphSummary
{
Digest = row.digest,
ArtifactDigest = row.artifact_digest,
NodeCount = row.node_count,
EdgeCount = row.edge_count,
BlobSizeBytes = row.blob_size_bytes,
CreatedAt = row.created_at,
Scope = ParseScope((string)row.scope)
}).ToList();
}
/// <inheritdoc />
public async Task<bool> DeleteAsync(
string digest,
string tenantId,
CancellationToken cancellationToken = default)
{
ArgumentException.ThrowIfNullOrEmpty(digest);
ArgumentException.ThrowIfNullOrEmpty(tenantId);
await using var connection = await _dataSource.OpenConnectionAsync(cancellationToken);
await SetTenantContext(connection, tenantId, cancellationToken);
// Using DELETE for now; could be soft-delete with deleted_at column
const string sql = """
DELETE FROM reachgraph.subgraphs
WHERE digest = @Digest
RETURNING digest
""";
var deleted = await connection.QuerySingleOrDefaultAsync<string>(sql, new { Digest = digest });
if (deleted is not null)
{
_logger.LogInformation("Deleted reachability graph {Digest}", digest);
return true;
}
return false;
}
/// <inheritdoc />
public async Task RecordReplayAsync(
ReplayLogEntry entry,
CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(entry);
await using var connection = await _dataSource.OpenConnectionAsync(cancellationToken);
await SetTenantContext(connection, entry.TenantId, cancellationToken);
var inputsJson = JsonSerializer.Serialize(entry.InputDigests, JsonOptions);
var divergenceJson = entry.Divergence is not null
? JsonSerializer.Serialize(entry.Divergence, JsonOptions)
: null;
const string sql = """
INSERT INTO reachgraph.replay_log (
subgraph_digest, input_digests, computed_digest, matches,
divergence, tenant_id, duration_ms
)
VALUES (
@SubgraphDigest, @InputDigests::jsonb, @ComputedDigest, @Matches,
@Divergence::jsonb, @TenantId, @DurationMs
)
""";
await connection.ExecuteAsync(sql, new
{
entry.SubgraphDigest,
InputDigests = inputsJson,
entry.ComputedDigest,
entry.Matches,
Divergence = divergenceJson,
entry.TenantId,
entry.DurationMs
});
_logger.LogInformation(
"Recorded replay {Result} for {Digest} (computed: {Computed}, {Duration}ms)",
entry.Matches ? "MATCH" : "MISMATCH",
entry.SubgraphDigest,
entry.ComputedDigest,
entry.DurationMs);
}
private static async Task SetTenantContext(
NpgsqlConnection connection,
string tenantId,
CancellationToken cancellationToken)
{
await using var cmd = connection.CreateCommand();
cmd.CommandText = "SET LOCAL app.tenant_id = @TenantId";
cmd.Parameters.AddWithValue("TenantId", tenantId);
await cmd.ExecuteNonQueryAsync(cancellationToken);
}
private static byte[] CompressGzip(byte[] data)
{
using var output = new MemoryStream();
using (var gzip = new GZipStream(output, CompressionLevel.SmallestSize, leaveOpen: true))
{
gzip.Write(data);
}
return output.ToArray();
}
private static byte[] DecompressGzip(byte[] compressed)
{
using var input = new MemoryStream(compressed);
using var gzip = new GZipStream(input, CompressionMode.Decompress);
using var output = new MemoryStream();
gzip.CopyTo(output);
return output.ToArray();
}
private static ReachGraphScope ParseScope(string json)
{
using var doc = JsonDocument.Parse(json);
var root = doc.RootElement;
var entrypoints = root.TryGetProperty("entrypoints", out var ep)
? ep.EnumerateArray().Select(e => e.GetString()!).ToImmutableArray()
: ImmutableArray<string>.Empty;
var selectors = root.TryGetProperty("selectors", out var sel)
? sel.EnumerateArray().Select(s => s.GetString()!).ToImmutableArray()
: ImmutableArray<string>.Empty;
ImmutableArray<string>? cves = null;
if (root.TryGetProperty("cves", out var cvesElem) && cvesElem.ValueKind == JsonValueKind.Array)
{
cves = cvesElem.EnumerateArray().Select(c => c.GetString()!).ToImmutableArray();
}
return new ReachGraphScope(entrypoints, selectors, cves);
}
}

View File

@@ -0,0 +1,32 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
<RootNamespace>StellaOps.ReachGraph.Persistence</RootNamespace>
<Description>PostgreSQL persistence layer for StellaOps ReachGraph</Description>
<Authors>StellaOps</Authors>
<PackageId>StellaOps.ReachGraph.Persistence</PackageId>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Dapper" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Npgsql" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\StellaOps.ReachGraph\StellaOps.ReachGraph.csproj" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Migrations\*.sql" />
</ItemGroup>
<ItemGroup>
<InternalsVisibleTo Include="StellaOps.ReachGraph.Persistence.Tests" />
</ItemGroup>
</Project>