save progress

This commit is contained in:
StellaOps Bot
2025-12-18 09:10:36 +02:00
parent b4235c134c
commit 28823a8960
169 changed files with 11995 additions and 449 deletions

View File

@@ -79,6 +79,8 @@ public static class ServiceCollectionExtensions
services.AddScoped<IProofSpineRepository, PostgresProofSpineRepository>();
services.AddScoped<ICallGraphSnapshotRepository, PostgresCallGraphSnapshotRepository>();
services.AddScoped<IReachabilityResultRepository, PostgresReachabilityResultRepository>();
services.AddScoped<ICodeChangeRepository, PostgresCodeChangeRepository>();
services.AddScoped<IReachabilityDriftResultRepository, PostgresReachabilityDriftResultRepository>();
services.AddSingleton<IEntryTraceResultStore, EntryTraceResultStore>();
services.AddSingleton<IRubyPackageInventoryStore, RubyPackageInventoryStore>();
services.AddSingleton<IBunPackageInventoryStore, BunPackageInventoryStore>();

View File

@@ -1,13 +1,21 @@
-- Call graph snapshots + reachability analysis results
-- Sprint: SPRINT_3600_0002_0001_call_graph_infrastructure
CREATE SCHEMA IF NOT EXISTS scanner;
-- Note: migrations are executed with the module schema as the active search_path.
-- Keep objects unqualified so integration tests can run in isolated schemas.
CREATE OR REPLACE FUNCTION current_tenant_id()
RETURNS UUID AS $$
BEGIN
RETURN NULLIF(current_setting('app.tenant_id', TRUE), '')::UUID;
END;
$$ LANGUAGE plpgsql STABLE;
-- -----------------------------------------------------------------------------
-- Table: scanner.call_graph_snapshots
-- Table: call_graph_snapshots
-- Purpose: Cache call graph snapshots per scan/language for reachability drift.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS scanner.call_graph_snapshots (
CREATE TABLE IF NOT EXISTS call_graph_snapshots (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id UUID NOT NULL,
@@ -27,24 +35,26 @@ CREATE TABLE IF NOT EXISTS scanner.call_graph_snapshots (
);
CREATE INDEX IF NOT EXISTS idx_call_graph_snapshots_tenant_scan
ON scanner.call_graph_snapshots (tenant_id, scan_id, language);
ON call_graph_snapshots (tenant_id, scan_id, language);
CREATE INDEX IF NOT EXISTS idx_call_graph_snapshots_graph_digest
ON scanner.call_graph_snapshots (graph_digest);
ON call_graph_snapshots (graph_digest);
CREATE INDEX IF NOT EXISTS idx_call_graph_snapshots_extracted_at
ON scanner.call_graph_snapshots USING BRIN (extracted_at);
ON call_graph_snapshots USING BRIN (extracted_at);
ALTER TABLE scanner.call_graph_snapshots ENABLE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS call_graph_snapshots_tenant_isolation ON scanner.call_graph_snapshots;
CREATE POLICY call_graph_snapshots_tenant_isolation ON scanner.call_graph_snapshots
USING (tenant_id = scanner.current_tenant_id());
ALTER TABLE call_graph_snapshots ENABLE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS call_graph_snapshots_tenant_isolation ON call_graph_snapshots;
CREATE POLICY call_graph_snapshots_tenant_isolation ON call_graph_snapshots
FOR ALL
USING (tenant_id = current_tenant_id())
WITH CHECK (tenant_id = current_tenant_id());
COMMENT ON TABLE scanner.call_graph_snapshots IS 'Call graph snapshots per scan/language for reachability drift detection.';
COMMENT ON TABLE call_graph_snapshots IS 'Call graph snapshots per scan/language for reachability drift detection.';
-- -----------------------------------------------------------------------------
-- Table: scanner.reachability_results
-- Table: reachability_results
-- Purpose: Cache reachability BFS results (reachable sinks + shortest paths).
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS scanner.reachability_results (
CREATE TABLE IF NOT EXISTS reachability_results (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id UUID NOT NULL,
@@ -63,16 +73,17 @@ CREATE TABLE IF NOT EXISTS scanner.reachability_results (
);
CREATE INDEX IF NOT EXISTS idx_reachability_results_tenant_scan
ON scanner.reachability_results (tenant_id, scan_id, language);
ON reachability_results (tenant_id, scan_id, language);
CREATE INDEX IF NOT EXISTS idx_reachability_results_graph_digest
ON scanner.reachability_results (graph_digest);
ON reachability_results (graph_digest);
CREATE INDEX IF NOT EXISTS idx_reachability_results_computed_at
ON scanner.reachability_results USING BRIN (computed_at);
ON reachability_results USING BRIN (computed_at);
ALTER TABLE scanner.reachability_results ENABLE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS reachability_results_tenant_isolation ON scanner.reachability_results;
CREATE POLICY reachability_results_tenant_isolation ON scanner.reachability_results
USING (tenant_id = scanner.current_tenant_id());
COMMENT ON TABLE scanner.reachability_results IS 'Reachability analysis results per scan/language with shortest paths.';
ALTER TABLE reachability_results ENABLE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS reachability_results_tenant_isolation ON reachability_results;
CREATE POLICY reachability_results_tenant_isolation ON reachability_results
FOR ALL
USING (tenant_id = current_tenant_id())
WITH CHECK (tenant_id = current_tenant_id());
COMMENT ON TABLE reachability_results IS 'Reachability analysis results per scan/language with shortest paths.';

View File

@@ -0,0 +1,151 @@
-- Reachability drift: code changes + drift results
-- Sprint: SPRINT_3600_0003_0001_drift_detection_engine
-- Note: migrations are executed with the module schema as the active search_path.
-- Keep objects unqualified so integration tests can run in isolated schemas.
-- Resolves the tenant for the current session from the `app.tenant_id` setting.
-- current_setting(..., TRUE) returns NULL instead of raising when the setting is
-- missing, and NULLIF maps an empty string to NULL, so the function yields NULL
-- whenever no tenant is configured. The tenant-isolation policies in this file
-- compare tenant_id against this value, so a NULL result matches no rows.
-- A non-empty value that is not a valid UUID fails the ::UUID cast.
-- CREATE OR REPLACE keeps the statement re-runnable.
CREATE OR REPLACE FUNCTION current_tenant_id()
RETURNS UUID AS $$
BEGIN
RETURN NULLIF(current_setting('app.tenant_id', TRUE), '')::UUID;
END;
$$ LANGUAGE plpgsql STABLE;
-- -----------------------------------------------------------------------------
-- Table: code_changes
-- Purpose: Store coarse code change facts extracted from call graph diffs.
-- -----------------------------------------------------------------------------
-- One row per (symbol, change kind) observed for a scan pair and language.
-- node_id and details are optional; every other column is required.
-- NOTE(review): scan_id presumably identifies the head scan of the pair — confirm against the writer.
CREATE TABLE IF NOT EXISTS code_changes (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id UUID NOT NULL,
scan_id TEXT NOT NULL,
base_scan_id TEXT NOT NULL,
language TEXT NOT NULL,
node_id TEXT,
file TEXT NOT NULL,
symbol TEXT NOT NULL,
change_kind TEXT NOT NULL,
details JSONB,
detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- Natural key used as the upsert conflict target.
-- NOTE(review): file and node_id are not part of the key, so identically named
-- symbols in different files map to the same row — confirm this is intended.
CONSTRAINT code_changes_unique UNIQUE (tenant_id, scan_id, base_scan_id, language, symbol, change_kind)
);
-- Lookup of all changes for a scan pair within a tenant.
CREATE INDEX IF NOT EXISTS idx_code_changes_tenant_scan
ON code_changes (tenant_id, scan_id, base_scan_id, language);
CREATE INDEX IF NOT EXISTS idx_code_changes_symbol
ON code_changes (symbol);
CREATE INDEX IF NOT EXISTS idx_code_changes_kind
ON code_changes (change_kind);
-- BRIN index on the timestamp column (block-range summary rather than a per-row B-tree).
CREATE INDEX IF NOT EXISTS idx_code_changes_detected_at
ON code_changes USING BRIN (detected_at);
-- Tenant isolation: the policy is dropped and recreated so the script can be re-run.
-- USING filters the rows a statement can see; WITH CHECK rejects inserted/updated
-- rows whose tenant_id differs from current_tenant_id().
ALTER TABLE code_changes ENABLE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS code_changes_tenant_isolation ON code_changes;
CREATE POLICY code_changes_tenant_isolation ON code_changes
FOR ALL
USING (tenant_id = current_tenant_id())
WITH CHECK (tenant_id = current_tenant_id());
COMMENT ON TABLE code_changes IS 'Code change facts for reachability drift analysis.';
-- -----------------------------------------------------------------------------
-- Extend: material_risk_changes
-- Purpose: Store drift-specific attachments alongside Smart-Diff R1 changes.
-- -----------------------------------------------------------------------------
-- Adds nullable drift columns to the existing table; ADD COLUMN IF NOT EXISTS
-- makes each addition a no-op when the column is already present.
-- NOTE(review): this statement requires material_risk_changes to already exist
-- in the active schema (there is no IF EXISTS on the table) — confirm migration order.
ALTER TABLE material_risk_changes
ADD COLUMN IF NOT EXISTS base_scan_id TEXT,
ADD COLUMN IF NOT EXISTS cause TEXT,
ADD COLUMN IF NOT EXISTS cause_kind TEXT,
ADD COLUMN IF NOT EXISTS path_nodes JSONB,
ADD COLUMN IF NOT EXISTS associated_vulns JSONB;
-- Partial indexes: only rows that actually carry drift data are indexed, so rows
-- where the new columns are NULL add no index entries.
CREATE INDEX IF NOT EXISTS idx_material_risk_changes_cause_kind
ON material_risk_changes(cause_kind)
WHERE cause_kind IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_material_risk_changes_base_scan
ON material_risk_changes(base_scan_id)
WHERE base_scan_id IS NOT NULL;
-- -----------------------------------------------------------------------------
-- Table: reachability_drift_results
-- Purpose: Aggregate drift results per scan pair and language.
-- -----------------------------------------------------------------------------
-- Header row for one drift computation between a base scan and a head scan.
-- The per-sink details live in drifted_sinks and reference this table's id.
CREATE TABLE IF NOT EXISTS reachability_drift_results (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id UUID NOT NULL,
base_scan_id TEXT NOT NULL,
head_scan_id TEXT NOT NULL,
language TEXT NOT NULL,
newly_reachable_count INT NOT NULL DEFAULT 0,
newly_unreachable_count INT NOT NULL DEFAULT 0,
detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
result_digest TEXT NOT NULL,
-- result_digest is part of the key, so the same scan pair and language can hold
-- several rows as long as their digests differ.
CONSTRAINT reachability_drift_unique UNIQUE (tenant_id, base_scan_id, head_scan_id, language, result_digest)
);
-- Supports lookups by head scan and language within a tenant.
CREATE INDEX IF NOT EXISTS idx_reachability_drift_head
ON reachability_drift_results (tenant_id, head_scan_id, language);
-- BRIN index on the timestamp column (block-range summary rather than a per-row B-tree).
CREATE INDEX IF NOT EXISTS idx_reachability_drift_detected_at
ON reachability_drift_results USING BRIN (detected_at);
-- Tenant isolation: USING filters visible rows, WITH CHECK rejects writes whose
-- tenant_id differs from current_tenant_id(). Drop + create keeps the script re-runnable.
ALTER TABLE reachability_drift_results ENABLE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS drift_results_tenant_isolation ON reachability_drift_results;
CREATE POLICY drift_results_tenant_isolation ON reachability_drift_results
FOR ALL
USING (tenant_id = current_tenant_id())
WITH CHECK (tenant_id = current_tenant_id());
COMMENT ON TABLE reachability_drift_results IS 'Aggregate drift results per scan pair + language.';
-- -----------------------------------------------------------------------------
-- Table: drifted_sinks
-- Purpose: Individual sink drift records (paged by API).
-- -----------------------------------------------------------------------------
-- One row per sink whose reachability changed within a drift result.
CREATE TABLE IF NOT EXISTS drifted_sinks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id UUID NOT NULL,
-- Deleting the parent drift result removes its sink rows (ON DELETE CASCADE).
drift_result_id UUID NOT NULL REFERENCES reachability_drift_results(id) ON DELETE CASCADE,
sink_node_id TEXT NOT NULL,
symbol TEXT NOT NULL,
sink_category TEXT NOT NULL,
direction TEXT NOT NULL,
cause_kind TEXT NOT NULL,
cause_description TEXT NOT NULL,
cause_symbol TEXT,
cause_file TEXT,
cause_line INT,
-- Optional link to the code change that caused the drift. No ON DELETE action is
-- declared, so the default applies and a referenced code_changes row cannot be
-- deleted while a sink still points at it.
code_change_id UUID REFERENCES code_changes(id),
compressed_path JSONB NOT NULL,
associated_vulns JSONB,
-- A sink node appears at most once per drift result, regardless of direction.
CONSTRAINT drifted_sinks_unique UNIQUE (drift_result_id, sink_node_id)
);
CREATE INDEX IF NOT EXISTS idx_drifted_sinks_drift
ON drifted_sinks (drift_result_id);
CREATE INDEX IF NOT EXISTS idx_drifted_sinks_direction
ON drifted_sinks (direction);
CREATE INDEX IF NOT EXISTS idx_drifted_sinks_category
ON drifted_sinks (sink_category);
-- Tenant isolation: USING filters visible rows, WITH CHECK rejects writes whose
-- tenant_id differs from current_tenant_id(). Drop + create keeps the script re-runnable.
ALTER TABLE drifted_sinks ENABLE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS drifted_sinks_tenant_isolation ON drifted_sinks;
CREATE POLICY drifted_sinks_tenant_isolation ON drifted_sinks
FOR ALL
USING (tenant_id = current_tenant_id())
WITH CHECK (tenant_id = current_tenant_id());
COMMENT ON TABLE drifted_sinks IS 'Individual drifted sink records with causes and compressed paths.';

View File

@@ -0,0 +1,23 @@
-- scanner api ingestion persistence (startup migration)
-- Purpose: Store idempotency state for Scanner.WebService ingestion endpoints.
-- One row per ingested call graph document. Unlike the other tables, the primary
-- key is a caller-supplied TEXT id with no default.
-- NOTE(review): this table carries tenant_id but the script enables no row-level
-- security policy for it — confirm whether tenant filtering is enforced elsewhere.
CREATE TABLE IF NOT EXISTS callgraph_ingestions (
id TEXT PRIMARY KEY,
tenant_id UUID NOT NULL,
scan_id TEXT NOT NULL,
content_digest TEXT NOT NULL,
language TEXT NOT NULL,
node_count INT NOT NULL,
edge_count INT NOT NULL,
created_at_utc TIMESTAMPTZ NOT NULL DEFAULT NOW(),
callgraph_json JSONB NOT NULL,
-- Idempotency key: the same content digest can be stored only once per tenant and scan.
CONSTRAINT callgraph_ingestions_unique_per_scan UNIQUE (tenant_id, scan_id, content_digest)
);
-- Newest-first listing of ingestions for a scan, with id as a tie-breaker.
CREATE INDEX IF NOT EXISTS ix_callgraph_ingestions_scan
ON callgraph_ingestions (tenant_id, scan_id, created_at_utc DESC, id);
-- Lookup by content digest across scans within a tenant.
CREATE INDEX IF NOT EXISTS ix_callgraph_ingestions_digest
ON callgraph_ingestions (tenant_id, content_digest);

View File

@@ -0,0 +1,12 @@
-- =============================================================================
-- 010_smart_diff_priority_score_widen.sql
-- Purpose: Widen Smart-Diff material risk change priority_score to support
-- advisory scoring formula (can exceed NUMERIC(6,4)).
--
-- Note: migrations are executed inside a transaction by the migration runner.
-- Do not include BEGIN/COMMIT in migration files.
-- =============================================================================
-- Widens priority_score from NUMERIC(6,4) to NUMERIC(12,4): same scale (4), larger
-- precision. The USING clause spells out the cast applied to existing values.
ALTER TABLE material_risk_changes
ALTER COLUMN priority_score TYPE NUMERIC(12, 4)
USING priority_score::NUMERIC(12, 4);

View File

@@ -11,4 +11,5 @@ internal static class MigrationIds
public const string UnknownsRankingContainment = "007_unknowns_ranking_containment.sql";
public const string EpssIntegration = "008_epss_integration.sql";
public const string CallGraphTables = "009_call_graph_tables.sql";
public const string ReachabilityDriftTables = "010_reachability_drift_tables.sql";
}

View File

@@ -8,6 +8,9 @@ namespace StellaOps.Scanner.Storage.Postgres;
public sealed class PostgresCallGraphSnapshotRepository : ICallGraphSnapshotRepository
{
private const string TenantContext = "00000000-0000-0000-0000-000000000001";
private static readonly Guid TenantId = Guid.Parse(TenantContext);
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
{
WriteIndented = false
@@ -16,6 +19,9 @@ public sealed class PostgresCallGraphSnapshotRepository : ICallGraphSnapshotRepo
private readonly ScannerDataSource _dataSource;
private readonly ILogger<PostgresCallGraphSnapshotRepository> _logger;
private string SchemaName => _dataSource.SchemaName ?? ScannerDataSource.DefaultSchema;
private string CallGraphSnapshotsTable => $"{SchemaName}.call_graph_snapshots";
public PostgresCallGraphSnapshotRepository(
ScannerDataSource dataSource,
ILogger<PostgresCallGraphSnapshotRepository> logger)
@@ -29,8 +35,8 @@ public sealed class PostgresCallGraphSnapshotRepository : ICallGraphSnapshotRepo
ArgumentNullException.ThrowIfNull(snapshot);
var trimmed = snapshot.Trimmed();
const string sql = """
INSERT INTO scanner.call_graph_snapshots (
var sql = $"""
INSERT INTO {CallGraphSnapshotsTable} (
tenant_id,
scan_id,
language,
@@ -63,12 +69,11 @@ public sealed class PostgresCallGraphSnapshotRepository : ICallGraphSnapshotRepo
""";
var json = JsonSerializer.Serialize(trimmed, JsonOptions);
var tenantId = GetCurrentTenantId();
await using var connection = await _dataSource.OpenConnectionAsync(ct).ConfigureAwait(false);
await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);
await connection.ExecuteAsync(new CommandDefinition(sql, new
{
TenantId = tenantId,
TenantId = TenantId,
ScanId = trimmed.ScanId,
Language = trimmed.Language,
GraphDigest = trimmed.GraphDigest,
@@ -93,18 +98,18 @@ public sealed class PostgresCallGraphSnapshotRepository : ICallGraphSnapshotRepo
ArgumentException.ThrowIfNullOrWhiteSpace(scanId);
ArgumentException.ThrowIfNullOrWhiteSpace(language);
const string sql = """
var sql = $"""
SELECT snapshot_json
FROM scanner.call_graph_snapshots
FROM {CallGraphSnapshotsTable}
WHERE tenant_id = @TenantId AND scan_id = @ScanId AND language = @Language
ORDER BY extracted_at DESC
LIMIT 1
""";
await using var connection = await _dataSource.OpenConnectionAsync(ct).ConfigureAwait(false);
await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);
var json = await connection.ExecuteScalarAsync<string?>(new CommandDefinition(sql, new
{
TenantId = GetCurrentTenantId(),
TenantId = TenantId,
ScanId = scanId,
Language = language
}, cancellationToken: ct)).ConfigureAwait(false);
@@ -116,10 +121,5 @@ public sealed class PostgresCallGraphSnapshotRepository : ICallGraphSnapshotRepo
return JsonSerializer.Deserialize<CallGraphSnapshot>(json, JsonOptions);
}
private static Guid GetCurrentTenantId()
{
return Guid.Parse("00000000-0000-0000-0000-000000000001");
}
}

View File

@@ -0,0 +1,114 @@
using System.Text.Json;
using Dapper;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.ReachabilityDrift;
using StellaOps.Scanner.Storage.Repositories;
namespace StellaOps.Scanner.Storage.Postgres;
/// <summary>
/// PostgreSQL-backed <see cref="ICodeChangeRepository"/> that upserts code change facts
/// into the <c>code_changes</c> table of the configured scanner schema.
/// </summary>
public sealed class PostgresCodeChangeRepository : ICodeChangeRepository
{
    // Fixed tenant: passed to OpenConnectionAsync as the tenant context and written to tenant_id.
    // NOTE(review): every call uses this single hard-coded tenant — confirm this is intended.
    private const string TenantContext = "00000000-0000-0000-0000-000000000001";
    private static readonly Guid TenantId = Guid.Parse(TenantContext);

    private readonly ScannerDataSource _dataSource;
    private readonly ILogger<PostgresCodeChangeRepository> _logger;

    // Falls back to the default schema when the data source does not specify one.
    private string SchemaName => _dataSource.SchemaName ?? ScannerDataSource.DefaultSchema;
    private string CodeChangesTable => $"{SchemaName}.code_changes";

    /// <summary>Creates the repository.</summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="dataSource"/> or <paramref name="logger"/> is <see langword="null"/>.
    /// </exception>
    public PostgresCodeChangeRepository(
        ScannerDataSource dataSource,
        ILogger<PostgresCodeChangeRepository> logger)
    {
        _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Upserts the given code change facts. Rows that collide on
    /// (tenant, scan, base scan, language, symbol, change kind) have their node id, file,
    /// details and detection time overwritten. The whole batch is written in a single
    /// transaction so a failure part-way through does not leave a partially stored batch.
    /// </summary>
    /// <param name="changes">Facts to store; an empty list is a no-op.</param>
    /// <param name="ct">Cancellation token for the database calls.</param>
    /// <exception cref="ArgumentNullException"><paramref name="changes"/> is <see langword="null"/>.</exception>
    public async Task StoreAsync(IReadOnlyList<CodeChangeFact> changes, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(changes);

        if (changes.Count == 0)
        {
            return;
        }

        var sql = $"""
            INSERT INTO {CodeChangesTable} (
                id,
                tenant_id,
                scan_id,
                base_scan_id,
                language,
                node_id,
                file,
                symbol,
                change_kind,
                details,
                detected_at
            ) VALUES (
                @Id,
                @TenantId,
                @ScanId,
                @BaseScanId,
                @Language,
                @NodeId,
                @File,
                @Symbol,
                @ChangeKind,
                @Details::jsonb,
                @DetectedAt
            )
            ON CONFLICT (tenant_id, scan_id, base_scan_id, language, symbol, change_kind) DO UPDATE SET
                node_id = EXCLUDED.node_id,
                file = EXCLUDED.file,
                details = EXCLUDED.details,
                detected_at = EXCLUDED.detected_at
            """;

        var rows = changes.Select(change => new
        {
            change.Id,
            TenantId,
            ScanId = change.ScanId.Trim(),
            BaseScanId = change.BaseScanId.Trim(),
            Language = change.Language.Trim(),
            NodeId = string.IsNullOrWhiteSpace(change.NodeId) ? null : change.NodeId.Trim(),
            File = change.File.Trim(),
            Symbol = change.Symbol.Trim(),
            ChangeKind = ToDbValue(change.Kind),
            Details = SerializeDetails(change.Details),
            DetectedAt = change.DetectedAt.UtcDateTime
        }).ToList();

        await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);

        // The command is executed once per row; the transaction makes the batch all-or-nothing.
        await using var transaction = await connection.BeginTransactionAsync(ct).ConfigureAwait(false);
        try
        {
            await connection.ExecuteAsync(new CommandDefinition(
                sql,
                rows,
                transaction: transaction,
                cancellationToken: ct)).ConfigureAwait(false);

            await transaction.CommitAsync(ct).ConfigureAwait(false);
        }
        catch (Exception)
        {
            try
            {
                // ct may already be cancelled; roll back with no token so the rollback itself can run.
                await transaction.RollbackAsync(CancellationToken.None).ConfigureAwait(false);
            }
            catch (Exception rollbackEx)
            {
                // Best effort: never let a rollback failure replace the original exception.
                _logger.LogWarning(rollbackEx, "Failed to roll back code change batch of {Count} rows", changes.Count);
            }

            throw;
        }

        // Only the first fact's identifiers are logged as a representative of the batch.
        _logger.LogDebug(
            "Stored {Count} code change facts scan={ScanId} base={BaseScanId} lang={Language}",
            changes.Count,
            changes[0].ScanId,
            changes[0].BaseScanId,
            changes[0].Language);
    }

    // Returns the raw JSON text of the details, or null when absent, undefined or JSON null.
    private static string? SerializeDetails(JsonElement? details)
        => details is { ValueKind: not JsonValueKind.Undefined and not JsonValueKind.Null }
            ? details.Value.GetRawText()
            : null;

    // Maps the change kind to its snake_case database value; unlisted members fall back to the enum name.
    private static string ToDbValue(CodeChangeKind kind)
    {
        return kind switch
        {
            CodeChangeKind.Added => "added",
            CodeChangeKind.Removed => "removed",
            CodeChangeKind.SignatureChanged => "signature_changed",
            CodeChangeKind.GuardChanged => "guard_changed",
            CodeChangeKind.DependencyChanged => "dependency_changed",
            CodeChangeKind.VisibilityChanged => "visibility_changed",
            _ => kind.ToString()
        };
    }
}

View File

@@ -0,0 +1,527 @@
using System.Collections.Immutable;
using System.Text.Json;
using Dapper;
using Microsoft.Extensions.Logging;
using StellaOps.Scanner.Reachability;
using StellaOps.Scanner.ReachabilityDrift;
using StellaOps.Scanner.Storage.Repositories;
namespace StellaOps.Scanner.Storage.Postgres;
public sealed class PostgresReachabilityDriftResultRepository : IReachabilityDriftResultRepository
{
// Fixed tenant: passed to OpenConnectionAsync as the tenant context and, parsed as a Guid,
// used as the tenant_id value in every statement of this repository.
// NOTE(review): every call uses this single hard-coded tenant — confirm this is intended.
private const string TenantContext = "00000000-0000-0000-0000-000000000001";
private static readonly Guid TenantId = Guid.Parse(TenantContext);
// Shared serializer settings for the JSON columns (web defaults, compact output).
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
{
WriteIndented = false
};
private readonly ScannerDataSource _dataSource;
private readonly ILogger<PostgresReachabilityDriftResultRepository> _logger;
// Falls back to the default schema when the data source does not specify one.
private string SchemaName => _dataSource.SchemaName ?? ScannerDataSource.DefaultSchema;
// Schema-qualified table names interpolated into the SQL text below.
private string DriftResultsTable => $"{SchemaName}.reachability_drift_results";
private string DriftedSinksTable => $"{SchemaName}.drifted_sinks";
/// <summary>Creates the repository.</summary>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataSource"/> or <paramref name="logger"/> is <see langword="null"/>.
/// </exception>
public PostgresReachabilityDriftResultRepository(
    ScannerDataSource dataSource,
    ILogger<PostgresReachabilityDriftResultRepository> logger)
{
    // Validate in declaration order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(dataSource);
    ArgumentNullException.ThrowIfNull(logger);

    _dataSource = dataSource;
    _logger = logger;
}
/// <summary>
/// Persists a drift result in one transaction: upserts the header row, deletes the sink
/// rows currently attached to it, then inserts the sinks from <paramref name="result"/>.
/// </summary>
/// <param name="result">Drift result to store.</param>
/// <param name="ct">Cancellation token for the database calls.</param>
/// <exception cref="ArgumentNullException"><paramref name="result"/> is <see langword="null"/>.</exception>
public async Task StoreAsync(ReachabilityDriftResult result, CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(result);

    // RETURNING id yields the id of the row that actually exists after the upsert, which on
    // conflict is the previously stored row rather than result.Id.
    var insertResultSql = $"""
        INSERT INTO {DriftResultsTable} (
            id,
            tenant_id,
            base_scan_id,
            head_scan_id,
            language,
            newly_reachable_count,
            newly_unreachable_count,
            detected_at,
            result_digest
        ) VALUES (
            @Id,
            @TenantId,
            @BaseScanId,
            @HeadScanId,
            @Language,
            @NewlyReachableCount,
            @NewlyUnreachableCount,
            @DetectedAt,
            @ResultDigest
        )
        ON CONFLICT (tenant_id, base_scan_id, head_scan_id, language, result_digest) DO UPDATE SET
            newly_reachable_count = EXCLUDED.newly_reachable_count,
            newly_unreachable_count = EXCLUDED.newly_unreachable_count,
            detected_at = EXCLUDED.detected_at
        RETURNING id
        """;

    var deleteSinksSql = $"""
        DELETE FROM {DriftedSinksTable}
        WHERE tenant_id = @TenantId AND drift_result_id = @DriftId
        """;

    var insertSinkSql = $"""
        INSERT INTO {DriftedSinksTable} (
            id,
            tenant_id,
            drift_result_id,
            sink_node_id,
            symbol,
            sink_category,
            direction,
            cause_kind,
            cause_description,
            cause_symbol,
            cause_file,
            cause_line,
            code_change_id,
            compressed_path,
            associated_vulns
        ) VALUES (
            @Id,
            @TenantId,
            @DriftId,
            @SinkNodeId,
            @Symbol,
            @SinkCategory,
            @Direction,
            @CauseKind,
            @CauseDescription,
            @CauseSymbol,
            @CauseFile,
            @CauseLine,
            @CodeChangeId,
            @CompressedPath::jsonb,
            @AssociatedVulns::jsonb
        )
        ON CONFLICT (drift_result_id, sink_node_id) DO UPDATE SET
            symbol = EXCLUDED.symbol,
            sink_category = EXCLUDED.sink_category,
            direction = EXCLUDED.direction,
            cause_kind = EXCLUDED.cause_kind,
            cause_description = EXCLUDED.cause_description,
            cause_symbol = EXCLUDED.cause_symbol,
            cause_file = EXCLUDED.cause_file,
            cause_line = EXCLUDED.cause_line,
            code_change_id = EXCLUDED.code_change_id,
            compressed_path = EXCLUDED.compressed_path,
            associated_vulns = EXCLUDED.associated_vulns
        """;

    await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);
    await using var transaction = await connection.BeginTransactionAsync(ct).ConfigureAwait(false);

    try
    {
        var driftId = await connection.ExecuteScalarAsync<Guid>(new CommandDefinition(
            insertResultSql,
            new
            {
                result.Id,
                TenantId,
                BaseScanId = result.BaseScanId.Trim(),
                HeadScanId = result.HeadScanId.Trim(),
                Language = result.Language.Trim(),
                NewlyReachableCount = result.NewlyReachable.Length,
                NewlyUnreachableCount = result.NewlyUnreachable.Length,
                DetectedAt = result.DetectedAt.UtcDateTime,
                result.ResultDigest
            },
            transaction: transaction,
            cancellationToken: ct))
            .ConfigureAwait(false);

        // Replace semantics: drop whatever sinks were stored for this result before re-inserting.
        await connection.ExecuteAsync(new CommandDefinition(
            deleteSinksSql,
            new { TenantId, DriftId = driftId },
            transaction: transaction,
            cancellationToken: ct))
            .ConfigureAwait(false);

        var sinkRows = EnumerateSinkRows(driftId, result.NewlyReachable, DriftDirection.BecameReachable)
            .Concat(EnumerateSinkRows(driftId, result.NewlyUnreachable, DriftDirection.BecameUnreachable))
            .ToList();

        if (sinkRows.Count > 0)
        {
            await connection.ExecuteAsync(new CommandDefinition(
                insertSinkSql,
                sinkRows,
                transaction: transaction,
                cancellationToken: ct))
                .ConfigureAwait(false);
        }

        await transaction.CommitAsync(ct).ConfigureAwait(false);

        _logger.LogDebug(
            "Stored drift result drift={DriftId} base={BaseScanId} head={HeadScanId} lang={Language}",
            driftId,
            result.BaseScanId,
            result.HeadScanId,
            result.Language);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to store drift result base={BaseScanId} head={HeadScanId}", result.BaseScanId, result.HeadScanId);

        try
        {
            // ct may be the very reason we are here (cancellation); rolling back with it would
            // throw again and hide the original failure, so the rollback runs without a token.
            await transaction.RollbackAsync(CancellationToken.None).ConfigureAwait(false);
        }
        catch (Exception rollbackEx)
        {
            // Best effort: a failed rollback must not replace the exception that caused it.
            _logger.LogWarning(
                rollbackEx,
                "Failed to roll back drift result transaction base={BaseScanId} head={HeadScanId}",
                result.BaseScanId,
                result.HeadScanId);
        }

        throw;
    }
}
/// <summary>
/// Loads the most recently detected drift result for a head scan and language,
/// including its sinks, or <see langword="null"/> when none is stored.
/// </summary>
/// <param name="headScanId">Head scan identifier; surrounding whitespace is trimmed.</param>
/// <param name="language">Language; surrounding whitespace is trimmed.</param>
/// <param name="ct">Cancellation token for the database calls.</param>
/// <exception cref="ArgumentException">An argument is null, empty or whitespace.</exception>
public async Task<ReachabilityDriftResult?> TryGetLatestForHeadAsync(string headScanId, string language, CancellationToken ct = default)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(headScanId);
    ArgumentException.ThrowIfNullOrWhiteSpace(language);

    var latestHeaderSql = $"""
        SELECT id, base_scan_id, head_scan_id, language, detected_at, result_digest
        FROM {DriftResultsTable}
        WHERE tenant_id = @TenantId AND head_scan_id = @HeadScanId AND language = @Language
        ORDER BY detected_at DESC
        LIMIT 1
        """;

    await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);

    var parameters = new
    {
        TenantId,
        HeadScanId = headScanId.Trim(),
        Language = language.Trim()
    };
    var command = new CommandDefinition(latestHeaderSql, parameters, cancellationToken: ct);
    var latest = await connection.QuerySingleOrDefaultAsync<DriftHeaderRow>(command).ConfigureAwait(false);

    // The sinks are fetched on the same connection only when a header row was found.
    return latest is null
        ? null
        : await LoadResultAsync(connection, latest, ct).ConfigureAwait(false);
}
/// <summary>
/// Loads a drift result, including its sinks, by its identifier, or returns
/// <see langword="null"/> when no row with that id exists for the tenant.
/// </summary>
/// <param name="driftId">Identifier of the drift result row.</param>
/// <param name="ct">Cancellation token for the database calls.</param>
public async Task<ReachabilityDriftResult?> TryGetByIdAsync(Guid driftId, CancellationToken ct = default)
{
    var headerByIdSql = $"""
        SELECT id, base_scan_id, head_scan_id, language, detected_at, result_digest
        FROM {DriftResultsTable}
        WHERE tenant_id = @TenantId AND id = @DriftId
        LIMIT 1
        """;

    await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);

    var command = new CommandDefinition(
        headerByIdSql,
        new { TenantId, DriftId = driftId },
        cancellationToken: ct);
    var found = await connection.QuerySingleOrDefaultAsync<DriftHeaderRow>(command).ConfigureAwait(false);

    // The sinks are fetched on the same connection only when a header row was found.
    return found is null
        ? null
        : await LoadResultAsync(connection, found, ct).ConfigureAwait(false);
}
/// <summary>Reports whether a drift result with the given id is stored for the tenant.</summary>
/// <param name="driftId">Identifier of the drift result row.</param>
/// <param name="ct">Cancellation token for the database call.</param>
public async Task<bool> ExistsAsync(Guid driftId, CancellationToken ct = default)
{
    var existsSql = $"""
        SELECT 1
        FROM {DriftResultsTable}
        WHERE tenant_id = @TenantId AND id = @DriftId
        LIMIT 1
        """;

    await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);

    var command = new CommandDefinition(
        existsSql,
        new { TenantId, DriftId = driftId },
        cancellationToken: ct);

    // The scalar is null when the query produced no row.
    int? marker = await connection.ExecuteScalarAsync<int?>(command).ConfigureAwait(false);
    return marker.HasValue;
}
/// <summary>
/// Returns one page of sinks for a drift result and direction, ordered by sink node id.
/// </summary>
/// <param name="driftId">Identifier of the drift result row.</param>
/// <param name="direction">Direction to filter on; also assigned to every returned sink.</param>
/// <param name="offset">Number of rows to skip; must not be negative.</param>
/// <param name="limit">Maximum number of rows to return; must be positive.</param>
/// <param name="ct">Cancellation token for the database call.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="offset"/> is negative or <paramref name="limit"/> is not positive.
/// </exception>
public async Task<IReadOnlyList<DriftedSink>> ListSinksAsync(
    Guid driftId,
    DriftDirection direction,
    int offset,
    int limit,
    CancellationToken ct = default)
{
    if (offset < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(offset));
    }

    if (limit <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(limit));
    }

    var pageSql = $"""
        SELECT
            id,
            sink_node_id,
            symbol,
            sink_category,
            direction,
            cause_kind,
            cause_description,
            cause_symbol,
            cause_file,
            cause_line,
            code_change_id,
            compressed_path,
            associated_vulns
        FROM {DriftedSinksTable}
        WHERE tenant_id = @TenantId AND drift_result_id = @DriftId AND direction = @Direction
        ORDER BY sink_node_id ASC
        OFFSET @Offset LIMIT @Limit
        """;

    await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);

    var parameters = new
    {
        TenantId,
        DriftId = driftId,
        Direction = ToDbValue(direction),
        Offset = offset,
        Limit = limit
    };
    var page = await connection
        .QueryAsync<DriftSinkRow>(new CommandDefinition(pageSql, parameters, cancellationToken: ct))
        .ConfigureAwait(false);

    // Rows are mapped in the order the query returned them.
    var sinks = new List<DriftedSink>();
    foreach (var row in page)
    {
        sinks.Add(row.ToModel(direction));
    }

    return sinks;
}
/// <summary>
/// Lazily projects sinks into parameter objects for the sink insert statement.
/// </summary>
/// <param name="driftId">Id of the drift result row the sinks belong to.</param>
/// <param name="sinks">Sinks to project.</param>
/// <param name="direction">Direction stored for every projected sink.</param>
private static IEnumerable<object> EnumerateSinkRows(Guid driftId, ImmutableArray<DriftedSink> sinks, DriftDirection direction)
{
    foreach (var sink in sinks)
    {
        var cause = sink.Cause;
        var serializedPath = JsonSerializer.Serialize(sink.Path, JsonOptions);

        // A missing or empty vulnerability list is stored as NULL rather than as an empty JSON array.
        string? serializedVulns = null;
        if (!sink.AssociatedVulns.IsDefaultOrEmpty)
        {
            serializedVulns = JsonSerializer.Serialize(sink.AssociatedVulns, JsonOptions);
        }

        yield return new
        {
            sink.Id,
            TenantId,
            DriftId = driftId,
            sink.SinkNodeId,
            sink.Symbol,
            SinkCategory = ToDbValue(sink.SinkCategory),
            Direction = ToDbValue(direction),
            CauseKind = ToDbValue(cause.Kind),
            CauseDescription = cause.Description,
            CauseSymbol = cause.ChangedSymbol,
            CauseFile = cause.ChangedFile,
            CauseLine = cause.ChangedLine,
            CodeChangeId = cause.CodeChangeId,
            CompressedPath = serializedPath,
            AssociatedVulns = serializedVulns
        };
    }
}
/// <summary>
/// Builds the full drift result for a header row by loading all of its sinks and
/// splitting them into newly reachable and newly unreachable sets.
/// </summary>
/// <param name="connection">Open connection to run the sink query on.</param>
/// <param name="header">Header row whose id selects the sinks.</param>
/// <param name="ct">Cancellation token for the database call.</param>
private async Task<ReachabilityDriftResult> LoadResultAsync(
    System.Data.IDbConnection connection,
    DriftHeaderRow header,
    CancellationToken ct)
{
    var sinksSql = $"""
        SELECT
            id,
            sink_node_id,
            symbol,
            sink_category,
            direction,
            cause_kind,
            cause_description,
            cause_symbol,
            cause_file,
            cause_line,
            code_change_id,
            compressed_path,
            associated_vulns
        FROM {DriftedSinksTable}
        WHERE tenant_id = @TenantId AND drift_result_id = @DriftId
        ORDER BY direction ASC, sink_node_id ASC
        """;

    var command = new CommandDefinition(
        sinksSql,
        new { TenantId, DriftId = header.id },
        cancellationToken: ct);
    var fetched = await connection.QueryAsync<DriftSinkRow>(command).ConfigureAwait(false);
    var rows = fetched.ToList();

    // Selects the rows stored with the given direction and orders them by sink node id (ordinal).
    ImmutableArray<DriftedSink> SinksFor(DriftDirection wanted)
    {
        return rows
            .Where(row => string.Equals(row.direction, ToDbValue(wanted), StringComparison.Ordinal))
            .Select(row => row.ToModel(wanted))
            .OrderBy(sink => sink.SinkNodeId, StringComparer.Ordinal)
            .ToImmutableArray();
    }

    var newlyReachable = SinksFor(DriftDirection.BecameReachable);
    var newlyUnreachable = SinksFor(DriftDirection.BecameUnreachable);

    return new ReachabilityDriftResult
    {
        Id = header.id,
        BaseScanId = header.base_scan_id,
        HeadScanId = header.head_scan_id,
        Language = header.language,
        DetectedAt = header.detected_at,
        NewlyReachable = newlyReachable,
        NewlyUnreachable = newlyUnreachable,
        ResultDigest = header.result_digest
    };
}
// Maps a direction to its stored text; anything other than BecameReachable is written as "became_unreachable".
private static string ToDbValue(DriftDirection direction)
{
    return direction switch
    {
        DriftDirection.BecameReachable => "became_reachable",
        _ => "became_unreachable"
    };
}
// Maps a cause kind to its snake_case stored text; unlisted kinds are written as "unknown".
private static string ToDbValue(DriftCauseKind kind)
{
    switch (kind)
    {
        case DriftCauseKind.GuardRemoved:
            return "guard_removed";
        case DriftCauseKind.GuardAdded:
            return "guard_added";
        case DriftCauseKind.NewPublicRoute:
            return "new_public_route";
        case DriftCauseKind.VisibilityEscalated:
            return "visibility_escalated";
        case DriftCauseKind.DependencyUpgraded:
            return "dependency_upgraded";
        case DriftCauseKind.SymbolRemoved:
            return "symbol_removed";
        default:
            return "unknown";
    }
}
// Maps a sink category to its upper-snake-case stored text; unlisted categories are
// written using the enum member's own name.
private static string ToDbValue(SinkCategory category)
{
    switch (category)
    {
        case SinkCategory.CmdExec:
            return "CMD_EXEC";
        case SinkCategory.UnsafeDeser:
            return "UNSAFE_DESER";
        case SinkCategory.SqlRaw:
            return "SQL_RAW";
        case SinkCategory.Ssrf:
            return "SSRF";
        case SinkCategory.FileWrite:
            return "FILE_WRITE";
        case SinkCategory.PathTraversal:
            return "PATH_TRAVERSAL";
        case SinkCategory.TemplateInjection:
            return "TEMPLATE_INJECTION";
        case SinkCategory.CryptoWeak:
            return "CRYPTO_WEAK";
        case SinkCategory.AuthzBypass:
            return "AUTHZ_BYPASS";
        default:
            return category.ToString();
    }
}
// Parses the stored cause kind text (trimmed, case-insensitive); unrecognized text maps to Unknown.
private static DriftCauseKind ParseCauseKind(string value)
{
    var normalized = value.Trim().ToLowerInvariant();
    switch (normalized)
    {
        case "guard_removed":
            return DriftCauseKind.GuardRemoved;
        case "guard_added":
            return DriftCauseKind.GuardAdded;
        case "new_public_route":
            return DriftCauseKind.NewPublicRoute;
        case "visibility_escalated":
            return DriftCauseKind.VisibilityEscalated;
        case "dependency_upgraded":
            return DriftCauseKind.DependencyUpgraded;
        case "symbol_removed":
            return DriftCauseKind.SymbolRemoved;
        default:
            return DriftCauseKind.Unknown;
    }
}
// Parses the stored sink category text (trimmed, case-insensitive).
// The known upper-snake-case values are matched first. Text that is not one of them is then
// tried as an enum member name, because the writer stores category.ToString() for members
// without a dedicated mapping; without this step such rows would silently read back as
// CmdExec. Text that matches nothing keeps the historical CmdExec fallback.
private static SinkCategory ParseSinkCategory(string value)
{
    var text = value.Trim();

    return text.ToUpperInvariant() switch
    {
        "CMD_EXEC" => SinkCategory.CmdExec,
        "UNSAFE_DESER" => SinkCategory.UnsafeDeser,
        "SQL_RAW" => SinkCategory.SqlRaw,
        "SSRF" => SinkCategory.Ssrf,
        "FILE_WRITE" => SinkCategory.FileWrite,
        "PATH_TRAVERSAL" => SinkCategory.PathTraversal,
        "TEMPLATE_INJECTION" => SinkCategory.TemplateInjection,
        "CRYPTO_WEAK" => SinkCategory.CryptoWeak,
        "AUTHZ_BYPASS" => SinkCategory.AuthzBypass,
        // IsDefined rejects numeric or combined input that TryParse would otherwise accept.
        _ => Enum.TryParse(text, ignoreCase: true, out SinkCategory byName)
                && Enum.IsDefined(typeof(SinkCategory), byName)
            ? byName
            : SinkCategory.CmdExec
    };
}
// Row shape for the header queries against the drift results table.
// Property names are spelled exactly like the selected column names (snake_case);
// presumably the query mapper matches columns to properties by name, so they must
// not be renamed — TODO confirm no custom name mapping is configured.
private sealed class DriftHeaderRow
{
public Guid id { get; init; }
public string base_scan_id { get; init; } = string.Empty;
public string head_scan_id { get; init; } = string.Empty;
public string language { get; init; } = string.Empty;
public DateTimeOffset detected_at { get; init; }
public string result_digest { get; init; } = string.Empty;
}
// Row shape for the sink queries. Property names are spelled exactly like the selected
// column names (snake_case) and must stay that way for the query results to bind.
private sealed class DriftSinkRow
{
    public Guid id { get; init; }
    public string sink_node_id { get; init; } = string.Empty;
    public string symbol { get; init; } = string.Empty;
    public string sink_category { get; init; } = string.Empty;
    public string direction { get; init; } = string.Empty;
    public string cause_kind { get; init; } = string.Empty;
    public string cause_description { get; init; } = string.Empty;
    public string? cause_symbol { get; init; }
    public string? cause_file { get; init; }
    public int? cause_line { get; init; }
    public Guid? code_change_id { get; init; }
    public string compressed_path { get; init; } = "{}";
    public string? associated_vulns { get; init; }

    /// <summary>
    /// Converts the row into a <see cref="DriftedSink"/>. The direction comes from the
    /// <paramref name="direction"/> argument, not from the row's own direction column.
    /// </summary>
    public DriftedSink ToModel(DriftDirection direction)
    {
        // A JSON "null" path deserializes to null; substitute an empty placeholder path.
        var parsedPath = JsonSerializer.Deserialize<CompressedPath>(compressed_path, JsonOptions);
        if (parsedPath is null)
        {
            parsedPath = new CompressedPath
            {
                Entrypoint = new PathNode { NodeId = string.Empty, Symbol = string.Empty },
                Sink = new PathNode { NodeId = string.Empty, Symbol = string.Empty },
                IntermediateCount = 0,
                KeyNodes = ImmutableArray<PathNode>.Empty
            };
        }

        // Missing, blank or JSON "null" vulnerability data all become an empty array.
        ImmutableArray<AssociatedVuln> parsedVulns;
        if (string.IsNullOrWhiteSpace(associated_vulns))
        {
            parsedVulns = ImmutableArray<AssociatedVuln>.Empty;
        }
        else
        {
            var vulnArray = JsonSerializer.Deserialize<AssociatedVuln[]>(associated_vulns!, JsonOptions);
            parsedVulns = vulnArray is null
                ? ImmutableArray<AssociatedVuln>.Empty
                : vulnArray.ToImmutableArray();
        }

        var cause = new DriftCause
        {
            Kind = ParseCauseKind(cause_kind),
            Description = cause_description,
            ChangedSymbol = cause_symbol,
            ChangedFile = cause_file,
            ChangedLine = cause_line,
            CodeChangeId = code_change_id
        };

        return new DriftedSink
        {
            Id = id,
            SinkNodeId = sink_node_id,
            Symbol = symbol,
            SinkCategory = ParseSinkCategory(sink_category),
            Direction = direction,
            Cause = cause,
            Path = parsedPath,
            AssociatedVulns = parsedVulns
        };
    }
}
}

View File

@@ -8,6 +8,9 @@ namespace StellaOps.Scanner.Storage.Postgres;
public sealed class PostgresReachabilityResultRepository : IReachabilityResultRepository
{
private const string TenantContext = "00000000-0000-0000-0000-000000000001";
private static readonly Guid TenantId = Guid.Parse(TenantContext);
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
{
WriteIndented = false
@@ -16,6 +19,9 @@ public sealed class PostgresReachabilityResultRepository : IReachabilityResultRe
private readonly ScannerDataSource _dataSource;
private readonly ILogger<PostgresReachabilityResultRepository> _logger;
private string SchemaName => _dataSource.SchemaName ?? ScannerDataSource.DefaultSchema;
private string ReachabilityResultsTable => $"{SchemaName}.reachability_results";
public PostgresReachabilityResultRepository(
ScannerDataSource dataSource,
ILogger<PostgresReachabilityResultRepository> logger)
@@ -29,8 +35,8 @@ public sealed class PostgresReachabilityResultRepository : IReachabilityResultRe
ArgumentNullException.ThrowIfNull(result);
var trimmed = result.Trimmed();
const string sql = """
INSERT INTO scanner.reachability_results (
var sql = $"""
INSERT INTO {ReachabilityResultsTable} (
tenant_id,
scan_id,
language,
@@ -59,12 +65,11 @@ public sealed class PostgresReachabilityResultRepository : IReachabilityResultRe
""";
var json = JsonSerializer.Serialize(trimmed, JsonOptions);
var tenantId = GetCurrentTenantId();
await using var connection = await _dataSource.OpenConnectionAsync(ct).ConfigureAwait(false);
await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);
await connection.ExecuteAsync(new CommandDefinition(sql, new
{
TenantId = tenantId,
TenantId = TenantId,
ScanId = trimmed.ScanId,
Language = trimmed.Language,
GraphDigest = trimmed.GraphDigest,
@@ -87,18 +92,18 @@ public sealed class PostgresReachabilityResultRepository : IReachabilityResultRe
ArgumentException.ThrowIfNullOrWhiteSpace(scanId);
ArgumentException.ThrowIfNullOrWhiteSpace(language);
const string sql = """
var sql = $"""
SELECT result_json
FROM scanner.reachability_results
FROM {ReachabilityResultsTable}
WHERE tenant_id = @TenantId AND scan_id = @ScanId AND language = @Language
ORDER BY computed_at DESC
LIMIT 1
""";
await using var connection = await _dataSource.OpenConnectionAsync(ct).ConfigureAwait(false);
await using var connection = await _dataSource.OpenConnectionAsync(TenantContext, ct).ConfigureAwait(false);
var json = await connection.ExecuteScalarAsync<string?>(new CommandDefinition(sql, new
{
TenantId = GetCurrentTenantId(),
TenantId = TenantId,
ScanId = scanId,
Language = language
}, cancellationToken: ct)).ConfigureAwait(false);
@@ -110,10 +115,5 @@ public sealed class PostgresReachabilityResultRepository : IReachabilityResultRe
return JsonSerializer.Deserialize<ReachabilityAnalysisResult>(json, JsonOptions);
}
private static Guid GetCurrentTenantId()
{
return Guid.Parse("00000000-0000-0000-0000-000000000001");
}
}

View File

@@ -0,0 +1,9 @@
using StellaOps.Scanner.ReachabilityDrift;
namespace StellaOps.Scanner.Storage.Repositories;
/// <summary>
/// Storage contract for <see cref="CodeChangeFact"/> records.
/// </summary>
public interface ICodeChangeRepository
{
/// <summary>
/// Stores the supplied batch of code change facts.
/// </summary>
/// <param name="changes">The facts to store.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>A task that completes when the store operation has finished.</returns>
// NOTE(review): handling of an empty list or of already-stored facts is decided
// by the implementation and is not visible from this contract — confirm.
Task StoreAsync(IReadOnlyList<CodeChangeFact> changes, CancellationToken ct = default);
}

View File

@@ -0,0 +1,21 @@
using StellaOps.Scanner.ReachabilityDrift;
namespace StellaOps.Scanner.Storage.Repositories;
/// <summary>
/// Storage contract for <see cref="ReachabilityDriftResult"/> instances and
/// their <see cref="DriftedSink"/> entries.
/// </summary>
public interface IReachabilityDriftResultRepository
{
/// <summary>Stores the given drift result.</summary>
/// <param name="result">The result to store.</param>
/// <param name="ct">Token used to cancel the operation.</param>
Task StoreAsync(ReachabilityDriftResult result, CancellationToken ct = default);
/// <summary>
/// Looks up a drift result for the given head scan and language.
/// </summary>
/// <param name="headScanId">Identifier of the head scan.</param>
/// <param name="language">Language the result was computed for.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The result, or <c>null</c> when none is found.</returns>
// NOTE(review): "latest" is presumably ordered by detection time — confirm in the implementation.
Task<ReachabilityDriftResult?> TryGetLatestForHeadAsync(string headScanId, string language, CancellationToken ct = default);
/// <summary>Looks up a drift result by its identifier.</summary>
/// <param name="driftId">Identifier of the drift result.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The result, or <c>null</c> when none is found.</returns>
Task<ReachabilityDriftResult?> TryGetByIdAsync(Guid driftId, CancellationToken ct = default);
/// <summary>Reports whether a drift result with the given identifier exists.</summary>
/// <param name="driftId">Identifier of the drift result.</param>
/// <param name="ct">Token used to cancel the operation.</param>
Task<bool> ExistsAsync(Guid driftId, CancellationToken ct = default);
/// <summary>
/// Lists sinks of a drift result for one direction, windowed by
/// <paramref name="offset"/> and <paramref name="limit"/>.
/// </summary>
/// <param name="driftId">Identifier of the drift result.</param>
/// <param name="direction">Direction of the sinks to list.</param>
/// <param name="offset">Number of sinks to skip.</param>
/// <param name="limit">Maximum number of sinks to return.</param>
/// <param name="ct">Token used to cancel the operation.</param>
// NOTE(review): result ordering and validation of negative offset/limit are not
// specified by this contract — confirm in the implementation.
Task<IReadOnlyList<DriftedSink>> ListSinksAsync(
Guid driftId,
DriftDirection direction,
int offset,
int limit,
CancellationToken ct = default);
}

View File

@@ -24,6 +24,7 @@
<ProjectReference Include="..\\StellaOps.Scanner.CallGraph\\StellaOps.Scanner.CallGraph.csproj" />
<ProjectReference Include="..\\StellaOps.Scanner.Core\\StellaOps.Scanner.Core.csproj" />
<ProjectReference Include="..\\StellaOps.Scanner.ProofSpine\\StellaOps.Scanner.ProofSpine.csproj" />
<ProjectReference Include="..\\StellaOps.Scanner.ReachabilityDrift\\StellaOps.Scanner.ReachabilityDrift.csproj" />
<ProjectReference Include="..\\StellaOps.Scanner.SmartDiff\\StellaOps.Scanner.SmartDiff.csproj" />
<ProjectReference Include="..\\..\\..\\__Libraries\\StellaOps.Infrastructure.Postgres\\StellaOps.Infrastructure.Postgres.csproj" />
</ItemGroup>

View File

@@ -3,3 +3,4 @@
| Task ID | Sprint | Status | Notes |
| --- | --- | --- | --- |
| `PROOFSPINE-3100-DB` | `docs/implplan/SPRINT_3100_0001_0001_proof_spine_system.md` | DOING | Add Postgres migrations and repository for ProofSpine persistence (`proof_spines`, `proof_segments`, `proof_spine_history`). |
| `SCAN-API-3103-004` | `docs/implplan/SPRINT_3103_0001_0001_scanner_api_ingestion_completion.md` | DOING | Fix scanner storage connection/schema issues surfaced by Scanner WebService ingestion tests. |