update evidence bundle to include new evidence types and implement ProofSpine integration
Some checks failed
Lighthouse CI / Lighthouse Audit (push) Has been cancelled
Lighthouse CI / Axe Accessibility Audit (push) Has been cancelled
Manifest Integrity / Validate Pack Fixtures (push) Has been cancelled
Manifest Integrity / Validate Schema Integrity (push) Has been cancelled
Manifest Integrity / Validate Contract Documents (push) Has been cancelled
Manifest Integrity / Audit SHA256SUMS Files (push) Has been cancelled
Manifest Integrity / Verify Merkle Roots (push) Has been cancelled
api-governance / spectral-lint (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
sm-remote-ci / build-and-test (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
oas-ci / oas-validate (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-15 09:15:30 +02:00
parent 8c8f0c632d
commit 505fe7a885
49 changed files with 4756 additions and 551 deletions

View File

@@ -0,0 +1,115 @@
-- ============================================================
-- UNKNOWNS SCORING SCHEMA EXTENSION
-- Sprint: SPRINT_1102_0001_0001
-- Advisory Reference: 14-Dec-2025 - Triage and Unknowns Technical Reference
-- ============================================================
-- Ensure schema exists (idempotent; safe on fresh and existing databases).
CREATE SCHEMA IF NOT EXISTS signals;
-- Extend unknowns table with scoring columns.
-- NOTE: each CHECK constraint is declared inline with its column, so it is
-- created only when the column itself is created. Re-running this migration,
-- or running it after the application's own DDL (which declares the same
-- constraint names), is a no-op because ADD COLUMN IF NOT EXISTS skips the
-- whole clause, constraint included.
ALTER TABLE signals.unknowns
-- Scoring factors (range: 0.0 - 1.0)
ADD COLUMN IF NOT EXISTS popularity_p FLOAT DEFAULT 0.0
CONSTRAINT chk_popularity_range CHECK (popularity_p >= 0.0 AND popularity_p <= 1.0),
ADD COLUMN IF NOT EXISTS deployment_count INT DEFAULT 0,
ADD COLUMN IF NOT EXISTS exploit_potential_e FLOAT DEFAULT 0.0
CONSTRAINT chk_exploit_range CHECK (exploit_potential_e >= 0.0 AND exploit_potential_e <= 1.0),
ADD COLUMN IF NOT EXISTS uncertainty_u FLOAT DEFAULT 0.0
CONSTRAINT chk_uncertainty_range CHECK (uncertainty_u >= 0.0 AND uncertainty_u <= 1.0),
ADD COLUMN IF NOT EXISTS centrality_c FLOAT DEFAULT 0.0
CONSTRAINT chk_centrality_range CHECK (centrality_c >= 0.0 AND centrality_c <= 1.0),
ADD COLUMN IF NOT EXISTS degree_centrality INT DEFAULT 0,
ADD COLUMN IF NOT EXISTS betweenness_centrality FLOAT DEFAULT 0.0,
ADD COLUMN IF NOT EXISTS staleness_s FLOAT DEFAULT 0.0
CONSTRAINT chk_staleness_range CHECK (staleness_s >= 0.0 AND staleness_s <= 1.0),
ADD COLUMN IF NOT EXISTS days_since_analysis INT DEFAULT 0,
-- Composite score and band
ADD COLUMN IF NOT EXISTS score FLOAT DEFAULT 0.0
CONSTRAINT chk_score_range CHECK (score >= 0.0 AND score <= 1.0),
ADD COLUMN IF NOT EXISTS band TEXT DEFAULT 'cold'
CONSTRAINT chk_band_value CHECK (band IN ('hot', 'warm', 'cold')),
-- Uncertainty flags (JSONB for extensibility)
ADD COLUMN IF NOT EXISTS unknown_flags JSONB DEFAULT '{}'::jsonb,
-- Normalization trace for debugging/audit
ADD COLUMN IF NOT EXISTS normalization_trace JSONB,
-- Rescan scheduling (maintained by the application, not by triggers)
ADD COLUMN IF NOT EXISTS rescan_attempts INT DEFAULT 0,
ADD COLUMN IF NOT EXISTS last_rescan_result TEXT,
ADD COLUMN IF NOT EXISTS next_scheduled_rescan TIMESTAMPTZ,
ADD COLUMN IF NOT EXISTS last_analyzed_at TIMESTAMPTZ,
-- Graph slice reference (content hashes stored as raw bytes)
ADD COLUMN IF NOT EXISTS graph_slice_hash BYTEA,
ADD COLUMN IF NOT EXISTS evidence_set_hash BYTEA,
ADD COLUMN IF NOT EXISTS callgraph_attempt_hash BYTEA,
-- Version tracking
ADD COLUMN IF NOT EXISTS purl_version TEXT,
-- Timestamps (updated_at is maintained by the repository layer on write)
ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT NOW();
-- Create indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_unknowns_band
ON signals.unknowns(band);
CREATE INDEX IF NOT EXISTS idx_unknowns_score_desc
ON signals.unknowns(score DESC);
CREATE INDEX IF NOT EXISTS idx_unknowns_band_score
ON signals.unknowns(band, score DESC);
-- Partial index: only rows that actually have a scheduled rescan.
CREATE INDEX IF NOT EXISTS idx_unknowns_next_rescan
ON signals.unknowns(next_scheduled_rescan)
WHERE next_scheduled_rescan IS NOT NULL;
-- Partial index serving the hot-band worklist query (ORDER BY score DESC).
CREATE INDEX IF NOT EXISTS idx_unknowns_hot_band
ON signals.unknowns(score DESC)
WHERE band = 'hot';
CREATE INDEX IF NOT EXISTS idx_unknowns_purl
ON signals.unknowns(purl);
-- GIN index for JSONB flags queries
CREATE INDEX IF NOT EXISTS idx_unknowns_flags_gin
ON signals.unknowns USING GIN (unknown_flags);
-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON COLUMN signals.unknowns.popularity_p IS
'Deployment impact score (P). Formula: min(1, log10(1 + deployments)/log10(1 + 100))';
COMMENT ON COLUMN signals.unknowns.exploit_potential_e IS
'Exploit consequence potential (E). Based on CVE severity, KEV status.';
COMMENT ON COLUMN signals.unknowns.uncertainty_u IS
'Uncertainty density (U). Aggregated from flags: no_provenance(0.30), version_range(0.25), conflicting_feeds(0.20), missing_vector(0.15), unreachable_source(0.10)';
COMMENT ON COLUMN signals.unknowns.centrality_c IS
'Graph centrality (C). Normalized betweenness centrality.';
COMMENT ON COLUMN signals.unknowns.staleness_s IS
'Evidence staleness (S). Default linear formula: min(1, age_days / max_days) with max_days defaulting to 14; the scoring service may apply configurable exponential decay normalization instead.';
COMMENT ON COLUMN signals.unknowns.score IS
'Composite score: clamp01(wP*P + wE*E + wU*U + wC*C + wS*S). Default weights: wP=0.25, wE=0.25, wU=0.25, wC=0.15, wS=0.10';
COMMENT ON COLUMN signals.unknowns.band IS
'Triage band. Default thresholds (configurable in scoring options): HOT (>=0.70): immediate rescan. WARM (0.40-0.69): scheduled 12-72h. COLD (<0.40): weekly batch.';
COMMENT ON COLUMN signals.unknowns.unknown_flags IS
'JSONB flags: {no_provenance_anchor, version_range, conflicting_feeds, missing_vector, unreachable_source_advisory, dynamic_call_target, external_assembly}';
COMMENT ON COLUMN signals.unknowns.normalization_trace IS
'JSONB trace of scoring computation for audit/debugging. Includes raw values, normalized values, weights, and formula.';

View File

@@ -1,5 +1,7 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Npgsql;
using NpgsqlTypes;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Signals.Models;
using StellaOps.Signals.Persistence;
@@ -8,9 +10,16 @@ namespace StellaOps.Signals.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL implementation of <see cref="IUnknownsRepository"/>.
/// Supports full scoring schema per Sprint 1102.
/// </summary>
public sealed class PostgresUnknownsRepository : RepositoryBase<SignalsDataSource>, IUnknownsRepository
{
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
WriteIndented = false
};
private bool _tableInitialized;
public PostgresUnknownsRepository(SignalsDataSource dataSource, ILogger<PostgresUnknownsRepository> logger)
@@ -40,10 +49,35 @@ public sealed class PostgresUnknownsRepository : RepositoryBase<SignalsDataSourc
await deleteCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
// Insert new items
// Insert new items with all scoring columns
const string insertSql = @"
INSERT INTO signals.unknowns (id, subject_key, callgraph_id, symbol_id, code_id, purl, edge_from, edge_to, reason, created_at)
VALUES (@id, @subject_key, @callgraph_id, @symbol_id, @code_id, @purl, @edge_from, @edge_to, @reason, @created_at)";
INSERT INTO signals.unknowns (
id, subject_key, callgraph_id, symbol_id, code_id, purl, purl_version,
edge_from, edge_to, reason,
popularity_p, deployment_count,
exploit_potential_e,
uncertainty_u,
centrality_c, degree_centrality, betweenness_centrality,
staleness_s, days_since_analysis,
score, band,
unknown_flags, normalization_trace,
rescan_attempts, last_rescan_result, next_scheduled_rescan, last_analyzed_at,
graph_slice_hash, evidence_set_hash, callgraph_attempt_hash,
created_at, updated_at
) VALUES (
@id, @subject_key, @callgraph_id, @symbol_id, @code_id, @purl, @purl_version,
@edge_from, @edge_to, @reason,
@popularity_p, @deployment_count,
@exploit_potential_e,
@uncertainty_u,
@centrality_c, @degree_centrality, @betweenness_centrality,
@staleness_s, @days_since_analysis,
@score, @band,
@unknown_flags, @normalization_trace,
@rescan_attempts, @last_rescan_result, @next_scheduled_rescan, @last_analyzed_at,
@graph_slice_hash, @evidence_set_hash, @callgraph_attempt_hash,
@created_at, @updated_at
)";
foreach (var item in items)
{
@@ -55,16 +89,7 @@ public sealed class PostgresUnknownsRepository : RepositoryBase<SignalsDataSourc
var itemId = string.IsNullOrWhiteSpace(item.Id) ? Guid.NewGuid().ToString("N") : item.Id.Trim();
await using var insertCommand = CreateCommand(insertSql, connection, transaction);
AddParameter(insertCommand, "@id", itemId);
AddParameter(insertCommand, "@subject_key", normalizedSubjectKey);
AddParameter(insertCommand, "@callgraph_id", (object?)item.CallgraphId ?? DBNull.Value);
AddParameter(insertCommand, "@symbol_id", (object?)item.SymbolId ?? DBNull.Value);
AddParameter(insertCommand, "@code_id", (object?)item.CodeId ?? DBNull.Value);
AddParameter(insertCommand, "@purl", (object?)item.Purl ?? DBNull.Value);
AddParameter(insertCommand, "@edge_from", (object?)item.EdgeFrom ?? DBNull.Value);
AddParameter(insertCommand, "@edge_to", (object?)item.EdgeTo ?? DBNull.Value);
AddParameter(insertCommand, "@reason", (object?)item.Reason ?? DBNull.Value);
AddParameter(insertCommand, "@created_at", item.CreatedAt);
AddInsertParameters(insertCommand, itemId, normalizedSubjectKey, item);
await insertCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
@@ -84,11 +109,10 @@ public sealed class PostgresUnknownsRepository : RepositoryBase<SignalsDataSourc
await EnsureTableAsync(cancellationToken).ConfigureAwait(false);
const string sql = @"
SELECT id, subject_key, callgraph_id, symbol_id, code_id, purl, edge_from, edge_to, reason, created_at
const string sql = SelectAllColumns + @"
FROM signals.unknowns
WHERE subject_key = @subject_key
ORDER BY created_at DESC";
ORDER BY score DESC, created_at DESC";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
@@ -124,20 +148,297 @@ public sealed class PostgresUnknownsRepository : RepositoryBase<SignalsDataSourc
return result is long count ? (int)count : 0;
}
private static UnknownSymbolDocument MapUnknownSymbol(NpgsqlDataReader reader) => new()
public async Task BulkUpdateAsync(IEnumerable<UnknownSymbolDocument> items, CancellationToken cancellationToken)
{
Id = reader.GetString(0),
SubjectKey = reader.GetString(1),
CallgraphId = reader.IsDBNull(2) ? null : reader.GetString(2),
SymbolId = reader.IsDBNull(3) ? null : reader.GetString(3),
CodeId = reader.IsDBNull(4) ? null : reader.GetString(4),
Purl = reader.IsDBNull(5) ? null : reader.GetString(5),
EdgeFrom = reader.IsDBNull(6) ? null : reader.GetString(6),
EdgeTo = reader.IsDBNull(7) ? null : reader.GetString(7),
Reason = reader.IsDBNull(8) ? null : reader.GetString(8),
CreatedAt = reader.GetFieldValue<DateTimeOffset>(9)
ArgumentNullException.ThrowIfNull(items);
await EnsureTableAsync(cancellationToken).ConfigureAwait(false);
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
try
{
const string updateSql = @"
UPDATE signals.unknowns SET
popularity_p = @popularity_p,
deployment_count = @deployment_count,
exploit_potential_e = @exploit_potential_e,
uncertainty_u = @uncertainty_u,
centrality_c = @centrality_c,
degree_centrality = @degree_centrality,
betweenness_centrality = @betweenness_centrality,
staleness_s = @staleness_s,
days_since_analysis = @days_since_analysis,
score = @score,
band = @band,
unknown_flags = @unknown_flags,
normalization_trace = @normalization_trace,
rescan_attempts = @rescan_attempts,
last_rescan_result = @last_rescan_result,
next_scheduled_rescan = @next_scheduled_rescan,
last_analyzed_at = @last_analyzed_at,
graph_slice_hash = @graph_slice_hash,
evidence_set_hash = @evidence_set_hash,
callgraph_attempt_hash = @callgraph_attempt_hash,
updated_at = @updated_at
WHERE subject_key = @subject_key AND id = @id";
foreach (var item in items)
{
if (item is null || string.IsNullOrWhiteSpace(item.Id) || string.IsNullOrWhiteSpace(item.SubjectKey))
{
continue;
}
await using var updateCommand = CreateCommand(updateSql, connection, transaction);
AddUpdateParameters(updateCommand, item);
await updateCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
}
catch
{
await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
throw;
}
}
/// <summary>
/// Lists every distinct subject key that currently has recorded unknowns,
/// ordered alphabetically so callers get deterministic output.
/// </summary>
public async Task<IReadOnlyList<string>> GetAllSubjectKeysAsync(CancellationToken cancellationToken)
{
    await EnsureTableAsync(cancellationToken).ConfigureAwait(false);

    const string sql = @"
SELECT DISTINCT subject_key
FROM signals.unknowns
ORDER BY subject_key";

    await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);
    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);

    var subjectKeys = new List<string>();
    while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        // Single projected column: the subject key itself.
        subjectKeys.Add(reader.GetString(0));
    }

    return subjectKeys;
}
/// <summary>
/// Returns up to <paramref name="limit"/> unknowns in the given band whose
/// next rescan is either unscheduled or already due, highest score first.
/// </summary>
/// <param name="band">Triage band to query (persisted lowercase).</param>
/// <param name="limit">Maximum number of rows; non-positive values return an empty list.</param>
public async Task<IReadOnlyList<UnknownSymbolDocument>> GetDueForRescanAsync(
    UnknownsBand band,
    int limit,
    CancellationToken cancellationToken)
{
    // A non-positive limit can never yield rows; short-circuit instead of
    // sending a negative LIMIT (a PostgreSQL error) to the server.
    if (limit <= 0)
    {
        return Array.Empty<UnknownSymbolDocument>();
    }

    await EnsureTableAsync(cancellationToken).ConfigureAwait(false);

    // Band values are stored lowercase (see chk_band_value constraint).
    var bandValue = band.ToString().ToLowerInvariant();

    const string sql = SelectAllColumns + @"
FROM signals.unknowns
WHERE band = @band
AND (next_scheduled_rescan IS NULL OR next_scheduled_rescan <= NOW())
ORDER BY score DESC
LIMIT @limit";

    await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);
    AddParameter(command, "@band", bandValue);
    AddParameter(command, "@limit", limit);

    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
    var results = new List<UnknownSymbolDocument>();
    while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        results.Add(MapUnknownSymbol(reader));
    }

    return results;
}
// Shared SELECT column list for all read paths. Column ORDER is a contract:
// MapUnknownSymbol reads these positionally by ordinal (0-31) — when adding
// a column, append it here AND extend MapUnknownSymbol in lockstep.
private const string SelectAllColumns = @"
SELECT id, subject_key, callgraph_id, symbol_id, code_id, purl, purl_version,
edge_from, edge_to, reason,
popularity_p, deployment_count,
exploit_potential_e,
uncertainty_u,
centrality_c, degree_centrality, betweenness_centrality,
staleness_s, days_since_analysis,
score, band,
unknown_flags, normalization_trace,
rescan_attempts, last_rescan_result, next_scheduled_rescan, last_analyzed_at,
graph_slice_hash, evidence_set_hash, callgraph_attempt_hash,
created_at, updated_at";
/// <summary>
/// Binds every INSERT parameter for an unknowns row. Parameter names must
/// stay in sync with the column list of the bulk-insert statement.
/// </summary>
private void AddInsertParameters(NpgsqlCommand command, string itemId, string subjectKey, UnknownSymbolDocument item)
{
    AddParameter(command, "@id", itemId);
    AddParameter(command, "@subject_key", subjectKey);
    AddParameter(command, "@callgraph_id", (object?)item.CallgraphId ?? DBNull.Value);
    AddParameter(command, "@symbol_id", (object?)item.SymbolId ?? DBNull.Value);
    AddParameter(command, "@code_id", (object?)item.CodeId ?? DBNull.Value);
    AddParameter(command, "@purl", (object?)item.Purl ?? DBNull.Value);
    AddParameter(command, "@purl_version", (object?)item.PurlVersion ?? DBNull.Value);
    AddParameter(command, "@edge_from", (object?)item.EdgeFrom ?? DBNull.Value);
    AddParameter(command, "@edge_to", (object?)item.EdgeTo ?? DBNull.Value);
    AddParameter(command, "@reason", (object?)item.Reason ?? DBNull.Value);
    // Scoring factors (normalized 0..1 plus their raw inputs).
    AddParameter(command, "@popularity_p", item.PopularityScore);
    AddParameter(command, "@deployment_count", item.DeploymentCount);
    AddParameter(command, "@exploit_potential_e", item.ExploitPotentialScore);
    AddParameter(command, "@uncertainty_u", item.UncertaintyScore);
    AddParameter(command, "@centrality_c", item.CentralityScore);
    AddParameter(command, "@degree_centrality", item.DegreeCentrality);
    AddParameter(command, "@betweenness_centrality", item.BetweennessCentrality);
    AddParameter(command, "@staleness_s", item.StalenessScore);
    AddParameter(command, "@days_since_analysis", item.DaysSinceLastAnalysis);
    // Composite score and band (band stored lowercase to satisfy chk_band_value).
    AddParameter(command, "@score", item.Score);
    AddParameter(command, "@band", item.Band.ToString().ToLowerInvariant());
    // JSONB columns.
    AddJsonParameter(command, "@unknown_flags", item.Flags);
    AddJsonParameter(command, "@normalization_trace", item.NormalizationTrace);
    // Rescan scheduling. Nullable timestamps use the same null-coalescing
    // pattern as the nullable text columns above, for consistency.
    AddParameter(command, "@rescan_attempts", item.RescanAttempts);
    AddParameter(command, "@last_rescan_result", (object?)item.LastRescanResult ?? DBNull.Value);
    AddParameter(command, "@next_scheduled_rescan", (object?)item.NextScheduledRescan ?? DBNull.Value);
    AddParameter(command, "@last_analyzed_at", (object?)item.LastAnalyzedAt ?? DBNull.Value);
    // Hashes are persisted as BYTEA. Null/empty strings map to SQL NULL so
    // Convert.FromHexString is never handed a blank value and zero-length
    // hashes are never stored. Invalid hex still throws FormatException,
    // surfacing bad input instead of silently truncating it.
    AddParameter(command, "@graph_slice_hash", string.IsNullOrEmpty(item.GraphSliceHash) ? DBNull.Value : (object)Convert.FromHexString(item.GraphSliceHash));
    AddParameter(command, "@evidence_set_hash", string.IsNullOrEmpty(item.EvidenceSetHash) ? DBNull.Value : (object)Convert.FromHexString(item.EvidenceSetHash));
    AddParameter(command, "@callgraph_attempt_hash", string.IsNullOrEmpty(item.CallgraphAttemptHash) ? DBNull.Value : (object)Convert.FromHexString(item.CallgraphAttemptHash));
    // Timestamps: a default CreatedAt means "not set by caller" — stamp now.
    AddParameter(command, "@created_at", item.CreatedAt == default ? DateTimeOffset.UtcNow : item.CreatedAt);
    AddParameter(command, "@updated_at", DateTimeOffset.UtcNow);
}
/// <summary>
/// Binds all UPDATE parameters for a scored unknowns row, including the
/// WHERE-clause keys (@subject_key, @id). Base descriptive columns are not
/// rewritten by the update statement and are therefore not bound here.
/// </summary>
private void AddUpdateParameters(NpgsqlCommand command, UnknownSymbolDocument item)
{
    // Composite-key parameters used by the WHERE clause.
    AddParameter(command, "@id", item.Id);
    AddParameter(command, "@subject_key", item.SubjectKey);
    // Scoring factors (normalized 0..1 plus their raw inputs).
    AddParameter(command, "@popularity_p", item.PopularityScore);
    AddParameter(command, "@deployment_count", item.DeploymentCount);
    AddParameter(command, "@exploit_potential_e", item.ExploitPotentialScore);
    AddParameter(command, "@uncertainty_u", item.UncertaintyScore);
    AddParameter(command, "@centrality_c", item.CentralityScore);
    AddParameter(command, "@degree_centrality", item.DegreeCentrality);
    AddParameter(command, "@betweenness_centrality", item.BetweennessCentrality);
    AddParameter(command, "@staleness_s", item.StalenessScore);
    AddParameter(command, "@days_since_analysis", item.DaysSinceLastAnalysis);
    // Composite score and band (band stored lowercase to satisfy chk_band_value).
    AddParameter(command, "@score", item.Score);
    AddParameter(command, "@band", item.Band.ToString().ToLowerInvariant());
    // JSONB columns.
    AddJsonParameter(command, "@unknown_flags", item.Flags);
    AddJsonParameter(command, "@normalization_trace", item.NormalizationTrace);
    // Rescan scheduling. Nullable timestamps use the same null-coalescing
    // pattern as the nullable text columns, for consistency with inserts.
    AddParameter(command, "@rescan_attempts", item.RescanAttempts);
    AddParameter(command, "@last_rescan_result", (object?)item.LastRescanResult ?? DBNull.Value);
    AddParameter(command, "@next_scheduled_rescan", (object?)item.NextScheduledRescan ?? DBNull.Value);
    AddParameter(command, "@last_analyzed_at", (object?)item.LastAnalyzedAt ?? DBNull.Value);
    // Hashes are persisted as BYTEA. Null/empty strings map to SQL NULL so
    // Convert.FromHexString is never handed a blank value and zero-length
    // hashes are never stored.
    AddParameter(command, "@graph_slice_hash", string.IsNullOrEmpty(item.GraphSliceHash) ? DBNull.Value : (object)Convert.FromHexString(item.GraphSliceHash));
    AddParameter(command, "@evidence_set_hash", string.IsNullOrEmpty(item.EvidenceSetHash) ? DBNull.Value : (object)Convert.FromHexString(item.EvidenceSetHash));
    AddParameter(command, "@callgraph_attempt_hash", string.IsNullOrEmpty(item.CallgraphAttemptHash) ? DBNull.Value : (object)Convert.FromHexString(item.CallgraphAttemptHash));
    // Timestamps: the repository stamps updated_at on every write.
    AddParameter(command, "@updated_at", DateTimeOffset.UtcNow);
}
/// <summary>
/// Adds a JSONB-typed parameter, serializing <paramref name="value"/> with the
/// repository's snake_case JSON options, or binding SQL NULL when it is null.
/// </summary>
private static void AddJsonParameter<T>(NpgsqlCommand command, string name, T? value) where T : class
{
    var parameter = command.Parameters.Add(name, NpgsqlDbType.Jsonb);
    if (value is null)
    {
        parameter.Value = DBNull.Value;
    }
    else
    {
        parameter.Value = JsonSerializer.Serialize(value, JsonOptions);
    }
}
/// <summary>
/// Materializes an <see cref="UnknownSymbolDocument"/> from the current row.
/// Column ordinals (0-31) are positional and MUST match the column order of
/// <see cref="SelectAllColumns"/> — update both together when adding columns.
/// NULL numeric columns fall back to 0, band falls back to 'cold', and BYTEA
/// hashes are surfaced as lowercase hex strings.
/// </summary>
private static UnknownSymbolDocument MapUnknownSymbol(NpgsqlDataReader reader)
{
    var doc = new UnknownSymbolDocument
    {
        // Identity and base descriptive columns (ordinals 0-9).
        Id = reader.GetString(0),
        SubjectKey = reader.GetString(1),
        CallgraphId = reader.IsDBNull(2) ? null : reader.GetString(2),
        SymbolId = reader.IsDBNull(3) ? null : reader.GetString(3),
        CodeId = reader.IsDBNull(4) ? null : reader.GetString(4),
        Purl = reader.IsDBNull(5) ? null : reader.GetString(5),
        PurlVersion = reader.IsDBNull(6) ? null : reader.GetString(6),
        EdgeFrom = reader.IsDBNull(7) ? null : reader.GetString(7),
        EdgeTo = reader.IsDBNull(8) ? null : reader.GetString(8),
        Reason = reader.IsDBNull(9) ? null : reader.GetString(9),
        // Scoring factors (ordinals 10-18); NULL reads as the neutral 0 value.
        PopularityScore = reader.IsDBNull(10) ? 0.0 : reader.GetDouble(10),
        DeploymentCount = reader.IsDBNull(11) ? 0 : reader.GetInt32(11),
        ExploitPotentialScore = reader.IsDBNull(12) ? 0.0 : reader.GetDouble(12),
        UncertaintyScore = reader.IsDBNull(13) ? 0.0 : reader.GetDouble(13),
        CentralityScore = reader.IsDBNull(14) ? 0.0 : reader.GetDouble(14),
        DegreeCentrality = reader.IsDBNull(15) ? 0 : reader.GetInt32(15),
        BetweennessCentrality = reader.IsDBNull(16) ? 0.0 : reader.GetDouble(16),
        StalenessScore = reader.IsDBNull(17) ? 0.0 : reader.GetDouble(17),
        DaysSinceLastAnalysis = reader.IsDBNull(18) ? 0 : reader.GetInt32(18),
        // Composite score and band (ordinals 19-20); unknown band → Cold.
        Score = reader.IsDBNull(19) ? 0.0 : reader.GetDouble(19),
        Band = ParseBand(reader.IsDBNull(20) ? "cold" : reader.GetString(20)),
        // JSONB columns (ordinals 21-22); flags default to an empty set.
        Flags = ParseJson<UnknownFlags>(reader, 21) ?? new UnknownFlags(),
        NormalizationTrace = ParseJson<UnknownsNormalizationTrace>(reader, 22),
        // Rescan scheduling (ordinals 23-26).
        RescanAttempts = reader.IsDBNull(23) ? 0 : reader.GetInt32(23),
        LastRescanResult = reader.IsDBNull(24) ? null : reader.GetString(24),
        NextScheduledRescan = reader.IsDBNull(25) ? null : reader.GetFieldValue<DateTimeOffset>(25),
        LastAnalyzedAt = reader.IsDBNull(26) ? null : reader.GetFieldValue<DateTimeOffset>(26),
        // BYTEA hashes (ordinals 27-29), rendered as lowercase hex strings —
        // the inverse of the Convert.FromHexString binding on the write path.
        GraphSliceHash = reader.IsDBNull(27) ? null : Convert.ToHexString(reader.GetFieldValue<byte[]>(27)).ToLowerInvariant(),
        EvidenceSetHash = reader.IsDBNull(28) ? null : Convert.ToHexString(reader.GetFieldValue<byte[]>(28)).ToLowerInvariant(),
        CallgraphAttemptHash = reader.IsDBNull(29) ? null : Convert.ToHexString(reader.GetFieldValue<byte[]>(29)).ToLowerInvariant(),
        // Timestamps (ordinals 30-31).
        // NOTE(review): NULL timestamps fall back to "now", which masks a
        // missing value rather than surfacing it — confirm this is intended.
        CreatedAt = reader.IsDBNull(30) ? DateTimeOffset.UtcNow : reader.GetFieldValue<DateTimeOffset>(30),
        UpdatedAt = reader.IsDBNull(31) ? DateTimeOffset.UtcNow : reader.GetFieldValue<DateTimeOffset>(31)
    };
    return doc;
}
/// <summary>
/// Maps a persisted band label (case-insensitive) to <see cref="UnknownsBand"/>.
/// Unrecognized values fall back to Cold, the lowest-urgency bucket.
/// </summary>
private static UnknownsBand ParseBand(string value)
{
    var normalized = value.ToLowerInvariant();
    if (normalized == "hot")
    {
        return UnknownsBand.Hot;
    }

    if (normalized == "warm")
    {
        return UnknownsBand.Warm;
    }

    return UnknownsBand.Cold;
}
/// <summary>
/// Deserializes the JSONB column at <paramref name="ordinal"/> into
/// <typeparamref name="T"/>, or returns null when the column is SQL NULL.
/// Uses the repository's shared snake_case JSON options.
/// </summary>
private static T? ParseJson<T>(NpgsqlDataReader reader, int ordinal) where T : class
    => reader.IsDBNull(ordinal)
        ? null
        : JsonSerializer.Deserialize<T>(reader.GetString(ordinal), JsonOptions);
private static NpgsqlCommand CreateCommand(string sql, NpgsqlConnection connection, NpgsqlTransaction transaction)
{
var command = new NpgsqlCommand(sql, connection, transaction);
@@ -151,6 +452,7 @@ public sealed class PostgresUnknownsRepository : RepositoryBase<SignalsDataSourc
return;
}
// Create schema and base table
const string ddl = @"
CREATE SCHEMA IF NOT EXISTS signals;
@@ -161,16 +463,58 @@ public sealed class PostgresUnknownsRepository : RepositoryBase<SignalsDataSourc
symbol_id TEXT,
code_id TEXT,
purl TEXT,
purl_version TEXT,
edge_from TEXT,
edge_to TEXT,
reason TEXT,
created_at TIMESTAMPTZ NOT NULL,
PRIMARY KEY (subject_key, id)
-- Scoring factors
popularity_p FLOAT DEFAULT 0.0,
deployment_count INT DEFAULT 0,
exploit_potential_e FLOAT DEFAULT 0.0,
uncertainty_u FLOAT DEFAULT 0.0,
centrality_c FLOAT DEFAULT 0.0,
degree_centrality INT DEFAULT 0,
betweenness_centrality FLOAT DEFAULT 0.0,
staleness_s FLOAT DEFAULT 0.0,
days_since_analysis INT DEFAULT 0,
-- Composite
score FLOAT DEFAULT 0.0,
band TEXT DEFAULT 'cold',
-- JSONB
unknown_flags JSONB DEFAULT '{}'::jsonb,
normalization_trace JSONB,
-- Rescan
rescan_attempts INT DEFAULT 0,
last_rescan_result TEXT,
next_scheduled_rescan TIMESTAMPTZ,
last_analyzed_at TIMESTAMPTZ,
-- Hashes
graph_slice_hash BYTEA,
evidence_set_hash BYTEA,
callgraph_attempt_hash BYTEA,
-- Timestamps
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (subject_key, id),
CONSTRAINT chk_popularity_range CHECK (popularity_p >= 0.0 AND popularity_p <= 1.0),
CONSTRAINT chk_exploit_range CHECK (exploit_potential_e >= 0.0 AND exploit_potential_e <= 1.0),
CONSTRAINT chk_uncertainty_range CHECK (uncertainty_u >= 0.0 AND uncertainty_u <= 1.0),
CONSTRAINT chk_centrality_range CHECK (centrality_c >= 0.0 AND centrality_c <= 1.0),
CONSTRAINT chk_staleness_range CHECK (staleness_s >= 0.0 AND staleness_s <= 1.0),
CONSTRAINT chk_score_range CHECK (score >= 0.0 AND score <= 1.0),
CONSTRAINT chk_band_value CHECK (band IN ('hot', 'warm', 'cold'))
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_unknowns_subject_key ON signals.unknowns (subject_key);
CREATE INDEX IF NOT EXISTS idx_unknowns_callgraph_id ON signals.unknowns (callgraph_id) WHERE callgraph_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_unknowns_symbol_id ON signals.unknowns (symbol_id) WHERE symbol_id IS NOT NULL;";
CREATE INDEX IF NOT EXISTS idx_unknowns_symbol_id ON signals.unknowns (symbol_id) WHERE symbol_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_unknowns_band ON signals.unknowns(band);
CREATE INDEX IF NOT EXISTS idx_unknowns_score_desc ON signals.unknowns(score DESC);
CREATE INDEX IF NOT EXISTS idx_unknowns_band_score ON signals.unknowns(band, score DESC);
CREATE INDEX IF NOT EXISTS idx_unknowns_next_rescan ON signals.unknowns(next_scheduled_rescan) WHERE next_scheduled_rescan IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_unknowns_hot_band ON signals.unknowns(score DESC) WHERE band = 'hot';
CREATE INDEX IF NOT EXISTS idx_unknowns_purl ON signals.unknowns(purl);";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(ddl, connection);

View File

@@ -16,6 +16,11 @@ public interface IUnknownsRepository
/// </summary>
Task BulkUpdateAsync(IEnumerable<UnknownSymbolDocument> items, CancellationToken cancellationToken);
/// <summary>
/// Returns all known subject keys containing unknowns.
/// </summary>
Task<IReadOnlyList<string>> GetAllSubjectKeysAsync(CancellationToken cancellationToken);
/// <summary>
/// Gets unknowns due for rescan in a specific band.
/// </summary>

View File

@@ -0,0 +1,33 @@
using System.Collections.Concurrent;
using System.Threading;
using System.Threading.Tasks;
namespace StellaOps.Signals.Persistence;
/// <summary>
/// Thread-safe in-memory implementation of <see cref="IDeploymentRefsRepository"/>.
/// Purls are matched case-insensitively after trimming surrounding whitespace.
/// </summary>
public sealed class InMemoryDeploymentRefsRepository : IDeploymentRefsRepository
{
    // Deployment counts keyed by trimmed purl (case-insensitive lookup).
    private readonly ConcurrentDictionary<string, int> _deploymentsByPurl = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Records the deployment count for a purl, replacing any previous value.
    /// </summary>
    /// <exception cref="ArgumentException">The purl is null or whitespace.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The count is negative.</exception>
    public void SetDeployments(string purl, int deployments)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(purl);
        if (deployments < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(deployments), "Deployments cannot be negative.");
        }

        _deploymentsByPurl[purl.Trim()] = deployments;
    }

    /// <summary>
    /// Returns the recorded deployment count for the purl, or 0 when the purl
    /// is null/blank or has never been recorded.
    /// </summary>
    public Task<int> CountDeploymentsAsync(string purl, CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (string.IsNullOrWhiteSpace(purl))
        {
            return Task.FromResult(0);
        }

        var count = _deploymentsByPurl.GetValueOrDefault(purl.Trim());
        return Task.FromResult(count);
    }
}

View File

@@ -0,0 +1,35 @@
using System.Collections.Concurrent;
using System.Threading;
using System.Threading.Tasks;
namespace StellaOps.Signals.Persistence;
/// <summary>
/// Thread-safe in-memory implementation of <see cref="IGraphMetricsRepository"/>.
/// Entries are keyed by the (call graph id, symbol id) pair; ids are trimmed
/// and matched case-insensitively.
/// </summary>
public sealed class InMemoryGraphMetricsRepository : IGraphMetricsRepository
{
    // Metrics keyed by "<callgraphId>|<symbolId>" (see BuildKey).
    private readonly ConcurrentDictionary<string, GraphMetrics> _metrics = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Stores metrics for a symbol in a call graph, replacing any prior entry.
    /// </summary>
    /// <exception cref="ArgumentException">Either id is null or whitespace.</exception>
    public void SetMetrics(string symbolId, string callgraphId, GraphMetrics metrics)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(symbolId);
        ArgumentException.ThrowIfNullOrWhiteSpace(callgraphId);

        _metrics[BuildKey(symbolId, callgraphId)] = metrics;
    }

    /// <summary>
    /// Looks up metrics for the symbol/call-graph pair; returns null when
    /// either id is blank or no entry has been stored.
    /// </summary>
    public Task<GraphMetrics?> GetMetricsAsync(string symbolId, string callgraphId, CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (string.IsNullOrWhiteSpace(symbolId) || string.IsNullOrWhiteSpace(callgraphId))
        {
            return Task.FromResult<GraphMetrics?>(null);
        }

        var found = _metrics.TryGetValue(BuildKey(symbolId, callgraphId), out var metrics);
        return Task.FromResult<GraphMetrics?>(found ? metrics : null);
    }

    // Composite dictionary key: call graph id first, then symbol id.
    private static string BuildKey(string symbolId, string callgraphId)
        => $"{callgraphId.Trim()}|{symbolId.Trim()}";
}

View File

@@ -78,7 +78,9 @@ internal sealed class UnknownsIngestionService : IUnknownsIngestionService
EdgeFrom = entry.EdgeFrom?.Trim(),
EdgeTo = entry.EdgeTo?.Trim(),
Reason = entry.Reason?.Trim(),
CreatedAt = now
CreatedAt = now,
UpdatedAt = now,
LastAnalyzedAt = now
});
}

View File

@@ -75,9 +75,11 @@ public sealed class UnknownsScoringService : IUnknownsScoringService
UnknownsScoringOptions opts,
CancellationToken cancellationToken)
{
var now = _timeProvider.GetUtcNow();
var trace = new UnknownsNormalizationTrace
{
ComputedAt = _timeProvider.GetUtcNow(),
ComputedAt = now,
Weights = new Dictionary<string, double>
{
["wP"] = opts.WeightPopularity,
@@ -139,24 +141,21 @@ public sealed class UnknownsScoringService : IUnknownsScoringService
trace.FinalScore = score;
// Band assignment
unknown.Band = score switch
{
>= 0.70 => UnknownsBand.Hot,
>= 0.40 => UnknownsBand.Warm,
_ => UnknownsBand.Cold
};
unknown.Band = score >= opts.HotThreshold
? UnknownsBand.Hot
: score >= opts.WarmThreshold ? UnknownsBand.Warm : UnknownsBand.Cold;
trace.AssignedBand = unknown.Band.ToString();
// Schedule next rescan based on band
unknown.NextScheduledRescan = unknown.Band switch
{
UnknownsBand.Hot => _timeProvider.GetUtcNow().AddMinutes(15),
UnknownsBand.Warm => _timeProvider.GetUtcNow().AddHours(opts.WarmRescanHours),
_ => _timeProvider.GetUtcNow().AddDays(opts.ColdRescanDays)
UnknownsBand.Hot => now.AddMinutes(opts.HotRescanMinutes),
UnknownsBand.Warm => now.AddHours(opts.WarmRescanHours),
_ => now.AddDays(opts.ColdRescanDays)
};
unknown.NormalizationTrace = trace;
unknown.UpdatedAt = _timeProvider.GetUtcNow();
unknown.UpdatedAt = now;
_logger.LogDebug(
"Scored unknown {UnknownId}: P={P:F2} E={E:F2} U={U:F2} C={C:F2} S={S:F2} → Score={Score:F2} Band={Band}",
@@ -270,9 +269,28 @@ public sealed class UnknownsScoringService : IUnknownsScoringService
return (1.0, opts.StalenessMaxDays); // Never analyzed = maximum staleness
var daysSince = (int)(_timeProvider.GetUtcNow() - lastAnalyzedAt.Value).TotalDays;
if (daysSince < 0)
{
daysSince = 0;
}
// Formula: S = min(1, age_days / max_days)
var score = Math.Min(1.0, (double)daysSince / opts.StalenessMaxDays);
// Exponential staleness: decayFactor = exp(-t/tau), staleness = (1 - decayFactor) normalized to reach 1 at maxDays.
// This models confidence decay (higher staleness means lower confidence in evidence freshness).
if (opts.StalenessTauDays > 0 && opts.StalenessMaxDays > 0)
{
var maxDays = Math.Max(1, opts.StalenessMaxDays);
var decayFactor = Math.Exp(-daysSince / opts.StalenessTauDays);
var maxDecayFactor = Math.Exp(-maxDays / opts.StalenessTauDays);
var numerator = 1.0 - decayFactor;
var denominator = 1.0 - maxDecayFactor;
var normalized = denominator <= 0 ? 0.0 : numerator / denominator;
return (Math.Clamp(normalized, 0.0, 1.0), daysSince);
}
// Fallback linear: S = min(1, age_days / max_days)
var score = opts.StalenessMaxDays <= 0
? 0.0
: Math.Min(1.0, (double)daysSince / opts.StalenessMaxDays);
return (score, daysSince);
}