Add call graph fixtures for various languages and scenarios
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Findings Ledger CI / build-test (push) Has been cancelled
Findings Ledger CI / migration-validation (push) Has been cancelled
Findings Ledger CI / generate-manifest (push) Has been cancelled
Lighthouse CI / Lighthouse Audit (push) Has been cancelled
Lighthouse CI / Axe Accessibility Audit (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Reachability Corpus Validation / validate-corpus (push) Has been cancelled
Reachability Corpus Validation / validate-ground-truths (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Reachability Corpus Validation / determinism-check (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled

- Introduced `all-edge-reasons.json` to test edge resolution reasons in .NET.
- Added `all-visibility-levels.json` to validate method visibility levels in .NET.
- Created `dotnet-aspnetcore-minimal.json` for a minimal ASP.NET Core application.
- Included `go-gin-api.json` for a Go Gin API application structure.
- Added `java-spring-boot.json` for the Spring PetClinic application in Java.
- Introduced `legacy-no-schema.json` for a legacy application structure that omits a schema declaration.
- Created `node-express-api.json` for an Express.js API application structure.
This commit is contained in:
master
2025-12-16 10:44:24 +02:00
parent 4391f35d8a
commit 5a480a3c2a
223 changed files with 19367 additions and 727 deletions

View File

@@ -0,0 +1,199 @@
-- ============================================================
-- DEPLOYMENT REFERENCES AND GRAPH METRICS TABLES
-- Sprint: SPRINT_1105_0001_0001
-- Advisory Reference: 14-Dec-2025 - Triage and Unknowns Technical Reference
-- Purpose: Enable popularity (P) and centrality (C) factors for unknowns scoring
-- ============================================================

-- Ensure schema exists
CREATE SCHEMA IF NOT EXISTS signals;

-- ============================================================
-- DEPLOYMENT REFERENCES TABLE
-- Tracks package deployments for popularity scoring.
-- One row per (purl, image_id, environment) combination; last_seen_at
-- is expected to be bumped on every new observation.
-- ============================================================
CREATE TABLE IF NOT EXISTS signals.deploy_refs (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Package identifier (PURL)
    purl TEXT NOT NULL,
    -- Version (optional, for specific version tracking)
    purl_version TEXT,
    -- Deployment target
    image_id TEXT NOT NULL,
    image_digest TEXT,
    -- Environment classification
    environment TEXT NOT NULL DEFAULT 'unknown'
        CONSTRAINT chk_environment CHECK (environment IN ('production', 'staging', 'development', 'test', 'unknown')),
    -- Deployment metadata
    namespace TEXT,
    cluster TEXT,
    region TEXT,
    -- Timestamps
    first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    -- Unique constraint per package/image/env combination
    CONSTRAINT uq_deploy_refs_purl_image_env
        UNIQUE (purl, image_id, environment)
);

-- Indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_deploy_refs_purl
    ON signals.deploy_refs(purl);
CREATE INDEX IF NOT EXISTS idx_deploy_refs_purl_version
    ON signals.deploy_refs(purl, purl_version)
    WHERE purl_version IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_deploy_refs_last_seen
    ON signals.deploy_refs(last_seen_at);
CREATE INDEX IF NOT EXISTS idx_deploy_refs_environment
    ON signals.deploy_refs(environment);

-- Index supporting "active deployment" lookups (purl + recency).
-- FIX: the previous definition was a partial index with
--   WHERE last_seen_at > NOW() - INTERVAL '30 days'
-- which PostgreSQL rejects: functions used in an index predicate must be
-- IMMUTABLE, and NOW() is only STABLE, so the migration would fail with
-- "functions in index predicate must be marked IMMUTABLE". A plain
-- composite index gives the planner the same access path; the 30-day
-- cutoff belongs in the querying SQL, not the index definition.
CREATE INDEX IF NOT EXISTS idx_deploy_refs_active
    ON signals.deploy_refs(purl, last_seen_at);
-- ============================================================
-- GRAPH METRICS TABLE
-- Stores computed centrality metrics for call graph nodes.
-- One row per (node_id, callgraph_id) pair (see unique constraint below).
-- ============================================================
CREATE TABLE IF NOT EXISTS signals.graph_metrics (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Node identifier (symbol ID from call graph)
node_id TEXT NOT NULL,
-- Call graph this metric belongs to
callgraph_id TEXT NOT NULL,
-- Node type for categorization
node_type TEXT NOT NULL DEFAULT 'symbol'
CONSTRAINT chk_node_type CHECK (node_type IN ('symbol', 'package', 'function', 'class', 'method')),
-- Centrality metrics (raw, unnormalized values)
degree_centrality INT NOT NULL DEFAULT 0,
in_degree INT NOT NULL DEFAULT 0,
out_degree INT NOT NULL DEFAULT 0,
betweenness_centrality FLOAT NOT NULL DEFAULT 0.0,
-- Optional metrics; NULL when the computation did not produce them
closeness_centrality FLOAT,
eigenvector_centrality FLOAT,
-- Normalized scores (0.0 - 1.0); NULL allowed when not computed
normalized_betweenness FLOAT
CONSTRAINT chk_norm_betweenness CHECK (normalized_betweenness IS NULL OR (normalized_betweenness >= 0.0 AND normalized_betweenness <= 1.0)),
normalized_degree FLOAT
CONSTRAINT chk_norm_degree CHECK (normalized_degree IS NULL OR (normalized_degree >= 0.0 AND normalized_degree <= 1.0)),
-- Computation metadata
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
computation_duration_ms INT,
algorithm_version TEXT NOT NULL DEFAULT '1.0',
-- Graph statistics at computation time
total_nodes INT,
total_edges INT,
-- Unique constraint per node/graph combination
CONSTRAINT uq_graph_metrics_node_graph
UNIQUE (node_id, callgraph_id)
);
-- Indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_graph_metrics_node
ON signals.graph_metrics(node_id);
CREATE INDEX IF NOT EXISTS idx_graph_metrics_callgraph
ON signals.graph_metrics(callgraph_id);
CREATE INDEX IF NOT EXISTS idx_graph_metrics_betweenness
ON signals.graph_metrics(betweenness_centrality DESC);
CREATE INDEX IF NOT EXISTS idx_graph_metrics_computed
ON signals.graph_metrics(computed_at);
-- Partial index for high-centrality nodes (top 10% typically).
-- The constant predicate (> 0.5) is valid in an index WHERE clause.
CREATE INDEX IF NOT EXISTS idx_graph_metrics_high_centrality
ON signals.graph_metrics(callgraph_id, normalized_betweenness DESC)
WHERE normalized_betweenness > 0.5;
-- ============================================================
-- HELPER VIEWS
-- ============================================================
-- Deployment counts per package (for popularity scoring).
-- Plain view: NOW() is evaluated at query time, so the 30-day window
-- always tracks the current clock.
CREATE OR REPLACE VIEW signals.deploy_counts AS
SELECT
purl,
COUNT(DISTINCT image_id) as image_count,
COUNT(DISTINCT environment) as env_count,
COUNT(*) as total_deployments,
MAX(last_seen_at) as last_deployment,
MIN(first_seen_at) as first_deployment
FROM signals.deploy_refs
WHERE last_seen_at > NOW() - INTERVAL '30 days'
GROUP BY purl;
-- High-centrality nodes per graph (normalized betweenness above 0.5)
CREATE OR REPLACE VIEW signals.high_centrality_nodes AS
SELECT
callgraph_id,
node_id,
node_type,
betweenness_centrality,
normalized_betweenness,
degree_centrality,
computed_at
FROM signals.graph_metrics
WHERE normalized_betweenness > 0.5
ORDER BY callgraph_id, normalized_betweenness DESC;
-- ============================================================
-- COMMENTS
-- Catalog comments surfaced by psql \d+ and admin tooling
-- ============================================================
COMMENT ON TABLE signals.deploy_refs IS
'Tracks package deployments across images and environments for popularity scoring (P factor).';
COMMENT ON COLUMN signals.deploy_refs.purl IS
'Package URL (PURL) identifier, e.g., pkg:npm/lodash@4.17.21';
COMMENT ON COLUMN signals.deploy_refs.environment IS
'Deployment environment: production (highest weight), staging, development, test, unknown';
COMMENT ON COLUMN signals.deploy_refs.first_seen_at IS
'When this package was first observed in this image/environment';
COMMENT ON COLUMN signals.deploy_refs.last_seen_at IS
'Most recent observation timestamp; used for active deployment filtering';
COMMENT ON TABLE signals.graph_metrics IS
'Stores computed graph centrality metrics for call graph nodes (C factor).';
COMMENT ON COLUMN signals.graph_metrics.node_id IS
'Symbol identifier from call graph, matches SymbolId format';
COMMENT ON COLUMN signals.graph_metrics.betweenness_centrality IS
'Raw betweenness centrality: number of shortest paths passing through this node';
COMMENT ON COLUMN signals.graph_metrics.normalized_betweenness IS
'Betweenness normalized to 0.0-1.0 range: raw / max(raw) across graph';
COMMENT ON COLUMN signals.graph_metrics.algorithm_version IS
'Version of centrality algorithm used (e.g., "brandes-1.0")';
COMMENT ON VIEW signals.deploy_counts IS
'Aggregated deployment counts per package for popularity scoring. Only includes active deployments (last 30 days).';
COMMENT ON VIEW signals.high_centrality_nodes IS
'Nodes with normalized betweenness > 0.5, sorted by centrality within each graph.';

View File

@@ -0,0 +1,340 @@
-- ============================================================
-- SPRINT_3102: Call Graph Relational Tables
-- Enables cross-artifact queries, analytics, and efficient lookups
-- ============================================================
CREATE SCHEMA IF NOT EXISTS signals;
-- =============================================================================
-- SCAN TRACKING
-- =============================================================================
-- Tracks scan context for call graph analysis
CREATE TABLE IF NOT EXISTS signals.scans (
scan_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
artifact_digest TEXT NOT NULL,
repo_uri TEXT,
commit_sha TEXT,
sbom_digest TEXT,
policy_digest TEXT,
-- Lifecycle state: pending -> processing -> completed | failed
status TEXT NOT NULL DEFAULT 'pending'
CHECK (status IN ('pending', 'processing', 'completed', 'failed')),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
completed_at TIMESTAMPTZ,
error_message TEXT,
-- Unique key intended for scan-result cache lookups (backed by implicit index).
-- NOTE(review): sbom_digest is nullable and PostgreSQL UNIQUE treats NULLs as
-- distinct, so multiple rows sharing an artifact_digest with NULL sbom_digest
-- are allowed — confirm that is intended.
CONSTRAINT scans_artifact_sbom_unique UNIQUE (artifact_digest, sbom_digest)
);
CREATE INDEX IF NOT EXISTS idx_scans_status ON signals.scans(status);
CREATE INDEX IF NOT EXISTS idx_scans_artifact ON signals.scans(artifact_digest);
-- Partial index: constant predicate, valid in an index WHERE clause
CREATE INDEX IF NOT EXISTS idx_scans_commit ON signals.scans(commit_sha) WHERE commit_sha IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_scans_created ON signals.scans(created_at DESC);
COMMENT ON TABLE signals.scans IS
'Tracks scan context for call graph analysis';
-- =============================================================================
-- ARTIFACTS
-- =============================================================================
-- Individual artifacts (assemblies, JARs, modules) within a scan
CREATE TABLE IF NOT EXISTS signals.artifacts (
artifact_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Owning scan; rows are deleted along with their scan
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
artifact_key TEXT NOT NULL,
kind TEXT NOT NULL CHECK (kind IN ('assembly', 'jar', 'module', 'binary', 'script')),
sha256 TEXT NOT NULL,
purl TEXT,
build_id TEXT,
file_path TEXT,
size_bytes BIGINT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- artifact_key is unique within a single scan
CONSTRAINT artifacts_scan_key_unique UNIQUE (scan_id, artifact_key)
);
CREATE INDEX IF NOT EXISTS idx_artifacts_scan ON signals.artifacts(scan_id);
CREATE INDEX IF NOT EXISTS idx_artifacts_sha256 ON signals.artifacts(sha256);
CREATE INDEX IF NOT EXISTS idx_artifacts_purl ON signals.artifacts(purl) WHERE purl IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_artifacts_build_id ON signals.artifacts(build_id) WHERE build_id IS NOT NULL;
COMMENT ON TABLE signals.artifacts IS
'Individual artifacts (assemblies, JARs, modules) within a scan';
-- =============================================================================
-- CALL GRAPH NODES
-- =============================================================================
-- Individual nodes (symbols) in call graphs
CREATE TABLE IF NOT EXISTS signals.cg_nodes (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
-- Graph-local node identifier, unique within a scan (see constraint below)
node_id TEXT NOT NULL,
artifact_key TEXT,
symbol_key TEXT NOT NULL,
visibility TEXT NOT NULL DEFAULT 'unknown'
CHECK (visibility IN ('public', 'internal', 'protected', 'private', 'unknown')),
is_entrypoint_candidate BOOLEAN NOT NULL DEFAULT FALSE,
purl TEXT,
symbol_digest TEXT,
flags INT NOT NULL DEFAULT 0,
-- Free-form additional node properties
attributes JSONB,
CONSTRAINT cg_nodes_scan_node_unique UNIQUE (scan_id, node_id)
);
-- Primary lookup indexes
CREATE INDEX IF NOT EXISTS idx_cg_nodes_scan ON signals.cg_nodes(scan_id);
CREATE INDEX IF NOT EXISTS idx_cg_nodes_symbol_key ON signals.cg_nodes(symbol_key);
CREATE INDEX IF NOT EXISTS idx_cg_nodes_purl ON signals.cg_nodes(purl) WHERE purl IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_cg_nodes_entrypoint ON signals.cg_nodes(scan_id, is_entrypoint_candidate)
WHERE is_entrypoint_candidate = TRUE;
-- Full-text search on symbol keys ('simple' config: no stemming, deterministic)
CREATE INDEX IF NOT EXISTS idx_cg_nodes_symbol_fts ON signals.cg_nodes
USING gin(to_tsvector('simple', symbol_key));
COMMENT ON TABLE signals.cg_nodes IS
'Individual nodes (symbols) in call graphs';
COMMENT ON COLUMN signals.cg_nodes.visibility IS
'Symbol visibility: public, internal, protected, private, unknown';
COMMENT ON COLUMN signals.cg_nodes.flags IS
'Bitfield for node properties (static, virtual, async, etc.)';
-- =============================================================================
-- CALL GRAPH EDGES
-- =============================================================================
-- Call edges between nodes
CREATE TABLE IF NOT EXISTS signals.cg_edges (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
-- Endpoints reference signals.cg_nodes.node_id logically; no FK is declared
-- here — presumably for bulk-load performance, TODO confirm.
from_node_id TEXT NOT NULL,
to_node_id TEXT NOT NULL,
kind SMALLINT NOT NULL DEFAULT 0, -- 0=static, 1=heuristic, 2=runtime
reason SMALLINT NOT NULL DEFAULT 0, -- EdgeReason enum value
weight REAL NOT NULL DEFAULT 1.0,
offset_bytes INT,
is_resolved BOOLEAN NOT NULL DEFAULT TRUE,
provenance TEXT,
-- Composite unique constraint (one edge per kind/reason between two nodes)
CONSTRAINT cg_edges_unique UNIQUE (scan_id, from_node_id, to_node_id, kind, reason)
);
-- Traversal indexes (critical for reachability queries)
CREATE INDEX IF NOT EXISTS idx_cg_edges_scan ON signals.cg_edges(scan_id);
-- NOTE(review): idx_cg_edges_from is largely subsumed by the covering
-- idx_cg_edges_traversal below; consider dropping one after checking plans.
CREATE INDEX IF NOT EXISTS idx_cg_edges_from ON signals.cg_edges(scan_id, from_node_id);
CREATE INDEX IF NOT EXISTS idx_cg_edges_to ON signals.cg_edges(scan_id, to_node_id);
-- Covering index for common traversal pattern
CREATE INDEX IF NOT EXISTS idx_cg_edges_traversal ON signals.cg_edges(scan_id, from_node_id)
INCLUDE (to_node_id, kind, weight);
COMMENT ON TABLE signals.cg_edges IS
'Call edges between nodes in the call graph';
COMMENT ON COLUMN signals.cg_edges.kind IS
'Edge kind: 0=static, 1=heuristic, 2=runtime';
COMMENT ON COLUMN signals.cg_edges.reason IS
'EdgeReason enum value explaining why this edge exists';
-- =============================================================================
-- ENTRYPOINTS
-- =============================================================================
-- Framework-aware entrypoints
CREATE TABLE IF NOT EXISTS signals.entrypoints (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
node_id TEXT NOT NULL,
-- How control enters the program at this node
kind TEXT NOT NULL CHECK (kind IN (
'http', 'grpc', 'cli', 'job', 'event', 'message_queue',
'timer', 'test', 'main', 'module_init', 'static_constructor', 'unknown'
)),
framework TEXT,
route TEXT,
http_method TEXT,
phase TEXT NOT NULL DEFAULT 'runtime'
CHECK (phase IN ('module_init', 'app_start', 'runtime', 'shutdown')),
-- Relative ordering of entrypoints within a phase
order_idx INT NOT NULL DEFAULT 0,
CONSTRAINT entrypoints_scan_node_unique UNIQUE (scan_id, node_id, kind)
);
CREATE INDEX IF NOT EXISTS idx_entrypoints_scan ON signals.entrypoints(scan_id);
CREATE INDEX IF NOT EXISTS idx_entrypoints_kind ON signals.entrypoints(kind);
CREATE INDEX IF NOT EXISTS idx_entrypoints_route ON signals.entrypoints(route) WHERE route IS NOT NULL;
COMMENT ON TABLE signals.entrypoints IS
'Framework-aware entrypoints detected in the call graph';
COMMENT ON COLUMN signals.entrypoints.phase IS
'Execution phase: module_init, app_start, runtime, shutdown';
-- =============================================================================
-- SYMBOL-TO-COMPONENT MAPPING
-- =============================================================================
-- Maps symbols to SBOM components (for vuln correlation)
CREATE TABLE IF NOT EXISTS signals.symbol_component_map (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
node_id TEXT NOT NULL,
purl TEXT NOT NULL,
mapping_kind TEXT NOT NULL CHECK (mapping_kind IN (
'exact', 'assembly', 'namespace', 'heuristic'
)),
-- Mapping confidence; defaults to certain (1.0)
confidence REAL NOT NULL DEFAULT 1.0,
evidence JSONB,
CONSTRAINT symbol_component_map_unique UNIQUE (scan_id, node_id, purl)
);
CREATE INDEX IF NOT EXISTS idx_symbol_component_scan ON signals.symbol_component_map(scan_id);
CREATE INDEX IF NOT EXISTS idx_symbol_component_purl ON signals.symbol_component_map(purl);
CREATE INDEX IF NOT EXISTS idx_symbol_component_node ON signals.symbol_component_map(scan_id, node_id);
COMMENT ON TABLE signals.symbol_component_map IS
'Maps symbols to SBOM components for vulnerability correlation';
COMMENT ON COLUMN signals.symbol_component_map.mapping_kind IS
'How the mapping was determined: exact, assembly, namespace, heuristic';
-- =============================================================================
-- REACHABILITY RESULTS
-- =============================================================================
-- Component-level reachability status
CREATE TABLE IF NOT EXISTS signals.reachability_components (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
purl TEXT NOT NULL,
-- ReachabilityStatus enum value; see the application enum for meanings
status SMALLINT NOT NULL DEFAULT 0, -- ReachabilityStatus enum
lattice_state TEXT,
confidence REAL NOT NULL DEFAULT 0,
why JSONB,
evidence JSONB,
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT reachability_components_unique UNIQUE (scan_id, purl)
);
CREATE INDEX IF NOT EXISTS idx_reachability_components_scan ON signals.reachability_components(scan_id);
CREATE INDEX IF NOT EXISTS idx_reachability_components_purl ON signals.reachability_components(purl);
CREATE INDEX IF NOT EXISTS idx_reachability_components_status ON signals.reachability_components(status);
COMMENT ON TABLE signals.reachability_components IS
'Component-level reachability status for each scan';
-- CVE-level reachability findings
CREATE TABLE IF NOT EXISTS signals.reachability_findings (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
cve_id TEXT NOT NULL,
purl TEXT NOT NULL,
status SMALLINT NOT NULL DEFAULT 0,
lattice_state TEXT,
confidence REAL NOT NULL DEFAULT 0,
-- Ordered node ids forming the witness path (see column comment below)
path_witness TEXT[],
why JSONB,
evidence JSONB,
spine_id UUID, -- Reference to proof spine
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT reachability_findings_unique UNIQUE (scan_id, cve_id, purl)
);
CREATE INDEX IF NOT EXISTS idx_reachability_findings_scan ON signals.reachability_findings(scan_id);
CREATE INDEX IF NOT EXISTS idx_reachability_findings_cve ON signals.reachability_findings(cve_id);
CREATE INDEX IF NOT EXISTS idx_reachability_findings_purl ON signals.reachability_findings(purl);
CREATE INDEX IF NOT EXISTS idx_reachability_findings_status ON signals.reachability_findings(status);
COMMENT ON TABLE signals.reachability_findings IS
'CVE-level reachability findings with path witnesses';
COMMENT ON COLUMN signals.reachability_findings.path_witness IS
'Array of node IDs forming the reachability path';
-- =============================================================================
-- RUNTIME SAMPLES
-- =============================================================================
-- Stack trace samples from runtime evidence
CREATE TABLE IF NOT EXISTS signals.runtime_samples (
id BIGSERIAL PRIMARY KEY,
scan_id UUID NOT NULL REFERENCES signals.scans(scan_id) ON DELETE CASCADE,
collected_at TIMESTAMPTZ NOT NULL,
env_hash TEXT,
-- NOTE(review): both collected_at and timestamp are stored; the intended
-- distinction between them is not clear from this DDL — confirm semantics.
timestamp TIMESTAMPTZ NOT NULL,
pid INT,
thread_id INT,
frames TEXT[] NOT NULL,
weight REAL NOT NULL DEFAULT 1.0,
container_id TEXT,
pod_name TEXT
);
CREATE INDEX IF NOT EXISTS idx_runtime_samples_scan ON signals.runtime_samples(scan_id);
CREATE INDEX IF NOT EXISTS idx_runtime_samples_collected ON signals.runtime_samples(collected_at DESC);
-- GIN index for frame array searches
CREATE INDEX IF NOT EXISTS idx_runtime_samples_frames ON signals.runtime_samples USING gin(frames);
COMMENT ON TABLE signals.runtime_samples IS
'Stack trace samples from runtime evidence collection';
-- =============================================================================
-- MATERIALIZED VIEWS FOR ANALYTICS
-- =============================================================================
-- Daily scan statistics (refreshed via signals.refresh_analytics_views)
CREATE MATERIALIZED VIEW IF NOT EXISTS signals.scan_stats_daily AS
SELECT
DATE_TRUNC('day', created_at) AS day,
COUNT(*) AS total_scans,
COUNT(*) FILTER (WHERE status = 'completed') AS completed_scans,
COUNT(*) FILTER (WHERE status = 'failed') AS failed_scans,
AVG(EXTRACT(EPOCH FROM (completed_at - created_at))) FILTER (WHERE status = 'completed') AS avg_duration_seconds
FROM signals.scans
GROUP BY DATE_TRUNC('day', created_at)
ORDER BY day DESC;
-- Unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY
CREATE UNIQUE INDEX IF NOT EXISTS idx_scan_stats_daily_day ON signals.scan_stats_daily(day);
-- CVE reachability summary
CREATE MATERIALIZED VIEW IF NOT EXISTS signals.cve_reachability_summary AS
SELECT
cve_id,
COUNT(DISTINCT scan_id) AS affected_scans,
COUNT(DISTINCT purl) AS affected_components,
COUNT(*) FILTER (WHERE status = 2) AS reachable_count, -- REACHABLE_STATIC
COUNT(*) FILTER (WHERE status = 3) AS proven_count, -- REACHABLE_PROVEN
COUNT(*) FILTER (WHERE status = 0) AS unreachable_count,
AVG(confidence) AS avg_confidence,
MAX(computed_at) AS last_updated
FROM signals.reachability_findings
GROUP BY cve_id;
-- Unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY
CREATE UNIQUE INDEX IF NOT EXISTS idx_cve_reachability_summary_cve ON signals.cve_reachability_summary(cve_id);
-- =============================================================================
-- REFRESH FUNCTION
-- =============================================================================
-- Refreshes both analytics materialized views.
-- CONCURRENTLY avoids blocking concurrent readers; it relies on the unique
-- indexes defined above and on the views having been populated at creation
-- (WITH DATA is the default for CREATE MATERIALIZED VIEW).
CREATE OR REPLACE FUNCTION signals.refresh_analytics_views()
RETURNS void AS $$
BEGIN
REFRESH MATERIALIZED VIEW CONCURRENTLY signals.scan_stats_daily;
REFRESH MATERIALIZED VIEW CONCURRENTLY signals.cve_reachability_summary;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION signals.refresh_analytics_views IS
'Refreshes all analytics materialized views concurrently';

View File

@@ -0,0 +1,249 @@
using Microsoft.Extensions.Logging;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Signals.Persistence;
namespace StellaOps.Signals.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL implementation of <see cref="IDeploymentRefsRepository"/>.
/// Tracks package deployments for popularity scoring (P factor).
/// </summary>
/// <remarks>
/// The backing table is created lazily on first use (see <see cref="EnsureTableAsync"/>);
/// the embedded DDL mirrors the deploy_refs migration — keep the two in sync.
/// </remarks>
public sealed class PostgresDeploymentRefsRepository : RepositoryBase<SignalsDataSource>, IDeploymentRefsRepository
{
    // Single source of truth for the upsert statement so UpsertAsync and
    // BulkUpsertAsync cannot drift apart.
    private const string UpsertSql = @"
INSERT INTO signals.deploy_refs (
purl, purl_version, image_id, image_digest,
environment, namespace, cluster, region,
first_seen_at, last_seen_at
) VALUES (
@purl, @purl_version, @image_id, @image_digest,
@environment, @namespace, @cluster, @region,
NOW(), NOW()
)
ON CONFLICT (purl, image_id, environment)
DO UPDATE SET
purl_version = COALESCE(EXCLUDED.purl_version, signals.deploy_refs.purl_version),
image_digest = COALESCE(EXCLUDED.image_digest, signals.deploy_refs.image_digest),
namespace = COALESCE(EXCLUDED.namespace, signals.deploy_refs.namespace),
cluster = COALESCE(EXCLUDED.cluster, signals.deploy_refs.cluster),
region = COALESCE(EXCLUDED.region, signals.deploy_refs.region),
last_seen_at = NOW()";

    // FIX: the previous plain bool allowed concurrent first callers to race and
    // run the DDL simultaneously. A SemaphoreSlim (not lock — the init path is
    // async) serializes creation; volatile makes the fast-path read safe.
    private readonly SemaphoreSlim _initGate = new(1, 1);
    private volatile bool _tableInitialized;

    public PostgresDeploymentRefsRepository(SignalsDataSource dataSource, ILogger<PostgresDeploymentRefsRepository> logger)
        : base(dataSource, logger)
    {
    }

    /// <summary>
    /// Counts distinct images in which <paramref name="purl"/> was observed
    /// during the last 30 days. Returns 0 for a blank PURL.
    /// </summary>
    public async Task<int> CountDeploymentsAsync(string purl, CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(purl))
            return 0;

        await EnsureTableAsync(cancellationToken).ConfigureAwait(false);

        const string sql = @"
SELECT COUNT(DISTINCT image_id)
FROM signals.deploy_refs
WHERE purl = @purl
AND last_seen_at > NOW() - INTERVAL '30 days'";

        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "@purl", purl.Trim());

        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
        return result is long count ? (int)count : 0;
    }

    /// <summary>
    /// Returns up to <paramref name="limit"/> distinct image ids (sorted) in which
    /// the package was observed in the last 30 days. A blank PURL or a
    /// non-positive limit yields an empty list.
    /// </summary>
    public async Task<IReadOnlyList<string>> GetDeploymentIdsAsync(string purl, int limit, CancellationToken cancellationToken = default)
    {
        // FIX: a negative value reached SQL LIMIT directly, which is a
        // PostgreSQL error; LIMIT 0 is a pointless round trip. Short-circuit both.
        if (string.IsNullOrWhiteSpace(purl) || limit <= 0)
            return Array.Empty<string>();

        await EnsureTableAsync(cancellationToken).ConfigureAwait(false);

        const string sql = @"
SELECT DISTINCT image_id
FROM signals.deploy_refs
WHERE purl = @purl
AND last_seen_at > NOW() - INTERVAL '30 days'
ORDER BY image_id
LIMIT @limit";

        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "@purl", purl.Trim());
        AddParameter(command, "@limit", limit);

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        var results = new List<string>();
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            results.Add(reader.GetString(0));
        }
        return results;
    }

    /// <summary>
    /// Inserts or refreshes a single deployment observation. On conflict the
    /// optional fields are only overwritten by non-null values and last_seen_at
    /// is bumped.
    /// </summary>
    /// <exception cref="ArgumentException">A required field (Purl, ImageId, Environment) is blank.</exception>
    public async Task UpsertAsync(DeploymentRef deployment, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(deployment);
        ValidateRequiredFields(deployment);

        await EnsureTableAsync(cancellationToken).ConfigureAwait(false);

        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var command = CreateCommand(UpsertSql, connection);
        AddDeploymentParameters(command, deployment);
        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Upserts a batch of deployment observations inside a single transaction.
    /// Null entries in the sequence are skipped; the transaction is rolled back
    /// if any statement fails.
    /// </summary>
    public async Task BulkUpsertAsync(IEnumerable<DeploymentRef> deployments, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(deployments);
        await EnsureTableAsync(cancellationToken).ConfigureAwait(false);

        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            foreach (var deployment in deployments)
            {
                if (deployment is null)
                    continue;

                ValidateRequiredFields(deployment);
                await using var command = CreateCommand(UpsertSql, connection, transaction);
                AddDeploymentParameters(command, deployment);
                await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            }
            await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
        }
        catch
        {
            await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
            throw;
        }
    }

    /// <summary>
    /// Aggregates 30-day deployment statistics for a package.
    /// Returns null for a blank PURL or when there are no active deployments.
    /// </summary>
    public async Task<DeploymentSummary?> GetSummaryAsync(string purl, CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(purl))
            return null;

        await EnsureTableAsync(cancellationToken).ConfigureAwait(false);

        const string sql = @"
SELECT
purl,
COUNT(DISTINCT image_id) as image_count,
COUNT(DISTINCT environment) as env_count,
COUNT(*) as total_deployments,
MAX(last_seen_at) as last_deployment,
MIN(first_seen_at) as first_deployment
FROM signals.deploy_refs
WHERE purl = @purl
AND last_seen_at > NOW() - INTERVAL '30 days'
GROUP BY purl";

        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
        await using var command = CreateCommand(sql, connection);
        AddParameter(command, "@purl", purl.Trim());

        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            return null;

        // COUNT(*) comes back as bigint; narrow to int for the summary DTO.
        return new DeploymentSummary
        {
            Purl = reader.GetString(0),
            ImageCount = reader.IsDBNull(1) ? 0 : Convert.ToInt32(reader.GetInt64(1)),
            EnvironmentCount = reader.IsDBNull(2) ? 0 : Convert.ToInt32(reader.GetInt64(2)),
            TotalDeployments = reader.IsDBNull(3) ? 0 : Convert.ToInt32(reader.GetInt64(3)),
            LastDeployment = reader.IsDBNull(4) ? null : reader.GetFieldValue<DateTimeOffset>(4),
            FirstDeployment = reader.IsDBNull(5) ? null : reader.GetFieldValue<DateTimeOffset>(5)
        };
    }

    // FIX: guard the NOT NULL columns up front so a bad record surfaces as a
    // clear ArgumentException instead of a NullReferenceException from .Trim()
    // or a database constraint violation mid-transaction.
    private static void ValidateRequiredFields(DeploymentRef deployment)
    {
        if (string.IsNullOrWhiteSpace(deployment.Purl))
            throw new ArgumentException("Purl is required.", nameof(deployment));
        if (string.IsNullOrWhiteSpace(deployment.ImageId))
            throw new ArgumentException("ImageId is required.", nameof(deployment));
        if (string.IsNullOrWhiteSpace(deployment.Environment))
            throw new ArgumentException("Environment is required.", nameof(deployment));
    }

    // Binds all upsert parameters for a single deployment record.
    private void AddDeploymentParameters(Npgsql.NpgsqlCommand command, DeploymentRef deployment)
    {
        AddParameter(command, "@purl", deployment.Purl.Trim());
        AddParameter(command, "@purl_version", (object?)deployment.PurlVersion ?? DBNull.Value);
        AddParameter(command, "@image_id", deployment.ImageId.Trim());
        AddParameter(command, "@image_digest", (object?)deployment.ImageDigest ?? DBNull.Value);
        AddParameter(command, "@environment", deployment.Environment.Trim());
        AddParameter(command, "@namespace", (object?)deployment.Namespace ?? DBNull.Value);
        AddParameter(command, "@cluster", (object?)deployment.Cluster ?? DBNull.Value);
        AddParameter(command, "@region", (object?)deployment.Region ?? DBNull.Value);
    }

    // Transaction-scoped command factory used by the bulk path.
    private static Npgsql.NpgsqlCommand CreateCommand(string sql, Npgsql.NpgsqlConnection connection, Npgsql.NpgsqlTransaction transaction)
    {
        return new Npgsql.NpgsqlCommand(sql, connection, transaction);
    }

    // Creates the deploy_refs table and its indexes on first use. The DDL is
    // idempotent (IF NOT EXISTS) but is still serialized behind _initGate so
    // concurrent first callers do not race.
    private async Task EnsureTableAsync(CancellationToken cancellationToken)
    {
        if (_tableInitialized)
            return;

        await _initGate.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            if (_tableInitialized)
                return;

            const string ddl = @"
CREATE SCHEMA IF NOT EXISTS signals;
CREATE TABLE IF NOT EXISTS signals.deploy_refs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
purl TEXT NOT NULL,
purl_version TEXT,
image_id TEXT NOT NULL,
image_digest TEXT,
environment TEXT NOT NULL DEFAULT 'unknown',
namespace TEXT,
cluster TEXT,
region TEXT,
first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT uq_deploy_refs_purl_image_env UNIQUE (purl, image_id, environment)
);
CREATE INDEX IF NOT EXISTS idx_deploy_refs_purl ON signals.deploy_refs(purl);
CREATE INDEX IF NOT EXISTS idx_deploy_refs_purl_version ON signals.deploy_refs(purl, purl_version) WHERE purl_version IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_deploy_refs_last_seen ON signals.deploy_refs(last_seen_at);
CREATE INDEX IF NOT EXISTS idx_deploy_refs_environment ON signals.deploy_refs(environment);";

            await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
            await using var command = CreateCommand(ddl, connection);
            await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
            _tableInitialized = true;
        }
        finally
        {
            _initGate.Release();
        }
    }
}

View File

@@ -0,0 +1,296 @@
using Microsoft.Extensions.Logging;
using StellaOps.Infrastructure.Postgres.Repositories;
using StellaOps.Signals.Persistence;
namespace StellaOps.Signals.Storage.Postgres.Repositories;
/// <summary>
/// PostgreSQL implementation of <see cref="IGraphMetricsRepository"/>.
/// Stores computed centrality metrics for call graph nodes (C factor).
/// </summary>
public sealed class PostgresGraphMetricsRepository : RepositoryBase<SignalsDataSource>, IGraphMetricsRepository
{
// Flipped after the first successful DDL run; unsynchronized, so concurrent first
// callers may each execute the (idempotent) CREATE IF NOT EXISTS statements.
private bool _tableInitialized;
public PostgresGraphMetricsRepository(SignalsDataSource dataSource, ILogger<PostgresGraphMetricsRepository> logger)
: base(dataSource, logger)
{
}
/// <summary>
/// Loads stored metrics for one node of one call graph.
/// Returns null when either identifier is blank or no row exists.
/// </summary>
public async Task<GraphMetrics?> GetMetricsAsync(
string symbolId,
string callgraphId,
CancellationToken cancellationToken = default)
{
if (string.IsNullOrWhiteSpace(symbolId) || string.IsNullOrWhiteSpace(callgraphId))
return null;
await EnsureTableAsync(cancellationToken).ConfigureAwait(false);
const string sql = @"
SELECT
node_id, callgraph_id, node_type,
degree_centrality, in_degree, out_degree,
betweenness_centrality, closeness_centrality,
normalized_betweenness, normalized_degree,
computed_at, computation_duration_ms, algorithm_version,
total_nodes, total_edges
FROM signals.graph_metrics
WHERE node_id = @node_id AND callgraph_id = @callgraph_id";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
// Identifiers are trimmed on write (see AddMetricsParameters), so trim on read too.
AddParameter(command, "@node_id", symbolId.Trim());
AddParameter(command, "@callgraph_id", callgraphId.Trim());
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
return null;
return MapMetrics(reader);
}
/// <summary>
/// Inserts or updates one node's metrics, keyed by (node_id, callgraph_id).
/// </summary>
public async Task UpsertAsync(GraphMetrics metrics, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(metrics);
await EnsureTableAsync(cancellationToken).ConfigureAwait(false);
const string sql = @"
INSERT INTO signals.graph_metrics (
node_id, callgraph_id, node_type,
degree_centrality, in_degree, out_degree,
betweenness_centrality, closeness_centrality,
normalized_betweenness, normalized_degree,
computed_at, computation_duration_ms, algorithm_version,
total_nodes, total_edges
) VALUES (
@node_id, @callgraph_id, @node_type,
@degree_centrality, @in_degree, @out_degree,
@betweenness_centrality, @closeness_centrality,
@normalized_betweenness, @normalized_degree,
@computed_at, @computation_duration_ms, @algorithm_version,
@total_nodes, @total_edges
)
ON CONFLICT (node_id, callgraph_id)
DO UPDATE SET
node_type = EXCLUDED.node_type,
degree_centrality = EXCLUDED.degree_centrality,
in_degree = EXCLUDED.in_degree,
out_degree = EXCLUDED.out_degree,
betweenness_centrality = EXCLUDED.betweenness_centrality,
closeness_centrality = EXCLUDED.closeness_centrality,
normalized_betweenness = EXCLUDED.normalized_betweenness,
normalized_degree = EXCLUDED.normalized_degree,
computed_at = EXCLUDED.computed_at,
computation_duration_ms = EXCLUDED.computation_duration_ms,
algorithm_version = EXCLUDED.algorithm_version,
total_nodes = EXCLUDED.total_nodes,
total_edges = EXCLUDED.total_edges";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddMetricsParameters(command, metrics);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Upserts a batch of metrics inside a single transaction: all rows commit
/// together or the whole batch rolls back. Null elements are skipped.
/// </summary>
public async Task BulkUpsertAsync(IEnumerable<GraphMetrics> metrics, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(metrics);
await EnsureTableAsync(cancellationToken).ConfigureAwait(false);
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
try
{
// Same upsert statement as UpsertAsync, executed once per element.
const string sql = @"
INSERT INTO signals.graph_metrics (
node_id, callgraph_id, node_type,
degree_centrality, in_degree, out_degree,
betweenness_centrality, closeness_centrality,
normalized_betweenness, normalized_degree,
computed_at, computation_duration_ms, algorithm_version,
total_nodes, total_edges
) VALUES (
@node_id, @callgraph_id, @node_type,
@degree_centrality, @in_degree, @out_degree,
@betweenness_centrality, @closeness_centrality,
@normalized_betweenness, @normalized_degree,
@computed_at, @computation_duration_ms, @algorithm_version,
@total_nodes, @total_edges
)
ON CONFLICT (node_id, callgraph_id)
DO UPDATE SET
node_type = EXCLUDED.node_type,
degree_centrality = EXCLUDED.degree_centrality,
in_degree = EXCLUDED.in_degree,
out_degree = EXCLUDED.out_degree,
betweenness_centrality = EXCLUDED.betweenness_centrality,
closeness_centrality = EXCLUDED.closeness_centrality,
normalized_betweenness = EXCLUDED.normalized_betweenness,
normalized_degree = EXCLUDED.normalized_degree,
computed_at = EXCLUDED.computed_at,
computation_duration_ms = EXCLUDED.computation_duration_ms,
algorithm_version = EXCLUDED.algorithm_version,
total_nodes = EXCLUDED.total_nodes,
total_edges = EXCLUDED.total_edges";
foreach (var m in metrics)
{
if (m is null)
continue;
await using var command = CreateCommand(sql, connection, transaction);
AddMetricsParameters(command, m);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
}
catch
{
await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false);
throw;
}
}
/// <summary>
/// Returns up to <paramref name="limit"/> distinct callgraph ids whose metrics
/// were computed before now - <paramref name="maxAge"/>, ordered by id.
/// </summary>
public async Task<IReadOnlyList<string>> GetStaleCallgraphsAsync(
TimeSpan maxAge,
int limit,
CancellationToken cancellationToken = default)
{
await EnsureTableAsync(cancellationToken).ConfigureAwait(false);
const string sql = @"
SELECT DISTINCT callgraph_id
FROM signals.graph_metrics
WHERE computed_at < @cutoff
ORDER BY callgraph_id
LIMIT @limit";
var cutoff = DateTimeOffset.UtcNow - maxAge;
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "@cutoff", cutoff);
AddParameter(command, "@limit", limit);
await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
var results = new List<string>();
while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
results.Add(reader.GetString(0));
}
return results;
}
/// <summary>
/// Removes every metrics row belonging to the given call graph; blank ids are ignored.
/// </summary>
public async Task DeleteByCallgraphAsync(string callgraphId, CancellationToken cancellationToken = default)
{
if (string.IsNullOrWhiteSpace(callgraphId))
return;
await EnsureTableAsync(cancellationToken).ConfigureAwait(false);
const string sql = "DELETE FROM signals.graph_metrics WHERE callgraph_id = @callgraph_id";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(sql, connection);
AddParameter(command, "@callgraph_id", callgraphId.Trim());
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Binds all upsert parameters. Optional (nullable) metrics map to DBNull;
/// a default ComputedAt is replaced with the current UTC time.
/// </summary>
private void AddMetricsParameters(Npgsql.NpgsqlCommand command, GraphMetrics metrics)
{
AddParameter(command, "@node_id", metrics.NodeId.Trim());
AddParameter(command, "@callgraph_id", metrics.CallgraphId.Trim());
AddParameter(command, "@node_type", metrics.NodeType);
AddParameter(command, "@degree_centrality", metrics.Degree);
AddParameter(command, "@in_degree", metrics.InDegree);
AddParameter(command, "@out_degree", metrics.OutDegree);
AddParameter(command, "@betweenness_centrality", metrics.Betweenness);
AddParameter(command, "@closeness_centrality", metrics.Closeness.HasValue ? metrics.Closeness.Value : DBNull.Value);
AddParameter(command, "@normalized_betweenness", metrics.NormalizedBetweenness.HasValue ? metrics.NormalizedBetweenness.Value : DBNull.Value);
AddParameter(command, "@normalized_degree", metrics.NormalizedDegree.HasValue ? metrics.NormalizedDegree.Value : DBNull.Value);
AddParameter(command, "@computed_at", metrics.ComputedAt == default ? DateTimeOffset.UtcNow : metrics.ComputedAt);
AddParameter(command, "@computation_duration_ms", metrics.ComputationDurationMs.HasValue ? metrics.ComputationDurationMs.Value : DBNull.Value);
AddParameter(command, "@algorithm_version", metrics.AlgorithmVersion);
AddParameter(command, "@total_nodes", metrics.TotalNodes.HasValue ? metrics.TotalNodes.Value : DBNull.Value);
AddParameter(command, "@total_edges", metrics.TotalEdges.HasValue ? metrics.TotalEdges.Value : DBNull.Value);
}
/// <summary>
/// Builds an Npgsql command enlisted in the supplied transaction.
/// </summary>
private static Npgsql.NpgsqlCommand CreateCommand(string sql, Npgsql.NpgsqlConnection connection, Npgsql.NpgsqlTransaction transaction)
{
return new Npgsql.NpgsqlCommand(sql, connection, transaction);
}
/// <summary>
/// Materializes a GraphMetrics from a reader positioned on a row, substituting
/// the model's defaults ("symbol", 0, "1.0", UtcNow) for NULL columns.
/// Ordinals match the column order of the SELECT in GetMetricsAsync.
/// </summary>
private static GraphMetrics MapMetrics(Npgsql.NpgsqlDataReader reader)
{
return new GraphMetrics
{
NodeId = reader.GetString(0),
CallgraphId = reader.GetString(1),
NodeType = reader.IsDBNull(2) ? "symbol" : reader.GetString(2),
Degree = reader.IsDBNull(3) ? 0 : reader.GetInt32(3),
InDegree = reader.IsDBNull(4) ? 0 : reader.GetInt32(4),
OutDegree = reader.IsDBNull(5) ? 0 : reader.GetInt32(5),
Betweenness = reader.IsDBNull(6) ? 0.0 : reader.GetDouble(6),
Closeness = reader.IsDBNull(7) ? null : reader.GetDouble(7),
NormalizedBetweenness = reader.IsDBNull(8) ? null : reader.GetDouble(8),
NormalizedDegree = reader.IsDBNull(9) ? null : reader.GetDouble(9),
ComputedAt = reader.IsDBNull(10) ? DateTimeOffset.UtcNow : reader.GetFieldValue<DateTimeOffset>(10),
ComputationDurationMs = reader.IsDBNull(11) ? null : reader.GetInt32(11),
AlgorithmVersion = reader.IsDBNull(12) ? "1.0" : reader.GetString(12),
TotalNodes = reader.IsDBNull(13) ? null : reader.GetInt32(13),
TotalEdges = reader.IsDBNull(14) ? null : reader.GetInt32(14)
};
}
/// <summary>
/// Creates the signals.graph_metrics table and its indexes on first use; a no-op
/// afterwards. NOTE(review): the flag is unsynchronized — safe only because the
/// DDL is fully idempotent.
/// </summary>
private async Task EnsureTableAsync(CancellationToken cancellationToken)
{
if (_tableInitialized)
return;
const string ddl = @"
CREATE SCHEMA IF NOT EXISTS signals;
CREATE TABLE IF NOT EXISTS signals.graph_metrics (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
node_id TEXT NOT NULL,
callgraph_id TEXT NOT NULL,
node_type TEXT NOT NULL DEFAULT 'symbol',
degree_centrality INT NOT NULL DEFAULT 0,
in_degree INT NOT NULL DEFAULT 0,
out_degree INT NOT NULL DEFAULT 0,
betweenness_centrality FLOAT NOT NULL DEFAULT 0.0,
closeness_centrality FLOAT,
eigenvector_centrality FLOAT,
normalized_betweenness FLOAT,
normalized_degree FLOAT,
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
computation_duration_ms INT,
algorithm_version TEXT NOT NULL DEFAULT '1.0',
total_nodes INT,
total_edges INT,
CONSTRAINT uq_graph_metrics_node_graph UNIQUE (node_id, callgraph_id)
);
CREATE INDEX IF NOT EXISTS idx_graph_metrics_node ON signals.graph_metrics(node_id);
CREATE INDEX IF NOT EXISTS idx_graph_metrics_callgraph ON signals.graph_metrics(callgraph_id);
CREATE INDEX IF NOT EXISTS idx_graph_metrics_betweenness ON signals.graph_metrics(betweenness_centrality DESC);
CREATE INDEX IF NOT EXISTS idx_graph_metrics_computed ON signals.graph_metrics(computed_at);";
await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
await using var command = CreateCommand(ddl, connection);
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
_tableInitialized = true;
}
}

View File

@@ -261,6 +261,72 @@ public sealed class PostgresUnknownsRepository : RepositoryBase<SignalsDataSourc
return results;
}
/// <summary>
/// Pages unknown-symbol documents ordered by score (desc) then creation time (desc),
/// optionally restricted to a single band.
/// </summary>
public async Task<IReadOnlyList<UnknownSymbolDocument>> QueryAsync(
    UnknownsBand? band,
    int limit,
    int offset,
    CancellationToken cancellationToken)
{
    await EnsureTableAsync(cancellationToken).ConfigureAwait(false);
    // Compose the statement; the band predicate only appears when a filter was requested.
    var statement = SelectAllColumns + @"
FROM signals.unknowns
WHERE 1=1";
    if (band.HasValue)
    {
        statement += " AND band = @band";
    }
    statement += @"
ORDER BY score DESC, created_at DESC
LIMIT @limit OFFSET @offset";
    await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
    await using var command = CreateCommand(statement, connection);
    if (band.HasValue)
    {
        // Bands are persisted lower-cased.
        AddParameter(command, "@band", band.Value.ToString().ToLowerInvariant());
    }
    AddParameter(command, "@limit", limit);
    AddParameter(command, "@offset", offset);
    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
    var documents = new List<UnknownSymbolDocument>();
    while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        documents.Add(MapUnknownSymbol(reader));
    }
    return documents;
}
/// <summary>
/// Looks up a single unknown-symbol document by identifier; null for blank ids or misses.
/// </summary>
public async Task<UnknownSymbolDocument?> GetByIdAsync(string id, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(id))
    {
        return null;
    }
    await EnsureTableAsync(cancellationToken).ConfigureAwait(false);
    const string sql = SelectAllColumns + @"
FROM signals.unknowns
WHERE id = @id";
    await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
    await using var command = CreateCommand(sql, connection);
    AddParameter(command, "@id", id.Trim());
    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
    return await reader.ReadAsync(cancellationToken).ConfigureAwait(false)
        ? MapUnknownSymbol(reader)
        : null;
}
private const string SelectAllColumns = @"
SELECT id, subject_key, callgraph_id, symbol_id, code_id, purl, purl_version,
edge_from, edge_to, reason,

View File

@@ -31,6 +31,9 @@ public static class ServiceCollectionExtensions
services.AddSingleton<IReachabilityFactRepository, PostgresReachabilityFactRepository>();
services.AddSingleton<IUnknownsRepository, PostgresUnknownsRepository>();
services.AddSingleton<IReachabilityStoreRepository, PostgresReachabilityStoreRepository>();
services.AddSingleton<IDeploymentRefsRepository, PostgresDeploymentRefsRepository>();
services.AddSingleton<IGraphMetricsRepository, PostgresGraphMetricsRepository>();
services.AddSingleton<ICallGraphQueryRepository, PostgresCallGraphQueryRepository>();
return services;
}
@@ -53,6 +56,9 @@ public static class ServiceCollectionExtensions
services.AddSingleton<IReachabilityFactRepository, PostgresReachabilityFactRepository>();
services.AddSingleton<IUnknownsRepository, PostgresUnknownsRepository>();
services.AddSingleton<IReachabilityStoreRepository, PostgresReachabilityStoreRepository>();
services.AddSingleton<IDeploymentRefsRepository, PostgresDeploymentRefsRepository>();
services.AddSingleton<IGraphMetricsRepository, PostgresGraphMetricsRepository>();
services.AddSingleton<ICallGraphQueryRepository, PostgresCallGraphQueryRepository>();
return services;
}

View File

@@ -1,3 +1,4 @@
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
@@ -12,4 +13,52 @@ public interface IDeploymentRefsRepository
/// Counts distinct deployments referencing a package.
/// </summary>
Task<int> CountDeploymentsAsync(string purl, CancellationToken cancellationToken = default);
/// <summary>
/// Gets the image ids of deployments referencing a package,
/// returning at most <paramref name="limit"/> entries.
/// </summary>
Task<IReadOnlyList<string>> GetDeploymentIdsAsync(string purl, int limit, CancellationToken cancellationToken = default);
/// <summary>
/// Records a deployment reference, replacing any existing record
/// for the same package/image/environment combination.
/// </summary>
Task UpsertAsync(DeploymentRef deployment, CancellationToken cancellationToken = default);
/// <summary>
/// Records multiple deployment references in a batch.
/// </summary>
Task BulkUpsertAsync(IEnumerable<DeploymentRef> deployments, CancellationToken cancellationToken = default);
/// <summary>
/// Gets an aggregate deployment summary for a package, or null when none exist.
/// </summary>
Task<DeploymentSummary?> GetSummaryAsync(string purl, CancellationToken cancellationToken = default);
}
/// <summary>
/// Represents a deployment reference record: one observation of a package
/// deployed in a particular image and environment.
/// </summary>
public sealed class DeploymentRef
{
/// <summary>Package URL of the deployed component.</summary>
public required string Purl { get; init; }
/// <summary>Version portion of the package, when known.</summary>
public string? PurlVersion { get; init; }
/// <summary>Identifier of the container image containing the package.</summary>
public required string ImageId { get; init; }
/// <summary>Content digest of the image, when known.</summary>
public string? ImageDigest { get; init; }
/// <summary>Deployment environment (e.g. the backing store defaults to 'unknown').</summary>
public required string Environment { get; init; }
/// <summary>Kubernetes namespace, when applicable.</summary>
public string? Namespace { get; init; }
/// <summary>Cluster identifier, when applicable.</summary>
public string? Cluster { get; init; }
/// <summary>Region identifier, when applicable.</summary>
public string? Region { get; init; }
}
/// <summary>
/// Summary of deployments for a package: distinct image/environment counts,
/// the total number of references, and the observation time range (when known).
/// </summary>
public sealed class DeploymentSummary
{
/// <summary>Package URL the summary describes.</summary>
public required string Purl { get; init; }
/// <summary>Number of distinct images referencing the package.</summary>
public int ImageCount { get; init; }
/// <summary>Number of distinct environments the package is deployed in.</summary>
public int EnvironmentCount { get; init; }
/// <summary>Total number of deployment references.</summary>
public int TotalDeployments { get; init; }
/// <summary>Most recent observation, or null when not tracked.</summary>
public DateTimeOffset? LastDeployment { get; init; }
/// <summary>Earliest observation, or null when not tracked.</summary>
public DateTimeOffset? FirstDeployment { get; init; }
}

View File

@@ -1,3 +1,5 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
@@ -15,11 +17,53 @@ public interface IGraphMetricsRepository
string symbolId,
string callgraphId,
CancellationToken cancellationToken = default);
/// <summary>
/// Stores computed metrics for a node, replacing any existing record
/// for the same node/callgraph pair.
/// </summary>
Task UpsertAsync(GraphMetrics metrics, CancellationToken cancellationToken = default);
/// <summary>
/// Bulk stores metrics for a call graph.
/// </summary>
Task BulkUpsertAsync(IEnumerable<GraphMetrics> metrics, CancellationToken cancellationToken = default);
/// <summary>
/// Gets up to <paramref name="limit"/> callgraph IDs whose metrics are older
/// than <paramref name="maxAge"/> and therefore need recomputation.
/// </summary>
Task<IReadOnlyList<string>> GetStaleCallgraphsAsync(
TimeSpan maxAge,
int limit,
CancellationToken cancellationToken = default);
/// <summary>
/// Deletes all metrics for a callgraph.
/// </summary>
Task DeleteByCallgraphAsync(string callgraphId, CancellationToken cancellationToken = default);
}
/// <summary>
/// Centrality metrics for a symbol.
/// </summary>
public sealed record GraphMetrics(
int Degree,
double Betweenness);
public sealed class GraphMetrics
{
/// <summary>Symbol/node identifier within the call graph.</summary>
public required string NodeId { get; init; }
/// <summary>Identifier of the call graph the node belongs to.</summary>
public required string CallgraphId { get; init; }
/// <summary>Kind of node; defaults to "symbol".</summary>
public string NodeType { get; init; } = "symbol";
/// <summary>Total degree (in + out edges) — presumably; confirm against the metrics computation.</summary>
public int Degree { get; init; }
/// <summary>Number of incoming edges.</summary>
public int InDegree { get; init; }
/// <summary>Number of outgoing edges.</summary>
public int OutDegree { get; init; }
/// <summary>Raw betweenness centrality.</summary>
public double Betweenness { get; init; }
/// <summary>Closeness centrality, when computed.</summary>
public double? Closeness { get; init; }
/// <summary>Betweenness normalized to [0,1] — assumed; verify against algorithm.</summary>
public double? NormalizedBetweenness { get; init; }
/// <summary>Degree normalized to [0,1] — assumed; verify against algorithm.</summary>
public double? NormalizedDegree { get; init; }
/// <summary>When the metrics were computed; a default value is replaced with UtcNow on persist.</summary>
public DateTimeOffset ComputedAt { get; init; }
/// <summary>Wall-clock cost of the computation, in milliseconds.</summary>
public int? ComputationDurationMs { get; init; }
/// <summary>Version tag of the computing algorithm; defaults to "1.0".</summary>
public string AlgorithmVersion { get; init; } = "1.0";
/// <summary>Node count of the graph at computation time, when recorded.</summary>
public int? TotalNodes { get; init; }
/// <summary>Edge count of the graph at computation time, when recorded.</summary>
public int? TotalEdges { get; init; }
}

View File

@@ -1,4 +1,7 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
@@ -7,6 +10,7 @@ namespace StellaOps.Signals.Persistence;
public sealed class InMemoryDeploymentRefsRepository : IDeploymentRefsRepository
{
// Explicitly seeded counts (via SetDeployments); consulted before _refsByPurl when counting.
private readonly ConcurrentDictionary<string, int> _deploymentsByPurl = new(StringComparer.OrdinalIgnoreCase);
// Deployment references grouped by package URL (case-insensitive key).
private readonly ConcurrentDictionary<string, List<DeploymentRef>> _refsByPurl = new(StringComparer.OrdinalIgnoreCase);
public void SetDeployments(string purl, int deployments)
{
@@ -28,6 +32,82 @@ public sealed class InMemoryDeploymentRefsRepository : IDeploymentRefsRepository
return Task.FromResult(0);
}
return Task.FromResult(_deploymentsByPurl.TryGetValue(purl.Trim(), out var count) ? count : 0);
var key = purl.Trim();
if (_deploymentsByPurl.TryGetValue(key, out var count))
return Task.FromResult(count);
if (_refsByPurl.TryGetValue(key, out var refs))
return Task.FromResult(refs.Count);
return Task.FromResult(0);
}
/// <summary>
/// Returns up to <paramref name="limit"/> image ids recorded for the package;
/// empty for blank purls or unknown packages.
/// </summary>
public Task<IReadOnlyList<string>> GetDeploymentIdsAsync(string purl, int limit, CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();
    IReadOnlyList<string> ids = Array.Empty<string>();
    if (!string.IsNullOrWhiteSpace(purl) && _refsByPurl.TryGetValue(purl.Trim(), out var refs))
    {
        ids = refs.Take(limit).Select(r => r.ImageId).ToList();
    }
    return Task.FromResult(ids);
}
/// <summary>
/// Inserts or replaces the deployment reference matching the same image id and
/// environment under the (trimmed) package URL.
/// NOTE(review): the per-purl List is mutated inside AddOrUpdate; fine for the
/// single-threaded test scenarios this fake targets, not under real concurrency.
/// </summary>
public Task UpsertAsync(DeploymentRef deployment, CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();
    ArgumentNullException.ThrowIfNull(deployment);
    var key = deployment.Purl.Trim();
    _refsByPurl.AddOrUpdate(
        key,
        _ => new List<DeploymentRef> { deployment },
        (_, existing) =>
        {
            var index = existing.FindIndex(r =>
                r.ImageId == deployment.ImageId &&
                r.Environment == deployment.Environment);
            if (index < 0)
            {
                existing.Add(deployment);
            }
            else
            {
                existing[index] = deployment;
            }
            return existing;
        });
    return Task.CompletedTask;
}
/// <summary>
/// Records multiple deployment references by upserting them one at a time.
/// </summary>
public async Task BulkUpsertAsync(IEnumerable<DeploymentRef> deployments, CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();
    ArgumentNullException.ThrowIfNull(deployments);
    foreach (var deployment in deployments)
    {
        // Await instead of blocking with GetAwaiter().GetResult(): sync-over-async
        // is a deadlock/starvation hazard and is never needed inside a Task-returning member.
        await UpsertAsync(deployment, cancellationToken).ConfigureAwait(false);
    }
}
/// <summary>
/// Aggregates the recorded references for a package into a summary;
/// null for blank purls or packages with no references.
/// </summary>
public Task<DeploymentSummary?> GetSummaryAsync(string purl, CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();
    if (string.IsNullOrWhiteSpace(purl))
    {
        return Task.FromResult<DeploymentSummary?>(null);
    }
    if (!_refsByPurl.TryGetValue(purl.Trim(), out var refs) || refs.Count == 0)
    {
        return Task.FromResult<DeploymentSummary?>(null);
    }
    var imageIds = new HashSet<string>();
    var environments = new HashSet<string>();
    foreach (var reference in refs)
    {
        imageIds.Add(reference.ImageId);
        environments.Add(reference.Environment);
    }
    // First/LastDeployment stay null: DeploymentRef carries no timestamps in this fake.
    var summary = new DeploymentSummary
    {
        Purl = purl,
        ImageCount = imageIds.Count,
        EnvironmentCount = environments.Count,
        TotalDeployments = refs.Count
    };
    return Task.FromResult<DeploymentSummary?>(summary);
}
}

View File

@@ -1,4 +1,7 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
@@ -30,6 +33,64 @@ public sealed class InMemoryGraphMetricsRepository : IGraphMetricsRepository
return Task.FromResult(_metrics.TryGetValue(key, out var metrics) ? metrics : null);
}
/// <summary>
/// Stores metrics under the composite (callgraph, node) key, replacing any prior entry.
/// </summary>
public Task UpsertAsync(GraphMetrics metrics, CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();
    ArgumentNullException.ThrowIfNull(metrics);
    _metrics[BuildKey(metrics.NodeId, metrics.CallgraphId)] = metrics;
    return Task.CompletedTask;
}
/// <summary>
/// Stores a batch of metrics entries; null elements are skipped.
/// </summary>
public Task BulkUpsertAsync(IEnumerable<GraphMetrics> metrics, CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();
    ArgumentNullException.ThrowIfNull(metrics);
    foreach (var m in metrics)
    {
        // Skip null entries to match PostgresGraphMetricsRepository.BulkUpsertAsync;
        // previously a null element threw NullReferenceException mid-batch.
        if (m is null)
            continue;
        var key = BuildKey(m.NodeId, m.CallgraphId);
        _metrics[key] = m;
    }
    return Task.CompletedTask;
}
/// <summary>
/// Returns up to <paramref name="limit"/> distinct callgraph ids whose metrics are
/// older than now - <paramref name="maxAge"/>, in ordinal id order.
/// </summary>
public Task<IReadOnlyList<string>> GetStaleCallgraphsAsync(TimeSpan maxAge, int limit, CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();
    var cutoff = DateTimeOffset.UtcNow - maxAge;
    // Order before Take so results are deterministic and consistent with the Postgres
    // implementation (SELECT DISTINCT ... ORDER BY callgraph_id LIMIT @limit);
    // previously the dictionary's arbitrary iteration order decided which graphs
    // survived the Take(limit) cut.
    var staleGraphs = _metrics.Values
        .Where(m => m.ComputedAt < cutoff)
        .Select(m => m.CallgraphId)
        .Distinct()
        .OrderBy(id => id, StringComparer.Ordinal)
        .Take(limit)
        .ToList();
    return Task.FromResult<IReadOnlyList<string>>(staleGraphs);
}
/// <summary>
/// Removes every cached entry whose key carries the given callgraph prefix;
/// blank ids are ignored.
/// </summary>
public Task DeleteByCallgraphAsync(string callgraphId, CancellationToken cancellationToken = default)
{
    cancellationToken.ThrowIfCancellationRequested();
    if (string.IsNullOrWhiteSpace(callgraphId))
        return Task.CompletedTask;
    // Keys have the shape "<callgraph>|<symbol>" (see BuildKey).
    var prefix = callgraphId.Trim() + "|";
    var doomed = _metrics.Keys
        .Where(k => k.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
        .ToList();
    foreach (var key in doomed)
    {
        _metrics.TryRemove(key, out _);
    }
    return Task.CompletedTask;
}
/// <summary>Composite cache key: "&lt;callgraph&gt;|&lt;symbol&gt;", both parts trimmed.</summary>
private static string BuildKey(string symbolId, string callgraphId)
    => string.Concat(callgraphId.Trim(), "|", symbolId.Trim());
}

View File

@@ -289,5 +289,19 @@ public class ReachabilityScoringServiceTests
return Task.FromResult<IReadOnlyList<UnknownSymbolDocument>>(
Stored.Where(x => x.Band == band).Take(limit).ToList());
}
// Fake paging over the in-memory store: optional band filter, then offset/limit.
public Task<IReadOnlyList<UnknownSymbolDocument>> QueryAsync(UnknownsBand? band, int limit, int offset, CancellationToken cancellationToken)
{
    var source = band.HasValue
        ? Stored.Where(x => x.Band == band.Value)
        : Stored.AsEnumerable();
    IReadOnlyList<UnknownSymbolDocument> page = source.Skip(offset).Take(limit).ToList();
    return Task.FromResult(page);
}
// Fake lookup: first stored document with a matching id, or null.
public Task<UnknownSymbolDocument?> GetByIdAsync(string id, CancellationToken cancellationToken)
    => Task.FromResult(Stored.FirstOrDefault(x => x.Id == id));
}
}

View File

@@ -475,6 +475,20 @@ public class UnknownsDecayServiceTests
return Task.FromResult<IReadOnlyList<UnknownSymbolDocument>>(
_stored.Where(x => x.Band == band).Take(limit).ToList());
}
// Fake paging over the in-memory store: optional band filter, then offset/limit.
public Task<IReadOnlyList<UnknownSymbolDocument>> QueryAsync(UnknownsBand? band, int limit, int offset, CancellationToken cancellationToken)
{
    var source = band.HasValue
        ? _stored.Where(x => x.Band == band.Value)
        : _stored.AsEnumerable();
    IReadOnlyList<UnknownSymbolDocument> page = source.Skip(offset).Take(limit).ToList();
    return Task.FromResult(page);
}
// Fake lookup: first stored document with a matching id, or null.
public Task<UnknownSymbolDocument?> GetByIdAsync(string id, CancellationToken cancellationToken)
    => Task.FromResult(_stored.FirstOrDefault(x => x.Id == id));
}
private sealed class InMemoryDeploymentRefsRepository : IDeploymentRefsRepository
@@ -492,6 +506,13 @@ public class UnknownsDecayServiceTests
{
return Task.FromResult<IReadOnlyList<string>>(Array.Empty<string>());
}
// Write-path members are deliberate no-ops: these decay tests only exercise the read side.
public Task UpsertAsync(DeploymentRef deployment, CancellationToken cancellationToken) => Task.CompletedTask;
public Task BulkUpsertAsync(IEnumerable<DeploymentRef> deployments, CancellationToken cancellationToken) => Task.CompletedTask;
public Task<DeploymentSummary?> GetSummaryAsync(string purl, CancellationToken cancellationToken) =>
Task.FromResult<DeploymentSummary?>(null);
}
private sealed class InMemoryGraphMetricsRepository : IGraphMetricsRepository
@@ -508,6 +529,15 @@ public class UnknownsDecayServiceTests
_metrics.TryGetValue($"{symbolId}:{callgraphId}", out var metrics);
return Task.FromResult(metrics);
}
// Interface members the decay tests never depend on are stubbed to no-ops/empty results.
public Task UpsertAsync(GraphMetrics metrics, CancellationToken cancellationToken) => Task.CompletedTask;
public Task BulkUpsertAsync(IEnumerable<GraphMetrics> metrics, CancellationToken cancellationToken) => Task.CompletedTask;
public Task<IReadOnlyList<string>> GetStaleCallgraphsAsync(TimeSpan maxAge, int limit, CancellationToken cancellationToken) =>
Task.FromResult<IReadOnlyList<string>>(Array.Empty<string>());
public Task DeleteByCallgraphAsync(string callgraphId, CancellationToken cancellationToken) => Task.CompletedTask;
}
#endregion

View File

@@ -103,5 +103,19 @@ public class UnknownsIngestionServiceTests
return Task.FromResult<IReadOnlyList<UnknownSymbolDocument>>(
Stored.Where(x => x.Band == band).Take(limit).ToList());
}
// Fake paging over the in-memory store: optional band filter, then offset/limit.
public Task<IReadOnlyList<UnknownSymbolDocument>> QueryAsync(UnknownsBand? band, int limit, int offset, CancellationToken cancellationToken)
{
    var source = band.HasValue
        ? Stored.Where(x => x.Band == band.Value)
        : Stored.AsEnumerable();
    IReadOnlyList<UnknownSymbolDocument> page = source.Skip(offset).Take(limit).ToList();
    return Task.FromResult(page);
}
// Fake lookup: first stored document with a matching id, or null.
public Task<UnknownSymbolDocument?> GetByIdAsync(string id, CancellationToken cancellationToken)
    => Task.FromResult(Stored.FirstOrDefault(x => x.Id == id));
}
}

View File

@@ -0,0 +1,759 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using FluentAssertions;
using Microsoft.Extensions.Logging.Abstractions;
using MsOptions = Microsoft.Extensions.Options;
using StellaOps.Signals.Models;
using StellaOps.Signals.Options;
using StellaOps.Signals.Persistence;
using StellaOps.Signals.Services;
using Xunit;
namespace StellaOps.Signals.Tests;
/// <summary>
/// Integration tests for the unknowns scoring system.
/// Tests end-to-end flow: ingest → score → persist → query.
/// </summary>
public sealed class UnknownsScoringIntegrationTests
{
// Deterministic clock plus in-memory fakes shared by every test in this class.
private readonly MockTimeProvider _timeProvider;
private readonly FullInMemoryUnknownsRepository _unknownsRepo;
private readonly InMemoryDeploymentRefsRepository _deploymentRefs;
private readonly InMemoryGraphMetricsRepository _graphMetrics;
private readonly UnknownsScoringOptions _defaultOptions;
// Fixed clock (2025-12-15T12:00Z) keeps age-based scoring factors deterministic across runs.
public UnknownsScoringIntegrationTests()
{
_timeProvider = new MockTimeProvider(new DateTimeOffset(2025, 12, 15, 12, 0, 0, TimeSpan.Zero));
_unknownsRepo = new FullInMemoryUnknownsRepository();
_deploymentRefs = new InMemoryDeploymentRefsRepository();
_graphMetrics = new InMemoryGraphMetricsRepository();
_defaultOptions = new UnknownsScoringOptions();
}
/// <summary>Builds the service under test against the shared in-memory fakes.</summary>
private UnknownsScoringService CreateService(UnknownsScoringOptions? options = null) =>
    new UnknownsScoringService(
        _unknownsRepo,
        _deploymentRefs,
        _graphMetrics,
        MsOptions.Options.Create(options ?? _defaultOptions),
        _timeProvider,
        NullLogger<UnknownsScoringService>.Instance);
#region End-to-End Flow Tests
/// <summary>
/// Full pipeline: ingest three unknowns with graduated risk signals, recompute
/// scores, then verify each lands in the expected hot/warm/cold band and that
/// the score and normalization trace were persisted.
/// </summary>
[Fact]
public async Task EndToEnd_IngestScoreAndQueryByBand()
{
// Arrange: Create unknowns with varying factors
var now = _timeProvider.GetUtcNow();
var subjectKey = "test|1.0.0";
var unknowns = new List<UnknownSymbolDocument>
{
// High-priority unknown (should be HOT): stale analysis, all uncertainty flags set
new()
{
Id = "unknown-hot",
SubjectKey = subjectKey,
Purl = "pkg:npm/critical-pkg@1.0.0",
SymbolId = "sym-hot",
CallgraphId = "cg-1",
LastAnalyzedAt = now.AddDays(-14),
Flags = new UnknownFlags
{
NoProvenanceAnchor = true,
VersionRange = true,
ConflictingFeeds = true,
MissingVector = true
},
CreatedAt = now.AddDays(-20)
},
// Medium-priority unknown (should be WARM): fewer flags, more recent analysis
new()
{
Id = "unknown-warm",
SubjectKey = subjectKey,
Purl = "pkg:npm/moderate-pkg@2.0.0",
SymbolId = "sym-warm",
CallgraphId = "cg-1",
LastAnalyzedAt = now.AddDays(-7),
Flags = new UnknownFlags
{
NoProvenanceAnchor = true,
VersionRange = true
},
CreatedAt = now.AddDays(-10)
},
// Low-priority unknown (should be COLD): no flags, freshly analyzed, no graph metrics
new()
{
Id = "unknown-cold",
SubjectKey = subjectKey,
Purl = "pkg:npm/low-pkg@3.0.0",
LastAnalyzedAt = now,
Flags = new UnknownFlags(),
CreatedAt = now.AddDays(-1)
}
};
// Set up deployment refs for popularity factor
_deploymentRefs.SetDeploymentCount("pkg:npm/critical-pkg@1.0.0", 100);
_deploymentRefs.SetDeploymentCount("pkg:npm/moderate-pkg@2.0.0", 50);
_deploymentRefs.SetDeploymentCount("pkg:npm/low-pkg@3.0.0", 1);
// Set up graph metrics for centrality factor
_graphMetrics.SetMetrics("sym-hot", "cg-1", new GraphMetrics { NodeId = "sym-hot", CallgraphId = "cg-1", Degree = 20, Betweenness = 800.0 });
_graphMetrics.SetMetrics("sym-warm", "cg-1", new GraphMetrics { NodeId = "sym-warm", CallgraphId = "cg-1", Degree = 10, Betweenness = 300.0 });
// Act 1: Ingest unknowns
await _unknownsRepo.UpsertAsync(subjectKey, unknowns, CancellationToken.None);
// Act 2: Score all unknowns
var service = CreateService();
var result = await service.RecomputeAsync(subjectKey, CancellationToken.None);
// Assert: Verify scoring result
result.TotalUnknowns.Should().Be(3);
result.SubjectKey.Should().Be(subjectKey);
// Act 3: Query by band
var hotItems = await _unknownsRepo.QueryAsync(UnknownsBand.Hot, 10, 0, CancellationToken.None);
var warmItems = await _unknownsRepo.QueryAsync(UnknownsBand.Warm, 10, 0, CancellationToken.None);
var coldItems = await _unknownsRepo.QueryAsync(UnknownsBand.Cold, 10, 0, CancellationToken.None);
// Assert: Verify band distribution
hotItems.Should().Contain(u => u.Id == "unknown-hot");
warmItems.Should().Contain(u => u.Id == "unknown-warm");
coldItems.Should().Contain(u => u.Id == "unknown-cold");
// Verify scores are persisted
var hotUnknown = await _unknownsRepo.GetByIdAsync("unknown-hot", CancellationToken.None);
hotUnknown.Should().NotBeNull();
hotUnknown!.Score.Should().BeGreaterThanOrEqualTo(_defaultOptions.HotThreshold);
hotUnknown.NormalizationTrace.Should().NotBeNull();
}
/// <summary>
/// Recomputing scores must only update scoring fields; descriptive fields
/// (Reason, EdgeFrom/EdgeTo, SubjectKey) written at ingest time survive untouched.
/// </summary>
[Fact]
public async Task EndToEnd_RecomputePreservesExistingData()
{
// Arrange
var now = _timeProvider.GetUtcNow();
var subjectKey = "preserve|1.0.0";
var unknowns = new List<UnknownSymbolDocument>
{
new()
{
Id = "preserve-1",
SubjectKey = subjectKey,
Purl = "pkg:npm/preserve@1.0.0",
Reason = "Missing symbol resolution",
EdgeFrom = "caller",
EdgeTo = "target",
LastAnalyzedAt = now.AddDays(-5),
Flags = new UnknownFlags { NoProvenanceAnchor = true },
CreatedAt = now.AddDays(-10)
}
};
await _unknownsRepo.UpsertAsync(subjectKey, unknowns, CancellationToken.None);
// Act: Score
var service = CreateService();
await service.RecomputeAsync(subjectKey, CancellationToken.None);
// Assert: Original data preserved
var retrieved = await _unknownsRepo.GetByIdAsync("preserve-1", CancellationToken.None);
retrieved.Should().NotBeNull();
retrieved!.Reason.Should().Be("Missing symbol resolution");
retrieved.EdgeFrom.Should().Be("caller");
retrieved.EdgeTo.Should().Be("target");
retrieved.SubjectKey.Should().Be(subjectKey);
}
[Fact]
public async Task EndToEnd_MultipleSubjectsIndependent()
{
    // Arrange: one unknown per subject; subject1's item carries more uncertainty flags.
    var now = _timeProvider.GetUtcNow();
    var subject1Doc = new UnknownSymbolDocument
    {
        Id = "s1-unknown",
        SubjectKey = "subject1|1.0.0",
        Purl = "pkg:npm/s1pkg@1.0.0",
        LastAnalyzedAt = now.AddDays(-14),
        Flags = new UnknownFlags { NoProvenanceAnchor = true, VersionRange = true },
        CreatedAt = now.AddDays(-20)
    };
    var subject2Doc = new UnknownSymbolDocument
    {
        Id = "s2-unknown",
        SubjectKey = "subject2|2.0.0",
        LastAnalyzedAt = now,
        Flags = new UnknownFlags(),
        CreatedAt = now.AddDays(-1)
    };
    await _unknownsRepo.UpsertAsync("subject1|1.0.0", new[] { subject1Doc }, CancellationToken.None);
    await _unknownsRepo.UpsertAsync("subject2|2.0.0", new[] { subject2Doc }, CancellationToken.None);

    // Act: score each subject on its own.
    var service = CreateService();
    var result1 = await service.RecomputeAsync("subject1|1.0.0", CancellationToken.None);
    var result2 = await service.RecomputeAsync("subject2|2.0.0", CancellationToken.None);

    // Assert: results are scoped per subject.
    result1.SubjectKey.Should().Be("subject1|1.0.0");
    result1.TotalUnknowns.Should().Be(1);
    result2.SubjectKey.Should().Be("subject2|2.0.0");
    result2.TotalUnknowns.Should().Be(1);

    // The more-flagged item should outscore the quiet one.
    var s1 = await _unknownsRepo.GetByIdAsync("s1-unknown", CancellationToken.None);
    var s2 = await _unknownsRepo.GetByIdAsync("s2-unknown", CancellationToken.None);
    s1!.Score.Should().BeGreaterThan(s2!.Score, "S1 has more uncertainty flags");
}
#endregion
#region Rescan Scheduling Tests
[Fact]
public async Task Rescan_GetDueForRescan_ReturnsCorrectBandItems()
{
    // Arrange: one item per band; only the HOT one has a rescan time in the past.
    var now = _timeProvider.GetUtcNow();
    const string subjectKey = "rescan|1.0.0";

    UnknownSymbolDocument Doc(string id, UnknownsBand band, DateTimeOffset nextRescan) => new()
    {
        Id = id,
        SubjectKey = subjectKey,
        Band = band,
        NextScheduledRescan = nextRescan,
        CreatedAt = now.AddDays(-1)
    };

    var unknowns = new[]
    {
        Doc("hot-rescan", UnknownsBand.Hot, now.AddMinutes(-5)),  // already due
        Doc("warm-rescan", UnknownsBand.Warm, now.AddHours(12)),  // due later
        Doc("cold-rescan", UnknownsBand.Cold, now.AddDays(7))     // due next week
    };
    await _unknownsRepo.UpsertAsync(subjectKey, unknowns, CancellationToken.None);

    // Act: ask for due items per band.
    var hotDue = await _unknownsRepo.GetDueForRescanAsync(UnknownsBand.Hot, 10, CancellationToken.None);
    var warmDue = await _unknownsRepo.GetDueForRescanAsync(UnknownsBand.Warm, 10, CancellationToken.None);

    // Assert: only the past-due HOT item is reported.
    hotDue.Should().Contain(u => u.Id == "hot-rescan");
    warmDue.Should().NotContain(u => u.Id == "warm-rescan", "WARM item not yet due");
}
[Fact]
public async Task Rescan_NextScheduledRescan_SetByBand()
{
    // Arrange: a heavily-flagged, widely-deployed unknown (expected HOT cadence)
    // and a freshly-analyzed, flagless one (expected COLD cadence).
    var now = _timeProvider.GetUtcNow();
    const string subjectKey = "schedule|1.0.0";
    var hotCandidate = new UnknownSymbolDocument
    {
        Id = "schedule-hot",
        SubjectKey = subjectKey,
        Purl = "pkg:npm/schedule@1.0.0",
        LastAnalyzedAt = now.AddDays(-14),
        Flags = new UnknownFlags
        {
            NoProvenanceAnchor = true,
            VersionRange = true,
            ConflictingFeeds = true,
            MissingVector = true
        },
        CreatedAt = now.AddDays(-20)
    };
    var coldCandidate = new UnknownSymbolDocument
    {
        Id = "schedule-cold",
        SubjectKey = subjectKey,
        LastAnalyzedAt = now,
        Flags = new UnknownFlags(),
        CreatedAt = now.AddDays(-1)
    };
    _deploymentRefs.SetDeploymentCount("pkg:npm/schedule@1.0.0", 100);
    await _unknownsRepo.UpsertAsync(subjectKey, new[] { hotCandidate, coldCandidate }, CancellationToken.None);

    // Act
    await CreateService().RecomputeAsync(subjectKey, CancellationToken.None);

    // Assert: rescan cadence follows the assigned band.
    var hot = await _unknownsRepo.GetByIdAsync("schedule-hot", CancellationToken.None);
    var cold = await _unknownsRepo.GetByIdAsync("schedule-cold", CancellationToken.None);
    if (hot!.Band == UnknownsBand.Hot)
    {
        // Only checked when the scorer actually banded the item HOT.
        hot.NextScheduledRescan.Should().Be(now.AddMinutes(_defaultOptions.HotRescanMinutes));
    }
    cold!.NextScheduledRescan.Should().Be(now.AddDays(_defaultOptions.ColdRescanDays));
}
#endregion
#region Query and Pagination Tests
[Fact]
public async Task Query_PaginationWorks()
{
    // Arrange: twenty WARM unknowns with distinct ids and creation times.
    var now = _timeProvider.GetUtcNow();
    const string subjectKey = "pagination|1.0.0";
    var unknowns = new List<UnknownSymbolDocument>();
    for (var i = 1; i <= 20; i++)
    {
        unknowns.Add(new UnknownSymbolDocument
        {
            Id = $"page-{i:D2}",
            SubjectKey = subjectKey,
            Band = UnknownsBand.Warm,
            CreatedAt = now.AddDays(-i)
        });
    }
    await _unknownsRepo.UpsertAsync(subjectKey, unknowns, CancellationToken.None);

    // Act: fetch two consecutive pages of five.
    var page1 = await _unknownsRepo.QueryAsync(UnknownsBand.Warm, limit: 5, offset: 0, CancellationToken.None);
    var page2 = await _unknownsRepo.QueryAsync(UnknownsBand.Warm, limit: 5, offset: 5, CancellationToken.None);

    // Assert: both pages are full and share no ids.
    page1.Should().HaveCount(5);
    page2.Should().HaveCount(5);
    page1.Select(u => u.Id).Should().NotIntersectWith(page2.Select(u => u.Id));
}
[Fact]
public async Task Query_FilterByBandReturnsOnlyMatchingItems()
{
    // Arrange: two HOT, one WARM, one COLD item under one subject.
    var now = _timeProvider.GetUtcNow();
    const string subjectKey = "filter|1.0.0";

    UnknownSymbolDocument Doc(string id, UnknownsBand band) =>
        new() { Id = id, SubjectKey = subjectKey, Band = band, CreatedAt = now };

    var unknowns = new[]
    {
        Doc("hot-1", UnknownsBand.Hot),
        Doc("hot-2", UnknownsBand.Hot),
        Doc("warm-1", UnknownsBand.Warm),
        Doc("cold-1", UnknownsBand.Cold)
    };
    await _unknownsRepo.UpsertAsync(subjectKey, unknowns, CancellationToken.None);

    // Act: query filtered by band, then unfiltered.
    var hotOnly = await _unknownsRepo.QueryAsync(UnknownsBand.Hot, 10, 0, CancellationToken.None);
    var warmOnly = await _unknownsRepo.QueryAsync(UnknownsBand.Warm, 10, 0, CancellationToken.None);
    var all = await _unknownsRepo.QueryAsync(null, 10, 0, CancellationToken.None);

    // Assert: each result set matches its filter exactly.
    hotOnly.Should().HaveCount(2);
    hotOnly.Should().AllSatisfy(u => u.Band.Should().Be(UnknownsBand.Hot));
    warmOnly.Should().HaveCount(1);
    warmOnly.Single().Band.Should().Be(UnknownsBand.Warm);
    all.Should().HaveCount(4);
}
#endregion
#region Explain / Normalization Trace Tests
[Fact]
public async Task Explain_NormalizationTraceContainsAllFactors()
{
    // Arrange: an unknown with popularity, centrality, uncertainty and staleness signals.
    var now = _timeProvider.GetUtcNow();
    const string subjectKey = "explain|1.0.0";
    var unknown = new UnknownSymbolDocument
    {
        Id = "explain-1",
        SubjectKey = subjectKey,
        Purl = "pkg:npm/explain@1.0.0",
        SymbolId = "sym-explain",
        CallgraphId = "cg-explain",
        LastAnalyzedAt = now.AddDays(-7),
        Flags = new UnknownFlags
        {
            NoProvenanceAnchor = true,
            VersionRange = true
        },
        CreatedAt = now.AddDays(-10)
    };
    _deploymentRefs.SetDeploymentCount("pkg:npm/explain@1.0.0", 75);
    _graphMetrics.SetMetrics("sym-explain", "cg-explain", new GraphMetrics { NodeId = "sym-explain", CallgraphId = "cg-explain", Degree = 15, Betweenness = 450.0 });
    await _unknownsRepo.UpsertAsync(subjectKey, new[] { unknown }, CancellationToken.None);

    // Act
    await CreateService().RecomputeAsync(subjectKey, CancellationToken.None);

    // Assert: the persisted trace explains every scoring factor.
    var explained = await _unknownsRepo.GetByIdAsync("explain-1", CancellationToken.None);
    explained.Should().NotBeNull();
    var trace = explained!.NormalizationTrace;
    trace.Should().NotBeNull();

    // All five factor weights are recorded.
    foreach (var weightKey in new[] { "wP", "wE", "wU", "wC", "wS" })
    {
        trace!.Weights.Should().ContainKey(weightKey);
    }

    // Popularity factor.
    trace!.RawPopularity.Should().Be(75);
    trace.NormalizedPopularity.Should().BeInRange(0.0, 1.0);
    trace.PopularityFormula.Should().Contain("75");

    // Uncertainty factor.
    trace.ActiveFlags.Should().Contain("NoProvenanceAnchor");
    trace.ActiveFlags.Should().Contain("VersionRange");
    trace.NormalizedUncertainty.Should().BeInRange(0.0, 1.0);

    // Centrality factor.
    trace.RawCentrality.Should().Be(450.0);
    trace.NormalizedCentrality.Should().BeInRange(0.0, 1.0);

    // Staleness factor.
    trace.RawStaleness.Should().Be(7);
    trace.NormalizedStaleness.Should().BeInRange(0.0, 1.0);

    // Final outcome mirrors the stored score and band.
    trace.FinalScore.Should().Be(explained.Score);
    trace.AssignedBand.Should().Be(explained.Band.ToString());
}
[Fact]
public async Task Explain_TraceEnablesReplay()
{
    // Arrange: score a single unknown so a trace is persisted.
    var now = _timeProvider.GetUtcNow();
    const string subjectKey = "replay|1.0.0";
    var unknown = new UnknownSymbolDocument
    {
        Id = "replay-1",
        SubjectKey = subjectKey,
        Purl = "pkg:npm/replay@1.0.0",
        LastAnalyzedAt = now.AddDays(-10),
        Flags = new UnknownFlags { NoProvenanceAnchor = true },
        CreatedAt = now.AddDays(-15)
    };
    _deploymentRefs.SetDeploymentCount("pkg:npm/replay@1.0.0", 30);
    await _unknownsRepo.UpsertAsync(subjectKey, new[] { unknown }, CancellationToken.None);
    await CreateService().RecomputeAsync(subjectKey, CancellationToken.None);

    // Act: rebuild the score from the persisted trace alone
    // (weighted sum of the normalized factors, in weight order).
    var scored = await _unknownsRepo.GetByIdAsync("replay-1", CancellationToken.None);
    var trace = scored!.NormalizationTrace!;
    var factors = new (string Weight, double Normalized)[]
    {
        ("wP", trace.NormalizedPopularity),
        ("wE", trace.NormalizedExploitPotential),
        ("wU", trace.NormalizedUncertainty),
        ("wC", trace.NormalizedCentrality),
        ("wS", trace.NormalizedStaleness)
    };
    var replayedScore = factors.Sum(f => trace.Weights[f.Weight] * f.Normalized);

    // Assert: the replayed weighted sum matches the recorded final score.
    replayedScore.Should().BeApproximately(trace.FinalScore, 0.001);
    replayedScore.Should().BeApproximately(scored.Score, 0.001);
}
#endregion
#region Determinism Tests
[Fact]
public async Task Determinism_SameInputsProduceSameScores()
{
    // Arrange: two unknowns identical in every scored field, stored under different subjects.
    var now = _timeProvider.GetUtcNow();

    UnknownSymbolDocument Clone(string id, string subjectKey) => new()
    {
        Id = id,
        SubjectKey = subjectKey,
        Purl = "pkg:npm/determinism@1.0.0",
        SymbolId = "sym-det",
        CallgraphId = "cg-det",
        LastAnalyzedAt = now.AddDays(-5),
        Flags = new UnknownFlags { NoProvenanceAnchor = true, VersionRange = true },
        CreatedAt = now.AddDays(-10)
    };

    _deploymentRefs.SetDeploymentCount("pkg:npm/determinism@1.0.0", 42);
    _graphMetrics.SetMetrics("sym-det", "cg-det", new GraphMetrics { NodeId = "sym-det", CallgraphId = "cg-det", Degree = 8, Betweenness = 200.0 });
    await _unknownsRepo.UpsertAsync("det-subject1|1.0.0", new[] { Clone("det-1", "det-subject1|1.0.0") }, CancellationToken.None);
    await _unknownsRepo.UpsertAsync("det-subject2|1.0.0", new[] { Clone("det-2", "det-subject2|1.0.0") }, CancellationToken.None);

    // Act: score both subjects.
    var service = CreateService();
    await service.RecomputeAsync("det-subject1|1.0.0", CancellationToken.None);
    await service.RecomputeAsync("det-subject2|1.0.0", CancellationToken.None);

    // Assert: identical inputs yield identical scores across subjects.
    var scored1 = await _unknownsRepo.GetByIdAsync("det-1", CancellationToken.None);
    var scored2 = await _unknownsRepo.GetByIdAsync("det-2", CancellationToken.None);
    scored1!.Score.Should().Be(scored2!.Score);
    scored1.Band.Should().Be(scored2.Band);
    scored1.PopularityScore.Should().Be(scored2.PopularityScore);
    scored1.UncertaintyScore.Should().Be(scored2.UncertaintyScore);
    scored1.CentralityScore.Should().Be(scored2.CentralityScore);
    scored1.StalenessScore.Should().Be(scored2.StalenessScore);
}
[Fact]
public async Task Determinism_ConsecutiveRecomputesProduceSameResults()
{
    // Arrange: a single unknown with a seeded deployment count.
    var now = _timeProvider.GetUtcNow();
    const string subjectKey = "consecutive|1.0.0";
    var unknown = new UnknownSymbolDocument
    {
        Id = "consec-1",
        SubjectKey = subjectKey,
        Purl = "pkg:npm/consecutive@1.0.0",
        LastAnalyzedAt = now.AddDays(-3),
        Flags = new UnknownFlags { NoProvenanceAnchor = true },
        CreatedAt = now.AddDays(-5)
    };
    _deploymentRefs.SetDeploymentCount("pkg:npm/consecutive@1.0.0", 25);
    await _unknownsRepo.UpsertAsync(subjectKey, new[] { unknown }, CancellationToken.None);

    // Act: recompute the same subject twice in a row.
    var service = CreateService();
    var result1 = await service.RecomputeAsync(subjectKey, CancellationToken.None);
    var score1 = (await _unknownsRepo.GetByIdAsync("consec-1", CancellationToken.None))!.Score;
    var result2 = await service.RecomputeAsync(subjectKey, CancellationToken.None);
    var score2 = (await _unknownsRepo.GetByIdAsync("consec-1", CancellationToken.None))!.Score;

    // Assert: scores and band counts are stable across recomputes.
    score1.Should().Be(score2);
    result1.HotCount.Should().Be(result2.HotCount);
    result1.WarmCount.Should().Be(result2.WarmCount);
    result1.ColdCount.Should().Be(result2.ColdCount);
}
#endregion
#region Test Infrastructure
/// <summary>
/// Deterministic test clock: reports a fixed instant that can only be moved
/// forward explicitly via <see cref="Advance"/>.
/// </summary>
private sealed class MockTimeProvider : TimeProvider
{
    private DateTimeOffset _current;

    public MockTimeProvider(DateTimeOffset now)
    {
        _current = now;
    }

    /// <summary>Returns the currently configured instant.</summary>
    public override DateTimeOffset GetUtcNow() => _current;

    /// <summary>Shifts the clock forward (or backward, for negative spans) by <paramref name="duration"/>.</summary>
    public void Advance(TimeSpan duration) => _current += duration;
}
/// <summary>
/// List-backed test double for <see cref="IUnknownsRepository"/>.
/// Not thread-safe; intended for single-threaded test use only.
/// </summary>
private sealed class FullInMemoryUnknownsRepository : IUnknownsRepository
{
    private readonly List<UnknownSymbolDocument> _stored = new();

    // Clock used by GetDueForRescanAsync. The previous implementation compared
    // against DateTimeOffset.UtcNow directly, which made "due for rescan"
    // queries depend on the wall clock relative to the test's mocked "now" —
    // a latent source of flakiness. Tests should pass their MockTimeProvider.
    private readonly TimeProvider _clock;

    /// <param name="clock">
    /// Clock for rescan-due comparisons; defaults to the system clock so
    /// existing parameterless construction keeps its old behavior.
    /// </param>
    public FullInMemoryUnknownsRepository(TimeProvider? clock = null)
        => _clock = clock ?? TimeProvider.System;

    /// <summary>Replaces every document stored under <paramref name="subjectKey"/> with <paramref name="items"/>.</summary>
    public Task UpsertAsync(string subjectKey, IEnumerable<UnknownSymbolDocument> items, CancellationToken cancellationToken)
    {
        _stored.RemoveAll(x => x.SubjectKey == subjectKey);
        _stored.AddRange(items);
        return Task.CompletedTask;
    }

    /// <summary>Returns all documents stored under <paramref name="subjectKey"/>.</summary>
    public Task<IReadOnlyList<UnknownSymbolDocument>> GetBySubjectAsync(string subjectKey, CancellationToken cancellationToken)
    {
        return Task.FromResult<IReadOnlyList<UnknownSymbolDocument>>(
            _stored.Where(x => x.SubjectKey == subjectKey).ToList());
    }

    /// <summary>Counts the documents stored under <paramref name="subjectKey"/>.</summary>
    public Task<int> CountBySubjectAsync(string subjectKey, CancellationToken cancellationToken)
    {
        return Task.FromResult(_stored.Count(x => x.SubjectKey == subjectKey));
    }

    /// <summary>Updates existing documents by id, inserting any that are not present.</summary>
    public Task BulkUpdateAsync(IEnumerable<UnknownSymbolDocument> items, CancellationToken cancellationToken)
    {
        foreach (var item in items)
        {
            var existing = _stored.FindIndex(x => x.Id == item.Id);
            if (existing >= 0)
                _stored[existing] = item;
            else
                _stored.Add(item);
        }
        return Task.CompletedTask;
    }

    /// <summary>Returns the distinct subject keys across all stored documents.</summary>
    public Task<IReadOnlyList<string>> GetAllSubjectKeysAsync(CancellationToken cancellationToken)
    {
        return Task.FromResult<IReadOnlyList<string>>(
            _stored.Select(x => x.SubjectKey).Distinct().ToList());
    }

    /// <summary>
    /// Returns up to <paramref name="limit"/> items in <paramref name="band"/> whose next
    /// scheduled rescan is unset or not later than the injected clock's "now".
    /// </summary>
    public Task<IReadOnlyList<UnknownSymbolDocument>> GetDueForRescanAsync(
        UnknownsBand band,
        int limit,
        CancellationToken cancellationToken)
    {
        var now = _clock.GetUtcNow();
        return Task.FromResult<IReadOnlyList<UnknownSymbolDocument>>(
            _stored
                .Where(x => x.Band == band && (x.NextScheduledRescan == null || x.NextScheduledRescan <= now))
                .Take(limit)
                .ToList());
    }

    /// <summary>Pages through stored documents, optionally filtered by <paramref name="band"/>.</summary>
    public Task<IReadOnlyList<UnknownSymbolDocument>> QueryAsync(
        UnknownsBand? band,
        int limit,
        int offset,
        CancellationToken cancellationToken)
    {
        var query = _stored.AsEnumerable();
        if (band.HasValue)
        {
            query = query.Where(x => x.Band == band.Value);
        }
        return Task.FromResult<IReadOnlyList<UnknownSymbolDocument>>(
            query.Skip(offset).Take(limit).ToList());
    }

    /// <summary>Returns the document with the given <paramref name="id"/>, or null if absent.</summary>
    public Task<UnknownSymbolDocument?> GetByIdAsync(string id, CancellationToken cancellationToken)
    {
        return Task.FromResult(_stored.FirstOrDefault(x => x.Id == id));
    }
}
/// <summary>
/// Minimal in-memory stand-in for <see cref="IDeploymentRefsRepository"/>:
/// only per-purl deployment counts are tracked; all other operations are no-ops.
/// </summary>
private sealed class InMemoryDeploymentRefsRepository : IDeploymentRefsRepository
{
    private readonly Dictionary<string, int> _counts = new();

    /// <summary>Seeds the deployment count reported for <paramref name="purl"/>.</summary>
    public void SetDeploymentCount(string purl, int count) => _counts[purl] = count;

    /// <summary>Returns the seeded count for <paramref name="purl"/>, or 0 when none was seeded.</summary>
    public Task<int> CountDeploymentsAsync(string purl, CancellationToken cancellationToken) =>
        Task.FromResult(_counts.GetValueOrDefault(purl));

    // Deployment ids are never tracked by this fake.
    public Task<IReadOnlyList<string>> GetDeploymentIdsAsync(string purl, int limit, CancellationToken cancellationToken) =>
        Task.FromResult<IReadOnlyList<string>>(Array.Empty<string>());

    public Task UpsertAsync(DeploymentRef deployment, CancellationToken cancellationToken) => Task.CompletedTask;

    public Task BulkUpsertAsync(IEnumerable<DeploymentRef> deployments, CancellationToken cancellationToken) => Task.CompletedTask;

    public Task<DeploymentSummary?> GetSummaryAsync(string purl, CancellationToken cancellationToken) =>
        Task.FromResult<DeploymentSummary?>(null);
}
/// <summary>
/// Dictionary-backed stand-in for <see cref="IGraphMetricsRepository"/> keyed by
/// "symbolId:callgraphId"; write/maintenance operations are no-ops.
/// </summary>
private sealed class InMemoryGraphMetricsRepository : IGraphMetricsRepository
{
    private readonly Dictionary<string, GraphMetrics> _metrics = new();

    // Composite lookup key shared by the seed and read paths.
    private static string Key(string symbolId, string callgraphId) => $"{symbolId}:{callgraphId}";

    /// <summary>Seeds the metrics returned for the given symbol/callgraph pair.</summary>
    public void SetMetrics(string symbolId, string callgraphId, GraphMetrics metrics) =>
        _metrics[Key(symbolId, callgraphId)] = metrics;

    /// <summary>Returns the seeded metrics for the pair, or null when none were seeded.</summary>
    public Task<GraphMetrics?> GetMetricsAsync(string symbolId, string callgraphId, CancellationToken cancellationToken)
    {
        _metrics.TryGetValue(Key(symbolId, callgraphId), out var found);
        return Task.FromResult(found);
    }

    public Task UpsertAsync(GraphMetrics metrics, CancellationToken cancellationToken) => Task.CompletedTask;

    public Task BulkUpsertAsync(IEnumerable<GraphMetrics> metrics, CancellationToken cancellationToken) => Task.CompletedTask;

    public Task<IReadOnlyList<string>> GetStaleCallgraphsAsync(TimeSpan maxAge, int limit, CancellationToken cancellationToken) =>
        Task.FromResult<IReadOnlyList<string>>(Array.Empty<string>());

    public Task DeleteByCallgraphAsync(string callgraphId, CancellationToken cancellationToken) => Task.CompletedTask;
}
#endregion
}

View File

@@ -297,7 +297,7 @@ public class UnknownsScoringServiceTests
};
_deploymentRefs.SetDeploymentCount("pkg:npm/test@1.0.0", 50);
_graphMetrics.SetMetrics("sym-1", "cg-1", new GraphMetrics(Degree: 10, Betweenness: 500.0));
_graphMetrics.SetMetrics("sym-1", "cg-1", new GraphMetrics { NodeId = "sym-1", CallgraphId = "cg-1", Degree = 10, Betweenness = 500.0 });
var scored = await service.ScoreUnknownAsync(unknown, _defaultOptions, CancellationToken.None);
@@ -495,6 +495,20 @@ public class UnknownsScoringServiceTests
return Task.FromResult<IReadOnlyList<UnknownSymbolDocument>>(
_stored.Where(x => x.Band == band).Take(limit).ToList());
}
public Task<IReadOnlyList<UnknownSymbolDocument>> QueryAsync(UnknownsBand? band, int limit, int offset, CancellationToken cancellationToken)
{
var query = _stored.AsEnumerable();
if (band.HasValue)
query = query.Where(x => x.Band == band.Value);
return Task.FromResult<IReadOnlyList<UnknownSymbolDocument>>(
query.Skip(offset).Take(limit).ToList());
}
public Task<UnknownSymbolDocument?> GetByIdAsync(string id, CancellationToken cancellationToken)
{
return Task.FromResult(_stored.FirstOrDefault(x => x.Id == id));
}
}
private sealed class InMemoryDeploymentRefsRepository : IDeploymentRefsRepository
@@ -512,6 +526,13 @@ public class UnknownsScoringServiceTests
{
return Task.FromResult<IReadOnlyList<string>>(Array.Empty<string>());
}
public Task UpsertAsync(DeploymentRef deployment, CancellationToken cancellationToken) => Task.CompletedTask;
public Task BulkUpsertAsync(IEnumerable<DeploymentRef> deployments, CancellationToken cancellationToken) => Task.CompletedTask;
public Task<DeploymentSummary?> GetSummaryAsync(string purl, CancellationToken cancellationToken) =>
Task.FromResult<DeploymentSummary?>(null);
}
private sealed class InMemoryGraphMetricsRepository : IGraphMetricsRepository
@@ -528,6 +549,15 @@ public class UnknownsScoringServiceTests
_metrics.TryGetValue($"{symbolId}:{callgraphId}", out var metrics);
return Task.FromResult(metrics);
}
public Task UpsertAsync(GraphMetrics metrics, CancellationToken cancellationToken) => Task.CompletedTask;
public Task BulkUpsertAsync(IEnumerable<GraphMetrics> metrics, CancellationToken cancellationToken) => Task.CompletedTask;
public Task<IReadOnlyList<string>> GetStaleCallgraphsAsync(TimeSpan maxAge, int limit, CancellationToken cancellationToken) =>
Task.FromResult<IReadOnlyList<string>>(Array.Empty<string>());
public Task DeleteByCallgraphAsync(string callgraphId, CancellationToken cancellationToken) => Task.CompletedTask;
}
#endregion