This commit is contained in:
StellaOps Bot
2025-12-14 23:20:14 +02:00
parent 3411e825cd
commit b058dbe031
356 changed files with 68310 additions and 1108 deletions

View File

@@ -1,10 +1,11 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
@@ -71,9 +72,46 @@ internal sealed class CallgraphIngestionService : ICallgraphIngestionService
: normalized.SchemaVersion;
var analyzerMeta = request.Analyzer ?? normalized.Analyzer;
parseStream.Position = 0;
var artifactHash = ComputeSha256(artifactBytes);
var graphHash = ComputeGraphHash(normalized);
var document = new CallgraphDocument
{
Language = parser.Language,
LanguageType = CallgraphLanguage.Unknown,
Component = request.Component,
Version = request.Version,
Nodes = new List<CallgraphNode>(normalized.Nodes),
Edges = new List<CallgraphEdge>(normalized.Edges),
Roots = new List<CallgraphRoot>(normalized.Roots),
Entrypoints = normalized.Entrypoints is null
? new List<CallgraphEntrypoint>()
: new List<CallgraphEntrypoint>(normalized.Entrypoints),
Metadata = request.Metadata is null
? null
: new Dictionary<string, string?>(request.Metadata, StringComparer.OrdinalIgnoreCase),
Artifact = new CallgraphArtifactMetadata
{
ContentType = request.ArtifactContentType
},
IngestedAt = timeProvider.GetUtcNow()
};
document.Metadata ??= new Dictionary<string, string?>(StringComparer.OrdinalIgnoreCase);
document.Metadata["formatVersion"] = normalized.FormatVersion;
document.Metadata["schemaVersion"] = schemaVersion;
if (analyzerMeta is not null)
{
foreach (var kv in analyzerMeta)
{
document.Metadata[$"analyzer.{kv.Key}"] = kv.Value;
}
}
document.SchemaVersion = schemaVersion;
document = CallgraphSchemaMigrator.EnsureV1(document);
var graphHash = ComputeGraphHash(document);
document.GraphHash = graphHash;
var manifest = new CallgraphManifest
{
@@ -83,9 +121,9 @@ internal sealed class CallgraphIngestionService : ICallgraphIngestionService
ArtifactHash = artifactHash,
GraphHash = graphHash,
SchemaVersion = schemaVersion,
NodeCount = normalized.Nodes.Count,
EdgeCount = normalized.Edges.Count,
RootCount = normalized.Roots.Count,
NodeCount = document.Nodes.Count,
EdgeCount = document.Edges.Count,
RootCount = document.Roots?.Count ?? 0,
CreatedAt = timeProvider.GetUtcNow()
};
@@ -106,43 +144,14 @@ internal sealed class CallgraphIngestionService : ICallgraphIngestionService
parseStream,
cancellationToken).ConfigureAwait(false);
var document = new CallgraphDocument
{
Language = parser.Language,
Component = request.Component,
Version = request.Version,
Nodes = new List<CallgraphNode>(normalized.Nodes),
Edges = new List<CallgraphEdge>(normalized.Edges),
Roots = new List<CallgraphRoot>(normalized.Roots),
Metadata = request.Metadata is null
? null
: new Dictionary<string, string?>(request.Metadata, StringComparer.OrdinalIgnoreCase),
Artifact = new CallgraphArtifactMetadata
{
Path = artifactMetadata.Path,
Hash = artifactMetadata.Hash,
CasUri = artifactMetadata.CasUri,
ManifestPath = artifactMetadata.ManifestPath,
ManifestCasUri = artifactMetadata.ManifestCasUri,
GraphHash = graphHash,
ContentType = artifactMetadata.ContentType,
Length = artifactMetadata.Length
},
IngestedAt = timeProvider.GetUtcNow()
};
document.Metadata ??= new Dictionary<string, string?>(StringComparer.OrdinalIgnoreCase);
document.Metadata["formatVersion"] = normalized.FormatVersion;
document.Metadata["schemaVersion"] = schemaVersion;
if (analyzerMeta is not null)
{
foreach (var kv in analyzerMeta)
{
document.Metadata[$"analyzer.{kv.Key}"] = kv.Value;
}
}
document.GraphHash = graphHash;
document.SchemaVersion = schemaVersion;
document.Artifact.Path = artifactMetadata.Path;
document.Artifact.Hash = artifactMetadata.Hash;
document.Artifact.CasUri = artifactMetadata.CasUri;
document.Artifact.ManifestPath = artifactMetadata.ManifestPath;
document.Artifact.ManifestCasUri = artifactMetadata.ManifestCasUri;
document.Artifact.GraphHash = graphHash;
document.Artifact.ContentType = artifactMetadata.ContentType;
document.Artifact.Length = artifactMetadata.Length;
document = await repository.UpsertAsync(document, cancellationToken).ConfigureAwait(false);
@@ -166,7 +175,7 @@ internal sealed class CallgraphIngestionService : ICallgraphIngestionService
document.Artifact.Path,
document.Artifact.Hash,
document.Artifact.CasUri,
graphHash,
document.GraphHash,
document.Artifact.ManifestCasUri,
schemaVersion,
document.Nodes.Count,
@@ -216,13 +225,14 @@ internal sealed class CallgraphIngestionService : ICallgraphIngestionService
return Convert.ToHexString(hash);
}
private static string ComputeGraphHash(CallgraphParseResult result)
private static string ComputeGraphHash(CallgraphDocument document)
{
var builder = new StringBuilder();
builder.Append("schema|").Append(result.SchemaVersion).AppendLine();
builder.Append("schema|").Append(document.Schema).AppendLine();
builder.Append("language|").Append(document.LanguageType).Append('|').Append(document.Language).AppendLine();
foreach (var node in result.Nodes.OrderBy(n => n.Id, StringComparer.Ordinal))
foreach (var node in document.Nodes.OrderBy(n => n.Id, StringComparer.Ordinal))
{
builder
.Append(node.Id).Append('|')
@@ -236,29 +246,62 @@ internal sealed class CallgraphIngestionService : ICallgraphIngestionService
.Append(node.BuildId).Append('|')
.Append(node.CodeId).Append('|')
.Append(node.Language).Append('|')
.Append(node.SymbolKey).Append('|')
.Append(node.ArtifactKey).Append('|')
.Append(node.Visibility).Append('|')
.Append(node.IsEntrypointCandidate).Append('|')
.Append(node.Flags).Append('|')
.Append(Join(node.Evidence)).Append('|')
.Append(JoinDict(node.Analyzer))
.Append(JoinDict(node.Analyzer)).Append('|')
.Append(JoinDict(node.Attributes))
.AppendLine();
}
foreach (var edge in result.Edges.OrderBy(e => e.SourceId, StringComparer.Ordinal).ThenBy(e => e.TargetId, StringComparer.Ordinal))
foreach (var edge in document.Edges
.OrderBy(e => e.SourceId, StringComparer.Ordinal)
.ThenBy(e => e.TargetId, StringComparer.Ordinal)
.ThenBy(e => e.Type, StringComparer.Ordinal)
.ThenBy(e => e.Offset ?? -1))
{
builder
.Append(edge.SourceId).Append("->").Append(edge.TargetId).Append('|')
.Append(edge.Type).Append('|')
.Append(edge.Kind).Append('|')
.Append(edge.Reason).Append('|')
.Append(edge.Weight.ToString("G17", CultureInfo.InvariantCulture)).Append('|')
.Append(edge.Offset?.ToString(CultureInfo.InvariantCulture) ?? string.Empty).Append('|')
.Append(edge.IsResolved).Append('|')
.Append(edge.Provenance).Append('|')
.Append(edge.Purl).Append('|')
.Append(edge.SymbolDigest).Append('|')
.Append(edge.Confidence?.ToString() ?? string.Empty).Append('|')
.Append(edge.Confidence?.ToString("G17", CultureInfo.InvariantCulture) ?? string.Empty).Append('|')
.Append(Join(edge.Candidates)).Append('|')
.Append(Join(edge.Evidence))
.AppendLine();
}
foreach (var root in result.Roots.OrderBy(r => r.Id, StringComparer.Ordinal))
foreach (var root in (document.Roots ?? new List<CallgraphRoot>()).OrderBy(r => r.Id, StringComparer.Ordinal))
{
builder.Append("root|").Append(root.Id).Append('|').Append(root.Phase).Append('|').Append(root.Source).AppendLine();
}
foreach (var entrypoint in document.Entrypoints
.OrderBy(e => (int)e.Phase)
.ThenBy(e => e.Order)
.ThenBy(e => e.NodeId, StringComparer.Ordinal))
{
builder
.Append("entrypoint|").Append(entrypoint.NodeId).Append('|')
.Append(entrypoint.Kind).Append('|')
.Append(entrypoint.Framework).Append('|')
.Append(entrypoint.Phase).Append('|')
.Append(entrypoint.Route).Append('|')
.Append(entrypoint.HttpMethod).Append('|')
.Append(entrypoint.Source).Append('|')
.Append(entrypoint.Order.ToString(CultureInfo.InvariantCulture))
.AppendLine();
}
return ComputeSha256(Encoding.UTF8.GetBytes(builder.ToString()));
}
@@ -286,6 +329,21 @@ internal sealed class CallgraphIngestionService : ICallgraphIngestionService
}
return ordered.ToString();
}
/// <summary>
/// Flattens a dictionary into a deterministic <c>key=value;</c> sequence for hashing.
/// </summary>
/// <param name="values">Pairs to serialize; may be <c>null</c>.</param>
/// <returns>
/// An empty string when <paramref name="values"/> is <c>null</c>; otherwise every pair rendered as
/// <c>key=value;</c>, concatenated in ordinal key order.
/// </returns>
private static string JoinDict(IReadOnlyDictionary<string, string>? values)
{
    if (values is null)
    {
        return string.Empty;
    }

    // Ordinal ordering keeps the output independent of dictionary enumeration order and culture.
    var segments = values
        .OrderBy(pair => pair.Key, StringComparer.Ordinal)
        .Select(pair => $"{pair.Key}={pair.Value};");

    return string.Concat(segments);
}
}
/// <summary>

View File

@@ -35,6 +35,7 @@ internal sealed class CallgraphNormalizationService : ICallgraphNormalizationSer
var edges = NormalizeEdges(result.Edges, nodesById);
var roots = NormalizeRoots(result.Roots);
var entrypoints = NormalizeEntrypoints(result.Entrypoints, nodesById);
return new CallgraphParseResult(
Nodes: nodesById.Values.OrderBy(n => n.Id, StringComparer.Ordinal).ToList(),
@@ -42,7 +43,8 @@ internal sealed class CallgraphNormalizationService : ICallgraphNormalizationSer
Roots: roots,
FormatVersion: string.IsNullOrWhiteSpace(result.FormatVersion) ? "1.0" : result.FormatVersion.Trim(),
SchemaVersion: string.IsNullOrWhiteSpace(result.SchemaVersion) ? "1.0" : result.SchemaVersion.Trim(),
Analyzer: result.Analyzer);
Analyzer: result.Analyzer,
Entrypoints: entrypoints);
}
private static CallgraphNode NormalizeNode(CallgraphNode node, string language)
@@ -154,6 +156,79 @@ internal sealed class CallgraphNormalizationService : ICallgraphNormalizationSer
.ToList();
}
/// <summary>
/// Produces a cleaned, de-duplicated and deterministically ordered copy of the parsed entrypoints.
/// </summary>
/// <param name="entrypoints">Entrypoints reported by the parser; may be <c>null</c>.</param>
/// <param name="nodes">Normalized nodes keyed by id; entrypoints whose node id is not a key are dropped.</param>
/// <returns>
/// <c>null</c> when <paramref name="entrypoints"/> is <c>null</c>; otherwise new entrypoint instances
/// sorted by phase and then by their identifying fields, with <c>Order</c> numbered from zero inside each phase.
/// </returns>
private static IReadOnlyList<CallgraphEntrypoint>? NormalizeEntrypoints(
    IReadOnlyList<CallgraphEntrypoint>? entrypoints,
    IReadOnlyDictionary<string, CallgraphNode> nodes)
{
    if (entrypoints is null)
    {
        return null;
    }

    var unique = new List<CallgraphEntrypoint>(entrypoints.Count);
    var fingerprints = new HashSet<string>(StringComparer.Ordinal);

    foreach (var raw in entrypoints)
    {
        // Skip entrypoints with a blank node id or one that does not reference a known node.
        var nodeId = raw.NodeId?.Trim();
        if (string.IsNullOrWhiteSpace(nodeId) || !nodes.ContainsKey(nodeId))
        {
            continue;
        }

        var candidate = new CallgraphEntrypoint
        {
            NodeId = nodeId,
            Kind = raw.Kind,
            Route = TrimToNull(raw.Route),
            HttpMethod = TrimToNull(raw.HttpMethod)?.ToUpperInvariant(),
            Framework = raw.Framework,
            Source = TrimToNull(raw.Source),
            Phase = raw.Phase,
            Order = 0
        };

        // The fingerprint covers every field except Order, so exact repeats collapse to the first occurrence.
        var fingerprint = $"{candidate.NodeId}|{candidate.Kind}|{candidate.Framework}|{candidate.Phase}|{candidate.Route}|{candidate.HttpMethod}|{candidate.Source}";
        if (fingerprints.Add(fingerprint))
        {
            unique.Add(candidate);
        }
    }

    var phases = unique
        .OrderBy(e => (int)e.Phase)
        .ThenBy(e => e.NodeId, StringComparer.Ordinal)
        .ThenBy(e => e.Kind)
        .ThenBy(e => e.Framework)
        .ThenBy(e => e.Route, StringComparer.Ordinal)
        .ThenBy(e => e.HttpMethod, StringComparer.Ordinal)
        .ThenBy(e => e.Source, StringComparer.Ordinal)
        .GroupBy(e => e.Phase)
        .OrderBy(g => (int)g.Key);

    var result = new List<CallgraphEntrypoint>(unique.Count);
    foreach (var phase in phases)
    {
        // GroupBy keeps the sorted order inside each group, so the index is the per-phase rank.
        foreach (var (entry, rank) in phase.Select((e, i) => (e, i)))
        {
            entry.Order = rank;
            result.Add(entry);
        }
    }

    return result;

    // Blank or whitespace-only text becomes null; anything else is trimmed.
    static string? TrimToNull(string? text) => string.IsNullOrWhiteSpace(text) ? null : text.Trim();
}
private static string? DeriveNamespace(string id, string? file, string language)
{
if (string.Equals(language, "java", StringComparison.OrdinalIgnoreCase))

View File

@@ -0,0 +1,38 @@
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Signals.Models;
using StellaOps.Signals.Options;
namespace StellaOps.Signals.Services;
/// <summary>
/// Service for computing multi-factor scores for unknowns.
/// </summary>
public interface IUnknownsScoringService
{
/// <summary>
/// Recomputes scores for all unknowns in a subject.
/// </summary>
/// <param name="subjectKey">Key identifying the subject whose unknowns are rescored.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>
/// A <see cref="UnknownsScoringResult"/> carrying the subject key, the total number of unknowns,
/// the hot/warm/cold counts and the computation timestamp.
/// </returns>
Task<UnknownsScoringResult> RecomputeAsync(
string subjectKey,
CancellationToken cancellationToken = default);
/// <summary>
/// Scores a single unknown using the 5-factor formula.
/// </summary>
/// <param name="unknown">The unknown to score.</param>
/// <param name="options">Scoring options supplied by the caller for this computation.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The scored unknown document.</returns>
Task<UnknownSymbolDocument> ScoreUnknownAsync(
UnknownSymbolDocument unknown,
UnknownsScoringOptions options,
CancellationToken cancellationToken = default);
}
/// <summary>
/// Result of scoring computation.
/// </summary>
/// <param name="SubjectKey">Key of the subject whose unknowns were scored.</param>
/// <param name="TotalUnknowns">Total number of unknowns covered by the computation.</param>
/// <param name="HotCount">Number of unknowns in the hot band.</param>
/// <param name="WarmCount">Number of unknowns in the warm band.</param>
/// <param name="ColdCount">Number of unknowns in the cold band.</param>
/// <param name="ComputedAt">Timestamp at which the result was computed.</param>
public sealed record UnknownsScoringResult(
string SubjectKey,
int TotalUnknowns,
int HotCount,
int WarmCount,
int ColdCount,
System.DateTimeOffset ComputedAt);

View File

@@ -0,0 +1,279 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Signals.Models;
using StellaOps.Signals.Options;
using StellaOps.Signals.Persistence;
namespace StellaOps.Signals.Services;
/// <summary>
/// Computes multi-factor scores for unknowns and assigns triage bands.
/// </summary>
/// <remarks>
/// Each unknown receives five factor scores: popularity (P), exploit potential (E), uncertainty (U),
/// centrality (C) and staleness (S). They are combined as a weighted sum using the weights from
/// <see cref="UnknownsScoringOptions"/> and clamped to [0, 1]. The composite score selects the band
/// (hot at 0.70 and above, warm at 0.40 and above, otherwise cold), and the band selects the next
/// scheduled rescan time.
/// </remarks>
public sealed class UnknownsScoringService : IUnknownsScoringService
{
    /// <summary>Composite score at or above which an unknown is banded as hot.</summary>
    private const double HotBandThreshold = 0.70;

    /// <summary>Composite score at or above which an unknown is banded as warm.</summary>
    private const double WarmBandThreshold = 0.40;

    /// <summary>Delay, in minutes, before a hot unknown is rescanned.</summary>
    private const int HotRescanMinutes = 15;

    /// <summary>Exploit-potential score used while no severity data is consulted.</summary>
    private const double DefaultExploitPotential = 0.5;

    private readonly IUnknownsRepository _repository;
    private readonly IDeploymentRefsRepository _deploymentRefs;
    private readonly IGraphMetricsRepository _graphMetrics;
    private readonly IOptions<UnknownsScoringOptions> _options;
    private readonly TimeProvider _timeProvider;
    private readonly ILogger<UnknownsScoringService> _logger;

    /// <summary>
    /// Creates the scoring service.
    /// </summary>
    /// <param name="repository">Source and sink for unknown documents.</param>
    /// <param name="deploymentRefs">Provides deployment counts used by the popularity factor.</param>
    /// <param name="graphMetrics">Provides degree and betweenness used by the centrality factor.</param>
    /// <param name="options">Scoring options used by <see cref="RecomputeAsync"/>.</param>
    /// <param name="timeProvider">Clock used for all timestamps.</param>
    /// <param name="logger">Receives a debug entry for every scored unknown.</param>
    public UnknownsScoringService(
        IUnknownsRepository repository,
        IDeploymentRefsRepository deploymentRefs,
        IGraphMetricsRepository graphMetrics,
        IOptions<UnknownsScoringOptions> options,
        TimeProvider timeProvider,
        ILogger<UnknownsScoringService> logger)
    {
        _repository = repository;
        _deploymentRefs = deploymentRefs;
        _graphMetrics = graphMetrics;
        _options = options;
        _timeProvider = timeProvider;
        _logger = logger;
    }

    /// <summary>
    /// Recomputes scores for all unknowns in a subject.
    /// </summary>
    /// <param name="subjectKey">Key of the subject whose unknowns are loaded and rescored.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>Totals per band for the rescored unknowns, stamped with the current time.</returns>
    public async Task<UnknownsScoringResult> RecomputeAsync(
        string subjectKey,
        CancellationToken cancellationToken = default)
    {
        var unknowns = await _repository.GetBySubjectAsync(subjectKey, cancellationToken).ConfigureAwait(false);
        var opts = _options.Value;
        var updated = new List<UnknownSymbolDocument>();

        foreach (var unknown in unknowns)
        {
            // Scoring may complete without any awaited I/O, so check for cancellation explicitly.
            cancellationToken.ThrowIfCancellationRequested();
            updated.Add(await ScoreUnknownAsync(unknown, opts, cancellationToken).ConfigureAwait(false));
        }

        await _repository.BulkUpdateAsync(updated, cancellationToken).ConfigureAwait(false);

        // Tally all bands in one pass instead of enumerating the list once per band.
        var hotCount = 0;
        var warmCount = 0;
        var coldCount = 0;
        foreach (var scored in updated)
        {
            switch (scored.Band)
            {
                case UnknownsBand.Hot:
                    hotCount++;
                    break;
                case UnknownsBand.Warm:
                    warmCount++;
                    break;
                case UnknownsBand.Cold:
                    coldCount++;
                    break;
            }
        }

        return new UnknownsScoringResult(
            SubjectKey: subjectKey,
            TotalUnknowns: updated.Count,
            HotCount: hotCount,
            WarmCount: warmCount,
            ColdCount: coldCount,
            ComputedAt: _timeProvider.GetUtcNow());
    }

    /// <summary>
    /// Scores a single unknown using the 5-factor formula.
    /// </summary>
    /// <param name="unknown">The unknown to score; it is updated in place.</param>
    /// <param name="opts">Weights and normalization limits for this computation.</param>
    /// <param name="cancellationToken">Token used to cancel the repository lookups.</param>
    /// <returns>The same <paramref name="unknown"/> instance with scores, band, rescan time and trace set.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="unknown"/> or <paramref name="opts"/> is <c>null</c>.
    /// </exception>
    public async Task<UnknownSymbolDocument> ScoreUnknownAsync(
        UnknownSymbolDocument unknown,
        UnknownsScoringOptions opts,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(unknown);
        ArgumentNullException.ThrowIfNull(opts);

        // One clock reading per scoring pass keeps the trace, staleness, rescan and update stamps consistent.
        var now = _timeProvider.GetUtcNow();

        var trace = new UnknownsNormalizationTrace
        {
            ComputedAt = now,
            Weights = new Dictionary<string, double>
            {
                ["wP"] = opts.WeightPopularity,
                ["wE"] = opts.WeightExploitPotential,
                ["wU"] = opts.WeightUncertainty,
                ["wC"] = opts.WeightCentrality,
                ["wS"] = opts.WeightStaleness
            }
        };

        // Factor P: Popularity (deployment impact)
        var (popularityScore, deploymentCount) = await ComputePopularityAsync(
            unknown.Purl, opts, cancellationToken).ConfigureAwait(false);
        unknown.PopularityScore = popularityScore;
        unknown.DeploymentCount = deploymentCount;
        trace.RawPopularity = deploymentCount;
        trace.NormalizedPopularity = popularityScore;
        trace.PopularityFormula = $"min(1, log10(1 + {deploymentCount}) / log10(1 + {opts.PopularityMaxDeployments}))";

        // Factor E: Exploit potential (currently a fixed placeholder value)
        var exploitScore = ComputeExploitPotential(unknown);
        unknown.ExploitPotentialScore = exploitScore;
        trace.RawExploitPotential = exploitScore;
        trace.NormalizedExploitPotential = exploitScore;

        // Factor U: Uncertainty density (from flags); the helper already limits it to [0, 1]
        var (uncertaintyScore, activeFlags) = ComputeUncertainty(unknown.Flags, opts);
        unknown.UncertaintyScore = uncertaintyScore;
        trace.RawUncertainty = uncertaintyScore;
        trace.NormalizedUncertainty = uncertaintyScore;
        trace.ActiveFlags = activeFlags;

        // Factor C: Graph centrality
        var (centralityScore, degree, betweenness) = await ComputeCentralityAsync(
            unknown.SymbolId, unknown.CallgraphId, opts, cancellationToken).ConfigureAwait(false);
        unknown.CentralityScore = centralityScore;
        unknown.DegreeCentrality = degree;
        unknown.BetweennessCentrality = betweenness;
        trace.RawCentrality = betweenness;
        trace.NormalizedCentrality = centralityScore;

        // Factor S: Evidence staleness
        var (stalenessScore, daysSince) = ComputeStaleness(unknown.LastAnalyzedAt, opts, now);
        unknown.StalenessScore = stalenessScore;
        unknown.DaysSinceLastAnalysis = daysSince;
        trace.RawStaleness = daysSince;
        trace.NormalizedStaleness = stalenessScore;

        // Composite score; Clamp01 also maps NaN (for example from NaN weights) to 0 instead of propagating it.
        var score = Clamp01(
            opts.WeightPopularity * unknown.PopularityScore +
            opts.WeightExploitPotential * unknown.ExploitPotentialScore +
            opts.WeightUncertainty * unknown.UncertaintyScore +
            opts.WeightCentrality * unknown.CentralityScore +
            opts.WeightStaleness * unknown.StalenessScore);
        unknown.Score = score;
        trace.FinalScore = score;

        // Band assignment
        unknown.Band = score switch
        {
            >= HotBandThreshold => UnknownsBand.Hot,
            >= WarmBandThreshold => UnknownsBand.Warm,
            _ => UnknownsBand.Cold
        };
        trace.AssignedBand = unknown.Band.ToString();

        // Schedule next rescan based on band
        unknown.NextScheduledRescan = unknown.Band switch
        {
            UnknownsBand.Hot => now.AddMinutes(HotRescanMinutes),
            UnknownsBand.Warm => now.AddHours(opts.WarmRescanHours),
            _ => now.AddDays(opts.ColdRescanDays)
        };

        unknown.NormalizationTrace = trace;
        unknown.UpdatedAt = now;

        _logger.LogDebug(
            "Scored unknown {UnknownId}: P={P:F2} E={E:F2} U={U:F2} C={C:F2} S={S:F2} → Score={Score:F2} Band={Band}",
            unknown.Id,
            unknown.PopularityScore,
            unknown.ExploitPotentialScore,
            unknown.UncertaintyScore,
            unknown.CentralityScore,
            unknown.StalenessScore,
            unknown.Score,
            unknown.Band);

        return unknown;
    }

    /// <summary>
    /// Computes the popularity factor from the number of deployments that reference a package.
    /// </summary>
    /// <param name="purl">Package URL to look up; a blank value scores zero without a lookup.</param>
    /// <param name="opts">Supplies <c>PopularityMaxDeployments</c>, the saturation point.</param>
    /// <param name="cancellationToken">Token used to cancel the lookup.</param>
    /// <returns>The normalized score in [0, 1] and the raw deployment count.</returns>
    private async Task<(double Score, int DeploymentCount)> ComputePopularityAsync(
        string? purl,
        UnknownsScoringOptions opts,
        CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(purl))
        {
            return (0.0, 0);
        }

        var deployments = await _deploymentRefs.CountDeploymentsAsync(purl, cancellationToken).ConfigureAwait(false);

        // Formula: P = min(1, log10(1 + deployments) / log10(1 + maxDeployments))
        // Adding in floating point avoids integer overflow at the top of the int range.
        var score = SaturatingRatio(
            Math.Log10(1.0 + deployments),
            Math.Log10(1.0 + opts.PopularityMaxDeployments));

        return (score, deployments);
    }

    /// <summary>
    /// Returns the exploit-potential factor.
    /// </summary>
    /// <param name="unknown">The unknown being scored; not consulted yet.</param>
    /// <returns>Always the fixed default of 0.5.</returns>
    private static double ComputeExploitPotential(UnknownSymbolDocument unknown)
    {
        // Placeholder: every unknown is assumed to have medium potential.
        // This could be enhanced with CVE severity, KEV lookup, exploit DB, etc.
        return DefaultExploitPotential;
    }

    /// <summary>
    /// Sums the configured weight of every uncertainty flag that is set.
    /// </summary>
    /// <param name="flags">Flags recorded for the unknown.</param>
    /// <param name="opts">Supplies the per-flag weights.</param>
    /// <returns>The weight sum limited to [0, 1] and the names of the flags that were set.</returns>
    private static (double Score, List<string> ActiveFlags) ComputeUncertainty(
        UnknownFlags flags,
        UnknownsScoringOptions opts)
    {
        var total = 0.0;
        var activeFlags = new List<string>();

        Accumulate(flags.NoProvenanceAnchor, opts.FlagWeightNoProvenance, "NoProvenanceAnchor");
        Accumulate(flags.VersionRange, opts.FlagWeightVersionRange, "VersionRange");
        Accumulate(flags.ConflictingFeeds, opts.FlagWeightConflictingFeeds, "ConflictingFeeds");
        Accumulate(flags.MissingVector, opts.FlagWeightMissingVector, "MissingVector");
        Accumulate(flags.UnreachableSourceAdvisory, opts.FlagWeightUnreachableSource, "UnreachableSourceAdvisory");
        Accumulate(flags.DynamicCallTarget, opts.FlagWeightDynamicTarget, "DynamicCallTarget");
        Accumulate(flags.ExternalAssembly, opts.FlagWeightExternalAssembly, "ExternalAssembly");

        return (Clamp01(total), activeFlags);

        // Adds the weight and records the flag name when the flag is set.
        void Accumulate(bool isSet, double weight, string name)
        {
            if (!isSet)
            {
                return;
            }

            total += weight;
            activeFlags.Add(name);
        }
    }

    /// <summary>
    /// Computes the centrality factor from stored graph metrics.
    /// </summary>
    /// <param name="symbolId">Symbol to look up; a blank value scores zero without a lookup.</param>
    /// <param name="callgraphId">Callgraph to look up; a blank value scores zero without a lookup.</param>
    /// <param name="opts">Supplies <c>CentralityMaxBetweenness</c>, the saturation point.</param>
    /// <param name="cancellationToken">Token used to cancel the lookup.</param>
    /// <returns>The normalized score in [0, 1], the degree and the raw betweenness; all zero when no metrics exist.</returns>
    private async Task<(double Score, int Degree, double Betweenness)> ComputeCentralityAsync(
        string? symbolId,
        string? callgraphId,
        UnknownsScoringOptions opts,
        CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(symbolId) || string.IsNullOrWhiteSpace(callgraphId))
        {
            return (0.0, 0, 0.0);
        }

        var metrics = await _graphMetrics.GetMetricsAsync(symbolId, callgraphId, cancellationToken).ConfigureAwait(false);
        if (metrics is null)
        {
            return (0.0, 0, 0.0);
        }

        // Normalize betweenness to 0-1 range
        var normalizedBetweenness = SaturatingRatio(metrics.Betweenness, opts.CentralityMaxBetweenness);

        return (normalizedBetweenness, metrics.Degree, metrics.Betweenness);
    }

    /// <summary>
    /// Computes the staleness factor from the age of the last analysis.
    /// </summary>
    /// <param name="lastAnalyzedAt">Time of the last analysis, or <c>null</c> if never analyzed.</param>
    /// <param name="opts">Supplies <c>StalenessMaxDays</c>, the saturation point.</param>
    /// <param name="now">Reference time for the age calculation.</param>
    /// <returns>The normalized score in [0, 1] and the whole days elapsed (never negative).</returns>
    private static (double Score, int DaysSince) ComputeStaleness(
        DateTimeOffset? lastAnalyzedAt,
        UnknownsScoringOptions opts,
        DateTimeOffset now)
    {
        if (lastAnalyzedAt is null)
        {
            return (1.0, opts.StalenessMaxDays); // Never analyzed = maximum staleness
        }

        // A timestamp in the future (clock skew) counts as "just analyzed" rather than a negative age.
        var daysSince = Math.Max(0, (int)(now - lastAnalyzedAt.Value).TotalDays);

        // Formula: S = min(1, age_days / max_days)
        var score = SaturatingRatio(daysSince, opts.StalenessMaxDays);

        return (score, daysSince);
    }

    /// <summary>
    /// Divides <paramref name="value"/> by <paramref name="ceiling"/> and limits the result to [0, 1]
    /// without ever producing NaN.
    /// </summary>
    /// <param name="value">Measured amount; NaN, zero or negative yields 0.</param>
    /// <param name="ceiling">Amount at which the ratio saturates; NaN, zero or negative saturates immediately.</param>
    /// <returns>A finite value in [0, 1].</returns>
    private static double SaturatingRatio(double value, double ceiling)
    {
        if (double.IsNaN(value) || value <= 0.0)
        {
            return 0.0;
        }

        // A non-positive ceiling is a misconfiguration; any positive amount is treated as saturated,
        // which matches what dividing by zero produced before, minus the NaN cases.
        if (double.IsNaN(ceiling) || ceiling <= 0.0)
        {
            return 1.0;
        }

        return Math.Min(1.0, value / ceiling);
    }

    /// <summary>
    /// Limits a value to [0, 1], mapping NaN to 0.
    /// </summary>
    /// <param name="value">Value to limit.</param>
    /// <returns>A finite value in [0, 1].</returns>
    private static double Clamp01(double value) =>
        double.IsNaN(value) ? 0.0 : Math.Clamp(value, 0.0, 1.0);
}