save checkpoint

This commit is contained in:
master
2026-02-12 21:02:43 +02:00
parent 5bca406787
commit 9911b7d73c
593 changed files with 174390 additions and 1376 deletions

View File

@@ -9,7 +9,9 @@ using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
@@ -87,8 +89,24 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
var provides = entry.Provides.ToArray();
var fileEvidence = BuildFileEvidence(infoDirectory, entry, evidenceFactory, cancellationToken);
var changelogEntries = ReadChangelogEntries(context.RootPath, fileEvidence, cancellationToken);
var changelogBugMappings = ChangelogBugReferenceExtractor.Extract(changelogEntries.ToArray());
var cveHints = CveHintExtractor.Extract(entry.Description, string.Join(' ', dependencies), string.Join(' ', provides));
if (changelogBugMappings.BugReferences.Count > 0)
{
vendorMetadata["changelogBugRefs"] = changelogBugMappings.ToBugReferencesMetadataValue();
}
if (changelogBugMappings.BugToCves.Count > 0)
{
vendorMetadata["changelogBugToCves"] = changelogBugMappings.ToBugToCvesMetadataValue();
}
var cveHints = CveHintExtractor.Extract(
entry.Description,
string.Join(' ', dependencies),
string.Join(' ', provides),
string.Join('\n', changelogEntries));
var record = new OSPackageRecord(
AnalyzerId,
@@ -247,6 +265,83 @@ internal sealed class DpkgPackageAnalyzer : OsPackageAnalyzerBase
return new ReadOnlyCollection<OSPackageFileEvidence>(evidence);
}
/// <summary>
/// Reads the text of every changelog-looking file listed in <paramref name="files"/>.
/// </summary>
/// <param name="rootPath">Root directory under which the package file paths are resolved.</param>
/// <param name="files">File evidence entries; only those accepted by <c>LooksLikeChangelog</c> are read.</param>
/// <param name="cancellationToken">Checked once per file; cancellation surfaces as <see cref="OperationCanceledException"/>.</param>
/// <returns>
/// A read-only list with one string per changelog that exists and has non-whitespace content.
/// Files that are missing, unreadable, empty, or that resolve outside <paramref name="rootPath"/> are skipped.
/// </returns>
private static IReadOnlyList<string> ReadChangelogEntries(
    string rootPath,
    IReadOnlyList<OSPackageFileEvidence> files,
    CancellationToken cancellationToken)
{
    // Normalize the root once so each candidate can be checked for containment below.
    var rootFullPath = Path.GetFullPath(rootPath);
    var rootPrefix = Path.EndsInDirectorySeparator(rootFullPath)
        ? rootFullPath
        : rootFullPath + Path.DirectorySeparatorChar;

    var entries = new List<string>();
    foreach (var file in files)
    {
        cancellationToken.ThrowIfCancellationRequested();
        if (!LooksLikeChangelog(file.Path))
        {
            continue;
        }

        var relativePath = file.Path.TrimStart('/', '\\').Replace('/', Path.DirectorySeparatorChar);
        if (string.IsNullOrWhiteSpace(relativePath))
        {
            continue;
        }

        string fullPath;
        try
        {
            // GetFullPath collapses "." / ".." segments lexically, so traversal attempts become visible.
            fullPath = Path.GetFullPath(Path.Combine(rootFullPath, relativePath));
        }
        catch (Exception ex) when (ex is ArgumentException or NotSupportedException or PathTooLongException)
        {
            // A malformed path in the package file list is not fatal; ignore that entry.
            continue;
        }

        // The file list comes from the scanned content; never follow a path that escapes the scan root.
        if (!fullPath.StartsWith(rootPrefix, StringComparison.Ordinal))
        {
            continue;
        }

        if (!File.Exists(fullPath))
        {
            continue;
        }

        var changelogText = TryReadChangelogFile(fullPath);
        if (string.IsNullOrWhiteSpace(changelogText))
        {
            continue;
        }

        entries.Add(changelogText);
    }

    return new ReadOnlyCollection<string>(entries);
}
/// <summary>
/// Heuristic (case-insensitive) test for whether a package file path names a changelog:
/// the path ends in <c>changelog</c> or <c>changelog.gz</c>, or contains a
/// <c>changelog.</c> segment start after either a forward slash or a backslash.
/// </summary>
private static bool LooksLikeChangelog(string path)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

    if (path.EndsWith("changelog", ignoreCase) || path.EndsWith("changelog.gz", ignoreCase))
    {
        return true;
    }

    return path.Contains("/changelog.", ignoreCase)
        || path.Contains("\\changelog.", ignoreCase);
}
/// <summary>
/// Best-effort read of a changelog file as text, decompressing it first when the path ends in <c>.gz</c>.
/// </summary>
/// <param name="fullPath">Path of the file to read.</param>
/// <returns>
/// At most the first 256 * 1024 characters of the (decompressed) content, or <c>null</c> when the
/// file is empty or reading fails with an I/O, invalid-data, or access error.
/// </returns>
private static string? TryReadChangelogFile(string fullPath)
{
    const int maxChars = 256 * 1024;

    // Rent the scratch buffer instead of allocating 512 KB on the large object heap for every file.
    char[]? buffer = null;
    try
    {
        using var fileStream = File.OpenRead(fullPath);
        using Stream contentStream = fullPath.EndsWith(".gz", StringComparison.OrdinalIgnoreCase)
            ? new GZipStream(fileStream, CompressionMode.Decompress)
            : fileStream;
        using var reader = new StreamReader(contentStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true);

        buffer = System.Buffers.ArrayPool<char>.Shared.Rent(maxChars);

        // The rented array may be larger than requested; cap the read at maxChars explicitly.
        var read = reader.ReadBlock(buffer, 0, maxChars);
        return read <= 0 ? null : new string(buffer, 0, read);
    }
    catch (IOException)
    {
        return null;
    }
    catch (InvalidDataException)
    {
        // Thrown by GZipStream when the content is not valid gzip data.
        return null;
    }
    catch (UnauthorizedAccessException)
    {
        return null;
    }
    finally
    {
        if (buffer is not null)
        {
            System.Buffers.ArrayPool<char>.Shared.Return(buffer);
        }
    }
}
private static IEnumerable<string> GetInfoFileCandidates(string packageName, string architecture)
{
yield return packageName + ":" + architecture;

View File

@@ -69,6 +69,18 @@ internal sealed class RpmPackageAnalyzer : OsPackageAnalyzerBase
vendorMetadata[$"rpm:{kvp.Key}"] = kvp.Value;
}
var changelogBugMappings = ChangelogBugReferenceExtractor.Extract(
header.ChangeLogs.ToArray());
if (changelogBugMappings.BugReferences.Count > 0)
{
vendorMetadata["changelogBugRefs"] = changelogBugMappings.ToBugReferencesMetadataValue();
}
if (changelogBugMappings.BugToCves.Count > 0)
{
vendorMetadata["changelogBugToCves"] = changelogBugMappings.ToBugToCvesMetadataValue();
}
var provides = ComposeRelations(header.Provides, header.ProvideVersions);
var requires = ComposeRelations(header.Requires, header.RequireVersions);

View File

@@ -0,0 +1,183 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.OS.Helpers;
/// <summary>
/// Extracts bug-tracker references from changelog text and maps each bug to the CVE identifiers
/// mentioned on the same changelog line.
/// </summary>
/// <remarks>
/// Recognized forms: Debian <c>Closes: #N[, #M...]</c>, <c>RHBZ#N</c>, Launchpad <c>LP: #N[, #M...]</c>
/// and <c>Launchpad [bug] #N</c>. References are emitted as <c>debian:#N</c>, <c>rhbz:#N</c> and
/// <c>launchpad:#N</c>, sorted ordinally.
/// </remarks>
public static partial class ChangelogBugReferenceExtractor
{
    /// <summary>
    /// Scans the supplied changelog texts for bug references and bug-to-CVE associations.
    /// </summary>
    /// <param name="changelogInputs">Changelog texts; null, empty and whitespace-only items are ignored.</param>
    /// <returns>
    /// The sorted bug references and, for bugs that share a line with at least one CVE id, the sorted
    /// CVE ids per bug. Returns <see cref="ChangelogBugReferenceExtractionResult.Empty"/> when no bug
    /// reference is found.
    /// </returns>
    public static ChangelogBugReferenceExtractionResult Extract(params string?[] changelogInputs)
    {
        if (changelogInputs is null || changelogInputs.Length == 0)
        {
            return ChangelogBugReferenceExtractionResult.Empty;
        }

        var bugReferences = new SortedSet<string>(StringComparer.Ordinal);
        var bugToCves = new SortedDictionary<string, SortedSet<string>>(StringComparer.Ordinal);

        foreach (var input in changelogInputs)
        {
            if (string.IsNullOrWhiteSpace(input))
            {
                continue;
            }

            foreach (var entry in SplitEntries(input))
            {
                if (string.IsNullOrWhiteSpace(entry))
                {
                    continue;
                }

                var bugsInEntry = ExtractBugs(entry);
                if (bugsInEntry.Count == 0)
                {
                    continue;
                }

                bugReferences.UnionWith(bugsInEntry);

                var cves = ExtractCves(entry);
                if (cves.Count == 0)
                {
                    continue;
                }

                foreach (var bug in bugsInEntry)
                {
                    if (!bugToCves.TryGetValue(bug, out var mapped))
                    {
                        mapped = new SortedSet<string>(StringComparer.Ordinal);
                        bugToCves[bug] = mapped;
                    }

                    mapped.UnionWith(cves);
                }
            }
        }

        if (bugReferences.Count == 0)
        {
            return ChangelogBugReferenceExtractionResult.Empty;
        }

        // Back the read-only view with a sorted map so enumeration order is guaranteed,
        // rather than depending on Dictionary's unspecified ordering.
        var orderedMap = new SortedDictionary<string, IReadOnlyList<string>>(StringComparer.Ordinal);
        foreach (var pair in bugToCves)
        {
            orderedMap[pair.Key] = new ReadOnlyCollection<string>(pair.Value.ToArray());
        }

        return new ChangelogBugReferenceExtractionResult(
            new ReadOnlyCollection<string>(bugReferences.ToArray()),
            new ReadOnlyDictionary<string, IReadOnlyList<string>>(orderedMap));
    }

    /// <summary>
    /// Splits changelog text into entries. Every non-empty line (trimmed) becomes its own entry,
    /// so a bug and a CVE are only associated when they appear on the same line.
    /// </summary>
    private static IReadOnlyList<string> SplitEntries(string input)
    {
        var entries = new List<string>();
        foreach (var paragraph in EntrySeparatorRegex().Split(input))
        {
            if (string.IsNullOrWhiteSpace(paragraph))
            {
                continue;
            }

            foreach (var line in paragraph.Split('\n'))
            {
                var trimmed = line.Trim();
                if (trimmed.Length == 0)
                {
                    continue;
                }

                entries.Add(trimmed);
            }
        }

        return entries.Count == 0
            ? new[] { input.Trim() }
            : entries;
    }

    /// <summary>Returns the distinct CVE ids found in <paramref name="entry"/>, upper-cased and sorted.</summary>
    private static IReadOnlyList<string> ExtractCves(string entry)
    {
        var cves = new SortedSet<string>(StringComparer.Ordinal);
        foreach (Match match in CveRegex().Matches(entry))
        {
            cves.Add(match.Value.ToUpperInvariant());
        }

        return cves.ToArray();
    }

    /// <summary>Returns the distinct, sorted bug references found in <paramref name="entry"/>.</summary>
    private static IReadOnlyList<string> ExtractBugs(string entry)
    {
        var bugs = new SortedSet<string>(StringComparer.Ordinal);

        foreach (Match closesMatch in DebianClosesRegex().Matches(entry))
        {
            foreach (Match idMatch in HashBugIdRegex().Matches(closesMatch.Value))
            {
                bugs.Add($"debian:#{idMatch.Groups["id"].Value}");
            }
        }

        foreach (Match rhbz in RhbzRegex().Matches(entry))
        {
            bugs.Add($"rhbz:#{rhbz.Groups["id"].Value}");
        }

        // "LP: #1, #2" closes several Launchpad bugs at once; collect every id in the list,
        // mirroring how the Debian "Closes:" list is handled above.
        foreach (Match lpList in LaunchpadShortRegex().Matches(entry))
        {
            foreach (Match idMatch in SpacedHashBugIdRegex().Matches(lpList.Value))
            {
                bugs.Add($"launchpad:#{idMatch.Groups["id"].Value}");
            }
        }

        foreach (Match lp in LaunchpadLongRegex().Matches(entry))
        {
            bugs.Add($"launchpad:#{lp.Groups["id"].Value}");
        }

        return bugs.ToArray();
    }

    [GeneratedRegex(@"(?:\r?\n){2,}", RegexOptions.Compiled)]
    private static partial Regex EntrySeparatorRegex();

    [GeneratedRegex(@"CVE-\d{4}-\d{4,7}", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex CveRegex();

    [GeneratedRegex(@"\bCloses\s*:\s*(?:#\d+[,\s]*)+", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex DebianClosesRegex();

    [GeneratedRegex(@"#(?<id>\d+)", RegexOptions.Compiled)]
    private static partial Regex HashBugIdRegex();

    [GeneratedRegex(@"\bRHBZ\s*#\s*(?<id>\d+)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex RhbzRegex();

    // Matches the whole "LP: #N" list, including any further "#M" items separated by commas/whitespace.
    [GeneratedRegex(@"\bLP\s*:\s*#\s*\d+\b(?:[,\s]+#\s*\d+\b)*", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex LaunchpadShortRegex();

    // Pulls individual ids out of a matched "LP:" list; tolerates whitespace between '#' and the digits.
    [GeneratedRegex(@"#\s*(?<id>\d+)", RegexOptions.Compiled)]
    private static partial Regex SpacedHashBugIdRegex();

    [GeneratedRegex(@"\bLaunchpad(?:\s+bug)?\s*#\s*(?<id>\d+)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled)]
    private static partial Regex LaunchpadLongRegex();
}

/// <summary>
/// Result of <see cref="ChangelogBugReferenceExtractor.Extract"/>.
/// </summary>
/// <param name="BugReferences">Bug references in the form <c>tracker:#id</c>.</param>
/// <param name="BugToCves">CVE ids keyed by bug reference.</param>
public sealed record ChangelogBugReferenceExtractionResult(
    IReadOnlyList<string> BugReferences,
    IReadOnlyDictionary<string, IReadOnlyList<string>> BugToCves)
{
    /// <summary>Shared instance with no bug references and no mappings.</summary>
    public static ChangelogBugReferenceExtractionResult Empty { get; } = new(
        Array.Empty<string>(),
        new ReadOnlyDictionary<string, IReadOnlyList<string>>(
            new Dictionary<string, IReadOnlyList<string>>(0, StringComparer.Ordinal)));

    /// <summary>Renders <see cref="BugReferences"/> as a comma-separated string.</summary>
    public string ToBugReferencesMetadataValue()
        => string.Join(",", BugReferences);

    /// <summary>
    /// Renders <see cref="BugToCves"/> as <c>bug=&gt;CVE|CVE;bug=&gt;CVE</c>, with bugs in ordinal key
    /// order so the value is stable regardless of the dictionary implementation supplied.
    /// </summary>
    public string ToBugToCvesMetadataValue()
        => string.Join(
            ";",
            BugToCves
                .OrderBy(static pair => pair.Key, StringComparer.Ordinal)
                .Select(static pair => $"{pair.Key}=>{string.Join('|', pair.Value)}"));
}

View File

@@ -4,5 +4,6 @@ Source of truth: `docs/implplan/SPRINT_20260130_002_Tools_csproj_remediation_sol
| Task ID | Status | Notes |
| --- | --- | --- |
| QA-SCANNER-VERIFY-010 | DONE | Implemented deterministic changelog bug-id to CVE mapping (`Closes`, `RHBZ`, `LP`) for OS analyzers with Tier 0/1/2 evidence in run-001. |
| REMED-06-SOLID | DOING | SOLID review for OS analyzer files (Tier 0 remediation batch) in progress. |
| REMED-06 | DONE | SOLID review notes captured for SPRINT_20260130_002. |

View File

@@ -48,6 +48,8 @@ public static class ScanAnalysisKeys
public const string ReplaySealedBundleMetadata = "analysis.replay.sealed.bundle";
public const string BinaryVulnerabilityFindings = "analysis.binary.findings";
public const string BinaryBuildIdMappings = "analysis.binary.buildid.mappings";
public const string BinaryPatchVerificationResult = "analysis.binary.patchverification.result";
// Sprint: SPRINT_3500_0001_0001 - Proof of Exposure
public const string VulnerabilityMatches = "analysis.poe.vulnerability.matches";

View File

@@ -4,5 +4,6 @@ Source of truth: `docs/implplan/SPRINT_20260130_002_Tools_csproj_remediation_sol
| Task ID | Status | Notes |
| --- | --- | --- |
| QA-SCANNER-VERIFY-009 | DONE | `SPRINT_20260212_002_Scanner_unchecked_feature_verification_batch1.md`: extended binary analysis contracts with Build-ID mapping and patch-verification analysis keys for worker runtime wiring (2026-02-12). |
| REMED-05 | TODO | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/Scanner/__Libraries/StellaOps.Scanner.Core/StellaOps.Scanner.Core.md. |
| REMED-06 | DONE | SOLID review notes captured for SPRINT_20260130_002. |

View File

@@ -159,7 +159,8 @@ public sealed record EntryTraceGraph(
ImmutableArray<EntryTraceEdge> Edges,
ImmutableArray<EntryTraceDiagnostic> Diagnostics,
ImmutableArray<EntryTracePlan> Plans,
ImmutableArray<EntryTraceTerminal> Terminals);
ImmutableArray<EntryTraceTerminal> Terminals,
EntryTraceBinaryIntelligence? BinaryIntelligence = null);
/// <summary>
/// Describes a classified terminal executable.
@@ -188,6 +189,41 @@ public sealed record EntryTraceTerminal(
string WorkingDirectory,
ImmutableArray<string> Arguments);
/// <summary>
/// Binary intelligence attached to an entry trace graph.
/// </summary>
/// <param name="Targets">Per-target binary analysis summaries (see <see cref="EntryTraceBinaryTarget"/>).</param>
/// <param name="TotalTargets">Count of targets; presumably all targets considered, analyzed or not — confirm with the producer.</param>
/// <param name="AnalyzedTargets">Count of analyzed targets; presumably a subset of <paramref name="TotalTargets"/> — confirm with the producer.</param>
/// <param name="TotalVulnerableMatches">Count of vulnerable matches; presumably summed across <paramref name="Targets"/> — not enforced by this type.</param>
/// <param name="GeneratedAtUtc">Generation timestamp; the name implies UTC, but the type does not enforce an offset.</param>
public sealed record EntryTraceBinaryIntelligence(
ImmutableArray<EntryTraceBinaryTarget> Targets,
int TotalTargets,
int AnalyzedTargets,
int TotalVulnerableMatches,
DateTimeOffset GeneratedAtUtc);
/// <summary>
/// Binary analysis summary for one resolved terminal target.
/// </summary>
/// <param name="Path">Path of the target binary.</param>
/// <param name="BinaryHash">Hash of the binary; algorithm and encoding are not specified here — confirm with the producer.</param>
/// <param name="Architecture">Architecture label of the binary, as a free-form string.</param>
/// <param name="Format">Binary format label, as a free-form string.</param>
/// <param name="FunctionCount">Number of functions reported for the binary.</param>
/// <param name="RecoveredSymbolCount">Number of recovered symbols reported for the binary.</param>
/// <param name="SourceCorrelationCount">Number of source correlations reported for the binary.</param>
/// <param name="VulnerableMatchCount">Number of vulnerable matches; presumably equals the length of <paramref name="VulnerableMatches"/>, but this type does not enforce it.</param>
/// <param name="VulnerableMatches">Vulnerability evidence entries for this target (see <see cref="EntryTraceBinaryVulnerability"/>).</param>
public sealed record EntryTraceBinaryTarget(
string Path,
string BinaryHash,
string Architecture,
string Format,
int FunctionCount,
int RecoveredSymbolCount,
int SourceCorrelationCount,
int VulnerableMatchCount,
ImmutableArray<EntryTraceBinaryVulnerability> VulnerableMatches);
/// <summary>
/// Vulnerability evidence from binary intelligence matching.
/// </summary>
/// <param name="VulnerabilityId">Identifier of the matched vulnerability.</param>
/// <param name="FunctionName">Name of the function in the analyzed binary, when available; may be <c>null</c>.</param>
/// <param name="SourcePackage">Source package associated with the match.</param>
/// <param name="VulnerableFunctionName">Name of the known-vulnerable function that was matched.</param>
/// <param name="MatchConfidence">Confidence of the match; the value range is not defined by this type — TODO confirm scale with the producer.</param>
/// <param name="Severity">Severity label as a free-form string.</param>
public sealed record EntryTraceBinaryVulnerability(
string VulnerabilityId,
string? FunctionName,
string SourcePackage,
string VulnerableFunctionName,
float MatchConfidence,
string Severity);
/// <summary>
/// Represents a fallback entrypoint candidate inferred from image metadata or filesystem.
/// </summary>

View File

@@ -40,6 +40,7 @@ public static class EntryTraceGraphSerializer
public List<EntryTraceDiagnosticContract> Diagnostics { get; set; } = new();
public List<EntryTracePlanContract> Plans { get; set; } = new();
public List<EntryTraceTerminalContract> Terminals { get; set; } = new();
public EntryTraceBinaryIntelligenceContract? BinaryIntelligence { get; set; }
public static EntryTraceGraphContract FromGraph(EntryTraceGraph graph)
{
@@ -50,7 +51,10 @@ public static class EntryTraceGraphSerializer
Edges = graph.Edges.Select(EntryTraceEdgeContract.FromEdge).ToList(),
Diagnostics = graph.Diagnostics.Select(EntryTraceDiagnosticContract.FromDiagnostic).ToList(),
Plans = graph.Plans.Select(EntryTracePlanContract.FromPlan).ToList(),
Terminals = graph.Terminals.Select(EntryTraceTerminalContract.FromTerminal).ToList()
Terminals = graph.Terminals.Select(EntryTraceTerminalContract.FromTerminal).ToList(),
BinaryIntelligence = graph.BinaryIntelligence is null
? null
: EntryTraceBinaryIntelligenceContract.FromBinaryIntelligence(graph.BinaryIntelligence)
};
}
@@ -62,7 +66,116 @@ public static class EntryTraceGraphSerializer
Edges.Select(e => e.ToEdge()).ToImmutableArray(),
Diagnostics.Select(d => d.ToDiagnostic()).ToImmutableArray(),
Plans.Select(p => p.ToPlan()).ToImmutableArray(),
Terminals.Select(t => t.ToTerminal()).ToImmutableArray());
Terminals.Select(t => t.ToTerminal()).ToImmutableArray(),
BinaryIntelligence?.ToBinaryIntelligence());
}
}
/// <summary>
/// Mutable serialization contract mirroring <see cref="EntryTraceBinaryIntelligence"/>.
/// </summary>
private sealed class EntryTraceBinaryIntelligenceContract
{
    public List<EntryTraceBinaryTargetContract> Targets { get; set; } = new();

    public int TotalTargets { get; set; }

    public int AnalyzedTargets { get; set; }

    public int TotalVulnerableMatches { get; set; }

    public DateTimeOffset GeneratedAtUtc { get; set; }

    /// <summary>Copies a domain record into a new contract instance.</summary>
    public static EntryTraceBinaryIntelligenceContract FromBinaryIntelligence(EntryTraceBinaryIntelligence intelligence)
        => new()
        {
            Targets = intelligence.Targets
                .Select(target => EntryTraceBinaryTargetContract.FromBinaryTarget(target))
                .ToList(),
            TotalTargets = intelligence.TotalTargets,
            AnalyzedTargets = intelligence.AnalyzedTargets,
            TotalVulnerableMatches = intelligence.TotalVulnerableMatches,
            GeneratedAtUtc = intelligence.GeneratedAtUtc
        };

    /// <summary>Builds the immutable domain record from this contract.</summary>
    public EntryTraceBinaryIntelligence ToBinaryIntelligence()
    {
        var targets = Targets.Select(contract => contract.ToBinaryTarget()).ToImmutableArray();
        return new EntryTraceBinaryIntelligence(
            Targets: targets,
            TotalTargets: TotalTargets,
            AnalyzedTargets: AnalyzedTargets,
            TotalVulnerableMatches: TotalVulnerableMatches,
            GeneratedAtUtc: GeneratedAtUtc);
    }
}
/// <summary>
/// Mutable serialization contract mirroring <see cref="EntryTraceBinaryTarget"/>.
/// </summary>
private sealed class EntryTraceBinaryTargetContract
{
    public string Path { get; set; } = string.Empty;

    public string BinaryHash { get; set; } = string.Empty;

    public string Architecture { get; set; } = string.Empty;

    public string Format { get; set; } = string.Empty;

    public int FunctionCount { get; set; }

    public int RecoveredSymbolCount { get; set; }

    public int SourceCorrelationCount { get; set; }

    public int VulnerableMatchCount { get; set; }

    public List<EntryTraceBinaryVulnerabilityContract> VulnerableMatches { get; set; } = new();

    /// <summary>Copies a domain record into a new contract instance.</summary>
    public static EntryTraceBinaryTargetContract FromBinaryTarget(EntryTraceBinaryTarget target)
        => new()
        {
            Path = target.Path,
            BinaryHash = target.BinaryHash,
            Architecture = target.Architecture,
            Format = target.Format,
            FunctionCount = target.FunctionCount,
            RecoveredSymbolCount = target.RecoveredSymbolCount,
            SourceCorrelationCount = target.SourceCorrelationCount,
            VulnerableMatchCount = target.VulnerableMatchCount,
            VulnerableMatches = target.VulnerableMatches
                .Select(match => EntryTraceBinaryVulnerabilityContract.FromBinaryVulnerability(match))
                .ToList()
        };

    /// <summary>Builds the immutable domain record from this contract.</summary>
    public EntryTraceBinaryTarget ToBinaryTarget()
    {
        var matches = VulnerableMatches.Select(contract => contract.ToBinaryVulnerability()).ToImmutableArray();
        return new EntryTraceBinaryTarget(
            Path: Path,
            BinaryHash: BinaryHash,
            Architecture: Architecture,
            Format: Format,
            FunctionCount: FunctionCount,
            RecoveredSymbolCount: RecoveredSymbolCount,
            SourceCorrelationCount: SourceCorrelationCount,
            VulnerableMatchCount: VulnerableMatchCount,
            VulnerableMatches: matches);
    }
}
/// <summary>
/// Mutable serialization contract mirroring <see cref="EntryTraceBinaryVulnerability"/>.
/// </summary>
private sealed class EntryTraceBinaryVulnerabilityContract
{
    public string VulnerabilityId { get; set; } = string.Empty;

    public string? FunctionName { get; set; }

    public string SourcePackage { get; set; } = string.Empty;

    public string VulnerableFunctionName { get; set; } = string.Empty;

    public float MatchConfidence { get; set; }

    public string Severity { get; set; } = string.Empty;

    /// <summary>Copies a domain record into a new contract instance.</summary>
    public static EntryTraceBinaryVulnerabilityContract FromBinaryVulnerability(EntryTraceBinaryVulnerability vulnerability)
        => new()
        {
            VulnerabilityId = vulnerability.VulnerabilityId,
            FunctionName = vulnerability.FunctionName,
            SourcePackage = vulnerability.SourcePackage,
            VulnerableFunctionName = vulnerability.VulnerableFunctionName,
            MatchConfidence = vulnerability.MatchConfidence,
            Severity = vulnerability.Severity
        };

    /// <summary>Builds the immutable domain record from this contract.</summary>
    public EntryTraceBinaryVulnerability ToBinaryVulnerability()
        => new(
            VulnerabilityId: VulnerabilityId,
            FunctionName: FunctionName,
            SourcePackage: SourcePackage,
            VulnerableFunctionName: VulnerableFunctionName,
            MatchConfidence: MatchConfidence,
            Severity: Severity);
}

View File

@@ -4,5 +4,6 @@ Source of truth: `docs/implplan/SPRINT_20260130_002_Tools_csproj_remediation_sol
| Task ID | Status | Notes |
| --- | --- | --- |
| QA-SCANNER-VERIFY-008 | DONE | `SPRINT_20260212_002_Scanner_unchecked_feature_verification_batch1.md`: binary intelligence graph contract/serializer path verified with run-002 Tier 1/Tier 2 evidence and dossier promotion (2026-02-12). |
| REMED-05 | TODO | Remediation checklist: docs/implplan/audits/csproj-standards/remediation/checklists/src/Scanner/__Libraries/StellaOps.Scanner.EntryTrace/StellaOps.Scanner.EntryTrace.md. |
| REMED-06 | DONE | SOLID review notes captured for SPRINT_20260130_002. |

View File

@@ -13,7 +13,8 @@ public sealed class PostgresArtifactBomRepository : IArtifactBomRepository
private readonly ScannerDataSource _dataSource;
private readonly ILogger<PostgresArtifactBomRepository> _logger;
private string SchemaName => _dataSource.SchemaName ?? ScannerDataSource.DefaultSchema;
// Artifact BOM projection migrations/functions are currently bound to the default scanner schema.
private const string SchemaName = ScannerDataSource.DefaultSchema;
private string TableName => $"{SchemaName}.artifact_boms";
public PostgresArtifactBomRepository(