// =============================================================================
// SpdxParser.cs
// SPDX SBOM parser implementation
// Part of Step 2: Evidence Collection (Task T5)
// =============================================================================

using System.Globalization;
|
|
using System.Text.Json;
|
|
|
|
namespace StellaOps.AirGap.Importer.Reconciliation.Parsers;
|
|
|
|
/// <summary>
/// Parser for SPDX SBOM format (JSON).
/// Supports SPDX 2.2 and 2.3 schemas.
/// </summary>
/// <remarks>
/// The version check only requires the <c>spdxVersion</c> value to start with
/// <c>SPDX-</c>; other version numbers are not rejected. Packages that carry no
/// SHA-256 checksum are counted in the total but are not returned as subjects.
/// </remarks>
public sealed class SpdxParser : ISbomParser
{
    /// <summary>Maximum number of leading characters inspected when sniffing file content.</summary>
    private const int DetectionSampleLength = 1024;

    /// <summary>Prefix that an accepted <c>spdxVersion</c> value must start with.</summary>
    private const string SpdxVersionPrefix = "SPDX-";

    /// <summary>Prefix that marks a <c>creators</c> entry as a tool.</summary>
    private const string ToolCreatorPrefix = "Tool:";

    /// <summary>Checksum algorithm spellings accepted as SHA-256, in lookup order.</summary>
    private static readonly string[] Sha256AlgorithmNames = { "SHA256", "SHA-256", "sha256" };

    // Lenient reader settings: trailing commas are tolerated and comments are skipped.
    private static readonly JsonDocumentOptions DocumentOptions = new()
    {
        AllowTrailingCommas = true,
        CommentHandling = JsonCommentHandling.Skip
    };

    /// <summary>
    /// Determines whether a file looks like an SPDX JSON document.
    /// </summary>
    /// <param name="filePath">Path of the candidate file.</param>
    /// <returns>
    /// <see cref="SbomFormat.Spdx"/> when the path ends with <c>.spdx.json</c> (case-insensitive)
    /// or when the first 1024 characters of the file contain an <c>"spdxVersion"</c> or
    /// <c>"SPDXID"</c> key; otherwise <see cref="SbomFormat.Unknown"/>.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="filePath"/> is null, empty, or whitespace.
    /// </exception>
    public SbomFormat DetectFormat(string filePath)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(filePath);

        // SPDX files typically end with .spdx.json
        if (filePath.EndsWith(".spdx.json", StringComparison.OrdinalIgnoreCase))
        {
            return SbomFormat.Spdx;
        }

        if (!File.Exists(filePath))
        {
            return SbomFormat.Unknown;
        }

        // Fall back to sniffing the beginning of the file.
        try
        {
            using var stream = File.OpenRead(filePath);
            using var reader = new StreamReader(stream);

            var sample = new char[DetectionSampleLength];

            // ReadBlock keeps reading until the buffer is full or the stream ends,
            // whereas Read may return early with a partial sample.
            var read = reader.ReadBlock(sample, 0, sample.Length);
            var content = new string(sample, 0, read);

            if (content.Contains("\"spdxVersion\"", StringComparison.OrdinalIgnoreCase) ||
                content.Contains("\"SPDXID\"", StringComparison.OrdinalIgnoreCase))
            {
                return SbomFormat.Spdx;
            }
        }
        catch (Exception ex) when (ex is IOException
                                       or UnauthorizedAccessException
                                       or NotSupportedException
                                       or ArgumentException)
        {
            // Detection is best-effort: an unreadable file is reported as Unknown.
        }

        return SbomFormat.Unknown;
    }

    /// <summary>
    /// Parses the SPDX JSON document stored at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the SPDX JSON file.</param>
    /// <param name="cancellationToken">Token used to cancel reading and parsing.</param>
    /// <returns>
    /// The parse result. A missing file, or any error raised while opening or parsing it,
    /// is reported as a failure result rather than thrown.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="filePath"/> is null, empty, or whitespace.
    /// </exception>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled.
    /// </exception>
    public async Task<SbomParseResult> ParseAsync(string filePath, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(filePath);

        if (!File.Exists(filePath))
        {
            return SbomParseResult.Failure($"File not found: {filePath}", SbomFormat.Spdx);
        }

        try
        {
            await using var stream = File.OpenRead(filePath);
            return await ParseAsync(stream, SbomFormat.Spdx, cancellationToken);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation is deliberately excluded so that it propagates to the caller,
            // matching the stream overload, instead of being reported as a parse failure.
            return SbomParseResult.Failure($"Failed to parse SPDX file: {ex.Message}", SbomFormat.Spdx);
        }
    }

    /// <summary>
    /// Parses an SPDX JSON document from <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">Stream positioned at the start of the JSON document.</param>
    /// <param name="format">Not read by this implementation; results are always reported as SPDX.</param>
    /// <param name="cancellationToken">Token used to cancel reading and parsing.</param>
    /// <returns>
    /// A success result with the subjects sorted by digest and then name, or a failure result
    /// when the content is not well-formed JSON or lacks a usable <c>spdxVersion</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public async Task<SbomParseResult> ParseAsync(Stream stream, SbomFormat format, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(stream);

        try
        {
            using var document = await JsonDocument.ParseAsync(stream, DocumentOptions, cancellationToken);
            var root = document.RootElement;

            // Property lookups throw InvalidOperationException on anything but an object,
            // so well-formed JSON with a different root (array, string, ...) is rejected here.
            if (root.ValueKind != JsonValueKind.Object)
            {
                return SbomParseResult.Failure("Not a valid SPDX document: root is not a JSON object", SbomFormat.Spdx);
            }

            // Validate spdxVersion
            if (!root.TryGetProperty("spdxVersion", out var versionProp))
            {
                return SbomParseResult.Failure("Not a valid SPDX document: missing spdxVersion", SbomFormat.Spdx);
            }

            var rawVersion = GetStringOrNull(versionProp);
            if (string.IsNullOrEmpty(rawVersion) ||
                !rawVersion.StartsWith(SpdxVersionPrefix, StringComparison.OrdinalIgnoreCase))
            {
                return SbomParseResult.Failure("Not a valid SPDX document: invalid spdxVersion", SbomFormat.Spdx);
            }

            // Extract version number (e.g., "SPDX-2.3" -> "2.3")
            var specVersion = rawVersion[SpdxVersionPrefix.Length..];

            // The document namespace serves as the serial number.
            var serialNumber = GetStringProperty(root, "documentNamespace");

            // Creation timestamp and generator tool both live under creationInfo.
            DateTimeOffset? createdAt = null;
            string? generatorTool = null;
            if (root.TryGetProperty("creationInfo", out var creationInfo) &&
                creationInfo.ValueKind == JsonValueKind.Object)
            {
                if (TryParseTimestamp(GetStringProperty(creationInfo, "created"), out var parsed))
                {
                    createdAt = parsed;
                }

                generatorTool = ExtractGeneratorTool(creationInfo);
            }

            // SPDX identifiers listed in documentDescribes mark the primary package.
            var describedIds = CollectDescribedIds(root);

            SbomSubject? primarySubject = null;
            var subjects = new List<SbomSubject>();
            var totalComponentCount = 0;

            if (TryGetArray(root, "packages", out var packages))
            {
                foreach (var package in packages.EnumerateArray())
                {
                    // Every entry counts, including those that cannot be indexed by digest.
                    totalComponentCount++;

                    var subject = ParsePackage(package);
                    if (subject is null)
                    {
                        continue;
                    }

                    subjects.Add(subject);

                    // The first described package, in document order, becomes the primary subject.
                    if (subject.SpdxId is not null && describedIds.Contains(subject.SpdxId))
                    {
                        primarySubject ??= subject;
                    }
                }
            }

            // Sort subjects for deterministic ordering
            subjects = subjects
                .OrderBy(s => s.Digest, StringComparer.Ordinal)
                .ThenBy(s => s.Name ?? string.Empty, StringComparer.Ordinal)
                .ToList();

            return SbomParseResult.Success(
                format: SbomFormat.Spdx,
                subjects: subjects,
                specVersion: specVersion,
                serialNumber: serialNumber,
                createdAt: createdAt,
                generatorTool: generatorTool,
                primarySubject: primarySubject,
                totalComponentCount: totalComponentCount);
        }
        catch (JsonException ex)
        {
            return SbomParseResult.Failure($"JSON parsing error: {ex.Message}", SbomFormat.Spdx);
        }
    }

    /// <summary>
    /// Joins the names of all <c>Tool:</c> entries in <c>creationInfo.creators</c> with ", ".
    /// </summary>
    /// <returns>The joined tool names, or <c>null</c> when there are none.</returns>
    private static string? ExtractGeneratorTool(JsonElement creationInfo)
    {
        if (!TryGetArray(creationInfo, "creators", out var creators))
        {
            return null;
        }

        var tools = new List<string>();
        foreach (var creator in creators.EnumerateArray())
        {
            var text = GetStringOrNull(creator);
            if (text is not null && text.StartsWith(ToolCreatorPrefix, StringComparison.OrdinalIgnoreCase))
            {
                tools.Add(text[ToolCreatorPrefix.Length..].Trim());
            }
        }

        return tools.Count > 0 ? string.Join(", ", tools) : null;
    }

    /// <summary>
    /// Collects the non-empty string entries of the root <c>documentDescribes</c> array.
    /// </summary>
    private static HashSet<string> CollectDescribedIds(JsonElement root)
    {
        var describedIds = new HashSet<string>(StringComparer.Ordinal);

        if (TryGetArray(root, "documentDescribes", out var describes))
        {
            foreach (var entry in describes.EnumerateArray())
            {
                var spdxId = GetStringOrNull(entry);
                if (!string.IsNullOrEmpty(spdxId))
                {
                    describedIds.Add(spdxId);
                }
            }
        }

        return describedIds;
    }

    /// <summary>
    /// Converts one entry of the <c>packages</c> array into a subject.
    /// </summary>
    /// <returns>
    /// The subject, or <c>null</c> when the entry is not a JSON object or has no SHA-256
    /// checksum and therefore cannot be indexed by digest.
    /// </returns>
    private static SbomSubject? ParsePackage(JsonElement package)
    {
        if (package.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        // Extract checksums; a later entry for the same algorithm overwrites an earlier one.
        var hashes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        if (TryGetArray(package, "checksums", out var checksums))
        {
            foreach (var checksum in checksums.EnumerateArray())
            {
                var algorithm = GetStringProperty(checksum, "algorithm");
                var value = GetStringProperty(checksum, "checksumValue");
                if (!string.IsNullOrEmpty(algorithm) && !string.IsNullOrEmpty(value))
                {
                    hashes[algorithm] = value;
                }
            }
        }

        // Only a SHA-256 checksum is accepted as the primary digest.
        var digest = TrySelectSha256Digest(hashes);

        // If no digest, this package can't be indexed by digest
        if (string.IsNullOrEmpty(digest))
        {
            return null;
        }

        // SPDX uses external refs for purl; the first purl reference that carries a locator wins.
        string? purl = null;
        if (TryGetArray(package, "externalRefs", out var externalRefs))
        {
            foreach (var extRef in externalRefs.EnumerateArray())
            {
                var referenceType = GetStringProperty(extRef, "referenceType");
                if (string.Equals(referenceType, "purl", StringComparison.OrdinalIgnoreCase) &&
                    extRef.TryGetProperty("referenceLocator", out var locator))
                {
                    purl = GetStringOrNull(locator);
                    break;
                }
            }
        }

        return new SbomSubject
        {
            Digest = digest,
            Name = GetStringProperty(package, "name"),
            Version = GetStringProperty(package, "versionInfo"),
            Purl = purl,
            // SPDX doesn't have component type directly, so primaryPackagePurpose is used.
            Type = GetStringProperty(package, "primaryPackagePurpose"),
            SpdxId = GetStringProperty(package, "SPDXID"),
            Hashes = hashes
        };
    }

    /// <summary>
    /// Returns the element's string value, or <c>null</c> when the element is not a JSON string.
    /// </summary>
    private static string? GetStringOrNull(JsonElement element) =>
        element.ValueKind == JsonValueKind.String ? element.GetString() : null;

    /// <summary>
    /// Reads a string property, returning <c>null</c> when <paramref name="element"/> is not an
    /// object, the property is absent, or its value is not a JSON string.
    /// </summary>
    private static string? GetStringProperty(JsonElement element, string propertyName) =>
        element.ValueKind == JsonValueKind.Object && element.TryGetProperty(propertyName, out var property)
            ? GetStringOrNull(property)
            : null;

    /// <summary>
    /// Looks up an array-valued property without throwing on unexpected JSON shapes.
    /// </summary>
    /// <returns><c>true</c> when the property exists and is a JSON array.</returns>
    private static bool TryGetArray(JsonElement element, string propertyName, out JsonElement array)
    {
        if (element.ValueKind == JsonValueKind.Object &&
            element.TryGetProperty(propertyName, out array) &&
            array.ValueKind == JsonValueKind.Array)
        {
            return true;
        }

        array = default;
        return false;
    }

    /// <summary>Delegates digest normalization to <c>ArtifactIndex.NormalizeDigest</c>.</summary>
    private static string NormalizeDigest(string digest)
    {
        return ArtifactIndex.NormalizeDigest(digest);
    }

    /// <summary>
    /// Parses a creation timestamp using the invariant culture.
    /// </summary>
    /// <returns><c>true</c> when <paramref name="value"/> is non-blank and parses successfully.</returns>
    private static bool TryParseTimestamp(string? value, out DateTimeOffset timestamp)
    {
        timestamp = default;

        // A value without an explicit offset is treated as UTC rather than local time,
        // so the result does not depend on the time zone of the machine doing the parsing.
        return !string.IsNullOrWhiteSpace(value) &&
               DateTimeOffset.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out timestamp);
    }

    /// <summary>
    /// Picks the SHA-256 checksum from <paramref name="hashes"/> and returns it as a
    /// normalized <c>sha256:</c>-prefixed digest.
    /// </summary>
    /// <returns>The normalized digest, or <c>null</c> when no SHA-256 entry exists.</returns>
    private static string? TrySelectSha256Digest(IReadOnlyDictionary<string, string> hashes)
    {
        foreach (var key in Sha256AlgorithmNames)
        {
            if (hashes.TryGetValue(key, out var sha256))
            {
                return NormalizeDigest("sha256:" + sha256);
            }
        }

        return null;
    }
}
|