// =============================================================================
// SpdxParser.cs
// SPDX SBOM parser implementation
// Part of Step 2: Evidence Collection (Task T5)
// =============================================================================

using System.Globalization;
using System.Text.Json;

namespace StellaOps.AirGap.Importer.Reconciliation.Parsers;

/// <summary>
/// Parser for SPDX SBOM format (JSON).
/// Supports SPDX 2.2 and 2.3 schemas.
/// </summary>
public sealed class SpdxParser : ISbomParser
{
    /// <summary>Number of leading characters inspected when sniffing a file's content.</summary>
    private const int DetectionPrefixLength = 1024;

    /// <summary>Length of the "SPDX-" prefix stripped from the <c>spdxVersion</c> value.</summary>
    private const int SpdxVersionPrefixLength = 5;

    /// <summary>Length of the "Tool:" prefix stripped from <c>creationInfo.creators</c> entries.</summary>
    private const int ToolPrefixLength = 5;

    /// <summary>Checksum algorithm spellings accepted as SHA-256, in lookup order.</summary>
    private static readonly string[] Sha256AlgorithmNames = { "SHA256", "SHA-256", "sha256" };

    private static readonly JsonDocumentOptions DocumentOptions = new()
    {
        AllowTrailingCommas = true,
        CommentHandling = JsonCommentHandling.Skip
    };

    /// <summary>
    /// Decides whether a file looks like an SPDX document, first by file name and then
    /// by sniffing the beginning of its content.
    /// </summary>
    /// <param name="filePath">Path of the candidate file.</param>
    /// <returns>
    /// <see cref="SbomFormat.Spdx"/> when the path ends with <c>.spdx.json</c> or the first
    /// <see cref="DetectionPrefixLength"/> characters contain <c>"spdxVersion"</c> or
    /// <c>"SPDXID"</c>; otherwise <see cref="SbomFormat.Unknown"/>.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="filePath"/> is null, empty or whitespace.</exception>
    public SbomFormat DetectFormat(string filePath)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(filePath);

        // SPDX files typically end with .spdx.json
        if (filePath.EndsWith(".spdx.json", StringComparison.OrdinalIgnoreCase))
        {
            return SbomFormat.Spdx;
        }

        if (!File.Exists(filePath))
        {
            return SbomFormat.Unknown;
        }

        // Try to detect from content
        try
        {
            using var stream = File.OpenRead(filePath);
            using var reader = new StreamReader(stream);

            var prefix = new char[DetectionPrefixLength];

            // ReadBlock keeps reading until the buffer is full or the stream ends, so the
            // whole prefix is examined even if the underlying stream returns short reads.
            var read = reader.ReadBlock(prefix, 0, prefix.Length);
            var content = new string(prefix, 0, read);

            if (content.Contains("\"spdxVersion\"", StringComparison.OrdinalIgnoreCase) ||
                content.Contains("\"SPDXID\"", StringComparison.OrdinalIgnoreCase))
            {
                return SbomFormat.Spdx;
            }
        }
        catch
        {
            // Detection is deliberately best-effort: an unreadable file is reported as
            // Unknown rather than surfacing an error to the caller.
        }

        return SbomFormat.Unknown;
    }

    /// <summary>
    /// Parses the SPDX JSON document stored at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the SPDX JSON file.</param>
    /// <param name="cancellationToken">Token used to cancel the read/parse.</param>
    /// <returns>
    /// The parse result. A missing file or any non-cancellation error while opening or
    /// parsing the file is reported as a failure result instead of being thrown.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="filePath"/> is null, empty or whitespace.</exception>
    /// <exception cref="OperationCanceledException">The operation was cancelled.</exception>
    public async Task<SbomParseResult> ParseAsync(string filePath, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(filePath);

        if (!File.Exists(filePath))
        {
            return SbomParseResult.Failure($"File not found: {filePath}", SbomFormat.Spdx);
        }

        try
        {
            await using var stream = File.OpenRead(filePath);
            return await ParseAsync(stream, SbomFormat.Spdx, cancellationToken);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation is allowed to propagate (matching the stream overload) so that
            // callers can tell "cancelled" apart from "file could not be parsed".
            return SbomParseResult.Failure($"Failed to parse SPDX file: {ex.Message}", SbomFormat.Spdx);
        }
    }

    /// <summary>
    /// Parses an SPDX JSON document from <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">Stream positioned at the start of the JSON document.</param>
    /// <param name="format">
    /// Not consulted by this implementation; results are always reported as
    /// <see cref="SbomFormat.Spdx"/>.
    /// </param>
    /// <param name="cancellationToken">Token used to cancel the read/parse.</param>
    /// <returns>
    /// A success result carrying the packages that have a SHA-256 checksum (sorted by digest,
    /// then name), or a failure result when the JSON is malformed or the document has no
    /// valid <c>spdxVersion</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <remarks>
    /// Only <see cref="JsonException"/> is converted to a failure result here; stream I/O
    /// errors and cancellation propagate to the caller.
    /// </remarks>
    public async Task<SbomParseResult> ParseAsync(Stream stream, SbomFormat format, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(stream);

        try
        {
            using var document = await JsonDocument.ParseAsync(stream, DocumentOptions, cancellationToken);
            var root = document.RootElement;

            // TryGetProperty throws on non-object elements, so reject e.g. a top-level array up front.
            if (root.ValueKind != JsonValueKind.Object)
            {
                return SbomParseResult.Failure("Not a valid SPDX document: root element is not a JSON object", SbomFormat.Spdx);
            }

            // Validate spdxVersion
            if (!root.TryGetProperty("spdxVersion", out var versionProp))
            {
                return SbomParseResult.Failure("Not a valid SPDX document: missing spdxVersion", SbomFormat.Spdx);
            }

            // A non-string spdxVersion (number, object, ...) is treated as invalid rather than throwing.
            var specVersion = versionProp.ValueKind == JsonValueKind.String ? versionProp.GetString() : null;
            if (string.IsNullOrEmpty(specVersion) ||
                !specVersion.StartsWith("SPDX-", StringComparison.OrdinalIgnoreCase))
            {
                return SbomParseResult.Failure("Not a valid SPDX document: invalid spdxVersion", SbomFormat.Spdx);
            }

            // Extract version number (e.g., "SPDX-2.3" -> "2.3")
            specVersion = specVersion[SpdxVersionPrefixLength..];

            // Document namespace serves as the serial number
            var serialNumber = GetStringOrNull(root, "documentNamespace");

            // Creation timestamp and generator tool both live under creationInfo
            DateTimeOffset? createdAt = null;
            string? generatorTool = null;
            if (root.TryGetProperty("creationInfo", out var creationInfo) &&
                creationInfo.ValueKind == JsonValueKind.Object)
            {
                if (TryParseTimestamp(GetStringOrNull(creationInfo, "created"), out var parsed))
                {
                    createdAt = parsed;
                }

                generatorTool = ExtractGeneratorTool(creationInfo);
            }

            // SPDX IDs listed in documentDescribes identify the primary package
            var describedIds = ExtractDescribedIds(root);

            // Extract all packages
            SbomSubject? primarySubject = null;
            var subjects = new List<SbomSubject>();
            var totalComponentCount = 0;

            if (root.TryGetProperty("packages", out var packagesProp) &&
                packagesProp.ValueKind == JsonValueKind.Array)
            {
                foreach (var package in packagesProp.EnumerateArray())
                {
                    // Every array entry is counted, including ones that yield no subject.
                    totalComponentCount++;

                    var subject = ParsePackage(package);
                    if (subject is null)
                    {
                        continue;
                    }

                    subjects.Add(subject);

                    // The first described package in document order becomes the primary subject.
                    if (subject.SpdxId is not null && describedIds.Contains(subject.SpdxId))
                    {
                        primarySubject ??= subject;
                    }
                }
            }

            // Sort subjects for deterministic ordering
            subjects = subjects
                .OrderBy(s => s.Digest, StringComparer.Ordinal)
                .ThenBy(s => s.Name ?? string.Empty, StringComparer.Ordinal)
                .ToList();

            return SbomParseResult.Success(
                format: SbomFormat.Spdx,
                subjects: subjects,
                specVersion: specVersion,
                serialNumber: serialNumber,
                createdAt: createdAt,
                generatorTool: generatorTool,
                primarySubject: primarySubject,
                totalComponentCount: totalComponentCount);
        }
        catch (JsonException ex)
        {
            return SbomParseResult.Failure($"JSON parsing error: {ex.Message}", SbomFormat.Spdx);
        }
    }

    /// <summary>
    /// Joins the names of all <c>Tool:</c> entries in <c>creationInfo.creators</c> with ", ".
    /// </summary>
    /// <param name="creationInfo">The <c>creationInfo</c> object of the document.</param>
    /// <returns>The joined tool names, or <c>null</c> when no tool entry is present.</returns>
    private static string? ExtractGeneratorTool(JsonElement creationInfo)
    {
        if (!creationInfo.TryGetProperty("creators", out var creatorsProp) ||
            creatorsProp.ValueKind != JsonValueKind.Array)
        {
            return null;
        }

        var tools = new List<string>();
        foreach (var creator in creatorsProp.EnumerateArray())
        {
            // Skip non-string entries; GetString() would throw on them.
            if (creator.ValueKind != JsonValueKind.String)
            {
                continue;
            }

            var creatorStr = creator.GetString();
            if (creatorStr is not null && creatorStr.StartsWith("Tool:", StringComparison.OrdinalIgnoreCase))
            {
                tools.Add(creatorStr[ToolPrefixLength..].Trim());
            }
        }

        return tools.Count > 0 ? string.Join(", ", tools) : null;
    }

    /// <summary>
    /// Collects the non-empty string entries of the root <c>documentDescribes</c> array.
    /// </summary>
    /// <param name="root">The document root object.</param>
    /// <returns>The described SPDX IDs (ordinal comparison); empty when the array is absent.</returns>
    private static HashSet<string> ExtractDescribedIds(JsonElement root)
    {
        var describedIds = new HashSet<string>(StringComparer.Ordinal);

        if (root.TryGetProperty("documentDescribes", out var describesProp) &&
            describesProp.ValueKind == JsonValueKind.Array)
        {
            foreach (var id in describesProp.EnumerateArray())
            {
                var spdxId = id.ValueKind == JsonValueKind.String ? id.GetString() : null;
                if (!string.IsNullOrEmpty(spdxId))
                {
                    describedIds.Add(spdxId);
                }
            }
        }

        return describedIds;
    }

    /// <summary>
    /// Converts one entry of the <c>packages</c> array into an <see cref="SbomSubject"/>.
    /// </summary>
    /// <param name="package">The package element.</param>
    /// <returns>
    /// The subject, or <c>null</c> when the entry is not a JSON object or carries no
    /// SHA-256 checksum (such a package cannot be indexed by digest).
    /// </returns>
    private static SbomSubject? ParsePackage(JsonElement package)
    {
        if (package.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        // Extract checksums; algorithm names are matched case-insensitively and a later
        // entry for the same algorithm overwrites an earlier one.
        var hashes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        if (package.TryGetProperty("checksums", out var checksumsProp) &&
            checksumsProp.ValueKind == JsonValueKind.Array)
        {
            foreach (var checksum in checksumsProp.EnumerateArray())
            {
                var alg = GetStringOrNull(checksum, "algorithm");
                var value = GetStringOrNull(checksum, "checksumValue");

                if (!string.IsNullOrEmpty(alg) && !string.IsNullOrEmpty(value))
                {
                    hashes[alg] = value;
                }
            }
        }

        // Determine primary digest (SHA-256 only)
        var digest = TrySelectSha256Digest(hashes);

        // If no digest, this package can't be indexed by digest
        if (string.IsNullOrEmpty(digest))
        {
            return null;
        }

        // SPDX uses external refs for purl; the first purl reference wins
        string? purl = null;
        if (package.TryGetProperty("externalRefs", out var refsProp) &&
            refsProp.ValueKind == JsonValueKind.Array)
        {
            foreach (var extRef in refsProp.EnumerateArray())
            {
                var referenceType = GetStringOrNull(extRef, "referenceType");
                if (referenceType is not null &&
                    referenceType.Equals("purl", StringComparison.OrdinalIgnoreCase) &&
                    extRef.TryGetProperty("referenceLocator", out var locatorProp))
                {
                    purl = locatorProp.ValueKind == JsonValueKind.String ? locatorProp.GetString() : null;
                    break;
                }
            }
        }

        return new SbomSubject
        {
            Digest = digest,
            Name = GetStringOrNull(package, "name"),
            Version = GetStringOrNull(package, "versionInfo"),
            Purl = purl,
            // SPDX doesn't have component type directly, so primaryPackagePurpose is used
            Type = GetStringOrNull(package, "primaryPackagePurpose"),
            SpdxId = GetStringOrNull(package, "SPDXID"),
            Hashes = hashes
        };
    }

    /// <summary>
    /// Reads a string-valued property without throwing on unexpected JSON shapes.
    /// </summary>
    /// <param name="element">The element expected to be a JSON object.</param>
    /// <param name="propertyName">Name of the property to read (case-sensitive).</param>
    /// <returns>
    /// The property's string value, or <c>null</c> when <paramref name="element"/> is not an
    /// object, the property is absent, or its value is not a JSON string.
    /// </returns>
    private static string? GetStringOrNull(JsonElement element, string propertyName)
    {
        if (element.ValueKind == JsonValueKind.Object &&
            element.TryGetProperty(propertyName, out var prop) &&
            prop.ValueKind == JsonValueKind.String)
        {
            return prop.GetString();
        }

        return null;
    }

    /// <summary>Delegates digest normalization to <c>ArtifactIndex.NormalizeDigest</c>.</summary>
    private static string NormalizeDigest(string digest)
    {
        return ArtifactIndex.NormalizeDigest(digest);
    }

    /// <summary>
    /// Parses a timestamp using the invariant culture and round-trip styles.
    /// </summary>
    /// <param name="value">The raw timestamp text; may be null or blank.</param>
    /// <param name="timestamp">The parsed value, or <c>default</c> on failure.</param>
    /// <returns><c>true</c> when <paramref name="value"/> is non-blank and parses successfully.</returns>
    private static bool TryParseTimestamp(string? value, out DateTimeOffset timestamp)
    {
        timestamp = default;
        return !string.IsNullOrWhiteSpace(value) &&
               DateTimeOffset.TryParse(
                   value,
                   CultureInfo.InvariantCulture,
                   DateTimeStyles.RoundtripKind,
                   out timestamp);
    }

    /// <summary>
    /// Looks up a SHA-256 checksum in <paramref name="hashes"/> and returns it as a
    /// normalized <c>sha256:</c>-prefixed digest.
    /// </summary>
    /// <param name="hashes">Checksum values keyed by algorithm name.</param>
    /// <returns>The normalized digest, or <c>null</c> when no SHA-256 entry exists.</returns>
    private static string? TrySelectSha256Digest(IReadOnlyDictionary<string, string> hashes)
    {
        // Several spellings are probed because the supplied dictionary is not guaranteed
        // to use a case-insensitive comparer.
        foreach (var key in Sha256AlgorithmNames)
        {
            if (hashes.TryGetValue(key, out var sha256))
            {
                return NormalizeDigest("sha256:" + sha256);
            }
        }

        return null;
    }
}