// -----------------------------------------------------------------------------
// NpmPackageDownloader.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-004)
// Description: Downloads npm packages from registry.npmjs.org for vulnerability
//              surface analysis.
// -----------------------------------------------------------------------------

using System;
using System.Diagnostics;
using System.IO;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SharpCompress.Archives;
using SharpCompress.Archives.Tar;
using SharpCompress.Common;
using SharpCompress.Readers;

namespace StellaOps.Scanner.VulnSurfaces.Download;

/// <summary>
/// Downloads npm packages from registry.npmjs.org or custom registries.
/// npm packages are distributed as .tgz (gzipped tarball) files.
/// </summary>
public sealed class NpmPackageDownloader : IPackageDownloader
{
    private const string DefaultRegistryUrl = "https://registry.npmjs.org";

    // Chunk size used when streaming the tarball to disk.
    private const int CopyBufferSize = 81920;

    private readonly HttpClient _httpClient;
    private readonly ILogger<NpmPackageDownloader> _logger;
    private readonly NpmDownloaderOptions _options;

    /// <summary>
    /// Creates a downloader that issues registry requests through <paramref name="httpClient"/>.
    /// </summary>
    /// <param name="httpClient">HTTP client used for metadata and tarball requests.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <param name="options">Downloader options; default options are used when no value is supplied.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="httpClient"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public NpmPackageDownloader(
        HttpClient httpClient,
        ILogger<NpmPackageDownloader> logger,
        IOptions<NpmDownloaderOptions> options)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? new NpmDownloaderOptions();
    }

    /// <inheritdoc />
    public string Ecosystem => "npm";

    /// <inheritdoc />
    public async Task<PackageDownloadResult> DownloadAsync(
        PackageDownloadRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();

        try
        {
            // The package name is sent to the registry as-is; only the on-disk name is
            // sanitized (scoped packages look like @scope/name).
            var packageName = request.PackageName;
            var safePackageName = GetSafeDirectoryName(packageName);

            var extractedDir = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}");
            var archivePath = Path.Combine(request.OutputDirectory, $"{safePackageName}-{request.Version}.tgz");

            // Check cache first
            if (request.UseCache && Directory.Exists(extractedDir))
            {
                sw.Stop();
                _logger.LogDebug("Using cached npm package {Package} v{Version}", packageName, request.Version);
                return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed, fromCache: true);
            }

            // Get package metadata to find tarball URL
            var registryUrl = request.RegistryUrl ?? _options.RegistryUrl ?? DefaultRegistryUrl;
            var tarballUrl = await GetTarballUrlAsync(registryUrl, packageName, request.Version, cancellationToken);

            if (tarballUrl is null)
            {
                sw.Stop();
                var error = $"Version {request.Version} not found for package {packageName}";
                _logger.LogWarning("npm package not found: {Error}", error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            _logger.LogDebug("Downloading npm package from {Url}", tarballUrl);

            // Download tarball. ResponseHeadersRead streams the body instead of buffering
            // the whole archive in memory, which is also what makes the size cap effective.
            Directory.CreateDirectory(request.OutputDirectory);

            using var response = await _httpClient.GetAsync(
                tarballUrl,
                HttpCompletionOption.ResponseHeadersRead,
                cancellationToken);

            if (!response.IsSuccessStatusCode)
            {
                sw.Stop();
                var error = $"Failed to download: HTTP {(int)response.StatusCode} {response.ReasonPhrase}";
                _logger.LogWarning("npm download failed for {Package} v{Version}: {Error}", packageName, request.Version, error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            // Zero (or a negative value) means "unlimited".
            var maxSize = _options.MaxPackageSize;
            var declaredLength = response.Content.Headers.ContentLength;

            // Reject early when the server announces an oversized body.
            if (maxSize > 0 && declaredLength > maxSize)
            {
                sw.Stop();
                var error = $"Package size {declaredLength} bytes exceeds the configured limit of {maxSize} bytes";
                _logger.LogWarning("npm download failed for {Package} v{Version}: {Error}", packageName, request.Version, error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            // Save archive
            try
            {
                await using var fs = File.Create(archivePath);
                await using var body = await response.Content.ReadAsStreamAsync(cancellationToken);
                await CopyWithLimitAsync(body, fs, maxSize, cancellationToken);
            }
            catch
            {
                // Do not leave a truncated archive behind.
                DeleteQuietly(archivePath);
                throw;
            }

            // Extract .tgz (gzipped tarball)
            if (Directory.Exists(extractedDir))
            {
                Directory.Delete(extractedDir, recursive: true);
            }

            Directory.CreateDirectory(extractedDir);

            try
            {
                ExtractTgz(archivePath, extractedDir);
            }
            catch
            {
                // The cache check above treats any existing directory as a complete
                // package, so a partially extracted tree must not survive a failure.
                DeleteQuietly(extractedDir);
                throw;
            }

            sw.Stop();
            _logger.LogDebug("Downloaded and extracted npm {Package} v{Version} in {Duration}ms", packageName, request.Version, sw.ElapsedMilliseconds);
            return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed);
        }
        catch (Exception ex)
        {
            // Every failure (including cancellation) is reported as a failed result
            // rather than propagated to the caller.
            sw.Stop();
            _logger.LogWarning(ex, "Failed to download npm package {Package} v{Version}", request.PackageName, request.Version);
            return PackageDownloadResult.Fail(ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Gets the tarball URL from the npm registry metadata.
    /// </summary>
    /// <param name="registryUrl">Registry base URL; a trailing slash is tolerated.</param>
    /// <param name="packageName">Package name, optionally scoped (<c>@scope/name</c>).</param>
    /// <param name="version">Exact version key to look up in the metadata document.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>
    /// The value of <c>versions.&lt;version&gt;.dist.tarball</c>, or <c>null</c> when the
    /// metadata request fails or the document has no such entry.
    /// </returns>
    private async Task<string?> GetTarballUrlAsync(
        string registryUrl,
        string packageName,
        string version,
        CancellationToken cancellationToken)
    {
        // Encode scoped packages (@scope/name → @scope%2Fname)
        var encodedName = Uri.EscapeDataString(packageName).Replace("%40", "@");

        // Trim trailing slashes so a configured "https://host/" does not yield "https://host//name".
        var metadataUrl = $"{registryUrl.TrimEnd('/')}/{encodedName}";

        using var response = await _httpClient.GetAsync(
            metadataUrl,
            HttpCompletionOption.ResponseHeadersRead,
            cancellationToken);

        if (!response.IsSuccessStatusCode)
        {
            _logger.LogDebug("Failed to fetch npm metadata for {Package}: HTTP {StatusCode}", packageName, (int)response.StatusCode);
            return null;
        }

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken);

        // Look for versions.<version>.dist.tarball
        if (doc.RootElement.TryGetProperty("versions", out var versions) &&
            versions.TryGetProperty(version, out var versionObj) &&
            versionObj.TryGetProperty("dist", out var dist) &&
            dist.TryGetProperty("tarball", out var tarball))
        {
            return tarball.GetString();
        }

        return null;
    }

    /// <summary>
    /// Copies <paramref name="source"/> to <paramref name="destination"/>, aborting once more
    /// than <paramref name="maxBytes"/> bytes have been read.
    /// </summary>
    /// <param name="source">Stream to read from.</param>
    /// <param name="destination">Stream to write to.</param>
    /// <param name="maxBytes">Maximum number of bytes to accept; zero or negative disables the limit.</param>
    /// <param name="cancellationToken">Token used to cancel the copy.</param>
    /// <exception cref="InvalidDataException">The source produced more than <paramref name="maxBytes"/> bytes.</exception>
    private static async Task CopyWithLimitAsync(
        Stream source,
        Stream destination,
        long maxBytes,
        CancellationToken cancellationToken)
    {
        var buffer = new byte[CopyBufferSize];
        long total = 0;

        while (true)
        {
            var read = await source.ReadAsync(buffer.AsMemory(), cancellationToken);
            if (read <= 0)
            {
                return;
            }

            total += read;

            // Counted here as well because Content-Length may be absent or understated.
            if (maxBytes > 0 && total > maxBytes)
            {
                throw new InvalidDataException(
                    $"Package download exceeds the configured limit of {maxBytes} bytes");
            }

            await destination.WriteAsync(buffer.AsMemory(0, read), cancellationToken);
        }
    }

    /// <summary>
    /// Extracts a .tgz file (gzipped tarball) to the specified directory.
    /// </summary>
    /// <param name="tgzPath">Path of the archive to open.</param>
    /// <param name="destinationDir">Directory that receives the extracted files.</param>
    /// <exception cref="InvalidDataException">
    /// An archive entry resolves to a path outside <paramref name="destinationDir"/>.
    /// </exception>
    private static void ExtractTgz(string tgzPath, string destinationDir)
    {
        // Canonical root, with a trailing separator so "/out/pkg-evil" is not mistaken
        // for a child of "/out/pkg".
        var rootPath = Path.GetFullPath(destinationDir);
        var rootPrefix = Path.EndsInDirectorySeparator(rootPath)
            ? rootPath
            : rootPath + Path.DirectorySeparatorChar;

        using var archive = ArchiveFactory.Open(tgzPath);

        foreach (var entry in archive.Entries)
        {
            if (entry.IsDirectory)
            {
                continue;
            }

            // npm packages have a "package/" prefix in the tarball
            var entryPath = entry.Key ?? string.Empty;
            if (entryPath.StartsWith("package/", StringComparison.OrdinalIgnoreCase))
            {
                entryPath = entryPath["package/".Length..];
            }

            // Nothing to write for an entry without a file name.
            if (string.IsNullOrEmpty(entryPath))
            {
                continue;
            }

            // Archive contents are untrusted: resolve ".." segments and rooted names, then
            // refuse anything that lands outside the destination directory.
            var destPath = Path.GetFullPath(Path.Combine(rootPath, entryPath));
            if (!destPath.StartsWith(rootPrefix, StringComparison.Ordinal))
            {
                throw new InvalidDataException(
                    $"Archive entry '{entry.Key}' resolves outside the extraction directory");
            }

            var destDir = Path.GetDirectoryName(destPath);
            if (!string.IsNullOrEmpty(destDir))
            {
                Directory.CreateDirectory(destDir);
            }

            entry.WriteToFile(destPath, new ExtractionOptions
            {
                ExtractFullPath = false,
                Overwrite = true
            });
        }
    }

    /// <summary>
    /// Deletes a file or directory tree on a best-effort basis; I/O and permission
    /// failures are logged at debug level and otherwise ignored.
    /// </summary>
    /// <param name="path">File or directory to remove.</param>
    private void DeleteQuietly(string path)
    {
        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
            else if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Failed to clean up {Path}", path);
        }
    }

    /// <summary>
    /// Converts a package name to a safe directory name.
    /// Handles scoped packages like @scope/name → scope-name
    /// </summary>
    /// <param name="packageName">Package name as requested by the caller.</param>
    /// <returns>The name with <c>@</c> removed and path separators replaced by <c>-</c>.</returns>
    private static string GetSafeDirectoryName(string packageName)
    {
        return packageName
            .Replace("@", string.Empty)
            .Replace("/", "-")
            .Replace("\\", "-");
    }
}

/// <summary>
/// Options for npm package downloader.
/// </summary>
public sealed class NpmDownloaderOptions
{
    /// <summary>
    /// Custom registry URL (null for registry.npmjs.org).
    /// </summary>
    public string? RegistryUrl { get; set; }

    /// <summary>
    /// Cache directory for downloaded packages.
    /// </summary>
    public string? CacheDirectory { get; set; }

    /// <summary>
    /// Maximum package size in bytes (0 for unlimited).
    /// Applied to the downloaded tarball, not to the extracted contents.
    /// </summary>
    public long MaxPackageSize { get; set; }
}