// -----------------------------------------------------------------------------
// NpmPackageDownloader.cs
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-004)
// Description: Downloads npm packages from registry.npmjs.org for vulnerability
// surface analysis.
// -----------------------------------------------------------------------------
using System;
using System.Diagnostics;
using System.IO;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SharpCompress.Archives;
using SharpCompress.Archives.Tar;
using SharpCompress.Common;
using SharpCompress.Readers;
namespace StellaOps.Scanner.VulnSurfaces.Download;
/// <summary>
/// Downloads npm packages from registry.npmjs.org or custom registries.
/// npm packages are distributed as .tgz (gzipped tarball) files.
/// </summary>
public sealed class NpmPackageDownloader : IPackageDownloader
{
    private const string DefaultRegistryUrl = "https://registry.npmjs.org";

    // Top-level folder that is stripped from entry names during extraction.
    private const string TarballRootPrefix = "package/";

    // Buffer size used when the archive is copied with a size limit in force.
    private const int CopyBufferSize = 81920;

    private readonly HttpClient _httpClient;
    private readonly ILogger<NpmPackageDownloader> _logger;
    private readonly NpmDownloaderOptions _options;

    /// <summary>
    /// Creates a downloader that uses the supplied HTTP client and logger.
    /// </summary>
    /// <param name="httpClient">Client used for both metadata and tarball requests.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <param name="options">Downloader options; default options are used when null.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="httpClient"/> or <paramref name="logger"/> is null.
    /// </exception>
    public NpmPackageDownloader(
        HttpClient httpClient,
        ILogger<NpmPackageDownloader> logger,
        IOptions<NpmDownloaderOptions> options)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? new NpmDownloaderOptions();
    }

    /// <inheritdoc />
    public string Ecosystem => "npm";

    /// <summary>
    /// Resolves the tarball URL from registry metadata, downloads the archive into
    /// the request's output directory and extracts it next to the archive.
    /// </summary>
    /// <param name="request">Package name, version, output directory and cache settings.</param>
    /// <param name="cancellationToken">Token passed to the HTTP and stream operations.</param>
    /// <returns>
    /// A successful result with the extracted directory and archive path, or a failed
    /// result carrying the error message. Failures are reported through the result,
    /// not thrown; only a null <paramref name="request"/> throws.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async Task<PackageDownloadResult> DownloadAsync(
        PackageDownloadRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);
        var sw = Stopwatch.StartNew();
        try
        {
            // Both name and version end up in file-system paths, so neither may
            // contribute directory separators.
            var packageName = request.PackageName;
            var baseName = $"{GetSafeDirectoryName(packageName)}-{GetSafeDirectoryName(request.Version)}";
            var extractedDir = Path.Combine(request.OutputDirectory, baseName);
            var archivePath = Path.Combine(request.OutputDirectory, $"{baseName}.tgz");

            // Check cache first
            if (request.UseCache && Directory.Exists(extractedDir))
            {
                sw.Stop();
                _logger.LogDebug("Using cached npm package {Package} v{Version}", packageName, request.Version);
                return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed, fromCache: true);
            }

            // Get package metadata to find tarball URL
            var registryUrl = request.RegistryUrl ?? _options.RegistryUrl ?? DefaultRegistryUrl;
            var tarballUrl = await GetTarballUrlAsync(registryUrl, packageName, request.Version, cancellationToken);
            if (tarballUrl is null)
            {
                sw.Stop();
                var error = $"Version {request.Version} not found for package {packageName}";
                _logger.LogWarning("npm package not found: {Error}", error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            _logger.LogDebug("Downloading npm package from {Url}", tarballUrl);
            Directory.CreateDirectory(request.OutputDirectory);

            // Stream the body instead of buffering it in memory so the size limit
            // can be enforced while the archive is being written.
            using var response = await _httpClient.GetAsync(
                tarballUrl,
                HttpCompletionOption.ResponseHeadersRead,
                cancellationToken);
            if (!response.IsSuccessStatusCode)
            {
                sw.Stop();
                var error = $"Failed to download: HTTP {(int)response.StatusCode} {response.ReasonPhrase}";
                _logger.LogWarning("npm download failed for {Package} v{Version}: {Error}",
                    packageName, request.Version, error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            await SaveArchiveAsync(response, archivePath, cancellationToken);

            // Extract .tgz (gzipped tarball), replacing any previous extraction.
            if (Directory.Exists(extractedDir))
            {
                Directory.Delete(extractedDir, recursive: true);
            }

            try
            {
                Directory.CreateDirectory(extractedDir);
                ExtractTgz(archivePath, extractedDir);
            }
            catch
            {
                // The cache check above only tests for the directory's existence, so a
                // half-extracted directory must not survive a failed extraction.
                DeleteQuietly(extractedDir);
                throw;
            }

            sw.Stop();
            _logger.LogDebug("Downloaded and extracted npm {Package} v{Version} in {Duration}ms",
                packageName, request.Version, sw.ElapsedMilliseconds);
            return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed);
        }
        catch (Exception ex)
        {
            // Deliberately broad: every failure (including cancellation) is turned
            // into a failed result rather than an exception.
            sw.Stop();
            _logger.LogWarning(ex, "Failed to download npm package {Package} v{Version}",
                request.PackageName, request.Version);
            return PackageDownloadResult.Fail(ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Gets the tarball URL from the npm registry metadata.
    /// </summary>
    /// <param name="registryUrl">Registry base URL; a trailing slash is tolerated.</param>
    /// <param name="packageName">Package name, optionally scoped (@scope/name).</param>
    /// <param name="version">Exact version key to look up under "versions".</param>
    /// <param name="cancellationToken">Token passed to the HTTP and JSON operations.</param>
    /// <returns>
    /// The value of versions.&lt;version&gt;.dist.tarball, or null when the metadata
    /// request is unsuccessful or the document has no such entry.
    /// </returns>
    private async Task<string?> GetTarballUrlAsync(
        string registryUrl,
        string packageName,
        string version,
        CancellationToken cancellationToken)
    {
        // Encode scoped packages (@scope/name → @scope%2Fname); the leading '@' stays literal.
        var encodedName = Uri.EscapeDataString(packageName).Replace("%40", "@");

        // Trim trailing slashes so "https://host/" does not produce "https://host//name".
        var metadataUrl = $"{registryUrl.TrimEnd('/')}/{encodedName}";

        using var response = await _httpClient.GetAsync(metadataUrl, cancellationToken);
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogDebug("Failed to fetch npm metadata for {Package}: HTTP {StatusCode}",
                packageName, (int)response.StatusCode);
            return null;
        }

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken);

        // Look for versions.<version>.dist.tarball
        if (doc.RootElement.TryGetProperty("versions", out var versions) &&
            versions.TryGetProperty(version, out var versionObj) &&
            versionObj.TryGetProperty("dist", out var dist) &&
            dist.TryGetProperty("tarball", out var tarball))
        {
            return tarball.GetString();
        }

        return null;
    }

    /// <summary>
    /// Writes the response body to <paramref name="archivePath"/>, enforcing
    /// <see cref="NpmDownloaderOptions.MaxPackageSize"/> when it is greater than zero.
    /// </summary>
    /// <param name="response">Successful tarball response whose body has not been read yet.</param>
    /// <param name="archivePath">Destination file; created or overwritten.</param>
    /// <param name="cancellationToken">Token passed to the stream operations.</param>
    /// <exception cref="InvalidDataException">The archive exceeds the configured size limit.</exception>
    private async Task SaveArchiveAsync(
        HttpResponseMessage response,
        string archivePath,
        CancellationToken cancellationToken)
    {
        var maxBytes = _options.MaxPackageSize;

        // Reject early when the server announces an oversized body (the lifted
        // comparison is false when no Content-Length is present).
        var declaredLength = response.Content.Headers.ContentLength;
        if (maxBytes > 0 && declaredLength > maxBytes)
        {
            throw new InvalidDataException(
                $"Package archive is {declaredLength} bytes, which exceeds the configured limit of {maxBytes} bytes");
        }

        try
        {
            await using var source = await response.Content.ReadAsStreamAsync(cancellationToken);
            await using var target = File.Create(archivePath);

            if (maxBytes <= 0)
            {
                await source.CopyToAsync(target, cancellationToken);
                return;
            }

            // Content-Length may be absent or wrong, so count the bytes actually received.
            var buffer = new byte[CopyBufferSize];
            long received = 0;
            int read;
            while ((read = await source.ReadAsync(buffer.AsMemory(), cancellationToken)) > 0)
            {
                received += read;
                if (received > maxBytes)
                {
                    throw new InvalidDataException(
                        $"Package archive exceeds the configured limit of {maxBytes} bytes");
                }

                await target.WriteAsync(buffer.AsMemory(0, read), cancellationToken);
            }
        }
        catch
        {
            // Do not leave a truncated archive behind.
            DeleteQuietly(archivePath);
            throw;
        }
    }

    /// <summary>
    /// Extracts a .tgz file (gzipped tarball) to the specified directory.
    /// </summary>
    /// <param name="tgzPath">Path of the downloaded archive.</param>
    /// <param name="destinationDir">Directory that receives the extracted files.</param>
    /// <exception cref="InvalidDataException">
    /// An entry's path resolves outside <paramref name="destinationDir"/>.
    /// </exception>
    private static void ExtractTgz(string tgzPath, string destinationDir)
    {
        // Entry names come from a downloaded archive and are untrusted: every
        // resolved target must stay below this root.
        var rootPath = Path.GetFullPath(destinationDir);
        var rootPrefix = Path.EndsInDirectorySeparator(rootPath)
            ? rootPath
            : rootPath + Path.DirectorySeparatorChar;

        var extractionOptions = new ExtractionOptions
        {
            ExtractFullPath = false,
            Overwrite = true
        };

        // NOTE(review): confirm that ArchiveFactory.Open enumerates the tar entries of a
        // gzip-compressed tarball in the SharpCompress version this project references.
        using var archive = ArchiveFactory.Open(tgzPath);
        foreach (var entry in archive.Entries)
        {
            if (entry.IsDirectory)
            {
                continue;
            }

            // npm packages have a "package/" prefix in the tarball
            var entryPath = entry.Key ?? string.Empty;
            if (entryPath.StartsWith(TarballRootPrefix, StringComparison.OrdinalIgnoreCase))
            {
                entryPath = entryPath[TarballRootPrefix.Length..];
            }

            if (entryPath.Length == 0)
            {
                // Nothing left to name a file with.
                continue;
            }

            // Path.Combine returns a rooted entryPath unchanged and GetFullPath collapses
            // "..", so both escape routes are caught by the prefix check.
            var destPath = Path.GetFullPath(Path.Combine(rootPath, entryPath));
            if (!destPath.StartsWith(rootPrefix, StringComparison.Ordinal))
            {
                throw new InvalidDataException(
                    $"Archive entry '{entry.Key}' resolves outside the extraction directory");
            }

            var destDir = Path.GetDirectoryName(destPath);
            if (!string.IsNullOrEmpty(destDir))
            {
                Directory.CreateDirectory(destDir);
            }

            entry.WriteToFile(destPath, extractionOptions);
        }
    }

    /// <summary>
    /// Converts a package name to a safe directory name.
    /// Handles scoped packages like @scope/name → scope-name
    /// </summary>
    /// <param name="packageName">Name to convert.</param>
    /// <returns>The name with '@' removed and '/' and '\' replaced by '-'.</returns>
    private static string GetSafeDirectoryName(string packageName)
    {
        return packageName
            .Replace("@", string.Empty)
            .Replace("/", "-")
            .Replace("\\", "-");
    }

    /// <summary>
    /// Best-effort removal of a file or directory left behind by a failed step.
    /// I/O and access errors are logged at debug level and otherwise ignored so that
    /// they do not mask the original failure.
    /// </summary>
    /// <param name="path">File or directory to remove.</param>
    private void DeleteQuietly(string path)
    {
        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
            else if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Failed to clean up {Path}", path);
        }
    }
}
/// <summary>
/// Options for npm package downloader.
/// </summary>
public sealed class NpmDownloaderOptions
{
    /// <summary>Custom registry URL (null for registry.npmjs.org).</summary>
    public string? RegistryUrl { get; set; }

    /// <summary>Cache directory for downloaded packages.</summary>
    public string? CacheDirectory { get; set; }

    /// <summary>Maximum package size in bytes (0 for unlimited).</summary>
    public long MaxPackageSize { get; set; }
}