- Implemented CanonJson class for deterministic JSON serialization and hashing. - Added unit tests for CanonJson functionality, covering various scenarios including key sorting, handling of nested objects, arrays, and special characters. - Created project files for the Canonical JSON library and its tests, including necessary package references. - Added README.md for library usage and API reference. - Introduced RabbitMqIntegrationFactAttribute for conditional RabbitMQ integration tests.
239 lines
8.4 KiB
C#
239 lines
8.4 KiB
C#
// -----------------------------------------------------------------------------
|
|
// NpmPackageDownloader.cs
|
|
// Sprint: SPRINT_3700_0002_0001_vuln_surfaces_core (SURF-004)
|
|
// Description: Downloads npm packages from registry.npmjs.org for vulnerability
|
|
// surface analysis.
|
|
// -----------------------------------------------------------------------------
|
|
|
|
using System;
|
|
using System.Diagnostics;
|
|
using System.IO;
|
|
using System.Net.Http;
|
|
using System.Text.Json;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using SharpCompress.Archives;
|
|
using SharpCompress.Archives.Tar;
|
|
using SharpCompress.Common;
|
|
using SharpCompress.Readers;
|
|
|
|
namespace StellaOps.Scanner.VulnSurfaces.Download;
|
|
|
|
/// <summary>
/// Downloads npm packages from registry.npmjs.org or a custom registry and
/// extracts them to a local directory.
/// npm packages are distributed as .tgz (gzipped tarball) files.
/// </summary>
public sealed class NpmPackageDownloader : IPackageDownloader
{
    private const string DefaultRegistryUrl = "https://registry.npmjs.org";

    // Top-level folder that npm tarballs wrap their content in.
    private const string PackagePrefix = "package/";

    // Buffer size used when the download has to be copied with a size limit.
    private const int CopyBufferSize = 81920;

    private readonly HttpClient _httpClient;
    private readonly ILogger<NpmPackageDownloader> _logger;
    private readonly NpmDownloaderOptions _options;

    /// <summary>
    /// Creates a new downloader.
    /// </summary>
    /// <param name="httpClient">HTTP client used for registry metadata and tarball requests.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    /// <param name="options">Downloader options; default options are used when no value is supplied.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="httpClient"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public NpmPackageDownloader(
        HttpClient httpClient,
        ILogger<NpmPackageDownloader> logger,
        IOptions<NpmDownloaderOptions> options)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _options = options?.Value ?? new NpmDownloaderOptions();
    }

    /// <inheritdoc />
    public string Ecosystem => "npm";

    /// <inheritdoc />
    /// <remarks>
    /// Every failure after argument validation (HTTP errors, I/O errors, malformed
    /// archives, cancellation) is logged and reported as a failed
    /// <see cref="PackageDownloadResult"/> instead of being thrown.
    /// </remarks>
    public async Task<PackageDownloadResult> DownloadAsync(
        PackageDownloadRequest request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var sw = Stopwatch.StartNew();

        try
        {
            // The package name is sent to the registry as given; only the on-disk
            // names are sanitized. The version is sanitized as well so that a value
            // containing path separators cannot escape the output directory.
            var packageName = request.PackageName;
            var safePackageName = GetSafeDirectoryName(packageName);
            var safeVersion = GetSafeDirectoryName(request.Version);
            var extractedDir = Path.Combine(request.OutputDirectory, $"{safePackageName}-{safeVersion}");
            var archivePath = Path.Combine(request.OutputDirectory, $"{safePackageName}-{safeVersion}.tgz");

            // Check cache first
            if (request.UseCache && Directory.Exists(extractedDir))
            {
                sw.Stop();
                _logger.LogDebug("Using cached npm package {Package} v{Version}", packageName, request.Version);
                return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed, fromCache: true);
            }

            // Get package metadata to find tarball URL
            var registryUrl = request.RegistryUrl ?? _options.RegistryUrl ?? DefaultRegistryUrl;
            var tarballUrl = await GetTarballUrlAsync(registryUrl, packageName, request.Version, cancellationToken);

            if (tarballUrl is null)
            {
                sw.Stop();
                var error = $"Version {request.Version} not found for package {packageName}";
                _logger.LogWarning("npm package not found: {Error}", error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            _logger.LogDebug("Downloading npm package from {Url}", tarballUrl);

            // Download tarball
            Directory.CreateDirectory(request.OutputDirectory);

            // ResponseHeadersRead streams the body to disk instead of buffering the
            // whole tarball in memory before the copy starts.
            using var response = await _httpClient.GetAsync(
                tarballUrl,
                HttpCompletionOption.ResponseHeadersRead,
                cancellationToken);

            if (!response.IsSuccessStatusCode)
            {
                sw.Stop();
                var error = $"Failed to download: HTTP {(int)response.StatusCode} {response.ReasonPhrase}";
                _logger.LogWarning("npm download failed for {Package} v{Version}: {Error}",
                    packageName, request.Version, error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            // Reject early when the server already announces an oversized body.
            var maxPackageSize = _options.MaxPackageSize;
            if (maxPackageSize > 0 &&
                response.Content.Headers.ContentLength is long declaredLength &&
                declaredLength > maxPackageSize)
            {
                sw.Stop();
                var error = $"Package size {declaredLength} bytes exceeds the configured limit of {maxPackageSize} bytes";
                _logger.LogWarning("npm download failed for {Package} v{Version}: {Error}",
                    packageName, request.Version, error);
                return PackageDownloadResult.Fail(error, sw.Elapsed);
            }

            // Save archive
            await SaveArchiveAsync(response.Content, archivePath, maxPackageSize, cancellationToken);

            // Extract .tgz (gzipped tarball)
            if (Directory.Exists(extractedDir))
            {
                Directory.Delete(extractedDir, recursive: true);
            }

            Directory.CreateDirectory(extractedDir);

            try
            {
                ExtractTgz(archivePath, extractedDir);
            }
            catch
            {
                // A half-extracted directory would be served as a cache hit on the
                // next request, so remove it before reporting the failure.
                TryDeleteDirectory(extractedDir);
                throw;
            }

            sw.Stop();
            _logger.LogDebug("Downloaded and extracted npm {Package} v{Version} in {Duration}ms",
                packageName, request.Version, sw.ElapsedMilliseconds);

            return PackageDownloadResult.Ok(extractedDir, archivePath, sw.Elapsed);
        }
        catch (Exception ex)
        {
            sw.Stop();
            _logger.LogWarning(ex, "Failed to download npm package {Package} v{Version}",
                request.PackageName, request.Version);
            return PackageDownloadResult.Fail(ex.Message, sw.Elapsed);
        }
    }

    /// <summary>
    /// Gets the tarball URL from the npm registry metadata.
    /// </summary>
    /// <param name="registryUrl">Base URL of the registry; a trailing slash is tolerated.</param>
    /// <param name="packageName">Package name, optionally scoped (<c>@scope/name</c>).</param>
    /// <param name="version">Exact version key to look up under <c>versions</c>.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>
    /// The value of <c>versions.&lt;version&gt;.dist.tarball</c>, or <c>null</c> when the
    /// metadata request fails or the document does not contain that entry.
    /// </returns>
    private async Task<string?> GetTarballUrlAsync(
        string registryUrl,
        string packageName,
        string version,
        CancellationToken cancellationToken)
    {
        // Encode scoped packages (@scope/name → @scope%2Fname)
        var encodedName = Uri.EscapeDataString(packageName).Replace("%40", "@");

        // Trim so that a configured "https://host/" does not produce "https://host//name".
        var metadataUrl = $"{registryUrl.TrimEnd('/')}/{encodedName}";

        using var response = await _httpClient.GetAsync(
            metadataUrl,
            HttpCompletionOption.ResponseHeadersRead,
            cancellationToken);

        if (!response.IsSuccessStatusCode)
        {
            _logger.LogDebug("Failed to fetch npm metadata for {Package}: HTTP {StatusCode}",
                packageName, (int)response.StatusCode);
            return null;
        }

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken);

        // Look for versions.<version>.dist.tarball. TryGetProperty throws on
        // non-object elements, so each level is checked before descending.
        var root = doc.RootElement;
        if (root.ValueKind == JsonValueKind.Object &&
            root.TryGetProperty("versions", out var versions) &&
            versions.ValueKind == JsonValueKind.Object &&
            versions.TryGetProperty(version, out var versionObj) &&
            versionObj.ValueKind == JsonValueKind.Object &&
            versionObj.TryGetProperty("dist", out var dist) &&
            dist.ValueKind == JsonValueKind.Object &&
            dist.TryGetProperty("tarball", out var tarball) &&
            tarball.ValueKind == JsonValueKind.String)
        {
            return tarball.GetString();
        }

        return null;
    }

    /// <summary>
    /// Writes the response body to <paramref name="archivePath"/>, enforcing an
    /// optional size limit. A partially written file is removed on failure.
    /// </summary>
    /// <param name="content">HTTP content to persist.</param>
    /// <param name="archivePath">Destination file; an existing file is overwritten.</param>
    /// <param name="maxBytes">Maximum number of bytes to accept; 0 or less disables the limit.</param>
    /// <param name="cancellationToken">Token used to cancel the copy.</param>
    /// <exception cref="InvalidDataException">The body is larger than <paramref name="maxBytes"/>.</exception>
    private static async Task SaveArchiveAsync(
        HttpContent content,
        string archivePath,
        long maxBytes,
        CancellationToken cancellationToken)
    {
        try
        {
            await using var target = File.Create(archivePath);

            if (maxBytes <= 0)
            {
                await content.CopyToAsync(target, cancellationToken);
                return;
            }

            // Content-Length may be absent or wrong, so the limit is also enforced
            // on the number of bytes actually received.
            await using var source = await content.ReadAsStreamAsync(cancellationToken);
            var buffer = new byte[CopyBufferSize];
            long total = 0;
            int read;

            while ((read = await source.ReadAsync(buffer.AsMemory(), cancellationToken)) > 0)
            {
                total += read;
                if (total > maxBytes)
                {
                    throw new InvalidDataException(
                        $"Package exceeds the configured limit of {maxBytes} bytes");
                }

                await target.WriteAsync(buffer.AsMemory(0, read), cancellationToken);
            }
        }
        catch
        {
            // The stream is already disposed here (its scope is the try block),
            // so the truncated archive can be removed.
            TryDeleteFile(archivePath);
            throw;
        }
    }

    /// <summary>
    /// Extracts a .tgz file (gzipped tarball) to the specified directory.
    /// </summary>
    /// <param name="tgzPath">Path of the archive to read.</param>
    /// <param name="destinationDir">Directory that receives the extracted files.</param>
    /// <exception cref="InvalidDataException">
    /// An entry would be written outside of <paramref name="destinationDir"/>.
    /// </exception>
    private static void ExtractTgz(string tgzPath, string destinationDir)
    {
        // Canonical root with a trailing separator, so that a sibling such as
        // "<root>-evil" does not pass the prefix check below.
        var root = Path.GetFullPath(destinationDir);
        var rootPrefix = Path.EndsInDirectorySeparator(root)
            ? root
            : root + Path.DirectorySeparatorChar;

        using var archive = ArchiveFactory.Open(tgzPath);

        foreach (var entry in archive.Entries)
        {
            if (entry.IsDirectory)
            {
                continue;
            }

            // npm packages have a "package/" prefix in the tarball
            var entryPath = entry.Key ?? string.Empty;
            if (entryPath.StartsWith(PackagePrefix, StringComparison.OrdinalIgnoreCase))
            {
                entryPath = entryPath[PackagePrefix.Length..];
            }

            if (entryPath.Length == 0)
            {
                // Nothing left to name a file with.
                continue;
            }

            // Resolve ".." segments and absolute entry names, then refuse anything
            // that lands outside the destination (path traversal / "zip slip").
            var destPath = Path.GetFullPath(Path.Combine(root, entryPath));
            if (!destPath.StartsWith(rootPrefix, StringComparison.Ordinal))
            {
                throw new InvalidDataException(
                    $"Archive entry '{entry.Key}' would be extracted outside of the destination directory");
            }

            var destDir = Path.GetDirectoryName(destPath);
            if (!string.IsNullOrEmpty(destDir))
            {
                Directory.CreateDirectory(destDir);
            }

            entry.WriteToFile(destPath, new ExtractionOptions
            {
                ExtractFullPath = false,
                Overwrite = true
            });
        }
    }

    /// <summary>
    /// Converts a package name (or version) to a safe directory name.
    /// Handles scoped packages like @scope/name → scope-name and replaces any
    /// character that is not valid in a file name on the current platform.
    /// </summary>
    /// <param name="packageName">Value to sanitize.</param>
    /// <returns>The value with <c>@</c> removed and separators/invalid characters replaced by <c>-</c>.</returns>
    private static string GetSafeDirectoryName(string packageName)
    {
        var safe = packageName
            .Replace("@", string.Empty)
            .Replace("/", "-")
            .Replace("\\", "-");

        foreach (var invalid in Path.GetInvalidFileNameChars())
        {
            safe = safe.Replace(invalid, '-');
        }

        return safe;
    }

    /// <summary>
    /// Best-effort removal of a file; cleanup failures must not mask the original error.
    /// </summary>
    private static void TryDeleteFile(string path)
    {
        try
        {
            File.Delete(path);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Intentionally ignored: the caller is already propagating a failure.
        }
    }

    /// <summary>
    /// Best-effort recursive removal of a directory; cleanup failures must not mask the original error.
    /// </summary>
    private static void TryDeleteDirectory(string path)
    {
        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Intentionally ignored: the caller is already propagating a failure.
        }
    }
}
|
|
|
|
/// <summary>
/// Configuration settings for the npm package downloader.
/// </summary>
public sealed class NpmDownloaderOptions
{
    /// <summary>
    /// Base URL of a custom npm registry. Leave <c>null</c> to use
    /// registry.npmjs.org.
    /// </summary>
    public string? RegistryUrl { get; set; }

    /// <summary>
    /// Directory in which downloaded packages are cached.
    /// </summary>
    public string? CacheDirectory { get; set; }

    /// <summary>
    /// Upper bound on the size of a package, in bytes. A value of 0 means
    /// unlimited.
    /// </summary>
    public long MaxPackageSize { get; set; }
}
|