Files
git.stella-ops.org/src/SbomService/StellaOps.SbomService/Services/RegistryDiscoveryService.cs

586 lines
21 KiB
C#

// Copyright (c) StellaOps. Licensed under BUSL-1.1.
// SPRINT_20251229_012 REG-SRC-005: Registry discovery service
using System.Net.Http.Headers;
using System.Text.Json;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Options;
using StellaOps.SbomService.Models;
using StellaOps.SbomService.Repositories;
namespace StellaOps.SbomService.Services;
/// <summary>
/// Service for discovering repositories and tags from container registries.
/// Supports OCI Distribution Spec compliant registries.
/// </summary>
/// <remarks>
/// Each operation reports its outcome through the returned result record, which carries a
/// <c>Success</c> flag and an optional <c>Error</c> message alongside the discovered items.
/// </remarks>
public interface IRegistryDiscoveryService
{
/// <summary>
/// Discover repositories in a registry source.
/// </summary>
/// <param name="sourceId">
/// Identifier of the registry source. The implementation in this file requires it to parse
/// as a <see cref="Guid"/> and reports a failed result otherwise.
/// </param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The discovery outcome together with the repository names found.</returns>
Task<DiscoveryResult> DiscoverRepositoriesAsync(
string sourceId,
CancellationToken cancellationToken = default);
/// <summary>
/// Discover tags for a specific repository.
/// </summary>
/// <param name="sourceId">Identifier of the registry source that hosts the repository.</param>
/// <param name="repository">Name of the repository whose tags are listed.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The discovery outcome together with the tags found for <paramref name="repository"/>.</returns>
Task<TagDiscoveryResult> DiscoverTagsAsync(
string sourceId,
string repository,
CancellationToken cancellationToken = default);
/// <summary>
/// Discover all images (repositories + tags) matching the source's filters.
/// </summary>
/// <param name="sourceId">Identifier of the registry source to enumerate.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>The discovery outcome together with one entry per discovered repository/tag pair.</returns>
Task<ImageDiscoveryResult> DiscoverImagesAsync(
string sourceId,
CancellationToken cancellationToken = default);
}
/// <summary>
/// <see cref="IRegistryDiscoveryService"/> implementation that queries a registry over HTTP:
/// repositories come from <c>/v2/_catalog</c>, tags from <c>/v2/{repository}/tags/list</c>,
/// both following <c>Link</c> pagination headers. Results are narrowed by the source's
/// allow/deny glob lists, and every outbound URL is checked against the configured
/// scheme/host policy.
/// </summary>
public class RegistryDiscoveryService : IRegistryDiscoveryService
{
// Looks up registry source definitions by id.
private readonly IRegistrySourceRepository _sourceRepo;
// Supplies the named "RegistryDiscovery" HttpClient used for all registry calls.
private readonly IHttpClientFactory _httpClientFactory;
private readonly ILogger<RegistryDiscoveryService> _logger;
// HTTP settings (timeout, allowed schemes/hosts, inline-credential switch); defaults apply when none are injected.
private readonly RegistryHttpOptions _httpOptions;
// Outbound URL policy inputs, computed once in the constructor from _httpOptions.
private readonly IReadOnlySet<string> _allowedSchemes;
private readonly IReadOnlyList<string> _allowedHosts;
// Produced by OutboundUrlPolicy.NormalizeHosts; presumably true when the host list permits any host — confirm against OutboundUrlPolicy.
private readonly bool _allowAllHosts;
/// <summary>
/// Wires up the service's collaborators and derives the outbound URL policy inputs
/// (allowed schemes and hosts) from the supplied HTTP options.
/// </summary>
/// <param name="sourceRepo">Repository used to load registry source definitions.</param>
/// <param name="httpClientFactory">Factory that creates the HTTP clients used for registry calls.</param>
/// <param name="logger">Logger for discovery diagnostics.</param>
/// <param name="httpOptions">
/// Optional HTTP settings. A default <see cref="RegistryHttpOptions"/> is used when the
/// argument or its value is <c>null</c>.
/// </param>
public RegistryDiscoveryService(
    IRegistrySourceRepository sourceRepo,
    IHttpClientFactory httpClientFactory,
    ILogger<RegistryDiscoveryService> logger,
    IOptions<RegistryHttpOptions>? httpOptions = null)
{
    (_sourceRepo, _httpClientFactory, _logger) = (sourceRepo, httpClientFactory, logger);

    var effectiveOptions = httpOptions?.Value ?? new RegistryHttpOptions();
    _httpOptions = effectiveOptions;
    _allowedSchemes = OutboundUrlPolicy.NormalizeSchemes(effectiveOptions.AllowedSchemes);
    _allowedHosts = OutboundUrlPolicy.NormalizeHosts(effectiveOptions.AllowedHosts, out _allowAllHosts);
}
/// <summary>
/// Lists the repositories of a registry source via <c>/v2/_catalog</c>, following
/// <c>Link</c> pagination and keeping only the names that pass the source's repository filters.
/// </summary>
/// <param name="sourceId">Source identifier; must parse as a <see cref="Guid"/>.</param>
/// <param name="cancellationToken">Token passed to the source lookup and to every HTTP call.</param>
/// <returns>
/// A successful result with the filtered repository names, or a failed result with an error
/// message. When a non-success status or a rejected pagination link interrupts paging, the
/// names collected so far are returned with the failure; exceptions yield an empty list.
/// </returns>
public async Task<DiscoveryResult> DiscoverRepositoriesAsync(
    string sourceId,
    CancellationToken cancellationToken = default)
{
    if (!Guid.TryParse(sourceId, out var sourceGuid))
    {
        return new DiscoveryResult(false, "Invalid source ID format", []);
    }

    var source = await _sourceRepo.GetByIdAsync(sourceGuid, cancellationToken);
    if (source is null)
    {
        return new DiscoveryResult(false, "Source not found", []);
    }

    try
    {
        if (!TryGetRegistryBaseUri(source.RegistryUrl, out var registryUri, out var urlError))
        {
            _logger.LogWarning("Registry URL rejected for source {SourceId}: {Error}", sourceId, urlError);
            return new DiscoveryResult(false, urlError, []);
        }

        if (!TryCreateHttpClient(source, registryUri, out var client, out var credentialError))
        {
            _logger.LogWarning("Registry credentials rejected for source {SourceId}: {Error}", sourceId, credentialError);
            return new DiscoveryResult(false, credentialError ?? "Invalid registry credentials", []);
        }

        var repositories = new List<string>();
        Uri? nextLink = new Uri(registryUri, "/v2/_catalog");

        // Paginate through repository list
        while (nextLink is not null)
        {
            // Dispose each page's response so its connection and content buffers are released
            // as soon as the page has been processed.
            using var response = await client.GetAsync(nextLink, cancellationToken);
            if (!response.IsSuccessStatusCode)
            {
                var error = await response.Content.ReadAsStringAsync(cancellationToken);
                _logger.LogWarning("Failed to list repositories for {SourceId}: {Status} - {Error}",
                    sourceId, response.StatusCode, error);
                return new DiscoveryResult(false, $"Registry returned {response.StatusCode}", repositories);
            }

            var content = await response.Content.ReadAsStringAsync(cancellationToken);

            // JsonDocument is IDisposable; disposing releases the memory it holds for the parsed page.
            using var catalog = JsonDocument.Parse(content);

            // Guard the shape before enumerating: TryGetProperty throws on a non-object root and
            // EnumerateArray throws on anything but an array (e.g. "repositories": null).
            if (catalog.RootElement.ValueKind == JsonValueKind.Object &&
                catalog.RootElement.TryGetProperty("repositories", out var repos) &&
                repos.ValueKind == JsonValueKind.Array)
            {
                foreach (var repo in repos.EnumerateArray())
                {
                    // GetString throws for non-string elements; skip them instead of failing the page.
                    if (repo.ValueKind != JsonValueKind.String)
                    {
                        continue;
                    }

                    var repoName = repo.GetString();
                    if (!string.IsNullOrEmpty(repoName) && MatchesRepositoryFilters(repoName, source))
                    {
                        repositories.Add(repoName);
                    }
                }
            }

            // Check for pagination link
            nextLink = ResolveNextLink(registryUri, ExtractNextLink(response.Headers), out var linkError);
            if (linkError is not null)
            {
                _logger.LogWarning("Registry pagination link rejected for {SourceId}: {Error}", sourceId, linkError);
                return new DiscoveryResult(false, linkError, repositories);
            }
        }

        _logger.LogInformation("Discovered {Count} repositories for source {SourceId}",
            repositories.Count, sourceId);
        return new DiscoveryResult(true, null, repositories);
    }
    catch (HttpRequestException ex)
    {
        _logger.LogError(ex, "Network error discovering repositories for source {SourceId}", sourceId);
        return new DiscoveryResult(false, $"Network error: {ex.Message}", []);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Unexpected error discovering repositories for source {SourceId}", sourceId);
        return new DiscoveryResult(false, $"Unexpected error: {ex.Message}", []);
    }
}
/// <summary>
/// Lists the tags of one repository via <c>/v2/{repository}/tags/list</c>, following
/// <c>Link</c> pagination and keeping only tags that pass the source's tag filters. For each
/// retained tag a separate manifest request is issued to look up its digest.
/// </summary>
/// <param name="sourceId">Source identifier; must parse as a <see cref="Guid"/>.</param>
/// <param name="repository">Repository name, inserted into the request path as given.</param>
/// <param name="cancellationToken">Token passed to the source lookup and to every HTTP call.</param>
/// <returns>
/// A successful result with the filtered tags, or a failed result with an error message. When a
/// non-success status or a rejected pagination link interrupts paging, the tags collected so far
/// are returned with the failure; exceptions yield an empty list.
/// </returns>
public async Task<TagDiscoveryResult> DiscoverTagsAsync(
    string sourceId,
    string repository,
    CancellationToken cancellationToken = default)
{
    if (!Guid.TryParse(sourceId, out var sourceGuid))
    {
        return new TagDiscoveryResult(false, "Invalid source ID format", repository, []);
    }

    var source = await _sourceRepo.GetByIdAsync(sourceGuid, cancellationToken);
    if (source is null)
    {
        return new TagDiscoveryResult(false, "Source not found", repository, []);
    }

    try
    {
        if (!TryGetRegistryBaseUri(source.RegistryUrl, out var registryUri, out var urlError))
        {
            _logger.LogWarning("Registry URL rejected for source {SourceId}: {Error}", sourceId, urlError);
            return new TagDiscoveryResult(false, urlError, repository, []);
        }

        if (!TryCreateHttpClient(source, registryUri, out var client, out var credentialError))
        {
            _logger.LogWarning("Registry credentials rejected for source {SourceId}: {Error}", sourceId, credentialError);
            return new TagDiscoveryResult(false, credentialError ?? "Invalid registry credentials", repository, []);
        }

        var tags = new List<TagInfo>();
        Uri? nextLink = new Uri(registryUri, $"/v2/{repository}/tags/list");

        while (nextLink is not null)
        {
            // Dispose each page's response so its connection and content buffers are released
            // as soon as the page has been processed.
            using var response = await client.GetAsync(nextLink, cancellationToken);
            if (!response.IsSuccessStatusCode)
            {
                var error = await response.Content.ReadAsStringAsync(cancellationToken);
                _logger.LogWarning("Failed to list tags for {Repository} in source {SourceId}: {Status} - {Error}",
                    repository, sourceId, response.StatusCode, error);
                return new TagDiscoveryResult(false, $"Registry returned {response.StatusCode}", repository, tags);
            }

            var content = await response.Content.ReadAsStringAsync(cancellationToken);

            // JsonDocument is IDisposable; disposing releases the memory it holds for the parsed page.
            using var tagList = JsonDocument.Parse(content);

            // TryGetProperty throws on a non-object root, so check the root kind first.
            if (tagList.RootElement.ValueKind == JsonValueKind.Object &&
                tagList.RootElement.TryGetProperty("tags", out var tagsElement) &&
                tagsElement.ValueKind == JsonValueKind.Array)
            {
                foreach (var tag in tagsElement.EnumerateArray())
                {
                    // GetString throws for non-string elements; skip them instead of failing the page.
                    if (tag.ValueKind != JsonValueKind.String)
                    {
                        continue;
                    }

                    var tagName = tag.GetString();
                    if (!string.IsNullOrEmpty(tagName) && MatchesTagFilters(tagName, source))
                    {
                        // Get manifest digest for each tag
                        var digest = await GetManifestDigestAsync(client, registryUri, repository, tagName, cancellationToken);
                        tags.Add(new TagInfo(tagName, digest));
                    }
                }
            }

            nextLink = ResolveNextLink(registryUri, ExtractNextLink(response.Headers), out var linkError);
            if (linkError is not null)
            {
                _logger.LogWarning("Registry pagination link rejected for {SourceId}: {Error}", sourceId, linkError);
                return new TagDiscoveryResult(false, linkError, repository, tags);
            }
        }

        _logger.LogInformation("Discovered {Count} tags for {Repository} in source {SourceId}",
            tags.Count, repository, sourceId);
        return new TagDiscoveryResult(true, null, repository, tags);
    }
    catch (HttpRequestException ex)
    {
        _logger.LogError(ex, "Network error discovering tags for {Repository} in source {SourceId}", repository, sourceId);
        return new TagDiscoveryResult(false, $"Network error: {ex.Message}", repository, []);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Unexpected error discovering tags for {Repository} in source {SourceId}", repository, sourceId);
        return new TagDiscoveryResult(false, $"Unexpected error: {ex.Message}", repository, []);
    }
}
/// <summary>
/// Enumerates every image of a source by first listing its repositories and then listing the
/// tags of each repository in turn.
/// </summary>
/// <param name="sourceId">Source identifier forwarded to the repository and tag discovery calls.</param>
/// <param name="cancellationToken">Token forwarded to the repository and tag discovery calls.</param>
/// <returns>
/// A failed result when repository discovery fails. Otherwise the collected images; the result
/// is successful when no repository failed or at least one image was found, and the error text
/// summarizes the failure count and the first three per-repository errors.
/// </returns>
public async Task<ImageDiscoveryResult> DiscoverImagesAsync(
    string sourceId,
    CancellationToken cancellationToken = default)
{
    var catalog = await DiscoverRepositoriesAsync(sourceId, cancellationToken);
    if (!catalog.Success)
    {
        return new ImageDiscoveryResult(false, catalog.Error, []);
    }

    var discovered = new List<DiscoveredImage>();
    var failures = new List<string>();

    foreach (var repository in catalog.Repositories)
    {
        var listing = await DiscoverTagsAsync(sourceId, repository, cancellationToken);
        if (listing.Success)
        {
            discovered.AddRange(
                listing.Tags.Select(entry => new DiscoveredImage(repository, entry.Name, entry.Digest)));
        }
        else
        {
            // A failing repository does not abort the run; it is only recorded.
            failures.Add($"{repository}: {listing.Error}");
        }
    }

    string? summary = null;
    if (failures.Count > 0)
    {
        summary = $"Completed with {failures.Count} errors: {string.Join("; ", failures.Take(3))}";
    }

    _logger.LogInformation("Discovered {Count} images across {RepoCount} repositories for source {SourceId}",
        discovered.Count, catalog.Repositories.Count, sourceId);

    var succeeded = failures.Count == 0 || discovered.Count > 0;
    return new ImageDiscoveryResult(succeeded, summary, discovered);
}
/// <summary>
/// Creates the "RegistryDiscovery" HTTP client for a source, pointing it at
/// <paramref name="baseUri"/>, capping its timeout at the configured value, registering the
/// Docker v2 and OCI manifest media types as default Accept headers, and applying credentials.
/// </summary>
/// <param name="source">Source whose credential reference is applied to the client.</param>
/// <param name="baseUri">Registry base address assigned to the client.</param>
/// <param name="client">The created client; assigned even when the method returns <c>false</c>.</param>
/// <param name="error">Reason for failure, or <c>null</c> on success.</param>
/// <returns><c>true</c> when the client is ready for use; otherwise <c>false</c>.</returns>
private bool TryCreateHttpClient(
    RegistrySource source,
    Uri baseUri,
    out HttpClient client,
    out string? error)
{
    client = _httpClientFactory.CreateClient("RegistryDiscovery");
    client.BaseAddress = baseUri;

    var configuredTimeout = _httpOptions.Timeout;
    var timeoutIsUsable = configuredTimeout > TimeSpan.Zero && configuredTimeout != Timeout.InfiniteTimeSpan;
    if (!timeoutIsUsable)
    {
        error = "Registry timeout must be a positive, non-infinite duration.";
        return false;
    }

    // Only ever tighten the factory-provided timeout, never loosen it.
    var clientTimeout = client.Timeout;
    if (clientTimeout == Timeout.InfiniteTimeSpan || clientTimeout > configuredTimeout)
    {
        client.Timeout = configuredTimeout;
    }

    // Default Accept headers for manifest requests.
    foreach (var mediaType in new[]
    {
        "application/vnd.docker.distribution.manifest.v2+json",
        "application/vnd.oci.image.manifest.v1+json",
    })
    {
        client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue(mediaType));
    }

    // TODO: In production, resolve AuthRef to get actual credentials.
    // Until then only inline "basic:"/"bearer:" references result in an Authorization header.
    return TryApplyCredentials(client, source, out error);
}
/// <summary>
/// Looks up the digest of a tag's manifest by issuing a HEAD request to
/// <c>/v2/{repository}/manifests/{tag}</c> and reading the <c>Docker-Content-Digest</c> header.
/// </summary>
/// <param name="client">Client used to send the request.</param>
/// <param name="registryUri">Registry base URI the manifest path is resolved against.</param>
/// <param name="repository">Repository name, inserted into the request path as given.</param>
/// <param name="tag">Tag name, inserted into the request path as given.</param>
/// <param name="cancellationToken">Token passed to the HTTP call.</param>
/// <returns>
/// The first digest header value, or <c>null</c> when the request fails, the header is absent,
/// or an exception occurs (the exception is logged at debug level and not rethrown).
/// </returns>
private async Task<string?> GetManifestDigestAsync(
    HttpClient client,
    Uri registryUri,
    string repository,
    string tag,
    CancellationToken cancellationToken)
{
    try
    {
        var url = new Uri(registryUri, $"/v2/{repository}/manifests/{tag}");

        // This runs once per tag, so release the request and response deterministically
        // rather than leaving them for the garbage collector.
        using var request = new HttpRequestMessage(HttpMethod.Head, url);
        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/vnd.docker.distribution.manifest.v2+json"));
        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/vnd.oci.image.manifest.v1+json"));

        using var response = await client.SendAsync(request, cancellationToken);
        if (response.IsSuccessStatusCode &&
            response.Headers.TryGetValues("Docker-Content-Digest", out var digests))
        {
            return digests.FirstOrDefault();
        }
    }
    catch (Exception ex)
    {
        // Best effort: a missing digest must not fail tag discovery.
        _logger.LogDebug(ex, "Failed to get manifest digest for {Repository}:{Tag}", repository, tag);
    }

    return null;
}
/// <summary>
/// Runs a source's registry URL through the outbound URL policy using the configured schemes
/// and hosts. A URL without a scheme is permitted and defaults to <c>https</c>.
/// </summary>
/// <param name="raw">Registry URL as stored on the source.</param>
/// <param name="registryUri">Normalized URI produced by the policy when the call succeeds.</param>
/// <param name="error">Rejection reason reported by the policy.</param>
/// <returns><c>true</c> when the policy accepts the URL; otherwise <c>false</c>.</returns>
private bool TryGetRegistryBaseUri(string raw, out Uri registryUri, out string error) =>
    OutboundUrlPolicy.TryNormalizeUri(
        raw,
        _allowedSchemes,
        _allowedHosts,
        _allowAllHosts,
        allowMissingScheme: true,
        defaultScheme: "https",
        out registryUri!,
        out error);
/// <summary>
/// Interprets the source's credential reference and, for inline <c>basic:user:password</c> or
/// <c>bearer:token</c> references, sets the client's default Authorization header.
/// </summary>
/// <param name="client">Client that receives the Authorization header.</param>
/// <param name="source">Source providing the credential reference.</param>
/// <param name="error">Reason the reference was rejected, or <c>null</c> on success.</param>
/// <returns>
/// <c>true</c> when no reference is set, when it uses an <c>authref://</c>, <c>secret://</c> or
/// <c>vault://</c> prefix (no header is attached for those here), or when an inline credential
/// was applied; <c>false</c> when inline credentials are disabled or the reference is malformed
/// or of an unsupported kind.
/// </returns>
private bool TryApplyCredentials(HttpClient client, RegistrySource source, out string? error)
{
    error = null;

    if (string.IsNullOrWhiteSpace(source.CredentialRef))
    {
        return true;
    }

    var reference = source.CredentialRef.Trim();

    // References to an external secret store are accepted without touching the client.
    foreach (var prefix in new[] { "authref://", "secret://", "vault://" })
    {
        if (reference.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    // Everything below embeds the secret in the reference itself, which must be opted into.
    if (!_httpOptions.AllowInlineCredentials)
    {
        error = "Inline credentials are disabled.";
        return false;
    }

    if (reference.StartsWith("basic:", StringComparison.OrdinalIgnoreCase))
    {
        // Split on the first ':' only, so the password may itself contain colons.
        var userAndPassword = reference[6..];
        var separator = userAndPassword.IndexOf(':');
        if (separator < 0 || string.IsNullOrWhiteSpace(userAndPassword[..separator]))
        {
            error = "Invalid basic credential format.";
            return false;
        }

        var user = userAndPassword[..separator];
        var password = userAndPassword[(separator + 1)..];
        var encoded = Convert.ToBase64String(
            System.Text.Encoding.UTF8.GetBytes($"{user}:{password}"));
        client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Basic", encoded);
        return true;
    }

    if (reference.StartsWith("bearer:", StringComparison.OrdinalIgnoreCase))
    {
        var bearerToken = reference[7..];
        if (string.IsNullOrWhiteSpace(bearerToken))
        {
            error = "Invalid bearer credential format.";
            return false;
        }

        client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", bearerToken);
        return true;
    }

    error = "Unsupported credential reference scheme.";
    return false;
}
/// <summary>
/// Extracts the target of the first <c>rel="next"</c> entry found in the response's
/// <c>Link</c> headers.
/// </summary>
/// <param name="headers">Response headers to inspect.</param>
/// <returns>
/// The text between the angle brackets of the matching link, exactly as sent by the registry,
/// or <c>null</c> when there is no <c>Link</c> header or none of them carries a next relation.
/// </returns>
private static string? ExtractNextLink(HttpResponseHeaders headers)
{
    if (!headers.TryGetValues("Link", out var linkHeaders))
    {
        return null;
    }

    foreach (var link in linkHeaders)
    {
        // Parse Link header format: </v2/_catalog?last=repo&n=100>; rel="next"
        // Link relation types are compared case-insensitively (RFC 8288), so rel="Next" and
        // REL=next must be honoured as well; otherwise pagination would stop silently.
        var match = Regex.Match(
            link,
            @"<([^>]+)>;\s*rel=""?next""?",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        if (match.Success)
        {
            return match.Groups[1].Value;
        }
    }

    return null;
}
/// <summary>
/// Turns the target of a pagination <c>Link</c> header into the absolute URI of the next page.
/// </summary>
/// <param name="baseUri">Validated registry base URI that relative targets are resolved against.</param>
/// <param name="linkValue">Link target as extracted from the header; may be <c>null</c> or blank.</param>
/// <param name="error">Set when the target is rejected or cannot be parsed; otherwise <c>null</c>.</param>
/// <returns>
/// The next page URI, or <c>null</c> when there is no next page or the target was rejected
/// (in which case <paramref name="error"/> is set).
/// </returns>
private Uri? ResolveNextLink(Uri baseUri, string? linkValue, out string? error)
{
    error = null;
    if (string.IsNullOrWhiteSpace(linkValue))
    {
        return null;
    }

    var link = linkValue.Trim();

    // "/path?query" is a root-relative reference and the usual form registries send. It must
    // not be offered to the absolute parser: on Unix, Uri.TryCreate(..., UriKind.Absolute)
    // accepts a leading-slash string as an implicit file:// path, which would then be handed
    // to the outbound policy as a file URI instead of being resolved against the registry.
    // "//host/path" is excluded here because it names another authority.
    var isRootRelative = link.StartsWith('/') && !link.StartsWith("//", StringComparison.Ordinal);

    if (!isRootRelative && Uri.TryCreate(link, UriKind.Absolute, out var absolute))
    {
        return ApplyOutboundPolicy(absolute, out error);
    }

    if (Uri.TryCreate(link, UriKind.Relative, out var relative))
    {
        var resolved = new Uri(baseUri, relative);

        // A relative reference normally stays on the already-validated registry origin.
        // If resolution nevertheless lands on a different scheme/host/port, treat the result
        // like an absolute link and run it through the outbound policy.
        var staysOnRegistry = Uri.Compare(
            resolved,
            baseUri,
            UriComponents.SchemeAndServer,
            UriFormat.Unescaped,
            StringComparison.OrdinalIgnoreCase) == 0;

        return staysOnRegistry ? resolved : ApplyOutboundPolicy(resolved, out error);
    }

    error = "Invalid registry pagination link.";
    return null;

    // Validates an absolute candidate against the configured scheme/host policy.
    Uri? ApplyOutboundPolicy(Uri candidate, out string? policyError)
    {
        if (OutboundUrlPolicy.TryNormalizeUri(
                candidate.ToString(),
                _allowedSchemes,
                _allowedHosts,
                _allowAllHosts,
                allowMissingScheme: false,
                defaultScheme: "https",
                out var normalized,
                out var rejection))
        {
            policyError = null;
            return normalized;
        }

        policyError = rejection;
        return null;
    }
}
/// <summary>
/// Decides whether a repository name passes the source's repository deny and allow lists.
/// </summary>
/// <param name="repository">Repository name to test.</param>
/// <param name="source">Source providing <c>RepositoryAllowlist</c> and <c>RepositoryDenylist</c>.</param>
/// <returns>
/// <c>false</c> when the name matches a denylist pattern, or when an allowlist exists and the
/// name matches none of its patterns; <c>true</c> otherwise (including when both lists are empty).
/// </returns>
private static bool MatchesRepositoryFilters(string repository, RegistrySource source)
{
    var hasAllowlist = source.RepositoryAllowlist.Count > 0;
    var hasDenylist = source.RepositoryDenylist.Count > 0;

    // Nothing configured: every repository is accepted.
    if (!hasAllowlist && !hasDenylist)
    {
        return true;
    }

    // The denylist takes precedence over the allowlist.
    if (hasDenylist && MatchesPatterns(repository, source.RepositoryDenylist))
    {
        return false;
    }

    return !hasAllowlist || MatchesPatterns(repository, source.RepositoryAllowlist);
}
/// <summary>
/// Decides whether a tag name passes the source's tag deny and allow lists.
/// </summary>
/// <param name="tag">Tag name to test.</param>
/// <param name="source">Source providing <c>TagAllowlist</c> and <c>TagDenylist</c>.</param>
/// <returns>
/// <c>false</c> when the tag matches a denylist pattern, or when an allowlist exists and the
/// tag matches none of its patterns; <c>true</c> otherwise (including when both lists are empty).
/// </returns>
private static bool MatchesTagFilters(string tag, RegistrySource source)
{
    var hasAllowlist = source.TagAllowlist.Count > 0;
    var hasDenylist = source.TagDenylist.Count > 0;

    // Nothing configured: every tag is accepted.
    if (!hasAllowlist && !hasDenylist)
    {
        return true;
    }

    // The denylist takes precedence over the allowlist.
    if (hasDenylist && MatchesPatterns(tag, source.TagDenylist))
    {
        return false;
    }

    return !hasAllowlist || MatchesPatterns(tag, source.TagAllowlist);
}
/// <summary>
/// Reports whether <paramref name="value"/> matches at least one of the given glob patterns.
/// </summary>
/// <param name="value">Text to test.</param>
/// <param name="patterns">Glob patterns, tried in list order until one matches.</param>
/// <returns><c>true</c> on the first matching pattern; <c>false</c> when none match or the list is empty.</returns>
private static bool MatchesPatterns(string value, IReadOnlyList<string> patterns) =>
    patterns.Any(candidate => MatchesGlobPattern(value, candidate));
/// <summary>
/// Tests a value against a simple glob in which <c>*</c> stands for any run of characters and
/// every other character is literal. Comparison ignores case.
/// </summary>
/// <param name="value">Text to test.</param>
/// <param name="pattern">Glob pattern; a pattern without <c>*</c> must equal the value.</param>
/// <returns><c>true</c> when the whole value matches the pattern.</returns>
private static bool MatchesGlobPattern(string value, string pattern)
{
    // A lone asterisk accepts every value.
    if (pattern is "*")
    {
        return true;
    }

    var hasWildcard = pattern.IndexOf('*') >= 0;
    if (hasWildcard)
    {
        // Escape the pattern so only '*' keeps a special meaning, then anchor it to the full value.
        var escaped = Regex.Escape(pattern).Replace("\\*", ".*");
        var anchored = string.Concat("^", escaped, "$");
        return Regex.IsMatch(value, anchored, RegexOptions.IgnoreCase);
    }

    return value.Equals(pattern, StringComparison.OrdinalIgnoreCase);
}
}
/// <summary>
/// Result of repository discovery.
/// </summary>
/// <param name="Success">Whether discovery completed without an error.</param>
/// <param name="Error">Description of the failure; the discovery service in this file passes <c>null</c> on success.</param>
/// <param name="Repositories">
/// Repository names that were discovered. The discovery service in this file may supply a
/// partial list together with a failure when paging is interrupted.
/// </param>
public sealed record DiscoveryResult(
bool Success,
string? Error,
IReadOnlyList<string> Repositories);
/// <summary>
/// Result of tag discovery for a repository.
/// </summary>
/// <param name="Success">Whether discovery completed without an error.</param>
/// <param name="Error">Description of the failure; the discovery service in this file passes <c>null</c> on success.</param>
/// <param name="Repository">Name of the repository the tags belong to.</param>
/// <param name="Tags">
/// Tags that were discovered. The discovery service in this file may supply a partial list
/// together with a failure when paging is interrupted.
/// </param>
public sealed record TagDiscoveryResult(
bool Success,
string? Error,
string Repository,
IReadOnlyList<TagInfo> Tags);
/// <summary>
/// Information about a discovered tag.
/// </summary>
/// <param name="Name">Tag name as reported by the registry.</param>
/// <param name="Digest">
/// Manifest digest for the tag, or <c>null</c> when it could not be determined. The discovery
/// service in this file reads it from the <c>Docker-Content-Digest</c> response header.
/// </param>
public sealed record TagInfo(
string Name,
string? Digest);
/// <summary>
/// Result of full image discovery.
/// </summary>
/// <param name="Success">
/// Whether discovery is considered successful. The discovery service in this file reports
/// success when no repository failed or when at least one image was found.
/// </param>
/// <param name="Error">
/// Failure description or error summary. It can be non-null even when
/// <paramref name="Success"/> is <c>true</c>, because the discovery service in this file
/// summarizes per-repository failures here while still returning the images it found.
/// </param>
/// <param name="Images">Images that were discovered, one entry per repository/tag pair.</param>
public sealed record ImageDiscoveryResult(
bool Success,
string? Error,
IReadOnlyList<DiscoveredImage> Images);
/// <summary>
/// One image found during discovery, identified by its repository and tag.
/// </summary>
/// <param name="Repository">Repository name of the image.</param>
/// <param name="Tag">Tag name of the image.</param>
/// <param name="Digest">Manifest digest of the image, or <c>null</c> when unknown.</param>
public sealed record DiscoveredImage(string Repository, string Tag, string? Digest)
{
    /// <summary>Tag-based reference in the form <c>repository:tag</c>.</summary>
    public string FullReference => string.Concat(Repository, ":", Tag);

    /// <summary>
    /// Digest-based reference in the form <c>repository@digest</c>, or <c>null</c> when no
    /// digest is available.
    /// </summary>
    public string? DigestReference => Digest switch
    {
        null => null,
        var digest => $"{Repository}@{digest}",
    };
}