Rename Concelier Source modules to Connector
This commit is contained in:
31
src/StellaOps.Concelier.Connector.Common/AGENTS.md
Normal file
31
src/StellaOps.Concelier.Connector.Common/AGENTS.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# AGENTS
|
||||
## Role
|
||||
Shared connector toolkit. Provides HTTP clients, retry/backoff, conditional GET (ETag/Last-Modified), schema validation, pagination helpers, clocks, and common DTO utilities for all connectors.
|
||||
## Scope
|
||||
- Typed HttpClient registrations with allowlisted hosts and timeouts.
|
||||
- Request pipeline: retries with jitter, backoff on 429/5xx, rate-limit tracking per source.
|
||||
- Conditional GET helpers (If-None-Match, If-Modified-Since), window cursors, and pagination iterators.
|
||||
- Validators: JSON Schema, XML Schema (for example XmlSchemaValidator), and sanitizers.
|
||||
- Content hashing and raw document capture helpers; metadata extraction (headers, status).
|
||||
- HTML sanitization, URL normalization, and PDF-to-text extraction utilities for feeds that require cleanup before validation.
|
||||
## Participants
|
||||
- Connector.* modules (NVD, Red Hat, JVN, PSIRTs, CERTs, ICS).
|
||||
- Storage.Mongo (document/dto repositories using shared shapes).
|
||||
- Core (jobs schedule/trigger for connectors).
|
||||
- QA (canned HTTP server harness, schema fixtures).
|
||||
## Interfaces & contracts
|
||||
- All network calls must pass through configured HttpClient with allowlist and sane timeouts; no direct new HttpClient().
|
||||
- Validators return detailed errors; invalid payloads are quarantined and are not mapped.
|
||||
- Cursor helpers implement sliding windows and ID-based pagination; rely on IClock/TimeProvider for determinism.
|
||||
- Strict provenance tags for extraction method: parser, oval, package.nevra, llm (gated).
|
||||
## In/Out of scope
|
||||
In: HTTP plumbing, validators, cursor/backoff utilities, hashing.
|
||||
Out: connector-specific schemas/mapping rules, merge precedence.
|
||||
## Observability & security expectations
|
||||
- Metrics: SourceDiagnostics publishes `concelier.source.http.*` counters/histograms tagged with `concelier.source=<connector>` plus retries/failures; connector dashboards slice on that tag instead of bespoke metric names.
|
||||
- Logs include uri, status, retries, etag; redact tokens and auth headers.
|
||||
- Distributed tracing hooks and per-connector counters should be wired centrally for consistent observability.
|
||||
## Tests
|
||||
- Author and review coverage in `../StellaOps.Concelier.Connector.Common.Tests`.
|
||||
- Shared fixtures (e.g., `MongoIntegrationFixture`, `ConnectorTestHarness`) live in `../StellaOps.Concelier.Testing`.
|
||||
- Keep fixtures deterministic; match new cases to real-world advisories or regression scenarios.
|
||||
@@ -0,0 +1,29 @@
|
||||
namespace StellaOps.Concelier.Connector.Common.Cursors;
|
||||
|
||||
/// <summary>
/// Provides helpers for computing pagination start indices for sources that expose total result counts.
/// </summary>
public static class PaginationPlanner
{
    /// <summary>
    /// Enumerates additional page start indices given the total result count returned by the source.
    /// The first page (at <paramref name="firstPageStartIndex"/>) is assumed to be already fetched.
    /// </summary>
    /// <param name="totalResults">Total number of results reported by the source; non-positive values yield no pages.</param>
    /// <param name="resultsPerPage">Page size used as the stride between start indices; non-positive values yield no pages.</param>
    /// <param name="firstPageStartIndex">Start index of the page that was already fetched; negative values are treated as 0.</param>
    /// <returns>
    /// The start indices of the remaining pages in ascending order, each strictly less than
    /// <paramref name="totalResults"/>. The sequence is produced lazily.
    /// </returns>
    public static IEnumerable<int> EnumerateAdditionalPages(int totalResults, int resultsPerPage, int firstPageStartIndex = 0)
    {
        if (totalResults <= 0 || resultsPerPage <= 0)
        {
            yield break;
        }

        // Track the cursor as a 64-bit value: with large totals or page sizes the 32-bit sum
        // "start + resultsPerPage" wraps to a negative number, which would satisfy the loop
        // condition again and emit bogus (negative) indices indefinitely.
        long start = Math.Max(firstPageStartIndex, 0);

        for (start += resultsPerPage; start < totalResults; start += resultsPerPage)
        {
            // start < totalResults <= int.MaxValue here, so the narrowing cast is lossless.
            yield return (int)start;
        }
    }
}
|
||||
@@ -0,0 +1,43 @@
|
||||
namespace StellaOps.Concelier.Connector.Common.Cursors;
|
||||
|
||||
/// <summary>
/// Tunable settings consulted when a sliding time-window cursor is advanced.
/// </summary>
public sealed class TimeWindowCursorOptions
{
    /// <summary>Length of each window; defaults to four hours.</summary>
    public TimeSpan WindowSize { get; init; } = TimeSpan.FromHours(4);

    /// <summary>Amount by which a new window reaches back before the previous window's end; defaults to five minutes.</summary>
    public TimeSpan Overlap { get; init; } = TimeSpan.FromMinutes(5);

    /// <summary>How far back the first window may start; defaults to seven days.</summary>
    public TimeSpan InitialBackfill { get; init; } = TimeSpan.FromDays(7);

    /// <summary>Smallest window length used as a fallback; defaults to one minute.</summary>
    public TimeSpan MinimumWindowSize { get; init; } = TimeSpan.FromMinutes(1);

    /// <summary>
    /// Checks the configured values and fails on the first rule that is violated.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// A duration that must be positive is not, the overlap is negative, or the overlap is not
    /// smaller than the window size.
    /// </exception>
    public void EnsureValid()
    {
        Require(WindowSize > TimeSpan.Zero, "Window size must be positive.");
        Require(Overlap >= TimeSpan.Zero, "Window overlap cannot be negative.");
        Require(Overlap < WindowSize, "Window overlap must be less than the window size.");
        Require(InitialBackfill > TimeSpan.Zero, "Initial backfill must be positive.");
        Require(MinimumWindowSize > TimeSpan.Zero, "Minimum window size must be positive.");
    }

    // Throws InvalidOperationException carrying failureMessage when the rule does not hold.
    private static void Require(bool satisfied, string failureMessage)
    {
        if (!satisfied)
        {
            throw new InvalidOperationException(failureMessage);
        }
    }
}
|
||||
@@ -0,0 +1,50 @@
|
||||
namespace StellaOps.Concelier.Connector.Common.Cursors;
|
||||
|
||||
/// <summary>
/// Computes the next sliding time-window range for a connector from its persisted cursor state.
/// </summary>
public static class TimeWindowCursorPlanner
{
    /// <summary>
    /// Derives the window that follows the one recorded in <paramref name="state"/>.
    /// </summary>
    /// <param name="now">Upper bound for the window end; also the reference point for the backfill horizon.</param>
    /// <param name="state">Previously persisted cursor state; <c>null</c> is treated as <see cref="TimeWindowCursorState.Empty"/>.</param>
    /// <param name="options">Window sizing rules; validated via <see cref="TimeWindowCursorOptions.EnsureValid"/> before use.</param>
    /// <returns>A window whose end is strictly after its start and not after <paramref name="now"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">The options are invalid, or no non-empty window fits before <paramref name="now"/>.</exception>
    public static TimeWindow GetNextWindow(DateTimeOffset now, TimeWindowCursorState? state, TimeWindowCursorOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);
        options.EnsureValid();

        var cursor = state ?? TimeWindowCursorState.Empty;

        // Nothing older than the backfill horizon is ever requested.
        var horizon = now - options.InitialBackfill;

        // Resume from the previous window end (or the horizon on first run), then step back by the overlap.
        var resumeFrom = NotBefore(cursor.LastWindowEnd ?? horizon, horizon);
        var windowStart = NotBefore(resumeFrom - options.Overlap, horizon);

        var windowEnd = NotAfter(windowStart + options.WindowSize, now);
        if (windowEnd <= windowStart)
        {
            // Fall back to the minimum window length, still capped at "now".
            windowEnd = NotAfter(windowStart + options.MinimumWindowSize, now);
        }

        if (windowEnd <= windowStart)
        {
            throw new InvalidOperationException("Unable to compute a non-empty time window with the provided options.");
        }

        return new TimeWindow(windowStart, windowEnd);
    }

    // Returns floor when value precedes it; otherwise value.
    private static DateTimeOffset NotBefore(DateTimeOffset value, DateTimeOffset floor)
        => value < floor ? floor : value;

    // Returns ceiling when value exceeds it; otherwise value.
    private static DateTimeOffset NotAfter(DateTimeOffset value, DateTimeOffset ceiling)
        => value > ceiling ? ceiling : value;
}
|
||||
@@ -0,0 +1,84 @@
|
||||
using MongoDB.Bson;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Cursors;
|
||||
|
||||
/// <summary>
/// Represents the persisted state of a sliding time-window cursor.
/// </summary>
/// <param name="LastWindowStart">Start of the most recently recorded window, if any.</param>
/// <param name="LastWindowEnd">End of the most recently recorded window, if any.</param>
public sealed record TimeWindowCursorState(DateTimeOffset? LastWindowStart, DateTimeOffset? LastWindowEnd)
{
    /// <summary>State with no recorded window.</summary>
    public static TimeWindowCursorState Empty { get; } = new(null, null);

    /// <summary>
    /// Creates a new state whose bounds are taken from <paramref name="window"/>.
    /// </summary>
    public TimeWindowCursorState WithWindow(TimeWindow window)
    {
        return new TimeWindowCursorState(window.Start, window.End);
    }

    /// <summary>
    /// Serializes this state into a fresh <see cref="BsonDocument"/> using the supplied field names.
    /// </summary>
    public BsonDocument ToBsonDocument(string startField = "windowStart", string endField = "windowEnd")
    {
        var document = new BsonDocument();
        WriteTo(document, startField, endField);
        return document;
    }

    /// <summary>
    /// Writes this state into <paramref name="document"/>. Both fields are removed first, so a bound
    /// without a value leaves no element behind; present bounds are stored as UTC date-times.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> or a field name is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">A field name is empty.</exception>
    public void WriteTo(BsonDocument document, string startField = "windowStart", string endField = "windowEnd")
    {
        ArgumentNullException.ThrowIfNull(document);
        ArgumentException.ThrowIfNullOrEmpty(startField);
        ArgumentException.ThrowIfNullOrEmpty(endField);

        document.Remove(startField);
        document.Remove(endField);

        if (LastWindowStart.HasValue)
        {
            document[startField] = LastWindowStart.Value.UtcDateTime;
        }

        if (LastWindowEnd.HasValue)
        {
            document[endField] = LastWindowEnd.Value.UtcDateTime;
        }
    }

    /// <summary>
    /// Reads a state from <paramref name="document"/>. A <c>null</c> document yields <see cref="Empty"/>;
    /// missing or unreadable fields yield <c>null</c> bounds.
    /// </summary>
    public static TimeWindowCursorState FromBsonDocument(BsonDocument? document, string startField = "windowStart", string endField = "windowEnd")
    {
        if (document is null)
        {
            return Empty;
        }

        DateTimeOffset? start = null;
        DateTimeOffset? end = null;

        if (document.TryGetValue(startField, out var startValue))
        {
            start = ReadDateTimeOffset(startValue);
        }

        if (document.TryGetValue(endField, out var endValue))
        {
            end = ReadDateTimeOffset(endValue);
        }

        return new TimeWindowCursorState(start, end);
    }

    /// <summary>
    /// Converts a BSON date-time or a parseable string into a UTC <see cref="DateTimeOffset"/>;
    /// any other value (or an unparseable string) yields <c>null</c>.
    /// </summary>
    private static DateTimeOffset? ReadDateTimeOffset(BsonValue value)
    {
        return value.BsonType switch
        {
            BsonType.DateTime => DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc),

            // Persisted cursor strings are machine data: parse them with the invariant culture so the
            // result does not depend on the current thread culture of the host.
            // NOTE(review): strings without an explicit offset are still interpreted by TryParse
            // using its default offset handling — confirm whether such values can occur.
            BsonType.String when DateTimeOffset.TryParse(
                value.AsString,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var parsed) => parsed.ToUniversalTime(),

            _ => null,
        };
    }
}
|
||||
|
||||
/// <summary>
/// Immutable pair of instants delimiting a time window.
/// </summary>
/// <param name="Start">Instant at which the window begins.</param>
/// <param name="End">Instant at which the window ends.</param>
public readonly record struct TimeWindow(DateTimeOffset Start, DateTimeOffset End)
{
    /// <summary>Elapsed time from <see cref="Start"/> to <see cref="End"/>.</summary>
    public TimeSpan Duration
    {
        get { return End.Subtract(Start); }
    }
}
|
||||
27
src/StellaOps.Concelier.Connector.Common/DocumentStatuses.cs
Normal file
27
src/StellaOps.Concelier.Connector.Common/DocumentStatuses.cs
Normal file
@@ -0,0 +1,27 @@
|
||||
namespace StellaOps.Concelier.Connector.Common;
|
||||
|
||||
/// <summary>
/// String constants naming the lifecycle stage of a raw source document (fetch, parse, map).
/// </summary>
public static class DocumentStatuses
{
    /// <summary>
    /// The document has been captured from upstream and still needs schema validation/parsing.
    /// </summary>
    public const string PendingParse = "pending-parse";

    /// <summary>
    /// The document has been parsed and sanitized and still needs canonical mapping.
    /// </summary>
    public const string PendingMap = "pending-map";

    /// <summary>
    /// The document has been completely mapped to canonical advisories.
    /// </summary>
    public const string Mapped = "mapped";

    /// <summary>
    /// Processing of the document failed; manual intervention is required before a retry.
    /// </summary>
    public const string Failed = "failed";
}
|
||||
@@ -0,0 +1,43 @@
|
||||
using System.Security.Cryptography;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Fetch;
|
||||
|
||||
/// <summary>
/// Jitter source backed by <see cref="RandomNumberGenerator"/> for thread-safe, high-entropy delays.
/// </summary>
public sealed class CryptoJitterSource : IJitterSource
{
    /// <summary>
    /// Draws a random duration between the two bounds (both inclusive).
    /// </summary>
    /// <param name="minInclusive">Lower bound; negative values are treated as <see cref="TimeSpan.Zero"/>.</param>
    /// <param name="maxInclusive">Upper bound; negative values are treated as <see cref="TimeSpan.Zero"/>.</param>
    /// <returns>A non-negative duration within the (clamped) bounds.</returns>
    /// <exception cref="ArgumentException"><paramref name="maxInclusive"/> is less than <paramref name="minInclusive"/>.</exception>
    public TimeSpan Next(TimeSpan minInclusive, TimeSpan maxInclusive)
    {
        if (maxInclusive < minInclusive)
        {
            throw new ArgumentException("Max jitter must be greater than or equal to min jitter.", nameof(maxInclusive));
        }

        // Clamp BOTH bounds. Clamping only the minimum left a negative maximum in place, which made
        // the range negative and caused a negative delay to be returned.
        if (minInclusive < TimeSpan.Zero)
        {
            minInclusive = TimeSpan.Zero;
        }

        if (maxInclusive < TimeSpan.Zero)
        {
            maxInclusive = TimeSpan.Zero;
        }

        if (maxInclusive == minInclusive)
        {
            return minInclusive;
        }

        var minTicks = minInclusive.Ticks;
        var maxTicks = maxInclusive.Ticks;
        var range = maxTicks - minTicks;

        // Map 64 random bits onto [0, 1] and scale to the tick range.
        Span<byte> buffer = stackalloc byte[8];
        RandomNumberGenerator.Fill(buffer);
        var sample = BitConverter.ToUInt64(buffer);
        var ratio = sample / (double)ulong.MaxValue;
        var jitterTicks = (long)Math.Round(range * ratio, MidpointRounding.AwayFromZero);

        // Guard against floating-point rounding pushing the result past the upper bound.
        if (jitterTicks > range)
        {
            jitterTicks = range;
        }

        return TimeSpan.FromTicks(minTicks + jitterTicks);
    }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
namespace StellaOps.Concelier.Connector.Common.Fetch;
|
||||
|
||||
/// <summary>
/// Abstraction over a generator of random jitter durations, used to decorrelate retries.
/// </summary>
public interface IJitterSource
{
    /// <summary>
    /// Produces the next jitter duration.
    /// </summary>
    /// <param name="minInclusive">Smallest duration the caller accepts (inclusive).</param>
    /// <param name="maxInclusive">Largest duration the caller accepts (inclusive).</param>
    /// <returns>The jitter duration chosen by the implementation.</returns>
    TimeSpan Next(TimeSpan minInclusive, TimeSpan maxInclusive);
}
|
||||
@@ -0,0 +1,90 @@
|
||||
using MongoDB.Bson;
|
||||
using MongoDB.Driver;
|
||||
using MongoDB.Driver.GridFS;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Fetch;
|
||||
|
||||
/// <summary>
/// Stores, retrieves and removes raw upstream documents in a GridFS bucket so they can be parsed later.
/// </summary>
public sealed class RawDocumentStorage
{
    private const string BucketName = "documents";

    private readonly IMongoDatabase _database;

    /// <summary>
    /// Creates the storage over the given database.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="database"/> is <c>null</c>.</exception>
    public RawDocumentStorage(IMongoDatabase database)
    {
        ArgumentNullException.ThrowIfNull(database);
        _database = database;
    }

    // Builds a bucket handle that inherits the database's read/write concerns.
    private GridFSBucket CreateBucket()
    {
        var bucketOptions = new GridFSBucketOptions
        {
            BucketName = BucketName,
            WriteConcern = _database.Settings.WriteConcern,
            ReadConcern = _database.Settings.ReadConcern,
        };

        return new GridFSBucket(_database, bucketOptions);
    }

    /// <summary>
    /// Uploads <paramref name="content"/> without an expiry timestamp.
    /// </summary>
    /// <returns>The GridFS identifier of the stored file.</returns>
    public Task<ObjectId> UploadAsync(
        string sourceName,
        string uri,
        byte[] content,
        string? contentType,
        CancellationToken cancellationToken)
    {
        return UploadAsync(sourceName, uri, content, contentType, expiresAt: null, cancellationToken);
    }

    /// <summary>
    /// Uploads <paramref name="content"/> under a generated file name prefixed with
    /// <paramref name="sourceName"/>, recording the source name, URI and — when provided —
    /// content type and expiry in the file metadata.
    /// </summary>
    /// <returns>The GridFS identifier of the stored file.</returns>
    /// <exception cref="ArgumentNullException">A required argument is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="sourceName"/> or <paramref name="uri"/> is empty.</exception>
    public async Task<ObjectId> UploadAsync(
        string sourceName,
        string uri,
        byte[] content,
        string? contentType,
        DateTimeOffset? expiresAt,
        CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrEmpty(sourceName);
        ArgumentException.ThrowIfNullOrEmpty(uri);
        ArgumentNullException.ThrowIfNull(content);

        var gridFs = CreateBucket();
        var storedName = $"{sourceName}/{Guid.NewGuid():N}";

        var fileMetadata = new BsonDocument
        {
            { "sourceName", sourceName },
            { "uri", uri },
        };

        if (!string.IsNullOrWhiteSpace(contentType))
        {
            fileMetadata["contentType"] = contentType;
        }

        if (expiresAt.HasValue)
        {
            fileMetadata["expiresAt"] = expiresAt.Value.UtcDateTime;
        }

        var uploadOptions = new GridFSUploadOptions
        {
            Metadata = fileMetadata,
        };

        var storedId = await gridFs
            .UploadFromBytesAsync(storedName, content, uploadOptions, cancellationToken)
            .ConfigureAwait(false);

        return storedId;
    }

    /// <summary>
    /// Downloads the stored file with the given identifier as a byte array.
    /// </summary>
    public Task<byte[]> DownloadAsync(ObjectId id, CancellationToken cancellationToken)
    {
        return CreateBucket().DownloadAsBytesAsync(id, cancellationToken: cancellationToken);
    }

    /// <summary>
    /// Deletes the stored file with the given identifier; a file that no longer exists is not an error.
    /// </summary>
    public async Task DeleteAsync(ObjectId id, CancellationToken cancellationToken)
    {
        var gridFs = CreateBucket();

        try
        {
            await gridFs.DeleteAsync(id, cancellationToken).ConfigureAwait(false);
        }
        catch (GridFSFileNotFoundException)
        {
            // The file is already gone, which is the desired end state.
        }
    }
}
|
||||
@@ -0,0 +1,63 @@
|
||||
using System.Net;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Fetch;
|
||||
|
||||
/// <summary>
/// Outcome of a fetch that returns the raw response content to the caller instead of persisting a document.
/// </summary>
public sealed record SourceFetchContentResult
{
    // Instances are created only through the static factories below.
    private SourceFetchContentResult(
        HttpStatusCode statusCode,
        byte[]? content,
        bool notModified,
        string? etag,
        DateTimeOffset? lastModified,
        string? contentType,
        int attempts,
        IReadOnlyDictionary<string, string>? headers)
    {
        (StatusCode, Content, IsNotModified) = (statusCode, content, notModified);
        (ETag, LastModified, ContentType) = (etag, lastModified, contentType);
        (Attempts, Headers) = (attempts, headers);
    }

    /// <summary>HTTP status code of the response.</summary>
    public HttpStatusCode StatusCode { get; }

    /// <summary>Response body, or <c>null</c> when no content was captured.</summary>
    public byte[]? Content { get; }

    /// <summary><c>true</c> when <see cref="Content"/> is present.</summary>
    public bool IsSuccess
    {
        get { return Content != null; }
    }

    /// <summary><c>true</c> when the result was created via <see cref="NotModified"/>.</summary>
    public bool IsNotModified { get; }

    /// <summary>Entity tag supplied with the result, if any.</summary>
    public string? ETag { get; }

    /// <summary>Last-modified timestamp supplied with the result, if any.</summary>
    public DateTimeOffset? LastModified { get; }

    /// <summary>Content type supplied with the result, if any.</summary>
    public string? ContentType { get; }

    /// <summary>Number of attempts reported for the fetch.</summary>
    public int Attempts { get; }

    /// <summary>Response headers supplied with the result, if any.</summary>
    public IReadOnlyDictionary<string, string>? Headers { get; }

    /// <summary>
    /// Creates a result carrying response content and its associated metadata.
    /// </summary>
    public static SourceFetchContentResult Success(
        HttpStatusCode statusCode,
        byte[] content,
        string? etag,
        DateTimeOffset? lastModified,
        string? contentType,
        int attempts,
        IReadOnlyDictionary<string, string>? headers)
    {
        return new SourceFetchContentResult(
            statusCode: statusCode,
            content: content,
            notModified: false,
            etag: etag,
            lastModified: lastModified,
            contentType: contentType,
            attempts: attempts,
            headers: headers);
    }

    /// <summary>
    /// Creates a content-less result flagged as not modified.
    /// </summary>
    public static SourceFetchContentResult NotModified(HttpStatusCode statusCode, int attempts)
    {
        return new SourceFetchContentResult(
            statusCode: statusCode,
            content: null,
            notModified: true,
            etag: null,
            lastModified: null,
            contentType: null,
            attempts: attempts,
            headers: null);
    }

    /// <summary>
    /// Creates a content-less result that is neither successful nor flagged as not modified.
    /// </summary>
    public static SourceFetchContentResult Skipped(HttpStatusCode statusCode, int attempts)
    {
        return new SourceFetchContentResult(
            statusCode: statusCode,
            content: null,
            notModified: false,
            etag: null,
            lastModified: null,
            contentType: null,
            attempts: attempts,
            headers: null);
    }
}
|
||||
@@ -0,0 +1,24 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Net.Http;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Fetch;
|
||||
|
||||
/// <summary>
/// Describes a single fetch a source connector wants to perform.
/// </summary>
/// <param name="ClientName">Name of the HTTP client used for the request.</param>
/// <param name="SourceName">Name of the source the request belongs to.</param>
/// <param name="Method">HTTP method to use.</param>
/// <param name="RequestUri">Target of the request.</param>
/// <param name="Metadata">Optional caller-supplied key/value pairs carried along with the request.</param>
/// <param name="ETag">Optional entity tag for a conditional request.</param>
/// <param name="LastModified">Optional timestamp for a conditional request.</param>
/// <param name="TimeoutOverride">Optional timeout for this request.</param>
/// <param name="AcceptHeaders">Optional media types to advertise in the Accept header.</param>
public sealed record SourceFetchRequest(
    string ClientName,
    string SourceName,
    HttpMethod Method,
    Uri RequestUri,
    IReadOnlyDictionary<string, string>? Metadata = null,
    string? ETag = null,
    DateTimeOffset? LastModified = null,
    TimeSpan? TimeoutOverride = null,
    IReadOnlyList<string>? AcceptHeaders = null)
{
    /// <summary>
    /// Convenience constructor for a GET request with every optional value left unset.
    /// </summary>
    public SourceFetchRequest(string clientName, string sourceName, Uri requestUri)
        : this(ClientName: clientName, SourceName: sourceName, Method: HttpMethod.Get, RequestUri: requestUri)
    {
    }
}
|
||||
@@ -0,0 +1,34 @@
|
||||
using System.Net;
|
||||
using StellaOps.Concelier.Storage.Mongo.Documents;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Fetch;
|
||||
|
||||
/// <summary>
/// Describes what happened when a raw document was fetched from an upstream source.
/// </summary>
public sealed record SourceFetchResult
{
    // Instances are created only through the static factories below.
    private SourceFetchResult(HttpStatusCode statusCode, DocumentRecord? document, bool notModified)
    {
        (StatusCode, Document, IsNotModified) = (statusCode, document, notModified);
    }

    /// <summary>HTTP status code of the response.</summary>
    public HttpStatusCode StatusCode { get; }

    /// <summary>The stored document record, or <c>null</c> when nothing was stored.</summary>
    public DocumentRecord? Document { get; }

    /// <summary><c>true</c> when <see cref="Document"/> is present.</summary>
    public bool IsSuccess
    {
        get { return Document is not null; }
    }

    /// <summary><c>true</c> when the result was created via <see cref="NotModified"/>.</summary>
    public bool IsNotModified { get; }

    /// <summary>
    /// Creates a result carrying the stored document.
    /// </summary>
    public static SourceFetchResult Success(DocumentRecord document, HttpStatusCode statusCode)
    {
        return new SourceFetchResult(statusCode, document, notModified: false);
    }

    /// <summary>
    /// Creates a document-less result flagged as not modified.
    /// </summary>
    public static SourceFetchResult NotModified(HttpStatusCode statusCode)
    {
        return new SourceFetchResult(statusCode, document: null, notModified: true);
    }

    /// <summary>
    /// Creates a document-less result that is neither successful nor flagged as not modified.
    /// </summary>
    public static SourceFetchResult Skipped(HttpStatusCode statusCode)
    {
        return new SourceFetchResult(statusCode, document: null, notModified: false);
    }
}
|
||||
@@ -0,0 +1,338 @@
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using MongoDB.Bson;
|
||||
using StellaOps.Concelier.Connector.Common.Http;
|
||||
using StellaOps.Concelier.Connector.Common.Telemetry;
|
||||
using StellaOps.Concelier.Storage.Mongo;
|
||||
using StellaOps.Concelier.Storage.Mongo.Documents;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Fetch;
|
||||
|
||||
/// <summary>
|
||||
/// Executes HTTP fetches for connectors, capturing raw responses with metadata for downstream stages.
|
||||
/// </summary>
|
||||
public sealed class SourceFetchService
|
||||
{
|
||||
private static readonly string[] DefaultAcceptHeaders = { "application/json" };
|
||||
|
||||
private readonly IHttpClientFactory _httpClientFactory;
|
||||
private readonly RawDocumentStorage _rawDocumentStorage;
|
||||
private readonly IDocumentStore _documentStore;
|
||||
private readonly ILogger<SourceFetchService> _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly IOptionsMonitor<SourceHttpClientOptions> _httpClientOptions;
|
||||
private readonly IOptions<MongoStorageOptions> _storageOptions;
|
||||
private readonly IJitterSource _jitterSource;
|
||||
|
||||
/// <summary>
/// Wires up the fetch service. Every dependency except <paramref name="timeProvider"/> is required;
/// a missing time provider falls back to <see cref="TimeProvider.System"/>.
/// </summary>
/// <exception cref="ArgumentNullException">A required dependency is <c>null</c>.</exception>
public SourceFetchService(
    IHttpClientFactory httpClientFactory,
    RawDocumentStorage rawDocumentStorage,
    IDocumentStore documentStore,
    ILogger<SourceFetchService> logger,
    IJitterSource jitterSource,
    TimeProvider? timeProvider = null,
    IOptionsMonitor<SourceHttpClientOptions>? httpClientOptions = null,
    IOptions<MongoStorageOptions>? storageOptions = null)
{
    // Guards run in parameter order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(httpClientFactory);
    ArgumentNullException.ThrowIfNull(rawDocumentStorage);
    ArgumentNullException.ThrowIfNull(documentStore);
    ArgumentNullException.ThrowIfNull(logger);
    ArgumentNullException.ThrowIfNull(jitterSource);
    // The two options parameters default to null in the signature but are nevertheless mandatory.
    ArgumentNullException.ThrowIfNull(httpClientOptions);
    ArgumentNullException.ThrowIfNull(storageOptions);

    _httpClientFactory = httpClientFactory;
    _rawDocumentStorage = rawDocumentStorage;
    _documentStore = documentStore;
    _logger = logger;
    _jitterSource = jitterSource;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _httpClientOptions = httpClientOptions;
    _storageOptions = storageOptions;
}
|
||||
|
||||
/// <summary>
/// Sends the request, stores the raw response body via <see cref="RawDocumentStorage"/>, and upserts a
/// <see cref="DocumentRecord"/> in the <see cref="DocumentStatuses.PendingParse"/> state.
/// </summary>
/// <param name="request">Fetch parameters (client, source, URI, conditional headers, metadata).</param>
/// <param name="cancellationToken">Token observed by the HTTP call and the storage operations.</param>
/// <returns>
/// <see cref="SourceFetchResult.NotModified"/> for a 304 response; otherwise
/// <see cref="SourceFetchResult.Success"/> carrying the upserted record.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
/// <exception cref="HttpRequestException">
/// The response status is neither 304 nor a success code. The exception's
/// <see cref="HttpRequestException.StatusCode"/> carries the response status.
/// </exception>
public async Task<SourceFetchResult> FetchAsync(SourceFetchRequest request, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(request);

    using var activity = SourceDiagnostics.StartFetch(request.SourceName, request.RequestUri, request.Method.Method, request.ClientName);
    var stopwatch = Stopwatch.StartNew();

    try
    {
        var sendResult = await SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
        var response = sendResult.Response;

        using (response)
        {
            // Duration is captured once the headers are available, before the body is read.
            var duration = stopwatch.Elapsed;
            activity?.SetTag("http.status_code", (int)response.StatusCode);
            activity?.SetTag("http.retry.count", sendResult.Attempts - 1);

            var rateLimitRemaining = TryGetHeaderValue(response.Headers, "x-ratelimit-remaining");

            if (response.StatusCode == HttpStatusCode.NotModified)
            {
                _logger.LogDebug("Source {Source} returned 304 Not Modified for {Uri}", request.SourceName, request.RequestUri);
                SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining);
                activity?.SetStatus(ActivityStatusCode.Ok);
                return SourceFetchResult.NotModified(response.StatusCode);
            }

            if (!response.IsSuccessStatusCode)
            {
                var body = await ReadResponsePreviewAsync(response, cancellationToken).ConfigureAwait(false);
                SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining);
                activity?.SetStatus(ActivityStatusCode.Error, body);

                // Attach the status code so callers can branch on it without parsing the message.
                throw new HttpRequestException(
                    $"Fetch failed with status {(int)response.StatusCode} {response.StatusCode} from {request.RequestUri}. Body preview: {body}",
                    inner: null,
                    statusCode: response.StatusCode);
            }

            var contentBytes = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false);
            var sha256 = Convert.ToHexString(SHA256.HashData(contentBytes)).ToLowerInvariant();
            var fetchedAt = _timeProvider.GetUtcNow();
            var contentType = response.Content.Headers.ContentType?.ToString();
            var storageOptions = _storageOptions.Value;
            var retention = storageOptions.RawDocumentRetention;
            DateTimeOffset? expiresAt = null;
            if (retention > TimeSpan.Zero)
            {
                // A negative grace period is ignored rather than shortening the retention.
                var grace = storageOptions.RawDocumentRetentionTtlGrace >= TimeSpan.Zero
                    ? storageOptions.RawDocumentRetentionTtlGrace
                    : TimeSpan.Zero;

                try
                {
                    expiresAt = fetchedAt.Add(retention).Add(grace);
                }
                catch (ArgumentOutOfRangeException)
                {
                    // The sum exceeds the representable range; treat it as "never expires".
                    expiresAt = DateTimeOffset.MaxValue;
                }
            }

            var gridFsId = await _rawDocumentStorage.UploadAsync(
                request.SourceName,
                request.RequestUri.ToString(),
                contentBytes,
                contentType,
                expiresAt,
                cancellationToken).ConfigureAwait(false);

            var headers = CreateHeaderDictionary(response);

            // Copy caller metadata so the request's dictionary is never mutated.
            var metadata = request.Metadata is null
                ? new Dictionary<string, string>(StringComparer.Ordinal)
                : new Dictionary<string, string>(request.Metadata, StringComparer.Ordinal);
            metadata["attempts"] = sendResult.Attempts.ToString(CultureInfo.InvariantCulture);
            metadata["fetchedAt"] = fetchedAt.ToString("O");

            // Reuse the identifier of an existing record for the same source/URI.
            var existing = await _documentStore.FindBySourceAndUriAsync(request.SourceName, request.RequestUri.ToString(), cancellationToken).ConfigureAwait(false);
            var recordId = existing?.Id ?? Guid.NewGuid();

            var record = new DocumentRecord(
                recordId,
                request.SourceName,
                request.RequestUri.ToString(),
                fetchedAt,
                sha256,
                DocumentStatuses.PendingParse,
                contentType,
                headers,
                metadata,
                response.Headers.ETag?.Tag,
                response.Content.Headers.LastModified,
                gridFsId,
                expiresAt);

            var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false);
            SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, contentBytes.LongLength, rateLimitRemaining);
            activity?.SetStatus(ActivityStatusCode.Ok);
            _logger.LogInformation("Fetched {Source} document {Uri} (sha256={Sha})", request.SourceName, request.RequestUri, sha256);
            return SourceFetchResult.Success(upserted, response.StatusCode);
        }
    }
    catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException)
    {
        // Mark the activity as failed, then let the original exception continue unchanged.
        activity?.SetStatus(ActivityStatusCode.Error, ex.Message);
        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Sends the request and returns the raw response content to the caller without persisting anything.
/// </summary>
/// <param name="request">Fetch parameters (client, source, URI, conditional headers).</param>
/// <param name="cancellationToken">Token observed by the HTTP call and the body read.</param>
/// <returns>
/// <see cref="SourceFetchContentResult.NotModified"/> for a 304 response; otherwise
/// <see cref="SourceFetchContentResult.Success"/> with the body, validators, content type and headers.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
/// <exception cref="HttpRequestException">
/// The response status is neither 304 nor a success code. The exception's
/// <see cref="HttpRequestException.StatusCode"/> carries the response status.
/// </exception>
public async Task<SourceFetchContentResult> FetchContentAsync(SourceFetchRequest request, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(request);

    using var activity = SourceDiagnostics.StartFetch(request.SourceName, request.RequestUri, request.Method.Method, request.ClientName);
    var stopwatch = Stopwatch.StartNew();

    try
    {
        // The per-client options are resolved inside SendAsync; no separate lookup is needed here.
        var sendResult = await SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
        var response = sendResult.Response;

        using (response)
        {
            // Duration is captured once the headers are available, before the body is read.
            var duration = stopwatch.Elapsed;
            activity?.SetTag("http.status_code", (int)response.StatusCode);
            activity?.SetTag("http.retry.count", sendResult.Attempts - 1);

            var rateLimitRemaining = TryGetHeaderValue(response.Headers, "x-ratelimit-remaining");

            if (response.StatusCode == HttpStatusCode.NotModified)
            {
                _logger.LogDebug("Source {Source} returned 304 Not Modified for {Uri}", request.SourceName, request.RequestUri);
                SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining);
                activity?.SetStatus(ActivityStatusCode.Ok);
                return SourceFetchContentResult.NotModified(response.StatusCode, sendResult.Attempts);
            }

            if (!response.IsSuccessStatusCode)
            {
                var body = await ReadResponsePreviewAsync(response, cancellationToken).ConfigureAwait(false);
                SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength, rateLimitRemaining);
                activity?.SetStatus(ActivityStatusCode.Error, body);

                // Attach the status code so callers can branch on it without parsing the message.
                throw new HttpRequestException(
                    $"Fetch failed with status {(int)response.StatusCode} {response.StatusCode} from {request.RequestUri}. Body preview: {body}",
                    inner: null,
                    statusCode: response.StatusCode);
            }

            var contentBytes = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false);
            var headers = CreateHeaderDictionary(response);
            // Prefer the declared Content-Length; fall back to the number of bytes actually read.
            SourceDiagnostics.RecordHttpRequest(request.SourceName, request.ClientName, response.StatusCode, sendResult.Attempts, duration, response.Content.Headers.ContentLength ?? contentBytes.LongLength, rateLimitRemaining);
            activity?.SetStatus(ActivityStatusCode.Ok);
            return SourceFetchContentResult.Success(
                response.StatusCode,
                contentBytes,
                response.Headers.ETag?.Tag,
                response.Content.Headers.LastModified,
                response.Content.Headers.ContentType?.ToString(),
                sendResult.Attempts,
                headers);
        }
    }
    catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException)
    {
        // Mark the activity as failed, then let the original exception continue unchanged.
        activity?.SetStatus(ActivityStatusCode.Error, ex.Message);
        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Issues the HTTP request through <see cref="SourceRetryPolicy.SendWithRetryAsync"/> using the
/// per-client attempt limit and base delay, and reports how many send attempts were made.
/// </summary>
/// <param name="request">Fetch parameters; a fresh <see cref="HttpRequestMessage"/> is built from them on demand.</param>
/// <param name="completionOption">Completion option forwarded to the HTTP client.</param>
/// <param name="cancellationToken">Token forwarded to the retry policy.</param>
/// <returns>The final response together with the number of attempts.</returns>
private async Task<SourceFetchSendResult> SendAsync(SourceFetchRequest request, HttpCompletionOption completionOption, CancellationToken cancellationToken)
{
    var attempts = 0;
    var clientOptions = _httpClientOptions.Get(request.ClientName);

    var finalResponse = await SourceRetryPolicy.SendWithRetryAsync(
        () => CreateHttpRequestMessage(request),
        async (message, attemptToken) =>
        {
            attempts += 1;

            var httpClient = _httpClientFactory.CreateClient(request.ClientName);
            if (request.TimeoutOverride is { } timeout)
            {
                httpClient.Timeout = timeout;
            }

            return await httpClient.SendAsync(message, completionOption, attemptToken).ConfigureAwait(false);
        },
        maxAttempts: clientOptions.MaxAttempts,
        baseDelay: clientOptions.BaseDelay,
        _jitterSource,
        context => SourceDiagnostics.RecordRetry(
            request.SourceName,
            request.ClientName,
            context.Response?.StatusCode,
            context.Attempt,
            context.Delay),
        cancellationToken).ConfigureAwait(false);

    return new SourceFetchSendResult(finalResponse, attempts);
}
|
||||
|
||||
/// <summary>
/// Builds the outgoing <see cref="HttpRequestMessage"/> for a fetch: method and URI, the Accept
/// header, and the conditional-request headers derived from the request's ETag / LastModified.
/// </summary>
/// <param name="request">Fetch parameters to translate.</param>
/// <returns>A new request message; the caller owns it.</returns>
internal static HttpRequestMessage CreateHttpRequestMessage(SourceFetchRequest request)
{
    var message = new HttpRequestMessage(request.Method, request.RequestUri);

    IReadOnlyList<string> requestedTypes = request.AcceptHeaders is { Count: > 0 } provided
        ? provided
        : DefaultAcceptHeaders;

    var accept = message.Headers.Accept;
    accept.Clear();

    // Blank or unparseable media types are skipped.
    foreach (var candidate in requestedTypes.Where(static value => !string.IsNullOrWhiteSpace(value)))
    {
        if (MediaTypeWithQualityHeaderValue.TryParse(candidate, out var parsedType))
        {
            accept.Add(parsedType);
        }
    }

    // If nothing usable was supplied, fall back to the first default media type.
    if (accept.Count == 0)
    {
        accept.Add(new MediaTypeWithQualityHeaderValue(DefaultAcceptHeaders[0]));
    }

    // An ETag that cannot be parsed is ignored and no If-None-Match header is sent.
    if (!string.IsNullOrWhiteSpace(request.ETag)
        && System.Net.Http.Headers.EntityTagHeaderValue.TryParse(request.ETag, out var entityTag))
    {
        message.Headers.IfNoneMatch.Add(entityTag);
    }

    if (request.LastModified is { } modifiedSince)
    {
        message.Headers.IfModifiedSince = modifiedSince;
    }

    return message;
}
|
||||
|
||||
/// <summary>
/// Best-effort preview of a response body: the body decoded as UTF-8 and cut to at most 256 characters.
/// </summary>
/// <param name="response">Response whose content is read.</param>
/// <param name="cancellationToken">Token passed to the content read.</param>
/// <returns>The preview text, or <c>&lt;unavailable&gt;</c> when reading or decoding fails for any reason.</returns>
private static async Task<string> ReadResponsePreviewAsync(HttpResponseMessage response, CancellationToken cancellationToken)
{
    const int MaxPreviewLength = 256;

    try
    {
        var bodyBytes = await response.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false);
        var text = Encoding.UTF8.GetString(bodyBytes);
        if (text.Length <= MaxPreviewLength)
        {
            return text;
        }

        return text.Substring(0, MaxPreviewLength);
    }
    catch
    {
        // Deliberately swallow everything: the preview is diagnostic only.
        return "<unavailable>";
    }
}
|
||||
|
||||
/// <summary>
/// Returns the first value of the named response header, or <see langword="null"/> when the header is absent.
/// </summary>
private static string? TryGetHeaderValue(HttpResponseHeaders headers, string name)
    => headers.TryGetValues(name, out var found) ? found.FirstOrDefault() : null;
|
||||
|
||||
/// <summary>
/// Flattens response headers followed by content headers into a case-insensitive dictionary.
/// Multiple values of one header are joined with a comma; a content header overwrites a
/// response header of the same name.
/// </summary>
private static Dictionary<string, string> CreateHeaderDictionary(HttpResponseMessage response)
{
    var flattened = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    // Response headers first, content headers second, so the latter win on a name clash.
    foreach (var (headerName, headerValues) in response.Headers.Concat(response.Content.Headers))
    {
        flattened[headerName] = string.Join(",", headerValues);
    }

    return flattened;
}
|
||||
|
||||
/// <summary>Pairs the HTTP response of a fetch with the number of send attempts counted while obtaining it.</summary>
private readonly record struct SourceFetchSendResult(HttpResponseMessage Response, int Attempts);
|
||||
}
|
||||
@@ -0,0 +1,184 @@
|
||||
using System.Globalization;
|
||||
using System.Net;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Fetch;
|
||||
|
||||
/// <summary>
|
||||
/// Provides retry/backoff behavior for source HTTP fetches.
|
||||
/// </summary>
|
||||
internal static class SourceRetryPolicy
|
||||
{
|
||||
private static readonly StringComparer HeaderComparer = StringComparer.OrdinalIgnoreCase;
|
||||
|
||||
/// <summary>
/// Sends a request, retrying on exceptions thrown by <paramref name="sender"/> and on retryable
/// responses until <paramref name="maxAttempts"/> is reached.
/// </summary>
/// <param name="requestFactory">Creates a fresh request for every attempt; each request is disposed after its attempt.</param>
/// <param name="sender">Performs a single send of the supplied request.</param>
/// <param name="maxAttempts">Upper bound on send attempts; one attempt is always made.</param>
/// <param name="baseDelay">Base of the exponential backoff between attempts.</param>
/// <param name="jitterSource">Supplies the jitter added to the backoff.</param>
/// <param name="onRetry">Optional callback invoked before each retry delay.</param>
/// <param name="cancellationToken">Cancels the send and any pending backoff delay.</param>
/// <returns>
/// The first response that does not need a retry, or the last response received once the
/// attempts are exhausted. The caller owns the returned response.
/// </returns>
/// <exception cref="ArgumentNullException">A required delegate or the jitter source is null.</exception>
public static async Task<HttpResponseMessage> SendWithRetryAsync(
    Func<HttpRequestMessage> requestFactory,
    Func<HttpRequestMessage, CancellationToken, Task<HttpResponseMessage>> sender,
    int maxAttempts,
    TimeSpan baseDelay,
    IJitterSource jitterSource,
    Action<SourceRetryAttemptContext>? onRetry,
    CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(requestFactory);
    ArgumentNullException.ThrowIfNull(sender);
    ArgumentNullException.ThrowIfNull(jitterSource);

    var attempt = 0;

    while (true)
    {
        attempt++;
        using var request = requestFactory();
        HttpResponseMessage response;

        try
        {
            response = await sender(request, cancellationToken).ConfigureAwait(false);
        }
        // Once the caller has cancelled there is nothing to retry: let the exception propagate
        // instead of reporting a retry that the delay below would abort immediately.
        catch (Exception ex) when (attempt < maxAttempts && !cancellationToken.IsCancellationRequested)
        {
            var delay = ComputeDelay(baseDelay, attempt, jitterSource: jitterSource);
            onRetry?.Invoke(new SourceRetryAttemptContext(attempt, null, ex, delay));
            await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
            continue;
        }

        if (NeedsRetry(response) && attempt < maxAttempts)
        {
            var delay = ComputeDelay(
                baseDelay,
                attempt,
                GetRetryAfter(response),
                jitterSource);

            // The callback sees the response before it is disposed.
            onRetry?.Invoke(new SourceRetryAttemptContext(attempt, response, null, delay));
            response.Dispose();
            await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
            continue;
        }

        return response;
    }
}
|
||||
|
||||
/// <summary>
/// Decides whether a response warrants another attempt: HTTP 429, a response recognised by
/// <c>IsRateLimitResponse</c>, or any 5xx status.
/// </summary>
private static bool NeedsRetry(HttpResponseMessage response)
{
    var statusCode = response.StatusCode;

    return statusCode == System.Net.HttpStatusCode.TooManyRequests
        || IsRateLimitResponse(response)
        || (int)statusCode is >= 500 and < 600;
}
|
||||
|
||||
/// <summary>
/// Computes the wait before the next attempt.
/// </summary>
/// <param name="baseDelay">Base of the exponential backoff.</param>
/// <param name="attempt">1-based number of the attempt that just failed.</param>
/// <param name="retryAfter">Server-requested delay; when positive it replaces the backoff.</param>
/// <param name="jitterSource">Jitter provider; <see cref="Random.Shared"/> is used when null.</param>
/// <returns>
/// The positive <paramref name="retryAfter"/> if supplied, otherwise
/// <c>baseDelay * 2^(attempt - 1)</c> plus 50-250 ms of jitter. The result never exceeds
/// <see cref="int.MaxValue"/> milliseconds so it is always a valid <c>Task.Delay</c> argument.
/// </returns>
private static TimeSpan ComputeDelay(TimeSpan baseDelay, int attempt, TimeSpan? retryAfter = null, IJitterSource? jitterSource = null)
{
    // Task.Delay rejects delays beyond its timer range with ArgumentOutOfRangeException;
    // int.MaxValue milliseconds is accepted by every runtime version.
    var ceiling = TimeSpan.FromMilliseconds(int.MaxValue);

    if (retryAfter.HasValue && retryAfter.Value > TimeSpan.Zero)
    {
        return retryAfter.Value < ceiling ? retryAfter.Value : ceiling;
    }

    var exponentialMs = baseDelay.TotalMilliseconds * Math.Pow(2, attempt - 1);
    if (!(exponentialMs < ceiling.TotalMilliseconds))
    {
        // Also covers NaN/overflow, which TimeSpan.FromMilliseconds would reject.
        return ceiling;
    }

    var exponential = TimeSpan.FromMilliseconds(exponentialMs);
    var jitter = jitterSource?.Next(TimeSpan.FromMilliseconds(50), TimeSpan.FromMilliseconds(250))
        ?? TimeSpan.FromMilliseconds(Random.Shared.Next(50, 250));
    var total = exponential + jitter;

    return total < ceiling ? total : ceiling;
}
|
||||
|
||||
/// <summary>
/// Detects throttling signals: a parsed Retry-After header on any status, or a 403/429 response
/// that either reports no remaining quota or carries an X-RateLimit-Reset header.
/// </summary>
private static bool IsRateLimitResponse(HttpResponseMessage response)
{
    if (response.Headers.RetryAfter is not null)
    {
        return true;
    }

    var throttlingStatus = response.StatusCode is System.Net.HttpStatusCode.Forbidden
        or System.Net.HttpStatusCode.TooManyRequests;
    if (!throttlingStatus)
    {
        return false;
    }

    if (TryGetRateLimitRemaining(response, out var quotaLeft) && quotaLeft <= 0)
    {
        return true;
    }

    return response.Headers.TryGetValues("X-RateLimit-Reset", out _);
}
|
||||
|
||||
/// <summary>
/// Reads the first X-RateLimit-Remaining value that parses as an invariant-culture integer.
/// </summary>
/// <param name="response">Response whose headers are inspected.</param>
/// <param name="remaining">The parsed value, or 0 when no value could be parsed.</param>
/// <returns><see langword="true"/> when a value was parsed.</returns>
private static bool TryGetRateLimitRemaining(HttpResponseMessage response, out long remaining)
{
    remaining = 0;

    if (!response.Headers.TryGetValues("X-RateLimit-Remaining", out var candidates))
    {
        return false;
    }

    foreach (var candidate in candidates)
    {
        if (!long.TryParse(candidate, NumberStyles.Integer, CultureInfo.InvariantCulture, out var quota))
        {
            continue;
        }

        remaining = quota;
        return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Extracts the server-requested wait from a response.
/// </summary>
/// <remarks>
/// Sources are tried in order: the parsed Retry-After header (delta, then absolute date), the raw
/// Retry-After values read as seconds, and finally X-RateLimit-Reset read as Unix epoch seconds.
/// Values that are not positive, lie in the past, or fall outside the representable range are
/// skipped rather than thrown on, so a malformed header falls back to the regular backoff.
/// </remarks>
/// <param name="response">Response whose headers are inspected.</param>
/// <returns>A positive delay, or <see langword="null"/> when no usable hint is present.</returns>
private static TimeSpan? GetRetryAfter(HttpResponseMessage response)
{
    var retryAfter = response.Headers.RetryAfter;
    if (retryAfter is not null)
    {
        if (retryAfter.Delta.HasValue && retryAfter.Delta.Value > TimeSpan.Zero)
        {
            return retryAfter.Delta;
        }

        if (retryAfter.Date.HasValue)
        {
            var delta = retryAfter.Date.Value - DateTimeOffset.UtcNow;
            if (delta > TimeSpan.Zero)
            {
                return delta;
            }
        }
    }

    if (response.Headers.TryGetValues("Retry-After", out var retryAfterValues))
    {
        foreach (var value in retryAfterValues)
        {
            // The upper bound keeps TimeSpan.FromSeconds from overflowing on "Infinity" or "1e300".
            if (double.TryParse(value, NumberStyles.Float, CultureInfo.InvariantCulture, out var seconds)
                && seconds > 0
                && seconds < TimeSpan.MaxValue.TotalSeconds)
            {
                return TimeSpan.FromSeconds(seconds);
            }
        }
    }

    if (response.Headers.TryGetValues("X-RateLimit-Reset", out var resetValues))
    {
        var earliestEpoch = DateTimeOffset.MinValue.ToUnixTimeSeconds();
        var latestEpoch = DateTimeOffset.MaxValue.ToUnixTimeSeconds();

        foreach (var value in resetValues)
        {
            if (!long.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var epochSeconds))
            {
                continue;
            }

            // FromUnixTimeSeconds throws outside this range (e.g. a millisecond timestamp).
            if (epochSeconds < earliestEpoch || epochSeconds > latestEpoch)
            {
                continue;
            }

            var delta = DateTimeOffset.FromUnixTimeSeconds(epochSeconds) - DateTimeOffset.UtcNow;
            if (delta > TimeSpan.Zero)
            {
                return delta;
            }
        }
    }

    return null;
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Describes one failed attempt handed to the retry callback: the attempt number (starting at 1),
/// the response or the exception that triggered the retry (the other is null), and the delay
/// that will be awaited before the next attempt.
/// </summary>
internal readonly record struct SourceRetryAttemptContext(int Attempt, HttpResponseMessage? Response, Exception? Exception, TimeSpan Delay);
|
||||
@@ -0,0 +1,180 @@
|
||||
using System.Linq;
|
||||
using AngleSharp.Dom;
|
||||
using AngleSharp.Html.Parser;
|
||||
using StellaOps.Concelier.Connector.Common.Url;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Html;
|
||||
|
||||
/// <summary>
|
||||
/// Sanitizes untrusted HTML fragments produced by upstream advisories.
|
||||
/// Removes executable content, enforces an allowlist of elements, and normalizes anchor href values.
|
||||
/// </summary>
|
||||
public sealed class HtmlContentSanitizer
|
||||
{
|
||||
private static readonly HashSet<string> AllowedElements = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
"a", "abbr", "article", "b", "body", "blockquote", "br", "code", "dd", "div", "dl", "dt",
|
||||
"em", "h1", "h2", "h3", "h4", "h5", "h6", "html", "i", "li", "ol", "p", "pre", "s",
|
||||
"section", "small", "span", "strong", "sub", "sup", "table", "tbody", "td", "th", "thead", "tr", "ul"
|
||||
};
|
||||
|
||||
private static readonly HashSet<string> UrlAttributes = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
"href", "src",
|
||||
};
|
||||
|
||||
private readonly HtmlParser _parser;
|
||||
|
||||
/// <summary>
/// Creates the sanitizer with an HTML parser that does not keep source references.
/// </summary>
public HtmlContentSanitizer()
{
    var parserOptions = new HtmlParserOptions
    {
        IsKeepingSourceReferences = false,
    };

    _parser = new HtmlParser(parserOptions);
}
|
||||
|
||||
/// <summary>
/// Parses <paramref name="html"/>, drops dangerous elements, flattens elements outside the
/// allowlist to their text content, cleans attributes on the rest, and returns the resulting
/// body markup.
/// </summary>
/// <param name="html">Untrusted HTML fragment; null or whitespace yields an empty string.</param>
/// <param name="baseUri">Optional base passed on for URL attribute normalization.</param>
/// <returns>The trimmed inner HTML of the sanitized body, or an empty string.</returns>
public string Sanitize(string? html, Uri? baseUri = null)
{
    if (string.IsNullOrWhiteSpace(html))
    {
        return string.Empty;
    }

    var parsed = _parser.ParseDocument(html);
    if (parsed.Body is null)
    {
        return string.Empty;
    }

    // Work on a snapshot because the loop mutates the tree it walks.
    var snapshot = parsed.All.ToList();
    for (var index = 0; index < snapshot.Count; index++)
    {
        var node = snapshot[index];

        if (IsDangerous(node))
        {
            node.Remove();
        }
        else if (AllowedElements.Contains(node.LocalName))
        {
            CleanAttributes(node, baseUri);
        }
        else if (node.Owner is null)
        {
            node.Remove();
        }
        else
        {
            // Keep the readable text of a disallowed element but none of its markup.
            var ownerDocument = node.Owner;
            var flattened = node.TextContent ?? string.Empty;
            node.Replace(ownerDocument.CreateTextNode(flattened));
        }
    }

    var root = parsed.Body ?? parsed.DocumentElement;
    if (root is null)
    {
        return string.Empty;
    }

    var markup = root.InnerHtml;
    if (string.IsNullOrWhiteSpace(markup))
    {
        return string.Empty;
    }

    return markup.Trim();
}
|
||||
|
||||
/// <summary>
/// Reports whether the element is one that is removed outright together with its content:
/// script, style, iframe, object or embed (compared case-insensitively).
/// </summary>
private static bool IsDangerous(IElement element)
{
    foreach (var blockedTag in new[] { "script", "style", "iframe", "object", "embed" })
    {
        if (string.Equals(element.LocalName, blockedTag, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Strips event-handler attributes (names starting with "on"), normalizes URL attributes, and
/// removes every remaining attribute that is not allowed for the element.
/// </summary>
private static void CleanAttributes(IElement element, Uri? baseUri)
{
    var attributes = element.Attributes;
    if (attributes is null || attributes.Length == 0)
    {
        return;
    }

    // Iterate over a copy: attributes are removed from the element while looping.
    foreach (var current in attributes.ToList())
    {
        var attributeName = current.Name;

        if (attributeName.StartsWith("on", StringComparison.OrdinalIgnoreCase))
        {
            element.RemoveAttribute(attributeName);
        }
        else if (UrlAttributes.Contains(attributeName))
        {
            NormalizeUrlAttribute(element, current, baseUri);
        }
        else if (!IsAttributeAllowed(element.LocalName, attributeName))
        {
            element.RemoveAttribute(attributeName);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Attribute allowlist: <c>title</c> on any element, <c>rel</c> on anchors, and
/// <c>border</c>/<c>cellpadding</c>/<c>cellspacing</c> on tables. Names are compared case-insensitively.
/// </summary>
private static bool IsAttributeAllowed(string elementName, string attributeName)
{
    static bool Same(string left, string right)
        => string.Equals(left, right, StringComparison.OrdinalIgnoreCase);

    if (Same(attributeName, "title"))
    {
        return true;
    }

    if (Same(elementName, "a"))
    {
        return Same(attributeName, "rel");
    }

    if (Same(elementName, "table"))
    {
        return Same(attributeName, "border")
            || Same(attributeName, "cellpadding")
            || Same(attributeName, "cellspacing");
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Replaces a URL attribute with its normalized form, or removes it when the value is blank or
/// cannot be normalized. Anchors whose URL normalizes also receive a fixed <c>rel</c> value.
/// </summary>
private static void NormalizeUrlAttribute(IElement element, IAttr attribute, Uri? baseUri)
{
    var attributeName = attribute.Name;

    if (string.IsNullOrWhiteSpace(attribute.Value)
        || !UrlNormalizer.TryNormalize(attribute.Value, baseUri, out var normalized))
    {
        element.RemoveAttribute(attributeName);
        return;
    }

    var isAnchor = string.Equals(element.LocalName, "a", StringComparison.OrdinalIgnoreCase);
    if (isAnchor)
    {
        element.SetAttribute("rel", "noopener nofollow noreferrer");
    }

    if (normalized is not null)
    {
        element.SetAttribute(attributeName, normalized.ToString());
    }
    else
    {
        element.RemoveAttribute(attributeName);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
using System.Net.Http.Headers;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Http;
|
||||
|
||||
/// <summary>
/// Pipeline guard for outbound requests: a request is forwarded only when its target host is
/// contained in the allowlist snapshot taken at construction time.
/// </summary>
internal sealed class AllowlistedHttpMessageHandler : DelegatingHandler
{
    private readonly IReadOnlyCollection<string> _allowedHosts;

    /// <summary>
    /// Captures the allowed hosts from <paramref name="options"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The options do not list any allowed host.</exception>
    public AllowlistedHttpMessageHandler(SourceHttpClientOptions options)
    {
        ArgumentNullException.ThrowIfNull(options);

        var hosts = options.GetAllowedHostsSnapshot();
        _allowedHosts = hosts.Count > 0
            ? hosts
            : throw new InvalidOperationException("Source HTTP client must configure at least one allowed host.");
    }

    /// <summary>
    /// Forwards the request to the inner handler when its host is allowlisted.
    /// </summary>
    /// <exception cref="InvalidOperationException">The request has no host or the host is not allowlisted.</exception>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(request);

        var host = request.RequestUri?.Host;
        var permitted = !string.IsNullOrWhiteSpace(host) && _allowedHosts.Contains(host);
        if (permitted)
        {
            return base.SendAsync(request, cancellationToken);
        }

        throw new InvalidOperationException($"Request host '{host ?? "<null>"}' is not allowlisted for this source.");
    }
}
|
||||
@@ -0,0 +1,197 @@
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Net.Security;
|
||||
using System.Security.Cryptography.X509Certificates;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Options;
|
||||
using StellaOps.Concelier.Connector.Common.Xml;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Http;
|
||||
|
||||
public static class ServiceCollectionExtensions
|
||||
{
|
||||
/// <summary>
/// Registers a named HTTP client configured for a source connector with allowlisted hosts and sensible defaults.
/// </summary>
/// <param name="services">Service collection to register into.</param>
/// <param name="name">Logical name of the HTTP client and of its options instance.</param>
/// <param name="configure">Callback that populates the client options.</param>
/// <returns>The builder of the registered client for further customization.</returns>
/// <exception cref="ArgumentNullException"><paramref name="services"/> or <paramref name="configure"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="name"/> is null or empty.</exception>
public static IHttpClientBuilder AddSourceHttpClient(this IServiceCollection services, string name, Action<SourceHttpClientOptions> configure)
{
    ArgumentNullException.ThrowIfNull(services);
    ArgumentException.ThrowIfNullOrEmpty(name);

    // Validate here: the wrapping lambda below is never null, so the other overload could not
    // detect a missing callback and it would only fail later when the options are resolved.
    ArgumentNullException.ThrowIfNull(configure);

    return services.AddSourceHttpClient(name, (_, options) => configure(options));
}
|
||||
|
||||
/// <summary>
/// Registers a named HTTP client for a source connector: binds its named options, applies the
/// client defaults, builds the primary <see cref="SocketsHttpHandler"/> (proxy and TLS
/// validation) and adds the host allowlist handler.
/// </summary>
/// <param name="services">Service collection to register into.</param>
/// <param name="name">Logical name of the HTTP client and of its options instance.</param>
/// <param name="configure">Callback that populates the options; runs before configuration overrides are applied.</param>
/// <returns>The builder of the registered client for further customization.</returns>
/// <exception cref="ArgumentNullException"><paramref name="services"/> or <paramref name="configure"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="name"/> is null or empty.</exception>
public static IHttpClientBuilder AddSourceHttpClient(this IServiceCollection services, string name, Action<IServiceProvider, SourceHttpClientOptions> configure)
{
    ArgumentNullException.ThrowIfNull(services);
    ArgumentException.ThrowIfNullOrEmpty(name);
    ArgumentNullException.ThrowIfNull(configure);

    services.AddOptions<SourceHttpClientOptions>(name).Configure<IServiceProvider>((options, sp) =>
    {
        configure(sp, options);
        SourceHttpClientConfigurationBinder.Apply(sp, name, options);
    });

    return services
        .AddHttpClient(name)
        .ConfigureHttpClient((sp, client) =>
        {
            var options = sp.GetRequiredService<IOptionsMonitor<SourceHttpClientOptions>>().Get(name);

            if (options.BaseAddress is not null)
            {
                client.BaseAddress = options.BaseAddress;
            }

            client.Timeout = options.Timeout;
            client.DefaultRequestHeaders.UserAgent.Clear();
            client.DefaultRequestHeaders.UserAgent.ParseAdd(options.UserAgent);
            client.DefaultRequestVersion = options.RequestVersion;
            client.DefaultVersionPolicy = options.VersionPolicy;

            foreach (var header in options.DefaultRequestHeaders)
            {
                client.DefaultRequestHeaders.TryAddWithoutValidation(header.Key, header.Value);
            }
        })
        .ConfigurePrimaryHttpMessageHandler((sp) =>
        {
            var options = sp.GetRequiredService<IOptionsMonitor<SourceHttpClientOptions>>().Get(name).Clone();
            var handler = new SocketsHttpHandler
            {
                AllowAutoRedirect = options.AllowAutoRedirect,
                AutomaticDecompression = DecompressionMethods.All,
                EnableMultipleHttp2Connections = options.EnableMultipleHttp2Connections,
            };
            options.ConfigureHandler?.Invoke(handler);
            ApplyProxySettings(handler, options);

            if (options.ServerCertificateCustomValidation is not null)
            {
                // An explicit validation delegate takes precedence over everything else.
                handler.SslOptions.RemoteCertificateValidationCallback = (_, certificate, chain, sslPolicyErrors) =>
                {
                    X509Certificate2? certToValidate = certificate as X509Certificate2;
                    X509Certificate2? disposable = null;
                    if (certToValidate is null && certificate is not null)
                    {
                        disposable = X509CertificateLoader.LoadCertificate(certificate.Export(X509ContentType.Cert));
                        certToValidate = disposable;
                    }

                    try
                    {
                        return options.ServerCertificateCustomValidation(certToValidate, chain, sslPolicyErrors);
                    }
                    finally
                    {
                        disposable?.Dispose();
                    }
                };
            }
            else if (options.TrustedRootCertificates.Count > 0 && handler.SslOptions.RemoteCertificateValidationCallback is null)
            {
                // Only installed when ConfigureHandler did not already set its own callback.
                var trustedRoots = new X509Certificate2Collection();
                foreach (var certificate in options.TrustedRootCertificates)
                {
                    trustedRoots.Add(certificate);
                }

                handler.SslOptions.RemoteCertificateValidationCallback = (_, certificate, chain, errors) =>
                    ValidateAgainstTrustedRoots(certificate, chain, errors, trustedRoots);
            }

            return handler;
        })
        .AddHttpMessageHandler(sp =>
        {
            var options = sp.GetRequiredService<IOptionsMonitor<SourceHttpClientOptions>>().Get(name).Clone();
            return new AllowlistedHttpMessageHandler(options);
        });
}

/// <summary>
/// Validates a server certificate against additional trusted roots. The custom roots may only
/// compensate for an untrusted chain; a host-name mismatch or a missing certificate is rejected.
/// </summary>
/// <param name="certificate">Certificate presented by the server.</param>
/// <param name="chain">Chain built by the platform; its elements are offered as intermediates.</param>
/// <param name="errors">Errors reported by the platform validation.</param>
/// <param name="trustedRoots">Roots that are trusted in addition to the system store.</param>
/// <returns><see langword="true"/> when the certificate is acceptable.</returns>
private static bool ValidateAgainstTrustedRoots(
    X509Certificate? certificate,
    X509Chain? chain,
    SslPolicyErrors errors,
    X509Certificate2Collection trustedRoots)
{
    if (errors == SslPolicyErrors.None)
    {
        return true;
    }

    // Rebuilding the chain proves nothing about the host name, so any error other than
    // RemoteCertificateChainErrors must keep failing the handshake.
    if (certificate is null || (errors & ~SslPolicyErrors.RemoteCertificateChainErrors) != SslPolicyErrors.None)
    {
        return false;
    }

    X509Certificate2? certToValidate = certificate as X509Certificate2;
    X509Certificate2? disposable = null;
    try
    {
        if (certToValidate is null)
        {
            disposable = X509CertificateLoader.LoadCertificate(certificate.Export(X509ContentType.Cert));
            certToValidate = disposable;
        }

        using var customChain = new X509Chain();
        customChain.ChainPolicy.TrustMode = X509ChainTrustMode.CustomRootTrust;
        customChain.ChainPolicy.CustomTrustStore.Clear();
        customChain.ChainPolicy.CustomTrustStore.AddRange(trustedRoots);
        customChain.ChainPolicy.RevocationMode = X509RevocationMode.NoCheck;
        customChain.ChainPolicy.VerificationFlags = X509VerificationFlags.NoFlag;

        if (chain is not null)
        {
            foreach (var element in chain.ChainElements)
            {
                customChain.ChainPolicy.ExtraStore.Add(element.Certificate);
            }
        }

        return certToValidate is not null && customChain.Build(certToValidate);
    }
    finally
    {
        disposable?.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Adds the singleton helpers shared by source connectors: JSON and XML schema validators (each
/// also exposed through its interface), the jitter source, raw document storage and the fetch service.
/// </summary>
/// <param name="services">Service collection to register into.</param>
/// <returns>The same <paramref name="services"/> instance for chaining.</returns>
public static IServiceCollection AddSourceCommon(this IServiceCollection services)
{
    ArgumentNullException.ThrowIfNull(services);

    return services
        .AddSingleton<Json.JsonSchemaValidator>()
        .AddSingleton<Json.IJsonSchemaValidator>(provider => provider.GetRequiredService<Json.JsonSchemaValidator>())
        .AddSingleton<XmlSchemaValidator>()
        .AddSingleton<IXmlSchemaValidator>(provider => provider.GetRequiredService<XmlSchemaValidator>())
        .AddSingleton<Fetch.IJitterSource, Fetch.CryptoJitterSource>()
        .AddSingleton<Fetch.RawDocumentStorage>()
        .AddSingleton<Fetch.SourceFetchService>();
}
|
||||
|
||||
/// <summary>
/// Configures the handler to use the proxy described by the options. Does nothing when no proxy
/// address is set.
/// </summary>
private static void ApplyProxySettings(SocketsHttpHandler handler, SourceHttpClientOptions options)
{
    var address = options.ProxyAddress;
    if (address is null)
    {
        return;
    }

    var webProxy = new WebProxy(address);
    webProxy.BypassProxyOnLocal = options.ProxyBypassOnLocal;
    webProxy.UseDefaultCredentials = options.ProxyUseDefaultCredentials;

    if (options.ProxyBypassList.Count != 0)
    {
        webProxy.BypassList = options.ProxyBypassList.ToArray();
    }

    // Explicit credentials apply only when default credentials are off and a user name is present.
    var useExplicitCredentials = !options.ProxyUseDefaultCredentials
        && !string.IsNullOrWhiteSpace(options.ProxyUsername);
    if (useExplicitCredentials)
    {
        var proxyPassword = options.ProxyPassword ?? string.Empty;
        webProxy.Credentials = new NetworkCredential(options.ProxyUsername, proxyPassword);
    }

    handler.Proxy = webProxy;
    handler.UseProxy = true;
}
|
||||
}
|
||||
@@ -0,0 +1,360 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Net.Security;
|
||||
using System.Security.Cryptography;
|
||||
using System.Security.Cryptography.X509Certificates;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Http;
|
||||
|
||||
internal static class SourceHttpClientConfigurationBinder
|
||||
{
|
||||
private const string ConcelierSection = "concelier";
|
||||
private const string HttpClientsSection = "httpClients";
|
||||
private const string SourcesSection = "sources";
|
||||
private const string HttpSection = "http";
|
||||
private const string AllowInvalidKey = "allowInvalidCertificates";
|
||||
private const string TrustedRootPathsKey = "trustedRootPaths";
|
||||
private const string ProxySection = "proxy";
|
||||
private const string ProxyAddressKey = "address";
|
||||
private const string ProxyBypassOnLocalKey = "bypassOnLocal";
|
||||
private const string ProxyBypassListKey = "bypassList";
|
||||
private const string ProxyUseDefaultCredentialsKey = "useDefaultCredentials";
|
||||
private const string ProxyUsernameKey = "username";
|
||||
private const string ProxyPasswordKey = "password";
|
||||
private const string OfflineRootKey = "offlineRoot";
|
||||
private const string OfflineRootEnvironmentVariable = "CONCELIER_OFFLINE_ROOT";
|
||||
|
||||
/// <summary>
/// Applies configuration overrides to the options of the named client. Every existing candidate
/// section is applied once (section paths are de-duplicated case-insensitively). Does nothing
/// when no <see cref="IConfiguration"/> is registered.
/// </summary>
/// <param name="services">Provider used to resolve configuration, logging and host environment.</param>
/// <param name="clientName">Name of the HTTP client being configured.</param>
/// <param name="options">Options instance that receives the overrides.</param>
public static void Apply(IServiceProvider services, string clientName, SourceHttpClientOptions options)
{
    if (services.GetService(typeof(IConfiguration)) is not IConfiguration configuration)
    {
        return;
    }

    // Logger and host environment are optional services.
    var logger = (services.GetService(typeof(ILoggerFactory)) as ILoggerFactory)
        ?.CreateLogger("SourceHttpClientConfiguration");
    var environment = services.GetService(typeof(IHostEnvironment)) as IHostEnvironment;

    var appliedPaths = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var candidate in EnumerateCandidateSections(configuration, clientName))
    {
        var isNewSection = candidate is not null && candidate.Exists() && appliedPaths.Add(candidate.Path);
        if (isNewSection)
        {
            ApplySection(candidate, configuration, environment, clientName, options, logger);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Lazily yields the existing configuration sections that may hold HTTP settings for the client:
/// for each candidate name, first <c>concelier:httpClients:{name}</c> and then
/// <c>concelier:sources:{name}:http</c>.
/// </summary>
private static IEnumerable<IConfigurationSection> EnumerateCandidateSections(IConfiguration configuration, string clientName)
{
    // Resolves a path and returns it only when the section actually exists.
    IConfigurationSection? Locate(params string[] path)
    {
        var found = GetSection(configuration, path);
        return found is not null && found.Exists() ? found : null;
    }

    foreach (var candidateName in BuildCandidateNames(clientName))
    {
        if (Locate(ConcelierSection, HttpClientsSection, candidateName) is { } clientSection)
        {
            yield return clientSection;
        }

        if (Locate(ConcelierSection, SourcesSection, candidateName, HttpSection) is { } sourceSection)
        {
            yield return sourceSection;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Yields the configuration key variants tried for a client name: the name itself, the name
/// without a leading <c>source.</c> prefix (when something follows the prefix), and the name
/// with dots replaced by underscores (when that differs from the name).
/// </summary>
private static IEnumerable<string> BuildCandidateNames(string clientName)
{
    const string Prefix = "source.";

    yield return clientName;

    var hasPrefixedSuffix = clientName.Length > Prefix.Length
        && clientName.StartsWith(Prefix, StringComparison.OrdinalIgnoreCase);
    if (hasPrefixedSuffix)
    {
        yield return clientName.Substring(Prefix.Length);
    }

    var underscored = clientName.Replace('.', '_');
    if (!underscored.Equals(clientName, StringComparison.OrdinalIgnoreCase))
    {
        yield return underscored;
    }
}
|
||||
|
||||
/// <summary>
/// Walks the configuration tree one segment at a time and returns the section reached, or
/// <see langword="null"/> when a lookup yields null or the final node is not a section.
/// </summary>
private static IConfigurationSection? GetSection(IConfiguration configuration, params string[] pathSegments)
{
    IConfiguration? cursor = configuration;

    for (var depth = 0; depth < pathSegments.Length; depth++)
    {
        if (cursor is null)
        {
            return null;
        }

        cursor = cursor.GetSection(pathSegments[depth]);
    }

    return cursor as IConfigurationSection;
}
|
||||
|
||||
/// <summary>
/// Applies one configuration section to the options: the certificate-validation bypass, trusted
/// root certificates and proxy settings.
/// </summary>
/// <param name="section">Section holding the HTTP settings of the client.</param>
/// <param name="rootConfiguration">Root configuration, used for the global offline root fallback.</param>
/// <param name="hostEnvironment">Optional host environment used to resolve relative paths.</param>
/// <param name="clientName">Client name used in log messages.</param>
/// <param name="options">Options instance that receives the settings.</param>
/// <param name="logger">Optional logger.</param>
private static void ApplySection(
    IConfigurationSection section,
    IConfiguration rootConfiguration,
    IHostEnvironment? hostEnvironment,
    string clientName,
    SourceHttpClientOptions options,
    ILogger? logger)
{
    if (section.GetValue<bool?>(AllowInvalidKey) == true)
    {
        options.AllowInvalidServerCertificates = true;

        // The bypass accepts every certificate unconditionally; a previously configured
        // validation delegate is intentionally replaced and never consulted.
        options.ServerCertificateCustomValidation = (_, _, _) => true;

        logger?.LogWarning(
            "Source HTTP client '{ClientName}' is configured to bypass TLS certificate validation.",
            clientName);
    }

    // Offline root precedence: section value, then the global concelier value, then the environment variable.
    var offlineRoot = section.GetValue<string?>(OfflineRootKey)
        ?? rootConfiguration.GetSection(ConcelierSection).GetValue<string?>(OfflineRootKey)
        ?? Environment.GetEnvironmentVariable(OfflineRootEnvironmentVariable);

    ApplyTrustedRoots(section, offlineRoot, hostEnvironment, clientName, options, logger);
    ApplyProxyConfiguration(section, clientName, options, logger);
}
|
||||
|
||||
/// <summary>
/// Loads the certificates listed under <c>trustedRootPaths</c> and adds them to the trusted roots
/// of the options.
/// </summary>
/// <param name="section">Section that may contain the <c>trustedRootPaths</c> list.</param>
/// <param name="offlineRoot">Optional directory that relative paths are resolved against.</param>
/// <param name="hostEnvironment">Optional host environment used when no offline root is set.</param>
/// <param name="clientName">Client name used in log messages.</param>
/// <param name="options">Options instance that receives the certificates.</param>
/// <param name="logger">Optional logger.</param>
/// <exception cref="FileNotFoundException">A configured path does not resolve to an existing file.</exception>
private static void ApplyTrustedRoots(
    IConfigurationSection section,
    string? offlineRoot,
    IHostEnvironment? hostEnvironment,
    string clientName,
    SourceHttpClientOptions options,
    ILogger? logger)
{
    var trustedRootSection = section.GetSection(TrustedRootPathsKey);
    if (!trustedRootSection.Exists())
    {
        return;
    }

    var paths = trustedRootSection.Get<string[]?>();
    if (paths is null || paths.Length == 0)
    {
        return;
    }

    foreach (var rawPath in paths)
    {
        if (string.IsNullOrWhiteSpace(rawPath))
        {
            continue;
        }

        var resolvedPath = ResolvePath(rawPath, offlineRoot, hostEnvironment);
        if (!File.Exists(resolvedPath))
        {
            var message = string.Format(
                CultureInfo.InvariantCulture,
                "Trusted root certificate '{0}' resolved to '{1}' but was not found.",
                rawPath,
                resolvedPath);
            throw new FileNotFoundException(message, resolvedPath);
        }

        foreach (var certificate in LoadCertificates(resolvedPath))
        {
            var adopted = false;
            try
            {
                AddTrustedCertificate(options, certificate);

                // The options keep the instance that was added, so it must stay alive; only
                // instances that were not taken over (duplicates) are released below.
                adopted = options.TrustedRootCertificates.Any(existing => ReferenceEquals(existing, certificate));

                logger?.LogInformation(
                    "Source HTTP client '{ClientName}' loaded trusted root certificate '{Thumbprint}' from '{Path}'.",
                    clientName,
                    certificate.Thumbprint,
                    resolvedPath);
            }
            finally
            {
                if (!adopted)
                {
                    certificate.Dispose();
                }
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Copies the values of the <c>proxy</c> sub-section into the options. Keys that are absent leave
/// the corresponding option untouched; an invalid address is logged and ignored.
/// </summary>
private static void ApplyProxyConfiguration(
    IConfigurationSection section,
    string clientName,
    SourceHttpClientOptions options,
    ILogger? logger)
{
    var proxy = section.GetSection(ProxySection);
    if (!proxy.Exists())
    {
        return;
    }

    var rawAddress = proxy.GetValue<string?>(ProxyAddressKey);
    if (!string.IsNullOrWhiteSpace(rawAddress))
    {
        if (!Uri.TryCreate(rawAddress, UriKind.Absolute, out var parsedAddress))
        {
            logger?.LogWarning(
                "Source HTTP client '{ClientName}' has invalid proxy address '{ProxyAddress}'.",
                clientName,
                rawAddress);
        }
        else
        {
            options.ProxyAddress = parsedAddress;
        }
    }

    if (proxy.GetValue<bool?>(ProxyBypassOnLocalKey) is { } bypassOnLocal)
    {
        options.ProxyBypassOnLocal = bypassOnLocal;
    }

    var bypassList = proxy.GetSection(ProxyBypassListKey);
    if (bypassList.Exists())
    {
        // A configured list replaces whatever was set before, even when it binds to nothing.
        var configuredEntries = bypassList.Get<string[]?>();
        options.ProxyBypassList.Clear();

        foreach (var configuredEntry in configuredEntries ?? Array.Empty<string>())
        {
            if (!string.IsNullOrWhiteSpace(configuredEntry))
            {
                options.ProxyBypassList.Add(configuredEntry.Trim());
            }
        }
    }

    if (proxy.GetValue<bool?>(ProxyUseDefaultCredentialsKey) is { } useDefaultCredentials)
    {
        options.ProxyUseDefaultCredentials = useDefaultCredentials;
    }

    var configuredUser = proxy.GetValue<string?>(ProxyUsernameKey);
    if (!string.IsNullOrWhiteSpace(configuredUser))
    {
        options.ProxyUsername = configuredUser.Trim();
    }

    // The password is stored as configured, without trimming.
    var configuredPassword = proxy.GetValue<string?>(ProxyPasswordKey);
    if (!string.IsNullOrWhiteSpace(configuredPassword))
    {
        options.ProxyPassword = configuredPassword;
    }
}
|
||||
|
||||
/// <summary>
/// Turns a configured path into an absolute one. Rooted paths are returned unchanged; relative
/// paths are resolved against the offline root when set, otherwise against the host content
/// root, otherwise against the application base directory.
/// </summary>
private static string ResolvePath(string path, string? offlineRoot, IHostEnvironment? hostEnvironment)
{
    if (Path.IsPathRooted(path))
    {
        return path;
    }

    var anchor = string.IsNullOrWhiteSpace(offlineRoot)
        ? hostEnvironment?.ContentRootPath ?? AppContext.BaseDirectory
        : offlineRoot;

    return Path.GetFullPath(Path.Combine(anchor, path));
}
|
||||
|
||||
/// <summary>
/// Loads the certificate(s) stored in the file at <paramref name="path"/>.
/// </summary>
/// <remarks>
/// Files whose extension is <c>.pem</c> or <c>.crt</c> (case-insensitive) are read as PEM and may yield several
/// certificates. Every other extension is read as a password-less PKCS#12 container and yields one certificate.
/// </remarks>
/// <param name="path">Path of the certificate file to read.</param>
/// <returns>The certificates that were loaded.</returns>
private static IEnumerable<X509Certificate2> LoadCertificates(string path)
{
    var certificates = new List<X509Certificate2>();
    var extension = Path.GetExtension(path);

    var isPem = extension.Equals(".pem", StringComparison.OrdinalIgnoreCase)
        || extension.Equals(".crt", StringComparison.OrdinalIgnoreCase);

    if (isPem)
    {
        var collection = new X509Certificate2Collection();
        try
        {
            collection.ImportFromPemFile(path);
        }
        catch (CryptographicException)
        {
            // Treat an unreadable bundle as "nothing imported" so the single-certificate loader below gets a try.
            collection.Clear();
        }

        if (collection.Count > 0)
        {
            foreach (var imported in collection)
            {
                // The copy is built from exported/raw bytes and is independent of the imported instance,
                // so release the imported one right away instead of leaving its handle to the finalizer.
                using (imported)
                {
                    certificates.Add(imported.CopyWithPrivateKeyIfAvailable());
                }
            }
        }
        else
        {
            certificates.Add(X509Certificate2.CreateFromPemFile(path));
        }
    }
    else
    {
        // Use X509CertificateLoader to load certificates from PKCS#12 files (.pfx, .p12, etc.)
        var certificate = System.Security.Cryptography.X509Certificates.X509CertificateLoader.LoadPkcs12(
            File.ReadAllBytes(path),
            password: null);
        certificates.Add(certificate);
    }

    return certificates;
}
|
||||
|
||||
/// <summary>
/// Appends <paramref name="certificate"/> to the trusted roots of <paramref name="options"/> unless a certificate
/// with the same thumbprint (compared case-insensitively) is already present.
/// </summary>
/// <param name="options">Options whose <c>TrustedRootCertificates</c> list is extended.</param>
/// <param name="certificate">Certificate to trust; a null value is ignored.</param>
private static void AddTrustedCertificate(SourceHttpClientOptions options, X509Certificate2 certificate)
{
    if (certificate is null)
    {
        return;
    }

    foreach (var known in options.TrustedRootCertificates)
    {
        if (string.Equals(known.Thumbprint, certificate.Thumbprint, StringComparison.OrdinalIgnoreCase))
        {
            // Already trusted; keep the list free of duplicates.
            return;
        }
    }

    options.TrustedRootCertificates.Add(certificate);
}
|
||||
|
||||
/// <summary>
/// Creates an independent copy of <paramref name="certificate"/>, keeping the private key when one is attached.
/// </summary>
/// <remarks>
/// Uses <see cref="X509CertificateLoader"/> rather than the obsolete <see cref="X509Certificate2"/> constructors.
/// </remarks>
/// <param name="certificate">The certificate to copy.</param>
/// <returns>A new certificate instance loaded from the exported bytes of the original.</returns>
private static X509Certificate2 CopyWithPrivateKeyIfAvailable(this X509Certificate2 certificate)
{
    if (!certificate.HasPrivateKey)
    {
        // Public-only certificates round-trip through their raw encoded form.
        return X509CertificateLoader.LoadCertificate(certificate.RawData);
    }

    // Export with the private key and re-import using X509CertificateLoader.
    var exported = certificate.Export(X509ContentType.Pkcs12);
    try
    {
        return X509CertificateLoader.LoadPkcs12(exported, password: null);
    }
    finally
    {
        // The export is an unencrypted PKCS#12 blob holding the private key; wipe it once it has been consumed.
        System.Security.Cryptography.CryptographicOperations.ZeroMemory(exported);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,170 @@
|
||||
using System.Collections.ObjectModel;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Net.Security;
|
||||
using System.Security.Cryptography.X509Certificates;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Http;
|
||||
|
||||
/// <summary>
/// Settings applied to a named HTTP client used by a connector: addressing, timeout, retry, HTTP version,
/// proxy and TLS trust configuration.
/// </summary>
public sealed class SourceHttpClientOptions
{
    // Host names and header names are both keyed case-insensitively.
    private readonly HashSet<string> _allowedHosts = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    private readonly Dictionary<string, string> _defaultHeaders = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Base address that relative requests are issued against.
    /// </summary>
    public Uri? BaseAddress { get; set; }

    /// <summary>
    /// Client timeout. Defaults to 30 seconds.
    /// </summary>
    public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>
    /// User-agent string applied to outgoing requests.
    /// </summary>
    public string UserAgent { get; set; } = "StellaOps.Concelier/1.0";

    /// <summary>
    /// Whether redirects are followed. Defaults to <c>true</c>.
    /// </summary>
    public bool AllowAutoRedirect { get; set; } = true;

    /// <summary>
    /// Maximum number of retry attempts for transient failures. Defaults to 3.
    /// </summary>
    public int MaxAttempts { get; set; } = 3;

    /// <summary>
    /// Base delay of the exponential backoff policy. Defaults to 2 seconds.
    /// </summary>
    public TimeSpan BaseDelay { get; set; } = TimeSpan.FromSeconds(2);

    /// <summary>
    /// Hosts this client is allowed to contact; entries are compared case-insensitively.
    /// </summary>
    public ISet<string> AllowedHosts => _allowedHosts;

    /// <summary>
    /// Default HTTP version requested by the client. Defaults to HTTP/2.
    /// </summary>
    public Version RequestVersion { get; set; } = HttpVersion.Version20;

    /// <summary>
    /// Policy governing HTTP version negotiation. Defaults to <see cref="HttpVersionPolicy.RequestVersionOrLower"/>.
    /// </summary>
    public HttpVersionPolicy VersionPolicy { get; set; } = HttpVersionPolicy.RequestVersionOrLower;

    /// <summary>
    /// Whether several HTTP/2 connections may be opened to the same endpoint. Defaults to <c>true</c>.
    /// </summary>
    public bool EnableMultipleHttp2Connections { get; set; } = true;

    /// <summary>
    /// Optional hook for customising the underlying <see cref="SocketsHttpHandler"/>.
    /// </summary>
    public Action<SocketsHttpHandler>? ConfigureHandler { get; set; }

    /// <summary>
    /// Optional proxy address for outbound requests.
    /// </summary>
    public Uri? ProxyAddress { get; set; }

    /// <summary>
    /// Whether the proxy is bypassed for local addresses. Defaults to <c>true</c>.
    /// </summary>
    public bool ProxyBypassOnLocal { get; set; } = true;

    /// <summary>
    /// Optional explicit bypass list applied to the proxy.
    /// </summary>
    public IList<string> ProxyBypassList { get; } = new List<string>();

    /// <summary>
    /// Whether default credentials are used for the proxy.
    /// </summary>
    public bool ProxyUseDefaultCredentials { get; set; }

    /// <summary>
    /// Optional proxy user name.
    /// </summary>
    public string? ProxyUsername { get; set; }

    /// <summary>
    /// Optional proxy password.
    /// </summary>
    public string? ProxyPassword { get; set; }

    /// <summary>
    /// Whether server certificate validation is bypassed.
    /// </summary>
    public bool AllowInvalidServerCertificates { get; set; }

    /// <summary>
    /// Extra trusted root certificates used in addition to the default trust store when negotiating TLS.
    /// </summary>
    public IList<X509Certificate2> TrustedRootCertificates { get; } = new List<X509Certificate2>();

    /// <summary>
    /// Optional callback that validates remote certificates when <see cref="TrustedRootCertificates"/> is insufficient.
    /// </summary>
    public Func<X509Certificate2?, X509Chain?, SslPolicyErrors, bool>? ServerCertificateCustomValidation { get; set; }

    /// <summary>
    /// Default headers added to each outgoing request; header names are compared case-insensitively.
    /// </summary>
    public IDictionary<string, string> DefaultRequestHeaders => _defaultHeaders;

    /// <summary>
    /// Produces a copy of these options. Scalar settings and callbacks are copied by value/reference, and each
    /// collection is copied into a fresh collection so later edits to one instance do not affect the other.
    /// Certificate instances themselves are shared, not duplicated.
    /// </summary>
    internal SourceHttpClientOptions Clone()
    {
        var copy = new SourceHttpClientOptions();

        copy.BaseAddress = BaseAddress;
        copy.Timeout = Timeout;
        copy.UserAgent = UserAgent;
        copy.AllowAutoRedirect = AllowAutoRedirect;
        copy.MaxAttempts = MaxAttempts;
        copy.BaseDelay = BaseDelay;
        copy.RequestVersion = RequestVersion;
        copy.VersionPolicy = VersionPolicy;
        copy.EnableMultipleHttp2Connections = EnableMultipleHttp2Connections;
        copy.ConfigureHandler = ConfigureHandler;
        copy.AllowInvalidServerCertificates = AllowInvalidServerCertificates;
        copy.ServerCertificateCustomValidation = ServerCertificateCustomValidation;
        copy.ProxyAddress = ProxyAddress;
        copy.ProxyBypassOnLocal = ProxyBypassOnLocal;
        copy.ProxyUseDefaultCredentials = ProxyUseDefaultCredentials;
        copy.ProxyUsername = ProxyUsername;
        copy.ProxyPassword = ProxyPassword;

        copy._allowedHosts.UnionWith(_allowedHosts);

        foreach (var (name, value) in _defaultHeaders)
        {
            copy._defaultHeaders[name] = value;
        }

        foreach (var root in TrustedRootCertificates)
        {
            copy.TrustedRootCertificates.Add(root);
        }

        foreach (var bypass in ProxyBypassList)
        {
            copy.ProxyBypassList.Add(bypass);
        }

        return copy;
    }

    /// <summary>
    /// Returns a read-only copy of the allowed hosts as they are at the time of the call.
    /// </summary>
    internal IReadOnlyCollection<string> GetAllowedHostsSnapshot()
    {
        var hosts = new string[_allowedHosts.Count];
        _allowedHosts.CopyTo(hosts);
        return new ReadOnlyCollection<string>(hosts);
    }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
using System.Text.Json;
|
||||
using Json.Schema;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Json;
|
||||
|
||||
/// <summary>
/// Validates a parsed JSON document against a JSON schema.
/// </summary>
public interface IJsonSchemaValidator
|
||||
{
|
||||
/// <summary>
/// Validates <paramref name="document"/> against <paramref name="schema"/>.
/// </summary>
/// <param name="document">The parsed JSON document to check.</param>
/// <param name="schema">The schema the document is evaluated against.</param>
/// <param name="documentName">Name identifying the document being validated.</param>
/// <remarks>
/// The method returns nothing, so failure reporting is up to the implementation. The <c>JsonSchemaValidator</c>
/// implementation in this project logs the violations and throws <c>JsonSchemaValidationException</c>.
/// </remarks>
void Validate(JsonDocument document, JsonSchema schema, string documentName);
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace StellaOps.Concelier.Connector.Common.Json;
|
||||
|
||||
/// <summary>
/// A single schema violation. As populated by <c>JsonSchemaValidator</c>, each instance corresponds to one
/// keyword/message entry of an evaluation result node.
/// </summary>
/// <param name="InstanceLocation">String form of the result node's instance location; empty when the node has none.</param>
/// <param name="SchemaLocation">String form of the result node's schema location; empty when the node has none.</param>
/// <param name="Message">The error message reported for the keyword.</param>
/// <param name="Keyword">The schema keyword the error was reported under.</param>
public sealed record JsonSchemaValidationError(
|
||||
string InstanceLocation,
|
||||
string SchemaLocation,
|
||||
string Message,
|
||||
string Keyword);
|
||||
@@ -0,0 +1,15 @@
|
||||
namespace StellaOps.Concelier.Connector.Common.Json;
|
||||
|
||||
/// <summary>
/// Raised when a JSON document fails schema validation; carries the document name and the violations found.
/// </summary>
public sealed class JsonSchemaValidationException : Exception
{
    /// <summary>
    /// Creates the exception with the message <c>JSON schema validation failed for '&lt;documentName&gt;'.</c>
    /// </summary>
    /// <param name="documentName">Name of the document that failed validation.</param>
    /// <param name="errors">The violations found; a null value is stored as an empty list.</param>
    public JsonSchemaValidationException(string documentName, IReadOnlyList<JsonSchemaValidationError> errors)
        : base($"JSON schema validation failed for '{documentName}'.")
    {
        DocumentName = documentName;
        Errors = errors is not null ? errors : Array.Empty<JsonSchemaValidationError>();
    }

    /// <summary>Name of the document that failed validation.</summary>
    public string DocumentName { get; }

    /// <summary>The violations found; never null.</summary>
    public IReadOnlyList<JsonSchemaValidationError> Errors { get; }
}
|
||||
@@ -0,0 +1,92 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.Json;
|
||||
using Json.Schema;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Json;
|
||||
/// <summary>
/// Evaluates JSON documents against a schema, logs the violations found and throws
/// <see cref="JsonSchemaValidationException"/> when the document is invalid.
/// </summary>
public sealed class JsonSchemaValidator : IJsonSchemaValidator
{
    // Upper bound on individually logged violations per document; the remainder is summarised in one line.
    private const int MaxLoggedErrors = 5;

    private readonly ILogger<JsonSchemaValidator> _logger;

    /// <summary>Creates the validator.</summary>
    /// <param name="logger">Logger that receives violation warnings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public JsonSchemaValidator(ILogger<JsonSchemaValidator> logger)
    {
        ArgumentNullException.ThrowIfNull(logger);
        _logger = logger;
    }

    /// <summary>
    /// Validates <paramref name="document"/> against <paramref name="schema"/> with format validation required.
    /// </summary>
    /// <param name="document">The parsed document; its root element is evaluated.</param>
    /// <param name="schema">The schema to evaluate against.</param>
    /// <param name="documentName">Name used in log entries and in the thrown exception.</param>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> or <paramref name="schema"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="documentName"/> is null or empty.</exception>
    /// <exception cref="JsonSchemaValidationException">The document does not satisfy the schema.</exception>
    public void Validate(JsonDocument document, JsonSchema schema, string documentName)
    {
        ArgumentNullException.ThrowIfNull(document);
        ArgumentNullException.ThrowIfNull(schema);
        ArgumentException.ThrowIfNullOrEmpty(documentName);

        var evaluationOptions = new EvaluationOptions
        {
            OutputFormat = OutputFormat.List,
            RequireFormatValidation = true,
        };

        var outcome = schema.Evaluate(document.RootElement, evaluationOptions);
        if (outcome.IsValid)
        {
            return;
        }

        var violations = CollectErrors(outcome);
        if (violations.Count == 0)
        {
            // Invalid result without any reported error entries.
            _logger.LogWarning("Schema validation failed for {Document} with unknown errors", documentName);
        }
        else
        {
            LogViolations(violations, documentName);
        }

        throw new JsonSchemaValidationException(documentName, violations);
    }

    /// <summary>
    /// Logs up to <see cref="MaxLoggedErrors"/> violations individually, then one summary line for the rest.
    /// </summary>
    private void LogViolations(IReadOnlyList<JsonSchemaValidationError> violations, string documentName)
    {
        var logged = Math.Min(violations.Count, MaxLoggedErrors);
        for (var i = 0; i < logged; i++)
        {
            var violation = violations[i];

            // An empty instance location is shown as "#".
            var location = violation.InstanceLocation;
            if (string.IsNullOrEmpty(location))
            {
                location = "#";
            }

            _logger.LogWarning(
                "Schema violation for {Document} at {InstanceLocation} (keyword: {Keyword}): {Message}",
                documentName,
                location,
                violation.Keyword,
                violation.Message);
        }

        var suppressed = violations.Count - MaxLoggedErrors;
        if (suppressed > 0)
        {
            _logger.LogWarning("{Count} additional schema violations for {Document} suppressed", suppressed, documentName);
        }
    }

    /// <summary>
    /// Flattens the result tree into a list, visiting each node before its details (pre-order).
    /// </summary>
    private static IReadOnlyList<JsonSchemaValidationError> CollectErrors(EvaluationResults result)
    {
        var collected = new List<JsonSchemaValidationError>();
        Visit(result);
        return collected;

        void Visit(EvaluationResults node)
        {
            if (node.Errors is { Count: > 0 })
            {
                var instanceLocation = node.InstanceLocation?.ToString() ?? string.Empty;
                var schemaLocation = node.SchemaLocation?.ToString() ?? string.Empty;

                foreach (var (keyword, message) in node.Errors)
                {
                    collected.Add(new JsonSchemaValidationError(instanceLocation, schemaLocation, message, keyword));
                }
            }

            if (node.Details is null)
            {
                return;
            }

            foreach (var detail in node.Details)
            {
                Visit(detail);
            }
        }
    }
}
|
||||
@@ -0,0 +1,197 @@
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using NuGet.Versioning;
|
||||
using StellaOps.Concelier.Normalization.Identifiers;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Packages;
|
||||
|
||||
/// <summary>
/// Shared helpers for working with Package URLs and SemVer coordinates inside connectors.
/// </summary>
public static class PackageCoordinateHelper
{
    /// <summary>
    /// Parses a Package URL into its components.
    /// </summary>
    /// <param name="value">The candidate Package URL.</param>
    /// <param name="coordinates">The parsed components on success; otherwise null.</param>
    /// <returns><c>true</c> when the normalizer accepted <paramref name="value"/>; otherwise <c>false</c>.</returns>
    public static bool TryParsePackageUrl(string? value, out PackageCoordinates? coordinates)
    {
        coordinates = null;

        // The normalizer's own canonical string is not used: the canonical form is rebuilt below from the parsed parts.
        if (!IdentifierNormalizer.TryNormalizePackageUrl(value, out _, out var packageUrl) || packageUrl is null)
        {
            return false;
        }

        var namespaceSegments = packageUrl.NamespaceSegments.ToArray();
        var subpathSegments = packageUrl.SubpathSegments.ToArray();
        var qualifiers = packageUrl.Qualifiers.ToDictionary(kvp => kvp.Key, kvp => kvp.Value, StringComparer.OrdinalIgnoreCase);
        var canonicalRebuilt = BuildPackageUrl(
            packageUrl.Type,
            namespaceSegments,
            packageUrl.Name,
            packageUrl.Version,
            qualifiers,
            subpathSegments);

        coordinates = new PackageCoordinates(
            Canonical: canonicalRebuilt,
            Type: packageUrl.Type,
            NamespaceSegments: namespaceSegments,
            Name: packageUrl.Name,
            Version: packageUrl.Version,
            Qualifiers: qualifiers,
            SubpathSegments: subpathSegments,
            Original: packageUrl.Original);
        return true;
    }

    /// <summary>
    /// Parses a Package URL, throwing when it is not valid.
    /// </summary>
    /// <param name="value">The Package URL to parse.</param>
    /// <returns>The parsed components.</returns>
    /// <exception cref="FormatException"><paramref name="value"/> is not a valid Package URL.</exception>
    public static PackageCoordinates ParsePackageUrl(string value)
    {
        if (!TryParsePackageUrl(value, out var coordinates) || coordinates is null)
        {
            throw new FormatException($"Value '{value}' is not a valid Package URL");
        }

        return coordinates;
    }

    /// <summary>
    /// Parses a semantic version after trimming surrounding whitespace.
    /// </summary>
    /// <param name="value">The candidate version string.</param>
    /// <param name="version">The parsed version on success; otherwise null.</param>
    /// <param name="normalized">The normalized string form of the version on success; otherwise null.</param>
    /// <returns><c>true</c> when parsing succeeded; <c>false</c> for null, blank or unparsable input.</returns>
    public static bool TryParseSemVer(string? value, out SemanticVersion? version, out string? normalized)
    {
        version = null;
        normalized = null;

        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        if (!SemanticVersion.TryParse(value.Trim(), out var parsed))
        {
            return false;
        }

        version = parsed;
        normalized = parsed.ToNormalizedString();
        return true;
    }

    /// <summary>
    /// Parses a version range. A leading <c>^</c> is expanded to the half-open interval from the given version up to
    /// (excluding) the next version that changes the left-most non-zero component; anything else is handed to
    /// <see cref="VersionRange.TryParse(string, out VersionRange)"/>.
    /// </summary>
    /// <param name="value">The candidate range expression.</param>
    /// <param name="range">The parsed range on success; otherwise null.</param>
    /// <returns><c>true</c> when parsing succeeded; <c>false</c> for null, blank or unparsable input.</returns>
    public static bool TryParseSemVerRange(string? value, out VersionRange? range)
    {
        range = null;
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        var trimmed = value.Trim();
        if (trimmed.StartsWith("^", StringComparison.Ordinal))
        {
            if (!SemanticVersion.TryParse(trimmed[1..], out var baseVersion))
            {
                return false;
            }

            var upperBound = CalculateCaretUpperBound(baseVersion);
            var caretExpression = $"[{baseVersion.ToNormalizedString()}, {upperBound.ToNormalizedString()})";
            if (!VersionRange.TryParse(caretExpression, out var caretRange))
            {
                return false;
            }

            range = caretRange;
            return true;
        }

        // No Parse() retry here: Parse throws exactly when TryParse reports failure, so a second attempt
        // wrapped in a catch-all could never succeed and only added exception overhead.
        if (!VersionRange.TryParse(trimmed, out var parsed))
        {
            return false;
        }

        range = parsed;
        return true;
    }

    /// <summary>
    /// Builds a Package URL string of the form <c>pkg:type/namespace/name@version?qualifiers#subpath</c>.
    /// </summary>
    /// <param name="type">Package type; trimmed and lower-cased.</param>
    /// <param name="namespaceSegments">Optional namespace segments, joined with <c>/</c>.</param>
    /// <param name="name">Package name.</param>
    /// <param name="version">Optional version; trimmed and appended as-is (it is not percent-encoded here).</param>
    /// <param name="qualifiers">Optional qualifiers, emitted sorted by key (case-insensitive ordinal).</param>
    /// <param name="subpathSegments">Optional subpath segments, joined with <c>/</c>.</param>
    /// <returns>The assembled Package URL.</returns>
    /// <exception cref="ArgumentException"><paramref name="type"/> or <paramref name="name"/> is null or empty.</exception>
    public static string BuildPackageUrl(
        string type,
        IReadOnlyList<string>? namespaceSegments,
        string name,
        string? version = null,
        IReadOnlyDictionary<string, string>? qualifiers = null,
        IReadOnlyList<string>? subpathSegments = null)
    {
        ArgumentException.ThrowIfNullOrEmpty(type);
        ArgumentException.ThrowIfNullOrEmpty(name);

        var builder = new StringBuilder("pkg:");
        builder.Append(type.Trim().ToLowerInvariant());
        builder.Append('/');

        if (namespaceSegments is not null && namespaceSegments.Count > 0)
        {
            builder.Append(string.Join('/', namespaceSegments.Select(NormalizeSegment)));
            builder.Append('/');
        }

        builder.Append(NormalizeSegment(name));

        if (!string.IsNullOrWhiteSpace(version))
        {
            builder.Append('@');
            builder.Append(version.Trim());
        }

        if (qualifiers is not null && qualifiers.Count > 0)
        {
            builder.Append('?');
            builder.Append(string.Join('&', qualifiers
                .OrderBy(static kvp => kvp.Key, StringComparer.OrdinalIgnoreCase)
                .Select(kvp => $"{NormalizeSegment(kvp.Key)}={NormalizeSegment(kvp.Value)}")));
        }

        if (subpathSegments is not null && subpathSegments.Count > 0)
        {
            builder.Append('#');
            builder.Append(string.Join('/', subpathSegments.Select(NormalizeSegment)));
        }

        return builder.ToString();
    }

    /// <summary>
    /// Trims a segment and percent-encodes it, leaving <c>@</c> unencoded.
    /// </summary>
    private static string NormalizeSegment(string value)
    {
        ArgumentNullException.ThrowIfNull(value);
        var trimmed = value.Trim();

        // Decode first so that input which is already percent-encoded is not encoded twice.
        var unescaped = Uri.UnescapeDataString(trimmed);
        var encoded = Uri.EscapeDataString(unescaped);
        return encoded.Replace("%40", "@");
    }

    /// <summary>
    /// Computes the exclusive upper bound of a caret range: the next major for <c>x.*.*</c> with x &gt; 0,
    /// the next minor for <c>0.y.*</c> with y &gt; 0, and the next patch for <c>0.0.z</c>.
    /// </summary>
    private static SemanticVersion CalculateCaretUpperBound(SemanticVersion baseVersion)
    {
        if (baseVersion.Major > 0)
        {
            return new SemanticVersion(baseVersion.Major + 1, 0, 0);
        }

        if (baseVersion.Minor > 0)
        {
            return new SemanticVersion(0, baseVersion.Minor + 1, 0);
        }

        return new SemanticVersion(0, 0, baseVersion.Patch + 1);
    }
}
|
||||
|
||||
/// <summary>
/// The components of a parsed Package URL, as produced by <c>PackageCoordinateHelper.TryParsePackageUrl</c>.
/// </summary>
/// <param name="Canonical">Package URL rebuilt from the parsed components via <c>PackageCoordinateHelper.BuildPackageUrl</c>.</param>
/// <param name="Type">The package type of the parsed URL.</param>
/// <param name="NamespaceSegments">The namespace segments of the parsed URL.</param>
/// <param name="Name">The package name.</param>
/// <param name="Version">The package version, when the URL carries one.</param>
/// <param name="Qualifiers">The qualifiers; the parser supplies a dictionary with case-insensitive keys.</param>
/// <param name="SubpathSegments">The subpath segments of the parsed URL.</param>
/// <param name="Original">The <c>Original</c> value reported by the normalizer; presumably the input string as given — confirm against the normalizer.</param>
public sealed record PackageCoordinates(
|
||||
string Canonical,
|
||||
string Type,
|
||||
IReadOnlyList<string> NamespaceSegments,
|
||||
string Name,
|
||||
string? Version,
|
||||
IReadOnlyDictionary<string, string> Qualifiers,
|
||||
IReadOnlyList<string> SubpathSegments,
|
||||
string Original);
|
||||
184
src/StellaOps.Concelier.Connector.Common/Pdf/PdfTextExtractor.cs
Normal file
184
src/StellaOps.Concelier.Connector.Common/Pdf/PdfTextExtractor.cs
Normal file
@@ -0,0 +1,184 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Text;
|
||||
using UglyToad.PdfPig;
|
||||
using UglyToad.PdfPig.Content;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Pdf;
|
||||
|
||||
/// <summary>
/// Extracts text from PDF advisories using UglyToad.PdfPig without requiring native dependencies.
/// </summary>
public sealed class PdfTextExtractor
{
    // Parenthesised literal strings in the raw PDF bytes; only consulted when page extraction yields nothing.
    private static readonly Regex RawLiteralPattern = new("\\(([^\\)]+)\\)", RegexOptions.CultureInvariant);

    /// <summary>
    /// Reads <paramref name="pdfStream"/> fully into memory and extracts its text page by page.
    /// </summary>
    /// <param name="pdfStream">Stream positioned at the start of the PDF content.</param>
    /// <param name="options">Extraction options; <see cref="PdfExtractionOptions.Default"/> when null.</param>
    /// <param name="cancellationToken">Checked while buffering the stream and before each page.</param>
    /// <returns>The trimmed extracted text and the number of pages processed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="pdfStream"/> is null.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public async Task<PdfExtractionResult> ExtractTextAsync(Stream pdfStream, PdfExtractionOptions? options = null, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(pdfStream);
        options ??= PdfExtractionOptions.Default;

        using var buffer = new MemoryStream();
        await pdfStream.CopyToAsync(buffer, cancellationToken).ConfigureAwait(false);
        buffer.Position = 0;

        using var document = PdfDocument.Open(buffer, new ParsingOptions
        {
            ClipPaths = true,
            UseLenientParsing = true,
        });

        var builder = new StringBuilder();
        var pageCount = 0;

        var totalPages = document.NumberOfPages;
        for (var index = 1; index <= totalPages; index++)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // Enforce the limit before touching the next page. Checking after the increment loaded one page too
            // many and reported MaxPages + 1 as the processed count.
            if (options.MaxPages.HasValue && pageCount >= options.MaxPages.Value)
            {
                break;
            }

            Page page;
            try
            {
                page = document.GetPage(index);
            }
            catch (InvalidOperationException ex) when (ex.Message.Contains("empty stack", StringComparison.OrdinalIgnoreCase))
            {
                // Pages that fail to load with this error are skipped and not counted.
                continue;
            }

            pageCount++;

            if (pageCount > 1 && options.PageSeparator is not null)
            {
                builder.Append(options.PageSeparator);
            }

            if (!TryReadPageText(page, options.PreserveLayout, out var text))
            {
                continue;
            }

            if (!string.IsNullOrWhiteSpace(text))
            {
                builder.AppendLine(text.Trim());
            }
        }

        if (builder.Length == 0)
        {
            // Last resort: scrape parenthesised literals straight from the bytes. The byte copy is only
            // taken here, so the common path does not duplicate the whole document in memory.
            var raw = Encoding.ASCII.GetString(buffer.ToArray());
            var matches = RawLiteralPattern.Matches(raw);
            foreach (Match match in matches)
            {
                var value = match.Groups[1].Value;
                if (!string.IsNullOrWhiteSpace(value))
                {
                    builder.AppendLine(value.Trim());
                }
            }

            // NOTE(review): this reports the number of matched literals as the page count when it is larger;
            // kept as-is, confirm this is the intended meaning of PagesProcessed for the fallback.
            if (builder.Length > 0 && matches.Count > 0)
            {
                pageCount = Math.Max(pageCount, matches.Count);
            }
        }

        return new PdfExtractionResult(builder.ToString().Trim(), pageCount);
    }

    /// <summary>
    /// Reads the text of one page, degrading from the preferred mode to word-based and then letter-based
    /// extraction when PdfPig fails.
    /// </summary>
    /// <returns><c>false</c> when every extraction attempt failed and the page should be skipped.</returns>
    private static bool TryReadPageText(Page page, bool preserveLayout, out string text)
    {
        try
        {
            text = preserveLayout ? page.Text : FlattenWords(page.GetWords());
            return true;
        }
        catch (InvalidOperationException ex) when (ex.Message.Contains("empty stack", StringComparison.OrdinalIgnoreCase))
        {
            // Only this specific failure triggers the fallbacks; anything else propagates to the caller.
        }

        try
        {
            text = FlattenWords(page.GetWords());
            return true;
        }
        catch (Exception)
        {
            // Best effort: fall through to letter-level extraction.
        }

        try
        {
            text = FlattenLetters(page.Letters);
            return true;
        }
        catch (Exception)
        {
            // Best effort: nothing usable on this page.
            text = string.Empty;
            return false;
        }
    }

    /// <summary>Joins the trimmed, non-blank words with single spaces.</summary>
    private static string FlattenWords(IEnumerable<Word> words)
    {
        var builder = new StringBuilder();
        foreach (var word in words)
        {
            if (string.IsNullOrWhiteSpace(word.Text))
            {
                continue;
            }

            if (builder.Length > 0)
            {
                builder.Append(' ');
            }

            builder.Append(word.Text.Trim());
        }

        return builder.ToString();
    }

    /// <summary>Concatenates the values of all letters, skipping letters without a value.</summary>
    private static string FlattenLetters(IEnumerable<Letter> letters)
    {
        var builder = new StringBuilder();
        foreach (var letter in letters)
        {
            if (letter.Value is null)
            {
                continue;
            }

            builder.Append(letter.Value);
        }

        return builder.ToString();
    }
}
|
||||
|
||||
/// <summary>
/// Result returned by <c>PdfTextExtractor.ExtractTextAsync</c>.
/// </summary>
/// <param name="Text">The extracted text, trimmed of leading and trailing whitespace by the extractor.</param>
/// <param name="PagesProcessed">The page count reported by the extractor.</param>
public sealed record PdfExtractionResult(string Text, int PagesProcessed);
|
||||
|
||||
/// <summary>
/// Options read by <c>PdfTextExtractor.ExtractTextAsync</c>.
/// </summary>
public sealed record PdfExtractionOptions
|
||||
{
|
||||
/// <summary>
/// Shared instance with every option at its default; used by the extractor when no options are passed.
/// </summary>
public static PdfExtractionOptions Default { get; } = new();
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of pages to read. Null reads the entire document.
|
||||
/// </summary>
|
||||
public int? MaxPages { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// When true, uses PdfPig's native page text. When false, the page's words are joined with single spaces, giving one line per page.
|
||||
/// </summary>
|
||||
public bool PreserveLayout { get; init; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// Separator inserted between pages. Null disables separators. Defaults to two line feeds.
|
||||
/// </summary>
|
||||
public string? PageSeparator { get; init; } = "\n\n";
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("StellaOps.Concelier.Connector.Common.Tests")]
|
||||
@@ -0,0 +1,21 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="JsonSchema.Net" Version="5.3.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.5" />
|
||||
<PackageReference Include="MongoDB.Driver.GridFS" Version="2.22.0" />
|
||||
<PackageReference Include="MongoDB.Driver" Version="2.22.0" />
|
||||
<PackageReference Include="AngleSharp" Version="1.1.1" />
|
||||
<PackageReference Include="UglyToad.PdfPig" Version="1.7.0-custom-5" />
|
||||
<PackageReference Include="NuGet.Versioning" Version="6.9.1" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Concelier.Storage.Mongo\StellaOps.Concelier.Storage.Mongo.csproj" />
|
||||
<ProjectReference Include="..\StellaOps.Concelier.Normalization\StellaOps.Concelier.Normalization.csproj" />
|
||||
<ProjectReference Include="../StellaOps.Plugin/StellaOps.Plugin.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
19
src/StellaOps.Concelier.Connector.Common/TASKS.md
Normal file
19
src/StellaOps.Concelier.Connector.Common/TASKS.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# TASKS
|
||||
| Task | Owner(s) | Depends on | Notes |
|
||||
|---|---|---|---|
|
||||
|Register source HTTP clients with allowlists and timeouts|BE-Conn-Shared|Source.Common|**DONE** – `AddSourceHttpClient` wires named clients with host allowlists/timeouts.|
|
||||
|Implement retry/backoff with jitter and 429 handling|BE-Conn-Shared|Source.Common|**DONE** – `SourceRetryPolicy` retries with 429/5xx handling and exponential backoff.|
|
||||
|Conditional GET helpers (ETag/Last-Modified)|BE-Conn-Shared|Source.Common|**DONE** – `SourceFetchRequest` + fetch result propagate etag/last-modified for NotModified handling.|
|
||||
|Windowed cursor and pagination utilities|BE-Conn-Shared|Source.Common|**DONE** – `TimeWindowCursorPlanner` + `PaginationPlanner` centralize sliding windows and additional page indices.|
|
||||
|JSON/XML schema validators with rich errors|BE-Conn-Shared, QA|Source.Common|DONE – JsonSchemaValidator surfaces keyword/path/message details + tests.|
|
||||
|Raw document capture helper|BE-Conn-Shared|Storage.Mongo|**DONE** – `SourceFetchService` stores raw payload + headers with sha256 metadata.|
|
||||
|Canned HTTP test harness|QA|Source.Common|DONE – enriched `CannedHttpMessageHandler` with method-aware queues, request capture, fallbacks, and helpers + unit coverage.|
|
||||
|HTML sanitization and URL normalization utilities|BE-Conn-Shared|Source.Common|DONE – `HtmlContentSanitizer` + `UrlNormalizer` provide safe fragments and canonical links for connectors.|
|
||||
|PDF-to-text sandbox helper|BE-Conn-Shared|Source.Common|DONE – `PdfTextExtractor` uses PdfPig to yield deterministic text with options + tests.|
|
||||
|PURL and SemVer helper library|BE-Conn-Shared|Models|DONE – `PackageCoordinateHelper` exposes normalized purl + SemVer parsing utilities backed by normalization.|
|
||||
|Telemetry wiring (logs/metrics/traces)|BE-Conn-Shared|Observability|DONE – `SourceDiagnostics` emits Activity/Meter signals integrated into fetch pipeline and WebService OTEL setup.|
|
||||
|Shared jitter source in retry policy|BE-Conn-Shared|Source.Common|**DONE** – `SourceRetryPolicy` now consumes injected `CryptoJitterSource` for thread-safe jitter.|
|
||||
|Allow per-request Accept header overrides|BE-Conn-Shared|Source.Common|**DONE** – `SourceFetchRequest.AcceptHeaders` honored by `SourceFetchService` plus unit tests for overrides.|
|
||||
|FEEDCONN-SHARED-HTTP2-001 HTTP version fallback policy|BE-Conn-Shared, Source.Common|Source.Common|**DONE (2025-10-11)** – `AddSourceHttpClient` now honours per-connector HTTP version/policy, exposes handler customisation, and defaults to downgrade-friendly settings; unit tests cover the handler configuration hook.|
|
||||
|FEEDCONN-SHARED-TLS-001 Sovereign trust store support|BE-Conn-Shared, Ops|Source.Common|**DONE (2025-10-11)** – `SourceHttpClientOptions` now exposes `TrustedRootCertificates`, `ServerCertificateCustomValidation`, and `AllowInvalidServerCertificates`, and `AddSourceHttpClient` runs the shared configuration binder so connectors can pull `concelier:httpClients|sources:<name>:http` settings (incl. Offline Kit relative PEM paths via `concelier:offline:root`). Tests cover handler wiring. Ops follow-up: package RU trust roots for Offline Kit distribution.|
|
||||
|FEEDCONN-SHARED-STATE-003 Source state seeding helper|Tools Guild, BE-Conn-MSRC|Tools|**TODO (2025-10-15)** – Provide a reusable CLI/utility to seed `pendingDocuments`/`pendingMappings` for connectors (MSRC backfills require scripted CVRF + detail injection). Coordinate with MSRC team for expected JSON schema and handoff once prototype lands.|
|
||||
@@ -0,0 +1,107 @@
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.Metrics;
|
||||
using System.Net;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Telemetry;
|
||||
|
||||
/// <summary>
/// Central telemetry instrumentation for connector HTTP operations.
/// </summary>
public static class SourceDiagnostics
{
    /// <summary>Name of the <see cref="System.Diagnostics.ActivitySource"/> that fetch activities are started from.</summary>
    public const string ActivitySourceName = "StellaOps.Concelier.Connector";

    /// <summary>Name of the <see cref="System.Diagnostics.Metrics.Meter"/> that owns the HTTP instruments.</summary>
    public const string MeterName = "StellaOps.Concelier.Connector";

    private static readonly ActivitySource ActivitySource = new(ActivitySourceName);
    private static readonly Meter Meter = new(MeterName);

    private static readonly Counter<long> HttpRequestCounter = Meter.CreateCounter<long>("concelier.source.http.requests");
    private static readonly Counter<long> HttpRetryCounter = Meter.CreateCounter<long>("concelier.source.http.retries");
    private static readonly Counter<long> HttpFailureCounter = Meter.CreateCounter<long>("concelier.source.http.failures");
    private static readonly Counter<long> HttpNotModifiedCounter = Meter.CreateCounter<long>("concelier.source.http.not_modified");
    private static readonly Histogram<double> HttpDuration = Meter.CreateHistogram<double>("concelier.source.http.duration", unit: "ms");
    private static readonly Histogram<long> HttpPayloadBytes = Meter.CreateHistogram<long>("concelier.source.http.payload_bytes", unit: "byte");

    /// <summary>
    /// Starts a client-kind <c>SourceFetch</c> activity tagged with the source, HTTP method and URL.
    /// </summary>
    /// <param name="sourceName">Connector source name, recorded as <c>concelier.source</c>.</param>
    /// <param name="requestUri">Request URI, recorded in full as <c>http.url</c>.</param>
    /// <param name="httpMethod">HTTP method, recorded as <c>http.method</c>.</param>
    /// <param name="clientName">Optional HTTP client name; recorded as <c>http.client_name</c> when non-blank.</param>
    /// <returns>The started activity, or null when nothing is listening to the activity source.</returns>
    public static Activity? StartFetch(string sourceName, Uri requestUri, string httpMethod, string? clientName)
    {
        var tags = new ActivityTagsCollection
        {
            { "concelier.source", sourceName },
            { "http.method", httpMethod },
            { "http.url", requestUri.ToString() },
        };

        if (!string.IsNullOrWhiteSpace(clientName))
        {
            tags.Add("http.client_name", clientName);
        }

        return ActivitySource.StartActivity("SourceFetch", ActivityKind.Client, parentContext: default, tags: tags);
    }

    /// <summary>
    /// Records the metrics for one completed HTTP request.
    /// </summary>
    /// <param name="sourceName">Connector source name.</param>
    /// <param name="clientName">Optional HTTP client name.</param>
    /// <param name="statusCode">Final response status code.</param>
    /// <param name="attemptCount">Number of attempts made, recorded as <c>http.attempts</c>.</param>
    /// <param name="duration">Request duration; recorded in milliseconds.</param>
    /// <param name="contentLength">Payload size in bytes; recorded only when present and non-negative.</param>
    /// <param name="rateLimitRemaining">Raw remaining-quota value; when it parses as an integer it is attached to the current activity.</param>
    public static void RecordHttpRequest(string sourceName, string? clientName, HttpStatusCode statusCode, int attemptCount, TimeSpan duration, long? contentLength, string? rateLimitRemaining)
    {
        var tags = BuildDefaultTags(sourceName, clientName, statusCode, attemptCount);
        HttpRequestCounter.Add(1, tags);
        HttpDuration.Record(duration.TotalMilliseconds, tags);

        if (contentLength.HasValue && contentLength.Value >= 0)
        {
            HttpPayloadBytes.Record(contentLength.Value, tags);
        }

        if (statusCode == HttpStatusCode.NotModified)
        {
            HttpNotModifiedCounter.Add(1, tags);
        }

        // Server errors and throttling count as failures; other 4xx responses do not.
        if ((int)statusCode >= 500 || statusCode == HttpStatusCode.TooManyRequests)
        {
            HttpFailureCounter.Add(1, tags);
        }

        // This value used to be appended to the local tag list after every instrument had already recorded,
        // so it was never emitted anywhere. It is surfaced on the ambient activity instead of the metric tags
        // because a raw quota number as a metric dimension would create one time series per distinct value.
        if (long.TryParse(
                rateLimitRemaining,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out var remaining))
        {
            Activity.Current?.SetTag("http.rate_limit.remaining", remaining);
        }
    }

    /// <summary>
    /// Counts one retry, tagged with the source, attempt number, delay and, when known, client name and status code.
    /// </summary>
    /// <param name="sourceName">Connector source name.</param>
    /// <param name="clientName">Optional HTTP client name.</param>
    /// <param name="statusCode">Status code that triggered the retry, when there was a response.</param>
    /// <param name="attempt">Retry attempt number, recorded as <c>http.retry_attempt</c>.</param>
    /// <param name="delay">Delay before the retry; recorded in milliseconds as <c>http.retry_delay_ms</c>.</param>
    public static void RecordRetry(string sourceName, string? clientName, HttpStatusCode? statusCode, int attempt, TimeSpan delay)
    {
        // NOTE(review): the delay is recorded as a counter tag; if delays are jittered this yields many
        // distinct tag values — confirm this is acceptable for the metrics backend.
        var tags = new TagList
        {
            { "concelier.source", sourceName },
            { "http.retry_attempt", attempt },
            { "http.retry_delay_ms", delay.TotalMilliseconds },
        };

        if (clientName is not null)
        {
            tags.Add("http.client_name", clientName);
        }

        if (statusCode.HasValue)
        {
            tags.Add("http.status_code", (int)statusCode.Value);
        }

        HttpRetryCounter.Add(1, tags);
    }

    /// <summary>
    /// Builds the tag set shared by all per-request instruments.
    /// </summary>
    private static TagList BuildDefaultTags(string sourceName, string? clientName, HttpStatusCode statusCode, int attemptCount)
    {
        var tags = new TagList
        {
            { "concelier.source", sourceName },
            { "http.status_code", (int)statusCode },
            { "http.attempts", attemptCount },
        };

        if (clientName is not null)
        {
            tags.Add("http.client_name", clientName);
        }

        return tags;
    }
}
|
||||
@@ -0,0 +1,210 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Testing;
|
||||
|
||||
/// <summary>
/// Test double for <see cref="HttpMessageHandler"/> that answers requests from pre-registered response
/// factories, looked up by HTTP method and request URI (both compared case-insensitively).
/// Requests that are answered, either from a queue or by the fallback, are recorded for later assertions.
/// </summary>
public sealed class CannedHttpMessageHandler : HttpMessageHandler
{
    // One FIFO queue of response factories per (method, URI) pair.
    private readonly ConcurrentDictionary<RequestKey, ConcurrentQueue<Func<HttpRequestMessage, HttpResponseMessage>>> _responses =
        new(RequestKeyComparer.Instance);

    // Snapshots of the requests handled so far.
    private readonly ConcurrentQueue<CannedRequestRecord> _requests = new();

    // Used when a request has no queued factory; null means such requests fail.
    private Func<HttpRequestMessage, HttpResponseMessage>? _fallback;

    /// <summary>
    /// Snapshot of the recorded requests, oldest first. A new array is produced on every access.
    /// </summary>
    public IReadOnlyCollection<CannedRequestRecord> Requests => _requests.ToArray();

    /// <summary>
    /// Queues a response for a GET request to <paramref name="requestUri"/>.
    /// </summary>
    public void AddResponse(Uri requestUri, Func<HttpResponseMessage> factory)
    {
        AddResponse(HttpMethod.Get, requestUri, _ => factory());
    }

    /// <summary>
    /// Queues a response for <paramref name="method"/> and <paramref name="requestUri"/>.
    /// </summary>
    public void AddResponse(HttpMethod method, Uri requestUri, Func<HttpResponseMessage> factory)
    {
        AddResponse(method, requestUri, _ => factory());
    }

    /// <summary>
    /// Queues a response factory that receives the incoming request.
    /// Factories registered for the same method and URI are consumed in registration order.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public void AddResponse(HttpMethod method, Uri requestUri, Func<HttpRequestMessage, HttpResponseMessage> factory)
    {
        ArgumentNullException.ThrowIfNull(method);
        ArgumentNullException.ThrowIfNull(requestUri);
        ArgumentNullException.ThrowIfNull(factory);

        _responses
            .GetOrAdd(
                new RequestKey(method, requestUri),
                static _ => new ConcurrentQueue<Func<HttpRequestMessage, HttpResponseMessage>>())
            .Enqueue(factory);
    }

    /// <summary>
    /// Queues <paramref name="exception"/> to be thrown when the matching request arrives.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public void AddException(HttpMethod method, Uri requestUri, Exception exception)
    {
        ArgumentNullException.ThrowIfNull(exception);

        AddResponse(method, requestUri, _ => throw exception);
    }

    /// <summary>
    /// Sets the factory used for requests that have no queued response.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="fallback"/> is <c>null</c>.</exception>
    public void SetFallback(Func<HttpRequestMessage, HttpResponseMessage> fallback)
    {
        ArgumentNullException.ThrowIfNull(fallback);

        _fallback = fallback;
    }

    /// <summary>
    /// Drops all queued responses, all recorded requests and the fallback.
    /// </summary>
    public void Clear()
    {
        _responses.Clear();
        _requests.Clear();
        _fallback = null;
    }

    /// <summary>
    /// Verifies that every queued response has been consumed.
    /// </summary>
    /// <exception cref="InvalidOperationException">At least one queue still holds a response.</exception>
    public void AssertNoPendingResponses()
    {
        foreach (var pending in _responses.Values)
        {
            if (pending.IsEmpty)
            {
                continue;
            }

            throw new InvalidOperationException("Not all canned responses were consumed.");
        }
    }

    /// <summary>
    /// Creates an <see cref="HttpClient"/> backed by this handler with a 10 second timeout.
    /// Disposing the client leaves the handler usable.
    /// </summary>
    public HttpClient CreateClient()
    {
        var client = new HttpClient(this, disposeHandler: false);
        client.Timeout = TimeSpan.FromSeconds(10);
        return client;
    }

    /// <summary>
    /// Answers <paramref name="request"/> from the queue registered for its method and URI, or from the
    /// fallback when that queue is missing or empty. The request is recorded before the factory runs.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The request has no URI, or nothing is queued for it and no fallback is set.
    /// </exception>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        var target = request.RequestUri
            ?? throw new InvalidOperationException("Request URI is required for canned responses.");

        var lookup = new RequestKey(request.Method ?? HttpMethod.Get, target);

        Func<HttpRequestMessage, HttpResponseMessage>? responder = null;
        if (_responses.TryGetValue(lookup, out var pending))
        {
            pending.TryDequeue(out responder);
        }

        responder ??= _fallback
            ?? throw new InvalidOperationException($"No canned response registered for {request.Method} {request.RequestUri}.");

        _requests.Enqueue(CaptureRequest(request));

        var reply = responder(request);
        reply.RequestMessage ??= request;
        return Task.FromResult(reply);
    }

    /// <summary>
    /// Builds the recorded form of <paramref name="request"/>: current UTC time, method, URI and a
    /// case-insensitive header map in which multi-valued headers are joined with commas.
    /// Content headers are added after request headers.
    /// </summary>
    private static CannedRequestRecord CaptureRequest(HttpRequestMessage request)
    {
        var captured = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        void Collect(IEnumerable<KeyValuePair<string, IEnumerable<string>>> source)
        {
            foreach (var (name, values) in source)
            {
                captured[name] = string.Join(',', values);
            }
        }

        Collect(request.Headers);
        if (request.Content is { } content)
        {
            Collect(content.Headers);
        }

        return new CannedRequestRecord(
            DateTimeOffset.UtcNow,
            request.Method ?? HttpMethod.Get,
            request.RequestUri!,
            captured);
    }

    /// <summary>
    /// Lookup key made of the HTTP method and the URI string; equality ignores case on both parts.
    /// </summary>
    private readonly record struct RequestKey(HttpMethod Method, string Uri)
    {
        public RequestKey(HttpMethod method, Uri uri)
            : this(method, uri.ToString())
        {
        }

        public bool Equals(RequestKey other)
        {
            if (!string.Equals(Method.Method, other.Method.Method, StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }

            return string.Equals(Uri, other.Uri, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
            => HashCode.Combine(
                StringComparer.OrdinalIgnoreCase.GetHashCode(Method.Method),
                StringComparer.OrdinalIgnoreCase.GetHashCode(Uri));
    }

    /// <summary>
    /// Comparer that forwards to <see cref="RequestKey"/>'s own equality and hashing.
    /// </summary>
    private sealed class RequestKeyComparer : IEqualityComparer<RequestKey>
    {
        public static readonly RequestKeyComparer Instance = new();

        public bool Equals(RequestKey x, RequestKey y)
        {
            return x.Equals(y);
        }

        public int GetHashCode(RequestKey obj)
        {
            return obj.GetHashCode();
        }
    }

    /// <summary>
    /// Recorded view of a handled request.
    /// </summary>
    /// <param name="Timestamp">UTC time at which the request was captured.</param>
    /// <param name="Method">HTTP method of the request.</param>
    /// <param name="Uri">Request URI.</param>
    /// <param name="Headers">Request and content headers, values joined with commas.</param>
    public readonly record struct CannedRequestRecord(DateTimeOffset Timestamp, HttpMethod Method, Uri Uri, IReadOnlyDictionary<string, string> Headers);

    /// <summary>
    /// Creates a response with the given status and a UTF-8 string body of the given media type.
    /// </summary>
    private static HttpResponseMessage BuildTextResponse(HttpStatusCode statusCode, string content, string contentType)
    {
        var body = new StringContent(content, Encoding.UTF8, contentType);
        return new HttpResponseMessage(statusCode) { Content = body };
    }

    /// <summary>
    /// Queues a GET response whose body is <paramref name="json"/> served as <c>application/json</c>.
    /// </summary>
    public void AddJsonResponse(Uri requestUri, string json, HttpStatusCode statusCode = HttpStatusCode.OK)
    {
        AddResponse(requestUri, () => BuildTextResponse(statusCode, json, "application/json"));
    }

    /// <summary>
    /// Queues a GET response whose body is <paramref name="content"/> served as <paramref name="contentType"/>.
    /// </summary>
    public void AddTextResponse(Uri requestUri, string content, string contentType = "text/plain", HttpStatusCode statusCode = HttpStatusCode.OK)
    {
        AddResponse(requestUri, () => BuildTextResponse(statusCode, content, contentType));
    }
}
|
||||
@@ -0,0 +1,62 @@
|
||||
namespace StellaOps.Concelier.Connector.Common.Url;
|
||||
|
||||
/// <summary>
/// Utilities for normalizing URLs from upstream feeds.
/// </summary>
public static class UrlNormalizer
{
    /// <summary>
    /// Attempts to turn <paramref name="value"/> into an absolute URI, resolving relative values against
    /// <paramref name="baseUri"/>.
    /// </summary>
    /// <param name="value">Raw URL text; surrounding whitespace is ignored.</param>
    /// <param name="baseUri">Base used for relative values; when <c>null</c>, relative values are rejected.</param>
    /// <param name="normalized">The resulting URI when the method returns <c>true</c>; otherwise <c>null</c>.</param>
    /// <param name="stripFragment">When <c>true</c> (the default), a fragment (<c>#...</c>) is removed.</param>
    /// <param name="forceHttps">
    /// When <c>true</c>, an <c>http</c> URI is rewritten to <c>https</c>. A non-default port is kept.
    /// Defaults to <c>false</c>, so the scheme is left untouched unless the caller opts in.
    /// </param>
    /// <returns>
    /// <c>false</c> when <paramref name="value"/> is null or whitespace, cannot be parsed, or is relative
    /// without a usable <paramref name="baseUri"/>; otherwise <c>true</c>.
    /// </returns>
    public static bool TryNormalize(
        string? value,
        Uri? baseUri,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out Uri? normalized,
        bool stripFragment = true,
        bool forceHttps = false)
    {
        normalized = null;

        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        if (!Uri.TryCreate(value.Trim(), UriKind.RelativeOrAbsolute, out var candidate))
        {
            return false;
        }

        if (!candidate.IsAbsoluteUri)
        {
            // A relative reference is only meaningful with a base to resolve it against.
            if (baseUri is null || !Uri.TryCreate(baseUri, candidate, out candidate))
            {
                return false;
            }
        }

        if (forceHttps && string.Equals(candidate.Scheme, Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase))
        {
            // Port -1 makes UriBuilder omit the port, so the default http port does not leak into
            // the https URI; an explicit non-default port is carried over.
            candidate = new UriBuilder(candidate)
            {
                Scheme = Uri.UriSchemeHttps,
                Port = candidate.IsDefaultPort ? -1 : candidate.Port,
            }.Uri;
        }

        if (stripFragment && !string.IsNullOrEmpty(candidate.Fragment))
        {
            candidate = new UriBuilder(candidate) { Fragment = string.Empty }.Uri;
        }

        normalized = candidate;
        return true;
    }

    /// <summary>
    /// Same as <see cref="TryNormalize"/>, but throws instead of returning <c>false</c>.
    /// </summary>
    /// <param name="value">Raw URL text.</param>
    /// <param name="baseUri">Optional base for relative values.</param>
    /// <param name="stripFragment">When <c>true</c> (the default), a fragment is removed.</param>
    /// <param name="forceHttps">When <c>true</c>, an <c>http</c> URI is rewritten to <c>https</c>.</param>
    /// <returns>The normalized absolute URI.</returns>
    /// <exception cref="FormatException"><paramref name="value"/> could not be normalized.</exception>
    public static Uri NormalizeOrThrow(string value, Uri? baseUri = null, bool stripFragment = true, bool forceHttps = false)
    {
        if (!TryNormalize(value, baseUri, out var normalized, stripFragment, forceHttps))
        {
            throw new FormatException($"Value '{value}' is not a valid URI");
        }

        return normalized;
    }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
using System.Xml.Linq;
|
||||
using System.Xml.Schema;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Xml;
|
||||
|
||||
/// <summary>
/// Validates an XML document against a set of XML schemas.
/// </summary>
public interface IXmlSchemaValidator
{
    /// <summary>
    /// Validates <paramref name="document"/> against <paramref name="schemaSet"/>.
    /// </summary>
    /// <param name="document">Document to validate.</param>
    /// <param name="schemaSet">Schemas the document is checked against.</param>
    /// <param name="documentName">Name identifying the document being validated.</param>
    void Validate(
        XDocument document,
        XmlSchemaSet schemaSet,
        string documentName);
}
|
||||
@@ -0,0 +1,3 @@
|
||||
namespace StellaOps.Concelier.Connector.Common.Xml;
|
||||
|
||||
/// <summary>
/// A single problem reported during XML schema validation.
/// </summary>
/// <param name="Message">Description of the problem.</param>
/// <param name="Location">Where the problem was found, or <c>null</c> when no location is available.</param>
public sealed record XmlSchemaValidationError(
    string Message,
    string? Location);
|
||||
@@ -0,0 +1,18 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Xml;
|
||||
|
||||
/// <summary>
/// Thrown when a document fails XML schema validation; carries the document name and the collected errors.
/// </summary>
public sealed class XmlSchemaValidationException : Exception
{
    /// <summary>
    /// Creates the exception for <paramref name="documentName"/>.
    /// </summary>
    /// <param name="documentName">Name of the document that failed validation; included in the message.</param>
    /// <param name="errors">Collected validation errors; <c>null</c> is stored as an empty list.</param>
    public XmlSchemaValidationException(string documentName, IReadOnlyList<XmlSchemaValidationError> errors)
        : base($"XML schema validation failed for '{documentName}'.")
    {
        DocumentName = documentName;
        Errors = errors is null
            ? Array.Empty<XmlSchemaValidationError>()
            : errors;
    }

    /// <summary>Name of the document that failed validation.</summary>
    public string DocumentName { get; }

    /// <summary>Validation errors supplied at construction; never <c>null</c>.</summary>
    public IReadOnlyList<XmlSchemaValidationError> Errors { get; }
}
|
||||
@@ -0,0 +1,71 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Xml.Linq;
|
||||
using System.Xml.Schema;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace StellaOps.Concelier.Connector.Common.Xml;
|
||||
|
||||
/// <summary>
/// <see cref="IXmlSchemaValidator"/> implementation that validates through <c>XDocument.Validate</c>,
/// collects every reported problem and logs the outcome.
/// </summary>
public sealed class XmlSchemaValidator : IXmlSchemaValidator
{
    private readonly ILogger<XmlSchemaValidator> _logger;

    /// <summary>
    /// Creates the validator.
    /// </summary>
    /// <param name="logger">Logger that receives the validation outcome.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
    public XmlSchemaValidator(ILogger<XmlSchemaValidator> logger)
    {
        ArgumentNullException.ThrowIfNull(logger);
        _logger = logger;
    }

    /// <summary>
    /// Validates <paramref name="document"/> against <paramref name="schemaSet"/>. Schema information is
    /// added to the document during validation (<c>addSchemaInfo: true</c>).
    /// </summary>
    /// <param name="document">Document to validate.</param>
    /// <param name="schemaSet">Schemas to validate against.</param>
    /// <param name="documentName">Name used in log entries and in the failure exception.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="document"/>, <paramref name="schemaSet"/> or <paramref name="documentName"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException"><paramref name="documentName"/> is empty or whitespace.</exception>
    /// <exception cref="XmlSchemaValidationException">At least one validation problem was reported.</exception>
    public void Validate(XDocument document, XmlSchemaSet schemaSet, string documentName)
    {
        ArgumentNullException.ThrowIfNull(document);
        ArgumentNullException.ThrowIfNull(schemaSet);
        ArgumentException.ThrowIfNullOrWhiteSpace(documentName);

        var problems = new List<XmlSchemaValidationError>();

        try
        {
            // Every validation event raised by the framework is collected, so a single pass
            // reports all problems rather than just the first one.
            document.Validate(
                schemaSet,
                (_, args) =>
                {
                    if (args is not null)
                    {
                        problems.Add(new XmlSchemaValidationError(args.Message, FormatLocation(args.Exception)));
                    }
                },
                addSchemaInfo: true);
        }
        catch (System.Xml.Schema.XmlSchemaValidationException ex)
        {
            // A validation exception thrown by the framework is recorded as one more problem.
            problems.Add(new XmlSchemaValidationError(ex.Message, FormatLocation(ex)));
        }

        if (problems.Count == 0)
        {
            _logger.LogDebug("XML schema validation succeeded for {DocumentName}", documentName);
            return;
        }

        var failure = new XmlSchemaValidationException(documentName, problems);
        _logger.LogError(failure, "XML schema validation failed for {DocumentName}", documentName);
        throw failure;
    }

    /// <summary>
    /// Formats the line and position carried by <paramref name="exception"/>, or returns <c>null</c>
    /// when there is no exception or its line number is not positive.
    /// </summary>
    private static string? FormatLocation(System.Xml.Schema.XmlSchemaException? exception)
        => exception is { LineNumber: > 0 }
            ? $"line {exception.LineNumber}, position {exception.LinePosition}"
            : null;
}
|
||||
Reference in New Issue
Block a user