Rename Concelier Source modules to Connector

This commit is contained in:
master
2025-10-18 20:11:18 +03:00
parent 89ede53cc3
commit 052da7a7d0
789 changed files with 1489 additions and 1489 deletions

View File

@@ -0,0 +1,28 @@
# AGENTS
## Role
Kaspersky ICS-CERT connector; authoritative for OT/ICS vendor advisories covered by Kaspersky ICS-CERT; maps affected products as ICS domain entities with platform tags.
## Scope
- Discover/fetch advisories list; window by publish date or slug; fetch detail pages; handle pagination.
- Validate HTML or JSON; extract CVEs, affected OT vendors/models/families, mitigations; normalize product taxonomy; map fixed versions if present.
- Persist raw docs with sha256; maintain source_state; idempotent mapping.
## Participants
- Connector.Common (HTTP, HTML helpers, validators).
- Storage.Mongo (document, dto, advisory, alias, affected, reference, source_state).
- Models (canonical; affected.platform="ics-vendor", tags for device families).
- Core/WebService (jobs: source:ics-kaspersky:fetch|parse|map).
- Merge engine respects ICS vendor authority for OT impact.
## Interfaces & contracts
- Aliases: CVE ids; if stable ICS-CERT advisory id exists, store scheme "ICS-KASP".
- Affected: Type=vendor; Vendor/Product populated; platforms/tags for device family or firmware line; versions with fixedBy when explicit.
- References: advisory, vendor pages, mitigation guides; typed; deduped.
- Provenance: method=parser; value=advisory slug.
## In/Out of scope
In: ICS advisory mapping, affected vendor products, mitigation references.
Out: firmware downloads; reverse-engineering artifacts.
## Observability & security expectations
- Metrics: SourceDiagnostics publishes `concelier.source.http.*` counters/histograms with `concelier.source=ics-kaspersky` to track fetch totals, parse failures, and mapped affected counts.
- Logs: slugs, vendor/product counts, timing; allowlist host.
## Tests
- Author and review coverage in `../StellaOps.Concelier.Connector.Ics.Kaspersky.Tests`.
- Shared fixtures (e.g., `MongoIntegrationFixture`, `ConnectorTestHarness`) live in `../StellaOps.Concelier.Testing`.
- Keep fixtures deterministic; match new cases to real-world advisories or regression scenarios.

View File

@@ -0,0 +1,53 @@
using System;
using System.Diagnostics.CodeAnalysis;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky.Configuration;
/// <summary>
/// Settings for the Kaspersky ICS-CERT connector: where the advisory feed lives, how far back
/// a fetch looks, how many feed pages it may read, and how long it pauses between requests.
/// </summary>
public sealed class KasperskyOptions
{
    /// <summary>Name of the named <c>HttpClient</c> used for feed and advisory requests.</summary>
    public static string HttpClientName { get; } = "source.ics.kaspersky";

    /// <summary>Absolute URI of the advisory feed.</summary>
    public Uri FeedUri { get; set; } = new("https://ics-cert.kaspersky.com/feed-advisories/", UriKind.Absolute);

    /// <summary>Look-back window used when no publish cursor exists yet. Must be positive.</summary>
    public TimeSpan WindowSize { get; set; } = TimeSpan.FromDays(30);

    /// <summary>Overlap subtracted from the last published timestamp. Must be in [0, WindowSize).</summary>
    public TimeSpan WindowOverlap { get; set; } = TimeSpan.FromDays(2);

    /// <summary>Upper bound on feed pages read per fetch run. Must be positive.</summary>
    public int MaxPagesPerFetch { get; set; } = 3;

    /// <summary>Pause inserted before each advisory request. Must not be negative.</summary>
    public TimeSpan RequestDelay { get; set; } = TimeSpan.FromMilliseconds(500);

    /// <summary>
    /// Checks every setting and reports the first violation found.
    /// </summary>
    /// <exception cref="InvalidOperationException">A setting is outside its allowed range.</exception>
    [MemberNotNull(nameof(FeedUri))]
    public void Validate()
    {
        // Checked separately so the compiler can see FeedUri is non-null afterwards.
        if (FeedUri is not { IsAbsoluteUri: true })
        {
            throw new InvalidOperationException("FeedUri must be an absolute URI.");
        }

        // Evaluated top to bottom; the first failing rule wins.
        var violation =
            WindowSize <= TimeSpan.Zero ? "WindowSize must be greater than zero."
            : WindowOverlap < TimeSpan.Zero ? "WindowOverlap cannot be negative."
            : WindowOverlap >= WindowSize ? "WindowOverlap must be smaller than WindowSize."
            : MaxPagesPerFetch <= 0 ? "MaxPagesPerFetch must be positive."
            : RequestDelay < TimeSpan.Zero ? "RequestDelay cannot be negative."
            : null;

        if (violation is not null)
        {
            throw new InvalidOperationException(violation);
        }
    }
}

View File

@@ -0,0 +1,14 @@
using System;
using System.Collections.Immutable;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
/// <summary>
/// Parsed form of one Kaspersky ICS-CERT advisory, produced by
/// <c>KasperskyAdvisoryParser.Parse</c> from the feed metadata and the raw advisory page.
/// </summary>
/// <param name="AdvisoryKey">Key identifying the advisory, as supplied to the parser.</param>
/// <param name="Title">Advisory title, as supplied to the parser.</param>
/// <param name="Link">Advisory URL, kept as a string.</param>
/// <param name="Published">Publication timestamp, as supplied to the parser.</param>
/// <param name="Summary">Optional short summary; may be <c>null</c>.</param>
/// <param name="Content">Plain text extracted from the advisory HTML (tags removed, whitespace collapsed).</param>
/// <param name="CveIds">CVE identifiers found in title, summary and content; upper-cased, de-duplicated, ordinally sorted.</param>
/// <param name="VendorNames">Heuristically extracted vendor-name candidates; de-duplicated case-insensitively, ordinally sorted.</param>
internal sealed record KasperskyAdvisoryDto(
string AdvisoryKey,
string Title,
string Link,
DateTimeOffset Published,
string? Summary,
string Content,
ImmutableArray<string> CveIds,
ImmutableArray<string> VendorNames);

View File

@@ -0,0 +1,172 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
/// <summary>
/// Converts a fetched advisory HTML page plus its feed metadata into a
/// <see cref="KasperskyAdvisoryDto"/>: the page is reduced to plain text, CVE identifiers are
/// collected from title, summary and content, and vendor-name candidates are derived
/// heuristically from the same three texts.
/// </summary>
internal static class KasperskyAdvisoryParser
{
    private static readonly Regex CveRegex = new("CVE-\\d{4}-\\d+", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex WhitespaceRegex = new("\\s+", RegexOptions.Compiled);

    // Hoisted out of CleanVendorSegment, which runs once per text segment.
    private static readonly Regex ProvidedByRegex = new(
        "provided by\\s+(?<vendor>[A-Za-z0-9&.,' ]+)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private static readonly Regex DescriptorRegex = new(
        "^(?<vendor>[A-Z][A-Za-z0-9&.,' ]{1,80}?)(?:\\s+(?:controllers?|devices?|modules?|products?|gateways?|routers?|appliances?|systems?|solutions?|firmware))\\b",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Full stop, hyphen, en dash, em dash, colon. The dashes are written as escapes so they
    // cannot be lost to an encoding-unaware tool; an empty separator would silently split on nothing.
    private static readonly string[] SegmentSeparators = { ".", "-", "\u2013", "\u2014", ":" };

    private static readonly string[] VulnerabilityMarkers = { "vulnerability", "vulnerabilities" };

    /// <summary>
    /// Builds the DTO for one advisory.
    /// </summary>
    /// <param name="advisoryKey">Key stored on the DTO unchanged.</param>
    /// <param name="title">Advisory title; also scanned for CVEs and vendors.</param>
    /// <param name="link">Advisory URL stored on the DTO unchanged.</param>
    /// <param name="published">Publication timestamp stored on the DTO unchanged.</param>
    /// <param name="summary">Optional summary; also scanned for CVEs and vendors.</param>
    /// <param name="rawHtml">Raw advisory page bytes, decoded as UTF-8.</param>
    /// <returns>The populated DTO.</returns>
    public static KasperskyAdvisoryDto Parse(
        string advisoryKey,
        string title,
        string link,
        DateTimeOffset published,
        string? summary,
        byte[] rawHtml)
    {
        var content = ExtractText(rawHtml);
        var cves = ExtractCves(title, summary, content);
        var vendors = ExtractVendors(title, summary, content);

        return new KasperskyAdvisoryDto(
            advisoryKey,
            title,
            link,
            published,
            summary,
            content,
            cves,
            vendors);
    }

    /// <summary>
    /// Strips scripts, styles, comments and tags from the page, decodes HTML entities and
    /// collapses whitespace. Returns an empty string for a missing or empty payload.
    /// </summary>
    private static string ExtractText(byte[] rawHtml)
    {
        if (rawHtml is null || rawHtml.Length == 0)
        {
            return string.Empty;
        }

        var html = Encoding.UTF8.GetString(rawHtml);
        html = Regex.Replace(html, "<script[\\s\\S]*?</script>", string.Empty, RegexOptions.IgnoreCase);
        html = Regex.Replace(html, "<style[\\s\\S]*?</style>", string.Empty, RegexOptions.IgnoreCase);
        html = Regex.Replace(html, "<!--.*?-->", string.Empty, RegexOptions.Singleline);
        html = Regex.Replace(html, "<[^>]+>", " ");

        var decoded = System.Net.WebUtility.HtmlDecode(html);
        return string.IsNullOrWhiteSpace(decoded)
            ? string.Empty
            : WhitespaceRegex.Replace(decoded, " ").Trim();
    }

    /// <summary>
    /// Collects every CVE identifier in the three texts, upper-cased, de-duplicated and
    /// ordinally sorted so the output is deterministic.
    /// </summary>
    private static ImmutableArray<string> ExtractCves(string title, string? summary, string content)
    {
        var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        foreach (var text in new[] { title, summary, content })
        {
            if (string.IsNullOrWhiteSpace(text))
            {
                continue;
            }

            foreach (Match match in CveRegex.Matches(text))
            {
                found.Add(match.Value.ToUpperInvariant());
            }
        }

        return found.OrderBy(static cve => cve, StringComparer.Ordinal).ToImmutableArray();
    }

    /// <summary>
    /// Derives vendor-name candidates from the three texts: each text is cut into segments and
    /// every segment that survives <see cref="CleanVendorSegment"/> becomes a candidate.
    /// Candidates are de-duplicated case-insensitively and ordinally sorted.
    /// </summary>
    private static ImmutableArray<string> ExtractVendors(string title, string? summary, string content)
    {
        var candidates = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        foreach (var text in new[] { title, summary, content })
        {
            if (string.IsNullOrWhiteSpace(text))
            {
                continue;
            }

            foreach (var segment in SplitSegments(text))
            {
                var cleaned = CleanVendorSegment(segment);
                if (!string.IsNullOrWhiteSpace(cleaned))
                {
                    candidates.Add(cleaned);
                }
            }
        }

        return candidates.Count == 0
            ? ImmutableArray<string>.Empty
            : candidates
                .OrderBy(static vendor => vendor, StringComparer.Ordinal)
                .ToImmutableArray();
    }

    /// <summary>
    /// Cuts the text at every separator, trimming each piece and dropping empty ones.
    /// Pieces are returned in their original order.
    /// </summary>
    private static IEnumerable<string> SplitSegments(string text)
        => text.Split(SegmentSeparators, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);

    /// <summary>
    /// Reduces a text segment to a vendor-name candidate, or <c>null</c> when nothing is left.
    /// </summary>
    private static string? CleanVendorSegment(string value)
    {
        var trimmed = value.Trim();
        if (string.IsNullOrEmpty(trimmed))
        {
            return null;
        }

        var lowered = trimmed.ToLowerInvariant();
        if (lowered.Contains("cve-", StringComparison.Ordinal) || lowered.Contains("vulnerability", StringComparison.Ordinal))
        {
            // Keep only what precedes the first marker.
            // NOTE(review): this split is case-sensitive although the check above is not.
            trimmed = trimmed
                .Split(VulnerabilityMarkers, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
                .FirstOrDefault() ?? trimmed;
        }

        var providedMatch = ProvidedByRegex.Match(trimmed);
        if (providedMatch.Success)
        {
            trimmed = providedMatch.Groups["vendor"].Value;
        }

        var descriptorMatch = DescriptorRegex.Match(trimmed);
        if (descriptorMatch.Success)
        {
            trimmed = descriptorMatch.Groups["vendor"].Value;
        }

        // Normalise the typographic apostrophe (U+2019) to ASCII. The old value must never be
        // empty: string.Replace throws ArgumentException for an empty search string.
        trimmed = trimmed.Replace("\u2019", "'", StringComparison.Ordinal);
        trimmed = trimmed.Replace("\"", string.Empty, StringComparison.Ordinal);

        if (trimmed.Length > 200)
        {
            trimmed = trimmed[..200];
        }

        return string.IsNullOrWhiteSpace(trimmed) ? null : trimmed;
    }
}

View File

@@ -0,0 +1,207 @@
using System;
using System.Collections.Generic;
using System.Linq;
using MongoDB.Bson;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
/// <summary>
/// Immutable progress state of the Kaspersky connector, stored as a BSON document.
/// </summary>
/// <param name="LastPublished">Newest advisory publication timestamp seen so far, if any.</param>
/// <param name="PendingDocuments">Ids of fetched documents still waiting to be parsed.</param>
/// <param name="PendingMappings">Ids of parsed documents still waiting to be mapped.</param>
/// <param name="FetchCache">ETag / Last-Modified values remembered per request URI.</param>
internal sealed record KasperskyCursor(
    DateTimeOffset? LastPublished,
    IReadOnlyCollection<Guid> PendingDocuments,
    IReadOnlyCollection<Guid> PendingMappings,
    IReadOnlyDictionary<string, KasperskyFetchMetadata> FetchCache)
{
    private static readonly IReadOnlyCollection<Guid> EmptyGuidList = Array.Empty<Guid>();

    private static readonly IReadOnlyDictionary<string, KasperskyFetchMetadata> EmptyFetchCache =
        new Dictionary<string, KasperskyFetchMetadata>(StringComparer.OrdinalIgnoreCase);

    /// <summary>Cursor with no timestamp, no pending work and an empty fetch cache.</summary>
    public static KasperskyCursor Empty { get; } = new(null, EmptyGuidList, EmptyGuidList, EmptyFetchCache);

    /// <summary>
    /// Serialises the cursor. Pending ids are written as strings; <c>lastPublished</c> and
    /// <c>fetchCache</c> are only written when they carry data.
    /// </summary>
    public BsonDocument ToBsonDocument()
    {
        var document = new BsonDocument
        {
            ["pendingDocuments"] = new BsonArray(PendingDocuments.Select(id => id.ToString())),
            ["pendingMappings"] = new BsonArray(PendingMappings.Select(id => id.ToString())),
        };

        if (LastPublished.HasValue)
        {
            document["lastPublished"] = LastPublished.Value.UtcDateTime;
        }

        if (FetchCache.Count > 0)
        {
            var cacheArray = new BsonArray();
            foreach (var (uri, metadata) in FetchCache)
            {
                var cacheDocument = new BsonDocument
                {
                    ["uri"] = uri,
                };

                if (!string.IsNullOrWhiteSpace(metadata.ETag))
                {
                    cacheDocument["etag"] = metadata.ETag;
                }

                if (metadata.LastModified.HasValue)
                {
                    cacheDocument["lastModified"] = metadata.LastModified.Value.UtcDateTime;
                }

                cacheArray.Add(cacheDocument);
            }

            document["fetchCache"] = cacheArray;
        }

        return document;
    }

    /// <summary>
    /// Rebuilds a cursor from its stored form. A missing or empty document yields
    /// <see cref="Empty"/>; unreadable fields are skipped rather than rejected.
    /// </summary>
    public static KasperskyCursor FromBson(BsonDocument? document)
    {
        if (document is null || document.ElementCount == 0)
        {
            return Empty;
        }

        var lastPublished = document.TryGetValue("lastPublished", out var lastPublishedValue)
            ? ParseDate(lastPublishedValue)
            : null;

        return new KasperskyCursor(
            lastPublished,
            ReadGuidArray(document, "pendingDocuments"),
            ReadGuidArray(document, "pendingMappings"),
            ReadFetchCache(document));
    }

    /// <summary>Returns a copy whose pending-parse list is the distinct <paramref name="ids"/> (empty when null).</summary>
    public KasperskyCursor WithPendingDocuments(IEnumerable<Guid> ids)
        => this with { PendingDocuments = ids?.Distinct().ToArray() ?? EmptyGuidList };

    /// <summary>Returns a copy whose pending-map list is the distinct <paramref name="ids"/> (empty when null).</summary>
    public KasperskyCursor WithPendingMappings(IEnumerable<Guid> ids)
        => this with { PendingMappings = ids?.Distinct().ToArray() ?? EmptyGuidList };

    /// <summary>Returns a copy with the given last-published timestamp.</summary>
    public KasperskyCursor WithLastPublished(DateTimeOffset? timestamp)
        => this with { LastPublished = timestamp };

    /// <summary>
    /// Returns a copy that remembers the cache validators for <paramref name="requestUri"/>.
    /// A blank URI leaves the cursor unchanged.
    /// </summary>
    public KasperskyCursor WithFetchMetadata(string requestUri, string? etag, DateTimeOffset? lastModified)
    {
        if (string.IsNullOrWhiteSpace(requestUri))
        {
            return this;
        }

        var cache = new Dictionary<string, KasperskyFetchMetadata>(FetchCache, StringComparer.OrdinalIgnoreCase)
        {
            [requestUri] = new KasperskyFetchMetadata(etag, lastModified),
        };

        return this with { FetchCache = cache };
    }

    /// <summary>
    /// Returns a copy whose fetch cache only keeps entries for <paramref name="keepUris"/>.
    /// When the cache or the keep list is empty the cursor is returned unchanged.
    /// </summary>
    public KasperskyCursor PruneFetchCache(IEnumerable<string> keepUris)
    {
        if (FetchCache.Count == 0)
        {
            return this;
        }

        var keepSet = new HashSet<string>(keepUris ?? Array.Empty<string>(), StringComparer.OrdinalIgnoreCase);
        if (keepSet.Count == 0)
        {
            return this;
        }

        var cache = new Dictionary<string, KasperskyFetchMetadata>(StringComparer.OrdinalIgnoreCase);
        foreach (var uri in keepSet)
        {
            if (FetchCache.TryGetValue(uri, out var metadata))
            {
                cache[uri] = metadata;
            }
        }

        return this with { FetchCache = cache };
    }

    /// <summary>Looks up the remembered cache validators for <paramref name="requestUri"/>.</summary>
    /// <returns><c>true</c> when an entry exists; otherwise <c>false</c>.</returns>
    public bool TryGetFetchMetadata(string requestUri, out KasperskyFetchMetadata metadata)
    {
        if (FetchCache.TryGetValue(requestUri, out metadata!))
        {
            return true;
        }

        metadata = default!;
        return false;
    }

    /// <summary>
    /// Reads a timestamp stored either as a BSON date or as a string. Strings are parsed with
    /// the invariant culture and, when they carry no offset, taken as UTC, so the result does
    /// not depend on the host's culture or time zone.
    /// </summary>
    private static DateTimeOffset? ParseDate(BsonValue value)
    {
        return value.BsonType switch
        {
            BsonType.DateTime => DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc),
            BsonType.String when DateTimeOffset.TryParse(
                value.AsString,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.AssumeUniversal,
                out var parsed) => parsed.ToUniversalTime(),
            _ => null,
        };
    }

    /// <summary>Reads an array of GUIDs from <paramref name="field"/>, skipping entries that do not parse.</summary>
    private static IReadOnlyCollection<Guid> ReadGuidArray(BsonDocument document, string field)
    {
        if (!document.TryGetValue(field, out var value) || value is not BsonArray array)
        {
            return Array.Empty<Guid>();
        }

        var results = new List<Guid>(array.Count);
        foreach (var element in array)
        {
            if (element is null)
            {
                continue;
            }

            if (Guid.TryParse(element.ToString(), out var guid))
            {
                results.Add(guid);
            }
        }

        return results;
    }

    /// <summary>Reads the <c>fetchCache</c> array, skipping entries without a string <c>uri</c>.</summary>
    private static IReadOnlyDictionary<string, KasperskyFetchMetadata> ReadFetchCache(BsonDocument document)
    {
        if (!document.TryGetValue("fetchCache", out var value) || value is not BsonArray array)
        {
            return EmptyFetchCache;
        }

        var cache = new Dictionary<string, KasperskyFetchMetadata>(StringComparer.OrdinalIgnoreCase);
        foreach (var element in array)
        {
            if (element is not BsonDocument cacheDocument)
            {
                continue;
            }

            if (!cacheDocument.TryGetValue("uri", out var uriValue) || uriValue.BsonType != BsonType.String)
            {
                continue;
            }

            var uri = uriValue.AsString;
            string? etag = cacheDocument.TryGetValue("etag", out var etagValue) && etagValue.IsString ? etagValue.AsString : null;
            DateTimeOffset? lastModified = cacheDocument.TryGetValue("lastModified", out var lastModifiedValue)
                ? ParseDate(lastModifiedValue)
                : null;

            cache[uri] = new KasperskyFetchMetadata(etag, lastModified);
        }

        return cache.Count == 0 ? EmptyFetchCache : cache;
    }
}
/// <summary>
/// Cache validators remembered for one request URI in the cursor's fetch cache.
/// </summary>
/// <param name="ETag">Stored ETag value, or <c>null</c> when none was recorded.</param>
/// <param name="LastModified">Stored Last-Modified timestamp, or <c>null</c> when none was recorded.</param>
internal sealed record KasperskyFetchMetadata(string? ETag, DateTimeOffset? LastModified);

View File

@@ -0,0 +1,133 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Configuration;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
/// <summary>
/// Downloads one page of the Kaspersky ICS-CERT advisory RSS feed and converts its
/// <c>item</c> elements into <see cref="KasperskyFeedItem"/> values.
/// </summary>
public sealed class KasperskyFeedClient
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly KasperskyOptions _options;
    private readonly ILogger<KasperskyFeedClient> _logger;

    private static readonly XNamespace ContentNamespace = "http://purl.org/rss/1.0/modules/content/";

    /// <summary>Creates the client and validates the supplied options.</summary>
    /// <exception cref="ArgumentNullException">An argument, or the options value, is null.</exception>
    /// <exception cref="InvalidOperationException">The options fail validation.</exception>
    public KasperskyFeedClient(IHttpClientFactory httpClientFactory, IOptions<KasperskyOptions> options, ILogger<KasperskyFeedClient> logger)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
        _options.Validate();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Fetches feed page <paramref name="page"/> (1-based; values below 2 request the base feed URI)
    /// and returns its usable items. Items without title, link or pubDate, or with an
    /// unparsable link or date, are skipped.
    /// </summary>
    /// <param name="page">Feed page number.</param>
    /// <param name="cancellationToken">Cancels the HTTP request and the XML load.</param>
    /// <returns>The parsed items; empty when the feed has no <c>channel</c> element.</returns>
    public async Task<IReadOnlyList<KasperskyFeedItem>> GetItemsAsync(int page, CancellationToken cancellationToken)
    {
        var client = _httpClientFactory.CreateClient(KasperskyOptions.HttpClientName);
        var feedUri = BuildUri(_options.FeedUri, page);

        using var response = await client.GetAsync(feedUri, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);

        // Load straight from the stream: the read is cancellable and the XML reader picks the
        // encoding from the BOM / XML declaration instead of a hard-coded UTF-8 decoder.
        var document = await XDocument.LoadAsync(stream, LoadOptions.None, cancellationToken).ConfigureAwait(false);

        var items = new List<KasperskyFeedItem>();
        var channel = document.Root?.Element("channel");
        if (channel is null)
        {
            _logger.LogWarning("Feed {FeedUri} is missing channel element", feedUri);
            return items;
        }

        foreach (var item in channel.Elements("item"))
        {
            var title = item.Element("title")?.Value?.Trim();
            var linkValue = item.Element("link")?.Value?.Trim();
            var pubDateValue = item.Element("pubDate")?.Value?.Trim();
            var summary = item.Element("description")?.Value?.Trim();

            if (string.IsNullOrWhiteSpace(title) || string.IsNullOrWhiteSpace(linkValue) || string.IsNullOrWhiteSpace(pubDateValue))
            {
                continue;
            }

            if (!Uri.TryCreate(linkValue, UriKind.Absolute, out var link))
            {
                _logger.LogWarning("Skipping feed item with invalid link: {Link}", linkValue);
                continue;
            }

            if (!DateTimeOffset.TryParse(pubDateValue, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out var published))
            {
                _logger.LogWarning("Skipping feed item {Title} due to invalid pubDate {PubDate}", title, pubDateValue);
                continue;
            }

            // content:encoded is only a fallback for items that have no description element.
            var encoded = item.Element(ContentNamespace + "encoded")?.Value;
            if (!string.IsNullOrWhiteSpace(encoded))
            {
                summary ??= HtmlToPlainText(encoded);
            }

            items.Add(new KasperskyFeedItem(title, Canonicalize(link), published.ToUniversalTime(), summary));
        }

        return items;
    }

    /// <summary>Appends <c>paged=N</c> to the feed URI's query for pages after the first.</summary>
    private static Uri BuildUri(Uri baseUri, int page)
    {
        if (page <= 1)
        {
            return baseUri;
        }

        var builder = new UriBuilder(baseUri);
        var trimmed = builder.Query.TrimStart('?');
        var pageSegment = $"paged={page.ToString(CultureInfo.InvariantCulture)}";
        builder.Query = string.IsNullOrEmpty(trimmed)
            ? pageSegment
            : $"{trimmed}&{pageSegment}";
        return builder.Uri;
    }

    /// <summary>Drops the query string from an advisory link; links without one are returned as-is.</summary>
    private static Uri Canonicalize(Uri link)
    {
        if (string.IsNullOrEmpty(link.Query))
        {
            return link;
        }

        var builder = new UriBuilder(link)
        {
            Query = string.Empty,
        };

        return builder.Uri;
    }

    /// <summary>
    /// Removes scripts, styles and tags, decodes HTML entities and collapses whitespace.
    /// Returns <c>null</c> when nothing but whitespace remains.
    /// </summary>
    private static string? HtmlToPlainText(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return null;
        }

        var withoutScripts = System.Text.RegularExpressions.Regex.Replace(html, "<script[\\s\\S]*?</script>", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        var withoutStyles = System.Text.RegularExpressions.Regex.Replace(withoutScripts, "<style[\\s\\S]*?</style>", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        var withoutTags = System.Text.RegularExpressions.Regex.Replace(withoutStyles, "<[^>]+>", " ");
        var decoded = System.Net.WebUtility.HtmlDecode(withoutTags);
        return string.IsNullOrWhiteSpace(decoded) ? null : System.Text.RegularExpressions.Regex.Replace(decoded, "\\s+", " ").Trim();
    }
}

View File

@@ -0,0 +1,9 @@
using System;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
/// <summary>
/// One entry of the advisory feed, as produced by <c>KasperskyFeedClient.GetItemsAsync</c>.
/// </summary>
/// <param name="Title">Trimmed item title.</param>
/// <param name="Link">Absolute advisory link with its query string removed.</param>
/// <param name="Published">Publication timestamp, converted to UTC.</param>
/// <param name="Summary">Item description, or plain text derived from <c>content:encoded</c> when no description exists; may be <c>null</c>.</param>
public sealed record KasperskyFeedItem(
string Title,
Uri Link,
DateTimeOffset Published,
string? Summary);

View File

@@ -0,0 +1,46 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Concelier.Core.Jobs;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky;
/// <summary>
/// Job kind identifiers for the three Kaspersky ICS connector stages.
/// </summary>
internal static class KasperskyJobKinds
{
// Identifier for the fetch stage.
public const string Fetch = "source:ics-kaspersky:fetch";
// Identifier for the parse stage.
public const string Parse = "source:ics-kaspersky:parse";
// Identifier for the map stage.
public const string Map = "source:ics-kaspersky:map";
}
/// <summary>
/// Job that runs the connector's fetch stage.
/// </summary>
internal sealed class KasperskyFetchJob : IJob
{
    private readonly KasperskyConnector _connector;

    /// <summary>Creates the job around the connector it delegates to.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="connector"/> is null.</exception>
    public KasperskyFetchJob(KasperskyConnector connector)
    {
        ArgumentNullException.ThrowIfNull(connector);
        _connector = connector;
    }

    /// <summary>Forwards the job's service provider and cancellation token to <c>FetchAsync</c>.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        var services = context.Services;
        return _connector.FetchAsync(services, cancellationToken);
    }
}
/// <summary>
/// Job that runs the connector's parse stage.
/// </summary>
internal sealed class KasperskyParseJob : IJob
{
    private readonly KasperskyConnector _connector;

    /// <summary>Creates the job around the connector it delegates to.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="connector"/> is null.</exception>
    public KasperskyParseJob(KasperskyConnector connector)
    {
        ArgumentNullException.ThrowIfNull(connector);
        _connector = connector;
    }

    /// <summary>Forwards the job's service provider and cancellation token to <c>ParseAsync</c>.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        var services = context.Services;
        return _connector.ParseAsync(services, cancellationToken);
    }
}
/// <summary>
/// Job that runs the connector's map stage.
/// </summary>
internal sealed class KasperskyMapJob : IJob
{
    private readonly KasperskyConnector _connector;

    /// <summary>Creates the job around the connector it delegates to.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="connector"/> is null.</exception>
    public KasperskyMapJob(KasperskyConnector connector)
    {
        ArgumentNullException.ThrowIfNull(connector);
        _connector = connector;
    }

    /// <summary>Forwards the job's service provider and cancellation token to <c>MapAsync</c>.</summary>
    public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
    {
        var services = context.Services;
        return _connector.MapAsync(services, cancellationToken);
    }
}

View File

@@ -0,0 +1,464 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using StellaOps.Concelier.Models;
using StellaOps.Concelier.Connector.Common;
using StellaOps.Concelier.Connector.Common.Fetch;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Configuration;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
using StellaOps.Concelier.Storage.Mongo;
using StellaOps.Concelier.Storage.Mongo.Advisories;
using StellaOps.Concelier.Storage.Mongo.Documents;
using StellaOps.Concelier.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky;
public sealed class KasperskyConnector : IFeedConnector
{
// JSON settings shared by ParseAsync (DTO -> JSON) and MapAsync (JSON -> DTO):
// camelCase property names, compact output, null properties omitted on write.
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.General)
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
// Collaborators supplied through the constructor.
private readonly KasperskyFeedClient _feedClient; // reads feed pages
private readonly SourceFetchService _fetchService; // fetches individual advisory pages
private readonly RawDocumentStorage _rawDocumentStorage; // downloads stored raw page bytes
private readonly IDocumentStore _documentStore; // document lookup and status updates
private readonly IDtoStore _dtoStore; // parsed DTO records
private readonly IAdvisoryStore _advisoryStore; // mapped advisories
private readonly ISourceStateRepository _stateRepository; // cursor and failure state
private readonly KasperskyOptions _options; // validated in the constructor
private readonly TimeProvider _timeProvider; // falls back to TimeProvider.System
private readonly ILogger<KasperskyConnector> _logger;
/// <summary>
/// Wires up the connector. Every dependency except <paramref name="timeProvider"/> is required;
/// a null time provider falls back to <see cref="TimeProvider.System"/>.
/// </summary>
/// <exception cref="ArgumentNullException">A required dependency, or the options value, is null.</exception>
/// <exception cref="InvalidOperationException">The options fail validation.</exception>
public KasperskyConnector(
    KasperskyFeedClient feedClient,
    SourceFetchService fetchService,
    RawDocumentStorage rawDocumentStorage,
    IDocumentStore documentStore,
    IDtoStore dtoStore,
    IAdvisoryStore advisoryStore,
    ISourceStateRepository stateRepository,
    IOptions<KasperskyOptions> options,
    TimeProvider? timeProvider,
    ILogger<KasperskyConnector> logger)
{
    // Checks run in parameter order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(feedClient);
    ArgumentNullException.ThrowIfNull(fetchService);
    ArgumentNullException.ThrowIfNull(rawDocumentStorage);
    ArgumentNullException.ThrowIfNull(documentStore);
    ArgumentNullException.ThrowIfNull(dtoStore);
    ArgumentNullException.ThrowIfNull(advisoryStore);
    ArgumentNullException.ThrowIfNull(stateRepository);
    ArgumentNullException.ThrowIfNull(options);

    var resolvedOptions = options.Value;
    ArgumentNullException.ThrowIfNull(resolvedOptions, nameof(options));
    resolvedOptions.Validate();

    ArgumentNullException.ThrowIfNull(logger);

    _feedClient = feedClient;
    _fetchService = fetchService;
    _rawDocumentStorage = rawDocumentStorage;
    _documentStore = documentStore;
    _dtoStore = dtoStore;
    _advisoryStore = advisoryStore;
    _stateRepository = stateRepository;
    _options = resolvedOptions;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _logger = logger;
}
// Source identifier taken from the plugin; this connector passes it to the state repository,
// document lookups, fetch requests, DTO records and advisory provenance.
public string SourceName => KasperskyConnectorPlugin.SourceName;
/// <summary>
/// Reads up to <c>MaxPagesPerFetch</c> feed pages, fetches every advisory published inside the
/// current window, queues new or changed documents for parsing and stores the updated cursor.
/// </summary>
/// <remarks>
/// The window starts at the last published timestamp minus the configured overlap, or at
/// now minus the window size when no timestamp is stored. Paging stops at the first item older
/// than the window start. Feed or advisory request failures are recorded on the source state
/// and rethrown; failures that occur after the caller requested cancellation are rethrown
/// without being recorded, so a shutdown does not put the source into backoff.
/// </remarks>
/// <param name="services">Service provider of the running job; must not be null.</param>
/// <param name="cancellationToken">Cancels the run.</param>
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    var now = _timeProvider.GetUtcNow();
    var windowStart = cursor.LastPublished.HasValue
        ? cursor.LastPublished.Value - _options.WindowOverlap
        : now - _options.WindowSize;

    var pendingDocuments = cursor.PendingDocuments.ToHashSet();
    var maxPublished = cursor.LastPublished ?? DateTimeOffset.MinValue;
    var cursorState = cursor;
    var touchedResources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var reachedWindowStart = false;

    for (var page = 1; page <= _options.MaxPagesPerFetch && !reachedWindowStart; page++)
    {
        IReadOnlyList<KasperskyFeedItem> items;
        try
        {
            items = await _feedClient.GetItemsAsync(page, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
        {
            _logger.LogError(ex, "Failed to load Kaspersky ICS feed page {Page}", page);
            await _stateRepository.MarkFailureAsync(
                SourceName,
                now,
                TimeSpan.FromMinutes(5),
                ex.Message,
                cancellationToken).ConfigureAwait(false);
            throw;
        }

        if (items.Count == 0)
        {
            break;
        }

        foreach (var item in items)
        {
            if (item.Published < windowStart)
            {
                // Older than the window: stop this page and all further paging.
                reachedWindowStart = true;
                break;
            }

            if (_options.RequestDelay > TimeSpan.Zero)
            {
                await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
            }

            var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
            {
                ["kaspersky.title"] = item.Title,
                ["kaspersky.link"] = item.Link.ToString(),
                ["kaspersky.published"] = item.Published.ToString("O"),
            };

            if (!string.IsNullOrWhiteSpace(item.Summary))
            {
                metadata["kaspersky.summary"] = item.Summary!;
            }

            var slug = ExtractSlug(item.Link);
            if (!string.IsNullOrWhiteSpace(slug))
            {
                metadata["kaspersky.slug"] = slug;
            }

            var resourceKey = item.Link.ToString();
            touchedResources.Add(resourceKey);

            // Looked up before the fetch so the previous hash and status can be compared afterwards.
            var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, resourceKey, cancellationToken).ConfigureAwait(false);

            var fetchRequest = new SourceFetchRequest(KasperskyOptions.HttpClientName, SourceName, item.Link)
            {
                Metadata = metadata,
            };

            if (cursorState.TryGetFetchMetadata(resourceKey, out var cachedFetch))
            {
                fetchRequest = fetchRequest with
                {
                    ETag = cachedFetch.ETag,
                    LastModified = cachedFetch.LastModified,
                };
            }

            SourceFetchResult result;
            try
            {
                result = await _fetchService.FetchAsync(fetchRequest, cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
            {
                _logger.LogError(ex, "Failed to fetch Kaspersky advisory {Link}", item.Link);
                await _stateRepository.MarkFailureAsync(
                    SourceName,
                    _timeProvider.GetUtcNow(),
                    TimeSpan.FromMinutes(5),
                    ex.Message,
                    cancellationToken).ConfigureAwait(false);
                throw;
            }

            if (result.IsNotModified || !result.IsSuccess || result.Document is null)
            {
                continue;
            }

            if (existing is not null
                && string.Equals(existing.Sha256, result.Document.Sha256, StringComparison.OrdinalIgnoreCase)
                && string.Equals(existing.Status, DocumentStatuses.Mapped, StringComparison.Ordinal))
            {
                // Same content and already mapped: re-apply the previous status instead of re-queueing.
                await _documentStore.UpdateStatusAsync(result.Document.Id, existing.Status, cancellationToken).ConfigureAwait(false);
            }
            else
            {
                pendingDocuments.Add(result.Document.Id);
            }

            cursorState = cursorState.WithFetchMetadata(resourceKey, result.Document.Etag, result.Document.LastModified);
            if (item.Published > maxPublished)
            {
                maxPublished = item.Published;
            }
        }
    }

    cursorState = cursorState.PruneFetchCache(touchedResources);

    var updatedCursor = cursorState
        .WithPendingDocuments(pendingDocuments)
        .WithLastPublished(maxPublished == DateTimeOffset.MinValue ? cursor.LastPublished : maxPublished);

    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Parses every document queued by the fetch stage into a DTO record, marks it as pending
/// mapping and moves its id from the pending-documents list to the pending-mappings list.
/// </summary>
/// <remarks>
/// Documents that no longer exist are dropped from the queue; documents without stored content
/// are marked failed. A failure to download stored content is logged and rethrown.
/// When no slug is available the advisory key falls back to the document id, so parsing the
/// same document again yields the same key.
/// </remarks>
/// <param name="services">Service provider of the running job; must not be null.</param>
/// <param name="cancellationToken">Cancels the run; checked before each document.</param>
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingDocuments.Count == 0)
    {
        return;
    }

    var remainingDocuments = cursor.PendingDocuments.ToList();
    var pendingMappings = cursor.PendingMappings.ToList();

    foreach (var documentId in cursor.PendingDocuments)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (document is null)
        {
            remainingDocuments.Remove(documentId);
            continue;
        }

        if (!document.GridFsId.HasValue)
        {
            _logger.LogWarning("Kaspersky document {DocumentId} missing GridFS content", document.Id);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            remainingDocuments.Remove(documentId);
            continue;
        }

        var metadata = document.Metadata ?? new Dictionary<string, string>();
        var title = metadata.TryGetValue("kaspersky.title", out var titleValue) ? titleValue : document.Uri;
        var link = metadata.TryGetValue("kaspersky.link", out var linkValue) ? linkValue : document.Uri;

        // The fetch stage writes this value with the round-trip ("O") format, so it is read
        // back with the invariant culture rather than whatever culture the host runs under.
        var published = metadata.TryGetValue("kaspersky.published", out var publishedValue)
            && DateTimeOffset.TryParse(
                publishedValue,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.AssumeUniversal,
                out var parsedPublished)
            ? parsedPublished.ToUniversalTime()
            : document.FetchedAt;

        var summary = metadata.TryGetValue("kaspersky.summary", out var summaryValue) ? summaryValue : null;

        string? slug;
        if (metadata.TryGetValue("kaspersky.slug", out var slugValue))
        {
            slug = slugValue;
        }
        else if (Uri.TryCreate(link, UriKind.Absolute, out var linkUri))
        {
            slug = ExtractSlug(linkUri);
        }
        else
        {
            // A malformed link must not abort the whole pass; fall through to the id-based key.
            slug = null;
        }

        // Deterministic fallback: a random key would create a different advisory on every re-parse.
        var advisoryKey = string.IsNullOrWhiteSpace(slug) ? documentId.ToString("N") : slug;

        byte[] rawBytes;
        try
        {
            rawBytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed downloading raw Kaspersky document {DocumentId}", document.Id);
            throw;
        }

        var dto = KasperskyAdvisoryParser.Parse(advisoryKey, title, link, published, summary, rawBytes);
        var payload = BsonDocument.Parse(JsonSerializer.Serialize(dto, SerializerOptions));
        var dtoRecord = new DtoRecord(Guid.NewGuid(), document.Id, SourceName, "ics.kaspersky/1", payload, _timeProvider.GetUtcNow());

        await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);

        remainingDocuments.Remove(documentId);
        if (!pendingMappings.Contains(documentId))
        {
            pendingMappings.Add(documentId);
        }
    }

    var updatedCursor = cursor
        .WithPendingDocuments(remainingDocuments)
        .WithPendingMappings(pendingMappings);

    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Turns every DTO queued by the parse stage into an advisory, stores it, marks the
/// document as mapped and removes its id from the pending-mappings list.
/// </summary>
/// <remarks>
/// Entries whose DTO or document is missing are dropped from the queue; DTOs that fail to
/// deserialize mark the document failed. DTO payloads that lack the CVE or vendor arrays are
/// treated as having none, so one incomplete payload cannot block the queue.
/// </remarks>
/// <param name="services">Service provider of the running job; must not be null.</param>
/// <param name="cancellationToken">Cancels the run; checked before each document.</param>
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(services);

    var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
    if (cursor.PendingMappings.Count == 0)
    {
        return;
    }

    var pendingMappings = cursor.PendingMappings.ToList();

    foreach (var documentId in cursor.PendingMappings)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
        var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
        if (dto is null || document is null)
        {
            _logger.LogWarning("Skipping Kaspersky mapping for {DocumentId}: DTO or document missing", documentId);
            pendingMappings.Remove(documentId);
            continue;
        }

        var dtoJson = dto.Payload.ToJson(new MongoDB.Bson.IO.JsonWriterSettings
        {
            OutputMode = MongoDB.Bson.IO.JsonOutputMode.RelaxedExtendedJson,
        });

        KasperskyAdvisoryDto advisoryDto;
        try
        {
            advisoryDto = JsonSerializer.Deserialize<KasperskyAdvisoryDto>(dtoJson, SerializerOptions)
                ?? throw new InvalidOperationException("Deserialized DTO was null.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to deserialize Kaspersky DTO for {DocumentId}", document.Id);
            await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
            pendingMappings.Remove(documentId);
            continue;
        }

        // A payload without these arrays deserializes to default(ImmutableArray<string>),
        // which throws when enumerated; treat that as "none".
        IReadOnlyList<string> cveIds = advisoryDto.CveIds.IsDefault ? Array.Empty<string>() : advisoryDto.CveIds;
        IReadOnlyList<string> vendorNames = advisoryDto.VendorNames.IsDefault ? Array.Empty<string>() : advisoryDto.VendorNames;

        var fetchProvenance = new AdvisoryProvenance(SourceName, "document", document.Uri, document.FetchedAt);
        var mappingProvenance = new AdvisoryProvenance(SourceName, "mapping", advisoryDto.AdvisoryKey, dto.ValidatedAt);

        var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            advisoryDto.AdvisoryKey,
        };
        foreach (var cve in cveIds)
        {
            aliases.Add(cve);
        }

        var references = new List<AdvisoryReference>();
        try
        {
            references.Add(new AdvisoryReference(
                advisoryDto.Link,
                "advisory",
                "kaspersky-ics",
                null,
                new AdvisoryProvenance(SourceName, "reference", advisoryDto.Link, dto.ValidatedAt)));
        }
        catch (ArgumentException)
        {
            _logger.LogWarning("Invalid advisory link {Link} for {AdvisoryKey}", advisoryDto.Link, advisoryDto.AdvisoryKey);
        }

        foreach (var cve in cveIds)
        {
            var url = $"https://www.cve.org/CVERecord?id={cve}";
            try
            {
                references.Add(new AdvisoryReference(
                    url,
                    "advisory",
                    cve,
                    null,
                    new AdvisoryProvenance(SourceName, "reference", url, dto.ValidatedAt)));
            }
            catch (ArgumentException)
            {
                // The reference is still skipped, but no longer silently.
                _logger.LogWarning("Invalid CVE reference {Url} for {AdvisoryKey}", url, advisoryDto.AdvisoryKey);
            }
        }

        var affectedPackages = new List<AffectedPackage>();
        foreach (var vendor in vendorNames)
        {
            var provenance = new[]
            {
                new AdvisoryProvenance(SourceName, "affected", vendor, dto.ValidatedAt)
            };

            var rangePrimitives = new RangePrimitives(
                null,
                null,
                null,
                new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
                {
                    ["ics.vendor"] = vendor
                });

            // No version data is extracted; the range only carries the vendor tag.
            var ranges = new[]
            {
                new AffectedVersionRange(
                    rangeKind: "vendor",
                    introducedVersion: null,
                    fixedVersion: null,
                    lastAffectedVersion: null,
                    rangeExpression: null,
                    provenance: provenance[0],
                    primitives: rangePrimitives)
            };

            affectedPackages.Add(new AffectedPackage(
                AffectedPackageTypes.IcsVendor,
                vendor,
                platform: null,
                versionRanges: ranges,
                statuses: Array.Empty<AffectedPackageStatus>(),
                provenance: provenance));
        }

        var advisory = new Advisory(
            advisoryDto.AdvisoryKey,
            advisoryDto.Title,
            advisoryDto.Summary ?? advisoryDto.Content,
            language: "en",
            published: advisoryDto.Published,
            modified: advisoryDto.Published,
            severity: null,
            exploitKnown: false,
            aliases: aliases,
            references: references,
            affectedPackages: affectedPackages,
            cvssMetrics: Array.Empty<CvssMetric>(),
            provenance: new[] { fetchProvenance, mappingProvenance });

        await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
        await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);

        pendingMappings.Remove(documentId);
    }

    var updatedCursor = cursor.WithPendingMappings(pendingMappings);
    await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Reads the persisted state record for <c>SourceName</c> and converts its stored cursor
/// into a <see cref="KasperskyCursor"/>.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the state repository lookup.</param>
/// <returns>
/// <see cref="KasperskyCursor.Empty"/> when no state record exists; otherwise the cursor
/// rebuilt from the record via <see cref="KasperskyCursor.FromBson"/>.
/// </returns>
private async Task<KasperskyCursor> GetCursorAsync(CancellationToken cancellationToken)
{
    var persisted = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
    if (persisted is null)
    {
        // Nothing stored yet for this source: start from the empty cursor.
        return KasperskyCursor.Empty;
    }

    return KasperskyCursor.FromBson(persisted.Cursor);
}
/// <summary>
/// Persists <paramref name="cursor"/> for <c>SourceName</c> through the state repository,
/// passing the current UTC time from the injected time provider.
/// </summary>
/// <param name="cursor">Cursor to serialize and store.</param>
/// <param name="cancellationToken">Token forwarded to the repository call.</param>
private async Task UpdateCursorAsync(KasperskyCursor cursor, CancellationToken cancellationToken)
{
    // Serialize first, then read the clock, matching the original argument evaluation order.
    var serializedCursor = cursor.ToBsonDocument();
    var updatedAt = _timeProvider.GetUtcNow();

    await _stateRepository
        .UpdateCursorAsync(SourceName, serializedCursor, updatedAt, cancellationToken)
        .ConfigureAwait(false);
}
/// <summary>
/// Extracts the slug of an advisory link: the last non-empty path segment of
/// <paramref name="link"/> with its surrounding <c>/</c> characters removed.
/// </summary>
/// <param name="link">Absolute URI whose path is inspected (<see cref="Uri.Segments"/> is read).</param>
/// <returns>
/// The last non-blank path segment, or <c>null</c> when the path contains no such segment
/// (for example a bare host root such as <c>https://host/</c>).
/// </returns>
private static string? ExtractSlug(Uri link)
{
    var segments = link.Segments;

    // Walk backwards so that any number of trailing "/" segments (".../slug/", ".../slug//")
    // is skipped; a root-only path falls through and yields null rather than an empty slug.
    for (var index = segments.Length - 1; index >= 0; index--)
    {
        var candidate = segments[index].Trim('/');
        if (!string.IsNullOrWhiteSpace(candidate))
        {
            return candidate;
        }
    }

    return null;
}
}

View File

@@ -0,0 +1,19 @@
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Plugin;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky;
/// <summary>
/// Connector plugin that exposes <see cref="KasperskyConnector"/> under the
/// <c>ics-kaspersky</c> source name.
/// </summary>
public sealed class KasperskyConnectorPlugin : IConnectorPlugin
{
    /// <summary>Source identifier reported by this plugin.</summary>
    public const string SourceName = "ics-kaspersky";

    /// <summary>Plugin name; always equal to <see cref="SourceName"/>.</summary>
    public string Name
    {
        get { return SourceName; }
    }

    /// <summary>
    /// Reports the plugin as available whenever a service provider instance is supplied.
    /// </summary>
    /// <param name="services">Service provider to check.</param>
    /// <returns><c>true</c> when <paramref name="services"/> is not <c>null</c>.</returns>
    public bool IsAvailable(IServiceProvider services)
    {
        return services is not null;
    }

    /// <summary>
    /// Creates a <see cref="KasperskyConnector"/>, resolving its constructor dependencies
    /// from <paramref name="services"/>.
    /// </summary>
    /// <param name="services">Service provider used to satisfy constructor parameters.</param>
    /// <returns>The newly created connector.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
    public IFeedConnector Create(IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(services);

        var connector = ActivatorUtilities.CreateInstance<KasperskyConnector>(services);
        return connector;
    }
}

View File

@@ -0,0 +1,54 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.DependencyInjection;
using StellaOps.Concelier.Core.Jobs;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Configuration;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky;
/// <summary>
/// Dependency-injection routine for the Kaspersky ICS connector: binds connector options from
/// configuration, registers the fetch/parse/map jobs, and makes sure each job kind has a
/// scheduler definition.
/// </summary>
public sealed class KasperskyDependencyInjectionRoutine : IDependencyInjectionRoutine
{
    private const string ConfigurationSection = "concelier:sources:ics-kaspersky";

    /// <summary>
    /// Registers the connector, its jobs, and the scheduler post-configuration on
    /// <paramref name="services"/>.
    /// </summary>
    /// <param name="services">Service collection to extend.</param>
    /// <param name="configuration">Configuration root containing the connector section.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        services.AddKasperskyIcsConnector(connectorOptions =>
        {
            // Bind the configured section onto the options, then validate the bound values.
            var section = configuration.GetSection(ConfigurationSection);
            section.Bind(connectorOptions);
            connectorOptions.Validate();
        });

        services.AddTransient<KasperskyFetchJob>();
        services.AddTransient<KasperskyParseJob>();
        services.AddTransient<KasperskyMapJob>();

        services.PostConfigure<JobSchedulerOptions>(RegisterJobDefinitions);

        return services;
    }

    /// <summary>Adds default definitions for the fetch, parse, and map job kinds.</summary>
    private static void RegisterJobDefinitions(JobSchedulerOptions schedulerOptions)
    {
        EnsureJob(schedulerOptions, KasperskyJobKinds.Fetch, typeof(KasperskyFetchJob));
        EnsureJob(schedulerOptions, KasperskyJobKinds.Parse, typeof(KasperskyParseJob));
        EnsureJob(schedulerOptions, KasperskyJobKinds.Map, typeof(KasperskyMapJob));
    }

    /// <summary>
    /// Adds a job definition for <paramref name="kind"/> using the scheduler's default timeout
    /// and lease duration, unless a definition with that key is already present.
    /// </summary>
    private static void EnsureJob(JobSchedulerOptions options, string kind, Type jobType)
    {
        if (!options.Definitions.ContainsKey(kind))
        {
            // An existing definition for this kind is left untouched; only fill the gap.
            options.Definitions[kind] = new JobDefinition(
                kind,
                jobType,
                options.DefaultTimeout,
                options.DefaultLeaseDuration,
                CronExpression: null,
                Enabled: true);
        }
    }
}

View File

@@ -0,0 +1,37 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.Concelier.Connector.Common.Http;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Configuration;
using StellaOps.Concelier.Connector.Ics.Kaspersky.Internal;
namespace StellaOps.Concelier.Connector.Ics.Kaspersky;
/// <summary>
/// Service-collection extensions that wire up the Kaspersky ICS connector.
/// </summary>
public static class KasperskyServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="KasperskyOptions"/> (configured by <paramref name="configure"/> and
    /// validated afterwards), the connector's named source HTTP client, and the
    /// <see cref="KasperskyFeedClient"/> and <see cref="KasperskyConnector"/> services.
    /// </summary>
    /// <param name="services">Service collection to extend.</param>
    /// <param name="configure">Callback that populates the connector options.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public static IServiceCollection AddKasperskyIcsConnector(this IServiceCollection services, Action<KasperskyOptions> configure)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);

        var optionsBuilder = services.AddOptions<KasperskyOptions>();
        optionsBuilder.Configure(configure);
        optionsBuilder.PostConfigure(static opts => opts.Validate());

        services.AddSourceHttpClient(KasperskyOptions.HttpClientName, (provider, http) =>
        {
            var connectorOptions = provider.GetRequiredService<IOptions<KasperskyOptions>>().Value;
            var feedUri = connectorOptions.FeedUri;

            http.BaseAddress = feedUri;
            http.Timeout = TimeSpan.FromSeconds(30);
            http.UserAgent = "StellaOps.Concelier.IcsKaspersky/1.0";

            // Reset the allow-list so only the configured feed host remains permitted.
            http.AllowedHosts.Clear();
            http.AllowedHosts.Add(feedUri.Host);

            http.DefaultRequestHeaders["Accept"] = "application/rss+xml";
        });

        services.AddTransient<KasperskyFeedClient>();
        services.AddTransient<KasperskyConnector>();

        return services;
    }
}

View File

@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings: targets net10.0 with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<!-- References to sibling StellaOps projects: Plugin, Concelier.Connector.Common, Concelier.Models, Concelier.Storage.Mongo. -->
<ItemGroup>
<ProjectReference Include="../StellaOps.Plugin/StellaOps.Plugin.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Connector.Common/StellaOps.Concelier.Connector.Common.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Models/StellaOps.Concelier.Models.csproj" />
<ProjectReference Include="../StellaOps.Concelier.Storage.Mongo/StellaOps.Concelier.Storage.Mongo.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,10 @@
# TASKS
| Task | Owner(s) | Depends on | Notes |
|---|---|---|---|
|List/detail fetcher with windowing|BE-Conn-ICS-Kaspersky|Source.Common|**DONE** feed client paginates and fetches detail pages with window overlap.|
|Extractor (vendors/models/CVEs)|BE-Conn-ICS-Kaspersky|Source.Common|**DONE** parser normalizes vendor/model taxonomy into DTO.|
|DTO validation and sanitizer|BE-Conn-ICS-Kaspersky, QA|Source.Common|**DONE** HTML parsed into DTO with sanitizer guardrails.|
|Canonical mapping (affected, refs)|BE-Conn-ICS-Kaspersky|Models|**DONE** mapper outputs `ics-vendor` affected entries with provenance.|
|State/dedupe and fixtures|BE-Conn-ICS-Kaspersky, QA|Storage.Mongo|**DONE** duplicate-content and resume tests exercise SHA gating + cursor hygiene.|
|Backoff on fetch failures|BE-Conn-ICS-Kaspersky|Storage.Mongo|**DONE** feed/page failures mark source_state with timed backoff.|
|Conditional fetch caching|BE-Conn-ICS-Kaspersky|Source.Common|**DONE** fetch cache persists ETag/Last-Modified; not-modified scenarios validated in tests.|