Initial commit (history squashed)

This commit is contained in: master
2025-10-07 10:14:21 +03:00
commit 016c5a3fe7
1132 changed files with 117842 additions and 0 deletions


@@ -0,0 +1,26 @@
# AGENTS
## Role
Connector for OSV.dev across ecosystems; provides authoritative SemVer/PURL ranges for OSS packages.
## Scope
- Fetch by ecosystem or time range; handle pagination and changed-since cursors.
- Parse OSV JSON; validate schema; capture introduced/fixed events, database_specific where relevant.
- Map to Advisory with AffectedPackage(type=semver, Identifier=PURL); preserve SemVer constraints and introduced/fixed chronology.
- Maintain per-ecosystem cursors and deduplicate runs via payload hashes to keep reruns idempotent (see the sketch after this list).
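
A minimal sketch of the payload-hash gate, assuming a store that can return the SHA-256 recorded for a previously fetched (source, URI) pair; the names here are illustrative, not the connector's actual API:

```csharp
// Illustrative only: reruns skip payloads whose SHA-256 matches the hash recorded
// for the same document on an earlier run, which keeps ingestion idempotent.
using System;
using System.Security.Cryptography;

public static class PayloadGate
{
    public static bool IsUnchanged(byte[] payload, string? previousSha256)
    {
        var sha256 = Convert.ToHexString(SHA256.HashData(payload)).ToLowerInvariant();
        return previousSha256 is not null
            && string.Equals(previousSha256, sha256, StringComparison.OrdinalIgnoreCase);
    }
}
```
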
## Participants
- Source.Common supplies HTTP clients, pagination helpers, and validators.
- Storage.Mongo persists documents, DTOs, advisories, and source_state cursors.
- Merge engine resolves OSV vs GHSA consistency; prefers SemVer data for libraries; distro OVAL still overrides OS packages.
- Exporters serialize per-ecosystem ranges untouched.
## Interfaces & contracts
- Job kinds: source:osv:fetch, source:osv:parse, source:osv:map (naming consistent with other connectors).
- Aliases include CVE/GHSA/OSV IDs; references include advisory/patch/release URLs.
- Provenance records method=parser and source=osv.
## In/Out of scope
In: SemVer+PURL accuracy for OSS ecosystems.
Out: vendor PSIRT and distro OVAL specifics.
## Observability & security expectations
- Metrics: SourceDiagnostics exposes the shared `feedser.source.http.*` counters/histograms tagged `feedser.source=osv`; observability dashboards slice on that tag to monitor item volume, schema failures, range counts, and ecosystem coverage (illustrative sketch below). Logs include the ecosystem and cursor values.
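
A hedged illustration of the tagging convention using `System.Diagnostics.Metrics`; the meter and instrument names below are assumptions, only the `feedser.source=osv` tag is prescribed above:

```csharp
// Sketch only: instrument names are assumed; the point is the feedser.source tag
// that dashboards slice on to isolate the OSV connector's HTTP metrics.
using System.Collections.Generic;
using System.Diagnostics.Metrics;

var meter = new Meter("StellaOps.Feedser.Source");                             // assumed meter name
var httpRequests = meter.CreateCounter<long>("feedser.source.http.requests");  // assumed instrument

httpRequests.Add(1,
    new KeyValuePair<string, object?>("feedser.source", "osv"),
    new KeyValuePair<string, object?>("feedser.source.ecosystem", "PyPI"));
```
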
## Tests
- Author and review coverage in `../StellaOps.Feedser.Source.Osv.Tests`.
- Shared fixtures (e.g., `MongoIntegrationFixture`, `ConnectorTestHarness`) live in `../StellaOps.Feedser.Testing`.
- Keep fixtures deterministic; match new cases to real-world advisories or regression scenarios.


@@ -0,0 +1,81 @@
using System.Diagnostics.CodeAnalysis;
namespace StellaOps.Feedser.Source.Osv.Configuration;
public sealed class OsvOptions
{
public const string HttpClientName = "source.osv";
public Uri BaseUri { get; set; } = new("https://osv-vulnerabilities.storage.googleapis.com/", UriKind.Absolute);
public IReadOnlyList<string> Ecosystems { get; set; } = new[] { "PyPI", "npm", "Maven", "Go", "crates.io" };
public TimeSpan InitialBackfill { get; set; } = TimeSpan.FromDays(14);
public TimeSpan ModifiedTolerance { get; set; } = TimeSpan.FromMinutes(10);
public int MaxAdvisoriesPerFetch { get; set; } = 250;
public string ArchiveFileName { get; set; } = "all.zip";
public TimeSpan RequestDelay { get; set; } = TimeSpan.FromMilliseconds(250);
public TimeSpan HttpTimeout { get; set; } = TimeSpan.FromMinutes(3);
[MemberNotNull(nameof(BaseUri), nameof(Ecosystems), nameof(ArchiveFileName))]
public void Validate()
{
if (BaseUri is null || !BaseUri.IsAbsoluteUri)
{
throw new InvalidOperationException("OSV base URI must be an absolute URI.");
}
if (string.IsNullOrWhiteSpace(ArchiveFileName))
{
throw new InvalidOperationException("OSV archive file name must be provided.");
}
if (!ArchiveFileName.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
{
throw new InvalidOperationException("OSV archive file name must be a .zip resource.");
}
if (Ecosystems is null || Ecosystems.Count == 0)
{
throw new InvalidOperationException("At least one OSV ecosystem must be configured.");
}
foreach (var ecosystem in Ecosystems)
{
if (string.IsNullOrWhiteSpace(ecosystem))
{
throw new InvalidOperationException("Ecosystem names cannot be null or whitespace.");
}
}
if (InitialBackfill <= TimeSpan.Zero)
{
throw new InvalidOperationException("Initial backfill window must be positive.");
}
if (ModifiedTolerance < TimeSpan.Zero)
{
throw new InvalidOperationException("Modified tolerance cannot be negative.");
}
if (MaxAdvisoriesPerFetch <= 0)
{
throw new InvalidOperationException("Max advisories per fetch must be greater than zero.");
}
if (RequestDelay < TimeSpan.Zero)
{
throw new InvalidOperationException("Request delay cannot be negative.");
}
if (HttpTimeout <= TimeSpan.Zero)
{
throw new InvalidOperationException("HTTP timeout must be positive.");
}
}
}


@@ -0,0 +1,290 @@
using System;
using System.Collections.Generic;
using System.Linq;
using MongoDB.Bson;
namespace StellaOps.Feedser.Source.Osv.Internal;
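/// <summary>
/// Fetch cursor persisted in source_state: per-ecosystem modified watermarks, the advisory ids already
/// processed at each watermark, archive ETag/Last-Modified metadata for conditional GETs, and the
/// document ids still pending parse or map.
/// </summary>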
internal sealed record OsvCursor(
IReadOnlyDictionary<string, DateTimeOffset?> LastModifiedByEcosystem,
IReadOnlyDictionary<string, IReadOnlyCollection<string>> ProcessedIdsByEcosystem,
IReadOnlyDictionary<string, OsvArchiveMetadata> ArchiveMetadataByEcosystem,
IReadOnlyCollection<Guid> PendingDocuments,
IReadOnlyCollection<Guid> PendingMappings)
{
private static readonly IReadOnlyDictionary<string, DateTimeOffset?> EmptyLastModified =
new Dictionary<string, DateTimeOffset?>(StringComparer.OrdinalIgnoreCase);
private static readonly IReadOnlyDictionary<string, IReadOnlyCollection<string>> EmptyProcessedIds =
new Dictionary<string, IReadOnlyCollection<string>>(StringComparer.OrdinalIgnoreCase);
private static readonly IReadOnlyDictionary<string, OsvArchiveMetadata> EmptyArchiveMetadata =
new Dictionary<string, OsvArchiveMetadata>(StringComparer.OrdinalIgnoreCase);
private static readonly IReadOnlyCollection<Guid> EmptyGuidList = Array.Empty<Guid>();
private static readonly IReadOnlyCollection<string> EmptyStringList = Array.Empty<string>();
public static OsvCursor Empty { get; } = new(EmptyLastModified, EmptyProcessedIds, EmptyArchiveMetadata, EmptyGuidList, EmptyGuidList);
public BsonDocument ToBsonDocument()
{
var document = new BsonDocument
{
["pendingDocuments"] = new BsonArray(PendingDocuments.Select(id => id.ToString())),
["pendingMappings"] = new BsonArray(PendingMappings.Select(id => id.ToString())),
};
if (LastModifiedByEcosystem.Count > 0)
{
var lastModifiedDoc = new BsonDocument();
foreach (var (ecosystem, timestamp) in LastModifiedByEcosystem)
{
lastModifiedDoc[ecosystem] = timestamp.HasValue ? BsonValue.Create(timestamp.Value.UtcDateTime) : BsonNull.Value;
}
document["lastModified"] = lastModifiedDoc;
}
if (ProcessedIdsByEcosystem.Count > 0)
{
var processedDoc = new BsonDocument();
foreach (var (ecosystem, ids) in ProcessedIdsByEcosystem)
{
processedDoc[ecosystem] = new BsonArray(ids);
}
document["processed"] = processedDoc;
}
if (ArchiveMetadataByEcosystem.Count > 0)
{
var metadataDoc = new BsonDocument();
foreach (var (ecosystem, metadata) in ArchiveMetadataByEcosystem)
{
var element = new BsonDocument();
if (!string.IsNullOrWhiteSpace(metadata.ETag))
{
element["etag"] = metadata.ETag;
}
if (metadata.LastModified.HasValue)
{
element["lastModified"] = metadata.LastModified.Value.UtcDateTime;
}
metadataDoc[ecosystem] = element;
}
document["archive"] = metadataDoc;
}
return document;
}
public static OsvCursor FromBson(BsonDocument? document)
{
if (document is null || document.ElementCount == 0)
{
return Empty;
}
var lastModified = ReadLastModified(document.TryGetValue("lastModified", out var lastModifiedValue) ? lastModifiedValue : null);
var processed = ReadProcessedIds(document.TryGetValue("processed", out var processedValue) ? processedValue : null);
var archiveMetadata = ReadArchiveMetadata(document.TryGetValue("archive", out var archiveValue) ? archiveValue : null);
var pendingDocuments = ReadGuidList(document, "pendingDocuments");
var pendingMappings = ReadGuidList(document, "pendingMappings");
return new OsvCursor(lastModified, processed, archiveMetadata, pendingDocuments, pendingMappings);
}
public DateTimeOffset? GetLastModified(string ecosystem)
{
ArgumentException.ThrowIfNullOrEmpty(ecosystem);
return LastModifiedByEcosystem.TryGetValue(ecosystem, out var value) ? value : null;
}
public bool HasProcessedId(string ecosystem, string id)
{
ArgumentException.ThrowIfNullOrEmpty(ecosystem);
ArgumentException.ThrowIfNullOrEmpty(id);
return ProcessedIdsByEcosystem.TryGetValue(ecosystem, out var ids)
&& ids.Contains(id, StringComparer.OrdinalIgnoreCase);
}
public OsvCursor WithLastModified(string ecosystem, DateTimeOffset timestamp, IEnumerable<string> processedIds)
{
ArgumentException.ThrowIfNullOrEmpty(ecosystem);
var lastModified = new Dictionary<string, DateTimeOffset?>(LastModifiedByEcosystem, StringComparer.OrdinalIgnoreCase)
{
[ecosystem] = timestamp.ToUniversalTime(),
};
var processed = new Dictionary<string, IReadOnlyCollection<string>>(ProcessedIdsByEcosystem, StringComparer.OrdinalIgnoreCase)
{
[ecosystem] = processedIds?.Where(static id => !string.IsNullOrWhiteSpace(id))
.Select(static id => id.Trim())
.Distinct(StringComparer.OrdinalIgnoreCase)
.ToArray() ?? EmptyStringList,
};
return this with { LastModifiedByEcosystem = lastModified, ProcessedIdsByEcosystem = processed };
}
public OsvCursor WithPendingDocuments(IEnumerable<Guid> ids)
=> this with { PendingDocuments = ids?.Distinct().ToArray() ?? EmptyGuidList };
public OsvCursor WithPendingMappings(IEnumerable<Guid> ids)
=> this with { PendingMappings = ids?.Distinct().ToArray() ?? EmptyGuidList };
public OsvCursor AddProcessedId(string ecosystem, string id)
{
ArgumentException.ThrowIfNullOrEmpty(ecosystem);
ArgumentException.ThrowIfNullOrEmpty(id);
var processed = new Dictionary<string, IReadOnlyCollection<string>>(ProcessedIdsByEcosystem, StringComparer.OrdinalIgnoreCase);
if (!processed.TryGetValue(ecosystem, out var ids))
{
ids = EmptyStringList;
}
var set = new HashSet<string>(ids, StringComparer.OrdinalIgnoreCase)
{
id.Trim(),
};
processed[ecosystem] = set.ToArray();
return this with { ProcessedIdsByEcosystem = processed };
}
public bool TryGetArchiveMetadata(string ecosystem, out OsvArchiveMetadata metadata)
{
ArgumentException.ThrowIfNullOrEmpty(ecosystem);
return ArchiveMetadataByEcosystem.TryGetValue(ecosystem, out metadata!);
}
public OsvCursor WithArchiveMetadata(string ecosystem, string? etag, DateTimeOffset? lastModified)
{
ArgumentException.ThrowIfNullOrEmpty(ecosystem);
var metadata = new Dictionary<string, OsvArchiveMetadata>(ArchiveMetadataByEcosystem, StringComparer.OrdinalIgnoreCase)
{
[ecosystem] = new OsvArchiveMetadata(etag?.Trim(), lastModified?.ToUniversalTime()),
};
return this with { ArchiveMetadataByEcosystem = metadata };
}
private static IReadOnlyDictionary<string, DateTimeOffset?> ReadLastModified(BsonValue? value)
{
if (value is not BsonDocument document)
{
return EmptyLastModified;
}
var dictionary = new Dictionary<string, DateTimeOffset?>(StringComparer.OrdinalIgnoreCase);
foreach (var element in document.Elements)
{
if (element.Value is null || element.Value.IsBsonNull)
{
dictionary[element.Name] = null;
continue;
}
dictionary[element.Name] = ParseDate(element.Value);
}
return dictionary;
}
private static IReadOnlyDictionary<string, IReadOnlyCollection<string>> ReadProcessedIds(BsonValue? value)
{
if (value is not BsonDocument document)
{
return EmptyProcessedIds;
}
var dictionary = new Dictionary<string, IReadOnlyCollection<string>>(StringComparer.OrdinalIgnoreCase);
foreach (var element in document.Elements)
{
if (element.Value is not BsonArray array)
{
continue;
}
var ids = new List<string>(array.Count);
foreach (var idValue in array)
{
if (idValue?.BsonType == BsonType.String)
{
var str = idValue.AsString.Trim();
if (!string.IsNullOrWhiteSpace(str))
{
ids.Add(str);
}
}
}
dictionary[element.Name] = ids.Count == 0
? EmptyStringList
: ids.Distinct(StringComparer.OrdinalIgnoreCase).ToArray();
}
return dictionary;
}
private static IReadOnlyDictionary<string, OsvArchiveMetadata> ReadArchiveMetadata(BsonValue? value)
{
if (value is not BsonDocument document)
{
return EmptyArchiveMetadata;
}
var dictionary = new Dictionary<string, OsvArchiveMetadata>(StringComparer.OrdinalIgnoreCase);
foreach (var element in document.Elements)
{
if (element.Value is not BsonDocument metadataDocument)
{
continue;
}
string? etag = metadataDocument.TryGetValue("etag", out var etagValue) && etagValue.IsString ? etagValue.AsString : null;
DateTimeOffset? lastModified = metadataDocument.TryGetValue("lastModified", out var lastModifiedValue)
? ParseDate(lastModifiedValue)
: null;
dictionary[element.Name] = new OsvArchiveMetadata(etag, lastModified);
}
return dictionary.Count == 0 ? EmptyArchiveMetadata : dictionary;
}
private static IReadOnlyCollection<Guid> ReadGuidList(BsonDocument document, string field)
{
if (!document.TryGetValue(field, out var value) || value is not BsonArray array)
{
return EmptyGuidList;
}
var list = new List<Guid>(array.Count);
foreach (var element in array)
{
if (Guid.TryParse(element.ToString(), out var guid))
{
list.Add(guid);
}
}
return list;
}
private static DateTimeOffset? ParseDate(BsonValue value)
{
return value.BsonType switch
{
BsonType.DateTime => DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc),
BsonType.String when DateTimeOffset.TryParse(value.AsString, out var parsed) => parsed.ToUniversalTime(),
_ => null,
};
}
}
internal sealed record OsvArchiveMetadata(string? ETag, DateTimeOffset? LastModified);


@@ -0,0 +1,484 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using StellaOps.Feedser.Models;
using StellaOps.Feedser.Normalization.Cvss;
using StellaOps.Feedser.Normalization.Identifiers;
using StellaOps.Feedser.Normalization.Text;
using StellaOps.Feedser.Source.Common;
using StellaOps.Feedser.Storage.Mongo.Documents;
using StellaOps.Feedser.Storage.Mongo.Dtos;
namespace StellaOps.Feedser.Source.Osv.Internal;
internal static class OsvMapper
{
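// Ordered least-to-most severe; used to keep the highest normalized CVSS base severity for the advisory.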
private static readonly string[] SeverityOrder = { "none", "low", "medium", "high", "critical" };
public static Advisory Map(
OsvVulnerabilityDto dto,
DocumentRecord document,
DtoRecord dtoRecord,
string ecosystem)
{
ArgumentNullException.ThrowIfNull(dto);
ArgumentNullException.ThrowIfNull(document);
ArgumentNullException.ThrowIfNull(dtoRecord);
ArgumentException.ThrowIfNullOrEmpty(ecosystem);
var recordedAt = dtoRecord.ValidatedAt;
var fetchProvenance = new AdvisoryProvenance(
OsvConnectorPlugin.SourceName,
"document",
document.Uri,
document.FetchedAt,
new[] { ProvenanceFieldMasks.Advisory });
var mappingProvenance = new AdvisoryProvenance(
OsvConnectorPlugin.SourceName,
"mapping",
dto.Id,
recordedAt,
new[] { ProvenanceFieldMasks.Advisory });
var aliases = BuildAliases(dto);
var references = BuildReferences(dto, recordedAt);
var credits = BuildCredits(dto, recordedAt);
var affectedPackages = BuildAffectedPackages(dto, ecosystem, recordedAt);
var cvssMetrics = BuildCvssMetrics(dto, recordedAt, out var severity);
var normalizedDescription = DescriptionNormalizer.Normalize(new[]
{
new LocalizedText(dto.Details, "en"),
new LocalizedText(dto.Summary, "en"),
});
var title = string.IsNullOrWhiteSpace(dto.Summary) ? dto.Id : dto.Summary!.Trim();
var summary = string.IsNullOrWhiteSpace(normalizedDescription.Text) ? dto.Summary : normalizedDescription.Text;
var language = string.IsNullOrWhiteSpace(normalizedDescription.Language) ? null : normalizedDescription.Language;
return new Advisory(
dto.Id,
title,
summary,
language,
dto.Published?.ToUniversalTime(),
dto.Modified?.ToUniversalTime(),
severity,
exploitKnown: false,
aliases,
credits,
references,
affectedPackages,
cvssMetrics,
new[] { fetchProvenance, mappingProvenance });
}
private static IEnumerable<string> BuildAliases(OsvVulnerabilityDto dto)
{
var aliases = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
dto.Id,
};
if (dto.Aliases is not null)
{
foreach (var alias in dto.Aliases)
{
if (!string.IsNullOrWhiteSpace(alias))
{
aliases.Add(alias.Trim());
}
}
}
if (dto.Related is not null)
{
foreach (var related in dto.Related)
{
if (!string.IsNullOrWhiteSpace(related))
{
aliases.Add(related.Trim());
}
}
}
return aliases;
}
private static IReadOnlyList<AdvisoryReference> BuildReferences(OsvVulnerabilityDto dto, DateTimeOffset recordedAt)
{
if (dto.References is null || dto.References.Count == 0)
{
return Array.Empty<AdvisoryReference>();
}
var references = new List<AdvisoryReference>(dto.References.Count);
foreach (var reference in dto.References)
{
if (string.IsNullOrWhiteSpace(reference.Url))
{
continue;
}
var kind = NormalizeReferenceKind(reference.Type);
var provenance = new AdvisoryProvenance(
OsvConnectorPlugin.SourceName,
"reference",
reference.Url,
recordedAt,
new[] { ProvenanceFieldMasks.References });
try
{
references.Add(new AdvisoryReference(reference.Url, kind, reference.Type, null, provenance));
}
catch (ArgumentException)
{
// ignore invalid URLs
}
}
if (references.Count <= 1)
{
return references;
}
references.Sort(CompareReferences);
var deduped = new List<AdvisoryReference>(references.Count);
string? lastUrl = null;
foreach (var reference in references)
{
if (lastUrl is not null && string.Equals(lastUrl, reference.Url, StringComparison.OrdinalIgnoreCase))
{
continue;
}
deduped.Add(reference);
lastUrl = reference.Url;
}
return deduped;
}
private static string? NormalizeReferenceKind(string? type)
{
if (string.IsNullOrWhiteSpace(type))
{
return null;
}
return type.Trim().ToLowerInvariant() switch
{
"advisory" => "advisory",
"exploit" => "exploit",
"fix" or "patch" => "patch",
"report" => "report",
"article" => "article",
_ => null,
};
}
private static IReadOnlyList<AffectedPackage> BuildAffectedPackages(OsvVulnerabilityDto dto, string ecosystem, DateTimeOffset recordedAt)
{
if (dto.Affected is null || dto.Affected.Count == 0)
{
return Array.Empty<AffectedPackage>();
}
var packages = new List<AffectedPackage>(dto.Affected.Count);
foreach (var affected in dto.Affected)
{
if (affected.Package is null)
{
continue;
}
var identifier = DetermineIdentifier(affected.Package, ecosystem);
if (identifier is null)
{
continue;
}
var provenance = new[]
{
new AdvisoryProvenance(
OsvConnectorPlugin.SourceName,
"affected",
identifier,
recordedAt,
new[] { ProvenanceFieldMasks.AffectedPackages }),
};
var ranges = BuildVersionRanges(affected, recordedAt, identifier);
packages.Add(new AffectedPackage(
AffectedPackageTypes.SemVer,
identifier,
platform: affected.Package.Ecosystem,
versionRanges: ranges,
statuses: Array.Empty<AffectedPackageStatus>(),
provenance: provenance));
}
return packages;
}
private static IReadOnlyList<AdvisoryCredit> BuildCredits(OsvVulnerabilityDto dto, DateTimeOffset recordedAt)
{
if (dto.Credits is null || dto.Credits.Count == 0)
{
return Array.Empty<AdvisoryCredit>();
}
var credits = new List<AdvisoryCredit>(dto.Credits.Count);
foreach (var credit in dto.Credits)
{
var displayName = Validation.TrimToNull(credit.Name);
if (displayName is null)
{
continue;
}
var contacts = credit.Contact is null
? Array.Empty<string>()
: credit.Contact
.Where(static contact => !string.IsNullOrWhiteSpace(contact))
.Select(static contact => contact.Trim())
.Where(static contact => contact.Length > 0)
.ToArray();
var provenance = new AdvisoryProvenance(
OsvConnectorPlugin.SourceName,
"credit",
displayName,
recordedAt,
new[] { ProvenanceFieldMasks.Credits });
credits.Add(new AdvisoryCredit(displayName, credit.Type, contacts, provenance));
}
return credits.Count == 0 ? Array.Empty<AdvisoryCredit>() : credits;
}
private static IReadOnlyList<AffectedVersionRange> BuildVersionRanges(OsvAffectedPackageDto affected, DateTimeOffset recordedAt, string identifier)
{
if (affected.Ranges is null || affected.Ranges.Count == 0)
{
return Array.Empty<AffectedVersionRange>();
}
var ranges = new List<AffectedVersionRange>();
foreach (var range in affected.Ranges)
{
if (!"semver".Equals(range.Type, StringComparison.OrdinalIgnoreCase)
&& !"ecosystem".Equals(range.Type, StringComparison.OrdinalIgnoreCase))
{
continue;
}
var provenance = new AdvisoryProvenance(
OsvConnectorPlugin.SourceName,
"range",
identifier,
recordedAt,
new[] { ProvenanceFieldMasks.VersionRanges });
if (range.Events is null || range.Events.Count == 0)
{
continue;
}
string? introduced = null;
string? lastAffected = null;
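// Walk the OSV events in order: "introduced" opens a range, "fixed" closes it and emits a range,
// while "last_affected"/"limit" cap an open range that is flushed after the loop.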
foreach (var evt in range.Events)
{
if (!string.IsNullOrWhiteSpace(evt.Introduced))
{
introduced = evt.Introduced.Trim();
lastAffected = null;
}
if (!string.IsNullOrWhiteSpace(evt.LastAffected))
{
lastAffected = evt.LastAffected.Trim();
}
if (!string.IsNullOrWhiteSpace(evt.Fixed))
{
var fixedVersion = evt.Fixed.Trim();
ranges.Add(new AffectedVersionRange(
"semver",
introduced,
fixedVersion,
lastAffected,
rangeExpression: null,
provenance: provenance,
primitives: BuildSemVerPrimitives(introduced, fixedVersion, lastAffected)));
introduced = null;
lastAffected = null;
}
if (!string.IsNullOrWhiteSpace(evt.Limit))
{
lastAffected = evt.Limit.Trim();
}
}
if (introduced is not null || lastAffected is not null)
{
ranges.Add(new AffectedVersionRange(
"semver",
introduced,
fixedVersion: null,
lastAffected,
rangeExpression: null,
provenance: provenance,
primitives: BuildSemVerPrimitives(introduced, null, lastAffected)));
}
}
return ranges.Count == 0
? Array.Empty<AffectedVersionRange>()
: ranges;
}
private static RangePrimitives BuildSemVerPrimitives(string? introduced, string? fixedVersion, string? lastAffected)
{
var semver = new SemVerPrimitive(
introduced,
IntroducedInclusive: true,
fixedVersion,
FixedInclusive: false,
lastAffected,
LastAffectedInclusive: true,
ConstraintExpression: null);
return new RangePrimitives(semver, null, null, null);
}
private static string? DetermineIdentifier(OsvPackageDto package, string ecosystem)
{
if (!string.IsNullOrWhiteSpace(package.Purl)
&& IdentifierNormalizer.TryNormalizePackageUrl(package.Purl, out var normalized))
{
return normalized;
}
if (!string.IsNullOrWhiteSpace(package.Name))
{
var name = package.Name.Trim();
return string.IsNullOrWhiteSpace(package.Ecosystem)
? $"{ecosystem}:{name}"
: $"{package.Ecosystem.Trim()}:{name}";
}
return null;
}
private static IReadOnlyList<CvssMetric> BuildCvssMetrics(OsvVulnerabilityDto dto, DateTimeOffset recordedAt, out string? severity)
{
severity = null;
if (dto.Severity is null || dto.Severity.Count == 0)
{
return Array.Empty<CvssMetric>();
}
var metrics = new List<CvssMetric>(dto.Severity.Count);
var bestRank = -1;
foreach (var severityEntry in dto.Severity)
{
if (string.IsNullOrWhiteSpace(severityEntry.Score))
{
continue;
}
if (!CvssMetricNormalizer.TryNormalize(severityEntry.Type, severityEntry.Score, null, null, out var normalized))
{
continue;
}
var provenance = new AdvisoryProvenance(OsvConnectorPlugin.SourceName, "cvss", severityEntry.Type ?? "osv", recordedAt);
metrics.Add(normalized.ToModel(provenance));
var rank = Array.IndexOf(SeverityOrder, normalized.BaseSeverity);
if (rank > bestRank)
{
bestRank = rank;
severity = normalized.BaseSeverity;
}
}
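// No CVSS vector normalized; fall back to the severity hint in database_specific, if present.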
if (bestRank < 0 && dto.DatabaseSpecific.ValueKind == JsonValueKind.Object &&
dto.DatabaseSpecific.TryGetProperty("severity", out var severityProperty))
{
var fallback = severityProperty.GetString();
if (!string.IsNullOrWhiteSpace(fallback))
{
severity = SeverityNormalization.Normalize(fallback);
}
}
return metrics;
}
private static int CompareReferences(AdvisoryReference? left, AdvisoryReference? right)
{
if (ReferenceEquals(left, right))
{
return 0;
}
if (left is null)
{
return 1;
}
if (right is null)
{
return -1;
}
var compare = StringComparer.OrdinalIgnoreCase.Compare(left.Url, right.Url);
if (compare != 0)
{
return compare;
}
compare = CompareNullable(left.Kind, right.Kind);
if (compare != 0)
{
return compare;
}
compare = CompareNullable(left.SourceTag, right.SourceTag);
if (compare != 0)
{
return compare;
}
return left.Provenance.RecordedAt.CompareTo(right.Provenance.RecordedAt);
}
private static int CompareNullable(string? left, string? right)
{
if (left is null && right is null)
{
return 0;
}
if (left is null)
{
return 1;
}
if (right is null)
{
return -1;
}
return StringComparer.Ordinal.Compare(left, right);
}
}


@@ -0,0 +1,129 @@
using System;
using System.Collections.Generic;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.Feedser.Source.Osv.Internal;
internal sealed record OsvVulnerabilityDto
{
[JsonPropertyName("id")]
public string Id { get; init; } = string.Empty;
[JsonPropertyName("summary")]
public string? Summary { get; init; }
[JsonPropertyName("details")]
public string? Details { get; init; }
[JsonPropertyName("aliases")]
public IReadOnlyList<string>? Aliases { get; init; }
[JsonPropertyName("related")]
public IReadOnlyList<string>? Related { get; init; }
[JsonPropertyName("published")]
public DateTimeOffset? Published { get; init; }
[JsonPropertyName("modified")]
public DateTimeOffset? Modified { get; init; }
[JsonPropertyName("severity")]
public IReadOnlyList<OsvSeverityDto>? Severity { get; init; }
[JsonPropertyName("references")]
public IReadOnlyList<OsvReferenceDto>? References { get; init; }
[JsonPropertyName("affected")]
public IReadOnlyList<OsvAffectedPackageDto>? Affected { get; init; }
[JsonPropertyName("credits")]
public IReadOnlyList<OsvCreditDto>? Credits { get; init; }
[JsonPropertyName("database_specific")]
public JsonElement DatabaseSpecific { get; init; }
}
internal sealed record OsvSeverityDto
{
[JsonPropertyName("type")]
public string? Type { get; init; }
[JsonPropertyName("score")]
public string? Score { get; init; }
}
internal sealed record OsvReferenceDto
{
[JsonPropertyName("type")]
public string? Type { get; init; }
[JsonPropertyName("url")]
public string? Url { get; init; }
}
internal sealed record OsvCreditDto
{
[JsonPropertyName("name")]
public string? Name { get; init; }
[JsonPropertyName("type")]
public string? Type { get; init; }
[JsonPropertyName("contact")]
public IReadOnlyList<string>? Contact { get; init; }
}
internal sealed record OsvAffectedPackageDto
{
[JsonPropertyName("package")]
public OsvPackageDto? Package { get; init; }
[JsonPropertyName("ranges")]
public IReadOnlyList<OsvRangeDto>? Ranges { get; init; }
[JsonPropertyName("versions")]
public IReadOnlyList<string>? Versions { get; init; }
[JsonPropertyName("ecosystem_specific")]
public JsonElement EcosystemSpecific { get; init; }
}
internal sealed record OsvPackageDto
{
[JsonPropertyName("ecosystem")]
public string? Ecosystem { get; init; }
[JsonPropertyName("name")]
public string? Name { get; init; }
[JsonPropertyName("purl")]
public string? Purl { get; init; }
}
internal sealed record OsvRangeDto
{
[JsonPropertyName("type")]
public string? Type { get; init; }
[JsonPropertyName("events")]
public IReadOnlyList<OsvEventDto>? Events { get; init; }
[JsonPropertyName("repo")]
public string? Repository { get; init; }
}
internal sealed record OsvEventDto
{
[JsonPropertyName("introduced")]
public string? Introduced { get; init; }
[JsonPropertyName("fixed")]
public string? Fixed { get; init; }
[JsonPropertyName("last_affected")]
public string? LastAffected { get; init; }
[JsonPropertyName("limit")]
public string? Limit { get; init; }
}


@@ -0,0 +1,46 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Feedser.Core.Jobs;
namespace StellaOps.Feedser.Source.Osv;
internal static class OsvJobKinds
{
public const string Fetch = "source:osv:fetch";
public const string Parse = "source:osv:parse";
public const string Map = "source:osv:map";
}
internal sealed class OsvFetchJob : IJob
{
private readonly OsvConnector _connector;
public OsvFetchJob(OsvConnector connector)
=> _connector = connector ?? throw new ArgumentNullException(nameof(connector));
public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
=> _connector.FetchAsync(context.Services, cancellationToken);
}
internal sealed class OsvParseJob : IJob
{
private readonly OsvConnector _connector;
public OsvParseJob(OsvConnector connector)
=> _connector = connector ?? throw new ArgumentNullException(nameof(connector));
public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
=> _connector.ParseAsync(context.Services, cancellationToken);
}
internal sealed class OsvMapJob : IJob
{
private readonly OsvConnector _connector;
public OsvMapJob(OsvConnector connector)
=> _connector = connector ?? throw new ArgumentNullException(nameof(connector));
public Task ExecuteAsync(JobExecutionContext context, CancellationToken cancellationToken)
=> _connector.MapAsync(context.Services, cancellationToken);
}


@@ -0,0 +1,500 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Security.Cryptography;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using StellaOps.Feedser.Models;
using StellaOps.Feedser.Source.Common;
using StellaOps.Feedser.Source.Common.Fetch;
using StellaOps.Feedser.Source.Osv.Configuration;
using StellaOps.Feedser.Source.Osv.Internal;
using StellaOps.Feedser.Storage.Mongo;
using StellaOps.Feedser.Storage.Mongo.Advisories;
using StellaOps.Feedser.Storage.Mongo.Documents;
using StellaOps.Feedser.Storage.Mongo.Dtos;
using StellaOps.Plugin;
namespace StellaOps.Feedser.Source.Osv;
public sealed class OsvConnector : IFeedConnector
{
private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
{
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
PropertyNameCaseInsensitive = true,
};
private readonly IHttpClientFactory _httpClientFactory;
private readonly RawDocumentStorage _rawDocumentStorage;
private readonly IDocumentStore _documentStore;
private readonly IDtoStore _dtoStore;
private readonly IAdvisoryStore _advisoryStore;
private readonly ISourceStateRepository _stateRepository;
private readonly OsvOptions _options;
private readonly TimeProvider _timeProvider;
private readonly ILogger<OsvConnector> _logger;
public OsvConnector(
IHttpClientFactory httpClientFactory,
RawDocumentStorage rawDocumentStorage,
IDocumentStore documentStore,
IDtoStore dtoStore,
IAdvisoryStore advisoryStore,
ISourceStateRepository stateRepository,
IOptions<OsvOptions> options,
TimeProvider? timeProvider,
ILogger<OsvConnector> logger)
{
_httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
_rawDocumentStorage = rawDocumentStorage ?? throw new ArgumentNullException(nameof(rawDocumentStorage));
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
_dtoStore = dtoStore ?? throw new ArgumentNullException(nameof(dtoStore));
_advisoryStore = advisoryStore ?? throw new ArgumentNullException(nameof(advisoryStore));
_stateRepository = stateRepository ?? throw new ArgumentNullException(nameof(stateRepository));
_options = (options ?? throw new ArgumentNullException(nameof(options))).Value ?? throw new ArgumentNullException(nameof(options));
_options.Validate();
_timeProvider = timeProvider ?? TimeProvider.System;
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public string SourceName => OsvConnectorPlugin.SourceName;
public async Task FetchAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow();
var pendingDocuments = cursor.PendingDocuments.ToHashSet();
var cursorState = cursor;
var remainingCapacity = _options.MaxAdvisoriesPerFetch;
foreach (var ecosystem in _options.Ecosystems)
{
if (remainingCapacity <= 0)
{
break;
}
cancellationToken.ThrowIfCancellationRequested();
try
{
var result = await FetchEcosystemAsync(
ecosystem,
cursorState,
pendingDocuments,
now,
remainingCapacity,
cancellationToken).ConfigureAwait(false);
cursorState = result.Cursor;
remainingCapacity -= result.NewDocuments;
}
catch (Exception ex)
{
_logger.LogError(ex, "OSV fetch failed for ecosystem {Ecosystem}", ecosystem);
await _stateRepository.MarkFailureAsync(SourceName, now, TimeSpan.FromMinutes(10), ex.Message, cancellationToken).ConfigureAwait(false);
throw;
}
}
cursorState = cursorState
.WithPendingDocuments(pendingDocuments)
.WithPendingMappings(cursor.PendingMappings);
await UpdateCursorAsync(cursorState, cancellationToken).ConfigureAwait(false);
}
public async Task ParseAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingDocuments.Count == 0)
{
return;
}
var remainingDocuments = cursor.PendingDocuments.ToList();
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingDocuments)
{
cancellationToken.ThrowIfCancellationRequested();
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (document is null)
{
remainingDocuments.Remove(documentId);
continue;
}
if (!document.GridFsId.HasValue)
{
_logger.LogWarning("OSV document {DocumentId} missing GridFS content", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
byte[] bytes;
try
{
bytes = await _rawDocumentStorage.DownloadAsync(document.GridFsId.Value, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogError(ex, "Unable to download OSV raw document {DocumentId}", document.Id);
throw;
}
OsvVulnerabilityDto? dto;
try
{
dto = JsonSerializer.Deserialize<OsvVulnerabilityDto>(bytes, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to deserialize OSV document {DocumentId} ({Uri})", document.Id, document.Uri);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
if (dto is null || string.IsNullOrWhiteSpace(dto.Id))
{
_logger.LogWarning("OSV document {DocumentId} produced empty payload", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
continue;
}
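// Re-serialize the typed DTO so only known fields reach storage, then persist the sanitized payload as BSON.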
var sanitized = JsonSerializer.Serialize(dto, SerializerOptions);
var payload = BsonDocument.Parse(sanitized);
var dtoRecord = new DtoRecord(
Guid.NewGuid(),
document.Id,
SourceName,
"osv.v1",
payload,
_timeProvider.GetUtcNow());
await _dtoStore.UpsertAsync(dtoRecord, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.PendingMap, cancellationToken).ConfigureAwait(false);
remainingDocuments.Remove(documentId);
if (!pendingMappings.Contains(documentId))
{
pendingMappings.Add(documentId);
}
}
var updatedCursor = cursor
.WithPendingDocuments(remainingDocuments)
.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
public async Task MapAsync(IServiceProvider services, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(services);
var cursor = await GetCursorAsync(cancellationToken).ConfigureAwait(false);
if (cursor.PendingMappings.Count == 0)
{
return;
}
var pendingMappings = cursor.PendingMappings.ToList();
foreach (var documentId in cursor.PendingMappings)
{
cancellationToken.ThrowIfCancellationRequested();
var dto = await _dtoStore.FindByDocumentIdAsync(documentId, cancellationToken).ConfigureAwait(false);
var document = await _documentStore.FindAsync(documentId, cancellationToken).ConfigureAwait(false);
if (dto is null || document is null)
{
pendingMappings.Remove(documentId);
continue;
}
var payloadJson = dto.Payload.ToJson(new JsonWriterSettings
{
OutputMode = JsonOutputMode.RelaxedExtendedJson,
});
OsvVulnerabilityDto? osvDto;
try
{
osvDto = JsonSerializer.Deserialize<OsvVulnerabilityDto>(payloadJson, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogError(ex, "Failed to deserialize OSV DTO for document {DocumentId}", document.Id);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
if (osvDto is null || string.IsNullOrWhiteSpace(osvDto.Id))
{
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Failed, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
continue;
}
var ecosystem = document.Metadata is not null && document.Metadata.TryGetValue("osv.ecosystem", out var ecosystemValue)
? ecosystemValue
: "unknown";
var advisory = OsvMapper.Map(osvDto, document, dto, ecosystem);
await _advisoryStore.UpsertAsync(advisory, cancellationToken).ConfigureAwait(false);
await _documentStore.UpdateStatusAsync(document.Id, DocumentStatuses.Mapped, cancellationToken).ConfigureAwait(false);
pendingMappings.Remove(documentId);
}
var updatedCursor = cursor.WithPendingMappings(pendingMappings);
await UpdateCursorAsync(updatedCursor, cancellationToken).ConfigureAwait(false);
}
private async Task<OsvCursor> GetCursorAsync(CancellationToken cancellationToken)
{
var state = await _stateRepository.TryGetAsync(SourceName, cancellationToken).ConfigureAwait(false);
return state is null ? OsvCursor.Empty : OsvCursor.FromBson(state.Cursor);
}
private async Task UpdateCursorAsync(OsvCursor cursor, CancellationToken cancellationToken)
{
var document = cursor.ToBsonDocument();
await _stateRepository.UpdateCursorAsync(SourceName, document, _timeProvider.GetUtcNow(), cancellationToken).ConfigureAwait(false);
}
private async Task<(OsvCursor Cursor, int NewDocuments)> FetchEcosystemAsync(
string ecosystem,
OsvCursor cursor,
HashSet<Guid> pendingDocuments,
DateTimeOffset now,
int remainingCapacity,
CancellationToken cancellationToken)
{
var client = _httpClientFactory.CreateClient(OsvOptions.HttpClientName);
client.Timeout = _options.HttpTimeout;
var archiveUri = BuildArchiveUri(ecosystem);
using var request = new HttpRequestMessage(HttpMethod.Get, archiveUri);
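// Reuse the stored ETag/Last-Modified so an unchanged archive short-circuits with 304 Not Modified.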
if (cursor.TryGetArchiveMetadata(ecosystem, out var archiveMetadata))
{
if (!string.IsNullOrWhiteSpace(archiveMetadata.ETag))
{
request.Headers.TryAddWithoutValidation("If-None-Match", archiveMetadata.ETag);
}
if (archiveMetadata.LastModified.HasValue)
{
request.Headers.IfModifiedSince = archiveMetadata.LastModified.Value;
}
}
using var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
if (response.StatusCode == HttpStatusCode.NotModified)
{
return (cursor, 0);
}
response.EnsureSuccessStatusCode();
await using var archiveStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read, leaveOpen: false);
var existingLastModified = cursor.GetLastModified(ecosystem);
var processedIdsSet = cursor.ProcessedIdsByEcosystem.TryGetValue(ecosystem, out var processedIds)
? new HashSet<string>(processedIds, StringComparer.OrdinalIgnoreCase)
: new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var currentMaxModified = existingLastModified ?? DateTimeOffset.MinValue;
var currentProcessedIds = new HashSet<string>(processedIdsSet, StringComparer.OrdinalIgnoreCase);
var processedUpdated = false;
var newDocuments = 0;
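// Resume from the stored watermark minus the modified tolerance; first runs fall back to the initial backfill window.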
var minimumModified = existingLastModified.HasValue
? existingLastModified.Value - _options.ModifiedTolerance
: now - _options.InitialBackfill;
ProvenanceDiagnostics.ReportResumeWindow(SourceName, minimumModified, _logger);
foreach (var entry in archive.Entries)
{
if (remainingCapacity <= 0)
{
break;
}
cancellationToken.ThrowIfCancellationRequested();
if (!entry.FullName.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
{
continue;
}
await using var entryStream = entry.Open();
using var memory = new MemoryStream();
await entryStream.CopyToAsync(memory, cancellationToken).ConfigureAwait(false);
var bytes = memory.ToArray();
OsvVulnerabilityDto? dto;
try
{
dto = JsonSerializer.Deserialize<OsvVulnerabilityDto>(bytes, SerializerOptions);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to parse OSV entry {Entry} for ecosystem {Ecosystem}", entry.FullName, ecosystem);
continue;
}
if (dto is null || string.IsNullOrWhiteSpace(dto.Id))
{
continue;
}
var modified = (dto.Modified ?? dto.Published ?? DateTimeOffset.MinValue).ToUniversalTime();
if (modified < minimumModified)
{
continue;
}
if (existingLastModified.HasValue && modified < existingLastModified.Value - _options.ModifiedTolerance)
{
continue;
}
if (modified < currentMaxModified - _options.ModifiedTolerance)
{
continue;
}
if (modified == currentMaxModified && currentProcessedIds.Contains(dto.Id))
{
continue;
}
var documentUri = BuildDocumentUri(ecosystem, dto.Id);
var sha256 = Convert.ToHexString(SHA256.HashData(bytes)).ToLowerInvariant();
var existing = await _documentStore.FindBySourceAndUriAsync(SourceName, documentUri, cancellationToken).ConfigureAwait(false);
if (existing is not null && string.Equals(existing.Sha256, sha256, StringComparison.OrdinalIgnoreCase))
{
continue;
}
var gridFsId = await _rawDocumentStorage.UploadAsync(SourceName, documentUri, bytes, "application/json", null, cancellationToken).ConfigureAwait(false);
var metadata = new Dictionary<string, string>(StringComparer.Ordinal)
{
["osv.ecosystem"] = ecosystem,
["osv.id"] = dto.Id,
["osv.modified"] = modified.ToString("O"),
};
var recordId = existing?.Id ?? Guid.NewGuid();
var record = new DocumentRecord(
recordId,
SourceName,
documentUri,
_timeProvider.GetUtcNow(),
sha256,
DocumentStatuses.PendingParse,
"application/json",
Headers: null,
Metadata: metadata,
Etag: null,
LastModified: modified,
GridFsId: gridFsId,
ExpiresAt: null);
var upserted = await _documentStore.UpsertAsync(record, cancellationToken).ConfigureAwait(false);
pendingDocuments.Add(upserted.Id);
newDocuments++;
remainingCapacity--;
if (modified > currentMaxModified)
{
currentMaxModified = modified;
currentProcessedIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { dto.Id };
processedUpdated = true;
}
else if (modified == currentMaxModified)
{
currentProcessedIds.Add(dto.Id);
processedUpdated = true;
}
if (_options.RequestDelay > TimeSpan.Zero)
{
try
{
await Task.Delay(_options.RequestDelay, cancellationToken).ConfigureAwait(false);
}
catch (TaskCanceledException)
{
break;
}
}
}
if (processedUpdated && currentMaxModified != DateTimeOffset.MinValue)
{
cursor = cursor.WithLastModified(ecosystem, currentMaxModified, currentProcessedIds);
}
else if (processedUpdated && existingLastModified.HasValue)
{
cursor = cursor.WithLastModified(ecosystem, existingLastModified.Value, currentProcessedIds);
}
var etag = response.Headers.ETag?.Tag;
var lastModifiedHeader = response.Content.Headers.LastModified;
cursor = cursor.WithArchiveMetadata(ecosystem, etag, lastModifiedHeader);
return (cursor, newDocuments);
}
private Uri BuildArchiveUri(string ecosystem)
{
var trimmed = ecosystem.Trim('/');
var baseUri = _options.BaseUri;
var builder = new UriBuilder(baseUri);
var path = builder.Path;
if (!path.EndsWith('/'))
{
path += "/";
}
path += $"{trimmed}/{_options.ArchiveFileName}";
builder.Path = path;
return builder.Uri;
}
private static string BuildDocumentUri(string ecosystem, string vulnerabilityId)
{
var safeId = vulnerabilityId.Replace(' ', '-');
return $"https://osv-vulnerabilities.storage.googleapis.com/{ecosystem}/{safeId}.json";
}
}


@@ -0,0 +1,20 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.Plugin;
namespace StellaOps.Feedser.Source.Osv;
public sealed class OsvConnectorPlugin : IConnectorPlugin
{
public string Name => SourceName;
public static string SourceName => "osv";
public bool IsAvailable(IServiceProvider services) => services is not null;
public IFeedConnector Create(IServiceProvider services)
{
ArgumentNullException.ThrowIfNull(services);
return ActivatorUtilities.CreateInstance<OsvConnector>(services);
}
}


@@ -0,0 +1,53 @@
using System;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using StellaOps.DependencyInjection;
using StellaOps.Feedser.Core.Jobs;
using StellaOps.Feedser.Source.Osv.Configuration;
namespace StellaOps.Feedser.Source.Osv;
public sealed class OsvDependencyInjectionRoutine : IDependencyInjectionRoutine
{
private const string ConfigurationSection = "feedser:sources:osv";
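// Fetch, parse, and map each run every 20 minutes, staggered by 5 minutes so each stage consumes the previous stage's output.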
private const string FetchCron = "0,20,40 * * * *";
private const string ParseCron = "5,25,45 * * * *";
private const string MapCron = "10,30,50 * * * *";
private static readonly TimeSpan FetchTimeout = TimeSpan.FromMinutes(15);
private static readonly TimeSpan ParseTimeout = TimeSpan.FromMinutes(20);
private static readonly TimeSpan MapTimeout = TimeSpan.FromMinutes(20);
private static readonly TimeSpan LeaseDuration = TimeSpan.FromMinutes(10);
public IServiceCollection Register(IServiceCollection services, IConfiguration configuration)
{
ArgumentNullException.ThrowIfNull(services);
ArgumentNullException.ThrowIfNull(configuration);
services.AddOsvConnector(options =>
{
configuration.GetSection(ConfigurationSection).Bind(options);
options.Validate();
});
var scheduler = new JobSchedulerBuilder(services);
scheduler
.AddJob<OsvFetchJob>(
OsvJobKinds.Fetch,
cronExpression: FetchCron,
timeout: FetchTimeout,
leaseDuration: LeaseDuration)
.AddJob<OsvParseJob>(
OsvJobKinds.Parse,
cronExpression: ParseCron,
timeout: ParseTimeout,
leaseDuration: LeaseDuration)
.AddJob<OsvMapJob>(
OsvJobKinds.Map,
cronExpression: MapCron,
timeout: MapTimeout,
leaseDuration: LeaseDuration);
return services;
}
}


@@ -0,0 +1,37 @@
using System;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using StellaOps.Feedser.Source.Common.Http;
using StellaOps.Feedser.Source.Osv.Configuration;
namespace StellaOps.Feedser.Source.Osv;
public static class OsvServiceCollectionExtensions
{
public static IServiceCollection AddOsvConnector(this IServiceCollection services, Action<OsvOptions> configure)
{
ArgumentNullException.ThrowIfNull(services);
ArgumentNullException.ThrowIfNull(configure);
services.AddOptions<OsvOptions>()
.Configure(configure)
.PostConfigure(static opts => opts.Validate());
services.AddSourceHttpClient(OsvOptions.HttpClientName, (sp, clientOptions) =>
{
var options = sp.GetRequiredService<IOptions<OsvOptions>>().Value;
clientOptions.BaseAddress = options.BaseUri;
clientOptions.Timeout = options.HttpTimeout;
clientOptions.UserAgent = "StellaOps.Feedser.OSV/1.0";
clientOptions.AllowedHosts.Clear();
clientOptions.AllowedHosts.Add(options.BaseUri.Host);
clientOptions.DefaultRequestHeaders["Accept"] = "application/zip";
});
services.AddTransient<OsvConnector>();
services.AddTransient<OsvFetchJob>();
services.AddTransient<OsvParseJob>();
services.AddTransient<OsvMapJob>();
return services;
}
}


@@ -0,0 +1,3 @@
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("FixtureUpdater")]


@@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="../StellaOps.Plugin/StellaOps.Plugin.csproj" />
<ProjectReference Include="..\StellaOps.Feedser.Source.Common\StellaOps.Feedser.Source.Common.csproj" />
<ProjectReference Include="..\StellaOps.Feedser.Models\StellaOps.Feedser.Models.csproj" />
<ProjectReference Include="..\StellaOps.Feedser.Storage.Mongo\StellaOps.Feedser.Storage.Mongo.csproj" />
<ProjectReference Include="..\StellaOps.Feedser.Normalization\StellaOps.Feedser.Normalization.csproj" />
<ProjectReference Include="..\StellaOps.Feedser.Core\StellaOps.Feedser.Core.csproj" />
</ItemGroup>
<ItemGroup>
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
<_Parameter1>StellaOps.Feedser.Tests</_Parameter1>
</AssemblyAttribute>
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
<_Parameter1>StellaOps.Feedser.Source.Osv.Tests</_Parameter1>
</AssemblyAttribute>
</ItemGroup>
</Project>


@@ -0,0 +1,15 @@
# TASKS
| Task | Owner(s) | Depends on | Notes |
|---|---|---|---|
|Ecosystem fetchers (npm, pypi, maven, go, crates)|BE-Conn-OSV|Source.Common|**DONE** archive fetch loop iterates ecosystems with pagination + change gating.|
|OSV options & HttpClient configuration|BE-Conn-OSV|Source.Common|**DONE** `OsvOptions` + `AddOsvConnector` configure allowlisted HttpClient.|
|DTO validation + sanitizer|BE-Conn-OSV|Source.Common|**DONE** JSON deserialization sanitizes payloads before persistence; schema enforcement deferred.|
|Mapper to canonical SemVer ranges|BE-Conn-OSV|Models|**DONE** `OsvMapper` emits SemVer ranges with provenance metadata.|
|Alias consolidation (GHSA/CVE)|BE-Merge|Merge|**DONE** OSV advisory records now emit GHSA/CVE aliases captured by alias graph tests.|
|Tests: snapshot per ecosystem|QA|Tests|**DONE** deterministic snapshots added for npm and PyPI advisories.|
|Cursor persistence and hash gating|BE-Conn-OSV|Storage.Mongo|**DONE** `OsvCursor` tracks per-ecosystem metadata and SHA gating.|
|Parity checks vs GHSA data|QA|Merge|**DONE** `OsvGhsaParityRegressionTests` keep OSV ↔ GHSA fixtures green; regeneration workflow documented in docs/19_TEST_SUITE_OVERVIEW.md.|
|Connector DI routine & job registration|BE-Conn-OSV|Core|**DONE** DI routine registers fetch/parse/map jobs with scheduler.|
|Implement OSV fetch/parse/map skeleton|BE-Conn-OSV|Source.Common|**DONE** connector now persists documents, DTOs, and canonical advisories.|
|FEEDCONN-OSV-02-004 OSV references & credits alignment|BE-Conn-OSV|Models `FEEDMODELS-SCHEMA-01-002`|**DONE (2025-10-11)** Mapper normalizes references with provenance masks, emits advisory credits, and regression fixtures/assertions cover the new fields.|
|FEEDCONN-OSV-02-005 Fixture updater workflow|BE-Conn-OSV, QA|Docs|TODO Document `tools/FixtureUpdater`, add parity regression steps, and ensure future refreshes capture credit metadata consistently.|