search and ai stabilization work, localization stabilized.

This commit is contained in:
master
2026-02-24 23:29:36 +02:00
parent 4f947a8b61
commit b07d27772e
766 changed files with 55299 additions and 3221 deletions

View File

@@ -1,12 +1,14 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.Abstractions;
using StellaOps.AdvisoryAI.Caching;
using StellaOps.AdvisoryAI.Chunking;
using StellaOps.AdvisoryAI.Execution;
using StellaOps.AdvisoryAI.Guardrails;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using StellaOps.AdvisoryAI.Metrics;
using StellaOps.AdvisoryAI.Orchestration;
using StellaOps.AdvisoryAI.Outputs;
@@ -42,7 +44,62 @@ public static class ToolsetServiceCollectionExtensions
services.TryAddEnumerable(ServiceDescriptor.Singleton<IDocumentChunker, OpenVexDocumentChunker>());
services.TryAddSingleton<IAdvisoryStructuredRetriever, AdvisoryStructuredRetriever>();
services.TryAddSingleton<ICryptoHash, DefaultCryptoHash>();
services.TryAddSingleton<IVectorEncoder, DeterministicHashVectorEncoder>();
// Sprint 102 / G1: Conditional vector encoder selection.
// When VectorEncoderType == "onnx", attempt to use the OnnxVectorEncoder with
// semantic inference. If the model file is missing or the ONNX runtime is unavailable,
// gracefully fall back to the DeterministicHashVectorEncoder and log a warning.
services.TryAddSingleton<IVectorEncoder>(provider =>
{
var ksOptions = provider.GetService<IOptions<KnowledgeSearchOptions>>()?.Value;
var encoderType = ksOptions?.VectorEncoderType ?? "hash";
if (string.Equals(encoderType, "onnx", StringComparison.OrdinalIgnoreCase))
{
var logger = provider.GetRequiredService<ILogger<OnnxVectorEncoder>>();
var modelPath = ksOptions?.OnnxModelPath ?? "models/all-MiniLM-L6-v2.onnx";
// Resolve relative paths from the application base directory
if (!Path.IsPathRooted(modelPath))
{
modelPath = Path.Combine(AppContext.BaseDirectory, modelPath);
}
var onnxEncoder = new OnnxVectorEncoder(modelPath, logger);
if (onnxEncoder.IsOnnxInferenceActive)
{
logger.LogInformation(
"Vector encoder: OnnxVectorEncoder (semantic inference active, model={ModelPath}).",
modelPath);
return onnxEncoder;
}
// ONNX model missing or runtime unavailable — fall back to hash encoder.
// The OnnxVectorEncoder internally falls back to a 384-dim character-ngram
// projection, but for true backward compatibility and consistency with the
// existing 64-dim hash path, we prefer the DeterministicHashVectorEncoder
// when ONNX inference is not actually available.
logger.LogWarning(
"VectorEncoderType is \"onnx\" but ONNX inference is not available " +
"(model not found at {ModelPath} or Microsoft.ML.OnnxRuntime not installed). " +
"Falling back to DeterministicHashVectorEncoder. " +
"Semantic search quality will be reduced.",
modelPath);
onnxEncoder.Dispose();
var cryptoHash = provider.GetRequiredService<ICryptoHash>();
return new DeterministicHashVectorEncoder(cryptoHash);
}
{
var cryptoHash = provider.GetRequiredService<ICryptoHash>();
var diLogger = provider.GetRequiredService<ILogger<DeterministicHashVectorEncoder>>();
diLogger.LogInformation("Vector encoder: DeterministicHashVectorEncoder (hash mode).");
return new DeterministicHashVectorEncoder(cryptoHash);
}
});
services.TryAddSingleton<IAdvisoryVectorRetriever, AdvisoryVectorRetriever>();
services.TryAddSingleton<ISbomContextClient, NullSbomContextClient>();
services.TryAddSingleton<ISbomContextRetriever, SbomContextRetriever>();

View File

@@ -59,6 +59,53 @@ internal static class DoctorSearchSeedLoader
.OrderBy(static entry => entry.CheckCode, StringComparer.Ordinal)
.ToList();
}
/// <summary>
/// Discovers and loads locale-specific doctor seed files that sit alongside the base seed.
/// Given a base path like <c>/repo/KnowledgeSearch/doctor-search-seed.json</c>, this method
/// looks for files matching <c>doctor-search-seed.{locale}.json</c> (e.g.,
/// <c>doctor-search-seed.de.json</c>, <c>doctor-search-seed.fr.json</c>).
/// Returns a dictionary keyed by the locale tag found between the base name and the
/// extension (e.g., "de", "fr", or a longer tag such as "en-US"); keys compare
/// case-insensitively. Empty or entry-less seed files are skipped.
/// </summary>
public static IReadOnlyDictionary<string, IReadOnlyList<DoctorSearchSeedEntry>> LoadLocalized(string baseSeedAbsolutePath)
{
    var result = new Dictionary<string, IReadOnlyList<DoctorSearchSeedEntry>>(StringComparer.OrdinalIgnoreCase);
    if (string.IsNullOrWhiteSpace(baseSeedAbsolutePath))
    {
        return result;
    }

    var directory = Path.GetDirectoryName(baseSeedAbsolutePath);
    if (string.IsNullOrEmpty(directory) || !Directory.Exists(directory))
    {
        return result;
    }

    // Base name without extension: "doctor-search-seed"
    var baseName = Path.GetFileNameWithoutExtension(baseSeedAbsolutePath);
    var pattern = $"{baseName}.*.json";

    // Sort the matches so ingestion order (and therefore last-write-wins for duplicate
    // locale tags) is deterministic across platforms and file systems.
    var localizedPaths = Directory.EnumerateFiles(directory, pattern)
        .OrderBy(static path => path, StringComparer.Ordinal);

    foreach (var localizedPath in localizedPaths)
    {
        // Extract locale tag: "doctor-search-seed.de.json" -> "de"
        var fileName = Path.GetFileNameWithoutExtension(localizedPath); // "doctor-search-seed.de"

        // Guard against search-pattern quirks (e.g. Windows 8.3 short-name matching) that
        // can surface files which do not literally start with "<baseName>."; without this
        // check the range expression below could throw ArgumentOutOfRangeException.
        if (fileName.Length <= baseName.Length + 1 ||
            !fileName.StartsWith($"{baseName}.", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        var localeTag = fileName[(baseName.Length + 1)..]; // "de"
        if (string.IsNullOrWhiteSpace(localeTag))
        {
            continue;
        }

        var entries = Load(localizedPath);
        if (entries.Count > 0)
        {
            result[localeTag] = entries;
        }
    }

    return result;
}
}
internal static class DoctorControlSeedLoader

View File

@@ -11,6 +11,15 @@ internal interface IKnowledgeSearchStore
KnowledgeSearchFilter? filters,
int take,
TimeSpan timeout,
CancellationToken cancellationToken,
string? locale = null);
Task<IReadOnlyList<KnowledgeChunkRow>> SearchFuzzyAsync(
string query,
KnowledgeSearchFilter? filters,
int take,
double similarityThreshold,
TimeSpan timeout,
CancellationToken cancellationToken);
Task<IReadOnlyList<KnowledgeChunkRow>> LoadVectorCandidatesAsync(

View File

@@ -470,6 +470,83 @@ internal sealed class KnowledgeIndexer : IKnowledgeIndexer
CreateJsonDocument(references),
chunkMetadata);
}
// ── Localized doctor seed ingestion ──
// Discover locale-specific seed files (e.g., doctor-search-seed.de.json) and index
// translated chunks so that FTS queries in those languages match doctor content.
var localizedSeeds = DoctorSearchSeedLoader.LoadLocalized(seedPath);
foreach (var (localeTag, localizedEntries) in localizedSeeds)
{
foreach (var locEntry in localizedEntries)
{
if (!seedEntries.TryGetValue(locEntry.CheckCode, out var baseEntry))
{
continue; // only index localized entries that have a corresponding base entry
}
// Reuse technical fields from the base entry; take translated user-facing text from locale entry.
var locTitle = !string.IsNullOrWhiteSpace(locEntry.Title) ? locEntry.Title : baseEntry.Title;
var locDescription = !string.IsNullOrWhiteSpace(locEntry.Description) ? locEntry.Description : baseEntry.Description;
var locRemediation = !string.IsNullOrWhiteSpace(locEntry.Remediation) ? locEntry.Remediation : baseEntry.Remediation;
var locSymptoms = locEntry.Symptoms is { Count: > 0 } ? locEntry.Symptoms : baseEntry.Symptoms;
var locSeverity = NormalizeSeverity(baseEntry.Severity);
var locRunCommand = baseEntry.RunCommand;
var locTags = baseEntry.Tags;
var locReferences = baseEntry.References;
controlEntries.TryGetValue(locEntry.CheckCode, out var locControl);
var control = BuildDoctorControl(
locEntry.CheckCode,
locSeverity,
locRunCommand,
baseEntry.Control,
locControl,
locSymptoms,
locTitle,
locDescription);
var locBody = BuildDoctorSearchBody(
locEntry.CheckCode, locTitle, locSeverity, locDescription, locRemediation,
locRunCommand, locSymptoms, locReferences, control);
var locChunkId = KnowledgeSearchText.StableId("chunk", "doctor", locEntry.CheckCode, locSeverity, localeTag);
var locDocId = KnowledgeSearchText.StableId("doc", "doctor", options.Product, options.Version, locEntry.CheckCode);
var locChunkMetadata = CreateJsonDocument(new SortedDictionary<string, object?>(StringComparer.Ordinal)
{
["checkCode"] = locEntry.CheckCode,
["severity"] = locSeverity,
["runCommand"] = locRunCommand,
["tags"] = locTags,
["service"] = "doctor",
["locale"] = localeTag,
["control"] = control.Control,
["requiresConfirmation"] = control.RequiresConfirmation,
["isDestructive"] = control.IsDestructive,
["requiresBackup"] = control.RequiresBackup,
["inspectCommand"] = control.InspectCommand,
["verificationCommand"] = control.VerificationCommand,
["keywords"] = control.Keywords
});
var locAnchor = KnowledgeSearchText.Slugify(locEntry.CheckCode);
chunks[locChunkId] = new KnowledgeChunkDocument(
locChunkId,
locDocId,
"doctor_check",
locAnchor,
$"Doctor > {locTitle} [{localeTag}]",
0,
0,
locTitle,
locBody,
EncodeEmbedding(locBody),
locChunkMetadata);
}
_logger.LogInformation("Indexed {Count} localized doctor seed entries for locale '{Locale}'.", localizedEntries.Count, localeTag);
}
}
private async Task<Dictionary<string, DoctorEndpointMetadata>> LoadDoctorEndpointMetadataAsync(string endpoint, CancellationToken cancellationToken)

View File

@@ -20,6 +20,8 @@ public sealed record KnowledgeSearchFilter
public string? Service { get; init; }
public IReadOnlyList<string>? Tags { get; init; }
public string? Tenant { get; init; }
}
public sealed record KnowledgeSearchResponse(
@@ -75,7 +77,8 @@ public sealed record KnowledgeSearchDiagnostics(
int VectorMatches,
long DurationMs,
bool UsedVector,
string Mode);
string Mode,
string ActiveEncoder = "hash");
internal sealed record KnowledgeSourceDocument(
string DocId,

View File

@@ -53,4 +53,121 @@ public sealed class KnowledgeSearchOptions
public List<string> MarkdownRoots { get; set; } = ["docs"];
public List<string> OpenApiRoots { get; set; } = ["src", "devops/compose"];
public string UnifiedFindingsSnapshotPath { get; set; } =
"src/AdvisoryAI/StellaOps.AdvisoryAI/UnifiedSearch/Snapshots/findings.snapshot.json";
public string UnifiedVexSnapshotPath { get; set; } =
"src/AdvisoryAI/StellaOps.AdvisoryAI/UnifiedSearch/Snapshots/vex.snapshot.json";
public string UnifiedPolicySnapshotPath { get; set; } =
"src/AdvisoryAI/StellaOps.AdvisoryAI/UnifiedSearch/Snapshots/policy.snapshot.json";
public bool UnifiedAutoIndexEnabled { get; set; }
public bool UnifiedAutoIndexOnStartup { get; set; } = true;
[Range(30, 86400)]
public int UnifiedIndexRefreshIntervalSeconds { get; set; } = 300;
public bool UnifiedFreshnessBoostEnabled { get; set; }
// ── Search personalization settings (Sprint 106 / G6) ──
/// <summary>
/// When enabled, results with higher click-through frequency receive a gentle additive
/// boost in RRF scoring. Disabled by default to preserve deterministic behavior for
/// testing and compliance. Deployments opt-in.
/// </summary>
public bool PopularityBoostEnabled { get; set; }
/// <summary>
/// Weight factor for the popularity boost. The actual boost per entity is
/// <c>log2(1 + clickCount) * PopularityBoostWeight</c>. Keep low to avoid
/// feedback loops where popular results dominate.
/// </summary>
[Range(0.0, 1.0)]
public double PopularityBoostWeight { get; set; } = 0.05;
/// <summary>
/// When enabled, the DomainWeightCalculator applies additive domain weight biases
/// based on the requesting user's scopes (e.g. scanner:read boosts findings).
/// </summary>
public bool RoleBasedBiasEnabled { get; set; } = true;
// ── Live adapter settings (Sprint 103 / G2) ──
/// <summary>Base URL for the Scanner microservice (e.g. "http://scanner:8080").</summary>
public string FindingsAdapterBaseUrl { get; set; } = string.Empty;
/// <summary>When false the live findings adapter is skipped entirely.</summary>
public bool FindingsAdapterEnabled { get; set; } = true;
/// <summary>Base URL for the Concelier canonical advisory service (e.g. "http://concelier:8080").</summary>
public string VexAdapterBaseUrl { get; set; } = string.Empty;
/// <summary>When false the live VEX adapter is skipped entirely.</summary>
public bool VexAdapterEnabled { get; set; } = true;
/// <summary>Base URL for the Policy Gateway service (e.g. "http://policy-gateway:8080").</summary>
public string PolicyAdapterBaseUrl { get; set; } = string.Empty;
/// <summary>When false the live policy adapter is skipped entirely.</summary>
public bool PolicyAdapterEnabled { get; set; } = true;
// ── Vector encoder settings (Sprint 102 / G1) ──
/// <summary>
/// Selects the vector encoder implementation. Values: "hash" (deterministic SHA-256 bag-of-tokens,
/// backward-compatible default) or "onnx" (semantic embeddings via all-MiniLM-L6-v2 ONNX model).
/// When "onnx" is selected but the model file is missing, the system falls back to "hash" with a warning.
/// </summary>
public string VectorEncoderType { get; set; } = "hash";
/// <summary>
/// File path to the ONNX embedding model (e.g., all-MiniLM-L6-v2.onnx). Used when
/// <see cref="VectorEncoderType"/> is "onnx". Relative paths are resolved from the application content root.
/// </summary>
public string OnnxModelPath { get; set; } = "models/all-MiniLM-L6-v2.onnx";
// ── LLM Synthesis settings (Sprint 104 / G3) ──
/// <summary>When true, the composite synthesis engine attempts LLM-grounded synthesis before template fallback.</summary>
public bool LlmSynthesisEnabled { get; set; }
/// <summary>Timeout in milliseconds for the LLM synthesis call. Exceeding this triggers template fallback.</summary>
[Range(1000, 30000)]
public int SynthesisTimeoutMs { get; set; } = 5000;
/// <summary>Base URL for the LLM adapter service (e.g. "http://advisory-ai:8080"). Empty disables LLM synthesis.</summary>
public string LlmAdapterBaseUrl { get; set; } = string.Empty;
/// <summary>Provider ID to use for LLM synthesis completions (e.g. "openai"). Empty disables LLM synthesis.</summary>
public string LlmProviderId { get; set; } = string.Empty;
public string FtsLanguageConfig { get; set; } = "english";
// ── Multilingual FTS settings (Sprint 109 / G9) ──
/// <summary>Mapping from locale to PostgreSQL FTS configuration name and tsvector column suffix.</summary>
public Dictionary<string, string> FtsLanguageConfigs { get; set; } = new(StringComparer.OrdinalIgnoreCase)
{
["en-US"] = "english",
["de-DE"] = "german",
["fr-FR"] = "french",
["es-ES"] = "spanish",
["ru-RU"] = "russian",
["bg-BG"] = "simple",
["uk-UA"] = "simple",
["zh-TW"] = "simple",
["zh-CN"] = "simple"
};
public bool FuzzyFallbackEnabled { get; set; } = true;
[Range(0, 50)]
public int MinFtsResultsForFuzzyFallback { get; set; } = 3;
[Range(0.1, 1.0)]
public double FuzzySimilarityThreshold { get; set; } = 0.3;
}

View File

@@ -77,6 +77,7 @@ internal sealed class KnowledgeSearchService : IKnowledgeSearchService
private readonly IVectorEncoder _vectorEncoder;
private readonly ILogger<KnowledgeSearchService> _logger;
private readonly TimeProvider _timeProvider;
private readonly string _activeEncoderName;
public KnowledgeSearchService(
IOptions<KnowledgeSearchOptions> options,
@@ -91,6 +92,27 @@ internal sealed class KnowledgeSearchService : IKnowledgeSearchService
_vectorEncoder = vectorEncoder ?? throw new ArgumentNullException(nameof(vectorEncoder));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
_activeEncoderName = ResolveActiveEncoderName(vectorEncoder, _options);
}
/// <summary>
/// Determines a human-readable name for the active vector encoder for diagnostics.
/// "onnx" / "onnx-fallback" when an <see cref="OnnxVectorEncoder"/> is wired up (depending
/// on whether ONNX inference is actually active); otherwise "hash-fallback" when the
/// configuration asked for "onnx" but a hash encoder is in use, and plain "hash" when the
/// hash encoder was the deliberate choice.
/// </summary>
private static string ResolveActiveEncoderName(IVectorEncoder encoder, KnowledgeSearchOptions options) =>
    encoder switch
    {
        OnnxVectorEncoder { IsOnnxInferenceActive: true } => "onnx",
        OnnxVectorEncoder => "onnx-fallback",
        _ when string.Equals(options.VectorEncoderType, "onnx", StringComparison.OrdinalIgnoreCase) => "hash-fallback",
        _ => "hash",
    };
public async Task<KnowledgeSearchResponse> SearchAsync(KnowledgeSearchRequest request, CancellationToken cancellationToken)
@@ -105,7 +127,7 @@ internal sealed class KnowledgeSearchService : IKnowledgeSearchService
string.Empty,
ResolveTopK(request.K),
[],
new KnowledgeSearchDiagnostics(0, 0, 0, false, "empty"));
new KnowledgeSearchDiagnostics(0, 0, 0, false, "empty", _activeEncoderName));
}
if (!_options.Enabled || string.IsNullOrWhiteSpace(_options.ConnectionString))
@@ -114,7 +136,7 @@ internal sealed class KnowledgeSearchService : IKnowledgeSearchService
query,
ResolveTopK(request.K),
[],
new KnowledgeSearchDiagnostics(0, 0, 0, false, "disabled"));
new KnowledgeSearchDiagnostics(0, 0, 0, false, "disabled", _activeEncoderName));
}
var topK = ResolveTopK(request.K);
@@ -127,6 +149,43 @@ internal sealed class KnowledgeSearchService : IKnowledgeSearchService
timeout,
cancellationToken).ConfigureAwait(false);
// G5-003: Fuzzy fallback — when FTS returns sparse results, augment with trigram matches
if (_options.FuzzyFallbackEnabled && ftsRows.Count < _options.MinFtsResultsForFuzzyFallback)
{
try
{
var fuzzyRows = await _store.SearchFuzzyAsync(
query,
request.Filters,
Math.Max(topK, _options.FtsCandidateCount),
_options.FuzzySimilarityThreshold,
timeout,
cancellationToken).ConfigureAwait(false);
if (fuzzyRows.Count > 0)
{
var existingIds = new HashSet<string>(
ftsRows.Select(static r => r.ChunkId), StringComparer.Ordinal);
var combined = new List<KnowledgeChunkRow>(ftsRows);
foreach (var fuzzyRow in fuzzyRows)
{
if (existingIds.Add(fuzzyRow.ChunkId))
{
combined.Add(fuzzyRow);
}
}
ftsRows = combined;
_logger.LogDebug(
"Fuzzy fallback added {FuzzyCount} candidates (FTS had {FtsCount}).",
fuzzyRows.Count, ftsRows.Count - fuzzyRows.Count);
}
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Knowledge search fuzzy fallback failed; continuing with FTS results only.");
}
}
var lexicalRanks = ftsRows
.Select((row, index) => (row.ChunkId, Rank: index + 1, Row: row))
.ToDictionary(static item => item.ChunkId, static item => item, StringComparer.Ordinal);
@@ -182,7 +241,8 @@ internal sealed class KnowledgeSearchService : IKnowledgeSearchService
vectorRows.Length,
(long)duration.TotalMilliseconds,
usedVector,
usedVector ? "hybrid" : "fts-only"));
usedVector ? "hybrid" : "fts-only",
_activeEncoderName));
}
private IReadOnlyList<(KnowledgeChunkRow Row, double Score, IReadOnlyDictionary<string, string> Debug)> FuseRanks(

View File

@@ -115,7 +115,8 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
KnowledgeSearchFilter? filters,
int take,
TimeSpan timeout,
CancellationToken cancellationToken)
CancellationToken cancellationToken,
string? locale = null)
{
if (!IsConfigured() || string.IsNullOrWhiteSpace(query) || take <= 0)
{
@@ -127,10 +128,13 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
var normalizedProduct = NormalizeOptional(filters?.Product);
var normalizedVersion = NormalizeOptional(filters?.Version);
var normalizedService = NormalizeOptional(filters?.Service);
var normalizedTenant = NormalizeOptional(filters?.Tenant);
const string sql = """
var (ftsConfig, tsvColumn) = ResolveFtsConfigAndColumn(locale);
var sql = $"""
WITH q AS (
SELECT websearch_to_tsquery('simple', @query) AS tsq
SELECT websearch_to_tsquery('{ftsConfig}', @query) AS tsq
)
SELECT
c.chunk_id,
@@ -144,7 +148,7 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
c.body,
COALESCE(
NULLIF(ts_headline(
'simple',
'{ftsConfig}',
c.body,
q.tsq,
'StartSel=<mark>, StopSel=</mark>, MaxFragments=2, MinWords=8, MaxWords=26, FragmentDelimiter= ... '
@@ -152,13 +156,13 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
substring(c.body from 1 for 320)
) AS snippet,
c.metadata,
ts_rank_cd(c.body_tsv, q.tsq, 32) AS lexical_score,
ts_rank_cd({tsvColumn}, q.tsq, 32) AS lexical_score,
c.embedding
FROM advisoryai.kb_chunk AS c
INNER JOIN advisoryai.kb_doc AS d
ON d.doc_id = c.doc_id
CROSS JOIN q
WHERE c.body_tsv @@ q.tsq
WHERE {tsvColumn} @@ q.tsq
AND (@kind_count = 0 OR c.kind = ANY(@kinds))
AND (@tag_count = 0 OR EXISTS (
SELECT 1
@@ -168,6 +172,11 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
AND (@product = '' OR lower(d.product) = lower(@product))
AND (@version = '' OR lower(d.version) = lower(@version))
AND (@service = '' OR lower(COALESCE(c.metadata->>'service', '')) = lower(@service))
AND (
@tenant = ''
OR lower(COALESCE(c.metadata->>'tenant', 'global')) = lower(@tenant)
OR lower(COALESCE(c.metadata->>'tenant', 'global')) = 'global'
)
ORDER BY lexical_score DESC, c.chunk_id ASC
LIMIT @take;
""";
@@ -188,6 +197,86 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
command.Parameters.AddWithValue("product", normalizedProduct);
command.Parameters.AddWithValue("version", normalizedVersion);
command.Parameters.AddWithValue("service", normalizedService);
command.Parameters.AddWithValue("tenant", normalizedTenant);
return await ReadChunkRowsAsync(command, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Fuzzy (trigram-similarity) fallback search used when full-text search yields sparse
/// results. Matches rows whose title or body <c>similarity()</c> score against
/// <paramref name="query"/> exceeds <paramref name="similarityThreshold"/>, honoring the
/// same kind/tag/product/version/service/tenant filters as the FTS path, and returns at
/// most <paramref name="take"/> rows ordered by the best of the two similarity scores
/// (ties broken by chunk_id for deterministic output).
/// NOTE(review): relies on the PostgreSQL <c>similarity()</c> function — presumably the
/// pg_trgm extension is installed by migrations; confirm.
/// </summary>
public async Task<IReadOnlyList<KnowledgeChunkRow>> SearchFuzzyAsync(
    string query,
    KnowledgeSearchFilter? filters,
    int take,
    double similarityThreshold,
    TimeSpan timeout,
    CancellationToken cancellationToken)
{
    // Bail out when the store is unconfigured or the request cannot match anything.
    // A non-positive threshold would effectively match every row, so treat it as disabled.
    if (!IsConfigured() || string.IsNullOrWhiteSpace(query) || take <= 0 || similarityThreshold <= 0)
    {
        return [];
    }

    // Normalize the optional filters the same way the FTS search path does.
    var kinds = ResolveKinds(filters);
    var tags = ResolveTags(filters);
    var normalizedProduct = NormalizeOptional(filters?.Product);
    var normalizedVersion = NormalizeOptional(filters?.Version);
    var normalizedService = NormalizeOptional(filters?.Service);
    var normalizedTenant = NormalizeOptional(filters?.Tenant);

    // lexical_score is emitted as a constant 0 so the row shape matches the FTS query;
    // ranking here comes solely from the ORDER BY on trigram similarity.
    const string sql = """
        SELECT
        c.chunk_id,
        c.doc_id,
        c.kind,
        c.anchor,
        c.section_path,
        c.span_start,
        c.span_end,
        c.title,
        c.body,
        substring(c.body from 1 for 320) AS snippet,
        c.metadata,
        0::double precision AS lexical_score,
        c.embedding
        FROM advisoryai.kb_chunk AS c
        INNER JOIN advisoryai.kb_doc AS d
        ON d.doc_id = c.doc_id
        WHERE (similarity(c.title, @query) > @threshold OR similarity(c.body, @query) > @threshold)
        AND (@kind_count = 0 OR c.kind = ANY(@kinds))
        AND (@tag_count = 0 OR EXISTS (
        SELECT 1
        FROM jsonb_array_elements_text(COALESCE(c.metadata->'tags', '[]'::jsonb)) AS tag(value)
        WHERE lower(tag.value) = ANY(@tags)
        ))
        AND (@product = '' OR lower(d.product) = lower(@product))
        AND (@version = '' OR lower(d.version) = lower(@version))
        AND (@service = '' OR lower(COALESCE(c.metadata->>'service', '')) = lower(@service))
        AND (
        @tenant = ''
        OR lower(COALESCE(c.metadata->>'tenant', 'global')) = lower(@tenant)
        OR lower(COALESCE(c.metadata->>'tenant', 'global')) = 'global'
        )
        ORDER BY GREATEST(similarity(c.title, @query), similarity(c.body, @query)) DESC, c.chunk_id ASC
        LIMIT @take;
        """;

    await using var command = CreateCommand(sql, timeout);
    command.Parameters.AddWithValue("query", query);
    command.Parameters.AddWithValue("take", take);
    command.Parameters.AddWithValue("threshold", similarityThreshold);
    // Count parameters let the SQL skip the corresponding filter when the array is empty.
    command.Parameters.AddWithValue("kind_count", kinds.Length);
    command.Parameters.AddWithValue(
        "kinds",
        NpgsqlDbType.Array | NpgsqlDbType.Text,
        kinds.Length == 0 ? Array.Empty<string>() : kinds);
    command.Parameters.AddWithValue("tag_count", tags.Length);
    command.Parameters.AddWithValue(
        "tags",
        NpgsqlDbType.Array | NpgsqlDbType.Text,
        tags.Length == 0 ? Array.Empty<string>() : tags);
    command.Parameters.AddWithValue("product", normalizedProduct);
    command.Parameters.AddWithValue("version", normalizedVersion);
    command.Parameters.AddWithValue("service", normalizedService);
    command.Parameters.AddWithValue("tenant", normalizedTenant);

    return await ReadChunkRowsAsync(command, cancellationToken).ConfigureAwait(false);
}
@@ -210,6 +299,7 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
var normalizedProduct = NormalizeOptional(filters?.Product);
var normalizedVersion = NormalizeOptional(filters?.Version);
var normalizedService = NormalizeOptional(filters?.Service);
var normalizedTenant = NormalizeOptional(filters?.Tenant);
var queryVectorLiteral = BuildVectorLiteral(queryEmbedding);
var useEmbeddingVectorColumn = await HasEmbeddingVectorColumnAsync(cancellationToken).ConfigureAwait(false);
@@ -243,6 +333,11 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
AND (@product = '' OR lower(d.product) = lower(@product))
AND (@version = '' OR lower(d.version) = lower(@version))
AND (@service = '' OR lower(COALESCE(c.metadata->>'service', '')) = lower(@service))
AND (
@tenant = ''
OR lower(COALESCE(c.metadata->>'tenant', 'global')) = lower(@tenant)
OR lower(COALESCE(c.metadata->>'tenant', 'global')) = 'global'
)
ORDER BY c.embedding_vec <=> CAST(@query_vector AS vector), c.chunk_id ASC
LIMIT @take;
"""
@@ -274,6 +369,11 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
AND (@product = '' OR lower(d.product) = lower(@product))
AND (@version = '' OR lower(d.version) = lower(@version))
AND (@service = '' OR lower(COALESCE(c.metadata->>'service', '')) = lower(@service))
AND (
@tenant = ''
OR lower(COALESCE(c.metadata->>'tenant', 'global')) = lower(@tenant)
OR lower(COALESCE(c.metadata->>'tenant', 'global')) = 'global'
)
ORDER BY c.chunk_id ASC
LIMIT @take;
""";
@@ -293,6 +393,7 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
command.Parameters.AddWithValue("product", normalizedProduct);
command.Parameters.AddWithValue("version", normalizedVersion);
command.Parameters.AddWithValue("service", normalizedService);
command.Parameters.AddWithValue("tenant", normalizedTenant);
command.Parameters.AddWithValue("query_vector", queryVectorLiteral);
return await ReadChunkRowsAsync(command, cancellationToken).ConfigureAwait(false);
@@ -316,6 +417,50 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
return string.IsNullOrWhiteSpace(value) ? string.Empty : value.Trim();
}
/// <summary>
/// Resolves the PostgreSQL FTS configuration name and tsvector column for a given locale.
/// Only a fixed whitelist of configuration names is honored: callers splice the returned
/// name directly into SQL text, so arbitrary (operator-supplied) configuration values must
/// never pass through unvalidated. Falls back to the default FtsLanguageConfig behavior
/// (english or simple) when the locale is unmapped or its mapping is not whitelisted.
/// </summary>
private (string FtsConfig, string TsvColumn) ResolveFtsConfigAndColumn(string? locale)
{
    if (!string.IsNullOrWhiteSpace(locale))
    {
        // Exact locale match (e.g. "de-DE").
        if (_options.FtsLanguageConfigs.TryGetValue(locale, out var mappedConfig) &&
            TryNormalizeFtsConfig(mappedConfig, out var exactConfig))
        {
            return (exactConfig, MapFtsConfigToTsvColumn(exactConfig));
        }

        // Short language code (e.g. "de" -> "de-DE"). Pick the ordinally-smallest matching
        // key so the result is deterministic regardless of dictionary enumeration order.
        if (locale.Length == 2)
        {
            string? bestKey = null;
            string? bestConfig = null;
            foreach (var kvp in _options.FtsLanguageConfigs)
            {
                if (!kvp.Key.StartsWith(locale, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                if (bestKey is null || string.CompareOrdinal(kvp.Key, bestKey) < 0)
                {
                    bestKey = kvp.Key;
                    bestConfig = kvp.Value;
                }
            }

            if (bestConfig is not null && TryNormalizeFtsConfig(bestConfig, out var prefixConfig))
            {
                return (prefixConfig, MapFtsConfigToTsvColumn(prefixConfig));
            }
        }
    }

    // Fall back to the default FtsLanguageConfig.
    var useEnglish = string.Equals(_options.FtsLanguageConfig, "english", StringComparison.OrdinalIgnoreCase);
    var ftsConfig = useEnglish ? "english" : "simple";
    var tsvColumn = useEnglish ? "c.body_tsv_en" : "c.body_tsv";
    return (ftsConfig, tsvColumn);
}

/// <summary>
/// Accepts only known-safe PostgreSQL FTS configuration names, normalized to lower case.
/// This guards the SQL-interpolation sites in the search queries against injection through
/// configuration values (FtsLanguageConfigs is operator-configurable).
/// </summary>
private static bool TryNormalizeFtsConfig(string? candidate, out string normalized)
{
    normalized = candidate?.Trim().ToLowerInvariant() ?? string.Empty;
    return normalized is "simple" or "english" or "german" or "french" or "spanish" or "russian";
}
/// <summary>
/// Maps a PostgreSQL FTS configuration name to the tsvector column that was populated with
/// that configuration at indexing time. Matching is case-insensitive for consistency with
/// the OrdinalIgnoreCase <c>FtsLanguageConfigs</c> lookup (previously a value like
/// "English" silently fell through to the base column while the query still used the
/// english parser). Unknown configurations fall back to the base body_tsv column.
/// </summary>
private static string MapFtsConfigToTsvColumn(string ftsConfig)
{
    return ftsConfig.Trim().ToLowerInvariant() switch
    {
        "english" => "c.body_tsv_en",
        "german" => "c.body_tsv_de",
        "french" => "c.body_tsv_fr",
        "spanish" => "c.body_tsv_es",
        "russian" => "c.body_tsv_ru",
        _ => "c.body_tsv" // 'simple' config uses the base body_tsv column
    };
}
private static string[] ResolveKinds(KnowledgeSearchFilter? filters)
{
if (filters?.Type is not { Count: > 0 })
@@ -346,6 +491,16 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
if (item.Equals("doctor", StringComparison.OrdinalIgnoreCase))
{
kinds.Add("doctor_check");
continue;
}
// Unified search domain kinds pass through directly
if (item.Equals("finding", StringComparison.OrdinalIgnoreCase) ||
item.Equals("vex_statement", StringComparison.OrdinalIgnoreCase) ||
item.Equals("policy_rule", StringComparison.OrdinalIgnoreCase) ||
item.Equals("platform_entity", StringComparison.OrdinalIgnoreCase))
{
kinds.Add(item.ToLowerInvariant());
}
}
@@ -532,6 +687,11 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
title,
body,
body_tsv,
body_tsv_en,
body_tsv_de,
body_tsv_fr,
body_tsv_es,
body_tsv_ru,
embedding,
embedding_vec,
metadata,
@@ -551,6 +711,21 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
setweight(to_tsvector('simple', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('simple', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('simple', coalesce(@body, '')), 'D'),
setweight(to_tsvector('english', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('english', coalesce(@body, '')), 'D'),
setweight(to_tsvector('german', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('german', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('german', coalesce(@body, '')), 'D'),
setweight(to_tsvector('french', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('french', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('french', coalesce(@body, '')), 'D'),
setweight(to_tsvector('spanish', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('spanish', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('spanish', coalesce(@body, '')), 'D'),
setweight(to_tsvector('russian', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('russian', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('russian', coalesce(@body, '')), 'D'),
@embedding,
CAST(@embedding_vector AS vector),
@metadata::jsonb,
@@ -570,6 +745,11 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
title,
body,
body_tsv,
body_tsv_en,
body_tsv_de,
body_tsv_fr,
body_tsv_es,
body_tsv_ru,
embedding,
metadata,
indexed_at
@@ -588,6 +768,21 @@ internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsy
setweight(to_tsvector('simple', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('simple', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('simple', coalesce(@body, '')), 'D'),
setweight(to_tsvector('english', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('english', coalesce(@body, '')), 'D'),
setweight(to_tsvector('german', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('german', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('german', coalesce(@body, '')), 'D'),
setweight(to_tsvector('french', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('french', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('french', coalesce(@body, '')), 'D'),
setweight(to_tsvector('spanish', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('spanish', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('spanish', coalesce(@body, '')), 'D'),
setweight(to_tsvector('russian', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('russian', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('russian', coalesce(@body, '')), 'D'),
@embedding,
@metadata::jsonb,
NOW()

View File

@@ -0,0 +1,170 @@
[
{
"checkCode": "check.core.disk.space",
"title": "Speicherplatzverfügbarkeit",
"severity": "high",
"description": "Geringer Speicherplatz kann Aufnahmepipelines und Worker-Ausführung blockieren.",
"remediation": "Speicherplatz freigeben und Aufbewahrungseinstellungen überprüfen.",
"runCommand": "stella doctor run --check check.core.disk.space",
"symptoms": [
"Kein Speicherplatz mehr auf dem Gerät",
"Festplatte voll",
"Schreibfehler"
],
"tags": [
"doctor",
"storage",
"core"
],
"references": [
"docs/operations/devops/runbooks/deployment-upgrade.md"
]
},
{
"checkCode": "check.core.db.connectivity",
"title": "PostgreSQL-Konnektivität",
"severity": "high",
"description": "Doctor konnte keine Verbindung zu PostgreSQL herstellen oder Verbindungsprüfungen haben das Zeitlimit überschritten.",
"remediation": "Anmeldedaten, Netzwerkerreichbarkeit und TLS-Einstellungen überprüfen.",
"runCommand": "stella doctor run --check check.core.db.connectivity",
"symptoms": [
"Datenbank nicht verfügbar",
"Verbindung abgelehnt",
"Zeitlimit überschritten"
],
"tags": [
"doctor",
"database",
"connectivity"
],
"references": [
"docs/INSTALL_GUIDE.md"
]
},
{
"checkCode": "check.security.oidc.readiness",
"title": "OIDC-Bereitschaft",
"severity": "warn",
"description": "OIDC-Voraussetzungen fehlen oder die Metadaten des Identitätsausstellers sind nicht erreichbar.",
"remediation": "Aussteller-URL, JWKS-Verfügbarkeit und Authority-Client-Konfiguration überprüfen.",
"runCommand": "stella doctor run --check check.security.oidc.readiness",
"symptoms": [
"OIDC-Einrichtung",
"Ungültiger Aussteller",
"JWKS-Abruf fehlgeschlagen"
],
"tags": [
"doctor",
"security",
"oidc"
],
"references": [
"docs/modules/authority/architecture.md"
]
},
{
"checkCode": "check.router.gateway.routes",
"title": "Router-Routenregistrierung",
"severity": "warn",
"description": "Erwartete Gateway-Routen wurden nicht registriert oder Integritätsprüfungen sind fehlgeschlagen.",
"remediation": "Routentabellen prüfen und Router-Registrierung aktualisieren.",
"runCommand": "stella doctor run --check check.router.gateway.routes",
"symptoms": [
"Route fehlt",
"404 auf erwartetem Endpunkt",
"Gateway-Routing"
],
"tags": [
"doctor",
"router",
"gateway"
],
"references": [
"docs/modules/router/README.md"
]
},
{
"checkCode": "check.integrations.secrets.binding",
"title": "Integrations-Geheimnisbindung",
"severity": "medium",
"description": "Integrations-Konnektoren können konfigurierte Geheimnisse nicht auflösen.",
"remediation": "Geheimnisanbieter-Konfiguration überprüfen und ungültige Anmeldedaten rotieren.",
"runCommand": "stella doctor run --check check.integrations.secrets.binding",
"symptoms": [
"Geheimnis fehlt",
"Ungültige Anmeldedaten",
"Authentifizierung fehlgeschlagen"
],
"tags": [
"doctor",
"integrations",
"secrets"
],
"references": [
"docs/modules/platform/architecture-overview.md"
]
},
{
"checkCode": "check.release.policy.gate",
"title": "Richtlinientor-Voraussetzungen",
"severity": "warn",
"description": "Voraussetzungen des Release-Richtlinientors sind für die Zielumgebung unvollständig.",
"remediation": "Erforderliche Genehmigungen, Richtlinien-Bundle-Versionen und Attestierungen überprüfen.",
"runCommand": "stella doctor run --check check.release.policy.gate",
"symptoms": [
"Richtlinientor fehlgeschlagen",
"Fehlende Attestierung",
"Promotion blockiert"
],
"tags": [
"doctor",
"release",
"policy"
],
"references": [
"docs/operations/upgrade-runbook.md"
]
},
{
"checkCode": "check.airgap.bundle.integrity",
"title": "Air-Gap-Bundle-Integrität",
"severity": "high",
"description": "Offline-Bundle-Integritätsprüfung fehlgeschlagen.",
"remediation": "Bundle neu erstellen und Signaturen sowie Prüfsummen vor dem Import verifizieren.",
"runCommand": "stella doctor run --check check.airgap.bundle.integrity",
"symptoms": [
"Prüfsummen-Abweichung",
"Ungültige Signatur",
"Offline-Import fehlgeschlagen"
],
"tags": [
"doctor",
"airgap",
"integrity"
],
"references": [
"docs/operations/devops/runbooks/deployment-upgrade.md"
]
},
{
"checkCode": "check.telemetry.pipeline.delivery",
"title": "Telemetry-Zustellungspipeline",
"severity": "medium",
"description": "Der Telemetry-Warteschlangen-Rückstand wächst oder Zustellungs-Worker sind blockiert.",
"remediation": "Worker skalieren, Warteschlangentiefe prüfen und nachgelagerte Verfügbarkeit validieren.",
"runCommand": "stella doctor run --check check.telemetry.pipeline.delivery",
"symptoms": [
"Telemetry-Verzögerung",
"Warteschlangen-Rückstand",
"Zustellungszeitlimit"
],
"tags": [
"doctor",
"telemetry",
"queue"
],
"references": [
"docs/modules/platform/architecture-overview.md"
]
}
]

View File

@@ -0,0 +1,170 @@
[
{
"checkCode": "check.core.disk.space",
"title": "Disponibilité de l'espace disque",
"severity": "high",
"description": "Un espace disque insuffisant peut bloquer les pipelines d'ingestion et l'exécution des workers.",
"remediation": "Libérer de l'espace disque et vérifier les paramètres de rétention.",
"runCommand": "stella doctor run --check check.core.disk.space",
"symptoms": [
"Plus d'espace disponible sur le périphérique",
"Disque plein",
"Échec d'écriture"
],
"tags": [
"doctor",
"storage",
"core"
],
"references": [
"docs/operations/devops/runbooks/deployment-upgrade.md"
]
},
{
"checkCode": "check.core.db.connectivity",
"title": "Connectivité PostgreSQL",
"severity": "high",
"description": "Doctor n'a pas pu se connecter à PostgreSQL ou les vérifications de connexion ont expiré.",
"remediation": "Vérifier les identifiants, l'accessibilité réseau et les paramètres TLS.",
"runCommand": "stella doctor run --check check.core.db.connectivity",
"symptoms": [
"Base de données indisponible",
"Connexion refusée",
"Délai d'attente expiré"
],
"tags": [
"doctor",
"database",
"connectivity"
],
"references": [
"docs/INSTALL_GUIDE.md"
]
},
{
"checkCode": "check.security.oidc.readiness",
"title": "État de préparation OIDC",
"severity": "warn",
"description": "Les prérequis OIDC sont manquants ou les métadonnées de l'émetteur d'identité ne sont pas accessibles.",
"remediation": "Vérifier l'URL de l'émetteur, la disponibilité JWKS et la configuration du client Authority.",
"runCommand": "stella doctor run --check check.security.oidc.readiness",
"symptoms": [
"Configuration OIDC",
"Émetteur invalide",
"Échec de récupération JWKS"
],
"tags": [
"doctor",
"security",
"oidc"
],
"references": [
"docs/modules/authority/architecture.md"
]
},
{
"checkCode": "check.router.gateway.routes",
"title": "Enregistrement des routes du router",
"severity": "warn",
"description": "Les routes attendues du gateway n'ont pas été enregistrées ou les sondes de santé ont échoué.",
"remediation": "Inspecter les tables de routage et rafraîchir l'enregistrement du router.",
"runCommand": "stella doctor run --check check.router.gateway.routes",
"symptoms": [
"Route manquante",
"404 sur un point de terminaison attendu",
"Routage du gateway"
],
"tags": [
"doctor",
"router",
"gateway"
],
"references": [
"docs/modules/router/README.md"
]
},
{
"checkCode": "check.integrations.secrets.binding",
"title": "Liaison des secrets d'intégration",
"severity": "medium",
"description": "Les connecteurs d'intégration ne peuvent pas résoudre les secrets configurés.",
"remediation": "Valider la configuration du fournisseur de secrets et effectuer la rotation des identifiants invalides.",
"runCommand": "stella doctor run --check check.integrations.secrets.binding",
"symptoms": [
"Secret manquant",
"Identifiants invalides",
"Échec d'authentification"
],
"tags": [
"doctor",
"integrations",
"secrets"
],
"references": [
"docs/modules/platform/architecture-overview.md"
]
},
{
"checkCode": "check.release.policy.gate",
"title": "Prérequis du portail de politique",
"severity": "warn",
"description": "Les prérequis du portail de politique de release sont incomplets pour l'environnement cible.",
"remediation": "Vérifier les approbations requises, les versions du bundle de politique et les attestations.",
"runCommand": "stella doctor run --check check.release.policy.gate",
"symptoms": [
"Échec du portail de politique",
"Attestation manquante",
"Promotion bloquée"
],
"tags": [
"doctor",
"release",
"policy"
],
"references": [
"docs/operations/upgrade-runbook.md"
]
},
{
"checkCode": "check.airgap.bundle.integrity",
"title": "Intégrité du bundle air-gap",
"severity": "high",
"description": "La validation de l'intégrité du bundle hors ligne a échoué.",
"remediation": "Reconstruire le bundle et vérifier les signatures et les sommes de contrôle avant l'importation.",
"runCommand": "stella doctor run --check check.airgap.bundle.integrity",
"symptoms": [
"Somme de contrôle incorrecte",
"Signature invalide",
"Échec de l'importation hors ligne"
],
"tags": [
"doctor",
"airgap",
"integrity"
],
"references": [
"docs/operations/devops/runbooks/deployment-upgrade.md"
]
},
{
"checkCode": "check.telemetry.pipeline.delivery",
"title": "Pipeline de livraison de télémétrie",
"severity": "medium",
"description": "L'arriéré de la file d'attente de télémétrie augmente ou les workers de livraison sont bloqués.",
"remediation": "Mettre à l'échelle les workers, inspecter la profondeur de la file d'attente et valider la disponibilité en aval.",
"runCommand": "stella doctor run --check check.telemetry.pipeline.delivery",
"symptoms": [
"Retard de télémétrie",
"Arriéré de file d'attente",
"Délai de livraison expiré"
],
"tags": [
"doctor",
"telemetry",
"queue"
],
"references": [
"docs/modules/platform/architecture-overview.md"
]
}
]

View File

@@ -1,3 +1,4 @@
using System.Runtime.CompilerServices;

// Expose internal types to the unit-test assembly.
[assembly: InternalsVisibleTo("StellaOps.AdvisoryAI.Tests")]
// DynamicProxyGenAssembly2 is the assembly Castle DynamicProxy emits at runtime;
// granting it access lets mocking frameworks proxy internal interfaces in tests.
[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")]

View File

@@ -10,9 +10,11 @@
<ItemGroup>
<InternalsVisibleTo Include="StellaOps.Bench.AdvisoryAI" />
<InternalsVisibleTo Include="StellaOps.AdvisoryAI.Tests" />
<InternalsVisibleTo Include="StellaOps.AdvisoryAI.WebService" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Storage\Migrations\**\*.sql" LogicalName="%(RecursiveDir)%(Filename)%(Extension)" />
<EmbeddedResource Include="UnifiedSearch\Synthesis\synthesis-system-prompt.txt" LogicalName="synthesis-system-prompt.txt" />
</ItemGroup>
<ItemGroup>
<!-- Prevent automatic compiled-model binding so non-default schemas can build runtime models. -->
@@ -22,6 +24,12 @@
<None Update="KnowledgeSearch/doctor-search-seed.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="KnowledgeSearch/doctor-search-seed.de.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="KnowledgeSearch/doctor-search-seed.fr.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.EntityFrameworkCore" />

View File

@@ -0,0 +1,60 @@
-- AdvisoryAI Unified Search schema extension
-- Sprint: SPRINT_20260223_097_AdvisoryAI_unified_search_index_foundation
-- Add domain-aware columns to kb_chunk for multi-source federation.
-- Each ALTER is guarded by an information_schema lookup so the migration is idempotent.
DO $$
BEGIN
    -- entity_key: cross-domain identity of the described entity (e.g. 'cve:CVE-...'),
    -- used to join chunks from different source domains.
    IF NOT EXISTS (
        SELECT 1 FROM information_schema.columns
        WHERE table_schema = 'advisoryai' AND table_name = 'kb_chunk' AND column_name = 'entity_key'
    ) THEN
        ALTER TABLE advisoryai.kb_chunk ADD COLUMN entity_key TEXT;
    END IF;

    -- entity_type: kind of entity the chunk describes (e.g. 'finding').
    IF NOT EXISTS (
        SELECT 1 FROM information_schema.columns
        WHERE table_schema = 'advisoryai' AND table_name = 'kb_chunk' AND column_name = 'entity_type'
    ) THEN
        ALTER TABLE advisoryai.kb_chunk ADD COLUMN entity_type TEXT;
    END IF;

    -- domain: ingestion source domain; pre-existing rows default to 'knowledge'.
    IF NOT EXISTS (
        SELECT 1 FROM information_schema.columns
        WHERE table_schema = 'advisoryai' AND table_name = 'kb_chunk' AND column_name = 'domain'
    ) THEN
        ALTER TABLE advisoryai.kb_chunk ADD COLUMN domain TEXT NOT NULL DEFAULT 'knowledge';
    END IF;

    -- freshness: optional timestamp of the source data the chunk was derived from.
    IF NOT EXISTS (
        SELECT 1 FROM information_schema.columns
        WHERE table_schema = 'advisoryai' AND table_name = 'kb_chunk' AND column_name = 'freshness'
    ) THEN
        ALTER TABLE advisoryai.kb_chunk ADD COLUMN freshness TIMESTAMPTZ;
    END IF;
END
$$;

-- Indexes for unified search filtering.
-- Partial index: only chunks that actually carry an entity key are indexed.
CREATE INDEX IF NOT EXISTS idx_kb_chunk_entity_key
    ON advisoryai.kb_chunk (entity_key)
    WHERE entity_key IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_kb_chunk_domain
    ON advisoryai.kb_chunk (domain);

-- Entity alias table for cross-domain entity resolution
-- (maps alternate identifiers back to a canonical entity_key/entity_type pair).
CREATE TABLE IF NOT EXISTS advisoryai.entity_alias
(
    alias       TEXT NOT NULL,
    entity_key  TEXT NOT NULL,
    entity_type TEXT NOT NULL,
    source      TEXT NOT NULL,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    PRIMARY KEY (alias, entity_key)
);

CREATE INDEX IF NOT EXISTS idx_entity_alias_alias
    ON advisoryai.entity_alias (alias);

CREATE INDEX IF NOT EXISTS idx_entity_alias_entity
    ON advisoryai.entity_alias (entity_key, entity_type);

View File

@@ -0,0 +1,87 @@
-- AdvisoryAI FTS English stemming + pg_trgm fuzzy support
-- Sprint: SPRINT_20260224_101_AdvisoryAI_fts_english_stemming_fuzzy_tolerance
--
-- Adds:
--   1. pg_trgm extension for fuzzy / LIKE / similarity queries
--   2. body_tsv_en TSVECTOR column (english config) with A/B/D weights on title/section_path/body
--   3. GIN index on body_tsv_en for english FTS
--   4. Backfill body_tsv_en from existing rows
--   5. GIN trigram indexes on title and body for fuzzy matching
--
-- The existing body_tsv column (simple config) is intentionally preserved as fallback.
-- This migration is fully idempotent.

-- 1. Enable pg_trgm extension (safe on managed Postgres; bundled with contrib).
--    The broad WHEN OTHERS handler is deliberate: on instances where the extension
--    cannot be installed (missing contrib, insufficient privileges) the migration
--    must still succeed; step 5 then skips the trigram indexes.
DO $$
BEGIN
    CREATE EXTENSION IF NOT EXISTS pg_trgm;
EXCEPTION
    WHEN OTHERS THEN
        RAISE NOTICE 'pg_trgm extension is unavailable; fuzzy trigram indexes will not be created.';
END
$$;

-- 2. Add body_tsv_en TSVECTOR column (english config, generated from title + section_path + body)
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema = 'advisoryai'
          AND table_name = 'kb_chunk'
          AND column_name = 'body_tsv_en'
    ) THEN
        ALTER TABLE advisoryai.kb_chunk
            ADD COLUMN body_tsv_en TSVECTOR;
    END IF;
END
$$;

-- 3. Backfill body_tsv_en from existing data using english config with weighted sections:
--      A = title        (highest relevance)
--      B = section_path (structural context)
--      D = body         (full content, lowest weight)
--    NOTE(review): this is a single full-table UPDATE; on very large kb_chunk tables
--    it may hold locks for a while -- confirm acceptable for the deployment window.
UPDATE advisoryai.kb_chunk
SET body_tsv_en =
        setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
        setweight(to_tsvector('english', coalesce(section_path, '')), 'B') ||
        setweight(to_tsvector('english', coalesce(body, '')), 'D')
WHERE body_tsv_en IS NULL;

-- 4. GIN index on body_tsv_en for english full-text search
CREATE INDEX IF NOT EXISTS idx_kb_chunk_body_tsv_en
    ON advisoryai.kb_chunk USING GIN (body_tsv_en);

-- 5. GIN trigram indexes for fuzzy / LIKE / similarity matching on title and body.
--    These are created conditionally: only when pg_trgm is available.
DO $$
BEGIN
    IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_trgm') THEN
        -- Trigram index on title for fuzzy title matching
        IF NOT EXISTS (
            SELECT 1 FROM pg_indexes
            WHERE schemaname = 'advisoryai'
              AND tablename = 'kb_chunk'
              AND indexname = 'idx_kb_chunk_title_trgm'
        ) THEN
            CREATE INDEX idx_kb_chunk_title_trgm
                ON advisoryai.kb_chunk USING GIN (title gin_trgm_ops);
        END IF;

        -- Trigram index on body for fuzzy body matching
        IF NOT EXISTS (
            SELECT 1 FROM pg_indexes
            WHERE schemaname = 'advisoryai'
              AND tablename = 'kb_chunk'
              AND indexname = 'idx_kb_chunk_body_trgm'
        ) THEN
            CREATE INDEX idx_kb_chunk_body_trgm
                ON advisoryai.kb_chunk USING GIN (body gin_trgm_ops);
        END IF;
    ELSE
        RAISE NOTICE 'pg_trgm not available; skipping trigram indexes on kb_chunk.title and kb_chunk.body.';
    END IF;
END
$$;

View File

@@ -0,0 +1,46 @@
-- 005_search_analytics.sql: Search analytics, feedback, and history tables

-- Search events for analytics: one row per query, result click, or zero-result query.
CREATE TABLE IF NOT EXISTS advisoryai.search_events (
    event_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    user_id TEXT,
    event_type TEXT NOT NULL, -- 'query', 'click', 'zero_result'
    query TEXT NOT NULL,
    entity_key TEXT,
    domain TEXT,
    result_count INT,
    position INT,      -- rank of the clicked result (click events)
    duration_ms INT,   -- query execution time (query events)
    created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_search_events_tenant_type ON advisoryai.search_events (tenant_id, event_type, created_at);
-- Partial index: only events attributable to an entity.
CREATE INDEX IF NOT EXISTS idx_search_events_entity ON advisoryai.search_events (entity_key) WHERE entity_key IS NOT NULL;

-- Search history per user.
-- NOTE(review): UNIQUE(tenant_id, user_id, query) means repeat searches must be written
-- with an upsert (ON CONFLICT) or the insert will fail -- confirm the writer does this.
CREATE TABLE IF NOT EXISTS advisoryai.search_history (
    history_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    user_id TEXT NOT NULL,
    query TEXT NOT NULL,
    result_count INT,
    searched_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(tenant_id, user_id, query)
);
CREATE INDEX IF NOT EXISTS idx_search_history_user ON advisoryai.search_history (tenant_id, user_id, searched_at DESC);

-- Search feedback (for Sprint 110 / G10 but create now).
-- The same table is also created by 006_search_feedback.sql with an identical definition;
-- both use CREATE TABLE IF NOT EXISTS, so running both migrations is harmless.
CREATE TABLE IF NOT EXISTS advisoryai.search_feedback (
    feedback_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    user_id TEXT,
    query TEXT NOT NULL,
    entity_key TEXT NOT NULL,
    domain TEXT NOT NULL,
    position INT NOT NULL,
    signal TEXT NOT NULL, -- 'helpful', 'not_helpful'
    comment TEXT,
    created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_search_feedback_tenant ON advisoryai.search_feedback (tenant_id, created_at);
CREATE INDEX IF NOT EXISTS idx_search_feedback_entity ON advisoryai.search_feedback (entity_key, signal);

View File

@@ -0,0 +1,45 @@
-- AdvisoryAI Search Feedback and Quality Alerts
-- Sprint: SPRINT_20260224_110_AdvisoryAI_search_feedback_analytics_loop
--
-- Adds:
--   1. search_feedback table for user result-level feedback (thumbs up/down)
--   2. search_quality_alerts table for zero-result and low-quality query alerting
--
-- This migration is fully idempotent.

-- 1. search_feedback table.
--    NOTE: 005_search_analytics.sql creates this table with an identical definition;
--    CREATE TABLE IF NOT EXISTS makes whichever migration runs second a no-op.
CREATE TABLE IF NOT EXISTS advisoryai.search_feedback (
    feedback_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    user_id TEXT,
    query TEXT NOT NULL,
    entity_key TEXT NOT NULL,
    domain TEXT NOT NULL,
    position INT NOT NULL,  -- rank of the result the feedback refers to
    signal TEXT NOT NULL,   -- feedback signal, e.g. 'helpful' / 'not_helpful'
    comment TEXT,
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_search_feedback_tenant
    ON advisoryai.search_feedback (tenant_id, created_at);

CREATE INDEX IF NOT EXISTS idx_search_feedback_entity
    ON advisoryai.search_feedback (entity_key, signal);

-- 2. search_quality_alerts table: aggregated alerts for problematic queries,
--    tracking how often and over what window a query misbehaved.
CREATE TABLE IF NOT EXISTS advisoryai.search_quality_alerts (
    alert_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    alert_type TEXT NOT NULL,         -- alert category (e.g. zero-result)
    query TEXT NOT NULL,
    occurrence_count INT NOT NULL,
    first_seen TIMESTAMPTZ NOT NULL,
    last_seen TIMESTAMPTZ NOT NULL,
    status TEXT DEFAULT 'open',       -- alert lifecycle state
    resolution TEXT,
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_search_quality_alerts_tenant
    ON advisoryai.search_quality_alerts (tenant_id, status, created_at);

View File

@@ -0,0 +1,117 @@
-- 007_multilingual_fts.sql: Multi-language FTS tsvector columns
-- Sprint: SPRINT_20260224_109_AdvisoryAI_multilingual_search_intelligence
--
-- Adds language-specific tsvector columns for German, French, Spanish, and Russian.
-- Each column uses weighted sections matching the English config from 004_fts_english_trgm.sql:
--   A = title        (highest relevance)
--   B = section_path (structural context)
--   D = body         (full content, lowest weight)
--
-- Languages without built-in PostgreSQL text search configs (bg, uk, zh) use 'simple'
-- via the existing body_tsv column and do not need dedicated columns.
--
-- This migration is fully idempotent. Each language follows the same three-step
-- pattern: guarded ADD COLUMN, backfill of NULL rows, GIN index.
-- NOTE(review): the backfills are full-table UPDATEs; on large kb_chunk tables each
-- pass rewrites every row -- confirm acceptable for the deployment window.

-- 1. German FTS tsvector column
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema = 'advisoryai'
          AND table_name = 'kb_chunk'
          AND column_name = 'body_tsv_de'
    ) THEN
        ALTER TABLE advisoryai.kb_chunk
            ADD COLUMN body_tsv_de TSVECTOR;
    END IF;
END
$$;

UPDATE advisoryai.kb_chunk
SET body_tsv_de =
        setweight(to_tsvector('german', coalesce(title, '')), 'A') ||
        setweight(to_tsvector('german', coalesce(section_path, '')), 'B') ||
        setweight(to_tsvector('german', coalesce(body, '')), 'D')
WHERE body_tsv_de IS NULL;

CREATE INDEX IF NOT EXISTS idx_kb_chunk_body_tsv_de
    ON advisoryai.kb_chunk USING GIN (body_tsv_de);

-- 2. French FTS tsvector column
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema = 'advisoryai'
          AND table_name = 'kb_chunk'
          AND column_name = 'body_tsv_fr'
    ) THEN
        ALTER TABLE advisoryai.kb_chunk
            ADD COLUMN body_tsv_fr TSVECTOR;
    END IF;
END
$$;

UPDATE advisoryai.kb_chunk
SET body_tsv_fr =
        setweight(to_tsvector('french', coalesce(title, '')), 'A') ||
        setweight(to_tsvector('french', coalesce(section_path, '')), 'B') ||
        setweight(to_tsvector('french', coalesce(body, '')), 'D')
WHERE body_tsv_fr IS NULL;

CREATE INDEX IF NOT EXISTS idx_kb_chunk_body_tsv_fr
    ON advisoryai.kb_chunk USING GIN (body_tsv_fr);

-- 3. Spanish FTS tsvector column
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema = 'advisoryai'
          AND table_name = 'kb_chunk'
          AND column_name = 'body_tsv_es'
    ) THEN
        ALTER TABLE advisoryai.kb_chunk
            ADD COLUMN body_tsv_es TSVECTOR;
    END IF;
END
$$;

UPDATE advisoryai.kb_chunk
SET body_tsv_es =
        setweight(to_tsvector('spanish', coalesce(title, '')), 'A') ||
        setweight(to_tsvector('spanish', coalesce(section_path, '')), 'B') ||
        setweight(to_tsvector('spanish', coalesce(body, '')), 'D')
WHERE body_tsv_es IS NULL;

CREATE INDEX IF NOT EXISTS idx_kb_chunk_body_tsv_es
    ON advisoryai.kb_chunk USING GIN (body_tsv_es);

-- 4. Russian FTS tsvector column
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema = 'advisoryai'
          AND table_name = 'kb_chunk'
          AND column_name = 'body_tsv_ru'
    ) THEN
        ALTER TABLE advisoryai.kb_chunk
            ADD COLUMN body_tsv_ru TSVECTOR;
    END IF;
END
$$;

UPDATE advisoryai.kb_chunk
SET body_tsv_ru =
        setweight(to_tsvector('russian', coalesce(title, '')), 'A') ||
        setweight(to_tsvector('russian', coalesce(section_path, '')), 'B') ||
        setweight(to_tsvector('russian', coalesce(body, '')), 'D')
WHERE body_tsv_ru IS NULL;

CREATE INDEX IF NOT EXISTS idx_kb_chunk_body_tsv_ru
    ON advisoryai.kb_chunk USING GIN (body_tsv_ru);

View File

@@ -0,0 +1,164 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using StellaOps.AdvisoryAI.Vectorization;
using System.Globalization;
using System.Linq;
using System.Text.Json;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Adapters;
/// <summary>
/// Unified-search ingestion adapter that projects vulnerability findings from a static
/// JSON snapshot file into <c>findings</c>-domain chunks.
/// </summary>
internal sealed class FindingIngestionAdapter : ISearchIngestionAdapter
{
    private readonly IVectorEncoder _vectorEncoder;
    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<FindingIngestionAdapter> _logger;

    public FindingIngestionAdapter(
        IVectorEncoder vectorEncoder,
        IOptions<KnowledgeSearchOptions> options,
        ILogger<FindingIngestionAdapter> logger)
    {
        _vectorEncoder = vectorEncoder ?? throw new ArgumentNullException(nameof(vectorEncoder));
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Search domain served by this adapter.</summary>
    public string Domain => "findings";

    /// <summary>Entity types this adapter can produce.</summary>
    public IReadOnlyList<string> SupportedEntityTypes => ["finding"];

    /// <summary>
    /// Reads the configured snapshot (a JSON array of finding objects) and maps every
    /// entry carrying a <c>cveId</c> into a <see cref="UnifiedChunk"/>. Returns an empty
    /// list when the snapshot file is missing or its root is not a JSON array.
    /// </summary>
    public async Task<IReadOnlyList<UnifiedChunk>> ProduceChunksAsync(CancellationToken cancellationToken)
    {
        var path = ResolvePath(_options.UnifiedFindingsSnapshotPath);
        if (!File.Exists(path))
        {
            _logger.LogDebug("Unified finding snapshot not found at {Path}. Skipping findings ingestion.", path);
            return [];
        }

        await using var stream = File.OpenRead(path);
        using var document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
        if (document.RootElement.ValueKind != JsonValueKind.Array)
        {
            _logger.LogWarning("Unified finding snapshot at {Path} is not a JSON array. Skipping findings ingestion.", path);
            return [];
        }

        var chunks = new List<UnifiedChunk>();
        foreach (var entry in document.RootElement.EnumerateArray())
        {
            cancellationToken.ThrowIfCancellationRequested();
            if (entry.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            // A finding without a CVE identifier cannot be keyed; skip it.
            var cveId = ReadString(entry, "cveId");
            if (string.IsNullOrWhiteSpace(cveId))
            {
                continue;
            }

            var findingId = ReadString(entry, "findingId") ?? cveId;
            var severity = ReadString(entry, "severity") ?? "unknown";
            var title = ReadString(entry, "title") ?? cveId;
            var description = ReadString(entry, "description") ?? string.Empty;
            var service = ReadString(entry, "service") ?? "scanner";
            var tenant = ReadString(entry, "tenant") ?? "global";
            var tags = ReadStringArray(entry, "tags", ["finding", "vulnerability", severity]);

            var body = string.IsNullOrWhiteSpace(description)
                ? $"{title}\nSeverity: {severity}"
                : $"{title}\n{description}\nSeverity: {severity}";

            var chunkId = KnowledgeSearchText.StableId("chunk", "finding", findingId, cveId);
            var docId = KnowledgeSearchText.StableId("doc", "finding", findingId);
            var embedding = _vectorEncoder.Encode(body);
            // Freshness stays null when the snapshot omits it (the live FindingsSearchAdapter
            // defaults to UtcNow instead) -- NOTE(review): confirm the difference is intentional.
            var freshness = ReadTimestamp(entry, "freshness");
            var metadata = BuildMetadata(cveId, severity, service, tenant, tags);

            chunks.Add(new UnifiedChunk(
                ChunkId: chunkId,
                DocId: docId,
                Kind: "finding",
                Domain: Domain,
                Title: title,
                Body: body,
                Embedding: embedding,
                EntityKey: $"cve:{cveId}",
                EntityType: "finding",
                Anchor: null,
                SectionPath: null,
                SpanStart: 0,
                SpanEnd: body.Length,
                Freshness: freshness,
                Metadata: metadata));
        }

        return chunks;
    }

    // Serializes the small per-chunk metadata envelope stored alongside the chunk.
    private static JsonDocument BuildMetadata(
        string cveId,
        string severity,
        string service,
        string tenant,
        IReadOnlyList<string> tags)
    {
        return JsonDocument.Parse(JsonSerializer.Serialize(new
        {
            domain = "findings",
            cveId,
            severity,
            service,
            tenant,
            tags
        }));
    }

    // Resolves the snapshot path against the configured repository root when relative.
    private string ResolvePath(string configuredPath)
    {
        if (Path.IsPathRooted(configuredPath))
        {
            return configuredPath;
        }

        var root = string.IsNullOrWhiteSpace(_options.RepositoryRoot) ? "." : _options.RepositoryRoot;
        return Path.GetFullPath(Path.Combine(root, configuredPath));
    }

    // Returns the trimmed string value of a property, or null when absent / not a string.
    private static string? ReadString(JsonElement obj, string propertyName)
    {
        return obj.TryGetProperty(propertyName, out var prop) && prop.ValueKind == JsonValueKind.String
            ? prop.GetString()?.Trim()
            : null;
    }

    // Parses an optional timestamp property, returning null when absent or unparseable.
    // Fix: snapshot timestamps are machine-written, so parse with the invariant culture
    // (CA1305) instead of the host's regional settings, which the parameterless
    // TryParse overload would use.
    private static DateTimeOffset? ReadTimestamp(JsonElement obj, string propertyName)
    {
        var raw = ReadString(obj, propertyName);
        if (raw is null)
        {
            return null;
        }

        return DateTimeOffset.TryParse(raw, CultureInfo.InvariantCulture, DateTimeOffsetStyles.None, out var timestamp)
            ? timestamp
            : null;
    }

    // Reads a string-array property, falling back to the supplied defaults. Results are
    // trimmed, de-duplicated case-insensitively, and sorted for deterministic output.
    private static IReadOnlyList<string> ReadStringArray(JsonElement obj, string propertyName, IReadOnlyList<string> fallback)
    {
        if (!obj.TryGetProperty(propertyName, out var prop) || prop.ValueKind != JsonValueKind.Array)
        {
            return fallback;
        }

        return prop.EnumerateArray()
            .Where(static value => value.ValueKind == JsonValueKind.String)
            .Select(static value => value.GetString())
            .Where(static value => !string.IsNullOrWhiteSpace(value))
            .Select(static value => value!.Trim())
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderBy(static value => value, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }
}

View File

@@ -0,0 +1,373 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using StellaOps.AdvisoryAI.Vectorization;
using System.Net.Http.Json;
using System.Text.Json;
using System.Linq;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Adapters;
/// <summary>
/// Live data adapter that fetches findings from the Scanner microservice.
/// Falls back to the static snapshot file when the upstream service is unreachable.
/// </summary>
internal sealed class FindingsSearchAdapter : ISearchIngestionAdapter
{
    // Header used to scope requests to a tenant; this adapter always sends "global".
    private const string TenantHeader = "X-StellaOps-Tenant";
    // Name of the IHttpClientFactory registration used to reach the Scanner service.
    private const string HttpClientName = "scanner-internal";
    // Relative endpoint serving paged security findings.
    private const string FindingsEndpoint = "/api/v1/scanner/security/findings";
    // Pagination safety cap: at most MaxPages * PageSize findings per ingestion run.
    private const int MaxPages = 20;
    private const int PageSize = 100;

    private readonly IHttpClientFactory _httpClientFactory;
    private readonly IVectorEncoder _vectorEncoder;
    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<FindingsSearchAdapter> _logger;

    /// <summary>
    /// Wires the adapter's dependencies. A null <see cref="IOptions{T}.Value"/> falls
    /// back to default <see cref="KnowledgeSearchOptions"/>.
    /// </summary>
    public FindingsSearchAdapter(
        IHttpClientFactory httpClientFactory,
        IVectorEncoder vectorEncoder,
        IOptions<KnowledgeSearchOptions> options,
        ILogger<FindingsSearchAdapter> logger)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        _vectorEncoder = vectorEncoder ?? throw new ArgumentNullException(nameof(vectorEncoder));
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Search domain served by this adapter.</summary>
    public string Domain => "findings";

    /// <summary>Entity types this adapter can produce.</summary>
    public IReadOnlyList<string> SupportedEntityTypes => ["finding"];
public async Task<IReadOnlyList<UnifiedChunk>> ProduceChunksAsync(CancellationToken cancellationToken)
{
if (!_options.FindingsAdapterEnabled)
{
_logger.LogDebug("Findings live adapter is disabled. Skipping.");
return [];
}
try
{
if (!string.IsNullOrWhiteSpace(_options.FindingsAdapterBaseUrl))
{
_logger.LogInformation("Fetching findings from Scanner service at {BaseUrl}.", _options.FindingsAdapterBaseUrl);
var liveChunks = await FetchFromServiceAsync(cancellationToken).ConfigureAwait(false);
if (liveChunks.Count > 0)
{
_logger.LogInformation("Fetched {Count} findings from Scanner service.", liveChunks.Count);
return liveChunks;
}
_logger.LogWarning("Scanner service returned zero findings; falling back to snapshot.");
}
else
{
_logger.LogDebug("FindingsAdapterBaseUrl is not configured; falling back to snapshot.");
}
}
catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException or JsonException)
{
_logger.LogWarning(ex, "Failed to fetch findings from Scanner service; falling back to snapshot.");
}
return await FallbackToSnapshotAsync(cancellationToken).ConfigureAwait(false);
}
private async Task<IReadOnlyList<UnifiedChunk>> FetchFromServiceAsync(CancellationToken cancellationToken)
{
var client = _httpClientFactory.CreateClient(HttpClientName);
if (!string.IsNullOrWhiteSpace(_options.FindingsAdapterBaseUrl))
{
client.BaseAddress = new Uri(_options.FindingsAdapterBaseUrl);
}
var allChunks = new List<UnifiedChunk>();
var page = 0;
while (page < MaxPages)
{
cancellationToken.ThrowIfCancellationRequested();
var requestUrl = $"{FindingsEndpoint}?offset={page * PageSize}&limit={PageSize}";
using var request = new HttpRequestMessage(HttpMethod.Get, requestUrl);
request.Headers.TryAddWithoutValidation(TenantHeader, "global");
using var response = await client.SendAsync(request, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode();
using var document = await JsonDocument.ParseAsync(
await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false),
cancellationToken: cancellationToken).ConfigureAwait(false);
var items = ExtractItems(document.RootElement);
if (items.Count == 0)
{
break;
}
foreach (var entry in items)
{
var chunk = MapFindingToChunk(entry);
if (chunk is not null)
{
allChunks.Add(chunk);
}
}
if (items.Count < PageSize)
{
break;
}
page++;
}
return allChunks;
}
private static IReadOnlyList<JsonElement> ExtractItems(JsonElement root)
{
// Support both { "items": [...] } envelope and bare array
if (root.ValueKind == JsonValueKind.Array)
{
return root.EnumerateArray().ToArray();
}
if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("Items", out var items) && items.ValueKind == JsonValueKind.Array)
{
return items.EnumerateArray().ToArray();
}
if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("items", out var itemsLower) && itemsLower.ValueKind == JsonValueKind.Array)
{
return itemsLower.EnumerateArray().ToArray();
}
return [];
}
/// <summary>
/// Maps one finding object from the live findings API into a
/// <see cref="UnifiedChunk"/>. Returns <c>null</c> when the entry is not a
/// JSON object or carries no CVE identifier. Property names are probed
/// Pascal-case first, then camel-case, because the live service and the
/// snapshot fallback disagree on casing.
/// Fix: removed the <c>sbomFreshness</c> and <c>hybridEvidence</c> locals —
/// they were read from the payload but never used anywhere in the mapping.
/// </summary>
private UnifiedChunk? MapFindingToChunk(JsonElement entry)
{
    if (entry.ValueKind != JsonValueKind.Object)
    {
        return null;
    }
    var cveId = ReadString(entry, "Cve") ?? ReadString(entry, "cveId") ?? ReadString(entry, "cve");
    if (string.IsNullOrWhiteSpace(cveId))
    {
        return null;
    }
    var findingId = ReadString(entry, "FindingId") ?? ReadString(entry, "findingId") ?? cveId;
    var severity = ReadString(entry, "Severity") ?? ReadString(entry, "severity") ?? "unknown";
    var component = ReadString(entry, "Component") ?? ReadString(entry, "component") ?? string.Empty;
    var reachability = ReadString(entry, "Reachability") ?? ReadString(entry, "reachability") ?? "unknown";
    var environment = ReadString(entry, "Environment") ?? ReadString(entry, "environment") ?? string.Empty;
    var description = ReadString(entry, "description") ?? ReadString(entry, "Description") ?? string.Empty;
    var policyBadge = ReadString(entry, "policyBadge") ?? string.Empty;
    var product = ReadString(entry, "product") ?? component;
    var tenant = ReadString(entry, "tenant") ?? "global";
    var tags = ReadStringArray(entry, "tags", ["finding", "vulnerability", severity]);
    var title = string.IsNullOrWhiteSpace(component)
        ? $"{cveId} [{severity}]"
        : $"{cveId} - {component} [{severity}]";
    // Body is the searchable text: title, optional detail lines, then severity.
    var bodyParts = new List<string> { title };
    if (!string.IsNullOrWhiteSpace(description))
    {
        bodyParts.Add(description);
    }
    if (!string.IsNullOrWhiteSpace(reachability))
    {
        bodyParts.Add($"Reachability: {reachability}");
    }
    if (!string.IsNullOrWhiteSpace(environment))
    {
        bodyParts.Add($"Environment: {environment}");
    }
    bodyParts.Add($"Severity: {severity}");
    var body = string.Join("\n", bodyParts);
    // Deterministic ids keyed by finding/CVE so re-ingestion is idempotent.
    var chunkId = KnowledgeSearchText.StableId("chunk", "finding", findingId, cveId);
    var docId = KnowledgeSearchText.StableId("doc", "finding", findingId);
    var embedding = _vectorEncoder.Encode(body);
    var freshness = ReadTimestamp(entry, "freshness");
    var metadata = BuildMetadata(cveId, severity, product, reachability, policyBadge, tenant, tags);
    return new UnifiedChunk(
        ChunkId: chunkId,
        DocId: docId,
        Kind: "finding",
        Domain: Domain,
        Title: title,
        Body: body,
        Embedding: embedding,
        EntityKey: $"cve:{cveId}",
        EntityType: "finding",
        Anchor: null,
        SectionPath: null,
        SpanStart: 0,
        SpanEnd: body.Length,
        Freshness: freshness ?? DateTimeOffset.UtcNow,
        Metadata: metadata);
}
/// <summary>
/// Loads findings from the on-disk snapshot configured via
/// <c>UnifiedFindingsSnapshotPath</c>. Used when the live service is
/// unreachable or returns nothing; a missing file or non-array payload
/// yields an empty list rather than an error.
/// </summary>
private async Task<IReadOnlyList<UnifiedChunk>> FallbackToSnapshotAsync(CancellationToken cancellationToken)
{
    var snapshotPath = ResolvePath(_options.UnifiedFindingsSnapshotPath);
    if (!File.Exists(snapshotPath))
    {
        _logger.LogDebug("Unified finding snapshot not found at {Path}. Returning empty.", snapshotPath);
        return [];
    }

    await using var snapshotStream = File.OpenRead(snapshotPath);
    using var snapshot = await JsonDocument.ParseAsync(snapshotStream, cancellationToken: cancellationToken).ConfigureAwait(false);
    if (snapshot.RootElement.ValueKind != JsonValueKind.Array)
    {
        _logger.LogWarning("Unified finding snapshot at {Path} is not a JSON array.", snapshotPath);
        return [];
    }

    var results = new List<UnifiedChunk>();
    foreach (var element in snapshot.RootElement.EnumerateArray())
    {
        cancellationToken.ThrowIfCancellationRequested();
        if (MapSnapshotEntryToChunk(element) is { } mapped)
        {
            results.Add(mapped);
        }
    }

    _logger.LogDebug("Loaded {Count} findings from snapshot fallback at {Path}.", results.Count, snapshotPath);
    return results;
}
/// <summary>
/// Maps one snapshot entry (camel-case schema) into a
/// <see cref="UnifiedChunk"/>. Returns <c>null</c> for non-object entries or
/// entries without a <c>cveId</c>.
/// </summary>
private UnifiedChunk? MapSnapshotEntryToChunk(JsonElement entry)
{
    if (entry.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    var cveId = ReadString(entry, "cveId");
    if (string.IsNullOrWhiteSpace(cveId))
    {
        return null;
    }

    var findingId = ReadString(entry, "findingId") ?? cveId;
    var severity = ReadString(entry, "severity") ?? "unknown";
    var title = ReadString(entry, "title") ?? cveId;
    var description = ReadString(entry, "description") ?? string.Empty;
    var service = ReadString(entry, "service") ?? "scanner";
    var tenant = ReadString(entry, "tenant") ?? "global";
    var tags = ReadStringArray(entry, "tags", ["finding", "vulnerability", severity]);

    // Fold title/description/severity into one searchable body; description
    // line is omitted when blank.
    var body = string.IsNullOrWhiteSpace(description)
        ? $"{title}\nSeverity: {severity}"
        : $"{title}\n{description}\nSeverity: {severity}";

    return new UnifiedChunk(
        ChunkId: KnowledgeSearchText.StableId("chunk", "finding", findingId, cveId),
        DocId: KnowledgeSearchText.StableId("doc", "finding", findingId),
        Kind: "finding",
        Domain: Domain,
        Title: title,
        Body: body,
        Embedding: _vectorEncoder.Encode(body),
        EntityKey: $"cve:{cveId}",
        EntityType: "finding",
        Anchor: null,
        SectionPath: null,
        SpanStart: 0,
        SpanEnd: body.Length,
        Freshness: ReadTimestamp(entry, "freshness"),
        Metadata: BuildMetadata(cveId, severity, service, "unknown", string.Empty, tenant, tags));
}
/// <summary>
/// Builds the filterable metadata payload attached to each finding chunk.
/// Keys are camel-case, as consumed by the unified search filter layer.
/// </summary>
private static JsonDocument BuildMetadata(
    string cveId,
    string severity,
    string product,
    string reachability,
    string policyBadge,
    string tenant,
    IReadOnlyList<string> tags)
{
    // SerializeToDocument produces the JsonDocument directly, avoiding the
    // previous serialize-to-string + re-parse round trip.
    return JsonSerializer.SerializeToDocument(new
    {
        domain = "findings",
        cveId,
        severity,
        product,
        reachability,
        policyBadge,
        tenant,
        tags
    });
}
/// <summary>
/// Resolves a configured snapshot path to an absolute path: rooted paths pass
/// through unchanged, relative paths are resolved against
/// <c>_options.RepositoryRoot</c> (or the current directory when unset).
/// </summary>
private string ResolvePath(string configuredPath)
{
    return Path.IsPathRooted(configuredPath)
        ? configuredPath
        : Path.GetFullPath(Path.Combine(
            string.IsNullOrWhiteSpace(_options.RepositoryRoot) ? "." : _options.RepositoryRoot,
            configuredPath));
}
/// <summary>
/// Reads a trimmed string property from a JSON object, or <c>null</c> when the
/// property is absent or not a JSON string.
/// </summary>
private static string? ReadString(JsonElement obj, string propertyName)
{
    if (!obj.TryGetProperty(propertyName, out var prop) || prop.ValueKind != JsonValueKind.String)
    {
        return null;
    }
    return prop.GetString()?.Trim();
}
/// <summary>
/// Reads a timestamp property, or <c>null</c> when it is absent or
/// unparseable. Parses with the invariant culture so ingestion behaves the
/// same regardless of the host's regional settings — the previous overload
/// used the current culture, which can mis-read machine-generated dates
/// (e.g. day/month ordering) on non-US locales.
/// </summary>
private static DateTimeOffset? ReadTimestamp(JsonElement obj, string propertyName)
{
    var raw = ReadString(obj, propertyName);
    if (raw is null ||
        !DateTimeOffset.TryParse(
            raw,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out var timestamp))
    {
        return null;
    }
    return timestamp;
}
/// <summary>
/// Reads a string-array property, returning trimmed values de-duplicated
/// case-insensitively (first-seen casing wins) and sorted in case-insensitive
/// alphabetical order. Returns <paramref name="fallback"/> when the property
/// is missing or not an array.
/// </summary>
private static IReadOnlyList<string> ReadStringArray(JsonElement obj, string propertyName, IReadOnlyList<string> fallback)
{
    if (!obj.TryGetProperty(propertyName, out var prop) || prop.ValueKind != JsonValueKind.Array)
    {
        return fallback;
    }

    // SortedSet gives both the case-insensitive de-duplication and the
    // case-insensitive ordering of the original Distinct + OrderBy chain.
    var values = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var element in prop.EnumerateArray())
    {
        if (element.ValueKind != JsonValueKind.String)
        {
            continue;
        }
        var text = element.GetString();
        if (!string.IsNullOrWhiteSpace(text))
        {
            values.Add(text.Trim());
        }
    }
    return values.ToArray();
}
}

View File

@@ -0,0 +1,107 @@
using StellaOps.AdvisoryAI.KnowledgeSearch;
using StellaOps.AdvisoryAI.Vectorization;
using System.Text.Json;
using System.Linq;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Adapters;
/// <summary>
/// Seeds the unified search index with a small, fixed catalog of platform
/// entities (scan, policy, finding, pack, tenant) so cross-domain navigation
/// queries resolve without any live ingestion. Performs no I/O.
/// </summary>
internal sealed class PlatformCatalogIngestionAdapter : ISearchIngestionAdapter
{
    // Fixed navigation catalog: one representative entry per entity kind.
    private static readonly PlatformCatalogEntry[] Catalog =
    [
        new PlatformCatalogEntry("scan-2025-0001", "scan", "Scan: api-service", "Latest scan for api-service", "scanner", "/scans/scan-2025-0001"),
        new PlatformCatalogEntry("policy-ops-baseline", "policy", "Policy: Ops Baseline", "Baseline policy pack", "policy", "/policy/policy-ops-baseline"),
        new PlatformCatalogEntry("finding-cve-2025-1001", "finding", "CVE-2025-1001", "Critical finding in payments", "findings", "/findings/cve-2025-1001"),
        new PlatformCatalogEntry("pack-offline-kit", "pack", "Pack: Offline Kit", "Offline kit export bundle", "orchestrator", "/packs/offline-kit"),
        new PlatformCatalogEntry("tenant-acme", "tenant", "Tenant: acme", "Tenant catalog entry", "authority", "/tenants/acme"),
    ];

    private readonly IVectorEncoder _vectorEncoder;

    public PlatformCatalogIngestionAdapter(IVectorEncoder vectorEncoder)
    {
        _vectorEncoder = vectorEncoder ?? throw new ArgumentNullException(nameof(vectorEncoder));
    }

    public string Domain => "platform";

    public IReadOnlyList<string> SupportedEntityTypes => ["platform_entity"];

    /// <summary>Produces one chunk per catalog entry; completes synchronously.</summary>
    public Task<IReadOnlyList<UnifiedChunk>> ProduceChunksAsync(CancellationToken cancellationToken)
    {
        IReadOnlyList<UnifiedChunk> chunks = Catalog.Select(CreateChunk).ToArray();
        return Task.FromResult(chunks);
    }

    /// <summary>Converts a catalog entry into an embedded, metadata-tagged chunk.</summary>
    private UnifiedChunk CreateChunk(PlatformCatalogEntry entry)
    {
        var body = $"{entry.Title}\n{entry.Summary}";
        var metadata = JsonDocument.Parse(JsonSerializer.Serialize(new
        {
            domain = "platform",
            route = entry.Route,
            service = entry.Source,
            entityType = entry.EntityType,
            tenant = "global",
            tags = new[] { "platform", entry.EntityType, entry.Source }
        }));
        return new UnifiedChunk(
            ChunkId: KnowledgeSearchText.StableId("chunk", "platform_entity", entry.EntityId),
            DocId: KnowledgeSearchText.StableId("doc", "platform_entity", entry.EntityId),
            Kind: "platform_entity",
            Domain: Domain,
            Title: entry.Title,
            Body: body,
            Embedding: _vectorEncoder.Encode(body),
            EntityKey: $"platform:{entry.EntityId}",
            EntityType: "platform_entity",
            Anchor: null,
            SectionPath: null,
            SpanStart: 0,
            SpanEnd: body.Length,
            Freshness: null,
            Metadata: metadata);
    }

    /// <summary>Immutable description of a navigable platform entity.</summary>
    private sealed record PlatformCatalogEntry(
        string EntityId,
        string EntityType,
        string Title,
        string Summary,
        string Source,
        string Route);
}

View File

@@ -0,0 +1,161 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using StellaOps.AdvisoryAI.Vectorization;
using System.Text.Json;
using System.Linq;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Adapters;
/// <summary>
/// Ingests policy rules for unified search from the static snapshot file
/// configured via <c>UnifiedPolicySnapshotPath</c>. Entries without a
/// <c>ruleId</c> are skipped; a missing or malformed snapshot yields an empty
/// result rather than an error.
/// </summary>
internal sealed class PolicyRuleIngestionAdapter : ISearchIngestionAdapter
{
    private readonly IVectorEncoder _vectorEncoder;
    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<PolicyRuleIngestionAdapter> _logger;

    public PolicyRuleIngestionAdapter(
        IVectorEncoder vectorEncoder,
        IOptions<KnowledgeSearchOptions> options,
        ILogger<PolicyRuleIngestionAdapter> logger)
    {
        _vectorEncoder = vectorEncoder ?? throw new ArgumentNullException(nameof(vectorEncoder));
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public string Domain => "policy";

    public IReadOnlyList<string> SupportedEntityTypes => ["policy_rule"];

    /// <summary>
    /// Reads the policy snapshot (a JSON array of rule objects) and maps each
    /// valid entry into a <see cref="UnifiedChunk"/>.
    /// </summary>
    public async Task<IReadOnlyList<UnifiedChunk>> ProduceChunksAsync(CancellationToken cancellationToken)
    {
        var path = ResolvePath(_options.UnifiedPolicySnapshotPath);
        if (!File.Exists(path))
        {
            _logger.LogDebug("Unified policy snapshot not found at {Path}. Skipping policy ingestion.", path);
            return [];
        }

        await using var stream = File.OpenRead(path);
        using var document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
        if (document.RootElement.ValueKind != JsonValueKind.Array)
        {
            _logger.LogWarning("Unified policy snapshot at {Path} is not a JSON array. Skipping policy ingestion.", path);
            return [];
        }

        var chunks = new List<UnifiedChunk>();
        foreach (var entry in document.RootElement.EnumerateArray())
        {
            cancellationToken.ThrowIfCancellationRequested();
            if (entry.ValueKind != JsonValueKind.Object)
            {
                continue;
            }
            var ruleId = ReadString(entry, "ruleId");
            if (string.IsNullOrWhiteSpace(ruleId))
            {
                continue;
            }
            var title = ReadString(entry, "title") ?? ruleId;
            var description = ReadString(entry, "description") ?? string.Empty;
            var decision = ReadString(entry, "decision");
            var service = ReadString(entry, "service") ?? "policy";
            var tenant = ReadString(entry, "tenant") ?? "global";
            var tags = ReadStringArray(entry, "tags", ["policy", "rule"]);
            // The "Decision:" line is only emitted when the snapshot provides one.
            var body = string.IsNullOrWhiteSpace(decision)
                ? $"{title}\nRule: {ruleId}\n{description}"
                : $"{title}\nRule: {ruleId}\nDecision: {decision}\n{description}";
            var chunkId = KnowledgeSearchText.StableId("chunk", "policy_rule", ruleId);
            var docId = KnowledgeSearchText.StableId("doc", "policy_rule", ruleId);
            var embedding = _vectorEncoder.Encode(body);
            var freshness = ReadTimestamp(entry, "freshness");
            var metadata = BuildMetadata(ruleId, service, tenant, tags);
            chunks.Add(new UnifiedChunk(
                ChunkId: chunkId,
                DocId: docId,
                Kind: "policy_rule",
                Domain: Domain,
                Title: title,
                Body: body,
                Embedding: embedding,
                EntityKey: $"rule:{ruleId}",
                EntityType: "policy_rule",
                Anchor: null,
                SectionPath: null,
                SpanStart: 0,
                SpanEnd: body.Length,
                Freshness: freshness,
                Metadata: metadata));
        }
        return chunks;
    }

    /// <summary>Builds the filterable metadata payload attached to each policy chunk.</summary>
    private static JsonDocument BuildMetadata(
        string ruleId,
        string service,
        string tenant,
        IReadOnlyList<string> tags)
    {
        // SerializeToDocument avoids the previous Serialize + Parse string round trip.
        return JsonSerializer.SerializeToDocument(new
        {
            domain = "policy",
            ruleId,
            service,
            tenant,
            tags
        });
    }

    /// <summary>Resolves the snapshot path against RepositoryRoot when relative.</summary>
    private string ResolvePath(string configuredPath)
    {
        if (Path.IsPathRooted(configuredPath))
        {
            return configuredPath;
        }
        var root = string.IsNullOrWhiteSpace(_options.RepositoryRoot) ? "." : _options.RepositoryRoot;
        return Path.GetFullPath(Path.Combine(root, configuredPath));
    }

    /// <summary>Reads a trimmed string property; <c>null</c> when absent or not a string.</summary>
    private static string? ReadString(JsonElement obj, string propertyName)
    {
        return obj.TryGetProperty(propertyName, out var prop) && prop.ValueKind == JsonValueKind.String
            ? prop.GetString()?.Trim()
            : null;
    }

    /// <summary>
    /// Reads a timestamp property; <c>null</c> when absent or unparseable.
    /// Parses with the invariant culture so ingestion is deterministic across
    /// host locales (previously used the current culture).
    /// </summary>
    private static DateTimeOffset? ReadTimestamp(JsonElement obj, string propertyName)
    {
        var raw = ReadString(obj, propertyName);
        if (raw is null ||
            !DateTimeOffset.TryParse(
                raw,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var timestamp))
        {
            return null;
        }
        return timestamp;
    }

    /// <summary>
    /// Reads a string-array property as trimmed, case-insensitively de-duplicated
    /// and sorted values; returns <paramref name="fallback"/> when missing.
    /// </summary>
    private static IReadOnlyList<string> ReadStringArray(JsonElement obj, string propertyName, IReadOnlyList<string> fallback)
    {
        if (!obj.TryGetProperty(propertyName, out var prop) || prop.ValueKind != JsonValueKind.Array)
        {
            return fallback;
        }
        return prop.EnumerateArray()
            .Where(static value => value.ValueKind == JsonValueKind.String)
            .Select(static value => value.GetString())
            .Where(static value => !string.IsNullOrWhiteSpace(value))
            .Select(static value => value!.Trim())
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderBy(static value => value, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }
}

View File

@@ -0,0 +1,381 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using StellaOps.AdvisoryAI.Vectorization;
using System.Net.Http.Json;
using System.Text.Json;
using System.Linq;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Adapters;
/// <summary>
/// Live data adapter that fetches policy gate rules from the Policy Gateway service.
/// Falls back to the static snapshot file when the upstream service is unreachable.
/// </summary>
internal sealed class PolicySearchAdapter : ISearchIngestionAdapter
{
    private const string TenantHeader = "X-StellaOps-Tenant";
    private const string HttpClientName = "policy-internal";
    private const string DecisionsEndpoint = "/api/v1/gates/decisions";
    // Fix: removed the unused private const GatesEndpoint ("/api/v1/gates");
    // nothing in this class referenced it.

    private readonly IHttpClientFactory _httpClientFactory;
    private readonly IVectorEncoder _vectorEncoder;
    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<PolicySearchAdapter> _logger;

    public PolicySearchAdapter(
        IHttpClientFactory httpClientFactory,
        IVectorEncoder vectorEncoder,
        IOptions<KnowledgeSearchOptions> options,
        ILogger<PolicySearchAdapter> logger)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        _vectorEncoder = vectorEncoder ?? throw new ArgumentNullException(nameof(vectorEncoder));
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public string Domain => "policy";

    public IReadOnlyList<string> SupportedEntityTypes => ["policy_rule"];

    /// <summary>
    /// Produces policy-rule chunks. Prefers the live Policy Gateway when the
    /// adapter is enabled and a base URL is configured; otherwise (or on a
    /// transport/timeout/JSON failure, or an empty live result) falls back to
    /// the snapshot file.
    /// </summary>
    public async Task<IReadOnlyList<UnifiedChunk>> ProduceChunksAsync(CancellationToken cancellationToken)
    {
        if (!_options.PolicyAdapterEnabled)
        {
            _logger.LogDebug("Policy live adapter is disabled. Skipping.");
            return [];
        }
        try
        {
            if (!string.IsNullOrWhiteSpace(_options.PolicyAdapterBaseUrl))
            {
                _logger.LogInformation("Fetching policy gates from Policy Gateway at {BaseUrl}.", _options.PolicyAdapterBaseUrl);
                var liveChunks = await FetchFromServiceAsync(cancellationToken).ConfigureAwait(false);
                if (liveChunks.Count > 0)
                {
                    _logger.LogInformation("Fetched {Count} policy rules from Policy Gateway.", liveChunks.Count);
                    return liveChunks;
                }
                _logger.LogWarning("Policy Gateway returned zero rules; falling back to snapshot.");
            }
            else
            {
                _logger.LogDebug("PolicyAdapterBaseUrl is not configured; falling back to snapshot.");
            }
        }
        catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException or JsonException)
        {
            // Transport, timeout, and malformed-payload failures degrade to the snapshot.
            _logger.LogWarning(ex, "Failed to fetch policy data from Policy Gateway; falling back to snapshot.");
        }
        return await FallbackToSnapshotAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>Fetches recent gate decisions (single page, limit 100) and maps them to chunks.</summary>
    private async Task<IReadOnlyList<UnifiedChunk>> FetchFromServiceAsync(CancellationToken cancellationToken)
    {
        var client = _httpClientFactory.CreateClient(HttpClientName);
        if (!string.IsNullOrWhiteSpace(_options.PolicyAdapterBaseUrl))
        {
            client.BaseAddress = new Uri(_options.PolicyAdapterBaseUrl);
        }
        cancellationToken.ThrowIfCancellationRequested();
        // Fetch recent gate decisions to extract policy rule information.
        var requestUrl = $"{DecisionsEndpoint}?limit=100";
        using var request = new HttpRequestMessage(HttpMethod.Get, requestUrl);
        // Tenant header is required by the gateway; ingestion runs as the global tenant.
        request.Headers.TryAddWithoutValidation(TenantHeader, "global");
        using var response = await client.SendAsync(request, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();
        using var document = await JsonDocument.ParseAsync(
            await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false),
            cancellationToken: cancellationToken).ConfigureAwait(false);
        var items = ExtractDecisions(document.RootElement);
        var allChunks = new List<UnifiedChunk>();
        foreach (var entry in items)
        {
            var chunk = MapDecisionToChunk(entry);
            if (chunk is not null)
            {
                allChunks.Add(chunk);
            }
        }
        return allChunks;
    }

    /// <summary>
    /// Extracts decision entries from a response payload. Supports a bare
    /// array or an envelope object with "decisions"/"Decisions"/"items"/"Items"
    /// (probed in that order, matching GateDecisionHistoryResponse).
    /// </summary>
    private static IReadOnlyList<JsonElement> ExtractDecisions(JsonElement root)
    {
        if (root.ValueKind == JsonValueKind.Array)
        {
            return root.EnumerateArray().ToArray();
        }
        if (root.ValueKind == JsonValueKind.Object)
        {
            if (root.TryGetProperty("decisions", out var decisions) && decisions.ValueKind == JsonValueKind.Array)
            {
                return decisions.EnumerateArray().ToArray();
            }
            if (root.TryGetProperty("Decisions", out var decisionsPascal) && decisionsPascal.ValueKind == JsonValueKind.Array)
            {
                return decisionsPascal.EnumerateArray().ToArray();
            }
            if (root.TryGetProperty("items", out var items) && items.ValueKind == JsonValueKind.Array)
            {
                return items.EnumerateArray().ToArray();
            }
            if (root.TryGetProperty("Items", out var itemsPascal) && itemsPascal.ValueKind == JsonValueKind.Array)
            {
                return itemsPascal.EnumerateArray().ToArray();
            }
        }
        return [];
    }

    /// <summary>
    /// Maps one gate decision into a policy-rule chunk. Returns <c>null</c>
    /// for non-object entries or when no rule identifier can be derived.
    /// Property names are probed snake_case first, then Pascal-case.
    /// Fix: removed locals that were read from the payload but never used
    /// (policy_bundle_hash, actor, ci_context, decision).
    /// </summary>
    private UnifiedChunk? MapDecisionToChunk(JsonElement entry)
    {
        if (entry.ValueKind != JsonValueKind.Object)
        {
            return null;
        }
        // Extract rule-like information from gate decisions.
        var ruleId = ReadString(entry, "policy_bundle_id")
            ?? ReadString(entry, "PolicyBundleId")
            ?? ReadString(entry, "ruleId")
            ?? ReadString(entry, "decision_id");
        if (string.IsNullOrWhiteSpace(ruleId))
        {
            return null;
        }
        var bomRef = ReadString(entry, "bom_ref") ?? ReadString(entry, "BomRef") ?? string.Empty;
        var gateStatus = ReadString(entry, "gate_status") ?? ReadString(entry, "GateStatus") ?? "unknown";
        var verdictHash = ReadString(entry, "verdict_hash") ?? ReadString(entry, "VerdictHash") ?? string.Empty;
        var description = ReadString(entry, "description") ?? string.Empty;
        var scope = bomRef;
        var environment = ReadString(entry, "environment") ?? string.Empty;
        var tenant = ReadString(entry, "tenant") ?? "global";
        var tags = ReadStringArray(entry, "tags", ["policy", "rule", gateStatus]);
        // Map gate status to an enforcement level; unknown statuses pass through.
        var enforcement = gateStatus switch
        {
            "block" => "mandatory",
            "warn" => "advisory",
            "pass" => "informational",
            _ => gateStatus
        };
        var title = string.IsNullOrWhiteSpace(bomRef)
            ? $"{ruleId} [{enforcement}]"
            : $"{ruleId} - {bomRef} [{enforcement}]";
        var bodyParts = new List<string> { title, $"Rule: {ruleId}", $"Enforcement: {enforcement}" };
        if (!string.IsNullOrWhiteSpace(description))
        {
            bodyParts.Add(description);
        }
        if (!string.IsNullOrWhiteSpace(bomRef))
        {
            bodyParts.Add($"Scope: {bomRef}");
        }
        if (!string.IsNullOrWhiteSpace(verdictHash))
        {
            bodyParts.Add($"Verdict: {verdictHash}");
        }
        var body = string.Join("\n", bodyParts);
        var chunkId = KnowledgeSearchText.StableId("chunk", "policy_rule", ruleId);
        var docId = KnowledgeSearchText.StableId("doc", "policy_rule", ruleId);
        var embedding = _vectorEncoder.Encode(body);
        var freshness = ReadTimestamp(entry, "evaluated_at")
            ?? ReadTimestamp(entry, "EvaluatedAt")
            ?? ReadTimestamp(entry, "freshness");
        var metadata = BuildMetadata(ruleId, enforcement, scope, environment, tenant, tags);
        return new UnifiedChunk(
            ChunkId: chunkId,
            DocId: docId,
            Kind: "policy_rule",
            Domain: Domain,
            Title: title,
            Body: body,
            Embedding: embedding,
            EntityKey: $"rule:{ruleId}",
            EntityType: "policy_rule",
            Anchor: null,
            SectionPath: null,
            SpanStart: 0,
            SpanEnd: body.Length,
            Freshness: freshness ?? DateTimeOffset.UtcNow,
            Metadata: metadata);
    }

    /// <summary>
    /// Loads policy rules from the on-disk snapshot. A missing file or
    /// non-array payload yields an empty list rather than an error.
    /// </summary>
    private async Task<IReadOnlyList<UnifiedChunk>> FallbackToSnapshotAsync(CancellationToken cancellationToken)
    {
        var path = ResolvePath(_options.UnifiedPolicySnapshotPath);
        if (!File.Exists(path))
        {
            _logger.LogDebug("Unified policy snapshot not found at {Path}. Returning empty.", path);
            return [];
        }
        await using var stream = File.OpenRead(path);
        using var document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
        if (document.RootElement.ValueKind != JsonValueKind.Array)
        {
            _logger.LogWarning("Unified policy snapshot at {Path} is not a JSON array.", path);
            return [];
        }
        var chunks = new List<UnifiedChunk>();
        foreach (var entry in document.RootElement.EnumerateArray())
        {
            cancellationToken.ThrowIfCancellationRequested();
            var chunk = MapSnapshotEntryToChunk(entry);
            if (chunk is not null)
            {
                chunks.Add(chunk);
            }
        }
        _logger.LogDebug("Loaded {Count} policy rules from snapshot fallback at {Path}.", chunks.Count, path);
        return chunks;
    }

    /// <summary>
    /// Maps one snapshot entry (camel-case schema) into a policy-rule chunk.
    /// Returns <c>null</c> for non-object entries or entries without a ruleId.
    /// </summary>
    private UnifiedChunk? MapSnapshotEntryToChunk(JsonElement entry)
    {
        if (entry.ValueKind != JsonValueKind.Object)
        {
            return null;
        }
        var ruleId = ReadString(entry, "ruleId");
        if (string.IsNullOrWhiteSpace(ruleId))
        {
            return null;
        }
        var title = ReadString(entry, "title") ?? ruleId;
        var description = ReadString(entry, "description") ?? string.Empty;
        var decision = ReadString(entry, "decision");
        var service = ReadString(entry, "service") ?? "policy";
        var tenant = ReadString(entry, "tenant") ?? "global";
        var tags = ReadStringArray(entry, "tags", ["policy", "rule"]);
        // The "Decision:" line is only emitted when the snapshot provides one.
        var body = string.IsNullOrWhiteSpace(decision)
            ? $"{title}\nRule: {ruleId}\n{description}"
            : $"{title}\nRule: {ruleId}\nDecision: {decision}\n{description}";
        var chunkId = KnowledgeSearchText.StableId("chunk", "policy_rule", ruleId);
        var docId = KnowledgeSearchText.StableId("doc", "policy_rule", ruleId);
        var embedding = _vectorEncoder.Encode(body);
        var freshness = ReadTimestamp(entry, "freshness");
        var metadata = BuildMetadata(ruleId, service, string.Empty, string.Empty, tenant, tags);
        return new UnifiedChunk(
            ChunkId: chunkId,
            DocId: docId,
            Kind: "policy_rule",
            Domain: Domain,
            Title: title,
            Body: body,
            Embedding: embedding,
            EntityKey: $"rule:{ruleId}",
            EntityType: "policy_rule",
            Anchor: null,
            SectionPath: null,
            SpanStart: 0,
            SpanEnd: body.Length,
            Freshness: freshness,
            Metadata: metadata);
    }

    /// <summary>Builds the filterable metadata payload attached to each policy chunk.</summary>
    private static JsonDocument BuildMetadata(
        string ruleId,
        string enforcement,
        string scope,
        string environment,
        string tenant,
        IReadOnlyList<string> tags)
    {
        // SerializeToDocument avoids the previous Serialize + Parse string round trip.
        return JsonSerializer.SerializeToDocument(new
        {
            domain = "policy",
            ruleId,
            enforcement,
            scope,
            environment,
            tenant,
            tags
        });
    }

    /// <summary>Resolves the snapshot path against RepositoryRoot when relative.</summary>
    private string ResolvePath(string configuredPath)
    {
        if (Path.IsPathRooted(configuredPath))
        {
            return configuredPath;
        }
        var root = string.IsNullOrWhiteSpace(_options.RepositoryRoot) ? "." : _options.RepositoryRoot;
        return Path.GetFullPath(Path.Combine(root, configuredPath));
    }

    /// <summary>Reads a trimmed string property; <c>null</c> when absent or not a string.</summary>
    private static string? ReadString(JsonElement obj, string propertyName)
    {
        return obj.TryGetProperty(propertyName, out var prop) && prop.ValueKind == JsonValueKind.String
            ? prop.GetString()?.Trim()
            : null;
    }

    /// <summary>
    /// Reads a timestamp property; <c>null</c> when absent or unparseable.
    /// Parses with the invariant culture so ingestion is deterministic across
    /// host locales (previously used the current culture).
    /// </summary>
    private static DateTimeOffset? ReadTimestamp(JsonElement obj, string propertyName)
    {
        var raw = ReadString(obj, propertyName);
        if (raw is null ||
            !DateTimeOffset.TryParse(
                raw,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var timestamp))
        {
            return null;
        }
        return timestamp;
    }

    /// <summary>
    /// Reads a string-array property as trimmed, case-insensitively de-duplicated
    /// and sorted values; returns <paramref name="fallback"/> when missing.
    /// </summary>
    private static IReadOnlyList<string> ReadStringArray(JsonElement obj, string propertyName, IReadOnlyList<string> fallback)
    {
        if (!obj.TryGetProperty(propertyName, out var prop) || prop.ValueKind != JsonValueKind.Array)
        {
            return fallback;
        }
        return prop.EnumerateArray()
            .Where(static value => value.ValueKind == JsonValueKind.String)
            .Select(static value => value.GetString())
            .Where(static value => !string.IsNullOrWhiteSpace(value))
            .Select(static value => value!.Trim())
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderBy(static value => value, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }
}

View File

@@ -0,0 +1,385 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using StellaOps.AdvisoryAI.Vectorization;
using System.Net.Http.Json;
using System.Text.Json;
using System.Linq;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Adapters;
/// <summary>
/// Live data adapter that fetches VEX statements from the Concelier canonical advisory service.
/// Falls back to the static snapshot file when the upstream service is unreachable.
/// </summary>
internal sealed class VexSearchAdapter : ISearchIngestionAdapter
{
private const string TenantHeader = "X-StellaOps-Tenant";
private const string HttpClientName = "vex-internal";
private const string CanonicalEndpoint = "/api/v1/canonical";
private const int MaxPages = 20;
private const int PageSize = 50;
private readonly IHttpClientFactory _httpClientFactory;
private readonly IVectorEncoder _vectorEncoder;
private readonly KnowledgeSearchOptions _options;
private readonly ILogger<VexSearchAdapter> _logger;
/// <summary>
/// Creates the adapter. All collaborators are required; the options value
/// falls back to a default <see cref="KnowledgeSearchOptions"/> when null.
/// </summary>
public VexSearchAdapter(
    IHttpClientFactory httpClientFactory,
    IVectorEncoder vectorEncoder,
    IOptions<KnowledgeSearchOptions> options,
    ILogger<VexSearchAdapter> logger)
{
    ArgumentNullException.ThrowIfNull(httpClientFactory);
    ArgumentNullException.ThrowIfNull(vectorEncoder);
    ArgumentNullException.ThrowIfNull(options);
    _httpClientFactory = httpClientFactory;
    _vectorEncoder = vectorEncoder;
    _options = options.Value ?? new KnowledgeSearchOptions();
    ArgumentNullException.ThrowIfNull(logger);
    _logger = logger;
}

/// <summary>Search domain this adapter contributes to.</summary>
public string Domain => "vex";

/// <summary>Entity types emitted by this adapter.</summary>
public IReadOnlyList<string> SupportedEntityTypes => ["vex_statement"];
/// <summary>
/// Produces VEX statement chunks. Prefers the live Concelier service when the
/// adapter is enabled and a base URL is configured; otherwise (or on a
/// transport/timeout/JSON failure, or an empty live result) falls back to the
/// snapshot file.
/// </summary>
public async Task<IReadOnlyList<UnifiedChunk>> ProduceChunksAsync(CancellationToken cancellationToken)
{
    if (!_options.VexAdapterEnabled)
    {
        _logger.LogDebug("VEX live adapter is disabled. Skipping.");
        return [];
    }

    try
    {
        if (string.IsNullOrWhiteSpace(_options.VexAdapterBaseUrl))
        {
            _logger.LogDebug("VexAdapterBaseUrl is not configured; falling back to snapshot.");
        }
        else
        {
            _logger.LogInformation("Fetching canonical advisories from Concelier service at {BaseUrl}.", _options.VexAdapterBaseUrl);
            var liveChunks = await FetchFromServiceAsync(cancellationToken).ConfigureAwait(false);
            if (liveChunks.Count > 0)
            {
                _logger.LogInformation("Fetched {Count} VEX statements from Concelier service.", liveChunks.Count);
                return liveChunks;
            }
            _logger.LogWarning("Concelier service returned zero advisories; falling back to snapshot.");
        }
    }
    catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException or JsonException)
    {
        // Transport, timeout, and malformed-payload failures degrade to the snapshot.
        _logger.LogWarning(ex, "Failed to fetch VEX data from Concelier service; falling back to snapshot.");
    }

    return await FallbackToSnapshotAsync(cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Pages through the Concelier canonical advisory endpoint, mapping each
/// advisory to a chunk. Paging stops when: MaxPages is reached, a page comes
/// back empty, the reported TotalCount has been consumed, or a short page
/// (fewer than PageSize items) is returned.
/// </summary>
private async Task<IReadOnlyList<UnifiedChunk>> FetchFromServiceAsync(CancellationToken cancellationToken)
{
    var client = _httpClientFactory.CreateClient(HttpClientName);
    if (!string.IsNullOrWhiteSpace(_options.VexAdapterBaseUrl))
    {
        client.BaseAddress = new Uri(_options.VexAdapterBaseUrl);
    }
    var allChunks = new List<UnifiedChunk>();
    var offset = 0;
    for (var page = 0; page < MaxPages; page++)
    {
        cancellationToken.ThrowIfCancellationRequested();
        var requestUrl = $"{CanonicalEndpoint}?offset={offset}&limit={PageSize}";
        using var request = new HttpRequestMessage(HttpMethod.Get, requestUrl);
        // Tenant header is required by the service; ingestion runs as the global tenant.
        request.Headers.TryAddWithoutValidation(TenantHeader, "global");
        using var response = await client.SendAsync(request, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();
        using var document = await JsonDocument.ParseAsync(
            await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false),
            cancellationToken: cancellationToken).ConfigureAwait(false);
        var items = ExtractItems(document.RootElement);
        if (items.Count == 0)
        {
            break;
        }
        foreach (var entry in items)
        {
            // Entries that cannot be mapped (no CVE/status) are silently skipped.
            var chunk = MapAdvisoryToChunk(entry);
            if (chunk is not null)
            {
                allChunks.Add(chunk);
            }
        }
        // Advance by the actual item count, not PageSize, so a partial page
        // does not skip records on the next request.
        offset += items.Count;
        // Check if we have reached the total
        var totalCount = ReadLong(document.RootElement, "TotalCount")
            ?? ReadLong(document.RootElement, "totalCount");
        if (totalCount.HasValue && offset >= totalCount.Value)
        {
            break;
        }
        if (items.Count < PageSize)
        {
            break;
        }
    }
    return allChunks;
}
/// <summary>
/// Extracts advisory entries from a canonical-advisory response payload.
/// Accepts either a bare JSON array or an envelope object
/// (CanonicalAdvisoryListResponse) carrying the array under "Items"
/// (Pascal-case) or "items" (camel-case), probed in that order.
/// </summary>
private static IReadOnlyList<JsonElement> ExtractItems(JsonElement root)
{
    switch (root.ValueKind)
    {
        case JsonValueKind.Array:
            return root.EnumerateArray().ToArray();

        case JsonValueKind.Object:
            foreach (var envelopeKey in new[] { "Items", "items" })
            {
                if (root.TryGetProperty(envelopeKey, out var candidate) && candidate.ValueKind == JsonValueKind.Array)
                {
                    return candidate.EnumerateArray().ToArray();
                }
            }
            return [];

        default:
            return [];
    }
}
/// <summary>
/// Maps one canonical advisory object into a VEX-statement chunk. Returns
/// <c>null</c> for non-object entries or when either the CVE id or the VEX
/// status is missing. Property names are probed Pascal-case first, then
/// camel-case, to cover both the live wire format and the snapshot schema.
/// </summary>
private UnifiedChunk? MapAdvisoryToChunk(JsonElement entry)
{
    if (entry.ValueKind != JsonValueKind.Object)
    {
        return null;
    }
    var cveId = ReadString(entry, "Cve") ?? ReadString(entry, "cveId") ?? ReadString(entry, "cve");
    var status = ReadString(entry, "Status") ?? ReadString(entry, "status");
    if (string.IsNullOrWhiteSpace(cveId) || string.IsNullOrWhiteSpace(status))
    {
        return null;
    }
    // Synthesize a statement id from cve+status when the payload has none.
    var statementId = ReadString(entry, "Id") ?? ReadString(entry, "statementId") ?? $"{cveId}:{status}";
    var affectsKey = ReadString(entry, "AffectsKey") ?? ReadString(entry, "affectsKey") ?? string.Empty;
    var severity = ReadString(entry, "Severity") ?? ReadString(entry, "severity") ?? string.Empty;
    var summary = ReadString(entry, "Summary") ?? ReadString(entry, "summary") ?? string.Empty;
    var advisoryTitle = ReadString(entry, "Title") ?? ReadString(entry, "title") ?? string.Empty;
    // Justification falls back to the advisory summary when absent.
    var justification = ReadString(entry, "justification") ?? summary;
    var product = affectsKey;
    var tenant = ReadString(entry, "tenant") ?? "global";
    var tags = ReadStringArray(entry, "tags", ["vex", "statement", status]);
    var title = string.IsNullOrWhiteSpace(product)
        ? $"VEX: {cveId} ({status})"
        : $"VEX: {cveId} - {product} ({status})";
    // Body is the searchable text: title, status, then optional detail lines.
    var bodyParts = new List<string> { title, $"Status: {status}" };
    if (!string.IsNullOrWhiteSpace(justification))
    {
        bodyParts.Add($"Justification: {justification}");
    }
    if (!string.IsNullOrWhiteSpace(advisoryTitle))
    {
        bodyParts.Add($"Advisory: {advisoryTitle}");
    }
    if (!string.IsNullOrWhiteSpace(severity))
    {
        bodyParts.Add($"Severity: {severity}");
    }
    var body = string.Join("\n", bodyParts);
    // Chunk id keyed by statement, doc id keyed by CVE, so multiple
    // statements for one CVE share a document.
    var chunkId = KnowledgeSearchText.StableId("chunk", "vex_statement", statementId);
    var docId = KnowledgeSearchText.StableId("doc", "vex_statement", cveId);
    var embedding = _vectorEncoder.Encode(body);
    var freshness = ReadTimestamp(entry, "UpdatedAt") ?? ReadTimestamp(entry, "freshness");
    var metadata = BuildMetadata(cveId, status, product, justification, tenant, tags);
    return new UnifiedChunk(
        ChunkId: chunkId,
        DocId: docId,
        Kind: "vex_statement",
        Domain: Domain,
        Title: title,
        Body: body,
        Embedding: embedding,
        EntityKey: $"cve:{cveId}",
        EntityType: "vex_statement",
        Anchor: null,
        SectionPath: null,
        SpanStart: 0,
        SpanEnd: body.Length,
        Freshness: freshness ?? DateTimeOffset.UtcNow,
        Metadata: metadata);
}
private async Task<IReadOnlyList<UnifiedChunk>> FallbackToSnapshotAsync(CancellationToken cancellationToken)
{
var path = ResolvePath(_options.UnifiedVexSnapshotPath);
if (!File.Exists(path))
{
_logger.LogDebug("Unified VEX snapshot not found at {Path}. Returning empty.", path);
return [];
}
await using var stream = File.OpenRead(path);
using var document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
if (document.RootElement.ValueKind != JsonValueKind.Array)
{
_logger.LogWarning("Unified VEX snapshot at {Path} is not a JSON array.", path);
return [];
}
var chunks = new List<UnifiedChunk>();
foreach (var entry in document.RootElement.EnumerateArray())
{
cancellationToken.ThrowIfCancellationRequested();
var chunk = MapSnapshotEntryToChunk(entry);
if (chunk is not null)
{
chunks.Add(chunk);
}
}
_logger.LogDebug("Loaded {Count} VEX statements from snapshot fallback at {Path}.", chunks.Count, path);
return chunks;
}
private UnifiedChunk? MapSnapshotEntryToChunk(JsonElement entry)
{
if (entry.ValueKind != JsonValueKind.Object)
{
return null;
}
var cveId = ReadString(entry, "cveId");
var status = ReadString(entry, "status");
if (string.IsNullOrWhiteSpace(cveId) || string.IsNullOrWhiteSpace(status))
{
return null;
}
var statementId = ReadString(entry, "statementId") ?? $"{cveId}:{status}";
var justification = ReadString(entry, "justification") ?? string.Empty;
var service = ReadString(entry, "service") ?? "vex-hub";
var tenant = ReadString(entry, "tenant") ?? "global";
var tags = ReadStringArray(entry, "tags", ["vex", "statement", status]);
var title = $"VEX: {cveId} ({status})";
var body = string.IsNullOrWhiteSpace(justification)
? $"{title}\nStatus: {status}"
: $"{title}\nStatus: {status}\nJustification: {justification}";
var chunkId = KnowledgeSearchText.StableId("chunk", "vex_statement", statementId);
var docId = KnowledgeSearchText.StableId("doc", "vex_statement", cveId);
var embedding = _vectorEncoder.Encode(body);
var freshness = ReadTimestamp(entry, "freshness");
var metadata = BuildMetadata(cveId, status, string.Empty, justification, tenant, tags);
return new UnifiedChunk(
ChunkId: chunkId,
DocId: docId,
Kind: "vex_statement",
Domain: Domain,
Title: title,
Body: body,
Embedding: embedding,
EntityKey: $"cve:{cveId}",
EntityType: "vex_statement",
Anchor: null,
SectionPath: null,
SpanStart: 0,
SpanEnd: body.Length,
Freshness: freshness,
Metadata: metadata);
}
private static JsonDocument BuildMetadata(
string cveId,
string status,
string product,
string justification,
string tenant,
IReadOnlyList<string> tags)
{
return JsonDocument.Parse(JsonSerializer.Serialize(new
{
domain = "vex",
cveId,
status,
product,
justification,
tenant,
tags
}));
}
private string ResolvePath(string configuredPath)
{
if (Path.IsPathRooted(configuredPath))
{
return configuredPath;
}
var root = string.IsNullOrWhiteSpace(_options.RepositoryRoot) ? "." : _options.RepositoryRoot;
return Path.GetFullPath(Path.Combine(root, configuredPath));
}
private static string? ReadString(JsonElement obj, string propertyName)
{
return obj.TryGetProperty(propertyName, out var prop) && prop.ValueKind == JsonValueKind.String
? prop.GetString()?.Trim()
: null;
}
private static long? ReadLong(JsonElement obj, string propertyName)
{
if (obj.TryGetProperty(propertyName, out var prop) && prop.ValueKind == JsonValueKind.Number)
{
return prop.GetInt64();
}
return null;
}
private static DateTimeOffset? ReadTimestamp(JsonElement obj, string propertyName)
{
var raw = ReadString(obj, propertyName);
if (raw is null || !DateTimeOffset.TryParse(raw, out var timestamp))
{
return null;
}
return timestamp;
}
private static IReadOnlyList<string> ReadStringArray(JsonElement obj, string propertyName, IReadOnlyList<string> fallback)
{
if (!obj.TryGetProperty(propertyName, out var prop) || prop.ValueKind != JsonValueKind.Array)
{
return fallback;
}
return prop.EnumerateArray()
.Where(static value => value.ValueKind == JsonValueKind.String)
.Select(static value => value.GetString())
.Where(static value => !string.IsNullOrWhiteSpace(value))
.Select(static value => value!.Trim())
.Distinct(StringComparer.OrdinalIgnoreCase)
.OrderBy(static value => value, StringComparer.OrdinalIgnoreCase)
.ToArray();
}
}

View File

@@ -0,0 +1,164 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using StellaOps.AdvisoryAI.Vectorization;
using System.Text.Json;
using System.Linq;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Adapters;
/// <summary>
/// Ingestion adapter that converts VEX statements from the on-disk unified VEX
/// snapshot into <see cref="UnifiedChunk"/> instances for the unified search index.
/// </summary>
internal sealed class VexStatementIngestionAdapter : ISearchIngestionAdapter
{
    private readonly IVectorEncoder _vectorEncoder;
    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<VexStatementIngestionAdapter> _logger;

    public VexStatementIngestionAdapter(
        IVectorEncoder vectorEncoder,
        IOptions<KnowledgeSearchOptions> options,
        ILogger<VexStatementIngestionAdapter> logger)
    {
        _vectorEncoder = vectorEncoder ?? throw new ArgumentNullException(nameof(vectorEncoder));
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Search domain this adapter contributes chunks to.</summary>
    public string Domain => "vex";

    /// <summary>Entity types this adapter produces.</summary>
    public IReadOnlyList<string> SupportedEntityTypes => ["vex_statement"];

    /// <summary>
    /// Reads the unified VEX snapshot and produces one chunk per valid statement.
    /// Returns an empty list when the snapshot is missing or is not a JSON array.
    /// </summary>
    public async Task<IReadOnlyList<UnifiedChunk>> ProduceChunksAsync(CancellationToken cancellationToken)
    {
        var path = ResolvePath(_options.UnifiedVexSnapshotPath);
        if (!File.Exists(path))
        {
            _logger.LogDebug("Unified VEX snapshot not found at {Path}. Skipping VEX ingestion.", path);
            return [];
        }
        await using var stream = File.OpenRead(path);
        using var document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
        if (document.RootElement.ValueKind != JsonValueKind.Array)
        {
            _logger.LogWarning("Unified VEX snapshot at {Path} is not a JSON array. Skipping VEX ingestion.", path);
            return [];
        }
        var chunks = new List<UnifiedChunk>();
        foreach (var entry in document.RootElement.EnumerateArray())
        {
            cancellationToken.ThrowIfCancellationRequested();
            var chunk = MapEntryToChunk(entry);
            if (chunk is not null)
            {
                chunks.Add(chunk);
            }
        }
        return chunks;
    }

    /// <summary>
    /// Maps one snapshot entry to a chunk; <c>null</c> when the entry is not an object
    /// or is missing cveId/status. (Extracted from the ingestion loop for readability.)
    /// </summary>
    private UnifiedChunk? MapEntryToChunk(JsonElement entry)
    {
        if (entry.ValueKind != JsonValueKind.Object)
        {
            return null;
        }
        var cveId = ReadString(entry, "cveId");
        var status = ReadString(entry, "status");
        if (string.IsNullOrWhiteSpace(cveId) || string.IsNullOrWhiteSpace(status))
        {
            return null;
        }
        var statementId = ReadString(entry, "statementId") ?? $"{cveId}:{status}";
        var justification = ReadString(entry, "justification") ?? string.Empty;
        var service = ReadString(entry, "service") ?? "vex-hub";
        var tenant = ReadString(entry, "tenant") ?? "global";
        var tags = ReadStringArray(entry, "tags", ["vex", "statement", status]);
        var title = $"VEX: {cveId} ({status})";
        var body = string.IsNullOrWhiteSpace(justification)
            ? $"{title}\nStatus: {status}"
            : $"{title}\nStatus: {status}\nJustification: {justification}";
        var chunkId = KnowledgeSearchText.StableId("chunk", "vex_statement", statementId);
        var docId = KnowledgeSearchText.StableId("doc", "vex_statement", cveId);
        var embedding = _vectorEncoder.Encode(body);
        var freshness = ReadTimestamp(entry, "freshness");
        var metadata = BuildMetadata(cveId, status, service, tenant, tags);
        return new UnifiedChunk(
            ChunkId: chunkId,
            DocId: docId,
            Kind: "vex_statement",
            Domain: Domain,
            Title: title,
            Body: body,
            Embedding: embedding,
            EntityKey: $"cve:{cveId}",
            EntityType: "vex_statement",
            Anchor: null,
            SectionPath: null,
            SpanStart: 0,
            SpanEnd: body.Length,
            Freshness: freshness,
            Metadata: metadata);
    }

    /// <summary>Builds the per-chunk metadata document stored with the index entry.</summary>
    private static JsonDocument BuildMetadata(
        string cveId,
        string status,
        string service,
        string tenant,
        IReadOnlyList<string> tags)
    {
        // SerializeToDocument avoids the intermediate string of Parse(Serialize(...)).
        return JsonSerializer.SerializeToDocument(new
        {
            domain = "vex",
            cveId,
            status,
            service,
            tenant,
            tags
        });
    }

    /// <summary>Resolves a configured path against the repository root when relative.</summary>
    private string ResolvePath(string configuredPath)
    {
        if (Path.IsPathRooted(configuredPath))
        {
            return configuredPath;
        }
        var root = string.IsNullOrWhiteSpace(_options.RepositoryRoot) ? "." : _options.RepositoryRoot;
        return Path.GetFullPath(Path.Combine(root, configuredPath));
    }

    /// <summary>Reads a trimmed string property, or <c>null</c> when absent/not a string.</summary>
    private static string? ReadString(JsonElement obj, string propertyName)
    {
        return obj.TryGetProperty(propertyName, out var prop) && prop.ValueKind == JsonValueKind.String
            ? prop.GetString()?.Trim()
            : null;
    }

    /// <summary>Reads a timestamp property, or <c>null</c> when missing/unparseable.</summary>
    private static DateTimeOffset? ReadTimestamp(JsonElement obj, string propertyName)
    {
        var raw = ReadString(obj, propertyName);
        if (raw is null)
        {
            return null;
        }
        // Culture-invariant parse: snapshot timestamps are machine-generated and must
        // not be interpreted through the host locale (CA1305).
        return DateTimeOffset.TryParse(
            raw,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out var timestamp)
            ? timestamp
            : null;
    }

    /// <summary>
    /// Reads a string-array property (trimmed, de-duplicated case-insensitively, sorted
    /// case-insensitively); returns <paramref name="fallback"/> when missing/not an array.
    /// </summary>
    private static IReadOnlyList<string> ReadStringArray(JsonElement obj, string propertyName, IReadOnlyList<string> fallback)
    {
        if (!obj.TryGetProperty(propertyName, out var prop) || prop.ValueKind != JsonValueKind.Array)
        {
            return fallback;
        }
        return prop.EnumerateArray()
            .Where(static value => value.ValueKind == JsonValueKind.String)
            .Select(static value => value.GetString())
            .Where(static value => !string.IsNullOrWhiteSpace(value))
            .Select(static value => value!.Trim())
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderBy(static value => value, StringComparer.OrdinalIgnoreCase)
            .ToArray();
    }
}

View File

@@ -0,0 +1,319 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.AdvisoryAI.KnowledgeSearch;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Analytics;
/// <summary>
/// Persists search analytics events and per-user search history to PostgreSQL
/// (advisoryai.search_events / advisoryai.search_history). All operations are
/// best-effort: database failures are logged and swallowed so analytics can never
/// break the search path. Every method no-ops when no connection string is configured.
/// </summary>
internal sealed class SearchAnalyticsService
{
    // Single source of truth for the event INSERT so the column list cannot drift
    // between the single-event and batch recording paths.
    private const string InsertEventSql = @"
                INSERT INTO advisoryai.search_events (tenant_id, user_id, event_type, query, entity_key, domain, result_count, position, duration_ms)
                VALUES (@tenant_id, @user_id, @event_type, @query, @entity_key, @domain, @result_count, @position, @duration_ms)";

    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<SearchAnalyticsService> _logger;

    public SearchAnalyticsService(
        IOptions<KnowledgeSearchOptions> options,
        ILogger<SearchAnalyticsService> logger)
    {
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>Binds every column of <see cref="InsertEventSql"/> from <paramref name="evt"/>.</summary>
    private static void BindEventParameters(NpgsqlCommand cmd, SearchAnalyticsEvent evt)
    {
        cmd.Parameters.AddWithValue("tenant_id", evt.TenantId);
        cmd.Parameters.AddWithValue("user_id", (object?)evt.UserId ?? DBNull.Value);
        cmd.Parameters.AddWithValue("event_type", evt.EventType);
        cmd.Parameters.AddWithValue("query", evt.Query);
        cmd.Parameters.AddWithValue("entity_key", (object?)evt.EntityKey ?? DBNull.Value);
        cmd.Parameters.AddWithValue("domain", (object?)evt.Domain ?? DBNull.Value);
        cmd.Parameters.AddWithValue("result_count", (object?)evt.ResultCount ?? DBNull.Value);
        cmd.Parameters.AddWithValue("position", (object?)evt.Position ?? DBNull.Value);
        cmd.Parameters.AddWithValue("duration_ms", (object?)evt.DurationMs ?? DBNull.Value);
    }

    /// <summary>Records a single analytics event (best-effort).</summary>
    public async Task RecordEventAsync(SearchAnalyticsEvent evt, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            await using var cmd = new NpgsqlCommand(InsertEventSql, conn);
            BindEventParameters(cmd, evt);
            await cmd.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Deliberate swallow: analytics must not break the caller.
            _logger.LogWarning(ex, "Failed to record search analytics event");
        }
    }

    /// <summary>Records a batch of analytics events over a single connection (best-effort).</summary>
    public async Task RecordEventsAsync(IReadOnlyList<SearchAnalyticsEvent> events, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(_options.ConnectionString) || events.Count == 0) return;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            foreach (var evt in events)
            {
                await using var cmd = new NpgsqlCommand(InsertEventSql, conn);
                BindEventParameters(cmd, evt);
                await cmd.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to record search analytics events batch ({Count} events)", events.Count);
        }
    }

    /// <summary>
    /// Returns entity_key → click count for the tenant over the last <paramref name="days"/>
    /// days (top 1000 by clicks). Empty on failure or when unconfigured.
    /// </summary>
    public async Task<IReadOnlyDictionary<string, int>> GetPopularityMapAsync(string tenantId, int days = 30, CancellationToken ct = default)
    {
        var map = new Dictionary<string, int>(StringComparer.Ordinal);
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return map;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            await using var cmd = new NpgsqlCommand(@"
                SELECT entity_key, COUNT(*) as click_count
                FROM advisoryai.search_events
                WHERE event_type = 'click'
                  AND tenant_id = @tenant
                  AND created_at > now() - make_interval(days => @days)
                  AND entity_key IS NOT NULL
                GROUP BY entity_key
                ORDER BY click_count DESC
                LIMIT 1000", conn);
            cmd.Parameters.AddWithValue("tenant", tenantId);
            cmd.Parameters.AddWithValue("days", days);
            await using var reader = await cmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
            while (await reader.ReadAsync(ct).ConfigureAwait(false))
            {
                // COUNT(*) comes back as bigint; narrow to int for the map.
                map[reader.GetString(0)] = (int)reader.GetInt64(1);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to load popularity map");
        }
        return map;
    }

    /// <summary>
    /// Upserts a search-history row for the user and trims the history to the most
    /// recent 50 entries (best-effort).
    /// </summary>
    public async Task RecordHistoryAsync(string tenantId, string userId, string query, int resultCount, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            await using var cmd = new NpgsqlCommand(@"
                INSERT INTO advisoryai.search_history (tenant_id, user_id, query, result_count)
                VALUES (@tenant_id, @user_id, @query, @result_count)
                ON CONFLICT (tenant_id, user_id, query) DO UPDATE SET
                    searched_at = now(),
                    result_count = @result_count", conn);
            cmd.Parameters.AddWithValue("tenant_id", tenantId);
            cmd.Parameters.AddWithValue("user_id", userId);
            cmd.Parameters.AddWithValue("query", query);
            cmd.Parameters.AddWithValue("result_count", resultCount);
            await cmd.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
            // Trim to max 50 entries per user.
            await using var trimCmd = new NpgsqlCommand(@"
                DELETE FROM advisoryai.search_history
                WHERE history_id IN (
                    SELECT history_id FROM advisoryai.search_history
                    WHERE tenant_id = @tenant_id AND user_id = @user_id
                    ORDER BY searched_at DESC
                    OFFSET 50
                )", conn);
            trimCmd.Parameters.AddWithValue("tenant_id", tenantId);
            trimCmd.Parameters.AddWithValue("user_id", userId);
            await trimCmd.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to record search history");
        }
    }

    /// <summary>Returns the user's most recent history entries, newest first.</summary>
    public async Task<IReadOnlyList<SearchHistoryEntry>> GetHistoryAsync(string tenantId, string userId, int limit = 50, CancellationToken ct = default)
    {
        var entries = new List<SearchHistoryEntry>();
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return entries;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            await using var cmd = new NpgsqlCommand(@"
                SELECT history_id, query, result_count, searched_at
                FROM advisoryai.search_history
                WHERE tenant_id = @tenant_id AND user_id = @user_id
                ORDER BY searched_at DESC
                LIMIT @limit", conn);
            cmd.Parameters.AddWithValue("tenant_id", tenantId);
            cmd.Parameters.AddWithValue("user_id", userId);
            cmd.Parameters.AddWithValue("limit", limit);
            await using var reader = await cmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
            while (await reader.ReadAsync(ct).ConfigureAwait(false))
            {
                entries.Add(new SearchHistoryEntry(
                    reader.GetGuid(0).ToString(),
                    reader.GetString(1),
                    reader.IsDBNull(2) ? null : reader.GetInt32(2),
                    reader.GetDateTime(3)));
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to load search history");
        }
        return entries;
    }

    /// <summary>Deletes all history rows for the user (best-effort).</summary>
    public async Task ClearHistoryAsync(string tenantId, string userId, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            await using var cmd = new NpgsqlCommand(@"
                DELETE FROM advisoryai.search_history
                WHERE tenant_id = @tenant_id AND user_id = @user_id", conn);
            cmd.Parameters.AddWithValue("tenant_id", tenantId);
            cmd.Parameters.AddWithValue("user_id", userId);
            await cmd.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to clear search history");
        }
    }

    /// <summary>
    /// Finds successful queries (result_count > 0) similar to the given query using
    /// PostgreSQL pg_trgm similarity(). Returns up to <paramref name="limit"/> matches
    /// ordered by similarity descending.
    /// Sprint: G10-004
    /// </summary>
    public async Task<IReadOnlyList<string>> FindSimilarSuccessfulQueriesAsync(
        string tenantId, string query, int limit = 3, CancellationToken ct = default)
    {
        var results = new List<string>();
        if (string.IsNullOrWhiteSpace(_options.ConnectionString) || string.IsNullOrWhiteSpace(query))
            return results;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            await using var cmd = new NpgsqlCommand(@"
                SELECT DISTINCT query
                FROM advisoryai.search_history
                WHERE tenant_id = @tenant_id
                  AND result_count > 0
                  AND lower(query) <> lower(@query)
                  AND similarity(query, @query) > 0.2
                ORDER BY similarity(query, @query) DESC
                LIMIT @limit", conn);
            // Short timeout: this is a suggestion feature on the interactive path.
            cmd.CommandTimeout = 5;
            cmd.Parameters.AddWithValue("tenant_id", tenantId);
            cmd.Parameters.AddWithValue("query", query);
            cmd.Parameters.AddWithValue("limit", limit);
            await using var reader = await cmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
            while (await reader.ReadAsync(ct).ConfigureAwait(false))
            {
                results.Add(reader.GetString(0));
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to find similar successful queries for '{Query}'", query);
        }
        return results;
    }

    /// <summary>Deletes a single history entry by id; ignores non-GUID ids (best-effort).</summary>
    public async Task DeleteHistoryEntryAsync(string tenantId, string userId, string historyId, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return;
        // Parse once and reuse the result (previously TryParse + a second Parse).
        if (!Guid.TryParse(historyId, out var parsedHistoryId)) return;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            await using var cmd = new NpgsqlCommand(@"
                DELETE FROM advisoryai.search_history
                WHERE tenant_id = @tenant_id AND user_id = @user_id AND history_id = @history_id", conn);
            cmd.Parameters.AddWithValue("tenant_id", tenantId);
            cmd.Parameters.AddWithValue("user_id", userId);
            cmd.Parameters.AddWithValue("history_id", parsedHistoryId);
            await cmd.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to delete search history entry");
        }
    }
}
/// <summary>
/// One analytics event recorded into advisoryai.search_events. The queries in
/// <see cref="SearchAnalyticsService"/> filter on event types 'search' and 'click';
/// optional fields are stored as NULL when omitted.
/// </summary>
internal record SearchAnalyticsEvent(
    string TenantId,
    string EventType,
    string Query,
    string? UserId = null,
    string? EntityKey = null,
    string? Domain = null,
    int? ResultCount = null,
    int? Position = null,
    int? DurationMs = null);
/// <summary>
/// One row of a user's search history (advisoryai.search_history), as returned by
/// <see cref="SearchAnalyticsService.GetHistoryAsync"/>. HistoryId is the row's
/// GUID rendered as a string; ResultCount is null when the column was NULL.
/// </summary>
internal record SearchHistoryEntry(
    string HistoryId,
    string Query,
    int? ResultCount,
    DateTime SearchedAt);

View File

@@ -0,0 +1,298 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.AdvisoryAI.KnowledgeSearch;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Analytics;
/// <summary>
/// Monitors search quality by analysing feedback data and zero-result queries.
/// Provides CRUD for search_quality_alerts and search_feedback tables.
/// All database operations are best-effort: failures are logged and swallowed.
/// Sprint: SPRINT_20260224_110 (G10-001, G10-002)
/// </summary>
internal sealed class SearchQualityMonitor
{
    // Whitelists used by the static validation helpers below; callers are expected
    // to validate before calling StoreFeedbackAsync / UpdateAlertAsync.
    private static readonly HashSet<string> AllowedSignals = new(StringComparer.Ordinal) { "helpful", "not_helpful" };
    private static readonly HashSet<string> AllowedAlertStatuses = new(StringComparer.Ordinal) { "acknowledged", "resolved" };

    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<SearchQualityMonitor> _logger;

    public SearchQualityMonitor(
        IOptions<KnowledgeSearchOptions> options,
        ILogger<SearchQualityMonitor> logger)
    {
        _options = options.Value;
        _logger = logger;
    }

    // ----- Feedback CRUD -----

    /// <summary>Inserts one feedback row into advisoryai.search_feedback (best-effort).</summary>
    public async Task StoreFeedbackAsync(SearchFeedbackEntry entry, CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            await using var cmd = new NpgsqlCommand(@"
                INSERT INTO advisoryai.search_feedback
                    (tenant_id, user_id, query, entity_key, domain, position, signal, comment)
                VALUES
                    (@tenant_id, @user_id, @query, @entity_key, @domain, @position, @signal, @comment)", conn);
            cmd.Parameters.AddWithValue("tenant_id", entry.TenantId);
            cmd.Parameters.AddWithValue("user_id", (object?)entry.UserId ?? DBNull.Value);
            cmd.Parameters.AddWithValue("query", entry.Query);
            cmd.Parameters.AddWithValue("entity_key", entry.EntityKey);
            cmd.Parameters.AddWithValue("domain", entry.Domain);
            cmd.Parameters.AddWithValue("position", entry.Position);
            cmd.Parameters.AddWithValue("signal", entry.Signal);
            cmd.Parameters.AddWithValue("comment", (object?)entry.Comment ?? DBNull.Value);
            await cmd.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to store search feedback");
        }
    }

    // ----- Quality Alerts -----

    /// <summary>
    /// Maps the current row of an alert query (column order: alert_id, tenant_id,
    /// alert_type, query, occurrence_count, first_seen, last_seen, status, resolution,
    /// created_at) to a <see cref="SearchQualityAlertEntry"/>. Shared by
    /// GetAlertsAsync and UpdateAlertAsync so the mapping cannot drift.
    /// </summary>
    private static SearchQualityAlertEntry ReadAlert(NpgsqlDataReader reader) => new()
    {
        AlertId = reader.GetGuid(0).ToString(),
        TenantId = reader.GetString(1),
        AlertType = reader.GetString(2),
        Query = reader.GetString(3),
        OccurrenceCount = reader.GetInt32(4),
        FirstSeen = reader.GetDateTime(5),
        LastSeen = reader.GetDateTime(6),
        Status = reader.GetString(7),
        Resolution = reader.IsDBNull(8) ? null : reader.GetString(8),
        CreatedAt = reader.GetDateTime(9),
    };

    /// <summary>
    /// Lists quality alerts for a tenant, optionally filtered by status and alert type,
    /// ordered by occurrence count then recency. Empty on failure or when unconfigured.
    /// </summary>
    public async Task<IReadOnlyList<SearchQualityAlertEntry>> GetAlertsAsync(
        string tenantId,
        string? status = null,
        string? alertType = null,
        int limit = 100,
        CancellationToken ct = default)
    {
        var alerts = new List<SearchQualityAlertEntry>();
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return alerts;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            // Filters are appended conditionally but always bound as parameters.
            var sql = @"
                SELECT alert_id, tenant_id, alert_type, query, occurrence_count,
                       first_seen, last_seen, status, resolution, created_at
                FROM advisoryai.search_quality_alerts
                WHERE tenant_id = @tenant_id";
            if (!string.IsNullOrWhiteSpace(status))
                sql += " AND status = @status";
            if (!string.IsNullOrWhiteSpace(alertType))
                sql += " AND alert_type = @alert_type";
            sql += " ORDER BY occurrence_count DESC, last_seen DESC LIMIT @limit";
            await using var cmd = new NpgsqlCommand(sql, conn);
            cmd.Parameters.AddWithValue("tenant_id", tenantId);
            cmd.Parameters.AddWithValue("limit", limit);
            if (!string.IsNullOrWhiteSpace(status))
                cmd.Parameters.AddWithValue("status", status);
            if (!string.IsNullOrWhiteSpace(alertType))
                cmd.Parameters.AddWithValue("alert_type", alertType);
            await using var reader = await cmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
            while (await reader.ReadAsync(ct).ConfigureAwait(false))
            {
                alerts.Add(ReadAlert(reader));
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to load search quality alerts");
        }
        return alerts;
    }

    /// <summary>
    /// Updates an alert's status/resolution and returns the updated row, or <c>null</c>
    /// when the id is invalid, the status is not allowed, the row does not exist for
    /// the tenant, or the database call fails.
    /// </summary>
    public async Task<SearchQualityAlertEntry?> UpdateAlertAsync(
        string tenantId,
        string alertId,
        string status,
        string? resolution,
        CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return null;
        if (!Guid.TryParse(alertId, out var parsedAlertId)) return null;
        if (!AllowedAlertStatuses.Contains(status)) return null;
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            await using var cmd = new NpgsqlCommand(@"
                UPDATE advisoryai.search_quality_alerts
                SET status = @status, resolution = @resolution
                WHERE alert_id = @alert_id AND tenant_id = @tenant_id
                RETURNING alert_id, tenant_id, alert_type, query, occurrence_count,
                          first_seen, last_seen, status, resolution, created_at", conn);
            cmd.Parameters.AddWithValue("alert_id", parsedAlertId);
            cmd.Parameters.AddWithValue("tenant_id", tenantId);
            cmd.Parameters.AddWithValue("status", status);
            cmd.Parameters.AddWithValue("resolution", (object?)resolution ?? DBNull.Value);
            await using var reader = await cmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
            if (await reader.ReadAsync(ct).ConfigureAwait(false))
            {
                return ReadAlert(reader);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to update search quality alert {AlertId}", alertId);
        }
        return null;
    }

    // ----- Quality Metrics -----

    /// <summary>
    /// Aggregates search quality metrics for the tenant over the requested period
    /// ("24h", "7d" default, or "30d"): total searches, zero-result rate (%), average
    /// result count, and helpful-feedback score (%).
    /// </summary>
    public async Task<SearchQualityMetricsEntry> GetMetricsAsync(
        string tenantId,
        string period = "7d",
        CancellationToken ct = default)
    {
        var metrics = new SearchQualityMetricsEntry { Period = period };
        if (string.IsNullOrWhiteSpace(_options.ConnectionString)) return metrics;
        var days = period switch
        {
            "24h" => 1,
            "30d" => 30,
            _ => 7,
        };
        try
        {
            await using var conn = new NpgsqlConnection(_options.ConnectionString);
            await conn.OpenAsync(ct).ConfigureAwait(false);
            // Total searches and zero-result rate from search_events
            await using var searchCmd = new NpgsqlCommand(@"
                SELECT
                    COUNT(*) AS total_searches,
                    COALESCE(AVG(CASE WHEN result_count = 0 THEN 1.0 ELSE 0.0 END), 0) AS zero_result_rate,
                    COALESCE(AVG(result_count), 0) AS avg_result_count
                FROM advisoryai.search_events
                WHERE event_type = 'search'
                  AND tenant_id = @tenant_id
                  AND created_at > now() - make_interval(days => @days)", conn);
            searchCmd.Parameters.AddWithValue("tenant_id", tenantId);
            searchCmd.Parameters.AddWithValue("days", days);
            await using var searchReader = await searchCmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
            if (await searchReader.ReadAsync(ct).ConfigureAwait(false))
            {
                metrics.TotalSearches = (int)searchReader.GetInt64(0);
                // Rates are stored as fractions in SQL; surface them as percentages.
                metrics.ZeroResultRate = Math.Round(searchReader.GetDouble(1) * 100, 1);
                metrics.AvgResultCount = Math.Round(searchReader.GetDouble(2), 1);
            }
            // The reader must be closed before issuing the next command on this connection.
            await searchReader.CloseAsync().ConfigureAwait(false);
            // Feedback score from search_feedback
            await using var feedbackCmd = new NpgsqlCommand(@"
                SELECT
                    COALESCE(AVG(CASE WHEN signal = 'helpful' THEN 1.0 ELSE 0.0 END), 0) AS feedback_score
                FROM advisoryai.search_feedback
                WHERE tenant_id = @tenant_id
                  AND created_at > now() - make_interval(days => @days)", conn);
            feedbackCmd.Parameters.AddWithValue("tenant_id", tenantId);
            feedbackCmd.Parameters.AddWithValue("days", days);
            await using var feedbackReader = await feedbackCmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
            if (await feedbackReader.ReadAsync(ct).ConfigureAwait(false))
            {
                metrics.FeedbackScore = Math.Round(feedbackReader.GetDouble(0) * 100, 1);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to load search quality metrics");
        }
        return metrics;
    }

    // ----- Validation helpers -----

    /// <summary>True when <paramref name="signal"/> is one of "helpful" / "not_helpful".</summary>
    public static bool IsValidSignal(string? signal)
    {
        return !string.IsNullOrWhiteSpace(signal) && AllowedSignals.Contains(signal);
    }

    /// <summary>True when <paramref name="status"/> is one of "acknowledged" / "resolved".</summary>
    public static bool IsValidAlertStatus(string? status)
    {
        return !string.IsNullOrWhiteSpace(status) && AllowedAlertStatuses.Contains(status);
    }
}
/// <summary>
/// One user-feedback submission persisted to advisoryai.search_feedback by
/// <see cref="SearchQualityMonitor.StoreFeedbackAsync"/>. Signal is expected to be
/// validated via <see cref="SearchQualityMonitor.IsValidSignal"/> before storage.
/// </summary>
internal sealed record SearchFeedbackEntry
{
    public required string TenantId { get; init; }
    public string? UserId { get; init; }
    public required string Query { get; init; }
    public required string EntityKey { get; init; }
    public required string Domain { get; init; }
    // Zero-based(?) rank of the result the feedback refers to — TODO confirm indexing.
    public required int Position { get; init; }
    public required string Signal { get; init; }
    public string? Comment { get; init; }
}
/// <summary>
/// A row of advisoryai.search_quality_alerts as read by
/// <see cref="SearchQualityMonitor"/>. Status defaults to "open"; updates go through
/// <see cref="SearchQualityMonitor.UpdateAlertAsync"/> which only accepts
/// "acknowledged" or "resolved".
/// </summary>
internal sealed class SearchQualityAlertEntry
{
    public string AlertId { get; init; } = string.Empty;
    public string TenantId { get; init; } = string.Empty;
    public string AlertType { get; init; } = string.Empty;
    public string Query { get; init; } = string.Empty;
    public int OccurrenceCount { get; init; }
    public DateTime FirstSeen { get; init; }
    public DateTime LastSeen { get; init; }
    public string Status { get; init; } = "open";
    public string? Resolution { get; init; }
    public DateTime CreatedAt { get; init; }
}
/// <summary>
/// Aggregated quality metrics produced by <see cref="SearchQualityMonitor.GetMetricsAsync"/>.
/// ZeroResultRate and FeedbackScore are percentages rounded to one decimal;
/// Period is the requested window string ("24h", "7d", "30d").
/// </summary>
internal sealed class SearchQualityMetricsEntry
{
    public int TotalSearches { get; set; }
    public double ZeroResultRate { get; set; }
    public double AvgResultCount { get; set; }
    public double FeedbackScore { get; set; }
    public string Period { get; set; } = "7d";
}

View File

@@ -0,0 +1,94 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using StellaOps.AdvisoryAI.KnowledgeSearch;
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// PostgreSQL-backed implementation of <see cref="IEntityAliasService"/> over the
/// advisoryai.entity_alias table. The data source is created lazily and only when the
/// feature is enabled and a connection string is configured; otherwise all operations
/// no-op.
/// </summary>
internal sealed class EntityAliasService : IEntityAliasService
{
    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<EntityAliasService> _logger;
    // NOTE(review): NpgsqlDataSource is disposable but is never disposed here —
    // acceptable for a singleton that lives for the process lifetime; confirm the DI
    // registration, or implement IAsyncDisposable.
    private readonly Lazy<NpgsqlDataSource?> _dataSource;

    public EntityAliasService(
        IOptions<KnowledgeSearchOptions> options,
        ILogger<EntityAliasService> logger)
    {
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _dataSource = new Lazy<NpgsqlDataSource?>(() =>
        {
            if (!_options.Enabled || string.IsNullOrWhiteSpace(_options.ConnectionString))
            {
                return null;
            }
            return new NpgsqlDataSourceBuilder(_options.ConnectionString).Build();
        }, isThreadSafe: true);
    }

    /// <summary>
    /// Resolves an alias (case-insensitively) to the entities it refers to. Returns an
    /// empty list for blank aliases or when the service is not configured.
    /// </summary>
    public async Task<IReadOnlyList<(string EntityKey, string EntityType)>> ResolveAliasesAsync(
        string alias,
        CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(alias) || _dataSource.Value is null)
        {
            return [];
        }
        const string sql = """
            SELECT entity_key, entity_type
            FROM advisoryai.entity_alias
            WHERE lower(alias) = lower(@alias)
            ORDER BY entity_key, entity_type;
            """;
        await using var command = _dataSource.Value.CreateCommand(sql);
        command.CommandTimeout = 10;
        command.Parameters.AddWithValue("alias", alias.Trim());
        var results = new List<(string, string)>();
        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            results.Add((reader.GetString(0), reader.GetString(1)));
        }
        return results;
    }

    /// <summary>
    /// Registers (upserts) an alias for an entity. No-ops when key, type, or alias is
    /// blank or the service is not configured. A null <paramref name="source"/> is
    /// stored as an empty string.
    /// </summary>
    public async Task RegisterAliasAsync(
        string entityKey,
        string entityType,
        string alias,
        string source,
        CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(entityKey) ||
            string.IsNullOrWhiteSpace(entityType) ||
            string.IsNullOrWhiteSpace(alias) ||
            _dataSource.Value is null)
        {
            return;
        }
        const string sql = """
            INSERT INTO advisoryai.entity_alias (alias, entity_key, entity_type, source, created_at)
            VALUES (@alias, @entity_key, @entity_type, @source, NOW())
            ON CONFLICT (alias, entity_key) DO UPDATE SET
                entity_type = EXCLUDED.entity_type,
                source = EXCLUDED.source;
            """;
        await using var command = _dataSource.Value.CreateCommand(sql);
        command.CommandTimeout = 10;
        command.Parameters.AddWithValue("alias", alias.Trim());
        command.Parameters.AddWithValue("entity_key", entityKey.Trim());
        command.Parameters.AddWithValue("entity_type", entityType.Trim());
        // Fix: source was the only unguarded parameter — a null value previously threw
        // NullReferenceException on Trim(). Coalesce to empty instead.
        command.Parameters.AddWithValue("source", source?.Trim() ?? string.Empty);
        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }
}

View File

@@ -0,0 +1,15 @@
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// Resolves and registers alternate identifiers (aliases) for searchable entities.
/// NOTE(review): the Postgres implementation stores these in advisoryai.entity_alias;
/// other implementations may differ.
/// </summary>
public interface IEntityAliasService
{
    /// <summary>
    /// Returns every (entity key, entity type) pair registered for <paramref name="alias"/>;
    /// implementations return an empty list when nothing matches.
    /// </summary>
    Task<IReadOnlyList<(string EntityKey, string EntityType)>> ResolveAliasesAsync(
        string alias,
        CancellationToken cancellationToken);
    /// <summary>
    /// Registers (or updates) an alias for the given entity, recording the
    /// originating <paramref name="source"/>.
    /// </summary>
    Task RegisterAliasAsync(
        string entityKey,
        string entityType,
        string alias,
        string source,
        CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,10 @@
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// Produces unified-search chunks for one source domain so an indexer can ingest
/// them into the shared search index.
/// </summary>
public interface ISearchIngestionAdapter
{
    /// <summary>Identifier of the domain this adapter ingests.</summary>
    string Domain { get; }
    /// <summary>Entity types this adapter can emit chunks for.</summary>
    IReadOnlyList<string> SupportedEntityTypes { get; }
    /// <summary>Builds the chunks for this domain; may be empty when there is nothing to index.</summary>
    Task<IReadOnlyList<UnifiedChunk>> ProduceChunksAsync(CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,8 @@
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// Drives indexing of content produced by the registered search-ingestion adapters.
/// </summary>
public interface IUnifiedSearchIndexer
{
    /// <summary>Indexes content from all adapters. NOTE(review): presumably incremental — confirm against implementations.</summary>
    Task IndexAllAsync(CancellationToken cancellationToken);
    /// <summary>Rebuilds the whole index and returns a summary of the run.</summary>
    Task<UnifiedSearchIndexSummary> RebuildAllAsync(CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,6 @@
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// Entry point for unified search: executes a <see cref="UnifiedSearchRequest"/>
/// and returns the combined <see cref="UnifiedSearchResponse"/>.
/// </summary>
public interface IUnifiedSearchService
{
    /// <summary>Executes the search described by <paramref name="request"/>.</summary>
    Task<UnifiedSearchResponse> SearchAsync(UnifiedSearchRequest request, CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,137 @@
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
namespace StellaOps.AdvisoryAI.UnifiedSearch.QueryUnderstanding;
/// <summary>
/// Computes per-domain relevance weights for a unified-search query. Starting from a
/// uniform base weight, boosts are layered on for detected vulnerability identifiers
/// (CVE/GHSA), security/policy/troubleshoot intent, explicit domain filters, and —
/// when enabled — role-based bias derived from the caller's scopes.
/// </summary>
internal sealed class DomainWeightCalculator
{
    private const double BaseWeight = 1.0;
    private const double CveBoostFindings = 0.35;
    private const double CveBoostVex = 0.30;
    private const double CveBoostGraph = 0.25;
    private const double SecurityBoostFindings = 0.20;
    private const double SecurityBoostVex = 0.15;
    private const double PolicyBoostPolicy = 0.30;
    private const double TroubleshootBoostKnowledge = 0.15;
    private const double TroubleshootBoostOpsMemory = 0.10;
    // Boost applied when the caller explicitly filters on a known domain.
    // Previously an inline magic number; named for consistency with the
    // other boost constants above.
    private const double ExplicitDomainFilterBoost = 0.25;
    // Role-based bias constants (Sprint 106 / G6)
    private const double RoleScannerFindingsBoost = 0.15;
    private const double RoleScannerVexBoost = 0.10;
    private const double RolePolicyBoost = 0.20;
    private const double RoleOpsKnowledgeBoost = 0.15;
    private const double RoleOpsMemoryBoost = 0.10;
    private const double RoleReleasePolicyBoost = 0.10;
    private const double RoleReleaseFindingsBoost = 0.10;
    private readonly EntityExtractor _entityExtractor;
    private readonly IntentClassifier _intentClassifier;
    private readonly KnowledgeSearchOptions _options;

    public DomainWeightCalculator(
        EntityExtractor entityExtractor,
        IntentClassifier intentClassifier,
        IOptions<KnowledgeSearchOptions> options)
    {
        _entityExtractor = entityExtractor ?? throw new ArgumentNullException(nameof(entityExtractor));
        _intentClassifier = intentClassifier ?? throw new ArgumentNullException(nameof(intentClassifier));
        _options = options?.Value ?? new KnowledgeSearchOptions();
    }

    /// <summary>
    /// Returns a weight per domain key ("knowledge", "findings", "vex", "policy",
    /// "graph", "ops_memory", "timeline"); higher values mean the domain should rank
    /// earlier for this query.
    /// </summary>
    /// <param name="query">The normalized query text.</param>
    /// <param name="entities">Entity mentions already extracted from the query.</param>
    /// <param name="filters">Optional request filters (domains, user scopes).</param>
    public IReadOnlyDictionary<string, double> ComputeWeights(
        string query,
        IReadOnlyList<EntityMention> entities,
        UnifiedSearchFilter? filters)
    {
        var weights = new Dictionary<string, double>(StringComparer.Ordinal)
        {
            ["knowledge"] = BaseWeight,
            ["findings"] = BaseWeight,
            ["vex"] = BaseWeight,
            ["policy"] = BaseWeight,
            ["graph"] = BaseWeight,
            ["ops_memory"] = BaseWeight,
            ["timeline"] = BaseWeight
        };
        // Vulnerability identifiers in the query point strongly at findings/vex/graph.
        var hasCve = entities.Any(static e =>
            e.EntityType.Equals("cve", StringComparison.OrdinalIgnoreCase) ||
            e.EntityType.Equals("ghsa", StringComparison.OrdinalIgnoreCase));
        if (hasCve)
        {
            weights["findings"] += CveBoostFindings;
            weights["vex"] += CveBoostVex;
            weights["graph"] += CveBoostGraph;
        }
        if (_intentClassifier.HasSecurityIntent(query))
        {
            weights["findings"] += SecurityBoostFindings;
            weights["vex"] += SecurityBoostVex;
        }
        if (_intentClassifier.HasPolicyIntent(query))
        {
            weights["policy"] += PolicyBoostPolicy;
        }
        var intent = _intentClassifier.Classify(query);
        if (intent == "troubleshoot")
        {
            weights["knowledge"] += TroubleshootBoostKnowledge;
            weights["ops_memory"] += TroubleshootBoostOpsMemory;
        }
        // Explicit domain filters bump the selected domains; unknown names are ignored.
        if (filters?.Domains is { Count: > 0 })
        {
            foreach (var domain in filters.Domains)
            {
                if (weights.ContainsKey(domain))
                {
                    weights[domain] += ExplicitDomainFilterBoost;
                }
            }
        }
        // Role-based domain bias (Sprint 106 / G6)
        if (_options.RoleBasedBiasEnabled && filters?.UserScopes is { Count: > 0 })
        {
            ApplyRoleBasedBias(weights, filters.UserScopes);
        }
        return weights;
    }

    /// <summary>
    /// Applies scope-driven boosts: scanner/findings readers favor findings+vex,
    /// policy scopes favor policy, ops/doctor scopes favor knowledge+ops_memory,
    /// and release approvers favor policy+findings.
    /// </summary>
    private static void ApplyRoleBasedBias(Dictionary<string, double> weights, IReadOnlyList<string> scopes)
    {
        var scopeSet = new HashSet<string>(scopes, StringComparer.OrdinalIgnoreCase);
        // scanner:read or findings:read -> boost findings + vex
        if (scopeSet.Contains("scanner:read") || scopeSet.Contains("findings:read"))
        {
            weights["findings"] += RoleScannerFindingsBoost;
            weights["vex"] += RoleScannerVexBoost;
        }
        // policy:read or policy:write -> boost policy
        if (scopeSet.Contains("policy:read") || scopeSet.Contains("policy:write"))
        {
            weights["policy"] += RolePolicyBoost;
        }
        // ops:read or doctor:run -> boost knowledge + ops_memory
        if (scopeSet.Contains("ops:read") || scopeSet.Contains("doctor:run"))
        {
            weights["knowledge"] += RoleOpsKnowledgeBoost;
            weights["ops_memory"] += RoleOpsMemoryBoost;
        }
        // release:approve -> boost policy + findings
        if (scopeSet.Contains("release:approve"))
        {
            weights["policy"] += RoleReleasePolicyBoost;
            weights["findings"] += RoleReleaseFindingsBoost;
        }
    }
}

View File

@@ -0,0 +1,106 @@
using System.Text.RegularExpressions;
namespace StellaOps.AdvisoryAI.UnifiedSearch.QueryUnderstanding;
/// <summary>
/// Extracts well-known entity references (CVE/GHSA ids, package URLs, check codes,
/// and container image references) from free-text search queries via regex scanning.
/// </summary>
internal sealed class EntityExtractor
{
    private static readonly Regex CvePattern = new(
        @"\bCVE-\d{4}-\d{4,}\b",
        RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);
    private static readonly Regex GhsaPattern = new(
        @"\bGHSA-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}\b",
        RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);
    private static readonly Regex PurlPattern = new(
        @"\bpkg:[a-z]+/[^\s]+",
        RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);
    private static readonly Regex CheckCodePattern = new(
        @"\b[A-Z]{2,4}-\d{3,}\b",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);
    private static readonly Regex ImageRefPattern = new(
        @"\b[\w.\-]+(?::\d+)?/[\w.\-/]+(?:@sha256:[a-f0-9]{64}|:[\w.\-]+)\b",
        RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);

    /// <summary>
    /// Scans <paramref name="query"/> for entity mentions and returns them ordered
    /// by position (ties broken by entity type). Empty/blank input yields no mentions.
    /// </summary>
    public IReadOnlyList<EntityMention> Extract(string query)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            return [];
        }
        var found = new List<EntityMention>();

        // Collects all matches of one pattern, optionally upper-casing the value
        // (CVE/GHSA ids are normalized to upper case).
        void Collect(Regex pattern, string entityType, bool uppercase)
        {
            foreach (Match hit in pattern.Matches(query))
            {
                var value = uppercase ? hit.Value.ToUpperInvariant() : hit.Value;
                found.Add(new EntityMention(value, entityType, hit.Index, hit.Length));
            }
        }

        Collect(CvePattern, "cve", uppercase: true);
        Collect(GhsaPattern, "ghsa", uppercase: true);
        Collect(PurlPattern, "purl", uppercase: false);
        // Generic check codes are only kept when they are not really a CVE/GHSA id
        // and do not overlap an already-recorded mention.
        foreach (Match hit in CheckCodePattern.Matches(query))
        {
            var isVulnerabilityId = CvePattern.IsMatch(hit.Value) || GhsaPattern.IsMatch(hit.Value);
            if (!isVulnerabilityId && !OverlapsExisting(found, hit))
            {
                found.Add(new EntityMention(hit.Value, "check_code", hit.Index, hit.Length));
            }
        }
        Collect(ImageRefPattern, "image_ref", uppercase: false);

        return found
            .OrderBy(static m => m.StartIndex)
            .ThenBy(static m => m.EntityType, StringComparer.Ordinal)
            .ToArray();
    }

    /// <summary>True when the candidate match shares any character span with a recorded mention.</summary>
    private static bool OverlapsExisting(List<EntityMention> existing, Match candidate)
    {
        var candidateStart = candidate.Index;
        var candidateEnd = candidate.Index + candidate.Length;
        foreach (var mention in existing)
        {
            var mentionEnd = mention.StartIndex + mention.Length;
            if (candidateStart < mentionEnd && candidateEnd > mention.StartIndex)
            {
                return true;
            }
        }
        return false;
    }
}

View File

@@ -0,0 +1,265 @@
namespace StellaOps.AdvisoryAI.UnifiedSearch.QueryUnderstanding;
/// <summary>
/// Heuristic intent classifier for unified-search queries. Buckets a query into one
/// of four intents ("navigate", "troubleshoot", "explore", "compare") via keyword
/// matching, with English keywords as the primary signal and lazily loaded
/// multilingual keyword sets as a fallback. Also exposes coarse security/policy
/// intent checks used for domain weighting.
/// </summary>
internal sealed class IntentClassifier
{
    private static readonly string[] NavigateTerms =
    [
        "go to", "open", "show", "navigate", "find", "where is", "look up"
    ];
    private static readonly string[] TroubleshootTerms =
    [
        "troubleshoot", "fix", "error", "fail", "broken", "issue", "problem",
        "debug", "why", "not working", "crash", "remediation", "resolve"
    ];
    private static readonly string[] ExploreTerms =
    [
        "what is", "explain", "how does", "overview", "describe", "tell me about",
        "summary", "help", "guide", "documentation", "docs", "how to"
    ];
    private static readonly string[] CompareTerms =
    [
        "compare", "difference", "versus", "vs", "between", "contrast",
        "which is better", "pros and cons"
    ];
    private static readonly string[] SecurityTerms =
    [
        "cve", "vulnerability", "finding", "exploit", "patch", "advisory",
        "vex", "sbom", "scan", "security", "severity", "critical", "ghsa"
    ];
    private static readonly string[] PolicyTerms =
    [
        "policy", "rule", "baseline", "compliance", "gate", "enforcement",
        "allow", "deny", "block", "require"
    ];
    // Lazy-loaded multilingual keyword dictionaries (built on first non-English lookup).
    private static readonly Lazy<IReadOnlyDictionary<string, IReadOnlyList<string>>> MultilingualNavigate =
        new(MultilingualIntentKeywords.GetNavigateKeywords);
    private static readonly Lazy<IReadOnlyDictionary<string, IReadOnlyList<string>>> MultilingualTroubleshoot =
        new(MultilingualIntentKeywords.GetTroubleshootKeywords);
    private static readonly Lazy<IReadOnlyDictionary<string, IReadOnlyList<string>>> MultilingualExplore =
        new(MultilingualIntentKeywords.GetExploreKeywords);
    private static readonly Lazy<IReadOnlyDictionary<string, IReadOnlyList<string>>> MultilingualCompare =
        new(MultilingualIntentKeywords.GetCompareKeywords);

    /// <summary>
    /// Classifies the intent of a query. When a language code is provided, uses locale-specific
    /// keywords. When language is null or unknown, tries all locales and uses the one with the
    /// highest match count. Defaults to "explore" when nothing matches.
    /// </summary>
    public string Classify(string query, string? languageCode = null)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            return "explore";
        }
        var lowerQuery = query.Trim().ToLowerInvariant();
        // If we have a specific non-English language, try its keyword set first.
        if (!string.IsNullOrWhiteSpace(languageCode) &&
            !string.Equals(languageCode, "en", StringComparison.OrdinalIgnoreCase))
        {
            var result = ClassifyWithLocale(lowerQuery, languageCode);
            if (result is not null)
            {
                return result;
            }
        }
        // English classification (original behavior) as primary.
        var englishIntent = ResolveIntent(
            CountTermMatches(lowerQuery, NavigateTerms),
            CountTermMatches(lowerQuery, TroubleshootTerms),
            CountTermMatches(lowerQuery, ExploreTerms),
            CountTermMatches(lowerQuery, CompareTerms));
        if (englishIntent is not null)
        {
            return englishIntent;
        }
        // No English matches — try all multilingual keyword sets as fallback.
        if (string.IsNullOrWhiteSpace(languageCode))
        {
            var multilingualResult = ClassifyWithAllLocales(lowerQuery);
            if (multilingualResult is not null)
            {
                return multilingualResult;
            }
        }
        return "explore";
    }

    /// <summary>True when the query contains any security-related term (CVE, SBOM, VEX, ...).</summary>
    public bool HasSecurityIntent(string query)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            return false;
        }
        return ContainsAnyTerm(query.ToLowerInvariant(), SecurityTerms);
    }

    /// <summary>True when the query contains any policy-related term (rule, gate, deny, ...).</summary>
    public bool HasPolicyIntent(string query)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            return false;
        }
        return ContainsAnyTerm(query.ToLowerInvariant(), PolicyTerms);
    }

    /// <summary>
    /// Shared ranking over the four per-intent match counts (previously duplicated in
    /// three places). "compare" dominates any match; "troubleshoot" needs a strict
    /// majority over navigate and explore; otherwise navigate beats explore. Returns
    /// null when every score is zero.
    /// </summary>
    private static string? ResolveIntent(int navigateScore, int troubleshootScore, int exploreScore, int compareScore)
    {
        if (compareScore > 0)
        {
            return "compare";
        }
        if (troubleshootScore > navigateScore && troubleshootScore > exploreScore)
        {
            return "troubleshoot";
        }
        if (navigateScore > exploreScore)
        {
            return "navigate";
        }
        if (exploreScore > 0)
        {
            return "explore";
        }
        return null;
    }

    /// <summary>
    /// Attempts to classify using keywords for a specific locale. Returns null if no matches found.
    /// </summary>
    private static string? ClassifyWithLocale(string lowerQuery, string langCode)
    {
        var navigateScore = CountMultilingualTermMatches(lowerQuery, MultilingualNavigate.Value, langCode);
        var troubleshootScore = CountMultilingualTermMatches(lowerQuery, MultilingualTroubleshoot.Value, langCode);
        var exploreScore = CountMultilingualTermMatches(lowerQuery, MultilingualExplore.Value, langCode);
        var compareScore = CountMultilingualTermMatches(lowerQuery, MultilingualCompare.Value, langCode);
        // ResolveIntent returns null exactly when all four scores are zero.
        return ResolveIntent(navigateScore, troubleshootScore, exploreScore, compareScore);
    }

    /// <summary>
    /// Tries all non-English locales and returns the intent from the locale with the most matches.
    /// Returns null if no matches found in any locale.
    /// </summary>
    private static string? ClassifyWithAllLocales(string lowerQuery)
    {
        string? bestIntent = null;
        var bestScore = 0;
        foreach (var langCode in MultilingualNavigate.Value.Keys)
        {
            if (string.Equals(langCode, "en", StringComparison.OrdinalIgnoreCase))
            {
                continue; // English was already tried by the caller.
            }
            var navigateScore = CountMultilingualTermMatches(lowerQuery, MultilingualNavigate.Value, langCode);
            var troubleshootScore = CountMultilingualTermMatches(lowerQuery, MultilingualTroubleshoot.Value, langCode);
            var exploreScore = CountMultilingualTermMatches(lowerQuery, MultilingualExplore.Value, langCode);
            var compareScore = CountMultilingualTermMatches(lowerQuery, MultilingualCompare.Value, langCode);
            var totalMatches = navigateScore + troubleshootScore + exploreScore + compareScore;
            if (totalMatches <= bestScore)
            {
                continue;
            }
            bestScore = totalMatches;
            bestIntent = ResolveIntent(navigateScore, troubleshootScore, exploreScore, compareScore);
        }
        return bestIntent;
    }

    /// <summary>Counts matches for the given locale's keywords; zero when the locale is unknown.</summary>
    private static int CountMultilingualTermMatches(
        string query,
        IReadOnlyDictionary<string, IReadOnlyList<string>> keywordsByLocale,
        string langCode)
    {
        if (!keywordsByLocale.TryGetValue(langCode, out var terms))
        {
            return 0;
        }
        return CountTermMatches(query, terms);
    }

    /// <summary>Number of terms that occur in the (already lower-cased) query.</summary>
    private static int CountTermMatches(string query, IReadOnlyList<string> terms)
    {
        var count = 0;
        foreach (var term in terms)
        {
            if (query.Contains(term, StringComparison.OrdinalIgnoreCase))
            {
                count++;
            }
        }
        return count;
    }

    /// <summary>
    /// Ordinal containment check; callers lower-case the query first, and the term
    /// lists are lower-case, so this matches case-insensitively in practice.
    /// </summary>
    private static bool ContainsAnyTerm(string query, IReadOnlyList<string> terms)
    {
        foreach (var term in terms)
        {
            if (query.Contains(term, StringComparison.Ordinal))
            {
                return true;
            }
        }
        return false;
    }
}

View File

@@ -0,0 +1,53 @@
namespace StellaOps.AdvisoryAI.UnifiedSearch.QueryUnderstanding;
/// <summary>
/// Provides localized keyword sets for intent classification across supported languages.
/// Each method returns a dictionary keyed by two-letter language code (ISO 639-1) with
/// keyword lists used to detect a specific user intent from the search query.
/// </summary>
/// <summary>
/// Provides localized keyword sets for intent classification across supported languages.
/// Each accessor builds a fresh dictionary keyed by two-letter ISO 639-1 language code
/// (case-insensitive) whose values are the keyword lists used to detect one specific
/// user intent in a search query.
/// </summary>
internal static class MultilingualIntentKeywords
{
    /// <summary>Returns keywords per locale for the "navigate" intent.</summary>
    public static IReadOnlyDictionary<string, IReadOnlyList<string>> GetNavigateKeywords() =>
        new Dictionary<string, IReadOnlyList<string>>(StringComparer.OrdinalIgnoreCase)
        {
            ["en"] = ["go to", "open", "show me", "find", "navigate", "view", "where is"],
            ["de"] = ["gehe zu", "öffne", "zeige mir", "finde", "navigiere", "ansehen", "wo ist"],
            ["fr"] = ["aller à", "ouvrir", "montre-moi", "trouver", "naviguer", "voir", "où est"],
            ["es"] = ["ir a", "abrir", "muéstrame", "buscar", "navegar", "ver", "dónde está"],
            ["ru"] = ["перейти", "открыть", "покажи", "найти", "навигация", "посмотреть", "где"],
        };

    /// <summary>Returns keywords per locale for the "troubleshoot" intent.</summary>
    public static IReadOnlyDictionary<string, IReadOnlyList<string>> GetTroubleshootKeywords() =>
        new Dictionary<string, IReadOnlyList<string>>(StringComparer.OrdinalIgnoreCase)
        {
            ["en"] = ["fix", "error", "failing", "broken", "debug", "troubleshoot", "crash", "issue", "problem", "not working"],
            ["de"] = ["beheben", "Fehler", "fehlgeschlagen", "kaputt", "debuggen", "Fehlerbehebung", "Absturz", "Problem", "funktioniert nicht"],
            ["fr"] = ["corriger", "erreur", "échoué", "cassé", "déboguer", "dépanner", "plantage", "problème", "ne fonctionne pas"],
            ["es"] = ["arreglar", "error", "fallando", "roto", "depurar", "solucionar", "bloqueo", "problema", "no funciona"],
            ["ru"] = ["исправить", "ошибка", "сбой", "сломан", "отладка", "устранение", "падение", "проблема", "не работает"],
        };

    /// <summary>Returns keywords per locale for the "explore" intent.</summary>
    public static IReadOnlyDictionary<string, IReadOnlyList<string>> GetExploreKeywords() =>
        new Dictionary<string, IReadOnlyList<string>>(StringComparer.OrdinalIgnoreCase)
        {
            ["en"] = ["what is", "how does", "explain", "describe", "tell me about", "overview", "guide", "help"],
            ["de"] = ["was ist", "wie funktioniert", "erkläre", "beschreibe", "erzähl mir über", "Übersicht", "Anleitung", "Hilfe"],
            ["fr"] = ["qu'est-ce que", "comment fonctionne", "expliquer", "décrire", "parle-moi de", "aperçu", "guide", "aide"],
            ["es"] = ["qué es", "cómo funciona", "explicar", "describir", "cuéntame sobre", "resumen", "guía", "ayuda"],
            ["ru"] = ["что такое", "как работает", "объясни", "опиши", "расскажи о", "обзор", "руководство", "помощь"],
        };

    /// <summary>Returns keywords per locale for the "compare" intent.</summary>
    public static IReadOnlyDictionary<string, IReadOnlyList<string>> GetCompareKeywords() =>
        new Dictionary<string, IReadOnlyList<string>>(StringComparer.OrdinalIgnoreCase)
        {
            ["en"] = ["compare", "difference", "vs", "versus", "between"],
            ["de"] = ["vergleiche", "Unterschied", "gegen", "zwischen"],
            ["fr"] = ["comparer", "différence", "contre", "entre"],
            ["es"] = ["comparar", "diferencia", "contra", "entre"],
            ["ru"] = ["сравнить", "разница", "против", "между"],
        };
}

View File

@@ -0,0 +1,182 @@
namespace StellaOps.AdvisoryAI.UnifiedSearch.QueryUnderstanding;
/// <summary>
/// Lightweight query language detector that uses character set analysis and stop-word frequency
/// to determine the language of a search query. Used to select the appropriate PostgreSQL FTS
/// configuration and tsvector column for multilingual search.
/// </summary>
/// <summary>
/// Lightweight query language detector that uses character-set analysis and stop-word
/// frequency to determine the language of a search query. Used to select the
/// appropriate PostgreSQL FTS configuration and tsvector column for multilingual search.
/// </summary>
internal sealed class QueryLanguageDetector
{
    // Top ~20 stop words per language for disambiguation among Latin-script languages.
    private static readonly Dictionary<string, HashSet<string>> StopWords = new(StringComparer.OrdinalIgnoreCase)
    {
        ["en"] = new(StringComparer.OrdinalIgnoreCase)
        {
            "the", "is", "at", "which", "on", "a", "an", "and", "or", "but",
            "in", "with", "to", "for", "of", "it", "this", "that", "from", "by"
        },
        ["de"] = new(StringComparer.OrdinalIgnoreCase)
        {
            "der", "die", "das", "ist", "ein", "eine", "und", "oder", "aber", "in",
            "mit", "zu", "f\u00fcr", "von", "es", "auf", "an", "aus", "nach", "\u00fcber"
        },
        ["fr"] = new(StringComparer.OrdinalIgnoreCase)
        {
            "le", "la", "les", "est", "un", "une", "et", "ou", "mais", "dans",
            "avec", "pour", "de", "du", "ce", "cette", "sur", "par", "en", "aux"
        },
        ["es"] = new(StringComparer.OrdinalIgnoreCase)
        {
            "el", "la", "los", "las", "es", "un", "una", "y", "o", "pero",
            "en", "con", "para", "de", "del", "que", "por", "su", "al", "como"
        },
        ["ru"] = new(StringComparer.OrdinalIgnoreCase)
        {
            "\u0438", "\u0432", "\u043d\u0435", "\u043d\u0430", "\u0441",
            "\u0447\u0442\u043e", "\u044d\u0442\u043e", "\u043a\u0430\u043a",
            "\u043a", "\u043f\u043e", "\u043d\u043e", "\u0438\u0437",
            "\u0443", "\u043e\u0442", "\u0437\u0430", "\u0434\u043b\u044f",
            "\u0434\u043e", "\u0432\u0441\u0435", "\u0442\u0430\u043a",
            "\u0436\u0435"
        },
    };

    /// <summary>
    /// Detects the language of the query text. Uses character-set analysis first (Cyrillic, CJK),
    /// then stop-word frequency for Latin-script languages, then diacritics. Falls back to the
    /// user locale or English.
    /// </summary>
    /// <param name="query">The search query text.</param>
    /// <param name="userLocale">Optional user locale hint (e.g., "de-DE", "fr").</param>
    /// <returns>Two-letter ISO 639-1 language code (e.g., "en", "de", "fr", "es", "ru", "zh").</returns>
    public string DetectLanguage(string query, string? userLocale = null)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            return ResolveLocale(userLocale, "en");
        }
        // Check for Cyrillic characters (U+0400..U+04FF)
        if (query.Any(static c => c >= '\u0400' && c <= '\u04FF'))
        {
            // For now, default to Russian. Distinguishing Ukrainian/Bulgarian would require
            // language-specific character frequency analysis (future enhancement).
            return "ru";
        }
        // Check for CJK characters (CJK Unified Ideographs + Extension A)
        if (query.Any(static c => (c >= '\u4E00' && c <= '\u9FFF') || (c >= '\u3400' && c <= '\u4DBF')))
        {
            return "zh";
        }
        // Latin script -- use stop word analysis
        var words = query.Split(
            new[] { ' ', ',', '.', '!', '?', ';', ':', '-', '(', ')' },
            StringSplitOptions.RemoveEmptyEntries);
        if (words.Length == 0)
        {
            return ResolveLocale(userLocale, "en");
        }
        var scores = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
        foreach (var (lang, stops) in StopWords)
        {
            var count = words.Count(w => stops.Contains(w));
            if (count > 0)
            {
                scores[lang] = count;
            }
        }
        if (scores.Count > 0)
        {
            // Entries only exist for counts > 0, so the top entry is always a real hit
            // (the former "best.Value >= 1" re-check was redundant).
            return scores.OrderByDescending(static kv => kv.Value).First().Key;
        }
        // Check for language-specific diacritical characters
        if (query.Any(static c => "\u00e4\u00f6\u00fc\u00df".Contains(c)))
        {
            return "de";
        }
        if (query.Any(static c => "\u00e0\u00e2\u00e7\u00e9\u00e8\u00ea\u00eb\u00ef\u00ee\u00f4\u00f9\u00fb\u00fc".Contains(c)))
        {
            return "fr";
        }
        if (query.Any(static c => "\u00e1\u00e9\u00ed\u00f3\u00fa\u00f1\u00bf\u00a1".Contains(c)))
        {
            return "es";
        }
        return ResolveLocale(userLocale, "en");
    }

    /// <summary>
    /// Maps a two-letter language code to the corresponding PostgreSQL FTS configuration name.
    /// Input is normalized to lower case so "DE" and "de" behave alike (DetectLanguage and
    /// ResolveLocale always emit lower case, but external callers may not).
    /// </summary>
    public string MapLanguageToFtsConfig(string langCode)
    {
        var normalized = langCode?.ToLowerInvariant();
        return normalized switch
        {
            "en" => "english",
            "de" => "german",
            "fr" => "french",
            "es" => "spanish",
            "ru" => "russian",
            _ => "simple"
        };
    }

    /// <summary>
    /// Maps a two-letter language code (case-insensitive) to the corresponding tsvector
    /// column name in kb_chunk.
    /// </summary>
    public string MapLanguageToTsvColumn(string langCode)
    {
        var normalized = langCode?.ToLowerInvariant();
        return normalized switch
        {
            "en" => "body_tsv_en",
            "de" => "body_tsv_de",
            "fr" => "body_tsv_fr",
            "es" => "body_tsv_es",
            "ru" => "body_tsv_ru",
            _ => "body_tsv"
        };
    }

    /// <summary>
    /// Maps a two-letter language code (case-insensitive) to the full locale string
    /// (e.g., "de" -> "de-DE"). Used to pass locale to the FTS store layer.
    /// </summary>
    public string MapLanguageToLocale(string langCode)
    {
        var normalized = langCode?.ToLowerInvariant();
        return normalized switch
        {
            "en" => "en-US",
            "de" => "de-DE",
            "fr" => "fr-FR",
            "es" => "es-ES",
            "ru" => "ru-RU",
            "zh" => "zh-CN",
            _ => "en-US"
        };
    }

    /// <summary>
    /// Extracts the language code from a locale hint (e.g., "de-DE" -> "de"),
    /// or returns the fallback when no hint is available.
    /// </summary>
    private static string ResolveLocale(string? userLocale, string fallback)
    {
        if (string.IsNullOrWhiteSpace(userLocale))
        {
            return fallback;
        }
        // Extract language code from locale (e.g., "de-DE" -> "de")
        var dash = userLocale.IndexOf('-');
        return dash > 0 ? userLocale[..dash].ToLowerInvariant() : userLocale.ToLowerInvariant();
    }
}

View File

@@ -0,0 +1,39 @@
using StellaOps.AdvisoryAI.KnowledgeSearch;
namespace StellaOps.AdvisoryAI.UnifiedSearch.QueryUnderstanding;
/// <summary>
/// Builds a <see cref="QueryPlan"/> for a unified-search request by normalizing the
/// query text, extracting entity mentions, classifying intent, and computing
/// per-domain weights.
/// </summary>
internal sealed class QueryPlanBuilder
{
    private readonly EntityExtractor _entityExtractor;
    private readonly IntentClassifier _intentClassifier;
    private readonly DomainWeightCalculator _domainWeightCalculator;

    public QueryPlanBuilder(
        EntityExtractor entityExtractor,
        IntentClassifier intentClassifier,
        DomainWeightCalculator domainWeightCalculator)
    {
        ArgumentNullException.ThrowIfNull(entityExtractor);
        ArgumentNullException.ThrowIfNull(intentClassifier);
        ArgumentNullException.ThrowIfNull(domainWeightCalculator);
        _entityExtractor = entityExtractor;
        _intentClassifier = intentClassifier;
        _domainWeightCalculator = domainWeightCalculator;
    }

    /// <summary>Derives the full query plan from the incoming request.</summary>
    public QueryPlan Build(UnifiedSearchRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);
        var normalizedQuery = KnowledgeSearchText.NormalizeWhitespace(request.Q);
        var detectedEntities = _entityExtractor.Extract(normalizedQuery);
        var intent = _intentClassifier.Classify(normalizedQuery);
        var domainWeights = _domainWeightCalculator.ComputeWeights(normalizedQuery, detectedEntities, request.Filters);
        return new QueryPlan
        {
            OriginalQuery = request.Q,
            NormalizedQuery = normalizedQuery,
            Intent = intent,
            DetectedEntities = detectedEntities,
            DomainWeights = domainWeights
        };
    }
}

View File

@@ -0,0 +1,47 @@
[
{
"findingId": "finding-cve-2024-21626",
"cveId": "CVE-2024-21626",
"title": "Container breakout via runc",
"description": "runc < 1.1.12 allows container escape via internal file descriptor leak in /proc/self/fd.",
"severity": "critical",
"service": "scanner",
"tenant": "global",
"tags": [
"finding",
"vulnerability",
"critical"
],
"freshness": "2026-01-01T00:00:00Z"
},
{
"findingId": "finding-cve-2024-3094",
"cveId": "CVE-2024-3094",
"title": "XZ Utils backdoor",
"description": "Malicious code in xz-utils 5.6.0/5.6.1 allows remote code execution via sshd integration.",
"severity": "critical",
"service": "scanner",
"tenant": "global",
"tags": [
"finding",
"vulnerability",
"critical"
],
"freshness": "2026-01-01T00:00:00Z"
},
{
"findingId": "finding-cve-2023-44487",
"cveId": "CVE-2023-44487",
"title": "HTTP/2 Rapid Reset DDoS",
"description": "HTTP/2 protocol vulnerability enables rapid reset attack causing denial of service.",
"severity": "high",
"service": "scanner",
"tenant": "global",
"tags": [
"finding",
"vulnerability",
"high"
],
"freshness": "2026-01-01T00:00:00Z"
}
]

View File

@@ -0,0 +1,44 @@
[
{
"ruleId": "DENY-CRITICAL-PROD",
"title": "Deny critical vulnerabilities in production",
"description": "Blocks promotion to production for any artifact with critical-severity findings that have not been mitigated by VEX.",
"decision": "deny",
"service": "policy",
"tenant": "global",
"tags": [
"policy",
"rule",
"production"
],
"freshness": "2026-01-01T00:00:00Z"
},
{
"ruleId": "REQUIRE-SBOM-SIGNED",
"title": "Require signed SBOM for all artifacts",
"description": "All container artifacts must have a signed SBOM attestation before entering the release pipeline.",
"decision": "require",
"service": "policy",
"tenant": "global",
"tags": [
"policy",
"rule",
"attestation"
],
"freshness": "2026-01-01T00:00:00Z"
},
{
"ruleId": "MAX-AGE-90D",
"title": "Maximum image age 90 days",
"description": "Artifacts older than 90 days from their build timestamp are rejected from promotion gates.",
"decision": "deny",
"service": "policy",
"tenant": "global",
"tags": [
"policy",
"rule",
"freshness"
],
"freshness": "2026-01-01T00:00:00Z"
}
]

View File

@@ -0,0 +1,44 @@
[
{
"statementId": "vex-cve-2024-21626-not-affected",
"cveId": "CVE-2024-21626",
"status": "not_affected",
"justification": "Component not reachable in deployment configuration. Container runtime is sandboxed behind gVisor.",
"service": "vex-hub",
"tenant": "global",
"tags": [
"vex",
"statement",
"not_affected"
],
"freshness": "2026-01-01T00:00:00Z"
},
{
"statementId": "vex-cve-2024-3094-fixed",
"cveId": "CVE-2024-3094",
"status": "fixed",
"justification": "Updated xz-utils to 5.6.2 which removes the backdoor code. Verified via SBOM attestation.",
"service": "vex-hub",
"tenant": "global",
"tags": [
"vex",
"statement",
"fixed"
],
"freshness": "2026-01-01T00:00:00Z"
},
{
"statementId": "vex-cve-2023-44487-under-investigation",
"cveId": "CVE-2023-44487",
"status": "under_investigation",
"justification": "Analyzing HTTP/2 usage in edge proxies. Mitigation rate-limits in place.",
"service": "vex-hub",
"tenant": "global",
"tags": [
"vex",
"statement",
"under_investigation"
],
"freshness": "2026-01-01T00:00:00Z"
}
]

View File

@@ -0,0 +1,59 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Synthesis;
/// <summary>
/// Synthesis engine that prefers LLM-grounded answers when fully configured and
/// falls back to the deterministic template engine whenever the LLM path is
/// disabled, returns nothing, or throws.
/// </summary>
internal sealed class CompositeSynthesisEngine : ISynthesisEngine
{
    private readonly LlmSynthesisEngine _llmEngine;
    private readonly SynthesisTemplateEngine _templateEngine;
    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<CompositeSynthesisEngine> _logger;

    public CompositeSynthesisEngine(
        LlmSynthesisEngine llmEngine,
        SynthesisTemplateEngine templateEngine,
        IOptions<KnowledgeSearchOptions> options,
        ILogger<CompositeSynthesisEngine> logger)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(llmEngine);
        ArgumentNullException.ThrowIfNull(templateEngine);
        ArgumentNullException.ThrowIfNull(logger);
        _llmEngine = llmEngine;
        _templateEngine = templateEngine;
        _options = options.Value ?? new KnowledgeSearchOptions();
        _logger = logger;
    }

    /// <summary>
    /// Tries LLM synthesis first (when enabled and both adapter URL and provider id
    /// are configured); any null result or exception falls through to the template engine.
    /// </summary>
    public async Task<SynthesisResult?> SynthesizeAsync(
        string query,
        IReadOnlyList<EntityCard> cards,
        IReadOnlyList<EntityMention> detectedEntities,
        CancellationToken ct)
    {
        var llmConfigured =
            _options.LlmSynthesisEnabled &&
            !string.IsNullOrWhiteSpace(_options.LlmAdapterBaseUrl) &&
            !string.IsNullOrWhiteSpace(_options.LlmProviderId);
        if (llmConfigured)
        {
            try
            {
                var fromLlm = await _llmEngine.SynthesizeAsync(query, cards, detectedEntities, ct)
                    .ConfigureAwait(false);
                if (fromLlm is not null)
                {
                    _logger.LogDebug("LLM synthesis succeeded for query.");
                    return fromLlm;
                }
                _logger.LogDebug("LLM synthesis returned null; falling back to template engine.");
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "LLM synthesis failed; falling back to template engine.");
            }
        }
        return await _templateEngine.SynthesizeAsync(query, cards, detectedEntities, ct)
            .ConfigureAwait(false);
    }
}

View File

@@ -0,0 +1,10 @@
namespace StellaOps.AdvisoryAI.UnifiedSearch.Synthesis;
/// <summary>
/// Produces an optional synthesized answer for a search query, grounded in the
/// retrieved entity cards. Implementations may return null when no synthesis
/// could be produced (e.g., no cards or the backend is unavailable).
/// </summary>
internal interface ISynthesisEngine
{
    /// <summary>
    /// Synthesizes an answer for <paramref name="query"/> from <paramref name="cards"/>;
    /// <paramref name="detectedEntities"/> carries entities extracted from the query.
    /// Returns null when synthesis is not possible.
    /// </summary>
    Task<SynthesisResult?> SynthesizeAsync(
        string query,
        IReadOnlyList<EntityCard> cards,
        IReadOnlyList<EntityMention> detectedEntities,
        CancellationToken ct);
}

View File

@@ -0,0 +1,348 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using System.Globalization;
using System.Net.Http.Json;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.RegularExpressions;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Synthesis;
/// <summary>
/// Synthesis engine that asks an external LLM adapter to produce a grounded,
/// citation-bearing answer from the top search result cards. Every failure mode
/// (no cards, missing configuration, timeout, transport error, empty or
/// unparseable response) returns <c>null</c> so callers can fall back to
/// template-based synthesis.
/// </summary>
internal sealed partial class LlmSynthesisEngine : ISynthesisEngine
{
    private readonly KnowledgeSearchOptions _options;
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly ILogger<LlmSynthesisEngine> _logger;
    private readonly string _systemPrompt;

    // Snake-case naming and null stripping match the adapter's chat-completions payload shape.
    private static readonly JsonSerializerOptions SerializerOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    public LlmSynthesisEngine(
        IOptions<KnowledgeSearchOptions> options,
        IHttpClientFactory httpClientFactory,
        ILogger<LlmSynthesisEngine> logger)
    {
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _systemPrompt = LoadSystemPrompt();
    }

    /// <summary>
    /// Produces an LLM-synthesized answer for <paramref name="query"/> grounded in
    /// <paramref name="cards"/>, or <c>null</c> when synthesis is unavailable or fails.
    /// </summary>
    /// <param name="query">The user's search query.</param>
    /// <param name="cards">Top-ranked entity cards used as grounding evidence.</param>
    /// <param name="detectedEntities">Entities detected in the query (unused by this engine).</param>
    /// <param name="ct">Caller cancellation token; a separate internal timeout is also applied.</param>
    public async Task<SynthesisResult?> SynthesizeAsync(
        string query,
        IReadOnlyList<EntityCard> cards,
        IReadOnlyList<EntityMention> detectedEntities,
        CancellationToken ct)
    {
        if (cards.Count == 0)
        {
            return null;
        }
        if (string.IsNullOrWhiteSpace(_options.LlmAdapterBaseUrl) ||
            string.IsNullOrWhiteSpace(_options.LlmProviderId))
        {
            _logger.LogDebug("LLM synthesis skipped: LlmAdapterBaseUrl or LlmProviderId is not configured.");
            return null;
        }
        var userPrompt = BuildUserPrompt(query, cards);
        var timeoutMs = Math.Clamp(_options.SynthesisTimeoutMs, 1000, 30000);
        try
        {
            using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            cts.CancelAfter(TimeSpan.FromMilliseconds(timeoutMs));
            // JsonDocument rents pooled buffers; dispose it once the text has been extracted.
            using var response = await CallLlmAdapterAsync(userPrompt, cts.Token).ConfigureAwait(false);
            if (response is null)
            {
                return null;
            }
            var rawText = ExtractResponseText(response);
            if (string.IsNullOrWhiteSpace(rawText))
            {
                _logger.LogWarning("LLM synthesis returned empty content.");
                return null;
            }
            var citations = ParseCitations(rawText, cards);
            var validatedText = StripInvalidCitations(rawText, cards.Count);
            var groundingScore = ComputeGroundingScore(citations, cards.Count);
            var confidence = ComputeConfidence(citations, groundingScore);
            if (citations.Count == 0)
            {
                // Uncited answers are still surfaced, but flagged and demoted to low confidence.
                validatedText += " Note: This answer may not be fully grounded in the search results.";
                confidence = "low";
            }
            var citedDomains = citations
                .Select(c => c.Domain)
                .Where(d => !string.IsNullOrWhiteSpace(d))
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .ToArray();
            return new SynthesisResult
            {
                Summary = validatedText,
                Template = "llm_grounded",
                Confidence = confidence,
                SourceCount = citations.Count,
                DomainsCovered = citedDomains,
                Citations = citations
                    .Select(c => new SynthesisCitation
                    {
                        Index = c.Index,
                        EntityKey = c.EntityKey,
                        Title = c.Title
                    })
                    .ToArray(),
                GroundingScore = groundingScore
            };
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Caller-initiated cancellation (e.g. shutdown) — not a timeout; keep it quiet.
            _logger.LogDebug("LLM synthesis canceled by caller.");
            return null;
        }
        catch (OperationCanceledException)
        {
            _logger.LogWarning("LLM synthesis timed out after {TimeoutMs}ms.", timeoutMs);
            return null;
        }
        catch (HttpRequestException ex)
        {
            _logger.LogWarning(ex, "LLM synthesis HTTP request failed.");
            return null;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "LLM synthesis failed unexpectedly.");
            return null;
        }
    }

    /// <summary>
    /// Posts the chat-completion request to the LLM adapter. Returns the parsed JSON
    /// response (the caller owns disposal), or <c>null</c> on a non-success status code.
    /// </summary>
    private async Task<JsonDocument?> CallLlmAdapterAsync(string userPrompt, CancellationToken ct)
    {
        var client = _httpClientFactory.CreateClient("llm-synthesis");
        var baseUrl = _options.LlmAdapterBaseUrl.TrimEnd('/');
        var providerId = _options.LlmProviderId;
        var url = $"{baseUrl}/v1/advisory-ai/adapters/llm/{Uri.EscapeDataString(providerId)}/chat/completions";
        var requestBody = new LlmCompletionRequestBody
        {
            Messages =
            [
                new LlmMessageBody { Role = "system", Content = _systemPrompt },
                new LlmMessageBody { Role = "user", Content = userPrompt }
            ],
            // Temperature 0 keeps the output as deterministic as the provider allows.
            Temperature = 0,
            MaxTokens = 512,
            Stream = false
        };
        var httpContent = JsonContent.Create(requestBody, options: SerializerOptions);
        using var response = await client.PostAsync(url, httpContent, ct).ConfigureAwait(false);
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogWarning(
                "LLM adapter returned {StatusCode} for synthesis request.",
                (int)response.StatusCode);
            return null;
        }
        var stream = await response.Content.ReadAsStreamAsync(ct).ConfigureAwait(false);
        return await JsonDocument.ParseAsync(stream, cancellationToken: ct).ConfigureAwait(false);
    }

    /// <summary>Extracts <c>choices[0].message.content</c> from an OpenAI-style completion response.</summary>
    private static string? ExtractResponseText(JsonDocument doc)
    {
        if (doc.RootElement.TryGetProperty("choices", out var choices) &&
            choices.ValueKind == JsonValueKind.Array &&
            choices.GetArrayLength() > 0)
        {
            var firstChoice = choices[0];
            if (firstChoice.TryGetProperty("message", out var message) &&
                message.TryGetProperty("content", out var content) &&
                content.ValueKind == JsonValueKind.String)
            {
                return content.GetString();
            }
        }
        return null;
    }

    /// <summary>
    /// Builds the user prompt: the question followed by numbered search results that the
    /// model must cite as [1], [2], ... Snippets are truncated to 300 characters.
    /// </summary>
    private static string BuildUserPrompt(string query, IReadOnlyList<EntityCard> cards)
    {
        var sb = new StringBuilder();
        sb.AppendLine(CultureInfo.InvariantCulture, $"Question: {query}");
        sb.AppendLine();
        sb.AppendLine("Search results:");
        for (var i = 0; i < cards.Count; i++)
        {
            var card = cards[i];
            sb.AppendLine(CultureInfo.InvariantCulture, $"[{i + 1}] Title: {card.Title}");
            sb.AppendLine(CultureInfo.InvariantCulture, $"    Domain: {card.Domain}");
            sb.AppendLine(CultureInfo.InvariantCulture, $"    Type: {card.EntityType}");
            if (!string.IsNullOrWhiteSpace(card.Severity))
            {
                sb.AppendLine(CultureInfo.InvariantCulture, $"    Severity: {card.Severity}");
            }
            if (!string.IsNullOrWhiteSpace(card.Snippet))
            {
                var snippet = card.Snippet.Length > 300 ? card.Snippet[..300] + "..." : card.Snippet;
                sb.AppendLine(CultureInfo.InvariantCulture, $"    Snippet: {snippet}");
            }
            sb.AppendLine(CultureInfo.InvariantCulture, $"    EntityKey: {card.EntityKey}");
            sb.AppendLine();
        }
        sb.AppendLine("Answer the question using only the search results above.");
        return sb.ToString();
    }

    /// <summary>
    /// Parses [N] citation markers from the model output, keeping only indices that map
    /// to a provided card. Each index is reported at most once, in first-seen order.
    /// </summary>
    internal static IReadOnlyList<CitationMatch> ParseCitations(string text, IReadOnlyList<EntityCard> cards)
    {
        var matches = CitationPattern().Matches(text);
        var seen = new HashSet<int>();
        var results = new List<CitationMatch>();
        foreach (Match match in matches)
        {
            if (!int.TryParse(match.Groups[1].Value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var index))
            {
                continue;
            }
            if (index < 1 || index > cards.Count)
            {
                // Hallucinated citation: points outside the provided result list.
                continue;
            }
            if (!seen.Add(index))
            {
                continue;
            }
            var card = cards[index - 1];
            results.Add(new CitationMatch(
                index,
                card.EntityKey,
                card.Title,
                card.Domain));
        }
        return results;
    }

    /// <summary>
    /// Removes [N] markers whose index does not correspond to a provided card
    /// (1..<paramref name="maxIndex"/>); valid markers are left untouched.
    /// </summary>
    internal static string StripInvalidCitations(string text, int maxIndex)
    {
        return CitationPattern().Replace(text, match =>
        {
            if (int.TryParse(match.Groups[1].Value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var index) &&
                index >= 1 && index <= maxIndex)
            {
                return match.Value;
            }
            return string.Empty;
        });
    }

    /// <summary>Fraction of the provided cards that the answer actually cited (0..1).</summary>
    internal static double ComputeGroundingScore(IReadOnlyList<CitationMatch> citations, int totalCards)
    {
        if (totalCards == 0)
        {
            return 0d;
        }
        return (double)citations.Count / totalCards;
    }

    /// <summary>
    /// Maps citation coverage to a confidence label: "low" with no citations,
    /// "high" with at least two citations covering half the cards, otherwise "medium".
    /// </summary>
    private static string ComputeConfidence(IReadOnlyList<CitationMatch> citations, double groundingScore)
    {
        if (citations.Count == 0)
        {
            return "low";
        }
        if (groundingScore >= 0.5 && citations.Count >= 2)
        {
            return "high";
        }
        // At least one citation but below the "high" bar.
        return "medium";
    }

    /// <summary>
    /// Loads the system prompt: embedded resource first, then a file next to the
    /// assembly, finally a minimal hardcoded fallback.
    /// </summary>
    private static string LoadSystemPrompt()
    {
        var assembly = typeof(LlmSynthesisEngine).Assembly;
        // NOTE(review): manifest resource names are normally namespace-qualified unless the
        // csproj sets <LogicalName>; confirm "synthesis-system-prompt.txt" matches the build,
        // otherwise this branch never hits and the file/hardcoded fallbacks are used.
        var resourceName = "synthesis-system-prompt.txt";
        using var stream = assembly.GetManifestResourceStream(resourceName);
        if (stream is not null)
        {
            using var reader = new StreamReader(stream, Encoding.UTF8);
            return reader.ReadToEnd();
        }
        // Fallback: load from file relative to assembly location
        var assemblyDir = Path.GetDirectoryName(assembly.Location) ?? ".";
        var filePath = Path.Combine(assemblyDir, "UnifiedSearch", "Synthesis", "synthesis-system-prompt.txt");
        if (File.Exists(filePath))
        {
            return File.ReadAllText(filePath, Encoding.UTF8);
        }
        // Hardcoded minimal fallback prompt
        return """
            You are a search synthesis assistant. Answer the user's question using ONLY the provided search results.
            Cite sources using [1], [2] notation. Keep answers to 3-5 sentences.
            If results are insufficient, say "I don't have enough information to answer this."
            """;
    }

    [GeneratedRegex(@"\[(\d+)\]", RegexOptions.Compiled)]
    private static partial Regex CitationPattern();

    /// <summary>A validated citation resolved against a provided card.</summary>
    internal sealed record CitationMatch(int Index, string EntityKey, string Title, string Domain);

    private sealed record LlmCompletionRequestBody
    {
        [JsonPropertyName("messages")]
        public required IReadOnlyList<LlmMessageBody> Messages { get; init; }
        [JsonPropertyName("temperature")]
        public double Temperature { get; init; }
        [JsonPropertyName("max_tokens")]
        public int MaxTokens { get; init; }
        [JsonPropertyName("stream")]
        public bool Stream { get; init; }
    }

    private sealed record LlmMessageBody
    {
        [JsonPropertyName("role")]
        public required string Role { get; init; }
        [JsonPropertyName("content")]
        public required string Content { get; init; }
    }
}

View File

@@ -0,0 +1,363 @@
using System.Text;
namespace StellaOps.AdvisoryAI.UnifiedSearch.Synthesis;
/// <summary>
/// Deterministic, template-based synthesis engine. Picks a summary template from the
/// mix of entity types in the top cards and fills it with localized phrases.
/// Acts as the offline fallback when LLM synthesis is unavailable.
/// </summary>
internal sealed class SynthesisTemplateEngine : ISynthesisEngine
{
    // ── Localized template strings (Sprint 109 / G9-003) ──
    // Each dictionary maps a two-letter language code to a set of localized phrases.
    // English is the fallback when a locale is not found.
    //
    // Pluralization: count-dependent phrases are stored as complete singular and
    // plural variants per language. (An earlier revision appended a literal English
    // "s" to non-English nouns, producing e.g. "Richtlinienregels".) Russian really
    // has three plural forms; the two-form approximation below uses the
    // genitive-plural form for all counts other than one.
    private static readonly Dictionary<string, LocalizedTemplateStrings> TemplateStrings =
        new(StringComparer.OrdinalIgnoreCase)
        {
            ["en"] = new LocalizedTemplateStrings
            {
                NoResultsFound = "No results found.",
                ResultsFor = "Results for {0}: ",
                FindingsSingular = "finding",
                FindingsPlural = "findings",
                VexStatementSingular = "VEX statement",
                VexStatementsPlural = "VEX statements",
                KnowledgeResultSingular = "knowledge result",
                KnowledgeResultsPlural = "knowledge results",
                SeverityDetected = "{0} severity finding detected.",
                FoundPolicyRuleSingular = "Found {0} policy rule.",
                FoundPolicyRulesPlural = "Found {0} policy rules.",
                TopMatch = "Top match: {0}.",
                FoundDoctorCheckSingular = "Found {0} doctor check.",
                FoundDoctorChecksPlural = "Found {0} doctor checks.",
                SecuritySearchFor = "Security search for \"{0}\": ",
                ResultCountSingular = "{0} result",
                ResultCountPlural = "{0} results",
                DomainCountSingular = "{0} domain",
                DomainCountPlural = "{0} domains",
                FoundResultsAcrossDomainsFrame = "Found {0} across {1} for \"{2}\".",
                And = "and",
            },
            ["de"] = new LocalizedTemplateStrings
            {
                NoResultsFound = "Keine Ergebnisse gefunden.",
                ResultsFor = "Ergebnisse für {0}: ",
                FindingsSingular = "Befund",
                FindingsPlural = "Befunde",
                VexStatementSingular = "VEX-Erklärung",
                VexStatementsPlural = "VEX-Erklärungen",
                KnowledgeResultSingular = "Wissensergebnis",
                KnowledgeResultsPlural = "Wissensergebnisse",
                SeverityDetected = "Befund mit Schweregrad {0} erkannt.",
                FoundPolicyRuleSingular = "{0} Richtlinienregel gefunden.",
                FoundPolicyRulesPlural = "{0} Richtlinienregeln gefunden.",
                TopMatch = "Bestes Ergebnis: {0}.",
                FoundDoctorCheckSingular = "{0} Doctor-Prüfung gefunden.",
                FoundDoctorChecksPlural = "{0} Doctor-Prüfungen gefunden.",
                SecuritySearchFor = "Sicherheitssuche für \"{0}\": ",
                ResultCountSingular = "{0} Ergebnis",
                ResultCountPlural = "{0} Ergebnisse",
                DomainCountSingular = "{0} Domäne",
                DomainCountPlural = "{0} Domänen",
                FoundResultsAcrossDomainsFrame = "{0} in {1} für \"{2}\" gefunden.",
                And = "und",
            },
            ["fr"] = new LocalizedTemplateStrings
            {
                NoResultsFound = "Aucun résultat trouvé.",
                ResultsFor = "Résultats pour {0} : ",
                FindingsSingular = "résultat de scan",
                FindingsPlural = "résultats de scan",
                VexStatementSingular = "déclaration VEX",
                VexStatementsPlural = "déclarations VEX",
                KnowledgeResultSingular = "résultat de connaissance",
                KnowledgeResultsPlural = "résultats de connaissance",
                SeverityDetected = "Résultat de sévérité {0} détecté.",
                FoundPolicyRuleSingular = "{0} règle de politique trouvée.",
                FoundPolicyRulesPlural = "{0} règles de politique trouvées.",
                TopMatch = "Meilleur résultat : {0}.",
                FoundDoctorCheckSingular = "{0} vérification Doctor trouvée.",
                FoundDoctorChecksPlural = "{0} vérifications Doctor trouvées.",
                SecuritySearchFor = "Recherche de sécurité pour \"{0}\" : ",
                ResultCountSingular = "{0} résultat trouvé",
                ResultCountPlural = "{0} résultats trouvés",
                DomainCountSingular = "{0} domaine",
                DomainCountPlural = "{0} domaines",
                FoundResultsAcrossDomainsFrame = "{0} dans {1} pour \"{2}\".",
                And = "et",
            },
            ["es"] = new LocalizedTemplateStrings
            {
                NoResultsFound = "No se encontraron resultados.",
                ResultsFor = "Resultados para {0}: ",
                FindingsSingular = "hallazgo",
                FindingsPlural = "hallazgos",
                VexStatementSingular = "declaración VEX",
                VexStatementsPlural = "declaraciones VEX",
                KnowledgeResultSingular = "resultado de conocimiento",
                KnowledgeResultsPlural = "resultados de conocimiento",
                SeverityDetected = "Hallazgo de severidad {0} detectado.",
                FoundPolicyRuleSingular = "{0} regla de política encontrada.",
                FoundPolicyRulesPlural = "{0} reglas de política encontradas.",
                TopMatch = "Mejor resultado: {0}.",
                FoundDoctorCheckSingular = "{0} verificación Doctor encontrada.",
                FoundDoctorChecksPlural = "{0} verificaciones Doctor encontradas.",
                SecuritySearchFor = "Búsqueda de seguridad para \"{0}\": ",
                ResultCountSingular = "{0} resultado",
                ResultCountPlural = "{0} resultados",
                DomainCountSingular = "{0} dominio",
                DomainCountPlural = "{0} dominios",
                FoundResultsAcrossDomainsFrame = "{0} en {1} para \"{2}\".",
                And = "y",
            },
            ["ru"] = new LocalizedTemplateStrings
            {
                NoResultsFound = "\u0420\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u044b \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u044b.",
                ResultsFor = "\u0420\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u044b \u0434\u043b\u044f {0}: ",
                FindingsSingular = "\u043d\u0430\u0445\u043e\u0434\u043a\u0430",
                FindingsPlural = "\u043d\u0430\u0445\u043e\u0434\u043e\u043a",
                VexStatementSingular = "VEX-\u0437\u0430\u044f\u0432\u043b\u0435\u043d\u0438\u0435",
                VexStatementsPlural = "VEX-\u0437\u0430\u044f\u0432\u043b\u0435\u043d\u0438\u0439",
                KnowledgeResultSingular = "\u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442 \u0437\u043d\u0430\u043d\u0438\u0439",
                KnowledgeResultsPlural = "\u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u0432 \u0437\u043d\u0430\u043d\u0438\u0439",
                SeverityDetected = "\u041e\u0431\u043d\u0430\u0440\u0443\u0436\u0435\u043d\u0430 \u043d\u0430\u0445\u043e\u0434\u043a\u0430 \u0441 \u0443\u0440\u043e\u0432\u043d\u0435\u043c \u0441\u0435\u0440\u044c\u0435\u0437\u043d\u043e\u0441\u0442\u0438 {0}.",
                // New plural-aware phrases (two-form approximation of Russian plurals).
                FoundPolicyRuleSingular = "Найдено {0} правило политики.",
                FoundPolicyRulesPlural = "Найдено {0} правил политики.",
                TopMatch = "\u041b\u0443\u0447\u0448\u0435\u0435 \u0441\u043e\u0432\u043f\u0430\u0434\u0435\u043d\u0438\u0435: {0}.",
                FoundDoctorCheckSingular = "Найдена {0} проверка Doctor.",
                FoundDoctorChecksPlural = "Найдено {0} проверок Doctor.",
                SecuritySearchFor = "\u041f\u043e\u0438\u0441\u043a \u0431\u0435\u0437\u043e\u043f\u0430\u0441\u043d\u043e\u0441\u0442\u0438 \u0434\u043b\u044f \"{0}\": ",
                ResultCountSingular = "{0} результат",
                ResultCountPlural = "{0} результатов",
                DomainCountSingular = "{0} домен",
                DomainCountPlural = "{0} доменов",
                FoundResultsAcrossDomainsFrame = "Найдено {0} в {1} для \"{2}\".",
                And = "\u0438",
            },
        };

    /// <summary>
    /// ISynthesisEngine entry point: wraps the query into a minimal <see cref="QueryPlan"/>
    /// and delegates to the synchronous <see cref="Synthesize"/> with the default locale.
    /// </summary>
    public Task<SynthesisResult?> SynthesizeAsync(
        string query,
        IReadOnlyList<EntityCard> cards,
        IReadOnlyList<EntityMention> detectedEntities,
        CancellationToken ct)
    {
        var plan = new QueryPlan
        {
            OriginalQuery = query,
            NormalizedQuery = query,
            DetectedEntities = detectedEntities
        };
        var result = Synthesize(query, cards, plan);
        return Task.FromResult<SynthesisResult?>(result);
    }

    /// <summary>
    /// Selects a summary template based on the entity-type mix in <paramref name="topCards"/>
    /// and renders it using the phrases for <paramref name="locale"/> (falling back to English).
    /// </summary>
    public SynthesisResult Synthesize(string query, IReadOnlyList<EntityCard> topCards, QueryPlan plan, string locale = "en")
    {
        var strings = ResolveTemplateStrings(locale);
        if (topCards.Count == 0)
        {
            return new SynthesisResult
            {
                Summary = strings.NoResultsFound,
                Template = "empty",
                Confidence = "low",
                SourceCount = 0,
                DomainsCovered = []
            };
        }
        var domains = topCards.Select(static c => c.Domain).Distinct(StringComparer.Ordinal).ToArray();
        var entityTypes = topCards.Select(static c => c.EntityType).Distinct(StringComparer.Ordinal).ToArray();
        var hasCve = plan.DetectedEntities.Any(static e =>
            e.EntityType.Equals("cve", StringComparison.OrdinalIgnoreCase));
        string template;
        string summary;
        // Template selection: most specific first (CVE), then homogeneous result sets,
        // then security-flavored mixes, finally the generic mixed overview.
        if (hasCve && entityTypes.Contains("finding"))
        {
            template = "cve_summary";
            summary = BuildCveSummary(query, topCards, plan, strings);
        }
        else if (entityTypes.All(static t => t == "policy_rule"))
        {
            template = "policy_summary";
            summary = BuildPolicySummary(topCards, strings);
        }
        else if (entityTypes.All(static t => t == "doctor"))
        {
            template = "doctor_summary";
            summary = BuildDoctorSummary(topCards, strings);
        }
        else if (entityTypes.Contains("finding") || entityTypes.Contains("vex_statement"))
        {
            template = "security_overview";
            summary = BuildSecurityOverview(query, topCards, strings);
        }
        else
        {
            template = "mixed_overview";
            summary = BuildMixedOverview(query, topCards, domains, strings);
        }
        var confidence = ComputeConfidence(topCards, domains);
        return new SynthesisResult
        {
            Summary = summary,
            Template = template,
            Confidence = confidence,
            SourceCount = topCards.Count,
            DomainsCovered = domains
        };
    }

    /// <summary>
    /// Resolves the phrase set for a locale: exact match, then the language part of a
    /// full tag (e.g. "de-DE" -> "de"), finally English.
    /// </summary>
    private static LocalizedTemplateStrings ResolveTemplateStrings(string locale)
    {
        if (string.IsNullOrWhiteSpace(locale))
        {
            return TemplateStrings["en"];
        }
        // Try exact match first (e.g., "de")
        if (TemplateStrings.TryGetValue(locale, out var exact))
        {
            return exact;
        }
        // Try extracting language code from full locale (e.g., "de-DE" -> "de")
        var dash = locale.IndexOf('-');
        if (dash > 0)
        {
            var langCode = locale[..dash];
            if (TemplateStrings.TryGetValue(langCode, out var byLang))
            {
                return byLang;
            }
        }
        // Fallback to English
        return TemplateStrings["en"];
    }

    /// <summary>
    /// CVE-centric summary: counts findings / VEX statements / knowledge results and
    /// appends a severity call-out when a critical or high finding is present.
    /// </summary>
    private static string BuildCveSummary(
        string query,
        IReadOnlyList<EntityCard> cards,
        QueryPlan plan,
        LocalizedTemplateStrings strings)
    {
        var sb = new StringBuilder();
        var cveId = plan.DetectedEntities
            .FirstOrDefault(static e => e.EntityType.Equals("cve", StringComparison.OrdinalIgnoreCase))?.Value;
        if (!string.IsNullOrWhiteSpace(cveId))
        {
            sb.Append(string.Format(strings.ResultsFor, cveId));
        }
        var findingCount = cards.Count(static c => c.EntityType == "finding");
        var vexCount = cards.Count(static c => c.EntityType == "vex_statement");
        var docsCount = cards.Count(static c => c.EntityType == "docs" || c.EntityType == "api" || c.EntityType == "doctor");
        var parts = new List<string>();
        if (findingCount > 0)
        {
            parts.Add($"{findingCount} {(findingCount == 1 ? strings.FindingsSingular : strings.FindingsPlural)}");
        }
        if (vexCount > 0)
        {
            parts.Add($"{vexCount} {(vexCount == 1 ? strings.VexStatementSingular : strings.VexStatementsPlural)}");
        }
        if (docsCount > 0)
        {
            parts.Add($"{docsCount} {(docsCount == 1 ? strings.KnowledgeResultSingular : strings.KnowledgeResultsPlural)}");
        }
        sb.Append(string.Join(", ", parts));
        sb.Append('.');
        var criticalFinding = cards.FirstOrDefault(static c =>
            c.EntityType == "finding" &&
            c.Severity is "critical" or "high");
        if (criticalFinding is not null)
        {
            sb.Append(' ');
            sb.Append(string.Format(strings.SeverityDetected, criticalFinding.Severity?.ToUpperInvariant()));
        }
        return sb.ToString();
    }

    /// <summary>Summary for an all-policy-rule result set, with correct localized pluralization.</summary>
    private static string BuildPolicySummary(IReadOnlyList<EntityCard> cards, LocalizedTemplateStrings strings)
    {
        var countPhrase = cards.Count == 1 ? strings.FoundPolicyRuleSingular : strings.FoundPolicyRulesPlural;
        return string.Format(countPhrase, cards.Count) + " " +
               string.Format(strings.TopMatch, cards[0].Title);
    }

    /// <summary>Summary for an all-doctor-check result set, with correct localized pluralization.</summary>
    private static string BuildDoctorSummary(IReadOnlyList<EntityCard> cards, LocalizedTemplateStrings strings)
    {
        var countPhrase = cards.Count == 1 ? strings.FoundDoctorCheckSingular : strings.FoundDoctorChecksPlural;
        return string.Format(countPhrase, cards.Count) + " " +
               string.Format(strings.TopMatch, cards[0].Title);
    }

    /// <summary>Summary for result sets that include findings and/or VEX statements.</summary>
    private static string BuildSecurityOverview(
        string query,
        IReadOnlyList<EntityCard> cards,
        LocalizedTemplateStrings strings)
    {
        var findingCount = cards.Count(static c => c.EntityType == "finding");
        var vexCount = cards.Count(static c => c.EntityType == "vex_statement");
        var sb = new StringBuilder();
        sb.Append(string.Format(strings.SecuritySearchFor, TruncateQuery(query)));
        var parts = new List<string>();
        if (findingCount > 0)
        {
            parts.Add($"{findingCount} {(findingCount == 1 ? strings.FindingsSingular : strings.FindingsPlural)}");
        }
        if (vexCount > 0)
        {
            parts.Add($"{vexCount} {(vexCount == 1 ? strings.VexStatementSingular : strings.VexStatementsPlural)}");
        }
        sb.Append(string.Join($" {strings.And} ", parts));
        sb.Append('.');
        return sb.ToString();
    }

    /// <summary>
    /// Generic summary: "N result(s) across M domain(s)" rendered from per-locale
    /// count phrases composed into a localized frame, plus the top match.
    /// </summary>
    private static string BuildMixedOverview(
        string query,
        IReadOnlyList<EntityCard> cards,
        IReadOnlyList<string> domains,
        LocalizedTemplateStrings strings)
    {
        var resultPhrase = string.Format(
            cards.Count == 1 ? strings.ResultCountSingular : strings.ResultCountPlural,
            cards.Count);
        var domainPhrase = string.Format(
            domains.Count == 1 ? strings.DomainCountSingular : strings.DomainCountPlural,
            domains.Count);
        return string.Format(
                strings.FoundResultsAcrossDomainsFrame,
                resultPhrase,
                domainPhrase,
                TruncateQuery(query)) +
            " " + string.Format(strings.TopMatch, cards[0].Title);
    }

    /// <summary>Heuristic confidence: "high" needs 3+ cards over 2+ domains, "medium" needs 2+ cards.</summary>
    private static string ComputeConfidence(IReadOnlyList<EntityCard> cards, IReadOnlyList<string> domains)
    {
        if (cards.Count >= 3 && domains.Count >= 2)
        {
            return "high";
        }
        if (cards.Count >= 2)
        {
            return "medium";
        }
        return "low";
    }

    /// <summary>Caps the echoed query at 40 characters to keep summaries readable.</summary>
    private static string TruncateQuery(string query)
    {
        return query.Length <= 40 ? query : query[..40] + "...";
    }

    /// <summary>
    /// Holds all localized template strings for a single language. Count-dependent
    /// phrases carry complete singular and plural variants so no English suffix is
    /// ever appended to a non-English noun.
    /// </summary>
    private sealed class LocalizedTemplateStrings
    {
        public string NoResultsFound { get; init; } = "No results found.";
        public string ResultsFor { get; init; } = "Results for {0}: ";
        public string FindingsSingular { get; init; } = "finding";
        public string FindingsPlural { get; init; } = "findings";
        public string VexStatementSingular { get; init; } = "VEX statement";
        public string VexStatementsPlural { get; init; } = "VEX statements";
        public string KnowledgeResultSingular { get; init; } = "knowledge result";
        public string KnowledgeResultsPlural { get; init; } = "knowledge results";
        public string SeverityDetected { get; init; } = "{0} severity finding detected.";
        public string FoundPolicyRuleSingular { get; init; } = "Found {0} policy rule.";
        public string FoundPolicyRulesPlural { get; init; } = "Found {0} policy rules.";
        public string TopMatch { get; init; } = "Top match: {0}.";
        public string FoundDoctorCheckSingular { get; init; } = "Found {0} doctor check.";
        public string FoundDoctorChecksPlural { get; init; } = "Found {0} doctor checks.";
        public string SecuritySearchFor { get; init; } = "Security search for \"{0}\": ";
        public string ResultCountSingular { get; init; } = "{0} result";
        public string ResultCountPlural { get; init; } = "{0} results";
        public string DomainCountSingular { get; init; } = "{0} domain";
        public string DomainCountPlural { get; init; } = "{0} domains";
        // {0} = result-count phrase, {1} = domain-count phrase, {2} = truncated query.
        public string FoundResultsAcrossDomainsFrame { get; init; } = "Found {0} across {1} for \"{2}\".";
        public string And { get; init; } = "and";
    }
}

View File

@@ -0,0 +1,21 @@
You are a search synthesis assistant for Stella Ops, a release control platform.
Your job is to answer the user's question directly, using ONLY the provided search results as evidence.
RULES:
1. Answer in 3-5 sentences. Be concise and precise.
2. Cite your sources using bracket notation: [1], [2], etc., referencing the numbered search results.
3. Every factual claim MUST have at least one citation.
4. If the search results do not contain enough information to answer the question, say: "I don't have enough information to answer this based on the current search results."
5. Do NOT invent facts, entity keys, CVE IDs, URLs, or any information not present in the search results.
6. Do NOT mention that you are an AI or that you are synthesizing search results.
DOMAIN-SPECIFIC INSTRUCTIONS:
- Findings: When referencing findings, mention severity level (critical/high/medium/low) and remediation status if available.
- VEX Statements: When referencing VEX data, mention exploitability status (e.g., not_affected, affected, under_investigation) and justification if provided.
- Policy Rules: When referencing policy rules, mention enforcement level (enforce/warn/audit) and scope if available.
- Doctor Checks: When referencing doctor checks, mention severity and include the run command if available.
RESPONSE FORMAT:
- Plain text with inline citations in [N] format.
- Do not use markdown headers or bullet lists. Write flowing prose.
- Keep the total response under 150 words.

View File

@@ -0,0 +1,76 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// Hosted background service that keeps the unified search index warm: optionally
/// performs a full rebuild at startup, then re-indexes on a fixed cadence. Indexing
/// failures are logged and swallowed so the host keeps running; cancellation flows
/// through untouched.
/// </summary>
internal sealed class UnifiedSearchIndexRefreshService : BackgroundService
{
    private readonly KnowledgeSearchOptions _options;
    private readonly UnifiedSearchIndexer _indexer;
    private readonly ILogger<UnifiedSearchIndexRefreshService> _logger;

    public UnifiedSearchIndexRefreshService(
        IOptions<KnowledgeSearchOptions> options,
        UnifiedSearchIndexer indexer,
        ILogger<UnifiedSearchIndexRefreshService> logger)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(indexer);
        ArgumentNullException.ThrowIfNull(logger);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _indexer = indexer;
        _logger = logger;
    }

    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        if (!_options.UnifiedAutoIndexEnabled)
        {
            _logger.LogDebug("Unified search auto-indexing is disabled.");
            return;
        }

        if (_options.UnifiedAutoIndexOnStartup)
        {
            await RunStartupRebuildAsync(stoppingToken).ConfigureAwait(false);
        }

        // Floor the cadence at 30 seconds so a zero/negative configuration value
        // cannot spin the indexer in a tight loop.
        var cadence = TimeSpan.FromSeconds(Math.Max(30, _options.UnifiedIndexRefreshIntervalSeconds));
        using var ticker = new PeriodicTimer(cadence);
        while (!stoppingToken.IsCancellationRequested &&
               await ticker.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
        {
            await RunPeriodicIndexAsync(stoppingToken).ConfigureAwait(false);
        }
    }

    /// <summary>One-shot full rebuild at startup; failures are logged, never rethrown.</summary>
    private async Task RunStartupRebuildAsync(CancellationToken cancellationToken)
    {
        try
        {
            var summary = await _indexer.RebuildAllAsync(cancellationToken).ConfigureAwait(false);
            _logger.LogInformation(
                "Unified search rebuild completed: domains={DomainCount}, chunks={ChunkCount}, duration_ms={DurationMs}",
                summary.DomainCount,
                summary.ChunkCount,
                summary.DurationMs);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogWarning(ex, "Unified search startup rebuild failed.");
        }
    }

    /// <summary>One periodic incremental indexing pass; failures are logged, never rethrown.</summary>
    private async Task RunPeriodicIndexAsync(CancellationToken cancellationToken)
    {
        try
        {
            await _indexer.IndexAllAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogWarning(ex, "Unified search periodic indexing run failed.");
        }
    }
}

View File

@@ -0,0 +1,219 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using NpgsqlTypes;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using System.Text.Json;
using System.Diagnostics;
using System.Linq;
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// Writes unified-search chunks into PostgreSQL (advisoryai.kb_doc / advisoryai.kb_chunk).
/// Chunks are produced by the registered <c>ISearchIngestionAdapter</c> instances; a
/// failure in one domain is logged and never aborts the remaining adapters.
/// </summary>
internal sealed class UnifiedSearchIndexer : IUnifiedSearchIndexer
{
    private readonly KnowledgeSearchOptions _options;
    private readonly IEnumerable<ISearchIngestionAdapter> _adapters;
    private readonly ILogger<UnifiedSearchIndexer> _logger;

    public UnifiedSearchIndexer(
        IOptions<KnowledgeSearchOptions> options,
        IEnumerable<ISearchIngestionAdapter> adapters,
        ILogger<UnifiedSearchIndexer> logger)
    {
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _adapters = adapters ?? throw new ArgumentNullException(nameof(adapters));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Incremental pass: upserts whatever chunks each adapter currently produces.
    /// No-op when search is disabled or no connection string is configured.
    /// </summary>
    public async Task IndexAllAsync(CancellationToken cancellationToken)
    {
        if (!_options.Enabled || string.IsNullOrWhiteSpace(_options.ConnectionString))
        {
            _logger.LogDebug("Unified search indexing skipped because configuration is incomplete.");
            return;
        }
        foreach (var adapter in _adapters)
        {
            try
            {
                _logger.LogInformation("Unified search indexing domain '{Domain}'.", adapter.Domain);
                var chunks = await adapter.ProduceChunksAsync(cancellationToken).ConfigureAwait(false);
                if (chunks.Count == 0)
                {
                    _logger.LogDebug("No chunks produced by adapter for domain '{Domain}'.", adapter.Domain);
                    continue;
                }
                await UpsertChunksAsync(chunks, cancellationToken).ConfigureAwait(false);
                _logger.LogInformation("Indexed {Count} chunks for domain '{Domain}'.", chunks.Count, adapter.Domain);
            }
            catch (Exception ex)
            {
                // Per-domain isolation: one bad adapter must not stop the others.
                _logger.LogWarning(ex, "Failed to index domain '{Domain}'; continuing with other adapters.", adapter.Domain);
            }
        }
    }

    /// <summary>
    /// Full rebuild: for each adapter, deletes the domain's existing chunks and
    /// re-inserts fresh ones. Returns counts and elapsed time; failed domains are
    /// skipped (and excluded from the domain count).
    /// </summary>
    public async Task<UnifiedSearchIndexSummary> RebuildAllAsync(CancellationToken cancellationToken)
    {
        if (!_options.Enabled || string.IsNullOrWhiteSpace(_options.ConnectionString))
        {
            _logger.LogDebug("Unified search rebuild skipped because configuration is incomplete.");
            return new UnifiedSearchIndexSummary(0, 0, 0);
        }
        var stopwatch = Stopwatch.StartNew();
        var domains = 0;
        var chunks = 0;
        foreach (var adapter in _adapters)
        {
            try
            {
                // NOTE(review): delete + re-insert is not transactional — a failure between
                // the two leaves the domain temporarily empty until the next pass.
                await DeleteChunksByDomainAsync(adapter.Domain, cancellationToken).ConfigureAwait(false);
                var domainChunks = await adapter.ProduceChunksAsync(cancellationToken).ConfigureAwait(false);
                if (domainChunks.Count > 0)
                {
                    await UpsertChunksAsync(domainChunks, cancellationToken).ConfigureAwait(false);
                }
                domains++;
                chunks += domainChunks.Count;
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to rebuild domain '{Domain}'; continuing with remaining domains.", adapter.Domain);
            }
        }
        stopwatch.Stop();
        return new UnifiedSearchIndexSummary(domains, chunks, (long)stopwatch.Elapsed.TotalMilliseconds);
    }

    /// <summary>Deletes every kb_chunk row belonging to <paramref name="domain"/>.</summary>
    public async Task DeleteChunksByDomainAsync(string domain, CancellationToken cancellationToken)
    {
        if (!_options.Enabled || string.IsNullOrWhiteSpace(_options.ConnectionString))
        {
            return;
        }
        // A fresh data source per call; acceptable for the low call frequency here.
        await using var dataSource = new NpgsqlDataSourceBuilder(_options.ConnectionString).Build();
        const string sql = "DELETE FROM advisoryai.kb_chunk WHERE domain = @domain;";
        await using var command = dataSource.CreateCommand(sql);
        command.CommandTimeout = 60;
        command.Parameters.AddWithValue("domain", domain);
        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Upserts chunks into kb_chunk. Parent kb_doc rows are created first (insert-if-missing)
    /// so the chunk inserts never violate the doc foreign key. The tsvector column is built
    /// in SQL with title weighted A, section path B, body D.
    /// </summary>
    private async Task UpsertChunksAsync(IReadOnlyList<UnifiedChunk> chunks, CancellationToken cancellationToken)
    {
        await using var dataSource = new NpgsqlDataSourceBuilder(_options.ConnectionString).Build();
        await using var connection = await dataSource.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
        // Ensure parent documents exist for each unique DocId
        var uniqueDocIds = chunks.Select(static c => c.DocId).Distinct(StringComparer.Ordinal).ToArray();
        foreach (var docId in uniqueDocIds)
        {
            // Any chunk of the doc supplies representative metadata for the kb_doc row.
            var chunk = chunks.First(c => c.DocId == docId);
            await EnsureDocumentExistsAsync(connection, docId, chunk, cancellationToken).ConfigureAwait(false);
        }
        const string sql = """
            INSERT INTO advisoryai.kb_chunk
            (
                chunk_id, doc_id, kind, anchor, section_path,
                span_start, span_end, title, body, body_tsv,
                embedding, metadata, domain, entity_key, entity_type, freshness,
                indexed_at
            )
            VALUES
            (
                @chunk_id, @doc_id, @kind, @anchor, @section_path,
                @span_start, @span_end, @title, @body,
                setweight(to_tsvector('simple', coalesce(@title, '')), 'A') ||
                setweight(to_tsvector('simple', coalesce(@section_path, '')), 'B') ||
                setweight(to_tsvector('simple', coalesce(@body, '')), 'D'),
                @embedding, @metadata::jsonb, @domain, @entity_key, @entity_type, @freshness,
                NOW()
            )
            ON CONFLICT (chunk_id) DO UPDATE SET
                kind = EXCLUDED.kind,
                title = EXCLUDED.title,
                body = EXCLUDED.body,
                body_tsv = EXCLUDED.body_tsv,
                embedding = EXCLUDED.embedding,
                metadata = EXCLUDED.metadata,
                domain = EXCLUDED.domain,
                entity_key = EXCLUDED.entity_key,
                entity_type = EXCLUDED.entity_type,
                freshness = EXCLUDED.freshness,
                indexed_at = NOW();
            """;
        // One command object reused across the loop; parameters are rebuilt per chunk.
        await using var command = connection.CreateCommand();
        command.CommandText = sql;
        command.CommandTimeout = 120;
        foreach (var chunk in chunks)
        {
            command.Parameters.Clear();
            command.Parameters.AddWithValue("chunk_id", chunk.ChunkId);
            command.Parameters.AddWithValue("doc_id", chunk.DocId);
            command.Parameters.AddWithValue("kind", chunk.Kind);
            command.Parameters.AddWithValue("anchor", (object?)chunk.Anchor ?? DBNull.Value);
            command.Parameters.AddWithValue("section_path", (object?)chunk.SectionPath ?? DBNull.Value);
            command.Parameters.AddWithValue("span_start", chunk.SpanStart);
            command.Parameters.AddWithValue("span_end", chunk.SpanEnd);
            command.Parameters.AddWithValue("title", chunk.Title);
            command.Parameters.AddWithValue("body", chunk.Body);
            // NOTE(review): a missing embedding is stored as an empty real[] rather than
            // SQL NULL — confirm downstream vector queries treat the two the same way.
            command.Parameters.AddWithValue(
                "embedding",
                NpgsqlDbType.Array | NpgsqlDbType.Real,
                chunk.Embedding is null ? Array.Empty<float>() : chunk.Embedding);
            command.Parameters.AddWithValue("metadata", NpgsqlDbType.Jsonb, chunk.Metadata.RootElement.GetRawText());
            command.Parameters.AddWithValue("domain", chunk.Domain);
            command.Parameters.AddWithValue("entity_key", (object?)chunk.EntityKey ?? DBNull.Value);
            command.Parameters.AddWithValue("entity_type", (object?)chunk.EntityType ?? DBNull.Value);
            command.Parameters.AddWithValue("freshness",
                chunk.Freshness.HasValue ? (object)chunk.Freshness.Value : DBNull.Value);
            await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Inserts a kb_doc row for <paramref name="docId"/> if none exists
    /// (ON CONFLICT DO NOTHING); existing docs are left untouched.
    /// </summary>
    private static async Task EnsureDocumentExistsAsync(
        NpgsqlConnection connection,
        string docId,
        UnifiedChunk chunk,
        CancellationToken cancellationToken)
    {
        const string sql = """
            INSERT INTO advisoryai.kb_doc
            (doc_id, doc_type, product, version, source_ref, path, title, content_hash, metadata, indexed_at)
            VALUES (@doc_id, @doc_type, @product, @version, @source_ref, @path, @title, @content_hash, '{}'::jsonb, NOW())
            ON CONFLICT (doc_id) DO NOTHING;
            """;
        await using var command = connection.CreateCommand();
        command.CommandText = sql;
        command.CommandTimeout = 30;
        command.Parameters.AddWithValue("doc_id", docId);
        command.Parameters.AddWithValue("doc_type", chunk.Domain);
        command.Parameters.AddWithValue("product", "stella-ops");
        command.Parameters.AddWithValue("version", "local");
        command.Parameters.AddWithValue("source_ref", chunk.Domain);
        command.Parameters.AddWithValue("path", chunk.Kind);
        command.Parameters.AddWithValue("title", chunk.Title);
        command.Parameters.AddWithValue("content_hash", KnowledgeSearchText.StableId(chunk.Body));
        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }
}
/// <summary>
/// Outcome of a full index rebuild: how many domains were rebuilt successfully,
/// how many chunks were written in total, and the elapsed wall-clock time.
/// </summary>
public sealed record UnifiedSearchIndexSummary(
    int DomainCount,
    int ChunkCount,
    long DurationMs);

View File

@@ -0,0 +1,161 @@
using System.Text.Json;
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// A single indexable unit of searchable content: identity (chunk/doc ids), text
/// (title/body with optional anchor, section path and character span), an optional
/// embedding vector, entity linkage, freshness and free-form JSON metadata.
/// Persisted to advisoryai.kb_chunk by the unified search indexer.
/// </summary>
public sealed record UnifiedChunk(
    string ChunkId,
    string DocId,
    string Kind,
    string Domain,
    string Title,
    string Body,
    float[]? Embedding,
    string? EntityKey,
    string? EntityType,
    string? Anchor,
    string? SectionPath,
    int SpanStart,
    int SpanEnd,
    DateTimeOffset? Freshness,
    JsonDocument Metadata);
/// <summary>
/// Incoming unified search request: the query string <paramref name="Q"/>, optional
/// result count <paramref name="K"/>, optional filters, and flags controlling whether
/// synthesis and debug diagnostics are included in the response.
/// </summary>
public sealed record UnifiedSearchRequest(
    string Q,
    int? K = null,
    UnifiedSearchFilter? Filters = null,
    bool IncludeSynthesis = true,
    bool IncludeDebug = false);
/// <summary>
/// Optional narrowing criteria for a unified search: domains, entity types and keys,
/// product/version/service coordinates, tags and tenant. All properties are optional;
/// a null property applies no filtering on that dimension.
/// </summary>
public sealed record UnifiedSearchFilter
{
    public IReadOnlyList<string>? Domains { get; init; }
    public IReadOnlyList<string>? EntityTypes { get; init; }
    public string? EntityKey { get; init; }
    public string? Product { get; init; }
    public string? Version { get; init; }
    public string? Service { get; init; }
    public IReadOnlyList<string>? Tags { get; init; }
    public string? Tenant { get; init; }
    /// <summary>
    /// User scopes extracted from the authenticated request context. Used by
    /// <c>DomainWeightCalculator</c> to apply role-based domain biases (Sprint 106 / G6).
    /// Not serialized in API responses.
    /// </summary>
    public IReadOnlyList<string>? UserScopes { get; init; }
}
/// <summary>A "did you mean?" suggestion emitted when results are sparse.</summary>
public sealed record SearchSuggestion(string Text, string Reason);
/// <summary>
/// A query refinement suggestion; <paramref name="Source"/> is one of
/// "resolved_alert", "similar_successful_query", or "entity_alias".
/// </summary>
public sealed record SearchRefinement(string Text, string Source);
/// <summary>
/// Full response of a unified search: ranked entity cards, optional synthesized
/// summary, diagnostics, plus optional suggestions/refinements for sparse results.
/// </summary>
public sealed record UnifiedSearchResponse(
    string Query,
    int TopK,
    IReadOnlyList<EntityCard> Cards,
    SynthesisResult? Synthesis,
    UnifiedSearchDiagnostics Diagnostics,
    IReadOnlyList<SearchSuggestion>? Suggestions = null,
    IReadOnlyList<SearchRefinement>? Refinements = null);
/// <summary>
/// A single ranked search result rendered as a card: identity, snippet,
/// fused score, UI actions, and an optional structured preview.
/// </summary>
public sealed record EntityCard
{
    public string EntityKey { get; init; } = string.Empty;
    public string EntityType { get; init; } = string.Empty;
    // Default domain matches the service-side fallback for unknown kinds.
    public string Domain { get; init; } = "knowledge";
    public string Title { get; init; } = string.Empty;
    public string Snippet { get; init; } = string.Empty;
    /// <summary>Fused relevance score from the ranking stage.</summary>
    public double Score { get; init; }
    public string? Severity { get; init; }
    public IReadOnlyList<EntityCardAction> Actions { get; init; } = [];
    public IReadOnlyDictionary<string, string>? Metadata { get; init; }
    public IReadOnlyList<string> Sources { get; init; } = [];
    public EntityCardPreview? Preview { get; init; }
}
/// <summary>
/// Optional rich preview for a card. <paramref name="ContentType"/> is
/// "markdown" or "structured"; structured previews carry labeled fields.
/// </summary>
public sealed record EntityCardPreview(
    string ContentType,
    string Content,
    string? Language = null,
    IReadOnlyList<PreviewField>? StructuredFields = null);
/// <summary>A labeled value in a structured preview, with optional severity for styling.</summary>
public sealed record PreviewField(string Label, string Value, string? Severity = null);
/// <summary>
/// A UI action attached to a card. <paramref name="ActionType"/> values seen in
/// this file: "navigate" (uses <paramref name="Route"/>), "copy"/"run"
/// (use <paramref name="Command"/>), and "details".
/// </summary>
public sealed record EntityCardAction(
    string Label,
    string ActionType,
    string? Route = null,
    string? Command = null,
    bool IsPrimary = false);
/// <summary>
/// Output of the synthesis engine: a grounded summary over the top results,
/// with optional per-citation attribution and a grounding score.
/// </summary>
public sealed record SynthesisResult
{
    public string Summary { get; init; } = string.Empty;
    public string Template { get; init; } = string.Empty;
    public string Confidence { get; init; } = "low";
    public int SourceCount { get; init; }
    public IReadOnlyList<string> DomainsCovered { get; init; } = [];
    public IReadOnlyList<SynthesisCitation>? Citations { get; init; }
    public double? GroundingScore { get; init; }
}
/// <summary>A numbered citation linking a synthesis claim to a source entity.</summary>
public sealed record SynthesisCitation
{
    public int Index { get; init; }
    public string EntityKey { get; init; } = string.Empty;
    public string Title { get; init; } = string.Empty;
}
/// <summary>
/// Per-request diagnostics. <paramref name="Mode"/> values used by the service:
/// "hybrid", "fts-only", "empty", "disabled".
/// </summary>
public sealed record UnifiedSearchDiagnostics(
    int FtsMatches,
    int VectorMatches,
    int EntityCardCount,
    long DurationMs,
    bool UsedVector,
    string Mode,
    QueryPlan? Plan = null);
/// <summary>
/// Result of query understanding: normalized query, classified intent,
/// detected entity mentions, and per-domain ranking weights.
/// </summary>
public sealed record QueryPlan
{
    public string OriginalQuery { get; init; } = string.Empty;
    public string NormalizedQuery { get; init; } = string.Empty;
    public string Intent { get; init; } = "explore";
    public IReadOnlyList<EntityMention> DetectedEntities { get; init; } = [];
    public IReadOnlyDictionary<string, double> DomainWeights { get; init; } =
        new Dictionary<string, double>(StringComparer.Ordinal);
}
/// <summary>An entity mention detected in the query text, with its character span.</summary>
public sealed record EntityMention(
    string Value,
    string EntityType,
    int StartIndex,
    int Length);

View File

@@ -0,0 +1,940 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.KnowledgeSearch;
using StellaOps.AdvisoryAI.UnifiedSearch.Analytics;
using StellaOps.AdvisoryAI.UnifiedSearch.QueryUnderstanding;
using StellaOps.AdvisoryAI.UnifiedSearch.Synthesis;
using StellaOps.AdvisoryAI.Vectorization;
using System.Text.Json;
using System.Linq;
namespace StellaOps.AdvisoryAI.UnifiedSearch;
internal sealed class UnifiedSearchService : IUnifiedSearchService
{
private readonly KnowledgeSearchOptions _options;
private readonly IKnowledgeSearchStore _store;
private readonly IVectorEncoder _vectorEncoder;
private readonly QueryPlanBuilder _queryPlanBuilder;
private readonly ISynthesisEngine _synthesisEngine;
private readonly SearchAnalyticsService _analyticsService;
private readonly SearchQualityMonitor _qualityMonitor;
private readonly IEntityAliasService _entityAliasService;
private readonly ILogger<UnifiedSearchService> _logger;
private readonly TimeProvider _timeProvider;
// Optional: telemetry emission is skipped when no sink is registered.
private readonly IUnifiedSearchTelemetrySink? _telemetrySink;
// Cached popularity map (Sprint 106 / G6)
// NOTE(review): the cache is not keyed by tenant; see GetPopularityMapAsync.
private IReadOnlyDictionary<string, int>? _popularityMapCache;
private DateTimeOffset _popularityMapExpiry = DateTimeOffset.MinValue;
private readonly object _popularityMapLock = new();
private static readonly TimeSpan PopularityCacheDuration = TimeSpan.FromMinutes(5);
// Refinement threshold: only suggest when result count is below this (G10-004)
private const int RefinementResultThreshold = 3;
/// <summary>
/// Creates the unified search service. All collaborators are required except
/// <paramref name="telemetrySink"/>, which may be omitted.
/// </summary>
/// <exception cref="ArgumentNullException">When any required dependency is null.</exception>
public UnifiedSearchService(
    IOptions<KnowledgeSearchOptions> options,
    IKnowledgeSearchStore store,
    IVectorEncoder vectorEncoder,
    QueryPlanBuilder queryPlanBuilder,
    ISynthesisEngine synthesisEngine,
    SearchAnalyticsService analyticsService,
    SearchQualityMonitor qualityMonitor,
    IEntityAliasService entityAliasService,
    ILogger<UnifiedSearchService> logger,
    TimeProvider timeProvider,
    IUnifiedSearchTelemetrySink? telemetrySink = null)
{
    ArgumentNullException.ThrowIfNull(options);
    // Options.Value may be null if registered without configuration; use defaults then.
    _options = options.Value ?? new KnowledgeSearchOptions();
    _store = store ?? throw new ArgumentNullException(nameof(store));
    _vectorEncoder = vectorEncoder ?? throw new ArgumentNullException(nameof(vectorEncoder));
    _queryPlanBuilder = queryPlanBuilder ?? throw new ArgumentNullException(nameof(queryPlanBuilder));
    _synthesisEngine = synthesisEngine ?? throw new ArgumentNullException(nameof(synthesisEngine));
    _analyticsService = analyticsService ?? throw new ArgumentNullException(nameof(analyticsService));
    _qualityMonitor = qualityMonitor ?? throw new ArgumentNullException(nameof(qualityMonitor));
    _entityAliasService = entityAliasService ?? throw new ArgumentNullException(nameof(entityAliasService));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
    _telemetrySink = telemetrySink;
}
/// <summary>
/// Executes a hybrid search: query planning, lexical FTS retrieval, best-effort
/// vector retrieval, weighted RRF fusion, card building, optional synthesis, and
/// sparse-result suggestions/refinements. Returns an empty response when the
/// query is blank or the feature is disabled/unconfigured.
/// </summary>
public async Task<UnifiedSearchResponse> SearchAsync(UnifiedSearchRequest request, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(request);
    var startedAt = _timeProvider.GetUtcNow();
    var query = KnowledgeSearchText.NormalizeWhitespace(request.Q);
    if (string.IsNullOrWhiteSpace(query))
    {
        return EmptyResponse(string.Empty, request.K, "empty");
    }
    if (!_options.Enabled || string.IsNullOrWhiteSpace(_options.ConnectionString))
    {
        return EmptyResponse(query, request.K, "disabled");
    }
    var plan = _queryPlanBuilder.Build(request);
    var topK = ResolveTopK(request.K);
    // Per-stage store timeout; floor of 250 ms regardless of configuration.
    var timeout = TimeSpan.FromMilliseconds(Math.Max(250, _options.QueryTimeoutMs));
    // Build domain-aware filter for the store query
    var storeFilter = BuildStoreFilter(request.Filters);
    // Stage 1: lexical full-text retrieval; rank = 1-based position in FTS order.
    var ftsRows = await _store.SearchFtsAsync(
        query,
        storeFilter,
        Math.Max(topK, _options.FtsCandidateCount),
        timeout,
        cancellationToken).ConfigureAwait(false);
    var lexicalRanks = ftsRows
        .Select((row, index) => (row.ChunkId, Rank: index + 1, Row: row))
        .ToDictionary(static item => item.ChunkId, static item => item, StringComparer.Ordinal);
    var vectorRows = Array.Empty<(KnowledgeChunkRow Row, int Rank, double Score)>();
    var usedVector = false;
    // Stage 2: vector retrieval — best effort; any failure degrades to lexical-only.
    try
    {
        var queryEmbedding = EncodeQueryEmbedding(query);
        if (queryEmbedding.Length > 0)
        {
            var candidates = await _store.LoadVectorCandidatesAsync(
                queryEmbedding,
                storeFilter,
                Math.Max(topK, _options.VectorScanLimit),
                timeout,
                cancellationToken).ConfigureAwait(false);
            // Score by cosine similarity; deterministic tiebreak on ChunkId.
            var rankedVectors = candidates
                .Select(row => (Row: row, Score: row.Embedding is { Length: > 0 }
                    ? KnowledgeSearchText.CosineSimilarity(queryEmbedding, row.Embedding)
                    : 0d))
                .Where(static item => item.Score > 0d)
                .OrderByDescending(static item => item.Score)
                .ThenBy(static item => item.Row.ChunkId, StringComparer.Ordinal)
                .Take(Math.Max(topK, _options.VectorCandidateCount))
                .Select((item, index) => (item.Row, Rank: index + 1, item.Score))
                .ToArray();
            vectorRows = rankedVectors;
            usedVector = rankedVectors.Length > 0;
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Unified search vector stage failed; continuing with lexical results only.");
    }
    // Load popularity map if enabled (Sprint 106 / G6)
    IReadOnlyDictionary<string, int>? popularityMap = null;
    var popularityWeight = 0d;
    if (_options.PopularityBoostEnabled && _options.PopularityBoostWeight > 0d)
    {
        popularityMap = await GetPopularityMapAsync(
            request.Filters?.Tenant ?? "global", cancellationToken).ConfigureAwait(false);
        popularityWeight = _options.PopularityBoostWeight;
    }
    // Stage 3: fuse lexical and vector rankings (weighted RRF with boosts).
    var merged = WeightedRrfFusion.Fuse(
        plan.DomainWeights,
        lexicalRanks,
        vectorRows,
        query,
        request.Filters,
        plan.DetectedEntities,
        _options.UnifiedFreshnessBoostEnabled,
        startedAt,
        popularityMap,
        popularityWeight);
    var topResults = merged.Take(topK).ToArray();
    var cards = topResults
        .Select(item => BuildEntityCard(item.Row, item.Score, item.Debug))
        .ToArray();
    SynthesisResult? synthesis = null;
    if (request.IncludeSynthesis && cards.Length > 0)
    {
        synthesis = await _synthesisEngine.SynthesizeAsync(
            query, cards, plan.DetectedEntities, cancellationToken).ConfigureAwait(false);
    }
    // G4-003: Generate "Did you mean?" suggestions when results are sparse
    IReadOnlyList<SearchSuggestion>? suggestions = null;
    if (cards.Length < _options.MinFtsResultsForFuzzyFallback && _options.FuzzyFallbackEnabled)
    {
        suggestions = await GenerateSuggestionsAsync(
            query, storeFilter, cancellationToken).ConfigureAwait(false);
    }
    // G10-004: Generate query refinement suggestions from feedback data
    var tenantId = request.Filters?.Tenant ?? "global";
    IReadOnlyList<SearchRefinement>? refinements = null;
    if (cards.Length < RefinementResultThreshold)
    {
        refinements = await GenerateRefinementsAsync(
            tenantId, query, cards.Length, cancellationToken).ConfigureAwait(false);
    }
    var duration = _timeProvider.GetUtcNow() - startedAt;
    var response = new UnifiedSearchResponse(
        query,
        topK,
        cards,
        synthesis,
        new UnifiedSearchDiagnostics(
            ftsRows.Count,
            vectorRows.Length,
            cards.Length,
            (long)duration.TotalMilliseconds,
            usedVector,
            usedVector ? "hybrid" : "fts-only",
            plan),
        suggestions,
        refinements);
    EmitTelemetry(plan, response, tenantId);
    return response;
}
/// <summary>
/// Converts a ranked store row into a UI-facing <see cref="EntityCard"/>.
/// Entity key/type come from metadata when present, otherwise are derived
/// from the chunk id and kind.
/// </summary>
private EntityCard BuildEntityCard(
    KnowledgeChunkRow row,
    double score,
    IReadOnlyDictionary<string, string> debug)
{
    // NOTE(review): 'debug' is currently unused here — confirm whether per-result
    // debug details were meant to be surfaced (e.g. behind IncludeDebug).
    var metadata = row.Metadata.RootElement;
    var domain = GetDomain(row);
    var entityKey = GetMetadataString(metadata, "entity_key") ?? BuildDefaultEntityKey(row);
    var entityType = GetMetadataString(metadata, "entity_type") ?? MapKindToEntityType(row.Kind);
    var severity = GetMetadataString(metadata, "severity");
    // Prefer the store-provided snippet; otherwise derive one from the body.
    var snippet = string.IsNullOrWhiteSpace(row.Snippet)
        ? KnowledgeSearchText.BuildSnippet(row.Body, "")
        : row.Snippet;
    var actions = BuildActions(row, domain);
    var sources = new List<string> { domain };
    var preview = BuildPreview(row, domain);
    return new EntityCard
    {
        EntityKey = entityKey,
        EntityType = entityType,
        Domain = domain,
        Title = row.Title,
        Snippet = snippet,
        Score = score,
        Severity = severity,
        Actions = actions,
        Sources = sources,
        Preview = preview
    };
}
// Cap for markdown preview bodies to keep card payloads small.
private const int PreviewContentMaxLength = 2000;
/// <summary>
/// Builds a kind-specific preview: raw markdown for doc sections, structured
/// fields + curl example for API operations, structured fields + run command
/// for doctor checks, and structured vulnerability fields for findings.
/// Returns null for kinds/domains without a preview.
/// </summary>
private static EntityCardPreview? BuildPreview(KnowledgeChunkRow row, string domain)
{
    var metadata = row.Metadata.RootElement;
    switch (domain)
    {
        case "knowledge" when row.Kind is "md_section":
        {
            if (string.IsNullOrWhiteSpace(row.Body))
                return null;
            // Truncate long bodies to the preview cap.
            var content = row.Body.Length > PreviewContentMaxLength
                ? row.Body[..PreviewContentMaxLength]
                : row.Body;
            return new EntityCardPreview("markdown", content);
        }
        case "knowledge" when row.Kind is "api_operation":
        {
            var method = GetMetadataString(metadata, "method") ?? "GET";
            var path = GetMetadataString(metadata, "path") ?? "/";
            var service = GetMetadataString(metadata, "service") ?? "unknown";
            var operationId = GetMetadataString(metadata, "operationId");
            var summary = GetMetadataString(metadata, "summary");
            var fields = new List<PreviewField>
            {
                new("Method", method.ToUpperInvariant()),
                new("Path", path),
                new("Service", service)
            };
            if (!string.IsNullOrWhiteSpace(operationId))
                fields.Add(new PreviewField("Operation", operationId));
            if (!string.IsNullOrWhiteSpace(summary))
                fields.Add(new PreviewField("Summary", summary));
            // Build parameters list from metadata if available
            if (metadata.TryGetProperty("parameters", out var paramsProp) &&
                paramsProp.ValueKind == JsonValueKind.String)
            {
                var paramsText = paramsProp.GetString();
                if (!string.IsNullOrWhiteSpace(paramsText))
                    fields.Add(new PreviewField("Parameters", paramsText));
            }
            // Build curl example
            var curlExample = $"curl -X {method.ToUpperInvariant()} \"$STELLAOPS_API_BASE{path}\" \\\n" +
                " -H \"Authorization: Bearer $TOKEN\" \\\n" +
                " -H \"Content-Type: application/json\"";
            return new EntityCardPreview("structured", curlExample, Language: "bash", StructuredFields: fields);
        }
        case "knowledge" when row.Kind is "doctor_check":
        {
            var checkCode = GetMetadataString(metadata, "checkCode") ?? row.Title;
            var doctorSeverity = GetMetadataString(metadata, "severity") ?? "info";
            var symptoms = GetMetadataString(metadata, "symptoms");
            var remediation = GetMetadataString(metadata, "remediation");
            var runCommand = GetMetadataString(metadata, "runCommand") ??
                $"stella doctor run --check {checkCode}";
            var control = GetMetadataString(metadata, "control") ?? "safe";
            var fields = new List<PreviewField>
            {
                new("Severity", doctorSeverity, doctorSeverity),
                new("Check Code", checkCode)
            };
            if (!string.IsNullOrWhiteSpace(symptoms))
                fields.Add(new PreviewField("Symptoms", symptoms));
            if (!string.IsNullOrWhiteSpace(remediation))
                fields.Add(new PreviewField("Remediation", remediation));
            fields.Add(new PreviewField("Control", control));
            return new EntityCardPreview("structured", runCommand, Language: "bash", StructuredFields: fields);
        }
        case "findings":
        {
            var cveId = GetMetadataString(metadata, "cveId") ?? row.Title;
            var findingSeverity = GetMetadataString(metadata, "severity") ?? "unknown";
            var cvssScore = GetMetadataString(metadata, "cvssScore");
            var affectedPackage = GetMetadataString(metadata, "affectedPackage");
            var affectedVersions = GetMetadataString(metadata, "affectedVersions");
            var reachability = GetMetadataString(metadata, "reachability");
            var vexStatus = GetMetadataString(metadata, "vexStatus");
            var policyBadge = GetMetadataString(metadata, "policyBadge");
            var remediationHint = GetMetadataString(metadata, "remediationHint");
            var fields = new List<PreviewField>
            {
                new("CVE ID", cveId),
                new("Severity", findingSeverity, findingSeverity)
            };
            if (!string.IsNullOrWhiteSpace(cvssScore))
                fields.Add(new PreviewField("CVSS", cvssScore));
            if (!string.IsNullOrWhiteSpace(affectedPackage))
                fields.Add(new PreviewField("Package", affectedPackage));
            if (!string.IsNullOrWhiteSpace(affectedVersions))
                fields.Add(new PreviewField("Versions", affectedVersions));
            if (!string.IsNullOrWhiteSpace(reachability))
                fields.Add(new PreviewField("Reachability", reachability));
            if (!string.IsNullOrWhiteSpace(vexStatus))
                fields.Add(new PreviewField("VEX Status", vexStatus));
            if (!string.IsNullOrWhiteSpace(policyBadge))
                fields.Add(new PreviewField("Policy", policyBadge));
            // Preview content is the remediation hint when present, else empty.
            var content = !string.IsNullOrWhiteSpace(remediationHint)
                ? remediationHint
                : string.Empty;
            return new EntityCardPreview("structured", content, StructuredFields: fields);
        }
        default:
            return null;
    }
}
/// <summary>
/// Builds the per-card UI actions for a row based on its domain/kind:
/// navigate routes, copyable commands, or a generic "Details" fallback.
/// The first action added is the primary one.
/// </summary>
private static IReadOnlyList<EntityCardAction> BuildActions(KnowledgeChunkRow row, string domain)
{
    var actions = new List<EntityCardAction>();
    var metadata = row.Metadata.RootElement;
    switch (domain)
    {
        case "knowledge" when row.Kind == "api_operation":
        {
            var method = GetMetadataString(metadata, "method") ?? "GET";
            var path = GetMetadataString(metadata, "path") ?? "/";
            var service = GetMetadataString(metadata, "service") ?? "unknown";
            var operationId = GetMetadataString(metadata, "operationId") ?? row.Title;
            actions.Add(new EntityCardAction(
                "Open",
                "navigate",
                $"/ops/integrations?q={Uri.EscapeDataString(operationId)}",
                null,
                true));
            actions.Add(new EntityCardAction(
                "Curl",
                "copy",
                null,
                $"curl -X {method.ToUpperInvariant()} \"$STELLAOPS_API_BASE{path}\"",
                false));
            break;
        }
        case "knowledge" when row.Kind == "doctor_check":
        {
            var checkCode = GetMetadataString(metadata, "checkCode") ?? row.Title;
            var runCommand = GetMetadataString(metadata, "runCommand") ??
                $"stella doctor run --check {checkCode}";
            actions.Add(new EntityCardAction(
                "Run",
                "run",
                $"/ops/operations/doctor?check={Uri.EscapeDataString(checkCode)}",
                runCommand,
                true));
            break;
        }
        case "knowledge":
        {
            // Generic doc section: deep-link into the docs viewer at the anchor.
            var docPath = GetMetadataString(metadata, "path") ?? string.Empty;
            var anchor = row.Anchor ?? GetMetadataString(metadata, "anchor") ?? "overview";
            actions.Add(new EntityCardAction(
                "Open",
                "navigate",
                $"/docs/{Uri.EscapeDataString(docPath)}#{Uri.EscapeDataString(anchor)}",
                null,
                true));
            break;
        }
        case "findings":
        {
            var cveId = GetMetadataString(metadata, "cveId") ?? row.Title;
            actions.Add(new EntityCardAction(
                "View Finding",
                "navigate",
                $"/security/triage?q={Uri.EscapeDataString(cveId)}",
                null,
                true));
            actions.Add(new EntityCardAction(
                "Copy CVE",
                "copy",
                null,
                cveId,
                false));
            break;
        }
        case "vex":
        {
            var cveId = GetMetadataString(metadata, "cveId") ?? row.Title;
            actions.Add(new EntityCardAction(
                "View VEX",
                "navigate",
                $"/security/advisories-vex?q={Uri.EscapeDataString(cveId)}",
                null,
                true));
            break;
        }
        case "policy":
        {
            var ruleId = GetMetadataString(metadata, "ruleId") ?? row.Title;
            actions.Add(new EntityCardAction(
                "View Rule",
                "navigate",
                $"/ops/policy/baselines?q={Uri.EscapeDataString(ruleId)}",
                null,
                true));
            break;
        }
        case "platform":
        {
            var route = GetMetadataString(metadata, "route") ?? "/ops";
            actions.Add(new EntityCardAction(
                "Open",
                "navigate",
                route,
                null,
                true));
            break;
        }
        default:
        {
            actions.Add(new EntityCardAction(
                "Details",
                "details",
                null,
                null,
                true));
            break;
        }
    }
    return actions;
}
/// <summary>
/// Resolves the search domain for a row: an explicit "domain" metadata string
/// wins; otherwise the chunk kind is mapped, defaulting to "knowledge".
/// </summary>
private static string GetDomain(KnowledgeChunkRow row)
{
    var root = row.Metadata.RootElement;
    if (root.TryGetProperty("domain", out var explicitDomain) &&
        explicitDomain.ValueKind == JsonValueKind.String)
    {
        return explicitDomain.GetString() ?? "knowledge";
    }

    switch (row.Kind)
    {
        case "finding":
            return "findings";
        case "vex_statement":
            return "vex";
        case "policy_rule":
            return "policy";
        case "platform_entity":
            return "platform";
        default:
            return "knowledge";
    }
}
/// <summary>
/// Fallback entity key when metadata has none: the kind plus up to the first
/// 16 characters of the chunk id.
/// </summary>
private static string BuildDefaultEntityKey(KnowledgeChunkRow row)
{
    var prefixLength = Math.Min(16, row.ChunkId.Length);
    return string.Concat(row.Kind, ":", row.ChunkId[..prefixLength]);
}
/// <summary>
/// Maps a chunk kind to its entity-type display name. Only the three
/// knowledge kinds have distinct names; every other kind is returned verbatim.
/// </summary>
private static string MapKindToEntityType(string kind)
{
    if (kind == "md_section")
    {
        return "docs";
    }
    if (kind == "api_operation")
    {
        return "api";
    }
    if (kind == "doctor_check")
    {
        return "doctor";
    }
    // "finding", "vex_statement", "policy_rule", "platform_entity", and any
    // unknown kind map to themselves.
    return kind;
}
/// <summary>
/// Translates the public <see cref="UnifiedSearchFilter"/> into the store-level
/// <see cref="KnowledgeSearchFilter"/>, mapping domains and entity types into
/// chunk kinds. A null filter (and a blank tenant) defaults to the "global" tenant.
/// </summary>
/// <exception cref="ArgumentException">For unsupported domain or entityType values.</exception>
private KnowledgeSearchFilter? BuildStoreFilter(UnifiedSearchFilter? unifiedFilter)
{
    if (unifiedFilter is null)
    {
        return new KnowledgeSearchFilter
        {
            Tenant = "global"
        };
    }
    var kinds = new List<string>();
    if (unifiedFilter.Domains is { Count: > 0 })
    {
        foreach (var domain in unifiedFilter.Domains)
        {
            switch (domain)
            {
                case "knowledge":
                    // Fix: emit chunk *kinds* here — the same vocabulary used by
                    // the entityType mapping below and by indexed chunks
                    // ("md_section", "api_operation", "doctor_check"). The
                    // previous values ("docs", "api", "doctor") were entity-type
                    // display names, which never match stored kinds and defeated
                    // the duplicate check against the EntityTypes branch.
                    kinds.AddRange(["md_section", "api_operation", "doctor_check"]);
                    break;
                case "findings":
                    kinds.Add("finding");
                    break;
                case "vex":
                    kinds.Add("vex_statement");
                    break;
                case "policy":
                    kinds.Add("policy_rule");
                    break;
                case "platform":
                    kinds.Add("platform_entity");
                    break;
                default:
                    throw new ArgumentException(
                        $"Unsupported filter domain '{domain}'. Supported values: knowledge, findings, vex, policy, platform.",
                        nameof(unifiedFilter));
            }
        }
    }
    if (unifiedFilter.EntityTypes is { Count: > 0 })
    {
        foreach (var entityType in unifiedFilter.EntityTypes)
        {
            // Entity types are the public names; translate them to chunk kinds.
            var kind = entityType switch
            {
                "docs" => "md_section",
                "api" => "api_operation",
                "doctor" => "doctor_check",
                "finding" => "finding",
                "vex_statement" => "vex_statement",
                "policy_rule" => "policy_rule",
                "platform_entity" => "platform_entity",
                _ => null
            };
            if (kind is null)
            {
                throw new ArgumentException(
                    $"Unsupported filter entityType '{entityType}'. Supported values: docs, api, doctor, finding, vex_statement, policy_rule, platform_entity.",
                    nameof(unifiedFilter));
            }
            if (!kinds.Contains(kind, StringComparer.OrdinalIgnoreCase))
            {
                kinds.Add(kind);
            }
        }
    }
    return new KnowledgeSearchFilter
    {
        Type = kinds.Count > 0 ? kinds.Distinct(StringComparer.OrdinalIgnoreCase).ToArray() : null,
        Product = unifiedFilter.Product,
        Version = unifiedFilter.Version,
        Service = unifiedFilter.Service,
        Tags = unifiedFilter.Tags,
        Tenant = string.IsNullOrWhiteSpace(unifiedFilter.Tenant) ? "global" : unifiedFilter.Tenant
    };
}
/// <summary>
/// Encodes the query into a vector of the configured dimensionality
/// (truncating or zero-padding the encoder output) and L2-normalizes it.
/// Returns an empty array when the encoder produces no output.
/// </summary>
private float[] EncodeQueryEmbedding(string query)
{
    var encoded = _vectorEncoder.Encode(query);
    if (encoded.Length == 0)
    {
        return encoded;
    }

    // Project into the configured dimensionality: truncate or zero-pad.
    var dimensions = Math.Max(1, _options.VectorDimensions);
    var vector = new float[dimensions];
    Array.Copy(encoded, vector, Math.Min(encoded.Length, dimensions));

    var sumOfSquares = 0d;
    foreach (var component in vector)
    {
        sumOfSquares += component * component;
    }

    // All-zero vector: nothing to normalize.
    if (sumOfSquares <= 0d)
    {
        return vector;
    }

    var magnitude = Math.Sqrt(sumOfSquares);
    for (var i = 0; i < vector.Length; i++)
    {
        vector[i] = (float)(vector[i] / magnitude);
    }

    return vector;
}
/// <summary>
/// Resolves the effective result count: an explicit request is clamped to
/// [1, 100]; otherwise the configured default (floored at 1) is used.
/// </summary>
private int ResolveTopK(int? requested)
{
    if (requested is int value)
    {
        return Math.Clamp(value, 1, 100);
    }

    return Math.Max(1, _options.DefaultTopK);
}
/// <summary>
/// Builds a zero-result response (no cards, no synthesis) with the given
/// diagnostics mode, e.g. "empty" or "disabled".
/// </summary>
private UnifiedSearchResponse EmptyResponse(string query, int? topK, string mode)
{
    var diagnostics = new UnifiedSearchDiagnostics(0, 0, 0, 0, false, mode);
    return new UnifiedSearchResponse(query, ResolveTopK(topK), [], null, diagnostics);
}
/// <summary>
/// Reads a string-valued property from a metadata JSON object. Returns null
/// when the element is not an object, the property is missing, or its value
/// is not a JSON string.
/// </summary>
private static string? GetMetadataString(JsonElement metadata, string propertyName)
{
    if (metadata.ValueKind == JsonValueKind.Object
        && metadata.TryGetProperty(propertyName, out var property)
        && property.ValueKind == JsonValueKind.String)
    {
        return property.GetString();
    }

    return null;
}
/// <summary>
/// Generates "Did you mean?" suggestions by querying the trigram fuzzy index
/// and extracting the most relevant distinct titles from the fuzzy matches.
/// Returns up to 3 suggestions ordered by similarity, or null if none found.
/// Failures are logged and swallowed (suggestions are best-effort).
/// </summary>
/// <param name="query">Normalized user query.</param>
/// <param name="storeFilter">Same store filter used for the main search stages.</param>
private async Task<IReadOnlyList<SearchSuggestion>?> GenerateSuggestionsAsync(
    string query,
    KnowledgeSearchFilter? storeFilter,
    CancellationToken cancellationToken)
{
    const int maxSuggestions = 3;
    try
    {
        var timeout = TimeSpan.FromMilliseconds(Math.Max(250, _options.QueryTimeoutMs));
        var fuzzyRows = await _store.SearchFuzzyAsync(
            query,
            storeFilter,
            maxSuggestions * 3, // Fetch extra candidates to allow deduplication
            _options.FuzzySimilarityThreshold,
            timeout,
            cancellationToken).ConfigureAwait(false);
        if (fuzzyRows.Count == 0)
        {
            return null;
        }
        // Extract distinct suggestion terms from fuzzy match titles.
        // Each fuzzy row matched via trigram similarity, so its title
        // represents what the user likely intended to search for.
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var suggestions = new List<SearchSuggestion>();
        foreach (var row in fuzzyRows)
        {
            var text = ExtractSuggestionText(row, query);
            if (string.IsNullOrWhiteSpace(text) || !seen.Add(text))
            {
                continue;
            }
            suggestions.Add(new SearchSuggestion(text, $"Similar to \"{query}\""));
            if (suggestions.Count >= maxSuggestions)
            {
                break;
            }
        }
        return suggestions.Count > 0 ? suggestions : null;
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to generate search suggestions for query '{Query}'.", query);
        return null;
    }
}
/// <summary>
/// Extracts a clean suggestion text from a fuzzy-matched row: the trimmed
/// title, truncated to 60 characters. Returns null when the title is blank
/// or identical (case-insensitive) to the original query.
/// </summary>
private static string? ExtractSuggestionText(KnowledgeChunkRow row, string originalQuery)
{
    var candidate = row.Title?.Trim();
    if (string.IsNullOrWhiteSpace(candidate))
    {
        return null;
    }

    // Long titles are cut down to a display-friendly length.
    if (candidate.Length > 60)
    {
        candidate = candidate[..60].TrimEnd();
    }

    // Suggesting the user's own query back is useless — drop it.
    return candidate.Equals(originalQuery, StringComparison.OrdinalIgnoreCase)
        ? null
        : candidate;
}
/// <summary>
/// Returns a cached popularity map (entity_key -> click_count) for the given tenant.
/// The map is refreshed every 5 minutes to avoid per-query DB hits.
/// Returns null when loading fails (popularity boosting is best-effort).
/// </summary>
private async Task<IReadOnlyDictionary<string, int>?> GetPopularityMapAsync(
    string tenantId, CancellationToken cancellationToken)
{
    var now = _timeProvider.GetUtcNow();
    lock (_popularityMapLock)
    {
        // NOTE(review): the cache is not keyed by tenant — a still-fresh map loaded
        // for one tenant is returned for requests from any other tenant until it
        // expires. Confirm single-tenant ("global") usage, or key the cache by
        // tenantId (e.g. store the tenant alongside the cached map).
        if (_popularityMapCache is not null && now < _popularityMapExpiry)
        {
            return _popularityMapCache;
        }
    }
    try
    {
        // 30-day lookback window for click popularity.
        var map = await _analyticsService.GetPopularityMapAsync(tenantId, 30, cancellationToken)
            .ConfigureAwait(false);
        lock (_popularityMapLock)
        {
            _popularityMapCache = map;
            _popularityMapExpiry = now + PopularityCacheDuration;
        }
        return map;
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to load popularity map for tenant '{Tenant}'.", tenantId);
        return null;
    }
}
/// <summary>
/// Generates query refinement suggestions when search results are sparse or empty.
/// Checks three sources in order:
/// 1. Resolved quality alerts for similar queries (the resolution text becomes the refinement).
/// 2. Search history for successful queries that are similar to the current query.
/// 3. Entity aliases — if the query matches a known alias, suggest the canonical entity key.
/// Returns up to 3 refinements, or null if none found. Failures are logged and
/// swallowed; any refinements collected before the failure are still returned.
/// Sprint: G10-004
/// </summary>
private async Task<IReadOnlyList<SearchRefinement>?> GenerateRefinementsAsync(
    string tenantId, string query, int resultCount, CancellationToken ct)
{
    if (resultCount >= RefinementResultThreshold)
    {
        return null;
    }
    var refinements = new List<SearchRefinement>();
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    const int maxRefinements = 3;
    try
    {
        // 1. Check resolved alerts for similar queries
        var resolvedAlerts = await _qualityMonitor.GetAlertsAsync(
            tenantId, status: "resolved", limit: 50, ct: ct).ConfigureAwait(false);
        foreach (var alert in resolvedAlerts)
        {
            if (refinements.Count >= maxRefinements) break;
            if (string.IsNullOrWhiteSpace(alert.Resolution)) continue;
            // Only reuse resolutions whose original query is reasonably similar.
            var similarity = TrigramSimilarity(query, alert.Query);
            if (similarity < 0.2) continue;
            var text = alert.Resolution.Trim();
            if (text.Length > 120) text = text[..120].TrimEnd();
            if (seen.Add(text))
            {
                refinements.Add(new SearchRefinement(text, "resolved_alert"));
            }
        }
        // 2. Check search_history for successful similar queries (via pg_trgm)
        if (refinements.Count < maxRefinements)
        {
            var similarQueries = await _analyticsService.FindSimilarSuccessfulQueriesAsync(
                tenantId, query, maxRefinements - refinements.Count, ct).ConfigureAwait(false);
            foreach (var similarQuery in similarQueries)
            {
                if (refinements.Count >= maxRefinements) break;
                if (seen.Add(similarQuery))
                {
                    refinements.Add(new SearchRefinement(similarQuery, "similar_successful_query"));
                }
            }
        }
        // 3. Check entity aliases — if the query matches a known alias, suggest the canonical key
        if (refinements.Count < maxRefinements)
        {
            var aliasMatches = await _entityAliasService.ResolveAliasesAsync(query, ct).ConfigureAwait(false);
            foreach (var (entityKey, _) in aliasMatches)
            {
                if (refinements.Count >= maxRefinements) break;
                if (!string.IsNullOrWhiteSpace(entityKey) && seen.Add(entityKey))
                {
                    refinements.Add(new SearchRefinement(entityKey, "entity_alias"));
                }
            }
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to generate query refinements for '{Query}'.", query);
    }
    return refinements.Count > 0 ? refinements : null;
}
/// <summary>
/// Computes Jaccard similarity over character trigrams of two strings.
/// Used as an in-memory approximation of PostgreSQL pg_trgm similarity().
/// Null/whitespace input yields 0; comparison is lowercase-invariant.
/// </summary>
internal static double TrigramSimilarity(string a, string b)
{
    if (string.IsNullOrWhiteSpace(a) || string.IsNullOrWhiteSpace(b))
    {
        return 0d;
    }

    var left = GetTrigrams(a.ToLowerInvariant());
    var right = GetTrigrams(b.ToLowerInvariant());

    // Jaccard coefficient: |A ∩ B| / |A ∪ B|, with |A ∪ B| = |A| + |B| - |A ∩ B|.
    var shared = left.Count(right.Contains);
    var total = left.Count + right.Count - shared;
    return total == 0 ? 0d : (double)shared / total;
}

/// <summary>
/// Produces the set of character trigrams of <paramref name="value"/>,
/// padded with a single space on each side to emit edge trigrams
/// (approximating pg_trgm's padding behavior).
/// </summary>
private static HashSet<string> GetTrigrams(string value)
{
    var padded = $" {value} ";
    var trigrams = new HashSet<string>(StringComparer.Ordinal);
    for (var start = 0; start + 3 <= padded.Length; start++)
    {
        trigrams.Add(padded.Substring(start, 3));
    }
    return trigrams;
}
/// <summary>
/// Records a telemetry event for the completed search (hashed query, intent,
/// result stats, domain weights, and the distinct domains of the top 5 cards).
/// No-op when no sink is registered.
/// </summary>
private void EmitTelemetry(QueryPlan plan, UnifiedSearchResponse response, string tenant)
{
    if (_telemetrySink is null)
    {
        return;
    }

    // Distinct, case-insensitively sorted domains among the first five cards.
    var domainSet = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var card in response.Cards.Take(5))
    {
        if (!string.IsNullOrWhiteSpace(card.Domain))
        {
            domainSet.Add(card.Domain);
        }
    }

    var telemetryEvent = new UnifiedSearchTelemetryEvent(
        Tenant: tenant,
        QueryHash: UnifiedSearchTelemetryHash.HashQuery(response.Query),
        Intent: plan.Intent,
        ResultCount: response.Cards.Count,
        DurationMs: response.Diagnostics.DurationMs,
        UsedVector: response.Diagnostics.UsedVector,
        DomainWeights: new Dictionary<string, double>(plan.DomainWeights, StringComparer.Ordinal),
        TopDomains: domainSet.ToArray());
    _telemetrySink.Record(telemetryEvent);
}
}

View File

@@ -0,0 +1,75 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Hosting;
using StellaOps.AdvisoryAI.UnifiedSearch.Adapters;
using StellaOps.AdvisoryAI.UnifiedSearch.Analytics;
using StellaOps.AdvisoryAI.UnifiedSearch.QueryUnderstanding;
using StellaOps.AdvisoryAI.UnifiedSearch.Synthesis;
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// DI registration for the unified search feature: query understanding,
/// analytics, synthesis, ingestion adapters, indexer, telemetry, and the
/// core <see cref="IUnifiedSearchService"/>.
/// </summary>
public static class UnifiedSearchServiceCollectionExtensions
{
    /// <summary>
    /// Registers all unified search services. Uses TryAdd* so callers can
    /// pre-register replacements; ingestion adapters are additive (AddSingleton)
    /// because multiple implementations are resolved together.
    /// </summary>
    /// <returns>The same <paramref name="services"/> instance for chaining.</returns>
    public static IServiceCollection AddUnifiedSearch(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);
        // Query understanding pipeline
        services.TryAddSingleton<EntityExtractor>();
        services.TryAddSingleton<IntentClassifier>();
        services.TryAddSingleton<DomainWeightCalculator>();
        services.TryAddSingleton<QueryPlanBuilder>();
        // Search analytics and history (Sprint 106 / G6)
        services.TryAddSingleton<SearchAnalyticsService>();
        // Search quality monitoring and feedback (Sprint 110 / G10)
        services.TryAddSingleton<SearchQualityMonitor>();
        // Synthesis (Sprint 104 / G3 — LLM-grounded synthesis with template fallback)
        services.TryAddSingleton<SynthesisTemplateEngine>();
        services.TryAddSingleton<LlmSynthesisEngine>();
        services.TryAddSingleton<CompositeSynthesisEngine>();
        services.TryAddSingleton<ISynthesisEngine>(provider =>
            provider.GetRequiredService<CompositeSynthesisEngine>());
        // Entity alias service
        services.TryAddSingleton<IEntityAliasService, EntityAliasService>();
        // Snapshot-based ingestion adapters (static fixture data)
        services.AddSingleton<ISearchIngestionAdapter, FindingIngestionAdapter>();
        services.AddSingleton<ISearchIngestionAdapter, VexStatementIngestionAdapter>();
        services.AddSingleton<ISearchIngestionAdapter, PolicyRuleIngestionAdapter>();
        services.AddSingleton<ISearchIngestionAdapter, PlatformCatalogIngestionAdapter>();
        // Live data adapters (Sprint 103 / G2) -- call upstream microservices with snapshot fallback
        services.AddSingleton<ISearchIngestionAdapter, FindingsSearchAdapter>();
        services.AddSingleton<ISearchIngestionAdapter, VexSearchAdapter>();
        services.AddSingleton<ISearchIngestionAdapter, PolicySearchAdapter>();
        // Named HttpClients for live adapters
        services.AddHttpClient("scanner-internal");
        services.AddHttpClient("vex-internal");
        services.AddHttpClient("policy-internal");
        // Named HttpClient for LLM synthesis (Sprint 104 / G3)
        services.AddHttpClient("llm-synthesis");
        // Indexer
        services.TryAddSingleton<UnifiedSearchIndexer>();
        services.TryAddSingleton<IUnifiedSearchIndexer>(provider => provider.GetRequiredService<UnifiedSearchIndexer>());
        services.TryAddEnumerable(ServiceDescriptor.Singleton<IHostedService, UnifiedSearchIndexRefreshService>());
        // Telemetry
        services.TryAddSingleton<IUnifiedSearchTelemetrySink, LoggingUnifiedSearchTelemetrySink>();
        // Core search service
        services.TryAddSingleton<IUnifiedSearchService, UnifiedSearchService>();
        return services;
    }
}

View File

@@ -0,0 +1,69 @@
using Microsoft.Extensions.Logging;
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using System.Linq;
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// Immutable snapshot of a single unified-search execution, handed to
/// <see cref="IUnifiedSearchTelemetrySink"/> implementations for recording.
/// </summary>
/// <param name="Tenant">Tenant identifier the query executed under.</param>
/// <param name="QueryHash">Hash of the raw query text (presumably produced by <see cref="UnifiedSearchTelemetryHash.HashQuery"/> — confirm at the call site) so the query itself is never logged.</param>
/// <param name="Intent">Query intent label assigned by the caller.</param>
/// <param name="ResultCount">Number of results returned for the query.</param>
/// <param name="DurationMs">End-to-end search duration in milliseconds.</param>
/// <param name="UsedVector">Whether the vector retrieval path was used for this search.</param>
/// <param name="DomainWeights">Per-domain weight multipliers that were in effect for this query.</param>
/// <param name="TopDomains">Domains associated with the top results; sinks may reorder for display.</param>
public sealed record UnifiedSearchTelemetryEvent(
    string Tenant,
    string QueryHash,
    string Intent,
    int ResultCount,
    long DurationMs,
    bool UsedVector,
    IReadOnlyDictionary<string, double> DomainWeights,
    IReadOnlyList<string> TopDomains);
/// <summary>
/// Receives <see cref="UnifiedSearchTelemetryEvent"/> instances for recording
/// (e.g. structured logging or metrics export).
/// </summary>
public interface IUnifiedSearchTelemetrySink
{
    /// <summary>Records a single search telemetry event.</summary>
    void Record(UnifiedSearchTelemetryEvent telemetryEvent);
}
/// <summary>
/// Telemetry sink that renders each search event as one structured, information-level
/// log line. Weight and domain lists are sorted ordinally so output is deterministic.
/// </summary>
internal sealed class LoggingUnifiedSearchTelemetrySink : IUnifiedSearchTelemetrySink
{
    private readonly ILogger<LoggingUnifiedSearchTelemetrySink> _logger;

    public LoggingUnifiedSearchTelemetrySink(ILogger<LoggingUnifiedSearchTelemetrySink> logger)
        => _logger = logger ?? throw new ArgumentNullException(nameof(logger));

    /// <summary>Writes the event as a single structured log entry.</summary>
    public void Record(UnifiedSearchTelemetryEvent telemetryEvent)
    {
        ArgumentNullException.ThrowIfNull(telemetryEvent);

        // Render "domain:weight" pairs in stable ordinal key order with invariant formatting.
        var weightParts = new List<string>(telemetryEvent.DomainWeights.Count);
        foreach (var pair in telemetryEvent.DomainWeights.OrderBy(static p => p.Key, StringComparer.Ordinal))
        {
            weightParts.Add($"{pair.Key}:{pair.Value.ToString("F3", CultureInfo.InvariantCulture)}");
        }
        var weightSummary = string.Join(",", weightParts);

        // "-" is the sentinel for "no top domains reported".
        var domainSummary = telemetryEvent.TopDomains.Count > 0
            ? string.Join(",", telemetryEvent.TopDomains.OrderBy(static d => d, StringComparer.Ordinal))
            : "-";

        _logger.LogInformation(
            "unified_search telemetry tenant={Tenant} query_hash={QueryHash} intent={Intent} results={ResultCount} duration_ms={DurationMs} used_vector={UsedVector} top_domains={TopDomains} weights={Weights}",
            telemetryEvent.Tenant,
            telemetryEvent.QueryHash,
            telemetryEvent.Intent,
            telemetryEvent.ResultCount,
            telemetryEvent.DurationMs,
            telemetryEvent.UsedVector,
            domainSummary,
            weightSummary);
    }
}
/// <summary>
/// Hashes query text so telemetry can correlate identical queries
/// without recording their content.
/// </summary>
internal static class UnifiedSearchTelemetryHash
{
    /// <summary>
    /// Computes the lowercase hexadecimal SHA-256 digest of <paramref name="query"/>.
    /// </summary>
    /// <param name="query">Raw query text; must not be null.</param>
    /// <returns>A 64-character lowercase hex string.</returns>
    public static string HashQuery(string query)
    {
        ArgumentNullException.ThrowIfNull(query);

        // Hash straight into a stack buffer (SHA-256 digests are always 32 bytes).
        Span<byte> digest = stackalloc byte[32];
        SHA256.HashData(Encoding.UTF8.GetBytes(query), digest);

        // "x2" renders each byte as two lowercase hex digits.
        var builder = new StringBuilder(digest.Length * 2);
        foreach (var value in digest)
        {
            builder.Append(value.ToString("x2", CultureInfo.InvariantCulture));
        }

        return builder.ToString();
    }
}

View File

@@ -0,0 +1,245 @@
using StellaOps.AdvisoryAI.KnowledgeSearch;
namespace StellaOps.AdvisoryAI.UnifiedSearch;
/// <summary>
/// Fuses lexical and vector retrieval results with weighted Reciprocal Rank Fusion (RRF)
/// and applies optional additive boosts (entity proximity, freshness, popularity).
/// </summary>
internal static class WeightedRrfFusion
{
    /// <summary>RRF smoothing constant (the "k" in 1/(k + rank)); dampens the dominance of top ranks.</summary>
    private const int ReciprocalRankConstant = 60;

    /// <summary>Flat additive boost when a chunk's metadata matches a detected entity mention.</summary>
    private const double EntityProximityBoost = 0.8;

    /// <summary>Maximum additive freshness boost, earned by content dated at the reference time.</summary>
    private const double MaxFreshnessBoost = 0.05;

    /// <summary>Content older than this many days earns no freshness boost.</summary>
    private const int FreshnessDaysCap = 365;

    /// <summary>
    /// Merges lexical and vector rank lists via weighted RRF, applies the configured boosts,
    /// and returns the results ordered by descending score with deterministic ordinal
    /// tie-breaking on (Kind, ChunkId). Each entry carries a debug map explaining its score.
    /// </summary>
    /// <param name="domainWeights">Per-domain score multipliers; absent domains default to 1.0.</param>
    /// <param name="lexicalRanks">Lexical hits with their ranks (non-positive ranks contribute nothing).</param>
    /// <param name="vectorRanks">Vector hits with their ranks and raw similarity scores.</param>
    /// <param name="query">Original query text. Currently unused here; kept for interface stability.</param>
    /// <param name="filters">Optional filters. Currently unused here; kept for interface stability.</param>
    /// <param name="detectedEntities">Entity mentions detected in the query, used for proximity boosting.</param>
    /// <param name="enableFreshnessBoost">Enables the freshness boost when true.</param>
    /// <param name="referenceTime">"Now" for freshness computation; defaults to the Unix epoch when null.</param>
    /// <param name="popularityMap">Click counts keyed by entity key, used for popularity boosting.</param>
    /// <param name="popularityBoostWeight">Multiplier for the logarithmic popularity boost; 0 disables it.</param>
    public static IReadOnlyList<(KnowledgeChunkRow Row, double Score, IReadOnlyDictionary<string, string> Debug)> Fuse(
        IReadOnlyDictionary<string, double> domainWeights,
        IReadOnlyDictionary<string, (string ChunkId, int Rank, KnowledgeChunkRow Row)> lexicalRanks,
        IReadOnlyList<(KnowledgeChunkRow Row, int Rank, double Score)> vectorRanks,
        string query,
        UnifiedSearchFilter? filters,
        IReadOnlyList<EntityMention>? detectedEntities = null,
        bool enableFreshnessBoost = false,
        DateTimeOffset? referenceTime = null,
        IReadOnlyDictionary<string, int>? popularityMap = null,
        double popularityBoostWeight = 0.0)
    {
        var merged = new Dictionary<string, (KnowledgeChunkRow Row, double Score, Dictionary<string, string> Debug)>(StringComparer.Ordinal);

        // Seed the merge map with lexical hits: weighted RRF contribution plus debug breadcrumbs.
        foreach (var lexical in lexicalRanks.Values)
        {
            var domainWeight = GetDomainWeight(domainWeights, lexical.Row);
            var score = domainWeight * ReciprocalRank(lexical.Rank);
            var debug = new Dictionary<string, string>(StringComparer.Ordinal)
            {
                // Invariant culture keeps debug output byte-stable regardless of server locale.
                ["lexicalRank"] = lexical.Rank.ToString(System.Globalization.CultureInfo.InvariantCulture),
                ["lexicalScore"] = lexical.Row.LexicalScore.ToString("F6", System.Globalization.CultureInfo.InvariantCulture),
                ["domainWeight"] = domainWeight.ToString("F4", System.Globalization.CultureInfo.InvariantCulture)
            };
            merged[lexical.ChunkId] = (lexical.Row, score, debug);
        }

        // Fold in vector hits, accumulating onto the lexical entry when a chunk appears in both lists.
        foreach (var vector in vectorRanks)
        {
            if (!merged.TryGetValue(vector.Row.ChunkId, out var existing))
            {
                var domainWeight = GetDomainWeight(domainWeights, vector.Row);
                existing = (vector.Row, 0d, new Dictionary<string, string>(StringComparer.Ordinal)
                {
                    ["domainWeight"] = domainWeight.ToString("F4", System.Globalization.CultureInfo.InvariantCulture)
                });
            }

            var vecDomainWeight = GetDomainWeight(domainWeights, vector.Row);
            existing.Score += vecDomainWeight * ReciprocalRank(vector.Rank);
            existing.Debug["vectorRank"] = vector.Rank.ToString(System.Globalization.CultureInfo.InvariantCulture);
            existing.Debug["vectorScore"] = vector.Score.ToString("F6", System.Globalization.CultureInfo.InvariantCulture);
            merged[vector.Row.ChunkId] = existing;
        }

        // Apply additive boosts, record them in the debug map, and order deterministically.
        var ranked = merged.Values
            .Select(item =>
            {
                var entityBoost = ComputeEntityProximityBoost(item.Row, detectedEntities);
                var freshnessBoost = enableFreshnessBoost
                    ? ComputeFreshnessBoost(item.Row, referenceTime ?? DateTimeOffset.UnixEpoch)
                    : 0d;
                var popBoost = ComputePopularityBoost(item.Row, popularityMap, popularityBoostWeight);
                item.Score += entityBoost + freshnessBoost + popBoost;
                item.Debug["entityBoost"] = entityBoost.ToString("F6", System.Globalization.CultureInfo.InvariantCulture);
                item.Debug["freshnessBoost"] = freshnessBoost.ToString("F6", System.Globalization.CultureInfo.InvariantCulture);
                item.Debug["popularityBoost"] = popBoost.ToString("F6", System.Globalization.CultureInfo.InvariantCulture);
                item.Debug["chunkId"] = item.Row.ChunkId;
                return item;
            })
            .OrderByDescending(static item => item.Score)
            .ThenBy(static item => item.Row.Kind, StringComparer.Ordinal)
            .ThenBy(static item => item.Row.ChunkId, StringComparer.Ordinal)
            .Select(static item => (item.Row, item.Score, (IReadOnlyDictionary<string, string>)item.Debug))
            .ToArray();
        return ranked;
    }

    /// <summary>Standard RRF term 1/(k + rank); non-positive ranks contribute nothing.</summary>
    private static double ReciprocalRank(int rank)
    {
        if (rank <= 0)
        {
            return 0d;
        }
        return 1d / (ReciprocalRankConstant + rank);
    }

    /// <summary>Looks up the multiplier for the row's domain, defaulting to 1.0 when unconfigured.</summary>
    private static double GetDomainWeight(IReadOnlyDictionary<string, double> domainWeights, KnowledgeChunkRow row)
    {
        var domain = GetRowDomain(row);
        return domainWeights.TryGetValue(domain, out var weight) ? weight : 1.0;
    }

    /// <summary>
    /// Resolves a row's domain: an explicit "domain" metadata string wins, otherwise the
    /// chunk kind is mapped to a domain with "knowledge" as the catch-all.
    /// </summary>
    private static string GetRowDomain(KnowledgeChunkRow row)
    {
        if (row.Metadata.RootElement.TryGetProperty("domain", out var domainProp) &&
            domainProp.ValueKind == System.Text.Json.JsonValueKind.String)
        {
            return domainProp.GetString() ?? "knowledge";
        }
        return row.Kind switch
        {
            "finding" => "findings",
            "vex_statement" => "vex",
            "policy_rule" => "policy",
            "platform_entity" => "platform",
            "md_section" => "knowledge",
            "api_operation" => "knowledge",
            "doctor_check" => "knowledge",
            _ => "knowledge"
        };
    }

    /// <summary>
    /// Returns <see cref="EntityProximityBoost"/> when the row's "entity_key" metadata contains,
    /// or "cveId" metadata equals, any detected entity mention (case-insensitive); otherwise 0.
    /// </summary>
    private static double ComputeEntityProximityBoost(
        KnowledgeChunkRow row,
        IReadOnlyList<EntityMention>? detectedEntities)
    {
        if (detectedEntities is not { Count: > 0 })
        {
            return 0d;
        }
        var metadata = row.Metadata.RootElement;
        if (metadata.ValueKind != System.Text.Json.JsonValueKind.Object)
        {
            return 0d;
        }
        // Check entity_key match (substring, case-insensitive).
        if (metadata.TryGetProperty("entity_key", out var entityKeyProp) &&
            entityKeyProp.ValueKind == System.Text.Json.JsonValueKind.String)
        {
            var entityKey = entityKeyProp.GetString();
            if (!string.IsNullOrWhiteSpace(entityKey))
            {
                foreach (var mention in detectedEntities)
                {
                    if (entityKey.Contains(mention.Value, StringComparison.OrdinalIgnoreCase))
                    {
                        return EntityProximityBoost;
                    }
                }
            }
        }
        // Check cveId in metadata (exact, case-insensitive).
        if (metadata.TryGetProperty("cveId", out var cveIdProp) &&
            cveIdProp.ValueKind == System.Text.Json.JsonValueKind.String)
        {
            var cveId = cveIdProp.GetString();
            if (!string.IsNullOrWhiteSpace(cveId))
            {
                foreach (var mention in detectedEntities)
                {
                    if (cveId.Equals(mention.Value, StringComparison.OrdinalIgnoreCase))
                    {
                        return EntityProximityBoost;
                    }
                }
            }
        }
        return 0d;
    }

    /// <summary>
    /// Computes a linearly decaying boost from the row's "freshness" metadata timestamp:
    /// <see cref="MaxFreshnessBoost"/> at age 0, zero at <see cref="FreshnessDaysCap"/> days or older.
    /// Unparseable or missing timestamps yield 0.
    /// </summary>
    private static double ComputeFreshnessBoost(KnowledgeChunkRow row, DateTimeOffset referenceTime)
    {
        var metadata = row.Metadata.RootElement;
        if (metadata.ValueKind != System.Text.Json.JsonValueKind.Object)
        {
            return 0d;
        }
        if (!metadata.TryGetProperty("freshness", out var freshnessProp) ||
            freshnessProp.ValueKind != System.Text.Json.JsonValueKind.String)
        {
            return 0d;
        }
        // Parse with the invariant culture so ranking does not vary with the host's locale
        // (the culture-sensitive overload previously made freshness boosts nondeterministic
        // across differently-configured servers).
        if (!DateTimeOffset.TryParse(
                freshnessProp.GetString(),
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeOffsetStyles.None,
                out var freshness))
        {
            return 0d;
        }
        var daysSinceFresh = (referenceTime - freshness).TotalDays;
        if (daysSinceFresh < 0)
        {
            // Future-dated content is treated as maximally fresh rather than penalized.
            daysSinceFresh = 0;
        }
        if (daysSinceFresh >= FreshnessDaysCap)
        {
            return 0d;
        }
        return MaxFreshnessBoost * (1d - daysSinceFresh / FreshnessDaysCap);
    }

    /// <summary>
    /// Computes an additive popularity boost based on click-through frequency.
    /// Uses a logarithmic function to provide diminishing returns for very popular items,
    /// preventing feedback loops.
    /// </summary>
    private static double ComputePopularityBoost(
        KnowledgeChunkRow row,
        IReadOnlyDictionary<string, int>? popularityMap,
        double popularityBoostWeight)
    {
        if (popularityMap is null || popularityMap.Count == 0 || popularityBoostWeight <= 0d)
        {
            return 0d;
        }
        var metadata = row.Metadata.RootElement;
        if (metadata.ValueKind != System.Text.Json.JsonValueKind.Object)
        {
            return 0d;
        }
        string? entityKey = null;
        if (metadata.TryGetProperty("entity_key", out var entityKeyProp) &&
            entityKeyProp.ValueKind == System.Text.Json.JsonValueKind.String)
        {
            entityKey = entityKeyProp.GetString();
        }
        if (string.IsNullOrWhiteSpace(entityKey))
        {
            return 0d;
        }
        if (!popularityMap.TryGetValue(entityKey, out var clickCount) || clickCount <= 0)
        {
            return 0d;
        }
        // Logarithmic boost: log2(1 + clickCount) * weight
        return Math.Log2(1 + clickCount) * popularityBoostWeight;
    }
}

View File

@@ -0,0 +1,380 @@
// ---------------------------------------------------------------------------
// OnnxVectorEncoder — Semantic vector encoder using ONNX Runtime inference.
//
// NuGet dependency required (not yet added to .csproj):
// <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.17.*" />
//
// This implementation is structured for the all-MiniLM-L6-v2 sentence-transformer
// model. It performs simplified WordPiece tokenization, ONNX inference, mean-pooling,
// and L2-normalization to produce 384-dimensional embedding vectors.
//
// Until the OnnxRuntime NuGet package is installed, the encoder operates in
// "stub" mode: it falls back to a deterministic projection that preserves the
// correct 384-dim output shape and L2-normalization contract. The stub uses
// character n-gram hashing to produce vectors that are structurally valid but
// lack true semantic quality. When the ONNX runtime is available and the model
// file exists, true inference takes over automatically.
// ---------------------------------------------------------------------------
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
namespace StellaOps.AdvisoryAI.Vectorization;
/// <summary>
/// Semantic vector encoder that produces 384-dimensional embeddings using an ONNX
/// sentence-transformer model (e.g. all-MiniLM-L6-v2). Thread-safe and disposable.
/// Falls back to a deterministic character-ngram projection when the ONNX runtime
/// or model file is unavailable.
/// </summary>
internal sealed class OnnxVectorEncoder : IVectorEncoder, IDisposable
{
    /// <summary>Output dimensionality matching the all-MiniLM-L6-v2 model.</summary>
    internal const int OutputDimensions = 384;
    /// <summary>Maximum token sequence length accepted by the model.</summary>
    private const int MaxSequenceLength = 512;
    // Matches either a run of word characters or a single non-space, non-word character
    // (i.e. punctuation). Compiled and culture-invariant for deterministic tokenization.
    private static readonly Regex WordTokenRegex = new(
        @"[\w]+|[^\s\w]",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);
    private readonly ILogger<OnnxVectorEncoder> _logger;
    private readonly string _modelPath;
    private readonly bool _onnxAvailable;
    private readonly object? _onnxSession; // Microsoft.ML.OnnxRuntime.InferenceSession when available
    // volatile so a Dispose() on one thread is promptly visible to Encode() on another.
    private volatile bool _disposed;
    /// <summary>
    /// Gets a value indicating whether this encoder is using true ONNX inference
    /// or the deterministic fallback projection.
    /// </summary>
    public bool IsOnnxInferenceActive => _onnxAvailable && _onnxSession is not null;
    /// <summary>
    /// Initializes the encoder, eagerly probing for the ONNX runtime and model file.
    /// A failed probe is never retried; the encoder stays in fallback mode for its lifetime.
    /// </summary>
    /// <param name="modelPath">Path to the .onnx model file; null is treated as empty (fallback mode).</param>
    /// <param name="logger">Logger; must not be null.</param>
    public OnnxVectorEncoder(string modelPath, ILogger<OnnxVectorEncoder> logger)
    {
        ArgumentNullException.ThrowIfNull(logger);
        _logger = logger;
        _modelPath = modelPath ?? string.Empty;
        // _logger must be assigned before this call: TryLoadOnnxSession logs on failure.
        _onnxAvailable = TryLoadOnnxSession(_modelPath, out _onnxSession);
        if (_onnxAvailable)
        {
            _logger.LogInformation(
                "ONNX vector encoder initialized with model at {ModelPath}. Semantic inference is active.",
                _modelPath);
        }
        else
        {
            _logger.LogWarning(
                "ONNX vector encoder could not load model at {ModelPath}. " +
                "Using deterministic character-ngram fallback. Semantic search quality will be reduced.",
                _modelPath);
        }
    }
    /// <summary>
    /// Encodes <paramref name="text"/> into a 384-dimensional, L2-normalized vector.
    /// Uses ONNX inference when a session was loaded, otherwise the deterministic fallback.
    /// </summary>
    /// <exception cref="ObjectDisposedException">Thrown if the encoder has been disposed.</exception>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="text"/> is null.</exception>
    public float[] Encode(string text)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);
        ArgumentNullException.ThrowIfNull(text);
        if (_onnxAvailable && _onnxSession is not null)
        {
            return RunOnnxInference(text);
        }
        return FallbackEncode(text);
    }
    /// <summary>Releases the reflection-loaded ONNX session, if one was created. Idempotent.</summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;
        // The session is held as object; only dispose it if it actually implements IDisposable.
        if (_onnxSession is IDisposable disposable)
        {
            disposable.Dispose();
        }
    }
    // ------------------------------------------------------------------
    // ONNX Runtime inference path (requires Microsoft.ML.OnnxRuntime)
    // ------------------------------------------------------------------
    /// <summary>
    /// Attempts to load the ONNX model via reflection so the code compiles
    /// without a hard dependency on the OnnxRuntime NuGet package.
    /// </summary>
    /// <param name="modelPath">Candidate model file path; must exist on disk.</param>
    /// <param name="session">Receives the InferenceSession instance (as object) on success.</param>
    /// <returns>True when a session was created; false triggers fallback mode.</returns>
    private bool TryLoadOnnxSession(string modelPath, out object? session)
    {
        session = null;
        if (string.IsNullOrWhiteSpace(modelPath) || !File.Exists(modelPath))
        {
            _logger.LogDebug("ONNX model file not found at {ModelPath}.", modelPath);
            return false;
        }
        try
        {
            // Attempt to load OnnxRuntime via reflection.
            // This allows the code to compile and run without the NuGet package.
            var onnxRuntimeAssembly = AppDomain.CurrentDomain.GetAssemblies()
                .FirstOrDefault(a => a.GetName().Name == "Microsoft.ML.OnnxRuntime");
            if (onnxRuntimeAssembly is null)
            {
                // Try explicit load from the application's probing path
                try
                {
                    onnxRuntimeAssembly = System.Reflection.Assembly.Load("Microsoft.ML.OnnxRuntime");
                }
                catch
                {
                    // Best-effort probe: a missing assembly is an expected condition, not an error.
                    _logger.LogDebug(
                        "Microsoft.ML.OnnxRuntime assembly not found. " +
                        "Install the NuGet package to enable semantic ONNX inference.");
                    return false;
                }
            }
            var sessionType = onnxRuntimeAssembly.GetType("Microsoft.ML.OnnxRuntime.InferenceSession");
            if (sessionType is null)
            {
                _logger.LogDebug("InferenceSession type not found in OnnxRuntime assembly.");
                return false;
            }
            // Create InferenceSession(modelPath)
            session = Activator.CreateInstance(sessionType, modelPath);
            return session is not null;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "Failed to initialize ONNX InferenceSession from {ModelPath}.", modelPath);
            return false;
        }
    }
    /// <summary>
    /// Runs ONNX inference using reflection-based invocation of the OnnxRuntime API.
    /// Produces 384-dim mean-pooled, L2-normalized embeddings.
    ///
    /// When the Microsoft.ML.OnnxRuntime NuGet package is properly installed, replace
    /// the reflection-based stub below with direct typed calls:
    /// <code>
    /// var tokens = SimpleWordPieceTokenize(text);
    /// var inputIds = new long[MaxSequenceLength];
    /// var attentionMask = new long[MaxSequenceLength];
    /// var tokenTypeIds = new long[MaxSequenceLength];
    /// inputIds[0] = 101; // [CLS]
    /// attentionMask[0] = 1;
    /// var seqLen = Math.Min(tokens.Count, MaxSequenceLength - 2);
    /// for (var i = 0; i &lt; seqLen; i++) { inputIds[i+1] = tokens[i]; attentionMask[i+1] = 1; }
    /// inputIds[seqLen + 1] = 102; // [SEP]
    /// attentionMask[seqLen + 1] = 1;
    /// var actualLength = seqLen + 2;
    /// var inputIdsTensor = new DenseTensor&lt;long&gt;(inputIds, [1, MaxSequenceLength]);
    /// var maskTensor = new DenseTensor&lt;long&gt;(attentionMask, [1, MaxSequenceLength]);
    /// var typeTensor = new DenseTensor&lt;long&gt;(tokenTypeIds, [1, MaxSequenceLength]);
    /// var inputs = new List&lt;NamedOnnxValue&gt;
    /// {
    ///     NamedOnnxValue.CreateFromTensor("input_ids", inputIdsTensor),
    ///     NamedOnnxValue.CreateFromTensor("attention_mask", maskTensor),
    ///     NamedOnnxValue.CreateFromTensor("token_type_ids", typeTensor)
    /// };
    /// using var results = _session.Run(inputs);
    /// var outputTensor = results.First().AsTensor&lt;float&gt;();
    /// var embedding = MeanPool(outputTensor, actualLength);
    /// L2Normalize(embedding);
    /// return embedding;
    /// </code>
    /// </summary>
    private float[] RunOnnxInference(string text)
    {
        // NOTE(review): as written, this method always ends in FallbackEncode — the
        // reflection probes below only verify API availability; real tensor I/O needs
        // the typed OnnxRuntime package (see the doc comment above).
        try
        {
            // Verify the session has the expected Run method via reflection.
            var sessionType = _onnxSession!.GetType();
            var runMethod = sessionType.GetMethods()
                .FirstOrDefault(m => m.Name == "Run" && m.GetParameters().Length == 1);
            if (runMethod is null)
            {
                _logger.LogDebug("InferenceSession.Run method not found. Falling back.");
                return FallbackEncode(text);
            }
            // Verify NamedOnnxValue.CreateFromTensor is available via reflection.
            var namedOnnxValueType = sessionType.Assembly
                .GetType("Microsoft.ML.OnnxRuntime.NamedOnnxValue");
            var createMethod = namedOnnxValueType?.GetMethods()
                .FirstOrDefault(m => m.Name == "CreateFromTensor" && m.IsGenericMethod)
                ?.MakeGenericMethod(typeof(long));
            if (createMethod is null)
            {
                _logger.LogDebug("NamedOnnxValue.CreateFromTensor<long> not found. Falling back.");
                return FallbackEncode(text);
            }
            // Full tensor creation and session.Run() requires the OnnxRuntime NuGet
            // package with DenseTensor<T> support. Until the package is added,
            // fall back to the deterministic character-ngram encoder.
            _logger.LogDebug(
                "ONNX tensor creation via reflection is not fully supported. " +
                "Using deterministic fallback until Microsoft.ML.OnnxRuntime NuGet is added.");
            return FallbackEncode(text);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "ONNX inference failed. Falling back to deterministic encoding.");
            return FallbackEncode(text);
        }
    }
    // ------------------------------------------------------------------
    // Simplified WordPiece tokenization (BERT-compatible)
    // ------------------------------------------------------------------
    /// <summary>
    /// Simplified tokenizer that splits text into word-level tokens, lowercases them,
    /// and maps each character trigram to a pseudo-vocab ID. This is a stand-in for
    /// the full WordPiece tokenizer (which requires vocab.txt from the model).
    /// When the ONNX model is properly deployed with its vocab.txt, replace this
    /// with a real WordPiece implementation.
    /// </summary>
    internal static List<int> SimpleWordPieceTokenize(string text)
    {
        var tokens = new List<int>(MaxSequenceLength);
        if (string.IsNullOrWhiteSpace(text))
        {
            return tokens;
        }
        var lower = text.ToLowerInvariant();
        var matches = WordTokenRegex.Matches(lower);
        foreach (Match match in matches)
        {
            if (tokens.Count >= MaxSequenceLength - 2) // Reserve space for [CLS] and [SEP]
            {
                break;
            }
            var word = match.Value;
            // Simple character-level hashing to produce stable token IDs
            // in the BERT vocab range (1000-30000 to avoid special tokens)
            if (word.Length <= 3)
            {
                tokens.Add(HashToVocabId(word));
            }
            else
            {
                // Split longer words into overlapping trigram "subwords"
                for (var i = 0; i < word.Length - 2 && tokens.Count < MaxSequenceLength - 2; i++)
                {
                    var piece = word.Substring(i, 3);
                    // "##" prefix mimics WordPiece continuation markers for non-initial pieces.
                    var id = HashToVocabId(i == 0 ? piece : "##" + piece);
                    tokens.Add(id);
                }
            }
        }
        return tokens;
    }
    /// <summary>
    /// Maps a token string to a stable integer in the BERT vocab range [1000, 30000).
    /// </summary>
    private static int HashToVocabId(string token)
    {
        var bytes = Encoding.UTF8.GetBytes(token);
        var hash = SHA256.HashData(bytes);
        // Fold the first 4 digest bytes into [0, 29000), then shift past the
        // reserved special-token ID range.
        var raw = BitConverter.ToUInt32(hash, 0);
        return (int)(raw % 29000) + 1000;
    }
    // ------------------------------------------------------------------
    // Deterministic fallback encoder (character n-gram hashing to 384-dim)
    // ------------------------------------------------------------------
    /// <summary>
    /// Produces a 384-dimensional vector using overlapping character n-gram hashing.
    /// This preserves the output shape and L2-normalization contract of the ONNX encoder
    /// but does not capture semantic similarity. It serves as a graceful degradation
    /// when the ONNX runtime or model file is unavailable.
    /// </summary>
    internal static float[] FallbackEncode(string text)
    {
        var vector = new float[OutputDimensions];
        if (string.IsNullOrWhiteSpace(text))
        {
            // Blank input yields the zero vector (L2Normalize leaves it untouched).
            return vector;
        }
        var lower = text.ToLowerInvariant();
        var matches = WordTokenRegex.Matches(lower);
        foreach (Match match in matches)
        {
            var word = match.Value;
            // Hash the whole word into a bucket
            var wordBytes = Encoding.UTF8.GetBytes(word);
            var wordHash = SHA256.HashData(wordBytes);
            // Distribute across multiple dimensions using different hash windows
            for (var window = 0; window < 4 && window * 4 + 4 <= wordHash.Length; window++)
            {
                var idx = (int)(BitConverter.ToUInt32(wordHash, window * 4) % (uint)OutputDimensions);
                // Use alternating signs for better distribution
                vector[idx] += (window % 2 == 0) ? 1f : -0.5f;
            }
            // Also hash character bigrams for sub-word signal
            for (var c = 0; c < word.Length - 1; c++)
            {
                var bigram = word.Substring(c, 2);
                var bigramBytes = Encoding.UTF8.GetBytes(bigram);
                var bigramHash = SHA256.HashData(bigramBytes);
                var bigramIdx = (int)(BitConverter.ToUInt32(bigramHash, 0) % (uint)OutputDimensions);
                vector[bigramIdx] += 0.3f;
            }
        }
        L2Normalize(vector);
        return vector;
    }
    // ------------------------------------------------------------------
    // Mean pooling and normalization utilities
    // ------------------------------------------------------------------
    /// <summary>
    /// L2-normalizes a vector in place so that its Euclidean length equals 1.0.
    /// A zero (or all-zero) vector is left unchanged to avoid division by zero.
    /// </summary>
    internal static void L2Normalize(float[] vector)
    {
        var sumSquares = 0f;
        for (var i = 0; i < vector.Length; i++)
        {
            sumSquares += vector[i] * vector[i];
        }
        if (sumSquares <= 0f)
        {
            return;
        }
        var length = MathF.Sqrt(sumSquares);
        for (var i = 0; i < vector.Length; i++)
        {
            vector[i] /= length;
        }
    }
}